author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 01:02:30 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-06 01:02:30 +0000
commit     76cb841cb886eef6b3bee341a2266c76578724ad (patch)
tree       f5892e5ba6cc11949952a6ce4ecbe6d516d6ce58 /tools
parent     Initial commit. (diff)
download   linux-76cb841cb886eef6b3bee341a2266c76578724ad.tar.xz
           linux-76cb841cb886eef6b3bee341a2266c76578724ad.zip
Adding upstream version 4.19.249. (upstream/4.19.249)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile179
-rw-r--r--tools/accounting/.gitignore1
-rw-r--r--tools/accounting/Makefile10
-rw-r--r--tools/accounting/getdelays.c551
-rw-r--r--tools/arch/alpha/include/asm/barrier.h9
-rw-r--r--tools/arch/alpha/include/uapi/asm/bitsperlong.h9
-rw-r--r--tools/arch/alpha/include/uapi/asm/errno.h128
-rw-r--r--tools/arch/alpha/include/uapi/asm/mman.h48
-rw-r--r--tools/arch/arc/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/arm/include/asm/barrier.h12
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h299
-rw-r--r--tools/arch/arm/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/arm/include/uapi/asm/perf_regs.h24
-rw-r--r--tools/arch/arm64/include/asm/barrier.h17
-rw-r--r--tools/arch/arm64/include/uapi/asm/bitsperlong.h24
-rw-r--r--tools/arch/arm64/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h312
-rw-r--r--tools/arch/arm64/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/arm64/include/uapi/asm/perf_regs.h41
-rw-r--r--tools/arch/arm64/include/uapi/asm/unistd.h20
-rw-r--r--tools/arch/h8300/include/asm/bitsperlong.h15
-rw-r--r--tools/arch/h8300/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/hexagon/include/uapi/asm/bitsperlong.h27
-rw-r--r--tools/arch/hexagon/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/ia64/include/asm/barrier.h46
-rw-r--r--tools/arch/ia64/include/uapi/asm/bitsperlong.h9
-rw-r--r--tools/arch/ia64/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/microblaze/include/uapi/asm/bitsperlong.h2
-rw-r--r--tools/arch/microblaze/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/mips/include/asm/barrier.h21
-rw-r--r--tools/arch/mips/include/asm/errno.h17
-rw-r--r--tools/arch/mips/include/uapi/asm/bitsperlong.h9
-rw-r--r--tools/arch/mips/include/uapi/asm/errno.h130
-rw-r--r--tools/arch/mips/include/uapi/asm/kvm.h209
-rw-r--r--tools/arch/mips/include/uapi/asm/mman.h47
-rw-r--r--tools/arch/parisc/include/uapi/asm/bitsperlong.h15
-rw-r--r--tools/arch/parisc/include/uapi/asm/errno.h127
-rw-r--r--tools/arch/parisc/include/uapi/asm/mman.h46
-rw-r--r--tools/arch/powerpc/include/asm/barrier.h30
-rw-r--r--tools/arch/powerpc/include/uapi/asm/bitsperlong.h13
-rw-r--r--tools/arch/powerpc/include/uapi/asm/errno.h10
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h677
-rw-r--r--tools/arch/powerpc/include/uapi/asm/mman.h16
-rw-r--r--tools/arch/powerpc/include/uapi/asm/perf_regs.h51
-rw-r--r--tools/arch/powerpc/include/uapi/asm/unistd.h404
-rw-r--r--tools/arch/riscv/include/uapi/asm/bitsperlong.h25
-rw-r--r--tools/arch/s390/include/asm/barrier.h31
-rw-r--r--tools/arch/s390/include/uapi/asm/bitsperlong.h13
-rw-r--r--tools/arch/s390/include/uapi/asm/bpf_perf_event.h9
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h277
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm_perf.h22
-rw-r--r--tools/arch/s390/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/s390/include/uapi/asm/perf_regs.h44
-rw-r--r--tools/arch/s390/include/uapi/asm/ptrace.h457
-rw-r--r--tools/arch/s390/include/uapi/asm/sie.h252
-rw-r--r--tools/arch/sh/include/asm/barrier.h33
-rw-r--r--tools/arch/sh/include/uapi/asm/mman.h7
-rw-r--r--tools/arch/sparc/include/asm/barrier.h9
-rw-r--r--tools/arch/sparc/include/asm/barrier_32.h7
-rw-r--r--tools/arch/sparc/include/asm/barrier_64.h43
-rw-r--r--tools/arch/sparc/include/uapi/asm/bitsperlong.h13
-rw-r--r--tools/arch/sparc/include/uapi/asm/errno.h118
-rw-r--r--tools/arch/sparc/include/uapi/asm/mman.h16
-rw-r--r--tools/arch/x86/include/asm/atomic.h73
-rw-r--r--tools/arch/x86/include/asm/barrier.h29
-rw-r--r--tools/arch/x86/include/asm/cmpxchg.h90
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h381
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h83
-rw-r--r--tools/arch/x86/include/asm/mcsafe_test.h13
-rw-r--r--tools/arch/x86/include/asm/required-features.h106
-rw-r--r--tools/arch/x86/include/asm/rmwcc.h42
-rw-r--r--tools/arch/x86/include/asm/unistd_32.h16
-rw-r--r--tools/arch/x86/include/asm/unistd_64.h16
-rw-r--r--tools/arch/x86/include/uapi/asm/bitsperlong.h13
-rw-r--r--tools/arch/x86/include/uapi/asm/errno.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h419
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm_perf.h17
-rw-r--r--tools/arch/x86/include/uapi/asm/mman.h6
-rw-r--r--tools/arch/x86/include/uapi/asm/perf_regs.h34
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h179
-rw-r--r--tools/arch/x86/include/uapi/asm/unistd.h18
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h151
-rw-r--r--tools/arch/x86/lib/memcpy_64.S297
-rw-r--r--tools/arch/x86/lib/memset_64.S139
-rw-r--r--tools/arch/xtensa/include/asm/barrier.h18
-rw-r--r--tools/arch/xtensa/include/uapi/asm/mman.h48
-rw-r--r--tools/bpf/.gitignore5
-rw-r--r--tools/bpf/Makefile107
-rw-r--r--tools/bpf/Makefile.helpers59
-rw-r--r--tools/bpf/bpf_asm.c52
-rw-r--r--tools/bpf/bpf_dbg.c1398
-rw-r--r--tools/bpf/bpf_exp.l198
-rw-r--r--tools/bpf/bpf_exp.y656
-rw-r--r--tools/bpf/bpf_jit_disasm.c329
-rw-r--r--tools/bpf/bpftool/.gitignore5
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile52
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst140
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst161
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst81
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst194
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst61
-rw-r--r--tools/bpf/bpftool/Makefile132
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool559
-rw-r--r--tools/bpf/bpftool/btf_dumper.c251
-rw-r--r--tools/bpf/bpftool/cfg.c514
-rw-r--r--tools/bpf/bpftool/cfg.h43
-rw-r--r--tools/bpf/bpftool/cgroup.c479
-rw-r--r--tools/bpf/bpftool/common.c626
-rw-r--r--tools/bpf/bpftool/jit_disasm.c188
-rw-r--r--tools/bpf/bpftool/json_writer.c356
-rw-r--r--tools/bpf/bpftool/json_writer.h72
-rw-r--r--tools/bpf/bpftool/main.c410
-rw-r--r--tools/bpf/bpftool/main.h168
-rw-r--r--tools/bpf/bpftool/map.c1065
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c309
-rw-r--r--tools/bpf/bpftool/perf.c247
-rw-r--r--tools/bpf/bpftool/prog.c983
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c346
-rw-r--r--tools/bpf/bpftool/xlated_dumper.h67
-rw-r--r--tools/build/.gitignore1
-rw-r--r--tools/build/Build3
-rw-r--r--tools/build/Build.include102
-rw-r--r--tools/build/Documentation/Build.txt168
-rw-r--r--tools/build/Makefile46
-rw-r--r--tools/build/Makefile.build162
-rw-r--r--tools/build/Makefile.feature241
-rw-r--r--tools/build/Makefile.include9
-rw-r--r--tools/build/feature/.gitignore3
-rw-r--r--tools/build/feature/Makefile304
-rw-r--r--tools/build/feature/test-all.c219
-rw-r--r--tools/build/feature/test-backtrace.c14
-rw-r--r--tools/build/feature/test-bionic.c7
-rw-r--r--tools/build/feature/test-bpf.c42
-rw-r--r--tools/build/feature/test-clang.cpp22
-rw-r--r--tools/build/feature/test-compile.c7
-rw-r--r--tools/build/feature/test-cplus-demangle.c15
-rw-r--r--tools/build/feature/test-cxx.cpp16
-rw-r--r--tools/build/feature/test-disassembler-four-args.c15
-rw-r--r--tools/build/feature/test-dwarf.c11
-rw-r--r--tools/build/feature/test-dwarf_getlocations.c13
-rw-r--r--tools/build/feature/test-eventfd.c9
-rw-r--r--tools/build/feature/test-fortify-source.c6
-rw-r--r--tools/build/feature/test-get_cpuid.c8
-rw-r--r--tools/build/feature/test-get_current_dir_name.c10
-rw-r--r--tools/build/feature/test-gettid.c11
-rw-r--r--tools/build/feature/test-glibc.c20
-rw-r--r--tools/build/feature/test-gtk2-infobar.c12
-rw-r--r--tools/build/feature/test-gtk2.c11
-rw-r--r--tools/build/feature/test-hello.c6
-rw-r--r--tools/build/feature/test-jvmti.c14
-rw-r--r--tools/build/feature/test-libaudit.c11
-rw-r--r--tools/build/feature/test-libbabeltrace.c10
-rw-r--r--tools/build/feature/test-libbfd.c16
-rw-r--r--tools/build/feature/test-libcrypto.c18
-rw-r--r--tools/build/feature/test-libdw-dwarf-unwind.c14
-rw-r--r--tools/build/feature/test-libelf-gelf_getnote.c8
-rw-r--r--tools/build/feature/test-libelf-getphdrnum.c9
-rw-r--r--tools/build/feature/test-libelf-getshdrstrndx.c9
-rw-r--r--tools/build/feature/test-libelf-mmap.c9
-rw-r--r--tools/build/feature/test-libelf.c9
-rw-r--r--tools/build/feature/test-libnuma.c10
-rw-r--r--tools/build/feature/test-libopencsd.c8
-rw-r--r--tools/build/feature/test-libperl.c10
-rw-r--r--tools/build/feature/test-libpython.c9
-rw-r--r--tools/build/feature/test-libslang.c7
-rw-r--r--tools/build/feature/test-libunwind-aarch64.c27
-rw-r--r--tools/build/feature/test-libunwind-arm.c28
-rw-r--r--tools/build/feature/test-libunwind-debug-frame-aarch64.c17
-rw-r--r--tools/build/feature/test-libunwind-debug-frame-arm.c17
-rw-r--r--tools/build/feature/test-libunwind-debug-frame.c17
-rw-r--r--tools/build/feature/test-libunwind-x86.c28
-rw-r--r--tools/build/feature/test-libunwind-x86_64.c28
-rw-r--r--tools/build/feature/test-libunwind.c28
-rw-r--r--tools/build/feature/test-llvm-version.cpp12
-rw-r--r--tools/build/feature/test-llvm.cpp14
-rw-r--r--tools/build/feature/test-lzma.c11
-rw-r--r--tools/build/feature/test-numa_num_possible_cpus.c7
-rw-r--r--tools/build/feature/test-pthread-attr-setaffinity-np.c18
-rw-r--r--tools/build/feature/test-pthread-barrier.c12
-rw-r--r--tools/build/feature/test-reallocarray.c8
-rw-r--r--tools/build/feature/test-sched_getcpu.c10
-rw-r--r--tools/build/feature/test-sdt.c8
-rw-r--r--tools/build/feature/test-setns.c7
-rw-r--r--tools/build/feature/test-stackprotector-all.c7
-rw-r--r--tools/build/feature/test-sync-compare-and-swap.c15
-rw-r--r--tools/build/feature/test-timerfd.c19
-rw-r--r--tools/build/feature/test-zlib.c10
-rw-r--r--tools/build/fixdep.c170
-rw-r--r--tools/build/tests/ex/Build11
-rw-r--r--tools/build/tests/ex/Makefile27
-rw-r--r--tools/build/tests/ex/a.c6
-rw-r--r--tools/build/tests/ex/arch/Build2
-rw-r--r--tools/build/tests/ex/arch/e.c6
-rw-r--r--tools/build/tests/ex/arch/f.c6
-rw-r--r--tools/build/tests/ex/b.c6
-rw-r--r--tools/build/tests/ex/c.c6
-rw-r--r--tools/build/tests/ex/d.c6
-rw-r--r--tools/build/tests/ex/empty/Build0
-rw-r--r--tools/build/tests/ex/empty2/README2
-rw-r--r--tools/build/tests/ex/ex.c22
-rw-r--r--tools/build/tests/ex/inc.c9
-rwxr-xr-xtools/build/tests/run.sh70
-rw-r--r--tools/cgroup/.gitignore1
-rw-r--r--tools/cgroup/Makefile11
-rw-r--r--tools/cgroup/cgroup_event_listener.c83
-rw-r--r--tools/firewire/Makefile20
-rw-r--r--tools/firewire/decode-fcp.c214
-rw-r--r--tools/firewire/list.h63
-rw-r--r--tools/firewire/nosy-dump.c1035
-rw-r--r--tools/firewire/nosy-dump.h174
-rw-r--r--tools/gpio/.gitignore4
-rw-r--r--tools/gpio/Build4
-rw-r--r--tools/gpio/Makefile82
-rw-r--r--tools/gpio/gpio-event-mon.c194
-rw-r--r--tools/gpio/gpio-hammer.c171
-rw-r--r--tools/gpio/gpio-utils.c270
-rw-r--r--tools/gpio/gpio-utils.h43
-rw-r--r--tools/gpio/lsgpio.c195
-rw-r--r--tools/hv/Makefile35
-rw-r--r--tools/hv/hv_fcopy_daemon.c248
-rwxr-xr-xtools/hv/hv_get_dhcp_info.sh29
-rwxr-xr-xtools/hv/hv_get_dns_info.sh13
-rw-r--r--tools/hv/hv_kvp_daemon.c1632
-rwxr-xr-xtools/hv/hv_set_ifconfig.sh65
-rw-r--r--tools/hv/hv_vss_daemon.c339
-rw-r--r--tools/hv/lsvmbus112
-rw-r--r--tools/iio/Build3
-rw-r--r--tools/iio/Makefile66
-rw-r--r--tools/iio/iio_event_monitor.c358
-rw-r--r--tools/iio/iio_generic_buffer.c693
-rw-r--r--tools/iio/iio_utils.c994
-rw-r--r--tools/iio/iio_utils.h85
-rw-r--r--tools/iio/lsiio.c191
-rw-r--r--tools/include/asm-generic/atomic-gcc.h72
-rw-r--r--tools/include/asm-generic/barrier.h44
-rw-r--r--tools/include/asm-generic/bitops.h31
-rw-r--r--tools/include/asm-generic/bitops/__ffs.h45
-rw-r--r--tools/include/asm-generic/bitops/__ffz.h12
-rw-r--r--tools/include/asm-generic/bitops/__fls.h44
-rw-r--r--tools/include/asm-generic/bitops/arch_hweight.h26
-rw-r--r--tools/include/asm-generic/bitops/atomic.h27
-rw-r--r--tools/include/asm-generic/bitops/const_hweight.h44
-rw-r--r--tools/include/asm-generic/bitops/find.h78
-rw-r--r--tools/include/asm-generic/bitops/fls.h42
-rw-r--r--tools/include/asm-generic/bitops/fls64.h37
-rw-r--r--tools/include/asm-generic/bitops/hweight.h8
-rw-r--r--tools/include/asm-generic/bitsperlong.h21
-rw-r--r--tools/include/asm-generic/hugetlb_encode.h34
-rw-r--r--tools/include/asm/alternative-asm.h10
-rw-r--r--tools/include/asm/atomic.h11
-rw-r--r--tools/include/asm/barrier.h28
-rw-r--r--tools/include/asm/bug.h45
-rw-r--r--tools/include/asm/export.h7
-rw-r--r--tools/include/asm/sections.h4
-rw-r--r--tools/include/linux/atomic.h13
-rw-r--r--tools/include/linux/bitmap.h150
-rw-r--r--tools/include/linux/bitops.h72
-rw-r--r--tools/include/linux/bits.h26
-rw-r--r--tools/include/linux/bug.h11
-rw-r--r--tools/include/linux/compiler-gcc.h42
-rw-r--r--tools/include/linux/compiler.h175
-rw-r--r--tools/include/linux/coresight-pmu.h34
-rw-r--r--tools/include/linux/debug_locks.h14
-rw-r--r--tools/include/linux/delay.h4
-rw-r--r--tools/include/linux/err.h55
-rw-r--r--tools/include/linux/export.h10
-rw-r--r--tools/include/linux/filter.h286
-rw-r--r--tools/include/linux/ftrace.h4
-rw-r--r--tools/include/linux/gfp.h4
-rw-r--r--tools/include/linux/hardirq.h12
-rw-r--r--tools/include/linux/hash.h104
-rw-r--r--tools/include/linux/hashtable.h149
-rw-r--r--tools/include/linux/interrupt.h4
-rw-r--r--tools/include/linux/irqflags.h39
-rw-r--r--tools/include/linux/jhash.h175
-rw-r--r--tools/include/linux/kallsyms.h34
-rw-r--r--tools/include/linux/kern_levels.h26
-rw-r--r--tools/include/linux/kernel.h120
-rw-r--r--tools/include/linux/linkage.h4
-rw-r--r--tools/include/linux/list.h772
-rw-r--r--tools/include/linux/lockdep.h72
-rw-r--r--tools/include/linux/log2.h179
-rw-r--r--tools/include/linux/module.h12
-rw-r--r--tools/include/linux/mutex.h4
-rw-r--r--tools/include/linux/nmi.h0
-rw-r--r--tools/include/linux/overflow.h278
-rw-r--r--tools/include/linux/poison.h96
-rw-r--r--tools/include/linux/proc_fs.h4
-rw-r--r--tools/include/linux/rbtree.h104
-rw-r--r--tools/include/linux/rbtree_augmented.h245
-rw-r--r--tools/include/linux/rcu.h25
-rw-r--r--tools/include/linux/refcount.h152
-rw-r--r--tools/include/linux/sched/clock.h4
-rw-r--r--tools/include/linux/sched/mm.h4
-rw-r--r--tools/include/linux/sched/task.h4
-rw-r--r--tools/include/linux/seq_file.h4
-rw-r--r--tools/include/linux/spinlock.h34
-rw-r--r--tools/include/linux/stacktrace.h33
-rw-r--r--tools/include/linux/string.h40
-rw-r--r--tools/include/linux/stringify.h13
-rw-r--r--tools/include/linux/time64.h13
-rw-r--r--tools/include/linux/types.h81
-rw-r--r--tools/include/linux/unaligned/packed_struct.h47
-rw-r--r--tools/include/tools/be_byteshift.h71
-rw-r--r--tools/include/tools/config.h34
-rw-r--r--tools/include/tools/endian.h57
-rw-r--r--tools/include/tools/le_byteshift.h71
-rw-r--r--tools/include/tools/libc_compat.h20
-rw-r--r--tools/include/trace/events/lock.h4
-rw-r--r--tools/include/uapi/asm-generic/bitsperlong.h15
-rw-r--r--tools/include/uapi/asm-generic/bpf_perf_event.h9
-rw-r--r--tools/include/uapi/asm-generic/errno-base.h40
-rw-r--r--tools/include/uapi/asm-generic/errno.h123
-rw-r--r--tools/include/uapi/asm-generic/fcntl.h220
-rw-r--r--tools/include/uapi/asm-generic/ioctls.h119
-rw-r--r--tools/include/uapi/asm-generic/mman-common.h77
-rw-r--r--tools/include/uapi/asm-generic/mman.h24
-rw-r--r--tools/include/uapi/asm-generic/unistd.h785
-rw-r--r--tools/include/uapi/asm/bitsperlong.h22
-rw-r--r--tools/include/uapi/asm/bpf_perf_event.h7
-rw-r--r--tools/include/uapi/asm/errno.h18
-rw-r--r--tools/include/uapi/drm/drm.h1012
-rw-r--r--tools/include/uapi/drm/i915_drm.h1724
-rw-r--r--tools/include/uapi/linux/bpf.h2796
-rw-r--r--tools/include/uapi/linux/bpf_common.h57
-rw-r--r--tools/include/uapi/linux/bpf_perf_event.h18
-rw-r--r--tools/include/uapi/linux/btf.h113
-rw-r--r--tools/include/uapi/linux/erspan.h52
-rw-r--r--tools/include/uapi/linux/fcntl.h94
-rw-r--r--tools/include/uapi/linux/hw_breakpoint.h35
-rw-r--r--tools/include/uapi/linux/if_link.h1002
-rw-r--r--tools/include/uapi/linux/in.h304
-rw-r--r--tools/include/uapi/linux/kcmp.h28
-rw-r--r--tools/include/uapi/linux/kvm.h1552
-rw-r--r--tools/include/uapi/linux/lirc.h217
-rw-r--r--tools/include/uapi/linux/mman.h36
-rw-r--r--tools/include/uapi/linux/netlink.h251
-rw-r--r--tools/include/uapi/linux/perf_event.h1130
-rw-r--r--tools/include/uapi/linux/prctl.h223
-rw-r--r--tools/include/uapi/linux/sched.h58
-rw-r--r--tools/include/uapi/linux/seg6.h55
-rw-r--r--tools/include/uapi/linux/seg6_local.h80
-rw-r--r--tools/include/uapi/linux/stat.h174
-rw-r--r--tools/include/uapi/linux/vhost.h228
-rw-r--r--tools/include/uapi/sound/asound.h1037
-rw-r--r--tools/kvm/kvm_stat/Makefile42
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat1716
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt105
-rw-r--r--tools/laptop/dslm/.gitignore1
-rw-r--r--tools/laptop/dslm/Makefile10
-rw-r--r--tools/laptop/dslm/dslm.c166
-rw-r--r--tools/laptop/freefall/Makefile17
-rw-r--r--tools/laptop/freefall/freefall.c174
-rw-r--r--tools/leds/.gitignore1
-rw-r--r--tools/leds/Makefile13
-rw-r--r--tools/leds/led_hw_brightness_mon.c85
-rw-r--r--tools/leds/uledmon.c64
-rw-r--r--tools/lib/api/Build9
-rw-r--r--tools/lib/api/Makefile67
-rw-r--r--tools/lib/api/cpu.c19
-rw-r--r--tools/lib/api/cpu.h7
-rw-r--r--tools/lib/api/debug-internal.h21
-rw-r--r--tools/lib/api/debug.c29
-rw-r--r--tools/lib/api/debug.h11
-rw-r--r--tools/lib/api/fd/Build1
-rw-r--r--tools/lib/api/fd/array.c128
-rw-r--r--tools/lib/api/fd/array.h49
-rw-r--r--tools/lib/api/fs/Build2
-rw-r--r--tools/lib/api/fs/fs.c558
-rw-r--r--tools/lib/api/fs/fs.h60
-rw-r--r--tools/lib/api/fs/tracing_path.c167
-rw-r--r--tools/lib/api/fs/tracing_path.h22
-rw-r--r--tools/lib/bitmap.c75
-rw-r--r--tools/lib/bpf/.gitignore2
-rw-r--r--tools/lib/bpf/Build1
-rw-r--r--tools/lib/bpf/Makefile221
-rw-r--r--tools/lib/bpf/bpf.c691
-rw-r--r--tools/lib/bpf/bpf.h115
-rw-r--r--tools/lib/bpf/btf.c395
-rw-r--r--tools/lib/bpf/btf.h26
-rw-r--r--tools/lib/bpf/libbpf.c2431
-rw-r--r--tools/lib/bpf/libbpf.h300
-rw-r--r--tools/lib/bpf/libbpf_errno.c75
-rw-r--r--tools/lib/bpf/nlattr.c187
-rw-r--r--tools/lib/bpf/nlattr.h72
-rw-r--r--tools/lib/bpf/str_error.c18
-rw-r--r--tools/lib/bpf/str_error.h6
-rw-r--r--tools/lib/find_bit.c128
-rw-r--r--tools/lib/hweight.c63
-rw-r--r--tools/lib/lockdep/.gitignore1
-rw-r--r--tools/lib/lockdep/Build1
-rw-r--r--tools/lib/lockdep/Makefile162
-rw-r--r--tools/lib/lockdep/common.c29
-rw-r--r--tools/lib/lockdep/include/liblockdep/common.h52
-rw-r--r--tools/lib/lockdep/include/liblockdep/mutex.h71
-rw-r--r--tools/lib/lockdep/include/liblockdep/rwlock.h87
-rwxr-xr-xtools/lib/lockdep/lockdep3
-rw-r--r--tools/lib/lockdep/lockdep.c28
-rw-r--r--tools/lib/lockdep/lockdep_internals.h1
-rw-r--r--tools/lib/lockdep/lockdep_states.h1
-rw-r--r--tools/lib/lockdep/preload.c443
-rw-r--r--tools/lib/lockdep/rbtree.c1
-rwxr-xr-xtools/lib/lockdep/run_tests.sh32
-rw-r--r--tools/lib/lockdep/tests/AA.c14
-rw-r--r--tools/lib/lockdep/tests/ABA.c14
-rw-r--r--tools/lib/lockdep/tests/ABBA.c14
-rw-r--r--tools/lib/lockdep/tests/ABBA_2threads.c47
-rw-r--r--tools/lib/lockdep/tests/ABBCCA.c16
-rw-r--r--tools/lib/lockdep/tests/ABBCCDDA.c18
-rw-r--r--tools/lib/lockdep/tests/ABCABC.c16
-rw-r--r--tools/lib/lockdep/tests/ABCDBCDA.c18
-rw-r--r--tools/lib/lockdep/tests/ABCDBDDA.c18
-rw-r--r--tools/lib/lockdep/tests/WW.c14
-rw-r--r--tools/lib/lockdep/tests/common.h13
-rw-r--r--tools/lib/lockdep/tests/unlock_balance.c13
-rw-r--r--tools/lib/rbtree.c548
-rw-r--r--tools/lib/str_error_r.c27
-rw-r--r--tools/lib/string.c115
-rw-r--r--tools/lib/subcmd/Build7
-rw-r--r--tools/lib/subcmd/Makefile69
-rw-r--r--tools/lib/subcmd/exec-cmd.c210
-rw-r--r--tools/lib/subcmd/exec-cmd.h17
-rw-r--r--tools/lib/subcmd/help.c270
-rw-r--r--tools/lib/subcmd/help.h36
-rw-r--r--tools/lib/subcmd/pager.c120
-rw-r--r--tools/lib/subcmd/pager.h11
-rw-r--r--tools/lib/subcmd/parse-options.c1006
-rw-r--r--tools/lib/subcmd/parse-options.h242
-rw-r--r--tools/lib/subcmd/run-command.c229
-rw-r--r--tools/lib/subcmd/run-command.h61
-rw-r--r--tools/lib/subcmd/sigchain.c54
-rw-r--r--tools/lib/subcmd/sigchain.h11
-rw-r--r--tools/lib/subcmd/subcmd-config.c12
-rw-r--r--tools/lib/subcmd/subcmd-config.h15
-rw-r--r--tools/lib/subcmd/subcmd-util.h76
-rw-r--r--tools/lib/symbol/kallsyms.c76
-rw-r--r--tools/lib/symbol/kallsyms.h29
-rw-r--r--tools/lib/traceevent/.gitignore3
-rw-r--r--tools/lib/traceevent/Build17
-rw-r--r--tools/lib/traceevent/Makefile301
-rw-r--r--tools/lib/traceevent/event-parse.c6892
-rw-r--r--tools/lib/traceevent/event-parse.h991
-rw-r--r--tools/lib/traceevent/event-plugin.c444
-rw-r--r--tools/lib/traceevent/event-utils.h67
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c729
-rw-r--r--tools/lib/traceevent/kbuffer.h68
-rw-r--r--tools/lib/traceevent/parse-filter.c2445
-rw-r--r--tools/lib/traceevent/parse-utils.c71
-rw-r--r--tools/lib/traceevent/plugin_cfg80211.c43
-rw-r--r--tools/lib/traceevent/plugin_function.c194
-rw-r--r--tools/lib/traceevent/plugin_hrtimer.c88
-rw-r--r--tools/lib/traceevent/plugin_jbd2.c75
-rw-r--r--tools/lib/traceevent/plugin_kmem.c94
-rw-r--r--tools/lib/traceevent/plugin_kvm.c522
-rw-r--r--tools/lib/traceevent/plugin_mac80211.c102
-rw-r--r--tools/lib/traceevent/plugin_sched_switch.c160
-rw-r--r--tools/lib/traceevent/plugin_scsi.c433
-rw-r--r--tools/lib/traceevent/plugin_xen.c137
-rw-r--r--tools/lib/traceevent/trace-seq.c240
-rw-r--r--tools/lib/vsprintf.c25
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt30
-rw-r--r--tools/memory-model/Documentation/explanation.txt1896
-rw-r--r--tools/memory-model/Documentation/recipes.txt570
-rw-r--r--tools/memory-model/Documentation/references.txt114
-rw-r--r--tools/memory-model/README206
-rw-r--r--tools/memory-model/linux-kernel.bell52
-rw-r--r--tools/memory-model/linux-kernel.cat128
-rw-r--r--tools/memory-model/linux-kernel.cfg21
-rw-r--r--tools/memory-model/linux-kernel.def108
-rw-r--r--tools/memory-model/litmus-tests/.gitignore1
-rw-r--r--tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus26
-rw-r--r--tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus25
-rw-r--r--tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus25
-rw-r--r--tools/memory-model/litmus-tests/CoWW+poonceonce.litmus18
-rw-r--r--tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus45
-rw-r--r--tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus43
-rw-r--r--tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus41
-rw-r--r--tools/memory-model/litmus-tests/ISA2+poonceonces.litmus37
-rw-r--r--tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus39
-rw-r--r--tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus34
-rw-r--r--tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus29
-rw-r--r--tools/memory-model/litmus-tests/LB+poonceonces.litmus28
-rw-r--r--tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus30
-rw-r--r--tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus34
-rw-r--r--tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus35
-rw-r--r--tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus34
-rw-r--r--tools/memory-model/litmus-tests/MP+polocks.litmus35
-rw-r--r--tools/memory-model/litmus-tests/MP+poonceonces.litmus27
-rw-r--r--tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus28
-rw-r--r--tools/memory-model/litmus-tests/MP+porevlocks.litmus35
-rw-r--r--tools/memory-model/litmus-tests/R+fencembonceonces.litmus30
-rw-r--r--tools/memory-model/litmus-tests/R+poonceonces.litmus27
-rw-r--r--tools/memory-model/litmus-tests/README153
-rw-r--r--tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus27
-rw-r--r--tools/memory-model/litmus-tests/S+poonceonces.litmus28
-rw-r--r--tools/memory-model/litmus-tests/SB+fencembonceonces.litmus32
-rw-r--r--tools/memory-model/litmus-tests/SB+poonceonces.litmus29
-rw-r--r--tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus32
-rw-r--r--tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus35
-rw-r--r--tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus38
-rw-r--r--tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus42
-rw-r--r--tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus40
-rw-r--r--tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus42
-rw-r--r--tools/memory-model/lock.cat146
-rwxr-xr-xtools/memory-model/scripts/checkalllitmus.sh73
-rwxr-xr-xtools/memory-model/scripts/checklitmus.sh86
-rwxr-xr-xtools/nfsd/inject_fault.sh50
-rw-r--r--tools/objtool/.gitignore3
-rw-r--r--tools/objtool/Build22
-rw-r--r--tools/objtool/Documentation/stack-validation.txt317
-rw-r--r--tools/objtool/Makefile64
-rw-r--r--tools/objtool/arch.h82
-rw-r--r--tools/objtool/arch/x86/Build12
-rw-r--r--tools/objtool/arch/x86/decode.c496
-rw-r--r--tools/objtool/arch/x86/include/asm/inat.h244
-rw-r--r--tools/objtool/arch/x86/include/asm/inat_types.h29
-rw-r--r--tools/objtool/arch/x86/include/asm/insn.h244
-rw-r--r--tools/objtool/arch/x86/include/asm/orc_types.h109
-rw-r--r--tools/objtool/arch/x86/lib/inat.c97
-rw-r--r--tools/objtool/arch/x86/lib/insn.c606
-rw-r--r--tools/objtool/arch/x86/lib/x86-opcode-map.txt1078
-rw-r--r--tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk393
-rw-r--r--tools/objtool/builtin-check.c59
-rw-r--r--tools/objtool/builtin-orc.c68
-rw-r--r--tools/objtool/builtin.h28
-rw-r--r--tools/objtool/cfi.h55
-rw-r--r--tools/objtool/check.c2281
-rw-r--r--tools/objtool/check.h82
-rw-r--r--tools/objtool/elf.c693
-rw-r--r--tools/objtool/elf.h107
-rw-r--r--tools/objtool/objtool.c134
-rw-r--r--tools/objtool/orc.h30
-rw-r--r--tools/objtool/orc_dump.c224
-rw-r--r--tools/objtool/orc_gen.c242
-rw-r--r--tools/objtool/special.c193
-rw-r--r--tools/objtool/special.h42
-rwxr-xr-xtools/objtool/sync-check.sh29
-rw-r--r--tools/objtool/warn.h70
-rw-r--r--tools/pci/pcitest.c233
-rw-r--r--tools/pci/pcitest.sh72
-rw-r--r--tools/pcmcia/.gitignore1
-rw-r--r--tools/pcmcia/Makefile10
-rw-r--r--tools/pcmcia/crc32hash.c32
-rw-r--r--tools/perf/.gitignore36
-rw-r--r--tools/perf/Build55
-rw-r--r--tools/perf/CREDITS30
-rw-r--r--tools/perf/Documentation/Build.txt49
-rw-r--r--tools/perf/Documentation/Makefile362
-rw-r--r--tools/perf/Documentation/android.txt78
-rw-r--r--tools/perf/Documentation/asciidoc.conf91
-rw-r--r--tools/perf/Documentation/asciidoctor-extensions.rb29
-rw-r--r--tools/perf/Documentation/callchain-overhead-calculation.txt108
-rw-r--r--tools/perf/Documentation/examples.txt225
-rw-r--r--tools/perf/Documentation/intel-bts.txt86
-rw-r--r--tools/perf/Documentation/intel-pt.txt891
-rw-r--r--tools/perf/Documentation/itrace.txt36
-rw-r--r--tools/perf/Documentation/jit-interface.txt15
-rw-r--r--tools/perf/Documentation/jitdump-specification.txt170
-rw-r--r--tools/perf/Documentation/manpage-1.72.xsl14
-rw-r--r--tools/perf/Documentation/manpage-base.xsl35
-rw-r--r--tools/perf/Documentation/manpage-bold-literal.xsl17
-rw-r--r--tools/perf/Documentation/manpage-normal.xsl13
-rw-r--r--tools/perf/Documentation/manpage-suppress-sp.xsl21
-rw-r--r--tools/perf/Documentation/perf-annotate.txt132
-rw-r--r--tools/perf/Documentation/perf-archive.txt22
-rw-r--r--tools/perf/Documentation/perf-bench.txt209
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt79
-rw-r--r--tools/perf/Documentation/perf-buildid-list.txt43
-rw-r--r--tools/perf/Documentation/perf-c2c.txt290
-rw-r--r--tools/perf/Documentation/perf-config.txt520
-rw-r--r--tools/perf/Documentation/perf-data.txt48
-rw-r--r--tools/perf/Documentation/perf-diff.txt227
-rw-r--r--tools/perf/Documentation/perf-evlist.txt45
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt87
-rw-r--r--tools/perf/Documentation/perf-help.txt38
-rw-r--r--tools/perf/Documentation/perf-inject.txt69
-rw-r--r--tools/perf/Documentation/perf-kallsyms.txt24
-rw-r--r--tools/perf/Documentation/perf-kmem.txt77
-rw-r--r--tools/perf/Documentation/perf-kvm.txt164
-rw-r--r--tools/perf/Documentation/perf-list.txt291
-rw-r--r--tools/perf/Documentation/perf-lock.txt70
-rw-r--r--tools/perf/Documentation/perf-mem.txt92
-rw-r--r--tools/perf/Documentation/perf-probe.txt299
-rw-r--r--tools/perf/Documentation/perf-record.txt509
-rw-r--r--tools/perf/Documentation/perf-report.txt493
-rw-r--r--tools/perf/Documentation/perf-sched.txt167
-rw-r--r--tools/perf/Documentation/perf-script-perl.txt216
-rw-r--r--tools/perf/Documentation/perf-script-python.txt641
-rw-r--r--tools/perf/Documentation/perf-script.txt389
-rw-r--r--tools/perf/Documentation/perf-stat.txt378
-rw-r--r--tools/perf/Documentation/perf-test.txt36
-rw-r--r--tools/perf/Documentation/perf-timechart.txt128
-rw-r--r--tools/perf/Documentation/perf-top.txt294
-rw-r--r--tools/perf/Documentation/perf-trace.txt243
-rw-r--r--tools/perf/Documentation/perf-version.txt24
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt497
-rw-r--r--tools/perf/Documentation/perf.txt49
-rw-r--r--tools/perf/Documentation/perfconfig.example38
-rw-r--r--tools/perf/Documentation/tips.txt36
-rw-r--r--tools/perf/MANIFEST18
-rw-r--r--tools/perf/Makefile112
-rw-r--r--tools/perf/Makefile.config1048
-rw-r--r--tools/perf/Makefile.perf901
-rw-r--r--tools/perf/arch/Build2
-rw-r--r--tools/perf/arch/alpha/Build1
-rw-r--r--tools/perf/arch/arm/Build2
-rw-r--r--tools/perf/arch/arm/Makefile4
-rw-r--r--tools/perf/arch/arm/annotate/instructions.c61
-rw-r--r--tools/perf/arch/arm/include/arch-tests.h12
-rw-r--r--tools/perf/arch/arm/include/dwarf-regs-table.h10
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h60
-rw-r--r--tools/perf/arch/arm/tests/Build4
-rw-r--r--tools/perf/arch/arm/tests/arch-tests.c16
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c62
-rw-r--r--tools/perf/arch/arm/tests/regs_load.S59
-rw-r--r--tools/perf/arch/arm/util/Build6
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c108
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c770
-rw-r--r--tools/perf/arch/arm/util/cs-etm.h15
-rw-r--r--tools/perf/arch/arm/util/dwarf-regs.c64
-rw-r--r--tools/perf/arch/arm/util/pmu.c31
-rw-r--r--tools/perf/arch/arm/util/unwind-libdw.c38
-rw-r--r--tools/perf/arch/arm/util/unwind-libunwind.c50
-rw-r--r--tools/perf/arch/arm64/Build2
-rw-r--r--tools/perf/arch/arm64/Makefile28
-rw-r--r--tools/perf/arch/arm64/annotate/instructions.c64
-rwxr-xr-xtools/perf/arch/arm64/entry/syscalls/mksyscalltbl62
-rw-r--r--tools/perf/arch/arm64/include/arch-tests.h12
-rw-r--r--tools/perf/arch/arm64/include/dwarf-regs-table.h14
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h94
-rw-r--r--tools/perf/arch/arm64/tests/Build4
-rw-r--r--tools/perf/arch/arm64/tests/arch-tests.c16
-rw-r--r--tools/perf/arch/arm64/tests/dwarf-unwind.c62
-rw-r--r--tools/perf/arch/arm64/tests/regs_load.S47
-rw-r--r--tools/perf/arch/arm64/util/Build10
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c226
-rw-r--r--tools/perf/arch/arm64/util/dwarf-regs.c96
-rw-r--r--tools/perf/arch/arm64/util/header.c65
-rw-r--r--tools/perf/arch/arm64/util/sym-handling.c22
-rw-r--r--tools/perf/arch/arm64/util/unwind-libdw.c60
-rw-r--r--tools/perf/arch/arm64/util/unwind-libunwind.c86
-rw-r--r--tools/perf/arch/common.c212
-rw-r--r--tools/perf/arch/common.h10
-rw-r--r--tools/perf/arch/mips/Build1
-rw-r--r--tools/perf/arch/parisc/Build1
-rw-r--r--tools/perf/arch/powerpc/Build2
-rw-r--r--tools/perf/arch/powerpc/Makefile33
-rw-r--r--tools/perf/arch/powerpc/annotate/instructions.c61
-rwxr-xr-xtools/perf/arch/powerpc/entry/syscalls/mksyscalltbl37
-rw-r--r--tools/perf/arch/powerpc/include/arch-tests.h14
-rw-r--r--tools/perf/arch/powerpc/include/dwarf-regs-table.h28
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h72
-rw-r--r--tools/perf/arch/powerpc/tests/Build4
-rw-r--r--tools/perf/arch/powerpc/tests/arch-tests.c16
-rw-r--r--tools/perf/arch/powerpc/tests/dwarf-unwind.c63
-rw-r--r--tools/perf/arch/powerpc/tests/regs_load.S95
-rw-r--r--tools/perf/arch/powerpc/util/Build10
-rw-r--r--tools/perf/arch/powerpc/util/book3s_hcalls.h124
-rw-r--r--tools/perf/arch/powerpc/util/book3s_hv_exits.h34
-rw-r--r--tools/perf/arch/powerpc/util/dwarf-regs.c105
-rw-r--r--tools/perf/arch/powerpc/util/header.c47
-rw-r--r--tools/perf/arch/powerpc/util/kvm-stat.c172
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c162
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c283
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c155
-rw-r--r--tools/perf/arch/powerpc/util/unwind-libdw.c74
-rw-r--r--tools/perf/arch/powerpc/util/unwind-libunwind.c96
-rw-r--r--tools/perf/arch/s390/Build1
-rw-r--r--tools/perf/arch/s390/Makefile30
-rw-r--r--tools/perf/arch/s390/annotate/instructions.c174
-rwxr-xr-xtools/perf/arch/s390/entry/syscalls/mksyscalltbl32
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl390
-rw-r--r--tools/perf/arch/s390/include/dwarf-regs-table.h72
-rw-r--r--tools/perf/arch/s390/include/perf_regs.h95
-rw-r--r--tools/perf/arch/s390/util/Build9
-rw-r--r--tools/perf/arch/s390/util/auxtrace.c120
-rw-r--r--tools/perf/arch/s390/util/dwarf-regs.c44
-rw-r--r--tools/perf/arch/s390/util/header.c149
-rw-r--r--tools/perf/arch/s390/util/kvm-stat.c112
-rw-r--r--tools/perf/arch/s390/util/machine.c52
-rw-r--r--tools/perf/arch/s390/util/unwind-libdw.c63
-rw-r--r--tools/perf/arch/sh/Build1
-rw-r--r--tools/perf/arch/sh/Makefile3
-rw-r--r--tools/perf/arch/sh/include/dwarf-regs-table.h26
-rw-r--r--tools/perf/arch/sh/util/Build1
-rw-r--r--tools/perf/arch/sh/util/dwarf-regs.c55
-rw-r--r--tools/perf/arch/sparc/Build1
-rw-r--r--tools/perf/arch/sparc/Makefile3
-rw-r--r--tools/perf/arch/sparc/include/dwarf-regs-table.h19
-rw-r--r--tools/perf/arch/sparc/util/Build1
-rw-r--r--tools/perf/arch/sparc/util/dwarf-regs.c43
-rw-r--r--tools/perf/arch/x86/Build2
-rw-r--r--tools/perf/arch/x86/Makefile27
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c204
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl388
-rwxr-xr-xtools/perf/arch/x86/entry/syscalls/syscalltbl.sh40
-rw-r--r--tools/perf/arch/x86/include/arch-tests.h23
-rw-r--r--tools/perf/arch/x86/include/dwarf-regs-table.h15
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h87
-rw-r--r--tools/perf/arch/x86/tests/Build8
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c37
-rw-r--r--tools/perf/arch/x86/tests/bp-modify.c213
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c63
-rw-r--r--tools/perf/arch/x86/tests/gen-insn-x86-dat.awk75
-rwxr-xr-xtools/perf/arch/x86/tests/gen-insn-x86-dat.sh43
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-32.c1679
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-64.c1729
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-src.c2693
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c186
-rw-r--r--tools/perf/arch/x86/tests/intel-cqm.c128
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c168
-rw-r--r--tools/perf/arch/x86/tests/rdpmc.c180
-rw-r--r--tools/perf/arch/x86/tests/regs_load.S99
-rw-r--r--tools/perf/arch/x86/util/Build18
-rw-r--r--tools/perf/arch/x86/util/auxtrace.c80
-rw-r--r--tools/perf/arch/x86/util/dwarf-regs.c129
-rw-r--r--tools/perf/arch/x86/util/event.c76
-rw-r--r--tools/perf/arch/x86/util/group.c28
-rw-r--r--tools/perf/arch/x86/util/header.c79
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c462
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c1122
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c166
-rw-r--r--tools/perf/arch/x86/util/machine.c103
-rw-r--r--tools/perf/arch/x86/util/perf_regs.c256
-rw-r--r--tools/perf/arch/x86/util/pmu.c20
-rw-r--r--tools/perf/arch/x86/util/tsc.c82
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c53
-rw-r--r--tools/perf/arch/x86/util/unwind-libunwind.c115
-rw-r--r--tools/perf/arch/xtensa/Build1
-rw-r--r--tools/perf/arch/xtensa/Makefile3
-rw-r--r--tools/perf/arch/xtensa/include/dwarf-regs-table.h9
-rw-r--r--tools/perf/arch/xtensa/util/Build1
-rw-r--r--tools/perf/arch/xtensa/util/dwarf-regs.c25
-rw-r--r--tools/perf/bench/Build14
-rw-r--r--tools/perf/bench/bench.h55
-rw-r--r--tools/perf/bench/futex-hash.c227
-rw-r--r--tools/perf/bench/futex-lock-pi.c229
-rw-r--r--tools/perf/bench/futex-requeue.c221
-rw-r--r--tools/perf/bench/futex-wake-parallel.c323
-rw-r--r--tools/perf/bench/futex-wake.c212
-rw-r--r--tools/perf/bench/futex.h101
-rw-r--r--tools/perf/bench/mem-functions.c374
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h13
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h13
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S17
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-lib.c24
-rw-r--r--tools/perf/bench/mem-memset-arch.h13
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h13
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S12
-rw-r--r--tools/perf/bench/numa.c1844
-rw-r--r--tools/perf/bench/sched-messaging.c332
-rw-r--r--tools/perf/bench/sched-pipe.c186
-rw-r--r--tools/perf/builtin-annotate.c639
-rw-r--r--tools/perf/builtin-bench.c293
-rw-r--r--tools/perf/builtin-buildid-cache.c527
-rw-r--r--tools/perf/builtin-buildid-list.c119
-rw-r--r--tools/perf/builtin-c2c.c2987
-rw-r--r--tools/perf/builtin-config.c260
-rw-r--r--tools/perf/builtin-data.c128
-rw-r--r--tools/perf/builtin-diff.c1363
-rw-r--r--tools/perf/builtin-evlist.c81
-rw-r--r--tools/perf/builtin-ftrace.c521
-rw-r--r--tools/perf/builtin-help.c493
-rw-r--r--tools/perf/builtin-inject.c876
-rw-r--r--tools/perf/builtin-kallsyms.c68
-rw-r--r--tools/perf/builtin-kmem.c2020
-rw-r--r--tools/perf/builtin-kvm.c1641
-rw-r--r--tools/perf/builtin-list.c127
-rw-r--r--tools/perf/builtin-lock.c1034
-rw-r--r--tools/perf/builtin-mem.c459
-rw-r--r--tools/perf/builtin-probe.c748
-rw-r--r--tools/perf/builtin-record.c1902
-rw-r--r--tools/perf/builtin-report.c1404
-rw-r--r--tools/perf/builtin-sched.c3532
-rw-r--r--tools/perf/builtin-script.c3542
-rw-r--r--tools/perf/builtin-stat.c3154
-rw-r--r--tools/perf/builtin-timechart.c2018
-rw-r--r--tools/perf/builtin-top.c1518
-rw-r--r--tools/perf/builtin-trace.c3387
-rw-r--r--tools/perf/builtin-version.c95
-rw-r--r--tools/perf/builtin.h44
-rwxr-xr-xtools/perf/check-headers.sh108
-rw-r--r--tools/perf/command-list.txt32
-rw-r--r--tools/perf/design.txt462
-rw-r--r--tools/perf/examples/bpf/5sec.c49
-rw-r--r--tools/perf/examples/bpf/augmented_syscalls.c55
-rw-r--r--tools/perf/examples/bpf/empty.c3
-rw-r--r--tools/perf/examples/bpf/hello.c9
-rw-r--r--tools/perf/examples/bpf/sys_enter_openat.c33
-rw-r--r--tools/perf/include/bpf/bpf.h36
-rw-r--r--tools/perf/include/bpf/stdio.h19
-rw-r--r--tools/perf/jvmti/Build8
-rw-r--r--tools/perf/jvmti/jvmti_agent.c496
-rw-r--r--tools/perf/jvmti/jvmti_agent.h34
-rw-r--r--tools/perf/jvmti/libjvmti.c422
-rw-r--r--tools/perf/perf-archive.sh48
-rw-r--r--tools/perf/perf-completion.sh298
-rw-r--r--tools/perf/perf-read-vdso.c35
-rw-r--r--tools/perf/perf-sys.h74
-rw-r--r--tools/perf/perf-with-kcore.sh259
-rw-r--r--tools/perf/perf.c539
-rw-r--r--tools/perf/perf.h93
-rw-r--r--tools/perf/pmu-events/Build15
-rw-r--r--tools/perf/pmu-events/README152
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json32
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json25
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json27
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json12
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json28
-rw-r--r--tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json52
-rw-r--r--tools/perf/pmu-events/arch/arm64/armv8-recommended.json452
-rw-r--r--tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json113
-rw-r--r--tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json122
-rw-r--r--tools/perf/pmu-events/arch/arm64/mapfile.csv19
-rw-r--r--tools/perf/pmu-events/arch/powerpc/mapfile.csv17
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/cache.json176
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/floating-point.json14
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/frontend.json470
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/marked.json794
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/memory.json212
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/other.json4064
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/pipeline.json350
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/pmc.json140
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/translation.json176
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/cache.json107
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/floating-point.json32
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/frontend.json357
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/marked.json627
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/memory.json127
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/other.json2337
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/pipeline.json532
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/pmc.json117
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/translation.json227
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z10/basic.json86
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z10/crypto.json114
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z10/extended.json128
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z13/basic.json86
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z13/crypto.json114
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z13/extended.json394
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z13/transaction.json7
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z14/basic.json58
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z14/crypto.json114
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z14/extended.json373
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z14/transaction.json7
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z196/basic.json86
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z196/crypto.json114
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z196/extended.json170
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_zec12/basic.json86
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json114
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_zec12/extended.json247
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json7
-rw-r--r--tools/perf/pmu-events/arch/s390/mapfile.csv6
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/cache.json746
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/floating-point.json261
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/frontend.json83
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/memory.json154
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/other.json450
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/pipeline.json364
-rw-r--r--tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json124
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/cache.json3399
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/floating-point.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/frontend.json286
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/memory.json3045
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/pipeline.json1427
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/uncore.json278
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json388
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/cache.json809
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/frontend.json286
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/memory.json432
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json1427
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json317
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json86
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json92
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json388
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/cache.json965
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/frontend.json286
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/memory.json679
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json1427
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json317
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json86
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json92
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json388
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/cache.json2045
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/frontend.json52
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/memory.json294
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/other.json82
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/pipeline.json455
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json75
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/cache.json1453
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json62
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/memory.json38
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/other.json98
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json544
-rw-r--r--tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json218
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/cache.json1064
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/floating-point.json83
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/frontend.json294
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json158
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/memory.json676
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/other.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/pipeline.json1338
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/uncore.json374
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json484
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/cache.json1100
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/floating-point.json83
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/frontend.json294
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json158
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/memory.json767
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/other.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/pipeline.json1338
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json317
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json86
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json92
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json484
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/cache.json1102
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/floating-point.json151
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/frontend.json305
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/memory.json236
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json1309
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/uncore.json314
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json180
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/cache.json1260
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/floating-point.json151
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/frontend.json305
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/memory.json503
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/pipeline.json1309
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json322
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json48
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json78
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json274
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json198
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/cache.json1290
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/floating-point.json138
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/frontend.json305
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/memory.json422
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/other.json58
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/pipeline.json1220
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json210
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json48
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json82
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json273
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/virtual-memory.json149
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/cache.json2305
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/frontend.json34
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/memory.json1110
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json435
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/uncore-memory.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json65
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv34
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/cache.json3229
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/floating-point.json229
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/frontend.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/memory.json739
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/other.json210
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/pipeline.json881
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemep/virtual-memory.json109
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/cache.json3184
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/floating-point.json229
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/frontend.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/memory.json739
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/other.json210
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/pipeline.json881
-rw-r--r--tools/perf/pmu-events/arch/x86/nehalemex/virtual-memory.json109
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/cache.json1879
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json138
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/frontend.json305
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/memory.json445
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/other.json58
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json1220
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/uncore.json314
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json149
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/cache.json812
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/frontend.json47
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/memory.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/pipeline.json359
-rw-r--r--tools/perf/pmu-events/arch/x86/silvermont/virtual-memory.json69
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/cache.json939
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/floating-point.json67
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/frontend.json482
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/memory.json615
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/other.json48
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/pipeline.json950
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/uncore.json254
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json284
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/cache.json1749
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/floating-point.json87
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/frontend.json482
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/memory.json1473
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/other.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/pipeline.json950
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json164
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json172
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json1156
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json284
-rw-r--r--tools/perf/pmu-events/arch/x86/westmereep-dp/cache.json2817
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-dp/floating-point.json | 229
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-dp/frontend.json | 26
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-dp/memory.json | 758
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-dp/other.json | 287
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-dp/pipeline.json | 899
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-dp/virtual-memory.json | 173
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/cache.json | 3233
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/floating-point.json | 229
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/frontend.json | 26
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/memory.json | 739
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/other.json | 287
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/pipeline.json | 899
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereep-sp/virtual-memory.json | 149
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/cache.json | 3225
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/floating-point.json | 229
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/frontend.json | 26
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/memory.json | 747
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/other.json | 287
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/pipeline.json | 905
-rw-r--r--  tools/perf/pmu-events/arch/x86/westmereex/virtual-memory.json | 173
-rw-r--r--  tools/perf/pmu-events/jevents.c | 1172
-rw-r--r--  tools/perf/pmu-events/jevents.h | 22
-rw-r--r--  tools/perf/pmu-events/jsmn.c | 313
-rw-r--r--  tools/perf/pmu-events/jsmn.h | 68
-rw-r--r--  tools/perf/pmu-events/json.c | 162
-rw-r--r--  tools/perf/pmu-events/json.h | 39
-rw-r--r--  tools/perf/pmu-events/pmu-events.h | 44
-rwxr-xr-x  tools/perf/python/tracepoint.py | 48
-rwxr-xr-x  tools/perf/python/twatch.py | 68
-rw-r--r--  tools/perf/scripts/Build | 2
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Build | 5
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.c | 138
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Context.xs | 42
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL | 18
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/README | 59
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm | 55
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm | 192
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm | 94
-rw-r--r--  tools/perf/scripts/perl/Perf-Trace-Util/typemap | 1
-rw-r--r--  tools/perf/scripts/perl/bin/check-perf-trace-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/failed-syscalls-record | 3
-rw-r--r--  tools/perf/scripts/perl/bin/failed-syscalls-report | 10
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-file-record | 3
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-file-report | 10
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-pid-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/rw-by-pid-report | 3
-rw-r--r--  tools/perf/scripts/perl/bin/rwtop-record | 2
-rw-r--r--  tools/perf/scripts/perl/bin/rwtop-report | 20
-rw-r--r--  tools/perf/scripts/perl/bin/wakeup-latency-record | 6
-rw-r--r--  tools/perf/scripts/perl/bin/wakeup-latency-report | 3
-rw-r--r--  tools/perf/scripts/perl/check-perf-trace.pl | 106
-rw-r--r--  tools/perf/scripts/perl/failed-syscalls.pl | 47
-rw-r--r--  tools/perf/scripts/perl/rw-by-file.pl | 106
-rw-r--r--  tools/perf/scripts/perl/rw-by-pid.pl | 184
-rw-r--r--  tools/perf/scripts/perl/rwtop.pl | 203
-rw-r--r--  tools/perf/scripts/perl/wakeup-latency.pl | 107
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/Build | 3
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/Context.c | 116
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py | 116
-rwxr-xr-x  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py | 97
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py | 184
-rw-r--r--  tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py | 91
-rw-r--r--  tools/perf/scripts/python/bin/compaction-times-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/compaction-times-report | 4
-rw-r--r--  tools/perf/scripts/python/bin/event_analyzing_sample-record | 8
-rw-r--r--  tools/perf/scripts/python/bin/event_analyzing_sample-report | 3
-rw-r--r--  tools/perf/scripts/python/bin/export-to-postgresql-record | 8
-rw-r--r--  tools/perf/scripts/python/bin/export-to-postgresql-report | 29
-rw-r--r--  tools/perf/scripts/python/bin/export-to-sqlite-record | 8
-rw-r--r--  tools/perf/scripts/python/bin/export-to-sqlite-report | 29
-rw-r--r--  tools/perf/scripts/python/bin/failed-syscalls-by-pid-record | 3
-rw-r--r--  tools/perf/scripts/python/bin/failed-syscalls-by-pid-report | 10
-rw-r--r--  tools/perf/scripts/python/bin/futex-contention-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/futex-contention-report | 4
-rw-r--r--  tools/perf/scripts/python/bin/intel-pt-events-record | 13
-rw-r--r--  tools/perf/scripts/python/bin/intel-pt-events-report | 3
-rw-r--r--  tools/perf/scripts/python/bin/mem-phys-addr-record | 19
-rw-r--r--  tools/perf/scripts/python/bin/mem-phys-addr-report | 3
-rwxr-xr-x  tools/perf/scripts/python/bin/net_dropmonitor-record | 2
-rwxr-xr-x  tools/perf/scripts/python/bin/net_dropmonitor-report | 4
-rw-r--r--  tools/perf/scripts/python/bin/netdev-times-record | 8
-rw-r--r--  tools/perf/scripts/python/bin/netdev-times-report | 5
-rw-r--r--  tools/perf/scripts/python/bin/powerpc-hcalls-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/powerpc-hcalls-report | 2
-rw-r--r--  tools/perf/scripts/python/bin/sched-migration-record | 2
-rw-r--r--  tools/perf/scripts/python/bin/sched-migration-report | 3
-rw-r--r--  tools/perf/scripts/python/bin/sctop-record | 3
-rw-r--r--  tools/perf/scripts/python/bin/sctop-report | 24
-rwxr-xr-x  tools/perf/scripts/python/bin/stackcollapse-record | 8
-rwxr-xr-x  tools/perf/scripts/python/bin/stackcollapse-report | 3
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-by-pid-record | 3
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-by-pid-report | 10
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-record | 3
-rw-r--r--  tools/perf/scripts/python/bin/syscall-counts-report | 10
-rw-r--r--  tools/perf/scripts/python/call-graph-from-sql.py | 339
-rw-r--r--  tools/perf/scripts/python/check-perf-trace.py | 82
-rw-r--r--  tools/perf/scripts/python/compaction-times.py | 311
-rw-r--r--  tools/perf/scripts/python/event_analyzing_sample.py | 190
-rw-r--r--  tools/perf/scripts/python/export-to-postgresql.py | 734
-rw-r--r--  tools/perf/scripts/python/export-to-sqlite.py | 455
-rw-r--r--  tools/perf/scripts/python/failed-syscalls-by-pid.py | 78
-rw-r--r--  tools/perf/scripts/python/futex-contention.py | 50
-rw-r--r--  tools/perf/scripts/python/intel-pt-events.py | 128
-rw-r--r--  tools/perf/scripts/python/mem-phys-addr.py | 95
-rwxr-xr-x  tools/perf/scripts/python/net_dropmonitor.py | 76
-rw-r--r--  tools/perf/scripts/python/netdev-times.py | 468
-rw-r--r--  tools/perf/scripts/python/powerpc-hcalls.py | 200
-rw-r--r--  tools/perf/scripts/python/sched-migration.py | 464
-rw-r--r--  tools/perf/scripts/python/sctop.py | 80
-rwxr-xr-x  tools/perf/scripts/python/stackcollapse.py | 126
-rw-r--r--  tools/perf/scripts/python/stat-cpi.py | 78
-rw-r--r--  tools/perf/scripts/python/syscall-counts-by-pid.py | 74
-rw-r--r--  tools/perf/scripts/python/syscall-counts.py | 64
-rw-r--r--  tools/perf/tests/.gitignore | 4
-rw-r--r--  tools/perf/tests/Build | 87
-rw-r--r--  tools/perf/tests/attr.c | 198
-rw-r--r--  tools/perf/tests/attr.py | 392
-rw-r--r--  tools/perf/tests/attr/README | 64
-rw-r--r--  tools/perf/tests/attr/base-record | 41
-rw-r--r--  tools/perf/tests/attr/base-stat | 41
-rw-r--r--  tools/perf/tests/attr/test-record-C0 | 14
-rw-r--r--  tools/perf/tests/attr/test-record-basic | 6
-rw-r--r--  tools/perf/tests/attr/test-record-branch-any | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-any | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-any_call | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-any_ret | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-hv | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-ind_call | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-k | 8
-rw-r--r--  tools/perf/tests/attr/test-record-branch-filter-u | 8
-rw-r--r--  tools/perf/tests/attr/test-record-count | 9
-rw-r--r--  tools/perf/tests/attr/test-record-data | 10
-rw-r--r--  tools/perf/tests/attr/test-record-freq | 7
-rw-r--r--  tools/perf/tests/attr/test-record-graph-default | 7
-rw-r--r--  tools/perf/tests/attr/test-record-graph-dwarf | 12
-rw-r--r--  tools/perf/tests/attr/test-record-graph-fp | 7
-rw-r--r--  tools/perf/tests/attr/test-record-group | 22
-rw-r--r--  tools/perf/tests/attr/test-record-group-sampling | 39
-rw-r--r--  tools/perf/tests/attr/test-record-group1 | 23
-rw-r--r--  tools/perf/tests/attr/test-record-no-buffering | 9
-rw-r--r--  tools/perf/tests/attr/test-record-no-inherit | 8
-rw-r--r--  tools/perf/tests/attr/test-record-no-samples | 7
-rw-r--r--  tools/perf/tests/attr/test-record-period | 8
-rw-r--r--  tools/perf/tests/attr/test-record-raw | 7
-rw-r--r--  tools/perf/tests/attr/test-stat-C0 | 10
-rw-r--r--  tools/perf/tests/attr/test-stat-basic | 7
-rw-r--r--  tools/perf/tests/attr/test-stat-default | 70
-rw-r--r--  tools/perf/tests/attr/test-stat-detailed-1 | 111
-rw-r--r--  tools/perf/tests/attr/test-stat-detailed-2 | 171
-rw-r--r--  tools/perf/tests/attr/test-stat-detailed-3 | 191
-rw-r--r--  tools/perf/tests/attr/test-stat-group | 17
-rw-r--r--  tools/perf/tests/attr/test-stat-group1 | 17
-rw-r--r--  tools/perf/tests/attr/test-stat-no-inherit | 8
-rw-r--r--  tools/perf/tests/backward-ring-buffer.c | 155
-rw-r--r--  tools/perf/tests/bitmap.c | 52
-rw-r--r--  tools/perf/tests/bp_account.c | 193
-rw-r--r--  tools/perf/tests/bp_signal.c | 312
-rw-r--r--  tools/perf/tests/bp_signal_overflow.c | 134
-rw-r--r--  tools/perf/tests/bpf-script-example.c | 48
-rw-r--r--  tools/perf/tests/bpf-script-test-kbuild.c | 20
-rw-r--r--  tools/perf/tests/bpf-script-test-prologue.c | 46
-rw-r--r--  tools/perf/tests/bpf-script-test-relocation.c | 50
-rw-r--r--  tools/perf/tests/bpf.c | 363
-rw-r--r--  tools/perf/tests/builtin-test.c | 710
-rw-r--r--  tools/perf/tests/clang.c | 48
-rw-r--r--  tools/perf/tests/code-reading.c | 741
-rw-r--r--  tools/perf/tests/cpumap.c | 120
-rw-r--r--  tools/perf/tests/dso-data.c | 385
-rw-r--r--  tools/perf/tests/dwarf-unwind.c | 213
-rw-r--r--  tools/perf/tests/event-times.c | 239
-rw-r--r--  tools/perf/tests/event_update.c | 118
-rw-r--r--  tools/perf/tests/evsel-roundtrip-name.c | 119
-rw-r--r--  tools/perf/tests/evsel-tp-sched.c | 90
-rw-r--r--  tools/perf/tests/expr.c | 65
-rw-r--r--  tools/perf/tests/fdarray.c | 176
-rw-r--r--  tools/perf/tests/hists_common.c | 217
-rw-r--r--  tools/perf/tests/hists_common.h | 76
-rw-r--r--  tools/perf/tests/hists_cumulate.c | 737
-rw-r--r--  tools/perf/tests/hists_filter.c | 328
-rw-r--r--  tools/perf/tests/hists_link.c | 342
-rw-r--r--  tools/perf/tests/hists_output.c | 624
-rw-r--r--  tools/perf/tests/is_printable_array.c | 38
-rw-r--r--  tools/perf/tests/keep-tracking.c | 157
-rw-r--r--  tools/perf/tests/kmod-path.c | 159
-rw-r--r--  tools/perf/tests/llvm.c | 172
-rw-r--r--  tools/perf/tests/llvm.h | 31
-rw-r--r--  tools/perf/tests/make | 368
-rw-r--r--  tools/perf/tests/mem.c | 56
-rw-r--r--  tools/perf/tests/mem2node.c | 73
-rw-r--r--  tools/perf/tests/mmap-basic.c | 162
-rw-r--r--  tools/perf/tests/mmap-thread-lookup.c | 235
-rw-r--r--  tools/perf/tests/openat-syscall-all-cpus.c | 127
-rw-r--r--  tools/perf/tests/openat-syscall-tp-fields.c | 140
-rw-r--r--  tools/perf/tests/openat-syscall.c | 66
-rw-r--r--  tools/perf/tests/parse-events.c | 1914
-rw-r--r--  tools/perf/tests/parse-no-sample-id-all.c | 107
-rw-r--r--  tools/perf/tests/perf-hooks.c | 48
-rw-r--r--  tools/perf/tests/perf-record.c | 328
-rwxr-xr-x  tools/perf/tests/perf-targz-src-pkg | 22
-rw-r--r--  tools/perf/tests/pmu.c | 178
-rw-r--r--  tools/perf/tests/python-use.c | 24
-rw-r--r--  tools/perf/tests/sample-parsing.c | 353
-rw-r--r--  tools/perf/tests/sdt.c | 118
-rw-r--r--  tools/perf/tests/shell/lib/probe.sh | 6
-rw-r--r--  tools/perf/tests/shell/lib/probe_vfs_getname.sh | 24
-rwxr-xr-x  tools/perf/tests/shell/probe_vfs_getname.sh | 14
-rwxr-xr-x  tools/perf/tests/shell/record+probe_libc_inet_pton.sh | 94
-rwxr-xr-x  tools/perf/tests/shell/record+script_probe_vfs_getname.sh | 41
-rwxr-xr-x  tools/perf/tests/shell/trace+probe_vfs_getname.sh | 40
-rw-r--r--  tools/perf/tests/stat.c | 112
-rw-r--r--  tools/perf/tests/sw-clock.c | 144
-rw-r--r--  tools/perf/tests/switch-tracking.c | 580
-rw-r--r--  tools/perf/tests/task-exit.c | 144
-rw-r--r--  tools/perf/tests/tests.h | 118
-rw-r--r--  tools/perf/tests/thread-map.c | 140
-rw-r--r--  tools/perf/tests/thread-mg-share.c | 99
-rw-r--r--  tools/perf/tests/topology.c | 141
-rw-r--r--  tools/perf/tests/unit_number__scnprintf.c | 39
-rw-r--r--  tools/perf/tests/vmlinux-kallsyms.c | 244
-rw-r--r--  tools/perf/trace/beauty/Build | 11
-rw-r--r--  tools/perf/trace/beauty/arch_errno_names.c | 1
-rwxr-xr-x  tools/perf/trace/beauty/arch_errno_names.sh | 100
-rw-r--r--  tools/perf/trace/beauty/beauty.h | 125
-rw-r--r--  tools/perf/trace/beauty/clone.c | 75
-rwxr-xr-x  tools/perf/trace/beauty/drm_ioctl.sh | 14
-rw-r--r--  tools/perf/trace/beauty/eventfd.c | 37
-rw-r--r--  tools/perf/trace/beauty/fcntl.c | 100
-rw-r--r--  tools/perf/trace/beauty/flock.c | 49
-rw-r--r--  tools/perf/trace/beauty/futex_op.c | 61
-rw-r--r--  tools/perf/trace/beauty/futex_val3.c | 18
-rw-r--r--  tools/perf/trace/beauty/ioctl.c | 162
-rw-r--r--  tools/perf/trace/beauty/kcmp.c | 44
-rwxr-xr-x  tools/perf/trace/beauty/kcmp_type.sh | 10
-rwxr-xr-x  tools/perf/trace/beauty/kvm_ioctl.sh | 11
-rwxr-xr-x  tools/perf/trace/beauty/madvise_behavior.sh | 10
-rw-r--r--  tools/perf/trace/beauty/mmap.c | 121
-rw-r--r--  tools/perf/trace/beauty/mode_t.c | 69
-rw-r--r--  tools/perf/trace/beauty/msg_flags.c | 62
-rw-r--r--  tools/perf/trace/beauty/open_flags.c | 85
-rw-r--r--  tools/perf/trace/beauty/perf_event_open.c | 44
-rwxr-xr-x  tools/perf/trace/beauty/perf_ioctl.sh | 10
-rw-r--r--  tools/perf/trace/beauty/pid.c | 20
-rw-r--r--  tools/perf/trace/beauty/pkey_alloc.c | 50
-rwxr-xr-x  tools/perf/trace/beauty/pkey_alloc_access_rights.sh | 10
-rw-r--r--  tools/perf/trace/beauty/prctl.c | 82
-rwxr-xr-x  tools/perf/trace/beauty/prctl_option.sh | 17
-rw-r--r--  tools/perf/trace/beauty/sched_policy.c | 48
-rw-r--r--  tools/perf/trace/beauty/seccomp.c | 51
-rw-r--r--  tools/perf/trace/beauty/signum.c | 55
-rwxr-xr-x  tools/perf/trace/beauty/sndrv_ctl_ioctl.sh | 8
-rwxr-xr-x  tools/perf/trace/beauty/sndrv_pcm_ioctl.sh | 8
-rw-r--r--  tools/perf/trace/beauty/socket.c | 28
-rwxr-xr-x  tools/perf/trace/beauty/socket_ipproto.sh | 11
-rw-r--r--  tools/perf/trace/beauty/socket_type.c | 61
-rw-r--r--  tools/perf/trace/beauty/statx.c | 72
-rwxr-xr-x  tools/perf/trace/beauty/vhost_virtio_ioctl.sh | 17
-rw-r--r--  tools/perf/trace/beauty/waitid_options.c | 27
-rw-r--r--  tools/perf/trace/strace/groups/file | 20
-rw-r--r--  tools/perf/ui/Build | 14
-rw-r--r--  tools/perf/ui/browser.c | 793
-rw-r--r--  tools/perf/ui/browser.h | 84
-rw-r--r--  tools/perf/ui/browsers/Build | 10
-rw-r--r--  tools/perf/ui/browsers/annotate.c | 940
-rw-r--r--  tools/perf/ui/browsers/header.c | 131
-rw-r--r--  tools/perf/ui/browsers/hists.c | 3310
-rw-r--r--  tools/perf/ui/browsers/hists.h | 38
-rw-r--r--  tools/perf/ui/browsers/map.c | 132
-rw-r--r--  tools/perf/ui/browsers/map.h | 7
-rw-r--r--  tools/perf/ui/browsers/scripts.c | 188
-rw-r--r--  tools/perf/ui/gtk/Build | 9
-rw-r--r--  tools/perf/ui/gtk/annotate.c | 260
-rw-r--r--  tools/perf/ui/gtk/browser.c | 88
-rw-r--r--  tools/perf/ui/gtk/gtk.h | 68
-rw-r--r--  tools/perf/ui/gtk/helpline.c | 58
-rw-r--r--  tools/perf/ui/gtk/hists.c | 681
-rw-r--r--  tools/perf/ui/gtk/progress.c | 60
-rw-r--r--  tools/perf/ui/gtk/setup.c | 24
-rw-r--r--  tools/perf/ui/gtk/util.c | 114
-rw-r--r--  tools/perf/ui/helpline.c | 85
-rw-r--r--  tools/perf/ui/helpline.h | 31
-rw-r--r--  tools/perf/ui/hist.c | 822
-rw-r--r--  tools/perf/ui/keysyms.h | 29
-rw-r--r--  tools/perf/ui/libslang.h | 33
-rw-r--r--  tools/perf/ui/progress.c | 48
-rw-r--r--  tools/perf/ui/progress.h | 34
-rw-r--r--  tools/perf/ui/setup.c | 120
-rw-r--r--  tools/perf/ui/stdio/hist.c | 850
-rw-r--r--  tools/perf/ui/tui/Build | 4
-rw-r--r--  tools/perf/ui/tui/helpline.c | 62
-rw-r--r--  tools/perf/ui/tui/progress.c | 88
-rw-r--r--  tools/perf/ui/tui/setup.c | 178
-rw-r--r--  tools/perf/ui/tui/tui.h | 7
-rw-r--r--  tools/perf/ui/tui/util.c | 257
-rw-r--r--  tools/perf/ui/ui.h | 34
-rw-r--r--  tools/perf/ui/util.c | 85
-rw-r--r--  tools/perf/ui/util.h | 22
-rw-r--r--  tools/perf/util/Build | 224
-rwxr-xr-x  tools/perf/util/PERF-VERSION-GEN | 51
-rw-r--r--  tools/perf/util/annotate.c | 2878
-rw-r--r--  tools/perf/util/annotate.h | 403
-rw-r--r--  tools/perf/util/arm-spe-pkt-decoder.c | 462
-rw-r--r--  tools/perf/util/arm-spe-pkt-decoder.h | 43
-rw-r--r--  tools/perf/util/arm-spe.c | 231
-rw-r--r--  tools/perf/util/arm-spe.h | 31
-rw-r--r--  tools/perf/util/auxtrace.c | 2166
-rw-r--r--  tools/perf/util/auxtrace.h | 725
-rw-r--r--  tools/perf/util/block-range.c | 329
-rw-r--r--  tools/perf/util/block-range.h | 72
-rw-r--r--  tools/perf/util/bpf-loader.c | 1801
-rw-r--r--  tools/perf/util/bpf-loader.h | 214
-rw-r--r--  tools/perf/util/bpf-prologue.c | 502
-rw-r--r--  tools/perf/util/bpf-prologue.h | 37
-rw-r--r--  tools/perf/util/branch.c | 147
-rw-r--r--  tools/perf/util/branch.h | 25
-rw-r--r--  tools/perf/util/build-id.c | 876
-rw-r--r--  tools/perf/util/build-id.h | 59
-rw-r--r--  tools/perf/util/c++/Build | 2
-rw-r--r--  tools/perf/util/c++/clang-c.h | 45
-rw-r--r--  tools/perf/util/c++/clang-test.cpp | 63
-rw-r--r--  tools/perf/util/c++/clang.cpp | 212
-rw-r--r--  tools/perf/util/c++/clang.h | 27
-rw-r--r--  tools/perf/util/cache.h | 31
-rw-r--r--  tools/perf/util/call-path.c | 122
-rw-r--r--  tools/perf/util/call-path.h | 77
-rw-r--r--  tools/perf/util/callchain.c | 1579
-rw-r--r--  tools/perf/util/callchain.h | 302
-rw-r--r--  tools/perf/util/cgroup.c | 251
-rw-r--r--  tools/perf/util/cgroup.h | 29
-rw-r--r--  tools/perf/util/cloexec.c | 104
-rw-r--r--  tools/perf/util/cloexec.h | 7
-rw-r--r--  tools/perf/util/color.c | 222
-rw-r--r--  tools/perf/util/color.h | 48
-rw-r--r--  tools/perf/util/comm.c | 152
-rw-r--r--  tools/perf/util/comm.h | 28
-rw-r--r--  tools/perf/util/compress.h | 15
-rw-r--r--  tools/perf/util/config.c | 829
-rw-r--r--  tools/perf/util/config.h | 70
-rw-r--r--  tools/perf/util/counts.c | 55
-rw-r--r--  tools/perf/util/counts.h | 39
-rw-r--r--  tools/perf/util/cpumap.c | 735
-rw-r--r--  tools/perf/util/cpumap.h | 77
-rw-r--r--  tools/perf/util/cs-etm-decoder/Build | 1
-rw-r--r--  tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 562
-rw-r--r--  tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 108
-rw-r--r--  tools/perf/util/cs-etm.c | 1507
-rw-r--r--  tools/perf/util/cs-etm.h | 85
-rw-r--r--  tools/perf/util/ctype.c | 49
-rw-r--r--  tools/perf/util/data-convert-bt.c | 1677
-rw-r--r--  tools/perf/util/data-convert-bt.h | 11
-rw-r--r--  tools/perf/util/data-convert.h | 10
-rw-r--r--  tools/perf/util/data.c | 189
-rw-r--r--  tools/perf/util/data.h | 66
-rw-r--r--  tools/perf/util/db-export.c | 507
-rw-r--r--  tools/perf/util/db-export.h | 109
-rw-r--r--  tools/perf/util/debug.c | 284
-rw-r--r--  tools/perf/util/debug.h | 64
-rw-r--r--  tools/perf/util/demangle-java.c | 202
-rw-r--r--  tools/perf/util/demangle-java.h | 11
-rw-r--r--  tools/perf/util/demangle-rust.c | 270
-rw-r--r--  tools/perf/util/demangle-rust.h | 8
-rw-r--r--  tools/perf/util/drv_configs.c | 78
-rw-r--r--  tools/perf/util/drv_configs.h | 26
-rw-r--r--  tools/perf/util/dso.c | 1541
-rw-r--r--  tools/perf/util/dso.h | 382
-rw-r--r--  tools/perf/util/dump-insn.c | 15
-rw-r--r--  tools/perf/util/dump-insn.h | 23
-rw-r--r--  tools/perf/util/dwarf-aux.c | 1353
-rw-r--r--  tools/perf/util/dwarf-aux.h | 142
-rw-r--r--  tools/perf/util/dwarf-regs.c | 61
-rw-r--r--  tools/perf/util/env.c | 177
-rw-r--r--  tools/perf/util/env.h | 82
-rw-r--r--  tools/perf/util/event.c | 1718
-rw-r--r--  tools/perf/util/event.h | 833
-rw-r--r--  tools/perf/util/evlist.c | 1816
-rw-r--r--  tools/perf/util/evlist.h | 315
-rw-r--r--  tools/perf/util/evsel.c | 2954
-rw-r--r--  tools/perf/util/evsel.h | 484
-rw-r--r--  tools/perf/util/evsel_fprintf.c | 236
-rw-r--r--  tools/perf/util/expr.h | 26
-rw-r--r--  tools/perf/util/expr.y | 236
-rw-r--r--  tools/perf/util/find-vdso-map.c | 31
-rw-r--r--  tools/perf/util/genelf.c | 554
-rw-r--r--  tools/perf/util/genelf.h | 65
-rw-r--r--  tools/perf/util/genelf_debug.c | 611
-rwxr-xr-x  tools/perf/util/generate-cmdlist.sh | 55
-rw-r--r--  tools/perf/util/get_current_dir_name.c | 18
-rw-r--r--  tools/perf/util/group.h | 8
-rw-r--r--  tools/perf/util/header.c | 3946
-rw-r--r--  tools/perf/util/header.h | 180
-rw-r--r--  tools/perf/util/help-unknown-cmd.c | 116
-rw-r--r--  tools/perf/util/help-unknown-cmd.h | 0
-rw-r--r--  tools/perf/util/hist.c | 2650
-rw-r--r--  tools/perf/util/hist.h | 527
-rw-r--r--  tools/perf/util/include/asm/asm-offsets.h | 2
-rw-r--r--  tools/perf/util/include/asm/cpufeature.h | 10
-rw-r--r--  tools/perf/util/include/asm/dwarf2.h | 14
-rw-r--r--  tools/perf/util/include/asm/swab.h | 1
-rw-r--r--  tools/perf/util/include/asm/system.h | 1
-rw-r--r--  tools/perf/util/include/asm/uaccess.h | 15
-rw-r--r--  tools/perf/util/include/dwarf-regs.h | 23
-rw-r--r--  tools/perf/util/include/linux/ctype.h | 1
-rw-r--r--  tools/perf/util/include/linux/linkage.h | 14
-rw-r--r--  tools/perf/util/intel-bts.c | 952
-rw-r--r--  tools/perf/util/intel-bts.h | 43
-rw-r--r--  tools/perf/util/intel-pt-decoder/Build | 36
-rw-r--r--  tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk | 392
-rw-r--r--  tools/perf/util/intel-pt-decoder/inat.c | 96
-rw-r--r--  tools/perf/util/intel-pt-decoder/inat.h | 244
-rw-r--r--  tools/perf/util/intel-pt-decoder/inat_types.h | 29
-rw-r--r--  tools/perf/util/intel-pt-decoder/insn.c | 606
-rw-r--r--  tools/perf/util/intel-pt-decoder/insn.h | 244
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2727
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 133
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 274
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h | 63
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-log.c | 156
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-log.h | 78
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c | 638
-rw-r--r--  tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h | 77
-rw-r--r--  tools/perf/util/intel-pt-decoder/x86-opcode-map.txt | 1072
-rw-r--r--  tools/perf/util/intel-pt.c | 2636
-rw-r--r--  tools/perf/util/intel-pt.h | 56
-rw-r--r--  tools/perf/util/intlist.c | 146
-rw-r--r--  tools/perf/util/intlist.h | 78
-rw-r--r--  tools/perf/util/jit.h | 12
-rw-r--r--  tools/perf/util/jitdump.c | 793
-rw-r--r--  tools/perf/util/jitdump.h | 139
-rw-r--r--  tools/perf/util/kvm-stat.h | 147
-rw-r--r--  tools/perf/util/levenshtein.c | 87
-rw-r--r--  tools/perf/util/levenshtein.h | 9
-rw-r--r--  tools/perf/util/libunwind/arm64.c | 41
-rw-r--r--  tools/perf/util/libunwind/x86_32.c | 44
-rw-r--r--  tools/perf/util/llvm-utils.c | 579
-rw-r--r--  tools/perf/util/llvm-utils.h | 69
-rw-r--r--  tools/perf/util/lzma.c | 123
-rw-r--r--  tools/perf/util/machine.c | 2607
-rw-r--r--  tools/perf/util/machine.h | 296
-rw-r--r--  tools/perf/util/map.c | 920
-rw-r--r--  tools/perf/util/map.h | 255
-rw-r--r--  tools/perf/util/mem-events.c | 452
-rw-r--r--  tools/perf/util/mem-events.h | 75
-rw-r--r--  tools/perf/util/mem2node.c | 135
-rw-r--r--  tools/perf/util/mem2node.h | 19
-rw-r--r--  tools/perf/util/memswap.c | 25
-rw-r--r--  tools/perf/util/memswap.h | 8
-rw-r--r--  tools/perf/util/metricgroup.c | 540
-rw-r--r--  tools/perf/util/metricgroup.h | 32
-rw-r--r--  tools/perf/util/mmap.c | 339
-rw-r--r--  tools/perf/util/mmap.h | 102
-rw-r--r--  tools/perf/util/namespaces.c | 265
-rw-r--r--  tools/perf/util/namespaces.h | 67
-rw-r--r--  tools/perf/util/ordered-events.c | 322
-rw-r--r--  tools/perf/util/ordered-events.h | 66
-rw-r--r--  tools/perf/util/parse-branch-options.c | 106
-rw-r--r--  tools/perf/util/parse-branch-options.h | 7
-rw-r--r--  tools/perf/util/parse-events.c | 2768
-rw-r--r--  tools/perf/util/parse-events.h | 234
-rw-r--r--  tools/perf/util/parse-events.l | 378
-rw-r--r--  tools/perf/util/parse-events.y | 714
-rw-r--r--  tools/perf/util/parse-regs-options.c | 72
-rw-r--r--  tools/perf/util/parse-regs-options.h | 6
-rw-r--r--  tools/perf/util/path.c | 93
-rw-r--r--  tools/perf/util/path.h | 13
-rw-r--r--  tools/perf/util/perf-hooks-list.h | 3
-rw-r--r--  tools/perf/util/perf-hooks.c | 90
-rw-r--r--  tools/perf/util/perf-hooks.h | 40
-rw-r--r--  tools/perf/util/perf_regs.c | 40
-rw-r--r--  tools/perf/util/perf_regs.h | 47
-rw-r--r--  tools/perf/util/pmu.c | 1553
-rw-r--r--  tools/perf/util/pmu.h | 98
-rw-r--r--  tools/perf/util/pmu.l | 43
-rw-r--r--  tools/perf/util/pmu.y | 92
-rw-r--r--  tools/perf/util/print_binary.c | 58
-rw-r--r--  tools/perf/util/print_binary.h | 37
-rw-r--r--  tools/perf/util/probe-event.c | 3547
-rw-r--r--  tools/perf/util/probe-event.h | 193
-rw-r--r--  tools/perf/util/probe-file.c | 1066
-rw-r--r--  tools/perf/util/probe-file.h | 79
-rw-r--r--  tools/perf/util/probe-finder.c | 1910
-rw-r--r--  tools/perf/util/probe-finder.h | 122
-rw-r--r--  tools/perf/util/pstack.c | 84
-rw-r--r--  tools/perf/util/pstack.h | 16
-rw-r--r--  tools/perf/util/python-ext-sources | 33
-rw-r--r--  tools/perf/util/python.c | 1377
-rw-r--r--  tools/perf/util/rb_resort.h | 151
-rw-r--r--  tools/perf/util/rblist.c | 133
-rw-r--r--  tools/perf/util/rblist.h | 50
-rw-r--r--  tools/perf/util/record.c | 326
-rw-r--r--  tools/perf/util/rwsem.c | 32
-rw-r--r--  tools/perf/util/rwsem.h | 19
-rw-r--r--  tools/perf/util/s390-cpumsf-kernel.h | 71
-rw-r--r--  tools/perf/util/s390-cpumsf.c | 950
-rw-r--r--  tools/perf/util/s390-cpumsf.h | 21
-rw-r--r--  tools/perf/util/sane_ctype.h | 52
-rw-r--r--  tools/perf/util/scripting-engines/Build | 6
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-perl.c | 752
-rw-r--r--  tools/perf/util/scripting-engines/trace-event-python.c | 1784
-rw-r--r--  tools/perf/util/session.c | 2251
-rw-r--r--  tools/perf/util/session.h | 132
-rw-r--r--  tools/perf/util/setns.c | 8
-rw-r--r--  tools/perf/util/setup.py | 69
-rw-r--r--  tools/perf/util/smt.c | 44
-rw-r--r--  tools/perf/util/smt.h | 6
-rw-r--r--  tools/perf/util/sort.c | 3015
-rw-r--r--  tools/perf/util/sort.h | 303
-rw-r--r--  tools/perf/util/srcline.c | 720
-rw-r--r--  tools/perf/util/srcline.h | 54
-rw-r--r--  tools/perf/util/stat-shadow.c | 999
-rw-r--r--  tools/perf/util/stat.c | 456
-rw-r--r--  tools/perf/util/stat.h | 175
-rw-r--r--  tools/perf/util/strbuf.c | 164
-rw-r--r--  tools/perf/util/strbuf.h | 95
-rw-r--r--  tools/perf/util/strfilter.c | 311
-rw-r--r--  tools/perf/util/strfilter.h | 84
-rw-r--r--  tools/perf/util/string.c | 444
-rw-r--r--  tools/perf/util/string2.h | 45
-rw-r--r--  tools/perf/util/strlist.c | 209
-rw-r--r--  tools/perf/util/strlist.h | 90
-rw-r--r--  tools/perf/util/svghelper.c | 809
-rw-r--r--  tools/perf/util/svghelper.h | 37
-rw-r--r--  tools/perf/util/symbol-elf.c | 2251
-rw-r--r--  tools/perf/util/symbol-minimal.c | 383
-rw-r--r--  tools/perf/util/symbol.c | 2257
-rw-r--r--  tools/perf/util/symbol.h | 401
-rw-r--r--  tools/perf/util/symbol_fprintf.c | 73
-rw-r--r--  tools/perf/util/syscalltbl.c | 183
-rw-r--r--  tools/perf/util/syscalltbl.h | 24
-rw-r--r--  tools/perf/util/target.c | 152
-rw-r--r--  tools/perf/util/target.h | 87
-rw-r--r--  tools/perf/util/term.c | 40
-rw-r--r--  tools/perf/util/term.h | 11
-rw-r--r--  tools/perf/util/thread-stack.c | 652
-rw-r--r--  tools/perf/util/thread-stack.h | 102
-rw-r--r--  tools/perf/util/thread.c | 416
-rw-r--r--  tools/perf/util/thread.h | 138
-rw-r--r--  tools/perf/util/thread_map.c | 489
-rw-r--r--  tools/perf/util/thread_map.h | 65
-rw-r--r--  tools/perf/util/time-utils.c | 422
-rw-r--r--  tools/perf/util/time-utils.h | 30
-rw-r--r--  tools/perf/util/tool.h | 83
-rw-r--r--  tools/perf/util/top.c | 117
-rw-r--r--  tools/perf/util/top.h | 49
-rw-r--r--  tools/perf/util/trace-event-info.c | 618
-rw-r--r--  tools/perf/util/trace-event-parse.c | 257
-rw-r--r--  tools/perf/util/trace-event-read.c | 498
-rw-r--r--  tools/perf/util/trace-event-scripting.c | 176
-rw-r--r--  tools/perf/util/trace-event.c | 120
-rw-r--r--  tools/perf/util/trace-event.h | 105
-rw-r--r--  tools/perf/util/trigger.h | 96
-rw-r--r--  tools/perf/util/tsc.c | 31
-rw-r--r--  tools/perf/util/tsc.h | 32
-rw-r--r--  tools/perf/util/units.c | 69
-rw-r--r--  tools/perf/util/units.h | 18
-rw-r--r--  tools/perf/util/unwind-libdw.c | 256
-rw-r--r--  tools/perf/util/unwind-libdw.h | 26
-rw-r--r--  tools/perf/util/unwind-libunwind-local.c | 739
-rw-r--r--  tools/perf/util/unwind-libunwind.c | 84
-rw-r--r--  tools/perf/util/unwind.h | 85
-rw-r--r--  tools/perf/util/usage.c | 26
-rw-r--r--  tools/perf/util/util-cxx.h | 27
-rw-r--r--  tools/perf/util/util.c | 508
-rw-r--r--  tools/perf/util/util.h | 87
-rw-r--r--  tools/perf/util/values.c | 304
-rw-r--r--  tools/perf/util/values.h | 28
-rw-r--r--  tools/perf/util/vdso.c | 358
-rw-r--r--  tools/perf/util/vdso.h | 30
-rw-r--r--  tools/perf/util/xyarray.c | 33
-rw-r--r--  tools/perf/util/xyarray.h | 36
-rw-r--r--  tools/perf/util/zlib.c | 99
-rw-r--r--  tools/power/acpi/.gitignore | 4
-rw-r--r--  tools/power/acpi/Makefile | 27
-rw-r--r--  tools/power/acpi/Makefile.config | 91
-rw-r--r--  tools/power/acpi/Makefile.rules | 51
-rw-r--r--  tools/power/acpi/common/cmfsize.c | 68
-rw-r--r--  tools/power/acpi/common/getopt.c | 205
-rw-r--r--  tools/power/acpi/man/acpidump.8 | 127
-rw-r--r--  tools/power/acpi/os_specific/service_layers/oslinuxtbl.c | 1372
-rw-r--r--  tools/power/acpi/os_specific/service_layers/osunixdir.c | 170
-rw-r--r--  tools/power/acpi/os_specific/service_layers/osunixmap.c | 117
-rw-r--r--  tools/power/acpi/os_specific/service_layers/osunixxf.c | 1317
-rw-r--r--  tools/power/acpi/tools/acpidbg/Makefile | 25
-rw-r--r--  tools/power/acpi/tools/acpidbg/acpidbg.c | 444
-rw-r--r--  tools/power/acpi/tools/acpidump/Makefile | 56
-rw-r--r--  tools/power/acpi/tools/acpidump/acpidump.h | 85
-rw-r--r--  tools/power/acpi/tools/acpidump/apdump.c | 402
-rw-r--r--  tools/power/acpi/tools/acpidump/apfiles.c | 223
-rw-r--r--  tools/power/acpi/tools/acpidump/apmain.c | 366
-rw-r--r--  tools/power/acpi/tools/ec/Makefile | 17
-rw-r--r--  tools/power/acpi/tools/ec/ec_access.c | 238
-rw-r--r--  tools/power/cpupower/.gitignore | 28
-rw-r--r--  tools/power/cpupower/Makefile | 334
-rw-r--r--  tools/power/cpupower/README | 47
-rw-r--r--  tools/power/cpupower/ToDo | 10
-rw-r--r--  tools/power/cpupower/bench/Makefile | 43
-rw-r--r--  tools/power/cpupower/bench/README-BENCH | 124
-rw-r--r--  tools/power/cpupower/bench/benchmark.c | 194
-rw-r--r--  tools/power/cpupower/bench/benchmark.h | 29
-rw-r--r--  tools/power/cpupower/bench/config.h | 36
-rw-r--r--  tools/power/cpupower/bench/cpufreq-bench_plot.sh | 104
-rw-r--r--  tools/power/cpupower/bench/cpufreq-bench_script.sh | 101
-rw-r--r--  tools/power/cpupower/bench/example.cfg | 11
-rw-r--r--  tools/power/cpupower/bench/main.c | 202
-rw-r--r--  tools/power/cpupower/bench/parse.c | 238
-rw-r--r--  tools/power/cpupower/bench/parse.h | 53
-rw-r--r--  tools/power/cpupower/bench/system.c | 192
-rw-r--r--  tools/power/cpupower/bench/system.h | 29
-rw-r--r--  tools/power/cpupower/debug/i386/Makefile | 42
-rw-r--r--  tools/power/cpupower/debug/i386/centrino-decode.c | 113
-rw-r--r--  tools/power/cpupower/debug/i386/dump_psb.c | 196
-rw-r--r--  tools/power/cpupower/debug/i386/intel_gsic.c | 78
-rw-r--r--  tools/power/cpupower/debug/i386/powernow-k8-decode.c | 96
-rw-r--r--  tools/power/cpupower/debug/kernel/Makefile | 23
-rw-r--r--  tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c | 110
-rw-r--r--  tools/power/cpupower/debug/x86_64/Makefile | 31
-rw-r--r--  tools/power/cpupower/lib/cpufreq.c | 716
-rw-r--r--  tools/power/cpupower/lib/cpufreq.h | 212
-rw-r--r--  tools/power/cpupower/lib/cpuidle.c | 380
-rw-r--r--  tools/power/cpupower/lib/cpuidle.h | 24
-rw-r--r--  tools/power/cpupower/lib/cpupower.c | 192
-rw-r--r--  tools/power/cpupower/lib/cpupower.h | 36
-rw-r--r--  tools/power/cpupower/lib/cpupower_intern.h | 6
-rw-r--r--  tools/power/cpupower/man/cpupower-frequency-info.1 | 77
-rw-r--r--  tools/power/cpupower/man/cpupower-frequency-set.1 | 52
-rw-r--r--  tools/power/cpupower/man/cpupower-idle-info.1 | 91
-rw-r--r--  tools/power/cpupower/man/cpupower-idle-set.1 | 79
-rw-r--r--  tools/power/cpupower/man/cpupower-info.1 | 19
-rw-r--r--  tools/power/cpupower/man/cpupower-monitor.1 | 198
-rw-r--r--  tools/power/cpupower/man/cpupower-set.1 | 65
-rw-r--r--  tools/power/cpupower/man/cpupower.1 | 72
-rw-r--r--  tools/power/cpupower/po/cs.po | 944
-rw-r--r--  tools/power/cpupower/po/de.po | 961
-rw-r--r--  tools/power/cpupower/po/fr.po | 947
-rw-r--r--  tools/power/cpupower/po/it.po | 961
-rw-r--r--  tools/power/cpupower/po/pt.po | 957
-rw-r--r--  tools/power/cpupower/utils/builtin.h | 13
-rw-r--r--  tools/power/cpupower/utils/cpufreq-info.c | 648
-rw-r--r--  tools/power/cpupower/utils/cpufreq-set.c | 333
-rw-r--r--  tools/power/cpupower/utils/cpuidle-info.c | 214
-rw-r--r--  tools/power/cpupower/utils/cpuidle-set.c | 185
-rw-r--r--  tools/power/cpupower/utils/cpupower-info.c | 106
-rw-r--r--  tools/power/cpupower/utils/cpupower-set.c | 99
-rw-r--r--  tools/power/cpupower/utils/cpupower.c | 238
-rw-r--r--  tools/power/cpupower/utils/helpers/amd.c | 156
-rw-r--r--  tools/power/cpupower/utils/helpers/bitmask.c | 293
-rw-r--r--  tools/power/cpupower/utils/helpers/bitmask.h | 34
-rw-r--r--  tools/power/cpupower/utils/helpers/cpuid.c | 180
-rw-r--r--  tools/power/cpupower/utils/helpers/helpers.h | 174
-rw-r--r--  tools/power/cpupower/utils/helpers/misc.c | 43
-rw-r--r--  tools/power/cpupower/utils/helpers/msr.c | 116
-rw-r--r--  tools/power/cpupower/utils/helpers/pci.c | 63
-rw-r--r--  tools/power/cpupower/utils/helpers/sysfs.c | 472
-rw-r--r--  tools/power/cpupower/utils/helpers/sysfs.h | 39
-rw-r--r--  tools/power/cpupower/utils/helpers/topology.c | 22
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c | 335
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c | 214
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c | 469
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h | 94
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c | 195
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/idle_monitors.def | 8
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/idle_monitors.h | 18
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/mperf_monitor.c | 340
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/nhm_idle.c | 216
-rw-r--r--  tools/power/cpupower/utils/idle_monitor/snb_idle.c | 200
-rwxr-xr-x  tools/power/cpupower/utils/version-gen.sh | 36
-rw-r--r--  tools/power/pm-graph/Makefile | 47
-rw-r--r--  tools/power/pm-graph/bootgraph.8 | 173
-rwxr-xr-x  tools/power/pm-graph/bootgraph.py | 1085
-rw-r--r--  tools/power/pm-graph/config/cgskip.txt | 65
-rw-r--r--  tools/power/pm-graph/config/custom-timeline-functions.cfg | 205
-rw-r--r--  tools/power/pm-graph/config/example.cfg | 133
-rw-r--r--  tools/power/pm-graph/config/freeze-callgraph.cfg | 94
-rw-r--r--  tools/power/pm-graph/config/freeze-dev.cfg | 93
-rw-r--r--  tools/power/pm-graph/config/freeze.cfg | 93
-rw-r--r--  tools/power/pm-graph/config/standby-callgraph.cfg | 94
-rw-r--r--  tools/power/pm-graph/config/standby-dev.cfg | 93
-rw-r--r--  tools/power/pm-graph/config/standby.cfg | 93
-rw-r--r--  tools/power/pm-graph/config/suspend-callgraph.cfg | 98
-rw-r--r--  tools/power/pm-graph/config/suspend-dev.cfg | 93
-rw-r--r--  tools/power/pm-graph/config/suspend-x2-proc.cfg | 93
-rw-r--r--  tools/power/pm-graph/config/suspend.cfg | 93
-rw-r--r--  tools/power/pm-graph/sleepgraph.8 | 292
-rwxr-xr-x  tools/power/pm-graph/sleepgraph.py | 6057
-rwxr-xr-x  tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py | 617
-rw-r--r--  tools/power/x86/turbostat/.gitignore | 1
-rw-r--r--  tools/power/x86/turbostat/Makefile | 28
-rw-r--r--  tools/power/x86/turbostat/turbostat.8 | 351
-rw-r--r--  tools/power/x86/turbostat/turbostat.c | 5620
-rw-r--r--  tools/power/x86/x86_energy_perf_policy/Makefile | 28
-rw-r--r--  tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 | 213
-rw-r--r--  tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 1430
-rw-r--r--  tools/scripts/Makefile.arch | 41
-rw-r--r--  tools/scripts/Makefile.include | 134
-rw-r--r--  tools/scripts/utilities.mak | 179
-rw-r--r--  tools/spi/.gitignore | 2
-rw-r--r--  tools/spi/Build | 2
-rw-r--r--  tools/spi/Makefile | 64
-rw-r--r--  tools/spi/spidev_fdx.c | 159
-rw-r--r--  tools/spi/spidev_test.c | 483
-rw-r--r--  tools/testing/fault-injection/failcmd.sh | 220
-rwxr-xr-x  tools/testing/ktest/compare-ktest-sample.pl | 33
-rwxr-xr-x  tools/testing/ktest/config-bisect.pl | 770
-rw-r--r--  tools/testing/ktest/examples/README | 32
-rw-r--r--  tools/testing/ktest/examples/crosstests.conf | 225
-rw-r--r--  tools/testing/ktest/examples/include/bisect.conf | 90
-rw-r--r--  tools/testing/ktest/examples/include/defaults.conf | 157
-rw-r--r--  tools/testing/ktest/examples/include/min-config.conf | 60
-rw-r--r--  tools/testing/ktest/examples/include/patchcheck.conf | 111
-rw-r--r--  tools/testing/ktest/examples/include/tests.conf | 74
-rw-r--r--  tools/testing/ktest/examples/kvm.conf | 92
-rw-r--r--  tools/testing/ktest/examples/snowball.conf | 53
-rw-r--r--  tools/testing/ktest/examples/test.conf | 62
-rwxr-xr-x  tools/testing/ktest/ktest.pl | 4425
-rw-r--r--  tools/testing/ktest/sample.conf | 1348
-rw-r--r--  tools/testing/nvdimm/Kbuild | 84
-rw-r--r--  tools/testing/nvdimm/Makefile | 8
-rw-r--r--  tools/testing/nvdimm/acpi_nfit_test.c | 8
-rw-r--r--  tools/testing/nvdimm/config_check.c | 18
-rw-r--r--  tools/testing/nvdimm/dax-dev.c | 49
-rw-r--r--  tools/testing/nvdimm/device_dax_test.c | 8
-rw-r--r--  tools/testing/nvdimm/libnvdimm_test.c | 8
-rw-r--r--  tools/testing/nvdimm/pmem-dax.c | 57
-rw-r--r--  tools/testing/nvdimm/pmem_test.c | 8
-rw-r--r--  tools/testing/nvdimm/test/Kbuild | 9
-rw-r--r--  tools/testing/nvdimm/test/iomap.c | 403
-rw-r--r--  tools/testing/nvdimm/test/nfit.c | 2957
-rw-r--r--  tools/testing/nvdimm/test/nfit_test.h | 249
-rw-r--r--  tools/testing/nvdimm/watermark.h | 21
-rw-r--r--  tools/testing/radix-tree/.gitignore | 6
-rw-r--r--  tools/testing/radix-tree/Makefile | 51
-rw-r--r--  tools/testing/radix-tree/benchmark.c | 257
-rw-r--r--  tools/testing/radix-tree/generated/autoconf.h | 1
-rw-r--r--  tools/testing/radix-tree/idr-test.c | 512
-rw-r--r--  tools/testing/radix-tree/iteration_check.c | 221
-rw-r--r--  tools/testing/radix-tree/linux.c | 112
-rw-r--r--  tools/testing/radix-tree/linux/bug.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/compiler_types.h | 0
-rw-r--r--  tools/testing/radix-tree/linux/cpu.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/gfp.h | 33
-rw-r--r--  tools/testing/radix-tree/linux/idr.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/init.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/kernel.h | 20
-rw-r--r--  tools/testing/radix-tree/linux/kmemleak.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/percpu.h | 11
-rw-r--r--  tools/testing/radix-tree/linux/preempt.h | 15
-rw-r--r--  tools/testing/radix-tree/linux/radix-tree.h | 27
-rw-r--r--  tools/testing/radix-tree/linux/rcupdate.h | 10
-rw-r--r--  tools/testing/radix-tree/linux/slab.h | 27
-rw-r--r--  tools/testing/radix-tree/linux/xarray.h | 2
-rw-r--r--  tools/testing/radix-tree/main.c | 388
-rw-r--r--  tools/testing/radix-tree/multiorder.c | 719
-rw-r--r--  tools/testing/radix-tree/regression.h | 9
-rw-r--r--  tools/testing/radix-tree/regression1.c | 221
-rw-r--r--  tools/testing/radix-tree/regression2.c | 123
-rw-r--r--  tools/testing/radix-tree/regression3.c | 118
-rw-r--r--  tools/testing/radix-tree/tag_check.c | 380
-rw-r--r--  tools/testing/radix-tree/test.c | 334
-rw-r--r--  tools/testing/radix-tree/test.h | 65
-rw-r--r--  tools/testing/scatterlist/Makefile | 30
-rw-r--r--  tools/testing/scatterlist/linux/mm.h | 125
-rw-r--r--  tools/testing/scatterlist/main.c | 79
-rw-r--r--  tools/testing/selftests/.gitignore | 5
-rw-r--r--  tools/testing/selftests/Makefile | 167
-rw-r--r--  tools/testing/selftests/android/Makefile | 38
-rw-r--r--  tools/testing/selftests/android/config | 5
-rw-r--r--  tools/testing/selftests/android/ion/.gitignore | 3
-rw-r--r--  tools/testing/selftests/android/ion/Makefile | 19
-rw-r--r--  tools/testing/selftests/android/ion/README | 101
-rw-r--r--  tools/testing/selftests/android/ion/ion.h | 143
-rwxr-xr-x  tools/testing/selftests/android/ion/ion_test.sh | 58
-rw-r--r--  tools/testing/selftests/android/ion/ionapp_export.c | 136
-rw-r--r--  tools/testing/selftests/android/ion/ionapp_import.c | 88
-rw-r--r--  tools/testing/selftests/android/ion/ionmap_test.c | 136
-rw-r--r--  tools/testing/selftests/android/ion/ionutils.c | 253
-rw-r--r--  tools/testing/selftests/android/ion/ionutils.h | 55
-rw-r--r--  tools/testing/selftests/android/ion/ipcsocket.c | 227
-rw-r--r--  tools/testing/selftests/android/ion/ipcsocket.h | 35
-rwxr-xr-x  tools/testing/selftests/android/run.sh | 3
-rw-r--r--  tools/testing/selftests/bpf/.gitignore | 21
-rw-r--r--  tools/testing/selftests/bpf/Makefile | 145
-rw-r--r--  tools/testing/selftests/bpf/bpf_endian.h | 57
-rw-r--r--  tools/testing/selftests/bpf/bpf_helpers.h | 336
-rw-r--r--  tools/testing/selftests/bpf/bpf_rand.h | 80
-rw-r--r--  tools/testing/selftests/bpf/bpf_rlimit.h | 28
-rw-r--r--  tools/testing/selftests/bpf/bpf_util.h | 61
-rw-r--r--  tools/testing/selftests/bpf/cgroup_helpers.c | 235
-rw-r--r--  tools/testing/selftests/bpf/cgroup_helpers.h | 18
-rw-r--r--  tools/testing/selftests/bpf/config | 20
-rw-r--r--  tools/testing/selftests/bpf/connect4_prog.c | 45
-rw-r--r--  tools/testing/selftests/bpf/connect6_prog.c | 61
-rw-r--r--  tools/testing/selftests/bpf/dev_cgroup.c | 60
-rw-r--r--  tools/testing/selftests/bpf/get_cgroup_id_kern.c | 40
-rw-r--r--  tools/testing/selftests/bpf/get_cgroup_id_user.c | 149
-rw-r--r--  tools/testing/selftests/bpf/gnu/stubs.h | 1
-rw-r--r--  tools/testing/selftests/bpf/include/uapi/linux/types.h | 23
-rw-r--r--  tools/testing/selftests/bpf/sample_map_ret0.c | 34
-rw-r--r--  tools/testing/selftests/bpf/sample_ret0.c | 7
-rw-r--r--  tools/testing/selftests/bpf/sendmsg4_prog.c | 49
-rw-r--r--  tools/testing/selftests/bpf/sendmsg6_prog.c | 59
-rw-r--r--  tools/testing/selftests/bpf/socket_cookie_prog.c | 60
-rw-r--r--  tools/testing/selftests/bpf/sockmap_parse_prog.c | 46
-rw-r--r--  tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c | 33
-rw-r--r--  tools/testing/selftests/bpf/sockmap_verdict_prog.c | 73
-rwxr-xr-x  tools/testing/selftests/bpf/tcp_client.py | 51
-rwxr-xr-x  tools/testing/selftests/bpf/tcp_server.py | 83
-rw-r--r--  tools/testing/selftests/bpf/test_adjust_tail.c | 30
-rw-r--r--  tools/testing/selftests/bpf/test_align.c | 719
-rw-r--r--  tools/testing/selftests/bpf/test_btf.c | 2840
-rw-r--r--  tools/testing/selftests/bpf/test_btf_haskv.c | 45
-rw-r--r--  tools/testing/selftests/bpf/test_btf_nokv.c | 43
-rw-r--r--  tools/testing/selftests/bpf/test_cgroup_storage.c | 131
-rw-r--r--  tools/testing/selftests/bpf/test_dev_cgroup.c | 96
-rw-r--r--  tools/testing/selftests/bpf/test_get_stack_rawtp.c | 102
-rw-r--r--  tools/testing/selftests/bpf/test_iptunnel_common.h | 37
-rwxr-xr-x  tools/testing/selftests/bpf/test_kmod.sh | 61
-rw-r--r--  tools/testing/selftests/bpf/test_l4lb.c | 473
-rw-r--r--  tools/testing/selftests/bpf/test_l4lb_noinline.c | 473
-rwxr-xr-x  tools/testing/selftests/bpf/test_libbpf.sh | 43
-rw-r--r--  tools/testing/selftests/bpf/test_libbpf_open.c | 152
-rwxr-xr-x  tools/testing/selftests/bpf/test_lirc_mode2.sh | 40
-rw-r--r--  tools/testing/selftests/bpf/test_lirc_mode2_kern.c | 23
-rw-r--r--  tools/testing/selftests/bpf/test_lirc_mode2_user.c | 149
-rw-r--r--  tools/testing/selftests/bpf/test_lpm_map.c | 804
-rw-r--r--  tools/testing/selftests/bpf/test_lru_map.c | 646
-rw-r--r--  tools/testing/selftests/bpf/test_lwt_seg6local.c | 437
-rwxr-xr-x  tools/testing/selftests/bpf/test_lwt_seg6local.sh | 149
-rw-r--r--  tools/testing/selftests/bpf/test_maps.c | 1451
-rw-r--r--  tools/testing/selftests/bpf/test_obj_id.c | 35
-rwxr-xr-x  tools/testing/selftests/bpf/test_offload.py | 1288
-rw-r--r--  tools/testing/selftests/bpf/test_pkt_access.c | 65
-rw-r--r--  tools/testing/selftests/bpf/test_pkt_md_access.c | 46
-rw-r--r--  tools/testing/selftests/bpf/test_progs.c | 1756
-rw-r--r--  tools/testing/selftests/bpf/test_select_reuseport.c | 700
-rw-r--r--  tools/testing/selftests/bpf/test_select_reuseport_common.h | 36
-rw-r--r--  tools/testing/selftests/bpf/test_select_reuseport_kern.c | 180
-rwxr-xr-x  tools/testing/selftests/bpf/test_skb_cgroup_id.sh | 62
-rw-r--r--  tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c | 47
-rw-r--r--  tools/testing/selftests/bpf/test_skb_cgroup_id_user.c | 187
-rw-r--r--  tools/testing/selftests/bpf/test_sock.c | 480
-rw-r--r--  tools/testing/selftests/bpf/test_sock_addr.c | 1441
-rwxr-xr-x  tools/testing/selftests/bpf/test_sock_addr.sh | 57
-rw-r--r--  tools/testing/selftests/bpf/test_socket_cookie.c | 225
-rw-r--r--  tools/testing/selftests/bpf/test_sockhash_kern.c | 5
-rw-r--r--  tools/testing/selftests/bpf/test_sockmap.c | 1527
-rw-r--r--  tools/testing/selftests/bpf/test_sockmap_kern.c | 5
-rw-r--r--  tools/testing/selftests/bpf/test_sockmap_kern.h | 363
-rw-r--r--  tools/testing/selftests/bpf/test_stacktrace_build_id.c | 76
-rw-r--r--  tools/testing/selftests/bpf/test_stacktrace_map.c | 75
-rw-r--r--  tools/testing/selftests/bpf/test_tag.c | 202
-rw-r--r--  tools/testing/selftests/bpf/test_tcp_estats.c | 258
-rw-r--r--  tools/testing/selftests/bpf/test_tcpbpf.h | 17
-rw-r--r--  tools/testing/selftests/bpf/test_tcpbpf_kern.c | 121
-rw-r--r--  tools/testing/selftests/bpf/test_tcpbpf_user.c | 131
-rw-r--r--  tools/testing/selftests/bpf/test_tracepoint.c | 26
-rwxr-xr-x  tools/testing/selftests/bpf/test_tunnel.sh | 731
-rw-r--r--  tools/testing/selftests/bpf/test_tunnel_kern.c | 713
-rw-r--r--  tools/testing/selftests/bpf/test_verifier.c | 13243
-rw-r--r--  tools/testing/selftests/bpf/test_verifier_log.c | 174
-rw-r--r--  tools/testing/selftests/bpf/test_xdp.c | 235
-rw-r--r--  tools/testing/selftests/bpf/test_xdp_meta.c | 53
-rwxr-xr-x  tools/testing/selftests/bpf/test_xdp_meta.sh | 52
-rw-r--r--  tools/testing/selftests/bpf/test_xdp_noinline.c | 833
-rw-r--r--  tools/testing/selftests/bpf/test_xdp_redirect.c | 28
-rwxr-xr-x  tools/testing/selftests/bpf/test_xdp_redirect.sh | 61
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.c | 214
-rw-r--r--  tools/testing/selftests/bpf/trace_helpers.h | 25
-rw-r--r--  tools/testing/selftests/bpf/urandom_read.c | 28
-rw-r--r--  tools/testing/selftests/breakpoints/.gitignore | 2
-rw-r--r--  tools/testing/selftests/breakpoints/Makefile | 16
-rw-r--r--  tools/testing/selftests/breakpoints/breakpoint_test.c | 401
-rw-r--r--  tools/testing/selftests/breakpoints/breakpoint_test_arm64.c | 258
-rw-r--r--  tools/testing/selftests/breakpoints/step_after_suspend_test.c | 220
-rw-r--r--  tools/testing/selftests/capabilities/.gitignore | 2
-rw-r--r--  tools/testing/selftests/capabilities/Makefile | 9
-rw-r--r--  tools/testing/selftests/capabilities/test_execve.c | 460
-rw-r--r--  tools/testing/selftests/capabilities/validate_cap.c | 80
-rw-r--r--  tools/testing/selftests/cgroup/.gitignore | 2
-rw-r--r--  tools/testing/selftests/cgroup/Makefile | 12
-rw-r--r--  tools/testing/selftests/cgroup/cgroup_util.c | 370
-rw-r--r--  tools/testing/selftests/cgroup/cgroup_util.h | 43
-rw-r--r--  tools/testing/selftests/cgroup/test_core.c | 567
-rw-r--r--  tools/testing/selftests/cgroup/test_memcontrol.c | 1228
-rw-r--r--  tools/testing/selftests/cpu-hotplug/Makefile | 11
-rw-r--r--  tools/testing/selftests/cpu-hotplug/config | 1
-rwxr-xr-x  tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh | 293
-rw-r--r--  tools/testing/selftests/cpufreq/Makefile | 9
-rw-r--r--  tools/testing/selftests/cpufreq/config | 15
-rwxr-xr-x  tools/testing/selftests/cpufreq/cpu.sh | 85
-rwxr-xr-x  tools/testing/selftests/cpufreq/cpufreq.sh | 242
-rwxr-xr-x  tools/testing/selftests/cpufreq/governor.sh | 154
-rwxr-xr-x  tools/testing/selftests/cpufreq/main.sh | 198
-rwxr-xr-x  tools/testing/selftests/cpufreq/module.sh | 244
-rwxr-xr-x  tools/testing/selftests/cpufreq/special-tests.sh | 116
-rwxr-xr-x  tools/testing/selftests/drivers/gpu/drm_mm.sh | 16
-rwxr-xr-x  tools/testing/selftests/drivers/gpu/i915.sh | 16
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh | 217
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh | 197
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh | 189
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh | 233
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/router_scale.sh | 167
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh | 366
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh | 119
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh | 117
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh | 13
-rwxr-xr-x  tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh | 55
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh | 18
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh | 19
-rw-r--r--  tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh | 134
-rwxr-xr-x  tools/testing/selftests/drivers/usb/usbip/usbip_test.sh | 200
-rw-r--r--  tools/testing/selftests/efivarfs/.gitignore | 2
-rw-r--r--  tools/testing/selftests/efivarfs/Makefile | 7
-rw-r--r--  tools/testing/selftests/efivarfs/config | 1
-rw-r--r--  tools/testing/selftests/efivarfs/create-read.c | 39
-rwxr-xr-x  tools/testing/selftests/efivarfs/efivarfs.sh | 215
-rw-r--r--  tools/testing/selftests/efivarfs/open-unlink.c | 134
-rw-r--r--  tools/testing/selftests/exec/.gitignore | 9
-rw-r--r--  tools/testing/selftests/exec/Makefile | 24
-rw-r--r--  tools/testing/selftests/exec/execveat.c | 425
-rw-r--r--  tools/testing/selftests/filesystems/.gitignore | 2
-rw-r--r--  tools/testing/selftests/filesystems/Makefile | 7
-rw-r--r--  tools/testing/selftests/filesystems/devpts_pts.c | 316
-rw-r--r--  tools/testing/selftests/filesystems/dnotify_test.c | 35
-rw-r--r--  tools/testing/selftests/firmware/Makefile | 12
-rw-r--r--  tools/testing/selftests/firmware/config | 5
-rwxr-xr-x  tools/testing/selftests/firmware/fw_fallback.sh | 283
-rwxr-xr-x  tools/testing/selftests/firmware/fw_filesystem.sh | 332
-rwxr-xr-x  tools/testing/selftests/firmware/fw_lib.sh | 205
-rwxr-xr-x  tools/testing/selftests/firmware/fw_run_tests.sh | 70
-rw-r--r--  tools/testing/selftests/ftrace/.gitignore | 1
-rw-r--r--  tools/testing/selftests/ftrace/Makefile | 8
-rw-r--r--  tools/testing/selftests/ftrace/README | 82
-rw-r--r--  tools/testing/selftests/ftrace/config | 9
-rwxr-xr-x  tools/testing/selftests/ftrace/ftracetest | 330
-rw-r--r--  tools/testing/selftests/ftrace/samples/fail.tc | 4
-rw-r--r--  tools/testing/selftests/ftrace/samples/pass.tc | 3
-rw-r--r--  tools/testing/selftests/ftrace/samples/unresolved.tc | 4
-rw-r--r--  tools/testing/selftests/ftrace/samples/unsupported.tc | 3
-rw-r--r--  tools/testing/selftests/ftrace/samples/untested.tc | 3
-rw-r--r--  tools/testing/selftests/ftrace/samples/xfail.tc | 3
-rw-r--r--  tools/testing/selftests/ftrace/settings | 1
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/basic1.tc | 3
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/basic2.tc | 9
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/basic3.tc | 10
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/basic4.tc | 5
-rw-r--r--  tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc | 28
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/event-enable.tc | 62
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/event-pid.tc | 74
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc | 62
-rw-r--r--  tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc | 65
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc | 92
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc | 53
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc | 62
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc | 118
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc | 123
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc | 81
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc | 170
-rw-r--r--  tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc | 186
-rw-r--r--  tools/testing/selftests/ftrace/test.d/functions | 100
-rw-r--r--  tools/testing/selftests/ftrace/test.d/instances/instance-event.tc | 146
-rw-r--r--  tools/testing/selftests/ftrace/test.d/instances/instance.tc | 86
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc | 13
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc | 15
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc | 18
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc | 48
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc | 107
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc | 38
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc | 37
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc | 56
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc | 29
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc | 17
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc | 41
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 39
-rw-r--r--  tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc | 43
-rw-r--r--  tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc | 73
-rw-r--r--  tools/testing/selftests/ftrace/test.d/template | 10
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc | 39
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc | 54
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc | 58
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc | 44
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc | 50
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc | 50
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc | 48
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc | 54
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc | 80
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc | 66
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc | 61
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc | 76
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc | 84
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc | 74
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc | 62
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc | 54
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc | 49
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc | 74
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc | 68
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc | 66
-rw-r--r--  tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc | 59
-rw-r--r--  tools/testing/selftests/futex/Makefile | 37
-rw-r--r--  tools/testing/selftests/futex/README | 62
-rw-r--r--  tools/testing/selftests/futex/functional/.gitignore | 7
-rw-r--r--  tools/testing/selftests/futex/functional/Makefile | 25
-rw-r--r--  tools/testing/selftests/futex/functional/futex_requeue_pi.c | 412
-rw-r--r--  tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c | 138
-rw-r--r--  tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c | 225
-rw-r--r--  tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c | 128
-rw-r--r--  tools/testing/selftests/futex/functional/futex_wait_timeout.c | 89
-rw-r--r--  tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c | 126
-rw-r--r--  tools/testing/selftests/futex/functional/futex_wait_wouldblock.c | 81
-rwxr-xr-x  tools/testing/selftests/futex/functional/run.sh | 79
-rw-r--r--  tools/testing/selftests/futex/include/atomic.h | 83
-rw-r--r--  tools/testing/selftests/futex/include/futextest.h | 266
-rw-r--r--  tools/testing/selftests/futex/include/logging.h | 152
-rwxr-xr-x  tools/testing/selftests/futex/run.sh | 33
-rwxr-xr-x  tools/testing/selftests/gen_kselftest_tar.sh | 53
-rw-r--r--  tools/testing/selftests/gpio/.gitignore | 1
-rw-r--r--  tools/testing/selftests/gpio/Makefile | 29
-rw-r--r--  tools/testing/selftests/gpio/config | 2
-rw-r--r--  tools/testing/selftests/gpio/gpio-mockup-chardev.c | 327
-rwxr-xr-x  tools/testing/selftests/gpio/gpio-mockup-sysfs.sh | 135
-rwxr-xr-x  tools/testing/selftests/gpio/gpio-mockup.sh | 206
-rw-r--r--  tools/testing/selftests/ia64/.gitignore | 1
-rw-r--r--  tools/testing/selftests/ia64/Makefile | 9
-rw-r--r--  tools/testing/selftests/ia64/aliasing-test.c | 263
-rw-r--r--  tools/testing/selftests/intel_pstate/.gitignore | 2
-rw-r--r--  tools/testing/selftests/intel_pstate/Makefile | 16
-rw-r--r--  tools/testing/selftests/intel_pstate/aperf.c | 83
-rw-r--r--  tools/testing/selftests/intel_pstate/msr.c | 40
-rwxr-xr-x  tools/testing/selftests/intel_pstate/run.sh | 128
-rw-r--r--  tools/testing/selftests/ipc/.gitignore | 2
-rw-r--r--  tools/testing/selftests/ipc/Makefile | 18
-rw-r--r--  tools/testing/selftests/ipc/config | 2
-rw-r--r--  tools/testing/selftests/ipc/msgque.c | 255
-rw-r--r--  tools/testing/selftests/kcmp/.gitignore | 2
-rw-r--r--  tools/testing/selftests/kcmp/Makefile | 8
-rw-r--r--  tools/testing/selftests/kcmp/kcmp_test.c | 166
-rw-r--r--  tools/testing/selftests/kmod/Makefile | 11
-rw-r--r--  tools/testing/selftests/kmod/config | 7
-rwxr-xr-x  tools/testing/selftests/kmod/kmod.sh | 623
-rw-r--r--  tools/testing/selftests/kselftest.h | 183
-rw-r--r--  tools/testing/selftests/kselftest_harness.h | 779
-rwxr-xr-x  tools/testing/selftests/kselftest_install.sh | 35
-rw-r--r--  tools/testing/selftests/kvm/.gitignore | 6
-rw-r--r--  tools/testing/selftests/kvm/Makefile | 43
-rw-r--r--  tools/testing/selftests/kvm/config | 3
-rw-r--r--  tools/testing/selftests/kvm/cr4_cpuid_sync_test.c | 113
-rw-r--r--  tools/testing/selftests/kvm/dirty_log_test.c | 308
-rw-r--r--  tools/testing/selftests/kvm/include/kvm_util.h | 194
-rw-r--r--  tools/testing/selftests/kvm/include/sparsebit.h | 75
-rw-r--r--  tools/testing/selftests/kvm/include/test_util.h | 44
-rw-r--r--  tools/testing/selftests/kvm/include/vmx.h | 552
-rw-r--r--  tools/testing/selftests/kvm/include/x86.h | 1047
-rw-r--r--  tools/testing/selftests/kvm/lib/assert.c | 92
-rw-r--r--  tools/testing/selftests/kvm/lib/elf.c | 197
-rw-r--r--  tools/testing/selftests/kvm/lib/io.c | 158
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c | 1680
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util_internal.h | 72
-rw-r--r--  tools/testing/selftests/kvm/lib/sparsebit.c | 2087
-rw-r--r--  tools/testing/selftests/kvm/lib/vmx.c | 283
-rw-r--r--  tools/testing/selftests/kvm/lib/x86.c | 893
-rw-r--r--  tools/testing/selftests/kvm/platform_info_test.c | 110
-rw-r--r--  tools/testing/selftests/kvm/set_sregs_test.c | 54
-rw-r--r--  tools/testing/selftests/kvm/state_test.c | 196
-rw-r--r--  tools/testing/selftests/kvm/sync_regs_test.c | 237
-rw-r--r--  tools/testing/selftests/kvm/vmx_tsc_adjust_test.c | 175
-rw-r--r--  tools/testing/selftests/lib.mk | 169
-rw-r--r--  tools/testing/selftests/lib/Makefile | 8
-rwxr-xr-x  tools/testing/selftests/lib/bitmap.sh | 19
-rw-r--r--  tools/testing/selftests/lib/config | 3
-rwxr-xr-x  tools/testing/selftests/lib/prime_numbers.sh | 19
-rwxr-xr-x  tools/testing/selftests/lib/printf.sh | 19
-rw-r--r--  tools/testing/selftests/locking/Makefile | 10
-rwxr-xr-x  tools/testing/selftests/locking/ww_mutex.sh | 19
-rw-r--r--  tools/testing/selftests/media_tests/.gitignore | 3
-rw-r--r--  tools/testing/selftests/media_tests/Makefile | 6
-rwxr-xr-x  tools/testing/selftests/media_tests/bind_unbind_sample.sh | 13
-rw-r--r--  tools/testing/selftests/media_tests/media_device_open.c | 82
-rw-r--r--  tools/testing/selftests/media_tests/media_device_test.c | 103
-rwxr-xr-x  tools/testing/selftests/media_tests/open_loop_test.sh | 11
-rw-r--r--  tools/testing/selftests/media_tests/regression_test.txt | 43
-rw-r--r--  tools/testing/selftests/media_tests/video_device_test.c | 101
-rw-r--r--  tools/testing/selftests/membarrier/.gitignore | 1
-rw-r--r--  tools/testing/selftests/membarrier/Makefile | 6
-rw-r--r--  tools/testing/selftests/membarrier/membarrier_test.c | 312
-rw-r--r--  tools/testing/selftests/memfd/.gitignore | 4
-rw-r--r--  tools/testing/selftests/memfd/Makefile | 20
-rw-r--r--  tools/testing/selftests/memfd/common.c | 46
-rw-r--r--  tools/testing/selftests/memfd/common.h | 9
-rw-r--r--  tools/testing/selftests/memfd/config | 1
-rw-r--r--  tools/testing/selftests/memfd/fuse_mnt.c | 111
-rw-r--r--  tools/testing/selftests/memfd/fuse_test.c | 330
-rw-r--r--  tools/testing/selftests/memfd/memfd_test.c | 970
-rwxr-xr-x  tools/testing/selftests/memfd/run_fuse_test.sh | 15
-rwxr-xr-x  tools/testing/selftests/memfd/run_hugetlbfs_test.sh | 68
-rw-r--r--  tools/testing/selftests/memory-hotplug/Makefile | 11
-rw-r--r--  tools/testing/selftests/memory-hotplug/config | 5
-rwxr-xr-x  tools/testing/selftests/memory-hotplug/mem-on-off-test.sh | 291
-rw-r--r--  tools/testing/selftests/mount/.gitignore | 1
-rw-r--r--  tools/testing/selftests/mount/Makefile | 9
-rw-r--r--  tools/testing/selftests/mount/config | 1
-rwxr-xr-x  tools/testing/selftests/mount/run_tests.sh | 12
-rw-r--r--  tools/testing/selftests/mount/unprivileged-remount-test.c | 371
-rw-r--r--  tools/testing/selftests/mqueue/.gitignore | 2
-rw-r--r--  tools/testing/selftests/mqueue/Makefile | 7
-rw-r--r--  tools/testing/selftests/mqueue/mq_open_tests.c | 502
-rw-r--r--  tools/testing/selftests/mqueue/mq_perf_tests.c | 752
-rw-r--r--  tools/testing/selftests/net/.gitignore | 16
-rw-r--r--  tools/testing/selftests/net/Makefile | 23
-rw-r--r--  tools/testing/selftests/net/config | 16
-rwxr-xr-x  tools/testing/selftests/net/fib-onlink-tests.sh | 467
-rwxr-xr-x  tools/testing/selftests/net/fib_rule_tests.sh | 255
-rwxr-xr-x  tools/testing/selftests/net/fib_tests.sh | 1439
-rw-r--r--  tools/testing/selftests/net/forwarding/.gitignore | 1
-rw-r--r--  tools/testing/selftests/net/forwarding/README | 58
-rwxr-xr-x  tools/testing/selftests/net/forwarding/bridge_port_isolation.sh | 151
-rwxr-xr-x  tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh | 151
-rwxr-xr-x  tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh | 104
-rw-r--r--  tools/testing/selftests/net/forwarding/config | 12
-rw-r--r--  tools/testing/selftests/net/forwarding/devlink_lib.sh | 108
-rw-r--r--  tools/testing/selftests/net/forwarding/forwarding.config.sample | 35
-rwxr-xr-x  tools/testing/selftests/net/forwarding/gre_multipath.sh | 257
-rw-r--r--  tools/testing/selftests/net/forwarding/lib.sh | 964
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre.sh | 160
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bound.sh | 226
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh | 132
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh | 132
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh | 129
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh | 283
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_changes.sh | 271
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_flower.sh | 137
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh | 285
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_gre_lib.sh | 130
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh | 115
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_nh.sh | 131
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh | 94
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh | 92
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh | 283
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_lib.sh | 132
-rw-r--r--  tools/testing/selftests/net/forwarding/mirror_topo_lib.sh | 101
-rwxr-xr-x  tools/testing/selftests/net/forwarding/mirror_vlan.sh | 131
-rwxr-xr-x  tools/testing/selftests/net/forwarding/router.sh | 135
-rwxr-xr-x  tools/testing/selftests/net/forwarding/router_bridge.sh | 113
-rwxr-xr-x  tools/testing/selftests/net/forwarding/router_bridge_vlan.sh | 132
-rwxr-xr-x  tools/testing/selftests/net/forwarding/router_broadcast.sh | 233
-rwxr-xr-x  tools/testing/selftests/net/forwarding/router_multipath.sh | 342
-rwxr-xr-x  tools/testing/selftests/net/forwarding/tc_actions.sh | 213
-rwxr-xr-x  tools/testing/selftests/net/forwarding/tc_chains.sh | 205
-rw-r--r--  tools/testing/selftests/net/forwarding/tc_common.sh | 25
-rwxr-xr-x  tools/testing/selftests/net/forwarding/tc_flower.sh | 260
-rwxr-xr-x  tools/testing/selftests/net/forwarding/tc_shblocks.sh | 125
-rwxr-xr-x  tools/testing/selftests/net/in_netns.sh | 23
-rwxr-xr-x  tools/testing/selftests/net/ip6_gre_headroom.sh | 65
-rw-r--r--  tools/testing/selftests/net/msg_zerocopy.c | 810
-rwxr-xr-x  tools/testing/selftests/net/msg_zerocopy.sh | 120
-rwxr-xr-x  tools/testing/selftests/net/netdevice.sh | 205
-rwxr-xr-x  tools/testing/selftests/net/pmtu.sh | 477
-rw-r--r--  tools/testing/selftests/net/psock_fanout.c | 486
-rw-r--r--  tools/testing/selftests/net/psock_lib.h | 158
-rw-r--r--  tools/testing/selftests/net/psock_snd.c | 397
-rwxr-xr-x  tools/testing/selftests/net/psock_snd.sh | 98
-rw-r--r--  tools/testing/selftests/net/psock_tpacket.c | 864
-rw-r--r--  tools/testing/selftests/net/reuseaddr_conflict.c | 114
-rw-r--r--  tools/testing/selftests/net/reuseport_bpf.c | 641
-rw-r--r--  tools/testing/selftests/net/reuseport_bpf_cpu.c | 259
-rw-r--r--  tools/testing/selftests/net/reuseport_bpf_numa.c | 258
-rw-r--r--  tools/testing/selftests/net/reuseport_dualstack.c | 210
-rwxr-xr-x  tools/testing/selftests/net/rtnetlink.sh | 1023
-rwxr-xr-x  tools/testing/selftests/net/run_afpackettests | 27
-rwxr-xr-x  tools/testing/selftests/net/run_netsocktests | 13
-rw-r--r--  tools/testing/selftests/net/socket.c | 93
-rw-r--r--  tools/testing/selftests/net/tcp_inq.c | 189
-rw-r--r--  tools/testing/selftests/net/tcp_mmap.c | 447
-rwxr-xr-x  tools/testing/selftests/net/test_bpf.sh | 11
-rw-r--r--  tools/testing/selftests/net/tls.c | 741
-rw-r--r--  tools/testing/selftests/net/udpgso.c | 685
-rwxr-xr-x  tools/testing/selftests/net/udpgso.sh | 29
-rwxr-xr-x  tools/testing/selftests/net/udpgso_bench.sh | 71
-rw-r--r--  tools/testing/selftests/net/udpgso_bench_rx.c | 265
-rw-r--r--  tools/testing/selftests/net/udpgso_bench_tx.c | 426
-rw-r--r--  tools/testing/selftests/netfilter/Makefile | 6
-rw-r--r--  tools/testing/selftests/netfilter/config | 2
-rwxr-xr-x  tools/testing/selftests/netfilter/conntrack_icmp_related.sh | 283
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_nat.sh | 766
-rwxr-xr-x  tools/testing/selftests/netfilter/nft_trans_stress.sh | 78
-rw-r--r--  tools/testing/selftests/networking/timestamping/.gitignore | 4
-rw-r--r--  tools/testing/selftests/networking/timestamping/Makefile | 13
-rw-r--r--  tools/testing/selftests/networking/timestamping/hwtstamp_config.c | 135
-rw-r--r--  tools/testing/selftests/networking/timestamping/rxtimestamp.c | 389
-rw-r--r--  tools/testing/selftests/networking/timestamping/timestamping.c | 534
-rw-r--r--  tools/testing/selftests/networking/timestamping/txtimestamp.c | 564
-rw-r--r--  tools/testing/selftests/nsfs/.gitignore | 2
-rw-r--r--  tools/testing/selftests/nsfs/Makefile | 5
-rw-r--r--  tools/testing/selftests/nsfs/config | 3
-rw-r--r--  tools/testing/selftests/nsfs/owner.c | 92
-rw-r--r--  tools/testing/selftests/nsfs/pidns.c | 79
-rwxr-xr-x  tools/testing/selftests/ntb/ntb_test.sh | 590
-rw-r--r--  tools/testing/selftests/powerpc/Makefile | 74
-rw-r--r--tools/testing/selftests/powerpc/alignment/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c575
-rw-r--r--tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c72
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile18
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c506
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/exec_target.c16
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/fork.c325
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/futex_bench.c43
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/gettimeofday.c31
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/mmap_bench.c90
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/null_syscall.c157
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/cache_shape.c125
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore13
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile53
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h0
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/export.h2
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h0
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h47
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/processor.h0
-rw-r--r--tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S24
l---------tools/testing/selftests/powerpc/copyloops/copyuser_64.S1
l---------tools/testing/selftests/powerpc/copyloops/copyuser_power7.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/exc_validate.c124
l---------tools/testing/selftests/powerpc/copyloops/memcpy_64.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_power7.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/stubs.S19
-rw-r--r--tools/testing/selftests/powerpc/copyloops/validate.c100
-rw-r--r--tools/testing/selftests/powerpc/dscr/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr.h125
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c127
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c71
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c109
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c87
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c101
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c80
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c61
-rw-r--r--tools/testing/selftests/powerpc/harness.c131
-rw-r--r--tools/testing/selftests/powerpc/include/basic_asm.h74
-rw-r--r--tools/testing/selftests/powerpc/include/fpu_asm.h80
-rw-r--r--tools/testing/selftests/powerpc/include/gpr_asm.h96
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h69
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h146
-rw-r--r--tools/testing/selftests/powerpc/include/subunit.h52
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h83
-rw-r--r--tools/testing/selftests/powerpc/include/vmx_asm.h96
-rw-r--r--tools/testing/selftests/powerpc/include/vsx_asm.h71
-rw-r--r--tools/testing/selftests/powerpc/lib/reg.S397
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore7
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile19
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_asm.S135
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_preempt.c113
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_signal.c135
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_syscall.c90
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_asm.S152
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c112
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c156
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_syscall.c91
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_asm.S61
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_preempt.c147
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore5
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile19
-rw-r--r--tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c77
-rw-r--r--tools/testing/selftests/powerpc/mm/prot_sao.c42
-rw-r--r--tools/testing/selftests/powerpc/mm/segv_errors.c78
-rw-r--r--tools/testing/selftests/powerpc/mm/subpage_prot.c236
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c734
-rw-r--r--tools/testing/selftests/powerpc/pmu/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile46
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_instructions.c147
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/.gitignore22
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile28
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c106
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S271
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c61
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c95
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c117
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c485
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h78
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S365
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c88
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c92
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c88
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c133
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c81
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c167
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c102
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c86
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c109
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c61
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c106
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c93
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c40
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c93
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c85
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.c300
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.h41
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.c131
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.h43
-rw-r--r--tools/testing/selftests/powerpc/pmu/l3_bank_test.c48
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.c227
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h40
-rw-r--r--tools/testing/selftests/powerpc/pmu/loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c114
-rw-r--r--tools/testing/selftests/powerpc/primitives/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/primitives/Makefile8
l---------tools/testing/selftests/powerpc/primitives/asm/asm-compat.h1
l---------tools/testing/selftests/powerpc/primitives/asm/asm-const.h1
l---------tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/asm/firmware.h0
-rw-r--r--tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h0
l---------tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/asm/processor.h0
-rw-r--r--tools/testing/selftests/powerpc/primitives/linux/stringify.h0
-rw-r--r--tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c159
l---------tools/testing/selftests/powerpc/primitives/word-at-a-time.h1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore12
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile15
-rw-r--r--tools/testing/selftests/powerpc/ptrace/child.h139
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c462
-rw-r--r--tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c195
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c123
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h74
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c342
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c330
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c135
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.h50
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c158
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c169
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c174
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c184
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c167
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c160
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c167
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c117
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h127
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace.h749
-rwxr-xr-xtools/testing/selftests/powerpc/scripts/hmi.sh89
-rw-r--r--tools/testing/selftests/powerpc/signal/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/signal/signal.S50
-rw-r--r--tools/testing/selftests/powerpc/signal/signal.c111
-rw-r--r--tools/testing/selftests/powerpc/signal/signal_tm.c110
-rw-r--r--tools/testing/selftests/powerpc/stringloops/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile35
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/cache.h1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/export.h1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h39
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h33
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c159
l---------tools/testing/selftests/powerpc/stringloops/memcmp_32.S1
l---------tools/testing/selftests/powerpc/stringloops/memcmp_64.S1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/string.c21
-rw-r--r--tools/testing/selftests/powerpc/stringloops/strlen.c127
l---------tools/testing/selftests/powerpc/stringloops/strlen_32.S1
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile18
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/check.S101
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/common.h7
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S82
-rw-r--r--tools/testing/selftests/powerpc/syscalls/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/syscalls/ipc.h48
-rw-r--r--tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c61
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore17
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile26
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-exec.c70
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-fork.c42
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c100
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c92
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c90
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c110
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c125
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c74
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-stack.c76
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal.S114
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c93
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall-asm.S28
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c106
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tar.c91
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c150
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c331
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c407
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c118
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmxcopy.c104
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h96
-rw-r--r--tools/testing/selftests/powerpc/utils.c136
-rw-r--r--tools/testing/selftests/powerpc/vphn/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/vphn/test-vphn.c411
l---------tools/testing/selftests/powerpc/vphn/vphn.c1
l---------tools/testing/selftests/powerpc/vphn/vphn.h1
-rw-r--r--tools/testing/selftests/prctl/.gitignore3
-rw-r--r--tools/testing/selftests/prctl/Makefile16
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c98
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c97
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-test.c96
-rw-r--r--tools/testing/selftests/proc/.gitignore14
-rw-r--r--tools/testing/selftests/proc/Makefile20
-rw-r--r--tools/testing/selftests/proc/config1
-rw-r--r--tools/testing/selftests/proc/fd-001-lookup.c168
-rw-r--r--tools/testing/selftests/proc/fd-002-posix-eq.c57
-rw-r--r--tools/testing/selftests/proc/fd-003-kthread.c178
-rw-r--r--tools/testing/selftests/proc/proc-loadavg-001.c82
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-001.c82
-rw-r--r--tools/testing/selftests/proc/proc-self-map-files-002.c90
-rw-r--r--tools/testing/selftests/proc/proc-self-syscall.c59
-rw-r--r--tools/testing/selftests/proc/proc-self-wchan.c40
-rw-r--r--tools/testing/selftests/proc/proc-uptime-001.c45
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c78
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h60
-rw-r--r--tools/testing/selftests/proc/proc.h51
-rw-r--r--tools/testing/selftests/proc/read.c132
-rw-r--r--tools/testing/selftests/proc/self.c39
-rw-r--r--tools/testing/selftests/proc/setns-dcache.c129
-rw-r--r--tools/testing/selftests/proc/thread-self.c64
-rw-r--r--tools/testing/selftests/pstore/.gitignore2
-rw-r--r--tools/testing/selftests/pstore/Makefile14
-rwxr-xr-xtools/testing/selftests/pstore/common_tests83
-rw-r--r--tools/testing/selftests/pstore/config5
-rwxr-xr-xtools/testing/selftests/pstore/pstore_crash_test30
-rwxr-xr-xtools/testing/selftests/pstore/pstore_post_reboot_tests80
-rwxr-xr-xtools/testing/selftests/pstore/pstore_tests30
-rw-r--r--tools/testing/selftests/ptp/.gitignore1
-rw-r--r--tools/testing/selftests/ptp/Makefile10
-rw-r--r--tools/testing/selftests/ptp/testptp.c521
-rw-r--r--tools/testing/selftests/ptp/testptp.mk33
-rw-r--r--tools/testing/selftests/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/ptrace/Makefile5
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c219
-rw-r--r--tools/testing/selftests/rcutorture/.gitignore4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configNR_CPUS.sh45
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config_override.sh61
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh54
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh72
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh41
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh279
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh90
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh60
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh51
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh74
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh122
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh96
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh73
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh271
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh471
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh62
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh176
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/BUSTED6
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFLIST7
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK016
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK026
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK036
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK046
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK056
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK066
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK076
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh42
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P12
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-t10
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-u10
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS0110
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS024
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS0312
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY0113
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY0214
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0118
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0223
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0318
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0420
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0520
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0623
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0717
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0823
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0918
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh56
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/TINY16
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/TREE19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/TREE5422
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh29
-rw-r--r--tools/testing/selftests/rcutorture/doc/TINY_RCU.txt38
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt81
-rw-r--r--tools/testing/selftests/rcutorture/doc/initrd.txt113
-rw-r--r--tools/testing/selftests/rcutorture/doc/rcu-test-image.txt42
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h156
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk376
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h41
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h28
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c32
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h34
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h221
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h58
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h93
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c79
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h59
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c51
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h103
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c73
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh103
-rw-r--r--tools/testing/selftests/rseq/.gitignore6
-rw-r--r--tools/testing/selftests/rseq/Makefile30
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c312
-rw-r--r--tools/testing/selftests/rseq/basic_test.c56
-rw-r--r--tools/testing/selftests/rseq/param_test.c1331
-rw-r--r--tools/testing/selftests/rseq/rseq-arm.h716
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64.h594
-rw-r--r--tools/testing/selftests/rseq/rseq-mips.h725
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc.h671
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h513
-rw-r--r--tools/testing/selftests/rseq/rseq-skip.h65
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h1132
-rw-r--r--tools/testing/selftests/rseq/rseq.c117
-rw-r--r--tools/testing/selftests/rseq/rseq.h163
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh121
-rw-r--r--tools/testing/selftests/rseq/settings1
-rw-r--r--tools/testing/selftests/rtc/.gitignore2
-rw-r--r--tools/testing/selftests/rtc/Makefile9
-rw-r--r--tools/testing/selftests/rtc/rtctest.c337
-rw-r--r--tools/testing/selftests/rtc/setdate.c86
-rw-r--r--tools/testing/selftests/seccomp/.gitignore2
-rw-r--r--tools/testing/selftests/seccomp/Makefile17
-rw-r--r--tools/testing/selftests/seccomp/config2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c99
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2997
-rw-r--r--tools/testing/selftests/sigaltstack/.gitignore1
-rw-r--r--tools/testing/selftests/sigaltstack/Makefile5
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c191
-rw-r--r--tools/testing/selftests/size/.gitignore1
-rw-r--r--tools/testing/selftests/size/Makefile5
-rw-r--r--tools/testing/selftests/size/get_size.c117
-rw-r--r--tools/testing/selftests/sparc64/Makefile50
-rw-r--r--tools/testing/selftests/sparc64/drivers/.gitignore1
-rw-r--r--tools/testing/selftests/sparc64/drivers/Makefile15
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c721
-rwxr-xr-xtools/testing/selftests/sparc64/drivers/drivers_test.sh30
-rwxr-xr-xtools/testing/selftests/sparc64/run.sh3
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/splice/Makefile5
-rw-r--r--tools/testing/selftests/splice/default_file_splice_read.c9
-rwxr-xr-xtools/testing/selftests/splice/default_file_splice_read.sh8
-rw-r--r--tools/testing/selftests/static_keys/Makefile8
-rw-r--r--tools/testing/selftests/static_keys/config1
-rwxr-xr-xtools/testing/selftests/static_keys/test_static_keys.sh30
-rw-r--r--tools/testing/selftests/sync/.gitignore1
-rw-r--r--tools/testing/selftests/sync/Makefile38
-rw-r--r--tools/testing/selftests/sync/config4
-rw-r--r--tools/testing/selftests/sync/sw_sync.h46
-rw-r--r--tools/testing/selftests/sync/sync.c221
-rw-r--r--tools/testing/selftests/sync/sync.h40
-rw-r--r--tools/testing/selftests/sync/sync_alloc.c74
-rw-r--r--tools/testing/selftests/sync/sync_fence.c132
-rw-r--r--tools/testing/selftests/sync/sync_merge.c60
-rw-r--r--tools/testing/selftests/sync/sync_stress_consumer.c185
-rw-r--r--tools/testing/selftests/sync/sync_stress_merge.c115
-rw-r--r--tools/testing/selftests/sync/sync_stress_parallelism.c111
-rw-r--r--tools/testing/selftests/sync/sync_test.c113
-rw-r--r--tools/testing/selftests/sync/sync_wait.c91
-rw-r--r--tools/testing/selftests/sync/synctest.h67
-rw-r--r--tools/testing/selftests/sysctl/Makefile12
-rw-r--r--tools/testing/selftests/sysctl/config1
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh780
-rw-r--r--tools/testing/selftests/tc-testing/.gitignore2
-rw-r--r--tools/testing/selftests/tc-testing/README247
-rw-r--r--tools/testing/selftests/tc-testing/TODO.txt31
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py74
-rw-r--r--tools/testing/selftests/tc-testing/bpf/Makefile29
-rw-r--r--tools/testing/selftests/tc-testing/bpf/action.c23
-rw-r--r--tools/testing/selftests/tc-testing/config48
-rw-r--r--tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt104
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt100
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/example.json55
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/template.json51
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS27
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py141
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py19
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py142
-rw-r--r--tools/testing/selftests/tc-testing/plugins/__init__.py0
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json294
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json291
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json504
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json540
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json1064
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json438
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json593
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json719
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json612
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json130
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json488
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json396
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json888
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json692
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/fw.json1049
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json42
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py663
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py62
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py36
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config_local_template.py23
-rw-r--r--tools/testing/selftests/tc-testing/tdc_helper.py67
-rw-r--r--tools/testing/selftests/timers/.gitignore21
-rw-r--r--tools/testing/selftests/timers/Makefile23
-rw-r--r--tools/testing/selftests/timers/adjtick.c211
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c178
-rw-r--r--tools/testing/selftests/timers/change_skew.c96
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c168
-rw-r--r--tools/testing/selftests/timers/freq-step.c271
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c193
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c378
-rw-r--r--tools/testing/selftests/timers/leapcrash.c108
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c114
-rw-r--r--tools/testing/selftests/timers/nanosleep.c165
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c180
-rw-r--r--tools/testing/selftests/timers/posix_timers.c222
-rw-r--r--tools/testing/selftests/timers/raw_skew.c148
-rw-r--r--tools/testing/selftests/timers/rtcpie.c142
-rw-r--r--tools/testing/selftests/timers/set-2038.c133
-rw-r--r--tools/testing/selftests/timers/set-tai.c69
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c283
-rw-r--r--tools/testing/selftests/timers/set-tz.c110
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c78
-rw-r--r--tools/testing/selftests/timers/threadtest.c193
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c330
-rw-r--r--tools/testing/selftests/uevent/Makefile17
-rw-r--r--tools/testing/selftests/uevent/config2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c486
-rw-r--r--tools/testing/selftests/user/Makefile8
-rw-r--r--tools/testing/selftests/user/config1
-rwxr-xr-xtools/testing/selftests/user/test_user_copy.sh18
-rw-r--r--tools/testing/selftests/vDSO/.gitignore2
-rw-r--r--tools/testing/selftests/vDSO/Makefile26
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c269
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c128
-rw-r--r--tools/testing/selftests/vDSO/vdso_test.c68
-rw-r--r--tools/testing/selftests/vm/.gitignore15
-rw-r--r--tools/testing/selftests/vm/Makefile34
-rw-r--r--tools/testing/selftests/vm/compaction_test.c230
-rw-r--r--tools/testing/selftests/vm/config2
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c93
-rw-r--r--tools/testing/selftests/vm/hugepage-mmap.c93
-rw-r--r--tools/testing/selftests/vm/hugepage-shm.c101
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c84
-rw-r--r--tools/testing/selftests/vm/map_populate.c113
-rw-r--r--tools/testing/selftests/vm/mlock-random-test.c294
-rw-r--r--tools/testing/selftests/vm/mlock2-tests.c520
-rw-r--r--tools/testing/selftests/vm/mlock2.h63
-rw-r--r--tools/testing/selftests/vm/on-fault-limit.c48
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests214
-rw-r--r--tools/testing/selftests/vm/thuge-gen.c257
-rw-r--r--tools/testing/selftests/vm/transhuge-stress.c144
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c1333
-rw-r--r--tools/testing/selftests/vm/va_128TBswitch.c297
-rw-r--r--tools/testing/selftests/vm/virtual_address_range.c139
-rw-r--r--tools/testing/selftests/watchdog/.gitignore1
-rw-r--r--tools/testing/selftests/watchdog/Makefile4
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c167
-rw-r--r--tools/testing/selftests/x86/.gitignore15
-rw-r--r--tools/testing/selftests/x86/Makefile105
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh16
-rw-r--r--tools/testing/selftests/x86/check_initial_reg_state.c109
-rw-r--r--tools/testing/selftests/x86/entry_from_vm86.c349
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c427
-rw-r--r--tools/testing/selftests/x86/ioperm.c171
-rw-r--r--tools/testing/selftests/x86/iopl.c136
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c927
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c285
-rw-r--r--tools/testing/selftests/x86/mpx-debug.h15
-rw-r--r--tools/testing/selftests/x86/mpx-dig.c499
-rw-r--r--tools/testing/selftests/x86/mpx-hw.h124
-rw-r--r--tools/testing/selftests/x86/mpx-mini-test.c1616
-rw-r--r--tools/testing/selftests/x86/mpx-mm.h10
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h219
-rw-r--r--tools/testing/selftests/x86/protection_keys.c1508
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c430
-rw-r--r--tools/testing/selftests/x86/raw_syscall_helper_32.S47
-rw-r--r--tools/testing/selftests/x86/sigreturn.c871
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c187
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c130
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c96
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c195
-rw-r--r--tools/testing/selftests/x86/sysret_ss_attrs.c112
-rw-r--r--tools/testing/selftests/x86/test_FCMOV.c94
-rw-r--r--tools/testing/selftests/x86/test_FCOMI.c332
-rw-r--r--tools/testing/selftests/x86/test_FISTTP.c138
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c115
-rw-r--r--tools/testing/selftests/x86/test_syscall_vdso.c408
-rw-r--r--tools/testing/selftests/x86/test_vdso.c337
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c506
-rw-r--r--tools/testing/selftests/x86/thunks.S67
-rw-r--r--tools/testing/selftests/x86/thunks_32.S55
-rw-r--r--tools/testing/selftests/x86/trivial_32bit_program.c18
-rw-r--r--tools/testing/selftests/x86/trivial_64bit_program.c18
-rw-r--r--tools/testing/selftests/x86/trivial_program.c10
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c211
-rw-r--r--tools/testing/selftests/x86/vdso_restorer.c88
-rw-r--r--tools/testing/selftests/zram/Makefile9
-rw-r--r--tools/testing/selftests/zram/README40
-rw-r--r--tools/testing/selftests/zram/config2
-rwxr-xr-xtools/testing/selftests/zram/zram.sh18
-rwxr-xr-xtools/testing/selftests/zram/zram01.sh84
-rwxr-xr-xtools/testing/selftests/zram/zram02.sh53
-rwxr-xr-xtools/testing/selftests/zram/zram_lib.sh278
-rw-r--r--tools/testing/vsock/.gitignore2
-rw-r--r--tools/testing/vsock/Makefile9
-rw-r--r--tools/testing/vsock/README36
-rw-r--r--tools/testing/vsock/control.c219
-rw-r--r--tools/testing/vsock/control.h13
-rw-r--r--tools/testing/vsock/timeout.c64
-rw-r--r--tools/testing/vsock/timeout.h14
-rw-r--r--tools/testing/vsock/vsock_diag_test.c681
-rw-r--r--tools/thermal/tmon/.gitignore1
-rw-r--r--tools/thermal/tmon/Makefile58
-rw-r--r--tools/thermal/tmon/README50
-rw-r--r--tools/thermal/tmon/pid.c131
-rw-r--r--tools/thermal/tmon/sysfs.c598
-rw-r--r--tools/thermal/tmon/tmon.8145
-rw-r--r--tools/thermal/tmon/tmon.c385
-rw-r--r--tools/thermal/tmon/tmon.h204
-rw-r--r--tools/thermal/tmon/tui.c665
-rwxr-xr-xtools/time/udelay_test.sh66
-rw-r--r--tools/usb/.gitignore2
-rw-r--r--tools/usb/Makefile14
-rw-r--r--tools/usb/ffs-aio-example/multibuff/device_app/aio_multibuff.c380
-rw-r--r--tools/usb/ffs-aio-example/multibuff/host_app/Makefile14
-rw-r--r--tools/usb/ffs-aio-example/multibuff/host_app/test.c175
-rw-r--r--tools/usb/ffs-aio-example/simple/device_app/aio_simple.c368
-rw-r--r--tools/usb/ffs-aio-example/simple/host_app/Makefile13
-rw-r--r--tools/usb/ffs-aio-example/simple/host_app/test.c178
-rw-r--r--tools/usb/ffs-test.c694
-rw-r--r--tools/usb/hcd-tests.sh276
-rw-r--r--tools/usb/testusb.c541
-rw-r--r--tools/usb/usbip/.gitignore32
-rw-r--r--tools/usb/usbip/AUTHORS3
-rw-r--r--tools/usb/usbip/COPYING340
-rw-r--r--tools/usb/usbip/INSTALL237
-rw-r--r--tools/usb/usbip/Makefile.am8
-rw-r--r--tools/usb/usbip/README257
-rwxr-xr-xtools/usb/usbip/autogen.sh9
-rwxr-xr-xtools/usb/usbip/cleanup.sh13
-rw-r--r--tools/usb/usbip/configure.ac111
-rw-r--r--tools/usb/usbip/doc/usbip.895
-rw-r--r--tools/usb/usbip/doc/usbipd.891
-rw-r--r--tools/usb/usbip/libsrc/Makefile.am11
-rw-r--r--tools/usb/usbip/libsrc/list.h137
-rw-r--r--tools/usb/usbip/libsrc/names.c504
-rw-r--r--tools/usb/usbip/libsrc/names.h41
-rw-r--r--tools/usb/usbip/libsrc/sysfs_utils.c32
-rw-r--r--tools/usb/usbip/libsrc/sysfs_utils.h9
-rw-r--r--tools/usb/usbip/libsrc/usbip_common.c316
-rw-r--r--tools/usb/usbip/libsrc/usbip_common.h152
-rw-r--r--tools/usb/usbip/libsrc/usbip_device_driver.c168
-rw-r--r--tools/usb/usbip/libsrc/usbip_device_driver.h34
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_common.c294
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_common.h104
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_driver.c65
-rw-r--r--tools/usb/usbip/libsrc/usbip_host_driver.h32
-rw-r--r--tools/usb/usbip/libsrc/vhci_driver.c467
-rw-r--r--tools/usb/usbip/libsrc/vhci_driver.h67
-rw-r--r--tools/usb/usbip/src/Makefile.am12
-rw-r--r--tools/usb/usbip/src/usbip.c203
-rw-r--r--tools/usb/usbip/src/usbip.h40
-rw-r--r--tools/usb/usbip/src/usbip_attach.c255
-rw-r--r--tools/usb/usbip/src/usbip_bind.c223
-rw-r--r--tools/usb/usbip/src/usbip_detach.c136
-rw-r--r--tools/usb/usbip/src/usbip_list.c384
-rw-r--r--tools/usb/usbip/src/usbip_network.c315
-rw-r--r--tools/usb/usbip/src/usbip_network.h178
-rw-r--r--tools/usb/usbip/src/usbip_port.c66
-rw-r--r--tools/usb/usbip/src/usbip_unbind.c141
-rw-r--r--tools/usb/usbip/src/usbipd.c698
-rw-r--r--tools/usb/usbip/src/utils.c55
-rw-r--r--tools/usb/usbip/src/utils.h25
-rwxr-xr-xtools/usb/usbip/vudc/vudc_server_example.sh107
-rw-r--r--tools/virtio/.gitignore3
-rw-r--r--tools/virtio/Makefile15
-rw-r--r--tools/virtio/asm/barrier.h21
-rw-r--r--tools/virtio/linux/bug.h11
-rw-r--r--tools/virtio/linux/compiler.h10
-rw-r--r--tools/virtio/linux/device.h2
-rw-r--r--tools/virtio/linux/dma-mapping.h32
-rw-r--r--tools/virtio/linux/err.h27
-rw-r--r--tools/virtio/linux/export.h3
-rw-r--r--tools/virtio/linux/hrtimer.h0
-rw-r--r--tools/virtio/linux/irqreturn.h1
-rw-r--r--tools/virtio/linux/kernel.h144
-rw-r--r--tools/virtio/linux/kmemleak.h3
-rw-r--r--tools/virtio/linux/module.h7
-rw-r--r--tools/virtio/linux/printk.h4
-rw-r--r--tools/virtio/linux/ratelimit.h4
-rw-r--r--tools/virtio/linux/scatterlist.h172
-rw-r--r--tools/virtio/linux/slab.h7
-rw-r--r--tools/virtio/linux/thread_info.h1
-rw-r--r--tools/virtio/linux/uaccess.h52
-rw-r--r--tools/virtio/linux/uio.h3
-rw-r--r--tools/virtio/linux/virtio.h68
-rw-r--r--tools/virtio/linux/virtio_byteorder.h9
-rw-r--r--tools/virtio/linux/virtio_config.h92
-rw-r--r--tools/virtio/linux/virtio_ring.h1
-rw-r--r--tools/virtio/linux/vringh.h1
-rw-r--r--tools/virtio/ringtest/Makefile31
-rw-r--r--tools/virtio/ringtest/README6
-rw-r--r--tools/virtio/ringtest/main.c391
-rw-r--r--tools/virtio/ringtest/main.h194
-rw-r--r--tools/virtio/ringtest/noring.c72
-rw-r--r--tools/virtio/ringtest/ptr_ring.c210
-rw-r--r--tools/virtio/ringtest/ring.c270
-rwxr-xr-xtools/virtio/ringtest/run-on-all.sh26
-rw-r--r--tools/virtio/ringtest/virtio_ring_0_9.c333
-rw-r--r--tools/virtio/ringtest/virtio_ring_inorder.c2
-rw-r--r--tools/virtio/ringtest/virtio_ring_poll.c2
-rw-r--r--tools/virtio/uapi/linux/uio.h1
-rw-r--r--tools/virtio/uapi/linux/virtio_config.h1
-rw-r--r--tools/virtio/uapi/linux/virtio_ring.h5
-rw-r--r--tools/virtio/uapi/linux/virtio_types.h1
-rw-r--r--tools/virtio/vhost_test/Makefile2
-rw-r--r--tools/virtio/vhost_test/vhost_test.c1
-rw-r--r--tools/virtio/virtio-trace/Makefile14
-rw-r--r--tools/virtio/virtio-trace/README118
-rw-r--r--tools/virtio/virtio-trace/trace-agent-ctl.c137
-rw-r--r--tools/virtio/virtio-trace/trace-agent-rw.c192
-rw-r--r--tools/virtio/virtio-trace/trace-agent.c270
-rw-r--r--tools/virtio/virtio-trace/trace-agent.h76
-rw-r--r--tools/virtio/virtio_test.c303
-rw-r--r--tools/virtio/vringh_test.c751
-rw-r--r--tools/vm/.gitignore2
-rw-r--r--tools/vm/Makefile32
-rw-r--r--tools/vm/page-types.c1384
-rw-r--r--tools/vm/page_owner_sort.c150
-rw-r--r--tools/vm/slabinfo-gnuplot.sh275
-rw-r--r--tools/vm/slabinfo.c1483
-rw-r--r--tools/wmi/Makefile17
-rw-r--r--tools/wmi/dell-smbios-example.c210
2901 files changed, 644103 insertions, 0 deletions
diff --git a/tools/Makefile b/tools/Makefile
new file mode 100644
index 000000000..be02c8b90
--- /dev/null
+++ b/tools/Makefile
@@ -0,0 +1,179 @@
+# SPDX-License-Identifier: GPL-2.0
+# Some of the tools (perf) use the same make variables
+# as the kernel build does.
+export srctree=
+export objtree=
+
+include scripts/Makefile.include
+
+help:
+ @echo 'Possible targets:'
+ @echo ''
+ @echo ' acpi - ACPI tools'
+ @echo ' cgroup - cgroup tools'
+ @echo ' cpupower - a tool for all things x86 CPU power'
+ @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
+ @echo ' freefall - laptop accelerometer program for disk protection'
+ @echo ' gpio - GPIO tools'
+ @echo ' hv - tools for use in Hyper-V clients'
+ @echo ' iio - IIO tools'
+ @echo ' kvm_stat - top-like utility for displaying kvm statistics'
+ @echo ' leds - LEDs tools'
+ @echo ' liblockdep - user-space wrapper for kernel locking-validator'
+ @echo ' bpf - misc BPF tools'
+ @echo ' perf - Linux performance measurement and analysis tool'
+ @echo ' selftests - various kernel selftests'
+ @echo ' spi - spi tools'
+ @echo ' objtool - an ELF object analysis tool'
+ @echo ' tmon - thermal monitoring and tuning tool'
+ @echo ' turbostat - Intel CPU idle stats and freq reporting tool'
+ @echo ' usb - USB testing tools'
+ @echo ' virtio - vhost test module'
+ @echo ' vm - misc vm tools'
+ @echo ' wmi - WMI interface examples'
+ @echo ' x86_energy_perf_policy - Intel energy policy tool'
+ @echo ''
+ @echo 'You can do:'
+ @echo ' $$ make -C tools/ <tool>_install'
+ @echo ''
+ @echo ' from the top of the kernel source tree to build and install'
+ @echo ' one of the tools above'
+ @echo ''
+ @echo ' $$ make tools/all'
+ @echo ''
+ @echo ' builds all tools.'
+ @echo ''
+ @echo ' $$ make tools/install'
+ @echo ''
+ @echo ' installs all tools.'
+ @echo ''
+ @echo 'Cleaning targets:'
+ @echo ''
+ @echo ' Each of the above targets with "_clean" appended cleans'
+ @echo ' the respective build directory.'
+ @echo ' clean: a summary clean target to clean _all_ folders'
+
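+# Each build rule below simply descends into the tool's directory;
+# the descend helper itself is defined in scripts/Makefile.include.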
+acpi: FORCE
+ $(call descend,power/$@)
+
+cpupower: FORCE
+ $(call descend,power/$@)
+
+cgroup firewire hv guest spi usb virtio vm bpf iio gpio objtool leds wmi: FORCE
+ $(call descend,$@)
+
+liblockdep: FORCE
+ $(call descend,lib/lockdep)
+
+libapi: FORCE
+ $(call descend,lib/api)
+
+# The perf build does not follow the descend function setup,
+# invoking it via its own make rule.
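+# PERF_O diverts the perf objects to $(O)/tools/perf when O= is set.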
+PERF_O = $(if $(O),$(O)/tools/perf,)
+
+perf: FORCE
+ $(Q)mkdir -p $(PERF_O) .
+ $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
+
+selftests: FORCE
+ $(call descend,testing/$@)
+
+turbostat x86_energy_perf_policy: FORCE
+ $(call descend,power/x86/$@)
+
+tmon: FORCE
+ $(call descend,thermal/$@)
+
+freefall: FORCE
+ $(call descend,laptop/$@)
+
+kvm_stat: FORCE
+ $(call descend,kvm/$@)
+
+all: acpi cgroup cpupower gpio hv firewire liblockdep \
+ perf selftests spi turbostat usb \
+ virtio vm bpf x86_energy_perf_policy \
+ tmon freefall iio objtool kvm_stat wmi
+
+acpi_install:
+ $(call descend,power/$(@:_install=),install)
+
+cpupower_install:
+ $(call descend,power/$(@:_install=),install)
+
+cgroup_install firewire_install gpio_install hv_install iio_install perf_install spi_install usb_install virtio_install vm_install bpf_install objtool_install wmi_install:
+ $(call descend,$(@:_install=),install)
+
+liblockdep_install:
+ $(call descend,lib/lockdep,install)
+
+selftests_install:
+ $(call descend,testing/$(@:_install=),install)
+
+turbostat_install x86_energy_perf_policy_install:
+ $(call descend,power/x86/$(@:_install=),install)
+
+tmon_install:
+ $(call descend,thermal/$(@:_install=),install)
+
+freefall_install:
+ $(call descend,laptop/$(@:_install=),install)
+
+kvm_stat_install:
+ $(call descend,kvm/$(@:_install=),install)
+
+install: acpi_install cgroup_install cpupower_install gpio_install \
+ hv_install firewire_install iio_install liblockdep_install \
+ perf_install selftests_install turbostat_install usb_install \
+ virtio_install vm_install bpf_install x86_energy_perf_policy_install \
+ tmon_install freefall_install objtool_install kvm_stat_install \
+ wmi_install
+
+acpi_clean:
+ $(call descend,power/acpi,clean)
+
+cpupower_clean:
+ $(call descend,power/cpupower,clean)
+
+cgroup_clean hv_clean firewire_clean spi_clean usb_clean virtio_clean vm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean:
+ $(call descend,$(@:_clean=),clean)
+
+liblockdep_clean:
+ $(call descend,lib/lockdep,clean)
+
+libapi_clean:
+ $(call descend,lib/api,clean)
+
+libbpf_clean:
+ $(call descend,lib/bpf,clean)
+
+libsubcmd_clean:
+ $(call descend,lib/subcmd,clean)
+
+perf_clean:
+ $(Q)mkdir -p $(PERF_O) .
+ $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= clean
+
+selftests_clean:
+ $(call descend,testing/$(@:_clean=),clean)
+
+turbostat_clean x86_energy_perf_policy_clean:
+ $(call descend,power/x86/$(@:_clean=),clean)
+
+tmon_clean:
+ $(call descend,thermal/tmon,clean)
+
+freefall_clean:
+ $(call descend,laptop/freefall,clean)
+
+build_clean:
+ $(call descend,build,clean)
+
+clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean \
+ perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \
+ vm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
+ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
+ gpio_clean objtool_clean leds_clean wmi_clean
+
+.PHONY: FORCE
diff --git a/tools/accounting/.gitignore b/tools/accounting/.gitignore
new file mode 100644
index 000000000..86485203c
--- /dev/null
+++ b/tools/accounting/.gitignore
@@ -0,0 +1 @@
+getdelays
diff --git a/tools/accounting/Makefile b/tools/accounting/Makefile
new file mode 100644
index 000000000..03687f19c
--- /dev/null
+++ b/tools/accounting/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := getdelays
+
+all: $(PROGS)
+
+clean:
+ rm -fr $(PROGS)
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
new file mode 100644
index 000000000..6bf6a2043
--- /dev/null
+++ b/tools/accounting/getdelays.c
@@ -0,0 +1,551 @@
+// SPDX-License-Identifier: GPL-2.0
+/* getdelays.c
+ *
+ * Utility to get per-pid and per-tgid delay accounting statistics
+ * Also illustrates usage of the taskstats interface
+ *
+ * Copyright (C) Shailabh Nagar, IBM Corp. 2005
+ * Copyright (C) Balbir Singh, IBM Corp. 2006
+ * Copyright (c) Jay Lan, SGI. 2006
+ *
+ * Compile with
+ * gcc -I/usr/src/linux/include getdelays.c -o getdelays
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <unistd.h>
+#include <poll.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <signal.h>
+
+#include <linux/genetlink.h>
+#include <linux/taskstats.h>
+#include <linux/cgroupstats.h>
+
+/*
+ * Generic macros for dealing with netlink sockets. Might be duplicated
+ * elsewhere. It is recommended that commercial-grade applications use
+ * libnl or libnetlink and the interfaces those libraries provide.
+ */
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char*)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) (len - NLA_HDRLEN)
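+
+/*
+ * These macros assume the usual genetlink layout: a struct nlmsghdr,
+ * then a struct genlmsghdr, then a stream of struct nlattr attributes.
+ * A sketch of how main() below walks a received reply, where 'msg' is
+ * a struct msgtemplate filled in by recv():
+ *
+ * struct nlattr *na = (struct nlattr *) GENLMSG_DATA(&msg);
+ * int rep_len = GENLMSG_PAYLOAD(&msg.n);
+ * int len = 0;
+ *
+ * while (len < rep_len) {
+ * len += NLA_ALIGN(na->nla_len);
+ * ... dispatch on na->nla_type, read the payload via NLA_DATA(na) ...
+ * na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
+ * }
+ */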
+
+#define err(code, fmt, arg...) \
+ do { \
+ fprintf(stderr, fmt, ##arg); \
+ exit(code); \
+ } while (0)
+
+int done;
+int rcvbufsz;
+char name[100];
+int dbg;
+int print_delays;
+int print_io_accounting;
+int print_task_context_switch_counts;
+
+/* do/while (0) makes PRINTF(); safe inside un-braced if/else chains */
+#define PRINTF(fmt, arg...) \
+ do { \
+ if (dbg) \
+ printf(fmt, ##arg); \
+ } while (0)
+
+/* Maximum size of response requested or message sent */
+#define MAX_MSG_SIZE 1024
+/* Maximum number of cpus expected to be specified in a cpumask */
+#define MAX_CPUS 32
+
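+/*
+ * Template used for both requests and replies: a netlink header, a
+ * genetlink header, then up to MAX_MSG_SIZE bytes of attribute data.
+ */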
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[MAX_MSG_SIZE];
+};
+
+char cpumask[100+6*MAX_CPUS];
+
+static void usage(void)
+{
+ fprintf(stderr, "getdelays [-dilqv] [-w logfile] [-r bufsize] "
+ "[-m cpumask] [-t tgid] [-p pid] [-C containerpath] "
+ "[-c cmd [args...]]\n");
+ fprintf(stderr, " -d: print delayacct stats\n");
+ fprintf(stderr, " -i: print IO accounting (works only with -p)\n");
+ fprintf(stderr, " -q: print task/process context switch counts\n");
+ fprintf(stderr, " -l: listen forever\n");
+ fprintf(stderr, " -v: debug on\n");
+ fprintf(stderr, " -C: container path\n");
+ fprintf(stderr, " -c: fork and exec the given command, collecting its stats\n");
+}
+
+/*
+ * Create a raw netlink socket and bind
+ */
+static int create_nl_socket(int protocol)
+{
+ int fd;
+ struct sockaddr_nl local;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, protocol);
+ if (fd < 0)
+ return -1;
+
+ if (rcvbufsz)
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+ &rcvbufsz, sizeof(rcvbufsz)) < 0) {
+ fprintf(stderr, "Unable to set socket rcv buf size to %d\n",
+ rcvbufsz);
+ goto error;
+ }
+
+ memset(&local, 0, sizeof(local));
+ local.nl_family = AF_NETLINK;
+
+ if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
+ goto error;
+
+ return fd;
+error:
+ close(fd);
+ return -1;
+}
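+
+/*
+ * Typical use, as in main() below: open a generic netlink socket and
+ * resolve the taskstats family id before sending any command.
+ * A minimal sketch:
+ *
+ * int nl_sd = create_nl_socket(NETLINK_GENERIC);
+ *
+ * if (nl_sd < 0)
+ * err(1, "error creating Netlink socket\n");
+ * if (!get_family_id(nl_sd))
+ * err(1, "Error getting family id\n");
+ */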
+
+
+static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u8 genl_cmd, __u16 nla_type,
+ void *nla_data, int nla_len)
+{
+ struct nlattr *na;
+ struct sockaddr_nl nladdr;
+ int r, buflen;
+ char *buf;
+
+ struct msgtemplate msg;
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = NLM_F_REQUEST;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 0x1;
+ na = (struct nlattr *) GENLMSG_DATA(&msg);
+ na->nla_type = nla_type;
+ na->nla_len = nla_len + NLA_HDRLEN;
+ memcpy(NLA_DATA(na), nla_data, nla_len);
+ msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+ buf = (char *) &msg;
+ buflen = msg.n.nlmsg_len;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+ while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN)
+ return -1;
+ }
+ return 0;
+}
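+
+/*
+ * Example request, a sketch of what main() below sends: query the
+ * taskstats family (id obtained from get_family_id()) for the delay
+ * stats of the single pid held in 'tid':
+ *
+ * rc = send_cmd(nl_sd, id, getpid(), TASKSTATS_CMD_GET,
+ * TASKSTATS_CMD_ATTR_PID, &tid, sizeof(__u32));
+ */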
+
+
+/*
+ * Probe the controller in genetlink to find the family id
+ * for the TASKSTATS family
+ */
+static int get_family_id(int sd)
+{
+ struct {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[256];
+ } ans;
+
+ int id = 0, rc;
+ struct nlattr *na;
+ int rep_len;
+
+ strcpy(name, TASKSTATS_GENL_NAME);
+ rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
+ CTRL_ATTR_FAMILY_NAME, (void *)name,
+ strlen(TASKSTATS_GENL_NAME)+1);
+ if (rc < 0)
+ return 0; /* sendto() failure? */
+
+ rep_len = recv(sd, &ans, sizeof(ans), 0);
+ if (rep_len < 0 || ans.n.nlmsg_type == NLMSG_ERROR ||
+ !NLMSG_OK((&ans.n), rep_len))
+ return 0;
+
+ na = (struct nlattr *) GENLMSG_DATA(&ans);
+ na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID)
+ id = *(__u16 *) NLA_DATA(na);
+ return id;
+}
+
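+/* taskstats delay totals are in nanoseconds; average in milliseconds */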
+#define average_ms(t, c) ((t) / 1000000ULL / ((c) ? (c) : 1))
+
+static void print_delayacct(struct taskstats *t)
+{
+ printf("\n\nCPU %15s%15s%15s%15s%15s\n"
+ " %15llu%15llu%15llu%15llu%15.3fms\n"
+ "IO %15s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "SWAP %15s%15s%15s\n"
+ " %15llu%15llu%15llums\n"
+ "RECLAIM %12s%15s%15s\n"
+ " %15llu%15llu%15llums\n",
+ "count", "real total", "virtual total",
+ "delay total", "delay average",
+ (unsigned long long)t->cpu_count,
+ (unsigned long long)t->cpu_run_real_total,
+ (unsigned long long)t->cpu_run_virtual_total,
+ (unsigned long long)t->cpu_delay_total,
+ average_ms((double)t->cpu_delay_total, t->cpu_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->blkio_count,
+ (unsigned long long)t->blkio_delay_total,
+ average_ms(t->blkio_delay_total, t->blkio_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->swapin_count,
+ (unsigned long long)t->swapin_delay_total,
+ average_ms(t->swapin_delay_total, t->swapin_count),
+ "count", "delay total", "delay average",
+ (unsigned long long)t->freepages_count,
+ (unsigned long long)t->freepages_delay_total,
+ average_ms(t->freepages_delay_total, t->freepages_count));
+}
+
+static void task_context_switch_counts(struct taskstats *t)
+{
+ printf("\n\nTask %15s%15s\n"
+ " %15llu%15llu\n",
+ "voluntary", "nonvoluntary",
+ (unsigned long long)t->nvcsw, (unsigned long long)t->nivcsw);
+}
+
+static void print_cgroupstats(struct cgroupstats *c)
+{
+ printf("sleeping %llu, blocked %llu, running %llu, stopped %llu, "
+ "uninterruptible %llu\n", (unsigned long long)c->nr_sleeping,
+ (unsigned long long)c->nr_io_wait,
+ (unsigned long long)c->nr_running,
+ (unsigned long long)c->nr_stopped,
+ (unsigned long long)c->nr_uninterruptible);
+}
+
+
+static void print_ioacct(struct taskstats *t)
+{
+ printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
+ t->ac_comm,
+ (unsigned long long)t->read_bytes,
+ (unsigned long long)t->write_bytes,
+ (unsigned long long)t->cancelled_write_bytes);
+}
+
+int main(int argc, char *argv[])
+{
+ int c, rc, rep_len, aggr_len, len2;
+ int cmd_type = TASKSTATS_CMD_ATTR_UNSPEC;
+ __u16 id;
+ __u32 mypid;
+
+ struct nlattr *na;
+ int nl_sd = -1;
+ int len = 0;
+ pid_t tid = 0;
+ pid_t rtid = 0;
+
+ int fd = 0;
+ int count = 0;
+ int write_file = 0;
+ int maskset = 0;
+ char *logfile = NULL;
+ int loop = 0;
+ int containerset = 0;
+ char *containerpath = NULL;
+ int cfd = 0;
+ int forking = 0;
+ sigset_t sigset;
+
+ struct msgtemplate msg;
+
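+	/*
+	 * Parse options; -c stops the loop so that the rest of the command
+	 * line is treated as the command to fork and exec.
+	 */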
+ while (!forking) {
+ c = getopt(argc, argv, "qdiw:r:m:t:p:vlC:c:");
+ if (c < 0)
+ break;
+
+ switch (c) {
+ case 'd':
+ printf("print delayacct stats ON\n");
+ print_delays = 1;
+ break;
+ case 'i':
+ printf("printing IO accounting\n");
+ print_io_accounting = 1;
+ break;
+ case 'q':
+ printf("printing task/process context switch rates\n");
+ print_task_context_switch_counts = 1;
+ break;
+ case 'C':
+ containerset = 1;
+ containerpath = optarg;
+ break;
+ case 'w':
+ logfile = strdup(optarg);
+ printf("write to file %s\n", logfile);
+ write_file = 1;
+ break;
+ case 'r':
+ rcvbufsz = atoi(optarg);
+ printf("receive buf size %d\n", rcvbufsz);
+ if (rcvbufsz < 0)
+ err(1, "Invalid rcv buf size\n");
+ break;
+ case 'm':
+ strncpy(cpumask, optarg, sizeof(cpumask));
+ cpumask[sizeof(cpumask) - 1] = '\0';
+ maskset = 1;
+ printf("cpumask %s maskset %d\n", cpumask, maskset);
+ break;
+ case 't':
+ tid = atoi(optarg);
+ if (!tid)
+ err(1, "Invalid tgid\n");
+ cmd_type = TASKSTATS_CMD_ATTR_TGID;
+ break;
+ case 'p':
+ tid = atoi(optarg);
+ if (!tid)
+ err(1, "Invalid pid\n");
+ cmd_type = TASKSTATS_CMD_ATTR_PID;
+ break;
+ case 'c':
+
+ /* Block SIGCHLD for sigwait() later */
+ if (sigemptyset(&sigset) == -1)
+ err(1, "Failed to empty sigset");
+ if (sigaddset(&sigset, SIGCHLD))
+ err(1, "Failed to set sigchld in sigset");
+ sigprocmask(SIG_BLOCK, &sigset, NULL);
+
+ /* fork/exec a child */
+ tid = fork();
+ if (tid < 0)
+ err(1, "Fork failed\n");
+ if (tid == 0)
+ if (execvp(argv[optind - 1],
+ &argv[optind - 1]) < 0)
+ exit(-1);
+
+ /* Set the command type and avoid further processing */
+ cmd_type = TASKSTATS_CMD_ATTR_PID;
+ forking = 1;
+ break;
+ case 'v':
+ printf("debug on\n");
+ dbg = 1;
+ break;
+ case 'l':
+ printf("listen forever\n");
+ loop = 1;
+ break;
+ default:
+ usage();
+ exit(-1);
+ }
+ }
+
+ if (write_file) {
+ fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd == -1) {
+			perror("Cannot open output file");
+ exit(1);
+ }
+ }
+
+ nl_sd = create_nl_socket(NETLINK_GENERIC);
+ if (nl_sd < 0)
+ err(1, "error creating Netlink socket\n");
+
+
+ mypid = getpid();
+ id = get_family_id(nl_sd);
+ if (!id) {
+ fprintf(stderr, "Error getting family id, errno %d\n", errno);
+ goto err;
+ }
+ PRINTF("family id %d\n", id);
+
+ if (maskset) {
+ rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+ TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
+ &cpumask, strlen(cpumask) + 1);
+ PRINTF("Sent register cpumask, retval %d\n", rc);
+ if (rc < 0) {
+ fprintf(stderr, "error sending register cpumask\n");
+ goto err;
+ }
+ }
+
+ if (tid && containerset) {
+ fprintf(stderr, "Select either -t or -C, not both\n");
+ goto err;
+ }
+
+ /*
+ * If we forked a child, wait for it to exit. Cannot use waitpid()
+ * as all the delicious data would be reaped as part of the wait
+ */
+ if (tid && forking) {
+ int sig_received;
+ sigwait(&sigset, &sig_received);
+ }
+
+ if (tid) {
+ rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+ cmd_type, &tid, sizeof(__u32));
+ PRINTF("Sent pid/tgid, retval %d\n", rc);
+ if (rc < 0) {
+ fprintf(stderr, "error sending tid/tgid cmd\n");
+ goto done;
+ }
+ }
+
+ if (containerset) {
+ cfd = open(containerpath, O_RDONLY);
+ if (cfd < 0) {
+ perror("error opening container file");
+ goto err;
+ }
+ rc = send_cmd(nl_sd, id, mypid, CGROUPSTATS_CMD_GET,
+ CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32));
+ if (rc < 0) {
+ perror("error sending cgroupstats command");
+ goto err;
+ }
+ }
+ if (!maskset && !tid && !containerset) {
+ usage();
+ goto err;
+ }
+
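+	/* Read genetlink replies and walk the attributes of each message */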
+ do {
+ rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
+ PRINTF("received %d bytes\n", rep_len);
+
+ if (rep_len < 0) {
+ fprintf(stderr, "nonfatal reply error: errno %d\n",
+ errno);
+ continue;
+ }
+ if (msg.n.nlmsg_type == NLMSG_ERROR ||
+ !NLMSG_OK((&msg.n), rep_len)) {
+ struct nlmsgerr *err = NLMSG_DATA(&msg);
+ fprintf(stderr, "fatal reply error, errno %d\n",
+ err->error);
+ goto done;
+ }
+
+ PRINTF("nlmsghdr size=%zu, nlmsg_len=%d, rep_len=%d\n",
+ sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
+
+
+ rep_len = GENLMSG_PAYLOAD(&msg.n);
+
+ na = (struct nlattr *) GENLMSG_DATA(&msg);
+ len = 0;
+ while (len < rep_len) {
+ len += NLA_ALIGN(na->nla_len);
+ switch (na->nla_type) {
+ case TASKSTATS_TYPE_AGGR_TGID:
+ /* Fall through */
+ case TASKSTATS_TYPE_AGGR_PID:
+ aggr_len = NLA_PAYLOAD(na->nla_len);
+ len2 = 0;
+ /* For nested attributes, na follows */
+ na = (struct nlattr *) NLA_DATA(na);
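+				/*
+				 * Note: 'done' is the file-scope flag, not
+				 * the done: label below; C keeps labels in
+				 * a separate namespace.
+				 */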
+ done = 0;
+ while (len2 < aggr_len) {
+ switch (na->nla_type) {
+ case TASKSTATS_TYPE_PID:
+ rtid = *(int *) NLA_DATA(na);
+ if (print_delays)
+ printf("PID\t%d\n", rtid);
+ break;
+ case TASKSTATS_TYPE_TGID:
+ rtid = *(int *) NLA_DATA(na);
+ if (print_delays)
+ printf("TGID\t%d\n", rtid);
+ break;
+ case TASKSTATS_TYPE_STATS:
+ count++;
+ if (print_delays)
+ print_delayacct((struct taskstats *) NLA_DATA(na));
+ if (print_io_accounting)
+ print_ioacct((struct taskstats *) NLA_DATA(na));
+ if (print_task_context_switch_counts)
+ task_context_switch_counts((struct taskstats *) NLA_DATA(na));
+ if (fd) {
+ if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
+ err(1,"write error\n");
+ }
+ }
+ if (!loop)
+ goto done;
+ break;
+ case TASKSTATS_TYPE_NULL:
+ break;
+ default:
+ fprintf(stderr, "Unknown nested"
+ " nla_type %d\n",
+ na->nla_type);
+ break;
+ }
+ len2 += NLA_ALIGN(na->nla_len);
+ na = (struct nlattr *)((char *)na +
+ NLA_ALIGN(na->nla_len));
+ }
+ break;
+
+ case CGROUPSTATS_TYPE_CGROUP_STATS:
+ print_cgroupstats(NLA_DATA(na));
+ break;
+ default:
+ fprintf(stderr, "Unknown nla_type %d\n",
+ na->nla_type);
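+				/* fall through */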
+ case TASKSTATS_TYPE_NULL:
+ break;
+ }
+ na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
+ }
+ } while (loop);
+done:
+ if (maskset) {
+ rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
+ TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
+ &cpumask, strlen(cpumask) + 1);
+ printf("Sent deregister mask, retval %d\n", rc);
+ if (rc < 0)
+ err(rc, "error sending deregister cpumask\n");
+ }
+err:
+ close(nl_sd);
+ if (fd)
+ close(fd);
+ if (cfd)
+ close(cfd);
+ return 0;
+}
diff --git a/tools/arch/alpha/include/asm/barrier.h b/tools/arch/alpha/include/asm/barrier.h
new file mode 100644
index 000000000..da8d6457e
--- /dev/null
+++ b/tools/arch/alpha/include/asm/barrier.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+#define __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+
+#define mb() __asm__ __volatile__("mb": : :"memory")
+#define rmb() __asm__ __volatile__("mb": : :"memory")
+#define wmb() __asm__ __volatile__("wmb": : :"memory")
+
+#endif /* __TOOLS_LINUX_ASM_ALPHA_BARRIER_H */
diff --git a/tools/arch/alpha/include/uapi/asm/bitsperlong.h b/tools/arch/alpha/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..6c5bf7d03
--- /dev/null
+++ b/tools/arch/alpha/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_ALPHA_BITSPERLONG_H
+#define __ASM_ALPHA_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_ALPHA_BITSPERLONG_H */
diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h
new file mode 100644
index 000000000..3d265f6ba
--- /dev/null
+++ b/tools/arch/alpha/include/uapi/asm/errno.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ALPHA_ERRNO_H
+#define _ALPHA_ERRNO_H
+
+#include <asm-generic/errno-base.h>
+
+#undef EAGAIN /* 11 in errno-base.h */
+
+#define EDEADLK 11 /* Resource deadlock would occur */
+
+#define EAGAIN 35 /* Try again */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define EINPROGRESS 36 /* Operation now in progress */
+#define EALREADY 37 /* Operation already in progress */
+#define ENOTSOCK 38 /* Socket operation on non-socket */
+#define EDESTADDRREQ 39 /* Destination address required */
+#define EMSGSIZE 40 /* Message too long */
+#define EPROTOTYPE 41 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 42 /* Protocol not available */
+#define EPROTONOSUPPORT 43 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
+#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 46 /* Protocol family not supported */
+#define EAFNOSUPPORT 47 /* Address family not supported by protocol */
+#define EADDRINUSE 48 /* Address already in use */
+#define EADDRNOTAVAIL 49 /* Cannot assign requested address */
+#define ENETDOWN 50 /* Network is down */
+#define ENETUNREACH 51 /* Network is unreachable */
+#define ENETRESET 52 /* Network dropped connection because of reset */
+#define ECONNABORTED 53 /* Software caused connection abort */
+#define ECONNRESET 54 /* Connection reset by peer */
+#define ENOBUFS 55 /* No buffer space available */
+#define EISCONN 56 /* Transport endpoint is already connected */
+#define ENOTCONN 57 /* Transport endpoint is not connected */
+#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 59 /* Too many references: cannot splice */
+#define ETIMEDOUT 60 /* Connection timed out */
+#define ECONNREFUSED 61 /* Connection refused */
+#define ELOOP 62 /* Too many symbolic links encountered */
+#define ENAMETOOLONG 63 /* File name too long */
+#define EHOSTDOWN 64 /* Host is down */
+#define EHOSTUNREACH 65 /* No route to host */
+#define ENOTEMPTY 66 /* Directory not empty */
+
+#define EUSERS 68 /* Too many users */
+#define EDQUOT 69 /* Quota exceeded */
+#define ESTALE 70 /* Stale file handle */
+#define EREMOTE 71 /* Object is remote */
+
+#define ENOLCK 77 /* No record locks available */
+#define ENOSYS 78 /* Function not implemented */
+
+#define ENOMSG 80 /* No message of desired type */
+#define EIDRM 81 /* Identifier removed */
+#define ENOSR 82 /* Out of streams resources */
+#define ETIME 83 /* Timer expired */
+#define EBADMSG 84 /* Not a data message */
+#define EPROTO 85 /* Protocol error */
+#define ENODATA 86 /* No data available */
+#define ENOSTR 87 /* Device not a stream */
+
+#define ENOPKG 92 /* Package not installed */
+
+#define EILSEQ 116 /* Illegal byte sequence */
+
+/* The following are just random noise.. */
+#define ECHRNG 88 /* Channel number out of range */
+#define EL2NSYNC 89 /* Level 2 not synchronized */
+#define EL3HLT 90 /* Level 3 halted */
+#define EL3RST 91 /* Level 3 reset */
+
+#define ELNRNG 93 /* Link number out of range */
+#define EUNATCH 94 /* Protocol driver not attached */
+#define ENOCSI 95 /* No CSI structure available */
+#define EL2HLT 96 /* Level 2 halted */
+#define EBADE 97 /* Invalid exchange */
+#define EBADR 98 /* Invalid request descriptor */
+#define EXFULL 99 /* Exchange full */
+#define ENOANO 100 /* No anode */
+#define EBADRQC 101 /* Invalid request code */
+#define EBADSLT 102 /* Invalid slot */
+
+#define EDEADLOCK EDEADLK
+
+#define EBFONT 104 /* Bad font file format */
+#define ENONET 105 /* Machine is not on the network */
+#define ENOLINK 106 /* Link has been severed */
+#define EADV 107 /* Advertise error */
+#define ESRMNT 108 /* Srmount error */
+#define ECOMM 109 /* Communication error on send */
+#define EMULTIHOP 110 /* Multihop attempted */
+#define EDOTDOT 111 /* RFS specific error */
+#define EOVERFLOW 112 /* Value too large for defined data type */
+#define ENOTUNIQ 113 /* Name not unique on network */
+#define EBADFD 114 /* File descriptor in bad state */
+#define EREMCHG 115 /* Remote address changed */
+
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+
+#define ELIBACC 122 /* Can not access a needed shared library */
+#define ELIBBAD 123 /* Accessing a corrupted shared library */
+#define ELIBSCN 124 /* .lib section in a.out corrupted */
+#define ELIBMAX 125 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 126 /* Cannot exec a shared library directly */
+#define ERESTART 127 /* Interrupted system call should be restarted */
+#define ESTRPIPE 128 /* Streams pipe error */
+
+#define ENOMEDIUM 129 /* No medium found */
+#define EMEDIUMTYPE 130 /* Wrong medium type */
+#define ECANCELED 131 /* Operation Cancelled */
+#define ENOKEY 132 /* Required key not available */
+#define EKEYEXPIRED 133 /* Key has expired */
+#define EKEYREVOKED 134 /* Key has been revoked */
+#define EKEYREJECTED 135 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define EOWNERDEAD 136 /* Owner died */
+#define ENOTRECOVERABLE 137 /* State not recoverable */
+
+#define ERFKILL 138 /* Operation not possible due to RF-kill */
+
+#define EHWPOISON 139 /* Memory page has hardware error */
+
+#endif
diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h
new file mode 100644
index 000000000..c317d3e68
--- /dev/null
+++ b/tools/arch/alpha/include/uapi/asm/mman.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ALPHA_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 17
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 16
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 6
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 14
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x10
+#define MAP_DENYWRITE 0x02000
+#define MAP_EXECUTABLE 0x04000
+#define MAP_FILE 0
+#define MAP_FIXED 0x100
+#define MAP_GROWSDOWN 0x01000
+#define MAP_HUGETLB 0x100000
+#define MAP_LOCKED 0x08000
+#define MAP_NONBLOCK 0x40000
+#define MAP_NORESERVE 0x10000
+#define MAP_POPULATE 0x20000
+#define MAP_PRIVATE 0x02
+#define MAP_SHARED 0x01
+#define MAP_STACK 0x80000
+#define PROT_EXEC 0x4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x0
+#define PROT_READ 0x1
+#define PROT_SEM 0x8
+#define PROT_WRITE 0x2
+/* MADV_HWPOISON is undefined on alpha, fix it for perf */
+#define MADV_HWPOISON 100
+/* MADV_SOFT_OFFLINE is undefined on alpha, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on alpha, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on alpha, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/arch/arc/include/uapi/asm/mman.h b/tools/arch/arc/include/uapi/asm/mman.h
new file mode 100644
index 000000000..81f0f9bf0
--- /dev/null
+++ b/tools/arch/arc/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARC_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arc, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/arm/include/asm/barrier.h b/tools/arch/arm/include/asm/barrier.h
new file mode 100644
index 000000000..005c618a0
--- /dev/null
+++ b/tools/arch/arm/include/asm/barrier.h
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_ASM_ARM_BARRIER_H
+#define _TOOLS_LINUX_ASM_ARM_BARRIER_H
+
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define mb() ((void(*)(void))0xffff0fa0)()
+#define wmb() ((void(*)(void))0xffff0fa0)()
+#define rmb() ((void(*)(void))0xffff0fa0)()
+
+#endif /* _TOOLS_LINUX_ASM_ARM_BARRIER_H */
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..4602464eb
--- /dev/null
+++ b/tools/arch/arm/include/uapi/asm/kvm.h
@@ -0,0 +1,299 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#include <linux/types.h>
+#include <linux/psci.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */
+#define KVM_ARM_SVC_sp svc_regs[0]
+#define KVM_ARM_SVC_lr svc_regs[1]
+#define KVM_ARM_SVC_spsr svc_regs[2]
+#define KVM_ARM_ABT_sp abt_regs[0]
+#define KVM_ARM_ABT_lr abt_regs[1]
+#define KVM_ARM_ABT_spsr abt_regs[2]
+#define KVM_ARM_UND_sp und_regs[0]
+#define KVM_ARM_UND_lr und_regs[1]
+#define KVM_ARM_UND_spsr und_regs[2]
+#define KVM_ARM_IRQ_sp irq_regs[0]
+#define KVM_ARM_IRQ_lr irq_regs[1]
+#define KVM_ARM_IRQ_spsr irq_regs[2]
+
+/* Valid only for fiq_regs in struct kvm_regs */
+#define KVM_ARM_FIQ_r8 fiq_regs[0]
+#define KVM_ARM_FIQ_r9 fiq_regs[1]
+#define KVM_ARM_FIQ_r10 fiq_regs[2]
+#define KVM_ARM_FIQ_fp fiq_regs[3]
+#define KVM_ARM_FIQ_ip fiq_regs[4]
+#define KVM_ARM_FIQ_sp fiq_regs[5]
+#define KVM_ARM_FIQ_lr fiq_regs[6]
+#define KVM_ARM_FIQ_spsr fiq_regs[7]
+
+struct kvm_regs {
+ struct pt_regs usr_regs; /* R0_usr - R14_usr, PC, CPSR */
+ unsigned long svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
+ unsigned long abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
+ unsigned long und_regs[3]; /* SP_und, LR_und, SPSR_und */
+ unsigned long irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
+ unsigned long fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
+};
+
+/* Supported Processor Types */
+#define KVM_ARM_TARGET_CORTEX_A15 0
+#define KVM_ARM_TARGET_CORTEX_A7 1
+#define KVM_ARM_NUM_TARGETS 2
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT 0
+#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT 16
+#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2 0
+
+/* Supported VGIC address types */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
+
+#define KVM_VGIC_V2_DIST_SIZE 0x1000
+#define KVM_VGIC_V2_CPU_SIZE 0x2000
+
+/* Supported VGICv3 address types */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
+#define KVM_VGIC_ITS_ADDR_TYPE 4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
+
+#define KVM_VGIC_V3_DIST_SIZE SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
+
+#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
+
+struct kvm_vcpu_init {
+ __u32 target;
+ __u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+struct kvm_guest_debug_arch {
+};
+
+struct kvm_debug_exit_arch {
+};
+
+struct kvm_sync_regs {
+ /* Used with KVM_CAP_ARM_USER_IRQ */
+ __u64 device_irq_level;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 serror_pending;
+ __u8 serror_has_esr;
+ /* Align it to 8 bytes */
+ __u8 pad[6];
+ __u64 serror_esr;
+ } exception;
+ __u32 reserved[12];
+};
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT 16
+#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007
+#define KVM_REG_ARM_32_OPC2_SHIFT 0
+#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078
+#define KVM_REG_ARM_OPC1_SHIFT 3
+#define KVM_REG_ARM_CRM_MASK 0x0000000000000780
+#define KVM_REG_ARM_CRM_SHIFT 7
+#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
+#define KVM_REG_ARM_32_CRN_SHIFT 11
+/*
+ * For KVM currently all guest registers are nonsecure, but we reserve a bit
+ * in the encoding to distinguish secure from nonsecure for AArch32 system
+ * registers that are banked by security. This is 1 for the secure banked
+ * register, and 0 for the nonsecure banked register or if the register is
+ * not banked by security.
+ */
+#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
+#define KVM_REG_ARM_SECURE_SHIFT 28
+
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+ (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+ ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+ ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+ ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+ ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+ (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
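+/* e.g. the 64-bit TTBR0 (op1 = 0, CRm = c2) is ARM_CP15_REG64(0, 2) */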
+
+/* PL1 Physical Timer Registers */
+#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1)
+#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14)
+#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14)
+
+/* Virtual Timer Registers */
+#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
+#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
+
+/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */
+#define KVM_REG_ARM_VFP (0x0012 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_VFP_MASK 0x000000000000FFFF
+#define KVM_REG_ARM_VFP_BASE_REG 0x0
+#define KVM_REG_ARM_VFP_FPSID 0x1000
+#define KVM_REG_ARM_VFP_FPSCR 0x1001
+#define KVM_REG_ARM_VFP_MVFR1 0x1006
+#define KVM_REG_ARM_VFP_MVFR0 0x1007
+#define KVM_REG_ARM_VFP_FPEXC 0x1008
+#define KVM_REG_ARM_VFP_FPINST 0x1009
+#define KVM_REG_ARM_VFP_FPINST2 0x100A
+
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
+#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+
+#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
+#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
+#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
+#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
+#define KVM_DEV_ARM_ITS_CTRL_RESET 4
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT 24
+#define KVM_ARM_IRQ_TYPE_MASK 0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT 16
+#define KVM_ARM_IRQ_VCPU_MASK 0xff
+#define KVM_ARM_IRQ_NUM_SHIFT 0
+#define KVM_ARM_IRQ_NUM_MASK 0xffff
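+/*
+ * e.g. to assert SPI 32 with KVM_IRQ_LINE, set the irq field to
+ * (KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT) | 32
+ */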
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU 0
+#define KVM_ARM_IRQ_TYPE_SPI 1
+#define KVM_ARM_IRQ_TYPE_PPI 2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ 0
+#define KVM_ARM_IRQ_CPU_FIQ 1
+
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
+#define KVM_ARM_IRQ_GIC_MAX 127
+#endif
+
+/* A single KVM irqchip, i.e. the VGIC */
+#define KVM_NR_IRQCHIPS 1
+
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE 0x95c1ba5e
+#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
+
+#endif /* __ARM_KVM_H__ */
diff --git a/tools/arch/arm/include/uapi/asm/mman.h b/tools/arch/arm/include/uapi/asm/mman.h
new file mode 100644
index 000000000..a6d46321e
--- /dev/null
+++ b/tools/arch/arm/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARM_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arm, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/arm/include/uapi/asm/perf_regs.h b/tools/arch/arm/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..a3c046174
--- /dev/null
+++ b/tools/arch/arm/include/uapi/asm/perf_regs.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_ARM_PERF_REGS_H
+#define _ASM_ARM_PERF_REGS_H
+
+enum perf_event_arm_regs {
+ PERF_REG_ARM_R0,
+ PERF_REG_ARM_R1,
+ PERF_REG_ARM_R2,
+ PERF_REG_ARM_R3,
+ PERF_REG_ARM_R4,
+ PERF_REG_ARM_R5,
+ PERF_REG_ARM_R6,
+ PERF_REG_ARM_R7,
+ PERF_REG_ARM_R8,
+ PERF_REG_ARM_R9,
+ PERF_REG_ARM_R10,
+ PERF_REG_ARM_FP,
+ PERF_REG_ARM_IP,
+ PERF_REG_ARM_SP,
+ PERF_REG_ARM_LR,
+ PERF_REG_ARM_PC,
+ PERF_REG_ARM_MAX,
+};
+#endif /* _ASM_ARM_PERF_REGS_H */
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
new file mode 100644
index 000000000..40bde6b23
--- /dev/null
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+#define _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+
+/*
+ * From tools/perf/perf-sys.h, last modified in:
+ * f428ebd184c82a7914b2aa7e9f868918aaf7ea78 perf tools: Fix AAAAARGH64 memory barriers
+ *
+ * XXX: arch/arm64/include/asm/barrier.h in the kernel sources use dsb, is this
+ * a case like for arm32 where we do things differently in userspace?
+ */
+
+#define mb() asm volatile("dmb ish" ::: "memory")
+#define wmb() asm volatile("dmb ishst" ::: "memory")
+#define rmb() asm volatile("dmb ishld" ::: "memory")
+
+#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/arch/arm64/include/uapi/asm/bitsperlong.h b/tools/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..485d60bee
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
diff --git a/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000..b551b7416
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..97c3478ee
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -0,0 +1,312 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012,2013 - ARM Ltd
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
+ *
+ * Derived from arch/arm/include/uapi/asm/kvm.h:
+ * Copyright (C) 2012 - Virtual Open Systems and Columbia University
+ * Author: Christoffer Dall <c.dall@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ARM_KVM_H__
+#define __ARM_KVM_H__
+
+#define KVM_SPSR_EL1 0
+#define KVM_SPSR_SVC KVM_SPSR_EL1
+#define KVM_SPSR_ABT 1
+#define KVM_SPSR_UND 2
+#define KVM_SPSR_IRQ 3
+#define KVM_SPSR_FIQ 4
+#define KVM_NR_SPSR 5
+
+#ifndef __ASSEMBLY__
+#include <linux/psci.h>
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_READONLY_MEM
+#define __KVM_HAVE_VCPU_EVENTS
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#define KVM_REG_SIZE(id) \
+ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+struct kvm_regs {
+ struct user_pt_regs regs; /* sp = sp_el0 */
+
+ __u64 sp_el1;
+ __u64 elr_el1;
+
+ __u64 spsr[KVM_NR_SPSR];
+
+ struct user_fpsimd_state fp_regs;
+};
+
+/*
+ * Supported CPU Targets - Adding a new target type is not recommended,
+ * unless there are some special registers not supported by the
+ * genericv8 sysreg table.
+ */
+#define KVM_ARM_TARGET_AEM_V8 0
+#define KVM_ARM_TARGET_FOUNDATION_V8 1
+#define KVM_ARM_TARGET_CORTEX_A57 2
+#define KVM_ARM_TARGET_XGENE_POTENZA 3
+#define KVM_ARM_TARGET_CORTEX_A53 4
+/* Generic ARM v8 target */
+#define KVM_ARM_TARGET_GENERIC_V8 5
+
+#define KVM_ARM_NUM_TARGETS 6
+
+/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
+#define KVM_ARM_DEVICE_TYPE_SHIFT 0
+#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
+#define KVM_ARM_DEVICE_ID_SHIFT 16
+#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
+
+/* Supported device IDs */
+#define KVM_ARM_DEVICE_VGIC_V2 0
+
+/* Supported VGIC address types */
+#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
+#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
+
+#define KVM_VGIC_V2_DIST_SIZE 0x1000
+#define KVM_VGIC_V2_CPU_SIZE 0x2000
+
+/* Supported VGICv3 address types */
+#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
+#define KVM_VGIC_ITS_ADDR_TYPE 4
+#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
+
+#define KVM_VGIC_V3_DIST_SIZE SZ_64K
+#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
+#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
+
+#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
+#define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */
+#define KVM_ARM_VCPU_PSCI_0_2 2 /* CPU uses PSCI v0.2 */
+#define KVM_ARM_VCPU_PMU_V3 3 /* Support guest PMUv3 */
+
+struct kvm_vcpu_init {
+ __u32 target;
+ __u32 features[7];
+};
+
+struct kvm_sregs {
+};
+
+struct kvm_fpu {
+};
+
+/*
+ * See v8 ARM ARM D7.3: Debug Registers
+ *
+ * The architectural limit is 16 debug registers of each type, although
+ * in practice there are usually fewer (see ID_AA64DFR0_EL1).
+ *
+ * Although the control registers are architecturally defined as 32
+ * bits wide, we use a 64-bit structure here to keep parity with
+ * KVM_GET/SET_ONE_REG behaviour, which treats all system registers as
+ * 64-bit values. It also allows for the possibility of the
+ * architecture expanding the control registers without having to
+ * change the userspace ABI.
+ */
+#define KVM_ARM_MAX_DBG_REGS 16
+struct kvm_guest_debug_arch {
+ __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS];
+ __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS];
+ __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS];
+ __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
+};
+
+struct kvm_debug_exit_arch {
+ __u32 hsr;
+ __u64 far; /* used for watchpoints */
+};
+
+/*
+ * Architecture specific defines for kvm_guest_debug->control
+ */
+
+#define KVM_GUESTDBG_USE_SW_BP (1 << 16)
+#define KVM_GUESTDBG_USE_HW (1 << 17)
+
+struct kvm_sync_regs {
+ /* Used with KVM_CAP_ARM_USER_IRQ */
+ __u64 device_irq_level;
+};
+
+struct kvm_arch_memory_slot {
+};
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 serror_pending;
+ __u8 serror_has_esr;
+ /* Align it to 8 bytes */
+ __u8 pad[6];
+ __u64 serror_esr;
+ } exception;
+ __u32 reserved[12];
+};
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
+#define KVM_REG_ARM_COPROC_SHIFT 16
+
+/* Normal registers are mapped as coprocessor 16. */
+#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / sizeof(__u32))
+
+/* Some registers need more space to represent values. */
+#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
+#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
+#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
+#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
+#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
+
+/* AArch64 system registers */
+#define KVM_REG_ARM64_SYSREG (0x0013 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM64_SYSREG_OP0_MASK 0x000000000000c000
+#define KVM_REG_ARM64_SYSREG_OP0_SHIFT 14
+#define KVM_REG_ARM64_SYSREG_OP1_MASK 0x0000000000003800
+#define KVM_REG_ARM64_SYSREG_OP1_SHIFT 11
+#define KVM_REG_ARM64_SYSREG_CRN_MASK 0x0000000000000780
+#define KVM_REG_ARM64_SYSREG_CRN_SHIFT 7
+#define KVM_REG_ARM64_SYSREG_CRM_MASK 0x0000000000000078
+#define KVM_REG_ARM64_SYSREG_CRM_SHIFT 3
+#define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007
+#define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0
+
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+ (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+ KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+ (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+ ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+ ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+ ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+ ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+ ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
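+/*
+ * e.g. SCTLR_EL1 (op0 = 3, op1 = 0, CRn = c1, CRm = c0, op2 = 0) is
+ * ARM64_SYS_REG(3, 0, 1, 0, 0)
+ */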
+
+/* Physical Timer EL0 Registers */
+#define KVM_REG_ARM_PTIMER_CTL ARM64_SYS_REG(3, 3, 14, 2, 1)
+#define KVM_REG_ARM_PTIMER_CVAL ARM64_SYS_REG(3, 3, 14, 2, 2)
+#define KVM_REG_ARM_PTIMER_CNT ARM64_SYS_REG(3, 3, 14, 0, 1)
+
+/* EL0 Virtual Timer Registers */
+#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2)
+
+/* KVM-as-firmware specific pseudo-registers */
+#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_FW | ((r) & 0xffff))
+#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
+
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
+#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
+#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
+#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
+ (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
+#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
+#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
+#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
+#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
+#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
+#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
+#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
+#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
+#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
+ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
+#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
+#define VGIC_LEVEL_INFO_LINE_LEVEL 0
+
+#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
+#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
+#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
+#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
+#define KVM_DEV_ARM_ITS_CTRL_RESET 4
+
+/* Device Control API on vcpu fd */
+#define KVM_ARM_VCPU_PMU_V3_CTRL 0
+#define KVM_ARM_VCPU_PMU_V3_IRQ 0
+#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_TIMER_CTRL 1
+#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
+#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
+
+/* KVM_IRQ_LINE irq field index values */
+#define KVM_ARM_IRQ_TYPE_SHIFT 24
+#define KVM_ARM_IRQ_TYPE_MASK 0xff
+#define KVM_ARM_IRQ_VCPU_SHIFT 16
+#define KVM_ARM_IRQ_VCPU_MASK 0xff
+#define KVM_ARM_IRQ_NUM_SHIFT 0
+#define KVM_ARM_IRQ_NUM_MASK 0xffff
+
+/* irq_type field */
+#define KVM_ARM_IRQ_TYPE_CPU 0
+#define KVM_ARM_IRQ_TYPE_SPI 1
+#define KVM_ARM_IRQ_TYPE_PPI 2
+
+/* out-of-kernel GIC cpu interrupt injection irq_number field */
+#define KVM_ARM_IRQ_CPU_IRQ 0
+#define KVM_ARM_IRQ_CPU_FIQ 1
+
+/*
+ * This used to hold the highest supported SPI, but it is now obsolete
+ * and only here to provide source code level compatibility with older
+ * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
+ */
+#ifndef __KERNEL__
+#define KVM_ARM_IRQ_GIC_MAX 127
+#endif
+
+/* A single KVM irqchip, i.e. the VGIC */
+#define KVM_NR_IRQCHIPS 1
+
+/* PSCI interface */
+#define KVM_PSCI_FN_BASE 0x95c1ba5e
+#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
+
+#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
+#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
+#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
+#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
+
+#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
+#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
+#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
+#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
+
+#endif
+
+#endif /* __ARM_KVM_H__ */
diff --git a/tools/arch/arm64/include/uapi/asm/mman.h b/tools/arch/arm64/include/uapi/asm/mman.h
new file mode 100644
index 000000000..2ee288e44
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_ARM64_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on arm64, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/arm64/include/uapi/asm/perf_regs.h b/tools/arch/arm64/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..d54daafa8
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/perf_regs.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_ARM64_PERF_REGS_H
+#define _ASM_ARM64_PERF_REGS_H
+
+enum perf_event_arm_regs {
+ PERF_REG_ARM64_X0,
+ PERF_REG_ARM64_X1,
+ PERF_REG_ARM64_X2,
+ PERF_REG_ARM64_X3,
+ PERF_REG_ARM64_X4,
+ PERF_REG_ARM64_X5,
+ PERF_REG_ARM64_X6,
+ PERF_REG_ARM64_X7,
+ PERF_REG_ARM64_X8,
+ PERF_REG_ARM64_X9,
+ PERF_REG_ARM64_X10,
+ PERF_REG_ARM64_X11,
+ PERF_REG_ARM64_X12,
+ PERF_REG_ARM64_X13,
+ PERF_REG_ARM64_X14,
+ PERF_REG_ARM64_X15,
+ PERF_REG_ARM64_X16,
+ PERF_REG_ARM64_X17,
+ PERF_REG_ARM64_X18,
+ PERF_REG_ARM64_X19,
+ PERF_REG_ARM64_X20,
+ PERF_REG_ARM64_X21,
+ PERF_REG_ARM64_X22,
+ PERF_REG_ARM64_X23,
+ PERF_REG_ARM64_X24,
+ PERF_REG_ARM64_X25,
+ PERF_REG_ARM64_X26,
+ PERF_REG_ARM64_X27,
+ PERF_REG_ARM64_X28,
+ PERF_REG_ARM64_X29,
+ PERF_REG_ARM64_LR,
+ PERF_REG_ARM64_SP,
+ PERF_REG_ARM64_PC,
+ PERF_REG_ARM64_MAX,
+};
+#endif /* _ASM_ARM64_PERF_REGS_H */
diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..5072cbd15
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/unistd.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define __ARCH_WANT_RENAMEAT
+
+#include <asm-generic/unistd.h>
diff --git a/tools/arch/h8300/include/asm/bitsperlong.h b/tools/arch/h8300/include/asm/bitsperlong.h
new file mode 100644
index 000000000..fa1508337
--- /dev/null
+++ b/tools/arch/h8300/include/asm/bitsperlong.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_H8300_BITS_PER_LONG
+#define __ASM_H8300_BITS_PER_LONG
+
+#include <asm-generic/bitsperlong.h>
+
+#if !defined(__ASSEMBLY__)
+/* the h8300-unknown-linux toolchain requires long for these types */
+#define __kernel_size_t __kernel_size_t
+typedef unsigned long __kernel_size_t;
+typedef long __kernel_ssize_t;
+typedef long __kernel_ptrdiff_t;
+#endif
+
+#endif /* __ASM_H8300_BITS_PER_LONG */
diff --git a/tools/arch/h8300/include/uapi/asm/mman.h b/tools/arch/h8300/include/uapi/asm/mman.h
new file mode 100644
index 000000000..be7bbe052
--- /dev/null
+++ b/tools/arch/h8300/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_H8300_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on h8300, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/hexagon/include/uapi/asm/bitsperlong.h b/tools/arch/hexagon/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..5adca0d26
--- /dev/null
+++ b/tools/arch/hexagon/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+#ifndef __ASM_HEXAGON_BITSPERLONG_H
+#define __ASM_HEXAGON_BITSPERLONG_H
+
+#define __BITS_PER_LONG 32
+
+#include <asm-generic/bitsperlong.h>
+
+#endif
diff --git a/tools/arch/hexagon/include/uapi/asm/mman.h b/tools/arch/hexagon/include/uapi/asm/mman.h
new file mode 100644
index 000000000..cd59ba932
--- /dev/null
+++ b/tools/arch/hexagon/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_HEXAGON_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on hexagon, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
new file mode 100644
index 000000000..90f8bbd9a
--- /dev/null
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * Memory barrier definitions. This is based on information published
+ * in the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H
+#define _TOOLS_LINUX_ASM_IA64_BARRIER_H
+
+#include <linux/compiler.h>
+
+/*
+ * Macros to force memory ordering. In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ * wmb(): Guarantees that all preceding stores to memory-
+ * like regions are visible before any subsequent
+ * stores and that all following stores will be
+ * visible only after all previous stores.
+ * rmb(): Like wmb(), but for reads.
+ * mb(): wmb()/rmb() combo, i.e., all previous memory
+ * accesses are visible before all subsequent
+ * accesses and vice versa. This is also known as
+ * a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers. For that, mf.a needs to
+ * be used. However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+
+#define ia64_mf()	asm volatile ("mf" ::: "memory")
+
+#define mb()		ia64_mf()
+#define rmb() mb()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/arch/ia64/include/uapi/asm/bitsperlong.h b/tools/arch/ia64/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..1146d5556
--- /dev/null
+++ b/tools/arch/ia64/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_IA64_BITSPERLONG_H
+#define __ASM_IA64_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_IA64_BITSPERLONG_H */
diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h
new file mode 100644
index 000000000..2a19bb1db
--- /dev/null
+++ b/tools/arch/ia64/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on ia64, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/microblaze/include/uapi/asm/bitsperlong.h b/tools/arch/microblaze/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..76da34b10
--- /dev/null
+++ b/tools/arch/microblaze/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#include <asm-generic/bitsperlong.h>
diff --git a/tools/arch/microblaze/include/uapi/asm/mman.h b/tools/arch/microblaze/include/uapi/asm/mman.h
new file mode 100644
index 000000000..f3f2103fd
--- /dev/null
+++ b/tools/arch/microblaze/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MICROBLAZE_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on microblaze, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/mips/include/asm/barrier.h b/tools/arch/mips/include/asm/barrier.h
new file mode 100644
index 000000000..0d1191523
--- /dev/null
+++ b/tools/arch/mips/include/asm/barrier.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+#define _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in c1e028ef40b8d6943b767028ba17d4f2ba020edb, more work needed to make it
+ * more closely follow the Linux kernel arch/mips/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+#define mb() asm volatile( \
+ ".set mips2\n\t" \
+ "sync\n\t" \
+ ".set mips0" \
+ : /* no output */ \
+ : /* no input */ \
+ : "memory")
+#define wmb() mb()
+#define rmb() mb()
+
+#endif /* _TOOLS_LINUX_ASM_MIPS_BARRIER_H */
diff --git a/tools/arch/mips/include/asm/errno.h b/tools/arch/mips/include/asm/errno.h
new file mode 100644
index 000000000..21d91cdfe
--- /dev/null
+++ b/tools/arch/mips/include/asm/errno.h
@@ -0,0 +1,17 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle
+ */
+#ifndef _ASM_ERRNO_H
+#define _ASM_ERRNO_H
+
+#include <uapi/asm/errno.h>
+
+
+/* The biggest error number defined here or in <linux/errno.h>. */
+#define EMAXERRNO 1133
+
+#endif /* _ASM_ERRNO_H */
diff --git a/tools/arch/mips/include/uapi/asm/bitsperlong.h b/tools/arch/mips/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..7268380d8
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_MIPS_BITSPERLONG_H
+#define __ASM_MIPS_BITSPERLONG_H
+
+#define __BITS_PER_LONG _MIPS_SZLONG
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_MIPS_BITSPERLONG_H */
diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h
new file mode 100644
index 000000000..2fb714e2d
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/errno.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle
+ */
+#ifndef _UAPI_ASM_ERRNO_H
+#define _UAPI_ASM_ERRNO_H
+
+/*
+ * These error numbers are intended to be MIPS ABI compatible
+ */
+
+#include <asm-generic/errno-base.h>
+
+#define ENOMSG 35 /* No message of desired type */
+#define EIDRM 36 /* Identifier removed */
+#define ECHRNG 37 /* Channel number out of range */
+#define EL2NSYNC 38 /* Level 2 not synchronized */
+#define EL3HLT 39 /* Level 3 halted */
+#define EL3RST 40 /* Level 3 reset */
+#define ELNRNG 41 /* Link number out of range */
+#define EUNATCH 42 /* Protocol driver not attached */
+#define ENOCSI 43 /* No CSI structure available */
+#define EL2HLT 44 /* Level 2 halted */
+#define EDEADLK 45 /* Resource deadlock would occur */
+#define ENOLCK 46 /* No record locks available */
+#define EBADE 50 /* Invalid exchange */
+#define EBADR 51 /* Invalid request descriptor */
+#define EXFULL 52 /* Exchange full */
+#define ENOANO 53 /* No anode */
+#define EBADRQC 54 /* Invalid request code */
+#define EBADSLT 55 /* Invalid slot */
+#define EDEADLOCK 56 /* File locking deadlock error */
+#define EBFONT 59 /* Bad font file format */
+#define ENOSTR 60 /* Device not a stream */
+#define ENODATA 61 /* No data available */
+#define ETIME 62 /* Timer expired */
+#define ENOSR 63 /* Out of streams resources */
+#define ENONET 64 /* Machine is not on the network */
+#define ENOPKG 65 /* Package not installed */
+#define EREMOTE 66 /* Object is remote */
+#define ENOLINK 67 /* Link has been severed */
+#define EADV 68 /* Advertise error */
+#define ESRMNT 69 /* Srmount error */
+#define ECOMM 70 /* Communication error on send */
+#define EPROTO 71 /* Protocol error */
+#define EDOTDOT 73 /* RFS specific error */
+#define EMULTIHOP 74 /* Multihop attempted */
+#define EBADMSG 77 /* Not a data message */
+#define ENAMETOOLONG 78 /* File name too long */
+#define EOVERFLOW 79 /* Value too large for defined data type */
+#define ENOTUNIQ 80 /* Name not unique on network */
+#define EBADFD 81 /* File descriptor in bad state */
+#define EREMCHG 82 /* Remote address changed */
+#define ELIBACC 83 /* Can not access a needed shared library */
+#define ELIBBAD 84 /* Accessing a corrupted shared library */
+#define ELIBSCN 85 /* .lib section in a.out corrupted */
+#define ELIBMAX 86 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 87 /* Cannot exec a shared library directly */
+#define EILSEQ 88 /* Illegal byte sequence */
+#define ENOSYS 89 /* Function not implemented */
+#define ELOOP 90 /* Too many symbolic links encountered */
+#define ERESTART 91 /* Interrupted system call should be restarted */
+#define ESTRPIPE 92 /* Streams pipe error */
+#define ENOTEMPTY 93 /* Directory not empty */
+#define EUSERS 94 /* Too many users */
+#define ENOTSOCK 95 /* Socket operation on non-socket */
+#define EDESTADDRREQ 96 /* Destination address required */
+#define EMSGSIZE 97 /* Message too long */
+#define EPROTOTYPE 98 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 99 /* Protocol not available */
+#define EPROTONOSUPPORT 120 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 121 /* Socket type not supported */
+#define EOPNOTSUPP 122 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 123 /* Protocol family not supported */
+#define EAFNOSUPPORT 124 /* Address family not supported by protocol */
+#define EADDRINUSE 125 /* Address already in use */
+#define EADDRNOTAVAIL 126 /* Cannot assign requested address */
+#define ENETDOWN 127 /* Network is down */
+#define ENETUNREACH 128 /* Network is unreachable */
+#define ENETRESET 129 /* Network dropped connection because of reset */
+#define ECONNABORTED 130 /* Software caused connection abort */
+#define ECONNRESET 131 /* Connection reset by peer */
+#define ENOBUFS 132 /* No buffer space available */
+#define EISCONN 133 /* Transport endpoint is already connected */
+#define ENOTCONN 134 /* Transport endpoint is not connected */
+#define EUCLEAN 135 /* Structure needs cleaning */
+#define ENOTNAM 137 /* Not a XENIX named type file */
+#define ENAVAIL 138 /* No XENIX semaphores available */
+#define EISNAM 139 /* Is a named type file */
+#define EREMOTEIO 140 /* Remote I/O error */
+#define EINIT 141 /* Reserved */
+#define EREMDEV 142 /* Error 142 */
+#define ESHUTDOWN 143 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 144 /* Too many references: cannot splice */
+#define ETIMEDOUT 145 /* Connection timed out */
+#define ECONNREFUSED 146 /* Connection refused */
+#define EHOSTDOWN 147 /* Host is down */
+#define EHOSTUNREACH 148 /* No route to host */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define EALREADY 149 /* Operation already in progress */
+#define EINPROGRESS 150 /* Operation now in progress */
+#define ESTALE 151 /* Stale file handle */
+#define ECANCELED 158 /* AIO operation canceled */
+
+/*
+ * These errors are Linux extensions.
+ */
+#define ENOMEDIUM 159 /* No medium found */
+#define EMEDIUMTYPE 160 /* Wrong medium type */
+#define ENOKEY 161 /* Required key not available */
+#define EKEYEXPIRED 162 /* Key has expired */
+#define EKEYREVOKED 163 /* Key has been revoked */
+#define EKEYREJECTED 164 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define EOWNERDEAD 165 /* Owner died */
+#define ENOTRECOVERABLE 166 /* State not recoverable */
+
+#define ERFKILL 167 /* Operation not possible due to RF-kill */
+
+#define EHWPOISON 168 /* Memory page has hardware error */
+
+#define EDQUOT 1133 /* Quota exceeded */
+
+
+#endif /* _UAPI_ASM_ERRNO_H */
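
[Editor's note: why carry this table in tools/ at all — errno values are not portable across architectures (35 is EDEADLK in the asm-generic table but ENOMSG here), so a tool decoding events from a MIPS target must use the target's table rather than the host's. A minimal, hypothetical sketch of such a lookup; the array and helper names are illustrative, not perf's actual code.]

#include <stddef.h>

/* illustrative excerpt of a per-target table keyed by errno value */
static const char * const mips_errno_names[] = {
	[35] = "ENOMSG",	/* 35 is EDEADLK in asm-generic */
	[36] = "EIDRM",
	[37] = "ECHRNG",
};

static const char *mips_errno_name(unsigned int err)
{
	size_t n = sizeof(mips_errno_names) / sizeof(mips_errno_names[0]);

	return (err < n && mips_errno_names[err]) ?
		mips_errno_names[err] : "(unknown)";
}
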
diff --git a/tools/arch/mips/include/uapi/asm/kvm.h b/tools/arch/mips/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..d2714cc1c
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/kvm.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
+ * Copyright (C) 2013 Cavium, Inc.
+ * Authors: Sanjay Lal <sanjayl@kymasys.com>
+ */
+
+#ifndef __LINUX_KVM_MIPS_H
+#define __LINUX_KVM_MIPS_H
+
+#include <linux/types.h>
+
+/*
+ * KVM MIPS specific structures and definitions.
+ *
+ * Some parts derived from the x86 version of this file.
+ */
+
+/*
+ * for KVM_GET_REGS and KVM_SET_REGS
+ *
+ * If Config[AT] is zero (32-bit CPU), the register contents are
+ * stored in the lower 32 bits of the struct kvm_regs fields and
+ * sign-extended to 64 bits.
+ */
+struct kvm_regs {
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+ __u64 gpr[32];
+ __u64 hi;
+ __u64 lo;
+ __u64 pc;
+};
+
+/*
+ * for KVM_GET_FPU and KVM_SET_FPU
+ */
+struct kvm_fpu {
+};
+
+
+/*
+ * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
+ * registers. The id field is broken down as follows:
+ *
+ * bits[63..52] - As per linux/kvm.h
+ * bits[51..32] - Must be zero.
+ * bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CP0 registers.
+ * bits[15..8] - Must be zero.
+ * bits[7..3] - Register 'rd' index.
+ * bits[2..0] - Register 'sel' index.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / MSA registers (see definitions below).
+ *
+ * Other register sets may be added in the future. Each set would
+ * have its own identifier in bits[31..16].
+ */
+
+#define KVM_REG_MIPS_GP (KVM_REG_MIPS | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_CP0 (KVM_REG_MIPS | 0x0000000000010000ULL)
+#define KVM_REG_MIPS_KVM (KVM_REG_MIPS | 0x0000000000020000ULL)
+#define KVM_REG_MIPS_FPU (KVM_REG_MIPS | 0x0000000000030000ULL)
+
+
+/*
+ * KVM_REG_MIPS_GP - General purpose registers from kvm_regs.
+ */
+
+#define KVM_REG_MIPS_R0 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_MIPS_R1 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 1)
+#define KVM_REG_MIPS_R2 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 2)
+#define KVM_REG_MIPS_R3 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 3)
+#define KVM_REG_MIPS_R4 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 4)
+#define KVM_REG_MIPS_R5 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 5)
+#define KVM_REG_MIPS_R6 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 6)
+#define KVM_REG_MIPS_R7 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 7)
+#define KVM_REG_MIPS_R8 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 8)
+#define KVM_REG_MIPS_R9 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 9)
+#define KVM_REG_MIPS_R10 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10)
+#define KVM_REG_MIPS_R11 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11)
+#define KVM_REG_MIPS_R12 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12)
+#define KVM_REG_MIPS_R13 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13)
+#define KVM_REG_MIPS_R14 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14)
+#define KVM_REG_MIPS_R15 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15)
+#define KVM_REG_MIPS_R16 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16)
+#define KVM_REG_MIPS_R17 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17)
+#define KVM_REG_MIPS_R18 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18)
+#define KVM_REG_MIPS_R19 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19)
+#define KVM_REG_MIPS_R20 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20)
+#define KVM_REG_MIPS_R21 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21)
+#define KVM_REG_MIPS_R22 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22)
+#define KVM_REG_MIPS_R23 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23)
+#define KVM_REG_MIPS_R24 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24)
+#define KVM_REG_MIPS_R25 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25)
+#define KVM_REG_MIPS_R26 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26)
+#define KVM_REG_MIPS_R27 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27)
+#define KVM_REG_MIPS_R28 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28)
+#define KVM_REG_MIPS_R29 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29)
+#define KVM_REG_MIPS_R30 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30)
+#define KVM_REG_MIPS_R31 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31)
+
+#define KVM_REG_MIPS_HI (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32)
+#define KVM_REG_MIPS_LO (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33)
+#define KVM_REG_MIPS_PC (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34)
+
+
+/*
+ * KVM_REG_MIPS_KVM - KVM specific control registers.
+ */
+
+/*
+ * CP0_Count control
+ * DC: Set 0: Master disable CP0_Count and set COUNT_RESUME to now
+ * Set 1: Master re-enable CP0_Count with unchanged bias, handling timer
+ * interrupts since COUNT_RESUME
+ * This can be used to freeze the timer to get a consistent snapshot of
+ * the CP0_Count and timer interrupt pending state, while also resuming
+ * safely without losing time or guest timer interrupts.
+ * Other: Reserved, do not change.
+ */
+#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001
+
+/*
+ * CP0_Count resume monotonic nanoseconds
+ * The monotonic nanosecond time of the last set of COUNT_CTL.DC (master
+ * disable). Any reads and writes of Count related registers while
+ * COUNT_CTL.DC=1 will appear to occur at this time. When COUNT_CTL.DC is
+ * cleared again (master enable) any timer interrupts since this time will be
+ * emulated.
+ * Modifications to times in the future are rejected.
+ */
+#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1)
+/*
+ * CP0_Count rate in Hz
+ * Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
+ * discontinuities in CP0_Count.
+ */
+#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2)
+
+
+/*
+ * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers.
+ *
+ * bits[15..8] - Register subset (see definitions below).
+ * bits[7..5] - Must be zero.
+ * bits[4..0] - Register number within register subset.
+ */
+
+#define KVM_REG_MIPS_FPR (KVM_REG_MIPS_FPU | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_FCR (KVM_REG_MIPS_FPU | 0x0000000000000100ULL)
+#define KVM_REG_MIPS_MSACR (KVM_REG_MIPS_FPU | 0x0000000000000200ULL)
+
+/*
+ * KVM_REG_MIPS_FPR - Floating point / Vector registers.
+ */
+#define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32 | (n))
+#define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64 | (n))
+#define KVM_REG_MIPS_VEC_128(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n))
+
+/*
+ * KVM_REG_MIPS_FCR - Floating point control registers.
+ */
+#define KVM_REG_MIPS_FCR_IR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 0)
+#define KVM_REG_MIPS_FCR_CSR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31)
+
+/*
+ * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers.
+ */
+#define KVM_REG_MIPS_MSA_IR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 0)
+#define KVM_REG_MIPS_MSA_CSR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 1)
+
+
+/*
+ * KVM MIPS specific structures and definitions
+ *
+ */
+struct kvm_debug_exit_arch {
+ __u64 epc;
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+/* dummy definition */
+struct kvm_sregs {
+};
+
+struct kvm_mips_interrupt {
+ /* in */
+ __u32 cpu;
+ __u32 irq;
+};
+
+#endif /* __LINUX_KVM_MIPS_H */
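
[Editor's note: as a sketch of the id layout documented above, reading the guest CP0_Status register (rd = 12, sel = 0) through KVM_GET_ONE_REG might look like this. The helper name is hypothetical; struct kvm_one_reg, KVM_REG_SIZE_U64 and the ioctl come from <linux/kvm.h>, and error handling is elided.]

#include <linux/kvm.h>
#include <sys/ioctl.h>

static int read_cp0_status(int vcpu_fd, __u64 *val)
{
	struct kvm_one_reg reg = {
		/* register set 1 (CP0); rd in bits[7..3], sel in bits[2..0] */
		.id   = KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (12 << 3) | 0,
		.addr = (__u64)(unsigned long)val,
	};

	return ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
}
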
diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h
new file mode 100644
index 000000000..de2206883
--- /dev/null
+++ b/tools/arch/mips/include/uapi/asm/mman.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_MIPS_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 17
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 16
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 4
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 14
+#define MADV_HWPOISON 100
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x0800
+#define MAP_DENYWRITE 0x2000
+#define MAP_EXECUTABLE 0x4000
+#define MAP_FILE 0
+#define MAP_FIXED 0x010
+#define MAP_GROWSDOWN 0x1000
+#define MAP_HUGETLB 0x80000
+#define MAP_LOCKED 0x8000
+#define MAP_NONBLOCK 0x20000
+#define MAP_NORESERVE 0x0400
+#define MAP_POPULATE 0x10000
+#define MAP_PRIVATE 0x002
+#define MAP_SHARED 0x001
+#define MAP_STACK 0x40000
+#define PROT_EXEC 0x04
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x00
+#define PROT_READ 0x01
+#define PROT_SEM 0x10
+#define PROT_WRITE 0x02
+/* MADV_SOFT_OFFLINE is undefined on mips, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on mips, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on mips, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
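
[Editor's note: these fixed-up constants let perf decode mmap flags recorded on a MIPS machine regardless of the host. A hypothetical sketch of the kind of bit-to-name table they feed, assuming this header's constants are in scope; the struct and array names are illustrative.]

struct flag_name {
	unsigned long	flag;
	const char	*name;
};

/* note MAP_ANONYMOUS is 0x0800 here but 0x20 on x86 */
static const struct flag_name mips_map_flags[] = {
	{ MAP_SHARED,		"SHARED" },
	{ MAP_PRIVATE,		"PRIVATE" },
	{ MAP_FIXED,		"FIXED" },
	{ MAP_ANONYMOUS,	"ANONYMOUS" },
};
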
diff --git a/tools/arch/parisc/include/uapi/asm/bitsperlong.h b/tools/arch/parisc/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..23ac75629
--- /dev/null
+++ b/tools/arch/parisc/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_PARISC_BITSPERLONG_H
+#define __ASM_PARISC_BITSPERLONG_H
+
+#if defined(__LP64__)
+#define __BITS_PER_LONG 64
+#define SHIFT_PER_LONG 6
+#else
+#define __BITS_PER_LONG 32
+#define SHIFT_PER_LONG 5
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_PARISC_BITSPERLONG_H */
diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h
new file mode 100644
index 000000000..87245c584
--- /dev/null
+++ b/tools/arch/parisc/include/uapi/asm/errno.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _PARISC_ERRNO_H
+#define _PARISC_ERRNO_H
+
+#include <asm-generic/errno-base.h>
+
+#define ENOMSG 35 /* No message of desired type */
+#define EIDRM 36 /* Identifier removed */
+#define ECHRNG 37 /* Channel number out of range */
+#define EL2NSYNC 38 /* Level 2 not synchronized */
+#define EL3HLT 39 /* Level 3 halted */
+#define EL3RST 40 /* Level 3 reset */
+#define ELNRNG 41 /* Link number out of range */
+#define EUNATCH 42 /* Protocol driver not attached */
+#define ENOCSI 43 /* No CSI structure available */
+#define EL2HLT 44 /* Level 2 halted */
+#define EDEADLK 45 /* Resource deadlock would occur */
+#define EDEADLOCK EDEADLK
+#define ENOLCK 46 /* No record locks available */
+#define EILSEQ 47 /* Illegal byte sequence */
+
+#define ENONET 50 /* Machine is not on the network */
+#define ENODATA 51 /* No data available */
+#define ETIME 52 /* Timer expired */
+#define ENOSR 53 /* Out of streams resources */
+#define ENOSTR 54 /* Device not a stream */
+#define ENOPKG 55 /* Package not installed */
+
+#define ENOLINK 57 /* Link has been severed */
+#define EADV 58 /* Advertise error */
+#define ESRMNT 59 /* Srmount error */
+#define ECOMM 60 /* Communication error on send */
+#define EPROTO 61 /* Protocol error */
+
+#define EMULTIHOP 64 /* Multihop attempted */
+
+#define EDOTDOT 66 /* RFS specific error */
+#define EBADMSG 67 /* Not a data message */
+#define EUSERS 68 /* Too many users */
+#define EDQUOT 69 /* Quota exceeded */
+#define ESTALE 70 /* Stale file handle */
+#define EREMOTE 71 /* Object is remote */
+#define EOVERFLOW 72 /* Value too large for defined data type */
+
+/* These errnos are defined by Linux but not HPUX. */
+
+#define EBADE 160 /* Invalid exchange */
+#define EBADR 161 /* Invalid request descriptor */
+#define EXFULL 162 /* Exchange full */
+#define ENOANO 163 /* No anode */
+#define EBADRQC 164 /* Invalid request code */
+#define EBADSLT 165 /* Invalid slot */
+#define EBFONT 166 /* Bad font file format */
+#define ENOTUNIQ 167 /* Name not unique on network */
+#define EBADFD 168 /* File descriptor in bad state */
+#define EREMCHG 169 /* Remote address changed */
+#define ELIBACC 170 /* Can not access a needed shared library */
+#define ELIBBAD 171 /* Accessing a corrupted shared library */
+#define ELIBSCN 172 /* .lib section in a.out corrupted */
+#define ELIBMAX 173 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 174 /* Cannot exec a shared library directly */
+#define ERESTART 175 /* Interrupted system call should be restarted */
+#define ESTRPIPE 176 /* Streams pipe error */
+#define EUCLEAN 177 /* Structure needs cleaning */
+#define ENOTNAM 178 /* Not a XENIX named type file */
+#define ENAVAIL 179 /* No XENIX semaphores available */
+#define EISNAM 180 /* Is a named type file */
+#define EREMOTEIO 181 /* Remote I/O error */
+#define ENOMEDIUM 182 /* No medium found */
+#define EMEDIUMTYPE 183 /* Wrong medium type */
+#define ENOKEY 184 /* Required key not available */
+#define EKEYEXPIRED 185 /* Key has expired */
+#define EKEYREVOKED 186 /* Key has been revoked */
+#define EKEYREJECTED 187 /* Key was rejected by service */
+
+/* We now return you to your regularly scheduled HPUX. */
+
+#define ENOSYM 215 /* symbol does not exist in executable */
+#define ENOTSOCK 216 /* Socket operation on non-socket */
+#define EDESTADDRREQ 217 /* Destination address required */
+#define EMSGSIZE 218 /* Message too long */
+#define EPROTOTYPE 219 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 220 /* Protocol not available */
+#define EPROTONOSUPPORT 221 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 222 /* Socket type not supported */
+#define EOPNOTSUPP 223 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 224 /* Protocol family not supported */
+#define EAFNOSUPPORT 225 /* Address family not supported by protocol */
+#define EADDRINUSE 226 /* Address already in use */
+#define EADDRNOTAVAIL 227 /* Cannot assign requested address */
+#define ENETDOWN 228 /* Network is down */
+#define ENETUNREACH 229 /* Network is unreachable */
+#define ENETRESET 230 /* Network dropped connection because of reset */
+#define ECONNABORTED 231 /* Software caused connection abort */
+#define ECONNRESET 232 /* Connection reset by peer */
+#define ENOBUFS 233 /* No buffer space available */
+#define EISCONN 234 /* Transport endpoint is already connected */
+#define ENOTCONN 235 /* Transport endpoint is not connected */
+#define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 237 /* Too many references: cannot splice */
+#define ETIMEDOUT 238 /* Connection timed out */
+#define ECONNREFUSED 239 /* Connection refused */
+#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */
+#define EREMOTERELEASE 240 /* Remote peer released connection */
+#define EHOSTDOWN 241 /* Host is down */
+#define EHOSTUNREACH 242 /* No route to host */
+
+#define EALREADY 244 /* Operation already in progress */
+#define EINPROGRESS 245 /* Operation now in progress */
+#define EWOULDBLOCK EAGAIN /* Operation would block (Not HPUX compliant) */
+#define ENOTEMPTY 247 /* Directory not empty */
+#define ENAMETOOLONG 248 /* File name too long */
+#define ELOOP 249 /* Too many symbolic links encountered */
+#define ENOSYS 251 /* Function not implemented */
+
+#define ECANCELLED 253 /* aio request was canceled before complete (POSIX.4 / HPUX) */
+#define ECANCELED ECANCELLED /* SUSv3 and Solaris want one 'L' */
+
+/* for robust mutexes */
+#define EOWNERDEAD 254 /* Owner died */
+#define ENOTRECOVERABLE 255 /* State not recoverable */
+
+#define ERFKILL 256 /* Operation not possible due to RF-kill */
+
+#define EHWPOISON 257 /* Memory page has hardware error */
+
+#endif
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
new file mode 100644
index 000000000..1bd78758b
--- /dev/null
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 70
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 69
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 4
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 67
+#define MADV_MERGEABLE 65
+#define MADV_NOHUGEPAGE 68
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 66
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x10
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_FILE 0
+#define MAP_FIXED 0x04
+#define MAP_GROWSDOWN 0x8000
+#define MAP_HUGETLB 0x80000
+#define MAP_LOCKED 0x2000
+#define MAP_NONBLOCK 0x20000
+#define MAP_NORESERVE 0x4000
+#define MAP_POPULATE 0x10000
+#define MAP_PRIVATE 0x02
+#define MAP_SHARED 0x01
+#define MAP_STACK 0x40000
+#define PROT_EXEC 0x4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x0
+#define PROT_READ 0x1
+#define PROT_SEM 0x8
+#define PROT_WRITE 0x2
+#define MADV_HWPOISON 100
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on parisc, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on parisc, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
new file mode 100644
index 000000000..a634da05b
--- /dev/null
+++ b/tools/arch/powerpc/include/asm/barrier.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+#define _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory). The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ */
+#define mb() __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb() __asm__ __volatile__ ("sync" : : : "memory")
+
+#endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
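
[Editor's note: a sketch of the usual consumer of these macros in tools — a perf ring-buffer reader must load data_head before any of the records that head covers. Assumes a mapped struct perf_event_mmap_page from <linux/perf_event.h>; rmb() is the macro defined above, and the helper name is illustrative.]

#include <linux/perf_event.h>

static inline __u64 ring_buffer_read_head(volatile struct perf_event_mmap_page *pc)
{
	__u64 head = pc->data_head;

	rmb();	/* record loads below must not be reordered before this load */
	return head;
}
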
diff --git a/tools/arch/powerpc/include/uapi/asm/bitsperlong.h b/tools/arch/powerpc/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..46ece3ecf
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_POWERPC_BITSPERLONG_H
+#define __ASM_POWERPC_BITSPERLONG_H
+
+#if defined(__powerpc64__)
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_POWERPC_BITSPERLONG_H */
diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h
new file mode 100644
index 000000000..cc7985689
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/errno.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_POWERPC_ERRNO_H
+#define _ASM_POWERPC_ERRNO_H
+
+#include <asm-generic/errno.h>
+
+#undef EDEADLOCK
+#define EDEADLOCK 58 /* File locking deadlock error */
+
+#endif /* _ASM_POWERPC_ERRNO_H */
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..1b32b56a0
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -0,0 +1,677 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * Copyright IBM Corp. 2007
+ *
+ * Authors: Hollis Blanchard <hollisb@us.ibm.com>
+ */
+
+#ifndef __LINUX_KVM_POWERPC_H
+#define __LINUX_KVM_POWERPC_H
+
+#include <linux/types.h>
+
+/* Select powerpc specific features in <linux/kvm.h> */
+#define __KVM_HAVE_SPAPR_TCE
+#define __KVM_HAVE_PPC_SMT
+#define __KVM_HAVE_IRQCHIP
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Not always available, but if it is, this is the correct offset. */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+struct kvm_regs {
+ __u64 pc;
+ __u64 cr;
+ __u64 ctr;
+ __u64 lr;
+ __u64 xer;
+ __u64 msr;
+ __u64 srr0;
+ __u64 srr1;
+ __u64 pid;
+
+ __u64 sprg0;
+ __u64 sprg1;
+ __u64 sprg2;
+ __u64 sprg3;
+ __u64 sprg4;
+ __u64 sprg5;
+ __u64 sprg6;
+ __u64 sprg7;
+
+ __u64 gpr[32];
+};
+
+#define KVM_SREGS_E_IMPL_NONE 0
+#define KVM_SREGS_E_IMPL_FSL 1
+
+#define KVM_SREGS_E_FSL_PIDn (1 << 0) /* PID1/PID2 */
+
+/* flags for kvm_run.flags */
+#define KVM_RUN_PPC_NMI_DISP_MASK (3 << 0)
+#define KVM_RUN_PPC_NMI_DISP_FULLY_RECOV (1 << 0)
+#define KVM_RUN_PPC_NMI_DISP_LIMITED_RECOV (2 << 0)
+#define KVM_RUN_PPC_NMI_DISP_NOT_RECOV (3 << 0)
+
+/*
+ * Feature bits indicate which sections of the sregs struct are valid,
+ * both in KVM_GET_SREGS and KVM_SET_SREGS. On KVM_SET_SREGS, registers
+ * corresponding to unset feature bits will not be modified. This allows
+ * restoring a checkpoint made without that feature, while keeping the
+ * default values of the new registers.
+ *
+ * KVM_SREGS_E_BASE contains:
+ * CSRR0/1 (refers to SRR2/3 on 40x)
+ * ESR
+ * DEAR
+ * MCSR
+ * TSR
+ * TCR
+ * DEC
+ * TB
+ * VRSAVE (USPRG0)
+ */
+#define KVM_SREGS_E_BASE (1 << 0)
+
+/*
+ * KVM_SREGS_E_ARCH206 contains:
+ *
+ * PIR
+ * MCSRR0/1
+ * DECAR
+ * IVPR
+ */
+#define KVM_SREGS_E_ARCH206 (1 << 1)
+
+/*
+ * Contains EPCR, plus the upper half of 64-bit registers
+ * that are 32-bit on 32-bit implementations.
+ */
+#define KVM_SREGS_E_64 (1 << 2)
+
+#define KVM_SREGS_E_SPRG8 (1 << 3)
+#define KVM_SREGS_E_MCIVPR (1 << 4)
+
+/*
+ * IVORs are used -- contains IVOR0-15, plus additional IVORs
+ * in combination with an appropriate feature bit.
+ */
+#define KVM_SREGS_E_IVOR (1 << 5)
+
+/*
+ * Contains MAS0-4, MAS6-7, TLBnCFG, MMUCFG.
+ * Also TLBnPS if MMUCFG[MAVN] = 1.
+ */
+#define KVM_SREGS_E_ARCH206_MMU (1 << 6)
+
+/* DBSR, DBCR, IAC, DAC, DVC */
+#define KVM_SREGS_E_DEBUG (1 << 7)
+
+/* Enhanced debug -- DSRR0/1, SPRG9 */
+#define KVM_SREGS_E_ED (1 << 8)
+
+/* Embedded Floating Point (SPE) -- IVOR32-34 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_SPE (1 << 9)
+
+/*
+ * DEPRECATED! USE ONE_REG FOR THIS ONE!
+ * External Proxy (EXP) -- EPR
+ */
+#define KVM_SREGS_EXP (1 << 10)
+
+/* External PID (E.PD) -- EPSC/EPLC */
+#define KVM_SREGS_E_PD (1 << 11)
+
+/* Processor Control (E.PC) -- IVOR36-37 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PC (1 << 12)
+
+/* Page table (E.PT) -- EPTCFG */
+#define KVM_SREGS_E_PT (1 << 13)
+
+/* Embedded Performance Monitor (E.PM) -- IVOR35 if KVM_SREGS_E_IVOR */
+#define KVM_SREGS_E_PM (1 << 14)
+
+/*
+ * Special updates:
+ *
+ * Some registers may change even while a vcpu is not running.
+ * To avoid losing these changes, by default these registers are
+ * not updated by KVM_SET_SREGS. To force an update, set the bit
+ * in u.e.update_special corresponding to the register to be updated.
+ *
+ * The update_special field is zero on return from KVM_GET_SREGS.
+ *
+ * When restoring a checkpoint, the caller can set update_special
+ * to 0xffffffff to ensure that everything is restored, even new features
+ * that the caller doesn't know about.
+ */
+#define KVM_SREGS_E_UPDATE_MCSR (1 << 0)
+#define KVM_SREGS_E_UPDATE_TSR (1 << 1)
+#define KVM_SREGS_E_UPDATE_DEC (1 << 2)
+#define KVM_SREGS_E_UPDATE_DBSR (1 << 3)
+
+/*
+ * In KVM_SET_SREGS, reserved/pad fields must be left untouched from a
+ * previous KVM_GET_SREGS.
+ *
+ * Unless otherwise indicated, setting any register with KVM_SET_SREGS
+ * directly sets its value. It does not trigger any special semantics such
+ * as write-one-to-clear. Calling KVM_SET_SREGS on an unmodified struct
+ * just received from KVM_GET_SREGS is always a no-op.
+ */
+struct kvm_sregs {
+ __u32 pvr;
+ union {
+ struct {
+ __u64 sdr1;
+ struct {
+ struct {
+ __u64 slbe;
+ __u64 slbv;
+ } slb[64];
+ } ppc64;
+ struct {
+ __u32 sr[16];
+ __u64 ibat[8];
+ __u64 dbat[8];
+ } ppc32;
+ } s;
+ struct {
+ union {
+ struct { /* KVM_SREGS_E_IMPL_FSL */
+ __u32 features; /* KVM_SREGS_E_FSL_ */
+ __u32 svr;
+ __u64 mcar;
+ __u32 hid0;
+
+ /* KVM_SREGS_E_FSL_PIDn */
+ __u32 pid1, pid2;
+ } fsl;
+ __u8 pad[256];
+ } impl;
+
+ __u32 features; /* KVM_SREGS_E_ */
+ __u32 impl_id; /* KVM_SREGS_E_IMPL_ */
+ __u32 update_special; /* KVM_SREGS_E_UPDATE_ */
+ __u32 pir; /* read-only */
+ __u64 sprg8;
+ __u64 sprg9; /* E.ED */
+ __u64 csrr0;
+ __u64 dsrr0; /* E.ED */
+ __u64 mcsrr0;
+ __u32 csrr1;
+ __u32 dsrr1; /* E.ED */
+ __u32 mcsrr1;
+ __u32 esr;
+ __u64 dear;
+ __u64 ivpr;
+ __u64 mcivpr;
+ __u64 mcsr; /* KVM_SREGS_E_UPDATE_MCSR */
+
+ __u32 tsr; /* KVM_SREGS_E_UPDATE_TSR */
+ __u32 tcr;
+ __u32 decar;
+ __u32 dec; /* KVM_SREGS_E_UPDATE_DEC */
+
+ /*
+ * Userspace can read TB directly, but the
+ * value reported here is consistent with "dec".
+ *
+ * Read-only.
+ */
+ __u64 tb;
+
+ __u32 dbsr; /* KVM_SREGS_E_UPDATE_DBSR */
+ __u32 dbcr[3];
+ /*
+ * The iac/dac registers are 64 bits wide, but this
+ * interface provides only their lower 32 bits on 64-bit
+ * processors; the ONE_REG interface was added for full
+ * 64-bit access to iac/dac.
+ */
+ __u32 iac[4];
+ __u32 dac[2];
+ __u32 dvc[2];
+ __u8 num_iac; /* read-only */
+ __u8 num_dac; /* read-only */
+ __u8 num_dvc; /* read-only */
+ __u8 pad;
+
+ __u32 epr; /* EXP */
+ __u32 vrsave; /* a.k.a. USPRG0 */
+ __u32 epcr; /* KVM_SREGS_E_64 */
+
+ __u32 mas0;
+ __u32 mas1;
+ __u64 mas2;
+ __u64 mas7_3;
+ __u32 mas4;
+ __u32 mas6;
+
+ __u32 ivor_low[16]; /* IVOR0-15 */
+ __u32 ivor_high[18]; /* IVOR32+, plus room to expand */
+
+ __u32 mmucfg; /* read-only */
+ __u32 eptcfg; /* E.PT, read-only */
+ __u32 tlbcfg[4];/* read-only */
+ __u32 tlbps[4]; /* read-only */
+
+ __u32 eplc, epsc; /* E.PD */
+ } e;
+ __u8 pad[1020];
+ } u;
+};
+
+struct kvm_fpu {
+ __u64 fpr[32];
+};
+
+/*
+ * Defines for h/w breakpoint, watchpoint (read, write or both) and
+ * software breakpoint.
+ * These are used as "type" in KVM_SET_GUEST_DEBUG ioctl and "status"
+ * for KVM_DEBUG_EXIT.
+ */
+#define KVMPPC_DEBUG_NONE 0x0
+#define KVMPPC_DEBUG_BREAKPOINT (1UL << 1)
+#define KVMPPC_DEBUG_WATCH_WRITE (1UL << 2)
+#define KVMPPC_DEBUG_WATCH_READ (1UL << 3)
+struct kvm_debug_exit_arch {
+ __u64 address;
+ /*
+ * exiting to userspace because of h/w breakpoint, watchpoint
+ * (read, write or both) and software breakpoint.
+ */
+ __u32 status;
+ __u32 reserved;
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ struct {
+ /* H/W breakpoint/watchpoint address */
+ __u64 addr;
+ /*
+ * Type denotes h/w breakpoint, read watchpoint, write
+ * watchpoint or watchpoint (both read and write).
+ */
+ __u32 type;
+ __u32 reserved;
+ } bp[16];
+};
+
+/* Debug related defines */
+/*
+ * kvm_guest_debug->control is a 32-bit field. The lower 16 bits are generic
+ * and the upper 16 bits are architecture specific. The architecture-specific
+ * bits select whether the ioctl sets a hardware or a software breakpoint.
+ */
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+#define KVM_INTERRUPT_SET -1U
+#define KVM_INTERRUPT_UNSET -2U
+#define KVM_INTERRUPT_SET_LEVEL -3U
+
+#define KVM_CPU_440 1
+#define KVM_CPU_E500V2 2
+#define KVM_CPU_3S_32 3
+#define KVM_CPU_3S_64 4
+#define KVM_CPU_E500MC 5
+
+/* for KVM_CAP_SPAPR_TCE */
+struct kvm_create_spapr_tce {
+ __u64 liobn;
+ __u32 window_size;
+};
+
+/* for KVM_CAP_SPAPR_TCE_64 */
+struct kvm_create_spapr_tce_64 {
+ __u64 liobn;
+ __u32 page_shift;
+ __u32 flags;
+ __u64 offset; /* in pages */
+ __u64 size; /* in pages */
+};
+
+/* for KVM_ALLOCATE_RMA */
+struct kvm_allocate_rma {
+ __u64 rma_size;
+};
+
+/* for KVM_CAP_PPC_RTAS */
+struct kvm_rtas_token_args {
+ char name[120];
+ __u64 token; /* Use a token of 0 to undefine a mapping */
+};
+
+struct kvm_book3e_206_tlb_entry {
+ __u32 mas8;
+ __u32 mas1;
+ __u64 mas2;
+ __u64 mas7_3;
+};
+
+struct kvm_book3e_206_tlb_params {
+ /*
+ * For mmu types KVM_MMU_FSL_BOOKE_NOHV and KVM_MMU_FSL_BOOKE_HV:
+ *
+ * - The number of ways of TLB0 must be a power of two between 2 and
+ * 16.
+ * - TLB1 must be fully associative.
+ * - The size of TLB0 must be a multiple of the number of ways, and
+ * the number of sets must be a power of two.
+ * - The size of TLB1 may not exceed 64 entries.
+ * - TLB0 supports 4 KiB pages.
+ * - The page sizes supported by TLB1 are as indicated by
+ * TLB1CFG (if MMUCFG[MAVN] = 0) or TLB1PS (if MMUCFG[MAVN] = 1)
+ * as returned by KVM_GET_SREGS.
+ * - TLB2 and TLB3 are reserved, and their entries in tlb_sizes[]
+ * and tlb_ways[] must be zero.
+ *
+ * tlb_ways[n] = tlb_sizes[n] means the array is fully associative.
+ *
+ * KVM will adjust TLBnCFG based on the sizes configured here,
+ * though arrays greater than 2048 entries will have TLBnCFG[NENTRY]
+ * set to zero.
+ */
+ __u32 tlb_sizes[4];
+ __u32 tlb_ways[4];
+ __u32 reserved[8];
+};
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid. Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream. The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
+/* For KVM_PPC_CONFIGURE_V3_MMU */
+struct kvm_ppc_mmuv3_cfg {
+ __u64 flags;
+ __u64 process_table; /* second doubleword of partition table entry */
+};
+
+/* Flag values for KVM_PPC_CONFIGURE_V3_MMU */
+#define KVM_PPC_MMUV3_RADIX 1 /* 1 = radix mode, 0 = HPT */
+#define KVM_PPC_MMUV3_GTSE 2 /* global translation shootdown enable */
+
+/* For KVM_PPC_GET_RMMU_INFO */
+struct kvm_ppc_rmmu_info {
+ struct kvm_ppc_radix_geom {
+ __u8 page_shift;
+ __u8 level_bits[4];
+ __u8 pad[3];
+ } geometries[8];
+ __u32 ap_encodings[8];
+};
+
+/* For KVM_PPC_GET_CPU_CHAR */
+struct kvm_ppc_cpu_char {
+ __u64 character; /* characteristics of the CPU */
+ __u64 behaviour; /* recommended software behaviour */
+ __u64 character_mask; /* valid bits in character */
+ __u64 behaviour_mask; /* valid bits in behaviour */
+};
+
+/*
+ * Values for character and character_mask.
+ * These are identical to the values used by H_GET_CPU_CHARACTERISTICS.
+ */
+#define KVM_PPC_CPU_CHAR_SPEC_BAR_ORI31 (1ULL << 63)
+#define KVM_PPC_CPU_CHAR_BCCTRL_SERIALISED (1ULL << 62)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_ORI30 (1ULL << 61)
+#define KVM_PPC_CPU_CHAR_L1D_FLUSH_TRIG2 (1ULL << 60)
+#define KVM_PPC_CPU_CHAR_L1D_THREAD_PRIV (1ULL << 59)
+#define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58)
+#define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57)
+#define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56)
+
+#define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63)
+#define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62)
+#define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61)
+
+/* Per-vcpu XICS interrupt controller state */
+#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c)
+
+#define KVM_REG_PPC_ICP_CPPR_SHIFT 56 /* current proc priority */
+#define KVM_REG_PPC_ICP_CPPR_MASK 0xff
+#define KVM_REG_PPC_ICP_XISR_SHIFT 32 /* interrupt status field */
+#define KVM_REG_PPC_ICP_XISR_MASK 0xffffff
+#define KVM_REG_PPC_ICP_MFRR_SHIFT 24 /* pending IPI priority */
+#define KVM_REG_PPC_ICP_MFRR_MASK 0xff
+#define KVM_REG_PPC_ICP_PPRI_SHIFT 16 /* pending irq priority */
+#define KVM_REG_PPC_ICP_PPRI_MASK 0xff
+
+/* Device control API: PPC-specific devices */
+#define KVM_DEV_MPIC_GRP_MISC 1
+#define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */
+
+#define KVM_DEV_MPIC_GRP_REGISTER 2 /* 32-bit */
+#define KVM_DEV_MPIC_GRP_IRQ_ACTIVE 3 /* 32-bit */
+
+/* One-Reg API: PPC-specific registers */
+#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
+#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_PPC_IAC3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_PPC_IAC4 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_PPC_DAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_PPC_DAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_PPC_DABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_PPC_DSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9)
+#define KVM_REG_PPC_PURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa)
+#define KVM_REG_PPC_SPURR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb)
+#define KVM_REG_PPC_DAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc)
+#define KVM_REG_PPC_DSISR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xd)
+#define KVM_REG_PPC_AMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xe)
+#define KVM_REG_PPC_UAMOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xf)
+
+#define KVM_REG_PPC_MMCR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x10)
+#define KVM_REG_PPC_MMCR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x11)
+#define KVM_REG_PPC_MMCRA (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x12)
+#define KVM_REG_PPC_MMCR2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x13)
+#define KVM_REG_PPC_MMCRS (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x14)
+#define KVM_REG_PPC_SIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x15)
+#define KVM_REG_PPC_SDAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x16)
+#define KVM_REG_PPC_SIER (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x17)
+
+#define KVM_REG_PPC_PMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x18)
+#define KVM_REG_PPC_PMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x19)
+#define KVM_REG_PPC_PMC3 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1a)
+#define KVM_REG_PPC_PMC4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1b)
+#define KVM_REG_PPC_PMC5 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1c)
+#define KVM_REG_PPC_PMC6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1d)
+#define KVM_REG_PPC_PMC7 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1e)
+#define KVM_REG_PPC_PMC8 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x1f)
+
+/* 32 floating-point registers */
+#define KVM_REG_PPC_FPR0 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x20)
+#define KVM_REG_PPC_FPR(n) (KVM_REG_PPC_FPR0 + (n))
+#define KVM_REG_PPC_FPR31 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3f)
+
+/* 32 VMX/Altivec vector registers */
+#define KVM_REG_PPC_VR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x40)
+#define KVM_REG_PPC_VR(n) (KVM_REG_PPC_VR0 + (n))
+#define KVM_REG_PPC_VR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x5f)
+
+/* 32 double-width FP registers for VSX */
+/* High-order halves overlap with FP regs */
+#define KVM_REG_PPC_VSR0 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x60)
+#define KVM_REG_PPC_VSR(n) (KVM_REG_PPC_VSR0 + (n))
+#define KVM_REG_PPC_VSR31 (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x7f)
+
+/* FP and vector status/control registers */
+#define KVM_REG_PPC_FPSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x80)
+/*
+ * VSCR register is documented as a 32-bit register in the ISA, but it can
+ * only be accessed via a vector register. Expose VSCR as a 32-bit register
+ * even though the kernel represents it as a 128-bit vector.
+ */
+#define KVM_REG_PPC_VSCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x81)
+
+/* Virtual processor areas */
+/* For SLB & DTL, address in high (first) half, length in low half */
+#define KVM_REG_PPC_VPA_ADDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x82)
+#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
+#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+
+#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
+#define KVM_REG_PPC_EPR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x86)
+
+/* Timer Status Register OR/CLEAR interface */
+#define KVM_REG_PPC_OR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x87)
+#define KVM_REG_PPC_CLEAR_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x88)
+#define KVM_REG_PPC_TCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x89)
+#define KVM_REG_PPC_TSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8a)
+
+/* Debugging: Special instruction for software breakpoint */
+#define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b)
+
+/* MMU registers */
+#define KVM_REG_PPC_MAS0 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8c)
+#define KVM_REG_PPC_MAS1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8d)
+#define KVM_REG_PPC_MAS2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8e)
+#define KVM_REG_PPC_MAS7_3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8f)
+#define KVM_REG_PPC_MAS4 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x90)
+#define KVM_REG_PPC_MAS6 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x91)
+#define KVM_REG_PPC_MMUCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x92)
+/*
+ * The TLBnCFG fields TLBnCFG_N_ENTRY and TLBnCFG_ASSOC can be changed only
+ * using the KVM_CAP_SW_TLB ioctl.
+ */
+#define KVM_REG_PPC_TLB0CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x93)
+#define KVM_REG_PPC_TLB1CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x94)
+#define KVM_REG_PPC_TLB2CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x95)
+#define KVM_REG_PPC_TLB3CFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x96)
+#define KVM_REG_PPC_TLB0PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x97)
+#define KVM_REG_PPC_TLB1PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x98)
+#define KVM_REG_PPC_TLB2PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x99)
+#define KVM_REG_PPC_TLB3PS (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9a)
+#define KVM_REG_PPC_EPTCFG (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9b)
+
+/* Timebase offset */
+#define KVM_REG_PPC_TB_OFFSET (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9c)
+
+/* POWER8 registers */
+#define KVM_REG_PPC_SPMC1 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9d)
+#define KVM_REG_PPC_SPMC2 (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x9e)
+#define KVM_REG_PPC_IAMR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x9f)
+#define KVM_REG_PPC_TFHAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa0)
+#define KVM_REG_PPC_TFIAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa1)
+#define KVM_REG_PPC_TEXASR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa2)
+#define KVM_REG_PPC_FSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa3)
+#define KVM_REG_PPC_PSPB (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xa4)
+#define KVM_REG_PPC_EBBHR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa5)
+#define KVM_REG_PPC_EBBRR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa6)
+#define KVM_REG_PPC_BESCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa7)
+#define KVM_REG_PPC_TAR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa8)
+#define KVM_REG_PPC_DPDES (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xa9)
+#define KVM_REG_PPC_DAWR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaa)
+#define KVM_REG_PPC_DAWRX (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xab)
+#define KVM_REG_PPC_CIABR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xac)
+#define KVM_REG_PPC_IC (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xad)
+#define KVM_REG_PPC_VTB (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xae)
+#define KVM_REG_PPC_CSIGR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xaf)
+#define KVM_REG_PPC_TACR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb0)
+#define KVM_REG_PPC_TCSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb1)
+#define KVM_REG_PPC_PID (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb2)
+#define KVM_REG_PPC_ACOP (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb3)
+
+#define KVM_REG_PPC_VRSAVE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb4)
+#define KVM_REG_PPC_LPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb5)
+#define KVM_REG_PPC_LPCR_64 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb5)
+#define KVM_REG_PPC_PPR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb6)
+
+/* Architecture compatibility level */
+#define KVM_REG_PPC_ARCH_COMPAT (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb7)
+
+#define KVM_REG_PPC_DABRX (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xb8)
+#define KVM_REG_PPC_WORT (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xb9)
+#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
+#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
+#define KVM_REG_PPC_DEC_EXPIRY (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbe)
+#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
+
+/* Transactional Memory checkpointed state:
+ * This is all GPRs, all VSX regs and a subset of SPRs
+ */
+#define KVM_REG_PPC_TM (KVM_REG_PPC | 0x80000000)
+/* TM GPRs */
+#define KVM_REG_PPC_TM_GPR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_PPC_TM_GPR(n) (KVM_REG_PPC_TM_GPR0 + (n))
+#define KVM_REG_PPC_TM_GPR31 (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x1f)
+/* TM VSX */
+#define KVM_REG_PPC_TM_VSR0 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x20)
+#define KVM_REG_PPC_TM_VSR(n) (KVM_REG_PPC_TM_VSR0 + (n))
+#define KVM_REG_PPC_TM_VSR63 (KVM_REG_PPC_TM | KVM_REG_SIZE_U128 | 0x5f)
+/* TM SPRS */
+#define KVM_REG_PPC_TM_CR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x60)
+#define KVM_REG_PPC_TM_LR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x61)
+#define KVM_REG_PPC_TM_CTR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x62)
+#define KVM_REG_PPC_TM_FPSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x63)
+#define KVM_REG_PPC_TM_AMR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x64)
+#define KVM_REG_PPC_TM_PPR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x65)
+#define KVM_REG_PPC_TM_VRSAVE (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x66)
+#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
+#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
+#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
+
+/* PPC64 eXternal Interrupt Controller Specification */
+#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
+
+/* Layout of 64-bit source attribute values */
+#define KVM_XICS_DESTINATION_SHIFT 0
+#define KVM_XICS_DESTINATION_MASK 0xffffffffULL
+#define KVM_XICS_PRIORITY_SHIFT 32
+#define KVM_XICS_PRIORITY_MASK 0xff
+#define KVM_XICS_LEVEL_SENSITIVE (1ULL << 40)
+#define KVM_XICS_MASKED (1ULL << 41)
+#define KVM_XICS_PENDING (1ULL << 42)
+#define KVM_XICS_PRESENTED (1ULL << 43)
+#define KVM_XICS_QUEUED (1ULL << 44)
+
+#endif /* __LINUX_KVM_POWERPC_H */
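
[Editor's note: following the record format documented for KVM_PPC_GET_HTAB_FD above, a reader walks a header plus n_valid 16-byte HPTEs per record, with n_invalid entries implied rather than transmitted. A minimal sketch; struct kvm_get_htab_header is the one defined above, the buffer size is arbitrary, and error handling is elided.]

#include <unistd.h>

#define HPTE_SIZE 16	/* each valid HPTE occupies 16 bytes on the stream */

static void walk_htab_stream(int htab_fd)
{
	char buf[4096];
	ssize_t len;

	while ((len = read(htab_fd, buf, sizeof(buf))) > 0) {
		char *p = buf;

		while (p + sizeof(struct kvm_get_htab_header) <= buf + len) {
			struct kvm_get_htab_header *h = (void *)p;

			/* n_valid HPTEs follow the header on the stream ... */
			p += sizeof(*h) + (size_t)h->n_valid * HPTE_SIZE;
			/* ... and h->n_invalid invalid slots are implied */
		}
	}
}
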
diff --git a/tools/arch/powerpc/include/uapi/asm/mman.h b/tools/arch/powerpc/include/uapi/asm/mman.h
new file mode 100644
index 000000000..f33105bc5
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/mman.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_POWERPC_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_GROWSDOWN 0x0100
+#define MAP_HUGETLB 0x40000
+#define MAP_LOCKED 0x80
+#define MAP_NONBLOCK 0x10000
+#define MAP_NORESERVE 0x40
+#define MAP_POPULATE 0x8000
+#define MAP_STACK 0x20000
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on powerpc, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..9e52c86cc
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_POWERPC_PERF_REGS_H
+#define _UAPI_ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+ PERF_REG_POWERPC_R0,
+ PERF_REG_POWERPC_R1,
+ PERF_REG_POWERPC_R2,
+ PERF_REG_POWERPC_R3,
+ PERF_REG_POWERPC_R4,
+ PERF_REG_POWERPC_R5,
+ PERF_REG_POWERPC_R6,
+ PERF_REG_POWERPC_R7,
+ PERF_REG_POWERPC_R8,
+ PERF_REG_POWERPC_R9,
+ PERF_REG_POWERPC_R10,
+ PERF_REG_POWERPC_R11,
+ PERF_REG_POWERPC_R12,
+ PERF_REG_POWERPC_R13,
+ PERF_REG_POWERPC_R14,
+ PERF_REG_POWERPC_R15,
+ PERF_REG_POWERPC_R16,
+ PERF_REG_POWERPC_R17,
+ PERF_REG_POWERPC_R18,
+ PERF_REG_POWERPC_R19,
+ PERF_REG_POWERPC_R20,
+ PERF_REG_POWERPC_R21,
+ PERF_REG_POWERPC_R22,
+ PERF_REG_POWERPC_R23,
+ PERF_REG_POWERPC_R24,
+ PERF_REG_POWERPC_R25,
+ PERF_REG_POWERPC_R26,
+ PERF_REG_POWERPC_R27,
+ PERF_REG_POWERPC_R28,
+ PERF_REG_POWERPC_R29,
+ PERF_REG_POWERPC_R30,
+ PERF_REG_POWERPC_R31,
+ PERF_REG_POWERPC_NIP,
+ PERF_REG_POWERPC_MSR,
+ PERF_REG_POWERPC_ORIG_R3,
+ PERF_REG_POWERPC_CTR,
+ PERF_REG_POWERPC_LINK,
+ PERF_REG_POWERPC_XER,
+ PERF_REG_POWERPC_CCR,
+ PERF_REG_POWERPC_SOFTE,
+ PERF_REG_POWERPC_TRAP,
+ PERF_REG_POWERPC_DAR,
+ PERF_REG_POWERPC_DSISR,
+ PERF_REG_POWERPC_MAX,
+};
+#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
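
[Editor's note: these enum values are bit positions in perf_event_attr.sample_regs_user. A sketch of requesting NIP and R1 (the stack pointer) with PERF_SAMPLE_REGS_USER; attr setup is otherwise elided and the helper name is illustrative.]

#include <linux/perf_event.h>

static void sample_nip_and_r1(struct perf_event_attr *attr)
{
	attr->sample_type      |= PERF_SAMPLE_REGS_USER;
	attr->sample_regs_user  = (1ULL << PERF_REG_POWERPC_NIP) |
				  (1ULL << PERF_REG_POWERPC_R1);
}
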
diff --git a/tools/arch/powerpc/include/uapi/asm/unistd.h b/tools/arch/powerpc/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..985534d0b
--- /dev/null
+++ b/tools/arch/powerpc/include/uapi/asm/unistd.h
@@ -0,0 +1,404 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * This file contains the system call numbers.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_ASM_POWERPC_UNISTD_H_
+#define _UAPI_ASM_POWERPC_UNISTD_H_
+
+
+#define __NR_restart_syscall 0
+#define __NR_exit 1
+#define __NR_fork 2
+#define __NR_read 3
+#define __NR_write 4
+#define __NR_open 5
+#define __NR_close 6
+#define __NR_waitpid 7
+#define __NR_creat 8
+#define __NR_link 9
+#define __NR_unlink 10
+#define __NR_execve 11
+#define __NR_chdir 12
+#define __NR_time 13
+#define __NR_mknod 14
+#define __NR_chmod 15
+#define __NR_lchown 16
+#define __NR_break 17
+#define __NR_oldstat 18
+#define __NR_lseek 19
+#define __NR_getpid 20
+#define __NR_mount 21
+#define __NR_umount 22
+#define __NR_setuid 23
+#define __NR_getuid 24
+#define __NR_stime 25
+#define __NR_ptrace 26
+#define __NR_alarm 27
+#define __NR_oldfstat 28
+#define __NR_pause 29
+#define __NR_utime 30
+#define __NR_stty 31
+#define __NR_gtty 32
+#define __NR_access 33
+#define __NR_nice 34
+#define __NR_ftime 35
+#define __NR_sync 36
+#define __NR_kill 37
+#define __NR_rename 38
+#define __NR_mkdir 39
+#define __NR_rmdir 40
+#define __NR_dup 41
+#define __NR_pipe 42
+#define __NR_times 43
+#define __NR_prof 44
+#define __NR_brk 45
+#define __NR_setgid 46
+#define __NR_getgid 47
+#define __NR_signal 48
+#define __NR_geteuid 49
+#define __NR_getegid 50
+#define __NR_acct 51
+#define __NR_umount2 52
+#define __NR_lock 53
+#define __NR_ioctl 54
+#define __NR_fcntl 55
+#define __NR_mpx 56
+#define __NR_setpgid 57
+#define __NR_ulimit 58
+#define __NR_oldolduname 59
+#define __NR_umask 60
+#define __NR_chroot 61
+#define __NR_ustat 62
+#define __NR_dup2 63
+#define __NR_getppid 64
+#define __NR_getpgrp 65
+#define __NR_setsid 66
+#define __NR_sigaction 67
+#define __NR_sgetmask 68
+#define __NR_ssetmask 69
+#define __NR_setreuid 70
+#define __NR_setregid 71
+#define __NR_sigsuspend 72
+#define __NR_sigpending 73
+#define __NR_sethostname 74
+#define __NR_setrlimit 75
+#define __NR_getrlimit 76
+#define __NR_getrusage 77
+#define __NR_gettimeofday 78
+#define __NR_settimeofday 79
+#define __NR_getgroups 80
+#define __NR_setgroups 81
+#define __NR_select 82
+#define __NR_symlink 83
+#define __NR_oldlstat 84
+#define __NR_readlink 85
+#define __NR_uselib 86
+#define __NR_swapon 87
+#define __NR_reboot 88
+#define __NR_readdir 89
+#define __NR_mmap 90
+#define __NR_munmap 91
+#define __NR_truncate 92
+#define __NR_ftruncate 93
+#define __NR_fchmod 94
+#define __NR_fchown 95
+#define __NR_getpriority 96
+#define __NR_setpriority 97
+#define __NR_profil 98
+#define __NR_statfs 99
+#define __NR_fstatfs 100
+#define __NR_ioperm 101
+#define __NR_socketcall 102
+#define __NR_syslog 103
+#define __NR_setitimer 104
+#define __NR_getitimer 105
+#define __NR_stat 106
+#define __NR_lstat 107
+#define __NR_fstat 108
+#define __NR_olduname 109
+#define __NR_iopl 110
+#define __NR_vhangup 111
+#define __NR_idle 112
+#define __NR_vm86 113
+#define __NR_wait4 114
+#define __NR_swapoff 115
+#define __NR_sysinfo 116
+#define __NR_ipc 117
+#define __NR_fsync 118
+#define __NR_sigreturn 119
+#define __NR_clone 120
+#define __NR_setdomainname 121
+#define __NR_uname 122
+#define __NR_modify_ldt 123
+#define __NR_adjtimex 124
+#define __NR_mprotect 125
+#define __NR_sigprocmask 126
+#define __NR_create_module 127
+#define __NR_init_module 128
+#define __NR_delete_module 129
+#define __NR_get_kernel_syms 130
+#define __NR_quotactl 131
+#define __NR_getpgid 132
+#define __NR_fchdir 133
+#define __NR_bdflush 134
+#define __NR_sysfs 135
+#define __NR_personality 136
+#define __NR_afs_syscall 137 /* Syscall for Andrew File System */
+#define __NR_setfsuid 138
+#define __NR_setfsgid 139
+#define __NR__llseek 140
+#define __NR_getdents 141
+#define __NR__newselect 142
+#define __NR_flock 143
+#define __NR_msync 144
+#define __NR_readv 145
+#define __NR_writev 146
+#define __NR_getsid 147
+#define __NR_fdatasync 148
+#define __NR__sysctl 149
+#define __NR_mlock 150
+#define __NR_munlock 151
+#define __NR_mlockall 152
+#define __NR_munlockall 153
+#define __NR_sched_setparam 154
+#define __NR_sched_getparam 155
+#define __NR_sched_setscheduler 156
+#define __NR_sched_getscheduler 157
+#define __NR_sched_yield 158
+#define __NR_sched_get_priority_max 159
+#define __NR_sched_get_priority_min 160
+#define __NR_sched_rr_get_interval 161
+#define __NR_nanosleep 162
+#define __NR_mremap 163
+#define __NR_setresuid 164
+#define __NR_getresuid 165
+#define __NR_query_module 166
+#define __NR_poll 167
+#define __NR_nfsservctl 168
+#define __NR_setresgid 169
+#define __NR_getresgid 170
+#define __NR_prctl 171
+#define __NR_rt_sigreturn 172
+#define __NR_rt_sigaction 173
+#define __NR_rt_sigprocmask 174
+#define __NR_rt_sigpending 175
+#define __NR_rt_sigtimedwait 176
+#define __NR_rt_sigqueueinfo 177
+#define __NR_rt_sigsuspend 178
+#define __NR_pread64 179
+#define __NR_pwrite64 180
+#define __NR_chown 181
+#define __NR_getcwd 182
+#define __NR_capget 183
+#define __NR_capset 184
+#define __NR_sigaltstack 185
+#define __NR_sendfile 186
+#define __NR_getpmsg 187 /* some people actually want streams */
+#define __NR_putpmsg 188 /* some people actually want streams */
+#define __NR_vfork 189
+#define __NR_ugetrlimit 190 /* SuS compliant getrlimit */
+#define __NR_readahead 191
+#ifndef __powerpc64__ /* these are 32-bit only */
+#define __NR_mmap2 192
+#define __NR_truncate64 193
+#define __NR_ftruncate64 194
+#define __NR_stat64 195
+#define __NR_lstat64 196
+#define __NR_fstat64 197
+#endif
+#define __NR_pciconfig_read 198
+#define __NR_pciconfig_write 199
+#define __NR_pciconfig_iobase 200
+#define __NR_multiplexer 201
+#define __NR_getdents64 202
+#define __NR_pivot_root 203
+#ifndef __powerpc64__
+#define __NR_fcntl64 204
+#endif
+#define __NR_madvise 205
+#define __NR_mincore 206
+#define __NR_gettid 207
+#define __NR_tkill 208
+#define __NR_setxattr 209
+#define __NR_lsetxattr 210
+#define __NR_fsetxattr 211
+#define __NR_getxattr 212
+#define __NR_lgetxattr 213
+#define __NR_fgetxattr 214
+#define __NR_listxattr 215
+#define __NR_llistxattr 216
+#define __NR_flistxattr 217
+#define __NR_removexattr 218
+#define __NR_lremovexattr 219
+#define __NR_fremovexattr 220
+#define __NR_futex 221
+#define __NR_sched_setaffinity 222
+#define __NR_sched_getaffinity 223
+/* 224 currently unused */
+#define __NR_tuxcall 225
+#ifndef __powerpc64__
+#define __NR_sendfile64 226
+#endif
+#define __NR_io_setup 227
+#define __NR_io_destroy 228
+#define __NR_io_getevents 229
+#define __NR_io_submit 230
+#define __NR_io_cancel 231
+#define __NR_set_tid_address 232
+#define __NR_fadvise64 233
+#define __NR_exit_group 234
+#define __NR_lookup_dcookie 235
+#define __NR_epoll_create 236
+#define __NR_epoll_ctl 237
+#define __NR_epoll_wait 238
+#define __NR_remap_file_pages 239
+#define __NR_timer_create 240
+#define __NR_timer_settime 241
+#define __NR_timer_gettime 242
+#define __NR_timer_getoverrun 243
+#define __NR_timer_delete 244
+#define __NR_clock_settime 245
+#define __NR_clock_gettime 246
+#define __NR_clock_getres 247
+#define __NR_clock_nanosleep 248
+#define __NR_swapcontext 249
+#define __NR_tgkill 250
+#define __NR_utimes 251
+#define __NR_statfs64 252
+#define __NR_fstatfs64 253
+#ifndef __powerpc64__
+#define __NR_fadvise64_64 254
+#endif
+#define __NR_rtas 255
+#define __NR_sys_debug_setcontext 256
+/* Number 257 is reserved for vserver */
+#define __NR_migrate_pages 258
+#define __NR_mbind 259
+#define __NR_get_mempolicy 260
+#define __NR_set_mempolicy 261
+#define __NR_mq_open 262
+#define __NR_mq_unlink 263
+#define __NR_mq_timedsend 264
+#define __NR_mq_timedreceive 265
+#define __NR_mq_notify 266
+#define __NR_mq_getsetattr 267
+#define __NR_kexec_load 268
+#define __NR_add_key 269
+#define __NR_request_key 270
+#define __NR_keyctl 271
+#define __NR_waitid 272
+#define __NR_ioprio_set 273
+#define __NR_ioprio_get 274
+#define __NR_inotify_init 275
+#define __NR_inotify_add_watch 276
+#define __NR_inotify_rm_watch 277
+#define __NR_spu_run 278
+#define __NR_spu_create 279
+#define __NR_pselect6 280
+#define __NR_ppoll 281
+#define __NR_unshare 282
+#define __NR_splice 283
+#define __NR_tee 284
+#define __NR_vmsplice 285
+#define __NR_openat 286
+#define __NR_mkdirat 287
+#define __NR_mknodat 288
+#define __NR_fchownat 289
+#define __NR_futimesat 290
+#ifdef __powerpc64__
+#define __NR_newfstatat 291
+#else
+#define __NR_fstatat64 291
+#endif
+#define __NR_unlinkat 292
+#define __NR_renameat 293
+#define __NR_linkat 294
+#define __NR_symlinkat 295
+#define __NR_readlinkat 296
+#define __NR_fchmodat 297
+#define __NR_faccessat 298
+#define __NR_get_robust_list 299
+#define __NR_set_robust_list 300
+#define __NR_move_pages 301
+#define __NR_getcpu 302
+#define __NR_epoll_pwait 303
+#define __NR_utimensat 304
+#define __NR_signalfd 305
+#define __NR_timerfd_create 306
+#define __NR_eventfd 307
+#define __NR_sync_file_range2 308
+#define __NR_fallocate 309
+#define __NR_subpage_prot 310
+#define __NR_timerfd_settime 311
+#define __NR_timerfd_gettime 312
+#define __NR_signalfd4 313
+#define __NR_eventfd2 314
+#define __NR_epoll_create1 315
+#define __NR_dup3 316
+#define __NR_pipe2 317
+#define __NR_inotify_init1 318
+#define __NR_perf_event_open 319
+#define __NR_preadv 320
+#define __NR_pwritev 321
+#define __NR_rt_tgsigqueueinfo 322
+#define __NR_fanotify_init 323
+#define __NR_fanotify_mark 324
+#define __NR_prlimit64 325
+#define __NR_socket 326
+#define __NR_bind 327
+#define __NR_connect 328
+#define __NR_listen 329
+#define __NR_accept 330
+#define __NR_getsockname 331
+#define __NR_getpeername 332
+#define __NR_socketpair 333
+#define __NR_send 334
+#define __NR_sendto 335
+#define __NR_recv 336
+#define __NR_recvfrom 337
+#define __NR_shutdown 338
+#define __NR_setsockopt 339
+#define __NR_getsockopt 340
+#define __NR_sendmsg 341
+#define __NR_recvmsg 342
+#define __NR_recvmmsg 343
+#define __NR_accept4 344
+#define __NR_name_to_handle_at 345
+#define __NR_open_by_handle_at 346
+#define __NR_clock_adjtime 347
+#define __NR_syncfs 348
+#define __NR_sendmmsg 349
+#define __NR_setns 350
+#define __NR_process_vm_readv 351
+#define __NR_process_vm_writev 352
+#define __NR_finit_module 353
+#define __NR_kcmp 354
+#define __NR_sched_setattr 355
+#define __NR_sched_getattr 356
+#define __NR_renameat2 357
+#define __NR_seccomp 358
+#define __NR_getrandom 359
+#define __NR_memfd_create 360
+#define __NR_bpf 361
+#define __NR_execveat 362
+#define __NR_switch_endian 363
+#define __NR_userfaultfd 364
+#define __NR_membarrier 365
+#define __NR_mlock2 378
+#define __NR_copy_file_range 379
+#define __NR_preadv2 380
+#define __NR_pwritev2 381
+#define __NR_kexec_file_load 382
+#define __NR_statx 383
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+#define __NR_pkey_mprotect 386
+#define __NR_rseq 387
+#define __NR_io_pgetevents 388
+
+#endif /* _UAPI_ASM_POWERPC_UNISTD_H_ */
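Note on usage: these __NR_* constants exist so that tools such as perf can refer to powerpc syscalls by number without depending on libc wrappers. A minimal sketch of invoking one of them directly, assuming a powerpc build where this header is reachable through <sys/syscall.h> (getcpu, number 302 in the table above, historically had no glibc wrapper):

#include <stdio.h>
#include <unistd.h>      /* syscall() */
#include <sys/syscall.h> /* pulls in <asm/unistd.h> for the __NR_* values */

int main(void)
{
	unsigned int cpu = 0, node = 0;

	/* the third getcpu argument (tcache) is obsolete and may be NULL */
	if (syscall(__NR_getcpu, &cpu, &node, NULL) == 0)
		printf("running on cpu %u, node %u\n", cpu, node);
	return 0;
}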
diff --git a/tools/arch/riscv/include/uapi/asm/bitsperlong.h b/tools/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..0b3cb52fd
--- /dev/null
+++ b/tools/arch/riscv/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
new file mode 100644
index 000000000..5030c99f4
--- /dev/null
+++ b/tools/arch/s390/include/asm/barrier.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __TOOLS_LINUX_ASM_BARRIER_H
+#define __TOOLS_LINUX_ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+#define rmb() mb()
+#define wmb() mb()
+
+#endif /* __TOOLS_LINUX_ASM_BARRIER_H */
diff --git a/tools/arch/s390/include/uapi/asm/bitsperlong.h b/tools/arch/s390/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..d2bb62011
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_S390_BITSPERLONG_H
+#define __ASM_S390_BITSPERLONG_H
+
+#ifndef __s390x__
+#define __BITS_PER_LONG 32
+#else
+#define __BITS_PER_LONG 64
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_S390_BITSPERLONG_H */
diff --git a/tools/arch/s390/include/uapi/asm/bpf_perf_event.h b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000..0a8e37a51
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include "ptrace.h"
+
+typedef user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..9a50f02b9
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -0,0 +1,277 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_KVM_S390_H
+#define __LINUX_KVM_S390_H
+/*
+ * KVM s390 specific structures and definitions
+ *
+ * Copyright IBM Corp. 2008, 2018
+ *
+ * Author(s): Carsten Otte <cotte@de.ibm.com>
+ * Christian Borntraeger <borntraeger@de.ibm.com>
+ */
+#include <linux/types.h>
+
+#define __KVM_S390
+#define __KVM_HAVE_GUEST_DEBUG
+
+/* Device control API: s390-specific devices */
+#define KVM_DEV_FLIC_GET_ALL_IRQS 1
+#define KVM_DEV_FLIC_ENQUEUE 2
+#define KVM_DEV_FLIC_CLEAR_IRQS 3
+#define KVM_DEV_FLIC_APF_ENABLE 4
+#define KVM_DEV_FLIC_APF_DISABLE_WAIT 5
+#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
+#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
+#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
+#define KVM_DEV_FLIC_AISM 9
+#define KVM_DEV_FLIC_AIRQ_INJECT 10
+#define KVM_DEV_FLIC_AISM_ALL 11
+/*
+ * We can have up to 4*64k pending subchannels + 8 adapter interrupts,
+ * as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
+ * There are also sclp and machine checks. This gives us
+ * sizeof(kvm_s390_irq)*(4*65536+8+64*64+1+1) = 72 * 266250 = 19170000
+ * Let's round up to 8192 pages.
+ */
+#define KVM_S390_MAX_FLOAT_IRQS 266250
+#define KVM_S390_FLIC_MAX_BUFFER 0x2000000
+
+struct kvm_s390_io_adapter {
+ __u32 id;
+ __u8 isc;
+ __u8 maskable;
+ __u8 swap;
+ __u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+ __u8 isc;
+ __u16 mode;
+};
+
+struct kvm_s390_ais_all {
+ __u8 simm;
+ __u8 nimm;
+};
+
+#define KVM_S390_IO_ADAPTER_MASK 1
+#define KVM_S390_IO_ADAPTER_MAP 2
+#define KVM_S390_IO_ADAPTER_UNMAP 3
+
+struct kvm_s390_io_adapter_req {
+ __u32 id;
+ __u8 type;
+ __u8 mask;
+ __u16 pad0;
+ __u64 addr;
+};
+
+/* kvm attr_group on vm fd */
+#define KVM_S390_VM_MEM_CTRL 0
+#define KVM_S390_VM_TOD 1
+#define KVM_S390_VM_CRYPTO 2
+#define KVM_S390_VM_CPU_MODEL 3
+#define KVM_S390_VM_MIGRATION 4
+
+/* kvm attributes for mem_ctrl */
+#define KVM_S390_VM_MEM_ENABLE_CMMA 0
+#define KVM_S390_VM_MEM_CLR_CMMA 1
+#define KVM_S390_VM_MEM_LIMIT_SIZE 2
+
+#define KVM_S390_NO_MEM_LIMIT U64_MAX
+
+/* kvm attributes for KVM_S390_VM_TOD */
+#define KVM_S390_VM_TOD_LOW 0
+#define KVM_S390_VM_TOD_HIGH 1
+#define KVM_S390_VM_TOD_EXT 2
+
+struct kvm_s390_vm_tod_clock {
+ __u8 epoch_idx;
+ __u64 tod;
+};
+
+/* kvm attributes for KVM_S390_VM_CPU_MODEL */
+/* processor related attributes are r/w */
+#define KVM_S390_VM_CPU_PROCESSOR 0
+struct kvm_s390_vm_cpu_processor {
+ __u64 cpuid;
+ __u16 ibc;
+ __u8 pad[6];
+ __u64 fac_list[256];
+};
+
+/* machine related attributes are r/o */
+#define KVM_S390_VM_CPU_MACHINE 1
+struct kvm_s390_vm_cpu_machine {
+ __u64 cpuid;
+ __u32 ibc;
+ __u8 pad[4];
+ __u64 fac_mask[256];
+ __u64 fac_list[256];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2
+#define KVM_S390_VM_CPU_MACHINE_FEAT 3
+
+#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024
+#define KVM_S390_VM_CPU_FEAT_ESOP 0
+#define KVM_S390_VM_CPU_FEAT_SIEF2 1
+#define KVM_S390_VM_CPU_FEAT_64BSCAO 2
+#define KVM_S390_VM_CPU_FEAT_SIIF 3
+#define KVM_S390_VM_CPU_FEAT_GPERE 4
+#define KVM_S390_VM_CPU_FEAT_GSLS 5
+#define KVM_S390_VM_CPU_FEAT_IB 6
+#define KVM_S390_VM_CPU_FEAT_CEI 7
+#define KVM_S390_VM_CPU_FEAT_IBS 8
+#define KVM_S390_VM_CPU_FEAT_SKEY 9
+#define KVM_S390_VM_CPU_FEAT_CMMA 10
+#define KVM_S390_VM_CPU_FEAT_PFMFI 11
+#define KVM_S390_VM_CPU_FEAT_SIGPIF 12
+#define KVM_S390_VM_CPU_FEAT_KSS 13
+struct kvm_s390_vm_cpu_feat {
+ __u64 feat[16];
+};
+
+#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4
+#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5
+/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */
+struct kvm_s390_vm_cpu_subfunc {
+ __u8 plo[32]; /* always */
+ __u8 ptff[16]; /* with TOD-clock steering */
+ __u8 kmac[16]; /* with MSA */
+ __u8 kmc[16]; /* with MSA */
+ __u8 km[16]; /* with MSA */
+ __u8 kimd[16]; /* with MSA */
+ __u8 klmd[16]; /* with MSA */
+ __u8 pckmo[16]; /* with MSA3 */
+ __u8 kmctr[16]; /* with MSA4 */
+ __u8 kmf[16]; /* with MSA4 */
+ __u8 kmo[16]; /* with MSA4 */
+ __u8 pcc[16]; /* with MSA4 */
+ __u8 ppno[16]; /* with MSA5 */
+ __u8 kma[16]; /* with MSA8 */
+ __u8 reserved[1808];
+};
+
+/* kvm attributes for crypto */
+#define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0
+#define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1
+#define KVM_S390_VM_CRYPTO_DISABLE_AES_KW 2
+#define KVM_S390_VM_CRYPTO_DISABLE_DEA_KW 3
+
+/* kvm attributes for migration mode */
+#define KVM_S390_VM_MIGRATION_STOP 0
+#define KVM_S390_VM_MIGRATION_START 1
+#define KVM_S390_VM_MIGRATION_STATUS 2
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+ /* general purpose regs for s390 */
+ __u64 gprs[16];
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+ __u32 acrs[16];
+ __u64 crs[16];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u32 fpc;
+ __u64 fprs[16];
+};
+
+#define KVM_GUESTDBG_USE_HW_BP 0x00010000
+
+#define KVM_HW_BP 1
+#define KVM_HW_WP_WRITE 2
+#define KVM_SINGLESTEP 4
+
+struct kvm_debug_exit_arch {
+ __u64 addr;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+struct kvm_hw_breakpoint {
+ __u64 addr;
+ __u64 phys_addr;
+ __u64 len;
+ __u8 type;
+ __u8 pad[7]; /* Should be set to 0 */
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u32 nr_hw_bp;
+ __u32 pad; /* Should be set to 0 */
+ struct kvm_hw_breakpoint __user *hw_bp;
+};
+
+/* for KVM_SYNC_PFAULT and KVM_REG_S390_PFTOKEN */
+#define KVM_S390_PFAULT_TOKEN_INVALID 0xffffffffffffffffULL
+
+#define KVM_SYNC_PREFIX (1UL << 0)
+#define KVM_SYNC_GPRS (1UL << 1)
+#define KVM_SYNC_ACRS (1UL << 2)
+#define KVM_SYNC_CRS (1UL << 3)
+#define KVM_SYNC_ARCH0 (1UL << 4)
+#define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS (1UL << 6)
+#define KVM_SYNC_RICCB (1UL << 7)
+#define KVM_SYNC_FPRS (1UL << 8)
+#define KVM_SYNC_GSCB (1UL << 9)
+#define KVM_SYNC_BPBC (1UL << 10)
+#define KVM_SYNC_ETOKEN (1UL << 11)
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+ __u64 prefix; /* prefix register */
+ __u64 gprs[16]; /* general purpose registers */
+ __u32 acrs[16]; /* access registers */
+ __u64 crs[16]; /* control registers */
+ __u64 todpr; /* tod programmable register [ARCH0] */
+ __u64 cputm; /* cpu timer [ARCH0] */
+ __u64 ckc; /* clock comparator [ARCH0] */
+ __u64 pp; /* program parameter [ARCH0] */
+ __u64 gbea; /* guest breaking-event address [ARCH0] */
+ __u64 pft; /* pfault token [PFAULT] */
+ __u64 pfs; /* pfault select [PFAULT] */
+ __u64 pfc; /* pfault compare [PFAULT] */
+ union {
+ __u64 vrs[32][2]; /* vector registers (KVM_SYNC_VRS) */
+ __u64 fprs[16]; /* fp registers (KVM_SYNC_FPRS) */
+ };
+ __u8 reserved[512]; /* for future vector expansion */
+ __u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
+ __u8 bpbc : 1; /* bp mode */
+ __u8 reserved2 : 7;
+ __u8 padding1[51]; /* riccb needs to be 64byte aligned */
+ __u8 riccb[64]; /* runtime instrumentation controls block */
+ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ union {
+ __u8 sdnx[SDNXL]; /* state description annex */
+ struct {
+ __u64 reserved1[2];
+ __u64 gscb[4];
+ __u64 etoken;
+ __u64 etoken_extension;
+ };
+ };
+};
+
+#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
+#define KVM_REG_S390_EPOCHDIFF (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x2)
+#define KVM_REG_S390_CPU_TIMER (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x3)
+#define KVM_REG_S390_CLOCK_COMP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x4)
+#define KVM_REG_S390_PFTOKEN (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x5)
+#define KVM_REG_S390_PFCOMPARE (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x6)
+#define KVM_REG_S390_PFSELECT (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x7)
+#define KVM_REG_S390_PP (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x8)
+#define KVM_REG_S390_GBEA (KVM_REG_S390 | KVM_REG_SIZE_U64 | 0x9)
+#endif
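The VM attribute groups above (KVM_S390_VM_TOD and friends) are driven through the generic device-attribute ioctls on the VM file descriptor. A hedged sketch of setting the extended guest TOD clock, assuming a vm_fd obtained via KVM_CREATE_VM and struct kvm_device_attr from <linux/kvm.h>:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Set the guest TOD clock (epoch index + TOD value); returns 0 on success. */
static int set_guest_tod(int vm_fd, __u8 epoch_idx, __u64 tod)
{
	struct kvm_s390_vm_tod_clock gtod = {
		.epoch_idx = epoch_idx,
		.tod       = tod,
	};
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_TOD,
		.attr  = KVM_S390_VM_TOD_EXT,
		.addr  = (__u64)(unsigned long)&gtod,
	};

	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}

The same pattern, with .group and .attr swapped for the other values above, drives the CPU-model, crypto and migration attributes.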
diff --git a/tools/arch/s390/include/uapi/asm/kvm_perf.h b/tools/arch/s390/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000000000..84606b8cc
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Definitions for perf-kvm on s390
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ */
+
+#ifndef __LINUX_KVM_PERF_S390_H
+#define __LINUX_KVM_PERF_S390_H
+
+#include <asm/sie.h>
+
+#define DECODE_STR_LEN 40
+
+#define VCPU_ID "id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
+#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
+#define KVM_EXIT_REASON "icptcode"
+
+#endif
diff --git a/tools/arch/s390/include/uapi/asm/mman.h b/tools/arch/s390/include/uapi/asm/mman.h
new file mode 100644
index 000000000..4ec32e425
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_S390_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on s390, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/s390/include/uapi/asm/perf_regs.h b/tools/arch/s390/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..d17dd9e5d
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/perf_regs.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_S390_PERF_REGS_H
+#define _ASM_S390_PERF_REGS_H
+
+enum perf_event_s390_regs {
+ PERF_REG_S390_R0,
+ PERF_REG_S390_R1,
+ PERF_REG_S390_R2,
+ PERF_REG_S390_R3,
+ PERF_REG_S390_R4,
+ PERF_REG_S390_R5,
+ PERF_REG_S390_R6,
+ PERF_REG_S390_R7,
+ PERF_REG_S390_R8,
+ PERF_REG_S390_R9,
+ PERF_REG_S390_R10,
+ PERF_REG_S390_R11,
+ PERF_REG_S390_R12,
+ PERF_REG_S390_R13,
+ PERF_REG_S390_R14,
+ PERF_REG_S390_R15,
+ PERF_REG_S390_FP0,
+ PERF_REG_S390_FP1,
+ PERF_REG_S390_FP2,
+ PERF_REG_S390_FP3,
+ PERF_REG_S390_FP4,
+ PERF_REG_S390_FP5,
+ PERF_REG_S390_FP6,
+ PERF_REG_S390_FP7,
+ PERF_REG_S390_FP8,
+ PERF_REG_S390_FP9,
+ PERF_REG_S390_FP10,
+ PERF_REG_S390_FP11,
+ PERF_REG_S390_FP12,
+ PERF_REG_S390_FP13,
+ PERF_REG_S390_FP14,
+ PERF_REG_S390_FP15,
+ PERF_REG_S390_MASK,
+ PERF_REG_S390_PC,
+
+ PERF_REG_S390_MAX
+};
+
+#endif /* _ASM_S390_PERF_REGS_H */
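These enum values index the s390 registers in the PERF_SAMPLE_REGS_USER bitmask of a perf_event_attr. A short sketch, assuming an attr being prepared for perf_event_open(2):

#include <linux/perf_event.h>

/* Sample the s390 PC and r15 (stack pointer) with every event. */
static void request_user_regs(struct perf_event_attr *attr)
{
	attr->sample_type      |= PERF_SAMPLE_REGS_USER;
	attr->sample_regs_user  = (1ULL << PERF_REG_S390_PC) |
				  (1ULL << PERF_REG_S390_R15);
}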
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
new file mode 100644
index 000000000..543dd70e1
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/ptrace.h
@@ -0,0 +1,457 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * S390 version
+ * Copyright IBM Corp. 1999, 2000
+ * Author(s): Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com)
+ */
+
+#ifndef _UAPI_S390_PTRACE_H
+#define _UAPI_S390_PTRACE_H
+
+/*
+ * Offsets in the user_regs_struct. They are used for the ptrace
+ * system call and in entry.S
+ */
+#ifndef __s390x__
+
+#define PT_PSWMASK 0x00
+#define PT_PSWADDR 0x04
+#define PT_GPR0 0x08
+#define PT_GPR1 0x0C
+#define PT_GPR2 0x10
+#define PT_GPR3 0x14
+#define PT_GPR4 0x18
+#define PT_GPR5 0x1C
+#define PT_GPR6 0x20
+#define PT_GPR7 0x24
+#define PT_GPR8 0x28
+#define PT_GPR9 0x2C
+#define PT_GPR10 0x30
+#define PT_GPR11 0x34
+#define PT_GPR12 0x38
+#define PT_GPR13 0x3C
+#define PT_GPR14 0x40
+#define PT_GPR15 0x44
+#define PT_ACR0 0x48
+#define PT_ACR1 0x4C
+#define PT_ACR2 0x50
+#define PT_ACR3 0x54
+#define PT_ACR4 0x58
+#define PT_ACR5 0x5C
+#define PT_ACR6 0x60
+#define PT_ACR7 0x64
+#define PT_ACR8 0x68
+#define PT_ACR9 0x6C
+#define PT_ACR10 0x70
+#define PT_ACR11 0x74
+#define PT_ACR12 0x78
+#define PT_ACR13 0x7C
+#define PT_ACR14 0x80
+#define PT_ACR15 0x84
+#define PT_ORIGGPR2 0x88
+#define PT_FPC 0x90
+/*
+ * A nasty fact of life: the ptrace API
+ * only supports passing of longs.
+ */
+#define PT_FPR0_HI 0x98
+#define PT_FPR0_LO 0x9C
+#define PT_FPR1_HI 0xA0
+#define PT_FPR1_LO 0xA4
+#define PT_FPR2_HI 0xA8
+#define PT_FPR2_LO 0xAC
+#define PT_FPR3_HI 0xB0
+#define PT_FPR3_LO 0xB4
+#define PT_FPR4_HI 0xB8
+#define PT_FPR4_LO 0xBC
+#define PT_FPR5_HI 0xC0
+#define PT_FPR5_LO 0xC4
+#define PT_FPR6_HI 0xC8
+#define PT_FPR6_LO 0xCC
+#define PT_FPR7_HI 0xD0
+#define PT_FPR7_LO 0xD4
+#define PT_FPR8_HI 0xD8
+#define PT_FPR8_LO 0xDC
+#define PT_FPR9_HI 0xE0
+#define PT_FPR9_LO 0xE4
+#define PT_FPR10_HI 0xE8
+#define PT_FPR10_LO 0xEC
+#define PT_FPR11_HI 0xF0
+#define PT_FPR11_LO 0xF4
+#define PT_FPR12_HI 0xF8
+#define PT_FPR12_LO 0xFC
+#define PT_FPR13_HI 0x100
+#define PT_FPR13_LO 0x104
+#define PT_FPR14_HI 0x108
+#define PT_FPR14_LO 0x10C
+#define PT_FPR15_HI 0x110
+#define PT_FPR15_LO 0x114
+#define PT_CR_9 0x118
+#define PT_CR_10 0x11C
+#define PT_CR_11 0x120
+#define PT_IEEE_IP 0x13C
+#define PT_LASTOFF PT_IEEE_IP
+#define PT_ENDREGS 0x140-1
+
+#define GPR_SIZE 4
+#define CR_SIZE 4
+
+#define STACK_FRAME_OVERHEAD 96 /* size of minimum stack frame */
+
+#else /* __s390x__ */
+
+#define PT_PSWMASK 0x00
+#define PT_PSWADDR 0x08
+#define PT_GPR0 0x10
+#define PT_GPR1 0x18
+#define PT_GPR2 0x20
+#define PT_GPR3 0x28
+#define PT_GPR4 0x30
+#define PT_GPR5 0x38
+#define PT_GPR6 0x40
+#define PT_GPR7 0x48
+#define PT_GPR8 0x50
+#define PT_GPR9 0x58
+#define PT_GPR10 0x60
+#define PT_GPR11 0x68
+#define PT_GPR12 0x70
+#define PT_GPR13 0x78
+#define PT_GPR14 0x80
+#define PT_GPR15 0x88
+#define PT_ACR0 0x90
+#define PT_ACR1 0x94
+#define PT_ACR2 0x98
+#define PT_ACR3 0x9C
+#define PT_ACR4 0xA0
+#define PT_ACR5 0xA4
+#define PT_ACR6 0xA8
+#define PT_ACR7 0xAC
+#define PT_ACR8 0xB0
+#define PT_ACR9 0xB4
+#define PT_ACR10 0xB8
+#define PT_ACR11 0xBC
+#define PT_ACR12 0xC0
+#define PT_ACR13 0xC4
+#define PT_ACR14 0xC8
+#define PT_ACR15 0xCC
+#define PT_ORIGGPR2 0xD0
+#define PT_FPC 0xD8
+#define PT_FPR0 0xE0
+#define PT_FPR1 0xE8
+#define PT_FPR2 0xF0
+#define PT_FPR3 0xF8
+#define PT_FPR4 0x100
+#define PT_FPR5 0x108
+#define PT_FPR6 0x110
+#define PT_FPR7 0x118
+#define PT_FPR8 0x120
+#define PT_FPR9 0x128
+#define PT_FPR10 0x130
+#define PT_FPR11 0x138
+#define PT_FPR12 0x140
+#define PT_FPR13 0x148
+#define PT_FPR14 0x150
+#define PT_FPR15 0x158
+#define PT_CR_9 0x160
+#define PT_CR_10 0x168
+#define PT_CR_11 0x170
+#define PT_IEEE_IP 0x1A8
+#define PT_LASTOFF PT_IEEE_IP
+#define PT_ENDREGS 0x1B0-1
+
+#define GPR_SIZE 8
+#define CR_SIZE 8
+
+#define STACK_FRAME_OVERHEAD 160 /* size of minimum stack frame */
+
+#endif /* __s390x__ */
+
+#define NUM_GPRS 16
+#define NUM_FPRS 16
+#define NUM_CRS 16
+#define NUM_ACRS 16
+
+#define NUM_CR_WORDS 3
+
+#define FPR_SIZE 8
+#define FPC_SIZE 4
+#define FPC_PAD_SIZE 4 /* gcc insists on aligning the fpregs */
+#define ACR_SIZE 4
+
+
+#define PTRACE_OLDSETOPTIONS 21
+
+#ifndef __ASSEMBLY__
+#include <linux/stddef.h>
+#include <linux/types.h>
+
+typedef union {
+ float f;
+ double d;
+ __u64 ui;
+ struct
+ {
+ __u32 hi;
+ __u32 lo;
+ } fp;
+} freg_t;
+
+typedef struct {
+ __u32 fpc;
+ __u32 pad;
+ freg_t fprs[NUM_FPRS];
+} s390_fp_regs;
+
+#define FPC_EXCEPTION_MASK 0xF8000000
+#define FPC_FLAGS_MASK 0x00F80000
+#define FPC_DXC_MASK 0x0000FF00
+#define FPC_RM_MASK 0x00000003
+
+/* This typedef defines what a Program Status Word looks like */
+typedef struct {
+ unsigned long mask;
+ unsigned long addr;
+} __attribute__ ((aligned(8))) psw_t;
+
+#ifndef __s390x__
+
+#define PSW_MASK_PER 0x40000000UL
+#define PSW_MASK_DAT 0x04000000UL
+#define PSW_MASK_IO 0x02000000UL
+#define PSW_MASK_EXT 0x01000000UL
+#define PSW_MASK_KEY 0x00F00000UL
+#define PSW_MASK_BASE 0x00080000UL /* always one */
+#define PSW_MASK_MCHECK 0x00040000UL
+#define PSW_MASK_WAIT 0x00020000UL
+#define PSW_MASK_PSTATE 0x00010000UL
+#define PSW_MASK_ASC 0x0000C000UL
+#define PSW_MASK_CC 0x00003000UL
+#define PSW_MASK_PM 0x00000F00UL
+#define PSW_MASK_RI 0x00000000UL
+#define PSW_MASK_EA 0x00000000UL
+#define PSW_MASK_BA 0x00000000UL
+
+#define PSW_MASK_USER 0x0000FF00UL
+
+#define PSW_ADDR_AMODE 0x80000000UL
+#define PSW_ADDR_INSN 0x7FFFFFFFUL
+
+#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 20)
+
+#define PSW_ASC_PRIMARY 0x00000000UL
+#define PSW_ASC_ACCREG 0x00004000UL
+#define PSW_ASC_SECONDARY 0x00008000UL
+#define PSW_ASC_HOME 0x0000C000UL
+
+#else /* __s390x__ */
+
+#define PSW_MASK_PER 0x4000000000000000UL
+#define PSW_MASK_DAT 0x0400000000000000UL
+#define PSW_MASK_IO 0x0200000000000000UL
+#define PSW_MASK_EXT 0x0100000000000000UL
+#define PSW_MASK_BASE 0x0000000000000000UL
+#define PSW_MASK_KEY 0x00F0000000000000UL
+#define PSW_MASK_MCHECK 0x0004000000000000UL
+#define PSW_MASK_WAIT 0x0002000000000000UL
+#define PSW_MASK_PSTATE 0x0001000000000000UL
+#define PSW_MASK_ASC 0x0000C00000000000UL
+#define PSW_MASK_CC 0x0000300000000000UL
+#define PSW_MASK_PM 0x00000F0000000000UL
+#define PSW_MASK_RI 0x0000008000000000UL
+#define PSW_MASK_EA 0x0000000100000000UL
+#define PSW_MASK_BA 0x0000000080000000UL
+
+#define PSW_MASK_USER 0x0000FF0180000000UL
+
+#define PSW_ADDR_AMODE 0x0000000000000000UL
+#define PSW_ADDR_INSN 0xFFFFFFFFFFFFFFFFUL
+
+#define PSW_DEFAULT_KEY (((unsigned long) PAGE_DEFAULT_ACC) << 52)
+
+#define PSW_ASC_PRIMARY 0x0000000000000000UL
+#define PSW_ASC_ACCREG 0x0000400000000000UL
+#define PSW_ASC_SECONDARY 0x0000800000000000UL
+#define PSW_ASC_HOME 0x0000C00000000000UL
+
+#endif /* __s390x__ */
+
+
+/*
+ * The s390_regs structure is used to define the elf_gregset_t.
+ */
+typedef struct {
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned int acrs[NUM_ACRS];
+ unsigned long orig_gpr2;
+} s390_regs;
+
+/*
+ * The user_pt_regs structure exports the beginning of
+ * the in-kernel pt_regs structure to user space.
+ */
+typedef struct {
+ unsigned long args[1];
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+} user_pt_regs;
+
+/*
+ * Now for the user space program event recording (trace) definitions.
+ * The following structures are used only for the ptrace interface; don't
+ * touch or even look at them unless you intend to modify the user-space
+ * ptrace interface. In particular, stay away from them for in-kernel PER.
+ */
+typedef struct {
+ unsigned long cr[NUM_CR_WORDS];
+} per_cr_words;
+
+#define PER_EM_MASK 0xE8000000UL
+
+typedef struct {
+#ifdef __s390x__
+ unsigned : 32;
+#endif /* __s390x__ */
+ unsigned em_branching : 1;
+ unsigned em_instruction_fetch : 1;
+ /*
+ * Switching on storage alteration automatically fixes
+	 * the storage alteration event bit in the user's std.
+ */
+ unsigned em_storage_alteration : 1;
+ unsigned em_gpr_alt_unused : 1;
+ unsigned em_store_real_address : 1;
+ unsigned : 3;
+ unsigned branch_addr_ctl : 1;
+ unsigned : 1;
+ unsigned storage_alt_space_ctl : 1;
+ unsigned : 21;
+ unsigned long starting_addr;
+ unsigned long ending_addr;
+} per_cr_bits;
+
+typedef struct {
+ unsigned short perc_atmid;
+ unsigned long address;
+ unsigned char access_id;
+} per_lowcore_words;
+
+typedef struct {
+ unsigned perc_branching : 1;
+ unsigned perc_instruction_fetch : 1;
+ unsigned perc_storage_alteration : 1;
+ unsigned perc_gpr_alt_unused : 1;
+ unsigned perc_store_real_address : 1;
+ unsigned : 3;
+ unsigned atmid_psw_bit_31 : 1;
+ unsigned atmid_validity_bit : 1;
+ unsigned atmid_psw_bit_32 : 1;
+ unsigned atmid_psw_bit_5 : 1;
+ unsigned atmid_psw_bit_16 : 1;
+ unsigned atmid_psw_bit_17 : 1;
+ unsigned si : 2;
+ unsigned long address;
+ unsigned : 4;
+ unsigned access_id : 4;
+} per_lowcore_bits;
+
+typedef struct {
+ union {
+ per_cr_words words;
+ per_cr_bits bits;
+ } control_regs;
+ /*
+ * The single_step and instruction_fetch bits are obsolete,
+ * the kernel always sets them to zero. To enable single
+ * stepping use ptrace(PTRACE_SINGLESTEP) instead.
+ */
+ unsigned single_step : 1;
+ unsigned instruction_fetch : 1;
+ unsigned : 30;
+ /*
+ * These addresses are copied into cr10 & cr11 if single
+ * stepping is switched off
+ */
+ unsigned long starting_addr;
+ unsigned long ending_addr;
+ union {
+ per_lowcore_words words;
+ per_lowcore_bits bits;
+ } lowcore;
+} per_struct;
+
+typedef struct {
+ unsigned int len;
+ unsigned long kernel_addr;
+ unsigned long process_addr;
+} ptrace_area;
+
+/*
+ * S/390 specific non posix ptrace requests. I chose unusual values so
+ * they are unlikely to clash with future ptrace definitions.
+ */
+#define PTRACE_PEEKUSR_AREA 0x5000
+#define PTRACE_POKEUSR_AREA 0x5001
+#define PTRACE_PEEKTEXT_AREA 0x5002
+#define PTRACE_PEEKDATA_AREA 0x5003
+#define PTRACE_POKETEXT_AREA 0x5004
+#define PTRACE_POKEDATA_AREA 0x5005
+#define PTRACE_GET_LAST_BREAK 0x5006
+#define PTRACE_PEEK_SYSTEM_CALL 0x5007
+#define PTRACE_POKE_SYSTEM_CALL 0x5008
+#define PTRACE_ENABLE_TE 0x5009
+#define PTRACE_DISABLE_TE 0x5010
+#define PTRACE_TE_ABORT_RAND 0x5011
+
+/*
+ * The numbers chosen here are somewhat arbitrary but absolutely MUST
+ * not overlap with any of the numbers assigned in <linux/ptrace.h>.
+ */
+#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */
+
+/*
+ * PT_PROT definition is loosely based on hppa bsd definition in
+ * gdb/hppab-nat.c
+ */
+#define PTRACE_PROT 21
+
+typedef enum {
+ ptprot_set_access_watchpoint,
+ ptprot_set_write_watchpoint,
+ ptprot_disable_watchpoint
+} ptprot_flags;
+
+typedef struct {
+ unsigned long lowaddr;
+ unsigned long hiaddr;
+ ptprot_flags prot;
+} ptprot_area;
+
+/* Sequence of bytes for breakpoint illegal instruction. */
+#define S390_BREAKPOINT {0x0,0x1}
+#define S390_BREAKPOINT_U16 ((__u16)0x0001)
+#define S390_SYSCALL_OPCODE ((__u16)0x0a00)
+#define S390_SYSCALL_SIZE 2
+
+/*
+ * The user_regs_struct defines the way the user registers are
+ * stored on the stack for signal handling.
+ */
+struct user_regs_struct {
+ psw_t psw;
+ unsigned long gprs[NUM_GPRS];
+ unsigned int acrs[NUM_ACRS];
+ unsigned long orig_gpr2;
+ s390_fp_regs fp_regs;
+ /*
+ * These per registers are in here so that gdb can modify them
+ * itself as there is no "official" ptrace interface for hardware
+ * watchpoints. This is the way intel does it.
+ */
+ per_struct per_info;
+ unsigned long ieee_instruction_pointer; /* obsolete, always 0 */
+};
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _UAPI_S390_PTRACE_H */
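The PTRACE_*_AREA requests above transfer a whole block of the user area per call instead of one long per PTRACE_PEEKUSR. A hedged sketch of reading all general purpose registers of a stopped tracee, using ptrace_area, PT_GPR0, GPR_SIZE and NUM_GPRS from this header (glibc's ptrace prototype may require a cast on the raw request value):

#include <sys/ptrace.h>
#include <sys/types.h>

/* Copy gprs[0..15] of a stopped tracee into buf; returns 0 on success. */
static long peek_gprs(pid_t pid, unsigned long buf[NUM_GPRS])
{
	ptrace_area parea = {
		.len          = NUM_GPRS * GPR_SIZE,
		.kernel_addr  = PT_GPR0,            /* offset into the user area */
		.process_addr = (unsigned long)buf, /* destination in the tracer */
	};

	return ptrace(PTRACE_PEEKUSR_AREA, pid, &parea, 0L);
}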
diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h
new file mode 100644
index 000000000..6ca1e68d7
--- /dev/null
+++ b/tools/arch/s390/include/uapi/asm/sie.h
@@ -0,0 +1,252 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_S390_SIE_H
+#define _UAPI_ASM_S390_SIE_H
+
+#define diagnose_codes \
+ { 0x10, "DIAG (0x10) release pages" }, \
+ { 0x44, "DIAG (0x44) time slice end" }, \
+ { 0x9c, "DIAG (0x9c) time slice end directed" }, \
+ { 0x204, "DIAG (0x204) logical-cpu utilization" }, \
+ { 0x258, "DIAG (0x258) page-reference services" }, \
+ { 0x288, "DIAG (0x288) watchdog functions" }, \
+ { 0x308, "DIAG (0x308) ipl functions" }, \
+ { 0x500, "DIAG (0x500) KVM virtio functions" }, \
+ { 0x501, "DIAG (0x501) KVM breakpoint" }
+
+#define sigp_order_codes \
+ { 0x01, "SIGP sense" }, \
+ { 0x02, "SIGP external call" }, \
+ { 0x03, "SIGP emergency signal" }, \
+ { 0x04, "SIGP start" }, \
+ { 0x05, "SIGP stop" }, \
+ { 0x06, "SIGP restart" }, \
+ { 0x09, "SIGP stop and store status" }, \
+ { 0x0b, "SIGP initial cpu reset" }, \
+ { 0x0c, "SIGP cpu reset" }, \
+ { 0x0d, "SIGP set prefix" }, \
+ { 0x0e, "SIGP store status at address" }, \
+ { 0x12, "SIGP set architecture" }, \
+ { 0x13, "SIGP conditional emergency signal" }, \
+ { 0x15, "SIGP sense running" }, \
+ { 0x16, "SIGP set multithreading"}, \
+	{ 0x17, "SIGP store additional status at address"}
+
+#define icpt_prog_codes \
+ { 0x0001, "Prog Operation" }, \
+ { 0x0002, "Prog Privileged Operation" }, \
+ { 0x0003, "Prog Execute" }, \
+ { 0x0004, "Prog Protection" }, \
+ { 0x0005, "Prog Addressing" }, \
+ { 0x0006, "Prog Specification" }, \
+ { 0x0007, "Prog Data" }, \
+ { 0x0008, "Prog Fixedpoint overflow" }, \
+ { 0x0009, "Prog Fixedpoint divide" }, \
+ { 0x000A, "Prog Decimal overflow" }, \
+ { 0x000B, "Prog Decimal divide" }, \
+ { 0x000C, "Prog HFP exponent overflow" }, \
+ { 0x000D, "Prog HFP exponent underflow" }, \
+ { 0x000E, "Prog HFP significance" }, \
+ { 0x000F, "Prog HFP divide" }, \
+ { 0x0010, "Prog Segment translation" }, \
+ { 0x0011, "Prog Page translation" }, \
+ { 0x0012, "Prog Translation specification" }, \
+ { 0x0013, "Prog Special operation" }, \
+ { 0x0015, "Prog Operand" }, \
+ { 0x0016, "Prog Trace table" }, \
+ { 0x0017, "Prog ASNtranslation specification" }, \
+ { 0x001C, "Prog Spaceswitch event" }, \
+ { 0x001D, "Prog HFP square root" }, \
+ { 0x001F, "Prog PCtranslation specification" }, \
+ { 0x0020, "Prog AFX translation" }, \
+ { 0x0021, "Prog ASX translation" }, \
+ { 0x0022, "Prog LX translation" }, \
+ { 0x0023, "Prog EX translation" }, \
+ { 0x0024, "Prog Primary authority" }, \
+ { 0x0025, "Prog Secondary authority" }, \
+ { 0x0026, "Prog LFXtranslation exception" }, \
+ { 0x0027, "Prog LSXtranslation exception" }, \
+ { 0x0028, "Prog ALET specification" }, \
+ { 0x0029, "Prog ALEN translation" }, \
+ { 0x002A, "Prog ALE sequence" }, \
+ { 0x002B, "Prog ASTE validity" }, \
+ { 0x002C, "Prog ASTE sequence" }, \
+ { 0x002D, "Prog Extended authority" }, \
+ { 0x002E, "Prog LSTE sequence" }, \
+ { 0x002F, "Prog ASTE instance" }, \
+ { 0x0030, "Prog Stack full" }, \
+ { 0x0031, "Prog Stack empty" }, \
+ { 0x0032, "Prog Stack specification" }, \
+ { 0x0033, "Prog Stack type" }, \
+ { 0x0034, "Prog Stack operation" }, \
+ { 0x0039, "Prog Region first translation" }, \
+ { 0x003A, "Prog Region second translation" }, \
+ { 0x003B, "Prog Region third translation" }, \
+ { 0x0040, "Prog Monitor event" }, \
+ { 0x0080, "Prog PER event" }, \
+ { 0x0119, "Prog Crypto operation" }
+
+#define exit_code_ipa0(ipa0, opcode, mnemonic) \
+ { (ipa0 << 8 | opcode), #ipa0 " " mnemonic }
+#define exit_code(opcode, mnemonic) \
+ { opcode, mnemonic }
+
+#define icpt_insn_codes \
+ exit_code_ipa0(0x01, 0x01, "PR"), \
+ exit_code_ipa0(0x01, 0x04, "PTFF"), \
+ exit_code_ipa0(0x01, 0x07, "SCKPF"), \
+ exit_code_ipa0(0xAA, 0x00, "RINEXT"), \
+ exit_code_ipa0(0xAA, 0x01, "RION"), \
+ exit_code_ipa0(0xAA, 0x02, "TRIC"), \
+ exit_code_ipa0(0xAA, 0x03, "RIOFF"), \
+ exit_code_ipa0(0xAA, 0x04, "RIEMIT"), \
+ exit_code_ipa0(0xB2, 0x02, "STIDP"), \
+ exit_code_ipa0(0xB2, 0x04, "SCK"), \
+ exit_code_ipa0(0xB2, 0x05, "STCK"), \
+ exit_code_ipa0(0xB2, 0x06, "SCKC"), \
+ exit_code_ipa0(0xB2, 0x07, "STCKC"), \
+ exit_code_ipa0(0xB2, 0x08, "SPT"), \
+ exit_code_ipa0(0xB2, 0x09, "STPT"), \
+ exit_code_ipa0(0xB2, 0x0d, "PTLB"), \
+ exit_code_ipa0(0xB2, 0x10, "SPX"), \
+ exit_code_ipa0(0xB2, 0x11, "STPX"), \
+ exit_code_ipa0(0xB2, 0x12, "STAP"), \
+ exit_code_ipa0(0xB2, 0x14, "SIE"), \
+ exit_code_ipa0(0xB2, 0x16, "SETR"), \
+ exit_code_ipa0(0xB2, 0x17, "STETR"), \
+ exit_code_ipa0(0xB2, 0x18, "PC"), \
+ exit_code_ipa0(0xB2, 0x20, "SERVC"), \
+ exit_code_ipa0(0xB2, 0x21, "IPTE"), \
+ exit_code_ipa0(0xB2, 0x28, "PT"), \
+ exit_code_ipa0(0xB2, 0x29, "ISKE"), \
+ exit_code_ipa0(0xB2, 0x2a, "RRBE"), \
+ exit_code_ipa0(0xB2, 0x2b, "SSKE"), \
+ exit_code_ipa0(0xB2, 0x2c, "TB"), \
+ exit_code_ipa0(0xB2, 0x2e, "PGIN"), \
+ exit_code_ipa0(0xB2, 0x2f, "PGOUT"), \
+ exit_code_ipa0(0xB2, 0x30, "CSCH"), \
+ exit_code_ipa0(0xB2, 0x31, "HSCH"), \
+ exit_code_ipa0(0xB2, 0x32, "MSCH"), \
+ exit_code_ipa0(0xB2, 0x33, "SSCH"), \
+ exit_code_ipa0(0xB2, 0x34, "STSCH"), \
+ exit_code_ipa0(0xB2, 0x35, "TSCH"), \
+ exit_code_ipa0(0xB2, 0x36, "TPI"), \
+ exit_code_ipa0(0xB2, 0x37, "SAL"), \
+ exit_code_ipa0(0xB2, 0x38, "RSCH"), \
+ exit_code_ipa0(0xB2, 0x39, "STCRW"), \
+ exit_code_ipa0(0xB2, 0x3a, "STCPS"), \
+ exit_code_ipa0(0xB2, 0x3b, "RCHP"), \
+ exit_code_ipa0(0xB2, 0x3c, "SCHM"), \
+ exit_code_ipa0(0xB2, 0x40, "BAKR"), \
+ exit_code_ipa0(0xB2, 0x48, "PALB"), \
+ exit_code_ipa0(0xB2, 0x4c, "TAR"), \
+ exit_code_ipa0(0xB2, 0x50, "CSP"), \
+ exit_code_ipa0(0xB2, 0x54, "MVPG"), \
+ exit_code_ipa0(0xB2, 0x56, "STHYI"), \
+ exit_code_ipa0(0xB2, 0x58, "BSG"), \
+ exit_code_ipa0(0xB2, 0x5a, "BSA"), \
+ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \
+ exit_code_ipa0(0xB2, 0x74, "SIGA"), \
+ exit_code_ipa0(0xB2, 0x76, "XSCH"), \
+ exit_code_ipa0(0xB2, 0x78, "STCKE"), \
+ exit_code_ipa0(0xB2, 0x7c, "STCKF"), \
+ exit_code_ipa0(0xB2, 0x7d, "STSI"), \
+ exit_code_ipa0(0xB2, 0xb0, "STFLE"), \
+ exit_code_ipa0(0xB2, 0xb1, "STFL"), \
+ exit_code_ipa0(0xB2, 0xb2, "LPSWE"), \
+ exit_code_ipa0(0xB2, 0xf8, "TEND"), \
+ exit_code_ipa0(0xB2, 0xfc, "TABORT"), \
+ exit_code_ipa0(0xB9, 0x1e, "KMAC"), \
+ exit_code_ipa0(0xB9, 0x28, "PCKMO"), \
+ exit_code_ipa0(0xB9, 0x2a, "KMF"), \
+ exit_code_ipa0(0xB9, 0x2b, "KMO"), \
+ exit_code_ipa0(0xB9, 0x2d, "KMCTR"), \
+ exit_code_ipa0(0xB9, 0x2e, "KM"), \
+ exit_code_ipa0(0xB9, 0x2f, "KMC"), \
+ exit_code_ipa0(0xB9, 0x3e, "KIMD"), \
+ exit_code_ipa0(0xB9, 0x3f, "KLMD"), \
+ exit_code_ipa0(0xB9, 0x8a, "CSPG"), \
+ exit_code_ipa0(0xB9, 0x8d, "EPSW"), \
+ exit_code_ipa0(0xB9, 0x8e, "IDTE"), \
+ exit_code_ipa0(0xB9, 0x8f, "CRDTE"), \
+ exit_code_ipa0(0xB9, 0x9c, "EQBS"), \
+ exit_code_ipa0(0xB9, 0xa2, "PTF"), \
+ exit_code_ipa0(0xB9, 0xab, "ESSA"), \
+ exit_code_ipa0(0xB9, 0xae, "RRBM"), \
+ exit_code_ipa0(0xB9, 0xaf, "PFMF"), \
+ exit_code_ipa0(0xE3, 0x03, "LRAG"), \
+ exit_code_ipa0(0xE3, 0x13, "LRAY"), \
+ exit_code_ipa0(0xE3, 0x25, "NTSTG"), \
+ exit_code_ipa0(0xE5, 0x00, "LASP"), \
+ exit_code_ipa0(0xE5, 0x01, "TPROT"), \
+ exit_code_ipa0(0xE5, 0x60, "TBEGIN"), \
+ exit_code_ipa0(0xE5, 0x61, "TBEGINC"), \
+ exit_code_ipa0(0xEB, 0x25, "STCTG"), \
+ exit_code_ipa0(0xEB, 0x2f, "LCTLG"), \
+ exit_code_ipa0(0xEB, 0x60, "LRIC"), \
+ exit_code_ipa0(0xEB, 0x61, "STRIC"), \
+ exit_code_ipa0(0xEB, 0x62, "MRIC"), \
+ exit_code_ipa0(0xEB, 0x8a, "SQBS"), \
+ exit_code_ipa0(0xC8, 0x01, "ECTG"), \
+ exit_code(0x0a, "SVC"), \
+ exit_code(0x80, "SSM"), \
+ exit_code(0x82, "LPSW"), \
+ exit_code(0x83, "DIAG"), \
+ exit_code(0xae, "SIGP"), \
+ exit_code(0xac, "STNSM"), \
+ exit_code(0xad, "STOSM"), \
+ exit_code(0xb1, "LRA"), \
+ exit_code(0xb6, "STCTL"), \
+ exit_code(0xb7, "LCTL"), \
+ exit_code(0xee, "PLO")
+
+#define sie_intercept_code \
+ { 0x00, "Host interruption" }, \
+ { 0x04, "Instruction" }, \
+ { 0x08, "Program interruption" }, \
+ { 0x0c, "Instruction and program interruption" }, \
+ { 0x10, "External request" }, \
+ { 0x14, "External interruption" }, \
+ { 0x18, "I/O request" }, \
+ { 0x1c, "Wait state" }, \
+ { 0x20, "Validity" }, \
+ { 0x28, "Stop request" }, \
+ { 0x2c, "Operation exception" }, \
+ { 0x38, "Partial-execution" }, \
+ { 0x3c, "I/O interruption" }, \
+ { 0x40, "I/O instruction" }, \
+ { 0x48, "Timing subset" }
+
+/*
+ * This is a simple decoder for interceptable instructions.
+ *
+ * It will be used as a userspace interface and can be used in places
+ * that do not allow the use of general decoder functions,
+ * such as trace event declarations.
+ *
+ * Some userspace tools may want to parse this code
+ * and would be confused by switch(), if() and other statements,
+ * but they can understand the conditional operator.
+ */
+#define INSN_DECODE_IPA0(ipa0, insn, rshift, mask) \
+ (insn >> 56) == (ipa0) ? \
+ ((ipa0 << 8) | ((insn >> rshift) & mask)) :
+
+#define INSN_DECODE(insn) (insn >> 56)
+
+/*
+ * The macro icpt_insn_decoder() takes an intercepted instruction
+ * and returns a key, which can be used to find a mnemonic name
+ * of the instruction in the icpt_insn_codes table.
+ */
+#define icpt_insn_decoder(insn) ( \
+ INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \
+ INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xb9, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe3, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \
+ INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \
+ INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \
+ INSN_DECODE(insn))
+
+#endif /* _UAPI_ASM_S390_SIE_H */
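Because icpt_insn_codes expands to plain brace initializers, it can seed an ordinary lookup table in userspace just as well as a trace-event __print_symbolic() list. A hedged sketch of resolving an intercepted instruction word to its mnemonic, assuming this header is on the include path:

#include <stddef.h>

static const struct {
	unsigned int code;
	const char *name;
} insn_codes[] = { icpt_insn_codes };

static const char *icpt_insn_name(unsigned long long insn)
{
	unsigned long long key = icpt_insn_decoder(insn);
	size_t i;

	/* linear scan is fine; the table has a few hundred entries */
	for (i = 0; i < sizeof(insn_codes) / sizeof(insn_codes[0]); i++)
		if (insn_codes[i].code == key)
			return insn_codes[i].name;
	return "unknown";
}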
diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
new file mode 100644
index 000000000..bde5221af
--- /dev/null
+++ b/tools/arch/sh/include/asm/barrier.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima
+ * Copyright (C) 2002 Paul Mundt
+ */
+#ifndef __TOOLS_LINUX_ASM_SH_BARRIER_H
+#define __TOOLS_LINUX_ASM_SH_BARRIER_H
+
+/*
+ * A brief note on ctrl_barrier(), the control register write barrier.
+ *
+ * Legacy SH cores typically require a sequence of 8 nops after
+ * modification of a control register in order for the changes to take
+ * effect. On newer cores (like the sh4a and sh5) this is accomplished
+ * with icbi.
+ *
+ * Also note that on sh4a in the icbi case we can forego a synco for the
+ * write barrier, as it's not necessary for control registers.
+ *
+ * Historically we have only done this type of barrier for the MMUCR, but
+ * it's also necessary for the CCR, so we make it generic here instead.
+ */
+#if defined(__SH4A__) || defined(__SH5__)
+#define mb() __asm__ __volatile__ ("synco": : :"memory")
+#define rmb() mb()
+#define wmb() mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* __TOOLS_LINUX_ASM_SH_BARRIER_H */
diff --git a/tools/arch/sh/include/uapi/asm/mman.h b/tools/arch/sh/include/uapi/asm/mman.h
new file mode 100644
index 000000000..88c0e2930
--- /dev/null
+++ b/tools/arch/sh/include/uapi/asm/mman.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SH_UAPI_ASM_MMAN_FIX_H
+#include <uapi/asm-generic/mman.h>
+/* MAP_32BIT is undefined on sh, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/sparc/include/asm/barrier.h b/tools/arch/sparc/include/asm/barrier.h
new file mode 100644
index 000000000..95d161846
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#define ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include "barrier_64.h"
+#else
+#include "barrier_32.h"
+#endif
+#endif
diff --git a/tools/arch/sparc/include/asm/barrier_32.h b/tools/arch/sparc/include/asm/barrier_32.h
new file mode 100644
index 000000000..cc19ed1dd
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_32.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_PERF_SPARC_BARRIER_H
+#define __TOOLS_PERF_SPARC_BARRIER_H
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__TOOLS_PERF_SPARC_BARRIER_H) */
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
new file mode 100644
index 000000000..ba6134428
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_64.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H
+#define __TOOLS_LINUX_SPARC64_BARRIER_H
+
+/* Copied from the kernel sources to tools/:
+ *
+ * These are here in an effort to more fully work around Spitfire Errata
+ * #51. Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ * call sym_ccb_from_dsa, 0
+ * movge %icc, 0, %l0
+ * brz,pn %o0, .LL1303
+ * mov %o0, %l2
+ * membar #LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur. Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \
+ " membar " type "\n" \
+ "1:\n" \
+ : : : "memory"); \
+} while (0)
+
+/* The kernel always executes in the TSO memory model these days,
+ * and furthermore most sparc64 chips implement more stringent
+ * memory ordering than required by the specifications.
+ */
+#define mb() membar_safe("#StoreLoad")
+#define rmb() __asm__ __volatile__("":::"memory")
+#define wmb() __asm__ __volatile__("":::"memory")
+
+#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/arch/sparc/include/uapi/asm/bitsperlong.h b/tools/arch/sparc/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..3b4e61740
--- /dev/null
+++ b/tools/arch/sparc/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_SPARC_BITSPERLONG_H
+#define __ASM_SPARC_BITSPERLONG_H
+
+#if defined(__sparc__) && defined(__arch64__)
+#define __BITS_PER_LONG 64
+#else
+#define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_SPARC_BITSPERLONG_H */
diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h
new file mode 100644
index 000000000..81a732b90
--- /dev/null
+++ b/tools/arch/sparc/include/uapi/asm/errno.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _SPARC_ERRNO_H
+#define _SPARC_ERRNO_H
+
+/* These match the SunOS error numbering scheme. */
+
+#include <asm-generic/errno-base.h>
+
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define EINPROGRESS 36 /* Operation now in progress */
+#define EALREADY 37 /* Operation already in progress */
+#define ENOTSOCK 38 /* Socket operation on non-socket */
+#define EDESTADDRREQ 39 /* Destination address required */
+#define EMSGSIZE 40 /* Message too long */
+#define EPROTOTYPE 41 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 42 /* Protocol not available */
+#define EPROTONOSUPPORT 43 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
+#define EOPNOTSUPP 45 /* Op not supported on transport endpoint */
+#define EPFNOSUPPORT 46 /* Protocol family not supported */
+#define EAFNOSUPPORT 47 /* Address family not supported by protocol */
+#define EADDRINUSE 48 /* Address already in use */
+#define EADDRNOTAVAIL 49 /* Cannot assign requested address */
+#define ENETDOWN 50 /* Network is down */
+#define ENETUNREACH 51 /* Network is unreachable */
+#define ENETRESET 52 /* Net dropped connection because of reset */
+#define ECONNABORTED 53 /* Software caused connection abort */
+#define ECONNRESET 54 /* Connection reset by peer */
+#define ENOBUFS 55 /* No buffer space available */
+#define EISCONN 56 /* Transport endpoint is already connected */
+#define ENOTCONN 57 /* Transport endpoint is not connected */
+#define ESHUTDOWN 58 /* No send after transport endpoint shutdown */
+#define ETOOMANYREFS 59 /* Too many references: cannot splice */
+#define ETIMEDOUT 60 /* Connection timed out */
+#define ECONNREFUSED 61 /* Connection refused */
+#define ELOOP 62 /* Too many symbolic links encountered */
+#define ENAMETOOLONG 63 /* File name too long */
+#define EHOSTDOWN 64 /* Host is down */
+#define EHOSTUNREACH 65 /* No route to host */
+#define ENOTEMPTY 66 /* Directory not empty */
+#define EPROCLIM 67 /* SunOS: Too many processes */
+#define EUSERS 68 /* Too many users */
+#define EDQUOT 69 /* Quota exceeded */
+#define ESTALE 70 /* Stale file handle */
+#define EREMOTE 71 /* Object is remote */
+#define ENOSTR 72 /* Device not a stream */
+#define ETIME 73 /* Timer expired */
+#define ENOSR 74 /* Out of streams resources */
+#define ENOMSG 75 /* No message of desired type */
+#define EBADMSG 76 /* Not a data message */
+#define EIDRM 77 /* Identifier removed */
+#define EDEADLK 78 /* Resource deadlock would occur */
+#define ENOLCK 79 /* No record locks available */
+#define ENONET 80 /* Machine is not on the network */
+#define ERREMOTE 81 /* SunOS: Too many lvls of remote in path */
+#define ENOLINK 82 /* Link has been severed */
+#define EADV 83 /* Advertise error */
+#define ESRMNT 84 /* Srmount error */
+#define ECOMM 85 /* Communication error on send */
+#define EPROTO 86 /* Protocol error */
+#define EMULTIHOP 87 /* Multihop attempted */
+#define EDOTDOT 88 /* RFS specific error */
+#define EREMCHG 89 /* Remote address changed */
+#define ENOSYS 90 /* Function not implemented */
+
+/* The rest have no SunOS equivalent. */
+#define ESTRPIPE 91 /* Streams pipe error */
+#define EOVERFLOW 92 /* Value too large for defined data type */
+#define EBADFD 93 /* File descriptor in bad state */
+#define ECHRNG 94 /* Channel number out of range */
+#define EL2NSYNC 95 /* Level 2 not synchronized */
+#define EL3HLT 96 /* Level 3 halted */
+#define EL3RST 97 /* Level 3 reset */
+#define ELNRNG 98 /* Link number out of range */
+#define EUNATCH 99 /* Protocol driver not attached */
+#define ENOCSI 100 /* No CSI structure available */
+#define EL2HLT 101 /* Level 2 halted */
+#define EBADE 102 /* Invalid exchange */
+#define EBADR 103 /* Invalid request descriptor */
+#define EXFULL 104 /* Exchange full */
+#define ENOANO 105 /* No anode */
+#define EBADRQC 106 /* Invalid request code */
+#define EBADSLT 107 /* Invalid slot */
+#define EDEADLOCK 108 /* File locking deadlock error */
+#define EBFONT 109 /* Bad font file format */
+#define ELIBEXEC 110 /* Cannot exec a shared library directly */
+#define ENODATA 111 /* No data available */
+#define ELIBBAD 112 /* Accessing a corrupted shared library */
+#define ENOPKG 113 /* Package not installed */
+#define ELIBACC 114 /* Can not access a needed shared library */
+#define ENOTUNIQ 115 /* Name not unique on network */
+#define ERESTART 116 /* Interrupted syscall should be restarted */
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+#define EILSEQ 122 /* Illegal byte sequence */
+#define ELIBMAX 123 /* Atmpt to link in too many shared libs */
+#define ELIBSCN 124 /* .lib section in a.out corrupted */
+
+#define ENOMEDIUM 125 /* No medium found */
+#define EMEDIUMTYPE 126 /* Wrong medium type */
+#define ECANCELED 127 /* Operation Cancelled */
+#define ENOKEY 128 /* Required key not available */
+#define EKEYEXPIRED 129 /* Key has expired */
+#define EKEYREVOKED 130 /* Key has been revoked */
+#define EKEYREJECTED 131 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define EOWNERDEAD 132 /* Owner died */
+#define ENOTRECOVERABLE 133 /* State not recoverable */
+
+#define ERFKILL 134 /* Operation not possible due to RF-kill */
+
+#define EHWPOISON 135 /* Memory page has hardware error */
+
+#endif
diff --git a/tools/arch/sparc/include/uapi/asm/mman.h b/tools/arch/sparc/include/uapi/asm/mman.h
new file mode 100644
index 000000000..38920eed8
--- /dev/null
+++ b/tools/arch/sparc/include/uapi/asm/mman.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_SPARC_UAPI_ASM_MMAN_FIX_H
+#define MAP_DENYWRITE 0x0800
+#define MAP_EXECUTABLE 0x1000
+#define MAP_GROWSDOWN 0x0200
+#define MAP_HUGETLB 0x40000
+#define MAP_LOCKED 0x100
+#define MAP_NONBLOCK 0x10000
+#define MAP_NORESERVE 0x40
+#define MAP_POPULATE 0x8000
+#define MAP_STACK 0x20000
+#include <uapi/asm-generic/mman-common.h>
+/* MAP_32BIT is undefined on sparc, fix it for perf */
+#define MAP_32BIT 0
+#endif
diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
new file mode 100644
index 000000000..1f5e26aae
--- /dev/null
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_X86_ATOMIC_H
+#define _TOOLS_LINUX_ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "rmwcc.h"
+
+#define LOCK_PREFIX "\n\tlock; "
+
+#include <asm/cmpxchg.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+ return READ_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+ v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+ asm volatile(LOCK_PREFIX "incl %0"
+ : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+ GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+}
+
+static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
+{
+ return cmpxchg(&v->counter, old, new);
+}
+
+#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
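A minimal reference-count sketch built on the helpers above, assuming the tools/ include paths so that atomic_t and ATOMIC_INIT resolve; free_object() is a hypothetical release hook:

#include <linux/atomic.h>	/* tools wrapper pulling in asm/atomic.h */

static void free_object(void);	/* hypothetical; supplied by the caller */

static atomic_t refcount = ATOMIC_INIT(1);

static void obj_get(void)
{
	atomic_inc(&refcount);
}

static void obj_put(void)
{
	/* only the last put observes the 1 -> 0 transition */
	if (atomic_dec_and_test(&refcount))
		free_object();
}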
diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
new file mode 100644
index 000000000..8774dee27
--- /dev/null
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_X86_BARRIER_H
+#define _TOOLS_LINUX_ASM_X86_BARRIER_H
+
+/*
+ * Copied from the Linux kernel sources, also moving code out from
+ * tools/perf/perf-sys.h so that it is located in a place similar
+ * to where it lives in the kernel sources.
+ *
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out-of-order stores. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#endif
+
+#endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
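The main consumer of these barriers in tools/ is the perf mmap ring buffer: the reader must observe data_head before touching the records it covers, and must be done with the records before publishing data_tail. A hedged sketch, assuming a mapped struct perf_event_mmap_page from <linux/perf_event.h>:

#include <linux/perf_event.h>

static inline __u64 ring_read_head(struct perf_event_mmap_page *pc)
{
	__u64 head = pc->data_head;

	rmb();	/* order the head load before loads of the records */
	return head;
}

static inline void ring_write_tail(struct perf_event_mmap_page *pc, __u64 tail)
{
	mb();	/* finish consuming the records before releasing them */
	pc->data_tail = tail;
}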
diff --git a/tools/arch/x86/include/asm/cmpxchg.h b/tools/arch/x86/include/asm/cmpxchg.h
new file mode 100644
index 000000000..0ed9ca276
--- /dev/null
+++ b/tools/arch/x86/include/asm/cmpxchg.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TOOLS_ASM_X86_CMPXCHG_H
+#define TOOLS_ASM_X86_CMPXCHG_H
+
+#include <linux/compiler.h>
+
+/*
+ * Non-existent functions to indicate usage errors at link time
+ * (or compile time if the compiler implements __compiletime_error()).
+ */
+extern void __cmpxchg_wrong_size(void)
+ __compiletime_error("Bad argument size for cmpxchg");
+
+/*
+ * Constants for operation sizes. On 32-bit, the 64-bit size is set to
+ * -1 because sizeof will never return -1, thereby making those switch
+ * case statements guaranteed dead code which the compiler will
+ * eliminate, and allowing the "missing symbol in the default case" to
+ * indicate a usage error.
+ */
+#define __X86_CASE_B 1
+#define __X86_CASE_W 2
+#define __X86_CASE_L 4
+#ifdef __x86_64__
+#define __X86_CASE_Q 8
+#else
+#define __X86_CASE_Q -1 /* sizeof will never return -1 */
+#endif
+
+/*
+ * Atomic compare and exchange. Compare OLD with MEM, if identical,
+ * store NEW in MEM. Return the initial value in MEM. Success is
+ * indicated by comparing RETURN with OLD.
+ */
+#define __raw_cmpxchg(ptr, old, new, size, lock) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __typeof__(*(ptr)) __old = (old); \
+ __typeof__(*(ptr)) __new = (new); \
+ switch (size) { \
+ case __X86_CASE_B: \
+ { \
+ volatile u8 *__ptr = (volatile u8 *)(ptr); \
+ asm volatile(lock "cmpxchgb %2,%1" \
+ : "=a" (__ret), "+m" (*__ptr) \
+ : "q" (__new), "0" (__old) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_W: \
+ { \
+ volatile u16 *__ptr = (volatile u16 *)(ptr); \
+ asm volatile(lock "cmpxchgw %2,%1" \
+ : "=a" (__ret), "+m" (*__ptr) \
+ : "r" (__new), "0" (__old) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_L: \
+ { \
+ volatile u32 *__ptr = (volatile u32 *)(ptr); \
+ asm volatile(lock "cmpxchgl %2,%1" \
+ : "=a" (__ret), "+m" (*__ptr) \
+ : "r" (__new), "0" (__old) \
+ : "memory"); \
+ break; \
+ } \
+ case __X86_CASE_Q: \
+ { \
+ volatile u64 *__ptr = (volatile u64 *)(ptr); \
+ asm volatile(lock "cmpxchgq %2,%1" \
+ : "=a" (__ret), "+m" (*__ptr) \
+ : "r" (__new), "0" (__old) \
+ : "memory"); \
+ break; \
+ } \
+ default: \
+ __cmpxchg_wrong_size(); \
+ } \
+ __ret; \
+})
+
+#define __cmpxchg(ptr, old, new, size) \
+ __raw_cmpxchg((ptr), (old), (new), (size), LOCK_PREFIX)
+
+#define cmpxchg(ptr, old, new) \
+ __cmpxchg(ptr, old, new, sizeof(*(ptr)))
+
+
+#endif /* TOOLS_ASM_X86_CMPXCHG_H */
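A short compare-and-swap retry loop built on cmpxchg(), here a bounded increment; u32 is assumed to come from the tools/ <linux/types.h>:

#include <linux/types.h>

/* Atomically increment *p, but never past limit; returns the prior value. */
static u32 bounded_inc(u32 *p, u32 limit)
{
	u32 old, val;

	do {
		old = *p;
		if (old >= limit)
			break;		/* saturated; leave *p untouched */
		val = old + 1;
	} while (cmpxchg(p, old, val) != old);

	return old;
}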
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
new file mode 100644
index 000000000..beab4d4e4
--- /dev/null
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -0,0 +1,381 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUFEATURES_H
+#define _ASM_X86_CPUFEATURES_H
+
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#include <asm/required-features.h>
+#endif
+
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#include <asm/disabled-features.h>
+#endif
+
+/*
+ * Defines x86 CPU feature bits
+ */
+#define NCAPINTS 19 /* N 32-bit words worth of info */
+#define NBUGINTS 1 /* N 32-bit bug flags */
+
+/*
+ * Note: If the comment begins with a quoted string, that string is used
+ * in /proc/cpuinfo instead of the macro name. If the string is "",
+ * this feature bit is not displayed in /proc/cpuinfo at all.
+ *
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
+ */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
+#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
+#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
+#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
+#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
+#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+
+/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
+/* Don't duplicate feature flags which are redundant with Intel! */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
+
+/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+
+/* Other features, Linux-defined mapping, word 3 */
+/* This range is used for feature bits which conflict or are synthesized */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
+
+/* CPU types for specific tunings: */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
+#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
+#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
+#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+
+/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
+#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
+#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
+#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
+#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+
+/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY	( 6*32+ 1) /* If set, HyperThreading is not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+
+/*
+ * Auxiliary flags: Linux defined - For features scattered in various
+ * CPUID levels like 0x6, 0xA etc, word 7.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
+#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
+#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
+#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
+
+/* Virtualization flags: Linux defined, word 8 */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
+#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
+
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
+#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
+#define X86_FEATURE_FSGSBASE	( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions */
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
+#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+
+/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
+#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
+
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
+
+/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
+
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
+
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
+#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
+#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
+#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
+#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
+#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
+
+/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+
+/*
+ * BUG word(s)
+ */
+#define X86_BUG(x) (NCAPINTS*32 + (x))
+
+#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
+#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
+#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#ifdef CONFIG_X86_32
+/*
+ * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
+ * to avoid confusion.
+ */
+#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#endif
+#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
+
+#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
new file mode 100644
index 000000000..33833d190
--- /dev/null
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -0,0 +1,83 @@
+#ifndef _ASM_X86_DISABLED_FEATURES_H
+#define _ASM_X86_DISABLED_FEATURES_H
+
+/* These features, although they might be available in a CPU,
+ * will not be used because the compile options to support
+ * them are not present.
+ *
+ * This code allows them to be checked and disabled at
+ * compile time without an explicit #ifdef. Use
+ * cpu_feature_enabled().
+ */
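A rough sketch of the idea (names below are illustrative, not the kernel's exact implementation): a feature test first consults the compile-time DISABLED_MASKn word for that feature, so a disabled bit folds to constant false and the runtime check disappears entirely.

```c
/* Illustrative only: compile-time portion of a feature check. */
#define feature_disabled(word, bit) \
	(DISABLED_MASK##word & (1U << ((bit) & 31)))

/* With CONFIG_X86_5LEVEL off, a LA57 test (word 16) folds to false. */
```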
+
+#ifdef CONFIG_X86_INTEL_MPX
+# define DISABLE_MPX 0
+#else
+# define DISABLE_MPX (1<<(X86_FEATURE_MPX & 31))
+#endif
+
+#ifdef CONFIG_X86_INTEL_UMIP
+# define DISABLE_UMIP 0
+#else
+# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
+#endif
+
+#ifdef CONFIG_X86_64
+# define DISABLE_VME (1<<(X86_FEATURE_VME & 31))
+# define DISABLE_K6_MTRR (1<<(X86_FEATURE_K6_MTRR & 31))
+# define DISABLE_CYRIX_ARR (1<<(X86_FEATURE_CYRIX_ARR & 31))
+# define DISABLE_CENTAUR_MCR (1<<(X86_FEATURE_CENTAUR_MCR & 31))
+# define DISABLE_PCID 0
+#else
+# define DISABLE_VME 0
+# define DISABLE_K6_MTRR 0
+# define DISABLE_CYRIX_ARR 0
+# define DISABLE_CENTAUR_MCR 0
+# define DISABLE_PCID (1<<(X86_FEATURE_PCID & 31))
+#endif /* CONFIG_X86_64 */
+
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+# define DISABLE_PKU 0
+# define DISABLE_OSPKE 0
+#else
+# define DISABLE_PKU (1<<(X86_FEATURE_PKU & 31))
+# define DISABLE_OSPKE (1<<(X86_FEATURE_OSPKE & 31))
+#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
+
+#ifdef CONFIG_X86_5LEVEL
+# define DISABLE_LA57 0
+#else
+# define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31))
+#endif
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI 0
+#else
+# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
+#endif
+
+/*
+ * Make sure to add features to the correct mask
+ */
+#define DISABLED_MASK0 (DISABLE_VME)
+#define DISABLED_MASK1 0
+#define DISABLED_MASK2 0
+#define DISABLED_MASK3 (DISABLE_CYRIX_ARR|DISABLE_CENTAUR_MCR|DISABLE_K6_MTRR)
+#define DISABLED_MASK4 (DISABLE_PCID)
+#define DISABLED_MASK5 0
+#define DISABLED_MASK6 0
+#define DISABLED_MASK7 (DISABLE_PTI)
+#define DISABLED_MASK8 0
+#define DISABLED_MASK9 (DISABLE_MPX)
+#define DISABLED_MASK10 0
+#define DISABLED_MASK11 0
+#define DISABLED_MASK12 0
+#define DISABLED_MASK13 0
+#define DISABLED_MASK14 0
+#define DISABLED_MASK15 0
+#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
+#define DISABLED_MASK17 0
+#define DISABLED_MASK18 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
new file mode 100644
index 000000000..2ccd588fb
--- /dev/null
+++ b/tools/arch/x86/include/asm/mcsafe_test.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MCSAFE_TEST_H_
+#define _MCSAFE_TEST_H_
+
+.macro MCSAFE_TEST_CTL
+.endm
+
+.macro MCSAFE_TEST_SRC reg count target
+.endm
+
+.macro MCSAFE_TEST_DST reg count target
+.endm
+#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
new file mode 100644
index 000000000..6847d8540
--- /dev/null
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -0,0 +1,106 @@
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
+#define _ASM_X86_REQUIRED_FEATURES_H
+
+/* Define the minimum CPUID feature set for the kernel. These bits are checked
+ really early to actually display a visible error message before the
+ kernel dies. Make sure to assign features to the proper mask!
+
+ Some requirements that are not in CPUID yet are also in the
+ CONFIG_X86_MINIMUM_CPU_FAMILY which is checked too.
+
+ The real information is in arch/x86/Kconfig.cpu, this just converts
+ the CONFIGs into a bitmask */
+
+#ifndef CONFIG_MATH_EMULATION
+# define NEED_FPU (1<<(X86_FEATURE_FPU & 31))
+#else
+# define NEED_FPU 0
+#endif
+
+#if defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
+# define NEED_PAE (1<<(X86_FEATURE_PAE & 31))
+#else
+# define NEED_PAE 0
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+# define NEED_CX8 (1<<(X86_FEATURE_CX8 & 31))
+#else
+# define NEED_CX8 0
+#endif
+
+#if defined(CONFIG_X86_CMOV) || defined(CONFIG_X86_64)
+# define NEED_CMOV (1<<(X86_FEATURE_CMOV & 31))
+#else
+# define NEED_CMOV 0
+#endif
+
+#ifdef CONFIG_X86_USE_3DNOW
+# define NEED_3DNOW (1<<(X86_FEATURE_3DNOW & 31))
+#else
+# define NEED_3DNOW 0
+#endif
+
+#if defined(CONFIG_X86_P6_NOP) || defined(CONFIG_X86_64)
+# define NEED_NOPL (1<<(X86_FEATURE_NOPL & 31))
+#else
+# define NEED_NOPL 0
+#endif
+
+#ifdef CONFIG_MATOM
+# define NEED_MOVBE (1<<(X86_FEATURE_MOVBE & 31))
+#else
+# define NEED_MOVBE 0
+#endif
+
+#ifdef CONFIG_X86_64
+#ifdef CONFIG_PARAVIRT
+/* Paravirtualized systems may not have PSE or PGE available */
+#define NEED_PSE 0
+#define NEED_PGE 0
+#else
+#define NEED_PSE	(1<<(X86_FEATURE_PSE & 31))
+#define NEED_PGE	(1<<(X86_FEATURE_PGE & 31))
+#endif
+#define NEED_MSR (1<<(X86_FEATURE_MSR & 31))
+#define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31))
+#define NEED_XMM (1<<(X86_FEATURE_XMM & 31))
+#define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31))
+#define NEED_LM (1<<(X86_FEATURE_LM & 31))
+#else
+#define NEED_PSE 0
+#define NEED_MSR 0
+#define NEED_PGE 0
+#define NEED_FXSR 0
+#define NEED_XMM 0
+#define NEED_XMM2 0
+#define NEED_LM 0
+#endif
+
+#define REQUIRED_MASK0 (NEED_FPU|NEED_PSE|NEED_MSR|NEED_PAE|\
+ NEED_CX8|NEED_PGE|NEED_FXSR|NEED_CMOV|\
+ NEED_XMM|NEED_XMM2)
+#define SSE_MASK (NEED_XMM|NEED_XMM2)
+
+#define REQUIRED_MASK1 (NEED_LM|NEED_3DNOW)
+
+#define REQUIRED_MASK2 0
+#define REQUIRED_MASK3 (NEED_NOPL)
+#define REQUIRED_MASK4 (NEED_MOVBE)
+#define REQUIRED_MASK5 0
+#define REQUIRED_MASK6 0
+#define REQUIRED_MASK7 0
+#define REQUIRED_MASK8 0
+#define REQUIRED_MASK9 0
+#define REQUIRED_MASK10 0
+#define REQUIRED_MASK11 0
+#define REQUIRED_MASK12 0
+#define REQUIRED_MASK13 0
+#define REQUIRED_MASK14 0
+#define REQUIRED_MASK15 0
+#define REQUIRED_MASK16 0
+#define REQUIRED_MASK17 0
+#define REQUIRED_MASK18 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
+
+#endif /* _ASM_X86_REQUIRED_FEATURES_H */
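For illustration, the early boot check these masks feed looks roughly like the sketch below: each REQUIRED_MASKn word is compared against the corresponding CPUID capability word, and boot is refused if any required bit is missing. Function and variable names here are hypothetical.

```c
/* Hypothetical sketch of the early required-features check. */
static int check_required_features(const u32 *cap_words)
{
	static const u32 required[] = { REQUIRED_MASK0, REQUIRED_MASK1 };
	unsigned int i;

	for (i = 0; i < 2; i++)
		if ((cap_words[i] & required[i]) != required[i])
			return -1;	/* missing a must-have feature */
	return 0;
}
```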
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 000000000..fee7983a9
--- /dev/null
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_X86_RMWcc
+#define _TOOLS_LINUX_ASM_X86_RMWcc
+
+#ifdef CONFIG_CC_HAS_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+ asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \
+ : : "m" (var), ## __VA_ARGS__ \
+ : "memory" : cc_label); \
+ return 0; \
+cc_label: \
+ return 1; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
+ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+
+#else /* !CONFIG_CC_HAS_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...) \
+do { \
+ char c; \
+ asm volatile (fullop "; set" cc " %1" \
+ : "+m" (var), "=qm" (c) \
+ : __VA_ARGS__ : "memory"); \
+ return c != 0; \
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) \
+ __GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \
+ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+
+#endif /* CONFIG_CC_HAS_ASM_GOTO */
+
+#endif /* _TOOLS_LINUX_ASM_X86_RMWcc */
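In the kernel proper these macros back read-modify-write predicates such as atomic_dec_and_test(); a hedged sketch of a comparable caller (the wrapper is illustrative, not part of this header):

```c
/* Decrement *v and report whether it hit zero, via the "e" (ZF) flag. */
static inline int my_dec_and_test(int *v)
{
	GEN_UNARY_RMWcc("decl", *v, "%0", "e");
}
```

With asm goto available this compiles to a decl followed by a conditional jump, with no materialization of the flag into a register.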
diff --git a/tools/arch/x86/include/asm/unistd_32.h b/tools/arch/x86/include/asm/unistd_32.h
new file mode 100644
index 000000000..60a89dba0
--- /dev/null
+++ b/tools/arch/x86/include/asm/unistd_32.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NR_perf_event_open
+# define __NR_perf_event_open 336
+#endif
+#ifndef __NR_futex
+# define __NR_futex 240
+#endif
+#ifndef __NR_gettid
+# define __NR_gettid 224
+#endif
+#ifndef __NR_getcpu
+# define __NR_getcpu 318
+#endif
+#ifndef __NR_setns
+# define __NR_setns 346
+#endif
diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h
new file mode 100644
index 000000000..cb52a3a8b
--- /dev/null
+++ b/tools/arch/x86/include/asm/unistd_64.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NR_perf_event_open
+# define __NR_perf_event_open 298
+#endif
+#ifndef __NR_futex
+# define __NR_futex 202
+#endif
+#ifndef __NR_gettid
+# define __NR_gettid 186
+#endif
+#ifndef __NR_getcpu
+# define __NR_getcpu 309
+#endif
+#ifndef __NR_setns
+#define __NR_setns 308
+#endif
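These fallbacks only supply syscall numbers that older libc headers may lack; call sites then invoke syscall(2) directly. A minimal sketch:

```c
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	/* __NR_gettid resolves via <sys/syscall.h> or the fallback above. */
	printf("tid=%ld\n", (long)syscall(__NR_gettid));
	return 0;
}
```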
diff --git a/tools/arch/x86/include/uapi/asm/bitsperlong.h b/tools/arch/x86/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..f8a92e000
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_X86_BITSPERLONG_H
+#define __ASM_X86_BITSPERLONG_H
+
+#if defined(__x86_64__) && !defined(__ILP32__)
+# define __BITS_PER_LONG 64
+#else
+# define __BITS_PER_LONG 32
+#endif
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_X86_BITSPERLONG_H */
diff --git a/tools/arch/x86/include/uapi/asm/errno.h b/tools/arch/x86/include/uapi/asm/errno.h
new file mode 100644
index 000000000..4c82b503d
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/errno.h
@@ -0,0 +1 @@
+#include <asm-generic/errno.h>
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
new file mode 100644
index 000000000..fd23d5778
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -0,0 +1,419 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_KVM_H
+#define _ASM_X86_KVM_H
+
+/*
+ * KVM x86 specific structures and definitions
+ */
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define AC_VECTOR 17
+#define MC_VECTOR 18
+#define XM_VECTOR 19
+#define VE_VECTOR 20
+
+/* Select x86 specific features in <linux/kvm.h> */
+#define __KVM_HAVE_PIT
+#define __KVM_HAVE_IOAPIC
+#define __KVM_HAVE_IRQ_LINE
+#define __KVM_HAVE_MSI
+#define __KVM_HAVE_USER_NMI
+#define __KVM_HAVE_GUEST_DEBUG
+#define __KVM_HAVE_MSIX
+#define __KVM_HAVE_MCE
+#define __KVM_HAVE_PIT_STATE2
+#define __KVM_HAVE_XEN_HVM
+#define __KVM_HAVE_VCPU_EVENTS
+#define __KVM_HAVE_DEBUGREGS
+#define __KVM_HAVE_XSAVE
+#define __KVM_HAVE_XCRS
+#define __KVM_HAVE_READONLY_MEM
+
+/* Architectural interrupt line count. */
+#define KVM_NR_INTERRUPTS 256
+
+struct kvm_memory_alias {
+ __u32 slot; /* this has a different namespace than memory slots */
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size;
+ __u64 target_phys_addr;
+};
+
+/* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
+struct kvm_pic_state {
+ __u8 last_irr; /* edge detection */
+ __u8 irr; /* interrupt request register */
+ __u8 imr; /* interrupt mask register */
+ __u8 isr; /* interrupt service register */
+ __u8 priority_add; /* highest irq priority */
+ __u8 irq_base;
+ __u8 read_reg_select;
+ __u8 poll;
+ __u8 special_mask;
+ __u8 init_state;
+ __u8 auto_eoi;
+ __u8 rotate_on_auto_eoi;
+ __u8 special_fully_nested_mode;
+ __u8 init4; /* true if 4 byte init */
+ __u8 elcr; /* PIIX edge/trigger selection */
+ __u8 elcr_mask;
+};
+
+#define KVM_IOAPIC_NUM_PINS 24
+struct kvm_ioapic_state {
+ __u64 base_address;
+ __u32 ioregsel;
+ __u32 id;
+ __u32 irr;
+ __u32 pad;
+ union {
+ __u64 bits;
+ struct {
+ __u8 vector;
+ __u8 delivery_mode:3;
+ __u8 dest_mode:1;
+ __u8 delivery_status:1;
+ __u8 polarity:1;
+ __u8 remote_irr:1;
+ __u8 trig_mode:1;
+ __u8 mask:1;
+ __u8 reserve:7;
+ __u8 reserved[4];
+ __u8 dest_id;
+ } fields;
+ } redirtbl[KVM_IOAPIC_NUM_PINS];
+};
+
+#define KVM_IRQCHIP_PIC_MASTER 0
+#define KVM_IRQCHIP_PIC_SLAVE 1
+#define KVM_IRQCHIP_IOAPIC 2
+#define KVM_NR_IRQCHIPS 3
+
+#define KVM_RUN_X86_SMM (1 << 0)
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+ /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
+ __u64 rax, rbx, rcx, rdx;
+ __u64 rsi, rdi, rsp, rbp;
+ __u64 r8, r9, r10, r11;
+ __u64 r12, r13, r14, r15;
+ __u64 rip, rflags;
+};
+
+/* for KVM_GET_LAPIC and KVM_SET_LAPIC */
+#define KVM_APIC_REG_SIZE 0x400
+struct kvm_lapic_state {
+ char regs[KVM_APIC_REG_SIZE];
+};
+
+struct kvm_segment {
+ __u64 base;
+ __u32 limit;
+ __u16 selector;
+ __u8 type;
+ __u8 present, dpl, db, s, l, g, avl;
+ __u8 unusable;
+ __u8 padding;
+};
+
+struct kvm_dtable {
+ __u64 base;
+ __u16 limit;
+ __u16 padding[3];
+};
+
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+ /* out (KVM_GET_SREGS) / in (KVM_SET_SREGS) */
+ struct kvm_segment cs, ds, es, fs, gs, ss;
+ struct kvm_segment tr, ldt;
+ struct kvm_dtable gdt, idt;
+ __u64 cr0, cr2, cr3, cr4, cr8;
+ __u64 efer;
+ __u64 apic_base;
+ __u64 interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+ __u8 fpr[8][16];
+ __u16 fcw;
+ __u16 fsw;
+ __u8 ftwx; /* in fxsave format */
+ __u8 pad1;
+ __u16 last_opcode;
+ __u64 last_ip;
+ __u64 last_dp;
+ __u8 xmm[16][16];
+ __u32 mxcsr;
+ __u32 pad2;
+};
+
+struct kvm_msr_entry {
+ __u32 index;
+ __u32 reserved;
+ __u64 data;
+};
+
+/* for KVM_GET_MSRS and KVM_SET_MSRS */
+struct kvm_msrs {
+ __u32 nmsrs; /* number of msrs in entries */
+ __u32 pad;
+
+ struct kvm_msr_entry entries[0];
+};
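Because entries[] is a flexible array, userspace sizes one allocation for however many MSRs a single KVM_GET_MSRS call should fetch. A sketch (error handling omitted; vcpu_fd is assumed to come from KVM_CREATE_VCPU):

```c
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void read_two_msrs(int vcpu_fd)
{
	struct kvm_msrs *msrs;

	msrs = calloc(1, sizeof(*msrs) + 2 * sizeof(struct kvm_msr_entry));
	msrs->nmsrs = 2;
	msrs->entries[0].index = 0xc0000080;	/* MSR_EFER */
	msrs->entries[1].index = 0x00000010;	/* MSR_IA32_TSC */
	ioctl(vcpu_fd, KVM_GET_MSRS, msrs);	/* fills entries[].data */
	free(msrs);
}
```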
+
+/* for KVM_GET_MSR_INDEX_LIST */
+struct kvm_msr_list {
+ __u32 nmsrs; /* number of msrs in entries */
+ __u32 indices[0];
+};
+
+
+struct kvm_cpuid_entry {
+ __u32 function;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding;
+};
+
+/* for KVM_SET_CPUID */
+struct kvm_cpuid {
+ __u32 nent;
+ __u32 padding;
+ struct kvm_cpuid_entry entries[0];
+};
+
+struct kvm_cpuid_entry2 {
+ __u32 function;
+ __u32 index;
+ __u32 flags;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+ __u32 padding[3];
+};
+
+#define KVM_CPUID_FLAG_SIGNIFCANT_INDEX (1 << 0)
+#define KVM_CPUID_FLAG_STATEFUL_FUNC (1 << 1)
+#define KVM_CPUID_FLAG_STATE_READ_NEXT (1 << 2)
+
+/* for KVM_SET_CPUID2 */
+struct kvm_cpuid2 {
+ __u32 nent;
+ __u32 padding;
+ struct kvm_cpuid_entry2 entries[0];
+};
+
+/* for KVM_GET_PIT and KVM_SET_PIT */
+struct kvm_pit_channel_state {
+ __u32 count; /* can be 65536 */
+ __u16 latched_count;
+ __u8 count_latched;
+ __u8 status_latched;
+ __u8 status;
+ __u8 read_state;
+ __u8 write_state;
+ __u8 write_latch;
+ __u8 rw_mode;
+ __u8 mode;
+ __u8 bcd;
+ __u8 gate;
+ __s64 count_load_time;
+};
+
+struct kvm_debug_exit_arch {
+ __u32 exception;
+ __u32 pad;
+ __u64 pc;
+ __u64 dr6;
+ __u64 dr7;
+};
+
+#define KVM_GUESTDBG_USE_SW_BP 0x00010000
+#define KVM_GUESTDBG_USE_HW_BP 0x00020000
+#define KVM_GUESTDBG_INJECT_DB 0x00040000
+#define KVM_GUESTDBG_INJECT_BP 0x00080000
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+ __u64 debugreg[8];
+};
+
+struct kvm_pit_state {
+ struct kvm_pit_channel_state channels[3];
+};
+
+#define KVM_PIT_FLAGS_HPET_LEGACY 0x00000001
+
+struct kvm_pit_state2 {
+ struct kvm_pit_channel_state channels[3];
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+struct kvm_reinject_control {
+ __u8 pit_reinject;
+ __u8 reserved[31];
+};
+
+/* When set in flags, include corresponding fields on KVM_SET_VCPU_EVENTS */
+#define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001
+#define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002
+#define KVM_VCPUEVENT_VALID_SHADOW 0x00000004
+#define KVM_VCPUEVENT_VALID_SMM 0x00000008
+
+/* Interrupt shadow states */
+#define KVM_X86_SHADOW_INT_MOV_SS 0x01
+#define KVM_X86_SHADOW_INT_STI 0x02
+
+/* for KVM_GET/SET_VCPU_EVENTS */
+struct kvm_vcpu_events {
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 has_error_code;
+ __u8 pad;
+ __u32 error_code;
+ } exception;
+ struct {
+ __u8 injected;
+ __u8 nr;
+ __u8 soft;
+ __u8 shadow;
+ } interrupt;
+ struct {
+ __u8 injected;
+ __u8 pending;
+ __u8 masked;
+ __u8 pad;
+ } nmi;
+ __u32 sipi_vector;
+ __u32 flags;
+ struct {
+ __u8 smm;
+ __u8 pending;
+ __u8 smm_inside_nmi;
+ __u8 latched_init;
+ } smi;
+ __u32 reserved[9];
+};
+
+/* for KVM_GET/SET_DEBUGREGS */
+struct kvm_debugregs {
+ __u64 db[4];
+ __u64 dr6;
+ __u64 dr7;
+ __u64 flags;
+ __u64 reserved[9];
+};
+
+/* for KVM_CAP_XSAVE */
+struct kvm_xsave {
+ __u32 region[1024];
+};
+
+#define KVM_MAX_XCRS 16
+
+struct kvm_xcr {
+ __u32 xcr;
+ __u32 reserved;
+ __u64 value;
+};
+
+struct kvm_xcrs {
+ __u32 nr_xcrs;
+ __u32 flags;
+ struct kvm_xcr xcrs[KVM_MAX_XCRS];
+ __u64 padding[16];
+};
+
+#define KVM_SYNC_X86_REGS (1UL << 0)
+#define KVM_SYNC_X86_SREGS (1UL << 1)
+#define KVM_SYNC_X86_EVENTS (1UL << 2)
+
+#define KVM_SYNC_X86_VALID_FIELDS \
+ (KVM_SYNC_X86_REGS| \
+ KVM_SYNC_X86_SREGS| \
+ KVM_SYNC_X86_EVENTS)
+
+/* kvm_sync_regs struct included by kvm_run struct */
+struct kvm_sync_regs {
+ /* Members of this structure are potentially malicious.
+ * Care must be taken by code reading, esp. interpreting,
+ * data fields from them inside KVM to prevent TOCTOU and
+ * double-fetch types of vulnerabilities.
+ */
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
+};
+
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+
+#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
+
+#define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001
+#define KVM_STATE_NESTED_SMM_VMXON 0x00000002
+
+struct kvm_vmx_nested_state {
+ __u64 vmxon_pa;
+ __u64 vmcs_pa;
+
+ struct {
+ __u16 flags;
+ } smm;
+};
+
+/* for KVM_CAP_NESTED_STATE */
+struct kvm_nested_state {
+ /* KVM_STATE_* flags */
+ __u16 flags;
+
+ /* 0 for VMX, 1 for SVM. */
+ __u16 format;
+
+ /* 128 for SVM, 128 + VMCS size for VMX. */
+ __u32 size;
+
+ union {
+ /* VMXON, VMCS */
+ struct kvm_vmx_nested_state vmx;
+
+ /* Pad the header to 128 bytes. */
+ __u8 pad[120];
+ };
+
+ __u8 data[0];
+};
+
+#endif /* _ASM_X86_KVM_H */
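As a usage sketch for the register structures above: KVM_GET_REGS and KVM_SET_REGS are vCPU ioctls, so state can be read, edited, and written back (vcpu_fd from KVM_CREATE_VCPU; error checks omitted):

```c
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Illustrative: advance the guest RIP past a 2-byte instruction. */
static void skip_insn(int vcpu_fd)
{
	struct kvm_regs regs;

	ioctl(vcpu_fd, KVM_GET_REGS, &regs);
	regs.rip += 2;
	ioctl(vcpu_fd, KVM_SET_REGS, &regs);
}
```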
diff --git a/tools/arch/x86/include/uapi/asm/kvm_perf.h b/tools/arch/x86/include/uapi/asm/kvm_perf.h
new file mode 100644
index 000000000..125cf5cdf
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/kvm_perf.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_KVM_PERF_H
+#define _ASM_X86_KVM_PERF_H
+
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
+
+#define DECODE_STR_LEN 20
+
+#define VCPU_ID "vcpu_id"
+
+#define KVM_ENTRY_TRACE "kvm:kvm_entry"
+#define KVM_EXIT_TRACE "kvm:kvm_exit"
+#define KVM_EXIT_REASON "exit_reason"
+
+#endif /* _ASM_X86_KVM_PERF_H */
diff --git a/tools/arch/x86/include/uapi/asm/mman.h b/tools/arch/x86/include/uapi/asm/mman.h
new file mode 100644
index 000000000..8449778de
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/mman.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_X86_UAPI_ASM_MMAN_FIX_H
+#define MAP_32BIT 0x40
+#include <uapi/asm-generic/mman.h>
+#endif
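MAP_32BIT asks for a mapping within the low 2 GiB of the address space, useful where 32-bit relative addressing must reach the allocation. Minimal usage sketch:

```c
#include <sys/mman.h>

/* Anonymous page placed below the 2 GiB mark (or the call fails). */
void *low = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
```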
diff --git a/tools/arch/x86/include/uapi/asm/perf_regs.h b/tools/arch/x86/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000..f3329cabc
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/perf_regs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_X86_PERF_REGS_H
+#define _ASM_X86_PERF_REGS_H
+
+enum perf_event_x86_regs {
+ PERF_REG_X86_AX,
+ PERF_REG_X86_BX,
+ PERF_REG_X86_CX,
+ PERF_REG_X86_DX,
+ PERF_REG_X86_SI,
+ PERF_REG_X86_DI,
+ PERF_REG_X86_BP,
+ PERF_REG_X86_SP,
+ PERF_REG_X86_IP,
+ PERF_REG_X86_FLAGS,
+ PERF_REG_X86_CS,
+ PERF_REG_X86_SS,
+ PERF_REG_X86_DS,
+ PERF_REG_X86_ES,
+ PERF_REG_X86_FS,
+ PERF_REG_X86_GS,
+ PERF_REG_X86_R8,
+ PERF_REG_X86_R9,
+ PERF_REG_X86_R10,
+ PERF_REG_X86_R11,
+ PERF_REG_X86_R12,
+ PERF_REG_X86_R13,
+ PERF_REG_X86_R14,
+ PERF_REG_X86_R15,
+
+ PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
+ PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+};
+#endif /* _ASM_X86_PERF_REGS_H */
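These enum values index bits in perf_event_attr.sample_regs_user. A sketch of requesting the user-space IP and SP with each sample (assumes a perf_event_attr named attr configured elsewhere):

```c
/* Sample the user IP and SP alongside each event. */
attr.sample_type |= PERF_SAMPLE_REGS_USER;
attr.sample_regs_user = (1ULL << PERF_REG_X86_IP) |
			(1ULL << PERF_REG_X86_SP);
```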
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
new file mode 100644
index 000000000..a9731f8a4
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -0,0 +1,179 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__SVM_H
+#define _UAPI__SVM_H
+
+#define SVM_EXIT_READ_CR0 0x000
+#define SVM_EXIT_READ_CR2 0x002
+#define SVM_EXIT_READ_CR3 0x003
+#define SVM_EXIT_READ_CR4 0x004
+#define SVM_EXIT_READ_CR8 0x008
+#define SVM_EXIT_WRITE_CR0 0x010
+#define SVM_EXIT_WRITE_CR2 0x012
+#define SVM_EXIT_WRITE_CR3 0x013
+#define SVM_EXIT_WRITE_CR4 0x014
+#define SVM_EXIT_WRITE_CR8 0x018
+#define SVM_EXIT_READ_DR0 0x020
+#define SVM_EXIT_READ_DR1 0x021
+#define SVM_EXIT_READ_DR2 0x022
+#define SVM_EXIT_READ_DR3 0x023
+#define SVM_EXIT_READ_DR4 0x024
+#define SVM_EXIT_READ_DR5 0x025
+#define SVM_EXIT_READ_DR6 0x026
+#define SVM_EXIT_READ_DR7 0x027
+#define SVM_EXIT_WRITE_DR0 0x030
+#define SVM_EXIT_WRITE_DR1 0x031
+#define SVM_EXIT_WRITE_DR2 0x032
+#define SVM_EXIT_WRITE_DR3 0x033
+#define SVM_EXIT_WRITE_DR4 0x034
+#define SVM_EXIT_WRITE_DR5 0x035
+#define SVM_EXIT_WRITE_DR6 0x036
+#define SVM_EXIT_WRITE_DR7 0x037
+#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_INTR 0x060
+#define SVM_EXIT_NMI 0x061
+#define SVM_EXIT_SMI 0x062
+#define SVM_EXIT_INIT 0x063
+#define SVM_EXIT_VINTR 0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ 0x066
+#define SVM_EXIT_GDTR_READ 0x067
+#define SVM_EXIT_LDTR_READ 0x068
+#define SVM_EXIT_TR_READ 0x069
+#define SVM_EXIT_IDTR_WRITE 0x06a
+#define SVM_EXIT_GDTR_WRITE 0x06b
+#define SVM_EXIT_LDTR_WRITE 0x06c
+#define SVM_EXIT_TR_WRITE 0x06d
+#define SVM_EXIT_RDTSC 0x06e
+#define SVM_EXIT_RDPMC 0x06f
+#define SVM_EXIT_PUSHF 0x070
+#define SVM_EXIT_POPF 0x071
+#define SVM_EXIT_CPUID 0x072
+#define SVM_EXIT_RSM 0x073
+#define SVM_EXIT_IRET 0x074
+#define SVM_EXIT_SWINT 0x075
+#define SVM_EXIT_INVD 0x076
+#define SVM_EXIT_PAUSE 0x077
+#define SVM_EXIT_HLT 0x078
+#define SVM_EXIT_INVLPG 0x079
+#define SVM_EXIT_INVLPGA 0x07a
+#define SVM_EXIT_IOIO 0x07b
+#define SVM_EXIT_MSR 0x07c
+#define SVM_EXIT_TASK_SWITCH 0x07d
+#define SVM_EXIT_FERR_FREEZE 0x07e
+#define SVM_EXIT_SHUTDOWN 0x07f
+#define SVM_EXIT_VMRUN 0x080
+#define SVM_EXIT_VMMCALL 0x081
+#define SVM_EXIT_VMLOAD 0x082
+#define SVM_EXIT_VMSAVE 0x083
+#define SVM_EXIT_STGI 0x084
+#define SVM_EXIT_CLGI 0x085
+#define SVM_EXIT_SKINIT 0x086
+#define SVM_EXIT_RDTSCP 0x087
+#define SVM_EXIT_ICEBP 0x088
+#define SVM_EXIT_WBINVD 0x089
+#define SVM_EXIT_MONITOR 0x08a
+#define SVM_EXIT_MWAIT 0x08b
+#define SVM_EXIT_MWAIT_COND 0x08c
+#define SVM_EXIT_XSETBV 0x08d
+#define SVM_EXIT_NPF 0x400
+#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
+#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
+
+#define SVM_EXIT_ERR -1
+
+#define SVM_EXIT_REASONS \
+ { SVM_EXIT_READ_CR0, "read_cr0" }, \
+ { SVM_EXIT_READ_CR2, "read_cr2" }, \
+ { SVM_EXIT_READ_CR3, "read_cr3" }, \
+ { SVM_EXIT_READ_CR4, "read_cr4" }, \
+ { SVM_EXIT_READ_CR8, "read_cr8" }, \
+ { SVM_EXIT_WRITE_CR0, "write_cr0" }, \
+ { SVM_EXIT_WRITE_CR2, "write_cr2" }, \
+ { SVM_EXIT_WRITE_CR3, "write_cr3" }, \
+ { SVM_EXIT_WRITE_CR4, "write_cr4" }, \
+ { SVM_EXIT_WRITE_CR8, "write_cr8" }, \
+ { SVM_EXIT_READ_DR0, "read_dr0" }, \
+ { SVM_EXIT_READ_DR1, "read_dr1" }, \
+ { SVM_EXIT_READ_DR2, "read_dr2" }, \
+ { SVM_EXIT_READ_DR3, "read_dr3" }, \
+ { SVM_EXIT_READ_DR4, "read_dr4" }, \
+ { SVM_EXIT_READ_DR5, "read_dr5" }, \
+ { SVM_EXIT_READ_DR6, "read_dr6" }, \
+ { SVM_EXIT_READ_DR7, "read_dr7" }, \
+ { SVM_EXIT_WRITE_DR0, "write_dr0" }, \
+ { SVM_EXIT_WRITE_DR1, "write_dr1" }, \
+ { SVM_EXIT_WRITE_DR2, "write_dr2" }, \
+ { SVM_EXIT_WRITE_DR3, "write_dr3" }, \
+ { SVM_EXIT_WRITE_DR4, "write_dr4" }, \
+ { SVM_EXIT_WRITE_DR5, "write_dr5" }, \
+ { SVM_EXIT_WRITE_DR6, "write_dr6" }, \
+ { SVM_EXIT_WRITE_DR7, "write_dr7" }, \
+ { SVM_EXIT_EXCP_BASE + DE_VECTOR, "DE excp" }, \
+ { SVM_EXIT_EXCP_BASE + DB_VECTOR, "DB excp" }, \
+ { SVM_EXIT_EXCP_BASE + BP_VECTOR, "BP excp" }, \
+ { SVM_EXIT_EXCP_BASE + OF_VECTOR, "OF excp" }, \
+ { SVM_EXIT_EXCP_BASE + BR_VECTOR, "BR excp" }, \
+ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \
+ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \
+ { SVM_EXIT_EXCP_BASE + DF_VECTOR, "DF excp" }, \
+ { SVM_EXIT_EXCP_BASE + TS_VECTOR, "TS excp" }, \
+ { SVM_EXIT_EXCP_BASE + NP_VECTOR, "NP excp" }, \
+ { SVM_EXIT_EXCP_BASE + SS_VECTOR, "SS excp" }, \
+ { SVM_EXIT_EXCP_BASE + GP_VECTOR, "GP excp" }, \
+ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \
+ { SVM_EXIT_EXCP_BASE + MF_VECTOR, "MF excp" }, \
+ { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \
+ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \
+ { SVM_EXIT_EXCP_BASE + XM_VECTOR, "XF excp" }, \
+ { SVM_EXIT_INTR, "interrupt" }, \
+ { SVM_EXIT_NMI, "nmi" }, \
+ { SVM_EXIT_SMI, "smi" }, \
+ { SVM_EXIT_INIT, "init" }, \
+ { SVM_EXIT_VINTR, "vintr" }, \
+ { SVM_EXIT_CR0_SEL_WRITE, "cr0_sel_write" }, \
+ { SVM_EXIT_IDTR_READ, "read_idtr" }, \
+ { SVM_EXIT_GDTR_READ, "read_gdtr" }, \
+ { SVM_EXIT_LDTR_READ, "read_ldtr" }, \
+	{ SVM_EXIT_TR_READ,	"read_tr" }, \
+ { SVM_EXIT_IDTR_WRITE, "write_idtr" }, \
+ { SVM_EXIT_GDTR_WRITE, "write_gdtr" }, \
+ { SVM_EXIT_LDTR_WRITE, "write_ldtr" }, \
+	{ SVM_EXIT_TR_WRITE,	"write_tr" }, \
+ { SVM_EXIT_RDTSC, "rdtsc" }, \
+ { SVM_EXIT_RDPMC, "rdpmc" }, \
+ { SVM_EXIT_PUSHF, "pushf" }, \
+ { SVM_EXIT_POPF, "popf" }, \
+ { SVM_EXIT_CPUID, "cpuid" }, \
+ { SVM_EXIT_RSM, "rsm" }, \
+ { SVM_EXIT_IRET, "iret" }, \
+ { SVM_EXIT_SWINT, "swint" }, \
+ { SVM_EXIT_INVD, "invd" }, \
+ { SVM_EXIT_PAUSE, "pause" }, \
+ { SVM_EXIT_HLT, "hlt" }, \
+ { SVM_EXIT_INVLPG, "invlpg" }, \
+ { SVM_EXIT_INVLPGA, "invlpga" }, \
+ { SVM_EXIT_IOIO, "io" }, \
+ { SVM_EXIT_MSR, "msr" }, \
+ { SVM_EXIT_TASK_SWITCH, "task_switch" }, \
+ { SVM_EXIT_FERR_FREEZE, "ferr_freeze" }, \
+ { SVM_EXIT_SHUTDOWN, "shutdown" }, \
+ { SVM_EXIT_VMRUN, "vmrun" }, \
+ { SVM_EXIT_VMMCALL, "hypercall" }, \
+ { SVM_EXIT_VMLOAD, "vmload" }, \
+ { SVM_EXIT_VMSAVE, "vmsave" }, \
+ { SVM_EXIT_STGI, "stgi" }, \
+ { SVM_EXIT_CLGI, "clgi" }, \
+ { SVM_EXIT_SKINIT, "skinit" }, \
+ { SVM_EXIT_RDTSCP, "rdtscp" }, \
+ { SVM_EXIT_ICEBP, "icebp" }, \
+ { SVM_EXIT_WBINVD, "wbinvd" }, \
+ { SVM_EXIT_MONITOR, "monitor" }, \
+ { SVM_EXIT_MWAIT, "mwait" }, \
+ { SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_NPF, "npf" }, \
+ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
+ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
+ { SVM_EXIT_ERR, "invalid_guest_state" }
+
+
+#endif /* _UAPI__SVM_H */
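The SVM_EXIT_REASONS macro is shaped for trace-event style {value, string} tables; a hedged decoding sketch (assumes the *_VECTOR constants from <asm/kvm.h> are in scope, as they are in perf):

```c
#include <stddef.h>

static const struct {
	int code;
	const char *name;
} svm_exit_reasons[] = { SVM_EXIT_REASONS };

/* Linear lookup of an exit code's human-readable name. */
static const char *svm_exit_name(int code)
{
	size_t i;

	for (i = 0; i < sizeof(svm_exit_reasons) / sizeof(svm_exit_reasons[0]); i++)
		if (svm_exit_reasons[i].code == code)
			return svm_exit_reasons[i].name;
	return "unknown";
}
```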
diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h
new file mode 100644
index 000000000..30d7d04d7
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/unistd.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_ASM_X86_UNISTD_H
+#define _UAPI_ASM_X86_UNISTD_H
+
+/* x32 syscall flag bit */
+#define __X32_SYSCALL_BIT 0x40000000
+
+#ifndef __KERNEL__
+# ifdef __i386__
+# include <asm/unistd_32.h>
+# elif defined(__ILP32__)
+# include <asm/unistd_x32.h>
+# else
+# include <asm/unistd_64.h>
+# endif
+#endif
+
+#endif /* _UAPI_ASM_X86_UNISTD_H */
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
new file mode 100644
index 000000000..f0b0c90dd
--- /dev/null
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * vmx.h: VMX Architecture related definitions
+ * Copyright (c) 2004, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * A few random additions are:
+ * Copyright (C) 2006 Qumranet
+ * Avi Kivity <avi@qumranet.com>
+ * Yaniv Kamay <yaniv@qumranet.com>
+ *
+ */
+#ifndef _UAPIVMX_H
+#define _UAPIVMX_H
+
+
+#define VMX_EXIT_REASONS_FAILED_VMENTRY 0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MSR_LOAD_FAIL 34
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_TRAP_FLAG 37
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_RDRAND 57
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_VMFUNC 59
+#define EXIT_REASON_ENCLS 60
+#define EXIT_REASON_RDSEED 61
+#define EXIT_REASON_PML_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
+
+#define VMX_EXIT_REASONS \
+ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
+ { EXIT_REASON_EXTERNAL_INTERRUPT, "EXTERNAL_INTERRUPT" }, \
+ { EXIT_REASON_TRIPLE_FAULT, "TRIPLE_FAULT" }, \
+ { EXIT_REASON_PENDING_INTERRUPT, "PENDING_INTERRUPT" }, \
+ { EXIT_REASON_NMI_WINDOW, "NMI_WINDOW" }, \
+ { EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
+ { EXIT_REASON_CPUID, "CPUID" }, \
+ { EXIT_REASON_HLT, "HLT" }, \
+ { EXIT_REASON_INVD, "INVD" }, \
+ { EXIT_REASON_INVLPG, "INVLPG" }, \
+ { EXIT_REASON_RDPMC, "RDPMC" }, \
+ { EXIT_REASON_RDTSC, "RDTSC" }, \
+ { EXIT_REASON_VMCALL, "VMCALL" }, \
+ { EXIT_REASON_VMCLEAR, "VMCLEAR" }, \
+ { EXIT_REASON_VMLAUNCH, "VMLAUNCH" }, \
+ { EXIT_REASON_VMPTRLD, "VMPTRLD" }, \
+ { EXIT_REASON_VMPTRST, "VMPTRST" }, \
+ { EXIT_REASON_VMREAD, "VMREAD" }, \
+ { EXIT_REASON_VMRESUME, "VMRESUME" }, \
+ { EXIT_REASON_VMWRITE, "VMWRITE" }, \
+ { EXIT_REASON_VMOFF, "VMOFF" }, \
+ { EXIT_REASON_VMON, "VMON" }, \
+ { EXIT_REASON_CR_ACCESS, "CR_ACCESS" }, \
+ { EXIT_REASON_DR_ACCESS, "DR_ACCESS" }, \
+ { EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
+ { EXIT_REASON_MSR_READ, "MSR_READ" }, \
+ { EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
+ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
+ { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
+ { EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
+ { EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
+ { EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
+ { EXIT_REASON_PAUSE_INSTRUCTION, "PAUSE_INSTRUCTION" }, \
+ { EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
+ { EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
+ { EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
+ { EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
+ { EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
+ { EXIT_REASON_INVEPT, "INVEPT" }, \
+ { EXIT_REASON_RDTSCP, "RDTSCP" }, \
+ { EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
+ { EXIT_REASON_WBINVD, "WBINVD" }, \
+ { EXIT_REASON_XSETBV, "XSETBV" }, \
+ { EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
+ { EXIT_REASON_RDRAND, "RDRAND" }, \
+ { EXIT_REASON_INVPCID, "INVPCID" }, \
+ { EXIT_REASON_VMFUNC, "VMFUNC" }, \
+ { EXIT_REASON_ENCLS, "ENCLS" }, \
+ { EXIT_REASON_RDSEED, "RDSEED" }, \
+ { EXIT_REASON_PML_FULL, "PML_FULL" }, \
+ { EXIT_REASON_XSAVES, "XSAVES" }, \
+ { EXIT_REASON_XRSTORS, "XRSTORS" }
+
+#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
+#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
+#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
+
+#endif /* _UAPIVMX_H */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
new file mode 100644
index 000000000..3b24dc052
--- /dev/null
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -0,0 +1,297 @@
+/* Copyright 2002 Andi Kleen */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/cpufeatures.h>
+#include <asm/mcsafe_test.h>
+#include <asm/alternative-asm.h>
+#include <asm/export.h>
+
+/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. On CPUs which also have
+ * the enhanced REP MOVSB/STOSB feature (ERMS), those NOPs are changed
+ * to a jmp to memcpy_erms, which does the copy with a single REP; MOVSB.
+ */
+
+.weak memcpy
+
+/*
+ * memcpy - Copy a memory block.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * rax original destination
+ */
+ENTRY(__memcpy)
+ENTRY(memcpy)
+ ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+ "jmp memcpy_erms", X86_FEATURE_ERMS
+
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ shrq $3, %rcx
+ andl $7, %edx
+ rep movsq
+ movl %edx, %ecx
+ rep movsb
+ ret
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
+EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL(__memcpy)
+
+/*
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
+ */
+ENTRY(memcpy_erms)
+ movq %rdi, %rax
+ movq %rdx, %rcx
+ rep movsb
+ ret
+ENDPROC(memcpy_erms)
+
+ENTRY(memcpy_orig)
+ movq %rdi, %rax
+
+ cmpq $0x20, %rdx
+ jb .Lhandle_tail
+
+ /*
+ * We check whether memory false dependence could occur,
+ * then jump to corresponding copy mode.
+ */
+ cmp %dil, %sil
+ jl .Lcopy_backward
+ subq $0x20, %rdx
+.Lcopy_forward_loop:
+ subq $0x20, %rdx
+
+ /*
+ * Move in blocks of 4x8 bytes:
+ */
+ movq 0*8(%rsi), %r8
+ movq 1*8(%rsi), %r9
+ movq 2*8(%rsi), %r10
+ movq 3*8(%rsi), %r11
+ leaq 4*8(%rsi), %rsi
+
+ movq %r8, 0*8(%rdi)
+ movq %r9, 1*8(%rdi)
+ movq %r10, 2*8(%rdi)
+ movq %r11, 3*8(%rdi)
+ leaq 4*8(%rdi), %rdi
+ jae .Lcopy_forward_loop
+ addl $0x20, %edx
+ jmp .Lhandle_tail
+
+.Lcopy_backward:
+ /*
+ * Calculate copy position to tail.
+ */
+ addq %rdx, %rsi
+ addq %rdx, %rdi
+ subq $0x20, %rdx
+ /*
+ * At most 3 ALU operations in one cycle,
+ * so append NOPs in the same 16-byte chunk.
+ */
+ .p2align 4
+.Lcopy_backward_loop:
+ subq $0x20, %rdx
+ movq -1*8(%rsi), %r8
+ movq -2*8(%rsi), %r9
+ movq -3*8(%rsi), %r10
+ movq -4*8(%rsi), %r11
+ leaq -4*8(%rsi), %rsi
+ movq %r8, -1*8(%rdi)
+ movq %r9, -2*8(%rdi)
+ movq %r10, -3*8(%rdi)
+ movq %r11, -4*8(%rdi)
+ leaq -4*8(%rdi), %rdi
+ jae .Lcopy_backward_loop
+
+ /*
+ * Calculate copy position to head.
+ */
+ addl $0x20, %edx
+ subq %rdx, %rsi
+ subq %rdx, %rdi
+.Lhandle_tail:
+ cmpl $16, %edx
+ jb .Lless_16bytes
+
+ /*
+ * Move data from 16 bytes to 31 bytes.
+ */
+ movq 0*8(%rsi), %r8
+ movq 1*8(%rsi), %r9
+ movq -2*8(%rsi, %rdx), %r10
+ movq -1*8(%rsi, %rdx), %r11
+ movq %r8, 0*8(%rdi)
+ movq %r9, 1*8(%rdi)
+ movq %r10, -2*8(%rdi, %rdx)
+ movq %r11, -1*8(%rdi, %rdx)
+ retq
+ .p2align 4
+.Lless_16bytes:
+ cmpl $8, %edx
+ jb .Lless_8bytes
+ /*
+ * Move data from 8 bytes to 15 bytes.
+ */
+ movq 0*8(%rsi), %r8
+ movq -1*8(%rsi, %rdx), %r9
+ movq %r8, 0*8(%rdi)
+ movq %r9, -1*8(%rdi, %rdx)
+ retq
+ .p2align 4
+.Lless_8bytes:
+ cmpl $4, %edx
+ jb .Lless_3bytes
+
+ /*
+ * Move data from 4 bytes to 7 bytes.
+ */
+ movl (%rsi), %ecx
+ movl -4(%rsi, %rdx), %r8d
+ movl %ecx, (%rdi)
+ movl %r8d, -4(%rdi, %rdx)
+ retq
+ .p2align 4
+.Lless_3bytes:
+ subl $1, %edx
+ jb .Lend
+ /*
+ * Move data from 1 byte to 3 bytes.
+ */
+ movzbl (%rsi), %ecx
+ jz .Lstore_1byte
+ movzbq 1(%rsi), %r8
+ movzbq (%rsi, %rdx), %r9
+ movb %r8b, 1(%rdi)
+ movb %r9b, (%rdi, %rdx)
+.Lstore_1byte:
+ movb %cl, (%rdi)
+
+.Lend:
+ retq
+ENDPROC(memcpy_orig)
+
+#ifndef CONFIG_UML
+
+MCSAFE_TEST_CTL
+
+/*
+ * __memcpy_mcsafe - memory copy with machine check exception handling
+ * Note that we only catch machine checks when reading the source addresses.
+ * Writes to target are posted and don't generate machine checks.
+ */
+ENTRY(__memcpy_mcsafe)
+ cmpl $8, %edx
+ /* Less than 8 bytes? Go to byte copy loop */
+ jb .L_no_whole_words
+
+ /* Check for bad alignment of source */
+ testl $7, %esi
+ /* Already aligned */
+ jz .L_8byte_aligned
+
+ /* Copy one byte at a time until source is 8-byte aligned */
+ movl %esi, %ecx
+ andl $7, %ecx
+ subl $8, %ecx
+ negl %ecx
+ subl %ecx, %edx
+.L_read_leading_bytes:
+ movb (%rsi), %al
+ MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
+ MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+ movl %edx, %ecx
+ andl $7, %edx
+ shrl $3, %ecx
+ jz .L_no_whole_words
+
+.L_read_words:
+ movq (%rsi), %r8
+ MCSAFE_TEST_SRC %rsi 8 .E_read_words
+ MCSAFE_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+ movq %r8, (%rdi)
+ addq $8, %rsi
+ addq $8, %rdi
+ decl %ecx
+ jnz .L_read_words
+
+ /* Any trailing bytes? */
+.L_no_whole_words:
+ andl %edx, %edx
+ jz .L_done_memcpy_trap
+
+ /* Copy trailing bytes */
+ movl %edx, %ecx
+.L_read_trailing_bytes:
+ movb (%rsi), %al
+ MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
+ MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+ movb %al, (%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz .L_read_trailing_bytes
+
+ /* Copy successful. Return zero */
+.L_done_memcpy_trap:
+ xorl %eax, %eax
+ ret
+ENDPROC(__memcpy_mcsafe)
+EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
+
+ .section .fixup, "ax"
+ /*
+ * Return number of bytes not copied for any failure. Note that
+ * there is no "tail" handling since the source buffer is 8-byte
+ * aligned and poison is cacheline aligned.
+ */
+.E_read_words:
+ shll $3, %ecx
+.E_leading_bytes:
+ addl %edx, %ecx
+.E_trailing_bytes:
+ mov %ecx, %eax
+ ret
+
+ /*
+ * For write fault handling, given the destination is unaligned,
+ * we handle faults on multi-byte writes with a byte-by-byte
+ * copy up to the write-protected page.
+ */
+.E_write_words:
+ shll $3, %ecx
+ addl %edx, %ecx
+ movl %ecx, %edx
+ jmp mcsafe_handle_tail
+
+ .previous
+
+ _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+ _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+ _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+ _ASM_EXTABLE(.L_write_words, .E_write_words)
+ _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif
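The fixup section above gives __memcpy_mcsafe() its calling convention: it returns zero on success, or the number of bytes not copied when a machine check (or a write fault) interrupts the copy. A minimal caller sketch under that convention; copy_from_pmem() below is illustrative, not taken from this file:

/* Illustrative caller, assuming the __memcpy_mcsafe() semantics above:
 * 0 on success, else the number of bytes NOT copied. */
#include <stddef.h>
#include <string.h>

extern unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);

/* Copy from possibly-poisoned memory; zero whatever could not be read. */
static size_t copy_from_pmem(void *dst, const void *src, size_t len)
{
	unsigned long rem = __memcpy_mcsafe(dst, src, len);

	if (rem)
		memset((char *)dst + (len - rem), 0, rem);
	return len - rem;	/* bytes successfully copied */
}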
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S
new file mode 100644
index 000000000..f8f3dc0a6
--- /dev/null
+++ b/tools/arch/x86/lib/memset_64.S
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2002 Andi Kleen, SuSE Labs */
+
+#include <linux/linkage.h>
+#include <asm/cpufeatures.h>
+#include <asm/alternative-asm.h>
+
+.weak memset
+
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses the
+ * fast string instructions to get better performance than the original
+ * implementation; the code is simpler and shorter as well.
+ *
+ * rdi destination
+ * rsi value (char)
+ * rdx count (bytes)
+ *
+ * rax original destination
+ */
+ENTRY(memset)
+ENTRY(__memset)
+ /*
+ * Some CPUs support the enhanced REP MOVSB/STOSB feature (ERMS). It is
+ * recommended to use it when possible. If not available, use the fast
+ * string instructions (REP_GOOD).
+ *
+ * Otherwise, fall back to the original memset function.
+ */
+ ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+ "jmp memset_erms", X86_FEATURE_ERMS
+
+ movq %rdi,%r9
+ movq %rdx,%rcx
+ andl $7,%edx
+ shrq $3,%rcx
+ /* expand byte value */
+ movzbl %sil,%esi
+ movabs $0x0101010101010101,%rax
+ imulq %rsi,%rax
+ rep stosq
+ movl %edx,%ecx
+ rep stosb
+ movq %r9,%rax
+ ret
+ENDPROC(memset)
+ENDPROC(__memset)
+
+/*
+ * ISO C memset - set a memory block to a byte value. This function uses the
+ * enhanced REP STOSB feature in place of the fast string path; the code is
+ * simpler and shorter as well.
+ *
+ * rdi destination
+ * rsi value (char)
+ * rdx count (bytes)
+ *
+ * rax original destination
+ */
+ENTRY(memset_erms)
+ movq %rdi,%r9
+ movb %sil,%al
+ movq %rdx,%rcx
+ rep stosb
+ movq %r9,%rax
+ ret
+ENDPROC(memset_erms)
+
+ENTRY(memset_orig)
+ movq %rdi,%r10
+
+ /* expand byte value */
+ movzbl %sil,%ecx
+ movabs $0x0101010101010101,%rax
+ imulq %rcx,%rax
+
+ /* align dst */
+ movl %edi,%r9d
+ andl $7,%r9d
+ jnz .Lbad_alignment
+.Lafter_bad_alignment:
+
+ movq %rdx,%rcx
+ shrq $6,%rcx
+ jz .Lhandle_tail
+
+ .p2align 4
+.Lloop_64:
+ decq %rcx
+ movq %rax,(%rdi)
+ movq %rax,8(%rdi)
+ movq %rax,16(%rdi)
+ movq %rax,24(%rdi)
+ movq %rax,32(%rdi)
+ movq %rax,40(%rdi)
+ movq %rax,48(%rdi)
+ movq %rax,56(%rdi)
+ leaq 64(%rdi),%rdi
+ jnz .Lloop_64
+
+ /* Handle the tail in loops. The loops should be faster than
+ hard-to-predict jump tables. */
+ .p2align 4
+.Lhandle_tail:
+ movl %edx,%ecx
+ andl $63&(~7),%ecx
+ jz .Lhandle_7
+ shrl $3,%ecx
+ .p2align 4
+.Lloop_8:
+ decl %ecx
+ movq %rax,(%rdi)
+ leaq 8(%rdi),%rdi
+ jnz .Lloop_8
+
+.Lhandle_7:
+ andl $7,%edx
+ jz .Lende
+ .p2align 4
+.Lloop_1:
+ decl %edx
+ movb %al,(%rdi)
+ leaq 1(%rdi),%rdi
+ jnz .Lloop_1
+
+.Lende:
+ movq %r10,%rax
+ ret
+
+.Lbad_alignment:
+ cmpq $7,%rdx
+ jbe .Lhandle_7
+ movq %rax,(%rdi) /* unaligned store */
+ movq $8,%r8
+ subq %r9,%r8
+ addq %r8,%rdi
+ subq %r8,%rdx
+ jmp .Lafter_bad_alignment
+.Lfinal:
+ENDPROC(memset_orig)
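Both paths above expand the fill byte into a full 64-bit pattern by multiplying the zero-extended byte (movzbl) with 0x0101010101010101. A small stand-alone C check of that arithmetic:

/* The byte-replication trick from memset above: multiplying a
 * zero-extended byte by 0x0101010101010101 copies it into every
 * byte lane of a 64-bit word. */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint8_t c = 0xab;
	uint64_t pattern = (uint64_t)c * 0x0101010101010101ULL;

	assert(pattern == 0xababababababababULL);
	return 0;
}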
diff --git a/tools/arch/xtensa/include/asm/barrier.h b/tools/arch/xtensa/include/asm/barrier.h
new file mode 100644
index 000000000..583800bd7
--- /dev/null
+++ b/tools/arch/xtensa/include/asm/barrier.h
@@ -0,0 +1,18 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2012 Tensilica Inc.
+ */
+
+#ifndef _TOOLS_LINUX_XTENSA_SYSTEM_H
+#define _TOOLS_LINUX_XTENSA_SYSTEM_H
+
+#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define rmb() barrier()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_XTENSA_SYSTEM_H */
diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h
new file mode 100644
index 000000000..34dde6f44
--- /dev/null
+++ b/tools/arch/xtensa/include/uapi/asm/mman.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H
+#define TOOLS_ARCH_XTENSA_UAPI_ASM_MMAN_FIX_H
+#define MADV_DODUMP 17
+#define MADV_DOFORK 11
+#define MADV_DONTDUMP 16
+#define MADV_DONTFORK 10
+#define MADV_DONTNEED 4
+#define MADV_FREE 8
+#define MADV_HUGEPAGE 14
+#define MADV_MERGEABLE 12
+#define MADV_NOHUGEPAGE 15
+#define MADV_NORMAL 0
+#define MADV_RANDOM 1
+#define MADV_REMOVE 9
+#define MADV_SEQUENTIAL 2
+#define MADV_UNMERGEABLE 13
+#define MADV_WILLNEED 3
+#define MAP_ANONYMOUS 0x0800
+#define MAP_DENYWRITE 0x2000
+#define MAP_EXECUTABLE 0x4000
+#define MAP_FILE 0
+#define MAP_FIXED 0x010
+#define MAP_GROWSDOWN 0x1000
+#define MAP_HUGETLB 0x80000
+#define MAP_LOCKED 0x8000
+#define MAP_NONBLOCK 0x20000
+#define MAP_NORESERVE 0x0400
+#define MAP_POPULATE 0x10000
+#define MAP_PRIVATE 0x002
+#define MAP_SHARED 0x001
+#define MAP_STACK 0x40000
+#define PROT_EXEC 0x4
+#define PROT_GROWSDOWN 0x01000000
+#define PROT_GROWSUP 0x02000000
+#define PROT_NONE 0x0
+#define PROT_READ 0x1
+#define PROT_SEM 0x10
+#define PROT_WRITE 0x2
+/* MADV_HWPOISON is undefined on xtensa, fix it for perf */
+#define MADV_HWPOISON 100
+/* MADV_SOFT_OFFLINE is undefined on xtensa, fix it for perf */
+#define MADV_SOFT_OFFLINE 101
+/* MAP_32BIT is undefined on xtensa, fix it for perf */
+#define MAP_32BIT 0
+/* MAP_UNINITIALIZED is undefined on xtensa, fix it for perf */
+#define MAP_UNINITIALIZED 0
+#endif
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
new file mode 100644
index 000000000..dfe2bd5a4
--- /dev/null
+++ b/tools/bpf/.gitignore
@@ -0,0 +1,5 @@
+FEATURE-DUMP.bpf
+bpf_asm
+bpf_dbg
+bpf_exp.yacc.*
+bpf_jit_disasm
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
new file mode 100644
index 000000000..53b60ad45
--- /dev/null
+++ b/tools/bpf/Makefile
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+prefix ?= /usr/local
+
+CC = gcc
+LEX = flex
+YACC = bison
+MAKE = make
+INSTALL ?= install
+
+CFLAGS += -Wall -O2
+CFLAGS += -D__EXPORTED_HEADERS__ -I$(srctree)/include/uapi -I$(srctree)/include
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+FEATURE_USER = .bpf
+FEATURE_TESTS = libbfd disassembler-four-args
+FEATURE_DISPLAY = libbfd disassembler-four-args
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+ifeq ($(feature-disassembler-four-args), 1)
+CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
+$(OUTPUT)%.yacc.c: $(srctree)/tools/bpf/%.y
+ $(QUIET_BISON)$(YACC) -o $@ -d $<
+
+$(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
+ $(QUIET_FLEX)$(LEX) -o $@ $<
+
+$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
+
+$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
+$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
+ $(QUIET_CC)$(COMPILE.c) -o $@ $<
+
+PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
+
+all: $(PROGS) bpftool
+
+$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
+$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lopcodes -lbfd -ldl
+
+$(OUTPUT)bpf_dbg: $(OUTPUT)bpf_dbg.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ -lreadline
+
+$(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.lex.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^
+
+$(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
+$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
+
+clean: bpftool_clean
+ $(call QUIET_CLEAN, bpf-progs)
+ $(Q)rm -rf $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
+ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)rm -f $(OUTPUT)FEATURE-DUMP.bpf
+
+install: $(PROGS) bpftool_install
+ $(call QUIET_INSTALL, bpf_jit_disasm)
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin
+ $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm
+ $(call QUIET_INSTALL, bpf_dbg)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_dbg $(DESTDIR)$(prefix)/bin/bpf_dbg
+ $(call QUIET_INSTALL, bpf_asm)
+ $(Q)$(INSTALL) $(OUTPUT)bpf_asm $(DESTDIR)$(prefix)/bin/bpf_asm
+
+bpftool:
+ $(call descend,bpftool)
+
+bpftool_install:
+ $(call descend,bpftool,install)
+
+bpftool_clean:
+ $(call descend,bpftool,clean)
+
+.PHONY: all install clean bpftool bpftool_install bpftool_clean
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
new file mode 100644
index 000000000..c34fea77f
--- /dev/null
+++ b/tools/bpf/Makefile.helpers
@@ -0,0 +1,59 @@
+ifndef allow-override
+ include ../scripts/Makefile.include
+ include ../scripts/utilities.mak
+else
+ # Assume Makefile.helpers is being run from bpftool/Documentation
+ # subdirectory. Go up two more directories to fetch bpf.h header and
+ # associated script.
+ UP2DIR := ../../
+endif
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man7dir = $(mandir)/man7
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+helpers: man7
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
+ $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
+
+$(OUTPUT)%.7: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+ $(error "rst2man not found, but required to generate man pages")
+endif
+ $(QUIET_GEN)rst2man $< > $@
+
+helpers-clean:
+ $(call QUIET_CLEAN, eBPF_helpers-manpage)
+ $(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
+
+helpers-install: helpers
+ $(call QUIET_INSTALL, eBPF_helpers-manpage)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
+ $(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+helpers-uninstall:
+ $(call QUIET_UNINST, eBPF_helpers-manpage)
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
+ $(Q)$(RMDIR) $(DESTDIR)$(man7dir)
+
+.PHONY: helpers helpers-clean helpers-install helpers-uninstall
diff --git a/tools/bpf/bpf_asm.c b/tools/bpf/bpf_asm.c
new file mode 100644
index 000000000..c15aef097
--- /dev/null
+++ b/tools/bpf/bpf_asm.c
@@ -0,0 +1,52 @@
+/*
+ * Minimal BPF assembler
+ *
+ * Instead of libpcap's high-level filter expressions, it can be quite
+ * useful to define filters in low-level BPF assembler (kept close to
+ * Steven McCanne and Van Jacobson's original BPF paper), in particular
+ * for BPF JIT implementors, JIT security auditors, or just for defining
+ * BPF expressions that contain extensions not supported by compilers.
+ *
+ * How to get into it:
+ *
+ * 1) Read Documentation/networking/filter.txt
+ * 2) Run `bpf_asm [-c] <filter-prog file>` to translate into binary
+ * blob that is loadable with xt_bpf, cls_bpf et al. Note: -c will
+ * pretty print a C-like construct.
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+extern void bpf_asm_compile(FILE *fp, bool cstyle);
+
+int main(int argc, char **argv)
+{
+ FILE *fp = stdin;
+ bool cstyle = false;
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ if (!strncmp("-c", argv[i], 2)) {
+ cstyle = true;
+ continue;
+ }
+
+ fp = fopen(argv[i], "r");
+ if (!fp) {
+ fp = stdin;
+ continue;
+ }
+
+ break;
+ }
+
+ bpf_asm_compile(fp, cstyle);
+
+ return 0;
+}
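The "binary blob" mentioned in the header comment is an array of { code, jt, jf, k } quadruples, i.e. struct sock_filter instructions. A hedged sketch of how such a blob is handed to the kernel, using the same SO_ATTACH_FILTER call that bpf_dbg's bpf_runnable() performs further below; the two-instruction filter here is illustrative only:

/* Sketch: attach a bpf_asm-style { code, jt, jf, k } blob to a socket.
 * The filter below (ld #len; ret a) returns the packet length as the
 * verdict, accepting any non-empty packet; illustration only. */
#include <linux/filter.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	struct sock_filter insns[] = {
		{ 0x80, 0, 0, 0x00000000 },	/* ld #len */
		{ 0x16, 0, 0, 0x00000000 },	/* ret a   */
	};
	struct sock_fprog prog = {
		.len = sizeof(insns) / sizeof(insns[0]),
		.filter = insns,
	};
	int sock = socket(AF_INET, SOCK_DGRAM, 0);

	if (sock < 0 || setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER,
				   &prog, sizeof(prog)) < 0) {
		perror("SO_ATTACH_FILTER");
		return 1;
	}
	close(sock);
	return 0;
}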
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
new file mode 100644
index 000000000..61b9aa5d6
--- /dev/null
+++ b/tools/bpf/bpf_dbg.c
@@ -0,0 +1,1398 @@
+/*
+ * Minimal BPF debugger
+ *
+ * Minimal BPF debugger that mimics the kernel's engine (w/o extensions)
+ * and allows single stepping through selected packets from a pcap with
+ * a provided user filter, in order to facilitate verification of a BPF
+ * program. Among other things, this is useful for verifying BPF programs
+ * before attaching them to a live system, e.g. in socket filters,
+ * cls_bpf, xt_bpf, the team driver or PTP code, in particular when a
+ * single, more complex BPF program is used. Such programs typically
+ * arise when multiple simple BPF programs are combined into one to
+ * avoid parsing the same headers multiple times, thereby optimizing
+ * the time it takes to reach a verdict.
+ *
+ * For more on how to debug BPF opcodes, see Documentation/networking/filter.txt,
+ * which is the main document on BPF. Mini howto for getting started:
+ *
+ * 1) `./bpf_dbg` to enter the shell (shell cmds denoted with '>'):
+ * 2) > load bpf 6,40 0 0 12,21 0 3 20... (output from `bpf_asm` or
+ * `tcpdump -iem1 -ddd port 22 | tr '\n' ','` to load as filter)
+ * 3) > load pcap foo.pcap
+ * 4) > run <n>/disassemble/dump/quit (self-explanatory)
+ * 5) > breakpoint 2 (sets bp at loaded BPF insns 2, do `run` then;
+ * multiple bps can be set, of course, a call to `breakpoint`
+ * w/o args shows currently loaded bps, `breakpoint reset` for
+ * resetting all breakpoints)
+ * 6) > select 3 (`run` etc will start from the 3rd packet in the pcap)
+ * 7) > step [-<n>, +<n>] (performs single stepping through the BPF)
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <setjmp.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <readline/readline.h>
+#include <readline/history.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <arpa/inet.h>
+#include <net/ethernet.h>
+
+#define TCPDUMP_MAGIC 0xa1b2c3d4
+
+#define BPF_LDX_B (BPF_LDX | BPF_B)
+#define BPF_LDX_W (BPF_LDX | BPF_W)
+#define BPF_JMP_JA (BPF_JMP | BPF_JA)
+#define BPF_JMP_JEQ (BPF_JMP | BPF_JEQ)
+#define BPF_JMP_JGT (BPF_JMP | BPF_JGT)
+#define BPF_JMP_JGE (BPF_JMP | BPF_JGE)
+#define BPF_JMP_JSET (BPF_JMP | BPF_JSET)
+#define BPF_ALU_ADD (BPF_ALU | BPF_ADD)
+#define BPF_ALU_SUB (BPF_ALU | BPF_SUB)
+#define BPF_ALU_MUL (BPF_ALU | BPF_MUL)
+#define BPF_ALU_DIV (BPF_ALU | BPF_DIV)
+#define BPF_ALU_MOD (BPF_ALU | BPF_MOD)
+#define BPF_ALU_NEG (BPF_ALU | BPF_NEG)
+#define BPF_ALU_AND (BPF_ALU | BPF_AND)
+#define BPF_ALU_OR (BPF_ALU | BPF_OR)
+#define BPF_ALU_XOR (BPF_ALU | BPF_XOR)
+#define BPF_ALU_LSH (BPF_ALU | BPF_LSH)
+#define BPF_ALU_RSH (BPF_ALU | BPF_RSH)
+#define BPF_MISC_TAX (BPF_MISC | BPF_TAX)
+#define BPF_MISC_TXA (BPF_MISC | BPF_TXA)
+#define BPF_LD_B (BPF_LD | BPF_B)
+#define BPF_LD_H (BPF_LD | BPF_H)
+#define BPF_LD_W (BPF_LD | BPF_W)
+
+#ifndef array_size
+# define array_size(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef __check_format_printf
+# define __check_format_printf(pos_fmtstr, pos_fmtargs) \
+ __attribute__ ((format (printf, (pos_fmtstr), (pos_fmtargs))))
+#endif
+
+enum {
+ CMD_OK,
+ CMD_ERR,
+ CMD_EX,
+};
+
+struct shell_cmd {
+ const char *name;
+ int (*func)(char *args);
+};
+
+struct pcap_filehdr {
+ uint32_t magic;
+ uint16_t version_major;
+ uint16_t version_minor;
+ int32_t thiszone;
+ uint32_t sigfigs;
+ uint32_t snaplen;
+ uint32_t linktype;
+};
+
+struct pcap_timeval {
+ int32_t tv_sec;
+ int32_t tv_usec;
+};
+
+struct pcap_pkthdr {
+ struct pcap_timeval ts;
+ uint32_t caplen;
+ uint32_t len;
+};
+
+struct bpf_regs {
+ uint32_t A;
+ uint32_t X;
+ uint32_t M[BPF_MEMWORDS];
+ uint32_t R;
+ bool Rs;
+ uint16_t Pc;
+};
+
+static struct sock_filter bpf_image[BPF_MAXINSNS + 1];
+static unsigned int bpf_prog_len;
+
+static int bpf_breakpoints[64];
+static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1];
+static struct bpf_regs bpf_curr;
+static unsigned int bpf_regs_len;
+
+static int pcap_fd = -1;
+static unsigned int pcap_packet;
+static size_t pcap_map_size;
+static char *pcap_ptr_va_start, *pcap_ptr_va_curr;
+
+static const char * const op_table[] = {
+ [BPF_ST] = "st",
+ [BPF_STX] = "stx",
+ [BPF_LD_B] = "ldb",
+ [BPF_LD_H] = "ldh",
+ [BPF_LD_W] = "ld",
+ [BPF_LDX] = "ldx",
+ [BPF_LDX_B] = "ldxb",
+ [BPF_JMP_JA] = "ja",
+ [BPF_JMP_JEQ] = "jeq",
+ [BPF_JMP_JGT] = "jgt",
+ [BPF_JMP_JGE] = "jge",
+ [BPF_JMP_JSET] = "jset",
+ [BPF_ALU_ADD] = "add",
+ [BPF_ALU_SUB] = "sub",
+ [BPF_ALU_MUL] = "mul",
+ [BPF_ALU_DIV] = "div",
+ [BPF_ALU_MOD] = "mod",
+ [BPF_ALU_NEG] = "neg",
+ [BPF_ALU_AND] = "and",
+ [BPF_ALU_OR] = "or",
+ [BPF_ALU_XOR] = "xor",
+ [BPF_ALU_LSH] = "lsh",
+ [BPF_ALU_RSH] = "rsh",
+ [BPF_MISC_TAX] = "tax",
+ [BPF_MISC_TXA] = "txa",
+ [BPF_RET] = "ret",
+};
+
+static __check_format_printf(1, 2) int rl_printf(const char *fmt, ...)
+{
+ int ret;
+ va_list vl;
+
+ va_start(vl, fmt);
+ ret = vfprintf(rl_outstream, fmt, vl);
+ va_end(vl);
+
+ return ret;
+}
+
+static int matches(const char *cmd, const char *pattern)
+{
+ int len = strlen(cmd);
+
+ if (len > strlen(pattern))
+ return -1;
+
+ return memcmp(pattern, cmd, len);
+}
+
+static void hex_dump(const uint8_t *buf, size_t len)
+{
+ int i;
+
+ rl_printf("%3u: ", 0);
+ for (i = 0; i < len; i++) {
+ if (i && !(i % 16))
+ rl_printf("\n%3u: ", i);
+ rl_printf("%02x ", buf[i]);
+ }
+ rl_printf("\n");
+}
+
+static bool bpf_prog_loaded(void)
+{
+ if (bpf_prog_len == 0)
+ rl_printf("no bpf program loaded!\n");
+
+ return bpf_prog_len > 0;
+}
+
+static void bpf_disasm(const struct sock_filter f, unsigned int i)
+{
+ const char *op, *fmt;
+ int val = f.k;
+ char buf[256];
+
+ switch (f.code) {
+ case BPF_RET | BPF_K:
+ op = op_table[BPF_RET];
+ fmt = "#%#x";
+ break;
+ case BPF_RET | BPF_A:
+ op = op_table[BPF_RET];
+ fmt = "a";
+ break;
+ case BPF_RET | BPF_X:
+ op = op_table[BPF_RET];
+ fmt = "x";
+ break;
+ case BPF_MISC_TAX:
+ op = op_table[BPF_MISC_TAX];
+ fmt = "";
+ break;
+ case BPF_MISC_TXA:
+ op = op_table[BPF_MISC_TXA];
+ fmt = "";
+ break;
+ case BPF_ST:
+ op = op_table[BPF_ST];
+ fmt = "M[%d]";
+ break;
+ case BPF_STX:
+ op = op_table[BPF_STX];
+ fmt = "M[%d]";
+ break;
+ case BPF_LD_W | BPF_ABS:
+ op = op_table[BPF_LD_W];
+ fmt = "[%d]";
+ break;
+ case BPF_LD_H | BPF_ABS:
+ op = op_table[BPF_LD_H];
+ fmt = "[%d]";
+ break;
+ case BPF_LD_B | BPF_ABS:
+ op = op_table[BPF_LD_B];
+ fmt = "[%d]";
+ break;
+ case BPF_LD_W | BPF_LEN:
+ op = op_table[BPF_LD_W];
+ fmt = "#len";
+ break;
+ case BPF_LD_W | BPF_IND:
+ op = op_table[BPF_LD_W];
+ fmt = "[x+%d]";
+ break;
+ case BPF_LD_H | BPF_IND:
+ op = op_table[BPF_LD_H];
+ fmt = "[x+%d]";
+ break;
+ case BPF_LD_B | BPF_IND:
+ op = op_table[BPF_LD_B];
+ fmt = "[x+%d]";
+ break;
+ case BPF_LD | BPF_IMM:
+ op = op_table[BPF_LD_W];
+ fmt = "#%#x";
+ break;
+ case BPF_LDX | BPF_IMM:
+ op = op_table[BPF_LDX];
+ fmt = "#%#x";
+ break;
+ case BPF_LDX_B | BPF_MSH:
+ op = op_table[BPF_LDX_B];
+ fmt = "4*([%d]&0xf)";
+ break;
+ case BPF_LD | BPF_MEM:
+ op = op_table[BPF_LD_W];
+ fmt = "M[%d]";
+ break;
+ case BPF_LDX | BPF_MEM:
+ op = op_table[BPF_LDX];
+ fmt = "M[%d]";
+ break;
+ case BPF_JMP_JA:
+ op = op_table[BPF_JMP_JA];
+ fmt = "%d";
+ val = i + 1 + f.k;
+ break;
+ case BPF_JMP_JGT | BPF_X:
+ op = op_table[BPF_JMP_JGT];
+ fmt = "x";
+ break;
+ case BPF_JMP_JGT | BPF_K:
+ op = op_table[BPF_JMP_JGT];
+ fmt = "#%#x";
+ break;
+ case BPF_JMP_JGE | BPF_X:
+ op = op_table[BPF_JMP_JGE];
+ fmt = "x";
+ break;
+ case BPF_JMP_JGE | BPF_K:
+ op = op_table[BPF_JMP_JGE];
+ fmt = "#%#x";
+ break;
+ case BPF_JMP_JEQ | BPF_X:
+ op = op_table[BPF_JMP_JEQ];
+ fmt = "x";
+ break;
+ case BPF_JMP_JEQ | BPF_K:
+ op = op_table[BPF_JMP_JEQ];
+ fmt = "#%#x";
+ break;
+ case BPF_JMP_JSET | BPF_X:
+ op = op_table[BPF_JMP_JSET];
+ fmt = "x";
+ break;
+ case BPF_JMP_JSET | BPF_K:
+ op = op_table[BPF_JMP_JSET];
+ fmt = "#%#x";
+ break;
+ case BPF_ALU_NEG:
+ op = op_table[BPF_ALU_NEG];
+ fmt = "";
+ break;
+ case BPF_ALU_LSH | BPF_X:
+ op = op_table[BPF_ALU_LSH];
+ fmt = "x";
+ break;
+ case BPF_ALU_LSH | BPF_K:
+ op = op_table[BPF_ALU_LSH];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_RSH | BPF_X:
+ op = op_table[BPF_ALU_RSH];
+ fmt = "x";
+ break;
+ case BPF_ALU_RSH | BPF_K:
+ op = op_table[BPF_ALU_RSH];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_ADD | BPF_X:
+ op = op_table[BPF_ALU_ADD];
+ fmt = "x";
+ break;
+ case BPF_ALU_ADD | BPF_K:
+ op = op_table[BPF_ALU_ADD];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_SUB | BPF_X:
+ op = op_table[BPF_ALU_SUB];
+ fmt = "x";
+ break;
+ case BPF_ALU_SUB | BPF_K:
+ op = op_table[BPF_ALU_SUB];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_MUL | BPF_X:
+ op = op_table[BPF_ALU_MUL];
+ fmt = "x";
+ break;
+ case BPF_ALU_MUL | BPF_K:
+ op = op_table[BPF_ALU_MUL];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_DIV | BPF_X:
+ op = op_table[BPF_ALU_DIV];
+ fmt = "x";
+ break;
+ case BPF_ALU_DIV | BPF_K:
+ op = op_table[BPF_ALU_DIV];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_MOD | BPF_X:
+ op = op_table[BPF_ALU_MOD];
+ fmt = "x";
+ break;
+ case BPF_ALU_MOD | BPF_K:
+ op = op_table[BPF_ALU_MOD];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_AND | BPF_X:
+ op = op_table[BPF_ALU_AND];
+ fmt = "x";
+ break;
+ case BPF_ALU_AND | BPF_K:
+ op = op_table[BPF_ALU_AND];
+ fmt = "#%#x";
+ break;
+ case BPF_ALU_OR | BPF_X:
+ op = op_table[BPF_ALU_OR];
+ fmt = "x";
+ break;
+ case BPF_ALU_OR | BPF_K:
+ op = op_table[BPF_ALU_OR];
+ fmt = "#%#x";
+ break;
+ case BPF_ALU_XOR | BPF_X:
+ op = op_table[BPF_ALU_XOR];
+ fmt = "x";
+ break;
+ case BPF_ALU_XOR | BPF_K:
+ op = op_table[BPF_ALU_XOR];
+ fmt = "#%#x";
+ break;
+ default:
+ op = "nosup";
+ fmt = "%#x";
+ val = f.code;
+ break;
+ }
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, sizeof(buf), fmt, val);
+ buf[sizeof(buf) - 1] = 0;
+
+ if ((BPF_CLASS(f.code) == BPF_JMP && BPF_OP(f.code) != BPF_JA))
+ rl_printf("l%d:\t%s %s, l%d, l%d\n", i, op, buf,
+ i + 1 + f.jt, i + 1 + f.jf);
+ else
+ rl_printf("l%d:\t%s %s\n", i, op, buf);
+}
+
+static void bpf_dump_curr(struct bpf_regs *r, struct sock_filter *f)
+{
+ int i, m = 0;
+
+ rl_printf("pc: [%u]\n", r->Pc);
+ rl_printf("code: [%u] jt[%u] jf[%u] k[%u]\n",
+ f->code, f->jt, f->jf, f->k);
+ rl_printf("curr: ");
+ bpf_disasm(*f, r->Pc);
+
+ if (f->jt || f->jf) {
+ rl_printf("jt: ");
+ bpf_disasm(*(f + f->jt + 1), r->Pc + f->jt + 1);
+ rl_printf("jf: ");
+ bpf_disasm(*(f + f->jf + 1), r->Pc + f->jf + 1);
+ }
+
+ rl_printf("A: [%#08x][%u]\n", r->A, r->A);
+ rl_printf("X: [%#08x][%u]\n", r->X, r->X);
+ if (r->Rs)
+ rl_printf("ret: [%#08x][%u]!\n", r->R, r->R);
+
+ for (i = 0; i < BPF_MEMWORDS; i++) {
+ if (r->M[i]) {
+ m++;
+ rl_printf("M[%d]: [%#08x][%u]\n", i, r->M[i], r->M[i]);
+ }
+ }
+ if (m == 0)
+ rl_printf("M[0,%d]: [%#08x][%u]\n", BPF_MEMWORDS - 1, 0, 0);
+}
+
+static void bpf_dump_pkt(uint8_t *pkt, uint32_t pkt_caplen, uint32_t pkt_len)
+{
+ if (pkt_caplen != pkt_len)
+ rl_printf("cap: %u, len: %u\n", pkt_caplen, pkt_len);
+ else
+ rl_printf("len: %u\n", pkt_len);
+
+ hex_dump(pkt, pkt_caplen);
+}
+
+static void bpf_disasm_all(const struct sock_filter *f, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < len; i++)
+ bpf_disasm(f[i], i);
+}
+
+static void bpf_dump_all(const struct sock_filter *f, unsigned int len)
+{
+ unsigned int i;
+
+ rl_printf("/* { op, jt, jf, k }, */\n");
+ for (i = 0; i < len; i++)
+ rl_printf("{ %#04x, %2u, %2u, %#010x },\n",
+ f[i].code, f[i].jt, f[i].jf, f[i].k);
+}
+
+static bool bpf_runnable(struct sock_filter *f, unsigned int len)
+{
+ int sock, ret, i;
+ struct sock_fprog bpf = {
+ .filter = f,
+ .len = len,
+ };
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ rl_printf("cannot open socket!\n");
+ return false;
+ }
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
+ close(sock);
+ if (ret < 0) {
+ rl_printf("program not allowed to run by kernel!\n");
+ return false;
+ }
+ for (i = 0; i < len; i++) {
+ if (BPF_CLASS(f[i].code) == BPF_LD &&
+ f[i].k > SKF_AD_OFF) {
+ rl_printf("extensions currently not supported!\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void bpf_reset_breakpoints(void)
+{
+ int i;
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++)
+ bpf_breakpoints[i] = -1;
+}
+
+static void bpf_set_breakpoints(unsigned int where)
+{
+ int i;
+ bool set = false;
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++) {
+ if (bpf_breakpoints[i] == (int) where) {
+ rl_printf("breakpoint already set!\n");
+ set = true;
+ break;
+ }
+
+ if (bpf_breakpoints[i] == -1 && set == false) {
+ bpf_breakpoints[i] = where;
+ set = true;
+ }
+ }
+
+ if (!set)
+ rl_printf("too many breakpoints set, reset first!\n");
+}
+
+static void bpf_dump_breakpoints(void)
+{
+ int i;
+
+ rl_printf("breakpoints: ");
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++) {
+ if (bpf_breakpoints[i] < 0)
+ continue;
+ rl_printf("%d ", bpf_breakpoints[i]);
+ }
+
+ rl_printf("\n");
+}
+
+static void bpf_reset(void)
+{
+ bpf_regs_len = 0;
+
+ memset(bpf_regs, 0, sizeof(bpf_regs));
+ memset(&bpf_curr, 0, sizeof(bpf_curr));
+}
+
+static void bpf_safe_regs(void)
+{
+ memcpy(&bpf_regs[bpf_regs_len++], &bpf_curr, sizeof(bpf_curr));
+}
+
+static bool bpf_restore_regs(int off)
+{
+ unsigned int index = bpf_regs_len - 1 + off;
+
+ if (index == 0) {
+ bpf_reset();
+ return true;
+ } else if (index < bpf_regs_len) {
+ memcpy(&bpf_curr, &bpf_regs[index], sizeof(bpf_curr));
+ bpf_regs_len = index;
+ return true;
+ } else {
+ rl_printf("reached bottom of register history stack!\n");
+ return false;
+ }
+}
+
+static uint32_t extract_u32(uint8_t *pkt, uint32_t off)
+{
+ uint32_t r;
+
+ memcpy(&r, &pkt[off], sizeof(r));
+
+ return ntohl(r);
+}
+
+static uint16_t extract_u16(uint8_t *pkt, uint32_t off)
+{
+ uint16_t r;
+
+ memcpy(&r, &pkt[off], sizeof(r));
+
+ return ntohs(r);
+}
+
+static uint8_t extract_u8(uint8_t *pkt, uint32_t off)
+{
+ return pkt[off];
+}
+
+static void set_return(struct bpf_regs *r)
+{
+ r->R = 0;
+ r->Rs = true;
+}
+
+static void bpf_single_step(struct bpf_regs *r, struct sock_filter *f,
+ uint8_t *pkt, uint32_t pkt_caplen,
+ uint32_t pkt_len)
+{
+ uint32_t K = f->k;
+ int d;
+
+ switch (f->code) {
+ case BPF_RET | BPF_K:
+ r->R = K;
+ r->Rs = true;
+ break;
+ case BPF_RET | BPF_A:
+ r->R = r->A;
+ r->Rs = true;
+ break;
+ case BPF_RET | BPF_X:
+ r->R = r->X;
+ r->Rs = true;
+ break;
+ case BPF_MISC_TAX:
+ r->X = r->A;
+ break;
+ case BPF_MISC_TXA:
+ r->A = r->X;
+ break;
+ case BPF_ST:
+ r->M[K] = r->A;
+ break;
+ case BPF_STX:
+ r->M[K] = r->X;
+ break;
+ case BPF_LD_W | BPF_ABS:
+ d = pkt_caplen - K;
+ if (d >= (int) sizeof(uint32_t))
+ r->A = extract_u32(pkt, K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_H | BPF_ABS:
+ d = pkt_caplen - K;
+ if (d >= (int) sizeof(uint16_t))
+ r->A = extract_u16(pkt, K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_B | BPF_ABS:
+ d = pkt_caplen - K;
+ if (d >= (int) sizeof(uint8_t))
+ r->A = extract_u8(pkt, K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_W | BPF_IND:
+ d = pkt_caplen - (r->X + K);
+ if (d >= (int) sizeof(uint32_t))
+ r->A = extract_u32(pkt, r->X + K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_H | BPF_IND:
+ d = pkt_caplen - (r->X + K);
+ if (d >= (int) sizeof(uint16_t))
+ r->A = extract_u16(pkt, r->X + K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_B | BPF_IND:
+ d = pkt_caplen - (r->X + K);
+ if (d >= (int) sizeof(uint8_t))
+ r->A = extract_u8(pkt, r->X + K);
+ else
+ set_return(r);
+ break;
+ case BPF_LDX_B | BPF_MSH:
+ d = pkt_caplen - K;
+ if (d >= (int) sizeof(uint8_t)) {
+ r->X = extract_u8(pkt, K);
+ r->X = (r->X & 0xf) << 2;
+ } else
+ set_return(r);
+ break;
+ case BPF_LD_W | BPF_LEN:
+ r->A = pkt_len;
+ break;
+ case BPF_LDX_W | BPF_LEN:
+ r->X = pkt_len; /* ldx #len loads the length into X, not A */
+ break;
+ case BPF_LD | BPF_IMM:
+ r->A = K;
+ break;
+ case BPF_LDX | BPF_IMM:
+ r->X = K;
+ break;
+ case BPF_LD | BPF_MEM:
+ r->A = r->M[K];
+ break;
+ case BPF_LDX | BPF_MEM:
+ r->X = r->M[K];
+ break;
+ case BPF_JMP_JA:
+ r->Pc += K;
+ break;
+ case BPF_JMP_JGT | BPF_X:
+ r->Pc += r->A > r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JGT | BPF_K:
+ r->Pc += r->A > K ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JGE | BPF_X:
+ r->Pc += r->A >= r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JGE | BPF_K:
+ r->Pc += r->A >= K ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JEQ | BPF_X:
+ r->Pc += r->A == r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JEQ | BPF_K:
+ r->Pc += r->A == K ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JSET | BPF_X:
+ r->Pc += r->A & r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JSET | BPF_K:
+ r->Pc += r->A & K ? f->jt : f->jf;
+ break;
+ case BPF_ALU_NEG:
+ r->A = -r->A;
+ break;
+ case BPF_ALU_LSH | BPF_X:
+ r->A <<= r->X;
+ break;
+ case BPF_ALU_LSH | BPF_K:
+ r->A <<= K;
+ break;
+ case BPF_ALU_RSH | BPF_X:
+ r->A >>= r->X;
+ break;
+ case BPF_ALU_RSH | BPF_K:
+ r->A >>= K;
+ break;
+ case BPF_ALU_ADD | BPF_X:
+ r->A += r->X;
+ break;
+ case BPF_ALU_ADD | BPF_K:
+ r->A += K;
+ break;
+ case BPF_ALU_SUB | BPF_X:
+ r->A -= r->X;
+ break;
+ case BPF_ALU_SUB | BPF_K:
+ r->A -= K;
+ break;
+ case BPF_ALU_MUL | BPF_X:
+ r->A *= r->X;
+ break;
+ case BPF_ALU_MUL | BPF_K:
+ r->A *= K;
+ break;
+ case BPF_ALU_DIV | BPF_X:
+ case BPF_ALU_MOD | BPF_X:
+ if (r->X == 0) {
+ set_return(r);
+ break;
+ }
+ goto do_div;
+ case BPF_ALU_DIV | BPF_K:
+ case BPF_ALU_MOD | BPF_K:
+ if (K == 0) {
+ set_return(r);
+ break;
+ }
+do_div:
+ switch (f->code) {
+ case BPF_ALU_DIV | BPF_X:
+ r->A /= r->X;
+ break;
+ case BPF_ALU_DIV | BPF_K:
+ r->A /= K;
+ break;
+ case BPF_ALU_MOD | BPF_X:
+ r->A %= r->X;
+ break;
+ case BPF_ALU_MOD | BPF_K:
+ r->A %= K;
+ break;
+ }
+ break;
+ case BPF_ALU_AND | BPF_X:
+ r->A &= r->X;
+ break;
+ case BPF_ALU_AND | BPF_K:
+ r->A &= K;
+ break;
+ case BPF_ALU_OR | BPF_X:
+ r->A |= r->X;
+ break;
+ case BPF_ALU_OR | BPF_K:
+ r->A |= K;
+ break;
+ case BPF_ALU_XOR | BPF_X:
+ r->A ^= r->X;
+ break;
+ case BPF_ALU_XOR | BPF_K:
+ r->A ^= K;
+ break;
+ }
+}
+
+static bool bpf_pc_has_breakpoint(uint16_t pc)
+{
+ int i;
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++) {
+ if (bpf_breakpoints[i] < 0)
+ continue;
+ if (bpf_breakpoints[i] == pc)
+ return true;
+ }
+
+ return false;
+}
+
+static bool bpf_handle_breakpoint(struct bpf_regs *r, struct sock_filter *f,
+ uint8_t *pkt, uint32_t pkt_caplen,
+ uint32_t pkt_len)
+{
+ rl_printf("-- register dump --\n");
+ bpf_dump_curr(r, &f[r->Pc]);
+ rl_printf("-- packet dump --\n");
+ bpf_dump_pkt(pkt, pkt_caplen, pkt_len);
+ rl_printf("(breakpoint)\n");
+ return true;
+}
+
+static int bpf_run_all(struct sock_filter *f, uint16_t bpf_len, uint8_t *pkt,
+ uint32_t pkt_caplen, uint32_t pkt_len)
+{
+ bool stop = false;
+
+ while (bpf_curr.Rs == false && stop == false) {
+ bpf_safe_regs();
+
+ if (bpf_pc_has_breakpoint(bpf_curr.Pc))
+ stop = bpf_handle_breakpoint(&bpf_curr, f, pkt,
+ pkt_caplen, pkt_len);
+
+ bpf_single_step(&bpf_curr, &f[bpf_curr.Pc], pkt, pkt_caplen,
+ pkt_len);
+ bpf_curr.Pc++;
+ }
+
+ return stop ? -1 : bpf_curr.R;
+}
+
+static int bpf_run_stepping(struct sock_filter *f, uint16_t bpf_len,
+ uint8_t *pkt, uint32_t pkt_caplen,
+ uint32_t pkt_len, int next)
+{
+ bool stop = false;
+ int i = 1;
+
+ while (bpf_curr.Rs == false && stop == false) {
+ bpf_safe_regs();
+
+ if (i++ == next)
+ stop = bpf_handle_breakpoint(&bpf_curr, f, pkt,
+ pkt_caplen, pkt_len);
+
+ bpf_single_step(&bpf_curr, &f[bpf_curr.Pc], pkt, pkt_caplen,
+ pkt_len);
+ bpf_curr.Pc++;
+ }
+
+ return stop ? -1 : bpf_curr.R;
+}
+
+static bool pcap_loaded(void)
+{
+ if (pcap_fd < 0)
+ rl_printf("no pcap file loaded!\n");
+
+ return pcap_fd >= 0;
+}
+
+static struct pcap_pkthdr *pcap_curr_pkt(void)
+{
+ return (void *) pcap_ptr_va_curr;
+}
+
+static bool pcap_next_pkt(void)
+{
+ struct pcap_pkthdr *hdr = pcap_curr_pkt();
+
+ if (pcap_ptr_va_curr + sizeof(*hdr) -
+ pcap_ptr_va_start >= pcap_map_size)
+ return false;
+ if (hdr->caplen == 0 || hdr->len == 0 || hdr->caplen > hdr->len)
+ return false;
+ if (pcap_ptr_va_curr + sizeof(*hdr) + hdr->caplen -
+ pcap_ptr_va_start >= pcap_map_size)
+ return false;
+
+ pcap_ptr_va_curr += (sizeof(*hdr) + hdr->caplen);
+ return true;
+}
+
+static void pcap_reset_pkt(void)
+{
+ pcap_ptr_va_curr = pcap_ptr_va_start + sizeof(struct pcap_filehdr);
+}
+
+static int try_load_pcap(const char *file)
+{
+ struct pcap_filehdr *hdr;
+ struct stat sb;
+ int ret;
+
+ pcap_fd = open(file, O_RDONLY);
+ if (pcap_fd < 0) {
+ rl_printf("cannot open pcap [%s]!\n", strerror(errno));
+ return CMD_ERR;
+ }
+
+ ret = fstat(pcap_fd, &sb);
+ if (ret < 0) {
+ rl_printf("cannot fstat pcap file!\n");
+ return CMD_ERR;
+ }
+
+ if (!S_ISREG(sb.st_mode)) {
+ rl_printf("not a regular pcap file, duh!\n");
+ return CMD_ERR;
+ }
+
+ pcap_map_size = sb.st_size;
+ if (pcap_map_size <= sizeof(struct pcap_filehdr)) {
+ rl_printf("pcap file too small!\n");
+ return CMD_ERR;
+ }
+
+ pcap_ptr_va_start = mmap(NULL, pcap_map_size, PROT_READ,
+ MAP_SHARED | MAP_LOCKED, pcap_fd, 0);
+ if (pcap_ptr_va_start == MAP_FAILED) {
+ rl_printf("mmap of file failed!");
+ return CMD_ERR;
+ }
+
+ hdr = (void *) pcap_ptr_va_start;
+ if (hdr->magic != TCPDUMP_MAGIC) {
+ rl_printf("wrong pcap magic!\n");
+ return CMD_ERR;
+ }
+
+ pcap_reset_pkt();
+
+ return CMD_OK;
+}
+
+static void try_close_pcap(void)
+{
+ if (pcap_fd >= 0) {
+ munmap(pcap_ptr_va_start, pcap_map_size);
+ close(pcap_fd);
+
+ pcap_ptr_va_start = pcap_ptr_va_curr = NULL;
+ pcap_map_size = 0;
+ pcap_packet = 0;
+ pcap_fd = -1;
+ }
+}
+
+static int cmd_load_bpf(char *bpf_string)
+{
+ char sp, *token, separator = ',';
+ unsigned short bpf_len, i = 0;
+ struct sock_filter tmp;
+
+ bpf_prog_len = 0;
+ memset(bpf_image, 0, sizeof(bpf_image));
+
+ if (sscanf(bpf_string, "%hu%c", &bpf_len, &sp) != 2 ||
+ sp != separator || bpf_len > BPF_MAXINSNS || bpf_len == 0) {
+ rl_printf("syntax error in head length encoding!\n");
+ return CMD_ERR;
+ }
+
+ token = bpf_string;
+ while ((token = strchr(token, separator)) && (++token)[0]) {
+ if (i >= bpf_len) {
+ rl_printf("program exceeds encoded length!\n");
+ return CMD_ERR;
+ }
+
+ if (sscanf(token, "%hu %hhu %hhu %u,",
+ &tmp.code, &tmp.jt, &tmp.jf, &tmp.k) != 4) {
+ rl_printf("syntax error at instruction %d!\n", i);
+ return CMD_ERR;
+ }
+
+ bpf_image[i].code = tmp.code;
+ bpf_image[i].jt = tmp.jt;
+ bpf_image[i].jf = tmp.jf;
+ bpf_image[i].k = tmp.k;
+
+ i++;
+ }
+
+ if (i != bpf_len) {
+ rl_printf("syntax error exceeding encoded length!\n");
+ return CMD_ERR;
+ } else
+ bpf_prog_len = bpf_len;
+ if (!bpf_runnable(bpf_image, bpf_prog_len))
+ bpf_prog_len = 0;
+
+ return CMD_OK;
+}
+
+static int cmd_load_pcap(char *file)
+{
+ char *file_trim, *tmp;
+
+ file_trim = strtok_r(file, " ", &tmp);
+ if (file_trim == NULL)
+ return CMD_ERR;
+
+ try_close_pcap();
+
+ return try_load_pcap(file_trim);
+}
+
+static int cmd_load(char *arg)
+{
+ char *subcmd, *cont = NULL, *tmp = strdup(arg);
+ int ret = CMD_OK;
+
+ subcmd = strtok_r(tmp, " ", &cont);
+ if (subcmd == NULL)
+ goto out;
+ if (matches(subcmd, "bpf") == 0) {
+ bpf_reset();
+ bpf_reset_breakpoints();
+
+ if (!cont)
+ ret = CMD_ERR;
+ else
+ ret = cmd_load_bpf(cont);
+ } else if (matches(subcmd, "pcap") == 0) {
+ ret = cmd_load_pcap(cont);
+ } else {
+out:
+ rl_printf("bpf <code>: load bpf code\n");
+ rl_printf("pcap <file>: load pcap file\n");
+ ret = CMD_ERR;
+ }
+
+ free(tmp);
+ return ret;
+}
+
+static int cmd_step(char *num)
+{
+ struct pcap_pkthdr *hdr;
+ int steps, ret;
+
+ if (!bpf_prog_loaded() || !pcap_loaded())
+ return CMD_ERR;
+
+ steps = strtol(num, NULL, 10);
+ if (steps == 0 || strlen(num) == 0)
+ steps = 1;
+ if (steps < 0) {
+ if (!bpf_restore_regs(steps))
+ return CMD_ERR;
+ steps = 1;
+ }
+
+ hdr = pcap_curr_pkt();
+ ret = bpf_run_stepping(bpf_image, bpf_prog_len,
+ (uint8_t *) hdr + sizeof(*hdr),
+ hdr->caplen, hdr->len, steps);
+ if (ret >= 0 || bpf_curr.Rs) {
+ bpf_reset();
+ if (!pcap_next_pkt()) {
+ rl_printf("(going back to first packet)\n");
+ pcap_reset_pkt();
+ } else {
+ rl_printf("(next packet)\n");
+ }
+ }
+
+ return CMD_OK;
+}
+
+static int cmd_select(char *num)
+{
+ unsigned int which, i;
+ bool have_next = true;
+
+ if (!pcap_loaded() || strlen(num) == 0)
+ return CMD_ERR;
+
+ which = strtoul(num, NULL, 10);
+ if (which == 0) {
+ rl_printf("packet count starts with 1, clamping!\n");
+ which = 1;
+ }
+
+ pcap_reset_pkt();
+ bpf_reset();
+
+ for (i = 0; i < which && (have_next = pcap_next_pkt()); i++)
+ /* noop */;
+ if (!have_next || pcap_curr_pkt() == NULL) {
+ rl_printf("no packet #%u available!\n", which);
+ pcap_reset_pkt();
+ return CMD_ERR;
+ }
+
+ return CMD_OK;
+}
+
+static int cmd_breakpoint(char *subcmd)
+{
+ if (!bpf_prog_loaded())
+ return CMD_ERR;
+ if (strlen(subcmd) == 0)
+ bpf_dump_breakpoints();
+ else if (matches(subcmd, "reset") == 0)
+ bpf_reset_breakpoints();
+ else {
+ unsigned int where = strtoul(subcmd, NULL, 10);
+
+ if (where < bpf_prog_len) {
+ bpf_set_breakpoints(where);
+ rl_printf("breakpoint at: ");
+ bpf_disasm(bpf_image[where], where);
+ }
+ }
+
+ return CMD_OK;
+}
+
+static int cmd_run(char *num)
+{
+ static uint32_t pass, fail;
+ bool has_limit = true;
+ int pkts = 0, i = 0;
+
+ if (!bpf_prog_loaded() || !pcap_loaded())
+ return CMD_ERR;
+
+ pkts = strtol(num, NULL, 10);
+ if (pkts == 0 || strlen(num) == 0)
+ has_limit = false;
+
+ do {
+ struct pcap_pkthdr *hdr = pcap_curr_pkt();
+ int ret = bpf_run_all(bpf_image, bpf_prog_len,
+ (uint8_t *) hdr + sizeof(*hdr),
+ hdr->caplen, hdr->len);
+ if (ret > 0)
+ pass++;
+ else if (ret == 0)
+ fail++;
+ else
+ return CMD_OK;
+ bpf_reset();
+ } while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
+
+ rl_printf("bpf passes:%u fails:%u\n", pass, fail);
+
+ pcap_reset_pkt();
+ bpf_reset();
+
+ pass = fail = 0;
+ return CMD_OK;
+}
+
+static int cmd_disassemble(char *line_string)
+{
+ bool single_line = false;
+ unsigned long line;
+
+ if (!bpf_prog_loaded())
+ return CMD_ERR;
+ if (strlen(line_string) > 0 &&
+ (line = strtoul(line_string, NULL, 10)) < bpf_prog_len)
+ single_line = true;
+ if (single_line)
+ bpf_disasm(bpf_image[line], line);
+ else
+ bpf_disasm_all(bpf_image, bpf_prog_len);
+
+ return CMD_OK;
+}
+
+static int cmd_dump(char *dontcare)
+{
+ if (!bpf_prog_loaded())
+ return CMD_ERR;
+
+ bpf_dump_all(bpf_image, bpf_prog_len);
+
+ return CMD_OK;
+}
+
+static int cmd_quit(char *dontcare)
+{
+ return CMD_EX;
+}
+
+static const struct shell_cmd cmds[] = {
+ { .name = "load", .func = cmd_load },
+ { .name = "select", .func = cmd_select },
+ { .name = "step", .func = cmd_step },
+ { .name = "run", .func = cmd_run },
+ { .name = "breakpoint", .func = cmd_breakpoint },
+ { .name = "disassemble", .func = cmd_disassemble },
+ { .name = "dump", .func = cmd_dump },
+ { .name = "quit", .func = cmd_quit },
+};
+
+static int execf(char *arg)
+{
+ char *cmd, *cont, *tmp = strdup(arg);
+ int i, ret = 0, len;
+
+ cmd = strtok_r(tmp, " ", &cont);
+ if (cmd == NULL)
+ goto out;
+ len = strlen(cmd);
+ for (i = 0; i < array_size(cmds); i++) {
+ if (len != strlen(cmds[i].name))
+ continue;
+ if (strncmp(cmds[i].name, cmd, len) == 0) {
+ ret = cmds[i].func(cont);
+ break;
+ }
+ }
+out:
+ free(tmp);
+ return ret;
+}
+
+static char *shell_comp_gen(const char *buf, int state)
+{
+ static int list_index, len;
+
+ if (!state) {
+ list_index = 0;
+ len = strlen(buf);
+ }
+
+ for (; list_index < array_size(cmds); ) {
+ const char *name = cmds[list_index].name;
+
+ list_index++;
+ if (strncmp(name, buf, len) == 0)
+ return strdup(name);
+ }
+
+ return NULL;
+}
+
+static char **shell_completion(const char *buf, int start, int end)
+{
+ char **matches = NULL;
+
+ if (start == 0)
+ matches = rl_completion_matches(buf, shell_comp_gen);
+
+ return matches;
+}
+
+static void intr_shell(int sig)
+{
+ if (rl_end)
+ rl_kill_line(-1, 0);
+
+ rl_crlf();
+ rl_refresh_line(0, 0);
+ rl_free_line_state();
+}
+
+static void init_shell(FILE *fin, FILE *fout)
+{
+ char file[128];
+
+ snprintf(file, sizeof(file), "%s/.bpf_dbg_history", getenv("HOME"));
+ read_history(file);
+
+ rl_instream = fin;
+ rl_outstream = fout;
+
+ rl_readline_name = "bpf_dbg";
+ rl_terminal_name = getenv("TERM");
+
+ rl_catch_signals = 0;
+ rl_catch_sigwinch = 1;
+
+ rl_attempted_completion_function = shell_completion;
+
+ rl_bind_key('\t', rl_complete);
+
+ rl_bind_key_in_map('\t', rl_complete, emacs_meta_keymap);
+ rl_bind_key_in_map('\033', rl_complete, emacs_meta_keymap);
+
+ snprintf(file, sizeof(file), "%s/.bpf_dbg_init", getenv("HOME"));
+ rl_read_init_file(file);
+
+ rl_prep_terminal(0);
+ rl_set_signals();
+
+ signal(SIGINT, intr_shell);
+}
+
+static void exit_shell(FILE *fin, FILE *fout)
+{
+ char file[128];
+
+ snprintf(file, sizeof(file), "%s/.bpf_dbg_history", getenv("HOME"));
+ write_history(file);
+
+ clear_history();
+ rl_deprep_terminal();
+
+ try_close_pcap();
+
+ if (fin != stdin)
+ fclose(fin);
+ if (fout != stdout)
+ fclose(fout);
+}
+
+static int run_shell_loop(FILE *fin, FILE *fout)
+{
+ char *buf;
+
+ init_shell(fin, fout);
+
+ while ((buf = readline("> ")) != NULL) {
+ int ret = execf(buf);
+ if (ret == CMD_EX)
+ break;
+ if (ret == CMD_OK && strlen(buf) > 0)
+ add_history(buf);
+
+ free(buf);
+ }
+
+ exit_shell(fin, fout);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ FILE *fin = NULL, *fout = NULL;
+
+ if (argc >= 2)
+ fin = fopen(argv[1], "r");
+ if (argc >= 3)
+ fout = fopen(argv[2], "w");
+
+ return run_shell_loop(fin ? : stdin, fout ? : stdout);
+}
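For reference, try_load_pcap() and pcap_next_pkt() above boil down to an mmap'd walk over fixed-size pcap record headers. A condensed stand-alone sketch of that walk, under the same assumptions bpf_dbg makes (host-endian classic pcap with magic 0xa1b2c3d4, no byte swapping):

/* Condensed sketch of bpf_dbg's pcap walk: mmap the file, skip the
 * 24-byte global header, then step header (16 bytes) + caplen per
 * packet, with the same bounds checks as pcap_next_pkt() above. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	struct stat sb;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0 || fstat(fd, &sb) < 0)
		return 1;

	char *map = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
	if (map == MAP_FAILED)
		return 1;

	char *p = map + 24;			/* struct pcap_filehdr */
	unsigned int n = 0;

	while (p + 16 <= map + sb.st_size) {	/* struct pcap_pkthdr */
		uint32_t caplen, len;

		memcpy(&caplen, p + 8, 4);	/* hdr->caplen */
		memcpy(&len, p + 12, 4);	/* hdr->len */
		if (!caplen || !len || caplen > len ||
		    p + 16 + caplen > map + sb.st_size)
			break;
		printf("pkt %u: caplen=%u len=%u\n", ++n, caplen, len);
		p += 16 + caplen;		/* next record */
	}
	munmap(map, sb.st_size);
	close(fd);
	return 0;
}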
diff --git a/tools/bpf/bpf_exp.l b/tools/bpf/bpf_exp.l
new file mode 100644
index 000000000..4da8d053d
--- /dev/null
+++ b/tools/bpf/bpf_exp.l
@@ -0,0 +1,198 @@
+/*
+ * BPF asm code lexer
+ *
+ * This program is free software; you can distribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * Syntax kept close to:
+ *
+ * Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
+ * architecture for user-level packet capture. In Proceedings of the
+ * USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
+ * Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
+ * CA, USA, 2-2.
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+%{
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/filter.h>
+
+#include "bpf_exp.yacc.h"
+
+extern void yyerror(const char *str);
+
+%}
+
+%option align
+%option ecs
+
+%option nounput
+%option noreject
+%option noinput
+%option noyywrap
+
+%option 8bit
+%option caseless
+%option yylineno
+
+%%
+
+"ldb" { return OP_LDB; }
+"ldh" { return OP_LDH; }
+"ld" { return OP_LD; }
+"ldi" { return OP_LDI; }
+"ldx" { return OP_LDX; }
+"ldxi" { return OP_LDXI; }
+"ldxb" { return OP_LDXB; }
+"st" { return OP_ST; }
+"stx" { return OP_STX; }
+"jmp" { return OP_JMP; }
+"ja" { return OP_JMP; }
+"jeq" { return OP_JEQ; }
+"jneq" { return OP_JNEQ; }
+"jne" { return OP_JNEQ; }
+"jlt" { return OP_JLT; }
+"jle" { return OP_JLE; }
+"jgt" { return OP_JGT; }
+"jge" { return OP_JGE; }
+"jset" { return OP_JSET; }
+"add" { return OP_ADD; }
+"sub" { return OP_SUB; }
+"mul" { return OP_MUL; }
+"div" { return OP_DIV; }
+"mod" { return OP_MOD; }
+"neg" { return OP_NEG; }
+"and" { return OP_AND; }
+"xor" { return OP_XOR; }
+"or" { return OP_OR; }
+"lsh" { return OP_LSH; }
+"rsh" { return OP_RSH; }
+"ret" { return OP_RET; }
+"tax" { return OP_TAX; }
+"txa" { return OP_TXA; }
+
+"#"?("len") { return K_PKT_LEN; }
+
+"#"?("proto") {
+ yylval.number = SKF_AD_PROTOCOL;
+ return extension;
+ }
+"#"?("type") {
+ yylval.number = SKF_AD_PKTTYPE;
+ return extension;
+ }
+"#"?("poff") {
+ yylval.number = SKF_AD_PAY_OFFSET;
+ return extension;
+ }
+"#"?("ifidx") {
+ yylval.number = SKF_AD_IFINDEX;
+ return extension;
+ }
+"#"?("nla") {
+ yylval.number = SKF_AD_NLATTR;
+ return extension;
+ }
+"#"?("nlan") {
+ yylval.number = SKF_AD_NLATTR_NEST;
+ return extension;
+ }
+"#"?("mark") {
+ yylval.number = SKF_AD_MARK;
+ return extension;
+ }
+"#"?("queue") {
+ yylval.number = SKF_AD_QUEUE;
+ return extension;
+ }
+"#"?("hatype") {
+ yylval.number = SKF_AD_HATYPE;
+ return extension;
+ }
+"#"?("rxhash") {
+ yylval.number = SKF_AD_RXHASH;
+ return extension;
+ }
+"#"?("cpu") {
+ yylval.number = SKF_AD_CPU;
+ return extension;
+ }
+"#"?("vlan_tci") {
+ yylval.number = SKF_AD_VLAN_TAG;
+ return extension;
+ }
+"#"?("vlan_pr") {
+ yylval.number = SKF_AD_VLAN_TAG_PRESENT;
+ return extension;
+ }
+"#"?("vlan_avail") {
+ yylval.number = SKF_AD_VLAN_TAG_PRESENT;
+ return extension;
+ }
+"#"?("vlan_tpid") {
+ yylval.number = SKF_AD_VLAN_TPID;
+ return extension;
+ }
+"#"?("rand") {
+ yylval.number = SKF_AD_RANDOM;
+ return extension;
+ }
+
+":" { return ':'; }
+"," { return ','; }
+"#" { return '#'; }
+"%" { return '%'; }
+"[" { return '['; }
+"]" { return ']'; }
+"(" { return '('; }
+")" { return ')'; }
+"x" { return 'x'; }
+"a" { return 'a'; }
+"+" { return '+'; }
+"M" { return 'M'; }
+"*" { return '*'; }
+"&" { return '&'; }
+
+([0][x][a-fA-F0-9]+) {
+ yylval.number = strtoul(yytext, NULL, 16);
+ return number;
+ }
+([0][b][0-1]+) {
+ yylval.number = strtol(yytext + 2, NULL, 2);
+ return number;
+ }
+(([0])|([-+]?[1-9][0-9]*)) {
+ yylval.number = strtol(yytext, NULL, 10);
+ return number;
+ }
+([0][0-7]+) {
+ yylval.number = strtol(yytext + 1, NULL, 8);
+ return number;
+ }
+[a-zA-Z_][a-zA-Z0-9_]+ {
+ yylval.label = strdup(yytext);
+ return label;
+ }
+
+"/*"([^\*]|\*[^/])*"*/" { /* NOP */ }
+";"[^\n]* { /* NOP */ }
+^#.* { /* NOP */ }
+[ \t]+ { /* NOP */ }
+[ \n]+ { /* NOP */ }
+
+. {
+ printf("unknown character \'%s\'", yytext);
+ yyerror("lex unknown character");
+ }
+
+%%
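The numeric-literal rules at the end of the lexer accept hex (0x...), binary (0b...), octal (a leading 0 followed by octal digits), and signed decimal; flex's longest-match rule is what makes a lone "0" decimal while "017" is octal. A small C mirror of that dispatch, for illustration only (parse_like_bpf_exp() is hypothetical):

/* Mirrors the lexer's number rules above: 0x.. hex, 0b.. binary,
 * 0.. octal, otherwise signed decimal. Illustrative only. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static long parse_like_bpf_exp(const char *s)
{
	if (!strncmp(s, "0x", 2))
		return strtoul(s, NULL, 16);
	if (!strncmp(s, "0b", 2))
		return strtol(s + 2, NULL, 2);	/* skip the "0b" prefix */
	if (s[0] == '0' && s[1])
		return strtol(s + 1, NULL, 8);	/* skip the leading '0' */
	return strtol(s, NULL, 10);
}

int main(void)
{
	printf("%ld %ld %ld %ld\n",
	       parse_like_bpf_exp("0x1f"), parse_like_bpf_exp("0b101"),
	       parse_like_bpf_exp("017"), parse_like_bpf_exp("-42"));
	/* prints: 31 5 15 -42 */
	return 0;
}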
diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y
new file mode 100644
index 000000000..56ba1de50
--- /dev/null
+++ b/tools/bpf/bpf_exp.y
@@ -0,0 +1,656 @@
+/*
+ * BPF asm code parser
+ *
+ * This program is free software; you can distribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * Syntax kept close to:
+ *
+ * Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
+ * architecture for user-level packet capture. In Proceedings of the
+ * USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
+ * Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
+ * CA, USA, 2-2.
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <linux/filter.h>
+
+#include "bpf_exp.yacc.h"
+
+enum jmp_type { JTL, JFL, JKL };
+
+extern FILE *yyin;
+extern int yylineno;
+extern int yylex(void);
+extern void yyerror(const char *str);
+
+extern void bpf_asm_compile(FILE *fp, bool cstyle);
+static void bpf_set_curr_instr(uint16_t op, uint8_t jt, uint8_t jf, uint32_t k);
+static void bpf_set_curr_label(char *label);
+static void bpf_set_jmp_label(char *label, enum jmp_type type);
+
+%}
+
+%union {
+ char *label;
+ uint32_t number;
+}
+
+%token OP_LDB OP_LDH OP_LD OP_LDX OP_ST OP_STX OP_JMP OP_JEQ OP_JGT OP_JGE
+%token OP_JSET OP_ADD OP_SUB OP_MUL OP_DIV OP_AND OP_OR OP_XOR OP_LSH OP_RSH
+%token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI
+%token OP_LDXI
+
+%token K_PKT_LEN
+
+%token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
+
+%token extension number label
+
+%type <label> label
+%type <number> extension
+%type <number> number
+
+%%
+
+prog
+ : line
+ | prog line
+ ;
+
+line
+ : instr
+ | labelled_instr
+ ;
+
+labelled_instr
+ : labelled instr
+ ;
+
+instr
+ : ldb
+ | ldh
+ | ld
+ | ldi
+ | ldx
+ | ldxi
+ | st
+ | stx
+ | jmp
+ | jeq
+ | jneq
+ | jlt
+ | jle
+ | jgt
+ | jge
+ | jset
+ | add
+ | sub
+ | mul
+ | div
+ | mod
+ | neg
+ | and
+ | or
+ | xor
+ | lsh
+ | rsh
+ | ret
+ | tax
+ | txa
+ ;
+
+labelled
+ : label ':' { bpf_set_curr_label($1); }
+ ;
+
+ldb
+ : OP_LDB '[' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $5); }
+ | OP_LDB '[' '%' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $6); }
+ | OP_LDB '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, $3); }
+ | OP_LDB extension {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + $2); }
+ ;
+
+ldh
+ : OP_LDH '[' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $5); }
+ | OP_LDH '[' '%' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $6); }
+ | OP_LDH '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, $3); }
+ | OP_LDH extension {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + $2); }
+ ;
+
+ldi
+ : OP_LDI '#' number {
+ bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); }
+ | OP_LDI number {
+ bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $2); }
+ ;
+
+ld
+ : OP_LD '#' number {
+ bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); }
+ | OP_LD K_PKT_LEN {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_LEN, 0, 0, 0); }
+ | OP_LD extension {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + $2); }
+ | OP_LD 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
+ | OP_LD '[' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_IND, 0, 0, $5); }
+ | OP_LD '[' '%' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_IND, 0, 0, $6); }
+ | OP_LD '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, $3); }
+ ;
+
+ldxi
+ : OP_LDXI '#' number {
+ bpf_set_curr_instr(BPF_LDX | BPF_IMM, 0, 0, $3); }
+ | OP_LDXI number {
+ bpf_set_curr_instr(BPF_LDX | BPF_IMM, 0, 0, $2); }
+ ;
+
+ldx
+ : OP_LDX '#' number {
+ bpf_set_curr_instr(BPF_LDX | BPF_IMM, 0, 0, $3); }
+ | OP_LDX K_PKT_LEN {
+ bpf_set_curr_instr(BPF_LDX | BPF_W | BPF_LEN, 0, 0, 0); }
+ | OP_LDX 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_LDX | BPF_MEM, 0, 0, $4); }
+ | OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
+ if ($2 != 4 || $9 != 0xf) {
+ fprintf(stderr, "ldxb offset not supported!\n");
+ exit(0);
+ } else {
+ bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
+ | OP_LDX number '*' '(' '[' number ']' '&' number ')' {
+ if ($2 != 4 || $9 != 0xf) {
+ fprintf(stderr, "ldxb offset not supported!\n");
+ exit(0);
+ } else {
+ bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
+ ;
+
+st
+ : OP_ST 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_ST, 0, 0, $4); }
+ ;
+
+stx
+ : OP_STX 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_STX, 0, 0, $4); }
+ ;
+
+jmp
+ : OP_JMP label {
+ bpf_set_jmp_label($2, JKL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JA, 0, 0, 0); }
+ ;
+
+jeq
+ : OP_JEQ '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, $3); }
+ | OP_JEQ 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JEQ '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JEQ '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, $3); }
+ | OP_JEQ 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JEQ '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ ;
+
+jneq
+ : OP_JNEQ '#' number ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, $3); }
+ | OP_JNEQ 'x' ',' label {
+ bpf_set_jmp_label($4, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JNEQ '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ ;
+
+jlt
+ : OP_JLT '#' number ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_K, 0, 0, $3); }
+ | OP_JLT 'x' ',' label {
+ bpf_set_jmp_label($4, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JLT '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ ;
+
+jle
+ : OP_JLE '#' number ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_K, 0, 0, $3); }
+ | OP_JLE 'x' ',' label {
+ bpf_set_jmp_label($4, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JLE '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ ;
+
+jgt
+ : OP_JGT '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_K, 0, 0, $3); }
+ | OP_JGT 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JGT '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JGT '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_K, 0, 0, $3); }
+ | OP_JGT 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JGT '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ ;
+
+jge
+ : OP_JGE '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_K, 0, 0, $3); }
+ | OP_JGE 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JGE '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JGE '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_K, 0, 0, $3); }
+ | OP_JGE 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JGE '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ ;
+
+jset
+ : OP_JSET '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_K, 0, 0, $3); }
+ | OP_JSET 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ | OP_JSET '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ | OP_JSET '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_K, 0, 0, $3); }
+ | OP_JSET 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ | OP_JSET '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ ;
+
+add
+ : OP_ADD '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_ADD | BPF_K, 0, 0, $3); }
+ | OP_ADD 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_ADD | BPF_X, 0, 0, 0); }
+ | OP_ADD '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_ADD | BPF_X, 0, 0, 0); }
+ ;
+
+sub
+ : OP_SUB '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_SUB | BPF_K, 0, 0, $3); }
+ | OP_SUB 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_SUB | BPF_X, 0, 0, 0); }
+ | OP_SUB '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_SUB | BPF_X, 0, 0, 0); }
+ ;
+
+mul
+ : OP_MUL '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_MUL | BPF_K, 0, 0, $3); }
+ | OP_MUL 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MUL | BPF_X, 0, 0, 0); }
+ | OP_MUL '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MUL | BPF_X, 0, 0, 0); }
+ ;
+
+div
+ : OP_DIV '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_DIV | BPF_K, 0, 0, $3); }
+ | OP_DIV 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_DIV | BPF_X, 0, 0, 0); }
+ | OP_DIV '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_DIV | BPF_X, 0, 0, 0); }
+ ;
+
+mod
+ : OP_MOD '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_MOD | BPF_K, 0, 0, $3); }
+ | OP_MOD 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MOD | BPF_X, 0, 0, 0); }
+ | OP_MOD '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MOD | BPF_X, 0, 0, 0); }
+ ;
+
+neg
+ : OP_NEG {
+ bpf_set_curr_instr(BPF_ALU | BPF_NEG, 0, 0, 0); }
+ ;
+
+and
+ : OP_AND '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_AND | BPF_K, 0, 0, $3); }
+ | OP_AND 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_AND | BPF_X, 0, 0, 0); }
+ | OP_AND '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_AND | BPF_X, 0, 0, 0); }
+ ;
+
+or
+ : OP_OR '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_OR | BPF_K, 0, 0, $3); }
+ | OP_OR 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_OR | BPF_X, 0, 0, 0); }
+ | OP_OR '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_OR | BPF_X, 0, 0, 0); }
+ ;
+
+xor
+ : OP_XOR '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_XOR | BPF_K, 0, 0, $3); }
+ | OP_XOR 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_XOR | BPF_X, 0, 0, 0); }
+ | OP_XOR '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_XOR | BPF_X, 0, 0, 0); }
+ ;
+
+lsh
+ : OP_LSH '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_LSH | BPF_K, 0, 0, $3); }
+ | OP_LSH 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_LSH | BPF_X, 0, 0, 0); }
+ | OP_LSH '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_LSH | BPF_X, 0, 0, 0); }
+ ;
+
+rsh
+ : OP_RSH '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_RSH | BPF_K, 0, 0, $3); }
+ | OP_RSH 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_RSH | BPF_X, 0, 0, 0); }
+ | OP_RSH '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_RSH | BPF_X, 0, 0, 0); }
+ ;
+
+ret
+ : OP_RET 'a' {
+ bpf_set_curr_instr(BPF_RET | BPF_A, 0, 0, 0); }
+ | OP_RET '%' 'a' {
+ bpf_set_curr_instr(BPF_RET | BPF_A, 0, 0, 0); }
+ | OP_RET 'x' {
+ bpf_set_curr_instr(BPF_RET | BPF_X, 0, 0, 0); }
+ | OP_RET '%' 'x' {
+ bpf_set_curr_instr(BPF_RET | BPF_X, 0, 0, 0); }
+ | OP_RET '#' number {
+ bpf_set_curr_instr(BPF_RET | BPF_K, 0, 0, $3); }
+ ;
+
+tax
+ : OP_TAX {
+ bpf_set_curr_instr(BPF_MISC | BPF_TAX, 0, 0, 0); }
+ ;
+
+txa
+ : OP_TXA {
+ bpf_set_curr_instr(BPF_MISC | BPF_TXA, 0, 0, 0); }
+ ;
+
+%%
+
+static int curr_instr = 0;
+static struct sock_filter out[BPF_MAXINSNS];
+static char **labels, **labels_jt, **labels_jf, **labels_k;
+
+static void bpf_assert_max(void)
+{
+ if (curr_instr >= BPF_MAXINSNS) {
+ fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
+		exit(1);
+ }
+}
+
+static void bpf_set_curr_instr(uint16_t code, uint8_t jt, uint8_t jf,
+ uint32_t k)
+{
+ bpf_assert_max();
+ out[curr_instr].code = code;
+ out[curr_instr].jt = jt;
+ out[curr_instr].jf = jf;
+ out[curr_instr].k = k;
+ curr_instr++;
+}
+
+static void bpf_set_curr_label(char *label)
+{
+ bpf_assert_max();
+ labels[curr_instr] = label;
+}
+
+static void bpf_set_jmp_label(char *label, enum jmp_type type)
+{
+ bpf_assert_max();
+ switch (type) {
+ case JTL:
+ labels_jt[curr_instr] = label;
+ break;
+ case JFL:
+ labels_jf[curr_instr] = label;
+ break;
+ case JKL:
+ labels_k[curr_instr] = label;
+ break;
+ }
+}
+
+static int bpf_find_insns_offset(const char *label)
+{
+ int i, max = curr_instr, ret = -ENOENT;
+
+ for (i = 0; i < max; i++) {
+ if (labels[i] && !strcmp(label, labels[i])) {
+ ret = i;
+ break;
+ }
+ }
+
+ if (ret == -ENOENT) {
+ fprintf(stderr, "no such label \'%s\'!\n", label);
+		exit(1);
+ }
+
+ return ret;
+}
+
+static void bpf_stage_1_insert_insns(void)
+{
+ yyparse();
+}
+
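+/*
+ * BPF jump offsets are relative to the instruction that follows the
+ * jump, hence the "off - i - 1" in the reducers below: a jump stored
+ * at insn 2 whose label resolves to insn 5 gets an offset of 2.
+ */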
+static void bpf_reduce_k_jumps(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ if (labels_k[i]) {
+ int off = bpf_find_insns_offset(labels_k[i]);
+ out[i].k = (uint32_t) (off - i - 1);
+ }
+ }
+}
+
+static void bpf_reduce_jt_jumps(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ if (labels_jt[i]) {
+ int off = bpf_find_insns_offset(labels_jt[i]);
+			out[i].jt = (uint8_t) (off - i - 1);
+ }
+ }
+}
+
+static void bpf_reduce_jf_jumps(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ if (labels_jf[i]) {
+ int off = bpf_find_insns_offset(labels_jf[i]);
+ out[i].jf = (uint8_t) (off - i - 1);
+ }
+ }
+}
+
+static void bpf_stage_2_reduce_labels(void)
+{
+ bpf_reduce_k_jumps();
+ bpf_reduce_jt_jumps();
+ bpf_reduce_jf_jumps();
+}
+
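+/*
+ * Example: with the format below, "ldh [12]" (code BPF_LD | BPF_H |
+ * BPF_ABS = 0x28, k = 12) prints as "{ 0x28,  0,  0, 0x0000000c },".
+ */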
+static void bpf_pretty_print_c(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++)
+ printf("{ %#04x, %2u, %2u, %#010x },\n", out[i].code,
+ out[i].jt, out[i].jf, out[i].k);
+}
+
+static void bpf_pretty_print(void)
+{
+ int i;
+
+ printf("%u,", curr_instr);
+ for (i = 0; i < curr_instr; i++)
+ printf("%u %u %u %u,", out[i].code,
+ out[i].jt, out[i].jf, out[i].k);
+ printf("\n");
+}
+
+static void bpf_init(void)
+{
+ memset(out, 0, sizeof(out));
+
+ labels = calloc(BPF_MAXINSNS, sizeof(*labels));
+ assert(labels);
+ labels_jt = calloc(BPF_MAXINSNS, sizeof(*labels_jt));
+ assert(labels_jt);
+ labels_jf = calloc(BPF_MAXINSNS, sizeof(*labels_jf));
+ assert(labels_jf);
+ labels_k = calloc(BPF_MAXINSNS, sizeof(*labels_k));
+ assert(labels_k);
+}
+
+static void bpf_destroy_labels(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ free(labels_jf[i]);
+ free(labels_jt[i]);
+ free(labels_k[i]);
+ free(labels[i]);
+ }
+}
+
+static void bpf_destroy(void)
+{
+ bpf_destroy_labels();
+ free(labels_jt);
+ free(labels_jf);
+ free(labels_k);
+ free(labels);
+}
+
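+/*
+ * Example input accepted by the grammar above, an ARP filter along the
+ * lines of the bpf_asm example in Documentation/networking/filter.txt:
+ *
+ *	ldh [12]
+ *	jneq #0x806, drop
+ *	ret #-1
+ *	drop: ret #0
+ */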
+void bpf_asm_compile(FILE *fp, bool cstyle)
+{
+ yyin = fp;
+
+ bpf_init();
+ bpf_stage_1_insert_insns();
+ bpf_stage_2_reduce_labels();
+ bpf_destroy();
+
+ if (cstyle)
+ bpf_pretty_print_c();
+ else
+ bpf_pretty_print();
+
+ if (fp != stdin)
+ fclose(yyin);
+}
+
+void yyerror(const char *str)
+{
+ fprintf(stderr, "error: %s at line %d\n", str, yylineno);
+ exit(1);
+}
diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c
new file mode 100644
index 000000000..58c2bab4e
--- /dev/null
+++ b/tools/bpf/bpf_jit_disasm.c
@@ -0,0 +1,329 @@
+/*
+ * Minimal BPF JIT image disassembler
+ *
+ * Disassembles opcodes emitted by the BPF JIT compiler back to asm
+ * insns for debugging or verification purposes.
+ *
+ * To get the disassembly of the JIT code, do the following:
+ *
+ * 1) `echo 2 > /proc/sys/net/core/bpf_jit_enable`
+ * 2) Load a BPF filter (e.g. `tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24`)
+ * 3) Run e.g. `bpf_jit_disasm -o` to read out the last JIT code
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <regex.h>
+#include <fcntl.h>
+#include <sys/klog.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+
+#define CMD_ACTION_SIZE_BUFFER 10
+#define CMD_ACTION_READ_ALL 3
+
+static void get_exec_path(char *tpath, size_t size)
+{
+ char *path;
+ ssize_t len;
+
+ snprintf(tpath, size, "/proc/%d/exe", (int) getpid());
+ tpath[size - 1] = 0;
+
+ path = strdup(tpath);
+ assert(path);
+
+	len = readlink(path, tpath, size - 1);
+	assert(len > 0);
+	tpath[len] = 0;
+
+ free(path);
+}
+
+static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
+{
+ int count, i, pc = 0;
+ char tpath[PATH_MAX];
+ struct disassemble_info info;
+ disassembler_ftype disassemble;
+ bfd *bfdf;
+
+ memset(tpath, 0, sizeof(tpath));
+ get_exec_path(tpath, sizeof(tpath));
+
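+	/*
+	 * Open our own executable via libbfd to pick up the host
+	 * architecture and mach for the disassembler: the JIT emits
+	 * native code for the machine this tool runs on.
+	 */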
+ bfdf = bfd_openr(tpath, NULL);
+ assert(bfdf);
+ assert(bfd_check_format(bfdf, bfd_object));
+
+ init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+ info.buffer = image;
+ info.buffer_length = len;
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ assert(disassemble);
+
+ do {
+ printf("%4x:\t", pc);
+
+ count = disassemble(pc, &info);
+
+ if (opcodes) {
+ printf("\n\t");
+ for (i = 0; i < count; ++i)
+ printf("%02x ", (uint8_t) image[pc + i]);
+ }
+ printf("\n");
+
+ pc += count;
+	} while (count > 0 && pc < len);
+
+ bfd_close(bfdf);
+}
+
+static char *get_klog_buff(unsigned int *klen)
+{
+ int ret, len;
+ char *buff;
+
+ len = klogctl(CMD_ACTION_SIZE_BUFFER, NULL, 0);
+ if (len < 0)
+ return NULL;
+
+ buff = malloc(len);
+ if (!buff)
+ return NULL;
+
+ ret = klogctl(CMD_ACTION_READ_ALL, buff, len);
+ if (ret < 0) {
+ free(buff);
+ return NULL;
+ }
+
+ *klen = ret;
+ return buff;
+}
+
+static char *get_flog_buff(const char *file, unsigned int *klen)
+{
+ int fd, ret, len;
+ struct stat fi;
+ char *buff;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+
+ ret = fstat(fd, &fi);
+ if (ret < 0 || !S_ISREG(fi.st_mode))
+ goto out;
+
+ len = fi.st_size + 1;
+ buff = malloc(len);
+ if (!buff)
+ goto out;
+
+ memset(buff, 0, len);
+ ret = read(fd, buff, len - 1);
+ if (ret <= 0)
+ goto out_free;
+
+ close(fd);
+ *klen = ret;
+ return buff;
+out_free:
+ free(buff);
+out:
+ close(fd);
+ return NULL;
+}
+
+static char *get_log_buff(const char *file, unsigned int *klen)
+{
+ return file ? get_flog_buff(file, klen) : get_klog_buff(klen);
+}
+
+static void put_log_buff(char *buff)
+{
+ free(buff);
+}
+
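+/*
+ * With bpf_jit_enable set to 2, the kernel logs the JIT result in the
+ * form matched by the regex below, e.g. (illustrative values, as in
+ * the bpf_jit_enable documentation):
+ *
+ *	flen=6 proglen=70 pass=3 image=ffffffffa0069c8f
+ *	JIT code: 00000000: 55 48 89 e5 48 81 ec 28 02 00 00 ...
+ *
+ * The scan keeps the last match, i.e. the most recently JITed program.
+ */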
+static uint8_t *get_last_jit_image(char *haystack, size_t hlen,
+ unsigned int *ilen)
+{
+ char *ptr, *pptr, *tmp;
+ off_t off = 0;
+ unsigned int proglen;
+ int ret, flen, pass, ulen = 0;
+ regmatch_t pmatch[1];
+ unsigned long base;
+ regex_t regex;
+ uint8_t *image;
+
+ if (hlen == 0)
+ return NULL;
+
+ ret = regcomp(&regex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ "
+ "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED);
+ assert(ret == 0);
+
+ ptr = haystack;
+ memset(pmatch, 0, sizeof(pmatch));
+
+ while (1) {
+ ret = regexec(&regex, ptr, 1, pmatch, 0);
+ if (ret == 0) {
+ ptr += pmatch[0].rm_eo;
+ off += pmatch[0].rm_eo;
+ assert(off < hlen);
+ } else
+ break;
+ }
+
+ ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
+ ret = sscanf(ptr, "flen=%d proglen=%u pass=%d image=%lx",
+ &flen, &proglen, &pass, &base);
+ if (ret != 4) {
+ regfree(&regex);
+ return NULL;
+ }
+	if (proglen > 1000000) {
+		printf("proglen of %u too big, stopping\n", proglen);
+		regfree(&regex);
+		return NULL;
+	}
+
+	image = malloc(proglen);
+	if (!image) {
+		printf("Out of memory\n");
+		regfree(&regex);
+		return NULL;
+	}
+	memset(image, 0, proglen);
+
+ tmp = ptr = haystack + off;
+ while ((ptr = strtok(tmp, "\n")) != NULL && ulen < proglen) {
+ tmp = NULL;
+ if (!strstr(ptr, "JIT code"))
+ continue;
+ pptr = ptr;
+ while ((ptr = strstr(pptr, ":")))
+ pptr = ptr + 1;
+ ptr = pptr;
+ do {
+ image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16);
+ if (ptr == pptr) {
+ ulen--;
+ break;
+ }
+ if (ulen >= proglen)
+ break;
+ ptr = pptr;
+ } while (1);
+ }
+
+ assert(ulen == proglen);
+ printf("%u bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
+ proglen, pass, flen);
+ printf("%lx + <x>:\n", base);
+
+ regfree(&regex);
+ *ilen = ulen;
+ return image;
+}
+
+static void usage(void)
+{
+ printf("Usage: bpf_jit_disasm [...]\n");
+ printf(" -o Also display related opcodes (default: off).\n");
+ printf(" -O <file> Write binary image of code to file, don't disassemble to stdout.\n");
+ printf(" -f <file> Read last image dump from file or stdin (default: klog).\n");
+ printf(" -h Display this help.\n");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int len, klen, opt, opcodes = 0;
+ char *kbuff, *file = NULL;
+ char *ofile = NULL;
+ int ofd;
+ ssize_t nr;
+ uint8_t *pos;
+ uint8_t *image = NULL;
+
+ while ((opt = getopt(argc, argv, "of:O:")) != -1) {
+ switch (opt) {
+ case 'o':
+ opcodes = 1;
+ break;
+ case 'O':
+ ofile = optarg;
+ break;
+ case 'f':
+ file = optarg;
+ break;
+ default:
+ usage();
+ return -1;
+ }
+ }
+
+ bfd_init();
+
+ kbuff = get_log_buff(file, &klen);
+ if (!kbuff) {
+ fprintf(stderr, "Could not retrieve log buffer!\n");
+ return -1;
+ }
+
+ image = get_last_jit_image(kbuff, klen, &len);
+ if (!image) {
+ fprintf(stderr, "No JIT image found!\n");
+ goto done;
+ }
+ if (!ofile) {
+ get_asm_insns(image, len, opcodes);
+ goto done;
+ }
+
+ ofd = open(ofile, O_WRONLY | O_CREAT | O_TRUNC, DEFFILEMODE);
+ if (ofd < 0) {
+ fprintf(stderr, "Could not open file %s for writing: ", ofile);
+ perror(NULL);
+ goto done;
+ }
+ pos = image;
+ do {
+ nr = write(ofd, pos, len);
+ if (nr < 0) {
+ fprintf(stderr, "Could not write data to %s: ", ofile);
+ perror(NULL);
+ goto done;
+ }
+ len -= nr;
+ pos += nr;
+ } while (len);
+ close(ofd);
+
+done:
+ put_log_buff(kbuff);
+ free(image);
+ return 0;
+}
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
new file mode 100644
index 000000000..8248b8dd8
--- /dev/null
+++ b/tools/bpf/bpftool/.gitignore
@@ -0,0 +1,5 @@
+*.d
+/bpftool
+bpftool*.8
+bpf-helpers.*
+FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
new file mode 100644
index 000000000..f7663a3e6
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -0,0 +1,52 @@
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man8dir = $(mandir)/man8
+
+# Load targets for building eBPF helpers man page.
+include ../../Makefile.helpers
+
+MAN8_RST = $(filter-out $(HELPERS_RST),$(wildcard *.rst))
+
+_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
+DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
+
+man: man8 helpers
+man8: $(DOC_MAN8)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+$(OUTPUT)%.8: %.rst
+ifndef RST2MAN_DEP
+ $(error "rst2man not found, but required to generate man pages")
+endif
+ $(QUIET_GEN)rst2man $< > $@
+
+clean: helpers-clean
+ $(call QUIET_CLEAN, Documentation)
+ $(Q)$(RM) $(DOC_MAN8)
+
+install: man helpers-install
+ $(call QUIET_INSTALL, Documentation-man)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
+ $(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
+
+uninstall: helpers-uninstall
+ $(call QUIET_UNINST, Documentation-man)
+ $(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
+ $(Q)$(RMDIR) $(DESTDIR)$(man8dir)
+
+.PHONY: man man8 clean install uninstall
+.DEFAULT_GOAL := man
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
new file mode 100644
index 000000000..edbe81534
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -0,0 +1,140 @@
+================
+bpftool-cgroup
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF progs
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **cgroup** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+ *COMMANDS* :=
+ { **show** | **list** | **tree** | **attach** | **detach** | **help** }
+
+CGROUP COMMANDS
+===============
+
+| **bpftool** **cgroup { show | list }** *CGROUP*
+| **bpftool** **cgroup tree** [*CGROUP_ROOT*]
+| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
+| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
+| **bpftool** **cgroup help**
+|
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
+| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
+| **sendmsg4** | **sendmsg6** }
+| *ATTACH_FLAGS* := { **multi** | **override** }
+
+DESCRIPTION
+===========
+ **bpftool cgroup { show | list }** *CGROUP*
+ List all programs attached to the cgroup *CGROUP*.
+
+ Output will start with program ID followed by attach type,
+ attach flags and program name.
+
+ **bpftool cgroup tree** [*CGROUP_ROOT*]
+ Iterate over all cgroups in *CGROUP_ROOT* and list all
+ attached programs. If *CGROUP_ROOT* is not specified,
+		  bpftool uses the cgroup v2 mountpoint.
+
+ The output is similar to the output of cgroup show/list
+ commands: it starts with absolute cgroup path, followed by
+ program ID, attach type, attach flags and program name.
+
+ **bpftool cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
+ Attach program *PROG* to the cgroup *CGROUP* with attach type
+ *ATTACH_TYPE* and optional *ATTACH_FLAGS*.
+
+ *ATTACH_FLAGS* can be one of: **override** if a sub-cgroup installs
+ some bpf program, the program in this cgroup yields to sub-cgroup
+ program; **multi** if a sub-cgroup installs some bpf program,
+ that cgroup program gets run in addition to the program in this
+ cgroup.
+
+ Only one program is allowed to be attached to a cgroup with
+ no attach flags or the **override** flag. Attaching another
+		  program will release the old program and attach the new one.
+
+ Multiple programs are allowed to be attached to a cgroup with
+ **multi**. They are executed in FIFO order (those that were
+ attached first, run first).
+
+ Non-default *ATTACH_FLAGS* are supported by kernel version 4.14
+ and later.
+
+		  *ATTACH_TYPE* can be one of:
+ **ingress** ingress path of the inet socket (since 4.10);
+ **egress** egress path of the inet socket (since 4.10);
+ **sock_create** opening of an inet socket (since 4.10);
+ **sock_ops** various socket operations (since 4.12);
+ **device** device access (since 4.15);
+ **bind4** call to bind(2) for an inet4 socket (since 4.17);
+ **bind6** call to bind(2) for an inet6 socket (since 4.17);
+ **post_bind4** return from bind(2) for an inet4 socket (since 4.17);
+ **post_bind6** return from bind(2) for an inet6 socket (since 4.17);
+ **connect4** call to connect(2) for an inet4 socket (since 4.17);
+ **connect6** call to connect(2) for an inet6 socket (since 4.17);
+ **sendmsg4** call to sendto(2), sendmsg(2), sendmmsg(2) for an
+ unconnected udp4 socket (since 4.18);
+ **sendmsg6** call to sendto(2), sendmsg(2), sendmmsg(2) for an
+ unconnected udp6 socket (since 4.18).
+
+ **bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
+ Detach *PROG* from the cgroup *CGROUP* and attach type
+ *ATTACH_TYPE*.
+
+	**bpftool cgroup help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -f, --bpffs
+ Show file names of pinned programs.
+
+EXAMPLES
+========
+|
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# mkdir /sys/fs/cgroup/test.slice**
+| **# bpftool prog load ./device_cgroup.o /sys/fs/bpf/prog**
+| **# bpftool cgroup attach /sys/fs/cgroup/test.slice/ device id 1 allow_multi**
+
+**# bpftool cgroup list /sys/fs/cgroup/test.slice/**
+
+::
+
+ ID AttachType AttachFlags Name
+ 1 device allow_multi bpf_prog1
+
+|
+| **# bpftool cgroup detach /sys/fs/cgroup/test.slice/ device id 1**
+| **# bpftool cgroup list /sys/fs/cgroup/test.slice/**
+
+::
+
+ ID AttachType AttachFlags Name
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
new file mode 100644
index 000000000..a6258bc8e
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -0,0 +1,161 @@
+================
+bpftool-map
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF maps
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **map** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+ *COMMANDS* :=
+ { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
+ | **pin** | **help** }
+
+MAP COMMANDS
+=============
+
+| **bpftool** **map { show | list }** [*MAP*]
+| **bpftool** **map dump** *MAP*
+| **bpftool** **map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
+| **bpftool** **map lookup** *MAP* **key** *DATA*
+| **bpftool** **map getnext** *MAP* [**key** *DATA*]
+| **bpftool** **map delete** *MAP* **key** *DATA*
+| **bpftool** **map pin** *MAP* *FILE*
+| **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
+| **bpftool** **map help**
+|
+| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+| *DATA* := { [**hex**] *BYTES* }
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *VALUE* := { *DATA* | *MAP* | *PROG* }
+| *UPDATE_FLAGS* := { **any** | **exist** | **noexist** }
+
+DESCRIPTION
+===========
+ **bpftool map { show | list }** [*MAP*]
+		  Show information about loaded maps. If *MAP* is specified,
+		  show information only about the given map, otherwise list
+		  all maps currently loaded on the system.
+
+ Output will start with map ID followed by map type and
+ zero or more named attributes (depending on kernel version).
+
+ **bpftool map dump** *MAP*
+ Dump all entries in a given *MAP*.
+
+ **bpftool map update** *MAP* **key** *DATA* **value** *VALUE* [*UPDATE_FLAGS*]
+ Update map entry for a given *KEY*.
+
+		  *UPDATE_FLAGS* can be one of: **any** update existing entry
+		  or add if it doesn't exist; **exist** update only if entry already
+ exists; **noexist** update only if entry doesn't exist.
+
+ If the **hex** keyword is provided in front of the bytes
+		  sequence, the bytes are parsed as hexadecimal values, even if
+ no "0x" prefix is added. If the keyword is not provided, then
+ the bytes are parsed as decimal values, unless a "0x" prefix
+ (for hexadecimal) or a "0" prefix (for octal) is provided.
+
+ **bpftool map lookup** *MAP* **key** *DATA*
+ Lookup **key** in the map.
+
+ **bpftool map getnext** *MAP* [**key** *DATA*]
+		  Get next key. If *key* is not specified, get the first key.
+
+ **bpftool map delete** *MAP* **key** *DATA*
+ Remove entry from the map.
+
+ **bpftool map pin** *MAP* *FILE*
+ Pin map *MAP* as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount.
+
+ **bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
+ Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+
+ Install perf rings into a perf event array map and dump
+ output of any bpf_perf_event_output() call in the kernel.
+ By default read the number of CPUs on the system and
+ install perf ring for each CPU in the corresponding index
+ in the array.
+
+ If **cpu** and **index** are specified, install perf ring
+ for given **cpu** at **index** in the array (single ring).
+
+ Note that installing a perf ring into an array will silently
+ replace any existing ring. Any other application will stop
+ receiving events if it installed its rings earlier.
+
+ **bpftool map help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -f, --bpffs
+ Show file names of pinned maps.
+
+EXAMPLES
+========
+**# bpftool map show**
+::
+
+ 10: hash name some_map flags 0x0
+ key 4B value 8B max_entries 2048 memlock 167936B
+
+The following three commands are equivalent:
+
+|
+| **# bpftool map update id 10 key hex 20 c4 b7 00 value hex 0f ff ff ab 01 02 03 4c**
+| **# bpftool map update id 10 key 0x20 0xc4 0xb7 0x00 value 0x0f 0xff 0xff 0xab 0x01 0x02 0x03 0x4c**
+| **# bpftool map update id 10 key 32 196 183 0 value 15 255 255 171 1 2 3 76**
+
+**# bpftool map lookup id 10 key 0 1 2 3**
+
+::
+
+ key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
+
+
+**# bpftool map dump id 10**
+::
+
+ key: 00 01 02 03 value: 00 01 02 03 04 05 06 07
+ key: 0d 00 07 00 value: 02 00 00 00 01 02 03 04
+ Found 2 elements
+
+**# bpftool map getnext id 10 key 0 1 2 3**
+::
+
+ key:
+ 00 01 02 03
+ next key:
+ 0d 00 07 00
+
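+For a map of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY** (assumed here to
+have map id 25), perf events from CPU 0 can be read with:
+
+|
+| **# bpftool map event_pipe id 25 cpu 0 index 0**
+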
+|
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# bpftool map pin id 10 /sys/fs/bpf/map**
+| **# bpftool map del pinned /sys/fs/bpf/map key 13 00 07 00**
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
new file mode 100644
index 000000000..e3eb0eab7
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -0,0 +1,81 @@
+================
+bpftool-perf
+================
+-------------------------------------------------------------------------------
+tool for inspection of perf related bpf prog attachments
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **perf** *COMMAND*
+
+ *OPTIONS* := { [{ **-j** | **--json** }] [{ **-p** | **--pretty** }] }
+
+ *COMMANDS* :=
+ { **show** | **list** | **help** }
+
+PERF COMMANDS
+=============
+
+| **bpftool** **perf { show | list }**
+| **bpftool** **perf help**
+
+DESCRIPTION
+===========
+ **bpftool perf { show | list }**
+		  List all raw_tracepoint, tracepoint, kprobe and uprobe
+		  attachments in the system.
+
+ Output will start with process id and file descriptor in that process,
+ followed by bpf program id, attachment information, and attachment point.
+ The attachment point for raw_tracepoint/tracepoint is the trace probe name.
+ The attachment point for k[ret]probe is either symbol name and offset,
+ or a kernel virtual address.
+ The attachment point for u[ret]probe is the file name and the file offset.
+
+ **bpftool perf help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+EXAMPLES
+========
+
+| **# bpftool perf**
+
+::
+
+ pid 21711 fd 5: prog_id 5 kprobe func __x64_sys_write offset 0
+ pid 21765 fd 5: prog_id 7 kretprobe func __x64_sys_nanosleep offset 0
+ pid 21767 fd 5: prog_id 8 tracepoint sys_enter_nanosleep
+ pid 21800 fd 5: prog_id 9 uprobe filename /home/yhs/a.out offset 1159
+
+|
+| **# bpftool -j perf**
+
+::
+
+ [{"pid":21711,"fd":5,"prog_id":5,"fd_type":"kprobe","func":"__x64_sys_write","offset":0}, \
+ {"pid":21765,"fd":5,"prog_id":7,"fd_type":"kretprobe","func":"__x64_sys_nanosleep","offset":0}, \
+ {"pid":21767,"fd":5,"prog_id":8,"fd_type":"tracepoint","tracepoint":"sys_enter_nanosleep"}, \
+ {"pid":21800,"fd":5,"prog_id":9,"fd_type":"uprobe","filename":"/home/yhs/a.out","offset":1159}]
+
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-prog**\ (8), **bpftool-map**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
new file mode 100644
index 000000000..64156a16d
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -0,0 +1,194 @@
+================
+bpftool-prog
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF progs
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **prog** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+ *COMMANDS* :=
+ { **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** | **help** }
+
+PROG COMMANDS
+=============
+
+| **bpftool** **prog { show | list }** [*PROG*]
+| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual**}]
+| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes**}]
+| **bpftool** **prog pin** *PROG* *FILE*
+| **bpftool** **prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+| **bpftool** **prog help**
+|
+| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
+| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
+| *TYPE* := {
+| **socket** | **kprobe** | **kretprobe** | **classifier** | **action** |
+| **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** |
+| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
+| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
+| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
+| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6**
+| }
+
+
+DESCRIPTION
+===========
+ **bpftool prog { show | list }** [*PROG*]
+		  Show information about loaded programs. If *PROG* is
+		  specified, show information only about the given program,
+		  otherwise list all programs currently loaded on the system.
+
+ Output will start with program ID followed by program type and
+ zero or more named attributes (depending on kernel version).
+
+ **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** }]
+ Dump eBPF instructions of the program from the kernel. By
+ default, eBPF will be disassembled and printed to standard
+ output in human-readable format. In this case, **opcodes**
+ controls if raw opcodes should be printed as well.
+
+ If **file** is specified, the binary image will instead be
+ written to *FILE*.
+
+ If **visual** is specified, control flow graph (CFG) will be
+ built instead, and eBPF instructions will be presented with
+ CFG in DOT format, on standard output.
+
+ **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** }]
+ Dump jited image (host machine code) of the program.
+		  If *FILE* is specified, the image will be written to a file,
+ otherwise it will be disassembled and printed to stdout.
+
+ **opcodes** controls if raw opcodes will be printed.
+
+ **bpftool prog pin** *PROG* *FILE*
+ Pin program *PROG* as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount.
+
+ **bpftool prog load** *OBJ* *FILE* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*]
+ Load bpf program from binary *OBJ* and pin as *FILE*.
+		  **type** is optional; if not specified, the program type
+		  will be inferred from section names.
+ By default bpftool will create new maps as declared in the ELF
+ object being loaded. **map** parameter allows for the reuse
+ of existing maps. It can be specified multiple times, each
+		  time for a different map. *IDX* refers to the index of the
+		  map to be replaced in the ELF file, counting from 0, while
+		  *NAME* allows replacing a map by name. *MAP* specifies the map to
+ use, referring to it by **id** or through a **pinned** file.
+		  If **dev** *NAME* is specified, the program will be loaded
+		  onto the given networking device (offload).
+
+ Note: *FILE* must be located in *bpffs* mount.
+
+ **bpftool prog help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -f, --bpffs
+ Show file names of pinned programs.
+
+EXAMPLES
+========
+**# bpftool prog show**
+::
+
+ 10: xdp name some_prog tag 005a3d2123620c8b gpl
+ loaded_at Sep 29/20:11 uid 0
+ xlated 528B jited 370B memlock 4096B map_ids 10
+
+**# bpftool --json --pretty prog show**
+
+::
+
+ {
+ "programs": [{
+ "id": 10,
+ "type": "xdp",
+ "tag": "005a3d2123620c8b",
+ "gpl_compatible": true,
+ "loaded_at": "Sep 29/20:11",
+ "uid": 0,
+ "bytes_xlated": 528,
+ "jited": true,
+ "bytes_jited": 370,
+ "bytes_memlock": 4096,
+ "map_ids": [10
+ ]
+ }
+ ]
+ }
+
+|
+| **# bpftool prog dump xlated id 10 file /tmp/t**
+| **# ls -l /tmp/t**
+| -rw------- 1 root root 560 Jul 22 01:42 /tmp/t
+
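+The control flow graph produced by **dump xlated ... visual** can be
+rendered with an external tool such as Graphviz **dot** (assumed to be
+installed), for example:
+
+|
+| **# bpftool prog dump xlated id 10 visual > /tmp/cfg.dot**
+| **# dot -Tpng /tmp/cfg.dot -o /tmp/cfg.png**
+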
+**# bpftool prog dump jited tag 005a3d2123620c8b**
+
+::
+
+ push %rbp
+ mov %rsp,%rbp
+ sub $0x228,%rsp
+ sub $0x28,%rbp
+ mov %rbx,0x0(%rbp)
+
+|
+| **# mount -t bpf none /sys/fs/bpf/**
+| **# bpftool prog pin id 10 /sys/fs/bpf/prog**
+| **# bpftool prog load ./my_prog.o /sys/fs/bpf/prog2**
+| **# ls -l /sys/fs/bpf/**
+| -rw------- 1 root root 0 Jul 22 01:43 prog
+| -rw------- 1 root root 0 Jul 22 01:44 prog2
+
+**# bpftool prog dump jited pinned /sys/fs/bpf/prog opcodes**
+
+::
+
+ push %rbp
+ 55
+ mov %rsp,%rbp
+ 48 89 e5
+ sub $0x228,%rsp
+ 48 81 ec 28 02 00 00
+ sub $0x28,%rbp
+ 48 83 ed 28
+ mov %rbx,0x0(%rbp)
+ 48 89 5d 00
+
+|
+| **# bpftool prog load xdp1_kern.o /sys/fs/bpf/xdp1 type xdp map name rxcnt id 7**
+| **# bpftool prog show pinned /sys/fs/bpf/xdp1**
+| 9: xdp name xdp_prog1 tag 539ec6ce11b52f98 gpl
+| loaded_at 2018-06-25T16:17:31-0700 uid 0
+| xlated 488B jited 336B memlock 4096B map_ids 7
+| **# rm /sys/fs/bpf/xdp1**
+|
+
+SEE ALSO
+========
+ **bpftool**\ (8), **bpftool-map**\ (8), **bpftool-cgroup**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
new file mode 100644
index 000000000..b6f5d5604
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -0,0 +1,61 @@
+================
+BPFTOOL
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF programs and maps
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] *OBJECT* { *COMMAND* | **help** }
+
+ **bpftool** **batch file** *FILE*
+
+ **bpftool** **version**
+
+ *OBJECT* := { **map** | **program** | **cgroup** | **perf** }
+
+ *OPTIONS* := { { **-V** | **--version** } | { **-h** | **--help** }
+ | { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+ *MAP-COMMANDS* :=
+ { **show** | **list** | **dump** | **update** | **lookup** | **getnext** | **delete**
+ | **pin** | **event_pipe** | **help** }
+
+ *PROG-COMMANDS* := { **show** | **list** | **dump jited** | **dump xlated** | **pin**
+ | **load** | **help** }
+
+ *CGROUP-COMMANDS* := { **show** | **list** | **attach** | **detach** | **help** }
+
+ *PERF-COMMANDS* := { **show** | **list** | **help** }
+
+DESCRIPTION
+===========
+ *bpftool* allows for inspection and simple modification of BPF objects
+ on the system.
+
+	Note that the format of the output of all tools is not guaranteed to be
+ stable and should not be depended upon.
+
+OPTIONS
+=======
+ -h, --help
+ Print short help message (similar to **bpftool help**).
+
+ -v, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+SEE ALSO
+========
+ **bpftool-map**\ (8), **bpftool-prog**\ (8), **bpftool-cgroup**\ (8)
+ **bpftool-perf**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
new file mode 100644
index 000000000..74288a219
--- /dev/null
+++ b/tools/bpf/bpftool/Makefile
@@ -0,0 +1,132 @@
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+BPF_DIR = $(srctree)/tools/lib/bpf/
+
+ifneq ($(OUTPUT),)
+ BPF_PATH = $(OUTPUT)
+else
+ BPF_PATH = $(BPF_DIR)
+endif
+
+LIBBPF = $(BPF_PATH)libbpf.a
+
+BPFTOOL_VERSION := $(shell make --no-print-directory -sC ../../.. kernelversion)
+
+$(LIBBPF): FORCE
+ $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) $(OUTPUT)libbpf.a
+
+$(LIBBPF)-clean:
+ $(call QUIET_CLEAN, libbpf)
+ $(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(OUTPUT) clean >/dev/null
+
+prefix ?= /usr/local
+bash_compdir ?= /usr/share/bash-completion/completions
+
+CC = gcc
+
+CFLAGS += -O2
+CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wshadow -Wno-missing-field-initializers
+CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+ -I$(srctree)/kernel/bpf/ \
+ -I$(srctree)/tools/include \
+ -I$(srctree)/tools/include/uapi \
+ -I$(srctree)/tools/lib/bpf \
+ -I$(srctree)/tools/perf
+CFLAGS += -DBPFTOOL_VERSION='"$(BPFTOOL_VERSION)"'
+LIBS = -lelf -lbfd -lopcodes $(LIBBPF)
+
+INSTALL ?= install
+RM ?= rm -f
+
+FEATURE_USER = .bpftool
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray
+FEATURE_DISPLAY = libbfd disassembler-four-args
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+ifeq ($(feature-disassembler-four-args), 1)
+CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+endif
+
+ifeq ($(feature-reallocarray), 0)
+CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+endif
+
+include $(wildcard $(OUTPUT)*.d)
+
+all: $(OUTPUT)bpftool
+
+SRCS = $(wildcard *.c)
+OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+
+$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
+ $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+$(OUTPUT)%.o: %.c
+ $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+
+clean: $(LIBBPF)-clean
+ $(call QUIET_CLEAN, bpftool)
+ $(Q)$(RM) $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(call QUIET_CLEAN, core-gen)
+ $(Q)$(RM) $(OUTPUT)FEATURE-DUMP.bpftool
+
+install: $(OUTPUT)bpftool
+ $(call QUIET_INSTALL, bpftool)
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/sbin
+ $(Q)$(INSTALL) $(OUTPUT)bpftool $(DESTDIR)$(prefix)/sbin/bpftool
+ $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(bash_compdir)
+ $(Q)$(INSTALL) -m 0644 bash-completion/bpftool $(DESTDIR)$(bash_compdir)
+
+uninstall:
+ $(call QUIET_UNINST, bpftool)
+ $(Q)$(RM) $(DESTDIR)$(prefix)/sbin/bpftool
+ $(Q)$(RM) $(DESTDIR)$(bash_compdir)/bpftool
+
+doc:
+ $(call descend,Documentation)
+
+doc-clean:
+ $(call descend,Documentation,clean)
+
+doc-install:
+ $(call descend,Documentation,install)
+
+doc-uninstall:
+ $(call descend,Documentation,uninstall)
+
+FORCE:
+
+.PHONY: all FORCE clean install uninstall
+.PHONY: doc doc-clean doc-install doc-uninstall
+.DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
new file mode 100644
index 000000000..c2b6b2176
--- /dev/null
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -0,0 +1,559 @@
+# bpftool(8) bash completion -*- shell-script -*-
+#
+# Copyright (C) 2017-2018 Netronome Systems, Inc.
+#
+# This software is dual licensed under the GNU General Public License
+# Version 2, June 1991 as shown in the file COPYING in the top-level
+# directory of this source tree or the BSD 2-Clause License provided
+# below. You have the option to license this software under the
+# complete terms of either license.
+#
+# The BSD 2-Clause License:
+#
+# Redistribution and use in source and binary forms, with or
+# without modification, are permitted provided that the following
+# conditions are met:
+#
+# 1. Redistributions of source code must retain the above
+# copyright notice, this list of conditions and the following
+# disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+#
+# Author: Quentin Monnet <quentin.monnet@netronome.com>
+
+# Takes a list of words in argument; each one of them is added to COMPREPLY if
+# it is not already present on the command line. Returns no value.
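+#
+# Example: `_bpftool_once_attr 'type dev'` offers "type" and "dev" as
+# completions only for keywords not already typed on the command line.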
+_bpftool_once_attr()
+{
+ local w idx found
+ for w in $*; do
+ found=0
+ for (( idx=3; idx < ${#words[@]}-1; idx++ )); do
+ if [[ $w == ${words[idx]} ]]; then
+ found=1
+ break
+ fi
+ done
+ [[ $found -eq 0 ]] && \
+ COMPREPLY+=( $( compgen -W "$w" -- "$cur" ) )
+ done
+}
+
+# Takes a list of words as argument; if any of those words is present on the
+# command line, return 0. Otherwise, return 1.
+_bpftool_search_list()
+{
+ local w idx
+ for w in $*; do
+ for (( idx=3; idx < ${#words[@]}-1; idx++ )); do
+ [[ $w == ${words[idx]} ]] && return 0
+ done
+ done
+ return 1
+}
+
+# Takes a list of words in argument; adds them all to COMPREPLY if none of them
+# is already present on the command line. Returns no value.
+_bpftool_one_of_list()
+{
+ _bpftool_search_list $* && return 1
+ COMPREPLY+=( $( compgen -W "$*" -- "$cur" ) )
+}
+
+_bpftool_get_map_ids()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
+_bpftool_get_perf_map_ids()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \
+ command grep -C2 perf_event_array | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
+
+_bpftool_get_prog_ids()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
+_bpftool_get_prog_tags()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \
+ command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) )
+}
+
+_bpftool_get_obj_map_names()
+{
+ local obj
+
+ obj=$1
+
+ maps=$(objdump -j maps -t $obj 2>/dev/null | \
+ command awk '/g . maps/ {print $NF}')
+
+ COMPREPLY+=( $( compgen -W "$maps" -- "$cur" ) )
+}
+
+_bpftool_get_obj_map_idxs()
+{
+ local obj
+
+ obj=$1
+
+ nmaps=$(objdump -j maps -t $obj 2>/dev/null | grep -c 'g . maps')
+
+ COMPREPLY+=( $( compgen -W "$(seq 0 $((nmaps - 1)))" -- "$cur" ) )
+}
+
+_sysfs_get_netdevs()
+{
+ COMPREPLY+=( $( compgen -W "$( ls /sys/class/net 2>/dev/null )" -- \
+ "$cur" ) )
+}
+
+# For bpftool map update: retrieve type of the map to update.
+_bpftool_map_update_map_type()
+{
+ local keyword ref
+ for (( idx=3; idx < ${#words[@]}-1; idx++ )); do
+ if [[ ${words[$((idx-2))]} == "update" ]]; then
+ keyword=${words[$((idx-1))]}
+ ref=${words[$((idx))]}
+ fi
+ done
+ [[ -z $ref ]] && return 0
+
+ local type
+ type=$(bpftool -jp map show $keyword $ref | \
+ command sed -n 's/.*"type": "\(.*\)",$/\1/p')
+ [[ -n $type ]] && printf $type
+}
+
+_bpftool_map_update_get_id()
+{
+ # Is it the map to update, or a map to insert into the map to update?
+ # Search for "value" keyword.
+ local idx value
+ for (( idx=7; idx < ${#words[@]}-1; idx++ )); do
+ if [[ ${words[idx]} == "value" ]]; then
+ value=1
+ break
+ fi
+ done
+ [[ $value -eq 0 ]] && _bpftool_get_map_ids && return 0
+
+ # Id to complete is for a value. It can be either prog id or map id. This
+ # depends on the type of the map to update.
+ local type=$(_bpftool_map_update_map_type)
+ case $type in
+ array_of_maps|hash_of_maps)
+ _bpftool_get_map_ids
+ return 0
+ ;;
+ prog_array)
+ _bpftool_get_prog_ids
+ return 0
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+}
+
+_bpftool()
+{
+ local cur prev words objword
+ _init_completion || return
+
+ # Deal with options
+ if [[ ${words[cword]} == -* ]]; then
+ local c='--version --json --pretty --bpffs'
+ COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
+ return 0
+ fi
+
+ # Deal with simplest keywords
+ case $prev in
+ help|hex|opcodes|visual)
+ return 0
+ ;;
+ tag)
+ _bpftool_get_prog_tags
+ return 0
+ ;;
+ file|pinned)
+ _filedir
+ return 0
+ ;;
+ batch)
+ COMPREPLY=( $( compgen -W 'file' -- "$cur" ) )
+ return 0
+ ;;
+ esac
+
+ # Remove all options so completions don't have to deal with them.
+ local i
+ for (( i=1; i < ${#words[@]}; )); do
+ if [[ ${words[i]::1} == - ]]; then
+ words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+ [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+ else
+ i=$(( ++i ))
+ fi
+ done
+ cur=${words[cword]}
+ prev=${words[cword - 1]}
+
+ local object=${words[1]} command=${words[2]}
+
+ if [[ -z $object || $cword -eq 1 ]]; then
+ case $cur in
+ *)
+ COMPREPLY=( $( compgen -W "$( bpftool help 2>&1 | \
+ command sed \
+ -e '/OBJECT := /!d' \
+ -e 's/.*{//' \
+ -e 's/}.*//' \
+ -e 's/|//g' )" -- "$cur" ) )
+ COMPREPLY+=( $( compgen -W 'batch help' -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ fi
+
+ [[ $command == help ]] && return 0
+
+ # Completion depends on object and command in use
+ case $object in
+ prog)
+ if [[ $command != "load" ]]; then
+ case $prev in
+ id)
+ _bpftool_get_prog_ids
+ return 0
+ ;;
+ esac
+ fi
+
+ local PROG_TYPE='id pinned tag'
+ case $command in
+ show|list)
+ [[ $prev != "$command" ]] && return 0
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ dump)
+ case $prev in
+ $command)
+ COMPREPLY+=( $( compgen -W "xlated jited" -- \
+ "$cur" ) )
+ return 0
+ ;;
+ xlated|jited)
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
+ "$cur" ) )
+ return 0
+ ;;
+ *)
+ _bpftool_once_attr 'file'
+ if _bpftool_search_list 'xlated'; then
+ COMPREPLY+=( $( compgen -W 'opcodes visual' -- \
+ "$cur" ) )
+ else
+ COMPREPLY+=( $( compgen -W 'opcodes' -- \
+ "$cur" ) )
+ fi
+ return 0
+ ;;
+ esac
+ ;;
+ pin)
+ if [[ $prev == "$command" ]]; then
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+ else
+ _filedir
+ fi
+ return 0
+ ;;
+ load)
+ local obj
+
+ if [[ ${#words[@]} -lt 6 ]]; then
+ _filedir
+ return 0
+ fi
+
+ obj=${words[3]}
+
+ if [[ ${words[-4]} == "map" ]]; then
+ COMPREPLY=( $( compgen -W "id pinned" -- "$cur" ) )
+ return 0
+ fi
+ if [[ ${words[-3]} == "map" ]]; then
+ if [[ ${words[-2]} == "idx" ]]; then
+ _bpftool_get_obj_map_idxs $obj
+ elif [[ ${words[-2]} == "name" ]]; then
+ _bpftool_get_obj_map_names $obj
+ fi
+ return 0
+ fi
+ if [[ ${words[-2]} == "map" ]]; then
+ COMPREPLY=( $( compgen -W "idx name" -- "$cur" ) )
+ return 0
+ fi
+
+ case $prev in
+ type)
+ COMPREPLY=( $( compgen -W "socket kprobe kretprobe classifier action tracepoint raw_tracepoint xdp perf_event cgroup/skb cgroup/sock cgroup/dev lwt_in lwt_out lwt_xmit lwt_seg6local sockops sk_skb sk_msg lirc_mode2 cgroup/bind4 cgroup/bind6 cgroup/connect4 cgroup/connect6 cgroup/sendmsg4 cgroup/sendmsg6 cgroup/post_bind4 cgroup/post_bind6" -- \
+ "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_map_ids
+ return 0
+ ;;
+ pinned)
+ _filedir
+ return 0
+ ;;
+ dev)
+ _sysfs_get_netdevs
+ return 0
+ ;;
+ *)
+ COMPREPLY=( $( compgen -W "map" -- "$cur" ) )
+ _bpftool_once_attr 'type'
+ _bpftool_once_attr 'dev'
+ return 0
+ ;;
+ esac
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'dump help pin load \
+ show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
+ map)
+ local MAP_TYPE='id pinned'
+ case $command in
+ show|list|dump)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_map_ids
+ return 0
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+ ;;
+ lookup|getnext|delete)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_map_ids
+ return 0
+ ;;
+ key)
+ COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
+ ;;
+ *)
+ _bpftool_once_attr 'key'
+ return 0
+ ;;
+ esac
+ ;;
+ update)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_map_update_get_id
+ return 0
+ ;;
+ key)
+ COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) )
+ ;;
+ value)
+ # We can have bytes, or references to a prog or a
+ # map, depending on the type of the map to update.
+ case $(_bpftool_map_update_map_type) in
+ array_of_maps|hash_of_maps)
+ local MAP_TYPE='id pinned'
+ COMPREPLY+=( $( compgen -W "$MAP_TYPE" \
+ -- "$cur" ) )
+ return 0
+ ;;
+ prog_array)
+ local PROG_TYPE='id pinned tag'
+ COMPREPLY+=( $( compgen -W "$PROG_TYPE" \
+ -- "$cur" ) )
+ return 0
+ ;;
+ *)
+ COMPREPLY+=( $( compgen -W 'hex' \
+ -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ return 0
+ ;;
+ *)
+ _bpftool_once_attr 'key'
+ local UPDATE_FLAGS='any exist noexist'
+ for (( idx=3; idx < ${#words[@]}-1; idx++ )); do
+ if [[ ${words[idx]} == 'value' ]]; then
+ # 'value' is present, but is not the last
+ # word i.e. we can now have UPDATE_FLAGS.
+ _bpftool_one_of_list "$UPDATE_FLAGS"
+ return 0
+ fi
+ done
+ for (( idx=3; idx < ${#words[@]}-1; idx++ )); do
+ if [[ ${words[idx]} == 'key' ]]; then
+ # 'key' is present, but is not the last
+ # word i.e. we can now have 'value'.
+ _bpftool_once_attr 'value'
+ return 0
+ fi
+ done
+ return 0
+ ;;
+ esac
+ ;;
+ pin)
+ if [[ $prev == "$command" ]]; then
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+ else
+ _filedir
+ fi
+ return 0
+ ;;
+ event_pipe)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_perf_map_ids
+ return 0
+ ;;
+ cpu)
+ return 0
+ ;;
+ index)
+ return 0
+ ;;
+ *)
+ _bpftool_once_attr 'cpu'
+ _bpftool_once_attr 'index'
+ return 0
+ ;;
+ esac
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'delete dump getnext help \
+ lookup pin event_pipe show list update' -- \
+ "$cur" ) )
+ ;;
+ esac
+ ;;
+ cgroup)
+ case $command in
+ show|list)
+ _filedir
+ return 0
+ ;;
+ tree)
+ _filedir
+ return 0
+ ;;
+ attach|detach)
+ local ATTACH_TYPES='ingress egress sock_create sock_ops \
+ device bind4 bind6 post_bind4 post_bind6 connect4 \
+ connect6 sendmsg4 sendmsg6'
+ local ATTACH_FLAGS='multi override'
+ local PROG_TYPE='id pinned tag'
+ case $prev in
+ $command)
+ _filedir
+ return 0
+ ;;
+ ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
+ post_bind4|post_bind6|connect4|connect6|sendmsg4|\
+ sendmsg6)
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
+ "$cur" ) )
+ return 0
+ ;;
+ id)
+ _bpftool_get_prog_ids
+ return 0
+ ;;
+ *)
+ if ! _bpftool_search_list "$ATTACH_TYPES"; then
+ COMPREPLY=( $( compgen -W "$ATTACH_TYPES" -- \
+ "$cur" ) )
+ elif [[ "$command" == "attach" ]]; then
+ # We have an attach type on the command line,
+ # but it is not the previous word, or
+ # "id|pinned|tag" (we already checked for
+ # that). This should only leave the case when
+					# we need attach flags for "attach" command.
+ _bpftool_one_of_list "$ATTACH_FLAGS"
+ fi
+ return 0
+ ;;
+ esac
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help attach detach \
+ show list tree' -- "$cur" ) )
+ ;;
+ esac
+ ;;
+ perf)
+ case $command in
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help \
+ show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
+ esac
+} &&
+complete -F _bpftool bpftool
+
+# ex: ts=4 sw=4 et filetype=sh
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
new file mode 100644
index 000000000..1e7c61922
--- /dev/null
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <ctype.h>
+#include <stdio.h> /* for (FILE *) used by json_writer */
+#include <string.h>
+#include <asm/byteorder.h>
+#include <linux/bitops.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "main.h"
+
+#define BITS_PER_BYTE_MASK (BITS_PER_BYTE - 1)
+#define BITS_PER_BYTE_MASKED(bits) ((bits) & BITS_PER_BYTE_MASK)
+#define BITS_ROUNDDOWN_BYTES(bits) ((bits) >> 3)
+#define BITS_ROUNDUP_BYTES(bits) \
+ (BITS_ROUNDDOWN_BYTES(bits) + !!BITS_PER_BYTE_MASKED(bits))
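+/* Example: BITS_ROUNDUP_BYTES(13) = (13 >> 3) + !!(13 & 0x7)
+ * = 1 + 1 = 2, i.e. 13 bits occupy 2 bytes.
+ */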
+
+static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
+ __u8 bit_offset, const void *data);
+
+static void btf_dumper_ptr(const void *data, json_writer_t *jw,
+ bool is_plain_text)
+{
+ if (is_plain_text)
+ jsonw_printf(jw, "%p", *(void **)data);
+ else
+ jsonw_printf(jw, "%lu", *(unsigned long *)data);
+}
+
+static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
+ __u8 bit_offset, const void *data)
+{
+ int actual_type_id;
+
+ actual_type_id = btf__resolve_type(d->btf, type_id);
+ if (actual_type_id < 0)
+ return actual_type_id;
+
+ return btf_dumper_do_type(d, actual_type_id, bit_offset, data);
+}
+
+static void btf_dumper_enum(const void *data, json_writer_t *jw)
+{
+ jsonw_printf(jw, "%d", *(int *)data);
+}
+
+static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
+ const void *data)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+ struct btf_array *arr = (struct btf_array *)(t + 1);
+ long long elem_size;
+ int ret = 0;
+ __u32 i;
+
+ elem_size = btf__resolve_size(d->btf, arr->type);
+ if (elem_size < 0)
+ return elem_size;
+
+ jsonw_start_array(d->jw);
+ for (i = 0; i < arr->nelems; i++) {
+ ret = btf_dumper_do_type(d, arr->type, 0,
+ data + i * elem_size);
+ if (ret)
+ break;
+ }
+
+ jsonw_end_array(d->jw);
+ return ret;
+}
+
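+/* Extract a bitfield into a __u64. Worked example (little-endian
+ * bitfields): for bit_offset 3 and nr_bits 5, bits_to_copy is 8, so
+ * one byte is copied; shifting left by 64 - 8 and then right by
+ * 64 - 5 leaves exactly the five field bits in the low bits of
+ * print_num.
+ */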
+static void btf_dumper_int_bits(__u32 int_type, __u8 bit_offset,
+ const void *data, json_writer_t *jw,
+ bool is_plain_text)
+{
+ int left_shift_bits, right_shift_bits;
+ int nr_bits = BTF_INT_BITS(int_type);
+ int total_bits_offset;
+ int bytes_to_copy;
+ int bits_to_copy;
+ __u64 print_num;
+
+ total_bits_offset = bit_offset + BTF_INT_OFFSET(int_type);
+ data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
+ bit_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
+ bits_to_copy = bit_offset + nr_bits;
+ bytes_to_copy = BITS_ROUNDUP_BYTES(bits_to_copy);
+
+ print_num = 0;
+ memcpy(&print_num, data, bytes_to_copy);
+#if defined(__BIG_ENDIAN_BITFIELD)
+ left_shift_bits = bit_offset;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+ left_shift_bits = 64 - bits_to_copy;
+#else
+#error neither big nor little endian
+#endif
+ right_shift_bits = 64 - nr_bits;
+
+ print_num <<= left_shift_bits;
+ print_num >>= right_shift_bits;
+ if (is_plain_text)
+ jsonw_printf(jw, "0x%llx", print_num);
+ else
+ jsonw_printf(jw, "%llu", print_num);
+}
+
+static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
+ const void *data, json_writer_t *jw,
+ bool is_plain_text)
+{
+ __u32 *int_type;
+ __u32 nr_bits;
+
+ int_type = (__u32 *)(t + 1);
+ nr_bits = BTF_INT_BITS(*int_type);
+	/* if this is a bitfield */
+ if (bit_offset || BTF_INT_OFFSET(*int_type) ||
+ BITS_PER_BYTE_MASKED(nr_bits)) {
+ btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+ is_plain_text);
+ return 0;
+ }
+
+ switch (BTF_INT_ENCODING(*int_type)) {
+ case 0:
+ if (BTF_INT_BITS(*int_type) == 64)
+ jsonw_printf(jw, "%llu", *(__u64 *)data);
+ else if (BTF_INT_BITS(*int_type) == 32)
+ jsonw_printf(jw, "%u", *(__u32 *)data);
+ else if (BTF_INT_BITS(*int_type) == 16)
+ jsonw_printf(jw, "%hu", *(__u16 *)data);
+ else if (BTF_INT_BITS(*int_type) == 8)
+ jsonw_printf(jw, "%hhu", *(__u8 *)data);
+ else
+ btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+ is_plain_text);
+ break;
+ case BTF_INT_SIGNED:
+ if (BTF_INT_BITS(*int_type) == 64)
+ jsonw_printf(jw, "%lld", *(long long *)data);
+ else if (BTF_INT_BITS(*int_type) == 32)
+ jsonw_printf(jw, "%d", *(int *)data);
+ else if (BTF_INT_BITS(*int_type) == 16)
+ jsonw_printf(jw, "%hd", *(short *)data);
+ else if (BTF_INT_BITS(*int_type) == 8)
+ jsonw_printf(jw, "%hhd", *(char *)data);
+ else
+ btf_dumper_int_bits(*int_type, bit_offset, data, jw,
+ is_plain_text);
+ break;
+ case BTF_INT_CHAR:
+ if (isprint(*(char *)data))
+ jsonw_printf(jw, "\"%c\"", *(char *)data);
+ else
+ if (is_plain_text)
+ jsonw_printf(jw, "0x%hhx", *(char *)data);
+ else
+ jsonw_printf(jw, "\"\\u00%02hhx\"",
+ *(char *)data);
+ break;
+ case BTF_INT_BOOL:
+ jsonw_bool(jw, *(int *)data);
+ break;
+ default:
+ /* shouldn't happen */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
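+/* Member offsets are expressed in bits: point at the byte that
+ * contains the member and pass the residual in-byte offset down so
+ * that bitfield members are handled correctly.
+ */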
+static int btf_dumper_struct(const struct btf_dumper *d, __u32 type_id,
+ const void *data)
+{
+ const struct btf_type *t;
+ struct btf_member *m;
+ const void *data_off;
+ int ret = 0;
+ int i, vlen;
+
+ t = btf__type_by_id(d->btf, type_id);
+ if (!t)
+ return -EINVAL;
+
+ vlen = BTF_INFO_VLEN(t->info);
+ jsonw_start_object(d->jw);
+ m = (struct btf_member *)(t + 1);
+
+ for (i = 0; i < vlen; i++) {
+ data_off = data + BITS_ROUNDDOWN_BYTES(m[i].offset);
+ jsonw_name(d->jw, btf__name_by_offset(d->btf, m[i].name_off));
+ ret = btf_dumper_do_type(d, m[i].type,
+ BITS_PER_BYTE_MASKED(m[i].offset),
+ data_off);
+ if (ret)
+ break;
+ }
+
+ jsonw_end_object(d->jw);
+
+ return ret;
+}
+
+static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
+ __u8 bit_offset, const void *data)
+{
+ const struct btf_type *t = btf__type_by_id(d->btf, type_id);
+
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ return btf_dumper_int(t, bit_offset, data, d->jw,
+ d->is_plain_text);
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ return btf_dumper_struct(d, type_id, data);
+ case BTF_KIND_ARRAY:
+ return btf_dumper_array(d, type_id, data);
+ case BTF_KIND_ENUM:
+ btf_dumper_enum(data, d->jw);
+ return 0;
+ case BTF_KIND_PTR:
+ btf_dumper_ptr(data, d->jw, d->is_plain_text);
+ return 0;
+ case BTF_KIND_UNKN:
+ jsonw_printf(d->jw, "(unknown)");
+ return 0;
+ case BTF_KIND_FWD:
+		/* a map key or value can't be a forward declaration */
+ jsonw_printf(d->jw, "(fwd-kind-invalid)");
+ return -EINVAL;
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ return btf_dumper_modifier(d, type_id, bit_offset, data);
+ default:
+		jsonw_printf(d->jw, "(unsupported-kind)");
+ return -EINVAL;
+ }
+}
+
+int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
+ const void *data)
+{
+ return btf_dumper_do_type(d, type_id, 0, data);
+}
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
new file mode 100644
index 000000000..f30b3a4a8
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/list.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+struct cfg {
+ struct list_head funcs;
+ int func_num;
+};
+
+struct func_node {
+ struct list_head l;
+ struct list_head bbs;
+ struct bpf_insn *start;
+ struct bpf_insn *end;
+ int idx;
+ int bb_num;
+};
+
+struct bb_node {
+ struct list_head l;
+ struct list_head e_prevs;
+ struct list_head e_succs;
+ struct bpf_insn *head;
+ struct bpf_insn *tail;
+ int idx;
+};
+
+#define EDGE_FLAG_EMPTY 0x0
+#define EDGE_FLAG_FALLTHROUGH 0x1
+#define EDGE_FLAG_JUMP 0x2
+struct edge_node {
+ struct list_head l;
+ struct bb_node *src;
+ struct bb_node *dst;
+ int flags;
+};
+
+#define ENTRY_BLOCK_INDEX 0
+#define EXIT_BLOCK_INDEX 1
+#define NUM_FIXED_BLOCKS 2
+#define func_prev(func) list_prev_entry(func, l)
+#define func_next(func) list_next_entry(func, l)
+#define bb_prev(bb) list_prev_entry(bb, l)
+#define bb_next(bb) list_next_entry(bb, l)
+#define entry_bb(func) func_first_bb(func)
+#define exit_bb(func) func_last_bb(func)
+#define cfg_first_func(cfg) \
+ list_first_entry(&cfg->funcs, struct func_node, l)
+#define cfg_last_func(cfg) \
+ list_last_entry(&cfg->funcs, struct func_node, l)
+#define func_first_bb(func) \
+ list_first_entry(&func->bbs, struct bb_node, l)
+#define func_last_bb(func) \
+ list_last_entry(&func->bbs, struct bb_node, l)
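+/* Block indices 0 and 1 are reserved for the dummy ENTRY and EXIT
+ * blocks added by func_add_special_bb(); real basic blocks are
+ * numbered from NUM_FIXED_BLOCKS upwards.
+ */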
+
+static struct func_node *cfg_append_func(struct cfg *cfg, struct bpf_insn *insn)
+{
+ struct func_node *new_func, *func;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func->start == insn)
+ return func;
+ else if (func->start > insn)
+ break;
+ }
+
+ func = func_prev(func);
+ new_func = calloc(1, sizeof(*new_func));
+ if (!new_func) {
+ p_err("OOM when allocating FUNC node");
+ return NULL;
+ }
+ new_func->start = insn;
+ new_func->idx = cfg->func_num;
+ list_add(&new_func->l, &func->l);
+ cfg->func_num++;
+
+ return new_func;
+}
+
+static struct bb_node *func_append_bb(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *new_bb, *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ else if (bb->head > insn)
+ break;
+ }
+
+ bb = bb_prev(bb);
+ new_bb = calloc(1, sizeof(*new_bb));
+ if (!new_bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+ new_bb->head = insn;
+ INIT_LIST_HEAD(&new_bb->e_prevs);
+ INIT_LIST_HEAD(&new_bb->e_succs);
+ list_add(&new_bb->l, &bb->l);
+
+ return new_bb;
+}
+
+static struct bb_node *func_insert_dummy_bb(struct list_head *after)
+{
+ struct bb_node *bb;
+
+ bb = calloc(1, sizeof(*bb));
+ if (!bb) {
+ p_err("OOM when allocating BB node");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&bb->e_prevs);
+ INIT_LIST_HEAD(&bb->e_succs);
+ list_add(&bb->l, after);
+
+ return bb;
+}
+
+static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
+ struct bpf_insn *end)
+{
+ struct func_node *func, *last_func;
+
+ func = cfg_append_func(cfg, cur);
+ if (!func)
+ return true;
+
+ for (; cur < end; cur++) {
+ if (cur->code != (BPF_JMP | BPF_CALL))
+ continue;
+ if (cur->src_reg != BPF_PSEUDO_CALL)
+ continue;
+ func = cfg_append_func(cfg, cur + cur->off + 1);
+ if (!func)
+ return true;
+ }
+
+ last_func = cfg_last_func(cfg);
+ last_func->end = end - 1;
+ func = cfg_first_func(cfg);
+ list_for_each_entry_from(func, &last_func->l, l) {
+ func->end = func_next(func)->start - 1;
+ }
+
+ return false;
+}
+
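+/* Mark basic-block heads: the first insn of the function, every jump
+ * target, and, for conditional jumps, the fallthrough insn after the
+ * jump. BPF_EXIT and BPF_CALL instructions are skipped here.
+ */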
+static bool func_partition_bb_head(struct func_node *func)
+{
+ struct bpf_insn *cur, *end;
+ struct bb_node *bb;
+
+ cur = func->start;
+ end = func->end;
+ INIT_LIST_HEAD(&func->bbs);
+ bb = func_append_bb(func, cur);
+ if (!bb)
+ return true;
+
+ for (; cur <= end; cur++) {
+ if (BPF_CLASS(cur->code) == BPF_JMP) {
+ u8 opcode = BPF_OP(cur->code);
+
+ if (opcode == BPF_EXIT || opcode == BPF_CALL)
+ continue;
+
+ bb = func_append_bb(func, cur + cur->off + 1);
+ if (!bb)
+ return true;
+
+ if (opcode != BPF_JA) {
+ bb = func_append_bb(func, cur + 1);
+ if (!bb)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static void func_partition_bb_tail(struct func_node *func)
+{
+ unsigned int bb_idx = NUM_FIXED_BLOCKS;
+ struct bb_node *bb, *last;
+
+ last = func_last_bb(func);
+ last->tail = func->end;
+ bb = func_first_bb(func);
+ list_for_each_entry_from(bb, &last->l, l) {
+ bb->tail = bb_next(bb)->head - 1;
+ bb->idx = bb_idx++;
+ }
+
+ last->idx = bb_idx++;
+ func->bb_num = bb_idx;
+}
+
+static bool func_add_special_bb(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ bb = func_insert_dummy_bb(&func->bbs);
+ if (!bb)
+ return true;
+ bb->idx = ENTRY_BLOCK_INDEX;
+
+ bb = func_insert_dummy_bb(&func_last_bb(func)->l);
+ if (!bb)
+ return true;
+ bb->idx = EXIT_BLOCK_INDEX;
+
+ return false;
+}
+
+static bool func_partition_bb(struct func_node *func)
+{
+ if (func_partition_bb_head(func))
+ return true;
+
+ func_partition_bb_tail(func);
+
+ return false;
+}
+
+static struct bb_node *func_search_bb_with_head(struct func_node *func,
+ struct bpf_insn *insn)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ if (bb->head == insn)
+ return bb;
+ }
+
+ return NULL;
+}
+
+static struct edge_node *new_edge(struct bb_node *src, struct bb_node *dst,
+ int flags)
+{
+ struct edge_node *e;
+
+ e = calloc(1, sizeof(*e));
+ if (!e) {
+ p_err("OOM when allocating edge node");
+ return NULL;
+ }
+
+ if (src)
+ e->src = src;
+ if (dst)
+ e->dst = dst;
+
+ e->flags |= flags;
+
+ return e;
+}
+
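+/* Connect the blocks: ENTRY falls through to the first real block and
+ * the last real block falls through to EXIT; every other block gets a
+ * fallthrough edge, a jump edge, or both, depending on the jump
+ * instruction at its tail.
+ */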
+static bool func_add_bb_edges(struct func_node *func)
+{
+ struct bpf_insn *insn;
+ struct edge_node *e;
+ struct bb_node *bb;
+
+ bb = entry_bb(func);
+ e = new_edge(bb, bb_next(bb), EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ bb = exit_bb(func);
+ e = new_edge(bb_prev(bb), bb, EDGE_FLAG_FALLTHROUGH);
+ if (!e)
+ return true;
+ list_add_tail(&e->l, &bb->e_prevs);
+
+ bb = entry_bb(func);
+ bb = bb_next(bb);
+ list_for_each_entry_from(bb, &exit_bb(func)->l, l) {
+ e = new_edge(bb, NULL, EDGE_FLAG_EMPTY);
+ if (!e)
+ return true;
+ e->src = bb;
+
+ insn = bb->tail;
+ if (BPF_CLASS(insn->code) != BPF_JMP ||
+ BPF_OP(insn->code) == BPF_EXIT) {
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ } else if (BPF_OP(insn->code) == BPF_JA) {
+ e->dst = func_search_bb_with_head(func,
+ insn + insn->off + 1);
+ e->flags |= EDGE_FLAG_JUMP;
+ list_add_tail(&e->l, &bb->e_succs);
+ continue;
+ }
+
+ e->dst = bb_next(bb);
+ e->flags |= EDGE_FLAG_FALLTHROUGH;
+ list_add_tail(&e->l, &bb->e_succs);
+
+ e = new_edge(bb, NULL, EDGE_FLAG_JUMP);
+ if (!e)
+ return true;
+ e->src = bb;
+ e->dst = func_search_bb_with_head(func, insn + insn->off + 1);
+ list_add_tail(&e->l, &bb->e_succs);
+ }
+
+ return false;
+}
+
+static bool cfg_build(struct cfg *cfg, struct bpf_insn *insn, unsigned int len)
+{
+ int cnt = len / sizeof(*insn);
+ struct func_node *func;
+
+ INIT_LIST_HEAD(&cfg->funcs);
+
+ if (cfg_partition_funcs(cfg, insn, insn + cnt))
+ return true;
+
+ list_for_each_entry(func, &cfg->funcs, l) {
+ if (func_partition_bb(func) || func_add_special_bb(func))
+ return true;
+
+ if (func_add_bb_edges(func))
+ return true;
+ }
+
+ return false;
+}
+
+static void cfg_destroy(struct cfg *cfg)
+{
+ struct func_node *func, *func2;
+
+ list_for_each_entry_safe(func, func2, &cfg->funcs, l) {
+ struct bb_node *bb, *bb2;
+
+ list_for_each_entry_safe(bb, bb2, &func->bbs, l) {
+ struct edge_node *e, *e2;
+
+ list_for_each_entry_safe(e, e2, &bb->e_prevs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_for_each_entry_safe(e, e2, &bb->e_succs, l) {
+ list_del(&e->l);
+ free(e);
+ }
+
+ list_del(&bb->l);
+ free(bb);
+ }
+
+ list_del(&func->l);
+ free(func);
+ }
+}
+
+static void draw_bb_node(struct func_node *func, struct bb_node *bb)
+{
+ const char *shape;
+
+ if (bb->idx == ENTRY_BLOCK_INDEX || bb->idx == EXIT_BLOCK_INDEX)
+ shape = "Mdiamond";
+ else
+ shape = "record";
+
+ printf("\tfn_%d_bb_%d [shape=%s,style=filled,label=\"",
+ func->idx, bb->idx, shape);
+
+ if (bb->idx == ENTRY_BLOCK_INDEX) {
+ printf("ENTRY");
+ } else if (bb->idx == EXIT_BLOCK_INDEX) {
+ printf("EXIT");
+ } else {
+ unsigned int start_idx;
+ struct dump_data dd = {};
+
+ printf("{");
+ kernel_syms_load(&dd);
+ start_idx = bb->head - func->start;
+ dump_xlated_for_graph(&dd, bb->head, bb->tail, start_idx);
+ kernel_syms_destroy(&dd);
+ printf("}");
+ }
+
+ printf("\"];\n\n");
+}
+
+static void draw_bb_succ_edges(struct func_node *func, struct bb_node *bb)
+{
+ const char *style = "\"solid,bold\"";
+ const char *color = "black";
+ int func_idx = func->idx;
+ struct edge_node *e;
+ int weight = 10;
+
+ if (list_empty(&bb->e_succs))
+ return;
+
+ list_for_each_entry(e, &bb->e_succs, l) {
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=%s, color=%s, weight=%d, constraint=true",
+ func_idx, e->src->idx, func_idx, e->dst->idx,
+ style, color, weight);
+ printf("];\n");
+ }
+}
+
+static void func_output_bb_def(struct func_node *func)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_node(func, bb);
+ }
+}
+
+static void func_output_edges(struct func_node *func)
+{
+ int func_idx = func->idx;
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, &func->bbs, l) {
+ draw_bb_succ_edges(func, bb);
+ }
+
+ /* Add an invisible edge from ENTRY to EXIT, this is to
+ * improve the graph layout.
+ */
+ printf("\tfn_%d_bb_%d:s -> fn_%d_bb_%d:n [style=\"invis\", constraint=true];\n",
+ func_idx, ENTRY_BLOCK_INDEX, func_idx, EXIT_BLOCK_INDEX);
+}
+
+static void cfg_dump(struct cfg *cfg)
+{
+ struct func_node *func;
+
+ printf("digraph \"DOT graph for eBPF program\" {\n");
+ list_for_each_entry(func, &cfg->funcs, l) {
+ printf("subgraph \"cluster_%d\" {\n\tstyle=\"dashed\";\n\tcolor=\"black\";\n\tlabel=\"func_%d ()\";\n",
+ func->idx, func->idx);
+ func_output_bb_def(func);
+ func_output_edges(func);
+ printf("}\n");
+ }
+ printf("}\n");
+}
+
+void dump_xlated_cfg(void *buf, unsigned int len)
+{
+ struct bpf_insn *insn = buf;
+ struct cfg cfg;
+
+ memset(&cfg, 0, sizeof(cfg));
+ if (cfg_build(&cfg, insn, len))
+ return;
+
+ cfg_dump(&cfg);
+
+ cfg_destroy(&cfg);
+}
diff --git a/tools/bpf/bpftool/cfg.h b/tools/bpf/bpftool/cfg.h
new file mode 100644
index 000000000..2cc9bd990
--- /dev/null
+++ b/tools/bpf/bpftool/cfg.h
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_CFG_H
+#define __BPF_TOOL_CFG_H
+
+void dump_xlated_cfg(void *buf, unsigned int len);
+
+#endif /* __BPF_TOOL_CFG_H */
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
new file mode 100644
index 000000000..adbcd8481
--- /dev/null
+++ b/tools/bpf/bpftool/cgroup.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2017 Facebook
+// Author: Roman Gushchin <guro@fb.com>
+
+#define _XOPEN_SOURCE 500
+#include <errno.h>
+#include <fcntl.h>
+#include <ftw.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+#define HELP_SPEC_ATTACH_FLAGS \
+ "ATTACH_FLAGS := { multi | override }"
+
+#define HELP_SPEC_ATTACH_TYPES \
+ " ATTACH_TYPE := { ingress | egress | sock_create |\n" \
+ " sock_ops | device | bind4 | bind6 |\n" \
+ " post_bind4 | post_bind6 | connect4 |\n" \
+ " connect6 | sendmsg4 | sendmsg6 }"
+
+static const char * const attach_type_strings[] = {
+ [BPF_CGROUP_INET_INGRESS] = "ingress",
+ [BPF_CGROUP_INET_EGRESS] = "egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+ [BPF_CGROUP_SOCK_OPS] = "sock_ops",
+ [BPF_CGROUP_DEVICE] = "device",
+ [BPF_CGROUP_INET4_BIND] = "bind4",
+ [BPF_CGROUP_INET6_BIND] = "bind6",
+ [BPF_CGROUP_INET4_CONNECT] = "connect4",
+ [BPF_CGROUP_INET6_CONNECT] = "connect6",
+ [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+ [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+ [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+ [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+ [__MAX_BPF_ATTACH_TYPE] = NULL,
+};
+
+static enum bpf_attach_type parse_attach_type(const char *str)
+{
+ enum bpf_attach_type type;
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ if (attach_type_strings[type] &&
+ is_prefix(str, attach_type_strings[type]))
+ return type;
+ }
+
+ return __MAX_BPF_ATTACH_TYPE;
+}
+
+static int show_bpf_prog(int id, const char *attach_type_str,
+ const char *attach_flags_str,
+ int level)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int prog_fd;
+
+ prog_fd = bpf_prog_get_fd_by_id(id);
+ if (prog_fd < 0)
+ return -1;
+
+ if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+ close(prog_fd);
+ return -1;
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "id", info.id);
+ jsonw_string_field(json_wtr, "attach_type",
+ attach_type_str);
+ jsonw_string_field(json_wtr, "attach_flags",
+ attach_flags_str);
+ jsonw_string_field(json_wtr, "name", info.name);
+ jsonw_end_object(json_wtr);
+ } else {
+		printf("%s%-8u %-15s %-15s %-15s\n", level ? "    " : "",
+ info.id,
+ attach_type_str,
+ attach_flags_str,
+ info.name);
+ }
+
+ close(prog_fd);
+ return 0;
+}
+
+static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type)
+{
+ __u32 prog_cnt = 0;
+ int ret;
+
+ ret = bpf_prog_query(cgroup_fd, type, 0, NULL, NULL, &prog_cnt);
+ if (ret)
+ return -1;
+
+ return prog_cnt;
+}
+
+static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
+ int level)
+{
+ __u32 prog_ids[1024] = {0};
+ char *attach_flags_str;
+ __u32 prog_cnt, iter;
+ __u32 attach_flags;
+ char buf[32];
+ int ret;
+
+ prog_cnt = ARRAY_SIZE(prog_ids);
+ ret = bpf_prog_query(cgroup_fd, type, 0, &attach_flags, prog_ids,
+ &prog_cnt);
+ if (ret)
+ return ret;
+
+ if (prog_cnt == 0)
+ return 0;
+
+ switch (attach_flags) {
+ case BPF_F_ALLOW_MULTI:
+ attach_flags_str = "multi";
+ break;
+ case BPF_F_ALLOW_OVERRIDE:
+ attach_flags_str = "override";
+ break;
+ case 0:
+ attach_flags_str = "";
+ break;
+ default:
+ snprintf(buf, sizeof(buf), "unknown(%x)", attach_flags);
+ attach_flags_str = buf;
+ }
+
+ for (iter = 0; iter < prog_cnt; iter++)
+ show_bpf_prog(prog_ids[iter], attach_type_strings[type],
+ attach_flags_str, level);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ enum bpf_attach_type type;
+ int cgroup_fd;
+ int ret = -1;
+
+ if (argc < 1) {
+ p_err("too few parameters for cgroup show");
+ goto exit;
+ } else if (argc > 1) {
+ p_err("too many parameters for cgroup show");
+ goto exit;
+ }
+
+ cgroup_fd = open(argv[0], O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s", argv[0]);
+ goto exit;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ else
+ printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
+ "AttachFlags", "Name");
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ /*
+		 * Not all attach types may be supported, so it's expected
+		 * that some requests will fail.
+		 * If we were able to show at least one attach type,
+		 * return 0.
+ */
+ if (show_attached_bpf_progs(cgroup_fd, type, 0) == 0)
+ ret = 0;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ close(cgroup_fd);
+exit:
+ return ret;
+}
+
+/*
+ * To distinguish nftw() errors from do_show_tree_fn() errors
+ * and to avoid duplicating error messages, return -2
+ * from do_show_tree_fn() in case of error.
+ */
+#define NFTW_ERR -1
+#define SHOW_TREE_FN_ERR -2
+static int do_show_tree_fn(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftw)
+{
+ enum bpf_attach_type type;
+ bool skip = true;
+ int cgroup_fd;
+
+ if (typeflag != FTW_D)
+ return 0;
+
+ cgroup_fd = open(fpath, O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s: %s", fpath, strerror(errno));
+ return SHOW_TREE_FN_ERR;
+ }
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
+ int count = count_attached_bpf_progs(cgroup_fd, type);
+
+ if (count < 0 && errno != EINVAL) {
+ p_err("can't query bpf programs attached to %s: %s",
+ fpath, strerror(errno));
+ close(cgroup_fd);
+ return SHOW_TREE_FN_ERR;
+ }
+ if (count > 0) {
+ skip = false;
+ break;
+ }
+ }
+
+ if (skip) {
+ close(cgroup_fd);
+ return 0;
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "cgroup", fpath);
+ jsonw_name(json_wtr, "programs");
+ jsonw_start_array(json_wtr);
+ } else {
+ printf("%s\n", fpath);
+ }
+
+ for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
+ show_attached_bpf_progs(cgroup_fd, type, ftw->level);
+
+ if (json_output) {
+ jsonw_end_array(json_wtr);
+ jsonw_end_object(json_wtr);
+ }
+
+ close(cgroup_fd);
+
+ return 0;
+}
+
+static char *find_cgroup_root(void)
+{
+ struct mntent *mnt;
+ FILE *f;
+
+ f = fopen("/proc/mounts", "r");
+ if (f == NULL)
+ return NULL;
+
+ while ((mnt = getmntent(f))) {
+ if (strcmp(mnt->mnt_type, "cgroup2") == 0) {
+ fclose(f);
+ return strdup(mnt->mnt_dir);
+ }
+ }
+
+ fclose(f);
+ return NULL;
+}
+
+static int do_show_tree(int argc, char **argv)
+{
+ char *cgroup_root;
+ int ret;
+
+ switch (argc) {
+ case 0:
+ cgroup_root = find_cgroup_root();
+ if (!cgroup_root) {
+ p_err("cgroup v2 isn't mounted");
+ return -1;
+ }
+ break;
+ case 1:
+ cgroup_root = argv[0];
+ break;
+ default:
+ p_err("too many parameters for cgroup tree");
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ else
+ printf("%s\n"
+ "%-8s %-15s %-15s %-15s\n",
+ "CgroupPath",
+ "ID", "AttachType", "AttachFlags", "Name");
+
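+	/* FTW_MOUNT keeps nftw() from crossing mount points, so the walk
+	 * stays within the given cgroup hierarchy.
+	 */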
+ switch (nftw(cgroup_root, do_show_tree_fn, 1024, FTW_MOUNT)) {
+ case NFTW_ERR:
+ p_err("can't iterate over %s: %s", cgroup_root,
+ strerror(errno));
+ ret = -1;
+ break;
+ case SHOW_TREE_FN_ERR:
+ ret = -1;
+ break;
+ default:
+ ret = 0;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ if (argc == 0)
+ free(cgroup_root);
+
+ return ret;
+}
+
+static int do_attach(int argc, char **argv)
+{
+ enum bpf_attach_type attach_type;
+ int cgroup_fd, prog_fd;
+ int attach_flags = 0;
+ int ret = -1;
+ int i;
+
+ if (argc < 4) {
+ p_err("too few parameters for cgroup attach");
+ goto exit;
+ }
+
+ cgroup_fd = open(argv[0], O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s", argv[0]);
+ goto exit;
+ }
+
+ attach_type = parse_attach_type(argv[1]);
+ if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+ p_err("invalid attach type");
+ goto exit_cgroup;
+ }
+
+ argc -= 2;
+ argv = &argv[2];
+ prog_fd = prog_parse_fd(&argc, &argv);
+ if (prog_fd < 0)
+ goto exit_cgroup;
+
+ for (i = 0; i < argc; i++) {
+ if (is_prefix(argv[i], "multi")) {
+ attach_flags |= BPF_F_ALLOW_MULTI;
+ } else if (is_prefix(argv[i], "override")) {
+ attach_flags |= BPF_F_ALLOW_OVERRIDE;
+ } else {
+ p_err("unknown option: %s", argv[i]);
+ goto exit_cgroup;
+ }
+ }
+
+ if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, attach_flags)) {
+ p_err("failed to attach program");
+ goto exit_prog;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ ret = 0;
+
+exit_prog:
+ close(prog_fd);
+exit_cgroup:
+ close(cgroup_fd);
+exit:
+ return ret;
+}
+
+static int do_detach(int argc, char **argv)
+{
+ enum bpf_attach_type attach_type;
+ int prog_fd, cgroup_fd;
+ int ret = -1;
+
+ if (argc < 4) {
+ p_err("too few parameters for cgroup detach");
+ goto exit;
+ }
+
+ cgroup_fd = open(argv[0], O_RDONLY);
+ if (cgroup_fd < 0) {
+ p_err("can't open cgroup %s", argv[0]);
+ goto exit;
+ }
+
+ attach_type = parse_attach_type(argv[1]);
+ if (attach_type == __MAX_BPF_ATTACH_TYPE) {
+ p_err("invalid attach type");
+ goto exit_cgroup;
+ }
+
+ argc -= 2;
+ argv = &argv[2];
+ prog_fd = prog_parse_fd(&argc, &argv);
+ if (prog_fd < 0)
+ goto exit_cgroup;
+
+ if (bpf_prog_detach2(prog_fd, cgroup_fd, attach_type)) {
+ p_err("failed to detach program");
+ goto exit_prog;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ ret = 0;
+
+exit_prog:
+ close(prog_fd);
+exit_cgroup:
+ close(cgroup_fd);
+exit:
+ return ret;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s %s { show | list } CGROUP\n"
+ " %s %s tree [CGROUP_ROOT]\n"
+ " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+ " %s %s detach CGROUP ATTACH_TYPE PROG\n"
+ " %s %s help\n"
+ "\n"
+ HELP_SPEC_ATTACH_TYPES "\n"
+ " " HELP_SPEC_ATTACH_FLAGS "\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "tree", do_show_tree },
+ { "attach", do_attach },
+ { "detach", do_detach },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_cgroup(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
new file mode 100644
index 000000000..7faf24ef3
--- /dev/null
+++ b/tools/bpf/bpftool/common.c
@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <fts.h>
+#include <libgen.h>
+#include <mntent.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/limits.h>
+#include <linux/magic.h>
+#include <net/if.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+void p_err(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "error");
+ jsonw_vprintf_enquote(json_wtr, fmt, ap);
+ jsonw_end_object(json_wtr);
+ } else {
+ fprintf(stderr, "Error: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ }
+ va_end(ap);
+}
+
+void p_info(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (json_output)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+}
+
+static bool is_bpffs(char *path)
+{
+ struct statfs st_fs;
+
+ if (statfs(path, &st_fs) < 0)
+ return false;
+
+ return (unsigned long)st_fs.f_type == BPF_FS_MAGIC;
+}
+
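+/* Make the mount point private first so that mounting bpffs does not
+ * propagate to other namespaces; if the target is not a mount point
+ * yet (EINVAL), bind-mount it onto itself and retry.
+ */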
+static int mnt_bpffs(const char *target, char *buff, size_t bufflen)
+{
+ bool bind_done = false;
+
+ while (mount("", target, "none", MS_PRIVATE | MS_REC, NULL)) {
+ if (errno != EINVAL || bind_done) {
+ snprintf(buff, bufflen,
+ "mount --make-private %s failed: %s",
+ target, strerror(errno));
+ return -1;
+ }
+
+ if (mount(target, target, "none", MS_BIND, NULL)) {
+ snprintf(buff, bufflen,
+ "mount --bind %s %s failed: %s",
+ target, target, strerror(errno));
+ return -1;
+ }
+
+ bind_done = true;
+ }
+
+ if (mount("bpf", target, "bpf", 0, "mode=0700")) {
+ snprintf(buff, bufflen, "mount -t bpf bpf %s failed: %s",
+ target, strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+int open_obj_pinned(char *path, bool quiet)
+{
+ int fd;
+
+ fd = bpf_obj_get(path);
+ if (fd < 0) {
+ if (!quiet)
+ p_err("bpf obj get (%s): %s", path,
+ errno == EACCES && !is_bpffs(dirname(path)) ?
+ "directory not in bpf file system (bpffs)" :
+ strerror(errno));
+ return -1;
+ }
+
+ return fd;
+}
+
+int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type)
+{
+ enum bpf_obj_type type;
+ int fd;
+
+ fd = open_obj_pinned(path, false);
+ if (fd < 0)
+ return -1;
+
+ type = get_fd_type(fd);
+ if (type < 0) {
+ close(fd);
+ return type;
+ }
+ if (type != exp_type) {
+ p_err("incorrect object type: %s", get_fd_type_name(type));
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+int do_pin_fd(int fd, const char *name)
+{
+ char err_str[ERR_MAX_LEN];
+ char *file;
+ char *dir;
+ int err = 0;
+
+ err = bpf_obj_pin(fd, name);
+ if (!err)
+ goto out;
+
+ file = malloc(strlen(name) + 1);
+ if (!file) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+
+ strcpy(file, name);
+ dir = dirname(file);
+
+ if (errno != EPERM || is_bpffs(dir)) {
+ p_err("can't pin the object (%s): %s", name, strerror(errno));
+ goto out_free;
+ }
+
+ /* Attempt to mount bpffs, then retry pinning. */
+ err = mnt_bpffs(dir, err_str, ERR_MAX_LEN);
+ if (!err) {
+ err = bpf_obj_pin(fd, name);
+ if (err)
+ p_err("can't pin the object (%s): %s", name,
+ strerror(errno));
+ } else {
+ err_str[ERR_MAX_LEN - 1] = '\0';
+ p_err("can't mount BPF file system to pin the object (%s): %s",
+ name, err_str);
+ }
+
+out_free:
+ free(file);
+out:
+ return err;
+}
+
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+{
+ unsigned int id;
+ char *endptr;
+ int err;
+ int fd;
+
+ if (argc < 3) {
+		p_err("too few arguments, id ID and FILE path are required");
+ return -1;
+ } else if (argc > 3) {
+ p_err("too many arguments");
+ return -1;
+ }
+
+ if (!is_prefix(*argv, "id")) {
+ p_err("expected 'id' got %s", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ id = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ fd = get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't open object by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+
+ err = do_pin_fd(fd, *argv);
+
+ close(fd);
+ return err;
+}
+
+const char *get_fd_type_name(enum bpf_obj_type type)
+{
+ static const char * const names[] = {
+ [BPF_OBJ_UNKNOWN] = "unknown",
+ [BPF_OBJ_PROG] = "prog",
+ [BPF_OBJ_MAP] = "map",
+ };
+
+ if (type < 0 || type >= ARRAY_SIZE(names) || !names[type])
+ return names[BPF_OBJ_UNKNOWN];
+
+ return names[type];
+}
+
+int get_fd_type(int fd)
+{
+ char path[PATH_MAX];
+ char buf[512];
+ ssize_t n;
+
+ snprintf(path, sizeof(path), "/proc/%d/fd/%d", getpid(), fd);
+
+ n = readlink(path, buf, sizeof(buf));
+ if (n < 0) {
+ p_err("can't read link type: %s", strerror(errno));
+ return -1;
+ }
+	if (n == sizeof(buf)) {
+		p_err("can't read link type: path too long!");
+		return -1;
+	}
+	buf[n] = '\0'; /* readlink() does not NUL-terminate */
+
+ if (strstr(buf, "bpf-map"))
+ return BPF_OBJ_MAP;
+ else if (strstr(buf, "bpf-prog"))
+ return BPF_OBJ_PROG;
+
+ return BPF_OBJ_UNKNOWN;
+}
+
+char *get_fdinfo(int fd, const char *key)
+{
+ char path[PATH_MAX];
+ char *line = NULL;
+ size_t line_n = 0;
+ ssize_t n;
+ FILE *fdi;
+
+ snprintf(path, sizeof(path), "/proc/%d/fdinfo/%d", getpid(), fd);
+
+ fdi = fopen(path, "r");
+ if (!fdi) {
+ p_err("can't open fdinfo: %s", strerror(errno));
+ return NULL;
+ }
+
+ while ((n = getline(&line, &line_n, fdi)) > 0) {
+ char *value;
+ int len;
+
+ if (!strstr(line, key))
+ continue;
+
+ fclose(fdi);
+
+ value = strchr(line, '\t');
+ if (!value || !value[1]) {
+ p_err("malformed fdinfo!?");
+ free(line);
+ return NULL;
+ }
+ value++;
+
+ len = strlen(value);
+ memmove(line, value, len);
+ line[len - 1] = '\0';
+
+ return line;
+ }
+
+ p_err("key '%s' not found in fdinfo", key);
+ free(line);
+ fclose(fdi);
+ return NULL;
+}
+
+void print_data_json(uint8_t *data, size_t len)
+{
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len; i++)
+ jsonw_printf(json_wtr, "%d", data[i]);
+ jsonw_end_array(json_wtr);
+}
+
+void print_hex_data_json(uint8_t *data, size_t len)
+{
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len; i++)
+ jsonw_printf(json_wtr, "\"0x%02hhx\"", data[i]);
+ jsonw_end_array(json_wtr);
+}
+
+int build_pinned_obj_table(struct pinned_obj_table *tab,
+ enum bpf_obj_type type)
+{
+ struct bpf_prog_info pinned_info = {};
+ struct pinned_obj *obj_node = NULL;
+ __u32 len = sizeof(pinned_info);
+ struct mntent *mntent = NULL;
+ enum bpf_obj_type objtype;
+ FILE *mntfile = NULL;
+ FTSENT *ftse = NULL;
+ FTS *fts = NULL;
+ int fd, err;
+
+ mntfile = setmntent("/proc/mounts", "r");
+ if (!mntfile)
+ return -1;
+
+ while ((mntent = getmntent(mntfile))) {
+ char *path[] = { mntent->mnt_dir, NULL };
+
+ if (strncmp(mntent->mnt_type, "bpf", 3) != 0)
+ continue;
+
+ fts = fts_open(path, 0, NULL);
+ if (!fts)
+ continue;
+
+ while ((ftse = fts_read(fts))) {
+			if (ftse->fts_info != FTS_F)
+ continue;
+ fd = open_obj_pinned(ftse->fts_path, true);
+ if (fd < 0)
+ continue;
+
+ objtype = get_fd_type(fd);
+ if (objtype != type) {
+ close(fd);
+ continue;
+ }
+ memset(&pinned_info, 0, sizeof(pinned_info));
+ err = bpf_obj_get_info_by_fd(fd, &pinned_info, &len);
+ if (err) {
+ close(fd);
+ continue;
+ }
+
+ obj_node = malloc(sizeof(*obj_node));
+ if (!obj_node) {
+ close(fd);
+ fts_close(fts);
+ fclose(mntfile);
+ return -1;
+ }
+
+ memset(obj_node, 0, sizeof(*obj_node));
+ obj_node->id = pinned_info.id;
+ obj_node->path = strdup(ftse->fts_path);
+ hash_add(tab->table, &obj_node->hash, obj_node->id);
+
+ close(fd);
+ }
+ fts_close(fts);
+ }
+ fclose(mntfile);
+ return 0;
+}
+
+void delete_pinned_obj_table(struct pinned_obj_table *tab)
+{
+ struct pinned_obj *obj;
+ struct hlist_node *tmp;
+ unsigned int bkt;
+
+ hash_for_each_safe(tab->table, bkt, tmp, obj, hash) {
+ hash_del(&obj->hash);
+ free(obj->path);
+ free(obj);
+ }
+}
+
+unsigned int get_page_size(void)
+{
+ static int result;
+
+ if (!result)
+ result = getpagesize();
+ return result;
+}
+
+unsigned int get_possible_cpus(void)
+{
+ static unsigned int result;
+ char buf[128];
+ long int n;
+ char *ptr;
+ int fd;
+
+ if (result)
+ return result;
+
+ fd = open("/sys/devices/system/cpu/possible", O_RDONLY);
+ if (fd < 0) {
+ p_err("can't open sysfs possible cpus");
+ exit(-1);
+ }
+
+ n = read(fd, buf, sizeof(buf));
+ if (n < 2) {
+ p_err("can't read sysfs possible cpus");
+ exit(-1);
+ }
+ close(fd);
+
+ if (n == sizeof(buf)) {
+ p_err("read sysfs possible cpus overflow");
+ exit(-1);
+ }
+
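+	/* Parse a cpu list such as "0-3,5": a range contributes
+	 * b - a + 1 cpus and a single entry contributes one, so
+	 * "0-3,5" yields 5 possible cpus.
+	 */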
+ ptr = buf;
+ n = 0;
+ while (*ptr && *ptr != '\n') {
+ unsigned int a, b;
+
+ if (sscanf(ptr, "%u-%u", &a, &b) == 2) {
+ n += b - a + 1;
+
+ ptr = strchr(ptr, '-') + 1;
+ } else if (sscanf(ptr, "%u", &a) == 1) {
+ n++;
+ } else {
+ assert(0);
+ }
+
+ while (isdigit(*ptr))
+ ptr++;
+ if (*ptr == ',')
+ ptr++;
+ }
+
+ result = n;
+
+ return result;
+}
+
+static char *
+ifindex_to_name_ns(__u32 ifindex, __u32 ns_dev, __u32 ns_ino, char *buf)
+{
+ struct stat st;
+ int err;
+
+ err = stat("/proc/self/ns/net", &st);
+ if (err) {
+		p_err("Can't stat /proc/self/ns/net: %s", strerror(errno));
+ return NULL;
+ }
+
+ if (st.st_dev != ns_dev || st.st_ino != ns_ino)
+ return NULL;
+
+ return if_indextoname(ifindex, buf);
+}
+
+static int read_sysfs_hex_int(char *path)
+{
+ char vendor_id_buf[8];
+ int len;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ p_err("Can't open %s: %s", path, strerror(errno));
+ return -1;
+ }
+
+ len = read(fd, vendor_id_buf, sizeof(vendor_id_buf));
+ close(fd);
+ if (len < 0) {
+ p_err("Can't read %s: %s", path, strerror(errno));
+ return -1;
+ }
+ if (len >= (int)sizeof(vendor_id_buf)) {
+ p_err("Value in %s too long", path);
+ return -1;
+ }
+
+ vendor_id_buf[len] = 0;
+
+ return strtol(vendor_id_buf, NULL, 0);
+}
+
+static int read_sysfs_netdev_hex_int(char *devname, const char *entry_name)
+{
+ char full_path[64];
+
+ snprintf(full_path, sizeof(full_path), "/sys/class/net/%s/device/%s",
+ devname, entry_name);
+
+ return read_sysfs_hex_int(full_path);
+}
+
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino)
+{
+ char devname[IF_NAMESIZE];
+ int vendor_id;
+ int device_id;
+
+ if (!ifindex_to_name_ns(ifindex, ns_dev, ns_ino, devname)) {
+ p_err("Can't get net device name for ifindex %d: %s", ifindex,
+ strerror(errno));
+ return NULL;
+ }
+
+ vendor_id = read_sysfs_netdev_hex_int(devname, "vendor");
+ if (vendor_id < 0) {
+ p_err("Can't get device vendor id for %s", devname);
+ return NULL;
+ }
+
+ switch (vendor_id) {
+ case 0x19ee:
+ device_id = read_sysfs_netdev_hex_int(devname, "device");
+ if (device_id != 0x4000 &&
+ device_id != 0x6000 &&
+ device_id != 0x6003)
+ p_info("Unknown NFP device ID, assuming it is NFP-6xxx arch");
+ return "NFP-6xxx";
+ default:
+ p_err("Can't get bfd arch name for device vendor id 0x%04x",
+ vendor_id);
+ return NULL;
+ }
+}
+
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+ char name[IF_NAMESIZE];
+
+ if (!ifindex)
+ return;
+
+ printf(" dev ");
+ if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+ printf("%s", name);
+ else
+ printf("ifindex %u ns_dev %llu ns_ino %llu",
+ ifindex, ns_dev, ns_inode);
+}
+
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode)
+{
+ char name[IF_NAMESIZE];
+
+ if (!ifindex)
+ return;
+
+ jsonw_name(json_wtr, "dev");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "ifindex", ifindex);
+ jsonw_uint_field(json_wtr, "ns_dev", ns_dev);
+ jsonw_uint_field(json_wtr, "ns_inode", ns_inode);
+ if (ifindex_to_name_ns(ifindex, ns_dev, ns_inode, name))
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_end_object(json_wtr);
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
new file mode 100644
index 000000000..e4cfbecf0
--- /dev/null
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -0,0 +1,188 @@
+/*
+ * Based on:
+ *
+ * Minimal BPF JIT image disassembler
+ *
+ * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
+ * debugging or verification purposes.
+ *
+ * Copyright 2013 Daniel Borkmann <daniel@iogearbox.net>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <limits.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+static void get_exec_path(char *tpath, size_t size)
+{
+ ssize_t len;
+ char *path;
+
+ snprintf(tpath, size, "/proc/%d/exe", (int) getpid());
+ tpath[size - 1] = 0;
+
+ path = strdup(tpath);
+ assert(path);
+
+ len = readlink(path, tpath, size - 1);
+ assert(len > 0);
+ tpath[len] = 0;
+
+ free(path);
+}
+
+static int oper_count;
+static int fprintf_json(void *out, const char *fmt, ...)
+{
+ va_list ap;
+ char *s;
+ int err;
+
+ va_start(ap, fmt);
+ err = vasprintf(&s, fmt, ap);
+ va_end(ap);
+ if (err < 0)
+ return -1;
+
+ if (!oper_count) {
+ int i;
+
+ /* Strip trailing spaces */
+ i = strlen(s) - 1;
+ while (s[i] == ' ')
+ s[i--] = '\0';
+
+ jsonw_string_field(json_wtr, "operation", s);
+ jsonw_name(json_wtr, "operands");
+ jsonw_start_array(json_wtr);
+ oper_count++;
+ } else if (!strcmp(fmt, ",")) {
+ /* Skip */
+ } else {
+ jsonw_string(json_wtr, s);
+ oper_count++;
+ }
+ free(s);
+ return 0;
+}
+
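+/* Disassemble a JIT image. bpftool's own binary is opened through
+ * libbfd to obtain default arch/mach values for the disassembler;
+ * the architecture is overridden below when an explicit arch is
+ * passed in for offloaded programs.
+ */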
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+ const char *arch)
+{
+ disassembler_ftype disassemble;
+ struct disassemble_info info;
+ int count, i, pc = 0;
+ char tpath[PATH_MAX];
+ bfd *bfdf;
+
+ if (!len)
+ return;
+
+ memset(tpath, 0, sizeof(tpath));
+ get_exec_path(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ assert(bfdf);
+ assert(bfd_check_format(bfdf, bfd_object));
+
+ if (json_output)
+ init_disassemble_info(&info, stdout,
+ (fprintf_ftype) fprintf_json);
+ else
+ init_disassemble_info(&info, stdout,
+ (fprintf_ftype) fprintf);
+
+ /* Update architecture info for offload. */
+ if (arch) {
+ const bfd_arch_info_type *inf = bfd_scan_arch(arch);
+
+ if (inf) {
+ bfdf->arch_info = inf;
+ } else {
+			p_err("No libbfd support for %s", arch);
+ return;
+ }
+ }
+
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+ info.buffer = image;
+ info.buffer_length = len;
+
+ disassemble_init_for_target(&info);
+
+#ifdef DISASM_FOUR_ARGS_SIGNATURE
+ disassemble = disassembler(info.arch,
+ bfd_big_endian(bfdf),
+ info.mach,
+ bfdf);
+#else
+ disassemble = disassembler(bfdf);
+#endif
+ assert(disassemble);
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ do {
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ oper_count = 0;
+ jsonw_name(json_wtr, "pc");
+ jsonw_printf(json_wtr, "\"0x%x\"", pc);
+ } else {
+ printf("%4x:\t", pc);
+ }
+
+ count = disassemble(pc, &info);
+ if (json_output) {
+			/* The operand array was started in fprintf_json. Before
+			 * closing it, make sure we emit a _null_ value if no
+			 * operand other than the operation code was present.
+ */
+ if (oper_count == 1)
+ jsonw_null(json_wtr);
+ jsonw_end_array(json_wtr);
+ }
+
+ if (opcodes) {
+ if (json_output) {
+ jsonw_name(json_wtr, "opcodes");
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < count; ++i)
+ jsonw_printf(json_wtr, "\"0x%02hhx\"",
+ (uint8_t)image[pc + i]);
+ jsonw_end_array(json_wtr);
+ } else {
+ printf("\n\t");
+ for (i = 0; i < count; ++i)
+ printf("%02x ",
+ (uint8_t)image[pc + i]);
+ }
+ }
+ if (json_output)
+ jsonw_end_object(json_wtr);
+ else
+ printf("\n");
+
+ pc += count;
+ } while (count > 0 && pc < len);
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ bfd_close(bfdf);
+}
diff --git a/tools/bpf/bpftool/json_writer.c b/tools/bpf/bpftool/json_writer.c
new file mode 100644
index 000000000..c6eef7632
--- /dev/null
+++ b/tools/bpf/bpftool/json_writer.c
@@ -0,0 +1,356 @@
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <malloc.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+#include "json_writer.h"
+
+struct json_writer {
+ FILE *out; /* output file */
+ unsigned depth; /* nesting */
+	bool pretty; /* optional whitespace */
+ char sep; /* either nul or comma */
+};
+
+/* indentation for pretty print */
+static void jsonw_indent(json_writer_t *self)
+{
+ unsigned i;
+ for (i = 0; i < self->depth; ++i)
+		fputs("    ", self->out);
+}
+
+/* end current line and indent if pretty printing */
+static void jsonw_eol(json_writer_t *self)
+{
+ if (!self->pretty)
+ return;
+
+ putc('\n', self->out);
+ jsonw_indent(self);
+}
+
+/* If the current object is not empty, print a comma */
+static void jsonw_eor(json_writer_t *self)
+{
+ if (self->sep != '\0')
+ putc(self->sep, self->out);
+ self->sep = ',';
+}
+
+
+/* Output JSON encoded string */
+/* Handles C escapes, does not do Unicode */
+static void jsonw_puts(json_writer_t *self, const char *str)
+{
+ putc('"', self->out);
+ for (; *str; ++str)
+ switch (*str) {
+ case '\t':
+ fputs("\\t", self->out);
+ break;
+ case '\n':
+ fputs("\\n", self->out);
+ break;
+ case '\r':
+ fputs("\\r", self->out);
+ break;
+ case '\f':
+ fputs("\\f", self->out);
+ break;
+ case '\b':
+ fputs("\\b", self->out);
+ break;
+ case '\\':
+			fputs("\\\\", self->out);
+ break;
+ case '"':
+ fputs("\\\"", self->out);
+ break;
+ case '\'':
+ fputs("\\\'", self->out);
+ break;
+ default:
+ putc(*str, self->out);
+ }
+ putc('"', self->out);
+}
+
+/* Create a new JSON stream */
+json_writer_t *jsonw_new(FILE *f)
+{
+ json_writer_t *self = malloc(sizeof(*self));
+ if (self) {
+ self->out = f;
+ self->depth = 0;
+ self->pretty = false;
+ self->sep = '\0';
+ }
+ return self;
+}
+
+/* End output to JSON stream */
+void jsonw_destroy(json_writer_t **self_p)
+{
+ json_writer_t *self = *self_p;
+
+ assert(self->depth == 0);
+ fputs("\n", self->out);
+ fflush(self->out);
+ free(self);
+ *self_p = NULL;
+}
+
+void jsonw_pretty(json_writer_t *self, bool on)
+{
+ self->pretty = on;
+}
+
+/* Basic blocks */
+static void jsonw_begin(json_writer_t *self, int c)
+{
+ jsonw_eor(self);
+ putc(c, self->out);
+ ++self->depth;
+ self->sep = '\0';
+}
+
+static void jsonw_end(json_writer_t *self, int c)
+{
+ assert(self->depth > 0);
+
+ --self->depth;
+ if (self->sep != '\0')
+ jsonw_eol(self);
+ putc(c, self->out);
+ self->sep = ',';
+}
+
+
+/* Add a JSON property name */
+void jsonw_name(json_writer_t *self, const char *name)
+{
+ jsonw_eor(self);
+ jsonw_eol(self);
+ self->sep = '\0';
+ jsonw_puts(self, name);
+ putc(':', self->out);
+ if (self->pretty)
+ putc(' ', self->out);
+}
+
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
+{
+ jsonw_eor(self);
+ putc('"', self->out);
+ vfprintf(self->out, fmt, ap);
+ putc('"', self->out);
+}
+
+void jsonw_printf(json_writer_t *self, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ jsonw_eor(self);
+ vfprintf(self->out, fmt, ap);
+ va_end(ap);
+}
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self)
+{
+ jsonw_begin(self, '{');
+}
+
+void jsonw_end_object(json_writer_t *self)
+{
+ jsonw_end(self, '}');
+}
+
+void jsonw_start_array(json_writer_t *self)
+{
+ jsonw_begin(self, '[');
+}
+
+void jsonw_end_array(json_writer_t *self)
+{
+ jsonw_end(self, ']');
+}
+
+/* JSON value types */
+void jsonw_string(json_writer_t *self, const char *value)
+{
+ jsonw_eor(self);
+ jsonw_puts(self, value);
+}
+
+void jsonw_bool(json_writer_t *self, bool val)
+{
+ jsonw_printf(self, "%s", val ? "true" : "false");
+}
+
+void jsonw_null(json_writer_t *self)
+{
+ jsonw_printf(self, "null");
+}
+
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num)
+{
+ jsonw_printf(self, fmt, num);
+}
+
+#ifdef notused
+void jsonw_float(json_writer_t *self, double num)
+{
+ jsonw_printf(self, "%g", num);
+}
+#endif
+
+void jsonw_hu(json_writer_t *self, unsigned short num)
+{
+ jsonw_printf(self, "%hu", num);
+}
+
+void jsonw_uint(json_writer_t *self, uint64_t num)
+{
+ jsonw_printf(self, "%"PRIu64, num);
+}
+
+void jsonw_lluint(json_writer_t *self, unsigned long long int num)
+{
+ jsonw_printf(self, "%llu", num);
+}
+
+void jsonw_int(json_writer_t *self, int64_t num)
+{
+ jsonw_printf(self, "%"PRId64, num);
+}
+
+/* Basic name/value objects */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val)
+{
+ jsonw_name(self, prop);
+ jsonw_string(self, val);
+}
+
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool val)
+{
+ jsonw_name(self, prop);
+ jsonw_bool(self, val);
+}
+
+#ifdef notused
+void jsonw_float_field(json_writer_t *self, const char *prop, double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float(self, val);
+}
+#endif
+
+void jsonw_float_field_fmt(json_writer_t *self,
+ const char *prop,
+ const char *fmt,
+ double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float_fmt(self, fmt, val);
+}
+
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_uint(self, num);
+}
+
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num)
+{
+ jsonw_name(self, prop);
+ jsonw_hu(self, num);
+}
+
+void jsonw_lluint_field(json_writer_t *self,
+ const char *prop,
+ unsigned long long int num)
+{
+ jsonw_name(self, prop);
+ jsonw_lluint(self, num);
+}
+
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_int(self, num);
+}
+
+void jsonw_null_field(json_writer_t *self, const char *prop)
+{
+ jsonw_name(self, prop);
+ jsonw_null(self);
+}
+
+#ifdef TEST
+int main(int argc, char **argv)
+{
+ json_writer_t *wr = jsonw_new(stdout);
+
+ jsonw_start_object(wr);
+ jsonw_pretty(wr, true);
+ jsonw_name(wr, "Vyatta");
+ jsonw_start_object(wr);
+ jsonw_string_field(wr, "url", "http://vyatta.com");
+ jsonw_uint_field(wr, "downloads", 2000000ul);
+ jsonw_float_field(wr, "stock", 8.16);
+
+ jsonw_name(wr, "ARGV");
+ jsonw_start_array(wr);
+ while (--argc)
+ jsonw_string(wr, *++argv);
+ jsonw_end_array(wr);
+
+ jsonw_name(wr, "empty");
+ jsonw_start_array(wr);
+ jsonw_end_array(wr);
+
+ jsonw_name(wr, "NIL");
+ jsonw_start_object(wr);
+ jsonw_end_object(wr);
+
+ jsonw_null_field(wr, "my_null");
+
+ jsonw_name(wr, "special chars");
+ jsonw_start_array(wr);
+ jsonw_string_field(wr, "slash", "/");
+ jsonw_string_field(wr, "newline", "\n");
+ jsonw_string_field(wr, "tab", "\t");
+ jsonw_string_field(wr, "ff", "\f");
+ jsonw_string_field(wr, "quote", "\"");
+ jsonw_string_field(wr, "tick", "\'");
+ jsonw_string_field(wr, "backslash", "\\");
+ jsonw_end_array(wr);
+
+ jsonw_end_object(wr);
+
+ jsonw_end_object(wr);
+ jsonw_destroy(&wr);
+ return 0;
+}
+
+#endif
diff --git a/tools/bpf/bpftool/json_writer.h b/tools/bpf/bpftool/json_writer.h
new file mode 100644
index 000000000..0fa2fb1b6
--- /dev/null
+++ b/tools/bpf/bpftool/json_writer.h
@@ -0,0 +1,72 @@
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
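+
+/* Minimal usage sketch (mirroring the TEST program in json_writer.c):
+ *
+ *	json_writer_t *w = jsonw_new(stdout);
+ *
+ *	jsonw_start_object(w);
+ *	jsonw_string_field(w, "hello", "world");
+ *	jsonw_end_object(w);
+ *	jsonw_destroy(&w);
+ *
+ * Commas and quoting are emitted automatically.
+ */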
+
+#ifndef _JSON_WRITER_H_
+#define _JSON_WRITER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+/* Opaque class structure */
+typedef struct json_writer json_writer_t;
+
+/* Create a new JSON stream */
+json_writer_t *jsonw_new(FILE *f);
+/* End output to JSON stream */
+void jsonw_destroy(json_writer_t **self_p);
+
+/* Cause output to have pretty whitespace */
+void jsonw_pretty(json_writer_t *self, bool on);
+
+/* Add property name */
+void jsonw_name(json_writer_t *self, const char *name);
+
+/* Add value */
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap);
+void jsonw_printf(json_writer_t *self, const char *fmt, ...);
+void jsonw_string(json_writer_t *self, const char *value);
+void jsonw_bool(json_writer_t *self, bool value);
+void jsonw_float(json_writer_t *self, double number);
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
+void jsonw_uint(json_writer_t *self, uint64_t number);
+void jsonw_hu(json_writer_t *self, unsigned short number);
+void jsonw_int(json_writer_t *self, int64_t number);
+void jsonw_null(json_writer_t *self);
+void jsonw_lluint(json_writer_t *self, unsigned long long int num);
+
+/* Useful Combinations of name and value */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val);
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool value);
+void jsonw_float_field(json_writer_t *self, const char *prop, double num);
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num);
+void jsonw_null_field(json_writer_t *self, const char *prop);
+void jsonw_lluint_field(json_writer_t *self, const char *prop,
+ unsigned long long int num);
+void jsonw_float_field_fmt(json_writer_t *self, const char *prop,
+ const char *fmt, double val);
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self);
+void jsonw_end_object(json_writer_t *self);
+
+void jsonw_start_array(json_writer_t *self);
+void jsonw_end_array(json_writer_t *self);
+
+/* Override default exception handling */
+typedef void (jsonw_err_handler_fn)(const char *);
+
+#endif /* _JSON_WRITER_H_ */
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
new file mode 100644
index 000000000..37610144f
--- /dev/null
+++ b/tools/bpf/bpftool/main.c
@@ -0,0 +1,410 @@
+/*
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <bfd.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <linux/bpf.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+#define BATCH_LINE_LEN_MAX 65536
+#define BATCH_ARG_NB_MAX 4096
+
+const char *bin_name;
+static int last_argc;
+static char **last_argv;
+static int (*last_do_help)(int argc, char **argv);
+json_writer_t *json_wtr;
+bool pretty_output;
+bool json_output;
+bool show_pinned;
+struct pinned_obj_table prog_table;
+struct pinned_obj_table map_table;
+
+static void __noreturn clean_and_exit(int i)
+{
+ if (json_output)
+ jsonw_destroy(&json_wtr);
+
+ exit(i);
+}
+
+void usage(void)
+{
+ last_do_help(last_argc - 1, last_argv + 1);
+
+ clean_and_exit(-1);
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n"
+ " %s batch file FILE\n"
+ " %s version\n"
+ "\n"
+ " OBJECT := { prog | map | cgroup | perf }\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, bin_name, bin_name);
+
+ return 0;
+}
+
+static int do_version(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "version");
+ jsonw_printf(json_wtr, "\"%s\"", BPFTOOL_VERSION);
+ jsonw_end_object(json_wtr);
+ } else {
+ printf("%s v%s\n", bin_name, BPFTOOL_VERSION);
+ }
+ return 0;
+}
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv))
+{
+ unsigned int i;
+
+ last_argc = argc;
+ last_argv = argv;
+ last_do_help = help;
+
+ if (argc < 1 && cmds[0].func)
+ return cmds[0].func(argc, argv);
+
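+ /* commands may be abbreviated: the first table entry whose name
+ * begins with *argv is selected, so table order matters
+ */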
+ for (i = 0; cmds[i].func; i++)
+ if (is_prefix(*argv, cmds[i].cmd))
+ return cmds[i].func(argc - 1, argv + 1);
+
+ help(argc - 1, argv + 1);
+
+ return -1;
+}
+
+bool is_prefix(const char *pfx, const char *str)
+{
+ if (!pfx)
+ return false;
+ if (strlen(str) < strlen(pfx))
+ return false;
+
+ return !memcmp(str, pfx, strlen(pfx));
+}
+
+void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep)
+{
+ unsigned char *data = arg;
+ unsigned int i;
+
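+ /* 16 bytes per output line, an extra gap after every 8th byte,
+ * and "sep" between the remaining bytes
+ */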
+ for (i = 0; i < n; i++) {
+ const char *pfx = "";
+
+ if (!i)
+ /* nothing */;
+ else if (!(i % 16))
+ fprintf(f, "\n");
+ else if (!(i % 8))
+ fprintf(f, " ");
+ else
+ pfx = sep;
+
+ fprintf(f, "%s%02hhx", i ? pfx : "", data[i]);
+ }
+}
+
+/* Split command line into argument vector. */
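+/* E.g. the line: prog show id 10
+ * yields n_argv = { "prog", "show", "id", "10", NULL } and returns 4.
+ */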
+static int make_args(char *line, char *n_argv[], int maxargs, int cmd_nb)
+{
+ static const char ws[] = " \t\r\n";
+ char *cp = line;
+ int n_argc = 0;
+
+ while (*cp) {
+ /* Skip leading whitespace. */
+ cp += strspn(cp, ws);
+
+ if (*cp == '\0')
+ break;
+
+ if (n_argc >= (maxargs - 1)) {
+ p_err("too many arguments to command %d", cmd_nb);
+ return -1;
+ }
+
+ /* Word begins with quote. */
+ if (*cp == '\'' || *cp == '"') {
+ char quote = *cp++;
+
+ n_argv[n_argc++] = cp;
+ /* Find ending quote. */
+ cp = strchr(cp, quote);
+ if (!cp) {
+ p_err("unterminated quoted string in command %d",
+ cmd_nb);
+ return -1;
+ }
+ } else {
+ n_argv[n_argc++] = cp;
+
+ /* Find end of word. */
+ cp += strcspn(cp, ws);
+ if (*cp == '\0')
+ break;
+ }
+
+ /* Separate words. */
+ *cp++ = 0;
+ }
+ n_argv[n_argc] = NULL;
+
+ return n_argc;
+}
+
+static int do_batch(int argc, char **argv);
+
+static const struct cmd cmds[] = {
+ { "help", do_help },
+ { "batch", do_batch },
+ { "prog", do_prog },
+ { "map", do_map },
+ { "cgroup", do_cgroup },
+ { "perf", do_perf },
+ { "version", do_version },
+ { 0 }
+};
+
+static int do_batch(int argc, char **argv)
+{
+ char buf[BATCH_LINE_LEN_MAX], contline[BATCH_LINE_LEN_MAX];
+ char *n_argv[BATCH_ARG_NB_MAX];
+ unsigned int lines = 0;
+ int n_argc;
+ FILE *fp;
+ char *cp;
+ int err;
+ int i;
+
+ if (argc < 2) {
+ p_err("too few parameters for batch");
+ return -1;
+ } else if (!is_prefix(*argv, "file")) {
+ p_err("expected 'file', got: %s", *argv);
+ return -1;
+ } else if (argc > 2) {
+ p_err("too many parameters for batch");
+ return -1;
+ }
+ NEXT_ARG();
+
+ if (!strcmp(*argv, "-"))
+ fp = stdin;
+ else
+ fp = fopen(*argv, "r");
+ if (!fp) {
+ p_err("Can't open file (%s): %s", *argv, strerror(errno));
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ while (fgets(buf, sizeof(buf), fp)) {
+ cp = strchr(buf, '#');
+ if (cp)
+ *cp = '\0';
+
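+ /* fgets() filled the whole buffer: the line is too long */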
+ if (strlen(buf) == sizeof(buf) - 1) {
+ errno = E2BIG;
+ break;
+ }
+
+ /* Append continuation lines if any (coming after a line ending
+ * with '\' in the batch file).
+ */
+ while ((cp = strstr(buf, "\\\n")) != NULL) {
+ if (!fgets(contline, sizeof(contline), fp) ||
+ strlen(contline) == 0) {
+ p_err("missing continuation line on command %d",
+ lines);
+ err = -1;
+ goto err_close;
+ }
+
+ cp = strchr(contline, '#');
+ if (cp)
+ *cp = '\0';
+
+ if (strlen(buf) + strlen(contline) + 1 > sizeof(buf)) {
+ p_err("command %d is too long", lines);
+ err = -1;
+ goto err_close;
+ }
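+ /* chop the trailing '\' and newline before appending */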
+ buf[strlen(buf) - 2] = '\0';
+ strcat(buf, contline);
+ }
+
+ n_argc = make_args(buf, n_argv, BATCH_ARG_NB_MAX, lines);
+ if (!n_argc)
+ continue;
+ if (n_argc < 0) {
+ err = n_argc;
+ goto err_close;
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "command");
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < n_argc; i++)
+ jsonw_string(json_wtr, n_argv[i]);
+ jsonw_end_array(json_wtr);
+ jsonw_name(json_wtr, "output");
+ }
+
+ err = cmd_select(cmds, n_argc, n_argv, do_help);
+
+ if (json_output)
+ jsonw_end_object(json_wtr);
+
+ if (err)
+ goto err_close;
+
+ lines++;
+ }
+
+ if (errno && errno != ENOENT) {
+ p_err("reading batch file failed: %s", strerror(errno));
+ err = -1;
+ } else {
+ p_info("processed %d commands", lines);
+ err = 0;
+ }
+err_close:
+ if (fp != stdin)
+ fclose(fp);
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ static const struct option options[] = {
+ { "json", no_argument, NULL, 'j' },
+ { "help", no_argument, NULL, 'h' },
+ { "pretty", no_argument, NULL, 'p' },
+ { "version", no_argument, NULL, 'V' },
+ { "bpffs", no_argument, NULL, 'f' },
+ { 0 }
+ };
+ int opt, ret;
+
+ last_do_help = do_help;
+ pretty_output = false;
+ json_output = false;
+ show_pinned = false;
+ bin_name = argv[0];
+
+ hash_init(prog_table.table);
+ hash_init(map_table.table);
+
+ opterr = 0;
+ while ((opt = getopt_long(argc, argv, "Vhpjf",
+ options, NULL)) >= 0) {
+ switch (opt) {
+ case 'V':
+ return do_version(argc, argv);
+ case 'h':
+ return do_help(argc, argv);
+ case 'p':
+ pretty_output = true;
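+ /* --pretty implies JSON output */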
+ /* fall through */
+ case 'j':
+ if (!json_output) {
+ json_wtr = jsonw_new(stdout);
+ if (!json_wtr) {
+ p_err("failed to create JSON writer");
+ return -1;
+ }
+ json_output = true;
+ }
+ jsonw_pretty(json_wtr, pretty_output);
+ break;
+ case 'f':
+ show_pinned = true;
+ break;
+ default:
+ p_err("unrecognized option '%s'", argv[optind - 1]);
+ if (json_output)
+ clean_and_exit(-1);
+ else
+ usage();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+ if (argc < 0)
+ usage();
+
+ bfd_init();
+
+ ret = cmd_select(cmds, argc, argv, do_help);
+
+ if (json_output)
+ jsonw_destroy(&json_wtr);
+
+ if (show_pinned) {
+ delete_pinned_obj_table(&prog_table);
+ delete_pinned_obj_table(&map_table);
+ }
+
+ return ret;
+}
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
new file mode 100644
index 000000000..057a227bd
--- /dev/null
+++ b/tools/bpf/bpftool/main.h
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __BPF_TOOL_H
+#define __BPF_TOOL_H
+
+/* BFD and kernel.h both define GCC_VERSION, differently */
+#undef GCC_VERSION
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/bpf.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/hashtable.h>
+#include <tools/libc_compat.h>
+
+#include "json_writer.h"
+
+#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+
+#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
+#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
+#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; })
+#define GET_ARG() ({ argc--; *argv++; })
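+/* evaluates to true when at least cnt arguments remain, else prints an error */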
+#define REQ_ARGS(cnt) \
+ ({ \
+ int _cnt = (cnt); \
+ bool _res; \
+ \
+ if (argc < _cnt) { \
+ p_err("'%s' needs at least %d arguments, %d found", \
+ argv[-1], _cnt, argc); \
+ _res = false; \
+ } else { \
+ _res = true; \
+ } \
+ _res; \
+ })
+
+#define ERR_MAX_LEN 1024
+
+#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
+
+#define HELP_SPEC_PROGRAM \
+ "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }"
+#define HELP_SPEC_OPTIONS \
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} }"
+#define HELP_SPEC_MAP \
+ "MAP := { id MAP_ID | pinned FILE }"
+
+enum bpf_obj_type {
+ BPF_OBJ_UNKNOWN,
+ BPF_OBJ_PROG,
+ BPF_OBJ_MAP,
+};
+
+extern const char *bin_name;
+
+extern json_writer_t *json_wtr;
+extern bool json_output;
+extern bool show_pinned;
+extern struct pinned_obj_table prog_table;
+extern struct pinned_obj_table map_table;
+
+void p_err(const char *fmt, ...);
+void p_info(const char *fmt, ...);
+
+bool is_prefix(const char *pfx, const char *str);
+void fprint_hex(FILE *f, void *arg, unsigned int n, const char *sep);
+void usage(void) __noreturn;
+
+struct pinned_obj_table {
+ DECLARE_HASHTABLE(table, 16);
+};
+
+struct pinned_obj {
+ __u32 id;
+ char *path;
+ struct hlist_node hash;
+};
+
+int build_pinned_obj_table(struct pinned_obj_table *table,
+ enum bpf_obj_type type);
+void delete_pinned_obj_table(struct pinned_obj_table *tab);
+void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
+void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
+
+struct cmd {
+ const char *cmd;
+ int (*func)(int argc, char **argv);
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv));
+
+int get_fd_type(int fd);
+const char *get_fd_type_name(enum bpf_obj_type type);
+char *get_fdinfo(int fd, const char *key);
+int open_obj_pinned(char *path, bool quiet);
+int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_fd(int fd, const char *name);
+
+int do_prog(int argc, char **argv);
+int do_map(int argc, char **argv);
+int do_event_pipe(int argc, char **argv);
+int do_cgroup(int argc, char **argv);
+int do_perf(int argc, char **argv);
+
+int prog_parse_fd(int *argc, char ***argv);
+int map_parse_fd(int *argc, char ***argv);
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
+
+void disasm_print_insn(unsigned char *image, ssize_t len, int opcodes,
+ const char *arch);
+void print_data_json(uint8_t *data, size_t len);
+void print_hex_data_json(uint8_t *data, size_t len);
+
+unsigned int get_page_size(void);
+unsigned int get_possible_cpus(void);
+const char *ifindex_to_bfd_name_ns(__u32 ifindex, __u64 ns_dev, __u64 ns_ino);
+
+struct btf_dumper {
+ const struct btf *btf;
+ json_writer_t *jw;
+ bool is_plain_text;
+};
+
+/* btf_dumper_type - print data along with type information
+ * @d: an instance containing context for dumping types
+ * @type_id: index in btf->types array; it points to the type to be dumped
+ * @data: pointer to the actual data, i.e. the values to be printed
+ *
+ * Returns zero on success and a negative error code otherwise
+ */
+int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
+ const void *data);
+#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
new file mode 100644
index 000000000..ec73d83d0
--- /dev/null
+++ b/tools/bpf/bpftool/map.c
@@ -0,0 +1,1065 @@
+/*
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <bpf.h>
+
+#include "btf.h"
+#include "json_writer.h"
+#include "main.h"
+
+static const char * const map_type_name[] = {
+ [BPF_MAP_TYPE_UNSPEC] = "unspec",
+ [BPF_MAP_TYPE_HASH] = "hash",
+ [BPF_MAP_TYPE_ARRAY] = "array",
+ [BPF_MAP_TYPE_PROG_ARRAY] = "prog_array",
+ [BPF_MAP_TYPE_PERF_EVENT_ARRAY] = "perf_event_array",
+ [BPF_MAP_TYPE_PERCPU_HASH] = "percpu_hash",
+ [BPF_MAP_TYPE_PERCPU_ARRAY] = "percpu_array",
+ [BPF_MAP_TYPE_STACK_TRACE] = "stack_trace",
+ [BPF_MAP_TYPE_CGROUP_ARRAY] = "cgroup_array",
+ [BPF_MAP_TYPE_LRU_HASH] = "lru_hash",
+ [BPF_MAP_TYPE_LRU_PERCPU_HASH] = "lru_percpu_hash",
+ [BPF_MAP_TYPE_LPM_TRIE] = "lpm_trie",
+ [BPF_MAP_TYPE_ARRAY_OF_MAPS] = "array_of_maps",
+ [BPF_MAP_TYPE_HASH_OF_MAPS] = "hash_of_maps",
+ [BPF_MAP_TYPE_DEVMAP] = "devmap",
+ [BPF_MAP_TYPE_SOCKMAP] = "sockmap",
+ [BPF_MAP_TYPE_CPUMAP] = "cpumap",
+ [BPF_MAP_TYPE_XSKMAP] = "xskmap",
+ [BPF_MAP_TYPE_SOCKHASH] = "sockhash",
+ [BPF_MAP_TYPE_CGROUP_STORAGE] = "cgroup_storage",
+};
+
+static bool map_is_per_cpu(__u32 type)
+{
+ return type == BPF_MAP_TYPE_PERCPU_HASH ||
+ type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+ type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool map_is_map_of_maps(__u32 type)
+{
+ return type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+ type == BPF_MAP_TYPE_HASH_OF_MAPS;
+}
+
+static bool map_is_map_of_progs(__u32 type)
+{
+ return type == BPF_MAP_TYPE_PROG_ARRAY;
+}
+
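+/* per-CPU maps return one value per possible CPU, each slot padded to
+ * an 8-byte boundary, hence the larger allocation
+ */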
+static void *alloc_value(struct bpf_map_info *info)
+{
+ if (map_is_per_cpu(info->type))
+ return malloc(round_up(info->value_size, 8) *
+ get_possible_cpus());
+ else
+ return malloc(info->value_size);
+}
+
+int map_parse_fd(int *argc, char ***argv)
+{
+ int fd;
+
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("get map by id (%u): %s", id, strerror(errno));
+ return fd;
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ return open_obj_pinned_any(path, BPF_OBJ_MAP);
+ }
+
+ p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+ int err;
+ int fd;
+
+ fd = map_parse_fd(argc, argv);
+ if (fd < 0)
+ return -1;
+
+ err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ return err;
+ }
+
+ return fd;
+}
+
+static int do_dump_btf(const struct btf_dumper *d,
+ struct bpf_map_info *map_info, void *key,
+ void *value)
+{
+ int ret;
+
+ /* start of key-value pair */
+ jsonw_start_object(d->jw);
+
+ jsonw_name(d->jw, "key");
+
+ ret = btf_dumper_type(d, map_info->btf_key_type_id, key);
+ if (ret)
+ goto err_end_obj;
+
+ jsonw_name(d->jw, "value");
+
+ ret = btf_dumper_type(d, map_info->btf_value_type_id, value);
+
+err_end_obj:
+ /* end of key-value pair */
+ jsonw_end_object(d->jw);
+
+ return ret;
+}
+
+static int get_btf(struct bpf_map_info *map_info, struct btf **btf)
+{
+ struct bpf_btf_info btf_info = { 0 };
+ __u32 len = sizeof(btf_info);
+ __u32 last_size;
+ int btf_fd;
+ void *ptr;
+ int err;
+
+ err = 0;
+ *btf = NULL;
+ btf_fd = bpf_btf_get_fd_by_id(map_info->btf_id);
+ if (btf_fd < 0)
+ return 0;
+
+ /* we won't know btf_size until we call bpf_obj_get_info_by_fd(), so
+ * start with a sane default (4 KiB here) and resize it only if
+ * bpf_obj_get_info_by_fd() needs a bigger buffer.
+ */
+ btf_info.btf_size = 4096;
+ last_size = btf_info.btf_size;
+ ptr = malloc(last_size);
+ if (!ptr) {
+ err = -ENOMEM;
+ goto exit_free;
+ }
+
+ bzero(ptr, last_size);
+ btf_info.btf = ptr_to_u64(ptr);
+ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+
+ if (!err && btf_info.btf_size > last_size) {
+ void *temp_ptr;
+
+ last_size = btf_info.btf_size;
+ temp_ptr = realloc(ptr, last_size);
+ if (!temp_ptr) {
+ err = -ENOMEM;
+ goto exit_free;
+ }
+ ptr = temp_ptr;
+ bzero(ptr, last_size);
+ btf_info.btf = ptr_to_u64(ptr);
+ err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len);
+ }
+
+ if (err || btf_info.btf_size > last_size) {
+ err = errno;
+ goto exit_free;
+ }
+
+ *btf = btf__new((__u8 *)btf_info.btf, btf_info.btf_size, NULL);
+ if (IS_ERR(*btf)) {
+ err = PTR_ERR(*btf);
+ *btf = NULL;
+ }
+
+exit_free:
+ close(btf_fd);
+ free(ptr);
+
+ return err;
+}
+
+static json_writer_t *get_btf_writer(void)
+{
+ json_writer_t *jw = jsonw_new(stdout);
+
+ if (!jw)
+ return NULL;
+ jsonw_pretty(jw, true);
+
+ return jw;
+}
+
+static void print_entry_json(struct bpf_map_info *info, unsigned char *key,
+ unsigned char *value, struct btf *btf)
+{
+ jsonw_start_object(json_wtr);
+
+ if (!map_is_per_cpu(info->type)) {
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, info->key_size);
+ jsonw_name(json_wtr, "value");
+ print_hex_data_json(value, info->value_size);
+ if (btf) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = json_wtr,
+ .is_plain_text = false,
+ };
+
+ jsonw_name(json_wtr, "formatted");
+ do_dump_btf(&d, info, key, value);
+ }
+ } else {
+ unsigned int i, n, step;
+
+ n = get_possible_cpus();
+ step = round_up(info->value_size, 8);
+
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, info->key_size);
+
+ jsonw_name(json_wtr, "values");
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < n; i++) {
+ jsonw_start_object(json_wtr);
+
+ jsonw_int_field(json_wtr, "cpu", i);
+
+ jsonw_name(json_wtr, "value");
+ print_hex_data_json(value + i * step,
+ info->value_size);
+
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+ }
+
+ jsonw_end_object(json_wtr);
+}
+
+static void print_entry_plain(struct bpf_map_info *info, unsigned char *key,
+ unsigned char *value)
+{
+ if (!map_is_per_cpu(info->type)) {
+ bool single_line, break_names;
+
+ break_names = info->key_size > 16 || info->value_size > 16;
+ single_line = info->key_size + info->value_size <= 24 &&
+ !break_names;
+
+ printf("key:%c", break_names ? '\n' : ' ');
+ fprint_hex(stdout, key, info->key_size, " ");
+
+ printf(single_line ? " " : "\n");
+
+ printf("value:%c", break_names ? '\n' : ' ');
+ fprint_hex(stdout, value, info->value_size, " ");
+
+ printf("\n");
+ } else {
+ unsigned int i, n, step;
+
+ n = get_possible_cpus();
+ step = round_up(info->value_size, 8);
+
+ printf("key:\n");
+ fprint_hex(stdout, key, info->key_size, " ");
+ printf("\n");
+ for (i = 0; i < n; i++) {
+ printf("value (CPU %02d):%c",
+ i, info->value_size > 16 ? '\n' : ' ');
+ fprint_hex(stdout, value + i * step,
+ info->value_size, " ");
+ printf("\n");
+ }
+ }
+}
+
+static char **parse_bytes(char **argv, const char *name, unsigned char *val,
+ unsigned int n)
+{
+ unsigned int i = 0, base = 0;
+ char *endptr;
+
+ if (is_prefix(*argv, "hex")) {
+ base = 16;
+ argv++;
+ }
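+ /* base 0 lets strtoul() auto-detect 0x/0 prefixes when "hex" is absent */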
+
+ while (i < n && argv[i]) {
+ val[i] = strtoul(argv[i], &endptr, base);
+ if (*endptr) {
+ p_err("error parsing byte: %s", argv[i]);
+ return NULL;
+ }
+ i++;
+ }
+
+ if (i != n) {
+ p_err("%s expected %d bytes got %d", name, n, i);
+ return NULL;
+ }
+
+ return argv + i;
+}
+
+/* on per-CPU maps the provided value must be copied to all per-CPU value instances */
+static void fill_per_cpu_value(struct bpf_map_info *info, void *value)
+{
+ unsigned int i, n, step;
+
+ if (!map_is_per_cpu(info->type))
+ return;
+
+ n = get_possible_cpus();
+ step = round_up(info->value_size, 8);
+ for (i = 1; i < n; i++)
+ memcpy(value + i * step, value, info->value_size);
+}
+
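+/* consume one "key ...", "value ..." or UPDATE_FLAGS clause per call,
+ * recursing with the handled slot cleared so that duplicates are caught
+ */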
+static int parse_elem(char **argv, struct bpf_map_info *info,
+ void *key, void *value, __u32 key_size, __u32 value_size,
+ __u32 *flags, __u32 **value_fd)
+{
+ if (!*argv) {
+ if (!key && !value)
+ return 0;
+ p_err("did not find %s", key ? "key" : "value");
+ return -1;
+ }
+
+ if (is_prefix(*argv, "key")) {
+ if (!key) {
+ if (key_size)
+ p_err("duplicate key");
+ else
+ p_err("unnecessary key");
+ return -1;
+ }
+
+ argv = parse_bytes(argv + 1, "key", key, key_size);
+ if (!argv)
+ return -1;
+
+ return parse_elem(argv, info, NULL, value, key_size, value_size,
+ flags, value_fd);
+ } else if (is_prefix(*argv, "value")) {
+ int fd;
+
+ if (!value) {
+ if (value_size)
+ p_err("duplicate value");
+ else
+ p_err("unnecessary value");
+ return -1;
+ }
+
+ argv++;
+
+ if (map_is_map_of_maps(info->type)) {
+ int argc = 2;
+
+ if (value_size != 4) {
+ p_err("value smaller than 4B for map in map?");
+ return -1;
+ }
+ if (!argv[0] || !argv[1]) {
+ p_err("not enough value arguments for map in map");
+ return -1;
+ }
+
+ fd = map_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ *value_fd = value;
+ **value_fd = fd;
+ } else if (map_is_map_of_progs(info->type)) {
+ int argc = 2;
+
+ if (value_size != 4) {
+ p_err("value smaller than 4B for map of progs?");
+ return -1;
+ }
+ if (!argv[0] || !argv[1]) {
+ p_err("not enough value arguments for map of progs");
+ return -1;
+ }
+
+ fd = prog_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ *value_fd = value;
+ **value_fd = fd;
+ } else {
+ argv = parse_bytes(argv, "value", value, value_size);
+ if (!argv)
+ return -1;
+
+ fill_per_cpu_value(info, value);
+ }
+
+ return parse_elem(argv, info, key, NULL, key_size, value_size,
+ flags, NULL);
+ } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") ||
+ is_prefix(*argv, "exist")) {
+ if (!flags) {
+ p_err("flags specified multiple times: %s", *argv);
+ return -1;
+ }
+
+ if (is_prefix(*argv, "any"))
+ *flags = BPF_ANY;
+ else if (is_prefix(*argv, "noexist"))
+ *flags = BPF_NOEXIST;
+ else if (is_prefix(*argv, "exist"))
+ *flags = BPF_EXIST;
+
+ return parse_elem(argv + 1, info, key, value, key_size,
+ value_size, NULL, value_fd);
+ }
+
+ p_err("expected key or value, got: %s", *argv);
+ return -1;
+}
+
+static int show_map_close_json(int fd, struct bpf_map_info *info)
+{
+ char *memlock;
+
+ memlock = get_fdinfo(fd, "memlock");
+ close(fd);
+
+ jsonw_start_object(json_wtr);
+
+ jsonw_uint_field(json_wtr, "id", info->id);
+ if (info->type < ARRAY_SIZE(map_type_name))
+ jsonw_string_field(json_wtr, "type",
+ map_type_name[info->type]);
+ else
+ jsonw_uint_field(json_wtr, "type", info->type);
+
+ if (*info->name)
+ jsonw_string_field(json_wtr, "name", info->name);
+
+ jsonw_name(json_wtr, "flags");
+ jsonw_printf(json_wtr, "%d", info->map_flags);
+
+ print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
+ jsonw_uint_field(json_wtr, "bytes_key", info->key_size);
+ jsonw_uint_field(json_wtr, "bytes_value", info->value_size);
+ jsonw_uint_field(json_wtr, "max_entries", info->max_entries);
+
+ if (memlock)
+ jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock));
+ free(memlock);
+
+ if (!hash_empty(map_table.table)) {
+ struct pinned_obj *obj;
+
+ jsonw_name(json_wtr, "pinned");
+ jsonw_start_array(json_wtr);
+ hash_for_each_possible(map_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ jsonw_string(json_wtr, obj->path);
+ }
+ jsonw_end_array(json_wtr);
+ }
+
+ jsonw_end_object(json_wtr);
+
+ return 0;
+}
+
+static int show_map_close_plain(int fd, struct bpf_map_info *info)
+{
+ char *memlock;
+
+ memlock = get_fdinfo(fd, "memlock");
+ close(fd);
+
+ printf("%u: ", info->id);
+ if (info->type < ARRAY_SIZE(map_type_name))
+ printf("%s ", map_type_name[info->type]);
+ else
+ printf("type %u ", info->type);
+
+ if (*info->name)
+ printf("name %s ", info->name);
+
+ printf("flags 0x%x", info->map_flags);
+ print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+ printf("\n");
+ printf("\tkey %uB value %uB max_entries %u",
+ info->key_size, info->value_size, info->max_entries);
+
+ if (memlock)
+ printf(" memlock %sB", memlock);
+ free(memlock);
+
+ printf("\n");
+ if (!hash_empty(map_table.table)) {
+ struct pinned_obj *obj;
+
+ hash_for_each_possible(map_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ printf("\tpinned %s\n", obj->path);
+ }
+ }
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ if (show_pinned)
+ build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
+
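+ /* a single MAP reference ("id ID" or "pinned PATH") is two words */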
+ if (argc == 2) {
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ if (json_output)
+ return show_map_close_json(fd, &info);
+ else
+ return show_map_close_plain(fd, &info);
+ }
+
+ if (argc)
+ return BAD_ARG();
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ p_err("can't get next map: %s%s", strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ break;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ break;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ break;
+ }
+
+ if (json_output)
+ show_map_close_json(fd, &info);
+ else
+ show_map_close_plain(fd, &info);
+ }
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return errno == ENOENT ? 0 : -1;
+}
+
+static int do_dump(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ void *key, *value, *prev_key;
+ unsigned int num_elems = 0;
+ __u32 len = sizeof(info);
+ json_writer_t *btf_wtr;
+ struct btf *btf = NULL;
+ int err;
+ int fd;
+
+ if (argc != 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ if (map_is_map_of_maps(info.type) || map_is_map_of_progs(info.type)) {
+ p_err("Dumping maps of maps and program maps not supported");
+ close(fd);
+ return -1;
+ }
+
+ key = malloc(info.key_size);
+ value = alloc_value(&info);
+ if (!key || !value) {
+ p_err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ prev_key = NULL;
+
+ err = get_btf(&info, &btf);
+ if (err) {
+ p_err("failed to get btf");
+ goto exit_free;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ else
+ if (btf) {
+ btf_wtr = get_btf_writer();
+ if (!btf_wtr) {
+ p_info("failed to create json writer for btf. falling back to plain output");
+ btf__free(btf);
+ btf = NULL;
+ } else {
+ jsonw_start_array(btf_wtr);
+ }
+ }
+
+ while (true) {
+ err = bpf_map_get_next_key(fd, prev_key, key);
+ if (err) {
+ if (errno == ENOENT)
+ err = 0;
+ break;
+ }
+
+ if (!bpf_map_lookup_elem(fd, key, value)) {
+ if (json_output)
+ print_entry_json(&info, key, value, btf);
+ else
+ if (btf) {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
+
+ do_dump_btf(&d, &info, key, value);
+ } else {
+ print_entry_plain(&info, key, value);
+ }
+ } else {
+ if (json_output) {
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, info.key_size);
+ jsonw_name(json_wtr, "value");
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "error",
+ "can't lookup element");
+ jsonw_end_object(json_wtr);
+ } else {
+ p_info("can't lookup element with key: ");
+ fprint_hex(stderr, key, info.key_size, " ");
+ fprintf(stderr, "\n");
+ }
+ }
+
+ prev_key = key;
+ num_elems++;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+ else if (btf) {
+ jsonw_end_array(btf_wtr);
+ jsonw_destroy(&btf_wtr);
+ } else {
+ printf("Found %u element%s\n", num_elems,
+ num_elems != 1 ? "s" : "");
+ }
+
+exit_free:
+ free(key);
+ free(value);
+ close(fd);
+ btf__free(btf);
+
+ return err;
+}
+
+static int do_update(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 *value_fd = NULL;
+ __u32 flags = BPF_ANY;
+ void *key, *value;
+ int fd, err;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ value = alloc_value(&info);
+ if (!key || !value) {
+ p_err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ err = parse_elem(argv, &info, key, value, info.key_size,
+ info.value_size, &flags, &value_fd);
+ if (err)
+ goto exit_free;
+
+ err = bpf_map_update_elem(fd, key, value, flags);
+ if (err) {
+ p_err("update failed: %s", strerror(errno));
+ goto exit_free;
+ }
+
+exit_free:
+ if (value_fd)
+ close(*value_fd);
+ free(key);
+ free(value);
+ close(fd);
+
+ if (!err && json_output)
+ jsonw_null(json_wtr);
+ return err;
+}
+
+static int do_lookup(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ json_writer_t *btf_wtr;
+ struct btf *btf = NULL;
+ void *key, *value;
+ int err;
+ int fd;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ value = alloc_value(&info);
+ if (!key || !value) {
+ p_err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+ if (err)
+ goto exit_free;
+
+ err = bpf_map_lookup_elem(fd, key, value);
+ if (err) {
+ if (errno == ENOENT) {
+ if (json_output) {
+ jsonw_null(json_wtr);
+ } else {
+ printf("key:\n");
+ fprint_hex(stdout, key, info.key_size, " ");
+ printf("\n\nNot found\n");
+ }
+ } else {
+ p_err("lookup failed: %s", strerror(errno));
+ }
+
+ goto exit_free;
+ }
+
+ /* bpf_map_lookup_elem() succeeded: print the entry */
+ err = get_btf(&info, &btf);
+ if (err) {
+ p_err("failed to get btf");
+ goto exit_free;
+ }
+
+ if (json_output) {
+ print_entry_json(&info, key, value, btf);
+ } else if (btf) {
+ /* json_wtr has not been initialised in plain-output mode,
+ * so create a separate writer for btf
+ */
+ btf_wtr = get_btf_writer();
+ if (!btf_wtr) {
+ p_info("failed to create json writer for btf. falling back to plain output");
+ btf__free(btf);
+ btf = NULL;
+ print_entry_plain(&info, key, value);
+ } else {
+ struct btf_dumper d = {
+ .btf = btf,
+ .jw = btf_wtr,
+ .is_plain_text = true,
+ };
+
+ do_dump_btf(&d, &info, key, value);
+ jsonw_destroy(&btf_wtr);
+ }
+ } else {
+ print_entry_plain(&info, key, value);
+ }
+
+exit_free:
+ free(key);
+ free(value);
+ close(fd);
+ btf__free(btf);
+
+ return err;
+}
+
+static int do_getnext(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ void *key, *nextkey;
+ int err;
+ int fd;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ nextkey = malloc(info.key_size);
+ if (!key || !nextkey) {
+ p_err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ if (argc) {
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0,
+ NULL, NULL);
+ if (err)
+ goto exit_free;
+ } else {
+ free(key);
+ key = NULL;
+ }
+
+ err = bpf_map_get_next_key(fd, key, nextkey);
+ if (err) {
+ p_err("can't get next key: %s", strerror(errno));
+ goto exit_free;
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ if (key) {
+ jsonw_name(json_wtr, "key");
+ print_hex_data_json(key, info.key_size);
+ } else {
+ jsonw_null_field(json_wtr, "key");
+ }
+ jsonw_name(json_wtr, "next_key");
+ print_hex_data_json(nextkey, info.key_size);
+ jsonw_end_object(json_wtr);
+ } else {
+ if (key) {
+ printf("key:\n");
+ fprint_hex(stdout, key, info.key_size, " ");
+ printf("\n");
+ } else {
+ printf("key: None\n");
+ }
+ printf("next key:\n");
+ fprint_hex(stdout, nextkey, info.key_size, " ");
+ printf("\n");
+ }
+
+exit_free:
+ free(nextkey);
+ free(key);
+ close(fd);
+
+ return err;
+}
+
+static int do_delete(int argc, char **argv)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ void *key;
+ int err;
+ int fd;
+
+ if (argc < 2)
+ usage();
+
+ fd = map_parse_fd_and_info(&argc, &argv, &info, &len);
+ if (fd < 0)
+ return -1;
+
+ key = malloc(info.key_size);
+ if (!key) {
+ p_err("mem alloc failed");
+ err = -1;
+ goto exit_free;
+ }
+
+ err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL);
+ if (err)
+ goto exit_free;
+
+ err = bpf_map_delete_elem(fd, key);
+ if (err)
+ p_err("delete failed: %s", strerror(errno));
+
+exit_free:
+ free(key);
+ close(fd);
+
+ if (!err && json_output)
+ jsonw_null(json_wtr);
+ return err;
+}
+
+static int do_pin(int argc, char **argv)
+{
+ int err;
+
+ err = do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+ if (!err && json_output)
+ jsonw_null(json_wtr);
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s %s { show | list } [MAP]\n"
+ " %s %s dump MAP\n"
+ " %s %s update MAP key DATA value VALUE [UPDATE_FLAGS]\n"
+ " %s %s lookup MAP key DATA\n"
+ " %s %s getnext MAP [key DATA]\n"
+ " %s %s delete MAP key DATA\n"
+ " %s %s pin MAP FILE\n"
+ " %s %s event_pipe MAP [cpu N index M]\n"
+ " %s %s help\n"
+ "\n"
+ " " HELP_SPEC_MAP "\n"
+ " DATA := { [hex] BYTES }\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ " VALUE := { DATA | MAP | PROG }\n"
+ " UPDATE_FLAGS := { any | exist | noexist }\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { "dump", do_dump },
+ { "update", do_update },
+ { "lookup", do_lookup },
+ { "getnext", do_getnext },
+ { "delete", do_delete },
+ { "pin", do_pin },
+ { "event_pipe", do_event_pipe },
+ { 0 }
+};
+
+int do_map(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
new file mode 100644
index 000000000..8ec0148d7
--- /dev/null
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <libbpf.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include <bpf.h>
+#include <perf-sys.h>
+
+#include "main.h"
+
+#define MMAP_PAGE_CNT 16
+
+static bool stop;
+
+struct event_ring_info {
+ int fd;
+ int key;
+ unsigned int cpu;
+ void *mem;
+};
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ u64 time;
+ __u32 size;
+ unsigned char data[];
+};
+
+static void int_exit(int signo)
+{
+ fprintf(stderr, "Stopping...\n");
+ stop = true;
+}
+
+static enum bpf_perf_event_ret print_bpf_output(void *event, void *priv)
+{
+ struct event_ring_info *ring = priv;
+ struct perf_event_sample *e = event;
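+ /* PERF_RECORD_LOST records carry id/lost counters instead of sample data */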
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = event;
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "type");
+ jsonw_uint(json_wtr, e->header.type);
+ jsonw_name(json_wtr, "cpu");
+ jsonw_uint(json_wtr, ring->cpu);
+ jsonw_name(json_wtr, "index");
+ jsonw_uint(json_wtr, ring->key);
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ jsonw_name(json_wtr, "timestamp");
+ jsonw_uint(json_wtr, e->time);
+ jsonw_name(json_wtr, "data");
+ print_data_json(e->data, e->size);
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ jsonw_name(json_wtr, "lost");
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "id");
+ jsonw_uint(json_wtr, lost->id);
+ jsonw_name(json_wtr, "count");
+ jsonw_uint(json_wtr, lost->lost);
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+ } else {
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ printf("== @%lld.%09lld CPU: %d index: %d =====\n",
+ e->time / 1000000000ULL, e->time % 1000000000ULL,
+ ring->cpu, ring->key);
+ fprint_hex(stdout, e->data, e->size, " ");
+ printf("\n");
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+ }
+
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void
+perf_event_read(struct event_ring_info *ring, void **buf, size_t *buf_len)
+{
+ enum bpf_perf_event_ret ret;
+
+ ret = bpf_perf_event_read_simple(ring->mem,
+ MMAP_PAGE_CNT * get_page_size(),
+ get_page_size(), buf, buf_len,
+ print_bpf_output, ring);
+ if (ret != LIBBPF_PERF_EVENT_CONT) {
+ fprintf(stderr, "perf read loop failed with %d\n", ret);
+ stop = true;
+ }
+}
+
+static int perf_mmap_size(void)
+{
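+ /* one control page plus MMAP_PAGE_CNT data pages */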
+ return get_page_size() * (MMAP_PAGE_CNT + 1);
+}
+
+static void *perf_event_mmap(int fd)
+{
+ int mmap_size = perf_mmap_size();
+ void *base;
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ p_err("event mmap failed: %s\n", strerror(errno));
+ return NULL;
+ }
+
+ return base;
+}
+
+static void perf_event_unmap(void *mem)
+{
+ if (munmap(mem, perf_mmap_size()))
+ fprintf(stderr, "Can't unmap ring memory!\n");
+}
+
+static int bpf_perf_event_open(int map_fd, int key, int cpu)
+{
+ struct perf_event_attr attr = {
+ .sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME,
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_BPF_OUTPUT,
+ };
+ int pmu_fd;
+
+ pmu_fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (pmu_fd < 0) {
+ p_err("failed to open perf event %d for CPU %d", key, cpu);
+ return -1;
+ }
+
+ if (bpf_map_update_elem(map_fd, &key, &pmu_fd, BPF_ANY)) {
+ p_err("failed to update map for event %d for CPU %d", key, cpu);
+ goto err_close;
+ }
+ if (ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ p_err("failed to enable event %d for CPU %d", key, cpu);
+ goto err_close;
+ }
+
+ return pmu_fd;
+
+err_close:
+ close(pmu_fd);
+ return -1;
+}
+
+int do_event_pipe(int argc, char **argv)
+{
+ int i, nfds, map_fd, index = -1, cpu = -1;
+ struct bpf_map_info map_info = {};
+ struct event_ring_info *rings;
+ size_t tmp_buf_sz = 0;
+ void *tmp_buf = NULL;
+ struct pollfd *pfds;
+ __u32 map_info_len;
+ bool do_all = true;
+
+ map_info_len = sizeof(map_info);
+ map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len);
+ if (map_fd < 0)
+ return -1;
+
+ if (map_info.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ p_err("map is not a perf event array");
+ goto err_close_map;
+ }
+
+ while (argc) {
+ if (argc < 2) {
+ BAD_ARG();
+ goto err_close_map;
+ }
+
+ if (is_prefix(*argv, "cpu")) {
+ char *endptr;
+
+ NEXT_ARG();
+ cpu = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as CPU ID", *argv);
+ goto err_close_map;
+ }
+
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "index")) {
+ char *endptr;
+
+ NEXT_ARG();
+ index = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as index", *argv);
+ goto err_close_map;
+ }
+
+ NEXT_ARG();
+ } else {
+ BAD_ARG();
+ goto err_close_map;
+ }
+
+ do_all = false;
+ }
+
+ if (!do_all) {
+ if (index == -1 || cpu == -1) {
+ p_err("cpu and index must be specified together");
+ goto err_close_map;
+ }
+
+ nfds = 1;
+ } else {
+ nfds = min(get_possible_cpus(), map_info.max_entries);
+ cpu = 0;
+ index = 0;
+ }
+
+ rings = calloc(nfds, sizeof(rings[0]));
+ if (!rings)
+ goto err_close_map;
+
+ pfds = calloc(nfds, sizeof(pfds[0]));
+ if (!pfds)
+ goto err_free_rings;
+
+ for (i = 0; i < nfds; i++) {
+ rings[i].cpu = cpu + i;
+ rings[i].key = index + i;
+
+ rings[i].fd = bpf_perf_event_open(map_fd, rings[i].key,
+ rings[i].cpu);
+ if (rings[i].fd < 0)
+ goto err_close_fds_prev;
+
+ rings[i].mem = perf_event_mmap(rings[i].fd);
+ if (!rings[i].mem)
+ goto err_close_fds_current;
+
+ pfds[i].fd = rings[i].fd;
+ pfds[i].events = POLLIN;
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGHUP, int_exit);
+ signal(SIGTERM, int_exit);
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ while (!stop) {
+ poll(pfds, nfds, 200);
+ for (i = 0; i < nfds; i++)
+ perf_event_read(&rings[i], &tmp_buf, &tmp_buf_sz);
+ }
+ free(tmp_buf);
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ for (i = 0; i < nfds; i++) {
+ perf_event_unmap(rings[i].mem);
+ close(rings[i].fd);
+ }
+ free(pfds);
+ free(rings);
+ close(map_fd);
+
+ return 0;
+
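+/* unwind: rings before i are fully set up; ring i may have an open fd
+ * but no mapped memory yet, hence the second label inside the loop
+ */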
+err_close_fds_prev:
+ while (i--) {
+ perf_event_unmap(rings[i].mem);
+err_close_fds_current:
+ close(rings[i].fd);
+ }
+ free(pfds);
+err_free_rings:
+ free(rings);
+err_close_map:
+ close(map_fd);
+ return -1;
+}
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
new file mode 100644
index 000000000..b76b77dcf
--- /dev/null
+++ b/tools/bpf/bpftool/perf.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0+
+// Copyright (C) 2018 Facebook
+// Author: Yonghong Song <yhs@fb.com>
+
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <ftw.h>
+
+#include <bpf.h>
+
+#include "main.h"
+
+/* 0: undecided, 1: supported, 2: not supported */
+static int perf_query_supported;
+static bool has_perf_query_support(void)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ char buf[256];
+ int fd;
+
+ if (perf_query_supported)
+ goto out;
+
+ fd = open("/", O_RDONLY);
+ if (fd < 0) {
+ p_err("perf_query_support: cannot open directory \"/\" (%s)",
+ strerror(errno));
+ goto out;
+ }
+
+ /* the following query will fail, as there is no bpf attachment;
+ * the expected errno is ENOTSUPP
+ */
+ errno = 0;
+ len = sizeof(buf);
+ bpf_task_fd_query(getpid(), fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+
+ if (errno == 524 /* ENOTSUPP */) {
+ perf_query_supported = 1;
+ goto close_fd;
+ }
+
+ perf_query_supported = 2;
+ p_err("perf_query_support: %s", strerror(errno));
+ fprintf(stderr,
+ "HINT: not running as root, or the kernel doesn't support TASK_FD_QUERY\n");
+
+close_fd:
+ close(fd);
+out:
+ return perf_query_supported == 1;
+}
+
+static void print_perf_json(int pid, int fd, __u32 prog_id, __u32 fd_type,
+ char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+ jsonw_start_object(json_wtr);
+ jsonw_int_field(json_wtr, "pid", pid);
+ jsonw_int_field(json_wtr, "fd", fd);
+ jsonw_uint_field(json_wtr, "prog_id", prog_id);
+ switch (fd_type) {
+ case BPF_FD_TYPE_RAW_TRACEPOINT:
+ jsonw_string_field(json_wtr, "fd_type", "raw_tracepoint");
+ jsonw_string_field(json_wtr, "tracepoint", buf);
+ break;
+ case BPF_FD_TYPE_TRACEPOINT:
+ jsonw_string_field(json_wtr, "fd_type", "tracepoint");
+ jsonw_string_field(json_wtr, "tracepoint", buf);
+ break;
+ case BPF_FD_TYPE_KPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "kprobe");
+ if (buf[0] != '\0') {
+ jsonw_string_field(json_wtr, "func", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ } else {
+ jsonw_lluint_field(json_wtr, "addr", probe_addr);
+ }
+ break;
+ case BPF_FD_TYPE_KRETPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "kretprobe");
+ if (buf[0] != '\0') {
+ jsonw_string_field(json_wtr, "func", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ } else {
+ jsonw_lluint_field(json_wtr, "addr", probe_addr);
+ }
+ break;
+ case BPF_FD_TYPE_UPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "uprobe");
+ jsonw_string_field(json_wtr, "filename", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ break;
+ case BPF_FD_TYPE_URETPROBE:
+ jsonw_string_field(json_wtr, "fd_type", "uretprobe");
+ jsonw_string_field(json_wtr, "filename", buf);
+ jsonw_lluint_field(json_wtr, "offset", probe_offset);
+ break;
+ }
+ jsonw_end_object(json_wtr);
+}
+
+static void print_perf_plain(int pid, int fd, __u32 prog_id, __u32 fd_type,
+ char *buf, __u64 probe_offset, __u64 probe_addr)
+{
+ printf("pid %d fd %d: prog_id %u ", pid, fd, prog_id);
+ switch (fd_type) {
+ case BPF_FD_TYPE_RAW_TRACEPOINT:
+ printf("raw_tracepoint %s\n", buf);
+ break;
+ case BPF_FD_TYPE_TRACEPOINT:
+ printf("tracepoint %s\n", buf);
+ break;
+ case BPF_FD_TYPE_KPROBE:
+ if (buf[0] != '\0')
+ printf("kprobe func %s offset %llu\n", buf,
+ probe_offset);
+ else
+ printf("kprobe addr %llu\n", probe_addr);
+ break;
+ case BPF_FD_TYPE_KRETPROBE:
+ if (buf[0] != '\0')
+ printf("kretprobe func %s offset %llu\n", buf,
+ probe_offset);
+ else
+ printf("kretprobe addr %llu\n", probe_addr);
+ break;
+ case BPF_FD_TYPE_UPROBE:
+ printf("uprobe filename %s offset %llu\n", buf, probe_offset);
+ break;
+ case BPF_FD_TYPE_URETPROBE:
+ printf("uretprobe filename %s offset %llu\n", buf,
+ probe_offset);
+ break;
+ }
+}
+
+static int show_proc(const char *fpath, const struct stat *sb,
+ int tflag, struct FTW *ftwbuf)
+{
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ int err, pid = 0, fd = 0;
+ const char *pch;
+ char buf[4096];
+
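+ /* e.g. fpath "/proc/1234/fd/5" yields pid = 1234, fd = 5 */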
+ /* fpath always begins with the 5-character prefix "/proc" */
+ pch = fpath + 5;
+ if (*pch == '\0')
+ return 0;
+
+ /* pid should be all numbers */
+ pch++;
+ while (isdigit(*pch)) {
+ pid = pid * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch == '\0')
+ return 0;
+ if (*pch != '/')
+ return FTW_SKIP_SUBTREE;
+
+ /* check /proc/<pid>/fd directory */
+ pch++;
+ if (strncmp(pch, "fd", 2))
+ return FTW_SKIP_SUBTREE;
+ pch += 2;
+ if (*pch == '\0')
+ return 0;
+ if (*pch != '/')
+ return FTW_SKIP_SUBTREE;
+
+ /* check /proc/<pid>/fd/<fd_num> */
+ pch++;
+ while (isdigit(*pch)) {
+ fd = fd * 10 + *pch - '0';
+ pch++;
+ }
+ if (*pch != '\0')
+ return FTW_SKIP_SUBTREE;
+
+ /* query (pid, fd) for potential perf events */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(pid, fd, 0, buf, &len, &prog_id, &fd_type,
+ &probe_offset, &probe_addr);
+ if (err < 0)
+ return 0;
+
+ if (json_output)
+ print_perf_json(pid, fd, prog_id, fd_type, buf, probe_offset,
+ probe_addr);
+ else
+ print_perf_plain(pid, fd, prog_id, fd_type, buf, probe_offset,
+ probe_addr);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ int flags = FTW_ACTIONRETVAL | FTW_PHYS;
+ int err = 0, nopenfd = 16;
+
+ if (!has_perf_query_support())
+ return -1;
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ if (nftw("/proc", show_proc, nopenfd, flags) == -1) {
+ p_err("%s", strerror(errno));
+ err = -1;
+ }
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %s %s { show | list | help }\n"
+ "",
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_perf(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
new file mode 100644
index 000000000..4f9611af4
--- /dev/null
+++ b/tools/bpf/bpftool/prog.c
@@ -0,0 +1,983 @@
+/*
+ * Copyright (C) 2017-2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/err.h>
+
+#include <bpf.h>
+#include <libbpf.h>
+
+#include "cfg.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+};
+
+static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
+{
+ struct timespec real_time_ts, boot_time_ts;
+ time_t wallclock_secs;
+ struct tm load_tm;
+
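+ /* load_time is nanoseconds since boot; convert it to wallclock time
+ * using the CLOCK_REALTIME - CLOCK_BOOTTIME offset
+ */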
+ buf[--size] = '\0';
+
+ if (clock_gettime(CLOCK_REALTIME, &real_time_ts) ||
+ clock_gettime(CLOCK_BOOTTIME, &boot_time_ts)) {
+ perror("Can't read clocks");
+ snprintf(buf, size, "%llu", nsecs / 1000000000);
+ return;
+ }
+
+ wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
+ (real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
+ 1000000000;
+
+
+ if (!localtime_r(&wallclock_secs, &load_tm)) {
+ snprintf(buf, size, "%llu", nsecs / 1000000000);
+ return;
+ }
+
+ if (json_output)
+ strftime(buf, size, "%s", &load_tm);
+ else
+ strftime(buf, size, "%FT%T%z", &load_tm);
+}
+
+static int prog_fd_by_tag(unsigned char *tag)
+{
+ unsigned int id = 0;
+ int err;
+ int fd;
+
+ while (true) {
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ p_err("%s", strerror(errno));
+ return -1;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get prog by id (%u): %s",
+ id, strerror(errno));
+ return -1;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get prog info (%u): %s",
+ id, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (!memcmp(tag, info.tag, BPF_TAG_SIZE))
+ return fd;
+
+ close(fd);
+ }
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+ int fd;
+
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0)
+ p_err("get by id (%u): %s", id, strerror(errno));
+ return fd;
+ } else if (is_prefix(**argv, "tag")) {
+ unsigned char tag[BPF_TAG_SIZE];
+
+ NEXT_ARGP();
+
+ if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+ tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+ != BPF_TAG_SIZE) {
+ p_err("can't parse tag");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_tag(tag);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ return open_obj_pinned_any(path, BPF_OBJ_PROG);
+ }
+
+ p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+static void show_prog_maps(int fd, u32 num_maps)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ __u32 map_ids[num_maps];
+ unsigned int i;
+ int err;
+
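+	/* Second getinfo call: this time pass an array so the kernel
+	 * fills in the IDs of the maps used by the program.
+	 */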
+ info.nr_map_ids = num_maps;
+ info.map_ids = ptr_to_u64(map_ids);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err || !info.nr_map_ids)
+ return;
+
+ if (json_output) {
+ jsonw_name(json_wtr, "map_ids");
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < info.nr_map_ids; i++)
+ jsonw_uint(json_wtr, map_ids[i]);
+ jsonw_end_array(json_wtr);
+ } else {
+ printf(" map_ids ");
+ for (i = 0; i < info.nr_map_ids; i++)
+ printf("%u%s", map_ids[i],
+ i == info.nr_map_ids - 1 ? "" : ",");
+ }
+}
+
+static void print_prog_json(struct bpf_prog_info *info, int fd)
+{
+ char *memlock;
+
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "id", info->id);
+ if (info->type < ARRAY_SIZE(prog_type_name))
+ jsonw_string_field(json_wtr, "type",
+ prog_type_name[info->type]);
+ else
+ jsonw_uint_field(json_wtr, "type", info->type);
+
+ if (*info->name)
+ jsonw_string_field(json_wtr, "name", info->name);
+
+ jsonw_name(json_wtr, "tag");
+ jsonw_printf(json_wtr, "\"" BPF_TAG_FMT "\"",
+ info->tag[0], info->tag[1], info->tag[2], info->tag[3],
+ info->tag[4], info->tag[5], info->tag[6], info->tag[7]);
+
+ jsonw_bool_field(json_wtr, "gpl_compatible", info->gpl_compatible);
+
+ print_dev_json(info->ifindex, info->netns_dev, info->netns_ino);
+
+ if (info->load_time) {
+ char buf[32];
+
+ print_boot_time(info->load_time, buf, sizeof(buf));
+
+ /* Piggy back on load_time, since 0 uid is a valid one */
+ jsonw_name(json_wtr, "loaded_at");
+ jsonw_printf(json_wtr, "%s", buf);
+ jsonw_uint_field(json_wtr, "uid", info->created_by_uid);
+ }
+
+ jsonw_uint_field(json_wtr, "bytes_xlated", info->xlated_prog_len);
+
+ if (info->jited_prog_len) {
+ jsonw_bool_field(json_wtr, "jited", true);
+ jsonw_uint_field(json_wtr, "bytes_jited", info->jited_prog_len);
+ } else {
+ jsonw_bool_field(json_wtr, "jited", false);
+ }
+
+ memlock = get_fdinfo(fd, "memlock");
+ if (memlock)
+ jsonw_int_field(json_wtr, "bytes_memlock", atoi(memlock));
+ free(memlock);
+
+ if (info->nr_map_ids)
+ show_prog_maps(fd, info->nr_map_ids);
+
+ if (!hash_empty(prog_table.table)) {
+ struct pinned_obj *obj;
+
+ jsonw_name(json_wtr, "pinned");
+ jsonw_start_array(json_wtr);
+ hash_for_each_possible(prog_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ jsonw_string(json_wtr, obj->path);
+ }
+ jsonw_end_array(json_wtr);
+ }
+
+ jsonw_end_object(json_wtr);
+}
+
+static void print_prog_plain(struct bpf_prog_info *info, int fd)
+{
+ char *memlock;
+
+ printf("%u: ", info->id);
+ if (info->type < ARRAY_SIZE(prog_type_name))
+ printf("%s ", prog_type_name[info->type]);
+ else
+ printf("type %u ", info->type);
+
+ if (*info->name)
+ printf("name %s ", info->name);
+
+ printf("tag ");
+ fprint_hex(stdout, info->tag, BPF_TAG_SIZE, "");
+ print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino);
+ printf("%s", info->gpl_compatible ? " gpl" : "");
+ printf("\n");
+
+ if (info->load_time) {
+ char buf[32];
+
+ print_boot_time(info->load_time, buf, sizeof(buf));
+
+ /* Piggy back on load_time, since 0 uid is a valid one */
+ printf("\tloaded_at %s uid %u\n", buf, info->created_by_uid);
+ }
+
+ printf("\txlated %uB", info->xlated_prog_len);
+
+ if (info->jited_prog_len)
+ printf(" jited %uB", info->jited_prog_len);
+ else
+ printf(" not jited");
+
+ memlock = get_fdinfo(fd, "memlock");
+ if (memlock)
+ printf(" memlock %sB", memlock);
+ free(memlock);
+
+ if (info->nr_map_ids)
+ show_prog_maps(fd, info->nr_map_ids);
+
+ if (!hash_empty(prog_table.table)) {
+ struct pinned_obj *obj;
+
+ printf("\n");
+ hash_for_each_possible(prog_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ printf("\tpinned %s\n", obj->path);
+ }
+ }
+
+ printf("\n");
+}
+
+static int show_prog(int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get prog info: %s", strerror(errno));
+ return -1;
+ }
+
+ if (json_output)
+ print_prog_json(&info, fd);
+ else
+ print_prog_plain(&info, fd);
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ if (show_pinned)
+ build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
+
+ if (argc == 2) {
+ fd = prog_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ err = show_prog(fd);
+ close(fd);
+ return err;
+ }
+
+ if (argc)
+ return BAD_ARG();
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ while (true) {
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ p_err("can't get next program: %s%s", strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ err = -1;
+ break;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get prog by id (%u): %s",
+ id, strerror(errno));
+ err = -1;
+ break;
+ }
+
+ err = show_prog(fd);
+ close(fd);
+ if (err)
+ break;
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return err;
+}
+
+static int do_dump(int argc, char **argv)
+{
+ unsigned long *func_ksyms = NULL;
+ struct bpf_prog_info info = {};
+ unsigned int *func_lens = NULL;
+ unsigned int nr_func_ksyms;
+ unsigned int nr_func_lens;
+ struct dump_data dd = {};
+ __u32 len = sizeof(info);
+ unsigned int buf_size;
+ char *filepath = NULL;
+ bool opcodes = false;
+ bool visual = false;
+ unsigned char *buf;
+ __u32 *member_len;
+ __u64 *member_ptr;
+ ssize_t n;
+ int err;
+ int fd;
+
+ if (is_prefix(*argv, "jited")) {
+ member_len = &info.jited_prog_len;
+ member_ptr = &info.jited_prog_insns;
+ } else if (is_prefix(*argv, "xlated")) {
+ member_len = &info.xlated_prog_len;
+ member_ptr = &info.xlated_prog_insns;
+ } else {
+ p_err("expected 'xlated' or 'jited', got: %s", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+
+ if (argc < 2)
+ usage();
+
+ fd = prog_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return -1;
+
+ if (is_prefix(*argv, "file")) {
+ NEXT_ARG();
+		if (!argc) {
+			p_err("expected file path");
+			close(fd);
+			return -1;
+		}
+
+ filepath = *argv;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "opcodes")) {
+ opcodes = true;
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "visual")) {
+ visual = true;
+ NEXT_ARG();
+ }
+
+	if (argc) {
+		close(fd);
+		usage();
+		return -1;
+	}
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err) {
+		p_err("can't get prog info: %s", strerror(errno));
+		close(fd);
+		return -1;
+	}
+
+ if (!*member_len) {
+ p_info("no instructions returned");
+ close(fd);
+ return 0;
+ }
+
+ buf_size = *member_len;
+
+ buf = malloc(buf_size);
+ if (!buf) {
+ p_err("mem alloc failed");
+ close(fd);
+ return -1;
+ }
+
+ nr_func_ksyms = info.nr_jited_ksyms;
+ if (nr_func_ksyms) {
+ func_ksyms = malloc(nr_func_ksyms * sizeof(__u64));
+ if (!func_ksyms) {
+ p_err("mem alloc failed");
+ close(fd);
+ goto err_free;
+ }
+ }
+
+ nr_func_lens = info.nr_jited_func_lens;
+ if (nr_func_lens) {
+ func_lens = malloc(nr_func_lens * sizeof(__u32));
+ if (!func_lens) {
+ p_err("mem alloc failed");
+ close(fd);
+ goto err_free;
+ }
+ }
+
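+	/* Query the kernel a second time, now pointing it at our buffers
+	 * for the instruction image, JITed ksyms and function lengths.
+	 */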
+ memset(&info, 0, sizeof(info));
+
+ *member_ptr = ptr_to_u64(buf);
+ *member_len = buf_size;
+ info.jited_ksyms = ptr_to_u64(func_ksyms);
+ info.nr_jited_ksyms = nr_func_ksyms;
+ info.jited_func_lens = ptr_to_u64(func_lens);
+ info.nr_jited_func_lens = nr_func_lens;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ close(fd);
+ if (err) {
+ p_err("can't get prog info: %s", strerror(errno));
+ goto err_free;
+ }
+
+ if (*member_len > buf_size) {
+ p_err("too many instructions returned");
+ goto err_free;
+ }
+
+ if (info.nr_jited_ksyms > nr_func_ksyms) {
+ p_err("too many addresses returned");
+ goto err_free;
+ }
+
+ if (info.nr_jited_func_lens > nr_func_lens) {
+ p_err("too many values returned");
+ goto err_free;
+ }
+
+ if ((member_len == &info.jited_prog_len &&
+ info.jited_prog_insns == 0) ||
+ (member_len == &info.xlated_prog_len &&
+ info.xlated_prog_insns == 0)) {
+ p_err("error retrieving insn dump: kernel.kptr_restrict set?");
+ goto err_free;
+ }
+
+ if (filepath) {
+ fd = open(filepath, O_WRONLY | O_CREAT | O_TRUNC, 0600);
+ if (fd < 0) {
+ p_err("can't open file %s: %s", filepath,
+ strerror(errno));
+ goto err_free;
+ }
+
+ n = write(fd, buf, *member_len);
+ close(fd);
+ if (n != *member_len) {
+ p_err("error writing output file: %s",
+ n < 0 ? strerror(errno) : "short write");
+ goto err_free;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+ } else if (member_len == &info.jited_prog_len) {
+ const char *name = NULL;
+
+ if (info.ifindex) {
+ name = ifindex_to_bfd_name_ns(info.ifindex,
+ info.netns_dev,
+ info.netns_ino);
+ if (!name)
+ goto err_free;
+ }
+
+ if (info.nr_jited_func_lens && info.jited_func_lens) {
+ struct kernel_sym *sym = NULL;
+ char sym_name[SYM_MAX_NAME];
+ unsigned char *img = buf;
+ __u64 *ksyms = NULL;
+ __u32 *lens;
+ __u32 i;
+
+ if (info.nr_jited_ksyms) {
+ kernel_syms_load(&dd);
+ ksyms = (__u64 *) info.jited_ksyms;
+ }
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+
+ lens = (__u32 *) info.jited_func_lens;
+ for (i = 0; i < info.nr_jited_func_lens; i++) {
+ if (ksyms) {
+ sym = kernel_syms_search(&dd, ksyms[i]);
+ if (sym)
+ sprintf(sym_name, "%s", sym->name);
+ else
+ sprintf(sym_name, "0x%016llx", ksyms[i]);
+ } else {
+ strcpy(sym_name, "unknown");
+ }
+
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "name");
+ jsonw_string(json_wtr, sym_name);
+ jsonw_name(json_wtr, "insns");
+ } else {
+ printf("%s:\n", sym_name);
+ }
+
+ disasm_print_insn(img, lens[i], opcodes, name);
+ img += lens[i];
+
+ if (json_output)
+ jsonw_end_object(json_wtr);
+ else
+ printf("\n");
+ }
+
+ if (json_output)
+ jsonw_end_array(json_wtr);
+ } else {
+ disasm_print_insn(buf, *member_len, opcodes, name);
+ }
+ } else if (visual) {
+ if (json_output)
+ jsonw_null(json_wtr);
+ else
+ dump_xlated_cfg(buf, *member_len);
+ } else {
+ kernel_syms_load(&dd);
+ dd.nr_jited_ksyms = info.nr_jited_ksyms;
+ dd.jited_ksyms = (__u64 *) info.jited_ksyms;
+
+ if (json_output)
+ dump_xlated_json(&dd, buf, *member_len, opcodes);
+ else
+ dump_xlated_plain(&dd, buf, *member_len, opcodes);
+ kernel_syms_destroy(&dd);
+ }
+
+ free(buf);
+ free(func_ksyms);
+ free(func_lens);
+ return 0;
+
+err_free:
+ free(buf);
+ free(func_ksyms);
+ free(func_lens);
+ return -1;
+}
+
+static int do_pin(int argc, char **argv)
+{
+ int err;
+
+ err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+ if (!err && json_output)
+ jsonw_null(json_wtr);
+ return err;
+}
+
+struct map_replace {
+ int idx;
+ int fd;
+ char *name;
+};
+
+int map_replace_compar(const void *p1, const void *p2)
+{
+ const struct map_replace *a = p1, *b = p2;
+
+ return a->idx - b->idx;
+}
+
+static int do_load(int argc, char **argv)
+{
+ enum bpf_attach_type expected_attach_type;
+ struct bpf_object_open_attr attr = {
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ };
+ struct map_replace *map_replace = NULL;
+ unsigned int old_map_fds = 0;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ const char *pinfile;
+ unsigned int i, j;
+ __u32 ifindex = 0;
+ int idx, err;
+
+ if (!REQ_ARGS(2))
+ return -1;
+ attr.file = GET_ARG();
+ pinfile = GET_ARG();
+
+ while (argc) {
+ if (is_prefix(*argv, "type")) {
+ char *type;
+
+ NEXT_ARG();
+
+ if (attr.prog_type != BPF_PROG_TYPE_UNSPEC) {
+ p_err("program type already specified");
+ goto err_free_reuse_maps;
+ }
+ if (!REQ_ARGS(1))
+ goto err_free_reuse_maps;
+
+ /* Put a '/' at the end of type to appease libbpf */
+ type = malloc(strlen(*argv) + 2);
+ if (!type) {
+ p_err("mem alloc failed");
+ goto err_free_reuse_maps;
+ }
+ *type = 0;
+ strcat(type, *argv);
+ strcat(type, "/");
+
+ err = libbpf_prog_type_by_name(type, &attr.prog_type,
+ &expected_attach_type);
+ free(type);
+ if (err < 0) {
+ p_err("unknown program type '%s'", *argv);
+ goto err_free_reuse_maps;
+ }
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "map")) {
+ void *new_map_replace;
+ char *endptr, *name;
+ int fd;
+
+ NEXT_ARG();
+
+ if (!REQ_ARGS(4))
+ goto err_free_reuse_maps;
+
+ if (is_prefix(*argv, "idx")) {
+ NEXT_ARG();
+
+ idx = strtoul(*argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as IDX", *argv);
+ goto err_free_reuse_maps;
+ }
+ name = NULL;
+ } else if (is_prefix(*argv, "name")) {
+ NEXT_ARG();
+
+ name = *argv;
+ idx = -1;
+ } else {
+ p_err("expected 'idx' or 'name', got: '%s'?",
+ *argv);
+ goto err_free_reuse_maps;
+ }
+ NEXT_ARG();
+
+ fd = map_parse_fd(&argc, &argv);
+ if (fd < 0)
+ goto err_free_reuse_maps;
+
+ new_map_replace = reallocarray(map_replace,
+ old_map_fds + 1,
+ sizeof(*map_replace));
+ if (!new_map_replace) {
+ p_err("mem alloc failed");
+ goto err_free_reuse_maps;
+ }
+ map_replace = new_map_replace;
+
+ map_replace[old_map_fds].idx = idx;
+ map_replace[old_map_fds].name = name;
+ map_replace[old_map_fds].fd = fd;
+ old_map_fds++;
+ } else if (is_prefix(*argv, "dev")) {
+ NEXT_ARG();
+
+ if (ifindex) {
+ p_err("offload device already specified");
+ goto err_free_reuse_maps;
+ }
+ if (!REQ_ARGS(1))
+ goto err_free_reuse_maps;
+
+ ifindex = if_nametoindex(*argv);
+ if (!ifindex) {
+ p_err("unrecognized netdevice '%s': %s",
+ *argv, strerror(errno));
+ goto err_free_reuse_maps;
+ }
+ NEXT_ARG();
+ } else {
+ p_err("expected no more arguments, 'type', 'map' or 'dev', got: '%s'?",
+ *argv);
+ goto err_free_reuse_maps;
+ }
+ }
+
+ obj = bpf_object__open_xattr(&attr);
+ if (IS_ERR_OR_NULL(obj)) {
+ p_err("failed to open object file");
+ goto err_free_reuse_maps;
+ }
+
+ prog = bpf_program__next(NULL, obj);
+ if (!prog) {
+ p_err("object file doesn't contain any bpf program");
+ goto err_close_obj;
+ }
+
+ bpf_program__set_ifindex(prog, ifindex);
+ if (attr.prog_type == BPF_PROG_TYPE_UNSPEC) {
+ const char *sec_name = bpf_program__title(prog, false);
+
+ err = libbpf_prog_type_by_name(sec_name, &attr.prog_type,
+ &expected_attach_type);
+ if (err < 0) {
+			p_err("failed to guess program type based on section name %s",
+ sec_name);
+ goto err_close_obj;
+ }
+ }
+ bpf_program__set_type(prog, attr.prog_type);
+ bpf_program__set_expected_attach_type(prog, expected_attach_type);
+
+ qsort(map_replace, old_map_fds, sizeof(*map_replace),
+ map_replace_compar);
+
+	/* After the sort, maps specified by name come first in the list,
+	 * because they have idx == -1. Resolve them.
+	 */
+ j = 0;
+ while (j < old_map_fds && map_replace[j].name) {
+ i = 0;
+ bpf_map__for_each(map, obj) {
+ if (!strcmp(bpf_map__name(map), map_replace[j].name)) {
+ map_replace[j].idx = i;
+ break;
+ }
+ i++;
+ }
+ if (map_replace[j].idx == -1) {
+ p_err("unable to find map '%s'", map_replace[j].name);
+ goto err_close_obj;
+ }
+ j++;
+ }
+	/* Re-sort if any names were resolved */
+ if (j)
+ qsort(map_replace, old_map_fds, sizeof(*map_replace),
+ map_replace_compar);
+
+ /* Set ifindex and name reuse */
+ j = 0;
+ idx = 0;
+ bpf_map__for_each(map, obj) {
+ if (!bpf_map__is_offload_neutral(map))
+ bpf_map__set_ifindex(map, ifindex);
+
+ if (j < old_map_fds && idx == map_replace[j].idx) {
+ err = bpf_map__reuse_fd(map, map_replace[j++].fd);
+ if (err) {
+ p_err("unable to set up map reuse: %d", err);
+ goto err_close_obj;
+ }
+
+ /* Next reuse wants to apply to the same map */
+ if (j < old_map_fds && map_replace[j].idx == idx) {
+ p_err("replacement for map idx %d specified more than once",
+ idx);
+ goto err_close_obj;
+ }
+ }
+
+ idx++;
+ }
+ if (j < old_map_fds) {
+ p_err("map idx '%d' not used", map_replace[j].idx);
+ goto err_close_obj;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ p_err("failed to load object file");
+ goto err_close_obj;
+ }
+
+ if (do_pin_fd(bpf_program__fd(prog), pinfile))
+ goto err_close_obj;
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ bpf_object__close(obj);
+ for (i = 0; i < old_map_fds; i++)
+ close(map_replace[i].fd);
+ free(map_replace);
+
+ return 0;
+
+err_close_obj:
+ bpf_object__close(obj);
+err_free_reuse_maps:
+ for (i = 0; i < old_map_fds; i++)
+ close(map_replace[i].fd);
+ free(map_replace);
+ return -1;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s %s { show | list } [PROG]\n"
+ " %s %s dump xlated PROG [{ file FILE | opcodes | visual }]\n"
+ " %s %s dump jited PROG [{ file FILE | opcodes }]\n"
+ " %s %s pin PROG FILE\n"
+ " %s %s load OBJ FILE [type TYPE] [dev NAME] \\\n"
+ " [map { idx IDX | name NAME } MAP]\n"
+ " %s %s help\n"
+ "\n"
+ " " HELP_SPEC_MAP "\n"
+ " " HELP_SPEC_PROGRAM "\n"
+ " TYPE := { socket | kprobe | kretprobe | classifier | action |\n"
+ " tracepoint | raw_tracepoint | xdp | perf_event | cgroup/skb |\n"
+ " cgroup/sock | cgroup/dev | lwt_in | lwt_out | lwt_xmit |\n"
+ " lwt_seg6local | sockops | sk_skb | sk_msg | lirc_mode2 |\n"
+ " cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
+ " cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
+ " cgroup/sendmsg4 | cgroup/sendmsg6 }\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { "dump", do_dump },
+ { "pin", do_pin },
+ { "load", do_load },
+ { 0 }
+};
+
+int do_prog(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
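
For reference, typical invocations of the subcommands implemented above look
like this (the program ID, object file and pin path are illustrative):

  # bpftool prog show id 42
  # bpftool prog dump xlated id 42 opcodes
  # bpftool prog load sample.o /sys/fs/bpf/sample type xdp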
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
new file mode 100644
index 000000000..3284759df
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -0,0 +1,346 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define _GNU_SOURCE
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "disasm.h"
+#include "json_writer.h"
+#include "main.h"
+#include "xlated_dumper.h"
+
+static int kernel_syms_cmp(const void *sym_a, const void *sym_b)
+{
+ return ((struct kernel_sym *)sym_a)->address -
+ ((struct kernel_sym *)sym_b)->address;
+}
+
+void kernel_syms_load(struct dump_data *dd)
+{
+ struct kernel_sym *sym;
+ char buff[256];
+ void *tmp, *address;
+ FILE *fp;
+
+ fp = fopen("/proc/kallsyms", "r");
+ if (!fp)
+ return;
+
+ while (!feof(fp)) {
+ if (!fgets(buff, sizeof(buff), fp))
+ break;
+ tmp = reallocarray(dd->sym_mapping, dd->sym_count + 1,
+ sizeof(*dd->sym_mapping));
+ if (!tmp) {
+out:
+ free(dd->sym_mapping);
+ dd->sym_mapping = NULL;
+ fclose(fp);
+ return;
+ }
+ dd->sym_mapping = tmp;
+ sym = &dd->sym_mapping[dd->sym_count];
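+		/* Each kallsyms line reads "<address> <type> <name>". */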
+ if (sscanf(buff, "%p %*c %s", &address, sym->name) != 2)
+ continue;
+ sym->address = (unsigned long)address;
+ if (!strcmp(sym->name, "__bpf_call_base")) {
+ dd->address_call_base = sym->address;
+ /* sysctl kernel.kptr_restrict was set */
+ if (!sym->address)
+ goto out;
+ }
+ if (sym->address)
+ dd->sym_count++;
+ }
+
+ fclose(fp);
+
+ qsort(dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp);
+}
+
+void kernel_syms_destroy(struct dump_data *dd)
+{
+ free(dd->sym_mapping);
+}
+
+struct kernel_sym *kernel_syms_search(struct dump_data *dd,
+ unsigned long key)
+{
+ struct kernel_sym sym = {
+ .address = key,
+ };
+
+ return dd->sym_mapping ?
+ bsearch(&sym, dd->sym_mapping, dd->sym_count,
+ sizeof(*dd->sym_mapping), kernel_syms_cmp) : NULL;
+}
+
+static void print_insn(void *private_data, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ va_end(args);
+}
+
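+/*
+ * Print callback for the CFG (DOT graph) output: newlines become DOT's
+ * left-aligned line breaks ("\l") and characters that are special inside
+ * DOT labels are backslash-escaped.
+ */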
+static void
+print_insn_for_graph(void *private_data, const char *fmt, ...)
+{
+ char buf[64], *p;
+ va_list args;
+
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+
+ p = buf;
+ while (*p != '\0') {
+ if (*p == '\n') {
+ memmove(p + 3, p, strlen(buf) + 1 - (p - buf));
+ /* Align each instruction dump row left. */
+ *p++ = '\\';
+ *p++ = 'l';
+ /* Output multiline concatenation. */
+ *p++ = '\\';
+ } else if (*p == '<' || *p == '>' || *p == '|' || *p == '&') {
+ memmove(p + 1, p, strlen(buf) + 1 - (p - buf));
+ /* Escape special character. */
+ *p++ = '\\';
+ }
+
+ p++;
+ }
+
+ printf("%s", buf);
+}
+
+static void print_insn_json(void *private_data, const char *fmt, ...)
+{
+ unsigned int l = strlen(fmt);
+ char chomped_fmt[l];
+ va_list args;
+
+ va_start(args, fmt);
+ if (l > 0) {
+ strncpy(chomped_fmt, fmt, l - 1);
+ chomped_fmt[l - 1] = '\0';
+ }
+ jsonw_vprintf_enquote(json_wtr, chomped_fmt, args);
+ va_end(args);
+}
+
+static const char *print_call_pcrel(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address,
+ const struct bpf_insn *insn)
+{
+ if (!dd->nr_jited_ksyms)
+ /* Do not show address for interpreted programs */
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d", insn->off);
+ else if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#%s", insn->off, sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%+d#0x%lx", insn->off, address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call_helper(struct dump_data *dd,
+ struct kernel_sym *sym,
+ unsigned long address)
+{
+ if (sym)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "%s", sym->name);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%lx", address);
+ return dd->scratch_buff;
+}
+
+static const char *print_call(void *private_data,
+ const struct bpf_insn *insn)
+{
+ struct dump_data *dd = private_data;
+ unsigned long address = dd->address_call_base + insn->imm;
+ struct kernel_sym *sym;
+
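+	/* For bpf-to-bpf (pseudo) calls, imm indexes the JITed ksyms table;
+	 * for helper calls it is an offset from __bpf_call_base.
+	 */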
+ if (insn->src_reg == BPF_PSEUDO_CALL &&
+ (__u32) insn->imm < dd->nr_jited_ksyms)
+ address = dd->jited_ksyms[insn->imm];
+
+ sym = kernel_syms_search(dd, address);
+ if (insn->src_reg == BPF_PSEUDO_CALL)
+ return print_call_pcrel(dd, sym, address, insn);
+ else
+ return print_call_helper(dd, sym, address);
+}
+
+static const char *print_imm(void *private_data,
+ const struct bpf_insn *insn,
+ __u64 full_imm)
+{
+ struct dump_data *dd = private_data;
+
+ if (insn->src_reg == BPF_PSEUDO_MAP_FD)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "map[id:%u]", insn->imm);
+ else
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "0x%llx", (unsigned long long)full_imm);
+ return dd->scratch_buff;
+}
+
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_json,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ jsonw_start_array(json_wtr);
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
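+		/* ld_imm64 (BPF_LD | BPF_IMM | BPF_DW) spans two insn slots;
+		 * remember to skip the second half on the next iteration.
+		 */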
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "disasm");
+ print_bpf_insn(&cbs, insn + i, true);
+
+ if (opcodes) {
+ jsonw_name(json_wtr, "opcodes");
+ jsonw_start_object(json_wtr);
+
+ jsonw_name(json_wtr, "code");
+ jsonw_printf(json_wtr, "\"0x%02hhx\"", insn[i].code);
+
+ jsonw_name(json_wtr, "src_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].src_reg);
+
+ jsonw_name(json_wtr, "dst_reg");
+ jsonw_printf(json_wtr, "\"0x%hhx\"", insn[i].dst_reg);
+
+ jsonw_name(json_wtr, "off");
+ print_hex_data_json((uint8_t *)(&insn[i].off), 2);
+
+ jsonw_name(json_wtr, "imm");
+ if (double_insn && i < len - 1)
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 12);
+ else
+ print_hex_data_json((uint8_t *)(&insn[i].imm),
+ 4);
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn = buf;
+ bool double_insn = false;
+ unsigned int i;
+
+ for (i = 0; i < len / sizeof(*insn); i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+
+ printf("% 4d: ", i);
+ print_bpf_insn(&cbs, insn + i, true);
+
+ if (opcodes) {
+ printf(" ");
+ fprint_hex(stdout, insn + i, 8, " ");
+ if (double_insn && i < len - 1) {
+ printf(" ");
+ fprint_hex(stdout, insn + i + 1, 8, " ");
+ }
+ printf("\n");
+ }
+ }
+}
+
+void dump_xlated_for_graph(struct dump_data *dd, void *buf_start, void *buf_end,
+ unsigned int start_idx)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn_for_graph,
+ .cb_call = print_call,
+ .cb_imm = print_imm,
+ .private_data = dd,
+ };
+ struct bpf_insn *insn_start = buf_start;
+ struct bpf_insn *insn_end = buf_end;
+ struct bpf_insn *cur = insn_start;
+
+ for (; cur <= insn_end; cur++) {
+ printf("% 4d: ", (int)(cur - insn_start + start_idx));
+ print_bpf_insn(&cbs, cur, true);
+ if (cur != insn_end)
+ printf(" | ");
+ }
+}
diff --git a/tools/bpf/bpftool/xlated_dumper.h b/tools/bpf/bpftool/xlated_dumper.h
new file mode 100644
index 000000000..33d86e2b3
--- /dev/null
+++ b/tools/bpf/bpftool/xlated_dumper.h
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2018 Netronome Systems, Inc.
+ *
+ * This software is dual licensed under the GNU General Public License Version 2,
+ * June 1991 as shown in the file COPYING in the top-level directory of this
+ * source tree or the BSD 2-Clause License provided below. You have the
+ * option to license this software under the complete terms of either license.
+ *
+ * The BSD 2-Clause License:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __BPF_TOOL_XLATED_DUMPER_H
+#define __BPF_TOOL_XLATED_DUMPER_H
+
+#define SYM_MAX_NAME 256
+
+struct kernel_sym {
+ unsigned long address;
+ char name[SYM_MAX_NAME];
+};
+
+struct dump_data {
+ unsigned long address_call_base;
+ struct kernel_sym *sym_mapping;
+ __u32 sym_count;
+ __u64 *jited_ksyms;
+ __u32 nr_jited_ksyms;
+ char scratch_buff[SYM_MAX_NAME + 8];
+};
+
+void kernel_syms_load(struct dump_data *dd);
+void kernel_syms_destroy(struct dump_data *dd);
+struct kernel_sym *kernel_syms_search(struct dump_data *dd, unsigned long key);
+void dump_xlated_json(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_plain(struct dump_data *dd, void *buf, unsigned int len,
+ bool opcodes);
+void dump_xlated_for_graph(struct dump_data *dd, void *buf, void *buf_end,
+ unsigned int start_index);
+
+#endif
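
As a sketch of how these entry points fit together (mirroring do_dump() in
prog.c; assume 'insns' and 'cnt' hold an xlated program image already
fetched via bpf_obj_get_info_by_fd()):

  struct dump_data dd = {};

  kernel_syms_load(&dd);                      /* index /proc/kallsyms */
  dump_xlated_plain(&dd, insns, cnt * sizeof(struct bpf_insn), false);
  kernel_syms_destroy(&dd);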
diff --git a/tools/build/.gitignore b/tools/build/.gitignore
new file mode 100644
index 000000000..a776371a3
--- /dev/null
+++ b/tools/build/.gitignore
@@ -0,0 +1 @@
+fixdep
diff --git a/tools/build/Build b/tools/build/Build
new file mode 100644
index 000000000..76d1a4960
--- /dev/null
+++ b/tools/build/Build
@@ -0,0 +1,3 @@
+hostprogs := fixdep
+
+fixdep-y := fixdep.o
diff --git a/tools/build/Build.include b/tools/build/Build.include
new file mode 100644
index 000000000..585486e40
--- /dev/null
+++ b/tools/build/Build.include
@@ -0,0 +1,102 @@
+###
+# build: Generic definitions
+#
+# Much of this code has been borrowed from, or heavily inspired by, parts
+# of the kbuild code, which is not credited, but mostly developed by:
+#
+# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015
+# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015
+#
+
+###
+# Convenient variables
+comma := ,
+squote := '
+pound := \#
+
+###
+# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
+dot-target = $(dir $@).$(notdir $@)
+
+###
+# filename of target with directory and extension stripped
+basetarget = $(basename $(notdir $@))
+
+###
+# The temporary file to save gcc -MD generated dependencies must not
+# contain a comma
+depfile = $(subst $(comma),_,$(dot-target).d)
+
+###
+# Check if both arguments contain the same command. Result is empty string if equal.
+arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \
+ $(filter-out $(cmd_$@), $(cmd_$(1))) )
+
+###
+# Escape single quote for use in echo statements
+escsq = $(subst $(squote),'\$(squote)',$1)
+
+# Echo command
+# The short version is used if $(quiet) equals 'quiet_', otherwise the full one.
+echo-cmd = $(if $($(quiet)cmd_$(1)),\
+ echo ' $(call escsq,$($(quiet)cmd_$(1)))';)
+
+###
+# Replace >$< with >$$< to preserve $ when reloading the .cmd file
+# (needed for make)
+# Replace >#< with >$(pound)< to avoid starting a comment in the .cmd file
+# (needed for make)
+# Replace >'< with >'\''< to be able to enclose the whole string in '...'
+# (needed for the shell)
+make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
+
+###
+# Find any prerequisites that are newer than the target or that do not exist.
+# PHONY targets are skipped in both cases.
+any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^)
+
+###
+# Copy dependency data into .cmd file
+# - gcc -M dependency info
+# - command line to create object 'cmd_object :='
+dep-cmd = $(if $(wildcard $(fixdep)), \
+ $(fixdep) $(depfile) $@ '$(make-cmd)' > $(dot-target).tmp; \
+ rm -f $(depfile); \
+ mv -f $(dot-target).tmp $(dot-target).cmd, \
+ printf '$(pound) cannot find fixdep (%s)\n' $(fixdep) > $(dot-target).cmd; \
+ printf '$(pound) using basic dep data\n\n' >> $(dot-target).cmd; \
+ cat $(depfile) >> $(dot-target).cmd; \
+ printf '\n%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd)
+
+###
+# if_changed_dep - execute command if any prerequisite is newer than
+#                  the target or the command line has changed, and update
+#                  the dependencies in the .cmd file
+if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \
+ @set -e; \
+ $(echo-cmd) $(cmd_$(1)); \
+ $(dep-cmd))
+
+# if_changed - execute command if any prerequisite is newer than
+# target, or command line has changed
+if_changed = $(if $(strip $(any-prereq) $(arg-check)), \
+ @set -e; \
+ $(echo-cmd) $(cmd_$(1)); \
+ printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd)
+
+###
+# C flags to be used in rule definitions, includes:
+# - depfile generation
+# - global $(CFLAGS)
+# - per target C flags
+# - per object C flags
+# - BUILD_STR macro to allow '-D"$(variable)"' constructs
+c_flags_1 = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj))
+c_flags_2 = $(filter-out $(CFLAGS_REMOVE_$(basetarget).o), $(c_flags_1))
+c_flags = $(filter-out $(CFLAGS_REMOVE_$(obj)), $(c_flags_2))
+cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXXFLAGS_$(basetarget).o) $(CXXFLAGS_$(obj))
+
+###
+## HOSTCC C flags
+
+host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt
new file mode 100644
index 000000000..a22587475
--- /dev/null
+++ b/tools/build/Documentation/Build.txt
@@ -0,0 +1,168 @@
+Build Framework
+===============
+
+The perf build framework was adopted from the kernel build system, hence the
+idea and the way objects are built are the same.
+
+Basically, the user provides a set of 'Build' files that list the objects and
+directories to nest for a specific target to be built.
+
+Unlike the kernel, we don't have just a single build object 'obj-y' list in
+which we set up source objects; we support several. This allows one 'Build'
+file to carry a sources list for multiple build objects.
+
+
+Build framework makefiles
+-------------------------
+
+The build framework consists of 2 Makefiles:
+
+ Build.include
+ Makefile.build
+
+While the 'Build.include' file contains just some generic definitions, the
+'Makefile.build' file is the makefile used from the outside. Its
+interface/usage is as follows:
+
+ $ make -f tools/build/Makefile.build srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT)
+
+where:
+
+ KSRC - is the path to kernel sources
+ DIR - is the path to the project to be built
+ OBJECT - is the name of the build object
+
+When successfully finished, the $(DIR) directory contains the final object file
+called $(OBJECT)-in.o:
+
+ $ ls $(DIR)/$(OBJECT)-in.o
+
+which includes all compiled sources described in 'Build' makefiles.
+
+
+Build makefiles
+---------------
+
+The user supplies 'Build' makefiles that contain an objects list, and connect
+the build to nested directories.
+
+Assume we have the following project structure:
+
+ ex/a.c
+ /b.c
+ /c.c
+ /d.c
+ /arch/e.c
+ /arch/f.c
+
+Out of these you build the 'ex' binary and the 'libex.a' library:
+
+ 'ex' - consists of 'a.o', 'b.o' and libex.a
+ 'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o'
+
+The build framework does not create the 'ex' and 'libex.a' binaries for you;
+it only prepares proper objects to be compiled and grouped together.
+
+To follow the above example, the user provides the following 'Build' files:
+
+ ex/Build:
+ ex-y += a.o
+ ex-y += b.o
+ ex-y += b.o # duplicates in the lists are allowed
+
+ libex-y += c.o
+ libex-y += d.o
+ libex-y += arch/
+
+ ex/arch/Build:
+ libex-y += e.o
+ libex-y += f.o
+
+and runs:
+
+ $ make -f tools/build/Makefile.build dir=. obj=ex
+ $ make -f tools/build/Makefile.build dir=. obj=libex
+
+which creates the following objects:
+
+ ex/ex-in.o
+ ex/libex-in.o
+
+which contain the objects requested by name in the 'Build' files.
+
+Creating the final binaries is then a matter of two simple commands:
+
+ $ ar rcs libex.a libex-in.o
+ $ gcc -o ex ex-in.o libex.a
+
+You can check the 'ex' example in 'tools/build/tests/ex' for more details.
+
+
+Makefile.include
+----------------
+
+The tools/build/Makefile.include makefile can be included
+by user makefiles to get useful definitions.
+
+It defines the following interface:
+
+ - build macro definition:
+ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+ to make it easier to invoke build like:
+ make $(build)=ex
+
+
+Fixdep
+------
+It is necessary to build the fixdep helper before invoking the build.
+The Makefile.include file adds the fixdep target, which can be
+invoked by the user.
+
+
+Rules
+-----
+
+The build framework provides standard compilation rules to handle .S and .c
+compilation.
+
+It's possible to include a special rule if needed (like we do for flex or
+bison code generation).
+
+
+CFLAGS
+------
+
+It's possible to alter the standard object C flags in the following way:
+
+ CFLAGS_perf.o += '...' - adds CFLAGS for perf.o object
+ CFLAGS_gtk += '...' - adds CFLAGS for gtk build object
+ CFLAGS_REMOVE_perf.o += '...' - removes CFLAGS for perf.o object
+ CFLAGS_REMOVE_gtk += '...' - removes CFLAGS for gtk build object
+
+These CFLAGS changes have the scope of the Build makefile they are defined in.
+
+
+Dependencies
+------------
+
+For each built object file 'a.o', a '.a.cmd' file is created and holds:
+
+ - The command line used to build that object
+ (for each object)
+
+ - Dependency rules generated by 'gcc -Wp,-MD,...'
+ (for compiled object)
+
+All existing '.cmd' files are included in the build process to properly
+follow the dependencies and trigger a rebuild when necessary.
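+
+For illustration, after building 'ex/a.o' the generated 'ex/.a.o.cmd' file
+would hold roughly the following (command line abbreviated, header paths
+hypothetical):
+
+  cmd_ex/a.o := gcc -Wp,-MD,ex/.a.o.d ... -c -o ex/a.o a.c
+
+  ex/a.o: a.c /usr/include/stdio.h ...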
+
+
+Single rules
+------------
+
+It's possible to build a single object file of your choice, like:
+
+ $ make util/map.o # objects
+ $ make util/map.i # preprocessor
+ $ make util/map.s # assembly
diff --git a/tools/build/Makefile b/tools/build/Makefile
new file mode 100644
index 000000000..8a55378e8
--- /dev/null
+++ b/tools/build/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+include $(srctree)/tools/scripts/Makefile.include
+
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+export HOSTCC HOSTLD HOSTAR
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+export Q srctree CC LD
+
+MAKEFLAGS := --no-print-directory
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+all: $(OUTPUT)fixdep
+
+clean:
+ $(call QUIET_CLEAN, fixdep)
+ $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)rm -f $(OUTPUT)fixdep
+
+$(OUTPUT)fixdep-in.o: FORCE
+ $(Q)$(MAKE) $(build)=fixdep
+
+$(OUTPUT)fixdep: $(OUTPUT)fixdep-in.o
+ $(QUIET_LINK)$(HOSTCC) $(KBUILD_HOSTLDFLAGS) -o $@ $<
+
+FORCE:
+
+.PHONY: FORCE
diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
new file mode 100644
index 000000000..cd72016c3
--- /dev/null
+++ b/tools/build/Makefile.build
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: GPL-2.0
+###
+# Main build makefile.
+#
+# Much of this code has been borrowed from, or heavily inspired by, parts
+# of the kbuild code, which is not credited, but mostly developed by:
+#
+# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015
+# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015
+#
+
+PHONY := __build
+__build:
+
+ifeq ($(V),1)
+ quiet =
+ Q =
+else
+ quiet=quiet_
+ Q=@
+endif
+
+ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
+ quiet=silent_
+endif
+
+build-dir := $(srctree)/tools/build
+
+# Define $(fixdep) for dep-cmd function
+ifeq ($(OUTPUT),)
+ fixdep := $(build-dir)/fixdep
+else
+ fixdep := $(OUTPUT)/fixdep
+endif
+
+# Generic definitions
+include $(build-dir)/Build.include
+
+# do not force detected configuration
+-include $(OUTPUT).config-detected
+
+# Init all relevant variables used in build files so
+# 1) they have correct type
+# 2) they do not inherit any value from the environment
+subdir-y :=
+obj-y :=
+subdir-obj-y :=
+
+# Build definitions
+build-file := $(dir)/Build
+-include $(build-file)
+
+quiet_cmd_flex = FLEX $@
+quiet_cmd_bison = BISON $@
+
+# Create directory unless it exists
+quiet_cmd_mkdir = MKDIR $(dir $@)
+ cmd_mkdir = mkdir -p $(dir $@)
+ rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir))
+
+# Compile command
+quiet_cmd_cc_o_c = CC $@
+ cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $<
+
+quiet_cmd_host_cc_o_c = HOSTCC $@
+ cmd_host_cc_o_c = $(HOSTCC) $(host_c_flags) -c -o $@ $<
+
+quiet_cmd_cxx_o_c = CXX $@
+ cmd_cxx_o_c = $(CXX) $(cxx_flags) -c -o $@ $<
+
+quiet_cmd_cpp_i_c = CPP $@
+ cmd_cpp_i_c = $(CC) $(c_flags) -E -o $@ $<
+
+quiet_cmd_cc_s_c = AS $@
+ cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $<
+
+quiet_cmd_gen = GEN $@
+
+# Link aggregate command
+# If there's nothing to link, create an empty $@ object.
+quiet_cmd_ld_multi = LD $@
+ cmd_ld_multi = $(if $(strip $(obj-y)),\
+ $(LD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(AR) rcs $@)
+
+quiet_cmd_host_ld_multi = HOSTLD $@
+ cmd_host_ld_multi = $(if $(strip $(obj-y)),\
+ $(HOSTLD) -r -o $@ $(filter $(obj-y),$^),rm -f $@; $(HOSTAR) rcs $@)
+
+ifneq ($(filter $(obj),$(hostprogs)),)
+ host = host_
+endif
+
+# Build rules
+$(OUTPUT)%.o: %.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,$(host)cc_o_c)
+
+$(OUTPUT)%.o: %.cpp FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cxx_o_c)
+
+$(OUTPUT)%.o: %.S FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,$(host)cc_o_c)
+
+$(OUTPUT)%.i: %.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cpp_i_c)
+
+$(OUTPUT)%.s: %.S FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cpp_i_c)
+
+$(OUTPUT)%.s: %.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_s_c)
+
+# Gather build data:
+# obj-y - list of build objects
+# subdir-y - list of directories to nest
+# subdir-obj-y - list of directories objects 'dir/$(obj)-in.o'
+obj-y := $($(obj)-y)
+subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y)))
+obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y))
+subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y))
+
+# '$(OUTPUT)/dir' prefix to all objects
+objprefix := $(subst ./,,$(OUTPUT)$(dir)/)
+obj-y := $(addprefix $(objprefix),$(obj-y))
+subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y))
+
+# Final '$(obj)-in.o' object
+in-target := $(objprefix)$(obj)-in.o
+
+PHONY += $(subdir-y)
+
+$(subdir-y):
+ $(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj)
+
+$(sort $(subdir-obj-y)): $(subdir-y) ;
+
+$(in-target): $(obj-y) FORCE
+ $(call rule_mkdir)
+ $(call if_changed,$(host)ld_multi)
+
+__build: $(in-target)
+ @:
+
+PHONY += FORCE
+FORCE:
+
+# Include all cmd files to get all the dependency rules
+# for all objects included
+targets := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS)))
+cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd))
+
+ifneq ($(cmd_files),)
+ include $(cmd_files)
+endif
+
+.PHONY: $(PHONY)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
new file mode 100644
index 000000000..bbc9efa20
--- /dev/null
+++ b/tools/build/Makefile.feature
@@ -0,0 +1,241 @@
+feature_dir := $(srctree)/tools/build/feature
+
+ifneq ($(OUTPUT),)
+ OUTPUT_FEATURES = $(OUTPUT)feature/
+ $(shell mkdir -p $(OUTPUT_FEATURES))
+endif
+
+feature_check = $(eval $(feature_check_code))
+define feature_check_code
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+endef
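+
+# For example, $(call feature_check,libelf) tries to build
+# feature/test-libelf.bin and sets feature-libelf to 1 on success or
+# 0 on failure.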
+
+feature_set = $(eval $(feature_set_code))
+define feature_set_code
+ feature-$(1) := 1
+endef
+
+#
+# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output:
+#
+
+#
+# Note that this is not a complete list of all feature tests, just
+# those that are typically built on a fully configured system.
+#
+# [ Feature tests not mentioned here have to be built explicitly in
+# the rule that uses them - an example for that is the 'bionic'
+# feature check. ]
+#
+FEATURE_TESTS_BASIC := \
+ backtrace \
+ dwarf \
+ dwarf_getlocations \
+ eventfd \
+ fortify-source \
+ sync-compare-and-swap \
+ get_current_dir_name \
+ gettid \
+ glibc \
+ gtk2 \
+ gtk2-infobar \
+ libaudit \
+ libbfd \
+ libelf \
+ libelf-getphdrnum \
+ libelf-gelf_getnote \
+ libelf-getshdrstrndx \
+ libelf-mmap \
+ libnuma \
+ numa_num_possible_cpus \
+ libperl \
+ libpython \
+ libslang \
+ libcrypto \
+ libunwind \
+ libunwind-x86 \
+ libunwind-x86_64 \
+ libunwind-arm \
+ libunwind-aarch64 \
+ pthread-attr-setaffinity-np \
+ pthread-barrier \
+ reallocarray \
+ stackprotector-all \
+ timerfd \
+ libdw-dwarf-unwind \
+ zlib \
+ lzma \
+ get_cpuid \
+ bpf \
+ sched_getcpu \
+ sdt \
+ setns \
+ libopencsd
+
+# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
+# of all feature tests
+FEATURE_TESTS_EXTRA := \
+ bionic \
+ compile-32 \
+ compile-x32 \
+ cplus-demangle \
+ hello \
+ libbabeltrace \
+ libbfd-liberty \
+ libbfd-liberty-z \
+ libunwind-debug-frame \
+ libunwind-debug-frame-arm \
+ libunwind-debug-frame-aarch64 \
+ cxx \
+ llvm \
+ llvm-version \
+ clang
+
+FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
+
+ifeq ($(FEATURE_TESTS),all)
+ FEATURE_TESTS := $(FEATURE_TESTS_BASIC) $(FEATURE_TESTS_EXTRA)
+endif
+
+FEATURE_DISPLAY ?= \
+ dwarf \
+ dwarf_getlocations \
+ glibc \
+ gtk2 \
+ libaudit \
+ libbfd \
+ libelf \
+ libnuma \
+ numa_num_possible_cpus \
+ libperl \
+ libpython \
+ libslang \
+ libcrypto \
+ libunwind \
+ libdw-dwarf-unwind \
+ zlib \
+ lzma \
+ get_cpuid \
+ bpf
+
+# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
+# If in the future we need per-feature checks/flags for features not
+# mentioned in this list we need to refactor this ;-).
+set_test_all_flags = $(eval $(set_test_all_flags_code))
+define set_test_all_flags_code
+ FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1))
+ FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1))
+endef
+
+$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat)))
+
+#
+# Special fast-path for the 'all features are available' case:
+#
+$(call feature_check,all,$(MSG))
+
+#
+# Just in case the build freshly failed, make sure we print the
+# feature matrix:
+#
+ifeq ($(feature-all), 1)
+ #
+ # test-all.c passed - just set all the core feature flags to 1:
+ #
+ $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat)))
+ #
+	# test-all.c does not include these tests, so we need to
+	# check them separately here to get proper feature values
+ #
+ $(call feature_check,compile-32)
+ $(call feature_check,compile-x32)
+ $(call feature_check,bionic)
+ $(call feature_check,libbabeltrace)
+else
+ $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat)))
+endif
+
+#
+# Print the result of the feature test:
+#
+feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG))
+
+define feature_print_status_code
+ ifeq ($(feature-$(1)), 1)
+ MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1))
+ else
+ MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
+ endif
+endef
+
+feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG))
+define feature_print_text_code
+ MSG = $(shell printf '...%30s: %s' $(1) $(2))
+endef
+
+#
+# generates feature value assignment for name, like:
+# $(call feature_assign,dwarf) == feature-dwarf=1
+#
+feature_assign = feature-$(1)=$(feature-$(1))
+
+FEATURE_DUMP_FILENAME = $(OUTPUT)FEATURE-DUMP$(FEATURE_USER)
+FEATURE_DUMP := $(shell touch $(FEATURE_DUMP_FILENAME); cat $(FEATURE_DUMP_FILENAME))
+
+feature_dump_check = $(eval $(feature_dump_check_code))
+define feature_dump_check_code
+ ifeq ($(findstring $(1),$(FEATURE_DUMP)),)
+ $(2) := 1
+ endif
+endef
+
+#
+# First check if any test from FEATURE_DISPLAY has changed,
+# and set feature_display := 1 if it has
+$(foreach feat,$(FEATURE_DISPLAY),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_display))
+
+#
+# Now also check if any other test changed,
+# so we force FEATURE-DUMP generation
+$(foreach feat,$(FEATURE_TESTS),$(call feature_dump_check,$(call feature_assign,$(feat)),feature_dump_changed))
+
+# The $(feature_display) controls the default detection message
+# output. It's set if:
+#  - detected features differ from the features stored from the
+#    last build (in the $(FEATURE_DUMP_FILENAME) file)
+# - one of the $(FEATURE_DISPLAY) is not detected
+# - VF is enabled
+
+ifeq ($(feature_dump_changed),1)
+ $(shell rm -f $(FEATURE_DUMP_FILENAME))
+ $(foreach feat,$(FEATURE_TESTS),$(shell echo "$(call feature_assign,$(feat))" >> $(FEATURE_DUMP_FILENAME)))
+endif
+
+feature_display_check = $(eval $(feature_check_display_code))
+define feature_check_display_code
+ ifneq ($(feature-$(1)), 1)
+ feature_display := 1
+ endif
+endef
+
+$(foreach feat,$(FEATURE_DISPLAY),$(call feature_display_check,$(feat)))
+
+ifeq ($(VF),1)
+ feature_display := 1
+ feature_verbose := 1
+endif
+
+ifeq ($(feature_display),1)
+ $(info )
+ $(info Auto-detecting system features:)
+ $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),))
+ ifneq ($(feature_verbose),1)
+ $(info )
+ endif
+endif
+
+ifeq ($(feature_verbose),1)
+ TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS))
+ $(foreach feat,$(TMP),$(call feature_print_status,$(feat),))
+ $(info )
+endif
diff --git a/tools/build/Makefile.include b/tools/build/Makefile.include
new file mode 100644
index 000000000..d360f39a4
--- /dev/null
+++ b/tools/build/Makefile.include
@@ -0,0 +1,9 @@
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+fixdep:
+ $(Q)$(MAKE) -C $(srctree)/tools/build CFLAGS= LDFLAGS= $(OUTPUT)fixdep
+
+fixdep-clean:
+ $(Q)$(MAKE) -C $(srctree)/tools/build clean
+
+.PHONY: fixdep
diff --git a/tools/build/feature/.gitignore b/tools/build/feature/.gitignore
new file mode 100644
index 000000000..09b335b98
--- /dev/null
+++ b/tools/build/feature/.gitignore
@@ -0,0 +1,3 @@
+*.d
+*.bin
+*.output
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
new file mode 100644
index 000000000..763ee60ac
--- /dev/null
+++ b/tools/build/feature/Makefile
@@ -0,0 +1,304 @@
+# SPDX-License-Identifier: GPL-2.0
+FILES= \
+ test-all.bin \
+ test-backtrace.bin \
+ test-bionic.bin \
+ test-dwarf.bin \
+ test-dwarf_getlocations.bin \
+ test-eventfd.bin \
+ test-fortify-source.bin \
+ test-sync-compare-and-swap.bin \
+ test-get_current_dir_name.bin \
+ test-glibc.bin \
+ test-gtk2.bin \
+ test-gtk2-infobar.bin \
+ test-hello.bin \
+ test-libaudit.bin \
+ test-libbfd.bin \
+ test-disassembler-four-args.bin \
+ test-reallocarray.bin \
+ test-libbfd-liberty.bin \
+ test-libbfd-liberty-z.bin \
+ test-cplus-demangle.bin \
+ test-libelf.bin \
+ test-libelf-getphdrnum.bin \
+ test-libelf-gelf_getnote.bin \
+ test-libelf-getshdrstrndx.bin \
+ test-libelf-mmap.bin \
+ test-libnuma.bin \
+ test-numa_num_possible_cpus.bin \
+ test-libperl.bin \
+ test-libpython.bin \
+ test-libslang.bin \
+ test-libcrypto.bin \
+ test-libunwind.bin \
+ test-libunwind-debug-frame.bin \
+ test-libunwind-x86.bin \
+ test-libunwind-x86_64.bin \
+ test-libunwind-arm.bin \
+ test-libunwind-aarch64.bin \
+ test-libunwind-debug-frame-arm.bin \
+ test-libunwind-debug-frame-aarch64.bin \
+ test-pthread-attr-setaffinity-np.bin \
+ test-pthread-barrier.bin \
+ test-stackprotector-all.bin \
+ test-timerfd.bin \
+ test-libdw-dwarf-unwind.bin \
+ test-libbabeltrace.bin \
+ test-compile-32.bin \
+ test-compile-x32.bin \
+ test-zlib.bin \
+ test-lzma.bin \
+ test-bpf.bin \
+ test-get_cpuid.bin \
+ test-sdt.bin \
+ test-cxx.bin \
+ test-gettid.bin \
+ test-jvmti.bin \
+ test-sched_getcpu.bin \
+ test-setns.bin \
+ test-libopencsd.bin \
+ test-clang.bin \
+ test-llvm.bin \
+ test-llvm-version.bin
+
+FILES := $(addprefix $(OUTPUT),$(FILES))
+
+PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
+LLVM_CONFIG ?= llvm-config
+
+all: $(FILES)
+
+__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
+ BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
+
+__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
+ BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
+
+###############################
+
+$(OUTPUT)test-all.bin:
+ $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
+
+$(OUTPUT)test-hello.bin:
+ $(BUILD)
+
+$(OUTPUT)test-pthread-attr-setaffinity-np.bin:
+ $(BUILD) -D_GNU_SOURCE -lpthread
+
+$(OUTPUT)test-pthread-barrier.bin:
+ $(BUILD) -lpthread
+
+$(OUTPUT)test-stackprotector-all.bin:
+ $(BUILD) -fstack-protector-all
+
+$(OUTPUT)test-fortify-source.bin:
+ $(BUILD) -O2 -D_FORTIFY_SOURCE=2
+
+$(OUTPUT)test-bionic.bin:
+ $(BUILD)
+
+$(OUTPUT)test-libelf.bin:
+ $(BUILD) -lelf
+
+$(OUTPUT)test-eventfd.bin:
+ $(BUILD)
+
+$(OUTPUT)test-get_current_dir_name.bin:
+ $(BUILD)
+
+$(OUTPUT)test-glibc.bin:
+ $(BUILD)
+
+$(OUTPUT)test-sched_getcpu.bin:
+ $(BUILD)
+
+$(OUTPUT)test-setns.bin:
+ $(BUILD)
+
+$(OUTPUT)test-libopencsd.bin:
+ $(BUILD) # -lopencsd_c_api -lopencsd provided by
+ # $(FEATURE_CHECK_LDFLAGS-libopencsd)
+
+DWARFLIBS := -ldw
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
+endif
+
+$(OUTPUT)test-dwarf.bin:
+ $(BUILD) $(DWARFLIBS)
+
+$(OUTPUT)test-dwarf_getlocations.bin:
+ $(BUILD) $(DWARFLIBS)
+
+$(OUTPUT)test-libelf-mmap.bin:
+ $(BUILD) -lelf
+
+$(OUTPUT)test-libelf-getphdrnum.bin:
+ $(BUILD) -lelf
+
+$(OUTPUT)test-libelf-gelf_getnote.bin:
+ $(BUILD) -lelf
+
+$(OUTPUT)test-libelf-getshdrstrndx.bin:
+ $(BUILD) -lelf
+
+$(OUTPUT)test-libnuma.bin:
+ $(BUILD) -lnuma
+
+$(OUTPUT)test-numa_num_possible_cpus.bin:
+ $(BUILD) -lnuma
+
+$(OUTPUT)test-libunwind.bin:
+ $(BUILD) -lelf
+
+$(OUTPUT)test-libunwind-debug-frame.bin:
+	$(BUILD) -lelf
+
+$(OUTPUT)test-libunwind-x86.bin:
+ $(BUILD) -lelf -lunwind-x86
+
+$(OUTPUT)test-libunwind-x86_64.bin:
+ $(BUILD) -lelf -lunwind-x86_64
+
+$(OUTPUT)test-libunwind-arm.bin:
+ $(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-aarch64.bin:
+ $(BUILD) -lelf -lunwind-aarch64
+
+$(OUTPUT)test-libunwind-debug-frame-arm.bin:
+ $(BUILD) -lelf -lunwind-arm
+
+$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
+ $(BUILD) -lelf -lunwind-aarch64
+
+$(OUTPUT)test-libaudit.bin:
+ $(BUILD) -laudit
+
+$(OUTPUT)test-libslang.bin:
+ $(BUILD) -I/usr/include/slang -lslang
+
+$(OUTPUT)test-libcrypto.bin:
+ $(BUILD) -lcrypto
+
+$(OUTPUT)test-gtk2.bin:
+ $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
+
+$(OUTPUT)test-gtk2-infobar.bin:
+ $(BUILD) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
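+# e.g. with "-L/usr/lib -lperl -ldl": grep-libs keeps "-lperl -ldl" and
+# strip-libs keeps "-L/usr/lib"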
+
+PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
+PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
+PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
+PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+
+ifeq ($(CC_NO_CLANG), 0)
+ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ FLAGS_PERL_EMBED += -Wno-compound-token-split-by-macro
+endif
+
+$(OUTPUT)test-libperl.bin:
+ $(BUILD) $(FLAGS_PERL_EMBED)
+
+$(OUTPUT)test-libpython.bin:
+ $(BUILD) $(FLAGS_PYTHON_EMBED)
+
+$(OUTPUT)test-libbfd.bin:
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+
+$(OUTPUT)test-disassembler-four-args.bin:
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -lopcodes
+
+$(OUTPUT)test-reallocarray.bin:
+ $(BUILD)
+
+$(OUTPUT)test-libbfd-liberty.bin:
+ $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty
+
+$(OUTPUT)test-libbfd-liberty-z.bin:
+ $(CC) $(CFLAGS) -Wall -Werror -o $@ test-libbfd.c -DPACKAGE='"perf"' $(LDFLAGS) -lbfd -ldl -liberty -lz
+
+$(OUTPUT)test-cplus-demangle.bin:
+ $(BUILD) -liberty
+
+$(OUTPUT)test-backtrace.bin:
+ $(BUILD)
+
+$(OUTPUT)test-timerfd.bin:
+ $(BUILD)
+
+$(OUTPUT)test-libdw-dwarf-unwind.bin:
+ $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind)
+
+$(OUTPUT)test-libbabeltrace.bin:
+ $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace)
+
+$(OUTPUT)test-sync-compare-and-swap.bin:
+ $(BUILD)
+
+$(OUTPUT)test-compile-32.bin:
+ $(CC) -m32 -o $@ test-compile.c
+
+$(OUTPUT)test-compile-x32.bin:
+ $(CC) -mx32 -o $@ test-compile.c
+
+$(OUTPUT)test-zlib.bin:
+ $(BUILD) -lz
+
+$(OUTPUT)test-lzma.bin:
+ $(BUILD) -llzma
+
+$(OUTPUT)test-get_cpuid.bin:
+ $(BUILD)
+
+$(OUTPUT)test-bpf.bin:
+ $(BUILD)
+
+$(OUTPUT)test-sdt.bin:
+ $(BUILD)
+
+$(OUTPUT)test-cxx.bin:
+ $(BUILDXX) -std=gnu++11
+
+$(OUTPUT)test-gettid.bin:
+ $(BUILD)
+
+$(OUTPUT)test-jvmti.bin:
+ $(BUILD)
+
+$(OUTPUT)test-llvm.bin:
+ $(BUILDXX) -std=gnu++11 \
+ -I$(shell $(LLVM_CONFIG) --includedir) \
+ -L$(shell $(LLVM_CONFIG) --libdir) \
+ $(shell $(LLVM_CONFIG) --libs Core BPF) \
+ $(shell $(LLVM_CONFIG) --system-libs) \
+ > $(@:.bin=.make.output) 2>&1
+
+$(OUTPUT)test-llvm-version.bin:
+ $(BUILDXX) -std=gnu++11 \
+ -I$(shell $(LLVM_CONFIG) --includedir) \
+ > $(@:.bin=.make.output) 2>&1
+
+$(OUTPUT)test-clang.bin:
+ $(BUILDXX) -std=gnu++11 \
+ -I$(shell $(LLVM_CONFIG) --includedir) \
+ -L$(shell $(LLVM_CONFIG) --libdir) \
+ -Wl,--start-group -lclangBasic -lclangDriver \
+ -lclangFrontend -lclangEdit -lclangLex \
+ -lclangAST -Wl,--end-group \
+ $(shell $(LLVM_CONFIG) --libs Core option) \
+ $(shell $(LLVM_CONFIG) --system-libs) \
+ > $(@:.bin=.make.output) 2>&1
+
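+# Pull in the .d files written by -MD above, so that a feature test is
+# rebuilt whenever one of the headers it probed changes.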
+-include $(OUTPUT)*.d
+
+###############################
+
+clean:
+ rm -f $(FILES) $(OUTPUT)*.d $(FILES:.bin=.make.output)
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
new file mode 100644
index 000000000..1004a8cd7
--- /dev/null
+++ b/tools/build/feature/test-all.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test-all.c: Try to build all the main testcases at once.
+ *
+ * A well-configured system will have all the prereqs installed, so we can speed
+ * up auto-detection on such systems.
+ */
+
+/*
+ * Quirk: Python and Perl headers cannot be in arbitrary places, so keep
+ * these two testcases at the top:
+ */
+#define main main_test_libpython
+# include "test-libpython.c"
+#undef main
+
+#define main main_test_libperl
+# include "test-libperl.c"
+#undef main
+
+#define main main_test_hello
+# include "test-hello.c"
+#undef main
+
+#define main main_test_libelf
+# include "test-libelf.c"
+#undef main
+
+#define main main_test_libelf_mmap
+# include "test-libelf-mmap.c"
+#undef main
+
+#define main main_test_get_current_dir_name
+# include "test-get_current_dir_name.c"
+#undef main
+
+#define main main_test_gettid
+# include "test-gettid.c"
+#undef main
+
+#define main main_test_glibc
+# include "test-glibc.c"
+#undef main
+
+#define main main_test_dwarf
+# include "test-dwarf.c"
+#undef main
+
+#define main main_test_dwarf_getlocations
+# include "test-dwarf_getlocations.c"
+#undef main
+
+#define main main_test_eventfd
+# include "test-eventfd.c"
+#undef main
+
+#define main main_test_libelf_getphdrnum
+# include "test-libelf-getphdrnum.c"
+#undef main
+
+#define main main_test_libelf_gelf_getnote
+# include "test-libelf-gelf_getnote.c"
+#undef main
+
+#define main main_test_libelf_getshdrstrndx
+# include "test-libelf-getshdrstrndx.c"
+#undef main
+
+#define main main_test_libunwind
+# include "test-libunwind.c"
+#undef main
+
+#define main main_test_libaudit
+# include "test-libaudit.c"
+#undef main
+
+#define main main_test_libslang
+# include "test-libslang.c"
+#undef main
+
+#define main main_test_gtk2
+# include "test-gtk2.c"
+#undef main
+
+#define main main_test_gtk2_infobar
+# include "test-gtk2-infobar.c"
+#undef main
+
+#define main main_test_libbfd
+# include "test-libbfd.c"
+#undef main
+
+#define main main_test_backtrace
+# include "test-backtrace.c"
+#undef main
+
+#define main main_test_libnuma
+# include "test-libnuma.c"
+#undef main
+
+#define main main_test_numa_num_possible_cpus
+# include "test-numa_num_possible_cpus.c"
+#undef main
+
+#define main main_test_timerfd
+# include "test-timerfd.c"
+#undef main
+
+#define main main_test_stackprotector_all
+# include "test-stackprotector-all.c"
+#undef main
+
+#define main main_test_libdw_dwarf_unwind
+# include "test-libdw-dwarf-unwind.c"
+#undef main
+
+#define main main_test_sync_compare_and_swap
+# include "test-sync-compare-and-swap.c"
+#undef main
+
+#define main main_test_zlib
+# include "test-zlib.c"
+#undef main
+
+#define main main_test_pthread_attr_setaffinity_np
+# include "test-pthread-attr-setaffinity-np.c"
+#undef main
+
+#define main main_test_pthread_barrier
+# include "test-pthread-barrier.c"
+#undef main
+
+#define main main_test_sched_getcpu
+# include "test-sched_getcpu.c"
+#undef main
+
+# if 0
+/*
+ * Disable libbabeltrace check for test-all, because the requested
+ * library version is not released yet in most distributions. Will
+ * reenable later.
+ */
+
+#define main main_test_libbabeltrace
+# include "test-libbabeltrace.c"
+#undef main
+#endif
+
+#define main main_test_lzma
+# include "test-lzma.c"
+#undef main
+
+#define main main_test_get_cpuid
+# include "test-get_cpuid.c"
+#undef main
+
+#define main main_test_bpf
+# include "test-bpf.c"
+#undef main
+
+#define main main_test_libcrypto
+# include "test-libcrypto.c"
+#undef main
+
+#define main main_test_sdt
+# include "test-sdt.c"
+#undef main
+
+#define main main_test_setns
+# include "test-setns.c"
+#undef main
+
+#define main main_test_libopencsd
+# include "test-libopencsd.c"
+#undef main
+
+int main(int argc, char *argv[])
+{
+ main_test_libpython();
+ main_test_libperl();
+ main_test_hello();
+ main_test_libelf();
+ main_test_libelf_mmap();
+ main_test_get_current_dir_name();
+ main_test_gettid();
+ main_test_glibc();
+ main_test_dwarf();
+ main_test_dwarf_getlocations();
+ main_test_eventfd();
+ main_test_libelf_getphdrnum();
+ main_test_libelf_gelf_getnote();
+ main_test_libelf_getshdrstrndx();
+ main_test_libunwind();
+ main_test_libaudit();
+ main_test_libslang();
+ main_test_gtk2(argc, argv);
+ main_test_gtk2_infobar(argc, argv);
+ main_test_libbfd();
+ main_test_backtrace();
+ main_test_libnuma();
+ main_test_numa_num_possible_cpus();
+ main_test_timerfd();
+ main_test_stackprotector_all();
+ main_test_libdw_dwarf_unwind();
+ main_test_sync_compare_and_swap(argc, argv);
+ main_test_zlib();
+ main_test_pthread_attr_setaffinity_np();
+ main_test_pthread_barrier();
+ main_test_lzma();
+ main_test_get_cpuid();
+ main_test_bpf();
+ main_test_libcrypto();
+ main_test_sched_getcpu();
+ main_test_sdt();
+ main_test_setns();
+ main_test_libopencsd();
+
+ return 0;
+}
diff --git a/tools/build/feature/test-backtrace.c b/tools/build/feature/test-backtrace.c
new file mode 100644
index 000000000..e9ddd27c6
--- /dev/null
+++ b/tools/build/feature/test-backtrace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <execinfo.h>
+#include <stdio.h>
+
+int main(void)
+{
+ void *backtrace_fns[10];
+ size_t entries;
+
+ entries = backtrace(backtrace_fns, 10);
+ backtrace_symbols_fd(backtrace_fns, entries, 1);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-bionic.c b/tools/build/feature/test-bionic.c
new file mode 100644
index 000000000..4bcc97765
--- /dev/null
+++ b/tools/build/feature/test-bionic.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <android/api-level.h>
+
+int main(void)
+{
+ return __ANDROID_API__;
+}
diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c
new file mode 100644
index 000000000..82070eadf
--- /dev/null
+++ b/tools/build/feature/test-bpf.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/unistd.h>
+#include <linux/bpf.h>
+#include <unistd.h>
+
+#ifndef __NR_bpf
+# if defined(__i386__)
+# define __NR_bpf 357
+# elif defined(__x86_64__)
+# define __NR_bpf 321
+# elif defined(__aarch64__)
+# define __NR_bpf 280
+# elif defined(__sparc__)
+# define __NR_bpf 349
+# elif defined(__s390__)
+# define __NR_bpf 351
+# else
+# error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+int main(void)
+{
+ union bpf_attr attr;
+
+ /* Check fields in attr */
+ attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ attr.insn_cnt = 0;
+ attr.insns = 0;
+ attr.license = 0;
+ attr.log_buf = 0;
+ attr.log_size = 0;
+ attr.log_level = 0;
+ attr.kern_version = 0;
+ attr.prog_flags = 0;
+
+ /*
+ * Test existence of __NR_bpf and BPF_PROG_LOAD.
+ * This call should fail if we run the testcase.
+ */
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
diff --git a/tools/build/feature/test-clang.cpp b/tools/build/feature/test-clang.cpp
new file mode 100644
index 000000000..a2b3f092d
--- /dev/null
+++ b/tools/build/feature/test-clang.cpp
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "clang/Basic/VirtualFileSystem.h"
+#include "clang/Driver/Driver.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace clang;
+using namespace clang::driver;
+
+int main()
+{
+ IntrusiveRefCntPtr<DiagnosticIDs> DiagID(new DiagnosticIDs());
+ IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
+
+ DiagnosticsEngine Diags(DiagID, &*DiagOpts);
+ Driver TheDriver("test", "bpf-pc-linux", Diags);
+
+ llvm::llvm_shutdown();
+ return 0;
+}
diff --git a/tools/build/feature/test-compile.c b/tools/build/feature/test-compile.c
new file mode 100644
index 000000000..9821b8271
--- /dev/null
+++ b/tools/build/feature/test-compile.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+int main(void)
+{
+ printf("Hello World!\n");
+ return 0;
+}
diff --git a/tools/build/feature/test-cplus-demangle.c b/tools/build/feature/test-cplus-demangle.c
new file mode 100644
index 000000000..2ba56474a
--- /dev/null
+++ b/tools/build/feature/test-cplus-demangle.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+extern int printf(const char *format, ...);
+extern char *cplus_demangle(const char *, int);
+
+int main(void)
+{
+ char symbol[4096] = "FieldName__9ClassNameFd";
+ char *tmp;
+
+ tmp = cplus_demangle(symbol, 0);
+
+ printf("demangled symbol: {%s}\n", tmp);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-cxx.cpp b/tools/build/feature/test-cxx.cpp
new file mode 100644
index 000000000..396aaedd2
--- /dev/null
+++ b/tools/build/feature/test-cxx.cpp
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <iostream>
+#include <memory>
+#include <string>	/* std::string */
+#include <utility>	/* std::move */
+
+static void print_str(std::string s)
+{
+ std::cout << s << std::endl;
+}
+
+int main()
+{
+ std::string s("Hello World!");
+ print_str(std::move(s));
+ std::cout << "|" << s << "|" << std::endl;
+ return 0;
+}
diff --git a/tools/build/feature/test-disassembler-four-args.c b/tools/build/feature/test-disassembler-four-args.c
new file mode 100644
index 000000000..45ce65cfd
--- /dev/null
+++ b/tools/build/feature/test-disassembler-four-args.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bfd.h>
+#include <dis-asm.h>
+
+int main(void)
+{
+ bfd *abfd = bfd_openr(NULL, NULL);
+
+ disassembler(bfd_get_arch(abfd),
+ bfd_big_endian(abfd),
+ bfd_get_mach(abfd),
+ abfd);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-dwarf.c b/tools/build/feature/test-dwarf.c
new file mode 100644
index 000000000..8d474bd73
--- /dev/null
+++ b/tools/build/feature/test-dwarf.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/version.h>
+
+int main(void)
+{
+ Dwarf *dbg = dwarf_begin(0, DWARF_C_READ);
+
+ return (long)dbg;
+}
diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c
new file mode 100644
index 000000000..78fb4a1fa
--- /dev/null
+++ b/tools/build/feature/test-dwarf_getlocations.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <elfutils/libdw.h>
+
+int main(void)
+{
+ Dwarf_Addr base, start, end;
+ Dwarf_Attribute attr;
+ Dwarf_Op *op;
+ size_t nops;
+ ptrdiff_t offset = 0;
+ return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops);
+}
diff --git a/tools/build/feature/test-eventfd.c b/tools/build/feature/test-eventfd.c
new file mode 100644
index 000000000..f4de7ef00
--- /dev/null
+++ b/tools/build/feature/test-eventfd.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include <sys/eventfd.h>
+
+int main(void)
+{
+ return eventfd(0, EFD_NONBLOCK);
+}
diff --git a/tools/build/feature/test-fortify-source.c b/tools/build/feature/test-fortify-source.c
new file mode 100644
index 000000000..c9f398d87
--- /dev/null
+++ b/tools/build/feature/test-fortify-source.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c
new file mode 100644
index 000000000..bb4f065f2
--- /dev/null
+++ b/tools/build/feature/test-get_cpuid.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <cpuid.h>
+
+int main(void)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+ return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+}
diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c
new file mode 100644
index 000000000..573000f93
--- /dev/null
+++ b/tools/build/feature/test-get_current_dir_name.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdlib.h>
+
+int main(void)
+{
+ free(get_current_dir_name());
+ return 0;
+}
diff --git a/tools/build/feature/test-gettid.c b/tools/build/feature/test-gettid.c
new file mode 100644
index 000000000..ef24e42d3
--- /dev/null
+++ b/tools/build/feature/test-gettid.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+#define _GNU_SOURCE
+#include <unistd.h>
+
+int main(void)
+{
+ return gettid();
+}
+
+#undef _GNU_SOURCE
diff --git a/tools/build/feature/test-glibc.c b/tools/build/feature/test-glibc.c
new file mode 100644
index 000000000..9ab8e90e7
--- /dev/null
+++ b/tools/build/feature/test-glibc.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+
+#if !defined(__UCLIBC__)
+#include <gnu/libc-version.h>
+#else
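+/* two-step expansion, so the macros' values (not their names) get stringified */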
+#define XSTR(s) STR(s)
+#define STR(s) #s
+#endif
+
+int main(void)
+{
+#if !defined(__UCLIBC__)
+ const char *version = gnu_get_libc_version();
+#else
+ const char *version = XSTR(__GLIBC__) "." XSTR(__GLIBC_MINOR__);
+#endif
+
+ return (long)version;
+}
diff --git a/tools/build/feature/test-gtk2-infobar.c b/tools/build/feature/test-gtk2-infobar.c
new file mode 100644
index 000000000..b1b716dd5
--- /dev/null
+++ b/tools/build/feature/test-gtk2-infobar.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main(int argc, char *argv[])
+{
+ gtk_init(&argc, &argv);
+ gtk_info_bar_new();
+
+ return 0;
+}
diff --git a/tools/build/feature/test-gtk2.c b/tools/build/feature/test-gtk2.c
new file mode 100644
index 000000000..2aaf4bfc2
--- /dev/null
+++ b/tools/build/feature/test-gtk2.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+int main(int argc, char *argv[])
+{
+ gtk_init(&argc, &argv);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-hello.c b/tools/build/feature/test-hello.c
new file mode 100644
index 000000000..c9f398d87
--- /dev/null
+++ b/tools/build/feature/test-hello.c
@@ -0,0 +1,6 @@
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
diff --git a/tools/build/feature/test-jvmti.c b/tools/build/feature/test-jvmti.c
new file mode 100644
index 000000000..5cf31192f
--- /dev/null
+++ b/tools/build/feature/test-jvmti.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <jvmti.h>
+#include <jvmticmlr.h>
+
+int main(void)
+{
+ JavaVM jvm __attribute__((unused));
+ jvmtiEventCallbacks cb __attribute__((unused));
+ jvmtiCapabilities caps __attribute__((unused));
+ jvmtiJlocationFormat format __attribute__((unused));
+ jvmtiEnv jvmti __attribute__((unused));
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libaudit.c b/tools/build/feature/test-libaudit.c
new file mode 100644
index 000000000..f5b0863fa
--- /dev/null
+++ b/tools/build/feature/test-libaudit.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libaudit.h>
+
+extern int printf(const char *format, ...);
+
+int main(void)
+{
+ printf("error message: %s\n", audit_errno_to_name(0));
+
+ return audit_open();
+}
diff --git a/tools/build/feature/test-libbabeltrace.c b/tools/build/feature/test-libbabeltrace.c
new file mode 100644
index 000000000..10bb69d55
--- /dev/null
+++ b/tools/build/feature/test-libbabeltrace.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-ir/stream-class.h>
+
+int main(void)
+{
+ bt_ctf_stream_class_get_packet_context_type((void *) 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libbfd.c b/tools/build/feature/test-libbfd.c
new file mode 100644
index 000000000..afa46b046
--- /dev/null
+++ b/tools/build/feature/test-libbfd.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bfd.h>
+
+extern int printf(const char *format, ...);
+
+int main(void)
+{
+ char symbol[4096] = "FieldName__9ClassNameFd";
+ char *tmp;
+
+ tmp = bfd_demangle(0, symbol, 0);
+
+ printf("demangled symbol: {%s}\n", tmp);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libcrypto.c b/tools/build/feature/test-libcrypto.c
new file mode 100644
index 000000000..a98174e05
--- /dev/null
+++ b/tools/build/feature/test-libcrypto.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <openssl/sha.h>
+#include <openssl/md5.h>
+
+int main(void)
+{
+ MD5_CTX context;
+ unsigned char md[MD5_DIGEST_LENGTH + SHA_DIGEST_LENGTH];
+ unsigned char dat[] = "12345";
+
+ MD5_Init(&context);
+ MD5_Update(&context, &dat[0], sizeof(dat));
+ MD5_Final(&md[0], &context);
+
+ SHA1(&dat[0], sizeof(dat), &md[0]);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c
new file mode 100644
index 000000000..ed03d9505
--- /dev/null
+++ b/tools/build/feature/test-libdw-dwarf-unwind.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <elfutils/libdwfl.h>
+
+int main(void)
+{
+ /*
+	 * This function is guarded via: __nonnull_attribute__ (1, 2),
+	 * so '1' is passed as the argument values. This code is never
+	 * executed, only compiled.
+ */
+ dwfl_thread_getframes((void *) 1, (void *) 1, NULL);
+ return 0;
+}
diff --git a/tools/build/feature/test-libelf-gelf_getnote.c b/tools/build/feature/test-libelf-gelf_getnote.c
new file mode 100644
index 000000000..075d062fe
--- /dev/null
+++ b/tools/build/feature/test-libelf-gelf_getnote.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <gelf.h>
+
+int main(void)
+{
+ return gelf_getnote(NULL, 0, NULL, NULL, NULL);
+}
diff --git a/tools/build/feature/test-libelf-getphdrnum.c b/tools/build/feature/test-libelf-getphdrnum.c
new file mode 100644
index 000000000..96a7f8d30
--- /dev/null
+++ b/tools/build/feature/test-libelf-getphdrnum.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libelf.h>
+
+int main(void)
+{
+ size_t dst;
+
+ return elf_getphdrnum(0, &dst);
+}
diff --git a/tools/build/feature/test-libelf-getshdrstrndx.c b/tools/build/feature/test-libelf-getshdrstrndx.c
new file mode 100644
index 000000000..ae9f2fff5
--- /dev/null
+++ b/tools/build/feature/test-libelf-getshdrstrndx.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libelf.h>
+
+int main(void)
+{
+ size_t dst;
+
+ return elf_getshdrstrndx(0, &dst);
+}
diff --git a/tools/build/feature/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c
new file mode 100644
index 000000000..2c3ef81af
--- /dev/null
+++ b/tools/build/feature/test-libelf-mmap.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libelf.h>
+
+int main(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ_MMAP, 0);
+
+ return (long)elf;
+}
diff --git a/tools/build/feature/test-libelf.c b/tools/build/feature/test-libelf.c
new file mode 100644
index 000000000..905044127
--- /dev/null
+++ b/tools/build/feature/test-libelf.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libelf.h>
+
+int main(void)
+{
+ Elf *elf = elf_begin(0, ELF_C_READ, 0);
+
+ return (long)elf;
+}
diff --git a/tools/build/feature/test-libnuma.c b/tools/build/feature/test-libnuma.c
new file mode 100644
index 000000000..b3aa59f8b
--- /dev/null
+++ b/tools/build/feature/test-libnuma.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <numa.h>
+#include <numaif.h>
+
+int main(void)
+{
+ numa_available();
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
new file mode 100644
index 000000000..5ff1246e6
--- /dev/null
+++ b/tools/build/feature/test-libopencsd.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <opencsd/c_api/opencsd_c_api.h>
+
+int main(void)
+{
+ (void)ocsd_get_version();
+ return 0;
+}
diff --git a/tools/build/feature/test-libperl.c b/tools/build/feature/test-libperl.c
new file mode 100644
index 000000000..0415f437e
--- /dev/null
+++ b/tools/build/feature/test-libperl.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <EXTERN.h>
+#include <perl.h>
+
+int main(void)
+{
+ perl_alloc();
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libpython.c b/tools/build/feature/test-libpython.c
new file mode 100644
index 000000000..0c1641b0d
--- /dev/null
+++ b/tools/build/feature/test-libpython.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <Python.h>
+
+int main(void)
+{
+ Py_Initialize();
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libslang.c b/tools/build/feature/test-libslang.c
new file mode 100644
index 000000000..9cbff8d1d
--- /dev/null
+++ b/tools/build/feature/test-libslang.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <slang.h>
+
+int main(void)
+{
+ return SLsmg_init_smg();
+}
diff --git a/tools/build/feature/test-libunwind-aarch64.c b/tools/build/feature/test-libunwind-aarch64.c
new file mode 100644
index 000000000..323803f49
--- /dev/null
+++ b/tools/build/feature/test-libunwind-aarch64.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
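+/*
+ * UNW_OBJ() expands to the arch-prefixed libunwind symbol, here roughly
+ * _Uaarch64_dwarf_search_unwind_table; the other per-arch testcases below
+ * follow the same pattern.
+ */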
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-arm.c b/tools/build/feature/test-libunwind-arm.c
new file mode 100644
index 000000000..cb378b7d6
--- /dev/null
+++ b/tools/build/feature/test-libunwind-arm.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame-aarch64.c b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
new file mode 100644
index 000000000..36d6646c1
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame-aarch64.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind-aarch64.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip, unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+ dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame-arm.c b/tools/build/feature/test-libunwind-debug-frame-arm.c
new file mode 100644
index 000000000..8696e48e1
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame-arm.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind-arm.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip, unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+ dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-debug-frame.c b/tools/build/feature/test-libunwind-debug-frame.c
new file mode 100644
index 000000000..efb55cdd8
--- /dev/null
+++ b/tools/build/feature/test-libunwind-debug-frame.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind.h>
+#include <stdlib.h>
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip, unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+int main(void)
+{
+ dwarf_find_debug_frame(0, NULL, 0, 0, NULL, 0, 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-x86.c b/tools/build/feature/test-libunwind-x86.c
new file mode 100644
index 000000000..e5e0f6c89
--- /dev/null
+++ b/tools/build/feature/test-libunwind-x86.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind-x86.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind-x86_64.c b/tools/build/feature/test-libunwind-x86_64.c
new file mode 100644
index 000000000..62ae4db59
--- /dev/null
+++ b/tools/build/feature/test-libunwind-x86_64.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind-x86_64.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-libunwind.c b/tools/build/feature/test-libunwind.c
new file mode 100644
index 000000000..53fd26614
--- /dev/null
+++ b/tools/build/feature/test-libunwind.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <libunwind.h>
+#include <stdlib.h>
+
+extern int UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+static unw_accessors_t accessors;
+
+int main(void)
+{
+ unw_addr_space_t addr_space;
+
+ addr_space = unw_create_addr_space(&accessors, 0);
+ if (addr_space)
+ return 0;
+
+ unw_init_remote(NULL, addr_space, NULL);
+ dwarf_search_unwind_table(addr_space, 0, NULL, NULL, 0, NULL);
+
+ return 0;
+}
diff --git a/tools/build/feature/test-llvm-version.cpp b/tools/build/feature/test-llvm-version.cpp
new file mode 100644
index 000000000..8a0916254
--- /dev/null
+++ b/tools/build/feature/test-llvm-version.cpp
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <cstdio>
+#include "llvm/Config/llvm-config.h"
+
+#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH)
+#define pass int main() {printf("%x\n", NUM_VERSION); return 0;}
+
+#if NUM_VERSION >= 0x030900
+pass
+#else
+# error This LLVM is not tested yet.
+#endif
diff --git a/tools/build/feature/test-llvm.cpp b/tools/build/feature/test-llvm.cpp
new file mode 100644
index 000000000..88a3d1bdd
--- /dev/null
+++ b/tools/build/feature/test-llvm.cpp
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Config/llvm-config.h"	/* LLVM_VERSION_* used below */
+#define NUM_VERSION (((LLVM_VERSION_MAJOR) << 16) + (LLVM_VERSION_MINOR << 8) + LLVM_VERSION_PATCH)
+
+#if NUM_VERSION < 0x030900
+# error "LLVM version too low"
+#endif
+int main()
+{
+ llvm::errs() << "Hello World!\n";
+ llvm::llvm_shutdown();
+ return 0;
+}
diff --git a/tools/build/feature/test-lzma.c b/tools/build/feature/test-lzma.c
new file mode 100644
index 000000000..78682bb01
--- /dev/null
+++ b/tools/build/feature/test-lzma.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <lzma.h>
+
+int main(void)
+{
+ lzma_stream strm = LZMA_STREAM_INIT;
+ int ret;
+
+ ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+ return ret ? -1 : 0;
+}
diff --git a/tools/build/feature/test-numa_num_possible_cpus.c b/tools/build/feature/test-numa_num_possible_cpus.c
new file mode 100644
index 000000000..573d07b9c
--- /dev/null
+++ b/tools/build/feature/test-numa_num_possible_cpus.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <numa.h>
+
+int main(void)
+{
+ return numa_num_possible_cpus();
+}
diff --git a/tools/build/feature/test-pthread-attr-setaffinity-np.c b/tools/build/feature/test-pthread-attr-setaffinity-np.c
new file mode 100644
index 000000000..38c71131c
--- /dev/null
+++ b/tools/build/feature/test-pthread-attr-setaffinity-np.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <pthread.h>
+#include <sched.h>
+
+int main(void)
+{
+ int ret = 0;
+ pthread_attr_t thread_attr;
+ cpu_set_t cs;
+
+ pthread_attr_init(&thread_attr);
+ CPU_ZERO(&cs);
+
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs);
+
+ return ret;
+}
diff --git a/tools/build/feature/test-pthread-barrier.c b/tools/build/feature/test-pthread-barrier.c
new file mode 100644
index 000000000..0558d9334
--- /dev/null
+++ b/tools/build/feature/test-pthread-barrier.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <pthread.h>
+
+int main(void)
+{
+ pthread_barrier_t barrier;
+
+ pthread_barrier_init(&barrier, NULL, 1);
+ pthread_barrier_wait(&barrier);
+ return pthread_barrier_destroy(&barrier);
+}
diff --git a/tools/build/feature/test-reallocarray.c b/tools/build/feature/test-reallocarray.c
new file mode 100644
index 000000000..8170de351
--- /dev/null
+++ b/tools/build/feature/test-reallocarray.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdlib.h>
+
+int main(void)
+{
+ return !!reallocarray(NULL, 1, 1);
+}
diff --git a/tools/build/feature/test-sched_getcpu.c b/tools/build/feature/test-sched_getcpu.c
new file mode 100644
index 000000000..e448deb41
--- /dev/null
+++ b/tools/build/feature/test-sched_getcpu.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <sched.h>
+
+int main(void)
+{
+ return sched_getcpu();
+}
diff --git a/tools/build/feature/test-sdt.c b/tools/build/feature/test-sdt.c
new file mode 100644
index 000000000..22737b0da
--- /dev/null
+++ b/tools/build/feature/test-sdt.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/sdt.h>
+
+int main(void)
+{
+ DTRACE_PROBE(provider, name);
+ return 0;
+}
diff --git a/tools/build/feature/test-setns.c b/tools/build/feature/test-setns.c
new file mode 100644
index 000000000..1f714d2a6
--- /dev/null
+++ b/tools/build/feature/test-setns.c
@@ -0,0 +1,7 @@
+#define _GNU_SOURCE
+#include <sched.h>
+
+int main(void)
+{
+ return setns(0, 0);
+}
diff --git a/tools/build/feature/test-stackprotector-all.c b/tools/build/feature/test-stackprotector-all.c
new file mode 100644
index 000000000..c8a57194f
--- /dev/null
+++ b/tools/build/feature/test-stackprotector-all.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+
+int main(void)
+{
+ return puts("hi");
+}
diff --git a/tools/build/feature/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c
new file mode 100644
index 000000000..1e38d1930
--- /dev/null
+++ b/tools/build/feature/test-sync-compare-and-swap.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+
+volatile uint64_t x;
+
+int main(int argc, char *argv[])
+{
+ uint64_t old, new = argc;
+
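+	/* self-assignment marks argv as used, keeping stricter -Werror builds quiet */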
+ argv = argv;
+ do {
+ old = __sync_val_compare_and_swap(&x, 0, 0);
+ } while (!__sync_bool_compare_and_swap(&x, old, new));
+ return old == new;
+}
diff --git a/tools/build/feature/test-timerfd.c b/tools/build/feature/test-timerfd.c
new file mode 100644
index 000000000..9c72c697a
--- /dev/null
+++ b/tools/build/feature/test-timerfd.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test for timerfd functions used by perf-kvm-stat-live
+ */
+#include <sys/timerfd.h>
+
+int main(void)
+{
+ struct itimerspec new_value;
+
+ int fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+ if (fd < 0)
+ return 1;
+
+ if (timerfd_settime(fd, 0, &new_value, NULL) != 0)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/build/feature/test-zlib.c b/tools/build/feature/test-zlib.c
new file mode 100644
index 000000000..da6c35794
--- /dev/null
+++ b/tools/build/feature/test-zlib.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <zlib.h>
+
+int main(void)
+{
+ z_stream zs;
+
+ inflateInit(&zs);
+ return 0;
+}
diff --git a/tools/build/fixdep.c b/tools/build/fixdep.c
new file mode 100644
index 000000000..2501fea7a
--- /dev/null
+++ b/tools/build/fixdep.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * "Optimize" a list of dependencies as spit out by gcc -MD
+ * for the build framework.
+ *
+ * Original author:
+ * Copyright 2002 by Kai Germaschewski <kai.germaschewski@gmx.de>
+ *
+ * This code has been borrowed from kbuild's fixdep (scripts/basic/fixdep.c),
+ * please check it for a detailed explanation. This fixdep borrows only the
+ * basic transformation of dependencies, without the CONFIG mangling.
+ */
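+
+/*
+ * Illustration with made-up names: given a depfile ex.d, as written by
+ * 'gcc -MD -c -o ex.o ex.c', containing:
+ *
+ *	ex.o: ex.c ex.h
+ *
+ * 'fixdep ex.d ex.o "gcc -c -o ex.o ex.c"' prints roughly:
+ *
+ *	cmd_ex.o := gcc -c -o ex.o ex.c
+ *
+ *	source_ex.o := ex.c
+ *
+ *	deps_ex.o := \
+ *	  ex.h \
+ *
+ *	ex.o: $(deps_ex.o)
+ *
+ *	$(deps_ex.o):
+ */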
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <limits.h>
+
+char *target;
+char *depfile;
+char *cmdline;
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: fixdep <depfile> <target> <cmdline>\n");
+ exit(1);
+}
+
+/*
+ * Print out the commandline prefixed with cmd_<target filename> :=
+ */
+static void print_cmdline(void)
+{
+ printf("cmd_%s := %s\n\n", target, cmdline);
+}
+
+/*
+ * Important: The below generated source_foo.o and deps_foo.o variable
+ * assignments are parsed not only by make, but also by the rather simple
+ * parser in scripts/mod/sumversion.c.
+ */
+static void parse_dep_file(void *map, size_t len)
+{
+ char *m = map;
+ char *end = m + len;
+ char *p;
+ char s[PATH_MAX];
+ int is_target, has_target = 0;
+ int saw_any_target = 0;
+ int is_first_dep = 0;
+
+ while (m < end) {
+ /* Skip any "white space" */
+ while (m < end && (*m == ' ' || *m == '\\' || *m == '\n'))
+ m++;
+ /* Find next "white space" */
+ p = m;
+ while (p < end && *p != ' ' && *p != '\\' && *p != '\n')
+ p++;
+ /* Is the token we found a target name? */
+ is_target = (*(p-1) == ':');
+ /* Don't write any target names into the dependency file */
+ if (is_target) {
+ /* The /next/ file is the first dependency */
+ is_first_dep = 1;
+ has_target = 1;
+ } else if (has_target) {
+ /* Save this token/filename */
+ memcpy(s, m, p-m);
+ s[p - m] = 0;
+
+ /*
+ * Do not list the source file as dependency,
+ * so that kbuild is not confused if a .c file
+ * is rewritten into .S or vice versa. Storing
+ * it in source_* is needed for modpost to
+ * compute srcversions.
+ */
+ if (is_first_dep) {
+ /*
+ * If processing the concatenation of
+ * multiple dependency files, only
+ * process the first target name, which
+ * will be the original source name,
+ * and ignore any other target names,
+ * which will be intermediate temporary
+ * files.
+ */
+ if (!saw_any_target) {
+ saw_any_target = 1;
+ printf("source_%s := %s\n\n",
+ target, s);
+ printf("deps_%s := \\\n",
+ target);
+ }
+ is_first_dep = 0;
+ } else
+ printf(" %s \\\n", s);
+ }
+ /*
+ * Start searching for next token immediately after the first
+ * "whitespace" character that follows this token.
+ */
+ m = p + 1;
+ }
+
+ if (!saw_any_target) {
+ fprintf(stderr, "fixdep: parse error; no targets found\n");
+ exit(1);
+ }
+
+ printf("\n%s: $(deps_%s)\n\n", target, target);
+ printf("$(deps_%s):\n", target);
+}
+
+static void print_deps(void)
+{
+ struct stat st;
+ int fd;
+ void *map;
+
+ fd = open(depfile, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "fixdep: error opening depfile: ");
+ perror(depfile);
+ exit(2);
+ }
+ if (fstat(fd, &st) < 0) {
+ fprintf(stderr, "fixdep: error fstat'ing depfile: ");
+ perror(depfile);
+ exit(2);
+ }
+ if (st.st_size == 0) {
+ fprintf(stderr, "fixdep: %s is empty\n", depfile);
+ close(fd);
+ return;
+ }
+ map = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if ((long) map == -1) {
+ perror("fixdep: mmap");
+ close(fd);
+ return;
+ }
+
+ parse_dep_file(map, st.st_size);
+
+ munmap(map, st.st_size);
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ if (argc != 4)
+ usage();
+
+ depfile = argv[1];
+ target = argv[2];
+ cmdline = argv[3];
+
+ print_cmdline();
+ print_deps();
+
+ return 0;
+}
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
new file mode 100644
index 000000000..4d502f9b1
--- /dev/null
+++ b/tools/build/tests/ex/Build
@@ -0,0 +1,11 @@
+ex-y += ex.o
+ex-y += a.o
+ex-y += b.o
+ex-y += b.o
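+# b.o appears twice on purpose: duplicated objects in a Build file are
+# expected to be handled by the build framework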
+ex-y += empty/
+ex-y += empty2/
+ex-y += inc.o
+
+libex-y += c.o
+libex-y += d.o
+libex-y += arch/
diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile
new file mode 100644
index 000000000..fee032e06
--- /dev/null
+++ b/tools/build/tests/ex/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+export srctree := $(abspath ../../../..)
+export CC := gcc
+export LD := ld
+export AR := ar
+
+ex:
+
+include $(srctree)/tools/build/Makefile.include
+
+ex: ex-in.o libex-in.o
+ $(CC) -o $@ $^
+
+ex.%: fixdep FORCE
+ make -f $(srctree)/tools/build/Makefile.build dir=. $@
+
+ex-in.o: fixdep FORCE
+ make $(build)=ex
+
+libex-in.o: fixdep FORCE
+ make $(build)=libex
+
+clean:
+ find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ rm -f ex ex.i ex.s
+
+.PHONY: FORCE
diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c
new file mode 100644
index 000000000..66017a9f4
--- /dev/null
+++ b/tools/build/tests/ex/a.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int a(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build
new file mode 100644
index 000000000..55506189e
--- /dev/null
+++ b/tools/build/tests/ex/arch/Build
@@ -0,0 +1,2 @@
+libex-y += e.o
+libex-y += f.o
diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c
new file mode 100644
index 000000000..f6ef585b5
--- /dev/null
+++ b/tools/build/tests/ex/arch/e.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int e(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c
new file mode 100644
index 000000000..bffd9c67e
--- /dev/null
+++ b/tools/build/tests/ex/arch/f.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int f(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c
new file mode 100644
index 000000000..2b29fb4d3
--- /dev/null
+++ b/tools/build/tests/ex/b.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int b(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c
new file mode 100644
index 000000000..a63b20ab8
--- /dev/null
+++ b/tools/build/tests/ex/c.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int c(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c
new file mode 100644
index 000000000..e114e8dca
--- /dev/null
+++ b/tools/build/tests/ex/d.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int d(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/build/tests/ex/empty/Build
diff --git a/tools/build/tests/ex/empty2/README b/tools/build/tests/ex/empty2/README
new file mode 100644
index 000000000..2107cc5bf
--- /dev/null
+++ b/tools/build/tests/ex/empty2/README
@@ -0,0 +1,2 @@
+This directory is intentionally left without a Build file
+to test proper nesting into Build-less directories.
diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c
new file mode 100644
index 000000000..3c02756ef
--- /dev/null
+++ b/tools/build/tests/ex/ex.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+int a(void);
+int b(void);
+int c(void);
+int d(void);
+int e(void);
+int f(void);
+int inc(void);
+
+int main(void)
+{
+ a();
+ b();
+ c();
+ d();
+ e();
+ f();
+ inc();
+
+ return 0;
+}
diff --git a/tools/build/tests/ex/inc.c b/tools/build/tests/ex/inc.c
new file mode 100644
index 000000000..3636ab5bf
--- /dev/null
+++ b/tools/build/tests/ex/inc.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifdef INCLUDE
+#include "krava.h"
+#endif
+
+int inc(void)
+{
+ return 0;
+}
diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh
new file mode 100755
index 000000000..2c54e4d43
--- /dev/null
+++ b/tools/build/tests/run.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+function test_ex {
+ make -C ex V=1 clean > ex.out 2>&1
+ make -C ex V=1 >> ex.out 2>&1
+
+ if [ ! -x ./ex/ex ]; then
+ echo FAILED
+ exit -1
+ fi
+
+ make -C ex V=1 clean > /dev/null 2>&1
+ rm -f ex.out
+}
+
+function test_ex_suffix {
+ make -C ex V=1 clean > ex.out 2>&1
+
+ # use -rR to disable make's builtin rules
+ make -rR -C ex V=1 ex.o >> ex.out 2>&1
+ make -rR -C ex V=1 ex.i >> ex.out 2>&1
+ make -rR -C ex V=1 ex.s >> ex.out 2>&1
+
+ if [ -x ./ex/ex ]; then
+ echo FAILED
+ exit -1
+ fi
+
+ if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then
+ echo FAILED
+ exit -1
+ fi
+
+ make -C ex V=1 clean > /dev/null 2>&1
+ rm -f ex.out
+}
+
+function test_ex_include {
+ make -C ex V=1 clean > ex.out 2>&1
+
+ # build with krava.h include
+ touch ex/krava.h
+ make -C ex V=1 CFLAGS=-DINCLUDE >> ex.out 2>&1
+
+ if [ ! -x ./ex/ex ]; then
+ echo FAILED
+ exit -1
+ fi
+
+ # build without the include
+ rm -f ex/krava.h ex/ex
+ make -C ex V=1 >> ex.out 2>&1
+
+ if [ ! -x ./ex/ex ]; then
+ echo FAILED
+ exit -1
+ fi
+
+ make -C ex V=1 clean > /dev/null 2>&1
+ rm -f ex.out
+}
+
+echo -n Testing..
+
+test_ex
+test_ex_suffix
+test_ex_include
+
+echo OK
diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore
new file mode 100644
index 000000000..633cd9b87
--- /dev/null
+++ b/tools/cgroup/.gitignore
@@ -0,0 +1 @@
+cgroup_event_listener
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
new file mode 100644
index 000000000..ffca068e4
--- /dev/null
+++ b/tools/cgroup/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for cgroup tools
+
+CFLAGS = -Wall -Wextra
+
+all: cgroup_event_listener
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ $(RM) cgroup_event_listener
diff --git a/tools/cgroup/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c
new file mode 100644
index 000000000..3d70dc831
--- /dev/null
+++ b/tools/cgroup/cgroup_event_listener.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * cgroup_event_listener.c - Simple listener of cgroup events
+ *
+ * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
+ */
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+
+#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>"
+
+int main(int argc, char **argv)
+{
+ int efd = -1;
+ int cfd = -1;
+ int event_control = -1;
+ char event_control_path[PATH_MAX];
+ char line[LINE_MAX];
+ int ret;
+
+ if (argc != 3)
+ errx(1, "%s", USAGE_STR);
+
+ cfd = open(argv[1], O_RDONLY);
+ if (cfd == -1)
+ err(1, "Cannot open %s", argv[1]);
+
+ ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
+ dirname(argv[1]));
+ if (ret >= PATH_MAX)
+ errx(1, "Path to cgroup.event_control is too long");
+
+ event_control = open(event_control_path, O_WRONLY);
+ if (event_control == -1)
+ err(1, "Cannot open %s", event_control_path);
+
+ efd = eventfd(0, 0);
+ if (efd == -1)
+ err(1, "eventfd() failed");
+
+ ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
+ if (ret >= LINE_MAX)
+ errx(1, "Arguments string is too long");
+
+ ret = write(event_control, line, strlen(line) + 1);
+ if (ret == -1)
+ err(1, "Cannot write to cgroup.event_control");
+
+ while (1) {
+ uint64_t result;
+
+ ret = read(efd, &result, sizeof(result));
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+ err(1, "Cannot read from eventfd");
+ }
+ assert(ret == sizeof(result));
+
+ ret = access(event_control_path, W_OK);
+ if ((ret == -1) && (errno == ENOENT)) {
+			puts("The cgroup seems to have been removed.");
+ break;
+ }
+
+ if (ret == -1)
+ err(1, "cgroup.event_control is not accessible any more");
+
+ printf("%s %s: crossed\n", argv[1], argv[2]);
+ }
+
+ return 0;
+}
diff --git a/tools/firewire/Makefile b/tools/firewire/Makefile
new file mode 100644
index 000000000..67b6e9fca
--- /dev/null
+++ b/tools/firewire/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+prefix = /usr
+nosy-dump-version = 0.4
+
+CC = gcc
+
+all : nosy-dump
+
+nosy-dump : CFLAGS = -Wall -O2 -g
+nosy-dump : CPPFLAGS = -DVERSION=\"$(nosy-dump-version)\" -I../../drivers/firewire
+nosy-dump : LDFLAGS = -g
+nosy-dump : LDLIBS = -lpopt
+
+nosy-dump : nosy-dump.o decode-fcp.o
+
+clean :
+ rm -rf *.o nosy-dump
+
+install :
+ install nosy-dump $(prefix)/bin/nosy-dump
diff --git a/tools/firewire/decode-fcp.c b/tools/firewire/decode-fcp.c
new file mode 100644
index 000000000..b67ebc884
--- /dev/null
+++ b/tools/firewire/decode-fcp.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/firewire-constants.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "list.h"
+#include "nosy-dump.h"
+
+#define CSR_FCP_COMMAND 0xfffff0000b00ull
+#define CSR_FCP_RESPONSE 0xfffff0000d00ull
+
+static const char * const ctype_names[] = {
+ [0x0] = "control", [0x8] = "not implemented",
+ [0x1] = "status", [0x9] = "accepted",
+ [0x2] = "specific inquiry", [0xa] = "rejected",
+ [0x3] = "notify", [0xb] = "in transition",
+ [0x4] = "general inquiry", [0xc] = "stable",
+ [0x5] = "(reserved 0x05)", [0xd] = "changed",
+ [0x6] = "(reserved 0x06)", [0xe] = "(reserved 0x0e)",
+ [0x7] = "(reserved 0x07)", [0xf] = "interim",
+};
+
+static const char * const subunit_type_names[] = {
+ [0x00] = "monitor", [0x10] = "(reserved 0x10)",
+ [0x01] = "audio", [0x11] = "(reserved 0x11)",
+ [0x02] = "printer", [0x12] = "(reserved 0x12)",
+ [0x03] = "disc", [0x13] = "(reserved 0x13)",
+ [0x04] = "tape recorder/player",[0x14] = "(reserved 0x14)",
+ [0x05] = "tuner", [0x15] = "(reserved 0x15)",
+ [0x06] = "ca", [0x16] = "(reserved 0x16)",
+ [0x07] = "camera", [0x17] = "(reserved 0x17)",
+ [0x08] = "(reserved 0x08)", [0x18] = "(reserved 0x18)",
+ [0x09] = "panel", [0x19] = "(reserved 0x19)",
+ [0x0a] = "bulletin board", [0x1a] = "(reserved 0x1a)",
+ [0x0b] = "camera storage", [0x1b] = "(reserved 0x1b)",
+ [0x0c] = "(reserved 0x0c)", [0x1c] = "vendor unique",
+ [0x0d] = "(reserved 0x0d)", [0x1d] = "all subunit types",
+ [0x0e] = "(reserved 0x0e)", [0x1e] = "subunit_type extended to next byte",
+ [0x0f] = "(reserved 0x0f)", [0x1f] = "unit",
+};
+
+struct avc_enum {
+ int value;
+ const char *name;
+};
+
+struct avc_field {
+ const char *name; /* Short name for field. */
+ int offset; /* Location of field, specified in bits; */
+ /* negative means from end of packet. */
+ int width; /* Width of field, 0 means use data_length. */
+ struct avc_enum *names;
+};
+
+struct avc_opcode_info {
+ const char *name;
+ struct avc_field fields[8];
+};
+
+struct avc_enum power_field_names[] = {
+ { 0x70, "on" },
+ { 0x60, "off" },
+ { }
+};
+
+static const struct avc_opcode_info opcode_info[256] = {
+
+ /* TA Document 1999026 */
+ /* AV/C Digital Interface Command Set General Specification 4.0 */
+ [0xb2] = { "power", {
+ { "state", 0, 8, power_field_names }
+ }
+ },
+ [0x30] = { "unit info", {
+ { "foo", 0, 8 },
+ { "unit_type", 8, 5 },
+ { "unit", 13, 3 },
+ { "company id", 16, 24 },
+ }
+ },
+ [0x31] = { "subunit info" },
+ [0x01] = { "reserve" },
+ [0xb0] = { "version" },
+ [0x00] = { "vendor dependent" },
+ [0x02] = { "plug info" },
+ [0x12] = { "channel usage" },
+ [0x24] = { "connect" },
+ [0x20] = { "connect av" },
+ [0x22] = { "connections" },
+ [0x11] = { "digital input" },
+ [0x10] = { "digital output" },
+ [0x25] = { "disconnect" },
+ [0x21] = { "disconnect av" },
+ [0x19] = { "input plug signal format" },
+ [0x18] = { "output plug signal format" },
+ [0x1f] = { "general bus setup" },
+
+ /* TA Document 1999025 */
+ /* AV/C Descriptor Mechanism Specification Version 1.0 */
+ [0x0c] = { "create descriptor" },
+ [0x08] = { "open descriptor" },
+ [0x09] = { "read descriptor" },
+ [0x0a] = { "write descriptor" },
+ [0x05] = { "open info block" },
+ [0x06] = { "read info block" },
+ [0x07] = { "write info block" },
+ [0x0b] = { "search descriptor" },
+ [0x0d] = { "object number select" },
+
+ /* TA Document 1999015 */
+ /* AV/C Command Set for Rate Control of Isochronous Data Flow 1.0 */
+ [0xb3] = { "rate", {
+ { "subfunction", 0, 8 },
+ { "result", 8, 8 },
+ { "plug_type", 16, 8 },
+ { "plug_id", 16, 8 },
+ }
+ },
+
+ /* TA Document 1999008 */
+ /* AV/C Audio Subunit Specification 1.0 */
+ [0xb8] = { "function block" },
+
+ /* TA Document 2001001 */
+ /* AV/C Panel Subunit Specification 1.1 */
+ [0x7d] = { "gui update" },
+ [0x7e] = { "push gui data" },
+ [0x7f] = { "user action" },
+ [0x7c] = { "pass through" },
+
+ /* */
+ [0x26] = { "asynchronous connection" },
+};
+
+struct avc_frame {
+ uint32_t operand0:8;
+ uint32_t opcode:8;
+ uint32_t subunit_id:3;
+ uint32_t subunit_type:5;
+ uint32_t ctype:4;
+ uint32_t cts:4;
+};
+
+static void
+decode_avc(struct link_transaction *t)
+{
+ struct avc_frame *frame =
+ (struct avc_frame *) t->request->packet.write_block.data;
+ const struct avc_opcode_info *info;
+ const char *name;
+ char buffer[32];
+ int i;
+
+ info = &opcode_info[frame->opcode];
+ if (info->name == NULL) {
+ snprintf(buffer, sizeof(buffer),
+ "(unknown opcode 0x%02x)", frame->opcode);
+ name = buffer;
+ } else {
+ name = info->name;
+ }
+
+ printf("av/c %s, subunit_type=%s, subunit_id=%d, opcode=%s",
+ ctype_names[frame->ctype], subunit_type_names[frame->subunit_type],
+ frame->subunit_id, name);
+
+ for (i = 0; info->fields[i].name != NULL; i++)
+ printf(", %s", info->fields[i].name);
+
+ printf("\n");
+}
+
+int
+decode_fcp(struct link_transaction *t)
+{
+ struct avc_frame *frame =
+ (struct avc_frame *) t->request->packet.write_block.data;
+ unsigned long long offset =
+ ((unsigned long long) t->request->packet.common.offset_high << 32) |
+ t->request->packet.common.offset_low;
+
+ if (t->request->packet.common.tcode != TCODE_WRITE_BLOCK_REQUEST)
+ return 0;
+
+ if (offset == CSR_FCP_COMMAND || offset == CSR_FCP_RESPONSE) {
+ switch (frame->cts) {
+ case 0x00:
+ decode_avc(t);
+ break;
+ case 0x01:
+ printf("cal fcp frame (cts=0x01)\n");
+ break;
+ case 0x02:
+ printf("ehs fcp frame (cts=0x02)\n");
+ break;
+ case 0x03:
+ printf("havi fcp frame (cts=0x03)\n");
+ break;
+ case 0x0e:
+ printf("vendor specific fcp frame (cts=0x0e)\n");
+ break;
+ case 0x0f:
+ printf("extended cts\n");
+ break;
+ default:
+ printf("reserved fcp frame (ctx=0x%02x)\n", frame->cts);
+ break;
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
diff --git a/tools/firewire/list.h b/tools/firewire/list.h
new file mode 100644
index 000000000..6278d08e9
--- /dev/null
+++ b/tools/firewire/list.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+struct list {
+ struct list *next, *prev;
+};
+
+static inline void
+list_init(struct list *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+static inline int
+list_empty(struct list *list)
+{
+ return list->next == list;
+}
+
+static inline void
+list_insert(struct list *link, struct list *new_link)
+{
+ new_link->prev = link->prev;
+ new_link->next = link;
+ new_link->prev->next = new_link;
+ new_link->next->prev = new_link;
+}
+
+static inline void
+list_append(struct list *list, struct list *new_link)
+{
+ list_insert((struct list *)list, new_link);
+}
+
+static inline void
+list_prepend(struct list *list, struct list *new_link)
+{
+ list_insert(list->next, new_link);
+}
+
+static inline void
+list_remove(struct list *link)
+{
+ link->prev->next = link->next;
+ link->next->prev = link->prev;
+}
+
+#define list_entry(link, type, member) \
+ ((type *)((char *)(link)-(unsigned long)(&((type *)0)->member)))
+
+#define list_head(list, type, member) \
+ list_entry((list)->next, type, member)
+
+#define list_tail(list, type, member) \
+ list_entry((list)->prev, type, member)
+
+#define list_next(elm, member) \
+ list_entry((elm)->member.next, typeof(*elm), member)
+
+#define list_for_each_entry(pos, list, member) \
+ for (pos = list_head(list, typeof(*pos), member); \
+ &pos->member != (list); \
+ pos = list_next(pos, member))
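+
+/*
+ * Typical use (illustrative only; struct foo is hypothetical):
+ *
+ *	struct foo { int x; struct list link; };
+ *	struct list items;
+ *	struct foo *f;
+ *
+ *	list_init(&items);
+ *	list_for_each_entry(f, &items, link)
+ *		printf("%d\n", f->x);
+ */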
+
diff --git a/tools/firewire/nosy-dump.c b/tools/firewire/nosy-dump.c
new file mode 100644
index 000000000..3179c711b
--- /dev/null
+++ b/tools/firewire/nosy-dump.c
@@ -0,0 +1,1035 @@
+/*
+ * nosy-dump - Interface to snoop mode driver for TI PCILynx 1394 controllers
+ * Copyright (C) 2002-2006 Kristian Høgsberg
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <linux/firewire-constants.h>
+#include <poll.h>
+#include <popt.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "list.h"
+#include "nosy-dump.h"
+#include "nosy-user.h"
+
+enum {
+ PACKET_FIELD_DETAIL = 0x01,
+ PACKET_FIELD_DATA_LENGTH = 0x02,
+ /* Marks the fields we print in transaction view. */
+ PACKET_FIELD_TRANSACTION = 0x04,
+};
+
+static void print_packet(uint32_t *data, size_t length);
+static void decode_link_packet(struct link_packet *packet, size_t length,
+ int include_flags, int exclude_flags);
+static int run = 1;
+sig_t sys_sigint_handler;
+
+static char *option_nosy_device = "/dev/nosy";
+static char *option_view = "packet";
+static char *option_output;
+static char *option_input;
+static int option_hex;
+static int option_iso;
+static int option_cycle_start;
+static int option_version;
+static int option_verbose;
+
+enum {
+ VIEW_TRANSACTION,
+ VIEW_PACKET,
+ VIEW_STATS,
+};
+
+static const struct poptOption options[] = {
+ {
+ .longName = "device",
+ .shortName = 'd',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_nosy_device,
+ .descrip = "Path to nosy device.",
+ .argDescrip = "DEVICE"
+ },
+ {
+ .longName = "view",
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_view,
+ .descrip = "Specify view of bus traffic: packet, transaction or stats.",
+ .argDescrip = "VIEW"
+ },
+ {
+ .longName = "hex",
+ .shortName = 'x',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_hex,
+ .descrip = "Print each packet in hex.",
+ },
+ {
+ .longName = "iso",
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_iso,
+ .descrip = "Print iso packets.",
+ },
+ {
+ .longName = "cycle-start",
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_cycle_start,
+ .descrip = "Print cycle start packets.",
+ },
+ {
+ .longName = "verbose",
+ .shortName = 'v',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_verbose,
+ .descrip = "Verbose packet view.",
+ },
+ {
+ .longName = "output",
+ .shortName = 'o',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_output,
+ .descrip = "Log to output file.",
+ .argDescrip = "FILENAME"
+ },
+ {
+ .longName = "input",
+ .shortName = 'i',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_input,
+ .descrip = "Decode log from file.",
+ .argDescrip = "FILENAME"
+ },
+ {
+ .longName = "version",
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_version,
+		.descrip = "Print version info.",
+ },
+ POPT_AUTOHELP
+ POPT_TABLEEND
+};
+
+/* Allow all ^C except the first to interrupt the program in the usual way. */
+static void
+sigint_handler(int signal_num)
+{
+ if (run == 1) {
+ run = 0;
+ signal(SIGINT, SIG_DFL);
+ }
+}
+
+static struct subaction *
+subaction_create(uint32_t *data, size_t length)
+{
+ struct subaction *sa;
+
+ /* we put the ack in the subaction struct for easy access. */
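+	/* Allocate room for the variable-length packet data following the struct. */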
+ sa = malloc(sizeof *sa - sizeof sa->packet + length);
+ if (!sa)
+ exit(EXIT_FAILURE);
+ sa->ack = data[length / 4 - 1];
+ sa->length = length;
+ memcpy(&sa->packet, data, length);
+
+ return sa;
+}
+
+static void
+subaction_destroy(struct subaction *sa)
+{
+ free(sa);
+}
+
+static struct list pending_transaction_list = {
+ &pending_transaction_list, &pending_transaction_list
+};
+
+static struct link_transaction *
+link_transaction_lookup(int request_node, int response_node, int tlabel)
+{
+ struct link_transaction *t;
+
+ list_for_each_entry(t, &pending_transaction_list, link) {
+ if (t->request_node == request_node &&
+ t->response_node == response_node &&
+ t->tlabel == tlabel)
+ return t;
+ }
+
+ t = malloc(sizeof *t);
+ if (!t)
+ exit(EXIT_FAILURE);
+ t->request_node = request_node;
+ t->response_node = response_node;
+ t->tlabel = tlabel;
+ list_init(&t->request_list);
+ list_init(&t->response_list);
+
+ list_append(&pending_transaction_list, &t->link);
+
+ return t;
+}
+
+static void
+link_transaction_destroy(struct link_transaction *t)
+{
+ struct subaction *sa;
+
+ while (!list_empty(&t->request_list)) {
+ sa = list_head(&t->request_list, struct subaction, link);
+ list_remove(&sa->link);
+ subaction_destroy(sa);
+ }
+ while (!list_empty(&t->response_list)) {
+ sa = list_head(&t->response_list, struct subaction, link);
+ list_remove(&sa->link);
+ subaction_destroy(sa);
+ }
+ free(t);
+}
+
+struct protocol_decoder {
+ const char *name;
+ int (*decode)(struct link_transaction *t);
+};
+
+static const struct protocol_decoder protocol_decoders[] = {
+ { "FCP", decode_fcp }
+};
+
+static void
+handle_transaction(struct link_transaction *t)
+{
+ struct subaction *sa;
+ int i;
+
+ if (!t->request) {
+ printf("BUG in handle_transaction\n");
+ return;
+ }
+
+ for (i = 0; i < array_length(protocol_decoders); i++)
+ if (protocol_decoders[i].decode(t))
+ break;
+
+	/*
+	 * HACK: decode only FCP right now; the generic transaction
+	 * decoding below is intentionally unreachable.
+	 */
+	return;
+
+ decode_link_packet(&t->request->packet, t->request->length,
+ PACKET_FIELD_TRANSACTION, 0);
+ if (t->response)
+		decode_link_packet(&t->response->packet, t->response->length,
+ PACKET_FIELD_TRANSACTION, 0);
+ else
+ printf("[no response]");
+
+ if (option_verbose) {
+ list_for_each_entry(sa, &t->request_list, link)
+ print_packet((uint32_t *) &sa->packet, sa->length);
+ list_for_each_entry(sa, &t->response_list, link)
+ print_packet((uint32_t *) &sa->packet, sa->length);
+ }
+ printf("\r\n");
+
+ link_transaction_destroy(t);
+}
+
+static void
+clear_pending_transaction_list(void)
+{
+ struct link_transaction *t;
+
+ while (!list_empty(&pending_transaction_list)) {
+ t = list_head(&pending_transaction_list,
+ struct link_transaction, link);
+ list_remove(&t->link);
+ link_transaction_destroy(t);
+ /* print unfinished transactions */
+ }
+}
+
+static const char * const tcode_names[] = {
+ [0x0] = "write_quadlet_request", [0x6] = "read_quadlet_response",
+ [0x1] = "write_block_request", [0x7] = "read_block_response",
+ [0x2] = "write_response", [0x8] = "cycle_start",
+ [0x3] = "reserved", [0x9] = "lock_request",
+ [0x4] = "read_quadlet_request", [0xa] = "iso_data",
+ [0x5] = "read_block_request", [0xb] = "lock_response",
+};
+
+static const char * const ack_names[] = {
+ [0x0] = "no ack", [0x8] = "reserved (0x08)",
+ [0x1] = "ack_complete", [0x9] = "reserved (0x09)",
+ [0x2] = "ack_pending", [0xa] = "reserved (0x0a)",
+ [0x3] = "reserved (0x03)", [0xb] = "reserved (0x0b)",
+ [0x4] = "ack_busy_x", [0xc] = "reserved (0x0c)",
+ [0x5] = "ack_busy_a", [0xd] = "ack_data_error",
+ [0x6] = "ack_busy_b", [0xe] = "ack_type_error",
+ [0x7] = "reserved (0x07)", [0xf] = "reserved (0x0f)",
+};
+
+static const char * const rcode_names[] = {
+ [0x0] = "complete", [0x4] = "conflict_error",
+ [0x1] = "reserved (0x01)", [0x5] = "data_error",
+ [0x2] = "reserved (0x02)", [0x6] = "type_error",
+ [0x3] = "reserved (0x03)", [0x7] = "address_error",
+};
+
+static const char * const retry_names[] = {
+ [0x0] = "retry_1",
+ [0x1] = "retry_x",
+ [0x2] = "retry_a",
+ [0x3] = "retry_b",
+};
+
+enum {
+ PACKET_RESERVED,
+ PACKET_REQUEST,
+ PACKET_RESPONSE,
+ PACKET_OTHER,
+};
+
+struct packet_info {
+ const char *name;
+ int type;
+ int response_tcode;
+ const struct packet_field *fields;
+ int field_count;
+};
+
+struct packet_field {
+ const char *name; /* Short name for field. */
+ int offset; /* Location of field, specified in bits; */
+ /* negative means from end of packet. */
+ int width; /* Width of field, 0 means use data_length. */
+ int flags; /* Show options. */
+ const char * const *value_names;
+};
+
+#define COMMON_REQUEST_FIELDS \
+ { "dest", 0, 16, PACKET_FIELD_TRANSACTION }, \
+ { "tl", 16, 6 }, \
+ { "rt", 22, 2, PACKET_FIELD_DETAIL, retry_names }, \
+ { "tcode", 24, 4, PACKET_FIELD_TRANSACTION, tcode_names }, \
+ { "pri", 28, 4, PACKET_FIELD_DETAIL }, \
+ { "src", 32, 16, PACKET_FIELD_TRANSACTION }, \
+ { "offs", 48, 48, PACKET_FIELD_TRANSACTION }
+
+#define COMMON_RESPONSE_FIELDS \
+ { "dest", 0, 16 }, \
+ { "tl", 16, 6 }, \
+ { "rt", 22, 2, PACKET_FIELD_DETAIL, retry_names }, \
+ { "tcode", 24, 4, 0, tcode_names }, \
+ { "pri", 28, 4, PACKET_FIELD_DETAIL }, \
+ { "src", 32, 16 }, \
+ { "rcode", 48, 4, PACKET_FIELD_TRANSACTION, rcode_names }
+
+static const struct packet_field read_quadlet_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "crc", 96, 32, PACKET_FIELD_DETAIL },
+ { "ack", 156, 4, 0, ack_names },
+};
+
+static const struct packet_field read_quadlet_response_fields[] = {
+ COMMON_RESPONSE_FIELDS,
+ { "data", 96, 32, PACKET_FIELD_TRANSACTION },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "ack", 188, 4, 0, ack_names },
+};
+
+static const struct packet_field read_block_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "data_length", 96, 16, PACKET_FIELD_TRANSACTION },
+ { "extended_tcode", 112, 16 },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "ack", 188, 4, 0, ack_names },
+};
+
+static const struct packet_field block_response_fields[] = {
+ COMMON_RESPONSE_FIELDS,
+ { "data_length", 96, 16, PACKET_FIELD_DATA_LENGTH },
+ { "extended_tcode", 112, 16 },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "data", 160, 0, PACKET_FIELD_TRANSACTION },
+ { "crc", -64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field write_quadlet_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "data", 96, 32, PACKET_FIELD_TRANSACTION },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field block_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "data_length", 96, 16, PACKET_FIELD_DATA_LENGTH | PACKET_FIELD_TRANSACTION },
+ { "extended_tcode", 112, 16, PACKET_FIELD_TRANSACTION },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "data", 160, 0, PACKET_FIELD_TRANSACTION },
+ { "crc", -64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field write_response_fields[] = {
+ COMMON_RESPONSE_FIELDS,
+ { "reserved", 64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field iso_data_fields[] = {
+ { "data_length", 0, 16, PACKET_FIELD_DATA_LENGTH },
+ { "tag", 16, 2 },
+ { "channel", 18, 6 },
+ { "tcode", 24, 4, 0, tcode_names },
+ { "sy", 28, 4 },
+ { "crc", 32, 32, PACKET_FIELD_DETAIL },
+ { "data", 64, 0 },
+ { "crc", -64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_info packet_info[] = {
+ {
+ .name = "write_quadlet_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_WRITE_RESPONSE,
+ .fields = write_quadlet_request_fields,
+ .field_count = array_length(write_quadlet_request_fields)
+ },
+ {
+ .name = "write_block_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_WRITE_RESPONSE,
+ .fields = block_request_fields,
+ .field_count = array_length(block_request_fields)
+ },
+ {
+ .name = "write_response",
+ .type = PACKET_RESPONSE,
+ .fields = write_response_fields,
+ .field_count = array_length(write_response_fields)
+ },
+ {
+ .name = "reserved",
+ .type = PACKET_RESERVED,
+ },
+ {
+ .name = "read_quadlet_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_READ_QUADLET_RESPONSE,
+ .fields = read_quadlet_request_fields,
+ .field_count = array_length(read_quadlet_request_fields)
+ },
+ {
+ .name = "read_block_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_READ_BLOCK_RESPONSE,
+ .fields = read_block_request_fields,
+ .field_count = array_length(read_block_request_fields)
+ },
+ {
+ .name = "read_quadlet_response",
+ .type = PACKET_RESPONSE,
+ .fields = read_quadlet_response_fields,
+ .field_count = array_length(read_quadlet_response_fields)
+ },
+ {
+ .name = "read_block_response",
+ .type = PACKET_RESPONSE,
+ .fields = block_response_fields,
+ .field_count = array_length(block_response_fields)
+ },
+ {
+ .name = "cycle_start",
+ .type = PACKET_OTHER,
+ .fields = write_quadlet_request_fields,
+ .field_count = array_length(write_quadlet_request_fields)
+ },
+ {
+ .name = "lock_request",
+ .type = PACKET_REQUEST,
+ .fields = block_request_fields,
+ .field_count = array_length(block_request_fields)
+ },
+ {
+ .name = "iso_data",
+ .type = PACKET_OTHER,
+ .fields = iso_data_fields,
+ .field_count = array_length(iso_data_fields)
+ },
+ {
+ .name = "lock_response",
+ .type = PACKET_RESPONSE,
+ .fields = block_response_fields,
+ .field_count = array_length(block_response_fields)
+ },
+};
+
+static int
+handle_request_packet(uint32_t *data, size_t length)
+{
+ struct link_packet *p = (struct link_packet *) data;
+ struct subaction *sa, *prev;
+ struct link_transaction *t;
+
+ t = link_transaction_lookup(p->common.source, p->common.destination,
+ p->common.tlabel);
+ sa = subaction_create(data, length);
+ t->request = sa;
+
+ if (!list_empty(&t->request_list)) {
+ prev = list_tail(&t->request_list,
+ struct subaction, link);
+
+ if (!ACK_BUSY(prev->ack)) {
+ /*
+ * error, we should only see ack_busy_* before the
+ * ack_pending/ack_complete -- this is an ack_pending
+ * instead (ack_complete would have finished the
+ * transaction).
+ */
+ }
+
+ if (prev->packet.common.tcode != sa->packet.common.tcode ||
+ prev->packet.common.tlabel != sa->packet.common.tlabel) {
+			/* Should whole packets be compared with memcmp()? */
+ /* error, these should match for retries. */
+ }
+ }
+
+ list_append(&t->request_list, &sa->link);
+
+ switch (sa->ack) {
+ case ACK_COMPLETE:
+ if (p->common.tcode != TCODE_WRITE_QUADLET_REQUEST &&
+ p->common.tcode != TCODE_WRITE_BLOCK_REQUEST)
+ /* error, unified transactions only allowed for write */;
+ list_remove(&t->link);
+ handle_transaction(t);
+ break;
+
+ case ACK_NO_ACK:
+ case ACK_DATA_ERROR:
+ case ACK_TYPE_ERROR:
+ list_remove(&t->link);
+ handle_transaction(t);
+ break;
+
+ case ACK_PENDING:
+ /* request subaction phase over, wait for response. */
+ break;
+
+ case ACK_BUSY_X:
+ case ACK_BUSY_A:
+ case ACK_BUSY_B:
+ /* ok, wait for retry. */
+ /* check that retry protocol is respected. */
+ break;
+ }
+
+ return 1;
+}
+
+static int
+handle_response_packet(uint32_t *data, size_t length)
+{
+ struct link_packet *p = (struct link_packet *) data;
+ struct subaction *sa, *prev;
+ struct link_transaction *t;
+
+ t = link_transaction_lookup(p->common.destination, p->common.source,
+ p->common.tlabel);
+ if (list_empty(&t->request_list)) {
+ /* unsolicited response */
+ }
+
+ sa = subaction_create(data, length);
+ t->response = sa;
+
+ if (!list_empty(&t->response_list)) {
+ prev = list_tail(&t->response_list, struct subaction, link);
+
+ if (!ACK_BUSY(prev->ack)) {
+ /*
+ * error, we should only see ack_busy_* before the
+ * ack_pending/ack_complete
+ */
+ }
+
+ if (prev->packet.common.tcode != sa->packet.common.tcode ||
+ prev->packet.common.tlabel != sa->packet.common.tlabel) {
+ /* use memcmp() instead? */
+ /* error, these should match for retries. */
+ }
+ } else {
+ prev = list_tail(&t->request_list, struct subaction, link);
+ if (prev->ack != ACK_PENDING) {
+ /*
+ * error, should not get response unless last request got
+ * ack_pending.
+ */
+ }
+
+ if (packet_info[prev->packet.common.tcode].response_tcode !=
+ sa->packet.common.tcode) {
+ /* error, tcode mismatch */
+ }
+ }
+
+ list_append(&t->response_list, &sa->link);
+
+ switch (sa->ack) {
+ case ACK_COMPLETE:
+ case ACK_NO_ACK:
+ case ACK_DATA_ERROR:
+ case ACK_TYPE_ERROR:
+ list_remove(&t->link);
+ handle_transaction(t);
+ /* transaction complete, remove t from pending list. */
+ break;
+
+ case ACK_PENDING:
+ /* error for responses. */
+ break;
+
+ case ACK_BUSY_X:
+ case ACK_BUSY_A:
+ case ACK_BUSY_B:
+ /* no problem, wait for next retry */
+ break;
+ }
+
+ return 1;
+}
+
+static int
+handle_packet(uint32_t *data, size_t length)
+{
+ if (length == 0) {
+ printf("bus reset\r\n");
+ clear_pending_transaction_list();
+ } else if (length > sizeof(struct phy_packet)) {
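+		/* Longer than a phy packet: treat it as a link packet. */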
+ struct link_packet *p = (struct link_packet *) data;
+
+ switch (packet_info[p->common.tcode].type) {
+ case PACKET_REQUEST:
+ return handle_request_packet(data, length);
+
+ case PACKET_RESPONSE:
+ return handle_response_packet(data, length);
+
+ case PACKET_OTHER:
+ case PACKET_RESERVED:
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static unsigned int
+get_bits(struct link_packet *packet, int offset, int width)
+{
+ uint32_t *data = (uint32_t *) packet;
+ uint32_t index, shift, mask;
+
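+	/* Skip the leading snoop timestamp quadlet; offsets are header-relative. */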
+ index = offset / 32 + 1;
+ shift = 32 - (offset & 31) - width;
+ mask = width == 32 ? ~0 : (1 << width) - 1;
+
+ return (data[index] >> shift) & mask;
+}
+
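+/*
+ * dump_data() prints payload bytes in bus order; on little-endian hosts
+ * the byte order within each host quadlet must be reversed.
+ */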
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define byte_index(i) ((i) ^ 3)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define byte_index(i) (i)
+#else
+#error unsupported byte order.
+#endif
+
+static void
+dump_data(unsigned char *data, int length)
+{
+ int i, print_length;
+
+ if (length > 128)
+ print_length = 128;
+ else
+ print_length = length;
+
+ for (i = 0; i < print_length; i++)
+ printf("%s%02hhx",
+ (i % 4 == 0 && i != 0) ? " " : "",
+ data[byte_index(i)]);
+
+ if (print_length < length)
+ printf(" (%d more bytes)", length - print_length);
+}
+
+static void
+decode_link_packet(struct link_packet *packet, size_t length,
+ int include_flags, int exclude_flags)
+{
+ const struct packet_info *pi;
+ int data_length = 0;
+ int i;
+
+ pi = &packet_info[packet->common.tcode];
+
+ for (i = 0; i < pi->field_count; i++) {
+ const struct packet_field *f = &pi->fields[i];
+ int offset;
+
+ if (f->flags & exclude_flags)
+ continue;
+ if (include_flags && !(f->flags & include_flags))
+ continue;
+
+ if (f->offset < 0)
+ offset = length * 8 + f->offset - 32;
+ else
+ offset = f->offset;
+
+ if (f->value_names != NULL) {
+ uint32_t bits;
+
+ bits = get_bits(packet, offset, f->width);
+ printf("%s", f->value_names[bits]);
+ } else if (f->width == 0) {
+ printf("%s=[", f->name);
+ dump_data((unsigned char *) packet + (offset / 8 + 4), data_length);
+ printf("]");
+ } else {
+ unsigned long long bits;
+ int high_width, low_width;
+
+ if ((offset & ~31) != ((offset + f->width - 1) & ~31)) {
+ /* Bit field spans quadlet boundary. */
+ high_width = ((offset + 31) & ~31) - offset;
+ low_width = f->width - high_width;
+
+ bits = get_bits(packet, offset, high_width);
+ bits = (bits << low_width) |
+ get_bits(packet, offset + high_width, low_width);
+ } else {
+ bits = get_bits(packet, offset, f->width);
+ }
+
+ printf("%s=0x%0*llx", f->name, (f->width + 3) / 4, bits);
+
+ if (f->flags & PACKET_FIELD_DATA_LENGTH)
+ data_length = bits;
+ }
+
+ if (i < pi->field_count - 1)
+ printf(", ");
+ }
+}
+
+static void
+print_packet(uint32_t *data, size_t length)
+{
+ int i;
+
+ printf("%6u ", data[0]);
+
+ if (length == 4) {
+ printf("bus reset");
+ } else if (length < sizeof(struct phy_packet)) {
+ printf("short packet: ");
+ for (i = 1; i < length / 4; i++)
+			printf("%s%08x", i == 1 ? "[" : " ", data[i]);
+ printf("]");
+
+ } else if (length == sizeof(struct phy_packet) && data[1] == ~data[2]) {
+ struct phy_packet *pp = (struct phy_packet *) data;
+
+		/*
+		 * Phy packets are three quadlets: the one-quadlet payload,
+		 * the bitwise inverse of the payload, and the snoop-mode ack.
+		 */
+
+ switch (pp->common.identifier) {
+ case PHY_PACKET_CONFIGURATION:
+ if (!pp->phy_config.set_root && !pp->phy_config.set_gap_count) {
+ printf("ext phy config: phy_id=%02x", pp->phy_config.root_id);
+ } else {
+ printf("phy config:");
+ if (pp->phy_config.set_root)
+ printf(" set_root_id=%02x", pp->phy_config.root_id);
+ if (pp->phy_config.set_gap_count)
+ printf(" set_gap_count=%d", pp->phy_config.gap_count);
+ }
+ break;
+
+ case PHY_PACKET_LINK_ON:
+ printf("link-on packet, phy_id=%02x", pp->link_on.phy_id);
+ break;
+
+ case PHY_PACKET_SELF_ID:
+ if (pp->self_id.extended) {
+ printf("extended self id: phy_id=%02x, seq=%d",
+ pp->ext_self_id.phy_id, pp->ext_self_id.sequence);
+ } else {
+ static const char * const speed_names[] = {
+ "S100", "S200", "S400", "BETA"
+ };
+ printf("self id: phy_id=%02x, link %s, gap_count=%d, speed=%s%s%s",
+ pp->self_id.phy_id,
+ (pp->self_id.link_active ? "active" : "not active"),
+ pp->self_id.gap_count,
+ speed_names[pp->self_id.phy_speed],
+ (pp->self_id.contender ? ", irm contender" : ""),
+ (pp->self_id.initiated_reset ? ", initiator" : ""));
+ }
+ break;
+ default:
+ printf("unknown phy packet: ");
+ for (i = 1; i < length / 4; i++)
+				printf("%s%08x", i == 1 ? "[" : " ", data[i]);
+ printf("]");
+ break;
+ }
+ } else {
+ struct link_packet *packet = (struct link_packet *) data;
+
+ decode_link_packet(packet, length, 0,
+ option_verbose ? 0 : PACKET_FIELD_DETAIL);
+ }
+
+ if (option_hex) {
+ printf(" [");
+ dump_data((unsigned char *) data + 4, length - 4);
+ printf("]");
+ }
+
+ printf("\r\n");
+}
+
+#define HIDE_CURSOR "\033[?25l"
+#define SHOW_CURSOR "\033[?25h"
+#define CLEAR "\033[H\033[2J"
+
+static void
+print_stats(uint32_t *data, size_t length)
+{
+ static int bus_reset_count, short_packet_count, phy_packet_count;
+ static int tcode_count[16];
+ static struct timeval last_update;
+ struct timeval now;
+ int i;
+
+ if (length == 0)
+ bus_reset_count++;
+ else if (length < sizeof(struct phy_packet))
+ short_packet_count++;
+ else if (length == sizeof(struct phy_packet) && data[1] == ~data[2])
+ phy_packet_count++;
+ else {
+ struct link_packet *packet = (struct link_packet *) data;
+ tcode_count[packet->common.tcode]++;
+ }
+
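+	/* Throttle screen updates to roughly twice per second. */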
+ gettimeofday(&now, NULL);
+ if (now.tv_sec <= last_update.tv_sec &&
+ now.tv_usec < last_update.tv_usec + 500000)
+ return;
+
+ last_update = now;
+ printf(CLEAR HIDE_CURSOR
+ " bus resets : %8d\n"
+ " short packets : %8d\n"
+ " phy packets : %8d\n",
+ bus_reset_count, short_packet_count, phy_packet_count);
+
+ for (i = 0; i < array_length(packet_info); i++)
+ if (packet_info[i].type != PACKET_RESERVED)
+ printf(" %-24s: %8d\n", packet_info[i].name, tcode_count[i]);
+ printf(SHOW_CURSOR "\n");
+}
+
+static struct termios saved_attributes;
+
+static void
+reset_input_mode(void)
+{
+ tcsetattr(STDIN_FILENO, TCSANOW, &saved_attributes);
+}
+
+static void
+set_input_mode(void)
+{
+ struct termios tattr;
+
+ /* Make sure stdin is a terminal. */
+ if (!isatty(STDIN_FILENO)) {
+ fprintf(stderr, "Not a terminal.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Save the terminal attributes so we can restore them later. */
+ tcgetattr(STDIN_FILENO, &saved_attributes);
+ atexit(reset_input_mode);
+
+ /* Set the funny terminal modes. */
+ tcgetattr(STDIN_FILENO, &tattr);
+ tattr.c_lflag &= ~(ICANON|ECHO); /* Clear ICANON and ECHO. */
+ tattr.c_cc[VMIN] = 1;
+ tattr.c_cc[VTIME] = 0;
+ tcsetattr(STDIN_FILENO, TCSAFLUSH, &tattr);
+}
+
+int main(int argc, const char *argv[])
+{
+ uint32_t buf[128 * 1024];
+ uint32_t filter;
+ int length, retval, view;
+ int fd = -1;
+ FILE *output = NULL, *input = NULL;
+ poptContext con;
+ char c;
+ struct pollfd pollfds[2];
+
+ sys_sigint_handler = signal(SIGINT, sigint_handler);
+
+ con = poptGetContext(NULL, argc, argv, options, 0);
+ retval = poptGetNextOpt(con);
+ if (retval < -1) {
+ poptPrintUsage(con, stdout, 0);
+ return -1;
+ }
+
+ if (option_version) {
+ printf("dump tool for nosy sniffer, version %s\n", VERSION);
+ return 0;
+ }
+
+ if (__BYTE_ORDER != __LITTLE_ENDIAN)
+ fprintf(stderr, "warning: nosy has only been tested on little "
+ "endian machines\n");
+
+ if (option_input != NULL) {
+ input = fopen(option_input, "r");
+ if (input == NULL) {
+ fprintf(stderr, "Could not open %s, %m\n", option_input);
+ return -1;
+ }
+ } else {
+ fd = open(option_nosy_device, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "Could not open %s, %m\n", option_nosy_device);
+ return -1;
+ }
+ set_input_mode();
+ }
+
+ if (strcmp(option_view, "transaction") == 0)
+ view = VIEW_TRANSACTION;
+ else if (strcmp(option_view, "stats") == 0)
+ view = VIEW_STATS;
+ else
+ view = VIEW_PACKET;
+
+ if (option_output) {
+ output = fopen(option_output, "w");
+ if (output == NULL) {
+ fprintf(stderr, "Could not open %s, %m\n", option_output);
+ return -1;
+ }
+ }
+
+ setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
+
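+	/* Each set bit in the filter enables capture of that transaction code. */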
+ filter = ~0;
+ if (!option_iso)
+ filter &= ~(1 << TCODE_STREAM_DATA);
+ if (!option_cycle_start)
+ filter &= ~(1 << TCODE_CYCLE_START);
+ if (view == VIEW_STATS)
+ filter = ~(1 << TCODE_CYCLE_START);
+
+ ioctl(fd, NOSY_IOC_FILTER, filter);
+
+ ioctl(fd, NOSY_IOC_START);
+
+ pollfds[0].fd = fd;
+ pollfds[0].events = POLLIN;
+ pollfds[1].fd = STDIN_FILENO;
+ pollfds[1].events = POLLIN;
+
+ while (run) {
+ if (input != NULL) {
+ if (fread(&length, sizeof length, 1, input) != 1)
+ return 0;
+ fread(buf, 1, length, input);
+ } else {
+ poll(pollfds, 2, -1);
+ if (pollfds[1].revents) {
+ read(STDIN_FILENO, &c, sizeof c);
+ switch (c) {
+ case 'q':
+ if (output != NULL)
+ fclose(output);
+ return 0;
+ }
+ }
+
+ if (pollfds[0].revents)
+ length = read(fd, buf, sizeof buf);
+ else
+ continue;
+ }
+
+ if (output != NULL) {
+ fwrite(&length, sizeof length, 1, output);
+ fwrite(buf, 1, length, output);
+ }
+
+ switch (view) {
+ case VIEW_TRANSACTION:
+ handle_packet(buf, length);
+ break;
+ case VIEW_PACKET:
+ print_packet(buf, length);
+ break;
+ case VIEW_STATS:
+ print_stats(buf, length);
+ break;
+ }
+ }
+
+ if (output != NULL)
+ fclose(output);
+
+ close(fd);
+
+ poptFreeContext(con);
+
+ return 0;
+}
diff --git a/tools/firewire/nosy-dump.h b/tools/firewire/nosy-dump.h
new file mode 100644
index 000000000..69e5e594f
--- /dev/null
+++ b/tools/firewire/nosy-dump.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __nosy_dump_h__
+#define __nosy_dump_h__
+
+#define array_length(array) (sizeof(array) / sizeof(array[0]))
+
+#define ACK_NO_ACK 0x0
+#define ACK_DONE(a) ((a >> 2) == 0)
+#define ACK_BUSY(a) ((a >> 2) == 1)
+#define ACK_ERROR(a) ((a >> 2) == 3)
+
+#include <stdint.h>
+
+struct phy_packet {
+ uint32_t timestamp;
+ union {
+ struct {
+ uint32_t zero:24;
+ uint32_t phy_id:6;
+ uint32_t identifier:2;
+ } common, link_on;
+
+ struct {
+ uint32_t zero:16;
+ uint32_t gap_count:6;
+ uint32_t set_gap_count:1;
+ uint32_t set_root:1;
+ uint32_t root_id:6;
+ uint32_t identifier:2;
+ } phy_config;
+
+ struct {
+ uint32_t more_packets:1;
+ uint32_t initiated_reset:1;
+ uint32_t port2:2;
+ uint32_t port1:2;
+ uint32_t port0:2;
+ uint32_t power_class:3;
+ uint32_t contender:1;
+ uint32_t phy_delay:2;
+ uint32_t phy_speed:2;
+ uint32_t gap_count:6;
+ uint32_t link_active:1;
+ uint32_t extended:1;
+ uint32_t phy_id:6;
+ uint32_t identifier:2;
+ } self_id;
+
+ struct {
+ uint32_t more_packets:1;
+ uint32_t reserved1:1;
+ uint32_t porth:2;
+ uint32_t portg:2;
+ uint32_t portf:2;
+ uint32_t porte:2;
+ uint32_t portd:2;
+ uint32_t portc:2;
+ uint32_t portb:2;
+ uint32_t porta:2;
+ uint32_t reserved0:2;
+ uint32_t sequence:3;
+ uint32_t extended:1;
+ uint32_t phy_id:6;
+ uint32_t identifier:2;
+ } ext_self_id;
+ };
+ uint32_t inverted;
+ uint32_t ack;
+};
+
+#define TCODE_PHY_PACKET 0x10
+
+#define PHY_PACKET_CONFIGURATION 0x00
+#define PHY_PACKET_LINK_ON 0x01
+#define PHY_PACKET_SELF_ID 0x02
+
+struct link_packet {
+ uint32_t timestamp;
+ union {
+ struct {
+ uint32_t priority:4;
+ uint32_t tcode:4;
+ uint32_t rt:2;
+ uint32_t tlabel:6;
+ uint32_t destination:16;
+
+ uint32_t offset_high:16;
+ uint32_t source:16;
+
+ uint32_t offset_low;
+ } common;
+
+ struct {
+ uint32_t common[3];
+ uint32_t crc;
+ } read_quadlet;
+
+ struct {
+ uint32_t common[3];
+ uint32_t data;
+ uint32_t crc;
+ } read_quadlet_response;
+
+ struct {
+ uint32_t common[3];
+ uint32_t extended_tcode:16;
+ uint32_t data_length:16;
+ uint32_t crc;
+ } read_block;
+
+ struct {
+ uint32_t common[3];
+ uint32_t extended_tcode:16;
+ uint32_t data_length:16;
+ uint32_t crc;
+ uint32_t data[0];
+ /* crc and ack follows. */
+ } read_block_response;
+
+ struct {
+ uint32_t common[3];
+ uint32_t data;
+ uint32_t crc;
+ } write_quadlet;
+
+ struct {
+ uint32_t common[3];
+ uint32_t extended_tcode:16;
+ uint32_t data_length:16;
+ uint32_t crc;
+ uint32_t data[0];
+ /* crc and ack follows. */
+ } write_block;
+
+ struct {
+ uint32_t common[3];
+ uint32_t crc;
+ } write_response;
+
+ struct {
+ uint32_t common[3];
+ uint32_t data;
+ uint32_t crc;
+ } cycle_start;
+
+ struct {
+ uint32_t sy:4;
+ uint32_t tcode:4;
+ uint32_t channel:6;
+ uint32_t tag:2;
+ uint32_t data_length:16;
+
+ uint32_t crc;
+ } iso_data;
+ };
+};
+
+struct subaction {
+ uint32_t ack;
+ size_t length;
+ struct list link;
+ struct link_packet packet;
+};
+
+struct link_transaction {
+ int request_node, response_node, tlabel;
+ struct subaction *request, *response;
+ struct list request_list, response_list;
+ struct list link;
+};
+
+int decode_fcp(struct link_transaction *t);
+
+#endif /* __nosy_dump_h__ */
diff --git a/tools/gpio/.gitignore b/tools/gpio/.gitignore
new file mode 100644
index 000000000..9e9dd4b68
--- /dev/null
+++ b/tools/gpio/.gitignore
@@ -0,0 +1,4 @@
+gpio-event-mon
+gpio-hammer
+lsgpio
+
diff --git a/tools/gpio/Build b/tools/gpio/Build
new file mode 100644
index 000000000..4141f3583
--- /dev/null
+++ b/tools/gpio/Build
@@ -0,0 +1,4 @@
+gpio-utils-y += gpio-utils.o
+lsgpio-y += lsgpio.o gpio-utils.o
+gpio-hammer-y += gpio-hammer.o gpio-utils.o
+gpio-event-mon-y += gpio-event-mon.o gpio-utils.o
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
new file mode 100644
index 000000000..3dbf7e8b0
--- /dev/null
+++ b/tools/gpio/Makefile
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+# This will work when gpio is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following to be outside of kernel tree
+#
+$(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h
+ mkdir -p $(OUTPUT)include/linux 2>&1 || true
+ ln -sf $(CURDIR)/../../include/uapi/linux/gpio.h $@
+
+prepare: $(OUTPUT)include/linux/gpio.h
+
+GPIO_UTILS_IN := $(OUTPUT)gpio-utils-in.o
+$(GPIO_UTILS_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=gpio-utils
+
+#
+# lsgpio
+#
+LSGPIO_IN := $(OUTPUT)lsgpio-in.o
+$(LSGPIO_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
+ $(Q)$(MAKE) $(build)=lsgpio
+$(OUTPUT)lsgpio: $(LSGPIO_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+#
+# gpio-hammer
+#
+GPIO_HAMMER_IN := $(OUTPUT)gpio-hammer-in.o
+$(GPIO_HAMMER_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
+ $(Q)$(MAKE) $(build)=gpio-hammer
+$(OUTPUT)gpio-hammer: $(GPIO_HAMMER_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+#
+# gpio-event-mon
+#
+GPIO_EVENT_MON_IN := $(OUTPUT)gpio-event-mon-in.o
+$(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
+ $(Q)$(MAKE) $(build)=gpio-event-mon
+$(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+clean:
+ rm -f $(ALL_PROGRAMS)
+ rm -f $(OUTPUT)include/linux/gpio.h
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
new file mode 100644
index 000000000..c864544ef
--- /dev/null
+++ b/tools/gpio/gpio-event-mon.c
@@ -0,0 +1,194 @@
+/*
+ * gpio-event-mon - monitor GPIO line events from userspace
+ *
+ * Copyright (C) 2016 Linus Walleij
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * Usage:
+ * gpio-event-mon -n <device-name> -o <offset>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <linux/gpio.h>
+
+int monitor_device(const char *device_name,
+ unsigned int line,
+ uint32_t handleflags,
+ uint32_t eventflags,
+ unsigned int loops)
+{
+ struct gpioevent_request req;
+ struct gpiohandle_data data;
+ char *chrdev_name;
+ int fd;
+ int ret;
+ int i = 0;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", chrdev_name);
+ goto exit_close_error;
+ }
+
+ req.lineoffset = line;
+ req.handleflags = handleflags;
+ req.eventflags = eventflags;
+ strcpy(req.consumer_label, "gpio-event-mon");
+
+ ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue GET EVENT "
+ "IOCTL (%d)\n",
+ ret);
+ goto exit_close_error;
+ }
+
+ /* Read initial states */
+ ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE "
+ "VALUES IOCTL (%d)\n",
+ ret);
+ goto exit_close_error;
+ }
+
+ fprintf(stdout, "Monitoring line %d on %s\n", line, device_name);
+ fprintf(stdout, "Initial line value: %d\n", data.values[0]);
+
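+	/* Block on the event fd; each read yields one struct gpioevent_data. */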
+ while (1) {
+ struct gpioevent_data event;
+
+ ret = read(req.fd, &event, sizeof(event));
+ if (ret == -1) {
+			if (errno == EAGAIN) {
+ fprintf(stderr, "nothing available\n");
+ continue;
+ } else {
+ ret = -errno;
+ fprintf(stderr, "Failed to read event (%d)\n",
+ ret);
+ break;
+ }
+ }
+
+ if (ret != sizeof(event)) {
+ fprintf(stderr, "Reading event failed\n");
+ ret = -EIO;
+ break;
+ }
+ fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
+ switch (event.id) {
+ case GPIOEVENT_EVENT_RISING_EDGE:
+ fprintf(stdout, "rising edge");
+ break;
+ case GPIOEVENT_EVENT_FALLING_EDGE:
+ fprintf(stdout, "falling edge");
+ break;
+ default:
+ fprintf(stdout, "unknown event");
+ }
+ fprintf(stdout, "\n");
+
+ i++;
+ if (i == loops)
+ break;
+ }
+
+exit_close_error:
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+ free(chrdev_name);
+ return ret;
+}
+
+void print_usage(void)
+{
+ fprintf(stderr, "Usage: gpio-event-mon [options]...\n"
+ "Listen to events on GPIO lines, 0->1 1->0\n"
+ " -n <name> Listen on GPIOs on a named device (must be stated)\n"
+ " -o <n> Offset to monitor\n"
+ " -d Set line as open drain\n"
+ " -s Set line as open source\n"
+ " -r Listen for rising edges\n"
+ " -f Listen for falling edges\n"
+ " [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n"
+ " -? This helptext\n"
+ "\n"
+ "Example:\n"
+ "gpio-event-mon -n gpiochip0 -o 4 -r -f\n"
+ );
+}
+
+int main(int argc, char **argv)
+{
+ const char *device_name = NULL;
+ unsigned int line = -1;
+ unsigned int loops = 0;
+ uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
+ uint32_t eventflags = 0;
+ int c;
+
+ while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
+ switch (c) {
+ case 'c':
+ loops = strtoul(optarg, NULL, 10);
+ break;
+ case 'n':
+ device_name = optarg;
+ break;
+ case 'o':
+ line = strtoul(optarg, NULL, 10);
+ break;
+ case 'd':
+ handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN;
+ break;
+ case 's':
+ handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE;
+ break;
+ case 'r':
+ eventflags |= GPIOEVENT_REQUEST_RISING_EDGE;
+ break;
+ case 'f':
+ eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE;
+ break;
+ case '?':
+ print_usage();
+ return -1;
+ }
+ }
+
+ if (!device_name || line == -1) {
+ print_usage();
+ return -1;
+ }
+ if (!eventflags) {
+ printf("No flags specified, listening on both rising and "
+ "falling edges\n");
+ eventflags = GPIOEVENT_REQUEST_BOTH_EDGES;
+ }
+ return monitor_device(device_name, line, handleflags,
+ eventflags, loops);
+}
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
new file mode 100644
index 000000000..3da5462a0
--- /dev/null
+++ b/tools/gpio/gpio-hammer.c
@@ -0,0 +1,171 @@
+/*
+ * gpio-hammer - example swiss army knife to shake GPIO lines on a system
+ *
+ * Copyright (C) 2016 Linus Walleij
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * Usage:
+ * gpio-hammer -n <device-name> -o <offset1> -o <offset2>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+#include "gpio-utils.h"
+
+int hammer_device(const char *device_name, unsigned int *lines, int nlines,
+ unsigned int loops)
+{
+ struct gpiohandle_data data;
+ char swirr[] = "-\\|/";
+ int fd;
+ int ret;
+ int i, j;
+ unsigned int iteration = 0;
+
+ memset(&data.values, 0, sizeof(data.values));
+ ret = gpiotools_request_linehandle(device_name, lines, nlines,
+ GPIOHANDLE_REQUEST_OUTPUT, &data,
+ "gpio-hammer");
+ if (ret < 0)
+ goto exit_error;
+ else
+ fd = ret;
+
+ ret = gpiotools_get_values(fd, &data);
+ if (ret < 0)
+ goto exit_close_error;
+
+ fprintf(stdout, "Hammer lines [");
+ for (i = 0; i < nlines; i++) {
+ fprintf(stdout, "%d", lines[i]);
+ if (i != (nlines - 1))
+ fprintf(stdout, ", ");
+ }
+ fprintf(stdout, "] on %s, initial states: [", device_name);
+ for (i = 0; i < nlines; i++) {
+ fprintf(stdout, "%d", data.values[i]);
+ if (i != (nlines - 1))
+ fprintf(stdout, ", ");
+ }
+ fprintf(stdout, "]\n");
+
+ /* Hammertime! */
+ j = 0;
+ while (1) {
+ /* Invert all lines so we blink */
+ for (i = 0; i < nlines; i++)
+ data.values[i] = !data.values[i];
+
+ ret = gpiotools_set_values(fd, &data);
+ if (ret < 0)
+ goto exit_close_error;
+
+ /* Re-read values to get status */
+ ret = gpiotools_get_values(fd, &data);
+ if (ret < 0)
+ goto exit_close_error;
+
+ fprintf(stdout, "[%c] ", swirr[j]);
+ j++;
+ if (j == sizeof(swirr)-1)
+ j = 0;
+
+ fprintf(stdout, "[");
+ for (i = 0; i < nlines; i++) {
+ fprintf(stdout, "%d: %d", lines[i], data.values[i]);
+ if (i != (nlines - 1))
+ fprintf(stdout, ", ");
+ }
+ fprintf(stdout, "]\r");
+ fflush(stdout);
+ sleep(1);
+ iteration++;
+ if (loops && iteration == loops)
+ break;
+ }
+ fprintf(stdout, "\n");
+ ret = 0;
+
+exit_close_error:
+ gpiotools_release_linehandle(fd);
+exit_error:
+ return ret;
+}
+
+void print_usage(void)
+{
+ fprintf(stderr, "Usage: gpio-hammer [options]...\n"
+ "Hammer GPIO lines, 0->1->0->1...\n"
+ " -n <name> Hammer GPIOs on a named device (must be stated)\n"
+ " -o <n> Offset[s] to hammer, at least one, several can be stated\n"
+ " [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n"
+ " -? This helptext\n"
+ "\n"
+ "Example:\n"
+ "gpio-hammer -n gpiochip0 -o 4\n"
+ );
+}
+
+int main(int argc, char **argv)
+{
+ const char *device_name = NULL;
+ unsigned int lines[GPIOHANDLES_MAX];
+ unsigned int loops = 0;
+ int nlines;
+ int c;
+ int i;
+
+ i = 0;
+ while ((c = getopt(argc, argv, "c:n:o:?")) != -1) {
+ switch (c) {
+ case 'c':
+ loops = strtoul(optarg, NULL, 10);
+ break;
+ case 'n':
+ device_name = optarg;
+ break;
+ case 'o':
+			/*
+			 * Avoid overflow. Do not error out immediately; keep
+			 * counting so that the error message can report the
+			 * exact number of times '-o' was given.
+			 */
+ if (i < GPIOHANDLES_MAX)
+ lines[i] = strtoul(optarg, NULL, 10);
+
+ i++;
+ break;
+ case '?':
+ print_usage();
+ return -1;
+ }
+ }
+
+ if (i >= GPIOHANDLES_MAX) {
+ fprintf(stderr,
+			"Only %d occurrences of '-o' are allowed, %d were found\n",
+			GPIOHANDLES_MAX, i);
+ return -1;
+ }
+
+ nlines = i;
+
+ if (!device_name || !nlines) {
+ print_usage();
+ return -1;
+ }
+ return hammer_device(device_name, lines, nlines, loops);
+}
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
new file mode 100644
index 000000000..cf7e2f341
--- /dev/null
+++ b/tools/gpio/gpio-utils.c
@@ -0,0 +1,270 @@
+/*
+ * GPIO tools - helpers library for the GPIO tools
+ *
+ * Copyright (C) 2015 Linus Walleij
+ * Copyright (C) 2016 Bamvor Jian Zhang
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+#include "gpio-utils.h"
+
+#define CONSUMER "gpio-utils"
+
+/**
+ * doc: Operation of gpio
+ *
+ * Provides an API for gpiochips through the chardev interface. There are
+ * two types of API. The first offers the same functions as the
+ * individual ioctls: request and release GPIO lines, and read or write
+ * their values. Users who do many reads and writes on the same lines
+ * should use this type of API.
+ *
+ * The second is an easy-to-use convenience API. Each of the following
+ * functions requests the GPIO lines, performs the operation, and then
+ * releases the lines again.
+ */
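+/*
+ * Example use of the convenience API; the chip name "gpiochip0" and
+ * line offset 4 are illustrative only:
+ *
+ *	gpiotools_set("gpiochip0", 4, 1);
+ *	int value = gpiotools_get("gpiochip0", 4);
+ */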
+/**
+ * gpiotools_request_linehandle() - request gpio lines in a gpiochip
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @lines:		An array of desired lines, specified by offset
+ *			index for the associated GPIO device.
+ * @nlines:		The number of lines to request.
+ * @flag:		The flags for the requested GPIO lines. See
+ *			"linux/gpio.h" for the meaning of the flags.
+ * @data:		Default values to set on the lines when @flag
+ *			contains GPIOHANDLE_REQUEST_OUTPUT.
+ * @consumer_label:	The name of the consumer, such as "sysfs" or
+ *			"powerkey". This lets other users see who is
+ *			using the lines.
+ *
+ * Request gpio lines through the ioctl provided by chardev. User
+ * could call gpiotools_set_values() and gpiotools_get_values() to
+ * read and write respectively through the returned fd. Call
+ * gpiotools_release_linehandle() to release these lines after that.
+ *
+ * Return: On success return the fd;
+ * On failure return the errno.
+ */
+int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
+ unsigned int nlines, unsigned int flag,
+ struct gpiohandle_data *data,
+ const char *consumer_label)
+{
+ struct gpiohandle_request req;
+ char *chrdev_name;
+ int fd;
+ int i;
+ int ret;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s, %s\n",
+ chrdev_name, strerror(errno));
+ goto exit_close_error;
+ }
+
+ for (i = 0; i < nlines; i++)
+ req.lineoffsets[i] = lines[i];
+
+ req.flags = flag;
+ strcpy(req.consumer_label, consumer_label);
+ req.lines = nlines;
+ if (flag & GPIOHANDLE_REQUEST_OUTPUT)
+ memcpy(req.default_values, data, sizeof(req.default_values));
+
+ ret = ioctl(fd, GPIO_GET_LINEHANDLE_IOCTL, &req);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIO_GET_LINEHANDLE_IOCTL", ret, strerror(errno));
+ }
+
+exit_close_error:
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+ free(chrdev_name);
+ return ret < 0 ? ret : req.fd;
+}
+/**
+ * gpiotools_set_values(): Set the value of gpio(s)
+ * @fd: The fd returned by
+ * gpiotools_request_linehandle().
+ * @data:		The array of values to set.
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
+{
+ int ret;
+
+ ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIOHANDLE_SET_LINE_VALUES_IOCTL", ret,
+ strerror(errno));
+ }
+
+ return ret;
+}
+
+/**
+ * gpiotools_get_values(): Get the value of gpio(s)
+ * @fd: The fd returned by
+ * gpiotools_request_linehandle().
+ * @data:		The array of values read from hardware.
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_get_values(const int fd, struct gpiohandle_data *data)
+{
+ int ret;
+
+ ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIOHANDLE_GET_LINE_VALUES_IOCTL", ret,
+ strerror(errno));
+ }
+
+ return ret;
+}
+
+/**
+ * gpiotools_release_linehandle(): Release the line(s) of gpiochip
+ * @fd: The fd returned by
+ * gpiotools_request_linehandle().
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_release_linehandle(const int fd)
+{
+ int ret;
+
+ ret = close(fd);
+ if (ret == -1) {
+ perror("Failed to close GPIO LINEHANDLE device file");
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+/**
+ * gpiotools_get(): Get value from specific line
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @line:		The line offset on the gpiochip, such as 2.
+ *
+ * Return: On success return the value of the line (0 or 1);
+ *	   On failure the value is undefined.
+ */
+int gpiotools_get(const char *device_name, unsigned int line)
+{
+ struct gpiohandle_data data;
+ unsigned int lines[] = {line};
+
+ gpiotools_gets(device_name, lines, 1, &data);
+ return data.values[0];
+}
+
+
+/**
+ * gpiotools_gets(): Get values from specific lines.
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0".
+ * @lines:		An array of desired lines, specified by offset
+ *			index for the associated GPIO device.
+ * @nlines:		The number of lines to request.
+ * @data:		The array of values read from the gpiochip.
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_gets(const char *device_name, unsigned int *lines,
+ unsigned int nlines, struct gpiohandle_data *data)
+{
+ int fd;
+ int ret;
+ int ret_close;
+
+ ret = gpiotools_request_linehandle(device_name, lines, nlines,
+ GPIOHANDLE_REQUEST_INPUT, data,
+					   CONSUMER);
+ if (ret < 0)
+ return ret;
+
+ fd = ret;
+ ret = gpiotools_get_values(fd, data);
+ ret_close = gpiotools_release_linehandle(fd);
+ return ret < 0 ? ret : ret_close;
+}
+
+/**
+ * gpiotools_set(): Set value to specific line
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @line:		The line offset on the gpiochip, such as 2.
+ * @value:		The value to set, either 0 (low) or 1 (high).
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_set(const char *device_name, unsigned int line,
+ unsigned int value)
+{
+ struct gpiohandle_data data;
+ unsigned int lines[] = {line};
+
+ data.values[0] = value;
+ return gpiotools_sets(device_name, lines, 1, &data);
+}
+
+/**
+ * gpiotools_sets(): Set values to specific lines.
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0".
+ * @lines:		An array of desired lines, specified by offset
+ *			index for the associated GPIO device.
+ * @nlines:		The number of lines to request.
+ * @data:		The array of values to set on the gpiochip;
+ *			each must be 0 (low) or 1 (high).
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_sets(const char *device_name, unsigned int *lines,
+ unsigned int nlines, struct gpiohandle_data *data)
+{
+ int ret;
+
+ ret = gpiotools_request_linehandle(device_name, lines, nlines,
+ GPIOHANDLE_REQUEST_OUTPUT, data,
+					   CONSUMER);
+ if (ret < 0)
+ return ret;
+
+ return gpiotools_release_linehandle(ret);
+}
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
new file mode 100644
index 000000000..344ea041f
--- /dev/null
+++ b/tools/gpio/gpio-utils.h
@@ -0,0 +1,43 @@
+/*
+ * GPIO tools - utility helpers library for the GPIO tools
+ *
+ * Copyright (C) 2015 Linus Walleij
+ *
+ * Portions copied from iio_utils and lssio:
+ * Copyright (c) 2010 Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
+ * Copyright (c) 2008 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#ifndef _GPIO_UTILS_H_
+#define _GPIO_UTILS_H_
+
+#include <string.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static inline int check_prefix(const char *str, const char *prefix)
+{
+ return strlen(str) > strlen(prefix) &&
+ strncmp(str, prefix, strlen(prefix)) == 0;
+}
+
+int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
+ unsigned int nlines, unsigned int flag,
+ struct gpiohandle_data *data,
+ const char *consumer_label);
+int gpiotools_set_values(const int fd, struct gpiohandle_data *data);
+int gpiotools_get_values(const int fd, struct gpiohandle_data *data);
+int gpiotools_release_linehandle(const int fd);
+
+int gpiotools_get(const char *device_name, unsigned int line);
+int gpiotools_gets(const char *device_name, unsigned int *lines,
+ unsigned int nlines, struct gpiohandle_data *data);
+int gpiotools_set(const char *device_name, unsigned int line,
+ unsigned int value);
+int gpiotools_sets(const char *device_name, unsigned int *lines,
+ unsigned int nlines, struct gpiohandle_data *data);
+
+#endif /* _GPIO_UTILS_H_ */
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
new file mode 100644
index 000000000..eb3f56efd
--- /dev/null
+++ b/tools/gpio/lsgpio.c
@@ -0,0 +1,195 @@
+/*
+ * lsgpio - example on how to list the GPIO lines on a system
+ *
+ * Copyright (C) 2015 Linus Walleij
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * Usage:
+ * lsgpio <-n device-name>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+
+#include "gpio-utils.h"
+
+struct gpio_flag {
+ char *name;
+ unsigned long mask;
+};
+
+struct gpio_flag flagnames[] = {
+ {
+ .name = "kernel",
+ .mask = GPIOLINE_FLAG_KERNEL,
+ },
+ {
+ .name = "output",
+ .mask = GPIOLINE_FLAG_IS_OUT,
+ },
+ {
+ .name = "active-low",
+ .mask = GPIOLINE_FLAG_ACTIVE_LOW,
+ },
+ {
+ .name = "open-drain",
+ .mask = GPIOLINE_FLAG_OPEN_DRAIN,
+ },
+ {
+ .name = "open-source",
+ .mask = GPIOLINE_FLAG_OPEN_SOURCE,
+ },
+};
+
+void print_flags(unsigned long flags)
+{
+ int i;
+ int printed = 0;
+
+ for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
+ if (flags & flagnames[i].mask) {
+ if (printed)
+ fprintf(stdout, " ");
+ fprintf(stdout, "%s", flagnames[i].name);
+ printed++;
+ }
+ }
+}
+
+int list_device(const char *device_name)
+{
+ struct gpiochip_info cinfo;
+ char *chrdev_name;
+ int fd;
+ int ret;
+ int i;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", chrdev_name);
+ goto exit_close_error;
+ }
+
+ /* Inspect this GPIO chip */
+ ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &cinfo);
+ if (ret == -1) {
+ ret = -errno;
+		perror("Failed to issue CHIPINFO IOCTL");
+ goto exit_close_error;
+ }
+ fprintf(stdout, "GPIO chip: %s, \"%s\", %u GPIO lines\n",
+ cinfo.name, cinfo.label, cinfo.lines);
+
+ /* Loop over the lines and print info */
+ for (i = 0; i < cinfo.lines; i++) {
+ struct gpioline_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.line_offset = i;
+
+ ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+ if (ret == -1) {
+ ret = -errno;
+			perror("Failed to issue LINEINFO IOCTL");
+ goto exit_close_error;
+ }
+ fprintf(stdout, "\tline %2d:", linfo.line_offset);
+ if (linfo.name[0])
+ fprintf(stdout, " \"%s\"", linfo.name);
+ else
+ fprintf(stdout, " unnamed");
+ if (linfo.consumer[0])
+ fprintf(stdout, " \"%s\"", linfo.consumer);
+ else
+ fprintf(stdout, " unused");
+ if (linfo.flags) {
+ fprintf(stdout, " [");
+ print_flags(linfo.flags);
+ fprintf(stdout, "]");
+ }
+ fprintf(stdout, "\n");
+
+ }
+
+exit_close_error:
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+ free(chrdev_name);
+ return ret;
+}
+
+void print_usage(void)
+{
+ fprintf(stderr, "Usage: lsgpio [options]...\n"
+ "List GPIO chips, lines and states\n"
+ " -n <name> List GPIOs on a named device\n"
+ " -? This helptext\n"
+ );
+}
+
+int main(int argc, char **argv)
+{
+ const char *device_name = NULL;
+ int ret;
+ int c;
+
+ while ((c = getopt(argc, argv, "n:")) != -1) {
+ switch (c) {
+ case 'n':
+ device_name = optarg;
+ break;
+ case '?':
+ print_usage();
+ return -1;
+ }
+ }
+
+ if (device_name)
+ ret = list_device(device_name);
+ else {
+ const struct dirent *ent;
+ DIR *dp;
+
+ /* List all GPIO devices one at a time */
+ dp = opendir("/dev");
+ if (!dp) {
+ ret = -errno;
+ goto error_out;
+ }
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent) {
+ if (check_prefix(ent->d_name, "gpiochip")) {
+ ret = list_device(ent->d_name);
+ if (ret)
+ break;
+ }
+ }
+
+ ret = 0;
+ if (closedir(dp) == -1) {
+ perror("scanning devices: Failed to close directory");
+ ret = -errno;
+ }
+ }
+error_out:
+ return ret;
+}
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
new file mode 100644
index 000000000..5db5e62ce
--- /dev/null
+++ b/tools/hv/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for Hyper-V tools
+
+WARNINGS = -Wall -Wextra
+CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
+
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
+sbindir ?= /usr/sbin
+libexecdir ?= /usr/libexec
+sharedstatedir ?= /var/lib
+
+ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+
+ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
+
+all: $(ALL_PROGRAMS)
+
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+
+install: all
+ install -d -m 755 $(DESTDIR)$(sbindir); \
+ install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
+ install -d -m 755 $(DESTDIR)$(sharedstatedir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program -m 755 $(DESTDIR)$(sbindir); \
+ done; \
+ install -m 755 lsvmbus $(DESTDIR)$(sbindir); \
+ for script in $(ALL_SCRIPTS); do \
+ install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
+ done
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
new file mode 100644
index 000000000..8ff8cb1a1
--- /dev/null
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -0,0 +1,248 @@
+/*
+ * An implementation of host to guest copy functionality for Linux.
+ *
+ * Copyright (C) 2014, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ */
+
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/hyperv.h>
+#include <linux/limits.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <getopt.h>
+
+static int target_fd;
+static char target_fname[PATH_MAX];
+static unsigned long long filesize;
+
+static int hv_start_fcopy(struct hv_start_fcopy *smsg)
+{
+ int error = HV_E_FAIL;
+ char *q, *p;
+
+ filesize = 0;
+ p = (char *)smsg->path_name;
+ snprintf(target_fname, sizeof(target_fname), "%s/%s",
+ (char *)smsg->path_name, (char *)smsg->file_name);
+
+ syslog(LOG_INFO, "Target file name: %s", target_fname);
+ /*
+ * Check to see if the path is already in place; if not,
+ * create if required.
+ */
+ while ((q = strchr(p, '/')) != NULL) {
+ if (q == p) {
+ p++;
+ continue;
+ }
+ *q = '\0';
+ if (access((char *)smsg->path_name, F_OK)) {
+ if (smsg->copy_flags & CREATE_PATH) {
+ if (mkdir((char *)smsg->path_name, 0755)) {
+ syslog(LOG_ERR, "Failed to create %s",
+ (char *)smsg->path_name);
+ goto done;
+ }
+ } else {
+ syslog(LOG_ERR, "Invalid path: %s",
+ (char *)smsg->path_name);
+ goto done;
+ }
+ }
+ p = q + 1;
+ *q = '/';
+ }
+
+ if (!access(target_fname, F_OK)) {
+ syslog(LOG_INFO, "File: %s exists", target_fname);
+ if (!(smsg->copy_flags & OVER_WRITE)) {
+ error = HV_ERROR_ALREADY_EXISTS;
+ goto done;
+ }
+ }
+
+ target_fd = open(target_fname,
+ O_RDWR | O_CREAT | O_TRUNC | O_CLOEXEC, 0744);
+ if (target_fd == -1) {
+ syslog(LOG_INFO, "Open Failed: %s", strerror(errno));
+ goto done;
+ }
+
+ error = 0;
+done:
+ return error;
+}
+
+static int hv_copy_data(struct hv_do_fcopy *cpmsg)
+{
+ ssize_t bytes_written;
+ int ret = 0;
+
+ bytes_written = pwrite(target_fd, cpmsg->data, cpmsg->size,
+ cpmsg->offset);
+
+ filesize += cpmsg->size;
+ if (bytes_written != cpmsg->size) {
+ switch (errno) {
+ case ENOSPC:
+ ret = HV_ERROR_DISK_FULL;
+ break;
+ default:
+ ret = HV_E_FAIL;
+ break;
+ }
+ syslog(LOG_ERR, "pwrite failed to write %llu bytes: %ld (%s)",
+ filesize, (long)bytes_written, strerror(errno));
+ }
+
+ return ret;
+}
+
+static int hv_copy_finished(void)
+{
+ close(target_fd);
+ return 0;
+}
+
+static int hv_copy_cancel(void)
+{
+ close(target_fd);
+ unlink(target_fname);
+ return 0;
+}
+
+void print_usage(char *argv[])
+{
+ fprintf(stderr, "Usage: %s [options]\n"
+ "Options are:\n"
+ " -n, --no-daemon stay in foreground, don't daemonize\n"
+ " -h, --help print this help\n", argv[0]);
+}
+
+int main(int argc, char *argv[])
+{
+ int fcopy_fd;
+ int error;
+ int daemonize = 1, long_index = 0, opt;
+ int version = FCOPY_CURRENT_VERSION;
+ union {
+ struct hv_fcopy_hdr hdr;
+ struct hv_start_fcopy start;
+ struct hv_do_fcopy copy;
+ __u32 kernel_modver;
+ } buffer = { };
+ int in_handshake = 1;
+
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 'h' },
+ {"no-daemon", no_argument, 0, 'n' },
+ {0, 0, 0, 0 }
+ };
+
+ while ((opt = getopt_long(argc, argv, "hn", long_options,
+ &long_index)) != -1) {
+ switch (opt) {
+ case 'n':
+ daemonize = 0;
+ break;
+ case 'h':
+ default:
+ print_usage(argv);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (daemonize && daemon(1, 0)) {
+ syslog(LOG_ERR, "daemon() failed; error: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ openlog("HV_FCOPY", 0, LOG_USER);
+ syslog(LOG_INFO, "starting; pid is:%d", getpid());
+
+ fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
+
+ if (fcopy_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_fcopy failed; error: %d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Register with the kernel.
+ */
+ if ((write(fcopy_fd, &version, sizeof(int))) != sizeof(int)) {
+ syslog(LOG_ERR, "Registration failed: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ while (1) {
+ /*
+ * In this loop we process fcopy messages after the
+ * handshake is complete.
+ */
+ ssize_t len;
+
+ len = pread(fcopy_fd, &buffer, sizeof(buffer), 0);
+ if (len < 0) {
+ syslog(LOG_ERR, "pread failed: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ if (in_handshake) {
+ if (len != sizeof(buffer.kernel_modver)) {
+ syslog(LOG_ERR, "invalid version negotiation");
+ exit(EXIT_FAILURE);
+ }
+ in_handshake = 0;
+ syslog(LOG_INFO, "kernel module version: %u",
+ buffer.kernel_modver);
+ continue;
+ }
+
+ switch (buffer.hdr.operation) {
+ case START_FILE_COPY:
+ error = hv_start_fcopy(&buffer.start);
+ break;
+ case WRITE_TO_FILE:
+ error = hv_copy_data(&buffer.copy);
+ break;
+ case COMPLETE_FCOPY:
+ error = hv_copy_finished();
+ break;
+ case CANCEL_FCOPY:
+ error = hv_copy_cancel();
+ break;
+
+ default:
+ error = HV_E_FAIL;
+ syslog(LOG_ERR, "Unknown operation: %d",
+ buffer.hdr.operation);
+
+ }
+
+ if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
+ syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+}
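hv_start_fcopy() above walks the destination path one '/' at a time, creating missing directory components when CREATE_PATH is set, effectively an in-place "mkdir -p". A standalone sketch of the same technique, with error handling simplified and mode 0755 assumed as in the daemon:

#include <errno.h>
#include <string.h>
#include <sys/stat.h>

/*
 * Create every missing directory component of 'path'. The string is
 * edited in place: each '/' is temporarily replaced with a NUL so the
 * prefix can be handed to mkdir(), then restored.
 */
static int mkdir_parents(char *path)
{
	char *q = path;

	while ((q = strchr(q, '/')) != NULL) {
		if (q != path) {
			*q = '\0';
			if (mkdir(path, 0755) && errno != EEXIST) {
				*q = '/';
				return -1;
			}
			*q = '/';
		}
		q++;
	}
	return 0;
}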
diff --git a/tools/hv/hv_get_dhcp_info.sh b/tools/hv/hv_get_dhcp_info.sh
new file mode 100755
index 000000000..c38686c44
--- /dev/null
+++ b/tools/hv/hv_get_dhcp_info.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This example script retrieves the DHCP state of a given interface.
+# In the interest of keeping the KVP daemon code free of distro-specific
+# information, the KVP daemon invokes this external script to gather the
+# DHCP setting for the specified interface.
+#
+# Input: Name of the interface
+#
+# Output: The script prints the string "Enabled" to stdout to indicate
+# that DHCP is enabled on the interface. If DHCP is not enabled,
+# the script prints the string "Disabled" to stdout.
+#
+# Each distro is expected to implement this script in a distro-specific
+# fashion. For instance, on distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for retrieving DHCP
+# information.
+
+if_file="/etc/sysconfig/network-scripts/ifcfg-$1"
+
+dhcp=$(grep "dhcp" "$if_file" 2>/dev/null)
+
+if [ "$dhcp" != "" ]; then
+	echo "Enabled"
+else
+	echo "Disabled"
+fi
diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh
new file mode 100755
index 000000000..058c17b46
--- /dev/null
+++ b/tools/hv/hv_get_dns_info.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This example script parses /etc/resolv.conf to retrieve DNS information.
+# In the interest of keeping the KVP daemon code free of distro-specific
+# information, the KVP daemon invokes this external script to gather
+# DNS information.
+# This script is expected to print the nameserver values to stdout.
+# Each distro is expected to implement this script in a distro-specific
+# fashion. For instance, on distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for retrieving DNS
+# entries.
+
+awk '/^nameserver/ { print $2 }' /etc/resolv.conf 2>/dev/null
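The KVP daemon consumes this one-address-per-line output through popen(). A minimal sketch of such a consumer, assuming the script path the hv Makefile installs and an illustrative 64-byte address buffer:

#include <stdio.h>
#include <string.h>

/*
 * Run the DNS helper and collect up to 'max' nameserver addresses,
 * one per output line. Returns the count, or -1 if the script could
 * not be started.
 */
static int read_dns_servers(char out[][64], int max)
{
	FILE *f = popen("/usr/libexec/hypervkvpd/hv_get_dns_info", "r");
	char line[64];
	int n = 0;

	if (!f)
		return -1;
	while (n < max && fgets(line, sizeof(line), f)) {
		line[strcspn(line, "\n")] = '\0';	/* strip newline */
		snprintf(out[n++], sizeof(out[0]), "%s", line);
	}
	pclose(f);
	return n;
}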
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
new file mode 100644
index 000000000..ef8a82f29
--- /dev/null
+++ b/tools/hv/hv_kvp_daemon.c
@@ -0,0 +1,1632 @@
+/*
+ * An implementation of key value pair (KVP) functionality for Linux.
+ *
+ *
+ * Copyright (C) 2010, Novell, Inc.
+ * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+
+#include <sys/poll.h>
+#include <sys/utsname.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <linux/hyperv.h>
+#include <ifaddrs.h>
+#include <netdb.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <net/if.h>
+#include <limits.h>
+#include <getopt.h>
+
+/*
+ * KVP protocol: The user-mode component first registers with the
+ * kernel component. Subsequently, the kernel component requests data
+ * for the specified keys. In response to this message, the user-mode
+ * component fills in the value corresponding to the specified key. We
+ * overload the sequence field in the cn_msg header to define our KVP
+ * message types.
+ *
+ * We also use this infrastructure to support queries from the user-mode
+ * application for state that may be maintained in the KVP kernel component.
+ */
+
+
+enum key_index {
+ FullyQualifiedDomainName = 0,
+	IntegrationServicesVersion, /* This key is serviced in the kernel */
+ NetworkAddressIPv4,
+ NetworkAddressIPv6,
+ OSBuildNumber,
+ OSName,
+ OSMajorVersion,
+ OSMinorVersion,
+ OSVersion,
+ ProcessorArchitecture
+};
+
+
+enum {
+ IPADDR = 0,
+ NETMASK,
+ GATEWAY,
+ DNS
+};
+
+static int in_hand_shake = 1;
+
+static char *os_name = "";
+static char *os_major = "";
+static char *os_minor = "";
+static char *processor_arch;
+static char *os_build;
+static char *os_version;
+static char *lic_version = "Unknown version";
+static char full_domain_name[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+static struct utsname uts_buf;
+
+/*
+ * The location of the interface configuration file.
+ */
+
+#define KVP_CONFIG_LOC "/var/lib/hyperv"
+
+#ifndef KVP_SCRIPTS_PATH
+#define KVP_SCRIPTS_PATH "/usr/libexec/hypervkvpd/"
+#endif
+
+#define KVP_NET_DIR "/sys/class/net/"
+
+#define MAX_FILE_NAME 100
+#define ENTRIES_PER_BLOCK 50
+
+struct kvp_record {
+ char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+ char value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+};
+
+struct kvp_file_state {
+ int fd;
+ int num_blocks;
+ struct kvp_record *records;
+ int num_records;
+ char fname[MAX_FILE_NAME];
+};
+
+static struct kvp_file_state kvp_file_info[KVP_POOL_COUNT];
+
+static void kvp_acquire_lock(int pool)
+{
+ struct flock fl = {F_WRLCK, SEEK_SET, 0, 0, 0};
+ fl.l_pid = getpid();
+
+ if (fcntl(kvp_file_info[pool].fd, F_SETLKW, &fl) == -1) {
+ syslog(LOG_ERR, "Failed to acquire the lock pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void kvp_release_lock(int pool)
+{
+ struct flock fl = {F_UNLCK, SEEK_SET, 0, 0, 0};
+ fl.l_pid = getpid();
+
+ if (fcntl(kvp_file_info[pool].fd, F_SETLK, &fl) == -1) {
+ syslog(LOG_ERR, "Failed to release the lock pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
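/*
 * Note on the two helpers above: the flock covers the whole file
 * (l_whence = SEEK_SET with l_start = l_len = 0, i.e. from the start
 * through EOF). F_SETLKW blocks until the write lock is granted,
 * while the release path can use plain F_SETLK because unlocking
 * never has to wait.
 */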
+
+static void kvp_update_file(int pool)
+{
+ FILE *filep;
+
+ /*
+ * We are going to write our in-memory registry out to
+ * disk; acquire the lock first.
+ */
+ kvp_acquire_lock(pool);
+
+ filep = fopen(kvp_file_info[pool].fname, "we");
+ if (!filep) {
+ syslog(LOG_ERR, "Failed to open file, pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ kvp_release_lock(pool);
+ exit(EXIT_FAILURE);
+ }
+
+ fwrite(kvp_file_info[pool].records, sizeof(struct kvp_record),
+ kvp_file_info[pool].num_records, filep);
+
+ if (ferror(filep) || fclose(filep)) {
+ kvp_release_lock(pool);
+ syslog(LOG_ERR, "Failed to write file, pool: %d", pool);
+ exit(EXIT_FAILURE);
+ }
+
+ kvp_release_lock(pool);
+}
+
+static void kvp_update_mem_state(int pool)
+{
+ FILE *filep;
+ size_t records_read = 0;
+ struct kvp_record *record = kvp_file_info[pool].records;
+ struct kvp_record *readp;
+ int num_blocks = kvp_file_info[pool].num_blocks;
+ int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
+
+ kvp_acquire_lock(pool);
+
+ filep = fopen(kvp_file_info[pool].fname, "re");
+ if (!filep) {
+ syslog(LOG_ERR, "Failed to open file, pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ kvp_release_lock(pool);
+ exit(EXIT_FAILURE);
+ }
+ for (;;) {
+ readp = &record[records_read];
+ records_read += fread(readp, sizeof(struct kvp_record),
+ ENTRIES_PER_BLOCK * num_blocks - records_read,
+ filep);
+
+ if (ferror(filep)) {
+ syslog(LOG_ERR,
+ "Failed to read file, pool: %d; error: %d %s",
+ pool, errno, strerror(errno));
+ kvp_release_lock(pool);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!feof(filep)) {
+ /*
+ * We have more data to read.
+ */
+ num_blocks++;
+ record = realloc(record, alloc_unit * num_blocks);
+
+ if (record == NULL) {
+				syslog(LOG_ERR, "realloc failed");
+ kvp_release_lock(pool);
+ exit(EXIT_FAILURE);
+ }
+ continue;
+ }
+ break;
+ }
+
+ kvp_file_info[pool].num_blocks = num_blocks;
+ kvp_file_info[pool].records = record;
+ kvp_file_info[pool].num_records = records_read;
+
+ fclose(filep);
+ kvp_release_lock(pool);
+}
+
+static int kvp_file_init(void)
+{
+ int fd;
+ char *fname;
+ int i;
+ int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
+
+ if (access(KVP_CONFIG_LOC, F_OK)) {
+ if (mkdir(KVP_CONFIG_LOC, 0755 /* rwxr-xr-x */)) {
+ syslog(LOG_ERR, "Failed to create '%s'; error: %d %s", KVP_CONFIG_LOC,
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (i = 0; i < KVP_POOL_COUNT; i++) {
+ fname = kvp_file_info[i].fname;
+ sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
+ fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */);
+
+ if (fd == -1)
+ return 1;
+
+ kvp_file_info[i].fd = fd;
+ kvp_file_info[i].num_blocks = 1;
+ kvp_file_info[i].records = malloc(alloc_unit);
+ if (kvp_file_info[i].records == NULL)
+ return 1;
+ kvp_file_info[i].num_records = 0;
+ kvp_update_mem_state(i);
+ }
+
+ return 0;
+}
+
+static int kvp_key_delete(int pool, const __u8 *key, int key_size)
+{
+ int i;
+ int j, k;
+ int num_records;
+ struct kvp_record *record;
+
+ /*
+ * First update the in-memory state.
+ */
+ kvp_update_mem_state(pool);
+
+ num_records = kvp_file_info[pool].num_records;
+ record = kvp_file_info[pool].records;
+
+ for (i = 0; i < num_records; i++) {
+ if (memcmp(key, record[i].key, key_size))
+ continue;
+ /*
+ * Found a match; just move the remaining
+ * entries up.
+ */
+ if (i == (num_records - 1)) {
+ kvp_file_info[pool].num_records--;
+ kvp_update_file(pool);
+ return 0;
+ }
+
+ j = i;
+ k = j + 1;
+ for (; k < num_records; k++) {
+ strcpy(record[j].key, record[k].key);
+ strcpy(record[j].value, record[k].value);
+ j++;
+ }
+
+ kvp_file_info[pool].num_records--;
+ kvp_update_file(pool);
+ return 0;
+ }
+ return 1;
+}
+
+static int kvp_key_add_or_modify(int pool, const __u8 *key, int key_size,
+ const __u8 *value, int value_size)
+{
+ int i;
+ int num_records;
+ struct kvp_record *record;
+ int num_blocks;
+
+ if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) ||
+ (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ return 1;
+
+ /*
+ * First update the in-memory state.
+ */
+ kvp_update_mem_state(pool);
+
+ num_records = kvp_file_info[pool].num_records;
+ record = kvp_file_info[pool].records;
+ num_blocks = kvp_file_info[pool].num_blocks;
+
+ for (i = 0; i < num_records; i++) {
+ if (memcmp(key, record[i].key, key_size))
+ continue;
+ /*
+ * Found a match; just update the value -
+ * this is the modify case.
+ */
+ memcpy(record[i].value, value, value_size);
+ kvp_update_file(pool);
+ return 0;
+ }
+
+ /*
+	 * Need to add a new entry.
+ */
+ if (num_records == (ENTRIES_PER_BLOCK * num_blocks)) {
+ /* Need to allocate a larger array for reg entries. */
+ record = realloc(record, sizeof(struct kvp_record) *
+ ENTRIES_PER_BLOCK * (num_blocks + 1));
+
+ if (record == NULL)
+ return 1;
+ kvp_file_info[pool].num_blocks++;
+
+ }
+ memcpy(record[i].value, value, value_size);
+ memcpy(record[i].key, key, key_size);
+ kvp_file_info[pool].records = record;
+ kvp_file_info[pool].num_records++;
+ kvp_update_file(pool);
+ return 0;
+}
+
+static int kvp_get_value(int pool, const __u8 *key, int key_size, __u8 *value,
+ int value_size)
+{
+ int i;
+ int num_records;
+ struct kvp_record *record;
+
+ if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) ||
+ (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ return 1;
+
+ /*
+ * First update the in-memory state.
+ */
+ kvp_update_mem_state(pool);
+
+ num_records = kvp_file_info[pool].num_records;
+ record = kvp_file_info[pool].records;
+
+ for (i = 0; i < num_records; i++) {
+ if (memcmp(key, record[i].key, key_size))
+ continue;
+ /*
+ * Found a match; just copy the value out.
+ */
+ memcpy(value, record[i].value, value_size);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int kvp_pool_enumerate(int pool, int index, __u8 *key, int key_size,
+ __u8 *value, int value_size)
+{
+ struct kvp_record *record;
+
+ /*
+ * First update our in-memory database.
+ */
+ kvp_update_mem_state(pool);
+ record = kvp_file_info[pool].records;
+
+ if (index >= kvp_file_info[pool].num_records) {
+ return 1;
+ }
+
+ memcpy(key, record[index].key, key_size);
+ memcpy(value, record[index].value, value_size);
+ return 0;
+}
+
+
+void kvp_get_os_info(void)
+{
+ FILE *file;
+ char *p, buf[512];
+
+ uname(&uts_buf);
+ os_version = uts_buf.release;
+ os_build = strdup(uts_buf.release);
+
+ os_name = uts_buf.sysname;
+ processor_arch = uts_buf.machine;
+
+ /*
+	 * The current Windows host (Win7) expects the build
+	 * string to be of the form: x.y.z
+	 * Strip any additional information we may have.
+ */
+ p = strchr(os_version, '-');
+ if (p)
+ *p = '\0';
+
+ /*
+ * Parse the /etc/os-release file if present:
+ * http://www.freedesktop.org/software/systemd/man/os-release.html
+ */
+ file = fopen("/etc/os-release", "r");
+ if (file != NULL) {
+ while (fgets(buf, sizeof(buf), file)) {
+ char *value, *q;
+
+ /* Ignore comments */
+ if (buf[0] == '#')
+ continue;
+
+ /* Split into name=value */
+ p = strchr(buf, '=');
+ if (!p)
+ continue;
+ *p++ = 0;
+
+ /* Remove quotes and newline; un-escape */
+ value = p;
+ q = p;
+ while (*p) {
+ if (*p == '\\') {
+ ++p;
+ if (!*p)
+ break;
+ *q++ = *p++;
+ } else if (*p == '\'' || *p == '"' ||
+ *p == '\n') {
+ ++p;
+ } else {
+ *q++ = *p++;
+ }
+ }
+ *q = 0;
+
+ if (!strcmp(buf, "NAME")) {
+ p = strdup(value);
+ if (!p)
+ break;
+ os_name = p;
+ } else if (!strcmp(buf, "VERSION_ID")) {
+ p = strdup(value);
+ if (!p)
+ break;
+ os_major = p;
+ }
+ }
+ fclose(file);
+ return;
+ }
+
+ /* Fallback for older RH/SUSE releases */
+ file = fopen("/etc/SuSE-release", "r");
+ if (file != NULL)
+ goto kvp_osinfo_found;
+ file = fopen("/etc/redhat-release", "r");
+ if (file != NULL)
+ goto kvp_osinfo_found;
+
+ /*
+	 * We don't have any information about the OS.
+ */
+ return;
+
+kvp_osinfo_found:
+ /* up to three lines */
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ p = strdup(buf);
+ if (!p)
+ goto done;
+ os_name = p;
+
+ /* second line */
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ p = strdup(buf);
+ if (!p)
+ goto done;
+ os_major = p;
+
+ /* third line */
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ p = strdup(buf);
+ if (p)
+ os_minor = p;
+ }
+ }
+ }
+
+done:
+ fclose(file);
+ return;
+}
+
+
+
+/*
+ * Retrieve the interface name corresponding to the specified GUID.
+ * If there is a match, the function returns a pointer to the
+ * interface name; if not, NULL is returned.
+ * If a match is found, the caller is responsible for
+ * freeing the memory.
+ */
+
+static char *kvp_get_if_name(char *guid)
+{
+ DIR *dir;
+ struct dirent *entry;
+ FILE *file;
+ char *p, *x;
+ char *if_name = NULL;
+ char buf[256];
+ char dev_id[PATH_MAX];
+
+ dir = opendir(KVP_NET_DIR);
+ if (dir == NULL)
+ return NULL;
+
+ while ((entry = readdir(dir)) != NULL) {
+ /*
+ * Set the state for the next pass.
+ */
+ snprintf(dev_id, sizeof(dev_id), "%s%s/device/device_id",
+ KVP_NET_DIR, entry->d_name);
+
+ file = fopen(dev_id, "r");
+ if (file == NULL)
+ continue;
+
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ if (!strcmp(p, guid)) {
+ /*
+ * Found the guid match; return the interface
+ * name. The caller will free the memory.
+ */
+ if_name = strdup(entry->d_name);
+ fclose(file);
+ break;
+ }
+ }
+ fclose(file);
+ }
+
+ closedir(dir);
+ return if_name;
+}
+
+/*
+ * Retrieve the MAC address given the interface name.
+ */
+
+static char *kvp_if_name_to_mac(char *if_name)
+{
+ FILE *file;
+ char *p, *x;
+ char buf[256];
+ char addr_file[PATH_MAX];
+ unsigned int i;
+ char *mac_addr = NULL;
+
+ snprintf(addr_file, sizeof(addr_file), "%s%s%s", KVP_NET_DIR,
+ if_name, "/address");
+
+ file = fopen(addr_file, "r");
+ if (file == NULL)
+ return NULL;
+
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+ for (i = 0; i < strlen(p); i++)
+ p[i] = toupper(p[i]);
+ mac_addr = strdup(p);
+ }
+
+ fclose(file);
+ return mac_addr;
+}
+
+static void kvp_process_ipconfig_file(char *cmd,
+ char *config_buf, unsigned int len,
+ int element_size, int offset)
+{
+ char buf[256];
+ char *p;
+ char *x;
+ FILE *file;
+
+ /*
+ * First execute the command.
+ */
+ file = popen(cmd, "r");
+ if (file == NULL)
+ return;
+
+ if (offset == 0)
+ memset(config_buf, 0, len);
+ while ((p = fgets(buf, sizeof(buf), file)) != NULL) {
+ if (len < strlen(config_buf) + element_size + 1)
+ break;
+
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ strcat(config_buf, p);
+ strcat(config_buf, ";");
+ }
+ pclose(file);
+}
+
+static void kvp_get_ipconfig_info(char *if_name,
+ struct hv_kvp_ipaddr_value *buffer)
+{
+ char cmd[512];
+ char dhcp_info[128];
+ char *p;
+ FILE *file;
+
+ /*
+ * Get the address of default gateway (ipv4).
+ */
+ sprintf(cmd, "%s %s", "ip route show dev", if_name);
+ strcat(cmd, " | awk '/default/ {print $3 }'");
+
+ /*
+ * Execute the command to gather gateway info.
+ */
+ kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
+ (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
+
+ /*
+ * Get the address of default gateway (ipv6).
+ */
+ sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name);
+ strcat(cmd, " | awk '/default/ {print $3 }'");
+
+ /*
+ * Execute the command to gather gateway info (ipv6).
+ */
+ kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
+ (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
+
+
+ /*
+ * Gather the DNS state.
+	 * Since there is no standard way to get this information
+	 * across the various distributions of interest, we just invoke
+	 * an external script that must be ported to each distro.
+ *
+ * Following is the expected format of the information from the script:
+ *
+ * ipaddr1 (nameserver1)
+ * ipaddr2 (nameserver2)
+ * .
+ * .
+ */
+
+ sprintf(cmd, KVP_SCRIPTS_PATH "%s", "hv_get_dns_info");
+
+ /*
+ * Execute the command to gather DNS info.
+ */
+ kvp_process_ipconfig_file(cmd, (char *)buffer->dns_addr,
+ (MAX_IP_ADDR_SIZE * 2), INET_ADDRSTRLEN, 0);
+
+ /*
+ * Gather the DHCP state.
+ * We will gather this state by invoking an external script.
+ * The parameter to the script is the interface name.
+ * Here is the expected output:
+ *
+ * Enabled: DHCP enabled.
+ */
+
+ sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name);
+
+ file = popen(cmd, "r");
+ if (file == NULL)
+ return;
+
+ p = fgets(dhcp_info, sizeof(dhcp_info), file);
+ if (p == NULL) {
+ pclose(file);
+ return;
+ }
+
+ if (!strncmp(p, "Enabled", 7))
+ buffer->dhcp_enabled = 1;
+ else
+ buffer->dhcp_enabled = 0;
+
+ pclose(file);
+}
+
+
+static unsigned int hweight32(unsigned int *w)
+{
+ unsigned int res = *w - ((*w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res + (res >> 4)) & 0x0F0F0F0F;
+ res = res + (res >> 8);
+ return (res + (res >> 16)) & 0x000000FF;
+}
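/*
 * hweight32() above is a standard SWAR population count. The IPv6
 * branch of kvp_get_ip_info() below sums it over the four 32-bit
 * words of a netmask to derive the CIDR prefix length. Worked
 * example (values illustrative): a /56 netmask has one all-ones
 * word (32 bits set), a second word whose bytes are ff ff ff 00
 * (24 bits set) and two zero words, so the loop computes
 * 32 + 24 + 0 + 0 = 56 and the suffix is formatted as "/56".
 */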
+
+static int kvp_process_ip_address(void *addrp,
+ int family, char *buffer,
+ int length, int *offset)
+{
+ struct sockaddr_in *addr;
+ struct sockaddr_in6 *addr6;
+ int addr_length;
+ char tmp[50];
+ const char *str;
+
+ if (family == AF_INET) {
+ addr = (struct sockaddr_in *)addrp;
+ str = inet_ntop(family, &addr->sin_addr, tmp, 50);
+ addr_length = INET_ADDRSTRLEN;
+ } else {
+ addr6 = (struct sockaddr_in6 *)addrp;
+ str = inet_ntop(family, &addr6->sin6_addr.s6_addr, tmp, 50);
+ addr_length = INET6_ADDRSTRLEN;
+ }
+
+ if ((length - *offset) < addr_length + 2)
+ return HV_E_FAIL;
+ if (str == NULL) {
+ strcpy(buffer, "inet_ntop failed\n");
+ return HV_E_FAIL;
+ }
+ if (*offset == 0)
+ strcpy(buffer, tmp);
+ else {
+ strcat(buffer, ";");
+ strcat(buffer, tmp);
+ }
+
+ *offset += strlen(str) + 1;
+
+ return 0;
+}
+
+static int
+kvp_get_ip_info(int family, char *if_name, int op,
+ void *out_buffer, unsigned int length)
+{
+ struct ifaddrs *ifap;
+ struct ifaddrs *curp;
+ int offset = 0;
+ int sn_offset = 0;
+ int error = 0;
+ char *buffer;
+ struct hv_kvp_ipaddr_value *ip_buffer = NULL;
+ char cidr_mask[5]; /* /xyz */
+ int weight;
+ int i;
+ unsigned int *w;
+ char *sn_str;
+ struct sockaddr_in6 *addr6;
+
+ if (op == KVP_OP_ENUMERATE) {
+ buffer = out_buffer;
+ } else {
+ ip_buffer = out_buffer;
+ buffer = (char *)ip_buffer->ip_addr;
+ ip_buffer->addr_family = 0;
+ }
+ /*
+ * On entry into this function, the buffer is capable of holding the
+ * maximum key value.
+ */
+
+ if (getifaddrs(&ifap)) {
+ strcpy(buffer, "getifaddrs failed\n");
+ return HV_E_FAIL;
+ }
+
+ curp = ifap;
+ while (curp != NULL) {
+ if (curp->ifa_addr == NULL) {
+ curp = curp->ifa_next;
+ continue;
+ }
+
+ if ((if_name != NULL) &&
+ (strncmp(curp->ifa_name, if_name, strlen(if_name)))) {
+ /*
+ * We want info about a specific interface;
+ * just continue.
+ */
+ curp = curp->ifa_next;
+ continue;
+ }
+
+ /*
+ * We only support two address families: AF_INET and AF_INET6.
+ * If a family value of 0 is specified, we collect both
+ * supported address families; if not we gather info on
+ * the specified address family.
+ */
+ if ((((family != 0) &&
+ (curp->ifa_addr->sa_family != family))) ||
+ (curp->ifa_flags & IFF_LOOPBACK)) {
+ curp = curp->ifa_next;
+ continue;
+ }
+ if ((curp->ifa_addr->sa_family != AF_INET) &&
+ (curp->ifa_addr->sa_family != AF_INET6)) {
+ curp = curp->ifa_next;
+ continue;
+ }
+
+ if (op == KVP_OP_GET_IP_INFO) {
+ /*
+ * Gather info other than the IP address.
+ * IP address info will be gathered later.
+ */
+ if (curp->ifa_addr->sa_family == AF_INET) {
+ ip_buffer->addr_family |= ADDR_FAMILY_IPV4;
+ /*
+ * Get subnet info.
+ */
+ error = kvp_process_ip_address(
+ curp->ifa_netmask,
+ AF_INET,
+ (char *)
+ ip_buffer->sub_net,
+ length,
+ &sn_offset);
+ if (error)
+ goto gather_ipaddr;
+ } else {
+ ip_buffer->addr_family |= ADDR_FAMILY_IPV6;
+
+ /*
+ * Get subnet info in CIDR format.
+ */
+ weight = 0;
+ sn_str = (char *)ip_buffer->sub_net;
+ addr6 = (struct sockaddr_in6 *)
+ curp->ifa_netmask;
+ w = addr6->sin6_addr.s6_addr32;
+
+ for (i = 0; i < 4; i++)
+ weight += hweight32(&w[i]);
+
+ sprintf(cidr_mask, "/%d", weight);
+ if (length < sn_offset + strlen(cidr_mask) + 1)
+ goto gather_ipaddr;
+
+ if (sn_offset == 0)
+ strcpy(sn_str, cidr_mask);
+ else {
+ strcat((char *)ip_buffer->sub_net, ";");
+ strcat(sn_str, cidr_mask);
+ }
+ sn_offset += strlen(sn_str) + 1;
+ }
+
+ /*
+ * Collect other ip related configuration info.
+ */
+
+ kvp_get_ipconfig_info(if_name, ip_buffer);
+ }
+
+gather_ipaddr:
+ error = kvp_process_ip_address(curp->ifa_addr,
+ curp->ifa_addr->sa_family,
+ buffer,
+ length, &offset);
+ if (error)
+ goto getaddr_done;
+
+ curp = curp->ifa_next;
+ }
+
+getaddr_done:
+ freeifaddrs(ifap);
+ return error;
+}
+
+/*
+ * Retrieve the IP given the MAC address.
+ */
+static int kvp_mac_to_ip(struct hv_kvp_ipaddr_value *kvp_ip_val)
+{
+ char *mac = (char *)kvp_ip_val->adapter_id;
+ DIR *dir;
+ struct dirent *entry;
+ FILE *file;
+ char *p, *x;
+ char *if_name = NULL;
+ char buf[256];
+ char dev_id[PATH_MAX];
+ unsigned int i;
+ int error = HV_E_FAIL;
+
+ dir = opendir(KVP_NET_DIR);
+ if (dir == NULL)
+ return HV_E_FAIL;
+
+ while ((entry = readdir(dir)) != NULL) {
+ /*
+ * Set the state for the next pass.
+ */
+ snprintf(dev_id, sizeof(dev_id), "%s%s/address", KVP_NET_DIR,
+ entry->d_name);
+
+ file = fopen(dev_id, "r");
+ if (file == NULL)
+ continue;
+
+ p = fgets(buf, sizeof(buf), file);
+ fclose(file);
+ if (!p)
+ continue;
+
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ for (i = 0; i < strlen(p); i++)
+ p[i] = toupper(p[i]);
+
+ if (strcmp(p, mac))
+ continue;
+
+ /*
+ * Found the MAC match.
+ * A NIC (e.g. VF) matching the MAC, but without IP, is skipped.
+ */
+ if_name = entry->d_name;
+ if (!if_name)
+ continue;
+
+ error = kvp_get_ip_info(0, if_name, KVP_OP_GET_IP_INFO,
+ kvp_ip_val, MAX_IP_ADDR_SIZE * 2);
+
+ if (!error && strlen((char *)kvp_ip_val->ip_addr))
+ break;
+ }
+
+ closedir(dir);
+ return error;
+}
+
+static int expand_ipv6(char *addr, int type)
+{
+ int ret;
+ struct in6_addr v6_addr;
+
+ ret = inet_pton(AF_INET6, addr, &v6_addr);
+
+ if (ret != 1) {
+ if (type == NETMASK)
+ return 1;
+ return 0;
+ }
+
+ sprintf(addr, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:"
+ "%02x%02x:%02x%02x:%02x%02x",
+ (int)v6_addr.s6_addr[0], (int)v6_addr.s6_addr[1],
+ (int)v6_addr.s6_addr[2], (int)v6_addr.s6_addr[3],
+ (int)v6_addr.s6_addr[4], (int)v6_addr.s6_addr[5],
+ (int)v6_addr.s6_addr[6], (int)v6_addr.s6_addr[7],
+ (int)v6_addr.s6_addr[8], (int)v6_addr.s6_addr[9],
+ (int)v6_addr.s6_addr[10], (int)v6_addr.s6_addr[11],
+ (int)v6_addr.s6_addr[12], (int)v6_addr.s6_addr[13],
+ (int)v6_addr.s6_addr[14], (int)v6_addr.s6_addr[15]);
+
+ return 1;
+
+}
+
+static int is_ipv4(char *addr)
+{
+ int ret;
+ struct in_addr ipv4_addr;
+
+ ret = inet_pton(AF_INET, addr, &ipv4_addr);
+
+ if (ret == 1)
+ return 1;
+ return 0;
+}
+
+static int parse_ip_val_buffer(char *in_buf, int *offset,
+ char *out_buf, int out_len)
+{
+ char *x;
+ char *start;
+
+ /*
+	 * in_buf holds a sequence of substrings separated by the
+	 * character ';'. The last substring does not have a
+	 * terminating ';' character.
+ */
+ start = in_buf + *offset;
+
+ x = strchr(start, ';');
+ if (x)
+ *x = 0;
+ else
+ x = start + strlen(start);
+
+ if (strlen(start) != 0) {
+ int i = 0;
+ /*
+ * Get rid of leading spaces.
+ */
+ while (start[i] == ' ')
+ i++;
+
+ if ((x - start) <= out_len) {
+ strcpy(out_buf, (start + i));
+ *offset += (x - start) + 1;
+ return 1;
+ }
+ }
+ return 0;
+}
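/*
 * Worked example for parse_ip_val_buffer() (input illustrative):
 * with in_buf = "10.0.0.5;10.0.0.6" and *offset = 0, the first call
 * copies "10.0.0.5" into out_buf and advances *offset past the ';';
 * the second call yields "10.0.0.6". With the zero-filled message
 * buffers used here, a third call sees an empty substring and
 * returns 0. Note the function is destructive: each ';' it finds in
 * in_buf is overwritten with a NUL terminator.
 */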
+
+static int kvp_write_file(FILE *f, char *s1, char *s2, char *s3)
+{
+ int ret;
+
+ ret = fprintf(f, "%s%s%s%s\n", s1, s2, "=", s3);
+
+ if (ret < 0)
+ return HV_E_FAIL;
+
+ return 0;
+}
+
+
+static int process_ip_string(FILE *f, char *ip_string, int type)
+{
+ int error = 0;
+ char addr[INET6_ADDRSTRLEN];
+ int i = 0;
+ int j = 0;
+ char str[256];
+ char sub_str[13];
+ int offset = 0;
+
+ memset(addr, 0, sizeof(addr));
+
+ while (parse_ip_val_buffer(ip_string, &offset, addr,
+ (MAX_IP_ADDR_SIZE * 2))) {
+
+ sub_str[0] = 0;
+ if (is_ipv4(addr)) {
+ switch (type) {
+ case IPADDR:
+ snprintf(str, sizeof(str), "%s", "IPADDR");
+ break;
+ case NETMASK:
+ snprintf(str, sizeof(str), "%s", "NETMASK");
+ break;
+ case GATEWAY:
+ snprintf(str, sizeof(str), "%s", "GATEWAY");
+ break;
+ case DNS:
+ snprintf(str, sizeof(str), "%s", "DNS");
+ break;
+ }
+
+ if (type == DNS) {
+ snprintf(sub_str, sizeof(sub_str), "%d", ++i);
+ } else if (type == GATEWAY && i == 0) {
+ ++i;
+ } else {
+ snprintf(sub_str, sizeof(sub_str), "%d", i++);
+ }
+
+
+ } else if (expand_ipv6(addr, type)) {
+ switch (type) {
+ case IPADDR:
+ snprintf(str, sizeof(str), "%s", "IPV6ADDR");
+ break;
+ case NETMASK:
+ snprintf(str, sizeof(str), "%s", "IPV6NETMASK");
+ break;
+ case GATEWAY:
+ snprintf(str, sizeof(str), "%s",
+ "IPV6_DEFAULTGW");
+ break;
+ case DNS:
+ snprintf(str, sizeof(str), "%s", "DNS");
+ break;
+ }
+
+ if (type == DNS) {
+ snprintf(sub_str, sizeof(sub_str), "%d", ++i);
+ } else if (j == 0) {
+ ++j;
+ } else {
+ snprintf(sub_str, sizeof(sub_str), "_%d", j++);
+ }
+ } else {
+ return HV_INVALIDARG;
+ }
+
+ error = kvp_write_file(f, str, sub_str, addr);
+ if (error)
+ return error;
+ memset(addr, 0, sizeof(addr));
+ }
+
+ return 0;
+}
+
+static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
+{
+ int error = 0;
+ char if_file[PATH_MAX];
+ FILE *file;
+ char cmd[PATH_MAX];
+ char *mac_addr;
+ int str_len;
+
+ /*
+ * Set the configuration for the specified interface with
+ * the information provided. Since there is no standard
+ * way to configure an interface, we will have an external
+ * script that does the job of configuring the interface and
+ * flushing the configuration.
+ *
+ * The parameters passed to this external script are:
+ * 1. A configuration file that has the specified configuration.
+ *
+ * We will embed the name of the interface in the configuration
+ * file: ifcfg-ethx (where ethx is the interface name).
+ *
+	 * The information provided here may be more than what is needed
+	 * in a given distro to configure the interface, so the script is
+	 * free to ignore information that may not be relevant.
+ *
+ * Here is the format of the ip configuration file:
+ *
+ * HWADDR=macaddr
+ * DEVICE=interface name
+ * BOOTPROTO=<protocol> (where <protocol> is "dhcp" if DHCP is configured
+ * or "none" if no boot-time protocol should be used)
+ *
+ * IPADDR0=ipaddr1
+ * IPADDR1=ipaddr2
+ * IPADDRx=ipaddry (where y = x + 1)
+ *
+ * NETMASK0=netmask1
+ * NETMASKx=netmasky (where y = x + 1)
+ *
+ * GATEWAY=ipaddr1
+ * GATEWAYx=ipaddry (where y = x + 1)
+ *
+ * DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc)
+ *
+ * IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be
+ * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as
+ * IPV6NETMASK.
+ *
+ * The host can specify multiple ipv4 and ipv6 addresses to be
+ * configured for the interface. Furthermore, the configuration
+ * needs to be persistent. A subsequent GET call on the interface
+ * is expected to return the configuration that is set via the SET
+ * call.
+ */
+
+ snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC,
+ "/ifcfg-", if_name);
+
+ file = fopen(if_file, "w");
+
+ if (file == NULL) {
+ syslog(LOG_ERR, "Failed to open config file; error: %d %s",
+ errno, strerror(errno));
+ return HV_E_FAIL;
+ }
+
+ /*
+ * First write out the MAC address.
+ */
+
+ mac_addr = kvp_if_name_to_mac(if_name);
+ if (mac_addr == NULL) {
+ error = HV_E_FAIL;
+ goto setval_error;
+ }
+
+ error = kvp_write_file(file, "HWADDR", "", mac_addr);
+ free(mac_addr);
+ if (error)
+ goto setval_error;
+
+ error = kvp_write_file(file, "DEVICE", "", if_name);
+ if (error)
+ goto setval_error;
+
+ /*
+	 * The dhcp_enabled flag is only for IPv4. In the case where the host
+	 * only injects an IPv6 address, the flag is true, but we still need
+	 * to proceed to parse and pass the IPv6 information to the
+	 * distro-specific script hv_set_ifconfig.
+ */
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(file, "BOOTPROTO", "", "dhcp");
+ if (error)
+ goto setval_error;
+
+ } else {
+ error = kvp_write_file(file, "BOOTPROTO", "", "none");
+ if (error)
+ goto setval_error;
+ }
+
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name servers.
+ */
+
+ error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR);
+ if (error)
+ goto setval_error;
+
+ error = process_ip_string(file, (char *)new_val->sub_net, NETMASK);
+ if (error)
+ goto setval_error;
+
+ error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY);
+ if (error)
+ goto setval_error;
+
+ error = process_ip_string(file, (char *)new_val->dns_addr, DNS);
+ if (error)
+ goto setval_error;
+
+ fclose(file);
+
+ /*
+ * Now that we have populated the configuration file,
+ * invoke the external script to do its magic.
+ */
+
+ str_len = snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s",
+ "hv_set_ifconfig", if_file);
+ /*
+ * This is a little overcautious, but it's necessary to suppress some
+ * false warnings from gcc 8.0.1.
+ */
+ if (str_len <= 0 || (unsigned int)str_len >= sizeof(cmd)) {
+ syslog(LOG_ERR, "Cmd '%s' (len=%d) may be too long",
+ cmd, str_len);
+ return HV_E_FAIL;
+ }
+
+ if (system(cmd)) {
+ syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s",
+ cmd, errno, strerror(errno));
+ return HV_E_FAIL;
+ }
+ return 0;
+
+setval_error:
+ syslog(LOG_ERR, "Failed to write config file");
+ fclose(file);
+ return error;
+}
+
+
+static void
+kvp_get_domain_name(char *buffer, int length)
+{
+	struct addrinfo hints, *info;
+	int error = 0;
+
+	gethostname(buffer, length);
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_INET; /* Get only IPv4 addrinfo. */
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_CANONNAME;
+
+ error = getaddrinfo(buffer, NULL, &hints, &info);
+ if (error != 0) {
+ snprintf(buffer, length, "getaddrinfo failed: 0x%x %s",
+ error, gai_strerror(error));
+ return;
+ }
+ snprintf(buffer, length, "%s", info->ai_canonname);
+ freeaddrinfo(info);
+}
+
+void print_usage(char *argv[])
+{
+ fprintf(stderr, "Usage: %s [options]\n"
+ "Options are:\n"
+ " -n, --no-daemon stay in foreground, don't daemonize\n"
+ " -h, --help print this help\n", argv[0]);
+}
+
+int main(int argc, char *argv[])
+{
+ int kvp_fd, len;
+ int error;
+ struct pollfd pfd;
+ char *p;
+ struct hv_kvp_msg hv_msg[1];
+ char *key_value;
+ char *key_name;
+ int op;
+ int pool;
+ char *if_name;
+ struct hv_kvp_ipaddr_value *kvp_ip_val;
+ int daemonize = 1, long_index = 0, opt;
+
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 'h' },
+ {"no-daemon", no_argument, 0, 'n' },
+ {0, 0, 0, 0 }
+ };
+
+ while ((opt = getopt_long(argc, argv, "hn", long_options,
+ &long_index)) != -1) {
+ switch (opt) {
+ case 'n':
+ daemonize = 0;
+ break;
+ case 'h':
+ print_usage(argv);
+ exit(0);
+ default:
+ print_usage(argv);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (daemonize && daemon(1, 0))
+ return 1;
+
+ openlog("KVP", 0, LOG_USER);
+ syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
+
+ kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR | O_CLOEXEC);
+
+ if (kvp_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Retrieve OS release information.
+ */
+ kvp_get_os_info();
+ /*
+ * Cache Fully Qualified Domain Name because getaddrinfo takes an
+ * unpredictable amount of time to finish.
+ */
+ kvp_get_domain_name(full_domain_name, sizeof(full_domain_name));
+
+ if (kvp_file_init()) {
+ syslog(LOG_ERR, "Failed to initialize the pools");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Register ourselves with the kernel.
+ */
+ hv_msg->kvp_hdr.operation = KVP_OP_REGISTER1;
+ len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
+ if (len != sizeof(struct hv_kvp_msg)) {
+ syslog(LOG_ERR, "registration to kernel failed; error: %d %s",
+ errno, strerror(errno));
+ close(kvp_fd);
+ exit(EXIT_FAILURE);
+ }
+
+ pfd.fd = kvp_fd;
+
+ while (1) {
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+
+ if (poll(&pfd, 1, -1) < 0) {
+ syslog(LOG_ERR, "poll failed; error: %d %s", errno, strerror(errno));
+ if (errno == EINVAL) {
+ close(kvp_fd);
+ exit(EXIT_FAILURE);
+ }
+ else
+ continue;
+ }
+
+ len = read(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
+
+ if (len != sizeof(struct hv_kvp_msg)) {
+ syslog(LOG_ERR, "read failed; error:%d %s",
+ errno, strerror(errno));
+
+ close(kvp_fd);
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * We will use the KVP header information to pass back
+ * the error from this daemon. So, first copy the state
+ * and set the error code to success.
+ */
+ op = hv_msg->kvp_hdr.operation;
+ pool = hv_msg->kvp_hdr.pool;
+ hv_msg->error = HV_S_OK;
+
+ if ((in_hand_shake) && (op == KVP_OP_REGISTER1)) {
+ /*
+ * Driver is registering with us; stash away the version
+ * information.
+ */
+ in_hand_shake = 0;
+ p = (char *)hv_msg->body.kvp_register.version;
+ lic_version = malloc(strlen(p) + 1);
+ if (lic_version) {
+ strcpy(lic_version, p);
+ syslog(LOG_INFO, "KVP LIC Version: %s",
+ lic_version);
+ } else {
+ syslog(LOG_ERR, "malloc failed");
+ }
+ continue;
+ }
+
+ switch (op) {
+ case KVP_OP_GET_IP_INFO:
+ kvp_ip_val = &hv_msg->body.kvp_ip_val;
+
+ error = kvp_mac_to_ip(kvp_ip_val);
+
+ if (error)
+ hv_msg->error = error;
+
+ break;
+
+ case KVP_OP_SET_IP_INFO:
+ kvp_ip_val = &hv_msg->body.kvp_ip_val;
+ if_name = kvp_get_if_name(
+ (char *)kvp_ip_val->adapter_id);
+ if (if_name == NULL) {
+ /*
+ * We could not map the guid to an
+ * interface name; return error.
+ */
+ hv_msg->error = HV_GUID_NOTFOUND;
+ break;
+ }
+ error = kvp_set_ip_info(if_name, kvp_ip_val);
+ if (error)
+ hv_msg->error = error;
+
+ free(if_name);
+ break;
+
+ case KVP_OP_SET:
+ if (kvp_key_add_or_modify(pool,
+ hv_msg->body.kvp_set.data.key,
+ hv_msg->body.kvp_set.data.key_size,
+ hv_msg->body.kvp_set.data.value,
+ hv_msg->body.kvp_set.data.value_size))
+ hv_msg->error = HV_S_CONT;
+ break;
+
+ case KVP_OP_GET:
+ if (kvp_get_value(pool,
+ hv_msg->body.kvp_set.data.key,
+ hv_msg->body.kvp_set.data.key_size,
+ hv_msg->body.kvp_set.data.value,
+ hv_msg->body.kvp_set.data.value_size))
+ hv_msg->error = HV_S_CONT;
+ break;
+
+ case KVP_OP_DELETE:
+ if (kvp_key_delete(pool,
+ hv_msg->body.kvp_delete.key,
+ hv_msg->body.kvp_delete.key_size))
+ hv_msg->error = HV_S_CONT;
+ break;
+
+ default:
+ break;
+ }
+
+ if (op != KVP_OP_ENUMERATE)
+ goto kvp_done;
+
+ /*
+ * If the pool is KVP_POOL_AUTO, dynamically generate
+ * both the key and the value; if not read from the
+ * appropriate pool.
+ */
+ if (pool != KVP_POOL_AUTO) {
+ if (kvp_pool_enumerate(pool,
+ hv_msg->body.kvp_enum_data.index,
+ hv_msg->body.kvp_enum_data.data.key,
+ HV_KVP_EXCHANGE_MAX_KEY_SIZE,
+ hv_msg->body.kvp_enum_data.data.value,
+ HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ hv_msg->error = HV_S_CONT;
+ goto kvp_done;
+ }
+
+ key_name = (char *)hv_msg->body.kvp_enum_data.data.key;
+ key_value = (char *)hv_msg->body.kvp_enum_data.data.value;
+
+ switch (hv_msg->body.kvp_enum_data.index) {
+ case FullyQualifiedDomainName:
+ strcpy(key_value, full_domain_name);
+ strcpy(key_name, "FullyQualifiedDomainName");
+ break;
+ case IntegrationServicesVersion:
+ strcpy(key_name, "IntegrationServicesVersion");
+ strcpy(key_value, lic_version);
+ break;
+ case NetworkAddressIPv4:
+ kvp_get_ip_info(AF_INET, NULL, KVP_OP_ENUMERATE,
+ key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ strcpy(key_name, "NetworkAddressIPv4");
+ break;
+ case NetworkAddressIPv6:
+ kvp_get_ip_info(AF_INET6, NULL, KVP_OP_ENUMERATE,
+ key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ strcpy(key_name, "NetworkAddressIPv6");
+ break;
+ case OSBuildNumber:
+ strcpy(key_value, os_build);
+ strcpy(key_name, "OSBuildNumber");
+ break;
+ case OSName:
+ strcpy(key_value, os_name);
+ strcpy(key_name, "OSName");
+ break;
+ case OSMajorVersion:
+ strcpy(key_value, os_major);
+ strcpy(key_name, "OSMajorVersion");
+ break;
+ case OSMinorVersion:
+ strcpy(key_value, os_minor);
+ strcpy(key_name, "OSMinorVersion");
+ break;
+ case OSVersion:
+ strcpy(key_value, os_version);
+ strcpy(key_name, "OSVersion");
+ break;
+ case ProcessorArchitecture:
+ strcpy(key_value, processor_arch);
+ strcpy(key_name, "ProcessorArchitecture");
+ break;
+ default:
+ hv_msg->error = HV_S_CONT;
+ break;
+ }
+
+ /* Send the value back to the kernel. */
+kvp_done:
+ len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg));
+ if (len != sizeof(struct hv_kvp_msg)) {
+ syslog(LOG_ERR, "write failed; error: %d %s", errno,
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ close(kvp_fd);
+ exit(0);
+}
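Each pool above is persisted as a flat array of fixed-size kvp_record structs, so a pool file can be inspected with a few lines of C. A minimal dump utility, assuming the same record layout the daemon uses and defaulting to pool 0 under KVP_CONFIG_LOC:

#include <stdio.h>
#include <linux/hyperv.h>

struct kvp_record {
	char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
	char value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
};

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "/var/lib/hyperv/.kvp_pool_0";
	struct kvp_record rec;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	/* The file is just back-to-back records; read one at a time. */
	while (fread(&rec, sizeof(rec), 1, f) == 1)
		printf("%s=%s\n", rec.key, rec.value);
	fclose(f);
	return 0;
}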
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
new file mode 100755
index 000000000..7ed9f85ef
--- /dev/null
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This example script activates an interface based on the specified
+# configuration.
+#
+# In the interest of keeping the KVP daemon code free of distro-specific
+# information, the KVP daemon invokes this external script to configure
+# the interface.
+#
+# The only argument to this script is the configuration file that is to
+# be used to configure the interface.
+#
+# Each distro is expected to implement this script in a distro-specific
+# fashion. For instance, on distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for configuring the
+# interface.
+#
+# This example script is based on a RHEL environment.
+#
+# Here is the format of the ip configuration file:
+#
+# HWADDR=macaddr
+# DEVICE=interface name
+# BOOTPROTO=<protocol> (where <protocol> is "dhcp" if DHCP is configured
+# or "none" if no boot-time protocol should be used)
+#
+# IPADDR0=ipaddr1
+# IPADDR1=ipaddr2
+# IPADDRx=ipaddry (where y = x + 1)
+#
+# NETMASK0=netmask1
+# NETMASKx=netmasky (where y = x + 1)
+#
+# GATEWAY=ipaddr1
+# GATEWAYx=ipaddry (where y = x + 1)
+#
+# DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc)
+#
+# IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be
+# tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as
+# IPV6NETMASK.
+#
+# The host can specify multiple ipv4 and ipv6 addresses to be
+# configured for the interface. Furthermore, the configuration
+# needs to be persistent. A subsequent GET call on the interface
+# is expected to return the configuration that is set via the SET
+# call.
+#
+
+
+
+echo "IPV6INIT=yes" >> "$1"
+echo "NM_CONTROLLED=no" >> "$1"
+echo "PEERDNS=yes" >> "$1"
+echo "ONBOOT=yes" >> "$1"
+
+
+cp "$1" /etc/sysconfig/network-scripts/
+
+
+interface=$(echo "$1" | awk -F - '{ print $2 }')
+
+/sbin/ifdown "$interface" 2>/dev/null
+/sbin/ifup "$interface" 2>/dev/null
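For illustration, once the daemon has written its part and this script has appended the four settings above, the staged configuration file (ifcfg-eth0, all values hypothetical) might contain:

HWADDR=00:15:5D:AA:BB:CC
DEVICE=eth0
BOOTPROTO=dhcp
DNS1=10.0.0.2
IPV6ADDR=2001:db8::5
IPV6NETMASK=/64
IPV6_DEFAULTGW=2001:db8::1
IPV6INIT=yes
NM_CONTROLLED=no
PEERDNS=yes
ONBOOT=yes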
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
new file mode 100644
index 000000000..c2bb8a360
--- /dev/null
+++ b/tools/hv/hv_vss_daemon.c
@@ -0,0 +1,339 @@
+/*
+ * An implementation of the host-initiated guest snapshot for Hyper-V.
+ *
+ *
+ * Copyright (C) 2013, Microsoft, Inc.
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ */
+
+
+#include <sys/types.h>
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <mntent.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/fs.h>
+#include <linux/major.h>
+#include <linux/hyperv.h>
+#include <syslog.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <dirent.h>
+
+/* Don't use syslog() in this function, since that can cause a write to disk */
+static int vss_do_freeze(char *dir, unsigned int cmd)
+{
+ int ret, fd = open(dir, O_RDONLY);
+
+ if (fd < 0)
+ return 1;
+
+ ret = ioctl(fd, cmd, 0);
+
+ /*
+	 * If a partition is mounted more than once, only the first
+	 * FREEZE/THAW can succeed and the later ones will get
+	 * EBUSY/EINVAL respectively. There are two known cases:
+	 * 1) a user may mount the same partition to different directories
+	 *    by mistake or on purpose;
+	 * 2) btrfs subvolumes can make the same partition appear to be
+	 *    mounted more than once.
+ */
+ if (ret) {
+ if ((cmd == FIFREEZE && errno == EBUSY) ||
+ (cmd == FITHAW && errno == EINVAL)) {
+ close(fd);
+ return 0;
+ }
+ }
+
+ close(fd);
+ return !!ret;
+}
+
+static bool is_dev_loop(const char *blkname)
+{
+ char *buffer;
+ DIR *dir;
+ struct dirent *entry;
+ bool ret = false;
+
+ buffer = malloc(PATH_MAX);
+ if (!buffer) {
+ syslog(LOG_ERR, "Can't allocate memory!");
+ exit(1);
+ }
+
+ snprintf(buffer, PATH_MAX, "%s/loop", blkname);
+ if (!access(buffer, R_OK | X_OK)) {
+ ret = true;
+ goto free_buffer;
+ } else if (errno != ENOENT) {
+ syslog(LOG_ERR, "Can't access: %s; error:%d %s!",
+ buffer, errno, strerror(errno));
+ }
+
+ snprintf(buffer, PATH_MAX, "%s/slaves", blkname);
+ dir = opendir(buffer);
+ if (!dir) {
+ if (errno != ENOENT)
+ syslog(LOG_ERR, "Can't opendir: %s; error:%d %s!",
+ buffer, errno, strerror(errno));
+ goto free_buffer;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ if (strcmp(entry->d_name, ".") == 0 ||
+ strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ snprintf(buffer, PATH_MAX, "%s/slaves/%s", blkname,
+ entry->d_name);
+ if (is_dev_loop(buffer)) {
+ ret = true;
+ break;
+ }
+ }
+ closedir(dir);
+free_buffer:
+ free(buffer);
+ return ret;
+}
+
+static int vss_operate(int operation)
+{
+ char match[] = "/dev/";
+ FILE *mounts;
+ struct mntent *ent;
+ struct stat sb;
+ char errdir[1024] = {0};
+ char blkdir[23]; /* /sys/dev/block/XXX:XXX */
+ unsigned int cmd;
+ int error = 0, root_seen = 0, save_errno = 0;
+
+ switch (operation) {
+ case VSS_OP_FREEZE:
+ cmd = FIFREEZE;
+ break;
+ case VSS_OP_THAW:
+ cmd = FITHAW;
+ break;
+ default:
+ return -1;
+ }
+
+ mounts = setmntent("/proc/mounts", "r");
+ if (mounts == NULL)
+ return -1;
+
+ while ((ent = getmntent(mounts))) {
+ if (strncmp(ent->mnt_fsname, match, strlen(match)))
+ continue;
+ if (stat(ent->mnt_fsname, &sb)) {
+ syslog(LOG_ERR, "Can't stat: %s; error:%d %s!",
+ ent->mnt_fsname, errno, strerror(errno));
+ } else {
+ sprintf(blkdir, "/sys/dev/block/%d:%d",
+ major(sb.st_rdev), minor(sb.st_rdev));
+ if (is_dev_loop(blkdir))
+ continue;
+ }
+ if (hasmntopt(ent, MNTOPT_RO) != NULL)
+ continue;
+ if (strcmp(ent->mnt_type, "vfat") == 0)
+ continue;
+ if (strcmp(ent->mnt_dir, "/") == 0) {
+ root_seen = 1;
+ continue;
+ }
+ error |= vss_do_freeze(ent->mnt_dir, cmd);
+ if (error && operation == VSS_OP_FREEZE)
+ goto err;
+ }
+
+ endmntent(mounts);
+
+ if (root_seen) {
+ error |= vss_do_freeze("/", cmd);
+ if (error && operation == VSS_OP_FREEZE)
+ goto err;
+ }
+
+ goto out;
+err:
+ save_errno = errno;
+ if (ent) {
+ strncpy(errdir, ent->mnt_dir, sizeof(errdir)-1);
+ endmntent(mounts);
+ }
+ vss_operate(VSS_OP_THAW);
+ /* Call syslog after we thaw all filesystems */
+ if (ent)
+ syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s",
+ errdir, save_errno, strerror(save_errno));
+ else
+ syslog(LOG_ERR, "FREEZE of / failed; error:%d %s", save_errno,
+ strerror(save_errno));
+out:
+ return error;
+}
+
+void print_usage(char *argv[])
+{
+ fprintf(stderr, "Usage: %s [options]\n"
+ "Options are:\n"
+ " -n, --no-daemon stay in foreground, don't daemonize\n"
+ " -h, --help print this help\n", argv[0]);
+}
+
+int main(int argc, char *argv[])
+{
+ int vss_fd, len;
+ int error;
+ struct pollfd pfd;
+ int op;
+ struct hv_vss_msg vss_msg[1];
+ int daemonize = 1, long_index = 0, opt;
+ int in_handshake = 1;
+ __u32 kernel_modver;
+
+ static struct option long_options[] = {
+ {"help", no_argument, 0, 'h' },
+ {"no-daemon", no_argument, 0, 'n' },
+ {0, 0, 0, 0 }
+ };
+
+ while ((opt = getopt_long(argc, argv, "hn", long_options,
+ &long_index)) != -1) {
+ switch (opt) {
+ case 'n':
+ daemonize = 0;
+ break;
+ case 'h':
+ print_usage(argv);
+ exit(0);
+ default:
+ print_usage(argv);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (daemonize && daemon(1, 0))
+ return 1;
+
+ openlog("Hyper-V VSS", 0, LOG_USER);
+ syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
+
+ vss_fd = open("/dev/vmbus/hv_vss", O_RDWR);
+ if (vss_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Register ourselves with the kernel.
+ */
+ vss_msg->vss_hdr.operation = VSS_OP_REGISTER1;
+
+ len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
+ if (len < 0) {
+ syslog(LOG_ERR, "registration to kernel failed; error: %d %s",
+ errno, strerror(errno));
+ close(vss_fd);
+ exit(EXIT_FAILURE);
+ }
+
+ pfd.fd = vss_fd;
+
+ while (1) {
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+
+ if (poll(&pfd, 1, -1) < 0) {
+ syslog(LOG_ERR, "poll failed; error:%d %s", errno, strerror(errno));
+ if (errno == EINVAL) {
+ close(vss_fd);
+ exit(EXIT_FAILURE);
+ }
+ else
+ continue;
+ }
+
+ len = read(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
+
+ if (in_handshake) {
+ if (len != sizeof(kernel_modver)) {
+ syslog(LOG_ERR, "invalid version negotiation");
+ exit(EXIT_FAILURE);
+ }
+ kernel_modver = *(__u32 *)vss_msg;
+ in_handshake = 0;
+			syslog(LOG_INFO, "VSS: kernel module version: %u",
+ kernel_modver);
+ continue;
+ }
+
+ if (len != sizeof(struct hv_vss_msg)) {
+ syslog(LOG_ERR, "read failed; error:%d %s",
+ errno, strerror(errno));
+ close(vss_fd);
+ return EXIT_FAILURE;
+ }
+
+ op = vss_msg->vss_hdr.operation;
+ error = HV_S_OK;
+
+ switch (op) {
+ case VSS_OP_FREEZE:
+ case VSS_OP_THAW:
+ error = vss_operate(op);
+ syslog(LOG_INFO, "VSS: op=%s: %s\n",
+ op == VSS_OP_FREEZE ? "FREEZE" : "THAW",
+ error ? "failed" : "succeeded");
+
+ if (error) {
+ error = HV_E_FAIL;
+ syslog(LOG_ERR, "op=%d failed!", op);
+ syslog(LOG_ERR, "report it with these files:");
+ syslog(LOG_ERR, "/etc/fstab and /proc/mounts");
+ }
+ break;
+ case VSS_OP_HOT_BACKUP:
+ syslog(LOG_INFO, "VSS: op=CHECK HOT BACKUP\n");
+ break;
+ default:
+ syslog(LOG_ERR, "Illegal op:%d\n", op);
+ }
+ vss_msg->error = error;
+ len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
+ if (len != sizeof(struct hv_vss_msg)) {
+ syslog(LOG_ERR, "write failed; error: %d %s", errno,
+ strerror(errno));
+
+ if (op == VSS_OP_FREEZE)
+ vss_operate(VSS_OP_THAW);
+ }
+ }
+
+ close(vss_fd);
+ exit(0);
+}
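The core of the daemon is the FIFREEZE/FITHAW ioctl pair used by vss_do_freeze() above. A stripped-down sketch of the same pattern for a single mount point, with error handling reduced to the bare minimum:

#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

/* Freeze one filesystem, leave room for snapshot work, then thaw. */
int freeze_thaw_one(const char *mnt_dir)
{
	int ret, fd = open(mnt_dir, O_RDONLY);

	if (fd < 0)
		return -1;
	ret = ioctl(fd, FIFREEZE, 0);	/* flush dirty data, block writers */
	if (!ret) {
		/* ... the host would take its snapshot here ... */
		ret = ioctl(fd, FITHAW, 0);	/* let writes proceed again */
	}
	close(fd);
	return ret;
}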
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
new file mode 100644
index 000000000..099f2c44d
--- /dev/null
+++ b/tools/hv/lsvmbus
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+from optparse import OptionParser
+
+help_msg = "print verbose messages. Try -vv, -vvv for more verbose messages"
+parser = OptionParser()
+parser.add_option(
+ "-v", "--verbose", dest="verbose", help=help_msg, action="count")
+
+(options, args) = parser.parse_args()
+
+verbose = 0
+if options.verbose is not None:
+ verbose = options.verbose
+
+vmbus_sys_path = '/sys/bus/vmbus/devices'
+if not os.path.isdir(vmbus_sys_path):
+ print("%s doesn't exist: exiting..." % vmbus_sys_path)
+ exit(-1)
+
+vmbus_dev_dict = {
+ '{0e0b6031-5213-4934-818b-38d90ced39db}': '[Operating system shutdown]',
+ '{9527e630-d0ae-497b-adce-e80ab0175caf}': '[Time Synchronization]',
+ '{57164f39-9115-4e78-ab55-382f3bd5422d}': '[Heartbeat]',
+ '{a9a0f4e7-5a45-4d96-b827-8a841e8c03e6}': '[Data Exchange]',
+ '{35fa2e29-ea23-4236-96ae-3a6ebacba440}': '[Backup (volume checkpoint)]',
+ '{34d14be3-dee4-41c8-9ae7-6b174977c192}': '[Guest services]',
+ '{525074dc-8985-46e2-8057-a307dc18a502}': '[Dynamic Memory]',
+ '{cfa8b69e-5b4a-4cc0-b98b-8ba1a1f3f95a}': 'Synthetic mouse',
+ '{f912ad6d-2b17-48ea-bd65-f927a61c7684}': 'Synthetic keyboard',
+ '{da0a7802-e377-4aac-8e77-0558eb1073f8}': 'Synthetic framebuffer adapter',
+ '{f8615163-df3e-46c5-913f-f2d2f965ed0e}': 'Synthetic network adapter',
+ '{32412632-86cb-44a2-9b5c-50d1417354f5}': 'Synthetic IDE Controller',
+ '{ba6163d9-04a1-4d29-b605-72e2ffb1dc7f}': 'Synthetic SCSI Controller',
+ '{2f9bcc4a-0069-4af3-b76b-6fd0be528cda}': 'Synthetic fiber channel adapter',
+ '{8c2eaf3d-32a7-4b09-ab99-bd1f1c86b501}': 'Synthetic RDMA adapter',
+ '{44c4f61d-4444-4400-9d52-802e27ede19f}': 'PCI Express pass-through',
+ '{276aacf4-ac15-426c-98dd-7521ad3f01fe}': '[Reserved system device]',
+ '{f8e65716-3cb3-4a06-9a60-1889c5cccab5}': '[Reserved system device]',
+ '{3375baf4-9e15-4b30-b765-67acb10d607b}': '[Reserved system device]',
+}
+
+
+def get_vmbus_dev_attr(dev_name, attr):
+ try:
+ f = open('%s/%s/%s' % (vmbus_sys_path, dev_name, attr), 'r')
+ lines = f.readlines()
+ f.close()
+ except IOError:
+ lines = []
+
+ return lines
+
+
+class VMBus_Dev:
+ pass
+
+
+vmbus_dev_list = []
+
+for f in os.listdir(vmbus_sys_path):
+ vmbus_id = get_vmbus_dev_attr(f, 'id')[0].strip()
+ class_id = get_vmbus_dev_attr(f, 'class_id')[0].strip()
+ device_id = get_vmbus_dev_attr(f, 'device_id')[0].strip()
+ dev_desc = vmbus_dev_dict.get(class_id, 'Unknown')
+
+ chn_vp_mapping = get_vmbus_dev_attr(f, 'channel_vp_mapping')
+ chn_vp_mapping = [c.strip() for c in chn_vp_mapping]
+ chn_vp_mapping = sorted(
+ chn_vp_mapping, key=lambda c: int(c.split(':')[0]))
+
+ chn_vp_mapping = [
+ '\tRel_ID=%s, target_cpu=%s' %
+ (c.split(':')[0], c.split(':')[1]) for c in chn_vp_mapping
+ ]
+ d = VMBus_Dev()
+ d.sysfs_path = '%s/%s' % (vmbus_sys_path, f)
+ d.vmbus_id = vmbus_id
+ d.class_id = class_id
+ d.device_id = device_id
+ d.dev_desc = dev_desc
+ d.chn_vp_mapping = '\n'.join(chn_vp_mapping)
+ if d.chn_vp_mapping:
+ d.chn_vp_mapping += '\n'
+
+ vmbus_dev_list.append(d)
+
+
+vmbus_dev_list = sorted(vmbus_dev_list, key=lambda d: int(d.vmbus_id))
+
+format0 = '%2s: %s'
+format1 = '%2s: Class_ID = %s - %s\n%s'
+format2 = '%2s: Class_ID = %s - %s\n\tDevice_ID = %s\n\tSysfs path: %s\n%s'
+
+for d in vmbus_dev_list:
+ if verbose == 0:
+ print(('VMBUS ID ' + format0) % (d.vmbus_id, d.dev_desc))
+ elif verbose == 1:
+ print(
+ ('VMBUS ID ' + format1) %
+ (d.vmbus_id, d.class_id, d.dev_desc, d.chn_vp_mapping)
+ )
+ else:
+ print(
+ ('VMBUS ID ' + format2) %
+ (
+ d.vmbus_id, d.class_id, d.dev_desc,
+ d.device_id, d.sysfs_path, d.chn_vp_mapping
+ )
+ )
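At the default verbosity the script prints one line per device, sorted by VMBus ID; illustrative output (not captured from a real system):

VMBUS ID  1: [Operating system shutdown]
VMBUS ID  2: [Time Synchronization]
VMBUS ID  3: [Heartbeat]
VMBUS ID  4: [Data Exchange]
VMBUS ID  5: Synthetic mouse
VMBUS ID  6: Synthetic network adapter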
diff --git a/tools/iio/Build b/tools/iio/Build
new file mode 100644
index 000000000..f74cbda64
--- /dev/null
+++ b/tools/iio/Build
@@ -0,0 +1,3 @@
+lsiio-y += lsiio.o iio_utils.o
+iio_event_monitor-y += iio_event_monitor.o iio_utils.o
+iio_generic_buffer-y += iio_generic_buffer.o iio_utils.o
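+# Each <tool>-y list above names the objects that the tools/build
+# framework links into the corresponding $(OUTPUT)<tool>-in.o consumed
+# by the Makefile.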
diff --git a/tools/iio/Makefile b/tools/iio/Makefile
new file mode 100644
index 000000000..332ed2f6c
--- /dev/null
+++ b/tools/iio/Makefile
@@ -0,0 +1,66 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour).
+MAKEFLAGS += -r
+
+CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := iio_event_monitor lsiio iio_generic_buffer
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following symlinks so the uapi IIO headers can be
+# included when building outside of the kernel tree
+#
+$(OUTPUT)include/linux/iio: ../../include/uapi/linux/iio
+ mkdir -p $(OUTPUT)include/linux/iio 2>&1 || true
+ ln -sf $(CURDIR)/../../include/uapi/linux/iio/events.h $@
+ ln -sf $(CURDIR)/../../include/uapi/linux/iio/types.h $@
+
+prepare: $(OUTPUT)include/linux/iio
+
+LSIIO_IN := $(OUTPUT)lsiio-in.o
+$(LSIIO_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=lsiio
+$(OUTPUT)lsiio: $(LSIIO_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+IIO_EVENT_MONITOR_IN := $(OUTPUT)iio_event_monitor-in.o
+$(IIO_EVENT_MONITOR_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=iio_event_monitor
+$(OUTPUT)iio_event_monitor: $(IIO_EVENT_MONITOR_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+IIO_GENERIC_BUFFER_IN := $(OUTPUT)iio_generic_buffer-in.o
+$(IIO_GENERIC_BUFFER_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=iio_generic_buffer
+$(OUTPUT)iio_generic_buffer: $(IIO_GENERIC_BUFFER_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+clean:
+ rm -f $(ALL_PROGRAMS)
+ rm -rf $(OUTPUT)include/linux/iio
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
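+
+# Example usage, as a sketch (run from an unpacked kernel source tree;
+# O= output handling comes from ../scripts/Makefile.include):
+#
+#	make -C tools/iio
+#	make -C tools/iio O=/tmp/iio-objs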
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
new file mode 100644
index 000000000..ac2de6b7e
--- /dev/null
+++ b/tools/iio/iio_event_monitor.c
@@ -0,0 +1,358 @@
+/* Industrialio event test code.
+ *
+ * Copyright (c) 2011-2012 Lars-Peter Clausen <lars@metafoo.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is primarily intended as an example application.
+ * It waits for events from the specified IIO device and pretty prints
+ * each one as it arrives.
+ *
+ * Usage:
+ * iio_event_monitor <device_name>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include "iio_utils.h"
+#include <linux/iio/events.h>
+#include <linux/iio/types.h>
+
+static const char * const iio_chan_type_name_spec[] = {
+ [IIO_VOLTAGE] = "voltage",
+ [IIO_CURRENT] = "current",
+ [IIO_POWER] = "power",
+ [IIO_ACCEL] = "accel",
+ [IIO_ANGL_VEL] = "anglvel",
+ [IIO_MAGN] = "magn",
+ [IIO_LIGHT] = "illuminance",
+ [IIO_INTENSITY] = "intensity",
+ [IIO_PROXIMITY] = "proximity",
+ [IIO_TEMP] = "temp",
+ [IIO_INCLI] = "incli",
+ [IIO_ROT] = "rot",
+ [IIO_ANGL] = "angl",
+ [IIO_TIMESTAMP] = "timestamp",
+ [IIO_CAPACITANCE] = "capacitance",
+ [IIO_ALTVOLTAGE] = "altvoltage",
+ [IIO_CCT] = "cct",
+ [IIO_PRESSURE] = "pressure",
+ [IIO_HUMIDITYRELATIVE] = "humidityrelative",
+ [IIO_ACTIVITY] = "activity",
+ [IIO_STEPS] = "steps",
+ [IIO_ENERGY] = "energy",
+ [IIO_DISTANCE] = "distance",
+ [IIO_VELOCITY] = "velocity",
+ [IIO_CONCENTRATION] = "concentration",
+ [IIO_RESISTANCE] = "resistance",
+ [IIO_PH] = "ph",
+ [IIO_UVINDEX] = "uvindex",
+ [IIO_GRAVITY] = "gravity",
+ [IIO_POSITIONRELATIVE] = "positionrelative",
+ [IIO_PHASE] = "phase",
+};
+
+static const char * const iio_ev_type_text[] = {
+ [IIO_EV_TYPE_THRESH] = "thresh",
+ [IIO_EV_TYPE_MAG] = "mag",
+ [IIO_EV_TYPE_ROC] = "roc",
+ [IIO_EV_TYPE_THRESH_ADAPTIVE] = "thresh_adaptive",
+ [IIO_EV_TYPE_MAG_ADAPTIVE] = "mag_adaptive",
+ [IIO_EV_TYPE_CHANGE] = "change",
+};
+
+static const char * const iio_ev_dir_text[] = {
+ [IIO_EV_DIR_EITHER] = "either",
+ [IIO_EV_DIR_RISING] = "rising",
+ [IIO_EV_DIR_FALLING] = "falling"
+};
+
+static const char * const iio_modifier_names[] = {
+ [IIO_MOD_X] = "x",
+ [IIO_MOD_Y] = "y",
+ [IIO_MOD_Z] = "z",
+ [IIO_MOD_X_AND_Y] = "x&y",
+ [IIO_MOD_X_AND_Z] = "x&z",
+ [IIO_MOD_Y_AND_Z] = "y&z",
+ [IIO_MOD_X_AND_Y_AND_Z] = "x&y&z",
+ [IIO_MOD_X_OR_Y] = "x|y",
+ [IIO_MOD_X_OR_Z] = "x|z",
+ [IIO_MOD_Y_OR_Z] = "y|z",
+ [IIO_MOD_X_OR_Y_OR_Z] = "x|y|z",
+ [IIO_MOD_LIGHT_BOTH] = "both",
+ [IIO_MOD_LIGHT_IR] = "ir",
+ [IIO_MOD_ROOT_SUM_SQUARED_X_Y] = "sqrt(x^2+y^2)",
+ [IIO_MOD_SUM_SQUARED_X_Y_Z] = "x^2+y^2+z^2",
+ [IIO_MOD_LIGHT_CLEAR] = "clear",
+ [IIO_MOD_LIGHT_RED] = "red",
+ [IIO_MOD_LIGHT_GREEN] = "green",
+ [IIO_MOD_LIGHT_BLUE] = "blue",
+ [IIO_MOD_LIGHT_UV] = "uv",
+ [IIO_MOD_LIGHT_DUV] = "duv",
+ [IIO_MOD_QUATERNION] = "quaternion",
+ [IIO_MOD_TEMP_AMBIENT] = "ambient",
+ [IIO_MOD_TEMP_OBJECT] = "object",
+ [IIO_MOD_NORTH_MAGN] = "from_north_magnetic",
+ [IIO_MOD_NORTH_TRUE] = "from_north_true",
+ [IIO_MOD_NORTH_MAGN_TILT_COMP] = "from_north_magnetic_tilt_comp",
+ [IIO_MOD_NORTH_TRUE_TILT_COMP] = "from_north_true_tilt_comp",
+ [IIO_MOD_RUNNING] = "running",
+ [IIO_MOD_JOGGING] = "jogging",
+ [IIO_MOD_WALKING] = "walking",
+ [IIO_MOD_STILL] = "still",
+ [IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)",
+ [IIO_MOD_I] = "i",
+ [IIO_MOD_Q] = "q",
+ [IIO_MOD_CO2] = "co2",
+ [IIO_MOD_VOC] = "voc",
+};
+
+static bool event_is_known(struct iio_event_data *event)
+{
+ enum iio_chan_type type = IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event->id);
+ enum iio_modifier mod = IIO_EVENT_CODE_EXTRACT_MODIFIER(event->id);
+ enum iio_event_type ev_type = IIO_EVENT_CODE_EXTRACT_TYPE(event->id);
+ enum iio_event_direction dir = IIO_EVENT_CODE_EXTRACT_DIR(event->id);
+
+ switch (type) {
+ case IIO_VOLTAGE:
+ case IIO_CURRENT:
+ case IIO_POWER:
+ case IIO_ACCEL:
+ case IIO_ANGL_VEL:
+ case IIO_MAGN:
+ case IIO_LIGHT:
+ case IIO_INTENSITY:
+ case IIO_PROXIMITY:
+ case IIO_TEMP:
+ case IIO_INCLI:
+ case IIO_ROT:
+ case IIO_ANGL:
+ case IIO_TIMESTAMP:
+ case IIO_CAPACITANCE:
+ case IIO_ALTVOLTAGE:
+ case IIO_CCT:
+ case IIO_PRESSURE:
+ case IIO_HUMIDITYRELATIVE:
+ case IIO_ACTIVITY:
+ case IIO_STEPS:
+ case IIO_ENERGY:
+ case IIO_DISTANCE:
+ case IIO_VELOCITY:
+ case IIO_CONCENTRATION:
+ case IIO_RESISTANCE:
+ case IIO_PH:
+ case IIO_UVINDEX:
+ case IIO_GRAVITY:
+ case IIO_POSITIONRELATIVE:
+ case IIO_PHASE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (mod) {
+ case IIO_NO_MOD:
+ case IIO_MOD_X:
+ case IIO_MOD_Y:
+ case IIO_MOD_Z:
+ case IIO_MOD_X_AND_Y:
+ case IIO_MOD_X_AND_Z:
+ case IIO_MOD_Y_AND_Z:
+ case IIO_MOD_X_AND_Y_AND_Z:
+ case IIO_MOD_X_OR_Y:
+ case IIO_MOD_X_OR_Z:
+ case IIO_MOD_Y_OR_Z:
+ case IIO_MOD_X_OR_Y_OR_Z:
+ case IIO_MOD_LIGHT_BOTH:
+ case IIO_MOD_LIGHT_IR:
+ case IIO_MOD_ROOT_SUM_SQUARED_X_Y:
+ case IIO_MOD_SUM_SQUARED_X_Y_Z:
+ case IIO_MOD_LIGHT_CLEAR:
+ case IIO_MOD_LIGHT_RED:
+ case IIO_MOD_LIGHT_GREEN:
+ case IIO_MOD_LIGHT_BLUE:
+ case IIO_MOD_LIGHT_UV:
+ case IIO_MOD_LIGHT_DUV:
+ case IIO_MOD_QUATERNION:
+ case IIO_MOD_TEMP_AMBIENT:
+ case IIO_MOD_TEMP_OBJECT:
+ case IIO_MOD_NORTH_MAGN:
+ case IIO_MOD_NORTH_TRUE:
+ case IIO_MOD_NORTH_MAGN_TILT_COMP:
+ case IIO_MOD_NORTH_TRUE_TILT_COMP:
+ case IIO_MOD_RUNNING:
+ case IIO_MOD_JOGGING:
+ case IIO_MOD_WALKING:
+ case IIO_MOD_STILL:
+ case IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z:
+ case IIO_MOD_I:
+ case IIO_MOD_Q:
+ case IIO_MOD_CO2:
+ case IIO_MOD_VOC:
+ break;
+ default:
+ return false;
+ }
+
+ switch (ev_type) {
+ case IIO_EV_TYPE_THRESH:
+ case IIO_EV_TYPE_MAG:
+ case IIO_EV_TYPE_ROC:
+ case IIO_EV_TYPE_THRESH_ADAPTIVE:
+ case IIO_EV_TYPE_MAG_ADAPTIVE:
+ case IIO_EV_TYPE_CHANGE:
+ break;
+ default:
+ return false;
+ }
+
+ switch (dir) {
+ case IIO_EV_DIR_EITHER:
+ case IIO_EV_DIR_RISING:
+ case IIO_EV_DIR_FALLING:
+ case IIO_EV_DIR_NONE:
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+static void print_event(struct iio_event_data *event)
+{
+ enum iio_chan_type type = IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event->id);
+ enum iio_modifier mod = IIO_EVENT_CODE_EXTRACT_MODIFIER(event->id);
+ enum iio_event_type ev_type = IIO_EVENT_CODE_EXTRACT_TYPE(event->id);
+ enum iio_event_direction dir = IIO_EVENT_CODE_EXTRACT_DIR(event->id);
+ int chan = IIO_EVENT_CODE_EXTRACT_CHAN(event->id);
+ int chan2 = IIO_EVENT_CODE_EXTRACT_CHAN2(event->id);
+ bool diff = IIO_EVENT_CODE_EXTRACT_DIFF(event->id);
+
+ if (!event_is_known(event)) {
+ fprintf(stderr, "Unknown event: time: %lld, id: %llx\n",
+ event->timestamp, event->id);
+
+ return;
+ }
+
+ printf("Event: time: %lld, type: %s", event->timestamp,
+ iio_chan_type_name_spec[type]);
+
+ if (mod != IIO_NO_MOD)
+ printf("(%s)", iio_modifier_names[mod]);
+
+ if (chan >= 0) {
+ printf(", channel: %d", chan);
+ if (diff && chan2 >= 0)
+ printf("-%d", chan2);
+ }
+
+ printf(", evtype: %s", iio_ev_type_text[ev_type]);
+
+ if (dir != IIO_EV_DIR_NONE)
+ printf(", direction: %s", iio_ev_dir_text[dir]);
+
+ printf("\n");
+}
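+
+/*
+ * A line produced by the formats above looks like (values illustrative):
+ *
+ *	Event: time: 1526740299000000000, type: accel(x), channel: 0, evtype: thresh, direction: rising
+ */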
+
+int main(int argc, char **argv)
+{
+ struct iio_event_data event;
+ const char *device_name;
+ char *chrdev_name;
+ int ret;
+ int dev_num;
+ int fd, event_fd;
+
+ if (argc <= 1) {
+ fprintf(stderr, "Usage: %s <device_name>\n", argv[0]);
+ return -1;
+ }
+
+ device_name = argv[1];
+
+ dev_num = find_type_by_name(device_name, "iio:device");
+ if (dev_num >= 0) {
+ printf("Found IIO device with name %s with device number %d\n",
+ device_name, dev_num);
+ ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num);
+ if (ret < 0)
+ return -ENOMEM;
+ } else {
+ /*
+ * If we can't find an IIO device by name assume device_name is
+ * an IIO chrdev
+ */
+ chrdev_name = strdup(device_name);
+ if (!chrdev_name)
+ return -ENOMEM;
+ }
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", chrdev_name);
+ goto error_free_chrdev_name;
+ }
+
+ ret = ioctl(fd, IIO_GET_EVENT_FD_IOCTL, &event_fd);
+ if (ret == -1 || event_fd == -1) {
+ ret = -errno;
+ if (ret == -ENODEV)
+ fprintf(stderr,
+ "This device does not support events\n");
+ else
+ fprintf(stderr, "Failed to retrieve event fd\n");
+ if (close(fd) == -1)
+ perror("Failed to close character device file");
+
+ goto error_free_chrdev_name;
+ }
+
+ if (close(fd) == -1) {
+ ret = -errno;
+ goto error_free_chrdev_name;
+ }
+
+ while (true) {
+ ret = read(event_fd, &event, sizeof(event));
+ if (ret == -1) {
+ if (errno == EAGAIN) {
+ fprintf(stderr, "nothing available\n");
+ continue;
+ } else {
+ ret = -errno;
+ perror("Failed to read event from device");
+ break;
+ }
+ }
+
+ if (ret != sizeof(event)) {
+ fprintf(stderr, "Reading event failed!\n");
+ ret = -EIO;
+ break;
+ }
+
+ print_event(&event);
+ }
+
+ if (close(event_fd) == -1)
+ perror("Failed to close event file");
+
+error_free_chrdev_name:
+ free(chrdev_name);
+
+ return ret;
+}
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
new file mode 100644
index 000000000..84545666a
--- /dev/null
+++ b/tools/iio/iio_generic_buffer.c
@@ -0,0 +1,693 @@
+/* Industrialio buffer test code.
+ *
+ * Copyright (c) 2008 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is primarily intended as an example application.
+ * Reads the current buffer setup from sysfs and starts a short capture
+ * from the specified device, pretty printing the result after appropriate
+ * conversion.
+ *
+ * Command line parameters
+ * generic_buffer -n <device_name> -t <trigger_name>
+ * If the trigger name is not specified, the program assumes you want a
+ * data-ready trigger associated with the device and goes looking for it.
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/dir.h>
+#include <linux/types.h>
+#include <string.h>
+#include <poll.h>
+#include <endian.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <signal.h>
+#include "iio_utils.h"
+
+/**
+ * enum autochan - state for the automatic channel enabling mechanism
+ */
+enum autochan {
+ AUTOCHANNELS_DISABLED,
+ AUTOCHANNELS_ENABLED,
+ AUTOCHANNELS_ACTIVE,
+};
+
+/**
+ * size_from_channelarray() - calculate the storage size of a scan
+ * @channels: the channel info array
+ * @num_channels: number of channels
+ *
+ * Has the side effect of filling the channels[i].location values used
+ * in processing the buffer output.
+ **/
+int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
+{
+ int bytes = 0;
+ int i = 0;
+
+ while (i < num_channels) {
+ if (bytes % channels[i].bytes == 0)
+ channels[i].location = bytes;
+ else
+ channels[i].location = bytes - bytes % channels[i].bytes
+ + channels[i].bytes;
+
+ bytes = channels[i].location + channels[i].bytes;
+ i++;
+ }
+
+ return bytes;
+}
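+
+/*
+ * Worked example (illustrative): two 2-byte channels followed by an
+ * 8-byte timestamp get locations 0, 2 and 8 - the timestamp is padded
+ * out to its own alignment - for a total scan size of 16 bytes.
+ */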
+
+void print1byte(uint8_t input, struct iio_channel_info *info)
+{
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int8_t val = (int8_t)(input << (8 - info->bits_used)) >>
+ (8 - info->bits_used);
+ printf("%05f ", ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
+
+void print2byte(uint16_t input, struct iio_channel_info *info)
+{
+ /* First swap if incorrect endian */
+ if (info->be)
+ input = be16toh(input);
+ else
+ input = le16toh(input);
+
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int16_t val = (int16_t)(input << (16 - info->bits_used)) >>
+ (16 - info->bits_used);
+ printf("%05f ", ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
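+
+/*
+ * For example (illustrative): a left-aligned signed 12-bit sample stored
+ * as 0xabc0 with shift = 4 and mask = 0xfff is reduced to 0xabc by the
+ * shift/mask above, and the cast-and-shift pair sign-extends it to -1348.
+ */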
+
+void print4byte(uint32_t input, struct iio_channel_info *info)
+{
+ /* First swap if incorrect endian */
+ if (info->be)
+ input = be32toh(input);
+ else
+ input = le32toh(input);
+
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int32_t val = (int32_t)(input << (32 - info->bits_used)) >>
+ (32 - info->bits_used);
+ printf("%05f ", ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
+
+void print8byte(uint64_t input, struct iio_channel_info *info)
+{
+ /* First swap if incorrect endian */
+ if (info->be)
+ input = be64toh(input);
+ else
+ input = le64toh(input);
+
+ /*
+ * Shift before conversion to avoid sign extension
+ * of left aligned data
+ */
+ input >>= info->shift;
+ input &= info->mask;
+ if (info->is_signed) {
+ int64_t val = (int64_t)(input << (64 - info->bits_used)) >>
+ (64 - info->bits_used);
+ /* special case for timestamp */
+ if (info->scale == 1.0f && info->offset == 0.0f)
+ printf("%" PRId64 " ", val);
+ else
+ printf("%05f ",
+ ((float)val + info->offset) * info->scale);
+ } else {
+ printf("%05f ", ((float)input + info->offset) * info->scale);
+ }
+}
+
+/**
+ * process_scan() - print out the values in SI units
+ * @data: pointer to the start of the scan
+ * @channels: information about the channels.
+ * Note: size_from_channelarray must have been called first
+ * to fill the location offsets.
+ * @num_channels: number of channels
+ **/
+void process_scan(char *data,
+ struct iio_channel_info *channels,
+ int num_channels)
+{
+ int k;
+
+ for (k = 0; k < num_channels; k++)
+ switch (channels[k].bytes) {
+ /* only a few cases implemented so far */
+ case 1:
+ print1byte(*(uint8_t *)(data + channels[k].location),
+ &channels[k]);
+ break;
+ case 2:
+ print2byte(*(uint16_t *)(data + channels[k].location),
+ &channels[k]);
+ break;
+ case 4:
+ print4byte(*(uint32_t *)(data + channels[k].location),
+ &channels[k]);
+ break;
+ case 8:
+ print8byte(*(uint64_t *)(data + channels[k].location),
+ &channels[k]);
+ break;
+ default:
+ break;
+ }
+ printf("\n");
+}
+
+static int enable_disable_all_channels(char *dev_dir_name, int enable)
+{
+ const struct dirent *ent;
+ char scanelemdir[256];
+ DIR *dp;
+ int ret;
+
+ snprintf(scanelemdir, sizeof(scanelemdir),
+ FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name);
+ scanelemdir[sizeof(scanelemdir)-1] = '\0';
+
+ dp = opendir(scanelemdir);
+ if (!dp) {
+ fprintf(stderr, "Enabling/disabling channels: can't open %s\n",
+ scanelemdir);
+ return -EIO;
+ }
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent) {
+ if (iioutils_check_suffix(ent->d_name, "_en")) {
+ printf("%sabling: %s\n",
+ enable ? "En" : "Dis",
+ ent->d_name);
+ ret = write_sysfs_int(ent->d_name, scanelemdir,
+ enable);
+ if (ret < 0)
+ fprintf(stderr, "Failed to enable/disable %s\n",
+ ent->d_name);
+ }
+ }
+
+ if (closedir(dp) == -1) {
+ perror("Enabling/disabling channels: "
+ "Failed to close directory");
+ return -errno;
+ }
+ return 0;
+}
+
+void print_usage(void)
+{
+ fprintf(stderr, "Usage: generic_buffer [options]...\n"
+ "Capture, convert and output data from IIO device buffer\n"
+ " -a Auto-activate all available channels\n"
+ " -A Force-activate ALL channels\n"
+ " -c <n> Do n conversions, or loop forever if n < 0\n"
+ " -e Disable wait for event (new data)\n"
+ " -g Use trigger-less mode\n"
+ " -l <n> Set buffer length to n samples\n"
+ " --device-name -n <name>\n"
+ " --device-num -N <num>\n"
+ " Set device by name or number (mandatory)\n"
+ " --trigger-name -t <name>\n"
+ " --trigger-num -T <num>\n"
+ " Set trigger by name or number\n"
+ " -w <n> Set delay between reads in us (event-less mode)\n");
+}
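+
+/*
+ * Example invocation (the device name is illustrative):
+ *
+ *	generic_buffer -a -c 10 -n lis3l02dq
+ *
+ * auto-enables all channels and captures ten scans using the device's
+ * default data-ready trigger.
+ */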
+
+enum autochan autochannels = AUTOCHANNELS_DISABLED;
+char *dev_dir_name = NULL;
+char *buf_dir_name = NULL;
+bool current_trigger_set = false;
+
+void cleanup(void)
+{
+ int ret;
+
+ /* Disable trigger */
+ if (dev_dir_name && current_trigger_set) {
+ /* Disconnect the trigger - just write a dummy name. */
+ ret = write_sysfs_string("trigger/current_trigger",
+ dev_dir_name, "NULL");
+ if (ret < 0)
+ fprintf(stderr, "Failed to disable trigger: %s\n",
+ strerror(-ret));
+ current_trigger_set = false;
+ }
+
+ /* Disable buffer */
+ if (buf_dir_name) {
+ ret = write_sysfs_int("enable", buf_dir_name, 0);
+ if (ret < 0)
+ fprintf(stderr, "Failed to disable buffer: %s\n",
+ strerror(-ret));
+ }
+
+ /* Disable channels if auto-enabled */
+ if (dev_dir_name && autochannels == AUTOCHANNELS_ACTIVE) {
+ ret = enable_disable_all_channels(dev_dir_name, 0);
+ if (ret)
+ fprintf(stderr, "Failed to disable all channels\n");
+ autochannels = AUTOCHANNELS_DISABLED;
+ }
+}
+
+void sig_handler(int signum)
+{
+ fprintf(stderr, "Caught signal %d\n", signum);
+ cleanup();
+ exit(-signum);
+}
+
+void register_cleanup(void)
+{
+ struct sigaction sa = { .sa_handler = sig_handler };
+ const int signums[] = { SIGINT, SIGTERM, SIGABRT };
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(signums); ++i) {
+ ret = sigaction(signums[i], &sa, NULL);
+ if (ret) {
+ perror("Failed to register signal handler");
+ exit(-1);
+ }
+ }
+}
+
+static const struct option longopts[] = {
+ { "device-name", 1, 0, 'n' },
+ { "device-num", 1, 0, 'N' },
+ { "trigger-name", 1, 0, 't' },
+ { "trigger-num", 1, 0, 'T' },
+ { },
+};
+
+int main(int argc, char **argv)
+{
+ long long num_loops = 2;
+ unsigned long timedelay = 1000000;
+ unsigned long buf_len = 128;
+
+ ssize_t i;
+ unsigned long long j;
+ unsigned long toread;
+ int ret, c;
+ int fp = -1;
+
+ int num_channels = 0;
+ char *trigger_name = NULL, *device_name = NULL;
+
+ char *data = NULL;
+ ssize_t read_size;
+ int dev_num = -1, trig_num = -1;
+ char *buffer_access = NULL;
+ int scan_size;
+ int noevents = 0;
+ int notrigger = 0;
+ char *dummy;
+ bool force_autochannels = false;
+
+ struct iio_channel_info *channels = NULL;
+
+ register_cleanup();
+
+ while ((c = getopt_long(argc, argv, "aAc:egl:n:N:t:T:w:?", longopts,
+ NULL)) != -1) {
+ switch (c) {
+ case 'a':
+ autochannels = AUTOCHANNELS_ENABLED;
+ break;
+ case 'A':
+ autochannels = AUTOCHANNELS_ENABLED;
+ force_autochannels = true;
+ break;
+ case 'c':
+ errno = 0;
+ num_loops = strtoll(optarg, &dummy, 10);
+ if (errno) {
+ ret = -errno;
+ goto error;
+ }
+
+ break;
+ case 'e':
+ noevents = 1;
+ break;
+ case 'g':
+ notrigger = 1;
+ break;
+ case 'l':
+ errno = 0;
+ buf_len = strtoul(optarg, &dummy, 10);
+ if (errno) {
+ ret = -errno;
+ goto error;
+ }
+
+ break;
+ case 'n':
+ device_name = strdup(optarg);
+ break;
+ case 'N':
+ errno = 0;
+ dev_num = strtoul(optarg, &dummy, 10);
+ if (errno) {
+ ret = -errno;
+ goto error;
+ }
+ break;
+ case 't':
+ trigger_name = strdup(optarg);
+ break;
+ case 'T':
+ errno = 0;
+ trig_num = strtoul(optarg, &dummy, 10);
+ if (errno)
+ return -errno;
+ break;
+ case 'w':
+ errno = 0;
+ timedelay = strtoul(optarg, &dummy, 10);
+ if (errno) {
+ ret = -errno;
+ goto error;
+ }
+ break;
+ case '?':
+ print_usage();
+ ret = -1;
+ goto error;
+ }
+ }
+
+ /* Find the device requested */
+ if (dev_num < 0 && !device_name) {
+ fprintf(stderr, "Device not set\n");
+ print_usage();
+ ret = -1;
+ goto error;
+ } else if (dev_num >= 0 && device_name) {
+		fprintf(stderr, "Only one of --device-num or --device-name may be set\n");
+ print_usage();
+ ret = -1;
+ goto error;
+ } else if (dev_num < 0) {
+ dev_num = find_type_by_name(device_name, "iio:device");
+ if (dev_num < 0) {
+ fprintf(stderr, "Failed to find the %s\n", device_name);
+ ret = dev_num;
+ goto error;
+ }
+ }
+ printf("iio device number being used is %d\n", dev_num);
+
+ ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num);
+ if (ret < 0)
+ return -ENOMEM;
+ /* Fetch device_name if specified by number */
+ if (!device_name) {
+ device_name = malloc(IIO_MAX_NAME_LENGTH);
+ if (!device_name) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ ret = read_sysfs_string("name", dev_dir_name, device_name);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to read name of device %d\n", dev_num);
+ goto error;
+ }
+ }
+
+ if (notrigger) {
+ printf("trigger-less mode selected\n");
+ } else if (trig_num >= 0) {
+ char *trig_dev_name;
+ ret = asprintf(&trig_dev_name, "%strigger%d", iio_dir, trig_num);
+ if (ret < 0) {
+ return -ENOMEM;
+ }
+		trigger_name = malloc(IIO_MAX_NAME_LENGTH);
+		if (!trigger_name) {
+			free(trig_dev_name);
+			return -ENOMEM;
+		}
+		ret = read_sysfs_string("name", trig_dev_name, trigger_name);
+ free(trig_dev_name);
+ if (ret < 0) {
+			fprintf(stderr, "Failed to read trigger%d name\n", trig_num);
+ return ret;
+ }
+ printf("iio trigger number being used is %d\n", trig_num);
+ } else {
+ if (!trigger_name) {
+ /*
+			 * Build the trigger name. If it is device-associated,
+			 * its name is <device_name>-dev[n] where n matches
+			 * the device number found above.
+ */
+ ret = asprintf(&trigger_name,
+ "%s-dev%d", device_name, dev_num);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+
+ /* Look for this "-devN" trigger */
+ trig_num = find_type_by_name(trigger_name, "trigger");
+ if (trig_num < 0) {
+ /* OK try the simpler "-trigger" suffix instead */
+ free(trigger_name);
+ ret = asprintf(&trigger_name,
+ "%s-trigger", device_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+ }
+
+ trig_num = find_type_by_name(trigger_name, "trigger");
+ if (trig_num < 0) {
+ fprintf(stderr, "Failed to find the trigger %s\n",
+ trigger_name);
+ ret = trig_num;
+ goto error;
+ }
+
+ printf("iio trigger number being used is %d\n", trig_num);
+ }
+
+ /*
+ * Parse the files in scan_elements to identify what channels are
+ * present
+ */
+ ret = build_channel_array(dev_dir_name, &channels, &num_channels);
+ if (ret) {
+ fprintf(stderr, "Problem reading scan element information\n"
+ "diag %s\n", dev_dir_name);
+ goto error;
+ }
+ if (num_channels && autochannels == AUTOCHANNELS_ENABLED &&
+ !force_autochannels) {
+ fprintf(stderr, "Auto-channels selected but some channels "
+ "are already activated in sysfs\n");
+ fprintf(stderr, "Proceeding without activating any channels\n");
+ }
+
+ if ((!num_channels && autochannels == AUTOCHANNELS_ENABLED) ||
+ (autochannels == AUTOCHANNELS_ENABLED && force_autochannels)) {
+ fprintf(stderr, "Enabling all channels\n");
+
+ ret = enable_disable_all_channels(dev_dir_name, 1);
+ if (ret) {
+ fprintf(stderr, "Failed to enable all channels\n");
+ goto error;
+ }
+
+ /* This flags that we need to disable the channels again */
+ autochannels = AUTOCHANNELS_ACTIVE;
+
+ ret = build_channel_array(dev_dir_name, &channels,
+ &num_channels);
+ if (ret) {
+ fprintf(stderr, "Problem reading scan element "
+ "information\n"
+ "diag %s\n", dev_dir_name);
+ goto error;
+ }
+ if (!num_channels) {
+ fprintf(stderr, "Still no channels after "
+ "auto-enabling, giving up\n");
+ goto error;
+ }
+ }
+
+ if (!num_channels && autochannels == AUTOCHANNELS_DISABLED) {
+ fprintf(stderr,
+ "No channels are enabled, we have nothing to scan.\n");
+ fprintf(stderr, "Enable channels manually in "
+ FORMAT_SCAN_ELEMENTS_DIR
+ "/*_en or pass -a to autoenable channels and "
+ "try again.\n", dev_dir_name);
+ ret = -ENOENT;
+ goto error;
+ }
+
+ /*
+ * Construct the directory name for the associated buffer.
+ * As we know that the lis3l02dq has only one buffer this may
+ * be built rather than found.
+ */
+ ret = asprintf(&buf_dir_name,
+ "%siio:device%d/buffer", iio_dir, dev_num);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (!notrigger) {
+ printf("%s %s\n", dev_dir_name, trigger_name);
+ /*
+ * Set the device trigger to be the data ready trigger found
+ * above
+ */
+ ret = write_sysfs_string_and_verify("trigger/current_trigger",
+ dev_dir_name,
+ trigger_name);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to write current_trigger file\n");
+ goto error;
+ }
+ }
+
+ /* Setup ring buffer parameters */
+ ret = write_sysfs_int("length", buf_dir_name, buf_len);
+ if (ret < 0)
+ goto error;
+
+ /* Enable the buffer */
+ ret = write_sysfs_int("enable", buf_dir_name, 1);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to enable buffer: %s\n", strerror(-ret));
+ goto error;
+ }
+
+ scan_size = size_from_channelarray(channels, num_channels);
+ data = malloc(scan_size * buf_len);
+ if (!data) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ /* Attempt to open non blocking the access dev */
+ fp = open(buffer_access, O_RDONLY | O_NONBLOCK);
+ if (fp == -1) { /* TODO: If it isn't there make the node */
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", buffer_access);
+ goto error;
+ }
+
+ for (j = 0; j < num_loops || num_loops < 0; j++) {
+ if (!noevents) {
+ struct pollfd pfd = {
+ .fd = fp,
+ .events = POLLIN,
+ };
+
+ ret = poll(&pfd, 1, -1);
+ if (ret < 0) {
+ ret = -errno;
+ goto error;
+ } else if (ret == 0) {
+ continue;
+ }
+
+ toread = buf_len;
+ } else {
+ usleep(timedelay);
+ toread = 64;
+ }
+
+ read_size = read(fp, data, toread * scan_size);
+ if (read_size < 0) {
+ if (errno == EAGAIN) {
+ fprintf(stderr, "nothing available\n");
+ continue;
+ } else {
+ break;
+ }
+ }
+ for (i = 0; i < read_size / scan_size; i++)
+ process_scan(data + scan_size * i, channels,
+ num_channels);
+ }
+
+error:
+ cleanup();
+
+ if (fp >= 0 && close(fp) == -1)
+ perror("Failed to close buffer");
+ free(buffer_access);
+ free(data);
+ free(buf_dir_name);
+ for (i = num_channels - 1; i >= 0; i--) {
+ free(channels[i].name);
+ free(channels[i].generic_name);
+ }
+ free(channels);
+ free(trigger_name);
+ free(device_name);
+ free(dev_dir_name);
+
+ return ret;
+}
diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c
new file mode 100644
index 000000000..55272fef3
--- /dev/null
+++ b/tools/iio/iio_utils.c
@@ -0,0 +1,994 @@
+/* IIO - useful set of util functionality
+ *
+ * Copyright (c) 2008 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <errno.h>
+#include <ctype.h>
+#include "iio_utils.h"
+
+const char *iio_dir = "/sys/bus/iio/devices/";
+
+static char * const iio_direction[] = {
+ "in",
+ "out",
+};
+
+/**
+ * iioutils_break_up_name() - extract generic name from full channel name
+ * @full_name: the full channel name
+ * @generic_name: the output generic channel name
+ *
+ * Returns 0 on success, or a negative error code if string extraction failed.
+ **/
+int iioutils_break_up_name(const char *full_name, char **generic_name)
+{
+ char *current;
+ char *w, *r;
+ char *working, *prefix = "";
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(iio_direction); i++)
+ if (!strncmp(full_name, iio_direction[i],
+ strlen(iio_direction[i]))) {
+ prefix = iio_direction[i];
+ break;
+ }
+
+ current = strdup(full_name + strlen(prefix) + 1);
+ if (!current)
+ return -ENOMEM;
+
+ working = strtok(current, "_\0");
+ if (!working) {
+ free(current);
+ return -EINVAL;
+ }
+
+ w = working;
+ r = working;
+
+ while (*r != '\0') {
+ if (!isdigit(*r)) {
+ *w = *r;
+ w++;
+ }
+
+ r++;
+ }
+ *w = '\0';
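+	/*
+	 * Digits have now been stripped from the first token, so a full
+	 * name such as "in_voltage0" yields the generic name "in_voltage".
+	 */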
+ ret = asprintf(generic_name, "%s_%s", prefix, working);
+ free(current);
+
+ return (ret == -1) ? -ENOMEM : 0;
+}
+
+/**
+ * iioutils_get_type() - find and process _type attribute data
+ * @is_signed: output whether channel is signed
+ * @bytes: output how many bytes the channel storage occupies
+ * @bits_used: output number of valid bits of data
+ * @shift: output amount of bits to shift right data before applying bit mask
+ * @mask: output a bit mask for the raw data
+ * @be: output if data in big endian
+ * @device_dir: the IIO device directory
+ * @name: the channel name
+ * @generic_name: the channel type name
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
+ unsigned *shift, uint64_t *mask, unsigned *be,
+ const char *device_dir, const char *name,
+ const char *generic_name)
+{
+ FILE *sysfsfp;
+ int ret;
+ DIR *dp;
+ char *scan_el_dir, *builtname, *builtname_generic, *filename = 0;
+ char signchar, endianchar;
+ unsigned padint;
+ const struct dirent *ent;
+
+ ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
+ if (ret < 0)
+ return -ENOMEM;
+
+ ret = asprintf(&builtname, FORMAT_TYPE_FILE, name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error_free_scan_el_dir;
+ }
+ ret = asprintf(&builtname_generic, FORMAT_TYPE_FILE, generic_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error_free_builtname;
+ }
+
+ dp = opendir(scan_el_dir);
+ if (!dp) {
+ ret = -errno;
+ goto error_free_builtname_generic;
+ }
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent)
+ if ((strcmp(builtname, ent->d_name) == 0) ||
+ (strcmp(builtname_generic, ent->d_name) == 0)) {
+ ret = asprintf(&filename,
+ "%s/%s", scan_el_dir, ent->d_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error_closedir;
+ }
+
+ sysfsfp = fopen(filename, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "failed to open %s\n",
+ filename);
+ goto error_free_filename;
+ }
+
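+			/*
+			 * The _type attribute has the form
+			 * [be|le]:[s|u]bits/storagebits>>shift; for example
+			 * "le:s12/16>>4" is a little-endian signed channel
+			 * with 12 valid bits in 16 bits of storage, right
+			 * shifted by 4.
+			 */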
+ ret = fscanf(sysfsfp,
+ "%ce:%c%u/%u>>%u",
+ &endianchar,
+ &signchar,
+ bits_used,
+ &padint, shift);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+					"failed to parse scan type description\n");
+ goto error_close_sysfsfp;
+ } else if (ret != 5) {
+ ret = -EIO;
+ fprintf(stderr,
+ "scan type description didn't match\n");
+ goto error_close_sysfsfp;
+ }
+
+ *be = (endianchar == 'b');
+ *bytes = padint / 8;
+ if (*bits_used == 64)
+ *mask = ~(0ULL);
+ else
+ *mask = (1ULL << *bits_used) - 1ULL;
+
+ *is_signed = (signchar == 's');
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ fprintf(stderr, "Failed to close %s\n",
+ filename);
+ goto error_free_filename;
+ }
+
+ sysfsfp = 0;
+ free(filename);
+ filename = 0;
+
+ /*
+ * Avoid having a more generic entry overwriting
+ * the settings.
+ */
+ if (strcmp(builtname, ent->d_name) == 0)
+ break;
+ }
+
+error_close_sysfsfp:
+ if (sysfsfp)
+ if (fclose(sysfsfp))
+ perror("iioutils_get_type(): Failed to close file");
+
+error_free_filename:
+ if (filename)
+ free(filename);
+
+error_closedir:
+ if (closedir(dp) == -1)
+ perror("iioutils_get_type(): Failed to close directory");
+
+error_free_builtname_generic:
+ free(builtname_generic);
+error_free_builtname:
+ free(builtname);
+error_free_scan_el_dir:
+ free(scan_el_dir);
+
+ return ret;
+}
+
+/**
+ * iioutils_get_param_float() - read a float value from a channel parameter
+ * @output: output the float value
+ * @param_name: the parameter name to read
+ * @device_dir: the IIO device directory in sysfs
+ * @name: the channel name
+ * @generic_name: the channel type name
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int iioutils_get_param_float(float *output, const char *param_name,
+ const char *device_dir, const char *name,
+ const char *generic_name)
+{
+ FILE *sysfsfp;
+ int ret;
+ DIR *dp;
+ char *builtname, *builtname_generic;
+ char *filename = NULL;
+ const struct dirent *ent;
+
+ ret = asprintf(&builtname, "%s_%s", name, param_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ ret = asprintf(&builtname_generic,
+ "%s_%s", generic_name, param_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error_free_builtname;
+ }
+
+ dp = opendir(device_dir);
+ if (!dp) {
+ ret = -errno;
+ goto error_free_builtname_generic;
+ }
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent)
+ if ((strcmp(builtname, ent->d_name) == 0) ||
+ (strcmp(builtname_generic, ent->d_name) == 0)) {
+ ret = asprintf(&filename,
+ "%s/%s", device_dir, ent->d_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error_closedir;
+ }
+
+ sysfsfp = fopen(filename, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ goto error_free_filename;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%f", output) != 1)
+ ret = errno ? -errno : -ENODATA;
+
+ break;
+ }
+error_free_filename:
+ if (filename)
+ free(filename);
+
+error_closedir:
+ if (closedir(dp) == -1)
+ perror("iioutils_get_param_float(): Failed to close directory");
+
+error_free_builtname_generic:
+ free(builtname_generic);
+error_free_builtname:
+ free(builtname);
+
+ return ret;
+}
+
+/**
+ * bsort_channel_array_by_index() - sort the array in index order
+ * @ci_array: the iio_channel_info array to be sorted
+ * @cnt: the amount of array elements
+ **/
+
+void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt)
+{
+ struct iio_channel_info temp;
+ int x, y;
+
+ for (x = 0; x < cnt; x++)
+ for (y = 0; y < (cnt - 1); y++)
+ if (ci_array[y].index > ci_array[y + 1].index) {
+ temp = ci_array[y + 1];
+ ci_array[y + 1] = ci_array[y];
+ ci_array[y] = temp;
+ }
+}
+
+/**
+ * build_channel_array() - function to figure out what channels are present
+ * @device_dir: the IIO device directory in sysfs
+ * @ci_array: output the resulting array of iio_channel_info
+ * @counter: output the amount of array elements
+ *
+ * Returns 0 on success, otherwise a negative error code.
+ **/
+int build_channel_array(const char *device_dir,
+ struct iio_channel_info **ci_array, int *counter)
+{
+ DIR *dp;
+ FILE *sysfsfp;
+ int count = 0, i;
+ struct iio_channel_info *current;
+ int ret;
+ const struct dirent *ent;
+ char *scan_el_dir;
+ char *filename;
+
+ *counter = 0;
+ ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
+ if (ret < 0)
+ return -ENOMEM;
+
+ dp = opendir(scan_el_dir);
+ if (!dp) {
+ ret = -errno;
+ goto error_free_name;
+ }
+
+ while (ent = readdir(dp), ent)
+ if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"),
+ "_en") == 0) {
+ ret = asprintf(&filename,
+ "%s/%s", scan_el_dir, ent->d_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ sysfsfp = fopen(filename, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ free(filename);
+ goto error_close_dir;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%i", &ret) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("build_channel_array(): Failed to close file");
+
+ free(filename);
+ goto error_close_dir;
+ }
+ if (ret == 1)
+ (*counter)++;
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ free(filename);
+ goto error_close_dir;
+ }
+
+ free(filename);
+ }
+
+ *ci_array = malloc(sizeof(**ci_array) * (*counter));
+ if (!*ci_array) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ seekdir(dp, 0);
+ while (ent = readdir(dp), ent) {
+ if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"),
+ "_en") == 0) {
+ int current_enabled = 0;
+
+ current = &(*ci_array)[count++];
+ ret = asprintf(&filename,
+ "%s/%s", scan_el_dir, ent->d_name);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ /* decrement count to avoid freeing name */
+ count--;
+ goto error_cleanup_array;
+ }
+
+ sysfsfp = fopen(filename, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%i", &current_enabled) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ free(filename);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ if (!current_enabled) {
+ free(filename);
+ count--;
+ continue;
+ }
+
+ current->scale = 1.0;
+ current->offset = 0;
+ current->name = strndup(ent->d_name,
+ strlen(ent->d_name) -
+ strlen("_en"));
+ if (!current->name) {
+ free(filename);
+ ret = -ENOMEM;
+ count--;
+ goto error_cleanup_array;
+ }
+
+ /* Get the generic and specific name elements */
+ ret = iioutils_break_up_name(current->name,
+ &current->generic_name);
+ if (ret) {
+ free(filename);
+ free(current->name);
+ count--;
+ goto error_cleanup_array;
+ }
+
+ ret = asprintf(&filename,
+ "%s/%s_index",
+ scan_el_dir,
+ current->name);
+ if (ret < 0) {
+ free(filename);
+ ret = -ENOMEM;
+ goto error_cleanup_array;
+ }
+
+ sysfsfp = fopen(filename, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "failed to open %s\n",
+ filename);
+ free(filename);
+ goto error_cleanup_array;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%u", &current->index) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("build_channel_array(): Failed to close file");
+
+ free(filename);
+ goto error_cleanup_array;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ free(filename);
+ goto error_cleanup_array;
+ }
+
+ free(filename);
+ /* Find the scale */
+ ret = iioutils_get_param_float(&current->scale,
+ "scale",
+ device_dir,
+ current->name,
+ current->generic_name);
+ if ((ret < 0) && (ret != -ENOENT))
+ goto error_cleanup_array;
+
+ ret = iioutils_get_param_float(&current->offset,
+ "offset",
+ device_dir,
+ current->name,
+ current->generic_name);
+ if ((ret < 0) && (ret != -ENOENT))
+ goto error_cleanup_array;
+
+ ret = iioutils_get_type(&current->is_signed,
+ &current->bytes,
+ &current->bits_used,
+ &current->shift,
+ &current->mask,
+ &current->be,
+ device_dir,
+ current->name,
+ current->generic_name);
+ if (ret < 0)
+ goto error_cleanup_array;
+ }
+ }
+
+ if (closedir(dp) == -1) {
+ ret = -errno;
+ goto error_cleanup_array;
+ }
+
+ free(scan_el_dir);
+ /* reorder so that the array is in index order */
+ bsort_channel_array_by_index(*ci_array, *counter);
+
+ return 0;
+
+error_cleanup_array:
+ for (i = count - 1; i >= 0; i--) {
+ free((*ci_array)[i].name);
+ free((*ci_array)[i].generic_name);
+ }
+ free(*ci_array);
+ *ci_array = NULL;
+ *counter = 0;
+error_close_dir:
+ if (dp)
+ if (closedir(dp) == -1)
+ perror("build_channel_array(): Failed to close dir");
+
+error_free_name:
+ free(scan_el_dir);
+
+ return ret;
+}
+
+static int calc_digits(int num)
+{
+ int count = 0;
+
+ while (num != 0) {
+ num /= 10;
+ count++;
+ }
+
+ return count;
+}
+
+/**
+ * find_type_by_name() - function to match top level types by name
+ * @name: top level type instance name
+ * @type: the type of top level instance being searched
+ *
+ * Returns the device number of a matched IIO device on success, otherwise a
+ * negative error code.
+ * Typical types this is used for are device and trigger.
+ **/
+int find_type_by_name(const char *name, const char *type)
+{
+ const struct dirent *ent;
+ int number, numstrlen, ret;
+
+ FILE *namefp;
+ DIR *dp;
+ char thisname[IIO_MAX_NAME_LENGTH];
+ char *filename;
+
+ dp = opendir(iio_dir);
+ if (!dp) {
+ fprintf(stderr, "No industrialio devices available\n");
+ return -ENODEV;
+ }
+
+ while (ent = readdir(dp), ent) {
+ if (strcmp(ent->d_name, ".") != 0 &&
+ strcmp(ent->d_name, "..") != 0 &&
+ strlen(ent->d_name) > strlen(type) &&
+ strncmp(ent->d_name, type, strlen(type)) == 0) {
+ errno = 0;
+ ret = sscanf(ent->d_name + strlen(type), "%d", &number);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "failed to read element number\n");
+ goto error_close_dir;
+ } else if (ret != 1) {
+ ret = -EIO;
+ fprintf(stderr,
+ "failed to match element number\n");
+ goto error_close_dir;
+ }
+
+ numstrlen = calc_digits(number);
+ /* verify the next character is not a colon */
+ if (strncmp(ent->d_name + strlen(type) + numstrlen,
+ ":", 1) != 0) {
+ filename = malloc(strlen(iio_dir) + strlen(type)
+ + numstrlen + 6);
+ if (!filename) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ ret = sprintf(filename, "%s%s%d/name", iio_dir,
+ type, number);
+ if (ret < 0) {
+ free(filename);
+ goto error_close_dir;
+ }
+
+ namefp = fopen(filename, "r");
+ if (!namefp) {
+ free(filename);
+ continue;
+ }
+
+ free(filename);
+ errno = 0;
+ if (fscanf(namefp, "%s", thisname) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ goto error_close_dir;
+ }
+
+ if (fclose(namefp)) {
+ ret = -errno;
+ goto error_close_dir;
+ }
+
+ if (strcmp(name, thisname) == 0) {
+ if (closedir(dp) == -1)
+ return -errno;
+
+ return number;
+ }
+ }
+ }
+ }
+ if (closedir(dp) == -1)
+ return -errno;
+
+ return -ENODEV;
+
+error_close_dir:
+ if (closedir(dp) == -1)
+ perror("find_type_by_name(): Failed to close directory");
+
+ return ret;
+}
+
+static int _write_sysfs_int(const char *filename, const char *basedir, int val,
+ int verify)
+{
+ int ret = 0;
+ FILE *sysfsfp;
+ int test;
+ char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
+
+ if (!temp)
+ return -ENOMEM;
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
+ sysfsfp = fopen(temp, "w");
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "failed to open %s\n", temp);
+ goto error_free;
+ }
+
+ ret = fprintf(sysfsfp, "%d", val);
+ if (ret < 0) {
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_int(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ if (verify) {
+ sysfsfp = fopen(temp, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "failed to open %s\n", temp);
+ goto error_free;
+ }
+
+ if (fscanf(sysfsfp, "%d", &test) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_int(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ if (test != val) {
+ fprintf(stderr,
+ "Possible failure in int write %d to %s/%s\n",
+ val, basedir, filename);
+ ret = -1;
+ }
+ }
+
+error_free:
+ free(temp);
+ return ret;
+}
+
+/**
+ * write_sysfs_int() - write an integer value to a sysfs file
+ * @filename: name of the file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: integer value to write to file
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_int(const char *filename, const char *basedir, int val)
+{
+ return _write_sysfs_int(filename, basedir, val, 0);
+}
+
+/**
+ * write_sysfs_int_and_verify() - write an integer value to a sysfs file
+ * and verify
+ * @filename: name of the file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: integer value to write to file
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_int_and_verify(const char *filename, const char *basedir,
+ int val)
+{
+ return _write_sysfs_int(filename, basedir, val, 1);
+}
+
+static int _write_sysfs_string(const char *filename, const char *basedir,
+ const char *val, int verify)
+{
+ int ret = 0;
+ FILE *sysfsfp;
+ char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
+
+ if (!temp) {
+ fprintf(stderr, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
+ sysfsfp = fopen(temp, "w");
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "Could not open %s\n", temp);
+ goto error_free;
+ }
+
+ ret = fprintf(sysfsfp, "%s", val);
+ if (ret < 0) {
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_string(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ if (verify) {
+ sysfsfp = fopen(temp, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ fprintf(stderr, "Could not open file to verify\n");
+ goto error_free;
+ }
+
+ if (fscanf(sysfsfp, "%s", temp) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("_write_sysfs_string(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp)) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ if (strcmp(temp, val) != 0) {
+ fprintf(stderr,
+				"Possible failure in string write: read back %s, "
+				"should be %s, written to %s/%s\n", temp, val,
+ basedir, filename);
+ ret = -1;
+ }
+ }
+
+error_free:
+ free(temp);
+
+ return ret;
+}
+
+/**
+ * write_sysfs_string_and_verify() - string write, readback and verify
+ * @filename: name of file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: the string to write
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_string_and_verify(const char *filename, const char *basedir,
+ const char *val)
+{
+ return _write_sysfs_string(filename, basedir, val, 1);
+}
+
+/**
+ * write_sysfs_string() - write string to a sysfs file
+ * @filename: name of file to write to
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: the string to write
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int write_sysfs_string(const char *filename, const char *basedir,
+ const char *val)
+{
+ return _write_sysfs_string(filename, basedir, val, 0);
+}
+
+/**
+ * read_sysfs_posint() - read an integer value from file
+ * @filename: name of file to read from
+ * @basedir: the sysfs directory in which the file is to be found
+ *
+ * Returns the read integer value >= 0 on success, otherwise a negative error
+ * code.
+ **/
+int read_sysfs_posint(const char *filename, const char *basedir)
+{
+ int ret;
+ FILE *sysfsfp;
+ char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
+
+ if (!temp) {
+		fprintf(stderr, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
+ sysfsfp = fopen(temp, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%d\n", &ret) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("read_sysfs_posint(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp))
+ ret = -errno;
+
+error_free:
+ free(temp);
+
+ return ret;
+}
+
+/**
+ * read_sysfs_float() - read a float value from file
+ * @filename: name of file to read from
+ * @basedir: the sysfs directory in which the file is to be found
+ * @val: output the read float value
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int read_sysfs_float(const char *filename, const char *basedir, float *val)
+{
+ int ret = 0;
+ FILE *sysfsfp;
+ char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
+
+ if (!temp) {
+		fprintf(stderr, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
+ sysfsfp = fopen(temp, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%f\n", val) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("read_sysfs_float(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp))
+ ret = -errno;
+
+error_free:
+ free(temp);
+
+ return ret;
+}
+
+/**
+ * read_sysfs_string() - read a string from file
+ * @filename: name of file to read from
+ * @basedir: the sysfs directory in which the file is to be found
+ * @str: output the read string
+ *
+ * Returns a value >= 0 on success, otherwise a negative error code.
+ **/
+int read_sysfs_string(const char *filename, const char *basedir, char *str)
+{
+ int ret = 0;
+ FILE *sysfsfp;
+ char *temp = malloc(strlen(basedir) + strlen(filename) + 2);
+
+ if (!temp) {
+		fprintf(stderr, "Memory allocation failed\n");
+ return -ENOMEM;
+ }
+
+ ret = sprintf(temp, "%s/%s", basedir, filename);
+ if (ret < 0)
+ goto error_free;
+
+ sysfsfp = fopen(temp, "r");
+ if (!sysfsfp) {
+ ret = -errno;
+ goto error_free;
+ }
+
+ errno = 0;
+ if (fscanf(sysfsfp, "%s\n", str) != 1) {
+ ret = errno ? -errno : -ENODATA;
+ if (fclose(sysfsfp))
+ perror("read_sysfs_string(): Failed to close dir");
+
+ goto error_free;
+ }
+
+ if (fclose(sysfsfp))
+ ret = -errno;
+
+error_free:
+ free(temp);
+
+ return ret;
+}
diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h
new file mode 100644
index 000000000..8b379da26
--- /dev/null
+++ b/tools/iio/iio_utils.h
@@ -0,0 +1,85 @@
+#ifndef _IIO_UTILS_H_
+#define _IIO_UTILS_H_
+
+/* IIO - useful set of util functionality
+ *
+ * Copyright (c) 2008 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <stdint.h>
+
+/* Made up value to limit allocation sizes */
+#define IIO_MAX_NAME_LENGTH 64
+
+#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements"
+#define FORMAT_TYPE_FILE "%s_type"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+
+extern const char *iio_dir;
+
+/**
+ * struct iio_channel_info - information about a given channel
+ * @name: channel name
+ * @generic_name: general name for channel type
+ * @scale: scale factor to be applied for conversion to si units
+ * @offset: offset to be applied for conversion to si units
+ * @index: the channel index in the buffer output
+ * @bytes: number of bytes occupied in buffer output
+ * @bits_used: number of valid bits of data
+ * @shift: amount of bits to shift right data before applying bit mask
+ * @mask: a bit mask for the raw output
+ * @be: flag if data is big endian
+ * @is_signed: is the raw value stored signed
+ * @location: data offset for this channel inside the buffer (in bytes)
+ **/
+struct iio_channel_info {
+ char *name;
+ char *generic_name;
+ float scale;
+ float offset;
+ unsigned index;
+ unsigned bytes;
+ unsigned bits_used;
+ unsigned shift;
+ uint64_t mask;
+ unsigned be;
+ unsigned is_signed;
+ unsigned location;
+};
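+
+/*
+ * A raw buffer value is converted to SI units by applying shift and
+ * mask as described above and then computing (raw + offset) * scale,
+ * as the print*byte() helpers in iio_generic_buffer.c do.
+ */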
+
+static inline int iioutils_check_suffix(const char *str, const char *suffix)
+{
+ return strlen(str) >= strlen(suffix) &&
+ strncmp(str+strlen(str)-strlen(suffix),
+ suffix, strlen(suffix)) == 0;
+}
+
+int iioutils_break_up_name(const char *full_name, char **generic_name);
+int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
+ unsigned *shift, uint64_t *mask, unsigned *be,
+ const char *device_dir, const char *name,
+ const char *generic_name);
+int iioutils_get_param_float(float *output, const char *param_name,
+ const char *device_dir, const char *name,
+ const char *generic_name);
+void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt);
+int build_channel_array(const char *device_dir,
+ struct iio_channel_info **ci_array, int *counter);
+int find_type_by_name(const char *name, const char *type);
+int write_sysfs_int(const char *filename, const char *basedir, int val);
+int write_sysfs_int_and_verify(const char *filename, const char *basedir,
+ int val);
+int write_sysfs_string_and_verify(const char *filename, const char *basedir,
+ const char *val);
+int write_sysfs_string(const char *filename, const char *basedir,
+ const char *val);
+int read_sysfs_posint(const char *filename, const char *basedir);
+int read_sysfs_float(const char *filename, const char *basedir, float *val);
+int read_sysfs_string(const char *filename, const char *basedir, char *str);
+
+#endif /* _IIO_UTILS_H_ */
diff --git a/tools/iio/lsiio.c b/tools/iio/lsiio.c
new file mode 100644
index 000000000..ab0f5cf16
--- /dev/null
+++ b/tools/iio/lsiio.c
@@ -0,0 +1,191 @@
+/*
+ * Industrial I/O utilities - lsiio.c
+ *
+ * Copyright (c) 2010 Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include <string.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/dir.h>
+#include "iio_utils.h"
+
+static enum verbosity {
+ VERBLEVEL_DEFAULT, /* 0 gives lspci behaviour */
+ VERBLEVEL_SENSORS, /* 1 lists sensors */
+} verblevel = VERBLEVEL_DEFAULT;
+
+const char *type_device = "iio:device";
+const char *type_trigger = "trigger";
+
+static inline int check_prefix(const char *str, const char *prefix)
+{
+ return strlen(str) > strlen(prefix) &&
+ strncmp(str, prefix, strlen(prefix)) == 0;
+}
+
+static inline int check_postfix(const char *str, const char *postfix)
+{
+ return strlen(str) > strlen(postfix) &&
+ strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
+}
+
+static int dump_channels(const char *dev_dir_name)
+{
+ DIR *dp;
+ const struct dirent *ent;
+
+ dp = opendir(dev_dir_name);
+ if (!dp)
+ return -errno;
+
+ while (ent = readdir(dp), ent)
+ if (check_prefix(ent->d_name, "in_") &&
+ (check_postfix(ent->d_name, "_raw") ||
+ check_postfix(ent->d_name, "_input")))
+ printf(" %-10s\n", ent->d_name);
+
+ return (closedir(dp) == -1) ? -errno : 0;
+}
+
+static int dump_one_device(const char *dev_dir_name)
+{
+ char name[IIO_MAX_NAME_LENGTH];
+ int dev_idx;
+ int ret;
+
+ ret = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device), "%i",
+ &dev_idx);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = read_sysfs_string("name", dev_dir_name, name);
+ if (ret < 0)
+ return ret;
+
+ printf("Device %03d: %s\n", dev_idx, name);
+
+ if (verblevel >= VERBLEVEL_SENSORS)
+ return dump_channels(dev_dir_name);
+
+ return 0;
+}
+
+static int dump_one_trigger(const char *dev_dir_name)
+{
+ char name[IIO_MAX_NAME_LENGTH];
+ int dev_idx;
+ int ret;
+
+ ret = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger),
+ "%i", &dev_idx);
+ if (ret != 1)
+ return -EINVAL;
+
+ ret = read_sysfs_string("name", dev_dir_name, name);
+ if (ret < 0)
+ return ret;
+
+ printf("Trigger %03d: %s\n", dev_idx, name);
+
+ return 0;
+}
+
+static int dump_devices(void)
+{
+ const struct dirent *ent;
+ int ret;
+ DIR *dp;
+
+ dp = opendir(iio_dir);
+ if (!dp) {
+ fprintf(stderr, "No industrial I/O devices available\n");
+ return -ENODEV;
+ }
+
+ while (ent = readdir(dp), ent) {
+ if (check_prefix(ent->d_name, type_device)) {
+ char *dev_dir_name;
+
+ if (asprintf(&dev_dir_name, "%s%s", iio_dir,
+ ent->d_name) < 0) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ ret = dump_one_device(dev_dir_name);
+ if (ret) {
+ free(dev_dir_name);
+ goto error_close_dir;
+ }
+
+ free(dev_dir_name);
+ if (verblevel >= VERBLEVEL_SENSORS)
+ printf("\n");
+ }
+ }
+ rewinddir(dp);
+ while (ent = readdir(dp), ent) {
+ if (check_prefix(ent->d_name, type_trigger)) {
+ char *dev_dir_name;
+
+ if (asprintf(&dev_dir_name, "%s%s", iio_dir,
+ ent->d_name) < 0) {
+ ret = -ENOMEM;
+ goto error_close_dir;
+ }
+
+ ret = dump_one_trigger(dev_dir_name);
+ if (ret) {
+ free(dev_dir_name);
+ goto error_close_dir;
+ }
+
+ free(dev_dir_name);
+ }
+ }
+
+ return (closedir(dp) == -1) ? -errno : 0;
+
+error_close_dir:
+ if (closedir(dp) == -1)
+ perror("dump_devices(): Failed to close directory");
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int c, err = 0;
+
+ while ((c = getopt(argc, argv, "v")) != EOF) {
+ switch (c) {
+ case 'v':
+ verblevel++;
+ break;
+
+ case '?':
+ default:
+ err++;
+ break;
+ }
+ }
+ if (err || argc > optind) {
+ fprintf(stderr, "Usage: lsiio [options]...\n"
+ "List industrial I/O devices\n"
+ " -v Increase verbosity (may be given multiple times)\n");
+ exit(1);
+ }
+
+ return dump_devices();
+}
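
For reference, on a box with a single accelerometer and its trigger, a verbose run produces output shaped like the following (the device and channel names here are hypothetical):

$ lsiio -v
Device 000: accel_3d
   in_accel_x_raw
   in_accel_y_raw
   in_accel_z_raw

Trigger 000: accel_3d-dev0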
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
new file mode 100644
index 000000000..4c1966f7c
--- /dev/null
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_ASM_GENERIC_ATOMIC_H
+#define __TOOLS_ASM_GENERIC_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * Atomic operations that C can't guarantee us. Useful for
+ * resource counting etc..
+ *
+ * Excerpts obtained from the Linux kernel sources.
+ */
+
+#define ATOMIC_INIT(i) { (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+ return READ_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+ v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+ __sync_add_and_fetch(&v->counter, 1);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+ return __sync_sub_and_fetch(&v->counter, 1) == 0;
+}
+
+#define cmpxchg(ptr, oldval, newval) \
+ __sync_val_compare_and_swap(ptr, oldval, newval)
+
+static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
+{
+ return cmpxchg(&(v)->counter, oldval, newval);
+}
+
+#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
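
These helpers are enough for the classic refcount pattern; here is a minimal standalone sketch (the atomic_t mirror and main() are illustrative, not part of the header):

#include <stdio.h>

typedef struct { int counter; } atomic_t;	/* mirrors tools' linux/types.h */

static inline void atomic_inc(atomic_t *v)
{
	__sync_add_and_fetch(&v->counter, 1);
}

static inline int atomic_dec_and_test(atomic_t *v)
{
	return __sync_sub_and_fetch(&v->counter, 1) == 0;
}

int main(void)
{
	atomic_t refcount = { 1 };		/* ATOMIC_INIT(1) */

	atomic_inc(&refcount);			/* take a second reference */
	if (!atomic_dec_and_test(&refcount))	/* 2 -> 1: still referenced */
		puts("still referenced");
	if (atomic_dec_and_test(&refcount))	/* 1 -> 0: last reference gone */
		puts("object can be freed");
	return 0;
}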
diff --git a/tools/include/asm-generic/barrier.h b/tools/include/asm-generic/barrier.h
new file mode 100644
index 000000000..52278d880
--- /dev/null
+++ b/tools/include/asm-generic/barrier.h
@@ -0,0 +1,44 @@
+/*
+ * Copied from the kernel sources to tools/perf/:
+ *
+ * Generic barrier definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+#define __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/*
+ * Force strict CPU ordering. And yes, this is required on UP too when we're
+ * talking to devices.
+ *
+ * Fall back to compiler barriers if nothing better is provided.
+ */
+
+#ifndef mb
+#define mb() barrier()
+#endif
+
+#ifndef rmb
+#define rmb() mb()
+#endif
+
+#ifndef wmb
+#define wmb() mb()
+#endif
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __TOOLS_LINUX_ASM_GENERIC_BARRIER_H */
diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
new file mode 100644
index 000000000..9bce3b56b
--- /dev/null
+++ b/tools/include/asm-generic/bitops.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_ASM_GENERIC_BITOPS_H
+#define __TOOLS_ASM_GENERIC_BITOPS_H
+
+/*
+ * tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed
+ * some functions.
+ *
+ * For the benefit of those who are trying to port Linux to another
+ * architecture, here are some C-language equivalents. You should
+ * recode these in the native assembly language, if at all possible.
+ *
+ * C language equivalents written by Theodore Ts'o, 9/26/92
+ */
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/__ffz.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <asm-generic/bitops/hweight.h>
+
+#include <asm-generic/bitops/atomic.h>
+
+#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */
diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h
new file mode 100644
index 000000000..9d1310519
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__ffs.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
+
+#include <asm/types.h>
+#include <asm/bitsperlong.h>
+
+/**
+ * __ffs - find first set bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+ int num = 0;
+
+#if __BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf) == 0) {
+ num += 4;
+ word >>= 4;
+ }
+ if ((word & 0x3) == 0) {
+ num += 2;
+ word >>= 2;
+ }
+ if ((word & 0x1) == 0)
+ num += 1;
+ return num;
+}
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */
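
A worked trace of the narrowing above: for __ffs(0x90), the low byte is nonzero but the low nibble is zero (num becomes 4, word becomes 0x9); the remaining tests all see set bits, so the result is 4, the index of the lowest set bit of binary 10010000.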
diff --git a/tools/include/asm-generic/bitops/__ffz.h b/tools/include/asm-generic/bitops/__ffz.h
new file mode 100644
index 000000000..6744bd4cd
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__ffz.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_GENERIC_BITOPS_FFZ_H_
+#define _ASM_GENERIC_BITOPS_FFZ_H_
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+#define ffz(x) __ffs(~(x))
+
+#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
new file mode 100644
index 000000000..03f721a8a
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS___FLS_H_
+#define _ASM_GENERIC_BITOPS___FLS_H_
+
+#include <asm/types.h>
+
+/**
+ * __fls - find last (most-significant) set bit in a long word
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __fls(unsigned long word)
+{
+ int num = BITS_PER_LONG - 1;
+
+#if BITS_PER_LONG == 64
+ if (!(word & (~0ul << 32))) {
+ num -= 32;
+ word <<= 32;
+ }
+#endif
+ if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+ num -= 16;
+ word <<= 16;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+ num -= 8;
+ word <<= 8;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+ num -= 4;
+ word <<= 4;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+ num -= 2;
+ word <<= 2;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-1))))
+ num -= 1;
+ return num;
+}
+
+#endif /* _ASM_GENERIC_BITOPS___FLS_H_ */
diff --git a/tools/include/asm-generic/bitops/arch_hweight.h b/tools/include/asm-generic/bitops/arch_hweight.h
new file mode 100644
index 000000000..c2705e1d2
--- /dev/null
+++ b/tools/include/asm-generic/bitops/arch_hweight.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+
+#include <asm/types.h>
+
+static inline unsigned int __arch_hweight32(unsigned int w)
+{
+ return __sw_hweight32(w);
+}
+
+static inline unsigned int __arch_hweight16(unsigned int w)
+{
+ return __sw_hweight16(w);
+}
+
+static inline unsigned int __arch_hweight8(unsigned int w)
+{
+ return __sw_hweight8(w);
+}
+
+static inline unsigned long __arch_hweight64(__u64 w)
+{
+ return __sw_hweight64(w);
+}
+#endif /* _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ */
diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
new file mode 100644
index 000000000..21c41ccd1
--- /dev/null
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
+
+#include <asm/types.h>
+#include <asm/bitsperlong.h>
+
+static inline void set_bit(int nr, unsigned long *addr)
+{
+ addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);
+}
+
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+ addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
+}
+
+static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
+{
+ return ((1UL << (nr % __BITS_PER_LONG)) &
+ (((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;
+}
+
+#define __set_bit(nr, addr) set_bit(nr, addr)
+#define __clear_bit(nr, addr) clear_bit(nr, addr)
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
diff --git a/tools/include/asm-generic/bitops/const_hweight.h b/tools/include/asm-generic/bitops/const_hweight.h
new file mode 100644
index 000000000..149faeeee
--- /dev/null
+++ b/tools/include/asm-generic/bitops/const_hweight.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+
+/*
+ * Compile time versions of __arch_hweightN()
+ */
+#define __const_hweight8(w) \
+ ((unsigned int) \
+ ((!!((w) & (1ULL << 0))) + \
+ (!!((w) & (1ULL << 1))) + \
+ (!!((w) & (1ULL << 2))) + \
+ (!!((w) & (1ULL << 3))) + \
+ (!!((w) & (1ULL << 4))) + \
+ (!!((w) & (1ULL << 5))) + \
+ (!!((w) & (1ULL << 6))) + \
+ (!!((w) & (1ULL << 7)))))
+
+#define __const_hweight16(w) (__const_hweight8(w)  + __const_hweight8((w) >> 8))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+/*
+ * Generic interface.
+ */
+#define hweight8(w) (__builtin_constant_p(w) ? __const_hweight8(w) : __arch_hweight8(w))
+#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w))
+#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w))
+#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w))
+
+/*
+ * Interface for known constant arguments
+ */
+#define HWEIGHT8(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w))
+#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w))
+#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w))
+#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w))
+
+/*
+ * Type invariant interface to the compile time constant hweight functions.
+ */
+#define HWEIGHT(w) HWEIGHT64((u64)w)
+
+#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */
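
For instance, hweight8(0xA5) with a constant argument folds at compile time to __const_hweight8(0xA5) = 4 (0xA5 is binary 10100101), while a runtime argument routes to __arch_hweight8() instead.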
diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
new file mode 100644
index 000000000..16ed1982c
--- /dev/null
+++ b/tools/include/asm-generic/bitops/find.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit.
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_next_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+#endif
+
+#ifndef find_next_and_bit
+/**
+ * find_next_and_bit - find the next set bit in both memory regions
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number for the next set bit.
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_next_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset);
+#endif
+
+#ifndef find_next_zero_bit
+
+/**
+ * find_next_zero_bit - find the next cleared bit in a memory region
+ * @addr: The address to base the search on
+ * @offset: The bitnumber to start searching at
+ * @size: The bitmap size in bits
+ *
+ * Returns the bit number of the next zero bit.
+ * If no bits are zero, returns @size.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset);
+#endif
+
+#ifndef find_first_bit
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_first_bit(const unsigned long *addr,
+ unsigned long size);
+
+#endif /* find_first_bit */
+
+#ifndef find_first_zero_bit
+
+/**
+ * find_first_zero_bit - find the first cleared bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first cleared bit.
+ * If no bits are zero, returns @size.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size);
+#endif
+
+#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
new file mode 100644
index 000000000..753aecaab
--- /dev/null
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_FLS_H_
+#define _ASM_GENERIC_BITOPS_FLS_H_
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+
+static __always_inline int fls(int x)
+{
+ int r = 32;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff0000u)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xff000000u)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xf0000000u)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xc0000000u)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000u)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+
+#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
new file mode 100644
index 000000000..866f2b230
--- /dev/null
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_FLS64_H_
+#define _ASM_GENERIC_BITOPS_FLS64_H_
+
+#include <asm/types.h>
+
+/**
+ * fls64 - find last set bit in a 64-bit word
+ * @x: the word to search
+ *
+ * This is defined in a similar way to the libc and compiler builtin
+ * ffsll, but returns the position of the most significant set bit.
+ *
+ * fls64(value) returns 0 if value is 0 or the position of the last
+ * set bit if value is nonzero. The last (most significant) bit is
+ * at position 64.
+ */
+#if BITS_PER_LONG == 32
+static __always_inline int fls64(__u64 x)
+{
+ __u32 h = x >> 32;
+ if (h)
+ return fls(h) + 32;
+ return fls(x);
+}
+#elif BITS_PER_LONG == 64
+static __always_inline int fls64(__u64 x)
+{
+ if (x == 0)
+ return 0;
+ return __fls(x) + 1;
+}
+#else
+#error BITS_PER_LONG not 32 or 64
+#endif
+
+#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/tools/include/asm-generic/bitops/hweight.h b/tools/include/asm-generic/bitops/hweight.h
new file mode 100644
index 000000000..3e681982b
--- /dev/null
+++ b/tools/include/asm-generic/bitops/hweight.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_HWEIGHT_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_HWEIGHT_H_
+
+#include <asm-generic/bitops/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_HWEIGHT_H_ */
diff --git a/tools/include/asm-generic/bitsperlong.h b/tools/include/asm-generic/bitsperlong.h
new file mode 100644
index 000000000..8f2283052
--- /dev/null
+++ b/tools/include/asm-generic/bitsperlong.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_BITS_PER_LONG
+#define __ASM_GENERIC_BITS_PER_LONG
+
+#include <uapi/asm-generic/bitsperlong.h>
+
+#ifdef __SIZEOF_LONG__
+#define BITS_PER_LONG (__CHAR_BIT__ * __SIZEOF_LONG__)
+#else
+#define BITS_PER_LONG __WORDSIZE
+#endif
+
+#if BITS_PER_LONG != __BITS_PER_LONG
+#error Inconsistent word size. Check asm/bitsperlong.h
+#endif
+
+#ifndef BITS_PER_LONG_LONG
+#define BITS_PER_LONG_LONG 64
+#endif
+
+#endif /* __ASM_GENERIC_BITS_PER_LONG */
diff --git a/tools/include/asm-generic/hugetlb_encode.h b/tools/include/asm-generic/hugetlb_encode.h
new file mode 100644
index 000000000..e4732d3c2
--- /dev/null
+++ b/tools/include/asm-generic/hugetlb_encode.h
@@ -0,0 +1,34 @@
+#ifndef _ASM_GENERIC_HUGETLB_ENCODE_H_
+#define _ASM_GENERIC_HUGETLB_ENCODE_H_
+
+/*
+ * Several system calls take a flag to request "hugetlb" huge pages.
+ * Without further specification, these system calls will use the
+ * system's default huge page size. If a system supports multiple
+ * huge page sizes, the desired huge page size can be specified in
+ * bits [26:31] of the flag arguments. The value in these 6 bits
+ * will encode the log2 of the huge page size.
+ *
+ * The following definitions are associated with this huge page size
+ * encoding in flag arguments. System call specific header files
+ * that use this encoding should include this file. They can then
+ * provide definitions based on these with their own specific prefix.
+ * for example:
+ * #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+ */
+
+#define HUGETLB_FLAG_ENCODE_SHIFT 26
+#define HUGETLB_FLAG_ENCODE_MASK 0x3f
+
+#define HUGETLB_FLAG_ENCODE_64KB (16 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_512KB (19 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1MB (20 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2MB (21 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_8MB (23 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16MB (24 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_256MB (28 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_1GB (30 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_2GB (31 << HUGETLB_FLAG_ENCODE_SHIFT)
+#define HUGETLB_FLAG_ENCODE_16GB (34 << HUGETLB_FLAG_ENCODE_SHIFT)
+
+#endif /* _ASM_GENERIC_HUGETLB_ENCODE_H_ */
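
To make the encoding concrete: 2 MB is 2^21, so HUGETLB_FLAG_ENCODE_2MB places 21 in bits [26:31]. A minimal mmap() sketch (assumes a Linux system with 2 MB huge pages reserved; otherwise the call fails):

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
		       (21 << 26),	/* log2(2 MB) in the encode bits */
		       -1, 0);

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	munmap(p, len);
	return 0;
}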
diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h
new file mode 100644
index 000000000..b54bd860d
--- /dev/null
+++ b/tools/include/asm/alternative-asm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
+#define _TOOLS_ASM_ALTERNATIVE_ASM_H
+
+/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
+
+#define altinstruction_entry #
+#define ALTERNATIVE_2 #
+
+#endif
diff --git a/tools/include/asm/atomic.h b/tools/include/asm/atomic.h
new file mode 100644
index 000000000..8c9bfffd4
--- /dev/null
+++ b/tools/include/asm/atomic.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_ASM_ATOMIC_H
+#define __TOOLS_LINUX_ASM_ATOMIC_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/atomic.h"
+#else
+#include <asm-generic/atomic-gcc.h>
+#endif
+
+#endif /* __TOOLS_LINUX_ASM_ATOMIC_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
new file mode 100644
index 000000000..391d94253
--- /dev/null
+++ b/tools/include/asm/barrier.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/barrier.h"
+#elif defined(__arm__)
+#include "../../arch/arm/include/asm/barrier.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/asm/barrier.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/asm/barrier.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/asm/barrier.h"
+#elif defined(__sh__)
+#include "../../arch/sh/include/asm/barrier.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/asm/barrier.h"
+#elif defined(__tile__)
+#include "../../arch/tile/include/asm/barrier.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/asm/barrier.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/asm/barrier.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/asm/barrier.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/asm/barrier.h"
+#else
+#include <asm-generic/barrier.h>
+#endif
diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h
new file mode 100644
index 000000000..bbd75ac8b
--- /dev/null
+++ b/tools/include/asm/bug.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_BUG_H
+#define _TOOLS_ASM_BUG_H
+
+#include <linux/compiler.h>
+#include <stdio.h>	/* fprintf() used by __WARN_printf() */
+
+#define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0)
+
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf(format); \
+ unlikely(__ret_warn_on); \
+})
+
+#define WARN_ON(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf("assertion failed at %s:%d\n", \
+ __FILE__, __LINE__); \
+ unlikely(__ret_warn_on); \
+})
+
+#define WARN_ON_ONCE(condition) ({ \
+ static int __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ __warned = true; \
+ WARN_ON(1); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
+#define WARN_ONCE(condition, format...) ({ \
+ static int __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once)) \
+ if (WARN(!__warned, format)) \
+ __warned = 1; \
+ unlikely(__ret_warn_once); \
+})
+
+#endif /* _TOOLS_ASM_BUG_H */
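
A hypothetical caller, to show the intended use (assumes -Itools/include on the compile line):

#include <asm/bug.h>

static int handle_fd(int fd)
{
	if (WARN_ON(fd < 0))	/* prints the file:line warning, evaluates to 1 */
		return -1;
	return fd;
}

int main(void)
{
	return handle_fd(-1) == -1 ? 0 : 1;
}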
diff --git a/tools/include/asm/export.h b/tools/include/asm/export.h
new file mode 100644
index 000000000..2cb1a0d83
--- /dev/null
+++ b/tools/include/asm/export.h
@@ -0,0 +1,7 @@
+#ifndef _TOOLS_ASM_EXPORT_H
+#define _TOOLS_ASM_EXPORT_H
+
+#define EXPORT_SYMBOL(x)
+#define EXPORT_SYMBOL_GPL(x)
+
+#endif /* _TOOLS_ASM_EXPORT_H */
diff --git a/tools/include/asm/sections.h b/tools/include/asm/sections.h
new file mode 100644
index 000000000..a80643d7a
--- /dev/null
+++ b/tools/include/asm/sections.h
@@ -0,0 +1,4 @@
+#ifndef __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H
+#define __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H
+
+#endif /* __TOOLS_INCLUDE_LINUX_ASM_SECTIONS_H */
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
new file mode 100644
index 000000000..00a6c4ca5
--- /dev/null
+++ b/tools/include/linux/atomic.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <asm/atomic.h>
+
+/* atomic_cmpxchg_relaxed */
+#ifndef atomic_cmpxchg_relaxed
+#define atomic_cmpxchg_relaxed atomic_cmpxchg
+#define atomic_cmpxchg_release atomic_cmpxchg
+#endif /* atomic_cmpxchg_relaxed */
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
new file mode 100644
index 000000000..e63662db1
--- /dev/null
+++ b/tools/include/linux/bitmap.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_BITOPS_H
+#define _PERF_BITOPS_H
+
+#include <string.h>
+#include <linux/bitops.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+
+#define DECLARE_BITMAP(name,bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+int __bitmap_weight(const unsigned long *bitmap, int bits);
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits);
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits);
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+
+#define BITMAP_LAST_WORD_MASK(nbits) \
+( \
+ ((nbits) % BITS_PER_LONG) ? \
+ (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
+)
+
+#define small_const_nbits(nbits) \
+ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
+
+static inline void bitmap_zero(unsigned long *dst, int nbits)
+{
+ if (small_const_nbits(nbits))
+ *dst = 0UL;
+ else {
+ int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
+ memset(dst, 0, len);
+ }
+}
+
+static inline void bitmap_fill(unsigned long *dst, unsigned int nbits)
+{
+ unsigned int nlongs = BITS_TO_LONGS(nbits);
+ if (!small_const_nbits(nbits)) {
+ unsigned int len = (nlongs - 1) * sizeof(unsigned long);
+ memset(dst, 0xff, len);
+ }
+ dst[nlongs - 1] = BITMAP_LAST_WORD_MASK(nbits);
+}
+
+static inline int bitmap_empty(const unsigned long *src, unsigned nbits)
+{
+ if (small_const_nbits(nbits))
+ return !(*src & BITMAP_LAST_WORD_MASK(nbits));
+
+ return find_first_bit(src, nbits) == nbits;
+}
+
+static inline int bitmap_full(const unsigned long *src, unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ return !(~(*src) & BITMAP_LAST_WORD_MASK(nbits));
+
+ return find_first_zero_bit(src, nbits) == nbits;
+}
+
+static inline int bitmap_weight(const unsigned long *src, int nbits)
+{
+ if (small_const_nbits(nbits))
+ return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+ return __bitmap_weight(src, nbits);
+}
+
+static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, int nbits)
+{
+ if (small_const_nbits(nbits))
+ *dst = *src1 | *src2;
+ else
+ __bitmap_or(dst, src1, src2, nbits);
+}
+
+/**
+ * test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ */
+static inline int test_and_set_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long old;
+
+ old = *p;
+ *p = old | mask;
+
+ return (old & mask) != 0;
+}
+
+/**
+ * test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ */
+static inline int test_and_clear_bit(int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+ unsigned long old;
+
+ old = *p;
+ *p = old & ~mask;
+
+ return (old & mask) != 0;
+}
+
+/**
+ * bitmap_alloc - Allocate bitmap
+ * @nbits: Number of bits
+ */
+static inline unsigned long *bitmap_alloc(int nbits)
+{
+ return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long));
+}
+
+/*
+ * bitmap_scnprintf - print bitmap list into buffer
+ * @bitmap: bitmap
+ * @nbits: size of bitmap
+ * @buf: buffer to store output
+ * @size: size of @buf
+ */
+size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+ char *buf, size_t size);
+
+/**
+ * bitmap_and - Do logical and on bitmaps
+ * @dst: resulting bitmap
+ * @src1: operand 1
+ * @src2: operand 2
+ * @nbits: size of bitmap
+ */
+static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
+ const unsigned long *src2, unsigned int nbits)
+{
+ if (small_const_nbits(nbits))
+ return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+ return __bitmap_and(dst, src1, src2, nbits);
+}
+
+#endif /* _PERF_BITOPS_H */
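
A usage sketch for these helpers (assumes -Itools/include and that tools/lib/hweight.c is linked in for the __sw_hweight* fallbacks):

#include <stdio.h>
#include <linux/bitmap.h>

int main(void)
{
	DECLARE_BITMAP(mask, 32);

	bitmap_zero(mask, 32);
	test_and_set_bit(3, mask);
	test_and_set_bit(17, mask);
	printf("%d bits set\n", bitmap_weight(mask, 32));	/* prints 2 */
	return 0;
}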
diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
new file mode 100644
index 000000000..0b0ef3abc
--- /dev/null
+++ b/tools/include/linux/bitops.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#define _TOOLS_LINUX_BITOPS_H_
+
+#include <asm/types.h>
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
+#ifndef BITS_PER_LONG
+# define BITS_PER_LONG __WORDSIZE
+#endif
+#include <linux/bits.h>
+#include <linux/compiler.h>
+
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE)
+
+extern unsigned int __sw_hweight8(unsigned int w);
+extern unsigned int __sw_hweight16(unsigned int w);
+extern unsigned int __sw_hweight32(unsigned int w);
+extern unsigned long __sw_hweight64(__u64 w);
+
+/*
+ * Include this here because some architectures need generic_ffs/fls in
+ * scope
+ *
+ * XXX: this needs to be asm/bitops.h, when we get to per arch optimizations
+ */
+#include <asm-generic/bitops.h>
+
+#define for_each_set_bit(bit, addr, size) \
+ for ((bit) = find_first_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+#define for_each_clear_bit(bit, addr, size) \
+ for ((bit) = find_first_zero_bit((addr), (size)); \
+ (bit) < (size); \
+ (bit) = find_next_zero_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit() but use bit as value to start with */
+#define for_each_set_bit_from(bit, addr, size) \
+ for ((bit) = find_next_bit((addr), (size), (bit)); \
+ (bit) < (size); \
+ (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+ return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
+}
+
+static inline unsigned fls_long(unsigned long l)
+{
+ if (sizeof(l) == 4)
+ return fls(l);
+ return fls64(l);
+}
+
+/**
+ * rol32 - rotate a 32-bit value left
+ * @word: value to rotate
+ * @shift: bits to roll
+ */
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+#endif
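
For example, walking the set bits of a one-word bitmap (find_first_bit() and find_next_bit() live in tools/lib/find_bit.c, which must be linked in):

#include <stdio.h>
#include <linux/bitops.h>

int main(void)
{
	unsigned long map[1] = { 0x101UL };	/* bits 0 and 8 set */
	unsigned int bit;

	for_each_set_bit(bit, map, BITS_PER_LONG)
		printf("bit %u is set\n", bit);	/* prints 0, then 8 */
	return 0;
}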
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
new file mode 100644
index 000000000..2b7b532c1
--- /dev/null
+++ b/tools/include/linux/bits.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_BITS_H
+#define __LINUX_BITS_H
+#include <asm/bitsperlong.h>
+
+#define BIT(nr) (1UL << (nr))
+#define BIT_ULL(nr) (1ULL << (nr))
+#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BIT_ULL_MASK(nr) (1ULL << ((nr) % BITS_PER_LONG_LONG))
+#define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG)
+#define BITS_PER_BYTE 8
+
+/*
+ * Create a contiguous bitmask starting at bit position @l and ending at
+ * position @h. For example
+ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
+ */
+#define GENMASK(h, l) \
+ (((~0UL) - (1UL << (l)) + 1) & (~0UL >> (BITS_PER_LONG - 1 - (h))))
+
+#define GENMASK_ULL(h, l) \
+ (((~0ULL) - (1ULL << (l)) + 1) & \
+ (~0ULL >> (BITS_PER_LONG_LONG - 1 - (h))))
+
+#endif /* __LINUX_BITS_H */
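
As a smaller companion to the comment's example, GENMASK(7, 4) evaluates to 0xf0: bits 4 through 7 inclusive.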
diff --git a/tools/include/linux/bug.h b/tools/include/linux/bug.h
new file mode 100644
index 000000000..85f80258a
--- /dev/null
+++ b/tools/include/linux/bug.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_PERF_LINUX_BUG_H
+#define _TOOLS_PERF_LINUX_BUG_H
+
+/* Force a compilation error if condition is true, but also produce a
+ result (of value 0 and type size_t), so the expression can be used
+ e.g. in a structure initializer (or wherever else comma expressions
+ aren't permitted). */
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
+#endif /* _TOOLS_PERF_LINUX_BUG_H */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
new file mode 100644
index 000000000..0d35f1800
--- /dev/null
+++ b/tools/include/linux/compiler-gcc.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
+#endif
+
+/*
+ * Common definitions for all gcc versions go here.
+ */
+#define GCC_VERSION (__GNUC__ * 10000 \
+ + __GNUC_MINOR__ * 100 \
+ + __GNUC_PATCHLEVEL__)
+
+#if GCC_VERSION >= 70000 && !defined(__CHECKER__)
+# define __fallthrough __attribute__ ((fallthrough))
+#endif
+
+#if GCC_VERSION >= 40300
+# define __compiletime_error(message) __attribute__((error(message)))
+#endif /* GCC_VERSION >= 40300 */
+
+/* &a[0] degrades to a pointer: a different type from an array */
+#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0]))
+
+#ifndef __pure
+#define __pure __attribute__((pure))
+#endif
+#define noinline __attribute__((noinline))
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+#ifndef __noreturn
+#define __noreturn __attribute__((noreturn))
+#endif
+#ifndef __aligned
+#define __aligned(x) __attribute__((aligned(x)))
+#endif
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+#define __scanf(a, b) __attribute__((format(scanf, a, b)))
+
+#if GCC_VERSION >= 50100
+#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1
+#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
new file mode 100644
index 000000000..1827c2f97
--- /dev/null
+++ b/tools/include/linux/compiler.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#define _TOOLS_LINUX_COMPILER_H_
+
+#ifdef __GNUC__
+#include <linux/compiler-gcc.h>
+#endif
+
+#ifndef __compiletime_error
+# define __compiletime_error(message)
+#endif
+
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
+#ifndef __always_inline
+# define __always_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef noinline
+#define noinline
+#endif
+
+/* Are two types/vars the same type (ignoring qualifiers)? */
+#ifndef __same_type
+# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b))
+#endif
+
+#ifdef __ANDROID__
+/*
+ * FIXME: Big hammer to get rid of tons of:
+ * "warning: always_inline function might not be inlinable"
+ *
+ * At least on android-ndk-r12/platforms/android-24/arch-arm
+ */
+#undef __always_inline
+#define __always_inline inline
+#endif
+
+#define __user
+#define __rcu
+#define __read_mostly
+
+#ifndef __attribute_const__
+# define __attribute_const__
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((unused))
+#endif
+
+#ifndef __used
+# define __used __attribute__((__unused__))
+#endif
+
+#ifndef __packed
+# define __packed __attribute__((__packed__))
+#endif
+
+#ifndef __force
+# define __force
+#endif
+
+#ifndef __weak
+# define __weak __attribute__((weak))
+#endif
+
+#ifndef likely
+# define likely(x) __builtin_expect(!!(x), 1)
+#endif
+
+#ifndef unlikely
+# define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#ifndef __init
+# define __init
+#endif
+
+#define uninitialized_var(x) x = *(&(x))
+
+#include <linux/types.h>
+
+/*
+ * Following functions are taken from kernel sources and
+ * break aliasing rules in their original form.
+ *
+ * While kernel is compiled with -fno-strict-aliasing,
+ * perf uses -Wstrict-aliasing=3 which makes build fail
+ * under gcc 4.4.
+ *
+ * Using extra __may_alias__ type to allow aliasing
+ * in this case.
+ */
+typedef __u8 __attribute__((__may_alias__)) __u8_alias_t;
+typedef __u16 __attribute__((__may_alias__)) __u16_alias_t;
+typedef __u32 __attribute__((__may_alias__)) __u32_alias_t;
+typedef __u64 __attribute__((__may_alias__)) __u64_alias_t;
+
+static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break;
+ case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break;
+ case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break;
+ case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ barrier();
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break;
+ case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break;
+ case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break;
+ case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ barrier();
+ }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some
+ * particular ordering. One way to make the compiler aware of ordering is to
+ * put the two invocations of READ_ONCE or WRITE_ONCE in different C
+ * statements.
+ *
+ * These two macros will also work on aggregate data types like structs or
+ * unions. If the size of the accessed data type exceeds the word size of
+ * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will
+ * fall back to memcpy and print a compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__c = { 0 } }; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+
+#ifndef __fallthrough
+# define __fallthrough
+#endif
+
+#endif /* _TOOLS_LINUX_COMPILER_H_ */
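
A minimal sketch of the intended use, polling a flag that another context writes (assumes the tools include paths):

#include <stdio.h>
#include <linux/compiler.h>

static int done;	/* set by a signal handler or another thread */

int main(void)
{
	WRITE_ONCE(done, 1);		/* a single, untorn store */
	while (!READ_ONCE(done))	/* forces a fresh load per iteration */
		;
	puts("done");
	return 0;
}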
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
new file mode 100644
index 000000000..a1a959ba2
--- /dev/null
+++ b/tools/include/linux/coresight-pmu.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef _LINUX_CORESIGHT_PMU_H
+#define _LINUX_CORESIGHT_PMU_H
+
+#define CORESIGHT_ETM_PMU_NAME "cs_etm"
+#define CORESIGHT_ETM_PMU_SEED 0x10
+
+/* ETMv3.5/PTM's ETMCR config bit */
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
+
+/* ETMv4 CONFIGR programming bits for the ETM OPTs */
+#define ETM4_CFG_BIT_CYCACC 4
+#define ETM4_CFG_BIT_TS 11
+#define ETM4_CFG_BIT_RETSTK 12
+
+static inline int coresight_get_trace_id(int cpu)
+{
+ /*
+ * A trace ID of value 0 is invalid, so let's start at some
+ * random value that fits in 7 bits and go from there. Since
+ * the common convention is to have data trace IDs be I(N) + 1,
+ * set instruction trace IDs as a function of the CPU number.
+ */
+ return (CORESIGHT_ETM_PMU_SEED + (cpu * 2));
+}
+
+#endif
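
For example, coresight_get_trace_id(2) yields 0x10 + 4 = 0x14 as CPU 2's instruction trace ID, leaving 0x15 for the corresponding data trace per the I(N) + 1 convention.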
diff --git a/tools/include/linux/debug_locks.h b/tools/include/linux/debug_locks.h
new file mode 100644
index 000000000..72d595ce7
--- /dev/null
+++ b/tools/include/linux/debug_locks.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_
+#define _LIBLOCKDEP_DEBUG_LOCKS_H_
+
+#include <stddef.h>
+#include <linux/compiler.h>
+#include <asm/bug.h>
+
+#define DEBUG_LOCKS_WARN_ON(x) WARN_ON(x)
+
+extern bool debug_locks;
+extern bool debug_locks_silent;
+
+#endif
diff --git a/tools/include/linux/delay.h b/tools/include/linux/delay.h
new file mode 100644
index 000000000..55aa4173a
--- /dev/null
+++ b/tools/include/linux/delay.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_DELAY_H
+#define _TOOLS_INCLUDE_LINUX_DELAY_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_DELAY_H */
diff --git a/tools/include/linux/err.h b/tools/include/linux/err.h
new file mode 100644
index 000000000..7a8b61ad4
--- /dev/null
+++ b/tools/include/linux/err.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_ERR_H
+#define __TOOLS_LINUX_ERR_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include <asm/errno.h>
+
+/*
+ * Original kernel header comment:
+ *
+ * Kernel pointers have redundant information, so we can use a
+ * scheme where we can return either an error code or a normal
+ * pointer with the same return value.
+ *
+ * This should be a per-architecture thing, to allow different
+ * error and pointer decisions.
+ *
+ * Userspace note:
+ * The same principle works for userspace, because 'error' pointers
+ * fall down to the unused hole far from user space, as described
+ * in Documentation/x86/x86_64/mm.txt for x86_64 arch:
+ *
+ * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm
+ * hole caused by [48:63] sign extension
+ * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
+ *
+ * It should be the same case for other architectures, because
+ * this code is used in generic kernel code.
+ */
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error_)
+{
+ return (void *) error_;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr)
+{
+ return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+#endif /* __TOOLS_LINUX_ERR_H */
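
A usage sketch of the pointer-or-errno convention (assumes the tools include paths; the lookup() helper is illustrative):

#include <stdio.h>
#include <errno.h>
#include <linux/err.h>

static int the_answer = 42;

static void *lookup(int ok)
{
	if (!ok)
		return ERR_PTR(-ENOENT);	/* errno encoded in the pointer */
	return &the_answer;
}

int main(void)
{
	void *p = lookup(0);

	if (IS_ERR(p))
		printf("lookup failed: %ld\n", PTR_ERR(p));	/* prints -2 */
	return 0;
}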
diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h
new file mode 100644
index 000000000..d07e586b9
--- /dev/null
+++ b/tools/include/linux/export.h
@@ -0,0 +1,10 @@
+#ifndef _TOOLS_LINUX_EXPORT_H_
+#define _TOOLS_LINUX_EXPORT_H_
+
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#define EXPORT_SYMBOL_GPL_FUTURE(sym)
+#define EXPORT_UNUSED_SYMBOL(sym)
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+
+#endif
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
new file mode 100644
index 000000000..af55acf73
--- /dev/null
+++ b/tools/include/linux/filter.h
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Socket Filter Data Structures
+ */
+#ifndef __TOOLS_LINUX_FILTER_H
+#define __TOOLS_LINUX_FILTER_H
+
+#include <linux/bpf.h>
+
+/* ArgX, context and stack frame pointer register positions. Note,
+ * Arg1, Arg2, Arg3, etc are used as argument mappings of function
+ * calls in BPF_CALL instruction.
+ */
+#define BPF_REG_ARG1 BPF_REG_1
+#define BPF_REG_ARG2 BPF_REG_2
+#define BPF_REG_ARG3 BPF_REG_3
+#define BPF_REG_ARG4 BPF_REG_4
+#define BPF_REG_ARG5 BPF_REG_5
+#define BPF_REG_CTX BPF_REG_6
+#define BPF_REG_FP BPF_REG_10
+
+/* Additional register mappings for converted user programs. */
+#define BPF_REG_A BPF_REG_0
+#define BPF_REG_X BPF_REG_7
+#define BPF_REG_TMP BPF_REG_8
+
+/* BPF program can access up to 512 bytes of stack space. */
+#define MAX_BPF_STACK 512
+
+/* Helper macros for filter block array initializers. */
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#define BPF_ALU64_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_ALU32_REG(OP, DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#define BPF_ALU64_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_ALU32_IMM(OP, DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Endianness conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
+
+#define BPF_ENDIAN(TYPE, DST, LEN) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_END | BPF_SRC(TYPE), \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = LEN })
+
+/* Short form of mov, dst_reg = src_reg */
+
+#define BPF_MOV64_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+#define BPF_MOV32_REG(DST, SRC) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = 0 })
+
+/* Short form of mov, dst_reg = imm32 */
+
+#define BPF_MOV64_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_IMM(DST, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
+
+#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE), \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = IMM })
+
+#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ALU | BPF_MOV | BPF_SRC(TYPE), \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
+
+#define BPF_LD_ABS(SIZE, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
+
+#define BPF_LD_IND(SIZE, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_SIZE(SIZE) | BPF_IND, \
+ .dst_reg = 0, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = IMM })
+
+/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
+
+#define BPF_LDX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
+
+#define BPF_STX_MEM(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
+
+#define BPF_ST_MEM(SIZE, DST, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#define BPF_JMP_REG(OP, DST, SRC, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_X, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#define BPF_JMP_IMM(OP, DST, IMM, OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_OP(OP) | BPF_K, \
+ .dst_reg = DST, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* Unconditional jumps, goto pc + off16 */
+
+#define BPF_JMP_A(OFF) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_JA, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = OFF, \
+ .imm = 0 })
+
+/* Function call */
+
+#define BPF_EMIT_CALL(FUNC) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((FUNC) - BPF_FUNC_unspec) })
+
+/* Raw code statement block */
+
+#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM) \
+ ((struct bpf_insn) { \
+ .code = CODE, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = OFF, \
+ .imm = IMM })
+
+/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
+
+#define BPF_LD_IMM64(DST, IMM) \
+ BPF_LD_IMM64_RAW(DST, 0, IMM)
+
+#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \
+ ((struct bpf_insn) { \
+ .code = BPF_LD | BPF_DW | BPF_IMM, \
+ .dst_reg = DST, \
+ .src_reg = SRC, \
+ .off = 0, \
+ .imm = (__u32) (IMM) }), \
+ ((struct bpf_insn) { \
+ .code = 0, /* zero is reserved opcode */ \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = ((__u64) (IMM)) >> 32 })
+
+/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
+
+#define BPF_LD_MAP_FD(DST, MAP_FD) \
+ BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+
+/* Relative call */
+
+#define BPF_CALL_REL(TGT) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = BPF_PSEUDO_CALL, \
+ .off = 0, \
+ .imm = TGT })
+
+/* Program exit */
+
+#define BPF_EXIT_INSN() \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_EXIT, \
+ .dst_reg = 0, \
+ .src_reg = 0, \
+ .off = 0, \
+ .imm = 0 })
+
+#endif /* __TOOLS_LINUX_FILTER_H */
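
The macros compose into plain instruction arrays; a minimal "return 0" program as a sketch (assumes the tools uapi include path for linux/bpf.h):

#include <stdio.h>
#include <linux/filter.h>

int main(void)
{
	struct bpf_insn prog[] = {
		BPF_MOV64_IMM(BPF_REG_0, 0),	/* r0 = 0 */
		BPF_EXIT_INSN(),		/* return r0 */
	};

	printf("%zu insns\n", sizeof(prog) / sizeof(prog[0]));	/* 2 */
	return 0;
}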
diff --git a/tools/include/linux/ftrace.h b/tools/include/linux/ftrace.h
new file mode 100644
index 000000000..949f541ce
--- /dev/null
+++ b/tools/include/linux/ftrace.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_FTRACE_H
+#define _TOOLS_INCLUDE_LINUX_FTRACE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_FTRACE_H */
diff --git a/tools/include/linux/gfp.h b/tools/include/linux/gfp.h
new file mode 100644
index 000000000..22030756f
--- /dev/null
+++ b/tools/include/linux/gfp.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_GFP_H
+#define _TOOLS_INCLUDE_LINUX_GFP_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_GFP_H */
diff --git a/tools/include/linux/hardirq.h b/tools/include/linux/hardirq.h
new file mode 100644
index 000000000..b25580b6a
--- /dev/null
+++ b/tools/include/linux/hardirq.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_
+#define _LIBLOCKDEP_LINUX_HARDIRQ_H_
+
+#define SOFTIRQ_BITS 0UL
+#define HARDIRQ_BITS 0UL
+#define SOFTIRQ_SHIFT 0UL
+#define HARDIRQ_SHIFT 0UL
+#define hardirq_count() 0UL
+#define softirq_count() 0UL
+
+#endif
diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h
new file mode 100644
index 000000000..ad6fa21d9
--- /dev/null
+++ b/tools/include/linux/hash.h
@@ -0,0 +1,104 @@
+#ifndef _LINUX_HASH_H
+#define _LINUX_HASH_H
+/* Fast hashing routine for ints, longs and pointers.
+ (C) 2002 Nadia Yvette Chambers, IBM */
+
+#include <asm/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and
+ * fs/inode.c. It's not actually prime any more (the previous primes
+ * were actively bad for hashing), but the name remains.
+ */
+#if BITS_PER_LONG == 32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
+#define hash_long(val, bits) hash_32(val, bits)
+#elif BITS_PER_LONG == 64
+#define hash_long(val, bits) hash_64(val, bits)
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
+#else
+#error Wordsize not 32 or 64
+#endif
+
+/*
+ * This hash multiplies the input by a large odd number and takes the
+ * high bits. Since multiplication propagates changes to the most
+ * significant end only, it is essential that the high bits of the
+ * product be used for the hash value.
+ *
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
+ *
+ * Although a random odd number will do, it turns out that the golden
+ * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
+ * properties. (See Knuth vol 3, section 6.4, exercise 9.)
+ *
+ * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
+ * which is very slightly easier to multiply by and makes no
+ * difference to the hash distribution.
+ */
+#define GOLDEN_RATIO_32 0x61C88647
+#define GOLDEN_RATIO_64 0x61C8864680B583EBull
+
+#ifdef CONFIG_HAVE_ARCH_HASH
+/* This header may use the GOLDEN_RATIO_xx constants */
+#include <asm/hash.h>
+#endif
+
+/*
+ * The _generic versions exist only so lib/test_hash.c can compare
+ * the arch-optimized versions with the generic.
+ *
+ * Note that if you change these, any <asm/hash.h> that aren't updated
+ * to match need to have their HAVE_ARCH_* define values updated so the
+ * self-test will not false-positive.
+ */
+#ifndef HAVE_ARCH__HASH_32
+#define __hash_32 __hash_32_generic
+#endif
+static inline u32 __hash_32_generic(u32 val)
+{
+ return val * GOLDEN_RATIO_32;
+}
+
+#ifndef HAVE_ARCH_HASH_32
+#define hash_32 hash_32_generic
+#endif
+static inline u32 hash_32_generic(u32 val, unsigned int bits)
+{
+ /* High bits are more random, so use them. */
+ return __hash_32(val) >> (32 - bits);
+}
+
+#ifndef HAVE_ARCH_HASH_64
+#define hash_64 hash_64_generic
+#endif
+static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
+{
+#if BITS_PER_LONG == 64
+ /* 64x64-bit multiply is efficient on all 64-bit processors */
+ return val * GOLDEN_RATIO_64 >> (64 - bits);
+#else
+ /* Hash 64 bits using only 32x32-bit multiply. */
+ return hash_32((u32)val ^ __hash_32(val >> 32), bits);
+#endif
+}
+
+static inline u32 hash_ptr(const void *ptr, unsigned int bits)
+{
+ return hash_long((unsigned long)ptr, bits);
+}
+
+/* This really should be called fold32_ptr; it does no hashing to speak of. */
+static inline u32 hash32_ptr(const void *ptr)
+{
+ unsigned long val = (unsigned long)ptr;
+
+#if BITS_PER_LONG == 64
+ val ^= (val >> 32);
+#endif
+ return (u32)val;
+}
+
+#endif /* _LINUX_HASH_H */
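
Concretely, __hash_32(1) is GOLDEN_RATIO_32 itself, so hash_32(1, 8) keeps the top 8 bits of 0x61C88647 and yields 0x61, a bucket index in [0, 255].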
diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h
new file mode 100644
index 000000000..434dd5ac6
--- /dev/null
+++ b/tools/include/linux/hashtable.h
@@ -0,0 +1,149 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Statically sized hash table implementation
+ * (C) 2012 Sasha Levin <levinsasha928@gmail.com>
+ */
+
+#ifndef _LINUX_HASHTABLE_H
+#define _LINUX_HASHTABLE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/log2.h>
+
+#define DEFINE_HASHTABLE(name, bits) \
+ struct hlist_head name[1 << (bits)] = \
+ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
+
+#define DECLARE_HASHTABLE(name, bits) \
+ struct hlist_head name[1 << (bits)]
+
+#define HASH_SIZE(name) (ARRAY_SIZE(name))
+#define HASH_BITS(name) ilog2(HASH_SIZE(name))
+
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */
+#define hash_min(val, bits) \
+ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits))
+
+static inline void __hash_init(struct hlist_head *ht, unsigned int sz)
+{
+ unsigned int i;
+
+ for (i = 0; i < sz; i++)
+ INIT_HLIST_HEAD(&ht[i]);
+}
+
+/**
+ * hash_init - initialize a hash table
+ * @hashtable: hashtable to be initialized
+ *
+ * Calculates the size of the hashtable from the given parameter, then
+ * initializes every bucket via __hash_init().
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_add - add an object to a hashtable
+ * @hashtable: hashtable to add to
+ * @node: the &struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])
+
+/**
+ * hash_hashed - check whether an object is in any hashtable
+ * @node: the &struct hlist_node of the object to be checked
+ */
+static inline bool hash_hashed(struct hlist_node *node)
+{
+ return !hlist_unhashed(node);
+}
+
+static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz)
+{
+ unsigned int i;
+
+ for (i = 0; i < sz; i++)
+ if (!hlist_empty(&ht[i]))
+ return false;
+
+ return true;
+}
+
+/**
+ * hash_empty - check whether a hashtable is empty
+ * @hashtable: hashtable to check
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_del - remove an object from a hashtable
+ * @node: &struct hlist_node of the object to remove
+ */
+static inline void hash_del(struct hlist_node *node)
+{
+ hlist_del_init(node);
+}
+
+/**
+ * hash_for_each - iterate over a hashtable
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+ (bkt)++)\
+ hlist_for_each_entry(obj, &name[bkt], member)
+
+/**
+ * hash_for_each_safe - iterate over a hashtable safe against removal of
+ * hash entry
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @tmp: a &struct used for temporary storage
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+ (bkt)++)\
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+/**
+ * hash_for_each_possible - iterate over all possible objects hashing to the
+ * same bucket
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ */
+#define hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member)
+
+/**
+ * hash_for_each_possible_safe - iterate over all possible objects hashing to the
+ * same bucket safe against removals
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @tmp: a &struct used for temporary storage
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ */
+#define hash_for_each_possible_safe(name, obj, tmp, member, key) \
+ hlist_for_each_entry_safe(obj, tmp,\
+ &name[hash_min(key, HASH_BITS(name))], member)
+
+
+#endif
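
A hypothetical usage sketch for the API above — not part of the patch, and
assuming a tools-style build with tools/include on the include path:

    #include <linux/hashtable.h>
    #include <stdio.h>

    struct object {
            int id;
            struct hlist_node node;
    };

    /* 2^3 = 8 buckets, statically initialized. */
    static DEFINE_HASHTABLE(objects, 3);

    int main(void)
    {
            struct object a = { .id = 42 }, b = { .id = 7 }, *obj;

            hash_add(objects, &a.node, a.id);
            hash_add(objects, &b.node, b.id);

            /* Walk only the bucket that key 42 hashes into. */
            hash_for_each_possible(objects, obj, node, 42)
                    if (obj->id == 42)
                            printf("found id %d\n", obj->id);

            hash_del(&a.node);
            return 0;
    }

Note that hash_for_each_possible() can also visit colliding entries with
other keys, so the loop body still has to compare the key itself.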
diff --git a/tools/include/linux/interrupt.h b/tools/include/linux/interrupt.h
new file mode 100644
index 000000000..6be25bbdc
--- /dev/null
+++ b/tools/include/linux/interrupt.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_INTERRUPT_H
+#define _TOOLS_INCLUDE_LINUX_INTERRUPT_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_INTERRUPT_H */
diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h
new file mode 100644
index 000000000..e734da3e5
--- /dev/null
+++ b/tools/include/linux/irqflags.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
+#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
+
+# define trace_hardirq_context(p) 0
+# define trace_softirq_context(p) 0
+# define trace_hardirqs_enabled(p) 0
+# define trace_softirqs_enabled(p) 0
+# define trace_hardirq_enter() do { } while (0)
+# define trace_hardirq_exit() do { } while (0)
+# define lockdep_softirq_enter() do { } while (0)
+# define lockdep_softirq_exit() do { } while (0)
+# define INIT_TRACE_IRQFLAGS
+
+# define stop_critical_timings() do { } while (0)
+# define start_critical_timings() do { } while (0)
+
+#define raw_local_irq_disable() do { } while (0)
+#define raw_local_irq_enable() do { } while (0)
+#define raw_local_irq_save(flags) ((flags) = 0)
+#define raw_local_irq_restore(flags) ((void)(flags))
+#define raw_local_save_flags(flags) ((flags) = 0)
+#define raw_irqs_disabled_flags(flags) ((void)(flags))
+#define raw_irqs_disabled() 0
+#define raw_safe_halt()
+
+#define local_irq_enable() do { } while (0)
+#define local_irq_disable() do { } while (0)
+#define local_irq_save(flags) ((flags) = 0)
+#define local_irq_restore(flags) ((void)(flags))
+#define local_save_flags(flags) ((flags) = 0)
+#define irqs_disabled() (1)
+#define irqs_disabled_flags(flags) ((void)(flags), 0)
+#define safe_halt() do { } while (0)
+
+#define trace_lock_release(x, y)
+#define trace_lock_acquire(a, b, c, d, e, f, g)
+
+#endif
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
new file mode 100644
index 000000000..348c6f47e
--- /dev/null
+++ b/tools/include/linux/jhash.h
@@ -0,0 +1,175 @@
+#ifndef _LINUX_JHASH_H
+#define _LINUX_JHASH_H
+
+/* jhash.h: Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions. Routines to test the hash are included
+ * if SELF_TEST is defined. You can use this free for any purpose. It's in
+ * the public domain. It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/bitops.h>
+#include <linux/unaligned/packed_struct.h>
+
+/* Best hash sizes are powers of two */
+#define jhash_size(n) ((u32)1<<(n))
+/* Mask the hash value, i.e. (value & jhash_mask(n)) instead of (value % n) */
+#define jhash_mask(n) (jhash_size(n)-1)
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+/* An arbitrary initial parameter */
+#define JHASH_INITVAL 0xdeadbeef
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * The generic version hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const u8 *k = key;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ /* All but the last block: affect some 32 bits of (a,b,c) */
+ while (length > 12) {
+ a += __get_unaligned_cpu32(k);
+ b += __get_unaligned_cpu32(k + 4);
+ c += __get_unaligned_cpu32(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ /* Last block: affect all 32 bits of (c) */
+ /* All the case statements fall through */
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitrary value
+ *
+ * Returns the hash value of the key.
+ */
+static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+ /* Handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /* Handle the last 3 u32's: all the case statements fall through */
+ switch (length) {
+ case 3: c += k[2];
+ case 2: b += k[1];
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+
+ __jhash_final(a, b, c);
+
+ return c;
+}
+
+static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
+{
+ return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2));
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+static inline u32 jhash_1word(u32 a, u32 initval)
+{
+ return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2));
+}
+
+#endif /* _LINUX_JHASH_H */
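
A minimal sketch of hashing a composite key with the helpers above
(hypothetical struct and values; assumes the tools include path, since
jhash.h pulls in linux/unaligned/packed_struct.h and the u32/u16 types):

    #include <linux/jhash.h>
    #include <stdio.h>

    struct flow_key {
            u32 saddr, daddr;
            u16 sport, dport;
    };

    int main(void)
    {
            struct flow_key k = { 0x0a000001, 0x0a000002, 80, 443 };

            /* Byte-wise variant: no alignment assumptions about &k. */
            u32 h1 = jhash(&k, sizeof(k), 0);

            /* Word-wise shortcut when the key fits in three u32s. */
            u32 h2 = jhash_3words(k.saddr, k.daddr,
                                  ((u32)k.sport << 16) | k.dport, 0);

            printf("%#x %#x\n", h1, h2);
            return 0;
    }

Passing the previous hash as @initval is how several key fragments are
chained into one hash.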
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h
new file mode 100644
index 000000000..89ca6fe25
--- /dev/null
+++ b/tools/include/linux/kallsyms.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_LINUX_KALLSYMS_H_
+#define _LIBLOCKDEP_LINUX_KALLSYMS_H_
+
+#include <linux/kernel.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#define KSYM_NAME_LEN 128
+
+struct module;
+
+static inline const char *kallsyms_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, char *namebuf)
+{
+ return NULL;
+}
+
+#include <execinfo.h>
+#include <stdlib.h>
+static inline void print_ip_sym(unsigned long ip)
+{
+ char **name;
+
+ name = backtrace_symbols((void **)&ip, 1);
+
+ dprintf(STDOUT_FILENO, "%s\n", *name);
+
+ free(name);
+}
+
+#endif
diff --git a/tools/include/linux/kern_levels.h b/tools/include/linux/kern_levels.h
new file mode 100644
index 000000000..778ecb984
--- /dev/null
+++ b/tools/include/linux/kern_levels.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KERN_LEVELS_H__
+#define __KERN_LEVELS_H__
+
+#define KERN_SOH "" /* ASCII Start Of Header */
+#define KERN_SOH_ASCII ''
+
+#define KERN_EMERG KERN_SOH "" /* system is unusable */
+#define KERN_ALERT KERN_SOH "" /* action must be taken immediately */
+#define KERN_CRIT KERN_SOH "" /* critical conditions */
+#define KERN_ERR KERN_SOH "" /* error conditions */
+#define KERN_WARNING KERN_SOH "" /* warning conditions */
+#define KERN_NOTICE KERN_SOH "" /* normal but significant condition */
+#define KERN_INFO KERN_SOH "" /* informational */
+#define KERN_DEBUG KERN_SOH "" /* debug-level messages */
+
+#define KERN_DEFAULT KERN_SOH "" /* the default kernel loglevel */
+
+/*
+ * Annotation for a "continued" line of log printout (only done after a
+ * line that had no enclosing \n). Only to be used by core/arch code
+ * during early bootup (a continued line is not SMP-safe otherwise).
+ */
+#define KERN_CONT ""
+
+#endif
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
new file mode 100644
index 000000000..0ad884452
--- /dev/null
+++ b/tools/include/linux/kernel.h
@@ -0,0 +1,120 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#ifndef UINT_MAX
+#define UINT_MAX (~0U)
+#endif
+
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#define PERF_ALIGN(x, a) __PERF_ALIGN_MASK(x, (typeof(x))(a)-1)
+#define __PERF_ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+#ifndef container_of
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
+
+#ifndef max
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+#endif
+
+#ifndef min
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ const typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+#ifndef BUG_ON
+#ifdef NDEBUG
+#define BUG_ON(cond) do { if (cond) {} } while (0)
+#else
+#define BUG_ON(cond) assert(!(cond))
+#endif
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define cpu_to_le16 bswap_16
+#define cpu_to_le32 bswap_32
+#define cpu_to_le64 bswap_64
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define cpu_to_be16
+#define cpu_to_be32
+#define cpu_to_be64
+#define be16_to_cpu
+#define be32_to_cpu
+#define be64_to_cpu
+#else
+#define cpu_to_le16
+#define cpu_to_le32
+#define cpu_to_le64
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define cpu_to_be16 bswap_16
+#define cpu_to_be32 bswap_32
+#define cpu_to_be64 bswap_64
+#define be16_to_cpu bswap_16
+#define be32_to_cpu bswap_32
+#define be64_to_cpu bswap_64
+#endif
+
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args);
+int scnprintf(char * buf, size_t size, const char * fmt, ...);
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+
+/*
+ * This looks more complex than it should be. But we need to
+ * get the type for the ~ right in round_down (it needs to be
+ * as wide as the result!), and we want to evaluate the macro
+ * arguments just once each.
+ */
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#define round_down(x, y) ((x) & ~__round_mask(x, y))
+
+#define current_gfp_context(k) 0
+#define synchronize_sched()
+
+#endif
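
A small sketch of two of the most used helpers here, container_of() and
round_up() (hypothetical struct; assumes the tools include path):

    #include <linux/kernel.h>
    #include <stdio.h>

    struct wrapper {
            int tag;
            double payload;
    };

    int main(void)
    {
            struct wrapper w = { .tag = 1, .payload = 3.5 };
            double *p = &w.payload;

            /* Recover the containing struct from a member pointer. */
            struct wrapper *back = container_of(p, struct wrapper, payload);

            /* round_up() expects a power-of-two alignment: 13 -> 16. */
            printf("tag=%d round_up(13,8)=%lu\n",
                   back->tag, (unsigned long)round_up(13UL, 8));
            return 0;
    }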
diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h
new file mode 100644
index 000000000..bc763d500
--- /dev/null
+++ b/tools/include/linux/linkage.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H
+#define _TOOLS_INCLUDE_LINUX_LINKAGE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */
diff --git a/tools/include/linux/list.h b/tools/include/linux/list.h
new file mode 100644
index 000000000..b2fc48d54
--- /dev/null
+++ b/tools/include/linux/list.h
@@ -0,0 +1,772 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_LINUX_LIST_H
+#define __TOOLS_LINUX_LIST_H
+
+#include <linux/types.h>
+#include <linux/poison.h>
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+#else
+extern void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next);
+#endif
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+
+/**
+ * list_add_tail - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it before
+ *
+ * Insert a new entry before the specified head.
+ * This is useful for implementing queues.
+ */
+static inline void list_add_tail(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head->prev, head);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ WRITE_ONCE(prev->next, next);
+}
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this; the entry is
+ * in an undefined state.
+ */
+#ifndef CONFIG_DEBUG_LIST
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+#else
+extern void __list_del_entry(struct list_head *entry);
+extern void list_del(struct list_head *entry);
+#endif
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * If @old was empty, it will be overwritten.
+ */
+static inline void list_replace(struct list_head *old,
+ struct list_head *new)
+{
+ new->next = old->next;
+ new->next->prev = new;
+ new->prev = old->prev;
+ new->prev->next = new;
+}
+
+static inline void list_replace_init(struct list_head *old,
+ struct list_head *new)
+{
+ list_replace(old, new);
+ INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_del_init - deletes entry from list and reinitialize it.
+ * @entry: the element to delete from the list.
+ */
+static inline void list_del_init(struct list_head *entry)
+{
+ __list_del_entry(entry);
+ INIT_LIST_HEAD(entry);
+}
+
+/**
+ * list_move - delete from one list and add as another's head
+ * @list: the entry to move
+ * @head: the head that will precede our entry
+ */
+static inline void list_move(struct list_head *list, struct list_head *head)
+{
+ __list_del_entry(list);
+ list_add(list, head);
+}
+
+/**
+ * list_move_tail - delete from one list and add as another's tail
+ * @list: the entry to move
+ * @head: the head that will follow our entry
+ */
+static inline void list_move_tail(struct list_head *list,
+ struct list_head *head)
+{
+ __list_del_entry(list);
+ list_add_tail(list, head);
+}
+
+/**
+ * list_is_last - tests whether @list is the last entry in list @head
+ * @list: the entry to test
+ * @head: the head of the list
+ */
+static inline int list_is_last(const struct list_head *list,
+ const struct list_head *head)
+{
+ return list->next == head;
+}
+
+/**
+ * list_empty - tests whether a list is empty
+ * @head: the list to test.
+ */
+static inline int list_empty(const struct list_head *head)
+{
+ return head->next == head;
+}
+
+/**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+ *
+ * Description:
+ * tests whether a list is empty _and_ checks that no other CPU might be
+ * in the process of modifying either member (next or prev)
+ *
+ * NOTE: using list_empty_careful() without synchronization
+ * can only be safe if the only activity that can happen
+ * to the list entry is list_del_init(). Eg. it cannot be used
+ * if another CPU could re-list_add() it.
+ */
+static inline int list_empty_careful(const struct list_head *head)
+{
+ struct list_head *next = head->next;
+ return (next == head) && (next == head->prev);
+}
+
+/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void list_rotate_left(struct list_head *head)
+{
+ struct list_head *first;
+
+ if (!list_empty(head)) {
+ first = head->next;
+ list_move_tail(first, head);
+ }
+}
+
+/**
+ * list_is_singular - tests whether a list has just one entry.
+ * @head: the list to test.
+ */
+static inline int list_is_singular(const struct list_head *head)
+{
+ return !list_empty(head) && (head->next == head->prev);
+}
+
+static inline void __list_cut_position(struct list_head *list,
+ struct list_head *head, struct list_head *entry)
+{
+ struct list_head *new_first = entry->next;
+ list->next = head->next;
+ list->next->prev = list;
+ list->prev = entry;
+ entry->next = list;
+ head->next = new_first;
+ new_first->prev = head;
+}
+
+/**
+ * list_cut_position - cut a list into two
+ * @list: a new list to add all removed entries
+ * @head: a list with entries
+ * @entry: an entry within head, could be the head itself
+ * and if so we won't cut the list
+ *
+ * This helper moves the initial part of @head, up to and
+ * including @entry, from @head to @list. You should
+ * pass on @entry an element you know is on @head. @list
+ * should be an empty list or a list you do not care about
+ * losing its data.
+ *
+ */
+static inline void list_cut_position(struct list_head *list,
+ struct list_head *head, struct list_head *entry)
+{
+ if (list_empty(head))
+ return;
+ if (list_is_singular(head) &&
+ (head->next != entry && head != entry))
+ return;
+ if (entry == head)
+ INIT_LIST_HEAD(list);
+ else
+ __list_cut_position(list, head, entry);
+}
+
+static inline void __list_splice(const struct list_head *list,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ struct list_head *first = list->next;
+ struct list_head *last = list->prev;
+
+ first->prev = prev;
+ prev->next = first;
+
+ last->next = next;
+ next->prev = last;
+}
+
+/**
+ * list_splice - join two lists, this is designed for stacks
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice(const struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head, head->next);
+}
+
+/**
+ * list_splice_tail - join two lists, each list being a queue
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ */
+static inline void list_splice_tail(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list))
+ __list_splice(list, head->prev, head);
+}
+
+/**
+ * list_splice_init - join two lists and reinitialise the emptied list.
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_init(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list)) {
+ __list_splice(list, head, head->next);
+ INIT_LIST_HEAD(list);
+ }
+}
+
+/**
+ * list_splice_tail_init - join two lists and reinitialise the emptied list
+ * @list: the new list to add.
+ * @head: the place to add it in the first list.
+ *
+ * Each of the lists is a queue.
+ * The list at @list is reinitialised
+ */
+static inline void list_splice_tail_init(struct list_head *list,
+ struct list_head *head)
+{
+ if (!list_empty(list)) {
+ __list_splice(list, head->prev, head);
+ INIT_LIST_HEAD(list);
+ }
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+
+/**
+ * list_first_entry - get the first element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * Note that the list is expected to be non-empty.
+ */
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+/**
+ * list_last_entry - get the last element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * Note that the list is expected to be non-empty.
+ */
+#define list_last_entry(ptr, type, member) \
+ list_entry((ptr)->prev, type, member)
+
+/**
+ * list_first_entry_or_null - get the first element from a list
+ * @ptr: the list head to take the element from.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ *
+ * Note that if the list is empty, it returns NULL.
+ */
+#define list_first_entry_or_null(ptr, type, member) \
+ (!list_empty(ptr) ? list_first_entry(ptr, type, member) : NULL)
+
+/**
+ * list_next_entry - get the next element in list
+ * @pos: the type * to cursor
+ * @member: the name of the list_head within the struct.
+ */
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+/**
+ * list_prev_entry - get the prev element in list
+ * @pos: the type * to cursor
+ * @member: the name of the list_head within the struct.
+ */
+#define list_prev_entry(pos, member) \
+ list_entry((pos)->member.prev, typeof(*(pos)), member)
+
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_prev - iterate over a list backwards
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each_prev(pos, head) \
+ for (pos = (head)->prev; pos != (head); pos = pos->prev)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+/**
+ * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_prev_safe(pos, n, head) \
+ for (pos = (head)->prev, n = pos->prev; \
+ pos != (head); \
+ pos = n, n = pos->prev)
+
+/**
+ * list_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_next_entry(pos, member))
+
+/**
+ * list_for_each_entry_reverse - iterate backwards over list of given type.
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_last_entry(head, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_prev_entry(pos, member))
+
+/**
+ * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue()
+ * @pos: the type * to use as a start point
+ * @head: the head of the list
+ * @member: the name of the list_head within the struct.
+ *
+ * Prepares a pos entry for use as a start point in list_for_each_entry_continue().
+ */
+#define list_prepare_entry(pos, head, member) \
+ ((pos) ? : list_entry(head, typeof(*pos), member))
+
+/**
+ * list_for_each_entry_continue - continue iteration over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Continue to iterate over list of given type, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue(pos, head, member) \
+ for (pos = list_next_entry(pos, member); \
+ &pos->member != (head); \
+ pos = list_next_entry(pos, member))
+
+/**
+ * list_for_each_entry_continue_reverse - iterate backwards from the given point
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Start to iterate over list of given type backwards, continuing after
+ * the current position.
+ */
+#define list_for_each_entry_continue_reverse(pos, head, member) \
+ for (pos = list_prev_entry(pos, member); \
+ &pos->member != (head); \
+ pos = list_prev_entry(pos, member))
+
+/**
+ * list_for_each_entry_from - iterate over list of given type from the current point
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Iterate over list of given type, continuing from current position.
+ */
+#define list_for_each_entry_from(pos, head, member) \
+ for (; &pos->member != (head); \
+ pos = list_next_entry(pos, member))
+
+/**
+ * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_first_entry(head, typeof(*pos), member), \
+ n = list_next_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_next_entry(n, member))
+
+/**
+ * list_for_each_entry_safe_continue - continue list iteration safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Iterate over list of given type, continuing after current point,
+ * safe against removal of list entry.
+ */
+#define list_for_each_entry_safe_continue(pos, n, head, member) \
+ for (pos = list_next_entry(pos, member), \
+ n = list_next_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_next_entry(n, member))
+
+/**
+ * list_for_each_entry_safe_from - iterate over list from current point safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Iterate over list of given type from current point, safe against
+ * removal of list entry.
+ */
+#define list_for_each_entry_safe_from(pos, n, head, member) \
+ for (n = list_next_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_next_entry(n, member))
+
+/**
+ * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
+ * @pos: the type * to use as a loop cursor.
+ * @n: another type * to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the list_head within the struct.
+ *
+ * Iterate backwards over list of given type, safe against removal
+ * of list entry.
+ */
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_last_entry(head, typeof(*pos), member), \
+ n = list_prev_entry(pos, member); \
+ &pos->member != (head); \
+ pos = n, n = list_prev_entry(n, member))
+
+/**
+ * list_safe_reset_next - reset a stale list_for_each_entry_safe loop
+ * @pos: the loop cursor used in the list_for_each_entry_safe loop
+ * @n: temporary storage used in list_for_each_entry_safe
+ * @member: the name of the list_head within the struct.
+ *
+ * list_safe_reset_next is not safe to use in general if the list may be
+ * modified concurrently (eg. the lock is dropped in the loop body). An
+ * exception to this is if the cursor element (pos) is pinned in the list,
+ * and list_safe_reset_next is called after re-taking the lock and before
+ * completing the current iteration of the loop body.
+ */
+#define list_safe_reset_next(pos, n, member) \
+ n = list_next_entry(pos, member)
+
+/*
+ * Double linked lists with a single pointer list head.
+ * Mostly useful for hash tables where the two pointer list head is
+ * too wasteful.
+ * You lose the ability to access the tail in O(1).
+ */
+
+#define HLIST_HEAD_INIT { .first = NULL }
+#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL }
+#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
+static inline void INIT_HLIST_NODE(struct hlist_node *h)
+{
+ h->next = NULL;
+ h->pprev = NULL;
+}
+
+static inline int hlist_unhashed(const struct hlist_node *h)
+{
+ return !h->pprev;
+}
+
+static inline int hlist_empty(const struct hlist_head *h)
+{
+ return !h->first;
+}
+
+static inline void __hlist_del(struct hlist_node *n)
+{
+ struct hlist_node *next = n->next;
+ struct hlist_node **pprev = n->pprev;
+
+ WRITE_ONCE(*pprev, next);
+ if (next)
+ next->pprev = pprev;
+}
+
+static inline void hlist_del(struct hlist_node *n)
+{
+ __hlist_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
+
+static inline void hlist_del_init(struct hlist_node *n)
+{
+ if (!hlist_unhashed(n)) {
+ __hlist_del(n);
+ INIT_HLIST_NODE(n);
+ }
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+ struct hlist_node *first = h->first;
+ n->next = first;
+ if (first)
+ first->pprev = &n->next;
+ h->first = n;
+ n->pprev = &h->first;
+}
+
+/* next must be != NULL */
+static inline void hlist_add_before(struct hlist_node *n,
+ struct hlist_node *next)
+{
+ n->pprev = next->pprev;
+ n->next = next;
+ next->pprev = &n->next;
+ *(n->pprev) = n;
+}
+
+static inline void hlist_add_behind(struct hlist_node *n,
+ struct hlist_node *prev)
+{
+ n->next = prev->next;
+ prev->next = n;
+ n->pprev = &prev->next;
+
+ if (n->next)
+ n->next->pprev = &n->next;
+}
+
+/* after that we'll appear to be on some hlist and hlist_del will work */
+static inline void hlist_add_fake(struct hlist_node *n)
+{
+ n->pprev = &n->next;
+}
+
+static inline bool hlist_fake(struct hlist_node *h)
+{
+ return h->pprev == &h->next;
+}
+
+/*
+ * Move a list from one list head to another. Fixup the pprev
+ * reference of the first entry if it exists.
+ */
+static inline void hlist_move_list(struct hlist_head *old,
+ struct hlist_head *new)
+{
+ new->first = old->first;
+ if (new->first)
+ new->first->pprev = &new->first;
+ old->first = NULL;
+}
+
+#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
+
+#define hlist_for_each(pos, head) \
+ for (pos = (head)->first; pos ; pos = pos->next)
+
+#define hlist_for_each_safe(pos, n, head) \
+ for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+ pos = n)
+
+#define hlist_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ ____ptr ? hlist_entry(____ptr, type, member) : NULL; \
+ })
+
+/**
+ * hlist_for_each_entry - iterate over list of given type
+ * @pos: the type * to use as a loop cursor.
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry(pos, head, member) \
+ for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\
+ pos; \
+ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+/**
+ * hlist_for_each_entry_continue - iterate over a hlist continuing after current point
+ * @pos: the type * to use as a loop cursor.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue(pos, member) \
+ for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\
+ pos; \
+ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+/**
+ * hlist_for_each_entry_from - iterate over a hlist continuing from current point
+ * @pos: the type * to use as a loop cursor.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_from(pos, member) \
+ for (; pos; \
+ pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+/**
+ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
+ * @pos: the type * to use as a loop cursor.
+ * @n: another &struct hlist_node to use as temporary storage
+ * @head: the head for your list.
+ * @member: the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_safe(pos, n, head, member) \
+ for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\
+ pos && ({ n = pos->member.next; 1; }); \
+ pos = hlist_entry_safe(n, typeof(*pos), member))
+
+/**
+ * list_del_range - deletes range of entries from list.
+ * @begin: first element in the range to delete from the list.
+ * @end: last element in the range to delete from the list.
+ * Note: list_empty() on the range of entries does not return true after this;
+ * the entries are in an undefined state.
+ */
+static inline void list_del_range(struct list_head *begin,
+ struct list_head *end)
+{
+ begin->prev->next = end->next;
+ end->next->prev = begin->prev;
+}
+
+/**
+ * list_for_each_from - iterate over a list from one of its nodes
+ * @pos: the &struct list_head to use as a loop cursor, from where to start
+ * @head: the head for your list.
+ */
+#define list_for_each_from(pos, head) \
+ for (; pos != (head); pos = pos->next)
+
+#endif /* __TOOLS_LINUX_LIST_H */
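
A hypothetical end-to-end sketch of the doubly linked list API above (not
part of the patch; assumes the tools include path):

    #include <linux/list.h>
    #include <stdio.h>

    struct item {
            int value;
            struct list_head link;
    };

    int main(void)
    {
            LIST_HEAD(queue);
            struct item a = { .value = 1 }, b = { .value = 2 }, *it, *tmp;

            list_add_tail(&a.link, &queue);      /* queue semantics */
            list_add_tail(&b.link, &queue);

            list_for_each_entry(it, &queue, link)
                    printf("%d\n", it->value);

            /* The _safe variant caches the next entry, so deleting the
             * current one while iterating is fine. */
            list_for_each_entry_safe(it, tmp, &queue, link)
                    list_del(&it->link);

            return 0;
    }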
diff --git a/tools/include/linux/lockdep.h b/tools/include/linux/lockdep.h
new file mode 100644
index 000000000..e56997288
--- /dev/null
+++ b/tools/include/linux/lockdep.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_LOCKDEP_H_
+#define _LIBLOCKDEP_LOCKDEP_H_
+
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <limits.h>
+#include <linux/utsname.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/kern_levels.h>
+#include <linux/err.h>
+#include <linux/rcu.h>
+#include <linux/list.h>
+#include <linux/hardirq.h>
+#include <unistd.h>
+
+#define MAX_LOCK_DEPTH 63UL
+
+#define asmlinkage
+#define __visible
+
+#include "../../../include/linux/lockdep.h"
+
+struct task_struct {
+ u64 curr_chain_key;
+ int lockdep_depth;
+ unsigned int lockdep_recursion;
+ struct held_lock held_locks[MAX_LOCK_DEPTH];
+ gfp_t lockdep_reclaim_gfp;
+ int pid;
+ int state;
+ char comm[17];
+};
+
+#define TASK_RUNNING 0
+
+extern struct task_struct *__curr(void);
+
+#define current (__curr())
+
+static inline int debug_locks_off(void)
+{
+ return 1;
+}
+
+#define task_pid_nr(tsk) ((tsk)->pid)
+
+#define KSYM_NAME_LEN 128
+#define printk(...) dprintf(STDOUT_FILENO, __VA_ARGS__)
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define pr_warn pr_err
+#define pr_cont pr_err
+
+#define list_del_rcu list_del
+
+#define atomic_t unsigned long
+#define atomic_inc(x) ((*(x))++)
+
+#define print_tainted() ""
+#define static_obj(x) 1
+
+#define debug_show_all_locks()
+extern void debug_check_no_locks_held(void);
+
+static __used bool __is_kernel_percpu_address(unsigned long addr, void *can_addr)
+{
+ return false;
+}
+
+#endif
diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h
new file mode 100644
index 000000000..0325cefc2
--- /dev/null
+++ b/tools/include/linux/log2.h
@@ -0,0 +1,179 @@
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _TOOLS_LINUX_LOG2_H
+#define _TOOLS_LINUX_LOG2_H
+
+#include <linux/bitops.h>
+#include <linux/types.h>
+
+/*
+ * non-constant log of base 2 calculators
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ * more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)
+{
+ return fls(n) - 1;
+}
+
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)
+{
+ return fls64(n) - 1;
+}
+
+/*
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ */
+
+static inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+ return (n != 0 && ((n & (n - 1)) == 0));
+}
+
+/*
+ * round up to nearest power of two
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+ return 1UL << fls_long(n - 1);
+}
+
+/*
+ * round down to nearest power of two
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+ return 1UL << (fls_long(n) - 1);
+}
+
+/**
+ * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
+ * @n - parameter
+ *
+ * constant-capable log of base 2 calculation
+ * - this can be used to initialise global variables from constant data, hence
+ * the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n) < 2 ? 0 : \
+ (n) & (1ULL << 63) ? 63 : \
+ (n) & (1ULL << 62) ? 62 : \
+ (n) & (1ULL << 61) ? 61 : \
+ (n) & (1ULL << 60) ? 60 : \
+ (n) & (1ULL << 59) ? 59 : \
+ (n) & (1ULL << 58) ? 58 : \
+ (n) & (1ULL << 57) ? 57 : \
+ (n) & (1ULL << 56) ? 56 : \
+ (n) & (1ULL << 55) ? 55 : \
+ (n) & (1ULL << 54) ? 54 : \
+ (n) & (1ULL << 53) ? 53 : \
+ (n) & (1ULL << 52) ? 52 : \
+ (n) & (1ULL << 51) ? 51 : \
+ (n) & (1ULL << 50) ? 50 : \
+ (n) & (1ULL << 49) ? 49 : \
+ (n) & (1ULL << 48) ? 48 : \
+ (n) & (1ULL << 47) ? 47 : \
+ (n) & (1ULL << 46) ? 46 : \
+ (n) & (1ULL << 45) ? 45 : \
+ (n) & (1ULL << 44) ? 44 : \
+ (n) & (1ULL << 43) ? 43 : \
+ (n) & (1ULL << 42) ? 42 : \
+ (n) & (1ULL << 41) ? 41 : \
+ (n) & (1ULL << 40) ? 40 : \
+ (n) & (1ULL << 39) ? 39 : \
+ (n) & (1ULL << 38) ? 38 : \
+ (n) & (1ULL << 37) ? 37 : \
+ (n) & (1ULL << 36) ? 36 : \
+ (n) & (1ULL << 35) ? 35 : \
+ (n) & (1ULL << 34) ? 34 : \
+ (n) & (1ULL << 33) ? 33 : \
+ (n) & (1ULL << 32) ? 32 : \
+ (n) & (1ULL << 31) ? 31 : \
+ (n) & (1ULL << 30) ? 30 : \
+ (n) & (1ULL << 29) ? 29 : \
+ (n) & (1ULL << 28) ? 28 : \
+ (n) & (1ULL << 27) ? 27 : \
+ (n) & (1ULL << 26) ? 26 : \
+ (n) & (1ULL << 25) ? 25 : \
+ (n) & (1ULL << 24) ? 24 : \
+ (n) & (1ULL << 23) ? 23 : \
+ (n) & (1ULL << 22) ? 22 : \
+ (n) & (1ULL << 21) ? 21 : \
+ (n) & (1ULL << 20) ? 20 : \
+ (n) & (1ULL << 19) ? 19 : \
+ (n) & (1ULL << 18) ? 18 : \
+ (n) & (1ULL << 17) ? 17 : \
+ (n) & (1ULL << 16) ? 16 : \
+ (n) & (1ULL << 15) ? 15 : \
+ (n) & (1ULL << 14) ? 14 : \
+ (n) & (1ULL << 13) ? 13 : \
+ (n) & (1ULL << 12) ? 12 : \
+ (n) & (1ULL << 11) ? 11 : \
+ (n) & (1ULL << 10) ? 10 : \
+ (n) & (1ULL << 9) ? 9 : \
+ (n) & (1ULL << 8) ? 8 : \
+ (n) & (1ULL << 7) ? 7 : \
+ (n) & (1ULL << 6) ? 6 : \
+ (n) & (1ULL << 5) ? 5 : \
+ (n) & (1ULL << 4) ? 4 : \
+ (n) & (1ULL << 3) ? 3 : \
+ (n) & (1ULL << 2) ? 2 : \
+ 1 ) : \
+ (sizeof(n) <= 4) ? \
+ __ilog2_u32(n) : \
+ __ilog2_u64(n) \
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n - parameter
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (n == 1) ? 1 : \
+ (1UL << (ilog2((n) - 1) + 1)) \
+ ) : \
+ __roundup_pow_of_two(n) \
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n - parameter
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n) \
+( \
+ __builtin_constant_p(n) ? ( \
+ (1UL << ilog2(n))) : \
+ __rounddown_pow_of_two(n) \
+ )
+
+#endif /* _TOOLS_LINUX_LOG2_H */
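
A short sketch of the constant/runtime split that ilog2() makes
(hypothetical values; assumes the tools include path for the fls() helpers):

    #include <linux/log2.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned long n = 100;   /* runtime value */

            /* Constant argument: folded by the big ternary chain. */
            printf("ilog2(64) = %d\n", ilog2(64));              /* 6 */

            /* Runtime argument: falls through to the fls()-based path. */
            printf("roundup_pow_of_two(%lu) = %lu\n",
                   n, roundup_pow_of_two(n));                   /* 128 */
            printf("is_power_of_2(%lu) = %d\n", n, is_power_of_2(n));
            return 0;
    }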
diff --git a/tools/include/linux/module.h b/tools/include/linux/module.h
new file mode 100644
index 000000000..2c999abf6
--- /dev/null
+++ b/tools/include/linux/module.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_LINUX_MODULE_H_
+#define _LIBLOCKDEP_LINUX_MODULE_H_
+
+#define module_param(name, type, perm)
+
+static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr)
+{
+ return false;
+}
+
+#endif
diff --git a/tools/include/linux/mutex.h b/tools/include/linux/mutex.h
new file mode 100644
index 000000000..a8180d25f
--- /dev/null
+++ b/tools/include/linux/mutex.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_MUTEX_H
+#define _TOOLS_INCLUDE_LINUX_MUTEX_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_MUTEX_H */
diff --git a/tools/include/linux/nmi.h b/tools/include/linux/nmi.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/include/linux/nmi.h
diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h
new file mode 100644
index 000000000..8712ff709
--- /dev/null
+++ b/tools/include/linux/overflow.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+#ifndef __LINUX_OVERFLOW_H
+#define __LINUX_OVERFLOW_H
+
+#include <linux/compiler.h>
+
+/*
+ * In the fallback code below, we need to compute the minimum and
+ * maximum values representable in a given type. These macros may also
+ * be useful elsewhere, so we provide them outside the
+ * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block.
+ *
+ * It would seem more obvious to do something like
+ *
+ * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0)
+ * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0)
+ *
+ * Unfortunately, the middle expressions, strictly speaking, have
+ * undefined behaviour, and at least some versions of gcc warn about
+ * the type_max expression (but not if -fsanitize=undefined is in
+ * effect; in that case, the warning is deferred to runtime...).
+ *
+ * The slightly excessive casting in type_min is to make sure the
+ * macros also produce sensible values for the exotic type _Bool. [The
+ * overflow checkers only almost work for _Bool, but that's
+ * a-feature-not-a-bug, since people shouldn't be doing arithmetic on
+ * _Bools. Besides, the gcc builtins don't allow _Bool* as third
+ * argument.]
+ *
+ * Idea stolen from
+ * https://mail-index.netbsd.org/tech-misc/2007/02/05/0000.html -
+ * credit to Christian Biere.
+ */
+#define is_signed_type(type) (((type)(-1)) < (type)1)
+#define __type_half_max(type) ((type)1 << (8*sizeof(type) - 1 - is_signed_type(type)))
+#define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T)))
+#define type_min(T) ((T)((T)-type_max(T)-(T)1))
+
+
+#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW
+/*
+ * For simplicity and code hygiene, the fallback code below insists on
+ * a, b and *d having the same type (similar to the min() and max()
+ * macros), whereas gcc's type-generic overflow checkers accept
+ * different types. Hence we don't just make check_add_overflow an
+ * alias for __builtin_add_overflow, but add type checks similar to
+ * below.
+ */
+#define check_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_add_overflow(__a, __b, __d); \
+})
+
+#define check_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_sub_overflow(__a, __b, __d); \
+})
+
+#define check_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ __builtin_mul_overflow(__a, __b, __d); \
+})
+
+#else
+
+
+/* Checking for unsigned overflow is relatively easy without causing UB. */
+#define __unsigned_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a + __b; \
+ *__d < __a; \
+})
+#define __unsigned_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a - __b; \
+ __a < __b; \
+})
+/*
+ * If one of a or b is a compile-time constant, this avoids a division.
+ */
+#define __unsigned_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = __a * __b; \
+ __builtin_constant_p(__b) ? \
+ __b > 0 && __a > type_max(typeof(__a)) / __b : \
+ __a > 0 && __b > type_max(typeof(__b)) / __a; \
+})
+
+/*
+ * For signed types, detecting overflow is much harder, especially if
+ * we want to avoid UB. But the interface of these macros is such that
+ * we must provide a result in *d, and in fact we must produce the
+ * result promised by gcc's builtins, which is simply the possibly
+ * wrapped-around value. Fortunately, we can just formally do the
+ * operations in the widest relevant unsigned type (u64) and then
+ * truncate the result - gcc is smart enough to generate the same code
+ * with and without the (u64) casts.
+ */
+
+/*
+ * Adding two signed integers can overflow only if they have the same
+ * sign, and overflow has happened iff the result has the opposite
+ * sign.
+ */
+#define __signed_add_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a + (u64)__b; \
+ (((~(__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Subtraction is similar, except that overflow can now happen only
+ * when the signs are opposite. In this case, overflow has happened if
+ * the result has the opposite sign of a.
+ */
+#define __signed_sub_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a - (u64)__b; \
+ ((((__a ^ __b)) & (*__d ^ __a)) \
+ & type_min(typeof(__a))) != 0; \
+})
+
+/*
+ * Signed multiplication is rather hard. gcc always follows C99, so
+ * division is truncated towards 0. This means that we can write the
+ * overflow check like this:
+ *
+ * (a > 0 && (b > MAX/a || b < MIN/a)) ||
+ * (a < -1 && (b > MIN/a || b < MAX/a) ||
+ * (a == -1 && b == MIN)
+ *
+ * The redundant casts of -1 are to silence an annoying -Wtype-limits
+ * (included in -Wextra) warning: When the type is u8 or u16, the
+ * __b_c_e in check_mul_overflow obviously selects
+ * __unsigned_mul_overflow, but unfortunately gcc still parses this
+ * code and warns about the limited range of __b.
+ */
+
+#define __signed_mul_overflow(a, b, d) ({ \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ typeof(d) __d = (d); \
+ typeof(a) __tmax = type_max(typeof(a)); \
+ typeof(a) __tmin = type_min(typeof(a)); \
+ (void) (&__a == &__b); \
+ (void) (&__a == __d); \
+ *__d = (u64)__a * (u64)__b; \
+ (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \
+ (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \
+ (__b == (typeof(__b))-1 && __a == __tmin); \
+})
+
+
+#define check_add_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_add_overflow(a, b, d), \
+ __unsigned_add_overflow(a, b, d))
+
+#define check_sub_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_sub_overflow(a, b, d), \
+ __unsigned_sub_overflow(a, b, d))
+
+#define check_mul_overflow(a, b, d) \
+ __builtin_choose_expr(is_signed_type(typeof(a)), \
+ __signed_mul_overflow(a, b, d), \
+ __unsigned_mul_overflow(a, b, d))
+
+
+#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */
+
+/**
+ * array_size() - Calculate size of 2-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ *
+ * Calculates size of 2-dimensional array: @a * @b.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array_size(size_t a, size_t b)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/**
+ * array3_size() - Calculate size of 3-dimensional array.
+ *
+ * @a: dimension one
+ * @b: dimension two
+ * @c: dimension three
+ *
+ * Calculates size of 3-dimensional array: @a * @b * @c.
+ *
+ * Returns: number of bytes needed to represent the array or SIZE_MAX on
+ * overflow.
+ */
+static inline __must_check size_t array3_size(size_t a, size_t b, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(a, b, &bytes))
+ return SIZE_MAX;
+ if (check_mul_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+static inline __must_check size_t __ab_c_size(size_t n, size_t size, size_t c)
+{
+ size_t bytes;
+
+ if (check_mul_overflow(n, size, &bytes))
+ return SIZE_MAX;
+ if (check_add_overflow(bytes, c, &bytes))
+ return SIZE_MAX;
+
+ return bytes;
+}
+
+/**
+ * struct_size() - Calculate size of structure with trailing array.
+ * @p: Pointer to the structure.
+ * @member: Name of the array member.
+ * @n: Number of elements in the array.
+ *
+ * Calculates size of memory needed for structure @p followed by an
+ * array of @n @member elements.
+ *
+ * Return: number of bytes needed or SIZE_MAX on overflow.
+ */
+#define struct_size(p, member, n) \
+ __ab_c_size(n, \
+ sizeof(*(p)->member) + __must_be_array((p)->member),\
+ sizeof(*(p)))
+
+#endif /* __LINUX_OVERFLOW_H */
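
A minimal sketch of the checkers above (hypothetical values; stdint.h and
linux/types.h come first because the helpers use SIZE_MAX and u64):

    #include <stdint.h>          /* SIZE_MAX, used by the size helpers */
    #include <linux/types.h>     /* u64, used by the fallback macros */
    #include <linux/overflow.h>
    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
            unsigned int usum;
            int ssum;

            /* Unsigned: the wrapped result still lands in usum,
             * the return value flags the overflow. */
            if (check_add_overflow(UINT_MAX, 1u, &usum))
                    printf("unsigned add overflowed, usum=%u\n", usum);

            /* Signed: same-sign operands whose sum flips sign. */
            if (check_add_overflow(INT_MAX, 1, &ssum))
                    printf("signed add overflowed, ssum=%d\n", ssum);

            return 0;
    }

array_size() and struct_size() build on the same checkers and saturate to
SIZE_MAX, so a subsequent allocation of SIZE_MAX bytes fails outright
instead of silently allocating a truncated buffer.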
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
new file mode 100644
index 000000000..9fdcd3eaa
--- /dev/null
+++ b/tools/include/linux/poison.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_POISON_H
+#define _LINUX_POISON_H
+
+/********** include/linux/list.h **********/
+
+/*
+ * Architectures might want to move the poison pointer offset
+ * into some well-recognized area such as 0xdead000000000000,
+ * that is also not mappable by user-space exploits:
+ */
+#ifdef CONFIG_ILLEGAL_POINTER_VALUE
+# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL)
+#else
+# define POISON_POINTER_DELTA 0
+#endif
+
+#ifdef __cplusplus
+#define LIST_POISON1 NULL
+#define LIST_POISON2 NULL
+#else
+/*
+ * These are non-NULL pointers that will result in page faults
+ * under normal circumstances, used to verify that nobody uses
+ * non-initialized list entries.
+ */
+#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA)
+#endif
+
+/********** include/linux/timer.h **********/
+/*
+ * Magic number "tsta" to indicate a static timer initializer
+ * for the object debugging code.
+ */
+#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
+
+/********** mm/debug-pagealloc.c **********/
+#ifdef CONFIG_PAGE_POISONING_ZERO
+#define PAGE_POISON 0x00
+#else
+#define PAGE_POISON 0xaa
+#endif
+
+/********** mm/page_alloc.c ************/
+
+#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA)
+
+/********** mm/slab.c **********/
+/*
+ * Magic numbers for object red zoning.
+ * Placed in the first word before and the first word after an object.
+ */
+#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */
+#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */
+
+#define SLUB_RED_INACTIVE 0xbb
+#define SLUB_RED_ACTIVE 0xcc
+
+/* ...and for poisoning */
+#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */
+#define POISON_FREE 0x6b /* for use-after-free poisoning */
+#define POISON_END 0xa5 /* end-byte of poisoning */
+
+/********** arch/$ARCH/mm/init.c **********/
+#define POISON_FREE_INITMEM 0xcc
+
+/********** arch/ia64/hp/common/sba_iommu.c **********/
+/*
+ * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a
+ * value of "SBAIOMMU POISON\0" for spill-over poisoning.
+ */
+
+/********** fs/jbd/journal.c **********/
+#define JBD_POISON_FREE 0x5b
+#define JBD2_POISON_FREE 0x5c
+
+/********** drivers/base/dmapool.c **********/
+#define POOL_POISON_FREED 0xa7 /* !inuse */
+#define POOL_POISON_ALLOCATED 0xa9 /* !initted */
+
+/********** drivers/atm/ **********/
+#define ATM_POISON_FREE 0x12
+#define ATM_POISON 0xdeadbeef
+
+/********** kernel/mutexes **********/
+#define MUTEX_DEBUG_INIT 0x11
+#define MUTEX_DEBUG_FREE 0x22
+
+/********** lib/flex_array.c **********/
+#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */
+
+/********** security/ **********/
+#define KEY_DESTROY 0xbd
+
+#endif
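
For context, a sketch of the kind of consumer these list poison values are
for, loosely modelled on the kernel's __list_del_entry() (the helper and the
local node type are illustrative): after unlinking, the stale entry's
pointers are set to values that fault on dereference instead of silently
walking freed memory.

#include <linux/poison.h>

struct list_node {
	struct list_node *next, *prev;
};

/* Unlink an entry and poison its links so any later use faults loudly. */
static inline void list_del_poison(struct list_node *entry)
{
	entry->next->prev = entry->prev;
	entry->prev->next = entry->next;
	entry->next = (struct list_node *)LIST_POISON1;
	entry->prev = (struct list_node *)LIST_POISON2;
}
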
diff --git a/tools/include/linux/proc_fs.h b/tools/include/linux/proc_fs.h
new file mode 100644
index 000000000..8b3b03b64
--- /dev/null
+++ b/tools/include/linux/proc_fs.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_PROC_FS_H
+#define _TOOLS_INCLUDE_LINUX_PROC_FS_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_PROC_FS_H */
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
new file mode 100644
index 000000000..112582253
--- /dev/null
+++ b/tools/include/linux/rbtree.h
@@ -0,0 +1,104 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/include/linux/rbtree.h
+
+ To use rbtrees you'll have to implement your own insert and search cores.
+  This avoids the need for callbacks, which would drop performance
+  dramatically. It is not the cleanest way, but it is how C (unlike C++)
+  gets both performance and genericity...
+
+ See Documentation/rbtree.txt for documentation and samples.
+*/
+
+#ifndef __TOOLS_LINUX_PERF_RBTREE_H
+#define __TOOLS_LINUX_PERF_RBTREE_H
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+
+struct rb_node {
+ unsigned long __rb_parent_color;
+ struct rb_node *rb_right;
+ struct rb_node *rb_left;
+} __attribute__((aligned(sizeof(long))));
+ /* The alignment might seem pointless, but allegedly CRIS needs it */
+
+struct rb_root {
+ struct rb_node *rb_node;
+};
+
+
+#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3))
+
+#define RB_ROOT (struct rb_root) { NULL, }
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+
+#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL)
+
+/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */
+#define RB_EMPTY_NODE(node) \
+ ((node)->__rb_parent_color == (unsigned long)(node))
+#define RB_CLEAR_NODE(node) \
+ ((node)->__rb_parent_color = (unsigned long)(node))
+
+
+extern void rb_insert_color(struct rb_node *, struct rb_root *);
+extern void rb_erase(struct rb_node *, struct rb_root *);
+
+
+/* Find logical next and previous nodes in a tree */
+extern struct rb_node *rb_next(const struct rb_node *);
+extern struct rb_node *rb_prev(const struct rb_node *);
+extern struct rb_node *rb_first(const struct rb_root *);
+extern struct rb_node *rb_last(const struct rb_root *);
+
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
+/* Fast replacement of a single node without remove/rebalance/add/rebalance */
+extern void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root);
+
+static inline void rb_link_node(struct rb_node *node, struct rb_node *parent,
+ struct rb_node **rb_link)
+{
+ node->__rb_parent_color = (unsigned long)parent;
+ node->rb_left = node->rb_right = NULL;
+
+ *rb_link = node;
+}
+
+#define rb_entry_safe(ptr, type, member) \
+ ({ typeof(ptr) ____ptr = (ptr); \
+ ____ptr ? rb_entry(____ptr, type, member) : NULL; \
+ })
+
+
+/*
+ * Handy for checking that we are not deleting an entry that is
+ * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
+ * probably should be moved to lib/rbtree.c...
+ */
+static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+ rb_erase(n, root);
+ RB_CLEAR_NODE(n);
+}
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
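
Since the header deliberately leaves the insert core to the caller, here is
the canonical open-coded descent in the shape Documentation/rbtree.txt
describes (struct mynode and its key field are illustrative); a lookup is the
same walk without the final link/color steps.

#include <stdbool.h>
#include <linux/rbtree.h>

struct mynode {
	struct rb_node node;
	long key;
};

static bool mynode_insert(struct rb_root *root, struct mynode *data)
{
	struct rb_node **link = &root->rb_node, *parent = NULL;

	/* Walk down to the leaf position where the new key belongs. */
	while (*link) {
		struct mynode *this = rb_entry(*link, struct mynode, node);

		parent = *link;
		if (data->key < this->key)
			link = &(*link)->rb_left;
		else if (data->key > this->key)
			link = &(*link)->rb_right;
		else
			return false;	/* duplicate key */
	}

	/* Attach the node, then recolor/rebalance. */
	rb_link_node(&data->node, parent, link);
	rb_insert_color(&data->node, root);
	return true;
}
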
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
new file mode 100644
index 000000000..43be941db
--- /dev/null
+++ b/tools/include/linux/rbtree_augmented.h
@@ -0,0 +1,245 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ tools/linux/include/linux/rbtree_augmented.h
+
+ Copied from:
+ linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H
+#define _TOOLS_LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+ void (*propagate)(struct rb_node *node, struct rb_node *stop);
+ void (*copy)(struct rb_node *old, struct rb_node *new);
+ void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+/*
+ * Fixup the rbtree and update the augmented information when rebalancing.
+ *
+ * On insertion, the user must update the augmented information on the path
+ * leading to the inserted node, then call rb_link_node() as usual and
+ * rb_augment_inserted() instead of the usual rb_insert_color() call.
+ * If rb_augment_inserted() rebalances the rbtree, it will callback into
+ * a user provided function to update the augmented information on the
+ * affected subtrees.
+ */
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ __rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \
+ rbtype, rbaugmented, rbcompute) \
+static inline void \
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \
+{ \
+ while (rb != stop) { \
+ rbstruct *node = rb_entry(rb, rbstruct, rbfield); \
+ rbtype augmented = rbcompute(node); \
+ if (node->rbaugmented == augmented) \
+ break; \
+ node->rbaugmented = augmented; \
+ rb = rb_parent(&node->rbfield); \
+ } \
+} \
+static inline void \
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+} \
+static void \
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
+{ \
+ rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \
+ rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \
+ new->rbaugmented = old->rbaugmented; \
+ old->rbaugmented = rbcompute(old); \
+} \
+rbstatic const struct rb_augment_callbacks rbname = { \
+ rbname ## _propagate, rbname ## _copy, rbname ## _rotate \
+};
+
+
+#define RB_RED 0
+#define RB_BLACK 1
+
+#define __rb_parent(pc) ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc) ((pc) & 1)
+#define __rb_is_black(pc) __rb_color(pc)
+#define __rb_is_red(pc) (!__rb_color(pc))
+#define rb_color(rb) __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+ rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+ struct rb_node *p, int color)
+{
+ rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+ struct rb_node *parent, struct rb_root *root)
+{
+ if (parent) {
+ if (parent->rb_left == old)
+ parent->rb_left = new;
+ else
+ parent->rb_right = new;
+ } else
+ root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline struct rb_node *
+__rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+ struct rb_node *parent, *rebalance;
+ unsigned long pc;
+
+ if (!tmp) {
+ /*
+ * Case 1: node to erase has no more than 1 child (easy!)
+ *
+ * Note that if there is one child it must be red due to 5)
+ * and node must be black due to 4). We adjust colors locally
+ * so as to bypass __rb_erase_color() later on.
+ */
+ pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, child, parent, root);
+ if (child) {
+ child->__rb_parent_color = pc;
+ rebalance = NULL;
+ } else
+ rebalance = __rb_is_black(pc) ? parent : NULL;
+ tmp = parent;
+ } else if (!child) {
+ /* Still case 1, but this time the child is node->rb_left */
+ tmp->__rb_parent_color = pc = node->__rb_parent_color;
+ parent = __rb_parent(pc);
+ __rb_change_child(node, tmp, parent, root);
+ rebalance = NULL;
+ tmp = parent;
+ } else {
+ struct rb_node *successor = child, *child2;
+ tmp = child->rb_left;
+ if (!tmp) {
+ /*
+ * Case 2: node's successor is its right child
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (s) -> (x) (c)
+ * \
+ * (c)
+ */
+ parent = successor;
+ child2 = successor->rb_right;
+ augment->copy(node, successor);
+ } else {
+ /*
+ * Case 3: node's successor is leftmost under
+ * node's right child subtree
+ *
+ * (n) (s)
+ * / \ / \
+ * (x) (y) -> (x) (y)
+ * / /
+ * (p) (p)
+ * / /
+ * (s) (c)
+ * \
+ * (c)
+ */
+ do {
+ parent = successor;
+ successor = tmp;
+ tmp = tmp->rb_left;
+ } while (tmp);
+ parent->rb_left = child2 = successor->rb_right;
+ successor->rb_right = child;
+ rb_set_parent(child, successor);
+ augment->copy(node, successor);
+ augment->propagate(parent, successor);
+ }
+
+ successor->rb_left = tmp = node->rb_left;
+ rb_set_parent(tmp, successor);
+
+ pc = node->__rb_parent_color;
+ tmp = __rb_parent(pc);
+ __rb_change_child(node, successor, tmp, root);
+ if (child2) {
+ successor->__rb_parent_color = pc;
+ rb_set_parent_color(child2, parent, RB_BLACK);
+ rebalance = NULL;
+ } else {
+ unsigned long pc2 = successor->__rb_parent_color;
+ successor->__rb_parent_color = pc;
+ rebalance = __rb_is_black(pc2) ? parent : NULL;
+ }
+ tmp = successor;
+ }
+
+ augment->propagate(tmp, NULL);
+ return rebalance;
+}
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+ const struct rb_augment_callbacks *augment)
+{
+ struct rb_node *rebalance = __rb_erase_augmented(node, root, augment);
+ if (rebalance)
+ __rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */
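
To make RB_DECLARE_CALLBACKS concrete, a sketch that keeps a per-subtree
maximum up to date (all names below are illustrative):

#include <linux/rbtree_augmented.h>

struct anode {
	struct rb_node rb;
	unsigned long value;
	unsigned long subtree_max;	/* max of ->value over this subtree */
};

/* Recompute the augmented value from a node and its two children. */
static unsigned long anode_compute_max(struct anode *n)
{
	unsigned long max = n->value;
	struct anode *child;

	if (n->rb.rb_left) {
		child = rb_entry(n->rb.rb_left, struct anode, rb);
		if (child->subtree_max > max)
			max = child->subtree_max;
	}
	if (n->rb.rb_right) {
		child = rb_entry(n->rb.rb_right, struct anode, rb);
		if (child->subtree_max > max)
			max = child->subtree_max;
	}
	return max;
}

RB_DECLARE_CALLBACKS(static, anode_cb, struct anode, rb,
		     unsigned long, subtree_max, anode_compute_max)

An insert then initializes node->subtree_max, updates the field on the
ancestors while descending, and finishes with rb_link_node() plus
rb_insert_augmented(&node->rb, root, &anode_cb); erase is simply
rb_erase_augmented(&node->rb, root, &anode_cb).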
diff --git a/tools/include/linux/rcu.h b/tools/include/linux/rcu.h
new file mode 100644
index 000000000..7d02527e5
--- /dev/null
+++ b/tools/include/linux/rcu.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_RCU_H_
+#define _LIBLOCKDEP_RCU_H_
+
+int rcu_scheduler_active;
+
+static inline int rcu_lockdep_current_cpu_online(void)
+{
+ return 1;
+}
+
+static inline int rcu_is_cpu_idle(void)
+{
+ return 1;
+}
+
+static inline bool rcu_is_watching(void)
+{
+ return false;
+}
+
+#define rcu_assign_pointer(p, v) ((p) = (v))
+#define RCU_INIT_POINTER(p, v)	((p) = (v))
+
+#endif
diff --git a/tools/include/linux/refcount.h b/tools/include/linux/refcount.h
new file mode 100644
index 000000000..36cb29bc5
--- /dev/null
+++ b/tools/include/linux/refcount.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_REFCOUNT_H
+#define _TOOLS_LINUX_REFCOUNT_H
+
+/*
+ * Variant of atomic_t specialized for reference counts.
+ *
+ * The interface matches the atomic_t interface (to aid in porting) but only
+ * provides the few functions one should use for reference counting.
+ *
+ * It differs in that the counter saturates at UINT_MAX and will not move once
+ * there. This avoids wrapping the counter and causing 'spurious'
+ * use-after-free issues.
+ *
+ * Memory ordering rules are slightly relaxed wrt regular atomic_t functions
+ * and provide only what is strictly required for refcounts.
+ *
+ * The increments are fully relaxed; these will not provide ordering. The
+ * rationale is that whatever is used to obtain the object we're increasing the
+ * reference count on will provide the ordering. For locked data structures,
+ * it's the lock acquire; for RCU/lockless data structures it's the dependent
+ * load.
+ *
+ * Do note that inc_not_zero() provides a control dependency which will order
+ * future stores against the inc; this ensures we'll never modify the object
+ * if we did not in fact acquire a reference.
+ *
+ * The decrements will provide release order, such that all the prior loads and
+ * stores will be issued before it; it also provides a control dependency, which
+ * will order us against the subsequent free().
+ *
+ * The control dependency is against the load of the cmpxchg (ll/sc) that
+ * succeeded. This means the stores aren't fully ordered, but this is fine
+ * because the 1->0 transition indicates no concurrency.
+ *
+ * Note that the allocator is responsible for ordering things between free()
+ * and alloc().
+ *
+ */
+
+#include <linux/atomic.h>
+#include <linux/kernel.h>
+
+#ifdef NDEBUG
+#define REFCOUNT_WARN(cond, str) (void)(cond)
+#define __refcount_check
+#else
+#define REFCOUNT_WARN(cond, str) BUG_ON(cond)
+#define __refcount_check __must_check
+#endif
+
+typedef struct refcount_struct {
+ atomic_t refs;
+} refcount_t;
+
+#define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), }
+
+static inline void refcount_set(refcount_t *r, unsigned int n)
+{
+ atomic_set(&r->refs, n);
+}
+
+static inline unsigned int refcount_read(const refcount_t *r)
+{
+ return atomic_read(&r->refs);
+}
+
+/*
+ * Similar to atomic_inc_not_zero(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering; it is assumed the caller has guaranteed the
+ * object memory to be stable (RCU, etc.). It does provide a control dependency
+ * and thereby orders future stores. See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_inc_not_zero(refcount_t *r)
+{
+ unsigned int old, new, val = atomic_read(&r->refs);
+
+ for (;;) {
+ new = val + 1;
+
+ if (!val)
+ return false;
+
+ if (unlikely(!new))
+ return true;
+
+ old = atomic_cmpxchg_relaxed(&r->refs, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ REFCOUNT_WARN(new == UINT_MAX, "refcount_t: saturated; leaking memory.\n");
+
+ return true;
+}
+
+/*
+ * Similar to atomic_inc(), will saturate at UINT_MAX and WARN.
+ *
+ * Provides no memory ordering; it is assumed the caller already has a
+ * reference on the object, and will WARN when this is not so.
+ */
+static inline void refcount_inc(refcount_t *r)
+{
+ REFCOUNT_WARN(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n");
+}
+
+/*
+ * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to
+ * decrement when saturated at UINT_MAX.
+ *
+ * Provides release memory ordering, such that prior loads and stores are done
+ * before it, and provides a control dependency such that free() must come after.
+ * See the comment on top.
+ */
+static inline __refcount_check
+bool refcount_sub_and_test(unsigned int i, refcount_t *r)
+{
+ unsigned int old, new, val = atomic_read(&r->refs);
+
+ for (;;) {
+ if (unlikely(val == UINT_MAX))
+ return false;
+
+ new = val - i;
+ if (new > val) {
+ REFCOUNT_WARN(new > val, "refcount_t: underflow; use-after-free.\n");
+ return false;
+ }
+
+ old = atomic_cmpxchg_release(&r->refs, val, new);
+ if (old == val)
+ break;
+
+ val = old;
+ }
+
+ return !new;
+}
+
+static inline __refcount_check
+bool refcount_dec_and_test(refcount_t *r)
+{
+ return refcount_sub_and_test(1, r);
+}
+
+
+#endif /* _TOOLS_LINUX_REFCOUNT_H */
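
The intended get/put shape, as a sketch (struct object and the helpers are
illustrative; free() stands in for whatever reclamation a real user does):

#include <stdlib.h>
#include <linux/refcount.h>

struct object {
	refcount_t refs;
	/* payload ... */
};

static struct object *object_new(void)
{
	struct object *obj = calloc(1, sizeof(*obj));

	if (obj)
		refcount_set(&obj->refs, 1);	/* creator holds one reference */
	return obj;
}

/* Take a reference only if the object is still live, e.g. after a lookup. */
static struct object *object_tryget(struct object *obj)
{
	return refcount_inc_not_zero(&obj->refs) ? obj : NULL;
}

static void object_put(struct object *obj)
{
	if (refcount_dec_and_test(&obj->refs))
		free(obj);	/* 1->0 transition: last reference dropped */
}
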
diff --git a/tools/include/linux/sched/clock.h b/tools/include/linux/sched/clock.h
new file mode 100644
index 000000000..5837d17c4
--- /dev/null
+++ b/tools/include/linux/sched/clock.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_CLOCK_H
+#define _TOOLS_PERF_LINUX_SCHED_CLOCK_H
+
+#endif /* _TOOLS_PERF_LINUX_SCHED_CLOCK_H */
diff --git a/tools/include/linux/sched/mm.h b/tools/include/linux/sched/mm.h
new file mode 100644
index 000000000..c8d9f19c1
--- /dev/null
+++ b/tools/include/linux/sched/mm.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_MM_H
+#define _TOOLS_PERF_LINUX_SCHED_MM_H
+
+#endif /* _TOOLS_PERF_LINUX_SCHED_MM_H */
diff --git a/tools/include/linux/sched/task.h b/tools/include/linux/sched/task.h
new file mode 100644
index 000000000..a97890eca
--- /dev/null
+++ b/tools/include/linux/sched/task.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_PERF_LINUX_SCHED_TASK_H
+#define _TOOLS_PERF_LINUX_SCHED_TASK_H
+
+#endif /* _TOOLS_PERF_LINUX_SCHED_TASK_H */
diff --git a/tools/include/linux/seq_file.h b/tools/include/linux/seq_file.h
new file mode 100644
index 000000000..102fd9217
--- /dev/null
+++ b/tools/include/linux/seq_file.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+#define _TOOLS_INCLUDE_LINUX_SEQ_FILE_H
+
+#endif /* _TOOLS_INCLUDE_LINUX_SEQ_FILE_H */
diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h
new file mode 100644
index 000000000..1738c0391
--- /dev/null
+++ b/tools/include/linux/spinlock.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_SPINLOCK_H_
+#define __LINUX_SPINLOCK_H_
+
+#include <pthread.h>
+#include <stdbool.h>
+
+#define spinlock_t pthread_mutex_t
+#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER
+#define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER
+#define spin_lock_init(x) pthread_mutex_init(x, NULL)
+
+#define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x)
+#define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x)
+
+#define arch_spinlock_t pthread_mutex_t
+#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+static inline void arch_spin_lock(arch_spinlock_t *mutex)
+{
+ pthread_mutex_lock(mutex);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *mutex)
+{
+ pthread_mutex_unlock(mutex);
+}
+
+static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
+{
+ return true;
+}
+
+#endif
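
These wrappers exist so kernel-style call sites compile unchanged in
userspace; the flags argument of the irqsave variants is accepted and
ignored. A sketch (names illustrative):

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(counter_lock);
static unsigned long counter;

static void counter_bump(void)
{
	unsigned long flags;

	spin_lock_irqsave(&counter_lock, flags);	/* pthread_mutex_lock() underneath */
	counter++;
	spin_unlock_irqrestore(&counter_lock, flags);
}
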
diff --git a/tools/include/linux/stacktrace.h b/tools/include/linux/stacktrace.h
new file mode 100644
index 000000000..ae343ac35
--- /dev/null
+++ b/tools/include/linux/stacktrace.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_
+#define _LIBLOCKDEP_LINUX_STACKTRACE_H_
+
+#include <execinfo.h>
+
+struct stack_trace {
+ unsigned int nr_entries, max_entries;
+ unsigned long *entries;
+ int skip;
+};
+
+static inline void print_stack_trace(struct stack_trace *trace, int spaces)
+{
+ backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1);
+}
+
+#define save_stack_trace(trace) \
+ ((trace)->nr_entries = \
+ backtrace((void **)(trace)->entries, (trace)->max_entries))
+
+static inline int dump_stack(void)
+{
+ void *array[64];
+ size_t size;
+
+ size = backtrace(array, 64);
+ backtrace_symbols_fd(array, size, 1);
+
+ return 0;
+}
+
+#endif
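
A usage sketch (the buffer size is arbitrary; save_stack_trace() stores
backtrace(3) frames through an unsigned long array, which assumes pointers
and longs have the same width, as on LP64 targets):

#include <linux/stacktrace.h>

static void report_backtrace(void)
{
	unsigned long entries[32];
	struct stack_trace trace = {
		.max_entries	= 32,
		.entries	= entries,
	};

	save_stack_trace(&trace);	/* fills entries[] and nr_entries */
	print_stack_trace(&trace, 0);	/* symbolizes to fd 1 (stdout) */
}
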
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
new file mode 100644
index 000000000..0ec646f12
--- /dev/null
+++ b/tools/include/linux/string.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_STRING_H_
+#define _TOOLS_LINUX_STRING_H_
+
+#include <linux/types.h> /* for size_t */
+#include <string.h>
+
+void *memdup(const void *src, size_t len);
+
+int strtobool(const char *s, bool *res);
+
+/*
+ * glibc-based builds need the extern declaration, while uClibc doesn't.
+ * However, uClibc headers also define __GLIBC__, hence the hack below.
+ */
+#if defined(__GLIBC__) && !defined(__UCLIBC__)
+// pragma diagnostic was introduced in gcc 4.6
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wredundant-decls"
+#endif
+extern size_t strlcpy(char *dest, const char *src, size_t size);
+#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+char *str_error_r(int errnum, char *buf, size_t buflen);
+
+/**
+ * strstarts - does @str start with @prefix?
+ * @str: string to examine
+ * @prefix: prefix to look for.
+ */
+static inline bool strstarts(const char *str, const char *prefix)
+{
+ return strncmp(str, prefix, strlen(prefix)) == 0;
+}
+
+#endif /* _TOOLS_LINUX_STRING_H_ */
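
A trivial strstarts() example (the prefix is illustrative):

#include <linux/string.h>

static bool is_sysfs_path(const char *path)
{
	return strstarts(path, "/sys/");
}
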
diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h
new file mode 100644
index 000000000..60e2c187d
--- /dev/null
+++ b/tools/include/linux/stringify.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_STRINGIFY_H
+#define __LINUX_STRINGIFY_H
+
+/* Indirect stringification. Doing two levels allows the parameter to be a
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#endif /* !__LINUX_STRINGIFY_H */
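
To see the two-level expansion at work (the macro names are illustrative):

#include <linux/stringify.h>

#define VER_MAJOR 4
#define VER_MINOR 19

/* Arguments are macro-expanded first, then stringified: "4" "." "19". */
static const char version[] = __stringify(VER_MAJOR) "." __stringify(VER_MINOR);
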
diff --git a/tools/include/linux/time64.h b/tools/include/linux/time64.h
new file mode 100644
index 000000000..55fa644b9
--- /dev/null
+++ b/tools/include/linux/time64.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_TIME64_H
+#define _TOOLS_LINUX_TIME64_H
+
+#define MSEC_PER_SEC 1000L
+#define USEC_PER_MSEC 1000L
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_MSEC 1000000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+#define FSEC_PER_SEC 1000000000000000LL
+
+#endif /* _TOOLS_LINUX_TIME64_H */
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
new file mode 100644
index 000000000..154eb4e3c
--- /dev/null
+++ b/tools/include/linux/types.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_TYPES_H_
+#define _TOOLS_LINUX_TYPES_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
+#include <asm/types.h>
+#include <asm/posix_types.h>
+
+struct page;
+struct kmem_cache;
+
+typedef enum {
+ GFP_KERNEL,
+ GFP_ATOMIC,
+ __GFP_HIGHMEM,
+ __GFP_HIGH
+} gfp_t;
+
+/*
+ * We define u64 as uint64_t for every architecture
+ * so that we can print it with "%"PRIx64 without getting warnings.
+ *
+ * typedef __u64 u64;
+ * typedef __s64 s64;
+ */
+typedef uint64_t u64;
+typedef int64_t s64;
+
+typedef __u32 u32;
+typedef __s32 s32;
+
+typedef __u16 u16;
+typedef __s16 s16;
+
+typedef __u8 u8;
+typedef __s8 s8;
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#define __bitwise __bitwise__
+
+#define __force
+#define __user
+#define __must_check
+#define __cold
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+#endif /* _TOOLS_LINUX_TYPES_H_ */
diff --git a/tools/include/linux/unaligned/packed_struct.h b/tools/include/linux/unaligned/packed_struct.h
new file mode 100644
index 000000000..dbd93c7df
--- /dev/null
+++ b/tools/include/linux/unaligned/packed_struct.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H
+#define _LINUX_UNALIGNED_PACKED_STRUCT_H
+
+#include <linux/kernel.h>
+
+struct __una_u16 { u16 x; } __packed;
+struct __una_u32 { u32 x; } __packed;
+struct __una_u64 { u64 x; } __packed;
+
+static inline u16 __get_unaligned_cpu16(const void *p)
+{
+ const struct __una_u16 *ptr = (const struct __una_u16 *)p;
+ return ptr->x;
+}
+
+static inline u32 __get_unaligned_cpu32(const void *p)
+{
+ const struct __una_u32 *ptr = (const struct __una_u32 *)p;
+ return ptr->x;
+}
+
+static inline u64 __get_unaligned_cpu64(const void *p)
+{
+ const struct __una_u64 *ptr = (const struct __una_u64 *)p;
+ return ptr->x;
+}
+
+static inline void __put_unaligned_cpu16(u16 val, void *p)
+{
+ struct __una_u16 *ptr = (struct __una_u16 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu32(u32 val, void *p)
+{
+ struct __una_u32 *ptr = (struct __una_u32 *)p;
+ ptr->x = val;
+}
+
+static inline void __put_unaligned_cpu64(u64 val, void *p)
+{
+ struct __una_u64 *ptr = (struct __una_u64 *)p;
+ ptr->x = val;
+}
+
+#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */
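
The __packed structs make the compiler emit whatever access sequence is safe
for the target instead of a potentially faulting aligned load. A sketch of
the typical use, reading a field at an arbitrary byte offset (names
illustrative):

#include <stddef.h>
#include <linux/types.h>
#include <linux/unaligned/packed_struct.h>

/* Read a CPU-endian u32 at a possibly unaligned offset inside a blob. */
static u32 blob_read_u32(const unsigned char *blob, size_t offset)
{
	return __get_unaligned_cpu32(blob + offset);
}
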
diff --git a/tools/include/tools/be_byteshift.h b/tools/include/tools/be_byteshift.h
new file mode 100644
index 000000000..f7d1d1698
--- /dev/null
+++ b/tools/include/tools/be_byteshift.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_BE_BYTESHIFT_H
+#define _TOOLS_BE_BYTESHIFT_H
+
+#include <stdint.h>
+
+static inline uint16_t __get_unaligned_be16(const uint8_t *p)
+{
+ return p[0] << 8 | p[1];
+}
+
+static inline uint32_t __get_unaligned_be32(const uint8_t *p)
+{
+ return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
+}
+
+static inline uint64_t __get_unaligned_be64(const uint8_t *p)
+{
+ return (uint64_t)__get_unaligned_be32(p) << 32 |
+ __get_unaligned_be32(p + 4);
+}
+
+static inline void __put_unaligned_be16(uint16_t val, uint8_t *p)
+{
+ *p++ = val >> 8;
+ *p++ = val;
+}
+
+static inline void __put_unaligned_be32(uint32_t val, uint8_t *p)
+{
+ __put_unaligned_be16(val >> 16, p);
+ __put_unaligned_be16(val, p + 2);
+}
+
+static inline void __put_unaligned_be64(uint64_t val, uint8_t *p)
+{
+ __put_unaligned_be32(val >> 32, p);
+ __put_unaligned_be32(val, p + 4);
+}
+
+static inline uint16_t get_unaligned_be16(const void *p)
+{
+ return __get_unaligned_be16((const uint8_t *)p);
+}
+
+static inline uint32_t get_unaligned_be32(const void *p)
+{
+ return __get_unaligned_be32((const uint8_t *)p);
+}
+
+static inline uint64_t get_unaligned_be64(const void *p)
+{
+ return __get_unaligned_be64((const uint8_t *)p);
+}
+
+static inline void put_unaligned_be16(uint16_t val, void *p)
+{
+ __put_unaligned_be16(val, p);
+}
+
+static inline void put_unaligned_be32(uint32_t val, void *p)
+{
+ __put_unaligned_be32(val, p);
+}
+
+static inline void put_unaligned_be64(uint64_t val, void *p)
+{
+ __put_unaligned_be64(val, p);
+}
+
+#endif /* _TOOLS_BE_BYTESHIFT_H */
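
Because these helpers assemble values byte by byte, they are endian- and
alignment-safe on any host. A sketch packing a big-endian wire header (the
field layout is illustrative):

#include <stdint.h>
#include <tools/be_byteshift.h>

/* Pack type (2 bytes) and length (4 bytes) in network byte order. */
static void put_wire_header(uint8_t buf[6], uint16_t type, uint32_t len)
{
	put_unaligned_be16(type, buf);
	put_unaligned_be32(len, buf + 2);
}

static uint32_t get_wire_len(const uint8_t buf[6])
{
	return get_unaligned_be32(buf + 2);
}
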
diff --git a/tools/include/tools/config.h b/tools/include/tools/config.h
new file mode 100644
index 000000000..08ade7df8
--- /dev/null
+++ b/tools/include/tools/config.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_CONFIG_H
+#define _TOOLS_CONFIG_H
+
+/* Subset of include/linux/kconfig.h */
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+/*
+ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
+ * these only work with boolean and tristate options.
+ */
+
+/*
+ * Getting something that works in C and CPP for an arg that may or may
+ * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1"
+ * we match on the placeholder define, insert the "0," for arg1 and generate
+ * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one).
+ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
+ * the last step cherry picks the 2nd arg, we get a zero.
+ */
+#define __is_defined(x) ___is_defined(x)
+#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+#endif /* _TOOLS_CONFIG_H */
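
Tracing the trick with a concrete value (CONFIG_FOO below is a stand-in for a
define normally supplied by generated kconfig headers): with CONFIG_FOO
defined to 1, __is_defined(CONFIG_FOO) pastes to __ARG_PLACEHOLDER_1, which
expands to "0,", so __take_second_arg() sees the argument list (0, 1, 0) and
yields 1; with CONFIG_FOO undefined, the paste produces a single first
argument and __take_second_arg() yields the trailing 0.

#include <tools/config.h>

#define CONFIG_FOO 1	/* stand-in for a generated kconfig define */

static int foo_enabled(void)
{
	return IS_BUILTIN(CONFIG_FOO);	/* constant 1; 0 if CONFIG_FOO were absent */
}
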
diff --git a/tools/include/tools/endian.h b/tools/include/tools/endian.h
new file mode 100644
index 000000000..c67888fd4
--- /dev/null
+++ b/tools/include/tools/endian.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ENDIAN_H
+#define _TOOLS_ENDIAN_H
+
+#include <byteswap.h>
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+
+#ifndef htole16
+#define htole16(x) (x)
+#endif
+#ifndef htole32
+#define htole32(x) (x)
+#endif
+#ifndef htole64
+#define htole64(x) (x)
+#endif
+
+#ifndef le16toh
+#define le16toh(x) (x)
+#endif
+
+#ifndef le32toh
+#define le32toh(x) (x)
+#endif
+
+#ifndef le64toh
+#define le64toh(x) (x)
+#endif
+
+#else /* __BYTE_ORDER */
+
+#ifndef htole16
+#define htole16(x) __bswap_16(x)
+#endif
+#ifndef htole32
+#define htole32(x) __bswap_32(x)
+#endif
+#ifndef htole64
+#define htole64(x) __bswap_64(x)
+#endif
+
+#ifndef le16toh
+#define le16toh(x) __bswap_16(x)
+#endif
+
+#ifndef le32toh
+#define le32toh(x) __bswap_32(x)
+#endif
+
+#ifndef le64toh
+#define le64toh(x) __bswap_64(x)
+#endif
+
+#endif
+
+#endif /* _TOOLS_ENDIAN_H */
diff --git a/tools/include/tools/le_byteshift.h b/tools/include/tools/le_byteshift.h
new file mode 100644
index 000000000..dc8565f39
--- /dev/null
+++ b/tools/include/tools/le_byteshift.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LE_BYTESHIFT_H
+#define _TOOLS_LE_BYTESHIFT_H
+
+#include <stdint.h>
+
+static inline uint16_t __get_unaligned_le16(const uint8_t *p)
+{
+ return p[0] | p[1] << 8;
+}
+
+static inline uint32_t __get_unaligned_le32(const uint8_t *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+}
+
+static inline uint64_t __get_unaligned_le64(const uint8_t *p)
+{
+ return (uint64_t)__get_unaligned_le32(p + 4) << 32 |
+ __get_unaligned_le32(p);
+}
+
+static inline void __put_unaligned_le16(uint16_t val, uint8_t *p)
+{
+ *p++ = val;
+ *p++ = val >> 8;
+}
+
+static inline void __put_unaligned_le32(uint32_t val, uint8_t *p)
+{
+ __put_unaligned_le16(val >> 16, p + 2);
+ __put_unaligned_le16(val, p);
+}
+
+static inline void __put_unaligned_le64(uint64_t val, uint8_t *p)
+{
+ __put_unaligned_le32(val >> 32, p + 4);
+ __put_unaligned_le32(val, p);
+}
+
+static inline uint16_t get_unaligned_le16(const void *p)
+{
+ return __get_unaligned_le16((const uint8_t *)p);
+}
+
+static inline uint32_t get_unaligned_le32(const void *p)
+{
+ return __get_unaligned_le32((const uint8_t *)p);
+}
+
+static inline uint64_t get_unaligned_le64(const void *p)
+{
+ return __get_unaligned_le64((const uint8_t *)p);
+}
+
+static inline void put_unaligned_le16(uint16_t val, void *p)
+{
+ __put_unaligned_le16(val, p);
+}
+
+static inline void put_unaligned_le32(uint32_t val, void *p)
+{
+ __put_unaligned_le32(val, p);
+}
+
+static inline void put_unaligned_le64(uint64_t val, void *p)
+{
+ __put_unaligned_le64(val, p);
+}
+
+#endif /* _TOOLS_LE_BYTESHIFT_H */
diff --git a/tools/include/tools/libc_compat.h b/tools/include/tools/libc_compat.h
new file mode 100644
index 000000000..e907ba6f1
--- /dev/null
+++ b/tools/include/tools/libc_compat.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: (LGPL-2.0+ OR BSD-2-Clause)
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+
+#ifndef __TOOLS_LIBC_COMPAT_H
+#define __TOOLS_LIBC_COMPAT_H
+
+#include <stdlib.h>
+#include <linux/overflow.h>
+
+#ifdef COMPAT_NEED_REALLOCARRAY
+static inline void *reallocarray(void *ptr, size_t nmemb, size_t size)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(nmemb, size, &bytes)))
+ return NULL;
+ return realloc(ptr, bytes);
+}
+#endif
+#endif
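
This fallback is only compiled in when the build defines
COMPAT_NEED_REALLOCARRAY, i.e. when libc does not already provide
reallocarray(). Typical growth-loop use, as a sketch (names illustrative):

#include <stdint.h>
#include <tools/libc_compat.h>

/* Double an array's capacity; fails cleanly if count * size would overflow. */
static int grow_u64_array(uint64_t **arr, size_t *cap)
{
	size_t new_cap = *cap ? *cap * 2 : 16;
	uint64_t *tmp = reallocarray(*arr, new_cap, sizeof(**arr));

	if (!tmp)
		return -1;
	*arr = tmp;
	*cap = new_cap;
	return 0;
}
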
diff --git a/tools/include/trace/events/lock.h b/tools/include/trace/events/lock.h
new file mode 100644
index 000000000..5b15fd5ee
--- /dev/null
+++ b/tools/include/trace/events/lock.h
@@ -0,0 +1,4 @@
+#ifndef _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H
+#define _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H
+
+#endif /* _TOOLS_INCLUDE_TRACE_EVENTS_LOCK_H */
diff --git a/tools/include/uapi/asm-generic/bitsperlong.h b/tools/include/uapi/asm-generic/bitsperlong.h
new file mode 100644
index 000000000..23e6c416b
--- /dev/null
+++ b/tools/include/uapi/asm-generic/bitsperlong.h
@@ -0,0 +1,15 @@
+#ifndef _UAPI__ASM_GENERIC_BITS_PER_LONG
+#define _UAPI__ASM_GENERIC_BITS_PER_LONG
+
+/*
+ * There seems to be no way of detecting this automatically from user
+ * space, so 64 bit architectures should override this in their
+ * bitsperlong.h. In particular, an architecture that supports
+ * both 32 and 64 bit user space must not rely on CONFIG_64BIT
+ * to decide it, but rather check a compiler provided macro.
+ */
+#ifndef __BITS_PER_LONG
+#define __BITS_PER_LONG 32
+#endif
+
+#endif /* _UAPI__ASM_GENERIC_BITS_PER_LONG */
diff --git a/tools/include/uapi/asm-generic/bpf_perf_event.h b/tools/include/uapi/asm-generic/bpf_perf_event.h
new file mode 100644
index 000000000..53815d2cd
--- /dev/null
+++ b/tools/include/uapi/asm-generic/bpf_perf_event.h
@@ -0,0 +1,9 @@
+#ifndef _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__
+
+#include <linux/ptrace.h>
+
+/* Export kernel pt_regs structure */
+typedef struct pt_regs bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_GENERIC_BPF_PERF_EVENT_H__ */
diff --git a/tools/include/uapi/asm-generic/errno-base.h b/tools/include/uapi/asm-generic/errno-base.h
new file mode 100644
index 000000000..9653140bf
--- /dev/null
+++ b/tools/include/uapi/asm-generic/errno-base.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_GENERIC_ERRNO_BASE_H
+#define _ASM_GENERIC_ERRNO_BASE_H
+
+#define EPERM 1 /* Operation not permitted */
+#define ENOENT 2 /* No such file or directory */
+#define ESRCH 3 /* No such process */
+#define EINTR 4 /* Interrupted system call */
+#define EIO 5 /* I/O error */
+#define ENXIO 6 /* No such device or address */
+#define E2BIG 7 /* Argument list too long */
+#define ENOEXEC 8 /* Exec format error */
+#define EBADF 9 /* Bad file number */
+#define ECHILD 10 /* No child processes */
+#define EAGAIN 11 /* Try again */
+#define ENOMEM 12 /* Out of memory */
+#define EACCES 13 /* Permission denied */
+#define EFAULT 14 /* Bad address */
+#define ENOTBLK 15 /* Block device required */
+#define EBUSY 16 /* Device or resource busy */
+#define EEXIST 17 /* File exists */
+#define EXDEV 18 /* Cross-device link */
+#define ENODEV 19 /* No such device */
+#define ENOTDIR 20 /* Not a directory */
+#define EISDIR 21 /* Is a directory */
+#define EINVAL 22 /* Invalid argument */
+#define ENFILE 23 /* File table overflow */
+#define EMFILE 24 /* Too many open files */
+#define ENOTTY 25 /* Not a typewriter */
+#define ETXTBSY 26 /* Text file busy */
+#define EFBIG 27 /* File too large */
+#define ENOSPC 28 /* No space left on device */
+#define ESPIPE 29 /* Illegal seek */
+#define EROFS 30 /* Read-only file system */
+#define EMLINK 31 /* Too many links */
+#define EPIPE 32 /* Broken pipe */
+#define EDOM 33 /* Math argument out of domain of func */
+#define ERANGE 34 /* Math result not representable */
+
+#endif
diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h
new file mode 100644
index 000000000..cf9c51ac4
--- /dev/null
+++ b/tools/include/uapi/asm-generic/errno.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _ASM_GENERIC_ERRNO_H
+#define _ASM_GENERIC_ERRNO_H
+
+#include <asm-generic/errno-base.h>
+
+#define EDEADLK 35 /* Resource deadlock would occur */
+#define ENAMETOOLONG 36 /* File name too long */
+#define ENOLCK 37 /* No record locks available */
+
+/*
+ * This error code is special: arch syscall entry code will return
+ * -ENOSYS if users try to call a syscall that doesn't exist. To keep
+ * failures of syscalls that really do exist distinguishable from
+ * failures due to attempts to use a nonexistent syscall, syscall
+ * implementations should refrain from returning -ENOSYS.
+ */
+#define ENOSYS 38 /* Invalid system call number */
+
+#define ENOTEMPTY 39 /* Directory not empty */
+#define ELOOP 40 /* Too many symbolic links encountered */
+#define EWOULDBLOCK EAGAIN /* Operation would block */
+#define ENOMSG 42 /* No message of desired type */
+#define EIDRM 43 /* Identifier removed */
+#define ECHRNG 44 /* Channel number out of range */
+#define EL2NSYNC 45 /* Level 2 not synchronized */
+#define EL3HLT 46 /* Level 3 halted */
+#define EL3RST 47 /* Level 3 reset */
+#define ELNRNG 48 /* Link number out of range */
+#define EUNATCH 49 /* Protocol driver not attached */
+#define ENOCSI 50 /* No CSI structure available */
+#define EL2HLT 51 /* Level 2 halted */
+#define EBADE 52 /* Invalid exchange */
+#define EBADR 53 /* Invalid request descriptor */
+#define EXFULL 54 /* Exchange full */
+#define ENOANO 55 /* No anode */
+#define EBADRQC 56 /* Invalid request code */
+#define EBADSLT 57 /* Invalid slot */
+
+#define EDEADLOCK EDEADLK
+
+#define EBFONT 59 /* Bad font file format */
+#define ENOSTR 60 /* Device not a stream */
+#define ENODATA 61 /* No data available */
+#define ETIME 62 /* Timer expired */
+#define ENOSR 63 /* Out of streams resources */
+#define ENONET 64 /* Machine is not on the network */
+#define ENOPKG 65 /* Package not installed */
+#define EREMOTE 66 /* Object is remote */
+#define ENOLINK 67 /* Link has been severed */
+#define EADV 68 /* Advertise error */
+#define ESRMNT 69 /* Srmount error */
+#define ECOMM 70 /* Communication error on send */
+#define EPROTO 71 /* Protocol error */
+#define EMULTIHOP 72 /* Multihop attempted */
+#define EDOTDOT 73 /* RFS specific error */
+#define EBADMSG 74 /* Not a data message */
+#define EOVERFLOW 75 /* Value too large for defined data type */
+#define ENOTUNIQ 76 /* Name not unique on network */
+#define EBADFD 77 /* File descriptor in bad state */
+#define EREMCHG 78 /* Remote address changed */
+#define ELIBACC 79 /* Can not access a needed shared library */
+#define ELIBBAD 80 /* Accessing a corrupted shared library */
+#define ELIBSCN 81 /* .lib section in a.out corrupted */
+#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
+#define ELIBEXEC 83 /* Cannot exec a shared library directly */
+#define EILSEQ 84 /* Illegal byte sequence */
+#define ERESTART 85 /* Interrupted system call should be restarted */
+#define ESTRPIPE 86 /* Streams pipe error */
+#define EUSERS 87 /* Too many users */
+#define ENOTSOCK 88 /* Socket operation on non-socket */
+#define EDESTADDRREQ 89 /* Destination address required */
+#define EMSGSIZE 90 /* Message too long */
+#define EPROTOTYPE 91 /* Protocol wrong type for socket */
+#define ENOPROTOOPT 92 /* Protocol not available */
+#define EPROTONOSUPPORT 93 /* Protocol not supported */
+#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
+#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
+#define EPFNOSUPPORT 96 /* Protocol family not supported */
+#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
+#define EADDRINUSE 98 /* Address already in use */
+#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define ENETDOWN 100 /* Network is down */
+#define ENETUNREACH 101 /* Network is unreachable */
+#define ENETRESET 102 /* Network dropped connection because of reset */
+#define ECONNABORTED 103 /* Software caused connection abort */
+#define ECONNRESET 104 /* Connection reset by peer */
+#define ENOBUFS 105 /* No buffer space available */
+#define EISCONN 106 /* Transport endpoint is already connected */
+#define ENOTCONN 107 /* Transport endpoint is not connected */
+#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
+#define ETOOMANYREFS 109 /* Too many references: cannot splice */
+#define ETIMEDOUT 110 /* Connection timed out */
+#define ECONNREFUSED 111 /* Connection refused */
+#define EHOSTDOWN 112 /* Host is down */
+#define EHOSTUNREACH 113 /* No route to host */
+#define EALREADY 114 /* Operation already in progress */
+#define EINPROGRESS 115 /* Operation now in progress */
+#define ESTALE 116 /* Stale file handle */
+#define EUCLEAN 117 /* Structure needs cleaning */
+#define ENOTNAM 118 /* Not a XENIX named type file */
+#define ENAVAIL 119 /* No XENIX semaphores available */
+#define EISNAM 120 /* Is a named type file */
+#define EREMOTEIO 121 /* Remote I/O error */
+#define EDQUOT 122 /* Quota exceeded */
+
+#define ENOMEDIUM 123 /* No medium found */
+#define EMEDIUMTYPE 124 /* Wrong medium type */
+#define ECANCELED 125 /* Operation Canceled */
+#define ENOKEY 126 /* Required key not available */
+#define EKEYEXPIRED 127 /* Key has expired */
+#define EKEYREVOKED 128 /* Key has been revoked */
+#define EKEYREJECTED 129 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define EOWNERDEAD 130 /* Owner died */
+#define ENOTRECOVERABLE 131 /* State not recoverable */
+
+#define ERFKILL 132 /* Operation not possible due to RF-kill */
+
+#define EHWPOISON 133 /* Memory page has hardware error */
+
+#endif
diff --git a/tools/include/uapi/asm-generic/fcntl.h b/tools/include/uapi/asm-generic/fcntl.h
new file mode 100644
index 000000000..ac190958c
--- /dev/null
+++ b/tools/include/uapi/asm-generic/fcntl.h
@@ -0,0 +1,220 @@
+#ifndef _ASM_GENERIC_FCNTL_H
+#define _ASM_GENERIC_FCNTL_H
+
+#include <linux/types.h>
+
+/*
+ * FMODE_EXEC is 0x20
+ * FMODE_NONOTIFY is 0x4000000
+ * These cannot be used by userspace O_* until internal and external open
+ * flags are split.
+ * -Eric Paris
+ */
+
+/*
+ * When introducing new O_* bits, please check its uniqueness in fcntl_init().
+ */
+
+#define O_ACCMODE 00000003
+#define O_RDONLY 00000000
+#define O_WRONLY 00000001
+#define O_RDWR 00000002
+#ifndef O_CREAT
+#define O_CREAT 00000100 /* not fcntl */
+#endif
+#ifndef O_EXCL
+#define O_EXCL 00000200 /* not fcntl */
+#endif
+#ifndef O_NOCTTY
+#define O_NOCTTY 00000400 /* not fcntl */
+#endif
+#ifndef O_TRUNC
+#define O_TRUNC 00001000 /* not fcntl */
+#endif
+#ifndef O_APPEND
+#define O_APPEND 00002000
+#endif
+#ifndef O_NONBLOCK
+#define O_NONBLOCK 00004000
+#endif
+#ifndef O_DSYNC
+#define O_DSYNC 00010000 /* used to be O_SYNC, see below */
+#endif
+#ifndef FASYNC
+#define FASYNC 00020000 /* fcntl, for BSD compatibility */
+#endif
+#ifndef O_DIRECT
+#define O_DIRECT 00040000 /* direct disk access hint */
+#endif
+#ifndef O_LARGEFILE
+#define O_LARGEFILE 00100000
+#endif
+#ifndef O_DIRECTORY
+#define O_DIRECTORY 00200000 /* must be a directory */
+#endif
+#ifndef O_NOFOLLOW
+#define O_NOFOLLOW 00400000 /* don't follow links */
+#endif
+#ifndef O_NOATIME
+#define O_NOATIME 01000000
+#endif
+#ifndef O_CLOEXEC
+#define O_CLOEXEC 02000000 /* set close_on_exec */
+#endif
+
+/*
+ * Before Linux 2.6.33 only O_DSYNC semantics were implemented, but using
+ * the O_SYNC flag. We continue to use the existing numerical value
+ * for O_DSYNC semantics now, but using the correct symbolic name for it.
+ * This new value is used to request true POSIX O_SYNC semantics. It is
+ * defined in this strange way to make sure applications compiled against
+ * new headers get at least O_DSYNC semantics on older kernels.
+ *
+ * This has the nice side-effect that we can simply test for O_DSYNC
+ * wherever we do not care if O_DSYNC or O_SYNC is used.
+ *
+ * Note: __O_SYNC must never be used directly.
+ */
+#ifndef O_SYNC
+#define __O_SYNC 04000000
+#define O_SYNC (__O_SYNC|O_DSYNC)
+#endif
+
+#ifndef O_PATH
+#define O_PATH 010000000
+#endif
+
+#ifndef __O_TMPFILE
+#define __O_TMPFILE 020000000
+#endif
+
+/* a horrid kludge trying to make sure that this will fail on old kernels */
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
+
+#ifndef O_NDELAY
+#define O_NDELAY O_NONBLOCK
+#endif
+
+#define F_DUPFD 0 /* dup */
+#define F_GETFD 1 /* get close_on_exec */
+#define F_SETFD 2 /* set/clear close_on_exec */
+#define F_GETFL 3 /* get file->f_flags */
+#define F_SETFL 4 /* set file->f_flags */
+#ifndef F_GETLK
+#define F_GETLK 5
+#define F_SETLK 6
+#define F_SETLKW 7
+#endif
+#ifndef F_SETOWN
+#define F_SETOWN 8 /* for sockets. */
+#define F_GETOWN 9 /* for sockets. */
+#endif
+#ifndef F_SETSIG
+#define F_SETSIG 10 /* for sockets. */
+#define F_GETSIG 11 /* for sockets. */
+#endif
+
+#ifndef CONFIG_64BIT
+#ifndef F_GETLK64
+#define F_GETLK64 12 /* using 'struct flock64' */
+#define F_SETLK64 13
+#define F_SETLKW64 14
+#endif
+#endif
+
+#ifndef F_SETOWN_EX
+#define F_SETOWN_EX 15
+#define F_GETOWN_EX 16
+#endif
+
+#ifndef F_GETOWNER_UIDS
+#define F_GETOWNER_UIDS 17
+#endif
+
+/*
+ * Open File Description Locks
+ *
+ * Usually record locks held by a process are released on *any* close and are
+ * not inherited across a fork().
+ *
+ * These cmd values will set locks that conflict with process-associated
+ * record locks, but are "owned" by the open file description, not the
+ * process. This means that they are inherited across fork() like BSD (flock)
+ * locks, and they are only released automatically when the last reference to
+ * the open file against which they were acquired is put.
+ */
+#define F_OFD_GETLK 36
+#define F_OFD_SETLK 37
+#define F_OFD_SETLKW 38
+
+#define F_OWNER_TID 0
+#define F_OWNER_PID 1
+#define F_OWNER_PGRP 2
+
+struct f_owner_ex {
+ int type;
+ __kernel_pid_t pid;
+};
+
+/* for F_[GET|SET]FL */
+#define FD_CLOEXEC 1 /* actually anything with low bit set goes */
+
+/* for posix fcntl() and lockf() */
+#ifndef F_RDLCK
+#define F_RDLCK 0
+#define F_WRLCK 1
+#define F_UNLCK 2
+#endif
+
+/* for old implementation of bsd flock () */
+#ifndef F_EXLCK
+#define F_EXLCK 4 /* or 3 */
+#define F_SHLCK 8 /* or 4 */
+#endif
+
+/* operations for bsd flock(), also used by the kernel implementation */
+#define LOCK_SH 1 /* shared lock */
+#define LOCK_EX 2 /* exclusive lock */
+#define LOCK_NB 4 /* or'd with one of the above to prevent
+ blocking */
+#define LOCK_UN 8 /* remove lock */
+
+#define LOCK_MAND 32 /* This is a mandatory flock ... */
+#define LOCK_READ 64 /* which allows concurrent read operations */
+#define LOCK_WRITE 128 /* which allows concurrent write operations */
+#define LOCK_RW 192 /* which allows concurrent read & write ops */
+
+#define F_LINUX_SPECIFIC_BASE 1024
+
+#ifndef HAVE_ARCH_STRUCT_FLOCK
+#ifndef __ARCH_FLOCK_PAD
+#define __ARCH_FLOCK_PAD
+#endif
+
+struct flock {
+ short l_type;
+ short l_whence;
+ __kernel_off_t l_start;
+ __kernel_off_t l_len;
+ __kernel_pid_t l_pid;
+ __ARCH_FLOCK_PAD
+};
+#endif
+
+#ifndef HAVE_ARCH_STRUCT_FLOCK64
+#ifndef __ARCH_FLOCK64_PAD
+#define __ARCH_FLOCK64_PAD
+#endif
+
+struct flock64 {
+ short l_type;
+ short l_whence;
+ __kernel_loff_t l_start;
+ __kernel_loff_t l_len;
+ __kernel_pid_t l_pid;
+ __ARCH_FLOCK64_PAD
+};
+#endif
+
+#endif /* _ASM_GENERIC_FCNTL_H */
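
One practical consequence of the layered O_SYNC definition above: code that
only cares about data integrity can test the O_DSYNC bit alone, while a test
for full O_SYNC must compare against both bits. A sketch:

/* True for both O_DSYNC and O_SYNC opens; see the O_SYNC comment above. */
static int wants_data_sync(int flags)
{
	return (flags & O_DSYNC) != 0;
}

/* True only for full O_SYNC, since O_SYNC == (__O_SYNC | O_DSYNC). */
static int wants_file_sync(int flags)
{
	return (flags & O_SYNC) == O_SYNC;
}
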
diff --git a/tools/include/uapi/asm-generic/ioctls.h b/tools/include/uapi/asm-generic/ioctls.h
new file mode 100644
index 000000000..040651735
--- /dev/null
+++ b/tools/include/uapi/asm-generic/ioctls.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_IOCTLS_H
+#define __ASM_GENERIC_IOCTLS_H
+
+#include <linux/ioctl.h>
+
+/*
+ * These are the most common definitions for tty ioctl numbers.
+ * Most of them do not use the recommended _IOC(), but there is
+ * probably some source code out there hardcoding the number,
+ * so we might as well use them for all new platforms.
+ *
+ * The architectures that use different values here typically
+ * try to be compatible with some Unix variants for the same
+ * architecture.
+ */
+
+/* 0x54 is just a magic number to make these relatively unique ('T') */
+
+#define TCGETS 0x5401
+#define TCSETS 0x5402
+#define TCSETSW 0x5403
+#define TCSETSF 0x5404
+#define TCGETA 0x5405
+#define TCSETA 0x5406
+#define TCSETAW 0x5407
+#define TCSETAF 0x5408
+#define TCSBRK 0x5409
+#define TCXONC 0x540A
+#define TCFLSH 0x540B
+#define TIOCEXCL 0x540C
+#define TIOCNXCL 0x540D
+#define TIOCSCTTY 0x540E
+#define TIOCGPGRP 0x540F
+#define TIOCSPGRP 0x5410
+#define TIOCOUTQ 0x5411
+#define TIOCSTI 0x5412
+#define TIOCGWINSZ 0x5413
+#define TIOCSWINSZ 0x5414
+#define TIOCMGET 0x5415
+#define TIOCMBIS 0x5416
+#define TIOCMBIC 0x5417
+#define TIOCMSET 0x5418
+#define TIOCGSOFTCAR 0x5419
+#define TIOCSSOFTCAR 0x541A
+#define FIONREAD 0x541B
+#define TIOCINQ FIONREAD
+#define TIOCLINUX 0x541C
+#define TIOCCONS 0x541D
+#define TIOCGSERIAL 0x541E
+#define TIOCSSERIAL 0x541F
+#define TIOCPKT 0x5420
+#define FIONBIO 0x5421
+#define TIOCNOTTY 0x5422
+#define TIOCSETD 0x5423
+#define TIOCGETD 0x5424
+#define TCSBRKP 0x5425 /* Needed for POSIX tcsendbreak() */
+#define TIOCSBRK 0x5427 /* BSD compatibility */
+#define TIOCCBRK 0x5428 /* BSD compatibility */
+#define TIOCGSID 0x5429 /* Return the session ID of FD */
+#define TCGETS2 _IOR('T', 0x2A, struct termios2)
+#define TCSETS2 _IOW('T', 0x2B, struct termios2)
+#define TCSETSW2 _IOW('T', 0x2C, struct termios2)
+#define TCSETSF2 _IOW('T', 0x2D, struct termios2)
+#define TIOCGRS485 0x542E
+#ifndef TIOCSRS485
+#define TIOCSRS485 0x542F
+#endif
+#define TIOCGPTN _IOR('T', 0x30, unsigned int) /* Get Pty Number (of pty-mux device) */
+#define TIOCSPTLCK _IOW('T', 0x31, int) /* Lock/unlock Pty */
+#define TIOCGDEV _IOR('T', 0x32, unsigned int) /* Get primary device node of /dev/console */
+#define TCGETX 0x5432 /* SYS5 TCGETX compatibility */
+#define TCSETX 0x5433
+#define TCSETXF 0x5434
+#define TCSETXW 0x5435
+#define TIOCSIG _IOW('T', 0x36, int) /* pty: generate signal */
+#define TIOCVHANGUP 0x5437
+#define TIOCGPKT _IOR('T', 0x38, int) /* Get packet mode state */
+#define TIOCGPTLCK _IOR('T', 0x39, int) /* Get Pty lock state */
+#define TIOCGEXCL _IOR('T', 0x40, int) /* Get exclusive mode state */
+#define TIOCGPTPEER _IO('T', 0x41) /* Safely open the slave */
+
+#define FIONCLEX 0x5450
+#define FIOCLEX 0x5451
+#define FIOASYNC 0x5452
+#define TIOCSERCONFIG 0x5453
+#define TIOCSERGWILD 0x5454
+#define TIOCSERSWILD 0x5455
+#define TIOCGLCKTRMIOS 0x5456
+#define TIOCSLCKTRMIOS 0x5457
+#define TIOCSERGSTRUCT 0x5458 /* For debugging only */
+#define TIOCSERGETLSR 0x5459 /* Get line status register */
+#define TIOCSERGETMULTI 0x545A /* Get multiport config */
+#define TIOCSERSETMULTI 0x545B /* Set multiport config */
+
+#define TIOCMIWAIT 0x545C /* wait for a change on serial input line(s) */
+#define TIOCGICOUNT 0x545D /* read serial port inline interrupt counts */
+
+/*
+ * Some arches already define FIOQSIZE due to a historical
+ * conflict with a Hayes modem-specific ioctl value.
+ */
+#ifndef FIOQSIZE
+# define FIOQSIZE 0x5460
+#endif
+
+/* Used for packet mode */
+#define TIOCPKT_DATA 0
+#define TIOCPKT_FLUSHREAD 1
+#define TIOCPKT_FLUSHWRITE 2
+#define TIOCPKT_STOP 4
+#define TIOCPKT_START 8
+#define TIOCPKT_NOSTOP 16
+#define TIOCPKT_DOSTOP 32
+#define TIOCPKT_IOCTL 64
+
+#define TIOCSER_TEMT 0x01 /* Transmitter physically empty */
+
+#endif /* __ASM_GENERIC_IOCTLS_H */
diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h
new file mode 100644
index 000000000..e7ee32861
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman-common.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_MMAN_COMMON_H
+#define __ASM_GENERIC_MMAN_COMMON_H
+
+/*
+ Author: Michael S. Tsirkin <mst@mellanox.co.il>, Mellanox Technologies Ltd.
+ Based on: asm-xxx/mman.h
+*/
+
+#define PROT_READ 0x1 /* page can be read */
+#define PROT_WRITE 0x2 /* page can be written */
+#define PROT_EXEC 0x4 /* page can be executed */
+#define PROT_SEM 0x8 /* page may be used for atomic ops */
+#define PROT_NONE 0x0 /* page can not be accessed */
+#define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */
+#define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */
+
+#define MAP_SHARED 0x01 /* Share changes */
+#define MAP_PRIVATE 0x02 /* Changes are private */
+#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */
+#define MAP_TYPE 0x0f /* Mask for type of mapping */
+#define MAP_FIXED 0x10 /* Interpret addr exactly */
+#define MAP_ANONYMOUS 0x20 /* don't use a file */
+#ifdef CONFIG_MMAP_ALLOW_UNINITIALIZED
+# define MAP_UNINITIALIZED 0x4000000 /* For anonymous mmap, memory could be uninitialized */
+#else
+# define MAP_UNINITIALIZED 0x0 /* Don't support this flag */
+#endif
+
+/* 0x0100 - 0x80000 flags are defined in asm-generic/mman.h */
+#define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */
+
+/*
+ * Flags for mlock
+ */
+#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */
+
+#define MS_ASYNC 1 /* sync memory asynchronously */
+#define MS_INVALIDATE 2 /* invalidate the caches */
+#define MS_SYNC 4 /* synchronous memory sync */
+
+#define MADV_NORMAL 0 /* no further special treatment */
+#define MADV_RANDOM 1 /* expect random page references */
+#define MADV_SEQUENTIAL 2 /* expect sequential page references */
+#define MADV_WILLNEED 3 /* will need these pages */
+#define MADV_DONTNEED 4 /* don't need these pages */
+
+/* common parameters: try to keep these consistent across architectures */
+#define MADV_FREE 8 /* free pages only if memory pressure */
+#define MADV_REMOVE 9 /* remove these pages & resources */
+#define MADV_DONTFORK 10 /* don't inherit across fork */
+#define MADV_DOFORK 11 /* do inherit across fork */
+#define MADV_HWPOISON 100 /* poison a page for testing */
+#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
+
+#define MADV_MERGEABLE 12 /* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13 /* KSM may not merge identical pages */
+
+#define MADV_HUGEPAGE 14 /* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE 15 /* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP	16		/* Explicitly exclude from the core dump,
+ overrides the coredump filter bits */
+#define MADV_DODUMP 17 /* Clear the MADV_DONTDUMP flag */
+
+#define MADV_WIPEONFORK 18 /* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19 /* Undo MADV_WIPEONFORK */
+
+/* compatibility flags */
+#define MAP_FILE 0
+
+#define PKEY_DISABLE_ACCESS 0x1
+#define PKEY_DISABLE_WRITE 0x2
+#define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\
+ PKEY_DISABLE_WRITE)
+
+#endif /* __ASM_GENERIC_MMAN_COMMON_H */
diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h
new file mode 100644
index 000000000..653687d97
--- /dev/null
+++ b/tools/include/uapi/asm-generic/mman.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __ASM_GENERIC_MMAN_H
+#define __ASM_GENERIC_MMAN_H
+
+#include <asm-generic/mman-common.h>
+
+#define MAP_GROWSDOWN 0x0100 /* stack-like segment */
+#define MAP_DENYWRITE 0x0800 /* ETXTBSY */
+#define MAP_EXECUTABLE 0x1000 /* mark it as an executable */
+#define MAP_LOCKED 0x2000 /* pages are locked */
+#define MAP_NORESERVE 0x4000 /* don't check for reservations */
+#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
+#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_STACK 0x20000 /* give out an address that is best suited for process/thread stacks */
+#define MAP_HUGETLB 0x40000 /* create a huge page mapping */
+#define MAP_SYNC 0x80000 /* perform synchronous page faults for the mapping */
+
+/* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */
+
+#define MCL_CURRENT 1 /* lock all current mappings */
+#define MCL_FUTURE 2 /* lock all future mappings */
+#define MCL_ONFAULT 4 /* lock all pages that are faulted in */
+
+#endif /* __ASM_GENERIC_MMAN_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
new file mode 100644
index 000000000..df4bedb9b
--- /dev/null
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -0,0 +1,785 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#include <asm/bitsperlong.h>
+
+/*
+ * This file contains the system call numbers, based on the
+ * layout of the x86-64 architecture, which embeds the
+ * pointer to the syscall in the table.
+ *
+ * As a basic principle, no duplication of functionality
+ * should be added, e.g. we don't use lseek when llseek
+ * is present. New architectures should use this file
+ * and implement the less feature-full calls in user space.
+ */
+
+#ifndef __SYSCALL
+#define __SYSCALL(x, y)
+#endif
+
+#if __BITS_PER_LONG == 32 || defined(__SYSCALL_COMPAT)
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _32)
+#else
+#define __SC_3264(_nr, _32, _64) __SYSCALL(_nr, _64)
+#endif
+
+#ifdef __SYSCALL_COMPAT
+#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _comp)
+#define __SC_COMP_3264(_nr, _32, _64, _comp) __SYSCALL(_nr, _comp)
+#else
+#define __SC_COMP(_nr, _sys, _comp) __SYSCALL(_nr, _sys)
+#define __SC_COMP_3264(_nr, _32, _64, _comp) __SC_3264(_nr, _32, _64)
+#endif
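__SYSCALL defaults to a no-op, so a plain include of this header only defines the __NR_* numbers. Consumers that want a table define __SYSCALL themselves before including it; below is a hedged sketch of that X-macro pattern, building a number-to-name map the way perf's syscall-table generators do. The include path is an assumption (it must resolve to a copy of this header), and the table is sparse, so unassigned slots stay NULL:

#include <stdio.h>

/* Expand every __SYSCALL(nr, entry) line in the header into [nr] = "entry". */
#define __SYSCALL(nr, entry) [nr] = #entry,
static const char *syscall_names[] = {
#include <asm-generic/unistd.h>	/* assumes the system copy matches this one */
};
#undef __SYSCALL

int main(void)
{
	unsigned int nr = 63;	/* __NR_read in this layout */
	printf("%u -> %s\n", nr,
	       syscall_names[nr] ? syscall_names[nr] : "(unassigned)");
	return 0;
}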
+
+#define __NR_io_setup 0
+__SC_COMP(__NR_io_setup, sys_io_setup, compat_sys_io_setup)
+#define __NR_io_destroy 1
+__SYSCALL(__NR_io_destroy, sys_io_destroy)
+#define __NR_io_submit 2
+__SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit)
+#define __NR_io_cancel 3
+__SYSCALL(__NR_io_cancel, sys_io_cancel)
+#define __NR_io_getevents 4
+__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents)
+
+/* fs/xattr.c */
+#define __NR_setxattr 5
+__SYSCALL(__NR_setxattr, sys_setxattr)
+#define __NR_lsetxattr 6
+__SYSCALL(__NR_lsetxattr, sys_lsetxattr)
+#define __NR_fsetxattr 7
+__SYSCALL(__NR_fsetxattr, sys_fsetxattr)
+#define __NR_getxattr 8
+__SYSCALL(__NR_getxattr, sys_getxattr)
+#define __NR_lgetxattr 9
+__SYSCALL(__NR_lgetxattr, sys_lgetxattr)
+#define __NR_fgetxattr 10
+__SYSCALL(__NR_fgetxattr, sys_fgetxattr)
+#define __NR_listxattr 11
+__SYSCALL(__NR_listxattr, sys_listxattr)
+#define __NR_llistxattr 12
+__SYSCALL(__NR_llistxattr, sys_llistxattr)
+#define __NR_flistxattr 13
+__SYSCALL(__NR_flistxattr, sys_flistxattr)
+#define __NR_removexattr 14
+__SYSCALL(__NR_removexattr, sys_removexattr)
+#define __NR_lremovexattr 15
+__SYSCALL(__NR_lremovexattr, sys_lremovexattr)
+#define __NR_fremovexattr 16
+__SYSCALL(__NR_fremovexattr, sys_fremovexattr)
+
+/* fs/dcache.c */
+#define __NR_getcwd 17
+__SYSCALL(__NR_getcwd, sys_getcwd)
+
+/* fs/cookies.c */
+#define __NR_lookup_dcookie 18
+__SC_COMP(__NR_lookup_dcookie, sys_lookup_dcookie, compat_sys_lookup_dcookie)
+
+/* fs/eventfd.c */
+#define __NR_eventfd2 19
+__SYSCALL(__NR_eventfd2, sys_eventfd2)
+
+/* fs/eventpoll.c */
+#define __NR_epoll_create1 20
+__SYSCALL(__NR_epoll_create1, sys_epoll_create1)
+#define __NR_epoll_ctl 21
+__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl)
+#define __NR_epoll_pwait 22
+__SC_COMP(__NR_epoll_pwait, sys_epoll_pwait, compat_sys_epoll_pwait)
+
+/* fs/fcntl.c */
+#define __NR_dup 23
+__SYSCALL(__NR_dup, sys_dup)
+#define __NR_dup3 24
+__SYSCALL(__NR_dup3, sys_dup3)
+#define __NR3264_fcntl 25
+__SC_COMP_3264(__NR3264_fcntl, sys_fcntl64, sys_fcntl, compat_sys_fcntl64)
+
+/* fs/inotify_user.c */
+#define __NR_inotify_init1 26
+__SYSCALL(__NR_inotify_init1, sys_inotify_init1)
+#define __NR_inotify_add_watch 27
+__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
+#define __NR_inotify_rm_watch 28
+__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+
+/* fs/ioctl.c */
+#define __NR_ioctl 29
+__SC_COMP(__NR_ioctl, sys_ioctl, compat_sys_ioctl)
+
+/* fs/ioprio.c */
+#define __NR_ioprio_set 30
+__SYSCALL(__NR_ioprio_set, sys_ioprio_set)
+#define __NR_ioprio_get 31
+__SYSCALL(__NR_ioprio_get, sys_ioprio_get)
+
+/* fs/locks.c */
+#define __NR_flock 32
+__SYSCALL(__NR_flock, sys_flock)
+
+/* fs/namei.c */
+#define __NR_mknodat 33
+__SYSCALL(__NR_mknodat, sys_mknodat)
+#define __NR_mkdirat 34
+__SYSCALL(__NR_mkdirat, sys_mkdirat)
+#define __NR_unlinkat 35
+__SYSCALL(__NR_unlinkat, sys_unlinkat)
+#define __NR_symlinkat 36
+__SYSCALL(__NR_symlinkat, sys_symlinkat)
+#define __NR_linkat 37
+__SYSCALL(__NR_linkat, sys_linkat)
+#ifdef __ARCH_WANT_RENAMEAT
+/* renameat is superseded with flags by renameat2 */
+#define __NR_renameat 38
+__SYSCALL(__NR_renameat, sys_renameat)
+#endif /* __ARCH_WANT_RENAMEAT */
+
+/* fs/namespace.c */
+#define __NR_umount2 39
+__SYSCALL(__NR_umount2, sys_umount)
+#define __NR_mount 40
+__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+#define __NR_pivot_root 41
+__SYSCALL(__NR_pivot_root, sys_pivot_root)
+
+/* fs/nfsctl.c */
+#define __NR_nfsservctl 42
+__SYSCALL(__NR_nfsservctl, sys_ni_syscall)
+
+/* fs/open.c */
+#define __NR3264_statfs 43
+__SC_COMP_3264(__NR3264_statfs, sys_statfs64, sys_statfs, \
+ compat_sys_statfs64)
+#define __NR3264_fstatfs 44
+__SC_COMP_3264(__NR3264_fstatfs, sys_fstatfs64, sys_fstatfs, \
+ compat_sys_fstatfs64)
+#define __NR3264_truncate 45
+__SC_COMP_3264(__NR3264_truncate, sys_truncate64, sys_truncate, \
+ compat_sys_truncate64)
+#define __NR3264_ftruncate 46
+__SC_COMP_3264(__NR3264_ftruncate, sys_ftruncate64, sys_ftruncate, \
+ compat_sys_ftruncate64)
+
+#define __NR_fallocate 47
+__SC_COMP(__NR_fallocate, sys_fallocate, compat_sys_fallocate)
+#define __NR_faccessat 48
+__SYSCALL(__NR_faccessat, sys_faccessat)
+#define __NR_chdir 49
+__SYSCALL(__NR_chdir, sys_chdir)
+#define __NR_fchdir 50
+__SYSCALL(__NR_fchdir, sys_fchdir)
+#define __NR_chroot 51
+__SYSCALL(__NR_chroot, sys_chroot)
+#define __NR_fchmod 52
+__SYSCALL(__NR_fchmod, sys_fchmod)
+#define __NR_fchmodat 53
+__SYSCALL(__NR_fchmodat, sys_fchmodat)
+#define __NR_fchownat 54
+__SYSCALL(__NR_fchownat, sys_fchownat)
+#define __NR_fchown 55
+__SYSCALL(__NR_fchown, sys_fchown)
+#define __NR_openat 56
+__SC_COMP(__NR_openat, sys_openat, compat_sys_openat)
+#define __NR_close 57
+__SYSCALL(__NR_close, sys_close)
+#define __NR_vhangup 58
+__SYSCALL(__NR_vhangup, sys_vhangup)
+
+/* fs/pipe.c */
+#define __NR_pipe2 59
+__SYSCALL(__NR_pipe2, sys_pipe2)
+
+/* fs/quota.c */
+#define __NR_quotactl 60
+__SYSCALL(__NR_quotactl, sys_quotactl)
+
+/* fs/readdir.c */
+#define __NR_getdents64 61
+__SYSCALL(__NR_getdents64, sys_getdents64)
+
+/* fs/read_write.c */
+#define __NR3264_lseek 62
+__SC_3264(__NR3264_lseek, sys_llseek, sys_lseek)
+#define __NR_read 63
+__SYSCALL(__NR_read, sys_read)
+#define __NR_write 64
+__SYSCALL(__NR_write, sys_write)
+#define __NR_readv 65
+__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+#define __NR_writev 66
+__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+#define __NR_pread64 67
+__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
+#define __NR_pwrite64 68
+__SC_COMP(__NR_pwrite64, sys_pwrite64, compat_sys_pwrite64)
+#define __NR_preadv 69
+__SC_COMP(__NR_preadv, sys_preadv, compat_sys_preadv)
+#define __NR_pwritev 70
+__SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev)
+
+/* fs/sendfile.c */
+#define __NR3264_sendfile 71
+__SYSCALL(__NR3264_sendfile, sys_sendfile64)
+
+/* fs/select.c */
+#define __NR_pselect6 72
+__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6)
+#define __NR_ppoll 73
+__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll)
+
+/* fs/signalfd.c */
+#define __NR_signalfd4 74
+__SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
+
+/* fs/splice.c */
+#define __NR_vmsplice 75
+__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+#define __NR_splice 76
+__SYSCALL(__NR_splice, sys_splice)
+#define __NR_tee 77
+__SYSCALL(__NR_tee, sys_tee)
+
+/* fs/stat.c */
+#define __NR_readlinkat 78
+__SYSCALL(__NR_readlinkat, sys_readlinkat)
+#define __NR3264_fstatat 79
+__SC_3264(__NR3264_fstatat, sys_fstatat64, sys_newfstatat)
+#define __NR3264_fstat 80
+__SC_3264(__NR3264_fstat, sys_fstat64, sys_newfstat)
+
+/* fs/sync.c */
+#define __NR_sync 81
+__SYSCALL(__NR_sync, sys_sync)
+#define __NR_fsync 82
+__SYSCALL(__NR_fsync, sys_fsync)
+#define __NR_fdatasync 83
+__SYSCALL(__NR_fdatasync, sys_fdatasync)
+#ifdef __ARCH_WANT_SYNC_FILE_RANGE2
+#define __NR_sync_file_range2 84
+__SC_COMP(__NR_sync_file_range2, sys_sync_file_range2, \
+ compat_sys_sync_file_range2)
+#else
+#define __NR_sync_file_range 84
+__SC_COMP(__NR_sync_file_range, sys_sync_file_range, \
+ compat_sys_sync_file_range)
+#endif
+
+/* fs/timerfd.c */
+#define __NR_timerfd_create 85
+__SYSCALL(__NR_timerfd_create, sys_timerfd_create)
+#define __NR_timerfd_settime 86
+__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \
+ compat_sys_timerfd_settime)
+#define __NR_timerfd_gettime 87
+__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \
+ compat_sys_timerfd_gettime)
+
+/* fs/utimes.c */
+#define __NR_utimensat 88
+__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat)
+
+/* kernel/acct.c */
+#define __NR_acct 89
+__SYSCALL(__NR_acct, sys_acct)
+
+/* kernel/capability.c */
+#define __NR_capget 90
+__SYSCALL(__NR_capget, sys_capget)
+#define __NR_capset 91
+__SYSCALL(__NR_capset, sys_capset)
+
+/* kernel/exec_domain.c */
+#define __NR_personality 92
+__SYSCALL(__NR_personality, sys_personality)
+
+/* kernel/exit.c */
+#define __NR_exit 93
+__SYSCALL(__NR_exit, sys_exit)
+#define __NR_exit_group 94
+__SYSCALL(__NR_exit_group, sys_exit_group)
+#define __NR_waitid 95
+__SC_COMP(__NR_waitid, sys_waitid, compat_sys_waitid)
+
+/* kernel/fork.c */
+#define __NR_set_tid_address 96
+__SYSCALL(__NR_set_tid_address, sys_set_tid_address)
+#define __NR_unshare 97
+__SYSCALL(__NR_unshare, sys_unshare)
+
+/* kernel/futex.c */
+#define __NR_futex 98
+__SC_COMP(__NR_futex, sys_futex, compat_sys_futex)
+#define __NR_set_robust_list 99
+__SC_COMP(__NR_set_robust_list, sys_set_robust_list, \
+ compat_sys_set_robust_list)
+#define __NR_get_robust_list 100
+__SC_COMP(__NR_get_robust_list, sys_get_robust_list, \
+ compat_sys_get_robust_list)
+
+/* kernel/hrtimer.c */
+#define __NR_nanosleep 101
+__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep)
+
+/* kernel/itimer.c */
+#define __NR_getitimer 102
+__SC_COMP(__NR_getitimer, sys_getitimer, compat_sys_getitimer)
+#define __NR_setitimer 103
+__SC_COMP(__NR_setitimer, sys_setitimer, compat_sys_setitimer)
+
+/* kernel/kexec.c */
+#define __NR_kexec_load 104
+__SC_COMP(__NR_kexec_load, sys_kexec_load, compat_sys_kexec_load)
+
+/* kernel/module.c */
+#define __NR_init_module 105
+__SYSCALL(__NR_init_module, sys_init_module)
+#define __NR_delete_module 106
+__SYSCALL(__NR_delete_module, sys_delete_module)
+
+/* kernel/posix-timers.c */
+#define __NR_timer_create 107
+__SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create)
+#define __NR_timer_gettime 108
+__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime)
+#define __NR_timer_getoverrun 109
+__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun)
+#define __NR_timer_settime 110
+__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime)
+#define __NR_timer_delete 111
+__SYSCALL(__NR_timer_delete, sys_timer_delete)
+#define __NR_clock_settime 112
+__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime)
+#define __NR_clock_gettime 113
+__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime)
+#define __NR_clock_getres 114
+__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres)
+#define __NR_clock_nanosleep 115
+__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \
+ compat_sys_clock_nanosleep)
+
+/* kernel/printk.c */
+#define __NR_syslog 116
+__SYSCALL(__NR_syslog, sys_syslog)
+
+/* kernel/ptrace.c */
+#define __NR_ptrace 117
+__SYSCALL(__NR_ptrace, sys_ptrace)
+
+/* kernel/sched/core.c */
+#define __NR_sched_setparam 118
+__SYSCALL(__NR_sched_setparam, sys_sched_setparam)
+#define __NR_sched_setscheduler 119
+__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler)
+#define __NR_sched_getscheduler 120
+__SYSCALL(__NR_sched_getscheduler, sys_sched_getscheduler)
+#define __NR_sched_getparam 121
+__SYSCALL(__NR_sched_getparam, sys_sched_getparam)
+#define __NR_sched_setaffinity 122
+__SC_COMP(__NR_sched_setaffinity, sys_sched_setaffinity, \
+ compat_sys_sched_setaffinity)
+#define __NR_sched_getaffinity 123
+__SC_COMP(__NR_sched_getaffinity, sys_sched_getaffinity, \
+ compat_sys_sched_getaffinity)
+#define __NR_sched_yield 124
+__SYSCALL(__NR_sched_yield, sys_sched_yield)
+#define __NR_sched_get_priority_max 125
+__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max)
+#define __NR_sched_get_priority_min 126
+__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min)
+#define __NR_sched_rr_get_interval 127
+__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \
+ compat_sys_sched_rr_get_interval)
+
+/* kernel/signal.c */
+#define __NR_restart_syscall 128
+__SYSCALL(__NR_restart_syscall, sys_restart_syscall)
+#define __NR_kill 129
+__SYSCALL(__NR_kill, sys_kill)
+#define __NR_tkill 130
+__SYSCALL(__NR_tkill, sys_tkill)
+#define __NR_tgkill 131
+__SYSCALL(__NR_tgkill, sys_tgkill)
+#define __NR_sigaltstack 132
+__SC_COMP(__NR_sigaltstack, sys_sigaltstack, compat_sys_sigaltstack)
+#define __NR_rt_sigsuspend 133
+__SC_COMP(__NR_rt_sigsuspend, sys_rt_sigsuspend, compat_sys_rt_sigsuspend)
+#define __NR_rt_sigaction 134
+__SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction)
+#define __NR_rt_sigprocmask 135
+__SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask)
+#define __NR_rt_sigpending 136
+__SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending)
+#define __NR_rt_sigtimedwait 137
+__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \
+ compat_sys_rt_sigtimedwait)
+#define __NR_rt_sigqueueinfo 138
+__SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \
+ compat_sys_rt_sigqueueinfo)
+#define __NR_rt_sigreturn 139
+__SC_COMP(__NR_rt_sigreturn, sys_rt_sigreturn, compat_sys_rt_sigreturn)
+
+/* kernel/sys.c */
+#define __NR_setpriority 140
+__SYSCALL(__NR_setpriority, sys_setpriority)
+#define __NR_getpriority 141
+__SYSCALL(__NR_getpriority, sys_getpriority)
+#define __NR_reboot 142
+__SYSCALL(__NR_reboot, sys_reboot)
+#define __NR_setregid 143
+__SYSCALL(__NR_setregid, sys_setregid)
+#define __NR_setgid 144
+__SYSCALL(__NR_setgid, sys_setgid)
+#define __NR_setreuid 145
+__SYSCALL(__NR_setreuid, sys_setreuid)
+#define __NR_setuid 146
+__SYSCALL(__NR_setuid, sys_setuid)
+#define __NR_setresuid 147
+__SYSCALL(__NR_setresuid, sys_setresuid)
+#define __NR_getresuid 148
+__SYSCALL(__NR_getresuid, sys_getresuid)
+#define __NR_setresgid 149
+__SYSCALL(__NR_setresgid, sys_setresgid)
+#define __NR_getresgid 150
+__SYSCALL(__NR_getresgid, sys_getresgid)
+#define __NR_setfsuid 151
+__SYSCALL(__NR_setfsuid, sys_setfsuid)
+#define __NR_setfsgid 152
+__SYSCALL(__NR_setfsgid, sys_setfsgid)
+#define __NR_times 153
+__SC_COMP(__NR_times, sys_times, compat_sys_times)
+#define __NR_setpgid 154
+__SYSCALL(__NR_setpgid, sys_setpgid)
+#define __NR_getpgid 155
+__SYSCALL(__NR_getpgid, sys_getpgid)
+#define __NR_getsid 156
+__SYSCALL(__NR_getsid, sys_getsid)
+#define __NR_setsid 157
+__SYSCALL(__NR_setsid, sys_setsid)
+#define __NR_getgroups 158
+__SYSCALL(__NR_getgroups, sys_getgroups)
+#define __NR_setgroups 159
+__SYSCALL(__NR_setgroups, sys_setgroups)
+#define __NR_uname 160
+__SYSCALL(__NR_uname, sys_newuname)
+#define __NR_sethostname 161
+__SYSCALL(__NR_sethostname, sys_sethostname)
+#define __NR_setdomainname 162
+__SYSCALL(__NR_setdomainname, sys_setdomainname)
+#define __NR_getrlimit 163
+__SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit)
+#define __NR_setrlimit 164
+__SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit)
+#define __NR_getrusage 165
+__SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage)
+#define __NR_umask 166
+__SYSCALL(__NR_umask, sys_umask)
+#define __NR_prctl 167
+__SYSCALL(__NR_prctl, sys_prctl)
+#define __NR_getcpu 168
+__SYSCALL(__NR_getcpu, sys_getcpu)
+
+/* kernel/time.c */
+#define __NR_gettimeofday 169
+__SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday)
+#define __NR_settimeofday 170
+__SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday)
+#define __NR_adjtimex 171
+__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex)
+
+/* kernel/timer.c */
+#define __NR_getpid 172
+__SYSCALL(__NR_getpid, sys_getpid)
+#define __NR_getppid 173
+__SYSCALL(__NR_getppid, sys_getppid)
+#define __NR_getuid 174
+__SYSCALL(__NR_getuid, sys_getuid)
+#define __NR_geteuid 175
+__SYSCALL(__NR_geteuid, sys_geteuid)
+#define __NR_getgid 176
+__SYSCALL(__NR_getgid, sys_getgid)
+#define __NR_getegid 177
+__SYSCALL(__NR_getegid, sys_getegid)
+#define __NR_gettid 178
+__SYSCALL(__NR_gettid, sys_gettid)
+#define __NR_sysinfo 179
+__SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo)
+
+/* ipc/mqueue.c */
+#define __NR_mq_open 180
+__SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open)
+#define __NR_mq_unlink 181
+__SYSCALL(__NR_mq_unlink, sys_mq_unlink)
+#define __NR_mq_timedsend 182
+__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend)
+#define __NR_mq_timedreceive 183
+__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \
+ compat_sys_mq_timedreceive)
+#define __NR_mq_notify 184
+__SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify)
+#define __NR_mq_getsetattr 185
+__SC_COMP(__NR_mq_getsetattr, sys_mq_getsetattr, compat_sys_mq_getsetattr)
+
+/* ipc/msg.c */
+#define __NR_msgget 186
+__SYSCALL(__NR_msgget, sys_msgget)
+#define __NR_msgctl 187
+__SC_COMP(__NR_msgctl, sys_msgctl, compat_sys_msgctl)
+#define __NR_msgrcv 188
+__SC_COMP(__NR_msgrcv, sys_msgrcv, compat_sys_msgrcv)
+#define __NR_msgsnd 189
+__SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd)
+
+/* ipc/sem.c */
+#define __NR_semget 190
+__SYSCALL(__NR_semget, sys_semget)
+#define __NR_semctl 191
+__SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl)
+#define __NR_semtimedop 192
+__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop)
+#define __NR_semop 193
+__SYSCALL(__NR_semop, sys_semop)
+
+/* ipc/shm.c */
+#define __NR_shmget 194
+__SYSCALL(__NR_shmget, sys_shmget)
+#define __NR_shmctl 195
+__SC_COMP(__NR_shmctl, sys_shmctl, compat_sys_shmctl)
+#define __NR_shmat 196
+__SC_COMP(__NR_shmat, sys_shmat, compat_sys_shmat)
+#define __NR_shmdt 197
+__SYSCALL(__NR_shmdt, sys_shmdt)
+
+/* net/socket.c */
+#define __NR_socket 198
+__SYSCALL(__NR_socket, sys_socket)
+#define __NR_socketpair 199
+__SYSCALL(__NR_socketpair, sys_socketpair)
+#define __NR_bind 200
+__SYSCALL(__NR_bind, sys_bind)
+#define __NR_listen 201
+__SYSCALL(__NR_listen, sys_listen)
+#define __NR_accept 202
+__SYSCALL(__NR_accept, sys_accept)
+#define __NR_connect 203
+__SYSCALL(__NR_connect, sys_connect)
+#define __NR_getsockname 204
+__SYSCALL(__NR_getsockname, sys_getsockname)
+#define __NR_getpeername 205
+__SYSCALL(__NR_getpeername, sys_getpeername)
+#define __NR_sendto 206
+__SYSCALL(__NR_sendto, sys_sendto)
+#define __NR_recvfrom 207
+__SC_COMP(__NR_recvfrom, sys_recvfrom, compat_sys_recvfrom)
+#define __NR_setsockopt 208
+__SC_COMP(__NR_setsockopt, sys_setsockopt, compat_sys_setsockopt)
+#define __NR_getsockopt 209
+__SC_COMP(__NR_getsockopt, sys_getsockopt, compat_sys_getsockopt)
+#define __NR_shutdown 210
+__SYSCALL(__NR_shutdown, sys_shutdown)
+#define __NR_sendmsg 211
+__SC_COMP(__NR_sendmsg, sys_sendmsg, compat_sys_sendmsg)
+#define __NR_recvmsg 212
+__SC_COMP(__NR_recvmsg, sys_recvmsg, compat_sys_recvmsg)
+
+/* mm/filemap.c */
+#define __NR_readahead 213
+__SC_COMP(__NR_readahead, sys_readahead, compat_sys_readahead)
+
+/* mm/nommu.c, also with MMU */
+#define __NR_brk 214
+__SYSCALL(__NR_brk, sys_brk)
+#define __NR_munmap 215
+__SYSCALL(__NR_munmap, sys_munmap)
+#define __NR_mremap 216
+__SYSCALL(__NR_mremap, sys_mremap)
+
+/* security/keys/keyctl.c */
+#define __NR_add_key 217
+__SYSCALL(__NR_add_key, sys_add_key)
+#define __NR_request_key 218
+__SYSCALL(__NR_request_key, sys_request_key)
+#define __NR_keyctl 219
+__SC_COMP(__NR_keyctl, sys_keyctl, compat_sys_keyctl)
+
+/* arch/example/kernel/sys_example.c */
+#define __NR_clone 220
+__SYSCALL(__NR_clone, sys_clone)
+#define __NR_execve 221
+__SC_COMP(__NR_execve, sys_execve, compat_sys_execve)
+
+#define __NR3264_mmap 222
+__SC_3264(__NR3264_mmap, sys_mmap2, sys_mmap)
+/* mm/fadvise.c */
+#define __NR3264_fadvise64 223
+__SC_COMP(__NR3264_fadvise64, sys_fadvise64_64, compat_sys_fadvise64_64)
+
+/* mm/, CONFIG_MMU only */
+#ifndef __ARCH_NOMMU
+#define __NR_swapon 224
+__SYSCALL(__NR_swapon, sys_swapon)
+#define __NR_swapoff 225
+__SYSCALL(__NR_swapoff, sys_swapoff)
+#define __NR_mprotect 226
+__SYSCALL(__NR_mprotect, sys_mprotect)
+#define __NR_msync 227
+__SYSCALL(__NR_msync, sys_msync)
+#define __NR_mlock 228
+__SYSCALL(__NR_mlock, sys_mlock)
+#define __NR_munlock 229
+__SYSCALL(__NR_munlock, sys_munlock)
+#define __NR_mlockall 230
+__SYSCALL(__NR_mlockall, sys_mlockall)
+#define __NR_munlockall 231
+__SYSCALL(__NR_munlockall, sys_munlockall)
+#define __NR_mincore 232
+__SYSCALL(__NR_mincore, sys_mincore)
+#define __NR_madvise 233
+__SYSCALL(__NR_madvise, sys_madvise)
+#define __NR_remap_file_pages 234
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages)
+#define __NR_mbind 235
+__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind)
+#define __NR_get_mempolicy 236
+__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy)
+#define __NR_set_mempolicy 237
+__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy)
+#define __NR_migrate_pages 238
+__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages)
+#define __NR_move_pages 239
+__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages)
+#endif
+
+#define __NR_rt_tgsigqueueinfo 240
+__SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \
+ compat_sys_rt_tgsigqueueinfo)
+#define __NR_perf_event_open 241
+__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+#define __NR_accept4 242
+__SYSCALL(__NR_accept4, sys_accept4)
+#define __NR_recvmmsg 243
+__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg)
+
+/*
+ * Architectures may provide up to 16 syscalls of their own
+ * starting with this value.
+ */
+#define __NR_arch_specific_syscall 244
+
+#define __NR_wait4 260
+__SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4)
+#define __NR_prlimit64 261
+__SYSCALL(__NR_prlimit64, sys_prlimit64)
+#define __NR_fanotify_init 262
+__SYSCALL(__NR_fanotify_init, sys_fanotify_init)
+#define __NR_fanotify_mark 263
+__SYSCALL(__NR_fanotify_mark, sys_fanotify_mark)
+#define __NR_name_to_handle_at 264
+__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at)
+#define __NR_open_by_handle_at 265
+__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \
+ compat_sys_open_by_handle_at)
+#define __NR_clock_adjtime 266
+__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime)
+#define __NR_syncfs 267
+__SYSCALL(__NR_syncfs, sys_syncfs)
+#define __NR_setns 268
+__SYSCALL(__NR_setns, sys_setns)
+#define __NR_sendmmsg 269
+__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
+#define __NR_process_vm_readv 270
+__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
+ compat_sys_process_vm_readv)
+#define __NR_process_vm_writev 271
+__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
+ compat_sys_process_vm_writev)
+#define __NR_kcmp 272
+__SYSCALL(__NR_kcmp, sys_kcmp)
+#define __NR_finit_module 273
+__SYSCALL(__NR_finit_module, sys_finit_module)
+#define __NR_sched_setattr 274
+__SYSCALL(__NR_sched_setattr, sys_sched_setattr)
+#define __NR_sched_getattr 275
+__SYSCALL(__NR_sched_getattr, sys_sched_getattr)
+#define __NR_renameat2 276
+__SYSCALL(__NR_renameat2, sys_renameat2)
+#define __NR_seccomp 277
+__SYSCALL(__NR_seccomp, sys_seccomp)
+#define __NR_getrandom 278
+__SYSCALL(__NR_getrandom, sys_getrandom)
+#define __NR_memfd_create 279
+__SYSCALL(__NR_memfd_create, sys_memfd_create)
+#define __NR_bpf 280
+__SYSCALL(__NR_bpf, sys_bpf)
+#define __NR_execveat 281
+__SC_COMP(__NR_execveat, sys_execveat, compat_sys_execveat)
+#define __NR_userfaultfd 282
+__SYSCALL(__NR_userfaultfd, sys_userfaultfd)
+#define __NR_membarrier 283
+__SYSCALL(__NR_membarrier, sys_membarrier)
+#define __NR_mlock2 284
+__SYSCALL(__NR_mlock2, sys_mlock2)
+#define __NR_copy_file_range 285
+__SYSCALL(__NR_copy_file_range, sys_copy_file_range)
+#define __NR_preadv2 286
+__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2)
+#define __NR_pwritev2 287
+__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2)
+#define __NR_pkey_mprotect 288
+__SYSCALL(__NR_pkey_mprotect, sys_pkey_mprotect)
+#define __NR_pkey_alloc 289
+__SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
+#define __NR_pkey_free 290
+__SYSCALL(__NR_pkey_free, sys_pkey_free)
+#define __NR_statx 291
+__SYSCALL(__NR_statx, sys_statx)
+#define __NR_io_pgetevents 292
+__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
+#define __NR_rseq 293
+__SYSCALL(__NR_rseq, sys_rseq)
+
+#undef __NR_syscalls
+#define __NR_syscalls 294
+
+/*
+ * 32 bit systems traditionally used different
+ * syscalls for off_t and loff_t arguments, while
+ * 64 bit systems only need the off_t version.
+ * For new 32 bit platforms, there is no need to
+ * implement the old 32 bit off_t syscalls, so
+ * they take different names.
+ * Here we map the numbers so that both versions
+ * use the same syscall table layout.
+ */
+#if __BITS_PER_LONG == 64 && !defined(__SYSCALL_COMPAT)
+#define __NR_fcntl __NR3264_fcntl
+#define __NR_statfs __NR3264_statfs
+#define __NR_fstatfs __NR3264_fstatfs
+#define __NR_truncate __NR3264_truncate
+#define __NR_ftruncate __NR3264_ftruncate
+#define __NR_lseek __NR3264_lseek
+#define __NR_sendfile __NR3264_sendfile
+#define __NR_newfstatat __NR3264_fstatat
+#define __NR_fstat __NR3264_fstat
+#define __NR_mmap __NR3264_mmap
+#define __NR_fadvise64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat __NR3264_stat
+#define __NR_lstat __NR3264_lstat
+#endif
+#else
+#define __NR_fcntl64 __NR3264_fcntl
+#define __NR_statfs64 __NR3264_statfs
+#define __NR_fstatfs64 __NR3264_fstatfs
+#define __NR_truncate64 __NR3264_truncate
+#define __NR_ftruncate64 __NR3264_ftruncate
+#define __NR_llseek __NR3264_lseek
+#define __NR_sendfile64 __NR3264_sendfile
+#define __NR_fstatat64 __NR3264_fstatat
+#define __NR_fstat64 __NR3264_fstat
+#define __NR_mmap2 __NR3264_mmap
+#define __NR_fadvise64_64 __NR3264_fadvise64
+#ifdef __NR3264_stat
+#define __NR_stat64 __NR3264_stat
+#define __NR_lstat64 __NR3264_lstat
+#endif
+#endif
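The net effect of this mapping is that a single table slot serves both ABIs under different names: slot 62, for instance, is __NR_llseek (sys_llseek) on a 32-bit build and __NR_lseek (sys_lseek) on a 64-bit one. A hedged userspace sketch, assuming an asm-generic architecture (arm64, riscv, ...) so the names resolve as above:

#include <stdio.h>
#include <sys/syscall.h>	/* pulls in the __NR_* numbers */

int main(void)
{
#ifdef __NR_lseek
	/* 64-bit asm-generic ABI: the 3264 slot is plain lseek. */
	printf("lseek  = %d\n", __NR_lseek);
#endif
#ifdef __NR_llseek
	/* 32-bit asm-generic ABI: the same slot carries llseek. */
	printf("llseek = %d\n", __NR_llseek);
#endif
	return 0;
}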
diff --git a/tools/include/uapi/asm/bitsperlong.h b/tools/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000..edba4d93e
--- /dev/null
+++ b/tools/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../../arch/x86/include/uapi/asm/bitsperlong.h"
+#elif defined(__aarch64__)
+#include "../../../arch/arm64/include/uapi/asm/bitsperlong.h"
+#elif defined(__powerpc__)
+#include "../../../arch/powerpc/include/uapi/asm/bitsperlong.h"
+#elif defined(__s390__)
+#include "../../../arch/s390/include/uapi/asm/bitsperlong.h"
+#elif defined(__sparc__)
+#include "../../../arch/sparc/include/uapi/asm/bitsperlong.h"
+#elif defined(__mips__)
+#include "../../../arch/mips/include/uapi/asm/bitsperlong.h"
+#elif defined(__ia64__)
+#include "../../../arch/ia64/include/uapi/asm/bitsperlong.h"
+#elif defined(__riscv)
+#include "../../../arch/riscv/include/uapi/asm/bitsperlong.h"
+#elif defined(__alpha__)
+#include "../../../arch/alpha/include/uapi/asm/bitsperlong.h"
+#else
+#include <asm-generic/bitsperlong.h>
+#endif
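The wrapper exists so tools code can test a single macro regardless of architecture. A one-line sanity check of the dispatch (C11 _Static_assert assumed):

#include <asm/bitsperlong.h>

/* sizeof(long) in bits must agree with whatever header was selected. */
_Static_assert(sizeof(long) * 8 == __BITS_PER_LONG,
	       "__BITS_PER_LONG disagrees with the ABI");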
diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000..13a58531e
--- /dev/null
+++ b/tools/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,7 @@
+#if defined(__aarch64__)
+#include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/uapi/asm/bpf_perf_event.h"
+#else
+#include <uapi/asm-generic/bpf_perf_event.h>
+#endif
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
new file mode 100644
index 000000000..ce3c5945a
--- /dev/null
+++ b/tools/include/uapi/asm/errno.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/uapi/asm/errno.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/uapi/asm/errno.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/uapi/asm/errno.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/uapi/asm/errno.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/uapi/asm/errno.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/uapi/asm/errno.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/uapi/asm/errno.h"
+#else
+#include <asm-generic/errno.h>
+#endif
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
new file mode 100644
index 000000000..300f33663
--- /dev/null
+++ b/tools/include/uapi/drm/drm.h
@@ -0,0 +1,1012 @@
+/**
+ * \file drm.h
+ * Header for the Direct Rendering Manager
+ *
+ * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ *
+ * \par Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
+ */
+
+/*
+ * Copyright 1999 Precision Insight, Inc., Cedar Park, Texas.
+ * Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DRM_H_
+#define _DRM_H_
+
+#if defined(__KERNEL__)
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+typedef unsigned int drm_handle_t;
+
+#elif defined(__linux__)
+
+#include <linux/types.h>
+#include <asm/ioctl.h>
+typedef unsigned int drm_handle_t;
+
+#else /* One of the BSDs */
+
+#include <sys/ioccom.h>
+#include <sys/types.h>
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef int64_t __s64;
+typedef uint64_t __u64;
+typedef size_t __kernel_size_t;
+typedef unsigned long drm_handle_t;
+
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_NAME "drm" /**< Name in kernel, /dev, and /proc */
+#define DRM_MIN_ORDER 5 /**< At least 2^5 bytes = 32 bytes */
+#define DRM_MAX_ORDER 22 /**< Up to 2^22 bytes = 4MB */
+#define DRM_RAM_PERCENT 10 /**< How much system ram can we lock? */
+
+#define _DRM_LOCK_HELD 0x80000000U /**< Hardware lock is held */
+#define _DRM_LOCK_CONT 0x40000000U /**< Hardware lock is contended */
+#define _DRM_LOCK_IS_HELD(lock) ((lock) & _DRM_LOCK_HELD)
+#define _DRM_LOCK_IS_CONT(lock) ((lock) & _DRM_LOCK_CONT)
+#define _DRM_LOCKING_CONTEXT(lock) ((lock) & ~(_DRM_LOCK_HELD|_DRM_LOCK_CONT))
+
+typedef unsigned int drm_context_t;
+typedef unsigned int drm_drawable_t;
+typedef unsigned int drm_magic_t;
+
+/**
+ * Cliprect.
+ *
+ * \warning: If you change this structure, make sure you change
+ * XF86DRIClipRectRec in the server as well
+ *
+ * \note KW: Actually it's illegal to change either for
+ * backwards-compatibility reasons.
+ */
+struct drm_clip_rect {
+ unsigned short x1;
+ unsigned short y1;
+ unsigned short x2;
+ unsigned short y2;
+};
+
+/**
+ * Drawable information.
+ */
+struct drm_drawable_info {
+ unsigned int num_rects;
+ struct drm_clip_rect *rects;
+};
+
+/**
+ * Texture region.
+ */
+struct drm_tex_region {
+ unsigned char next;
+ unsigned char prev;
+ unsigned char in_use;
+ unsigned char padding;
+ unsigned int age;
+};
+
+/**
+ * Hardware lock.
+ *
+ * The lock structure is a simple cache-line aligned integer. To avoid
+ * processor bus contention on a multiprocessor system, there should not be any
+ * other data stored in the same cache line.
+ */
+struct drm_hw_lock {
+ __volatile__ unsigned int lock; /**< lock variable */
+ char padding[60]; /**< Pad to cache line */
+};
+
+/**
+ * DRM_IOCTL_VERSION ioctl argument type.
+ *
+ * \sa drmGetVersion().
+ */
+struct drm_version {
+ int version_major; /**< Major version */
+ int version_minor; /**< Minor version */
+ int version_patchlevel; /**< Patch level */
+ __kernel_size_t name_len; /**< Length of name buffer */
+ char __user *name; /**< Name of driver */
+ __kernel_size_t date_len; /**< Length of date buffer */
+ char __user *date; /**< User-space buffer to hold date */
+ __kernel_size_t desc_len; /**< Length of desc buffer */
+ char __user *desc; /**< User-space buffer to hold desc */
+};
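The three buffer/length pairs follow the usual two-pass DRM convention: issue the ioctl once with everything zeroed so the kernel reports the lengths, allocate, then issue it again to fetch the strings — this is what libdrm's drmGetVersion() does underneath. A hedged raw-ioctl sketch (DRM_IOCTL_VERSION is defined further down in this header; the device path is an assumption):

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <drm/drm.h>	/* installed copy of this header */

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);	/* hypothetical node */
	if (fd < 0)
		return 1;

	struct drm_version v;
	memset(&v, 0, sizeof(v));
	/* Pass 1: lengths are zero, the kernel fills in the required sizes. */
	if (ioctl(fd, DRM_IOCTL_VERSION, &v))
		return 1;

	v.name = calloc(1, v.name_len + 1);
	v.date = calloc(1, v.date_len + 1);
	v.desc = calloc(1, v.desc_len + 1);
	/* Pass 2: buffers provided, the kernel copies the strings. */
	if (ioctl(fd, DRM_IOCTL_VERSION, &v) == 0)
		printf("%s %d.%d.%d\n", v.name, v.version_major,
		       v.version_minor, v.version_patchlevel);
	close(fd);	/* free()s omitted for brevity */
	return 0;
}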
+
+/**
+ * DRM_IOCTL_GET_UNIQUE ioctl argument type.
+ *
+ * \sa drmGetBusid() and drmSetBusId().
+ */
+struct drm_unique {
+ __kernel_size_t unique_len; /**< Length of unique */
+ char __user *unique; /**< Unique name for driver instantiation */
+};
+
+struct drm_list {
+ int count; /**< Length of user-space structures */
+ struct drm_version __user *version;
+};
+
+struct drm_block {
+ int unused;
+};
+
+/**
+ * DRM_IOCTL_CONTROL ioctl argument type.
+ *
+ * \sa drmCtlInstHandler() and drmCtlUninstHandler().
+ */
+struct drm_control {
+ enum {
+ DRM_ADD_COMMAND,
+ DRM_RM_COMMAND,
+ DRM_INST_HANDLER,
+ DRM_UNINST_HANDLER
+ } func;
+ int irq;
+};
+
+/**
+ * Type of memory to map.
+ */
+enum drm_map_type {
+ _DRM_FRAME_BUFFER = 0, /**< WC (no caching), no core dump */
+ _DRM_REGISTERS = 1, /**< no caching, no core dump */
+ _DRM_SHM = 2, /**< shared, cached */
+ _DRM_AGP = 3, /**< AGP/GART */
+ _DRM_SCATTER_GATHER = 4, /**< Scatter/gather memory for PCI DMA */
+ _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */
+};
+
+/**
+ * Memory mapping flags.
+ */
+enum drm_map_flags {
+ _DRM_RESTRICTED = 0x01, /**< Cannot be mapped to user-virtual */
+ _DRM_READ_ONLY = 0x02,
+ _DRM_LOCKED = 0x04, /**< shared, cached, locked */
+ _DRM_KERNEL = 0x08, /**< kernel requires access */
+ _DRM_WRITE_COMBINING = 0x10, /**< use write-combining if available */
+ _DRM_CONTAINS_LOCK = 0x20, /**< SHM page that contains lock */
+ _DRM_REMOVABLE = 0x40, /**< Removable mapping */
+ _DRM_DRIVER = 0x80 /**< Managed by driver */
+};
+
+struct drm_ctx_priv_map {
+ unsigned int ctx_id; /**< Context requesting private mapping */
+ void *handle; /**< Handle of map */
+};
+
+/**
+ * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
+ * argument type.
+ *
+ * \sa drmAddMap().
+ */
+struct drm_map {
+ unsigned long offset; /**< Requested physical address (0 for SAREA)*/
+ unsigned long size; /**< Requested physical size (bytes) */
+ enum drm_map_type type; /**< Type of memory to map */
+ enum drm_map_flags flags; /**< Flags */
+ void *handle; /**< User-space: "Handle" to pass to mmap() */
+ /**< Kernel-space: kernel-virtual address */
+ int mtrr; /**< MTRR slot used */
+ /* Private data */
+};
+
+/**
+ * DRM_IOCTL_GET_CLIENT ioctl argument type.
+ */
+struct drm_client {
+ int idx; /**< Which client desired? */
+ int auth; /**< Is client authenticated? */
+ unsigned long pid; /**< Process ID */
+ unsigned long uid; /**< User ID */
+ unsigned long magic; /**< Magic */
+ unsigned long iocs; /**< Ioctl count */
+};
+
+enum drm_stat_type {
+ _DRM_STAT_LOCK,
+ _DRM_STAT_OPENS,
+ _DRM_STAT_CLOSES,
+ _DRM_STAT_IOCTLS,
+ _DRM_STAT_LOCKS,
+ _DRM_STAT_UNLOCKS,
+ _DRM_STAT_VALUE, /**< Generic value */
+ _DRM_STAT_BYTE, /**< Generic byte counter (1024bytes/K) */
+ _DRM_STAT_COUNT, /**< Generic non-byte counter (1000/k) */
+
+ _DRM_STAT_IRQ, /**< IRQ */
+ _DRM_STAT_PRIMARY, /**< Primary DMA bytes */
+ _DRM_STAT_SECONDARY, /**< Secondary DMA bytes */
+ _DRM_STAT_DMA, /**< DMA */
+ _DRM_STAT_SPECIAL, /**< Special DMA (e.g., priority or polled) */
+ _DRM_STAT_MISSED /**< Missed DMA opportunity */
+ /* Add to the *END* of the list */
+};
+
+/**
+ * DRM_IOCTL_GET_STATS ioctl argument type.
+ */
+struct drm_stats {
+ unsigned long count;
+ struct {
+ unsigned long value;
+ enum drm_stat_type type;
+ } data[15];
+};
+
+/**
+ * Hardware locking flags.
+ */
+enum drm_lock_flags {
+ _DRM_LOCK_READY = 0x01, /**< Wait until hardware is ready for DMA */
+ _DRM_LOCK_QUIESCENT = 0x02, /**< Wait until hardware quiescent */
+ _DRM_LOCK_FLUSH = 0x04, /**< Flush this context's DMA queue first */
+ _DRM_LOCK_FLUSH_ALL = 0x08, /**< Flush all DMA queues first */
+ /* These *HALT* flags aren't supported yet
+ -- they will be used to support the
+ full-screen DGA-like mode. */
+ _DRM_HALT_ALL_QUEUES = 0x10, /**< Halt all current and future queues */
+ _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */
+};
+
+/**
+ * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
+ *
+ * \sa drmGetLock() and drmUnlock().
+ */
+struct drm_lock {
+ int context;
+ enum drm_lock_flags flags;
+};
+
+/**
+ * DMA flags
+ *
+ * \warning
+ * These values \e must match xf86drm.h.
+ *
+ * \sa drm_dma.
+ */
+enum drm_dma_flags {
+ /* Flags for DMA buffer dispatch */
+ _DRM_DMA_BLOCK = 0x01, /**<
+ * Block until buffer dispatched.
+ *
+ * \note The buffer may not yet have
+ * been processed by the hardware --
+ * getting a hardware lock with the
+ * hardware quiescent will ensure
+ * that the buffer has been
+ * processed.
+ */
+ _DRM_DMA_WHILE_LOCKED = 0x02, /**< Dispatch while lock held */
+ _DRM_DMA_PRIORITY = 0x04, /**< High priority dispatch */
+
+ /* Flags for DMA buffer request */
+ _DRM_DMA_WAIT = 0x10, /**< Wait for free buffers */
+ _DRM_DMA_SMALLER_OK = 0x20, /**< Smaller-than-requested buffers OK */
+ _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */
+};
+
+/**
+ * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
+ *
+ * \sa drmAddBufs().
+ */
+struct drm_buf_desc {
+ int count; /**< Number of buffers of this size */
+ int size; /**< Size in bytes */
+ int low_mark; /**< Low water mark */
+ int high_mark; /**< High water mark */
+ enum {
+ _DRM_PAGE_ALIGN = 0x01, /**< Align on page boundaries for DMA */
+ _DRM_AGP_BUFFER = 0x02, /**< Buffer is in AGP space */
+ _DRM_SG_BUFFER = 0x04, /**< Scatter/gather memory buffer */
+ _DRM_FB_BUFFER = 0x08, /**< Buffer is in frame buffer */
+ _DRM_PCI_BUFFER_RO = 0x10 /**< Map PCI DMA buffer read-only */
+ } flags;
+ unsigned long agp_start; /**<
+ * Start address of where the AGP buffers are
+ * in the AGP aperture
+ */
+};
+
+/**
+ * DRM_IOCTL_INFO_BUFS ioctl argument type.
+ */
+struct drm_buf_info {
+ int count; /**< Entries in list */
+ struct drm_buf_desc __user *list;
+};
+
+/**
+ * DRM_IOCTL_FREE_BUFS ioctl argument type.
+ */
+struct drm_buf_free {
+ int count;
+ int __user *list;
+};
+
+/**
+ * Buffer information
+ *
+ * \sa drm_buf_map.
+ */
+struct drm_buf_pub {
+ int idx; /**< Index into the master buffer list */
+ int total; /**< Buffer size */
+ int used; /**< Amount of buffer in use (for DMA) */
+ void __user *address; /**< Address of buffer */
+};
+
+/**
+ * DRM_IOCTL_MAP_BUFS ioctl argument type.
+ */
+struct drm_buf_map {
+ int count; /**< Length of the buffer list */
+#ifdef __cplusplus
+ void __user *virt;
+#else
+ void __user *virtual; /**< Mmap'd area in user-virtual */
+#endif
+ struct drm_buf_pub __user *list; /**< Buffer information */
+};
+
+/**
+ * DRM_IOCTL_DMA ioctl argument type.
+ *
+ * Indices here refer to the offset into the buffer list in drm_buf_get.
+ *
+ * \sa drmDMA().
+ */
+struct drm_dma {
+ int context; /**< Context handle */
+ int send_count; /**< Number of buffers to send */
+ int __user *send_indices; /**< List of handles to buffers */
+ int __user *send_sizes; /**< Lengths of data to send */
+ enum drm_dma_flags flags; /**< Flags */
+ int request_count; /**< Number of buffers requested */
+ int request_size; /**< Desired size for buffers */
+ int __user *request_indices; /**< Buffer information */
+ int __user *request_sizes;
+ int granted_count; /**< Number of buffers granted */
+};
+
+enum drm_ctx_flags {
+ _DRM_CONTEXT_PRESERVED = 0x01,
+ _DRM_CONTEXT_2DONLY = 0x02
+};
+
+/**
+ * DRM_IOCTL_ADD_CTX ioctl argument type.
+ *
+ * \sa drmCreateContext() and drmDestroyContext().
+ */
+struct drm_ctx {
+ drm_context_t handle;
+ enum drm_ctx_flags flags;
+};
+
+/**
+ * DRM_IOCTL_RES_CTX ioctl argument type.
+ */
+struct drm_ctx_res {
+ int count;
+ struct drm_ctx __user *contexts;
+};
+
+/**
+ * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
+ */
+struct drm_draw {
+ drm_drawable_t handle;
+};
+
+/**
+ * DRM_IOCTL_UPDATE_DRAW ioctl argument type.
+ */
+typedef enum {
+ DRM_DRAWABLE_CLIPRECTS
+} drm_drawable_info_type_t;
+
+struct drm_update_draw {
+ drm_drawable_t handle;
+ unsigned int type;
+ unsigned int num;
+ unsigned long long data;
+};
+
+/**
+ * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
+ */
+struct drm_auth {
+ drm_magic_t magic;
+};
+
+/**
+ * DRM_IOCTL_IRQ_BUSID ioctl argument type.
+ *
+ * \sa drmGetInterruptFromBusID().
+ */
+struct drm_irq_busid {
+ int irq; /**< IRQ number */
+ int busnum; /**< bus number */
+ int devnum; /**< device number */
+ int funcnum; /**< function number */
+};
+
+enum drm_vblank_seq_type {
+ _DRM_VBLANK_ABSOLUTE = 0x0, /**< Wait for specific vblank sequence number */
+ _DRM_VBLANK_RELATIVE = 0x1, /**< Wait for given number of vblanks */
+ /* bits 1-6 are reserved for high crtcs */
+ _DRM_VBLANK_HIGH_CRTC_MASK = 0x0000003e,
+ _DRM_VBLANK_EVENT = 0x4000000, /**< Send event instead of blocking */
+ _DRM_VBLANK_FLIP = 0x8000000, /**< Scheduled buffer swap should flip */
+ _DRM_VBLANK_NEXTONMISS = 0x10000000, /**< If missed, wait for next vblank */
+ _DRM_VBLANK_SECONDARY = 0x20000000, /**< Secondary display controller */
+ _DRM_VBLANK_SIGNAL = 0x40000000 /**< Send signal instead of blocking, unsupported */
+};
+#define _DRM_VBLANK_HIGH_CRTC_SHIFT 1
+
+#define _DRM_VBLANK_TYPES_MASK (_DRM_VBLANK_ABSOLUTE | _DRM_VBLANK_RELATIVE)
+#define _DRM_VBLANK_FLAGS_MASK (_DRM_VBLANK_EVENT | _DRM_VBLANK_SIGNAL | \
+ _DRM_VBLANK_SECONDARY | _DRM_VBLANK_NEXTONMISS)
+
+struct drm_wait_vblank_request {
+ enum drm_vblank_seq_type type;
+ unsigned int sequence;
+ unsigned long signal;
+};
+
+struct drm_wait_vblank_reply {
+ enum drm_vblank_seq_type type;
+ unsigned int sequence;
+ long tval_sec;
+ long tval_usec;
+};
+
+/**
+ * DRM_IOCTL_WAIT_VBLANK ioctl argument type.
+ *
+ * \sa drmWaitVBlank().
+ */
+union drm_wait_vblank {
+ struct drm_wait_vblank_request request;
+ struct drm_wait_vblank_reply reply;
+};
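Because request and reply overlay each other in the union, one argument serves both directions: userspace fills in a request, and on return the kernel has rewritten it as a reply. A hedged sketch waiting for the next vblank (device path assumed; on multi-CRTC hardware the high-crtc bits above select the pipe):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);
	if (fd < 0)
		return 1;

	union drm_wait_vblank vbl;
	memset(&vbl, 0, sizeof(vbl));
	vbl.request.type = _DRM_VBLANK_RELATIVE;
	vbl.request.sequence = 1;	/* "one vblank from now" */

	if (ioctl(fd, DRM_IOCTL_WAIT_VBLANK, &vbl) == 0)
		/* The union now holds the reply. */
		printf("vblank %u at %ld.%06lds\n", vbl.reply.sequence,
		       vbl.reply.tval_sec, vbl.reply.tval_usec);
	return 0;
}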
+
+#define _DRM_PRE_MODESET 1
+#define _DRM_POST_MODESET 2
+
+/**
+ * DRM_IOCTL_MODESET_CTL ioctl argument type
+ *
+ * \sa drmModesetCtl().
+ */
+struct drm_modeset_ctl {
+ __u32 crtc;
+ __u32 cmd;
+};
+
+/**
+ * DRM_IOCTL_AGP_ENABLE ioctl argument type.
+ *
+ * \sa drmAgpEnable().
+ */
+struct drm_agp_mode {
+ unsigned long mode; /**< AGP mode */
+};
+
+/**
+ * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
+ *
+ * \sa drmAgpAlloc() and drmAgpFree().
+ */
+struct drm_agp_buffer {
+ unsigned long size; /**< In bytes -- will round to page boundary */
+ unsigned long handle; /**< Used for binding / unbinding */
+ unsigned long type; /**< Type of memory to allocate */
+ unsigned long physical; /**< Physical used by i810 */
+};
+
+/**
+ * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
+ *
+ * \sa drmAgpBind() and drmAgpUnbind().
+ */
+struct drm_agp_binding {
+ unsigned long handle; /**< From drm_agp_buffer */
+ unsigned long offset; /**< In bytes -- will round to page boundary */
+};
+
+/**
+ * DRM_IOCTL_AGP_INFO ioctl argument type.
+ *
+ * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
+ * drmAgpBase(), drmAgpSize(), drmAgpMemoryUsed(), drmAgpMemoryAvail(),
+ * drmAgpVendorId() and drmAgpDeviceId().
+ */
+struct drm_agp_info {
+ int agp_version_major;
+ int agp_version_minor;
+ unsigned long mode;
+ unsigned long aperture_base; /* physical address */
+ unsigned long aperture_size; /* bytes */
+ unsigned long memory_allowed; /* bytes */
+ unsigned long memory_used;
+
+ /* PCI information */
+ unsigned short id_vendor;
+ unsigned short id_device;
+};
+
+/**
+ * DRM_IOCTL_SG_ALLOC ioctl argument type.
+ */
+struct drm_scatter_gather {
+ unsigned long size; /**< In bytes -- will round to page boundary */
+ unsigned long handle; /**< Used for mapping / unmapping */
+};
+
+/**
+ * DRM_IOCTL_SET_VERSION ioctl argument type.
+ */
+struct drm_set_version {
+ int drm_di_major;
+ int drm_di_minor;
+ int drm_dd_major;
+ int drm_dd_minor;
+};
+
+/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
+struct drm_gem_close {
+ /** Handle of the object to be closed. */
+ __u32 handle;
+ __u32 pad;
+};
+
+/** DRM_IOCTL_GEM_FLINK ioctl argument type */
+struct drm_gem_flink {
+ /** Handle for the object being named */
+ __u32 handle;
+
+ /** Returned global name */
+ __u32 name;
+};
+
+/** DRM_IOCTL_GEM_OPEN ioctl argument type */
+struct drm_gem_open {
+ /** Name of object being opened */
+ __u32 name;
+
+ /** Returned handle for the object */
+ __u32 handle;
+
+ /** Returned size of the object */
+ __u64 size;
+};
+
+#define DRM_CAP_DUMB_BUFFER 0x1
+#define DRM_CAP_VBLANK_HIGH_CRTC 0x2
+#define DRM_CAP_DUMB_PREFERRED_DEPTH 0x3
+#define DRM_CAP_DUMB_PREFER_SHADOW 0x4
+#define DRM_CAP_PRIME 0x5
+#define DRM_PRIME_CAP_IMPORT 0x1
+#define DRM_PRIME_CAP_EXPORT 0x2
+#define DRM_CAP_TIMESTAMP_MONOTONIC 0x6
+#define DRM_CAP_ASYNC_PAGE_FLIP 0x7
+/*
+ * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that
+ * a hardware-agnostic userspace can query a cursor plane size to use.
+ *
+ * Note that the cross-driver contract is to merely return a valid size;
+ * drivers are free to attach another meaning on top, e.g. i915 returns the
+ * maximum plane size.
+ */
+#define DRM_CAP_CURSOR_WIDTH 0x8
+#define DRM_CAP_CURSOR_HEIGHT 0x9
+#define DRM_CAP_ADDFB2_MODIFIERS 0x10
+#define DRM_CAP_PAGE_FLIP_TARGET 0x11
+#define DRM_CAP_CRTC_IN_VBLANK_EVENT 0x12
+#define DRM_CAP_SYNCOBJ 0x13
+
+/** DRM_IOCTL_GET_CAP ioctl argument type */
+struct drm_get_cap {
+ __u64 capability;
+ __u64 value;
+};
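A capability query is a single read-write ioctl: userspace sets capability, the kernel fills value. For example, checking for dumb-buffer support before trying DRM_IOCTL_MODE_CREATE_DUMB (a hedged sketch, device path assumed):

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR);
	if (fd < 0)
		return 1;

	struct drm_get_cap cap = { .capability = DRM_CAP_DUMB_BUFFER };
	if (ioctl(fd, DRM_IOCTL_GET_CAP, &cap) == 0 && cap.value)
		printf("dumb buffers supported\n");
	return 0;
}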
+
+/**
+ * DRM_CLIENT_CAP_STEREO_3D
+ *
+ * If set to 1, the DRM core will expose the stereo 3D capabilities of the
+ * monitor by advertising the supported 3D layouts in the flags of struct
+ * drm_mode_modeinfo.
+ */
+#define DRM_CLIENT_CAP_STEREO_3D 1
+
+/**
+ * DRM_CLIENT_CAP_UNIVERSAL_PLANES
+ *
+ * If set to 1, the DRM core will expose all planes (overlay, primary, and
+ * cursor) to userspace.
+ */
+#define DRM_CLIENT_CAP_UNIVERSAL_PLANES 2
+
+/**
+ * DRM_CLIENT_CAP_ATOMIC
+ *
+ * If set to 1, the DRM core will expose atomic properties to userspace.
+ */
+#define DRM_CLIENT_CAP_ATOMIC 3
+
+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO 4
+
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene set up in the commit. This depends on the
+ * client also supporting DRM_CLIENT_CAP_ATOMIC.
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
+
+/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+struct drm_set_client_cap {
+ __u64 capability;
+ __u64 value;
+};
+
+#define DRM_RDWR O_RDWR
+#define DRM_CLOEXEC O_CLOEXEC
+struct drm_prime_handle {
+ __u32 handle;
+
+ /** Flags: only applicable for handle->fd conversion */
+ __u32 flags;
+
+ /** Returned dmabuf file descriptor */
+ __s32 fd;
+};
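PRIME turns a GEM handle into a dma-buf fd (and back), which is how buffers are shared across devices and processes. A hedged export sketch; `handle` is assumed to name an existing GEM object on this fd, and DRM_RDWR on export needs a 4.6+ kernel (drop it for read-only sharing):

/* Hypothetical helper: export a GEM handle as a dma-buf fd. */
#include <sys/ioctl.h>
#include <drm/drm.h>

int export_gem_handle(int drm_fd, unsigned int handle)
{
	struct drm_prime_handle args = {
		.handle = handle,
		.flags  = DRM_CLOEXEC | DRM_RDWR,
	};
	if (ioctl(drm_fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &args))
		return -1;
	return args.fd;	/* pass over a socket, import on another device */
}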
+
+struct drm_syncobj_create {
+ __u32 handle;
+#define DRM_SYNCOBJ_CREATE_SIGNALED (1 << 0)
+ __u32 flags;
+};
+
+struct drm_syncobj_destroy {
+ __u32 handle;
+ __u32 pad;
+};
+
+#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0)
+#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0)
+struct drm_syncobj_handle {
+ __u32 handle;
+ __u32 flags;
+
+ __s32 fd;
+ __u32 pad;
+};
+
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL (1 << 0)
+#define DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT (1 << 1)
+struct drm_syncobj_wait {
+ __u64 handles;
+ /* absolute timeout */
+ __s64 timeout_nsec;
+ __u32 count_handles;
+ __u32 flags;
+ __u32 first_signaled; /* only valid when not waiting all */
+ __u32 pad;
+};
+
+struct drm_syncobj_array {
+ __u64 handles;
+ __u32 count_handles;
+ __u32 pad;
+};
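A syncobj wraps a fence so it can be referenced by handle, exported as an fd, reset, and waited on. A hedged sketch (4.13+ kernels) that creates an already-signaled syncobj, polls it, and destroys it:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

int demo_syncobj(int drm_fd)
{
	struct drm_syncobj_create create;
	memset(&create, 0, sizeof(create));
	create.flags = DRM_SYNCOBJ_CREATE_SIGNALED;
	if (ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_CREATE, &create))
		return -1;

	uint32_t handles[1] = { create.handle };
	struct drm_syncobj_wait wait;
	memset(&wait, 0, sizeof(wait));
	wait.handles = (uintptr_t)handles;	/* user pointer carried as a u64 */
	wait.count_handles = 1;
	wait.timeout_nsec = 0;			/* absolute timeout 0: poll */
	int ret = ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_WAIT, &wait);

	struct drm_syncobj_destroy destroy = { .handle = create.handle };
	ioctl(drm_fd, DRM_IOCTL_SYNCOBJ_DESTROY, &destroy);
	return ret;	/* 0: the signaled fence satisfied the wait */
}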
+
+/* Query current scanout sequence number */
+struct drm_crtc_get_sequence {
+ __u32 crtc_id; /* requested crtc_id */
+ __u32 active; /* return: crtc output is active */
+ __u64 sequence; /* return: most recent vblank sequence */
+ __s64 sequence_ns; /* return: most recent time of first pixel out */
+};
+
+/* Queue event to be delivered at specified sequence. Time stamp marks
+ * when the first pixel of the refresh cycle leaves the display engine
+ * for the display
+ */
+#define DRM_CRTC_SEQUENCE_RELATIVE 0x00000001 /* sequence is relative to current */
+#define DRM_CRTC_SEQUENCE_NEXT_ON_MISS 0x00000002 /* Use next sequence if we've missed */
+
+struct drm_crtc_queue_sequence {
+ __u32 crtc_id;
+ __u32 flags;
+ __u64 sequence; /* on input, target sequence. on output, actual sequence */
+ __u64 user_data; /* user data passed to event */
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#include "drm_mode.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_IOCTL_BASE 'd'
+#define DRM_IO(nr) _IO(DRM_IOCTL_BASE,nr)
+#define DRM_IOR(nr,type) _IOR(DRM_IOCTL_BASE,nr,type)
+#define DRM_IOW(nr,type) _IOW(DRM_IOCTL_BASE,nr,type)
+#define DRM_IOWR(nr,type) _IOWR(DRM_IOCTL_BASE,nr,type)
+
+#define DRM_IOCTL_VERSION DRM_IOWR(0x00, struct drm_version)
+#define DRM_IOCTL_GET_UNIQUE DRM_IOWR(0x01, struct drm_unique)
+#define DRM_IOCTL_GET_MAGIC DRM_IOR( 0x02, struct drm_auth)
+#define DRM_IOCTL_IRQ_BUSID DRM_IOWR(0x03, struct drm_irq_busid)
+#define DRM_IOCTL_GET_MAP DRM_IOWR(0x04, struct drm_map)
+#define DRM_IOCTL_GET_CLIENT DRM_IOWR(0x05, struct drm_client)
+#define DRM_IOCTL_GET_STATS DRM_IOR( 0x06, struct drm_stats)
+#define DRM_IOCTL_SET_VERSION DRM_IOWR(0x07, struct drm_set_version)
+#define DRM_IOCTL_MODESET_CTL DRM_IOW(0x08, struct drm_modeset_ctl)
+#define DRM_IOCTL_GEM_CLOSE DRM_IOW (0x09, struct drm_gem_close)
+#define DRM_IOCTL_GEM_FLINK DRM_IOWR(0x0a, struct drm_gem_flink)
+#define DRM_IOCTL_GEM_OPEN DRM_IOWR(0x0b, struct drm_gem_open)
+#define DRM_IOCTL_GET_CAP DRM_IOWR(0x0c, struct drm_get_cap)
+#define DRM_IOCTL_SET_CLIENT_CAP DRM_IOW( 0x0d, struct drm_set_client_cap)
+
+#define DRM_IOCTL_SET_UNIQUE DRM_IOW( 0x10, struct drm_unique)
+#define DRM_IOCTL_AUTH_MAGIC DRM_IOW( 0x11, struct drm_auth)
+#define DRM_IOCTL_BLOCK DRM_IOWR(0x12, struct drm_block)
+#define DRM_IOCTL_UNBLOCK DRM_IOWR(0x13, struct drm_block)
+#define DRM_IOCTL_CONTROL DRM_IOW( 0x14, struct drm_control)
+#define DRM_IOCTL_ADD_MAP DRM_IOWR(0x15, struct drm_map)
+#define DRM_IOCTL_ADD_BUFS DRM_IOWR(0x16, struct drm_buf_desc)
+#define DRM_IOCTL_MARK_BUFS DRM_IOW( 0x17, struct drm_buf_desc)
+#define DRM_IOCTL_INFO_BUFS DRM_IOWR(0x18, struct drm_buf_info)
+#define DRM_IOCTL_MAP_BUFS DRM_IOWR(0x19, struct drm_buf_map)
+#define DRM_IOCTL_FREE_BUFS DRM_IOW( 0x1a, struct drm_buf_free)
+
+#define DRM_IOCTL_RM_MAP DRM_IOW( 0x1b, struct drm_map)
+
+#define DRM_IOCTL_SET_SAREA_CTX DRM_IOW( 0x1c, struct drm_ctx_priv_map)
+#define DRM_IOCTL_GET_SAREA_CTX DRM_IOWR(0x1d, struct drm_ctx_priv_map)
+
+#define DRM_IOCTL_SET_MASTER DRM_IO(0x1e)
+#define DRM_IOCTL_DROP_MASTER DRM_IO(0x1f)
+
+#define DRM_IOCTL_ADD_CTX DRM_IOWR(0x20, struct drm_ctx)
+#define DRM_IOCTL_RM_CTX DRM_IOWR(0x21, struct drm_ctx)
+#define DRM_IOCTL_MOD_CTX DRM_IOW( 0x22, struct drm_ctx)
+#define DRM_IOCTL_GET_CTX DRM_IOWR(0x23, struct drm_ctx)
+#define DRM_IOCTL_SWITCH_CTX DRM_IOW( 0x24, struct drm_ctx)
+#define DRM_IOCTL_NEW_CTX DRM_IOW( 0x25, struct drm_ctx)
+#define DRM_IOCTL_RES_CTX DRM_IOWR(0x26, struct drm_ctx_res)
+#define DRM_IOCTL_ADD_DRAW DRM_IOWR(0x27, struct drm_draw)
+#define DRM_IOCTL_RM_DRAW DRM_IOWR(0x28, struct drm_draw)
+#define DRM_IOCTL_DMA DRM_IOWR(0x29, struct drm_dma)
+#define DRM_IOCTL_LOCK DRM_IOW( 0x2a, struct drm_lock)
+#define DRM_IOCTL_UNLOCK DRM_IOW( 0x2b, struct drm_lock)
+#define DRM_IOCTL_FINISH DRM_IOW( 0x2c, struct drm_lock)
+
+#define DRM_IOCTL_PRIME_HANDLE_TO_FD DRM_IOWR(0x2d, struct drm_prime_handle)
+#define DRM_IOCTL_PRIME_FD_TO_HANDLE DRM_IOWR(0x2e, struct drm_prime_handle)
+
+#define DRM_IOCTL_AGP_ACQUIRE DRM_IO( 0x30)
+#define DRM_IOCTL_AGP_RELEASE DRM_IO( 0x31)
+#define DRM_IOCTL_AGP_ENABLE DRM_IOW( 0x32, struct drm_agp_mode)
+#define DRM_IOCTL_AGP_INFO DRM_IOR( 0x33, struct drm_agp_info)
+#define DRM_IOCTL_AGP_ALLOC DRM_IOWR(0x34, struct drm_agp_buffer)
+#define DRM_IOCTL_AGP_FREE DRM_IOW( 0x35, struct drm_agp_buffer)
+#define DRM_IOCTL_AGP_BIND DRM_IOW( 0x36, struct drm_agp_binding)
+#define DRM_IOCTL_AGP_UNBIND DRM_IOW( 0x37, struct drm_agp_binding)
+
+#define DRM_IOCTL_SG_ALLOC DRM_IOWR(0x38, struct drm_scatter_gather)
+#define DRM_IOCTL_SG_FREE DRM_IOW( 0x39, struct drm_scatter_gather)
+
+#define DRM_IOCTL_WAIT_VBLANK DRM_IOWR(0x3a, union drm_wait_vblank)
+
+#define DRM_IOCTL_CRTC_GET_SEQUENCE DRM_IOWR(0x3b, struct drm_crtc_get_sequence)
+#define DRM_IOCTL_CRTC_QUEUE_SEQUENCE DRM_IOWR(0x3c, struct drm_crtc_queue_sequence)
+
+#define DRM_IOCTL_UPDATE_DRAW DRM_IOW(0x3f, struct drm_update_draw)
+
+#define DRM_IOCTL_MODE_GETRESOURCES DRM_IOWR(0xA0, struct drm_mode_card_res)
+#define DRM_IOCTL_MODE_GETCRTC DRM_IOWR(0xA1, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_SETCRTC DRM_IOWR(0xA2, struct drm_mode_crtc)
+#define DRM_IOCTL_MODE_CURSOR DRM_IOWR(0xA3, struct drm_mode_cursor)
+#define DRM_IOCTL_MODE_GETGAMMA DRM_IOWR(0xA4, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_SETGAMMA DRM_IOWR(0xA5, struct drm_mode_crtc_lut)
+#define DRM_IOCTL_MODE_GETENCODER DRM_IOWR(0xA6, struct drm_mode_get_encoder)
+#define DRM_IOCTL_MODE_GETCONNECTOR DRM_IOWR(0xA7, struct drm_mode_get_connector)
+#define DRM_IOCTL_MODE_ATTACHMODE DRM_IOWR(0xA8, struct drm_mode_mode_cmd) /* deprecated (never worked) */
+#define DRM_IOCTL_MODE_DETACHMODE DRM_IOWR(0xA9, struct drm_mode_mode_cmd) /* deprecated (never worked) */
+
+#define DRM_IOCTL_MODE_GETPROPERTY DRM_IOWR(0xAA, struct drm_mode_get_property)
+#define DRM_IOCTL_MODE_SETPROPERTY DRM_IOWR(0xAB, struct drm_mode_connector_set_property)
+#define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob)
+#define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd)
+#define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int)
+#define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip)
+#define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd)
+
+#define DRM_IOCTL_MODE_CREATE_DUMB DRM_IOWR(0xB2, struct drm_mode_create_dumb)
+#define DRM_IOCTL_MODE_MAP_DUMB DRM_IOWR(0xB3, struct drm_mode_map_dumb)
+#define DRM_IOCTL_MODE_DESTROY_DUMB DRM_IOWR(0xB4, struct drm_mode_destroy_dumb)
+#define DRM_IOCTL_MODE_GETPLANERESOURCES DRM_IOWR(0xB5, struct drm_mode_get_plane_res)
+#define DRM_IOCTL_MODE_GETPLANE DRM_IOWR(0xB6, struct drm_mode_get_plane)
+#define DRM_IOCTL_MODE_SETPLANE DRM_IOWR(0xB7, struct drm_mode_set_plane)
+#define DRM_IOCTL_MODE_ADDFB2 DRM_IOWR(0xB8, struct drm_mode_fb_cmd2)
+#define DRM_IOCTL_MODE_OBJ_GETPROPERTIES DRM_IOWR(0xB9, struct drm_mode_obj_get_properties)
+#define DRM_IOCTL_MODE_OBJ_SETPROPERTY DRM_IOWR(0xBA, struct drm_mode_obj_set_property)
+#define DRM_IOCTL_MODE_CURSOR2 DRM_IOWR(0xBB, struct drm_mode_cursor2)
+#define DRM_IOCTL_MODE_ATOMIC DRM_IOWR(0xBC, struct drm_mode_atomic)
+#define DRM_IOCTL_MODE_CREATEPROPBLOB DRM_IOWR(0xBD, struct drm_mode_create_blob)
+#define DRM_IOCTL_MODE_DESTROYPROPBLOB DRM_IOWR(0xBE, struct drm_mode_destroy_blob)
+
+#define DRM_IOCTL_SYNCOBJ_CREATE DRM_IOWR(0xBF, struct drm_syncobj_create)
+#define DRM_IOCTL_SYNCOBJ_DESTROY DRM_IOWR(0xC0, struct drm_syncobj_destroy)
+#define DRM_IOCTL_SYNCOBJ_HANDLE_TO_FD DRM_IOWR(0xC1, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_FD_TO_HANDLE DRM_IOWR(0xC2, struct drm_syncobj_handle)
+#define DRM_IOCTL_SYNCOBJ_WAIT DRM_IOWR(0xC3, struct drm_syncobj_wait)
+#define DRM_IOCTL_SYNCOBJ_RESET DRM_IOWR(0xC4, struct drm_syncobj_array)
+#define DRM_IOCTL_SYNCOBJ_SIGNAL DRM_IOWR(0xC5, struct drm_syncobj_array)
+
+#define DRM_IOCTL_MODE_CREATE_LEASE DRM_IOWR(0xC6, struct drm_mode_create_lease)
+#define DRM_IOCTL_MODE_LIST_LESSEES DRM_IOWR(0xC7, struct drm_mode_list_lessees)
+#define DRM_IOCTL_MODE_GET_LEASE DRM_IOWR(0xC8, struct drm_mode_get_lease)
+#define DRM_IOCTL_MODE_REVOKE_LEASE DRM_IOWR(0xC9, struct drm_mode_revoke_lease)
+
+/**
+ * Device specific ioctls should only be in their respective headers.
+ * The device specific ioctl range is from 0x40 to 0x9f.
+ * Generic ioctls restart at 0xA0.
+ *
+ * \sa drmCommandNone(), drmCommandRead(), drmCommandWrite(), and
+ * drmCommandReadWrite().
+ */
+#define DRM_COMMAND_BASE 0x40
+#define DRM_COMMAND_END 0xA0
+
+/**
+ * Header for events written back to userspace on the drm fd. The
+ * type defines the type of event, the length specifies the total
+ * length of the event (including the header), and user_data is
+ * typically a 64 bit value passed with the ioctl that triggered the
+ * event. A read on the drm fd will only ever return complete events;
+ * for example, if the read buffer is 100 bytes and there are two
+ * 64 byte events pending, only one will be returned.
+ *
+ * Event types 0 - 0x7fffffff are generic drm events, 0x80000000 and
+ * up are chipset specific.
+ */
+struct drm_event {
+ __u32 type;
+ __u32 length;
+};
+
+#define DRM_EVENT_VBLANK 0x01
+#define DRM_EVENT_FLIP_COMPLETE 0x02
+#define DRM_EVENT_CRTC_SEQUENCE 0x03
+
+struct drm_event_vblank {
+ struct drm_event base;
+ __u64 user_data;
+ __u32 tv_sec;
+ __u32 tv_usec;
+ __u32 sequence;
+ __u32 crtc_id; /* 0 on older kernels that do not support this */
+};
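+
+/* A minimal sketch, assuming an open DRM fd, of the read loop implied by
+ * the header comment above; the buffer size and the handle_vblank()
+ * helper are illustrative and error handling is omitted. Note that
+ * e->length includes the header itself.
+ *
+ *	char buf[4096];
+ *	ssize_t len = read(drm_fd, buf, sizeof(buf));
+ *	size_t off = 0;
+ *
+ *	while (len > 0 && off + sizeof(struct drm_event) <= (size_t)len) {
+ *		const struct drm_event *e = (const void *)&buf[off];
+ *
+ *		if (e->type == DRM_EVENT_VBLANK)
+ *			handle_vblank((const struct drm_event_vblank *)e);
+ *		off += e->length;
+ *	}
+ */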
+
+/* Event delivered at sequence. The time stamp marks when the first pixel
+ * of the refresh cycle leaves the display engine for the display.
+ */
+struct drm_event_crtc_sequence {
+ struct drm_event base;
+ __u64 user_data;
+ __s64 time_ns;
+ __u64 sequence;
+};
+
+/* typedef area */
+#ifndef __KERNEL__
+typedef struct drm_clip_rect drm_clip_rect_t;
+typedef struct drm_drawable_info drm_drawable_info_t;
+typedef struct drm_tex_region drm_tex_region_t;
+typedef struct drm_hw_lock drm_hw_lock_t;
+typedef struct drm_version drm_version_t;
+typedef struct drm_unique drm_unique_t;
+typedef struct drm_list drm_list_t;
+typedef struct drm_block drm_block_t;
+typedef struct drm_control drm_control_t;
+typedef enum drm_map_type drm_map_type_t;
+typedef enum drm_map_flags drm_map_flags_t;
+typedef struct drm_ctx_priv_map drm_ctx_priv_map_t;
+typedef struct drm_map drm_map_t;
+typedef struct drm_client drm_client_t;
+typedef enum drm_stat_type drm_stat_type_t;
+typedef struct drm_stats drm_stats_t;
+typedef enum drm_lock_flags drm_lock_flags_t;
+typedef struct drm_lock drm_lock_t;
+typedef enum drm_dma_flags drm_dma_flags_t;
+typedef struct drm_buf_desc drm_buf_desc_t;
+typedef struct drm_buf_info drm_buf_info_t;
+typedef struct drm_buf_free drm_buf_free_t;
+typedef struct drm_buf_pub drm_buf_pub_t;
+typedef struct drm_buf_map drm_buf_map_t;
+typedef struct drm_dma drm_dma_t;
+typedef union drm_wait_vblank drm_wait_vblank_t;
+typedef struct drm_agp_mode drm_agp_mode_t;
+typedef enum drm_ctx_flags drm_ctx_flags_t;
+typedef struct drm_ctx drm_ctx_t;
+typedef struct drm_ctx_res drm_ctx_res_t;
+typedef struct drm_draw drm_draw_t;
+typedef struct drm_update_draw drm_update_draw_t;
+typedef struct drm_auth drm_auth_t;
+typedef struct drm_irq_busid drm_irq_busid_t;
+typedef enum drm_vblank_seq_type drm_vblank_seq_type_t;
+
+typedef struct drm_agp_buffer drm_agp_buffer_t;
+typedef struct drm_agp_binding drm_agp_binding_t;
+typedef struct drm_agp_info drm_agp_info_t;
+typedef struct drm_scatter_gather drm_scatter_gather_t;
+typedef struct drm_set_version drm_set_version_t;
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
new file mode 100644
index 000000000..7f5634ce8
--- /dev/null
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -0,0 +1,1724 @@
+/*
+ * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _UAPI_I915_DRM_H_
+#define _UAPI_I915_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Please note that modifications to all structs defined here are
+ * subject to backwards-compatibility constraints.
+ */
+
+/**
+ * DOC: uevents generated by i915 on its device node
+ *
+ * I915_L3_PARITY_UEVENT - Generated when the driver receives a parity mismatch
+ * event from the gpu l3 cache. Additional information supplied is ROW,
+ * BANK, SUBBANK, SLICE of the affected cacheline. Userspace should keep
+ * track of these events, and if a specific cache-line seems to have a
+ * persistent error, remap it with the l3 remapping tool supplied in
+ * intel-gpu-tools. The value supplied with the event is always 1.
+ *
+ * I915_ERROR_UEVENT - Generated upon error detection, currently only via
+ * hangcheck. The error detection event is a good indicator of when things
+ * began to go badly. The value supplied with the event is a 1 upon error
+ * detection, and a 0 upon reset completion, signifying no more error
+ * exists. NOTE: Disabling hangcheck or reset via module parameter will
+ * cause the related events to not be seen.
+ *
+ * I915_RESET_UEVENT - Event is generated just before an attempt to reset
+ * the GPU. The value supplied with the event is always 1. NOTE: Disabling
+ * reset via module parameter will cause this event to not be seen.
+ */
+#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
+#define I915_ERROR_UEVENT "ERROR"
+#define I915_RESET_UEVENT "RESET"
+
+/*
+ * MOCS indexes used for GPU surfaces, defining the cacheability of the
+ * surface data and the coherency for this data wrt. CPU vs. GPU accesses.
+ */
+enum i915_mocs_table_index {
+ /*
+ * Not cached anywhere, coherency between CPU and GPU accesses is
+ * guaranteed.
+ */
+ I915_MOCS_UNCACHED,
+ /*
+ * Cacheability and coherency controlled by the kernel automatically
+ * based on the DRM_I915_GEM_SET_CACHING IOCTL setting and the current
+ * usage of the surface (used for display scanout or not).
+ */
+ I915_MOCS_PTE,
+ /*
+ * Cached in all GPU caches available on the platform.
+ * Coherency between CPU and GPU accesses to the surface is not
+ * guaranteed without extra synchronization.
+ */
+ I915_MOCS_CACHED,
+};
+
+/*
+ * Different engines serve different roles, and there may be more than one
+ * engine serving each role. enum drm_i915_gem_engine_class provides a
+ * classification of the role of the engine, which may be used when requesting
+ * operations to be performed on a certain subset of engines, or for providing
+ * information about that group.
+ */
+enum drm_i915_gem_engine_class {
+ I915_ENGINE_CLASS_RENDER = 0,
+ I915_ENGINE_CLASS_COPY = 1,
+ I915_ENGINE_CLASS_VIDEO = 2,
+ I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
+
+ I915_ENGINE_CLASS_INVALID = -1
+};
+
+/**
+ * DOC: perf_events exposed by i915 through /sys/bus/event_sources/drivers/i915
+ */
+
+enum drm_i915_pmu_engine_sample {
+ I915_SAMPLE_BUSY = 0,
+ I915_SAMPLE_WAIT = 1,
+ I915_SAMPLE_SEMA = 2
+};
+
+#define I915_PMU_SAMPLE_BITS (4)
+#define I915_PMU_SAMPLE_MASK (0xf)
+#define I915_PMU_SAMPLE_INSTANCE_BITS (8)
+#define I915_PMU_CLASS_SHIFT \
+ (I915_PMU_SAMPLE_BITS + I915_PMU_SAMPLE_INSTANCE_BITS)
+
+#define __I915_PMU_ENGINE(class, instance, sample) \
+ ((class) << I915_PMU_CLASS_SHIFT | \
+ (instance) << I915_PMU_SAMPLE_BITS | \
+ (sample))
+
+#define I915_PMU_ENGINE_BUSY(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_BUSY)
+
+#define I915_PMU_ENGINE_WAIT(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_WAIT)
+
+#define I915_PMU_ENGINE_SEMA(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
+
+#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+
+#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
+#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
+#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
+#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+
+#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
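+
+/* A hedged sketch of consuming these encodings: the composed value is
+ * used as perf_event_attr.config, with attr.type read at runtime from
+ * the i915 entry on the event_source sysfs bus noted in the DOC comment
+ * above (i915_pmu_type below is a placeholder for that value):
+ *
+ *	struct perf_event_attr attr = { 0 };
+ *
+ *	attr.type = i915_pmu_type;
+ *	attr.size = sizeof(attr);
+ *	attr.config = I915_PMU_ENGINE_BUSY(I915_ENGINE_CLASS_RENDER, 0);
+ *	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
+ */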
+
+/* Each region is a minimum of 16k, and there are at most 255 of them.
+ */
+#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
+ * of chars for next/prev indices */
+#define I915_LOG_MIN_TEX_REGION_SIZE 14
+
+typedef struct _drm_i915_init {
+ enum {
+ I915_INIT_DMA = 0x01,
+ I915_CLEANUP_DMA = 0x02,
+ I915_RESUME_DMA = 0x03
+ } func;
+ unsigned int mmio_offset;
+ int sarea_priv_offset;
+ unsigned int ring_start;
+ unsigned int ring_end;
+ unsigned int ring_size;
+ unsigned int front_offset;
+ unsigned int back_offset;
+ unsigned int depth_offset;
+ unsigned int w;
+ unsigned int h;
+ unsigned int pitch;
+ unsigned int pitch_bits;
+ unsigned int back_pitch;
+ unsigned int depth_pitch;
+ unsigned int cpp;
+ unsigned int chipset;
+} drm_i915_init_t;
+
+typedef struct _drm_i915_sarea {
+ struct drm_tex_region texList[I915_NR_TEX_REGIONS + 1];
+ int last_upload; /* last time texture was uploaded */
+ int last_enqueue; /* last time a buffer was enqueued */
+ int last_dispatch; /* age of the most recently dispatched buffer */
+ int ctxOwner; /* last context to upload state */
+ int texAge;
+ int pf_enabled; /* is pageflipping allowed? */
+ int pf_active;
+ int pf_current_page; /* which buffer is being displayed? */
+ int perf_boxes; /* performance boxes to be displayed */
+ int width, height; /* screen size in pixels */
+
+ drm_handle_t front_handle;
+ int front_offset;
+ int front_size;
+
+ drm_handle_t back_handle;
+ int back_offset;
+ int back_size;
+
+ drm_handle_t depth_handle;
+ int depth_offset;
+ int depth_size;
+
+ drm_handle_t tex_handle;
+ int tex_offset;
+ int tex_size;
+ int log_tex_granularity;
+ int pitch;
+ int rotation; /* 0, 90, 180 or 270 */
+ int rotated_offset;
+ int rotated_size;
+ int rotated_pitch;
+ int virtualX, virtualY;
+
+ unsigned int front_tiled;
+ unsigned int back_tiled;
+ unsigned int depth_tiled;
+ unsigned int rotated_tiled;
+ unsigned int rotated2_tiled;
+
+ int pipeA_x;
+ int pipeA_y;
+ int pipeA_w;
+ int pipeA_h;
+ int pipeB_x;
+ int pipeB_y;
+ int pipeB_w;
+ int pipeB_h;
+
+ /* fill out some space for old userspace triple buffer */
+ drm_handle_t unused_handle;
+ __u32 unused1, unused2, unused3;
+
+ /* buffer object handles for static buffers. May change
+ * over the lifetime of the client.
+ */
+ __u32 front_bo_handle;
+ __u32 back_bo_handle;
+ __u32 unused_bo_handle;
+ __u32 depth_bo_handle;
+
+} drm_i915_sarea_t;
+
+/* due to userspace building against these headers we need some compat here */
+#define planeA_x pipeA_x
+#define planeA_y pipeA_y
+#define planeA_w pipeA_w
+#define planeA_h pipeA_h
+#define planeB_x pipeB_x
+#define planeB_y pipeB_y
+#define planeB_w pipeB_w
+#define planeB_h pipeB_h
+
+/* Flags for perf_boxes
+ */
+#define I915_BOX_RING_EMPTY 0x1
+#define I915_BOX_FLIP 0x2
+#define I915_BOX_WAIT 0x4
+#define I915_BOX_TEXTURE_LOAD 0x8
+#define I915_BOX_LOST_CONTEXT 0x10
+
+/*
+ * i915 specific ioctls.
+ *
+ * The device specific ioctl range is [DRM_COMMAND_BASE, DRM_COMMAND_END),
+ * i.e. [0x40, 0xa0) (0xa0 is excluded). The numbers below are defined as
+ * offsets against DRM_COMMAND_BASE and should be between [0x0, 0x60).
+ */
+#define DRM_I915_INIT 0x00
+#define DRM_I915_FLUSH 0x01
+#define DRM_I915_FLIP 0x02
+#define DRM_I915_BATCHBUFFER 0x03
+#define DRM_I915_IRQ_EMIT 0x04
+#define DRM_I915_IRQ_WAIT 0x05
+#define DRM_I915_GETPARAM 0x06
+#define DRM_I915_SETPARAM 0x07
+#define DRM_I915_ALLOC 0x08
+#define DRM_I915_FREE 0x09
+#define DRM_I915_INIT_HEAP 0x0a
+#define DRM_I915_CMDBUFFER 0x0b
+#define DRM_I915_DESTROY_HEAP 0x0c
+#define DRM_I915_SET_VBLANK_PIPE 0x0d
+#define DRM_I915_GET_VBLANK_PIPE 0x0e
+#define DRM_I915_VBLANK_SWAP 0x0f
+#define DRM_I915_HWS_ADDR 0x11
+#define DRM_I915_GEM_INIT 0x13
+#define DRM_I915_GEM_EXECBUFFER 0x14
+#define DRM_I915_GEM_PIN 0x15
+#define DRM_I915_GEM_UNPIN 0x16
+#define DRM_I915_GEM_BUSY 0x17
+#define DRM_I915_GEM_THROTTLE 0x18
+#define DRM_I915_GEM_ENTERVT 0x19
+#define DRM_I915_GEM_LEAVEVT 0x1a
+#define DRM_I915_GEM_CREATE 0x1b
+#define DRM_I915_GEM_PREAD 0x1c
+#define DRM_I915_GEM_PWRITE 0x1d
+#define DRM_I915_GEM_MMAP 0x1e
+#define DRM_I915_GEM_SET_DOMAIN 0x1f
+#define DRM_I915_GEM_SW_FINISH 0x20
+#define DRM_I915_GEM_SET_TILING 0x21
+#define DRM_I915_GEM_GET_TILING 0x22
+#define DRM_I915_GEM_GET_APERTURE 0x23
+#define DRM_I915_GEM_MMAP_GTT 0x24
+#define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
+#define DRM_I915_GEM_MADVISE 0x26
+#define DRM_I915_OVERLAY_PUT_IMAGE 0x27
+#define DRM_I915_OVERLAY_ATTRS 0x28
+#define DRM_I915_GEM_EXECBUFFER2 0x29
+#define DRM_I915_GEM_EXECBUFFER2_WR DRM_I915_GEM_EXECBUFFER2
+#define DRM_I915_GET_SPRITE_COLORKEY 0x2a
+#define DRM_I915_SET_SPRITE_COLORKEY 0x2b
+#define DRM_I915_GEM_WAIT 0x2c
+#define DRM_I915_GEM_CONTEXT_CREATE 0x2d
+#define DRM_I915_GEM_CONTEXT_DESTROY 0x2e
+#define DRM_I915_GEM_SET_CACHING 0x2f
+#define DRM_I915_GEM_GET_CACHING 0x30
+#define DRM_I915_REG_READ 0x31
+#define DRM_I915_GET_RESET_STATS 0x32
+#define DRM_I915_GEM_USERPTR 0x33
+#define DRM_I915_GEM_CONTEXT_GETPARAM 0x34
+#define DRM_I915_GEM_CONTEXT_SETPARAM 0x35
+#define DRM_I915_PERF_OPEN 0x36
+#define DRM_I915_PERF_ADD_CONFIG 0x37
+#define DRM_I915_PERF_REMOVE_CONFIG 0x38
+#define DRM_I915_QUERY 0x39
+
+#define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
+#define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
+#define DRM_IOCTL_I915_FLIP DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLIP)
+#define DRM_IOCTL_I915_BATCHBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_BATCHBUFFER, drm_i915_batchbuffer_t)
+#define DRM_IOCTL_I915_IRQ_EMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_IRQ_EMIT, drm_i915_irq_emit_t)
+#define DRM_IOCTL_I915_IRQ_WAIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_IRQ_WAIT, drm_i915_irq_wait_t)
+#define DRM_IOCTL_I915_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GETPARAM, drm_i915_getparam_t)
+#define DRM_IOCTL_I915_SETPARAM DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SETPARAM, drm_i915_setparam_t)
+#define DRM_IOCTL_I915_ALLOC DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_ALLOC, drm_i915_mem_alloc_t)
+#define DRM_IOCTL_I915_FREE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_FREE, drm_i915_mem_free_t)
+#define DRM_IOCTL_I915_INIT_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT_HEAP, drm_i915_mem_init_heap_t)
+#define DRM_IOCTL_I915_CMDBUFFER DRM_IOW( DRM_COMMAND_BASE + DRM_I915_CMDBUFFER, drm_i915_cmdbuffer_t)
+#define DRM_IOCTL_I915_DESTROY_HEAP DRM_IOW( DRM_COMMAND_BASE + DRM_I915_DESTROY_HEAP, drm_i915_mem_destroy_heap_t)
+#define DRM_IOCTL_I915_SET_VBLANK_PIPE DRM_IOW( DRM_COMMAND_BASE + DRM_I915_SET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
+#define DRM_IOCTL_I915_GET_VBLANK_PIPE DRM_IOR( DRM_COMMAND_BASE + DRM_I915_GET_VBLANK_PIPE, drm_i915_vblank_pipe_t)
+#define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t)
+#define DRM_IOCTL_I915_HWS_ADDR DRM_IOW(DRM_COMMAND_BASE + DRM_I915_HWS_ADDR, struct drm_i915_gem_init)
+#define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2_WR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2_WR, struct drm_i915_gem_execbuffer2)
+#define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
+#define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
+#define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
+#define DRM_IOCTL_I915_GEM_SET_CACHING DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_SET_CACHING, struct drm_i915_gem_caching)
+#define DRM_IOCTL_I915_GEM_GET_CACHING DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHING, struct drm_i915_gem_caching)
+#define DRM_IOCTL_I915_GEM_THROTTLE DRM_IO ( DRM_COMMAND_BASE + DRM_I915_GEM_THROTTLE)
+#define DRM_IOCTL_I915_GEM_ENTERVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_ENTERVT)
+#define DRM_IOCTL_I915_GEM_LEAVEVT DRM_IO(DRM_COMMAND_BASE + DRM_I915_GEM_LEAVEVT)
+#define DRM_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct drm_i915_gem_create)
+#define DRM_IOCTL_I915_GEM_PREAD DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PREAD, struct drm_i915_gem_pread)
+#define DRM_IOCTL_I915_GEM_PWRITE DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_PWRITE, struct drm_i915_gem_pwrite)
+#define DRM_IOCTL_I915_GEM_MMAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP, struct drm_i915_gem_mmap)
+#define DRM_IOCTL_I915_GEM_MMAP_GTT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MMAP_GTT, struct drm_i915_gem_mmap_gtt)
+#define DRM_IOCTL_I915_GEM_SET_DOMAIN DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SET_DOMAIN, struct drm_i915_gem_set_domain)
+#define DRM_IOCTL_I915_GEM_SW_FINISH DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_SW_FINISH, struct drm_i915_gem_sw_finish)
+#define DRM_IOCTL_I915_GEM_SET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_SET_TILING, struct drm_i915_gem_set_tiling)
+#define DRM_IOCTL_I915_GEM_GET_TILING DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_TILING, struct drm_i915_gem_get_tiling)
+#define DRM_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
+#define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id)
+#define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
+#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
+#define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
+#define DRM_IOCTL_I915_SET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_SET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
+#define DRM_IOCTL_I915_GET_SPRITE_COLORKEY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GET_SPRITE_COLORKEY, struct drm_intel_sprite_colorkey)
+#define DRM_IOCTL_I915_GEM_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_WAIT, struct drm_i915_gem_wait)
+#define DRM_IOCTL_I915_GEM_CONTEXT_CREATE DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_CREATE, struct drm_i915_gem_context_create)
+#define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
+#define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
+#define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
+#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
+#define DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_GETPARAM, struct drm_i915_gem_context_param)
+#define DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_SETPARAM, struct drm_i915_gem_context_param)
+#define DRM_IOCTL_I915_PERF_OPEN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_OPEN, struct drm_i915_perf_open_param)
+#define DRM_IOCTL_I915_PERF_ADD_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_ADD_CONFIG, struct drm_i915_perf_oa_config)
+#define DRM_IOCTL_I915_PERF_REMOVE_CONFIG DRM_IOW(DRM_COMMAND_BASE + DRM_I915_PERF_REMOVE_CONFIG, __u64)
+#define DRM_IOCTL_I915_QUERY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_QUERY, struct drm_i915_query)
+
+/* Allow drivers to submit batchbuffers directly to hardware, relying
+ * on the security mechanisms provided by hardware.
+ */
+typedef struct drm_i915_batchbuffer {
+ int start; /* agp offset */
+ int used; /* nr bytes in use */
+ int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
+ int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
+	int num_cliprects;	/* multipass with multiple cliprects? */
+ struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */
+} drm_i915_batchbuffer_t;
+
+/* As above, but pass a pointer to userspace buffer which can be
+ * validated by the kernel prior to sending to hardware.
+ */
+typedef struct _drm_i915_cmdbuffer {
+ char __user *buf; /* pointer to userspace command buffer */
+ int sz; /* nr bytes in buf */
+ int DR1; /* hw flags for GFX_OP_DRAWRECT_INFO */
+ int DR4; /* window origin for GFX_OP_DRAWRECT_INFO */
+	int num_cliprects;	/* multipass with multiple cliprects? */
+ struct drm_clip_rect __user *cliprects; /* pointer to userspace cliprects */
+} drm_i915_cmdbuffer_t;
+
+/* Userspace can request & wait on IRQs:
+ */
+typedef struct drm_i915_irq_emit {
+ int __user *irq_seq;
+} drm_i915_irq_emit_t;
+
+typedef struct drm_i915_irq_wait {
+ int irq_seq;
+} drm_i915_irq_wait_t;
+
+/* Ioctl to query kernel params:
+ */
+#define I915_PARAM_IRQ_ACTIVE 1
+#define I915_PARAM_ALLOW_BATCHBUFFER 2
+#define I915_PARAM_LAST_DISPATCH 3
+#define I915_PARAM_CHIPSET_ID 4
+#define I915_PARAM_HAS_GEM 5
+#define I915_PARAM_NUM_FENCES_AVAIL 6
+#define I915_PARAM_HAS_OVERLAY 7
+#define I915_PARAM_HAS_PAGEFLIPPING 8
+#define I915_PARAM_HAS_EXECBUF2 9
+#define I915_PARAM_HAS_BSD 10
+#define I915_PARAM_HAS_BLT 11
+#define I915_PARAM_HAS_RELAXED_FENCING 12
+#define I915_PARAM_HAS_COHERENT_RINGS 13
+#define I915_PARAM_HAS_EXEC_CONSTANTS 14
+#define I915_PARAM_HAS_RELAXED_DELTA 15
+#define I915_PARAM_HAS_GEN7_SOL_RESET 16
+#define I915_PARAM_HAS_LLC 17
+#define I915_PARAM_HAS_ALIASING_PPGTT 18
+#define I915_PARAM_HAS_WAIT_TIMEOUT 19
+#define I915_PARAM_HAS_SEMAPHORES 20
+#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21
+#define I915_PARAM_HAS_VEBOX 22
+#define I915_PARAM_HAS_SECURE_BATCHES 23
+#define I915_PARAM_HAS_PINNED_BATCHES 24
+#define I915_PARAM_HAS_EXEC_NO_RELOC 25
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
+#define I915_PARAM_HAS_WT 27
+#define I915_PARAM_CMD_PARSER_VERSION 28
+#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
+#define I915_PARAM_MMAP_VERSION 30
+#define I915_PARAM_HAS_BSD2 31
+#define I915_PARAM_REVISION 32
+#define I915_PARAM_SUBSLICE_TOTAL 33
+#define I915_PARAM_EU_TOTAL 34
+#define I915_PARAM_HAS_GPU_RESET 35
+#define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_EXEC_SOFTPIN 37
+#define I915_PARAM_HAS_POOLED_EU 38
+#define I915_PARAM_MIN_EU_IN_POOL 39
+#define I915_PARAM_MMAP_GTT_VERSION 40
+
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution
+ * priorities and the driver will attempt to execute batches in priority order.
+ * The param returns a capability bitmask, nonzero implies that the scheduler
+ * is enabled, with different features present according to the mask.
+ *
+ * The initial priority for each batch is supplied by the context and is
+ * controlled via I915_CONTEXT_PARAM_PRIORITY.
+ */
+#define I915_PARAM_HAS_SCHEDULER 41
+#define I915_SCHEDULER_CAP_ENABLED (1ul << 0)
+#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1)
+#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2)
+
+#define I915_PARAM_HUC_STATUS 42
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of
+ * synchronisation with implicit fencing on individual objects.
+ * See EXEC_OBJECT_ASYNC.
+ */
+#define I915_PARAM_HAS_EXEC_ASYNC 43
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support -
+ * both being able to pass in a sync_file fd to wait upon before executing,
+ * and being able to return a new sync_file fd that is signaled when the
+ * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE 44
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture
+ * user specified buffers for post-mortem debugging of GPU hangs. See
+ * EXEC_OBJECT_CAPTURE.
+ */
+#define I915_PARAM_HAS_EXEC_CAPTURE 45
+
+#define I915_PARAM_SLICE_MASK 46
+
+/* Assuming it's uniform for each slice, this queries the mask of subslices
+ * per-slice for this system.
+ */
+#define I915_PARAM_SUBSLICE_MASK 47
+
+/*
+ * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer
+ * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST.
+ */
+#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48
+
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY.
+ */
+#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49
+
+/*
+ * Query whether every context (both per-file default and user created) is
+ * isolated (insofar as HW supports). If this parameter is not true, then
+ * freshly created contexts may inherit values from an existing context,
+ * rather than default HW values. If true, it also ensures (insofar as HW
+ * supports) that all state set by this context will not leak to any other
+ * context.
+ *
+ * As not every engine across every gen supports contexts, the returned
+ * value reports the support of context isolation for individual engines by
+ * returning a bitmask of each engine class set to true if that class supports
+ * isolation.
+ */
+#define I915_PARAM_HAS_CONTEXT_ISOLATION 50
+
+/* Frequency of the command streamer timestamps given by the *_TIMESTAMP
+ * registers. This used to be fixed per platform but from CNL onwards, this
+ * might vary depending on the parts.
+ */
+#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51
+
+typedef struct drm_i915_getparam {
+ __s32 param;
+ /*
+ * WARNING: Using pointers instead of fixed-size u64 means we need to write
+ * compat32 code. Don't repeat this mistake.
+ */
+ int __user *value;
+} drm_i915_getparam_t;
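+
+/* Illustrative GETPARAM round trip (assumes an open DRM fd and ignores
+ * errors); here the scheduler capability mask defined above is queried,
+ * and the scheduling_is_available() helper is a placeholder:
+ *
+ *	drm_i915_getparam_t gp = { 0 };
+ *	int value = 0;
+ *
+ *	gp.param = I915_PARAM_HAS_SCHEDULER;
+ *	gp.value = &value;
+ *	ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ *	if (value & I915_SCHEDULER_CAP_ENABLED)
+ *		scheduling_is_available();
+ */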
+
+/* Ioctl to set kernel params:
+ */
+#define I915_SETPARAM_USE_MI_BATCHBUFFER_START 1
+#define I915_SETPARAM_TEX_LRU_LOG_GRANULARITY 2
+#define I915_SETPARAM_ALLOW_BATCHBUFFER 3
+#define I915_SETPARAM_NUM_USED_FENCES 4
+
+typedef struct drm_i915_setparam {
+ int param;
+ int value;
+} drm_i915_setparam_t;
+
+/* A memory manager for regions of shared memory:
+ */
+#define I915_MEM_REGION_AGP 1
+
+typedef struct drm_i915_mem_alloc {
+ int region;
+ int alignment;
+ int size;
+ int __user *region_offset; /* offset from start of fb or agp */
+} drm_i915_mem_alloc_t;
+
+typedef struct drm_i915_mem_free {
+ int region;
+ int region_offset;
+} drm_i915_mem_free_t;
+
+typedef struct drm_i915_mem_init_heap {
+ int region;
+ int size;
+ int start;
+} drm_i915_mem_init_heap_t;
+
+/* Allow memory manager to be torn down and re-initialized (eg on
+ * rotate):
+ */
+typedef struct drm_i915_mem_destroy_heap {
+ int region;
+} drm_i915_mem_destroy_heap_t;
+
+/* Allow X server to configure which pipes to monitor for vblank signals
+ */
+#define DRM_I915_VBLANK_PIPE_A 1
+#define DRM_I915_VBLANK_PIPE_B 2
+
+typedef struct drm_i915_vblank_pipe {
+ int pipe;
+} drm_i915_vblank_pipe_t;
+
+/* Schedule buffer swap at given vertical blank:
+ */
+typedef struct drm_i915_vblank_swap {
+ drm_drawable_t drawable;
+ enum drm_vblank_seq_type seqtype;
+ unsigned int sequence;
+} drm_i915_vblank_swap_t;
+
+typedef struct drm_i915_hws_addr {
+ __u64 addr;
+} drm_i915_hws_addr_t;
+
+struct drm_i915_gem_init {
+ /**
+ * Beginning offset in the GTT to be managed by the DRM memory
+ * manager.
+ */
+ __u64 gtt_start;
+ /**
+ * Ending offset in the GTT to be managed by the DRM memory
+ * manager.
+ */
+ __u64 gtt_end;
+};
+
+struct drm_i915_gem_create {
+ /**
+ * Requested size for the object.
+ *
+ * The (page-aligned) allocated size for the object will be returned.
+ */
+ __u64 size;
+ /**
+ * Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+ __u32 handle;
+ __u32 pad;
+};
+
+struct drm_i915_gem_pread {
+ /** Handle for the object being read. */
+ __u32 handle;
+ __u32 pad;
+ /** Offset into the object to read from */
+ __u64 offset;
+ /** Length of data to read */
+ __u64 size;
+ /**
+ * Pointer to write the data into.
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 data_ptr;
+};
+
+struct drm_i915_gem_pwrite {
+ /** Handle for the object being written to. */
+ __u32 handle;
+ __u32 pad;
+ /** Offset into the object to write to */
+ __u64 offset;
+ /** Length of data to write */
+ __u64 size;
+ /**
+ * Pointer to read the data from.
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 data_ptr;
+};
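+
+/* A minimal create-then-write sequence, assuming an open DRM fd and a
+ * data[] buffer; the size is arbitrary and error handling is omitted:
+ *
+ *	struct drm_i915_gem_create create = { .size = 4096 };
+ *	struct drm_i915_gem_pwrite pwrite = { 0 };
+ *
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
+ *	pwrite.handle = create.handle;
+ *	pwrite.size = sizeof(data);
+ *	pwrite.data_ptr = (__u64)(uintptr_t)data;
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite);
+ */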
+
+struct drm_i915_gem_mmap {
+ /** Handle for the object being mapped. */
+ __u32 handle;
+ __u32 pad;
+ /** Offset in the object to map. */
+ __u64 offset;
+ /**
+ * Length of data to map.
+ *
+ * The value will be page-aligned.
+ */
+ __u64 size;
+ /**
+ * Returned pointer the data was mapped at.
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 addr_ptr;
+
+ /**
+ * Flags for extended behaviour.
+ *
+ * Added in version 2.
+ */
+ __u64 flags;
+#define I915_MMAP_WC 0x1
+};
+
+struct drm_i915_gem_mmap_gtt {
+ /** Handle for the object being mapped. */
+ __u32 handle;
+ __u32 pad;
+ /**
+ * Fake offset to use for subsequent mmap call
+ *
+ * This is a fixed-size type for 32/64 compatibility.
+ */
+ __u64 offset;
+};
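+
+/* The fake offset returned above is meant to be fed to a subsequent
+ * mmap(2) on the same DRM fd; a sketch, with size assumed to match the
+ * object and error handling omitted:
+ *
+ *	struct drm_i915_gem_mmap_gtt arg = { .handle = handle };
+ *	void *ptr;
+ *
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg);
+ *	ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ *		   fd, arg.offset);
+ */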
+
+struct drm_i915_gem_set_domain {
+ /** Handle for the object */
+ __u32 handle;
+
+ /** New read domains */
+ __u32 read_domains;
+
+ /** New write domain */
+ __u32 write_domain;
+};
+
+struct drm_i915_gem_sw_finish {
+ /** Handle for the object */
+ __u32 handle;
+};
+
+struct drm_i915_gem_relocation_entry {
+ /**
+ * Handle of the buffer being pointed to by this relocation entry.
+ *
+ * It's appealing to make this be an index into the mm_validate_entry
+ * list to refer to the buffer, but this allows the driver to create
+ * a relocation list for state buffers and not re-write it per
+ * exec using the buffer.
+ */
+ __u32 target_handle;
+
+ /**
+ * Value to be added to the offset of the target buffer to make up
+ * the relocation entry.
+ */
+ __u32 delta;
+
+ /** Offset in the buffer the relocation entry will be written into */
+ __u64 offset;
+
+ /**
+ * Offset value of the target buffer that the relocation entry was last
+ * written as.
+ *
+ * If the buffer has the same offset as last time, we can skip syncing
+ * and writing the relocation. This value is written back out by
+ * the execbuffer ioctl when the relocation is written.
+ */
+ __u64 presumed_offset;
+
+ /**
+ * Target memory domains read by this operation.
+ */
+ __u32 read_domains;
+
+ /**
+ * Target memory domains written by this operation.
+ *
+ * Note that only one domain may be written by the whole
+ * execbuffer operation, so that where there are conflicts,
+ * the application will get -EINVAL back.
+ */
+ __u32 write_domain;
+};
+
+/** @{
+ * Intel memory domains
+ *
+ * Most of these just align with the various caches in
+ * the system and are used to flush and invalidate as
+ * objects end up cached in different domains.
+ */
+/** CPU cache */
+#define I915_GEM_DOMAIN_CPU 0x00000001
+/** Render cache, used by 2D and 3D drawing */
+#define I915_GEM_DOMAIN_RENDER 0x00000002
+/** Sampler cache, used by texture engine */
+#define I915_GEM_DOMAIN_SAMPLER 0x00000004
+/** Command queue, used to load batch buffers */
+#define I915_GEM_DOMAIN_COMMAND 0x00000008
+/** Instruction cache, used by shader programs */
+#define I915_GEM_DOMAIN_INSTRUCTION 0x00000010
+/** Vertex address cache */
+#define I915_GEM_DOMAIN_VERTEX 0x00000020
+/** GTT domain - aperture and scanout */
+#define I915_GEM_DOMAIN_GTT 0x00000040
+/** WC domain - uncached access */
+#define I915_GEM_DOMAIN_WC 0x00000080
+/** @} */
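+
+/* Illustrative use of the domains with the SET_DOMAIN ioctl defined
+ * earlier: moving an object to the CPU domain before CPU writes (fd and
+ * handle are assumed to come from prior ioctls, errors ignored):
+ *
+ *	struct drm_i915_gem_set_domain sd = { 0 };
+ *
+ *	sd.handle = handle;
+ *	sd.read_domains = I915_GEM_DOMAIN_CPU;
+ *	sd.write_domain = I915_GEM_DOMAIN_CPU;
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd);
+ */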
+
+struct drm_i915_gem_exec_object {
+ /**
+ * User's handle for a buffer to be bound into the GTT for this
+ * operation.
+ */
+ __u32 handle;
+
+ /** Number of relocations to be performed on this buffer */
+ __u32 relocation_count;
+ /**
+ * Pointer to array of struct drm_i915_gem_relocation_entry containing
+ * the relocations to be performed in this buffer.
+ */
+ __u64 relocs_ptr;
+
+ /** Required alignment in graphics aperture */
+ __u64 alignment;
+
+ /**
+ * Returned value of the updated offset of the object, for future
+ * presumed_offset writes.
+ */
+ __u64 offset;
+};
+
+struct drm_i915_gem_execbuffer {
+ /**
+ * List of buffers to be validated with their relocations to be
+	 * performed on them.
+ *
+ * This is a pointer to an array of struct drm_i915_gem_validate_entry.
+ *
+ * These buffers must be listed in an order such that all relocations
+ * a buffer is performing refer to buffers that have already appeared
+ * in the validate list.
+ */
+ __u64 buffers_ptr;
+ __u32 buffer_count;
+
+ /** Offset in the batchbuffer to start execution from. */
+ __u32 batch_start_offset;
+ /** Bytes used in batchbuffer from batch_start_offset */
+ __u32 batch_len;
+ __u32 DR1;
+ __u32 DR4;
+ __u32 num_cliprects;
+ /** This is a struct drm_clip_rect *cliprects */
+ __u64 cliprects_ptr;
+};
+
+struct drm_i915_gem_exec_object2 {
+ /**
+ * User's handle for a buffer to be bound into the GTT for this
+ * operation.
+ */
+ __u32 handle;
+
+ /** Number of relocations to be performed on this buffer */
+ __u32 relocation_count;
+ /**
+ * Pointer to array of struct drm_i915_gem_relocation_entry containing
+ * the relocations to be performed in this buffer.
+ */
+ __u64 relocs_ptr;
+
+ /** Required alignment in graphics aperture */
+ __u64 alignment;
+
+ /**
+ * When the EXEC_OBJECT_PINNED flag is specified this is populated by
+ * the user with the GTT offset at which this object will be pinned.
+ * When the I915_EXEC_NO_RELOC flag is specified this must contain the
+ * presumed_offset of the object.
+ * During execbuffer2 the kernel populates it with the value of the
+ * current GTT offset of the object, for future presumed_offset writes.
+ */
+ __u64 offset;
+
+#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+#define EXEC_OBJECT_NEEDS_GTT (1<<1)
+#define EXEC_OBJECT_WRITE (1<<2)
+#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
+#define EXEC_OBJECT_PINNED (1<<4)
+#define EXEC_OBJECT_PAD_TO_SIZE (1<<5)
+/* The kernel implicitly tracks GPU activity on all GEM objects, and
+ * synchronises operations with outstanding rendering. This includes
+ * rendering on other devices if exported via dma-buf. However, sometimes
+ * this tracking is too coarse and the user knows better. For example,
+ * if the object is split into non-overlapping ranges shared between different
+ * clients or engines (i.e. suballocating objects), the implicit tracking
+ * by the kernel assumes that each operation affects the whole object rather
+ * than an individual range, causing needless synchronisation between clients.
+ * The kernel will also forgo any CPU cache flushes prior to rendering from
+ * the object as the client is expected to be also handling such domain
+ * tracking.
+ *
+ * The kernel maintains the implicit tracking in order to manage resources
+ * used by the GPU - this flag only disables the synchronisation prior to
+ * rendering with this object in this execbuf.
+ *
+ * Opting out of implicit synchronisation requires the user to do its own
+ * explicit tracking to avoid rendering corruption. See, for example,
+ * I915_PARAM_HAS_EXEC_FENCE to order execbufs and execute them asynchronously.
+ */
+#define EXEC_OBJECT_ASYNC (1<<6)
+/* Request that the contents of this execobject be copied into the error
+ * state upon a GPU hang involving this batch for post-mortem debugging.
+ * These buffers are recorded in no particular order as "user" in
+ * /sys/class/drm/cardN/error. Query I915_PARAM_HAS_EXEC_CAPTURE to see
+ * if the kernel supports this flag.
+ */
+#define EXEC_OBJECT_CAPTURE (1<<7)
+/* All remaining bits are MBZ and RESERVED FOR FUTURE USE */
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_CAPTURE<<1)
+ __u64 flags;
+
+ union {
+ __u64 rsvd1;
+ __u64 pad_to_size;
+ };
+ __u64 rsvd2;
+};
+
+struct drm_i915_gem_exec_fence {
+ /**
+ * User's handle for a drm_syncobj to wait on or signal.
+ */
+ __u32 handle;
+
+#define I915_EXEC_FENCE_WAIT (1<<0)
+#define I915_EXEC_FENCE_SIGNAL (1<<1)
+#define __I915_EXEC_FENCE_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SIGNAL << 1))
+ __u32 flags;
+};
+
+struct drm_i915_gem_execbuffer2 {
+ /**
+ * List of gem_exec_object2 structs
+ */
+ __u64 buffers_ptr;
+ __u32 buffer_count;
+
+ /** Offset in the batchbuffer to start execution from. */
+ __u32 batch_start_offset;
+ /** Bytes used in batchbuffer from batch_start_offset */
+ __u32 batch_len;
+ __u32 DR1;
+ __u32 DR4;
+ __u32 num_cliprects;
+ /**
+ * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
+ * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
+ * struct drm_i915_gem_exec_fence *fences.
+ */
+ __u64 cliprects_ptr;
+#define I915_EXEC_RING_MASK (7<<0)
+#define I915_EXEC_DEFAULT (0<<0)
+#define I915_EXEC_RENDER (1<<0)
+#define I915_EXEC_BSD (2<<0)
+#define I915_EXEC_BLT (3<<0)
+#define I915_EXEC_VEBOX (4<<0)
+
+/* Used for switching the constants addressing mode on gen4+ RENDER ring.
+ * Gen6+ only supports relative addressing to dynamic state (default) and
+ * absolute addressing.
+ *
+ * These flags are ignored for the BSD and BLT rings.
+ */
+#define I915_EXEC_CONSTANTS_MASK (3<<6)
+#define I915_EXEC_CONSTANTS_REL_GENERAL (0<<6) /* default */
+#define I915_EXEC_CONSTANTS_ABSOLUTE (1<<6)
+#define I915_EXEC_CONSTANTS_REL_SURFACE (2<<6) /* gen4/5 only */
+ __u64 flags;
+ __u64 rsvd1; /* now used for context info */
+ __u64 rsvd2;
+};
+
+/** Resets the SO write offset registers for transform feedback on gen7. */
+#define I915_EXEC_GEN7_SOL_RESET (1<<8)
+
+/** Request a privileged ("secure") batch buffer. Note only available for
+ * DRM_ROOT_ONLY | DRM_MASTER processes.
+ */
+#define I915_EXEC_SECURE (1<<9)
+
+/** Inform the kernel that the batch is and will always be pinned. This
+ * negates the requirement for a workaround to be performed to avoid
+ * an incoherent CS (such as can be found on 830/845). If this flag is
+ * not passed, the kernel will endeavour to make sure the batch is
+ * coherent with the CS before execution. If this flag is passed,
+ * userspace assumes the responsibility for ensuring the same.
+ */
+#define I915_EXEC_IS_PINNED (1<<10)
+
+/** Provide a hint to the kernel that the command stream and auxiliary
+ * state buffers already hold the correct presumed addresses and so the
+ * relocation process may be skipped if no buffers need to be moved in
+ * preparation for the execbuffer.
+ */
+#define I915_EXEC_NO_RELOC (1<<11)
+
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT (1<<12)
+
+/** Used for switching BSD rings on the platforms with two BSD rings */
+#define I915_EXEC_BSD_SHIFT (13)
+#define I915_EXEC_BSD_MASK (3 << I915_EXEC_BSD_SHIFT)
+/* default ping-pong mode */
+#define I915_EXEC_BSD_DEFAULT (0 << I915_EXEC_BSD_SHIFT)
+#define I915_EXEC_BSD_RING1 (1 << I915_EXEC_BSD_SHIFT)
+#define I915_EXEC_BSD_RING2 (2 << I915_EXEC_BSD_SHIFT)
+
+/** Tell the kernel that the batchbuffer is processed by
+ * the resource streamer.
+ */
+#define I915_EXEC_RESOURCE_STREAMER (1<<15)
+
+/* Setting I915_EXEC_FENCE_IN implies that lower_32_bits(rsvd2) represents
+ * a sync_file fd to wait upon (in a nonblocking manner) prior to executing
+ * the batch.
+ *
+ * Returns -EINVAL if the sync_file fd cannot be found.
+ */
+#define I915_EXEC_FENCE_IN (1<<16)
+
+/* Setting I915_EXEC_FENCE_OUT causes the ioctl to return a sync_file fd
+ * in the upper_32_bits(rsvd2) upon success. Ownership of the fd is given
+ * to the caller, and it should be closed after use. (The fd is a regular
+ * file descriptor and will be cleaned up on process termination. It holds
+ * a reference to the request, but nothing else.)
+ *
+ * The sync_file fd can be combined with other sync_file fds and passed either
+ * to execbuf using I915_EXEC_FENCE_IN, to atomic KMS ioctls (so that a flip
+ * will only occur after this request completes), or to other devices.
+ *
+ * Using I915_EXEC_FENCE_OUT requires use of
+ * DRM_IOCTL_I915_GEM_EXECBUFFER2_WR ioctl so that the result is written
+ * back to userspace. Failure to do so will cause the out-fence to always
+ * be reported as zero, and the real fence fd to be leaked.
+ */
+#define I915_EXEC_FENCE_OUT (1<<17)
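+
+/* A sketch of the rsvd2 plumbing described above, assuming eb2 is an
+ * otherwise populated struct drm_i915_gem_execbuffer2 and in_fence_fd a
+ * valid sync_file fd; error handling is omitted:
+ *
+ *	eb2.flags |= I915_EXEC_FENCE_IN | I915_EXEC_FENCE_OUT;
+ *	eb2.rsvd2 = in_fence_fd;
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, &eb2);
+ *	out_fence_fd = (int)(eb2.rsvd2 >> 32);
+ */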
+
+/*
+ * Traditionally the execbuf ioctl has only considered the final element in
+ * the execobject[] to be the executable batch. Often though, the client
+ * will know the batch object prior to construction, and being able to place
+ * it into the execobject[] array first can simplify the relocation tracking.
+ * Setting I915_EXEC_BATCH_FIRST tells execbuf to use element 0 of the
+ * execobject[] as the batch instead (the default is to use the last
+ * element).
+ */
+#define I915_EXEC_BATCH_FIRST (1<<18)
+
+/* Setting I915_EXEC_FENCE_ARRAY implies that num_cliprects and cliprects_ptr
+ * define an array of i915_gem_exec_fence structures which specify a set of
+ * dma fences to wait upon or signal.
+ */
+#define I915_EXEC_FENCE_ARRAY (1<<19)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_ARRAY<<1))
+
+#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
+#define i915_execbuffer2_set_context_id(eb2, context) \
+ (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
+#define i915_execbuffer2_get_context_id(eb2) \
+ ((eb2).rsvd1 & I915_EXEC_CONTEXT_ID_MASK)
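+
+/* Illustrative pairing with a context created earlier via
+ * DRM_IOCTL_I915_GEM_CONTEXT_CREATE (create.ctx_id below comes from that
+ * ioctl; objects/count are assumed to describe the execobjects):
+ *
+ *	struct drm_i915_gem_execbuffer2 eb2 = { 0 };
+ *
+ *	eb2.buffers_ptr = (__u64)(uintptr_t)objects;
+ *	eb2.buffer_count = count;
+ *	i915_execbuffer2_set_context_id(eb2, create.ctx_id);
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &eb2);
+ */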
+
+struct drm_i915_gem_pin {
+ /** Handle of the buffer to be pinned. */
+ __u32 handle;
+ __u32 pad;
+
+ /** alignment required within the aperture */
+ __u64 alignment;
+
+ /** Returned GTT offset of the buffer. */
+ __u64 offset;
+};
+
+struct drm_i915_gem_unpin {
+ /** Handle of the buffer to be unpinned. */
+ __u32 handle;
+ __u32 pad;
+};
+
+struct drm_i915_gem_busy {
+ /** Handle of the buffer to check for busy */
+ __u32 handle;
+
+ /** Return busy status
+ *
+ * A return of 0 implies that the object is idle (after
+ * having flushed any pending activity), and a non-zero return that
+ * the object is still in-flight on the GPU. (The GPU has not yet
+ * signaled completion for all pending requests that reference the
+ * object.) An object is guaranteed to become idle eventually (so
+ * long as no new GPU commands are executed upon it). Due to the
+ * asynchronous nature of the hardware, an object reported
+ * as busy may become idle before the ioctl is completed.
+ *
+ * Furthermore, if the object is busy, which engine is busy is only
+ * provided as a guide. There are race conditions which prevent the
+ * report of which engines are busy from being always accurate.
+ * However, the converse is not true. If the object is idle, the
+ * result of the ioctl, that all engines are idle, is accurate.
+ *
+ * The returned dword is split into two fields to indicate both
+ * the engines on which the object is being read, and the
+ * engine on which it is currently being written (if any).
+ *
+ * The low word (bits 0:15) indicate if the object is being written
+ * to by any engine (there can only be one, as the GEM implicit
+ * synchronisation rules force writes to be serialised). Only the
+ * engine for the last write is reported.
+ *
+ * The high word (bits 16:31) are a bitmask of which engines are
+ * currently reading from the object. Multiple engines may be
+ * reading from the object simultaneously.
+ *
+ * The value of each engine is the same as specified in the
+ * EXECBUFFER2 ioctl, i.e. I915_EXEC_RENDER, I915_EXEC_BSD etc.
+ * Note I915_EXEC_DEFAULT is a symbolic value and is mapped to
+ * the I915_EXEC_RENDER engine for execution, and so it is never
+ * reported as active itself. Some hardware may have parallel
+ * execution engines, e.g. multiple media engines, which are
+ * mapped to the same identifier in the EXECBUFFER2 ioctl and
+ * so are not separately reported for busyness.
+ *
+ * Caveat emptor:
+ * Only the boolean result of this query is reliable; that is whether
+ * the object is idle or busy. The report of which engines are busy
+ * should be only used as a heuristic.
+ */
+ __u32 busy;
+};
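+
+/* A hedged decode of the busy dword documented above (fd and handle are
+ * assumed, errors ignored); per the caveat, only the idle/busy boolean
+ * should be relied upon:
+ *
+ *	struct drm_i915_gem_busy busy = { .handle = handle };
+ *	__u32 write_engine, read_engines;
+ *
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ *	write_engine = busy.busy & 0xffff;
+ *	read_engines = busy.busy >> 16;
+ */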
+
+/**
+ * I915_CACHING_NONE
+ *
+ * GPU access is not coherent with cpu caches. Default for machines without an
+ * LLC.
+ */
+#define I915_CACHING_NONE 0
+/**
+ * I915_CACHING_CACHED
+ *
+ * GPU access is coherent with cpu caches and furthermore the data is cached in
+ * last-level caches shared between cpu cores and the gpu GT. Default on
+ * machines with HAS_LLC.
+ */
+#define I915_CACHING_CACHED 1
+/**
+ * I915_CACHING_DISPLAY
+ *
+ * Special GPU caching mode which is coherent with the scanout engines.
+ * Transparently falls back to I915_CACHING_NONE on platforms where no special
+ * cache mode (like write-through or gfdt flushing) is available. The kernel
+ * automatically sets this mode when using a buffer as a scanout target.
+ * Userspace can manually set this mode to avoid a costly stall and clflush in
+ * the hotpath of drawing the first frame.
+ */
+#define I915_CACHING_DISPLAY 2
+
+struct drm_i915_gem_caching {
+	/** Handle of the buffer to set/get the caching level of. */
+ __u32 handle;
+
+ /**
+	 * Caching level to apply or return value.
+	 *
+	 * Bits 0-15 are for generic caching control (i.e. the above defined
+	 * values). Bits 16-31 are reserved for platform-specific variations
+	 * (e.g. l3$ caching on gen7). */
+ __u32 caching;
+};
+
+#define I915_TILING_NONE 0
+#define I915_TILING_X 1
+#define I915_TILING_Y 2
+#define I915_TILING_LAST I915_TILING_Y
+
+#define I915_BIT_6_SWIZZLE_NONE 0
+#define I915_BIT_6_SWIZZLE_9 1
+#define I915_BIT_6_SWIZZLE_9_10 2
+#define I915_BIT_6_SWIZZLE_9_11 3
+#define I915_BIT_6_SWIZZLE_9_10_11 4
+/* Not seen by userland */
+#define I915_BIT_6_SWIZZLE_UNKNOWN 5
+/* Seen by userland. */
+#define I915_BIT_6_SWIZZLE_9_17 6
+#define I915_BIT_6_SWIZZLE_9_10_17 7
+
+struct drm_i915_gem_set_tiling {
+ /** Handle of the buffer to have its tiling state updated */
+ __u32 handle;
+
+ /**
+ * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
+ * I915_TILING_Y).
+ *
+ * This value is to be set on request, and will be updated by the
+ * kernel on successful return with the actual chosen tiling layout.
+ *
+ * The tiling mode may be demoted to I915_TILING_NONE when the system
+ * has bit 6 swizzling that can't be managed correctly by GEM.
+ *
+ * Buffer contents become undefined when changing tiling_mode.
+ */
+ __u32 tiling_mode;
+
+ /**
+ * Stride in bytes for the object when in I915_TILING_X or
+ * I915_TILING_Y.
+ */
+ __u32 stride;
+
+ /**
+ * Returned address bit 6 swizzling required for CPU access through
+ * mmap mapping.
+ */
+ __u32 swizzle_mode;
+};
+
+struct drm_i915_gem_get_tiling {
+ /** Handle of the buffer to get tiling state for. */
+ __u32 handle;
+
+ /**
+ * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X,
+ * I915_TILING_Y).
+ */
+ __u32 tiling_mode;
+
+ /**
+ * Returned address bit 6 swizzling required for CPU access through
+ * mmap mapping.
+ */
+ __u32 swizzle_mode;
+
+ /**
+ * Returned address bit 6 swizzling required for CPU access through
+ * mmap mapping whilst bound.
+ */
+ __u32 phys_swizzle_mode;
+};
+
+struct drm_i915_gem_get_aperture {
+ /** Total size of the aperture used by i915_gem_execbuffer, in bytes */
+ __u64 aper_size;
+
+ /**
+ * Available space in the aperture used by i915_gem_execbuffer, in
+ * bytes
+ */
+ __u64 aper_available_size;
+};
+
+struct drm_i915_get_pipe_from_crtc_id {
+	/** ID of CRTC being requested */
+ __u32 crtc_id;
+
+	/** pipe of requested CRTC */
+ __u32 pipe;
+};
+
+#define I915_MADV_WILLNEED 0
+#define I915_MADV_DONTNEED 1
+#define __I915_MADV_PURGED 2 /* internal state */
+
+struct drm_i915_gem_madvise {
+ /** Handle of the buffer to change the backing store advice */
+ __u32 handle;
+
+ /* Advice: either the buffer will be needed again in the near future,
+	 * or won't be and could be discarded under memory pressure.
+ */
+ __u32 madv;
+
+ /** Whether the backing store still exists. */
+ __u32 retained;
+};
+
+/* flags */
+#define I915_OVERLAY_TYPE_MASK 0xff
+#define I915_OVERLAY_YUV_PLANAR 0x01
+#define I915_OVERLAY_YUV_PACKED 0x02
+#define I915_OVERLAY_RGB 0x03
+
+#define I915_OVERLAY_DEPTH_MASK 0xff00
+#define I915_OVERLAY_RGB24 0x1000
+#define I915_OVERLAY_RGB16 0x2000
+#define I915_OVERLAY_RGB15 0x3000
+#define I915_OVERLAY_YUV422 0x0100
+#define I915_OVERLAY_YUV411 0x0200
+#define I915_OVERLAY_YUV420 0x0300
+#define I915_OVERLAY_YUV410 0x0400
+
+#define I915_OVERLAY_SWAP_MASK 0xff0000
+#define I915_OVERLAY_NO_SWAP 0x000000
+#define I915_OVERLAY_UV_SWAP 0x010000
+#define I915_OVERLAY_Y_SWAP 0x020000
+#define I915_OVERLAY_Y_AND_UV_SWAP 0x030000
+
+#define I915_OVERLAY_FLAGS_MASK 0xff000000
+#define I915_OVERLAY_ENABLE 0x01000000
+
+struct drm_intel_overlay_put_image {
+ /* various flags and src format description */
+ __u32 flags;
+ /* source picture description */
+ __u32 bo_handle;
+ /* stride values and offsets are in bytes, buffer relative */
+ __u16 stride_Y; /* stride for packed formats */
+ __u16 stride_UV;
+	__u32 offset_Y;	/* offset for packed formats */
+ __u32 offset_U;
+ __u32 offset_V;
+ /* in pixels */
+ __u16 src_width;
+ __u16 src_height;
+	/* to compensate for the scaling factors for partially covered surfaces */
+ __u16 src_scan_width;
+ __u16 src_scan_height;
+ /* output crtc description */
+ __u32 crtc_id;
+ __u16 dst_x;
+ __u16 dst_y;
+ __u16 dst_width;
+ __u16 dst_height;
+};
+
+/* flags */
+#define I915_OVERLAY_UPDATE_ATTRS (1<<0)
+#define I915_OVERLAY_UPDATE_GAMMA (1<<1)
+#define I915_OVERLAY_DISABLE_DEST_COLORKEY (1<<2)
+struct drm_intel_overlay_attrs {
+ __u32 flags;
+ __u32 color_key;
+ __s32 brightness;
+ __u32 contrast;
+ __u32 saturation;
+ __u32 gamma0;
+ __u32 gamma1;
+ __u32 gamma2;
+ __u32 gamma3;
+ __u32 gamma4;
+ __u32 gamma5;
+};
+
+/*
+ * Intel sprite handling
+ *
+ * Color keying works with a min/mask/max tuple. Both source and destination
+ * color keying is allowed.
+ *
+ * Source keying:
+ * Sprite pixels within the min & max values, masked against the color channels
+ * specified in the mask field, will be transparent. All other pixels will
+ * be displayed on top of the primary plane. For RGB surfaces, only the min
+ * and mask fields will be used; ranged compares are not allowed.
+ *
+ * Destination keying:
+ * Primary plane pixels that match the min value, masked against the color
+ * channels specified in the mask field, will be replaced by corresponding
+ * pixels from the sprite plane.
+ *
+ * Note that source & destination keying are exclusive; only one can be
+ * active on a given plane.
+ */
+
+#define I915_SET_COLORKEY_NONE (1<<0) /* Deprecated. Instead set
+ * flags==0 to disable colorkeying.
+ */
+#define I915_SET_COLORKEY_DESTINATION (1<<1)
+#define I915_SET_COLORKEY_SOURCE (1<<2)
+struct drm_intel_sprite_colorkey {
+ __u32 plane_id;
+ __u32 min_value;
+ __u32 channel_mask;
+ __u32 max_value;
+ __u32 flags;
+};
+
+struct drm_i915_gem_wait {
+ /** Handle of BO we shall wait on */
+ __u32 bo_handle;
+ __u32 flags;
+	/** Number of nanoseconds to wait. Returns the time remaining. */
+ __s64 timeout_ns;
+};
+
+struct drm_i915_gem_context_create {
+	/* output: id of new context */
+ __u32 ctx_id;
+ __u32 pad;
+};
+
+struct drm_i915_gem_context_destroy {
+ __u32 ctx_id;
+ __u32 pad;
+};
+
+struct drm_i915_reg_read {
+ /*
+ * Register offset.
+ * For 64bit wide registers where the upper 32bits don't immediately
+ * follow the lower 32bits, the offset of the lower 32bits must
+	 * be specified.
+ */
+ __u64 offset;
+#define I915_REG_READ_8B_WA (1ul << 0)
+
+ __u64 val; /* Return value */
+};
+/* Known registers:
+ *
+ * Render engine timestamp - 0x2358 + 64bit - gen7+
+ * - Note this register returns an invalid value if using the default
+ *   single-instruction 8-byte read; to work around that, pass the
+ *   I915_REG_READ_8B_WA flag in the offset field.
+ *
+ */
+
+struct drm_i915_reset_stats {
+ __u32 ctx_id;
+ __u32 flags;
+
+ /* All resets since boot/module reload, for all contexts */
+ __u32 reset_count;
+
+ /* Number of batches lost when active in GPU, for this context */
+ __u32 batch_active;
+
+ /* Number of batches lost pending for execution, for this context */
+ __u32 batch_pending;
+
+ __u32 pad;
+};
+
+struct drm_i915_gem_userptr {
+ __u64 user_ptr;
+ __u64 user_size;
+ __u32 flags;
+#define I915_USERPTR_READ_ONLY 0x1
+#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
+ /**
+ * Returned handle for the object.
+ *
+ * Object handles are nonzero.
+ */
+ __u32 handle;
+};
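+
+/* Illustrative wrap of an existing allocation (ptr must be page aligned
+ * and length a multiple of the page size; lifetime rules are glossed
+ * over here). On success up.handle is a GEM handle backed by ptr:
+ *
+ *	struct drm_i915_gem_userptr up = { 0 };
+ *
+ *	up.user_ptr = (__u64)(uintptr_t)ptr;
+ *	up.user_size = length;
+ *	ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &up);
+ */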
+
+struct drm_i915_gem_context_param {
+ __u32 ctx_id;
+ __u32 size;
+ __u64 param;
+#define I915_CONTEXT_PARAM_BAN_PERIOD 0x1
+#define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2
+#define I915_CONTEXT_PARAM_GTT_SIZE 0x3
+#define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4
+#define I915_CONTEXT_PARAM_BANNABLE 0x5
+#define I915_CONTEXT_PARAM_PRIORITY 0x6
+#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
+#define I915_CONTEXT_DEFAULT_PRIORITY 0
+#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+ __u64 value;
+};
+
+enum drm_i915_oa_format {
+ I915_OA_FORMAT_A13 = 1, /* HSW only */
+ I915_OA_FORMAT_A29, /* HSW only */
+ I915_OA_FORMAT_A13_B8_C8, /* HSW only */
+ I915_OA_FORMAT_B4_C8, /* HSW only */
+ I915_OA_FORMAT_A45_B8_C8, /* HSW only */
+ I915_OA_FORMAT_B4_C8_A16, /* HSW only */
+ I915_OA_FORMAT_C4_B8, /* HSW+ */
+
+ /* Gen8+ */
+ I915_OA_FORMAT_A12,
+ I915_OA_FORMAT_A12_B8_C8,
+ I915_OA_FORMAT_A32u40_A4u32_B8_C8,
+
+ I915_OA_FORMAT_MAX /* non-ABI */
+};
+
+enum drm_i915_perf_property_id {
+ /**
+ * Open the stream for a specific context handle (as used with
+ * execbuffer2). A stream opened for a specific context this way
+ * won't typically require root privileges.
+ */
+ DRM_I915_PERF_PROP_CTX_HANDLE = 1,
+
+ /**
+ * A value of 1 requests the inclusion of raw OA unit reports as
+ * part of stream samples.
+ */
+ DRM_I915_PERF_PROP_SAMPLE_OA,
+
+ /**
+	 * The value specifies which set of OA unit metrics should be
+	 * configured, defining the contents of any OA unit reports.
+ */
+ DRM_I915_PERF_PROP_OA_METRICS_SET,
+
+ /**
+ * The value specifies the size and layout of OA unit reports.
+ */
+ DRM_I915_PERF_PROP_OA_FORMAT,
+
+ /**
+ * Specifying this property implicitly requests periodic OA unit
+ * sampling and (at least on Haswell) the sampling frequency is derived
+ * from this exponent as follows:
+ *
+ * 80ns * 2^(period_exponent + 1)
+ */
+ DRM_I915_PERF_PROP_OA_EXPONENT,
+
+ DRM_I915_PERF_PROP_MAX /* non-ABI */
+};
+
+struct drm_i915_perf_open_param {
+ __u32 flags;
+#define I915_PERF_FLAG_FD_CLOEXEC (1<<0)
+#define I915_PERF_FLAG_FD_NONBLOCK (1<<1)
+#define I915_PERF_FLAG_DISABLED (1<<2)
+
+ /** The number of u64 (id, value) pairs */
+ __u32 num_properties;
+
+ /**
+ * Pointer to array of u64 (id, value) pairs configuring the stream
+ * to open.
+ */
+ __u64 properties_ptr;
+};
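+
+/* A sketch of opening an OA stream with the properties defined above;
+ * metrics_set and oa_exponent are platform-dependent placeholders, and
+ * error handling is omitted:
+ *
+ *	__u64 props[] = {
+ *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
+ *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set,
+ *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
+ *		DRM_I915_PERF_PROP_OA_EXPONENT, oa_exponent,
+ *	};
+ *	struct drm_i915_perf_open_param param = {
+ *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
+ *		.num_properties = sizeof(props) / (2 * sizeof(__u64)),
+ *		.properties_ptr = (__u64)(uintptr_t)props,
+ *	};
+ *	stream_fd = ioctl(fd, DRM_IOCTL_I915_PERF_OPEN, &param);
+ */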
+
+/**
+ * Enable data capture for a stream that was either opened in a disabled state
+ * via I915_PERF_FLAG_DISABLED or was later disabled via
+ * I915_PERF_IOCTL_DISABLE.
+ *
+ * It is intended to be cheaper to disable and enable a stream than it may be
+ * to close and re-open a stream with the same configuration.
+ *
+ * It's undefined whether any pending data for the stream will be lost.
+ */
+#define I915_PERF_IOCTL_ENABLE _IO('i', 0x0)
+
+/**
+ * Disable data capture for a stream.
+ *
+ * It is an error to try to read a stream that is disabled.
+ */
+#define I915_PERF_IOCTL_DISABLE _IO('i', 0x1)
+
+/**
+ * Common to all i915 perf records
+ */
+struct drm_i915_perf_record_header {
+ __u32 type;
+ __u16 pad;
+ __u16 size;
+};
+
+enum drm_i915_perf_record_type {
+
+ /**
+	 * Samples are the workhorse record type whose contents are extensible
+ * and defined when opening an i915 perf stream based on the given
+ * properties.
+ *
+ * Boolean properties following the naming convention
+ * DRM_I915_PERF_SAMPLE_xyz_PROP request the inclusion of 'xyz' data in
+ * every sample.
+ *
+ * The order of these sample properties given by userspace has no
+	 * effect on the ordering of data within a sample. The order is
+ * documented here.
+ *
+ * struct {
+ * struct drm_i915_perf_record_header header;
+ *
+ * { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
+ * };
+ */
+ DRM_I915_PERF_RECORD_SAMPLE = 1,
+
+ /*
+ * Indicates that one or more OA reports were not written by the
+ * hardware. This can happen for example if an MI_REPORT_PERF_COUNT
+ * command collides with periodic sampling - which would be more likely
+ * at higher sampling frequencies.
+ */
+ DRM_I915_PERF_RECORD_OA_REPORT_LOST = 2,
+
+ /**
+ * An error occurred that resulted in all pending OA reports being lost.
+ */
+ DRM_I915_PERF_RECORD_OA_BUFFER_LOST = 3,
+
+ DRM_I915_PERF_RECORD_MAX /* non-ABI */
+};
+
+/**
+ * Structure to upload perf dynamic configuration into the kernel.
+ */
+struct drm_i915_perf_oa_config {
+ /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+ char uuid[36];
+
+ __u32 n_mux_regs;
+ __u32 n_boolean_regs;
+ __u32 n_flex_regs;
+
+ /*
+ * These fields are pointers to tuples of u32 values (register address,
+ * value). For example the expected length of the buffer pointed by
+ * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+ */
+ __u64 mux_regs_ptr;
+ __u64 boolean_regs_ptr;
+ __u64 flex_regs_ptr;
+};
+
+struct drm_i915_query_item {
+ __u64 query_id;
+#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+
+ /*
+ * When set to zero by userspace, this is filled with the size of the
+ * data to be written at the data_ptr pointer. The kernel sets this
+ * value to a negative value to signal an error on a particular query
+ * item.
+ */
+ __s32 length;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * Data will be written at the location pointed by data_ptr when the
+ * value of length matches the length of the data to be written by the
+ * kernel.
+ */
+ __u64 data_ptr;
+};
+
+struct drm_i915_query {
+ __u32 num_items;
+
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u32 flags;
+
+ /*
+ * This points to an array of num_items drm_i915_query_item structures.
+ */
+ __u64 items_ptr;
+};
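+
+/*
+ * A minimal usage sketch (illustrative, not part of the upstream ABI
+ * description) of the two-step pattern described above: first call with
+ * length set to 0 to learn the required size, then call again with a
+ * buffer. drm_fd and the missing error handling are assumptions;
+ * DRM_IOCTL_I915_QUERY is assumed to be defined earlier in this header.
+ *
+ *	struct drm_i915_query_item item = {
+ *		.query_id = DRM_I915_QUERY_TOPOLOGY_INFO,
+ *	};
+ *	struct drm_i915_query q = {
+ *		.num_items = 1,
+ *		.items_ptr = (__u64)(uintptr_t)&item,
+ *	};
+ *
+ *	ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &q);  // item.length now holds size
+ *	item.data_ptr = (__u64)(uintptr_t)malloc(item.length);
+ *	ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &q);  // kernel fills data_ptr
+ */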
+
+/*
+ * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO:
+ *
+ * data: contains the 3 pieces of information:
+ *
+ * - the slice mask with one bit per slice telling whether a slice is
+ * available. The availability of slice X can be queried with the following
+ * formula :
+ *
+ * (data[X / 8] >> (X % 8)) & 1
+ *
+ * - the subslice mask for each slice with one bit per subslice telling
+ * whether a subslice is available. The availability of subslice Y in slice
+ * X can be queried with the following formula:
+ *
+ * (data[subslice_offset +
+ * X * subslice_stride +
+ * Y / 8] >> (Y % 8)) & 1
+ *
+ * - the EU mask for each subslice in each slice with one bit per EU telling
+ * whether an EU is available. The availability of EU Z in subslice Y in
+ * slice X can be queried with the following formula:
+ *
+ * (data[eu_offset +
+ * (X * max_subslices + Y) * eu_stride +
+ * Z / 8] >> (Z % 8)) & 1
+ */
+struct drm_i915_query_topology_info {
+ /*
+ * Unused for now. Must be cleared to zero.
+ */
+ __u16 flags;
+
+ __u16 max_slices;
+ __u16 max_subslices;
+ __u16 max_eus_per_subslice;
+
+ /*
+ * Offset in data[] at which the subslice masks are stored.
+ */
+ __u16 subslice_offset;
+
+ /*
+ * Stride at which each of the subslice masks for each slice are
+ * stored.
+ */
+ __u16 subslice_stride;
+
+ /*
+ * Offset in data[] at which the EU masks are stored.
+ */
+ __u16 eu_offset;
+
+ /*
+ * Stride at which each of the EU masks for each subslice are stored.
+ */
+ __u16 eu_stride;
+
+ __u8 data[];
+};
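+
+/*
+ * A minimal sketch (illustrative, not part of the upstream ABI
+ * description) evaluating the availability formulas above; "info" is
+ * assumed to point at a buffer filled in by DRM_I915_QUERY_TOPOLOGY_INFO.
+ *
+ *	static int slice_available(const struct drm_i915_query_topology_info *info,
+ *				   int x)
+ *	{
+ *		return (info->data[x / 8] >> (x % 8)) & 1;
+ *	}
+ *
+ *	static int subslice_available(const struct drm_i915_query_topology_info *info,
+ *				      int x, int y)
+ *	{
+ *		return (info->data[info->subslice_offset +
+ *				   x * info->subslice_stride +
+ *				   y / 8] >> (y % 8)) & 1;
+ *	}
+ */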
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _UAPI_I915_DRM_H_ */
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
new file mode 100644
index 000000000..0c30ab898
--- /dev/null
+++ b/tools/include/uapi/linux/bpf.h
@@ -0,0 +1,2796 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_H__
+#define _UAPI__LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word (64-bit) */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+/* jmp encodings */
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JLT 0xa0 /* LT is unsigned, '<' */
+#define BPF_JLE 0xb0 /* LE is unsigned, '<=' */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_JSLT 0xc0 /* SLT is signed, '<' */
+#define BPF_JSLE 0xd0 /* SLE is signed, '<=' */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and a stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
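+
+/*
+ * A minimal sketch (illustrative, not part of the upstream ABI
+ * description): hand-encoding the two-instruction program "r0 = 0; exit"
+ * with this structure. BPF_K and BPF_JMP come from linux/bpf_common.h,
+ * included above.
+ *
+ *	struct bpf_insn prog[] = {
+ *		{ .code = BPF_ALU64 | BPF_MOV | BPF_K,
+ *		  .dst_reg = BPF_REG_0, .imm = 0 },	// r0 = 0
+ *		{ .code = BPF_JMP | BPF_EXIT },		// return r0
+ *	};
+ */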
+
+/* Key of a BPF_MAP_TYPE_LPM_TRIE entry */
+struct bpf_lpm_trie_key {
+ __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */
+ __u8 data[0]; /* Arbitrary size */
+};
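+
+/*
+ * A minimal sketch (illustrative, not part of the upstream ABI
+ * description): building a key that matches the IPv4 prefix
+ * 192.168.1.0/24 for an LPM trie lookup. Error handling is omitted.
+ *
+ *	size_t key_size = sizeof(struct bpf_lpm_trie_key) + 4;
+ *	struct bpf_lpm_trie_key *key = malloc(key_size);
+ *
+ *	key->prefixlen = 24;
+ *	memcpy(key->data, "\xc0\xa8\x01\x00", 4);	// 192.168.1.0
+ */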
+
+struct bpf_cgroup_storage_key {
+ __u64 cgroup_inode_id; /* cgroup inode id */
+ __u32 attach_type; /* program attach type */
+};
+
+/* BPF syscall commands, see bpf(2) man-page for details. */
+enum bpf_cmd {
+ BPF_MAP_CREATE,
+ BPF_MAP_LOOKUP_ELEM,
+ BPF_MAP_UPDATE_ELEM,
+ BPF_MAP_DELETE_ELEM,
+ BPF_MAP_GET_NEXT_KEY,
+ BPF_PROG_LOAD,
+ BPF_OBJ_PIN,
+ BPF_OBJ_GET,
+ BPF_PROG_ATTACH,
+ BPF_PROG_DETACH,
+ BPF_PROG_TEST_RUN,
+ BPF_PROG_GET_NEXT_ID,
+ BPF_MAP_GET_NEXT_ID,
+ BPF_PROG_GET_FD_BY_ID,
+ BPF_MAP_GET_FD_BY_ID,
+ BPF_OBJ_GET_INFO_BY_FD,
+ BPF_PROG_QUERY,
+ BPF_RAW_TRACEPOINT_OPEN,
+ BPF_BTF_LOAD,
+ BPF_BTF_GET_FD_BY_ID,
+ BPF_TASK_FD_QUERY,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+ BPF_MAP_TYPE_PROG_ARRAY,
+ BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ BPF_MAP_TYPE_PERCPU_HASH,
+ BPF_MAP_TYPE_PERCPU_ARRAY,
+ BPF_MAP_TYPE_STACK_TRACE,
+ BPF_MAP_TYPE_CGROUP_ARRAY,
+ BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ BPF_MAP_TYPE_LPM_TRIE,
+ BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ BPF_MAP_TYPE_HASH_OF_MAPS,
+ BPF_MAP_TYPE_DEVMAP,
+ BPF_MAP_TYPE_SOCKMAP,
+ BPF_MAP_TYPE_CPUMAP,
+ BPF_MAP_TYPE_XSKMAP,
+ BPF_MAP_TYPE_SOCKHASH,
+ BPF_MAP_TYPE_CGROUP_STORAGE,
+ BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+ BPF_PROG_TYPE_TRACEPOINT,
+ BPF_PROG_TYPE_XDP,
+ BPF_PROG_TYPE_PERF_EVENT,
+ BPF_PROG_TYPE_CGROUP_SKB,
+ BPF_PROG_TYPE_CGROUP_SOCK,
+ BPF_PROG_TYPE_LWT_IN,
+ BPF_PROG_TYPE_LWT_OUT,
+ BPF_PROG_TYPE_LWT_XMIT,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_CGROUP_DEVICE,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_RAW_TRACEPOINT,
+ BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_PROG_TYPE_LWT_SEG6LOCAL,
+ BPF_PROG_TYPE_LIRC_MODE2,
+ BPF_PROG_TYPE_SK_REUSEPORT,
+};
+
+enum bpf_attach_type {
+ BPF_CGROUP_INET_INGRESS,
+ BPF_CGROUP_INET_EGRESS,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_DEVICE,
+ BPF_SK_MSG_VERDICT,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_LIRC_MODE2,
+ __MAX_BPF_ATTACH_TYPE
+};
+
+#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+
+/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
+ *
+ * NONE(default): No further bpf programs allowed in the subtree.
+ *
+ * BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program,
+ * the program in this cgroup yields to sub-cgroup program.
+ *
+ * BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program,
+ * that cgroup program gets run in addition to the program in this cgroup.
+ *
+ * Only one program is allowed to be attached to a cgroup with
+ * NONE or BPF_F_ALLOW_OVERRIDE flag.
+ * Attaching another program on top of NONE or BPF_F_ALLOW_OVERRIDE will
+ * release the old program and attach the new one. Attach flags have to match.
+ *
+ * Multiple programs are allowed to be attached to a cgroup with
+ * BPF_F_ALLOW_MULTI flag. They are executed in FIFO order
+ * (those that were attached first, run first).
+ * The programs of a sub-cgroup are executed first, then the programs of
+ * this cgroup, and then the programs of the parent cgroup.
+ * When a child program makes a decision (like picking a TCP CA or a
+ * sock bind), the parent program has a chance to override it.
+ *
+ * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
+ * A cgroup with NONE doesn't allow any programs in sub-cgroups.
+ * Ex1:
+ * cgrp1 (MULTI progs A, B) ->
+ * cgrp2 (OVERRIDE prog C) ->
+ * cgrp3 (MULTI prog D) ->
+ * cgrp4 (OVERRIDE prog E) ->
+ * cgrp5 (NONE prog F)
+ * the event in cgrp5 triggers execution of F,D,A,B in that order.
+ * if prog F is detached, the execution is E,D,A,B
+ * if prog F and D are detached, the execution is E,A,B
+ * if prog F, E and D are detached, the execution is C,A,B
+ *
+ * All eligible programs are executed regardless of return code from
+ * earlier programs.
+ */
+#define BPF_F_ALLOW_OVERRIDE (1U << 0)
+#define BPF_F_ALLOW_MULTI (1U << 1)
+
+/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will perform strict alignment checking as if the kernel
+ * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set,
+ * and NET_IP_ALIGN defined to 2.
+ */
+#define BPF_F_STRICT_ALIGNMENT (1U << 0)
+
+/* If BPF_F_ANY_ALIGNMENT is used in BPF_PROG_LOAD command, the
+ * verifier will allow any alignment whatsoever. On platforms
+ * with strict alignment requirements for loads and stores (such
+ * as sparc and mips) the verifier validates that all loads and
+ * stores provably follow this requirement. This flag turns that
+ * checking and enforcement off.
+ *
+ * It is mostly used for testing when we want to validate the
+ * context and memory access aspects of the verifier, but because
+ * of an unaligned access the alignment check would trigger before
+ * the one we are interested in.
+ */
+#define BPF_F_ANY_ALIGNMENT (1U << 1)
+
+/* when bpf_ldimm64->src_reg == BPF_PSEUDO_MAP_FD, bpf_ldimm64->imm == fd */
+#define BPF_PSEUDO_MAP_FD 1
+
+/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
+ * offset to another bpf function
+ */
+#define BPF_PSEUDO_CALL 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+/* flags for BPF_MAP_CREATE command */
+#define BPF_F_NO_PREALLOC (1U << 0)
+/* Instead of having one common LRU list in the
+ * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
+ * which can scale and perform better.
+ * Note, the LRU nodes (including free nodes) cannot be moved
+ * across different LRU lists.
+ */
+#define BPF_F_NO_COMMON_LRU (1U << 1)
+/* Specify numa node during map creation */
+#define BPF_F_NUMA_NODE (1U << 2)
+
+/* flags for BPF_PROG_QUERY */
+#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+
+#define BPF_OBJ_NAME_LEN 16U
+
+/* Flags for accessing BPF object */
+#define BPF_F_RDONLY (1U << 3)
+#define BPF_F_WRONLY (1U << 4)
+
+/* Flag for stack_map, store build_id+offset instead of pointer */
+#define BPF_F_STACK_BUILD_ID (1U << 5)
+
+enum bpf_stack_build_id_status {
+	/* user space needs an empty entry to identify the end of a trace */
+ BPF_STACK_BUILD_ID_EMPTY = 0,
+ /* with valid build_id and offset */
+ BPF_STACK_BUILD_ID_VALID = 1,
+ /* couldn't get build_id, fallback to ip */
+ BPF_STACK_BUILD_ID_IP = 2,
+};
+
+#define BPF_BUILD_ID_SIZE 20
+struct bpf_stack_build_id {
+ __s32 status;
+ unsigned char build_id[BPF_BUILD_ID_SIZE];
+ union {
+ __u64 offset;
+ __u64 ip;
+ };
+};
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ __u32 map_flags; /* BPF_MAP_CREATE related
+ * flags defined above.
+ */
+ __u32 inner_map_fd; /* fd pointing to the inner map */
+ __u32 numa_node; /* numa node (effective only if
+ * BPF_F_NUMA_NODE is set).
+ */
+ char map_name[BPF_OBJ_NAME_LEN];
+ __u32 map_ifindex; /* ifindex of netdev to create on */
+ __u32 btf_fd; /* fd pointing to a BTF type data */
+ __u32 btf_key_type_id; /* BTF type_id of the key */
+ __u32 btf_value_type_id; /* BTF type_id of the value */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ __u32 kern_version; /* checked when prog_type=kprobe */
+ __u32 prog_flags;
+ char prog_name[BPF_OBJ_NAME_LEN];
+ __u32 prog_ifindex; /* ifindex of netdev to prep for */
+ /* For some prog types expected attach type must be known at
+ * load time to verify attach type specific parts of prog
+ * (context accesses, allowed helpers, etc).
+ */
+ __u32 expected_attach_type;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_* commands */
+ __aligned_u64 pathname;
+ __u32 bpf_fd;
+ __u32 file_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_ATTACH/DETACH commands */
+ __u32 target_fd; /* container object to attach to */
+ __u32 attach_bpf_fd; /* eBPF program to attach */
+ __u32 attach_type;
+ __u32 attach_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
+ __u32 prog_fd;
+ __u32 retval;
+ __u32 data_size_in;
+ __u32 data_size_out;
+ __aligned_u64 data_in;
+ __aligned_u64 data_out;
+ __u32 repeat;
+ __u32 duration;
+ } test;
+
+ struct { /* anonymous struct used by BPF_*_GET_*_ID */
+ union {
+ __u32 start_id;
+ __u32 prog_id;
+ __u32 map_id;
+ __u32 btf_id;
+ };
+ __u32 next_id;
+ __u32 open_flags;
+ };
+
+ struct { /* anonymous struct used by BPF_OBJ_GET_INFO_BY_FD */
+ __u32 bpf_fd;
+ __u32 info_len;
+ __aligned_u64 info;
+ } info;
+
+ struct { /* anonymous struct used by BPF_PROG_QUERY command */
+ __u32 target_fd; /* container object to query */
+ __u32 attach_type;
+ __u32 query_flags;
+ __u32 attach_flags;
+ __aligned_u64 prog_ids;
+ __u32 prog_cnt;
+ } query;
+
+ struct {
+ __u64 name;
+ __u32 prog_fd;
+ } raw_tracepoint;
+
+ struct { /* anonymous struct for BPF_BTF_LOAD */
+ __aligned_u64 btf;
+ __aligned_u64 btf_log_buf;
+ __u32 btf_size;
+ __u32 btf_log_size;
+ __u32 btf_log_level;
+ };
+
+ struct {
+ __u32 pid; /* input: pid */
+ __u32 fd; /* input: fd */
+ __u32 flags; /* input: flags */
+ __u32 buf_len; /* input/output: buf len */
+ __aligned_u64 buf; /* input/output:
+ * tp_name for tracepoint
+ * symbol for kprobe
+ * filename for uprobe
+ */
+		__u32		prog_id;	/* output: prog_id */
+ __u32 fd_type; /* output: BPF_FD_TYPE_* */
+ __u64 probe_offset; /* output: probe_offset */
+ __u64 probe_addr; /* output: probe_addr */
+ } task_fd_query;
+} __attribute__((aligned(8)));
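+
+/*
+ * A minimal sketch (illustrative, not part of the upstream ABI
+ * description): creating a hash map through the bpf(2) syscall using the
+ * anonymous BPF_MAP_CREATE struct above. The raw syscall(2) wrapper is
+ * used because libc may not expose a bpf() wrapper; error handling is
+ * omitted.
+ *
+ *	union bpf_attr attr = {
+ *		.map_type = BPF_MAP_TYPE_HASH,
+ *		.key_size = sizeof(__u32),
+ *		.value_size = sizeof(__u64),
+ *		.max_entries = 1024,
+ *	};
+ *	int map_fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+ */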
+
+/* The description below is an attempt at providing documentation to eBPF
+ * developers about the multiple available eBPF helper functions. It can be
+ * parsed and used to produce a manual page. The workflow is the following,
+ * and requires the rst2man utility:
+ *
+ * $ ./scripts/bpf_helpers_doc.py \
+ * --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
+ * $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
+ * $ man /tmp/bpf-helpers.7
+ *
+ * Note that in order to produce this external documentation, some RST
+ * formatting is used in the descriptions to get "bold" and "italics" in
+ * manual pages. Also note that the few trailing white spaces are
+ * intentional, removing them would break paragraphs for rst2man.
+ *
+ * Start of BPF helper function descriptions:
+ *
+ * void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Perform a lookup in *map* for an entry associated to *key*.
+ * Return
+ * Map value associated to *key*, or **NULL** if no entry was
+ * found.
+ *
+ * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * Description
+ * Add or update the value of the entry associated to *key* in
+ * *map* with *value*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * Flag value **BPF_NOEXIST** cannot be used for maps of types
+ * **BPF_MAP_TYPE_ARRAY** or **BPF_MAP_TYPE_PERCPU_ARRAY** (all
+ * elements always exist), the helper would return an error.
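+ *
+ * As a minimal sketch (illustrative, not part of the original
+ * description), one can create a per-key counter only when it is
+ * absent; *counters* is an assumed map with **__u64** values:
+ *
+ * ::
+ *
+ * __u64 zero = 0;
+ *
+ * bpf_map_update_elem(&counters, &key, &zero, BPF_NOEXIST);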
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * Description
+ * Delete entry with *key* from *map*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read(void *dst, u32 size, const void *src)
+ * Description
+ * For tracing programs, safely attempt to read *size* bytes from
+ * address *src* and store the data in *dst*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_ktime_get_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * Description
+ * This helper is a "printk()-like" facility for debugging. It
+ * prints a message defined by format *fmt* (of size *fmt_size*)
+ * to file *\/sys/kernel/debug/tracing/trace* from DebugFS, if
+ * available. It can take up to three additional **u64**
+ * arguments (as for eBPF helpers, the total number of arguments is
+ * limited to five).
+ *
+ * Each time the helper is called, it appends a line to the trace.
+ * The format of the trace is customizable, and the exact output
+ * one will get depends on the options set in
+ * *\/sys/kernel/debug/tracing/trace_options* (see also the
+ * *README* file under the same directory). However, it usually
+ * defaults to something like:
+ *
+ * ::
+ *
+ * telnet-470 [001] .N.. 419421.045894: 0x00000001: <formatted msg>
+ *
+ * In the above:
+ *
+ * * ``telnet`` is the name of the current task.
+ * * ``470`` is the PID of the current task.
+ * * ``001`` is the CPU number on which the task is
+ * running.
+ * * In ``.N..``, each character refers to a set of
+ * options (whether irqs are enabled, scheduling
+ * options, whether hard/softirqs are running, level of
+ * preempt_disabled respectively). **N** means that
+ * **TIF_NEED_RESCHED** and **PREEMPT_NEED_RESCHED**
+ * are set.
+ * * ``419421.045894`` is a timestamp.
+ * * ``0x00000001`` is a fake value used by BPF for the
+ * instruction pointer register.
+ * * ``<formatted msg>`` is the message formatted with
+ * *fmt*.
+ *
+ * The conversion specifiers supported by *fmt* are similar, but
+ * more limited than for printk(). They are **%d**, **%i**,
+ * **%u**, **%x**, **%ld**, **%li**, **%lu**, **%lx**, **%lld**,
+ * **%lli**, **%llu**, **%llx**, **%p**, **%s**. No modifier (size
+ * of field, padding with zeroes, etc.) is available, and the
+ * helper will return **-EINVAL** (but print nothing) if it
+ * encounters an unknown specifier.
+ *
+ * Also, note that **bpf_trace_printk**\ () is slow, and should
+ * only be used for debugging purposes. For this reason, a notice
+ * block (spanning several lines) is printed to kernel logs and
+ * states that the helper should not be used "for production use"
+ * the first time this helper is used (or more precisely, when
+ * **trace_printk**\ () buffers are allocated). For passing values
+ * to user space, perf events should be preferred.
+ * Return
+ * The number of bytes written to the buffer, or a negative error
+ * in case of failure.
+ *
+ * u32 bpf_get_prandom_u32(void)
+ * Description
+ * Get a pseudo-random number.
+ *
+ * From a security point of view, this helper uses its own
+ * pseudo-random internal state, and cannot be used to infer the
+ * seed of other random functions in the kernel. However, it is
+ * essential to note that the generator used by the helper is not
+ * cryptographically secure.
+ * Return
+ * A random 32-bit unsigned value.
+ *
+ * u32 bpf_get_smp_processor_id(void)
+ * Description
+ * Get the SMP (symmetric multiprocessing) processor id. Note that
+ * all programs run with preemption disabled, which means that the
+ * SMP processor id is stable during all the execution of the
+ * program.
+ * Return
+ * The SMP id of the processor running the program.
+ *
+ * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. *flags* are a combination of
+ * **BPF_F_RECOMPUTE_CSUM** (automatically recompute the
+ * checksum for the packet after storing the bytes) and
+ * **BPF_F_INVALIDATE_HASH** (set *skb*\ **->hash**, *skb*\
+ * **->swhash** and *skb*\ **->l4hash** to 0).
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * Description
+ * Recompute the layer 3 (e.g. IP) checksum for the packet
+ * associated to *skb*. Computation is incremental, so the helper
+ * must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored in *size*.
+ * Alternatively, it is possible to store the difference between
+ * the previous and the new values of the header field in *to*, by
+ * setting *from* and *size* to 0. For both methods, *offset*
+ * indicates the location of the IP checksum within the packet.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * Description
+ * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
+ * packet associated to *skb*. Computation is incremental, so the
+ * helper must know the former value of the header field that was
+ * modified (*from*), the new value of this field (*to*), and the
+ * number of bytes (2 or 4) for this field, stored on the lowest
+ * four bits of *flags*. Alternatively, it is possible to store
+ * the difference between the previous and the new values of the
+ * header field in *to*, by setting *from* and the four lowest
+ * bits of *flags* to 0. For both methods, *offset* indicates the
+ * location of the IP checksum within the packet. In addition to
+ * the size of the field, *flags* can be added (bitwise OR) actual
+ * flags. With **BPF_F_MARK_MANGLED_0**, a null checksum is left
+ * untouched (unless **BPF_F_MARK_ENFORCE** is added as well), and
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+ * flexibility and can handle sizes larger than 2 or 4 for the
+ * checksum to update.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * Description
+ * This special helper is used to trigger a "tail call", or in
+ * other words, to jump into another eBPF program. The same stack
+ * frame is used (but values on stack and in registers for the
+ * caller are not accessible to the callee). This mechanism allows
+ * for program chaining, either for raising the maximum number of
+ * available eBPF instructions, or to execute given programs in
+ * conditional blocks. For security reasons, there is an upper
+ * limit to the number of successive tail calls that can be
+ * performed.
+ *
+ * Upon call of this helper, the program attempts to jump into a
+ * program referenced at index *index* in *prog_array_map*, a
+ * special map of type **BPF_MAP_TYPE_PROG_ARRAY**, and passes
+ * *ctx*, a pointer to the context.
+ *
+ * If the call succeeds, the kernel immediately runs the first
+ * instruction of the new program. This is not a function call,
+ * and it never returns to the previous program. If the call
+ * fails, then the helper has no effect, and the caller continues
+ * to run its subsequent instructions. A call can fail if the
+ * destination program for the jump does not exist (i.e. *index* is
+ * greater than or equal to the number of entries in *prog_array_map*), or
+ * if the maximum number of tail calls has been reached for this
+ * chain of programs. This limit is defined in the kernel by the
+ * macro **MAX_TAIL_CALL_CNT** (not accessible to user space),
+ * which is currently set to 32.
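+ *
+ * A minimal sketch (illustrative, not part of the original
+ * description), assuming an XDP program and *jmp_table*, an assumed
+ * map of type **BPF_MAP_TYPE_PROG_ARRAY**:
+ *
+ * ::
+ *
+ * bpf_tail_call(ctx, &jmp_table, index);
+ *
+ * // Reached only if the tail call failed (bad index or
+ * // tail call limit hit); fall back to a default action.
+ * return XDP_PASS;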
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * Description
+ * Clone and redirect the packet associated to *skb* to another
+ * net device of index *ifindex*. Both ingress and egress
+ * interfaces can be used for redirection. The **BPF_F_INGRESS**
+ * value in *flags* is used to make the distinction (ingress path
+ * is selected if the flag is present, egress path otherwise).
+ * This is the only flag supported for now.
+ *
+ * In comparison with **bpf_redirect**\ () helper,
+ * **bpf_clone_redirect**\ () has the associated cost of
+ * duplicating the packet buffer, but this can be executed out of
+ * the eBPF program. Conversely, **bpf_redirect**\ () is more
+ * efficient, but it is handled through an action code where the
+ * redirection happens only after the eBPF program has returned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_current_pid_tgid(void)
+ * Return
+ * A 64-bit integer containing the current tgid and pid, and
+ * created as such:
+ * *current_task*\ **->tgid << 32 \|**
+ * *current_task*\ **->pid**.
+ *
+ * u64 bpf_get_current_uid_gid(void)
+ * Return
+ * A 64-bit integer containing the current GID and UID, and
+ * created as such: *current_gid* **<< 32 \|** *current_uid*.
+ *
+ * int bpf_get_current_comm(char *buf, u32 size_of_buf)
+ * Description
+ * Copy the **comm** attribute of the current task into *buf* of
+ * *size_of_buf*. The **comm** attribute contains the name of
+ * the executable (excluding the path) for the current task. The
+ * *size_of_buf* must be strictly positive. On success, the
+ * helper makes sure that the *buf* is NUL-terminated. On failure,
+ * it is filled with zeroes.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u32 bpf_get_cgroup_classid(struct sk_buff *skb)
+ * Description
+ * Retrieve the classid for the current task, i.e. for the net_cls
+ * cgroup to which *skb* belongs.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ *
+ * The net_cls cgroup provides an interface to tag network packets
+ * based on a user-provided identifier for all traffic coming from
+ * the tasks belonging to the related cgroup. See also the related
+ * kernel documentation, available from the Linux sources in file
+ * *Documentation/cgroup-v1/net_cls.txt*.
+ *
+ * The Linux kernel has two versions for cgroups: there are
+ * cgroups v1 and cgroups v2. Both are available to users, who can
+ * use a mixture of them, but note that the net_cls cgroup is for
+ * cgroup v1 only. This makes it incompatible with BPF programs
+ * run on cgroups, which is a cgroup-v2-only feature (a socket can
+ * only hold data for one version of cgroups at a time).
+ *
+ * This helper is only available if the kernel was compiled with
+ * the **CONFIG_CGROUP_NET_CLASSID** configuration option set to
+ * "**y**" or to "**m**".
+ * Return
+ * The classid, or 0 for the default unconfigured classid.
+ *
+ * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * Description
+ * Push a *vlan_tci* (VLAN tag control information) of protocol
+ * *vlan_proto* to the packet associated to *skb*, then update
+ * the checksum. Note that if *vlan_proto* is different from
+ * **ETH_P_8021Q** and **ETH_P_8021AD**, it is considered to
+ * be **ETH_P_8021Q**.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * Description
+ * Pop a VLAN header from the packet associated to *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Get tunnel metadata. This helper takes a pointer *key* to an
+ * empty **struct bpf_tunnel_key** of **size**, that will be
+ * filled with tunnel metadata for the packet associated to *skb*.
+ * The *flags* can be set to **BPF_F_TUNINFO_IPV6**, which
+ * indicates that the tunnel is based on IPv6 protocol instead of
+ * IPv4.
+ *
+ * The **struct bpf_tunnel_key** is an object that generalizes the
+ * principal parameters used by various tunneling protocols into a
+ * single struct. This way, it can be used to easily make a
+ * decision based on the contents of the encapsulation header,
+ * "summarized" in this struct. In particular, it holds the IP
+ * address of the remote end (IPv4 or IPv6, depending on the case)
+ * in *key*\ **->remote_ipv4** or *key*\ **->remote_ipv6**. Also,
+ * this struct exposes the *key*\ **->tunnel_id**, which is
+ * generally mapped to a VNI (Virtual Network Identifier), making
+ * it programmable together with the **bpf_skb_set_tunnel_key**\
+ * () helper.
+ *
+ * Let's imagine that the following code is part of a program
+ * attached to the TC ingress interface, on one end of a GRE
+ * tunnel, and is supposed to filter out all messages coming from
+ * remote ends with IPv4 address other than 10.0.0.1:
+ *
+ * ::
+ *
+ * int ret;
+ * struct bpf_tunnel_key key = {};
+ *
+ * ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ * if (ret < 0)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * if (key.remote_ipv4 != 0x0a000001)
+ * return TC_ACT_SHOT; // drop packet
+ *
+ * return TC_ACT_OK; // accept packet
+ *
+ * This interface can also be used with all encapsulation devices
+ * that can operate in "collect metadata" mode: instead of having
+ * one network device per specific configuration, the "collect
+ * metadata" mode only requires a single device where the
+ * configuration can be extracted from this helper.
+ *
+ * This can be used together with various tunnels such as VXLan,
+ * Geneve, GRE or IP in IP (IPIP).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * Description
+ * Populate tunnel metadata for the packet associated to *skb*. The
+ * tunnel metadata is set to the contents of *key*, of *size*. The
+ * *flags* can be set to a combination of the following values:
+ *
+ * **BPF_F_TUNINFO_IPV6**
+ * Indicate that the tunnel is based on IPv6 protocol
+ * instead of IPv4.
+ * **BPF_F_ZERO_CSUM_TX**
+ * For IPv4 packets, add a flag to tunnel metadata
+ * indicating that checksum computation should be skipped
+ * and checksum set to zeroes.
+ * **BPF_F_DONT_FRAGMENT**
+ * Add a flag to tunnel metadata indicating that the
+ * packet should not be fragmented.
+ * **BPF_F_SEQ_NUMBER**
+ * Add a flag to tunnel metadata indicating that a
+ * sequence number should be added to tunnel header before
+ * sending the packet. This flag was added for GRE
+ * encapsulation, but might be used with other protocols
+ * as well in the future.
+ *
+ * Here is a typical usage on the transmit path:
+ *
+ * ::
+ *
+ * struct bpf_tunnel_key key;
+ * populate key ...
+ * bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ * bpf_clone_redirect(skb, vxlan_dev_ifindex, 0);
+ *
+ * See also the description of the **bpf_skb_get_tunnel_key**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_perf_event_read(struct bpf_map *map, u64 flags)
+ * Description
+ * Read the value of a perf event counter. This helper relies on a
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of
+ * the perf event counter is selected when *map* is updated with
+ * perf event file descriptors. The *map* is an array whose size
+ * is the number of available CPUs, and each cell contains a value
+ * relative to one CPU. The value to retrieve is indicated by
+ * *flags*, that contains the index of the CPU to look up, masked
+ * with **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * Note that before Linux 4.13, only hardware perf events can be
+ * retrieved.
+ *
+ * Also, be aware that the newer helper
+ * **bpf_perf_event_read_value**\ () is recommended over
+ * **bpf_perf_event_read**\ () in general. The latter has some ABI
+ * quirks where error and counter value are used as a return code
+ * (which is wrong to do since ranges may overlap). This issue is
+ * fixed with **bpf_perf_event_read_value**\ (), which at the same
+ * time provides more features over the **bpf_perf_event_read**\
+ * () interface. Please refer to the description of
+ * **bpf_perf_event_read_value**\ () for details.
+ * Return
+ * The value of the perf event counter read from the map, or a
+ * negative error code in case of failure.
+ *
+ * int bpf_redirect(u32 ifindex, u64 flags)
+ * Description
+ * Redirect the packet to another net device of index *ifindex*.
+ * This helper is somewhat similar to **bpf_clone_redirect**\
+ * (), except that the packet is not cloned, which provides
+ * increased performance.
+ *
+ * Except for XDP, both ingress and egress interfaces can be used
+ * for redirection. The **BPF_F_INGRESS** value in *flags* is used
+ * to make the distinction (ingress path is selected if the flag
+ * is present, egress path otherwise). Currently, XDP only
+ * supports redirection to the egress interface, and accepts no
+ * flag at all.
+ *
+ * The same effect can be attained with the more generic
+ * **bpf_redirect_map**\ (), which requires specific maps to be
+ * used but offers better performance.
+ * Return
+ * For XDP, the helper returns **XDP_REDIRECT** on success or
+ * **XDP_ABORTED** on error. For other program types, the values
+ * are **TC_ACT_REDIRECT** on success or **TC_ACT_SHOT** on
+ * error.
+ *
+ * u32 bpf_get_route_realm(struct sk_buff *skb)
+ * Description
+ * Retrieve the realm of the route, that is to say the
+ * **tclassid** field of the destination for the *skb*. The
+ * identifier retrieved is a user-provided tag, similar to the
+ * one used with the net_cls cgroup (see description for
+ * **bpf_get_cgroup_classid**\ () helper), but here this tag is
+ * held by a route (a destination entry), not by a task.
+ *
+ * Retrieving this identifier works with the clsact TC egress hook
+ * (see also **tc-bpf(8)**), or alternatively on conventional
+ * classful egress qdiscs, but not on TC ingress path. In case of
+ * clsact TC egress hook, this has the advantage that, internally,
+ * the destination entry has not been dropped yet in the transmit
+ * path. Therefore, the destination entry does not need to be
+ * artificially held via **netif_keep_dst**\ () for a classful
+ * qdisc until the *skb* is freed.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_IP_ROUTE_CLASSID** configuration option.
+ * Return
+ * The realm of the route for the packet associated to *skb*, or 0
+ * if none was found.
+ *
+ * int bpf_perf_event_output(struct pt_reg *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through the eBPF stack
+ * and pointed to by *data*.
+ *
+ * The context of the program *ctx* also needs to be passed to the
+ * helper.
+ *
+ * In user space, a program wishing to read the values needs to
+ * call **perf_event_open**\ () on the perf event (either for
+ * one or for all CPUs) and to store the file descriptor into the
+ * *map*. This must be done before the eBPF program can send data
+ * into it. An example is available in file
+ * *samples/bpf/trace_output_user.c* in the Linux kernel source
+ * tree (the eBPF program counterpart is in
+ * *samples/bpf/trace_output_kern.c*).
+ *
+ * **bpf_perf_event_output**\ () achieves better performance
+ * than **bpf_trace_printk**\ () for sharing data with user
+ * space, and is much better suited for streaming data from eBPF
+ * programs.
+ *
+ * Note that this helper is not restricted to tracing use cases
+ * and can be used with programs attached to TC or XDP as well,
+ * where it allows for passing data to user space listeners. Data
+ * can be:
+ *
+ * * Only custom structs,
+ * * Only the packet payload, or
+ * * A combination of both.
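+ *
+ * A minimal sketch (illustrative, not part of the original
+ * description), where *events* is an assumed map of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY** and *evt* a custom struct:
+ *
+ * ::
+ *
+ * struct event evt;
+ *
+ * // fill evt ...
+ * bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
+ * &evt, sizeof(evt));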
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes(const struct sk_buff *skb, u32 offset, void *to, u32 len)
+ * Description
+ * This helper was provided as an easy way to load data from a
+ * packet. It can be used to load *len* bytes from *offset* from
+ * the packet associated to *skb*, into the buffer pointed by
+ * *to*.
+ *
+ * Since Linux 4.7, usage of this helper has mostly been replaced
+ * by "direct packet access", enabling packet data to be
+ * manipulated with *skb*\ **->data** and *skb*\ **->data_end**
+ * pointing respectively to the first byte of packet data and to
+ * the byte after the last byte of packet data. However, it
+ * remains useful if one wishes to read large quantities of data
+ * at once from a packet into the eBPF stack.
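+ *
+ * A minimal sketch (illustrative, not part of the original
+ * description) of the direct packet access pattern mentioned above,
+ * with the mandatory bounds check against **data_end**;
+ * **TC_ACT_SHOT** comes from *linux/pkt_cls.h*:
+ *
+ * ::
+ *
+ * void *data = (void *)(long)skb->data;
+ * void *data_end = (void *)(long)skb->data_end;
+ * struct ethhdr *eth = data;
+ *
+ * if (data + sizeof(*eth) > data_end)
+ * return TC_ACT_SHOT; // out of bounds, drop packet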
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stackid(struct pt_reg *ctx, struct bpf_map *map, u64 flags)
+ * Description
+ * Walk a user or a kernel stack and return its id. To achieve
+ * this, the helper needs *ctx*, which is a pointer to the context
+ * on which the tracing program is executed, and a pointer to a
+ * *map* of type **BPF_MAP_TYPE_STACK_TRACE**.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * a combination of the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_FAST_STACK_CMP**
+ * Compare stacks by hash only.
+ * **BPF_F_REUSE_STACKID**
+ * If two different stacks hash into the same *stackid*,
+ * discard the old one.
+ *
+ * The stack id retrieved is a 32-bit integer handle which
+ * can be further combined with other data (including other stack
+ * ids) and used as a key into maps. This can be useful for
+ * generating a variety of graphs (such as flame graphs or off-cpu
+ * graphs).
+ *
+ * For walking a stack, this helper is an improvement over
+ * **bpf_probe_read**\ (), which can be used with unrolled loops
+ * but is not efficient and consumes a lot of eBPF instructions.
+ * Instead, **bpf_get_stackid**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * The positive or null stack id on success, or a negative error
+ * in case of failure.
+ *
+ * s64 bpf_csum_diff(__be32 *from, u32 from_size, __be32 *to, u32 to_size, __wsum seed)
+ * Description
+ * Compute a checksum difference, from the raw buffer pointed by
+ * *from*, of length *from_size* (that must be a multiple of 4),
+ * towards the raw buffer pointed by *to*, of size *to_size*
+ * (same remark). An optional *seed* can be added to the value
+ * (this can be cascaded, the seed may come from a previous call
+ * to the helper).
+ *
+ * This is flexible enough to be used in several ways:
+ *
+ * * With *from_size* == 0, *to_size* > 0 and *seed* set to
+ * checksum, it can be used when pushing new data.
+ * * With *from_size* > 0, *to_size* == 0 and *seed* set to
+ * checksum, it can be used when removing data from a packet.
+ * * With *from_size* > 0, *to_size* > 0 and *seed* set to 0, it
+ * can be used to compute a diff. Note that *from_size* and
+ * *to_size* do not need to be equal.
+ *
+ * This helper can be used in combination with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\ (), to
+ * which one can feed in the difference computed with
+ * **bpf_csum_diff**\ ().
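+ *
+ * A minimal sketch (illustrative, not part of the original
+ * description): computing the diff for a rewritten IPv4 destination
+ * address and feeding it back in diff mode (*from* and *size* set to
+ * 0); *iph*, *ip_csum_off* and the new address are assumptions, and
+ * **bpf_htonl**\ () is assumed from *bpf_endian.h*:
+ *
+ * ::
+ *
+ * __be32 old_ip = iph->daddr;
+ * __be32 new_ip = bpf_htonl(0x0a000001); // assumed 10.0.0.1
+ * __wsum diff = bpf_csum_diff(&old_ip, 4, &new_ip, 4, 0);
+ *
+ * bpf_l3_csum_replace(skb, ip_csum_off, 0, diff, 0);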
+ * Return
+ * The checksum result, or a negative error code in case of
+ * failure.
+ *
+ * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Retrieve tunnel options metadata for the packet associated to
+ * *skb*, and store the raw tunnel option data to the buffer *opt*
+ * of *size*.
+ *
+ * This helper can be used with encapsulation devices that can
+ * operate in "collect metadata" mode (please refer to the related
+ * note in the description of **bpf_skb_get_tunnel_key**\ () for
+ * more details). A particular example where this can be used is
+ * in combination with the Geneve encapsulation protocol, where it
+ * allows for pushing (with the **bpf_skb_set_tunnel_opt**\ () helper)
+ * and retrieving arbitrary TLVs (Type-Length-Value headers) from
+ * the eBPF program. This allows for full customization of these
+ * headers.
+ * Return
+ * The size of the option data retrieved.
+ *
+ * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, u8 *opt, u32 size)
+ * Description
+ * Set tunnel options metadata for the packet associated to *skb*
+ * to the option data contained in the raw buffer *opt* of *size*.
+ *
+ * See also the description of the **bpf_skb_get_tunnel_opt**\ ()
+ * helper for additional information.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * Description
+ * Change the protocol of the *skb* to *proto*. Currently
+ * supported are transition from IPv4 to IPv6, and from IPv6 to
+ * IPv4. The helper takes care of the groundwork for the
+ * transition, including resizing the socket buffer. The eBPF
+ * program is expected to fill the new headers, if any, via
+ * **skb_store_bytes**\ () and to recompute the checksums with
+ * **bpf_l3_csum_replace**\ () and **bpf_l4_csum_replace**\
+ * (). The main case for this helper is to perform NAT64
+ * operations out of an eBPF program.
+ *
+ * Internally, the GSO type is marked as dodgy so that headers are
+ * checked and segments are recalculated by the GSO/GRO engine.
+ * The size for GSO target is adapted as well.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * Description
+ * Change the packet type for the packet associated to *skb*. This
+ * comes down to setting *skb*\ **->pkt_type** to *type*, except
+ * the eBPF program does not have write access to *skb*\
+ * **->pkt_type** beside this helper. Using a helper here allows
+ * for graceful handling of errors.
+ *
+ * The major use case is to change incoming *skb*s to
+ * **PACKET_HOST** in a programmatic way instead of having to
+ * recirculate via **redirect**\ (..., **BPF_F_INGRESS**), for
+ * example.
+ *
+ * Note that *type* only allows certain values. At this time, they
+ * are:
+ *
+ * **PACKET_HOST**
+ * Packet is for us.
+ * **PACKET_BROADCAST**
+ * Send packet to all.
+ * **PACKET_MULTICAST**
+ * Send packet to group.
+ * **PACKET_OTHERHOST**
+ * Send packet to someone else.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * Description
+ * Check whether *skb* is a descendant of the cgroup2 held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 0, if the *skb* failed the cgroup2 descendant test.
+ * * 1, if the *skb* succeeded the cgroup2 descendant test.
+ * * A negative error code, if an error occurred.
+ *
+ * u32 bpf_get_hash_recalc(struct sk_buff *skb)
+ * Description
+ * Retrieve the hash of the packet, *skb*\ **->hash**. If it is
+ * not set, in particular if the hash was cleared due to mangling,
+ * recompute this hash. Later accesses to the hash can be done
+ * directly with *skb*\ **->hash**.
+ *
+ * Calling **bpf_set_hash_invalid**\ (), changing a packet
+ * prototype with **bpf_skb_change_proto**\ (), or calling
+ * **bpf_skb_store_bytes**\ () with the
+ * **BPF_F_INVALIDATE_HASH** are actions susceptible to clear
+ * the hash and to trigger a new computation for the next call to
+ * **bpf_get_hash_recalc**\ ().
+ * Return
+ * The 32-bit hash.
+ *
+ * u64 bpf_get_current_task(void)
+ * Return
+ * A pointer to the current task struct.
+ *
+ * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * Description
+ * Attempt in a safe way to write *len* bytes from the buffer
+ * *src* to *dst* in memory. It only works for threads that are in
+ * user context, and *dst* must be a valid user space address.
+ *
+ * This helper should not be used to implement any kind of
+ * security mechanism because of TOC-TOU attacks, but rather to
+ * debug, divert, and manipulate execution of semi-cooperative
+ * processes.
+ *
+ * Keep in mind that this feature is meant for experiments, and it
+ * has a risk of crashing the system and running programs.
+ * Therefore, when an eBPF program using this helper is attached,
+ * a warning including PID and process name is printed to kernel
+ * logs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * Description
+ * Check whether the probe is being run in the context of a given
+ * subset of the cgroup2 hierarchy. The cgroup2 to test is held by
+ * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
+ * Return
+ * The return value depends on the result of the test, and can be:
+ *
+ * * 1, if current task belongs to the cgroup2.
+ * * 0, if current task does not belong to the cgroup2.
+ * * A negative error code, if an error occurred.
+ *
+ * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Resize (trim or grow) the packet associated to *skb* to the
+ * new *len*. The *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * The basic idea is that the helper performs the needed work to
+ * change the size of the packet, then the eBPF program rewrites
+ * the rest via helpers like **bpf_skb_store_bytes**\ (),
+ * **bpf_l3_csum_replace**\ (), **bpf_l4_csum_replace**\ ()
+ * and others. This helper is a slow path utility intended for
+ * replies with control messages. And because it is targeted for
+ * slow path, the helper itself can afford to be slow: it
+ * implicitly linearizes, unclones and drops offloads from the
+ * *skb*.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * Description
+ * Pull in non-linear data in case the *skb* is non-linear and not
+ * all of *len* are part of the linear section. Make *len* bytes
+ * from *skb* readable and writable. If a zero value is passed for
+ * *len*, then the whole length of the *skb* is pulled.
+ *
+ * This helper is only needed for reading and writing with direct
+ * packet access.
+ *
+ * For direct packet access, testing that offsets to access
+ * are within packet boundaries (test on *skb*\ **->data_end**) is
+ * susceptible to fail if offsets are invalid, or if the requested
+ * data is in non-linear parts of the *skb*. On failure the
+ * program can just bail out, or in the case of a non-linear
+ * buffer, use a helper to make the data available. The
+ * **bpf_skb_load_bytes**\ () helper is a first solution to access
+ * the data. Another one consists in using **bpf_skb_pull_data**\ ()
+ * to pull in the non-linear parts once, then retesting and
+ * eventually accessing the data.
+ *
+ * At the same time, this also makes sure the *skb* is uncloned,
+ * which is a necessary condition for direct write. As this needs
+ * to be an invariant for the write part only, the verifier
+ * detects writes and adds a prologue that is calling
+ * **bpf_skb_pull_data()** to effectively unclone the *skb* from
+ * the very beginning in case it is indeed cloned.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * s64 bpf_csum_update(struct sk_buff *skb, __wsum csum)
+ * Description
+ * Add the checksum *csum* into *skb*\ **->csum** in case the
+ * driver has supplied a checksum for the entire packet into that
+ * field. Return an error otherwise. This helper is intended to be
+ * used in combination with **bpf_csum_diff**\ (), in particular
+ * when the checksum needs to be updated after data has been
+ * written into the packet through direct packet access.
+ * Return
+ * The checksum on success, or a negative error code in case of
+ * failure.
+ *
+ * void bpf_set_hash_invalid(struct sk_buff *skb)
+ * Description
+ * Invalidate the current *skb*\ **->hash**. It can be used after
+ * mangling on headers through direct packet access, in order to
+ * indicate that the hash is outdated and to trigger a
+ * recalculation the next time the kernel tries to access this
+ * hash or when the **bpf_get_hash_recalc**\ () helper is called.
+ *
+ * int bpf_get_numa_node_id(void)
+ * Description
+ * Return the id of the current NUMA node. The primary use case
+ * for this helper is the selection of sockets for the local NUMA
+ * node, when the program is attached to sockets using the
+ * **SO_ATTACH_REUSEPORT_EBPF** option (see also **socket(7)**),
+ * but the helper is also available to other eBPF program types,
+ * similarly to **bpf_get_smp_processor_id**\ ().
+ * Return
+ * The id of the current NUMA node.
+ *
+ * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * Description
+ * Grow the headroom of the packet associated to *skb* and adjust the
+ * offset of the MAC header accordingly, adding *len* bytes of
+ * space. It automatically extends and reallocates memory as
+ * required.
+ *
+ * This helper can be used on a layer 3 *skb* to push a MAC header
+ * for redirection into a layer 2 device.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
+ * it is possible to use a negative value for *delta*. This helper
+ * can be used to prepare the packet for pushing or popping
+ * headers.
+ *
+ * A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_probe_read_str(void *dst, int size, const void *unsafe_ptr)
+ * Description
+ *		Copy a NUL-terminated string from an unsafe address
+ * *unsafe_ptr* to *dst*. The *size* should include the
+ * terminating NUL byte. In case the string length is smaller than
+ * *size*, the target is not padded with further NUL bytes. If the
+ * string length is larger than *size*, just *size*-1 bytes are
+ * copied and the last byte is set to NUL.
+ *
+ * On success, the length of the copied string is returned. This
+ * makes this helper useful in tracing programs for reading
+ * strings, and more importantly to get its length at runtime. See
+ * the following snippet:
+ *
+ * ::
+ *
+ * SEC("kprobe/sys_open")
+ * void bpf_sys_open(struct pt_regs *ctx)
+ * {
+ * char buf[PATHLEN]; // PATHLEN is defined to 256
+ * int res = bpf_probe_read_str(buf, sizeof(buf),
+ * ctx->di);
+ *
+ * // Consume buf, for example push it to
+ * // userspace via bpf_perf_event_output(); we
+ * // can use res (the string length) as event
+ * // size, after checking its boundaries.
+ * }
+ *
+ *		In comparison, using the **bpf_probe_read**\ () helper here
+ *		instead to read the string would require estimating the length
+ *		at compile time, and would often result in copying more memory
+ *		than necessary.
+ *
+ * Another useful use case is when parsing individual process
+ * arguments or individual environment variables navigating
+ * *current*\ **->mm->arg_start** and *current*\
+ * **->mm->env_start**: using this helper and the return value,
+ * one can quickly iterate at the right offset of the memory area.
+ * Return
+ * On success, the strictly positive length of the string,
+ * including the trailing NUL character. On error, a negative
+ * value.
+ *
+ * u64 bpf_get_socket_cookie(struct sk_buff *skb)
+ * Description
+ * If the **struct sk_buff** pointed by *skb* has a known socket,
+ * retrieve the cookie (generated by the kernel) of this socket.
+ * If no cookie has been set yet, generate a new cookie. Once
+ * generated, the socket cookie remains stable for the life of the
+ * socket. This helper can be useful for monitoring per socket
+ * networking traffic statistics as it provides a unique socket
+ * identifier per namespace.
+ * Return
+ *		An 8-byte long non-decreasing number on success, or 0 if the
+ * socket field is missing inside *skb*.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
+ * Description
+ *		Equivalent to the **bpf_get_socket_cookie**\ () helper that
+ *		accepts *skb*, but gets the socket from a **struct
+ *		bpf_sock_addr** context.
+ * Return
+ *		An 8-byte long non-decreasing number.
+ *
+ * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
+ * Description
+ *		Equivalent to the **bpf_get_socket_cookie**\ () helper that
+ *		accepts *skb*, but gets the socket from a **struct
+ *		bpf_sock_ops** context.
+ * Return
+ *		An 8-byte long non-decreasing number.
+ *
+ * u32 bpf_get_socket_uid(struct sk_buff *skb)
+ * Return
+ * The owner UID of the socket associated to *skb*. If the socket
+ * is **NULL**, or if it is not a full socket (i.e. if it is a
+ * time-wait or a request socket instead), **overflowuid** value
+ * is returned (note that **overflowuid** might also be the actual
+ * UID value for the socket).
+ *
+ * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * Description
+ * Set the full hash for *skb* (set the field *skb*\ **->hash**)
+ * to value *hash*.
+ * Return
+ * 0
+ *
+ * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **setsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **setsockopt(2)** for more information.
+ * The option value of length *optlen* is pointed by *optval*.
+ *
+ * This helper actually implements a subset of **setsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **SOL_SOCKET**, which supports the following *optname*\ s:
+ * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * * **IPPROTO_TCP**, which supports the following *optname*\ s:
+ * **TCP_CONGESTION**, **TCP_BPF_IW**,
+ * **TCP_BPF_SNDCWND_CLAMP**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
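+ *
+ *		As an illustrative sketch (the buffer and variable names are
+ *		arbitrary), a **BPF_PROG_TYPE_SOCK_OPS** program could switch
+ *		the congestion control algorithm once a connection is
+ *		established:
+ *
+ *		::
+ *
+ *			char cong[] = "reno";
+ *
+ *			if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
+ *				bpf_setsockopt(skops, IPPROTO_TCP,
+ *					       TCP_CONGESTION, cong,
+ *					       sizeof(cong));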
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_adjust_room(struct sk_buff *skb, u32 len_diff, u32 mode, u64 flags)
+ * Description
+ * Grow or shrink the room for data in the packet associated to
+ * *skb* by *len_diff*, and according to the selected *mode*.
+ *
+ * There is a single supported mode at this time:
+ *
+ * * **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
+ * (room space is added or removed below the layer 3 header).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the endpoint referenced by *map* at
+ * index *key*. Depending on its type, this *map* can contain
+ * references to net devices (for forwarding packets through other
+ * ports), or to CPUs (for redirecting XDP frames to another CPU;
+ * but this is only implemented for native XDP (with driver
+ * support) as of this writing).
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ *		When used to redirect packets to net devices, this helper
+ *		provides a significant performance increase over
+ *		**bpf_redirect**\ (). This is due to various implementation
+ *		details of the underlying mechanisms, one of which is the fact
+ *		that **bpf_redirect_map**\ () tries to send packets as a "bulk"
+ *		to the device.
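+ *
+ *		A sketch for an XDP program (the *tx_ports* map and *port_idx*
+ *		index are hypothetical, assuming a **BPF_MAP_TYPE_DEVMAP**
+ *		map):
+ *
+ *		::
+ *
+ *			return bpf_redirect_map(&tx_ports, port_idx, 0);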
+ * Return
+ * **XDP_REDIRECT** on success, or **XDP_ABORTED** on error.
+ *
+ * int bpf_sk_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * Redirect the packet to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a *map* referencing sockets. The
+ * *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust the address pointed by *xdp_md*\ **->data_meta** by
+ * *delta* (which can be positive or negative). Note that this
+ * operation modifies the address stored in *xdp_md*\ **->data**,
+ * so the latter must be loaded only after the helper has been
+ * called.
+ *
+ * The use of *xdp_md*\ **->data_meta** is optional and programs
+ * are not required to use it. The rationale is that when the
+ * packet is processed with XDP (e.g. as DoS filter), it is
+ * possible to push further meta data along with it before passing
+ * to the stack, and to give the guarantee that an ingress eBPF
+ * program attached as a TC classifier on the same device can pick
+ * this up for further post-processing. Since TC works with socket
+ * buffers, it remains possible to set from XDP the **mark** or
+ * **priority** pointers, or other pointers for the socket buffer.
+ * Having this scratch space generic and programmable allows for
+ * more flexibility as the user is free to store whatever meta
+ * data they need.
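+ *
+ *		A sketch reserving four bytes of metadata in front of the
+ *		packet and storing an arbitrary mark there:
+ *
+ *		::
+ *
+ *			__u32 *meta;
+ *
+ *			if (bpf_xdp_adjust_meta(xdp_md, -(int)sizeof(*meta)))
+ *				return XDP_ABORTED;
+ *			meta = (void *)(long)xdp_md->data_meta;
+ *			if ((void *)(meta + 1) > (void *)(long)xdp_md->data)
+ *				return XDP_ABORTED;
+ *			*meta = 42;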
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ * Read the value of a perf event counter, and store it into *buf*
+ * of size *buf_size*. This helper relies on a *map* of type
+ * **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. The nature of the perf event
+ * counter is selected when *map* is updated with perf event file
+ * descriptors. The *map* is an array whose size is the number of
+ * available CPUs, and each cell contains a value relative to one
+ * CPU. The value to retrieve is indicated by *flags*, that
+ * contains the index of the CPU to look up, masked with
+ * **BPF_F_INDEX_MASK**. Alternatively, *flags* can be set to
+ * **BPF_F_CURRENT_CPU** to indicate that the value for the
+ * current CPU should be retrieved.
+ *
+ * This helper behaves in a way close to
+ * **bpf_perf_event_read**\ () helper, save that instead of
+ * just returning the value observed, it fills the *buf*
+ * structure. This allows for additional data to be retrieved: in
+ * particular, the enabled and running times (in *buf*\
+ * **->enabled** and *buf*\ **->running**, respectively) are
+ * copied. In general, **bpf_perf_event_read_value**\ () is
+ * recommended over **bpf_perf_event_read**\ (), which has some
+ * ABI issues and provides fewer functionalities.
+ *
+ * These values are interesting, because hardware PMU (Performance
+ * Monitoring Unit) counters are limited resources. When there are
+ * more PMU based perf events opened than available counters,
+ * kernel will multiplex these events so each event gets certain
+ * percentage (but not all) of the PMU time. In case that
+ * multiplexing happens, the number of samples or counter value
+ * will not reflect the case compared to when no multiplexing
+ * occurs. This makes comparison between different runs difficult.
+ * Typically, the counter value should be normalized before
+ * comparing to other experiments. The usual normalization is done
+ * as follows.
+ *
+ * ::
+ *
+ * normalized_counter = counter * t_enabled / t_running
+ *
+ *		Where t_enabled is the time enabled for the event and t_running
+ *		is the time running for the event since the last normalization.
+ *		The enabled and running times are accumulated since the perf
+ *		event open. To achieve a scaling factor between two invocations
+ *		of an eBPF program, users can use the CPU id as the key (which
+ *		is typical for the perf array usage model) to remember the
+ *		previous value and do the calculation inside the eBPF program.
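+ *
+ *		A sketch of the scaling computation (the *events* map name is
+ *		hypothetical, assuming a **BPF_MAP_TYPE_PERF_EVENT_ARRAY**
+ *		map; delta handling against a previous value is elided):
+ *
+ *		::
+ *
+ *			struct bpf_perf_event_value val;
+ *			__u64 scaled;
+ *
+ *			if (!bpf_perf_event_read_value(&events, BPF_F_CURRENT_CPU,
+ *						       &val, sizeof(val)))
+ *				scaled = val.counter * val.enabled / val.running;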
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * Description
+ *		For an eBPF program attached to a perf event, retrieve the
+ * value of the event counter associated to *ctx* and store it in
+ * the structure pointed by *buf* and of size *buf_size*. Enabled
+ * and running times are also stored in the structure (see
+ * description of helper **bpf_perf_event_read_value**\ () for
+ * more details).
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, char *optval, int optlen)
+ * Description
+ * Emulate a call to **getsockopt()** on the socket associated to
+ * *bpf_socket*, which must be a full socket. The *level* at
+ * which the option resides and the name *optname* of the option
+ * must be specified, see **getsockopt(2)** for more information.
+ * The retrieved value is stored in the structure pointed by
+ *		*optval* and of length *optlen*.
+ *
+ * This helper actually implements a subset of **getsockopt()**.
+ * It supports the following *level*\ s:
+ *
+ * * **IPPROTO_TCP**, which supports *optname*
+ * **TCP_CONGESTION**.
+ * * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
+ * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * Description
+ * Used for error injection, this helper uses kprobes to override
+ * the return value of the probed function, and to set it to *rc*.
+ * The first argument is the context *regs* on which the kprobe
+ * works.
+ *
+ *		This helper works by setting the PC (program counter)
+ * to an override function which is run in place of the original
+ * probed function. This means the probed function is not run at
+ * all. The replacement function just returns with the required
+ * value.
+ *
+ * This helper has security implications, and thus is subject to
+ * restrictions. It is only available if the kernel was compiled
+ * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration
+ * option, and in this case it only works on functions tagged with
+ * **ALLOW_ERROR_INJECTION** in the kernel code.
+ *
+ *		Also, the helper is only available for the architectures having
+ *		the **CONFIG_FUNCTION_ERROR_INJECTION** option. As of this
+ *		writing, the x86 architecture is the only one to support this
+ *		feature.
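+ *
+ *		A minimal sketch for a kprobe program forcing the probed
+ *		function to return **-ENOMEM** (assuming **should_failslab**
+ *		is tagged for error injection on the running kernel):
+ *
+ *		::
+ *
+ *			SEC("kprobe/should_failslab")
+ *			int override(struct pt_regs *ctx)
+ *			{
+ *				bpf_override_return(ctx, -ENOMEM);
+ *				return 0;
+ *			}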
+ * Return
+ * 0
+ *
+ * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * Description
+ * Attempt to set the value of the **bpf_sock_ops_cb_flags** field
+ *		for the full TCP socket associated to *bpf_sock* to
+ * *argval*.
+ *
+ * The primary use of this field is to determine if there should
+ * be calls to eBPF programs of type
+ * **BPF_PROG_TYPE_SOCK_OPS** at various points in the TCP
+ * code. A program of the same type can change its value, per
+ * connection and as necessary, when the connection is
+ * established. This field is directly accessible for reading, but
+ * this helper must be used for updates in order to return an
+ * error if an eBPF program tries to set a callback that is not
+ * supported in the current kernel.
+ *
+ * The supported callback values that *argval* can combine are:
+ *
+ * * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
+ * * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
+ * * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
+ *
+ * Here are some examples of where one could call such eBPF
+ * program:
+ *
+ * * When RTO fires.
+ * * When a packet is retransmitted.
+ * * When the connection terminates.
+ * * When a packet is sent.
+ * * When a packet is received.
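+ *
+ *		For instance, a sketch enabling the retransmission and
+ *		state-change callbacks:
+ *
+ *		::
+ *
+ *			bpf_sock_ops_cb_flags_set(bpf_sock,
+ *						  BPF_SOCK_OPS_RETRANS_CB_FLAG |
+ *						  BPF_SOCK_OPS_STATE_CB_FLAG);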
+ * Return
+ * Code **-EINVAL** if the socket is not a full TCP socket;
+ * otherwise, a positive number containing the bits that could not
+ * be set is returned (which comes down to 0 if all bits were set
+ * as required).
+ *
+ * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, apply the verdict of the eBPF program to
+ * the next *bytes* (number of bytes) of message *msg*.
+ *
+ * For example, this helper can be used in the following cases:
+ *
+ * * A single **sendmsg**\ () or **sendfile**\ () system call
+ * contains multiple logical messages that the eBPF program is
+ * supposed to read and for which it should apply a verdict.
+ * * An eBPF program only cares to read the first *bytes* of a
+ * *msg*. If the message has a large payload, then setting up
+ * and calling the eBPF program repeatedly for all bytes, even
+ * though the verdict is already known, would create unnecessary
+ * overhead.
+ *
+ * When called from within an eBPF program, the helper sets a
+ * counter internal to the BPF infrastructure, that is used to
+ * apply the last verdict to the next *bytes*. If *bytes* is
+ * smaller than the current data being processed from a
+ * **sendmsg**\ () or **sendfile**\ () system call, the first
+ * *bytes* will be sent and the eBPF program will be re-run with
+ * the pointer for start of data pointing to byte number *bytes*
+ * **+ 1**. If *bytes* is larger than the current data being
+ * processed, then the eBPF verdict will be applied to multiple
+ * **sendmsg**\ () or **sendfile**\ () calls until *bytes* are
+ * consumed.
+ *
+ * Note that if a socket closes with the internal counter holding
+ * a non-zero value, this is not a problem because data is not
+ * being buffered for *bytes* and is sent as it is received.
+ * Return
+ * 0
+ *
+ * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * Description
+ * For socket policies, prevent the execution of the verdict eBPF
+ * program for message *msg* until *bytes* (byte number) have been
+ * accumulated.
+ *
+ * This can be used when one needs a specific number of bytes
+ * before a verdict can be assigned, even if the data spans
+ * multiple **sendmsg**\ () or **sendfile**\ () calls. The extreme
+ * case would be a user calling **sendmsg**\ () repeatedly with
+ * 1-byte long message segments. Obviously, this is bad for
+ * performance, but it is still valid. If the eBPF program needs
+ * *bytes* bytes to validate a header, this helper can be used to
+ *		prevent the eBPF program from being called again until *bytes* have
+ * been accumulated.
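+ *
+ *		A sketch waiting for a hypothetical 20-byte header before
+ *		letting the verdict program see the message again:
+ *
+ *		::
+ *
+ *			if (msg->data + 20 > msg->data_end) {
+ *				bpf_msg_cork_bytes(msg, 20);
+ *				return SK_PASS;
+ *			}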
+ * Return
+ * 0
+ *
+ * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * Description
+ * For socket policies, pull in non-linear data from user space
+ * for *msg* and set pointers *msg*\ **->data** and *msg*\
+ * **->data_end** to *start* and *end* bytes offsets into *msg*,
+ * respectively.
+ *
+ * If a program of type **BPF_PROG_TYPE_SK_MSG** is run on a
+ * *msg* it can only parse data that the (**data**, **data_end**)
+ * pointers have already consumed. For **sendmsg**\ () hooks this
+ * is likely the first scatterlist element. But for calls relying
+ * on the **sendpage** handler (e.g. **sendfile**\ ()) this will
+ * be the range (**0**, **0**) because the data is shared with
+ * user space and by default the objective is to avoid allowing
+ * user space to modify data while (or after) eBPF verdict is
+ * being decided. This helper can be used to pull in data and to
+ * set the start and end pointer to given values. Data will be
+ * copied if necessary (i.e. if data was not linear and if start
+ * and end pointers do not point to the same chunk).
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * Description
+ * Bind the socket associated to *ctx* to the address pointed by
+ *		*addr*, of length *addr_len*. This allows for making outgoing
+ *		connections from the desired IP address, which can be useful
+ *		for example when all processes inside a cgroup should use one
+ *		single IP address on a host that has multiple IP addresses
+ *		configured.
+ *
+ * This helper works for IPv4 and IPv6, TCP and UDP sockets. The
+ * domain (*addr*\ **->sa_family**) must be **AF_INET** (or
+ * **AF_INET6**). Looking for a free port to bind to can be
+ * expensive, therefore binding to port is not permitted by the
+ * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
+ * must be set to zero.
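+ *
+ *		A sketch for a **cgroup/connect4** program pinning the source
+ *		address (the address is an example; **bpf_htonl**\ () is
+ *		assumed from the usual endianness helpers, and the port is
+ *		left at zero as required):
+ *
+ *		::
+ *
+ *			struct sockaddr_in addr = {
+ *				.sin_family = AF_INET,
+ *				.sin_addr.s_addr = bpf_htonl(0x0a000001), // 10.0.0.1
+ *			};
+ *
+ *			bpf_bind(ctx, (struct sockaddr *)&addr, sizeof(addr));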
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * Description
+ * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
+ * only possible to shrink the packet as of this writing,
+ * therefore *delta* must be a negative integer.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * Description
+ * Retrieve the XFRM state (IP transform framework, see also
+ * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
+ *
+ * The retrieved value is stored in the **struct bpf_xfrm_state**
+ * pointed by *xfrm_state* and of length *size*.
+ *
+ * All values for *flags* are reserved for future usage, and must
+ * be left at zero.
+ *
+ * This helper is available only if the kernel was compiled with
+ * **CONFIG_XFRM** configuration option.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_get_stack(struct pt_regs *regs, void *buf, u32 size, u64 flags)
+ * Description
+ *		Return a user or a kernel stack in a buffer provided by the bpf
+ *		program. To achieve this, the helper needs *regs*, which is a
+ *		pointer to the context on which the tracing program is executed.
+ *		To store the stacktrace, the bpf program provides *buf* with
+ *		a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_stack**\ () can collect up to
+ *		**PERF_MAX_STACK_DEPTH** kernel and user frames, subject
+ *		to a sufficiently large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
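+ *
+ *		A minimal sketch collecting a user-space stack into a
+ *		fixed-size buffer (the buffer size is arbitrary):
+ *
+ *		::
+ *
+ *			char buf[256];
+ *			long n = bpf_get_stack(regs, buf, sizeof(buf),
+ *					       BPF_F_USER_STACK);
+ *
+ *			if (n < 0)
+ *				return 0; // collection failed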
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
+ * int bpf_skb_load_bytes_relative(const struct sk_buff *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * Description
+ * This helper is similar to **bpf_skb_load_bytes**\ () in that
+ * it provides an easy way to load *len* bytes from *offset*
+ * from the packet associated to *skb*, into the buffer pointed
+ * by *to*. The difference to **bpf_skb_load_bytes**\ () is that
+ * a fifth argument *start_header* exists in order to select a
+ * base offset to start from. *start_header* can be one of:
+ *
+ * **BPF_HDR_START_MAC**
+ * Base offset to load data from is *skb*'s mac header.
+ * **BPF_HDR_START_NET**
+ * Base offset to load data from is *skb*'s network header.
+ *
+ *		In general, "direct packet access" is the preferred method to
+ *		access packet data; however, this helper is particularly useful
+ *		in socket filters where *skb*\ **->data** does not always point
+ *		to the start of the mac header and where "direct packet access"
+ *		is not available.
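+ *
+ *		A sketch reading the IPv4 header from a socket filter,
+ *		regardless of where *skb*\ **->data** points:
+ *
+ *		::
+ *
+ *			struct iphdr iph;
+ *
+ *			if (bpf_skb_load_bytes_relative(skb, 0, &iph,
+ *							sizeof(iph),
+ *							BPF_HDR_START_NET))
+ *				return 0;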
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * Description
+ * Do FIB lookup in kernel tables using parameters in *params*.
+ * If lookup is successful and result shows packet is to be
+ * forwarded, the neighbor tables are searched for the nexthop.
+ *		If successful (i.e., the FIB lookup shows forwarding and the
+ *		nexthop is resolved), the nexthop address is returned in
+ *		ipv4_dst or ipv6_dst based on family, smac is set to the mac
+ *		address of the egress device, dmac is set to the nexthop mac
+ *		address, rt_metric is set to the metric from the route
+ *		(IPv4/IPv6 only), and ifindex is set to the device index of the
+ *		nexthop from the FIB lookup.
+ *
+ * *plen* argument is the size of the passed in struct.
+ * *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_FIB_LOOKUP_DIRECT**
+ * Do a direct table lookup vs full lookup using FIB
+ * rules.
+ * **BPF_FIB_LOOKUP_OUTPUT**
+ * Perform lookup from an egress perspective (default is
+ * ingress).
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ *		**struct sk_buff** for tc cls_act programs.
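+ *
+ *		A sketch of the usual XDP forwarding pattern (parameter setup
+ *		from the parsed packet headers is elided):
+ *
+ *		::
+ *
+ *			struct bpf_fib_lookup params = {};
+ *			int rc;
+ *
+ *			// Fill params.family, the addresses and ifindex
+ *			// from the parsed packet headers first.
+ *			rc = bpf_fib_lookup(ctx, &params, sizeof(params), 0);
+ *			if (rc == BPF_FIB_LKUP_RET_SUCCESS) {
+ *				// Rewrite the MAC addresses from params.smac
+ *				// and params.dmac, then redirect the packet
+ *				// to params.ifindex.
+ *			}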
+ * Return
+ * * < 0 if any input argument is invalid
+ * * 0 on success (packet is forwarded, nexthop neighbor exists)
+ * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
+ * packet is not forwarded or needs assist from full stack
+ *
+ * int bpf_sock_hash_update(struct bpf_sock_ops_kern *skops, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * Add an entry to, or update a sockhash *map* referencing sockets.
+ * The *skops* is used as a new value for the entry associated to
+ * *key*. *flags* is one of:
+ *
+ * **BPF_NOEXIST**
+ * The entry for *key* must not exist in the map.
+ * **BPF_EXIST**
+ * The entry for *key* must already exist in the map.
+ * **BPF_ANY**
+ * No condition on the existence of the entry for *key*.
+ *
+ * If the *map* has eBPF programs (parser and verdict), those will
+ * be inherited by the socket being added. If the socket is
+ * already attached to eBPF programs, this results in an error.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ * socket level. If the message *msg* is allowed to pass (i.e. if
+ * the verdict eBPF program returns **SK_PASS**), redirect it to
+ * the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress path otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ * This helper is used in programs implementing policies at the
+ *		skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
+ *		if the verdict eBPF program returns **SK_PASS**), redirect it
+ * to the socket referenced by *map* (of type
+ * **BPF_MAP_TYPE_SOCKHASH**) using hash *key*. Both ingress and
+ * egress interfaces can be used for redirection. The
+ * **BPF_F_INGRESS** value in *flags* is used to make the
+ * distinction (ingress path is selected if the flag is present,
+ * egress otherwise). This is the only flag supported for now.
+ * Return
+ * **SK_PASS** on success, or **SK_DROP** on error.
+ *
+ * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * Description
+ * Encapsulate the packet associated to *skb* within a Layer 3
+ * protocol header. This header is provided in the buffer at
+ * address *hdr*, with *len* its size in bytes. *type* indicates
+ * the protocol of the header and can be one of:
+ *
+ * **BPF_LWT_ENCAP_SEG6**
+ * IPv6 encapsulation with Segment Routing Header
+ * (**struct ipv6_sr_hdr**). *hdr* only contains the SRH,
+ * the IPv6 header is computed by the kernel.
+ * **BPF_LWT_ENCAP_SEG6_INLINE**
+ * Only works if *skb* contains an IPv6 packet. Insert a
+ * Segment Routing Header (**struct ipv6_sr_hdr**) inside
+ * the IPv6 header.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * Description
+ * Store *len* bytes from address *from* into the packet
+ * associated to *skb*, at *offset*. Only the flags, tag and TLVs
+ * inside the outermost IPv6 Segment Routing Header can be
+ * modified through this helper.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * Description
+ * Adjust the size allocated to TLVs in the outermost IPv6
+ * Segment Routing Header contained in the packet associated to
+ * *skb*, at position *offset* by *delta* bytes. Only offsets
+ *		after the segments are accepted. *delta* can be positive
+ *		(growing) as well as negative (shrinking).
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * Description
+ * Apply an IPv6 Segment Routing action of type *action* to the
+ * packet associated to *skb*. Each action takes a parameter
+ * contained at address *param*, and of length *param_len* bytes.
+ * *action* can be one of:
+ *
+ * **SEG6_LOCAL_ACTION_END_X**
+ * End.X action: Endpoint with Layer-3 cross-connect.
+ * Type of *param*: **struct in6_addr**.
+ * **SEG6_LOCAL_ACTION_END_T**
+ * End.T action: Endpoint with specific IPv6 table lookup.
+ * Type of *param*: **int**.
+ * **SEG6_LOCAL_ACTION_END_B6**
+ * End.B6 action: Endpoint bound to an SRv6 policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ * **SEG6_LOCAL_ACTION_END_B6_ENCAP**
+ * End.B6.Encap action: Endpoint bound to an SRv6
+ * encapsulation policy.
+ * Type of param: **struct ipv6_sr_hdr**.
+ *
+ *		A call to this helper is susceptible to change the underlying
+ * packet buffer. Therefore, at load time, all checks on pointers
+ * previously done by the verifier are invalidated and must be
+ * performed again, if the helper is used in combination with
+ * direct packet access.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded key press with *scancode*,
+ * *toggle* value in the given *protocol*. The scancode will be
+ * translated to a keycode using the rc keymap, and reported as
+ * an input key down event. After a period a key up event is
+ * generated. This period can be extended by calling either
+ *		**bpf_rc_keydown**\ () again with the same values, or calling
+ *		**bpf_rc_repeat**\ ().
+ *
+ * Some protocols include a toggle bit, in case the button was
+ * released and pressed again between consecutive scancodes.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ * The *protocol* is the decoded protocol number (see
+ * **enum rc_proto** for some predefined values).
+ *
+ *		This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * int bpf_rc_repeat(void *ctx)
+ * Description
+ * This helper is used in programs implementing IR decoding, to
+ * report a successfully decoded repeat key message. This delays
+ * the generation of a key up event for previously generated
+ * key down event.
+ *
+ * Some IR protocols like NEC have a special IR message for
+ * repeating last button, for when a button is held down.
+ *
+ * The *ctx* should point to the lirc sample as passed into
+ * the program.
+ *
+ *		This helper is only available if the kernel was compiled with
+ * the **CONFIG_BPF_LIRC_MODE2** configuration option set to
+ * "**y**".
+ * Return
+ * 0
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ *		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ *		helper for cgroup v1, providing a tag or identifier that
+ *		can be matched on or used for map lookups, e.g. to implement
+ *		a policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress,
+ * and is available only if the kernel was compiled with the
+ * **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *skb* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* is equal to the level
+ *		of the cgroup associated with *skb*, the return value will be
+ *		the same as that of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are higher in the hierarchy than the immediate cgroup
+ *		associated with *skb*.
+ *
+ *		The format of the returned id and the helper limitations are
+ *		the same as in **bpf_skb_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_get_current_cgroup_id(void)
+ * Return
+ * A 64-bit integer containing the current cgroup id based
+ * on the cgroup within which the current task is running.
+ *
+ * void *bpf_get_local_storage(void *map, u64 flags)
+ * Description
+ * Get the pointer to the local storage area.
+ * The type and the size of the local storage is defined
+ * by the *map* argument.
+ * The *flags* meaning is specific for each map type,
+ * and has to be 0 for cgroup local storage.
+ *
+ * Depending on the bpf program type, a local storage area
+ * can be shared between multiple instances of the bpf program,
+ * running simultaneously.
+ *
+ *		Users should take care of the synchronization themselves,
+ *		for example by using the **BPF_STX_XADD** instruction to alter
+ *		the shared data.
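+ *
+ *		A sketch bumping a per-cgroup counter (the *cg_storage* map
+ *		name is hypothetical, assuming a cgroup local storage map):
+ *
+ *		::
+ *
+ *			__u64 *cnt = bpf_get_local_storage(&cg_storage, 0);
+ *
+ *			__sync_fetch_and_add(cnt, 1); // compiles to BPF_STX_XADD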
+ * Return
+ * Pointer to the local storage area.
+ *
+ * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * Description
+ *		Select a **SO_REUSEPORT** socket from a
+ *		**BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. It checks that the
+ *		selected socket matches the incoming request in the *skb*.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ */
+#define __BPF_FUNC_MAPPER(FN) \
+ FN(unspec), \
+ FN(map_lookup_elem), \
+ FN(map_update_elem), \
+ FN(map_delete_elem), \
+ FN(probe_read), \
+ FN(ktime_get_ns), \
+ FN(trace_printk), \
+ FN(get_prandom_u32), \
+ FN(get_smp_processor_id), \
+ FN(skb_store_bytes), \
+ FN(l3_csum_replace), \
+ FN(l4_csum_replace), \
+ FN(tail_call), \
+ FN(clone_redirect), \
+ FN(get_current_pid_tgid), \
+ FN(get_current_uid_gid), \
+ FN(get_current_comm), \
+ FN(get_cgroup_classid), \
+ FN(skb_vlan_push), \
+ FN(skb_vlan_pop), \
+ FN(skb_get_tunnel_key), \
+ FN(skb_set_tunnel_key), \
+ FN(perf_event_read), \
+ FN(redirect), \
+ FN(get_route_realm), \
+ FN(perf_event_output), \
+ FN(skb_load_bytes), \
+ FN(get_stackid), \
+ FN(csum_diff), \
+ FN(skb_get_tunnel_opt), \
+ FN(skb_set_tunnel_opt), \
+ FN(skb_change_proto), \
+ FN(skb_change_type), \
+ FN(skb_under_cgroup), \
+ FN(get_hash_recalc), \
+ FN(get_current_task), \
+ FN(probe_write_user), \
+ FN(current_task_under_cgroup), \
+ FN(skb_change_tail), \
+ FN(skb_pull_data), \
+ FN(csum_update), \
+ FN(set_hash_invalid), \
+ FN(get_numa_node_id), \
+ FN(skb_change_head), \
+ FN(xdp_adjust_head), \
+ FN(probe_read_str), \
+ FN(get_socket_cookie), \
+ FN(get_socket_uid), \
+ FN(set_hash), \
+ FN(setsockopt), \
+ FN(skb_adjust_room), \
+ FN(redirect_map), \
+ FN(sk_redirect_map), \
+ FN(sock_map_update), \
+ FN(xdp_adjust_meta), \
+ FN(perf_event_read_value), \
+ FN(perf_prog_read_value), \
+ FN(getsockopt), \
+ FN(override_return), \
+ FN(sock_ops_cb_flags_set), \
+ FN(msg_redirect_map), \
+ FN(msg_apply_bytes), \
+ FN(msg_cork_bytes), \
+ FN(msg_pull_data), \
+ FN(bind), \
+ FN(xdp_adjust_tail), \
+ FN(skb_get_xfrm_state), \
+ FN(get_stack), \
+ FN(skb_load_bytes_relative), \
+ FN(fib_lookup), \
+ FN(sock_hash_update), \
+ FN(msg_redirect_hash), \
+ FN(sk_redirect_hash), \
+ FN(lwt_push_encap), \
+ FN(lwt_seg6_store_bytes), \
+ FN(lwt_seg6_adjust_srh), \
+ FN(lwt_seg6_action), \
+ FN(rc_repeat), \
+ FN(rc_keydown), \
+ FN(skb_cgroup_id), \
+ FN(get_current_cgroup_id), \
+ FN(get_local_storage), \
+ FN(sk_select_reuseport), \
+ FN(skb_ancestor_cgroup_id),
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+#define __BPF_ENUM_FN(x) BPF_FUNC_ ## x
+enum bpf_func_id {
+ __BPF_FUNC_MAPPER(__BPF_ENUM_FN)
+ __BPF_FUNC_MAX_ID,
+};
+#undef __BPF_ENUM_FN
+
+/* All flags used by eBPF helper functions, placed here. */
+
+/* BPF_FUNC_skb_store_bytes flags. */
+#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
+#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+
+/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
+ * First 4 bits are for passing the header field size.
+ */
+#define BPF_F_HDR_FIELD_MASK 0xfULL
+
+/* BPF_FUNC_l4_csum_replace flags. */
+#define BPF_F_PSEUDO_HDR (1ULL << 4)
+#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
+#define BPF_F_MARK_ENFORCE (1ULL << 6)
+
+/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+#define BPF_F_INGRESS (1ULL << 0)
+
+/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
+#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+
+/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
+#define BPF_F_SKIP_FIELD_MASK 0xffULL
+#define BPF_F_USER_STACK (1ULL << 8)
+/* flags used by BPF_FUNC_get_stackid only. */
+#define BPF_F_FAST_STACK_CMP (1ULL << 9)
+#define BPF_F_REUSE_STACKID (1ULL << 10)
+/* flags used by BPF_FUNC_get_stack only. */
+#define BPF_F_USER_BUILD_ID (1ULL << 11)
+
+/* BPF_FUNC_skb_set_tunnel_key flags. */
+#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
+#define BPF_F_DONT_FRAGMENT (1ULL << 2)
+#define BPF_F_SEQ_NUMBER (1ULL << 3)
+
+/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
+ * BPF_FUNC_perf_event_read_value flags.
+ */
+#define BPF_F_INDEX_MASK 0xffffffffULL
+#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+/* BPF_FUNC_perf_event_output for sk_buff input context. */
+#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+
+/* Mode for BPF_FUNC_skb_adjust_room helper. */
+enum bpf_adj_room_mode {
+ BPF_ADJ_ROOM_NET,
+};
+
+/* Mode for BPF_FUNC_skb_load_bytes_relative helper. */
+enum bpf_hdr_start_off {
+ BPF_HDR_START_MAC,
+ BPF_HDR_START_NET,
+};
+
+/* Encapsulation type for BPF_FUNC_lwt_push_encap helper. */
+enum bpf_lwt_encap_mode {
+ BPF_LWT_ENCAP_SEG6,
+ BPF_LWT_ENCAP_SEG6_INLINE
+};
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+ __u32 ingress_ifindex;
+ __u32 ifindex;
+ __u32 tc_index;
+ __u32 cb[5];
+ __u32 hash;
+ __u32 tc_classid;
+ __u32 data;
+ __u32 data_end;
+ __u32 napi_id;
+
+ /* Accessed by BPF_PROG_TYPE_sk_skb types from here to ... */
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ /* ... here. */
+
+ __u32 data_meta;
+};
+
+struct bpf_tunnel_key {
+ __u32 tunnel_id;
+ union {
+ __u32 remote_ipv4;
+ __u32 remote_ipv6[4];
+ };
+ __u8 tunnel_tos;
+ __u8 tunnel_ttl;
+ __u16 tunnel_ext; /* Padding, future use. */
+ __u32 tunnel_label;
+};
+
+/* user accessible mirror of in-kernel xfrm_state.
+ * new fields can only be added to the end of this structure
+ */
+struct bpf_xfrm_state {
+ __u32 reqid;
+ __u32 spi; /* Stored in network byte order */
+ __u16 family;
+ __u16 ext; /* Padding, future use. */
+ union {
+ __u32 remote_ipv4; /* Stored in network byte order */
+ __u32 remote_ipv6[4]; /* Stored in network byte order */
+ };
+};
+
+/* Generic BPF return codes which all BPF program types may support.
+ * The values are binary compatible with their TC_ACT_* counterparts to
+ * provide backwards compatibility with existing SCHED_CLS and SCHED_ACT
+ * programs.
+ *
+ * XDP is handled separately, see XDP_*.
+ */
+enum bpf_ret_code {
+ BPF_OK = 0,
+ /* 1 reserved */
+ BPF_DROP = 2,
+ /* 3-6 reserved */
+ BPF_REDIRECT = 7,
+ /* >127 are reserved for prog type specific return codes */
+};
+
+struct bpf_sock {
+ __u32 bound_dev_if;
+ __u32 family;
+ __u32 type;
+ __u32 protocol;
+ __u32 mark;
+ __u32 priority;
+ __u32 src_ip4; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
+ * Stored in network byte order.
+ */
+ __u32 src_port; /* Allows 4-byte read.
+ * Stored in host byte order
+ */
+};
+
+#define XDP_PACKET_HEADROOM 256
+
+/* User return codes for XDP prog type.
+ * A valid XDP program must return one of these defined values. All other
+ * return codes are reserved for future use. Unknown return codes will
+ * result in packet drops and a warning via bpf_warn_invalid_xdp_action().
+ */
+enum xdp_action {
+ XDP_ABORTED = 0,
+ XDP_DROP,
+ XDP_PASS,
+ XDP_TX,
+ XDP_REDIRECT,
+};
+
+/* user accessible metadata for XDP packet hook
+ * new fields must be added to the end of this structure
+ */
+struct xdp_md {
+ __u32 data;
+ __u32 data_end;
+ __u32 data_meta;
+	/* Below accesses go through struct xdp_rxq_info */
+ __u32 ingress_ifindex; /* rxq->dev->ifindex */
+ __u32 rx_queue_index; /* rxq->queue_index */
+};
+
+enum sk_action {
+ SK_DROP = 0,
+ SK_PASS,
+};
+
+/* user accessible metadata for SK_MSG packet hook, new fields must
+ * be added to the end of this structure
+ */
+struct sk_msg_md {
+ void *data;
+ void *data_end;
+
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+};
+
+struct sk_reuseport_md {
+ /*
+ * Start of directly accessible data. It begins from
+ * the tcp/udp header.
+ */
+ void *data;
+ void *data_end; /* End of directly accessible data */
+ /*
+ * Total length of packet (starting from the tcp/udp header).
+ * Note that the directly accessible bytes (data_end - data)
+ * could be less than this "len". Those bytes could be
+ * indirectly read by a helper "bpf_skb_load_bytes()".
+ */
+ __u32 len;
+ /*
+ * Eth protocol in the mac header (network byte order). e.g.
+ * ETH_P_IP(0x0800) and ETH_P_IPV6(0x86DD)
+ */
+ __u32 eth_protocol;
+ __u32 ip_protocol; /* IP protocol. e.g. IPPROTO_TCP, IPPROTO_UDP */
+ __u32 bind_inany; /* Is sock bound to an INANY address? */
+ __u32 hash; /* A hash of the packet 4 tuples */
+};
+
+#define BPF_TAG_SIZE 8
+
+struct bpf_prog_info {
+ __u32 type;
+ __u32 id;
+ __u8 tag[BPF_TAG_SIZE];
+ __u32 jited_prog_len;
+ __u32 xlated_prog_len;
+ __aligned_u64 jited_prog_insns;
+ __aligned_u64 xlated_prog_insns;
+ __u64 load_time; /* ns since boottime */
+ __u32 created_by_uid;
+ __u32 nr_map_ids;
+ __aligned_u64 map_ids;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 gpl_compatible:1;
+ __u32 :31; /* alignment pad */
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 nr_jited_ksyms;
+ __u32 nr_jited_func_lens;
+ __aligned_u64 jited_ksyms;
+ __aligned_u64 jited_func_lens;
+} __attribute__((aligned(8)));
+
+struct bpf_map_info {
+ __u32 type;
+ __u32 id;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ char name[BPF_OBJ_NAME_LEN];
+ __u32 ifindex;
+ __u32 :32;
+ __u64 netns_dev;
+ __u64 netns_ino;
+ __u32 btf_id;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+} __attribute__((aligned(8)));
+
+struct bpf_btf_info {
+ __aligned_u64 btf;
+ __u32 btf_size;
+ __u32 id;
+} __attribute__((aligned(8)));
+
+/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
+ * by user and intended to be used by socket (e.g. to bind to, depends on
+ * attach type).
+ */
+struct bpf_sock_addr {
+ __u32 user_family; /* Allows 4-byte read, but no write. */
+ __u32 user_ip4; /* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+	__u32 user_ip6[4];	/* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+ __u32 user_port; /* Allows 4-byte read and write.
+ * Stored in network byte order
+ */
+ __u32 family; /* Allows 4-byte read, but no write */
+ __u32 type; /* Allows 4-byte read, but no write */
+ __u32 protocol; /* Allows 4-byte read, but no write */
+	__u32 msg_src_ip4;	/* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+	__u32 msg_src_ip6[4];	/* Allows 1,2,4-byte read and 4-byte write.
+ * Stored in network byte order.
+ */
+};
+
+/* User bpf_sock_ops struct to access socket values and specify request ops
+ * and their replies.
+ * Some of these fields are in network (big-endian) byte order and may need
+ * to be converted before use (bpf_ntohl() defined in samples/bpf/bpf_endian.h).
+ * New fields can only be added at the end of this structure
+ */
+struct bpf_sock_ops {
+ __u32 op;
+ union {
+ __u32 args[4]; /* Optionally passed to bpf program */
+ __u32 reply; /* Returned by bpf program */
+ __u32 replylong[4]; /* Optionally returned by bpf prog */
+ };
+ __u32 family;
+ __u32 remote_ip4; /* Stored in network byte order */
+ __u32 local_ip4; /* Stored in network byte order */
+ __u32 remote_ip6[4]; /* Stored in network byte order */
+ __u32 local_ip6[4]; /* Stored in network byte order */
+ __u32 remote_port; /* Stored in network byte order */
+ __u32 local_port; /* stored in host byte order */
+ __u32 is_fullsock; /* Some TCP fields are only valid if
+ * there is a full socket. If not, the
+ * fields read as zero.
+ */
+ __u32 snd_cwnd;
+ __u32 srtt_us; /* Averaged RTT << 3 in usecs */
+ __u32 bpf_sock_ops_cb_flags; /* flags defined in uapi/linux/tcp.h */
+ __u32 state;
+ __u32 rtt_min;
+ __u32 snd_ssthresh;
+ __u32 rcv_nxt;
+ __u32 snd_nxt;
+ __u32 snd_una;
+ __u32 mss_cache;
+ __u32 ecn_flags;
+ __u32 rate_delivered;
+ __u32 rate_interval_us;
+ __u32 packets_out;
+ __u32 retrans_out;
+ __u32 total_retrans;
+ __u32 segs_in;
+ __u32 data_segs_in;
+ __u32 segs_out;
+ __u32 data_segs_out;
+ __u32 lost_out;
+ __u32 sacked_out;
+ __u32 sk_txhash;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+};
+
+/* Definitions for bpf_sock_ops_cb_flags */
+#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
+#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
+#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
+#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently
+ * supported cb flags
+ */
+
+/* List of known BPF sock_ops operators.
+ * New entries can only be added at the end
+ */
+enum {
+ BPF_SOCK_OPS_VOID,
+ BPF_SOCK_OPS_TIMEOUT_INIT, /* Should return SYN-RTO value to use or
+ * -1 if default value should be used
+ */
+	BPF_SOCK_OPS_RWND_INIT,		/* Should return initial advertised
+ * window (in packets) or -1 if default
+ * value should be used
+ */
+ BPF_SOCK_OPS_TCP_CONNECT_CB, /* Calls BPF program right before an
+ * active connection is initialized
+ */
+ BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB, /* Calls BPF program when an
+ * active connection is
+ * established
+ */
+ BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB, /* Calls BPF program when a
+ * passive connection is
+ * established
+ */
+ BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
+ * needs ECN
+ */
+ BPF_SOCK_OPS_BASE_RTT, /* Get base RTT. The correct value is
+ * based on the path and may be
+ * dependent on the congestion control
+ * algorithm. In general it indicates
+ * a congestion threshold. RTTs above
+ * this indicate congestion
+ */
+ BPF_SOCK_OPS_RTO_CB, /* Called when an RTO has triggered.
+ * Arg1: value of icsk_retransmits
+ * Arg2: value of icsk_rto
+ * Arg3: whether RTO has expired
+ */
+ BPF_SOCK_OPS_RETRANS_CB, /* Called when skb is retransmitted.
+ * Arg1: sequence number of 1st byte
+ * Arg2: # segments
+ * Arg3: return value of
+ * tcp_transmit_skb (0 => success)
+ */
+ BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state.
+ * Arg1: old_state
+ * Arg2: new_state
+ */
+ BPF_SOCK_OPS_TCP_LISTEN_CB, /* Called on listen(2), right after
+ * socket transition to LISTEN state.
+ */
+};
+
+/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect
+ * changes between the TCP and BPF versions. Ideally this should never happen.
+ * If it does, we need to add code to convert them before calling
+ * the BPF sock_ops function.
+ */
+enum {
+ BPF_TCP_ESTABLISHED = 1,
+ BPF_TCP_SYN_SENT,
+ BPF_TCP_SYN_RECV,
+ BPF_TCP_FIN_WAIT1,
+ BPF_TCP_FIN_WAIT2,
+ BPF_TCP_TIME_WAIT,
+ BPF_TCP_CLOSE,
+ BPF_TCP_CLOSE_WAIT,
+ BPF_TCP_LAST_ACK,
+ BPF_TCP_LISTEN,
+ BPF_TCP_CLOSING, /* Now a valid state */
+ BPF_TCP_NEW_SYN_RECV,
+
+ BPF_TCP_MAX_STATES /* Leave at the end! */
+};
+
+#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
+#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+
+struct bpf_perf_event_value {
+ __u64 counter;
+ __u64 enabled;
+ __u64 running;
+};
+
+#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
+#define BPF_DEVCG_ACC_READ (1ULL << 1)
+#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+
+#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
+#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+
+struct bpf_cgroup_dev_ctx {
+ /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
+ __u32 access_type;
+ __u32 major;
+ __u32 minor;
+};
+
+struct bpf_raw_tracepoint_args {
+ __u64 args[0];
+};
+
+/* DIRECT: Skip the FIB rules and go to FIB table associated with device
+ * OUTPUT: Do lookup from egress perspective; default is ingress
+ */
+#define BPF_FIB_LOOKUP_DIRECT BIT(0)
+#define BPF_FIB_LOOKUP_OUTPUT BIT(1)
+
+enum {
+ BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
+ BPF_FIB_LKUP_RET_BLACKHOLE, /* dest is blackholed; can be dropped */
+ BPF_FIB_LKUP_RET_UNREACHABLE, /* dest is unreachable; can be dropped */
+ BPF_FIB_LKUP_RET_PROHIBIT, /* dest not allowed; can be dropped */
+ BPF_FIB_LKUP_RET_NOT_FWDED, /* packet is not forwarded */
+ BPF_FIB_LKUP_RET_FWD_DISABLED, /* fwding is not enabled on ingress */
+ BPF_FIB_LKUP_RET_UNSUPP_LWT, /* fwd requires encapsulation */
+ BPF_FIB_LKUP_RET_NO_NEIGH, /* no neighbor entry for nh */
+ BPF_FIB_LKUP_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+};
+
+struct bpf_fib_lookup {
+ /* input: network family for lookup (AF_INET, AF_INET6)
+ * output: network family of egress nexthop
+ */
+ __u8 family;
+
+ /* set if lookup is to consider L4 data - e.g., FIB rules */
+ __u8 l4_protocol;
+ __be16 sport;
+ __be16 dport;
+
+ /* total length of packet from network header - used for MTU check */
+ __u16 tot_len;
+
+ /* input: L3 device index for lookup
+ * output: device index from FIB lookup
+ */
+ __u32 ifindex;
+
+ union {
+ /* inputs to lookup */
+ __u8 tos; /* AF_INET */
+ __be32 flowinfo; /* AF_INET6, flow_label + priority */
+
+ /* output: metric of fib result (IPv4/IPv6 only) */
+ __u32 rt_metric;
+ };
+
+ union {
+ __be32 ipv4_src;
+ __u32 ipv6_src[4]; /* in6_addr; network order */
+ };
+
+ /* input to bpf_fib_lookup, ipv{4,6}_dst is destination address in
+ * network header. output: bpf_fib_lookup sets to gateway address
+ * if FIB lookup returns gateway route
+ */
+ union {
+ __be32 ipv4_dst;
+ __u32 ipv6_dst[4]; /* in6_addr; network order */
+ };
+
+ /* output */
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __u8 smac[6]; /* ETH_ALEN */
+ __u8 dmac[6]; /* ETH_ALEN */
+};
+
+enum bpf_task_fd_type {
+ BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_TRACEPOINT, /* tp name */
+ BPF_FD_TYPE_KPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_KRETPROBE, /* (symbol + offset) or addr */
+ BPF_FD_TYPE_UPROBE, /* filename + offset */
+ BPF_FD_TYPE_URETPROBE, /* filename + offset */
+};
+
+#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/bpf_common.h b/tools/include/uapi/linux/bpf_common.h
new file mode 100644
index 000000000..ee97668bd
--- /dev/null
+++ b/tools/include/uapi/linux/bpf_common.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_BPF_COMMON_H__
+#define _UAPI__LINUX_BPF_COMMON_H__
+
+/* Instruction classes */
+#define BPF_CLASS(code) ((code) & 0x07)
+#define BPF_LD 0x00
+#define BPF_LDX 0x01
+#define BPF_ST 0x02
+#define BPF_STX 0x03
+#define BPF_ALU 0x04
+#define BPF_JMP 0x05
+#define BPF_RET 0x06
+#define BPF_MISC 0x07
+
+/* ld/ldx fields */
+#define BPF_SIZE(code) ((code) & 0x18)
+#define BPF_W 0x00 /* 32-bit */
+#define BPF_H 0x08 /* 16-bit */
+#define BPF_B 0x10 /* 8-bit */
+/* eBPF BPF_DW 0x18 64-bit */
+#define BPF_MODE(code) ((code) & 0xe0)
+#define BPF_IMM 0x00
+#define BPF_ABS 0x20
+#define BPF_IND 0x40
+#define BPF_MEM 0x60
+#define BPF_LEN 0x80
+#define BPF_MSH 0xa0
+
+/* alu/jmp fields */
+#define BPF_OP(code) ((code) & 0xf0)
+#define BPF_ADD 0x00
+#define BPF_SUB 0x10
+#define BPF_MUL 0x20
+#define BPF_DIV 0x30
+#define BPF_OR 0x40
+#define BPF_AND 0x50
+#define BPF_LSH 0x60
+#define BPF_RSH 0x70
+#define BPF_NEG 0x80
+#define BPF_MOD 0x90
+#define BPF_XOR 0xa0
+
+#define BPF_JA 0x00
+#define BPF_JEQ 0x10
+#define BPF_JGT 0x20
+#define BPF_JGE 0x30
+#define BPF_JSET 0x40
+#define BPF_SRC(code) ((code) & 0x08)
+#define BPF_K 0x00
+#define BPF_X 0x08
+
+#ifndef BPF_MAXINSNS
+#define BPF_MAXINSNS 4096
+#endif
+
+#endif /* _UAPI__LINUX_BPF_COMMON_H__ */
diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
new file mode 100644
index 000000000..8f95303f9
--- /dev/null
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__
+#define _UAPI__LINUX_BPF_PERF_EVENT_H__
+
+#include <asm/bpf_perf_event.h>
+
+struct bpf_perf_event_data {
+ bpf_user_pt_regs_t regs;
+ __u64 sample_period;
+};
+
+#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
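
A minimal sketch of a BPF program attachable to a perf event, which receives the context type defined above; SEC() and bpf_printk() are assumed from a libbpf-style bpf_helpers.h, and creating the event with perf_event_open(2) is elided:

#include <linux/bpf_perf_event.h>

SEC("perf_event")
int on_sample(struct bpf_perf_event_data *ctx)
{
	/* ctx->regs is the arch-specific user register snapshot taken at
	 * sample time; sample_period is the event's configured period. */
	bpf_printk("sampled with period %llu\n", ctx->sample_period);
	return 0;
}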
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
new file mode 100644
index 000000000..972265f32
--- /dev/null
+++ b/tools/include/uapi/linux/btf.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2018 Facebook */
+#ifndef _UAPI__LINUX_BTF_H__
+#define _UAPI__LINUX_BTF_H__
+
+#include <linux/types.h>
+
+#define BTF_MAGIC 0xeB9F
+#define BTF_VERSION 1
+
+struct btf_header {
+ __u16 magic;
+ __u8 version;
+ __u8 flags;
+ __u32 hdr_len;
+
+ /* All offsets are in bytes relative to the end of this header */
+ __u32 type_off; /* offset of type section */
+ __u32 type_len; /* length of type section */
+ __u32 str_off; /* offset of string section */
+ __u32 str_len; /* length of string section */
+};
+
+/* Max # of type identifiers */
+#define BTF_MAX_TYPE 0x0000ffff
+/* Max offset into the string section */
+#define BTF_MAX_NAME_OFFSET 0x0000ffff
+/* Max # of struct/union/enum members or func args */
+#define BTF_MAX_VLEN 0xffff
+
+struct btf_type {
+ __u32 name_off;
+ /* "info" bits arrangement
+ * bits 0-15: vlen (e.g. # of struct's members)
+ * bits 16-23: unused
+ * bits 24-27: kind (e.g. int, ptr, array...etc)
+ * bits 28-31: unused
+ */
+ __u32 info;
+ /* "size" is used by INT, ENUM, STRUCT and UNION.
+ * "size" tells the size of the type it is describing.
+ *
+ * "type" is used by PTR, TYPEDEF, VOLATILE, CONST and RESTRICT.
+ * "type" is a type_id referring to another type.
+ */
+ union {
+ __u32 size;
+ __u32 type;
+ };
+};
+
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_VLEN(info) ((info) & 0xffff)
+
+#define BTF_KIND_UNKN 0 /* Unknown */
+#define BTF_KIND_INT 1 /* Integer */
+#define BTF_KIND_PTR 2 /* Pointer */
+#define BTF_KIND_ARRAY 3 /* Array */
+#define BTF_KIND_STRUCT 4 /* Struct */
+#define BTF_KIND_UNION 5 /* Union */
+#define BTF_KIND_ENUM 6 /* Enumeration */
+#define BTF_KIND_FWD 7 /* Forward */
+#define BTF_KIND_TYPEDEF 8 /* Typedef */
+#define BTF_KIND_VOLATILE 9 /* Volatile */
+#define BTF_KIND_CONST 10 /* Const */
+#define BTF_KIND_RESTRICT 11 /* Restrict */
+#define BTF_KIND_MAX 11
+#define NR_BTF_KINDS 12
+
+/* For some specific BTF_KIND, "struct btf_type" is immediately
+ * followed by extra data.
+ */
+
+/* BTF_KIND_INT is followed by a u32 and the following
+ * is the 32-bit arrangement:
+ */
+#define BTF_INT_ENCODING(VAL) (((VAL) & 0x0f000000) >> 24)
+#define BTF_INT_OFFSET(VAL)	(((VAL) & 0x00ff0000) >> 16)
+#define BTF_INT_BITS(VAL) ((VAL) & 0x000000ff)
+
+/* Attributes stored in the BTF_INT_ENCODING */
+#define BTF_INT_SIGNED (1 << 0)
+#define BTF_INT_CHAR (1 << 1)
+#define BTF_INT_BOOL (1 << 2)
+
+/* BTF_KIND_ENUM is followed by multiple "struct btf_enum".
+ * The exact number of btf_enum is stored in the vlen (of the
+ * info in "struct btf_type").
+ */
+struct btf_enum {
+ __u32 name_off;
+ __s32 val;
+};
+
+/* BTF_KIND_ARRAY is followed by one "struct btf_array" */
+struct btf_array {
+ __u32 type;
+ __u32 index_type;
+ __u32 nelems;
+};
+
+/* BTF_KIND_STRUCT and BTF_KIND_UNION are followed
+ * by multiple "struct btf_member". The exact number
+ * of btf_member is stored in the vlen (of the info in
+ * "struct btf_type").
+ */
+struct btf_member {
+ __u32 name_off;
+ __u32 type;
+ __u32 offset; /* offset in bits */
+};
+
+#endif /* _UAPI__LINUX_BTF_H__ */
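
A sketch of walking one type entry with the accessors above; loading the .BTF blob and bounds checking against type_len are elided:

#include <stdio.h>
#include <linux/btf.h>

static void dump_type(const struct btf_type *t)
{
	__u32 kind = BTF_INFO_KIND(t->info);
	__u32 vlen = BTF_INFO_VLEN(t->info);

	if (kind == BTF_KIND_STRUCT || kind == BTF_KIND_UNION) {
		/* vlen btf_member entries immediately follow the type */
		const struct btf_member *m = (const struct btf_member *)(t + 1);
		__u32 i;

		for (i = 0; i < vlen; i++, m++)
			printf("  member type=%u bit-offset=%u\n",
			       m->type, m->offset);
	}
	/* BTF_KIND_INT is instead followed by the u32 decoded with
	 * BTF_INT_ENCODING()/BTF_INT_OFFSET()/BTF_INT_BITS() */
}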
diff --git a/tools/include/uapi/linux/erspan.h b/tools/include/uapi/linux/erspan.h
new file mode 100644
index 000000000..841573019
--- /dev/null
+++ b/tools/include/uapi/linux/erspan.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * ERSPAN Tunnel Metadata
+ *
+ * Copyright (c) 2018 VMware
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation.
+ *
+ * Userspace API for metadata mode ERSPAN tunnel
+ */
+#ifndef _UAPI_ERSPAN_H
+#define _UAPI_ERSPAN_H
+
+#include <linux/types.h> /* For __beXX in userspace */
+#include <asm/byteorder.h>
+
+/* ERSPAN version 2 metadata header */
+struct erspan_md2 {
+ __be32 timestamp;
+ __be16 sgt; /* security group tag */
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hwid_upper:2,
+ ft:5,
+ p:1;
+ __u8 o:1,
+ gra:2,
+ dir:1,
+ hwid:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 p:1,
+ ft:5,
+ hwid_upper:2;
+ __u8 hwid:4,
+ dir:1,
+ gra:2,
+ o:1;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+};
+
+struct erspan_metadata {
+ int version;
+ union {
+		__be32 index; /* Version 1 (type II) */
+ struct erspan_md2 md2; /* Version 2 (type III) */
+ } u;
+};
+
+#endif /* _UAPI_ERSPAN_H */
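
A sketch modeled on the kernel's tunnel selftests: a tc BPF program stamping version-1 metadata onto a collect_md ERSPAN device. bpf_htonl() and SEC() are assumed from libbpf-style headers, and the section name is illustrative:

#include <linux/bpf.h>
#include <linux/erspan.h>

SEC("tc")
int erspan_set(struct __sk_buff *skb)
{
	struct erspan_metadata md = {};

	md.version = 1;
	md.u.index = bpf_htonl(123);	/* type II session index */
	/* a version-2 tunnel would fill md.u.md2 (dir, hwid, ...) instead */
	bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
	return 0;	/* TC_ACT_OK */
}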
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
new file mode 100644
index 000000000..6448cdd9a
--- /dev/null
+++ b/tools/include/uapi/linux/fcntl.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_FCNTL_H
+#define _UAPI_LINUX_FCNTL_H
+
+#include <asm/fcntl.h>
+
+#define F_SETLEASE (F_LINUX_SPECIFIC_BASE + 0)
+#define F_GETLEASE (F_LINUX_SPECIFIC_BASE + 1)
+
+/*
+ * Cancel a blocking posix lock; internal use only until we expose an
+ * asynchronous lock api to userspace:
+ */
+#define F_CANCELLK (F_LINUX_SPECIFIC_BASE + 5)
+
+/* Create a file descriptor with FD_CLOEXEC set. */
+#define F_DUPFD_CLOEXEC (F_LINUX_SPECIFIC_BASE + 6)
+
+/*
+ * Request notifications on a directory.
+ * See below for events that may be notified.
+ */
+#define F_NOTIFY (F_LINUX_SPECIFIC_BASE+2)
+
+/*
+ * Set and get of pipe page size array
+ */
+#define F_SETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 7)
+#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
+
+/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
+/*
+ * Set/Get write life time hints. {GET,SET}_RW_HINT operate on the
+ * underlying inode, while {GET,SET}_FILE_RW_HINT operate only on
+ * the specific file.
+ */
+#define F_GET_RW_HINT (F_LINUX_SPECIFIC_BASE + 11)
+#define F_SET_RW_HINT (F_LINUX_SPECIFIC_BASE + 12)
+#define F_GET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 13)
+#define F_SET_FILE_RW_HINT (F_LINUX_SPECIFIC_BASE + 14)
+
+/*
+ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be
+ * used to clear any hints previously set.
+ */
+#define RWF_WRITE_LIFE_NOT_SET 0
+#define RWH_WRITE_LIFE_NONE 1
+#define RWH_WRITE_LIFE_SHORT 2
+#define RWH_WRITE_LIFE_MEDIUM 3
+#define RWH_WRITE_LIFE_LONG 4
+#define RWH_WRITE_LIFE_EXTREME 5
+
+/*
+ * Types of directory notifications that may be requested.
+ */
+#define DN_ACCESS 0x00000001 /* File accessed */
+#define DN_MODIFY 0x00000002 /* File modified */
+#define DN_CREATE 0x00000004 /* File created */
+#define DN_DELETE 0x00000008 /* File removed */
+#define DN_RENAME 0x00000010 /* File renamed */
+#define DN_ATTRIB	0x00000020	/* File changed attributes */
+#define DN_MULTISHOT 0x80000000 /* Don't remove notifier */
+
+#define AT_FDCWD -100 /* Special value used to indicate
+ openat should use the current
+ working directory. */
+#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */
+#define AT_REMOVEDIR 0x200 /* Remove directory instead of
+ unlinking file. */
+#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */
+#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */
+#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */
+
+#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */
+#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */
+#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */
+#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */
+
+
+#endif /* _UAPI_LINUX_FCNTL_H */
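
A sketch of the sealing commands in action on a memfd, assuming a glibc that exposes memfd_create(2); error handling is abbreviated:

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int make_sealed_memfd(void)
{
	int fd = memfd_create("demo", MFD_ALLOW_SEALING);

	if (fd < 0)
		return -1;
	ftruncate(fd, 4096);
	/* forbid resizing, then forbid adding further seals */
	fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_SEAL);
	return fd;
}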
diff --git a/tools/include/uapi/linux/hw_breakpoint.h b/tools/include/uapi/linux/hw_breakpoint.h
new file mode 100644
index 000000000..965e4d860
--- /dev/null
+++ b/tools/include/uapi/linux/hw_breakpoint.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_HW_BREAKPOINT_H
+#define _UAPI_LINUX_HW_BREAKPOINT_H
+
+enum {
+ HW_BREAKPOINT_LEN_1 = 1,
+ HW_BREAKPOINT_LEN_2 = 2,
+ HW_BREAKPOINT_LEN_3 = 3,
+ HW_BREAKPOINT_LEN_4 = 4,
+ HW_BREAKPOINT_LEN_5 = 5,
+ HW_BREAKPOINT_LEN_6 = 6,
+ HW_BREAKPOINT_LEN_7 = 7,
+ HW_BREAKPOINT_LEN_8 = 8,
+};
+
+enum {
+ HW_BREAKPOINT_EMPTY = 0,
+ HW_BREAKPOINT_R = 1,
+ HW_BREAKPOINT_W = 2,
+ HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+ HW_BREAKPOINT_X = 4,
+ HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
+};
+
+enum bp_type_idx {
+ TYPE_INST = 0,
+#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+ TYPE_DATA = 0,
+#else
+ TYPE_DATA = 1,
+#endif
+ TYPE_MAX
+};
+
+#endif /* _UAPI_LINUX_HW_BREAKPOINT_H */
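
These constants feed the bp_type/bp_len fields of struct perf_event_attr. A sketch arming a 4-byte write watchpoint on the calling thread via perf_event_open(2):

#include <linux/hw_breakpoint.h>
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <string.h>
#include <unistd.h>

static int watch_writes(void *addr)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;
	attr.size = sizeof(attr);
	attr.bp_type = HW_BREAKPOINT_W;		/* fire on writes only */
	attr.bp_addr = (unsigned long)addr;
	attr.bp_len = HW_BREAKPOINT_LEN_4;

	/* pid=0, cpu=-1: monitor the calling thread on any CPU */
	return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
}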
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
new file mode 100644
index 000000000..43391e2d1
--- /dev/null
+++ b/tools/include/uapi/linux/if_link.h
@@ -0,0 +1,1002 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_IF_LINK_H
+#define _UAPI_LINUX_IF_LINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+/* This struct should be in sync with struct rtnl_link_stats64 */
+struct rtnl_link_stats {
+ __u32 rx_packets; /* total packets received */
+ __u32 tx_packets; /* total packets transmitted */
+ __u32 rx_bytes; /* total bytes received */
+ __u32 tx_bytes; /* total bytes transmitted */
+ __u32 rx_errors; /* bad packets received */
+ __u32 tx_errors; /* packet transmit problems */
+ __u32 rx_dropped; /* no space in linux buffers */
+ __u32 tx_dropped; /* no space available in linux */
+ __u32 multicast; /* multicast packets received */
+ __u32 collisions;
+
+ /* detailed rx_errors: */
+ __u32 rx_length_errors;
+ __u32 rx_over_errors; /* receiver ring buff overflow */
+ __u32 rx_crc_errors; /* recved pkt with crc error */
+ __u32 rx_frame_errors; /* recv'd frame alignment error */
+ __u32 rx_fifo_errors; /* recv'r fifo overrun */
+ __u32 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u32 tx_aborted_errors;
+ __u32 tx_carrier_errors;
+ __u32 tx_fifo_errors;
+ __u32 tx_heartbeat_errors;
+ __u32 tx_window_errors;
+
+ /* for cslip etc */
+ __u32 rx_compressed;
+ __u32 tx_compressed;
+
+ __u32 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The main device statistics structure */
+struct rtnl_link_stats64 {
+ __u64 rx_packets; /* total packets received */
+ __u64 tx_packets; /* total packets transmitted */
+ __u64 rx_bytes; /* total bytes received */
+ __u64 tx_bytes; /* total bytes transmitted */
+ __u64 rx_errors; /* bad packets received */
+ __u64 tx_errors; /* packet transmit problems */
+ __u64 rx_dropped; /* no space in linux buffers */
+ __u64 tx_dropped; /* no space available in linux */
+ __u64 multicast; /* multicast packets received */
+ __u64 collisions;
+
+ /* detailed rx_errors: */
+ __u64 rx_length_errors;
+ __u64 rx_over_errors; /* receiver ring buff overflow */
+ __u64 rx_crc_errors; /* recved pkt with crc error */
+ __u64 rx_frame_errors; /* recv'd frame alignment error */
+ __u64 rx_fifo_errors; /* recv'r fifo overrun */
+ __u64 rx_missed_errors; /* receiver missed packet */
+
+ /* detailed tx_errors */
+ __u64 tx_aborted_errors;
+ __u64 tx_carrier_errors;
+ __u64 tx_fifo_errors;
+ __u64 tx_heartbeat_errors;
+ __u64 tx_window_errors;
+
+ /* for cslip etc */
+ __u64 rx_compressed;
+ __u64 tx_compressed;
+
+ __u64 rx_nohandler; /* dropped, no handler found */
+};
+
+/* The struct should be in sync with struct ifmap */
+struct rtnl_link_ifmap {
+ __u64 mem_start;
+ __u64 mem_end;
+ __u64 base_addr;
+ __u16 irq;
+ __u8 dma;
+ __u8 port;
+};
+
+/*
+ * IFLA_AF_SPEC
+ * Contains nested attributes for address family specific attributes.
+ * Each address family may create an attribute with the address family
+ * number as type and create its own attribute structure in it.
+ *
+ * Example:
+ * [IFLA_AF_SPEC] = {
+ * [AF_INET] = {
+ * [IFLA_INET_CONF] = ...,
+ * },
+ * [AF_INET6] = {
+ * [IFLA_INET6_FLAGS] = ...,
+ * [IFLA_INET6_CONF] = ...,
+ * }
+ * }
+ */
+
+enum {
+ IFLA_UNSPEC,
+ IFLA_ADDRESS,
+ IFLA_BROADCAST,
+ IFLA_IFNAME,
+ IFLA_MTU,
+ IFLA_LINK,
+ IFLA_QDISC,
+ IFLA_STATS,
+ IFLA_COST,
+#define IFLA_COST IFLA_COST
+ IFLA_PRIORITY,
+#define IFLA_PRIORITY IFLA_PRIORITY
+ IFLA_MASTER,
+#define IFLA_MASTER IFLA_MASTER
+ IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */
+#define IFLA_WIRELESS IFLA_WIRELESS
+ IFLA_PROTINFO, /* Protocol specific information for a link */
+#define IFLA_PROTINFO IFLA_PROTINFO
+ IFLA_TXQLEN,
+#define IFLA_TXQLEN IFLA_TXQLEN
+ IFLA_MAP,
+#define IFLA_MAP IFLA_MAP
+ IFLA_WEIGHT,
+#define IFLA_WEIGHT IFLA_WEIGHT
+ IFLA_OPERSTATE,
+ IFLA_LINKMODE,
+ IFLA_LINKINFO,
+#define IFLA_LINKINFO IFLA_LINKINFO
+ IFLA_NET_NS_PID,
+ IFLA_IFALIAS,
+ IFLA_NUM_VF, /* Number of VFs if device is SR-IOV PF */
+ IFLA_VFINFO_LIST,
+ IFLA_STATS64,
+ IFLA_VF_PORTS,
+ IFLA_PORT_SELF,
+ IFLA_AF_SPEC,
+ IFLA_GROUP, /* Group the device belongs to */
+ IFLA_NET_NS_FD,
+ IFLA_EXT_MASK, /* Extended info mask, VFs, etc */
+ IFLA_PROMISCUITY, /* Promiscuity count: > 0 means acts PROMISC */
+#define IFLA_PROMISCUITY IFLA_PROMISCUITY
+ IFLA_NUM_TX_QUEUES,
+ IFLA_NUM_RX_QUEUES,
+ IFLA_CARRIER,
+ IFLA_PHYS_PORT_ID,
+ IFLA_CARRIER_CHANGES,
+ IFLA_PHYS_SWITCH_ID,
+ IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
+ IFLA_PROTO_DOWN,
+ IFLA_GSO_MAX_SEGS,
+ IFLA_GSO_MAX_SIZE,
+ IFLA_PAD,
+ IFLA_XDP,
+ IFLA_EVENT,
+ IFLA_NEW_NETNSID,
+ IFLA_IF_NETNSID,
+ IFLA_CARRIER_UP_COUNT,
+ IFLA_CARRIER_DOWN_COUNT,
+ IFLA_NEW_IFINDEX,
+ IFLA_MIN_MTU,
+ IFLA_MAX_MTU,
+ __IFLA_MAX
+};
+
+
+#define IFLA_MAX (__IFLA_MAX - 1)
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg))))
+#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg))
+#endif
+
+enum {
+ IFLA_INET_UNSPEC,
+ IFLA_INET_CONF,
+ __IFLA_INET_MAX,
+};
+
+#define IFLA_INET_MAX (__IFLA_INET_MAX - 1)
+
+/* ifi_flags.
+
+ IFF_* flags.
+
+ The only change is:
+   IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are
+   no longer changeable by the user. They describe link media
+   characteristics and are set by the device driver.
+
+ Comments:
+ - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid
+   - If none of these three flags is set,
+     the interface is NBMA.
+
+ - IFF_MULTICAST does not mean anything special:
+ multicasts can be used on all not-NBMA links.
+ IFF_MULTICAST means that this media uses special encapsulation
+ for multicast frames. Apparently, all IFF_POINTOPOINT and
+ IFF_BROADCAST devices are able to use multicasts too.
+ */
+
+/* IFLA_LINK.
+   For usual devices it equals ifi_index.
+   If it is a "virtual interface" (e.g. a tunnel), ifi_link
+   can point to the real physical interface (e.g. for bandwidth calculations),
+   or may be 0, meaning that the real media is unknown (usual
+   for IPIP tunnels, when the route to the endpoint is allowed to change).
+ */
+
+/* Subtype attributes for IFLA_PROTINFO */
+enum {
+ IFLA_INET6_UNSPEC,
+ IFLA_INET6_FLAGS, /* link flags */
+ IFLA_INET6_CONF, /* sysctl parameters */
+ IFLA_INET6_STATS, /* statistics */
+ IFLA_INET6_MCAST, /* MC things. What of them? */
+ IFLA_INET6_CACHEINFO, /* time values and max reasm size */
+ IFLA_INET6_ICMP6STATS, /* statistics (icmpv6) */
+ IFLA_INET6_TOKEN, /* device token */
+ IFLA_INET6_ADDR_GEN_MODE, /* implicit address generator mode */
+ __IFLA_INET6_MAX
+};
+
+#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1)
+
+enum in6_addr_gen_mode {
+ IN6_ADDR_GEN_MODE_EUI64,
+ IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
+ IN6_ADDR_GEN_MODE_RANDOM,
+};
+
+/* Bridge section */
+
+enum {
+ IFLA_BR_UNSPEC,
+ IFLA_BR_FORWARD_DELAY,
+ IFLA_BR_HELLO_TIME,
+ IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
+ IFLA_BR_VLAN_FILTERING,
+ IFLA_BR_VLAN_PROTOCOL,
+ IFLA_BR_GROUP_FWD_MASK,
+ IFLA_BR_ROOT_ID,
+ IFLA_BR_BRIDGE_ID,
+ IFLA_BR_ROOT_PORT,
+ IFLA_BR_ROOT_PATH_COST,
+ IFLA_BR_TOPOLOGY_CHANGE,
+ IFLA_BR_TOPOLOGY_CHANGE_DETECTED,
+ IFLA_BR_HELLO_TIMER,
+ IFLA_BR_TCN_TIMER,
+ IFLA_BR_TOPOLOGY_CHANGE_TIMER,
+ IFLA_BR_GC_TIMER,
+ IFLA_BR_GROUP_ADDR,
+ IFLA_BR_FDB_FLUSH,
+ IFLA_BR_MCAST_ROUTER,
+ IFLA_BR_MCAST_SNOOPING,
+ IFLA_BR_MCAST_QUERY_USE_IFADDR,
+ IFLA_BR_MCAST_QUERIER,
+ IFLA_BR_MCAST_HASH_ELASTICITY,
+ IFLA_BR_MCAST_HASH_MAX,
+ IFLA_BR_MCAST_LAST_MEMBER_CNT,
+ IFLA_BR_MCAST_STARTUP_QUERY_CNT,
+ IFLA_BR_MCAST_LAST_MEMBER_INTVL,
+ IFLA_BR_MCAST_MEMBERSHIP_INTVL,
+ IFLA_BR_MCAST_QUERIER_INTVL,
+ IFLA_BR_MCAST_QUERY_INTVL,
+ IFLA_BR_MCAST_QUERY_RESPONSE_INTVL,
+ IFLA_BR_MCAST_STARTUP_QUERY_INTVL,
+ IFLA_BR_NF_CALL_IPTABLES,
+ IFLA_BR_NF_CALL_IP6TABLES,
+ IFLA_BR_NF_CALL_ARPTABLES,
+ IFLA_BR_VLAN_DEFAULT_PVID,
+ IFLA_BR_PAD,
+ IFLA_BR_VLAN_STATS_ENABLED,
+ IFLA_BR_MCAST_STATS_ENABLED,
+ IFLA_BR_MCAST_IGMP_VERSION,
+ IFLA_BR_MCAST_MLD_VERSION,
+ __IFLA_BR_MAX,
+};
+
+#define IFLA_BR_MAX (__IFLA_BR_MAX - 1)
+
+struct ifla_bridge_id {
+ __u8 prio[2];
+ __u8 addr[6]; /* ETH_ALEN */
+};
+
+enum {
+ BRIDGE_MODE_UNSPEC,
+ BRIDGE_MODE_HAIRPIN,
+};
+
+enum {
+ IFLA_BRPORT_UNSPEC,
+ IFLA_BRPORT_STATE, /* Spanning tree state */
+ IFLA_BRPORT_PRIORITY, /* " priority */
+ IFLA_BRPORT_COST, /* " cost */
+ IFLA_BRPORT_MODE, /* mode (hairpin) */
+ IFLA_BRPORT_GUARD, /* bpdu guard */
+ IFLA_BRPORT_PROTECT, /* root port protection */
+ IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */
+ IFLA_BRPORT_LEARNING, /* mac learning */
+ IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
+ IFLA_BRPORT_PROXYARP, /* proxy ARP */
+ IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
+ IFLA_BRPORT_ROOT_ID, /* designated root */
+ IFLA_BRPORT_BRIDGE_ID, /* designated bridge */
+ IFLA_BRPORT_DESIGNATED_PORT,
+ IFLA_BRPORT_DESIGNATED_COST,
+ IFLA_BRPORT_ID,
+ IFLA_BRPORT_NO,
+ IFLA_BRPORT_TOPOLOGY_CHANGE_ACK,
+ IFLA_BRPORT_CONFIG_PENDING,
+ IFLA_BRPORT_MESSAGE_AGE_TIMER,
+ IFLA_BRPORT_FORWARD_DELAY_TIMER,
+ IFLA_BRPORT_HOLD_TIMER,
+ IFLA_BRPORT_FLUSH,
+ IFLA_BRPORT_MULTICAST_ROUTER,
+ IFLA_BRPORT_PAD,
+ IFLA_BRPORT_MCAST_FLOOD,
+ IFLA_BRPORT_MCAST_TO_UCAST,
+ IFLA_BRPORT_VLAN_TUNNEL,
+ IFLA_BRPORT_BCAST_FLOOD,
+ IFLA_BRPORT_GROUP_FWD_MASK,
+ IFLA_BRPORT_NEIGH_SUPPRESS,
+ IFLA_BRPORT_ISOLATED,
+ IFLA_BRPORT_BACKUP_PORT,
+ __IFLA_BRPORT_MAX
+};
+#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
+
+struct ifla_cacheinfo {
+ __u32 max_reasm_len;
+ __u32 tstamp; /* ipv6InterfaceTable updated timestamp */
+ __u32 reachable_time;
+ __u32 retrans_time;
+};
+
+enum {
+ IFLA_INFO_UNSPEC,
+ IFLA_INFO_KIND,
+ IFLA_INFO_DATA,
+ IFLA_INFO_XSTATS,
+ IFLA_INFO_SLAVE_KIND,
+ IFLA_INFO_SLAVE_DATA,
+ __IFLA_INFO_MAX,
+};
+
+#define IFLA_INFO_MAX (__IFLA_INFO_MAX - 1)
+
+/* VLAN section */
+
+enum {
+ IFLA_VLAN_UNSPEC,
+ IFLA_VLAN_ID,
+ IFLA_VLAN_FLAGS,
+ IFLA_VLAN_EGRESS_QOS,
+ IFLA_VLAN_INGRESS_QOS,
+ IFLA_VLAN_PROTOCOL,
+ __IFLA_VLAN_MAX,
+};
+
+#define IFLA_VLAN_MAX (__IFLA_VLAN_MAX - 1)
+
+struct ifla_vlan_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+enum {
+ IFLA_VLAN_QOS_UNSPEC,
+ IFLA_VLAN_QOS_MAPPING,
+ __IFLA_VLAN_QOS_MAX
+};
+
+#define IFLA_VLAN_QOS_MAX (__IFLA_VLAN_QOS_MAX - 1)
+
+struct ifla_vlan_qos_mapping {
+ __u32 from;
+ __u32 to;
+};
+
+/* MACVLAN section */
+enum {
+ IFLA_MACVLAN_UNSPEC,
+ IFLA_MACVLAN_MODE,
+ IFLA_MACVLAN_FLAGS,
+ IFLA_MACVLAN_MACADDR_MODE,
+ IFLA_MACVLAN_MACADDR,
+ IFLA_MACVLAN_MACADDR_DATA,
+ IFLA_MACVLAN_MACADDR_COUNT,
+ __IFLA_MACVLAN_MAX,
+};
+
+#define IFLA_MACVLAN_MAX (__IFLA_MACVLAN_MAX - 1)
+
+enum macvlan_mode {
+ MACVLAN_MODE_PRIVATE = 1, /* don't talk to other macvlans */
+ MACVLAN_MODE_VEPA = 2, /* talk to other ports through ext bridge */
+ MACVLAN_MODE_BRIDGE = 4, /* talk to bridge ports directly */
+	MACVLAN_MODE_PASSTHRU = 8, /* take over the underlying device */
+	MACVLAN_MODE_SOURCE = 16, /* use source MAC address list to assign */
+};
+
+enum macvlan_macaddr_mode {
+ MACVLAN_MACADDR_ADD,
+ MACVLAN_MACADDR_DEL,
+ MACVLAN_MACADDR_FLUSH,
+ MACVLAN_MACADDR_SET,
+};
+
+#define MACVLAN_FLAG_NOPROMISC 1
+
+/* VRF section */
+enum {
+ IFLA_VRF_UNSPEC,
+ IFLA_VRF_TABLE,
+ __IFLA_VRF_MAX
+};
+
+#define IFLA_VRF_MAX (__IFLA_VRF_MAX - 1)
+
+enum {
+ IFLA_VRF_PORT_UNSPEC,
+ IFLA_VRF_PORT_TABLE,
+ __IFLA_VRF_PORT_MAX
+};
+
+#define IFLA_VRF_PORT_MAX (__IFLA_VRF_PORT_MAX - 1)
+
+/* MACSEC section */
+enum {
+ IFLA_MACSEC_UNSPEC,
+ IFLA_MACSEC_SCI,
+ IFLA_MACSEC_PORT,
+ IFLA_MACSEC_ICV_LEN,
+ IFLA_MACSEC_CIPHER_SUITE,
+ IFLA_MACSEC_WINDOW,
+ IFLA_MACSEC_ENCODING_SA,
+ IFLA_MACSEC_ENCRYPT,
+ IFLA_MACSEC_PROTECT,
+ IFLA_MACSEC_INC_SCI,
+ IFLA_MACSEC_ES,
+ IFLA_MACSEC_SCB,
+ IFLA_MACSEC_REPLAY_PROTECT,
+ IFLA_MACSEC_VALIDATION,
+ IFLA_MACSEC_PAD,
+ __IFLA_MACSEC_MAX,
+};
+
+#define IFLA_MACSEC_MAX (__IFLA_MACSEC_MAX - 1)
+
+/* XFRM section */
+enum {
+ IFLA_XFRM_UNSPEC,
+ IFLA_XFRM_LINK,
+ IFLA_XFRM_IF_ID,
+ __IFLA_XFRM_MAX
+};
+
+#define IFLA_XFRM_MAX (__IFLA_XFRM_MAX - 1)
+
+enum macsec_validation_type {
+ MACSEC_VALIDATE_DISABLED = 0,
+ MACSEC_VALIDATE_CHECK = 1,
+ MACSEC_VALIDATE_STRICT = 2,
+ __MACSEC_VALIDATE_END,
+ MACSEC_VALIDATE_MAX = __MACSEC_VALIDATE_END - 1,
+};
+
+/* IPVLAN section */
+enum {
+ IFLA_IPVLAN_UNSPEC,
+ IFLA_IPVLAN_MODE,
+ IFLA_IPVLAN_FLAGS,
+ __IFLA_IPVLAN_MAX
+};
+
+#define IFLA_IPVLAN_MAX (__IFLA_IPVLAN_MAX - 1)
+
+enum ipvlan_mode {
+ IPVLAN_MODE_L2 = 0,
+ IPVLAN_MODE_L3,
+ IPVLAN_MODE_L3S,
+ IPVLAN_MODE_MAX
+};
+
+#define IPVLAN_F_PRIVATE 0x01
+#define IPVLAN_F_VEPA 0x02
+
+/* VXLAN section */
+enum {
+ IFLA_VXLAN_UNSPEC,
+ IFLA_VXLAN_ID,
+ IFLA_VXLAN_GROUP, /* group or remote address */
+ IFLA_VXLAN_LINK,
+ IFLA_VXLAN_LOCAL,
+ IFLA_VXLAN_TTL,
+ IFLA_VXLAN_TOS,
+ IFLA_VXLAN_LEARNING,
+ IFLA_VXLAN_AGEING,
+ IFLA_VXLAN_LIMIT,
+ IFLA_VXLAN_PORT_RANGE, /* source port */
+ IFLA_VXLAN_PROXY,
+ IFLA_VXLAN_RSC,
+ IFLA_VXLAN_L2MISS,
+ IFLA_VXLAN_L3MISS,
+ IFLA_VXLAN_PORT, /* destination port */
+ IFLA_VXLAN_GROUP6,
+ IFLA_VXLAN_LOCAL6,
+ IFLA_VXLAN_UDP_CSUM,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
+ IFLA_VXLAN_UDP_ZERO_CSUM6_RX,
+ IFLA_VXLAN_REMCSUM_TX,
+ IFLA_VXLAN_REMCSUM_RX,
+ IFLA_VXLAN_GBP,
+ IFLA_VXLAN_REMCSUM_NOPARTIAL,
+ IFLA_VXLAN_COLLECT_METADATA,
+ IFLA_VXLAN_LABEL,
+ IFLA_VXLAN_GPE,
+ IFLA_VXLAN_TTL_INHERIT,
+ __IFLA_VXLAN_MAX
+};
+#define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1)
+
+struct ifla_vxlan_port_range {
+ __be16 low;
+ __be16 high;
+};
+
+/* GENEVE section */
+enum {
+ IFLA_GENEVE_UNSPEC,
+ IFLA_GENEVE_ID,
+ IFLA_GENEVE_REMOTE,
+ IFLA_GENEVE_TTL,
+ IFLA_GENEVE_TOS,
+ IFLA_GENEVE_PORT, /* destination port */
+ IFLA_GENEVE_COLLECT_METADATA,
+ IFLA_GENEVE_REMOTE6,
+ IFLA_GENEVE_UDP_CSUM,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
+ IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
+ IFLA_GENEVE_LABEL,
+ __IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX (__IFLA_GENEVE_MAX - 1)
+
+/* PPP section */
+enum {
+ IFLA_PPP_UNSPEC,
+ IFLA_PPP_DEV_FD,
+ __IFLA_PPP_MAX
+};
+#define IFLA_PPP_MAX (__IFLA_PPP_MAX - 1)
+
+/* GTP section */
+
+enum ifla_gtp_role {
+ GTP_ROLE_GGSN = 0,
+ GTP_ROLE_SGSN,
+};
+
+enum {
+ IFLA_GTP_UNSPEC,
+ IFLA_GTP_FD0,
+ IFLA_GTP_FD1,
+ IFLA_GTP_PDP_HASHSIZE,
+ IFLA_GTP_ROLE,
+ __IFLA_GTP_MAX,
+};
+#define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1)
+
+/* Bonding section */
+
+enum {
+ IFLA_BOND_UNSPEC,
+ IFLA_BOND_MODE,
+ IFLA_BOND_ACTIVE_SLAVE,
+ IFLA_BOND_MIIMON,
+ IFLA_BOND_UPDELAY,
+ IFLA_BOND_DOWNDELAY,
+ IFLA_BOND_USE_CARRIER,
+ IFLA_BOND_ARP_INTERVAL,
+ IFLA_BOND_ARP_IP_TARGET,
+ IFLA_BOND_ARP_VALIDATE,
+ IFLA_BOND_ARP_ALL_TARGETS,
+ IFLA_BOND_PRIMARY,
+ IFLA_BOND_PRIMARY_RESELECT,
+ IFLA_BOND_FAIL_OVER_MAC,
+ IFLA_BOND_XMIT_HASH_POLICY,
+ IFLA_BOND_RESEND_IGMP,
+ IFLA_BOND_NUM_PEER_NOTIF,
+ IFLA_BOND_ALL_SLAVES_ACTIVE,
+ IFLA_BOND_MIN_LINKS,
+ IFLA_BOND_LP_INTERVAL,
+ IFLA_BOND_PACKETS_PER_SLAVE,
+ IFLA_BOND_AD_LACP_RATE,
+ IFLA_BOND_AD_SELECT,
+ IFLA_BOND_AD_INFO,
+ IFLA_BOND_AD_ACTOR_SYS_PRIO,
+ IFLA_BOND_AD_USER_PORT_KEY,
+ IFLA_BOND_AD_ACTOR_SYSTEM,
+ IFLA_BOND_TLB_DYNAMIC_LB,
+ __IFLA_BOND_MAX,
+};
+
+#define IFLA_BOND_MAX (__IFLA_BOND_MAX - 1)
+
+enum {
+ IFLA_BOND_AD_INFO_UNSPEC,
+ IFLA_BOND_AD_INFO_AGGREGATOR,
+ IFLA_BOND_AD_INFO_NUM_PORTS,
+ IFLA_BOND_AD_INFO_ACTOR_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_KEY,
+ IFLA_BOND_AD_INFO_PARTNER_MAC,
+ __IFLA_BOND_AD_INFO_MAX,
+};
+
+#define IFLA_BOND_AD_INFO_MAX (__IFLA_BOND_AD_INFO_MAX - 1)
+
+enum {
+ IFLA_BOND_SLAVE_UNSPEC,
+ IFLA_BOND_SLAVE_STATE,
+ IFLA_BOND_SLAVE_MII_STATUS,
+ IFLA_BOND_SLAVE_LINK_FAILURE_COUNT,
+ IFLA_BOND_SLAVE_PERM_HWADDR,
+ IFLA_BOND_SLAVE_QUEUE_ID,
+ IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+ IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+ IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+ __IFLA_BOND_SLAVE_MAX,
+};
+
+#define IFLA_BOND_SLAVE_MAX (__IFLA_BOND_SLAVE_MAX - 1)
+
+/* SR-IOV virtual function management section */
+
+enum {
+ IFLA_VF_INFO_UNSPEC,
+ IFLA_VF_INFO,
+ __IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+ IFLA_VF_UNSPEC,
+ IFLA_VF_MAC, /* Hardware queue specific attributes */
+ IFLA_VF_VLAN, /* VLAN ID and QoS */
+ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */
+ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */
+ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */
+ IFLA_VF_RATE, /* Min and Max TX Bandwidth Allocation */
+ IFLA_VF_RSS_QUERY_EN, /* RSS Redirection Table and Hash Key query
+ * on/off switch
+ */
+ IFLA_VF_STATS, /* network device statistics */
+ IFLA_VF_TRUST, /* Trust VF */
+ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */
+ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */
+ IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */
+ __IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
+struct ifla_vf_mac {
+ __u32 vf;
+ __u8 mac[32]; /* MAX_ADDR_LEN */
+};
+
+struct ifla_vf_vlan {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+};
+
+enum {
+ IFLA_VF_VLAN_INFO_UNSPEC,
+ IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */
+ __IFLA_VF_VLAN_INFO_MAX,
+};
+
+#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1)
+#define MAX_VLAN_LIST_LEN 1
+
+struct ifla_vf_vlan_info {
+ __u32 vf;
+ __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */
+ __u32 qos;
+ __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */
+};
+
+struct ifla_vf_tx_rate {
+ __u32 vf;
+ __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */
+};
+
+struct ifla_vf_rate {
+ __u32 vf;
+ __u32 min_tx_rate; /* Min Bandwidth in Mbps */
+ __u32 max_tx_rate; /* Max Bandwidth in Mbps */
+};
+
+struct ifla_vf_spoofchk {
+ __u32 vf;
+ __u32 setting;
+};
+
+struct ifla_vf_guid {
+ __u32 vf;
+ __u64 guid;
+};
+
+enum {
+ IFLA_VF_LINK_STATE_AUTO, /* link state of the uplink */
+ IFLA_VF_LINK_STATE_ENABLE, /* link always up */
+ IFLA_VF_LINK_STATE_DISABLE, /* link always down */
+ __IFLA_VF_LINK_STATE_MAX,
+};
+
+struct ifla_vf_link_state {
+ __u32 vf;
+ __u32 link_state;
+};
+
+struct ifla_vf_rss_query_en {
+ __u32 vf;
+ __u32 setting;
+};
+
+enum {
+ IFLA_VF_STATS_RX_PACKETS,
+ IFLA_VF_STATS_TX_PACKETS,
+ IFLA_VF_STATS_RX_BYTES,
+ IFLA_VF_STATS_TX_BYTES,
+ IFLA_VF_STATS_BROADCAST,
+ IFLA_VF_STATS_MULTICAST,
+ IFLA_VF_STATS_PAD,
+ IFLA_VF_STATS_RX_DROPPED,
+ IFLA_VF_STATS_TX_DROPPED,
+ __IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
+struct ifla_vf_trust {
+ __u32 vf;
+ __u32 setting;
+};
+
+/* VF ports management section
+ *
+ * Nested layout of set/get msg is:
+ *
+ * [IFLA_NUM_VF]
+ * [IFLA_VF_PORTS]
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * [IFLA_VF_PORT]
+ * [IFLA_PORT_*], ...
+ * ...
+ * [IFLA_PORT_SELF]
+ * [IFLA_PORT_*], ...
+ */
+
+enum {
+ IFLA_VF_PORT_UNSPEC,
+ IFLA_VF_PORT, /* nest */
+ __IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+ IFLA_PORT_UNSPEC,
+ IFLA_PORT_VF, /* __u32 */
+ IFLA_PORT_PROFILE, /* string */
+ IFLA_PORT_VSI_TYPE, /* 802.1Qbg (pre-)standard VDP */
+ IFLA_PORT_INSTANCE_UUID, /* binary UUID */
+ IFLA_PORT_HOST_UUID, /* binary UUID */
+ IFLA_PORT_REQUEST, /* __u8 */
+ IFLA_PORT_RESPONSE, /* __u16, output only */
+ __IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX 40
+#define PORT_UUID_MAX 16
+#define PORT_SELF_VF -1
+
+enum {
+ PORT_REQUEST_PREASSOCIATE = 0,
+ PORT_REQUEST_PREASSOCIATE_RR,
+ PORT_REQUEST_ASSOCIATE,
+ PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+ PORT_VDP_RESPONSE_SUCCESS = 0,
+ PORT_VDP_RESPONSE_INVALID_FORMAT,
+ PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_VDP_RESPONSE_UNUSED_VTID,
+ PORT_VDP_RESPONSE_VTID_VIOLATION,
+ PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+ PORT_VDP_RESPONSE_OUT_OF_SYNC,
+ /* 0x08-0xFF reserved for future VDP use */
+ PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+ PORT_PROFILE_RESPONSE_INPROGRESS,
+ PORT_PROFILE_RESPONSE_INVALID,
+ PORT_PROFILE_RESPONSE_BADSTATE,
+ PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+ PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+ __u8 vsi_mgr_id;
+ __u8 vsi_type_id[3];
+ __u8 vsi_type_version;
+ __u8 pad[3];
+};
+
+
+/* IPoIB section */
+
+enum {
+ IFLA_IPOIB_UNSPEC,
+ IFLA_IPOIB_PKEY,
+ IFLA_IPOIB_MODE,
+ IFLA_IPOIB_UMCAST,
+ __IFLA_IPOIB_MAX
+};
+
+enum {
+ IPOIB_MODE_DATAGRAM = 0, /* using unreliable datagram QPs */
+ IPOIB_MODE_CONNECTED = 1, /* using connected QPs */
+};
+
+#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
+
+
+/* HSR section */
+
+enum {
+ IFLA_HSR_UNSPEC,
+ IFLA_HSR_SLAVE1,
+ IFLA_HSR_SLAVE2,
+ IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */
+ IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */
+ IFLA_HSR_SEQ_NR,
+ IFLA_HSR_VERSION, /* HSR version */
+ __IFLA_HSR_MAX,
+};
+
+#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+
+/* STATS section */
+
+struct if_stats_msg {
+ __u8 family;
+ __u8 pad1;
+ __u16 pad2;
+ __u32 ifindex;
+ __u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, let's use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+ IFLA_STATS_UNSPEC, /* also used as 64bit pad attribute */
+ IFLA_STATS_LINK_64,
+ IFLA_STATS_LINK_XSTATS,
+ IFLA_STATS_LINK_XSTATS_SLAVE,
+ IFLA_STATS_LINK_OFFLOAD_XSTATS,
+ IFLA_STATS_AF_SPEC,
+ __IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR - 1))
+
+/* These are embedded into IFLA_STATS_LINK_XSTATS:
+ * [IFLA_STATS_LINK_XSTATS]
+ * -> [LINK_XSTATS_TYPE_xxx]
+ * -> [rtnl link type specific attributes]
+ */
+enum {
+ LINK_XSTATS_TYPE_UNSPEC,
+ LINK_XSTATS_TYPE_BRIDGE,
+ __LINK_XSTATS_TYPE_MAX
+};
+#define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1)
+
+/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */
+enum {
+ IFLA_OFFLOAD_XSTATS_UNSPEC,
+ IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */
+ __IFLA_OFFLOAD_XSTATS_MAX
+};
+#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+
+/* XDP section */
+
+#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
+#define XDP_FLAGS_SKB_MODE (1U << 1)
+#define XDP_FLAGS_DRV_MODE (1U << 2)
+#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
+ XDP_FLAGS_DRV_MODE | \
+ XDP_FLAGS_HW_MODE)
+#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
+ XDP_FLAGS_MODES)
+
+/* These are stored into IFLA_XDP_ATTACHED on dump. */
+enum {
+ XDP_ATTACHED_NONE = 0,
+ XDP_ATTACHED_DRV,
+ XDP_ATTACHED_SKB,
+ XDP_ATTACHED_HW,
+ XDP_ATTACHED_MULTI,
+};
+
+enum {
+ IFLA_XDP_UNSPEC,
+ IFLA_XDP_FD,
+ IFLA_XDP_ATTACHED,
+ IFLA_XDP_FLAGS,
+ IFLA_XDP_PROG_ID,
+ IFLA_XDP_DRV_PROG_ID,
+ IFLA_XDP_SKB_PROG_ID,
+ IFLA_XDP_HW_PROG_ID,
+ __IFLA_XDP_MAX,
+};
+
+#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1)
+
+enum {
+ IFLA_EVENT_NONE,
+ IFLA_EVENT_REBOOT, /* internal reset / reboot */
+ IFLA_EVENT_FEATURES, /* change in offload features */
+ IFLA_EVENT_BONDING_FAILOVER, /* change in active slave */
+ IFLA_EVENT_NOTIFY_PEERS, /* re-sent grat. arp/ndisc */
+ IFLA_EVENT_IGMP_RESEND, /* re-sent IGMP JOIN */
+ IFLA_EVENT_BONDING_OPTIONS, /* change in bonding options */
+};
+
+/* tun section */
+
+enum {
+ IFLA_TUN_UNSPEC,
+ IFLA_TUN_OWNER,
+ IFLA_TUN_GROUP,
+ IFLA_TUN_TYPE,
+ IFLA_TUN_PI,
+ IFLA_TUN_VNET_HDR,
+ IFLA_TUN_PERSIST,
+ IFLA_TUN_MULTI_QUEUE,
+ IFLA_TUN_NUM_QUEUES,
+ IFLA_TUN_NUM_DISABLED_QUEUES,
+ __IFLA_TUN_MAX,
+};
+
+#define IFLA_TUN_MAX (__IFLA_TUN_MAX - 1)
+
+/* rmnet section */
+
+#define RMNET_FLAGS_INGRESS_DEAGGREGATION (1U << 0)
+#define RMNET_FLAGS_INGRESS_MAP_COMMANDS (1U << 1)
+#define RMNET_FLAGS_INGRESS_MAP_CKSUMV4 (1U << 2)
+#define RMNET_FLAGS_EGRESS_MAP_CKSUMV4 (1U << 3)
+
+enum {
+ IFLA_RMNET_UNSPEC,
+ IFLA_RMNET_MUX_ID,
+ IFLA_RMNET_FLAGS,
+ __IFLA_RMNET_MAX,
+};
+
+#define IFLA_RMNET_MAX (__IFLA_RMNET_MAX - 1)
+
+struct ifla_rmnet_flags {
+ __u32 flags;
+ __u32 mask;
+};
+
+#endif /* _UAPI_LINUX_IF_LINK_H */
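
A sketch using the IFLA_RTA()/IFLA_PAYLOAD() helpers above to walk the attributes of one RTM_NEWLINK message; opening the netlink socket and sending the RTM_GETLINK dump request are elided:

#include <linux/if_link.h>
#include <linux/rtnetlink.h>
#include <stdio.h>

static void dump_link(struct nlmsghdr *nlh)
{
	struct ifinfomsg *ifi = NLMSG_DATA(nlh);
	struct rtattr *rta = IFLA_RTA(ifi);
	int len = IFLA_PAYLOAD(nlh);

	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len))
		if (rta->rta_type == IFLA_IFNAME)
			printf("ifindex %d: %s\n",
			       ifi->ifi_index, (char *)RTA_DATA(rta));
}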
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
new file mode 100644
index 000000000..2a66ab49f
--- /dev/null
+++ b/tools/include/uapi/linux/in.h
@@ -0,0 +1,304 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions of the Internet Protocol.
+ *
+ * Version: @(#)in.h 1.0.1 04/21/93
+ *
+ * Authors: Original taken from the GNU Project <netinet/in.h> file.
+ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_LINUX_IN_H
+#define _UAPI_LINUX_IN_H
+
+#include <linux/types.h>
+#include <linux/libc-compat.h>
+#include <linux/socket.h>
+
+#if __UAPI_DEF_IN_IPPROTO
+/* Standard well-defined IP protocols. */
+enum {
+ IPPROTO_IP = 0, /* Dummy protocol for TCP */
+#define IPPROTO_IP IPPROTO_IP
+ IPPROTO_ICMP = 1, /* Internet Control Message Protocol */
+#define IPPROTO_ICMP IPPROTO_ICMP
+ IPPROTO_IGMP = 2, /* Internet Group Management Protocol */
+#define IPPROTO_IGMP IPPROTO_IGMP
+ IPPROTO_IPIP = 4, /* IPIP tunnels (older KA9Q tunnels use 94) */
+#define IPPROTO_IPIP IPPROTO_IPIP
+ IPPROTO_TCP = 6, /* Transmission Control Protocol */
+#define IPPROTO_TCP IPPROTO_TCP
+ IPPROTO_EGP = 8, /* Exterior Gateway Protocol */
+#define IPPROTO_EGP IPPROTO_EGP
+ IPPROTO_PUP = 12, /* PUP protocol */
+#define IPPROTO_PUP IPPROTO_PUP
+ IPPROTO_UDP = 17, /* User Datagram Protocol */
+#define IPPROTO_UDP IPPROTO_UDP
+ IPPROTO_IDP = 22, /* XNS IDP protocol */
+#define IPPROTO_IDP IPPROTO_IDP
+ IPPROTO_TP = 29, /* SO Transport Protocol Class 4 */
+#define IPPROTO_TP IPPROTO_TP
+ IPPROTO_DCCP = 33, /* Datagram Congestion Control Protocol */
+#define IPPROTO_DCCP IPPROTO_DCCP
+ IPPROTO_IPV6 = 41, /* IPv6-in-IPv4 tunnelling */
+#define IPPROTO_IPV6 IPPROTO_IPV6
+ IPPROTO_RSVP = 46, /* RSVP Protocol */
+#define IPPROTO_RSVP IPPROTO_RSVP
+ IPPROTO_GRE = 47, /* Cisco GRE tunnels (rfc 1701,1702) */
+#define IPPROTO_GRE IPPROTO_GRE
+ IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
+#define IPPROTO_ESP IPPROTO_ESP
+ IPPROTO_AH = 51, /* Authentication Header protocol */
+#define IPPROTO_AH IPPROTO_AH
+ IPPROTO_MTP = 92, /* Multicast Transport Protocol */
+#define IPPROTO_MTP IPPROTO_MTP
+ IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
+#define IPPROTO_BEETPH IPPROTO_BEETPH
+ IPPROTO_ENCAP = 98, /* Encapsulation Header */
+#define IPPROTO_ENCAP IPPROTO_ENCAP
+ IPPROTO_PIM = 103, /* Protocol Independent Multicast */
+#define IPPROTO_PIM IPPROTO_PIM
+ IPPROTO_COMP = 108, /* Compression Header Protocol */
+#define IPPROTO_COMP IPPROTO_COMP
+ IPPROTO_SCTP = 132, /* Stream Control Transport Protocol */
+#define IPPROTO_SCTP IPPROTO_SCTP
+ IPPROTO_UDPLITE = 136, /* UDP-Lite (RFC 3828) */
+#define IPPROTO_UDPLITE IPPROTO_UDPLITE
+ IPPROTO_MPLS = 137, /* MPLS in IP (RFC 4023) */
+#define IPPROTO_MPLS IPPROTO_MPLS
+ IPPROTO_RAW = 255, /* Raw IP packets */
+#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_MAX
+};
+#endif
+
+#if __UAPI_DEF_IN_ADDR
+/* Internet address. */
+struct in_addr {
+ __be32 s_addr;
+};
+#endif
+
+#define IP_TOS 1
+#define IP_TTL 2
+#define IP_HDRINCL 3
+#define IP_OPTIONS 4
+#define IP_ROUTER_ALERT 5
+#define IP_RECVOPTS 6
+#define IP_RETOPTS 7
+#define IP_PKTINFO 8
+#define IP_PKTOPTIONS 9
+#define IP_MTU_DISCOVER 10
+#define IP_RECVERR 11
+#define IP_RECVTTL 12
+#define IP_RECVTOS 13
+#define IP_MTU 14
+#define IP_FREEBIND 15
+#define IP_IPSEC_POLICY 16
+#define IP_XFRM_POLICY 17
+#define IP_PASSSEC 18
+#define IP_TRANSPARENT 19
+
+/* BSD compatibility */
+#define IP_RECVRETOPTS IP_RETOPTS
+
+/* TProxy original addresses */
+#define IP_ORIGDSTADDR 20
+#define IP_RECVORIGDSTADDR IP_ORIGDSTADDR
+
+#define IP_MINTTL 21
+#define IP_NODEFRAG 22
+#define IP_CHECKSUM 23
+#define IP_BIND_ADDRESS_NO_PORT 24
+#define IP_RECVFRAGSIZE 25
+
+/* IP_MTU_DISCOVER values */
+#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
+#define IP_PMTUDISC_WANT 1 /* Use per route hints */
+#define IP_PMTUDISC_DO 2 /* Always DF */
+#define IP_PMTUDISC_PROBE 3 /* Ignore dst pmtu */
+/* Always use interface mtu (ignores dst pmtu) but don't set DF flag.
+ * Also incoming ICMP frag_needed notifications will be ignored on
+ * this socket to prevent accepting spoofed ones.
+ */
+#define IP_PMTUDISC_INTERFACE 4
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
+ * fragmented if they exceed the interface MTU
+ */
+#define IP_PMTUDISC_OMIT 5
+
+#define IP_MULTICAST_IF 32
+#define IP_MULTICAST_TTL 33
+#define IP_MULTICAST_LOOP 34
+#define IP_ADD_MEMBERSHIP 35
+#define IP_DROP_MEMBERSHIP 36
+#define IP_UNBLOCK_SOURCE 37
+#define IP_BLOCK_SOURCE 38
+#define IP_ADD_SOURCE_MEMBERSHIP 39
+#define IP_DROP_SOURCE_MEMBERSHIP 40
+#define IP_MSFILTER 41
+#define MCAST_JOIN_GROUP 42
+#define MCAST_BLOCK_SOURCE 43
+#define MCAST_UNBLOCK_SOURCE 44
+#define MCAST_LEAVE_GROUP 45
+#define MCAST_JOIN_SOURCE_GROUP 46
+#define MCAST_LEAVE_SOURCE_GROUP 47
+#define MCAST_MSFILTER 48
+#define IP_MULTICAST_ALL 49
+#define IP_UNICAST_IF 50
+
+#define MCAST_EXCLUDE 0
+#define MCAST_INCLUDE 1
+
+/* These need to appear somewhere around here */
+#define IP_DEFAULT_MULTICAST_TTL 1
+#define IP_DEFAULT_MULTICAST_LOOP 1
+
+/* Request struct for multicast socket ops */
+
+#if __UAPI_DEF_IP_MREQ
+struct ip_mreq {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_interface; /* local IP address of interface */
+};
+
+struct ip_mreqn {
+ struct in_addr imr_multiaddr; /* IP multicast address of group */
+ struct in_addr imr_address; /* local IP address of interface */
+ int imr_ifindex; /* Interface index */
+};
+
+struct ip_mreq_source {
+ __be32 imr_multiaddr;
+ __be32 imr_interface;
+ __be32 imr_sourceaddr;
+};
+
+struct ip_msfilter {
+ __be32 imsf_multiaddr;
+ __be32 imsf_interface;
+ __u32 imsf_fmode;
+ __u32 imsf_numsrc;
+ __be32 imsf_slist[1];
+};
+
+#define IP_MSFILTER_SIZE(numsrc) \
+ (sizeof(struct ip_msfilter) - sizeof(__u32) \
+ + (numsrc) * sizeof(__u32))
+
+struct group_req {
+ __u32 gr_interface; /* interface index */
+ struct __kernel_sockaddr_storage gr_group; /* group address */
+};
+
+struct group_source_req {
+ __u32 gsr_interface; /* interface index */
+ struct __kernel_sockaddr_storage gsr_group; /* group address */
+ struct __kernel_sockaddr_storage gsr_source; /* source address */
+};
+
+struct group_filter {
+ __u32 gf_interface; /* interface index */
+ struct __kernel_sockaddr_storage gf_group; /* multicast address */
+ __u32 gf_fmode; /* filter mode */
+ __u32 gf_numsrc; /* number of sources */
+	struct __kernel_sockaddr_storage gf_slist[1]; /* source address list */
+};
+
+#define GROUP_FILTER_SIZE(numsrc) \
+ (sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
+ + (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
+
+#if __UAPI_DEF_IN_PKTINFO
+struct in_pktinfo {
+ int ipi_ifindex;
+ struct in_addr ipi_spec_dst;
+ struct in_addr ipi_addr;
+};
+#endif
+
+/* Structure describing an Internet (IP) socket address. */
+#if __UAPI_DEF_SOCKADDR_IN
+#define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */
+struct sockaddr_in {
+ __kernel_sa_family_t sin_family; /* Address family */
+ __be16 sin_port; /* Port number */
+ struct in_addr sin_addr; /* Internet address */
+
+ /* Pad to size of `struct sockaddr'. */
+ unsigned char __pad[__SOCK_SIZE__ - sizeof(short int) -
+ sizeof(unsigned short int) - sizeof(struct in_addr)];
+};
+#define sin_zero __pad /* for BSD UNIX comp. -FvK */
+#endif
+
+#if __UAPI_DEF_IN_CLASS
+/*
+ * Definitions of the bits in an Internet address integer.
+ * On subnets, host and network parts are found according
+ * to the subnet mask, not these masks.
+ */
+#define IN_CLASSA(a) ((((long int) (a)) & 0x80000000) == 0)
+#define IN_CLASSA_NET 0xff000000
+#define IN_CLASSA_NSHIFT 24
+#define IN_CLASSA_HOST (0xffffffff & ~IN_CLASSA_NET)
+#define IN_CLASSA_MAX 128
+
+#define IN_CLASSB(a) ((((long int) (a)) & 0xc0000000) == 0x80000000)
+#define IN_CLASSB_NET 0xffff0000
+#define IN_CLASSB_NSHIFT 16
+#define IN_CLASSB_HOST (0xffffffff & ~IN_CLASSB_NET)
+#define IN_CLASSB_MAX 65536
+
+#define IN_CLASSC(a) ((((long int) (a)) & 0xe0000000) == 0xc0000000)
+#define IN_CLASSC_NET 0xffffff00
+#define IN_CLASSC_NSHIFT 8
+#define IN_CLASSC_HOST (0xffffffff & ~IN_CLASSC_NET)
+
+#define IN_CLASSD(a) ((((long int) (a)) & 0xf0000000) == 0xe0000000)
+#define IN_MULTICAST(a) IN_CLASSD(a)
+#define IN_MULTICAST_NET 0xF0000000
+
+#define IN_EXPERIMENTAL(a) ((((long int) (a)) & 0xf0000000) == 0xf0000000)
+#define IN_BADCLASS(a) IN_EXPERIMENTAL((a))
+
+/* Address to accept any incoming messages. */
+#define INADDR_ANY ((unsigned long int) 0x00000000)
+
+/* Address to send to all hosts. */
+#define INADDR_BROADCAST ((unsigned long int) 0xffffffff)
+
+/* Address indicating an error return. */
+#define INADDR_NONE ((unsigned long int) 0xffffffff)
+
+/* Dummy address for src of ICMP replies if no real address is set (RFC7600). */
+#define INADDR_DUMMY ((unsigned long int) 0xc0000008)
+
+/* Network number for local host loopback. */
+#define IN_LOOPBACKNET 127
+
+/* Address to loopback in software to local host. */
+#define INADDR_LOOPBACK 0x7f000001 /* 127.0.0.1 */
+#define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000)
+
+/* Defines for Multicast INADDR */
+#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */
+#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */
+#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */
+#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */
+#endif
+
+/* <asm/byteorder.h> contains htonl() and friends. */
+#include <asm/byteorder.h>
+
+
+#endif /* _UAPI_LINUX_IN_H */
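
The same definitions are exposed by libc's <netinet/in.h>. A sketch joining the all-hosts group (224.0.0.1, INADDR_ALLHOSTS_GROUP above) with struct ip_mreq and IP_ADD_MEMBERSHIP:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

static int join_all_hosts(int fd)
{
	struct ip_mreq mreq = {
		.imr_interface.s_addr = htonl(INADDR_ANY),
	};

	inet_pton(AF_INET, "224.0.0.1", &mreq.imr_multiaddr);
	return setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP,
			  &mreq, sizeof(mreq));
}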
diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h
new file mode 100644
index 000000000..ef1305010
--- /dev/null
+++ b/tools/include/uapi/linux/kcmp.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_KCMP_H
+#define _UAPI_LINUX_KCMP_H
+
+#include <linux/types.h>
+
+/* Comparison type */
+enum kcmp_type {
+ KCMP_FILE,
+ KCMP_VM,
+ KCMP_FILES,
+ KCMP_FS,
+ KCMP_SIGHAND,
+ KCMP_IO,
+ KCMP_SYSVSEM,
+ KCMP_EPOLL_TFD,
+
+ KCMP_TYPES,
+};
+
+/* Slot for KCMP_EPOLL_TFD */
+struct kcmp_epoll_slot {
+ __u32 efd; /* epoll file descriptor */
+ __u32 tfd; /* target file number */
+ __u32 toff; /* target offset within same numbered sequence */
+};
+
+#endif /* _UAPI_LINUX_KCMP_H */
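
These types are the third argument of the kcmp(2) syscall, which glibc does not wrap. A sketch checking whether two descriptors in two tasks refer to the same struct file:

#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <linux/kcmp.h>

static int same_file(pid_t p1, pid_t p2, int fd1, int fd2)
{
	/* returns 0 when both fds share one struct file, a positive
	 * ordering value when they differ, negative on error */
	return syscall(SYS_kcmp, p1, p2, KCMP_FILE, fd1, fd2);
}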
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
new file mode 100644
index 000000000..251be353f
--- /dev/null
+++ b/tools/include/uapi/linux/kvm.h
@@ -0,0 +1,1552 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_KVM_H
+#define __LINUX_KVM_H
+
+/*
+ * Userspace interface for /dev/kvm - kernel based virtual machine
+ *
+ * Note: you must update KVM_API_VERSION if you change this interface.
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/ioctl.h>
+#include <asm/kvm.h>
+
+#define KVM_API_VERSION 12
+
+/* *** Deprecated interfaces *** */
+
+#define KVM_TRC_SHIFT 16
+
+#define KVM_TRC_ENTRYEXIT (1 << KVM_TRC_SHIFT)
+#define KVM_TRC_HANDLER (1 << (KVM_TRC_SHIFT + 1))
+
+#define KVM_TRC_VMENTRY (KVM_TRC_ENTRYEXIT + 0x01)
+#define KVM_TRC_VMEXIT (KVM_TRC_ENTRYEXIT + 0x02)
+#define KVM_TRC_PAGE_FAULT (KVM_TRC_HANDLER + 0x01)
+
+#define KVM_TRC_HEAD_SIZE 12
+#define KVM_TRC_CYCLE_SIZE 8
+#define KVM_TRC_EXTRA_MAX 7
+
+#define KVM_TRC_INJ_VIRQ (KVM_TRC_HANDLER + 0x02)
+#define KVM_TRC_REDELIVER_EVT (KVM_TRC_HANDLER + 0x03)
+#define KVM_TRC_PEND_INTR (KVM_TRC_HANDLER + 0x04)
+#define KVM_TRC_IO_READ (KVM_TRC_HANDLER + 0x05)
+#define KVM_TRC_IO_WRITE (KVM_TRC_HANDLER + 0x06)
+#define KVM_TRC_CR_READ (KVM_TRC_HANDLER + 0x07)
+#define KVM_TRC_CR_WRITE (KVM_TRC_HANDLER + 0x08)
+#define KVM_TRC_DR_READ (KVM_TRC_HANDLER + 0x09)
+#define KVM_TRC_DR_WRITE (KVM_TRC_HANDLER + 0x0A)
+#define KVM_TRC_MSR_READ (KVM_TRC_HANDLER + 0x0B)
+#define KVM_TRC_MSR_WRITE (KVM_TRC_HANDLER + 0x0C)
+#define KVM_TRC_CPUID (KVM_TRC_HANDLER + 0x0D)
+#define KVM_TRC_INTR (KVM_TRC_HANDLER + 0x0E)
+#define KVM_TRC_NMI (KVM_TRC_HANDLER + 0x0F)
+#define KVM_TRC_VMMCALL (KVM_TRC_HANDLER + 0x10)
+#define KVM_TRC_HLT (KVM_TRC_HANDLER + 0x11)
+#define KVM_TRC_CLTS (KVM_TRC_HANDLER + 0x12)
+#define KVM_TRC_LMSW (KVM_TRC_HANDLER + 0x13)
+#define KVM_TRC_APIC_ACCESS (KVM_TRC_HANDLER + 0x14)
+#define KVM_TRC_TDP_FAULT (KVM_TRC_HANDLER + 0x15)
+#define KVM_TRC_GTLB_WRITE (KVM_TRC_HANDLER + 0x16)
+#define KVM_TRC_STLB_WRITE (KVM_TRC_HANDLER + 0x17)
+#define KVM_TRC_STLB_INVAL (KVM_TRC_HANDLER + 0x18)
+#define KVM_TRC_PPC_INSTR (KVM_TRC_HANDLER + 0x19)
+
+struct kvm_user_trace_setup {
+ __u32 buf_size;
+ __u32 buf_nr;
+};
+
+#define __KVM_DEPRECATED_MAIN_W_0x06 \
+ _IOW(KVMIO, 0x06, struct kvm_user_trace_setup)
+#define __KVM_DEPRECATED_MAIN_0x07 _IO(KVMIO, 0x07)
+#define __KVM_DEPRECATED_MAIN_0x08 _IO(KVMIO, 0x08)
+
+#define __KVM_DEPRECATED_VM_R_0x70 _IOR(KVMIO, 0x70, struct kvm_assigned_irq)
+
+struct kvm_breakpoint {
+ __u32 enabled;
+ __u32 padding;
+ __u64 address;
+};
+
+struct kvm_debug_guest {
+ __u32 enabled;
+ __u32 pad;
+ struct kvm_breakpoint breakpoints[4];
+ __u32 singlestep;
+};
+
+#define __KVM_DEPRECATED_VCPU_W_0x87 _IOW(KVMIO, 0x87, struct kvm_debug_guest)
+
+/* *** End of deprecated interfaces *** */
+
+
+/* for KVM_CREATE_MEMORY_REGION */
+struct kvm_memory_region {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+};
+
+/* for KVM_SET_USER_MEMORY_REGION */
+struct kvm_userspace_memory_region {
+ __u32 slot;
+ __u32 flags;
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 userspace_addr; /* start of the userspace allocated memory */
+};
+
+/*
+ * Bits 0 ~ 15 of kvm_memory_region::flags are visible to userspace,
+ * other bits are reserved for kvm internal use which are defined in
+ * include/linux/kvm_host.h.
+ */
+#define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0)
+#define KVM_MEM_READONLY (1UL << 1)
+
+/* for KVM_IRQ_LINE */
+struct kvm_irq_level {
+ /*
+ * ACPI gsi notion of irq.
+ * For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
+ * For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
+ * For ARM: See Documentation/virtual/kvm/api.txt
+ */
+ union {
+ __u32 irq;
+ __s32 status;
+ };
+ __u32 level;
+};
+
+
+struct kvm_irqchip {
+ __u32 chip_id;
+ __u32 pad;
+ union {
+ char dummy[512]; /* reserving space */
+#ifdef __KVM_HAVE_PIT
+ struct kvm_pic_state pic;
+#endif
+#ifdef __KVM_HAVE_IOAPIC
+ struct kvm_ioapic_state ioapic;
+#endif
+ } chip;
+};
+
+/* for KVM_CREATE_PIT2 */
+struct kvm_pit_config {
+ __u32 flags;
+ __u32 pad[15];
+};
+
+#define KVM_PIT_SPEAKER_DUMMY 1
+
+struct kvm_s390_skeys {
+ __u64 start_gfn;
+ __u64 count;
+ __u64 skeydata_addr;
+ __u32 flags;
+ __u32 reserved[9];
+};
+
+#define KVM_S390_CMMA_PEEK (1 << 0)
+
+/**
+ * kvm_s390_cmma_log - Used for CMMA migration.
+ *
+ * Used both for input and output.
+ *
+ * @start_gfn: Guest page number to start from.
+ * @count: Size of the result buffer.
+ * @flags: Control operation mode via KVM_S390_CMMA_* flags
+ * @remaining: Used with KVM_S390_GET_CMMA_BITS. Indicates how many dirty
+ * pages are still remaining.
+ * @mask: Used with KVM_S390_SET_CMMA_BITS. Bitmap of bits to actually set
+ * in the PGSTE.
+ * @values: Pointer to the values buffer.
+ *
+ * Used in KVM_S390_{G,S}ET_CMMA_BITS ioctls.
+ */
+struct kvm_s390_cmma_log {
+ __u64 start_gfn;
+ __u32 count;
+ __u32 flags;
+ union {
+ __u64 remaining;
+ __u64 mask;
+ };
+ __u64 values;
+};
+
+struct kvm_hyperv_exit {
+#define KVM_EXIT_HYPERV_SYNIC 1
+#define KVM_EXIT_HYPERV_HCALL 2
+ __u32 type;
+ union {
+ struct {
+ __u32 msr;
+ __u64 control;
+ __u64 evt_page;
+ __u64 msg_page;
+ } synic;
+ struct {
+ __u64 input;
+ __u64 result;
+ __u64 params[2];
+ } hcall;
+ } u;
+};
+
+#define KVM_S390_GET_SKEYS_NONE 1
+#define KVM_S390_SKEYS_MAX 1048576
+
+#define KVM_EXIT_UNKNOWN 0
+#define KVM_EXIT_EXCEPTION 1
+#define KVM_EXIT_IO 2
+#define KVM_EXIT_HYPERCALL 3
+#define KVM_EXIT_DEBUG 4
+#define KVM_EXIT_HLT 5
+#define KVM_EXIT_MMIO 6
+#define KVM_EXIT_IRQ_WINDOW_OPEN 7
+#define KVM_EXIT_SHUTDOWN 8
+#define KVM_EXIT_FAIL_ENTRY 9
+#define KVM_EXIT_INTR 10
+#define KVM_EXIT_SET_TPR 11
+#define KVM_EXIT_TPR_ACCESS 12
+#define KVM_EXIT_S390_SIEIC 13
+#define KVM_EXIT_S390_RESET 14
+#define KVM_EXIT_DCR 15 /* deprecated */
+#define KVM_EXIT_NMI 16
+#define KVM_EXIT_INTERNAL_ERROR 17
+#define KVM_EXIT_OSI 18
+#define KVM_EXIT_PAPR_HCALL 19
+#define KVM_EXIT_S390_UCONTROL 20
+#define KVM_EXIT_WATCHDOG 21
+#define KVM_EXIT_S390_TSCH 22
+#define KVM_EXIT_EPR 23
+#define KVM_EXIT_SYSTEM_EVENT 24
+#define KVM_EXIT_S390_STSI 25
+#define KVM_EXIT_IOAPIC_EOI 26
+#define KVM_EXIT_HYPERV 27
+
+/* For KVM_EXIT_INTERNAL_ERROR */
+/* Emulate instruction failed. */
+#define KVM_INTERNAL_ERROR_EMULATION 1
+/* Encounter unexpected simultaneous exceptions. */
+#define KVM_INTERNAL_ERROR_SIMUL_EX 2
+/* Encounter unexpected vm-exit due to delivery event. */
+#define KVM_INTERNAL_ERROR_DELIVERY_EV 3
+
+/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
+struct kvm_run {
+ /* in */
+ __u8 request_interrupt_window;
+ __u8 immediate_exit;
+ __u8 padding1[6];
+
+ /* out */
+ __u32 exit_reason;
+ __u8 ready_for_interrupt_injection;
+ __u8 if_flag;
+ __u16 flags;
+
+ /* in (pre_kvm_run), out (post_kvm_run) */
+ __u64 cr8;
+ __u64 apic_base;
+
+#ifdef __KVM_S390
+ /* the processor status word for s390 */
+ __u64 psw_mask; /* psw upper half */
+ __u64 psw_addr; /* psw lower half */
+#endif
+ union {
+ /* KVM_EXIT_UNKNOWN */
+ struct {
+ __u64 hardware_exit_reason;
+ } hw;
+ /* KVM_EXIT_FAIL_ENTRY */
+ struct {
+ __u64 hardware_entry_failure_reason;
+ } fail_entry;
+ /* KVM_EXIT_EXCEPTION */
+ struct {
+ __u32 exception;
+ __u32 error_code;
+ } ex;
+ /* KVM_EXIT_IO */
+ struct {
+#define KVM_EXIT_IO_IN 0
+#define KVM_EXIT_IO_OUT 1
+ __u8 direction;
+ __u8 size; /* bytes */
+ __u16 port;
+ __u32 count;
+ __u64 data_offset; /* relative to kvm_run start */
+ } io;
+ /* KVM_EXIT_DEBUG */
+ struct {
+ struct kvm_debug_exit_arch arch;
+ } debug;
+ /* KVM_EXIT_MMIO */
+ struct {
+ __u64 phys_addr;
+ __u8 data[8];
+ __u32 len;
+ __u8 is_write;
+ } mmio;
+ /* KVM_EXIT_HYPERCALL */
+ struct {
+ __u64 nr;
+ __u64 args[6];
+ __u64 ret;
+ __u32 longmode;
+ __u32 pad;
+ } hypercall;
+ /* KVM_EXIT_TPR_ACCESS */
+ struct {
+ __u64 rip;
+ __u32 is_write;
+ __u32 pad;
+ } tpr_access;
+ /* KVM_EXIT_S390_SIEIC */
+ struct {
+ __u8 icptcode;
+ __u16 ipa;
+ __u32 ipb;
+ } s390_sieic;
+ /* KVM_EXIT_S390_RESET */
+#define KVM_S390_RESET_POR 1
+#define KVM_S390_RESET_CLEAR 2
+#define KVM_S390_RESET_SUBSYSTEM 4
+#define KVM_S390_RESET_CPU_INIT 8
+#define KVM_S390_RESET_IPL 16
+ __u64 s390_reset_flags;
+ /* KVM_EXIT_S390_UCONTROL */
+ struct {
+ __u64 trans_exc_code;
+ __u32 pgm_code;
+ } s390_ucontrol;
+ /* KVM_EXIT_DCR (deprecated) */
+ struct {
+ __u32 dcrn;
+ __u32 data;
+ __u8 is_write;
+ } dcr;
+ /* KVM_EXIT_INTERNAL_ERROR */
+ struct {
+ __u32 suberror;
+ /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
+ __u32 ndata;
+ __u64 data[16];
+ } internal;
+ /* KVM_EXIT_OSI */
+ struct {
+ __u64 gprs[32];
+ } osi;
+ /* KVM_EXIT_PAPR_HCALL */
+ struct {
+ __u64 nr;
+ __u64 ret;
+ __u64 args[9];
+ } papr_hcall;
+ /* KVM_EXIT_S390_TSCH */
+ struct {
+ __u16 subchannel_id;
+ __u16 subchannel_nr;
+ __u32 io_int_parm;
+ __u32 io_int_word;
+ __u32 ipb;
+ __u8 dequeued;
+ } s390_tsch;
+ /* KVM_EXIT_EPR */
+ struct {
+ __u32 epr;
+ } epr;
+ /* KVM_EXIT_SYSTEM_EVENT */
+ struct {
+#define KVM_SYSTEM_EVENT_SHUTDOWN 1
+#define KVM_SYSTEM_EVENT_RESET 2
+#define KVM_SYSTEM_EVENT_CRASH 3
+ __u32 type;
+ __u64 flags;
+ } system_event;
+ /* KVM_EXIT_S390_STSI */
+ struct {
+ __u64 addr;
+ __u8 ar;
+ __u8 reserved;
+ __u8 fc;
+ __u8 sel1;
+ __u16 sel2;
+ } s390_stsi;
+ /* KVM_EXIT_IOAPIC_EOI */
+ struct {
+ __u8 vector;
+ } eoi;
+ /* KVM_EXIT_HYPERV */
+ struct kvm_hyperv_exit hyperv;
+ /* Fix the size of the union. */
+ char padding[256];
+ };
+
+ /* 2048 is the size of the char array used to bound/pad the size
+ * of the union that holds sync regs.
+ */
+ #define SYNC_REGS_SIZE_BYTES 2048
+ /*
+ * shared registers between kvm and userspace.
+	 * kvm_valid_regs specifies the register classes set by the host;
+	 * kvm_dirty_regs specifies the register classes dirtied by userspace.
+ * struct kvm_sync_regs is architecture specific, as well as the
+ * bits for kvm_valid_regs and kvm_dirty_regs
+ */
+ __u64 kvm_valid_regs;
+ __u64 kvm_dirty_regs;
+ union {
+ struct kvm_sync_regs regs;
+ char padding[SYNC_REGS_SIZE_BYTES];
+ } s;
+};
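
struct kvm_run is mapped, not copied: userspace mmap()s it from the vcpu fd and inspects the exit union after each KVM_RUN. A minimal sketch of that loop, with VM and vcpu creation elided (kvm_fd and vcpu_fd are assumed to be the /dev/kvm and vcpu descriptors):

#include <linux/kvm.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

static int vcpu_loop(int kvm_fd, int vcpu_fd)
{
	int run_sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run *run = mmap(NULL, run_sz, PROT_READ | PROT_WRITE,
				   MAP_SHARED, vcpu_fd, 0);

	for (;;) {
		ioctl(vcpu_fd, KVM_RUN, 0);	/* blocks until the next exit */
		switch (run->exit_reason) {
		case KVM_EXIT_IO:
			/* bytes live at (char *)run + run->io.data_offset */
			break;
		case KVM_EXIT_HLT:
			return 0;
		}
	}
}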
+
+/* for KVM_REGISTER_COALESCED_MMIO / KVM_UNREGISTER_COALESCED_MMIO */
+
+struct kvm_coalesced_mmio_zone {
+ __u64 addr;
+ __u32 size;
+ __u32 pad;
+};
+
+struct kvm_coalesced_mmio {
+ __u64 phys_addr;
+ __u32 len;
+ __u32 pad;
+ __u8 data[8];
+};
+
+struct kvm_coalesced_mmio_ring {
+ __u32 first, last;
+ struct kvm_coalesced_mmio coalesced_mmio[0];
+};
+
+#define KVM_COALESCED_MMIO_MAX \
+ ((PAGE_SIZE - sizeof(struct kvm_coalesced_mmio_ring)) / \
+ sizeof(struct kvm_coalesced_mmio))
+
+/* for KVM_TRANSLATE */
+struct kvm_translation {
+ /* in */
+ __u64 linear_address;
+
+ /* out */
+ __u64 physical_address;
+ __u8 valid;
+ __u8 writeable;
+ __u8 usermode;
+ __u8 pad[5];
+};
+
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+ /* in */
+ __u64 gaddr; /* the guest address */
+ __u64 flags; /* flags */
+	__u32 size;		/* number of bytes */
+ __u32 op; /* type of operation */
+ __u64 buf; /* buffer in userspace */
+ __u8 ar; /* the access register number */
+ __u8 reserved[31]; /* should be set to 0 */
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ 0
+#define KVM_S390_MEMOP_LOGICAL_WRITE 1
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
+
+/* for KVM_INTERRUPT */
+struct kvm_interrupt {
+ /* in */
+ __u32 irq;
+};
+
+/* for KVM_GET_DIRTY_LOG */
+struct kvm_dirty_log {
+ __u32 slot;
+ __u32 padding1;
+ union {
+ void __user *dirty_bitmap; /* one bit per page */
+ __u64 padding2;
+ };
+};
+
+/* for KVM_SET_SIGNAL_MASK */
+struct kvm_signal_mask {
+ __u32 len;
+ __u8 sigset[0];
+};
+
+/* for KVM_TPR_ACCESS_REPORTING */
+struct kvm_tpr_access_ctl {
+ __u32 enabled;
+ __u32 flags;
+ __u32 reserved[8];
+};
+
+/* for KVM_SET_VAPIC_ADDR */
+struct kvm_vapic_addr {
+ __u64 vapic_addr;
+};
+
+/* for KVM_SET_MP_STATE */
+
+/* not all states are valid on all architectures */
+#define KVM_MP_STATE_RUNNABLE 0
+#define KVM_MP_STATE_UNINITIALIZED 1
+#define KVM_MP_STATE_INIT_RECEIVED 2
+#define KVM_MP_STATE_HALTED 3
+#define KVM_MP_STATE_SIPI_RECEIVED 4
+#define KVM_MP_STATE_STOPPED 5
+#define KVM_MP_STATE_CHECK_STOP 6
+#define KVM_MP_STATE_OPERATING 7
+#define KVM_MP_STATE_LOAD 8
+
+struct kvm_mp_state {
+ __u32 mp_state;
+};
+
+struct kvm_s390_psw {
+ __u64 mask;
+ __u64 addr;
+};
+
+/* valid values for type in kvm_s390_interrupt */
+#define KVM_S390_SIGP_STOP 0xfffe0000u
+#define KVM_S390_PROGRAM_INT 0xfffe0001u
+#define KVM_S390_SIGP_SET_PREFIX 0xfffe0002u
+#define KVM_S390_RESTART 0xfffe0003u
+#define KVM_S390_INT_PFAULT_INIT 0xfffe0004u
+#define KVM_S390_INT_PFAULT_DONE 0xfffe0005u
+#define KVM_S390_MCHK 0xfffe1000u
+#define KVM_S390_INT_CLOCK_COMP 0xffff1004u
+#define KVM_S390_INT_CPU_TIMER 0xffff1005u
+#define KVM_S390_INT_VIRTIO 0xffff2603u
+#define KVM_S390_INT_SERVICE 0xffff2401u
+#define KVM_S390_INT_EMERGENCY 0xffff1201u
+#define KVM_S390_INT_EXTERNAL_CALL 0xffff1202u
+/* Anything below 0xfffe0000u is taken by INT_IO */
+#define KVM_S390_INT_IO(ai,cssid,ssid,schid) \
+ (((schid)) | \
+ ((ssid) << 16) | \
+ ((cssid) << 18) | \
+ ((ai) << 26))
+#define KVM_S390_INT_IO_MIN 0x00000000u
+#define KVM_S390_INT_IO_MAX 0xfffdffffu
+#define KVM_S390_INT_IO_AI_MASK 0x04000000u
+
+
+struct kvm_s390_interrupt {
+ __u32 type;
+ __u32 parm;
+ __u64 parm64;
+};
+
+struct kvm_s390_io_info {
+ __u16 subchannel_id;
+ __u16 subchannel_nr;
+ __u32 io_int_parm;
+ __u32 io_int_word;
+};
+
+struct kvm_s390_ext_info {
+ __u32 ext_params;
+ __u32 pad;
+ __u64 ext_params2;
+};
+
+struct kvm_s390_pgm_info {
+ __u64 trans_exc_code;
+ __u64 mon_code;
+ __u64 per_address;
+ __u32 data_exc_code;
+ __u16 code;
+ __u16 mon_class_nr;
+ __u8 per_code;
+ __u8 per_atmid;
+ __u8 exc_access_id;
+ __u8 per_access_id;
+ __u8 op_access_id;
+#define KVM_S390_PGM_FLAGS_ILC_VALID 0x01
+#define KVM_S390_PGM_FLAGS_ILC_0 0x02
+#define KVM_S390_PGM_FLAGS_ILC_1 0x04
+#define KVM_S390_PGM_FLAGS_ILC_MASK 0x06
+#define KVM_S390_PGM_FLAGS_NO_REWIND 0x08
+ __u8 flags;
+ __u8 pad[2];
+};
+
+struct kvm_s390_prefix_info {
+ __u32 address;
+};
+
+struct kvm_s390_extcall_info {
+ __u16 code;
+};
+
+struct kvm_s390_emerg_info {
+ __u16 code;
+};
+
+#define KVM_S390_STOP_FLAG_STORE_STATUS 0x01
+struct kvm_s390_stop_info {
+ __u32 flags;
+};
+
+struct kvm_s390_mchk_info {
+ __u64 cr14;
+ __u64 mcic;
+ __u64 failing_storage_address;
+ __u32 ext_damage_code;
+ __u32 pad;
+ __u8 fixed_logout[16];
+};
+
+struct kvm_s390_irq {
+ __u64 type;
+ union {
+ struct kvm_s390_io_info io;
+ struct kvm_s390_ext_info ext;
+ struct kvm_s390_pgm_info pgm;
+ struct kvm_s390_emerg_info emerg;
+ struct kvm_s390_extcall_info extcall;
+ struct kvm_s390_prefix_info prefix;
+ struct kvm_s390_stop_info stop;
+ struct kvm_s390_mchk_info mchk;
+ char reserved[64];
+ } u;
+};
+
+struct kvm_s390_irq_state {
+ __u64 buf;
+ __u32 flags; /* will stay unused for compatibility reasons */
+ __u32 len;
+ __u32 reserved[4]; /* will stay unused for compatibility reasons */
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+
+#define KVM_GUESTDBG_ENABLE 0x00000001
+#define KVM_GUESTDBG_SINGLESTEP 0x00000002
+
+struct kvm_guest_debug {
+ __u32 control;
+ __u32 pad;
+ struct kvm_guest_debug_arch arch;
+};
+
+enum {
+ kvm_ioeventfd_flag_nr_datamatch,
+ kvm_ioeventfd_flag_nr_pio,
+ kvm_ioeventfd_flag_nr_deassign,
+ kvm_ioeventfd_flag_nr_virtio_ccw_notify,
+ kvm_ioeventfd_flag_nr_fast_mmio,
+ kvm_ioeventfd_flag_nr_max,
+};
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
+#define KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY \
+ (1 << kvm_ioeventfd_flag_nr_virtio_ccw_notify)
+
+#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1)
+
+struct kvm_ioeventfd {
+ __u64 datamatch;
+ __u64 addr; /* legal pio/mmio address */
+ __u32 len; /* 1, 2, 4, or 8 bytes; or 0 to ignore length */
+ __s32 fd;
+ __u32 flags;
+ __u8 pad[36];
+};
+
+#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
+#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
+#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
+#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
+ KVM_X86_DISABLE_EXITS_HLT | \
+ KVM_X86_DISABLE_EXITS_PAUSE)
+
+/* for KVM_ENABLE_CAP */
+struct kvm_enable_cap {
+ /* in */
+ __u32 cap;
+ __u32 flags;
+ __u64 args[4];
+ __u8 pad[64];
+};
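+
+/*
+ * Usage sketch (illustrative): the capability number goes in "cap" and any
+ * capability-specific arguments in "args". For example, on x86 a VMM might
+ * ask KVM not to exit on guest HLT (assuming the VM-fd variant of the ioctl
+ * is available, see KVM_CAP_ENABLE_CAP_VM):
+ *
+ *	struct kvm_enable_cap cap = {
+ *		.cap = KVM_CAP_X86_DISABLE_EXITS,
+ *		.args[0] = KVM_X86_DISABLE_EXITS_HLT,
+ *	};
+ *	ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
+ */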
+
+/* for KVM_PPC_GET_PVINFO */
+
+#define KVM_PPC_PVINFO_FLAGS_EV_IDLE (1<<0)
+
+struct kvm_ppc_pvinfo {
+ /* out */
+ __u32 flags;
+ __u32 hcall[4];
+ __u8 pad[108];
+};
+
+/* for KVM_PPC_GET_SMMU_INFO */
+#define KVM_PPC_PAGE_SIZES_MAX_SZ 8
+
+struct kvm_ppc_one_page_size {
+ __u32 page_shift; /* Page shift (or 0) */
+ __u32 pte_enc; /* Encoding in the HPTE (>>12) */
+};
+
+struct kvm_ppc_one_seg_page_size {
+ __u32 page_shift; /* Base page shift of segment (or 0) */
+ __u32 slb_enc; /* SLB encoding for BookS */
+ struct kvm_ppc_one_page_size enc[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+#define KVM_PPC_PAGE_SIZES_REAL 0x00000001
+#define KVM_PPC_1T_SEGMENTS 0x00000002
+
+struct kvm_ppc_smmu_info {
+ __u64 flags;
+ __u32 slb_size;
+ __u16 data_keys; /* # storage keys supported for data */
+ __u16 instr_keys; /* # storage keys supported for instructions */
+ struct kvm_ppc_one_seg_page_size sps[KVM_PPC_PAGE_SIZES_MAX_SZ];
+};
+
+/* for KVM_PPC_RESIZE_HPT_{PREPARE,COMMIT} */
+struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+};
+
+#define KVMIO 0xAE
+
+/* machine type bits, to be used as argument to KVM_CREATE_VM */
+#define KVM_VM_S390_UCONTROL 1
+
+/* on ppc, 0 indicates the default, 1 forces HV and 2 forces PR */
+#define KVM_VM_PPC_HV 1
+#define KVM_VM_PPC_PR 2
+
+/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
+#define KVM_VM_MIPS_TE 0
+#define KVM_VM_MIPS_VZ 1
+
+#define KVM_S390_SIE_PAGE_OFFSET 1
+
+/*
+ * ioctls for /dev/kvm fds:
+ */
+#define KVM_GET_API_VERSION _IO(KVMIO, 0x00)
+#define KVM_CREATE_VM _IO(KVMIO, 0x01) /* returns a VM fd */
+#define KVM_GET_MSR_INDEX_LIST _IOWR(KVMIO, 0x02, struct kvm_msr_list)
+
+#define KVM_S390_ENABLE_SIE _IO(KVMIO, 0x06)
+/*
+ * Check if a kvm extension is available. Argument is the extension number;
+ * return is 1 (yes) or 0 (no, sorry).
+ */
+#define KVM_CHECK_EXTENSION _IO(KVMIO, 0x03)
+/*
+ * Get size for mmap(vcpu_fd)
+ */
+#define KVM_GET_VCPU_MMAP_SIZE _IO(KVMIO, 0x04) /* in bytes */
+#define KVM_GET_SUPPORTED_CPUID _IOWR(KVMIO, 0x05, struct kvm_cpuid2)
+#define KVM_TRACE_ENABLE __KVM_DEPRECATED_MAIN_W_0x06
+#define KVM_TRACE_PAUSE __KVM_DEPRECATED_MAIN_0x07
+#define KVM_TRACE_DISABLE __KVM_DEPRECATED_MAIN_0x08
+#define KVM_GET_EMULATED_CPUID _IOWR(KVMIO, 0x09, struct kvm_cpuid2)
+#define KVM_GET_MSR_FEATURE_INDEX_LIST _IOWR(KVMIO, 0x0a, struct kvm_msr_list)
+
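+/*
+ * Bring-up sketch (illustrative): a minimal sequence against /dev/kvm. The
+ * API version has been 12 for a long time and is expected to stay stable.
+ *
+ *	int kvm = open("/dev/kvm", O_RDWR | O_CLOEXEC);
+ *	if (ioctl(kvm, KVM_GET_API_VERSION, 0) != 12)
+ *		exit(1);
+ *	int vm = ioctl(kvm, KVM_CREATE_VM, 0);  // machine type 0 = default
+ */
+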
+/*
+ * Extension capability list.
+ */
+#define KVM_CAP_IRQCHIP 0
+#define KVM_CAP_HLT 1
+#define KVM_CAP_MMU_SHADOW_CACHE_CONTROL 2
+#define KVM_CAP_USER_MEMORY 3
+#define KVM_CAP_SET_TSS_ADDR 4
+#define KVM_CAP_VAPIC 6
+#define KVM_CAP_EXT_CPUID 7
+#define KVM_CAP_CLOCKSOURCE 8
+#define KVM_CAP_NR_VCPUS 9 /* returns recommended max vcpus per vm */
+#define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */
+#define KVM_CAP_PIT 11
+#define KVM_CAP_NOP_IO_DELAY 12
+#define KVM_CAP_PV_MMU 13
+#define KVM_CAP_MP_STATE 14
+#define KVM_CAP_COALESCED_MMIO 15
+#define KVM_CAP_SYNC_MMU 16 /* Changes to host mmap are reflected in guest */
+#define KVM_CAP_IOMMU 18
+/* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
+#define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
+#define KVM_CAP_USER_NMI 22
+#ifdef __KVM_HAVE_GUEST_DEBUG
+#define KVM_CAP_SET_GUEST_DEBUG 23
+#endif
+#ifdef __KVM_HAVE_PIT
+#define KVM_CAP_REINJECT_CONTROL 24
+#endif
+#define KVM_CAP_IRQ_ROUTING 25
+#define KVM_CAP_IRQ_INJECT_STATUS 26
+#define KVM_CAP_ASSIGN_DEV_IRQ 29
+/* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
+#define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
+#ifdef __KVM_HAVE_MCE
+#define KVM_CAP_MCE 31
+#endif
+#define KVM_CAP_IRQFD 32
+#ifdef __KVM_HAVE_PIT
+#define KVM_CAP_PIT2 33
+#endif
+#define KVM_CAP_SET_BOOT_CPU_ID 34
+#ifdef __KVM_HAVE_PIT_STATE2
+#define KVM_CAP_PIT_STATE2 35
+#endif
+#define KVM_CAP_IOEVENTFD 36
+#define KVM_CAP_SET_IDENTITY_MAP_ADDR 37
+#ifdef __KVM_HAVE_XEN_HVM
+#define KVM_CAP_XEN_HVM 38
+#endif
+#define KVM_CAP_ADJUST_CLOCK 39
+#define KVM_CAP_INTERNAL_ERROR_DATA 40
+#ifdef __KVM_HAVE_VCPU_EVENTS
+#define KVM_CAP_VCPU_EVENTS 41
+#endif
+#define KVM_CAP_S390_PSW 42
+#define KVM_CAP_PPC_SEGSTATE 43
+#define KVM_CAP_HYPERV 44
+#define KVM_CAP_HYPERV_VAPIC 45
+#define KVM_CAP_HYPERV_SPIN 46
+#define KVM_CAP_PCI_SEGMENT 47
+#define KVM_CAP_PPC_PAIRED_SINGLES 48
+#define KVM_CAP_INTR_SHADOW 49
+#ifdef __KVM_HAVE_DEBUGREGS
+#define KVM_CAP_DEBUGREGS 50
+#endif
+#define KVM_CAP_X86_ROBUST_SINGLESTEP 51
+#define KVM_CAP_PPC_OSI 52
+#define KVM_CAP_PPC_UNSET_IRQ 53
+#define KVM_CAP_ENABLE_CAP 54
+#ifdef __KVM_HAVE_XSAVE
+#define KVM_CAP_XSAVE 55
+#endif
+#ifdef __KVM_HAVE_XCRS
+#define KVM_CAP_XCRS 56
+#endif
+#define KVM_CAP_PPC_GET_PVINFO 57
+#define KVM_CAP_PPC_IRQ_LEVEL 58
+#define KVM_CAP_ASYNC_PF 59
+#define KVM_CAP_TSC_CONTROL 60
+#define KVM_CAP_GET_TSC_KHZ 61
+#define KVM_CAP_PPC_BOOKE_SREGS 62
+#define KVM_CAP_SPAPR_TCE 63
+#define KVM_CAP_PPC_SMT 64
+#define KVM_CAP_PPC_RMA 65
+#define KVM_CAP_MAX_VCPUS 66 /* returns max vcpus per vm */
+#define KVM_CAP_PPC_HIOR 67
+#define KVM_CAP_PPC_PAPR 68
+#define KVM_CAP_SW_TLB 69
+#define KVM_CAP_ONE_REG 70
+#define KVM_CAP_S390_GMAP 71
+#define KVM_CAP_TSC_DEADLINE_TIMER 72
+#define KVM_CAP_S390_UCONTROL 73
+#define KVM_CAP_SYNC_REGS 74
+#define KVM_CAP_PCI_2_3 75
+#define KVM_CAP_KVMCLOCK_CTRL 76
+#define KVM_CAP_SIGNAL_MSI 77
+#define KVM_CAP_PPC_GET_SMMU_INFO 78
+#define KVM_CAP_S390_COW 79
+#define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_READONLY_MEM 81
+#define KVM_CAP_IRQFD_RESAMPLE 82
+#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
+#define KVM_CAP_PPC_HTAB_FD 84
+#define KVM_CAP_S390_CSS_SUPPORT 85
+#define KVM_CAP_PPC_EPR 86
+#define KVM_CAP_ARM_PSCI 87
+#define KVM_CAP_ARM_SET_DEVICE_ADDR 88
+#define KVM_CAP_DEVICE_CTRL 89
+#define KVM_CAP_IRQ_MPIC 90
+#define KVM_CAP_PPC_RTAS 91
+#define KVM_CAP_IRQ_XICS 92
+#define KVM_CAP_ARM_EL1_32BIT 93
+#define KVM_CAP_SPAPR_MULTITCE 94
+#define KVM_CAP_EXT_EMUL_CPUID 95
+#define KVM_CAP_HYPERV_TIME 96
+#define KVM_CAP_IOAPIC_POLARITY_IGNORED 97
+#define KVM_CAP_ENABLE_CAP_VM 98
+#define KVM_CAP_S390_IRQCHIP 99
+#define KVM_CAP_IOEVENTFD_NO_LENGTH 100
+#define KVM_CAP_VM_ATTRIBUTES 101
+#define KVM_CAP_ARM_PSCI_0_2 102
+#define KVM_CAP_PPC_FIXUP_HCALL 103
+#define KVM_CAP_PPC_ENABLE_HCALL 104
+#define KVM_CAP_CHECK_EXTENSION_VM 105
+#define KVM_CAP_S390_USER_SIGP 106
+#define KVM_CAP_S390_VECTOR_REGISTERS 107
+#define KVM_CAP_S390_MEM_OP 108
+#define KVM_CAP_S390_USER_STSI 109
+#define KVM_CAP_S390_SKEYS 110
+#define KVM_CAP_MIPS_FPU 111
+#define KVM_CAP_MIPS_MSA 112
+#define KVM_CAP_S390_INJECT_IRQ 113
+#define KVM_CAP_S390_IRQ_STATE 114
+#define KVM_CAP_PPC_HWRNG 115
+#define KVM_CAP_DISABLE_QUIRKS 116
+#define KVM_CAP_X86_SMM 117
+#define KVM_CAP_MULTI_ADDRESS_SPACE 118
+#define KVM_CAP_GUEST_DEBUG_HW_BPS 119
+#define KVM_CAP_GUEST_DEBUG_HW_WPS 120
+#define KVM_CAP_SPLIT_IRQCHIP 121
+#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122
+#define KVM_CAP_HYPERV_SYNIC 123
+#define KVM_CAP_S390_RI 124
+#define KVM_CAP_SPAPR_TCE_64 125
+#define KVM_CAP_ARM_PMU_V3 126
+#define KVM_CAP_VCPU_ATTRIBUTES 127
+#define KVM_CAP_MAX_VCPU_ID 128
+#define KVM_CAP_X2APIC_API 129
+#define KVM_CAP_S390_USER_INSTR0 130
+#define KVM_CAP_MSI_DEVID 131
+#define KVM_CAP_PPC_HTM 132
+#define KVM_CAP_SPAPR_RESIZE_HPT 133
+#define KVM_CAP_PPC_MMU_RADIX 134
+#define KVM_CAP_PPC_MMU_HASH_V3 135
+#define KVM_CAP_IMMEDIATE_EXIT 136
+#define KVM_CAP_MIPS_VZ 137
+#define KVM_CAP_MIPS_TE 138
+#define KVM_CAP_MIPS_64BIT 139
+#define KVM_CAP_S390_GS 140
+#define KVM_CAP_S390_AIS 141
+#define KVM_CAP_SPAPR_TCE_VFIO 142
+#define KVM_CAP_X86_DISABLE_EXITS 143
+#define KVM_CAP_ARM_USER_IRQ 144
+#define KVM_CAP_S390_CMMA_MIGRATION 145
+#define KVM_CAP_PPC_FWNMI 146
+#define KVM_CAP_PPC_SMT_POSSIBLE 147
+#define KVM_CAP_HYPERV_SYNIC2 148
+#define KVM_CAP_HYPERV_VP_INDEX 149
+#define KVM_CAP_S390_AIS_MIGRATION 150
+#define KVM_CAP_PPC_GET_CPU_CHAR 151
+#define KVM_CAP_S390_BPB 152
+#define KVM_CAP_GET_MSR_FEATURES 153
+#define KVM_CAP_HYPERV_EVENTFD 154
+#define KVM_CAP_HYPERV_TLBFLUSH 155
+#define KVM_CAP_S390_HPAGE_1M 156
+#define KVM_CAP_NESTED_STATE 157
+#define KVM_CAP_ARM_INJECT_SERROR_ESR 158
+#define KVM_CAP_MSR_PLATFORM_INFO 159
+
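+/*
+ * Query sketch (illustrative): each capability above is probed with
+ * KVM_CHECK_EXTENSION; a return > 0 means present, and for some caps the
+ * value itself is meaningful (e.g. KVM_CAP_NR_VCPUS).
+ *
+ *	if (ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY) <= 0)
+ *		exit(1);  // unusable without user memory slots
+ */
+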
+#ifdef KVM_CAP_IRQ_ROUTING
+
+struct kvm_irq_routing_irqchip {
+ __u32 irqchip;
+ __u32 pin;
+};
+
+struct kvm_irq_routing_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ union {
+ __u32 pad;
+ __u32 devid;
+ };
+};
+
+struct kvm_irq_routing_s390_adapter {
+ __u64 ind_addr;
+ __u64 summary_addr;
+ __u64 ind_offset;
+ __u32 summary_offset;
+ __u32 adapter_id;
+};
+
+struct kvm_irq_routing_hv_sint {
+ __u32 vcpu;
+ __u32 sint;
+};
+
+/* gsi routing entry types */
+#define KVM_IRQ_ROUTING_IRQCHIP 1
+#define KVM_IRQ_ROUTING_MSI 2
+#define KVM_IRQ_ROUTING_S390_ADAPTER 3
+#define KVM_IRQ_ROUTING_HV_SINT 4
+
+struct kvm_irq_routing_entry {
+ __u32 gsi;
+ __u32 type;
+ __u32 flags;
+ __u32 pad;
+ union {
+ struct kvm_irq_routing_irqchip irqchip;
+ struct kvm_irq_routing_msi msi;
+ struct kvm_irq_routing_s390_adapter adapter;
+ struct kvm_irq_routing_hv_sint hv_sint;
+ __u32 pad[8];
+ } u;
+};
+
+struct kvm_irq_routing {
+ __u32 nr;
+ __u32 flags;
+ struct kvm_irq_routing_entry entries[0];
+};
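+
+/*
+ * Allocation sketch (illustrative): "entries" is a flexible array, so the
+ * table is sized at runtime before being handed to KVM_SET_GSI_ROUTING
+ * (defined further below); "n" is an assumed route count.
+ *
+ *	struct kvm_irq_routing *r =
+ *		calloc(1, sizeof(*r) + n * sizeof(r->entries[0]));
+ *	r->nr = n;
+ *	// fill r->entries[0..n-1], then:
+ *	ioctl(vm_fd, KVM_SET_GSI_ROUTING, r);
+ */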
+
+#endif
+
+#ifdef KVM_CAP_MCE
+/* x86 MCE */
+struct kvm_x86_mce {
+ __u64 status;
+ __u64 addr;
+ __u64 misc;
+ __u64 mcg_status;
+ __u8 bank;
+ __u8 pad1[7];
+ __u64 pad2[3];
+};
+#endif
+
+#ifdef KVM_CAP_XEN_HVM
+struct kvm_xen_hvm_config {
+ __u32 flags;
+ __u32 msr;
+ __u64 blob_addr_32;
+ __u64 blob_addr_64;
+ __u8 blob_size_32;
+ __u8 blob_size_64;
+ __u8 pad2[30];
+};
+#endif
+
+#define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+/*
+ * Available with KVM_CAP_IRQFD_RESAMPLE
+ *
+ * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
+ * the irqfd to operate in resampling mode for level triggered interrupt
+ * emulation. See Documentation/virtual/kvm/api.txt.
+ */
+#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
+
+struct kvm_irqfd {
+ __u32 fd;
+ __u32 gsi;
+ __u32 flags;
+ __u32 resamplefd;
+ __u8 pad[16];
+};
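+
+/*
+ * Usage sketch (illustrative): an eventfd is attached to a guest GSI so that
+ * signalling the eventfd injects the interrupt without a round trip through
+ * the VMM.
+ *
+ *	struct kvm_irqfd irqfd = {
+ *		.fd = eventfd(0, EFD_CLOEXEC),  // assumed to succeed
+ *		.gsi = 5,
+ *	};
+ *	ioctl(vm_fd, KVM_IRQFD, &irqfd);
+ */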
+
+/* For KVM_CAP_ADJUST_CLOCK */
+
+/* Do not use 1; KVM_CHECK_EXTENSION returned it before we had flags. */
+#define KVM_CLOCK_TSC_STABLE 2
+
+struct kvm_clock_data {
+ __u64 clock;
+ __u32 flags;
+ __u32 pad[9];
+};
+
+/* For KVM_CAP_SW_TLB */
+
+#define KVM_MMU_FSL_BOOKE_NOHV 0
+#define KVM_MMU_FSL_BOOKE_HV 1
+
+struct kvm_config_tlb {
+ __u64 params;
+ __u64 array;
+ __u32 mmu_type;
+ __u32 array_len;
+};
+
+struct kvm_dirty_tlb {
+ __u64 bitmap;
+ __u32 num_dirty;
+};
+
+/* Available with KVM_CAP_ONE_REG */
+
+#define KVM_REG_ARCH_MASK 0xff00000000000000ULL
+#define KVM_REG_GENERIC 0x0000000000000000ULL
+
+/*
+ * Architecture specific registers are to be defined in arch headers and
+ * ORed with the arch identifier.
+ */
+#define KVM_REG_PPC 0x1000000000000000ULL
+#define KVM_REG_X86 0x2000000000000000ULL
+#define KVM_REG_IA64 0x3000000000000000ULL
+#define KVM_REG_ARM 0x4000000000000000ULL
+#define KVM_REG_S390 0x5000000000000000ULL
+#define KVM_REG_ARM64 0x6000000000000000ULL
+#define KVM_REG_MIPS 0x7000000000000000ULL
+
+#define KVM_REG_SIZE_SHIFT 52
+#define KVM_REG_SIZE_MASK 0x00f0000000000000ULL
+#define KVM_REG_SIZE_U8 0x0000000000000000ULL
+#define KVM_REG_SIZE_U16 0x0010000000000000ULL
+#define KVM_REG_SIZE_U32 0x0020000000000000ULL
+#define KVM_REG_SIZE_U64 0x0030000000000000ULL
+#define KVM_REG_SIZE_U128 0x0040000000000000ULL
+#define KVM_REG_SIZE_U256 0x0050000000000000ULL
+#define KVM_REG_SIZE_U512 0x0060000000000000ULL
+#define KVM_REG_SIZE_U1024 0x0070000000000000ULL
+
+struct kvm_reg_list {
+ __u64 n; /* number of regs */
+ __u64 reg[0];
+};
+
+struct kvm_one_reg {
+ __u64 id;
+ __u64 addr;
+};
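+
+/*
+ * Usage sketch (illustrative): a register id is the arch identifier ORed
+ * with a size and an arch-defined index; "addr" points at a buffer of the
+ * encoded size. The index 0 below is a placeholder, not a real register.
+ *
+ *	__u64 val;
+ *	struct kvm_one_reg reg = {
+ *		.id = KVM_REG_ARM64 | KVM_REG_SIZE_U64 | 0,  // 0: placeholder index
+ *		.addr = (__u64)&val,
+ *	};
+ *	ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ */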
+
+#define KVM_MSI_VALID_DEVID (1U << 0)
+struct kvm_msi {
+ __u32 address_lo;
+ __u32 address_hi;
+ __u32 data;
+ __u32 flags;
+ __u32 devid;
+ __u8 pad[12];
+};
+
+struct kvm_arm_device_addr {
+ __u64 id;
+ __u64 addr;
+};
+
+/*
+ * Device control API, available with KVM_CAP_DEVICE_CTRL
+ */
+#define KVM_CREATE_DEVICE_TEST 1
+
+struct kvm_create_device {
+ __u32 type; /* in: KVM_DEV_TYPE_xxx */
+ __u32 fd; /* out: device handle */
+ __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */
+};
+
+struct kvm_device_attr {
+ __u32 flags; /* no flags currently defined */
+ __u32 group; /* device-defined */
+ __u64 attr; /* group-defined */
+ __u64 addr; /* userspace address of attr data */
+};
+
+#define KVM_DEV_VFIO_GROUP 1
+#define KVM_DEV_VFIO_GROUP_ADD 1
+#define KVM_DEV_VFIO_GROUP_DEL 2
+#define KVM_DEV_VFIO_GROUP_SET_SPAPR_TCE 3
+
+enum kvm_device_type {
+ KVM_DEV_TYPE_FSL_MPIC_20 = 1,
+#define KVM_DEV_TYPE_FSL_MPIC_20 KVM_DEV_TYPE_FSL_MPIC_20
+ KVM_DEV_TYPE_FSL_MPIC_42,
+#define KVM_DEV_TYPE_FSL_MPIC_42 KVM_DEV_TYPE_FSL_MPIC_42
+ KVM_DEV_TYPE_XICS,
+#define KVM_DEV_TYPE_XICS KVM_DEV_TYPE_XICS
+ KVM_DEV_TYPE_VFIO,
+#define KVM_DEV_TYPE_VFIO KVM_DEV_TYPE_VFIO
+ KVM_DEV_TYPE_ARM_VGIC_V2,
+#define KVM_DEV_TYPE_ARM_VGIC_V2 KVM_DEV_TYPE_ARM_VGIC_V2
+ KVM_DEV_TYPE_FLIC,
+#define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC
+ KVM_DEV_TYPE_ARM_VGIC_V3,
+#define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3
+ KVM_DEV_TYPE_ARM_VGIC_ITS,
+#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS
+ KVM_DEV_TYPE_MAX,
+};
+
+struct kvm_vfio_spapr_tce {
+ __s32 groupfd;
+ __s32 tablefd;
+};
+
+/*
+ * ioctls for VM fds
+ */
+#define KVM_SET_MEMORY_REGION _IOW(KVMIO, 0x40, struct kvm_memory_region)
+/*
+ * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
+ * a vcpu fd.
+ */
+#define KVM_CREATE_VCPU _IO(KVMIO, 0x41)
+#define KVM_GET_DIRTY_LOG _IOW(KVMIO, 0x42, struct kvm_dirty_log)
+/* KVM_SET_MEMORY_ALIAS is obsolete: */
+#define KVM_SET_MEMORY_ALIAS _IOW(KVMIO, 0x43, struct kvm_memory_alias)
+#define KVM_SET_NR_MMU_PAGES _IO(KVMIO, 0x44)
+#define KVM_GET_NR_MMU_PAGES _IO(KVMIO, 0x45)
+#define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
+ struct kvm_userspace_memory_region)
+#define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47)
+#define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64)
+
+/* enable ucontrol for s390 */
+struct kvm_s390_ucas_mapping {
+ __u64 user_addr;
+ __u64 vcpu_addr;
+ __u64 length;
+};
+#define KVM_S390_UCAS_MAP _IOW(KVMIO, 0x50, struct kvm_s390_ucas_mapping)
+#define KVM_S390_UCAS_UNMAP _IOW(KVMIO, 0x51, struct kvm_s390_ucas_mapping)
+#define KVM_S390_VCPU_FAULT _IOW(KVMIO, 0x52, unsigned long)
+
+/* Device model IOC */
+#define KVM_CREATE_IRQCHIP _IO(KVMIO, 0x60)
+#define KVM_IRQ_LINE _IOW(KVMIO, 0x61, struct kvm_irq_level)
+#define KVM_GET_IRQCHIP _IOWR(KVMIO, 0x62, struct kvm_irqchip)
+#define KVM_SET_IRQCHIP _IOR(KVMIO, 0x63, struct kvm_irqchip)
+#define KVM_CREATE_PIT _IO(KVMIO, 0x64)
+#define KVM_GET_PIT _IOWR(KVMIO, 0x65, struct kvm_pit_state)
+#define KVM_SET_PIT _IOR(KVMIO, 0x66, struct kvm_pit_state)
+#define KVM_IRQ_LINE_STATUS _IOWR(KVMIO, 0x67, struct kvm_irq_level)
+#define KVM_REGISTER_COALESCED_MMIO \
+ _IOW(KVMIO, 0x67, struct kvm_coalesced_mmio_zone)
+#define KVM_UNREGISTER_COALESCED_MMIO \
+ _IOW(KVMIO, 0x68, struct kvm_coalesced_mmio_zone)
+#define KVM_ASSIGN_PCI_DEVICE _IOR(KVMIO, 0x69, \
+ struct kvm_assigned_pci_dev)
+#define KVM_SET_GSI_ROUTING _IOW(KVMIO, 0x6a, struct kvm_irq_routing)
+/* deprecated, replaced by KVM_ASSIGN_DEV_IRQ */
+#define KVM_ASSIGN_IRQ __KVM_DEPRECATED_VM_R_0x70
+#define KVM_ASSIGN_DEV_IRQ _IOW(KVMIO, 0x70, struct kvm_assigned_irq)
+#define KVM_REINJECT_CONTROL _IO(KVMIO, 0x71)
+#define KVM_DEASSIGN_PCI_DEVICE _IOW(KVMIO, 0x72, \
+ struct kvm_assigned_pci_dev)
+#define KVM_ASSIGN_SET_MSIX_NR _IOW(KVMIO, 0x73, \
+ struct kvm_assigned_msix_nr)
+#define KVM_ASSIGN_SET_MSIX_ENTRY _IOW(KVMIO, 0x74, \
+ struct kvm_assigned_msix_entry)
+#define KVM_DEASSIGN_DEV_IRQ _IOW(KVMIO, 0x75, struct kvm_assigned_irq)
+#define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
+#define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
+#define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
+#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
+#define KVM_XEN_HVM_CONFIG _IOW(KVMIO, 0x7a, struct kvm_xen_hvm_config)
+#define KVM_SET_CLOCK _IOW(KVMIO, 0x7b, struct kvm_clock_data)
+#define KVM_GET_CLOCK _IOR(KVMIO, 0x7c, struct kvm_clock_data)
+/* Available with KVM_CAP_PIT_STATE2 */
+#define KVM_GET_PIT2 _IOR(KVMIO, 0x9f, struct kvm_pit_state2)
+#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
+/* Available with KVM_CAP_PPC_GET_PVINFO */
+#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo)
+/* Available with KVM_CAP_TSC_CONTROL */
+#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
+#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
+/* Available with KVM_CAP_PCI_2_3 */
+#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \
+ struct kvm_assigned_pci_dev)
+/* Available with KVM_CAP_SIGNAL_MSI */
+#define KVM_SIGNAL_MSI _IOW(KVMIO, 0xa5, struct kvm_msi)
+/* Available with KVM_CAP_PPC_GET_SMMU_INFO */
+#define KVM_PPC_GET_SMMU_INFO _IOR(KVMIO, 0xa6, struct kvm_ppc_smmu_info)
+/* Available with KVM_CAP_PPC_ALLOC_HTAB */
+#define KVM_PPC_ALLOCATE_HTAB _IOWR(KVMIO, 0xa7, __u32)
+#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
+#define KVM_CREATE_SPAPR_TCE_64 _IOW(KVMIO, 0xa8, \
+ struct kvm_create_spapr_tce_64)
+/* Available with KVM_CAP_RMA */
+#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
+/* Available with KVM_CAP_PPC_HTAB_FD */
+#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
+/* Available with KVM_CAP_ARM_SET_DEVICE_ADDR */
+#define KVM_ARM_SET_DEVICE_ADDR _IOW(KVMIO, 0xab, struct kvm_arm_device_addr)
+/* Available with KVM_CAP_PPC_RTAS */
+#define KVM_PPC_RTAS_DEFINE_TOKEN _IOW(KVMIO, 0xac, struct kvm_rtas_token_args)
+/* Available with KVM_CAP_SPAPR_RESIZE_HPT */
+#define KVM_PPC_RESIZE_HPT_PREPARE _IOR(KVMIO, 0xad, struct kvm_ppc_resize_hpt)
+#define KVM_PPC_RESIZE_HPT_COMMIT _IOR(KVMIO, 0xae, struct kvm_ppc_resize_hpt)
+/* Available with KVM_CAP_PPC_RADIX_MMU or KVM_CAP_PPC_HASH_MMU_V3 */
+#define KVM_PPC_CONFIGURE_V3_MMU _IOW(KVMIO, 0xaf, struct kvm_ppc_mmuv3_cfg)
+/* Available with KVM_CAP_PPC_RADIX_MMU */
+#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
+/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
+#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
+
+/* ioctl for vm fd */
+#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)
+
+/* ioctls for fds returned by KVM_CREATE_DEVICE */
+#define KVM_SET_DEVICE_ATTR _IOW(KVMIO, 0xe1, struct kvm_device_attr)
+#define KVM_GET_DEVICE_ATTR _IOW(KVMIO, 0xe2, struct kvm_device_attr)
+#define KVM_HAS_DEVICE_ATTR _IOW(KVMIO, 0xe3, struct kvm_device_attr)
+
+/*
+ * ioctls for vcpu fds
+ */
+#define KVM_RUN _IO(KVMIO, 0x80)
+#define KVM_GET_REGS _IOR(KVMIO, 0x81, struct kvm_regs)
+#define KVM_SET_REGS _IOW(KVMIO, 0x82, struct kvm_regs)
+#define KVM_GET_SREGS _IOR(KVMIO, 0x83, struct kvm_sregs)
+#define KVM_SET_SREGS _IOW(KVMIO, 0x84, struct kvm_sregs)
+#define KVM_TRANSLATE _IOWR(KVMIO, 0x85, struct kvm_translation)
+#define KVM_INTERRUPT _IOW(KVMIO, 0x86, struct kvm_interrupt)
+/* KVM_DEBUG_GUEST is no longer supported, use KVM_SET_GUEST_DEBUG instead */
+#define KVM_DEBUG_GUEST __KVM_DEPRECATED_VCPU_W_0x87
+#define KVM_GET_MSRS _IOWR(KVMIO, 0x88, struct kvm_msrs)
+#define KVM_SET_MSRS _IOW(KVMIO, 0x89, struct kvm_msrs)
+#define KVM_SET_CPUID _IOW(KVMIO, 0x8a, struct kvm_cpuid)
+#define KVM_SET_SIGNAL_MASK _IOW(KVMIO, 0x8b, struct kvm_signal_mask)
+#define KVM_GET_FPU _IOR(KVMIO, 0x8c, struct kvm_fpu)
+#define KVM_SET_FPU _IOW(KVMIO, 0x8d, struct kvm_fpu)
+#define KVM_GET_LAPIC _IOR(KVMIO, 0x8e, struct kvm_lapic_state)
+#define KVM_SET_LAPIC _IOW(KVMIO, 0x8f, struct kvm_lapic_state)
+#define KVM_SET_CPUID2 _IOW(KVMIO, 0x90, struct kvm_cpuid2)
+#define KVM_GET_CPUID2 _IOWR(KVMIO, 0x91, struct kvm_cpuid2)
+/* Available with KVM_CAP_VAPIC */
+#define KVM_TPR_ACCESS_REPORTING _IOWR(KVMIO, 0x92, struct kvm_tpr_access_ctl)
+/* Available with KVM_CAP_VAPIC */
+#define KVM_SET_VAPIC_ADDR _IOW(KVMIO, 0x93, struct kvm_vapic_addr)
+/* valid for virtual machine (for floating interrupt) _and_ vcpu */
+#define KVM_S390_INTERRUPT _IOW(KVMIO, 0x94, struct kvm_s390_interrupt)
+/* store status for s390 */
+#define KVM_S390_STORE_STATUS_NOADDR (-1ul)
+#define KVM_S390_STORE_STATUS_PREFIXED (-2ul)
+#define KVM_S390_STORE_STATUS _IOW(KVMIO, 0x95, unsigned long)
+/* initial ipl psw for s390 */
+#define KVM_S390_SET_INITIAL_PSW _IOW(KVMIO, 0x96, struct kvm_s390_psw)
+/* initial reset for s390 */
+#define KVM_S390_INITIAL_RESET _IO(KVMIO, 0x97)
+#define KVM_GET_MP_STATE _IOR(KVMIO, 0x98, struct kvm_mp_state)
+#define KVM_SET_MP_STATE _IOW(KVMIO, 0x99, struct kvm_mp_state)
+/* Available with KVM_CAP_USER_NMI */
+#define KVM_NMI _IO(KVMIO, 0x9a)
+/* Available with KVM_CAP_SET_GUEST_DEBUG */
+#define KVM_SET_GUEST_DEBUG _IOW(KVMIO, 0x9b, struct kvm_guest_debug)
+/* MCE for x86 */
+#define KVM_X86_SETUP_MCE _IOW(KVMIO, 0x9c, __u64)
+#define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO, 0x9d, __u64)
+#define KVM_X86_SET_MCE _IOW(KVMIO, 0x9e, struct kvm_x86_mce)
+/* Available with KVM_CAP_VCPU_EVENTS */
+#define KVM_GET_VCPU_EVENTS _IOR(KVMIO, 0x9f, struct kvm_vcpu_events)
+#define KVM_SET_VCPU_EVENTS _IOW(KVMIO, 0xa0, struct kvm_vcpu_events)
+/* Available with KVM_CAP_DEBUGREGS */
+#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
+#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
+/*
+ * vcpu version available with KVM_ENABLE_CAP
+ * vm version available with KVM_CAP_ENABLE_CAP_VM
+ */
+#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
+/* Available with KVM_CAP_XSAVE */
+#define KVM_GET_XSAVE _IOR(KVMIO, 0xa4, struct kvm_xsave)
+#define KVM_SET_XSAVE _IOW(KVMIO, 0xa5, struct kvm_xsave)
+/* Available with KVM_CAP_XCRS */
+#define KVM_GET_XCRS _IOR(KVMIO, 0xa6, struct kvm_xcrs)
+#define KVM_SET_XCRS _IOW(KVMIO, 0xa7, struct kvm_xcrs)
+/* Available with KVM_CAP_SW_TLB */
+#define KVM_DIRTY_TLB _IOW(KVMIO, 0xaa, struct kvm_dirty_tlb)
+/* Available with KVM_CAP_ONE_REG */
+#define KVM_GET_ONE_REG _IOW(KVMIO, 0xab, struct kvm_one_reg)
+#define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg)
+/* VM is being stopped by host */
+#define KVM_KVMCLOCK_CTRL _IO(KVMIO, 0xad)
+#define KVM_ARM_VCPU_INIT _IOW(KVMIO, 0xae, struct kvm_vcpu_init)
+#define KVM_ARM_PREFERRED_TARGET _IOR(KVMIO, 0xaf, struct kvm_vcpu_init)
+#define KVM_GET_REG_LIST _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
+/* Available with KVM_CAP_S390_MEM_OP */
+#define KVM_S390_MEM_OP _IOW(KVMIO, 0xb1, struct kvm_s390_mem_op)
+/* Available with KVM_CAP_S390_SKEYS */
+#define KVM_S390_GET_SKEYS _IOW(KVMIO, 0xb2, struct kvm_s390_skeys)
+#define KVM_S390_SET_SKEYS _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
+/* Available with KVM_CAP_S390_INJECT_IRQ */
+#define KVM_S390_IRQ _IOW(KVMIO, 0xb4, struct kvm_s390_irq)
+/* Available with KVM_CAP_S390_IRQ_STATE */
+#define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
+#define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
+/* Available with KVM_CAP_X86_SMM */
+#define KVM_SMI _IO(KVMIO, 0xb7)
+/* Available with KVM_CAP_S390_CMMA_MIGRATION */
+#define KVM_S390_GET_CMMA_BITS _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
+#define KVM_S390_SET_CMMA_BITS _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
+/* Memory Encryption Commands */
+#define KVM_MEMORY_ENCRYPT_OP _IOWR(KVMIO, 0xba, unsigned long)
+
+struct kvm_enc_region {
+ __u64 addr;
+ __u64 size;
+};
+
+#define KVM_MEMORY_ENCRYPT_REG_REGION _IOR(KVMIO, 0xbb, struct kvm_enc_region)
+#define KVM_MEMORY_ENCRYPT_UNREG_REGION _IOR(KVMIO, 0xbc, struct kvm_enc_region)
+
+/* Available with KVM_CAP_HYPERV_EVENTFD */
+#define KVM_HYPERV_EVENTFD _IOW(KVMIO, 0xbd, struct kvm_hyperv_eventfd)
+
+/* Available with KVM_CAP_NESTED_STATE */
+#define KVM_GET_NESTED_STATE _IOWR(KVMIO, 0xbe, struct kvm_nested_state)
+#define KVM_SET_NESTED_STATE _IOW(KVMIO, 0xbf, struct kvm_nested_state)
+
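+/*
+ * Run-loop sketch (illustrative): the vcpu ioctls above are typically driven
+ * from a loop around KVM_RUN, with the shared kvm_run structure mapped using
+ * the size reported by KVM_GET_VCPU_MMAP_SIZE.
+ *
+ *	size_t sz = ioctl(kvm_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
+ *	struct kvm_run *run = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ *				   MAP_SHARED, vcpu_fd, 0);
+ *	for (;;) {
+ *		ioctl(vcpu_fd, KVM_RUN, 0);
+ *		if (run->exit_reason == KVM_EXIT_HLT)
+ *			break;
+ *		// dispatch the other KVM_EXIT_* reasons here
+ *	}
+ */
+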
+/* Secure Encrypted Virtualization command */
+enum sev_cmd_id {
+ /* Guest initialization commands */
+ KVM_SEV_INIT = 0,
+ KVM_SEV_ES_INIT,
+ /* Guest launch commands */
+ KVM_SEV_LAUNCH_START,
+ KVM_SEV_LAUNCH_UPDATE_DATA,
+ KVM_SEV_LAUNCH_UPDATE_VMSA,
+ KVM_SEV_LAUNCH_SECRET,
+ KVM_SEV_LAUNCH_MEASURE,
+ KVM_SEV_LAUNCH_FINISH,
+ /* Guest migration commands (outgoing) */
+ KVM_SEV_SEND_START,
+ KVM_SEV_SEND_UPDATE_DATA,
+ KVM_SEV_SEND_UPDATE_VMSA,
+ KVM_SEV_SEND_FINISH,
+ /* Guest migration commands (incoming) */
+ KVM_SEV_RECEIVE_START,
+ KVM_SEV_RECEIVE_UPDATE_DATA,
+ KVM_SEV_RECEIVE_UPDATE_VMSA,
+ KVM_SEV_RECEIVE_FINISH,
+ /* Guest status and debug commands */
+ KVM_SEV_GUEST_STATUS,
+ KVM_SEV_DBG_DECRYPT,
+ KVM_SEV_DBG_ENCRYPT,
+ /* Guest certificates commands */
+ KVM_SEV_CERT_EXPORT,
+
+ KVM_SEV_NR_MAX,
+};
+
+struct kvm_sev_cmd {
+ __u32 id;
+ __u64 data;
+ __u32 error;
+ __u32 sev_fd;
+};
+
+struct kvm_sev_launch_start {
+ __u32 handle;
+ __u32 policy;
+ __u64 dh_uaddr;
+ __u32 dh_len;
+ __u64 session_uaddr;
+ __u32 session_len;
+};
+
+struct kvm_sev_launch_update_data {
+ __u64 uaddr;
+ __u32 len;
+};
+
+
+struct kvm_sev_launch_secret {
+ __u64 hdr_uaddr;
+ __u32 hdr_len;
+ __u64 guest_uaddr;
+ __u32 guest_len;
+ __u64 trans_uaddr;
+ __u32 trans_len;
+};
+
+struct kvm_sev_launch_measure {
+ __u64 uaddr;
+ __u32 len;
+};
+
+struct kvm_sev_guest_status {
+ __u32 handle;
+ __u32 policy;
+ __u32 state;
+};
+
+struct kvm_sev_dbg {
+ __u64 src_uaddr;
+ __u64 dst_uaddr;
+ __u32 len;
+};
+
+#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
+#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
+#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
+
+struct kvm_assigned_pci_dev {
+ __u32 assigned_dev_id;
+ __u32 busnr;
+ __u32 devfn;
+ __u32 flags;
+ __u32 segnr;
+ union {
+ __u32 reserved[11];
+ };
+};
+
+#define KVM_DEV_IRQ_HOST_INTX (1 << 0)
+#define KVM_DEV_IRQ_HOST_MSI (1 << 1)
+#define KVM_DEV_IRQ_HOST_MSIX (1 << 2)
+
+#define KVM_DEV_IRQ_GUEST_INTX (1 << 8)
+#define KVM_DEV_IRQ_GUEST_MSI (1 << 9)
+#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10)
+
+#define KVM_DEV_IRQ_HOST_MASK 0x00ff
+#define KVM_DEV_IRQ_GUEST_MASK 0xff00
+
+struct kvm_assigned_irq {
+ __u32 assigned_dev_id;
+ __u32 host_irq; /* ignored (legacy field) */
+ __u32 guest_irq;
+ __u32 flags;
+ union {
+ __u32 reserved[12];
+ };
+};
+
+struct kvm_assigned_msix_nr {
+ __u32 assigned_dev_id;
+ __u16 entry_nr;
+ __u16 padding;
+};
+
+#define KVM_MAX_MSIX_PER_DEV 256
+struct kvm_assigned_msix_entry {
+ __u32 assigned_dev_id;
+ __u32 gsi;
+ __u16 entry; /* The index of entry in the MSI-X table */
+ __u16 padding[3];
+};
+
+#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
+#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
+
+/* Available with KVM_CAP_ARM_USER_IRQ */
+
+/* Bits for run->s.regs.device_irq_level */
+#define KVM_ARM_DEV_EL1_VTIMER (1 << 0)
+#define KVM_ARM_DEV_EL1_PTIMER (1 << 1)
+#define KVM_ARM_DEV_PMU (1 << 2)
+
+struct kvm_hyperv_eventfd {
+ __u32 conn_id;
+ __s32 fd;
+ __u32 flags;
+ __u32 padding[3];
+};
+
+#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
+#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+
+#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/lirc.h b/tools/include/uapi/linux/lirc.h
new file mode 100644
index 000000000..f18993104
--- /dev/null
+++ b/tools/include/uapi/linux/lirc.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * lirc.h - linux infrared remote control header file
+ * last modified 2010/07/13 by Jarod Wilson
+ */
+
+#ifndef _LINUX_LIRC_H
+#define _LINUX_LIRC_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define PULSE_BIT 0x01000000
+#define PULSE_MASK 0x00FFFFFF
+
+#define LIRC_MODE2_SPACE 0x00000000
+#define LIRC_MODE2_PULSE 0x01000000
+#define LIRC_MODE2_FREQUENCY 0x02000000
+#define LIRC_MODE2_TIMEOUT 0x03000000
+
+#define LIRC_VALUE_MASK 0x00FFFFFF
+#define LIRC_MODE2_MASK 0xFF000000
+
+#define LIRC_SPACE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_SPACE)
+#define LIRC_PULSE(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_PULSE)
+#define LIRC_FREQUENCY(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_FREQUENCY)
+#define LIRC_TIMEOUT(val) (((val)&LIRC_VALUE_MASK) | LIRC_MODE2_TIMEOUT)
+
+#define LIRC_VALUE(val) ((val)&LIRC_VALUE_MASK)
+#define LIRC_MODE2(val) ((val)&LIRC_MODE2_MASK)
+
+#define LIRC_IS_SPACE(val) (LIRC_MODE2(val) == LIRC_MODE2_SPACE)
+#define LIRC_IS_PULSE(val) (LIRC_MODE2(val) == LIRC_MODE2_PULSE)
+#define LIRC_IS_FREQUENCY(val) (LIRC_MODE2(val) == LIRC_MODE2_FREQUENCY)
+#define LIRC_IS_TIMEOUT(val) (LIRC_MODE2(val) == LIRC_MODE2_TIMEOUT)
+
+/* used heavily by lirc userspace */
+#define lirc_t int
+
+/*** lirc compatible hardware features ***/
+
+#define LIRC_MODE2SEND(x) (x)
+#define LIRC_SEND2MODE(x) (x)
+#define LIRC_MODE2REC(x) ((x) << 16)
+#define LIRC_REC2MODE(x) ((x) >> 16)
+
+#define LIRC_MODE_RAW 0x00000001
+#define LIRC_MODE_PULSE 0x00000002
+#define LIRC_MODE_MODE2 0x00000004
+#define LIRC_MODE_SCANCODE 0x00000008
+#define LIRC_MODE_LIRCCODE 0x00000010
+
+
+#define LIRC_CAN_SEND_RAW LIRC_MODE2SEND(LIRC_MODE_RAW)
+#define LIRC_CAN_SEND_PULSE LIRC_MODE2SEND(LIRC_MODE_PULSE)
+#define LIRC_CAN_SEND_MODE2 LIRC_MODE2SEND(LIRC_MODE_MODE2)
+#define LIRC_CAN_SEND_LIRCCODE LIRC_MODE2SEND(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_SEND_MASK 0x0000003f
+
+#define LIRC_CAN_SET_SEND_CARRIER 0x00000100
+#define LIRC_CAN_SET_SEND_DUTY_CYCLE 0x00000200
+#define LIRC_CAN_SET_TRANSMITTER_MASK 0x00000400
+
+#define LIRC_CAN_REC_RAW LIRC_MODE2REC(LIRC_MODE_RAW)
+#define LIRC_CAN_REC_PULSE LIRC_MODE2REC(LIRC_MODE_PULSE)
+#define LIRC_CAN_REC_MODE2 LIRC_MODE2REC(LIRC_MODE_MODE2)
+#define LIRC_CAN_REC_SCANCODE LIRC_MODE2REC(LIRC_MODE_SCANCODE)
+#define LIRC_CAN_REC_LIRCCODE LIRC_MODE2REC(LIRC_MODE_LIRCCODE)
+
+#define LIRC_CAN_REC_MASK LIRC_MODE2REC(LIRC_CAN_SEND_MASK)
+
+#define LIRC_CAN_SET_REC_CARRIER (LIRC_CAN_SET_SEND_CARRIER << 16)
+#define LIRC_CAN_SET_REC_DUTY_CYCLE (LIRC_CAN_SET_SEND_DUTY_CYCLE << 16)
+
+#define LIRC_CAN_SET_REC_DUTY_CYCLE_RANGE 0x40000000
+#define LIRC_CAN_SET_REC_CARRIER_RANGE 0x80000000
+#define LIRC_CAN_GET_REC_RESOLUTION 0x20000000
+#define LIRC_CAN_SET_REC_TIMEOUT 0x10000000
+#define LIRC_CAN_SET_REC_FILTER 0x08000000
+
+#define LIRC_CAN_MEASURE_CARRIER 0x02000000
+#define LIRC_CAN_USE_WIDEBAND_RECEIVER 0x04000000
+
+#define LIRC_CAN_SEND(x) ((x)&LIRC_CAN_SEND_MASK)
+#define LIRC_CAN_REC(x) ((x)&LIRC_CAN_REC_MASK)
+
+#define LIRC_CAN_NOTIFY_DECODE 0x01000000
+
+/*** IOCTL commands for lirc driver ***/
+
+#define LIRC_GET_FEATURES _IOR('i', 0x00000000, __u32)
+
+#define LIRC_GET_SEND_MODE _IOR('i', 0x00000001, __u32)
+#define LIRC_GET_REC_MODE _IOR('i', 0x00000002, __u32)
+#define LIRC_GET_REC_RESOLUTION _IOR('i', 0x00000007, __u32)
+
+#define LIRC_GET_MIN_TIMEOUT _IOR('i', 0x00000008, __u32)
+#define LIRC_GET_MAX_TIMEOUT _IOR('i', 0x00000009, __u32)
+
+/* code length in bits, currently only for LIRC_MODE_LIRCCODE */
+#define LIRC_GET_LENGTH _IOR('i', 0x0000000f, __u32)
+
+#define LIRC_SET_SEND_MODE _IOW('i', 0x00000011, __u32)
+#define LIRC_SET_REC_MODE _IOW('i', 0x00000012, __u32)
+/* Note: these can reset the corresponding pulse_width */
+#define LIRC_SET_SEND_CARRIER _IOW('i', 0x00000013, __u32)
+#define LIRC_SET_REC_CARRIER _IOW('i', 0x00000014, __u32)
+#define LIRC_SET_SEND_DUTY_CYCLE _IOW('i', 0x00000015, __u32)
+#define LIRC_SET_TRANSMITTER_MASK _IOW('i', 0x00000017, __u32)
+
+/*
+ * When a timeout != 0 is set, the driver will send a LIRC_MODE2_TIMEOUT
+ * data packet; otherwise LIRC_MODE2_TIMEOUT is never sent. The timeout
+ * is disabled by default.
+ */
+#define LIRC_SET_REC_TIMEOUT _IOW('i', 0x00000018, __u32)
+
+/* 1 enables, 0 disables timeout reports in MODE2 */
+#define LIRC_SET_REC_TIMEOUT_REPORTS _IOW('i', 0x00000019, __u32)
+
+/*
+ * If enabled, the driver will send LIRC_MODE2_FREQUENCY packets from the
+ * next key press on.
+ */
+#define LIRC_SET_MEASURE_CARRIER_MODE _IOW('i', 0x0000001d, __u32)
+
+/*
+ * To set a range, use LIRC_SET_REC_CARRIER_RANGE with the lower bound
+ * first and then LIRC_SET_REC_CARRIER with the upper bound.
+ */
+#define LIRC_SET_REC_CARRIER_RANGE _IOW('i', 0x0000001f, __u32)
+
+#define LIRC_SET_WIDEBAND_RECEIVER _IOW('i', 0x00000023, __u32)
+
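+/*
+ * Receive sketch (illustrative): a mode2 receiver reads a stream of __u32
+ * samples and classifies them with the LIRC_IS_*()/LIRC_VALUE() helpers;
+ * "/dev/lirc0" is the conventional first device node.
+ *
+ *	int fd = open("/dev/lirc0", O_RDONLY);
+ *	__u32 features, mode = LIRC_MODE_MODE2, sample;
+ *	ioctl(fd, LIRC_GET_FEATURES, &features);
+ *	if (features & LIRC_CAN_REC_MODE2)
+ *		ioctl(fd, LIRC_SET_REC_MODE, &mode);
+ *	while (read(fd, &sample, sizeof(sample)) == sizeof(sample))
+ *		if (LIRC_IS_PULSE(sample))
+ *			printf("pulse %u us\n", LIRC_VALUE(sample));
+ */
+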
+/*
+ * struct lirc_scancode - decoded scancode with protocol for use with
+ * LIRC_MODE_SCANCODE
+ *
+ * @timestamp: Timestamp in nanoseconds using CLOCK_MONOTONIC when IR
+ * was decoded.
+ * @flags: should be 0 for transmit. When receiving scancodes,
+ * LIRC_SCANCODE_FLAG_TOGGLE or LIRC_SCANCODE_FLAG_REPEAT can be set
+ * depending on the protocol
+ * @rc_proto: see enum rc_proto
+ * @keycode: the translated keycode. Set to 0 for transmit.
+ * @scancode: the scancode received or to be sent
+ */
+struct lirc_scancode {
+ __u64 timestamp;
+ __u16 flags;
+ __u16 rc_proto;
+ __u32 keycode;
+ __u64 scancode;
+};
+
+/* Set if the toggle bit of RC-5 or RC-6 is enabled */
+#define LIRC_SCANCODE_FLAG_TOGGLE	1
+/* Set if this is a NEC or Sanyo repeat */
+#define LIRC_SCANCODE_FLAG_REPEAT	2
+
+/**
+ * enum rc_proto - the Remote Controller protocol
+ *
+ * @RC_PROTO_UNKNOWN: Protocol not known
+ * @RC_PROTO_OTHER: Protocol known but proprietary
+ * @RC_PROTO_RC5: Philips RC5 protocol
+ * @RC_PROTO_RC5X_20: Philips RC5x 20 bit protocol
+ * @RC_PROTO_RC5_SZ: StreamZap variant of RC5
+ * @RC_PROTO_JVC: JVC protocol
+ * @RC_PROTO_SONY12: Sony 12 bit protocol
+ * @RC_PROTO_SONY15: Sony 15 bit protocol
+ * @RC_PROTO_SONY20: Sony 20 bit protocol
+ * @RC_PROTO_NEC: NEC protocol
+ * @RC_PROTO_NECX: Extended NEC protocol
+ * @RC_PROTO_NEC32: NEC 32 bit protocol
+ * @RC_PROTO_SANYO: Sanyo protocol
+ * @RC_PROTO_MCIR2_KBD: RC6-ish MCE keyboard
+ * @RC_PROTO_MCIR2_MSE: RC6-ish MCE mouse
+ * @RC_PROTO_RC6_0: Philips RC6-0-16 protocol
+ * @RC_PROTO_RC6_6A_20: Philips RC6-6A-20 protocol
+ * @RC_PROTO_RC6_6A_24: Philips RC6-6A-24 protocol
+ * @RC_PROTO_RC6_6A_32: Philips RC6-6A-32 protocol
+ * @RC_PROTO_RC6_MCE: MCE (Philips RC6-6A-32 subtype) protocol
+ * @RC_PROTO_SHARP: Sharp protocol
+ * @RC_PROTO_XMP: XMP protocol
+ * @RC_PROTO_CEC: CEC protocol
+ * @RC_PROTO_IMON: iMon Pad protocol
+ */
+enum rc_proto {
+ RC_PROTO_UNKNOWN = 0,
+ RC_PROTO_OTHER = 1,
+ RC_PROTO_RC5 = 2,
+ RC_PROTO_RC5X_20 = 3,
+ RC_PROTO_RC5_SZ = 4,
+ RC_PROTO_JVC = 5,
+ RC_PROTO_SONY12 = 6,
+ RC_PROTO_SONY15 = 7,
+ RC_PROTO_SONY20 = 8,
+ RC_PROTO_NEC = 9,
+ RC_PROTO_NECX = 10,
+ RC_PROTO_NEC32 = 11,
+ RC_PROTO_SANYO = 12,
+ RC_PROTO_MCIR2_KBD = 13,
+ RC_PROTO_MCIR2_MSE = 14,
+ RC_PROTO_RC6_0 = 15,
+ RC_PROTO_RC6_6A_20 = 16,
+ RC_PROTO_RC6_6A_24 = 17,
+ RC_PROTO_RC6_6A_32 = 18,
+ RC_PROTO_RC6_MCE = 19,
+ RC_PROTO_SHARP = 20,
+ RC_PROTO_XMP = 21,
+ RC_PROTO_CEC = 22,
+ RC_PROTO_IMON = 23,
+};
+
+#endif
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
new file mode 100644
index 000000000..bfd5938fe
--- /dev/null
+++ b/tools/include/uapi/linux/mman.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_MMAN_H
+#define _UAPI_LINUX_MMAN_H
+
+#include <asm/mman.h>
+#include <asm-generic/hugetlb_encode.h>
+
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+
+#define OVERCOMMIT_GUESS 0
+#define OVERCOMMIT_ALWAYS 1
+#define OVERCOMMIT_NEVER 2
+
+/*
+ * Huge page size encoding when MAP_HUGETLB is specified, and a huge page
+ * size other than the default is desired. See hugetlb_encode.h.
+ * All known huge page size encodings are provided here. It is the
+ * responsibility of the application to know which sizes are supported on
+ * the running system. See mmap(2) man page for details.
+ */
+#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
+#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+
+#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
+#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
+#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
+#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB
+#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB
+#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB
+#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB
+#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB
+#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
+#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+
+#endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/include/uapi/linux/netlink.h b/tools/include/uapi/linux/netlink.h
new file mode 100644
index 000000000..776bc92e9
--- /dev/null
+++ b/tools/include/uapi/linux/netlink.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETLINK_H
+#define _UAPI__LINUX_NETLINK_H
+
+#include <linux/kernel.h>
+#include <linux/socket.h> /* for __kernel_sa_family_t */
+#include <linux/types.h>
+
+#define NETLINK_ROUTE 0 /* Routing/device hook */
+#define NETLINK_UNUSED 1 /* Unused number */
+#define NETLINK_USERSOCK 2 /* Reserved for user mode socket protocols */
+#define NETLINK_FIREWALL 3 /* Unused number, formerly ip_queue */
+#define NETLINK_SOCK_DIAG 4 /* socket monitoring */
+#define NETLINK_NFLOG 5 /* netfilter/iptables ULOG */
+#define NETLINK_XFRM 6 /* ipsec */
+#define NETLINK_SELINUX 7 /* SELinux event notifications */
+#define NETLINK_ISCSI 8 /* Open-iSCSI */
+#define NETLINK_AUDIT 9 /* auditing */
+#define NETLINK_FIB_LOOKUP 10
+#define NETLINK_CONNECTOR 11
+#define NETLINK_NETFILTER 12 /* netfilter subsystem */
+#define NETLINK_IP6_FW 13
+#define NETLINK_DNRTMSG 14 /* DECnet routing messages */
+#define NETLINK_KOBJECT_UEVENT 15 /* Kernel messages to userspace */
+#define NETLINK_GENERIC 16
+/* leave room for NETLINK_DM (DM Events) */
+#define NETLINK_SCSITRANSPORT 18 /* SCSI Transports */
+#define NETLINK_ECRYPTFS 19
+#define NETLINK_RDMA 20
+#define NETLINK_CRYPTO 21 /* Crypto layer */
+#define NETLINK_SMC 22 /* SMC monitoring */
+
+#define NETLINK_INET_DIAG NETLINK_SOCK_DIAG
+
+#define MAX_LINKS 32
+
+struct sockaddr_nl {
+ __kernel_sa_family_t nl_family; /* AF_NETLINK */
+ unsigned short nl_pad; /* zero */
+ __u32 nl_pid; /* port ID */
+ __u32 nl_groups; /* multicast groups mask */
+};
+
+struct nlmsghdr {
+ __u32 nlmsg_len; /* Length of message including header */
+ __u16 nlmsg_type; /* Message content */
+ __u16 nlmsg_flags; /* Additional flags */
+ __u32 nlmsg_seq; /* Sequence number */
+ __u32 nlmsg_pid; /* Sending process port ID */
+};
+
+/* Flags values */
+
+#define NLM_F_REQUEST		0x01	/* It is a request message. */
+#define NLM_F_MULTI 0x02 /* Multipart message, terminated by NLMSG_DONE */
+#define NLM_F_ACK 0x04 /* Reply with ack, with zero or error code */
+#define NLM_F_ECHO 0x08 /* Echo this request */
+#define NLM_F_DUMP_INTR 0x10 /* Dump was inconsistent due to sequence change */
+#define NLM_F_DUMP_FILTERED 0x20 /* Dump was filtered as requested */
+
+/* Modifiers to GET request */
+#define NLM_F_ROOT 0x100 /* specify tree root */
+#define NLM_F_MATCH 0x200 /* return all matching */
+#define NLM_F_ATOMIC 0x400 /* atomic GET */
+#define NLM_F_DUMP (NLM_F_ROOT|NLM_F_MATCH)
+
+/* Modifiers to NEW request */
+#define NLM_F_REPLACE 0x100 /* Override existing */
+#define NLM_F_EXCL 0x200 /* Do not touch, if it exists */
+#define NLM_F_CREATE 0x400 /* Create, if it does not exist */
+#define NLM_F_APPEND 0x800 /* Add to end of list */
+
+/* Modifiers to DELETE request */
+#define NLM_F_NONREC 0x100 /* Do not delete recursively */
+
+/* Flags for ACK message */
+#define NLM_F_CAPPED 0x100 /* request was capped */
+#define NLM_F_ACK_TLVS	0x200	/* extended ACK TLVs were included */
+
+/*
+ 4.4BSD ADD NLM_F_CREATE|NLM_F_EXCL
+ 4.4BSD CHANGE NLM_F_REPLACE
+
+ True CHANGE NLM_F_CREATE|NLM_F_REPLACE
+ Append NLM_F_CREATE
+ Check NLM_F_EXCL
+ */
+
+#define NLMSG_ALIGNTO 4U
+#define NLMSG_ALIGN(len) ( ((len)+NLMSG_ALIGNTO-1) & ~(NLMSG_ALIGNTO-1) )
+#define NLMSG_HDRLEN ((int) NLMSG_ALIGN(sizeof(struct nlmsghdr)))
+#define NLMSG_LENGTH(len) ((len) + NLMSG_HDRLEN)
+#define NLMSG_SPACE(len) NLMSG_ALIGN(NLMSG_LENGTH(len))
+#define NLMSG_DATA(nlh) ((void*)(((char*)nlh) + NLMSG_LENGTH(0)))
+#define NLMSG_NEXT(nlh,len) ((len) -= NLMSG_ALIGN((nlh)->nlmsg_len), \
+ (struct nlmsghdr*)(((char*)(nlh)) + NLMSG_ALIGN((nlh)->nlmsg_len)))
+#define NLMSG_OK(nlh,len) ((len) >= (int)sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len >= sizeof(struct nlmsghdr) && \
+ (nlh)->nlmsg_len <= (len))
+#define NLMSG_PAYLOAD(nlh,len) ((nlh)->nlmsg_len - NLMSG_SPACE((len)))
+
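+/*
+ * Parsing sketch (illustrative): the NLMSG_*() macros above walk a buffer of
+ * possibly multipart messages; "buf" and "len" are assumed to come from a
+ * prior recv() on a netlink socket (len as a signed int).
+ *
+ *	struct nlmsghdr *nlh;
+ *	for (nlh = (struct nlmsghdr *)buf; NLMSG_OK(nlh, len);
+ *	     nlh = NLMSG_NEXT(nlh, len)) {
+ *		if (nlh->nlmsg_type == NLMSG_DONE)
+ *			break;
+ *		handle(NLMSG_DATA(nlh));  // hypothetical handler
+ *	}
+ */
+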
+#define NLMSG_NOOP 0x1 /* Nothing. */
+#define NLMSG_ERROR 0x2 /* Error */
+#define NLMSG_DONE 0x3 /* End of a dump */
+#define NLMSG_OVERRUN 0x4 /* Data lost */
+
+#define NLMSG_MIN_TYPE 0x10 /* < 0x10: reserved control messages */
+
+struct nlmsgerr {
+ int error;
+ struct nlmsghdr msg;
+ /*
+ * followed by the message contents unless NETLINK_CAP_ACK was set
+ * or the ACK indicates success (error == 0)
+ * message length is aligned with NLMSG_ALIGN()
+ */
+ /*
+ * followed by TLVs defined in enum nlmsgerr_attrs
+ * if NETLINK_EXT_ACK was set
+ */
+};
+
+/**
+ * enum nlmsgerr_attrs - nlmsgerr attributes
+ * @NLMSGERR_ATTR_UNUSED: unused
+ * @NLMSGERR_ATTR_MSG: error message string (string)
+ * @NLMSGERR_ATTR_OFFS: offset of the invalid attribute in the original
+ * message, counting from the beginning of the header (u32)
+ * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
+ * be used - in the success case - to identify a created
+ * object or operation or similar (binary)
+ * @__NLMSGERR_ATTR_MAX: number of attributes
+ * @NLMSGERR_ATTR_MAX: highest attribute number
+ */
+enum nlmsgerr_attrs {
+ NLMSGERR_ATTR_UNUSED,
+ NLMSGERR_ATTR_MSG,
+ NLMSGERR_ATTR_OFFS,
+ NLMSGERR_ATTR_COOKIE,
+
+ __NLMSGERR_ATTR_MAX,
+ NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
+};
+
+#define NETLINK_ADD_MEMBERSHIP 1
+#define NETLINK_DROP_MEMBERSHIP 2
+#define NETLINK_PKTINFO 3
+#define NETLINK_BROADCAST_ERROR 4
+#define NETLINK_NO_ENOBUFS 5
+#ifndef __KERNEL__
+#define NETLINK_RX_RING 6
+#define NETLINK_TX_RING 7
+#endif
+#define NETLINK_LISTEN_ALL_NSID 8
+#define NETLINK_LIST_MEMBERSHIPS 9
+#define NETLINK_CAP_ACK 10
+#define NETLINK_EXT_ACK 11
+
+struct nl_pktinfo {
+ __u32 group;
+};
+
+struct nl_mmap_req {
+ unsigned int nm_block_size;
+ unsigned int nm_block_nr;
+ unsigned int nm_frame_size;
+ unsigned int nm_frame_nr;
+};
+
+struct nl_mmap_hdr {
+ unsigned int nm_status;
+ unsigned int nm_len;
+ __u32 nm_group;
+ /* credentials */
+ __u32 nm_pid;
+ __u32 nm_uid;
+ __u32 nm_gid;
+};
+
+#ifndef __KERNEL__
+enum nl_mmap_status {
+ NL_MMAP_STATUS_UNUSED,
+ NL_MMAP_STATUS_RESERVED,
+ NL_MMAP_STATUS_VALID,
+ NL_MMAP_STATUS_COPY,
+ NL_MMAP_STATUS_SKIP,
+};
+
+#define NL_MMAP_MSG_ALIGNMENT NLMSG_ALIGNTO
+#define NL_MMAP_MSG_ALIGN(sz) __ALIGN_KERNEL(sz, NL_MMAP_MSG_ALIGNMENT)
+#define NL_MMAP_HDRLEN NL_MMAP_MSG_ALIGN(sizeof(struct nl_mmap_hdr))
+#endif
+
+#define NET_MAJOR 36 /* Major 36 is reserved for networking */
+
+enum {
+ NETLINK_UNCONNECTED = 0,
+ NETLINK_CONNECTED,
+};
+
+/*
+ * <------- NLA_HDRLEN ------> <-- NLA_ALIGN(payload)-->
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * | Header | Pad | Payload | Pad |
+ * | (struct nlattr) | ing | | ing |
+ * +---------------------+- - -+- - - - - - - - - -+- - -+
+ * <-------------- nlattr->nla_len -------------->
+ */
+
+struct nlattr {
+ __u16 nla_len;
+ __u16 nla_type;
+};
+
+/*
+ * nla_type (16 bits)
+ * +---+---+-------------------------------+
+ * | N | O | Attribute Type |
+ * +---+---+-------------------------------+
+ * N := Carries nested attributes
+ * O := Payload stored in network byte order
+ *
+ * Note: The N and O flags are mutually exclusive.
+ */
+#define NLA_F_NESTED (1 << 15)
+#define NLA_F_NET_BYTEORDER (1 << 14)
+#define NLA_TYPE_MASK ~(NLA_F_NESTED | NLA_F_NET_BYTEORDER)
+
+#define NLA_ALIGNTO 4
+#define NLA_ALIGN(len) (((len) + NLA_ALIGNTO - 1) & ~(NLA_ALIGNTO - 1))
+#define NLA_HDRLEN ((int) NLA_ALIGN(sizeof(struct nlattr)))
+
+/* Generic 32 bitflags attribute content sent to the kernel.
+ *
+ * The value is a bitmap that defines the values being set.
+ * The selector is a bitmask that defines which values are legitimate.
+ *
+ * Examples:
+ * value = 0x0, and selector = 0x1
+ * implies we are selecting bit 1 and we want to set its value to 0.
+ *
+ * value = 0x2, and selector = 0x2
+ * implies we are selecting bit 2 and we want to set its value to 1.
+ *
+ */
+struct nla_bitfield32 {
+ __u32 value;
+ __u32 selector;
+};
+
+#endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
new file mode 100644
index 000000000..a45e7b4f0
--- /dev/null
+++ b/tools/include/uapi/linux/perf_event.h
@@ -0,0 +1,1130 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Performance events:
+ *
+ * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _UAPI_LINUX_PERF_EVENT_H
+#define _UAPI_LINUX_PERF_EVENT_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+#include <asm/byteorder.h>
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
+
+ PERF_TYPE_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized performance event event_id types, used by the
+ * attr.event_id parameter of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
+ PERF_COUNT_HW_REF_CPU_CYCLES = 9,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized hardware cache events:
+ *
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
+ * { read, write, prefetch } x
+ * { accesses, misses }
+ */
+enum perf_hw_cache_id {
+ PERF_COUNT_HW_CACHE_L1D = 0,
+ PERF_COUNT_HW_CACHE_L1I = 1,
+ PERF_COUNT_HW_CACHE_LL = 2,
+ PERF_COUNT_HW_CACHE_DTLB = 3,
+ PERF_COUNT_HW_CACHE_ITLB = 4,
+ PERF_COUNT_HW_CACHE_BPU = 5,
+ PERF_COUNT_HW_CACHE_NODE = 6,
+
+ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+ PERF_COUNT_HW_CACHE_OP_READ = 0,
+ PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+ PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
+};
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and software events of the kernel (and allow profiling of them
+ * as well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_DUMMY = 9,
+ PERF_COUNT_SW_BPF_OUTPUT = 10,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_sample_format {
+ PERF_SAMPLE_IP = 1U << 0,
+ PERF_SAMPLE_TID = 1U << 1,
+ PERF_SAMPLE_TIME = 1U << 2,
+ PERF_SAMPLE_ADDR = 1U << 3,
+ PERF_SAMPLE_READ = 1U << 4,
+ PERF_SAMPLE_CALLCHAIN = 1U << 5,
+ PERF_SAMPLE_ID = 1U << 6,
+ PERF_SAMPLE_CPU = 1U << 7,
+ PERF_SAMPLE_PERIOD = 1U << 8,
+ PERF_SAMPLE_STREAM_ID = 1U << 9,
+ PERF_SAMPLE_RAW = 1U << 10,
+ PERF_SAMPLE_BRANCH_STACK = 1U << 11,
+ PERF_SAMPLE_REGS_USER = 1U << 12,
+ PERF_SAMPLE_STACK_USER = 1U << 13,
+ PERF_SAMPLE_WEIGHT = 1U << 14,
+ PERF_SAMPLE_DATA_SRC = 1U << 15,
+ PERF_SAMPLE_IDENTIFIER = 1U << 16,
+ PERF_SAMPLE_TRANSACTION = 1U << 17,
+ PERF_SAMPLE_REGS_INTR = 1U << 18,
+ PERF_SAMPLE_PHYS_ADDR = 1U << 19,
+
+ PERF_SAMPLE_MAX = 1U << 20, /* non-ABI */
+
+ __PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
+};
+
+/*
+ * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
+ *
+ * If the user does not pass priv level information via branch_sample_type,
+ * the kernel uses the event's priv level. Branch and event priv levels do
+ * not have to match. Branch priv level is checked for permissions.
+ *
+ * The branch types can be combined, however BRANCH_ANY covers all types
+ * of branches and therefore it supersedes all the other types.
+ */
+enum perf_branch_sample_type_shift {
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
+
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
+ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
+
+ PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
+ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
+
+ PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+
+ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
+};
+
+enum perf_branch_sample_type {
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+
+ PERF_SAMPLE_BRANCH_TYPE_SAVE =
+ 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+};
+
+/*
+ * Common flow change classification
+ */
+enum {
+ PERF_BR_UNKNOWN = 0, /* unknown */
+ PERF_BR_COND = 1, /* conditional */
+ PERF_BR_UNCOND = 2, /* unconditional */
+ PERF_BR_IND = 3, /* indirect */
+ PERF_BR_CALL = 4, /* function call */
+ PERF_BR_IND_CALL = 5, /* indirect function call */
+ PERF_BR_RET = 6, /* function return */
+ PERF_BR_SYSCALL = 7, /* syscall */
+ PERF_BR_SYSRET = 8, /* syscall return */
+ PERF_BR_COND_CALL = 9, /* conditional function call */
+ PERF_BR_COND_RET = 10, /* conditional function return */
+ PERF_BR_MAX,
+};
+
+#define PERF_SAMPLE_BRANCH_PLM_ALL \
+ (PERF_SAMPLE_BRANCH_USER|\
+ PERF_SAMPLE_BRANCH_KERNEL|\
+ PERF_SAMPLE_BRANCH_HV)
+
+/*
+ * Values to determine ABI of the registers dump.
+ */
+enum perf_sample_regs_abi {
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
+};
+
+/*
+ * Values for the memory transaction event qualifier, mostly for
+ * abort events. Multiple bits can be set.
+ */
+enum {
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
+
+ /* bits 32..63 are reserved for the abort code */
+
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
+};
+
+/*
+ * The format of the data returned by read() on a perf event fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_event_read_format {
+ PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
+ PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
+ PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
+
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+};
+
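
To make the layout comment above concrete, here is a minimal, hedged sketch of reading a single (non-group) event opened with both TOTAL_TIME flags, scaling the count for multiplexing the way perf tools conventionally do. The struct and function names are illustrative, not from the patch:

#include <linux/perf_event.h>
#include <stdint.h>
#include <unistd.h>

struct read_one {			/* matches the !PERF_FORMAT_GROUP layout */
	uint64_t value;
	uint64_t time_enabled;		/* PERF_FORMAT_TOTAL_TIME_ENABLED */
	uint64_t time_running;		/* PERF_FORMAT_TOTAL_TIME_RUNNING */
};

static uint64_t read_scaled(int perf_fd)
{
	struct read_one r;

	if (read(perf_fd, &r, sizeof(r)) != (ssize_t)sizeof(r))
		return 0;
	if (r.time_running == 0)	/* never scheduled onto the PMU */
		return 0;
	/* Estimate the full-period count when the event was multiplexed. */
	return (uint64_t)((double)r.value * r.time_enabled / r.time_running);
}
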
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
+ /* add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
+
+/*
+ * Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ * should be < /proc/sys/kernel/perf_event_max_stack
+ */
+struct perf_event_attr {
+
+ /*
+ * Major type: hardware/software/tracepoint/etc.
+ */
+ __u32 type;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ __u32 size;
+
+ /*
+ * Type specific configuration information.
+ */
+ __u64 config;
+
+ union {
+ __u64 sample_period;
+ __u64 sample_freq;
+ };
+
+ __u64 sample_type;
+ __u64 read_format;
+
+ __u64 disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ comm : 1, /* include comm data */
+ freq : 1, /* use freq, not period */
+ inherit_stat : 1, /* per task counts */
+ enable_on_exec : 1, /* next exec enables */
+ task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */
+ /*
+ * precise_ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ precise_ip : 2, /* skid constraint */
+ mmap_data : 1, /* non-exec mmap data */
+ sample_id_all : 1, /* sample_type all events */
+
+ exclude_host : 1, /* don't count in host */
+ exclude_guest : 1, /* don't count in guest */
+
+ exclude_callchain_kernel : 1, /* exclude kernel callchains */
+ exclude_callchain_user : 1, /* exclude user callchains */
+ mmap2 : 1, /* include mmap with inode data */
+ comm_exec : 1, /* flag comm events that are due to an exec */
+ use_clockid : 1, /* use @clockid for time fields */
+ context_switch : 1, /* context switch data */
+ write_backward : 1, /* Write ring buffer from end to beginning */
+ namespaces : 1, /* include namespaces data */
+ __reserved_1 : 35;
+
+ union {
+ __u32 wakeup_events; /* wakeup every n events */
+ __u32 wakeup_watermark; /* bytes before wakeup */
+ };
+
+ __u32 bp_type;
+ union {
+ __u64 bp_addr;
+ __u64 kprobe_func; /* for perf_kprobe */
+ __u64 uprobe_path; /* for perf_uprobe */
+ __u64 config1; /* extension of config */
+ };
+ union {
+ __u64 bp_len;
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 probe_offset; /* for perf_[k,u]probe */
+ __u64 config2; /* extension of config1 */
+ };
+ __u64 branch_sample_type; /* enum perf_branch_sample_type */
+
+ /*
+ * Defines set of user regs to dump on samples.
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_user;
+
+ /*
+ * Defines size of the user stack to dump on samples.
+ */
+ __u32 sample_stack_user;
+
+ __s32 clockid;
+ /*
+ * Defines set of regs to dump for each sample
+ * state captured on:
+ * - precise = 0: PMU interrupt
+ * - precise > 0: sampled instruction
+ *
+ * See asm/perf_regs.h for details.
+ */
+ __u64 sample_regs_intr;
+
+ /*
+ * Wakeup watermark for AUX area
+ */
+ __u32 aux_watermark;
+ __u16 sample_max_stack;
+ __u16 __reserved_2; /* align to __u64 */
+};
+
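
For orientation, a minimal sketch (again not part of the patch) of filling this structure and opening a CPU-cycles counter on a task via the raw syscall; glibc provides no perf_event_open() wrapper, so syscall(2) is the usual route:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_cycles_counter(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_HARDWARE;
	attr.size = sizeof(attr);	/* lets the kernel do fwd/bwd compat checks */
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;		/* start stopped; enable via ioctl later */
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	/* cpu = -1 (any), group_fd = -1 (no group), flags = 0 */
	return syscall(SYS_perf_event_open, &attr, pid, -1, -1, 0);
}
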
+/*
+ * Structure used by below PERF_EVENT_IOC_QUERY_BPF command
+ * to query bpf programs attached to the same perf tracepoint
+ * as the given perf event.
+ */
+struct perf_event_query_bpf {
+ /*
+ * The below ids array length
+ */
+ __u32 ids_len;
+ /*
+ * Set by the kernel to indicate the number of
+ * available programs
+ */
+ __u32 prog_cnt;
+ /*
+ * User provided buffer to store program ids
+ */
+ __u32 ids[0];
+};
+
+#define perf_flags(attr) (*(&(attr)->read_format + 1))
+
+/*
+ * Ioctls that can be done on a perf event fd:
+ */
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32)
+#define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *)
+#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *)
+
+enum perf_event_ioc_flags {
+ PERF_IOC_FLAG_GROUP = 1U << 0,
+};
+
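
A short usage sketch for the ioctl list above; with PERF_IOC_FLAG_GROUP the operation applies to every sibling of a group leader, which is the usual way to bracket a measured region:

#include <linux/perf_event.h>
#include <sys/ioctl.h>

static void measure_begin(int group_fd)
{
	ioctl(group_fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
	ioctl(group_fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
}

static void measure_end(int group_fd)
{
	ioctl(group_fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
}
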
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+ __u32 version; /* version number of this structure */
+ __u32 compat_version; /* lowest version this is compat with */
+
+ /*
+ * Bits needed to read the hw events in user-space.
+ *
+ * u32 seq, time_mult, time_shift, index, width;
+ * u64 count, enabled, running;
+ * u64 cyc, time_offset;
+ * s64 pmc = 0;
+ *
+ * do {
+ * seq = pc->lock;
+ * barrier()
+ *
+ * enabled = pc->time_enabled;
+ * running = pc->time_running;
+ *
+ * if (pc->cap_usr_time && enabled != running) {
+ * cyc = rdtsc();
+ * time_offset = pc->time_offset;
+ * time_mult = pc->time_mult;
+ * time_shift = pc->time_shift;
+ * }
+ *
+ * index = pc->index;
+ * count = pc->offset;
+ * if (pc->cap_user_rdpmc && index) {
+ * width = pc->pmc_width;
+ * pmc = rdpmc(index - 1);
+ * }
+ *
+ * barrier();
+ * } while (pc->lock != seq);
+ *
+ * NOTE: for obvious reasons this only works on self-monitoring
+ * processes.
+ */
+ __u32 lock; /* seqlock for synchronization */
+ __u32 index; /* hardware event identifier */
+ __s64 offset; /* add to hardware event value */
+ __u64 time_enabled; /* time event active */
+ __u64 time_running; /* time event on cpu */
+ union {
+ __u64 capabilities;
+ struct {
+ __u64 cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+ cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
+
+ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
+ cap_user_time : 1, /* The time_* fields are used */
+ cap_user_time_zero : 1, /* The time_zero field is used */
+ cap_____res : 59;
+ };
+ };
+
+ /*
+ * If cap_user_rdpmc this field provides the bit-width of the value
+ * read using the rdpmc() or equivalent instruction. This can be used
+ * to sign extend the result like:
+ *
+ * pmc <<= 64 - width;
+ * pmc >>= 64 - width; // signed shift right
+ * count += pmc;
+ */
+ __u16 pmc_width;
+
+ /*
+ * If cap_usr_time the below fields can be used to compute the time
+ * delta since time_enabled (in ns) using rdtsc or similar.
+ *
+ * u64 quot, rem;
+ * u64 delta;
+ *
+ * quot = (cyc >> time_shift);
+ * rem = cyc & (((u64)1 << time_shift) - 1);
+ * delta = time_offset + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ *
+ * Where time_offset,time_mult,time_shift and cyc are read in the
+ * seqcount loop described above. This delta can then be added to
+ * enabled and possibly running (if index), improving the scaling:
+ *
+ * enabled += delta;
+ * if (index)
+ * running += delta;
+ *
+ * quot = count / running;
+ * rem = count % running;
+ * count = quot * enabled + (rem * enabled) / running;
+ */
+ __u16 time_shift;
+ __u32 time_mult;
+ __u64 time_offset;
+ /*
+ * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated
+ * from sample timestamps.
+ *
+ * time = timestamp - time_zero;
+ * quot = time / time_mult;
+ * rem = time % time_mult;
+ * cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+ *
+ * And vice versa:
+ *
+ * quot = cyc >> time_shift;
+ * rem = cyc & (((u64)1 << time_shift) - 1);
+ * timestamp = time_zero + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ */
+ __u64 time_zero;
+ __u32 size; /* Header size up to __reserved[] fields. */
+
+ /*
+ * Hole for extension of the self monitor capabilities
+ */
+
+ __u8 __reserved[118*8+4]; /* align to 1k. */
+
+ /*
+ * Control data for the mmap() data buffer.
+ *
+ * User-space reading the @data_head value should issue an smp_rmb(),
+ * after reading this value.
+ *
+ * When the mapping is PROT_WRITE the @data_tail value should be
+ * written by userspace to reflect the last read data, after issuing
+ * an smp_mb() to separate the data read from the ->data_tail store.
+ * In this case the kernel will not over-write unread data.
+ *
+ * See perf_output_put_handle() for the data ordering.
+ *
+ * data_{offset,size} indicate the location and size of the perf record
+ * buffer within the mmapped area.
+ */
+ __u64 data_head; /* head in the data section */
+ __u64 data_tail; /* user-space written tail */
+ __u64 data_offset; /* where the buffer starts */
+ __u64 data_size; /* data buffer size */
+
+ /*
+ * AUX area is defined by aux_{offset,size} fields that should be set
+ * by the userspace, so that
+ *
+ * aux_offset >= data_offset + data_size
+ *
+ * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
+ *
+ * Ring buffer pointers aux_{head,tail} have the same semantics as
+ * data_{head,tail} and same ordering rules apply.
+ */
+ __u64 aux_head;
+ __u64 aux_tail;
+ __u64 aux_offset;
+ __u64 aux_size;
+};
+
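
A hedged sketch of the consumer loop implied by the data_head/data_tail rules above, for a PROT_WRITE mapping. __sync_synchronize() stands in for the required barriers, records that straddle the ring wrap are ignored for brevity, and drain() plus its callback are illustrative names:

#include <linux/perf_event.h>
#include <stdint.h>

static void drain(struct perf_event_mmap_page *pc, void (*handle)(void *))
{
	uint64_t head = pc->data_head;
	__sync_synchronize();		/* the smp_rmb() after reading head */

	uint64_t tail = pc->data_tail;
	char *base = (char *)pc + pc->data_offset;

	while (tail < head) {
		struct perf_event_header *hdr =
			(void *)(base + (tail % pc->data_size));

		handle(hdr);
		tail += hdr->size;
	}

	__sync_synchronize();		/* order reads before the tail store */
	pc->data_tail = tail;		/* tell the kernel the data was read */
}
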
+#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_RECORD_MISC_KERNEL (1 << 0)
+#define PERF_RECORD_MISC_USER (2 << 0)
+#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
+
+/*
+ * Indicates that /proc/PID/maps parsing was truncated by a timeout.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12)
+/*
+ * Following PERF_RECORD_MISC_* are used on different
+ * events, so can reuse the same bit position:
+ *
+ * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events
+ * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event
+ * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events
+ */
+#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
+#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
+/*
+ * These PERF_RECORD_MISC_* flags below are safely reused
+ * for the following events:
+ *
+ * PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ *
+ *
+ * PERF_RECORD_MISC_EXACT_IP:
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ *
+ * PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
+ * Indicates that thread was preempted in TASK_RUNNING state.
+ */
+#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
+
+struct perf_event_header {
+ __u32 type;
+ __u16 misc;
+ __u16 size;
+};
+
+struct perf_ns_link_info {
+ __u64 dev;
+ __u64 ino;
+};
+
+enum {
+ NET_NS_INDEX = 0,
+ UTS_NS_INDEX = 1,
+ IPC_NS_INDEX = 2,
+ PID_NS_INDEX = 3,
+ USER_NS_INDEX = 4,
+ MNT_NS_INDEX = 5,
+ CGROUP_NS_INDEX = 6,
+
+ NR_NAMESPACES, /* number of available namespaces */
+};
+
+enum perf_event_type {
+
+ /*
+ * If perf_event_attr.sample_id_all is set then all event types will
+ * carry the sample_type-selected fields related to where/when
+ * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
+ * IDENTIFIER) described in PERF_RECORD_SAMPLE below. They are stashed
+ * just after the perf_event_header and the fields already present for
+ * the existing record type, i.e. at the end of the payload. That way a
+ * newer perf.data file will be supported by older perf tools, with
+ * these new optional fields being ignored.
+ *
+ * struct sample_id {
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * } && perf_event_attr::sample_id_all
+ *
+ * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
+ * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
+ * relative to header.size.
+ */
+
+ /*
+ * The MMAP events record the PROT_EXEC mappings so that we can
+ * correlate userspace IPs to code. They have the following structure:
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP = 1,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 id;
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST = 2,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * char comm[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_COMM = 3,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_EXIT = 4,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 time;
+ * u64 id;
+ * u64 stream_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_THROTTLE = 5,
+ PERF_RECORD_UNTHROTTLE = 6,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_FORK = 7,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, tid;
+ *
+ * struct read_format values;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_READ = 8,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * #
+ * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+ * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+ * # is fixed relative to header.
+ * #
+ *
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * { u64 ip; } && PERF_SAMPLE_IP
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 addr; } && PERF_SAMPLE_ADDR
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 period; } && PERF_SAMPLE_PERIOD
+ *
+ * { struct read_format values; } && PERF_SAMPLE_READ
+ *
+ * { u64 nr,
+ * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises wrt
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
+ * { u32 size;
+ * char data[size]; } && PERF_SAMPLE_RAW
+ *
+ * { u64 nr;
+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ *
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ *
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ *
+ * { u64 weight; } && PERF_SAMPLE_WEIGHT
+ * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
+ * { u64 transaction; } && PERF_SAMPLE_TRANSACTION
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+ * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR
+ * };
+ */
+ PERF_RECORD_SAMPLE = 9,
+
+ /*
+ * The MMAP2 records are an augmented version of MMAP; they add
+ * maj, min and ino numbers used to uniquely identify each mapping.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * u32 prot, flags;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP2 = 10,
+
+ /*
+ * Records that new data landed in the AUX buffer part.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u64 aux_offset;
+ * u64 aux_size;
+ * u64 flags;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_AUX = 11,
+
+ /*
+ * Indicates that instruction trace has started
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_ITRACE_START = 12,
+
+ /*
+ * Records the dropped/lost sample number.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST_SAMPLES = 13,
+
+ /*
+ * Records a context switch in or out (flagged by
+ * PERF_RECORD_MISC_SWITCH_OUT). See also
+ * PERF_RECORD_SWITCH_CPU_WIDE.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_SWITCH = 14,
+
+ /*
+ * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
+ * next_prev_tid that are the next (switching out) or previous
+ * (switching in) pid/tid.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u32 next_prev_pid;
+ * u32 next_prev_tid;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_SWITCH_CPU_WIDE = 15,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * u64 nr_namespaces;
+ * { u64 dev, inode; } [nr_namespaces];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_NAMESPACES = 16,
+
+ PERF_RECORD_MAX, /* non-ABI */
+};
+
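
A dispatcher skeleton over the record types above (illustrative; a real consumer also has to honor the sample_id_all trailer and the sample_type-ordered layout of PERF_RECORD_SAMPLE):

#include <linux/perf_event.h>
#include <stdio.h>

static void handle_record(struct perf_event_header *hdr)
{
	switch (hdr->type) {
	case PERF_RECORD_SAMPLE:
		/* parse fields in sample_type order, per the layout above */
		break;
	case PERF_RECORD_MMAP:
	case PERF_RECORD_MMAP2:
		/* track mappings so sample IPs can be symbolized later */
		break;
	case PERF_RECORD_LOST:
		fprintf(stderr, "ring buffer overran, samples lost\n");
		break;
	default:
		break;
	}
}
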
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
+
+enum perf_callchain_context {
+ PERF_CONTEXT_HV = (__u64)-32,
+ PERF_CONTEXT_KERNEL = (__u64)-128,
+ PERF_CONTEXT_USER = (__u64)-512,
+
+ PERF_CONTEXT_GUEST = (__u64)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
+ PERF_CONTEXT_GUEST_USER = (__u64)-2560,
+
+ PERF_CONTEXT_MAX = (__u64)-4095,
+};
+
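
These context values appear inline in a PERF_SAMPLE_CALLCHAIN's ips[] array; because they occupy the very top of the 64-bit range, anything at or above PERF_CONTEXT_MAX can be treated as a marker rather than an instruction pointer. A sketch:

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

static void print_callchain(const uint64_t *ips, uint64_t nr)
{
	const char *ctx = "?";

	for (uint64_t i = 0; i < nr; i++) {
		if (ips[i] >= PERF_CONTEXT_MAX) {	/* a context marker */
			ctx = (ips[i] == PERF_CONTEXT_KERNEL) ? "kernel" :
			      (ips[i] == PERF_CONTEXT_USER)   ? "user" : "other";
			continue;
		}
		printf("[%s] %#llx\n", ctx, (unsigned long long)ips[i]);
	}
}
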
+/**
+ * PERF_RECORD_AUX::flags bits
+ */
+#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */
+
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+union perf_mem_data_src {
+ __u64 val;
+ struct {
+ __u64 mem_op:5, /* type of opcode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_snoop:5, /* snoop mode */
+ mem_lock:2, /* lock instr */
+ mem_dtlb:7, /* tlb access */
+ mem_lvl_num:4, /* memory hierarchy level number */
+ mem_remote:1, /* remote */
+ mem_snoopx:2, /* snoop mode, ext */
+ mem_rsvd:24;
+ };
+};
+#elif defined(__BIG_ENDIAN_BITFIELD)
+union perf_mem_data_src {
+ __u64 val;
+ struct {
+ __u64 mem_rsvd:24,
+ mem_snoopx:2, /* snoop mode, ext */
+ mem_remote:1, /* remote */
+ mem_lvl_num:4, /* memory hierarchy level number */
+ mem_dtlb:7, /* tlb access */
+ mem_lock:2, /* lock instr */
+ mem_snoop:5, /* snoop mode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_op:5; /* type of opcode */
+ };
+};
+#else
+#error "Unknown endianness"
+#endif
+
+/* type of opcode (load/store/prefetch,code) */
+#define PERF_MEM_OP_NA 0x01 /* not available */
+#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
+#define PERF_MEM_OP_STORE 0x04 /* store instruction */
+#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
+#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
+#define PERF_MEM_OP_SHIFT 0
+
+/* memory hierarchy (memory level, hit or miss) */
+#define PERF_MEM_LVL_NA 0x01 /* not available */
+#define PERF_MEM_LVL_HIT 0x02 /* hit level */
+#define PERF_MEM_LVL_MISS 0x04 /* miss level */
+#define PERF_MEM_LVL_L1 0x08 /* L1 */
+#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x20 /* L2 */
+#define PERF_MEM_LVL_L3 0x40 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */
+#define PERF_MEM_REMOTE_SHIFT 37
+
+#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */
+#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
+#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
+#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
+/* 5-0xa available */
+#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
+#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
+#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
+#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
+#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
+
+#define PERF_MEM_LVLNUM_SHIFT 33
+
+/* snoop mode */
+#define PERF_MEM_SNOOP_NA 0x01 /* not available */
+#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
+#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
+/* 1 free */
+#define PERF_MEM_SNOOPX_SHIFT 38
+
+/* locked instruction */
+#define PERF_MEM_LOCK_NA 0x01 /* not available */
+#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
+
+/* TLB access */
+#define PERF_MEM_TLB_NA 0x01 /* not available */
+#define PERF_MEM_TLB_HIT 0x02 /* hit level */
+#define PERF_MEM_TLB_MISS 0x04 /* miss level */
+#define PERF_MEM_TLB_L1 0x08 /* L1 */
+#define PERF_MEM_TLB_L2 0x10 /* L2 */
+#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker */
+#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
+
+#define PERF_MEM_S(a, s) \
+ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
+
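
PERF_MEM_S() builds the same shifted encodings that a sampled data_src value carries, so it works for testing as well as composing. Two small illustrative predicates:

#include <linux/perf_event.h>

static int is_l1_hit(union perf_mem_data_src src)
{
	return (src.val & PERF_MEM_S(LVL, L1)) &&
	       (src.val & PERF_MEM_S(LVL, HIT));
}

static int is_local_dram(union perf_mem_data_src src)
{
	return !!(src.val & PERF_MEM_S(LVL, LOC_RAM));
}
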
+/*
+ * single taken branch record layout:
+ *
+ * from: source instruction (may not always be a branch insn)
+ * to: branch target
+ * mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * Support for mispred and predicted is optional. When it
+ * is not supported, mispred = predicted = 0.
+ *
+ * in_tx: running in a hardware transaction
+ * abort: aborting a hardware transaction
+ * cycles: cycles from last branch (or 0 if not supported)
+ * type: branch type
+ */
+struct perf_branch_entry {
+ __u64 from;
+ __u64 to;
+ __u64 mispred:1, /* target mispredicted */
+ predicted:1,/* target predicted */
+ in_tx:1, /* in transaction */
+ abort:1, /* transaction abort */
+ cycles:16, /* cycle count to last branch */
+ type:4, /* branch type */
+ reserved:40;
+};
+
+#endif /* _UAPI_LINUX_PERF_EVENT_H */
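
To round off this header, a sketch of dumping the lbr[nr] array that PERF_SAMPLE_BRANCH_STACK delivers, using the struct perf_branch_entry just defined (function name illustrative):

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>

static void print_branches(const struct perf_branch_entry *br, uint64_t nr)
{
	for (uint64_t i = 0; i < nr; i++)
		printf("%#llx -> %#llx%s%s cycles=%u\n",
		       (unsigned long long)br[i].from,
		       (unsigned long long)br[i].to,
		       br[i].mispred ? " mispredicted" : "",
		       br[i].abort ? " tx-abort" : "",
		       (unsigned int)br[i].cycles);
}
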
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
new file mode 100644
index 000000000..b17201edf
--- /dev/null
+++ b/tools/include/uapi/linux/prctl.h
@@ -0,0 +1,223 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_PRCTL_H
+#define _LINUX_PRCTL_H
+
+#include <linux/types.h>
+
+/* Values to pass as first argument to prctl() */
+
+#define PR_SET_PDEATHSIG 1 /* Second arg is a signal */
+#define PR_GET_PDEATHSIG 2 /* Second arg is a ptr to return the signal */
+
+/* Get/set current->mm->dumpable */
+#define PR_GET_DUMPABLE 3
+#define PR_SET_DUMPABLE 4
+
+/* Get/set unaligned access control bits (if meaningful) */
+#define PR_GET_UNALIGN 5
+#define PR_SET_UNALIGN 6
+# define PR_UNALIGN_NOPRINT 1 /* silently fix up unaligned user accesses */
+# define PR_UNALIGN_SIGBUS 2 /* generate SIGBUS on unaligned user access */
+
+/* Get/set whether or not to drop capabilities on setuid() away from
+ * uid 0 (as per security/commoncap.c) */
+#define PR_GET_KEEPCAPS 7
+#define PR_SET_KEEPCAPS 8
+
+/* Get/set floating-point emulation control bits (if meaningful) */
+#define PR_GET_FPEMU 9
+#define PR_SET_FPEMU 10
+# define PR_FPEMU_NOPRINT 1 /* silently emulate fp operations accesses */
+# define PR_FPEMU_SIGFPE 2 /* don't emulate fp operations, send SIGFPE instead */
+
+/* Get/set floating-point exception mode (if meaningful) */
+#define PR_GET_FPEXC 11
+#define PR_SET_FPEXC 12
+# define PR_FP_EXC_SW_ENABLE 0x80 /* Use FPEXC for FP exception enables */
+# define PR_FP_EXC_DIV 0x010000 /* floating point divide by zero */
+# define PR_FP_EXC_OVF 0x020000 /* floating point overflow */
+# define PR_FP_EXC_UND 0x040000 /* floating point underflow */
+# define PR_FP_EXC_RES 0x080000 /* floating point inexact result */
+# define PR_FP_EXC_INV 0x100000 /* floating point invalid operation */
+# define PR_FP_EXC_DISABLED 0 /* FP exceptions disabled */
+# define PR_FP_EXC_NONRECOV 1 /* async non-recoverable exc. mode */
+# define PR_FP_EXC_ASYNC 2 /* async recoverable exception mode */
+# define PR_FP_EXC_PRECISE 3 /* precise exception mode */
+
+/* Get/set whether we use statistical process timing or accurate timestamp
+ * based process timing */
+#define PR_GET_TIMING 13
+#define PR_SET_TIMING 14
+# define PR_TIMING_STATISTICAL 0 /* Normal, traditional,
+ statistical process timing */
+# define PR_TIMING_TIMESTAMP 1 /* Accurate timestamp based
+ process timing */
+
+#define PR_SET_NAME 15 /* Set process name */
+#define PR_GET_NAME 16 /* Get process name */
+
+/* Get/set process endian */
+#define PR_GET_ENDIAN 19
+#define PR_SET_ENDIAN 20
+# define PR_ENDIAN_BIG 0
+# define PR_ENDIAN_LITTLE 1 /* True little endian mode */
+# define PR_ENDIAN_PPC_LITTLE 2 /* "PowerPC" pseudo little endian */
+
+/* Get/set process seccomp mode */
+#define PR_GET_SECCOMP 21
+#define PR_SET_SECCOMP 22
+
+/* Get/set the capability bounding set (as per security/commoncap.c) */
+#define PR_CAPBSET_READ 23
+#define PR_CAPBSET_DROP 24
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+
+/* Get/set securebits (as per security/commoncap.c) */
+#define PR_GET_SECUREBITS 27
+#define PR_SET_SECUREBITS 28
+
+/*
+ * Get/set the timerslack as used by poll/select/nanosleep
+ * A value of 0 means "use default"
+ */
+#define PR_SET_TIMERSLACK 29
+#define PR_GET_TIMERSLACK 30
+
+#define PR_TASK_PERF_EVENTS_DISABLE 31
+#define PR_TASK_PERF_EVENTS_ENABLE 32
+
+/*
+ * Set early/late kill mode for hwpoison memory corruption.
+ * This influences when the process gets killed on a memory corruption.
+ */
+#define PR_MCE_KILL 33
+# define PR_MCE_KILL_CLEAR 0
+# define PR_MCE_KILL_SET 1
+
+# define PR_MCE_KILL_LATE 0
+# define PR_MCE_KILL_EARLY 1
+# define PR_MCE_KILL_DEFAULT 2
+
+#define PR_MCE_KILL_GET 34
+
+/*
+ * Tune up process memory map specifics.
+ */
+#define PR_SET_MM 35
+# define PR_SET_MM_START_CODE 1
+# define PR_SET_MM_END_CODE 2
+# define PR_SET_MM_START_DATA 3
+# define PR_SET_MM_END_DATA 4
+# define PR_SET_MM_START_STACK 5
+# define PR_SET_MM_START_BRK 6
+# define PR_SET_MM_BRK 7
+# define PR_SET_MM_ARG_START 8
+# define PR_SET_MM_ARG_END 9
+# define PR_SET_MM_ENV_START 10
+# define PR_SET_MM_ENV_END 11
+# define PR_SET_MM_AUXV 12
+# define PR_SET_MM_EXE_FILE 13
+# define PR_SET_MM_MAP 14
+# define PR_SET_MM_MAP_SIZE 15
+
+/*
+ * This structure provides a new memory descriptor
+ * map which mostly modifies /proc/pid/stat[m]
+ * output for a task. This is mostly done for the
+ * sake of checkpoint/restore functionality.
+ */
+struct prctl_mm_map {
+ __u64 start_code; /* code section bounds */
+ __u64 end_code;
+ __u64 start_data; /* data section bounds */
+ __u64 end_data;
+ __u64 start_brk; /* heap for brk() syscall */
+ __u64 brk;
+ __u64 start_stack; /* stack starts at */
+ __u64 arg_start; /* command line arguments bounds */
+ __u64 arg_end;
+ __u64 env_start; /* environment variables bounds */
+ __u64 env_end;
+ __u64 *auxv; /* auxiliary vector */
+ __u32 auxv_size; /* vector size */
+ __u32 exe_fd; /* /proc/$pid/exe link file */
+};
+
+/*
+ * Set specific pid that is allowed to ptrace the current task.
+ * A value of 0 means "no process".
+ */
+#define PR_SET_PTRACER 0x59616d61
+# define PR_SET_PTRACER_ANY ((unsigned long)-1)
+
+#define PR_SET_CHILD_SUBREAPER 36
+#define PR_GET_CHILD_SUBREAPER 37
+
+/*
+ * If no_new_privs is set, then operations that grant new privileges (i.e.
+ * execve) will either fail or not grant them. This affects suid/sgid,
+ * file capabilities, and LSMs.
+ *
+ * Operations that merely manipulate or drop existing privileges (setresuid,
+ * capset, etc.) will still work. Drop those privileges if you want them gone.
+ *
+ * Changing LSM security domain is considered a new privilege. So, for example,
+ * asking selinux for a specific new context (e.g. with runcon) will result
+ * in execve returning -EPERM.
+ *
+ * See Documentation/userspace-api/no_new_privs.rst for more details.
+ */
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+
+#define PR_GET_TID_ADDRESS 40
+
+#define PR_SET_THP_DISABLE 41
+#define PR_GET_THP_DISABLE 42
+
+/*
+ * Tell the kernel to start/stop helping userspace manage bounds tables.
+ */
+#define PR_MPX_ENABLE_MANAGEMENT 43
+#define PR_MPX_DISABLE_MANAGEMENT 44
+
+#define PR_SET_FP_MODE 45
+#define PR_GET_FP_MODE 46
+# define PR_FP_MODE_FR (1 << 0) /* 64b FP registers */
+# define PR_FP_MODE_FRE (1 << 1) /* 32b compatibility */
+
+/* Control the ambient capability set */
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+
+/* arm64 Scalable Vector Extension controls */
+/* Flag values must be kept in sync with ptrace NT_ARM_SVE interface */
+#define PR_SVE_SET_VL 50 /* set task vector length */
+# define PR_SVE_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
+#define PR_SVE_GET_VL 51 /* get task vector length */
+/* Bits common to PR_SVE_SET_VL and PR_SVE_GET_VL */
+# define PR_SVE_VL_LEN_MASK 0xffff
+# define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */
+
+/* Per task speculation control */
+#define PR_GET_SPECULATION_CTRL 52
+#define PR_SET_SPECULATION_CTRL 53
+/* Speculation control variants */
+# define PR_SPEC_STORE_BYPASS 0
+# define PR_SPEC_INDIRECT_BRANCH 1
+/* Return and control values for PR_SET/GET_SPECULATION_CTRL */
+# define PR_SPEC_NOT_AFFECTED 0
+# define PR_SPEC_PRCTL (1UL << 0)
+# define PR_SPEC_ENABLE (1UL << 1)
+# define PR_SPEC_DISABLE (1UL << 2)
+# define PR_SPEC_FORCE_DISABLE (1UL << 3)
+
+#endif /* _LINUX_PRCTL_H */
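
A usage sketch for a few of the requests above. prctl(2) takes the request plus up to four unsigned long arguments; unused ones are conventionally passed as 0 (harden_worker() is an illustrative name):

#include <linux/prctl.h>
#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>

static void harden_worker(void)
{
	prctl(PR_SET_NAME, "worker", 0, 0, 0);		/* thread name, <= 15 chars */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))	/* one-way: cannot be unset */
		perror("PR_SET_NO_NEW_PRIVS");
	prctl(PR_SET_PDEATHSIG, SIGTERM, 0, 0, 0);	/* signal us if parent dies */
}
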
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
new file mode 100644
index 000000000..22627f800
--- /dev/null
+++ b/tools/include/uapi/linux/sched.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_SCHED_H
+#define _UAPI_LINUX_SCHED_H
+
+/*
+ * cloning flags:
+ */
+#define CSIGNAL 0x000000ff /* signal mask to be sent at exit */
+#define CLONE_VM 0x00000100 /* set if VM shared between processes */
+#define CLONE_FS 0x00000200 /* set if fs info shared between processes */
+#define CLONE_FILES 0x00000400 /* set if open files shared between processes */
+#define CLONE_SIGHAND 0x00000800 /* set if signal handlers and blocked signals shared */
+#define CLONE_PTRACE 0x00002000 /* set if we want to let tracing continue on the child too */
+#define CLONE_VFORK 0x00004000 /* set if the parent wants the child to wake it up on mm_release */
+#define CLONE_PARENT 0x00008000 /* set if we want to have the same parent as the cloner */
+#define CLONE_THREAD 0x00010000 /* Same thread group? */
+#define CLONE_NEWNS 0x00020000 /* New mount namespace group */
+#define CLONE_SYSVSEM 0x00040000 /* share system V SEM_UNDO semantics */
+#define CLONE_SETTLS 0x00080000 /* create a new TLS for the child */
+#define CLONE_PARENT_SETTID 0x00100000 /* set the TID in the parent */
+#define CLONE_CHILD_CLEARTID 0x00200000 /* clear the TID in the child */
+#define CLONE_DETACHED 0x00400000 /* Unused, ignored */
+#define CLONE_UNTRACED 0x00800000 /* set if the tracing process can't force CLONE_PTRACE on this clone */
+#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
+#define CLONE_NEWCGROUP 0x02000000 /* New cgroup namespace */
+#define CLONE_NEWUTS 0x04000000 /* New utsname namespace */
+#define CLONE_NEWIPC 0x08000000 /* New ipc namespace */
+#define CLONE_NEWUSER 0x10000000 /* New user namespace */
+#define CLONE_NEWPID 0x20000000 /* New pid namespace */
+#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#define CLONE_IO 0x80000000 /* Clone io context */
+
+/*
+ * Scheduling policies
+ */
+#define SCHED_NORMAL 0
+#define SCHED_FIFO 1
+#define SCHED_RR 2
+#define SCHED_BATCH 3
+/* SCHED_ISO: reserved but not implemented yet */
+#define SCHED_IDLE 5
+#define SCHED_DEADLINE 6
+
+/* Can be ORed in to make sure the process is reverted to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK 0x40000000
+
+/*
+ * For the sched_{set,get}attr() calls
+ */
+#define SCHED_FLAG_RESET_ON_FORK 0x01
+#define SCHED_FLAG_RECLAIM 0x02
+#define SCHED_FLAG_DL_OVERRUN 0x04
+
+#define SCHED_FLAG_ALL (SCHED_FLAG_RESET_ON_FORK | \
+ SCHED_FLAG_RECLAIM | \
+ SCHED_FLAG_DL_OVERRUN)
+
+#endif /* _UAPI_LINUX_SCHED_H */
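
Note that SCHED_RESET_ON_FORK is ORed into the policy argument rather than passed separately. A minimal sketch using the libc wrappers (the glibc constants mirror the values above; _GNU_SOURCE exposes SCHED_RESET_ON_FORK):

#define _GNU_SOURCE
#include <sched.h>

static int go_realtime(void)
{
	struct sched_param sp = { .sched_priority = 10 };

	/* children fall back to SCHED_NORMAL instead of inheriting SCHED_FIFO */
	return sched_setscheduler(0, SCHED_FIFO | SCHED_RESET_ON_FORK, &sp);
}
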
diff --git a/tools/include/uapi/linux/seg6.h b/tools/include/uapi/linux/seg6.h
new file mode 100644
index 000000000..286e8d6a8
--- /dev/null
+++ b/tools/include/uapi/linux/seg6.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_H
+#define _UAPI_LINUX_SEG6_H
+
+#include <linux/types.h>
+#include <linux/in6.h> /* For struct in6_addr. */
+
+/*
+ * SRH
+ */
+struct ipv6_sr_hdr {
+ __u8 nexthdr;
+ __u8 hdrlen;
+ __u8 type;
+ __u8 segments_left;
+ __u8 first_segment; /* Represents the last_entry field of SRH */
+ __u8 flags;
+ __u16 tag;
+
+ struct in6_addr segments[0];
+};
+
+#define SR6_FLAG1_PROTECTED (1 << 6)
+#define SR6_FLAG1_OAM (1 << 5)
+#define SR6_FLAG1_ALERT (1 << 4)
+#define SR6_FLAG1_HMAC (1 << 3)
+
+#define SR6_TLV_INGRESS 1
+#define SR6_TLV_EGRESS 2
+#define SR6_TLV_OPAQUE 3
+#define SR6_TLV_PADDING 4
+#define SR6_TLV_HMAC 5
+
+#define sr_has_hmac(srh) ((srh)->flags & SR6_FLAG1_HMAC)
+
+struct sr6_tlv {
+ __u8 type;
+ __u8 len;
+ __u8 data[0];
+};
+
+#endif
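
Since first_segment indexes the last entry of the list and segments_left points at the active segment, walking the SRH is a bounded loop. A sketch (with <arpa/inet.h> included first so the libc in6 definitions take precedence over the kernel's):

#include <arpa/inet.h>		/* must precede linux/seg6.h */
#include <stdio.h>
#include <linux/seg6.h>

static void print_segments(const struct ipv6_sr_hdr *srh)
{
	char buf[INET6_ADDRSTRLEN];

	for (int i = 0; i <= srh->first_segment; i++) {
		inet_ntop(AF_INET6, &srh->segments[i], buf, sizeof(buf));
		printf("segment[%d] = %s%s\n", i, buf,
		       i == srh->segments_left ? "  <- active" : "");
	}
}
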
diff --git a/tools/include/uapi/linux/seg6_local.h b/tools/include/uapi/linux/seg6_local.h
new file mode 100644
index 000000000..edc138bdc
--- /dev/null
+++ b/tools/include/uapi/linux/seg6_local.h
@@ -0,0 +1,80 @@
+/*
+ * SR-IPv6 implementation
+ *
+ * Author:
+ * David Lebrun <david.lebrun@uclouvain.be>
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SEG6_LOCAL_H
+#define _UAPI_LINUX_SEG6_LOCAL_H
+
+#include <linux/seg6.h>
+
+enum {
+ SEG6_LOCAL_UNSPEC,
+ SEG6_LOCAL_ACTION,
+ SEG6_LOCAL_SRH,
+ SEG6_LOCAL_TABLE,
+ SEG6_LOCAL_NH4,
+ SEG6_LOCAL_NH6,
+ SEG6_LOCAL_IIF,
+ SEG6_LOCAL_OIF,
+ SEG6_LOCAL_BPF,
+ __SEG6_LOCAL_MAX,
+};
+#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
+
+enum {
+ SEG6_LOCAL_ACTION_UNSPEC = 0,
+ /* node segment */
+ SEG6_LOCAL_ACTION_END = 1,
+ /* adjacency segment (IPv6 cross-connect) */
+ SEG6_LOCAL_ACTION_END_X = 2,
+ /* lookup of next seg NH in table */
+ SEG6_LOCAL_ACTION_END_T = 3,
+ /* decap and L2 cross-connect */
+ SEG6_LOCAL_ACTION_END_DX2 = 4,
+ /* decap and IPv6 cross-connect */
+ SEG6_LOCAL_ACTION_END_DX6 = 5,
+ /* decap and IPv4 cross-connect */
+ SEG6_LOCAL_ACTION_END_DX4 = 6,
+ /* decap and lookup of DA in v6 table */
+ SEG6_LOCAL_ACTION_END_DT6 = 7,
+ /* decap and lookup of DA in v4 table */
+ SEG6_LOCAL_ACTION_END_DT4 = 8,
+ /* binding segment with insertion */
+ SEG6_LOCAL_ACTION_END_B6 = 9,
+ /* binding segment with encapsulation */
+ SEG6_LOCAL_ACTION_END_B6_ENCAP = 10,
+ /* binding segment with MPLS encap */
+ SEG6_LOCAL_ACTION_END_BM = 11,
+ /* lookup last seg in table */
+ SEG6_LOCAL_ACTION_END_S = 12,
+ /* forward to SR-unaware VNF with static proxy */
+ SEG6_LOCAL_ACTION_END_AS = 13,
+ /* forward to SR-unaware VNF with masquerading */
+ SEG6_LOCAL_ACTION_END_AM = 14,
+ /* custom BPF action */
+ SEG6_LOCAL_ACTION_END_BPF = 15,
+
+ __SEG6_LOCAL_ACTION_MAX,
+};
+
+#define SEG6_LOCAL_ACTION_MAX (__SEG6_LOCAL_ACTION_MAX - 1)
+
+enum {
+ SEG6_LOCAL_BPF_PROG_UNSPEC,
+ SEG6_LOCAL_BPF_PROG,
+ SEG6_LOCAL_BPF_PROG_NAME,
+ __SEG6_LOCAL_BPF_PROG_MAX,
+};
+
+#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+
+#endif
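
A small illustrative helper for decoding a SEG6_LOCAL_ACTION netlink attribute; the labels follow the SRv6 behavior names used in the comments above, and the table is deliberately partial:

#include <linux/seg6_local.h>

static const char *seg6_action_name(int action)
{
	static const char * const names[] = {
		[SEG6_LOCAL_ACTION_END]     = "End",
		[SEG6_LOCAL_ACTION_END_X]   = "End.X",
		[SEG6_LOCAL_ACTION_END_T]   = "End.T",
		[SEG6_LOCAL_ACTION_END_DX6] = "End.DX6",
		[SEG6_LOCAL_ACTION_END_DT4] = "End.DT4",
		[SEG6_LOCAL_ACTION_END_BPF] = "End.BPF",
	};

	if (action <= 0 || action > SEG6_LOCAL_ACTION_MAX ||
	    action >= (int)(sizeof(names) / sizeof(names[0])) || !names[action])
		return "unknown";
	return names[action];
}
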
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
new file mode 100644
index 000000000..7b35e98d3
--- /dev/null
+++ b/tools/include/uapi/linux/stat.h
@@ -0,0 +1,174 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_LINUX_STAT_H
+#define _UAPI_LINUX_STAT_H
+
+#include <linux/types.h>
+
+#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
+
+#define S_IFMT 00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK 0120000
+#define S_IFREG 0100000
+#define S_IFBLK 0060000
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFIFO 0010000
+#define S_ISUID 0004000
+#define S_ISGID 0002000
+#define S_ISVTX 0001000
+
+#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR)
+#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK)
+
+#define S_IRWXU 00700
+#define S_IRUSR 00400
+#define S_IWUSR 00200
+#define S_IXUSR 00100
+
+#define S_IRWXG 00070
+#define S_IRGRP 00040
+#define S_IWGRP 00020
+#define S_IXGRP 00010
+
+#define S_IRWXO 00007
+#define S_IROTH 00004
+#define S_IWOTH 00002
+#define S_IXOTH 00001
+
+#endif
+
+/*
+ * Timestamp structure for the timestamps in struct statx.
+ *
+ * tv_sec holds the number of seconds before (negative) or after (positive)
+ * 00:00:00 1st January 1970 UTC.
+ *
+ * tv_nsec holds a number of nanoseconds (0..999,999,999) after the tv_sec time.
+ *
+ * __reserved is held in case we need a yet finer resolution.
+ */
+struct statx_timestamp {
+ __s64 tv_sec;
+ __u32 tv_nsec;
+ __s32 __reserved;
+};
+
+/*
+ * Structures for the extended file attribute retrieval system call
+ * (statx()).
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to statx(). What statx() actually got will be indicated in
+ * stx_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ * - the bit will be cleared, and
+ *
+ * - the datum will be set to an appropriate fabricated value if one is
+ * available (e.g. CIFS can take a default uid and gid), otherwise
+ *
+ * - the field will be cleared;
+ *
+ * - otherwise, if explicitly requested:
+ *
+ * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is
+ * set or if the datum is considered out of date, and
+ *
+ * - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in approximate form without any
+ * effort, it will be filled in anyway, and the bit will be set upon return
+ * (it might not be up to date, however, and no attempt will be made to
+ * synchronise the internal state first);
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ * Items in STATX_BASIC_STATS may be marked unavailable on return, but they
+ * will have values installed for compatibility purposes so that stat() and
+ * co. can be emulated in userspace.
+ */
+struct statx {
+ /* 0x00 */
+ __u32 stx_mask; /* What results were written [uncond] */
+ __u32 stx_blksize; /* Preferred general I/O size [uncond] */
+ __u64 stx_attributes; /* Flags conveying information about the file [uncond] */
+ /* 0x10 */
+ __u32 stx_nlink; /* Number of hard links */
+ __u32 stx_uid; /* User ID of owner */
+ __u32 stx_gid; /* Group ID of owner */
+ __u16 stx_mode; /* File mode */
+ __u16 __spare0[1];
+ /* 0x20 */
+ __u64 stx_ino; /* Inode number */
+ __u64 stx_size; /* File size */
+ __u64 stx_blocks; /* Number of 512-byte blocks allocated */
+ __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */
+ /* 0x40 */
+ struct statx_timestamp stx_atime; /* Last access time */
+ struct statx_timestamp stx_btime; /* File creation time */
+ struct statx_timestamp stx_ctime; /* Last attribute change time */
+ struct statx_timestamp stx_mtime; /* Last data modification time */
+ /* 0x80 */
+ __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ __u32 stx_rdev_minor;
+ __u32 stx_dev_major; /* ID of device containing file [uncond] */
+ __u32 stx_dev_minor;
+ /* 0x90 */
+ __u64 __spare2[14]; /* Spare space for future expansion */
+ /* 0x100 */
+};
+
+/*
+ * Flags to be found in stx_mask
+ *
+ * Query request/result mask for statx() and struct statx::stx_mask.
+ *
+ * These bits should be set in the mask argument of statx() to request
+ * particular items when calling statx().
+ */
+#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */
+#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */
+#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */
+#define STATX_UID 0x00000008U /* Want/got stx_uid */
+#define STATX_GID 0x00000010U /* Want/got stx_gid */
+#define STATX_ATIME 0x00000020U /* Want/got stx_atime */
+#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */
+#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */
+#define STATX_INO 0x00000100U /* Want/got stx_ino */
+#define STATX_SIZE 0x00000200U /* Want/got stx_size */
+#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
+#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
+#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
+#define STATX_ALL 0x00000fffU /* All currently supported flags */
+#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+
+/*
+ * Attributes to be found in stx_attributes and masked in stx_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to ordinary userspace programs such as GUIs or ls rather than
+ * specialised tools.
+ *
+ * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS
+ * semantically. Where possible, the numerical value is picked to correspond
+ * also.
+ */
+#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */
+#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */
+#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */
+#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */
+#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */
+
+#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */
+
+
+#endif /* _UAPI_LINUX_STAT_H */
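
A sketch of the mask discipline described above: request one item via the raw syscall (assuming the libc defines SYS_statx; glibc grew a statx() wrapper only later) and trust the field only if the corresponding bit came back set in stx_mask:

#include <linux/stat.h>
#include <fcntl.h>		/* AT_FDCWD */
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static void print_btime(const char *path)
{
	struct statx stx;

	if (syscall(SYS_statx, AT_FDCWD, path, 0, STATX_BTIME, &stx)) {
		perror("statx");
		return;
	}
	if (stx.stx_mask & STATX_BTIME)		/* did the fs report it? */
		printf("btime: %lld\n", (long long)stx.stx_btime.tv_sec);
	else
		printf("birth time not available on this filesystem\n");
}
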
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
new file mode 100644
index 000000000..84c3de896
--- /dev/null
+++ b/tools/include/uapi/linux/vhost.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_VHOST_H
+#define _LINUX_VHOST_H
+/* Userspace interface for in-kernel virtio accelerators. */
+
+/* vhost is used to reduce the number of system calls involved in virtio.
+ *
+ * Existing virtio net code is used in the guest without modification.
+ *
+ * This header includes the interface used by the userspace hypervisor
+ * for device configuration.
+ */
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/ioctl.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+struct vhost_vring_state {
+ unsigned int index;
+ unsigned int num;
+};
+
+struct vhost_vring_file {
+ unsigned int index;
+ int fd; /* Pass -1 to unbind from file. */
+
+};
+
+struct vhost_vring_addr {
+ unsigned int index;
+ /* Option flags. */
+ unsigned int flags;
+ /* Flag values: */
+ /* Whether log address is valid. If set enables logging. */
+#define VHOST_VRING_F_LOG 0
+
+ /* Start of array of descriptors (virtually contiguous) */
+ __u64 desc_user_addr;
+ /* Used structure address. Must be 32 bit aligned */
+ __u64 used_user_addr;
+ /* Available structure address. Must be 16 bit aligned */
+ __u64 avail_user_addr;
+ /* Logging support. */
+ /* Log writes to used structure, at offset calculated from specified
+ * address. Address must be 32 bit aligned. */
+ __u64 log_guest_addr;
+};
+
+/* no alignment requirement */
+struct vhost_iotlb_msg {
+ __u64 iova;
+ __u64 size;
+ __u64 uaddr;
+#define VHOST_ACCESS_RO 0x1
+#define VHOST_ACCESS_WO 0x2
+#define VHOST_ACCESS_RW 0x3
+ __u8 perm;
+#define VHOST_IOTLB_MISS 1
+#define VHOST_IOTLB_UPDATE 2
+#define VHOST_IOTLB_INVALIDATE 3
+#define VHOST_IOTLB_ACCESS_FAIL 4
+ __u8 type;
+};
+
+#define VHOST_IOTLB_MSG 0x1
+#define VHOST_IOTLB_MSG_V2 0x2
+
+struct vhost_msg {
+ int type;
+ union {
+ struct vhost_iotlb_msg iotlb;
+ __u8 padding[64];
+ };
+};
+
+struct vhost_msg_v2 {
+ __u32 type;
+ __u32 reserved;
+ union {
+ struct vhost_iotlb_msg iotlb;
+ __u8 padding[64];
+ };
+};
+
+struct vhost_memory_region {
+ __u64 guest_phys_addr;
+ __u64 memory_size; /* bytes */
+ __u64 userspace_addr;
+ __u64 flags_padding; /* No flags are currently specified. */
+};
+
+/* All region addresses and sizes must be 4K aligned. */
+#define VHOST_PAGE_SIZE 0x1000
+
+struct vhost_memory {
+ __u32 nregions;
+ __u32 padding;
+ struct vhost_memory_region regions[0];
+};
+
+/* ioctls */
+
+#define VHOST_VIRTIO 0xAF
+
+/* Features bitmask for forward compatibility. Transport bits are used for
+ * vhost specific features. */
+#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64)
+#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64)
+
+/* Set current process as the (exclusive) owner of this file descriptor. This
+ * must be called before any other vhost command. Further calls to
+ * VHOST_SET_OWNER fail until VHOST_RESET_OWNER is called. */
+#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01)
+/* Give up ownership, and reset the device to default values.
+ * Allows a subsequent call to VHOST_SET_OWNER to succeed. */
+#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02)
+
+/* Set up/modify memory layout */
+#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory)
+
+/* Write logging setup. */
+/* Memory writes can optionally be logged by setting a bit at an offset
+ * (calculated from the physical address) from specified log base.
+ * The bit is set using an atomic 32 bit operation. */
+/* Set base address for logging. */
+#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
+/* Specify an eventfd file descriptor to signal on log write. */
+#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+
+/* Ring setup. */
+/* Set number of descriptors in ring. This parameter can not
+ * be modified while ring is running (bound to a device). */
+#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state)
+/* Set addresses for the ring. */
+#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr)
+/* Base value where queue looks for available descriptors */
+#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+/* Get accessor: reads index, writes value in num */
+#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
+
+/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN
+ * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL).
+ * The byte order cannot be changed while the device is active: trying to do so
+ * returns -EBUSY.
+ * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is
+ * set.
+ * Not all kernel configurations support this ioctl, but all configurations that
+ * support SET also support GET.
+ */
+#define VHOST_VRING_LITTLE_ENDIAN 0
+#define VHOST_VRING_BIG_ENDIAN 1
+#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
+#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+
+/* The following ioctls use eventfd file descriptors to signal and poll
+ * for events. */
+
+/* Set eventfd to poll for added buffers */
+#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file)
+/* Set eventfd to signal when buffers have been used */
+#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file)
+/* Set eventfd to signal an error */
+#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file)
+/* Set busy loop timeout (in us) */
+#define VHOST_SET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x23, \
+ struct vhost_vring_state)
+/* Get busy loop timeout (in us) */
+#define VHOST_GET_VRING_BUSYLOOP_TIMEOUT _IOW(VHOST_VIRTIO, 0x24, \
+ struct vhost_vring_state)
+
+/* Set or get vhost backend capability */
+
+/* Use message type V2 */
+#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+
+#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
+
+/* VHOST_NET specific defines */
+
+/* Attach virtio net ring to a raw socket, or tap device.
+ * The socket must already be bound to an ethernet device; this device will be
+ * used for transmit. Pass fd -1 to unbind from the socket and the transmit
+ * device. This can be used to stop the ring (e.g. for migration). */
+#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file)
+
+/* Feature bits */
+/* Log all write descriptors. Can be changed while device is active. */
+#define VHOST_F_LOG_ALL 26
+/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
+#define VHOST_NET_F_VIRTIO_NET_HDR 27
+
+/* VHOST_SCSI specific definitions */
+
+/*
+ * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
+ *
+ * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
+ * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
+ * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target.
+ * All the targets under vhost_wwpn can be seen and used by the guest.
+ */
+
+#define VHOST_SCSI_ABI_VERSION 1
+
+struct vhost_scsi_target {
+ int abi_version;
+ char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */
+ unsigned short vhost_tpgt;
+ unsigned short reserved;
+};
+
+#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
+#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
+/* Changing this breaks userspace. */
+#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
+/* Set and get the events missed flag */
+#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
+#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
+
+/* VHOST_VSOCK specific defines */
+
+#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
+#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
+
+#endif
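
Tying the ioctls together, a hedged sketch of the first steps a userspace hypervisor takes on a vhost-net fd: claim ownership, then negotiate features. Memory-table, ring and backend setup would follow; error handling is minimal by design:

#include <linux/vhost.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <unistd.h>

static int vhost_net_init(void)
{
	__u64 features;
	int fd = open("/dev/vhost-net", O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, VHOST_SET_OWNER) ||		/* must come first */
	    ioctl(fd, VHOST_GET_FEATURES, &features)) {
		close(fd);
		return -1;
	}
	/* A real hypervisor would mask 'features' to what it supports here. */
	if (ioctl(fd, VHOST_SET_FEATURES, &features)) {
		close(fd);
		return -1;
	}
	return fd;		/* ready for VHOST_SET_MEM_TABLE etc. */
}
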
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
new file mode 100644
index 000000000..ed0a120d4
--- /dev/null
+++ b/tools/include/uapi/sound/asound.h
@@ -0,0 +1,1037 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * Advanced Linux Sound Architecture - ALSA - Driver
+ * Copyright (c) 1994-2003 by Jaroslav Kysela <perex@perex.cz>,
+ * Abramo Bagnara <abramo@alsa-project.org>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef _UAPI__SOUND_ASOUND_H
+#define _UAPI__SOUND_ASOUND_H
+
+#if defined(__KERNEL__) || defined(__linux__)
+#include <linux/types.h>
+#else
+#include <sys/ioctl.h>
+#endif
+
+#ifndef __KERNEL__
+#include <stdlib.h>
+#endif
+
+/*
+ * protocol version
+ */
+
+#define SNDRV_PROTOCOL_VERSION(major, minor, subminor) (((major)<<16)|((minor)<<8)|(subminor))
+#define SNDRV_PROTOCOL_MAJOR(version) (((version)>>16)&0xffff)
+#define SNDRV_PROTOCOL_MINOR(version) (((version)>>8)&0xff)
+#define SNDRV_PROTOCOL_MICRO(version) ((version)&0xff)
+#define SNDRV_PROTOCOL_INCOMPATIBLE(kversion, uversion) \
+ (SNDRV_PROTOCOL_MAJOR(kversion) != SNDRV_PROTOCOL_MAJOR(uversion) || \
+ (SNDRV_PROTOCOL_MAJOR(kversion) == SNDRV_PROTOCOL_MAJOR(uversion) && \
+ SNDRV_PROTOCOL_MINOR(kversion) != SNDRV_PROTOCOL_MINOR(uversion)))
+
+/****************************************************************************
+ * *
+ * Digital audio interface *
+ * *
+ ****************************************************************************/
+
+struct snd_aes_iec958 {
+ unsigned char status[24]; /* AES/IEC958 channel status bits */
+ unsigned char subcode[147]; /* AES/IEC958 subcode bits */
+ unsigned char pad; /* nothing */
+ unsigned char dig_subframe[4]; /* AES/IEC958 subframe bits */
+};
+
+/****************************************************************************
+ * *
+ * CEA-861 Audio InfoFrame. Used in HDMI and DisplayPort *
+ * *
+ ****************************************************************************/
+
+struct snd_cea_861_aud_if {
+ unsigned char db1_ct_cc; /* coding type and channel count */
+ unsigned char db2_sf_ss; /* sample frequency and size */
+ unsigned char db3; /* not used, all zeros */
+ unsigned char db4_ca; /* channel allocation code */
+ unsigned char db5_dminh_lsv; /* downmix inhibit & level-shift values */
+};
+
+/****************************************************************************
+ * *
+ * Section for driver hardware dependent interface - /dev/snd/hw? *
+ * *
+ ****************************************************************************/
+
+#define SNDRV_HWDEP_VERSION SNDRV_PROTOCOL_VERSION(1, 0, 1)
+
+enum {
+ SNDRV_HWDEP_IFACE_OPL2 = 0,
+ SNDRV_HWDEP_IFACE_OPL3,
+ SNDRV_HWDEP_IFACE_OPL4,
+ SNDRV_HWDEP_IFACE_SB16CSP, /* Creative Signal Processor */
+ SNDRV_HWDEP_IFACE_EMU10K1, /* FX8010 processor in EMU10K1 chip */
+ SNDRV_HWDEP_IFACE_YSS225, /* Yamaha FX processor */
+ SNDRV_HWDEP_IFACE_ICS2115, /* Wavetable synth */
+ SNDRV_HWDEP_IFACE_SSCAPE, /* Ensoniq SoundScape ISA card (MC68EC000) */
+ SNDRV_HWDEP_IFACE_VX, /* Digigram VX cards */
+ SNDRV_HWDEP_IFACE_MIXART, /* Digigram miXart cards */
+ SNDRV_HWDEP_IFACE_USX2Y, /* Tascam US122, US224 & US428 usb */
+ SNDRV_HWDEP_IFACE_EMUX_WAVETABLE, /* EmuX wavetable */
+ SNDRV_HWDEP_IFACE_BLUETOOTH, /* Bluetooth audio */
+ SNDRV_HWDEP_IFACE_USX2Y_PCM, /* Tascam US122, US224 & US428 rawusb pcm */
+ SNDRV_HWDEP_IFACE_PCXHR, /* Digigram PCXHR */
+ SNDRV_HWDEP_IFACE_SB_RC, /* SB Extigy/Audigy2NX remote control */
+ SNDRV_HWDEP_IFACE_HDA, /* HD-audio */
+ SNDRV_HWDEP_IFACE_USB_STREAM, /* direct access to usb stream */
+ SNDRV_HWDEP_IFACE_FW_DICE, /* TC DICE FireWire device */
+ SNDRV_HWDEP_IFACE_FW_FIREWORKS, /* Echo Audio Fireworks based device */
+ SNDRV_HWDEP_IFACE_FW_BEBOB, /* BridgeCo BeBoB based device */
+ SNDRV_HWDEP_IFACE_FW_OXFW, /* Oxford OXFW970/971 based device */
+ SNDRV_HWDEP_IFACE_FW_DIGI00X, /* Digidesign Digi 002/003 family */
+ SNDRV_HWDEP_IFACE_FW_TASCAM, /* TASCAM FireWire series */
+ SNDRV_HWDEP_IFACE_LINE6, /* Line6 USB processors */
+ SNDRV_HWDEP_IFACE_FW_MOTU, /* MOTU FireWire series */
+ SNDRV_HWDEP_IFACE_FW_FIREFACE, /* RME Fireface series */
+
+ /* Don't forget to change the following: */
+ SNDRV_HWDEP_IFACE_LAST = SNDRV_HWDEP_IFACE_FW_FIREFACE
+};
+
+struct snd_hwdep_info {
+ unsigned int device; /* WR: device number */
+ int card; /* R: card number */
+ unsigned char id[64]; /* ID (user selectable) */
+ unsigned char name[80]; /* hwdep name */
+ int iface; /* hwdep interface */
+ unsigned char reserved[64]; /* reserved for future */
+};
+
+/* generic DSP loader */
+struct snd_hwdep_dsp_status {
+ unsigned int version; /* R: driver-specific version */
+ unsigned char id[32]; /* R: driver-specific ID string */
+ unsigned int num_dsps; /* R: number of DSP images to transfer */
+ unsigned int dsp_loaded; /* R: bit flags indicating the loaded DSPs */
+ unsigned int chip_ready; /* R: 1 = initialization finished */
+ unsigned char reserved[16]; /* reserved for future use */
+};
+
+struct snd_hwdep_dsp_image {
+ unsigned int index; /* W: DSP index */
+ unsigned char name[64]; /* W: ID (e.g. file name) */
+ unsigned char __user *image; /* W: binary image */
+ size_t length; /* W: size of image in bytes */
+ unsigned long driver_data; /* W: driver-specific data */
+};
+
+#define SNDRV_HWDEP_IOCTL_PVERSION _IOR ('H', 0x00, int)
+#define SNDRV_HWDEP_IOCTL_INFO _IOR ('H', 0x01, struct snd_hwdep_info)
+#define SNDRV_HWDEP_IOCTL_DSP_STATUS _IOR('H', 0x02, struct snd_hwdep_dsp_status)
+#define SNDRV_HWDEP_IOCTL_DSP_LOAD _IOW('H', 0x03, struct snd_hwdep_dsp_image)
+
+/*****************************************************************************
+ * *
+ * Digital Audio (PCM) interface - /dev/snd/pcm?? *
+ * *
+ *****************************************************************************/
+
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 14)
+
+typedef unsigned long snd_pcm_uframes_t;
+typedef signed long snd_pcm_sframes_t;
+
+enum {
+ SNDRV_PCM_CLASS_GENERIC = 0, /* standard mono or stereo device */
+ SNDRV_PCM_CLASS_MULTI, /* multichannel device */
+ SNDRV_PCM_CLASS_MODEM, /* software modem class */
+ SNDRV_PCM_CLASS_DIGITIZER, /* digitizer class */
+ /* Don't forget to change the following: */
+ SNDRV_PCM_CLASS_LAST = SNDRV_PCM_CLASS_DIGITIZER,
+};
+
+enum {
+ SNDRV_PCM_SUBCLASS_GENERIC_MIX = 0, /* mono or stereo subdevices are mixed together */
+ SNDRV_PCM_SUBCLASS_MULTI_MIX, /* multichannel subdevices are mixed together */
+ /* Don't forget to change the following: */
+ SNDRV_PCM_SUBCLASS_LAST = SNDRV_PCM_SUBCLASS_MULTI_MIX,
+};
+
+enum {
+ SNDRV_PCM_STREAM_PLAYBACK = 0,
+ SNDRV_PCM_STREAM_CAPTURE,
+ SNDRV_PCM_STREAM_LAST = SNDRV_PCM_STREAM_CAPTURE,
+};
+
+typedef int __bitwise snd_pcm_access_t;
+#define SNDRV_PCM_ACCESS_MMAP_INTERLEAVED ((__force snd_pcm_access_t) 0) /* interleaved mmap */
+#define SNDRV_PCM_ACCESS_MMAP_NONINTERLEAVED ((__force snd_pcm_access_t) 1) /* noninterleaved mmap */
+#define SNDRV_PCM_ACCESS_MMAP_COMPLEX ((__force snd_pcm_access_t) 2) /* complex mmap */
+#define SNDRV_PCM_ACCESS_RW_INTERLEAVED ((__force snd_pcm_access_t) 3) /* readi/writei */
+#define SNDRV_PCM_ACCESS_RW_NONINTERLEAVED ((__force snd_pcm_access_t) 4) /* readn/writen */
+#define SNDRV_PCM_ACCESS_LAST SNDRV_PCM_ACCESS_RW_NONINTERLEAVED
+
+typedef int __bitwise snd_pcm_format_t;
+#define SNDRV_PCM_FORMAT_S8 ((__force snd_pcm_format_t) 0)
+#define SNDRV_PCM_FORMAT_U8 ((__force snd_pcm_format_t) 1)
+#define SNDRV_PCM_FORMAT_S16_LE ((__force snd_pcm_format_t) 2)
+#define SNDRV_PCM_FORMAT_S16_BE ((__force snd_pcm_format_t) 3)
+#define SNDRV_PCM_FORMAT_U16_LE ((__force snd_pcm_format_t) 4)
+#define SNDRV_PCM_FORMAT_U16_BE ((__force snd_pcm_format_t) 5)
+#define SNDRV_PCM_FORMAT_S24_LE ((__force snd_pcm_format_t) 6) /* low three bytes */
+#define SNDRV_PCM_FORMAT_S24_BE ((__force snd_pcm_format_t) 7) /* low three bytes */
+#define SNDRV_PCM_FORMAT_U24_LE ((__force snd_pcm_format_t) 8) /* low three bytes */
+#define SNDRV_PCM_FORMAT_U24_BE ((__force snd_pcm_format_t) 9) /* low three bytes */
+#define SNDRV_PCM_FORMAT_S32_LE ((__force snd_pcm_format_t) 10)
+#define SNDRV_PCM_FORMAT_S32_BE ((__force snd_pcm_format_t) 11)
+#define SNDRV_PCM_FORMAT_U32_LE ((__force snd_pcm_format_t) 12)
+#define SNDRV_PCM_FORMAT_U32_BE ((__force snd_pcm_format_t) 13)
+#define SNDRV_PCM_FORMAT_FLOAT_LE ((__force snd_pcm_format_t) 14) /* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */
+#define SNDRV_PCM_FORMAT_FLOAT_BE ((__force snd_pcm_format_t) 15) /* 4-byte float, IEEE-754 32-bit, range -1.0 to 1.0 */
+#define SNDRV_PCM_FORMAT_FLOAT64_LE ((__force snd_pcm_format_t) 16) /* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */
+#define SNDRV_PCM_FORMAT_FLOAT64_BE ((__force snd_pcm_format_t) 17) /* 8-byte float, IEEE-754 64-bit, range -1.0 to 1.0 */
+#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE ((__force snd_pcm_format_t) 18) /* IEC-958 subframe, Little Endian */
+#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE ((__force snd_pcm_format_t) 19) /* IEC-958 subframe, Big Endian */
+#define SNDRV_PCM_FORMAT_MU_LAW ((__force snd_pcm_format_t) 20)
+#define SNDRV_PCM_FORMAT_A_LAW ((__force snd_pcm_format_t) 21)
+#define SNDRV_PCM_FORMAT_IMA_ADPCM ((__force snd_pcm_format_t) 22)
+#define SNDRV_PCM_FORMAT_MPEG ((__force snd_pcm_format_t) 23)
+#define SNDRV_PCM_FORMAT_GSM ((__force snd_pcm_format_t) 24)
+#define SNDRV_PCM_FORMAT_S20_LE ((__force snd_pcm_format_t) 25) /* in four bytes, LSB justified */
+#define SNDRV_PCM_FORMAT_S20_BE ((__force snd_pcm_format_t) 26) /* in four bytes, LSB justified */
+#define SNDRV_PCM_FORMAT_U20_LE ((__force snd_pcm_format_t) 27) /* in four bytes, LSB justified */
+#define SNDRV_PCM_FORMAT_U20_BE ((__force snd_pcm_format_t) 28) /* in four bytes, LSB justified */
+/* gap in the numbering for a future standard linear format */
+#define SNDRV_PCM_FORMAT_SPECIAL ((__force snd_pcm_format_t) 31)
+#define SNDRV_PCM_FORMAT_S24_3LE ((__force snd_pcm_format_t) 32) /* in three bytes */
+#define SNDRV_PCM_FORMAT_S24_3BE ((__force snd_pcm_format_t) 33) /* in three bytes */
+#define SNDRV_PCM_FORMAT_U24_3LE ((__force snd_pcm_format_t) 34) /* in three bytes */
+#define SNDRV_PCM_FORMAT_U24_3BE ((__force snd_pcm_format_t) 35) /* in three bytes */
+#define SNDRV_PCM_FORMAT_S20_3LE ((__force snd_pcm_format_t) 36) /* in three bytes */
+#define SNDRV_PCM_FORMAT_S20_3BE ((__force snd_pcm_format_t) 37) /* in three bytes */
+#define SNDRV_PCM_FORMAT_U20_3LE ((__force snd_pcm_format_t) 38) /* in three bytes */
+#define SNDRV_PCM_FORMAT_U20_3BE ((__force snd_pcm_format_t) 39) /* in three bytes */
+#define SNDRV_PCM_FORMAT_S18_3LE ((__force snd_pcm_format_t) 40) /* in three bytes */
+#define SNDRV_PCM_FORMAT_S18_3BE ((__force snd_pcm_format_t) 41) /* in three bytes */
+#define SNDRV_PCM_FORMAT_U18_3LE ((__force snd_pcm_format_t) 42) /* in three bytes */
+#define SNDRV_PCM_FORMAT_U18_3BE ((__force snd_pcm_format_t) 43) /* in three bytes */
+#define SNDRV_PCM_FORMAT_G723_24 ((__force snd_pcm_format_t) 44) /* 8 samples in 3 bytes */
+#define SNDRV_PCM_FORMAT_G723_24_1B ((__force snd_pcm_format_t) 45) /* 1 sample in 1 byte */
+#define SNDRV_PCM_FORMAT_G723_40 ((__force snd_pcm_format_t) 46) /* 8 Samples in 5 bytes */
+#define SNDRV_PCM_FORMAT_G723_40_1B ((__force snd_pcm_format_t) 47) /* 1 sample in 1 byte */
+#define SNDRV_PCM_FORMAT_DSD_U8 ((__force snd_pcm_format_t) 48) /* DSD, 1-byte samples DSD (x8) */
+#define SNDRV_PCM_FORMAT_DSD_U16_LE ((__force snd_pcm_format_t) 49) /* DSD, 2-byte samples DSD (x16), little endian */
+#define SNDRV_PCM_FORMAT_DSD_U32_LE ((__force snd_pcm_format_t) 50) /* DSD, 4-byte samples DSD (x32), little endian */
+#define SNDRV_PCM_FORMAT_DSD_U16_BE ((__force snd_pcm_format_t) 51) /* DSD, 2-byte samples DSD (x16), big endian */
+#define SNDRV_PCM_FORMAT_DSD_U32_BE ((__force snd_pcm_format_t) 52) /* DSD, 4-byte samples DSD (x32), big endian */
+#define SNDRV_PCM_FORMAT_LAST SNDRV_PCM_FORMAT_DSD_U32_BE
+#define SNDRV_PCM_FORMAT_FIRST SNDRV_PCM_FORMAT_S8
+
+#ifdef SNDRV_LITTLE_ENDIAN
+#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_LE
+#define SNDRV_PCM_FORMAT_U16 SNDRV_PCM_FORMAT_U16_LE
+#define SNDRV_PCM_FORMAT_S24 SNDRV_PCM_FORMAT_S24_LE
+#define SNDRV_PCM_FORMAT_U24 SNDRV_PCM_FORMAT_U24_LE
+#define SNDRV_PCM_FORMAT_S32 SNDRV_PCM_FORMAT_S32_LE
+#define SNDRV_PCM_FORMAT_U32 SNDRV_PCM_FORMAT_U32_LE
+#define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_LE
+#define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_LE
+#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_LE
+#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_LE
+#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_LE
+#endif
+#ifdef SNDRV_BIG_ENDIAN
+#define SNDRV_PCM_FORMAT_S16 SNDRV_PCM_FORMAT_S16_BE
+#define SNDRV_PCM_FORMAT_U16 SNDRV_PCM_FORMAT_U16_BE
+#define SNDRV_PCM_FORMAT_S24 SNDRV_PCM_FORMAT_S24_BE
+#define SNDRV_PCM_FORMAT_U24 SNDRV_PCM_FORMAT_U24_BE
+#define SNDRV_PCM_FORMAT_S32 SNDRV_PCM_FORMAT_S32_BE
+#define SNDRV_PCM_FORMAT_U32 SNDRV_PCM_FORMAT_U32_BE
+#define SNDRV_PCM_FORMAT_FLOAT SNDRV_PCM_FORMAT_FLOAT_BE
+#define SNDRV_PCM_FORMAT_FLOAT64 SNDRV_PCM_FORMAT_FLOAT64_BE
+#define SNDRV_PCM_FORMAT_IEC958_SUBFRAME SNDRV_PCM_FORMAT_IEC958_SUBFRAME_BE
+#define SNDRV_PCM_FORMAT_S20 SNDRV_PCM_FORMAT_S20_BE
+#define SNDRV_PCM_FORMAT_U20 SNDRV_PCM_FORMAT_U20_BE
+#endif
+
+typedef int __bitwise snd_pcm_subformat_t;
+#define SNDRV_PCM_SUBFORMAT_STD ((__force snd_pcm_subformat_t) 0)
+#define SNDRV_PCM_SUBFORMAT_LAST SNDRV_PCM_SUBFORMAT_STD
+
+#define SNDRV_PCM_INFO_MMAP 0x00000001 /* hardware supports mmap */
+#define SNDRV_PCM_INFO_MMAP_VALID 0x00000002 /* period data are valid during transfer */
+#define SNDRV_PCM_INFO_DOUBLE 0x00000004 /* Double buffering needed for PCM start/stop */
+#define SNDRV_PCM_INFO_BATCH 0x00000010 /* double buffering */
+#define SNDRV_PCM_INFO_SYNC_APPLPTR 0x00000020 /* needs explicit sync of the appl_ptr update */
+#define SNDRV_PCM_INFO_INTERLEAVED 0x00000100 /* channels are interleaved */
+#define SNDRV_PCM_INFO_NONINTERLEAVED 0x00000200 /* channels are not interleaved */
+#define SNDRV_PCM_INFO_COMPLEX 0x00000400 /* complex frame organization (mmap only) */
+#define SNDRV_PCM_INFO_BLOCK_TRANSFER 0x00010000 /* hardware transfers blocks of samples */
+#define SNDRV_PCM_INFO_OVERRANGE 0x00020000 /* hardware supports ADC (capture) overrange detection */
+#define SNDRV_PCM_INFO_RESUME 0x00040000 /* hardware supports stream resume after suspend */
+#define SNDRV_PCM_INFO_PAUSE 0x00080000 /* pause ioctl is supported */
+#define SNDRV_PCM_INFO_HALF_DUPLEX 0x00100000 /* only half duplex */
+#define SNDRV_PCM_INFO_JOINT_DUPLEX 0x00200000 /* playback and capture stream are somewhat correlated */
+#define SNDRV_PCM_INFO_SYNC_START 0x00400000 /* pcm supports some kind of synchronized start */
+#define SNDRV_PCM_INFO_NO_PERIOD_WAKEUP 0x00800000 /* period wakeup can be disabled */
+#define SNDRV_PCM_INFO_HAS_WALL_CLOCK 0x01000000 /* (Deprecated) has audio wall clock for audio/system time sync */
+#define SNDRV_PCM_INFO_HAS_LINK_ATIME 0x01000000 /* report hardware link audio time, reset on startup */
+#define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */
+#define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */
+#define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */
+
+#define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */
+#define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */
+
+
+
+typedef int __bitwise snd_pcm_state_t;
+#define SNDRV_PCM_STATE_OPEN ((__force snd_pcm_state_t) 0) /* stream is open */
+#define SNDRV_PCM_STATE_SETUP ((__force snd_pcm_state_t) 1) /* stream has a setup */
+#define SNDRV_PCM_STATE_PREPARED ((__force snd_pcm_state_t) 2) /* stream is ready to start */
+#define SNDRV_PCM_STATE_RUNNING ((__force snd_pcm_state_t) 3) /* stream is running */
+#define SNDRV_PCM_STATE_XRUN ((__force snd_pcm_state_t) 4) /* stream reached an xrun */
+#define SNDRV_PCM_STATE_DRAINING ((__force snd_pcm_state_t) 5) /* stream is draining */
+#define SNDRV_PCM_STATE_PAUSED ((__force snd_pcm_state_t) 6) /* stream is paused */
+#define SNDRV_PCM_STATE_SUSPENDED ((__force snd_pcm_state_t) 7) /* hardware is suspended */
+#define SNDRV_PCM_STATE_DISCONNECTED ((__force snd_pcm_state_t) 8) /* hardware is disconnected */
+#define SNDRV_PCM_STATE_LAST SNDRV_PCM_STATE_DISCONNECTED
+
+enum {
+ SNDRV_PCM_MMAP_OFFSET_DATA = 0x00000000,
+ SNDRV_PCM_MMAP_OFFSET_STATUS = 0x80000000,
+ SNDRV_PCM_MMAP_OFFSET_CONTROL = 0x81000000,
+};
+
+union snd_pcm_sync_id {
+ unsigned char id[16];
+ unsigned short id16[8];
+ unsigned int id32[4];
+};
+
+struct snd_pcm_info {
+ unsigned int device; /* RO/WR (control): device number */
+ unsigned int subdevice; /* RO/WR (control): subdevice number */
+ int stream; /* RO/WR (control): stream direction */
+ int card; /* R: card number */
+ unsigned char id[64]; /* ID (user selectable) */
+ unsigned char name[80]; /* name of this device */
+ unsigned char subname[32]; /* subdevice name */
+ int dev_class; /* SNDRV_PCM_CLASS_* */
+ int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */
+ unsigned int subdevices_count;
+ unsigned int subdevices_avail;
+ union snd_pcm_sync_id sync; /* hardware synchronization ID */
+ unsigned char reserved[64]; /* reserved for future... */
+};
+
+typedef int snd_pcm_hw_param_t;
+#define SNDRV_PCM_HW_PARAM_ACCESS 0 /* Access type */
+#define SNDRV_PCM_HW_PARAM_FORMAT 1 /* Format */
+#define SNDRV_PCM_HW_PARAM_SUBFORMAT 2 /* Subformat */
+#define SNDRV_PCM_HW_PARAM_FIRST_MASK SNDRV_PCM_HW_PARAM_ACCESS
+#define SNDRV_PCM_HW_PARAM_LAST_MASK SNDRV_PCM_HW_PARAM_SUBFORMAT
+
+#define SNDRV_PCM_HW_PARAM_SAMPLE_BITS 8 /* Bits per sample */
+#define SNDRV_PCM_HW_PARAM_FRAME_BITS 9 /* Bits per frame */
+#define SNDRV_PCM_HW_PARAM_CHANNELS 10 /* Channels */
+#define SNDRV_PCM_HW_PARAM_RATE 11 /* Approx rate */
+#define SNDRV_PCM_HW_PARAM_PERIOD_TIME 12 /* Approx distance between
+ * interrupts in us
+ */
+#define SNDRV_PCM_HW_PARAM_PERIOD_SIZE 13 /* Approx frames between
+ * interrupts
+ */
+#define SNDRV_PCM_HW_PARAM_PERIOD_BYTES 14 /* Approx bytes between
+ * interrupts
+ */
+#define SNDRV_PCM_HW_PARAM_PERIODS 15 /* Approx interrupts per
+ * buffer
+ */
+#define SNDRV_PCM_HW_PARAM_BUFFER_TIME 16 /* Approx duration of buffer
+ * in us
+ */
+#define SNDRV_PCM_HW_PARAM_BUFFER_SIZE 17 /* Size of buffer in frames */
+#define SNDRV_PCM_HW_PARAM_BUFFER_BYTES 18 /* Size of buffer in bytes */
+#define SNDRV_PCM_HW_PARAM_TICK_TIME 19 /* Approx tick duration in us */
+#define SNDRV_PCM_HW_PARAM_FIRST_INTERVAL SNDRV_PCM_HW_PARAM_SAMPLE_BITS
+#define SNDRV_PCM_HW_PARAM_LAST_INTERVAL SNDRV_PCM_HW_PARAM_TICK_TIME
+
+#define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0) /* avoid rate resampling */
+#define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER (1<<1) /* export buffer */
+#define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP (1<<2) /* disable period wakeups */
+
+struct snd_interval {
+ unsigned int min, max;
+ unsigned int openmin:1,
+ openmax:1,
+ integer:1,
+ empty:1;
+};
+
+#define SNDRV_MASK_MAX 256
+
+struct snd_mask {
+ __u32 bits[(SNDRV_MASK_MAX+31)/32];
+};
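+/* 256 mask bits, stored as (256 + 31) / 32 = eight 32-bit words */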
+
+struct snd_pcm_hw_params {
+ unsigned int flags;
+ struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK -
+ SNDRV_PCM_HW_PARAM_FIRST_MASK + 1];
+ struct snd_mask mres[5]; /* reserved masks */
+ struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL -
+ SNDRV_PCM_HW_PARAM_FIRST_INTERVAL + 1];
+ struct snd_interval ires[9]; /* reserved intervals */
+ unsigned int rmask; /* W: requested masks */
+ unsigned int cmask; /* R: changed masks */
+ unsigned int info; /* R: Info flags for returned setup */
+ unsigned int msbits; /* R: used most significant bits */
+ unsigned int rate_num; /* R: rate numerator */
+ unsigned int rate_den; /* R: rate denominator */
+ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
+ unsigned char reserved[64]; /* reserved for future */
+};
+
+enum {
+ SNDRV_PCM_TSTAMP_NONE = 0,
+ SNDRV_PCM_TSTAMP_ENABLE,
+ SNDRV_PCM_TSTAMP_LAST = SNDRV_PCM_TSTAMP_ENABLE,
+};
+
+struct snd_pcm_sw_params {
+ int tstamp_mode; /* timestamp mode */
+ unsigned int period_step;
+ unsigned int sleep_min; /* min ticks to sleep */
+ snd_pcm_uframes_t avail_min; /* min avail frames for wakeup */
+ snd_pcm_uframes_t xfer_align; /* obsolete: xfer size needs to be a multiple */
+ snd_pcm_uframes_t start_threshold; /* min hw_avail frames for automatic start */
+ snd_pcm_uframes_t stop_threshold; /* min avail frames for automatic stop */
+ snd_pcm_uframes_t silence_threshold; /* min distance from noise for silence filling */
+ snd_pcm_uframes_t silence_size; /* silence block size */
+ snd_pcm_uframes_t boundary; /* pointers wrap point */
+ unsigned int proto; /* protocol version */
+ unsigned int tstamp_type; /* timestamp type (req. proto >= 2.0.12) */
+ unsigned char reserved[56]; /* reserved for future */
+};
+
+struct snd_pcm_channel_info {
+ unsigned int channel;
+ __kernel_off_t offset; /* mmap offset */
+ unsigned int first; /* offset to first sample in bits */
+ unsigned int step; /* samples distance in bits */
+};
+
+enum {
+ /*
+ * first definition for backwards compatibility only,
+ * maps to wallclock/link time for HDAudio playback and DEFAULT/DMA time for everything else
+ */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_COMPAT = 0,
+
+ /* timestamp definitions */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_DEFAULT = 1, /* DMA time, reported as per hw_ptr */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK = 2, /* link time reported by sample or wallclock counter, reset on startup */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ABSOLUTE = 3, /* link time reported by sample or wallclock counter, not reset on startup */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_ESTIMATED = 4, /* link time estimated indirectly */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED = 5, /* link time synchronized with system time */
+ SNDRV_PCM_AUDIO_TSTAMP_TYPE_LAST = SNDRV_PCM_AUDIO_TSTAMP_TYPE_LINK_SYNCHRONIZED
+};
+
+struct snd_pcm_status {
+ snd_pcm_state_t state; /* stream state */
+ struct timespec trigger_tstamp; /* time when stream was started/stopped/paused */
+ struct timespec tstamp; /* reference timestamp */
+ snd_pcm_uframes_t appl_ptr; /* appl ptr */
+ snd_pcm_uframes_t hw_ptr; /* hw ptr */
+ snd_pcm_sframes_t delay; /* current delay in frames */
+ snd_pcm_uframes_t avail; /* number of frames available */
+ snd_pcm_uframes_t avail_max; /* max frames available on hw since last status */
+ snd_pcm_uframes_t overrange; /* count of ADC (capture) overrange detections from last status */
+ snd_pcm_state_t suspended_state; /* suspended stream state */
+ __u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */
+ struct timespec audio_tstamp; /* sample counter, wall clock, PHC or on-demand sync'ed */
+ struct timespec driver_tstamp; /* useful in case reference system tstamp is reported with delay */
+ __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */
+ unsigned char reserved[52-2*sizeof(struct timespec)]; /* must be filled with zero */
+};
+
+struct snd_pcm_mmap_status {
+ snd_pcm_state_t state; /* RO: state - SNDRV_PCM_STATE_XXXX */
+ int pad1; /* Needed for 64 bit alignment */
+ snd_pcm_uframes_t hw_ptr; /* RO: hw ptr (0...boundary-1) */
+ struct timespec tstamp; /* Timestamp */
+ snd_pcm_state_t suspended_state; /* RO: suspended stream state */
+ struct timespec audio_tstamp; /* from sample counter or wall clock */
+};
+
+struct snd_pcm_mmap_control {
+ snd_pcm_uframes_t appl_ptr; /* RW: appl ptr (0...boundary-1) */
+ snd_pcm_uframes_t avail_min; /* RW: min available frames for wakeup */
+};
+
+#define SNDRV_PCM_SYNC_PTR_HWSYNC (1<<0) /* execute hwsync */
+#define SNDRV_PCM_SYNC_PTR_APPL (1<<1) /* get appl_ptr from driver (r/w op) */
+#define SNDRV_PCM_SYNC_PTR_AVAIL_MIN (1<<2) /* get avail_min from driver */
+
+struct snd_pcm_sync_ptr {
+ unsigned int flags;
+ union {
+ struct snd_pcm_mmap_status status;
+ unsigned char reserved[64];
+ } s;
+ union {
+ struct snd_pcm_mmap_control control;
+ unsigned char reserved[64];
+ } c;
+};
+
+struct snd_xferi {
+ snd_pcm_sframes_t result;
+ void __user *buf;
+ snd_pcm_uframes_t frames;
+};
+
+struct snd_xfern {
+ snd_pcm_sframes_t result;
+ void __user * __user *bufs;
+ snd_pcm_uframes_t frames;
+};
+
+enum {
+ SNDRV_PCM_TSTAMP_TYPE_GETTIMEOFDAY = 0, /* gettimeofday equivalent */
+ SNDRV_PCM_TSTAMP_TYPE_MONOTONIC, /* posix_clock_monotonic equivalent */
+ SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW, /* monotonic_raw (no NTP) */
+ SNDRV_PCM_TSTAMP_TYPE_LAST = SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW,
+};
+
+/* channel positions */
+enum {
+ SNDRV_CHMAP_UNKNOWN = 0,
+ SNDRV_CHMAP_NA, /* N/A, silent */
+ SNDRV_CHMAP_MONO, /* mono stream */
+ /* this follows the alsa-lib mixer channel value + 3 */
+ SNDRV_CHMAP_FL, /* front left */
+ SNDRV_CHMAP_FR, /* front right */
+ SNDRV_CHMAP_RL, /* rear left */
+ SNDRV_CHMAP_RR, /* rear right */
+ SNDRV_CHMAP_FC, /* front center */
+ SNDRV_CHMAP_LFE, /* LFE */
+ SNDRV_CHMAP_SL, /* side left */
+ SNDRV_CHMAP_SR, /* side right */
+ SNDRV_CHMAP_RC, /* rear center */
+ /* new definitions */
+ SNDRV_CHMAP_FLC, /* front left center */
+ SNDRV_CHMAP_FRC, /* front right center */
+ SNDRV_CHMAP_RLC, /* rear left center */
+ SNDRV_CHMAP_RRC, /* rear right center */
+ SNDRV_CHMAP_FLW, /* front left wide */
+ SNDRV_CHMAP_FRW, /* front right wide */
+ SNDRV_CHMAP_FLH, /* front left high */
+ SNDRV_CHMAP_FCH, /* front center high */
+ SNDRV_CHMAP_FRH, /* front right high */
+ SNDRV_CHMAP_TC, /* top center */
+ SNDRV_CHMAP_TFL, /* top front left */
+ SNDRV_CHMAP_TFR, /* top front right */
+ SNDRV_CHMAP_TFC, /* top front center */
+ SNDRV_CHMAP_TRL, /* top rear left */
+ SNDRV_CHMAP_TRR, /* top rear right */
+ SNDRV_CHMAP_TRC, /* top rear center */
+ /* new definitions for UAC2 */
+ SNDRV_CHMAP_TFLC, /* top front left center */
+ SNDRV_CHMAP_TFRC, /* top front right center */
+ SNDRV_CHMAP_TSL, /* top side left */
+ SNDRV_CHMAP_TSR, /* top side right */
+ SNDRV_CHMAP_LLFE, /* left LFE */
+ SNDRV_CHMAP_RLFE, /* right LFE */
+ SNDRV_CHMAP_BC, /* bottom center */
+ SNDRV_CHMAP_BLC, /* bottom left center */
+ SNDRV_CHMAP_BRC, /* bottom right center */
+ SNDRV_CHMAP_LAST = SNDRV_CHMAP_BRC,
+};
+
+#define SNDRV_CHMAP_POSITION_MASK 0xffff
+#define SNDRV_CHMAP_PHASE_INVERSE (0x01 << 16)
+#define SNDRV_CHMAP_DRIVER_SPEC (0x02 << 16)
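+
+/*
+ * A channel map value packs the SNDRV_CHMAP_* position into the low 16
+ * bits, with the flags above it; e.g. (SNDRV_CHMAP_FL |
+ * SNDRV_CHMAP_PHASE_INVERSE) describes a phase-inverted front-left
+ * channel.
+ */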
+
+#define SNDRV_PCM_IOCTL_PVERSION _IOR('A', 0x00, int)
+#define SNDRV_PCM_IOCTL_INFO _IOR('A', 0x01, struct snd_pcm_info)
+#define SNDRV_PCM_IOCTL_TSTAMP _IOW('A', 0x02, int)
+#define SNDRV_PCM_IOCTL_TTSTAMP _IOW('A', 0x03, int)
+#define SNDRV_PCM_IOCTL_USER_PVERSION _IOW('A', 0x04, int)
+#define SNDRV_PCM_IOCTL_HW_REFINE _IOWR('A', 0x10, struct snd_pcm_hw_params)
+#define SNDRV_PCM_IOCTL_HW_PARAMS _IOWR('A', 0x11, struct snd_pcm_hw_params)
+#define SNDRV_PCM_IOCTL_HW_FREE _IO('A', 0x12)
+#define SNDRV_PCM_IOCTL_SW_PARAMS _IOWR('A', 0x13, struct snd_pcm_sw_params)
+#define SNDRV_PCM_IOCTL_STATUS _IOR('A', 0x20, struct snd_pcm_status)
+#define SNDRV_PCM_IOCTL_DELAY _IOR('A', 0x21, snd_pcm_sframes_t)
+#define SNDRV_PCM_IOCTL_HWSYNC _IO('A', 0x22)
+#define SNDRV_PCM_IOCTL_SYNC_PTR _IOWR('A', 0x23, struct snd_pcm_sync_ptr)
+#define SNDRV_PCM_IOCTL_STATUS_EXT _IOWR('A', 0x24, struct snd_pcm_status)
+#define SNDRV_PCM_IOCTL_CHANNEL_INFO _IOR('A', 0x32, struct snd_pcm_channel_info)
+#define SNDRV_PCM_IOCTL_PREPARE _IO('A', 0x40)
+#define SNDRV_PCM_IOCTL_RESET _IO('A', 0x41)
+#define SNDRV_PCM_IOCTL_START _IO('A', 0x42)
+#define SNDRV_PCM_IOCTL_DROP _IO('A', 0x43)
+#define SNDRV_PCM_IOCTL_DRAIN _IO('A', 0x44)
+#define SNDRV_PCM_IOCTL_PAUSE _IOW('A', 0x45, int)
+#define SNDRV_PCM_IOCTL_REWIND _IOW('A', 0x46, snd_pcm_uframes_t)
+#define SNDRV_PCM_IOCTL_RESUME _IO('A', 0x47)
+#define SNDRV_PCM_IOCTL_XRUN _IO('A', 0x48)
+#define SNDRV_PCM_IOCTL_FORWARD _IOW('A', 0x49, snd_pcm_uframes_t)
+#define SNDRV_PCM_IOCTL_WRITEI_FRAMES _IOW('A', 0x50, struct snd_xferi)
+#define SNDRV_PCM_IOCTL_READI_FRAMES _IOR('A', 0x51, struct snd_xferi)
+#define SNDRV_PCM_IOCTL_WRITEN_FRAMES _IOW('A', 0x52, struct snd_xfern)
+#define SNDRV_PCM_IOCTL_READN_FRAMES _IOR('A', 0x53, struct snd_xfern)
+#define SNDRV_PCM_IOCTL_LINK _IOW('A', 0x60, int)
+#define SNDRV_PCM_IOCTL_UNLINK _IO('A', 0x61)
+
+/*****************************************************************************
+ * *
+ * MIDI v1.0 interface *
+ * *
+ *****************************************************************************/
+
+/*
+ * Raw MIDI section - /dev/snd/midi??
+ */
+
+#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 0)
+
+enum {
+ SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
+ SNDRV_RAWMIDI_STREAM_INPUT,
+ SNDRV_RAWMIDI_STREAM_LAST = SNDRV_RAWMIDI_STREAM_INPUT,
+};
+
+#define SNDRV_RAWMIDI_INFO_OUTPUT 0x00000001
+#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002
+#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004
+
+struct snd_rawmidi_info {
+ unsigned int device; /* RO/WR (control): device number */
+ unsigned int subdevice; /* RO/WR (control): subdevice number */
+ int stream; /* WR: stream */
+ int card; /* R: card number */
+ unsigned int flags; /* SNDRV_RAWMIDI_INFO_XXXX */
+ unsigned char id[64]; /* ID (user selectable) */
+ unsigned char name[80]; /* name of device */
+ unsigned char subname[32]; /* name of active or selected subdevice */
+ unsigned int subdevices_count;
+ unsigned int subdevices_avail;
+ unsigned char reserved[64]; /* reserved for future use */
+};
+
+struct snd_rawmidi_params {
+ int stream;
+ size_t buffer_size; /* queue size in bytes */
+ size_t avail_min; /* minimum avail bytes for wakeup */
+ unsigned int no_active_sensing: 1; /* do not send active sensing byte in close() */
+ unsigned char reserved[16]; /* reserved for future use */
+};
+
+struct snd_rawmidi_status {
+ int stream;
+ struct timespec tstamp; /* Timestamp */
+ size_t avail; /* available bytes */
+ size_t xruns; /* count of overruns since last status (in bytes) */
+ unsigned char reserved[16]; /* reserved for future use */
+};
+
+#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int)
+#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info)
+#define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params)
+#define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status)
+#define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int)
+#define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int)
+
+/*
+ * Timer section - /dev/snd/timer
+ */
+
+#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 6)
+
+enum {
+ SNDRV_TIMER_CLASS_NONE = -1,
+ SNDRV_TIMER_CLASS_SLAVE = 0,
+ SNDRV_TIMER_CLASS_GLOBAL,
+ SNDRV_TIMER_CLASS_CARD,
+ SNDRV_TIMER_CLASS_PCM,
+ SNDRV_TIMER_CLASS_LAST = SNDRV_TIMER_CLASS_PCM,
+};
+
+/* slave timer classes */
+enum {
+ SNDRV_TIMER_SCLASS_NONE = 0,
+ SNDRV_TIMER_SCLASS_APPLICATION,
+ SNDRV_TIMER_SCLASS_SEQUENCER, /* alias */
+ SNDRV_TIMER_SCLASS_OSS_SEQUENCER, /* alias */
+ SNDRV_TIMER_SCLASS_LAST = SNDRV_TIMER_SCLASS_OSS_SEQUENCER,
+};
+
+/* global timers (device member) */
+#define SNDRV_TIMER_GLOBAL_SYSTEM 0
+#define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */
+#define SNDRV_TIMER_GLOBAL_HPET 2
+#define SNDRV_TIMER_GLOBAL_HRTIMER 3
+
+/* info flags */
+#define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */
+
+struct snd_timer_id {
+ int dev_class;
+ int dev_sclass;
+ int card;
+ int device;
+ int subdevice;
+};
+
+struct snd_timer_ginfo {
+ struct snd_timer_id tid; /* requested timer ID */
+ unsigned int flags; /* timer flags - SNDRV_TIMER_FLG_* */
+ int card; /* card number */
+ unsigned char id[64]; /* timer identification */
+ unsigned char name[80]; /* timer name */
+ unsigned long reserved0; /* reserved for future use */
+ unsigned long resolution; /* average period resolution in ns */
+ unsigned long resolution_min; /* minimal period resolution in ns */
+ unsigned long resolution_max; /* maximal period resolution in ns */
+ unsigned int clients; /* active timer clients */
+ unsigned char reserved[32];
+};
+
+struct snd_timer_gparams {
+ struct snd_timer_id tid; /* requested timer ID */
+ unsigned long period_num; /* requested precise period duration (in seconds) - numerator */
+ unsigned long period_den; /* requested precise period duration (in seconds) - denominator */
+ unsigned char reserved[32];
+};
+
+struct snd_timer_gstatus {
+ struct snd_timer_id tid; /* requested timer ID */
+ unsigned long resolution; /* current period resolution in ns */
+ unsigned long resolution_num; /* precise current period resolution (in seconds) - numerator */
+ unsigned long resolution_den; /* precise current period resolution (in seconds) - denominator */
+ unsigned char reserved[32];
+};
+
+struct snd_timer_select {
+ struct snd_timer_id id; /* bind to timer ID */
+ unsigned char reserved[32]; /* reserved */
+};
+
+struct snd_timer_info {
+ unsigned int flags; /* timer flags - SNDRV_TIMER_FLG_* */
+ int card; /* card number */
+ unsigned char id[64]; /* timer identification */
+ unsigned char name[80]; /* timer name */
+ unsigned long reserved0; /* reserved for future use */
+ unsigned long resolution; /* average period resolution in ns */
+ unsigned char reserved[64]; /* reserved */
+};
+
+#define SNDRV_TIMER_PSFLG_AUTO (1<<0) /* auto start, otherwise one-shot */
+#define SNDRV_TIMER_PSFLG_EXCLUSIVE (1<<1) /* exclusive use, precise start/stop/pause/continue */
+#define SNDRV_TIMER_PSFLG_EARLY_EVENT (1<<2) /* write early event to the poll queue */
+
+struct snd_timer_params {
+ unsigned int flags; /* flags - SNDRV_TIMER_PSFLG_* */
+ unsigned int ticks; /* requested resolution in ticks */
+ unsigned int queue_size; /* total size of queue (32-1024) */
+ unsigned int reserved0; /* reserved, was: failure locations */
+ unsigned int filter; /* event filter (bitmask of SNDRV_TIMER_EVENT_*) */
+ unsigned char reserved[60]; /* reserved */
+};
+
+struct snd_timer_status {
+ struct timespec tstamp; /* Timestamp - last update */
+ unsigned int resolution; /* current period resolution in ns */
+ unsigned int lost; /* count of master ticks lost */
+ unsigned int overrun; /* count of read queue overruns */
+ unsigned int queue; /* used queue size */
+ unsigned char reserved[64]; /* reserved */
+};
+
+#define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int)
+#define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id)
+#define SNDRV_TIMER_IOCTL_TREAD _IOW('T', 0x02, int)
+#define SNDRV_TIMER_IOCTL_GINFO _IOWR('T', 0x03, struct snd_timer_ginfo)
+#define SNDRV_TIMER_IOCTL_GPARAMS _IOW('T', 0x04, struct snd_timer_gparams)
+#define SNDRV_TIMER_IOCTL_GSTATUS _IOWR('T', 0x05, struct snd_timer_gstatus)
+#define SNDRV_TIMER_IOCTL_SELECT _IOW('T', 0x10, struct snd_timer_select)
+#define SNDRV_TIMER_IOCTL_INFO _IOR('T', 0x11, struct snd_timer_info)
+#define SNDRV_TIMER_IOCTL_PARAMS _IOW('T', 0x12, struct snd_timer_params)
+#define SNDRV_TIMER_IOCTL_STATUS _IOR('T', 0x14, struct snd_timer_status)
+/* The following four ioctls were changed since 1.0.9 due to a numbering conflict */
+#define SNDRV_TIMER_IOCTL_START _IO('T', 0xa0)
+#define SNDRV_TIMER_IOCTL_STOP _IO('T', 0xa1)
+#define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2)
+#define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3)
+
+struct snd_timer_read {
+ unsigned int resolution;
+ unsigned int ticks;
+};
+
+enum {
+ SNDRV_TIMER_EVENT_RESOLUTION = 0, /* val = resolution in ns */
+ SNDRV_TIMER_EVENT_TICK, /* val = ticks */
+ SNDRV_TIMER_EVENT_START, /* val = resolution in ns */
+ SNDRV_TIMER_EVENT_STOP, /* val = 0 */
+ SNDRV_TIMER_EVENT_CONTINUE, /* val = resolution in ns */
+ SNDRV_TIMER_EVENT_PAUSE, /* val = 0 */
+ SNDRV_TIMER_EVENT_EARLY, /* val = 0, early event */
+ SNDRV_TIMER_EVENT_SUSPEND, /* val = 0 */
+ SNDRV_TIMER_EVENT_RESUME, /* val = resolution in ns */
+ /* master timer events for slave timer instances */
+ SNDRV_TIMER_EVENT_MSTART = SNDRV_TIMER_EVENT_START + 10,
+ SNDRV_TIMER_EVENT_MSTOP = SNDRV_TIMER_EVENT_STOP + 10,
+ SNDRV_TIMER_EVENT_MCONTINUE = SNDRV_TIMER_EVENT_CONTINUE + 10,
+ SNDRV_TIMER_EVENT_MPAUSE = SNDRV_TIMER_EVENT_PAUSE + 10,
+ SNDRV_TIMER_EVENT_MSUSPEND = SNDRV_TIMER_EVENT_SUSPEND + 10,
+ SNDRV_TIMER_EVENT_MRESUME = SNDRV_TIMER_EVENT_RESUME + 10,
+};
+
+struct snd_timer_tread {
+ int event;
+ struct timespec tstamp;
+ unsigned int val;
+};
+
+/****************************************************************************
+ * *
+ * Section for driver control interface - /dev/snd/control? *
+ * *
+ ****************************************************************************/
+
+#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7)
+
+struct snd_ctl_card_info {
+ int card; /* card number */
+ int pad; /* reserved for future (was type) */
+ unsigned char id[16]; /* ID of card (user selectable) */
+ unsigned char driver[16]; /* Driver name */
+ unsigned char name[32]; /* Short name of soundcard */
+ unsigned char longname[80]; /* name + info text about soundcard */
+ unsigned char reserved_[16]; /* reserved for future (was ID of mixer) */
+ unsigned char mixername[80]; /* visual mixer identification */
+ unsigned char components[128]; /* card components / fine identification, delimited with one space (AC97 etc..) */
+};
+
+typedef int __bitwise snd_ctl_elem_type_t;
+#define SNDRV_CTL_ELEM_TYPE_NONE ((__force snd_ctl_elem_type_t) 0) /* invalid */
+#define SNDRV_CTL_ELEM_TYPE_BOOLEAN ((__force snd_ctl_elem_type_t) 1) /* boolean type */
+#define SNDRV_CTL_ELEM_TYPE_INTEGER ((__force snd_ctl_elem_type_t) 2) /* integer type */
+#define SNDRV_CTL_ELEM_TYPE_ENUMERATED ((__force snd_ctl_elem_type_t) 3) /* enumerated type */
+#define SNDRV_CTL_ELEM_TYPE_BYTES ((__force snd_ctl_elem_type_t) 4) /* byte array */
+#define SNDRV_CTL_ELEM_TYPE_IEC958 ((__force snd_ctl_elem_type_t) 5) /* IEC958 (S/PDIF) setup */
+#define SNDRV_CTL_ELEM_TYPE_INTEGER64 ((__force snd_ctl_elem_type_t) 6) /* 64-bit integer type */
+#define SNDRV_CTL_ELEM_TYPE_LAST SNDRV_CTL_ELEM_TYPE_INTEGER64
+
+typedef int __bitwise snd_ctl_elem_iface_t;
+#define SNDRV_CTL_ELEM_IFACE_CARD ((__force snd_ctl_elem_iface_t) 0) /* global control */
+#define SNDRV_CTL_ELEM_IFACE_HWDEP ((__force snd_ctl_elem_iface_t) 1) /* hardware dependent device */
+#define SNDRV_CTL_ELEM_IFACE_MIXER ((__force snd_ctl_elem_iface_t) 2) /* virtual mixer device */
+#define SNDRV_CTL_ELEM_IFACE_PCM ((__force snd_ctl_elem_iface_t) 3) /* PCM device */
+#define SNDRV_CTL_ELEM_IFACE_RAWMIDI ((__force snd_ctl_elem_iface_t) 4) /* RawMidi device */
+#define SNDRV_CTL_ELEM_IFACE_TIMER ((__force snd_ctl_elem_iface_t) 5) /* timer device */
+#define SNDRV_CTL_ELEM_IFACE_SEQUENCER ((__force snd_ctl_elem_iface_t) 6) /* sequencer client */
+#define SNDRV_CTL_ELEM_IFACE_LAST SNDRV_CTL_ELEM_IFACE_SEQUENCER
+
+#define SNDRV_CTL_ELEM_ACCESS_READ (1<<0)
+#define SNDRV_CTL_ELEM_ACCESS_WRITE (1<<1)
+#define SNDRV_CTL_ELEM_ACCESS_READWRITE (SNDRV_CTL_ELEM_ACCESS_READ|SNDRV_CTL_ELEM_ACCESS_WRITE)
+#define SNDRV_CTL_ELEM_ACCESS_VOLATILE (1<<2) /* control value may be changed without a notification */
+#define SNDRV_CTL_ELEM_ACCESS_TIMESTAMP (1<<3) /* when was control changed */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_READ (1<<4) /* TLV read is possible */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_WRITE (1<<5) /* TLV write is possible */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_READWRITE (SNDRV_CTL_ELEM_ACCESS_TLV_READ|SNDRV_CTL_ELEM_ACCESS_TLV_WRITE)
+#define SNDRV_CTL_ELEM_ACCESS_TLV_COMMAND (1<<6) /* TLV command is possible */
+#define SNDRV_CTL_ELEM_ACCESS_INACTIVE (1<<8) /* control currently does nothing, but may be updated */
+#define SNDRV_CTL_ELEM_ACCESS_LOCK (1<<9) /* write lock */
+#define SNDRV_CTL_ELEM_ACCESS_OWNER (1<<10) /* write lock owner */
+#define SNDRV_CTL_ELEM_ACCESS_TLV_CALLBACK (1<<28) /* kernel uses a TLV callback */
+#define SNDRV_CTL_ELEM_ACCESS_USER (1<<29) /* user space element */
+/* bits 30 and 31 are obsoleted (for indirect access) */
+
+/* for further details see the ACPI and PCI power management specification */
+#define SNDRV_CTL_POWER_D0 0x0000 /* full On */
+#define SNDRV_CTL_POWER_D1 0x0100 /* partial On */
+#define SNDRV_CTL_POWER_D2 0x0200 /* partial On */
+#define SNDRV_CTL_POWER_D3 0x0300 /* Off */
+#define SNDRV_CTL_POWER_D3hot (SNDRV_CTL_POWER_D3|0x0000) /* Off, with power */
+#define SNDRV_CTL_POWER_D3cold (SNDRV_CTL_POWER_D3|0x0001) /* Off, without power */
+
+#define SNDRV_CTL_ELEM_ID_NAME_MAXLEN 44
+
+struct snd_ctl_elem_id {
+ unsigned int numid; /* numeric identifier, zero = invalid */
+ snd_ctl_elem_iface_t iface; /* interface identifier */
+ unsigned int device; /* device/client number */
+ unsigned int subdevice; /* subdevice (substream) number */
+ unsigned char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; /* ASCII name of item */
+ unsigned int index; /* index of item */
+};
+
+struct snd_ctl_elem_list {
+ unsigned int offset; /* W: first element ID to get */
+ unsigned int space; /* W: count of element IDs to get */
+ unsigned int used; /* R: count of element IDs set */
+ unsigned int count; /* R: count of all elements */
+ struct snd_ctl_elem_id __user *pids; /* R: IDs */
+ unsigned char reserved[50];
+};
+
+struct snd_ctl_elem_info {
+ struct snd_ctl_elem_id id; /* W: element ID */
+ snd_ctl_elem_type_t type; /* R: value type - SNDRV_CTL_ELEM_TYPE_* */
+ unsigned int access; /* R: value access (bitmask) - SNDRV_CTL_ELEM_ACCESS_* */
+ unsigned int count; /* count of values */
+ __kernel_pid_t owner; /* owner's PID of this control */
+ union {
+ struct {
+ long min; /* R: minimum value */
+ long max; /* R: maximum value */
+ long step; /* R: step (0 variable) */
+ } integer;
+ struct {
+ long long min; /* R: minimum value */
+ long long max; /* R: maximum value */
+ long long step; /* R: step (0 variable) */
+ } integer64;
+ struct {
+ unsigned int items; /* R: number of items */
+ unsigned int item; /* W: item number */
+ char name[64]; /* R: value name */
+ __u64 names_ptr; /* W: names list (ELEM_ADD only) */
+ unsigned int names_length;
+ } enumerated;
+ unsigned char reserved[128];
+ } value;
+ union {
+ unsigned short d[4]; /* dimensions */
+ unsigned short *d_ptr; /* indirect - obsoleted */
+ } dimen;
+ unsigned char reserved[64-4*sizeof(unsigned short)];
+};
+
+struct snd_ctl_elem_value {
+ struct snd_ctl_elem_id id; /* W: element ID */
+ unsigned int indirect: 1; /* W: indirect access - obsoleted */
+ union {
+ union {
+ long value[128];
+ long *value_ptr; /* obsoleted */
+ } integer;
+ union {
+ long long value[64];
+ long long *value_ptr; /* obsoleted */
+ } integer64;
+ union {
+ unsigned int item[128];
+ unsigned int *item_ptr; /* obsoleted */
+ } enumerated;
+ union {
+ unsigned char data[512];
+ unsigned char *data_ptr; /* obsoleted */
+ } bytes;
+ struct snd_aes_iec958 iec958;
+ } value; /* RO */
+ struct timespec tstamp;
+ unsigned char reserved[128-sizeof(struct timespec)];
+};
+
+struct snd_ctl_tlv {
+ unsigned int numid; /* control element numeric identification */
+ unsigned int length; /* in bytes aligned to 4 */
+ unsigned int tlv[0]; /* first TLV */
+};
+
+#define SNDRV_CTL_IOCTL_PVERSION _IOR('U', 0x00, int)
+#define SNDRV_CTL_IOCTL_CARD_INFO _IOR('U', 0x01, struct snd_ctl_card_info)
+#define SNDRV_CTL_IOCTL_ELEM_LIST _IOWR('U', 0x10, struct snd_ctl_elem_list)
+#define SNDRV_CTL_IOCTL_ELEM_INFO _IOWR('U', 0x11, struct snd_ctl_elem_info)
+#define SNDRV_CTL_IOCTL_ELEM_READ _IOWR('U', 0x12, struct snd_ctl_elem_value)
+#define SNDRV_CTL_IOCTL_ELEM_WRITE _IOWR('U', 0x13, struct snd_ctl_elem_value)
+#define SNDRV_CTL_IOCTL_ELEM_LOCK _IOW('U', 0x14, struct snd_ctl_elem_id)
+#define SNDRV_CTL_IOCTL_ELEM_UNLOCK _IOW('U', 0x15, struct snd_ctl_elem_id)
+#define SNDRV_CTL_IOCTL_SUBSCRIBE_EVENTS _IOWR('U', 0x16, int)
+#define SNDRV_CTL_IOCTL_ELEM_ADD _IOWR('U', 0x17, struct snd_ctl_elem_info)
+#define SNDRV_CTL_IOCTL_ELEM_REPLACE _IOWR('U', 0x18, struct snd_ctl_elem_info)
+#define SNDRV_CTL_IOCTL_ELEM_REMOVE _IOWR('U', 0x19, struct snd_ctl_elem_id)
+#define SNDRV_CTL_IOCTL_TLV_READ _IOWR('U', 0x1a, struct snd_ctl_tlv)
+#define SNDRV_CTL_IOCTL_TLV_WRITE _IOWR('U', 0x1b, struct snd_ctl_tlv)
+#define SNDRV_CTL_IOCTL_TLV_COMMAND _IOWR('U', 0x1c, struct snd_ctl_tlv)
+#define SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE _IOWR('U', 0x20, int)
+#define SNDRV_CTL_IOCTL_HWDEP_INFO _IOR('U', 0x21, struct snd_hwdep_info)
+#define SNDRV_CTL_IOCTL_PCM_NEXT_DEVICE _IOR('U', 0x30, int)
+#define SNDRV_CTL_IOCTL_PCM_INFO _IOWR('U', 0x31, struct snd_pcm_info)
+#define SNDRV_CTL_IOCTL_PCM_PREFER_SUBDEVICE _IOW('U', 0x32, int)
+#define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int)
+#define SNDRV_CTL_IOCTL_RAWMIDI_INFO _IOWR('U', 0x41, struct snd_rawmidi_info)
+#define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int)
+#define SNDRV_CTL_IOCTL_POWER _IOWR('U', 0xd0, int)
+#define SNDRV_CTL_IOCTL_POWER_STATE _IOR('U', 0xd1, int)
+
+/*
+ * Read interface.
+ */
+
+enum sndrv_ctl_event_type {
+ SNDRV_CTL_EVENT_ELEM = 0,
+ SNDRV_CTL_EVENT_LAST = SNDRV_CTL_EVENT_ELEM,
+};
+
+#define SNDRV_CTL_EVENT_MASK_VALUE (1<<0) /* element value was changed */
+#define SNDRV_CTL_EVENT_MASK_INFO (1<<1) /* element info was changed */
+#define SNDRV_CTL_EVENT_MASK_ADD (1<<2) /* element was added */
+#define SNDRV_CTL_EVENT_MASK_TLV (1<<3) /* element TLV tree was changed */
+#define SNDRV_CTL_EVENT_MASK_REMOVE (~0U) /* element was removed */
+
+struct snd_ctl_event {
+ int type; /* event type - SNDRV_CTL_EVENT_* */
+ union {
+ struct {
+ unsigned int mask;
+ struct snd_ctl_elem_id id;
+ } elem;
+ unsigned char data8[60];
+ } data;
+};
+
+/*
+ * Control names
+ */
+
+#define SNDRV_CTL_NAME_NONE ""
+#define SNDRV_CTL_NAME_PLAYBACK "Playback "
+#define SNDRV_CTL_NAME_CAPTURE "Capture "
+
+#define SNDRV_CTL_NAME_IEC958_NONE ""
+#define SNDRV_CTL_NAME_IEC958_SWITCH "Switch"
+#define SNDRV_CTL_NAME_IEC958_VOLUME "Volume"
+#define SNDRV_CTL_NAME_IEC958_DEFAULT "Default"
+#define SNDRV_CTL_NAME_IEC958_MASK "Mask"
+#define SNDRV_CTL_NAME_IEC958_CON_MASK "Con Mask"
+#define SNDRV_CTL_NAME_IEC958_PRO_MASK "Pro Mask"
+#define SNDRV_CTL_NAME_IEC958_PCM_STREAM "PCM Stream"
+#define SNDRV_CTL_NAME_IEC958(expl,direction,what) "IEC958 " expl SNDRV_CTL_NAME_##direction SNDRV_CTL_NAME_IEC958_##what
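+/*
+ * For example, SNDRV_CTL_NAME_IEC958("", PLAYBACK, SWITCH) expands to
+ * the control name "IEC958 Playback Switch".
+ */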
+
+#endif /* _UAPI__SOUND_ASOUND_H */
diff --git a/tools/kvm/kvm_stat/Makefile b/tools/kvm/kvm_stat/Makefile
new file mode 100644
index 000000000..c3e36c60d
--- /dev/null
+++ b/tools/kvm/kvm_stat/Makefile
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak
+BINDIR=usr/bin
+MANDIR=usr/share/man
+MAN1DIR=$(MANDIR)/man1
+
+MAN1=kvm_stat.1
+
+A2X=a2x
+a2x_path := $(call get-executable,$(A2X))
+
+all: man
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ ifneq ($(V),1)
+ QUIET_A2X = @echo ' A2X '$@;
+ endif
+endif
+
+%.1: %.txt
+ifeq ($(a2x_path),)
+ $(error "You need to install asciidoc for man pages")
+else
+ $(QUIET_A2X)$(A2X) --doctype manpage --format manpage $<
+endif
+
+clean:
+ rm -f $(MAN1)
+
+man: $(MAN1)
+
+install-man: man
+ install -d -m 755 $(INSTALL_ROOT)/$(MAN1DIR)
+ install -m 644 kvm_stat.1 $(INSTALL_ROOT)/$(MAN1DIR)
+
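+# TARGET is not defined in this Makefile; it is presumably supplied by
+# the caller (e.g. TARGET=kvm_stat) when install-tools is invoked.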
+install-tools:
+ install -d -m 755 $(INSTALL_ROOT)/$(BINDIR)
+ install -m 755 -p "kvm_stat" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
+
+install: install-tools install-man
+.PHONY: all clean man install-tools install-man install
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
new file mode 100755
index 000000000..f6ca0a216
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -0,0 +1,1716 @@
+#!/usr/bin/python
+#
+# top-like utility for displaying kvm statistics
+#
+# Copyright 2006-2008 Qumranet Technologies
+# Copyright 2008-2011 Red Hat, Inc.
+#
+# Authors:
+# Avi Kivity <avi@redhat.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2. See
+# the COPYING file in the top-level directory.
+"""The kvm_stat module outputs statistics about running KVM VMs
+
+Three different ways of output formatting are available:
+- as a top-like text ui
+- in a key -> value format
+- in an all keys, all values format
+
+The data is sampled from KVM's debugfs entries and its perf events.
+"""
+from __future__ import print_function
+
+import curses
+import sys
+import locale
+import os
+import time
+import optparse
+import ctypes
+import fcntl
+import resource
+import struct
+import re
+import subprocess
+from collections import defaultdict, namedtuple
+
+VMX_EXIT_REASONS = {
+ 'EXCEPTION_NMI': 0,
+ 'EXTERNAL_INTERRUPT': 1,
+ 'TRIPLE_FAULT': 2,
+ 'PENDING_INTERRUPT': 7,
+ 'NMI_WINDOW': 8,
+ 'TASK_SWITCH': 9,
+ 'CPUID': 10,
+ 'HLT': 12,
+ 'INVLPG': 14,
+ 'RDPMC': 15,
+ 'RDTSC': 16,
+ 'VMCALL': 18,
+ 'VMCLEAR': 19,
+ 'VMLAUNCH': 20,
+ 'VMPTRLD': 21,
+ 'VMPTRST': 22,
+ 'VMREAD': 23,
+ 'VMRESUME': 24,
+ 'VMWRITE': 25,
+ 'VMOFF': 26,
+ 'VMON': 27,
+ 'CR_ACCESS': 28,
+ 'DR_ACCESS': 29,
+ 'IO_INSTRUCTION': 30,
+ 'MSR_READ': 31,
+ 'MSR_WRITE': 32,
+ 'INVALID_STATE': 33,
+ 'MWAIT_INSTRUCTION': 36,
+ 'MONITOR_INSTRUCTION': 39,
+ 'PAUSE_INSTRUCTION': 40,
+ 'MCE_DURING_VMENTRY': 41,
+ 'TPR_BELOW_THRESHOLD': 43,
+ 'APIC_ACCESS': 44,
+ 'EPT_VIOLATION': 48,
+ 'EPT_MISCONFIG': 49,
+ 'WBINVD': 54,
+ 'XSETBV': 55,
+ 'APIC_WRITE': 56,
+ 'INVPCID': 58,
+}
+
+SVM_EXIT_REASONS = {
+ 'READ_CR0': 0x000,
+ 'READ_CR3': 0x003,
+ 'READ_CR4': 0x004,
+ 'READ_CR8': 0x008,
+ 'WRITE_CR0': 0x010,
+ 'WRITE_CR3': 0x013,
+ 'WRITE_CR4': 0x014,
+ 'WRITE_CR8': 0x018,
+ 'READ_DR0': 0x020,
+ 'READ_DR1': 0x021,
+ 'READ_DR2': 0x022,
+ 'READ_DR3': 0x023,
+ 'READ_DR4': 0x024,
+ 'READ_DR5': 0x025,
+ 'READ_DR6': 0x026,
+ 'READ_DR7': 0x027,
+ 'WRITE_DR0': 0x030,
+ 'WRITE_DR1': 0x031,
+ 'WRITE_DR2': 0x032,
+ 'WRITE_DR3': 0x033,
+ 'WRITE_DR4': 0x034,
+ 'WRITE_DR5': 0x035,
+ 'WRITE_DR6': 0x036,
+ 'WRITE_DR7': 0x037,
+ 'EXCP_BASE': 0x040,
+ 'INTR': 0x060,
+ 'NMI': 0x061,
+ 'SMI': 0x062,
+ 'INIT': 0x063,
+ 'VINTR': 0x064,
+ 'CR0_SEL_WRITE': 0x065,
+ 'IDTR_READ': 0x066,
+ 'GDTR_READ': 0x067,
+ 'LDTR_READ': 0x068,
+ 'TR_READ': 0x069,
+ 'IDTR_WRITE': 0x06a,
+ 'GDTR_WRITE': 0x06b,
+ 'LDTR_WRITE': 0x06c,
+ 'TR_WRITE': 0x06d,
+ 'RDTSC': 0x06e,
+ 'RDPMC': 0x06f,
+ 'PUSHF': 0x070,
+ 'POPF': 0x071,
+ 'CPUID': 0x072,
+ 'RSM': 0x073,
+ 'IRET': 0x074,
+ 'SWINT': 0x075,
+ 'INVD': 0x076,
+ 'PAUSE': 0x077,
+ 'HLT': 0x078,
+ 'INVLPG': 0x079,
+ 'INVLPGA': 0x07a,
+ 'IOIO': 0x07b,
+ 'MSR': 0x07c,
+ 'TASK_SWITCH': 0x07d,
+ 'FERR_FREEZE': 0x07e,
+ 'SHUTDOWN': 0x07f,
+ 'VMRUN': 0x080,
+ 'VMMCALL': 0x081,
+ 'VMLOAD': 0x082,
+ 'VMSAVE': 0x083,
+ 'STGI': 0x084,
+ 'CLGI': 0x085,
+ 'SKINIT': 0x086,
+ 'RDTSCP': 0x087,
+ 'ICEBP': 0x088,
+ 'WBINVD': 0x089,
+ 'MONITOR': 0x08a,
+ 'MWAIT': 0x08b,
+ 'MWAIT_COND': 0x08c,
+ 'XSETBV': 0x08d,
+ 'NPF': 0x400,
+}
+
+# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h)
+AARCH64_EXIT_REASONS = {
+ 'UNKNOWN': 0x00,
+ 'WFI': 0x01,
+ 'CP15_32': 0x03,
+ 'CP15_64': 0x04,
+ 'CP14_MR': 0x05,
+ 'CP14_LS': 0x06,
+ 'FP_ASIMD': 0x07,
+ 'CP10_ID': 0x08,
+ 'CP14_64': 0x0C,
+ 'ILL_ISS': 0x0E,
+ 'SVC32': 0x11,
+ 'HVC32': 0x12,
+ 'SMC32': 0x13,
+ 'SVC64': 0x15,
+ 'HVC64': 0x16,
+ 'SMC64': 0x17,
+ 'SYS64': 0x18,
+ 'IABT': 0x20,
+ 'IABT_HYP': 0x21,
+ 'PC_ALIGN': 0x22,
+ 'DABT': 0x24,
+ 'DABT_HYP': 0x25,
+ 'SP_ALIGN': 0x26,
+ 'FP_EXC32': 0x28,
+ 'FP_EXC64': 0x2C,
+ 'SERROR': 0x2F,
+ 'BREAKPT': 0x30,
+ 'BREAKPT_HYP': 0x31,
+ 'SOFTSTP': 0x32,
+ 'SOFTSTP_HYP': 0x33,
+ 'WATCHPT': 0x34,
+ 'WATCHPT_HYP': 0x35,
+ 'BKPT32': 0x38,
+ 'VECTOR32': 0x3A,
+ 'BRK64': 0x3C,
+}
+
+# From include/uapi/linux/kvm.h, KVM_EXIT_xxx
+USERSPACE_EXIT_REASONS = {
+ 'UNKNOWN': 0,
+ 'EXCEPTION': 1,
+ 'IO': 2,
+ 'HYPERCALL': 3,
+ 'DEBUG': 4,
+ 'HLT': 5,
+ 'MMIO': 6,
+ 'IRQ_WINDOW_OPEN': 7,
+ 'SHUTDOWN': 8,
+ 'FAIL_ENTRY': 9,
+ 'INTR': 10,
+ 'SET_TPR': 11,
+ 'TPR_ACCESS': 12,
+ 'S390_SIEIC': 13,
+ 'S390_RESET': 14,
+ 'DCR': 15,
+ 'NMI': 16,
+ 'INTERNAL_ERROR': 17,
+ 'OSI': 18,
+ 'PAPR_HCALL': 19,
+ 'S390_UCONTROL': 20,
+ 'WATCHDOG': 21,
+ 'S390_TSCH': 22,
+ 'EPR': 23,
+ 'SYSTEM_EVENT': 24,
+}
+
+IOCTL_NUMBERS = {
+ 'SET_FILTER': 0x40082406,
+ 'ENABLE': 0x00002400,
+ 'DISABLE': 0x00002401,
+ 'RESET': 0x00002403,
+}
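+# ENABLE/DISABLE/RESET correspond to the PERF_EVENT_IOC_* ioctls
+# (_IO('$', 0/1/3)); SET_FILTER matches _IOW('$', 6, char *) with an
+# 8-byte pointer. ArchPPC below overrides them for PPC's different
+# ioctl encoding.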
+
+ENCODING = locale.getpreferredencoding(False)
+TRACE_FILTER = re.compile(r'^[^\(]*$')
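+# Matches plain field names containing no '(', i.e. tracepoints that
+# are not per-filter child fields such as 'kvm_exit(HLT)'.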
+
+
+class Arch(object):
+ """Encapsulates global architecture specific data.
+
+ Contains the performance event open syscall and ioctl numbers, as
+ well as the VM exit reasons for the architecture it runs on.
+
+ """
+ @staticmethod
+ def get_arch():
+ machine = os.uname()[4]
+
+ if machine.startswith('ppc'):
+ return ArchPPC()
+ elif machine.startswith('aarch64'):
+ return ArchA64()
+ elif machine.startswith('s390'):
+ return ArchS390()
+ else:
+ # X86_64
+ for line in open('/proc/cpuinfo'):
+ if not line.startswith('flags'):
+ continue
+
+ flags = line.split()
+ if 'vmx' in flags:
+ return ArchX86(VMX_EXIT_REASONS)
+ if 'svm' in flags:
+ return ArchX86(SVM_EXIT_REASONS)
+ return
+
+ def tracepoint_is_child(self, field):
+ if TRACE_FILTER.match(field):
+ return None
+ return field.split('(', 1)[0]
+
+
+class ArchX86(Arch):
+ def __init__(self, exit_reasons):
+ self.sc_perf_evt_open = 298
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reason_field = 'exit_reason'
+ self.exit_reasons = exit_reasons
+
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ return None
+
+
+class ArchPPC(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 319
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.ioctl_numbers['ENABLE'] = 0x20002400
+ self.ioctl_numbers['DISABLE'] = 0x20002401
+ self.ioctl_numbers['RESET'] = 0x20002403
+
+ # PPC comes in 32 and 64 bit and some generated ioctl
+ # numbers depend on the wordsize.
+ char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
+ self.ioctl_numbers['SET_FILTER'] = 0x80002406 | char_ptr_size << 16
+ self.exit_reason_field = 'exit_nr'
+ self.exit_reasons = {}
+
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ return None
+
+
+class ArchA64(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 241
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reason_field = 'esr_ec'
+ self.exit_reasons = AARCH64_EXIT_REASONS
+
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ return None
+
+
+class ArchS390(Arch):
+ def __init__(self):
+ self.sc_perf_evt_open = 331
+ self.ioctl_numbers = IOCTL_NUMBERS
+ self.exit_reason_field = None
+ self.exit_reasons = None
+
+ def debugfs_is_child(self, field):
+ """ Returns name of parent if 'field' is a child, None otherwise """
+ if field.startswith('instruction_'):
+ return 'exit_instruction'
+
+
+ARCH = Arch.get_arch()
+
+
+class perf_event_attr(ctypes.Structure):
+ """Struct that holds the necessary data to set up a trace event.
+
+ For an extensive explanation see perf_event_open(2) and
+ include/uapi/linux/perf_event.h, struct perf_event_attr
+
+ All fields that are not initialized in the constructor are 0.
+
+ """
+ _fields_ = [('type', ctypes.c_uint32),
+ ('size', ctypes.c_uint32),
+ ('config', ctypes.c_uint64),
+ ('sample_freq', ctypes.c_uint64),
+ ('sample_type', ctypes.c_uint64),
+ ('read_format', ctypes.c_uint64),
+ ('flags', ctypes.c_uint64),
+ ('wakeup_events', ctypes.c_uint32),
+ ('bp_type', ctypes.c_uint32),
+ ('bp_addr', ctypes.c_uint64),
+ ('bp_len', ctypes.c_uint64),
+ ]
+
+ def __init__(self):
+ super(self.__class__, self).__init__()
+ self.type = PERF_TYPE_TRACEPOINT
+ self.size = ctypes.sizeof(self)
+ self.read_format = PERF_FORMAT_GROUP
+
+
+PERF_TYPE_TRACEPOINT = 2
+PERF_FORMAT_GROUP = 1 << 3
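+# Mirrors PERF_TYPE_TRACEPOINT and PERF_FORMAT_GROUP from
+# include/uapi/linux/perf_event.h; PERF_FORMAT_GROUP makes one read()
+# on the group leader return the counters of the whole group.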
+
+
+class Group(object):
+ """Represents a perf event group."""
+
+ def __init__(self):
+ self.events = []
+
+ def add_event(self, event):
+ self.events.append(event)
+
+ def read(self):
+ """Returns a dict with 'event name: value' for all events in the
+ group.
+
+ Values are read by reading from the file descriptor of the
+ event that is the group leader. See perf_event_open(2) for
+ details.
+
+ Read format for the used event configuration is:
+ struct read_format {
+ u64 nr; /* The number of events */
+ struct {
+ u64 value; /* The value of the event */
+ } values[nr];
+ };
+
+ """
+ length = 8 * (1 + len(self.events))
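+ # 'xxxxxxxx' skips the leading u64 'nr' field; each 'Q' unpacks
+ # one u64 counter value, in the order the events were added.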
+ read_format = 'xxxxxxxx' + 'Q' * len(self.events)
+ return dict(zip([event.name for event in self.events],
+ struct.unpack(read_format,
+ os.read(self.events[0].fd, length))))
+
+
+class Event(object):
+ """Represents a performance event and manages its life cycle."""
+ def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
+ trace_filter, trace_set='kvm'):
+ self.libc = ctypes.CDLL('libc.so.6', use_errno=True)
+ self.syscall = self.libc.syscall
+ self.name = name
+ self.fd = None
+ self._setup_event(group, trace_cpu, trace_pid, trace_point,
+ trace_filter, trace_set)
+
+ def __del__(self):
+ """Closes the event's file descriptor.
+
+ Since no Python file object was created for this descriptor,
+ Python will not reference-count it or close it automatically,
+ so we do it ourselves.
+
+ """
+ if self.fd:
+ os.close(self.fd)
+
+ def _perf_event_open(self, attr, pid, cpu, group_fd, flags):
+ """Wrapper for the sys_perf_evt_open() syscall.
+
+ Used to set up performance events, returns a file descriptor or -1
+ on error.
+
+ Arguments are:
+ - syscall number
+ - struct perf_event_attr *
+ - pid or -1 to monitor all pids
+ - cpu number or -1 to monitor all cpus
+ - The file descriptor of the group leader or -1 to create a group.
+ - flags
+
+ """
+ return self.syscall(ARCH.sc_perf_evt_open, ctypes.pointer(attr),
+ ctypes.c_int(pid), ctypes.c_int(cpu),
+ ctypes.c_int(group_fd), ctypes.c_long(flags))
+
+ def _setup_event_attribute(self, trace_set, trace_point):
+ """Returns an initialized ctype perf_event_attr struct."""
+
+ id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
+ trace_point, 'id')
+
+ event_attr = perf_event_attr()
+ event_attr.config = int(open(id_path).read())
+ return event_attr
+
+ def _setup_event(self, group, trace_cpu, trace_pid, trace_point,
+ trace_filter, trace_set):
+ """Sets up the perf event in Linux.
+
+ Issues the syscall to register the event in the kernel and
+ then sets the optional filter.
+
+ """
+
+ event_attr = self._setup_event_attribute(trace_set, trace_point)
+
+ # First event will be group leader.
+ group_leader = -1
+
+ # All others have to pass the leader's descriptor instead.
+ if group.events:
+ group_leader = group.events[0].fd
+
+ fd = self._perf_event_open(event_attr, trace_pid,
+ trace_cpu, group_leader, 0)
+ if fd == -1:
+ err = ctypes.get_errno()
+ raise OSError(err, os.strerror(err),
+ 'while calling sys_perf_event_open().')
+
+ if trace_filter:
+ fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
+ trace_filter)
+
+ self.fd = fd
+
+ def enable(self):
+ """Enables the trace event in the kernel.
+
+ Enabling the group leader makes reading counters from it and the
+ events under it possible.
+
+ """
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)
+
+ def disable(self):
+ """Disables the trace event in the kernel.
+
+ Disabling the group leader makes reading all counters under it
+ impossible.
+
+ """
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)
+
+ def reset(self):
+ """Resets the count of the trace event in the kernel."""
+ fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
+
+
+class Provider(object):
+ """Encapsulates functionalities used by all providers."""
+ def __init__(self, pid):
+ self.child_events = False
+ self.pid = pid
+
+ @staticmethod
+ def is_field_wanted(fields_filter, field):
+ """Indicate whether field is valid according to fields_filter."""
+ if not fields_filter:
+ return True
+ return re.match(fields_filter, field) is not None
+
+ @staticmethod
+ def walkdir(path):
+ """Returns os.walk() data for specified directory.
+
+ As it is only a wrapper, it returns the same 3-tuple of (dirpath,
+ dirnames, filenames).
+ """
+ return next(os.walk(path))
+
+
+class TracepointProvider(Provider):
+ """Data provider for the stats class.
+
+ Manages the events/groups from which it acquires its data.
+
+ """
+ def __init__(self, pid, fields_filter):
+ self.group_leaders = []
+ self.filters = self._get_filters()
+ self.update_fields(fields_filter)
+ super(TracepointProvider, self).__init__(pid)
+
+ @staticmethod
+ def _get_filters():
+ """Returns a dict of trace events, their filter ids and
+ the values that can be filtered.
+
+ Trace events can be filtered for specific values by setting a
+ filter string via an ioctl. The string normally has the format
+ identifier==value. A separate event is created for each filter
+ value so that the filtered counts can be told apart.
+
+ """
+ filters = {}
+ filters['kvm_userspace_exit'] = ('reason', USERSPACE_EXIT_REASONS)
+ if ARCH.exit_reason_field and ARCH.exit_reasons:
+ filters['kvm_exit'] = (ARCH.exit_reason_field, ARCH.exit_reasons)
+ return filters
+
+ def _get_available_fields(self):
+ """Returns a list of available events of format 'event name(filter
+ name)'.
+
+ All available events have directories under
+ /sys/kernel/debug/tracing/events/ which export information
+ about the specific event. Therefore, listing the dirs gives us
+ a list of all available events.
+
+        Some events, such as the vm exit reasons, can be filtered for
+        specific values. To account for that, the routine below
+        creates special fields with the following format:
+ event name(filter name)
+
+ """
+ path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
+ fields = self.walkdir(path)[1]
+ extra = []
+ for field in fields:
+ if field in self.filters:
+ filter_name_, filter_dicts = self.filters[field]
+ for name in filter_dicts:
+ extra.append(field + '(' + name + ')')
+ fields += extra
+ return fields
+
+ def update_fields(self, fields_filter):
+ """Refresh fields, applying fields_filter"""
+ self.fields = [field for field in self._get_available_fields()
+ if self.is_field_wanted(fields_filter, field)]
+ # add parents for child fields - otherwise we won't see any output!
+ for field in self._fields:
+ parent = ARCH.tracepoint_is_child(field)
+ if (parent and parent not in self._fields):
+ self.fields.append(parent)
+
+ @staticmethod
+ def _get_online_cpus():
+ """Returns a list of cpu id integers."""
+ def parse_int_list(list_string):
+ """Returns an int list from a string of comma separated integers and
+ integer ranges."""
+ integers = []
+ members = list_string.split(',')
+
+ for member in members:
+ if '-' not in member:
+ integers.append(int(member))
+ else:
+ int_range = member.split('-')
+ integers.extend(range(int(int_range[0]),
+ int(int_range[1]) + 1))
+
+ return integers
+
+ with open('/sys/devices/system/cpu/online') as cpu_list:
+ cpu_string = cpu_list.readline()
+ return parse_int_list(cpu_string)
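
The online mask uses the kernel's cpu-list format; to make the parsing
above concrete, a few hedged input/output pairs for parse_int_list:

    parse_int_list('0')        # -> [0]
    parse_int_list('0-3,5')    # -> [0, 1, 2, 3, 5]
    parse_int_list('0-1,4-5')  # -> [0, 1, 4, 5]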
+
+ def _setup_traces(self):
+ """Creates all event and group objects needed to be able to retrieve
+ data."""
+ fields = self._get_available_fields()
+ if self._pid > 0:
+ # Fetch list of all threads of the monitored pid, as qemu
+ # starts a thread for each vcpu.
+ path = os.path.join('/proc', str(self._pid), 'task')
+ groupids = self.walkdir(path)[1]
+ else:
+ groupids = self._get_online_cpus()
+
+ # The constant is needed as a buffer for python libs, std
+ # streams and other files that the script opens.
+ newlim = len(groupids) * len(fields) + 50
+ try:
+ softlim_, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)
+
+ if hardlim < newlim:
+ # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
+ resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, newlim))
+ else:
+ # Raising the soft limit is sufficient.
+ resource.setrlimit(resource.RLIMIT_NOFILE, (newlim, hardlim))
+
+ except ValueError:
+ sys.exit("NOFILE rlimit could not be raised to {0}".format(newlim))
+
+ for groupid in groupids:
+ group = Group()
+ for name in fields:
+ tracepoint = name
+ tracefilter = None
+ match = re.match(r'(.*)\((.*)\)', name)
+ if match:
+ tracepoint, sub = match.groups()
+ tracefilter = ('%s==%d\0' %
+ (self.filters[tracepoint][0],
+ self.filters[tracepoint][1][sub]))
+
+ # From perf_event_open(2):
+ # pid > 0 and cpu == -1
+ # This measures the specified process/thread on any CPU.
+ #
+ # pid == -1 and cpu >= 0
+ # This measures all processes/threads on the specified CPU.
+ trace_cpu = groupid if self._pid == 0 else -1
+ trace_pid = int(groupid) if self._pid != 0 else -1
+
+ group.add_event(Event(name=name,
+ group=group,
+ trace_cpu=trace_cpu,
+ trace_pid=trace_pid,
+ trace_point=tracepoint,
+ trace_filter=tracefilter))
+
+ self.group_leaders.append(group)
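
To make the filter construction concrete: the string passed to SET_FILTER
follows the tracefs syntax 'identifier==value' and is NUL-terminated. A
hedged sketch with made-up values (real field names and reason codes come
from ARCH.exit_reason_field and ARCH.exit_reasons):

    import re

    name = 'kvm_exit(EXTERNAL_INTERRUPT)'          # hypothetical field
    tracepoint, sub = re.match(r'(.*)\((.*)\)', name).groups()
    # assuming filters[tracepoint] == ('exit_reason',
    #                                  {'EXTERNAL_INTERRUPT': 1}):
    tracefilter = '%s==%d\0' % ('exit_reason', 1)  # 'exit_reason==1\0'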
+
+ @property
+ def fields(self):
+ return self._fields
+
+ @fields.setter
+ def fields(self, fields):
+ """Enables/disables the (un)wanted events"""
+ self._fields = fields
+ for group in self.group_leaders:
+ for index, event in enumerate(group.events):
+ if event.name in fields:
+ event.reset()
+ event.enable()
+ else:
+ # Do not disable the group leader.
+ # It would disable all of its events.
+ if index != 0:
+ event.disable()
+
+ @property
+ def pid(self):
+ return self._pid
+
+ @pid.setter
+ def pid(self, pid):
+ """Changes the monitored pid by setting new traces."""
+ self._pid = pid
+ # The garbage collector will get rid of all Event/Group
+ # objects and open files after removing the references.
+ self.group_leaders = []
+ self._setup_traces()
+ self.fields = self._fields
+
+ def read(self, by_guest=0):
+ """Returns 'event name: current value' for all enabled events."""
+ ret = defaultdict(int)
+ for group in self.group_leaders:
+ for name, val in group.read().items():
+ if name not in self._fields:
+ continue
+ parent = ARCH.tracepoint_is_child(name)
+ if parent:
+ name += ' ' + parent
+ ret[name] += val
+ return ret
+
+ def reset(self):
+ """Reset all field counters"""
+ for group in self.group_leaders:
+ for event in group.events:
+ event.reset()
+
+
+class DebugfsProvider(Provider):
+ """Provides data from the files that KVM creates in the kvm debugfs
+ folder."""
+ def __init__(self, pid, fields_filter, include_past):
+ self.update_fields(fields_filter)
+ self._baseline = {}
+ self.do_read = True
+ self.paths = []
+ super(DebugfsProvider, self).__init__(pid)
+ if include_past:
+ self._restore()
+
+ def _get_available_fields(self):
+ """"Returns a list of available fields.
+
+ The fields are all available KVM debugfs files
+
+ """
+ return self.walkdir(PATH_DEBUGFS_KVM)[2]
+
+ def update_fields(self, fields_filter):
+ """Refresh fields, applying fields_filter"""
+ self._fields = [field for field in self._get_available_fields()
+ if self.is_field_wanted(fields_filter, field)]
+ # add parents for child fields - otherwise we won't see any output!
+ for field in self._fields:
+ parent = ARCH.debugfs_is_child(field)
+ if (parent and parent not in self._fields):
+ self.fields.append(parent)
+
+ @property
+ def fields(self):
+ return self._fields
+
+ @fields.setter
+ def fields(self, fields):
+ self._fields = fields
+ self.reset()
+
+ @property
+ def pid(self):
+ return self._pid
+
+ @pid.setter
+ def pid(self, pid):
+ self._pid = pid
+ if pid != 0:
+ vms = self.walkdir(PATH_DEBUGFS_KVM)[1]
+ if len(vms) == 0:
+ self.do_read = False
+
+ self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms))
+
+ else:
+ self.paths = []
+ self.do_read = True
+
+ def _verify_paths(self):
+ """Remove invalid paths"""
+        # Iterate over a copy: removing entries from the list while
+        # iterating over it would skip the element after each removal.
+        for path in self.paths[:]:
+            if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)):
+                self.paths.remove(path)
+
+ def read(self, reset=0, by_guest=0):
+ """Returns a dict with format:'file name / field -> current value'.
+
+ Parameter 'reset':
+ 0 plain read
+ 1 reset field counts to 0
+ 2 restore the original field counts
+
+ """
+ results = {}
+
+ # If no debugfs filtering support is available, then don't read.
+ if not self.do_read:
+ return results
+ self._verify_paths()
+
+ paths = self.paths
+ if self._pid == 0:
+ paths = []
+ for entry in os.walk(PATH_DEBUGFS_KVM):
+ for dir in entry[1]:
+ paths.append(dir)
+ for path in paths:
+ for field in self._fields:
+ value = self._read_field(field, path)
+ key = path + field
+ if reset == 1:
+ self._baseline[key] = value
+ if reset == 2:
+ self._baseline[key] = 0
+ if self._baseline.get(key, -1) == -1:
+ self._baseline[key] = value
+ parent = ARCH.debugfs_is_child(field)
+ if parent:
+ field = field + ' ' + parent
+ else:
+ if by_guest:
+ field = key.split('-')[0] # set 'field' to 'pid'
+ increment = value - self._baseline.get(key, 0)
+ if field in results:
+ results[field] += increment
+ else:
+ results[field] = increment
+
+ return results
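
A hedged numeric walk-through of the baseline bookkeeping above (values
are invented):

    def increment(value, baseline):  # mirrors the core of read()
        return value - baseline

    increment(120, 100)  # plain read (reset=0)           -> 20
    increment(120, 120)  # right after a reset (reset=1)  -> 0
    increment(120, 0)    # after a restore (reset=2)      -> 120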
+
+ def _read_field(self, field, path):
+ """Returns the value of a single field from a specific VM."""
+ try:
+ return int(open(os.path.join(PATH_DEBUGFS_KVM,
+ path,
+ field))
+ .read())
+ except IOError:
+ return 0
+
+ def reset(self):
+ """Reset field counters"""
+ self._baseline = {}
+ self.read(1)
+
+ def _restore(self):
+ """Reset field counters"""
+ self._baseline = {}
+ self.read(2)
+
+
+EventStat = namedtuple('EventStat', ['value', 'delta'])
+
+
+class Stats(object):
+ """Manages the data providers and the data they provide.
+
+ It is used to set filters on the provider's data and collect all
+ provider data.
+
+ """
+ def __init__(self, options):
+ self.providers = self._get_providers(options)
+ self._pid_filter = options.pid
+ self._fields_filter = options.fields
+ self.values = {}
+ self._child_events = False
+
+ def _get_providers(self, options):
+ """Returns a list of data providers depending on the passed options."""
+ providers = []
+
+ if options.debugfs:
+ providers.append(DebugfsProvider(options.pid, options.fields,
+ options.dbgfs_include_past))
+ if options.tracepoints or not providers:
+ providers.append(TracepointProvider(options.pid, options.fields))
+
+ return providers
+
+ def _update_provider_filters(self):
+ """Propagates fields filters to providers."""
+ # As we reset the counters when updating the fields we can
+ # also clear the cache of old values.
+ self.values = {}
+ for provider in self.providers:
+ provider.update_fields(self._fields_filter)
+
+ def reset(self):
+ self.values = {}
+ for provider in self.providers:
+ provider.reset()
+
+ @property
+ def fields_filter(self):
+ return self._fields_filter
+
+ @fields_filter.setter
+ def fields_filter(self, fields_filter):
+ if fields_filter != self._fields_filter:
+ self._fields_filter = fields_filter
+ self._update_provider_filters()
+
+ @property
+ def pid_filter(self):
+ return self._pid_filter
+
+ @pid_filter.setter
+ def pid_filter(self, pid):
+ if pid != self._pid_filter:
+ self._pid_filter = pid
+ self.values = {}
+ for provider in self.providers:
+ provider.pid = self._pid_filter
+
+ @property
+ def child_events(self):
+ return self._child_events
+
+ @child_events.setter
+ def child_events(self, val):
+ self._child_events = val
+ for provider in self.providers:
+ provider.child_events = val
+
+ def get(self, by_guest=0):
+ """Returns a dict with field -> (value, delta to last value) of all
+ provider data.
+ Key formats:
+ * plain: 'key' is event name
+ * child-parent: 'key' is in format '<child> <parent>'
+ * pid: 'key' is the pid of the guest, and the record contains the
+ aggregated event data
+ These formats are generated by the providers, and handled in class TUI.
+ """
+ for provider in self.providers:
+ new = provider.read(by_guest=by_guest)
+ for key in new:
+ oldval = self.values.get(key, EventStat(0, 0)).value
+ newval = new.get(key, 0)
+ newdelta = newval - oldval
+ self.values[key] = EventStat(newval, newdelta)
+ return self.values
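
EventStat pairs the running total with the delta since the previous call;
a hedged example of two successive polls (event name and numbers invented):

    stats = Stats(options)  # options as returned by get_options()
    stats.get()             # e.g. {'kvm_exit': EventStat(value=100, delta=100)}
    # ... one polling interval later ...
    stats.get()             # e.g. {'kvm_exit': EventStat(value=130, delta=30)}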
+
+ def toggle_display_guests(self, to_pid):
+ """Toggle between collection of stats by individual event and by
+ guest pid
+
+ Events reported by DebugfsProvider change when switching to/from
+ reading by guest values. Hence we have to remove the excess event
+ names from self.values.
+
+ """
+ if any(isinstance(ins, TracepointProvider) for ins in self.providers):
+ return 1
+ if to_pid:
+ for provider in self.providers:
+ if isinstance(provider, DebugfsProvider):
+ for key in provider.fields:
+ if key in self.values.keys():
+ del self.values[key]
+ else:
+ oldvals = self.values.copy()
+ for key in oldvals:
+ if key.isdigit():
+ del self.values[key]
+ # Update oldval (see get())
+ self.get(to_pid)
+ return 0
+
+
+DELAY_DEFAULT = 3.0
+MAX_GUEST_NAME_LEN = 48
+MAX_REGEX_LEN = 44
+SORT_DEFAULT = 0
+
+
+class Tui(object):
+ """Instruments curses to draw a nice text ui."""
+ def __init__(self, stats):
+ self.stats = stats
+ self.screen = None
+ self._delay_initial = 0.25
+ self._delay_regular = DELAY_DEFAULT
+ self._sorting = SORT_DEFAULT
+ self._display_guests = 0
+
+ def __enter__(self):
+ """Initialises curses for later use. Based on curses.wrapper
+ implementation from the Python standard library."""
+ self.screen = curses.initscr()
+ curses.noecho()
+ curses.cbreak()
+
+ # The try/catch works around a minor bit of
+ # over-conscientiousness in the curses module, the error
+ # return from C start_color() is ignorable.
+ try:
+ curses.start_color()
+ except curses.error:
+ pass
+
+ # Hide cursor in extra statement as some monochrome terminals
+ # might support hiding but not colors.
+ try:
+ curses.curs_set(0)
+ except curses.error:
+ pass
+
+ curses.use_default_colors()
+ return self
+
+ def __exit__(self, *exception):
+ """Resets the terminal to its normal state. Based on curses.wrapper
+ implementation from the Python standard library."""
+ if self.screen:
+ self.screen.keypad(0)
+ curses.echo()
+ curses.nocbreak()
+ curses.endwin()
+
+ @staticmethod
+ def get_all_gnames():
+ """Returns a list of (pid, gname) tuples of all running guests"""
+ res = []
+        child = subprocess.Popen(['ps', '-A', '--format', 'pid,args'],
+                                 stdout=subprocess.PIPE)
+ for line in child.stdout:
+ line = line.decode(ENCODING).lstrip().split(' ', 1)
+ # perform a sanity check before calling the more expensive
+ # function to possibly extract the guest name
+ if ' -name ' in line[1]:
+ res.append((line[0], Tui.get_gname_from_pid(line[0])))
+ child.stdout.close()
+
+ return res
+
+ def _print_all_gnames(self, row):
+ """Print a list of all running guests along with their pids."""
+ self.screen.addstr(row, 2, '%8s %-60s' %
+ ('Pid', 'Guest Name (fuzzy list, might be '
+ 'inaccurate!)'),
+ curses.A_UNDERLINE)
+ row += 1
+ try:
+ for line in self.get_all_gnames():
+ self.screen.addstr(row, 2, '%8s %-60s' % (line[0], line[1]))
+ row += 1
+ if row >= self.screen.getmaxyx()[0]:
+ break
+ except Exception:
+ self.screen.addstr(row + 1, 2, 'Not available')
+
+ @staticmethod
+ def get_pid_from_gname(gname):
+ """Fuzzy function to convert guest name to QEMU process pid.
+
+ Returns a list of potential pids, can be empty if no match found.
+ Throws an exception on processing errors.
+
+ """
+ pids = []
+ for line in Tui.get_all_gnames():
+ if gname == line[1]:
+ pids.append(int(line[0]))
+
+ return pids
+
+ @staticmethod
+ def get_gname_from_pid(pid):
+ """Returns the guest name for a QEMU process pid.
+
+        Extracts the guest name from the QEMU command line by processing the
+ '-name' option. Will also handle names specified out of sequence.
+
+ """
+ name = ''
+ try:
+ line = open('/proc/{}/cmdline'
+ .format(pid), 'r').read().split('\0')
+ parms = line[line.index('-name') + 1].split(',')
+ while '' in parms:
+ # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results
+                # in ['foo', '', 'bar'], which we revert here
+ idx = parms.index('')
+ parms[idx - 1] += ',' + parms[idx + 1]
+ del parms[idx:idx+2]
+ # the '-name' switch allows for two ways to specify the guest name,
+ # where the plain name overrides the name specified via 'guest='
+ for arg in parms:
+ if '=' not in arg:
+ name = arg
+ break
+ if arg[:6] == 'guest=':
+ name = arg[6:]
+ except (ValueError, IOError, IndexError):
+ pass
+
+ return name
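
To make the comma un-escaping concrete: QEMU escapes a literal ',' in the
guest name as ',,', so a hedged trace of a name containing a comma:

    parms = 'guest=foo,,bar,debug-threads=on'.split(',')
    # -> ['guest=foo', '', 'bar', 'debug-threads=on']
    # the while loop above rejoins around the empty slot:
    # -> ['guest=foo,bar', 'debug-threads=on'], so name becomes 'foo,bar'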
+
+ def _update_pid(self, pid):
+ """Propagates pid selection to stats object."""
+ self.screen.addstr(4, 1, 'Updating pid filter...')
+ self.screen.refresh()
+ self.stats.pid_filter = pid
+
+ def _refresh_header(self, pid=None):
+ """Refreshes the header."""
+ if pid is None:
+ pid = self.stats.pid_filter
+ self.screen.erase()
+ gname = self.get_gname_from_pid(pid)
+ self._gname = gname
+ if gname:
+ gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
+ if len(gname) > MAX_GUEST_NAME_LEN
+ else gname))
+ if pid > 0:
+ self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
+ else:
+ self._headline = 'kvm statistics - summary'
+ self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
+ if self.stats.fields_filter:
+ regex = self.stats.fields_filter
+ if len(regex) > MAX_REGEX_LEN:
+ regex = regex[:MAX_REGEX_LEN] + '...'
+ self.screen.addstr(1, 17, 'regex filter: {0}'.format(regex))
+ if self._display_guests:
+ col_name = 'Guest Name'
+ else:
+ col_name = 'Event'
+ self.screen.addstr(2, 1, '%-40s %10s%7s %8s' %
+ (col_name, 'Total', '%Total', 'CurAvg/s'),
+ curses.A_STANDOUT)
+ self.screen.addstr(4, 1, 'Collecting data...')
+ self.screen.refresh()
+
+ def _refresh_body(self, sleeptime):
+ def insert_child(sorted_items, child, values, parent):
+ num = len(sorted_items)
+ for i in range(0, num):
+ # only add child if parent is present
+ if parent.startswith(sorted_items[i][0]):
+ sorted_items.insert(i + 1, (' ' + child, values))
+
+ def get_sorted_events(self, stats):
+ """ separate parent and child events """
+ if self._sorting == SORT_DEFAULT:
+ def sortkey(pair):
+ # sort by (delta value, overall value)
+ v = pair[1]
+ return (v.delta, v.value)
+ else:
+ def sortkey(pair):
+ # sort by overall value
+ v = pair[1]
+ return v.value
+
+ childs = []
+ sorted_items = []
+ # we can't rule out child events to appear prior to parents even
+ # when sorted - separate out all children first, and add in later
+ for key, values in sorted(stats.items(), key=sortkey,
+ reverse=True):
+ if values == (0, 0):
+ continue
+ if key.find(' ') != -1:
+ if not self.stats.child_events:
+ continue
+ childs.insert(0, (key, values))
+ else:
+ sorted_items.append((key, values))
+ if self.stats.child_events:
+ for key, values in childs:
+ (child, parent) = key.split(' ')
+ insert_child(sorted_items, child, values, parent)
+
+ return sorted_items
+
+ if not self._is_running_guest(self.stats.pid_filter):
+ if self._gname:
+ try: # ...to identify the guest by name in case it's back
+ pids = self.get_pid_from_gname(self._gname)
+ if len(pids) == 1:
+ self._refresh_header(pids[0])
+ self._update_pid(pids[0])
+ return
+ except:
+ pass
+ self._display_guest_dead()
+ # leave final data on screen
+ return
+ row = 3
+ self.screen.move(row, 0)
+ self.screen.clrtobot()
+ stats = self.stats.get(self._display_guests)
+ total = 0.
+ ctotal = 0.
+ for key, values in stats.items():
+ if self._display_guests:
+ if self.get_gname_from_pid(key):
+ total += values.value
+ continue
+            if ' ' not in key:
+ total += values.value
+ else:
+ ctotal += values.value
+ if total == 0.:
+ # we don't have any fields, or all non-child events are filtered
+ total = ctotal
+
+ # print events
+ tavg = 0
+ tcur = 0
+ guest_removed = False
+ for key, values in get_sorted_events(self, stats):
+ if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
+ break
+ if self._display_guests:
+ key = self.get_gname_from_pid(key)
+ if not key:
+ continue
+ cur = int(round(values.delta / sleeptime)) if values.delta else 0
+ if cur < 0:
+ guest_removed = True
+ continue
+ if key[0] != ' ':
+ if values.delta:
+ tcur += values.delta
+ ptotal = values.value
+ ltotal = total
+ else:
+ ltotal = ptotal
+ self.screen.addstr(row, 1, '%-40s %10d%7.1f %8s' % (key,
+ values.value,
+ values.value * 100 / float(ltotal), cur))
+ row += 1
+ if row == 3:
+ if guest_removed:
+ self.screen.addstr(4, 1, 'Guest removed, updating...')
+ else:
+ self.screen.addstr(4, 1, 'No matching events reported yet')
+ if row > 4:
+ tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
+ self.screen.addstr(row, 1, '%-40s %10d %8s' %
+ ('Total', total, tavg), curses.A_BOLD)
+ self.screen.refresh()
+
+ def _display_guest_dead(self):
+ marker = ' Guest is DEAD '
+ y = min(len(self._headline), 80 - len(marker))
+ self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
+
+ def _show_msg(self, text):
+ """Display message centered text and exit on key press"""
+ hint = 'Press any key to continue'
+ curses.cbreak()
+ self.screen.erase()
+ (x, term_width) = self.screen.getmaxyx()
+ row = 2
+ for line in text:
+ start = (term_width - len(line)) // 2
+ self.screen.addstr(row, start, line)
+ row += 1
+ self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
+ curses.A_STANDOUT)
+ self.screen.getkey()
+
+ def _show_help_interactive(self):
+ """Display help with list of interactive commands"""
+ msg = (' b toggle events by guests (debugfs only, honors'
+ ' filters)',
+ ' c clear filter',
+ ' f filter by regular expression',
+ ' g filter by guest name/PID',
+ ' h display interactive commands reference',
+ ' o toggle sorting order (Total vs CurAvg/s)',
+ ' p filter by guest name/PID',
+ ' q quit',
+ ' r reset stats',
+ ' s set update interval',
+ ' x toggle reporting of stats for individual child trace'
+ ' events',
+ 'Any other key refreshes statistics immediately')
+ curses.cbreak()
+ self.screen.erase()
+ self.screen.addstr(0, 0, "Interactive commands reference",
+ curses.A_BOLD)
+ self.screen.addstr(2, 0, "Press any key to exit", curses.A_STANDOUT)
+ row = 4
+ for line in msg:
+ self.screen.addstr(row, 0, line)
+ row += 1
+ self.screen.getkey()
+ self._refresh_header()
+
+ def _show_filter_selection(self):
+ """Draws filter selection mask.
+
+ Asks for a valid regex and sets the fields filter accordingly.
+
+ """
+ msg = ''
+ while True:
+ self.screen.erase()
+ self.screen.addstr(0, 0,
+ "Show statistics for events matching a regex.",
+ curses.A_BOLD)
+ self.screen.addstr(2, 0,
+ "Current regex: {0}"
+ .format(self.stats.fields_filter))
+ self.screen.addstr(5, 0, msg)
+ self.screen.addstr(3, 0, "New regex: ")
+ curses.echo()
+ regex = self.screen.getstr().decode(ENCODING)
+ curses.noecho()
+ if len(regex) == 0:
+ self.stats.fields_filter = ''
+ self._refresh_header()
+ return
+ try:
+ re.compile(regex)
+ self.stats.fields_filter = regex
+ self._refresh_header()
+ return
+ except re.error:
+ msg = '"' + regex + '": Not a valid regular expression'
+ continue
+
+ def _show_set_update_interval(self):
+ """Draws update interval selection mask."""
+ msg = ''
+ while True:
+ self.screen.erase()
+ self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
+ DELAY_DEFAULT, curses.A_BOLD)
+ self.screen.addstr(4, 0, msg)
+ self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
+ self._delay_regular)
+ curses.echo()
+ val = self.screen.getstr().decode(ENCODING)
+ curses.noecho()
+
+ try:
+ if len(val) > 0:
+ delay = float(val)
+ if delay < 0.1:
+ msg = '"' + str(val) + '": Value must be >=0.1'
+ continue
+ if delay > 25.5:
+ msg = '"' + str(val) + '": Value must be <=25.5'
+ continue
+ else:
+ delay = DELAY_DEFAULT
+ self._delay_regular = delay
+ break
+
+ except ValueError:
+ msg = '"' + str(val) + '": Invalid value'
+ self._refresh_header()
+
+ def _is_running_guest(self, pid):
+ """Check if pid is still a running process."""
+ if not pid:
+ return True
+ return os.path.isdir(os.path.join('/proc/', str(pid)))
+
+ def _show_vm_selection_by_guest(self):
+ """Draws guest selection mask.
+
+ Asks for a guest name or pid until a valid guest name or '' is entered.
+
+ """
+ msg = ''
+ while True:
+ self.screen.erase()
+ self.screen.addstr(0, 0,
+ 'Show statistics for specific guest or pid.',
+ curses.A_BOLD)
+ self.screen.addstr(1, 0,
+ 'This might limit the shown data to the trace '
+ 'statistics.')
+ self.screen.addstr(5, 0, msg)
+ self._print_all_gnames(7)
+ curses.echo()
+ curses.curs_set(1)
+ self.screen.addstr(3, 0, "Guest or pid [ENTER exits]: ")
+ guest = self.screen.getstr().decode(ENCODING)
+ curses.noecho()
+
+ pid = 0
+ if not guest or guest == '0':
+ break
+ if guest.isdigit():
+ if not self._is_running_guest(guest):
+ msg = '"' + guest + '": Not a running process'
+ continue
+ pid = int(guest)
+ break
+ pids = []
+ try:
+ pids = self.get_pid_from_gname(guest)
+ except:
+ msg = '"' + guest + '": Internal error while searching, ' \
+ 'use pid filter instead'
+ continue
+ if len(pids) == 0:
+ msg = '"' + guest + '": Not an active guest'
+ continue
+ if len(pids) > 1:
+ msg = '"' + guest + '": Multiple matches found, use pid ' \
+ 'filter instead'
+ continue
+ pid = pids[0]
+ break
+ curses.curs_set(0)
+ self._refresh_header(pid)
+ self._update_pid(pid)
+
+ def show_stats(self):
+ """Refreshes the screen and processes user input."""
+ sleeptime = self._delay_initial
+ self._refresh_header()
+ start = 0.0 # result based on init value never appears on screen
+ while True:
+ self._refresh_body(time.time() - start)
+ curses.halfdelay(int(sleeptime * 10))
+ start = time.time()
+ sleeptime = self._delay_regular
+ try:
+ char = self.screen.getkey()
+ if char == 'b':
+ self._display_guests = not self._display_guests
+ if self.stats.toggle_display_guests(self._display_guests):
+ self._show_msg(['Command not available with '
+ 'tracepoints enabled', 'Restart with '
+ 'debugfs only (see option \'-d\') and '
+ 'try again!'])
+ self._display_guests = not self._display_guests
+ self._refresh_header()
+ if char == 'c':
+ self.stats.fields_filter = ''
+ self._refresh_header(0)
+ self._update_pid(0)
+ if char == 'f':
+ curses.curs_set(1)
+ self._show_filter_selection()
+ curses.curs_set(0)
+ sleeptime = self._delay_initial
+ if char == 'g' or char == 'p':
+ self._show_vm_selection_by_guest()
+ sleeptime = self._delay_initial
+ if char == 'h':
+ self._show_help_interactive()
+ if char == 'o':
+ self._sorting = not self._sorting
+ if char == 'q':
+ break
+ if char == 'r':
+ self.stats.reset()
+ if char == 's':
+ curses.curs_set(1)
+ self._show_set_update_interval()
+ curses.curs_set(0)
+ sleeptime = self._delay_initial
+ if char == 'x':
+ self.stats.child_events = not self.stats.child_events
+ except KeyboardInterrupt:
+ break
+ except curses.error:
+ continue
+
+
+def batch(stats):
+ """Prints statistics in a key, value format."""
+ try:
+ s = stats.get()
+ time.sleep(1)
+ s = stats.get()
+ for key, values in sorted(s.items()):
+ print('%-42s%10d%10d' % (key.split(' ')[0], values.value,
+ values.delta))
+ except KeyboardInterrupt:
+ pass
+
+
+def log(stats):
+ """Prints statistics as reiterating key block, multiple value blocks."""
+ keys = sorted(stats.get().keys())
+
+ def banner():
+ for key in keys:
+ print(key.split(' ')[0], end=' ')
+ print()
+
+ def statline():
+ s = stats.get()
+ for key in keys:
+ print(' %9d' % s[key].delta, end=' ')
+ print()
+ line = 0
+ banner_repeat = 20
+ while True:
+ try:
+ time.sleep(1)
+ if line % banner_repeat == 0:
+ banner()
+ statline()
+ line += 1
+ except KeyboardInterrupt:
+ break
+
+
+def get_options():
+ """Returns processed program arguments."""
+ description_text = """
+This script displays various statistics about VMs running under KVM.
+The statistics are gathered from the KVM debugfs entries and / or the
+currently available perf traces.
+
+The monitoring takes additional cpu cycles and might affect the VM's
+performance.
+
+Requirements:
+- Access to:
+ %s
+ %s/events/*
+ /proc/pid/task
+- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
+ CAP_SYS_ADMIN and perf events are used.
+- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
+ the large number of files that are possibly opened.
+
+Interactive Commands:
+ b toggle events by guests (debugfs only, honors filters)
+ c clear filter
+ f filter by regular expression
+ g filter by guest name
+ h display interactive commands reference
+ o toggle sorting order (Total vs CurAvg/s)
+ p filter by PID
+ q quit
+ r reset stats
+ s set update interval
+ x toggle reporting of stats for individual child trace events
+Press any other key to refresh statistics immediately.
+""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)
+
+ class PlainHelpFormatter(optparse.IndentedHelpFormatter):
+ def format_description(self, description):
+ if description:
+ return description + "\n"
+ else:
+ return ""
+
+ def cb_guest_to_pid(option, opt, val, parser):
+ try:
+ pids = Tui.get_pid_from_gname(val)
+ except:
+ sys.exit('Error while searching for guest "{}". Use "-p" to '
+ 'specify a pid instead?'.format(val))
+ if len(pids) == 0:
+ sys.exit('Error: No guest by the name "{}" found'.format(val))
+ if len(pids) > 1:
+            sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
+                     'to specify the desired pid'
+                     .format(" ".join(map(str, pids))))
+ parser.values.pid = pids[0]
+
+ optparser = optparse.OptionParser(description=description_text,
+ formatter=PlainHelpFormatter())
+ optparser.add_option('-1', '--once', '--batch',
+ action='store_true',
+ default=False,
+ dest='once',
+ help='run in batch mode for one second',
+ )
+ optparser.add_option('-i', '--debugfs-include-past',
+ action='store_true',
+ default=False,
+ dest='dbgfs_include_past',
+ help='include all available data on past events for '
+ 'debugfs',
+ )
+ optparser.add_option('-l', '--log',
+ action='store_true',
+ default=False,
+ dest='log',
+ help='run in logging mode (like vmstat)',
+ )
+ optparser.add_option('-t', '--tracepoints',
+ action='store_true',
+ default=False,
+ dest='tracepoints',
+ help='retrieve statistics from tracepoints',
+ )
+ optparser.add_option('-d', '--debugfs',
+ action='store_true',
+ default=False,
+ dest='debugfs',
+ help='retrieve statistics from debugfs',
+ )
+ optparser.add_option('-f', '--fields',
+ action='store',
+ default='',
+ dest='fields',
+ help='''fields to display (regex)
+ "-f help" for a list of available events''',
+ )
+ optparser.add_option('-p', '--pid',
+ action='store',
+ default=0,
+ type='int',
+ dest='pid',
+ help='restrict statistics to pid',
+ )
+ optparser.add_option('-g', '--guest',
+ action='callback',
+ type='string',
+ dest='pid',
+ metavar='GUEST',
+ help='restrict statistics to guest by name',
+ callback=cb_guest_to_pid,
+ )
+ options, unkn = optparser.parse_args(sys.argv)
+ if len(unkn) != 1:
+ sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+ try:
+ # verify that we were passed a valid regex up front
+ re.compile(options.fields)
+ except re.error:
+ sys.exit('Error: "' + options.fields + '" is not a valid regular '
+ 'expression')
+
+ return options
+
+
+def check_access(options):
+ """Exits if the current user can't access all needed directories."""
+ if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
+ not options.debugfs):
+ sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
+ "when using the option -t (default).\n"
+ "If it is enabled, make {0} readable by the "
+ "current user.\n"
+ .format(PATH_DEBUGFS_TRACING))
+ if options.tracepoints:
+ sys.exit(1)
+
+ sys.stderr.write("Falling back to debugfs statistics!\n")
+ options.debugfs = True
+ time.sleep(5)
+
+ return options
+
+
+def assign_globals():
+ global PATH_DEBUGFS_KVM
+ global PATH_DEBUGFS_TRACING
+
+ debugfs = ''
+ for line in open('/proc/mounts'):
+ if line.split(' ')[0] == 'debugfs':
+ debugfs = line.split(' ')[1]
+ break
+ if debugfs == '':
+ sys.stderr.write("Please make sure that CONFIG_DEBUG_FS is enabled in "
+ "your kernel, mounted and\nreadable by the current "
+ "user:\n"
+ "('mount -t debugfs debugfs /sys/kernel/debug')\n")
+ sys.exit(1)
+
+ PATH_DEBUGFS_KVM = os.path.join(debugfs, 'kvm')
+ PATH_DEBUGFS_TRACING = os.path.join(debugfs, 'tracing')
+
+ if not os.path.exists(PATH_DEBUGFS_KVM):
+ sys.stderr.write("Please make sure that CONFIG_KVM is enabled in "
+ "your kernel and that the modules are loaded.\n")
+ sys.exit(1)
+
+
+def main():
+ assign_globals()
+ options = get_options()
+ options = check_access(options)
+
+ if (options.pid > 0 and
+ not os.path.isdir(os.path.join('/proc/',
+ str(options.pid)))):
+        sys.stderr.write('Did you use an (unsupported) tid instead of a pid?\n')
+ sys.exit('Specified pid does not exist.')
+
+ stats = Stats(options)
+
+ if options.fields == 'help':
+ stats.fields_filter = None
+ event_list = []
+ for key in stats.get().keys():
+ event_list.append(key.split('(', 1)[0])
+ sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
+ sys.exit(0)
+
+ if options.log:
+ log(stats)
+ elif not options.once:
+ with Tui(stats) as tui:
+ tui.show_stats()
+ else:
+ batch(stats)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
new file mode 100644
index 000000000..c057ba523
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -0,0 +1,105 @@
+kvm_stat(1)
+===========
+
+NAME
+----
+kvm_stat - Report KVM kernel module event counters
+
+SYNOPSIS
+--------
+[verse]
+'kvm_stat' [OPTION]...
+
+DESCRIPTION
+-----------
+kvm_stat prints counts of KVM kernel module trace events. These events signify
+state transitions such as guest mode entry and exit.
+
+This tool is useful for observing guest behavior from the host perspective.
+Conclusions about performance or buggy behavior can often be drawn from
+its output.
+While running in regular mode, use any of the keys listed in section
+'Interactive Commands' below.
+Use batch and logging modes for scripting purposes.
+
+The set of KVM kernel module trace events may be specific to the kernel version
+or architecture. It is best to check the KVM kernel module source code for the
+meaning of events.
+
+INTERACTIVE COMMANDS
+--------------------
+[horizontal]
+*b*:: toggle events by guests (debugfs only, honors filters)
+
+*c*:: clear filter
+
+*f*:: filter by regular expression
+ :: *Note*: Child events pull in their parents, and parents' stats summarize
+ all child events, not just the filtered ones
+
+*g*:: filter by guest name/PID
+
+*h*:: display interactive commands reference
+
+*o*:: toggle sorting order (Total vs CurAvg/s)
+
+*p*:: filter by guest name/PID
+
+*q*:: quit
+
+*r*:: reset stats
+
+*s*:: set update interval
+
+*x*:: toggle reporting of stats for child trace events
+ :: *Note*: The stats for the parents summarize the respective child trace
+ events
+
+Press any other key to refresh statistics immediately.
+
+OPTIONS
+-------
+-1::
+--once::
+--batch::
+ run in batch mode for one second
+
+-l::
+--log::
+ run in logging mode (like vmstat)
+
+-t::
+--tracepoints::
+ retrieve statistics from tracepoints
+
+-d::
+--debugfs::
+ retrieve statistics from debugfs
+
+-i::
+--debugfs-include-past::
+ include all available data on past events for debugfs
+
+-p<pid>::
+--pid=<pid>::
+ limit statistics to one virtual machine (pid)
+
+-g<guest>::
+--guest=<guest_name>::
+ limit statistics to one virtual machine (guest name)
+
+-f<fields>::
+--fields=<fields>::
+ fields to display (regex), "-f help" for a list of available events
+
+-h::
+--help::
+ show help message
+
+SEE ALSO
+--------
+'perf'(1), 'trace-cmd'(1)
+
+AUTHOR
+------
+Stefan Hajnoczi <stefanha@redhat.com>
diff --git a/tools/laptop/dslm/.gitignore b/tools/laptop/dslm/.gitignore
new file mode 100644
index 000000000..9fc984e64
--- /dev/null
+++ b/tools/laptop/dslm/.gitignore
@@ -0,0 +1 @@
+dslm
diff --git a/tools/laptop/dslm/Makefile b/tools/laptop/dslm/Makefile
new file mode 100644
index 000000000..90f512c4e
--- /dev/null
+++ b/tools/laptop/dslm/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := dslm
+
+all: $(PROGS)
+
+clean:
+ rm -fr $(PROGS)
diff --git a/tools/laptop/dslm/dslm.c b/tools/laptop/dslm/dslm.c
new file mode 100644
index 000000000..d5dd2d4b0
--- /dev/null
+++ b/tools/laptop/dslm/dslm.c
@@ -0,0 +1,166 @@
+/*
+ * dslm.c
+ * Simple Disk Sleep Monitor
+ * by Bartek Kania
+ * Licensed under the GPL
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/hdreg.h>
+
+#ifdef DEBUG
+#define D(x) x
+#else
+#define D(x)
+#endif
+
+int endit = 0;
+
+/* Check if the disk is in powersave-mode
+ * Most of the code is stolen from hdparm.
+ * 1 = active, 0 = standby/sleep, -1 = unknown */
+static int check_powermode(int fd)
+{
+ unsigned char args[4] = {WIN_CHECKPOWERMODE1,0,0,0};
+ int state;
+
+ if (ioctl(fd, HDIO_DRIVE_CMD, &args)
+ && (args[0] = WIN_CHECKPOWERMODE2) /* try again with 0x98 */
+ && ioctl(fd, HDIO_DRIVE_CMD, &args)) {
+ if (errno != EIO || args[0] != 0 || args[1] != 0) {
+ state = -1; /* "unknown"; */
+ } else
+ state = 0; /* "sleeping"; */
+ } else {
+ state = (args[2] == 255) ? 1 : 0;
+ }
+ D(printf(" drive state is: %d\n", state));
+
+ return state;
+}
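
The same query can be issued from a script as well; a hedged Python sketch
of the WIN_CHECKPOWERMODE1 path (the ioctl number is from linux/hdreg.h;
requires root and an ATA disk node):

    import fcntl
    import os

    HDIO_DRIVE_CMD = 0x031f       # from linux/hdreg.h
    WIN_CHECKPOWERMODE1 = 0xe5

    fd = os.open('/dev/sda', os.O_RDONLY | os.O_NONBLOCK)
    args = bytearray([WIN_CHECKPOWERMODE1, 0, 0, 0])
    fcntl.ioctl(fd, HDIO_DRIVE_CMD, args)  # kernel fills in the result
    print('active' if args[2] == 0xff else 'standby/sleeping')
    os.close(fd)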
+
+static char *state_name(int i)
+{
+ if (i == -1) return "unknown";
+ if (i == 0) return "sleeping";
+ if (i == 1) return "active";
+
+ return "internal error";
+}
+
+static char *myctime(time_t time)
+{
+ char *ts = ctime(&time);
+ ts[strlen(ts) - 1] = 0;
+
+ return ts;
+}
+
+static void measure(int fd)
+{
+ time_t start_time;
+ int last_state;
+ time_t last_time;
+ int curr_state;
+ time_t curr_time = 0;
+ time_t time_diff;
+ time_t active_time = 0;
+ time_t sleep_time = 0;
+ time_t unknown_time = 0;
+ time_t total_time = 0;
+ int changes = 0;
+ float tmp;
+
+ printf("Starting measurements\n");
+
+ last_state = check_powermode(fd);
+ start_time = last_time = time(0);
+ printf(" System is in state %s\n\n", state_name(last_state));
+
+ while(!endit) {
+ sleep(1);
+ curr_state = check_powermode(fd);
+
+ if (curr_state != last_state || endit) {
+ changes++;
+ curr_time = time(0);
+ time_diff = curr_time - last_time;
+
+ if (last_state == 1) active_time += time_diff;
+ else if (last_state == 0) sleep_time += time_diff;
+ else unknown_time += time_diff;
+
+ last_state = curr_state;
+ last_time = curr_time;
+
+ printf("%s: State-change to %s\n", myctime(curr_time),
+ state_name(curr_state));
+ }
+ }
+ changes--; /* Compensate for SIGINT */
+
+ total_time = time(0) - start_time;
+ printf("\nTotal running time: %lus\n", curr_time - start_time);
+ printf(" State changed %d times\n", changes);
+
+ tmp = (float)sleep_time / (float)total_time * 100;
+ printf(" Time in sleep state: %lus (%.2f%%)\n", sleep_time, tmp);
+ tmp = (float)active_time / (float)total_time * 100;
+ printf(" Time in active state: %lus (%.2f%%)\n", active_time, tmp);
+ tmp = (float)unknown_time / (float)total_time * 100;
+ printf(" Time in unknown state: %lus (%.2f%%)\n", unknown_time, tmp);
+}
+
+static void ender(int s)
+{
+ endit = 1;
+}
+
+static void usage(void)
+{
+ puts("usage: dslm [-w <time>] <disk>");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int fd;
+ char *disk = 0;
+ int settle_time = 60;
+
+ /* Parse the simple command-line */
+ if (argc == 2)
+ disk = argv[1];
+ else if (argc == 4) {
+ settle_time = atoi(argv[2]);
+ disk = argv[3];
+ } else
+ usage();
+
+ if (!(fd = open(disk, O_RDONLY|O_NONBLOCK))) {
+ printf("Can't open %s, because: %s\n", disk, strerror(errno));
+ exit(-1);
+ }
+
+ if (settle_time) {
+ printf("Waiting %d seconds for the system to settle down to "
+ "'normal'\n", settle_time);
+ sleep(settle_time);
+ } else
+ puts("Not waiting for system to settle down");
+
+ signal(SIGINT, ender);
+
+ measure(fd);
+
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile
new file mode 100644
index 000000000..b572d9425
--- /dev/null
+++ b/tools/laptop/freefall/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+PREFIX ?= /usr
+SBINDIR ?= sbin
+INSTALL ?= install
+
+TARGET = freefall
+
+all: $(TARGET)
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+clean:
+ $(RM) $(TARGET)
+
+install: freefall
+ $(INSTALL) -D -m 755 $(TARGET) $(DESTDIR)$(PREFIX)/$(SBINDIR)/$(TARGET)
diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c
new file mode 100644
index 000000000..5e44b20b1
--- /dev/null
+++ b/tools/laptop/freefall/freefall.c
@@ -0,0 +1,174 @@
+/* Disk protection for HP/DELL machines.
+ *
+ * Copyright 2008 Eric Piel
+ * Copyright 2009 Pavel Machek <pavel@ucw.cz>
+ * Copyright 2012 Sonal Santan
+ * Copyright 2014 Pali Rohár <pali.rohar@gmail.com>
+ *
+ * GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <syslog.h>
+
+static int noled;
+static char unload_heads_path[64];
+static char device_path[32];
+static const char app_name[] = "FREE FALL";
+
+static int set_unload_heads_path(char *device)
+{
+ if (strlen(device) <= 5 || strncmp(device, "/dev/", 5) != 0)
+ return -EINVAL;
+ strncpy(device_path, device, sizeof(device_path) - 1);
+
+ snprintf(unload_heads_path, sizeof(unload_heads_path) - 1,
+ "/sys/block/%s/device/unload_heads", device+5);
+ return 0;
+}
+
+static int valid_disk(void)
+{
+ int fd = open(unload_heads_path, O_RDONLY);
+
+ if (fd < 0) {
+ perror(unload_heads_path);
+ return 0;
+ }
+
+ close(fd);
+ return 1;
+}
+
+static void write_int(char *path, int i)
+{
+ char buf[1024];
+ int fd = open(path, O_RDWR);
+
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ sprintf(buf, "%d", i);
+
+ if (write(fd, buf, strlen(buf)) != strlen(buf)) {
+ perror("write");
+ exit(1);
+ }
+
+ close(fd);
+}
+
+static void set_led(int on)
+{
+ if (noled)
+ return;
+ write_int("/sys/class/leds/hp::hddprotect/brightness", on);
+}
+
+static void protect(int seconds)
+{
+ const char *str = (seconds == 0) ? "Unparked" : "Parked";
+
+ write_int(unload_heads_path, seconds*1000);
+ syslog(LOG_INFO, "%s %s disk head\n", str, device_path);
+}
+
+static int on_ac(void)
+{
+ /* /sys/class/power_supply/AC0/online */
+ return 1;
+}
+
+static int lid_open(void)
+{
+ /* /proc/acpi/button/lid/LID/state */
+ return 1;
+}
+
+static void ignore_me(int signum)
+{
+ protect(0);
+ set_led(0);
+}
+
+int main(int argc, char **argv)
+{
+ int fd, ret;
+ struct stat st;
+ struct sched_param param;
+
+ if (argc == 1)
+ ret = set_unload_heads_path("/dev/sda");
+ else if (argc == 2)
+ ret = set_unload_heads_path(argv[1]);
+ else
+ ret = -EINVAL;
+
+ if (ret || !valid_disk()) {
+ fprintf(stderr, "usage: %s <device> (default: /dev/sda)\n",
+ argv[0]);
+ exit(1);
+ }
+
+ fd = open("/dev/freefall", O_RDONLY);
+ if (fd < 0) {
+ perror("/dev/freefall");
+ return EXIT_FAILURE;
+ }
+
+ if (stat("/sys/class/leds/hp::hddprotect/brightness", &st))
+ noled = 1;
+
+ if (daemon(0, 0) != 0) {
+ perror("daemon");
+ return EXIT_FAILURE;
+ }
+
+ openlog(app_name, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1);
+
+ param.sched_priority = sched_get_priority_max(SCHED_FIFO);
+ sched_setscheduler(0, SCHED_FIFO, &param);
+ mlockall(MCL_CURRENT|MCL_FUTURE);
+
+ signal(SIGALRM, ignore_me);
+
+ for (;;) {
+ unsigned char count;
+
+ ret = read(fd, &count, sizeof(count));
+ alarm(0);
+ if ((ret == -1) && (errno == EINTR)) {
+ /* Alarm expired, time to unpark the heads */
+ continue;
+ }
+
+ if (ret != sizeof(count)) {
+ perror("read");
+ break;
+ }
+
+ protect(21);
+ set_led(1);
+ if (1 || on_ac() || lid_open())
+ alarm(2);
+ else
+ alarm(20);
+ }
+
+ closelog();
+ close(fd);
+ return EXIT_SUCCESS;
+}
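
The protection mechanism freefall drives is a plain sysfs attribute:
writing a millisecond count to unload_heads parks the heads for that long,
and writing 0 unparks them. A hedged Python sketch (disk name assumed):

    def park(seconds, disk='sda'):
        # the attribute takes milliseconds; 0 cancels an earlier park
        with open('/sys/block/%s/device/unload_heads' % disk, 'w') as f:
            f.write(str(seconds * 1000))

    park(21)  # mirrors protect(21) from the read loop above
    park(0)   # mirrors protect(0) from the SIGALRM handler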
diff --git a/tools/leds/.gitignore b/tools/leds/.gitignore
new file mode 100644
index 000000000..ac96d9f53
--- /dev/null
+++ b/tools/leds/.gitignore
@@ -0,0 +1 @@
+uledmon
diff --git a/tools/leds/Makefile b/tools/leds/Makefile
new file mode 100644
index 000000000..7b6bed13d
--- /dev/null
+++ b/tools/leds/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for LEDs tools
+
+CFLAGS = -Wall -Wextra -g -I../../include/uapi
+
+all: uledmon led_hw_brightness_mon
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ $(RM) uledmon led_hw_brightness_mon
+
+.PHONY: all clean
diff --git a/tools/leds/led_hw_brightness_mon.c b/tools/leds/led_hw_brightness_mon.c
new file mode 100644
index 000000000..eb65ae988
--- /dev/null
+++ b/tools/leds/led_hw_brightness_mon.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * led_hw_brightness_mon.c
+ *
+ * This program monitors LED brightness level changes that originate in
+ * hardware/firmware, i.e. outside of kernel control.
+ * A timestamp and brightness value is printed each time the brightness changes.
+ *
+ * Usage: led_hw_brightness_mon <device-name>
+ *
+ * <device-name> is the name of the LED class device to be monitored. Pressing
+ * CTRL+C will exit.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/uleds.h>
+
+int main(int argc, char const *argv[])
+{
+ int fd, ret;
+ char brightness_file_path[LED_MAX_NAME_SIZE + 11];
+ struct pollfd pollfd;
+ struct timespec ts;
+ char buf[11];
+
+ if (argc != 2) {
+ fprintf(stderr, "Requires <device-name> argument\n");
+ return 1;
+ }
+
+	snprintf(brightness_file_path, sizeof(brightness_file_path),
+ "/sys/class/leds/%s/brightness_hw_changed", argv[1]);
+
+ fd = open(brightness_file_path, O_RDONLY);
+ if (fd == -1) {
+ printf("Failed to open %s file\n", brightness_file_path);
+ return 1;
+ }
+
+ /*
+ * read may fail if no hw brightness change has occurred so far,
+ * but it is required to avoid spurious poll notifications in
+ * the opposite case.
+ */
+ read(fd, buf, sizeof(buf));
+
+ pollfd.fd = fd;
+ pollfd.events = POLLPRI;
+
+ while (1) {
+ ret = poll(&pollfd, 1, -1);
+ if (ret == -1) {
+ printf("Failed to poll %s file (%d)\n",
+ brightness_file_path, ret);
+ ret = 1;
+ break;
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0)
+ break;
+
+ ret = lseek(pollfd.fd, 0, SEEK_SET);
+ if (ret < 0) {
+ printf("lseek failed (%d)\n", ret);
+ break;
+ }
+
+ printf("[%ld.%09ld] %d\n", ts.tv_sec, ts.tv_nsec, atoi(buf));
+ }
+
+ close(fd);
+
+ return ret;
+}
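
The read-poll-seek-reread sequence above is the standard pattern for
watching a sysfs attribute via POLLPRI; a hedged Python equivalent (the
LED name is hypothetical):

    import os
    import select

    fd = os.open('/sys/class/leds/myled/brightness_hw_changed', os.O_RDONLY)
    os.read(fd, 16)                  # initial read avoids a spurious wakeup
    poller = select.poll()
    poller.register(fd, select.POLLPRI)
    while True:
        poller.poll()                # blocks until the attribute changes
        os.lseek(fd, 0, os.SEEK_SET)
        print(os.read(fd, 16).decode().strip())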
diff --git a/tools/leds/uledmon.c b/tools/leds/uledmon.c
new file mode 100644
index 000000000..c15a39c1f
--- /dev/null
+++ b/tools/leds/uledmon.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * uledmon.c
+ *
+ * This program creates a new userspace LED class device and monitors it. A
+ * timestamp and brightness value is printed each time the brightness changes.
+ *
+ * Usage: uledmon <device-name>
+ *
+ * <device-name> is the name of the LED class device to be created. Pressing
+ * CTRL+C will exit.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/uleds.h>
+
+int main(int argc, char const *argv[])
+{
+ struct uleds_user_dev uleds_dev;
+ int fd, ret;
+ int brightness;
+ struct timespec ts;
+
+ if (argc != 2) {
+ fprintf(stderr, "Requires <device-name> argument\n");
+ return 1;
+ }
+
+ strncpy(uleds_dev.name, argv[1], LED_MAX_NAME_SIZE);
+ uleds_dev.max_brightness = 100;
+
+ fd = open("/dev/uleds", O_RDWR);
+ if (fd == -1) {
+ perror("Failed to open /dev/uleds");
+ return 1;
+ }
+
+ ret = write(fd, &uleds_dev, sizeof(uleds_dev));
+ if (ret == -1) {
+ perror("Failed to write to /dev/uleds");
+ close(fd);
+ return 1;
+ }
+
+ while (1) {
+ ret = read(fd, &brightness, sizeof(brightness));
+ if (ret == -1) {
+ perror("Failed to read from /dev/uleds");
+ close(fd);
+ return 1;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ printf("[%ld.%09ld] %u\n", ts.tv_sec, ts.tv_nsec, brightness);
+ }
+
+ close(fd);
+
+ return 0;
+}
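
The /dev/uleds protocol used above is one struct write followed by
blocking int-sized reads; a hedged Python equivalent (struct layout per
linux/uleds.h: a 64-byte name followed by an int max_brightness):

    import os
    import struct

    LED_MAX_NAME_SIZE = 64           # from linux/uleds.h
    fd = os.open('/dev/uleds', os.O_RDWR)
    os.write(fd, struct.pack('%dsi' % LED_MAX_NAME_SIZE, b'pyled', 100))
    while True:
        brightness, = struct.unpack('i', os.read(fd, 4))
        print(brightness)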
diff --git a/tools/lib/api/Build b/tools/lib/api/Build
new file mode 100644
index 000000000..6e2373db5
--- /dev/null
+++ b/tools/lib/api/Build
@@ -0,0 +1,9 @@
+libapi-y += fd/
+libapi-y += fs/
+libapi-y += cpu.o
+libapi-y += debug.o
+libapi-y += str_error_r.o
+
+$(OUTPUT)str_error_r.o: ../str_error_r.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
new file mode 100644
index 000000000..a13e9c7f1
--- /dev/null
+++ b/tools/lib/api/Makefile
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak # QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+AR ?= $(CROSS_COMPILE)ar
+LD ?= $(CROSS_COMPILE)ld
+
+MAKEFLAGS += --no-print-directory
+
+LIBFILE = $(OUTPUT)libapi.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -U_FORTIFY_SOURCE -fPIC
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -O3
+else
+ CFLAGS += -O6
+endif
+endif
+
+ifeq ($(DEBUG),0)
+ CFLAGS += -D_FORTIFY_SOURCE
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+CFLAGS += -I$(srctree)/tools/lib/api
+CFLAGS += -I$(srctree)/tools/include
+
+RM = rm -f
+
+API_IN := $(OUTPUT)libapi-in.o
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(API_IN): FORCE
+ @$(MAKE) $(build)=libapi
+
+$(LIBFILE): $(API_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN)
+
+clean:
+ $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/api/cpu.c b/tools/lib/api/cpu.c
new file mode 100644
index 000000000..4af6d4b7a
--- /dev/null
+++ b/tools/lib/api/cpu.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+
+#include "cpu.h"
+#include "fs/fs.h"
+
+int cpu__get_max_freq(unsigned long long *freq)
+{
+ char entry[PATH_MAX];
+ int cpu;
+
+ if (sysfs__read_int("devices/system/cpu/online", &cpu) < 0)
+ return -1;
+
+ snprintf(entry, sizeof(entry),
+ "devices/system/cpu/cpu%d/cpufreq/cpuinfo_max_freq", cpu);
+
+ return sysfs__read_ull(entry, freq);
+}
diff --git a/tools/lib/api/cpu.h b/tools/lib/api/cpu.h
new file mode 100644
index 000000000..90a102fb2
--- /dev/null
+++ b/tools/lib/api/cpu.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_CPU__
+#define __API_CPU__
+
+int cpu__get_max_freq(unsigned long long *freq);
+
+#endif /* __API_CPU__ */
diff --git a/tools/lib/api/debug-internal.h b/tools/lib/api/debug-internal.h
new file mode 100644
index 000000000..80c783497
--- /dev/null
+++ b/tools/lib/api/debug-internal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_DEBUG_INTERNAL_H__
+#define __API_DEBUG_INTERNAL_H__
+
+#include "debug.h"
+
+#define __pr(func, fmt, ...) \
+do { \
+ if ((func)) \
+ (func)("libapi: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+extern libapi_print_fn_t __pr_warning;
+extern libapi_print_fn_t __pr_info;
+extern libapi_print_fn_t __pr_debug;
+
+#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+#endif /* __API_DEBUG_INTERNAL_H__ */
diff --git a/tools/lib/api/debug.c b/tools/lib/api/debug.c
new file mode 100644
index 000000000..69b1ba3d1
--- /dev/null
+++ b/tools/lib/api/debug.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdarg.h>
+#include "debug.h"
+#include "debug-internal.h"
+
+static int __base_pr(const char *format, ...)
+{
+ va_list args;
+ int err;
+
+ va_start(args, format);
+ err = vfprintf(stderr, format, args);
+ va_end(args);
+ return err;
+}
+
+libapi_print_fn_t __pr_warning = __base_pr;
+libapi_print_fn_t __pr_info = __base_pr;
+libapi_print_fn_t __pr_debug;
+
+void libapi_set_print(libapi_print_fn_t warn,
+ libapi_print_fn_t info,
+ libapi_print_fn_t debug)
+{
+ __pr_warning = warn;
+ __pr_info = info;
+ __pr_debug = debug;
+}
diff --git a/tools/lib/api/debug.h b/tools/lib/api/debug.h
new file mode 100644
index 000000000..3684dd6e0
--- /dev/null
+++ b/tools/lib/api/debug.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_DEBUG_H__
+#define __API_DEBUG_H__
+
+typedef int (*libapi_print_fn_t)(const char *, ...);
+
+void libapi_set_print(libapi_print_fn_t warn,
+ libapi_print_fn_t info,
+ libapi_print_fn_t debug);
+
+#endif /* __API_DEBUG_H__ */
diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build
new file mode 100644
index 000000000..605d99f6d
--- /dev/null
+++ b/tools/lib/api/fd/Build
@@ -0,0 +1 @@
+libapi-y += array.o
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
new file mode 100644
index 000000000..b0a035fc8
--- /dev/null
+++ b/tools/lib/api/fd/array.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright (C) 2014, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "array.h"
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+void fdarray__init(struct fdarray *fda, int nr_autogrow)
+{
+ fda->entries = NULL;
+ fda->priv = NULL;
+ fda->nr = fda->nr_alloc = 0;
+ fda->nr_autogrow = nr_autogrow;
+}
+
+int fdarray__grow(struct fdarray *fda, int nr)
+{
+ void *priv;
+ int nr_alloc = fda->nr_alloc + nr;
+ size_t psize = sizeof(fda->priv[0]) * nr_alloc;
+ size_t size = sizeof(struct pollfd) * nr_alloc;
+ struct pollfd *entries = realloc(fda->entries, size);
+
+ if (entries == NULL)
+ return -ENOMEM;
+
+ priv = realloc(fda->priv, psize);
+ if (priv == NULL) {
+ free(entries);
+ return -ENOMEM;
+ }
+
+ fda->nr_alloc = nr_alloc;
+ fda->entries = entries;
+ fda->priv = priv;
+ return 0;
+}
+
+struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow)
+{
+ struct fdarray *fda = calloc(1, sizeof(*fda));
+
+ if (fda != NULL) {
+ if (fdarray__grow(fda, nr_alloc)) {
+ free(fda);
+ fda = NULL;
+ } else {
+ fda->nr_autogrow = nr_autogrow;
+ }
+ }
+
+ return fda;
+}
+
+void fdarray__exit(struct fdarray *fda)
+{
+ free(fda->entries);
+ free(fda->priv);
+ fdarray__init(fda, 0);
+}
+
+void fdarray__delete(struct fdarray *fda)
+{
+ fdarray__exit(fda);
+ free(fda);
+}
+
+int fdarray__add(struct fdarray *fda, int fd, short revents)
+{
+ int pos = fda->nr;
+
+ if (fda->nr == fda->nr_alloc &&
+ fdarray__grow(fda, fda->nr_autogrow) < 0)
+ return -ENOMEM;
+
+ fda->entries[fda->nr].fd = fd;
+ fda->entries[fda->nr].events = revents;
+ fda->nr++;
+ return pos;
+}
+
+int fdarray__filter(struct fdarray *fda, short revents,
+ void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+ void *arg)
+{
+ int fd, nr = 0;
+
+ if (fda->nr == 0)
+ return 0;
+
+ for (fd = 0; fd < fda->nr; ++fd) {
+ if (fda->entries[fd].revents & revents) {
+ if (entry_destructor)
+ entry_destructor(fda, fd, arg);
+
+ continue;
+ }
+
+ if (fd != nr) {
+ fda->entries[nr] = fda->entries[fd];
+ fda->priv[nr] = fda->priv[fd];
+ }
+
+ ++nr;
+ }
+
+ return fda->nr = nr;
+}
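
fdarray__filter compacts the entries and their priv slots in lock-step
while running the destructor on filtered descriptors; a hedged Python
rendering of the same pass (entries assumed to be dicts with a 'revents'
key):

    def filter_fds(entries, priv, revents, destructor=None):
        nr = 0
        for i, entry in enumerate(entries):
            if entry['revents'] & revents:   # matched: drop this slot
                if destructor:
                    destructor(i)
                continue
            entries[nr], priv[nr] = entries[i], priv[i]
            nr += 1
        del entries[nr:], priv[nr:]
        return nr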
+
+int fdarray__poll(struct fdarray *fda, int timeout)
+{
+ return poll(fda->entries, fda->nr, timeout);
+}
+
+int fdarray__fprintf(struct fdarray *fda, FILE *fp)
+{
+ int fd, printed = fprintf(fp, "%d [ ", fda->nr);
+
+ for (fd = 0; fd < fda->nr; ++fd)
+ printed += fprintf(fp, "%s%d", fd ? ", " : "", fda->entries[fd].fd);
+
+ return printed + fprintf(fp, " ]");
+}
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
new file mode 100644
index 000000000..b39557d1a
--- /dev/null
+++ b/tools/lib/api/fd/array.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_FD_ARRAY__
+#define __API_FD_ARRAY__
+
+#include <stdio.h>
+
+struct pollfd;
+
+/**
+ * struct fdarray: Array of file descriptors
+ *
+ * @priv: Per array entry priv area, users should access just its contents,
+ * not set it to anything, as it is kept in synch with @entries, being
+ * realloc'ed, * for instance, in fdarray__{grow,filter}.
+ *
+ * I.e. using 'fda->priv[N].idx = * value' where N < fda->nr is ok,
+ * but doing 'fda->priv = malloc(M)' is not allowed.
+ */
+struct fdarray {
+ int nr;
+ int nr_alloc;
+ int nr_autogrow;
+ struct pollfd *entries;
+ union {
+ int idx;
+ void *ptr;
+ } *priv;
+};
+
+void fdarray__init(struct fdarray *fda, int nr_autogrow);
+void fdarray__exit(struct fdarray *fda);
+
+struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
+void fdarray__delete(struct fdarray *fda);
+
+int fdarray__add(struct fdarray *fda, int fd, short revents);
+int fdarray__poll(struct fdarray *fda, int timeout);
+int fdarray__filter(struct fdarray *fda, short revents,
+ void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
+ void *arg);
+int fdarray__grow(struct fdarray *fda, int extra);
+int fdarray__fprintf(struct fdarray *fda, FILE *fp);
+
+static inline int fdarray__available_entries(struct fdarray *fda)
+{
+ return fda->nr_alloc - fda->nr;
+}
+
+#endif /* __API_FD_ARRAY__ */
diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build
new file mode 100644
index 000000000..f4ed9629a
--- /dev/null
+++ b/tools/lib/api/fs/Build
@@ -0,0 +1,2 @@
+libapi-y += fs.o
+libapi-y += tracing_path.o
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
new file mode 100644
index 000000000..4cc69675c
--- /dev/null
+++ b/tools/lib/api/fs/fs.c
@@ -0,0 +1,558 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mount.h>
+
+#include "fs.h"
+#include "debug-internal.h"
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+#ifndef SYSFS_MAGIC
+#define SYSFS_MAGIC 0x62656572
+#endif
+
+#ifndef PROC_SUPER_MAGIC
+#define PROC_SUPER_MAGIC 0x9fa0
+#endif
+
+#ifndef DEBUGFS_MAGIC
+#define DEBUGFS_MAGIC 0x64626720
+#endif
+
+#ifndef TRACEFS_MAGIC
+#define TRACEFS_MAGIC 0x74726163
+#endif
+
+#ifndef HUGETLBFS_MAGIC
+#define HUGETLBFS_MAGIC 0x958458f6
+#endif
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+static const char * const sysfs__fs_known_mountpoints[] = {
+ "/sys",
+ 0,
+};
+
+static const char * const procfs__known_mountpoints[] = {
+ "/proc",
+ 0,
+};
+
+#ifndef DEBUGFS_DEFAULT_PATH
+#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug"
+#endif
+
+static const char * const debugfs__known_mountpoints[] = {
+ DEBUGFS_DEFAULT_PATH,
+ "/debug",
+ 0,
+};
+
+#ifndef TRACEFS_DEFAULT_PATH
+#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing"
+#endif
+
+static const char * const tracefs__known_mountpoints[] = {
+ TRACEFS_DEFAULT_PATH,
+ "/sys/kernel/debug/tracing",
+ "/tracing",
+ "/trace",
+ 0,
+};
+
+static const char * const hugetlbfs__known_mountpoints[] = {
+ 0,
+};
+
+static const char * const bpf_fs__known_mountpoints[] = {
+ "/sys/fs/bpf",
+ 0,
+};
+
+struct fs {
+ const char *name;
+ const char * const *mounts;
+ char path[PATH_MAX];
+ bool found;
+ bool checked;
+ long magic;
+};
+
+enum {
+ FS__SYSFS = 0,
+ FS__PROCFS = 1,
+ FS__DEBUGFS = 2,
+ FS__TRACEFS = 3,
+ FS__HUGETLBFS = 4,
+ FS__BPF_FS = 5,
+};
+
+static struct fs fs__entries[] = {
+ [FS__SYSFS] = {
+ .name = "sysfs",
+ .mounts = sysfs__fs_known_mountpoints,
+ .magic = SYSFS_MAGIC,
+ .checked = false,
+ },
+ [FS__PROCFS] = {
+ .name = "proc",
+ .mounts = procfs__known_mountpoints,
+ .magic = PROC_SUPER_MAGIC,
+ .checked = false,
+ },
+ [FS__DEBUGFS] = {
+ .name = "debugfs",
+ .mounts = debugfs__known_mountpoints,
+ .magic = DEBUGFS_MAGIC,
+ .checked = false,
+ },
+ [FS__TRACEFS] = {
+ .name = "tracefs",
+ .mounts = tracefs__known_mountpoints,
+ .magic = TRACEFS_MAGIC,
+ .checked = false,
+ },
+ [FS__HUGETLBFS] = {
+ .name = "hugetlbfs",
+ .mounts = hugetlbfs__known_mountpoints,
+ .magic = HUGETLBFS_MAGIC,
+ .checked = false,
+ },
+ [FS__BPF_FS] = {
+ .name = "bpf",
+ .mounts = bpf_fs__known_mountpoints,
+ .magic = BPF_FS_MAGIC,
+ .checked = false,
+ },
+};
+
+static bool fs__read_mounts(struct fs *fs)
+{
+ bool found = false;
+ char type[100];
+ FILE *fp;
+
+ fp = fopen("/proc/mounts", "r");
+	if (fp == NULL)
+		return false;
+
+ while (!found &&
+ fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+ fs->path, type) == 2) {
+
+ if (strcmp(type, fs->name) == 0)
+ found = true;
+ }
+
+ fclose(fp);
+ fs->checked = true;
+ return fs->found = found;
+}
+
+static int fs__valid_mount(const char *fs, long magic)
+{
+ struct statfs st_fs;
+
+ if (statfs(fs, &st_fs) < 0)
+ return -ENOENT;
+ else if ((long)st_fs.f_type != magic)
+ return -ENOENT;
+
+ return 0;
+}
+
+static bool fs__check_mounts(struct fs *fs)
+{
+ const char * const *ptr;
+
+ ptr = fs->mounts;
+ while (*ptr) {
+ if (fs__valid_mount(*ptr, fs->magic) == 0) {
+ fs->found = true;
+ strcpy(fs->path, *ptr);
+ return true;
+ }
+ ptr++;
+ }
+
+ return false;
+}
+
+static void mem_toupper(char *f, size_t len)
+{
+ while (len) {
+ *f = toupper(*f);
+ f++;
+ len--;
+ }
+}
+
+/*
+ * Check for "NAME_PATH" environment variable to override fs location (for
+ * testing). This matches the recommendation in Documentation/admin-guide/sysfs-rules.rst
+ * for SYSFS_PATH.
+ */
+static bool fs__env_override(struct fs *fs)
+{
+ char *override_path;
+ size_t name_len = strlen(fs->name);
+ /* name + "_PATH" + '\0' */
+ char upper_name[name_len + 5 + 1];
+
+ memcpy(upper_name, fs->name, name_len);
+ mem_toupper(upper_name, name_len);
+ strcpy(&upper_name[name_len], "_PATH");
+
+ override_path = getenv(upper_name);
+ if (!override_path)
+ return false;
+
+ fs->found = true;
+ fs->checked = true;
+ strncpy(fs->path, override_path, sizeof(fs->path) - 1);
+ fs->path[sizeof(fs->path) - 1] = '\0';
+ return true;
+}
+
+static const char *fs__get_mountpoint(struct fs *fs)
+{
+ if (fs__env_override(fs))
+ return fs->path;
+
+ if (fs__check_mounts(fs))
+ return fs->path;
+
+ if (fs__read_mounts(fs))
+ return fs->path;
+
+ return NULL;
+}
+
+static const char *fs__mountpoint(int idx)
+{
+ struct fs *fs = &fs__entries[idx];
+
+ if (fs->found)
+ return (const char *)fs->path;
+
+	/*
+	 * The mount point was already checked but did not exist, so return
+	 * NULL to avoid scanning again. This makes the found and not-found
+	 * paths cost equivalent in case of multiple calls.
+	 */
+ if (fs->checked)
+ return NULL;
+
+ return fs__get_mountpoint(fs);
+}
+
+static const char *mount_overload(struct fs *fs)
+{
+ size_t name_len = strlen(fs->name);
+ /* "PERF_" + name + "_ENVIRONMENT" + '\0' */
+ char upper_name[5 + name_len + 12 + 1];
+
+	snprintf(upper_name, sizeof(upper_name), "PERF_%s_ENVIRONMENT", fs->name);
+	mem_toupper(&upper_name[5], name_len);	/* uppercase just the name part */
+
+ return getenv(upper_name) ?: *fs->mounts;
+}
+
+static const char *fs__mount(int idx)
+{
+ struct fs *fs = &fs__entries[idx];
+ const char *mountpoint;
+
+ if (fs__mountpoint(idx))
+ return (const char *)fs->path;
+
+ mountpoint = mount_overload(fs);
+
+ if (mount(NULL, mountpoint, fs->name, 0, NULL) < 0)
+ return NULL;
+
+ return fs__check_mounts(fs) ? fs->path : NULL;
+}
+
+#define FS(name, idx) \
+const char *name##__mountpoint(void) \
+{ \
+ return fs__mountpoint(idx); \
+} \
+ \
+const char *name##__mount(void) \
+{ \
+ return fs__mount(idx); \
+} \
+ \
+bool name##__configured(void) \
+{ \
+ return name##__mountpoint() != NULL; \
+}
+
+FS(sysfs, FS__SYSFS);
+FS(procfs, FS__PROCFS);
+FS(debugfs, FS__DEBUGFS);
+FS(tracefs, FS__TRACEFS);
+FS(hugetlbfs, FS__HUGETLBFS);
+FS(bpf_fs, FS__BPF_FS);
+
+int filename__read_int(const char *filename, int *value)
+{
+ char line[64];
+ int fd = open(filename, O_RDONLY), err = -1;
+
+ if (fd < 0)
+ return -1;
+
+ if (read(fd, line, sizeof(line)) > 0) {
+ *value = atoi(line);
+ err = 0;
+ }
+
+ close(fd);
+ return err;
+}
+
+static int filename__read_ull_base(const char *filename,
+ unsigned long long *value, int base)
+{
+ char line[64];
+ int fd = open(filename, O_RDONLY), err = -1;
+
+ if (fd < 0)
+ return -1;
+
+ if (read(fd, line, sizeof(line)) > 0) {
+ *value = strtoull(line, NULL, base);
+ if (*value != ULLONG_MAX)
+ err = 0;
+ }
+
+ close(fd);
+ return err;
+}
+
+/*
+ * Parses @value out of @filename with strtoull,
+ * using base 16 to treat the number as hex.
+ */
+int filename__read_xll(const char *filename, unsigned long long *value)
+{
+ return filename__read_ull_base(filename, value, 16);
+}
+
+/*
+ * Parses @value out of @filename with strtoull,
+ * using base 0 so that strtoull detects the
+ * base automatically (see man strtoull).
+ */
+int filename__read_ull(const char *filename, unsigned long long *value)
+{
+ return filename__read_ull_base(filename, value, 0);
+}
+
+#define STRERR_BUFSIZE 128 /* For the buffer size of strerror_r */
+
+int filename__read_str(const char *filename, char **buf, size_t *sizep)
+{
+ size_t size = 0, alloc_size = 0;
+ void *bf = NULL, *nbf;
+ int fd, n, err = 0;
+ char sbuf[STRERR_BUFSIZE];
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -errno;
+
+ do {
+ if (size == alloc_size) {
+ alloc_size += BUFSIZ;
+ nbf = realloc(bf, alloc_size);
+ if (!nbf) {
+ err = -ENOMEM;
+ break;
+ }
+
+ bf = nbf;
+ }
+
+ n = read(fd, bf + size, alloc_size - size);
+ if (n < 0) {
+ if (size) {
+ pr_warning("read failed %d: %s\n", errno,
+ strerror_r(errno, sbuf, sizeof(sbuf)));
+ err = 0;
+ } else
+ err = -errno;
+
+ break;
+ }
+
+ size += n;
+ } while (n > 0);
+
+ if (!err) {
+ *sizep = size;
+ *buf = bf;
+ } else
+ free(bf);
+
+ close(fd);
+ return err;
+}
+
+int filename__write_int(const char *filename, int value)
+{
+ int fd = open(filename, O_WRONLY), err = -1;
+ char buf[64];
+
+ if (fd < 0)
+ return err;
+
+	sprintf(buf, "%d", value);
+	if (write(fd, buf, strlen(buf)) == (ssize_t)strlen(buf))
+ err = 0;
+
+ close(fd);
+ return err;
+}
+
+int procfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+ char path[PATH_MAX];
+ const char *procfs = procfs__mountpoint();
+
+ if (!procfs)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/%s", procfs, entry);
+
+ return filename__read_str(path, buf, sizep);
+}
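
One caller-side detail: filename__read_str() (and thus procfs__read_str())
returns a realloc'ed buffer plus its length, but does not NUL-terminate it,
so the result must be treated as sized data. A small sketch; the "version"
entry is just an example:

    char *buf = NULL;
    size_t size = 0;

    if (procfs__read_str("version", &buf, &size) == 0) {
        fwrite(buf, 1, size, stdout);   /* not a C string: use the length */
        free(buf);
    }
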
+
+static int sysfs__read_ull_base(const char *entry,
+ unsigned long long *value, int base)
+{
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+ return filename__read_ull_base(path, value, base);
+}
+
+int sysfs__read_xll(const char *entry, unsigned long long *value)
+{
+ return sysfs__read_ull_base(entry, value, 16);
+}
+
+int sysfs__read_ull(const char *entry, unsigned long long *value)
+{
+ return sysfs__read_ull_base(entry, value, 0);
+}
+
+int sysfs__read_int(const char *entry, int *value)
+{
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+ return filename__read_int(path, value);
+}
+
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep)
+{
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/%s", sysfs, entry);
+
+ return filename__read_str(path, buf, sizep);
+}
+
+int sysfs__read_bool(const char *entry, bool *value)
+{
+ char *buf;
+ size_t size;
+ int ret;
+
+ ret = sysfs__read_str(entry, &buf, &size);
+ if (ret < 0)
+ return ret;
+
+ switch (buf[0]) {
+ case '1':
+ case 'y':
+ case 'Y':
+ *value = true;
+ break;
+ case '0':
+ case 'n':
+ case 'N':
+ *value = false;
+ break;
+ default:
+ ret = -1;
+ }
+
+ free(buf);
+
+ return ret;
+}
+
+int sysctl__read_int(const char *sysctl, int *value)
+{
+ char path[PATH_MAX];
+ const char *procfs = procfs__mountpoint();
+
+ if (!procfs)
+ return -1;
+
+ snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl);
+
+ return filename__read_int(path, value);
+}
+
+int sysfs__write_int(const char *entry, int value)
+{
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ if (snprintf(path, sizeof(path), "%s/%s", sysfs, entry) >= PATH_MAX)
+ return -1;
+
+ return filename__write_int(path, value);
+}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
new file mode 100644
index 000000000..3b70003e7
--- /dev/null
+++ b/tools/lib/api/fs/fs.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_FS__
+#define __API_FS__
+
+#include <stdbool.h>
+#include <unistd.h>
+
+/*
+ * On most systems <limits.h> would have given us this, but not on some systems
+ * (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#define FS(name) \
+ const char *name##__mountpoint(void); \
+ const char *name##__mount(void); \
+ bool name##__configured(void); \
+
+/*
+ * The xxxx__mountpoint() entry points find the first matching mount point
+ * for each of the filesystems listed below, where xxxx is the filesystem type.
+ *
+ * The interface is as follows:
+ *
+ * - If a mount point is found on first call, it is cached and used for all
+ * subsequent calls.
+ *
+ * - If a mount point is not found, NULL is returned on first call and all
+ * subsequent calls.
+ */
+FS(sysfs)
+FS(procfs)
+FS(debugfs)
+FS(tracefs)
+FS(hugetlbfs)
+FS(bpf_fs)
+
+#undef FS
+
+int filename__read_int(const char *filename, int *value);
+int filename__read_ull(const char *filename, unsigned long long *value);
+int filename__read_xll(const char *filename, unsigned long long *value);
+int filename__read_str(const char *filename, char **buf, size_t *sizep);
+
+int filename__write_int(const char *filename, int value);
+
+int procfs__read_str(const char *entry, char **buf, size_t *sizep);
+
+int sysctl__read_int(const char *sysctl, int *value);
+int sysfs__read_int(const char *entry, int *value);
+int sysfs__read_ull(const char *entry, unsigned long long *value);
+int sysfs__read_xll(const char *entry, unsigned long long *value);
+int sysfs__read_str(const char *entry, char **buf, size_t *sizep);
+int sysfs__read_bool(const char *entry, bool *value);
+
+int sysfs__write_int(const char *entry, int value);
+#endif /* __API_FS__ */
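
Putting the header together, a sketch of the accessor layer it exposes:
mountpoints are discovered once and cached, sysctl values resolve under
<procfs>/sys/, and each filesystem can be redirected for tests through its
NAME_PATH environment variable. The entry names are real sysfs/procfs files,
but the program itself is illustrative:

    #include <stdio.h>
    #include <api/fs/fs.h>

    int main(void)
    {
        unsigned long long kmax;
        int mlock_kb;

        /*
         * For tests, a fake tree can be injected before first use,
         * e.g. setenv("SYSFS_PATH", "/tmp/fake-sysfs", 1);
         */
        if (sysfs__read_ull("devices/system/cpu/kernel_max", &kmax) == 0)
            printf("kernel_max: %llu\n", kmax);

        if (sysctl__read_int("kernel/perf_event_mlock_kb", &mlock_kb) == 0)
            printf("perf_event_mlock_kb: %d\n", mlock_kb);

        return 0;
    }
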
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
new file mode 100644
index 000000000..5afb11b30
--- /dev/null
+++ b/tools/lib/api/fs/tracing_path.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <errno.h>
+#include <unistd.h>
+#include "fs.h"
+
+#include "tracing_path.h"
+
+static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
+static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
+static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
+
+static void __tracing_path_set(const char *tracing, const char *mountpoint)
+{
+ snprintf(tracing_mnt, sizeof(tracing_mnt), "%s", mountpoint);
+ snprintf(tracing_path, sizeof(tracing_path), "%s/%s",
+ mountpoint, tracing);
+ snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s",
+ mountpoint, tracing, "events");
+}
+
+static const char *tracing_path_tracefs_mount(void)
+{
+ const char *mnt;
+
+ mnt = tracefs__mount();
+ if (!mnt)
+ return NULL;
+
+ __tracing_path_set("", mnt);
+
+ return tracing_path;
+}
+
+static const char *tracing_path_debugfs_mount(void)
+{
+ const char *mnt;
+
+ mnt = debugfs__mount();
+ if (!mnt)
+ return NULL;
+
+ __tracing_path_set("tracing/", mnt);
+
+ return tracing_path;
+}
+
+const char *tracing_path_mount(void)
+{
+ const char *mnt;
+
+ mnt = tracing_path_tracefs_mount();
+ if (mnt)
+ return mnt;
+
+ mnt = tracing_path_debugfs_mount();
+
+ return mnt;
+}
+
+void tracing_path_set(const char *mntpt)
+{
+ __tracing_path_set("tracing/", mntpt);
+}
+
+char *get_tracing_file(const char *name)
+{
+ char *file;
+
+ if (asprintf(&file, "%s/%s", tracing_path_mount(), name) < 0)
+ return NULL;
+
+ return file;
+}
+
+void put_tracing_file(char *file)
+{
+ free(file);
+}
+
+char *get_events_file(const char *name)
+{
+ char *file;
+
+ if (asprintf(&file, "%s/events/%s", tracing_path_mount(), name) < 0)
+ return NULL;
+
+ return file;
+}
+
+void put_events_file(char *file)
+{
+ free(file);
+}
+
+DIR *tracing_events__opendir(void)
+{
+ DIR *dir = NULL;
+ char *path = get_tracing_file("events");
+
+ if (path) {
+ dir = opendir(path);
+		put_tracing_file(path);
+ }
+
+ return dir;
+}
+
+int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
+ const char *sys, const char *name)
+{
+ char sbuf[128];
+ char filename[PATH_MAX];
+
+ snprintf(filename, PATH_MAX, "%s/%s", sys, name ?: "*");
+
+ switch (err) {
+ case ENOENT:
+ /*
+ * We will get here if we can't find the tracepoint, but one of
+ * debugfs or tracefs is configured, which means you probably
+ * want some tracepoint which wasn't compiled in your kernel.
+ * - jirka
+ */
+ if (debugfs__configured() || tracefs__configured()) {
+ /* sdt markers */
+ if (!strncmp(filename, "sdt_", 4)) {
+ snprintf(buf, size,
+ "Error:\tFile %s/%s not found.\n"
+ "Hint:\tSDT event cannot be directly recorded on.\n"
+ "\tPlease first use 'perf probe %s:%s' before recording it.\n",
+ tracing_events_path, filename, sys, name);
+ } else {
+ snprintf(buf, size,
+ "Error:\tFile %s/%s not found.\n"
+ "Hint:\tPerhaps this kernel misses some CONFIG_ setting to enable this feature?.\n",
+ tracing_events_path, filename);
+ }
+ break;
+ }
+ snprintf(buf, size, "%s",
+ "Error:\tUnable to find debugfs/tracefs\n"
+ "Hint:\tWas your kernel compiled with debugfs/tracefs support?\n"
+ "Hint:\tIs the debugfs/tracefs filesystem mounted?\n"
+ "Hint:\tTry 'sudo mount -t debugfs nodev /sys/kernel/debug'");
+ break;
+ case EACCES: {
+ snprintf(buf, size,
+ "Error:\tNo permissions to read %s/%s\n"
+ "Hint:\tTry 'sudo mount -o remount,mode=755 %s'\n",
+ tracing_events_path, filename, tracing_path_mount());
+ }
+ break;
+ default:
+ snprintf(buf, size, "%s", str_error_r(err, sbuf, sizeof(sbuf)));
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
new file mode 100644
index 000000000..a19136b08
--- /dev/null
+++ b/tools/lib/api/fs/tracing_path.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __API_FS_TRACING_PATH_H
+#define __API_FS_TRACING_PATH_H
+
+#include <linux/types.h>
+#include <dirent.h>
+
+DIR *tracing_events__opendir(void);
+
+void tracing_path_set(const char *mountpoint);
+const char *tracing_path_mount(void);
+
+char *get_tracing_file(const char *name);
+void put_tracing_file(char *file);
+
+char *get_events_file(const char *name);
+void put_events_file(char *file);
+
+#define zput_events_file(ptr) ({ free(*ptr); *ptr = NULL; })
+
+int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name);
+#endif /* __API_FS_TRACING_PATH_H */
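
A minimal sketch of the intended get/put pairing: tracing_path_mount()
resolves tracefs first and falls back to debugfs, and the returned path is
heap-allocated by asprintf(), hence the put_ helper:

    #include <stdio.h>
    #include <api/fs/tracing_path.h>

    static FILE *open_trace_pipe(void)
    {
        char *path = get_tracing_file("trace_pipe");
        FILE *fp = NULL;

        if (path) {
            fp = fopen(path, "r");
            put_tracing_file(path);     /* frees the asprintf() buffer */
        }
        return fp;
    }
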
diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c
new file mode 100644
index 000000000..38748b0e3
--- /dev/null
+++ b/tools/lib/bitmap.c
@@ -0,0 +1,75 @@
+/*
+ * From lib/bitmap.c
+ * Helper functions for bitmap.h.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+#include <linux/bitmap.h>
+
+int __bitmap_weight(const unsigned long *bitmap, int bits)
+{
+ int k, w = 0, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; k++)
+ w += hweight_long(bitmap[k]);
+
+ if (bits % BITS_PER_LONG)
+ w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+
+ return w;
+}
+
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, int bits)
+{
+ int k;
+ int nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++)
+ dst[k] = bitmap1[k] | bitmap2[k];
+}
+
+size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+ char *buf, size_t size)
+{
+ /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+ int cur, rbot, rtop;
+ bool first = true;
+ size_t ret = 0;
+
+ rbot = cur = find_first_bit(bitmap, nbits);
+ while (cur < nbits) {
+ rtop = cur;
+ cur = find_next_bit(bitmap, nbits, cur + 1);
+ if (cur < nbits && cur <= rtop + 1)
+ continue;
+
+ if (!first)
+ ret += scnprintf(buf + ret, size - ret, ",");
+
+ first = false;
+
+ ret += scnprintf(buf + ret, size - ret, "%d", rbot);
+ if (rbot < rtop)
+ ret += scnprintf(buf + ret, size - ret, "-%d", rtop);
+
+ rbot = cur;
+ }
+ return ret;
+}
+
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int bits)
+{
+ unsigned int k;
+ unsigned int lim = bits/BITS_PER_LONG;
+ unsigned long result = 0;
+
+ for (k = 0; k < lim; k++)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ if (bits % BITS_PER_LONG)
+ result |= (dst[k] = bitmap1[k] & bitmap2[k] &
+ BITMAP_LAST_WORD_MASK(bits));
+ return result != 0;
+}
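
The range folding in bitmap_scnprintf() is easiest to see on a concrete mask.
A sketch, assuming the usual DECLARE_BITMAP()/set_bit() helpers from
tools/include are available alongside this file:

    #include <stdio.h>
    #include <linux/bitmap.h>

    int main(void)
    {
        DECLARE_BITMAP(mask, 16);
        char buf[32];

        bitmap_zero(mask, 16);
        set_bit(0, mask);
        set_bit(1, mask);
        set_bit(2, mask);
        set_bit(3, mask);
        set_bit(7, mask);

        bitmap_scnprintf(mask, 16, buf, sizeof(buf));
        printf("%s\n", buf);    /* prints "0-3,7" */
        return 0;
    }
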
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
new file mode 100644
index 000000000..f81e549dd
--- /dev/null
+++ b/tools/lib/bpf/.gitignore
@@ -0,0 +1,2 @@
+libbpf_version.h
+FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
new file mode 100644
index 000000000..6eb9bacd1
--- /dev/null
+++ b/tools/lib/bpf/Build
@@ -0,0 +1 @@
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
new file mode 100644
index 000000000..6f57d3844
--- /dev/null
+++ b/tools/lib/bpf/Makefile
@@ -0,0 +1,221 @@
+# SPDX-License-Identifier: GPL-2.0
+# Most of this file is copied from tools/lib/traceevent/Makefile
+
+BPF_VERSION = 0
+BPF_PATCHLEVEL = 0
+BPF_EXTRAVERSION = 1
+
+MAKEFLAGS += --no-print-directory
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+include $(srctree)/tools/scripts/Makefile.arch
+
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+
+export man_dir man_dir_SQ INSTALL
+export DESTDIR DESTDIR_SQ
+
+include ../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+FEATURE_USER = .libbpf
+FEATURE_TESTS = libelf libelf-mmap bpf reallocarray
+FEATURE_DISPLAY = libelf bpf
+
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi -I$(srctree)/tools/perf
+FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
+
+check_feat := 1
+NON_CHECK_FEAT_TARGETS := clean TAGS tags cscope help
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
+ check_feat := 0
+endif
+endif
+
+ifeq ($(check_feat),1)
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+
+LIB_FILE = libbpf.a libbpf.so
+
+VERSION = $(BPF_VERSION)
+PATCHLEVEL = $(BPF_PATCHLEVEL)
+EXTRAVERSION = $(BPF_EXTRAVERSION)
+
+OBJ = $@
+N =
+
+LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+ CFLAGS := $(EXTRA_CFLAGS)
+else
+ CFLAGS := -g -Wall
+endif
+
+ifeq ($(feature-libelf-mmap), 1)
+ override CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+endif
+
+ifeq ($(feature-reallocarray), 0)
+ override CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
+endif
+
+# Append required CFLAGS
+override CFLAGS += $(EXTRA_WARNINGS)
+override CFLAGS += -Werror -Wall
+override CFLAGS += -fPIC
+override CFLAGS += $(INCLUDES)
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+# Disable command line variables (CFLAGS) override from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+BPF_IN := $(OUTPUT)libbpf-in.o
+LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+
+CMD_TARGETS = $(LIB_FILE)
+
+TARGETS = $(CMD_TARGETS)
+
+all: fixdep
+ $(Q)$(MAKE) all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+$(BPF_IN): force elfdep bpfdep
+ @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
+ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
+ @(test -f ../../include/uapi/linux/bpf_common.h -a -f ../../../include/uapi/linux/bpf_common.h && ( \
+ (diff -B ../../include/uapi/linux/bpf_common.h ../../../include/uapi/linux/bpf_common.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf_common.h' differs from latest version at 'include/uapi/linux/bpf_common.h'" >&2 )) || true
+ @(test -f ../../include/uapi/linux/netlink.h -a -f ../../../include/uapi/linux/netlink.h && ( \
+ (diff -B ../../include/uapi/linux/netlink.h ../../../include/uapi/linux/netlink.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/netlink.h' differs from latest version at 'include/uapi/linux/netlink.h'" >&2 )) || true
+ @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \
+ (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \
+ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true
+ $(Q)$(MAKE) $(build)=libbpf
+
+$(OUTPUT)libbpf.so: $(BPF_IN)
+ $(QUIET_LINK)$(CC) --shared $^ -o $@
+
+$(OUTPUT)libbpf.a: $(BPF_IN)
+ $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: all_cmd
+ $(call QUIET_INSTALL, $(LIB_FILE)) \
+ $(call do_install,$(LIB_FILE),$(libdir_SQ))
+
+install_headers:
+ $(call QUIET_INSTALL, headers) \
+ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \
+ $(call do_install,libbpf.h,$(prefix)/include/bpf,644);
+ $(call do_install,btf.h,$(prefix)/include/bpf,644);
+
+install: install_lib
+
+### Cleaning rules
+
+config-clean:
+ $(call QUIET_CLEAN, config)
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null
+
+clean:
+ $(call QUIET_CLEAN, libbpf) $(RM) *.o *~ $(TARGETS) *.a *.so .*.d .*.cmd \
+ $(RM) LIBBPF-CFLAGS
+ $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf
+
+PHONY += force elfdep bpfdep
+force:
+
+elfdep:
+ @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi
+
+bpfdep:
+ @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
new file mode 100644
index 000000000..f28ae6a68
--- /dev/null
+++ b/tools/lib/bpf/bpf.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+
+#include <stdlib.h>
+#include <memory.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/bpf.h>
+#include "bpf.h"
+#include "libbpf.h"
+#include "nlattr.h"
+#include <linux/rtnetlink.h>
+#include <linux/if_link.h>
+#include <sys/socket.h>
+#include <errno.h>
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+/*
+ * When building perf, unistd.h is overridden. __NR_bpf is
+ * required to be defined explicitly.
+ */
+#ifndef __NR_bpf
+# if defined(__i386__)
+# define __NR_bpf 357
+# elif defined(__x86_64__)
+# define __NR_bpf 321
+# elif defined(__aarch64__)
+# define __NR_bpf 280
+# elif defined(__sparc__)
+# define __NR_bpf 349
+# elif defined(__s390__)
+# define __NR_bpf 351
+# elif defined(__arc__)
+# define __NR_bpf 280
+# else
+# error __NR_bpf not defined. libbpf does not support your arch.
+# endif
+#endif
+
+#ifndef min
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
+ unsigned int size)
+{
+ return syscall(__NR_bpf, cmd, attr, size);
+}
+
+static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size)
+{
+ int fd;
+
+ do {
+ fd = sys_bpf(BPF_PROG_LOAD, attr, size);
+ } while (fd < 0 && errno == EAGAIN);
+
+ return fd;
+}
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)
+{
+ __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0;
+ union bpf_attr attr;
+ int ret;
+
+ memset(&attr, '\0', sizeof(attr));
+
+ attr.map_type = create_attr->map_type;
+ attr.key_size = create_attr->key_size;
+ attr.value_size = create_attr->value_size;
+ attr.max_entries = create_attr->max_entries;
+ attr.map_flags = create_attr->map_flags;
+ memcpy(attr.map_name, create_attr->name,
+ min(name_len, BPF_OBJ_NAME_LEN - 1));
+ attr.numa_node = create_attr->numa_node;
+ attr.btf_fd = create_attr->btf_fd;
+ attr.btf_key_type_id = create_attr->btf_key_type_id;
+ attr.btf_value_type_id = create_attr->btf_value_type_id;
+ attr.map_ifindex = create_attr->map_ifindex;
+ attr.inner_map_fd = create_attr->inner_map_fd;
+
+ ret = sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ if (ret < 0 && errno == EINVAL && create_attr->name) {
+ /* Retry the same syscall, but without the name.
+ * Pre v4.14 kernels don't support map names.
+ */
+ memset(attr.map_name, 0, sizeof(attr.map_name));
+ return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+ }
+ return ret;
+}
+
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags, int node)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.name = name;
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+ if (node >= 0) {
+ map_attr.numa_node = node;
+ map_attr.map_flags |= BPF_F_NUMA_NODE;
+ }
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map(enum bpf_map_type map_type, int key_size,
+ int value_size, int max_entries, __u32 map_flags)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags)
+{
+ struct bpf_create_map_attr map_attr = {};
+
+ map_attr.name = name;
+ map_attr.map_type = map_type;
+ map_attr.map_flags = map_flags;
+ map_attr.key_size = key_size;
+ map_attr.value_size = value_size;
+ map_attr.max_entries = max_entries;
+
+ return bpf_create_map_xattr(&map_attr);
+}
+
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags, int node)
+{
+ __u32 name_len = name ? strlen(name) : 0;
+ union bpf_attr attr;
+
+ memset(&attr, '\0', sizeof(attr));
+
+ attr.map_type = map_type;
+ attr.key_size = key_size;
+ attr.value_size = 4;
+ attr.inner_map_fd = inner_map_fd;
+ attr.max_entries = max_entries;
+ attr.map_flags = map_flags;
+ memcpy(attr.map_name, name, min(name_len, BPF_OBJ_NAME_LEN - 1));
+
+ if (node >= 0) {
+ attr.map_flags |= BPF_F_NUMA_NODE;
+ attr.numa_node = node;
+ }
+
+ return sys_bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags)
+{
+ return bpf_create_map_in_map_node(map_type, name, key_size,
+ inner_map_fd, max_entries, map_flags,
+ -1);
+}
+
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz)
+{
+ union bpf_attr attr;
+ __u32 name_len;
+ int fd;
+
+ if (!load_attr)
+ return -EINVAL;
+
+ name_len = load_attr->name ? strlen(load_attr->name) : 0;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = load_attr->prog_type;
+ attr.expected_attach_type = load_attr->expected_attach_type;
+ attr.insn_cnt = (__u32)load_attr->insns_cnt;
+ attr.insns = ptr_to_u64(load_attr->insns);
+ attr.license = ptr_to_u64(load_attr->license);
+ attr.log_buf = ptr_to_u64(NULL);
+ attr.log_size = 0;
+ attr.log_level = 0;
+ attr.kern_version = load_attr->kern_version;
+ attr.prog_ifindex = load_attr->prog_ifindex;
+ memcpy(attr.prog_name, load_attr->name,
+ min(name_len, BPF_OBJ_NAME_LEN - 1));
+
+ fd = sys_bpf_prog_load(&attr, sizeof(attr));
+ if (fd >= 0 || !log_buf || !log_buf_sz)
+ return fd;
+
+ /* Try again with log */
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ attr.log_level = 1;
+ log_buf[0] = 0;
+ return sys_bpf_prog_load(&attr, sizeof(attr));
+}
+
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
+{
+ struct bpf_load_program_attr load_attr;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = type;
+ load_attr.expected_attach_type = 0;
+ load_attr.name = NULL;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = license;
+ load_attr.kern_version = kern_version;
+
+ return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
+}
+
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, __u32 prog_flags, const char *license,
+ __u32 kern_version, char *log_buf, size_t log_buf_sz,
+ int log_level)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = type;
+ attr.insn_cnt = (__u32)insns_cnt;
+ attr.insns = ptr_to_u64(insns);
+ attr.license = ptr_to_u64(license);
+ attr.log_buf = ptr_to_u64(log_buf);
+ attr.log_size = log_buf_sz;
+ attr.log_level = log_level;
+ log_buf[0] = 0;
+ attr.kern_version = kern_version;
+ attr.prog_flags = prog_flags;
+
+ return sys_bpf_prog_load(&attr, sizeof(attr));
+}
+
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+ attr.flags = flags;
+
+ return sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_lookup_elem(int fd, const void *key, void *value)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.value = ptr_to_u64(value);
+
+ return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_delete_elem(int fd, const void *key)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+
+ return sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+
+int bpf_map_get_next_key(int fd, const void *key, void *next_key)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_fd = fd;
+ attr.key = ptr_to_u64(key);
+ attr.next_key = ptr_to_u64(next_key);
+
+ return sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+int bpf_obj_pin(int fd, const char *pathname)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.pathname = ptr_to_u64((void *)pathname);
+ attr.bpf_fd = fd;
+
+ return sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+}
+
+int bpf_obj_get(const char *pathname)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.pathname = ptr_to_u64((void *)pathname);
+
+ return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr));
+}
+
+int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type,
+ unsigned int flags)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+ attr.attach_flags = flags;
+
+ return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_type = type;
+
+ return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.target_fd = target_fd;
+ attr.attach_bpf_fd = prog_fd;
+ attr.attach_type = type;
+
+ return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
+{
+ union bpf_attr attr;
+ int ret;
+
+ bzero(&attr, sizeof(attr));
+ attr.query.target_fd = target_fd;
+ attr.query.attach_type = type;
+ attr.query.query_flags = query_flags;
+ attr.query.prog_cnt = *prog_cnt;
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+ if (attach_flags)
+ *attach_flags = attr.query.attach_flags;
+ *prog_cnt = attr.query.prog_cnt;
+ return ret;
+}
+
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+ void *data_out, __u32 *size_out, __u32 *retval,
+ __u32 *duration)
+{
+ union bpf_attr attr;
+ int ret;
+
+ bzero(&attr, sizeof(attr));
+ attr.test.prog_fd = prog_fd;
+ attr.test.data_in = ptr_to_u64(data);
+ attr.test.data_out = ptr_to_u64(data_out);
+ attr.test.data_size_in = size;
+ attr.test.repeat = repeat;
+
+ ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+ if (size_out)
+ *size_out = attr.test.data_size_out;
+ if (retval)
+ *retval = attr.test.retval;
+ if (duration)
+ *duration = attr.test.duration;
+ return ret;
+}
+
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ union bpf_attr attr;
+ int err;
+
+ bzero(&attr, sizeof(attr));
+ attr.start_id = start_id;
+
+ err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr));
+ if (!err)
+ *next_id = attr.next_id;
+
+ return err;
+}
+
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ union bpf_attr attr;
+ int err;
+
+ bzero(&attr, sizeof(attr));
+ attr.start_id = start_id;
+
+ err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr));
+ if (!err)
+ *next_id = attr.next_id;
+
+ return err;
+}
+
+int bpf_prog_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_id = id;
+
+ return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_map_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.map_id = id;
+
+ return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_btf_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.btf_id = id;
+
+ return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+{
+ union bpf_attr attr;
+ int err;
+
+ bzero(&attr, sizeof(attr));
+ attr.info.bpf_fd = prog_fd;
+ attr.info.info_len = *info_len;
+ attr.info.info = ptr_to_u64(info);
+
+ err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+ if (!err)
+ *info_len = attr.info.info_len;
+
+ return err;
+}
+
+int bpf_raw_tracepoint_open(const char *name, int prog_fd)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.raw_tracepoint.name = ptr_to_u64(name);
+ attr.raw_tracepoint.prog_fd = prog_fd;
+
+ return sys_bpf(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ struct sockaddr_nl sa;
+ int sock, seq = 0, len, ret = -1;
+ char buf[4096];
+ struct nlattr *nla, *nla_xdp;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifinfo;
+ char attrbuf[64];
+ } req;
+ struct nlmsghdr *nh;
+ struct nlmsgerr *err;
+ socklen_t addrlen;
+ int one = 1;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.nl_family = AF_NETLINK;
+
+ sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (sock < 0) {
+ return -errno;
+ }
+
+ if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one)) < 0) {
+ fprintf(stderr, "Netlink error reporting not supported\n");
+ }
+
+ if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ addrlen = sizeof(sa);
+ if (getsockname(sock, (struct sockaddr *)&sa, &addrlen) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ if (addrlen != sizeof(sa)) {
+ ret = -LIBBPF_ERRNO__INTERNAL;
+ goto cleanup;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_SETLINK;
+ req.nh.nlmsg_pid = 0;
+ req.nh.nlmsg_seq = ++seq;
+ req.ifinfo.ifi_family = AF_UNSPEC;
+ req.ifinfo.ifi_index = ifindex;
+
+	/* start nested attribute for XDP */
+ nla = (struct nlattr *)(((char *)&req)
+ + NLMSG_ALIGN(req.nh.nlmsg_len));
+ nla->nla_type = NLA_F_NESTED | IFLA_XDP;
+ nla->nla_len = NLA_HDRLEN;
+
+ /* add XDP fd */
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+ nla->nla_len += nla_xdp->nla_len;
+
+ /* if user passed in any flags, add those too */
+ if (flags) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_FLAGS;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(flags);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &flags, sizeof(flags));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
+ req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ len = recv(sock, buf, sizeof(buf), 0);
+ if (len < 0) {
+ ret = -errno;
+ goto cleanup;
+ }
+
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_pid != sa.nl_pid) {
+ ret = -LIBBPF_ERRNO__WRNGPID;
+ goto cleanup;
+ }
+ if (nh->nlmsg_seq != seq) {
+ ret = -LIBBPF_ERRNO__INVSEQ;
+ goto cleanup;
+ }
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ nla_dump_errormsg(nh);
+ goto cleanup;
+ case NLMSG_DONE:
+ break;
+ default:
+ break;
+ }
+ }
+
+ ret = 0;
+
+cleanup:
+ close(sock);
+ return ret;
+}
+
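
From the caller's side, a sketch of driving this netlink helper; prog_fd is
assumed to come from bpf_load_program(), the device name is an example, and
passing -1 as the fd detaches the current program:

    #include <net/if.h>
    #include <linux/if_link.h>
    #include "bpf.h"

    static int attach_xdp(int prog_fd)
    {
        int ifindex = if_nametoindex("eth0");   /* example device */

        if (!ifindex)
            return -1;

        /* fail instead of replacing a program someone else attached */
        return bpf_set_link_xdp_fd(ifindex, prog_fd,
                                   XDP_FLAGS_UPDATE_IF_NOEXIST);
    }
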
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+ bool do_log)
+{
+ union bpf_attr attr = {};
+ int fd;
+
+ attr.btf = ptr_to_u64(btf);
+ attr.btf_size = btf_size;
+
+retry:
+ if (do_log && log_buf && log_buf_size) {
+ attr.btf_log_level = 1;
+ attr.btf_log_size = log_buf_size;
+ attr.btf_log_buf = ptr_to_u64(log_buf);
+ }
+
+ fd = sys_bpf(BPF_BTF_LOAD, &attr, sizeof(attr));
+ if (fd == -1 && !do_log && log_buf && log_buf_size) {
+ do_log = true;
+ goto retry;
+ }
+
+ return fd;
+}
+
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr)
+{
+ union bpf_attr attr = {};
+ int err;
+
+ attr.task_fd_query.pid = pid;
+ attr.task_fd_query.fd = fd;
+ attr.task_fd_query.flags = flags;
+ attr.task_fd_query.buf = ptr_to_u64(buf);
+ attr.task_fd_query.buf_len = *buf_len;
+
+ err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+ *buf_len = attr.task_fd_query.buf_len;
+ *prog_id = attr.task_fd_query.prog_id;
+ *fd_type = attr.task_fd_query.fd_type;
+ *probe_offset = attr.task_fd_query.probe_offset;
+ *probe_addr = attr.task_fd_query.probe_addr;
+
+ return err;
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
new file mode 100644
index 000000000..7f2e947d9
--- /dev/null
+++ b/tools/lib/bpf/bpf.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * common eBPF ELF operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+#ifndef __BPF_BPF_H
+#define __BPF_BPF_H
+
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+struct bpf_create_map_attr {
+ const char *name;
+ enum bpf_map_type map_type;
+ __u32 map_flags;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 numa_node;
+ __u32 btf_fd;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ __u32 map_ifindex;
+ __u32 inner_map_fd;
+};
+
+int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
+int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags, int node);
+int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
+ int key_size, int value_size, int max_entries,
+ __u32 map_flags);
+int bpf_create_map(enum bpf_map_type map_type, int key_size, int value_size,
+ int max_entries, __u32 map_flags);
+int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags, int node);
+int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,
+ int key_size, int inner_map_fd, int max_entries,
+ __u32 map_flags);
+
+struct bpf_load_program_attr {
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ const char *name;
+ const struct bpf_insn *insns;
+ size_t insns_cnt;
+ const char *license;
+ __u32 kern_version;
+ __u32 prog_ifindex;
+};
+
+/* Recommended log buffer size */
+#define BPF_LOG_BUF_SIZE (256 * 1024)
+int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
+ char *log_buf, size_t log_buf_sz);
+int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz);
+int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, __u32 prog_flags,
+ const char *license, __u32 kern_version,
+ char *log_buf, size_t log_buf_sz, int log_level);
+
+int bpf_map_update_elem(int fd, const void *key, const void *value,
+ __u64 flags);
+
+int bpf_map_lookup_elem(int fd, const void *key, void *value);
+int bpf_map_delete_elem(int fd, const void *key);
+int bpf_map_get_next_key(int fd, const void *key, void *next_key);
+int bpf_obj_pin(int fd, const char *pathname);
+int bpf_obj_get(const char *pathname);
+int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type,
+ unsigned int flags);
+int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type);
+int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size,
+ void *data_out, __u32 *size_out, __u32 *retval,
+ __u32 *duration);
+int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
+int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
+int bpf_prog_get_fd_by_id(__u32 id);
+int bpf_map_get_fd_by_id(__u32 id);
+int bpf_btf_get_fd_by_id(__u32 id);
+int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
+ __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt);
+int bpf_raw_tracepoint_open(const char *name, int prog_fd);
+int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size,
+ bool do_log);
+int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
+ __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
+ __u64 *probe_addr);
+#endif
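
As a usage sketch for the map half of this API: create, update, look up and
iterate. The calls mirror the syscall wrappers in bpf.c; running this needs
CAP_SYS_ADMIN, and the sizes and flags are illustrative:

    #include <stdio.h>
    #include "bpf.h"

    int main(void)
    {
        __u32 key = 1, first;
        __u64 val = 42, out;
        int fd;

        fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(val),
                            128 /* max_entries */, 0 /* map_flags */);
        if (fd < 0)
            return 1;

        bpf_map_update_elem(fd, &key, &val, BPF_ANY);
        if (bpf_map_lookup_elem(fd, &key, &out) == 0)
            printf("key %u -> %llu\n", key, (unsigned long long)out);

        /* a NULL key asks for the first key in the map */
        if (bpf_map_get_next_key(fd, NULL, &first) == 0)
            printf("first key: %u\n", first);

        return 0;
    }
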
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
new file mode 100644
index 000000000..cf94b0770
--- /dev/null
+++ b/tools/lib/bpf/btf.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: LGPL-2.1
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include "btf.h"
+#include "bpf.h"
+
+#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); }
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+#define BTF_MAX_NR_TYPES 65535
+
+#define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \
+ ((k) == BTF_KIND_VOLATILE) || \
+ ((k) == BTF_KIND_CONST) || \
+ ((k) == BTF_KIND_RESTRICT))
+
+static struct btf_type btf_void;
+
+struct btf {
+ union {
+ struct btf_header *hdr;
+ void *data;
+ };
+ struct btf_type **types;
+ const char *strings;
+ void *nohdr_data;
+ __u32 nr_types;
+ __u32 types_size;
+ __u32 data_size;
+ int fd;
+};
+
+static int btf_add_type(struct btf *btf, struct btf_type *t)
+{
+ if (btf->types_size - btf->nr_types < 2) {
+ struct btf_type **new_types;
+ __u32 expand_by, new_size;
+
+ if (btf->types_size == BTF_MAX_NR_TYPES)
+ return -E2BIG;
+
+ expand_by = max(btf->types_size >> 2, 16);
+ new_size = min(BTF_MAX_NR_TYPES, btf->types_size + expand_by);
+
+ new_types = realloc(btf->types, sizeof(*new_types) * new_size);
+ if (!new_types)
+ return -ENOMEM;
+
+ if (btf->nr_types == 0)
+ new_types[0] = &btf_void;
+
+ btf->types = new_types;
+ btf->types_size = new_size;
+ }
+
+ btf->types[++(btf->nr_types)] = t;
+
+ return 0;
+}
+
+static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log)
+{
+ const struct btf_header *hdr = btf->hdr;
+ __u32 meta_left;
+
+ if (btf->data_size < sizeof(struct btf_header)) {
+ elog("BTF header not found\n");
+ return -EINVAL;
+ }
+
+ if (hdr->magic != BTF_MAGIC) {
+ elog("Invalid BTF magic:%x\n", hdr->magic);
+ return -EINVAL;
+ }
+
+ if (hdr->version != BTF_VERSION) {
+ elog("Unsupported BTF version:%u\n", hdr->version);
+ return -ENOTSUP;
+ }
+
+ if (hdr->flags) {
+ elog("Unsupported BTF flags:%x\n", hdr->flags);
+ return -ENOTSUP;
+ }
+
+ meta_left = btf->data_size - sizeof(*hdr);
+ if (!meta_left) {
+ elog("BTF has no data\n");
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->type_off) {
+ elog("Invalid BTF type section offset:%u\n", hdr->type_off);
+ return -EINVAL;
+ }
+
+ if (meta_left < hdr->str_off) {
+ elog("Invalid BTF string section offset:%u\n", hdr->str_off);
+ return -EINVAL;
+ }
+
+ if (hdr->type_off >= hdr->str_off) {
+ elog("BTF type section offset >= string section offset. No type?\n");
+ return -EINVAL;
+ }
+
+ if (hdr->type_off & 0x02) {
+ elog("BTF type section is not aligned to 4 bytes\n");
+ return -EINVAL;
+ }
+
+ btf->nohdr_data = btf->hdr + 1;
+
+ return 0;
+}
+
+static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+ const struct btf_header *hdr = btf->hdr;
+ const char *start = btf->nohdr_data + hdr->str_off;
+ const char *end = start + btf->hdr->str_len;
+
+ if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET ||
+ start[0] || end[-1]) {
+ elog("Invalid BTF string section\n");
+ return -EINVAL;
+ }
+
+ btf->strings = start;
+
+ return 0;
+}
+
+static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log)
+{
+ struct btf_header *hdr = btf->hdr;
+ void *nohdr_data = btf->nohdr_data;
+ void *next_type = nohdr_data + hdr->type_off;
+ void *end_type = nohdr_data + hdr->str_off;
+
+ while (next_type < end_type) {
+ struct btf_type *t = next_type;
+ __u16 vlen = BTF_INFO_VLEN(t->info);
+ int err;
+
+ next_type += sizeof(*t);
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ next_type += sizeof(int);
+ break;
+ case BTF_KIND_ARRAY:
+ next_type += sizeof(struct btf_array);
+ break;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ next_type += vlen * sizeof(struct btf_member);
+ break;
+ case BTF_KIND_ENUM:
+ next_type += vlen * sizeof(struct btf_enum);
+ break;
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_PTR:
+ case BTF_KIND_FWD:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ break;
+ default:
+ elog("Unsupported BTF_KIND:%u\n",
+ BTF_INFO_KIND(t->info));
+ return -EINVAL;
+ }
+
+ err = btf_add_type(btf, t);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
+{
+ if (type_id > btf->nr_types)
+ return NULL;
+
+ return btf->types[type_id];
+}
+
+static bool btf_type_is_void(const struct btf_type *t)
+{
+ return t == &btf_void || BTF_INFO_KIND(t->info) == BTF_KIND_FWD;
+}
+
+static bool btf_type_is_void_or_null(const struct btf_type *t)
+{
+ return !t || btf_type_is_void(t);
+}
+
+static __s64 btf_type_size(const struct btf_type *t)
+{
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ return t->size;
+ case BTF_KIND_PTR:
+ return sizeof(void *);
+ default:
+ return -EINVAL;
+ }
+}
+
+#define MAX_RESOLVE_DEPTH 32
+
+__s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
+{
+ const struct btf_array *array;
+ const struct btf_type *t;
+ __u32 nelems = 1;
+ __s64 size = -1;
+ int i;
+
+ t = btf__type_by_id(btf, type_id);
+ for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t);
+ i++) {
+ size = btf_type_size(t);
+ if (size >= 0)
+ break;
+
+ switch (BTF_INFO_KIND(t->info)) {
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ type_id = t->type;
+ break;
+ case BTF_KIND_ARRAY:
+ array = (const struct btf_array *)(t + 1);
+ if (nelems && array->nelems > UINT32_MAX / nelems)
+ return -E2BIG;
+ nelems *= array->nelems;
+ type_id = array->type;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ t = btf__type_by_id(btf, type_id);
+ }
+
+ if (size < 0)
+ return -EINVAL;
+
+ if (nelems && size > UINT32_MAX / nelems)
+ return -E2BIG;
+
+ return nelems * size;
+}
+
+int btf__resolve_type(const struct btf *btf, __u32 type_id)
+{
+ const struct btf_type *t;
+ int depth = 0;
+
+ t = btf__type_by_id(btf, type_id);
+ while (depth < MAX_RESOLVE_DEPTH &&
+ !btf_type_is_void_or_null(t) &&
+ IS_MODIFIER(BTF_INFO_KIND(t->info))) {
+ type_id = t->type;
+ t = btf__type_by_id(btf, type_id);
+ depth++;
+ }
+
+ if (depth == MAX_RESOLVE_DEPTH || btf_type_is_void_or_null(t))
+ return -EINVAL;
+
+ return type_id;
+}
+
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name)
+{
+ __u32 i;
+
+ if (!strcmp(type_name, "void"))
+ return 0;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ const struct btf_type *t = btf->types[i];
+ const char *name = btf__name_by_offset(btf, t->name_off);
+
+ if (name && !strcmp(type_name, name))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+void btf__free(struct btf *btf)
+{
+ if (!btf)
+ return;
+
+ if (btf->fd != -1)
+ close(btf->fd);
+
+ free(btf->data);
+ free(btf->types);
+ free(btf);
+}
+
+struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log)
+{
+ __u32 log_buf_size = 0;
+ char *log_buf = NULL;
+ struct btf *btf;
+ int err;
+
+ btf = calloc(1, sizeof(struct btf));
+ if (!btf)
+ return ERR_PTR(-ENOMEM);
+
+ btf->fd = -1;
+
+ if (err_log) {
+ log_buf = malloc(BPF_LOG_BUF_SIZE);
+ if (!log_buf) {
+ err = -ENOMEM;
+ goto done;
+ }
+ *log_buf = 0;
+ log_buf_size = BPF_LOG_BUF_SIZE;
+ }
+
+ btf->data = malloc(size);
+ if (!btf->data) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ memcpy(btf->data, data, size);
+ btf->data_size = size;
+
+ btf->fd = bpf_load_btf(btf->data, btf->data_size,
+ log_buf, log_buf_size, false);
+
+ if (btf->fd == -1) {
+ err = -errno;
+ elog("Error loading BTF: %s(%d)\n", strerror(errno), errno);
+ if (log_buf && *log_buf)
+ elog("%s\n", log_buf);
+ goto done;
+ }
+
+ err = btf_parse_hdr(btf, err_log);
+ if (err)
+ goto done;
+
+ err = btf_parse_str_sec(btf, err_log);
+ if (err)
+ goto done;
+
+ err = btf_parse_type_sec(btf, err_log);
+
+done:
+ free(log_buf);
+
+ if (err) {
+ btf__free(btf);
+ return ERR_PTR(err);
+ }
+
+ return btf;
+}
+
+int btf__fd(const struct btf *btf)
+{
+ return btf->fd;
+}
+
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset)
+{
+ if (offset < btf->hdr->str_len)
+ return &btf->strings[offset];
+ else
+ return NULL;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
new file mode 100644
index 000000000..4897e0724
--- /dev/null
+++ b/tools/lib/bpf/btf.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __BPF_BTF_H
+#define __BPF_BTF_H
+
+#include <linux/types.h>
+
+#define BTF_ELF_SEC ".BTF"
+
+struct btf;
+struct btf_type;
+
+typedef int (*btf_print_fn_t)(const char *, ...)
+ __attribute__((format(printf, 1, 2)));
+
+void btf__free(struct btf *btf);
+struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log);
+__s32 btf__find_by_name(const struct btf *btf, const char *type_name);
+const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id);
+__s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
+int btf__resolve_type(const struct btf *btf, __u32 type_id);
+int btf__fd(const struct btf *btf);
+const char *btf__name_by_offset(const struct btf *btf, __u32 offset);
+
+#endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
new file mode 100644
index 000000000..249fa8d73
--- /dev/null
+++ b/tools/lib/bpf/libbpf.c
@@ -0,0 +1,2431 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <perf-sys.h>
+#include <asm/unistd.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/list.h>
+#include <linux/limits.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <tools/libc_compat.h>
+#include <libelf.h>
+#include <gelf.h>
+
+#include "libbpf.h"
+#include "bpf.h"
+#include "btf.h"
+#include "str_error.h"
+
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef BPF_FS_MAGIC
+#define BPF_FS_MAGIC 0xcafe4a11
+#endif
+
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+ va_list args;
+ int err;
+
+ va_start(args, format);
+ err = vfprintf(stderr, format, args);
+ va_end(args);
+ return err;
+}
+
+static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr;
+static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr;
+static __printf(1, 2) libbpf_print_fn_t __pr_debug;
+
+#define __pr(func, fmt, ...) \
+do { \
+ if ((func)) \
+ (func)("libbpf: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__)
+#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__)
+#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__)
+
+void libbpf_set_print(libbpf_print_fn_t warn,
+ libbpf_print_fn_t info,
+ libbpf_print_fn_t debug)
+{
+ __pr_warning = warn;
+ __pr_info = info;
+ __pr_debug = debug;
+}
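+
+/*
+ * Callers may redirect or silence libbpf output; a sketch, where the
+ * callback only has to match libbpf_print_fn_t:
+ *
+ *	static int quiet_print(const char *fmt, ...)
+ *	{
+ *		return 0;
+ *	}
+ *
+ *	libbpf_set_print(quiet_print, quiet_print, NULL);
+ */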
+
+#define STRERR_BUFSIZE 128
+
+#define CHECK_ERR(action, err, out) do { \
+ err = action; \
+ if (err) \
+ goto out; \
+} while(0)
+
+
+/* Copied from tools/perf/util/util.h */
+#ifndef zfree
+# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+#endif
+
+#ifndef zclose
+# define zclose(fd) ({ \
+ int ___err = 0; \
+ if ((fd) >= 0) \
+ ___err = close((fd)); \
+ fd = -1; \
+ ___err; })
+#endif
+
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define LIBBPF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+/*
+ * 'bpf_prog' would be a better name, but it is already used in
+ * linux/filter.h.
+ */
+struct bpf_program {
+ /* Index in elf obj file, for relocation use. */
+ int idx;
+ char *name;
+ int prog_ifindex;
+ char *section_name;
+ struct bpf_insn *insns;
+ size_t insns_cnt, main_prog_cnt;
+ enum bpf_prog_type type;
+
+ struct reloc_desc {
+ enum {
+ RELO_LD64,
+ RELO_CALL,
+ } type;
+ int insn_idx;
+ union {
+ int map_idx;
+ int text_off;
+ };
+ } *reloc_desc;
+ int nr_reloc;
+
+ struct {
+ int nr;
+ int *fds;
+ } instances;
+ bpf_program_prep_t preprocessor;
+
+ struct bpf_object *obj;
+ void *priv;
+ bpf_program_clear_priv_t clear_priv;
+
+ enum bpf_attach_type expected_attach_type;
+};
+
+struct bpf_map {
+ int fd;
+ char *name;
+ size_t offset;
+ int map_ifindex;
+ struct bpf_map_def def;
+ __u32 btf_key_type_id;
+ __u32 btf_value_type_id;
+ void *priv;
+ bpf_map_clear_priv_t clear_priv;
+};
+
+static LIST_HEAD(bpf_objects_list);
+
+struct bpf_object {
+ char license[64];
+ u32 kern_version;
+
+ struct bpf_program *programs;
+ size_t nr_programs;
+ struct bpf_map *maps;
+ size_t nr_maps;
+
+ bool loaded;
+ bool has_pseudo_calls;
+
+	/*
+	 * Information used while doing ELF-related work. Only valid
+	 * if fd is valid.
+	 */
+ struct {
+ int fd;
+ void *obj_buf;
+ size_t obj_buf_sz;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ Elf_Data *symbols;
+ size_t strtabidx;
+ struct {
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ } *reloc;
+ int nr_reloc;
+ int maps_shndx;
+ int text_shndx;
+ } efile;
+	/*
+	 * All loaded bpf_objects are linked in a list, which is
+	 * hidden from the caller. bpf_object__<func> handlers deal
+	 * with all objects.
+	 */
+ struct list_head list;
+
+ struct btf *btf;
+
+ void *priv;
+ bpf_object_clear_priv_t clear_priv;
+
+ char path[];
+};
+#define obj_elf_valid(o) ((o)->efile.elf)
+
+static void bpf_program__unload(struct bpf_program *prog)
+{
+ int i;
+
+ if (!prog)
+ return;
+
+ /*
+ * If the object is opened but the program was never loaded,
+ * it is possible that prog->instances.nr == -1.
+ */
+ if (prog->instances.nr > 0) {
+ for (i = 0; i < prog->instances.nr; i++)
+ zclose(prog->instances.fds[i]);
+ } else if (prog->instances.nr != -1) {
+ pr_warning("Internal error: instances.nr is %d\n",
+ prog->instances.nr);
+ }
+
+ prog->instances.nr = -1;
+ zfree(&prog->instances.fds);
+}
+
+static void bpf_program__exit(struct bpf_program *prog)
+{
+ if (!prog)
+ return;
+
+ if (prog->clear_priv)
+ prog->clear_priv(prog, prog->priv);
+
+ prog->priv = NULL;
+ prog->clear_priv = NULL;
+
+ bpf_program__unload(prog);
+ zfree(&prog->name);
+ zfree(&prog->section_name);
+ zfree(&prog->insns);
+ zfree(&prog->reloc_desc);
+
+ prog->nr_reloc = 0;
+ prog->insns_cnt = 0;
+ prog->idx = -1;
+}
+
+static int
+bpf_program__init(void *data, size_t size, char *section_name, int idx,
+ struct bpf_program *prog)
+{
+ if (size < sizeof(struct bpf_insn)) {
+ pr_warning("corrupted section '%s'\n", section_name);
+ return -EINVAL;
+ }
+
+ bzero(prog, sizeof(*prog));
+
+ prog->section_name = strdup(section_name);
+ if (!prog->section_name) {
+ pr_warning("failed to alloc name for prog under section(%d) %s\n",
+ idx, section_name);
+ goto errout;
+ }
+
+ prog->insns = malloc(size);
+ if (!prog->insns) {
+ pr_warning("failed to alloc insns for prog under section %s\n",
+ section_name);
+ goto errout;
+ }
+ prog->insns_cnt = size / sizeof(struct bpf_insn);
+ memcpy(prog->insns, data,
+ prog->insns_cnt * sizeof(struct bpf_insn));
+ prog->idx = idx;
+ prog->instances.fds = NULL;
+ prog->instances.nr = -1;
+ prog->type = BPF_PROG_TYPE_KPROBE;
+
+ return 0;
+errout:
+ bpf_program__exit(prog);
+ return -ENOMEM;
+}
+
+static int
+bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,
+ char *section_name, int idx)
+{
+ struct bpf_program prog, *progs;
+ int nr_progs, err;
+
+ err = bpf_program__init(data, size, section_name, idx, &prog);
+ if (err)
+ return err;
+
+ progs = obj->programs;
+ nr_progs = obj->nr_programs;
+
+ progs = reallocarray(progs, nr_progs + 1, sizeof(progs[0]));
+ if (!progs) {
+		/*
+		 * In this case the original obj->programs
+		 * is still valid, so no special treatment is needed
+		 * in bpf_object__close().
+		 */
+ pr_warning("failed to alloc a new program under section '%s'\n",
+ section_name);
+ bpf_program__exit(&prog);
+ return -ENOMEM;
+ }
+
+ pr_debug("found program %s\n", prog.section_name);
+ obj->programs = progs;
+ obj->nr_programs = nr_progs + 1;
+ prog.obj = obj;
+ progs[nr_progs] = prog;
+ return 0;
+}
+
+static int
+bpf_object__init_prog_names(struct bpf_object *obj)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ struct bpf_program *prog;
+ size_t pi, si;
+
+ for (pi = 0; pi < obj->nr_programs; pi++) {
+ const char *name = NULL;
+
+ prog = &obj->programs[pi];
+
+ for (si = 0; si < symbols->d_size / sizeof(GElf_Sym) && !name;
+ si++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, si, &sym))
+ continue;
+ if (sym.st_shndx != prog->idx)
+ continue;
+ if (GELF_ST_BIND(sym.st_info) != STB_GLOBAL)
+ continue;
+
+ name = elf_strptr(obj->efile.elf,
+ obj->efile.strtabidx,
+ sym.st_name);
+ if (!name) {
+ pr_warning("failed to get sym name string for prog %s\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__LIBELF;
+ }
+ }
+
+ if (!name && prog->idx == obj->efile.text_shndx)
+ name = ".text";
+
+ if (!name) {
+ pr_warning("failed to find sym for prog %s\n",
+ prog->section_name);
+ return -EINVAL;
+ }
+
+ prog->name = strdup(name);
+ if (!prog->name) {
+ pr_warning("failed to allocate memory for prog sym %s\n",
+ name);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+static struct bpf_object *bpf_object__new(const char *path,
+ void *obj_buf,
+ size_t obj_buf_sz)
+{
+ struct bpf_object *obj;
+
+ obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);
+ if (!obj) {
+ pr_warning("alloc memory failed for %s\n", path);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ strcpy(obj->path, path);
+ obj->efile.fd = -1;
+
+	/*
+	 * The caller of this function should also call
+	 * bpf_object__elf_finish() after data collection to return
+	 * obj_buf to the user. If not, we would have to duplicate the
+	 * buffer to avoid the user freeing it before the ELF work is
+	 * finished.
+	 */
+ obj->efile.obj_buf = obj_buf;
+ obj->efile.obj_buf_sz = obj_buf_sz;
+ obj->efile.maps_shndx = -1;
+
+ obj->loaded = false;
+
+ INIT_LIST_HEAD(&obj->list);
+ list_add(&obj->list, &bpf_objects_list);
+ return obj;
+}
+
+static void bpf_object__elf_finish(struct bpf_object *obj)
+{
+ if (!obj_elf_valid(obj))
+ return;
+
+ if (obj->efile.elf) {
+ elf_end(obj->efile.elf);
+ obj->efile.elf = NULL;
+ }
+ obj->efile.symbols = NULL;
+
+ zfree(&obj->efile.reloc);
+ obj->efile.nr_reloc = 0;
+ zclose(obj->efile.fd);
+ obj->efile.obj_buf = NULL;
+ obj->efile.obj_buf_sz = 0;
+}
+
+static int bpf_object__elf_init(struct bpf_object *obj)
+{
+ int err = 0;
+ GElf_Ehdr *ep;
+
+ if (obj_elf_valid(obj)) {
+ pr_warning("elf init: internal error\n");
+ return -LIBBPF_ERRNO__LIBELF;
+ }
+
+ if (obj->efile.obj_buf_sz > 0) {
+ /*
+ * obj_buf should have been validated by
+ * bpf_object__open_buffer().
+ */
+ obj->efile.elf = elf_memory(obj->efile.obj_buf,
+ obj->efile.obj_buf_sz);
+ } else {
+ obj->efile.fd = open(obj->path, O_RDONLY);
+ if (obj->efile.fd < 0) {
+ char errmsg[STRERR_BUFSIZE];
+ char *cp = str_error(errno, errmsg, sizeof(errmsg));
+
+ pr_warning("failed to open %s: %s\n", obj->path, cp);
+ return -errno;
+ }
+
+ obj->efile.elf = elf_begin(obj->efile.fd,
+ LIBBPF_ELF_C_READ_MMAP,
+ NULL);
+ }
+
+ if (!obj->efile.elf) {
+ pr_warning("failed to open %s as ELF file\n",
+ obj->path);
+ err = -LIBBPF_ERRNO__LIBELF;
+ goto errout;
+ }
+
+ if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) {
+ pr_warning("failed to get EHDR from %s\n",
+ obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+ ep = &obj->efile.ehdr;
+
+ /* Old LLVM set e_machine to EM_NONE */
+ if ((ep->e_type != ET_REL) || (ep->e_machine && (ep->e_machine != EM_BPF))) {
+ pr_warning("%s is not an eBPF object file\n",
+ obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
+ }
+
+ return 0;
+errout:
+ bpf_object__elf_finish(obj);
+ return err;
+}
+
+static int
+bpf_object__check_endianness(struct bpf_object *obj)
+{
+ static unsigned int const endian = 1;
+
+ switch (obj->efile.ehdr.e_ident[EI_DATA]) {
+ case ELFDATA2LSB:
+ /* We are big endian, BPF obj is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ goto mismatch;
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, BPF obj is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ goto mismatch;
+ break;
+ default:
+ return -LIBBPF_ERRNO__ENDIAN;
+ }
+
+ return 0;
+
+mismatch:
+ pr_warning("Error: endianness mismatch.\n");
+ return -LIBBPF_ERRNO__ENDIAN;
+}
+
+static int
+bpf_object__init_license(struct bpf_object *obj,
+ void *data, size_t size)
+{
+ memcpy(obj->license, data,
+ min(size, sizeof(obj->license) - 1));
+ pr_debug("license of %s is %s\n", obj->path, obj->license);
+ return 0;
+}
+
+static int
+bpf_object__init_kversion(struct bpf_object *obj,
+ void *data, size_t size)
+{
+ u32 kver;
+
+ if (size != sizeof(kver)) {
+ pr_warning("invalid kver section in %s\n", obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ memcpy(&kver, data, sizeof(kver));
+ obj->kern_version = kver;
+ pr_debug("kernel version of %s is %x\n", obj->path,
+ obj->kern_version);
+ return 0;
+}
+
+static int compare_bpf_map(const void *_a, const void *_b)
+{
+ const struct bpf_map *a = _a;
+ const struct bpf_map *b = _b;
+
+ return a->offset - b->offset;
+}
+
+static int
+bpf_object__init_maps(struct bpf_object *obj)
+{
+ int i, map_idx, map_def_sz, nr_maps = 0;
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf_Data *symbols = obj->efile.symbols;
+
+ if (obj->efile.maps_shndx < 0)
+ return -EINVAL;
+ if (!symbols)
+ return -EINVAL;
+
+ scn = elf_getscn(obj->efile.elf, obj->efile.maps_shndx);
+ if (scn)
+ data = elf_getdata(scn, NULL);
+ if (!scn || !data) {
+ pr_warning("failed to get Elf_Data from map section %d\n",
+ obj->efile.maps_shndx);
+ return -EINVAL;
+ }
+
+	/*
+	 * Count the number of maps. Each map has a name.
+	 * Arrays of maps are not supported: only the first element is
+	 * considered.
+	 *
+	 * TODO: Detect arrays of maps and report an error.
+	 */
+ for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ GElf_Sym sym;
+
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != obj->efile.maps_shndx)
+ continue;
+ nr_maps++;
+ }
+
+ /* Alloc obj->maps and fill nr_maps. */
+ pr_debug("maps in %s: %d maps in %zd bytes\n", obj->path,
+ nr_maps, data->d_size);
+
+ if (!nr_maps)
+ return 0;
+
+ /* Assume equally sized map definitions */
+ map_def_sz = data->d_size / nr_maps;
+ if (!data->d_size || (data->d_size % nr_maps) != 0) {
+ pr_warning("unable to determine map definition size "
+ "section %s, %d maps in %zd bytes\n",
+ obj->path, nr_maps, data->d_size);
+ return -EINVAL;
+ }
+
+ obj->maps = calloc(nr_maps, sizeof(obj->maps[0]));
+ if (!obj->maps) {
+ pr_warning("alloc maps for object failed\n");
+ return -ENOMEM;
+ }
+ obj->nr_maps = nr_maps;
+
+	/*
+	 * Fill all fds with -1 so we won't close an incorrect
+	 * fd (fd 0 is stdin) on failure (zclose won't close a
+	 * negative fd).
+	 */
+ for (i = 0; i < nr_maps; i++)
+ obj->maps[i].fd = -1;
+
+ /*
+ * Fill obj->maps using data in "maps" section.
+ */
+ for (i = 0, map_idx = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ GElf_Sym sym;
+ const char *map_name;
+ struct bpf_map_def *def;
+
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != obj->efile.maps_shndx)
+ continue;
+
+ map_name = elf_strptr(obj->efile.elf,
+ obj->efile.strtabidx,
+ sym.st_name);
+ obj->maps[map_idx].offset = sym.st_value;
+ if (sym.st_value + map_def_sz > data->d_size) {
+ pr_warning("corrupted maps section in %s: last map \"%s\" too small\n",
+ obj->path, map_name);
+ return -EINVAL;
+ }
+
+ obj->maps[map_idx].name = strdup(map_name);
+ if (!obj->maps[map_idx].name) {
+ pr_warning("failed to alloc map name\n");
+ return -ENOMEM;
+ }
+ pr_debug("map %d is \"%s\"\n", map_idx,
+ obj->maps[map_idx].name);
+ def = (struct bpf_map_def *)(data->d_buf + sym.st_value);
+ /*
+ * If the definition of the map in the object file fits in
+ * bpf_map_def, copy it. Any extra fields in our version
+ * of bpf_map_def will default to zero as a result of the
+ * calloc above.
+ */
+ if (map_def_sz <= sizeof(struct bpf_map_def)) {
+ memcpy(&obj->maps[map_idx].def, def, map_def_sz);
+ } else {
+			/*
+			 * Here the map structure being read is bigger than what
+			 * we expect, so truncate it if the excess bytes are all
+			 * zero. If they are not zero, reject this map as
+			 * incompatible.
+			 */
+ char *b;
+ for (b = ((char *)def) + sizeof(struct bpf_map_def);
+ b < ((char *)def) + map_def_sz; b++) {
+ if (*b != 0) {
+ pr_warning("maps section in %s: \"%s\" "
+ "has unrecognized, non-zero "
+ "options\n",
+ obj->path, map_name);
+ return -EINVAL;
+ }
+ }
+ memcpy(&obj->maps[map_idx].def, def,
+ sizeof(struct bpf_map_def));
+ }
+ map_idx++;
+ }
+
+ qsort(obj->maps, obj->nr_maps, sizeof(obj->maps[0]), compare_bpf_map);
+ return 0;
+}
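+
+/*
+ * For reference, the "maps" section parsed above is typically produced
+ * from BPF C code along these lines (a sketch; the SEC() section macro
+ * comes from the program's helper headers, not from this file):
+ *
+ *	struct bpf_map_def SEC("maps") my_map = {
+ *		.type = BPF_MAP_TYPE_HASH,
+ *		.key_size = sizeof(__u32),
+ *		.value_size = sizeof(__u64),
+ *		.max_entries = 1024,
+ *	};
+ */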
+
+static bool section_have_execinstr(struct bpf_object *obj, int idx)
+{
+ Elf_Scn *scn;
+ GElf_Shdr sh;
+
+ scn = elf_getscn(obj->efile.elf, idx);
+ if (!scn)
+ return false;
+
+ if (gelf_getshdr(scn, &sh) != &sh)
+ return false;
+
+ if (sh.sh_flags & SHF_EXECINSTR)
+ return true;
+
+ return false;
+}
+
+static int bpf_object__elf_collect(struct bpf_object *obj)
+{
+ Elf *elf = obj->efile.elf;
+ GElf_Ehdr *ep = &obj->efile.ehdr;
+ Elf_Scn *scn = NULL;
+ int idx = 0, err = 0;
+
+	/* If the ELF is corrupted or truncated, avoid calling elf_strptr(). */
+ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) {
+ pr_warning("failed to get e_shstrndx from %s\n",
+ obj->path);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ while ((scn = elf_nextscn(elf, scn)) != NULL) {
+ char *name;
+ GElf_Shdr sh;
+ Elf_Data *data;
+
+ idx++;
+ if (gelf_getshdr(scn, &sh) != &sh) {
+ pr_warning("failed to get section(%d) header from %s\n",
+ idx, obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+
+ name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);
+ if (!name) {
+ pr_warning("failed to get section(%d) name from %s\n",
+ idx, obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ pr_warning("failed to get section(%d) data from %s(%s)\n",
+ idx, name, obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto out;
+ }
+ pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n",
+ idx, name, (unsigned long)data->d_size,
+ (int)sh.sh_link, (unsigned long)sh.sh_flags,
+ (int)sh.sh_type);
+
+ if (strcmp(name, "license") == 0)
+ err = bpf_object__init_license(obj,
+ data->d_buf,
+ data->d_size);
+ else if (strcmp(name, "version") == 0)
+ err = bpf_object__init_kversion(obj,
+ data->d_buf,
+ data->d_size);
+ else if (strcmp(name, "maps") == 0)
+ obj->efile.maps_shndx = idx;
+ else if (strcmp(name, BTF_ELF_SEC) == 0) {
+ obj->btf = btf__new(data->d_buf, data->d_size,
+ __pr_debug);
+ if (IS_ERR(obj->btf)) {
+				pr_warning("Error loading ELF section %s: %ld. Ignoring and continuing.\n",
+ BTF_ELF_SEC, PTR_ERR(obj->btf));
+ obj->btf = NULL;
+ }
+ } else if (sh.sh_type == SHT_SYMTAB) {
+ if (obj->efile.symbols) {
+ pr_warning("bpf: multiple SYMTAB in %s\n",
+ obj->path);
+ err = -LIBBPF_ERRNO__FORMAT;
+ } else {
+ obj->efile.symbols = data;
+ obj->efile.strtabidx = sh.sh_link;
+ }
+ } else if ((sh.sh_type == SHT_PROGBITS) &&
+ (sh.sh_flags & SHF_EXECINSTR) &&
+ (data->d_size > 0)) {
+ if (strcmp(name, ".text") == 0)
+ obj->efile.text_shndx = idx;
+ err = bpf_object__add_program(obj, data->d_buf,
+ data->d_size, name, idx);
+ if (err) {
+ char errmsg[STRERR_BUFSIZE];
+ char *cp = str_error(-err, errmsg, sizeof(errmsg));
+
+ pr_warning("failed to alloc program %s (%s): %s",
+ name, obj->path, cp);
+ }
+ } else if (sh.sh_type == SHT_REL) {
+ void *reloc = obj->efile.reloc;
+ int nr_reloc = obj->efile.nr_reloc + 1;
+ int sec = sh.sh_info; /* points to other section */
+
+			/* Only do relocations for sections with executable instructions */
+ if (!section_have_execinstr(obj, sec)) {
+ pr_debug("skip relo %s(%d) for section(%d)\n",
+ name, idx, sec);
+ continue;
+ }
+
+ reloc = reallocarray(reloc, nr_reloc,
+ sizeof(*obj->efile.reloc));
+ if (!reloc) {
+ pr_warning("realloc failed\n");
+ err = -ENOMEM;
+ } else {
+ int n = nr_reloc - 1;
+
+ obj->efile.reloc = reloc;
+ obj->efile.nr_reloc = nr_reloc;
+
+ obj->efile.reloc[n].shdr = sh;
+ obj->efile.reloc[n].data = data;
+ }
+ } else {
+ pr_debug("skip section(%d) %s\n", idx, name);
+ }
+ if (err)
+ goto out;
+ }
+
+ if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) {
+ pr_warning("Corrupted ELF file: index of strtab invalid\n");
+		return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (obj->efile.maps_shndx >= 0) {
+ err = bpf_object__init_maps(obj);
+ if (err)
+ goto out;
+ }
+ err = bpf_object__init_prog_names(obj);
+out:
+ return err;
+}
+
+static struct bpf_program *
+bpf_object__find_prog_by_idx(struct bpf_object *obj, int idx)
+{
+ struct bpf_program *prog;
+ size_t i;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+ if (prog->idx == idx)
+ return prog;
+ }
+ return NULL;
+}
+
+struct bpf_program *
+bpf_object__find_program_by_title(struct bpf_object *obj, const char *title)
+{
+ struct bpf_program *pos;
+
+ bpf_object__for_each_program(pos, obj) {
+ if (pos->section_name && !strcmp(pos->section_name, title))
+ return pos;
+ }
+ return NULL;
+}
+
+static int
+bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,
+ Elf_Data *data, struct bpf_object *obj)
+{
+ Elf_Data *symbols = obj->efile.symbols;
+ int text_shndx = obj->efile.text_shndx;
+ int maps_shndx = obj->efile.maps_shndx;
+ struct bpf_map *maps = obj->maps;
+ size_t nr_maps = obj->nr_maps;
+ int i, nrels;
+
+	pr_debug("collecting relocation info for: '%s'\n",
+ prog->section_name);
+ nrels = shdr->sh_size / shdr->sh_entsize;
+
+ prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);
+ if (!prog->reloc_desc) {
+ pr_warning("failed to alloc memory in relocation\n");
+ return -ENOMEM;
+ }
+ prog->nr_reloc = nrels;
+
+ for (i = 0; i < nrels; i++) {
+ GElf_Sym sym;
+ GElf_Rel rel;
+ unsigned int insn_idx;
+ struct bpf_insn *insns = prog->insns;
+ size_t map_idx;
+
+ if (!gelf_getrel(data, i, &rel)) {
+ pr_warning("relocation: failed to get %d reloc\n", i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+
+ if (!gelf_getsym(symbols,
+ GELF_R_SYM(rel.r_info),
+ &sym)) {
+ pr_warning("relocation: symbol %"PRIx64" not found\n",
+ GELF_R_SYM(rel.r_info));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ pr_debug("relo for %lld value %lld name %d\n",
+ (long long) (rel.r_info >> 32),
+ (long long) sym.st_value, sym.st_name);
+
+ if (sym.st_shndx != maps_shndx && sym.st_shndx != text_shndx) {
+ pr_warning("Program '%s' contains non-map related relo data pointing to section %u\n",
+ prog->section_name, sym.st_shndx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ insn_idx = rel.r_offset / sizeof(struct bpf_insn);
+ pr_debug("relocation: insn_idx=%u\n", insn_idx);
+
+ if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) {
+ if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) {
+ pr_warning("incorrect bpf_call opcode\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ prog->reloc_desc[i].type = RELO_CALL;
+ prog->reloc_desc[i].insn_idx = insn_idx;
+ prog->reloc_desc[i].text_off = sym.st_value;
+ obj->has_pseudo_calls = true;
+ continue;
+ }
+
+ if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n",
+ insn_idx, insns[insn_idx].code);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ /* TODO: 'maps' is sorted. We can use bsearch to make it faster. */
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ if (maps[map_idx].offset == sym.st_value) {
+				pr_debug("relocation: found map %zd (%s) for insn %u\n",
+ map_idx, maps[map_idx].name, insn_idx);
+ break;
+ }
+ }
+
+ if (map_idx >= nr_maps) {
+			pr_warning("bpf relocation: map_idx %d larger than %d\n",
+ (int)map_idx, (int)nr_maps - 1);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ prog->reloc_desc[i].type = RELO_LD64;
+ prog->reloc_desc[i].insn_idx = insn_idx;
+ prog->reloc_desc[i].map_idx = map_idx;
+ }
+ return 0;
+}
+
+static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf)
+{
+ const struct btf_type *container_type;
+ const struct btf_member *key, *value;
+ struct bpf_map_def *def = &map->def;
+ const size_t max_name = 256;
+ char container_name[max_name];
+ __s64 key_size, value_size;
+ __s32 container_id;
+
+	if (snprintf(container_name, max_name, "____btf_map_%s", map->name) >=
+	    max_name) {
+ pr_warning("map:%s length of '____btf_map_%s' is too long\n",
+ map->name, map->name);
+ return -EINVAL;
+ }
+
+ container_id = btf__find_by_name(btf, container_name);
+ if (container_id < 0) {
+ pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n",
+ map->name, container_name);
+ return container_id;
+ }
+
+ container_type = btf__type_by_id(btf, container_id);
+ if (!container_type) {
+ pr_warning("map:%s cannot find BTF type for container_id:%u\n",
+ map->name, container_id);
+ return -EINVAL;
+ }
+
+ if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT ||
+ BTF_INFO_VLEN(container_type->info) < 2) {
+ pr_warning("map:%s container_name:%s is an invalid container struct\n",
+ map->name, container_name);
+ return -EINVAL;
+ }
+
+ key = (struct btf_member *)(container_type + 1);
+ value = key + 1;
+
+ key_size = btf__resolve_size(btf, key->type);
+ if (key_size < 0) {
+ pr_warning("map:%s invalid BTF key_type_size\n",
+ map->name);
+ return key_size;
+ }
+
+ if (def->key_size != key_size) {
+ pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n",
+ map->name, (__u32)key_size, def->key_size);
+ return -EINVAL;
+ }
+
+ value_size = btf__resolve_size(btf, value->type);
+ if (value_size < 0) {
+ pr_warning("map:%s invalid BTF value_type_size\n", map->name);
+ return value_size;
+ }
+
+ if (def->value_size != value_size) {
+ pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n",
+ map->name, (__u32)value_size, def->value_size);
+ return -EINVAL;
+ }
+
+ map->btf_key_type_id = key->type;
+ map->btf_value_type_id = value->type;
+
+ return 0;
+}
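+
+/*
+ * The "____btf_map_<name>" container looked up above is normally
+ * emitted by the BPF program via BPF_ANNOTATE_KV_PAIR; its expected
+ * shape is roughly (a sketch, with hypothetical key/value types):
+ *
+ *	struct ____btf_map_my_map {
+ *		__u32 key;
+ *		__u64 value;
+ *	};
+ */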
+
+int bpf_map__reuse_fd(struct bpf_map *map, int fd)
+{
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int new_fd, err;
+ char *new_name;
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err)
+ return err;
+
+ new_name = strdup(info.name);
+ if (!new_name)
+ return -errno;
+
+ new_fd = open("/", O_RDONLY | O_CLOEXEC);
+ if (new_fd < 0) {
+ err = -errno;
+ goto err_free_new_name;
+ }
+
+ new_fd = dup3(fd, new_fd, O_CLOEXEC);
+ if (new_fd < 0) {
+ err = -errno;
+ goto err_close_new_fd;
+ }
+
+ err = zclose(map->fd);
+ if (err) {
+ err = -errno;
+ goto err_close_new_fd;
+ }
+ free(map->name);
+
+ map->fd = new_fd;
+ map->name = new_name;
+ map->def.type = info.type;
+ map->def.key_size = info.key_size;
+ map->def.value_size = info.value_size;
+ map->def.max_entries = info.max_entries;
+ map->def.map_flags = info.map_flags;
+ map->btf_key_type_id = info.btf_key_type_id;
+ map->btf_value_type_id = info.btf_value_type_id;
+
+ return 0;
+
+err_close_new_fd:
+ close(new_fd);
+err_free_new_name:
+ free(new_name);
+ return err;
+}
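+
+/*
+ * bpf_map__reuse_fd() is typically used to share a pinned map between
+ * objects; a sketch, where the pin path is a placeholder:
+ *
+ *	int fd = bpf_obj_get("/sys/fs/bpf/my_map");
+ *
+ *	if (fd >= 0)
+ *		err = bpf_map__reuse_fd(map, fd);
+ */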
+
+static int
+bpf_object__create_maps(struct bpf_object *obj)
+{
+ struct bpf_create_map_attr create_attr = {};
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ struct bpf_map *map = &obj->maps[i];
+ struct bpf_map_def *def = &map->def;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int *pfd = &map->fd;
+
+ if (map->fd >= 0) {
+ pr_debug("skip map create (preset) %s: fd=%d\n",
+ map->name, map->fd);
+ continue;
+ }
+
+ create_attr.name = map->name;
+ create_attr.map_ifindex = map->map_ifindex;
+ create_attr.map_type = def->type;
+ create_attr.map_flags = def->map_flags;
+ create_attr.key_size = def->key_size;
+ create_attr.value_size = def->value_size;
+ create_attr.max_entries = def->max_entries;
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+
+ if (obj->btf && !bpf_map_find_btf_info(map, obj->btf)) {
+ create_attr.btf_fd = btf__fd(obj->btf);
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
+ }
+
+ *pfd = bpf_create_map_xattr(&create_attr);
+ if (*pfd < 0 && create_attr.btf_key_type_id) {
+ cp = str_error(errno, errmsg, sizeof(errmsg));
+ pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+ map->name, cp, errno);
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
+ *pfd = bpf_create_map_xattr(&create_attr);
+ }
+
+ if (*pfd < 0) {
+ size_t j;
+
+ err = *pfd;
+ cp = str_error(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to create map (name: '%s'): %s\n",
+ map->name, cp);
+ for (j = 0; j < i; j++)
+ zclose(obj->maps[j].fd);
+ return err;
+ }
+ pr_debug("create map %s: fd=%d\n", map->name, *pfd);
+ }
+
+ return 0;
+}
+
+static int
+bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,
+ struct reloc_desc *relo)
+{
+ struct bpf_insn *insn, *new_insn;
+ struct bpf_program *text;
+ size_t new_cnt;
+
+ if (relo->type != RELO_CALL)
+ return -LIBBPF_ERRNO__RELOC;
+
+ if (prog->idx == obj->efile.text_shndx) {
+ pr_warning("relo in .text insn %d into off %d\n",
+ relo->insn_idx, relo->text_off);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ if (prog->main_prog_cnt == 0) {
+ text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);
+ if (!text) {
+			pr_warning("no .text section found, yet relocations into .text exist\n");
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ new_cnt = prog->insns_cnt + text->insns_cnt;
+ new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));
+ if (!new_insn) {
+ pr_warning("oom in prog realloc\n");
+ return -ENOMEM;
+ }
+ memcpy(new_insn + prog->insns_cnt, text->insns,
+ text->insns_cnt * sizeof(*insn));
+ prog->insns = new_insn;
+ prog->main_prog_cnt = prog->insns_cnt;
+ prog->insns_cnt = new_cnt;
+ pr_debug("added %zd insn from %s to prog %s\n",
+ text->insns_cnt, text->section_name,
+ prog->section_name);
+ }
+ insn = &prog->insns[relo->insn_idx];
+ insn->imm += prog->main_prog_cnt - relo->insn_idx;
+ return 0;
+}
+
+static int
+bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
+{
+ int i, err;
+
+ if (!prog || !prog->reloc_desc)
+ return 0;
+
+ for (i = 0; i < prog->nr_reloc; i++) {
+ if (prog->reloc_desc[i].type == RELO_LD64) {
+ struct bpf_insn *insns = prog->insns;
+ int insn_idx, map_idx;
+
+ insn_idx = prog->reloc_desc[i].insn_idx;
+ map_idx = prog->reloc_desc[i].map_idx;
+
+ if (insn_idx >= (int)prog->insns_cnt) {
+ pr_warning("relocation out of range: '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[insn_idx].imm = obj->maps[map_idx].fd;
+ } else {
+ err = bpf_program__reloc_text(prog, obj,
+ &prog->reloc_desc[i]);
+ if (err)
+ return err;
+ }
+ }
+
+ zfree(&prog->reloc_desc);
+ prog->nr_reloc = 0;
+ return 0;
+}
+
+
+static int
+bpf_object__relocate(struct bpf_object *obj)
+{
+ struct bpf_program *prog;
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ prog = &obj->programs[i];
+
+ err = bpf_program__relocate(prog, obj);
+ if (err) {
+ pr_warning("failed to relocate '%s'\n",
+ prog->section_name);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int bpf_object__collect_reloc(struct bpf_object *obj)
+{
+ int i, err;
+
+ if (!obj_elf_valid(obj)) {
+ pr_warning("Internal error: elf object is closed\n");
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ for (i = 0; i < obj->efile.nr_reloc; i++) {
+ GElf_Shdr *shdr = &obj->efile.reloc[i].shdr;
+ Elf_Data *data = obj->efile.reloc[i].data;
+ int idx = shdr->sh_info;
+ struct bpf_program *prog;
+
+ if (shdr->sh_type != SHT_REL) {
+ pr_warning("internal error at %d\n", __LINE__);
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ prog = bpf_object__find_prog_by_idx(obj, idx);
+ if (!prog) {
+ pr_warning("relocation failed: no section(%d)\n", idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ err = bpf_program__collect_reloc(prog,
+ shdr, data,
+ obj);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int
+load_program(enum bpf_prog_type type, enum bpf_attach_type expected_attach_type,
+ const char *name, struct bpf_insn *insns, int insns_cnt,
+ char *license, u32 kern_version, int *pfd, int prog_ifindex)
+{
+ struct bpf_load_program_attr load_attr;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ char *log_buf;
+ int ret;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = type;
+ load_attr.expected_attach_type = expected_attach_type;
+ load_attr.name = name;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = license;
+ load_attr.kern_version = kern_version;
+ load_attr.prog_ifindex = prog_ifindex;
+
+ if (!load_attr.insns || !load_attr.insns_cnt)
+ return -EINVAL;
+
+ log_buf = malloc(BPF_LOG_BUF_SIZE);
+ if (!log_buf)
+		pr_warning("failed to allocate log buffer for BPF loader, continuing without log\n");
+
+ ret = bpf_load_program_xattr(&load_attr, log_buf, BPF_LOG_BUF_SIZE);
+
+ if (ret >= 0) {
+ *pfd = ret;
+ ret = 0;
+ goto out;
+ }
+
+ ret = -LIBBPF_ERRNO__LOAD;
+ cp = str_error(errno, errmsg, sizeof(errmsg));
+ pr_warning("load bpf program failed: %s\n", cp);
+
+ if (log_buf && log_buf[0] != '\0') {
+ ret = -LIBBPF_ERRNO__VERIFY;
+ pr_warning("-- BEGIN DUMP LOG ---\n");
+ pr_warning("\n%s\n", log_buf);
+ pr_warning("-- END LOG --\n");
+ } else if (load_attr.insns_cnt >= BPF_MAXINSNS) {
+ pr_warning("Program too large (%zu insns), at most %d insns\n",
+ load_attr.insns_cnt, BPF_MAXINSNS);
+ ret = -LIBBPF_ERRNO__PROG2BIG;
+ } else {
+ /* Wrong program type? */
+ if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
+ int fd;
+
+ load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+ load_attr.expected_attach_type = 0;
+ fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+ if (fd >= 0) {
+ close(fd);
+ ret = -LIBBPF_ERRNO__PROGTYPE;
+ goto out;
+ }
+ }
+
+ if (log_buf)
+ ret = -LIBBPF_ERRNO__KVER;
+ }
+
+out:
+ free(log_buf);
+ return ret;
+}
+
+static int
+bpf_program__load(struct bpf_program *prog,
+ char *license, u32 kern_version)
+{
+ int err = 0, fd, i;
+
+ if (prog->instances.nr < 0 || !prog->instances.fds) {
+ if (prog->preprocessor) {
+ pr_warning("Internal error: can't load program '%s'\n",
+ prog->section_name);
+ return -LIBBPF_ERRNO__INTERNAL;
+ }
+
+ prog->instances.fds = malloc(sizeof(int));
+ if (!prog->instances.fds) {
+ pr_warning("Not enough memory for BPF fds\n");
+ return -ENOMEM;
+ }
+ prog->instances.nr = 1;
+ prog->instances.fds[0] = -1;
+ }
+
+ if (!prog->preprocessor) {
+ if (prog->instances.nr != 1) {
+ pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n",
+ prog->section_name, prog->instances.nr);
+ }
+ err = load_program(prog->type, prog->expected_attach_type,
+ prog->name, prog->insns, prog->insns_cnt,
+ license, kern_version, &fd,
+ prog->prog_ifindex);
+ if (!err)
+ prog->instances.fds[0] = fd;
+ goto out;
+ }
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ struct bpf_prog_prep_result result;
+ bpf_program_prep_t preprocessor = prog->preprocessor;
+
+ bzero(&result, sizeof(result));
+ err = preprocessor(prog, i, prog->insns,
+ prog->insns_cnt, &result);
+ if (err) {
+ pr_warning("Preprocessing the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+
+ if (!result.new_insn_ptr || !result.new_insn_cnt) {
+ pr_debug("Skip loading the %dth instance of program '%s'\n",
+ i, prog->section_name);
+ prog->instances.fds[i] = -1;
+ if (result.pfd)
+ *result.pfd = -1;
+ continue;
+ }
+
+ err = load_program(prog->type, prog->expected_attach_type,
+ prog->name, result.new_insn_ptr,
+ result.new_insn_cnt,
+ license, kern_version, &fd,
+ prog->prog_ifindex);
+
+ if (err) {
+ pr_warning("Loading the %dth instance of program '%s' failed\n",
+ i, prog->section_name);
+ goto out;
+ }
+
+ if (result.pfd)
+ *result.pfd = fd;
+ prog->instances.fds[i] = fd;
+ }
+out:
+ if (err)
+ pr_warning("failed to load program '%s'\n",
+ prog->section_name);
+ zfree(&prog->insns);
+ prog->insns_cnt = 0;
+ return err;
+}
+
+static bool bpf_program__is_function_storage(struct bpf_program *prog,
+ struct bpf_object *obj)
+{
+ return prog->idx == obj->efile.text_shndx && obj->has_pseudo_calls;
+}
+
+static int
+bpf_object__load_progs(struct bpf_object *obj)
+{
+ size_t i;
+ int err;
+
+ for (i = 0; i < obj->nr_programs; i++) {
+ if (bpf_program__is_function_storage(&obj->programs[i], obj))
+ continue;
+ err = bpf_program__load(&obj->programs[i],
+ obj->license,
+ obj->kern_version);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_XDP:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ case BPF_PROG_TYPE_CGROUP_SOCK:
+ case BPF_PROG_TYPE_LWT_IN:
+ case BPF_PROG_TYPE_LWT_OUT:
+ case BPF_PROG_TYPE_LWT_XMIT:
+ case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+ case BPF_PROG_TYPE_SOCK_OPS:
+ case BPF_PROG_TYPE_SK_SKB:
+ case BPF_PROG_TYPE_CGROUP_DEVICE:
+ case BPF_PROG_TYPE_SK_MSG:
+ case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+ case BPF_PROG_TYPE_LIRC_MODE2:
+ case BPF_PROG_TYPE_SK_REUSEPORT:
+ return false;
+ case BPF_PROG_TYPE_UNSPEC:
+ case BPF_PROG_TYPE_KPROBE:
+ case BPF_PROG_TYPE_TRACEPOINT:
+ case BPF_PROG_TYPE_PERF_EVENT:
+ case BPF_PROG_TYPE_RAW_TRACEPOINT:
+ default:
+ return true;
+ }
+}
+
+static int bpf_object__validate(struct bpf_object *obj, bool needs_kver)
+{
+ if (needs_kver && obj->kern_version == 0) {
+ pr_warning("%s doesn't provide kernel version\n",
+ obj->path);
+ return -LIBBPF_ERRNO__KVERSION;
+ }
+ return 0;
+}
+
+static struct bpf_object *
+__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz,
+ bool needs_kver)
+{
+ struct bpf_object *obj;
+ int err;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warning("failed to init libelf for %s\n", path);
+ return ERR_PTR(-LIBBPF_ERRNO__LIBELF);
+ }
+
+ obj = bpf_object__new(path, obj_buf, obj_buf_sz);
+ if (IS_ERR(obj))
+ return obj;
+
+ CHECK_ERR(bpf_object__elf_init(obj), err, out);
+ CHECK_ERR(bpf_object__check_endianness(obj), err, out);
+ CHECK_ERR(bpf_object__elf_collect(obj), err, out);
+ CHECK_ERR(bpf_object__collect_reloc(obj), err, out);
+ CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out);
+
+ bpf_object__elf_finish(obj);
+ return obj;
+out:
+ bpf_object__close(obj);
+ return ERR_PTR(err);
+}
+
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr)
+{
+ /* param validation */
+ if (!attr->file)
+ return NULL;
+
+ pr_debug("loading %s\n", attr->file);
+
+ return __bpf_object__open(attr->file, NULL, 0,
+ bpf_prog_type__needs_kver(attr->prog_type));
+}
+
+struct bpf_object *bpf_object__open(const char *path)
+{
+ struct bpf_object_open_attr attr = {
+ .file = path,
+ .prog_type = BPF_PROG_TYPE_UNSPEC,
+ };
+
+ return bpf_object__open_xattr(&attr);
+}
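+
+/*
+ * The usual open/load sequence for this API; a sketch, where "prog.o"
+ * is a placeholder object file:
+ *
+ *	struct bpf_object *obj = bpf_object__open("prog.o");
+ *
+ *	if (libbpf_get_error(obj))
+ *		return;
+ *	if (!bpf_object__load(obj))
+ *		... use bpf_object__find_map_by_name() etc. ...
+ *	bpf_object__close(obj);
+ */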
+
+struct bpf_object *bpf_object__open_buffer(void *obj_buf,
+ size_t obj_buf_sz,
+ const char *name)
+{
+ char tmp_name[64];
+
+ /* param validation */
+ if (!obj_buf || obj_buf_sz <= 0)
+ return NULL;
+
+ if (!name) {
+ snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx",
+ (unsigned long)obj_buf,
+ (unsigned long)obj_buf_sz);
+ tmp_name[sizeof(tmp_name) - 1] = '\0';
+ name = tmp_name;
+ }
+ pr_debug("loading object '%s' from buffer\n",
+ name);
+
+ return __bpf_object__open(name, obj_buf, obj_buf_sz, true);
+}
+
+int bpf_object__unload(struct bpf_object *obj)
+{
+ size_t i;
+
+ if (!obj)
+ return -EINVAL;
+
+ for (i = 0; i < obj->nr_maps; i++)
+ zclose(obj->maps[i].fd);
+
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__unload(&obj->programs[i]);
+
+ return 0;
+}
+
+int bpf_object__load(struct bpf_object *obj)
+{
+ int err;
+
+ if (!obj)
+ return -EINVAL;
+
+ if (obj->loaded) {
+ pr_warning("object should not be loaded twice\n");
+ return -EINVAL;
+ }
+
+ obj->loaded = true;
+
+ CHECK_ERR(bpf_object__create_maps(obj), err, out);
+ CHECK_ERR(bpf_object__relocate(obj), err, out);
+ CHECK_ERR(bpf_object__load_progs(obj), err, out);
+
+ return 0;
+out:
+ bpf_object__unload(obj);
+ pr_warning("failed to load object '%s'\n", obj->path);
+ return err;
+}
+
+static int check_path(const char *path)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ struct statfs st_fs;
+ char *dname, *dir;
+ int err = 0;
+
+ if (path == NULL)
+ return -EINVAL;
+
+ dname = strdup(path);
+ if (dname == NULL)
+ return -ENOMEM;
+
+ dir = dirname(dname);
+ if (statfs(dir, &st_fs)) {
+ cp = str_error(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to statfs %s: %s\n", dir, cp);
+ err = -errno;
+ }
+ free(dname);
+
+ if (!err && st_fs.f_type != BPF_FS_MAGIC) {
+ pr_warning("specified path %s is not on BPF FS\n", path);
+ err = -EINVAL;
+ }
+
+ return err;
+}
+
+int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
+ int instance)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (prog == NULL) {
+ pr_warning("invalid program pointer\n");
+ return -EINVAL;
+ }
+
+ if (instance < 0 || instance >= prog->instances.nr) {
+ pr_warning("invalid prog instance %d of prog %s (max %d)\n",
+ instance, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ if (bpf_obj_pin(prog->instances.fds[instance], path)) {
+ cp = str_error(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to pin program: %s\n", cp);
+ return -errno;
+ }
+ pr_debug("pinned program '%s'\n", path);
+
+ return 0;
+}
+
+static int make_dir(const char *path)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err = 0;
+
+ if (mkdir(path, 0700) && errno != EEXIST)
+ err = -errno;
+
+ if (err) {
+ cp = str_error(-err, errmsg, sizeof(errmsg));
+ pr_warning("failed to mkdir %s: %s\n", path, cp);
+ }
+ return err;
+}
+
+int bpf_program__pin(struct bpf_program *prog, const char *path)
+{
+ int i, err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (prog == NULL) {
+ pr_warning("invalid program pointer\n");
+ return -EINVAL;
+ }
+
+ if (prog->instances.nr <= 0) {
+ pr_warning("no instances of prog %s to pin\n",
+ prog->section_name);
+ return -EINVAL;
+ }
+
+ err = make_dir(path);
+ if (err)
+ return err;
+
+ for (i = 0; i < prog->instances.nr; i++) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%d", path, i);
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ err = bpf_program__pin_instance(prog, buf, i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int bpf_map__pin(struct bpf_map *map, const char *path)
+{
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err;
+
+ err = check_path(path);
+ if (err)
+ return err;
+
+ if (map == NULL) {
+ pr_warning("invalid map pointer\n");
+ return -EINVAL;
+ }
+
+ if (bpf_obj_pin(map->fd, path)) {
+ cp = str_error(errno, errmsg, sizeof(errmsg));
+ pr_warning("failed to pin map: %s\n", cp);
+ return -errno;
+ }
+
+ pr_debug("pinned map '%s'\n", path);
+ return 0;
+}
+
+int bpf_object__pin(struct bpf_object *obj, const char *path)
+{
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+
+ if (!obj)
+ return -ENOENT;
+
+ if (!obj->loaded) {
+ pr_warning("object not yet loaded; load it first\n");
+ return -ENOENT;
+ }
+
+ err = make_dir(path);
+ if (err)
+ return err;
+
+ bpf_map__for_each(map, obj) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ bpf_map__name(map));
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ err = bpf_map__pin(map, buf);
+ if (err)
+ return err;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ char buf[PATH_MAX];
+ int len;
+
+ len = snprintf(buf, PATH_MAX, "%s/%s", path,
+ prog->section_name);
+ if (len < 0)
+ return -EINVAL;
+ else if (len >= PATH_MAX)
+ return -ENAMETOOLONG;
+
+ err = bpf_program__pin(prog, buf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
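+
+/*
+ * Example: after a successful bpf_object__load(), the whole object can
+ * be pinned in one call (a sketch; the directory is a placeholder and
+ * must live on a BPF FS mount):
+ *
+ *	err = bpf_object__pin(obj, "/sys/fs/bpf/myobj");
+ */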
+
+void bpf_object__close(struct bpf_object *obj)
+{
+ size_t i;
+
+ if (!obj)
+ return;
+
+ if (obj->clear_priv)
+ obj->clear_priv(obj, obj->priv);
+
+ bpf_object__elf_finish(obj);
+ bpf_object__unload(obj);
+ btf__free(obj->btf);
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ zfree(&obj->maps[i].name);
+ if (obj->maps[i].clear_priv)
+ obj->maps[i].clear_priv(&obj->maps[i],
+ obj->maps[i].priv);
+ obj->maps[i].priv = NULL;
+ obj->maps[i].clear_priv = NULL;
+ }
+ zfree(&obj->maps);
+ obj->nr_maps = 0;
+
+ if (obj->programs && obj->nr_programs) {
+ for (i = 0; i < obj->nr_programs; i++)
+ bpf_program__exit(&obj->programs[i]);
+ }
+ zfree(&obj->programs);
+
+ list_del(&obj->list);
+ free(obj);
+}
+
+struct bpf_object *
+bpf_object__next(struct bpf_object *prev)
+{
+ struct bpf_object *next;
+
+ if (!prev)
+ next = list_first_entry(&bpf_objects_list,
+ struct bpf_object,
+ list);
+ else
+ next = list_next_entry(prev, list);
+
+	/* An empty list is detected here, so no check is needed on entry. */
+ if (&next->list == &bpf_objects_list)
+ return NULL;
+
+ return next;
+}
+
+const char *bpf_object__name(struct bpf_object *obj)
+{
+ return obj ? obj->path : ERR_PTR(-EINVAL);
+}
+
+unsigned int bpf_object__kversion(struct bpf_object *obj)
+{
+ return obj ? obj->kern_version : 0;
+}
+
+int bpf_object__btf_fd(const struct bpf_object *obj)
+{
+ return obj->btf ? btf__fd(obj->btf) : -1;
+}
+
+int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+ bpf_object_clear_priv_t clear_priv)
+{
+ if (obj->priv && obj->clear_priv)
+ obj->clear_priv(obj, obj->priv);
+
+ obj->priv = priv;
+ obj->clear_priv = clear_priv;
+ return 0;
+}
+
+void *bpf_object__priv(struct bpf_object *obj)
+{
+ return obj ? obj->priv : ERR_PTR(-EINVAL);
+}
+
+static struct bpf_program *
+__bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+{
+ size_t idx;
+
+ if (!obj->programs)
+ return NULL;
+ /* First handler */
+ if (prev == NULL)
+ return &obj->programs[0];
+
+ if (prev->obj != obj) {
+ pr_warning("error: program handler doesn't match object\n");
+ return NULL;
+ }
+
+ idx = (prev - obj->programs) + 1;
+ if (idx >= obj->nr_programs)
+ return NULL;
+ return &obj->programs[idx];
+}
+
+struct bpf_program *
+bpf_program__next(struct bpf_program *prev, struct bpf_object *obj)
+{
+ struct bpf_program *prog = prev;
+
+ do {
+ prog = __bpf_program__next(prog, obj);
+ } while (prog && bpf_program__is_function_storage(prog, obj));
+
+ return prog;
+}
+
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv)
+{
+ if (prog->priv && prog->clear_priv)
+ prog->clear_priv(prog, prog->priv);
+
+ prog->priv = priv;
+ prog->clear_priv = clear_priv;
+ return 0;
+}
+
+void *bpf_program__priv(struct bpf_program *prog)
+{
+ return prog ? prog->priv : ERR_PTR(-EINVAL);
+}
+
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex)
+{
+ prog->prog_ifindex = ifindex;
+}
+
+const char *bpf_program__title(struct bpf_program *prog, bool needs_copy)
+{
+ const char *title;
+
+ title = prog->section_name;
+ if (needs_copy) {
+ title = strdup(title);
+ if (!title) {
+ pr_warning("failed to strdup program title\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ return title;
+}
+
+int bpf_program__fd(struct bpf_program *prog)
+{
+ return bpf_program__nth_fd(prog, 0);
+}
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
+ bpf_program_prep_t prep)
+{
+ int *instances_fds;
+
+ if (nr_instances <= 0 || !prep)
+ return -EINVAL;
+
+ if (prog->instances.nr > 0 || prog->instances.fds) {
+ pr_warning("Can't set pre-processor after loading\n");
+ return -EINVAL;
+ }
+
+ instances_fds = malloc(sizeof(int) * nr_instances);
+ if (!instances_fds) {
+ pr_warning("alloc memory failed for fds\n");
+ return -ENOMEM;
+ }
+
+	/* fill all fds with -1 */
+ memset(instances_fds, -1, sizeof(int) * nr_instances);
+
+ prog->instances.nr = nr_instances;
+ prog->instances.fds = instances_fds;
+ prog->preprocessor = prep;
+ return 0;
+}
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n)
+{
+ int fd;
+
+ if (!prog)
+ return -EINVAL;
+
+ if (n >= prog->instances.nr || n < 0) {
+ pr_warning("Can't get the %dth fd from program %s: only %d instances\n",
+ n, prog->section_name, prog->instances.nr);
+ return -EINVAL;
+ }
+
+ fd = prog->instances.fds[n];
+ if (fd < 0) {
+ pr_warning("%dth instance of program '%s' is invalid\n",
+ n, prog->section_name);
+ return -ENOENT;
+ }
+
+ return fd;
+}
+
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)
+{
+ prog->type = type;
+}
+
+static bool bpf_program__is_type(struct bpf_program *prog,
+ enum bpf_prog_type type)
+{
+ return prog ? (prog->type == type) : false;
+}
+
+#define BPF_PROG_TYPE_FNS(NAME, TYPE) \
+int bpf_program__set_##NAME(struct bpf_program *prog) \
+{ \
+ if (!prog) \
+ return -EINVAL; \
+ bpf_program__set_type(prog, TYPE); \
+ return 0; \
+} \
+ \
+bool bpf_program__is_##NAME(struct bpf_program *prog) \
+{ \
+ return bpf_program__is_type(prog, TYPE); \
+} \
+
+BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
+BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
+BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
+BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
+BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
+BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
+
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type)
+{
+ prog->expected_attach_type = type;
+}
+
+#define BPF_PROG_SEC_FULL(string, ptype, atype) \
+ { string, sizeof(string) - 1, ptype, atype }
+
+#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
+
+#define BPF_S_PROG_SEC(string, ptype) \
+ BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
+
+#define BPF_SA_PROG_SEC(string, ptype) \
+ BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
+
+static const struct {
+ const char *sec;
+ size_t len;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+} section_names[] = {
+ BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
+ BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE),
+ BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS),
+ BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT),
+ BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT),
+ BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT),
+ BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
+ BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
+ BPF_PROG_SEC("cgroup/skb", BPF_PROG_TYPE_CGROUP_SKB),
+ BPF_PROG_SEC("cgroup/sock", BPF_PROG_TYPE_CGROUP_SOCK),
+ BPF_PROG_SEC("cgroup/dev", BPF_PROG_TYPE_CGROUP_DEVICE),
+ BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
+ BPF_PROG_SEC("lwt_out", BPF_PROG_TYPE_LWT_OUT),
+ BPF_PROG_SEC("lwt_xmit", BPF_PROG_TYPE_LWT_XMIT),
+ BPF_PROG_SEC("lwt_seg6local", BPF_PROG_TYPE_LWT_SEG6LOCAL),
+ BPF_PROG_SEC("sockops", BPF_PROG_TYPE_SOCK_OPS),
+ BPF_PROG_SEC("sk_skb", BPF_PROG_TYPE_SK_SKB),
+ BPF_PROG_SEC("sk_msg", BPF_PROG_TYPE_SK_MSG),
+ BPF_PROG_SEC("lirc_mode2", BPF_PROG_TYPE_LIRC_MODE2),
+ BPF_SA_PROG_SEC("cgroup/bind4", BPF_CGROUP_INET4_BIND),
+ BPF_SA_PROG_SEC("cgroup/bind6", BPF_CGROUP_INET6_BIND),
+ BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
+ BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+ BPF_SA_PROG_SEC("cgroup/sendmsg4", BPF_CGROUP_UDP4_SENDMSG),
+ BPF_SA_PROG_SEC("cgroup/sendmsg6", BPF_CGROUP_UDP6_SENDMSG),
+ BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
+ BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
+};
+
+#undef BPF_PROG_SEC
+#undef BPF_PROG_SEC_FULL
+#undef BPF_S_PROG_SEC
+#undef BPF_SA_PROG_SEC
+
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type)
+{
+ int i;
+
+ if (!name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(section_names); i++) {
+ if (strncmp(name, section_names[i].sec, section_names[i].len))
+ continue;
+ *prog_type = section_names[i].prog_type;
+ *expected_attach_type = section_names[i].expected_attach_type;
+ return 0;
+ }
+ return -EINVAL;
+}
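+
+/*
+ * Example lookup against the table above (a sketch):
+ *
+ *	enum bpf_prog_type ptype;
+ *	enum bpf_attach_type atype;
+ *
+ *	err = libbpf_prog_type_by_name("xdp", &ptype, &atype);
+ *	(on success, ptype is BPF_PROG_TYPE_XDP and atype is 0)
+ */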
+
+static int
+bpf_program__identify_section(struct bpf_program *prog,
+ enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type)
+{
+ return libbpf_prog_type_by_name(prog->section_name, prog_type,
+ expected_attach_type);
+}
+
+int bpf_map__fd(struct bpf_map *map)
+{
+ return map ? map->fd : -EINVAL;
+}
+
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map)
+{
+ return map ? &map->def : ERR_PTR(-EINVAL);
+}
+
+const char *bpf_map__name(struct bpf_map *map)
+{
+ return map ? map->name : NULL;
+}
+
+__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
+{
+ return map ? map->btf_key_type_id : 0;
+}
+
+__u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
+{
+ return map ? map->btf_value_type_id : 0;
+}
+
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv)
+{
+ if (!map)
+ return -EINVAL;
+
+ if (map->priv) {
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ }
+
+ map->priv = priv;
+ map->clear_priv = clear_priv;
+ return 0;
+}
+
+void *bpf_map__priv(struct bpf_map *map)
+{
+ return map ? map->priv : ERR_PTR(-EINVAL);
+}
+
+bool bpf_map__is_offload_neutral(struct bpf_map *map)
+{
+ return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
+}
+
+void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+ map->map_ifindex = ifindex;
+}
+
+struct bpf_map *
+bpf_map__next(struct bpf_map *prev, struct bpf_object *obj)
+{
+ size_t idx;
+ struct bpf_map *s, *e;
+
+ if (!obj || !obj->maps)
+ return NULL;
+
+ s = obj->maps;
+ e = obj->maps + obj->nr_maps;
+
+ if (prev == NULL)
+ return s;
+
+ if ((prev < s) || (prev >= e)) {
+ pr_warning("error in %s: map handler doesn't belong to object\n",
+ __func__);
+ return NULL;
+ }
+
+ idx = (prev - obj->maps) + 1;
+ if (idx >= obj->nr_maps)
+ return NULL;
+ return &obj->maps[idx];
+}
+
+struct bpf_map *
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name)
+{
+ struct bpf_map *pos;
+
+ bpf_map__for_each(pos, obj) {
+ if (pos->name && !strcmp(pos->name, name))
+ return pos;
+ }
+ return NULL;
+}
+
+struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset)
+{
+ int i;
+
+ for (i = 0; i < obj->nr_maps; i++) {
+ if (obj->maps[i].offset == offset)
+ return &obj->maps[i];
+ }
+ return ERR_PTR(-ENOENT);
+}
+
+long libbpf_get_error(const void *ptr)
+{
+ if (IS_ERR(ptr))
+ return PTR_ERR(ptr);
+ return 0;
+}
+
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ struct bpf_prog_load_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = file;
+ attr.prog_type = type;
+ attr.expected_attach_type = 0;
+
+ return bpf_prog_load_xattr(&attr, pobj, prog_fd);
+}
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ struct bpf_object_open_attr open_attr = {};
+ struct bpf_program *prog, *first_prog = NULL;
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_prog_type prog_type;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ int err;
+
+ if (!attr)
+ return -EINVAL;
+ if (!attr->file)
+ return -EINVAL;
+
+ open_attr.file = attr->file;
+ open_attr.prog_type = attr->prog_type;
+
+ obj = bpf_object__open_xattr(&open_attr);
+ if (IS_ERR_OR_NULL(obj))
+ return -ENOENT;
+
+ bpf_object__for_each_program(prog, obj) {
+ /*
+ * If type is not specified, try to guess it based on
+ * section name.
+ */
+ prog_type = attr->prog_type;
+ prog->prog_ifindex = attr->ifindex;
+ expected_attach_type = attr->expected_attach_type;
+ if (prog_type == BPF_PROG_TYPE_UNSPEC) {
+ err = bpf_program__identify_section(prog, &prog_type,
+ &expected_attach_type);
+ if (err < 0) {
+ pr_warning("failed to guess program type based on section name %s\n",
+ prog->section_name);
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+ }
+
+ bpf_program__set_type(prog, prog_type);
+ bpf_program__set_expected_attach_type(prog,
+ expected_attach_type);
+
+ if (!bpf_program__is_function_storage(prog, obj) && !first_prog)
+ first_prog = prog;
+ }
+
+ bpf_map__for_each(map, obj) {
+ if (!bpf_map__is_offload_neutral(map))
+ map->map_ifindex = attr->ifindex;
+ }
+
+ if (!first_prog) {
+ pr_warning("object file doesn't contain bpf program\n");
+ bpf_object__close(obj);
+ return -ENOENT;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ bpf_object__close(obj);
+ return -EINVAL;
+ }
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(first_prog);
+ return 0;
+}
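+
+/*
+ * The simple entry point above is typically used as (a sketch; "prog.o"
+ * is a placeholder):
+ *
+ *	struct bpf_object *obj;
+ *	int prog_fd;
+ *
+ *	if (!bpf_prog_load("prog.o", BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
+ *		... attach prog_fd ...
+ *		bpf_object__close(obj);
+ *	}
+ */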
+
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mem, unsigned long size,
+ unsigned long page_size, void **buf, size_t *buf_len,
+ bpf_perf_event_print_t fn, void *priv)
+{
+ volatile struct perf_event_mmap_page *header = mem;
+ __u64 data_tail = header->data_tail;
+ __u64 data_head = header->data_head;
+ int ret = LIBBPF_PERF_EVENT_ERROR;
+ void *base, *begin, *end;
+
+ asm volatile("" ::: "memory"); /* in real code it should be smp_rmb() */
+ if (data_head == data_tail)
+ return LIBBPF_PERF_EVENT_CONT;
+
+ base = ((char *)header) + page_size;
+
+ begin = base + data_tail % size;
+ end = base + data_head % size;
+
+ while (begin != end) {
+ struct perf_event_header *ehdr;
+
+ ehdr = begin;
+ if (begin + ehdr->size > base + size) {
+ long len = base + size - begin;
+
+ if (*buf_len < ehdr->size) {
+ free(*buf);
+ *buf = malloc(ehdr->size);
+ if (!*buf) {
+ ret = LIBBPF_PERF_EVENT_ERROR;
+ break;
+ }
+ *buf_len = ehdr->size;
+ }
+
+ memcpy(*buf, begin, len);
+ memcpy(*buf + len, base, ehdr->size - len);
+ ehdr = (void *)*buf;
+ begin = base + ehdr->size - len;
+ } else if (begin + ehdr->size == base + size) {
+ begin = base;
+ } else {
+ begin += ehdr->size;
+ }
+
+ ret = fn(ehdr, priv);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ break;
+
+ data_tail += ehdr->size;
+ }
+
+ __sync_synchronize(); /* smp_mb() */
+ header->data_tail = data_tail;
+
+ return ret;
+}
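bpf_prog_load() above is the one-call entry point most callers need. A minimal
sketch of a caller, assuming a compiled object named "prog.o" and an XDP
program type (both placeholders):

#include <stdio.h>
#include "libbpf.h"

int main(void)
{
	struct bpf_object *obj;
	int prog_fd, err;

	err = bpf_prog_load("prog.o", BPF_PROG_TYPE_XDP, &obj, &prog_fd);
	if (err) {
		char buf[128];

		libbpf_strerror(err, buf, sizeof(buf));
		fprintf(stderr, "load failed: %s\n", buf);
		return 1;
	}

	/* prog_fd refers to the first non-storage program in the object */
	bpf_object__close(obj);
	return 0;
}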
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
new file mode 100644
index 000000000..96c55fac5
--- /dev/null
+++ b/tools/lib/bpf/libbpf.h
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * Common eBPF ELF object loading operations.
+ *
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+#ifndef __BPF_LIBBPF_H
+#define __BPF_LIBBPF_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/types.h> // for size_t
+#include <linux/bpf.h>
+
+enum libbpf_errno {
+ __LIBBPF_ERRNO__START = 4000,
+
+ /* Something wrong in libelf */
+ LIBBPF_ERRNO__LIBELF = __LIBBPF_ERRNO__START,
+ LIBBPF_ERRNO__FORMAT, /* BPF object format invalid */
+ LIBBPF_ERRNO__KVERSION, /* Incorrect or no 'version' section */
+ LIBBPF_ERRNO__ENDIAN, /* Endian mismatch */
+ LIBBPF_ERRNO__INTERNAL, /* Internal error in libbpf */
+ LIBBPF_ERRNO__RELOC, /* Relocation failed */
+ LIBBPF_ERRNO__LOAD, /* Load program failure for unknown reason */
+ LIBBPF_ERRNO__VERIFY, /* Kernel verifier blocks program loading */
+ LIBBPF_ERRNO__PROG2BIG, /* Program too big */
+ LIBBPF_ERRNO__KVER, /* Incorrect kernel version */
+ LIBBPF_ERRNO__PROGTYPE, /* Kernel doesn't support this program type */
+ LIBBPF_ERRNO__WRNGPID, /* Wrong pid in netlink message */
+ LIBBPF_ERRNO__INVSEQ, /* Invalid netlink sequence */
+ __LIBBPF_ERRNO__END,
+};
+
+int libbpf_strerror(int err, char *buf, size_t size);
+
+/*
+ * __printf is defined in include/linux/compiler-gcc.h. However,
+ * it would be better if libbpf.h didn't depend on Linux header files.
+ * So instead of __printf, here we use gcc attribute directly.
+ */
+typedef int (*libbpf_print_fn_t)(const char *, ...)
+ __attribute__((format(printf, 1, 2)));
+
+void libbpf_set_print(libbpf_print_fn_t warn,
+ libbpf_print_fn_t info,
+ libbpf_print_fn_t debug);
+
+/* Hide internal to user */
+struct bpf_object;
+
+struct bpf_object_open_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+};
+
+struct bpf_object *bpf_object__open(const char *path);
+struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr);
+struct bpf_object *bpf_object__open_buffer(void *obj_buf,
+ size_t obj_buf_sz,
+ const char *name);
+int bpf_object__pin(struct bpf_object *object, const char *path);
+void bpf_object__close(struct bpf_object *object);
+
+/* Load/unload object into/from kernel */
+int bpf_object__load(struct bpf_object *obj);
+int bpf_object__unload(struct bpf_object *obj);
+const char *bpf_object__name(struct bpf_object *obj);
+unsigned int bpf_object__kversion(struct bpf_object *obj);
+int bpf_object__btf_fd(const struct bpf_object *obj);
+
+struct bpf_program *
+bpf_object__find_program_by_title(struct bpf_object *obj, const char *title);
+
+struct bpf_object *bpf_object__next(struct bpf_object *prev);
+#define bpf_object__for_each_safe(pos, tmp) \
+ for ((pos) = bpf_object__next(NULL), \
+ (tmp) = bpf_object__next(pos); \
+ (pos) != NULL; \
+ (pos) = (tmp), (tmp) = bpf_object__next(tmp))
+
+typedef void (*bpf_object_clear_priv_t)(struct bpf_object *, void *);
+int bpf_object__set_priv(struct bpf_object *obj, void *priv,
+ bpf_object_clear_priv_t clear_priv);
+void *bpf_object__priv(struct bpf_object *prog);
+
+int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type);
+
+/* Accessors of bpf_program */
+struct bpf_program;
+struct bpf_program *bpf_program__next(struct bpf_program *prog,
+ struct bpf_object *obj);
+
+#define bpf_object__for_each_program(pos, obj) \
+ for ((pos) = bpf_program__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_program__next((pos), (obj)))
+
+typedef void (*bpf_program_clear_priv_t)(struct bpf_program *,
+ void *);
+
+int bpf_program__set_priv(struct bpf_program *prog, void *priv,
+ bpf_program_clear_priv_t clear_priv);
+
+void *bpf_program__priv(struct bpf_program *prog);
+void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex);
+
+const char *bpf_program__title(struct bpf_program *prog, bool needs_copy);
+
+int bpf_program__fd(struct bpf_program *prog);
+int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
+ int instance);
+int bpf_program__pin(struct bpf_program *prog, const char *path);
+
+struct bpf_insn;
+
+/*
+ * Libbpf allows callers to adjust BPF programs before they are loaded
+ * into the kernel. One program in an object file can be transformed
+ * into multiple variants to be attached to different hooks.
+ *
+ * bpf_program_prep_t, bpf_program__set_prep and bpf_program__nth_fd
+ * form an API for this purpose.
+ *
+ * - bpf_program_prep_t:
+ *   Defines a 'preprocessor', a caller-defined function passed to
+ *   libbpf through bpf_program__set_prep() and called before the
+ *   program is loaded. The preprocessor adjusts the program once for
+ *   each instance, according to the instance id passed to it.
+ *
+ * - bpf_program__set_prep:
+ *   Attaches a preprocessor to a BPF program. The number of instances
+ *   that should be created is also passed through this function.
+ *
+ * - bpf_program__nth_fd:
+ *   After the program is loaded, get the resulting FD of a given
+ *   instance of the BPF program.
+ *
+ * If bpf_program__set_prep() is not used, the program is loaded
+ * without adjustment during bpf_object__load(). The program has only
+ * one instance. In this case bpf_program__fd(prog) is equal to
+ * bpf_program__nth_fd(prog, 0).
+ */
+
+struct bpf_prog_prep_result {
+ /*
+ * If not NULL, load new instruction array.
+ * If set to NULL, don't load this instance.
+ */
+ struct bpf_insn *new_insn_ptr;
+ int new_insn_cnt;
+
+ /* If not NULL, result FD is written to it. */
+ int *pfd;
+};
+
+/*
+ * Parameters of bpf_program_prep_t:
+ * - prog: The bpf_program being loaded.
+ * - n: Index of instance being generated.
+ * - insns: BPF instructions array.
+ * - insns_cnt:Number of instructions in insns.
+ * - res: Output parameter, result of transformation.
+ *
+ * Return value:
+ * - Zero: pre-processing success.
+ * - Non-zero: pre-processing error, stop loading.
+ */
+typedef int (*bpf_program_prep_t)(struct bpf_program *prog, int n,
+ struct bpf_insn *insns, int insns_cnt,
+ struct bpf_prog_prep_result *res);
+
+int bpf_program__set_prep(struct bpf_program *prog, int nr_instance,
+ bpf_program_prep_t prep);
+
+int bpf_program__nth_fd(struct bpf_program *prog, int n);
+
+/*
+ * Adjust type of BPF program. Default is kprobe.
+ */
+int bpf_program__set_socket_filter(struct bpf_program *prog);
+int bpf_program__set_tracepoint(struct bpf_program *prog);
+int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
+int bpf_program__set_kprobe(struct bpf_program *prog);
+int bpf_program__set_sched_cls(struct bpf_program *prog);
+int bpf_program__set_sched_act(struct bpf_program *prog);
+int bpf_program__set_xdp(struct bpf_program *prog);
+int bpf_program__set_perf_event(struct bpf_program *prog);
+void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
+void bpf_program__set_expected_attach_type(struct bpf_program *prog,
+ enum bpf_attach_type type);
+
+bool bpf_program__is_socket_filter(struct bpf_program *prog);
+bool bpf_program__is_tracepoint(struct bpf_program *prog);
+bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
+bool bpf_program__is_kprobe(struct bpf_program *prog);
+bool bpf_program__is_sched_cls(struct bpf_program *prog);
+bool bpf_program__is_sched_act(struct bpf_program *prog);
+bool bpf_program__is_xdp(struct bpf_program *prog);
+bool bpf_program__is_perf_event(struct bpf_program *prog);
+
+/*
+ * No need for __attribute__((packed)): all members of 'bpf_map_def'
+ * are aligned. In addition, using __attribute__((packed))
+ * would trigger a -Wpacked warning message, and lead to an error
+ * if -Werror is set.
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+};
+
+/*
+ * The 'struct bpf_map' in include/linux/bpf.h is internal to the kernel,
+ * so no need to worry about a name clash.
+ */
+struct bpf_map;
+struct bpf_map *
+bpf_object__find_map_by_name(struct bpf_object *obj, const char *name);
+
+/*
+ * Get bpf_map through the offset of corresponding struct bpf_map_def
+ * in the BPF object file.
+ */
+struct bpf_map *
+bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);
+
+struct bpf_map *
+bpf_map__next(struct bpf_map *map, struct bpf_object *obj);
+#define bpf_map__for_each(pos, obj) \
+ for ((pos) = bpf_map__next(NULL, (obj)); \
+ (pos) != NULL; \
+ (pos) = bpf_map__next((pos), (obj)))
+
+int bpf_map__fd(struct bpf_map *map);
+const struct bpf_map_def *bpf_map__def(struct bpf_map *map);
+const char *bpf_map__name(struct bpf_map *map);
+__u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
+__u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+
+typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
+int bpf_map__set_priv(struct bpf_map *map, void *priv,
+ bpf_map_clear_priv_t clear_priv);
+void *bpf_map__priv(struct bpf_map *map);
+int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+bool bpf_map__is_offload_neutral(struct bpf_map *map);
+void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
+int bpf_map__pin(struct bpf_map *map, const char *path);
+
+long libbpf_get_error(const void *ptr);
+
+struct bpf_prog_load_attr {
+ const char *file;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ int ifindex;
+};
+
+int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
+ struct bpf_object **pobj, int *prog_fd);
+int bpf_prog_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+
+enum bpf_perf_event_ret {
+ LIBBPF_PERF_EVENT_DONE = 0,
+ LIBBPF_PERF_EVENT_ERROR = -1,
+ LIBBPF_PERF_EVENT_CONT = -2,
+};
+
+typedef enum bpf_perf_event_ret (*bpf_perf_event_print_t)(void *event,
+ void *priv);
+enum bpf_perf_event_ret
+bpf_perf_event_read_simple(void *mem, unsigned long size,
+			   unsigned long page_size,
+			   void **buf, size_t *buf_len,
+			   bpf_perf_event_print_t fn, void *priv);
+#endif
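A sketch of the instance-preprocessing API documented above
(bpf_program_prep_t / bpf_program__set_prep / bpf_program__nth_fd); the
two-instance count and the pass-through preprocessor are invented for
illustration:

/* Load each instance unmodified; a real preprocessor would patch
 * insns differently depending on the instance id n. */
static int prep_fn(struct bpf_program *prog, int n,
		   struct bpf_insn *insns, int insns_cnt,
		   struct bpf_prog_prep_result *res)
{
	res->new_insn_ptr = insns;
	res->new_insn_cnt = insns_cnt;
	res->pfd = NULL;
	return 0;
}

/* Before bpf_object__load(): request two instances of the program. */
bpf_program__set_prep(prog, 2, prep_fn);

/* After loading, each instance has its own FD. */
int fd0 = bpf_program__nth_fd(prog, 0);
int fd1 = bpf_program__nth_fd(prog, 1);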
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
new file mode 100644
index 000000000..d2d17226d
--- /dev/null
+++ b/tools/lib/bpf/libbpf_errno.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+
+#include "libbpf.h"
+
+#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+ [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf",
+ [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid",
+ [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost",
+ [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch",
+ [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf",
+ [ERRCODE_OFFSET(RELOC)] = "Relocation failed",
+ [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading",
+ [ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
+ [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
+ [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
+ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
+ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+ if (!buf || !size)
+ return -1;
+
+ err = err > 0 ? err : -err;
+
+ if (err < __LIBBPF_ERRNO__START) {
+ int ret;
+
+ ret = strerror_r(err, buf, size);
+ buf[size - 1] = '\0';
+ return ret;
+ }
+
+ if (err < __LIBBPF_ERRNO__END) {
+ const char *msg;
+
+ msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+ snprintf(buf, size, "%s", msg);
+ buf[size - 1] = '\0';
+ return 0;
+ }
+
+ snprintf(buf, size, "Unknown libbpf error %d", err);
+ buf[size - 1] = '\0';
+ return -1;
+}
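Callers can pass either the libbpf-specific codes above or plain errno values;
a short illustrative check:

char buf[128];

/* libbpf-specific code: resolved through the table above */
libbpf_strerror(-LIBBPF_ERRNO__VERIFY, buf, sizeof(buf));
/* buf now holds "Kernel verifier blocks program loading" */

/* ordinary errno: falls through to strerror_r() */
libbpf_strerror(-ENOENT, buf, sizeof(buf));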
diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
new file mode 100644
index 000000000..471943427
--- /dev/null
+++ b/tools/lib/bpf/nlattr.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: LGPL-2.1
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation version 2.1
+ * of the License.
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#include <errno.h>
+#include "nlattr.h"
+#include <linux/rtnetlink.h>
+#include <string.h>
+#include <stdio.h>
+
+static uint16_t nla_attr_minlen[NLA_TYPE_MAX+1] = {
+ [NLA_U8] = sizeof(uint8_t),
+ [NLA_U16] = sizeof(uint16_t),
+ [NLA_U32] = sizeof(uint32_t),
+ [NLA_U64] = sizeof(uint64_t),
+ [NLA_STRING] = 1,
+ [NLA_FLAG] = 0,
+};
+
+static int nla_len(const struct nlattr *nla)
+{
+ return nla->nla_len - NLA_HDRLEN;
+}
+
+static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
+{
+ int totlen = NLA_ALIGN(nla->nla_len);
+
+ *remaining -= totlen;
+ return (struct nlattr *) ((char *) nla + totlen);
+}
+
+static int nla_ok(const struct nlattr *nla, int remaining)
+{
+ return remaining >= sizeof(*nla) &&
+ nla->nla_len >= sizeof(*nla) &&
+ nla->nla_len <= remaining;
+}
+
+static void *nla_data(const struct nlattr *nla)
+{
+ return (char *) nla + NLA_HDRLEN;
+}
+
+static int nla_type(const struct nlattr *nla)
+{
+ return nla->nla_type & NLA_TYPE_MASK;
+}
+
+static int validate_nla(struct nlattr *nla, int maxtype,
+ struct nla_policy *policy)
+{
+ struct nla_policy *pt;
+ unsigned int minlen = 0;
+ int type = nla_type(nla);
+
+ if (type < 0 || type > maxtype)
+ return 0;
+
+ pt = &policy[type];
+
+ if (pt->type > NLA_TYPE_MAX)
+ return 0;
+
+ if (pt->minlen)
+ minlen = pt->minlen;
+ else if (pt->type != NLA_UNSPEC)
+ minlen = nla_attr_minlen[pt->type];
+
+ if (nla_len(nla) < minlen)
+ return -1;
+
+ if (pt->maxlen && nla_len(nla) > pt->maxlen)
+ return -1;
+
+ if (pt->type == NLA_STRING) {
+ char *data = nla_data(nla);
+ if (data[nla_len(nla) - 1] != '\0')
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int nlmsg_len(const struct nlmsghdr *nlh)
+{
+ return nlh->nlmsg_len - NLMSG_HDRLEN;
+}
+
+/**
+ * Create attribute index based on a stream of attributes.
+ * @arg tb Index array to be filled (maxtype+1 elements).
+ * @arg maxtype Maximum attribute type expected and accepted.
+ * @arg head Head of attribute stream.
+ * @arg len Length of attribute stream.
+ * @arg policy Attribute validation policy.
+ *
+ * Iterates over the stream of attributes and stores a pointer to each
+ * attribute in the index array using the attribute type as index to
+ * the array. Attributes with a type greater than the maximum type
+ * specified will be silently ignored in order to maintain backwards
+ * compatibility. If \a policy is not NULL, the attribute will be
+ * validated using the specified policy.
+ *
+ * @see nla_validate
+ * @return 0 on success or a negative error code.
+ */
+static int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len,
+ struct nla_policy *policy)
+{
+ struct nlattr *nla;
+ int rem, err;
+
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+
+ nla_for_each_attr(nla, head, len, rem) {
+ int type = nla_type(nla);
+
+ if (type > maxtype)
+ continue;
+
+ if (policy) {
+ err = validate_nla(nla, maxtype, policy);
+ if (err < 0)
+ goto errout;
+ }
+
+ if (tb[type])
+ fprintf(stderr, "Attribute of type %#x found multiple times in message, "
+ "previous attribute is being ignored.\n", type);
+
+ tb[type] = nla;
+ }
+
+ err = 0;
+errout:
+ return err;
+}
+
+/* dump netlink extended ack error message */
+int nla_dump_errormsg(struct nlmsghdr *nlh)
+{
+ struct nla_policy extack_policy[NLMSGERR_ATTR_MAX + 1] = {
+ [NLMSGERR_ATTR_MSG] = { .type = NLA_STRING },
+ [NLMSGERR_ATTR_OFFS] = { .type = NLA_U32 },
+ };
+ struct nlattr *tb[NLMSGERR_ATTR_MAX + 1], *attr;
+ struct nlmsgerr *err;
+ char *errmsg = NULL;
+ int hlen, alen;
+
+ /* no TLVs, nothing to do here */
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return 0;
+
+ err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+ hlen = sizeof(*err);
+
+ /* if NLM_F_CAPPED is set then the inner err msg was capped */
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ hlen += nlmsg_len(&err->msg);
+
+ attr = (struct nlattr *) ((void *) err + hlen);
+ alen = nlh->nlmsg_len - hlen;
+
+ if (nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen, extack_policy) != 0) {
+ fprintf(stderr,
+ "Failed to parse extended error attributes\n");
+ return 0;
+ }
+
+ if (tb[NLMSGERR_ATTR_MSG])
+ errmsg = (char *) nla_data(tb[NLMSGERR_ATTR_MSG]);
+
+ fprintf(stderr, "Kernel error message: %s\n", errmsg);
+
+ return 0;
+}
diff --git a/tools/lib/bpf/nlattr.h b/tools/lib/bpf/nlattr.h
new file mode 100644
index 000000000..931a71f68
--- /dev/null
+++ b/tools/lib/bpf/nlattr.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+
+/*
+ * NETLINK Netlink attributes
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation version 2.1
+ * of the License.
+ *
+ * Copyright (c) 2003-2013 Thomas Graf <tgraf@suug.ch>
+ */
+
+#ifndef __NLATTR_H
+#define __NLATTR_H
+
+#include <stdint.h>
+#include <linux/netlink.h>
+/* avoid multiple definition of netlink features */
+#define __LINUX_NETLINK_H
+
+/**
+ * Standard attribute types to specify validation policy
+ */
+enum {
+ NLA_UNSPEC, /**< Unspecified type, binary data chunk */
+ NLA_U8, /**< 8 bit integer */
+ NLA_U16, /**< 16 bit integer */
+ NLA_U32, /**< 32 bit integer */
+ NLA_U64, /**< 64 bit integer */
+ NLA_STRING, /**< NUL terminated character string */
+ NLA_FLAG, /**< Flag */
+	NLA_MSECS,	/**< Milliseconds (64bit) */
+ NLA_NESTED, /**< Nested attributes */
+ __NLA_TYPE_MAX,
+};
+
+#define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
+
+/**
+ * @ingroup attr
+ * Attribute validation policy.
+ *
+ * See section @core_doc{core_attr_parse,Attribute Parsing} for more details.
+ */
+struct nla_policy {
+ /** Type of attribute or NLA_UNSPEC */
+ uint16_t type;
+
+ /** Minimal length of payload required */
+ uint16_t minlen;
+
+ /** Maximal length of payload allowed */
+ uint16_t maxlen;
+};
+
+/**
+ * @ingroup attr
+ * Iterate over a stream of attributes
+ * @arg pos loop counter, set to current attribute
+ * @arg head head of attribute stream
+ * @arg len length of attribute stream
+ * @arg rem initialized to len, holds bytes currently remaining in stream
+ */
+#define nla_for_each_attr(pos, head, len, rem) \
+ for (pos = head, rem = len; \
+ nla_ok(pos, rem); \
+ pos = nla_next(pos, &(rem)))
+
+int nla_dump_errormsg(struct nlmsghdr *nlh);
+
+#endif /* __NLATTR_H */
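nla_dump_errormsg() is the only exported entry point; a sketch of where it
slots in, assuming "buf" and "len" hold a message just read from a netlink
socket:

struct nlmsghdr *nh;

for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
     nh = NLMSG_NEXT(nh, len)) {
	if (nh->nlmsg_type == NLMSG_ERROR)
		nla_dump_errormsg(nh);	/* prints the extack message, if any */
}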
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
new file mode 100644
index 000000000..b8798114a
--- /dev/null
+++ b/tools/lib/bpf/str_error.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: LGPL-2.1
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include "str_error.h"
+
+/*
+ * Wrapper to allow building on non-GNU systems such as Alpine Linux's musl
+ * libc. It also checks the strerror_r() return value so that callers do not
+ * have to.
+ */
+char *str_error(int err, char *dst, int len)
+{
+ int ret = strerror_r(err, dst, len);
+ if (ret)
+ snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
+ return dst;
+}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
new file mode 100644
index 000000000..355b1db57
--- /dev/null
+++ b/tools/lib/bpf/str_error.h
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
+#ifndef BPF_STR_ERROR
+#define BPF_STR_ERROR
+
+char *str_error(int err, char *dst, int len);
+#endif // BPF_STR_ERROR
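A typical call site, assuming <errno.h> and <stdio.h> are included; the buffer
is always printable afterwards because the wrapper substitutes a fallback
message when strerror_r() fails:

char buf[128];

fprintf(stderr, "open: %s\n", str_error(errno, buf, sizeof(buf)));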
diff --git a/tools/lib/find_bit.c b/tools/lib/find_bit.c
new file mode 100644
index 000000000..a88bd5070
--- /dev/null
+++ b/tools/lib/find_bit.c
@@ -0,0 +1,128 @@
+/* bit search implementation
+ *
+ * Copied from lib/find_bit.c to tools/lib/find_bit.c
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+
+#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
+ !defined(find_next_and_bit)
+
+/*
+ * This is a common helper function for find_next_bit, find_next_zero_bit, and
+ * find_next_and_bit. The differences are:
+ * - The "invert" argument, which is XORed with each fetched word before
+ *   searching it for set bits.
+ * - The optional "addr2", which is ANDed with "addr1" if present.
+ */
+static inline unsigned long _find_next_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long nbits,
+ unsigned long start, unsigned long invert)
+{
+ unsigned long tmp;
+
+ if (unlikely(start >= nbits))
+ return nbits;
+
+ tmp = addr1[start / BITS_PER_LONG];
+ if (addr2)
+ tmp &= addr2[start / BITS_PER_LONG];
+ tmp ^= invert;
+
+ /* Handle 1st word. */
+ tmp &= BITMAP_FIRST_WORD_MASK(start);
+ start = round_down(start, BITS_PER_LONG);
+
+ while (!tmp) {
+ start += BITS_PER_LONG;
+ if (start >= nbits)
+ return nbits;
+
+ tmp = addr1[start / BITS_PER_LONG];
+ if (addr2)
+ tmp &= addr2[start / BITS_PER_LONG];
+ tmp ^= invert;
+ }
+
+ return min(start + __ffs(tmp), nbits);
+}
+#endif
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, NULL, size, offset, 0UL);
+}
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long idx;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ if (addr[idx])
+ return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size);
+ }
+
+ return size;
+}
+#endif
+
+#ifndef find_first_zero_bit
+/*
+ * Find the first cleared bit in a memory region.
+ */
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long idx;
+
+ for (idx = 0; idx * BITS_PER_LONG < size; idx++) {
+ if (addr[idx] != ~0UL)
+ return min(idx * BITS_PER_LONG + ffz(addr[idx]), size);
+ }
+
+ return size;
+}
+#endif
+
+#ifndef find_next_zero_bit
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, NULL, size, offset, ~0UL);
+}
+#endif
+
+#ifndef find_next_and_bit
+unsigned long find_next_and_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr1, addr2, size, offset, 0UL);
+}
+#endif
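These helpers combine into the usual set-bit iteration idiom; a sketch, with
"bitmap", "nbits" and <stdio.h> assumed to be supplied by the caller:

unsigned long bit;

for (bit = find_first_bit(bitmap, nbits);
     bit < nbits;
     bit = find_next_bit(bitmap, nbits, bit + 1))
	printf("bit %lu is set\n", bit);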
diff --git a/tools/lib/hweight.c b/tools/lib/hweight.c
new file mode 100644
index 000000000..a16ebf515
--- /dev/null
+++ b/tools/lib/hweight.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bitops.h>
+#include <asm/types.h>
+
+/**
+ * hweightN - returns the Hamming weight of an N-bit word
+ * @x: the word to weigh
+ *
+ * The Hamming weight of a number is the total number of bits set in it.
+ */
+
+unsigned int __sw_hweight32(unsigned int w)
+{
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x55555555;
+ w = (w & 0x33333333) + ((w >> 2) & 0x33333333);
+ w = (w + (w >> 4)) & 0x0f0f0f0f;
+ return (w * 0x01010101) >> 24;
+#else
+ unsigned int res = w - ((w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res + (res >> 4)) & 0x0F0F0F0F;
+ res = res + (res >> 8);
+ return (res + (res >> 16)) & 0x000000FF;
+#endif
+}
+
+unsigned int __sw_hweight16(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x5555);
+ res = (res & 0x3333) + ((res >> 2) & 0x3333);
+ res = (res + (res >> 4)) & 0x0F0F;
+ return (res + (res >> 8)) & 0x00FF;
+}
+
+unsigned int __sw_hweight8(unsigned int w)
+{
+ unsigned int res = w - ((w >> 1) & 0x55);
+ res = (res & 0x33) + ((res >> 2) & 0x33);
+ return (res + (res >> 4)) & 0x0F;
+}
+
+unsigned long __sw_hweight64(__u64 w)
+{
+#if BITS_PER_LONG == 32
+ return __sw_hweight32((unsigned int)(w >> 32)) +
+ __sw_hweight32((unsigned int)w);
+#elif BITS_PER_LONG == 64
+#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
+ w -= (w >> 1) & 0x5555555555555555ul;
+ w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul);
+ w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful;
+ return (w * 0x0101010101010101ul) >> 56;
+#else
+ __u64 res = w - ((w >> 1) & 0x5555555555555555ul);
+ res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul);
+ res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful;
+ res = res + (res >> 8);
+ res = res + (res >> 16);
+ return (res + (res >> 32)) & 0x00000000000000FFul;
+#endif
+#endif
+}
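A few sanity checks (assuming <assert.h>); each constant's population count is
easy to verify by eye:

assert(__sw_hweight8(0xFF) == 8);
assert(__sw_hweight16(0x0F0F) == 8);
assert(__sw_hweight32(0x80000001) == 2);
assert(__sw_hweight64(0xFFFFFFFFFFFFFFFFull) == 64);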
diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore
new file mode 100644
index 000000000..cc0e7a9f9
--- /dev/null
+++ b/tools/lib/lockdep/.gitignore
@@ -0,0 +1 @@
+liblockdep.so.*
diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build
new file mode 100644
index 000000000..6f667355b
--- /dev/null
+++ b/tools/lib/lockdep/Build
@@ -0,0 +1 @@
+liblockdep-y += common.o lockdep.o preload.o rbtree.o
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
new file mode 100644
index 000000000..9dafb8cb7
--- /dev/null
+++ b/tools/lib/lockdep/Makefile
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: GPL-2.0
+# file format version
+FILE_VERSION = 1
+
+LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+prefix ?= /usr/local
+libdir_relative = lib
+libdir = $(prefix)/$(libdir_relative)
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+
+export DESTDIR DESTDIR_SQ INSTALL
+
+MAKEFLAGS += --no-print-directory
+
+include ../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+bindir_SQ = $(subst ','\'',$(bindir))
+
+LIB_IN := $(OUTPUT)liblockdep-in.o
+
+BIN_FILE = lockdep
+LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION)
+
+CONFIG_INCLUDES =
+CONFIG_LIBS =
+CONFIG_FLAGS =
+
+OBJ = $@
+N =
+
+export Q VERBOSE
+
+INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS if not set elsewhere
+CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0
+CFLAGS += -fPIC
+CFLAGS += -Wall
+
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+
+ifeq ($(VERBOSE),1)
+ Q =
+ print_shared_lib_compile =
+ print_install =
+else
+ Q = @
+ print_shared_lib_compile = echo ' LD '$(OBJ);
+ print_static_lib_build = echo ' LD '$(OBJ);
+ print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2';
+endif
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+do_compile_shared_library = \
+ ($(print_shared_lib_compile) \
+ $(CC) $(LDFLAGS) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='$(@F)';$(shell ln -sf $(@F) $(@D)/liblockdep.so))
+
+do_build_static_lib = \
+ ($(print_static_lib_build) \
+ $(RM) $@; $(AR) rcs $@ $^)
+
+CMD_TARGETS = $(LIB_FILE)
+
+TARGETS = $(CMD_TARGETS)
+
+
+all: fixdep all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+$(LIB_IN): force
+ $(Q)$(MAKE) $(build)=liblockdep
+
+$(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN)
+ $(Q)$(do_compile_shared_library)
+
+$(OUTPUT)liblockdep.a: $(LIB_IN)
+ $(Q)$(do_build_static_lib)
+
+tags: force
+ $(RM) tags
+ find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+ --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS: force
+ $(RM) TAGS
+ find . -name '*.[ch]' | xargs etags \
+ --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install
+ $(print_install) \
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: all_cmd
+ $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ))
+ $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ))
+
+install: install_lib
+
+clean:
+ $(RM) $(OUTPUT)*.o *~ $(TARGETS) $(OUTPUT)*.a $(OUTPUT)*liblockdep*.so* $(VERSION_FILES) $(OUTPUT).*.d $(OUTPUT).*.cmd
+ $(RM) tags TAGS
+
+PHONY += force
+force:
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
new file mode 100644
index 000000000..5c3b58cce
--- /dev/null
+++ b/tools/lib/lockdep/common.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/compiler.h>
+#include <linux/lockdep.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+
+static __thread struct task_struct current_obj;
+
+/* lockdep wants these */
+bool debug_locks = true;
+bool debug_locks_silent;
+
+__attribute__((destructor)) static void liblockdep_exit(void)
+{
+ debug_check_no_locks_held();
+}
+
+struct task_struct *__curr(void)
+{
+ if (current_obj.pid == 0) {
+ /* Makes lockdep output pretty */
+ prctl(PR_GET_NAME, current_obj.comm);
+ current_obj.pid = syscall(__NR_gettid);
+ }
+
+ return &current_obj;
+}
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
new file mode 100644
index 000000000..8862da809
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_COMMON_H
+#define _LIBLOCKDEP_COMMON_H
+
+#include <pthread.h>
+
+#define NR_LOCKDEP_CACHING_CLASSES 2
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+#ifndef CALLER_ADDR0
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#endif
+
+#ifndef _RET_IP_
+#define _RET_IP_ CALLER_ADDR0
+#endif
+
+#ifndef _THIS_IP_
+#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+#endif
+
+struct lockdep_subclass_key {
+ char __one_byte;
+};
+
+struct lock_class_key {
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+};
+
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+ const char *name;
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+ unsigned long ip;
+#endif
+};
+
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key, int subclass);
+void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+ int trylock, int read, int check,
+ struct lockdep_map *nest_lock, unsigned long ip);
+void lock_release(struct lockdep_map *lock, int nested,
+ unsigned long ip);
+extern void debug_check_no_locks_freed(const void *from, unsigned long len);
+
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+ { .name = (_name), .key = (void *)(_key), }
+
+#endif
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
new file mode 100644
index 000000000..a80ac39f9
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_MUTEX_H
+#define _LIBLOCKDEP_MUTEX_H
+
+#include <pthread.h>
+#include "common.h"
+
+struct liblockdep_pthread_mutex {
+ pthread_mutex_t mutex;
+ struct lockdep_map dep_map;
+};
+
+typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t;
+
+#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \
+ (const struct liblockdep_pthread_mutex) { \
+ .mutex = PTHREAD_MUTEX_INITIALIZER, \
+ .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \
+}
+
+static inline int __mutex_init(liblockdep_pthread_mutex_t *lock,
+ const char *name,
+ struct lock_class_key *key,
+ const pthread_mutexattr_t *__mutexattr)
+{
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+ return pthread_mutex_init(&lock->mutex, __mutexattr);
+}
+
+#define liblockdep_pthread_mutex_init(mutex, mutexattr) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ __mutex_init((mutex), #mutex, &__key, (mutexattr)); \
+})
+
+static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_mutex_lock(&lock->mutex);
+}
+
+static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
+{
+ lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
+ return pthread_mutex_unlock(&lock->mutex);
+}
+
+static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
+{
+ return pthread_mutex_destroy(&lock->mutex);
+}
+
+#ifdef __USE_LIBLOCKDEP
+
+#define pthread_mutex_t liblockdep_pthread_mutex_t
+#define pthread_mutex_init liblockdep_pthread_mutex_init
+#define pthread_mutex_lock liblockdep_pthread_mutex_lock
+#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock
+#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock
+#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy
+
+#endif
+
+#endif
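With -D__USE_LIBLOCKDEP defined, the plain pthread names below resolve to the
checked wrappers, so existing code needs no source changes; a minimal sketch
(the same pattern the tests/ programs use):

#include <liblockdep/mutex.h>	/* build with -D__USE_LIBLOCKDEP */

pthread_mutex_t m;

pthread_mutex_init(&m, NULL);	/* also registers the lock with lockdep */
pthread_mutex_lock(&m);
pthread_mutex_unlock(&m);
pthread_mutex_destroy(&m);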
diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h
new file mode 100644
index 000000000..a96c3bf0f
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/rwlock.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_RWLOCK_H
+#define _LIBLOCKDEP_RWLOCK_H
+
+#include <pthread.h>
+#include "common.h"
+
+struct liblockdep_pthread_rwlock {
+ pthread_rwlock_t rwlock;
+ struct lockdep_map dep_map;
+};
+
+typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t;
+
+#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \
+ (struct liblockdep_pthread_rwlock) { \
+ .rwlock = PTHREAD_RWLOCK_INITIALIZER, \
+ .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \
+}
+
+static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock,
+ const char *name,
+ struct lock_class_key *key,
+ const pthread_rwlockattr_t *attr)
+{
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+
+ return pthread_rwlock_init(&lock->rwlock, attr);
+}
+
+#define liblockdep_pthread_rwlock_init(lock, attr) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ __rwlock_init((lock), #lock, &__key, (attr)); \
+})
+
+static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_rdlock(&lock->rwlock);
+
+}
+
+static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
+ return pthread_rwlock_unlock(&lock->rwlock);
+}
+
+static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_wrlock(&lock->rwlock);
+}
+
+static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
+{
+ return pthread_rwlock_destroy(&lock->rwlock);
+}
+
+#ifdef __USE_LIBLOCKDEP
+
+#define pthread_rwlock_t liblockdep_pthread_rwlock_t
+#define pthread_rwlock_init liblockdep_pthread_rwlock_init
+#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock
+#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock
+#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock
+#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock
+#define pthread_rwlock_trywlock liblockdep_pthread_rwlock_trywlock
+#define pthread_rwlock_destroy liblockdep_rwlock_destroy
+
+#endif
+
+#endif
diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep
new file mode 100755
index 000000000..49af9fe19
--- /dev/null
+++ b/tools/lib/lockdep/lockdep
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@"
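Because the wrapper only prepends liblockdep.so to LD_PRELOAD before running
its arguments, any dynamically linked binary can be checked without a rebuild,
e.g. ./lockdep ./myapp (myapp being a placeholder for the program under test).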
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
new file mode 100644
index 000000000..6002fcf2f
--- /dev/null
+++ b/tools/lib/lockdep/lockdep.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/lockdep.h>
+#include <stdlib.h>
+
+/* Trivial API wrappers, we don't (yet) have RCU in user-space: */
+#define hlist_for_each_entry_rcu hlist_for_each_entry
+#define hlist_add_head_rcu hlist_add_head
+#define hlist_del_rcu hlist_del
+#define list_for_each_entry_rcu list_for_each_entry
+#define list_add_tail_rcu list_add_tail
+
+u32 prandom_u32(void)
+{
+ /* Used only by lock_pin_lock() which is dead code */
+ abort();
+}
+
+static struct new_utsname *init_utsname(void)
+{
+ static struct new_utsname n = (struct new_utsname) {
+ .release = "liblockdep",
+ .version = LIBLOCKDEP_VERSION,
+ };
+
+ return &n;
+}
+
+#include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h
new file mode 100644
index 000000000..29d0c954c
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_internals.h
@@ -0,0 +1 @@
+#include "../../../kernel/locking/lockdep_internals.h"
diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h
new file mode 100644
index 000000000..248d235ef
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_states.h
@@ -0,0 +1 @@
+#include "../../../kernel/locking/lockdep_states.h"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
new file mode 100644
index 000000000..76245d161
--- /dev/null
+++ b/tools/lib/lockdep/preload.c
@@ -0,0 +1,443 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include <unistd.h>
+#include "include/liblockdep/mutex.h"
+#include "../../include/linux/rbtree.h"
+
+/**
+ * struct lock_lookup - liblockdep's view of a single unique lock
+ * @orig: pointer to the original pthread lock, used for lookups
+ * @dep_map: lockdep's dep_map structure
+ * @key: lockdep's key structure
+ * @node: rb-tree node used to store the lock in a global tree
+ * @name: a unique name for the lock
+ */
+struct lock_lookup {
+ void *orig; /* Original pthread lock, used for lookups */
+ struct lockdep_map dep_map; /* Since all locks are dynamic, we need
+ * a dep_map and a key for each lock */
+ /*
+ * Wait, there's no support for key classes? Yup :(
+	 * Most big projects wrap the pthread API with their own calls to
+	 * be compatible with different locking methods. This means that
+	 * "classes" will be broken, since the function that creates all
+	 * locks will point to a generic locking function instead of the
+	 * actual code that wants to do the locking.
+ */
+ struct lock_class_key key;
+ struct rb_node node;
+#define LIBLOCKDEP_MAX_LOCK_NAME 22
+ char name[LIBLOCKDEP_MAX_LOCK_NAME];
+};
+
+/* This is where we store our locks */
+static struct rb_root locks = RB_ROOT;
+static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER;
+
+/* pthread mutex API */
+
+#ifdef __GLIBC__
+extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr);
+extern int __pthread_mutex_lock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_trylock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_unlock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_destroy(pthread_mutex_t *mutex);
+#else
+#define __pthread_mutex_init NULL
+#define __pthread_mutex_lock NULL
+#define __pthread_mutex_trylock NULL
+#define __pthread_mutex_unlock NULL
+#define __pthread_mutex_destroy NULL
+#endif
+static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex,
+ const pthread_mutexattr_t *attr) = __pthread_mutex_init;
+static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock;
+static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock;
+static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock;
+static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy;
+
+/* pthread rwlock API */
+
+#ifdef __GLIBC__
+extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
+extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
+#else
+#define __pthread_rwlock_init NULL
+#define __pthread_rwlock_destroy NULL
+#define __pthread_rwlock_wrlock NULL
+#define __pthread_rwlock_trywrlock NULL
+#define __pthread_rwlock_rdlock NULL
+#define __pthread_rwlock_tryrdlock NULL
+#define __pthread_rwlock_unlock NULL
+#endif
+
+static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock,
+ const pthread_rwlockattr_t *attr) = __pthread_rwlock_init;
+static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy;
+static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock;
+static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock;
+static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock;
+static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock;
+static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock;
+
+enum { none, prepare, done, } __init_state;
+static void init_preload(void);
+static void try_init_preload(void)
+{
+ if (__init_state != done)
+ init_preload();
+}
+
+static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
+{
+ struct rb_node **node = &locks.rb_node;
+ struct lock_lookup *l;
+
+ *parent = NULL;
+
+ while (*node) {
+ l = rb_entry(*node, struct lock_lookup, node);
+
+ *parent = *node;
+ if (lock < l->orig)
+ node = &l->node.rb_left;
+ else if (lock > l->orig)
+ node = &l->node.rb_right;
+ else
+ return node;
+ }
+
+ return node;
+}
+
+#ifndef LIBLOCKDEP_STATIC_ENTRIES
+#define LIBLOCKDEP_STATIC_ENTRIES 1024
+#endif
+
+static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
+static int __locks_nr;
+
+static inline bool is_static_lock(struct lock_lookup *lock)
+{
+ return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks);
+}
+
+static struct lock_lookup *alloc_lock(void)
+{
+ if (__init_state != done) {
+ /*
+ * Some programs attempt to initialize and use locks in their
+ * allocation path. This means that a call to malloc() would
+ * result in locks being initialized and locked.
+ *
+		 * Why is it an issue for us? dlsym() below will try allocating
+		 * to give us the original function. Since this allocation will
+		 * result in a locking operation, we would have to let pthread
+		 * deal with it, but we can't: we don't have the pointer to the
+		 * original API yet, since we're inside dlsym() trying to get it.
+ */
+
+ int idx = __locks_nr++;
+ if (idx >= ARRAY_SIZE(__locks)) {
+ dprintf(STDERR_FILENO,
+ "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
+ exit(EX_UNAVAILABLE);
+ }
+ return __locks + idx;
+ }
+
+ return malloc(sizeof(struct lock_lookup));
+}
+
+static inline void free_lock(struct lock_lookup *lock)
+{
+ if (likely(!is_static_lock(lock)))
+ free(lock);
+}
+
+/**
+ * __get_lock - find or create a lock instance
+ * @lock: pointer to a pthread lock function
+ *
+ * Try to find an existing lock in the rbtree using the provided pointer. If
+ * one wasn't found, create it.
+ */
+static struct lock_lookup *__get_lock(void *lock)
+{
+ struct rb_node **node, *parent;
+ struct lock_lookup *l;
+
+ ll_pthread_rwlock_rdlock(&locks_rwlock);
+ node = __get_lock_node(lock, &parent);
+ ll_pthread_rwlock_unlock(&locks_rwlock);
+ if (*node) {
+ return rb_entry(*node, struct lock_lookup, node);
+ }
+
+ /* We didn't find the lock, let's create it */
+ l = alloc_lock();
+ if (l == NULL)
+ return NULL;
+
+ l->orig = lock;
+ /*
+	 * Currently the name of the lock is the pointer value of the pthread
+	 * lock; while not optimal, it makes debugging a bit easier.
+ *
+ * TODO: Get the real name of the lock using libdwarf
+ */
+ sprintf(l->name, "%p", lock);
+ lockdep_init_map(&l->dep_map, l->name, &l->key, 0);
+
+ ll_pthread_rwlock_wrlock(&locks_rwlock);
+ /* This might have changed since the last time we fetched it */
+ node = __get_lock_node(lock, &parent);
+ rb_link_node(&l->node, parent, node);
+ rb_insert_color(&l->node, &locks);
+ ll_pthread_rwlock_unlock(&locks_rwlock);
+
+ return l;
+}
+
+static void __del_lock(struct lock_lookup *lock)
+{
+ ll_pthread_rwlock_wrlock(&locks_rwlock);
+ rb_erase(&lock->node, &locks);
+ ll_pthread_rwlock_unlock(&locks_rwlock);
+ free_lock(lock);
+}
+
+int pthread_mutex_init(pthread_mutex_t *mutex,
+ const pthread_mutexattr_t *attr)
+{
+ int r;
+
+ /*
+ * We keep trying to init our preload module because there might be
+ * code in init sections that tries to touch locks before we are
+ * initialized, in that case we'll need to manually call preload
+ * to get us going.
+ *
+ * Funny enough, kernel's lockdep had the same issue, and used
+ * (almost) the same solution. See look_up_lock_class() in
+ * kernel/locking/lockdep.c for details.
+ */
+ try_init_preload();
+
+ r = ll_pthread_mutex_init(mutex, attr);
+ if (r == 0)
+ /*
+		 * We do a dummy initialization here so that lockdep can
+		 * warn us if something fishy is going on - such as
+ * initializing a held lock.
+ */
+ __get_lock(mutex);
+
+ return r;
+}
+
+int pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+ int r;
+
+ try_init_preload();
+
+ lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL,
+ (unsigned long)_RET_IP_);
+ /*
+ * Here's the thing with pthread mutexes: unlike the kernel variant,
+ * they can fail.
+ *
+ * This means that the behaviour here is a bit different from what's
+ * going on in the kernel: there we just tell lockdep that we took the
+ * lock before actually taking it, but here we must deal with the case
+ * that locking failed.
+ *
+ * To do that we'll "release" the lock if locking failed - this way
+ * we'll get lockdep doing the correct checks when we try to take
+ * the lock, and if that fails - we'll be back to the correct
+ * state by releasing it.
+ */
+ r = ll_pthread_mutex_lock(mutex);
+ if (r)
+ lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+ int r;
+
+ try_init_preload();
+
+ lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_mutex_trylock(mutex);
+ if (r)
+ lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+ int r;
+
+ try_init_preload();
+
+ lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
+ /*
+ * Just like taking a lock, only in reverse!
+ *
+ * If we fail releasing the lock, tell lockdep we're holding it again.
+ */
+ r = ll_pthread_mutex_unlock(mutex);
+ if (r)
+ lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+ try_init_preload();
+
+ /*
+ * Let's see if we're releasing a lock that's held.
+ *
+ * TODO: Hook into free() and add that check there as well.
+ */
+ debug_check_no_locks_freed(mutex, sizeof(*mutex));
+ __del_lock(__get_lock(mutex));
+ return ll_pthread_mutex_destroy(mutex);
+}
+
+/* This is the rwlock part, very similar to what happened with mutex above */
+int pthread_rwlock_init(pthread_rwlock_t *rwlock,
+ const pthread_rwlockattr_t *attr)
+{
+ int r;
+
+ try_init_preload();
+
+ r = ll_pthread_rwlock_init(rwlock, attr);
+ if (r == 0)
+ __get_lock(rwlock);
+
+ return r;
+}
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
+{
+ try_init_preload();
+
+ debug_check_no_locks_freed(rwlock, sizeof(*rwlock));
+ __del_lock(__get_lock(rwlock));
+ return ll_pthread_rwlock_destroy(rwlock);
+}
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_rdlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_tryrdlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_trywrlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_wrlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_unlock(rwlock);
+ if (r)
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+__attribute__((constructor)) static void init_preload(void)
+{
+ if (__init_state == done)
+ return;
+
+#ifndef __GLIBC__
+ __init_state = prepare;
+
+ ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
+ ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
+ ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
+ ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
+ ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy");
+
+ ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init");
+ ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy");
+ ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock");
+ ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock");
+ ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock");
+ ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock");
+ ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
+#endif
+
+ __init_state = done;
+}
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
new file mode 100644
index 000000000..297c30457
--- /dev/null
+++ b/tools/lib/lockdep/rbtree.c
@@ -0,0 +1 @@
+#include "../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
new file mode 100755
index 000000000..2e570a188
--- /dev/null
+++ b/tools/lib/lockdep/run_tests.sh
@@ -0,0 +1,32 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+make &> /dev/null
+
+for i in tests/*.c; do
+ testname=$(basename "$i" .c)
+ gcc -o tests/$testname -pthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
+ echo -ne "$testname... "
+ if [ $(timeout 1 ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
+ echo "PASSED!"
+ else
+ echo "FAILED!"
+ fi
+ if [ -f "tests/$testname" ]; then
+ rm tests/$testname
+ fi
+done
+
+for i in tests/*.c; do
+ testname=$(basename "$i" .c)
+ gcc -o tests/$testname -pthread -Iinclude $i &> /dev/null
+ echo -ne "(PRELOAD) $testname... "
+ if [ $(timeout 1 ./lockdep ./tests/$testname 2>&1 | wc -l) -gt 0 ]; then
+ echo "PASSED!"
+ else
+ echo "FAILED!"
+ fi
+ if [ -f "tests/$testname" ]; then
+ rm tests/$testname
+ fi
+done
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
new file mode 100644
index 000000000..63c7ce97b
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+
+int main(void)
+{
+ pthread_mutex_t a;
+
+ pthread_mutex_init(&a, NULL);
+
+ pthread_mutex_lock(&a);
+ pthread_mutex_lock(&a);
+
+ return 0;
+}
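
AA is the self-deadlock case: with a default (non-recursive) mutex the
second pthread_mutex_lock(&a) can never succeed, and the checker flags
the recursion. For contrast, a hypothetical variant (not part of the
test suite) where the same call pattern is legal because the mutex is
created recursive:

    #include <pthread.h>

    int main(void)
    {
        pthread_mutexattr_t attr;
        pthread_mutex_t a;

        pthread_mutexattr_init(&attr);
        pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
        pthread_mutex_init(&a, &attr);

        pthread_mutex_lock(&a);
        pthread_mutex_lock(&a);   /* legal: the recursion count goes to 2 */
        pthread_mutex_unlock(&a);
        pthread_mutex_unlock(&a);

        return 0;
    }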
diff --git a/tools/lib/lockdep/tests/ABA.c b/tools/lib/lockdep/tests/ABA.c
new file mode 100644
index 000000000..efa39b23f
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABA.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+
+int main(void)
+{
+ pthread_mutex_t a, b;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+
+ pthread_mutex_lock(&a);
+ pthread_mutex_lock(&b);
+ pthread_mutex_lock(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
new file mode 100644
index 000000000..1460afd33
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+int main(void)
+{
+ pthread_mutex_t a, b;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(b, a);
+}
diff --git a/tools/lib/lockdep/tests/ABBA_2threads.c b/tools/lib/lockdep/tests/ABBA_2threads.c
new file mode 100644
index 000000000..39325ef8a
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA_2threads.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <pthread.h>
+
+pthread_mutex_t a = PTHREAD_MUTEX_INITIALIZER;
+pthread_mutex_t b = PTHREAD_MUTEX_INITIALIZER;
+pthread_barrier_t bar;
+
+void *ba_lock(void *arg)
+{
+ pthread_mutex_lock(&b);
+
+ if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+ pthread_barrier_destroy(&bar);
+
+ pthread_mutex_lock(&a);
+
+ pthread_mutex_unlock(&a);
+ pthread_mutex_unlock(&b);
+
+ return NULL;
+}
+
+int main(void)
+{
+ pthread_t t;
+
+ pthread_barrier_init(&bar, NULL, 2);
+
+ if (pthread_create(&t, NULL, ba_lock, NULL)) {
+ fprintf(stderr, "pthread_create() failed\n");
+ return 1;
+ }
+ pthread_mutex_lock(&a);
+
+ if (pthread_barrier_wait(&bar) == PTHREAD_BARRIER_SERIAL_THREAD)
+ pthread_barrier_destroy(&bar);
+
+ pthread_mutex_lock(&b);
+
+ pthread_mutex_unlock(&b);
+ pthread_mutex_unlock(&a);
+
+ pthread_join(t, NULL);
+
+ return 0;
+}
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
new file mode 100644
index 000000000..a54c1b2af
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+int main(void)
+{
+ pthread_mutex_t a, b, c;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(b, c);
+ LOCK_UNLOCK_2(c, a);
+}
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
new file mode 100644
index 000000000..aa5d194e8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+int main(void)
+{
+ pthread_mutex_t a, b, c, d;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+ pthread_mutex_init(&d, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(b, c);
+ LOCK_UNLOCK_2(c, d);
+ LOCK_UNLOCK_2(d, a);
+}
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
new file mode 100644
index 000000000..b54a08e60
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+int main(void)
+{
+ pthread_mutex_t a, b, c;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(c, a);
+ LOCK_UNLOCK_2(b, c);
+}
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
new file mode 100644
index 000000000..a56742250
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+int main(void)
+{
+ pthread_mutex_t a, b, c, d;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+ pthread_mutex_init(&d, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(c, d);
+ LOCK_UNLOCK_2(b, c);
+ LOCK_UNLOCK_2(d, a);
+}
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
new file mode 100644
index 000000000..238a3353f
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBDDA.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+int main(void)
+{
+ pthread_mutex_t a, b, c, d;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+ pthread_mutex_init(&d, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(c, d);
+ LOCK_UNLOCK_2(b, d);
+ LOCK_UNLOCK_2(d, a);
+}
diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c
new file mode 100644
index 000000000..eee88df7f
--- /dev/null
+++ b/tools/lib/lockdep/tests/WW.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/rwlock.h>
+
+int main(void)
+{
+ pthread_rwlock_t a, b;
+
+ pthread_rwlock_init(&a, NULL);
+ pthread_rwlock_init(&b, NULL);
+
+ pthread_rwlock_wrlock(&a);
+ pthread_rwlock_rdlock(&b);
+ pthread_rwlock_wrlock(&a);
+}
diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h
new file mode 100644
index 000000000..3026c29cc
--- /dev/null
+++ b/tools/lib/lockdep/tests/common.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIBLOCKDEP_TEST_COMMON_H
+#define _LIBLOCKDEP_TEST_COMMON_H
+
+#define LOCK_UNLOCK_2(a, b) \
+ do { \
+ pthread_mutex_lock(&(a)); \
+ pthread_mutex_lock(&(b)); \
+ pthread_mutex_unlock(&(b)); \
+ pthread_mutex_unlock(&(a)); \
+ } while(0)
+
+#endif
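
LOCK_UNLOCK_2(a, b) takes a, takes b, then releases both, which is
enough to record an a -> b ordering edge even though nothing stays
held. The ABBA test above is therefore equivalent to the expansion
below: neither pair deadlocks at runtime, but the two recorded
orderings close the a -> b -> a cycle that gets reported.

    #include <liblockdep/mutex.h>

    int main(void)
    {
        pthread_mutex_t a, b;

        pthread_mutex_init(&a, NULL);
        pthread_mutex_init(&b, NULL);

        /* LOCK_UNLOCK_2(a, b): records the edge a -> b */
        pthread_mutex_lock(&a);
        pthread_mutex_lock(&b);
        pthread_mutex_unlock(&b);
        pthread_mutex_unlock(&a);

        /* LOCK_UNLOCK_2(b, a): records b -> a, closing the cycle */
        pthread_mutex_lock(&b);
        pthread_mutex_lock(&a);
        pthread_mutex_unlock(&a);
        pthread_mutex_unlock(&b);

        return 0;
    }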
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
new file mode 100644
index 000000000..34cf32f68
--- /dev/null
+++ b/tools/lib/lockdep/tests/unlock_balance.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <liblockdep/mutex.h>
+
+int main(void)
+{
+ pthread_mutex_t a;
+
+ pthread_mutex_init(&a, NULL);
+
+ pthread_mutex_lock(&a);
+ pthread_mutex_unlock(&a);
+ pthread_mutex_unlock(&a);
+}
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
new file mode 100644
index 000000000..17c2b596f
--- /dev/null
+++ b/tools/lib/rbtree.c
@@ -0,0 +1,548 @@
+/*
+ Red Black Trees
+ (C) 1999 Andrea Arcangeli <andrea@suse.de>
+ (C) 2002 David Woodhouse <dwmw2@infradead.org>
+ (C) 2012 Michel Lespinasse <walken@google.com>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+ linux/lib/rbtree.c
+*/
+
+#include <linux/rbtree_augmented.h>
+
+/*
+ * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
+ *
+ * 1) A node is either red or black
+ * 2) The root is black
+ * 3) All leaves (NULL) are black
+ * 4) Both children of every red node are black
+ * 5) Every simple path from root to leaves contains the same number
+ * of black nodes.
+ *
+ * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two
+ * consecutive red nodes in a path and every red node is therefore followed by
+ * a black. So if B is the number of black nodes on every simple path (as per
+ * 5), then the longest possible path due to 4 is 2B.
+ *
+ * We shall indicate color with case, where black nodes are uppercase and red
+ * nodes will be lowercase. Unknown color nodes shall be drawn as red within
+ * parentheses and have some accompanying text comment.
+ */
+
+static inline void rb_set_black(struct rb_node *rb)
+{
+ rb->__rb_parent_color |= RB_BLACK;
+}
+
+static inline struct rb_node *rb_red_parent(struct rb_node *red)
+{
+ return (struct rb_node *)red->__rb_parent_color;
+}
+
+/*
+ * Helper function for rotations:
+ * - old's parent and color get assigned to new
+ * - old gets assigned new as a parent and 'color' as a color.
+ */
+static inline void
+__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new,
+ struct rb_root *root, int color)
+{
+ struct rb_node *parent = rb_parent(old);
+ new->__rb_parent_color = old->__rb_parent_color;
+ rb_set_parent_color(old, new, color);
+ __rb_change_child(old, new, parent, root);
+}
+
+static __always_inline void
+__rb_insert(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ struct rb_node *parent = rb_red_parent(node), *gparent, *tmp;
+
+ while (true) {
+ /*
+ * Loop invariant: node is red
+ *
+ * If there is a black parent, we are done.
+ * Otherwise, take some corrective action as we don't
+ * want a red root or two consecutive red nodes.
+ */
+ if (!parent) {
+ rb_set_parent_color(node, NULL, RB_BLACK);
+ break;
+ } else if (rb_is_black(parent))
+ break;
+
+ gparent = rb_red_parent(parent);
+
+ tmp = gparent->rb_right;
+ if (parent != tmp) { /* parent == gparent->rb_left */
+ if (tmp && rb_is_red(tmp)) {
+ /*
+ * Case 1 - color flips
+ *
+ * G g
+ * / \ / \
+ * p u --> P U
+ * / /
+ * n n
+ *
+ * However, since g's parent might be red, and
+ * 4) does not allow this, we need to recurse
+ * at g.
+ */
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ rb_set_parent_color(parent, gparent, RB_BLACK);
+ node = gparent;
+ parent = rb_parent(node);
+ rb_set_parent_color(node, parent, RB_RED);
+ continue;
+ }
+
+ tmp = parent->rb_right;
+ if (node == tmp) {
+ /*
+ * Case 2 - left rotate at parent
+ *
+ * G G
+ * / \ / \
+ * p U --> n U
+ * \ /
+ * n p
+ *
+ * This still leaves us in violation of 4), the
+ * continuation into Case 3 will fix that.
+ */
+ parent->rb_right = tmp = node->rb_left;
+ node->rb_left = parent;
+ if (tmp)
+ rb_set_parent_color(tmp, parent,
+ RB_BLACK);
+ rb_set_parent_color(parent, node, RB_RED);
+ augment_rotate(parent, node);
+ parent = node;
+ tmp = node->rb_right;
+ }
+
+ /*
+ * Case 3 - right rotate at gparent
+ *
+ * G P
+ * / \ / \
+ * p U --> n g
+ * / \
+ * n U
+ */
+ gparent->rb_left = tmp; /* == parent->rb_right */
+ parent->rb_right = gparent;
+ if (tmp)
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+ augment_rotate(gparent, parent);
+ break;
+ } else {
+ tmp = gparent->rb_left;
+ if (tmp && rb_is_red(tmp)) {
+ /* Case 1 - color flips */
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ rb_set_parent_color(parent, gparent, RB_BLACK);
+ node = gparent;
+ parent = rb_parent(node);
+ rb_set_parent_color(node, parent, RB_RED);
+ continue;
+ }
+
+ tmp = parent->rb_left;
+ if (node == tmp) {
+ /* Case 2 - right rotate at parent */
+ parent->rb_left = tmp = node->rb_right;
+ node->rb_right = parent;
+ if (tmp)
+ rb_set_parent_color(tmp, parent,
+ RB_BLACK);
+ rb_set_parent_color(parent, node, RB_RED);
+ augment_rotate(parent, node);
+ parent = node;
+ tmp = node->rb_left;
+ }
+
+ /* Case 3 - left rotate at gparent */
+ gparent->rb_right = tmp; /* == parent->rb_left */
+ parent->rb_left = gparent;
+ if (tmp)
+ rb_set_parent_color(tmp, gparent, RB_BLACK);
+ __rb_rotate_set_parents(gparent, parent, root, RB_RED);
+ augment_rotate(gparent, parent);
+ break;
+ }
+ }
+}
+
+/*
+ * Inline version for rb_erase() use - we want to be able to inline
+ * and eliminate the dummy_rotate callback there
+ */
+static __always_inline void
+____rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
+
+ while (true) {
+ /*
+ * Loop invariants:
+ * - node is black (or NULL on first iteration)
+ * - node is not the root (parent is not NULL)
+ * - All leaf paths going through parent and node have a
+ * black node count that is 1 lower than other leaf paths.
+ */
+ sibling = parent->rb_right;
+ if (node != sibling) { /* node == parent->rb_left */
+ if (rb_is_red(sibling)) {
+ /*
+ * Case 1 - left rotate at parent
+ *
+ * P S
+ * / \ / \
+ * N s --> p Sr
+ * / \ / \
+ * Sl Sr N Sl
+ */
+ parent->rb_right = tmp1 = sibling->rb_left;
+ sibling->rb_left = parent;
+ rb_set_parent_color(tmp1, parent, RB_BLACK);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_RED);
+ augment_rotate(parent, sibling);
+ sibling = tmp1;
+ }
+ tmp1 = sibling->rb_right;
+ if (!tmp1 || rb_is_black(tmp1)) {
+ tmp2 = sibling->rb_left;
+ if (!tmp2 || rb_is_black(tmp2)) {
+ /*
+ * Case 2 - sibling color flip
+ * (p could be either color here)
+ *
+ * (p) (p)
+ * / \ / \
+ * N S --> N s
+ * / \ / \
+ * Sl Sr Sl Sr
+ *
+ * This leaves us violating 5) which
+ * can be fixed by flipping p to black
+ * if it was red, or by recursing at p.
+ * p is red when coming from Case 1.
+ */
+ rb_set_parent_color(sibling, parent,
+ RB_RED);
+ if (rb_is_red(parent))
+ rb_set_black(parent);
+ else {
+ node = parent;
+ parent = rb_parent(node);
+ if (parent)
+ continue;
+ }
+ break;
+ }
+ /*
+ * Case 3 - right rotate at sibling
+ * (p could be either color here)
+ *
+ * (p) (p)
+ * / \ / \
+ * N S --> N Sl
+ * / \ \
+ * sl Sr s
+ * \
+ * Sr
+ */
+ sibling->rb_left = tmp1 = tmp2->rb_right;
+ tmp2->rb_right = sibling;
+ parent->rb_right = tmp2;
+ if (tmp1)
+ rb_set_parent_color(tmp1, sibling,
+ RB_BLACK);
+ augment_rotate(sibling, tmp2);
+ tmp1 = sibling;
+ sibling = tmp2;
+ }
+ /*
+ * Case 4 - left rotate at parent + color flips
+ * (p and sl could be either color here.
+ * After rotation, p becomes black, s acquires
+ * p's color, and sl keeps its color)
+ *
+ * (p) (s)
+ * / \ / \
+ * N S --> P Sr
+ * / \ / \
+ * (sl) sr N (sl)
+ */
+ parent->rb_right = tmp2 = sibling->rb_left;
+ sibling->rb_left = parent;
+ rb_set_parent_color(tmp1, sibling, RB_BLACK);
+ if (tmp2)
+ rb_set_parent(tmp2, parent);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_BLACK);
+ augment_rotate(parent, sibling);
+ break;
+ } else {
+ sibling = parent->rb_left;
+ if (rb_is_red(sibling)) {
+ /* Case 1 - right rotate at parent */
+ parent->rb_left = tmp1 = sibling->rb_right;
+ sibling->rb_right = parent;
+ rb_set_parent_color(tmp1, parent, RB_BLACK);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_RED);
+ augment_rotate(parent, sibling);
+ sibling = tmp1;
+ }
+ tmp1 = sibling->rb_left;
+ if (!tmp1 || rb_is_black(tmp1)) {
+ tmp2 = sibling->rb_right;
+ if (!tmp2 || rb_is_black(tmp2)) {
+ /* Case 2 - sibling color flip */
+ rb_set_parent_color(sibling, parent,
+ RB_RED);
+ if (rb_is_red(parent))
+ rb_set_black(parent);
+ else {
+ node = parent;
+ parent = rb_parent(node);
+ if (parent)
+ continue;
+ }
+ break;
+ }
+ /* Case 3 - right rotate at sibling */
+ sibling->rb_right = tmp1 = tmp2->rb_left;
+ tmp2->rb_left = sibling;
+ parent->rb_left = tmp2;
+ if (tmp1)
+ rb_set_parent_color(tmp1, sibling,
+ RB_BLACK);
+ augment_rotate(sibling, tmp2);
+ tmp1 = sibling;
+ sibling = tmp2;
+ }
+ /* Case 4 - left rotate at parent + color flips */
+ parent->rb_left = tmp2 = sibling->rb_right;
+ sibling->rb_right = parent;
+ rb_set_parent_color(tmp1, sibling, RB_BLACK);
+ if (tmp2)
+ rb_set_parent(tmp2, parent);
+ __rb_rotate_set_parents(parent, sibling, root,
+ RB_BLACK);
+ augment_rotate(parent, sibling);
+ break;
+ }
+ }
+}
+
+/* Non-inline version for rb_erase_augmented() use */
+void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ ____rb_erase_color(parent, root, augment_rotate);
+}
+
+/*
+ * Non-augmented rbtree manipulation functions.
+ *
+ * We use dummy augmented callbacks here, and have the compiler optimize them
+ * out of the rb_insert_color() and rb_erase() function definitions.
+ */
+
+static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {}
+static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {}
+static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {}
+
+static const struct rb_augment_callbacks dummy_callbacks = {
+ dummy_propagate, dummy_copy, dummy_rotate
+};
+
+void rb_insert_color(struct rb_node *node, struct rb_root *root)
+{
+ __rb_insert(node, root, dummy_rotate);
+}
+
+void rb_erase(struct rb_node *node, struct rb_root *root)
+{
+ struct rb_node *rebalance;
+ rebalance = __rb_erase_augmented(node, root, &dummy_callbacks);
+ if (rebalance)
+ ____rb_erase_color(rebalance, root, dummy_rotate);
+}
+
+/*
+ * Augmented rbtree manipulation functions.
+ *
+ * This instantiates the same __always_inline functions as in the non-augmented
+ * case, but this time with user-defined callbacks.
+ */
+
+void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+ void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
+{
+ __rb_insert(node, root, augment_rotate);
+}
+
+/*
+ * This function returns the first node (in sort order) of the tree.
+ */
+struct rb_node *rb_first(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_left)
+ n = n->rb_left;
+ return n;
+}
+
+struct rb_node *rb_last(const struct rb_root *root)
+{
+ struct rb_node *n;
+
+ n = root->rb_node;
+ if (!n)
+ return NULL;
+ while (n->rb_right)
+ n = n->rb_right;
+ return n;
+}
+
+struct rb_node *rb_next(const struct rb_node *node)
+{
+ struct rb_node *parent;
+
+ if (RB_EMPTY_NODE(node))
+ return NULL;
+
+ /*
+ * If we have a right-hand child, go down and then left as far
+ * as we can.
+ */
+ if (node->rb_right) {
+ node = node->rb_right;
+ while (node->rb_left)
+ node = node->rb_left;
+ return (struct rb_node *)node;
+ }
+
+ /*
+ * No right-hand children. Everything down and left is smaller than us,
+ * so any 'next' node must be in the general direction of our parent.
+ * Go up the tree; any time the ancestor is a right-hand child of its
+ * parent, keep going up. First time it's a left-hand child of its
+ * parent, said parent is our 'next' node.
+ */
+ while ((parent = rb_parent(node)) && node == parent->rb_right)
+ node = parent;
+
+ return parent;
+}
+
+struct rb_node *rb_prev(const struct rb_node *node)
+{
+ struct rb_node *parent;
+
+ if (RB_EMPTY_NODE(node))
+ return NULL;
+
+ /*
+ * If we have a left-hand child, go down and then right as far
+ * as we can.
+ */
+ if (node->rb_left) {
+ node = node->rb_left;
+ while (node->rb_right)
+ node = node->rb_right;
+ return (struct rb_node *)node;
+ }
+
+ /*
+ * No left-hand children. Go up till we find an ancestor which
+ * is a right-hand child of its parent.
+ */
+ while ((parent = rb_parent(node)) && node == parent->rb_left)
+ node = parent;
+
+ return parent;
+}
+
+void rb_replace_node(struct rb_node *victim, struct rb_node *new,
+ struct rb_root *root)
+{
+ struct rb_node *parent = rb_parent(victim);
+
+ /* Set the surrounding nodes to point to the replacement */
+ __rb_change_child(victim, new, parent, root);
+ if (victim->rb_left)
+ rb_set_parent(victim->rb_left, new);
+ if (victim->rb_right)
+ rb_set_parent(victim->rb_right, new);
+
+ /* Copy the pointers/colour from the victim to the replacement */
+ *new = *victim;
+}
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+ for (;;) {
+ if (node->rb_left)
+ node = node->rb_left;
+ else if (node->rb_right)
+ node = node->rb_right;
+ else
+ return (struct rb_node *)node;
+ }
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+ const struct rb_node *parent;
+ if (!node)
+ return NULL;
+ parent = rb_parent(node);
+
+ /* If we're sitting on node, we've already seen our children */
+ if (parent && node == parent->rb_left && parent->rb_right) {
+ /* If we are the parent's left node, go to the parent's right
+ * node then all the way down to the left */
+ return rb_left_deepest_node(parent->rb_right);
+ } else
+ /* Otherwise we are the parent's right node, and the parent
+ * should be next */
+ return (struct rb_node *)parent;
+}
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+ if (!root->rb_node)
+ return NULL;
+
+ return rb_left_deepest_node(root->rb_node);
+}
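
For reference, the usual caller idiom for this implementation: embed an
rb_node in your own structure, descend to find the insertion point
yourself, then let rb_insert_color() rebalance. A self-contained sketch
(struct item and item_insert() are illustrative names; it assumes the
tools copy of <linux/rbtree.h>, which provides RB_ROOT, rb_link_node()
and rb_entry(), is on the include path):

    #include <stdio.h>
    #include <stdlib.h>
    #include <linux/rbtree.h>

    struct item {
        struct rb_node node;
        int key;
    };

    static void item_insert(struct rb_root *root, struct item *new)
    {
        struct rb_node **link = &root->rb_node, *parent = NULL;

        while (*link) {           /* standard ordered descent */
            parent = *link;
            if (new->key < rb_entry(parent, struct item, node)->key)
                link = &parent->rb_left;
            else
                link = &parent->rb_right;
        }
        rb_link_node(&new->node, parent, link);
        rb_insert_color(&new->node, root);   /* rebalance */
    }

    int main(void)
    {
        struct rb_root root = RB_ROOT;
        int keys[] = { 3, 1, 2 };
        struct rb_node *n;

        for (int i = 0; i < 3; i++) {
            struct item *it = malloc(sizeof(*it)); /* error handling omitted */
            it->key = keys[i];
            item_insert(&root, it);
        }
        for (n = rb_first(&root); n; n = rb_next(n)) /* prints 1 2 3 */
            printf("%d\n", rb_entry(n, struct item, node)->key);
        return 0;
    }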
diff --git a/tools/lib/str_error_r.c b/tools/lib/str_error_r.c
new file mode 100644
index 000000000..6aad8308a
--- /dev/null
+++ b/tools/lib/str_error_r.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#include <string.h>
+#include <stdio.h>
+#include <linux/string.h>
+
+/*
+ * The tools so far have been using the strerror_r() GNU variant, which returns
+ * a string, be it the buffer passed or something else.
+ *
+ * But that, besides being tricky in cases where we expect that the function
+ * using strerror_r() returns the error formatted in a provided buffer (we have
+ * to check if it returned something else and copy that instead), breaks the
+ * build on systems not using glibc, like Alpine Linux, where musl libc is
+ * used.
+ *
+ * So, introduce yet another wrapper, str_error_r(), that has the GNU
+ * interface, but uses the portable XSI variant of strerror_r(), so that users
+ * rest assured that the provided buffer is used and is what is returned.
+ */
+char *str_error_r(int errnum, char *buf, size_t buflen)
+{
+ int err = strerror_r(errnum, buf, buflen);
+ if (err)
+ snprintf(buf, buflen, "INTERNAL ERROR: strerror_r(%d, [buf], %zd)=%d", errnum, buflen, err);
+ return buf;
+}
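
A short usage sketch, assuming this file is linked in: because the
returned pointer is always the caller's buffer, the result can be fed
straight into printf-style formatting without the extra checks the GNU
variant requires.

    #include <errno.h>
    #include <stdio.h>

    char *str_error_r(int errnum, char *buf, size_t buflen);

    int main(void)
    {
        char buf[128];

        fprintf(stderr, "open failed: %s\n",
                str_error_r(EACCES, buf, sizeof(buf)));
        return 0;
    }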
diff --git a/tools/lib/string.c b/tools/lib/string.c
new file mode 100644
index 000000000..ee0afcbdd
--- /dev/null
+++ b/tools/lib/string.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * linux/tools/lib/string.c
+ *
+ * Copied from linux/lib/string.c, where it is:
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ * More specifically, the first copied function was strtobool, which
+ * was introduced by:
+ *
+ * d0f1fed29e6e ("Add a strtobool function matching semantics of existing in kernel equivalents")
+ * Author: Jonathan Cameron <jic23@cam.ac.uk>
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+
+/**
+ * memdup - duplicate region of memory
+ *
+ * @src: memory region to duplicate
+ * @len: memory region length
+ */
+void *memdup(const void *src, size_t len)
+{
+ void *p = malloc(len);
+
+ if (p)
+ memcpy(p, src, len);
+
+ return p;
+}
+
+/**
+ * strtobool - convert common user inputs into boolean values
+ * @s: input string
+ * @res: result
+ *
+ * This routine returns 0 iff the first character is one of 'Yy1Nn0', or
+ * [oO][NnFf] for "on" and "off". Otherwise it will return -EINVAL. Value
+ * pointed to by res is updated upon finding a match.
+ */
+int strtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * strlcpy - Copy a C-string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @size: size of destination buffer
+ *
+ * Compatible with *BSD: the result is always a valid
+ * NUL-terminated string that fits in the buffer (unless,
+ * of course, the buffer size is zero). It does not pad
+ * out the result like strncpy() does.
+ *
+ * If libc has strlcpy() then that version will override this
+ * implementation:
+ */
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wignored-attributes"
+#endif
+size_t __weak strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
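
Two behaviors worth showing in code: strtobool() only inspects the
first one or two characters, and strlcpy() returns strlen(src), so
truncation is detected by comparing the return value against the
destination size. A small sketch, assuming the tools include path so
that <linux/string.h> provides both prototypes:

    #include <stdbool.h>
    #include <stdio.h>
    #include <linux/string.h>

    int main(void)
    {
        bool on;
        char dst[8];

        if (strtobool("On", &on) == 0)   /* 'o' + 'n' parses as true */
            printf("flag=%d\n", on);

        /* return value >= sizeof(dst) means the copy was truncated */
        if (strlcpy(dst, "a very long string", sizeof(dst)) >= sizeof(dst))
            printf("truncated to '%s'\n", dst);

        return 0;
    }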
diff --git a/tools/lib/subcmd/Build b/tools/lib/subcmd/Build
new file mode 100644
index 000000000..ee3128878
--- /dev/null
+++ b/tools/lib/subcmd/Build
@@ -0,0 +1,7 @@
+libsubcmd-y += exec-cmd.o
+libsubcmd-y += help.o
+libsubcmd-y += pager.o
+libsubcmd-y += parse-options.o
+libsubcmd-y += run-command.o
+libsubcmd-y += sigchain.o
+libsubcmd-y += subcmd-config.o
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
new file mode 100644
index 000000000..5dbb0dde2
--- /dev/null
+++ b/tools/lib/subcmd/Makefile
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak # QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+LD ?= $(CROSS_COMPILE)ld
+AR ?= $(CROSS_COMPILE)ar
+
+RM = rm -f
+
+MAKEFLAGS += --no-print-directory
+
+LIBFILE = $(OUTPUT)libsubcmd.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
+
+ifeq ($(DEBUG),0)
+ ifeq ($(feature-fortify-source), 1)
+ CFLAGS += -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2
+ endif
+endif
+
+ifeq ($(DEBUG),1)
+ CFLAGS += -O0
+else ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -O3
+else
+ CFLAGS += -O6
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+CFLAGS += -I$(srctree)/tools/include/
+
+SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SUBCMD_IN): FORCE
+ @$(MAKE) $(build)=libsubcmd
+
+$(LIBFILE): $(SUBCMD_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
+
+clean:
+ $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
+ find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/lib/subcmd/exec-cmd.c b/tools/lib/subcmd/exec-cmd.c
new file mode 100644
index 000000000..33e94fb83
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "subcmd-util.h"
+#include "exec-cmd.h"
+#include "subcmd-config.h"
+
+#define MAX_ARGS 32
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+static const char *argv_exec_path;
+static const char *argv0_path;
+
+void exec_cmd_init(const char *exec_name, const char *prefix,
+ const char *exec_path, const char *exec_path_env)
+{
+ subcmd_config.exec_name = exec_name;
+ subcmd_config.prefix = prefix;
+ subcmd_config.exec_path = exec_path;
+ subcmd_config.exec_path_env = exec_path_env;
+}
+
+#define is_dir_sep(c) ((c) == '/')
+
+static int is_absolute_path(const char *path)
+{
+ return path[0] == '/';
+}
+
+static const char *get_pwd_cwd(void)
+{
+ static char cwd[PATH_MAX + 1];
+ char *pwd;
+ struct stat cwd_stat, pwd_stat;
+ if (getcwd(cwd, PATH_MAX) == NULL)
+ return NULL;
+ pwd = getenv("PWD");
+ if (pwd && strcmp(pwd, cwd)) {
+ stat(cwd, &cwd_stat);
+ if (!stat(pwd, &pwd_stat) &&
+ pwd_stat.st_dev == cwd_stat.st_dev &&
+ pwd_stat.st_ino == cwd_stat.st_ino) {
+ strlcpy(cwd, pwd, PATH_MAX);
+ }
+ }
+ return cwd;
+}
+
+static const char *make_nonrelative_path(const char *path)
+{
+ static char buf[PATH_MAX + 1];
+
+ if (is_absolute_path(path)) {
+ if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
+ die("Too long path: %.*s", 60, path);
+ } else {
+ const char *cwd = get_pwd_cwd();
+ if (!cwd)
+ die("Cannot determine the current working directory");
+ if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
+ die("Too long path: %.*s", 60, path);
+ }
+ return buf;
+}
+
+char *system_path(const char *path)
+{
+ char *buf = NULL;
+
+ if (is_absolute_path(path))
+ return strdup(path);
+
+ astrcatf(&buf, "%s/%s", subcmd_config.prefix, path);
+
+ return buf;
+}
+
+const char *extract_argv0_path(const char *argv0)
+{
+ const char *slash;
+
+ if (!argv0 || !*argv0)
+ return NULL;
+ slash = argv0 + strlen(argv0);
+
+ while (argv0 <= slash && !is_dir_sep(*slash))
+ slash--;
+
+ if (slash >= argv0) {
+ argv0_path = strndup(argv0, slash - argv0);
+ return argv0_path ? slash + 1 : NULL;
+ }
+
+ return argv0;
+}
+
+void set_argv_exec_path(const char *exec_path)
+{
+ argv_exec_path = exec_path;
+ /*
+ * Propagate this setting to external programs.
+ */
+ setenv(subcmd_config.exec_path_env, exec_path, 1);
+}
+
+
+/* Returns the highest-priority location to look for subprograms. */
+char *get_argv_exec_path(void)
+{
+ char *env;
+
+ if (argv_exec_path)
+ return strdup(argv_exec_path);
+
+ env = getenv(subcmd_config.exec_path_env);
+ if (env && *env)
+ return strdup(env);
+
+ return system_path(subcmd_config.exec_path);
+}
+
+static void add_path(char **out, const char *path)
+{
+ if (path && *path) {
+ if (is_absolute_path(path))
+ astrcat(out, path);
+ else
+ astrcat(out, make_nonrelative_path(path));
+
+ astrcat(out, ":");
+ }
+}
+
+void setup_path(void)
+{
+ const char *old_path = getenv("PATH");
+ char *new_path = NULL;
+ char *tmp = get_argv_exec_path();
+
+ add_path(&new_path, tmp);
+ add_path(&new_path, argv0_path);
+ free(tmp);
+
+ if (old_path)
+ astrcat(&new_path, old_path);
+ else
+ astrcat(&new_path, "/usr/local/bin:/usr/bin:/bin");
+
+ setenv("PATH", new_path, 1);
+
+ free(new_path);
+}
+
+static const char **prepare_exec_cmd(const char **argv)
+{
+ int argc;
+ const char **nargv;
+
+ for (argc = 0; argv[argc]; argc++)
+ ; /* just counting */
+ nargv = malloc(sizeof(*nargv) * (argc + 2));
+
+ nargv[0] = subcmd_config.exec_name;
+ for (argc = 0; argv[argc]; argc++)
+ nargv[argc + 1] = argv[argc];
+ nargv[argc + 1] = NULL;
+ return nargv;
+}
+
+int execv_cmd(const char **argv)
+{
+ const char **nargv = prepare_exec_cmd(argv);
+
+ /* execvp() can only ever return if it fails */
+ execvp(subcmd_config.exec_name, (char **)nargv);
+
+ free(nargv);
+ return -1;
+}
+
+
+int execl_cmd(const char *cmd,...)
+{
+ int argc;
+ const char *argv[MAX_ARGS + 1];
+ const char *arg;
+ va_list param;
+
+ va_start(param, cmd);
+ argv[0] = cmd;
+ argc = 1;
+ while (argc < MAX_ARGS) {
+ arg = argv[argc++] = va_arg(param, char *);
+ if (!arg)
+ break;
+ }
+ va_end(param);
+ if (MAX_ARGS <= argc) {
+ fprintf(stderr, " Error: too many args to run %s\n", cmd);
+ return -1;
+ }
+
+ argv[argc] = NULL;
+ return execv_cmd(argv);
+}
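
Taken together these form a perf-style startup sequence. A sketch with
hypothetical "mytool" names: register the tool's paths once, record
where argv[0] lives, then extend $PATH so that "mytool-<cmd>" helpers
can be found by name:

    #include <stdio.h>
    #include <stdlib.h>
    #include "exec-cmd.h"

    int main(int argc, char **argv)
    {
        const char *cmd;
        char *exec_path;

        (void)argc;
        exec_cmd_init("mytool", "/usr/local", "libexec/mytool-core",
                      "MYTOOL_EXEC_PATH");

        cmd = extract_argv0_path(argv[0]); /* basename; records the dir too */
        setup_path();   /* prepend exec path and argv0 dir to $PATH */

        exec_path = get_argv_exec_path();  /* malloc'd, caller frees */
        printf("running as '%s', helpers in '%s'\n", cmd, exec_path);
        free(exec_path);
        return 0;
    }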
diff --git a/tools/lib/subcmd/exec-cmd.h b/tools/lib/subcmd/exec-cmd.h
new file mode 100644
index 000000000..aba591b8d
--- /dev/null
+++ b/tools/lib/subcmd/exec-cmd.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_EXEC_CMD_H
+#define __SUBCMD_EXEC_CMD_H
+
+extern void exec_cmd_init(const char *exec_name, const char *prefix,
+ const char *exec_path, const char *exec_path_env);
+
+extern void set_argv_exec_path(const char *exec_path);
+extern const char *extract_argv0_path(const char *path);
+extern void setup_path(void);
+extern int execv_cmd(const char **argv); /* NULL terminated */
+extern int execl_cmd(const char *cmd, ...);
+/* get_argv_exec_path and system_path return malloc'd string, caller must free it */
+extern char *get_argv_exec_path(void);
+extern char *system_path(const char *path);
+
+#endif /* __SUBCMD_EXEC_CMD_H */
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
new file mode 100644
index 000000000..2859f107a
--- /dev/null
+++ b/tools/lib/subcmd/help.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/string.h>
+#include <termios.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "subcmd-util.h"
+#include "help.h"
+#include "exec-cmd.h"
+
+void add_cmdname(struct cmdnames *cmds, const char *name, size_t len)
+{
+ struct cmdname *ent = malloc(sizeof(*ent) + len + 1);
+
+ ent->len = len;
+ memcpy(ent->name, name, len);
+ ent->name[len] = 0;
+
+ ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc);
+ cmds->names[cmds->cnt++] = ent;
+}
+
+void clean_cmdnames(struct cmdnames *cmds)
+{
+ unsigned int i;
+
+ for (i = 0; i < cmds->cnt; ++i)
+ zfree(&cmds->names[i]);
+ zfree(&cmds->names);
+ cmds->cnt = 0;
+ cmds->alloc = 0;
+}
+
+int cmdname_compare(const void *a_, const void *b_)
+{
+ struct cmdname *a = *(struct cmdname **)a_;
+ struct cmdname *b = *(struct cmdname **)b_;
+ return strcmp(a->name, b->name);
+}
+
+void uniq(struct cmdnames *cmds)
+{
+ unsigned int i, j;
+
+ if (!cmds->cnt)
+ return;
+
+ for (i = j = 1; i < cmds->cnt; i++)
+ if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name))
+ cmds->names[j++] = cmds->names[i];
+
+ cmds->cnt = j;
+}
+
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
+{
+ size_t ci, cj, ei;
+ int cmp;
+
+ ci = cj = ei = 0;
+ while (ci < cmds->cnt && ei < excludes->cnt) {
+ cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
+ if (cmp < 0)
+ cmds->names[cj++] = cmds->names[ci++];
+ else if (cmp == 0)
+ ci++, ei++;
+ else if (cmp > 0)
+ ei++;
+ }
+
+ while (ci < cmds->cnt)
+ cmds->names[cj++] = cmds->names[ci++];
+
+ cmds->cnt = cj;
+}
+
+static void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
+static void pretty_print_string_list(struct cmdnames *cmds, int longest)
+{
+ int cols = 1, rows;
+ int space = longest + 1; /* min 1 SP between words */
+ struct winsize win;
+ int max_cols;
+ int i, j;
+
+ get_term_dimensions(&win);
+ max_cols = win.ws_col - 1; /* don't print *on* the edge */
+
+ if (space < max_cols)
+ cols = max_cols / space;
+ rows = (cmds->cnt + cols - 1) / cols;
+
+ for (i = 0; i < rows; i++) {
+ printf(" ");
+
+ for (j = 0; j < cols; j++) {
+ unsigned int n = j * rows + i;
+ unsigned int size = space;
+
+ if (n >= cmds->cnt)
+ break;
+ if (j == cols-1 || n + rows >= cmds->cnt)
+ size = 1;
+ printf("%-*s", size, cmds->names[n]->name);
+ }
+ putchar('\n');
+ }
+}
+
+static int is_executable(const char *name)
+{
+ struct stat st;
+
+ if (stat(name, &st) || /* stat, not lstat */
+ !S_ISREG(st.st_mode))
+ return 0;
+
+ return st.st_mode & S_IXUSR;
+}
+
+static int has_extension(const char *filename, const char *ext)
+{
+ size_t len = strlen(filename);
+ size_t extlen = strlen(ext);
+
+ return len > extlen && !memcmp(filename + len - extlen, ext, extlen);
+}
+
+static void list_commands_in_dir(struct cmdnames *cmds,
+ const char *path,
+ const char *prefix)
+{
+ int prefix_len;
+ DIR *dir = opendir(path);
+ struct dirent *de;
+ char *buf = NULL;
+
+ if (!dir)
+ return;
+ if (!prefix)
+ prefix = "perf-";
+ prefix_len = strlen(prefix);
+
+ astrcatf(&buf, "%s/", path);
+
+ while ((de = readdir(dir)) != NULL) {
+ int entlen;
+
+ if (!strstarts(de->d_name, prefix))
+ continue;
+
+ astrcat(&buf, de->d_name);
+ if (!is_executable(buf))
+ continue;
+
+ entlen = strlen(de->d_name) - prefix_len;
+ if (has_extension(de->d_name, ".exe"))
+ entlen -= 4;
+
+ add_cmdname(cmds, de->d_name + prefix_len, entlen);
+ }
+ closedir(dir);
+ free(buf);
+}
+
+void load_command_list(const char *prefix,
+ struct cmdnames *main_cmds,
+ struct cmdnames *other_cmds)
+{
+ const char *env_path = getenv("PATH");
+ char *exec_path = get_argv_exec_path();
+
+ if (exec_path) {
+ list_commands_in_dir(main_cmds, exec_path, prefix);
+ qsort(main_cmds->names, main_cmds->cnt,
+ sizeof(*main_cmds->names), cmdname_compare);
+ uniq(main_cmds);
+ }
+
+ if (env_path) {
+ char *paths, *path, *colon;
+ path = paths = strdup(env_path);
+ while (1) {
+ if ((colon = strchr(path, ':')))
+ *colon = 0;
+ if (!exec_path || strcmp(path, exec_path))
+ list_commands_in_dir(other_cmds, path, prefix);
+
+ if (!colon)
+ break;
+ path = colon + 1;
+ }
+ free(paths);
+
+ qsort(other_cmds->names, other_cmds->cnt,
+ sizeof(*other_cmds->names), cmdname_compare);
+ uniq(other_cmds);
+ }
+ free(exec_path);
+ exclude_cmds(other_cmds, main_cmds);
+}
+
+void list_commands(const char *title, struct cmdnames *main_cmds,
+ struct cmdnames *other_cmds)
+{
+ unsigned int i, longest = 0;
+
+ for (i = 0; i < main_cmds->cnt; i++)
+ if (longest < main_cmds->names[i]->len)
+ longest = main_cmds->names[i]->len;
+ for (i = 0; i < other_cmds->cnt; i++)
+ if (longest < other_cmds->names[i]->len)
+ longest = other_cmds->names[i]->len;
+
+ if (main_cmds->cnt) {
+ char *exec_path = get_argv_exec_path();
+ printf("available %s in '%s'\n", title, exec_path);
+ printf("----------------");
+ mput_char('-', strlen(title) + strlen(exec_path));
+ putchar('\n');
+ pretty_print_string_list(main_cmds, longest);
+ putchar('\n');
+ free(exec_path);
+ }
+
+ if (other_cmds->cnt) {
+ printf("%s available from elsewhere on your $PATH\n", title);
+ printf("---------------------------------------");
+ mput_char('-', strlen(title));
+ putchar('\n');
+ pretty_print_string_list(other_cmds, longest);
+ putchar('\n');
+ }
+}
+
+int is_in_cmdlist(struct cmdnames *c, const char *s)
+{
+ unsigned int i;
+
+ for (i = 0; i < c->cnt; i++)
+ if (!strcmp(s, c->names[i]->name))
+ return 1;
+ return 0;
+}
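
A sketch of how a tool drives this (names are hypothetical): after
exec_cmd_init(), load_command_list() fills one list from the exec path
and one from the rest of $PATH, already sorted and deduplicated, with
overlaps removed via exclude_cmds():

    #include "exec-cmd.h"
    #include "help.h"

    int main(void)
    {
        struct cmdnames main_cmds = { 0 }, other_cmds = { 0 };

        exec_cmd_init("mytool", "/usr/local", "libexec/mytool-core",
                      "MYTOOL_EXEC_PATH");

        /* collect executables named "mytool-*" and print the overview */
        load_command_list("mytool-", &main_cmds, &other_cmds);
        list_commands("commands", &main_cmds, &other_cmds);

        clean_cmdnames(&main_cmds);
        clean_cmdnames(&other_cmds);
        return 0;
    }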
diff --git a/tools/lib/subcmd/help.h b/tools/lib/subcmd/help.h
new file mode 100644
index 000000000..355c066c8
--- /dev/null
+++ b/tools/lib/subcmd/help.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_HELP_H
+#define __SUBCMD_HELP_H
+
+#include <sys/types.h>
+#include <stdio.h>
+
+struct cmdnames {
+ size_t alloc;
+ size_t cnt;
+ struct cmdname {
+ size_t len; /* also used for similarity index in help.c */
+ char name[];
+ } **names;
+};
+
+static inline void mput_char(char c, unsigned int num)
+{
+ while(num--)
+ putchar(c);
+}
+
+void load_command_list(const char *prefix,
+ struct cmdnames *main_cmds,
+ struct cmdnames *other_cmds);
+void add_cmdname(struct cmdnames *cmds, const char *name, size_t len);
+void clean_cmdnames(struct cmdnames *cmds);
+int cmdname_compare(const void *a, const void *b);
+void uniq(struct cmdnames *cmds);
+/* Here we require that excludes is a sorted list. */
+void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes);
+int is_in_cmdlist(struct cmdnames *c, const char *s);
+void list_commands(const char *title, struct cmdnames *main_cmds,
+ struct cmdnames *other_cmds);
+
+#endif /* __SUBCMD_HELP_H */
diff --git a/tools/lib/subcmd/pager.c b/tools/lib/subcmd/pager.c
new file mode 100644
index 000000000..9997a8805
--- /dev/null
+++ b/tools/lib/subcmd/pager.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/select.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include "pager.h"
+#include "run-command.h"
+#include "sigchain.h"
+#include "subcmd-config.h"
+
+/*
+ * This is split up from the rest of git so that we can do
+ * something different on Windows.
+ */
+
+static int spawned_pager;
+static int pager_columns;
+
+void pager_init(const char *pager_env)
+{
+ subcmd_config.pager_env = pager_env;
+}
+
+static void pager_preexec(void)
+{
+ /*
+ * Work around bug in "less" by not starting it until we
+ * have real input
+ */
+ fd_set in;
+ fd_set exception;
+
+ FD_ZERO(&in);
+ FD_ZERO(&exception);
+ FD_SET(0, &in);
+ FD_SET(0, &exception);
+ select(1, &in, NULL, &exception, NULL);
+
+ setenv("LESS", "FRSX", 0);
+}
+
+static const char *pager_argv[] = { "sh", "-c", NULL, NULL };
+static struct child_process pager_process;
+
+static void wait_for_pager(void)
+{
+ fflush(stdout);
+ fflush(stderr);
+ /* signal EOF to pager */
+ close(1);
+ close(2);
+ finish_command(&pager_process);
+}
+
+static void wait_for_pager_signal(int signo)
+{
+ wait_for_pager();
+ sigchain_pop(signo);
+ raise(signo);
+}
+
+void setup_pager(void)
+{
+ const char *pager = getenv(subcmd_config.pager_env);
+ struct winsize sz;
+
+ if (!isatty(1))
+ return;
+ if (ioctl(1, TIOCGWINSZ, &sz) == 0)
+ pager_columns = sz.ws_col;
+ if (!pager)
+ pager = getenv("PAGER");
+ if (!(pager || access("/usr/bin/pager", X_OK)))
+ pager = "/usr/bin/pager";
+ if (!(pager || access("/usr/bin/less", X_OK)))
+ pager = "/usr/bin/less";
+ if (!pager)
+ pager = "cat";
+ if (!*pager || !strcmp(pager, "cat"))
+ return;
+
+ spawned_pager = 1; /* means we are emitting to terminal */
+
+ /* spawn the pager */
+ pager_argv[2] = pager;
+ pager_process.argv = pager_argv;
+ pager_process.in = -1;
+ pager_process.preexec_cb = pager_preexec;
+
+ if (start_command(&pager_process))
+ return;
+
+ /* original process continues, but writes to the pipe */
+ dup2(pager_process.in, 1);
+ if (isatty(2))
+ dup2(pager_process.in, 2);
+ close(pager_process.in);
+
+ /* this makes sure that the parent terminates after the pager */
+ sigchain_push_common(wait_for_pager_signal);
+ atexit(wait_for_pager);
+}
+
+int pager_in_use(void)
+{
+ return spawned_pager;
+}
+
+int pager_get_columns(void)
+{
+ char *s;
+
+ s = getenv("COLUMNS");
+ if (s)
+ return atoi(s);
+
+ return (pager_columns ? pager_columns : 80) - 2;
+}
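
Callers need only a few of these entry points: after setup_pager(),
anything written to stdout goes through the pager when it is a tty, and
the atexit() handler makes the process wait for the pager to finish. A
sketch with a hypothetical environment variable name:

    #include <stdio.h>
    #include "pager.h"

    int main(void)
    {
        int i, width;

        pager_init("MYTOOL_PAGER");  /* hypothetical env var */
        setup_pager();               /* no-op when stdout is not a tty */

        width = pager_get_columns(); /* stay inside the pager's width */
        for (i = 0; i < 200; i++)
            printf("line %d of at most %d columns\n", i, width);

        return 0;                    /* waits for the pager to exit */
    }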
diff --git a/tools/lib/subcmd/pager.h b/tools/lib/subcmd/pager.h
new file mode 100644
index 000000000..f1a53cf29
--- /dev/null
+++ b/tools/lib/subcmd/pager.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_PAGER_H
+#define __SUBCMD_PAGER_H
+
+extern void pager_init(const char *pager_env);
+
+extern void setup_pager(void);
+extern int pager_in_use(void);
+extern int pager_get_columns(void);
+
+#endif /* __SUBCMD_PAGER_H */
diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c
new file mode 100644
index 000000000..cb7154ecc
--- /dev/null
+++ b/tools/lib/subcmd/parse-options.c
@@ -0,0 +1,1006 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <ctype.h>
+#include "subcmd-util.h"
+#include "parse-options.h"
+#include "subcmd-config.h"
+#include "pager.h"
+
+#define OPT_SHORT 1
+#define OPT_UNSET 2
+
+char *error_buf;
+
+static int opterror(const struct option *opt, const char *reason, int flags)
+{
+ if (flags & OPT_SHORT)
+ fprintf(stderr, " Error: switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ fprintf(stderr, " Error: option `no-%s' %s", opt->long_name, reason);
+ else
+ fprintf(stderr, " Error: option `%s' %s", opt->long_name, reason);
+
+ return -1;
+}
+
+static const char *skip_prefix(const char *str, const char *prefix)
+{
+ size_t len = strlen(prefix);
+ return strncmp(str, prefix, len) ? NULL : str + len;
+}
+
+static void optwarning(const struct option *opt, const char *reason, int flags)
+{
+ if (flags & OPT_SHORT)
+ fprintf(stderr, " Warning: switch `%c' %s", opt->short_name, reason);
+ else if (flags & OPT_UNSET)
+ fprintf(stderr, " Warning: option `no-%s' %s", opt->long_name, reason);
+ else
+ fprintf(stderr, " Warning: option `%s' %s", opt->long_name, reason);
+}
+
+static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt,
+ int flags, const char **arg)
+{
+ const char *res;
+
+ if (p->opt) {
+ res = p->opt;
+ p->opt = NULL;
+ } else if ((opt->flags & PARSE_OPT_LASTARG_DEFAULT) && (p->argc == 1 ||
+ **(p->argv + 1) == '-')) {
+ res = (const char *)opt->defval;
+ } else if (p->argc > 1) {
+ p->argc--;
+ res = *++p->argv;
+ } else
+ return opterror(opt, "requires a value", flags);
+ if (arg)
+ *arg = res;
+ return 0;
+}
+
+static int get_value(struct parse_opt_ctx_t *p,
+ const struct option *opt, int flags)
+{
+ const char *s, *arg = NULL;
+ const int unset = flags & OPT_UNSET;
+ int err;
+
+ if (unset && p->opt)
+ return opterror(opt, "takes no value", flags);
+ if (unset && (opt->flags & PARSE_OPT_NONEG))
+ return opterror(opt, "isn't available", flags);
+ if (opt->flags & PARSE_OPT_DISABLED)
+ return opterror(opt, "is not usable", flags);
+
+ if (opt->flags & PARSE_OPT_EXCLUSIVE) {
+ if (p->excl_opt && p->excl_opt != opt) {
+ char msg[128];
+
+ if (((flags & OPT_SHORT) && p->excl_opt->short_name) ||
+ p->excl_opt->long_name == NULL) {
+ snprintf(msg, sizeof(msg), "cannot be used with switch `%c'",
+ p->excl_opt->short_name);
+ } else {
+ snprintf(msg, sizeof(msg), "cannot be used with %s",
+ p->excl_opt->long_name);
+ }
+ opterror(opt, msg, flags);
+ return -3;
+ }
+ p->excl_opt = opt;
+ }
+ if (!(flags & OPT_SHORT) && p->opt) {
+ switch (opt->type) {
+ case OPTION_CALLBACK:
+ if (!(opt->flags & PARSE_OPT_NOARG))
+ break;
+ /* FALLTHROUGH */
+ case OPTION_BOOLEAN:
+ case OPTION_INCR:
+ case OPTION_BIT:
+ case OPTION_SET_UINT:
+ case OPTION_SET_PTR:
+ return opterror(opt, "takes no value", flags);
+ case OPTION_END:
+ case OPTION_ARGUMENT:
+ case OPTION_GROUP:
+ case OPTION_STRING:
+ case OPTION_INTEGER:
+ case OPTION_UINTEGER:
+ case OPTION_LONG:
+ case OPTION_U64:
+ default:
+ break;
+ }
+ }
+
+ if (opt->flags & PARSE_OPT_NOBUILD) {
+ char reason[128];
+ bool noarg = false;
+
+ err = snprintf(reason, sizeof(reason),
+ opt->flags & PARSE_OPT_CANSKIP ?
+ "is being ignored because %s " :
+ "is not available because %s",
+ opt->build_opt);
+ reason[sizeof(reason) - 1] = '\0';
+
+ if (err < 0)
+ strncpy(reason, opt->flags & PARSE_OPT_CANSKIP ?
+ "is being ignored" :
+ "is not available",
+ sizeof(reason));
+
+ if (!(opt->flags & PARSE_OPT_CANSKIP))
+ return opterror(opt, reason, flags);
+
+ err = 0;
+ if (unset)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_NOARG)
+ noarg = true;
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ noarg = true;
+
+ switch (opt->type) {
+ case OPTION_BOOLEAN:
+ case OPTION_INCR:
+ case OPTION_BIT:
+ case OPTION_SET_UINT:
+ case OPTION_SET_PTR:
+ case OPTION_END:
+ case OPTION_ARGUMENT:
+ case OPTION_GROUP:
+ noarg = true;
+ break;
+ case OPTION_CALLBACK:
+ case OPTION_STRING:
+ case OPTION_INTEGER:
+ case OPTION_UINTEGER:
+ case OPTION_LONG:
+ case OPTION_U64:
+ default:
+ break;
+ }
+
+ if (!noarg)
+ err = get_arg(p, opt, flags, NULL);
+ if (err)
+ return err;
+
+ optwarning(opt, reason, flags);
+ return 0;
+ }
+
+ switch (opt->type) {
+ case OPTION_BIT:
+ if (unset)
+ *(int *)opt->value &= ~opt->defval;
+ else
+ *(int *)opt->value |= opt->defval;
+ return 0;
+
+ case OPTION_BOOLEAN:
+ *(bool *)opt->value = unset ? false : true;
+ if (opt->set)
+ *(bool *)opt->set = true;
+ return 0;
+
+ case OPTION_INCR:
+ *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1;
+ return 0;
+
+ case OPTION_SET_UINT:
+ *(unsigned int *)opt->value = unset ? 0 : opt->defval;
+ return 0;
+
+ case OPTION_SET_PTR:
+ *(void **)opt->value = unset ? NULL : (void *)opt->defval;
+ return 0;
+
+ case OPTION_STRING:
+ err = 0;
+ if (unset)
+ *(const char **)opt->value = NULL;
+ else if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ *(const char **)opt->value = (const char *)opt->defval;
+ else
+ err = get_arg(p, opt, flags, (const char **)opt->value);
+
+ if (opt->set)
+ *(bool *)opt->set = true;
+
+ /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */
+ if (opt->flags & PARSE_OPT_NOEMPTY) {
+ const char *val = *(const char **)opt->value;
+
+ if (!val)
+ return err;
+
+ /* Similar to unset if we are given an empty string. */
+ if (val[0] == '\0') {
+ *(const char **)opt->value = NULL;
+ return 0;
+ }
+ }
+
+ return err;
+
+ case OPTION_CALLBACK:
+ if (unset)
+ return (*opt->callback)(opt, NULL, 1) ? (-1) : 0;
+ if (opt->flags & PARSE_OPT_NOARG)
+ return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt)
+ return (*opt->callback)(opt, NULL, 0) ? (-1) : 0;
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ return (*opt->callback)(opt, arg, 0) ? (-1) : 0;
+
+ case OPTION_INTEGER:
+ if (unset) {
+ *(int *)opt->value = 0;
+ return 0;
+ }
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+ *(int *)opt->value = opt->defval;
+ return 0;
+ }
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ *(int *)opt->value = strtol(arg, (char **)&s, 10);
+ if (*s)
+ return opterror(opt, "expects a numerical value", flags);
+ return 0;
+
+ case OPTION_UINTEGER:
+ if (unset) {
+ *(unsigned int *)opt->value = 0;
+ return 0;
+ }
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+ *(unsigned int *)opt->value = opt->defval;
+ return 0;
+ }
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ if (arg[0] == '-')
+ return opterror(opt, "expects an unsigned numerical value", flags);
+ *(unsigned int *)opt->value = strtol(arg, (char **)&s, 10);
+ if (*s)
+ return opterror(opt, "expects a numerical value", flags);
+ return 0;
+
+ case OPTION_LONG:
+ if (unset) {
+ *(long *)opt->value = 0;
+ return 0;
+ }
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+ *(long *)opt->value = opt->defval;
+ return 0;
+ }
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ *(long *)opt->value = strtol(arg, (char **)&s, 10);
+ if (*s)
+ return opterror(opt, "expects a numerical value", flags);
+ return 0;
+
+ case OPTION_U64:
+ if (unset) {
+ *(u64 *)opt->value = 0;
+ return 0;
+ }
+ if (opt->flags & PARSE_OPT_OPTARG && !p->opt) {
+ *(u64 *)opt->value = opt->defval;
+ return 0;
+ }
+ if (get_arg(p, opt, flags, &arg))
+ return -1;
+ if (arg[0] == '-')
+ return opterror(opt, "expects an unsigned numerical value", flags);
+ *(u64 *)opt->value = strtoull(arg, (char **)&s, 10);
+ if (*s)
+ return opterror(opt, "expects a numerical value", flags);
+ return 0;
+
+ case OPTION_END:
+ case OPTION_ARGUMENT:
+ case OPTION_GROUP:
+ default:
+ die("should not happen, someone must be hit on the forehead");
+ }
+}
+
+static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options)
+{
+retry:
+ for (; options->type != OPTION_END; options++) {
+ if (options->short_name == *p->opt) {
+ p->opt = p->opt[1] ? p->opt + 1 : NULL;
+ return get_value(p, options, OPT_SHORT);
+ }
+ }
+
+ if (options->parent) {
+ options = options->parent;
+ goto retry;
+ }
+
+ return -2;
+}
+
+static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg,
+ const struct option *options)
+{
+ const char *arg_end = strchr(arg, '=');
+ const struct option *abbrev_option = NULL, *ambiguous_option = NULL;
+ int abbrev_flags = 0, ambiguous_flags = 0;
+
+ if (!arg_end)
+ arg_end = arg + strlen(arg);
+
+retry:
+ for (; options->type != OPTION_END; options++) {
+ const char *rest;
+ int flags = 0;
+
+ if (!options->long_name)
+ continue;
+
+ rest = skip_prefix(arg, options->long_name);
+ if (options->type == OPTION_ARGUMENT) {
+ if (!rest)
+ continue;
+ if (*rest == '=')
+ return opterror(options, "takes no value", flags);
+ if (*rest)
+ continue;
+ p->out[p->cpidx++] = arg - 2;
+ return 0;
+ }
+ if (!rest) {
+ if (strstarts(options->long_name, "no-")) {
+ /*
+ * The long name itself starts with "no-", so
+ * accept the option without "no-" so that users
+ * do not have to enter "no-no-" to get the
+ * negation.
+ */
+ rest = skip_prefix(arg, options->long_name + 3);
+ if (rest) {
+ flags |= OPT_UNSET;
+ goto match;
+ }
+ /* Abbreviated case */
+ if (strstarts(options->long_name + 3, arg)) {
+ flags |= OPT_UNSET;
+ goto is_abbreviated;
+ }
+ }
+ /* abbreviated? */
+ if (!strncmp(options->long_name, arg, arg_end - arg)) {
+is_abbreviated:
+ if (abbrev_option) {
+ /*
+ * If this is abbreviated, it is
+ * ambiguous. So when there is no
+ * exact match later, we need to
+ * error out.
+ */
+ ambiguous_option = abbrev_option;
+ ambiguous_flags = abbrev_flags;
+ }
+ if (!(flags & OPT_UNSET) && *arg_end)
+ p->opt = arg_end + 1;
+ abbrev_option = options;
+ abbrev_flags = flags;
+ continue;
+ }
+ /* negated and abbreviated very much? */
+ if (strstarts("no-", arg)) {
+ flags |= OPT_UNSET;
+ goto is_abbreviated;
+ }
+ /* negated? */
+ if (strncmp(arg, "no-", 3))
+ continue;
+ flags |= OPT_UNSET;
+ rest = skip_prefix(arg + 3, options->long_name);
+ /* abbreviated and negated? */
+ if (!rest && strstarts(options->long_name, arg + 3))
+ goto is_abbreviated;
+ if (!rest)
+ continue;
+ }
+match:
+ if (*rest) {
+ if (*rest != '=')
+ continue;
+ p->opt = rest + 1;
+ }
+ return get_value(p, options, flags);
+ }
+
+ if (ambiguous_option) {
+ fprintf(stderr,
+ " Error: Ambiguous option: %s (could be --%s%s or --%s%s)\n",
+ arg,
+ (ambiguous_flags & OPT_UNSET) ? "no-" : "",
+ ambiguous_option->long_name,
+ (abbrev_flags & OPT_UNSET) ? "no-" : "",
+ abbrev_option->long_name);
+ return -1;
+ }
+ if (abbrev_option)
+ return get_value(p, abbrev_option, abbrev_flags);
+
+ if (options->parent) {
+ options = options->parent;
+ goto retry;
+ }
+
+ return -2;
+}
+
+static void check_typos(const char *arg, const struct option *options)
+{
+ if (strlen(arg) < 3)
+ return;
+
+ if (strstarts(arg, "no-")) {
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
+ exit(129);
+ }
+
+ for (; options->type != OPTION_END; options++) {
+ if (!options->long_name)
+ continue;
+ if (strstarts(options->long_name, arg)) {
+ fprintf(stderr, " Error: did you mean `--%s` (with two dashes ?)\n", arg);
+ exit(129);
+ }
+ }
+}
+
+static void parse_options_start(struct parse_opt_ctx_t *ctx,
+ int argc, const char **argv, int flags)
+{
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->argc = argc - 1;
+ ctx->argv = argv + 1;
+ ctx->out = argv;
+ ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0);
+ ctx->flags = flags;
+ if ((flags & PARSE_OPT_KEEP_UNKNOWN) &&
+ (flags & PARSE_OPT_STOP_AT_NON_OPTION))
+ die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together");
+}
+
+static int usage_with_options_internal(const char * const *,
+ const struct option *, int,
+ struct parse_opt_ctx_t *);
+
+static int parse_options_step(struct parse_opt_ctx_t *ctx,
+ const struct option *options,
+ const char * const usagestr[])
+{
+ int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP);
+ int excl_short_opt = 1;
+ const char *arg;
+
+ /* we must reset ->opt, as unknown short options leave it dangling */
+ ctx->opt = NULL;
+
+ for (; ctx->argc; ctx->argc--, ctx->argv++) {
+ arg = ctx->argv[0];
+ if (*arg != '-' || !arg[1]) {
+ if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION)
+ break;
+ ctx->out[ctx->cpidx++] = ctx->argv[0];
+ continue;
+ }
+
+ if (arg[1] != '-') {
+ ctx->opt = ++arg;
+ if (internal_help && *ctx->opt == 'h') {
+ return usage_with_options_internal(usagestr, options, 0, ctx);
+ }
+ switch (parse_short_opt(ctx, options)) {
+ case -1:
+ return parse_options_usage(usagestr, options, arg, 1);
+ case -2:
+ goto unknown;
+ case -3:
+ goto exclusive;
+ default:
+ break;
+ }
+ if (ctx->opt)
+ check_typos(arg, options);
+ while (ctx->opt) {
+ if (internal_help && *ctx->opt == 'h')
+ return usage_with_options_internal(usagestr, options, 0, ctx);
+ arg = ctx->opt;
+ switch (parse_short_opt(ctx, options)) {
+ case -1:
+ return parse_options_usage(usagestr, options, arg, 1);
+ case -2:
+ /* fake a short option thing to hide the fact that we may have
+ * started to parse aggregated stuff
+ *
+ * This is leaky, too bad.
+ */
+ ctx->argv[0] = strdup(ctx->opt - 1);
+ *(char *)ctx->argv[0] = '-';
+ goto unknown;
+ case -3:
+ goto exclusive;
+ default:
+ break;
+ }
+ }
+ continue;
+ }
+
+ if (!arg[2]) { /* "--" */
+ if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) {
+ ctx->argc--;
+ ctx->argv++;
+ }
+ break;
+ }
+
+ arg += 2;
+ if (internal_help && !strcmp(arg, "help-all"))
+ return usage_with_options_internal(usagestr, options, 1, ctx);
+ if (internal_help && !strcmp(arg, "help"))
+ return usage_with_options_internal(usagestr, options, 0, ctx);
+ if (!strcmp(arg, "list-opts"))
+ return PARSE_OPT_LIST_OPTS;
+ if (!strcmp(arg, "list-cmds"))
+ return PARSE_OPT_LIST_SUBCMDS;
+ switch (parse_long_opt(ctx, arg, options)) {
+ case -1:
+ return parse_options_usage(usagestr, options, arg, 0);
+ case -2:
+ goto unknown;
+ case -3:
+ excl_short_opt = 0;
+ goto exclusive;
+ default:
+ break;
+ }
+ continue;
+unknown:
+ if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN))
+ return PARSE_OPT_UNKNOWN;
+ ctx->out[ctx->cpidx++] = ctx->argv[0];
+ ctx->opt = NULL;
+ }
+ return PARSE_OPT_DONE;
+
+exclusive:
+ parse_options_usage(usagestr, options, arg, excl_short_opt);
+ if ((excl_short_opt && ctx->excl_opt->short_name) ||
+ ctx->excl_opt->long_name == NULL) {
+ char opt = ctx->excl_opt->short_name;
+ parse_options_usage(NULL, options, &opt, 1);
+ } else {
+ parse_options_usage(NULL, options, ctx->excl_opt->long_name, 0);
+ }
+ return PARSE_OPT_HELP;
+}
+
+static int parse_options_end(struct parse_opt_ctx_t *ctx)
+{
+ memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out));
+ ctx->out[ctx->cpidx + ctx->argc] = NULL;
+ return ctx->cpidx + ctx->argc;
+}
+
+int parse_options_subcommand(int argc, const char **argv, const struct option *options,
+ const char *const subcommands[], const char *usagestr[], int flags)
+{
+ struct parse_opt_ctx_t ctx;
+
+ /* build usage string if it's not provided */
+ if (subcommands && !usagestr[0]) {
+ char *buf = NULL;
+
+ astrcatf(&buf, "%s %s [<options>] {", subcmd_config.exec_name, argv[0]);
+
+ for (int i = 0; subcommands[i]; i++) {
+ if (i)
+ astrcat(&buf, "|");
+ astrcat(&buf, subcommands[i]);
+ }
+ astrcat(&buf, "}");
+
+ usagestr[0] = buf;
+ }
+
+ parse_options_start(&ctx, argc, argv, flags);
+ switch (parse_options_step(&ctx, options, usagestr)) {
+ case PARSE_OPT_HELP:
+ exit(129);
+ case PARSE_OPT_DONE:
+ break;
+ case PARSE_OPT_LIST_OPTS:
+ while (options->type != OPTION_END) {
+ if (options->long_name)
+ printf("--%s ", options->long_name);
+ options++;
+ }
+ putchar('\n');
+ exit(130);
+ case PARSE_OPT_LIST_SUBCMDS:
+ if (subcommands) {
+ for (int i = 0; subcommands[i]; i++)
+ printf("%s ", subcommands[i]);
+ }
+ putchar('\n');
+ exit(130);
+ default: /* PARSE_OPT_UNKNOWN */
+ if (ctx.argv[0][1] == '-')
+ astrcatf(&error_buf, "unknown option `%s'",
+ ctx.argv[0] + 2);
+ else
+ astrcatf(&error_buf, "unknown switch `%c'", *ctx.opt);
+ usage_with_options(usagestr, options);
+ }
+
+ return parse_options_end(&ctx);
+}
+
+int parse_options(int argc, const char **argv, const struct option *options,
+ const char * const usagestr[], int flags)
+{
+ return parse_options_subcommand(argc, argv, options, NULL,
+ (const char **) usagestr, flags);
+}
+
+#define USAGE_OPTS_WIDTH 24
+#define USAGE_GAP 2
+
+static void print_option_help(const struct option *opts, int full)
+{
+ size_t pos;
+ int pad;
+
+ if (opts->type == OPTION_GROUP) {
+ fputc('\n', stderr);
+ if (*opts->help)
+ fprintf(stderr, "%s\n", opts->help);
+ return;
+ }
+ if (!full && (opts->flags & PARSE_OPT_HIDDEN))
+ return;
+ if (opts->flags & PARSE_OPT_DISABLED)
+ return;
+
+ pos = fprintf(stderr, " ");
+ if (opts->short_name)
+ pos += fprintf(stderr, "-%c", opts->short_name);
+ else
+ pos += fprintf(stderr, " ");
+
+ if (opts->long_name && opts->short_name)
+ pos += fprintf(stderr, ", ");
+ if (opts->long_name)
+ pos += fprintf(stderr, "--%s", opts->long_name);
+
+ switch (opts->type) {
+ case OPTION_ARGUMENT:
+ break;
+ case OPTION_LONG:
+ case OPTION_U64:
+ case OPTION_INTEGER:
+ case OPTION_UINTEGER:
+ if (opts->flags & PARSE_OPT_OPTARG)
+ if (opts->long_name)
+ pos += fprintf(stderr, "[=<n>]");
+ else
+ pos += fprintf(stderr, "[<n>]");
+ else
+ pos += fprintf(stderr, " <n>");
+ break;
+ case OPTION_CALLBACK:
+ if (opts->flags & PARSE_OPT_NOARG)
+ break;
+ /* FALLTHROUGH */
+ case OPTION_STRING:
+ if (opts->argh) {
+ if (opts->flags & PARSE_OPT_OPTARG)
+ if (opts->long_name)
+ pos += fprintf(stderr, "[=<%s>]", opts->argh);
+ else
+ pos += fprintf(stderr, "[<%s>]", opts->argh);
+ else
+ pos += fprintf(stderr, " <%s>", opts->argh);
+ } else {
+ if (opts->flags & PARSE_OPT_OPTARG)
+ if (opts->long_name)
+ pos += fprintf(stderr, "[=...]");
+ else
+ pos += fprintf(stderr, "[...]");
+ else
+ pos += fprintf(stderr, " ...");
+ }
+ break;
+ default: /* OPTION_{BIT,BOOLEAN,SET_UINT,SET_PTR} */
+ case OPTION_END:
+ case OPTION_GROUP:
+ case OPTION_BIT:
+ case OPTION_BOOLEAN:
+ case OPTION_INCR:
+ case OPTION_SET_UINT:
+ case OPTION_SET_PTR:
+ break;
+ }
+
+ if (pos <= USAGE_OPTS_WIDTH)
+ pad = USAGE_OPTS_WIDTH - pos;
+ else {
+ fputc('\n', stderr);
+ pad = USAGE_OPTS_WIDTH;
+ }
+ fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help);
+ if (opts->flags & PARSE_OPT_NOBUILD)
+ fprintf(stderr, "%*s(not built-in because %s)\n",
+ USAGE_OPTS_WIDTH + USAGE_GAP, "",
+ opts->build_opt);
+}
+
+static int option__cmp(const void *va, const void *vb)
+{
+ const struct option *a = va, *b = vb;
+ int sa = tolower(a->short_name), sb = tolower(b->short_name), ret;
+
+ if (sa == 0)
+ sa = 'z' + 1;
+ if (sb == 0)
+ sb = 'z' + 1;
+
+ ret = sa - sb;
+
+ if (ret == 0) {
+ const char *la = a->long_name ?: "",
+ *lb = b->long_name ?: "";
+ ret = strcmp(la, lb);
+ }
+
+ return ret;
+}
+
+static struct option *options__order(const struct option *opts)
+{
+ int nr_opts = 0, len;
+ const struct option *o = opts;
+ struct option *ordered;
+
+ for (o = opts; o->type != OPTION_END; o++)
+ ++nr_opts;
+
+ len = sizeof(*o) * (nr_opts + 1);
+ ordered = malloc(len);
+ if (!ordered)
+ goto out;
+ memcpy(ordered, opts, len);
+
+ qsort(ordered, nr_opts, sizeof(*o), option__cmp);
+out:
+ return ordered;
+}
+
+static bool option__in_argv(const struct option *opt, const struct parse_opt_ctx_t *ctx)
+{
+ int i;
+
+ for (i = 1; i < ctx->argc; ++i) {
+ const char *arg = ctx->argv[i];
+
+ if (arg[0] != '-') {
+ if (arg[1] == '\0') {
+ if (arg[0] == opt->short_name)
+ return true;
+ continue;
+ }
+
+ if (opt->long_name && strcmp(opt->long_name, arg) == 0)
+ return true;
+
+ if (opt->help && strcasestr(opt->help, arg) != NULL)
+ return true;
+
+ continue;
+ }
+
+ if (arg[1] == opt->short_name ||
+ (arg[1] == '-' && opt->long_name && strcmp(opt->long_name, arg + 2) == 0))
+ return true;
+ }
+
+ return false;
+}
+
+static int usage_with_options_internal(const char * const *usagestr,
+ const struct option *opts, int full,
+ struct parse_opt_ctx_t *ctx)
+{
+ struct option *ordered;
+
+ if (!usagestr)
+ return PARSE_OPT_HELP;
+
+ setup_pager();
+
+ if (error_buf) {
+ fprintf(stderr, " Error: %s\n", error_buf);
+ zfree(&error_buf);
+ }
+
+ fprintf(stderr, "\n Usage: %s\n", *usagestr++);
+ while (*usagestr && **usagestr)
+ fprintf(stderr, " or: %s\n", *usagestr++);
+ while (*usagestr) {
+ fprintf(stderr, "%s%s\n",
+ **usagestr ? " " : "",
+ *usagestr);
+ usagestr++;
+ }
+
+ if (opts->type != OPTION_GROUP)
+ fputc('\n', stderr);
+
+ ordered = options__order(opts);
+ if (ordered)
+ opts = ordered;
+
+ for ( ; opts->type != OPTION_END; opts++) {
+ if (ctx && ctx->argc > 1 && !option__in_argv(opts, ctx))
+ continue;
+ print_option_help(opts, full);
+ }
+
+ fputc('\n', stderr);
+
+ free(ordered);
+
+ return PARSE_OPT_HELP;
+}
+
+void usage_with_options(const char * const *usagestr,
+ const struct option *opts)
+{
+ usage_with_options_internal(usagestr, opts, 0, NULL);
+ exit(129);
+}
+
+void usage_with_options_msg(const char * const *usagestr,
+ const struct option *opts, const char *fmt, ...)
+{
+ va_list ap;
+ char *tmp = error_buf;
+
+ va_start(ap, fmt);
+ if (vasprintf(&error_buf, fmt, ap) == -1)
+ die("vasprintf failed");
+ va_end(ap);
+
+ free(tmp);
+
+ usage_with_options_internal(usagestr, opts, 0, NULL);
+ exit(129);
+}
+
+int parse_options_usage(const char * const *usagestr,
+ const struct option *opts,
+ const char *optstr, bool short_opt)
+{
+ if (!usagestr)
+ goto opt;
+
+ fprintf(stderr, "\n Usage: %s\n", *usagestr++);
+ while (*usagestr && **usagestr)
+ fprintf(stderr, " or: %s\n", *usagestr++);
+ while (*usagestr) {
+ fprintf(stderr, "%s%s\n",
+ **usagestr ? " " : "",
+ *usagestr);
+ usagestr++;
+ }
+ fputc('\n', stderr);
+
+opt:
+ for ( ; opts->type != OPTION_END; opts++) {
+ if (short_opt) {
+ if (opts->short_name == *optstr) {
+ print_option_help(opts, 0);
+ break;
+ }
+ continue;
+ }
+
+ if (opts->long_name == NULL)
+ continue;
+
+ if (strstarts(opts->long_name, optstr))
+ print_option_help(opts, 0);
+		if (strstarts(optstr, "no-") &&
+		    strstarts(opts->long_name, optstr + 3))
+ print_option_help(opts, 0);
+ }
+
+ return PARSE_OPT_HELP;
+}
+
+
+int parse_opt_verbosity_cb(const struct option *opt,
+ const char *arg __maybe_unused,
+ int unset)
+{
+ int *target = opt->value;
+
+ if (unset)
+ /* --no-quiet, --no-verbose */
+ *target = 0;
+ else if (opt->short_name == 'v') {
+ if (*target >= 0)
+ (*target)++;
+ else
+ *target = 1;
+ } else {
+ if (*target <= 0)
+ (*target)--;
+ else
+ *target = -1;
+ }
+ return 0;
+}
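+
+/*
+ * A sketch of the resulting semantics, assuming an int wired up through
+ * OPT__VERBOSITY(&verbose): every -v pushes the counter further above
+ * zero and every -q further below it, and switching direction resets it
+ * to 1 or -1.  So "-vvv" yields 3, "-qq" yields -2, and --no-verbose or
+ * --no-quiet yields 0.
+ */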
+
+static struct option *
+find_option(struct option *opts, int shortopt, const char *longopt)
+{
+ for (; opts->type != OPTION_END; opts++) {
+ if ((shortopt && opts->short_name == shortopt) ||
+ (opts->long_name && longopt &&
+ !strcmp(opts->long_name, longopt)))
+ return opts;
+ }
+ return NULL;
+}
+
+void set_option_flag(struct option *opts, int shortopt, const char *longopt,
+ int flag)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (opt)
+ opt->flags |= flag;
+ return;
+}
+
+void set_option_nobuild(struct option *opts, int shortopt,
+ const char *longopt,
+ const char *build_opt,
+ bool can_skip)
+{
+ struct option *opt = find_option(opts, shortopt, longopt);
+
+ if (!opt)
+ return;
+
+ opt->flags |= PARSE_OPT_NOBUILD;
+ opt->flags |= can_skip ? PARSE_OPT_CANSKIP : 0;
+ opt->build_opt = build_opt;
+}
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
new file mode 100644
index 000000000..92fdbe151
--- /dev/null
+++ b/tools/lib/subcmd/parse-options.h
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_PARSE_OPTIONS_H
+#define __SUBCMD_PARSE_OPTIONS_H
+
+#include <linux/kernel.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#ifndef NORETURN
+#define NORETURN __attribute__((__noreturn__))
+#endif
+
+enum parse_opt_type {
+ /* special types */
+ OPTION_END,
+ OPTION_ARGUMENT,
+ OPTION_GROUP,
+ /* options with no arguments */
+ OPTION_BIT,
+ OPTION_BOOLEAN,
+ OPTION_INCR,
+ OPTION_SET_UINT,
+ OPTION_SET_PTR,
+ /* options with arguments (usually) */
+ OPTION_STRING,
+ OPTION_INTEGER,
+ OPTION_LONG,
+ OPTION_CALLBACK,
+ OPTION_U64,
+ OPTION_UINTEGER,
+};
+
+enum parse_opt_flags {
+ PARSE_OPT_KEEP_DASHDASH = 1,
+ PARSE_OPT_STOP_AT_NON_OPTION = 2,
+ PARSE_OPT_KEEP_ARGV0 = 4,
+ PARSE_OPT_KEEP_UNKNOWN = 8,
+ PARSE_OPT_NO_INTERNAL_HELP = 16,
+};
+
+enum parse_opt_option_flags {
+ PARSE_OPT_OPTARG = 1,
+ PARSE_OPT_NOARG = 2,
+ PARSE_OPT_NONEG = 4,
+ PARSE_OPT_HIDDEN = 8,
+ PARSE_OPT_LASTARG_DEFAULT = 16,
+ PARSE_OPT_DISABLED = 32,
+ PARSE_OPT_EXCLUSIVE = 64,
+ PARSE_OPT_NOEMPTY = 128,
+ PARSE_OPT_NOBUILD = 256,
+ PARSE_OPT_CANSKIP = 512,
+};
+
+struct option;
+typedef int parse_opt_cb(const struct option *, const char *arg, int unset);
+
+/*
+ * `type`::
+ * holds the type of the option, you must have an OPTION_END last in your
+ * array.
+ *
+ * `short_name`::
+ * the character to use as a short option name, '\0' if none.
+ *
+ * `long_name`::
+ * the long option name, without the leading dashes, NULL if none.
+ *
+ * `value`::
+ * stores pointers to the values to be filled.
+ *
+ * `argh`::
+ * token to explain the kind of argument this option wants. Keep it
+ *   homogeneous across the repository.
+ *
+ * `help`::
+ * the short help associated to what the option does.
+ * Must never be NULL (except for OPTION_END).
+ * OPTION_GROUP uses this pointer to store the group header.
+ *
+ * `flags`::
+ * mask of parse_opt_option_flags.
+ *   PARSE_OPT_OPTARG: says that the argument is optional (not for BOOLEANs)
+ *   PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs
+ *   PARSE_OPT_NONEG: says that this option cannot be negated
+ *   PARSE_OPT_HIDDEN: this option is skipped in the default usage, and shown
+ *                     only in the full one.
+ *
+ * `callback`::
+ * pointer to the callback to use for OPTION_CALLBACK.
+ *
+ * `defval`::
+ * default value to fill (*->value) with for PARSE_OPT_OPTARG.
+ * OPTION_{BIT,SET_UINT,SET_PTR} store the {mask,integer,pointer} to put in
+ * the value when met.
+ *   CALLBACKS can use it however they want.
+ *
+ * `set`::
+ * whether an option was set by the user
+ */
+struct option {
+ enum parse_opt_type type;
+ int short_name;
+ const char *long_name;
+ void *value;
+ const char *argh;
+ const char *help;
+ const char *build_opt;
+
+ int flags;
+ parse_opt_cb *callback;
+ intptr_t defval;
+ bool *set;
+ void *data;
+ const struct option *parent;
+};
+
+#define check_vtype(v, type) ( BUILD_BUG_ON_ZERO(!__builtin_types_compatible_p(typeof(v), type)) + v )
+
+#define OPT_END() { .type = OPTION_END }
+#define OPT_PARENT(p) { .type = OPTION_END, .parent = (p) }
+#define OPT_ARGUMENT(l, h) { .type = OPTION_ARGUMENT, .long_name = (l), .help = (h) }
+#define OPT_GROUP(h) { .type = OPTION_GROUP, .help = (h) }
+#define OPT_BIT(s, l, v, h, b) { .type = OPTION_BIT, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h), .defval = (b) }
+#define OPT_BOOLEAN(s, l, v, h) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h) }
+#define OPT_BOOLEAN_FLAG(s, l, v, h, f) { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), .value = check_vtype(v, bool *), .help = (h), .flags = (f) }
+#define OPT_BOOLEAN_SET(s, l, v, os, h) \
+ { .type = OPTION_BOOLEAN, .short_name = (s), .long_name = (l), \
+ .value = check_vtype(v, bool *), .help = (h), \
+ .set = check_vtype(os, bool *)}
+#define OPT_INCR(s, l, v, h) { .type = OPTION_INCR, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
+#define OPT_SET_UINT(s, l, v, h, i) { .type = OPTION_SET_UINT, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h), .defval = (i) }
+#define OPT_SET_PTR(s, l, v, h, p) { .type = OPTION_SET_PTR, .short_name = (s), .long_name = (l), .value = (v), .help = (h), .defval = (p) }
+#define OPT_INTEGER(s, l, v, h) { .type = OPTION_INTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, int *), .help = (h) }
+#define OPT_UINTEGER(s, l, v, h) { .type = OPTION_UINTEGER, .short_name = (s), .long_name = (l), .value = check_vtype(v, unsigned int *), .help = (h) }
+#define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
+#define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
+#define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h) }
+#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
+ { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+ .value = check_vtype(v, const char **), .argh =(a), .help = (h), \
+ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
+#define OPT_STRING_OPTARG_SET(s, l, v, os, a, h, d) \
+ { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \
+ .value = check_vtype(v, const char **), .argh = (a), .help = (h), \
+ .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d), \
+ .set = check_vtype(os, bool *)}
+#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), .argh = (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
+#define OPT_DATE(s, l, v, h) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
+#define OPT_CALLBACK(s, l, v, a, h, f) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) }
+#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
+#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .defval = (intptr_t)d, .flags = PARSE_OPT_LASTARG_DEFAULT }
+#define OPT_CALLBACK_DEFAULT_NOOPT(s, l, v, a, h, f, d) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l),\
+	.value = (v), .argh = (a), .help = (h), .callback = (f), .defval = (intptr_t)d,\
+ .flags = PARSE_OPT_LASTARG_DEFAULT | PARSE_OPT_NOARG}
+#define OPT_CALLBACK_OPTARG(s, l, v, d, a, h, f) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), \
+ .value = (v), .argh = (a), .help = (h), .callback = (f), \
+ .flags = PARSE_OPT_OPTARG, .data = (d) }
+
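+/*
+ * Example (a minimal sketch; the variables and help strings are
+ * hypothetical): a typical option table built from the macros above:
+ *
+ *	static bool force;
+ *	static int verbose;
+ *	static const char *output;
+ *	static unsigned int count = 1;
+ *
+ *	static const struct option options[] = {
+ *		OPT_BOOLEAN('f', "force", &force, "overwrite existing files"),
+ *		OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ *		OPT_STRING('o', "output", &output, "file", "output file name"),
+ *		OPT_UINTEGER('c', "count", &count, "number of iterations"),
+ *		OPT_END()
+ *	};
+ *
+ * check_vtype() makes an initializer fail to compile when the variable's
+ * type does not match what the option type expects.
+ */
+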
+/* parse_options() will filter out the processed options and leave the
+ * non-option arguments in argv[].
+ * Returns the number of arguments left in argv[].
+ *
+ * NOTE: parse_options() and parse_options_subcommand() may call exit() in the
+ * case of an error (or for 'special' options like --list-cmds or --list-opts).
+ */
+extern int parse_options(int argc, const char **argv,
+ const struct option *options,
+ const char * const usagestr[], int flags);
+
+extern int parse_options_subcommand(int argc, const char **argv,
+ const struct option *options,
+ const char *const subcommands[],
+ const char *usagestr[], int flags);
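+
+/*
+ * Example (a minimal sketch; the names are hypothetical): with a NULL
+ * usagestr[0], parse_options_subcommand() builds the usage line
+ * "<exec_name> <argv[0]> [<options>] {fetch|push}" itself:
+ *
+ *	static const char *subcommands[] = { "fetch", "push", NULL };
+ *	static const char *usage[] = { NULL, NULL };
+ *
+ *	argc = parse_options_subcommand(argc, argv, options, subcommands,
+ *					usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ *
+ * On return, argv[0] (if argc > 0) names the subcommand to dispatch on.
+ */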
+
+extern NORETURN void usage_with_options(const char * const *usagestr,
+ const struct option *options);
+extern NORETURN __attribute__((format(printf,3,4)))
+void usage_with_options_msg(const char * const *usagestr,
+ const struct option *options,
+ const char *fmt, ...);
+
+/*----- incremental advanced APIs -----*/
+
+enum {
+ PARSE_OPT_HELP = -1,
+ PARSE_OPT_DONE,
+ PARSE_OPT_LIST_OPTS,
+ PARSE_OPT_LIST_SUBCMDS,
+ PARSE_OPT_UNKNOWN,
+};
+
+/*
+ * It's okay for the caller to consume argv/argc in the usual way.
+ * Other fields of that structure are private to parse-options and should not
+ * be modified in any way.
+ */
+struct parse_opt_ctx_t {
+ const char **argv;
+ const char **out;
+ int argc, cpidx;
+ const char *opt;
+ const struct option *excl_opt;
+ int flags;
+};
+
+extern int parse_options_usage(const char * const *usagestr,
+ const struct option *opts,
+ const char *optstr,
+ bool short_opt);
+
+
+/*----- some often used options -----*/
+extern int parse_opt_abbrev_cb(const struct option *, const char *, int);
+extern int parse_opt_approxidate_cb(const struct option *, const char *, int);
+extern int parse_opt_verbosity_cb(const struct option *, const char *, int);
+
+#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose")
+#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet")
+#define OPT__VERBOSITY(var) \
+ { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \
+ PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \
+ { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \
+ PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }
+#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run")
+#define OPT__ABBREV(var) \
+ { OPTION_CALLBACK, 0, "abbrev", (var), "n", \
+ "use <n> digits to display SHA-1s", \
+ PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 }
+
+extern const char *parse_options_fix_filename(const char *prefix, const char *file);
+
+void set_option_flag(struct option *opts, int sopt, const char *lopt, int flag);
+void set_option_nobuild(struct option *opts, int shortopt, const char *longopt,
+ const char *build_opt, bool can_skip);
+
+#endif /* __SUBCMD_PARSE_OPTIONS_H */
diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c
new file mode 100644
index 000000000..5cdac2162
--- /dev/null
+++ b/tools/lib/subcmd/run-command.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <linux/string.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include "subcmd-util.h"
+#include "run-command.h"
+#include "exec-cmd.h"
+
+#define STRERR_BUFSIZE 128
+
+static inline void close_pair(int fd[2])
+{
+ close(fd[0]);
+ close(fd[1]);
+}
+
+static inline void dup_devnull(int to)
+{
+ int fd = open("/dev/null", O_RDWR);
+ dup2(fd, to);
+ close(fd);
+}
+
+int start_command(struct child_process *cmd)
+{
+ int need_in, need_out, need_err;
+ int fdin[2], fdout[2], fderr[2];
+ char sbuf[STRERR_BUFSIZE];
+
+ /*
+ * In case of errors we must keep the promise to close FDs
+ * that have been passed in via ->in and ->out.
+ */
+
+ need_in = !cmd->no_stdin && cmd->in < 0;
+ if (need_in) {
+ if (pipe(fdin) < 0) {
+ if (cmd->out > 0)
+ close(cmd->out);
+ return -ERR_RUN_COMMAND_PIPE;
+ }
+ cmd->in = fdin[1];
+ }
+
+ need_out = !cmd->no_stdout
+ && !cmd->stdout_to_stderr
+ && cmd->out < 0;
+ if (need_out) {
+ if (pipe(fdout) < 0) {
+ if (need_in)
+ close_pair(fdin);
+ else if (cmd->in)
+ close(cmd->in);
+ return -ERR_RUN_COMMAND_PIPE;
+ }
+ cmd->out = fdout[0];
+ }
+
+ need_err = !cmd->no_stderr && cmd->err < 0;
+ if (need_err) {
+ if (pipe(fderr) < 0) {
+ if (need_in)
+ close_pair(fdin);
+ else if (cmd->in)
+ close(cmd->in);
+ if (need_out)
+ close_pair(fdout);
+ else if (cmd->out)
+ close(cmd->out);
+ return -ERR_RUN_COMMAND_PIPE;
+ }
+ cmd->err = fderr[0];
+ }
+
+ fflush(NULL);
+ cmd->pid = fork();
+ if (!cmd->pid) {
+ if (cmd->no_stdin)
+ dup_devnull(0);
+ else if (need_in) {
+ dup2(fdin[0], 0);
+ close_pair(fdin);
+ } else if (cmd->in) {
+ dup2(cmd->in, 0);
+ close(cmd->in);
+ }
+
+ if (cmd->no_stderr)
+ dup_devnull(2);
+ else if (need_err) {
+ dup2(fderr[1], 2);
+ close_pair(fderr);
+ }
+
+ if (cmd->no_stdout)
+ dup_devnull(1);
+ else if (cmd->stdout_to_stderr)
+ dup2(2, 1);
+ else if (need_out) {
+ dup2(fdout[1], 1);
+ close_pair(fdout);
+ } else if (cmd->out > 1) {
+ dup2(cmd->out, 1);
+ close(cmd->out);
+ }
+
+ if (cmd->dir && chdir(cmd->dir))
+ die("exec %s: cd to %s failed (%s)", cmd->argv[0],
+ cmd->dir, str_error_r(errno, sbuf, sizeof(sbuf)));
+ if (cmd->env) {
+ for (; *cmd->env; cmd->env++) {
+ if (strchr(*cmd->env, '='))
+ putenv((char*)*cmd->env);
+ else
+ unsetenv(*cmd->env);
+ }
+ }
+ if (cmd->preexec_cb)
+ cmd->preexec_cb();
+ if (cmd->exec_cmd) {
+ execv_cmd(cmd->argv);
+ } else {
+ execvp(cmd->argv[0], (char *const*) cmd->argv);
+ }
+ exit(127);
+ }
+
+ if (cmd->pid < 0) {
+ int err = errno;
+ if (need_in)
+ close_pair(fdin);
+ else if (cmd->in)
+ close(cmd->in);
+ if (need_out)
+ close_pair(fdout);
+ else if (cmd->out)
+ close(cmd->out);
+ if (need_err)
+ close_pair(fderr);
+ return err == ENOENT ?
+ -ERR_RUN_COMMAND_EXEC :
+ -ERR_RUN_COMMAND_FORK;
+ }
+
+ if (need_in)
+ close(fdin[0]);
+ else if (cmd->in)
+ close(cmd->in);
+
+ if (need_out)
+ close(fdout[1]);
+ else if (cmd->out)
+ close(cmd->out);
+
+ if (need_err)
+ close(fderr[1]);
+
+ return 0;
+}
+
+static int wait_or_whine(pid_t pid)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ for (;;) {
+ int status, code;
+ pid_t waiting = waitpid(pid, &status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ fprintf(stderr, " Error: waitpid failed (%s)",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -ERR_RUN_COMMAND_WAITPID;
+ }
+ if (waiting != pid)
+ return -ERR_RUN_COMMAND_WAITPID_WRONG_PID;
+ if (WIFSIGNALED(status))
+ return -ERR_RUN_COMMAND_WAITPID_SIGNAL;
+
+ if (!WIFEXITED(status))
+ return -ERR_RUN_COMMAND_WAITPID_NOEXIT;
+ code = WEXITSTATUS(status);
+ switch (code) {
+ case 127:
+ return -ERR_RUN_COMMAND_EXEC;
+ case 0:
+ return 0;
+ default:
+ return -code;
+ }
+ }
+}
+
+int finish_command(struct child_process *cmd)
+{
+ return wait_or_whine(cmd->pid);
+}
+
+int run_command(struct child_process *cmd)
+{
+ int code = start_command(cmd);
+ if (code)
+ return code;
+ return finish_command(cmd);
+}
+
+static void prepare_run_command_v_opt(struct child_process *cmd,
+ const char **argv,
+ int opt)
+{
+ memset(cmd, 0, sizeof(*cmd));
+ cmd->argv = argv;
+ cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0;
+ cmd->exec_cmd = opt & RUN_EXEC_CMD ? 1 : 0;
+ cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 1 : 0;
+}
+
+int run_command_v_opt(const char **argv, int opt)
+{
+ struct child_process cmd;
+ prepare_run_command_v_opt(&cmd, argv, opt);
+ return run_command(&cmd);
+}
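+
+/*
+ * Example (a minimal sketch; the command is arbitrary): run a child with
+ * stdin redirected to /dev/null and its stdout folded into stderr:
+ *
+ *	const char *argv[] = { "uname", "-a", NULL };
+ *	int err = run_command_v_opt(argv, RUN_COMMAND_NO_STDIN |
+ *					  RUN_COMMAND_STDOUT_TO_STDERR);
+ *
+ * A zero return means the child exited with status 0; negative values
+ * carry either an -ERR_RUN_COMMAND_* code or the negated exit status.
+ */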
diff --git a/tools/lib/subcmd/run-command.h b/tools/lib/subcmd/run-command.h
new file mode 100644
index 000000000..17d969c6a
--- /dev/null
+++ b/tools/lib/subcmd/run-command.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_RUN_COMMAND_H
+#define __SUBCMD_RUN_COMMAND_H
+
+#include <unistd.h>
+
+enum {
+ ERR_RUN_COMMAND_FORK = 10000,
+ ERR_RUN_COMMAND_EXEC,
+ ERR_RUN_COMMAND_PIPE,
+ ERR_RUN_COMMAND_WAITPID,
+ ERR_RUN_COMMAND_WAITPID_WRONG_PID,
+ ERR_RUN_COMMAND_WAITPID_SIGNAL,
+ ERR_RUN_COMMAND_WAITPID_NOEXIT,
+};
+#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK)
+
+struct child_process {
+ const char **argv;
+ pid_t pid;
+ /*
+ * Using .in, .out, .err:
+ * - Specify 0 for no redirections (child inherits stdin, stdout,
+ * stderr from parent).
+ * - Specify -1 to have a pipe allocated as follows:
+ * .in: returns the writable pipe end; parent writes to it,
+ * the readable pipe end becomes child's stdin
+ * .out, .err: returns the readable pipe end; parent reads from
+ * it, the writable pipe end becomes child's stdout/stderr
+ * The caller of start_command() must close the returned FDs
+ * after it has completed reading from/writing to it!
+ * - Specify > 0 to set a channel to a particular FD as follows:
+ * .in: a readable FD, becomes child's stdin
+ *       .out: a writable FD, becomes child's stdout
+ * .err > 0 not supported
+ * The specified FD is closed by start_command(), even in case
+ * of errors!
+ */
+ int in;
+ int out;
+ int err;
+ const char *dir;
+ const char *const *env;
+ unsigned no_stdin:1;
+ unsigned no_stdout:1;
+ unsigned no_stderr:1;
+	unsigned exec_cmd:1; /* if this is to be an external sub-command */
+ unsigned stdout_to_stderr:1;
+ void (*preexec_cb)(void);
+};
+
+int start_command(struct child_process *);
+int finish_command(struct child_process *);
+int run_command(struct child_process *);
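+
+/*
+ * Example (a minimal sketch): capture a child's stdout through a pipe by
+ * setting .out = -1; start_command() replaces it with the readable pipe
+ * end, which the caller must drain and close before finish_command():
+ *
+ *	const char *argv[] = { "date", NULL };
+ *	struct child_process child = { .argv = argv, .out = -1 };
+ *
+ *	if (!start_command(&child)) {
+ *		... read from child.out until EOF ...
+ *		close(child.out);
+ *		finish_command(&child);
+ *	}
+ */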
+
+#define RUN_COMMAND_NO_STDIN 1
+#define RUN_EXEC_CMD 2 /* if this is to be an external sub-command */
+#define RUN_COMMAND_STDOUT_TO_STDERR 4
+int run_command_v_opt(const char **argv, int opt);
+
+#endif /* __SUBCMD_RUN_COMMAND_H */
diff --git a/tools/lib/subcmd/sigchain.c b/tools/lib/subcmd/sigchain.c
new file mode 100644
index 000000000..f0fe3dbef
--- /dev/null
+++ b/tools/lib/subcmd/sigchain.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+#include "subcmd-util.h"
+#include "sigchain.h"
+
+#define SIGCHAIN_MAX_SIGNALS 32
+
+struct sigchain_signal {
+ sigchain_fun *old;
+ int n;
+ int alloc;
+};
+static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS];
+
+static void check_signum(int sig)
+{
+ if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS)
+ die("BUG: signal out of range: %d", sig);
+}
+
+static int sigchain_push(int sig, sigchain_fun f)
+{
+ struct sigchain_signal *s = signals + sig;
+ check_signum(sig);
+
+ ALLOC_GROW(s->old, s->n + 1, s->alloc);
+ s->old[s->n] = signal(sig, f);
+ if (s->old[s->n] == SIG_ERR)
+ return -1;
+ s->n++;
+ return 0;
+}
+
+int sigchain_pop(int sig)
+{
+ struct sigchain_signal *s = signals + sig;
+ check_signum(sig);
+ if (s->n < 1)
+ return 0;
+
+ if (signal(sig, s->old[s->n - 1]) == SIG_ERR)
+ return -1;
+ s->n--;
+ return 0;
+}
+
+void sigchain_push_common(sigchain_fun f)
+{
+ sigchain_push(SIGINT, f);
+ sigchain_push(SIGHUP, f);
+ sigchain_push(SIGTERM, f);
+ sigchain_push(SIGQUIT, f);
+ sigchain_push(SIGPIPE, f);
+}
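+
+/*
+ * Example (a minimal sketch; remove_tmpfile() is hypothetical): the usual
+ * pattern chains a cleanup handler onto the common fatal signals; the
+ * handler pops itself and re-raises so any previous handler still runs:
+ *
+ *	static void remove_tmpfile_on_signal(int sig)
+ *	{
+ *		remove_tmpfile();
+ *		sigchain_pop(sig);
+ *		raise(sig);
+ *	}
+ *
+ *	sigchain_push_common(remove_tmpfile_on_signal);
+ */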
diff --git a/tools/lib/subcmd/sigchain.h b/tools/lib/subcmd/sigchain.h
new file mode 100644
index 000000000..1ec663af4
--- /dev/null
+++ b/tools/lib/subcmd/sigchain.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_SIGCHAIN_H
+#define __SUBCMD_SIGCHAIN_H
+
+typedef void (*sigchain_fun)(int);
+
+int sigchain_pop(int sig);
+
+void sigchain_push_common(sigchain_fun f);
+
+#endif /* __SUBCMD_SIGCHAIN_H */
diff --git a/tools/lib/subcmd/subcmd-config.c b/tools/lib/subcmd/subcmd-config.c
new file mode 100644
index 000000000..84a7cf6c7
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "subcmd-config.h"
+
+#define UNDEFINED "SUBCMD_HAS_NOT_BEEN_INITIALIZED"
+
+struct subcmd_config subcmd_config = {
+ .exec_name = UNDEFINED,
+ .prefix = UNDEFINED,
+ .exec_path = UNDEFINED,
+ .exec_path_env = UNDEFINED,
+ .pager_env = UNDEFINED,
+};
diff --git a/tools/lib/subcmd/subcmd-config.h b/tools/lib/subcmd/subcmd-config.h
new file mode 100644
index 000000000..9024dc17d
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-config.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SUBCMD_CONFIG_H
+#define __PERF_SUBCMD_CONFIG_H
+
+struct subcmd_config {
+ const char *exec_name;
+ const char *prefix;
+ const char *exec_path;
+ const char *exec_path_env;
+ const char *pager_env;
+};
+
+extern struct subcmd_config subcmd_config;
+
+#endif /* __PERF_SUBCMD_CONFIG_H */
diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h
new file mode 100644
index 000000000..b2aec04fc
--- /dev/null
+++ b/tools/lib/subcmd/subcmd-util.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SUBCMD_UTIL_H
+#define __SUBCMD_UTIL_H
+
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#define NORETURN __attribute__((__noreturn__))
+
+static inline void report(const char *prefix, const char *err, va_list params)
+{
+ char msg[1024];
+ vsnprintf(msg, sizeof(msg), err, params);
+ fprintf(stderr, " %s%s\n", prefix, msg);
+}
+
+static NORETURN inline void die(const char *err, ...)
+{
+ va_list params;
+
+ va_start(params, err);
+ report(" Fatal: ", err, params);
+	va_end(params);
+	exit(128);
+}
+
+#define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+/*
+ * Realloc the buffer pointed at by variable 'x' so that it can hold
+ * at least 'nr' entries; the number of entries currently allocated
+ * is 'alloc'; growth follows the standard alloc_nr() factor.
+ *
+ * DO NOT USE any expression with side effects for 'x' or 'alloc'.
+ */
+#define ALLOC_GROW(x, nr, alloc) \
+ do { \
+ if ((nr) > alloc) { \
+ if (alloc_nr(alloc) < (nr)) \
+ alloc = (nr); \
+ else \
+ alloc = alloc_nr(alloc); \
+ x = xrealloc((x), alloc * sizeof(*(x))); \
+ } \
+ } while(0)
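+
+/*
+ * Example (a minimal sketch): appending one element to a growable array,
+ * where 'nr' counts the used slots and 'alloc' the allocated capacity:
+ *
+ *	static int *items;
+ *	static int nr, alloc;
+ *
+ *	ALLOC_GROW(items, nr + 1, alloc);
+ *	items[nr++] = value;
+ */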
+
+static inline void *xrealloc(void *ptr, size_t size)
+{
+ void *ret = realloc(ptr, size);
+ if (!ret)
+ die("Out of memory, realloc failed");
+ return ret;
+}
+
+#define astrcatf(out, fmt, ...) \
+({ \
+ char *tmp = *(out); \
+ if (asprintf((out), "%s" fmt, tmp ?: "", ## __VA_ARGS__) == -1) \
+ die("asprintf failed"); \
+ free(tmp); \
+})
+
+static inline void astrcat(char **out, const char *add)
+{
+ char *tmp = *out;
+
+ if (asprintf(out, "%s%s", tmp ?: "", add) == -1)
+ die("asprintf failed");
+
+ free(tmp);
+}
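+
+/*
+ * Example (a minimal sketch): both helpers treat a NULL *out as the empty
+ * string, so a string can be built up from scratch:
+ *
+ *	char *buf = NULL;
+ *
+ *	astrcatf(&buf, "%s [<options>]", "mytool");
+ *	astrcat(&buf, " {run|stop}");
+ */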
+
+#endif /* __SUBCMD_UTIL_H */
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
new file mode 100644
index 000000000..96d830545
--- /dev/null
+++ b/tools/lib/symbol/kallsyms.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
+#include "symbol/kallsyms.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+u8 kallsyms2elf_type(char type)
+{
+ type = tolower(type);
+ return (type == 't' || type == 'w') ? STT_FUNC : STT_OBJECT;
+}
+
+bool kallsyms__is_function(char symbol_type)
+{
+ symbol_type = toupper(symbol_type);
+ return symbol_type == 'T' || symbol_type == 'W';
+}
+
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start))
+{
+ char *line = NULL;
+ size_t n;
+ int err = -1;
+ FILE *file = fopen(filename, "r");
+
+ if (file == NULL)
+ goto out_failure;
+
+ err = 0;
+
+ while (!feof(file)) {
+ u64 start;
+ int line_len, len;
+ char symbol_type;
+ char *symbol_name;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0 || !line)
+ break;
+
+		line[--line_len] = '\0'; /* chop the trailing '\n' */
+
+ len = hex2u64(line, &start);
+
+ /* Skip the line if we failed to parse the address. */
+ if (!len)
+ continue;
+
+ len++;
+ if (len + 2 >= line_len)
+ continue;
+
+ symbol_type = line[len];
+ len += 2;
+ symbol_name = line + len;
+ len = line_len - len;
+
+ if (len >= KSYM_NAME_LEN) {
+ err = -1;
+ break;
+ }
+
+ err = process_symbol(arg, symbol_name, symbol_type, start);
+ if (err)
+ break;
+ }
+
+ free(line);
+ fclose(file);
+ return err;
+
+out_failure:
+ return -1;
+}
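+
+/*
+ * Example (a minimal sketch; the callback is hypothetical): count the
+ * function symbols in /proc/kallsyms:
+ *
+ *	static int count_functions(void *arg, const char *name,
+ *				   char type, u64 start)
+ *	{
+ *		if (kallsyms__is_function(type))
+ *			(*(int *)arg)++;
+ *		return 0;
+ *	}
+ *
+ *	int nr = 0;
+ *	kallsyms__parse("/proc/kallsyms", &nr, count_functions);
+ */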
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
new file mode 100644
index 000000000..72ab98704
--- /dev/null
+++ b/tools/lib/symbol/kallsyms.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TOOLS_KALLSYMS_H_
+#define __TOOLS_KALLSYMS_H_ 1
+
+#include <elf.h>
+#include <linux/ctype.h>
+#include <linux/types.h>
+
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 256
+#endif
+
+static inline u8 kallsyms2elf_binding(char type)
+{
+ if (type == 'W')
+ return STB_WEAK;
+
+ return isupper(type) ? STB_GLOBAL : STB_LOCAL;
+}
+
+u8 kallsyms2elf_type(char type);
+
+bool kallsyms__is_function(char symbol_type);
+
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start));
+
+#endif /* __TOOLS_KALLSYMS_H_ */
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
new file mode 100644
index 000000000..9e9f25fb1
--- /dev/null
+++ b/tools/lib/traceevent/.gitignore
@@ -0,0 +1,3 @@
+TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list
+libtraceevent.so.*
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
new file mode 100644
index 000000000..c681d0575
--- /dev/null
+++ b/tools/lib/traceevent/Build
@@ -0,0 +1,17 @@
+libtraceevent-y += event-parse.o
+libtraceevent-y += event-plugin.o
+libtraceevent-y += trace-seq.o
+libtraceevent-y += parse-filter.o
+libtraceevent-y += parse-utils.o
+libtraceevent-y += kbuffer-parse.o
+
+plugin_jbd2-y += plugin_jbd2.o
+plugin_hrtimer-y += plugin_hrtimer.o
+plugin_kmem-y += plugin_kmem.o
+plugin_kvm-y += plugin_kvm.o
+plugin_mac80211-y += plugin_mac80211.o
+plugin_sched_switch-y += plugin_sched_switch.o
+plugin_function-y += plugin_function.o
+plugin_xen-y += plugin_xen.o
+plugin_scsi-y += plugin_scsi.o
+plugin_cfg80211-y += plugin_cfg80211.o
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
new file mode 100644
index 000000000..b7f7e4e54
--- /dev/null
+++ b/tools/lib/traceevent/Makefile
@@ -0,0 +1,301 @@
+# SPDX-License-Identifier: GPL-2.0
+# trace-cmd version
+EP_VERSION = 1
+EP_PATCHLEVEL = 1
+EP_EXTRAVERSION = 0
+
+# file format version
+FILE_VERSION = 6
+
+MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
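+#
+# So e.g. "make CC=clang" or "make CROSS_COMPILE=aarch64-linux-gnu-" take
+# effect, while a plain "make" keeps the gcc/ar/nm defaults.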
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+
+prefix ?= /usr/local
+libdir = $(prefix)/$(libdir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+
+export man_dir man_dir_SQ INSTALL
+export DESTDIR DESTDIR_SQ
+
+set_plugin_dir := 1
+
+# Set plugin_dir to the preferred global plugin location.
+# If we install under the $HOME directory we go under
+# $(HOME)/.local/lib/traceevent/plugins
+#
+# We don't set PLUGIN_DIR when installing under the $HOME
+# directory, because the code already looks under
+# $(HOME)/.local/lib/traceevent/plugins by default.
+#
+ifeq ($(plugin_dir),)
+ifeq ($(prefix),$(HOME))
+override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
+set_plugin_dir := 0
+else
+override plugin_dir = $(libdir)/traceevent/plugins
+endif
+endif
+
+ifeq ($(set_plugin_dir),1)
+PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
+PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
+endif
+
+include ../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+export prefix libdir src obj
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
+plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
+
+CONFIG_INCLUDES =
+CONFIG_LIBS =
+CONFIG_FLAGS =
+
+VERSION = $(EP_VERSION)
+PATCHLEVEL = $(EP_PATCHLEVEL)
+EXTRAVERSION = $(EP_EXTRAVERSION)
+
+OBJ = $@
+N =
+
+EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
+
+LIB_TARGET = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
+LIB_INSTALL = libtraceevent.a libtraceevent.so*
+LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))
+
+INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS
+ifdef EXTRA_CFLAGS
+ CFLAGS := $(EXTRA_CFLAGS)
+else
+ CFLAGS := -g -Wall
+endif
+
+# Append required CFLAGS
+override CFLAGS += -fPIC
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+# Disable command line variables (CFLAGS) override from top
+# level Makefile (perf), otherwise build Makefile will get
+# the same command line setup.
+MAKEOVERRIDES=
+
+export srctree OUTPUT CC LD CFLAGS V
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
+
+PLUGINS = plugin_jbd2.so
+PLUGINS += plugin_hrtimer.so
+PLUGINS += plugin_kmem.so
+PLUGINS += plugin_kvm.so
+PLUGINS += plugin_mac80211.so
+PLUGINS += plugin_sched_switch.so
+PLUGINS += plugin_function.so
+PLUGINS += plugin_xen.so
+PLUGINS += plugin_scsi.so
+PLUGINS += plugin_cfg80211.so
+
+PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS))
+PLUGINS_IN := $(PLUGINS:.so=-in.o)
+
+TE_IN := $(OUTPUT)libtraceevent-in.o
+LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
+
+CMD_TARGETS = $(LIB_TARGET) $(PLUGINS) $(DYNAMIC_LIST_FILE)
+
+TARGETS = $(CMD_TARGETS)
+
+all: all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+$(TE_IN): force
+ $(Q)$(MAKE) $(build)=libtraceevent
+
+$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
+ $(QUIET_LINK)$(CC) --shared $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@
+ @ln -sf $(@F) $(OUTPUT)libtraceevent.so
+ @ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION)
+
+$(OUTPUT)libtraceevent.a: $(TE_IN)
+ $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+ $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
+plugins: $(PLUGINS)
+
+__plugin_obj = $(notdir $@)
+ plugin_obj = $(__plugin_obj:-in.o=)
+
+$(PLUGINS_IN): force
+ $(Q)$(MAKE) $(build)=$(plugin_obj)
+
+$(OUTPUT)%.so: $(OUTPUT)%-in.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^
+
+define make_version.h
+ (echo '/* This file is automatically generated. Do not modify. */'; \
+ echo \#define VERSION_CODE $(shell \
+ expr $(VERSION) \* 256 + $(PATCHLEVEL)); \
+ echo '#define EXTRAVERSION ' $(EXTRAVERSION); \
+ echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \
+ echo '#define FILE_VERSION '$(FILE_VERSION); \
+ ) > $1
+endef
+
+define update_version.h
+ ($(call make_version.h, $@.tmp); \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ echo ' UPDATE $@'; \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+ep_version.h: force
+ $(Q)$(N)$(call update_version.h)
+
+VERSION_FILES = ep_version.h
+
+define update_dir
+ (echo $1 > $@.tmp; \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ echo ' UPDATE $@'; \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+tags: force
+ $(RM) tags
+ find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+ --regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
+
+TAGS: force
+ $(RM) TAGS
+ find . -name '*.[ch]' | xargs etags \
+ --regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
+
+define do_install_mkdir
+ if [ ! -d '$(DESTDIR_SQ)$1' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+ fi
+endef
+
+define do_install
+ $(call do_install_mkdir,$2); \
+ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
+endef
+
+define do_install_plugins
+ for plugin in $1; do \
+ $(call do_install,$$plugin,$(plugin_dir_SQ)); \
+ done
+endef
+
+define do_generate_dynamic_list_file
+ symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
+ xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
+ if [ "$$symbol_type" = "U W" ];then \
+ (echo '{'; \
+ $(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\
+ echo '};'; \
+ ) > $2; \
+ else \
+ (echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
+ fi
+endef
+
+install_lib: all_cmd install_plugins
+ $(call QUIET_INSTALL, $(LIB_TARGET)) \
+ $(call do_install_mkdir,$(libdir_SQ)); \
+ cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
+
+install_plugins: $(PLUGINS)
+ $(call QUIET_INSTALL, trace_plugins) \
+ $(call do_install_plugins, $(PLUGINS))
+
+install_headers:
+ $(call QUIET_INSTALL, headers) \
+ $(call do_install,event-parse.h,$(prefix)/include/traceevent,644); \
+ $(call do_install,event-utils.h,$(prefix)/include/traceevent,644); \
+ $(call do_install,kbuffer.h,$(prefix)/include/traceevent,644)
+
+install: install_lib
+
+clean:
+ $(call QUIET_CLEAN, libtraceevent) \
+ $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \
+ $(RM) TRACEEVENT-CFLAGS tags TAGS
+
+PHONY += force plugins
+force:
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
new file mode 100644
index 000000000..c0fcc8af2
--- /dev/null
+++ b/tools/lib/traceevent/event-parse.c
@@ -0,0 +1,6892 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ *
+ * The parts for function graph printing were taken and modified from the
+ * Linux kernel and were written by
+ * - Copyright (C) 2009 Frederic Weisbecker,
+ * who gave his permission to relicense the code to
+ * the Lesser General Public License.
+ */
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <limits.h>
+#include <linux/string.h>
+#include <linux/time64.h>
+
+#include <netinet/in.h>
+#include "event-parse.h"
+#include "event-utils.h"
+
+static const char *input_buf;
+static unsigned long long input_buf_ptr;
+static unsigned long long input_buf_siz;
+
+static int is_flag_field;
+static int is_symbolic_field;
+
+static int show_warning = 1;
+
+#define do_warning(fmt, ...) \
+ do { \
+ if (show_warning) \
+ warning(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define do_warning_event(event, fmt, ...) \
+ do { \
+ if (!show_warning) \
+ continue; \
+ \
+ if (event) \
+ warning("[%s:%s] " fmt, event->system, \
+ event->name, ##__VA_ARGS__); \
+ else \
+ warning(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+static void init_input_buf(const char *buf, unsigned long long size)
+{
+ input_buf = buf;
+ input_buf_siz = size;
+ input_buf_ptr = 0;
+}
+
+const char *tep_get_input_buf(void)
+{
+ return input_buf;
+}
+
+unsigned long long tep_get_input_buf_ptr(void)
+{
+ return input_buf_ptr;
+}
+
+struct event_handler {
+ struct event_handler *next;
+ int id;
+ const char *sys_name;
+ const char *event_name;
+ tep_event_handler_func func;
+ void *context;
+};
+
+struct func_params {
+ struct func_params *next;
+ enum tep_func_arg_type type;
+};
+
+struct tep_function_handler {
+ struct tep_function_handler *next;
+ enum tep_func_arg_type ret_type;
+ char *name;
+ tep_func_handler func;
+ struct func_params *params;
+ int nr_args;
+};
+
+static unsigned long long
+process_defined_func(struct trace_seq *s, void *data, int size,
+ struct event_format *event, struct print_arg *arg);
+
+static void free_func_handle(struct tep_function_handler *func);
+
+/**
+ * tep_buffer_init - init buffer for parsing
+ * @buf: buffer to parse
+ * @size: the size of the buffer
+ *
+ * For use with tep_read_token(), this initializes the internal
+ * buffer that tep_read_token() will parse.
+ */
+void tep_buffer_init(const char *buf, unsigned long long size)
+{
+ init_input_buf(buf, size);
+}
+
+void breakpoint(void)
+{
+ static int x;
+ x++;
+}
+
+struct print_arg *alloc_arg(void)
+{
+ return calloc(1, sizeof(struct print_arg));
+}
+
+struct cmdline {
+ char *comm;
+ int pid;
+};
+
+static int cmdline_cmp(const void *a, const void *b)
+{
+ const struct cmdline *ca = a;
+ const struct cmdline *cb = b;
+
+ if (ca->pid < cb->pid)
+ return -1;
+ if (ca->pid > cb->pid)
+ return 1;
+
+ return 0;
+}
+
+struct cmdline_list {
+ struct cmdline_list *next;
+ char *comm;
+ int pid;
+};
+
+static int cmdline_init(struct tep_handle *pevent)
+{
+ struct cmdline_list *cmdlist = pevent->cmdlist;
+ struct cmdline_list *item;
+ struct cmdline *cmdlines;
+ int i;
+
+ cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count);
+ if (!cmdlines)
+ return -1;
+
+ i = 0;
+ while (cmdlist) {
+ cmdlines[i].pid = cmdlist->pid;
+ cmdlines[i].comm = cmdlist->comm;
+ i++;
+ item = cmdlist;
+ cmdlist = cmdlist->next;
+ free(item);
+ }
+
+ qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+
+ pevent->cmdlines = cmdlines;
+ pevent->cmdlist = NULL;
+
+ return 0;
+}
+
+static const char *find_cmdline(struct tep_handle *pevent, int pid)
+{
+ const struct cmdline *comm;
+ struct cmdline key;
+
+ if (!pid)
+ return "<idle>";
+
+ if (!pevent->cmdlines && cmdline_init(pevent))
+ return "<not enough memory for cmdlines!>";
+
+ key.pid = pid;
+
+ comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
+ sizeof(*pevent->cmdlines), cmdline_cmp);
+
+ if (comm)
+ return comm->comm;
+ return "<...>";
+}
+
+/**
+ * tep_pid_is_registered - return whether a pid has a cmdline registered
+ * @pevent: handle for the pevent
+ * @pid: the pid to check for a registered cmdline
+ *
+ * Returns 1 if the pid has a cmdline mapped to it,
+ * 0 otherwise.
+ */
+int tep_pid_is_registered(struct tep_handle *pevent, int pid)
+{
+ const struct cmdline *comm;
+ struct cmdline key;
+
+ if (!pid)
+ return 1;
+
+ if (!pevent->cmdlines && cmdline_init(pevent))
+ return 0;
+
+ key.pid = pid;
+
+ comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
+ sizeof(*pevent->cmdlines), cmdline_cmp);
+
+ if (comm)
+ return 1;
+ return 0;
+}
+
+/*
+ * If the command lines have been converted to an array, then
+ * we must add this pid. This is much slower than when cmdlines
+ * are added before the array is initialized.
+ */
+static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid)
+{
+ struct cmdline *cmdlines = pevent->cmdlines;
+ const struct cmdline *cmdline;
+ struct cmdline key;
+
+ if (!pid)
+ return 0;
+
+ /* avoid duplicates */
+ key.pid = pid;
+
+ cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
+ sizeof(*pevent->cmdlines), cmdline_cmp);
+ if (cmdline) {
+ errno = EEXIST;
+ return -1;
+ }
+
+ cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1));
+ if (!cmdlines) {
+ errno = ENOMEM;
+ return -1;
+ }
+ pevent->cmdlines = cmdlines;
+
+ cmdlines[pevent->cmdline_count].comm = strdup(comm);
+ if (!cmdlines[pevent->cmdline_count].comm) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ cmdlines[pevent->cmdline_count].pid = pid;
+
+ if (cmdlines[pevent->cmdline_count].comm)
+ pevent->cmdline_count++;
+
+ qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+
+ return 0;
+}
+
+/**
+ * tep_register_comm - register a pid / comm mapping
+ * @pevent: handle for the pevent
+ * @comm: the command line to register
+ * @pid: the pid to map the command line to
+ *
+ * This adds a mapping to search for command line names with
+ * a given pid. The comm is duplicated.
+ */
+int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid)
+{
+ struct cmdline_list *item;
+
+ if (pevent->cmdlines)
+ return add_new_comm(pevent, comm, pid);
+
+ item = malloc(sizeof(*item));
+ if (!item)
+ return -1;
+
+ if (comm)
+ item->comm = strdup(comm);
+ else
+ item->comm = strdup("<...>");
+ if (!item->comm) {
+ free(item);
+ return -1;
+ }
+ item->pid = pid;
+ item->next = pevent->cmdlist;
+
+ pevent->cmdlist = item;
+ pevent->cmdline_count++;
+
+ return 0;
+}
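+
+/*
+ * Example (a minimal sketch): mirroring the kernel's pid/comm mapping so
+ * that later output can show task names:
+ *
+ *	tep_register_comm(pevent, "bash", 1234);
+ *	if (tep_pid_is_registered(pevent, 1234))
+ *		... output for pid 1234 can be labeled "bash" ...
+ */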
+
+int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock)
+{
+ pevent->trace_clock = strdup(trace_clock);
+ if (!pevent->trace_clock) {
+ errno = ENOMEM;
+ return -1;
+ }
+ return 0;
+}
+
+struct func_map {
+ unsigned long long addr;
+ char *func;
+ char *mod;
+};
+
+struct func_list {
+ struct func_list *next;
+ unsigned long long addr;
+ char *func;
+ char *mod;
+};
+
+static int func_cmp(const void *a, const void *b)
+{
+ const struct func_map *fa = a;
+ const struct func_map *fb = b;
+
+ if (fa->addr < fb->addr)
+ return -1;
+ if (fa->addr > fb->addr)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * We are searching for the record whose address range contains
+ * the given address, not for an exact match.
+ */
+static int func_bcmp(const void *a, const void *b)
+{
+ const struct func_map *fa = a;
+ const struct func_map *fb = b;
+
+ if ((fa->addr == fb->addr) ||
+
+ (fa->addr > fb->addr &&
+ fa->addr < (fb+1)->addr))
+ return 0;
+
+ if (fa->addr < fb->addr)
+ return -1;
+
+ return 1;
+}
+
+static int func_map_init(struct tep_handle *pevent)
+{
+ struct func_list *funclist;
+ struct func_list *item;
+ struct func_map *func_map;
+ int i;
+
+ func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1));
+ if (!func_map)
+ return -1;
+
+ funclist = pevent->funclist;
+
+ i = 0;
+ while (funclist) {
+ func_map[i].func = funclist->func;
+ func_map[i].addr = funclist->addr;
+ func_map[i].mod = funclist->mod;
+ i++;
+ item = funclist;
+ funclist = funclist->next;
+ free(item);
+ }
+
+ qsort(func_map, pevent->func_count, sizeof(*func_map), func_cmp);
+
+ /*
+ * Add a special record at the end.
+ */
+ func_map[pevent->func_count].func = NULL;
+ func_map[pevent->func_count].addr = 0;
+ func_map[pevent->func_count].mod = NULL;
+
+ pevent->func_map = func_map;
+ pevent->funclist = NULL;
+
+ return 0;
+}
+
+static struct func_map *
+__find_func(struct tep_handle *pevent, unsigned long long addr)
+{
+ struct func_map *func;
+ struct func_map key;
+
+ if (!pevent->func_map)
+ func_map_init(pevent);
+
+ key.addr = addr;
+
+ func = bsearch(&key, pevent->func_map, pevent->func_count,
+ sizeof(*pevent->func_map), func_bcmp);
+
+ return func;
+}
+
+struct func_resolver {
+ tep_func_resolver_t *func;
+ void *priv;
+ struct func_map map;
+};
+
+/**
+ * tep_set_function_resolver - set an alternative function resolver
+ * @pevent: handle for the pevent
+ * @resolver: function to be used
+ * @priv: resolver function private state.
+ *
+ * Some tools may already have a way to resolve kernel functions; allow them to
+ * keep using it instead of duplicating all the entries inside
+ * pevent->funclist.
+ */
+int tep_set_function_resolver(struct tep_handle *pevent,
+ tep_func_resolver_t *func, void *priv)
+{
+ struct func_resolver *resolver = malloc(sizeof(*resolver));
+
+ if (resolver == NULL)
+ return -1;
+
+ resolver->func = func;
+ resolver->priv = priv;
+
+ free(pevent->func_resolver);
+ pevent->func_resolver = resolver;
+
+ return 0;
+}
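+
+/*
+ * Example (a minimal sketch; my_resolver() and its symbol table are
+ * hypothetical): a resolver gets the address by reference so it can
+ * round it down to the start of the containing function, and may also
+ * report a module name:
+ *
+ *	static char *my_resolver(void *priv, unsigned long long *addrp,
+ *				 char **modp)
+ *	{
+ *		struct my_symtab *tab = priv;
+ *		... look up *addrp, round it down to the function start ...
+ *		*modp = NULL;
+ *		return function_name_or_NULL;
+ *	}
+ *
+ *	tep_set_function_resolver(pevent, my_resolver, tab);
+ */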
+
+/**
+ * tep_reset_function_resolver - reset alternative function resolver
+ * @pevent: handle for the pevent
+ *
+ * Stop using whatever alternative resolver was set, use the default
+ * one instead.
+ */
+void tep_reset_function_resolver(struct tep_handle *pevent)
+{
+ free(pevent->func_resolver);
+ pevent->func_resolver = NULL;
+}
+
+static struct func_map *
+find_func(struct tep_handle *pevent, unsigned long long addr)
+{
+ struct func_map *map;
+
+ if (!pevent->func_resolver)
+ return __find_func(pevent, addr);
+
+ map = &pevent->func_resolver->map;
+ map->mod = NULL;
+ map->addr = addr;
+ map->func = pevent->func_resolver->func(pevent->func_resolver->priv,
+ &map->addr, &map->mod);
+ if (map->func == NULL)
+ return NULL;
+
+ return map;
+}
+
+/**
+ * tep_find_function - find a function by a given address
+ * @pevent: handle for the pevent
+ * @addr: the address to find the function with
+ *
+ * Returns a pointer to the function stored that has the given
+ * address. Note, the address does not have to be exact, it
+ * will select the function that would contain the address.
+ */
+const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr)
+{
+ struct func_map *map;
+
+ map = find_func(pevent, addr);
+ if (!map)
+ return NULL;
+
+ return map->func;
+}
+
+/**
+ * tep_find_function_address - find a function address by a given address
+ * @pevent: handle for the pevent
+ * @addr: the address to find the function with
+ *
+ * Returns the address the function starts at. This can be used in
+ * conjunction with tep_find_function to print both the function
+ * name and the function offset.
+ */
+unsigned long long
+tep_find_function_address(struct tep_handle *pevent, unsigned long long addr)
+{
+ struct func_map *map;
+
+ map = find_func(pevent, addr);
+ if (!map)
+ return 0;
+
+ return map->addr;
+}
+
+/**
+ * tep_register_function - register a function with a given address
+ * @pevent: handle for the pevent
+ * @function: the function name to register
+ * @addr: the address the function starts at
+ * @mod: the kernel module the function may be in (NULL for none)
+ *
+ * This registers a function name with an address and module.
+ * The @func passed in is duplicated.
+ */
+int tep_register_function(struct tep_handle *pevent, char *func,
+ unsigned long long addr, char *mod)
+{
+ struct func_list *item = malloc(sizeof(*item));
+
+ if (!item)
+ return -1;
+
+ item->next = pevent->funclist;
+ item->func = strdup(func);
+ if (!item->func)
+ goto out_free;
+
+ if (mod) {
+ item->mod = strdup(mod);
+ if (!item->mod)
+ goto out_free_func;
+ } else
+ item->mod = NULL;
+ item->addr = addr;
+
+ pevent->funclist = item;
+ pevent->func_count++;
+
+ return 0;
+
+out_free_func:
+ free(item->func);
+ item->func = NULL;
+out_free:
+ free(item);
+ errno = ENOMEM;
+ return -1;
+}
+
+/**
+ * tep_print_funcs - print out the stored functions
+ * @pevent: handle for the pevent
+ *
+ * This prints out the stored functions.
+ */
+void tep_print_funcs(struct tep_handle *pevent)
+{
+ int i;
+
+ if (!pevent->func_map)
+ func_map_init(pevent);
+
+ for (i = 0; i < (int)pevent->func_count; i++) {
+ printf("%016llx %s",
+ pevent->func_map[i].addr,
+ pevent->func_map[i].func);
+ if (pevent->func_map[i].mod)
+ printf(" [%s]\n", pevent->func_map[i].mod);
+ else
+ printf("\n");
+ }
+}
+
+struct printk_map {
+ unsigned long long addr;
+ char *printk;
+};
+
+struct printk_list {
+ struct printk_list *next;
+ unsigned long long addr;
+ char *printk;
+};
+
+static int printk_cmp(const void *a, const void *b)
+{
+ const struct printk_map *pa = a;
+ const struct printk_map *pb = b;
+
+ if (pa->addr < pb->addr)
+ return -1;
+ if (pa->addr > pb->addr)
+ return 1;
+
+ return 0;
+}
+
+static int printk_map_init(struct tep_handle *pevent)
+{
+ struct printk_list *printklist;
+ struct printk_list *item;
+ struct printk_map *printk_map;
+ int i;
+
+ printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1));
+ if (!printk_map)
+ return -1;
+
+ printklist = pevent->printklist;
+
+ i = 0;
+ while (printklist) {
+ printk_map[i].printk = printklist->printk;
+ printk_map[i].addr = printklist->addr;
+ i++;
+ item = printklist;
+ printklist = printklist->next;
+ free(item);
+ }
+
+ qsort(printk_map, pevent->printk_count, sizeof(*printk_map), printk_cmp);
+
+ pevent->printk_map = printk_map;
+ pevent->printklist = NULL;
+
+ return 0;
+}
+
+static struct printk_map *
+find_printk(struct tep_handle *pevent, unsigned long long addr)
+{
+ struct printk_map *printk;
+ struct printk_map key;
+
+ if (!pevent->printk_map && printk_map_init(pevent))
+ return NULL;
+
+ key.addr = addr;
+
+ printk = bsearch(&key, pevent->printk_map, pevent->printk_count,
+ sizeof(*pevent->printk_map), printk_cmp);
+
+ return printk;
+}
+
+/**
+ * tep_register_print_string - register a string by its address
+ * @pevent: handle for the pevent
+ * @fmt: the string format to register
+ * @addr: the address the string was located at
+ *
+ * This registers a string by the address it was stored in the kernel.
+ * The @fmt passed in is duplicated.
+ */
+int tep_register_print_string(struct tep_handle *pevent, const char *fmt,
+ unsigned long long addr)
+{
+ struct printk_list *item = malloc(sizeof(*item));
+ char *p;
+
+ if (!item)
+ return -1;
+
+ item->next = pevent->printklist;
+ item->addr = addr;
+
+ /* Strip off quotes and '\n' from the end */
+ if (fmt[0] == '"')
+ fmt++;
+ item->printk = strdup(fmt);
+ if (!item->printk)
+ goto out_free;
+
+ /* guard against formats too short to hold the quote or "\n" */
+ if (*item->printk) {
+ p = item->printk + strlen(item->printk) - 1;
+ if (*p == '"')
+ *p = 0;
+
+ if (p - item->printk >= 2) {
+ p -= 2;
+ if (strcmp(p, "\\n") == 0)
+ *p = 0;
+ }
+ }
+
+ pevent->printklist = item;
+ pevent->printk_count++;
+
+ return 0;
+
+out_free:
+ free(item);
+ errno = ENOMEM;
+ return -1;
+}
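+
+/*
+ * Example of the stripping above (illustrative): a format that
+ * arrives as the literal characters
+ *
+ * "Hello %s\n"
+ *
+ * (including the quotes and the two-character "\n") is stored as
+ *
+ * Hello %s
+ */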
+
+/**
+ * tep_print_printk - print out the stored strings
+ * @pevent: handle for the pevent
+ *
+ * This prints the string formats that were stored.
+ */
+void tep_print_printk(struct tep_handle *pevent)
+{
+ int i;
+
+ if (!pevent->printk_map && printk_map_init(pevent))
+ return;
+
+ for (i = 0; i < (int)pevent->printk_count; i++) {
+ printf("%016llx %s\n",
+ pevent->printk_map[i].addr,
+ pevent->printk_map[i].printk);
+ }
+}
+
+static struct event_format *alloc_event(void)
+{
+ return calloc(1, sizeof(struct event_format));
+}
+
+static int add_event(struct tep_handle *pevent, struct event_format *event)
+{
+ int i;
+ struct event_format **events = realloc(pevent->events, sizeof(event) *
+ (pevent->nr_events + 1));
+ if (!events)
+ return -1;
+
+ pevent->events = events;
+
+ for (i = 0; i < pevent->nr_events; i++) {
+ if (pevent->events[i]->id > event->id)
+ break;
+ }
+ if (i < pevent->nr_events)
+ memmove(&pevent->events[i + 1],
+ &pevent->events[i],
+ sizeof(event) * (pevent->nr_events - i));
+
+ pevent->events[i] = event;
+ pevent->nr_events++;
+
+ event->pevent = pevent;
+
+ return 0;
+}
+
+static int event_item_type(enum event_type type)
+{
+ switch (type) {
+ case EVENT_ITEM ... EVENT_SQUOTE:
+ return 1;
+ case EVENT_ERROR ... EVENT_DELIM:
+ default:
+ return 0;
+ }
+}
+
+static void free_flag_sym(struct print_flag_sym *fsym)
+{
+ struct print_flag_sym *next;
+
+ while (fsym) {
+ next = fsym->next;
+ free(fsym->value);
+ free(fsym->str);
+ free(fsym);
+ fsym = next;
+ }
+}
+
+static void free_arg(struct print_arg *arg)
+{
+ struct print_arg *farg;
+
+ if (!arg)
+ return;
+
+ switch (arg->type) {
+ case PRINT_ATOM:
+ free(arg->atom.atom);
+ break;
+ case PRINT_FIELD:
+ free(arg->field.name);
+ break;
+ case PRINT_FLAGS:
+ free_arg(arg->flags.field);
+ free(arg->flags.delim);
+ free_flag_sym(arg->flags.flags);
+ break;
+ case PRINT_SYMBOL:
+ free_arg(arg->symbol.field);
+ free_flag_sym(arg->symbol.symbols);
+ break;
+ case PRINT_HEX:
+ case PRINT_HEX_STR:
+ free_arg(arg->hex.field);
+ free_arg(arg->hex.size);
+ break;
+ case PRINT_INT_ARRAY:
+ free_arg(arg->int_array.field);
+ free_arg(arg->int_array.count);
+ free_arg(arg->int_array.el_size);
+ break;
+ case PRINT_TYPE:
+ free(arg->typecast.type);
+ free_arg(arg->typecast.item);
+ break;
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ free(arg->string.string);
+ break;
+ case PRINT_BITMASK:
+ free(arg->bitmask.bitmask);
+ break;
+ case PRINT_DYNAMIC_ARRAY:
+ case PRINT_DYNAMIC_ARRAY_LEN:
+ free(arg->dynarray.index);
+ break;
+ case PRINT_OP:
+ free(arg->op.op);
+ free_arg(arg->op.left);
+ free_arg(arg->op.right);
+ break;
+ case PRINT_FUNC:
+ while (arg->func.args) {
+ farg = arg->func.args;
+ arg->func.args = farg->next;
+ free_arg(farg);
+ }
+ break;
+
+ case PRINT_NULL:
+ default:
+ break;
+ }
+
+ free(arg);
+}
+
+static enum event_type get_type(int ch)
+{
+ if (ch == '\n')
+ return EVENT_NEWLINE;
+ if (isspace(ch))
+ return EVENT_SPACE;
+ if (isalnum(ch) || ch == '_')
+ return EVENT_ITEM;
+ if (ch == '\'')
+ return EVENT_SQUOTE;
+ if (ch == '"')
+ return EVENT_DQUOTE;
+ if (!isprint(ch))
+ return EVENT_NONE;
+ if (ch == '(' || ch == ')' || ch == ',')
+ return EVENT_DELIM;
+
+ return EVENT_OP;
+}
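+
+/*
+ * Tokenizer sketch (illustrative): for the input "REC->foo == 0x1",
+ * __read_token() below produces the sequence
+ *
+ * EVENT_ITEM "REC", EVENT_OP "->", EVENT_ITEM "foo",
+ * EVENT_OP "==", EVENT_ITEM "0x1"
+ *
+ * with EVENT_SPACE tokens in between, which read_token() discards.
+ */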
+
+static int __read_char(void)
+{
+ if (input_buf_ptr >= input_buf_siz)
+ return -1;
+
+ return input_buf[input_buf_ptr++];
+}
+
+static int __peek_char(void)
+{
+ if (input_buf_ptr >= input_buf_siz)
+ return -1;
+
+ return input_buf[input_buf_ptr];
+}
+
+/**
+ * tep_peek_char - peek at the next character that will be read
+ *
+ * Returns the next character that will be read, or -1 at end of buffer.
+ */
+int tep_peek_char(void)
+{
+ return __peek_char();
+}
+
+static int extend_token(char **tok, char *buf, int size)
+{
+ char *newtok = realloc(*tok, size);
+
+ if (!newtok) {
+ free(*tok);
+ *tok = NULL;
+ return -1;
+ }
+
+ if (!*tok)
+ strcpy(newtok, buf);
+ else
+ strcat(newtok, buf);
+ *tok = newtok;
+
+ return 0;
+}
+
+static enum event_type force_token(const char *str, char **tok);
+
+static enum event_type __read_token(char **tok)
+{
+ char buf[BUFSIZ];
+ int ch, last_ch, quote_ch, next_ch;
+ int i = 0;
+ int tok_size = 0;
+ enum event_type type;
+
+ *tok = NULL;
+
+ ch = __read_char();
+ if (ch < 0)
+ return EVENT_NONE;
+
+ type = get_type(ch);
+ if (type == EVENT_NONE)
+ return type;
+
+ buf[i++] = ch;
+
+ switch (type) {
+ case EVENT_NEWLINE:
+ case EVENT_DELIM:
+ if (asprintf(tok, "%c", ch) < 0)
+ return EVENT_ERROR;
+
+ return type;
+
+ case EVENT_OP:
+ switch (ch) {
+ case '-':
+ next_ch = __peek_char();
+ if (next_ch == '>') {
+ buf[i++] = __read_char();
+ break;
+ }
+ /* fall through */
+ case '+':
+ case '|':
+ case '&':
+ case '>':
+ case '<':
+ last_ch = ch;
+ ch = __peek_char();
+ if (ch != last_ch)
+ goto test_equal;
+ buf[i++] = __read_char();
+ switch (last_ch) {
+ case '>':
+ case '<':
+ goto test_equal;
+ default:
+ break;
+ }
+ break;
+ case '!':
+ case '=':
+ goto test_equal;
+ default: /* what should we do instead? */
+ break;
+ }
+ buf[i] = 0;
+ *tok = strdup(buf);
+ return type;
+
+ test_equal:
+ ch = __peek_char();
+ if (ch == '=')
+ buf[i++] = __read_char();
+ goto out;
+
+ case EVENT_DQUOTE:
+ case EVENT_SQUOTE:
+ /* don't keep quotes */
+ i--;
+ quote_ch = ch;
+ last_ch = 0;
+ concat:
+ do {
+ if (i == (BUFSIZ - 1)) {
+ buf[i] = 0;
+ tok_size += BUFSIZ;
+
+ if (extend_token(tok, buf, tok_size) < 0)
+ return EVENT_NONE;
+ i = 0;
+ }
+ last_ch = ch;
+ ch = __read_char();
+ buf[i++] = ch;
+ /* the '\' '\' will cancel itself */
+ if (ch == '\\' && last_ch == '\\')
+ last_ch = 0;
+ } while (ch != quote_ch || last_ch == '\\');
+ /* remove the last quote */
+ i--;
+
+ /*
+ * For strings (double quotes) check the next token.
+ * If it is another string, concatenate the two.
+ */
+ if (type == EVENT_DQUOTE) {
+ unsigned long long save_input_buf_ptr = input_buf_ptr;
+
+ do {
+ ch = __read_char();
+ } while (isspace(ch));
+ if (ch == '"')
+ goto concat;
+ input_buf_ptr = save_input_buf_ptr;
+ }
+
+ goto out;
+
+ case EVENT_ERROR ... EVENT_SPACE:
+ case EVENT_ITEM:
+ default:
+ break;
+ }
+
+ while (get_type(__peek_char()) == type) {
+ if (i == (BUFSIZ - 1)) {
+ buf[i] = 0;
+ tok_size += BUFSIZ;
+
+ if (extend_token(tok, buf, tok_size) < 0)
+ return EVENT_NONE;
+ i = 0;
+ }
+ ch = __read_char();
+ buf[i++] = ch;
+ }
+
+ out:
+ buf[i] = 0;
+ if (extend_token(tok, buf, tok_size + i + 1) < 0)
+ return EVENT_NONE;
+
+ if (type == EVENT_ITEM) {
+ /*
+ * Older versions of the kernel have a bug that
+ * creates invalid symbols and will break the mac80211
+ * parsing. This is a workaround for that bug.
+ *
+ * See Linux kernel commit:
+ * 811cb50baf63461ce0bdb234927046131fc7fa8b
+ */
+ if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
+ free(*tok);
+ *tok = NULL;
+ return force_token("\"%s\" ", tok);
+ } else if (strcmp(*tok, "STA_PR_FMT") == 0) {
+ free(*tok);
+ *tok = NULL;
+ return force_token("\" sta:%pM\" ", tok);
+ } else if (strcmp(*tok, "VIF_PR_FMT") == 0) {
+ free(*tok);
+ *tok = NULL;
+ return force_token("\" vif:%p(%d)\" ", tok);
+ }
+ }
+
+ return type;
+}
+
+static enum event_type force_token(const char *str, char **tok)
+{
+ const char *save_input_buf;
+ unsigned long long save_input_buf_ptr;
+ unsigned long long save_input_buf_siz;
+ enum event_type type;
+
+ /* save off the current input pointers */
+ save_input_buf = input_buf;
+ save_input_buf_ptr = input_buf_ptr;
+ save_input_buf_siz = input_buf_siz;
+
+ init_input_buf(str, strlen(str));
+
+ type = __read_token(tok);
+
+ /* reset back to original token */
+ input_buf = save_input_buf;
+ input_buf_ptr = save_input_buf_ptr;
+ input_buf_siz = save_input_buf_siz;
+
+ return type;
+}
+
+static void free_token(char *tok)
+{
+ if (tok)
+ free(tok);
+}
+
+static enum event_type read_token(char **tok)
+{
+ enum event_type type;
+
+ for (;;) {
+ type = __read_token(tok);
+ if (type != EVENT_SPACE)
+ return type;
+
+ free_token(*tok);
+ }
+
+ /* not reached */
+ *tok = NULL;
+ return EVENT_NONE;
+}
+
+/**
+ * tep_read_token - access to utilities to use the pevent parser
+ * @tok: The token to return
+ *
+ * This will parse tokens from the string given by
+ * tep_init_data().
+ *
+ * Returns the token type.
+ */
+enum event_type tep_read_token(char **tok)
+{
+ return read_token(tok);
+}
+
+/**
+ * tep_free_token - free a token returned by tep_read_token
+ * @token: the token to free
+ */
+void tep_free_token(char *token)
+{
+ free_token(token);
+}
+
+/* no newline */
+static enum event_type read_token_item(char **tok)
+{
+ enum event_type type;
+
+ for (;;) {
+ type = __read_token(tok);
+ if (type != EVENT_SPACE && type != EVENT_NEWLINE)
+ return type;
+ free_token(*tok);
+ *tok = NULL;
+ }
+
+ /* not reached */
+ *tok = NULL;
+ return EVENT_NONE;
+}
+
+static int test_type(enum event_type type, enum event_type expect)
+{
+ if (type != expect) {
+ do_warning("Error: expected type %d but read %d",
+ expect, type);
+ return -1;
+ }
+ return 0;
+}
+
+static int test_type_token(enum event_type type, const char *token,
+ enum event_type expect, const char *expect_tok)
+{
+ if (type != expect) {
+ do_warning("Error: expected type %d but read %d",
+ expect, type);
+ return -1;
+ }
+
+ if (strcmp(token, expect_tok) != 0) {
+ do_warning("Error: expected '%s' but read '%s'",
+ expect_tok, token);
+ return -1;
+ }
+ return 0;
+}
+
+static int __read_expect_type(enum event_type expect, char **tok, int newline_ok)
+{
+ enum event_type type;
+
+ if (newline_ok)
+ type = read_token(tok);
+ else
+ type = read_token_item(tok);
+ return test_type(type, expect);
+}
+
+static int read_expect_type(enum event_type expect, char **tok)
+{
+ return __read_expect_type(expect, tok, 1);
+}
+
+static int __read_expected(enum event_type expect, const char *str,
+ int newline_ok)
+{
+ enum event_type type;
+ char *token;
+ int ret;
+
+ if (newline_ok)
+ type = read_token(&token);
+ else
+ type = read_token_item(&token);
+
+ ret = test_type_token(type, token, expect, str);
+
+ free_token(token);
+
+ return ret;
+}
+
+static int read_expected(enum event_type expect, const char *str)
+{
+ return __read_expected(expect, str, 1);
+}
+
+static int read_expected_item(enum event_type expect, const char *str)
+{
+ return __read_expected(expect, str, 0);
+}
+
+static char *event_read_name(void)
+{
+ char *token;
+
+ if (read_expected(EVENT_ITEM, "name") < 0)
+ return NULL;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return NULL;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+
+ return token;
+
+ fail:
+ free_token(token);
+ return NULL;
+}
+
+static int event_read_id(void)
+{
+ char *token;
+ int id;
+
+ if (read_expected_item(EVENT_ITEM, "ID") < 0)
+ return -1;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return -1;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+
+ id = strtoul(token, NULL, 0);
+ free_token(token);
+ return id;
+
+ fail:
+ free_token(token);
+ return -1;
+}
+
+static int field_is_string(struct format_field *field)
+{
+ if ((field->flags & FIELD_IS_ARRAY) &&
+ (strstr(field->type, "char") || strstr(field->type, "u8") ||
+ strstr(field->type, "s8")))
+ return 1;
+
+ return 0;
+}
+
+static int field_is_dynamic(struct format_field *field)
+{
+ if (strncmp(field->type, "__data_loc", 10) == 0)
+ return 1;
+
+ return 0;
+}
+
+static int field_is_long(struct format_field *field)
+{
+ /* includes long long */
+ if (strstr(field->type, "long"))
+ return 1;
+
+ return 0;
+}
+
+static unsigned int type_size(const char *name)
+{
+ /* This covers all FIELD_IS_STRING types. */
+ static struct {
+ const char *type;
+ unsigned int size;
+ } table[] = {
+ { "u8", 1 },
+ { "u16", 2 },
+ { "u32", 4 },
+ { "u64", 8 },
+ { "s8", 1 },
+ { "s16", 2 },
+ { "s32", 4 },
+ { "s64", 8 },
+ { "char", 1 },
+ { },
+ };
+ int i;
+
+ for (i = 0; table[i].type; i++) {
+ if (!strcmp(table[i].type, name))
+ return table[i].size;
+ }
+
+ return 0;
+}
+
+static int event_read_fields(struct event_format *event, struct format_field **fields)
+{
+ struct format_field *field = NULL;
+ enum event_type type;
+ char *token;
+ char *last_token;
+ int count = 0;
+
+ do {
+ unsigned int size_dynamic = 0;
+
+ type = read_token(&token);
+ if (type == EVENT_NEWLINE) {
+ free_token(token);
+ return count;
+ }
+
+ count++;
+
+ if (test_type_token(type, token, EVENT_ITEM, "field"))
+ goto fail;
+ free_token(token);
+
+ type = read_token(&token);
+ /*
+ * The ftrace fields may still use the "special" name.
+ * Just ignore it.
+ */
+ if (event->flags & EVENT_FL_ISFTRACE &&
+ type == EVENT_ITEM && strcmp(token, "special") == 0) {
+ free_token(token);
+ type = read_token(&token);
+ }
+
+ if (test_type_token(type, token, EVENT_OP, ":") < 0)
+ goto fail;
+
+ free_token(token);
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+
+ last_token = token;
+
+ field = calloc(1, sizeof(*field));
+ if (!field)
+ goto fail;
+
+ field->event = event;
+
+ /* read the rest of the type */
+ for (;;) {
+ type = read_token(&token);
+ if (type == EVENT_ITEM ||
+ (type == EVENT_OP && strcmp(token, "*") == 0) ||
+ /*
+ * Some of the ftrace fields are broken and have
+ * an illegal "." in them.
+ */
+ (event->flags & EVENT_FL_ISFTRACE &&
+ type == EVENT_OP && strcmp(token, ".") == 0)) {
+
+ if (strcmp(token, "*") == 0)
+ field->flags |= FIELD_IS_POINTER;
+
+ if (field->type) {
+ char *new_type;
+ new_type = realloc(field->type,
+ strlen(field->type) +
+ strlen(last_token) + 2);
+ if (!new_type) {
+ free(last_token);
+ goto fail;
+ }
+ field->type = new_type;
+ strcat(field->type, " ");
+ strcat(field->type, last_token);
+ free(last_token);
+ } else
+ field->type = last_token;
+ last_token = token;
+ continue;
+ }
+
+ break;
+ }
+
+ if (!field->type) {
+ do_warning_event(event, "%s: no type found", __func__);
+ goto fail;
+ }
+ field->name = field->alias = last_token;
+
+ if (test_type(type, EVENT_OP))
+ goto fail;
+
+ if (strcmp(token, "[") == 0) {
+ enum event_type last_type = type;
+ char *brackets = token;
+ char *new_brackets;
+ int len;
+
+ field->flags |= FIELD_IS_ARRAY;
+
+ type = read_token(&token);
+
+ if (type == EVENT_ITEM)
+ field->arraylen = strtoul(token, NULL, 0);
+ else
+ field->arraylen = 0;
+
+ while (strcmp(token, "]") != 0) {
+ if (last_type == EVENT_ITEM &&
+ type == EVENT_ITEM)
+ len = 2;
+ else
+ len = 1;
+ last_type = type;
+
+ new_brackets = realloc(brackets,
+ strlen(brackets) +
+ strlen(token) + len);
+ if (!new_brackets) {
+ free(brackets);
+ goto fail;
+ }
+ brackets = new_brackets;
+ if (len == 2)
+ strcat(brackets, " ");
+ strcat(brackets, token);
+ /* We only care about the last token */
+ field->arraylen = strtoul(token, NULL, 0);
+ free_token(token);
+ type = read_token(&token);
+ if (type == EVENT_NONE) {
+ do_warning_event(event, "failed to find token");
+ goto fail;
+ }
+ }
+
+ free_token(token);
+
+ new_brackets = realloc(brackets, strlen(brackets) + 2);
+ if (!new_brackets) {
+ free(brackets);
+ goto fail;
+ }
+ brackets = new_brackets;
+ strcat(brackets, "]");
+
+ /* add brackets to type */
+
+ type = read_token(&token);
+ /*
+ * If the next token is not an OP, then it is of
+ * the format: type [] item;
+ */
+ if (type == EVENT_ITEM) {
+ char *new_type;
+ new_type = realloc(field->type,
+ strlen(field->type) +
+ strlen(field->name) +
+ strlen(brackets) + 2);
+ if (!new_type) {
+ free(brackets);
+ goto fail;
+ }
+ field->type = new_type;
+ strcat(field->type, " ");
+ strcat(field->type, field->name);
+ size_dynamic = type_size(field->name);
+ free_token(field->name);
+ strcat(field->type, brackets);
+ field->name = field->alias = token;
+ type = read_token(&token);
+ } else {
+ char *new_type;
+ new_type = realloc(field->type,
+ strlen(field->type) +
+ strlen(brackets) + 1);
+ if (!new_type) {
+ free(brackets);
+ goto fail;
+ }
+ field->type = new_type;
+ strcat(field->type, brackets);
+ }
+ free(brackets);
+ }
+
+ if (field_is_string(field))
+ field->flags |= FIELD_IS_STRING;
+ if (field_is_dynamic(field))
+ field->flags |= FIELD_IS_DYNAMIC;
+ if (field_is_long(field))
+ field->flags |= FIELD_IS_LONG;
+
+ if (test_type_token(type, token, EVENT_OP, ";"))
+ goto fail;
+ free_token(token);
+
+ if (read_expected(EVENT_ITEM, "offset") < 0)
+ goto fail_expect;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+ field->offset = strtoul(token, NULL, 0);
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ";") < 0)
+ goto fail_expect;
+
+ if (read_expected(EVENT_ITEM, "size") < 0)
+ goto fail_expect;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+ field->size = strtoul(token, NULL, 0);
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ";") < 0)
+ goto fail_expect;
+
+ type = read_token(&token);
+ if (type != EVENT_NEWLINE) {
+ /* newer versions of the kernel have a "signed" type */
+ if (test_type_token(type, token, EVENT_ITEM, "signed"))
+ goto fail;
+
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+
+ if (strtoul(token, NULL, 0))
+ field->flags |= FIELD_IS_SIGNED;
+
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_NEWLINE, &token))
+ goto fail;
+ }
+
+ free_token(token);
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ if (field->arraylen)
+ field->elementsize = field->size / field->arraylen;
+ else if (field->flags & FIELD_IS_DYNAMIC)
+ field->elementsize = size_dynamic;
+ else if (field->flags & FIELD_IS_STRING)
+ field->elementsize = 1;
+ else if (field->flags & FIELD_IS_LONG)
+ field->elementsize = event->pevent ?
+ event->pevent->long_size :
+ sizeof(long);
+ } else
+ field->elementsize = field->size;
+
+ *fields = field;
+ fields = &field->next;
+
+ } while (1);
+
+ return 0;
+
+fail:
+ free_token(token);
+fail_expect:
+ if (field) {
+ free(field->type);
+ free(field->name);
+ free(field);
+ }
+ return -1;
+}
+
+static int event_read_format(struct event_format *event)
+{
+ char *token;
+ int ret;
+
+ if (read_expected_item(EVENT_ITEM, "format") < 0)
+ return -1;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return -1;
+
+ if (read_expect_type(EVENT_NEWLINE, &token))
+ goto fail;
+ free_token(token);
+
+ ret = event_read_fields(event, &event->format.common_fields);
+ if (ret < 0)
+ return ret;
+ event->format.nr_common = ret;
+
+ ret = event_read_fields(event, &event->format.fields);
+ if (ret < 0)
+ return ret;
+ event->format.nr_fields = ret;
+
+ return 0;
+
+ fail:
+ free_token(token);
+ return -1;
+}
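+
+/*
+ * The "format:" section parsed above comes from the kernel's
+ * events/<subsystem>/<event>/format files and looks like this
+ * (abridged, illustrative values):
+ *
+ * format:
+ * field:unsigned short common_type; offset:0; size:2; signed:0;
+ * field:int common_pid; offset:4; size:4; signed:1;
+ *
+ * field:char comm[16]; offset:8; size:16; signed:1;
+ *
+ * The blank line separates the common fields from the event fields.
+ */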
+
+static enum event_type
+process_arg_token(struct event_format *event, struct print_arg *arg,
+ char **tok, enum event_type type);
+
+static enum event_type
+process_arg(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ enum event_type type;
+ char *token;
+
+ type = read_token(&token);
+ *tok = token;
+
+ return process_arg_token(event, arg, tok, type);
+}
+
+static enum event_type
+process_op(struct event_format *event, struct print_arg *arg, char **tok);
+
+/*
+ * For __print_symbolic() and __print_flags(), we need to completely
+ * evaluate the first argument, which defines what to print next.
+ */
+static enum event_type
+process_field_arg(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ enum event_type type;
+
+ type = process_arg(event, arg, tok);
+
+ while (type == EVENT_OP) {
+ type = process_op(event, arg, tok);
+ }
+
+ return type;
+}
+
+static enum event_type
+process_cond(struct event_format *event, struct print_arg *top, char **tok)
+{
+ struct print_arg *arg, *left, *right;
+ enum event_type type;
+ char *token = NULL;
+
+ arg = alloc_arg();
+ left = alloc_arg();
+ right = alloc_arg();
+
+ if (!arg || !left || !right) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ /* arg will be freed at out_free */
+ free_arg(left);
+ free_arg(right);
+ goto out_free;
+ }
+
+ arg->type = PRINT_OP;
+ arg->op.left = left;
+ arg->op.right = right;
+
+ *tok = NULL;
+ type = process_arg(event, left, &token);
+
+ again:
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ /* Handle other operations in the arguments */
+ if (type == EVENT_OP && strcmp(token, ":") != 0) {
+ type = process_op(event, left, &token);
+ goto again;
+ }
+
+ if (test_type_token(type, token, EVENT_OP, ":"))
+ goto out_free;
+
+ arg->op.op = token;
+
+ type = process_arg(event, right, &token);
+
+ top->op.right = arg;
+
+ *tok = token;
+ return type;
+
+out_free:
+ /* Top may point to itself */
+ top->op.right = NULL;
+ free_token(token);
+ free_arg(arg);
+ return EVENT_ERROR;
+}
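+
+/*
+ * Example (illustrative): for an argument such as
+ *
+ * REC->flag ? "yes" : "no"
+ *
+ * process_op() builds the '?' node and the function above fills in
+ * its right side with a ':' node whose left and right children are
+ * the two branches.
+ */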
+
+static enum event_type
+process_array(struct event_format *event, struct print_arg *top, char **tok)
+{
+ struct print_arg *arg;
+ enum event_type type;
+ char *token = NULL;
+
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ /* '*tok' is set to top->op.op. No need to free. */
+ *tok = NULL;
+ return EVENT_ERROR;
+ }
+
+ *tok = NULL;
+ type = process_arg(event, arg, &token);
+ if (test_type_token(type, token, EVENT_OP, "]"))
+ goto out_free;
+
+ top->op.right = arg;
+
+ free_token(token);
+ type = read_token_item(&token);
+ *tok = token;
+
+ return type;
+
+out_free:
+ free_token(token);
+ free_arg(arg);
+ return EVENT_ERROR;
+}
+
+static int get_op_prio(char *op)
+{
+ if (!op[1]) {
+ switch (op[0]) {
+ case '~':
+ case '!':
+ return 4;
+ case '*':
+ case '/':
+ case '%':
+ return 6;
+ case '+':
+ case '-':
+ return 7;
+ /* '>>' and '<<' are 8 */
+ case '<':
+ case '>':
+ return 9;
+ /* '==' and '!=' are 10 */
+ case '&':
+ return 11;
+ case '^':
+ return 12;
+ case '|':
+ return 13;
+ case '?':
+ return 16;
+ default:
+ do_warning("unknown op '%c'", op[0]);
+ return -1;
+ }
+ } else {
+ if (strcmp(op, "++") == 0 ||
+ strcmp(op, "--") == 0) {
+ return 3;
+ } else if (strcmp(op, ">>") == 0 ||
+ strcmp(op, "<<") == 0) {
+ return 8;
+ } else if (strcmp(op, ">=") == 0 ||
+ strcmp(op, "<=") == 0) {
+ return 9;
+ } else if (strcmp(op, "==") == 0 ||
+ strcmp(op, "!=") == 0) {
+ return 10;
+ } else if (strcmp(op, "&&") == 0) {
+ return 14;
+ } else if (strcmp(op, "||") == 0) {
+ return 15;
+ } else {
+ do_warning("unknown op '%s'", op);
+ return -1;
+ }
+ }
+}
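+
+/*
+ * These priorities mirror C precedence: a lower number binds tighter.
+ * For example, in "a + b * c" the '*' (prio 6) binds before the '+'
+ * (prio 7), so process_op() below recurses into the right operand and
+ * keeps "b * c" as a subtree under the '+' node.
+ */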
+
+static int set_op_prio(struct print_arg *arg)
+{
+ /* single (unary) ops bind the tightest */
+ if (!arg->op.left || arg->op.left->type == PRINT_NULL)
+ arg->op.prio = 0;
+ else
+ arg->op.prio = get_op_prio(arg->op.op);
+
+ return arg->op.prio;
+}
+
+/* Note, *tok does not get freed, but will most likely be saved */
+static enum event_type
+process_op(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *left, *right = NULL;
+ enum event_type type;
+ char *token;
+
+ /* the op is passed in via tok */
+ token = *tok;
+
+ if (arg->type == PRINT_OP && !arg->op.left) {
+ /* handle single op */
+ if (token[1]) {
+ do_warning_event(event, "bad op token %s", token);
+ goto out_free;
+ }
+ switch (token[0]) {
+ case '~':
+ case '!':
+ case '+':
+ case '-':
+ break;
+ default:
+ do_warning_event(event, "bad op token %s", token);
+ goto out_free;
+
+ }
+
+ /* make an empty left */
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ left->type = PRINT_NULL;
+ arg->op.left = left;
+
+ right = alloc_arg();
+ if (!right)
+ goto out_warn_free;
+
+ arg->op.right = right;
+
+ /* do not free the token, it belongs to an op */
+ *tok = NULL;
+ type = process_arg(event, right, tok);
+
+ } else if (strcmp(token, "?") == 0) {
+
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ /* copy the top arg to the left */
+ *left = *arg;
+
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = left;
+ arg->op.prio = 0;
+
+ /* it will set arg->op.right */
+ type = process_cond(event, arg, tok);
+
+ } else if (strcmp(token, ">>") == 0 ||
+ strcmp(token, "<<") == 0 ||
+ strcmp(token, "&") == 0 ||
+ strcmp(token, "|") == 0 ||
+ strcmp(token, "&&") == 0 ||
+ strcmp(token, "||") == 0 ||
+ strcmp(token, "-") == 0 ||
+ strcmp(token, "+") == 0 ||
+ strcmp(token, "*") == 0 ||
+ strcmp(token, "^") == 0 ||
+ strcmp(token, "/") == 0 ||
+ strcmp(token, "%") == 0 ||
+ strcmp(token, "<") == 0 ||
+ strcmp(token, ">") == 0 ||
+ strcmp(token, "<=") == 0 ||
+ strcmp(token, ">=") == 0 ||
+ strcmp(token, "==") == 0 ||
+ strcmp(token, "!=") == 0) {
+
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ /* copy the top arg to the left */
+ *left = *arg;
+
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = left;
+ arg->op.right = NULL;
+
+ if (set_op_prio(arg) == -1) {
+ event->flags |= EVENT_FL_FAILED;
+ /* arg->op.op (= token) will be freed at out_free */
+ arg->op.op = NULL;
+ goto out_free;
+ }
+
+ type = read_token_item(&token);
+ *tok = token;
+
+ /* could just be a type pointer */
+ if ((strcmp(arg->op.op, "*") == 0) &&
+ type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
+ char *new_atom;
+
+ if (left->type != PRINT_ATOM) {
+ do_warning_event(event, "bad pointer type");
+ goto out_free;
+ }
+ new_atom = realloc(left->atom.atom,
+ strlen(left->atom.atom) + 3);
+ if (!new_atom)
+ goto out_warn_free;
+
+ left->atom.atom = new_atom;
+ strcat(left->atom.atom, " *");
+ free(arg->op.op);
+ *arg = *left;
+ free(left);
+
+ return type;
+ }
+
+ right = alloc_arg();
+ if (!right)
+ goto out_warn_free;
+
+ type = process_arg_token(event, right, tok, type);
+ if (type == EVENT_ERROR) {
+ free_arg(right);
+ /* token was freed in process_arg_token() via *tok */
+ token = NULL;
+ goto out_free;
+ }
+
+ if (right->type == PRINT_OP &&
+ get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
+ struct print_arg tmp;
+
+ /* rotate ops according to the priority */
+ arg->op.right = right->op.left;
+
+ tmp = *arg;
+ *arg = *right;
+ *right = tmp;
+
+ arg->op.left = right;
+ } else {
+ arg->op.right = right;
+ }
+
+ } else if (strcmp(token, "[") == 0) {
+
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ *left = *arg;
+
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = left;
+
+ arg->op.prio = 0;
+
+ /* it will set arg->op.right */
+ type = process_array(event, arg, tok);
+
+ } else {
+ do_warning_event(event, "unknown op '%s'", token);
+ event->flags |= EVENT_FL_FAILED;
+ /* the arg is now the left side */
+ goto out_free;
+ }
+
+ if (type == EVENT_OP && strcmp(*tok, ":") != 0) {
+ int prio;
+
+ /* higher prios need to be closer to the root */
+ prio = get_op_prio(*tok);
+
+ if (prio > arg->op.prio)
+ return process_op(event, arg, tok);
+
+ return process_op(event, right, tok);
+ }
+
+ return type;
+
+out_warn_free:
+ do_warning_event(event, "%s: not enough memory!", __func__);
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
+ char **tok)
+{
+ enum event_type type;
+ char *field;
+ char *token;
+
+ if (read_expected(EVENT_OP, "->") < 0)
+ goto out_err;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+ field = token;
+
+ arg->type = PRINT_FIELD;
+ arg->field.name = field;
+
+ if (is_flag_field) {
+ arg->field.field = tep_find_any_field(event, arg->field.name);
+ /* the field should exist, but do not crash on a broken format */
+ if (arg->field.field)
+ arg->field.field->flags |= FIELD_IS_FLAG;
+ is_flag_field = 0;
+ } else if (is_symbolic_field) {
+ arg->field.field = tep_find_any_field(event, arg->field.name);
+ if (arg->field.field)
+ arg->field.field->flags |= FIELD_IS_SYMBOLIC;
+ is_symbolic_field = 0;
+ }
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static int alloc_and_process_delim(struct event_format *event, char *next_token,
+ struct print_arg **print_arg)
+{
+ struct print_arg *field;
+ enum event_type type;
+ char *token;
+ int ret = 0;
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ type = process_arg(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, next_token)) {
+ errno = EINVAL;
+ ret = -1;
+ free_arg(field);
+ goto out_free_token;
+ }
+
+ *print_arg = field;
+
+out_free_token:
+ free_token(token);
+
+ return ret;
+}
+
+static char *arg_eval(struct print_arg *arg);
+
+static unsigned long long
+eval_type_str(unsigned long long val, const char *type, int pointer)
+{
+ /* plain char/short/int are treated as signed unless prefixed */
+ int sign = 1;
+ char *ref;
+ int len;
+
+ len = strlen(type);
+
+ if (pointer) {
+
+ if (type[len-1] != '*') {
+ do_warning("pointer expected with non pointer type");
+ return val;
+ }
+
+ ref = malloc(len);
+ if (!ref) {
+ do_warning("%s: not enough memory!", __func__);
+ return val;
+ }
+ memcpy(ref, type, len);
+
+ /* chop off the " *" */
+ ref[len - 2] = 0;
+
+ val = eval_type_str(val, ref, 0);
+ free(ref);
+ return val;
+ }
+
+ /* check if this is a pointer */
+ if (type[len - 1] == '*')
+ return val;
+
+ /* Try to figure out the arg size */
+ if (strncmp(type, "struct", 6) == 0)
+ /* all bets are off */
+ return val;
+
+ if (strcmp(type, "u8") == 0)
+ return val & 0xff;
+
+ if (strcmp(type, "u16") == 0)
+ return val & 0xffff;
+
+ if (strcmp(type, "u32") == 0)
+ return val & 0xffffffff;
+
+ if (strcmp(type, "u64") == 0 ||
+ strcmp(type, "s64") == 0)
+ return val;
+
+ if (strcmp(type, "s8") == 0)
+ return (unsigned long long)(char)val & 0xff;
+
+ if (strcmp(type, "s16") == 0)
+ return (unsigned long long)(short)val & 0xffff;
+
+ if (strcmp(type, "s32") == 0)
+ return (unsigned long long)(int)val & 0xffffffff;
+
+ if (strncmp(type, "unsigned ", 9) == 0) {
+ sign = 0;
+ type += 9;
+ }
+
+ if (strcmp(type, "char") == 0) {
+ if (sign)
+ return (unsigned long long)(char)val & 0xff;
+ else
+ return val & 0xff;
+ }
+
+ if (strcmp(type, "short") == 0) {
+ if (sign)
+ return (unsigned long long)(short)val & 0xffff;
+ else
+ return val & 0xffff;
+ }
+
+ if (strcmp(type, "int") == 0) {
+ if (sign)
+ return (unsigned long long)(int)val & 0xffffffff;
+ else
+ return val & 0xffffffff;
+ }
+
+ return val;
+}
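+
+/*
+ * Example (illustrative): a value recorded wider than its declared
+ * type is masked down to that type, so
+ *
+ * eval_type_str(0x1234567890ULL, "u32", 0) == 0x34567890
+ *
+ * while "struct ..." and pointer types pass through unchanged.
+ */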
+
+/*
+ * Try to figure out the type.
+ */
+static unsigned long long
+eval_type(unsigned long long val, struct print_arg *arg, int pointer)
+{
+ if (arg->type != PRINT_TYPE) {
+ do_warning("expected type argument");
+ return 0;
+ }
+
+ return eval_type_str(val, arg->typecast.type, pointer);
+}
+
+static int arg_num_eval(struct print_arg *arg, long long *val)
+{
+ long long left, right;
+ int ret = 1;
+
+ switch (arg->type) {
+ case PRINT_ATOM:
+ *val = strtoll(arg->atom.atom, NULL, 0);
+ break;
+ case PRINT_TYPE:
+ ret = arg_num_eval(arg->typecast.item, val);
+ if (!ret)
+ break;
+ *val = eval_type(*val, arg, 0);
+ break;
+ case PRINT_OP:
+ switch (arg->op.op[0]) {
+ case '|':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ if (arg->op.op[1])
+ *val = left || right;
+ else
+ *val = left | right;
+ break;
+ case '&':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ if (arg->op.op[1])
+ *val = left && right;
+ else
+ *val = left & right;
+ break;
+ case '<':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ switch (arg->op.op[1]) {
+ case 0:
+ *val = left < right;
+ break;
+ case '<':
+ *val = left << right;
+ break;
+ case '=':
+ *val = left <= right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+ case '>':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ switch (arg->op.op[1]) {
+ case 0:
+ *val = left > right;
+ break;
+ case '>':
+ *val = left >> right;
+ break;
+ case '=':
+ *val = left >= right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+ case '=':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+
+ if (arg->op.op[1] != '=') {
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ } else
+ *val = left == right;
+ break;
+ case '!':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+
+ switch (arg->op.op[1]) {
+ case '=':
+ *val = left != right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+ case '-':
+ /* check for negative */
+ if (arg->op.left->type == PRINT_NULL)
+ left = 0;
+ else
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ *val = left - right;
+ break;
+ case '+':
+ if (arg->op.left->type == PRINT_NULL)
+ left = 0;
+ else
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ *val = left + right;
+ break;
+ case '~':
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ *val = ~right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+
+ case PRINT_NULL:
+ case PRINT_FIELD ... PRINT_SYMBOL:
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ case PRINT_BITMASK:
+ default:
+ do_warning("invalid eval type %d", arg->type);
+ ret = 0;
+
+ }
+ return ret;
+}
+
+static char *arg_eval(struct print_arg *arg)
+{
+ long long val;
+ static char buf[24];
+
+ switch (arg->type) {
+ case PRINT_ATOM:
+ return arg->atom.atom;
+ case PRINT_TYPE:
+ return arg_eval(arg->typecast.item);
+ case PRINT_OP:
+ if (!arg_num_eval(arg, &val))
+ break;
+ sprintf(buf, "%lld", val);
+ return buf;
+
+ case PRINT_NULL:
+ case PRINT_FIELD ... PRINT_SYMBOL:
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ case PRINT_BITMASK:
+ default:
+ do_warning("invalid eval type %d", arg->type);
+ break;
+ }
+
+ return NULL;
+}
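+
+/*
+ * arg_eval() is what turns the value half of a __print_flags() /
+ * __print_symbolic() pair into a string. For example, in the field
+ * list entry {(1 << 2), "BLOCK"} the "(1 << 2)" is evaluated here to
+ * the string "4" before process_fields() below duplicates it into
+ * print_flag_sym->value.
+ */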
+
+static enum event_type
+process_fields(struct event_format *event, struct print_flag_sym **list, char **tok)
+{
+ enum event_type type;
+ struct print_arg *arg = NULL;
+ struct print_flag_sym *field;
+ char *token = *tok;
+ char *value;
+
+ do {
+ free_token(token);
+ type = read_token_item(&token);
+ if (test_type_token(type, token, EVENT_OP, "{"))
+ break;
+
+ arg = alloc_arg();
+ if (!arg)
+ goto out_free;
+
+ free_token(token);
+ type = process_arg(event, arg, &token);
+
+ if (type == EVENT_OP)
+ type = process_op(event, arg, &token);
+
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free;
+
+ field = calloc(1, sizeof(*field));
+ if (!field)
+ goto out_free;
+
+ value = arg_eval(arg);
+ if (value == NULL)
+ goto out_free_field;
+ field->value = strdup(value);
+ if (field->value == NULL)
+ goto out_free_field;
+
+ free_arg(arg);
+ arg = alloc_arg();
+ if (!arg)
+ goto out_free;
+
+ free_token(token);
+ type = process_arg(event, arg, &token);
+ if (test_type_token(type, token, EVENT_OP, "}"))
+ goto out_free_field;
+
+ value = arg_eval(arg);
+ if (value == NULL)
+ goto out_free_field;
+ field->str = strdup(value);
+ if (field->str == NULL)
+ goto out_free_field;
+ free_arg(arg);
+ arg = NULL;
+
+ *list = field;
+ list = &field->next;
+
+ free_token(token);
+ type = read_token_item(&token);
+ } while (type == EVENT_DELIM && strcmp(token, ",") == 0);
+
+ *tok = token;
+ return type;
+
+out_free_field:
+ free_flag_sym(field);
+out_free:
+ free_arg(arg);
+ free_token(token);
+ *tok = NULL;
+
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_flags(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *field;
+ enum event_type type;
+ char *token = NULL;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_FLAGS;
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ goto out_free;
+ }
+
+ type = process_field_arg(event, field, &token);
+
+ /* Handle operations in the first argument */
+ while (type == EVENT_OP)
+ type = process_op(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free_field;
+ free_token(token);
+
+ arg->flags.field = field;
+
+ type = read_token_item(&token);
+ if (event_item_type(type)) {
+ arg->flags.delim = token;
+ type = read_token_item(&token);
+ }
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free;
+
+ type = process_fields(event, &arg->flags.flags, &token);
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+out_free_field:
+ free_arg(field);
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *field;
+ enum event_type type;
+ char *token = NULL;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_SYMBOL;
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ goto out_free;
+ }
+
+ type = process_field_arg(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free_field;
+
+ arg->symbol.field = field;
+
+ type = process_fields(event, &arg->symbol.symbols, &token);
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+out_free_field:
+ free_arg(field);
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_hex_common(struct event_format *event, struct print_arg *arg,
+ char **tok, enum print_arg_type type)
+{
+ memset(arg, 0, sizeof(*arg));
+ arg->type = type;
+
+ if (alloc_and_process_delim(event, ",", &arg->hex.field))
+ goto out;
+
+ if (alloc_and_process_delim(event, ")", &arg->hex.size))
+ goto free_field;
+
+ return read_token_item(tok);
+
+free_field:
+ free_arg(arg->hex.field);
+ arg->hex.field = NULL;
+out:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_hex(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ return process_hex_common(event, arg, tok, PRINT_HEX);
+}
+
+static enum event_type
+process_hex_str(struct event_format *event, struct print_arg *arg,
+ char **tok)
+{
+ return process_hex_common(event, arg, tok, PRINT_HEX_STR);
+}
+
+static enum event_type
+process_int_array(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_INT_ARRAY;
+
+ if (alloc_and_process_delim(event, ",", &arg->int_array.field))
+ goto out;
+
+ if (alloc_and_process_delim(event, ",", &arg->int_array.count))
+ goto free_field;
+
+ if (alloc_and_process_delim(event, ")", &arg->int_array.el_size))
+ goto free_size;
+
+ return read_token_item(tok);
+
+free_size:
+ free_arg(arg->int_array.count);
+ arg->int_array.count = NULL;
+free_field:
+ free_arg(arg->int_array.field);
+ arg->int_array.field = NULL;
+out:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct format_field *field;
+ enum event_type type;
+ char *token;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_DYNAMIC_ARRAY;
+
+ /*
+ * The item within the parentheses is another field that holds
+ * the index into where the array starts.
+ */
+ type = read_token(&token);
+ *tok = token;
+ if (type != EVENT_ITEM)
+ goto out_free;
+
+ /* Find the field */
+
+ field = tep_find_field(event, token);
+ if (!field)
+ goto out_free;
+
+ arg->dynarray.field = field;
+ arg->dynarray.index = 0;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(&token);
+ *tok = token;
+ if (type != EVENT_OP || strcmp(token, "[") != 0)
+ return type;
+
+ free_token(token);
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ *tok = NULL;
+ return EVENT_ERROR;
+ }
+
+ type = process_arg(event, arg, &token);
+ if (type == EVENT_ERROR)
+ goto out_free_arg;
+
+ if (!test_type_token(type, token, EVENT_OP, "]"))
+ goto out_free_arg;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+ out_free_arg:
+ free_arg(arg);
+ out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
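+
+/*
+ * Example (illustrative): the function above handles print fmt
+ * helpers such as
+ *
+ * __get_dynamic_array(buf)
+ * __get_dynamic_array(buf)[i]
+ *
+ * where "buf" is a __data_loc field of the event; the optional
+ * "[i]" index is parsed by the second half of the function.
+ */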
+
+static enum event_type
+process_dynamic_array_len(struct event_format *event, struct print_arg *arg,
+ char **tok)
+{
+ struct format_field *field;
+ enum event_type type;
+ char *token;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ arg->type = PRINT_DYNAMIC_ARRAY_LEN;
+
+ /* Find the field */
+ field = tep_find_field(event, token);
+ if (!field)
+ goto out_free;
+
+ arg->dynarray.field = field;
+ arg->dynarray.index = 0;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_err;
+
+ free_token(token);
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_paren(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *item_arg;
+ enum event_type type;
+ char *token;
+
+ type = process_arg(event, arg, &token);
+
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ if (type == EVENT_OP)
+ type = process_op(event, arg, &token);
+
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(&token);
+
+ /*
+ * If the next token is an item or another open paren, then
+ * this was a typecast.
+ */
+ if (event_item_type(type) ||
+ (type == EVENT_DELIM && strcmp(token, "(") == 0)) {
+
+ /* make this a typecast and continue */
+
+ /* previous must be an atom */
+ if (arg->type != PRINT_ATOM) {
+ do_warning_event(event, "previous needed to be PRINT_ATOM");
+ goto out_free;
+ }
+
+ item_arg = alloc_arg();
+ if (!item_arg) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ goto out_free;
+ }
+
+ arg->type = PRINT_TYPE;
+ arg->typecast.type = arg->atom.atom;
+ arg->typecast.item = item_arg;
+ type = process_arg_token(event, item_arg, &token, type);
+
+ }
+
+ *tok = token;
+ return type;
+
+ out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
+ char **tok)
+{
+ enum event_type type;
+ char *token;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ arg->type = PRINT_STRING;
+ arg->string.string = token;
+ arg->string.offset = -1;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_err;
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg,
+ char **tok)
+{
+ enum event_type type;
+ char *token;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ arg->type = PRINT_BITMASK;
+ arg->bitmask.bitmask = token;
+ arg->bitmask.offset = -1;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_err;
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static struct tep_function_handler *
+find_func_handler(struct tep_handle *pevent, char *func_name)
+{
+ struct tep_function_handler *func;
+
+ if (!pevent)
+ return NULL;
+
+ for (func = pevent->func_handlers; func; func = func->next) {
+ if (strcmp(func->name, func_name) == 0)
+ break;
+ }
+
+ return func;
+}
+
+static void remove_func_handler(struct tep_handle *pevent, char *func_name)
+{
+ struct tep_function_handler *func;
+ struct tep_function_handler **next;
+
+ next = &pevent->func_handlers;
+ while ((func = *next)) {
+ if (strcmp(func->name, func_name) == 0) {
+ *next = func->next;
+ free_func_handle(func);
+ break;
+ }
+ next = &func->next;
+ }
+}
+
+static enum event_type
+process_func_handler(struct event_format *event, struct tep_function_handler *func,
+ struct print_arg *arg, char **tok)
+{
+ struct print_arg **next_arg;
+ struct print_arg *farg;
+ enum event_type type;
+ char *token;
+ int i;
+
+ arg->type = PRINT_FUNC;
+ arg->func.func = func;
+
+ *tok = NULL;
+
+ next_arg = &(arg->func.args);
+ for (i = 0; i < func->nr_args; i++) {
+ farg = alloc_arg();
+ if (!farg) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ return EVENT_ERROR;
+ }
+
+ type = process_arg(event, farg, &token);
+ if (i < (func->nr_args - 1)) {
+ if (type != EVENT_DELIM || strcmp(token, ",") != 0) {
+ do_warning_event(event,
+ "Error: function '%s()' expects %d arguments but event %s only uses %d",
+ func->name, func->nr_args,
+ event->name, i + 1);
+ goto err;
+ }
+ } else {
+ if (type != EVENT_DELIM || strcmp(token, ")") != 0) {
+ do_warning_event(event,
+ "Error: function '%s()' only expects %d arguments but event %s has more",
+ func->name, func->nr_args, event->name);
+ goto err;
+ }
+ }
+
+ *next_arg = farg;
+ next_arg = &(farg->next);
+ free_token(token);
+ }
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+err:
+ free_arg(farg);
+ free_token(token);
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_function(struct event_format *event, struct print_arg *arg,
+ char *token, char **tok)
+{
+ struct tep_function_handler *func;
+
+ if (strcmp(token, "__print_flags") == 0) {
+ free_token(token);
+ is_flag_field = 1;
+ return process_flags(event, arg, tok);
+ }
+ if (strcmp(token, "__print_symbolic") == 0) {
+ free_token(token);
+ is_symbolic_field = 1;
+ return process_symbols(event, arg, tok);
+ }
+ if (strcmp(token, "__print_hex") == 0) {
+ free_token(token);
+ return process_hex(event, arg, tok);
+ }
+ if (strcmp(token, "__print_hex_str") == 0) {
+ free_token(token);
+ return process_hex_str(event, arg, tok);
+ }
+ if (strcmp(token, "__print_array") == 0) {
+ free_token(token);
+ return process_int_array(event, arg, tok);
+ }
+ if (strcmp(token, "__get_str") == 0) {
+ free_token(token);
+ return process_str(event, arg, tok);
+ }
+ if (strcmp(token, "__get_bitmask") == 0) {
+ free_token(token);
+ return process_bitmask(event, arg, tok);
+ }
+ if (strcmp(token, "__get_dynamic_array") == 0) {
+ free_token(token);
+ return process_dynamic_array(event, arg, tok);
+ }
+ if (strcmp(token, "__get_dynamic_array_len") == 0) {
+ free_token(token);
+ return process_dynamic_array_len(event, arg, tok);
+ }
+
+ func = find_func_handler(event->pevent, token);
+ if (func) {
+ free_token(token);
+ return process_func_handler(event, func, arg, tok);
+ }
+
+ do_warning_event(event, "function %s not defined", token);
+ free_token(token);
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_arg_token(struct event_format *event, struct print_arg *arg,
+ char **tok, enum event_type type)
+{
+ char *token;
+ char *atom;
+
+ token = *tok;
+
+ switch (type) {
+ case EVENT_ITEM:
+ if (strcmp(token, "REC") == 0) {
+ free_token(token);
+ type = process_entry(event, arg, &token);
+ break;
+ }
+ atom = token;
+ /* test the next token */
+ type = read_token_item(&token);
+
+ /*
+ * If the next token is a parenthesis, then this
+ * is a function.
+ */
+ if (type == EVENT_DELIM && strcmp(token, "(") == 0) {
+ free_token(token);
+ token = NULL;
+ /* this will free atom. */
+ type = process_function(event, arg, atom, &token);
+ break;
+ }
+ /* atoms can be more than one token long */
+ while (type == EVENT_ITEM) {
+ char *new_atom;
+ new_atom = realloc(atom,
+ strlen(atom) + strlen(token) + 2);
+ if (!new_atom) {
+ free(atom);
+ *tok = NULL;
+ free_token(token);
+ return EVENT_ERROR;
+ }
+ atom = new_atom;
+ strcat(atom, " ");
+ strcat(atom, token);
+ free_token(token);
+ type = read_token_item(&token);
+ }
+
+ arg->type = PRINT_ATOM;
+ arg->atom.atom = atom;
+ break;
+
+ case EVENT_DQUOTE:
+ case EVENT_SQUOTE:
+ arg->type = PRINT_ATOM;
+ arg->atom.atom = token;
+ type = read_token_item(&token);
+ break;
+ case EVENT_DELIM:
+ if (strcmp(token, "(") == 0) {
+ free_token(token);
+ type = process_paren(event, arg, &token);
+ break;
+ }
+ /* fall through */
+ case EVENT_OP:
+ /* handle single ops */
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = NULL;
+ type = process_op(event, arg, &token);
+
+ /* On error, the op is freed */
+ if (type == EVENT_ERROR)
+ arg->op.op = NULL;
+
+ /* return error type if errored */
+ break;
+
+ case EVENT_ERROR ... EVENT_NEWLINE:
+ default:
+ do_warning_event(event, "unexpected type %d", type);
+ return EVENT_ERROR;
+ }
+ *tok = token;
+
+ return type;
+}
+
+static int event_read_print_args(struct event_format *event, struct print_arg **list)
+{
+ enum event_type type = EVENT_ERROR;
+ struct print_arg *arg;
+ char *token;
+ int args = 0;
+
+ do {
+ if (type == EVENT_NEWLINE) {
+ type = read_token_item(&token);
+ continue;
+ }
+
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ return -1;
+ }
+
+ type = process_arg(event, arg, &token);
+
+ if (type == EVENT_ERROR) {
+ free_token(token);
+ free_arg(arg);
+ return -1;
+ }
+
+ *list = arg;
+ args++;
+
+ if (type == EVENT_OP) {
+ type = process_op(event, arg, &token);
+ free_token(token);
+ if (type == EVENT_ERROR) {
+ *list = NULL;
+ free_arg(arg);
+ return -1;
+ }
+ list = &arg->next;
+ continue;
+ }
+
+ if (type == EVENT_DELIM && strcmp(token, ",") == 0) {
+ free_token(token);
+ *list = arg;
+ list = &arg->next;
+ continue;
+ }
+ break;
+ } while (type != EVENT_NONE);
+
+ if (type != EVENT_NONE && type != EVENT_ERROR)
+ free_token(token);
+
+ return args;
+}
+
+static int event_read_print(struct event_format *event)
+{
+ enum event_type type;
+ char *token;
+ int ret;
+
+ if (read_expected_item(EVENT_ITEM, "print") < 0)
+ return -1;
+
+ if (read_expected(EVENT_ITEM, "fmt") < 0)
+ return -1;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return -1;
+
+ if (read_expect_type(EVENT_DQUOTE, &token) < 0)
+ goto fail;
+
+ concat:
+ event->print_fmt.format = token;
+ event->print_fmt.args = NULL;
+
+ /* ok to have no arg */
+ type = read_token_item(&token);
+
+ if (type == EVENT_NONE)
+ return 0;
+
+ /* Handle concatenation of print lines */
+ if (type == EVENT_DQUOTE) {
+ char *cat;
+
+ if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
+ goto fail;
+ free_token(token);
+ free_token(event->print_fmt.format);
+ event->print_fmt.format = NULL;
+ token = cat;
+ goto concat;
+ }
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto fail;
+
+ free_token(token);
+
+ ret = event_read_print_args(event, &event->print_fmt.args);
+ if (ret < 0)
+ return -1;
+
+ return ret;
+
+ fail:
+ free_token(token);
+ return -1;
+}
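+
+/*
+ * Example of what event_read_print() parses (abridged, illustrative):
+ *
+ * print fmt: "comm=%s pid=%d", REC->comm, REC->pid
+ *
+ * A format string split across several quoted pieces is joined by
+ * the "concat" loop above before the arguments are read.
+ */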
+
+/**
+ * tep_find_common_field - return a common field by event
+ * @event: handle for the event
+ * @name: the name of the common field to return
+ *
+ * Returns a common field from the event by the given @name.
+ * This only searches the common fields and not all fields.
+ */
+struct format_field *
+tep_find_common_field(struct event_format *event, const char *name)
+{
+ struct format_field *format;
+
+ for (format = event->format.common_fields;
+ format; format = format->next) {
+ if (strcmp(format->name, name) == 0)
+ break;
+ }
+
+ return format;
+}
+
+/**
+ * tep_find_field - find a non-common field
+ * @event: handle for the event
+ * @name: the name of the non-common field
+ *
+ * Returns a non-common field by the given @name.
+ * This does not search common fields.
+ */
+struct format_field *
+tep_find_field(struct event_format *event, const char *name)
+{
+ struct format_field *format;
+
+ for (format = event->format.fields;
+ format; format = format->next) {
+ if (strcmp(format->name, name) == 0)
+ break;
+ }
+
+ return format;
+}
+
+/**
+ * tep_find_any_field - find any field by name
+ * @event: handle for the event
+ * @name: the name of the field
+ *
+ * Returns a field by the given @name.
+ * This searches the common field names first, then
+ * the non-common ones if a common one was not found.
+ */
+struct format_field *
+tep_find_any_field(struct event_format *event, const char *name)
+{
+ struct format_field *format;
+
+ format = tep_find_common_field(event, name);
+ if (format)
+ return format;
+ return tep_find_field(event, name);
+}
+
+/**
+ * tep_read_number - read a number from data
+ * @pevent: handle for the pevent
+ * @ptr: the raw data
+ * @size: the size of the data that holds the number
+ *
+ * Returns the number (converted to host) from the
+ * raw data.
+ */
+unsigned long long tep_read_number(struct tep_handle *pevent,
+ const void *ptr, int size)
+{
+ switch (size) {
+ case 1:
+ return *(unsigned char *)ptr;
+ case 2:
+ return data2host2(pevent, ptr);
+ case 4:
+ return data2host4(pevent, ptr);
+ case 8:
+ return data2host8(pevent, ptr);
+ default:
+ /* BUG! */
+ return 0;
+ }
+}
+
+/**
+ * tep_read_number_field - read a number from data
+ * @field: a handle to the field
+ * @data: the raw data to read
+ * @value: the value to place the number in
+ *
+ * Reads raw data according to a field offset and size,
+ * and translates it into @value.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int tep_read_number_field(struct format_field *field, const void *data,
+ unsigned long long *value)
+{
+ if (!field)
+ return -1;
+ switch (field->size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ *value = tep_read_number(field->event->pevent,
+ data + field->offset, field->size);
+ return 0;
+ default:
+ return -1;
+ }
+}
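+
+/*
+ * Typical use (a sketch; "data" stands for the raw event payload and
+ * is not defined in this file):
+ *
+ * struct format_field *field = tep_find_field(event, "pid");
+ * unsigned long long pid;
+ *
+ * if (field && tep_read_number_field(field, data, &pid) == 0)
+ * printf("pid=%llu\n", pid);
+ */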
+
+static int get_common_info(struct tep_handle *pevent,
+ const char *type, int *offset, int *size)
+{
+ struct event_format *event;
+ struct format_field *field;
+
+ /*
+ * All events should have the same common elements.
+ * Pick any event to find where the type is.
+ */
+ if (!pevent->events) {
+ do_warning("no event_list!");
+ return -1;
+ }
+
+ event = pevent->events[0];
+ field = tep_find_common_field(event, type);
+ if (!field)
+ return -1;
+
+ *offset = field->offset;
+ *size = field->size;
+
+ return 0;
+}
+
+static int __parse_common(struct tep_handle *pevent, void *data,
+ int *size, int *offset, const char *name)
+{
+ int ret;
+
+ if (!*size) {
+ ret = get_common_info(pevent, name, offset, size);
+ if (ret < 0)
+ return ret;
+ }
+ return tep_read_number(pevent, data + *offset, *size);
+}
+
+static int trace_parse_common_type(struct tep_handle *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->type_size, &pevent->type_offset,
+ "common_type");
+}
+
+static int parse_common_pid(struct tep_handle *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->pid_size, &pevent->pid_offset,
+ "common_pid");
+}
+
+static int parse_common_pc(struct tep_handle *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->pc_size, &pevent->pc_offset,
+ "common_preempt_count");
+}
+
+static int parse_common_flags(struct tep_handle *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->flags_size, &pevent->flags_offset,
+ "common_flags");
+}
+
+static int parse_common_lock_depth(struct tep_handle *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->ld_size, &pevent->ld_offset,
+ "common_lock_depth");
+}
+
+static int parse_common_migrate_disable(struct tep_handle *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->ld_size, &pevent->ld_offset,
+ "common_migrate_disable");
+}
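+
+/*
+ * Each helper above caches the offset/size of its common field in the
+ * pevent handle on first use, so e.g. parse_common_pid() costs one
+ * tep_find_common_field() lookup for the first record and a plain
+ * tep_read_number() for every record after that.
+ */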
+
+static int events_id_cmp(const void *a, const void *b);
+
+/**
+ * tep_find_event - find an event by given id
+ * @pevent: a handle to the pevent
+ * @id: the id of the event
+ *
+ * Returns an event that has a given @id.
+ */
+struct event_format *tep_find_event(struct tep_handle *pevent, int id)
+{
+ struct event_format **eventptr;
+ struct event_format key;
+ struct event_format *pkey = &key;
+
+ /* Check cache first */
+ if (pevent->last_event && pevent->last_event->id == id)
+ return pevent->last_event;
+
+ key.id = id;
+
+ eventptr = bsearch(&pkey, pevent->events, pevent->nr_events,
+ sizeof(*pevent->events), events_id_cmp);
+
+ if (eventptr) {
+ pevent->last_event = *eventptr;
+ return *eventptr;
+ }
+
+ return NULL;
+}
+
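+/*
+ * Usage sketch (editor's illustration): assuming "pevent" holds parsed
+ * events, an event can be looked up by the id stored in its records:
+ *
+ *	struct event_format *event;
+ *
+ *	event = tep_find_event(pevent, id);
+ *	if (event)
+ *		printf("%s:%s\n", event->system, event->name);
+ */
+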
+/**
+ * tep_find_event_by_name - find an event by given name
+ * @pevent: a handle to the pevent
+ * @sys: the system name to search for
+ * @name: the name of the event to search for
+ *
+ * This returns an event with a given @name and under the system
+ * @sys. If @sys is NULL the first event with @name is returned.
+ */
+struct event_format *
+tep_find_event_by_name(struct tep_handle *pevent,
+ const char *sys, const char *name)
+{
+ struct event_format *event;
+ int i;
+
+ if (pevent->last_event &&
+ strcmp(pevent->last_event->name, name) == 0 &&
+ (!sys || strcmp(pevent->last_event->system, sys) == 0))
+ return pevent->last_event;
+
+ for (i = 0; i < pevent->nr_events; i++) {
+ event = pevent->events[i];
+ if (strcmp(event->name, name) == 0) {
+ if (!sys)
+ break;
+ if (strcmp(event->system, sys) == 0)
+ break;
+ }
+ }
+ if (i == pevent->nr_events)
+ event = NULL;
+
+ pevent->last_event = event;
+ return event;
+}
+
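+/*
+ * Usage sketch (editor's illustration): look up sched_switch within the
+ * "sched" system; passing a NULL @sys would match the first event with
+ * that name:
+ *
+ *	struct event_format *event;
+ *
+ *	event = tep_find_event_by_name(pevent, "sched", "sched_switch");
+ */
+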
+static unsigned long long
+eval_num_arg(void *data, int size, struct event_format *event, struct print_arg *arg)
+{
+ struct tep_handle *pevent = event->pevent;
+ unsigned long long val = 0;
+ unsigned long long left, right;
+ struct print_arg *typearg = NULL;
+ struct print_arg *larg;
+ unsigned long offset;
+ unsigned int field_size;
+
+ switch (arg->type) {
+ case PRINT_NULL:
+ /* ?? */
+ return 0;
+ case PRINT_ATOM:
+ return strtoull(arg->atom.atom, NULL, 0);
+ case PRINT_FIELD:
+ if (!arg->field.field) {
+ arg->field.field = tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field)
+ goto out_warning_field;
+
+ }
+ /* must be a number */
+ val = tep_read_number(pevent, data + arg->field.field->offset,
+ arg->field.field->size);
+ break;
+ case PRINT_FLAGS:
+ case PRINT_SYMBOL:
+ case PRINT_INT_ARRAY:
+ case PRINT_HEX:
+ case PRINT_HEX_STR:
+ break;
+ case PRINT_TYPE:
+ val = eval_num_arg(data, size, event, arg->typecast.item);
+ return eval_type(val, arg, 0);
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ case PRINT_BITMASK:
+ return 0;
+ case PRINT_FUNC: {
+ struct trace_seq s;
+ trace_seq_init(&s);
+ val = process_defined_func(&s, data, size, event, arg);
+ trace_seq_destroy(&s);
+ return val;
+ }
+ case PRINT_OP:
+ if (strcmp(arg->op.op, "[") == 0) {
+ /*
+ * Arrays are special, since we don't want
+ * to read the arg as is.
+ */
+ right = eval_num_arg(data, size, event, arg->op.right);
+
+ /* handle typecasts */
+ larg = arg->op.left;
+ while (larg->type == PRINT_TYPE) {
+ if (!typearg)
+ typearg = larg;
+ larg = larg->typecast.item;
+ }
+
+ /* Default to long size */
+ field_size = pevent->long_size;
+
+ switch (larg->type) {
+ case PRINT_DYNAMIC_ARRAY:
+ offset = tep_read_number(pevent,
+ data + larg->dynarray.field->offset,
+ larg->dynarray.field->size);
+ if (larg->dynarray.field->elementsize)
+ field_size = larg->dynarray.field->elementsize;
+ /*
+ * The actual length of the dynamic array is stored
+ * in the top half of the field, and the offset
+ * is in the bottom half of the 32 bit field.
+ */
+ offset &= 0xffff;
+ offset += right;
+ break;
+ case PRINT_FIELD:
+ if (!larg->field.field) {
+ larg->field.field =
+ tep_find_any_field(event, larg->field.name);
+ if (!larg->field.field) {
+ arg = larg;
+ goto out_warning_field;
+ }
+ }
+ field_size = larg->field.field->elementsize;
+ offset = larg->field.field->offset +
+ right * larg->field.field->elementsize;
+ break;
+ default:
+ goto default_op; /* oops, all bets off */
+ }
+ val = tep_read_number(pevent,
+ data + offset, field_size);
+ if (typearg)
+ val = eval_type(val, typearg, 1);
+ break;
+ } else if (strcmp(arg->op.op, "?") == 0) {
+ left = eval_num_arg(data, size, event, arg->op.left);
+ arg = arg->op.right;
+ if (left)
+ val = eval_num_arg(data, size, event, arg->op.left);
+ else
+ val = eval_num_arg(data, size, event, arg->op.right);
+ break;
+ }
+ default_op:
+ left = eval_num_arg(data, size, event, arg->op.left);
+ right = eval_num_arg(data, size, event, arg->op.right);
+ switch (arg->op.op[0]) {
+ case '!':
+ switch (arg->op.op[1]) {
+ case 0:
+ val = !right;
+ break;
+ case '=':
+ val = left != right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case '~':
+ val = ~right;
+ break;
+ case '|':
+ if (arg->op.op[1])
+ val = left || right;
+ else
+ val = left | right;
+ break;
+ case '&':
+ if (arg->op.op[1])
+ val = left && right;
+ else
+ val = left & right;
+ break;
+ case '<':
+ switch (arg->op.op[1]) {
+ case 0:
+ val = left < right;
+ break;
+ case '<':
+ val = left << right;
+ break;
+ case '=':
+ val = left <= right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case '>':
+ switch (arg->op.op[1]) {
+ case 0:
+ val = left > right;
+ break;
+ case '>':
+ val = left >> right;
+ break;
+ case '=':
+ val = left >= right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case '=':
+ if (arg->op.op[1] != '=')
+ goto out_warning_op;
+
+ val = left == right;
+ break;
+ case '-':
+ val = left - right;
+ break;
+ case '+':
+ val = left + right;
+ break;
+ case '/':
+ val = left / right;
+ break;
+ case '%':
+ val = left % right;
+ break;
+ case '*':
+ val = left * right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case PRINT_DYNAMIC_ARRAY_LEN:
+ offset = tep_read_number(pevent,
+ data + arg->dynarray.field->offset,
+ arg->dynarray.field->size);
+ /*
+ * The total allocated length of the dynamic array is
+ * stored in the top half of the field, and the offset
+ * is in the bottom half of the 32 bit field.
+ */
+ val = (unsigned long long)(offset >> 16);
+ break;
+ case PRINT_DYNAMIC_ARRAY:
+ /* Without [], we pass the address to the dynamic data */
+ offset = tep_read_number(pevent,
+ data + arg->dynarray.field->offset,
+ arg->dynarray.field->size);
+ /*
+ * The total allocated length of the dynamic array is
+ * stored in the top half of the field, and the offset
+ * is in the bottom half of the 32 bit field.
+ */
+ offset &= 0xffff;
+ val = (unsigned long long)((unsigned long)data + offset);
+ break;
+ default: /* not sure what to do there */
+ return 0;
+ }
+ return val;
+
+out_warning_op:
+ do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op);
+ return 0;
+
+out_warning_field:
+ do_warning_event(event, "%s: field %s not found",
+ __func__, arg->field.name);
+ return 0;
+}
+
+struct flag {
+ const char *name;
+ unsigned long long value;
+};
+
+static const struct flag flags[] = {
+ { "HI_SOFTIRQ", 0 },
+ { "TIMER_SOFTIRQ", 1 },
+ { "NET_TX_SOFTIRQ", 2 },
+ { "NET_RX_SOFTIRQ", 3 },
+ { "BLOCK_SOFTIRQ", 4 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
+ { "TASKLET_SOFTIRQ", 6 },
+ { "SCHED_SOFTIRQ", 7 },
+ { "HRTIMER_SOFTIRQ", 8 },
+ { "RCU_SOFTIRQ", 9 },
+
+ { "HRTIMER_NORESTART", 0 },
+ { "HRTIMER_RESTART", 1 },
+};
+
+static long long eval_flag(const char *flag)
+{
+ int i;
+
+ /*
+ * Some flags in the format files do not get converted.
+ * If the flag is not numeric, see if it is something that
+ * we already know about.
+ */
+ if (isdigit(flag[0]))
+ return strtoull(flag, NULL, 0);
+
+ for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+ if (strcmp(flags[i].name, flag) == 0)
+ return flags[i].value;
+
+ return -1LL;
+}
+
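+/*
+ * For example (editor's illustration), a format file may reference
+ * "NET_RX_SOFTIRQ" symbolically; eval_flag("NET_RX_SOFTIRQ") resolves it
+ * to 3 via the table above, while eval_flag("9") takes the numeric path
+ * and returns 9.
+ */
+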
+static void print_str_to_seq(struct trace_seq *s, const char *format,
+ int len_arg, const char *str)
+{
+ if (len_arg >= 0)
+ trace_seq_printf(s, format, len_arg, str);
+ else
+ trace_seq_printf(s, format, str);
+}
+
+static void print_bitmask_to_seq(struct tep_handle *pevent,
+ struct trace_seq *s, const char *format,
+ int len_arg, const void *data, int size)
+{
+ int nr_bits = size * 8;
+ int str_size = (nr_bits + 3) / 4;
+ int len = 0;
+ char buf[3];
+ char *str;
+ int index;
+ int i;
+
+ /*
+ * The kernel likes to put in commas every 32 bits, we
+ * can do the same.
+ */
+ str_size += (nr_bits - 1) / 32;
+
+ str = malloc(str_size + 1);
+ if (!str) {
+ do_warning("%s: not enough memory!", __func__);
+ return;
+ }
+ str[str_size] = 0;
+
+ /* Start out with -2 for the two chars per byte */
+ for (i = str_size - 2; i >= 0; i -= 2) {
+ /*
+ * data points to a bit mask of size bytes.
+ * In the kernel, this is an array of long words, thus
+		 * endianness is very important.
+ */
+ if (pevent->file_bigendian)
+ index = size - (len + 1);
+ else
+ index = len;
+
+ snprintf(buf, 3, "%02x", *((unsigned char *)data + index));
+ memcpy(str + i, buf, 2);
+ len++;
+ if (!(len & 3) && i > 0) {
+ i--;
+ str[i] = ',';
+ }
+ }
+
+ if (len_arg >= 0)
+ trace_seq_printf(s, format, len_arg, str);
+ else
+ trace_seq_printf(s, format, str);
+
+ free(str);
+}
+
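+/*
+ * Example of the output (editor's illustration): an 8-byte bitmask read
+ * from a little-endian file whose bytes are 00 00 00 00 0f 00 00 01 is
+ * rendered most significant byte first, with a comma every 32 bits:
+ *
+ *	"0100000f,00000000"
+ */
+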
+static void print_str_arg(struct trace_seq *s, void *data, int size,
+ struct event_format *event, const char *format,
+ int len_arg, struct print_arg *arg)
+{
+ struct tep_handle *pevent = event->pevent;
+ struct print_flag_sym *flag;
+ struct format_field *field;
+ struct printk_map *printk;
+ long long val, fval;
+ unsigned long long addr;
+ char *str;
+ unsigned char *hex;
+ int print;
+ int i, len;
+
+ switch (arg->type) {
+ case PRINT_NULL:
+ /* ?? */
+ return;
+ case PRINT_ATOM:
+ print_str_to_seq(s, format, len_arg, arg->atom.atom);
+ return;
+ case PRINT_FIELD:
+ field = arg->field.field;
+ if (!field) {
+ field = tep_find_any_field(event, arg->field.name);
+ if (!field) {
+ str = arg->field.name;
+ goto out_warning_field;
+ }
+ arg->field.field = field;
+ }
+		/* Zero-sized fields mean the rest of the data */
+ len = field->size ? : size - field->offset;
+
+ /*
+ * Some events pass in pointers. If this is not an array
+ * and the size is the same as long_size, assume that it
+ * is a pointer.
+ */
+ if (!(field->flags & FIELD_IS_ARRAY) &&
+ field->size == pevent->long_size) {
+
+ /* Handle heterogeneous recording and processing
+ * architectures
+ *
+ * CASE I:
+ * Traces recorded on 32-bit devices (32-bit
+ * addressing) and processed on 64-bit devices:
+ * In this case, only 32 bits should be read.
+ *
+ * CASE II:
+ * Traces recorded on 64 bit devices and processed
+ * on 32-bit devices:
+ * In this case, 64 bits must be read.
+ */
+ addr = (pevent->long_size == 8) ?
+ *(unsigned long long *)(data + field->offset) :
+ (unsigned long long)*(unsigned int *)(data + field->offset);
+
+ /* Check if it matches a print format */
+ printk = find_printk(pevent, addr);
+ if (printk)
+ trace_seq_puts(s, printk->printk);
+ else
+ trace_seq_printf(s, "%llx", addr);
+ break;
+ }
+ str = malloc(len + 1);
+ if (!str) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ return;
+ }
+ memcpy(str, data + field->offset, len);
+ str[len] = 0;
+ print_str_to_seq(s, format, len_arg, str);
+ free(str);
+ break;
+ case PRINT_FLAGS:
+ val = eval_num_arg(data, size, event, arg->flags.field);
+ print = 0;
+ for (flag = arg->flags.flags; flag; flag = flag->next) {
+ fval = eval_flag(flag->value);
+ if (!val && fval < 0) {
+ print_str_to_seq(s, format, len_arg, flag->str);
+ break;
+ }
+ if (fval > 0 && (val & fval) == fval) {
+ if (print && arg->flags.delim)
+ trace_seq_puts(s, arg->flags.delim);
+ print_str_to_seq(s, format, len_arg, flag->str);
+ print = 1;
+ val &= ~fval;
+ }
+ }
+ if (val) {
+ if (print && arg->flags.delim)
+ trace_seq_puts(s, arg->flags.delim);
+ trace_seq_printf(s, "0x%llx", val);
+ }
+ break;
+ case PRINT_SYMBOL:
+ val = eval_num_arg(data, size, event, arg->symbol.field);
+ for (flag = arg->symbol.symbols; flag; flag = flag->next) {
+ fval = eval_flag(flag->value);
+ if (val == fval) {
+ print_str_to_seq(s, format, len_arg, flag->str);
+ break;
+ }
+ }
+ if (!flag)
+ trace_seq_printf(s, "0x%llx", val);
+ break;
+ case PRINT_HEX:
+ case PRINT_HEX_STR:
+ if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) {
+ unsigned long offset;
+ offset = tep_read_number(pevent,
+ data + arg->hex.field->dynarray.field->offset,
+ arg->hex.field->dynarray.field->size);
+ hex = data + (offset & 0xffff);
+ } else {
+ field = arg->hex.field->field.field;
+ if (!field) {
+ str = arg->hex.field->field.name;
+ field = tep_find_any_field(event, str);
+ if (!field)
+ goto out_warning_field;
+ arg->hex.field->field.field = field;
+ }
+ hex = data + field->offset;
+ }
+ len = eval_num_arg(data, size, event, arg->hex.size);
+ for (i = 0; i < len; i++) {
+ if (i && arg->type == PRINT_HEX)
+ trace_seq_putc(s, ' ');
+ trace_seq_printf(s, "%02x", hex[i]);
+ }
+ break;
+
+ case PRINT_INT_ARRAY: {
+ void *num;
+ int el_size;
+
+ if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) {
+ unsigned long offset;
+ struct format_field *field =
+ arg->int_array.field->dynarray.field;
+ offset = tep_read_number(pevent,
+ data + field->offset,
+ field->size);
+ num = data + (offset & 0xffff);
+ } else {
+ field = arg->int_array.field->field.field;
+ if (!field) {
+ str = arg->int_array.field->field.name;
+ field = tep_find_any_field(event, str);
+ if (!field)
+ goto out_warning_field;
+ arg->int_array.field->field.field = field;
+ }
+ num = data + field->offset;
+ }
+ len = eval_num_arg(data, size, event, arg->int_array.count);
+ el_size = eval_num_arg(data, size, event,
+ arg->int_array.el_size);
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_putc(s, ' ');
+
+ if (el_size == 1) {
+ trace_seq_printf(s, "%u", *(uint8_t *)num);
+ } else if (el_size == 2) {
+ trace_seq_printf(s, "%u", *(uint16_t *)num);
+ } else if (el_size == 4) {
+ trace_seq_printf(s, "%u", *(uint32_t *)num);
+ } else if (el_size == 8) {
+ trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
+ } else {
+ trace_seq_printf(s, "BAD SIZE:%d 0x%x",
+ el_size, *(uint8_t *)num);
+ el_size = 1;
+ }
+
+ num += el_size;
+ }
+ break;
+ }
+ case PRINT_TYPE:
+ break;
+ case PRINT_STRING: {
+ int str_offset;
+
+ if (arg->string.offset == -1) {
+ struct format_field *f;
+
+ f = tep_find_any_field(event, arg->string.string);
+ arg->string.offset = f->offset;
+ }
+ str_offset = data2host4(pevent, data + arg->string.offset);
+ str_offset &= 0xffff;
+ print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
+ break;
+ }
+ case PRINT_BSTRING:
+ print_str_to_seq(s, format, len_arg, arg->string.string);
+ break;
+ case PRINT_BITMASK: {
+ int bitmask_offset;
+ int bitmask_size;
+
+ if (arg->bitmask.offset == -1) {
+ struct format_field *f;
+
+ f = tep_find_any_field(event, arg->bitmask.bitmask);
+ arg->bitmask.offset = f->offset;
+ }
+ bitmask_offset = data2host4(pevent, data + arg->bitmask.offset);
+ bitmask_size = bitmask_offset >> 16;
+ bitmask_offset &= 0xffff;
+ print_bitmask_to_seq(pevent, s, format, len_arg,
+ data + bitmask_offset, bitmask_size);
+ break;
+ }
+ case PRINT_OP:
+ /*
+ * The only op for string should be ? :
+ */
+ if (arg->op.op[0] != '?')
+ return;
+ val = eval_num_arg(data, size, event, arg->op.left);
+ if (val)
+ print_str_arg(s, data, size, event,
+ format, len_arg, arg->op.right->op.left);
+ else
+ print_str_arg(s, data, size, event,
+ format, len_arg, arg->op.right->op.right);
+ break;
+ case PRINT_FUNC:
+ process_defined_func(s, data, size, event, arg);
+ break;
+ default:
+ /* well... */
+ break;
+ }
+
+ return;
+
+out_warning_field:
+ do_warning_event(event, "%s: field %s not found",
+ __func__, arg->field.name);
+}
+
+static unsigned long long
+process_defined_func(struct trace_seq *s, void *data, int size,
+ struct event_format *event, struct print_arg *arg)
+{
+ struct tep_function_handler *func_handle = arg->func.func;
+ struct func_params *param;
+ unsigned long long *args;
+ unsigned long long ret;
+ struct print_arg *farg;
+ struct trace_seq str;
+ struct save_str {
+ struct save_str *next;
+ char *str;
+ } *strings = NULL, *string;
+ int i;
+
+ if (!func_handle->nr_args) {
+ ret = (*func_handle->func)(s, NULL);
+ goto out;
+ }
+
+ farg = arg->func.args;
+ param = func_handle->params;
+
+ ret = ULLONG_MAX;
+ args = malloc(sizeof(*args) * func_handle->nr_args);
+ if (!args)
+ goto out;
+
+ for (i = 0; i < func_handle->nr_args; i++) {
+ switch (param->type) {
+ case TEP_FUNC_ARG_INT:
+ case TEP_FUNC_ARG_LONG:
+ case TEP_FUNC_ARG_PTR:
+ args[i] = eval_num_arg(data, size, event, farg);
+ break;
+ case TEP_FUNC_ARG_STRING:
+ trace_seq_init(&str);
+ print_str_arg(&str, data, size, event, "%s", -1, farg);
+ trace_seq_terminate(&str);
+ string = malloc(sizeof(*string));
+ if (!string) {
+ do_warning_event(event, "%s(%d): malloc str",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ string->next = strings;
+ string->str = strdup(str.buffer);
+ if (!string->str) {
+ free(string);
+ do_warning_event(event, "%s(%d): malloc str",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ args[i] = (uintptr_t)string->str;
+ strings = string;
+ trace_seq_destroy(&str);
+ break;
+ default:
+ /*
+ * Something went totally wrong, this is not
+ * an input error, something in this code broke.
+ */
+ do_warning_event(event, "Unexpected end of arguments\n");
+ goto out_free;
+ }
+ farg = farg->next;
+ param = param->next;
+ }
+
+ ret = (*func_handle->func)(s, args);
+out_free:
+ free(args);
+ while (strings) {
+ string = strings;
+ strings = string->next;
+ free(string->str);
+ free(string);
+ }
+
+ out:
+ /* TBD : handle return type here */
+ return ret;
+}
+
+static void free_args(struct print_arg *args)
+{
+ struct print_arg *next;
+
+ while (args) {
+ next = args->next;
+
+ free_arg(args);
+ args = next;
+ }
+}
+
+static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event)
+{
+ struct tep_handle *pevent = event->pevent;
+ struct format_field *field, *ip_field;
+ struct print_arg *args, *arg, **next;
+ unsigned long long ip, val;
+ char *ptr;
+ void *bptr;
+ int vsize;
+
+ field = pevent->bprint_buf_field;
+ ip_field = pevent->bprint_ip_field;
+
+ if (!field) {
+ field = tep_find_field(event, "buf");
+ if (!field) {
+ do_warning_event(event, "can't find buffer field for binary printk");
+ return NULL;
+ }
+ ip_field = tep_find_field(event, "ip");
+ if (!ip_field) {
+ do_warning_event(event, "can't find ip field for binary printk");
+ return NULL;
+ }
+ pevent->bprint_buf_field = field;
+ pevent->bprint_ip_field = ip_field;
+ }
+
+ ip = tep_read_number(pevent, data + ip_field->offset, ip_field->size);
+
+ /*
+ * The first arg is the IP pointer.
+ */
+ args = alloc_arg();
+ if (!args) {
+ do_warning_event(event, "%s(%d): not enough memory!",
+ __func__, __LINE__);
+ return NULL;
+ }
+ arg = args;
+ arg->next = NULL;
+ next = &arg->next;
+
+ arg->type = PRINT_ATOM;
+
+ if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
+ goto out_free;
+
+ /* skip the first "%ps: " */
+ for (ptr = fmt + 5, bptr = data + field->offset;
+ bptr < data + size && *ptr; ptr++) {
+ int ls = 0;
+
+ if (*ptr == '%') {
+ process_again:
+ ptr++;
+ switch (*ptr) {
+ case '%':
+ break;
+ case 'l':
+ ls++;
+ goto process_again;
+ case 'L':
+ ls = 2;
+ goto process_again;
+ case '0' ... '9':
+ goto process_again;
+ case '.':
+ goto process_again;
+ case 'z':
+ case 'Z':
+ ls = 1;
+ goto process_again;
+ case 'p':
+ ls = 1;
+ if (isalnum(ptr[1])) {
+ ptr++;
+ /* Check for special pointers */
+ switch (*ptr) {
+ case 's':
+ case 'S':
+ case 'f':
+ case 'F':
+ break;
+ default:
+ /*
+ * Older kernels do not process
+ * dereferenced pointers.
+ * Only process if the pointer
+				 * value is printable.
+ */
+ if (isprint(*(char *)bptr))
+ goto process_string;
+ }
+ }
+ /* fall through */
+ case 'd':
+ case 'u':
+ case 'x':
+ case 'i':
+ switch (ls) {
+ case 0:
+ vsize = 4;
+ break;
+ case 1:
+ vsize = pevent->long_size;
+ break;
+ case 2:
+ vsize = 8;
+ break;
+ default:
+ vsize = ls; /* ? */
+ break;
+ }
+ /* fall through */
+ case '*':
+ if (*ptr == '*')
+ vsize = 4;
+
+ /* the pointers are always 4 bytes aligned */
+ bptr = (void *)(((unsigned long)bptr + 3) &
+ ~3);
+ val = tep_read_number(pevent, bptr, vsize);
+ bptr += vsize;
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s(%d): not enough memory!",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ arg->next = NULL;
+ arg->type = PRINT_ATOM;
+ if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
+ free(arg);
+ goto out_free;
+ }
+ *next = arg;
+ next = &arg->next;
+ /*
+ * The '*' case means that an arg is used as the length.
+			 * We need to keep processing to find out what it is for.
+ */
+ if (*ptr == '*')
+ goto process_again;
+
+ break;
+ case 's':
+ process_string:
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s(%d): not enough memory!",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ arg->next = NULL;
+ arg->type = PRINT_BSTRING;
+ arg->string.string = strdup(bptr);
+ if (!arg->string.string)
+ goto out_free;
+ bptr += strlen(bptr) + 1;
+ *next = arg;
+ next = &arg->next;
+ default:
+ break;
+ }
+ }
+ }
+
+ return args;
+
+out_free:
+ free_args(args);
+ return NULL;
+}
+
+static char *
+get_bprint_format(void *data, int size __maybe_unused,
+ struct event_format *event)
+{
+ struct tep_handle *pevent = event->pevent;
+ unsigned long long addr;
+ struct format_field *field;
+ struct printk_map *printk;
+ char *format;
+
+ field = pevent->bprint_fmt_field;
+
+ if (!field) {
+ field = tep_find_field(event, "fmt");
+ if (!field) {
+ do_warning_event(event, "can't find format field for binary printk");
+ return NULL;
+ }
+ pevent->bprint_fmt_field = field;
+ }
+
+ addr = tep_read_number(pevent, data + field->offset, field->size);
+
+ printk = find_printk(pevent, addr);
+ if (!printk) {
+ if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0)
+ return NULL;
+ return format;
+ }
+
+ if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0)
+ return NULL;
+
+ return format;
+}
+
+static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
+ struct event_format *event, struct print_arg *arg)
+{
+ unsigned char *buf;
+ const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+
+ if (arg->type == PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return;
+ }
+
+ if (arg->type != PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
+ arg->type);
+ return;
+ }
+
+ if (mac == 'm')
+ fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+ if (!arg->field.field) {
+ arg->field.field =
+ tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning_event(event, "%s: field %s not found",
+ __func__, arg->field.name);
+ return;
+ }
+ }
+ if (arg->field.field->size != 6) {
+ trace_seq_printf(s, "INVALIDMAC");
+ return;
+ }
+ buf = data + arg->field.field->offset;
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+}
+
+static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
+{
+ const char *fmt;
+
+ if (i == 'i')
+ fmt = "%03d.%03d.%03d.%03d";
+ else
+ fmt = "%d.%d.%d.%d";
+
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+}
+
+static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
+{
+ return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) |
+ (unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL;
+}
+
+static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr)
+{
+ return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE);
+}
+
+static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
+{
+ int i, j, range;
+ unsigned char zerolength[8];
+ int longest = 1;
+ int colonpos = -1;
+ uint16_t word;
+ uint8_t hi, lo;
+ bool needcolon = false;
+ bool useIPv4;
+ struct in6_addr in6;
+
+ memcpy(&in6, addr, sizeof(struct in6_addr));
+
+ useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
+
+ memset(zerolength, 0, sizeof(zerolength));
+
+ if (useIPv4)
+ range = 6;
+ else
+ range = 8;
+
+ /* find position of longest 0 run */
+ for (i = 0; i < range; i++) {
+ for (j = i; j < range; j++) {
+ if (in6.s6_addr16[j] != 0)
+ break;
+ zerolength[i]++;
+ }
+ }
+ for (i = 0; i < range; i++) {
+ if (zerolength[i] > longest) {
+ longest = zerolength[i];
+ colonpos = i;
+ }
+ }
+ if (longest == 1) /* don't compress a single 0 */
+ colonpos = -1;
+
+ /* emit address */
+ for (i = 0; i < range; i++) {
+ if (i == colonpos) {
+ if (needcolon || i == 0)
+ trace_seq_printf(s, ":");
+ trace_seq_printf(s, ":");
+ needcolon = false;
+ i += longest - 1;
+ continue;
+ }
+ if (needcolon) {
+ trace_seq_printf(s, ":");
+ needcolon = false;
+ }
+ /* hex u16 without leading 0s */
+ word = ntohs(in6.s6_addr16[i]);
+ hi = word >> 8;
+ lo = word & 0xff;
+ if (hi)
+ trace_seq_printf(s, "%x%02x", hi, lo);
+ else
+ trace_seq_printf(s, "%x", lo);
+
+ needcolon = true;
+ }
+
+ if (useIPv4) {
+ if (needcolon)
+ trace_seq_printf(s, ":");
+ print_ip4_addr(s, 'I', &in6.s6_addr[12]);
+ }
+
+ return;
+}
+
+static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
+{
+ int j;
+
+ for (j = 0; j < 16; j += 2) {
+ trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]);
+ if (i == 'I' && j < 14)
+ trace_seq_printf(s, ":");
+ }
+}
+
+/*
+ * %pi4 print an IPv4 address with leading zeros
+ * %pI4 print an IPv4 address without leading zeros
+ * %pi6 print an IPv6 address without colons
+ * %pI6 print an IPv6 address with colons
+ * %pI6c print an IPv6 address in compressed form with colons
+ * %pISpc print an IP address based on sockaddr; p adds port.
+ */
+static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
+ void *data, int size, struct event_format *event,
+ struct print_arg *arg)
+{
+ unsigned char *buf;
+
+ if (arg->type == PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return 0;
+ }
+
+ if (arg->type != PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return 0;
+ }
+
+ if (!arg->field.field) {
+ arg->field.field =
+ tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning("%s: field %s not found",
+ __func__, arg->field.name);
+ return 0;
+ }
+ }
+
+ buf = data + arg->field.field->offset;
+
+ if (arg->field.field->size != 4) {
+ trace_seq_printf(s, "INVALIDIPv4");
+ return 0;
+ }
+ print_ip4_addr(s, i, buf);
+
+ return 0;
+}
+
+static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
+ void *data, int size, struct event_format *event,
+ struct print_arg *arg)
+{
+ char have_c = 0;
+ unsigned char *buf;
+ int rc = 0;
+
+ /* pI6c */
+ if (i == 'I' && *ptr == 'c') {
+ have_c = 1;
+ ptr++;
+ rc++;
+ }
+
+ if (arg->type == PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return rc;
+ }
+
+ if (arg->type != PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return rc;
+ }
+
+ if (!arg->field.field) {
+ arg->field.field =
+ tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning("%s: field %s not found",
+ __func__, arg->field.name);
+ return rc;
+ }
+ }
+
+ buf = data + arg->field.field->offset;
+
+ if (arg->field.field->size != 16) {
+ trace_seq_printf(s, "INVALIDIPv6");
+ return rc;
+ }
+
+ if (have_c)
+ print_ip6c_addr(s, buf);
+ else
+ print_ip6_addr(s, i, buf);
+
+ return rc;
+}
+
+static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
+ void *data, int size, struct event_format *event,
+ struct print_arg *arg)
+{
+ char have_c = 0, have_p = 0;
+ unsigned char *buf;
+ struct sockaddr_storage *sa;
+ int rc = 0;
+
+ /* pISpc */
+ if (i == 'I') {
+ if (*ptr == 'p') {
+ have_p = 1;
+ ptr++;
+ rc++;
+ }
+ if (*ptr == 'c') {
+ have_c = 1;
+ ptr++;
+ rc++;
+ }
+ }
+
+ if (arg->type == PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return rc;
+ }
+
+ if (arg->type != PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return rc;
+ }
+
+ if (!arg->field.field) {
+ arg->field.field =
+ tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning("%s: field %s not found",
+ __func__, arg->field.name);
+ return rc;
+ }
+ }
+
+ sa = (struct sockaddr_storage *) (data + arg->field.field->offset);
+
+ if (sa->ss_family == AF_INET) {
+ struct sockaddr_in *sa4 = (struct sockaddr_in *) sa;
+
+ if (arg->field.field->size < sizeof(struct sockaddr_in)) {
+ trace_seq_printf(s, "INVALIDIPv4");
+ return rc;
+ }
+
+ print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr);
+ if (have_p)
+ trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
+
+ } else if (sa->ss_family == AF_INET6) {
+ struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa;
+
+ if (arg->field.field->size < sizeof(struct sockaddr_in6)) {
+ trace_seq_printf(s, "INVALIDIPv6");
+ return rc;
+ }
+
+ if (have_p)
+ trace_seq_printf(s, "[");
+
+ buf = (unsigned char *) &sa6->sin6_addr;
+ if (have_c)
+ print_ip6c_addr(s, buf);
+ else
+ print_ip6_addr(s, i, buf);
+
+ if (have_p)
+ trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port));
+ }
+
+ return rc;
+}
+
+static int print_ip_arg(struct trace_seq *s, const char *ptr,
+ void *data, int size, struct event_format *event,
+ struct print_arg *arg)
+{
+ char i = *ptr; /* 'i' or 'I' */
+ char ver;
+ int rc = 0;
+
+ ptr++;
+ rc++;
+
+ ver = *ptr;
+ ptr++;
+ rc++;
+
+ switch (ver) {
+ case '4':
+ rc += print_ipv4_arg(s, ptr, i, data, size, event, arg);
+ break;
+ case '6':
+ rc += print_ipv6_arg(s, ptr, i, data, size, event, arg);
+ break;
+ case 'S':
+ rc += print_ipsa_arg(s, ptr, i, data, size, event, arg);
+ break;
+ default:
+ return 0;
+ }
+
+ return rc;
+}
+
+static int is_printable_array(char *p, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < len && p[i]; i++)
+ if (!isprint(p[i]) && !isspace(p[i]))
+ return 0;
+ return 1;
+}
+
+void tep_print_field(struct trace_seq *s, void *data,
+ struct format_field *field)
+{
+ unsigned long long val;
+ unsigned int offset, len, i;
+ struct tep_handle *pevent = field->event->pevent;
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ val = tep_read_number(pevent, data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+ if (field->flags & FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ trace_seq_printf(s, "%s", (char *)data + offset);
+ } else {
+ trace_seq_puts(s, "ARRAY[");
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_puts(s, ", ");
+ trace_seq_printf(s, "%02x",
+ *((unsigned char *)data + offset + i));
+ }
+ trace_seq_putc(s, ']');
+ field->flags &= ~FIELD_IS_STRING;
+ }
+ } else {
+ val = tep_read_number(pevent, data + field->offset,
+ field->size);
+ if (field->flags & FIELD_IS_POINTER) {
+ trace_seq_printf(s, "0x%llx", val);
+ } else if (field->flags & FIELD_IS_SIGNED) {
+ switch (field->size) {
+ case 4:
+ /*
+ * If field is long then print it in hex.
+ * A long usually stores pointers.
+ */
+ if (field->flags & FIELD_IS_LONG)
+ trace_seq_printf(s, "0x%x", (int)val);
+ else
+ trace_seq_printf(s, "%d", (int)val);
+ break;
+ case 2:
+ trace_seq_printf(s, "%2d", (short)val);
+ break;
+ case 1:
+ trace_seq_printf(s, "%1d", (char)val);
+ break;
+ default:
+ trace_seq_printf(s, "%lld", val);
+ }
+ } else {
+ if (field->flags & FIELD_IS_LONG)
+ trace_seq_printf(s, "0x%llx", val);
+ else
+ trace_seq_printf(s, "%llu", val);
+ }
+ }
+}
+
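+/*
+ * Usage sketch (editor's illustration): assuming a valid "field" and
+ * "data", the value can be pretty-printed into a trace_seq:
+ *
+ *	struct trace_seq s;
+ *
+ *	trace_seq_init(&s);
+ *	tep_print_field(&s, data, field);
+ *	trace_seq_terminate(&s);
+ *	trace_seq_do_printf(&s);
+ *	trace_seq_destroy(&s);
+ */
+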
+void tep_print_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused, struct event_format *event)
+{
+ struct format_field *field;
+
+ field = event->format.fields;
+ while (field) {
+ trace_seq_printf(s, " %s=", field->name);
+ tep_print_field(s, data, field);
+ field = field->next;
+ }
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct event_format *event)
+{
+ struct tep_handle *pevent = event->pevent;
+ struct print_fmt *print_fmt = &event->print_fmt;
+ struct print_arg *arg = print_fmt->args;
+ struct print_arg *args = NULL;
+ const char *ptr = print_fmt->format;
+ unsigned long long val;
+ struct func_map *func;
+ const char *saveptr;
+ struct trace_seq p;
+ char *bprint_fmt = NULL;
+ char format[32];
+ int show_func;
+ int len_as_arg;
+ int len_arg;
+ int len;
+ int ls;
+
+ if (event->flags & EVENT_FL_FAILED) {
+ trace_seq_printf(s, "[FAILED TO PARSE]");
+ tep_print_fields(s, data, size, event);
+ return;
+ }
+
+ if (event->flags & EVENT_FL_ISBPRINT) {
+ bprint_fmt = get_bprint_format(data, size, event);
+ args = make_bprint_args(bprint_fmt, data, size, event);
+ arg = args;
+ ptr = bprint_fmt;
+ }
+
+ for (; *ptr; ptr++) {
+ ls = 0;
+ if (*ptr == '\\') {
+ ptr++;
+ switch (*ptr) {
+ case 'n':
+ trace_seq_putc(s, '\n');
+ break;
+ case 't':
+ trace_seq_putc(s, '\t');
+ break;
+ case 'r':
+ trace_seq_putc(s, '\r');
+ break;
+ case '\\':
+ trace_seq_putc(s, '\\');
+ break;
+ default:
+ trace_seq_putc(s, *ptr);
+ break;
+ }
+
+ } else if (*ptr == '%') {
+ saveptr = ptr;
+ show_func = 0;
+ len_as_arg = 0;
+ cont_process:
+ ptr++;
+ switch (*ptr) {
+ case '%':
+ trace_seq_putc(s, '%');
+ break;
+ case '#':
+ /* FIXME: need to handle properly */
+ goto cont_process;
+ case 'h':
+ ls--;
+ goto cont_process;
+ case 'l':
+ ls++;
+ goto cont_process;
+ case 'L':
+ ls = 2;
+ goto cont_process;
+ case '*':
+ /* The argument is the length. */
+ if (!arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ len_arg = eval_num_arg(data, size, event, arg);
+ len_as_arg = 1;
+ arg = arg->next;
+ goto cont_process;
+ case '.':
+ case 'z':
+ case 'Z':
+ case '0' ... '9':
+ case '-':
+ goto cont_process;
+ case 'p':
+ if (pevent->long_size == 4)
+ ls = 1;
+ else
+ ls = 2;
+
+ if (isalnum(ptr[1]))
+ ptr++;
+
+ if (arg->type == PRINT_BSTRING) {
+ trace_seq_puts(s, arg->string.string);
+ arg = arg->next;
+ break;
+ }
+
+ if (*ptr == 'F' || *ptr == 'f' ||
+ *ptr == 'S' || *ptr == 's') {
+ show_func = *ptr;
+ } else if (*ptr == 'M' || *ptr == 'm') {
+ print_mac_arg(s, *ptr, data, size, event, arg);
+ arg = arg->next;
+ break;
+ } else if (*ptr == 'I' || *ptr == 'i') {
+ int n;
+
+ n = print_ip_arg(s, ptr, data, size, event, arg);
+ if (n > 0) {
+ ptr += n - 1;
+ arg = arg->next;
+ break;
+ }
+ }
+
+ /* fall through */
+ case 'd':
+ case 'i':
+ case 'x':
+ case 'X':
+ case 'u':
+ if (!arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= EVENT_FL_FAILED;
+ goto out_failed;
+ }
+
+ len = ((unsigned long)ptr + 1) -
+ (unsigned long)saveptr;
+
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= EVENT_FL_FAILED;
+ len = 31;
+ }
+
+ memcpy(format, saveptr, len);
+ format[len] = 0;
+
+ val = eval_num_arg(data, size, event, arg);
+ arg = arg->next;
+
+ if (show_func) {
+ func = find_func(pevent, val);
+ if (func) {
+ trace_seq_puts(s, func->func);
+ if (show_func == 'F')
+ trace_seq_printf(s,
+ "+0x%llx",
+ val - func->addr);
+ break;
+ }
+ }
+ if (pevent->long_size == 8 && ls == 1 &&
+ sizeof(long) != 8) {
+ char *p;
+
+ /* make %l into %ll */
+ if (ls == 1 && (p = strchr(format, 'l')))
+ memmove(p+1, p, strlen(p)+1);
+ else if (strcmp(format, "%p") == 0)
+ strcpy(format, "0x%llx");
+ ls = 2;
+ }
+ switch (ls) {
+ case -2:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (char)val);
+ else
+ trace_seq_printf(s, format, (char)val);
+ break;
+ case -1:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (short)val);
+ else
+ trace_seq_printf(s, format, (short)val);
+ break;
+ case 0:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (int)val);
+ else
+ trace_seq_printf(s, format, (int)val);
+ break;
+ case 1:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (long)val);
+ else
+ trace_seq_printf(s, format, (long)val);
+ break;
+ case 2:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg,
+ (long long)val);
+ else
+ trace_seq_printf(s, format, (long long)val);
+ break;
+ default:
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= EVENT_FL_FAILED;
+ }
+ break;
+ case 's':
+ if (!arg) {
+ do_warning_event(event, "no matching argument");
+ event->flags |= EVENT_FL_FAILED;
+ goto out_failed;
+ }
+
+ len = ((unsigned long)ptr + 1) -
+ (unsigned long)saveptr;
+
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= EVENT_FL_FAILED;
+ len = 31;
+ }
+
+ memcpy(format, saveptr, len);
+ format[len] = 0;
+ if (!len_as_arg)
+ len_arg = -1;
+ /* Use helper trace_seq */
+ trace_seq_init(&p);
+ print_str_arg(&p, data, size, event,
+ format, len_arg, arg);
+ trace_seq_terminate(&p);
+ trace_seq_puts(s, p.buffer);
+ trace_seq_destroy(&p);
+ arg = arg->next;
+ break;
+ default:
+ trace_seq_printf(s, ">%c<", *ptr);
+
+ }
+ } else
+ trace_seq_putc(s, *ptr);
+ }
+
+ if (event->flags & EVENT_FL_FAILED) {
+out_failed:
+ trace_seq_printf(s, "[FAILED TO PARSE]");
+ }
+
+ if (args) {
+ free_args(args);
+ free(bprint_fmt);
+ }
+}
+
+/**
+ * tep_data_lat_fmt - parse the data for the latency format
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @record: the record to read from
+ *
+ * This parses out the Latency format (interrupts disabled,
+ * need rescheduling, in hard/soft interrupt, preempt count
+ * and lock depth) and places it into the trace_seq.
+ */
+void tep_data_lat_fmt(struct tep_handle *pevent,
+ struct trace_seq *s, struct tep_record *record)
+{
+ static int check_lock_depth = 1;
+ static int check_migrate_disable = 1;
+ static int lock_depth_exists;
+ static int migrate_disable_exists;
+ unsigned int lat_flags;
+ unsigned int pc;
+ int lock_depth;
+ int migrate_disable;
+ int hardirq;
+ int softirq;
+ void *data = record->data;
+
+ lat_flags = parse_common_flags(pevent, data);
+ pc = parse_common_pc(pevent, data);
+ /* lock_depth may not always exist */
+ if (lock_depth_exists)
+ lock_depth = parse_common_lock_depth(pevent, data);
+ else if (check_lock_depth) {
+ lock_depth = parse_common_lock_depth(pevent, data);
+ if (lock_depth < 0)
+ check_lock_depth = 0;
+ else
+ lock_depth_exists = 1;
+ }
+
+ /* migrate_disable may not always exist */
+ if (migrate_disable_exists)
+ migrate_disable = parse_common_migrate_disable(pevent, data);
+ else if (check_migrate_disable) {
+ migrate_disable = parse_common_migrate_disable(pevent, data);
+ if (migrate_disable < 0)
+ check_migrate_disable = 0;
+ else
+ migrate_disable_exists = 1;
+ }
+
+ hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
+ softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
+
+ trace_seq_printf(s, "%c%c%c",
+ (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
+ 'X' : '.',
+ (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
+ 'N' : '.',
+ (hardirq && softirq) ? 'H' :
+ hardirq ? 'h' : softirq ? 's' : '.');
+
+ if (pc)
+ trace_seq_printf(s, "%x", pc);
+ else
+ trace_seq_putc(s, '.');
+
+ if (migrate_disable_exists) {
+ if (migrate_disable < 0)
+ trace_seq_putc(s, '.');
+ else
+ trace_seq_printf(s, "%d", migrate_disable);
+ }
+
+ if (lock_depth_exists) {
+ if (lock_depth < 0)
+ trace_seq_putc(s, '.');
+ else
+ trace_seq_printf(s, "%d", lock_depth);
+ }
+
+ trace_seq_terminate(s);
+}
+
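+/*
+ * Example of the output (editor's illustration): a record taken with
+ * interrupts disabled, need-resched set, in softirq context and with a
+ * preempt count of 1 is rendered as "dNs1".
+ */
+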
+/**
+ * tep_data_type - parse out the given event type
+ * @pevent: a handle to the pevent
+ * @rec: the record to read from
+ *
+ * This returns the event id from the @rec.
+ */
+int tep_data_type(struct tep_handle *pevent, struct tep_record *rec)
+{
+ return trace_parse_common_type(pevent, rec->data);
+}
+
+/**
+ * tep_data_event_from_type - find the event by a given type
+ * @pevent: a handle to the pevent
+ * @type: the type of the event.
+ *
+ * This returns the event for a given @type.
+ */
+struct event_format *tep_data_event_from_type(struct tep_handle *pevent, int type)
+{
+ return tep_find_event(pevent, type);
+}
+
+/**
+ * tep_data_pid - parse the PID from record
+ * @pevent: a handle to the pevent
+ * @rec: the record to parse
+ *
+ * This returns the PID from a record.
+ */
+int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec)
+{
+ return parse_common_pid(pevent, rec->data);
+}
+
+/**
+ * tep_data_preempt_count - parse the preempt count from the record
+ * @pevent: a handle to the pevent
+ * @rec: the record to parse
+ *
+ * This returns the preempt count from a record.
+ */
+int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec)
+{
+ return parse_common_pc(pevent, rec->data);
+}
+
+/**
+ * tep_data_flags - parse the latency flags from the record
+ * @pevent: a handle to the pevent
+ * @rec: the record to parse
+ *
+ * This returns the latency flags from a record.
+ *
+ * Use trace_flag_type enum for the flags (see event-parse.h).
+ */
+int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec)
+{
+ return parse_common_flags(pevent, rec->data);
+}
+
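+/*
+ * Usage sketch (editor's illustration) tying the accessors above
+ * together for one record:
+ *
+ *	int type = tep_data_type(pevent, record);
+ *	int pid = tep_data_pid(pevent, record);
+ *	int pc = tep_data_preempt_count(pevent, record);
+ *	int flags = tep_data_flags(pevent, record);
+ *
+ * All of them parse the common_* header fields of record->data.
+ */
+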
+/**
+ * tep_data_comm_from_pid - return the command line from PID
+ * @pevent: a handle to the pevent
+ * @pid: the PID of the task to search for
+ *
+ * This returns a pointer to the command line that has the given
+ * @pid.
+ */
+const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid)
+{
+ const char *comm;
+
+ comm = find_cmdline(pevent, pid);
+ return comm;
+}
+
+static struct cmdline *
+pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next)
+{
+ struct cmdline_list *cmdlist = (struct cmdline_list *)next;
+
+ if (cmdlist)
+ cmdlist = cmdlist->next;
+ else
+ cmdlist = pevent->cmdlist;
+
+ while (cmdlist && strcmp(cmdlist->comm, comm) != 0)
+ cmdlist = cmdlist->next;
+
+ return (struct cmdline *)cmdlist;
+}
+
+/**
+ * tep_data_pid_from_comm - return the pid from a given comm
+ * @pevent: a handle to the pevent
+ * @comm: the cmdline to find the pid from
+ * @next: the cmdline structure to find the next comm
+ *
+ * This returns the cmdline structure that holds a pid for a given
+ * comm, or NULL if none is found. As there may be more than one pid for
+ * a given comm, the result of this call can be passed back in via the
+ * @next parameter on a subsequent call, which will then find the
+ * next pid.
+ * Also, it does a linear search, so it may be slow.
+ */
+struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
+ struct cmdline *next)
+{
+ struct cmdline *cmdline;
+
+ /*
+ * If the cmdlines have not been converted yet, then use
+ * the list.
+ */
+ if (!pevent->cmdlines)
+ return pid_from_cmdlist(pevent, comm, next);
+
+ if (next) {
+ /*
+ * The next pointer could have been still from
+ * a previous call before cmdlines were created
+ */
+ if (next < pevent->cmdlines ||
+ next >= pevent->cmdlines + pevent->cmdline_count)
+ next = NULL;
+ else
+ cmdline = next++;
+ }
+
+ if (!next)
+ cmdline = pevent->cmdlines;
+
+ while (cmdline < pevent->cmdlines + pevent->cmdline_count) {
+ if (strcmp(cmdline->comm, comm) == 0)
+ return cmdline;
+ cmdline++;
+ }
+ return NULL;
+}
+
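+/*
+ * Usage sketch (editor's illustration): collect every pid recorded for a
+ * given comm by feeding each result back in as @next, and resolve each
+ * result with tep_cmdline_pid() below:
+ *
+ *	struct cmdline *cmdline = NULL;
+ *
+ *	while ((cmdline = tep_data_pid_from_comm(pevent, "bash", cmdline)))
+ *		printf("%d\n", tep_cmdline_pid(pevent, cmdline));
+ */
+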
+/**
+ * tep_cmdline_pid - return the pid associated to a given cmdline
+ * @cmdline: The cmdline structure to get the pid from
+ *
+ * Returns the pid for a given cmdline. If @cmdline is NULL, then
+ * -1 is returned.
+ */
+int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline)
+{
+ struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
+
+ if (!cmdline)
+ return -1;
+
+ /*
+ * If cmdlines have not been created yet, or cmdline is
+ * not part of the array, then treat it as a cmdlist instead.
+ */
+ if (!pevent->cmdlines ||
+ cmdline < pevent->cmdlines ||
+ cmdline >= pevent->cmdlines + pevent->cmdline_count)
+ return cmdlist->pid;
+
+ return cmdline->pid;
+}
+
+/**
+ * tep_event_info - parse the data into the print format
+ * @s: the trace_seq to write to
+ * @event: the handle to the event
+ * @record: the record to read from
+ *
+ * This parses the raw data of @record using the given @event information
+ * and writes the formatted output into the trace_seq.
+ */
+void tep_event_info(struct trace_seq *s, struct event_format *event,
+ struct tep_record *record)
+{
+ int print_pretty = 1;
+
+ if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
+ tep_print_fields(s, record->data, record->size, event);
+ else {
+
+ if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
+ print_pretty = event->handler(s, record, event,
+ event->context);
+
+ if (print_pretty)
+ pretty_print(s, record->data, record->size, event);
+ }
+
+ trace_seq_terminate(s);
+}
+
+static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
+{
+ if (!use_trace_clock)
+ return true;
+
+ if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global")
+ || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf"))
+ return true;
+
+	/* trace_clock is set to tsc or counter mode */
+ return false;
+}
+
+/**
+ * tep_find_event_by_record - return the event from a given record
+ * @pevent: a handle to the pevent
+ * @record: The record to get the event from
+ *
+ * Returns the associated event for a given record, or NULL if none
+ * is found.
+ */
+struct event_format *
+tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record)
+{
+ int type;
+
+ if (record->size < 0) {
+ do_warning("ug! negative record size %d", record->size);
+ return NULL;
+ }
+
+ type = trace_parse_common_type(pevent, record->data);
+
+ return tep_find_event(pevent, type);
+}
+
+/**
+ * tep_print_event_task - Write the event task comm, pid and CPU
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the task's comm, pid and CPU to @s.
+ */
+void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
+ struct event_format *event,
+ struct tep_record *record)
+{
+ void *data = record->data;
+ const char *comm;
+ int pid;
+
+ pid = parse_common_pid(pevent, data);
+ comm = find_cmdline(pevent, pid);
+
+ if (pevent->latency_format) {
+ trace_seq_printf(s, "%8.8s-%-5d %3d",
+ comm, pid, record->cpu);
+ } else
+ trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+}
+
+/**
+ * tep_print_event_time - Write the event timestamp
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ * @use_trace_clock: Set to interpret the timestamp according to @pevent->trace_clock
+ *
+ * Writes the timestamp of the record into @s.
+ */
+void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
+ struct event_format *event,
+ struct tep_record *record,
+ bool use_trace_clock)
+{
+ unsigned long secs;
+ unsigned long usecs;
+ unsigned long nsecs;
+ int p;
+ bool use_usec_format;
+
+ use_usec_format = is_timestamp_in_us(pevent->trace_clock,
+ use_trace_clock);
+ if (use_usec_format) {
+ secs = record->ts / NSEC_PER_SEC;
+ nsecs = record->ts - secs * NSEC_PER_SEC;
+ }
+
+ if (pevent->latency_format) {
+ tep_data_lat_fmt(pevent, s, record);
+ }
+
+ if (use_usec_format) {
+ if (pevent->flags & TEP_NSEC_OUTPUT) {
+ usecs = nsecs;
+ p = 9;
+ } else {
+ usecs = (nsecs + 500) / NSEC_PER_USEC;
+ /* To avoid usecs larger than 1 sec */
+ if (usecs >= USEC_PER_SEC) {
+ usecs -= USEC_PER_SEC;
+ secs++;
+ }
+ p = 6;
+ }
+
+ trace_seq_printf(s, " %5lu.%0*lu:", secs, p, usecs);
+ } else
+ trace_seq_printf(s, " %12llu:", record->ts);
+}
+
+/**
+ * tep_print_event_data - Write the event data section
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @event: the handle to the record's event
+ * @record: The record to get the event from
+ *
+ * Writes the parsing of the record's data to @s.
+ */
+void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
+ struct event_format *event,
+ struct tep_record *record)
+{
+ static const char *spaces = " "; /* 20 spaces */
+ int len;
+
+ trace_seq_printf(s, " %s: ", event->name);
+
+ /* Space out the event names evenly. */
+ len = strlen(event->name);
+ if (len < 20)
+ trace_seq_printf(s, "%.*s", 20 - len, spaces);
+
+ tep_event_info(s, event, record);
+}
+
+void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
+ struct tep_record *record, bool use_trace_clock)
+{
+ struct event_format *event;
+
+ event = tep_find_event_by_record(pevent, record);
+ if (!event) {
+ int i;
+ int type = trace_parse_common_type(pevent, record->data);
+
+ do_warning("ug! no event found for type %d", type);
+ trace_seq_printf(s, "[UNKNOWN TYPE %d]", type);
+ for (i = 0; i < record->size; i++)
+ trace_seq_printf(s, " %02x",
+ ((unsigned char *)record->data)[i]);
+ return;
+ }
+
+ tep_print_event_task(pevent, s, event, record);
+ tep_print_event_time(pevent, s, event, record, use_trace_clock);
+ tep_print_event_data(pevent, s, event, record);
+}
+
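+/*
+ * Usage sketch (editor's illustration): the one-call way to render a
+ * record in the classic trace output layout (task, timestamp, data):
+ *
+ *	struct trace_seq s;
+ *
+ *	trace_seq_init(&s);
+ *	tep_print_event(pevent, &s, record, false);
+ *	trace_seq_do_printf(&s);
+ *	trace_seq_destroy(&s);
+ */
+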
+static int events_id_cmp(const void *a, const void *b)
+{
+ struct event_format * const * ea = a;
+ struct event_format * const * eb = b;
+
+ if ((*ea)->id < (*eb)->id)
+ return -1;
+
+ if ((*ea)->id > (*eb)->id)
+ return 1;
+
+ return 0;
+}
+
+static int events_name_cmp(const void *a, const void *b)
+{
+ struct event_format * const * ea = a;
+ struct event_format * const * eb = b;
+ int res;
+
+ res = strcmp((*ea)->name, (*eb)->name);
+ if (res)
+ return res;
+
+ res = strcmp((*ea)->system, (*eb)->system);
+ if (res)
+ return res;
+
+ return events_id_cmp(a, b);
+}
+
+static int events_system_cmp(const void *a, const void *b)
+{
+ struct event_format * const * ea = a;
+ struct event_format * const * eb = b;
+ int res;
+
+ res = strcmp((*ea)->system, (*eb)->system);
+ if (res)
+ return res;
+
+ res = strcmp((*ea)->name, (*eb)->name);
+ if (res)
+ return res;
+
+ return events_id_cmp(a, b);
+}
+
+struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort_type sort_type)
+{
+ struct event_format **events;
+ int (*sort)(const void *a, const void *b);
+
+ events = pevent->sort_events;
+
+ if (events && pevent->last_type == sort_type)
+ return events;
+
+ if (!events) {
+ events = malloc(sizeof(*events) * (pevent->nr_events + 1));
+ if (!events)
+ return NULL;
+
+ memcpy(events, pevent->events, sizeof(*events) * pevent->nr_events);
+ events[pevent->nr_events] = NULL;
+
+ pevent->sort_events = events;
+
+ /* the internal events are sorted by id */
+ if (sort_type == EVENT_SORT_ID) {
+ pevent->last_type = sort_type;
+ return events;
+ }
+ }
+
+ switch (sort_type) {
+ case EVENT_SORT_ID:
+ sort = events_id_cmp;
+ break;
+ case EVENT_SORT_NAME:
+ sort = events_name_cmp;
+ break;
+ case EVENT_SORT_SYSTEM:
+ sort = events_system_cmp;
+ break;
+ default:
+ return events;
+ }
+
+ qsort(events, pevent->nr_events, sizeof(*events), sort);
+ pevent->last_type = sort_type;
+
+ return events;
+}
+
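+/*
+ * Usage sketch (editor's illustration): iterate all events sorted by
+ * name; the returned array is cached in the handle (sort_events) and
+ * must not be freed by the caller:
+ *
+ *	struct event_format **events;
+ *	int i;
+ *
+ *	events = tep_list_events(pevent, EVENT_SORT_NAME);
+ *	for (i = 0; events && events[i]; i++)
+ *		printf("%s:%s\n", events[i]->system, events[i]->name);
+ */
+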
+static struct format_field **
+get_event_fields(const char *type, const char *name,
+ int count, struct format_field *list)
+{
+ struct format_field **fields;
+ struct format_field *field;
+ int i = 0;
+
+ fields = malloc(sizeof(*fields) * (count + 1));
+ if (!fields)
+ return NULL;
+
+ for (field = list; field; field = field->next) {
+ fields[i++] = field;
+ if (i == count + 1) {
+ do_warning("event %s has more %s fields than specified",
+ name, type);
+ i--;
+ break;
+ }
+ }
+
+ if (i != count)
+		do_warning("event %s has fewer %s fields than specified",
+ name, type);
+
+ fields[i] = NULL;
+
+ return fields;
+}
+
+/**
+ * tep_event_common_fields - return a list of common fields for an event
+ * @event: the event to return the common fields of.
+ *
+ * Returns an allocated array of fields. The last item in the array is NULL.
+ * The array must be freed with free().
+ */
+struct format_field **tep_event_common_fields(struct event_format *event)
+{
+ return get_event_fields("common", event->name,
+ event->format.nr_common,
+ event->format.common_fields);
+}
+
+/**
+ * tep_event_fields - return a list of event specific fields for an event
+ * @event: the event to return the fields of.
+ *
+ * Returns an allocated array of fields. The last item in the array is NULL.
+ * The array must be freed with free().
+ */
+struct format_field **tep_event_fields(struct event_format *event)
+{
+ return get_event_fields("event", event->name,
+ event->format.nr_fields,
+ event->format.fields);
+}
+
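+/*
+ * Usage sketch (editor's illustration): the returned array is NULL
+ * terminated, and the array itself (not the fields) must be freed:
+ *
+ *	struct format_field **fields;
+ *	int i;
+ *
+ *	fields = tep_event_fields(event);
+ *	for (i = 0; fields && fields[i]; i++)
+ *		printf("%s\n", fields[i]->name);
+ *	free(fields);
+ */
+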
+static void print_fields(struct trace_seq *s, struct print_flag_sym *field)
+{
+ trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
+ if (field->next) {
+ trace_seq_puts(s, ", ");
+ print_fields(s, field->next);
+ }
+}
+
+/* for debugging */
+static void print_args(struct print_arg *args)
+{
+ int print_paren = 1;
+ struct trace_seq s;
+
+ switch (args->type) {
+ case PRINT_NULL:
+ printf("null");
+ break;
+ case PRINT_ATOM:
+ printf("%s", args->atom.atom);
+ break;
+ case PRINT_FIELD:
+ printf("REC->%s", args->field.name);
+ break;
+ case PRINT_FLAGS:
+ printf("__print_flags(");
+ print_args(args->flags.field);
+ printf(", %s, ", args->flags.delim);
+ trace_seq_init(&s);
+ print_fields(&s, args->flags.flags);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf(")");
+ break;
+ case PRINT_SYMBOL:
+ printf("__print_symbolic(");
+ print_args(args->symbol.field);
+ printf(", ");
+ trace_seq_init(&s);
+ print_fields(&s, args->symbol.symbols);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf(")");
+ break;
+ case PRINT_HEX:
+ printf("__print_hex(");
+ print_args(args->hex.field);
+ printf(", ");
+ print_args(args->hex.size);
+ printf(")");
+ break;
+ case PRINT_HEX_STR:
+ printf("__print_hex_str(");
+ print_args(args->hex.field);
+ printf(", ");
+ print_args(args->hex.size);
+ printf(")");
+ break;
+ case PRINT_INT_ARRAY:
+ printf("__print_array(");
+ print_args(args->int_array.field);
+ printf(", ");
+ print_args(args->int_array.count);
+ printf(", ");
+ print_args(args->int_array.el_size);
+ printf(")");
+ break;
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ printf("__get_str(%s)", args->string.string);
+ break;
+ case PRINT_BITMASK:
+ printf("__get_bitmask(%s)", args->bitmask.bitmask);
+ break;
+ case PRINT_TYPE:
+ printf("(%s)", args->typecast.type);
+ print_args(args->typecast.item);
+ break;
+ case PRINT_OP:
+ if (strcmp(args->op.op, ":") == 0)
+ print_paren = 0;
+ if (print_paren)
+ printf("(");
+ print_args(args->op.left);
+ printf(" %s ", args->op.op);
+ print_args(args->op.right);
+ if (print_paren)
+ printf(")");
+ break;
+ default:
+ /* we should warn... */
+ return;
+ }
+ if (args->next) {
+ printf("\n");
+ print_args(args->next);
+ }
+}
+
+static void parse_header_field(const char *field,
+ int *offset, int *size, int mandatory)
+{
+ unsigned long long save_input_buf_ptr;
+ unsigned long long save_input_buf_siz;
+ char *token;
+ int type;
+
+ save_input_buf_ptr = input_buf_ptr;
+ save_input_buf_siz = input_buf_siz;
+
+ if (read_expected(EVENT_ITEM, "field") < 0)
+ return;
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+
+ /* type */
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ free_token(token);
+
+ /*
+ * If this is not a mandatory field, then test it first.
+ */
+ if (mandatory) {
+ if (read_expected(EVENT_ITEM, field) < 0)
+ return;
+ } else {
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ if (strcmp(token, field) != 0)
+ goto discard;
+ free_token(token);
+ }
+
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+ if (read_expected(EVENT_ITEM, "offset") < 0)
+ return;
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ *offset = atoi(token);
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+ if (read_expected(EVENT_ITEM, "size") < 0)
+ return;
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ *size = atoi(token);
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+ type = read_token(&token);
+ if (type != EVENT_NEWLINE) {
+ /* newer versions of the kernel have a "signed" type */
+ if (type != EVENT_ITEM)
+ goto fail;
+
+ if (strcmp(token, "signed") != 0)
+ goto fail;
+
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+
+ if (read_expect_type(EVENT_NEWLINE, &token))
+ goto fail;
+ }
+ fail:
+ free_token(token);
+ return;
+
+ discard:
+ input_buf_ptr = save_input_buf_ptr;
+ input_buf_siz = save_input_buf_siz;
+ *offset = 0;
+ *size = 0;
+ free_token(token);
+}
+
+/**
+ * tep_parse_header_page - parse the data stored in the header page
+ * @pevent: the handle to the pevent
+ * @buf: the buffer storing the header page format string
+ * @size: the size of @buf
+ * @long_size: the long size to use if there is no header
+ *
+ * This parses the header page format for information on the
+ * ring buffer used. The @buf should be copied from
+ *
+ * /sys/kernel/debug/tracing/events/header_page
+ */
+int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size,
+ int long_size)
+{
+ int ignore;
+
+ if (!size) {
+ /*
+ * Old kernels did not have header page info.
+ * Sorry but we just use what we find here in user space.
+ */
+ pevent->header_page_ts_size = sizeof(long long);
+ pevent->header_page_size_size = long_size;
+ pevent->header_page_data_offset = sizeof(long long) + long_size;
+ pevent->old_format = 1;
+ return -1;
+ }
+ init_input_buf(buf, size);
+
+ parse_header_field("timestamp", &pevent->header_page_ts_offset,
+ &pevent->header_page_ts_size, 1);
+ parse_header_field("commit", &pevent->header_page_size_offset,
+ &pevent->header_page_size_size, 1);
+ parse_header_field("overwrite", &pevent->header_page_overwrite,
+ &ignore, 0);
+ parse_header_field("data", &pevent->header_page_data_offset,
+ &pevent->header_page_data_size, 1);
+
+ return 0;
+}
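+
+/*
+ * Example usage (an illustrative sketch, not part of the library): read
+ * the header page format out of debugfs and hand it to
+ * tep_parse_header_page().  The fixed-size buffer and the missing error
+ * handling are simplifications of the sketch.
+ *
+ *	char buf[BUFSIZ];
+ *	size_t len = 0;
+ *	FILE *fp;
+ *
+ *	fp = fopen("/sys/kernel/debug/tracing/events/header_page", "r");
+ *	if (fp) {
+ *		len = fread(buf, 1, sizeof(buf), fp);
+ *		fclose(fp);
+ *	}
+ *	tep_parse_header_page(pevent, buf, len, sizeof(long));
+ */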
+
+static int event_matches(struct event_format *event,
+ int id, const char *sys_name,
+ const char *event_name)
+{
+ if (id >= 0 && id != event->id)
+ return 0;
+
+ if (event_name && (strcmp(event_name, event->name) != 0))
+ return 0;
+
+ if (sys_name && (strcmp(sys_name, event->system) != 0))
+ return 0;
+
+ return 1;
+}
+
+static void free_handler(struct event_handler *handle)
+{
+ free((void *)handle->sys_name);
+ free((void *)handle->event_name);
+ free(handle);
+}
+
+static int find_event_handle(struct tep_handle *pevent, struct event_format *event)
+{
+ struct event_handler *handle, **next;
+
+ for (next = &pevent->handlers; *next;
+ next = &(*next)->next) {
+ handle = *next;
+ if (event_matches(event, handle->id,
+ handle->sys_name,
+ handle->event_name))
+ break;
+ }
+
+ if (!(*next))
+ return 0;
+
+ pr_stat("overriding event (%d) %s:%s with new print handler",
+ event->id, event->system, event->name);
+
+ event->handler = handle->func;
+ event->context = handle->context;
+
+ *next = handle->next;
+ free_handler(handle);
+
+ return 1;
+}
+
+/**
+ * __tep_parse_format - parse the event format
+ * @eventp: returned event, on success
+ * @pevent: the handle to the pevent (may be NULL)
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum tep_errno __tep_parse_format(struct event_format **eventp,
+ struct tep_handle *pevent, const char *buf,
+ unsigned long size, const char *sys)
+{
+ struct event_format *event;
+ int ret;
+
+ init_input_buf(buf, size);
+
+ *eventp = event = alloc_event();
+ if (!event)
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+ event->name = event_read_name();
+ if (!event->name) {
+ /* Bad event? */
+ ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+ goto event_alloc_failed;
+ }
+
+ if (strcmp(sys, "ftrace") == 0) {
+ event->flags |= EVENT_FL_ISFTRACE;
+
+ if (strcmp(event->name, "bprint") == 0)
+ event->flags |= EVENT_FL_ISBPRINT;
+ }
+
+ event->id = event_read_id();
+ if (event->id < 0) {
+ ret = TEP_ERRNO__READ_ID_FAILED;
+ /*
+		 * Not actually an allocation error, but the ID is
+		 * critical, so just bail out.
+ */
+ goto event_alloc_failed;
+ }
+
+ event->system = strdup(sys);
+ if (!event->system) {
+ ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+ goto event_alloc_failed;
+ }
+
+ /* Add pevent to event so that it can be referenced */
+ event->pevent = pevent;
+
+ ret = event_read_format(event);
+ if (ret < 0) {
+ ret = TEP_ERRNO__READ_FORMAT_FAILED;
+ goto event_parse_failed;
+ }
+
+ /*
+ * If the event has an override, don't print warnings if the event
+ * print format fails to parse.
+ */
+ if (pevent && find_event_handle(pevent, event))
+ show_warning = 0;
+
+ ret = event_read_print(event);
+ show_warning = 1;
+
+ if (ret < 0) {
+ ret = TEP_ERRNO__READ_PRINT_FAILED;
+ goto event_parse_failed;
+ }
+
+ if (!ret && (event->flags & EVENT_FL_ISFTRACE)) {
+ struct format_field *field;
+ struct print_arg *arg, **list;
+
+ /* old ftrace had no args */
+ list = &event->print_fmt.args;
+ for (field = event->format.fields; field; field = field->next) {
+ arg = alloc_arg();
+ if (!arg) {
+ event->flags |= EVENT_FL_FAILED;
+ return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
+ }
+ arg->type = PRINT_FIELD;
+ arg->field.name = strdup(field->name);
+ if (!arg->field.name) {
+ event->flags |= EVENT_FL_FAILED;
+ free_arg(arg);
+ return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
+ }
+ arg->field.field = field;
+ *list = arg;
+ list = &arg->next;
+ }
+ return 0;
+ }
+
+ return 0;
+
+ event_parse_failed:
+ event->flags |= EVENT_FL_FAILED;
+ return ret;
+
+ event_alloc_failed:
+ free(event->system);
+ free(event->name);
+ free(event);
+ *eventp = NULL;
+ return ret;
+}
+
+static enum tep_errno
+__parse_event(struct tep_handle *pevent,
+ struct event_format **eventp,
+ const char *buf, unsigned long size,
+ const char *sys)
+{
+ int ret = __tep_parse_format(eventp, pevent, buf, size, sys);
+ struct event_format *event = *eventp;
+
+ if (event == NULL)
+ return ret;
+
+ if (pevent && add_event(pevent, event)) {
+ ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+ goto event_add_failed;
+ }
+
+#define PRINT_ARGS 0
+ if (PRINT_ARGS && event->print_fmt.args)
+ print_args(event->print_fmt.args);
+
+ return 0;
+
+event_add_failed:
+ tep_free_format(event);
+ return ret;
+}
+
+/**
+ * tep_parse_format - parse the event format
+ * @pevent: the handle to the pevent
+ * @eventp: returned format
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum tep_errno tep_parse_format(struct tep_handle *pevent,
+ struct event_format **eventp,
+ const char *buf,
+ unsigned long size, const char *sys)
+{
+ return __parse_event(pevent, eventp, buf, size, sys);
+}
+
+/**
+ * tep_parse_event - parse the event format
+ * @pevent: the handle to the pevent
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
+ unsigned long size, const char *sys)
+{
+ struct event_format *event = NULL;
+ return __parse_event(pevent, &event, buf, size, sys);
+}
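+
+/*
+ * Example usage (sketch): register one event's format file with the
+ * handle.  The sched_switch path is only an example; any format file
+ * below the events directory is parsed the same way.
+ *
+ *	char buf[8192];
+ *	size_t len;
+ *	enum tep_errno err;
+ *	FILE *fp;
+ *
+ *	fp = fopen("/sys/kernel/debug/tracing/events/sched/sched_switch/format", "r");
+ *	if (!fp)
+ *		return -1;
+ *	len = fread(buf, 1, sizeof(buf), fp);
+ *	fclose(fp);
+ *	err = tep_parse_event(pevent, buf, len, "sched");
+ */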
+
+#undef _PE
+#define _PE(code, str) str
+static const char * const tep_error_str[] = {
+ TEP_ERRORS
+};
+#undef _PE
+
+int tep_strerror(struct tep_handle *pevent __maybe_unused,
+ enum tep_errno errnum, char *buf, size_t buflen)
+{
+ int idx;
+ const char *msg;
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ if (errnum <= __TEP_ERRNO__START ||
+ errnum >= __TEP_ERRNO__END)
+ return -1;
+
+ idx = errnum - __TEP_ERRNO__START - 1;
+ msg = tep_error_str[idx];
+ snprintf(buf, buflen, "%s", msg);
+
+ return 0;
+}
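+
+/*
+ * Example usage (sketch): turn a tep_errno from a failed parse into a
+ * readable message.  "err" is assumed to hold the value returned by
+ * tep_parse_event() or tep_parse_format().
+ *
+ *	char msg[200];
+ *
+ *	if (err != TEP_ERRNO__SUCCESS) {
+ *		tep_strerror(pevent, err, msg, sizeof(msg));
+ *		fprintf(stderr, "parse failed: %s\n", msg);
+ *	}
+ */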
+
+int get_field_val(struct trace_seq *s, struct format_field *field,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err)
+{
+ if (!field) {
+ if (err)
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
+ return -1;
+ }
+
+ if (tep_read_number_field(field, record->data, val)) {
+ if (err)
+ trace_seq_printf(s, " %s=INVALID", name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * tep_get_field_raw - return the raw pointer into the data field
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @len: place to store the field length.
+ * @err: print default error if failed.
+ *
+ * Returns a pointer into record->data of the field and places
+ * the length of the field in @len.
+ *
+ * On failure, it returns NULL.
+ */
+void *tep_get_field_raw(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ int *len, int err)
+{
+ struct format_field *field;
+ void *data = record->data;
+ unsigned offset;
+ int dummy;
+
+ if (!event)
+ return NULL;
+
+ field = tep_find_field(event, name);
+
+ if (!field) {
+ if (err)
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
+ return NULL;
+ }
+
+ /* Allow @len to be NULL */
+ if (!len)
+ len = &dummy;
+
+ offset = field->offset;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ offset = tep_read_number(event->pevent,
+ data + offset, field->size);
+ *len = offset >> 16;
+ offset &= 0xffff;
+ } else
+ *len = field->size;
+
+ return data + offset;
+}
+
+/**
+ * tep_get_field_val - find a field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success, or -1 if the field was not found.
+ */
+int tep_get_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err)
+{
+ struct format_field *field;
+
+ if (!event)
+ return -1;
+
+ field = tep_find_field(event, name);
+
+ return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * tep_get_common_field_val - find a common field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success, or -1 if the field was not found.
+ */
+int tep_get_common_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err)
+{
+ struct format_field *field;
+
+ if (!event)
+ return -1;
+
+ field = tep_find_common_field(event, name);
+
+ return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * tep_get_any_field_val - find any field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success, or -1 if the field was not found.
+ */
+int tep_get_any_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err)
+{
+ struct format_field *field;
+
+ if (!event)
+ return -1;
+
+ field = tep_find_any_field(event, name);
+
+ return get_field_val(s, field, name, record, val, err);
+}
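+
+/*
+ * Example usage (sketch): read the common_pid field of a record.  The
+ * "event" and "record" variables are assumed to come from the caller's
+ * already parsed tracing data.
+ *
+ *	struct trace_seq s;
+ *	unsigned long long pid;
+ *
+ *	trace_seq_init(&s);
+ *	if (tep_get_any_field_val(&s, event, "common_pid", record,
+ *				  &pid, 1) == 0)
+ *		printf("pid=%llu\n", pid);
+ *	trace_seq_destroy(&s);
+ */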
+
+/**
+ * tep_print_num_field - print a field and a format
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns: 0 on success, -1 if the field was not found, or 1 if the buffer is full.
+ */
+int tep_print_num_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct tep_record *record, int err)
+{
+ struct format_field *field = tep_find_field(event, name);
+ unsigned long long val;
+
+ if (!field)
+ goto failed;
+
+ if (tep_read_number_field(field, record->data, &val))
+ goto failed;
+
+ return trace_seq_printf(s, fmt, val);
+
+ failed:
+ if (err)
+ trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+ return -1;
+}
+
+/**
+ * tep_print_func_field - print a field and a format for function pointers
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns: 0 on success, -1 if the field was not found, or 1 if the buffer is full.
+ */
+int tep_print_func_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct tep_record *record, int err)
+{
+ struct format_field *field = tep_find_field(event, name);
+ struct tep_handle *pevent = event->pevent;
+ unsigned long long val;
+ struct func_map *func;
+ char tmp[128];
+
+ if (!field)
+ goto failed;
+
+ if (tep_read_number_field(field, record->data, &val))
+ goto failed;
+
+ func = find_func(pevent, val);
+
+ if (func)
+		snprintf(tmp, 128, "%s/0x%llx", func->func, val - func->addr);
+ else
+ sprintf(tmp, "0x%08llx", val);
+
+ return trace_seq_printf(s, fmt, tmp);
+
+ failed:
+ if (err)
+ trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+ return -1;
+}
+
+static void free_func_handle(struct tep_function_handler *func)
+{
+ struct func_params *params;
+
+ free(func->name);
+
+ while (func->params) {
+ params = func->params;
+ func->params = params->next;
+ free(params);
+ }
+
+ free(func);
+}
+
+/**
+ * tep_register_print_function - register a helper function
+ * @pevent: the handle to the pevent
+ * @func: the function to process the helper function
+ * @ret_type: the return type of the helper function
+ * @name: the name of the helper function
+ * @parameters: A list of enum tep_func_arg_type
+ *
+ * Some events may have helper functions in the print format arguments.
+ * This allows a plugin to dynamically create a way to process one
+ * of these functions.
+ *
+ * The @parameters is a variable list of tep_func_arg_type enums that
+ * must end with TEP_FUNC_ARG_VOID.
+ */
+int tep_register_print_function(struct tep_handle *pevent,
+ tep_func_handler func,
+ enum tep_func_arg_type ret_type,
+ char *name, ...)
+{
+ struct tep_function_handler *func_handle;
+ struct func_params **next_param;
+ struct func_params *param;
+ enum tep_func_arg_type type;
+ va_list ap;
+ int ret;
+
+ func_handle = find_func_handler(pevent, name);
+ if (func_handle) {
+ /*
+		 * This is most likely caused by the user's own
+ * plugins updating the function. This overrides the
+ * system defaults.
+ */
+ pr_stat("override of function helper '%s'", name);
+ remove_func_handler(pevent, name);
+ }
+
+ func_handle = calloc(1, sizeof(*func_handle));
+ if (!func_handle) {
+ do_warning("Failed to allocate function handler");
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ func_handle->ret_type = ret_type;
+ func_handle->name = strdup(name);
+ func_handle->func = func;
+ if (!func_handle->name) {
+ do_warning("Failed to allocate function name");
+ free(func_handle);
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ next_param = &(func_handle->params);
+ va_start(ap, name);
+ for (;;) {
+ type = va_arg(ap, enum tep_func_arg_type);
+ if (type == TEP_FUNC_ARG_VOID)
+ break;
+
+ if (type >= TEP_FUNC_ARG_MAX_TYPES) {
+ do_warning("Invalid argument type %d", type);
+ ret = TEP_ERRNO__INVALID_ARG_TYPE;
+ goto out_free;
+ }
+
+ param = malloc(sizeof(*param));
+ if (!param) {
+ do_warning("Failed to allocate function param");
+ ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+ goto out_free;
+ }
+ param->type = type;
+ param->next = NULL;
+
+ *next_param = param;
+ next_param = &(param->next);
+
+ func_handle->nr_args++;
+ }
+ va_end(ap);
+
+ func_handle->next = pevent->func_handlers;
+ pevent->func_handlers = func_handle;
+
+ return 0;
+ out_free:
+ va_end(ap);
+ free_func_handle(func_handle);
+ return ret;
+}
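+
+/*
+ * Example usage (sketch): make a helper called "symaddr" available to
+ * print format strings.  The helper name and its behavior are made up
+ * for the illustration; args[] holds the already evaluated arguments
+ * in the order they were declared after the name.
+ *
+ *	static unsigned long long
+ *	symaddr(struct trace_seq *s, unsigned long long *args)
+ *	{
+ *		trace_seq_printf(s, "0x%llx", args[0]);
+ *		return args[0];
+ *	}
+ *
+ *	tep_register_print_function(pevent, symaddr, TEP_FUNC_ARG_LONG,
+ *				    "symaddr", TEP_FUNC_ARG_LONG,
+ *				    TEP_FUNC_ARG_VOID);
+ */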
+
+/**
+ * tep_unregister_print_function - unregister a helper function
+ * @pevent: the handle to the pevent
+ * @func: the function to process the helper function
+ * @name: the name of the helper function
+ *
+ * This function removes existing print handler for function @name.
+ *
+ * Returns 0 if the handler was removed successfully, -1 otherwise.
+ */
+int tep_unregister_print_function(struct tep_handle *pevent,
+ tep_func_handler func, char *name)
+{
+ struct tep_function_handler *func_handle;
+
+ func_handle = find_func_handler(pevent, name);
+ if (func_handle && func_handle->func == func) {
+ remove_func_handler(pevent, name);
+ return 0;
+ }
+ return -1;
+}
+
+static struct event_format *search_event(struct tep_handle *pevent, int id,
+ const char *sys_name,
+ const char *event_name)
+{
+ struct event_format *event;
+
+ if (id >= 0) {
+ /* search by id */
+ event = tep_find_event(pevent, id);
+ if (!event)
+ return NULL;
+ if (event_name && (strcmp(event_name, event->name) != 0))
+ return NULL;
+ if (sys_name && (strcmp(sys_name, event->system) != 0))
+ return NULL;
+ } else {
+ event = tep_find_event_by_name(pevent, sys_name, event_name);
+ if (!event)
+ return NULL;
+ }
+ return event;
+}
+
+/**
+ * tep_register_event_handler - register a way to parse an event
+ * @pevent: the handle to the pevent
+ * @id: the id of the event to register
+ * @sys_name: the system name the event belongs to
+ * @event_name: the name of the event
+ * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
+ *
+ * This function allows a developer to override the parsing of
+ * a given event. If for some reason the default print format
+ * is not sufficient, this function will register a function
+ * for an event to be used to parse the data instead.
+ *
+ * If @id is >= 0, then it is used to find the event;
+ * otherwise @sys_name and @event_name are used.
+ */
+int tep_register_event_handler(struct tep_handle *pevent, int id,
+ const char *sys_name, const char *event_name,
+ tep_event_handler_func func, void *context)
+{
+ struct event_format *event;
+ struct event_handler *handle;
+
+ event = search_event(pevent, id, sys_name, event_name);
+ if (event == NULL)
+ goto not_found;
+
+ pr_stat("overriding event (%d) %s:%s with new print handler",
+ event->id, event->system, event->name);
+
+ event->handler = func;
+ event->context = context;
+ return 0;
+
+ not_found:
+ /* Save for later use. */
+ handle = calloc(1, sizeof(*handle));
+ if (!handle) {
+ do_warning("Failed to allocate event handler");
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ handle->id = id;
+ if (event_name)
+ handle->event_name = strdup(event_name);
+ if (sys_name)
+ handle->sys_name = strdup(sys_name);
+
+ if ((event_name && !handle->event_name) ||
+ (sys_name && !handle->sys_name)) {
+ do_warning("Failed to allocate event/sys name");
+ free((void *)handle->event_name);
+ free((void *)handle->sys_name);
+ free(handle);
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ handle->func = func;
+ handle->next = pevent->handlers;
+ pevent->handlers = handle;
+ handle->context = context;
+
+ return -1;
+}
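+
+/*
+ * Example usage (sketch): override how sched:sched_switch is printed.
+ * The handler below is a minimal illustration, not a faithful
+ * reimplementation of the default output.
+ *
+ *	static int sched_switch_handler(struct trace_seq *s,
+ *					struct tep_record *record,
+ *					struct event_format *event,
+ *					void *context)
+ *	{
+ *		unsigned long long next_pid;
+ *
+ *		if (tep_get_field_val(s, event, "next_pid", record,
+ *				      &next_pid, 1))
+ *			return -1;
+ *		trace_seq_printf(s, "next_pid=%llu", next_pid);
+ *		return 0;
+ *	}
+ *
+ *	tep_register_event_handler(pevent, -1, "sched", "sched_switch",
+ *				   sched_switch_handler, NULL);
+ */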
+
+static int handle_matches(struct event_handler *handler, int id,
+ const char *sys_name, const char *event_name,
+ tep_event_handler_func func, void *context)
+{
+ if (id >= 0 && id != handler->id)
+ return 0;
+
+ if (event_name && (strcmp(event_name, handler->event_name) != 0))
+ return 0;
+
+ if (sys_name && (strcmp(sys_name, handler->sys_name) != 0))
+ return 0;
+
+ if (func != handler->func || context != handler->context)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * tep_unregister_event_handler - unregister an existing event handler
+ * @pevent: the handle to the pevent
+ * @id: the id of the event to unregister
+ * @sys_name: the system name the handler belongs to
+ * @event_name: the name of the event handler
+ * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
+ *
+ * This function removes existing event handler (parser).
+ *
+ * If @id is >= 0, then it is used to find the event;
+ * otherwise @sys_name and @event_name are used.
+ *
+ * Returns 0 if handler was removed successfully, -1 if event was not found.
+ */
+int tep_unregister_event_handler(struct tep_handle *pevent, int id,
+ const char *sys_name, const char *event_name,
+ tep_event_handler_func func, void *context)
+{
+ struct event_format *event;
+ struct event_handler *handle;
+ struct event_handler **next;
+
+ event = search_event(pevent, id, sys_name, event_name);
+ if (event == NULL)
+ goto not_found;
+
+ if (event->handler == func && event->context == context) {
+ pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.",
+ event->id, event->system, event->name);
+
+ event->handler = NULL;
+ event->context = NULL;
+ return 0;
+ }
+
+not_found:
+ for (next = &pevent->handlers; *next; next = &(*next)->next) {
+ handle = *next;
+ if (handle_matches(handle, id, sys_name, event_name,
+ func, context))
+ break;
+ }
+
+ if (!(*next))
+ return -1;
+
+ *next = handle->next;
+ free_handler(handle);
+
+ return 0;
+}
+
+/**
+ * tep_alloc - create a pevent handle
+ */
+struct tep_handle *tep_alloc(void)
+{
+ struct tep_handle *pevent = calloc(1, sizeof(*pevent));
+
+ if (pevent)
+ pevent->ref_count = 1;
+
+ return pevent;
+}
+
+void tep_ref(struct tep_handle *pevent)
+{
+ pevent->ref_count++;
+}
+
+void tep_free_format_field(struct format_field *field)
+{
+ free(field->type);
+ if (field->alias != field->name)
+ free(field->alias);
+ free(field->name);
+ free(field);
+}
+
+static void free_format_fields(struct format_field *field)
+{
+ struct format_field *next;
+
+ while (field) {
+ next = field->next;
+ tep_free_format_field(field);
+ field = next;
+ }
+}
+
+static void free_formats(struct format *format)
+{
+ free_format_fields(format->common_fields);
+ free_format_fields(format->fields);
+}
+
+void tep_free_format(struct event_format *event)
+{
+ free(event->name);
+ free(event->system);
+
+ free_formats(&event->format);
+
+ free(event->print_fmt.format);
+ free_args(event->print_fmt.args);
+
+ free(event);
+}
+
+/**
+ * tep_free - free a pevent handle
+ * @pevent: the pevent handle to free
+ */
+void tep_free(struct tep_handle *pevent)
+{
+ struct cmdline_list *cmdlist, *cmdnext;
+ struct func_list *funclist, *funcnext;
+ struct printk_list *printklist, *printknext;
+ struct tep_function_handler *func_handler;
+ struct event_handler *handle;
+ int i;
+
+ if (!pevent)
+ return;
+
+ cmdlist = pevent->cmdlist;
+ funclist = pevent->funclist;
+ printklist = pevent->printklist;
+
+ pevent->ref_count--;
+ if (pevent->ref_count)
+ return;
+
+ if (pevent->cmdlines) {
+ for (i = 0; i < pevent->cmdline_count; i++)
+ free(pevent->cmdlines[i].comm);
+ free(pevent->cmdlines);
+ }
+
+ while (cmdlist) {
+ cmdnext = cmdlist->next;
+ free(cmdlist->comm);
+ free(cmdlist);
+ cmdlist = cmdnext;
+ }
+
+ if (pevent->func_map) {
+ for (i = 0; i < (int)pevent->func_count; i++) {
+ free(pevent->func_map[i].func);
+ free(pevent->func_map[i].mod);
+ }
+ free(pevent->func_map);
+ }
+
+ while (funclist) {
+ funcnext = funclist->next;
+ free(funclist->func);
+ free(funclist->mod);
+ free(funclist);
+ funclist = funcnext;
+ }
+
+ while (pevent->func_handlers) {
+ func_handler = pevent->func_handlers;
+ pevent->func_handlers = func_handler->next;
+ free_func_handle(func_handler);
+ }
+
+ if (pevent->printk_map) {
+ for (i = 0; i < (int)pevent->printk_count; i++)
+ free(pevent->printk_map[i].printk);
+ free(pevent->printk_map);
+ }
+
+ while (printklist) {
+ printknext = printklist->next;
+ free(printklist->printk);
+ free(printklist);
+ printklist = printknext;
+ }
+
+ for (i = 0; i < pevent->nr_events; i++)
+ tep_free_format(pevent->events[i]);
+
+ while (pevent->handlers) {
+ handle = pevent->handlers;
+ pevent->handlers = handle->next;
+ free_handler(handle);
+ }
+
+ free(pevent->trace_clock);
+ free(pevent->events);
+ free(pevent->sort_events);
+ free(pevent->func_resolver);
+
+ free(pevent);
+}
+
+void tep_unref(struct tep_handle *pevent)
+{
+ tep_free(pevent);
+}
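+
+/*
+ * Example usage (sketch): the handle life cycle.  Every tep_ref() must
+ * be balanced by a tep_unref(); the handle is only destroyed once the
+ * reference count drops to zero.
+ *
+ *	struct tep_handle *pevent = tep_alloc();	(ref_count == 1)
+ *
+ *	tep_ref(pevent);				(ref_count == 2)
+ *	tep_unref(pevent);				(ref_count == 1)
+ *	tep_free(pevent);				(ref_count == 0, freed)
+ */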
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
new file mode 100644
index 000000000..44b7c2d41
--- /dev/null
+++ b/tools/lib/traceevent/event-parse.h
@@ -0,0 +1,991 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _PARSE_EVENTS_H
+#define _PARSE_EVENTS_H
+
+#include <stdbool.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <regex.h>
+#include <string.h>
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
+#endif
+
+/* ----------------------- trace_seq ----------------------- */
+
+
+#ifndef TRACE_SEQ_BUF_SIZE
+#define TRACE_SEQ_BUF_SIZE 4096
+#endif
+
+#ifndef DEBUG_RECORD
+#define DEBUG_RECORD 0
+#endif
+
+struct tep_record {
+ unsigned long long ts;
+ unsigned long long offset;
+ long long missed_events; /* buffer dropped events before */
+ int record_size; /* size of binary record */
+ int size; /* size of data */
+ void *data;
+ int cpu;
+ int ref_count;
+ int locked; /* Do not free, even if ref_count is zero */
+ void *priv;
+#if DEBUG_RECORD
+ struct tep_record *prev;
+ struct tep_record *next;
+ long alloc_addr;
+#endif
+};
+
+enum trace_seq_fail {
+ TRACE_SEQ__GOOD,
+ TRACE_SEQ__BUFFER_POISONED,
+ TRACE_SEQ__MEM_ALLOC_FAILED,
+};
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE).
+ */
+
+struct trace_seq {
+ char *buffer;
+ unsigned int buffer_size;
+ unsigned int len;
+ unsigned int readpos;
+ enum trace_seq_fail state;
+};
+
+void trace_seq_init(struct trace_seq *s);
+void trace_seq_reset(struct trace_seq *s);
+void trace_seq_destroy(struct trace_seq *s);
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+ __attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 2, 0)));
+
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+
+extern void trace_seq_terminate(struct trace_seq *s);
+
+extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
+extern int trace_seq_do_printf(struct trace_seq *s);
+
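+/*
+ * Example usage (an illustrative sketch): build a line in a trace_seq
+ * and flush it to stdout.  "record" is assumed to be a tep_record
+ * owned by the caller.
+ *
+ *	struct trace_seq s;
+ *
+ *	trace_seq_init(&s);
+ *	trace_seq_printf(&s, "cpu=%d ts=%llu", record->cpu, record->ts);
+ *	trace_seq_terminate(&s);
+ *	trace_seq_do_printf(&s);
+ *	trace_seq_destroy(&s);
+ */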
+
+/* ----------------------- pevent ----------------------- */
+
+struct tep_handle;
+struct event_format;
+
+typedef int (*tep_event_handler_func)(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event,
+ void *context);
+
+typedef int (*tep_plugin_load_func)(struct tep_handle *pevent);
+typedef int (*tep_plugin_unload_func)(struct tep_handle *pevent);
+
+struct tep_plugin_option {
+ struct tep_plugin_option *next;
+ void *handle;
+ char *file;
+ char *name;
+ char *plugin_alias;
+ char *description;
+ const char *value;
+ void *priv;
+ int set;
+};
+
+/*
+ * Plugin hooks that can be called:
+ *
+ * TEP_PLUGIN_LOADER: (required)
+ * The name of the function that initializes the plugin.
+ *
+ * int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+ *
+ * TEP_PLUGIN_UNLOADER: (optional)
+ * The function called just before unloading
+ *
+ * int TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+ *
+ * TEP_PLUGIN_OPTIONS: (optional)
+ * Plugin options that can be set before loading
+ *
+ * struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = {
+ * {
+ * .name = "option-name",
+ * .plugin_alias = "override-file-name", (optional)
+ * .description = "description of option to show users",
+ * },
+ * {
+ * .name = NULL,
+ * },
+ * };
+ *
+ * Array must end with .name = NULL;
+ *
+ *
+ * .plugin_alias is used to give a shorter name to access
+ * the variable. Useful if a plugin handles more than one event.
+ *
+ * If .value is not set, then it is considered a boolean and only
+ * .set will be processed. If .value is defined, then it is considered
+ * a string option and .set will be ignored.
+ *
+ * TEP_PLUGIN_ALIAS: (optional)
+ * The name to use for finding options (uses filename if not defined)
+ */
+#define TEP_PLUGIN_LOADER tep_plugin_loader
+#define TEP_PLUGIN_UNLOADER tep_plugin_unloader
+#define TEP_PLUGIN_OPTIONS tep_plugin_options
+#define TEP_PLUGIN_ALIAS tep_plugin_alias
+#define _MAKE_STR(x) #x
+#define MAKE_STR(x) _MAKE_STR(x)
+#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER)
+#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER)
+#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS)
+#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS)
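+
+/*
+ * Example (sketch): skeleton of a minimal plugin built around the
+ * hooks above.  sched_switch_handler stands in for a
+ * tep_event_handler_func defined by the plugin itself.
+ *
+ *	#include "event-parse.h"
+ *
+ *	int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+ *	{
+ *		tep_register_event_handler(pevent, -1, "sched",
+ *					   "sched_switch",
+ *					   sched_switch_handler, NULL);
+ *		return 0;
+ *	}
+ *
+ *	int TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+ *	{
+ *		tep_unregister_event_handler(pevent, -1, "sched",
+ *					     "sched_switch",
+ *					     sched_switch_handler, NULL);
+ *		return 0;
+ *	}
+ */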
+
+enum format_flags {
+ FIELD_IS_ARRAY = 1,
+ FIELD_IS_POINTER = 2,
+ FIELD_IS_SIGNED = 4,
+ FIELD_IS_STRING = 8,
+ FIELD_IS_DYNAMIC = 16,
+ FIELD_IS_LONG = 32,
+ FIELD_IS_FLAG = 64,
+ FIELD_IS_SYMBOLIC = 128,
+};
+
+struct format_field {
+ struct format_field *next;
+ struct event_format *event;
+ char *type;
+ char *name;
+ char *alias;
+ int offset;
+ int size;
+ unsigned int arraylen;
+ unsigned int elementsize;
+ unsigned long flags;
+};
+
+struct format {
+ int nr_common;
+ int nr_fields;
+ struct format_field *common_fields;
+ struct format_field *fields;
+};
+
+struct print_arg_atom {
+ char *atom;
+};
+
+struct print_arg_string {
+ char *string;
+ int offset;
+};
+
+struct print_arg_bitmask {
+ char *bitmask;
+ int offset;
+};
+
+struct print_arg_field {
+ char *name;
+ struct format_field *field;
+};
+
+struct print_flag_sym {
+ struct print_flag_sym *next;
+ char *value;
+ char *str;
+};
+
+struct print_arg_typecast {
+ char *type;
+ struct print_arg *item;
+};
+
+struct print_arg_flags {
+ struct print_arg *field;
+ char *delim;
+ struct print_flag_sym *flags;
+};
+
+struct print_arg_symbol {
+ struct print_arg *field;
+ struct print_flag_sym *symbols;
+};
+
+struct print_arg_hex {
+ struct print_arg *field;
+ struct print_arg *size;
+};
+
+struct print_arg_int_array {
+ struct print_arg *field;
+ struct print_arg *count;
+ struct print_arg *el_size;
+};
+
+struct print_arg_dynarray {
+ struct format_field *field;
+ struct print_arg *index;
+};
+
+struct print_arg;
+
+struct print_arg_op {
+ char *op;
+ int prio;
+ struct print_arg *left;
+ struct print_arg *right;
+};
+
+struct tep_function_handler;
+
+struct print_arg_func {
+ struct tep_function_handler *func;
+ struct print_arg *args;
+};
+
+enum print_arg_type {
+ PRINT_NULL,
+ PRINT_ATOM,
+ PRINT_FIELD,
+ PRINT_FLAGS,
+ PRINT_SYMBOL,
+ PRINT_HEX,
+ PRINT_INT_ARRAY,
+ PRINT_TYPE,
+ PRINT_STRING,
+ PRINT_BSTRING,
+ PRINT_DYNAMIC_ARRAY,
+ PRINT_OP,
+ PRINT_FUNC,
+ PRINT_BITMASK,
+ PRINT_DYNAMIC_ARRAY_LEN,
+ PRINT_HEX_STR,
+};
+
+struct print_arg {
+ struct print_arg *next;
+ enum print_arg_type type;
+ union {
+ struct print_arg_atom atom;
+ struct print_arg_field field;
+ struct print_arg_typecast typecast;
+ struct print_arg_flags flags;
+ struct print_arg_symbol symbol;
+ struct print_arg_hex hex;
+ struct print_arg_int_array int_array;
+ struct print_arg_func func;
+ struct print_arg_string string;
+ struct print_arg_bitmask bitmask;
+ struct print_arg_op op;
+ struct print_arg_dynarray dynarray;
+ };
+};
+
+struct print_fmt {
+ char *format;
+ struct print_arg *args;
+};
+
+struct event_format {
+ struct tep_handle *pevent;
+ char *name;
+ int id;
+ int flags;
+ struct format format;
+ struct print_fmt print_fmt;
+ char *system;
+ tep_event_handler_func handler;
+ void *context;
+};
+
+enum {
+ EVENT_FL_ISFTRACE = 0x01,
+ EVENT_FL_ISPRINT = 0x02,
+ EVENT_FL_ISBPRINT = 0x04,
+ EVENT_FL_ISFUNCENT = 0x10,
+ EVENT_FL_ISFUNCRET = 0x20,
+ EVENT_FL_NOHANDLE = 0x40,
+ EVENT_FL_PRINTRAW = 0x80,
+
+ EVENT_FL_FAILED = 0x80000000
+};
+
+enum event_sort_type {
+ EVENT_SORT_ID,
+ EVENT_SORT_NAME,
+ EVENT_SORT_SYSTEM,
+};
+
+enum event_type {
+ EVENT_ERROR,
+ EVENT_NONE,
+ EVENT_SPACE,
+ EVENT_NEWLINE,
+ EVENT_OP,
+ EVENT_DELIM,
+ EVENT_ITEM,
+ EVENT_DQUOTE,
+ EVENT_SQUOTE,
+};
+
+typedef unsigned long long (*tep_func_handler)(struct trace_seq *s,
+ unsigned long long *args);
+
+enum tep_func_arg_type {
+ TEP_FUNC_ARG_VOID,
+ TEP_FUNC_ARG_INT,
+ TEP_FUNC_ARG_LONG,
+ TEP_FUNC_ARG_STRING,
+ TEP_FUNC_ARG_PTR,
+ TEP_FUNC_ARG_MAX_TYPES
+};
+
+enum tep_flag {
+ TEP_NSEC_OUTPUT = 1, /* output in NSECS */
+ TEP_DISABLE_SYS_PLUGINS = 1 << 1,
+ TEP_DISABLE_PLUGINS = 1 << 2,
+};
+
+#define TEP_ERRORS \
+ _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \
+ _PE(PARSE_EVENT_FAILED, "failed to parse event"), \
+ _PE(READ_ID_FAILED, "failed to read event id"), \
+ _PE(READ_FORMAT_FAILED, "failed to read event format"), \
+ _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \
+ _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\
+ _PE(INVALID_ARG_TYPE, "invalid argument type"), \
+ _PE(INVALID_EXP_TYPE, "invalid expression type"), \
+ _PE(INVALID_OP_TYPE, "invalid operator type"), \
+ _PE(INVALID_EVENT_NAME, "invalid event name"), \
+ _PE(EVENT_NOT_FOUND, "no event found"), \
+ _PE(SYNTAX_ERROR, "syntax error"), \
+ _PE(ILLEGAL_RVALUE, "illegal rvalue"), \
+ _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \
+ _PE(INVALID_REGEX, "regex did not compute"), \
+ _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \
+ _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \
+ _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \
+ _PE(REPARENT_FAILED, "failed to reparent filter OP"), \
+ _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \
+ _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \
+ _PE(ILLEGAL_TOKEN, "illegal token"), \
+ _PE(INVALID_PAREN, "open parenthesis cannot come here"), \
+ _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \
+ _PE(UNKNOWN_TOKEN, "unknown token"), \
+ _PE(FILTER_NOT_FOUND, "no filter found"), \
+ _PE(NOT_A_NUMBER, "must have number field"), \
+	_PE(NO_FILTER,		"no filter exists"),			      \
+	_PE(FILTER_MISS,	"record does not match the filter")
+
+#undef _PE
+#define _PE(__code, __str) TEP_ERRNO__ ## __code
+enum tep_errno {
+ TEP_ERRNO__SUCCESS = 0,
+ TEP_ERRNO__FILTER_MATCH = TEP_ERRNO__SUCCESS,
+
+ /*
+ * Choose an arbitrary negative big number not to clash with standard
+ * errno since SUS requires the errno has distinct positive values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __TEP_ERRNO__START = -100000,
+
+ TEP_ERRORS,
+
+ __TEP_ERRNO__END,
+};
+#undef _PE
+
+struct plugin_list;
+
+#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1))
+
+struct plugin_list *tep_load_plugins(struct tep_handle *pevent);
+void tep_unload_plugins(struct plugin_list *plugin_list,
+ struct tep_handle *pevent);
+char **tep_plugin_list_options(void);
+void tep_plugin_free_options_list(char **list);
+int tep_plugin_add_options(const char *name,
+ struct tep_plugin_option *options);
+void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_print_plugins(struct trace_seq *s,
+ const char *prefix, const char *suffix,
+ const struct plugin_list *list);
+
+struct cmdline;
+struct cmdline_list;
+struct func_map;
+struct func_list;
+struct event_handler;
+struct func_resolver;
+
+typedef char *(tep_func_resolver_t)(void *priv,
+ unsigned long long *addrp, char **modp);
+
+struct tep_handle {
+ int ref_count;
+
+ int header_page_ts_offset;
+ int header_page_ts_size;
+ int header_page_size_offset;
+ int header_page_size_size;
+ int header_page_data_offset;
+ int header_page_data_size;
+ int header_page_overwrite;
+
+ int file_bigendian;
+ int host_bigendian;
+
+ int latency_format;
+
+ int old_format;
+
+ int cpus;
+ int long_size;
+ int page_size;
+
+ struct cmdline *cmdlines;
+ struct cmdline_list *cmdlist;
+ int cmdline_count;
+
+ struct func_map *func_map;
+ struct func_resolver *func_resolver;
+ struct func_list *funclist;
+ unsigned int func_count;
+
+ struct printk_map *printk_map;
+ struct printk_list *printklist;
+ unsigned int printk_count;
+
+
+ struct event_format **events;
+ int nr_events;
+ struct event_format **sort_events;
+ enum event_sort_type last_type;
+
+ int type_offset;
+ int type_size;
+
+ int pid_offset;
+ int pid_size;
+
+ int pc_offset;
+ int pc_size;
+
+ int flags_offset;
+ int flags_size;
+
+ int ld_offset;
+ int ld_size;
+
+ int print_raw;
+
+ int test_filters;
+
+ int flags;
+
+ struct format_field *bprint_ip_field;
+ struct format_field *bprint_fmt_field;
+ struct format_field *bprint_buf_field;
+
+ struct event_handler *handlers;
+ struct tep_function_handler *func_handlers;
+
+ /* cache */
+ struct event_format *last_event;
+
+ char *trace_clock;
+};
+
+static inline void tep_set_flag(struct tep_handle *pevent, int flag)
+{
+ pevent->flags |= flag;
+}
+
+static inline unsigned short
+__data2host2(struct tep_handle *pevent, unsigned short data)
+{
+ unsigned short swap;
+
+ if (pevent->host_bigendian == pevent->file_bigendian)
+ return data;
+
+ swap = ((data & 0xffULL) << 8) |
+ ((data & (0xffULL << 8)) >> 8);
+
+ return swap;
+}
+
+static inline unsigned int
+__data2host4(struct tep_handle *pevent, unsigned int data)
+{
+ unsigned int swap;
+
+ if (pevent->host_bigendian == pevent->file_bigendian)
+ return data;
+
+ swap = ((data & 0xffULL) << 24) |
+ ((data & (0xffULL << 8)) << 8) |
+ ((data & (0xffULL << 16)) >> 8) |
+ ((data & (0xffULL << 24)) >> 24);
+
+ return swap;
+}
+
+static inline unsigned long long
+__data2host8(struct tep_handle *pevent, unsigned long long data)
+{
+ unsigned long long swap;
+
+ if (pevent->host_bigendian == pevent->file_bigendian)
+ return data;
+
+ swap = ((data & 0xffULL) << 56) |
+ ((data & (0xffULL << 8)) << 40) |
+ ((data & (0xffULL << 16)) << 24) |
+ ((data & (0xffULL << 24)) << 8) |
+ ((data & (0xffULL << 32)) >> 8) |
+ ((data & (0xffULL << 40)) >> 24) |
+ ((data & (0xffULL << 48)) >> 40) |
+ ((data & (0xffULL << 56)) >> 56);
+
+ return swap;
+}
+
+#define data2host2(pevent, ptr) __data2host2(pevent, *(unsigned short *)(ptr))
+#define data2host4(pevent, ptr) __data2host4(pevent, *(unsigned int *)(ptr))
+#define data2host8(pevent, ptr) \
+({ \
+ unsigned long long __val; \
+ \
+ memcpy(&__val, (ptr), sizeof(unsigned long long)); \
+ __data2host8(pevent, __val); \
+})
+
+static inline int tep_host_bigendian(void)
+{
+ unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+ unsigned int val;
+
+ memcpy(&val, str, 4);
+ return val == 0x01020304;
+}
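+
+/*
+ * Example usage (sketch): set both endianness fields so the
+ * data2host*() macros above can decide whether to swap.  The file
+ * endianness would normally come from the trace file's own header,
+ * and "ptr" is assumed to point into raw event data; the constant 1
+ * here is only for illustration.
+ *
+ *	unsigned int commit;
+ *
+ *	tep_set_file_bigendian(pevent, 1);
+ *	tep_set_host_bigendian(pevent, tep_host_bigendian());
+ *	commit = data2host4(pevent, ptr);	(swapped only on mismatch)
+ */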
+
+/* taken from kernel/trace/trace.h */
+enum trace_flag_type {
+ TRACE_FLAG_IRQS_OFF = 0x01,
+ TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
+ TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_HARDIRQ = 0x08,
+ TRACE_FLAG_SOFTIRQ = 0x10,
+};
+
+int tep_set_function_resolver(struct tep_handle *pevent,
+ tep_func_resolver_t *func, void *priv);
+void tep_reset_function_resolver(struct tep_handle *pevent);
+int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid);
+int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock);
+int tep_register_function(struct tep_handle *pevent, char *name,
+ unsigned long long addr, char *mod);
+int tep_register_print_string(struct tep_handle *pevent, const char *fmt,
+ unsigned long long addr);
+int tep_pid_is_registered(struct tep_handle *pevent, int pid);
+
+void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s,
+ struct event_format *event,
+ struct tep_record *record);
+void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s,
+ struct event_format *event,
+ struct tep_record *record,
+ bool use_trace_clock);
+void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s,
+ struct event_format *event,
+ struct tep_record *record);
+void tep_print_event(struct tep_handle *pevent, struct trace_seq *s,
+ struct tep_record *record, bool use_trace_clock);
+
+int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size,
+ int long_size);
+
+enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf,
+ unsigned long size, const char *sys);
+enum tep_errno tep_parse_format(struct tep_handle *pevent,
+ struct event_format **eventp,
+ const char *buf,
+ unsigned long size, const char *sys);
+void tep_free_format(struct event_format *event);
+void tep_free_format_field(struct format_field *field);
+
+void *tep_get_field_raw(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ int *len, int err);
+
+int tep_get_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err);
+int tep_get_common_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err);
+int tep_get_any_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct tep_record *record,
+ unsigned long long *val, int err);
+
+int tep_print_num_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct tep_record *record, int err);
+
+int tep_print_func_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct tep_record *record, int err);
+
+int tep_register_event_handler(struct tep_handle *pevent, int id,
+ const char *sys_name, const char *event_name,
+ tep_event_handler_func func, void *context);
+int tep_unregister_event_handler(struct tep_handle *pevent, int id,
+ const char *sys_name, const char *event_name,
+ tep_event_handler_func func, void *context);
+int tep_register_print_function(struct tep_handle *pevent,
+ tep_func_handler func,
+ enum tep_func_arg_type ret_type,
+ char *name, ...);
+int tep_unregister_print_function(struct tep_handle *pevent,
+ tep_func_handler func, char *name);
+
+struct format_field *tep_find_common_field(struct event_format *event, const char *name);
+struct format_field *tep_find_field(struct event_format *event, const char *name);
+struct format_field *tep_find_any_field(struct event_format *event, const char *name);
+
+const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr);
+unsigned long long
+tep_find_function_address(struct tep_handle *pevent, unsigned long long addr);
+unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size);
+int tep_read_number_field(struct format_field *field, const void *data,
+ unsigned long long *value);
+
+struct event_format *tep_find_event(struct tep_handle *pevent, int id);
+
+struct event_format *
+tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name);
+
+struct event_format *
+tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record);
+
+void tep_data_lat_fmt(struct tep_handle *pevent,
+ struct trace_seq *s, struct tep_record *record);
+int tep_data_type(struct tep_handle *pevent, struct tep_record *rec);
+struct event_format *tep_data_event_from_type(struct tep_handle *pevent, int type);
+int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec);
+int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec);
+int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec);
+const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid);
+struct cmdline;
+struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm,
+ struct cmdline *next);
+int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline);
+
+void tep_print_field(struct trace_seq *s, void *data,
+ struct format_field *field);
+void tep_print_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused, struct event_format *event);
+void tep_event_info(struct trace_seq *s, struct event_format *event,
+ struct tep_record *record);
+int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum,
+ char *buf, size_t buflen);
+
+struct event_format **tep_list_events(struct tep_handle *pevent, enum event_sort_type);
+struct format_field **tep_event_common_fields(struct event_format *event);
+struct format_field **tep_event_fields(struct event_format *event);
+
+static inline int tep_get_cpus(struct tep_handle *pevent)
+{
+ return pevent->cpus;
+}
+
+static inline void tep_set_cpus(struct tep_handle *pevent, int cpus)
+{
+ pevent->cpus = cpus;
+}
+
+static inline int tep_get_long_size(struct tep_handle *pevent)
+{
+ return pevent->long_size;
+}
+
+static inline void tep_set_long_size(struct tep_handle *pevent, int long_size)
+{
+ pevent->long_size = long_size;
+}
+
+static inline int tep_get_page_size(struct tep_handle *pevent)
+{
+ return pevent->page_size;
+}
+
+static inline void tep_set_page_size(struct tep_handle *pevent, int _page_size)
+{
+ pevent->page_size = _page_size;
+}
+
+static inline int tep_is_file_bigendian(struct tep_handle *pevent)
+{
+ return pevent->file_bigendian;
+}
+
+static inline void tep_set_file_bigendian(struct tep_handle *pevent, int endian)
+{
+ pevent->file_bigendian = endian;
+}
+
+static inline int tep_is_host_bigendian(struct tep_handle *pevent)
+{
+ return pevent->host_bigendian;
+}
+
+static inline void tep_set_host_bigendian(struct tep_handle *pevent, int endian)
+{
+ pevent->host_bigendian = endian;
+}
+
+static inline int tep_is_latency_format(struct tep_handle *pevent)
+{
+ return pevent->latency_format;
+}
+
+static inline void tep_set_latency_format(struct tep_handle *pevent, int lat)
+{
+ pevent->latency_format = lat;
+}
+
+struct tep_handle *tep_alloc(void);
+void tep_free(struct tep_handle *pevent);
+void tep_ref(struct tep_handle *pevent);
+void tep_unref(struct tep_handle *pevent);
+
+/* access to the internal parser */
+void tep_buffer_init(const char *buf, unsigned long long size);
+enum event_type tep_read_token(char **tok);
+void tep_free_token(char *token);
+int tep_peek_char(void);
+const char *tep_get_input_buf(void);
+unsigned long long tep_get_input_buf_ptr(void);
+
+/* for debugging */
+void tep_print_funcs(struct tep_handle *pevent);
+void tep_print_printk(struct tep_handle *pevent);
+
+/* ----------------------- filtering ----------------------- */
+
+enum filter_boolean_type {
+ FILTER_FALSE,
+ FILTER_TRUE,
+};
+
+enum filter_op_type {
+ FILTER_OP_AND = 1,
+ FILTER_OP_OR,
+ FILTER_OP_NOT,
+};
+
+enum filter_cmp_type {
+ FILTER_CMP_NONE,
+ FILTER_CMP_EQ,
+ FILTER_CMP_NE,
+ FILTER_CMP_GT,
+ FILTER_CMP_LT,
+ FILTER_CMP_GE,
+ FILTER_CMP_LE,
+ FILTER_CMP_MATCH,
+ FILTER_CMP_NOT_MATCH,
+ FILTER_CMP_REGEX,
+ FILTER_CMP_NOT_REGEX,
+};
+
+enum filter_exp_type {
+ FILTER_EXP_NONE,
+ FILTER_EXP_ADD,
+ FILTER_EXP_SUB,
+ FILTER_EXP_MUL,
+ FILTER_EXP_DIV,
+ FILTER_EXP_MOD,
+ FILTER_EXP_RSHIFT,
+ FILTER_EXP_LSHIFT,
+ FILTER_EXP_AND,
+ FILTER_EXP_OR,
+ FILTER_EXP_XOR,
+ FILTER_EXP_NOT,
+};
+
+enum filter_arg_type {
+ FILTER_ARG_NONE,
+ FILTER_ARG_BOOLEAN,
+ FILTER_ARG_VALUE,
+ FILTER_ARG_FIELD,
+ FILTER_ARG_EXP,
+ FILTER_ARG_OP,
+ FILTER_ARG_NUM,
+ FILTER_ARG_STR,
+};
+
+enum filter_value_type {
+ FILTER_NUMBER,
+ FILTER_STRING,
+ FILTER_CHAR
+};
+
+struct filter_arg;
+
+struct filter_arg_boolean {
+ enum filter_boolean_type value;
+};
+
+struct filter_arg_field {
+ struct format_field *field;
+};
+
+struct filter_arg_value {
+ enum filter_value_type type;
+ union {
+ char *str;
+ unsigned long long val;
+ };
+};
+
+struct filter_arg_op {
+ enum filter_op_type type;
+ struct filter_arg *left;
+ struct filter_arg *right;
+};
+
+struct filter_arg_exp {
+ enum filter_exp_type type;
+ struct filter_arg *left;
+ struct filter_arg *right;
+};
+
+struct filter_arg_num {
+ enum filter_cmp_type type;
+ struct filter_arg *left;
+ struct filter_arg *right;
+};
+
+struct filter_arg_str {
+ enum filter_cmp_type type;
+ struct format_field *field;
+ char *val;
+ char *buffer;
+ regex_t reg;
+};
+
+struct filter_arg {
+ enum filter_arg_type type;
+ union {
+ struct filter_arg_boolean boolean;
+ struct filter_arg_field field;
+ struct filter_arg_value value;
+ struct filter_arg_op op;
+ struct filter_arg_exp exp;
+ struct filter_arg_num num;
+ struct filter_arg_str str;
+ };
+};
+
+struct filter_type {
+ int event_id;
+ struct event_format *event;
+ struct filter_arg *filter;
+};
+
+#define TEP_FILTER_ERROR_BUFSZ 1024
+
+struct event_filter {
+ struct tep_handle *pevent;
+ int filters;
+ struct filter_type *event_filters;
+ char error_buffer[TEP_FILTER_ERROR_BUFSZ];
+};
+
+struct event_filter *tep_filter_alloc(struct tep_handle *pevent);
+
+/* for backward compatibility */
+#define FILTER_NONE TEP_ERRNO__NO_FILTER
+#define FILTER_NOEXIST TEP_ERRNO__FILTER_NOT_FOUND
+#define FILTER_MISS TEP_ERRNO__FILTER_MISS
+#define FILTER_MATCH TEP_ERRNO__FILTER_MATCH
+
+enum filter_trivial_type {
+ FILTER_TRIVIAL_FALSE,
+ FILTER_TRIVIAL_TRUE,
+ FILTER_TRIVIAL_BOTH,
+};
+
+enum tep_errno tep_filter_add_filter_str(struct event_filter *filter,
+ const char *filter_str);
+
+enum tep_errno tep_filter_match(struct event_filter *filter,
+ struct tep_record *record);
+
+int tep_filter_strerror(struct event_filter *filter, enum tep_errno err,
+ char *buf, size_t buflen);
+
+int tep_event_filtered(struct event_filter *filter,
+ int event_id);
+
+void tep_filter_reset(struct event_filter *filter);
+
+int tep_filter_clear_trivial(struct event_filter *filter,
+ enum filter_trivial_type type);
+
+void tep_filter_free(struct event_filter *filter);
+
+char *tep_filter_make_string(struct event_filter *filter, int event_id);
+
+int tep_filter_remove_event(struct event_filter *filter,
+ int event_id);
+
+int tep_filter_event_has_trivial(struct event_filter *filter,
+ int event_id,
+ enum filter_trivial_type type);
+
+int tep_filter_copy(struct event_filter *dest, struct event_filter *source);
+
+int tep_update_trivial(struct event_filter *dest, struct event_filter *source,
+ enum filter_trivial_type type);
+
+int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter2);
+
+#endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
new file mode 100644
index 000000000..52874eb94
--- /dev/null
+++ b/tools/lib/traceevent/event-plugin.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "event-parse.h"
+#include "event-utils.h"
+
+#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/"
+
+static struct registered_plugin_options {
+ struct registered_plugin_options *next;
+ struct tep_plugin_option *options;
+} *registered_options;
+
+static struct trace_plugin_options {
+ struct trace_plugin_options *next;
+ char *plugin;
+ char *option;
+ char *value;
+} *trace_plugin_options;
+
+struct plugin_list {
+ struct plugin_list *next;
+ char *name;
+ void *handle;
+};
+
+static void lower_case(char *str)
+{
+ if (!str)
+ return;
+ for (; *str; str++)
+ *str = tolower(*str);
+}
+
+static int update_option_value(struct tep_plugin_option *op, const char *val)
+{
+ char *op_val;
+
+ if (!val) {
+ /* toggle, only if option is boolean */
+ if (op->value)
+ /* Warn? */
+ return 0;
+ op->set ^= 1;
+ return 0;
+ }
+
+ /*
+ * If the option has a value then it takes a string
+ * otherwise the option is a boolean.
+ */
+ if (op->value) {
+ op->value = val;
+ return 0;
+ }
+
+ /* Option is boolean, must be either "1", "0", "true" or "false" */
+
+ op_val = strdup(val);
+ if (!op_val)
+ return -1;
+ lower_case(op_val);
+
+	if (strcmp(op_val, "1") == 0 || strcmp(op_val, "true") == 0)
+		op->set = 1;
+	else if (strcmp(op_val, "0") == 0 || strcmp(op_val, "false") == 0)
+ op->set = 0;
+ free(op_val);
+
+ return 0;
+}
+
+/**
+ * tep_plugin_list_options - get list of plugin options
+ *
+ * Returns an array of char strings that list the currently registered
+ * plugin options in the format of <plugin>:<option>. This list can be
+ * used to toggle those options on or off.
+ *
+ * Returns NULL if no options are registered. On error it returns
+ * INVALID_PLUGIN_LIST_OPTION
+ *
+ * Must be freed with tep_plugin_free_options_list().
+ */
+char **tep_plugin_list_options(void)
+{
+ struct registered_plugin_options *reg;
+ struct tep_plugin_option *op;
+ char **list = NULL;
+ char *name;
+ int count = 0;
+
+ for (reg = registered_options; reg; reg = reg->next) {
+ for (op = reg->options; op->name; op++) {
+ char *alias = op->plugin_alias ? op->plugin_alias : op->file;
+ char **temp = list;
+ int ret;
+
+ ret = asprintf(&name, "%s:%s", alias, op->name);
+ if (ret < 0)
+ goto err;
+
+			list = realloc(list, sizeof(*list) * (count + 2));
+ if (!list) {
+ list = temp;
+ free(name);
+ goto err;
+ }
+ list[count++] = name;
+ list[count] = NULL;
+ }
+ }
+ return list;
+
+ err:
+ while (--count >= 0)
+ free(list[count]);
+ free(list);
+
+ return INVALID_PLUGIN_LIST_OPTION;
+}
+
+void tep_plugin_free_options_list(char **list)
+{
+ int i;
+
+ if (!list)
+ return;
+
+ if (list == INVALID_PLUGIN_LIST_OPTION)
+ return;
+
+ for (i = 0; list[i]; i++)
+ free(list[i]);
+
+ free(list);
+}
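+
+/*
+ * Example usage (sketch): print every registered <plugin>:<option>
+ * pair.  Both NULL and INVALID_PLUGIN_LIST_OPTION are safe to hand
+ * back to tep_plugin_free_options_list().
+ *
+ *	char **list = tep_plugin_list_options();
+ *	int i;
+ *
+ *	if (list && list != INVALID_PLUGIN_LIST_OPTION) {
+ *		for (i = 0; list[i]; i++)
+ *			printf("%s\n", list[i]);
+ *	}
+ *	tep_plugin_free_options_list(list);
+ */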
+
+static int
+update_option(const char *file, struct tep_plugin_option *option)
+{
+ struct trace_plugin_options *op;
+ char *plugin;
+ int ret = 0;
+
+ if (option->plugin_alias) {
+ plugin = strdup(option->plugin_alias);
+ if (!plugin)
+ return -1;
+ } else {
+ char *p;
+ plugin = strdup(file);
+ if (!plugin)
+ return -1;
+ p = strstr(plugin, ".");
+ if (p)
+ *p = '\0';
+ }
+
+ /* first look for named options */
+ for (op = trace_plugin_options; op; op = op->next) {
+ if (!op->plugin)
+ continue;
+ if (strcmp(op->plugin, plugin) != 0)
+ continue;
+ if (strcmp(op->option, option->name) != 0)
+ continue;
+
+ ret = update_option_value(option, op->value);
+ if (ret)
+ goto out;
+ break;
+ }
+
+	/* then look for unnamed options */
+ for (op = trace_plugin_options; op; op = op->next) {
+ if (op->plugin)
+ continue;
+ if (strcmp(op->option, option->name) != 0)
+ continue;
+
+ ret = update_option_value(option, op->value);
+ break;
+ }
+
+ out:
+ free(plugin);
+ return ret;
+}
+
+/**
+ * tep_plugin_add_options - Add a set of options by a plugin
+ * @name: The name of the plugin adding the options
+ * @options: The set of options being loaded
+ *
+ * Sets the options with the values that have been added by user.
+ */
+int tep_plugin_add_options(const char *name,
+ struct tep_plugin_option *options)
+{
+ struct registered_plugin_options *reg;
+
+ reg = malloc(sizeof(*reg));
+ if (!reg)
+ return -1;
+ reg->next = registered_options;
+ reg->options = options;
+ registered_options = reg;
+
+ while (options->name) {
+ update_option(name, options);
+ options++;
+ }
+ return 0;
+}
+
+/**
+ * tep_plugin_remove_options - remove plugin options that were registered
+ * @options: Options to remove that were registered with tep_plugin_add_options()
+ */
+void tep_plugin_remove_options(struct tep_plugin_option *options)
+{
+ struct registered_plugin_options **last;
+ struct registered_plugin_options *reg;
+
+ for (last = &registered_options; *last; last = &(*last)->next) {
+ if ((*last)->options == options) {
+ reg = *last;
+ *last = reg->next;
+ free(reg);
+ return;
+ }
+ }
+}
+
+/**
+ * tep_print_plugins - print out the list of plugins loaded
+ * @s: the trace_seq descriptor to write to
+ * @prefix: The prefix string to add before listing the option name
+ * @suffix: The suffix string to append after the option name
+ * @list: The list of plugins (usually returned by tep_load_plugins())
+ *
+ * Writes to the trace_seq @s the list of plugins (files) that is
+ * returned by tep_load_plugins(). Use @prefix and @suffix for formatting:
+ * @prefix = " ", @suffix = "\n".
+ */
+void tep_print_plugins(struct trace_seq *s,
+ const char *prefix, const char *suffix,
+ const struct plugin_list *list)
+{
+ while (list) {
+ trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
+ list = list->next;
+ }
+}
+
+static void
+load_plugin(struct tep_handle *pevent, const char *path,
+ const char *file, void *data)
+{
+ struct plugin_list **plugin_list = data;
+ tep_plugin_load_func func;
+ struct plugin_list *list;
+ const char *alias;
+ char *plugin;
+ void *handle;
+ int ret;
+
+ ret = asprintf(&plugin, "%s/%s", path, file);
+ if (ret < 0) {
+ warning("could not allocate plugin memory\n");
+ return;
+ }
+
+ handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
+ if (!handle) {
+ warning("could not load plugin '%s'\n%s\n",
+ plugin, dlerror());
+ goto out_free;
+ }
+
+ alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME);
+ if (!alias)
+ alias = file;
+
+ func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
+ if (!func) {
+ warning("could not find func '%s' in plugin '%s'\n%s\n",
+ TEP_PLUGIN_LOADER_NAME, plugin, dlerror());
+ goto out_free;
+ }
+
+ list = malloc(sizeof(*list));
+ if (!list) {
+ warning("could not allocate plugin memory\n");
+ goto out_free;
+ }
+
+ list->next = *plugin_list;
+ list->handle = handle;
+ list->name = plugin;
+ *plugin_list = list;
+
+ pr_stat("registering plugin: %s", plugin);
+ func(pevent);
+ return;
+
+ out_free:
+ free(plugin);
+}
+
+static void
+load_plugins_dir(struct tep_handle *pevent, const char *suffix,
+ const char *path,
+ void (*load_plugin)(struct tep_handle *pevent,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
+{
+ struct dirent *dent;
+ struct stat st;
+ DIR *dir;
+ int ret;
+
+ ret = stat(path, &st);
+ if (ret < 0)
+ return;
+
+ if (!S_ISDIR(st.st_mode))
+ return;
+
+ dir = opendir(path);
+ if (!dir)
+ return;
+
+ while ((dent = readdir(dir))) {
+ const char *name = dent->d_name;
+
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0)
+ continue;
+
+		/* Only load plugins that end in suffix */
+		if (strlen(name) < strlen(suffix))
+			continue;
+		if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0)
+			continue;
+
+ load_plugin(pevent, path, name, data);
+ }
+
+ closedir(dir);
+}
+
+static void
+load_plugins(struct tep_handle *pevent, const char *suffix,
+ void (*load_plugin)(struct tep_handle *pevent,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
+{
+ char *home;
+ char *path;
+ char *envdir;
+ int ret;
+
+ if (pevent->flags & TEP_DISABLE_PLUGINS)
+ return;
+
+ /*
+ * If a system plugin directory was defined,
+ * check that first.
+ */
+#ifdef PLUGIN_DIR
+ if (!(pevent->flags & TEP_DISABLE_SYS_PLUGINS))
+ load_plugins_dir(pevent, suffix, PLUGIN_DIR,
+ load_plugin, data);
+#endif
+
+ /*
+ * Next let the environment-set plugin directory
+ * override the system defaults.
+ */
+ envdir = getenv("TRACEEVENT_PLUGIN_DIR");
+ if (envdir)
+ load_plugins_dir(pevent, suffix, envdir, load_plugin, data);
+
+ /*
+ * Now let the home directory override the environment
+ * or system defaults.
+ */
+ home = getenv("HOME");
+ if (!home)
+ return;
+
+ ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR);
+ if (ret < 0) {
+ warning("could not allocate plugin memory\n");
+ return;
+ }
+
+ load_plugins_dir(pevent, suffix, path, load_plugin, data);
+
+ free(path);
+}
+
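+/**
+ * tep_load_plugins - load all plugins in the plugin search paths
+ * @pevent: the handle to pass to each plugin's loader function
+ *
+ * Loads every file ending in ".so" found in the compiled-in system
+ * plugin directory, the TRACEEVENT_PLUGIN_DIR environment directory
+ * and the user's local plugin directory, in that order.
+ * A minimal usage sketch:
+ *
+ *	struct plugin_list *plugins = tep_load_plugins(pevent);
+ *	...
+ *	tep_unload_plugins(plugins, pevent);
+ */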
+struct plugin_list *
+tep_load_plugins(struct tep_handle *pevent)
+{
+ struct plugin_list *list = NULL;
+
+ load_plugins(pevent, ".so", load_plugin, &list);
+ return list;
+}
+
+void
+tep_unload_plugins(struct plugin_list *plugin_list, struct tep_handle *pevent)
+{
+ tep_plugin_unload_func func;
+ struct plugin_list *list;
+
+ while (plugin_list) {
+ list = plugin_list;
+ plugin_list = list->next;
+ func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME);
+ if (func)
+ func(pevent);
+ dlclose(list->handle);
+ free(list->name);
+ free(list);
+ }
+}
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
new file mode 100644
index 000000000..0560b96a3
--- /dev/null
+++ b/tools/lib/traceevent/event-utils.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#ifndef __UTIL_H
+#define __UTIL_H
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <string.h>
+
+/* Can be overridden */
+void warning(const char *fmt, ...);
+void pr_stat(const char *fmt, ...);
+void vpr_stat(const char *fmt, va_list ap);
+
+/* Always available */
+void __warning(const char *fmt, ...);
+void __pr_stat(const char *fmt, ...);
+
+void __vwarning(const char *fmt, ...);
+void __vpr_stat(const char *fmt, ...);
+
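+/*
+ * Type-safe min(): the (void) pointer comparison triggers a compiler
+ * warning when x and y have different types.
+ */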
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
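+/* Trim leading and trailing whitespace in place; returns the trimmed start */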
+static inline char *strim(char *string)
+{
+ char *ret;
+
+ if (!string)
+ return NULL;
+ while (*string) {
+ if (!isspace(*string))
+ break;
+ string++;
+ }
+ ret = string;
+
+ string = ret + strlen(ret) - 1;
+ while (string > ret) {
+ if (!isspace(*string))
+ break;
+ string--;
+ }
+ string[1] = 0;
+
+ return ret;
+}
+
+static inline int has_text(const char *text)
+{
+ if (!text)
+ return 0;
+
+ while (*text) {
+ if (!isspace(*text))
+ return 1;
+ text++;
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
new file mode 100644
index 000000000..af2a1f3b7
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -0,0 +1,729 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "kbuffer.h"
+
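+/*
+ * The "commit" word of a sub-buffer header stores the size of the
+ * data in its low 27 bits; the top bits flag that events were missed
+ * before this sub-buffer and whether the count of missed events was
+ * stored at the end of the sub-buffer's data.
+ */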
+#define MISSING_EVENTS (1UL << 31)
+#define MISSING_STORED (1UL << 30)
+
+#define COMMIT_MASK ((1 << 27) - 1)
+
+enum {
+ KBUFFER_FL_HOST_BIG_ENDIAN = (1<<0),
+ KBUFFER_FL_BIG_ENDIAN = (1<<1),
+ KBUFFER_FL_LONG_8 = (1<<2),
+ KBUFFER_FL_OLD_FORMAT = (1<<3),
+};
+
+#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN)
+
+/**
+ * struct kbuffer - state for a ring buffer sub-buffer being parsed
+ * @timestamp - timestamp of current event
+ * @lost_events - # of lost events between this subbuffer and previous
+ * @flags - special flags of the kbuffer
+ * @subbuffer - pointer to the sub-buffer page
+ * @data - pointer to the start of data on the sub-buffer page
+ * @index - index from @data to the @curr event data
+ * @curr - offset from @data to the start of current event
+ * (includes metadata)
+ * @next - offset from @data to the start of next event
+ * @size - The size of data on @data
+ * @start - The offset from @subbuffer where @data lives
+ *
+ * @read_4 - Function to read 4 raw bytes (may swap)
+ * @read_8 - Function to read 8 raw bytes (may swap)
+ * @read_long - Function to read a long word (4 or 8 bytes with needed swap)
+ */
+struct kbuffer {
+ unsigned long long timestamp;
+ long long lost_events;
+ unsigned long flags;
+ void *subbuffer;
+ void *data;
+ unsigned int index;
+ unsigned int curr;
+ unsigned int next;
+ unsigned int size;
+ unsigned int start;
+
+ unsigned int (*read_4)(void *ptr);
+ unsigned long long (*read_8)(void *ptr);
+ unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
+ int (*next_event)(struct kbuffer *kbuf);
+};
+
+static void *zmalloc(size_t size)
+{
+ return calloc(1, size);
+}
+
+static int host_is_bigendian(void)
+{
+ unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+ unsigned int *ptr;
+
+ ptr = (unsigned int *)str;
+ return *ptr == 0x01020304;
+}
+
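+/*
+ * A byte swap is needed only when exactly one of "host is big endian"
+ * and "buffer is big endian" is set. Adding the host bit (bit 0) to
+ * the flags makes the two bits carry into bit 2 when both are set, so
+ * the result masked by ENDIAN_MASK is non-zero only when they differ.
+ */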
+static int do_swap(struct kbuffer *kbuf)
+{
+ return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
+ ENDIAN_MASK;
+}
+
+static unsigned long long __read_8(void *ptr)
+{
+ unsigned long long data = *(unsigned long long *)ptr;
+
+ return data;
+}
+
+static unsigned long long __read_8_sw(void *ptr)
+{
+ unsigned long long data = *(unsigned long long *)ptr;
+ unsigned long long swap;
+
+ swap = ((data & 0xffULL) << 56) |
+ ((data & (0xffULL << 8)) << 40) |
+ ((data & (0xffULL << 16)) << 24) |
+ ((data & (0xffULL << 24)) << 8) |
+ ((data & (0xffULL << 32)) >> 8) |
+ ((data & (0xffULL << 40)) >> 24) |
+ ((data & (0xffULL << 48)) >> 40) |
+ ((data & (0xffULL << 56)) >> 56);
+
+ return swap;
+}
+
+static unsigned int __read_4(void *ptr)
+{
+ unsigned int data = *(unsigned int *)ptr;
+
+ return data;
+}
+
+static unsigned int __read_4_sw(void *ptr)
+{
+ unsigned int data = *(unsigned int *)ptr;
+ unsigned int swap;
+
+ swap = ((data & 0xffULL) << 24) |
+ ((data & (0xffULL << 8)) << 8) |
+ ((data & (0xffULL << 16)) >> 8) |
+ ((data & (0xffULL << 24)) >> 24);
+
+ return swap;
+}
+
+static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_8(ptr);
+}
+
+static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_4(ptr);
+}
+
+static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_8(ptr);
+}
+
+static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_4(ptr);
+}
+
+static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_long(kbuf, ptr);
+}
+
+static int calc_index(struct kbuffer *kbuf, void *ptr)
+{
+ return (unsigned long)ptr - (unsigned long)kbuf->data;
+}
+
+static int __next_event(struct kbuffer *kbuf);
+
+/**
+ * kbuffer_alloc - allocate a new kbuffer
+ * @size: enum to denote size of word
+ * @endian: enum to denote endianness
+ *
+ * Allocates and returns a new kbuffer.
+ */
+struct kbuffer *
+kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
+{
+ struct kbuffer *kbuf;
+ int flags = 0;
+
+ switch (size) {
+ case KBUFFER_LSIZE_4:
+ break;
+ case KBUFFER_LSIZE_8:
+ flags |= KBUFFER_FL_LONG_8;
+ break;
+ default:
+ return NULL;
+ }
+
+ switch (endian) {
+ case KBUFFER_ENDIAN_LITTLE:
+ break;
+ case KBUFFER_ENDIAN_BIG:
+ flags |= KBUFFER_FL_BIG_ENDIAN;
+ break;
+ default:
+ return NULL;
+ }
+
+ kbuf = zmalloc(sizeof(*kbuf));
+ if (!kbuf)
+ return NULL;
+
+ kbuf->flags = flags;
+
+ if (host_is_bigendian())
+ kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
+
+ if (do_swap(kbuf)) {
+ kbuf->read_8 = __read_8_sw;
+ kbuf->read_4 = __read_4_sw;
+ } else {
+ kbuf->read_8 = __read_8;
+ kbuf->read_4 = __read_4;
+ }
+
+ if (kbuf->flags & KBUFFER_FL_LONG_8)
+ kbuf->read_long = __read_long_8;
+ else
+ kbuf->read_long = __read_long_4;
+
+ /* May be changed by kbuffer_set_old_format() */
+ kbuf->next_event = __next_event;
+
+ return kbuf;
+}
+
+/**
+ * kbuffer_free - free an allocated kbuffer
+ * @kbuf: The kbuffer to free
+ *
+ * Can take NULL as a parameter.
+ */
+void kbuffer_free(struct kbuffer *kbuf)
+{
+ free(kbuf);
+}
+
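+/*
+ * Each record starts with a 32-bit word holding 5 bits of type_len
+ * and 27 bits of timestamp delta; the old format split the 5 bits
+ * into a 2-bit type and a 3-bit length. On little endian buffers
+ * these fields sit in the low bits, on big endian buffers in the
+ * high bits, which the *4host() helpers below account for.
+ */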
+static unsigned int type4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return (type_len_ts >> 29) & 3;
+ else
+ return type_len_ts & 3;
+}
+
+static unsigned int len4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return (type_len_ts >> 27) & 7;
+ else
+ return (type_len_ts >> 2) & 7;
+}
+
+static unsigned int type_len4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return (type_len_ts >> 27) & ((1 << 5) - 1);
+ else
+ return type_len_ts & ((1 << 5) - 1);
+}
+
+static unsigned int ts4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return type_len_ts & ((1 << 27) - 1);
+ else
+ return type_len_ts >> 5;
+}
+
+/*
+ * Linux 2.6.30 and earlier (not much earlier) had a different
+ * ring buffer format. It should be obsolete, but we handle it anyway.
+ */
+enum old_ring_buffer_type {
+ OLD_RINGBUF_TYPE_PADDING,
+ OLD_RINGBUF_TYPE_TIME_EXTEND,
+ OLD_RINGBUF_TYPE_TIME_STAMP,
+ OLD_RINGBUF_TYPE_DATA,
+};
+
+static unsigned int old_update_pointers(struct kbuffer *kbuf)
+{
+ unsigned long long extend;
+ unsigned int type_len_ts;
+ unsigned int type;
+ unsigned int len;
+ unsigned int delta;
+ unsigned int length;
+ void *ptr = kbuf->data + kbuf->curr;
+
+ type_len_ts = read_4(kbuf, ptr);
+ ptr += 4;
+
+ type = type4host(kbuf, type_len_ts);
+ len = len4host(kbuf, type_len_ts);
+ delta = ts4host(kbuf, type_len_ts);
+
+ switch (type) {
+ case OLD_RINGBUF_TYPE_PADDING:
+ kbuf->next = kbuf->size;
+ return 0;
+
+ case OLD_RINGBUF_TYPE_TIME_EXTEND:
+ extend = read_4(kbuf, ptr);
+ extend <<= TS_SHIFT;
+ extend += delta;
+ delta = extend;
+ ptr += 4;
+ length = 0;
+ break;
+
+ case OLD_RINGBUF_TYPE_TIME_STAMP:
+ /* should never happen! */
+ kbuf->curr = kbuf->size;
+ kbuf->next = kbuf->size;
+ kbuf->index = kbuf->size;
+ return -1;
+ default:
+ if (len)
+ length = len * 4;
+ else {
+ length = read_4(kbuf, ptr);
+ length -= 4;
+ ptr += 4;
+ }
+ break;
+ }
+
+ kbuf->timestamp += delta;
+ kbuf->index = calc_index(kbuf, ptr);
+ kbuf->next = kbuf->index + length;
+
+ return type;
+}
+
+static int __old_next_event(struct kbuffer *kbuf)
+{
+ int type;
+
+ do {
+ kbuf->curr = kbuf->next;
+ if (kbuf->next >= kbuf->size)
+ return -1;
+ type = old_update_pointers(kbuf);
+ } while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
+
+ return 0;
+}
+
+static unsigned int
+translate_data(struct kbuffer *kbuf, void *data, void **rptr,
+ unsigned long long *delta, int *length)
+{
+ unsigned long long extend;
+ unsigned int type_len_ts;
+ unsigned int type_len;
+
+ type_len_ts = read_4(kbuf, data);
+ data += 4;
+
+ type_len = type_len4host(kbuf, type_len_ts);
+ *delta = ts4host(kbuf, type_len_ts);
+
+ switch (type_len) {
+ case KBUFFER_TYPE_PADDING:
+ *length = read_4(kbuf, data);
+ break;
+
+ case KBUFFER_TYPE_TIME_EXTEND:
+ extend = read_4(kbuf, data);
+ data += 4;
+ extend <<= TS_SHIFT;
+ extend += *delta;
+ *delta = extend;
+ *length = 0;
+ break;
+
+ case KBUFFER_TYPE_TIME_STAMP:
+ data += 12;
+ *length = 0;
+ break;
+ case 0:
+ *length = read_4(kbuf, data) - 4;
+ *length = (*length + 3) & ~3;
+ data += 4;
+ break;
+ default:
+ *length = type_len * 4;
+ break;
+ }
+
+ *rptr = data;
+
+ return type_len;
+}
+
+static unsigned int update_pointers(struct kbuffer *kbuf)
+{
+ unsigned long long delta;
+ unsigned int type_len;
+ int length;
+ void *ptr = kbuf->data + kbuf->curr;
+
+ type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
+
+ kbuf->timestamp += delta;
+ kbuf->index = calc_index(kbuf, ptr);
+ kbuf->next = kbuf->index + length;
+
+ return type_len;
+}
+
+/**
+ * kbuffer_translate_data - read raw data to get a record
+ * @swap: Set to 1 if bytes in words need to be swapped when read
+ * @data: The raw data to read
+ * @size: Address to store the size of the event data.
+ *
+ * Returns a pointer to the event data. To determine the entire
+ * record size (record metadata + data) just add the difference between
+ * @data and the returned value to @size.
+ */
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
+{
+ unsigned long long delta;
+ struct kbuffer kbuf;
+ int type_len;
+ int length;
+ void *ptr;
+
+ if (swap) {
+ kbuf.read_8 = __read_8_sw;
+ kbuf.read_4 = __read_4_sw;
+ kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
+ } else {
+ kbuf.read_8 = __read_8;
+ kbuf.read_4 = __read_4;
+		kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN : 0;
+ }
+
+ type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
+ switch (type_len) {
+ case KBUFFER_TYPE_PADDING:
+ case KBUFFER_TYPE_TIME_EXTEND:
+ case KBUFFER_TYPE_TIME_STAMP:
+ return NULL;
+	}
+
+ *size = length;
+
+ return ptr;
+}
+
+static int __next_event(struct kbuffer *kbuf)
+{
+ int type;
+
+ do {
+ kbuf->curr = kbuf->next;
+ if (kbuf->next >= kbuf->size)
+ return -1;
+ type = update_pointers(kbuf);
+ } while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING);
+
+ return 0;
+}
+
+static int next_event(struct kbuffer *kbuf)
+{
+ return kbuf->next_event(kbuf);
+}
+
+/**
+ * kbuffer_next_event - increment the current pointer
+ * @kbuf: The kbuffer to read
+ * @ts: Address to store the next record's timestamp (may be NULL to ignore)
+ *
+ * Increments the pointers into the subbuffer of the kbuffer to point to the
+ * next event so that the next kbuffer_read_event() will return a
+ * new event.
+ *
+ * Returns the data of the next event if a new event exists on the subbuffer,
+ * NULL otherwise.
+ */
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+ int ret;
+
+ if (!kbuf || !kbuf->subbuffer)
+ return NULL;
+
+ ret = next_event(kbuf);
+ if (ret < 0)
+ return NULL;
+
+ if (ts)
+ *ts = kbuf->timestamp;
+
+ return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
+ * @kbuf: The kbuffer to load
+ * @subbuffer: The subbuffer to load into @kbuf.
+ *
+ * Load a new subbuffer (page) into @kbuf. This will reset all
+ * the pointers and update the @kbuf timestamp. The next read will
+ * return the first event on @subbuffer.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
+{
+ unsigned long long flags;
+ void *ptr = subbuffer;
+
+ if (!kbuf || !subbuffer)
+ return -1;
+
+ kbuf->subbuffer = subbuffer;
+
+ kbuf->timestamp = read_8(kbuf, ptr);
+ ptr += 8;
+
+ kbuf->curr = 0;
+
+ if (kbuf->flags & KBUFFER_FL_LONG_8)
+ kbuf->start = 16;
+ else
+ kbuf->start = 12;
+
+ kbuf->data = subbuffer + kbuf->start;
+
+ flags = read_long(kbuf, ptr);
+ kbuf->size = (unsigned int)flags & COMMIT_MASK;
+
+ if (flags & MISSING_EVENTS) {
+ if (flags & MISSING_STORED) {
+ ptr = kbuf->data + kbuf->size;
+ kbuf->lost_events = read_long(kbuf, ptr);
+ } else
+ kbuf->lost_events = -1;
+ } else
+ kbuf->lost_events = 0;
+
+ kbuf->index = 0;
+ kbuf->next = 0;
+
+ next_event(kbuf);
+
+ return 0;
+}
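+
+/*
+ * A minimal sketch of walking all events on a sub-buffer, assuming
+ * @page points to a raw sub-buffer read from the tracing ring buffer
+ * and process() is a hypothetical consumer:
+ *
+ *	unsigned long long ts;
+ *	void *event;
+ *
+ *	kbuffer_load_subbuffer(kbuf, page);
+ *	for (event = kbuffer_read_event(kbuf, &ts); event;
+ *	     event = kbuffer_next_event(kbuf, &ts))
+ *		process(event, ts);
+ */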
+
+/**
+ * kbuffer_read_event - read the next event in the kbuffer subbuffer
+ * @kbuf: The kbuffer to read from
+ * @ts: The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * Returns a pointer to the data part of the current event.
+ * NULL if no event is left on the subbuffer.
+ */
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+ if (!kbuf || !kbuf->subbuffer)
+ return NULL;
+
+ if (kbuf->curr >= kbuf->size)
+ return NULL;
+
+ if (ts)
+ *ts = kbuf->timestamp;
+ return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_timestamp - Return the timestamp of the current event
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the timestamp of the current (next) event.
+ */
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
+{
+ return kbuf->timestamp;
+}
+
+/**
+ * kbuffer_read_at_offset - read the event that is at offset
+ * @kbuf: The kbuffer to read from
+ * @offset: The offset into the subbuffer
+ * @ts: The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * The @offset must be an index from the @kbuf subbuffer beginning.
+ * If @offset is bigger than the stored subbuffer, NULL will be returned.
+ *
+ * Returns the data of the record that is at @offset. Note, @offset does
+ * not need to be the start of the record; it just needs to fall
+ * within the record.
+ *
+ * Note, the kbuf timestamp and pointers are updated to the
+ * returned record. That is, kbuffer_read_event() will return the same
+ * data and timestamp, and kbuffer_next_event() will increment from
+ * this record.
+ */
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
+ unsigned long long *ts)
+{
+ void *data;
+
+ if (offset < kbuf->start)
+ offset = 0;
+ else
+ offset -= kbuf->start;
+
+ /* Reset the buffer */
+ kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
+ data = kbuffer_read_event(kbuf, ts);
+
+ while (kbuf->curr < offset) {
+ data = kbuffer_next_event(kbuf, ts);
+ if (!data)
+ break;
+ }
+
+ return data;
+}
+
+/**
+ * kbuffer_subbuffer_size - the size of the loaded subbuffer
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the size of the data on the subbuffer. Note, this size is
+ * the offset at which the last event ends. The stored subbuffer may
+ * actually be bigger due to padding and such.
+ */
+int kbuffer_subbuffer_size(struct kbuffer *kbuf)
+{
+ return kbuf->size;
+}
+
+/**
+ * kbuffer_curr_index - Return the index of the record
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the index from the start of the data part of
+ * the subbuffer to the current location. Note this is not
+ * from the start of the subbuffer. An index of zero will
+ * point to the first record. Use kbuffer_curr_offset() for
+ * the actual offset (that can be used by kbuffer_read_at_offset()).
+ */
+int kbuffer_curr_index(struct kbuffer *kbuf)
+{
+ return kbuf->curr;
+}
+
+/**
+ * kbuffer_curr_offset - Return the offset of the record
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the offset from the start of the subbuffer to the
+ * current location.
+ */
+int kbuffer_curr_offset(struct kbuffer *kbuf)
+{
+ return kbuf->curr + kbuf->start;
+}
+
+/**
+ * kbuffer_event_size - return the size of the event data
+ * @kbuf: The kbuffer to read
+ *
+ * Returns the size of the event data (the payload not counting
+ * the meta data of the record) of the current event.
+ */
+int kbuffer_event_size(struct kbuffer *kbuf)
+{
+ return kbuf->next - kbuf->index;
+}
+
+/**
+ * kbuffer_curr_size - return the size of the entire record
+ * @kbuf: The kbuffer to read
+ *
+ * Returns the size of the entire record (meta data and payload)
+ * of the current event.
+ */
+int kbuffer_curr_size(struct kbuffer *kbuf)
+{
+ return kbuf->next - kbuf->curr;
+}
+
+/**
+ * kbuffer_missed_events - return the # of missed events from last event.
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the # of missed events (if recorded) before the current
+ * event. Note, only the event at the beginning of a subbuffer can
+ * have missed events; for all other events within the buffer this
+ * returns zero.
+ */
+int kbuffer_missed_events(struct kbuffer *kbuf)
+{
+ /* Only the first event can have missed events */
+ if (kbuf->curr)
+ return 0;
+
+ return kbuf->lost_events;
+}
+
+/**
+ * kbuffer_set_old_format - set the kbuffer to use the old format parsing
+ * @kbuf: The kbuffer to set
+ *
+ * This is obsolete (or should be). The first kernels to use the
+ * new ring buffer had a slightly different ring buffer format
+ * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
+ * but should not be counted on in the future.
+ */
+void kbuffer_set_old_format(struct kbuffer *kbuf)
+{
+ kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
+
+ kbuf->next_event = __old_next_event;
+}
+
+/**
+ * kbuffer_start_of_data - return offset of where data starts on subbuffer
+ * @kbuf: The kbuffer
+ *
+ * Returns the location on the subbuffer where the data starts.
+ */
+int kbuffer_start_of_data(struct kbuffer *kbuf)
+{
+ return kbuf->start;
+}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
new file mode 100644
index 000000000..03dce7575
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _KBUFFER_H
+#define _KBUFFER_H
+
+#ifndef TS_SHIFT
+#define TS_SHIFT 27
+#endif
+
+enum kbuffer_endian {
+ KBUFFER_ENDIAN_BIG,
+ KBUFFER_ENDIAN_LITTLE,
+};
+
+enum kbuffer_long_size {
+ KBUFFER_LSIZE_4,
+ KBUFFER_LSIZE_8,
+};
+
+enum {
+ KBUFFER_TYPE_PADDING = 29,
+ KBUFFER_TYPE_TIME_EXTEND = 30,
+ KBUFFER_TYPE_TIME_STAMP = 31,
+};
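+
+/*
+ * A record's type_len of 1..28 encodes its data length directly (in
+ * 4-byte words); 0 means the length is stored in the following word.
+ * The three values above mark padding, extended-timestamp and
+ * timestamp records instead of data.
+ */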
+
+struct kbuffer;
+
+struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian);
+void kbuffer_free(struct kbuffer *kbuf);
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
+
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
+
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts);
+
+int kbuffer_curr_index(struct kbuffer *kbuf);
+
+int kbuffer_curr_offset(struct kbuffer *kbuf);
+int kbuffer_curr_size(struct kbuffer *kbuf);
+int kbuffer_event_size(struct kbuffer *kbuf);
+int kbuffer_missed_events(struct kbuffer *kbuf);
+int kbuffer_subbuffer_size(struct kbuffer *kbuf);
+
+void kbuffer_set_old_format(struct kbuffer *kbuf);
+int kbuffer_start_of_data(struct kbuffer *kbuf);
+
+#endif /* _KBUFFER_H */
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
new file mode 100644
index 000000000..27248a0aa
--- /dev/null
+++ b/tools/lib/traceevent/parse-filter.c
@@ -0,0 +1,2445 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+
+#define COMM "COMM"
+#define CPU "CPU"
+
+static struct format_field comm = {
+ .name = "COMM",
+};
+
+static struct format_field cpu = {
+ .name = "CPU",
+};
+
+struct event_list {
+ struct event_list *next;
+ struct event_format *event;
+};
+
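+/*
+ * Copy the input line being parsed into @error_buf, point a '^' at
+ * the offset where parsing failed, and append the formatted message.
+ */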
+static void show_error(char *error_buf, const char *fmt, ...)
+{
+ unsigned long long index;
+ const char *input;
+ va_list ap;
+ int len;
+ int i;
+
+ input = tep_get_input_buf();
+ index = tep_get_input_buf_ptr();
+ len = input ? strlen(input) : 0;
+
+ if (len) {
+ strcpy(error_buf, input);
+ error_buf[len] = '\n';
+ for (i = 1; i < len && i < index; i++)
+ error_buf[len+i] = ' ';
+ error_buf[len + i] = '^';
+ error_buf[len + i + 1] = '\n';
+ len += i+2;
+ }
+
+ va_start(ap, fmt);
+ vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap);
+ va_end(ap);
+}
+
+static void free_token(char *token)
+{
+ tep_free_token(token);
+}
+
+static enum event_type read_token(char **tok)
+{
+ enum event_type type;
+ char *token = NULL;
+
+ do {
+ free_token(token);
+ type = tep_read_token(&token);
+ } while (type == EVENT_NEWLINE || type == EVENT_SPACE);
+
+ /* If token is = or ! check to see if the next char is ~ */
+ if (token &&
+ (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
+ tep_peek_char() == '~') {
+ /* append it */
+ *tok = malloc(3);
+ if (*tok == NULL) {
+ free_token(token);
+ return EVENT_ERROR;
+ }
+ sprintf(*tok, "%c%c", *token, '~');
+ free_token(token);
+ /* Now remove the '~' from the buffer */
+ tep_read_token(&token);
+ free_token(token);
+ } else
+ *tok = token;
+
+ return type;
+}
+
+static int filter_cmp(const void *a, const void *b)
+{
+ const struct filter_type *ea = a;
+ const struct filter_type *eb = b;
+
+ if (ea->event_id < eb->event_id)
+ return -1;
+
+ if (ea->event_id > eb->event_id)
+ return 1;
+
+ return 0;
+}
+
+static struct filter_type *
+find_filter_type(struct event_filter *filter, int id)
+{
+ struct filter_type *filter_type;
+ struct filter_type key;
+
+ key.event_id = id;
+
+ filter_type = bsearch(&key, filter->event_filters,
+ filter->filters,
+ sizeof(*filter->event_filters),
+ filter_cmp);
+
+ return filter_type;
+}
+
+static struct filter_type *
+add_filter_type(struct event_filter *filter, int id)
+{
+ struct filter_type *filter_type;
+ int i;
+
+ filter_type = find_filter_type(filter, id);
+ if (filter_type)
+ return filter_type;
+
+ filter_type = realloc(filter->event_filters,
+ sizeof(*filter->event_filters) *
+ (filter->filters + 1));
+ if (!filter_type)
+ return NULL;
+
+ filter->event_filters = filter_type;
+
+ for (i = 0; i < filter->filters; i++) {
+ if (filter->event_filters[i].event_id > id)
+ break;
+ }
+
+ if (i < filter->filters)
+ memmove(&filter->event_filters[i+1],
+ &filter->event_filters[i],
+ sizeof(*filter->event_filters) *
+ (filter->filters - i));
+
+ filter_type = &filter->event_filters[i];
+ filter_type->event_id = id;
+ filter_type->event = tep_find_event(filter->pevent, id);
+ filter_type->filter = NULL;
+
+ filter->filters++;
+
+ return filter_type;
+}
+
+/**
+ * tep_filter_alloc - create a new event filter
+ * @pevent: The pevent that this filter is associated with
+ */
+struct event_filter *tep_filter_alloc(struct tep_handle *pevent)
+{
+ struct event_filter *filter;
+
+ filter = malloc(sizeof(*filter));
+ if (filter == NULL)
+ return NULL;
+
+ memset(filter, 0, sizeof(*filter));
+ filter->pevent = pevent;
+ tep_ref(pevent);
+
+ return filter;
+}
+
+static struct filter_arg *allocate_arg(void)
+{
+ return calloc(1, sizeof(struct filter_arg));
+}
+
+static void free_arg(struct filter_arg *arg)
+{
+ if (!arg)
+ return;
+
+ switch (arg->type) {
+ case FILTER_ARG_NONE:
+ case FILTER_ARG_BOOLEAN:
+ break;
+
+ case FILTER_ARG_NUM:
+ free_arg(arg->num.left);
+ free_arg(arg->num.right);
+ break;
+
+ case FILTER_ARG_EXP:
+ free_arg(arg->exp.left);
+ free_arg(arg->exp.right);
+ break;
+
+ case FILTER_ARG_STR:
+ free(arg->str.val);
+ regfree(&arg->str.reg);
+ free(arg->str.buffer);
+ break;
+
+ case FILTER_ARG_VALUE:
+ if (arg->value.type == FILTER_STRING ||
+ arg->value.type == FILTER_CHAR)
+ free(arg->value.str);
+ break;
+
+ case FILTER_ARG_OP:
+ free_arg(arg->op.left);
+ free_arg(arg->op.right);
+ default:
+ break;
+ }
+
+ free(arg);
+}
+
+static int add_event(struct event_list **events,
+ struct event_format *event)
+{
+ struct event_list *list;
+
+ list = malloc(sizeof(*list));
+ if (list == NULL)
+ return -1;
+
+ list->next = *events;
+ *events = list;
+ list->event = event;
+ return 0;
+}
+
+static int event_match(struct event_format *event,
+ regex_t *sreg, regex_t *ereg)
+{
+ if (sreg) {
+ return !regexec(sreg, event->system, 0, NULL, 0) &&
+ !regexec(ereg, event->name, 0, NULL, 0);
+ }
+
+ return !regexec(ereg, event->system, 0, NULL, 0) ||
+ !regexec(ereg, event->name, 0, NULL, 0);
+}
+
+static enum tep_errno
+find_event(struct tep_handle *pevent, struct event_list **events,
+ char *sys_name, char *event_name)
+{
+ struct event_format *event;
+ regex_t ereg;
+ regex_t sreg;
+ int match = 0;
+ int fail = 0;
+ char *reg;
+ int ret;
+ int i;
+
+ if (!event_name) {
+ /* if no name is given, then swap sys and name */
+ event_name = sys_name;
+ sys_name = NULL;
+ }
+
+ ret = asprintf(&reg, "^%s$", event_name);
+ if (ret < 0)
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+ ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
+ free(reg);
+
+ if (ret)
+ return TEP_ERRNO__INVALID_EVENT_NAME;
+
+ if (sys_name) {
+ ret = asprintf(&reg, "^%s$", sys_name);
+ if (ret < 0) {
+ regfree(&ereg);
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
+ free(reg);
+ if (ret) {
+ regfree(&ereg);
+ return TEP_ERRNO__INVALID_EVENT_NAME;
+ }
+ }
+
+ for (i = 0; i < pevent->nr_events; i++) {
+ event = pevent->events[i];
+ if (event_match(event, sys_name ? &sreg : NULL, &ereg)) {
+ match = 1;
+ if (add_event(events, event) < 0) {
+ fail = 1;
+ break;
+ }
+ }
+ }
+
+ regfree(&ereg);
+ if (sys_name)
+ regfree(&sreg);
+
+ if (!match)
+ return TEP_ERRNO__EVENT_NOT_FOUND;
+ if (fail)
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+ return 0;
+}
+
+static void free_events(struct event_list *events)
+{
+ struct event_list *event;
+
+ while (events) {
+ event = events;
+ events = events->next;
+ free(event);
+ }
+}
+
+static enum tep_errno
+create_arg_item(struct event_format *event, const char *token,
+ enum event_type type, struct filter_arg **parg, char *error_str)
+{
+ struct format_field *field;
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (arg == NULL) {
+ show_error(error_str, "failed to allocate filter arg");
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ switch (type) {
+
+ case EVENT_SQUOTE:
+ case EVENT_DQUOTE:
+ arg->type = FILTER_ARG_VALUE;
+ arg->value.type =
+ type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR;
+ arg->value.str = strdup(token);
+ if (!arg->value.str) {
+ free_arg(arg);
+ show_error(error_str, "failed to allocate string filter arg");
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+ break;
+ case EVENT_ITEM:
+ /* if it is a number, then convert it */
+ if (isdigit(token[0])) {
+ arg->type = FILTER_ARG_VALUE;
+ arg->value.type = FILTER_NUMBER;
+ arg->value.val = strtoull(token, NULL, 0);
+ break;
+ }
+ /* Consider this a field */
+ field = tep_find_any_field(event, token);
+ if (!field) {
+ /* If token is 'COMM' or 'CPU' then it is special */
+ if (strcmp(token, COMM) == 0) {
+ field = &comm;
+ } else if (strcmp(token, CPU) == 0) {
+ field = &cpu;
+ } else {
+				/* not a field, make it false */
+ arg->type = FILTER_ARG_BOOLEAN;
+ arg->boolean.value = FILTER_FALSE;
+ break;
+ }
+ }
+ arg->type = FILTER_ARG_FIELD;
+ arg->field.field = field;
+ break;
+ default:
+ free_arg(arg);
+ show_error(error_str, "expected a value but found %s", token);
+ return TEP_ERRNO__UNEXPECTED_TYPE;
+ }
+ *parg = arg;
+ return 0;
+}
+
+static struct filter_arg *
+create_arg_op(enum filter_op_type btype)
+{
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (!arg)
+ return NULL;
+
+ arg->type = FILTER_ARG_OP;
+ arg->op.type = btype;
+
+ return arg;
+}
+
+static struct filter_arg *
+create_arg_exp(enum filter_exp_type etype)
+{
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (!arg)
+ return NULL;
+
+ arg->type = FILTER_ARG_EXP;
+ arg->exp.type = etype;
+
+ return arg;
+}
+
+static struct filter_arg *
+create_arg_cmp(enum filter_cmp_type ctype)
+{
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (!arg)
+ return NULL;
+
+ /* Use NUM and change if necessary */
+ arg->type = FILTER_ARG_NUM;
+ arg->num.type = ctype;
+
+ return arg;
+}
+
+static enum tep_errno
+add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
+{
+ struct filter_arg *left;
+ char *str;
+ int op_type;
+ int ret;
+
+ switch (op->type) {
+ case FILTER_ARG_EXP:
+ if (op->exp.right)
+ goto out_fail;
+ op->exp.right = arg;
+ break;
+
+ case FILTER_ARG_OP:
+ if (op->op.right)
+ goto out_fail;
+ op->op.right = arg;
+ break;
+
+ case FILTER_ARG_NUM:
+ if (op->op.right)
+ goto out_fail;
+		/*
+		 * The arg must be a value (number or string) or a field
+		 */
+ switch (arg->type) {
+ case FILTER_ARG_VALUE:
+ case FILTER_ARG_FIELD:
+ break;
+ default:
+ show_error(error_str, "Illegal rvalue");
+ return TEP_ERRNO__ILLEGAL_RVALUE;
+ }
+
+ /*
+ * Depending on the type, we may need to
+ * convert this to a string or regex.
+ */
+ switch (arg->value.type) {
+ case FILTER_CHAR:
+			/*
+			 * A char should be converted to a number if
+			 * the string is 1 byte long and the compare
+			 * is not a REGEX.
+			 */
+ if (strlen(arg->value.str) == 1 &&
+ op->num.type != FILTER_CMP_REGEX &&
+ op->num.type != FILTER_CMP_NOT_REGEX) {
+ arg->value.type = FILTER_NUMBER;
+ goto do_int;
+ }
+ /* fall through */
+ case FILTER_STRING:
+
+ /* convert op to a string arg */
+ op_type = op->num.type;
+ left = op->num.left;
+ str = arg->value.str;
+
+ /* reset the op for the new field */
+ memset(op, 0, sizeof(*op));
+
+			/*
+			 * If the left arg was a field that was not
+			 * found, then turn the entire op into a
+			 * FALSE boolean.
+			 */
+ if (left->type == FILTER_ARG_BOOLEAN) {
+ free_arg(left);
+ free_arg(arg);
+ op->type = FILTER_ARG_BOOLEAN;
+ op->boolean.value = FILTER_FALSE;
+ break;
+ }
+
+ /* Left arg must be a field */
+ if (left->type != FILTER_ARG_FIELD) {
+ show_error(error_str,
+ "Illegal lvalue for string comparison");
+ return TEP_ERRNO__ILLEGAL_LVALUE;
+ }
+
+ /* Make sure this is a valid string compare */
+ switch (op_type) {
+ case FILTER_CMP_EQ:
+ op_type = FILTER_CMP_MATCH;
+ break;
+ case FILTER_CMP_NE:
+ op_type = FILTER_CMP_NOT_MATCH;
+ break;
+
+ case FILTER_CMP_REGEX:
+ case FILTER_CMP_NOT_REGEX:
+ ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
+ if (ret) {
+ show_error(error_str,
+ "RegEx '%s' did not compute",
+ str);
+ return TEP_ERRNO__INVALID_REGEX;
+ }
+ break;
+ default:
+ show_error(error_str,
+ "Illegal comparison for string");
+ return TEP_ERRNO__ILLEGAL_STRING_CMP;
+ }
+
+ op->type = FILTER_ARG_STR;
+ op->str.type = op_type;
+ op->str.field = left->field.field;
+ op->str.val = strdup(str);
+ if (!op->str.val) {
+ show_error(error_str, "Failed to allocate string filter");
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+ /*
+ * Need a buffer to copy data for tests
+ */
+ op->str.buffer = malloc(op->str.field->size + 1);
+ if (!op->str.buffer) {
+ show_error(error_str, "Failed to allocate string filter");
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+ /* Null terminate this buffer */
+ op->str.buffer[op->str.field->size] = 0;
+
+ /* We no longer have left or right args */
+ free_arg(arg);
+ free_arg(left);
+
+ break;
+
+ case FILTER_NUMBER:
+
+ do_int:
+ switch (op->num.type) {
+ case FILTER_CMP_REGEX:
+ case FILTER_CMP_NOT_REGEX:
+ show_error(error_str,
+ "Op not allowed with integers");
+ return TEP_ERRNO__ILLEGAL_INTEGER_CMP;
+
+ default:
+ break;
+ }
+
+ /* numeric compare */
+ op->num.right = arg;
+ break;
+ default:
+ goto out_fail;
+ }
+ break;
+ default:
+ goto out_fail;
+ }
+
+ return 0;
+
+ out_fail:
+ show_error(error_str, "Syntax error");
+ return TEP_ERRNO__SYNTAX_ERROR;
+}
+
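+/*
+ * Give the new operation @b higher precedence than the boolean op @a:
+ * @b takes over @a's right slot, and @a's old right child is returned
+ * so that it can become @b's left operand.
+ */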
+static struct filter_arg *
+rotate_op_right(struct filter_arg *a, struct filter_arg *b)
+{
+ struct filter_arg *arg;
+
+ arg = a->op.right;
+ a->op.right = b;
+ return arg;
+}
+
+static enum tep_errno add_left(struct filter_arg *op, struct filter_arg *arg)
+{
+ switch (op->type) {
+ case FILTER_ARG_EXP:
+ if (arg->type == FILTER_ARG_OP)
+ arg = rotate_op_right(arg, op);
+ op->exp.left = arg;
+ break;
+
+ case FILTER_ARG_OP:
+ op->op.left = arg;
+ break;
+ case FILTER_ARG_NUM:
+ if (arg->type == FILTER_ARG_OP)
+ arg = rotate_op_right(arg, op);
+
+ /* left arg of compares must be a field */
+ if (arg->type != FILTER_ARG_FIELD &&
+ arg->type != FILTER_ARG_BOOLEAN)
+ return TEP_ERRNO__INVALID_ARG_TYPE;
+ op->num.left = arg;
+ break;
+ default:
+ return TEP_ERRNO__INVALID_ARG_TYPE;
+ }
+ return 0;
+}
+
+enum op_type {
+ OP_NONE,
+ OP_BOOL,
+ OP_NOT,
+ OP_EXP,
+ OP_CMP,
+};
+
+static enum op_type process_op(const char *token,
+ enum filter_op_type *btype,
+ enum filter_cmp_type *ctype,
+ enum filter_exp_type *etype)
+{
+ *btype = FILTER_OP_NOT;
+ *etype = FILTER_EXP_NONE;
+ *ctype = FILTER_CMP_NONE;
+
+ if (strcmp(token, "&&") == 0)
+ *btype = FILTER_OP_AND;
+ else if (strcmp(token, "||") == 0)
+ *btype = FILTER_OP_OR;
+ else if (strcmp(token, "!") == 0)
+ return OP_NOT;
+
+ if (*btype != FILTER_OP_NOT)
+ return OP_BOOL;
+
+ /* Check for value expressions */
+ if (strcmp(token, "+") == 0) {
+ *etype = FILTER_EXP_ADD;
+ } else if (strcmp(token, "-") == 0) {
+ *etype = FILTER_EXP_SUB;
+ } else if (strcmp(token, "*") == 0) {
+ *etype = FILTER_EXP_MUL;
+ } else if (strcmp(token, "/") == 0) {
+ *etype = FILTER_EXP_DIV;
+ } else if (strcmp(token, "%") == 0) {
+ *etype = FILTER_EXP_MOD;
+ } else if (strcmp(token, ">>") == 0) {
+ *etype = FILTER_EXP_RSHIFT;
+ } else if (strcmp(token, "<<") == 0) {
+ *etype = FILTER_EXP_LSHIFT;
+ } else if (strcmp(token, "&") == 0) {
+ *etype = FILTER_EXP_AND;
+ } else if (strcmp(token, "|") == 0) {
+ *etype = FILTER_EXP_OR;
+ } else if (strcmp(token, "^") == 0) {
+ *etype = FILTER_EXP_XOR;
+ } else if (strcmp(token, "~") == 0)
+ *etype = FILTER_EXP_NOT;
+
+ if (*etype != FILTER_EXP_NONE)
+ return OP_EXP;
+
+ /* Check for compares */
+ if (strcmp(token, "==") == 0)
+ *ctype = FILTER_CMP_EQ;
+ else if (strcmp(token, "!=") == 0)
+ *ctype = FILTER_CMP_NE;
+ else if (strcmp(token, "<") == 0)
+ *ctype = FILTER_CMP_LT;
+ else if (strcmp(token, ">") == 0)
+ *ctype = FILTER_CMP_GT;
+ else if (strcmp(token, "<=") == 0)
+ *ctype = FILTER_CMP_LE;
+ else if (strcmp(token, ">=") == 0)
+ *ctype = FILTER_CMP_GE;
+ else if (strcmp(token, "=~") == 0)
+ *ctype = FILTER_CMP_REGEX;
+ else if (strcmp(token, "!~") == 0)
+ *ctype = FILTER_CMP_NOT_REGEX;
+ else
+ return OP_NONE;
+
+ return OP_CMP;
+}
+
+static int check_op_done(struct filter_arg *arg)
+{
+ switch (arg->type) {
+ case FILTER_ARG_EXP:
+ return arg->exp.right != NULL;
+
+ case FILTER_ARG_OP:
+ return arg->op.right != NULL;
+
+ case FILTER_ARG_NUM:
+ return arg->num.right != NULL;
+
+ case FILTER_ARG_STR:
+ /* A string conversion is always done */
+ return 1;
+
+ case FILTER_ARG_BOOLEAN:
+ /* field not found, is ok */
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+enum filter_vals {
+ FILTER_VAL_NORM,
+ FILTER_VAL_FALSE,
+ FILTER_VAL_TRUE,
+};
+
+static enum tep_errno
+reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
+ struct filter_arg *arg, char *error_str)
+{
+ struct filter_arg *other_child;
+ struct filter_arg **ptr;
+
+ if (parent->type != FILTER_ARG_OP &&
+ arg->type != FILTER_ARG_OP) {
+ show_error(error_str, "can not reparent other than OP");
+ return TEP_ERRNO__REPARENT_NOT_OP;
+ }
+
+ /* Get the sibling */
+ if (old_child->op.right == arg) {
+ ptr = &old_child->op.right;
+ other_child = old_child->op.left;
+ } else if (old_child->op.left == arg) {
+ ptr = &old_child->op.left;
+ other_child = old_child->op.right;
+ } else {
+ show_error(error_str, "Error in reparent op, find other child");
+ return TEP_ERRNO__REPARENT_FAILED;
+ }
+
+ /* Detach arg from old_child */
+ *ptr = NULL;
+
+ /* Check for root */
+ if (parent == old_child) {
+ free_arg(other_child);
+ *parent = *arg;
+		/* Free arg without recursion */
+ free(arg);
+ return 0;
+ }
+
+ if (parent->op.right == old_child)
+ ptr = &parent->op.right;
+ else if (parent->op.left == old_child)
+ ptr = &parent->op.left;
+ else {
+ show_error(error_str, "Error in reparent op");
+ return TEP_ERRNO__REPARENT_FAILED;
+ }
+
+ *ptr = arg;
+
+ free_arg(old_child);
+ return 0;
+}
+
+/* Returns either filter_vals (success) or tep_errno (failure) */
+static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
+ char *error_str)
+{
+ int lval, rval;
+
+ switch (arg->type) {
+
+ /* bad case */
+ case FILTER_ARG_BOOLEAN:
+ return FILTER_VAL_FALSE + arg->boolean.value;
+
+ /* good cases: */
+ case FILTER_ARG_STR:
+ case FILTER_ARG_VALUE:
+ case FILTER_ARG_FIELD:
+ return FILTER_VAL_NORM;
+
+ case FILTER_ARG_EXP:
+ lval = test_arg(arg, arg->exp.left, error_str);
+ if (lval != FILTER_VAL_NORM)
+ return lval;
+ rval = test_arg(arg, arg->exp.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+ return FILTER_VAL_NORM;
+
+ case FILTER_ARG_NUM:
+ lval = test_arg(arg, arg->num.left, error_str);
+ if (lval != FILTER_VAL_NORM)
+ return lval;
+ rval = test_arg(arg, arg->num.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+ return FILTER_VAL_NORM;
+
+ case FILTER_ARG_OP:
+ if (arg->op.type != FILTER_OP_NOT) {
+ lval = test_arg(arg, arg->op.left, error_str);
+ switch (lval) {
+ case FILTER_VAL_NORM:
+ break;
+ case FILTER_VAL_TRUE:
+ if (arg->op.type == FILTER_OP_OR)
+ return FILTER_VAL_TRUE;
+ rval = test_arg(arg, arg->op.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+
+ return reparent_op_arg(parent, arg, arg->op.right,
+ error_str);
+
+ case FILTER_VAL_FALSE:
+ if (arg->op.type == FILTER_OP_AND)
+ return FILTER_VAL_FALSE;
+ rval = test_arg(arg, arg->op.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+
+ return reparent_op_arg(parent, arg, arg->op.right,
+ error_str);
+
+ default:
+ return lval;
+ }
+ }
+
+ rval = test_arg(arg, arg->op.right, error_str);
+ switch (rval) {
+ case FILTER_VAL_NORM:
+ default:
+ break;
+
+ case FILTER_VAL_TRUE:
+ if (arg->op.type == FILTER_OP_OR)
+ return FILTER_VAL_TRUE;
+ if (arg->op.type == FILTER_OP_NOT)
+ return FILTER_VAL_FALSE;
+
+ return reparent_op_arg(parent, arg, arg->op.left,
+ error_str);
+
+ case FILTER_VAL_FALSE:
+ if (arg->op.type == FILTER_OP_AND)
+ return FILTER_VAL_FALSE;
+ if (arg->op.type == FILTER_OP_NOT)
+ return FILTER_VAL_TRUE;
+
+ return reparent_op_arg(parent, arg, arg->op.left,
+ error_str);
+ }
+
+ return rval;
+ default:
+ show_error(error_str, "bad arg in filter tree");
+ return TEP_ERRNO__BAD_FILTER_ARG;
+ }
+ return FILTER_VAL_NORM;
+}
+
+/* Remove any unknown event fields and collapse constant subtrees */
+static int collapse_tree(struct filter_arg *arg,
+ struct filter_arg **arg_collapsed, char *error_str)
+{
+ int ret;
+
+ ret = test_arg(arg, arg, error_str);
+ switch (ret) {
+ case FILTER_VAL_NORM:
+ break;
+
+ case FILTER_VAL_TRUE:
+ case FILTER_VAL_FALSE:
+ free_arg(arg);
+ arg = allocate_arg();
+ if (arg) {
+ arg->type = FILTER_ARG_BOOLEAN;
+ arg->boolean.value = ret == FILTER_VAL_TRUE;
+ } else {
+ show_error(error_str, "Failed to allocate filter arg");
+ ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+ break;
+
+ default:
+ /* test_arg() already set the error_str */
+ free_arg(arg);
+ arg = NULL;
+ break;
+ }
+
+ *arg_collapsed = arg;
+ return ret;
+}
+
+static enum tep_errno
+process_filter(struct event_format *event, struct filter_arg **parg,
+ char *error_str, int not)
+{
+ enum event_type type;
+ char *token = NULL;
+ struct filter_arg *current_op = NULL;
+ struct filter_arg *current_exp = NULL;
+ struct filter_arg *left_item = NULL;
+ struct filter_arg *arg = NULL;
+ enum op_type op_type;
+ enum filter_op_type btype;
+ enum filter_exp_type etype;
+ enum filter_cmp_type ctype;
+ enum tep_errno ret;
+
+ *parg = NULL;
+
+ do {
+ free(token);
+ type = read_token(&token);
+ switch (type) {
+ case EVENT_SQUOTE:
+ case EVENT_DQUOTE:
+ case EVENT_ITEM:
+ ret = create_arg_item(event, token, type, &arg, error_str);
+ if (ret < 0)
+ goto fail;
+ if (!left_item)
+ left_item = arg;
+ else if (current_exp) {
+ ret = add_right(current_exp, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ left_item = NULL;
+				/* A NOT takes only one expression */
+ if (not) {
+ arg = NULL;
+ if (current_op)
+ goto fail_syntax;
+ free(token);
+ *parg = current_exp;
+ return 0;
+ }
+ } else
+ goto fail_syntax;
+ arg = NULL;
+ break;
+
+ case EVENT_DELIM:
+ if (*token == ',') {
+ show_error(error_str, "Illegal token ','");
+ ret = TEP_ERRNO__ILLEGAL_TOKEN;
+ goto fail;
+ }
+
+ if (*token == '(') {
+ if (left_item) {
+ show_error(error_str,
+ "Open paren can not come after item");
+ ret = TEP_ERRNO__INVALID_PAREN;
+ goto fail;
+ }
+ if (current_exp) {
+ show_error(error_str,
+ "Open paren can not come after expression");
+ ret = TEP_ERRNO__INVALID_PAREN;
+ goto fail;
+ }
+
+ ret = process_filter(event, &arg, error_str, 0);
+ if (ret != TEP_ERRNO__UNBALANCED_PAREN) {
+ if (ret == 0) {
+ show_error(error_str,
+ "Unbalanced number of '('");
+ ret = TEP_ERRNO__UNBALANCED_PAREN;
+ }
+ goto fail;
+ }
+ ret = 0;
+
+ /* A not wants just one expression */
+ if (not) {
+ if (current_op)
+ goto fail_syntax;
+ *parg = arg;
+ return 0;
+ }
+
+ if (current_op)
+ ret = add_right(current_op, arg, error_str);
+ else
+ current_exp = arg;
+
+ if (ret < 0)
+ goto fail;
+
+ } else { /* ')' */
+ if (!current_op && !current_exp)
+ goto fail_syntax;
+
+ /* Make sure everything is finished at this level */
+ if (current_exp && !check_op_done(current_exp))
+ goto fail_syntax;
+ if (current_op && !check_op_done(current_op))
+ goto fail_syntax;
+
+ if (current_op)
+ *parg = current_op;
+ else
+ *parg = current_exp;
+ free(token);
+ return TEP_ERRNO__UNBALANCED_PAREN;
+ }
+ break;
+
+ case EVENT_OP:
+ op_type = process_op(token, &btype, &ctype, &etype);
+
+ /* All expect a left arg except for NOT */
+ switch (op_type) {
+ case OP_BOOL:
+ /* Logic ops need a left expression */
+ if (!current_exp && !current_op)
+ goto fail_syntax;
+ /* fall through */
+ case OP_NOT:
+ /* logic only processes ops and exp */
+ if (left_item)
+ goto fail_syntax;
+ break;
+ case OP_EXP:
+ case OP_CMP:
+ if (!left_item)
+ goto fail_syntax;
+ break;
+ case OP_NONE:
+ show_error(error_str,
+ "Unknown op token %s", token);
+ ret = TEP_ERRNO__UNKNOWN_TOKEN;
+ goto fail;
+ }
+
+ ret = 0;
+ switch (op_type) {
+ case OP_BOOL:
+ arg = create_arg_op(btype);
+ if (arg == NULL)
+ goto fail_alloc;
+ if (current_op)
+ ret = add_left(arg, current_op);
+ else
+ ret = add_left(arg, current_exp);
+ current_op = arg;
+ current_exp = NULL;
+ break;
+
+ case OP_NOT:
+ arg = create_arg_op(btype);
+ if (arg == NULL)
+ goto fail_alloc;
+ if (current_op)
+ ret = add_right(current_op, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ current_exp = arg;
+ ret = process_filter(event, &arg, error_str, 1);
+ if (ret < 0)
+ goto fail;
+ ret = add_right(current_exp, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ break;
+
+ case OP_EXP:
+ case OP_CMP:
+ if (op_type == OP_EXP)
+ arg = create_arg_exp(etype);
+ else
+ arg = create_arg_cmp(ctype);
+ if (arg == NULL)
+ goto fail_alloc;
+
+ if (current_op)
+ ret = add_right(current_op, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ ret = add_left(arg, left_item);
+ if (ret < 0) {
+ arg = NULL;
+ goto fail_syntax;
+ }
+ current_exp = arg;
+ break;
+ default:
+ break;
+ }
+ arg = NULL;
+ if (ret < 0)
+ goto fail_syntax;
+ break;
+ case EVENT_NONE:
+ break;
+ case EVENT_ERROR:
+ goto fail_alloc;
+ default:
+ goto fail_syntax;
+ }
+ } while (type != EVENT_NONE);
+
+ if (!current_op && !current_exp)
+ goto fail_syntax;
+
+ if (!current_op)
+ current_op = current_exp;
+
+ ret = collapse_tree(current_op, parg, error_str);
+ /* collapse_tree() may free current_op, and updates parg accordingly */
+ current_op = NULL;
+ if (ret < 0)
+ goto fail;
+
+ free(token);
+ return 0;
+
+ fail_alloc:
+ show_error(error_str, "failed to allocate filter arg");
+ ret = TEP_ERRNO__MEM_ALLOC_FAILED;
+ goto fail;
+ fail_syntax:
+ show_error(error_str, "Syntax error");
+ ret = TEP_ERRNO__SYNTAX_ERROR;
+ fail:
+ free_arg(current_op);
+ free_arg(current_exp);
+ free_arg(arg);
+ free(token);
+ return ret;
+}
+
+static enum tep_errno
+process_event(struct event_format *event, const char *filter_str,
+ struct filter_arg **parg, char *error_str)
+{
+ int ret;
+
+ tep_buffer_init(filter_str, strlen(filter_str));
+
+ ret = process_filter(event, parg, error_str, 0);
+ if (ret < 0)
+ return ret;
+
+ /* If parg is NULL, then make it into FALSE */
+ if (!*parg) {
+ *parg = allocate_arg();
+ if (*parg == NULL)
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+ (*parg)->type = FILTER_ARG_BOOLEAN;
+ (*parg)->boolean.value = FILTER_FALSE;
+ }
+
+ return 0;
+}
+
+static enum tep_errno
+filter_event(struct event_filter *filter, struct event_format *event,
+ const char *filter_str, char *error_str)
+{
+ struct filter_type *filter_type;
+ struct filter_arg *arg;
+ enum tep_errno ret;
+
+ if (filter_str) {
+ ret = process_event(event, filter_str, &arg, error_str);
+ if (ret < 0)
+ return ret;
+
+ } else {
+ /* just add a TRUE arg */
+ arg = allocate_arg();
+ if (arg == NULL)
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+
+ arg->type = FILTER_ARG_BOOLEAN;
+ arg->boolean.value = FILTER_TRUE;
+ }
+
+ filter_type = add_filter_type(filter, event->id);
+ if (filter_type == NULL) {
+ free_arg(arg);
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ if (filter_type->filter)
+ free_arg(filter_type->filter);
+ filter_type->filter = arg;
+
+ return 0;
+}
+
+static void filter_init_error_buf(struct event_filter *filter)
+{
+ /* clear buffer to reset show error */
+ tep_buffer_init("", 0);
+ filter->error_buffer[0] = '\0';
+}
+
+/**
+ * tep_filter_add_filter_str - add a new filter
+ * @filter: the event filter to add to
+ * @filter_str: the filter string that contains the filter
+ *
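+ * The filter string lists the events to attach to, optionally as
+ * "system/event" and comma separated, followed by an optional
+ * expression after a colon, e.g. (event and field names are
+ * illustrative):
+ *
+ *	sched/sched_switch : next_comm == "bash"
+ *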
+ * Returns 0 if the filter was successfully added or a
+ * negative error code. Use tep_filter_strerror() to see
+ * actual error message in case of error.
+ */
+enum tep_errno tep_filter_add_filter_str(struct event_filter *filter,
+ const char *filter_str)
+{
+ struct tep_handle *pevent = filter->pevent;
+ struct event_list *event;
+ struct event_list *events = NULL;
+ const char *filter_start;
+ const char *next_event;
+ char *this_event;
+ char *event_name = NULL;
+ char *sys_name = NULL;
+ char *sp;
+ enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */
+ int len;
+ int ret;
+
+ filter_init_error_buf(filter);
+
+ filter_start = strchr(filter_str, ':');
+ if (filter_start)
+ len = filter_start - filter_str;
+ else
+ len = strlen(filter_str);
+
+ do {
+ next_event = strchr(filter_str, ',');
+ if (next_event &&
+ (!filter_start || next_event < filter_start))
+ len = next_event - filter_str;
+ else if (filter_start)
+ len = filter_start - filter_str;
+ else
+ len = strlen(filter_str);
+
+ this_event = malloc(len + 1);
+ if (this_event == NULL) {
+ /* This can only happen when events is NULL, but still */
+ free_events(events);
+ return TEP_ERRNO__MEM_ALLOC_FAILED;
+ }
+ memcpy(this_event, filter_str, len);
+ this_event[len] = 0;
+
+ if (next_event)
+ next_event++;
+
+ filter_str = next_event;
+
+ sys_name = strtok_r(this_event, "/", &sp);
+ event_name = strtok_r(NULL, "/", &sp);
+
+ if (!sys_name) {
+ /* This can only happen when events is NULL, but still */
+ free_events(events);
+ free(this_event);
+ return TEP_ERRNO__FILTER_NOT_FOUND;
+ }
+
+ /* Find this event */
+ ret = find_event(pevent, &events, strim(sys_name), strim(event_name));
+ if (ret < 0) {
+ free_events(events);
+ free(this_event);
+ return ret;
+ }
+ free(this_event);
+ } while (filter_str);
+
+ /* Skip the ':' */
+ if (filter_start)
+ filter_start++;
+
+ /* filter starts here */
+ for (event = events; event; event = event->next) {
+ ret = filter_event(filter, event->event, filter_start,
+ filter->error_buffer);
+ /* Failures are returned if a parse error happened */
+ if (ret < 0)
+ rtn = ret;
+
+ if (ret >= 0 && pevent->test_filters) {
+ char *test;
+ test = tep_filter_make_string(filter, event->event->id);
+ if (test) {
+ printf(" '%s: %s'\n", event->event->name, test);
+ free(test);
+ }
+ }
+ }
+
+ free_events(events);
+
+ if (rtn >= 0 && pevent->test_filters)
+ exit(0);
+
+ return rtn;
+}
+
+static void free_filter_type(struct filter_type *filter_type)
+{
+ free_arg(filter_type->filter);
+}
+
+/**
+ * tep_filter_strerror - fill error message in a buffer
+ * @filter: the event filter that contains the error
+ * @err: the error code
+ * @buf: the buffer to be filled in
+ * @buflen: the size of the buffer
+ *
+ * Returns 0 if message was filled successfully, -1 if error
+ */
+int tep_filter_strerror(struct event_filter *filter, enum tep_errno err,
+ char *buf, size_t buflen)
+{
+ if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END)
+ return -1;
+
+ if (strlen(filter->error_buffer) > 0) {
+ size_t len = snprintf(buf, buflen, "%s", filter->error_buffer);
+
+		if (len >= buflen)
+ return -1;
+ return 0;
+ }
+
+ return tep_strerror(filter->pevent, err, buf, buflen);
+}
+
+/**
+ * tep_filter_remove_event - remove a filter for an event
+ * @filter: the event filter to remove from
+ * @event_id: the event to remove a filter for
+ *
+ * Removes the filter saved for an event defined by @event_id
+ * from the @filter.
+ *
+ * Returns 1: if an event was removed
+ * 0: if the event was not found
+ */
+int tep_filter_remove_event(struct event_filter *filter,
+ int event_id)
+{
+ struct filter_type *filter_type;
+ unsigned long len;
+
+ if (!filter->filters)
+ return 0;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ if (!filter_type)
+ return 0;
+
+ free_filter_type(filter_type);
+
+ /* The filter_type points into the event_filters array */
+ len = (unsigned long)(filter->event_filters + filter->filters) -
+ (unsigned long)(filter_type + 1);
+
+ memmove(filter_type, filter_type + 1, len);
+ filter->filters--;
+
+ memset(&filter->event_filters[filter->filters], 0,
+ sizeof(*filter_type));
+
+ return 1;
+}
+
+/**
+ * tep_filter_reset - clear all filters in a filter
+ * @filter: the event filter to reset
+ *
+ * Removes all filters from a filter and resets it.
+ */
+void tep_filter_reset(struct event_filter *filter)
+{
+ int i;
+
+ for (i = 0; i < filter->filters; i++)
+ free_filter_type(&filter->event_filters[i]);
+
+ free(filter->event_filters);
+ filter->filters = 0;
+ filter->event_filters = NULL;
+}
+
+void tep_filter_free(struct event_filter *filter)
+{
+ tep_unref(filter->pevent);
+
+ tep_filter_reset(filter);
+
+ free(filter);
+}
+
+static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg);
+
+static int copy_filter_type(struct event_filter *filter,
+ struct event_filter *source,
+ struct filter_type *filter_type)
+{
+ struct filter_arg *arg;
+ struct event_format *event;
+ const char *sys;
+ const char *name;
+ char *str;
+
+	/* Can't assume that the pevents are the same */
+ sys = filter_type->event->system;
+ name = filter_type->event->name;
+ event = tep_find_event_by_name(filter->pevent, sys, name);
+ if (!event)
+ return -1;
+
+ str = arg_to_str(source, filter_type->filter);
+ if (!str)
+ return -1;
+
+ if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
+ /* Add trivial event */
+ arg = allocate_arg();
+ if (arg == NULL) {
+ free(str);
+ return -1;
+ }
+
+ arg->type = FILTER_ARG_BOOLEAN;
+ if (strcmp(str, "TRUE") == 0)
+ arg->boolean.value = 1;
+ else
+ arg->boolean.value = 0;
+
+ filter_type = add_filter_type(filter, event->id);
+ if (filter_type == NULL) {
+ free(str);
+ free_arg(arg);
+ return -1;
+ }
+
+ filter_type->filter = arg;
+
+ free(str);
+ return 0;
+ }
+
+ filter_event(filter, event, str, NULL);
+ free(str);
+
+ return 0;
+}
+
+/**
+ * tep_filter_copy - copy a filter using another filter
+ * @dest: the filter to copy to
+ * @source: the filter to copy from
+ *
+ * Returns 0 on success and -1 if not all filters were copied
+ */
+int tep_filter_copy(struct event_filter *dest, struct event_filter *source)
+{
+ int ret = 0;
+ int i;
+
+ tep_filter_reset(dest);
+
+ for (i = 0; i < source->filters; i++) {
+ if (copy_filter_type(dest, source, &source->event_filters[i]))
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/**
+ * tep_update_trivial - update the trivial filters with the given filter
+ * @dest: the filter to update
+ * @source: the filter as the source of the update
+ * @type: the type of trivial filter to update
+ *
+ * Scans @dest for trivial filters matching @type and replaces them with
+ * the corresponding filters from @source.
+ *
+ * Returns 0 on success and -1 if there was a problem updating, but
+ * events may have still been updated on error.
+ */
+int tep_update_trivial(struct event_filter *dest, struct event_filter *source,
+ enum filter_trivial_type type)
+{
+ struct tep_handle *src_pevent;
+ struct tep_handle *dest_pevent;
+ struct event_format *event;
+ struct filter_type *filter_type;
+ struct filter_arg *arg;
+ char *str;
+ int i;
+
+ src_pevent = source->pevent;
+ dest_pevent = dest->pevent;
+
+ /* Do nothing if either of the filters has nothing to filter */
+ if (!dest->filters || !source->filters)
+ return 0;
+
+ for (i = 0; i < dest->filters; i++) {
+ filter_type = &dest->event_filters[i];
+ arg = filter_type->filter;
+ if (arg->type != FILTER_ARG_BOOLEAN)
+ continue;
+ if ((arg->boolean.value && type == FILTER_TRIVIAL_FALSE) ||
+ (!arg->boolean.value && type == FILTER_TRIVIAL_TRUE))
+ continue;
+
+ event = filter_type->event;
+
+ if (src_pevent != dest_pevent) {
+ /* do a lookup */
+ event = tep_find_event_by_name(src_pevent,
+ event->system,
+ event->name);
+ if (!event)
+ return -1;
+ }
+
+ str = tep_filter_make_string(source, event->id);
+ if (!str)
+ continue;
+
+ /* Don't bother if the filter is trivial too */
+ if (strcmp(str, "TRUE") != 0 && strcmp(str, "FALSE") != 0)
+ filter_event(dest, event, str, NULL);
+ free(str);
+ }
+ return 0;
+}
+
+/**
+ * tep_filter_clear_trivial - clear TRUE and FALSE filters
+ * @filter: the filter to remove trivial filters from
+ * @type: remove only true, false, or both
+ *
+ * Removes filters that only contain a TRUE or FALSE boolean arg.
+ *
+ * Returns 0 on success and -1 if there was a problem.
+ */
+int tep_filter_clear_trivial(struct event_filter *filter,
+ enum filter_trivial_type type)
+{
+ struct filter_type *filter_type;
+ int count = 0;
+ int *ids = NULL;
+ int i;
+
+ if (!filter->filters)
+ return 0;
+
+ /*
+ * Two steps, first get all ids with trivial filters.
+ * then remove those ids.
+ */
+ for (i = 0; i < filter->filters; i++) {
+ int *new_ids;
+
+ filter_type = &filter->event_filters[i];
+ if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+ continue;
+ switch (type) {
+ case FILTER_TRIVIAL_FALSE:
+ if (filter_type->filter->boolean.value)
+ continue;
+ break;
+ case FILTER_TRIVIAL_TRUE:
+ if (!filter_type->filter->boolean.value)
+ continue;
+ break;
+ default:
+ break;
+ }
+
+ new_ids = realloc(ids, sizeof(*ids) * (count + 1));
+ if (!new_ids) {
+ free(ids);
+ return -1;
+ }
+
+ ids = new_ids;
+ ids[count++] = filter_type->event_id;
+ }
+
+ if (!count)
+ return 0;
+
+ for (i = 0; i < count; i++)
+ tep_filter_remove_event(filter, ids[i]);
+
+ free(ids);
+ return 0;
+}
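
tep_filter_clear_trivial() collects the matching event ids first and removes them afterwards, because tep_filter_remove_event() shifts the remaining entries down and decrements filter->filters, which would invalidate the loop index mid-scan. An equivalent single-pass form would have to hold the index steady after each removal; a sketch (is_trivial() is a hypothetical predicate, used only for illustration):

    /* Single-pass alternative: after a removal the next entry has
     * shifted into slot i, so only advance i when nothing was removed. */
    for (i = 0; i < filter->filters; ) {
            if (is_trivial(&filter->event_filters[i], type))
                    tep_filter_remove_event(filter,
                                            filter->event_filters[i].event_id);
            else
                    i++;
    }
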
+
+/**
+ * tep_filter_event_has_trivial - return true if the event contains a trivial filter
+ * @filter: the filter with the information
+ * @event_id: the id of the event to test
+ * @type: trivial type to test for (TRUE, FALSE, EITHER)
+ *
+ * Returns 1 if the event contains a matching trivial type,
+ * otherwise 0.
+ */
+int tep_filter_event_has_trivial(struct event_filter *filter,
+ int event_id,
+ enum filter_trivial_type type)
+{
+ struct filter_type *filter_type;
+
+ if (!filter->filters)
+ return 0;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ if (!filter_type)
+ return 0;
+
+ if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+ return 0;
+
+ switch (type) {
+ case FILTER_TRIVIAL_FALSE:
+ return !filter_type->filter->boolean.value;
+
+ case FILTER_TRIVIAL_TRUE:
+ return filter_type->filter->boolean.value;
+ default:
+ return 1;
+ }
+}
+
+static int test_filter(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err);
+
+static const char *
+get_comm(struct event_format *event, struct tep_record *record)
+{
+ const char *comm;
+ int pid;
+
+ pid = tep_data_pid(event->pevent, record);
+ comm = tep_data_comm_from_pid(event->pevent, pid);
+ return comm;
+}
+
+static unsigned long long
+get_value(struct event_format *event,
+ struct format_field *field, struct tep_record *record)
+{
+ unsigned long long val;
+
+ /* Handle our dummy "comm" field */
+ if (field == &comm) {
+ const char *name;
+
+ name = get_comm(event, record);
+ return (unsigned long)name;
+ }
+
+ /* Handle our dummy "cpu" field */
+ if (field == &cpu)
+ return record->cpu;
+
+ tep_read_number_field(field, record->data, &val);
+
+ if (!(field->flags & FIELD_IS_SIGNED))
+ return val;
+
+ switch (field->size) {
+ case 1:
+ return (char)val;
+ case 2:
+ return (short)val;
+ case 4:
+ return (int)val;
+ case 8:
+ return (long long)val;
+ }
+ return val;
+}
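
tep_read_number_field() always yields the raw, zero-extended value; the casts above reinterpret it at the field's width so that negative values of signed fields sign-extend correctly to 64 bits. A worked example (note that get_value(), like this sketch, assumes plain char is signed):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long val = 0xff;  /* raw value of a 1-byte signed field */

            printf("%llu\n", val);                   /* 255 without the cast */
            printf("%lld\n", (long long)(char)val);  /* -1 after sign extension */
            return 0;
    }
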
+
+static unsigned long long
+get_arg_value(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err);
+
+static unsigned long long
+get_exp_value(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err)
+{
+ unsigned long long lval, rval;
+
+ lval = get_arg_value(event, arg->exp.left, record, err);
+ rval = get_arg_value(event, arg->exp.right, record, err);
+
+ if (*err) {
+ /*
+ * There was an error; no need to process any further.
+ */
+ return 0;
+ }
+
+ switch (arg->exp.type) {
+ case FILTER_EXP_ADD:
+ return lval + rval;
+
+ case FILTER_EXP_SUB:
+ return lval - rval;
+
+ case FILTER_EXP_MUL:
+ return lval * rval;
+
+ case FILTER_EXP_DIV:
+ return lval / rval;
+
+ case FILTER_EXP_MOD:
+ return lval % rval;
+
+ case FILTER_EXP_RSHIFT:
+ return lval >> rval;
+
+ case FILTER_EXP_LSHIFT:
+ return lval << rval;
+
+ case FILTER_EXP_AND:
+ return lval & rval;
+
+ case FILTER_EXP_OR:
+ return lval | rval;
+
+ case FILTER_EXP_XOR:
+ return lval ^ rval;
+
+ case FILTER_EXP_NOT:
+ default:
+ if (!*err)
+ *err = TEP_ERRNO__INVALID_EXP_TYPE;
+ }
+ return 0;
+}
+
+static unsigned long long
+get_arg_value(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err)
+{
+ switch (arg->type) {
+ case FILTER_ARG_FIELD:
+ return get_value(event, arg->field.field, record);
+
+ case FILTER_ARG_VALUE:
+ if (arg->value.type != FILTER_NUMBER) {
+ if (!*err)
+ *err = TEP_ERRNO__NOT_A_NUMBER;
+ }
+ return arg->value.val;
+
+ case FILTER_ARG_EXP:
+ return get_exp_value(event, arg, record, err);
+
+ default:
+ if (!*err)
+ *err = TEP_ERRNO__INVALID_ARG_TYPE;
+ }
+ return 0;
+}
+
+static int test_num(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err)
+{
+ unsigned long long lval, rval;
+
+ lval = get_arg_value(event, arg->num.left, record, err);
+ rval = get_arg_value(event, arg->num.right, record, err);
+
+ if (*err) {
+ /*
+ * There was an error; no need to process any further.
+ */
+ return 0;
+ }
+
+ switch (arg->num.type) {
+ case FILTER_CMP_EQ:
+ return lval == rval;
+
+ case FILTER_CMP_NE:
+ return lval != rval;
+
+ case FILTER_CMP_GT:
+ return lval > rval;
+
+ case FILTER_CMP_LT:
+ return lval < rval;
+
+ case FILTER_CMP_GE:
+ return lval >= rval;
+
+ case FILTER_CMP_LE:
+ return lval <= rval;
+
+ default:
+ if (!*err)
+ *err = TEP_ERRNO__ILLEGAL_INTEGER_CMP;
+ return 0;
+ }
+}
+
+static const char *get_field_str(struct filter_arg *arg, struct tep_record *record)
+{
+ struct event_format *event;
+ struct tep_handle *pevent;
+ unsigned long long addr;
+ const char *val = NULL;
+ unsigned int size;
+ char hex[64];
+
+ /* If the field is a string, use it directly; otherwise convert it */
+ if (arg->str.field->flags & FIELD_IS_STRING) {
+ val = record->data + arg->str.field->offset;
+ size = arg->str.field->size;
+
+ if (arg->str.field->flags & FIELD_IS_DYNAMIC) {
+ addr = *(unsigned int *)val;
+ val = record->data + (addr & 0xffff);
+ size = addr >> 16;
+ }
+
+ /*
+ * We need to copy the data since we can't be sure the field
+ * is null terminated.
+ */
+ if (*(val + size - 1)) {
+ /* copy it */
+ memcpy(arg->str.buffer, val, arg->str.field->size);
+ /* the buffer is already NULL terminated */
+ val = arg->str.buffer;
+ }
+
+ } else {
+ event = arg->str.field->event;
+ pevent = event->pevent;
+ addr = get_value(event, arg->str.field, record);
+
+ if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG))
+ /* convert to a kernel symbol */
+ val = tep_find_function(pevent, addr);
+
+ if (val == NULL) {
+ /* fall back to the hex value of the address */
+ snprintf(hex, 64, "0x%llx", addr);
+ val = hex;
+ }
+ }
+
+ return val;
+}
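
The FIELD_IS_DYNAMIC branch above decodes a __data_loc descriptor: the record holds a 32-bit word whose low 16 bits are the offset of the string within the record and whose high 16 bits are its length. A sketch of that decoding in isolation (record_data and field_offset are hypothetical names standing in for the record buffer and the field's offset):

    /* Decode a __data_loc word found at the field's offset. */
    unsigned int desc   = *(unsigned int *)(record_data + field_offset);
    unsigned int offset = desc & 0xffff;   /* where the payload starts */
    unsigned int length = desc >> 16;      /* how many bytes it spans */
    const char  *str    = (const char *)record_data + offset;
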
+
+static int test_str(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err)
+{
+ const char *val;
+
+ if (arg->str.field == &comm)
+ val = get_comm(event, record);
+ else
+ val = get_field_str(arg, record);
+
+ switch (arg->str.type) {
+ case FILTER_CMP_MATCH:
+ return strcmp(val, arg->str.val) == 0;
+
+ case FILTER_CMP_NOT_MATCH:
+ return strcmp(val, arg->str.val) != 0;
+
+ case FILTER_CMP_REGEX:
+ /* Returns zero on match */
+ return !regexec(&arg->str.reg, val, 0, NULL, 0);
+
+ case FILTER_CMP_NOT_REGEX:
+ return regexec(&arg->str.reg, val, 0, NULL, 0);
+
+ default:
+ if (!*err)
+ *err = TEP_ERRNO__ILLEGAL_STRING_CMP;
+ return 0;
+ }
+}
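
The regex cases rely on arg->str.reg, a regex_t compiled earlier when the filter string was parsed; regexec() returns 0 on a match, hence the negation for FILTER_CMP_REGEX. A self-contained POSIX regex sketch of the same calls:

    #include <regex.h>
    #include <stdio.h>

    int main(void)
    {
            regex_t reg;

            if (regcomp(&reg, "^kmalloc", REG_EXTENDED | REG_NOSUB))
                    return 1;
            /* regexec() returns 0 on match, just as test_str() expects */
            if (regexec(&reg, "kmalloc_node", 0, NULL, 0) == 0)
                    printf("match\n");
            regfree(&reg);
            return 0;
    }
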
+
+static int test_op(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err)
+{
+ switch (arg->op.type) {
+ case FILTER_OP_AND:
+ return test_filter(event, arg->op.left, record, err) &&
+ test_filter(event, arg->op.right, record, err);
+
+ case FILTER_OP_OR:
+ return test_filter(event, arg->op.left, record, err) ||
+ test_filter(event, arg->op.right, record, err);
+
+ case FILTER_OP_NOT:
+ return !test_filter(event, arg->op.right, record, err);
+
+ default:
+ if (!*err)
+ *err = TEP_ERRNO__INVALID_OP_TYPE;
+ return 0;
+ }
+}
+
+static int test_filter(struct event_format *event, struct filter_arg *arg,
+ struct tep_record *record, enum tep_errno *err)
+{
+ if (*err) {
+ /*
+ * There was an error; no need to process any further.
+ */
+ return 0;
+ }
+
+ switch (arg->type) {
+ case FILTER_ARG_BOOLEAN:
+ /* easy case */
+ return arg->boolean.value;
+
+ case FILTER_ARG_OP:
+ return test_op(event, arg, record, err);
+
+ case FILTER_ARG_NUM:
+ return test_num(event, arg, record, err);
+
+ case FILTER_ARG_STR:
+ return test_str(event, arg, record, err);
+
+ case FILTER_ARG_EXP:
+ case FILTER_ARG_VALUE:
+ case FILTER_ARG_FIELD:
+ /*
+ * Expressions, fields and values evaluate
+ * to true if they return non zero
+ */
+ return !!get_arg_value(event, arg, record, err);
+
+ default:
+ if (!*err)
+ *err = TEP_ERRNO__INVALID_ARG_TYPE;
+ return 0;
+ }
+}
+
+/**
+ * tep_event_filtered - return true if event has filter
+ * @filter: filter struct with filter information
+ * @event_id: event id to test if filter exists
+ *
+ * Returns 1 if a filter is found for @event_id,
+ * otherwise 0.
+ */
+int tep_event_filtered(struct event_filter *filter, int event_id)
+{
+ struct filter_type *filter_type;
+
+ if (!filter->filters)
+ return 0;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ return filter_type ? 1 : 0;
+}
+
+/**
+ * tep_filter_match - test if a record matches a filter
+ * @filter: filter struct with filter information
+ * @record: the record to test against the filter
+ *
+ * Returns: match result or error code (prefixed with TEP_ERRNO__)
+ * FILTER_MATCH - filter found for event and @record matches
+ * FILTER_MISS - filter found for event and @record does not match
+ * FILTER_NOT_FOUND - no filter found for @record's event
+ * NO_FILTER - if no filters exist
+ * otherwise - error occurred during test
+ */
+enum tep_errno tep_filter_match(struct event_filter *filter,
+ struct tep_record *record)
+{
+ struct tep_handle *pevent = filter->pevent;
+ struct filter_type *filter_type;
+ int event_id;
+ int ret;
+ enum tep_errno err = 0;
+
+ filter_init_error_buf(filter);
+
+ if (!filter->filters)
+ return TEP_ERRNO__NO_FILTER;
+
+ event_id = tep_data_type(pevent, record);
+
+ filter_type = find_filter_type(filter, event_id);
+ if (!filter_type)
+ return TEP_ERRNO__FILTER_NOT_FOUND;
+
+ ret = test_filter(filter_type->event, filter_type->filter, record, &err);
+ if (err)
+ return err;
+
+ return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS;
+}
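
Putting the pieces together, a typical caller allocates a filter, installs a filter string, and tests records as they arrive. A hedged sketch, assuming the tep_filter_alloc() and tep_filter_add_filter_str() interfaces provided elsewhere in this library version, plus an already-initialized pevent handle, record, and filter expression string:

    struct event_filter *filter;

    filter = tep_filter_alloc(pevent);
    /* filter_str: an event filter expression in the library's syntax,
     * e.g. one naming an event and a field comparison (assumed here) */
    if (filter && tep_filter_add_filter_str(filter, filter_str) == 0) {
            if (tep_filter_match(filter, record) == TEP_ERRNO__FILTER_MATCH)
                    printf("record matches\n");
    }
    if (filter)
            tep_filter_free(filter);
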
+
+static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+ char *left = NULL;
+ char *right = NULL;
+ char *op = NULL;
+ int left_val = -1;
+ int right_val = -1;
+ int val;
+
+ switch (arg->op.type) {
+ case FILTER_OP_AND:
+ op = "&&";
+ /* fall through */
+ case FILTER_OP_OR:
+ if (!op)
+ op = "||";
+
+ left = arg_to_str(filter, arg->op.left);
+ right = arg_to_str(filter, arg->op.right);
+ if (!left || !right)
+ break;
+
+ /* Try to consolidate boolean values */
+ if (strcmp(left, "TRUE") == 0)
+ left_val = 1;
+ else if (strcmp(left, "FALSE") == 0)
+ left_val = 0;
+
+ if (strcmp(right, "TRUE") == 0)
+ right_val = 1;
+ else if (strcmp(right, "FALSE") == 0)
+ right_val = 0;
+
+ if (left_val >= 0) {
+ if ((arg->op.type == FILTER_OP_AND && !left_val) ||
+ (arg->op.type == FILTER_OP_OR && left_val)) {
+ /* Just return left value */
+ str = left;
+ left = NULL;
+ break;
+ }
+ if (right_val >= 0) {
+ /* just evaluate this. */
+ val = 0;
+ switch (arg->op.type) {
+ case FILTER_OP_AND:
+ val = left_val && right_val;
+ break;
+ case FILTER_OP_OR:
+ val = left_val || right_val;
+ break;
+ default:
+ break;
+ }
+ asprintf(&str, val ? "TRUE" : "FALSE");
+ break;
+ }
+ }
+ if (right_val >= 0) {
+ if ((arg->op.type == FILTER_OP_AND && !right_val) ||
+ (arg->op.type == FILTER_OP_OR && right_val)) {
+ /* Just return right value */
+ str = right;
+ right = NULL;
+ break;
+ }
+ /* The right value is meaningless */
+ str = left;
+ left = NULL;
+ break;
+ }
+
+ asprintf(&str, "(%s) %s (%s)", left, op, right);
+ break;
+
+ case FILTER_OP_NOT:
+ op = "!";
+ right = arg_to_str(filter, arg->op.right);
+ if (!right)
+ break;
+
+ /* See if we can consolidate */
+ if (strcmp(right, "TRUE") == 0)
+ right_val = 1;
+ else if (strcmp(right, "FALSE") == 0)
+ right_val = 0;
+ if (right_val >= 0) {
+ /* just return the opposite */
+ asprintf(&str, right_val ? "FALSE" : "TRUE");
+ break;
+ }
+ asprintf(&str, "%s(%s)", op, right);
+ break;
+
+ default:
+ /* unknown op type */
+ break;
+ }
+ free(left);
+ free(right);
+ return str;
+}
+
+static char *val_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+
+ asprintf(&str, "%lld", arg->value.val);
+
+ return str;
+}
+
+static char *field_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ return strdup(arg->field.field->name);
+}
+
+static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *lstr;
+ char *rstr;
+ char *op;
+ char *str = NULL;
+
+ lstr = arg_to_str(filter, arg->exp.left);
+ rstr = arg_to_str(filter, arg->exp.right);
+ if (!lstr || !rstr)
+ goto out;
+
+ switch (arg->exp.type) {
+ case FILTER_EXP_ADD:
+ op = "+";
+ break;
+ case FILTER_EXP_SUB:
+ op = "-";
+ break;
+ case FILTER_EXP_MUL:
+ op = "*";
+ break;
+ case FILTER_EXP_DIV:
+ op = "/";
+ break;
+ case FILTER_EXP_MOD:
+ op = "%";
+ break;
+ case FILTER_EXP_RSHIFT:
+ op = ">>";
+ break;
+ case FILTER_EXP_LSHIFT:
+ op = "<<";
+ break;
+ case FILTER_EXP_AND:
+ op = "&";
+ break;
+ case FILTER_EXP_OR:
+ op = "|";
+ break;
+ case FILTER_EXP_XOR:
+ op = "^";
+ break;
+ default:
+ op = "[ERROR IN EXPRESSION TYPE]";
+ break;
+ }
+
+ asprintf(&str, "%s %s %s", lstr, op, rstr);
+out:
+ free(lstr);
+ free(rstr);
+
+ return str;
+}
+
+static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *lstr;
+ char *rstr;
+ char *str = NULL;
+ char *op = NULL;
+
+ lstr = arg_to_str(filter, arg->num.left);
+ rstr = arg_to_str(filter, arg->num.right);
+ if (!lstr || !rstr)
+ goto out;
+
+ switch (arg->num.type) {
+ case FILTER_CMP_EQ:
+ op = "==";
+ /* fall through */
+ case FILTER_CMP_NE:
+ if (!op)
+ op = "!=";
+ /* fall through */
+ case FILTER_CMP_GT:
+ if (!op)
+ op = ">";
+ /* fall through */
+ case FILTER_CMP_LT:
+ if (!op)
+ op = "<";
+ /* fall through */
+ case FILTER_CMP_GE:
+ if (!op)
+ op = ">=";
+ /* fall through */
+ case FILTER_CMP_LE:
+ if (!op)
+ op = "<=";
+
+ asprintf(&str, "%s %s %s", lstr, op, rstr);
+ break;
+
+ default:
+ /* unknown comparison type */
+ break;
+ }
+
+out:
+ free(lstr);
+ free(rstr);
+ return str;
+}
+
+static char *str_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+ char *op = NULL;
+
+ switch (arg->str.type) {
+ case FILTER_CMP_MATCH:
+ op = "==";
+ /* fall through */
+ case FILTER_CMP_NOT_MATCH:
+ if (!op)
+ op = "!=";
+ /* fall through */
+ case FILTER_CMP_REGEX:
+ if (!op)
+ op = "=~";
+ /* fall through */
+ case FILTER_CMP_NOT_REGEX:
+ if (!op)
+ op = "!~";
+
+ asprintf(&str, "%s %s \"%s\"",
+ arg->str.field->name, op, arg->str.val);
+ break;
+
+ default:
+ /* unknown string comparison type */
+ break;
+ }
+ return str;
+}
+
+static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+
+ switch (arg->type) {
+ case FILTER_ARG_BOOLEAN:
+ asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+ return str;
+
+ case FILTER_ARG_OP:
+ return op_to_str(filter, arg);
+
+ case FILTER_ARG_NUM:
+ return num_to_str(filter, arg);
+
+ case FILTER_ARG_STR:
+ return str_to_str(filter, arg);
+
+ case FILTER_ARG_VALUE:
+ return val_to_str(filter, arg);
+
+ case FILTER_ARG_FIELD:
+ return field_to_str(filter, arg);
+
+ case FILTER_ARG_EXP:
+ return exp_to_str(filter, arg);
+
+ default:
+ /* unknown arg type */
+ return NULL;
+ }
+
+}
+
+/**
+ * tep_filter_make_string - return a string showing the filter
+ * @filter: filter struct with filter information
+ * @event_id: the event id to return the filter string with
+ *
+ * Returns a string that displays the filter contents.
+ * This string must be freed with free(str).
+ * NULL is returned if no filter is found or allocation failed.
+ */
+char *
+tep_filter_make_string(struct event_filter *filter, int event_id)
+{
+ struct filter_type *filter_type;
+
+ if (!filter->filters)
+ return NULL;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ if (!filter_type)
+ return NULL;
+
+ return arg_to_str(filter, filter_type->filter);
+}
+
+/**
+ * tep_filter_compare - compare two filters and return if they are the same
+ * @filter1: Filter to compare with @filter2
+ * @filter2: Filter to compare with @filter1
+ *
+ * Returns:
+ * 1 if the two filters hold the same content.
+ * 0 if they do not.
+ */
+int tep_filter_compare(struct event_filter *filter1, struct event_filter *filter2)
+{
+ struct filter_type *filter_type1;
+ struct filter_type *filter_type2;
+ char *str1, *str2;
+ int result;
+ int i;
+
+ /* Do the easy checks first */
+ if (filter1->filters != filter2->filters)
+ return 0;
+ if (!filter1->filters && !filter2->filters)
+ return 1;
+
+ /*
+ * Now take a look at each of the events to see if they have the same
+ * filters to them.
+ */
+ for (i = 0; i < filter1->filters; i++) {
+ filter_type1 = &filter1->event_filters[i];
+ filter_type2 = find_filter_type(filter2, filter_type1->event_id);
+ if (!filter_type2)
+ break;
+ if (filter_type1->filter->type != filter_type2->filter->type)
+ break;
+ switch (filter_type1->filter->type) {
+ case FILTER_TRIVIAL_FALSE:
+ case FILTER_TRIVIAL_TRUE:
+ /* trivial types just need the type compared */
+ continue;
+ default:
+ break;
+ }
+ /* The best way to compare complex filters is with strings */
+ str1 = arg_to_str(filter1, filter_type1->filter);
+ str2 = arg_to_str(filter2, filter_type2->filter);
+ if (str1 && str2)
+ result = strcmp(str1, str2) != 0;
+ else
+ /* bail out if allocation fails */
+ result = 1;
+
+ free(str1);
+ free(str2);
+ if (result)
+ break;
+ }
+
+ if (i < filter1->filters)
+ return 0;
+ return 1;
+}
+
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
new file mode 100644
index 000000000..77e4ec640
--- /dev/null
+++ b/tools/lib/traceevent/parse-utils.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#define __weak __attribute__((weak))
+
+void __vwarning(const char *fmt, va_list ap)
+{
+ if (errno)
+ perror("trace-cmd");
+ errno = 0;
+
+ fprintf(stderr, " ");
+ vfprintf(stderr, fmt, ap);
+
+ fprintf(stderr, "\n");
+}
+
+void __warning(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vwarning(fmt, ap);
+ va_end(ap);
+}
+
+void __weak warning(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vwarning(fmt, ap);
+ va_end(ap);
+}
+
+void __vpr_stat(const char *fmt, va_list ap)
+{
+ vprintf(fmt, ap);
+ printf("\n");
+}
+
+void __pr_stat(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vpr_stat(fmt, ap);
+ va_end(ap);
+}
+
+void __weak vpr_stat(const char *fmt, va_list ap)
+{
+ __vpr_stat(fmt, ap);
+}
+
+void __weak pr_stat(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vpr_stat(fmt, ap);
+ va_end(ap);
+}
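
warning(), vpr_stat() and pr_stat() are defined __weak above so that a linking application can substitute its own logging without patching the library. A sketch of an application-side override (the "myapp" prefix is illustrative):

    #include <stdarg.h>
    #include <stdio.h>

    /* A strong definition in the application wins over the library's
     * weak warning() at link time. */
    void warning(const char *fmt, ...)
    {
            va_list ap;

            va_start(ap, fmt);
            fprintf(stderr, "myapp: ");
            vfprintf(stderr, fmt, ap);
            fprintf(stderr, "\n");
            va_end(ap);
    }
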
diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
new file mode 100644
index 000000000..a51b366f4
--- /dev/null
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <endian.h>
+#include "event-parse.h"
+
+/*
+ * From glibc endian.h, for older systems where it is not present,
+ * e.g. RHEL5 and Fedora 6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+# define le16toh(x) (x)
+# else
+# define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
+static unsigned long long
+process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
+{
+ uint16_t *val = (uint16_t *) (unsigned long) args[0];
+ return val ? (long long) le16toh(*val) : 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_print_function(pevent,
+ process___le16_to_cpup,
+ TEP_FUNC_ARG_INT,
+ "__le16_to_cpup",
+ TEP_FUNC_ARG_PTR,
+ TEP_FUNC_ARG_VOID);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_print_function(pevent, process___le16_to_cpup,
+ "__le16_to_cpup");
+}
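
Plugins such as this one are built as shared objects; the host application normally loads every installed plugin through the library's plugin API, which invokes each plugin's TEP_PLUGIN_LOADER. A sketch, assuming the tep_load_plugins()/tep_unload_plugins() interfaces from this version of the library:

    struct tep_plugin_list *plugins;

    plugins = tep_load_plugins(pevent);   /* dlopen()s each plugin and
                                           * runs its TEP_PLUGIN_LOADER */
    /* ... parse and print trace data ... */
    tep_unload_plugins(plugins, pevent);  /* runs each TEP_PLUGIN_UNLOADER */
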
diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c
new file mode 100644
index 000000000..424747475
--- /dev/null
+++ b/tools/lib/traceevent/plugin_function.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+
+static struct func_stack {
+ int size;
+ char **stack;
+} *fstack;
+
+static int cpus = -1;
+
+#define STK_BLK 10
+
+struct tep_plugin_option plugin_options[] =
+{
+ {
+ .name = "parent",
+ .plugin_alias = "ftrace",
+ .description =
+ "Print parent of functions for function events",
+ },
+ {
+ .name = "indent",
+ .plugin_alias = "ftrace",
+ .description =
+ "Try to show function call indents, based on parents",
+ .set = 1,
+ },
+ {
+ .name = NULL,
+ }
+};
+
+static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
+static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+
+static void add_child(struct func_stack *stack, const char *child, int pos)
+{
+ int i;
+
+ if (!child)
+ return;
+
+ if (pos < stack->size)
+ free(stack->stack[pos]);
+ else {
+ char **ptr;
+
+ ptr = realloc(stack->stack, sizeof(char *) *
+ (stack->size + STK_BLK));
+ if (!ptr) {
+ warning("could not allocate plugin memory\n");
+ return;
+ }
+
+ stack->stack = ptr;
+
+ for (i = stack->size; i < stack->size + STK_BLK; i++)
+ stack->stack[i] = NULL;
+ stack->size += STK_BLK;
+ }
+
+ stack->stack[pos] = strdup(child);
+}
+
+static int add_and_get_index(const char *parent, const char *child, int cpu)
+{
+ int i;
+
+ if (cpu < 0)
+ return 0;
+
+ if (cpu > cpus) {
+ struct func_stack *ptr;
+
+ ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
+ if (!ptr) {
+ warning("could not allocate plugin memory\n");
+ return 0;
+ }
+
+ fstack = ptr;
+
+ /* Account for holes in the cpu count */
+ for (i = cpus + 1; i <= cpu; i++)
+ memset(&fstack[i], 0, sizeof(fstack[i]));
+ cpus = cpu;
+ }
+
+ for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
+ if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
+ add_child(&fstack[cpu], child, i+1);
+ return i;
+ }
+ }
+
+ /* Not found */
+ add_child(&fstack[cpu], parent, 0);
+ add_child(&fstack[cpu], child, 1);
+ return 0;
+}
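
The per-CPU stack converts parent/child pairs into an indentation depth: the parent is looked up on the stack, the child is stored one slot deeper, and the parent's index becomes the indent. A hypothetical sequence on one CPU, traced through the code above:

    /*
     * add_and_get_index("start_kernel", "rest_init", cpu)  -> 0
     *     stack: [ "start_kernel", "rest_init" ]
     * add_and_get_index("rest_init", "kernel_init", cpu)   -> 1
     *     stack: [ "start_kernel", "rest_init", "kernel_init" ]
     * add_and_get_index("start_kernel", "other_fn", cpu)   -> 0
     *     stack: [ "start_kernel", "other_fn", "kernel_init" ]
     */
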
+
+static int function_handler(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ struct tep_handle *pevent = event->pevent;
+ unsigned long long function;
+ unsigned long long pfunction;
+ const char *func;
+ const char *parent;
+ int index = 0;
+
+ if (tep_get_field_val(s, event, "ip", record, &function, 1))
+ return trace_seq_putc(s, '!');
+
+ func = tep_find_function(pevent, function);
+
+ if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
+ return trace_seq_putc(s, '!');
+
+ parent = tep_find_function(pevent, pfunction);
+
+ if (parent && ftrace_indent->set)
+ index = add_and_get_index(parent, func, record->cpu);
+
+ trace_seq_printf(s, "%*s", index*3, "");
+
+ if (func)
+ trace_seq_printf(s, "%s", func);
+ else
+ trace_seq_printf(s, "0x%llx", function);
+
+ if (ftrace_parent->set) {
+ trace_seq_printf(s, " <-- ");
+ if (parent)
+ trace_seq_printf(s, "%s", parent);
+ else
+ trace_seq_printf(s, "0x%llx", pfunction);
+ }
+
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_event_handler(pevent, -1, "ftrace", "function",
+ function_handler, NULL);
+
+ tep_plugin_add_options("ftrace", plugin_options);
+
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ int i, x;
+
+ tep_unregister_event_handler(pevent, -1, "ftrace", "function",
+ function_handler, NULL);
+
+ for (i = 0; i <= cpus; i++) {
+ for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
+ free(fstack[i].stack[x]);
+ free(fstack[i].stack);
+ }
+
+ tep_plugin_remove_options(plugin_options);
+
+ free(fstack);
+ fstack = NULL;
+ cpus = -1;
+}
diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c
new file mode 100644
index 000000000..b43bfec56
--- /dev/null
+++ b/tools/lib/traceevent/plugin_hrtimer.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+static int timer_expire_handler(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ trace_seq_printf(s, "hrtimer=");
+
+ if (tep_print_num_field(s, "0x%llx", event, "timer",
+ record, 0) == -1)
+ tep_print_num_field(s, "0x%llx", event, "hrtimer",
+ record, 1);
+
+ trace_seq_printf(s, " now=");
+
+ tep_print_num_field(s, "%llu", event, "now", record, 1);
+
+ tep_print_func_field(s, " function=%s", event, "function",
+ record, 0);
+ return 0;
+}
+
+static int timer_start_handler(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ trace_seq_printf(s, "hrtimer=");
+
+ if (tep_print_num_field(s, "0x%llx", event, "timer",
+ record, 0) == -1)
+ tep_print_num_field(s, "0x%llx", event, "hrtimer",
+ record, 1);
+
+ tep_print_func_field(s, " function=%s", event, "function",
+ record, 0);
+
+ trace_seq_printf(s, " expires=");
+ tep_print_num_field(s, "%llu", event, "expires", record, 1);
+
+ trace_seq_printf(s, " softexpires=");
+ tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_event_handler(pevent, -1,
+ "timer", "hrtimer_expire_entry",
+ timer_expire_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "timer", "hrtimer_start",
+ timer_start_handler, NULL);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_event_handler(pevent, -1,
+ "timer", "hrtimer_expire_entry",
+ timer_expire_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "timer", "hrtimer_start",
+ timer_start_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c
new file mode 100644
index 000000000..45a9acd19
--- /dev/null
+++ b/tools/lib/traceevent/plugin_jbd2.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+#define MINORBITS 20
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+
+static unsigned long long
+process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
+{
+ unsigned int dev = args[0];
+
+ trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
+ return 0;
+}
+
+static unsigned long long
+process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
+{
+ unsigned long long jiffies = args[0];
+
+ trace_seq_printf(s, "%lld", jiffies);
+ return jiffies;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_print_function(pevent,
+ process_jbd2_dev_to_name,
+ TEP_FUNC_ARG_STRING,
+ "jbd2_dev_to_name",
+ TEP_FUNC_ARG_INT,
+ TEP_FUNC_ARG_VOID);
+
+ tep_register_print_function(pevent,
+ process_jiffies_to_msecs,
+ TEP_FUNC_ARG_LONG,
+ "jiffies_to_msecs",
+ TEP_FUNC_ARG_LONG,
+ TEP_FUNC_ARG_VOID);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_print_function(pevent, process_jbd2_dev_to_name,
+ "jbd2_dev_to_name");
+
+ tep_unregister_print_function(pevent, process_jiffies_to_msecs,
+ "jiffies_to_msecs");
+}
diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c
new file mode 100644
index 000000000..73966b05a
--- /dev/null
+++ b/tools/lib/traceevent/plugin_kmem.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+static int call_site_handler(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ struct format_field *field;
+ unsigned long long val, addr;
+ void *data = record->data;
+ const char *func;
+
+ field = tep_find_field(event, "call_site");
+ if (!field)
+ return 1;
+
+ if (tep_read_number_field(field, data, &val))
+ return 1;
+
+ func = tep_find_function(event->pevent, val);
+ if (!func)
+ return 1;
+
+ addr = tep_find_function_address(event->pevent, val);
+
+ trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
+ return 1;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_event_handler(pevent, -1, "kmem", "kfree",
+ call_site_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kmem", "kmalloc",
+ call_site_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kmem", "kmalloc_node",
+ call_site_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
+ call_site_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kmem",
+ "kmem_cache_alloc_node",
+ call_site_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_free",
+ call_site_handler, NULL);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_event_handler(pevent, -1, "kmem", "kfree",
+ call_site_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc",
+ call_site_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node",
+ call_site_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
+ call_site_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kmem",
+ "kmem_cache_alloc_node",
+ call_site_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free",
+ call_site_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
new file mode 100644
index 000000000..1d0d15906
--- /dev/null
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -0,0 +1,522 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "event-parse.h"
+
+#ifdef HAVE_UDIS86
+
+#include <udis86.h>
+
+static ud_t ud;
+
+static void init_disassembler(void)
+{
+ ud_init(&ud);
+ ud_set_syntax(&ud, UD_SYN_ATT);
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+ int cr0_pe, int eflags_vm,
+ int cs_d, int cs_l)
+{
+ int mode;
+
+ if (!cr0_pe)
+ mode = 16;
+ else if (eflags_vm)
+ mode = 16;
+ else if (cs_l)
+ mode = 64;
+ else if (cs_d)
+ mode = 32;
+ else
+ mode = 16;
+
+ ud_set_pc(&ud, rip);
+ ud_set_mode(&ud, mode);
+ ud_set_input_buffer(&ud, insn, len);
+ ud_disassemble(&ud);
+ return ud_insn_asm(&ud);
+}
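
The mode selection above follows the usual x86 decode rules: real mode and virtual-8086 mode decode as 16-bit, and in protected mode CS.L selects 64-bit, CS.D selects 32-bit, and otherwise 16-bit applies. Summarized:

    cr0_pe  eflags_vm  cs_l  cs_d     mode
      0         -        -     -       16   (real mode)
      1         1        -     -       16   (virtual-8086)
      1         0        1     -       64   (long mode)
      1         0        0     1       32   (32-bit protected)
      1         0        0     0       16   (16-bit protected)
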
+
+#else
+
+static void init_disassembler(void)
+{
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+ int cr0_pe, int eflags_vm,
+ int cs_d, int cs_l)
+{
+ static char out[15*3+1];
+ int i;
+
+ for (i = 0; i < len; ++i)
+ sprintf(out + i * 3, "%02x ", insn[i]);
+ out[len*3-1] = '\0';
+ return out;
+}
+
+#endif
+
+
+#define VMX_EXIT_REASONS \
+ _ER(EXCEPTION_NMI, 0) \
+ _ER(EXTERNAL_INTERRUPT, 1) \
+ _ER(TRIPLE_FAULT, 2) \
+ _ER(PENDING_INTERRUPT, 7) \
+ _ER(NMI_WINDOW, 8) \
+ _ER(TASK_SWITCH, 9) \
+ _ER(CPUID, 10) \
+ _ER(HLT, 12) \
+ _ER(INVD, 13) \
+ _ER(INVLPG, 14) \
+ _ER(RDPMC, 15) \
+ _ER(RDTSC, 16) \
+ _ER(VMCALL, 18) \
+ _ER(VMCLEAR, 19) \
+ _ER(VMLAUNCH, 20) \
+ _ER(VMPTRLD, 21) \
+ _ER(VMPTRST, 22) \
+ _ER(VMREAD, 23) \
+ _ER(VMRESUME, 24) \
+ _ER(VMWRITE, 25) \
+ _ER(VMOFF, 26) \
+ _ER(VMON, 27) \
+ _ER(CR_ACCESS, 28) \
+ _ER(DR_ACCESS, 29) \
+ _ER(IO_INSTRUCTION, 30) \
+ _ER(MSR_READ, 31) \
+ _ER(MSR_WRITE, 32) \
+ _ER(MWAIT_INSTRUCTION, 36) \
+ _ER(MONITOR_INSTRUCTION, 39) \
+ _ER(PAUSE_INSTRUCTION, 40) \
+ _ER(MCE_DURING_VMENTRY, 41) \
+ _ER(TPR_BELOW_THRESHOLD, 43) \
+ _ER(APIC_ACCESS, 44) \
+ _ER(EOI_INDUCED, 45) \
+ _ER(EPT_VIOLATION, 48) \
+ _ER(EPT_MISCONFIG, 49) \
+ _ER(INVEPT, 50) \
+ _ER(PREEMPTION_TIMER, 52) \
+ _ER(WBINVD, 54) \
+ _ER(XSETBV, 55) \
+ _ER(APIC_WRITE, 56) \
+ _ER(INVPCID, 58) \
+ _ER(PML_FULL, 62) \
+ _ER(XSAVES, 63) \
+ _ER(XRSTORS, 64)
+
+#define SVM_EXIT_REASONS \
+ _ER(EXIT_READ_CR0, 0x000) \
+ _ER(EXIT_READ_CR3, 0x003) \
+ _ER(EXIT_READ_CR4, 0x004) \
+ _ER(EXIT_READ_CR8, 0x008) \
+ _ER(EXIT_WRITE_CR0, 0x010) \
+ _ER(EXIT_WRITE_CR3, 0x013) \
+ _ER(EXIT_WRITE_CR4, 0x014) \
+ _ER(EXIT_WRITE_CR8, 0x018) \
+ _ER(EXIT_READ_DR0, 0x020) \
+ _ER(EXIT_READ_DR1, 0x021) \
+ _ER(EXIT_READ_DR2, 0x022) \
+ _ER(EXIT_READ_DR3, 0x023) \
+ _ER(EXIT_READ_DR4, 0x024) \
+ _ER(EXIT_READ_DR5, 0x025) \
+ _ER(EXIT_READ_DR6, 0x026) \
+ _ER(EXIT_READ_DR7, 0x027) \
+ _ER(EXIT_WRITE_DR0, 0x030) \
+ _ER(EXIT_WRITE_DR1, 0x031) \
+ _ER(EXIT_WRITE_DR2, 0x032) \
+ _ER(EXIT_WRITE_DR3, 0x033) \
+ _ER(EXIT_WRITE_DR4, 0x034) \
+ _ER(EXIT_WRITE_DR5, 0x035) \
+ _ER(EXIT_WRITE_DR6, 0x036) \
+ _ER(EXIT_WRITE_DR7, 0x037) \
+ _ER(EXIT_EXCP_BASE, 0x040) \
+ _ER(EXIT_INTR, 0x060) \
+ _ER(EXIT_NMI, 0x061) \
+ _ER(EXIT_SMI, 0x062) \
+ _ER(EXIT_INIT, 0x063) \
+ _ER(EXIT_VINTR, 0x064) \
+ _ER(EXIT_CR0_SEL_WRITE, 0x065) \
+ _ER(EXIT_IDTR_READ, 0x066) \
+ _ER(EXIT_GDTR_READ, 0x067) \
+ _ER(EXIT_LDTR_READ, 0x068) \
+ _ER(EXIT_TR_READ, 0x069) \
+ _ER(EXIT_IDTR_WRITE, 0x06a) \
+ _ER(EXIT_GDTR_WRITE, 0x06b) \
+ _ER(EXIT_LDTR_WRITE, 0x06c) \
+ _ER(EXIT_TR_WRITE, 0x06d) \
+ _ER(EXIT_RDTSC, 0x06e) \
+ _ER(EXIT_RDPMC, 0x06f) \
+ _ER(EXIT_PUSHF, 0x070) \
+ _ER(EXIT_POPF, 0x071) \
+ _ER(EXIT_CPUID, 0x072) \
+ _ER(EXIT_RSM, 0x073) \
+ _ER(EXIT_IRET, 0x074) \
+ _ER(EXIT_SWINT, 0x075) \
+ _ER(EXIT_INVD, 0x076) \
+ _ER(EXIT_PAUSE, 0x077) \
+ _ER(EXIT_HLT, 0x078) \
+ _ER(EXIT_INVLPG, 0x079) \
+ _ER(EXIT_INVLPGA, 0x07a) \
+ _ER(EXIT_IOIO, 0x07b) \
+ _ER(EXIT_MSR, 0x07c) \
+ _ER(EXIT_TASK_SWITCH, 0x07d) \
+ _ER(EXIT_FERR_FREEZE, 0x07e) \
+ _ER(EXIT_SHUTDOWN, 0x07f) \
+ _ER(EXIT_VMRUN, 0x080) \
+ _ER(EXIT_VMMCALL, 0x081) \
+ _ER(EXIT_VMLOAD, 0x082) \
+ _ER(EXIT_VMSAVE, 0x083) \
+ _ER(EXIT_STGI, 0x084) \
+ _ER(EXIT_CLGI, 0x085) \
+ _ER(EXIT_SKINIT, 0x086) \
+ _ER(EXIT_RDTSCP, 0x087) \
+ _ER(EXIT_ICEBP, 0x088) \
+ _ER(EXIT_WBINVD, 0x089) \
+ _ER(EXIT_MONITOR, 0x08a) \
+ _ER(EXIT_MWAIT, 0x08b) \
+ _ER(EXIT_MWAIT_COND, 0x08c) \
+ _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_ERR, -1)
+
+#define _ER(reason, val) { #reason, val },
+struct str_values {
+ const char *str;
+ int val;
+};
+
+static struct str_values vmx_exit_reasons[] = {
+ VMX_EXIT_REASONS
+ { NULL, -1}
+};
+
+static struct str_values svm_exit_reasons[] = {
+ SVM_EXIT_REASONS
+ { NULL, -1}
+};
+
+static struct isa_exit_reasons {
+ unsigned isa;
+ struct str_values *strings;
+} isa_exit_reasons[] = {
+ { .isa = 1, .strings = vmx_exit_reasons },
+ { .isa = 2, .strings = svm_exit_reasons },
+ { }
+};
+
+static const char *find_exit_reason(unsigned isa, int val)
+{
+ struct str_values *strings = NULL;
+ int i;
+
+ for (i = 0; isa_exit_reasons[i].strings; ++i)
+ if (isa_exit_reasons[i].isa == isa) {
+ strings = isa_exit_reasons[i].strings;
+ break;
+ }
+ if (!strings)
+ return "UNKNOWN-ISA";
+ for (i = 0; strings[i].val >= 0; i++)
+ if (strings[i].val == val)
+ break;
+
+ return strings[i].str;
+}
+
+static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, const char *field)
+{
+ unsigned long long isa;
+ unsigned long long val;
+ const char *reason;
+
+ if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
+ return -1;
+
+ if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
+ isa = 1;
+
+ reason = find_exit_reason(isa, val);
+ if (reason)
+ trace_seq_printf(s, "reason %s", reason);
+ else
+ trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
+ return 0;
+}
+
+static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long info1 = 0, info2 = 0;
+
+ if (print_exit_reason(s, record, event, "exit_reason") < 0)
+ return -1;
+
+ tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
+
+ if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
+ && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
+ trace_seq_printf(s, " info %llx %llx", info1, info2);
+
+ return 0;
+}
+
+#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
+#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
+#define KVM_EMUL_INSN_F_CS_D (1 << 2)
+#define KVM_EMUL_INSN_F_CS_L (1 << 3)
+
+static int kvm_emulate_insn_handler(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long rip, csbase, len, flags, failed;
+ int llen;
+ uint8_t *insn;
+ const char *disasm;
+
+ if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+ return -1;
+
+ if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
+ return -1;
+
+ if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
+ return -1;
+
+ if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
+ return -1;
+
+ if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
+ return -1;
+
+ insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
+ if (!insn)
+ return -1;
+
+ disasm = disassemble(insn, len, rip,
+ flags & KVM_EMUL_INSN_F_CR0_PE,
+ flags & KVM_EMUL_INSN_F_EFL_VM,
+ flags & KVM_EMUL_INSN_F_CS_D,
+ flags & KVM_EMUL_INSN_F_CS_L);
+
+ trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
+ failed ? " FAIL" : "");
+ return 0;
+}
+
+
+static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ if (print_exit_reason(s, record, event, "exit_code") < 0)
+ return -1;
+
+ tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
+ tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
+ tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
+ tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
+
+ return 0;
+}
+
+static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
+
+ return kvm_nested_vmexit_inject_handler(s, record, event, context);
+}
+
+union kvm_mmu_page_role {
+ unsigned word;
+ struct {
+ unsigned level:4;
+ unsigned cr4_pae:1;
+ unsigned quadrant:2;
+ unsigned direct:1;
+ unsigned access:3;
+ unsigned invalid:1;
+ unsigned nxe:1;
+ unsigned cr0_wp:1;
+ unsigned smep_and_not_wp:1;
+ unsigned smap_and_not_wp:1;
+ unsigned pad_for_nice_hex_output:8;
+ unsigned smm:8;
+ };
+};
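
The union overlays C bitfields on the raw role word, so the decoded members are only meaningful when the trace file and the host agree on byte order (bitfield layout is implementation-defined, which is why the handler below falls back to printing the raw word otherwise). For instance, on a typical little-endian ABI:

    union kvm_mmu_page_role role = { .word = 0x23 };
    /* 0x23 = 0b100011: role.level = 3 (bits 0-3),
     * role.cr4_pae = 0 (bit 4), role.quadrant = 1 (bits 5-6) */
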
+
+static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long val;
+ static const char *access_str[] = {
+ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+ };
+ union kvm_mmu_page_role role;
+
+ if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
+ return -1;
+
+ role.word = (int)val;
+
+ /*
+ * We can only use the structure if the file is of the same
+ * endianness.
+ */
+ if (tep_is_file_bigendian(event->pevent) ==
+ tep_is_host_bigendian(event->pevent)) {
+
+ trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
+ role.level,
+ role.quadrant,
+ role.direct ? " direct" : "",
+ access_str[role.access],
+ role.invalid ? " invalid" : "",
+ role.cr4_pae ? "" : "!",
+ role.nxe ? "" : "!",
+ role.cr0_wp ? "" : "!",
+ role.smep_and_not_wp ? " smep" : "",
+ role.smap_and_not_wp ? " smap" : "",
+ role.smm ? " smm" : "");
+ } else
+ trace_seq_printf(s, "WORD: %08x", role.word);
+
+ tep_print_num_field(s, " root %u ", event,
+ "root_count", record, 1);
+
+ if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0);
+ return 0;
+}
+
+static int kvm_mmu_get_page_handler(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long val;
+
+ if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_printf(s, "%s ", val ? "new" : "existing");
+
+ if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_printf(s, "sp gfn %llx ", val);
+ return kvm_mmu_print_role(s, record, event, context);
+}
+
+#define PT_WRITABLE_SHIFT 1
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+
+static unsigned long long
+process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
+{
+ unsigned long pte = args[0];
+ return pte & PT_WRITABLE_MASK;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ init_disassembler();
+
+ tep_register_event_handler(pevent, -1, "kvm", "kvm_exit",
+ kvm_exit_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
+ kvm_emulate_insn_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
+ kvm_nested_vmexit_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
+ kvm_nested_vmexit_inject_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
+ kvm_mmu_get_page_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
+ kvm_mmu_print_role, NULL);
+
+ tep_register_event_handler(pevent, -1,
+ "kvmmmu", "kvm_mmu_unsync_page",
+ kvm_mmu_print_role, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
+ kvm_mmu_print_role, NULL);
+
+ tep_register_event_handler(pevent, -1, "kvmmmu",
+ "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+ NULL);
+
+ tep_register_print_function(pevent,
+ process_is_writable_pte,
+ TEP_FUNC_ARG_INT,
+ "is_writable_pte",
+ TEP_FUNC_ARG_LONG,
+ TEP_FUNC_ARG_VOID);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_event_handler(pevent, -1, "kvm", "kvm_exit",
+ kvm_exit_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
+ kvm_emulate_insn_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit",
+ kvm_nested_vmexit_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject",
+ kvm_nested_vmexit_inject_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
+ kvm_mmu_get_page_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
+ kvm_mmu_print_role, NULL);
+
+ tep_unregister_event_handler(pevent, -1,
+ "kvmmmu", "kvm_mmu_unsync_page",
+ kvm_mmu_print_role, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
+ kvm_mmu_print_role, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "kvmmmu",
+ "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+ NULL);
+
+ tep_unregister_print_function(pevent, process_is_writable_pte,
+ "is_writable_pte");
+}
diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c
new file mode 100644
index 000000000..de50a5316
--- /dev/null
+++ b/tools/lib/traceevent/plugin_mac80211.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+#define INDENT 65
+
+static void print_string(struct trace_seq *s, struct event_format *event,
+ const char *name, const void *data)
+{
+ struct format_field *f = tep_find_field(event, name);
+ int offset;
+ int length;
+
+ if (!f) {
+ trace_seq_printf(s, "NOTFOUND:%s", name);
+ return;
+ }
+
+ offset = f->offset;
+ length = f->size;
+
+ if (!strncmp(f->type, "__data_loc", 10)) {
+ unsigned long long v;
+ if (tep_read_number_field(f, data, &v)) {
+ trace_seq_printf(s, "invalid_data_loc");
+ return;
+ }
+ offset = v & 0xffff;
+ length = v >> 16;
+ }
+
+ trace_seq_printf(s, "%.*s", length, (char *)data + offset);
+}
+
+#define SF(fn) tep_print_num_field(s, fn ":%d", event, fn, record, 0)
+#define SFX(fn) tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
+#define SP() trace_seq_putc(s, ' ')
+
+static int drv_bss_info_changed(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ void *data = record->data;
+
+ print_string(s, event, "wiphy_name", data);
+ trace_seq_printf(s, " vif:");
+ print_string(s, event, "vif_name", data);
+ tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
+
+ trace_seq_printf(s, "\n%*s", INDENT, "");
+ SF("assoc"); SP();
+ SF("aid"); SP();
+ SF("cts"); SP();
+ SF("shortpre"); SP();
+ SF("shortslot"); SP();
+ SF("dtimper"); SP();
+ trace_seq_printf(s, "\n%*s", INDENT, "");
+ SF("bcnint"); SP();
+ SFX("assoc_cap"); SP();
+ SFX("basic_rates"); SP();
+ SF("enable_beacon");
+ trace_seq_printf(s, "\n%*s", INDENT, "");
+ SF("ht_operation_mode");
+
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_event_handler(pevent, -1, "mac80211",
+ "drv_bss_info_changed",
+ drv_bss_info_changed, NULL);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_event_handler(pevent, -1, "mac80211",
+ "drv_bss_info_changed",
+ drv_bss_info_changed, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
new file mode 100644
index 000000000..eecb4bd95
--- /dev/null
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+static void write_state(struct trace_seq *s, int val)
+{
+ const char states[] = "SDTtZXxW";
+ int found = 0;
+ int i;
+
+ for (i = 0; i < (sizeof(states) - 1); i++) {
+ if (!(val & (1 << i)))
+ continue;
+
+ if (found)
+ trace_seq_putc(s, '|');
+
+ found = 1;
+ trace_seq_putc(s, states[i]);
+ }
+
+ if (!found)
+ trace_seq_putc(s, 'R');
+}
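
write_state() maps the scheduler's state bitmask onto ftrace's one-letter codes, joining multiple set bits with '|' and defaulting to 'R' (running) when no bit is set. Some hypothetical values, decoded per the loop above:

    /*
     * val = 0x00  ->  "R"    (no bits set: running)
     * val = 0x01  ->  "S"    (bit 0: interruptible sleep)
     * val = 0x02  ->  "D"    (bit 1: uninterruptible sleep)
     * val = 0x05  ->  "S|T"  (bits 0 and 2, joined with '|')
     */
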
+
+static void write_and_save_comm(struct format_field *field,
+ struct tep_record *record,
+ struct trace_seq *s, int pid)
+{
+ const char *comm;
+ int len;
+
+ comm = (char *)(record->data + field->offset);
+ len = s->len;
+ trace_seq_printf(s, "%.*s",
+ field->size, comm);
+
+ /* make sure the comm has a \0 at the end. */
+ trace_seq_terminate(s);
+ comm = &s->buffer[len];
+
+ /* Register the comm/pid mapping; this also handles dups */
+ tep_register_comm(field->event->pevent, comm, pid);
+}
+
+static int sched_wakeup_handler(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ struct format_field *field;
+ unsigned long long val;
+
+ if (tep_get_field_val(s, event, "pid", record, &val, 1))
+ return trace_seq_putc(s, '!');
+
+ field = tep_find_any_field(event, "comm");
+ if (field) {
+ write_and_save_comm(field, record, s, val);
+ trace_seq_putc(s, ':');
+ }
+ trace_seq_printf(s, "%lld", val);
+
+ if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
+ trace_seq_printf(s, " [%lld]", val);
+
+ if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
+ trace_seq_printf(s, " success=%lld", val);
+
+ if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
+ trace_seq_printf(s, " CPU:%03llu", val);
+
+ return 0;
+}
+
+static int sched_switch_handler(struct trace_seq *s,
+ struct tep_record *record,
+ struct event_format *event, void *context)
+{
+ struct format_field *field;
+ unsigned long long val;
+
+ if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
+ return trace_seq_putc(s, '!');
+
+ field = tep_find_any_field(event, "prev_comm");
+ if (field) {
+ write_and_save_comm(field, record, s, val);
+ trace_seq_putc(s, ':');
+ }
+ trace_seq_printf(s, "%lld ", val);
+
+ if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
+ trace_seq_printf(s, "[%d] ", (int) val);
+
+ if (tep_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
+ write_state(s, val);
+
+ trace_seq_puts(s, " ==> ");
+
+ if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
+ return trace_seq_putc(s, '!');
+
+ field = tep_find_any_field(event, "next_comm");
+ if (field) {
+ write_and_save_comm(field, record, s, val);
+ trace_seq_putc(s, ':');
+ }
+ trace_seq_printf(s, "%lld", val);
+
+ if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
+ trace_seq_printf(s, " [%d]", (int) val);
+
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_event_handler(pevent, -1, "sched", "sched_switch",
+ sched_switch_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "sched", "sched_wakeup",
+ sched_wakeup_handler, NULL);
+
+ tep_register_event_handler(pevent, -1, "sched", "sched_wakeup_new",
+ sched_wakeup_handler, NULL);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_event_handler(pevent, -1, "sched", "sched_switch",
+ sched_switch_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup",
+ sched_wakeup_handler, NULL);
+
+ tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new",
+ sched_wakeup_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c
new file mode 100644
index 000000000..5ec346f6b
--- /dev/null
+++ b/tools/lib/traceevent/plugin_scsi.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include "event-parse.h"
+
+typedef unsigned long sector_t;
+typedef uint64_t u64;
+typedef unsigned int u32;
+
+/*
+ * SCSI opcodes
+ */
+#define TEST_UNIT_READY 0x00
+#define REZERO_UNIT 0x01
+#define REQUEST_SENSE 0x03
+#define FORMAT_UNIT 0x04
+#define READ_BLOCK_LIMITS 0x05
+#define REASSIGN_BLOCKS 0x07
+#define INITIALIZE_ELEMENT_STATUS 0x07
+#define READ_6 0x08
+#define WRITE_6 0x0a
+#define SEEK_6 0x0b
+#define READ_REVERSE 0x0f
+#define WRITE_FILEMARKS 0x10
+#define SPACE 0x11
+#define INQUIRY 0x12
+#define RECOVER_BUFFERED_DATA 0x14
+#define MODE_SELECT 0x15
+#define RESERVE 0x16
+#define RELEASE 0x17
+#define COPY 0x18
+#define ERASE 0x19
+#define MODE_SENSE 0x1a
+#define START_STOP 0x1b
+#define RECEIVE_DIAGNOSTIC 0x1c
+#define SEND_DIAGNOSTIC 0x1d
+#define ALLOW_MEDIUM_REMOVAL 0x1e
+
+#define READ_FORMAT_CAPACITIES 0x23
+#define SET_WINDOW 0x24
+#define READ_CAPACITY 0x25
+#define READ_10 0x28
+#define WRITE_10 0x2a
+#define SEEK_10 0x2b
+#define POSITION_TO_ELEMENT 0x2b
+#define WRITE_VERIFY 0x2e
+#define VERIFY 0x2f
+#define SEARCH_HIGH 0x30
+#define SEARCH_EQUAL 0x31
+#define SEARCH_LOW 0x32
+#define SET_LIMITS 0x33
+#define PRE_FETCH 0x34
+#define READ_POSITION 0x34
+#define SYNCHRONIZE_CACHE 0x35
+#define LOCK_UNLOCK_CACHE 0x36
+#define READ_DEFECT_DATA 0x37
+#define MEDIUM_SCAN 0x38
+#define COMPARE 0x39
+#define COPY_VERIFY 0x3a
+#define WRITE_BUFFER 0x3b
+#define READ_BUFFER 0x3c
+#define UPDATE_BLOCK 0x3d
+#define READ_LONG 0x3e
+#define WRITE_LONG 0x3f
+#define CHANGE_DEFINITION 0x40
+#define WRITE_SAME 0x41
+#define UNMAP 0x42
+#define READ_TOC 0x43
+#define READ_HEADER 0x44
+#define GET_EVENT_STATUS_NOTIFICATION 0x4a
+#define LOG_SELECT 0x4c
+#define LOG_SENSE 0x4d
+#define XDWRITEREAD_10 0x53
+#define MODE_SELECT_10 0x55
+#define RESERVE_10 0x56
+#define RELEASE_10 0x57
+#define MODE_SENSE_10 0x5a
+#define PERSISTENT_RESERVE_IN 0x5e
+#define PERSISTENT_RESERVE_OUT 0x5f
+#define VARIABLE_LENGTH_CMD 0x7f
+#define REPORT_LUNS 0xa0
+#define SECURITY_PROTOCOL_IN 0xa2
+#define MAINTENANCE_IN 0xa3
+#define MAINTENANCE_OUT 0xa4
+#define MOVE_MEDIUM 0xa5
+#define EXCHANGE_MEDIUM 0xa6
+#define READ_12 0xa8
+#define SERVICE_ACTION_OUT_12 0xa9
+#define WRITE_12 0xaa
+#define SERVICE_ACTION_IN_12 0xab
+#define WRITE_VERIFY_12 0xae
+#define VERIFY_12 0xaf
+#define SEARCH_HIGH_12 0xb0
+#define SEARCH_EQUAL_12 0xb1
+#define SEARCH_LOW_12 0xb2
+#define SECURITY_PROTOCOL_OUT 0xb5
+#define READ_ELEMENT_STATUS 0xb8
+#define SEND_VOLUME_TAG 0xb6
+#define WRITE_LONG_2 0xea
+#define EXTENDED_COPY 0x83
+#define RECEIVE_COPY_RESULTS 0x84
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT 0x87
+#define READ_16 0x88
+#define WRITE_16 0x8a
+#define READ_ATTRIBUTE 0x8c
+#define WRITE_ATTRIBUTE 0x8d
+#define VERIFY_16 0x8f
+#define SYNCHRONIZE_CACHE_16 0x91
+#define WRITE_SAME_16 0x93
+#define SERVICE_ACTION_BIDIRECTIONAL 0x9d
+#define SERVICE_ACTION_IN_16 0x9e
+#define SERVICE_ACTION_OUT_16 0x9f
+/* values for service action in */
+#define SAI_READ_CAPACITY_16 0x10
+#define SAI_GET_LBA_STATUS 0x12
+/* values for VARIABLE_LENGTH_CMD service action codes
+ * see spc4r17 Section D.3.5, table D.7 and D.8 */
+#define VLC_SA_RECEIVE_CREDENTIAL 0x1800
+/* values for maintenance in */
+#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
+#define MI_REPORT_TARGET_PGS 0x0a
+#define MI_REPORT_ALIASES 0x0b
+#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
+#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
+#define MI_REPORT_PRIORITY 0x0e
+#define MI_REPORT_TIMESTAMP 0x0f
+#define MI_MANAGEMENT_PROTOCOL_IN 0x10
+/* value for MI_REPORT_TARGET_PGS ext header */
+#define MI_EXT_HDR_PARAM_FMT 0x20
+/* values for maintenance out */
+#define MO_SET_IDENTIFYING_INFORMATION 0x06
+#define MO_SET_TARGET_PGS 0x0a
+#define MO_CHANGE_ALIASES 0x0b
+#define MO_SET_PRIORITY 0x0e
+#define MO_SET_TIMESTAMP 0x0f
+#define MO_MANAGEMENT_PROTOCOL_OUT 0x10
+/* values for variable length command */
+#define XDREAD_32 0x03
+#define XDWRITE_32 0x04
+#define XPWRITE_32 0x06
+#define XDWRITEREAD_32 0x07
+#define READ_32 0x09
+#define VERIFY_32 0x0a
+#define WRITE_32 0x0b
+#define WRITE_SAME_32 0x0d
+
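+/*
+ * For 16-byte CDBs the service action is the low 5 bits of byte 1;
+ * for 32-byte (variable length) CDBs it is the big-endian value held
+ * in bytes 8 and 9.
+ */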
+#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
+#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
+
+static const char *
+scsi_trace_misc(struct trace_seq *, unsigned char *, int);
+
+static const char *
+scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= ((cdb[1] & 0x1F) << 16);
+ lba |= (cdb[2] << 8);
+ lba |= cdb[3];
+ txlen = cdb[4];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu",
+ (unsigned long long)lba, (unsigned long long)txlen);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= (cdb[2] << 24);
+ lba |= (cdb[3] << 16);
+ lba |= (cdb[4] << 8);
+ lba |= cdb[5];
+ txlen |= (cdb[7] << 8);
+ txlen |= cdb[8];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+ (unsigned long long)lba, (unsigned long long)txlen,
+ cdb[1] >> 5);
+
+ if (cdb[0] == WRITE_SAME)
+ trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= (cdb[2] << 24);
+ lba |= (cdb[3] << 16);
+ lba |= (cdb[4] << 8);
+ lba |= cdb[5];
+ txlen |= (cdb[6] << 24);
+ txlen |= (cdb[7] << 16);
+ txlen |= (cdb[8] << 8);
+ txlen |= cdb[9];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+ (unsigned long long)lba, (unsigned long long)txlen,
+ cdb[1] >> 5);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= ((u64)cdb[2] << 56);
+ lba |= ((u64)cdb[3] << 48);
+ lba |= ((u64)cdb[4] << 40);
+ lba |= ((u64)cdb[5] << 32);
+ lba |= (cdb[6] << 24);
+ lba |= (cdb[7] << 16);
+ lba |= (cdb[8] << 8);
+ lba |= cdb[9];
+ txlen |= (cdb[10] << 24);
+ txlen |= (cdb[11] << 16);
+ txlen |= (cdb[12] << 8);
+ txlen |= cdb[13];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+ (unsigned long long)lba, (unsigned long long)txlen,
+ cdb[1] >> 5);
+
+ if (cdb[0] == WRITE_SAME_16)
+ trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len, *cmd;
+ sector_t lba = 0, txlen = 0;
+ u32 ei_lbrt = 0;
+
+ switch (SERVICE_ACTION32(cdb)) {
+ case READ_32:
+ cmd = "READ";
+ break;
+ case VERIFY_32:
+ cmd = "VERIFY";
+ break;
+ case WRITE_32:
+ cmd = "WRITE";
+ break;
+ case WRITE_SAME_32:
+ cmd = "WRITE_SAME";
+ break;
+ default:
+ trace_seq_printf(p, "UNKNOWN");
+ goto out;
+ }
+
+ lba |= ((u64)cdb[12] << 56);
+ lba |= ((u64)cdb[13] << 48);
+ lba |= ((u64)cdb[14] << 40);
+ lba |= ((u64)cdb[15] << 32);
+ lba |= (cdb[16] << 24);
+ lba |= (cdb[17] << 16);
+ lba |= (cdb[18] << 8);
+ lba |= cdb[19];
+ ei_lbrt |= (cdb[20] << 24);
+ ei_lbrt |= (cdb[21] << 16);
+ ei_lbrt |= (cdb[22] << 8);
+ ei_lbrt |= cdb[23];
+ txlen |= (cdb[28] << 24);
+ txlen |= (cdb[29] << 16);
+ txlen |= (cdb[30] << 8);
+ txlen |= cdb[31];
+
+ trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
+ cmd, (unsigned long long)lba,
+ (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
+
+ if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
+ trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
+
+out:
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ unsigned int regions = cdb[7] << 8 | cdb[8];
+
+ trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len, *cmd;
+ sector_t lba = 0;
+ u32 alloc_len = 0;
+
+ switch (SERVICE_ACTION16(cdb)) {
+ case SAI_READ_CAPACITY_16:
+ cmd = "READ_CAPACITY_16";
+ break;
+ case SAI_GET_LBA_STATUS:
+ cmd = "GET_LBA_STATUS";
+ break;
+ default:
+ trace_seq_printf(p, "UNKNOWN");
+ goto out;
+ }
+
+ lba |= ((u64)cdb[2] << 56);
+ lba |= ((u64)cdb[3] << 48);
+ lba |= ((u64)cdb[4] << 40);
+ lba |= ((u64)cdb[5] << 32);
+ lba |= (cdb[6] << 24);
+ lba |= (cdb[7] << 16);
+ lba |= (cdb[8] << 8);
+ lba |= cdb[9];
+ alloc_len |= (cdb[10] << 24);
+ alloc_len |= (cdb[11] << 16);
+ alloc_len |= (cdb[12] << 8);
+ alloc_len |= cdb[13];
+
+ trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
+ (unsigned long long)lba, alloc_len);
+
+out:
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ switch (SERVICE_ACTION32(cdb)) {
+ case READ_32:
+ case VERIFY_32:
+ case WRITE_32:
+ case WRITE_SAME_32:
+ return scsi_trace_rw32(p, cdb, len);
+ default:
+ return scsi_trace_misc(p, cdb, len);
+ }
+}
+
+static const char *
+scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+
+ trace_seq_printf(p, "-");
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *
+scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ switch (cdb[0]) {
+ case READ_6:
+ case WRITE_6:
+ return scsi_trace_rw6(p, cdb, len);
+ case READ_10:
+ case VERIFY:
+ case WRITE_10:
+ case WRITE_SAME:
+ return scsi_trace_rw10(p, cdb, len);
+ case READ_12:
+ case VERIFY_12:
+ case WRITE_12:
+ return scsi_trace_rw12(p, cdb, len);
+ case READ_16:
+ case VERIFY_16:
+ case WRITE_16:
+ case WRITE_SAME_16:
+ return scsi_trace_rw16(p, cdb, len);
+ case UNMAP:
+ return scsi_trace_unmap(p, cdb, len);
+ case SERVICE_ACTION_IN_16:
+ return scsi_trace_service_action_in(p, cdb, len);
+ case VARIABLE_LENGTH_CMD:
+ return scsi_trace_varlen(p, cdb, len);
+ default:
+ return scsi_trace_misc(p, cdb, len);
+ }
+}
+
+unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
+ unsigned long long *args)
+{
+ scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_print_function(pevent,
+ process_scsi_trace_parse_cdb,
+ TEP_FUNC_ARG_STRING,
+ "scsi_trace_parse_cdb",
+ TEP_FUNC_ARG_PTR,
+ TEP_FUNC_ARG_PTR,
+ TEP_FUNC_ARG_INT,
+ TEP_FUNC_ARG_VOID);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_print_function(pevent, process_scsi_trace_parse_cdb,
+ "scsi_trace_parse_cdb");
+}
diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c
new file mode 100644
index 000000000..b2acbd6e9
--- /dev/null
+++ b/tools/lib/traceevent/plugin_xen.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "event-parse.h"
+
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_mmu_update 1
+#define __HYPERVISOR_set_gdt 2
+#define __HYPERVISOR_stack_switch 3
+#define __HYPERVISOR_set_callbacks 4
+#define __HYPERVISOR_fpu_taskswitch 5
+#define __HYPERVISOR_sched_op_compat 6
+#define __HYPERVISOR_dom0_op 7
+#define __HYPERVISOR_set_debugreg 8
+#define __HYPERVISOR_get_debugreg 9
+#define __HYPERVISOR_update_descriptor 10
+#define __HYPERVISOR_memory_op 12
+#define __HYPERVISOR_multicall 13
+#define __HYPERVISOR_update_va_mapping 14
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_event_channel_op_compat 16
+#define __HYPERVISOR_xen_version 17
+#define __HYPERVISOR_console_io 18
+#define __HYPERVISOR_physdev_op_compat 19
+#define __HYPERVISOR_grant_table_op 20
+#define __HYPERVISOR_vm_assist 21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op 24
+#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_acm_op 27
+#define __HYPERVISOR_nmi_op 28
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_callback_op 30
+#define __HYPERVISOR_xenoprof_op 31
+#define __HYPERVISOR_event_channel_op 32
+#define __HYPERVISOR_physdev_op 33
+#define __HYPERVISOR_hvm_op 34
+#define __HYPERVISOR_tmem_op 38
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0 48
+#define __HYPERVISOR_arch_1 49
+#define __HYPERVISOR_arch_2 50
+#define __HYPERVISOR_arch_3 51
+#define __HYPERVISOR_arch_4 52
+#define __HYPERVISOR_arch_5 53
+#define __HYPERVISOR_arch_6 54
+#define __HYPERVISOR_arch_7 55
+
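+/*
+ * N(x) expands to a designated initializer; e.g. N(mmu_update) becomes
+ * [__HYPERVISOR_mmu_update] = "(mmu_update)", so each hypercall number
+ * directly indexes its printable name.
+ */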
+#define N(x) [__HYPERVISOR_##x] = "("#x")"
+static const char *xen_hypercall_names[] = {
+ N(set_trap_table),
+ N(mmu_update),
+ N(set_gdt),
+ N(stack_switch),
+ N(set_callbacks),
+ N(fpu_taskswitch),
+ N(sched_op_compat),
+ N(dom0_op),
+ N(set_debugreg),
+ N(get_debugreg),
+ N(update_descriptor),
+ N(memory_op),
+ N(multicall),
+ N(update_va_mapping),
+ N(set_timer_op),
+ N(event_channel_op_compat),
+ N(xen_version),
+ N(console_io),
+ N(physdev_op_compat),
+ N(grant_table_op),
+ N(vm_assist),
+ N(update_va_mapping_otherdomain),
+ N(iret),
+ N(vcpu_op),
+ N(set_segment_base),
+ N(mmuext_op),
+ N(acm_op),
+ N(nmi_op),
+ N(sched_op),
+ N(callback_op),
+ N(xenoprof_op),
+ N(event_channel_op),
+ N(physdev_op),
+ N(hvm_op),
+
+/* Architecture-specific hypercall definitions. */
+ N(arch_0),
+ N(arch_1),
+ N(arch_2),
+ N(arch_3),
+ N(arch_4),
+ N(arch_5),
+ N(arch_6),
+ N(arch_7),
+};
+#undef N
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static const char *xen_hypercall_name(unsigned op)
+{
+ if (op < ARRAY_SIZE(xen_hypercall_names) &&
+ xen_hypercall_names[op] != NULL)
+ return xen_hypercall_names[op];
+
+ return "";
+}
+
+unsigned long long process_xen_hypercall_name(struct trace_seq *s,
+ unsigned long long *args)
+{
+ unsigned int op = args[0];
+
+ trace_seq_printf(s, "%s", xen_hypercall_name(op));
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *pevent)
+{
+ tep_register_print_function(pevent,
+ process_xen_hypercall_name,
+ TEP_FUNC_ARG_STRING,
+ "xen_hypercall_name",
+ TEP_FUNC_ARG_INT,
+ TEP_FUNC_ARG_VOID);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent)
+{
+ tep_unregister_print_function(pevent, process_xen_hypercall_name,
+ "xen_hypercall_name");
+}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
new file mode 100644
index 000000000..e3bac4543
--- /dev/null
+++ b/tools/lib/traceevent/trace-seq.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <asm/bug.h>
+#include "event-parse.h"
+#include "event-utils.h"
+
+/*
+ * TRACE_SEQ_POISON is used to catch any use of
+ * a trace_seq structure after it has been destroyed.
+ */
+#define TRACE_SEQ_POISON ((void *)0xdeadbeef)
+#define TRACE_SEQ_CHECK(s) \
+do { \
+ if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON, \
+ "Usage of trace_seq after it was destroyed")) \
+ (s)->state = TRACE_SEQ__BUFFER_POISONED; \
+} while (0)
+
+#define TRACE_SEQ_CHECK_RET_N(s, n) \
+do { \
+ TRACE_SEQ_CHECK(s); \
+ if ((s)->state != TRACE_SEQ__GOOD) \
+ return n; \
+} while (0)
+
+#define TRACE_SEQ_CHECK_RET(s) TRACE_SEQ_CHECK_RET_N(s, )
+#define TRACE_SEQ_CHECK_RET0(s) TRACE_SEQ_CHECK_RET_N(s, 0)
+
+/**
+ * trace_seq_init - initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to initialize
+ */
+void trace_seq_init(struct trace_seq *s)
+{
+ s->len = 0;
+ s->readpos = 0;
+ s->buffer_size = TRACE_SEQ_BUF_SIZE;
+ s->buffer = malloc(s->buffer_size);
+ if (s->buffer != NULL)
+ s->state = TRACE_SEQ__GOOD;
+ else
+ s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+}
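+
+/*
+ * A minimal usage sketch (illustrative only):
+ *
+ *	struct trace_seq s;
+ *
+ *	trace_seq_init(&s);
+ *	if (trace_seq_printf(&s, "pid=%d\n", 42))
+ *		trace_seq_do_printf(&s);
+ *	trace_seq_destroy(&s);
+ */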
+
+/**
+ * trace_seq_reset - re-initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to reset
+ */
+void trace_seq_reset(struct trace_seq *s)
+{
+ if (!s)
+ return;
+ TRACE_SEQ_CHECK(s);
+ s->len = 0;
+ s->readpos = 0;
+}
+
+/**
+ * trace_seq_destroy - free up memory of a trace_seq
+ * @s: a pointer to the trace_seq to free the buffer
+ *
+ * Only frees the buffer, not the trace_seq struct itself.
+ */
+void trace_seq_destroy(struct trace_seq *s)
+{
+ if (!s)
+ return;
+ TRACE_SEQ_CHECK_RET(s);
+ free(s->buffer);
+ s->buffer = TRACE_SEQ_POISON;
+}
+
+static void expand_buffer(struct trace_seq *s)
+{
+ char *buf;
+
+ buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE);
+ if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) {
+ s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+ return;
+ }
+
+ s->buffer = buf;
+ s->buffer_size += TRACE_SEQ_BUF_SIZE;
+}
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * Returns 1 on success, or 0 if the trace_seq is in an error state
+ * (for example, after a failed buffer allocation).
+ *
+ * The tracer may use either sequence operations or its own
+ * copy-to-user routines. To simplify formatting of a trace,
+ * trace_seq_printf() is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+ va_list ap;
+ int len;
+ int ret;
+
+ try_again:
+ TRACE_SEQ_CHECK_RET0(s);
+
+ len = (s->buffer_size - 1) - s->len;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+ va_end(ap);
+
+ if (ret >= len) {
+ expand_buffer(s);
+ goto try_again;
+ }
+
+ s->len += ret;
+
+ return 1;
+}
+
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ * @args: variable argument list for @fmt
+ *
+ * The tracer may use either sequence operations or its own
+ * copy-to-user routines. To simplify formatting of a trace,
+ * trace_seq_vprintf() is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+	va_list ap;
+	int len;
+	int ret;
+
+ try_again:
+	TRACE_SEQ_CHECK_RET0(s);
+
+	len = (s->buffer_size - 1) - s->len;
+
+	/* Copy @args so the list can be re-read if we have to retry. */
+	va_copy(ap, args);
+	ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+	va_end(ap);
+
+	if (ret >= len) {
+		expand_buffer(s);
+		goto try_again;
+	}
+
+	s->len += ret;
+
+	return len;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+ int len;
+
+ TRACE_SEQ_CHECK_RET0(s);
+
+ len = strlen(str);
+
+	while (len > ((s->buffer_size - 1) - s->len)) {
+		expand_buffer(s);
+		/* Stop retrying if the buffer could not be expanded. */
+		TRACE_SEQ_CHECK_RET0(s);
+	}
+
+ memcpy(s->buffer + s->len, str, len);
+ s->len += len;
+
+ return len;
+}
+
+int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+ TRACE_SEQ_CHECK_RET0(s);
+
+	while (s->len >= (s->buffer_size - 1)) {
+		expand_buffer(s);
+		/* Stop retrying if the buffer could not be expanded. */
+		TRACE_SEQ_CHECK_RET0(s);
+	}
+
+ s->buffer[s->len++] = c;
+
+ return 1;
+}
+
+void trace_seq_terminate(struct trace_seq *s)
+{
+ TRACE_SEQ_CHECK_RET(s);
+
+	/* There is always room left for the terminating '\0'. */
+ s->buffer[s->len] = 0;
+}
+
+int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp)
+{
+ TRACE_SEQ_CHECK(s);
+
+ switch (s->state) {
+ case TRACE_SEQ__GOOD:
+ return fprintf(fp, "%.*s", s->len, s->buffer);
+ case TRACE_SEQ__BUFFER_POISONED:
+ fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed");
+ break;
+ case TRACE_SEQ__MEM_ALLOC_FAILED:
+ fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory");
+ break;
+ }
+ return -1;
+}
+
+int trace_seq_do_printf(struct trace_seq *s)
+{
+ return trace_seq_do_fprintf(s, stdout);
+}
diff --git a/tools/lib/vsprintf.c b/tools/lib/vsprintf.c
new file mode 100644
index 000000000..e08ee147e
--- /dev/null
+++ b/tools/lib/vsprintf.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <linux/kernel.h>
+#include <stdio.h>
+
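+/*
+ * Unlike vsnprintf(), these helpers return the number of characters
+ * actually written into buf (not counting the trailing '\0'), even
+ * when the output had to be truncated. For example (a sketch):
+ *
+ *	char buf[8];
+ *	int n = scnprintf(buf, sizeof(buf), "%s", "truncated text");
+ *
+ * would leave n == 7 and buf containing "truncat".
+ */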
+int vscnprintf(char *buf, size_t size, const char *fmt, va_list args)
+{
+ int i = vsnprintf(buf, size, fmt, args);
+ ssize_t ssize = size;
+
+ return (i >= ssize) ? (ssize - 1) : i;
+}
+
+int scnprintf(char * buf, size_t size, const char * fmt, ...)
+{
+ ssize_t ssize = size;
+ va_list args;
+ int i;
+
+ va_start(args, fmt);
+ i = vsnprintf(buf, size, fmt, args);
+ va_end(args);
+
+ return (i >= ssize) ? (ssize - 1) : i;
+}
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
new file mode 100644
index 000000000..33ba98d72
--- /dev/null
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -0,0 +1,30 @@
+ Prior Operation Subsequent Operation
+ --------------- ---------------------------
+ C Self R W RMW Self R W DR DW RMW SV
+ -- ---- - - --- ---- - - -- -- --- --
+
+Store, e.g., WRITE_ONCE() Y Y
+Load, e.g., READ_ONCE() Y Y Y Y
+Unsuccessful RMW operation Y Y Y Y
+rcu_dereference() Y Y Y Y
+Successful *_acquire() R Y Y Y Y Y Y
+Successful *_release() C Y Y Y W Y
+smp_rmb() Y R Y Y R
+smp_wmb() Y W Y Y W
+smp_mb() & synchronize_rcu() CP Y Y Y Y Y Y Y Y
+Successful full non-void RMW CP Y Y Y Y Y Y Y Y Y Y Y
+smp_mb__before_atomic() CP Y Y Y a a a a Y
+smp_mb__after_atomic() CP a a Y Y Y Y Y Y
+
+
+Key: C: Ordering is cumulative
+ P: Ordering propagates
+ R: Read, for example, READ_ONCE(), or read portion of RMW
+ W: Write, for example, WRITE_ONCE(), or write portion of RMW
+ Y: Provides ordering
+ a: Provides ordering given intervening RMW atomic operation
+ DR: Dependent read (address dependency)
+ DW: Dependent write (address, data, or control dependency)
+ RMW: Atomic read-modify-write operation
+ SELF: Orders self, as opposed to accesses before and/or after
+ SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
new file mode 100644
index 000000000..0cbd1ef8f
--- /dev/null
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -0,0 +1,1896 @@
+Explanation of the Linux-Kernel Memory Consistency Model
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+:Author: Alan Stern <stern@rowland.harvard.edu>
+:Created: October 2017
+
+.. Contents
+
+ 1. INTRODUCTION
+ 2. BACKGROUND
+ 3. A SIMPLE EXAMPLE
+ 4. A SELECTION OF MEMORY MODELS
+ 5. ORDERING AND CYCLES
+ 6. EVENTS
+ 7. THE PROGRAM ORDER RELATION: po AND po-loc
+ 8. A WARNING
+ 9. DEPENDENCY RELATIONS: data, addr, and ctrl
+ 10. THE READS-FROM RELATION: rf, rfi, and rfe
+ 11. CACHE COHERENCE AND THE COHERENCE ORDER RELATION: co, coi, and coe
+ 12. THE FROM-READS RELATION: fr, fri, and fre
+ 13. AN OPERATIONAL MODEL
+ 14. PROPAGATION ORDER RELATION: cumul-fence
+ 15. DERIVATION OF THE LKMM FROM THE OPERATIONAL MODEL
+ 16. SEQUENTIAL CONSISTENCY PER VARIABLE
+ 17. ATOMIC UPDATES: rmw
+ 18. THE PRESERVED PROGRAM ORDER RELATION: ppo
+ 19. AND THEN THERE WAS ALPHA
+ 20. THE HAPPENS-BEFORE RELATION: hb
+ 21. THE PROPAGATES-BEFORE RELATION: pb
+ 22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+ 23. ODDS AND ENDS
+
+
+
+INTRODUCTION
+------------
+
+The Linux-kernel memory consistency model (LKMM) is rather complex and
+obscure. This is particularly evident if you read through the
+linux-kernel.bell and linux-kernel.cat files that make up the formal
+version of the model; they are extremely terse and their meanings are
+far from clear.
+
+This document describes the ideas underlying the LKMM. It is meant
+for people who want to understand how the model was designed. It does
+not go into the details of the code in the .bell and .cat files;
+rather, it explains in English what the code expresses symbolically.
+
+Sections 2 (BACKGROUND) through 5 (ORDERING AND CYCLES) are aimed
+toward beginners; they explain what memory consistency models are and
+the basic notions shared by all such models. People already familiar
+with these concepts can skim or skip over them. Sections 6 (EVENTS)
+through 12 (THE FROM-READS RELATION) describe the fundamental
+relations used in many models. Starting in Section 13 (AN OPERATIONAL
+MODEL), the workings of the LKMM itself are covered.
+
+Warning: The code examples in this document are not written in the
+proper format for litmus tests. They don't include a header line, the
+initializations are not enclosed in braces, the global variables are
+not passed by pointers, and they don't have an "exists" clause at the
+end. Converting them to the right format is left as an exercise for
+the reader.
+
+
+BACKGROUND
+----------
+
+A memory consistency model (or just memory model, for short) is
+something which predicts, given a piece of computer code running on a
+particular kind of system, what values may be obtained by the code's
+load instructions. The LKMM makes these predictions for code running
+as part of the Linux kernel.
+
+In practice, people tend to use memory models the other way around.
+That is, given a piece of code and a collection of values specified
+for the loads, the model will predict whether it is possible for the
+code to run in such a way that the loads will indeed obtain the
+specified values. Of course, this is just another way of expressing
+the same idea.
+
+For code running on a uniprocessor system, the predictions are easy:
+Each load instruction must obtain the value written by the most recent
+store instruction accessing the same location (we ignore complicating
+factors such as DMA and mixed-size accesses.) But on multiprocessor
+systems, with multiple CPUs making concurrent accesses to shared
+memory locations, things aren't so simple.
+
+Different architectures have differing memory models, and the Linux
+kernel supports a variety of architectures. The LKMM has to be fairly
+permissive, in the sense that any behavior allowed by one of these
+architectures also has to be allowed by the LKMM.
+
+
+A SIMPLE EXAMPLE
+----------------
+
+Here is a simple example to illustrate the basic concepts. Consider
+some code running as part of a device driver for an input device. The
+driver might contain an interrupt handler which collects data from the
+device, stores it in a buffer, and sets a flag to indicate the buffer
+is full. Running concurrently on a different CPU might be a part of
+the driver code being executed by a process in the midst of a read(2)
+system call. This code tests the flag to see whether the buffer is
+ready, and if it is, copies the data back to userspace. The buffer
+and the flag are memory locations shared between the two CPUs.
+
+We can abstract out the important pieces of the driver code as follows
+(the reason for using WRITE_ONCE() and READ_ONCE() instead of simple
+assignment statements is discussed later):
+
+ int buf = 0, flag = 0;
+
+ P0()
+ {
+ WRITE_ONCE(buf, 1);
+ WRITE_ONCE(flag, 1);
+ }
+
+ P1()
+ {
+ int r1;
+ int r2 = 0;
+
+ r1 = READ_ONCE(flag);
+ if (r1)
+ r2 = READ_ONCE(buf);
+ }
+
+Here the P0() function represents the interrupt handler running on one
+CPU and P1() represents the read() routine running on another. The
+value 1 stored in buf represents input data collected from the device.
+Thus, P0 stores the data in buf and then sets flag. Meanwhile, P1
+reads flag into the private variable r1, and if it is set, reads the
+data from buf into a second private variable r2 for copying to
+userspace. (Presumably if flag is not set then the driver will wait a
+while and try again.)
+
+This pattern of memory accesses, where one CPU stores values to two
+shared memory locations and another CPU loads from those locations in
+the opposite order, is widely known as the "Message Passing" or MP
+pattern. It is typical of memory access patterns in the kernel.
+
+Please note that this example code is a simplified abstraction. Real
+buffers are usually larger than a single integer, real device drivers
+usually use sleep and wakeup mechanisms rather than polling for I/O
+completion, and real code generally doesn't bother to copy values into
+private variables before using them. All that is beside the point;
+the idea here is simply to illustrate the overall pattern of memory
+accesses by the CPUs.
+
+A memory model will predict what values P1 might obtain for its loads
+from flag and buf, or equivalently, what values r1 and r2 might end up
+with after the code has finished running.
+
+Some predictions are trivial. For instance, no sane memory model would
+predict that r1 = 42 or r2 = -7, because neither of those values ever
+gets stored in flag or buf.
+
+Some nontrivial predictions are nonetheless quite simple. For
+instance, P1 might run entirely before P0 begins, in which case r1 and
+r2 will both be 0 at the end. Or P0 might run entirely before P1
+begins, in which case r1 and r2 will both be 1.
+
+The interesting predictions concern what might happen when the two
+routines run concurrently. One possibility is that P1 runs after P0's
+store to buf but before the store to flag. In this case, r1 and r2
+will again both be 0. (If P1 had been designed to read buf
+unconditionally then we would instead have r1 = 0 and r2 = 1.)
+
+However, the most interesting possibility is where r1 = 1 and r2 = 0.
+If this were to occur it would mean the driver contains a bug, because
+incorrect data would get sent to the user: 0 instead of 1. As it
+happens, the LKMM does predict this outcome can occur, and the example
+driver code shown above is indeed buggy.
+
+
+A SELECTION OF MEMORY MODELS
+----------------------------
+
+The first widely cited memory model, and the simplest to understand,
+is Sequential Consistency. According to this model, systems behave as
+if each CPU executed its instructions in order but with unspecified
+timing. In other words, the instructions from the various CPUs get
+interleaved in a nondeterministic way, always according to some single
+global order that agrees with the order of the instructions in the
+program source for each CPU. The model says that the value obtained
+by each load is simply the value written by the most recently executed
+store to the same memory location, from any CPU.
+
+For the MP example code shown above, Sequential Consistency predicts
+that the undesired result r1 = 1, r2 = 0 cannot occur. The reasoning
+goes like this:
+
+ Since r1 = 1, P0 must store 1 to flag before P1 loads 1 from
+ it, as loads can obtain values only from earlier stores.
+
+ P1 loads from flag before loading from buf, since CPUs execute
+ their instructions in order.
+
+ P1 must load 0 from buf before P0 stores 1 to it; otherwise r2
+ would be 1 since a load obtains its value from the most recent
+ store to the same address.
+
+ P0 stores 1 to buf before storing 1 to flag, since it executes
+ its instructions in order.
+
+	Since an instruction (in this case, P0's store to flag) cannot
+ execute before itself, the specified outcome is impossible.
+
+However, real computer hardware almost never follows the Sequential
+Consistency memory model; doing so would rule out too many valuable
+performance optimizations. On ARM and PowerPC architectures, for
+instance, the MP example code really does sometimes yield r1 = 1 and
+r2 = 0.
+
+x86 and SPARC follow yet a different memory model: TSO (Total Store
+Ordering). This model predicts that the undesired outcome for the MP
+pattern cannot occur, but in other respects it differs from Sequential
+Consistency. One example is the Store Buffer (SB) pattern, in which
+each CPU stores to its own shared location and then loads from the
+other CPU's location:
+
+ int x = 0, y = 0;
+
+ P0()
+ {
+ int r0;
+
+ WRITE_ONCE(x, 1);
+ r0 = READ_ONCE(y);
+ }
+
+ P1()
+ {
+ int r1;
+
+ WRITE_ONCE(y, 1);
+ r1 = READ_ONCE(x);
+ }
+
+Sequential Consistency predicts that the outcome r0 = 0, r1 = 0 is
+impossible. (Exercise: Figure out the reasoning.) But TSO allows
+this outcome to occur, and in fact it does sometimes occur on x86 and
+SPARC systems.
+
+The LKMM was inspired by the memory models followed by PowerPC, ARM,
+x86, Alpha, and other architectures. However, it is different in
+detail from each of them.
+
+
+ORDERING AND CYCLES
+-------------------
+
+Memory models are all about ordering. Often this is temporal ordering
+(i.e., the order in which certain events occur) but it doesn't have to
+be; consider for example the order of instructions in a program's
+source code. We saw above that Sequential Consistency makes an
+important assumption that CPUs execute instructions in the same order
+as those instructions occur in the code, and there are many other
+instances of ordering playing central roles in memory models.
+
+The counterpart to ordering is a cycle. Ordering rules out cycles:
+It's not possible to have X ordered before Y, Y ordered before Z, and
+Z ordered before X, because this would mean that X is ordered before
+itself. The analysis of the MP example under Sequential Consistency
+involved just such an impossible cycle:
+
+ W: P0 stores 1 to flag executes before
+ X: P1 loads 1 from flag executes before
+ Y: P1 loads 0 from buf executes before
+ Z: P0 stores 1 to buf executes before
+ W: P0 stores 1 to flag.
+
+In short, if a memory model requires certain accesses to be ordered,
+and a certain outcome for the loads in a piece of code can happen only
+if those accesses would form a cycle, then the memory model predicts
+that outcome cannot occur.
+
+The LKMM is defined largely in terms of cycles, as we will see.
+
+
+EVENTS
+------
+
+The LKMM does not work directly with the C statements that make up
+kernel source code. Instead it considers the effects of those
+statements in a more abstract form, namely, events. The model
+includes three types of events:
+
+ Read events correspond to loads from shared memory, such as
+ calls to READ_ONCE(), smp_load_acquire(), or
+ rcu_dereference().
+
+ Write events correspond to stores to shared memory, such as
+ calls to WRITE_ONCE(), smp_store_release(), or atomic_set().
+
+ Fence events correspond to memory barriers (also known as
+ fences), such as calls to smp_rmb() or rcu_read_lock().
+
+These categories are not exclusive; a read or write event can also be
+a fence. This happens with functions like smp_load_acquire() or
+spin_lock(). However, no single event can be both a read and a write.
+Atomic read-modify-write accesses, such as atomic_inc() or xchg(),
+correspond to a pair of events: a read followed by a write. (The
+write event is omitted for executions where it doesn't occur, such as
+a cmpxchg() where the comparison fails.)
+
+Other parts of the code, those which do not involve interaction with
+shared memory, do not give rise to events. Thus, arithmetic and
+logical computations, control-flow instructions, or accesses to
+private memory or CPU registers are not of central interest to the
+memory model. They only affect the model's predictions indirectly.
+For example, an arithmetic computation might determine the value that
+gets stored to a shared memory location (or in the case of an array
+index, the address where the value gets stored), but the memory model
+is concerned only with the store itself -- its value and its address
+-- not the computation leading up to it.
+
+Events in the LKMM can be linked by various relations, which we will
+describe in the following sections. The memory model requires certain
+of these relations to be orderings, that is, it requires them not to
+have any cycles.
+
+
+THE PROGRAM ORDER RELATION: po AND po-loc
+-----------------------------------------
+
+The most important relation between events is program order (po). You
+can think of it as the order in which statements occur in the source
+code after branches are taken into account and loops have been
+unrolled. A better description might be the order in which
+instructions are presented to a CPU's execution unit. Thus, we say
+that X is po-before Y (written as "X ->po Y" in formulas) if X occurs
+before Y in the instruction stream.
+
+This is inherently a single-CPU relation; two instructions executing
+on different CPUs are never linked by po. Also, it is by definition
+an ordering so it cannot have any cycles.
+
+po-loc is a sub-relation of po. It links two memory accesses when the
+first comes before the second in program order and they access the
+same memory location (the "-loc" suffix).
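+
+For instance, in this short sketch (x and y are shared variables):
+
+	WRITE_ONCE(x, 1);
+	r1 = READ_ONCE(y);
+	r2 = READ_ONCE(x);
+
+the store to x is po-before both loads, but it is po-loc-before only
+the load of x, since the load of y accesses a different location.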
+
+Although this may seem straightforward, there is one subtle aspect to
+program order we need to explain. The LKMM was inspired by low-level
+architectural memory models which describe the behavior of machine
+code, and it retains their outlook to a considerable extent. The
+read, write, and fence events used by the model are close in spirit to
+individual machine instructions. Nevertheless, the LKMM describes
+kernel code written in C, and the mapping from C to machine code can
+be extremely complex.
+
+Optimizing compilers have great freedom in the way they translate
+source code to object code. They are allowed to apply transformations
+that add memory accesses, eliminate accesses, combine them, split them
+into pieces, or move them around. Faced with all these possibilities,
+the LKMM basically gives up. It insists that the code it analyzes
+must contain no ordinary accesses to shared memory; all accesses must
+be performed using READ_ONCE(), WRITE_ONCE(), or one of the other
+atomic or synchronization primitives. These primitives prevent a
+large number of compiler optimizations. In particular, it is
+guaranteed that the compiler will not remove such accesses from the
+generated code (unless it can prove the accesses will never be
+executed), it will not change the order in which they occur in the
+code (within limits imposed by the C standard), and it will not
+introduce extraneous accesses.
+
+This explains why the MP and SB examples above used READ_ONCE() and
+WRITE_ONCE() rather than ordinary memory accesses. Thanks to this
+usage, we can be certain that in the MP example, P0's write event to
+buf really is po-before its write event to flag, and similarly for the
+other shared memory accesses in the examples.
+
+Private variables are not subject to this restriction. Since they are
+not shared between CPUs, they can be accessed normally without
+READ_ONCE() or WRITE_ONCE(), and there will be no ill effects. In
+fact, they need not even be stored in normal memory at all -- in
+principle a private variable could be stored in a CPU register (hence
+the convention that these variables have names starting with the
+letter 'r').
+
+
+A WARNING
+---------
+
+The protections provided by READ_ONCE(), WRITE_ONCE(), and others are
+not perfect; and under some circumstances it is possible for the
+compiler to undermine the memory model. Here is an example. Suppose
+both branches of an "if" statement store the same value to the same
+location:
+
+ r1 = READ_ONCE(x);
+ if (r1) {
+ WRITE_ONCE(y, 2);
+ ... /* do something */
+ } else {
+ WRITE_ONCE(y, 2);
+ ... /* do something else */
+ }
+
+For this code, the LKMM predicts that the load from x will always be
+executed before either of the stores to y. However, a compiler could
+lift the stores out of the conditional, transforming the code into
+something resembling:
+
+ r1 = READ_ONCE(x);
+ WRITE_ONCE(y, 2);
+ if (r1) {
+ ... /* do something */
+ } else {
+ ... /* do something else */
+ }
+
+Given this version of the code, the LKMM would predict that the load
+from x could be executed after the store to y. Thus, the memory
+model's original prediction could be invalidated by the compiler.
+
+Another issue arises from the fact that in C, arguments to many
+operators and function calls can be evaluated in any order. For
+example:
+
+ r1 = f(5) + g(6);
+
+The object code might call f(5) either before or after g(6); the
+memory model cannot assume there is a fixed program order relation
+between them. (In fact, if the functions are inlined then the
+compiler might even interleave their object code.)
+
+
+DEPENDENCY RELATIONS: data, addr, and ctrl
+------------------------------------------
+
+We say that two events are linked by a dependency relation when the
+execution of the second event depends in some way on a value obtained
+from memory by the first. The first event must be a read, and the
+value it obtains must somehow affect what the second event does.
+There are three kinds of dependencies: data, address (addr), and
+control (ctrl).
+
+A read and a write event are linked by a data dependency if the value
+obtained by the read affects the value stored by the write. As a very
+simple example:
+
+ int x, y;
+
+ r1 = READ_ONCE(x);
+ WRITE_ONCE(y, r1 + 5);
+
+The value stored by the WRITE_ONCE obviously depends on the value
+loaded by the READ_ONCE. Such dependencies can wind through
+arbitrarily complicated computations, and a write can depend on the
+values of multiple reads.
+
+A read event and another memory access event are linked by an address
+dependency if the value obtained by the read affects the location
+accessed by the other event. The second event can be either a read or
+a write. Here's another simple example:
+
+ int a[20];
+ int i;
+
+ r1 = READ_ONCE(i);
+ r2 = READ_ONCE(a[r1]);
+
+Here the location accessed by the second READ_ONCE() depends on the
+index value loaded by the first. Pointer indirection also gives rise
+to address dependencies, since the address of a location accessed
+through a pointer will depend on the value read earlier from that
+pointer.
+
+Finally, a read event and another memory access event are linked by a
+control dependency if the value obtained by the read affects whether
+the second event is executed at all. Simple example:
+
+ int x, y;
+
+ r1 = READ_ONCE(x);
+ if (r1)
+ WRITE_ONCE(y, 1984);
+
+Execution of the WRITE_ONCE() is controlled by a conditional expression
+which depends on the value obtained by the READ_ONCE(); hence there is
+a control dependency from the load to the store.
+
+It should be pretty obvious that events can only depend on reads that
+come earlier in program order. Symbolically, if we have R ->data X,
+R ->addr X, or R ->ctrl X (where R is a read event), then we must also
+have R ->po X. It wouldn't make sense for a computation to depend
+somehow on a value that doesn't get loaded from shared memory until
+later in the code!
+
+
+THE READS-FROM RELATION: rf, rfi, and rfe
+-----------------------------------------
+
+The reads-from relation (rf) links a write event to a read event when
+the value loaded by the read is the value that was stored by the
+write. In colloquial terms, the load "reads from" the store. We
+write W ->rf R to indicate that the load R reads from the store W. We
+further distinguish the cases where the load and the store occur on
+the same CPU (internal reads-from, or rfi) and where they occur on
+different CPUs (external reads-from, or rfe).
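+
+In the MP example above, for instance, if r1 = 1 at the end then we
+have W ->rfe R, where W is P0's store to flag and R is P1's load from
+flag, since the two events occur on different CPUs.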
+
+For our purposes, a memory location's initial value is treated as
+though it had been written there by an imaginary initial store that
+executes on a separate CPU before the program runs.
+
+Usage of the rf relation implicitly assumes that loads will always
+read from a single store. It doesn't apply properly in the presence
+of load-tearing, where a load obtains some of its bits from one store
+and some of them from another store. Fortunately, use of READ_ONCE()
+and WRITE_ONCE() will prevent load-tearing; it's not possible to have:
+
+ int x = 0;
+
+ P0()
+ {
+ WRITE_ONCE(x, 0x1234);
+ }
+
+ P1()
+ {
+ int r1;
+
+ r1 = READ_ONCE(x);
+ }
+
+and end up with r1 = 0x1200 (partly from x's initial value and partly
+from the value stored by P0).
+
+On the other hand, load-tearing is unavoidable when mixed-size
+accesses are used. Consider this example:
+
+ union {
+ u32 w;
+ u16 h[2];
+ } x;
+
+ P0()
+ {
+ WRITE_ONCE(x.h[0], 0x1234);
+ WRITE_ONCE(x.h[1], 0x5678);
+ }
+
+ P1()
+ {
+ int r1;
+
+ r1 = READ_ONCE(x.w);
+ }
+
+If r1 = 0x56781234 (little-endian!) at the end, then P1 must have read
+from both of P0's stores. It is possible to handle mixed-size and
+unaligned accesses in a memory model, but the LKMM currently does not
+attempt to do so. It requires all accesses to be properly aligned and
+of the location's actual size.
+
+
+CACHE COHERENCE AND THE COHERENCE ORDER RELATION: co, coi, and coe
+------------------------------------------------------------------
+
+Cache coherence is a general principle requiring that in a
+multi-processor system, the CPUs must share a consistent view of the
+memory contents. Specifically, it requires that for each location in
+shared memory, the stores to that location must form a single global
+ordering which all the CPUs agree on (the coherence order), and this
+ordering must be consistent with the program order for accesses to
+that location.
+
+To put it another way, for any variable x, the coherence order (co) of
+the stores to x is simply the order in which the stores overwrite one
+another. The imaginary store which establishes x's initial value
+comes first in the coherence order; the store which directly
+overwrites the initial value comes second; the store which overwrites
+that value comes third, and so on.
+
+You can think of the coherence order as being the order in which the
+stores reach x's location in memory (or if you prefer a more
+hardware-centric view, the order in which the stores get written to
+x's cache line). We write W ->co W' if W comes before W' in the
+coherence order, that is, if the value stored by W gets overwritten,
+directly or indirectly, by the value stored by W'.
+
+Coherence order is required to be consistent with program order. This
+requirement takes the form of four coherency rules:
+
+ Write-write coherence: If W ->po-loc W' (i.e., W comes before
+ W' in program order and they access the same location), where W
+ and W' are two stores, then W ->co W'.
+
+ Write-read coherence: If W ->po-loc R, where W is a store and R
+ is a load, then R must read from W or from some other store
+ which comes after W in the coherence order.
+
+ Read-write coherence: If R ->po-loc W, where R is a load and W
+ is a store, then the store which R reads from must come before
+ W in the coherence order.
+
+ Read-read coherence: If R ->po-loc R', where R and R' are two
+ loads, then either they read from the same store or else the
+ store read by R comes before the store read by R' in the
+ coherence order.
+
+This is sometimes referred to as sequential consistency per variable,
+because it means that the accesses to any single memory location obey
+the rules of the Sequential Consistency memory model. (According to
+Wikipedia, sequential consistency per variable and cache coherence
+mean the same thing except that cache coherence includes an extra
+requirement that every store eventually becomes visible to every CPU.)
+
+Any reasonable memory model will include cache coherence. Indeed, our
+expectation of cache coherence is so deeply ingrained that violations
+of its requirements look more like hardware bugs than programming
+errors:
+
+ int x;
+
+ P0()
+ {
+ WRITE_ONCE(x, 17);
+ WRITE_ONCE(x, 23);
+ }
+
+If the final value stored in x after this code ran was 17, you would
+think your computer was broken. It would be a violation of the
+write-write coherence rule: Since the store of 23 comes later in
+program order, it must also come later in x's coherence order and
+thus must overwrite the store of 17.
+
+ int x = 0;
+
+ P0()
+ {
+ int r1;
+
+ r1 = READ_ONCE(x);
+ WRITE_ONCE(x, 666);
+ }
+
+If r1 = 666 at the end, this would violate the read-write coherence
+rule: The READ_ONCE() load comes before the WRITE_ONCE() store in
+program order, so it must not read from that store but rather from one
+coming earlier in the coherence order (in this case, x's initial
+value).
+
+ int x = 0;
+
+ P0()
+ {
+ WRITE_ONCE(x, 5);
+ }
+
+ P1()
+ {
+ int r1, r2;
+
+ r1 = READ_ONCE(x);
+ r2 = READ_ONCE(x);
+ }
+
+If r1 = 5 (reading from P0's store) and r2 = 0 (reading from the
+imaginary store which establishes x's initial value) at the end, this
+would violate the read-read coherence rule: The r1 load comes before
+the r2 load in program order, so it must not read from a store that
+comes later in the coherence order.
+
+(As a minor curiosity, if this code had used normal loads instead of
+READ_ONCE() in P1, on Itanium it sometimes could end up with r1 = 5
+and r2 = 0! This results from parallel execution of the operations
+encoded in Itanium's Very-Long-Instruction-Word format, and it is yet
+another motivation for using READ_ONCE() when accessing shared memory
+locations.)
+
+Just like the po relation, co is inherently an ordering -- it is not
+possible for a store to directly or indirectly overwrite itself! And
+just like with the rf relation, we distinguish between stores that
+occur on the same CPU (internal coherence order, or coi) and stores
+that occur on different CPUs (external coherence order, or coe).
+
+On the other hand, stores to different memory locations are never
+related by co, just as instructions on different CPUs are never
+related by po. Coherence order is strictly per-location, or if you
+prefer, each location has its own independent coherence order.
+
+
+THE FROM-READS RELATION: fr, fri, and fre
+-----------------------------------------
+
+The from-reads relation (fr) can be a little difficult for people to
+grok. It describes the situation where a load reads a value that gets
+overwritten by a store. In other words, we have R ->fr W when the
+value that R reads is overwritten (directly or indirectly) by W, or
+equivalently, when R reads from a store which comes earlier than W in
+the coherence order.
+
+For example:
+
+ int x = 0;
+
+ P0()
+ {
+ int r1;
+
+ r1 = READ_ONCE(x);
+ WRITE_ONCE(x, 2);
+ }
+
+The value loaded from x will be 0 (assuming cache coherence!), and it
+gets overwritten by the value 2. Thus there is an fr link from the
+READ_ONCE() to the WRITE_ONCE(). If the code contained any later
+stores to x, there would also be fr links from the READ_ONCE() to
+them.
+
+As with rf, rfi, and rfe, we subdivide the fr relation into fri (when
+the load and the store are on the same CPU) and fre (when they are on
+different CPUs).
+
+Note that the fr relation is determined entirely by the rf and co
+relations; it is not independent. Given a read event R and a write
+event W for the same location, we will have R ->fr W if and only if
+the write which R reads from is co-before W. In symbols,
+
+ (R ->fr W) := (there exists W' with W' ->rf R and W' ->co W).
+
+
+AN OPERATIONAL MODEL
+--------------------
+
+The LKMM is based on various operational memory models, meaning that
+the models arise from an abstract view of how a computer system
+operates. Here are the main ideas, as incorporated into the LKMM.
+
+The system as a whole is divided into the CPUs and a memory subsystem.
+The CPUs are responsible for executing instructions (not necessarily
+in program order), and they communicate with the memory subsystem.
+For the most part, executing an instruction requires a CPU to perform
+only internal operations. However, loads, stores, and fences involve
+more.
+
+When CPU C executes a store instruction, it tells the memory subsystem
+to store a certain value at a certain location. The memory subsystem
+propagates the store to all the other CPUs as well as to RAM. (As a
+special case, we say that the store propagates to its own CPU at the
+time it is executed.) The memory subsystem also determines where the
+store falls in the location's coherence order. In particular, it must
+arrange for the store to be co-later than (i.e., to overwrite) any
+other store to the same location which has already propagated to CPU C.
+
+When a CPU executes a load instruction R, it first checks to see
+whether there are any as-yet unexecuted store instructions, for the
+same location, that come before R in program order. If there are, it
+uses the value of the po-latest such store as the value obtained by R,
+and we say that the store's value is forwarded to R. Otherwise, the
+CPU asks the memory subsystem for the value to load and we say that R
+is satisfied from memory. The memory subsystem hands back the value
+of the co-latest store to the location in question which has already
+propagated to that CPU.
+
+(In fact, the picture needs to be a little more complicated than this.
+CPUs have local caches, and propagating a store to a CPU really means
+propagating it to the CPU's local cache. A local cache can take some
+time to process the stores that it receives, and a store can't be used
+to satisfy one of the CPU's loads until it has been processed. On
+most architectures, the local caches process stores in
+First-In-First-Out order, and consequently the processing delay
+doesn't matter for the memory model. But on Alpha, the local caches
+have a partitioned design that results in non-FIFO behavior. We will
+discuss this in more detail later.)
+
+Note that load instructions may be executed speculatively and may be
+restarted under certain circumstances. The memory model ignores these
+premature executions; we simply say that the load executes at the
+final time it is forwarded or satisfied.
+
+Executing a fence (or memory barrier) instruction doesn't require a
+CPU to do anything special other than informing the memory subsystem
+about the fence. However, fences do constrain the way CPUs and the
+memory subsystem handle other instructions, in two respects.
+
+First, a fence forces the CPU to execute various instructions in
+program order. Exactly which instructions are ordered depends on the
+type of fence:
+
+ Strong fences, including smp_mb() and synchronize_rcu(), force
+ the CPU to execute all po-earlier instructions before any
+ po-later instructions;
+
+ smp_rmb() forces the CPU to execute all po-earlier loads
+ before any po-later loads;
+
+ smp_wmb() forces the CPU to execute all po-earlier stores
+ before any po-later stores;
+
+ Acquire fences, such as smp_load_acquire(), force the CPU to
+ execute the load associated with the fence (e.g., the load
+ part of an smp_load_acquire()) before any po-later
+ instructions;
+
+ Release fences, such as smp_store_release(), force the CPU to
+ execute all po-earlier instructions before the store
+ associated with the fence (e.g., the store part of an
+ smp_store_release()).
+
+Second, some types of fence affect the way the memory subsystem
+propagates stores. When a fence instruction is executed on CPU C:
+
+ For each other CPU C', smp_wmb() forces all po-earlier stores
+ on C to propagate to C' before any po-later stores do.
+
+ For each other CPU C', any store which propagates to C before
+ a release fence is executed (including all po-earlier
+ stores executed on C) is forced to propagate to C' before the
+ store associated with the release fence does.
+
+ Any store which propagates to C before a strong fence is
+ executed (including all po-earlier stores on C) is forced to
+ propagate to all other CPUs before any instructions po-after
+ the strong fence are executed on C.
+
+The propagation ordering enforced by release fences and strong fences
+affects stores from other CPUs that propagate to CPU C before the
+fence is executed, as well as stores that are executed on C before the
+fence. We describe this property by saying that release fences and
+strong fences are A-cumulative. By contrast, smp_wmb() fences are not
+A-cumulative; they only affect the propagation of stores that are
+executed on C before the fence (i.e., those which precede the fence in
+program order).
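+
+As an illustration (a sketch, not a complete litmus test):
+
+	int x = 0, y = 0;
+
+	P0()
+	{
+		WRITE_ONCE(x, 1);
+	}
+
+	P1()
+	{
+		int r1;
+
+		r1 = READ_ONCE(x);
+		smp_store_release(&y, 1);
+	}
+
+If r1 = 1 then P0's store to x propagated to P1 before the release
+fence was executed, so A-cumulativity guarantees that x = 1 will
+propagate to every other CPU before y = 1 does. Had P1 instead used
+smp_wmb() followed by WRITE_ONCE(y, 1), there would be no such
+guarantee, because smp_wmb() affects only the stores executed on P1
+itself.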
+
+rcu_read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have
+other properties which we discuss later.
+
+
+PROPAGATION ORDER RELATION: cumul-fence
+---------------------------------------
+
+The fences which affect propagation order (i.e., strong, release, and
+smp_wmb() fences) are collectively referred to as cumul-fences, even
+though smp_wmb() isn't A-cumulative. The cumul-fence relation is
+defined to link memory access events E and F whenever:
+
+ E and F are both stores on the same CPU and an smp_wmb() fence
+ event occurs between them in program order; or
+
+ F is a release fence and some X comes before F in program order,
+ where either X = E or else E ->rf X; or
+
+ A strong fence event occurs between some X and F in program
+ order, where either X = E or else E ->rf X.
+
+The operational model requires that whenever W and W' are both stores
+and W ->cumul-fence W', then W must propagate to any given CPU
+before W' does. However, for different CPUs C and C', it does not
+require W to propagate to C before W' propagates to C'.
+
+
+DERIVATION OF THE LKMM FROM THE OPERATIONAL MODEL
+-------------------------------------------------
+
+The LKMM is derived from the restrictions imposed by the design
+outlined above. These restrictions involve the necessity of
+maintaining cache coherence and the fact that a CPU can't operate on a
+value before it knows what that value is, among other things.
+
+The formal version of the LKMM is defined by five requirements, or
+axioms:
+
+ Sequential consistency per variable: This requires that the
+ system obey the four coherency rules.
+
+ Atomicity: This requires that atomic read-modify-write
+ operations really are atomic, that is, no other stores can
+ sneak into the middle of such an update.
+
+ Happens-before: This requires that certain instructions are
+ executed in a specific order.
+
+ Propagation: This requires that certain stores propagate to
+ CPUs and to RAM in a specific order.
+
+ Rcu: This requires that RCU read-side critical sections and
+ grace periods obey the rules of RCU, in particular, the
+ Grace-Period Guarantee.
+
+The first and second are quite common; they can be found in many
+memory models (such as those for C11/C++11). The "happens-before" and
+"propagation" axioms have analogs in other memory models as well. The
+"rcu" axiom is specific to the LKMM.
+
+Each of these axioms is discussed below.
+
+
+SEQUENTIAL CONSISTENCY PER VARIABLE
+-----------------------------------
+
+According to the principle of cache coherence, the stores to any fixed
+shared location in memory form a global ordering. We can imagine
+inserting the loads from that location into this ordering, by placing
+each load between the store that it reads from and the following
+store. This leaves the relative positions of loads that read from the
+same store unspecified; let's say they are inserted in program order,
+first for CPU 0, then CPU 1, etc.
+
+You can check that the four coherency rules imply that the rf, co, fr,
+and po-loc relations agree with this global ordering; in other words,
+whenever we have X ->rf Y or X ->co Y or X ->fr Y or X ->po-loc Y, the
+X event comes before the Y event in the global ordering. The LKMM's
+"coherence" axiom expresses this by requiring the union of these
+relations not to have any cycles. This means it must not be possible
+to find events
+
+ X0 -> X1 -> X2 -> ... -> Xn -> X0,
+
+where each of the links is either rf, co, fr, or po-loc. This has to
+hold if the accesses to the fixed memory location can be ordered as
+cache coherence demands.
+
+Although it is not obvious, it can be shown that the converse is also
+true: This LKMM axiom implies that the four coherency rules are
+obeyed.
+
+
+ATOMIC UPDATES: rmw
+-------------------
+
+What does it mean to say that a read-modify-write (rmw) update, such
+as atomic_inc(&x), is atomic? It means that the memory location (x in
+this case) does not get altered between the read and the write events
+making up the atomic operation. In particular, if two CPUs perform
+atomic_inc(&x) concurrently, it must be guaranteed that the final
+value of x will be the initial value plus two. We should never have
+the following sequence of events:
+
+ CPU 0 loads x obtaining 13;
+ CPU 1 loads x obtaining 13;
+ CPU 0 stores 14 to x;
+ CPU 1 stores 14 to x;
+
+where the final value of x is wrong (14 rather than 15).
+
+In this example, CPU 0's increment effectively gets lost because it
+occurs in between CPU 1's load and store. To put it another way, the
+problem is that the position of CPU 0's store in x's coherence order
+is between the store that CPU 1 reads from and the store that CPU 1
+performs.
+
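+As a concrete sketch of this scenario (assuming x is an atomic_t
+initialized to 13, matching the event sequence above):
+
+	atomic_t x = ATOMIC_INIT(13);
+
+	P0()
+	{
+		atomic_inc(&x);
+	}
+
+	P1()
+	{
+		atomic_inc(&x);
+	}
+
+Atomicity guarantees that this code always ends with x equal to 15;
+the lost-update sequence above, which would leave x equal to 14, is
+forbidden.
+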
+The same analysis applies to all atomic update operations. Therefore,
+to enforce atomicity the LKMM requires that atomic updates follow this
+rule: Whenever R and W are the read and write events composing an
+atomic read-modify-write and W' is the write event which R reads from,
+there must not be any stores coming between W' and W in the coherence
+order. Equivalently,
+
+ (R ->rmw W) implies (there is no X with R ->fr X and X ->co W),
+
+where the rmw relation links the read and write events making up each
+atomic update. This is what the LKMM's "atomic" axiom says.
+
+
+THE PRESERVED PROGRAM ORDER RELATION: ppo
+-----------------------------------------
+
+There are many situations where a CPU is obligated to execute two
+instructions in program order. We amalgamate them into the ppo (for
+"preserved program order") relation, which links the po-earlier
+instruction to the po-later instruction and is thus a sub-relation of
+po.
+
+The operational model already includes a description of one such
+situation: Fences are a source of ppo links. Suppose X and Y are
+memory accesses with X ->po Y; then the CPU must execute X before Y if
+any of the following hold:
+
+ A strong (smp_mb() or synchronize_rcu()) fence occurs between
+ X and Y;
+
+ X and Y are both stores and an smp_wmb() fence occurs between
+ them;
+
+ X and Y are both loads and an smp_rmb() fence occurs between
+ them;
+
+ X is also an acquire fence, such as smp_load_acquire();
+
+ Y is also a release fence, such as smp_store_release().
+
+Another possibility, not mentioned earlier but discussed in the next
+section, is:
+
+ X and Y are both loads, X ->addr Y (i.e., there is an address
+ dependency from X to Y), and X is a READ_ONCE() or an atomic
+ access.
+
+Dependencies can also cause instructions to be executed in program
+order. This is uncontroversial when the second instruction is a
+store; either a data, address, or control dependency from a load R to
+a store W will force the CPU to execute R before W. This is
+simply because the CPU cannot tell the memory subsystem about W's
+store before it knows what value should be stored (in the case of a
+data dependency), what location it should be stored into (in the case
+of an address dependency), or whether the store should actually take
+place (in the case of a control dependency).
+
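+For example, consider this hypothetical fragment (ptr, q, and z are
+invented names):
+
+	p = READ_ONCE(ptr);		/* the load R */
+	WRITE_ONCE(q, p);		/* data dependency: R determines the value */
+	WRITE_ONCE(*p, 5);		/* address dependency: R determines the location */
+	if (p != NULL)
+		WRITE_ONCE(z, 1);	/* control dependency: R determines whether to store */
+
+In each case the CPU must execute the load R before the dependent
+store.
+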
+Dependencies to load instructions are more problematic. To begin with,
+there is no such thing as a data dependency to a load. Next, a CPU
+has no reason to respect a control dependency to a load, because it
+can always satisfy the second load speculatively before the first, and
+then ignore the result if it turns out that the second load shouldn't
+be executed after all. And lastly, the real difficulties begin when
+we consider address dependencies to loads.
+
+To be fair about it, all Linux-supported architectures do execute
+loads in program order if there is an address dependency between them.
+After all, a CPU cannot ask the memory subsystem to load a value from
+a particular location before it knows what that location is. However,
+the split-cache design used by Alpha can cause it to behave in a way
+that looks as if the loads were executed out of order (see the next
+section for more details). The kernel includes a workaround for this
+problem when the loads come from READ_ONCE(), and therefore the LKMM
+includes address dependencies to loads in the ppo relation.
+
+On the other hand, dependencies can indirectly affect the ordering of
+two loads. This happens when there is a dependency from a load to a
+store and a second, po-later load reads from that store:
+
+ R ->dep W ->rfi R',
+
+where the dep link can be either an address or a data dependency. In
+this situation we know it is possible for the CPU to execute R' before
+W, because it can forward the value that W will store to R'. But it
+cannot execute R' before R, because it cannot forward the value before
+it knows what that value is, or that W and R' do access the same
+location. However, if there is merely a control dependency between R
+and W then the CPU can speculatively forward W to R' before executing
+R; if the speculation turns out to be wrong then the CPU merely has to
+restart or abandon R'.
+
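+In code-sketch form, using hypothetical variables x and y, the
+R ->dep W ->rfi R' pattern looks like this:
+
+	r1 = READ_ONCE(x);	/* R */
+	WRITE_ONCE(y, r1);	/* W, carrying a data dependency from R */
+	r2 = READ_ONCE(y);	/* R', possibly satisfied by forwarding from W */
+
+The CPU may execute R' before W by forwarding, but it cannot execute
+R' before R.
+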
+(In theory, a CPU might forward a store to a load when it runs across
+an address dependency like this:
+
+ r1 = READ_ONCE(ptr);
+ WRITE_ONCE(*r1, 17);
+ r2 = READ_ONCE(*r1);
+
+because it could tell that the store and the second load access the
+same location even before it knows what the location's address is.
+However, none of the architectures supported by the Linux kernel do
+this.)
+
+Two memory accesses of the same location must always be executed in
+program order if the second access is a store. Thus, if we have
+
+ R ->po-loc W
+
+(the po-loc link says that R comes before W in program order and they
+access the same location), the CPU is obliged to execute W after R.
+If it executed W first then the memory subsystem would respond to R's
+read request with the value stored by W (or an even later store), in
+violation of the read-write coherence rule. Similarly, if we had
+
+ W ->po-loc W'
+
+and the CPU executed W' before W, then the memory subsystem would put
+W' before W in the coherence order. It would effectively cause W to
+overwrite W', in violation of the write-write coherence rule.
+(Interestingly, an early ARMv8 memory model, now obsolete, proposed
+allowing out-of-order writes like this to occur. The model avoided
+violating the write-write coherence rule by requiring the CPU not to
+send the W write to the memory subsystem at all!)
+
+There is one last example of preserved program order in the LKMM: when
+a load-acquire reads from an earlier store-release. For example:
+
+ smp_store_release(&x, 123);
+ r1 = smp_load_acquire(&x);
+
+If the smp_load_acquire() ends up obtaining the 123 value that was
+stored by the smp_store_release(), the LKMM says that the load must be
+executed after the store; the store cannot be forwarded to the load.
+This requirement does not arise from the operational model, but it
+yields correct predictions on all architectures supported by the Linux
+kernel, although for differing reasons.
+
+On some architectures, including x86 and ARMv8, it is true that the
+store cannot be forwarded to the load. On others, including PowerPC
+and ARMv7, smp_store_release() generates object code that starts with
+a fence and smp_load_acquire() generates object code that ends with a
+fence. The upshot is that even though the store may be forwarded to
+the load, it is still true that any instruction preceding the store
+will be executed before the load or any following instructions, and
+the store will be executed before any instruction following the load.
+
+
+AND THEN THERE WAS ALPHA
+------------------------
+
+As mentioned above, the Alpha architecture is unique in that it does
+not appear to respect address dependencies to loads. This means that
+code such as the following:
+
+ int x = 0;
+ int y = -1;
+ int *ptr = &y;
+
+ P0()
+ {
+ WRITE_ONCE(x, 1);
+ smp_wmb();
+ WRITE_ONCE(ptr, &x);
+ }
+
+ P1()
+ {
+ int *r1;
+ int r2;
+
+ r1 = ptr;
+ r2 = READ_ONCE(*r1);
+ }
+
+can malfunction on Alpha systems (notice that P1 uses an ordinary load
+to read ptr instead of READ_ONCE()). It is quite possible that r1 = &x
+and r2 = 0 at the end, in spite of the address dependency.
+
+At first glance this doesn't seem to make sense. We know that the
+smp_wmb() forces P0's store to x to propagate to P1 before the store
+to ptr does. And since P1 can't execute its second load
+until it knows what location to load from, i.e., after executing its
+first load, the value x = 1 must have propagated to P1 before the
+second load executed. So why doesn't r2 end up equal to 1?
+
+The answer lies in the Alpha's split local caches. Although the two
+stores do reach P1's local cache in the proper order, it can happen
+that the first store is processed by a busy part of the cache while
+the second store is processed by an idle part. As a result, the x = 1
+value may not become available for P1's CPU to read until after the
+ptr = &x value does, leading to the undesirable result above. The
+final effect is that even though the two loads really are executed in
+program order, it appears that they aren't.
+
+This could not have happened if the local cache had processed the
+incoming stores in FIFO order. By contrast, other architectures
+maintain at least the appearance of FIFO order.
+
+In practice, this difficulty is solved by inserting a special fence
+between P1's two loads when the kernel is compiled for the Alpha
+architecture. In fact, as of version 4.15, the kernel automatically
+adds this fence (called smp_read_barrier_depends() and defined as
+nothing at all on non-Alpha builds) after every READ_ONCE() and atomic
+load. The effect of the fence is to cause the CPU not to execute any
+po-later instructions until after the local cache has finished
+processing all the stores it has already received. Thus, if the code
+was changed to:
+
+ P1()
+ {
+ int *r1;
+ int r2;
+
+ r1 = READ_ONCE(ptr);
+ r2 = READ_ONCE(*r1);
+ }
+
+then we would never get r1 = &x and r2 = 0. By the time P1 executed
+its second load, the x = 1 store would already be fully processed by
+the local cache and available for satisfying the read request. Thus
+we have yet another reason why shared data should always be read with
+READ_ONCE() or another synchronization primitive rather than accessed
+directly.
+
+The LKMM requires that smp_rmb(), acquire fences, and strong fences
+share this property with smp_read_barrier_depends(): They do not allow
+the CPU to execute any po-later instructions (or po-later loads in the
+case of smp_rmb()) until all outstanding stores have been processed by
+the local cache. In the case of a strong fence, the CPU first has to
+wait for all of its po-earlier stores to propagate to every other CPU
+in the system; then it has to wait for the local cache to process all
+the stores received as of that time -- not just the stores received
+when the strong fence began.
+
+And of course, none of this matters for any architecture other than
+Alpha.
+
+
+THE HAPPENS-BEFORE RELATION: hb
+-------------------------------
+
+The happens-before relation (hb) links memory accesses that have to
+execute in a certain order. hb includes the ppo relation and two
+others, one of which is rfe.
+
+W ->rfe R implies that W and R are on different CPUs. It also means
+that W's store must have propagated to R's CPU before R executed;
+otherwise R could not have read the value stored by W. Therefore W
+must have executed before R, and so we have W ->hb R.
+
+The equivalent fact need not hold if W ->rfi R (i.e., W and R are on
+the same CPU). As we have already seen, the operational model allows
+W's value to be forwarded to R in such cases, meaning that R may well
+execute before W does.
+
+It's important to understand that neither coe nor fre is included in
+hb, despite their similarities to rfe. For example, suppose we have
+W ->coe W'. This means that W and W' are stores to the same location,
+they execute on different CPUs, and W comes before W' in the coherence
+order (i.e., W' overwrites W). Nevertheless, it is possible for W' to
+execute before W, because the decision as to which store overwrites
+the other is made later by the memory subsystem. When the stores are
+nearly simultaneous, either one can come out on top. Similarly,
+R ->fre W means that W overwrites the value which R reads, but it
+doesn't mean that W has to execute after R. All that's necessary is
+for the memory subsystem not to propagate W to R's CPU until after R
+has executed, which is possible if W executes shortly before R.
+
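+For example, consider this sketch:
+
+	int x;
+
+	P0()
+	{
+		WRITE_ONCE(x, 1);	/* W */
+	}
+
+	P1()
+	{
+		WRITE_ONCE(x, 2);	/* W' */
+	}
+
+If x = 2 at the end then W ->coe W', and yet W' may perfectly well
+have executed before W; the memory subsystem merely placed W' after W
+in the coherence order.
+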
+The third relation included in hb is like ppo, in that it only links
+events that are on the same CPU. However, it is more difficult to
+explain, because it arises only indirectly from the requirement of
+cache coherence. The relation is called prop, and it links two events
+on CPU C in situations where a store from some other CPU comes after
+the first event in the coherence order and propagates to C before the
+second event executes.
+
+This is best explained with some examples. The simplest case looks
+like this:
+
+ int x;
+
+ P0()
+ {
+ int r1;
+
+ WRITE_ONCE(x, 1);
+ r1 = READ_ONCE(x);
+ }
+
+ P1()
+ {
+ WRITE_ONCE(x, 8);
+ }
+
+If r1 = 8 at the end then P0's accesses must have executed in program
+order. We can deduce this from the operational model; if P0's load
+had executed before its store then the value of the store would have
+been forwarded to the load, so r1 would have ended up equal to 1, not
+8. In this case there is a prop link from P0's write event to its read
+event, because P1's store came after P0's store in x's coherence
+order, and P1's store propagated to P0 before P0's load executed.
+
+An equally simple case involves two loads of the same location that
+read from different stores:
+
+ int x = 0;
+
+ P0()
+ {
+ int r1, r2;
+
+ r1 = READ_ONCE(x);
+ r2 = READ_ONCE(x);
+ }
+
+ P1()
+ {
+ WRITE_ONCE(x, 9);
+ }
+
+If r1 = 0 and r2 = 9 at the end then P0's accesses must have executed
+in program order. If the second load had executed before the first
+then the x = 9 store must have been propagated to P0 before the first
+load executed, and so r1 would have been 9 rather than 0. In this
+case there is a prop link from P0's first read event to its second,
+because P1's store overwrote the value read by P0's first load, and
+P1's store propagated to P0 before P0's second load executed.
+
+Less trivial examples of prop all involve fences. Unlike the simple
+examples above, they can require that some instructions are executed
+out of program order. This next one should look familiar:
+
+ int buf = 0, flag = 0;
+
+ P0()
+ {
+ WRITE_ONCE(buf, 1);
+ smp_wmb();
+ WRITE_ONCE(flag, 1);
+ }
+
+ P1()
+ {
+ int r1;
+ int r2;
+
+ r1 = READ_ONCE(flag);
+ r2 = READ_ONCE(buf);
+ }
+
+This is the MP pattern again, with an smp_wmb() fence between the two
+stores. If r1 = 1 and r2 = 0 at the end then there is a prop link
+from P1's second load to its first (backwards!). The reason is
+similar to the previous examples: The value P1 loads from buf gets
+overwritten by P0's store to buf, the fence guarantees that the store
+to buf will propagate to P1 before the store to flag does, and the
+store to flag propagates to P1 before P1 reads flag.
+
+The prop link says that in order to obtain the r1 = 1, r2 = 0 result,
+P1 must execute its second load before the first. Indeed, if the load
+from flag were executed first, then the buf = 1 store would already
+have propagated to P1 by the time P1's load from buf executed, so r2
+would have been 1 at the end, not 0. (The reasoning holds even for
+Alpha, although the details are more complicated and we will not go
+into them.)
+
+But what if we put an smp_rmb() fence between P1's loads? The fence
+would force the two loads to be executed in program order, and it
+would generate a cycle in the hb relation: The fence would create a ppo
+link (hence an hb link) from the first load to the second, and the
+prop relation would give an hb link from the second load to the first.
+Since an instruction can't execute before itself, we are forced to
+conclude that if an smp_rmb() fence is added, the r1 = 1, r2 = 0
+outcome is impossible -- as it should be.
+
+The formal definition of the prop relation involves a coe or fre link,
+followed by an arbitrary number of cumul-fence links, ending with an
+rfe link. You can concoct more exotic examples, containing more than
+one fence, although this quickly leads to diminishing returns in terms
+of complexity. For instance, here's an example containing a coe link
+followed by two fences and an rfe link, utilizing the fact that
+release fences are A-cumulative:
+
+ int x, y, z;
+
+ P0()
+ {
+ int r0;
+
+ WRITE_ONCE(x, 1);
+ r0 = READ_ONCE(z);
+ }
+
+ P1()
+ {
+ WRITE_ONCE(x, 2);
+ smp_wmb();
+ WRITE_ONCE(y, 1);
+ }
+
+ P2()
+ {
+ int r2;
+
+ r2 = READ_ONCE(y);
+ smp_store_release(&z, 1);
+ }
+
+If x = 2, r0 = 1, and r2 = 1 after this code runs then there is a prop
+link from P0's store to its load. This is because P0's store gets
+overwritten by P1's store since x = 2 at the end (a coe link), the
+smp_wmb() ensures that P1's store to x propagates to P2 before the
+store to y does (the first fence), the store to y propagates to P2
+before P2's load and store execute, P2's smp_store_release()
+guarantees that the stores to x and y both propagate to P0 before the
+store to z does (the second fence), and P0's load executes after the
+store to z has propagated to P0 (an rfe link).
+
+In summary, the fact that the hb relation links memory access events
+in the order they execute means that it must not have cycles. This
+requirement is the content of the LKMM's "happens-before" axiom.
+
+The LKMM defines yet another relation connected to times of
+instruction execution, but it is not included in hb. It relies on the
+particular properties of strong fences, which we cover in the next
+section.
+
+
+THE PROPAGATES-BEFORE RELATION: pb
+----------------------------------
+
+The propagates-before (pb) relation capitalizes on the special
+features of strong fences. It links two events E and F whenever some
+store is coherence-later than E and propagates to every CPU and to RAM
+before F executes. The formal definition requires that E be linked to
+F via a coe or fre link, an arbitrary number of cumul-fences, an
+optional rfe link, a strong fence, and an arbitrary number of hb
+links. Let's see how this definition works out.
+
+Consider first the case where E is a store (implying that the sequence
+of links begins with coe). Then there are events W, X, Y, and Z such
+that:
+
+ E ->coe W ->cumul-fence* X ->rfe? Y ->strong-fence Z ->hb* F,
+
+where the * suffix indicates an arbitrary number of links of the
+specified type, and the ? suffix indicates the link is optional (Y may
+be equal to X). Because of the cumul-fence links, we know that W will
+propagate to Y's CPU before X does, hence before Y executes and hence
+before the strong fence executes. Because this fence is strong, we
+know that W will propagate to every CPU and to RAM before Z executes.
+And because of the hb links, we know that Z will execute before F.
+Thus W, which comes later than E in the coherence order, will
+propagate to every CPU and to RAM before F executes.
+
+The case where E is a load is exactly the same, except that the first
+link in the sequence is fre instead of coe.
+
+The existence of a pb link from E to F implies that E must execute
+before F. To see why, suppose that F executed first. Then W would
+have propagated to E's CPU before E executed. If E was a store, the
+memory subsystem would then be forced to make E come after W in the
+coherence order, contradicting the fact that E ->coe W. If E was a
+load, the memory subsystem would then be forced to satisfy E's read
+request with the value stored by W or an even later store,
+contradicting the fact that E ->fre W.
+
+A good example illustrating how pb works is the SB pattern with strong
+fences:
+
+ int x = 0, y = 0;
+
+ P0()
+ {
+ int r0;
+
+ WRITE_ONCE(x, 1);
+ smp_mb();
+ r0 = READ_ONCE(y);
+ }
+
+ P1()
+ {
+ int r1;
+
+ WRITE_ONCE(y, 1);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+If r0 = 0 at the end then there is a pb link from P0's load to P1's
+load: an fre link from P0's load to P1's store (which overwrites the
+value read by P0), and a strong fence between P1's store and its load.
+In this example, the sequences of cumul-fence and hb links are empty.
+Note that this pb link is not included in hb as an instance of prop,
+because it does not start and end on the same CPU.
+
+Similarly, if r1 = 0 at the end then there is a pb link from P1's load
+to P0's. This means that if both r0 and r1 were 0 there would be a
+cycle in pb, which is not possible since an instruction cannot execute
+before itself. Thus, adding smp_mb() fences to the SB pattern
+prevents the r0 = 0, r1 = 0 outcome.
+
+In summary, the fact that the pb relation links events in the order
+they execute means that it cannot have cycles. This requirement is
+the content of the LKMM's "propagation" axiom.
+
+
+RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
+----------------------------------------------------
+
+RCU (Read-Copy-Update) is a powerful synchronization mechanism. It
+rests on two concepts: grace periods and read-side critical sections.
+
+A grace period is the span of time occupied by a call to
+synchronize_rcu(). A read-side critical section (or just critical
+section, for short) is a region of code delimited by rcu_read_lock()
+at the start and rcu_read_unlock() at the end. Critical sections can
+be nested, although we won't make use of this fact.
+
+As far as memory models are concerned, RCU's main feature is its
+Grace-Period Guarantee, which states that a critical section can never
+span a full grace period. In more detail, the Guarantee says:
+
+ If a critical section starts before a grace period then it
+ must end before the grace period does. In addition, every
+ store that propagates to the critical section's CPU before the
+ end of the critical section must propagate to every CPU before
+ the end of the grace period.
+
+ If a critical section ends after a grace period ends then it
+ must start after the grace period does. In addition, every
+ store that propagates to the grace period's CPU before the
+ start of the grace period must propagate to every CPU before
+ the start of the critical section.
+
+Here is a simple example of RCU in action:
+
+ int x, y;
+
+ P0()
+ {
+ rcu_read_lock();
+ WRITE_ONCE(x, 1);
+ WRITE_ONCE(y, 1);
+ rcu_read_unlock();
+ }
+
+ P1()
+ {
+ int r1, r2;
+
+ r1 = READ_ONCE(x);
+ synchronize_rcu();
+ r2 = READ_ONCE(y);
+ }
+
+The Grace Period Guarantee tells us that when this code runs, it will
+never end with r1 = 1 and r2 = 0. The reasoning is as follows. r1 = 1
+means that P0's store to x propagated to P1 before P1 called
+synchronize_rcu(), so P0's critical section must have started before
+P1's grace period. On the other hand, r2 = 0 means that P0's store to
+y, which occurs before the end of the critical section, did not
+propagate to P1 before the end of the grace period, violating the
+Guarantee.
+
+In the kernel's implementations of RCU, the requirements for stores
+to propagate to every CPU are fulfilled by placing strong fences at
+suitable places in the RCU-related code. Thus, if a critical section
+starts before a grace period does then the critical section's CPU will
+execute an smp_mb() fence after the end of the critical section and
+some time before the grace period's synchronize_rcu() call returns.
+And if a critical section ends after a grace period does then the
+synchronize_rcu() routine will execute an smp_mb() fence at its start
+and some time before the critical section's opening rcu_read_lock()
+executes.
+
+What exactly do we mean by saying that a critical section "starts
+before" or "ends after" a grace period? Some aspects of the meaning
+are pretty obvious, as in the example above, but the details aren't
+entirely clear. The LKMM formalizes this notion by means of the
+rcu-link relation. rcu-link encompasses a very general notion of
+"before": Among other things, X ->rcu-link Z includes cases where X
+happens-before or is equal to some event Y which is equal to or comes
+before Z in the coherence order. When Y = Z this says that X ->rfe Z
+implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z
+and X ->co Z each imply X ->rcu-link Z.
+
+The formal definition of the rcu-link relation is more than a little
+obscure, and we won't give it here. It is closely related to the pb
+relation, and the details don't matter unless you want to comb through
+a somewhat lengthy formal proof. Pretty much all you need to know
+about rcu-link is the information in the preceding paragraph.
+
+The LKMM also defines the gp and rscs relations. They bring grace
+periods and read-side critical sections into the picture, in the
+following way:
+
+ E ->gp F means there is a synchronize_rcu() fence event S such
+ that E ->po S and either S ->po F or S = F. In simple terms,
+ there is a grace period po-between E and F.
+
+ E ->rscs F means there is a critical section delimited by an
+ rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
+ that E ->po U and either L ->po F or L = F. You can think of
+ this as saying that E and F are in the same critical section
+ (in fact, it also allows E to be po-before the start of the
+ critical section and F to be po-after the end).
+
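+In code-sketch form, with hypothetical variables x and y and with
+statement labels marking the events:
+
+	E: WRITE_ONCE(x, 1);
+	S: synchronize_rcu();	/* yields E ->gp F */
+	F: WRITE_ONCE(y, 1);
+
+and:
+
+	L: rcu_read_lock();
+	E: r1 = READ_ONCE(x);
+	F: WRITE_ONCE(y, 1);	/* E ->rscs F: same critical section */
+	U: rcu_read_unlock();
+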
+If we think of the rcu-link relation as standing for an extended
+"before", then X ->gp Y ->rcu-link Z says that X executes before a
+grace period which ends before Z executes. (In fact it covers more
+than this, because it also includes cases where X executes before a
+grace period and some store propagates to Z's CPU before Z executes
+but doesn't propagate to some other CPU until after the grace period
+ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
+before the start of) a critical section which starts before Z
+executes.
+
+The LKMM goes on to define the rcu-fence relation as a sequence of gp
+and rscs links separated by rcu-link links, in which the number of gp
+links is >= the number of rscs links. For example:
+
+ X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+would imply that X ->rcu-fence V, because this sequence contains two
+gp links and only one rscs link. (It also implies that X ->rcu-fence T
+and Z ->rcu-fence V.) On the other hand:
+
+ X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
+
+does not imply X ->rcu-fence V, because the sequence contains only
+one gp link but two rscs links.
+
+The rcu-fence relation is important because the Grace Period Guarantee
+means that rcu-fence acts kind of like a strong fence. In particular,
+if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
+will propagate to every CPU before Z executes.
+
+To prove this in full generality requires some intellectual effort.
+We'll consider just a very simple case:
+
+ W ->gp X ->rcu-link Y ->rscs Z.
+
+This formula means that there is a grace period G and a critical
+section C such that:
+
+ 1. W is po-before G;
+
+ 2. X is equal to or po-after G;
+
+ 3. X comes "before" Y in some sense;
+
+ 4. Y is po-before the end of C;
+
+ 5. Z is equal to or po-after the start of C.
+
+From 2 - 4 we deduce that the grace period G ends before the critical
+section C. Then the second part of the Grace Period Guarantee says
+not only that G starts before C does, but also that W (which executes
+on G's CPU before G starts) must propagate to every CPU before C
+starts. In particular, W propagates to every CPU before Z executes
+(or finishes executing, in the case where Z is equal to the
+rcu_read_lock() fence event which starts C.) This sort of reasoning
+can be expanded to handle all the situations covered by rcu-fence.
+
+Finally, the LKMM defines the RCU-before (rb) relation in terms of
+rcu-fence. This is done in essentially the same way as the pb
+relation was defined in terms of strong-fence. We will omit the
+details; the end result is that E ->rb F implies E must execute before
+F, just as E ->pb F does (and for much the same reasons).
+
+Putting this all together, the LKMM expresses the Grace Period
+Guarantee by requiring that the rb relation does not contain a cycle.
+Equivalently, this "rcu" axiom requires that there are no events E and
+F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the
+axiom requires that there are no cycles consisting of gp and rscs
+alternating with rcu-link, where the number of gp links is >= the
+number of rscs links.
+
+Justifying the axiom isn't easy, but it is in fact a valid
+formalization of the Grace Period Guarantee. We won't attempt to go
+through the detailed argument, but the following analysis gives a
+taste of what is involved. Suppose we have a violation of the first
+part of the Guarantee: A critical section starts before a grace
+period, and some store propagates to the critical section's CPU before
+the end of the critical section but doesn't propagate to some other
+CPU until after the end of the grace period.
+
+Putting symbols to these ideas, let L and U be the rcu_read_lock() and
+rcu_read_unlock() fence events delimiting the critical section in
+question, and let S be the synchronize_rcu() fence event for the grace
+period. Saying that the critical section starts before S means there
+are events E and F where E is po-after L (which marks the start of the
+critical section), E is "before" F in the sense of the rcu-link
+relation, and F is po-before the grace period S:
+
+ L ->po E ->rcu-link F ->po S.
+
+Let W be the store mentioned above, let Z come before the end of the
+critical section and witness that W propagates to the critical
+section's CPU by reading from W, and let Y on some arbitrary CPU be a
+witness that W has not propagated to that CPU, where Y happens after
+some event X which is po-after S. Symbolically, this amounts to:
+
+ S ->po X ->hb* Y ->fr W ->rf Z ->po U.
+
+The fr link from Y to W indicates that W has not propagated to Y's CPU
+at the time that Y executes. From this, it can be shown (see the
+discussion of the rcu-link relation earlier) that X and Z are related
+by rcu-link, yielding:
+
+ S ->po X ->rcu-link Z ->po U.
+
+The formulas say that S is po-between F and X, hence F ->gp X. They
+also say that Z comes before the end of the critical section and E
+comes after its start, hence Z ->rscs E. From all this we obtain:
+
+ F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
+
+a forbidden cycle. Thus the "rcu" axiom rules out this violation of
+the Grace Period Guarantee.
+
+For something a little more down-to-earth, let's see how the axiom
+works out in practice. Consider the RCU code example from above, this
+time with statement labels added to the memory access instructions:
+
+ int x, y;
+
+ P0()
+ {
+ rcu_read_lock();
+ W: WRITE_ONCE(x, 1);
+ X: WRITE_ONCE(y, 1);
+ rcu_read_unlock();
+ }
+
+ P1()
+ {
+ int r1, r2;
+
+ Y: r1 = READ_ONCE(x);
+ synchronize_rcu();
+ Z: r2 = READ_ONCE(y);
+ }
+
+If r2 = 0 at the end then P0's store at X overwrites the value that
+P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
+In addition, there is a synchronize_rcu() between Y and Z, and therefore
+we have Y ->gp Z.
+
+If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
+so we have W ->rcu-link Y. In addition, W and X are in the same critical
+section, and therefore we have X ->rscs W.
+
+Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
+violating the "rcu" axiom. Hence the outcome is not allowed by the
+LKMM, as we would expect.
+
+For contrast, let's see what can happen in a more complicated example:
+
+ int x, y, z;
+
+ P0()
+ {
+ int r0;
+
+ rcu_read_lock();
+ W: r0 = READ_ONCE(x);
+ X: WRITE_ONCE(y, 1);
+ rcu_read_unlock();
+ }
+
+ P1()
+ {
+ int r1;
+
+ Y: r1 = READ_ONCE(y);
+ synchronize_rcu();
+ Z: WRITE_ONCE(z, 1);
+ }
+
+ P2()
+ {
+ int r2;
+
+ rcu_read_lock();
+ U: r2 = READ_ONCE(z);
+ V: WRITE_ONCE(x, 1);
+ rcu_read_unlock();
+ }
+
+If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
+that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
+However this cycle is not forbidden, because the sequence of relations
+contains fewer instances of gp (one) than of rscs (two). Consequently
+the outcome is allowed by the LKMM. The following instruction timing
+diagram shows how it might actually occur:
+
+P0 P1 P2
+-------------------- -------------------- --------------------
+rcu_read_lock()
+X: WRITE_ONCE(y, 1)
+ Y: r1 = READ_ONCE(y)
+ synchronize_rcu() starts
+ . rcu_read_lock()
+ . V: WRITE_ONCE(x, 1)
+W: r0 = READ_ONCE(x) .
+rcu_read_unlock() .
+ synchronize_rcu() ends
+ Z: WRITE_ONCE(z, 1)
+ U: r2 = READ_ONCE(z)
+ rcu_read_unlock()
+
+This requires P0 and P2 to execute their loads and stores out of
+program order, but of course they are allowed to do so. And as you
+can see, the Grace Period Guarantee is not violated: The critical
+section in P0 both starts before P1's grace period does and ends
+before it does, and the critical section in P2 both starts after P1's
+grace period does and ends after it does.
+
+
+ODDS AND ENDS
+-------------
+
+This section covers material that didn't quite fit anywhere in the
+earlier sections.
+
+The descriptions in this document don't always match the formal
+version of the LKMM exactly. For example, the actual formal
+definition of the prop relation makes the initial coe or fre part
+optional, and it doesn't require the events linked by the relation to
+be on the same CPU. These differences are very unimportant; indeed,
+instances where the coe/fre part of prop is missing are of no interest
+because all the other parts (fences and rfe) are already included in
+hb anyway, and where the formal model adds prop into hb, it includes
+an explicit requirement that the events being linked are on the same
+CPU.
+
+Another minor difference has to do with events that are both memory
+accesses and fences, such as those corresponding to smp_load_acquire()
+calls. In the formal model, these events aren't actually both reads
+and fences; rather, they are read events with an annotation marking
+them as acquires. (Or write events annotated as releases, in the case of
+smp_store_release().) The final effect is the same.
+
+Although we didn't mention it above, the instruction execution
+ordering provided by the smp_rmb() fence doesn't apply to read events
+that are part of a non-value-returning atomic update. For instance,
+given:
+
+ atomic_inc(&x);
+ smp_rmb();
+ r1 = READ_ONCE(y);
+
+it is not guaranteed that the load from y will execute after the
+update to x. This is because the ARMv8 architecture allows
+non-value-returning atomic operations effectively to be executed off
+the CPU. Basically, the CPU tells the memory subsystem to increment
+x, and then the increment is carried out by the memory hardware with
+no further involvement from the CPU. Since the CPU doesn't ever read
+the value of x, there is nothing for the smp_rmb() fence to act on.
+
+The LKMM defines a few extra synchronization operations in terms of
+things we have already covered. In particular, rcu_dereference() is
+treated as READ_ONCE() and rcu_assign_pointer() is treated as
+smp_store_release() -- which is basically how the Linux kernel treats
+them.
+
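+That is, as far as the model is concerned (p, q, and gp being
+hypothetical names):
+
+	p = rcu_dereference(gp);	/* modeled as: p = READ_ONCE(gp) */
+	rcu_assign_pointer(gp, q);	/* modeled as: smp_store_release(&gp, q) */
+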
+There are a few oddball fences which need special treatment:
+smp_mb__before_atomic(), smp_mb__after_atomic(), and
+smp_mb__after_spinlock(). The LKMM uses fence events with special
+annotations for them; they act as strong fences just like smp_mb()
+except for the sets of events that they order. Instead of ordering
+all po-earlier events against all po-later events, as smp_mb() does,
+they behave as follows:
+
+ smp_mb__before_atomic() orders all po-earlier events against
+ po-later atomic updates and the events following them;
+
+ smp_mb__after_atomic() orders po-earlier atomic updates and
+ the events preceding them against all po-later events;
+
+	smp_mb__after_spinlock() orders po-earlier lock acquisition
+ events and the events preceding them against all po-later
+ events.
+
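+For instance, in this sketch (x and y being hypothetical variables and
+v a hypothetical atomic_t):
+
+	WRITE_ONCE(x, 1);
+	smp_mb__before_atomic();
+	atomic_inc(&v);
+	r1 = READ_ONCE(y);
+
+the fence orders the store to x against the atomic update to v and
+against the following load from y.
+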
+The LKMM includes locking. In fact, there is special code for locking
+in the formal model, added in order to make tools run faster.
+However, this special code is intended to be exactly equivalent to
+concepts we have already covered. A spinlock_t variable is treated
+the same as an int, and spin_lock(&s) is treated the same as:
+
+ while (cmpxchg_acquire(&s, 0, 1) != 0)
+ cpu_relax();
+
+which waits until s is equal to 0 and then atomically sets it to 1,
+and where the read part of the atomic update is also an acquire fence.
+An alternate way to express the same thing would be:
+
+ r = xchg_acquire(&s, 1);
+
+along with a requirement that at the end, r = 0. spin_unlock(&s) is
+treated the same as:
+
+ smp_store_release(&s, 0);
+
+Interestingly, RCU and locking each introduce the possibility of
+deadlock. When faced with code sequences such as:
+
+ spin_lock(&s);
+ spin_lock(&s);
+ spin_unlock(&s);
+ spin_unlock(&s);
+
+or:
+
+ rcu_read_lock();
+ synchronize_rcu();
+ rcu_read_unlock();
+
+what does the LKMM have to say? Answer: It says there are no allowed
+executions at all, which makes sense. But this can also lead to
+misleading results, because if a piece of code has multiple possible
+executions, some of which deadlock, the model will report only on the
+non-deadlocking executions. For example:
+
+ int x, y;
+
+ P0()
+ {
+ int r0;
+
+ WRITE_ONCE(x, 1);
+ r0 = READ_ONCE(y);
+ }
+
+ P1()
+ {
+ rcu_read_lock();
+ if (READ_ONCE(x) > 0) {
+ WRITE_ONCE(y, 36);
+ synchronize_rcu();
+ }
+ rcu_read_unlock();
+ }
+
+Is it possible to end up with r0 = 36 at the end? The LKMM will tell
+you it is not, but the model won't mention that this is because P1
+will self-deadlock in the executions where it stores 36 in y.
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
new file mode 100644
index 000000000..af72700cc
--- /dev/null
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -0,0 +1,570 @@
+This document provides "recipes", that is, litmus tests for commonly
+occurring situations, as well as a few that illustrate subtly broken but
+attractive nuisances. Many of these recipes include example code from
+v4.13 of the Linux kernel.
+
+The first section covers simple special cases, the second section
+takes off the training wheels to cover more involved examples,
+and the third section provides a few rules of thumb.
+
+
+Simple special cases
+====================
+
+This section presents two simple special cases, the first being where
+there is only one CPU or only one memory location is accessed, and the
+second being use of that old concurrency workhorse, locking.
+
+
+Single CPU or single memory location
+------------------------------------
+
+If there is only one CPU on the one hand or only one variable
+on the other, the code will execute in order. There are (as
+usual) some things to be careful of:
+
+1. Some aspects of the C language are unordered. For example,
+ in the expression "f(x) + g(y)", the order in which f and g are
+ called is not defined; the object code is allowed to use either
+ order or even to interleave the computations.
+
+2. Compilers are permitted to use the "as-if" rule. That is, a
+ compiler can emit whatever code it likes for normal accesses,
+ as long as the results of a single-threaded execution appear
+ just as if the compiler had followed all the relevant rules.
+ To see this, compile with a high level of optimization and run
+ the debugger on the resulting binary.
+
+3. If there is only one variable but multiple CPUs, that variable
+ must be properly aligned and all accesses to that variable must
+ be full sized. Variables that straddle cachelines or pages void
+ your full-ordering warranty, as do undersized accesses that load
+ from or store to only part of the variable.
+
+4. If there are multiple CPUs, accesses to shared variables should
+ use READ_ONCE() and WRITE_ONCE() or stronger to prevent load/store
+ tearing, load/store fusing, and invented loads and stores.
+ There are exceptions to this rule, including:
+
+ i. When there is no possibility of a given shared variable
+ being updated by some other CPU, for example, while
+ holding the update-side lock, reads from that variable
+ need not use READ_ONCE().
+
+ ii. When there is no possibility of a given shared variable
+ being either read or updated by other CPUs, for example,
+ when running during early boot, reads from that variable
+ need not use READ_ONCE() and writes to that variable
+ need not use WRITE_ONCE().
+
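+For example, under exception i, something like the following sketch
+would be acceptable (assuming the hypothetical variable x is updated
+only while update_lock is held):
+
+	spin_lock(&update_lock);
+	tmp = x;		/* plain read is safe: no one else can update x */
+	WRITE_ONCE(x, tmp + 1);	/* writes still use WRITE_ONCE: other CPUs may read x */
+	spin_unlock(&update_lock);
+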
+
+Locking
+-------
+
+Locking is well-known and straightforward, at least if you don't think
+about it too hard. And the basic rule is indeed quite simple: Any CPU that
+has acquired a given lock sees any changes previously seen or made by any
+CPU before it released that same lock. Note that this statement is a bit
+stronger than "Any CPU holding a given lock sees all changes made by any
+CPU during the time that CPU was holding this same lock". For example,
+consider the following pair of code fragments:
+
+ /* See MP+polocks.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ spin_lock(&mylock);
+ WRITE_ONCE(y, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ r0 = READ_ONCE(y);
+ spin_unlock(&mylock);
+ r1 = READ_ONCE(x);
+ }
+
+The basic rule guarantees that if CPU0() acquires mylock before CPU1(),
+then both r0 and r1 must be set to the value 1. This also has the
+consequence that if the final value of r0 is equal to 1, then the final
+value of r1 must also be equal to 1. In contrast, the weaker rule would
+say nothing about the final value of r1.
+
+The converse to the basic rule also holds, as illustrated by the
+following litmus test:
+
+ /* See MP+porevlocks.litmus. */
+ void CPU0(void)
+ {
+ r0 = READ_ONCE(y);
+ spin_lock(&mylock);
+ r1 = READ_ONCE(x);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ WRITE_ONCE(x, 1);
+ spin_unlock(&mylock);
+ WRITE_ONCE(y, 1);
+ }
+
+This converse to the basic rule guarantees that if CPU0() acquires
+mylock before CPU1(), then both r0 and r1 must be set to the value 0.
+This also has the consequence that if the final value of r1 is equal
+to 0, then the final value of r0 must also be equal to 0. In contrast,
+the weaker rule would say nothing about the final value of r0.
+
+These examples show only a single pair of CPUs, but the effects of the
+locking basic rule extend across multiple acquisitions of a given lock
+across multiple CPUs.
+
+However, it is not necessarily the case that accesses ordered by
+locking will be seen as ordered by CPUs not holding that lock.
+Consider this example:
+
+ /* See Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus. */
+ void CPU0(void)
+ {
+ spin_lock(&mylock);
+ WRITE_ONCE(x, 1);
+ WRITE_ONCE(y, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ r0 = READ_ONCE(y);
+ WRITE_ONCE(z, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU2(void)
+ {
+ WRITE_ONCE(z, 2);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+Counter-intuitive though it might be, it is quite possible to have
+the final value of r0 be 1, the final value of z be 2, and the final
+value of r1 be 0. The reason for this surprising outcome is that
+CPU2() never acquired the lock, and thus did not benefit from the
+lock's ordering properties.
+
+Ordering can be extended to CPUs not holding the lock by careful use
+of smp_mb__after_spinlock():
+
+ /* See Z6.0+pooncelock+poonceLock+pombonce.litmus. */
+ void CPU0(void)
+ {
+ spin_lock(&mylock);
+ WRITE_ONCE(x, 1);
+ WRITE_ONCE(y, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU1(void)
+ {
+ spin_lock(&mylock);
+ smp_mb__after_spinlock();
+ r0 = READ_ONCE(y);
+ WRITE_ONCE(z, 1);
+ spin_unlock(&mylock);
+ }
+
+ void CPU2(void)
+ {
+ WRITE_ONCE(z, 2);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+This addition of smp_mb__after_spinlock() strengthens the lock acquisition
+sufficiently to rule out the counter-intuitive outcome.
+
+
+Taking off the training wheels
+==============================
+
+This section looks at more complex examples, including message passing,
+load buffering, release-acquire chains, and store buffering.
+Many classes of litmus tests have abbreviated names, which may be found
+here: https://www.cl.cam.ac.uk/~pes20/ppc-supplemental/test6.pdf
+
+
+Message passing (MP)
+--------------------
+
+The MP pattern has one CPU execute a pair of stores to a pair of variables
+and another CPU execute a pair of loads from this same pair of variables,
+but in the opposite order. The goal is to avoid the counter-intuitive
+outcome in which the first load sees the value written by the second store
+but the second load does not see the value written by the first store.
+In the absence of any ordering, this goal may not be met, as can be seen
+in the MP+poonceonces.litmus litmus test. This section therefore looks at
+a number of ways of meeting this goal.
+
+
+Release and acquire
+~~~~~~~~~~~~~~~~~~~
+
+Use of smp_store_release() and smp_load_acquire() is one way to force
+the desired MP ordering. The general approach is shown below:
+
+ /* See MP+pooncerelease+poacquireonce.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ smp_store_release(&y, 1);
+ }
+
+ void CPU1(void)
+ {
+ r0 = smp_load_acquire(&y);
+ r1 = READ_ONCE(x);
+ }
+
+The smp_store_release() macro orders any prior accesses against the
+store, while the smp_load_acquire() macro orders the load against any
+subsequent accesses. Therefore, if the final value of r0 is the value 1,
+the final value of r1 must also be the value 1.
+
+The init_stack_slab() function in lib/stackdepot.c uses release-acquire
+in this way to safely initialize a slab of the stack. Working out
+the mutual-exclusion design is left as an exercise for the reader.
+
+
+Assign and dereference
+~~~~~~~~~~~~~~~~~~~~~~
+
+Use of rcu_assign_pointer() and rcu_dereference() is quite similar to the
+use of smp_store_release() and smp_load_acquire(), except that both
+rcu_assign_pointer() and rcu_dereference() operate on RCU-protected
+pointers. The general approach is shown below:
+
+ /* See MP+onceassign+derefonce.litmus. */
+ int z;
+ int *y = &z;
+ int x;
+
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ rcu_assign_pointer(y, &x);
+ }
+
+ void CPU1(void)
+ {
+ rcu_read_lock();
+ r0 = rcu_dereference(y);
+ r1 = READ_ONCE(*r0);
+ rcu_read_unlock();
+ }
+
+In this example, if the final value of r0 is &x then the final value of
+r1 must be 1.
+
+The rcu_assign_pointer() macro has the same ordering properties as does
+smp_store_release(), but the rcu_dereference() macro orders the load only
+against later accesses that depend on the value loaded. A dependency
+is present if the value loaded determines the address of a later access
+(address dependency, as shown above), the value written by a later store
+(data dependency), or whether or not a later store is executed in the
+first place (control dependency). Note that the term "data dependency"
+is sometimes casually used to cover both address and data dependencies.
+
+In lib/prime_numbers.c, the expand_to_next_prime() function invokes
+rcu_assign_pointer(), and the next_prime_number() function invokes
+rcu_dereference(). This combination mediates access to a bit vector
+that is expanded as additional primes are needed.
+
+
+Write and read memory barriers
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+It is usually better to use smp_store_release() instead of smp_wmb()
+and to use smp_load_acquire() instead of smp_rmb(). However, the older
+smp_wmb() and smp_rmb() APIs are still heavily used, so it is important
+to understand their use cases. The general approach is shown below:
+
+ /* See MP+fencewmbonceonce+fencermbonceonce.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ smp_wmb();
+ WRITE_ONCE(y, 1);
+ }
+
+ void CPU1(void)
+ {
+ r0 = READ_ONCE(y);
+ smp_rmb();
+ r1 = READ_ONCE(x);
+ }
+
+The smp_wmb() macro orders prior stores against later stores, and the
+smp_rmb() macro orders prior loads against later loads. Therefore, if
+the final value of r0 is 1, the final value of r1 must also be 1.
+
+The xlog_state_switch_iclogs() function in fs/xfs/xfs_log.c contains
+the following write-side code fragment:
+
+ log->l_curr_block -= log->l_logBBsize;
+ ASSERT(log->l_curr_block >= 0);
+ smp_wmb();
+ log->l_curr_cycle++;
+
+And the xlog_valid_lsn() function in fs/xfs/xfs_log_priv.h contains
+the corresponding read-side code fragment:
+
+ cur_cycle = READ_ONCE(log->l_curr_cycle);
+ smp_rmb();
+ cur_block = READ_ONCE(log->l_curr_block);
+
+Alternatively, consider the following comment in function
+perf_output_put_handle() in kernel/events/ring_buffer.c:
+
+ * kernel user
+ *
+ * if (LOAD ->data_tail) { LOAD ->data_head
+ * (A) smp_rmb() (C)
+ * STORE $data LOAD $data
+ * smp_wmb() (B) smp_mb() (D)
+ * STORE ->data_head STORE ->data_tail
+ * }
+
+The B/C pairing is an example of the MP pattern using smp_wmb() on the
+write side and smp_rmb() on the read side.
+
+Of course, given that smp_mb() is strictly stronger than either smp_wmb()
+or smp_rmb(), any code fragment that would work with smp_rmb() and
+smp_wmb() would also work with smp_mb() replacing either or both of the
+weaker barriers.
+
+
+Load buffering (LB)
+-------------------
+
+The LB pattern has one CPU load from one variable and then store to a
+second, while another CPU loads from the second variable and then stores
+to the first. The goal is to avoid the counter-intuitive situation where
+each load reads the value written by the other CPU's store. In the
+absence of any ordering it is quite possible that this may happen, as
+can be seen in the LB+poonceonces.litmus litmus test.
+
+One way of avoiding the counter-intuitive outcome is through the use of a
+control dependency paired with a full memory barrier:
+
+ /* See LB+fencembonceonce+ctrlonceonce.litmus. */
+ void CPU0(void)
+ {
+ r0 = READ_ONCE(x);
+ if (r0)
+ WRITE_ONCE(y, 1);
+ }
+
+ void CPU1(void)
+ {
+ r1 = READ_ONCE(y);
+ smp_mb();
+ WRITE_ONCE(x, 1);
+ }
+
+This pairing of a control dependency in CPU0() with a full memory
+barrier in CPU1() prevents r0 and r1 from both ending up equal to 1.
+
+The A/D pairing from the ring-buffer use case shown earlier also
+illustrates LB. Here is a repeat of the comment in
+perf_output_put_handle() in kernel/events/ring_buffer.c, showing a
+control dependency on the kernel side and a full memory barrier on
+the user side:
+
+ * kernel user
+ *
+ * if (LOAD ->data_tail) { LOAD ->data_head
+ * (A) smp_rmb() (C)
+ * STORE $data LOAD $data
+ * smp_wmb() (B) smp_mb() (D)
+ * STORE ->data_head STORE ->data_tail
+ * }
+ *
+ * Where A pairs with D, and B pairs with C.
+
+The kernel's control dependency between the load from ->data_tail
+and the store to data combined with the user's full memory barrier
+between the load from data and the store to ->data_tail prevents
+the counter-intuitive outcome where the kernel overwrites the data
+before the user gets done loading it.
+
+
+Release-acquire chains
+----------------------
+
+Release-acquire chains are a low-overhead, flexible, and easy-to-use
+method of maintaining order. However, they do have some limitations that
+need to be fully understood. Here is an example that maintains order:
+
+ /* See ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ smp_store_release(&y, 1);
+ }
+
+ void CPU1(void)
+ {
+		r0 = smp_load_acquire(&y);
+ smp_store_release(&z, 1);
+ }
+
+ void CPU2(void)
+ {
+		r1 = smp_load_acquire(&z);
+ r2 = READ_ONCE(x);
+ }
+
+In this case, if r0 and r1 both have final values of 1, then r2 must
+also have a final value of 1.
+
+The ordering in this example is stronger than it needs to be. For
+example, ordering would still be preserved if CPU1()'s smp_load_acquire()
+invocation was replaced with READ_ONCE().
+
+It is tempting to assume that CPU0()'s store to x is globally ordered
+before CPU1()'s store to z, but this is not the case:
+
+ /* See Z6.0+pooncerelease+poacquirerelease+mbonceonce.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ smp_store_release(&y, 1);
+ }
+
+ void CPU1(void)
+ {
+		r0 = smp_load_acquire(&y);
+ smp_store_release(&z, 1);
+ }
+
+ void CPU2(void)
+ {
+ WRITE_ONCE(z, 2);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+One might hope that if the final value of r0 is 1 and the final value
+of z is 2, then the final value of r1 must also be 1, but it really is
+possible for r1 to have the final value of 0. The reason, of course,
+is that in this version, CPU2() is not part of the release-acquire chain.
+This situation is accounted for in the rules of thumb below.
+
+Despite this limitation, release-acquire chains are low-overhead as
+well as simple and powerful, at least as memory-ordering mechanisms go.
+
+
+Store buffering
+---------------
+
+Store buffering can be thought of as upside-down load buffering, so
+that one CPU first stores to one variable and then loads from a second,
+while another CPU stores to the second variable and then loads from the
+first. Preserving order requires nothing less than full barriers:
+
+ /* See SB+fencembonceonces.litmus. */
+ void CPU0(void)
+ {
+ WRITE_ONCE(x, 1);
+ smp_mb();
+ r0 = READ_ONCE(y);
+ }
+
+ void CPU1(void)
+ {
+ WRITE_ONCE(y, 1);
+ smp_mb();
+ r1 = READ_ONCE(x);
+ }
+
+Omitting either smp_mb() will allow both r0 and r1 to have final
+values of 0, but providing both full barriers as shown above prevents
+this counter-intuitive outcome.
+
+This pattern most famously appears as part of Dekker's locking
+algorithm, but it has a much more practical use within the Linux kernel
+of ordering wakeups. The following comment taken from waitqueue_active()
+in include/linux/wait.h shows the canonical pattern:
+
+ * CPU0 - waker CPU1 - waiter
+ *
+ * for (;;) {
+ * @cond = true; prepare_to_wait(&wq_head, &wait, state);
+ * smp_mb(); // smp_mb() from set_current_state()
+ * if (waitqueue_active(wq_head)) if (@cond)
+ * wake_up(wq_head); break;
+ * schedule();
+ * }
+ * finish_wait(&wq_head, &wait);
+
+On CPU0, the store is to @cond and the load is in waitqueue_active().
+On CPU1, prepare_to_wait() contains both a store to wq_head and a call
+to set_current_state(), which contains an smp_mb() barrier; the load is
+"if (@cond)". The full barriers prevent the undesirable outcome where
+CPU1 puts the waiting task to sleep and CPU0 fails to wake it up.
+
+Note that use of locking can greatly simplify this pattern.
+
+
+Rules of thumb
+==============
+
+There might seem to be no pattern governing what ordering primitives are
+needed in which situations, but this is not the case. There is a pattern
+based on the relation between the accesses linking successive CPUs in a
+given litmus test.  There are three types of linkage, illustrated by
+the sketch following this list:
+
+1. Write-to-read, where the next CPU reads the value that the
+ previous CPU wrote. The LB litmus-test patterns contain only
+ this type of relation. In formal memory-modeling texts, this
+ relation is called "reads-from" and is usually abbreviated "rf".
+
+2. Read-to-write, where the next CPU overwrites the value that the
+ previous CPU read. The SB litmus test contains only this type
+ of relation. In formal memory-modeling texts, this relation is
+ often called "from-reads" and is sometimes abbreviated "fr".
+
+3. Write-to-write, where the next CPU overwrites the value written
+ by the previous CPU. The Z6.0 litmus test pattern contains a
+ write-to-write relation between the last access of CPU1() and
+ the first access of CPU2(). In formal memory-modeling texts,
+ this relation is often called "coherence order" and is sometimes
+ abbreviated "co". In the C++ standard, it is instead called
+ "modification order" and often abbreviated "mo".
+
+The strength of memory ordering required for a given litmus test to
+avoid a counter-intuitive outcome depends on the types of relations
+linking the memory accesses for the outcome in question:
+
+o If all links are write-to-read links, then the weakest
+ possible ordering within each CPU suffices. For example, in
+ the LB litmus test, a control dependency was enough to do the
+ job.
+
+o If all but one of the links are write-to-read links, then a
+ release-acquire chain suffices. Both the MP and the ISA2
+ litmus tests illustrate this case.
+
+o If more than one of the links are something other than
+ write-to-read links, then a full memory barrier is required
+ between each successive pair of non-write-to-read links. This
+ case is illustrated by the Z6.0 litmus tests, both in the
+ locking and in the release-acquire sections.
+
+However, if you find yourself having to stretch these rules of thumb
+to fit your situation, you should consider creating a litmus test and
+running it on the model.
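+
+For example, assuming the herd7 tool described in the README is
+installed and the new test has been saved as my-test.litmus (a
+hypothetical name), the check might look like this:
+
+	$ herd7 -conf linux-kernel.cfg my-test.litmus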
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
new file mode 100644
index 000000000..b177f3e4a
--- /dev/null
+++ b/tools/memory-model/Documentation/references.txt
@@ -0,0 +1,114 @@
+This document provides background reading for memory models and related
+tools. These documents are aimed at kernel hackers who are interested
+in memory models.
+
+
+Hardware manuals and models
+===========================
+
+o SPARC International Inc. (Ed.). 1994. "The SPARC Architecture
+ Reference Manual Version 9". SPARC International Inc.
+
+o Compaq Computer Corporation (Ed.). 2002. "Alpha Architecture
+ Reference Manual". Compaq Computer Corporation.
+
+o Intel Corporation (Ed.). 2002. "A Formal Specification of Intel
+ Itanium Processor Family Memory Ordering". Intel Corporation.
+
+o Intel Corporation (Ed.). 2002. "Intel 64 and IA-32 Architectures
+ Software Developer’s Manual". Intel Corporation.
+
+o Peter Sewell, Susmit Sarkar, Scott Owens, Francesco Zappa Nardelli,
+ and Magnus O. Myreen. 2010. "x86-TSO: A Rigorous and Usable
+ Programmer's Model for x86 Multiprocessors". Commun. ACM 53, 7
+ (July, 2010), 89-97. http://doi.acm.org/10.1145/1785414.1785443
+
+o IBM Corporation (Ed.). 2009. "Power ISA Version 2.06". IBM
+ Corporation.
+
+o ARM Ltd. (Ed.). 2009. "ARM Barrier Litmus Tests and Cookbook".
+ ARM Ltd.
+
+o Susmit Sarkar, Peter Sewell, Jade Alglave, Luc Maranget, and
+ Derek Williams. 2011. "Understanding POWER Multiprocessors". In
+ Proceedings of the 32Nd ACM SIGPLAN Conference on Programming
+ Language Design and Implementation (PLDI ’11). ACM, New York,
+ NY, USA, 175–186.
+
+o Susmit Sarkar, Kayvan Memarian, Scott Owens, Mark Batty,
+ Peter Sewell, Luc Maranget, Jade Alglave, and Derek Williams.
+ 2012. "Synchronising C/C++ and POWER". In Proceedings of the 33rd
+ ACM SIGPLAN Conference on Programming Language Design and
+ Implementation (PLDI '12). ACM, New York, NY, USA, 311-322.
+
+o ARM Ltd. (Ed.). 2014. "ARM Architecture Reference Manual (ARMv8,
+ for ARMv8-A architecture profile)". ARM Ltd.
+
+o Imagination Technologies, LTD. 2015. "MIPS(R) Architecture
+ For Programmers, Volume II-A: The MIPS64(R) Instruction,
+ Set Reference Manual". Imagination Technologies,
+ LTD. https://imgtec.com/?do-download=4302.
+
+o Shaked Flur, Kathryn E. Gray, Christopher Pulte, Susmit
+ Sarkar, Ali Sezgin, Luc Maranget, Will Deacon, and Peter
+ Sewell. 2016. "Modelling the ARMv8 Architecture, Operationally:
+ Concurrency and ISA". In Proceedings of the 43rd Annual ACM
+ SIGPLAN-SIGACT Symposium on Principles of Programming Languages
+ (POPL ’16). ACM, New York, NY, USA, 608–621.
+
+o Shaked Flur, Susmit Sarkar, Christopher Pulte, Kyndylan Nienhuis,
+ Luc Maranget, Kathryn E. Gray, Ali Sezgin, Mark Batty, and Peter
+ Sewell. 2017. "Mixed-size Concurrency: ARM, POWER, C/C++11,
+ and SC". In Proceedings of the 44th ACM SIGPLAN Symposium on
+ Principles of Programming Languages (POPL 2017). ACM, New York,
+ NY, USA, 429–442.
+
+o Christopher Pulte, Shaked Flur, Will Deacon, Jon French,
+ Susmit Sarkar, and Peter Sewell. 2018. "Simplifying ARM concurrency:
+ multicopy-atomic axiomatic and operational models for ARMv8". In
+ Proceedings of the ACM on Programming Languages, Volume 2, Issue
+ POPL, Article No. 19. ACM, New York, NY, USA.
+
+
+Linux-kernel memory model
+=========================
+
+o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+ Alan Stern. 2018. "Frightening small children and disconcerting
+ grown-ups: Concurrency in the Linux kernel". In Proceedings of
+ the 23rd International Conference on Architectural Support for
+ Programming Languages and Operating Systems (ASPLOS 2018). ACM,
+ New York, NY, USA, 405-418. Webpage: http://diy.inria.fr/linux/.
+
+o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+ Alan Stern. 2017. "A formal kernel memory-ordering model (part 1)"
+ Linux Weekly News. https://lwn.net/Articles/718628/
+
+o Jade Alglave, Luc Maranget, Paul E. McKenney, Andrea Parri, and
+ Alan Stern. 2017. "A formal kernel memory-ordering model (part 2)"
+ Linux Weekly News. https://lwn.net/Articles/720550/
+
+
+Memory-model tooling
+====================
+
+o Daniel Jackson. 2002. "Alloy: A Lightweight Object Modelling
+ Notation". ACM Trans. Softw. Eng. Methodol. 11, 2 (April 2002),
+ 256–290. http://doi.acm.org/10.1145/505145.505149
+
+o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
+ Cats: Modelling, Simulation, Testing, and Data Mining for Weak
+ Memory". ACM Trans. Program. Lang. Syst. 36, 2, Article 7 (July
+ 2014), 7:1–7:74 pages.
+
+o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
+ semantics of the weak consistency model specification language
+ cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
+
+
+Memory-model comparisons
+========================
+
+o Paul E. McKenney, Ulrich Weigand, Andrea Parri, and Boqun
+ Feng. 2016. "Linux-Kernel Memory Model". (6 June 2016).
+ http://open-std.org/JTC1/SC22/WG21/docs/papers/2016/p0124r2.html.
diff --git a/tools/memory-model/README b/tools/memory-model/README
new file mode 100644
index 000000000..ee987ce20
--- /dev/null
+++ b/tools/memory-model/README
@@ -0,0 +1,206 @@
+ =====================================
+ LINUX KERNEL MEMORY CONSISTENCY MODEL
+ =====================================
+
+============
+INTRODUCTION
+============
+
+This directory contains the memory consistency model (memory model, for
+short) of the Linux kernel, written in the "cat" language and executable
+by the externally provided "herd7" simulator, which exhaustively explores
+the state space of small litmus tests.
+
+In addition, the "klitmus7" tool (also externally provided) may be used
+to convert a litmus test to a Linux kernel module, which in turn allows
+that litmus test to be exercised within the Linux kernel.
+
+
+============
+REQUIREMENTS
+============
+
+Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
+separately:
+
+ https://github.com/herd/herdtools7
+
+See "herdtools7/INSTALL.md" for installation instructions.
+
+
+==================
+BASIC USAGE: HERD7
+==================
+
+The memory model is used, in conjunction with "herd7", to exhaustively
+explore the state space of small litmus tests.
+
+For example, to run SB+fencembonceonces.litmus against the memory model:
+
+ $ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
+
+Here is the corresponding output:
+
+ Test SB+fencembonceonces Allowed
+ States 3
+ 0:r0=0; 1:r0=1;
+ 0:r0=1; 1:r0=0;
+ 0:r0=1; 1:r0=1;
+ No
+ Witnesses
+ Positive: 0 Negative: 3
+ Condition exists (0:r0=0 /\ 1:r0=0)
+ Observation SB+fencembonceonces Never 0 3
+ Time SB+fencembonceonces 0.01
+ Hash=d66d99523e2cac6b06e66f4c995ebb48
+
+The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
+this litmus test's "exists" clause cannot be satisfied.
+
+See "herd7 -help" or "herdtools7/doc/" for more information.
+
+
+=====================
+BASIC USAGE: KLITMUS7
+=====================
+
+The "klitmus7" tool converts a litmus test into a Linux kernel module,
+which may then be loaded and run.
+
+For example, to run SB+fencembonceonces.litmus against hardware:
+
+ $ mkdir mymodules
+ $ klitmus7 -o mymodules litmus-tests/SB+fencembonceonces.litmus
+ $ cd mymodules ; make
+ $ sudo sh run.sh
+
+The corresponding output includes:
+
+ Test SB+fencembonceonces Allowed
+ Histogram (3 states)
+ 644580 :>0:r0=1; 1:r0=0;
+ 644328 :>0:r0=0; 1:r0=1;
+ 711092 :>0:r0=1; 1:r0=1;
+ No
+ Witnesses
+ Positive: 0, Negative: 2000000
+ Condition exists (0:r0=0 /\ 1:r0=0) is NOT validated
+ Hash=d66d99523e2cac6b06e66f4c995ebb48
+ Observation SB+fencembonceonces Never 0 2000000
+ Time SB+fencembonceonces 0.16
+
+The "Positive: 0 Negative: 2000000" and the "Never 0 2000000" indicate
+that during two million trials, the state specified in this litmus
+test's "exists" clause was not reached.
+
+And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
+for more information.
+
+
+====================
+DESCRIPTION OF FILES
+====================
+
+Documentation/cheatsheet.txt
+ Quick-reference guide to the Linux-kernel memory model.
+
+Documentation/explanation.txt
+ Describes the memory model in detail.
+
+Documentation/recipes.txt
+ Lists common memory-ordering patterns.
+
+Documentation/references.txt
+ Provides background reading.
+
+linux-kernel.bell
+ Categorizes the relevant instructions, including memory
+ references, memory barriers, atomic read-modify-write operations,
+ lock acquisition/release, and RCU operations.
+
+ More formally, this file (1) lists the subtypes of the various
+ event types used by the memory model and (2) performs RCU
+ read-side critical section nesting analysis.
+
+linux-kernel.cat
+ Specifies what reorderings are forbidden by memory references,
+ memory barriers, atomic read-modify-write operations, and RCU.
+
+ More formally, this file specifies what executions are forbidden
+ by the memory model. Allowed executions are those which
+ satisfy the model's "coherence", "atomic", "happens-before",
+ "propagation", and "rcu" axioms, which are defined in the file.
+
+linux-kernel.cfg
+ Convenience file that gathers the common-case herd7 command-line
+ arguments.
+
+linux-kernel.def
+ Maps from C-like syntax to herd7's internal litmus-test
+ instruction-set architecture.
+
+litmus-tests
+ Directory containing a few representative litmus tests, which
+	are listed in litmus-tests/README.  A great many more litmus
+	tests are available at https://github.com/paulmckrcu/litmus.
+
+lock.cat
+ Provides a front-end analysis of lock acquisition and release,
+ for example, associating a lock acquisition with the preceding
+ and following releases and checking for self-deadlock.
+
+ More formally, this file defines a performance-enhanced scheme
+ for generation of the possible reads-from and coherence order
+ relations on the locking primitives.
+
+README
+ This file.
+
+
+===========
+LIMITATIONS
+===========
+
+The Linux-kernel memory model has the following limitations:
+
+1. Compiler optimizations are not modeled. Of course, the use
+ of READ_ONCE() and WRITE_ONCE() limits the compiler's ability
+ to optimize, but there is Linux-kernel code that uses bare C
+ memory accesses. Handling this code is on the to-do list.
+ For more information, see Documentation/explanation.txt (in
+ particular, the "THE PROGRAM ORDER RELATION: po AND po-loc"
+ and "A WARNING" sections).
+
+2. Multiple access sizes for a single variable are not supported,
+ and neither are misaligned or partially overlapping accesses.
+
+3.	Exceptions and interrupts are not modeled.  In some cases,
+	this limitation can be overcome by modeling the interrupt or
+	exception with an additional process, as sketched following
+	this list.
+
+4. I/O such as MMIO or DMA is not supported.
+
+5. Self-modifying code (such as that found in the kernel's
+ alternatives mechanism, function tracer, Berkeley Packet Filter
+ JIT compiler, and module loader) is not supported.
+
+6. Complete modeling of all variants of atomic read-modify-write
+ operations, locking primitives, and RCU is not provided.
+ For example, call_rcu() and rcu_barrier() are not supported.
+ However, a substantial amount of support is provided for these
+ operations, as shown in the linux-kernel.def file.
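+
+As a hedged illustration of the workaround in item 3 (this fragment
+is not from any file in this directory), an interrupt handler that
+sets a flag might be modeled as one more litmus-test process:
+
+	P2(int *flag)
+	{
+		WRITE_ONCE(*flag, 1);	/* stands in for the handler */
+	}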
+
+The "herd7" tool has some additional limitations of its own, apart from
+the memory model:
+
+1. Non-trivial data structures such as arrays or structures are
+ not supported. However, pointers are supported, allowing trivial
+ linked lists to be constructed.
+
+2. Dynamic memory allocation is not supported, although this can
+ be worked around in some cases by supplying multiple statically
+ allocated variables.
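+
+As a hedged sketch of the workaround in item 2 (again, not itself a
+test from this directory), a dynamically allocated node can be
+replaced by a statically allocated variable whose address is then
+published:
+
+	P0(int *x, int **y)
+	{
+		WRITE_ONCE(*x, 1);		/* "initialize" the node */
+		rcu_assign_pointer(*y, x);	/* "publish" it */
+	}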
+
+Some of these limitations may be overcome in the future, but others are
+more likely to be addressed by incorporating the Linux-kernel memory model
+into other tools.
diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell
new file mode 100644
index 000000000..b84fb2f67
--- /dev/null
+++ b/tools/memory-model/linux-kernel.bell
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0+
+(*
+ * Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
+ * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
+ * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
+ * Andrea Parri <parri.andrea@gmail.com>
+ *
+ * An earlier version of this file appeared in the companion webpage for
+ * "Frightening small children and disconcerting grown-ups: Concurrency
+ * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
+ * which appeared in ASPLOS 2018.
+ *)
+
+"Linux-kernel memory consistency model"
+
+enum Accesses = 'once (*READ_ONCE,WRITE_ONCE*) ||
+ 'release (*smp_store_release*) ||
+ 'acquire (*smp_load_acquire*) ||
+ 'noreturn (* R of non-return RMW *)
+instructions R[{'once,'acquire,'noreturn}]
+instructions W[{'once,'release}]
+instructions RMW[{'once,'acquire,'release}]
+
+enum Barriers = 'wmb (*smp_wmb*) ||
+ 'rmb (*smp_rmb*) ||
+ 'mb (*smp_mb*) ||
+ 'rcu-lock (*rcu_read_lock*) ||
+ 'rcu-unlock (*rcu_read_unlock*) ||
+ 'sync-rcu (*synchronize_rcu*) ||
+ 'before-atomic (*smp_mb__before_atomic*) ||
+ 'after-atomic (*smp_mb__after_atomic*) ||
+ 'after-spinlock (*smp_mb__after_spinlock*)
+instructions F[Barriers]
+
+(* Compute matching pairs of nested Rcu-lock and Rcu-unlock *)
+let matched = let rec
+ unmatched-locks = Rcu-lock \ domain(matched)
+ and unmatched-unlocks = Rcu-unlock \ range(matched)
+ and unmatched = unmatched-locks | unmatched-unlocks
+ and unmatched-po = [unmatched] ; po ; [unmatched]
+ and unmatched-locks-to-unlocks =
+ [unmatched-locks] ; po ; [unmatched-unlocks]
+ and matched = matched | (unmatched-locks-to-unlocks \
+ (unmatched-po ; unmatched-po))
+ in matched
+
+(* Validate nesting *)
+flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking
+flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking
+
+(* Outermost level of nesting only *)
+let crit = matched \ (po^-1 ; matched ; po^-1)
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
new file mode 100644
index 000000000..59b5cbe6b
--- /dev/null
+++ b/tools/memory-model/linux-kernel.cat
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0+
+(*
+ * Copyright (C) 2015 Jade Alglave <j.alglave@ucl.ac.uk>,
+ * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
+ * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>,
+ * Andrea Parri <parri.andrea@gmail.com>
+ *
+ * An earlier version of this file appeared in the companion webpage for
+ * "Frightening small children and disconcerting grown-ups: Concurrency
+ * in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
+ * which appeared in ASPLOS 2018.
+ *)
+
+"Linux-kernel memory consistency model"
+
+(*
+ * File "lock.cat" handles locks and is experimental.
+ * It can be replaced by include "cos.cat" for tests that do not use locks.
+ *)
+
+include "lock.cat"
+
+(*******************)
+(* Basic relations *)
+(*******************)
+
+(* Fences *)
+let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]
+let wmb = [W] ; fencerel(Wmb) ; [W]
+let mb = ([M] ; fencerel(Mb) ; [M]) |
+ ([M] ; fencerel(Before-atomic) ; [RMW] ; po? ; [M]) |
+ ([M] ; po? ; [RMW] ; fencerel(After-atomic) ; [M]) |
+ ([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M])
+let gp = po ; [Sync-rcu] ; po?
+
+let strong-fence = mb | gp
+
+(* Release Acquire *)
+let acq-po = [Acquire] ; po ; [M]
+let po-rel = [M] ; po ; [Release]
+let rfi-rel-acq = [Release] ; rfi ; [Acquire]
+
+(**********************************)
+(* Fundamental coherence ordering *)
+(**********************************)
+
+(* Sequential Consistency Per Variable *)
+let com = rf | co | fr
+acyclic po-loc | com as coherence
+
+(* Atomic Read-Modify-Write *)
+empty rmw & (fre ; coe) as atomic
+
+(**********************************)
+(* Instruction execution ordering *)
+(**********************************)
+
+(* Preserved Program Order *)
+let dep = addr | data
+let rwdep = (dep | ctrl) ; [W]
+let overwrite = co | fr
+let to-w = rwdep | (overwrite & int)
+let to-r = addr | (dep ; rfi) | rfi-rel-acq
+let fence = strong-fence | wmb | po-rel | rmb | acq-po
+let ppo = to-r | to-w | fence
+
+(* Propagation: Ordering from release operations and strong fences. *)
+let A-cumul(r) = rfe? ; r
+let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
+let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
+
+(*
+ * Happens Before: Ordering from the passage of time.
+ * No fences needed here for prop because relation confined to one process.
+ *)
+let hb = ppo | rfe | ((prop \ id) & int)
+acyclic hb as happens-before
+
+(****************************************)
+(* Write and fence propagation ordering *)
+(****************************************)
+
+(* Propagation: Each non-rf link needs a strong fence. *)
+let pb = prop ; strong-fence ; hb*
+acyclic pb as propagation
+
+(*******)
+(* RCU *)
+(*******)
+
+(*
+ * Effect of read-side critical section proceeds from the rcu_read_lock()
+ * onward on the one hand and from the rcu_read_unlock() backwards on the
+ * other hand.
+ *)
+let rscs = po ; crit^-1 ; po?
+
+(*
+ * The synchronize_rcu() strong fence is special in that it can order not
+ * one but two non-rf relations, but only in conjunction with an RCU
+ * read-side critical section.
+ *)
+let rcu-link = hb* ; pb* ; prop
+
+(*
+ * Any sequence containing at least as many grace periods as RCU read-side
+ * critical sections (joined by rcu-link) acts as a generalized strong fence.
+ *)
+let rec rcu-fence = gp |
+ (gp ; rcu-link ; rscs) |
+ (rscs ; rcu-link ; gp) |
+ (gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
+ (rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
+ (rcu-fence ; rcu-link ; rcu-fence)
+
+(* rb orders instructions just as pb does *)
+let rb = prop ; rcu-fence ; hb* ; pb*
+
+irreflexive rb as rcu
+
+(*
+ * The happens-before, propagation, and rcu constraints are all
+ * expressions of temporal ordering. They could be replaced by
+ * a single constraint on an "executes-before" relation, xb:
+ *
+ * let xb = hb | pb | rb
+ * acyclic xb as executes-before
+ *)
diff --git a/tools/memory-model/linux-kernel.cfg b/tools/memory-model/linux-kernel.cfg
new file mode 100644
index 000000000..3c8098e99
--- /dev/null
+++ b/tools/memory-model/linux-kernel.cfg
@@ -0,0 +1,21 @@
+macros linux-kernel.def
+bell linux-kernel.bell
+model linux-kernel.cat
+graph columns
+squished true
+showevents noregs
+movelabel true
+fontsize 8
+xscale 2.0
+yscale 1.5
+arrowsize 0.8
+showinitrf false
+showfinalrf false
+showinitwrites false
+splines spline
+pad 0.1
+edgeattr hb,color,indigo
+edgeattr co,color,blue
+edgeattr mb,color,darkgreen
+edgeattr wmb,color,darkgreen
+edgeattr rmb,color,darkgreen
diff --git a/tools/memory-model/linux-kernel.def b/tools/memory-model/linux-kernel.def
new file mode 100644
index 000000000..6fa3eb28d
--- /dev/null
+++ b/tools/memory-model/linux-kernel.def
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// An earlier version of this file appeared in the companion webpage for
+// "Frightening small children and disconcerting grown-ups: Concurrency
+// in the Linux kernel" by Alglave, Maranget, McKenney, Parri, and Stern,
+// which appeared in ASPLOS 2018.
+
+// ONCE
+READ_ONCE(X) __load{once}(X)
+WRITE_ONCE(X,V) { __store{once}(X,V); }
+
+// Release Acquire and friends
+smp_store_release(X,V) { __store{release}(*X,V); }
+smp_load_acquire(X) __load{acquire}(*X)
+rcu_assign_pointer(X,V) { __store{release}(X,V); }
+rcu_dereference(X) __load{once}(X)
+smp_store_mb(X,V) { __store{once}(X,V); __fence{mb}; }
+
+// Fences
+smp_mb() { __fence{mb}; }
+smp_rmb() { __fence{rmb}; }
+smp_wmb() { __fence{wmb}; }
+smp_mb__before_atomic() { __fence{before-atomic}; }
+smp_mb__after_atomic() { __fence{after-atomic}; }
+smp_mb__after_spinlock() { __fence{after-spinlock}; }
+
+// Exchange
+xchg(X,V) __xchg{mb}(X,V)
+xchg_relaxed(X,V) __xchg{once}(X,V)
+xchg_release(X,V) __xchg{release}(X,V)
+xchg_acquire(X,V) __xchg{acquire}(X,V)
+cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
+cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
+cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
+cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
+
+// Spinlocks
+spin_lock(X) { __lock(X); }
+spin_unlock(X) { __unlock(X); }
+spin_trylock(X) __trylock(X)
+spin_is_locked(X) __islocked(X)
+
+// RCU
+rcu_read_lock() { __fence{rcu-lock}; }
+rcu_read_unlock() { __fence{rcu-unlock}; }
+synchronize_rcu() { __fence{sync-rcu}; }
+synchronize_rcu_expedited() { __fence{sync-rcu}; }
+
+// Atomic
+atomic_read(X) READ_ONCE(*X)
+atomic_set(X,V) { WRITE_ONCE(*X,V); }
+atomic_read_acquire(X) smp_load_acquire(X)
+atomic_set_release(X,V) { smp_store_release(X,V); }
+
+atomic_add(V,X) { __atomic_op(X,+,V); }
+atomic_sub(V,X) { __atomic_op(X,-,V); }
+atomic_inc(X) { __atomic_op(X,+,1); }
+atomic_dec(X) { __atomic_op(X,-,1); }
+
+atomic_add_return(V,X) __atomic_op_return{mb}(X,+,V)
+atomic_add_return_relaxed(V,X) __atomic_op_return{once}(X,+,V)
+atomic_add_return_acquire(V,X) __atomic_op_return{acquire}(X,+,V)
+atomic_add_return_release(V,X) __atomic_op_return{release}(X,+,V)
+atomic_fetch_add(V,X) __atomic_fetch_op{mb}(X,+,V)
+atomic_fetch_add_relaxed(V,X) __atomic_fetch_op{once}(X,+,V)
+atomic_fetch_add_acquire(V,X) __atomic_fetch_op{acquire}(X,+,V)
+atomic_fetch_add_release(V,X) __atomic_fetch_op{release}(X,+,V)
+
+atomic_inc_return(X) __atomic_op_return{mb}(X,+,1)
+atomic_inc_return_relaxed(X) __atomic_op_return{once}(X,+,1)
+atomic_inc_return_acquire(X) __atomic_op_return{acquire}(X,+,1)
+atomic_inc_return_release(X) __atomic_op_return{release}(X,+,1)
+atomic_fetch_inc(X) __atomic_fetch_op{mb}(X,+,1)
+atomic_fetch_inc_relaxed(X) __atomic_fetch_op{once}(X,+,1)
+atomic_fetch_inc_acquire(X) __atomic_fetch_op{acquire}(X,+,1)
+atomic_fetch_inc_release(X) __atomic_fetch_op{release}(X,+,1)
+
+atomic_sub_return(V,X) __atomic_op_return{mb}(X,-,V)
+atomic_sub_return_relaxed(V,X) __atomic_op_return{once}(X,-,V)
+atomic_sub_return_acquire(V,X) __atomic_op_return{acquire}(X,-,V)
+atomic_sub_return_release(V,X) __atomic_op_return{release}(X,-,V)
+atomic_fetch_sub(V,X) __atomic_fetch_op{mb}(X,-,V)
+atomic_fetch_sub_relaxed(V,X) __atomic_fetch_op{once}(X,-,V)
+atomic_fetch_sub_acquire(V,X) __atomic_fetch_op{acquire}(X,-,V)
+atomic_fetch_sub_release(V,X) __atomic_fetch_op{release}(X,-,V)
+
+atomic_dec_return(X) __atomic_op_return{mb}(X,-,1)
+atomic_dec_return_relaxed(X) __atomic_op_return{once}(X,-,1)
+atomic_dec_return_acquire(X) __atomic_op_return{acquire}(X,-,1)
+atomic_dec_return_release(X) __atomic_op_return{release}(X,-,1)
+atomic_fetch_dec(X) __atomic_fetch_op{mb}(X,-,1)
+atomic_fetch_dec_relaxed(X) __atomic_fetch_op{once}(X,-,1)
+atomic_fetch_dec_acquire(X) __atomic_fetch_op{acquire}(X,-,1)
+atomic_fetch_dec_release(X) __atomic_fetch_op{release}(X,-,1)
+
+atomic_xchg(X,V) __xchg{mb}(X,V)
+atomic_xchg_relaxed(X,V) __xchg{once}(X,V)
+atomic_xchg_release(X,V) __xchg{release}(X,V)
+atomic_xchg_acquire(X,V) __xchg{acquire}(X,V)
+atomic_cmpxchg(X,V,W) __cmpxchg{mb}(X,V,W)
+atomic_cmpxchg_relaxed(X,V,W) __cmpxchg{once}(X,V,W)
+atomic_cmpxchg_acquire(X,V,W) __cmpxchg{acquire}(X,V,W)
+atomic_cmpxchg_release(X,V,W) __cmpxchg{release}(X,V,W)
+
+atomic_sub_and_test(V,X) __atomic_op_return{mb}(X,-,V) == 0
+atomic_dec_and_test(X) __atomic_op_return{mb}(X,-,1) == 0
+atomic_inc_and_test(X) __atomic_op_return{mb}(X,+,1) == 0
+atomic_add_negative(V,X) __atomic_op_return{mb}(X,+,V) < 0
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
new file mode 100644
index 000000000..6e2ddc541
--- /dev/null
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -0,0 +1 @@
+*.litmus.out
diff --git a/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
new file mode 100644
index 000000000..967f9f2a6
--- /dev/null
+++ b/tools/memory-model/litmus-tests/CoRR+poonceonce+Once.litmus
@@ -0,0 +1,26 @@
+C CoRR+poonceonce+Once
+
+(*
+ * Result: Never
+ *
+ * Test of read-read coherence, that is, whether or not two successive
+ * reads from the same variable are ordered.
+ *)
+
+{}
+
+P0(int *x)
+{
+ WRITE_ONCE(*x, 1);
+}
+
+P1(int *x)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*x);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
new file mode 100644
index 000000000..4635739f3
--- /dev/null
+++ b/tools/memory-model/litmus-tests/CoRW+poonceonce+Once.litmus
@@ -0,0 +1,25 @@
+C CoRW+poonceonce+Once
+
+(*
+ * Result: Never
+ *
+ * Test of read-write coherence, that is, whether or not a read from
+ * a given variable and a later write to that same variable are ordered.
+ *)
+
+{}
+
+P0(int *x)
+{
+ int r0;
+
+ r0 = READ_ONCE(*x);
+ WRITE_ONCE(*x, 1);
+}
+
+P1(int *x)
+{
+ WRITE_ONCE(*x, 2);
+}
+
+exists (x=2 /\ 0:r0=2)
diff --git a/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
new file mode 100644
index 000000000..bb068c92d
--- /dev/null
+++ b/tools/memory-model/litmus-tests/CoWR+poonceonce+Once.litmus
@@ -0,0 +1,25 @@
+C CoWR+poonceonce+Once
+
+(*
+ * Result: Never
+ *
+ * Test of write-read coherence, that is, whether or not a write to a
+ * given variable and a later read from that same variable are ordered.
+ *)
+
+{}
+
+P0(int *x)
+{
+ int r0;
+
+ WRITE_ONCE(*x, 1);
+ r0 = READ_ONCE(*x);
+}
+
+P1(int *x)
+{
+ WRITE_ONCE(*x, 2);
+}
+
+exists (x=1 /\ 0:r0=2)
diff --git a/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
new file mode 100644
index 000000000..0d9f0a958
--- /dev/null
+++ b/tools/memory-model/litmus-tests/CoWW+poonceonce.litmus
@@ -0,0 +1,18 @@
+C CoWW+poonceonce
+
+(*
+ * Result: Never
+ *
+ * Test of write-write coherence, that is, whether or not two successive
+ * writes to the same variable are ordered.
+ *)
+
+{}
+
+P0(int *x)
+{
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*x, 2);
+}
+
+exists (x=1)
diff --git a/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
new file mode 100644
index 000000000..e729d2776
--- /dev/null
+++ b/tools/memory-model/litmus-tests/IRIW+fencembonceonces+OnceOnce.litmus
@@ -0,0 +1,45 @@
+C IRIW+fencembonceonces+OnceOnce
+
+(*
+ * Result: Never
+ *
+ * Test of independent reads from independent writes with smp_mb()
+ * between each pair of reads.  In other words, is smp_mb() sufficient to
+ * cause two different reading processes to agree on the order of a pair
+ * of writes, where each write is to a different variable by a different
+ * process? This litmus test exercises LKMM's "propagation" rule.
+ *)
+
+{}
+
+P0(int *x)
+{
+ WRITE_ONCE(*x, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*x);
+ smp_mb();
+ r1 = READ_ONCE(*y);
+}
+
+P2(int *y)
+{
+ WRITE_ONCE(*y, 1);
+}
+
+P3(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ smp_mb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0 /\ 3:r0=1 /\ 3:r1=0)
diff --git a/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
new file mode 100644
index 000000000..4b54dd6a6
--- /dev/null
+++ b/tools/memory-model/litmus-tests/IRIW+poonceonces+OnceOnce.litmus
@@ -0,0 +1,43 @@
+C IRIW+poonceonces+OnceOnce
+
+(*
+ * Result: Sometimes
+ *
+ * Test of independent reads from independent writes with nothing
+ * between each pair of reads.  In other words, is anything at all
+ * needed to cause two different reading processes to agree on the order
+ * of a pair of writes, where each write is to a different variable by a
+ * different process?
+ *)
+
+{}
+
+P0(int *x)
+{
+ WRITE_ONCE(*x, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*x);
+ r1 = READ_ONCE(*y);
+}
+
+P2(int *y)
+{
+ WRITE_ONCE(*y, 1);
+}
+
+P3(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0 /\ 3:r0=1 /\ 3:r1=0)
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
new file mode 100644
index 000000000..0f749e419
--- /dev/null
+++ b/tools/memory-model/litmus-tests/ISA2+pooncelock+pooncelock+pombonce.litmus
@@ -0,0 +1,41 @@
+C ISA2+pooncelock+pooncelock+pombonce
+
+(*
+ * Result: Sometimes
+ *
+ * This test shows that the ordering provided by a lock-protected S
+ * litmus test (P0() and P1()) is not visible to external process P2().
+ * This is likely to change soon.
+ *)
+
+{}
+
+P0(int *x, int *y, spinlock_t *mylock)
+{
+ spin_lock(mylock);
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*y, 1);
+ spin_unlock(mylock);
+}
+
+P1(int *y, int *z, spinlock_t *mylock)
+{
+ int r0;
+
+ spin_lock(mylock);
+ r0 = READ_ONCE(*y);
+ WRITE_ONCE(*z, 1);
+ spin_unlock(mylock);
+}
+
+P2(int *x, int *z)
+{
+ int r1;
+ int r2;
+
+ r2 = READ_ONCE(*z);
+ smp_mb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 2:r2=1 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
new file mode 100644
index 000000000..b321aa6f4
--- /dev/null
+++ b/tools/memory-model/litmus-tests/ISA2+poonceonces.litmus
@@ -0,0 +1,37 @@
+C ISA2+poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * Given a release-acquire chain ordering the first process's store
+ * against the last process's load, is ordering preserved if all of the
+ * smp_store_release() invocations are replaced by WRITE_ONCE() and all
+ * of the smp_load_acquire() invocations are replaced by READ_ONCE()?
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *y, int *z)
+{
+ int r0;
+
+ r0 = READ_ONCE(*y);
+ WRITE_ONCE(*z, 1);
+}
+
+P2(int *x, int *z)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*z);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
new file mode 100644
index 000000000..025b0462e
--- /dev/null
+++ b/tools/memory-model/litmus-tests/ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
@@ -0,0 +1,39 @@
+C ISA2+pooncerelease+poacquirerelease+poacquireonce
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that a release-acquire chain suffices
+ * to order P0()'s initial write against P2()'s final read.  The
+ * release-acquire chain suffices because in all but one case
+ * (P2() to P0()), each process reads from the preceding process's
+ * write. In memory-model-speak, there is only one non-reads-from
+ * (AKA non-rf) link, so release-acquire is all that is needed.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ smp_store_release(y, 1);
+}
+
+P1(int *y, int *z)
+{
+ int r0;
+
+ r0 = smp_load_acquire(y);
+ smp_store_release(z, 1);
+}
+
+P2(int *x, int *z)
+{
+ int r0;
+ int r1;
+
+ r0 = smp_load_acquire(z);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
new file mode 100644
index 000000000..4727f5aaf
--- /dev/null
+++ b/tools/memory-model/litmus-tests/LB+fencembonceonce+ctrlonceonce.litmus
@@ -0,0 +1,34 @@
+C LB+fencembonceonce+ctrlonceonce
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that lightweight ordering suffices for
+ * the load-buffering pattern, in other words, preventing the outcome
+ * in which all processes read from the preceding process's write.  In
+ * this example, the combination of a control dependency and a full
+ * memory barrier is enough to do the trick.  (But the full memory
+ * barrier could be replaced with another control dependency and order
+ * would still be maintained.)
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*x);
+ if (r0)
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*y);
+ smp_mb();
+ WRITE_ONCE(*x, 1);
+}
+
+exists (0:r0=1 /\ 1:r0=1)
diff --git a/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
new file mode 100644
index 000000000..07b9904b0
--- /dev/null
+++ b/tools/memory-model/litmus-tests/LB+poacquireonce+pooncerelease.litmus
@@ -0,0 +1,29 @@
+C LB+poacquireonce+pooncerelease
+
+(*
+ * Result: Never
+ *
+ * Does a release-acquire pair suffice for the load-buffering litmus
+ * test, where each process reads from one of two variables then writes
+ * to the other?
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*x);
+ smp_store_release(y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = smp_load_acquire(y);
+ WRITE_ONCE(*x, 1);
+}
+
+exists (0:r0=1 /\ 1:r0=1)
diff --git a/tools/memory-model/litmus-tests/LB+poonceonces.litmus b/tools/memory-model/litmus-tests/LB+poonceonces.litmus
new file mode 100644
index 000000000..74c49cb3c
--- /dev/null
+++ b/tools/memory-model/litmus-tests/LB+poonceonces.litmus
@@ -0,0 +1,28 @@
+C LB+poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * Can the counter-intuitive outcome for the load-buffering pattern
+ * be prevented even with no explicit ordering?
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*x);
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*y);
+ WRITE_ONCE(*x, 1);
+}
+
+exists (0:r0=1 /\ 1:r0=1)
diff --git a/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
new file mode 100644
index 000000000..a273da9fa
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+fencewmbonceonce+fencermbonceonce.litmus
@@ -0,0 +1,30 @@
+C MP+fencewmbonceonce+fencermbonceonce
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that smp_wmb() and smp_rmb() provide
+ * sufficient ordering for the message-passing pattern. However, it
+ * is usually better to use smp_store_release() and smp_load_acquire().
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ smp_wmb();
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ smp_rmb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
new file mode 100644
index 000000000..97731b4bb
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus
@@ -0,0 +1,34 @@
+C MP+onceassign+derefonce
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that rcu_assign_pointer() and
+ * rcu_dereference() suffice to ensure that an RCU reader will not see
+ * pre-initialization garbage when it traverses an RCU-protected data
+ * structure containing a newly inserted element.
+ *)
+
+{
+y=z;
+z=0;
+}
+
+P0(int *x, int **y)
+{
+ WRITE_ONCE(*x, 1);
+ rcu_assign_pointer(*y, x);
+}
+
+P1(int *x, int **y)
+{
+ int *r0;
+ int r1;
+
+ rcu_read_lock();
+ r0 = rcu_dereference(*y);
+ r1 = READ_ONCE(*r0);
+ rcu_read_unlock();
+}
+
+exists (1:r0=x /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
new file mode 100644
index 000000000..50f4d62bb
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockmbonce+poacquiresilsil.litmus
@@ -0,0 +1,35 @@
+C MP+polockmbonce+poacquiresilsil
+
+(*
+ * Result: Never
+ *
+ * Do spinlocks combined with smp_mb__after_spinlock() provide order
+ * to outside observers using spin_is_locked() to sense the lock-held
+ * state, ordered by acquire? Note that when the first spin_is_locked()
+ * returns false and the second true, we know that the smp_load_acquire()
+ * executed before the lock was acquired (loosely speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+ spin_lock(lo);
+ smp_mb__after_spinlock();
+ WRITE_ONCE(*x, 1);
+ spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+ int r1;
+ int r2;
+ int r3;
+
+ r1 = smp_load_acquire(x);
+ r2 = spin_is_locked(lo);
+ r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
new file mode 100644
index 000000000..abf81e7a0
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polockonce+poacquiresilsil.litmus
@@ -0,0 +1,34 @@
+C MP+polockonce+poacquiresilsil
+
+(*
+ * Result: Sometimes
+ *
+ * Do spinlocks provide order to outside observers using spin_is_locked()
+ * to sense the lock-held state, ordered by acquire? Note that when the
+ * first spin_is_locked() returns false and the second true, we know that
+ * the smp_load_acquire() executed before the lock was acquired (loosely
+ * speaking).
+ *)
+
+{
+}
+
+P0(spinlock_t *lo, int *x)
+{
+ spin_lock(lo);
+ WRITE_ONCE(*x, 1);
+ spin_unlock(lo);
+}
+
+P1(spinlock_t *lo, int *x)
+{
+ int r1;
+ int r2;
+ int r3;
+
+ r1 = smp_load_acquire(x);
+ r2 = spin_is_locked(lo);
+ r3 = spin_is_locked(lo);
+}
+
+exists (1:r1=1 /\ 1:r2=0 /\ 1:r3=1)
diff --git a/tools/memory-model/litmus-tests/MP+polocks.litmus b/tools/memory-model/litmus-tests/MP+polocks.litmus
new file mode 100644
index 000000000..712a4fcdf
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+polocks.litmus
@@ -0,0 +1,35 @@
+C MP+polocks
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates how lock acquisitions and releases can
+ * stand in for smp_load_acquire() and smp_store_release(), respectively.
+ * In other words, when holding a given lock (or indeed after releasing a
+ * given lock), a CPU is not only guaranteed to see the accesses that other
+ * CPUs made while previously holding that lock, it is also guaranteed
+ * to see all prior accesses by those other CPUs.
+ *)
+
+{}
+
+P0(int *x, int *y, spinlock_t *mylock)
+{
+ WRITE_ONCE(*x, 1);
+ spin_lock(mylock);
+ WRITE_ONCE(*y, 1);
+ spin_unlock(mylock);
+}
+
+P1(int *x, int *y, spinlock_t *mylock)
+{
+ int r0;
+ int r1;
+
+ spin_lock(mylock);
+ r0 = READ_ONCE(*y);
+ spin_unlock(mylock);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+poonceonces.litmus b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
new file mode 100644
index 000000000..b2b60b84f
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+poonceonces.litmus
@@ -0,0 +1,27 @@
+C MP+poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * Can the counter-intuitive message-passing outcome be prevented with
+ * no ordering at all?
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
new file mode 100644
index 000000000..d52c68429
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
@@ -0,0 +1,28 @@
+C MP+pooncerelease+poacquireonce
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that smp_store_release() and
+ * smp_load_acquire() provide sufficient ordering for the message-passing
+ * pattern.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ smp_store_release(y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = smp_load_acquire(y);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 1:r1=0)
diff --git a/tools/memory-model/litmus-tests/MP+porevlocks.litmus b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
new file mode 100644
index 000000000..72c9276b3
--- /dev/null
+++ b/tools/memory-model/litmus-tests/MP+porevlocks.litmus
@@ -0,0 +1,35 @@
+C MP+porevlocks
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates how lock acquisitions and releases can
+ * stand in for smp_load_acquire() and smp_store_release(), respectively.
+ * In other words, when holding a given lock (or indeed after releasing a
+ * given lock), a CPU is not only guaranteed to see the accesses that other
+ * CPUs made while previously holding that lock, it is also guaranteed to
+ * see all prior accesses by those other CPUs.
+ *)
+
+{}
+
+P0(int *x, int *y, spinlock_t *mylock)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ spin_lock(mylock);
+ r1 = READ_ONCE(*x);
+ spin_unlock(mylock);
+}
+
+P1(int *x, int *y, spinlock_t *mylock)
+{
+ spin_lock(mylock);
+ WRITE_ONCE(*x, 1);
+ spin_unlock(mylock);
+ WRITE_ONCE(*y, 1);
+}
+
+exists (0:r0=1 /\ 0:r1=0)
diff --git a/tools/memory-model/litmus-tests/R+fencembonceonces.litmus b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
new file mode 100644
index 000000000..222a0b850
--- /dev/null
+++ b/tools/memory-model/litmus-tests/R+fencembonceonces.litmus
@@ -0,0 +1,30 @@
+C R+fencembonceonces
+
+(*
+ * Result: Never
+ *
+ * This is the fully ordered (via smp_mb()) version of one of the classic
+ * counterintuitive litmus tests that illustrates the effects of store
+ * propagation delays. Note that weakening either of the barriers would
+ * cause the resulting test to be allowed.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ smp_mb();
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ WRITE_ONCE(*y, 2);
+ smp_mb();
+ r0 = READ_ONCE(*x);
+}
+
+exists (y=2 /\ 1:r0=0)
diff --git a/tools/memory-model/litmus-tests/R+poonceonces.litmus b/tools/memory-model/litmus-tests/R+poonceonces.litmus
new file mode 100644
index 000000000..5386f128a
--- /dev/null
+++ b/tools/memory-model/litmus-tests/R+poonceonces.litmus
@@ -0,0 +1,27 @@
+C R+poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * This is the unordered (thus lacking smp_mb()) version of one of the
+ * classic counterintuitive litmus tests that illustrates the effects of
+ * store propagation delays.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ WRITE_ONCE(*y, 2);
+ r0 = READ_ONCE(*x);
+}
+
+exists (y=2 /\ 1:r0=0)
diff --git a/tools/memory-model/litmus-tests/README b/tools/memory-model/litmus-tests/README
new file mode 100644
index 000000000..4581ec2d3
--- /dev/null
+++ b/tools/memory-model/litmus-tests/README
@@ -0,0 +1,153 @@
+This directory contains the following litmus tests:
+
+CoRR+poonceonce+Once.litmus
+ Test of read-read coherence, that is, whether or not two
+ successive reads from the same variable are ordered.
+
+CoRW+poonceonce+Once.litmus
+ Test of read-write coherence, that is, whether or not a read
+ from a given variable followed by a write to that same variable
+ are ordered.
+
+CoWR+poonceonce+Once.litmus
+ Test of write-read coherence, that is, whether or not a write
+ to a given variable followed by a read from that same variable
+ are ordered.
+
+CoWW+poonceonce.litmus
+ Test of write-write coherence, that is, whether or not two
+ successive writes to the same variable are ordered.
+
+IRIW+fencembonceonces+OnceOnce.litmus
+ Test of independent reads from independent writes with smp_mb()
+	between each pair of reads.  In other words, is smp_mb()
+ sufficient to cause two different reading processes to agree on
+ the order of a pair of writes, where each write is to a different
+ variable by a different process? This litmus test is forbidden
+ by LKMM's propagation rule.
+
+IRIW+poonceonces+OnceOnce.litmus
+ Test of independent reads from independent writes with nothing
+	between each pair of reads.  In other words, is anything at all
+ needed to cause two different reading processes to agree on the
+ order of a pair of writes, where each write is to a different
+ variable by a different process?
+
+ISA2+pooncelock+pooncelock+pombonce.litmus
+ Tests whether the ordering provided by a lock-protected S
+ litmus test is visible to an external process whose accesses are
+ separated by smp_mb(). This addition of an external process to
+ S is otherwise known as ISA2.
+
+ISA2+poonceonces.litmus
+ As below, but with store-release replaced with WRITE_ONCE()
+ and load-acquire replaced with READ_ONCE().
+
+ISA2+pooncerelease+poacquirerelease+poacquireonce.litmus
+ Can a release-acquire chain order a prior store against
+ a later load?
+
+LB+fencembonceonce+ctrlonceonce.litmus
+ Does a control dependency and an smp_mb() suffice for the
+ load-buffering litmus test, where each process reads from one
+ of two variables then writes to the other?
+
+LB+poacquireonce+pooncerelease.litmus
+ Does a release-acquire pair suffice for the load-buffering
+ litmus test, where each process reads from one of two variables then
+ writes to the other?
+
+LB+poonceonces.litmus
+ As above, but with store-release replaced with WRITE_ONCE()
+ and load-acquire replaced with READ_ONCE().
+
+MP+onceassign+derefonce.litmus
+ As below, but with rcu_assign_pointer() and an rcu_dereference().
+
+MP+polockmbonce+poacquiresilsil.litmus
+ Protect the access with a lock and an smp_mb__after_spinlock()
+ in one process, and use an acquire load followed by a pair of
+ spin_is_locked() calls in the other process.
+
+MP+polockonce+poacquiresilsil.litmus
+ Protect the access with a lock in one process, and use an
+ acquire load followed by a pair of spin_is_locked() calls
+ in the other process.
+
+MP+polocks.litmus
+ As below, but with the second access of the writer process
+	and the first access of the reader process protected by a lock.
+
+MP+poonceonces.litmus
+ As below, but without the smp_rmb() and smp_wmb().
+
+MP+pooncerelease+poacquireonce.litmus
+ As below, but with a release-acquire chain.
+
+MP+porevlocks.litmus
+ As below, but with the first access of the writer process
+	and the second access of the reader process protected by a lock.
+
+MP+fencewmbonceonce+fencermbonceonce.litmus
+	Does an smp_wmb() (between the stores) and an smp_rmb() (between
+	the loads) suffice for the message-passing litmus test, where one
+	process writes data and then a flag, and the other process reads
+	the flag and then the data?  (This is similar to the ISA2 tests,
+ but with two processes instead of three.)
+
+R+fencembonceonces.litmus
+ This is the fully ordered (via smp_mb()) version of one of
+ the classic counterintuitive litmus tests that illustrates the
+ effects of store propagation delays.
+
+R+poonceonces.litmus
+ As above, but without the smp_mb() invocations.
+
+SB+fencembonceonces.litmus
+	This is the fully ordered (again, via smp_mb()) version of store
+ buffering, which forms the core of Dekker's mutual-exclusion
+ algorithm.
+
+SB+poonceonces.litmus
+ As above, but without the smp_mb() invocations.
+
+SB+rfionceonce-poonceonces.litmus
+ This litmus test demonstrates that LKMM is not fully multicopy
+	atomic.  (Neither is it other-multicopy atomic.)  This litmus test
+ also demonstrates the "locations" debugging aid, which designates
+ additional registers and locations to be printed out in the dump
+ of final states in the herd7 output. Without the "locations"
+ statement, only those registers and locations mentioned in the
+ "exists" clause will be printed.
+
+S+poonceonces.litmus
+ As below, but without the smp_wmb() and acquire load.
+
+S+fencewmbonceonce+poacquireonce.litmus
+	Can an smp_wmb(), instead of a release, and an acquire order
+ a prior store against a subsequent store?
+
+WRC+poonceonces+Once.litmus
+WRC+pooncerelease+fencermbonceonce+Once.litmus
+ These two are members of an extension of the MP litmus-test
+ class in which the first write is moved to a separate process.
+ The second is forbidden because smp_store_release() is
+ A-cumulative in LKMM.
+
+Z6.0+pooncelock+pooncelock+pombonce.litmus
+ Is the ordering provided by a spin_unlock() and a subsequent
+ spin_lock() sufficient to make ordering apparent to accesses
+ by a process not holding the lock?
+
+Z6.0+pooncelock+poonceLock+pombonce.litmus
+ As above, but with smp_mb__after_spinlock() immediately
+ following the spin_lock().
+
+Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
+ Is the ordering provided by a release-acquire chain sufficient
+ to make ordering apparent to accesses by a process that does
+ not participate in that release-acquire chain?
+
+A great many more litmus tests are available here:
+
+ https://github.com/paulmckrcu/litmus
diff --git a/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
new file mode 100644
index 000000000..18479823c
--- /dev/null
+++ b/tools/memory-model/litmus-tests/S+fencewmbonceonce+poacquireonce.litmus
@@ -0,0 +1,27 @@
+C S+fencewmbonceonce+poacquireonce
+
+(*
+ * Result: Never
+ *
+ * Can an smp_wmb(), instead of a release, and an acquire order a prior
+ * store against a subsequent store?
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 2);
+ smp_wmb();
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = smp_load_acquire(y);
+ WRITE_ONCE(*x, 1);
+}
+
+exists (x=2 /\ 1:r0=1)
diff --git a/tools/memory-model/litmus-tests/S+poonceonces.litmus b/tools/memory-model/litmus-tests/S+poonceonces.litmus
new file mode 100644
index 000000000..8c9c2f81a
--- /dev/null
+++ b/tools/memory-model/litmus-tests/S+poonceonces.litmus
@@ -0,0 +1,28 @@
+C S+poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * Starting with a two-process release-acquire chain ordering P0()'s
+ * first store against P1()'s final load, if the smp_store_release()
+ * is replaced by WRITE_ONCE() and the smp_load_acquire() replaced by
+ * READ_ONCE(), is ordering preserved?
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 2);
+ WRITE_ONCE(*y, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*y);
+ WRITE_ONCE(*x, 1);
+}
+
+exists (x=2 /\ 1:r0=1)
diff --git a/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
new file mode 100644
index 000000000..ed5fff18d
--- /dev/null
+++ b/tools/memory-model/litmus-tests/SB+fencembonceonces.litmus
@@ -0,0 +1,32 @@
+C SB+fencembonceonces
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates that full memory barriers suffice to
+ * order the store-buffering pattern, where each process writes to the
+ * variable that the preceding process reads. (Locking and RCU can also
+ * suffice, but not much else.)
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ int r0;
+
+ WRITE_ONCE(*x, 1);
+ smp_mb();
+ r0 = READ_ONCE(*y);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ WRITE_ONCE(*y, 1);
+ smp_mb();
+ r0 = READ_ONCE(*x);
+}
+
+exists (0:r0=0 /\ 1:r0=0)
diff --git a/tools/memory-model/litmus-tests/SB+poonceonces.litmus b/tools/memory-model/litmus-tests/SB+poonceonces.litmus
new file mode 100644
index 000000000..10d550730
--- /dev/null
+++ b/tools/memory-model/litmus-tests/SB+poonceonces.litmus
@@ -0,0 +1,29 @@
+C SB+poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test demonstrates that at least some ordering is required
+ * to order the store-buffering pattern, where each process writes to the
+ * variable that the preceding process reads.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ int r0;
+
+ WRITE_ONCE(*x, 1);
+ r0 = READ_ONCE(*y);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ WRITE_ONCE(*y, 1);
+ r0 = READ_ONCE(*x);
+}
+
+exists (0:r0=0 /\ 1:r0=0)
diff --git a/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
new file mode 100644
index 000000000..04a166036
--- /dev/null
+++ b/tools/memory-model/litmus-tests/SB+rfionceonce-poonceonces.litmus
@@ -0,0 +1,32 @@
+C SB+rfionceonce-poonceonces
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test demonstrates that LKMM is not fully multicopy atomic.
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ int r1;
+ int r2;
+
+ WRITE_ONCE(*x, 1);
+ r1 = READ_ONCE(*x);
+ r2 = READ_ONCE(*y);
+}
+
+P1(int *x, int *y)
+{
+ int r3;
+ int r4;
+
+ WRITE_ONCE(*y, 1);
+ r3 = READ_ONCE(*y);
+ r4 = READ_ONCE(*x);
+}
+
+locations [0:r1; 1:r3; x; y] (* Debug aid: Print things not in "exists". *)
+exists (0:r2=0 /\ 1:r4=0)
diff --git a/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
new file mode 100644
index 000000000..6a2bc12a1
--- /dev/null
+++ b/tools/memory-model/litmus-tests/WRC+poonceonces+Once.litmus
@@ -0,0 +1,35 @@
+C WRC+poonceonces+Once
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test is an extension of the message-passing pattern,
+ * where the first write is moved to a separate process. Note that this
+ * test has no ordering at all.
+ *)
+
+{}
+
+P0(int *x)
+{
+ WRITE_ONCE(*x, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*x);
+ WRITE_ONCE(*y, 1);
+}
+
+P2(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
new file mode 100644
index 000000000..e9947250d
--- /dev/null
+++ b/tools/memory-model/litmus-tests/WRC+pooncerelease+fencermbonceonce+Once.litmus
@@ -0,0 +1,38 @@
+C WRC+pooncerelease+fencermbonceonce+Once
+
+(*
+ * Result: Never
+ *
+ * This litmus test is an extension of the message-passing pattern, where
+ * the first write is moved to a separate process. Because it features
+ * a release and a read memory barrier, it should be forbidden. More
+ * specifically, this litmus test is forbidden because smp_store_release()
+ * is A-cumulative in LKMM.
+ *)
+
+{}
+
+P0(int *x)
+{
+ WRITE_ONCE(*x, 1);
+}
+
+P1(int *x, int *y)
+{
+ int r0;
+
+ r0 = READ_ONCE(*x);
+ smp_store_release(y, 1);
+}
+
+P2(int *x, int *y)
+{
+ int r0;
+ int r1;
+
+ r0 = READ_ONCE(*y);
+ smp_rmb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ 2:r0=1 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
new file mode 100644
index 000000000..415248fb6
--- /dev/null
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+poonceLock+pombonce.litmus
@@ -0,0 +1,42 @@
+C Z6.0+pooncelock+poonceLock+pombonce
+
+(*
+ * Result: Never
+ *
+ * This litmus test demonstrates how smp_mb__after_spinlock() may be
+ * used to ensure that accesses in different critical sections for a
+ * given lock running on different CPUs are nevertheless seen in order
+ * by CPUs not holding that lock.
+ *)
+
+{}
+
+P0(int *x, int *y, spinlock_t *mylock)
+{
+ spin_lock(mylock);
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*y, 1);
+ spin_unlock(mylock);
+}
+
+P1(int *y, int *z, spinlock_t *mylock)
+{
+ int r0;
+
+ spin_lock(mylock);
+ smp_mb__after_spinlock();
+ r0 = READ_ONCE(*y);
+ WRITE_ONCE(*z, 1);
+ spin_unlock(mylock);
+}
+
+P2(int *x, int *z)
+{
+ int r1;
+
+ WRITE_ONCE(*z, 2);
+ smp_mb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ z=2 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
new file mode 100644
index 000000000..10a2aa04c
--- /dev/null
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
@@ -0,0 +1,40 @@
+C Z6.0+pooncelock+pooncelock+pombonce
+
+(*
+ * Result: Sometimes
+ *
+ * This example demonstrates that a pair of accesses made by different
+ * processes each while holding a given lock will not necessarily be
+ * seen as ordered by a third process not holding that lock.
+ *)
+
+{}
+
+P0(int *x, int *y, spinlock_t *mylock)
+{
+ spin_lock(mylock);
+ WRITE_ONCE(*x, 1);
+ WRITE_ONCE(*y, 1);
+ spin_unlock(mylock);
+}
+
+P1(int *y, int *z, spinlock_t *mylock)
+{
+ int r0;
+
+ spin_lock(mylock);
+ r0 = READ_ONCE(*y);
+ WRITE_ONCE(*z, 1);
+ spin_unlock(mylock);
+}
+
+P2(int *x, int *z)
+{
+ int r1;
+
+ WRITE_ONCE(*z, 2);
+ smp_mb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ z=2 /\ 2:r1=0)
diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
new file mode 100644
index 000000000..88e70b87a
--- /dev/null
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncerelease+poacquirerelease+fencembonceonce.litmus
@@ -0,0 +1,42 @@
+C Z6.0+pooncerelease+poacquirerelease+fencembonceonce
+
+(*
+ * Result: Sometimes
+ *
+ * This litmus test shows that a release-acquire chain, while sufficient
+ * when there is but one non-reads-from (AKA non-rf) link, does not suffice
+ * if there is more than one. Of the three processes, only P1() reads from
+ * P0's write, which means that there are two non-rf links: P1() to P2()
+ * is a write-to-write link (AKA a "coherence" or just "co" link) and P2()
+ * to P0() is a read-to-write link (AKA a "from-reads" or just "fr" link).
+ * When there are two or more non-rf links, you typically will need one
+ * full barrier for each non-rf link. (Exceptions include some cases
+ * involving locking.)
+ *)
+
+{}
+
+P0(int *x, int *y)
+{
+ WRITE_ONCE(*x, 1);
+ smp_store_release(y, 1);
+}
+
+P1(int *y, int *z)
+{
+ int r0;
+
+ r0 = smp_load_acquire(y);
+ smp_store_release(z, 1);
+}
+
+P2(int *x, int *z)
+{
+ int r1;
+
+ WRITE_ONCE(*z, 2);
+ smp_mb();
+ r1 = READ_ONCE(*x);
+}
+
+exists (1:r0=1 /\ z=2 /\ 2:r1=0)
diff --git a/tools/memory-model/lock.cat b/tools/memory-model/lock.cat
new file mode 100644
index 000000000..305ded17e
--- /dev/null
+++ b/tools/memory-model/lock.cat
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0+
+(*
+ * Copyright (C) 2016 Luc Maranget <luc.maranget@inria.fr> for Inria
+ * Copyright (C) 2017 Alan Stern <stern@rowland.harvard.edu>
+ *)
+
+(*
+ * Generate coherence orders and handle lock operations
+ *
+ * Warning: spin_is_locked() crashes herd7 versions strictly before 7.48;
+ * it is functional from herd7 version 7.49 onward.
+ *)
+
+include "cross.cat"
+
+(*
+ * The lock-related events generated by herd are as follows:
+ *
+ * LKR Lock-Read: the read part of a spin_lock() or successful
+ * spin_trylock() read-modify-write event pair
+ * LKW Lock-Write: the write part of a spin_lock() or successful
+ * spin_trylock() RMW event pair
+ * UL Unlock: a spin_unlock() event
+ * LF Lock-Fail: a failed spin_trylock() event
+ * RL Read-Locked: a spin_is_locked() event which returns True
+ * RU Read-Unlocked: a spin_is_locked() event which returns False
+ *
+ * LKR and LKW events always come paired, like all RMW event sequences.
+ *
+ * LKR, LF, RL, and RU are read events; LKR has Acquire ordering.
+ * LKW and UL are write events; UL has Release ordering.
+ * LKW, LF, RL, and RU have no ordering properties.
+ *)
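+
+(*
+ * Illustration (not part of the model): in a litmus test,
+ * spin_lock(mylock) yields an LKR/LKW pair, spin_unlock(mylock)
+ * yields a UL event, a failed spin_trylock(mylock) yields an LF
+ * event, and spin_is_locked(mylock) yields an RL or RU event,
+ * depending on its return value.
+ *)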
+
+(* Backward compatibility *)
+let RL = try RL with emptyset
+let RU = try RU with emptyset
+
+(* Treat RL as a kind of LF: a read with no ordering properties *)
+let LF = LF | RL
+
+(* There should be no ordinary R or W accesses to spinlocks *)
+let ALL-LOCKS = LKR | LKW | UL | LF | RU
+flag ~empty [M \ IW] ; loc ; [ALL-LOCKS] as mixed-lock-accesses
+
+(* Link Lock-Reads to their RMW-partner Lock-Writes *)
+let lk-rmw = ([LKR] ; po-loc ; [LKW]) \ (po ; po)
+let rmw = rmw | lk-rmw
+
+(* The litmus test is invalid if an LKR/LKW event is not part of an RMW pair *)
+flag ~empty LKW \ range(lk-rmw) as unpaired-LKW
+flag ~empty LKR \ domain(lk-rmw) as unpaired-LKR
+
+(*
+ * An LKR must always see an unlocked value; spin_lock() calls nested
+ * inside a critical section (for the same lock) always deadlock.
+ *)
+empty ([LKW] ; po-loc ; [LKR]) \ (po-loc ; [UL] ; po-loc) as lock-nest
+
+(* The final value of a spinlock should not be tested *)
+flag ~empty [FW] ; loc ; [ALL-LOCKS] as lock-final
+
+(*
+ * Put lock operations in their appropriate classes, but leave UL out of W
+ * until after the co relation has been generated.
+ *)
+let R = R | LKR | LF | RU
+let W = W | LKW
+
+let Release = Release | UL
+let Acquire = Acquire | LKR
+
+(* Match LKW events to their corresponding UL events *)
+let critical = ([LKW] ; po-loc ; [UL]) \ (po-loc ; [LKW | UL] ; po-loc)
+
+flag ~empty UL \ range(critical) as unmatched-unlock
+
+(* Allow up to one unmatched LKW per location; more must deadlock *)
+let UNMATCHED-LKW = LKW \ domain(critical)
+empty ([UNMATCHED-LKW] ; loc ; [UNMATCHED-LKW]) \ id as unmatched-locks
+
+(* rfi for LF events: link each LKW to the LF events in its critical section *)
+let rfi-lf = ([LKW] ; po-loc ; [LF]) \ ([LKW] ; po-loc ; [UL] ; po-loc)
+
+(* rfe for LF events *)
+let all-possible-rfe-lf =
+ (*
+ * Given an LF event r, compute the possible rfe edges for that event
+ * (all those starting from LKW events in other threads),
+ * and then convert that relation to a set of single-edge relations.
+ *)
+ let possible-rfe-lf r =
+ let pair-to-relation p = p ++ 0
+ in map pair-to-relation ((LKW * {r}) & loc & ext)
+ (* Do this for each LF event r that isn't in rfi-lf *)
+ in map possible-rfe-lf (LF \ range(rfi-lf))
+
+(* Generate all rf relations for LF events *)
+with rfe-lf from cross(all-possible-rfe-lf)
+let rf-lf = rfe-lf | rfi-lf
+
+(*
+ * RU, i.e., spin_is_locked() returning False, is slightly different.
+ * We rely on the memory model to rule out cases where spin_is_locked()
+ * within one of the lock's critical sections returns False.
+ *)
+
+(* rfi for RU events: an RU may read from the last po-previous UL *)
+let rfi-ru = ([UL] ; po-loc ; [RU]) \ ([UL] ; po-loc ; [LKW] ; po-loc)
+
+(* rfe for RU events: an RU may read from an external UL or the initial write *)
+let all-possible-rfe-ru =
+ let possible-rfe-ru r =
+ let pair-to-relation p = p ++ 0
+ in map pair-to-relation (((UL | IW) * {r}) & loc & ext)
+ in map possible-rfe-ru RU
+
+(* Generate all rf relations for RU events *)
+with rfe-ru from cross(all-possible-rfe-ru)
+let rf-ru = rfe-ru | rfi-ru
+
+(* Final rf relation *)
+let rf = rf | rf-lf | rf-ru
+
+(* Generate all co relations, including LKW events but not UL *)
+let co0 = co0 | ([IW] ; loc ; [LKW]) |
+ (([LKW] ; loc ; [UNMATCHED-LKW]) \ [UNMATCHED-LKW])
+include "cos-opt.cat"
+let W = W | UL
+let M = R | W
+
+(* Merge UL events into co *)
+let co = (co | critical | (critical^-1 ; co))+
+let coe = co & ext
+let coi = co & int
+
+(* Merge LKR events into rf *)
+let rf = rf | ([IW | UL] ; singlestep(co) ; lk-rmw^-1)
+let rfe = rf & ext
+let rfi = rf & int
+
+let fr = rf^-1 ; co
+let fre = fr & ext
+let fri = fr & int
+
+show co,rf,fr
diff --git a/tools/memory-model/scripts/checkalllitmus.sh b/tools/memory-model/scripts/checkalllitmus.sh
new file mode 100755
index 000000000..ca528f9a2
--- /dev/null
+++ b/tools/memory-model/scripts/checkalllitmus.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+#
+# Run herd tests on all .litmus files in the specified directory (which
+# defaults to litmus-tests) and check each file's result against a "Result:"
+# comment within that litmus test. If the verification result does not
+# match that specified in the litmus test, this script prints an error
+# message prefixed with "^^^". It also outputs verification results to
+# a file whose name is that of the specified litmus test, but with ".out"
+# appended.
+#
+# Usage:
+# checkalllitmus.sh [ directory ]
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, whose default is defined by the checklitmus.sh script.
+# Thus, one would normally run this in the directory containing the memory
+# model, optionally specifying the directory holding the litmus tests
+# to check.
+#
+# This script makes no attempt to run the litmus tests concurrently.
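+#
+# Example invocation, from the tools/memory-model directory (the
+# directory argument shown is just the default made explicit):
+#
+#	sh scripts/checkalllitmus.sh litmus-tests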
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmusdir=${1-litmus-tests}
+if test -d "$litmusdir" -a -r "$litmusdir" -a -x "$litmusdir"
+then
+ :
+else
+ echo ' --- ' error: $litmusdir is not an accessible directory
+ exit 255
+fi
+
+# Find the checklitmus script. If it is not where we expect it, then
+# assume that the caller has the PATH environment variable set
+# appropriately.
+if test -x scripts/checklitmus.sh
+then
+ clscript=scripts/checklitmus.sh
+else
+ clscript=checklitmus.sh
+fi
+
+# Run the script on all the litmus tests in the specified directory
+ret=0
+for i in $litmusdir/*.litmus
+do
+ if ! $clscript $i
+ then
+ ret=1
+ fi
+done
+if test "$ret" -ne 0
+then
+ echo " ^^^ VERIFICATION MISMATCHES"
+else
+	echo All litmus tests verified as expected.
+fi
+exit $ret
diff --git a/tools/memory-model/scripts/checklitmus.sh b/tools/memory-model/scripts/checklitmus.sh
new file mode 100755
index 000000000..bf12a75c0
--- /dev/null
+++ b/tools/memory-model/scripts/checklitmus.sh
@@ -0,0 +1,86 @@
+#!/bin/sh
+#
+# Run a herd test and check the result against a "Result:" comment within
+# the litmus test. If the verification result does not match that specified
+# in the litmus test, this script prints an error message prefixed with
+# "^^^" and exits with a non-zero status. It also outputs verification
+# results to a file whose name is that of the specified litmus test, but
+# with ".out" appended.
+#
+# Usage:
+# checklitmus.sh file.litmus
+#
+# The LINUX_HERD_OPTIONS environment variable may be used to specify
+# arguments to herd, which default to "-conf linux-kernel.cfg". Thus,
+# one would normally run this in the directory containing the memory model,
+# specifying the pathname of the litmus test to check.
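+#
+# Example invocation, from the tools/memory-model directory (this just
+# makes the default herd options explicit):
+#
+#	LINUX_HERD_OPTIONS="-conf linux-kernel.cfg" \
+#		sh scripts/checklitmus.sh litmus-tests/SB+poonceonces.litmus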
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright IBM Corporation, 2018
+#
+# Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+litmus=$1
+herdoptions=${LINUX_HERD_OPTIONS--conf linux-kernel.cfg}
+
+if test -f "$litmus" -a -r "$litmus"
+then
+ :
+else
+ echo ' --- ' error: \"$litmus\" is not a readable file
+ exit 255
+fi
+if grep -q '^ \* Result: ' $litmus
+then
+ outcome=`grep -m 1 '^ \* Result: ' $litmus | awk '{ print $3 }'`
+else
+ outcome=specified
+fi
+
+echo Herd options: $herdoptions > $litmus.out
+/usr/bin/time herd7 -o ~/tmp $herdoptions $litmus >> $litmus.out 2>&1
+grep "Herd options:" $litmus.out
+grep '^Observation' $litmus.out
+if grep -q '^Observation' $litmus.out
+then
+ :
+else
+ cat $litmus.out
+ echo ' ^^^ Verification error'
+ echo ' ^^^ Verification error' >> $litmus.out 2>&1
+ exit 255
+fi
+if test "$outcome" = DEADLOCK
+then
+ if grep '^Observation' $litmus.out | grep -q 'Never 0 0$'
+ then
+ ret=0
+ else
+ echo " ^^^ Unexpected non-$outcome verification"
+ echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+ ret=1
+ fi
+elif grep '^Observation' $litmus.out | grep -q $outcome || test "$outcome" = Maybe
+then
+ ret=0
+else
+ echo " ^^^ Unexpected non-$outcome verification"
+ echo " ^^^ Unexpected non-$outcome verification" >> $litmus.out 2>&1
+ ret=1
+fi
+tail -2 $litmus.out | head -1
+exit $ret
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
new file mode 100755
index 000000000..10ceee64a
--- /dev/null
+++ b/tools/nfsd/inject_fault.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+#
+# Script for easier NFSD fault injection
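+#
+# Example usage (the fault type name is illustrative; run the script
+# with no arguments to list the types available on your system):
+#
+#	./inject_fault.sh forget_clients 2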
+
+# Check that debugfs has been mounted
+DEBUGFS=`cat /proc/mounts | grep debugfs`
+if [ "$DEBUGFS" == "" ]; then
+ echo "debugfs does not appear to be mounted!"
+ echo "Please mount debugfs and try again"
+ exit 1
+fi
+
+# Check that the fault injection directory exists
+DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd
+if [ ! -d "$DEBUGDIR" ]; then
+ echo "$DEBUGDIR does not exist"
+ echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
+ exit 1
+fi
+
+function help()
+{
+ echo "Usage $0 injection_type [count]"
+ echo ""
+ echo "Injection types are:"
+ ls $DEBUGDIR
+ exit 1
+}
+
+if [ $# == 0 ]; then
+ help
+elif [ ! -f $DEBUGDIR/$1 ]; then
+ help
+elif [ $# != 2 ]; then
+ COUNT=0
+else
+ COUNT=$2
+fi
+
+BEFORE=`mktemp`
+AFTER=`mktemp`
+dmesg > $BEFORE
+echo $COUNT > $DEBUGDIR/$1
+dmesg > $AFTER
+# Capture lines that only exist in the $AFTER file
+diff $BEFORE $AFTER | grep ">"
+rm -f $BEFORE $AFTER
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
new file mode 100644
index 000000000..914cff128
--- /dev/null
+++ b/tools/objtool/.gitignore
@@ -0,0 +1,3 @@
+arch/x86/lib/inat-tables.c
+objtool
+fixdep
diff --git a/tools/objtool/Build b/tools/objtool/Build
new file mode 100644
index 000000000..749becdf5
--- /dev/null
+++ b/tools/objtool/Build
@@ -0,0 +1,22 @@
+objtool-y += arch/$(SRCARCH)/
+objtool-y += builtin-check.o
+objtool-y += builtin-orc.o
+objtool-y += check.o
+objtool-y += orc_gen.o
+objtool-y += orc_dump.o
+objtool-y += elf.o
+objtool-y += special.o
+objtool-y += objtool.o
+
+objtool-y += libstring.o
+objtool-y += str_error_r.o
+
+CFLAGS += -I$(srctree)/tools/lib
+
+$(OUTPUT)libstring.o: ../lib/string.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)str_error_r.o: ../lib/str_error_r.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
new file mode 100644
index 000000000..3995735a8
--- /dev/null
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -0,0 +1,317 @@
+Compile-time stack metadata validation
+======================================
+
+
+Overview
+--------
+
+The kernel CONFIG_STACK_VALIDATION option enables a host tool named
+objtool which runs at compile time. It has a "check" subcommand which
+analyzes every .o file and ensures the validity of its stack metadata.
+It enforces a set of rules on asm code and C inline assembly code so
+that stack traces can be reliable.
+
+For each function, it recursively follows all possible code paths and
+validates the correct frame pointer state at each instruction.
+
+It also follows code paths involving special sections, like
+.altinstructions, __jump_table, and __ex_table, which can add
+alternative execution paths to a given instruction (or set of
+instructions). Similarly, it knows how to follow switch statements, for
+which gcc sometimes uses jump tables.
+
+(Objtool also has an 'orc generate' subcommand which generates debuginfo
+for the ORC unwinder. See Documentation/x86/orc-unwinder.txt in the
+kernel tree for more details.)
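+
+For example, objtool can also be run by hand on a single object file
+(the paths are illustrative; the kernel build normally invokes it
+automatically):
+
+  $ tools/objtool/objtool check kernel/fork.o
+  $ tools/objtool/objtool orc generate kernel/fork.o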
+
+
+Why do we need stack metadata validation?
+-----------------------------------------
+
+Here are some of the benefits of validating stack metadata:
+
+a) More reliable stack traces for frame pointer enabled kernels
+
+ Frame pointers are used for debugging purposes. They allow runtime
+ code and debug tools to be able to walk the stack to determine the
+ chain of function call sites that led to the currently executing
+ code.
+
+ For some architectures, frame pointers are enabled by
+ CONFIG_FRAME_POINTER. For some other architectures they may be
+ required by the ABI (sometimes referred to as "backchain pointers").
+
+ For C code, gcc automatically generates instructions for setting up
+ frame pointers when the -fno-omit-frame-pointer option is used.
+
+ But for asm code, the frame setup instructions have to be written by
+ hand, which most people don't do. So the end result is that
+ CONFIG_FRAME_POINTER is honored for C code but not for most asm code.
+
+ For stack traces based on frame pointers to be reliable, all
+ functions which call other functions must first create a stack frame
+ and update the frame pointer. If a first function doesn't properly
+ create a stack frame before calling a second function, the *caller*
+ of the first function will be skipped on the stack trace.
+
+ For example, consider the following example backtrace with frame
+ pointers enabled:
+
+ [<ffffffff81812584>] dump_stack+0x4b/0x63
+ [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30
+ [<ffffffff8127f568>] seq_read+0x108/0x3e0
+ [<ffffffff812cce62>] proc_reg_read+0x42/0x70
+ [<ffffffff81256197>] __vfs_read+0x37/0x100
+ [<ffffffff81256b16>] vfs_read+0x86/0x130
+ [<ffffffff81257898>] SyS_read+0x58/0xd0
+ [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76
+
+ It correctly shows that the caller of cmdline_proc_show() is
+ seq_read().
+
+ If we remove the frame pointer logic from cmdline_proc_show() by
+ replacing the frame pointer related instructions with nops, here's
+ what it looks like instead:
+
+ [<ffffffff81812584>] dump_stack+0x4b/0x63
+ [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30
+ [<ffffffff812cce62>] proc_reg_read+0x42/0x70
+ [<ffffffff81256197>] __vfs_read+0x37/0x100
+ [<ffffffff81256b16>] vfs_read+0x86/0x130
+ [<ffffffff81257898>] SyS_read+0x58/0xd0
+ [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76
+
+ Notice that cmdline_proc_show()'s caller, seq_read(), has been
+ skipped. Instead the stack trace seems to show that
+ cmdline_proc_show() was called by proc_reg_read().
+
+ The benefit of objtool here is that because it ensures that *all*
+ functions honor CONFIG_FRAME_POINTER, no functions will ever[*] be
+ skipped on a stack trace.
+
+ [*] unless an interrupt or exception has occurred at the very
+ beginning of a function before the stack frame has been created,
+ or at the very end of the function after the stack frame has been
+ destroyed. This is an inherent limitation of frame pointers.
+
+b) ORC (Oops Rewind Capability) unwind table generation
+
+ An alternative to frame pointers and DWARF, ORC unwind data can be
+ used to walk the stack. Unlike frame pointers, ORC data is out of
+ band. So it doesn't affect runtime performance and it can be
+ reliable even when interrupts or exceptions are involved.
+
+ For more details, see Documentation/x86/orc-unwinder.txt.
+
+c) Higher live patching compatibility rate
+
+ Livepatch has an optional "consistency model", which is needed for
+ more complex patches. In order for the consistency model to work,
+ stack traces need to be reliable (or an unreliable condition needs to
+ be detectable). Objtool makes that possible.
+
+ For more details, see the livepatch documentation in the Linux kernel
+ source tree at Documentation/livepatch/livepatch.txt.
+
+Rules
+-----
+
+To achieve the validation, objtool enforces the following rules:
+
+1. Each callable function must be annotated as such with the ELF
+ function type. In asm code, this is typically done using the
+ ENTRY/ENDPROC macros. If objtool finds a return instruction
+ outside of a function, it flags an error since that usually indicates
+ callable code which should be annotated accordingly.
+
+ This rule is needed so that objtool can properly identify each
+ callable function in order to analyze its stack metadata.
+
+2. Conversely, each section of code which is *not* callable should *not*
+ be annotated as an ELF function. The ENDPROC macro shouldn't be used
+ in this case.
+
+ This rule is needed so that objtool can ignore non-callable code.
+ Such code doesn't have to follow any of the other rules.
+
+3. Each callable function which calls another function must have the
+ correct frame pointer logic, if required by CONFIG_FRAME_POINTER or
+   the architecture's back chain rules.  This can be done in asm code
+ with the FRAME_BEGIN/FRAME_END macros.
+
+ This rule ensures that frame pointer based stack traces will work as
+ designed. If function A doesn't create a stack frame before calling
+ function B, the _caller_ of function A will be skipped on the stack
+ trace.
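+
+   For example, a hand-written asm function that calls another function
+   might look like this (a sketch; the names are illustrative):
+
+	ENTRY(my_asm_func)
+		FRAME_BEGIN
+		call	another_func
+		FRAME_END
+		ret
+	ENDPROC(my_asm_func)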
+
+4. Dynamic jumps and jumps to undefined symbols are only allowed if:
+
+ a) the jump is part of a switch statement; or
+
+ b) the jump matches sibling call semantics and the frame pointer has
+ the same value it had on function entry.
+
+ This rule is needed so that objtool can reliably analyze all of a
+ function's code paths. If a function jumps to code in another file,
+ and it's not a sibling call, objtool has no way to follow the jump
+ because it only analyzes a single file at a time.
+
+5. A callable function may not execute kernel entry/exit instructions.
+ The only code which needs such instructions is kernel entry code,
+   which shouldn't be in callable functions anyway.
+
+ This rule is just a sanity check to ensure that callable functions
+ return normally.
+
+
+Objtool warnings
+----------------
+
+For asm files, if you're getting an error which doesn't make sense,
+first make sure that the affected code follows the above rules.
+
+For C files, the common culprits are inline asm statements and calls to
+"noreturn" functions. See below for more details.
+
+Another possible cause for errors in C code is if the Makefile removes
+-fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
+
+Here are some examples of common warnings reported by objtool, what
+they mean, and suggestions for how to fix them.
+
+
+1. file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+
+ The func() function made a function call without first saving and/or
+ updating the frame pointer, and CONFIG_FRAME_POINTER is enabled.
+
+ If the error is for an asm file, and func() is indeed a callable
+ function, add proper frame pointer logic using the FRAME_BEGIN and
+ FRAME_END macros. Otherwise, if it's not a callable function, remove
+ its ELF function annotation by changing ENDPROC to END, and instead
+ use the manual unwind hint macros in asm/unwind_hints.h.
+
+ If it's a GCC-compiled .c file, the error may be because the function
+ uses an inline asm() statement which has a "call" instruction. An
+ asm() statement with a call instruction must declare the use of the
+ stack pointer in its output operand. On x86_64, this means adding
+ the ASM_CALL_CONSTRAINT as an output constraint:
+
+ asm volatile("call func" : ASM_CALL_CONSTRAINT);
+
+ Otherwise the stack frame may not get created before the call.
+
+
+2. file.o: warning: objtool: .text+0x53: unreachable instruction
+
+ Objtool couldn't find a code path to reach the instruction.
+
+ If the error is for an asm file, and the instruction is inside (or
+ reachable from) a callable function, the function should be annotated
+ with the ENTRY/ENDPROC macros (ENDPROC is the important one).
+ Otherwise, the code should probably be annotated with the unwind hint
+ macros in asm/unwind_hints.h so objtool and the unwinder can know the
+ stack state associated with the code.
+
+ If you're 100% sure the code won't affect stack traces, or if you're
+   just a bad person, you can tell objtool to ignore it.  See the
+ "Adding exceptions" section below.
+
+ If it's not actually in a callable function (e.g. kernel entry code),
+ change ENDPROC to END.
+
+
+4. file.o: warning: objtool: func(): can't find starting instruction
+ or
+ file.o: warning: objtool: func()+0x11dd: can't decode instruction
+
+ Does the file have data in a text section? If so, that can confuse
+ objtool's instruction decoder. Move the data to a more appropriate
+ section like .data or .rodata.
+
+
+5. file.o: warning: objtool: func()+0x6: unsupported instruction in callable function
+
+ This is a kernel entry/exit instruction like sysenter or iret. Such
+ instructions aren't allowed in a callable function, and are most
+ likely part of the kernel entry code. They should usually not have
+ the callable function annotation (ENDPROC) and should always be
+ annotated with the unwind hint macros in asm/unwind_hints.h.
+
+
+6. file.o: warning: objtool: func()+0x26: sibling call from callable instruction with modified stack frame
+
+ This is a dynamic jump or a jump to an undefined symbol. Objtool
+ assumed it's a sibling call and detected that the frame pointer
+ wasn't first restored to its original state.
+
+ If it's not really a sibling call, you may need to move the
+ destination code to the local file.
+
+ If the instruction is not actually in a callable function (e.g.
+ kernel entry code), change ENDPROC to END and annotate manually with
+ the unwind hint macros in asm/unwind_hints.h.
+
+
+7. file.o: warning: objtool: func()+0x5c: stack state mismatch
+
+ The instruction's frame pointer state is inconsistent, depending on
+ which execution path was taken to reach the instruction.
+
+ Make sure that, when CONFIG_FRAME_POINTER is enabled, the function
+ pushes and sets up the frame pointer (for x86_64, this means rbp) at
+ the beginning of the function and pops it at the end of the function.
+ Also make sure that no other code in the function touches the frame
+ pointer.
+
+ Another possibility is that the code has some asm or inline asm which
+ does some unusual things to the stack or the frame pointer. In such
+ cases it's probably appropriate to use the unwind hint macros in
+ asm/unwind_hints.h.
+
+
+8. file.o: warning: objtool: funcA() falls through to next function funcB()
+
+ This means that funcA() doesn't end with a return instruction or an
+ unconditional jump, and that objtool has determined that the function
+ can fall through into the next function. There could be different
+ reasons for this:
+
+ 1) funcA()'s last instruction is a call to a "noreturn" function like
+ panic(). In this case the noreturn function needs to be added to
+ objtool's hard-coded global_noreturns array. Feel free to bug the
+ objtool maintainer, or you can submit a patch.
+
+ 2) funcA() uses the unreachable() annotation in a section of code
+ that is actually reachable.
+
+ 3) If funcA() calls an inline function, the object code for funcA()
+ might be corrupt due to a gcc bug. For more details, see:
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+
+
+If the error doesn't seem to make sense, it could be a bug in objtool.
+Feel free to ask the objtool maintainer for help.
+
+
+Adding exceptions
+-----------------
+
+If you _really_ need objtool to ignore something, and are 100% sure
+that it won't affect kernel stack traces, you can tell objtool to
+ignore it:
+
+- To skip validation of a function, use the STACK_FRAME_NON_STANDARD
+  macro (see the example after this list).
+
+- To skip validation of a file, add
+
+ OBJECT_FILES_NON_STANDARD_filename.o := n
+
+ to the Makefile.
+
+- To skip validation of a directory, add
+
+ OBJECT_FILES_NON_STANDARD := y
+
+ to the Makefile.
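+
+For example, to skip validation of a single function (the function
+name is illustrative), add this to the C file that defines it:
+
+  #include <linux/frame.h>
+
+  STACK_FRAME_NON_STANDARD(my_hand_written_func);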
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
new file mode 100644
index 000000000..15f32f67c
--- /dev/null
+++ b/tools/objtool/Makefile
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+include ../scripts/Makefile.arch
+
+ifeq ($(ARCH),x86_64)
+ARCH := x86
+endif
+
+# always use the host compiler
+AR = $(HOSTAR)
+CC = $(HOSTCC)
+LD = $(HOSTLD)
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+SUBCMD_SRCDIR = $(srctree)/tools/lib/subcmd/
+LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(CURDIR)/)
+LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a
+
+OBJTOOL := $(OUTPUT)objtool
+OBJTOOL_IN := $(OBJTOOL)-in.o
+
+LIBELF_FLAGS := $(shell pkg-config libelf --cflags 2>/dev/null)
+LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf)
+
+all: $(OBJTOOL)
+
+INCLUDES := -I$(srctree)/tools/include \
+ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
+ -I$(srctree)/tools/objtool/arch/$(ARCH)/include
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
+CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
+LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+
+# Allow old libelf to be used:
+elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr)
+CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
+
+AWK = awk
+export srctree OUTPUT CFLAGS SRCARCH AWK
+include $(srctree)/tools/build/Makefile.include
+
+$(OBJTOOL_IN): fixdep FORCE
+ @$(MAKE) $(build)=objtool
+
+$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
+ @$(CONFIG_SHELL) ./sync-check.sh
+ $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+
+
+$(LIBSUBCMD): fixdep FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_SRCDIR) OUTPUT=$(LIBSUBCMD_OUTPUT)
+
+clean:
+ $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
+ $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
new file mode 100644
index 000000000..b0d7dc3d7
--- /dev/null
+++ b/tools/objtool/arch.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ARCH_H
+#define _ARCH_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+#include "elf.h"
+#include "cfi.h"
+
+#define INSN_JUMP_CONDITIONAL 1
+#define INSN_JUMP_UNCONDITIONAL 2
+#define INSN_JUMP_DYNAMIC 3
+#define INSN_CALL 4
+#define INSN_CALL_DYNAMIC 5
+#define INSN_RETURN 6
+#define INSN_CONTEXT_SWITCH 7
+#define INSN_STACK 8
+#define INSN_BUG 9
+#define INSN_NOP 10
+#define INSN_OTHER 11
+#define INSN_LAST INSN_OTHER
+
+enum op_dest_type {
+ OP_DEST_REG,
+ OP_DEST_REG_INDIRECT,
+ OP_DEST_MEM,
+ OP_DEST_PUSH,
+ OP_DEST_LEAVE,
+};
+
+struct op_dest {
+ enum op_dest_type type;
+ unsigned char reg;
+ int offset;
+};
+
+enum op_src_type {
+ OP_SRC_REG,
+ OP_SRC_REG_INDIRECT,
+ OP_SRC_CONST,
+ OP_SRC_POP,
+ OP_SRC_ADD,
+ OP_SRC_AND,
+};
+
+struct op_src {
+ enum op_src_type type;
+ unsigned char reg;
+ int offset;
+};
+
+struct stack_op {
+ struct op_dest dest;
+ struct op_src src;
+};
+
+void arch_initial_func_cfi_state(struct cfi_state *state);
+
+int arch_decode_instruction(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int maxlen,
+ unsigned int *len, unsigned char *type,
+ unsigned long *immediate, struct stack_op *op);
+
+bool arch_callee_saved_reg(unsigned char reg);
+
+#endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
new file mode 100644
index 000000000..b998412c0
--- /dev/null
+++ b/tools/objtool/arch/x86/Build
@@ -0,0 +1,12 @@
+objtool-y += decode.o
+
+inat_tables_script = arch/x86/tools/gen-insn-attr-x86.awk
+inat_tables_maps = arch/x86/lib/x86-opcode-map.txt
+
+$(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+
+$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
+
+CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
new file mode 100644
index 000000000..540a209b7
--- /dev/null
+++ b/tools/objtool/arch/x86/decode.c
@@ -0,0 +1,496 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define unlikely(cond) (cond)
+#include <asm/insn.h>
+#include "lib/inat.c"
+#include "lib/insn.c"
+
+#include "../../elf.h"
+#include "../../arch.h"
+#include "../../warn.h"
+
+static unsigned char op_to_cfi_reg[][2] = {
+ {CFI_AX, CFI_R8},
+ {CFI_CX, CFI_R9},
+ {CFI_DX, CFI_R10},
+ {CFI_BX, CFI_R11},
+ {CFI_SP, CFI_R12},
+ {CFI_BP, CFI_R13},
+ {CFI_SI, CFI_R14},
+ {CFI_DI, CFI_R15},
+};
+
+static int is_x86_64(struct elf *elf)
+{
+ switch (elf->ehdr.e_machine) {
+ case EM_X86_64:
+ return 1;
+ case EM_386:
+ return 0;
+ default:
+ WARN("unexpected ELF machine type %d", elf->ehdr.e_machine);
+ return -1;
+ }
+}
+
+bool arch_callee_saved_reg(unsigned char reg)
+{
+ switch (reg) {
+ case CFI_BP:
+ case CFI_BX:
+ case CFI_R12:
+ case CFI_R13:
+ case CFI_R14:
+ case CFI_R15:
+ return true;
+
+ case CFI_AX:
+ case CFI_CX:
+ case CFI_DX:
+ case CFI_SI:
+ case CFI_DI:
+ case CFI_SP:
+ case CFI_R8:
+ case CFI_R9:
+ case CFI_R10:
+ case CFI_R11:
+ case CFI_RA:
+ default:
+ return false;
+ }
+}
+
+int arch_decode_instruction(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int maxlen,
+ unsigned int *len, unsigned char *type,
+ unsigned long *immediate, struct stack_op *op)
+{
+ struct insn insn;
+ int x86_64, sign;
+ unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
+ rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
+ modrm_reg = 0, sib = 0;
+
+ x86_64 = is_x86_64(elf);
+ if (x86_64 == -1)
+ return -1;
+
+ insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
+ insn_get_length(&insn);
+
+ if (!insn_complete(&insn)) {
+ WARN_FUNC("can't decode instruction", sec, offset);
+ return -1;
+ }
+
+ *len = insn.length;
+ *type = INSN_OTHER;
+
+ if (insn.vex_prefix.nbytes)
+ return 0;
+
+ op1 = insn.opcode.bytes[0];
+ op2 = insn.opcode.bytes[1];
+
+ if (insn.rex_prefix.nbytes) {
+ rex = insn.rex_prefix.bytes[0];
+ rex_w = X86_REX_W(rex) >> 3;
+ rex_r = X86_REX_R(rex) >> 2;
+ rex_x = X86_REX_X(rex) >> 1;
+ rex_b = X86_REX_B(rex);
+ }
+
+ if (insn.modrm.nbytes) {
+ modrm = insn.modrm.bytes[0];
+ modrm_mod = X86_MODRM_MOD(modrm);
+ modrm_reg = X86_MODRM_REG(modrm);
+ modrm_rm = X86_MODRM_RM(modrm);
+ }
+
+ if (insn.sib.nbytes)
+ sib = insn.sib.bytes[0];
+
+ switch (op1) {
+
+ case 0x1:
+ case 0x29:
+ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+ /* add/sub reg, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+
+ case 0x50 ... 0x57:
+
+ /* push reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.type = OP_DEST_PUSH;
+
+ break;
+
+ case 0x58 ... 0x5f:
+
+ /* pop reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+
+ break;
+
+ case 0x68:
+ case 0x6a:
+ /* push immediate */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ break;
+
+ case 0x70 ... 0x7f:
+ *type = INSN_JUMP_CONDITIONAL;
+ break;
+
+ case 0x81:
+ case 0x83:
+ if (rex != 0x48)
+ break;
+
+ if (modrm == 0xe4) {
+ /* and imm, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_AND;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ if (modrm == 0xc4)
+ sign = 1;
+ else if (modrm == 0xec)
+ sign = -1;
+ else
+ break;
+
+ /* add/sub imm, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value * sign;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+
+ case 0x89:
+ if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
+
+ /* mov %rsp, reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ break;
+ }
+
+ if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+
+ /* mov reg, %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ break;
+ }
+
+ /* fallthrough */
+ case 0x88:
+ if (!rex_b &&
+ (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+
+ /* mov reg, disp(%rbp) */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_BP;
+ op->dest.offset = insn.displacement.value;
+
+ } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+
+ /* mov reg, disp(%rsp) */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_SP;
+ op->dest.offset = insn.displacement.value;
+ }
+
+ break;
+
+ case 0x8b:
+ if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+
+ /* mov disp(%rbp), reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+ } else if (rex_w && !rex_b && sib == 0x24 &&
+ modrm_mod != 3 && modrm_rm == 4) {
+
+ /* mov disp(%rsp), reg */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
+
+ break;
+
+ case 0x8d:
+ if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
+
+ *type = INSN_STACK;
+ if (!insn.displacement.value) {
+ /* lea (%rsp), reg */
+ op->src.type = OP_SRC_REG;
+ } else {
+ /* lea disp(%rsp), reg */
+ op->src.type = OP_SRC_ADD;
+ op->src.offset = insn.displacement.value;
+ }
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+
+ } else if (rex == 0x48 && modrm == 0x65) {
+
+ /* lea disp(%rbp), %rsp */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+
+ } else if (rex == 0x49 && modrm == 0x62 &&
+ insn.displacement.value == -8) {
+
+ /*
+ * lea -0x8(%r10), %rsp
+ *
+ * Restoring rsp back to its original value after a
+ * stack realignment.
+ */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R10;
+ op->src.offset = -8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+
+ } else if (rex == 0x49 && modrm == 0x65 &&
+ insn.displacement.value == -16) {
+
+ /*
+ * lea -0x10(%r13), %rsp
+ *
+ * Restoring rsp back to its original value after a
+ * stack realignment.
+ */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R13;
+ op->src.offset = -16;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+
+ break;
+
+ case 0x8f:
+ /* pop to mem */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ break;
+
+ case 0x90:
+ *type = INSN_NOP;
+ break;
+
+ case 0x9c:
+ /* pushf */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ break;
+
+ case 0x9d:
+ /* popf */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ break;
+
+ case 0x0f:
+
+ if (op2 >= 0x80 && op2 <= 0x8f) {
+
+ *type = INSN_JUMP_CONDITIONAL;
+
+ } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
+ op2 == 0x35) {
+
+			/* syscall, sysret, sysenter, sysexit */
+ *type = INSN_CONTEXT_SWITCH;
+
+ } else if (op2 == 0x0b || op2 == 0xb9) {
+
+ /* ud2 */
+ *type = INSN_BUG;
+
+ } else if (op2 == 0x0d || op2 == 0x1f) {
+
+ /* nopl/nopw */
+ *type = INSN_NOP;
+
+ } else if (op2 == 0xa0 || op2 == 0xa8) {
+
+ /* push fs/gs */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+
+ } else if (op2 == 0xa1 || op2 == 0xa9) {
+
+ /* pop fs/gs */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
+
+ break;
+
+ case 0xc9:
+ /*
+ * leave
+ *
+ * equivalent to:
+ * mov bp, sp
+ * pop bp
+ */
+ *type = INSN_STACK;
+ op->dest.type = OP_DEST_LEAVE;
+
+ break;
+
+ case 0xe3:
+ /* jecxz/jrcxz */
+ *type = INSN_JUMP_CONDITIONAL;
+ break;
+
+ case 0xe9:
+ case 0xeb:
+ *type = INSN_JUMP_UNCONDITIONAL;
+ break;
+
+ case 0xc2:
+ case 0xc3:
+ *type = INSN_RETURN;
+ break;
+
+ case 0xca: /* retf */
+ case 0xcb: /* retf */
+ case 0xcf: /* iret */
+ *type = INSN_CONTEXT_SWITCH;
+ break;
+
+ case 0xe8:
+ *type = INSN_CALL;
+ break;
+
+ case 0xff:
+ if (modrm_reg == 2 || modrm_reg == 3)
+
+ *type = INSN_CALL_DYNAMIC;
+
+ else if (modrm_reg == 4)
+
+ *type = INSN_JUMP_DYNAMIC;
+
+ else if (modrm_reg == 5)
+
+ /* jmpf */
+ *type = INSN_CONTEXT_SWITCH;
+
+ else if (modrm_reg == 6) {
+
+ /* push from mem */
+ *type = INSN_STACK;
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
+
+ break;
+
+ default:
+ break;
+ }
+
+ *immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
+
+ return 0;
+}
+
+void arch_initial_func_cfi_state(struct cfi_state *state)
+{
+ int i;
+
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ state->regs[i].base = CFI_UNDEFINED;
+ state->regs[i].offset = 0;
+ }
+
+ /* initial CFA (call frame address) */
+ state->cfa.base = CFI_SP;
+ state->cfa.offset = 8;
+
+ /* initial RA (return address) */
+ state->regs[16].base = CFI_CFA;
+ state->regs[16].offset = -8;
+}
diff --git a/tools/objtool/arch/x86/include/asm/inat.h b/tools/objtool/arch/x86/include/asm/inat.h
new file mode 100644
index 000000000..1c78580e5
--- /dev/null
+++ b/tools/objtool/arch/x86/include/asm/inat.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/inat_types.h>
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK 4 /* 0xF0 */
+#define INAT_PFX_CS 5 /* 0x2E */
+#define INAT_PFX_DS 6 /* 0x3E */
+#define INAT_PFX_ES 7 /* 0x26 */
+#define INAT_PFX_FS 8 /* 0x64 */
+#define INAT_PFX_GS 9 /* 0x65 */
+#define INAT_PFX_SS 10 /* 0x36 */
+#define INAT_PFX_ADDRSZ 11 /* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX 12 /* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX 15 /* EVEX prefix */
+
+#define INAT_LSTPFX_MAX 3
+#define INAT_LGCPFX_MAX 11
+
+/* Immediate size */
+#define INAT_IMM_BYTE 1
+#define INAT_IMM_WORD 2
+#define INAT_IMM_DWORD 3
+#define INAT_IMM_QWORD 4
+#define INAT_IMM_PTR 5
+#define INAT_IMM_VWORD32 6
+#define INAT_IMM_VWORD 7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS 0
+#define INAT_PFX_BITS 4
+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS 2
+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS 5
+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS 3
+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE 0
+#define INAT_SEG_REG_DEFAULT 1
+#define INAT_SEG_REG_CS 2
+#define INAT_SEG_REG_SS 3
+#define INAT_SEG_REG_DS 4
+#define INAT_SEG_REG_ES 5
+#define INAT_SEG_REG_FS 6
+#define INAT_SEG_REG_GS 7
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+ insn_byte_t vex_m,
+ insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+ return 0;
+ else
+ return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
+ attr == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+ return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+ return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+ return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+ return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+ return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+ return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+ return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+ return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+ return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+ return attr & (INAT_VEXONLY | INAT_EVEXONLY);
+}
+
+static inline int inat_must_evex(insn_attr_t attr)
+{
+ return attr & INAT_EVEXONLY;
+}
+#endif
diff --git a/tools/objtool/arch/x86/include/asm/inat_types.h b/tools/objtool/arch/x86/include/asm/inat_types.h
new file mode 100644
index 000000000..cb3c20ce3
--- /dev/null
+++ b/tools/objtool/arch/x86/include/asm/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h
new file mode 100644
index 000000000..3e0e18d37
--- /dev/null
+++ b/tools/objtool/arch/x86/include/asm/insn.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include <asm/inat.h>
+
+struct insn_field {
+ union {
+ insn_value_t value;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+struct insn {
+ struct insn_field prefixes; /*
+ * Prefixes
+ * prefixes.bytes[3]: last prefix
+ */
+ struct insn_field rex_prefix; /* REX prefix */
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field opcode; /*
+ * opcode.bytes[0]: opcode1
+ * opcode.bytes[1]: opcode2
+ * opcode.bytes[2]: opcode3
+ */
+ struct insn_field modrm;
+ struct insn_field sib;
+ struct insn_field displacement;
+ union {
+ struct insn_field immediate;
+ struct insn_field moffset1; /* for 64bit MOV */
+ struct insn_field immediate1; /* for 64bit imm or off16/32 */
+ };
+ union {
+ struct insn_field moffset2; /* for 64bit MOV */
+ struct insn_field immediate2; /* for 64bit imm or seg16 */
+ };
+
+ insn_attr_t attr;
+ unsigned char opnd_bytes;
+ unsigned char addr_bytes;
+ unsigned char length;
+ unsigned char x86_64;
+
+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
+ const insn_byte_t *next_byte;
+};
+
+#define MAX_INSN_SIZE 15
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
+#define X86_VEX2_M 1 /* VEX2.M always 1 */
+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
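These are plain bit-field extractions, so they are easy to sanity-check by hand. A minimal standalone sketch (the macros are copied from above; main() and the sample byte values are illustrative):

    #include <stdio.h>

    #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
    #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
    #define X86_MODRM_RM(modrm)  ((modrm) & 0x07)
    #define X86_REX_W(rex)       ((rex) & 8)

    int main(void)
    {
    	/* 0x48 is a REX prefix with only REX.W set (64-bit operand size) */
    	printf("REX.W(0x48) = %d\n", X86_REX_W(0x48));	/* 8, i.e. non-zero */

    	/* ModRM 0xe5: mod=11b (register form), reg=100b, rm=101b */
    	printf("mod=%d reg=%d rm=%d\n", X86_MODRM_MOD(0xe5),
    	       X86_MODRM_REG(0xe5), X86_MODRM_RM(0xe5));	/* 3 4 5 */
    	return 0;
    }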
+
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+ insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn,
+ const void *kaddr, int buf_len)
+{
+#ifdef CONFIG_X86_64
+ insn_init(insn, kaddr, buf_len, 1);
+#else /* CONFIG_X86_32 */
+ insn_init(insn, kaddr, buf_len, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.value != 0);
+}
+
+static inline int insn_is_evex(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.nbytes == 4);
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX2_M;
+ else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */
+ return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+ else /* EVEX */
+ return X86_EVEX_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX_P(insn->vex_prefix.bytes[1]);
+ else
+ return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+ return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+ return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+ return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+ return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+ return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+/**
+ * for_each_insn_prefix() -- Iterate prefixes in the instruction
+ * @insn: Pointer to struct insn.
+ * @idx: Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate over the prefix bytes of the given @insn. Each prefix byte is
+ * stored in @prefix and its index in @idx (@idx is only a cursor; do not
+ * modify it).
+ * Since prefixes.nbytes can be bigger than 4 if some prefixes
+ * are repeated, it cannot be used for looping over the prefixes.
+ */
+#define for_each_insn_prefix(insn, idx, prefix) \
+ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
+#endif /* _ASM_X86_INSN_H */
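The insn_get_*() routines above form a lazy pipeline: each one first pulls in whatever earlier stages it still needs. A caller that wants the whole instruction can therefore make a single call and then check the result; a minimal sketch (decode_one() and its error convention are illustrative):

    #include <stdio.h>
    #include <asm/insn.h>

    static int decode_one(const unsigned char *buf, int len)
    {
    	struct insn insn;

    	kernel_insn_init(&insn, buf, len);
    	insn_get_length(&insn);		/* cascades through all stages */
    	if (!insn_complete(&insn))
    		return -1;		/* ran past the buffer / bad insn */
    	printf("decoded %u byte(s), opcode1 0x%02x\n",
    	       insn.length, insn.opcode.bytes[0]);
    	return insn.length;
    }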
diff --git a/tools/objtool/arch/x86/include/asm/orc_types.h b/tools/objtool/arch/x86/include/asm/orc_types.h
new file mode 100644
index 000000000..46f516dd8
--- /dev/null
+++ b/tools/objtool/arch/x86/include/asm/orc_types.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_TYPES_H
+#define _ORC_TYPES_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+/*
+ * The ORC_REG_* registers are base registers which are used to find other
+ * registers on the stack.
+ *
+ * ORC_REG_PREV_SP, also known as DWARF Call Frame Address (CFA), is the
+ * address of the previous frame: the caller's SP before it called the current
+ * function.
+ *
+ * ORC_REG_UNDEFINED means the corresponding register's value didn't change in
+ * the current frame.
+ *
+ * The most commonly used base registers are SP and BP -- which the previous SP
+ * is usually based on -- and PREV_SP and UNDEFINED -- which the previous BP is
+ * usually based on.
+ *
+ * The rest of the base registers are needed for special cases like entry code
+ * and GCC realigned stacks.
+ */
+#define ORC_REG_UNDEFINED 0
+#define ORC_REG_PREV_SP 1
+#define ORC_REG_DX 2
+#define ORC_REG_DI 3
+#define ORC_REG_BP 4
+#define ORC_REG_SP 5
+#define ORC_REG_R10 6
+#define ORC_REG_R13 7
+#define ORC_REG_BP_INDIRECT 8
+#define ORC_REG_SP_INDIRECT 9
+#define ORC_REG_MAX 15
+
+/*
+ * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
+ * caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
+ * to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
+ * points to the iret return frame.
+ *
+ * The UNWIND_HINT macros are used only for the unwind_hint struct. They
+ * aren't used in struct orc_entry due to size and complexity constraints.
+ * Objtool converts them to real types when it converts the hints to orc
+ * entries.
+ */
+#define ORC_TYPE_CALL 0
+#define ORC_TYPE_REGS 1
+#define ORC_TYPE_REGS_IRET 2
+#define UNWIND_HINT_TYPE_SAVE 3
+#define UNWIND_HINT_TYPE_RESTORE 4
+
+#ifndef __ASSEMBLY__
+/*
+ * This struct is more or less a vastly simplified version of the DWARF Call
+ * Frame Information standard. It contains only the necessary parts of DWARF
+ * CFI, simplified for ease of access by the in-kernel unwinder. It tells the
+ * unwinder how to find the previous SP and BP (and sometimes entry regs) on
+ * the stack for a given code address. Each instance of the struct corresponds
+ * to one or more code locations.
+ */
+struct orc_entry {
+ s16 sp_offset;
+ s16 bp_offset;
+ unsigned sp_reg:4;
+ unsigned bp_reg:4;
+ unsigned type:2;
+ unsigned end:1;
+} __packed;
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack for the ORC unwinder.
+ *
+ * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+ u8 end;
+};
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ORC_TYPES_H */
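To make the encoding concrete, here is roughly what a consumer does with one orc_entry in the common cases. This is a simplified sketch, not the in-kernel unwinder; regs_snapshot is an illustrative stand-in for the unwinder's register state:

    /* Illustrative register state; the real unwinder tracks more than this. */
    struct regs_snapshot {
    	unsigned long sp;
    	unsigned long bp;
    };

    /* Resolve the caller's SP (the CFA) from one ORC entry, common cases only. */
    static unsigned long orc_prev_sp(const struct orc_entry *orc,
    				 const struct regs_snapshot *regs)
    {
    	switch (orc->sp_reg) {
    	case ORC_REG_SP:
    		return regs->sp + orc->sp_offset;
    	case ORC_REG_BP:
    		return regs->bp + orc->sp_offset;
    	default:
    		return 0;	/* entry code, realigned stacks, ... */
    	}
    }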
diff --git a/tools/objtool/arch/x86/lib/inat.c b/tools/objtool/arch/x86/lib/inat.c
new file mode 100644
index 000000000..c1f01a8e9
--- /dev/null
+++ b/tools/objtool/arch/x86/lib/inat.c
@@ -0,0 +1,97 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <asm/insn.h>
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+ return inat_primary_table[opcode];
+}
+
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
+ insn_attr_t esc_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_escape_id(esc_attr);
+
+ table = inat_escape_tables[n][0];
+ if (!table)
+ return 0;
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
+ insn_attr_t grp_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_group_id(grp_attr);
+
+ table = inat_group_tables[n][0];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ }
+ return table[X86_MODRM_REG(modrm)] |
+ inat_group_common_attribute(grp_attr);
+}
+
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+ insn_byte_t vex_p)
+{
+ const insn_attr_t *table;
+ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
+ return 0;
+	/* Check the master table (no last-prefix variant) first */
+ table = inat_avx_tables[vex_m][0];
+ if (!table)
+ return 0;
+ if (!inat_is_group(table[opcode]) && vex_p) {
+ /* If this is not a group, get attribute directly */
+ table = inat_avx_tables[vex_m][vex_p];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
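These lookups are chained as the decoder consumes escape bytes. For a two-byte opcode such as 0x0f 0xb6 (MOVZX), the flow inside insn_get_opcode() boils down to something like the following sketch (error handling elided; inat_is_escape() is declared in inat.h, and movzx_attr_demo() is an illustrative name):

    static insn_attr_t movzx_attr_demo(void)
    {
    	insn_attr_t attr;

    	attr = inat_get_opcode_attribute(0x0f);	/* escape to the 2-byte map */
    	if (inat_is_escape(attr))
    		attr = inat_get_escape_attribute(0xb6, 0 /* no last pfx */, attr);
    	/* attr now describes MOVZX Gv,Eb: it has a ModRM byte, no immediate */
    	return attr;
    }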
diff --git a/tools/objtool/arch/x86/lib/insn.c b/tools/objtool/arch/x86/lib/insn.c
new file mode 100644
index 000000000..1088eb8f3
--- /dev/null
+++ b/tools/objtool/arch/x86/lib/insn.c
@@ -0,0 +1,606 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include <asm/inat.h>
+#include <asm/insn.h>
+
+/* Verify that the next sizeof(t) + n bytes lie within the same instruction buffer */
+#define validate_next(t, insn, n) \
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+#define __get_next(t, insn) \
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define __peek_nbyte_next(t, insn, n) \
+ ({ t r = *(t*)((insn)->next_byte + n); r; })
+
+#define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+
+#define peek_nbyte_next(t, insn, n) \
+ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
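Note that get_next() and the peek variants expand to a `goto err_out` when the read would cross end_kaddr, so every function that uses them must provide that label; schematically (insn_get_something() is a made-up name):

    void insn_get_something(struct insn *insn)
    {
    	insn_byte_t b;

    	b = get_next(insn_byte_t, insn);	/* may 'goto err_out' */
    	/* ... decode b ... */

    err_out:
    	return;
    }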
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len: length of the buffer at @kaddr, in bytes
+ * @x86_64: !0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
+{
+ /*
+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+ * even if the input buffer is long enough to hold them.
+ */
+ if (buf_len > MAX_INSN_SIZE)
+ buf_len = MAX_INSN_SIZE;
+
+ memset(insn, 0, sizeof(*insn));
+ insn->kaddr = kaddr;
+ insn->end_kaddr = kaddr + buf_len;
+ insn->next_byte = kaddr;
+ insn->x86_64 = x86_64 ? 1 : 0;
+ insn->opnd_bytes = 4;
+ if (x86_64)
+ insn->addr_bytes = 8;
+ else
+ insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->prefixes, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+ insn_byte_t b, lb;
+ int i, nb;
+
+ if (prefixes->got)
+ return;
+
+ nb = 0;
+ lb = 0;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ while (inat_is_legacy_prefix(attr)) {
+ /* Skip if same prefix */
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == b)
+ goto found;
+ if (nb == 4)
+ /* Invalid instruction */
+ break;
+ prefixes->bytes[nb++] = b;
+ if (inat_is_address_size_prefix(attr)) {
+ /* address size switches 2/4 or 4/8 */
+ if (insn->x86_64)
+ insn->addr_bytes ^= 12;
+ else
+ insn->addr_bytes ^= 6;
+ } else if (inat_is_operand_size_prefix(attr)) {
+			/* operand size switches 2/4 */
+ insn->opnd_bytes ^= 6;
+ }
+found:
+ prefixes->nbytes++;
+ insn->next_byte++;
+ lb = b;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ }
+ /* Set the last prefix */
+ if (lb && lb != insn->prefixes.bytes[3]) {
+ if (unlikely(insn->prefixes.bytes[3])) {
+ /* Swap the last prefix */
+ b = insn->prefixes.bytes[3];
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == lb)
+ prefixes->bytes[i] = b;
+ }
+ insn->prefixes.bytes[3] = lb;
+ }
+
+ /* Decode REX prefix */
+ if (insn->x86_64) {
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_rex_prefix(attr)) {
+ insn->rex_prefix.value = b;
+ insn->rex_prefix.nbytes = 1;
+ insn->next_byte++;
+ if (X86_REX_W(b))
+ /* REX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ }
+ }
+ insn->rex_prefix.got = 1;
+
+ /* Decode VEX prefix */
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_vex_prefix(attr)) {
+ insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+ if (!insn->x86_64) {
+ /*
+			 * In 32-bit mode, if the [7:6] bits (the mod bits
+			 * of ModRM) of the second byte are not 11b, the
+			 * instruction is LDS, LES, or BOUND instead.
+ */
+ if (X86_MODRM_MOD(b2) != 3)
+ goto vex_end;
+ }
+ insn->vex_prefix.bytes[0] = b;
+ insn->vex_prefix.bytes[1] = b2;
+ if (inat_is_evex_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+ insn->vex_prefix.bytes[3] = b2;
+ insn->vex_prefix.nbytes = 4;
+ insn->next_byte += 4;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else if (inat_is_vex3_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ insn->vex_prefix.nbytes = 3;
+ insn->next_byte += 3;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else {
+ /*
+ * For VEX2, fake VEX3-like byte#2.
+ * Makes it easier to decode vex.W, vex.vvvv,
+ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+ */
+ insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn->vex_prefix.nbytes = 2;
+ insn->next_byte += 2;
+ }
+ }
+vex_end:
+ insn->vex_prefix.got = 1;
+
+ prefixes->got = 1;
+
+err_out:
+ return;
+}
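The `^= 12` and `^= 6` above are XOR toggles between the only two legal sizes: 8 ^ 12 = 4 and 4 ^ 12 = 8 for 64-bit addressing, 4 ^ 6 = 2 and 2 ^ 6 = 4 otherwise. A worked sketch for the byte sequence 66 67 89 d8 decoded as 64-bit code (decode_prefix_demo() is illustrative):

    static void decode_prefix_demo(void)
    {
    	/* 66 = operand-size prefix, 67 = address-size prefix, 89 d8 = MOV */
    	static const unsigned char buf[] = { 0x66, 0x67, 0x89, 0xd8 };
    	struct insn insn;

    	insn_init(&insn, buf, sizeof(buf), 1);
    	insn_get_prefixes(&insn);
    	/* insn.opnd_bytes: 4 ^ 6 == 2;  insn.addr_bytes: 8 ^ 12 == 4 */
    	/* insn.prefixes.nbytes == 2; insn.prefixes.bytes[3] == 0x67 (last) */
    }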
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and sets @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+ struct insn_field *opcode = &insn->opcode;
+ insn_byte_t op;
+ int pfx_id;
+ if (opcode->got)
+ return;
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[0] = op;
+ opcode->nbytes = 1;
+
+ /* Check if there is VEX prefix or not */
+ if (insn_is_avx(insn)) {
+ insn_byte_t m, p;
+ m = insn_vex_m_bits(insn);
+ p = insn_vex_p_bits(insn);
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+ (!inat_accept_vex(insn->attr) &&
+ !inat_is_group(insn->attr)))
+ insn->attr = 0; /* This instruction is bad */
+ goto end; /* VEX has only 1 byte for opcode */
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+ while (inat_is_escape(insn->attr)) {
+ /* Get escaped opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[opcode->nbytes++] = op;
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+ if (inat_must_vex(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+end:
+ opcode->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
+ if (modrm->got)
+ return;
+ if (!insn->opcode.got)
+ insn_get_opcode(insn);
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+ modrm->value = mod;
+ modrm->nbytes = 1;
+ if (inat_is_group(insn->attr)) {
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+ insn->attr = 0; /* This is bad */
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
+ modrm->got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte. No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+
+ if (!insn->x86_64)
+ return 0;
+ if (!modrm->got)
+ insn_get_modrm(insn);
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+ */
+ return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
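For instance, 8b 05 00 10 00 00 is mov eax,[rip+0x1000] in 64-bit code: ModRM 0x05 has mod == 00b and r/m == 101b, so (0x05 & 0xc7) == 0x05 and the test above matches. A short sketch (rip_relative_demo() is illustrative):

    static void rip_relative_demo(void)
    {
    	/* 8b 05 00 10 00 00 = mov eax,[rip+0x1000] in 64-bit code */
    	static const unsigned char buf[] = { 0x8b, 0x05, 0x00, 0x10, 0x00, 0x00 };
    	struct insn insn;

    	insn_init(&insn, buf, sizeof(buf), 1);
    	/* decodes up to ModRM on demand; (0x05 & 0xc7) == 0x05 */
    	int rip_rel = insn_rip_relative(&insn);	/* == 1 here */
    }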
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+ insn_byte_t modrm;
+
+ if (insn->sib.got)
+ return;
+ if (!insn->modrm.got)
+ insn_get_modrm(insn);
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+ X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+ insn->sib.value = get_next(insn_byte_t, insn);
+ insn->sib.nbytes = 1;
+ }
+ }
+ insn->sib.got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * The displacement value is sign-extended.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+ insn_byte_t mod, rm, base;
+
+ if (insn->displacement.got)
+ return;
+ if (!insn->sib.got)
+ insn_get_sib(insn);
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+ * mod = 00 - no displacement fields (exceptions below)
+ * mod = 01 - 1-byte displacement field
+ * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+ * address size = 2 (0x67 prefix in 32-bit mode)
+ * mod = 11 - no memory operand
+ *
+ * If address size = 2...
+ * mod = 00, r/m = 110 - displacement field is 2 bytes
+ *
+ * If address size != 2...
+ * mod != 11, r/m = 100 - SIB byte exists
+ * mod = 00, SIB base = 101 - displacement field is 4 bytes
+ * mod = 00, r/m = 101 - rip-relative addressing, displacement
+ * field is 4 bytes
+ */
+ mod = X86_MODRM_MOD(insn->modrm.value);
+ rm = X86_MODRM_RM(insn->modrm.value);
+ base = X86_SIB_BASE(insn->sib.value);
+ if (mod == 3)
+ goto out;
+ if (mod == 1) {
+ insn->displacement.value = get_next(signed char, insn);
+ insn->displacement.nbytes = 1;
+ } else if (insn->addr_bytes == 2) {
+ if ((mod == 0 && rm == 6) || mod == 2) {
+ insn->displacement.value =
+ get_next(short, insn);
+ insn->displacement.nbytes = 2;
+ }
+ } else {
+ if ((mod == 0 && rm == 5) || mod == 2 ||
+ (mod == 0 && base == 5)) {
+ insn->displacement.value = get_next(int, insn);
+ insn->displacement.nbytes = 4;
+ }
+ }
+ }
+out:
+ insn->displacement.got = 1;
+
+err_out:
+ return;
+}
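Checked against the mod table in the comment above: 8b 45 fc is mov eax,[rbp-0x4]; ModRM 0x45 has mod == 01b, so exactly one displacement byte follows ModRM and it is sign-extended (disp8_demo() is an illustrative name):

    static void disp8_demo(void)
    {
    	/* 8b 45 fc = mov eax,[rbp-0x4]: mod == 01b -> one disp byte */
    	static const unsigned char buf[] = { 0x8b, 0x45, 0xfc };
    	struct insn insn;

    	insn_init(&insn, buf, sizeof(buf), 1);
    	insn_get_displacement(&insn);
    	/* insn.displacement.nbytes == 1, insn.displacement.value == -4 */
    }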
+
+/* Decode moffset16/32/64. Return 0 if failed */
+static int __get_moffset(struct insn *insn)
+{
+ switch (insn->addr_bytes) {
+ case 2:
+ insn->moffset1.value = get_next(short, insn);
+ insn->moffset1.nbytes = 2;
+ break;
+ case 4:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ break;
+ case 8:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ insn->moffset2.value = get_next(int, insn);
+ insn->moffset2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->moffset1.got = insn->moffset2.got = 1;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v32(Iz). Return 0 if failed */
+static int __get_immv32(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case 4:
+ case 8:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v64(Iv/Ov). Return 0 if failed */
+static int __get_immv(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static int __get_immptr(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+		/* ptr16:64 does not exist (no segment) */
+ return 0;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate2.value = get_next(unsigned short, insn);
+ insn->immediate2.nbytes = 2;
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Most immediates are sign-extended; the unsigned value can be recovered
+ * by masking with ((1 << (nbytes * 8)) - 1).
+ */
+void insn_get_immediate(struct insn *insn)
+{
+ if (insn->immediate.got)
+ return;
+ if (!insn->displacement.got)
+ insn_get_displacement(insn);
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+ goto err_out;
+ goto done;
+ }
+
+ if (!inat_has_immediate(insn->attr))
+ /* no immediates */
+ goto done;
+
+ switch (inat_immediate_size(insn->attr)) {
+ case INAT_IMM_BYTE:
+ insn->immediate.value = get_next(signed char, insn);
+ insn->immediate.nbytes = 1;
+ break;
+ case INAT_IMM_WORD:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case INAT_IMM_DWORD:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ case INAT_IMM_QWORD:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ case INAT_IMM_PTR:
+ if (!__get_immptr(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD32:
+ if (!__get_immv32(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD:
+ if (!__get_immv(insn))
+ goto err_out;
+ break;
+ default:
+ /* Here, insn must have an immediate, but failed */
+ goto err_out;
+ }
+ if (inat_has_second_immediate(insn->attr)) {
+ insn->immediate2.value = get_next(signed char, insn);
+ insn->immediate2.nbytes = 1;
+ }
+done:
+ insn->immediate.got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+ if (insn->length)
+ return;
+ if (!insn->immediate.got)
+ insn_get_immediate(insn);
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
+}
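Putting the whole pipeline together, here is a standalone sketch of decoding the classic frame-pointer setup 48 89 e5 (mov %rsp,%rbp); main() is illustrative, and the expected values in the comments follow from the code above:

    #include <stdio.h>
    #include <asm/insn.h>

    int main(void)
    {
    	/* 48 = REX.W, 89 = MOV Ev,Gv, e5 = ModRM (mod=3, reg=rsp, rm=rbp) */
    	static const unsigned char buf[] = { 0x48, 0x89, 0xe5 };
    	struct insn insn;

    	insn_init(&insn, buf, sizeof(buf), 1);
    	insn_get_length(&insn);		/* cascades through every stage */

    	printf("len=%u rex=0x%02x opc=0x%02x modrm=0x%02x opnd_bytes=%u\n",
    	       insn.length,			/* 3 */
    	       insn.rex_prefix.bytes[0],	/* 0x48 */
    	       insn.opcode.bytes[0],		/* 0x89 */
    	       insn.modrm.bytes[0],		/* 0xe5 */
    	       insn.opnd_bytes);		/* 8, because REX.W is set */
    	return insn_complete(&insn) ? 0 : 1;
    }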
diff --git a/tools/objtool/arch/x86/lib/x86-opcode-map.txt b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
new file mode 100644
index 000000000..5cb9f009f
--- /dev/null
+++ b/tools/objtool/arch/x86/lib/x86-opcode-map.txt
@@ -0,0 +1,1078 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on the following documentation:
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+# (#326018-047US, June 2013)
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+# (ev): this opcode requires EVEX prefix.
+# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
+# (v): this opcode requires VEX prefix.
+# (v1): this opcode only supports 128bit VEX.
+#
+# Last Prefix Superscripts
+# - (66): the last prefix is 0x66
+# - (F3): the last prefix is 0xF3
+# - (F2): the last prefix is 0xF2
+# - (!F3): the last prefix is not 0xF3 (including the non-last prefix case)
+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
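As a worked reading of this notation, take the 2-byte map entry for 0x0f 0x58 further down: "58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)". With no SIMD prefix the opcode is vaddps; with a last prefix of 0x66 it is vaddpd; under 0xF3 or 0xF2 it is vaddss or vaddsd, whose VEX forms are 128-bit only (v1). The lowercase leading 'v' means each variant also accepts a VEX/EVEX encoding, per the rule above.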
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+# With a 0x66 prefix in 64-bit mode, the immediate offset of "near"
+# jumps and calls is 16-bit on AMD CPUs. For CALL, the pushed return
+# address is 16 bits wide and RSP is decremented by 2, but RSP,
+# unlike RIP, is not truncated to 16 bits.
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD | WBNOINVD (F3)
+0a:
+0b: UD2 (1B)
+0c:
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+# NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no
+# operands, but they actually do. Also, vmovss and vmovsd only accept 128-bit
+# forms. MOVSS/MOVSD has three forms in the SDM; this map shows only a typical
+# one. Many AVX instructions lack the v1 superscript, per Intel AVX Programming
+# Reference A.1.
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+18: Grp16 (1A)
+19:
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
+1c: Grp20 (1A),(1C)
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: (v) is omitted here because without VEX, vzeroall and vzeroupper become emms.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
+# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JA/JNBE Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xf0-0xff
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+ff: UD0
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1f: vpabsq Vx,Wx (66),(ev)
+# 0x0f 0x38 0x20-0x2f
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+# 0x0f 0x38 0x30-0x3f
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0x38 0x40-0x8f
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+80: INVEPT Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+# 0x0f 0x38 0x90-0xbf (FMA)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+# 0x0f 0x38 0xc0-0xff
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
+f9: MOVDIRI My,Gy
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+cc: sha1rnds4 Vdq,Wdq,Ib
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
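+
+# Illustrative note (comment only): each GrpTable is indexed by the ModRM.reg
+# field (bits 5:3) of the instruction; e.g. opcode 80 /4 decodes as AND Eb,Ib
+# via Grp1 above.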
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1: TEST Eb,Ib
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1: TEST Ev,Iz
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Mp
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101),(11B) | XTEST (110),(11B) | ENCLU (111),(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
+EndTable
+
+GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
+EndTable
+
+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp15
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxrstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+4: XSAVE | ptwrite Ey (F3),(11B)
+5: XRSTOR | lfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
+7: clflush | clflushopt (66) | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp20
+0: cldemote Mb
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
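+
+# Illustrative note (comment only): GrpP is AMD's 3DNow! prefetch group;
+# GrpPDLK and GrpRNG are believed to cover the VIA PadLock extensions
+# (MONTMUL/XSHA*, xstore/xcrypt), reached via the 0f a6/0f a7 escapes.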
diff --git a/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
new file mode 100644
index 000000000..a42015b30
--- /dev/null
+++ b/tools/objtool/arch/x86/tools/gen-insn-attr-x86.awk
@@ -0,0 +1,393 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+ if (sprintf("%x", 0) != "0")
+ return "Your awk has a printf-format problem."
+ return ""
+}
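+
+# Illustrative note (comment only): some awk implementations (older or
+# minimal ones) mishandle the "%x" printf conversion; the check above
+# detects that, and the caller suggests gawk instead.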
+
+# Clear working vars
+function clear_vars() {
+ delete table
+ delete lptable2
+ delete lptable1
+ delete lptable3
+ eid = -1 # escape id
+ gid = -1 # group id
+ aid = -1 # AVX id
+ tname = ""
+}
+
+BEGIN {
+ # Implementation error checking
+ awkchecked = check_awk_implement()
+ if (awkchecked != "") {
+ print "Error: " awkchecked > "/dev/stderr"
+ print "Please try to use gawk." > "/dev/stderr"
+ exit 1
+ }
+
+ # Setup generating tables
+ print "/* x86 opcode map generated from x86-opcode-map.txt */"
+ print "/* Do not change this code. */\n"
+ ggid = 1
+ geid = 1
+ gaid = 0
+ delete etable
+ delete gtable
+ delete atable
+
+ opnd_expr = "^[A-Za-z/]"
+ ext_expr = "^\\("
+ sep_expr = "^\\|$"
+ group_expr = "^Grp[0-9A-Za-z]+"
+
+ imm_expr = "^[IJAOL][a-z]"
+ imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+ imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+ imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+ imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+ imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+ imm_flag["Ob"] = "INAT_MOFFSET"
+ imm_flag["Ov"] = "INAT_MOFFSET"
+ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+
+ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
+ force64_expr = "\\([df]64\\)"
+ rex_expr = "^REX(\\.[XRWB]+)*"
+ fpu_expr = "^ESC" # TODO
+
+ lprefix1_expr = "\\((66|!F3)\\)"
+ lprefix2_expr = "\\(F3\\)"
+ lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
+ lprefix_expr = "\\((66|F2|F3)\\)"
+ max_lprefix = 4
+
+	# All opcodes starting with lower-case 'v', 'k' or with a (v1) superscript
+	# accept the VEX prefix
+ vexok_opcode_expr = "^[vk].*"
+ vexok_expr = "\\(v1\\)"
+	# All opcodes with a (v) superscript support *only* the VEX prefix
+ vexonly_expr = "\\(v\\)"
+	# All opcodes with an (ev) superscript support *only* the EVEX prefix
+ evexonly_expr = "\\(ev\\)"
+
+ prefix_expr = "\\(Prefix\\)"
+ prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+ prefix_num["REPNE"] = "INAT_PFX_REPNE"
+ prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+ prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+ prefix_num["XRELEASE"] = "INAT_PFX_REPE"
+ prefix_num["LOCK"] = "INAT_PFX_LOCK"
+ prefix_num["SEG=CS"] = "INAT_PFX_CS"
+ prefix_num["SEG=DS"] = "INAT_PFX_DS"
+ prefix_num["SEG=ES"] = "INAT_PFX_ES"
+ prefix_num["SEG=FS"] = "INAT_PFX_FS"
+ prefix_num["SEG=GS"] = "INAT_PFX_GS"
+ prefix_num["SEG=SS"] = "INAT_PFX_SS"
+ prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+ prefix_num["EVEX"] = "INAT_PFX_EVEX"
+
+ clear_vars()
+}
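+
+# Illustrative walk-through (comment only), using the map line
+#   "f0: RORX Gy,Ey,Ib (F2),(v)"
+# from the 0x0f 0x3a table above: the main rule below sees opcode "RORX" and
+# operands "Gy,Ey,Ib", where Gy/Ey match modrm_expr (INAT_MODRM) and Ib
+# matches imm_expr (INAT_MAKE_IMM(INAT_IMM_BYTE)); the "(F2),(v)" extension
+# routes those flags into lptable3 and adds INAT_VEXOK | INAT_VEXONLY, while
+# the base table entry is marked INAT_VARIANT.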
+
+function semantic_error(msg) {
+ print "Semantic error at " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+function debug(msg) {
+ print "DEBUG: " msg
+}
+
+function array_size(arr, i,c) {
+ c = 0
+ for (i in arr)
+ c++
+ return c
+}
+
+/^Table:/ {
+ print "/* " $0 " */"
+ if (tname != "")
+ semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+ if (NF != 1) {
+ # escape opcode table
+ ref = ""
+ for (i = 2; i <= NF; i++)
+ ref = ref $i
+ eid = escape[ref]
+ tname = sprintf("inat_escape_table_%d", eid)
+ }
+}
+
+/^AVXcode:/ {
+ if (NF != 1) {
+ # AVX/escape opcode table
+ aid = $2
+ if (gaid <= aid)
+ gaid = aid + 1
+ if (tname == "") # AVX only opcode table
+ tname = sprintf("inat_avx_table_%d", $2)
+ }
+ if (aid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+ print "/* " $0 " */"
+ if (!($2 in group))
+ semantic_error("No group: " $2 )
+ gid = group[$2]
+ tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+ print "const insn_attr_t " name " = {"
+ for (i = 0; i < n; i++) {
+ id = sprintf(fmt, i)
+ if (tbl[id])
+ print " [" id "] = " tbl[id] ","
+ }
+ print "};"
+}
+
+/^EndTable/ {
+ if (gid != -1) {
+ # print group tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,3] = tname "_3"
+ }
+ } else {
+ # print primary/escaped tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,0] = tname
+ if (aid >= 0)
+ atable[aid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,1] = tname "_1"
+ if (aid >= 0)
+ atable[aid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,2] = tname "_2"
+ if (aid >= 0)
+ atable[aid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,3] = tname "_3"
+ if (aid >= 0)
+ atable[aid,3] = tname "_3"
+ }
+ }
+ print ""
+ clear_vars()
+}
+
+function add_flags(old,new) {
+ if (old && new)
+ return old " | " new
+ else if (old)
+ return old
+ else
+ return new
+}
+
+# convert operands to flags.
+function convert_operands(count,opnd, i,j,imm,mod)
+{
+ imm = null
+ mod = null
+ for (j = 1; j <= count; j++) {
+ i = opnd[j]
+ if (match(i, imm_expr) == 1) {
+ if (!imm_flag[i])
+ semantic_error("Unknown imm opnd: " i)
+ if (imm) {
+ if (i != "Ib")
+ semantic_error("Second IMM error")
+ imm = add_flags(imm, "INAT_SCNDIMM")
+ } else
+ imm = imm_flag[i]
+ } else if (match(i, modrm_expr))
+ mod = "INAT_MODRM"
+ }
+ return add_flags(imm, mod)
+}
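+
+# Illustrative example (comment only): convert_operands() on "Vx,Hx,Wx,Ib"
+# yields "INAT_MAKE_IMM(INAT_IMM_BYTE) | INAT_MODRM" -- Vx/Wx match
+# modrm_expr, Hx matches neither expression, and Ib supplies the immediate.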
+
+/^[0-9a-f]+:/ {
+ if (NR == 1)
+ next
+ # get index
+ idx = "0x" substr($1, 1, index($1,":") - 1)
+ if (idx in table)
+ semantic_error("Redefine " idx " in " tname)
+
+ # check if escaped opcode
+ if ("escape" == $2) {
+ if ($3 != "#")
+ semantic_error("No escaped name")
+ ref = ""
+ for (i = 4; i <= NF; i++)
+ ref = ref $i
+ if (ref in escape)
+ semantic_error("Redefine escape (" ref ")")
+ escape[ref] = geid
+ geid++
+ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+ next
+ }
+
+ variant = null
+	# convert each opcode variant ('|'-separated) on this line
+ i = 2
+ while (i <= NF) {
+ opcode = $(i++)
+ delete opnds
+ ext = null
+ flags = null
+ opnd = null
+ # parse one opcode
+ if (match($i, opnd_expr)) {
+ opnd = $i
+ count = split($(i++), opnds, ",")
+ flags = convert_operands(count, opnds)
+ }
+ if (match($i, ext_expr))
+ ext = $(i++)
+ if (match($i, sep_expr))
+ i++
+ else if (i < NF)
+ semantic_error($i " is not a separator")
+
+ # check if group opcode
+ if (match(opcode, group_expr)) {
+ if (!(opcode in group)) {
+ group[opcode] = ggid
+ ggid++
+ }
+ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+ }
+	# check forced (or default) 64-bit
+ if (match(ext, force64_expr))
+ flags = add_flags(flags, "INAT_FORCE64")
+
+ # check REX prefix
+ if (match(opcode, rex_expr))
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+ # check coprocessor escape : TODO
+ if (match(opcode, fpu_expr))
+ flags = add_flags(flags, "INAT_MODRM")
+
+ # check VEX codes
+ if (match(ext, evexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+ else if (match(ext, vexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+ flags = add_flags(flags, "INAT_VEXOK")
+
+ # check prefixes
+ if (match(ext, prefix_expr)) {
+ if (!prefix_num[opcode])
+ semantic_error("Unknown prefix: " opcode)
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+ }
+ if (length(flags) == 0)
+ continue
+ # check if last prefix
+ if (match(ext, lprefix1_expr)) {
+ lptable1[idx] = add_flags(lptable1[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix2_expr)) {
+ lptable2[idx] = add_flags(lptable2[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix3_expr)) {
+ lptable3[idx] = add_flags(lptable3[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (!match(ext, lprefix_expr)){
+ table[idx] = add_flags(table[idx],flags)
+ }
+ }
+ if (variant)
+ table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+ if (awkchecked != "")
+ exit 1
+ # print escape opcode map's array
+ print "/* Escape opcode map array */"
+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print " ["i"]["j"] = "etable[i,j]","
+ print "};\n"
+ # print group opcode map's array
+ print "/* Group opcode map array */"
+ print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print " ["i"]["j"] = "gtable[i,j]","
+ print "};\n"
+ # print AVX opcode map's array
+ print "/* AVX opcode map array */"
+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print " ["i"]["j"] = "atable[i,j]","
+ print "};"
+}
+
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
new file mode 100644
index 000000000..694abc628
--- /dev/null
+++ b/tools/objtool/builtin-check.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * objtool check:
+ *
+ * This command analyzes every .o file and ensures the validity of its stack
+ * trace metadata. It enforces a set of rules on asm code and C inline
+ * assembly code so that stack traces can be reliable.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+
+#include <subcmd/parse-options.h>
+#include "builtin.h"
+#include "check.h"
+
+bool no_fp, no_unreachable, retpoline, module;
+
+static const char * const check_usage[] = {
+ "objtool check [<options>] file.o",
+ NULL,
+};
+
+const struct option check_options[] = {
+ OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
+ OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
+ OPT_BOOLEAN('r', "retpoline", &retpoline, "Validate retpoline assumptions"),
+ OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
+ OPT_END(),
+};
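+
+/*
+ * Example invocation (illustrative; the object path is a placeholder):
+ *
+ *	$ objtool check --retpoline --module drivers/foo/foo.o
+ *
+ * Each OPT_BOOLEAN() above just sets the corresponding global flag, which
+ * check() consults while validating the object.
+ */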
+
+int cmd_check(int argc, const char **argv)
+{
+ const char *objname;
+
+ argc = parse_options(argc, argv, check_options, check_usage, 0);
+
+ if (argc != 1)
+ usage_with_options(check_usage, check_options);
+
+ objname = argv[0];
+
+ return check(objname, false);
+}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
new file mode 100644
index 000000000..77ea2b971
--- /dev/null
+++ b/tools/objtool/builtin-orc.c
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * objtool orc:
+ *
+ * This command analyzes a .o file and adds .orc_unwind and .orc_unwind_ip
+ * sections to it, which is used by the in-kernel ORC unwinder.
+ *
+ * This command is a superset of "objtool check".
+ */
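+
+/*
+ * Example invocations (illustrative):
+ *
+ *	$ objtool orc generate file.o
+ *	$ objtool orc dump file.o
+ *
+ * "generate" runs the full check plus ORC generation; "dump" decodes the
+ * .orc_unwind* sections of an already-processed object.
+ */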
+
+#include <string.h>
+#include "builtin.h"
+#include "check.h"
+
+
+static const char *orc_usage[] = {
+ "objtool orc generate [<options>] file.o",
+ "objtool orc dump file.o",
+ NULL,
+};
+
+int cmd_orc(int argc, const char **argv)
+{
+ const char *objname;
+
+ argc--; argv++;
+ if (argc <= 0)
+ usage_with_options(orc_usage, check_options);
+
+ if (!strncmp(argv[0], "gen", 3)) {
+ argc = parse_options(argc, argv, check_options, orc_usage, 0);
+ if (argc != 1)
+ usage_with_options(orc_usage, check_options);
+
+ objname = argv[0];
+
+ return check(objname, true);
+ }
+
+ if (!strcmp(argv[0], "dump")) {
+ if (argc != 2)
+ usage_with_options(orc_usage, check_options);
+
+ objname = argv[1];
+
+ return orc_dump(objname);
+ }
+
+ usage_with_options(orc_usage, check_options);
+
+ return 0;
+}
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
new file mode 100644
index 000000000..28ff40e19
--- /dev/null
+++ b/tools/objtool/builtin.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _BUILTIN_H
+#define _BUILTIN_H
+
+#include <subcmd/parse-options.h>
+
+extern const struct option check_options[];
+extern bool no_fp, no_unreachable, retpoline, module;
+
+extern int cmd_check(int argc, const char **argv);
+extern int cmd_orc(int argc, const char **argv);
+
+#endif /* _BUILTIN_H */
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
new file mode 100644
index 000000000..2fe883c66
--- /dev/null
+++ b/tools/objtool/cfi.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_CFI_H
+#define _OBJTOOL_CFI_H
+
+#define CFI_UNDEFINED -1
+#define CFI_CFA -2
+#define CFI_SP_INDIRECT -3
+#define CFI_BP_INDIRECT -4
+
+#define CFI_AX 0
+#define CFI_DX 1
+#define CFI_CX 2
+#define CFI_BX 3
+#define CFI_SI 4
+#define CFI_DI 5
+#define CFI_BP 6
+#define CFI_SP 7
+#define CFI_R8 8
+#define CFI_R9 9
+#define CFI_R10 10
+#define CFI_R11 11
+#define CFI_R12 12
+#define CFI_R13 13
+#define CFI_R14 14
+#define CFI_R15 15
+#define CFI_RA 16
+#define CFI_NUM_REGS 17
+
+struct cfi_reg {
+ int base;
+ int offset;
+};
+
+struct cfi_state {
+ struct cfi_reg cfa;
+ struct cfi_reg regs[CFI_NUM_REGS];
+};
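+
+/*
+ * Illustrative note (comment only): on x86-64 the CFA (canonical frame
+ * address) is the stack pointer value from before the CALL, so at function
+ * entry cfa = { .base = CFI_SP, .offset = 8 }, accounting for the pushed
+ * return address.
+ */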
+
+#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
new file mode 100644
index 000000000..c0ab27368
--- /dev/null
+++ b/tools/objtool/check.c
@@ -0,0 +1,2281 @@
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <stdlib.h>
+
+#include "builtin.h"
+#include "check.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+#include <linux/kernel.h>
+
+#define FAKE_JUMP_OFFSET -1
+
+struct alternative {
+ struct list_head list;
+ struct instruction *insn;
+};
+
+const char *objname;
+struct cfi_state initial_func_cfi;
+
+struct instruction *find_insn(struct objtool_file *file,
+ struct section *sec, unsigned long offset)
+{
+ struct instruction *insn;
+
+ hash_for_each_possible(file->insn_hash, insn, hash, offset)
+ if (insn->sec == sec && insn->offset == offset)
+ return insn;
+
+ return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *next = list_next_entry(insn, list);
+
+ if (!next || &next->list == &file->insn_list || next->sec != insn->sec)
+ return NULL;
+
+ return next;
+}
+
+static struct instruction *next_insn_same_func(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *next = list_next_entry(insn, list);
+ struct symbol *func = insn->func;
+
+ if (!func)
+ return NULL;
+
+ if (&next->list != &file->insn_list && next->func == func)
+ return next;
+
+ /* Check if we're already in the subfunction: */
+ if (func == func->cfunc)
+ return NULL;
+
+ /* Move to the subfunction: */
+ return find_insn(file, func->cfunc->sec, func->cfunc->offset);
+}
+
+#define func_for_each_insn_all(file, func, insn) \
+ for (insn = find_insn(file, func->sec, func->offset); \
+ insn; \
+ insn = next_insn_same_func(file, insn))
+
+#define func_for_each_insn(file, func, insn) \
+ for (insn = find_insn(file, func->sec, func->offset); \
+ insn && &insn->list != &file->insn_list && \
+ insn->sec == func->sec && \
+ insn->offset < func->offset + func->len; \
+ insn = list_next_entry(insn, list))
+
+#define func_for_each_insn_continue_reverse(file, func, insn) \
+ for (insn = list_prev_entry(insn, list); \
+ &insn->list != &file->insn_list && \
+ insn->sec == func->sec && insn->offset >= func->offset; \
+ insn = list_prev_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn) \
+ for (; insn; insn = next_insn_same_sec(file, insn))
+
+#define sec_for_each_insn_continue(file, insn) \
+ for (insn = next_insn_same_sec(file, insn); insn; \
+ insn = next_insn_same_sec(file, insn))
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+ struct rela *rela;
+
+ /* check for STACK_FRAME_NON_STANDARD */
+ if (file->whitelist && file->whitelist->rela)
+ list_for_each_entry(rela, &file->whitelist->rela->rela_list, list) {
+ if (rela->sym->type == STT_SECTION &&
+ rela->sym->sec == func->sec &&
+ rela->addend == func->offset)
+ return true;
+ if (rela->sym->type == STT_FUNC && rela->sym == func)
+ return true;
+ }
+
+ return false;
+}
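+
+/*
+ * Illustrative example (comment only): a function is whitelisted from C with
+ *
+ *	STACK_FRAME_NON_STANDARD(my_func);
+ *
+ * (my_func being a placeholder), which emits a pointer into the .discard
+ * whitelist section whose relocations ignore_func() walks above.
+ */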
+
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ * -1: error
+ * 0: no dead end
+ * 1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+ int recursion)
+{
+ int i;
+ struct instruction *insn;
+ bool empty = true;
+
+ /*
+ * Unfortunately these have to be hard coded because the noreturn
+ * attribute isn't provided in ELF data.
+ */
+ static const char * const global_noreturns[] = {
+ "__stack_chk_fail",
+ "panic",
+ "do_exit",
+ "do_task_dead",
+ "__module_put_and_exit",
+ "complete_and_exit",
+ "kvm_spurious_fault",
+ "__reiserfs_panic",
+ "lbug_with_loc",
+ "fortify_panic",
+ "usercopy_abort",
+ "machine_real_restart",
+ "rewind_stack_do_exit",
+ };
+
+ if (func->bind == STB_WEAK)
+ return 0;
+
+ if (func->bind == STB_GLOBAL)
+ for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+ if (!strcmp(func->name, global_noreturns[i]))
+ return 1;
+
+ if (!func->len)
+ return 0;
+
+ insn = find_insn(file, func->sec, func->offset);
+ if (!insn->func)
+ return 0;
+
+ func_for_each_insn_all(file, func, insn) {
+ empty = false;
+
+ if (insn->type == INSN_RETURN)
+ return 0;
+ }
+
+ if (empty)
+ return 0;
+
+ /*
+ * A function can have a sibling call instead of a return. In that
+ * case, the function's dead-end status depends on whether the target
+ * of the sibling call returns.
+ */
+ func_for_each_insn_all(file, func, insn) {
+ if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ struct instruction *dest = insn->jump_dest;
+
+ if (!dest)
+ /* sibling call to another file */
+ return 0;
+
+ if (dest->func && dest->func->pfunc != insn->func->pfunc) {
+
+ /* local sibling call */
+ if (recursion == 5) {
+ /*
+ * Infinite recursion: two functions
+ * have sibling calls to each other.
+ * This is a very rare case. It means
+ * they aren't dead ends.
+ */
+ return 0;
+ }
+
+ return __dead_end_function(file, dest->func,
+ recursion + 1);
+ }
+ }
+
+ if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+ /* sibling call */
+ return 0;
+ }
+
+ return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+ return __dead_end_function(file, func, 0);
+}
+
+static void clear_insn_state(struct insn_state *state)
+{
+ int i;
+
+ memset(state, 0, sizeof(*state));
+ state->cfa.base = CFI_UNDEFINED;
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ state->regs[i].base = CFI_UNDEFINED;
+ state->vals[i].base = CFI_UNDEFINED;
+ }
+ state->drap_reg = CFI_UNDEFINED;
+ state->drap_offset = -1;
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ unsigned long offset;
+ struct instruction *insn;
+ int ret;
+
+ for_each_sec(file, sec) {
+
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
+ if (strcmp(sec->name, ".altinstr_replacement") &&
+ strcmp(sec->name, ".altinstr_aux") &&
+ strncmp(sec->name, ".discard.", 9))
+ sec->text = true;
+
+ for (offset = 0; offset < sec->len; offset += insn->len) {
+ insn = malloc(sizeof(*insn));
+ if (!insn) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(insn, 0, sizeof(*insn));
+ INIT_LIST_HEAD(&insn->alts);
+ clear_insn_state(&insn->state);
+
+ insn->sec = sec;
+ insn->offset = offset;
+
+ ret = arch_decode_instruction(file->elf, sec, offset,
+ sec->len - offset,
+ &insn->len, &insn->type,
+ &insn->immediate,
+ &insn->stack_op);
+ if (ret)
+ goto err;
+
+ if (!insn->type || insn->type > INSN_LAST) {
+ WARN_FUNC("invalid instruction type %d",
+ insn->sec, insn->offset, insn->type);
+ ret = -1;
+ goto err;
+ }
+
+ hash_add(file->insn_hash, &insn->hash, insn->offset);
+ list_add_tail(&insn->list, &file->insn_list);
+ }
+
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!find_insn(file, sec, func->offset)) {
+ WARN("%s(): can't find starting instruction",
+ func->name);
+ return -1;
+ }
+
+ func_for_each_insn(file, func, insn)
+ if (!insn->func)
+ insn->func = func;
+ }
+ }
+
+ return 0;
+
+err:
+ free(insn);
+ return ret;
+}
+
+/*
+ * Mark "ud2" instructions and manually annotated dead ends.
+ */
+static int add_dead_ends(struct objtool_file *file)
+{
+ struct section *sec;
+ struct rela *rela;
+ struct instruction *insn;
+ bool found;
+
+ /*
+ * By default, "ud2" is a dead end unless otherwise annotated, because
+ * GCC 7 inserts it for certain divide-by-zero cases.
+ */
+ for_each_insn(file, insn)
+ if (insn->type == INSN_BUG)
+ insn->dead_end = true;
+
+ /*
+ * Check for manually annotated dead ends.
+ */
+ sec = find_section_by_name(file->elf, ".rela.discard.unreachable");
+ if (!sec)
+ goto reachable;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (insn)
+ insn = list_prev_entry(insn, list);
+ else if (rela->addend == rela->sym->sec->len) {
+ found = false;
+ list_for_each_entry_reverse(insn, &file->insn_list, list) {
+ if (insn->sec == rela->sym->sec) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ WARN("can't find unreachable insn at %s+0x%x",
+ rela->sym->sec->name, rela->addend);
+ return -1;
+ }
+ } else {
+ WARN("can't find unreachable insn at %s+0x%x",
+ rela->sym->sec->name, rela->addend);
+ return -1;
+ }
+
+ insn->dead_end = true;
+ }
+
+reachable:
+ /*
+ * These manually annotated reachable checks are needed for GCC 4.4,
+ * where the Linux unreachable() macro isn't supported. In that case
+ * GCC doesn't know the "ud2" is fatal, so it generates code as if it's
+ * not a dead end.
+ */
+ sec = find_section_by_name(file->elf, ".rela.discard.reachable");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (insn)
+ insn = list_prev_entry(insn, list);
+ else if (rela->addend == rela->sym->sec->len) {
+ found = false;
+ list_for_each_entry_reverse(insn, &file->insn_list, list) {
+ if (insn->sec == rela->sym->sec) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ WARN("can't find reachable insn at %s+0x%x",
+ rela->sym->sec->name, rela->addend);
+ return -1;
+ }
+ } else {
+ WARN("can't find reachable insn at %s+0x%x",
+ rela->sym->sec->name, rela->addend);
+ return -1;
+ }
+
+ insn->dead_end = false;
+ }
+
+ return 0;
+}
+
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!ignore_func(file, func))
+ continue;
+
+ func_for_each_insn_all(file, func, insn)
+ insn->ignore = true;
+ }
+ }
+}
+
+/*
+ * FIXME: For now, just ignore any alternatives which add retpolines. This is
+ * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
+ * But it at least allows objtool to understand the control flow *around* the
+ * retpoline.
+ */
+static int add_nospec_ignores(struct objtool_file *file)
+{
+ struct section *sec;
+ struct rela *rela;
+ struct instruction *insn;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.nospec");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.nospec entry");
+ return -1;
+ }
+
+ insn->ignore_alts = true;
+ }
+
+ return 0;
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct rela *rela;
+ struct section *dest_sec;
+ unsigned long dest_off;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ continue;
+
+ if (insn->offset == FAKE_JUMP_OFFSET)
+ continue;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!rela) {
+ dest_sec = insn->sec;
+ dest_off = insn->offset + insn->len + insn->immediate;
+ } else if (rela->sym->type == STT_SECTION) {
+ dest_sec = rela->sym->sec;
+ dest_off = rela->addend + 4;
+ } else if (rela->sym->sec->idx) {
+ dest_sec = rela->sym->sec;
+ dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ } else if (strstr(rela->sym->name, "_indirect_thunk_")) {
+ /*
+ * Retpoline jumps are really dynamic jumps in
+ * disguise, so convert them accordingly.
+ */
+ insn->type = INSN_JUMP_DYNAMIC;
+ insn->retpoline_safe = true;
+ continue;
+ } else {
+ /* sibling call */
+ insn->jump_dest = 0;
+ continue;
+ }
+
+ insn->jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!insn->jump_dest) {
+
+ /*
+ * This is a special case where an alt instruction
+ * jumps past the end of the section. These are
+ * handled later in handle_group_alt().
+ */
+ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ continue;
+
+ WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+ insn->sec, insn->offset, dest_sec->name,
+ dest_off);
+ return -1;
+ }
+
+ /*
+ * For GCC 8+, create parent/child links for any cold
+ * subfunctions. This is _mostly_ redundant with a similar
+ * initialization in read_symbols().
+ *
+ * If a function has aliases, we want the *first* such function
+ * in the symbol table to be the subfunction's parent. In that
+ * case we overwrite the initialization done in read_symbols().
+ *
+ * However this code can't completely replace the
+ * read_symbols() code because this doesn't detect the case
+ * where the parent function's only reference to a subfunction
+ * is through a switch table.
+ */
+ if (insn->func && insn->jump_dest->func &&
+ insn->func != insn->jump_dest->func &&
+ !strstr(insn->func->name, ".cold.") &&
+ strstr(insn->jump_dest->func->name, ".cold.")) {
+ insn->func->cfunc = insn->jump_dest->func;
+ insn->jump_dest->func->pfunc = insn->func;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+ struct instruction *insn;
+ unsigned long dest_off;
+ struct rela *rela;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_CALL)
+ continue;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!rela) {
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->call_dest = find_symbol_by_offset(insn->sec,
+ dest_off);
+
+ if (!insn->call_dest && !insn->ignore) {
+ WARN_FUNC("unsupported intra-function call",
+ insn->sec, insn->offset);
+ if (retpoline)
+ WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
+ return -1;
+ }
+
+ } else if (rela->sym->type == STT_SECTION) {
+ insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+ rela->addend+4);
+ if (!insn->call_dest ||
+ insn->call_dest->type != STT_FUNC) {
+ WARN_FUNC("can't find call dest symbol at %s+0x%x",
+ insn->sec, insn->offset,
+ rela->sym->sec->name,
+ rela->addend + 4);
+ return -1;
+ }
+ } else
+ insn->call_dest = rela->sym;
+ }
+
+ return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ * instruction at the end so that validate_branch() skips all the original
+ * replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero. In
+ * that case the old instructions are replaced with noops. We simulate that
+ * by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ * conditionally jumps to the _end_ of the entry. We have to modify these
+ * jumps' destinations to point back to .text rather than the end of the
+ * entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ * which is a "very very small percentage of machines".
+ */
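+/*
+ * Illustrative example (comment only): a site patched with something like
+ *
+ *	alternative("call old_func", "call new_func", X86_FEATURE_FOO);
+ *
+ * (names hypothetical) keeps the original bytes in .text and the replacement
+ * in .altinstr_replacement; the fake jump built below makes the replacement
+ * path fall through to the instruction after the original site.
+ */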
+static int handle_group_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+{
+ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+ unsigned long dest_off;
+
+ last_orig_insn = NULL;
+ insn = orig_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+ break;
+
+ if (special_alt->skip_orig)
+ insn->type = INSN_NOP;
+
+ insn->alt_group = true;
+ last_orig_insn = insn;
+ }
+
+ if (next_insn_same_sec(file, last_orig_insn)) {
+ fake_jump = malloc(sizeof(*fake_jump));
+ if (!fake_jump) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(fake_jump, 0, sizeof(*fake_jump));
+ INIT_LIST_HEAD(&fake_jump->alts);
+ clear_insn_state(&fake_jump->state);
+
+ fake_jump->sec = special_alt->new_sec;
+ fake_jump->offset = FAKE_JUMP_OFFSET;
+ fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+ fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+ fake_jump->func = orig_insn->func;
+ }
+
+ if (!special_alt->new_len) {
+ if (!fake_jump) {
+ WARN("%s: empty alternative at end of section",
+ special_alt->orig_sec->name);
+ return -1;
+ }
+
+ *new_insn = fake_jump;
+ return 0;
+ }
+
+ last_new_insn = NULL;
+ insn = *new_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->new_off + special_alt->new_len)
+ break;
+
+ last_new_insn = insn;
+
+ insn->ignore = orig_insn->ignore_alts;
+
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ continue;
+
+ if (!insn->immediate)
+ continue;
+
+ dest_off = insn->offset + insn->len + insn->immediate;
+ if (dest_off == special_alt->new_off + special_alt->new_len) {
+ if (!fake_jump) {
+ WARN("%s: alternative jump to end of section",
+ special_alt->orig_sec->name);
+ return -1;
+ }
+ insn->jump_dest = fake_jump;
+ }
+
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find alternative jump destination",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ }
+
+ if (!last_new_insn) {
+ WARN_FUNC("can't find last new alternative instruction",
+ special_alt->new_sec, special_alt->new_off);
+ return -1;
+ }
+
+ if (fake_jump)
+ list_add(&fake_jump->list, &last_new_insn->list);
+
+ return 0;
+}
+
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+{
+ if (orig_insn->type == INSN_NOP)
+ return 0;
+
+ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+ WARN_FUNC("unsupported instruction at jump label",
+ orig_insn->sec, orig_insn->offset);
+ return -1;
+ }
+
+ *new_insn = list_next_entry(orig_insn, list);
+ return 0;
+}
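+
+/*
+ * Illustrative note (comment only): these jump_or_nop entries come from the
+ * static-key jump label machinery, where a branch site is either a NOP or an
+ * unconditional JMP and gets flipped at runtime (e.g. by
+ * static_branch_enable()).
+ */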
+
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime. Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+ struct list_head special_alts;
+ struct instruction *orig_insn, *new_insn;
+ struct special_alt *special_alt, *tmp;
+ struct alternative *alt;
+ int ret;
+
+ ret = special_get_alts(file->elf, &special_alts);
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+
+ orig_insn = find_insn(file, special_alt->orig_sec,
+ special_alt->orig_off);
+ if (!orig_insn) {
+ WARN_FUNC("special: can't find orig instruction",
+ special_alt->orig_sec, special_alt->orig_off);
+ ret = -1;
+ goto out;
+ }
+
+ new_insn = NULL;
+ if (!special_alt->group || special_alt->new_len) {
+ new_insn = find_insn(file, special_alt->new_sec,
+ special_alt->new_off);
+ if (!new_insn) {
+ WARN_FUNC("special: can't find new instruction",
+ special_alt->new_sec,
+ special_alt->new_off);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (special_alt->group) {
+ if (!special_alt->orig_len) {
+ WARN_FUNC("empty alternative entry",
+ orig_insn->sec, orig_insn->offset);
+ continue;
+ }
+
+ ret = handle_group_alt(file, special_alt, orig_insn,
+ &new_insn);
+ if (ret)
+ goto out;
+ } else if (special_alt->jump_or_nop) {
+ ret = handle_jump_alt(file, special_alt, orig_insn,
+ &new_insn);
+ if (ret)
+ goto out;
+ }
+
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ ret = -1;
+ goto out;
+ }
+
+ alt->insn = new_insn;
+ list_add_tail(&alt->list, &orig_insn->alts);
+
+ list_del(&special_alt->list);
+ free(special_alt);
+ }
+
+out:
+ return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct instruction *insn,
+ struct rela *table, struct rela *next_table)
+{
+ struct rela *rela = table;
+ struct instruction *alt_insn;
+ struct alternative *alt;
+ struct symbol *pfunc = insn->func->pfunc;
+ unsigned int prev_offset = 0;
+
+ list_for_each_entry_from(rela, &table->rela_sec->rela_list, list) {
+ if (rela == next_table)
+ break;
+
+ /* Make sure the switch table entries are consecutive: */
+ if (prev_offset && rela->offset != prev_offset + 8)
+ break;
+
+ /* Detect function pointers from contiguous objects: */
+ if (rela->sym->sec == pfunc->sec &&
+ rela->addend == pfunc->offset)
+ break;
+
+ alt_insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!alt_insn)
+ break;
+
+ /* Make sure the jmp dest is in the function or subfunction: */
+ if (alt_insn->func->pfunc != pfunc)
+ break;
+
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ return -1;
+ }
+
+ alt->insn = alt_insn;
+ list_add_tail(&alt->list, &insn->alts);
+ prev_offset = rela->offset;
+ }
+
+ if (!prev_offset) {
+ WARN_FUNC("can't find switch jump table",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * find_switch_table() - Given a dynamic jump, find the switch jump table in
+ * .rodata associated with it.
+ *
+ * There are 3 basic patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ * This is the most common case by far. It jumps to an address in a simple
+ * jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ * This is caused by a rare GCC quirk, currently only seen in three driver
+ * functions in the kernel, and only with certain obscure non-distro configs.
+ *
+ * As part of an optimization, GCC makes a copy of an existing switch jump
+ * table, modifies it, and then hard-codes the jump (albeit with an indirect
+ * jump) to use a single entry in the table. The rest of the jump table and
+ * some of its jump targets remain as dead code.
+ *
+ * In such a case we can just crudely ignore all unreachable instruction
+ * warnings for the entire object file. Ideally we would just ignore them
+ * for the function, but that would require redesigning the code quite a
+ * bit. And honestly that's just not worth doing: unreachable instruction
+ * warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ * ... some instructions ...
+ * jmpq *(%reg1,%reg2,8)
+ *
+ * This is a fairly uncommon pattern which is new for GCC 6. As of this
+ * writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Esp. with KASAN enabled some of the code between the mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
+ * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ * ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ */
+static struct rela *find_switch_table(struct objtool_file *file,
+ struct symbol *func,
+ struct instruction *insn)
+{
+ struct rela *text_rela, *rodata_rela;
+ struct instruction *orig_insn = insn;
+ struct section *rodata_sec;
+ unsigned long table_offset;
+
+ /*
+	 * Backward search using the @first_jump_src links; these help avoid
+	 * much of the 'in between' code, which could otherwise confuse us.
+ */
+ for (;
+ &insn->list != &file->insn_list && insn->func && insn->func->pfunc == func;
+ insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+
+ if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
+ break;
+
+ /* allow small jumps within the range */
+ if (insn->type == INSN_JUMP_UNCONDITIONAL &&
+ insn->jump_dest &&
+ (insn->jump_dest->offset <= insn->offset ||
+ insn->jump_dest->offset > orig_insn->offset))
+ break;
+
+ /* look for a relocation which references .rodata */
+ text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!text_rela || text_rela->sym->type != STT_SECTION ||
+ !text_rela->sym->sec->rodata)
+ continue;
+
+ table_offset = text_rela->addend;
+ rodata_sec = text_rela->sym->sec;
+
+ if (text_rela->type == R_X86_64_PC32)
+ table_offset += 4;
+
+ /*
+ * Make sure the .rodata address isn't associated with a
+ * symbol. gcc jump tables are anonymous data.
+ */
+ if (find_symbol_containing(rodata_sec, table_offset))
+ continue;
+
+ rodata_rela = find_rela_by_dest(rodata_sec, table_offset);
+ if (rodata_rela) {
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+ * indicates a rare GCC quirk/bug which can leave dead
+ * code behind.
+ */
+ if (text_rela->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
+
+ return rodata_rela;
+ }
+ }
+
+ return NULL;
+}
+
+
+static int add_func_switch_tables(struct objtool_file *file,
+ struct symbol *func)
+{
+ struct instruction *insn, *last = NULL, *prev_jump = NULL;
+ struct rela *rela, *prev_rela = NULL;
+ int ret;
+
+ func_for_each_insn_all(file, func, insn) {
+ if (!last)
+ last = insn;
+
+ /*
+ * Store back-pointers for unconditional forward jumps such
+ * that find_switch_table() can back-track using those and
+ * avoid some potentially confusing code.
+ */
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest &&
+ insn->offset > last->offset &&
+ insn->jump_dest->offset > insn->offset &&
+ !insn->jump_dest->first_jump_src) {
+
+ insn->jump_dest->first_jump_src = insn;
+ last = insn->jump_dest;
+ }
+
+ if (insn->type != INSN_JUMP_DYNAMIC)
+ continue;
+
+ rela = find_switch_table(file, func, insn);
+ if (!rela)
+ continue;
+
+ /*
+ * We found a switch table, but we don't know yet how big it
+ * is. Don't add it until we reach the end of the function or
+ * the beginning of another switch table in the same function.
+ */
+ if (prev_jump) {
+ ret = add_switch_table(file, prev_jump, prev_rela, rela);
+ if (ret)
+ return ret;
+ }
+
+ prev_jump = insn;
+ prev_rela = rela;
+ }
+
+ if (prev_jump) {
+ ret = add_switch_table(file, prev_jump, prev_rela, NULL);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ int ret;
+
+ if (!file->rodata)
+ return 0;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ ret = add_func_switch_tables(file, func);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
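+/*
+ * Illustrative note (comment only): the entries read below are emitted by
+ * the UNWIND_HINT_*() asm macros (e.g. UNWIND_HINT_EMPTY, UNWIND_HINT_REGS);
+ * each struct unwind_hint records the CFA base register, its offset and the
+ * ORC type at one instruction.
+ */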
+static int read_unwind_hints(struct objtool_file *file)
+{
+ struct section *sec, *relasec;
+ struct rela *rela;
+ struct unwind_hint *hint;
+ struct instruction *insn;
+ struct cfi_reg *cfa;
+ int i;
+
+ sec = find_section_by_name(file->elf, ".discard.unwind_hints");
+ if (!sec)
+ return 0;
+
+ relasec = sec->rela;
+ if (!relasec) {
+ WARN("missing .rela.discard.unwind_hints section");
+ return -1;
+ }
+
+ if (sec->len % sizeof(struct unwind_hint)) {
+ WARN("struct unwind_hint size mismatch");
+ return -1;
+ }
+
+ file->hints = true;
+
+ for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
+ hint = (struct unwind_hint *)sec->data->d_buf + i;
+
+ rela = find_rela_by_dest(sec, i * sizeof(*hint));
+ if (!rela) {
+ WARN("can't find rela for unwind_hints[%d]", i);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("can't find insn for unwind_hints[%d]", i);
+ return -1;
+ }
+
+ cfa = &insn->state.cfa;
+
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->save = true;
+ continue;
+
+ } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ insn->hint = true;
+ continue;
+ }
+
+ insn->hint = true;
+
+ switch (hint->sp_reg) {
+ case ORC_REG_UNDEFINED:
+ cfa->base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+ cfa->base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+ cfa->base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+ cfa->base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+ cfa->base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+ cfa->base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+ cfa->base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+ cfa->base = CFI_DX;
+ break;
+ default:
+ WARN_FUNC("unsupported unwind_hint sp base reg %d",
+ insn->sec, insn->offset, hint->sp_reg);
+ return -1;
+ }
+
+ cfa->offset = hint->sp_offset;
+ insn->state.type = hint->type;
+ insn->state.end = hint->end;
+ }
+
+ return 0;
+}
+
+static int read_retpoline_hints(struct objtool_file *file)
+{
+ struct section *sec;
+ struct instruction *insn;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.retpoline_safe entry");
+ return -1;
+ }
+
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC) {
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ insn->retpoline_safe = true;
+ }
+
+ return 0;
+}
+
+static void mark_rodata(struct objtool_file *file)
+{
+ struct section *sec;
+ bool found = false;
+
+ /*
+ * This searches for the .rodata section or multiple .rodata.func_name
+	 * sections if -fdata-sections is being used. The .str1.1 and .str1.8
+ * rodata sections are ignored as they don't contain jump tables.
+ */
+ for_each_sec(file, sec) {
+ if (!strncmp(sec->name, ".rodata", 7) &&
+ !strstr(sec->name, ".str1.")) {
+ sec->rodata = true;
+ found = true;
+ }
+ }
+
+ file->rodata = found;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+ int ret;
+
+ mark_rodata(file);
+
+ ret = decode_instructions(file);
+ if (ret)
+ return ret;
+
+ ret = add_dead_ends(file);
+ if (ret)
+ return ret;
+
+ add_ignores(file);
+
+ ret = add_nospec_ignores(file);
+ if (ret)
+ return ret;
+
+ ret = add_jump_destinations(file);
+ if (ret)
+ return ret;
+
+ ret = add_special_section_alts(file);
+ if (ret)
+ return ret;
+
+ ret = add_call_destinations(file);
+ if (ret)
+ return ret;
+
+ ret = add_switch_table_alts(file);
+ if (ret)
+ return ret;
+
+ ret = read_unwind_hints(file);
+ if (ret)
+ return ret;
+
+ ret = read_retpoline_hints(file);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+ if (insn->type == INSN_CALL &&
+ insn->call_dest->type == STT_NOTYPE &&
+ !strcmp(insn->call_dest->name, "__fentry__"))
+ return true;
+
+ return false;
+}
+
+static bool has_modified_stack_frame(struct insn_state *state)
+{
+ int i;
+
+ if (state->cfa.base != initial_func_cfi.cfa.base ||
+ state->cfa.offset != initial_func_cfi.cfa.offset ||
+ state->stack_size != initial_func_cfi.cfa.offset ||
+ state->drap)
+ return true;
+
+ for (i = 0; i < CFI_NUM_REGS; i++)
+ if (state->regs[i].base != initial_func_cfi.regs[i].base ||
+ state->regs[i].offset != initial_func_cfi.regs[i].offset)
+ return true;
+
+ return false;
+}
+
+static bool has_valid_stack_frame(struct insn_state *state)
+{
+ if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
+ state->regs[CFI_BP].offset == -16)
+ return true;
+
+ if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+ return true;
+
+ return false;
+}
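+
+/*
+ * Illustrative note (comment only): after the common "push %rbp; mov
+ * %rsp,%rbp" prologue the CFA is %rbp+16 and the caller's %rbp is saved at
+ * CFA-16, which is exactly the pattern has_valid_stack_frame() accepts.
+ */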
+
+static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
+{
+ struct cfi_reg *cfa = &state->cfa;
+ struct stack_op *op = &insn->stack_op;
+
+ if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
+ return 0;
+
+ /* push */
+ if (op->dest.type == OP_DEST_PUSH)
+ cfa->offset += 8;
+
+ /* pop */
+ if (op->src.type == OP_SRC_POP)
+ cfa->offset -= 8;
+
+ /* add immediate to sp */
+ if (op->dest.type == OP_DEST_REG && op->src.type == OP_SRC_ADD &&
+ op->dest.reg == CFI_SP && op->src.reg == CFI_SP)
+ cfa->offset -= op->src.offset;
+
+ return 0;
+}
+
+static void save_reg(struct insn_state *state, unsigned char reg, int base,
+ int offset)
+{
+ if (arch_callee_saved_reg(reg) &&
+ state->regs[reg].base == CFI_UNDEFINED) {
+ state->regs[reg].base = base;
+ state->regs[reg].offset = offset;
+ }
+}
+
+static void restore_reg(struct insn_state *state, unsigned char reg)
+{
+ state->regs[reg].base = CFI_UNDEFINED;
+ state->regs[reg].offset = 0;
+}
+
+/*
+ * A note about DRAP stack alignment:
+ *
+ * GCC has the concept of a DRAP register, which is used to help keep track of
+ * the stack pointer when aligning the stack. r10 or r13 is used as the DRAP
+ * register. The typical DRAP pattern is:
+ *
+ * 4c 8d 54 24 08 lea 0x8(%rsp),%r10
+ * 48 83 e4 c0 and $0xffffffffffffffc0,%rsp
+ * 41 ff 72 f8 pushq -0x8(%r10)
+ * 55 push %rbp
+ * 48 89 e5 mov %rsp,%rbp
+ * (more pushes)
+ * 41 52 push %r10
+ * ...
+ * 41 5a pop %r10
+ * (more pops)
+ * 5d pop %rbp
+ * 49 8d 62 f8 lea -0x8(%r10),%rsp
+ * c3 retq
+ *
+ * There are some variations in the epilogues, like:
+ *
+ * 5b pop %rbx
+ * 41 5a pop %r10
+ * 41 5c pop %r12
+ * 41 5d pop %r13
+ * 41 5e pop %r14
+ * c9 leaveq
+ * 49 8d 62 f8 lea -0x8(%r10),%rsp
+ * c3 retq
+ *
+ * and:
+ *
+ * 4c 8b 55 e8 mov -0x18(%rbp),%r10
+ * 48 8b 5d e0 mov -0x20(%rbp),%rbx
+ * 4c 8b 65 f0 mov -0x10(%rbp),%r12
+ * 4c 8b 6d f8 mov -0x8(%rbp),%r13
+ * c9 leaveq
+ * 49 8d 62 f8 lea -0x8(%r10),%rsp
+ * c3 retq
+ *
+ * Sometimes r13 is used as the DRAP register, in which case it's saved and
+ * restored beforehand:
+ *
+ * 41 55 push %r13
+ * 4c 8d 6c 24 10 lea 0x10(%rsp),%r13
+ * 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
+ * ...
+ * 49 8d 65 f0 lea -0x10(%r13),%rsp
+ * 41 5d pop %r13
+ * c3 retq
+ */
+static int update_insn_state(struct instruction *insn, struct insn_state *state)
+{
+ struct stack_op *op = &insn->stack_op;
+ struct cfi_reg *cfa = &state->cfa;
+ struct cfi_reg *regs = state->regs;
+
+ /* stack operations don't make sense with an undefined CFA */
+ if (cfa->base == CFI_UNDEFINED) {
+ if (insn->func) {
+ WARN_FUNC("undefined stack state", insn->sec, insn->offset);
+ return -1;
+ }
+ return 0;
+ }
+
+ if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
+ return update_insn_state_regs(insn, state);
+
+ switch (op->dest.type) {
+
+ case OP_DEST_REG:
+ switch (op->src.type) {
+
+ case OP_SRC_REG:
+ if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
+ cfa->base == CFI_SP &&
+ regs[CFI_BP].base == CFI_CFA &&
+ regs[CFI_BP].offset == -cfa->offset) {
+
+ /* mov %rsp, %rbp */
+ cfa->base = op->dest.reg;
+ state->bp_scratch = false;
+ }
+
+ else if (op->src.reg == CFI_SP &&
+ op->dest.reg == CFI_BP && state->drap) {
+
+ /* drap: mov %rsp, %rbp */
+ regs[CFI_BP].base = CFI_BP;
+ regs[CFI_BP].offset = -state->stack_size;
+ state->bp_scratch = false;
+ }
+
+ else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
+
+ /*
+ * mov %rsp, %reg
+ *
+ * This is needed for the rare case where GCC
+ * does:
+ *
+ * mov %rsp, %rax
+ * ...
+ * mov %rax, %rsp
+ */
+ state->vals[op->dest.reg].base = CFI_CFA;
+ state->vals[op->dest.reg].offset = -state->stack_size;
+ }
+
+ else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
+ cfa->base == CFI_BP) {
+
+ /*
+ * mov %rbp, %rsp
+ *
+ * Restore the original stack pointer (Clang).
+ */
+ state->stack_size = -state->regs[CFI_BP].offset;
+ }
+
+ else if (op->dest.reg == cfa->base) {
+
+ /* mov %reg, %rsp */
+ if (cfa->base == CFI_SP &&
+ state->vals[op->src.reg].base == CFI_CFA) {
+
+ /*
+ * This is needed for the rare case
+ * where GCC does something dumb like:
+ *
+ * lea 0x8(%rsp), %rcx
+ * ...
+ * mov %rcx, %rsp
+ */
+ cfa->offset = -state->vals[op->src.reg].offset;
+ state->stack_size = cfa->offset;
+
+ } else {
+ cfa->base = CFI_UNDEFINED;
+ cfa->offset = 0;
+ }
+ }
+
+ break;
+
+ case OP_SRC_ADD:
+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
+
+ /* add imm, %rsp */
+ state->stack_size -= op->src.offset;
+ if (cfa->base == CFI_SP)
+ cfa->offset -= op->src.offset;
+ break;
+ }
+
+ if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
+
+ /* lea disp(%rbp), %rsp */
+ state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ break;
+ }
+
+ if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
+
+ /* drap: lea disp(%rsp), %drap */
+ state->drap_reg = op->dest.reg;
+
+ /*
+ * lea disp(%rsp), %reg
+ *
+ * This is needed for the rare case where GCC
+ * does something dumb like:
+ *
+ * lea 0x8(%rsp), %rcx
+ * ...
+ * mov %rcx, %rsp
+ */
+ state->vals[op->dest.reg].base = CFI_CFA;
+			state->vals[op->dest.reg].offset =
+				-state->stack_size + op->src.offset;
+
+ break;
+ }
+
+ if (state->drap && op->dest.reg == CFI_SP &&
+ op->src.reg == state->drap_reg) {
+
+ /* drap: lea disp(%drap), %rsp */
+ cfa->base = CFI_SP;
+ cfa->offset = state->stack_size = -op->src.offset;
+ state->drap_reg = CFI_UNDEFINED;
+ state->drap = false;
+ break;
+ }
+
+ if (op->dest.reg == state->cfa.base) {
+ WARN_FUNC("unsupported stack register modification",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ break;
+
+ case OP_SRC_AND:
+ if (op->dest.reg != CFI_SP ||
+ (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+ (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+ WARN_FUNC("unsupported stack pointer realignment",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ if (state->drap_reg != CFI_UNDEFINED) {
+ /* drap: and imm, %rsp */
+ cfa->base = state->drap_reg;
+ cfa->offset = state->stack_size = 0;
+ state->drap = true;
+ }
+
+ /*
+ * Older versions of GCC (4.8ish) realign the stack
+ * without DRAP, with a frame pointer.
+ */
+
+ break;
+
+ case OP_SRC_POP:
+ if (!state->drap && op->dest.type == OP_DEST_REG &&
+ op->dest.reg == cfa->base) {
+
+ /* pop %rbp */
+ cfa->base = CFI_SP;
+ }
+
+ if (state->drap && cfa->base == CFI_BP_INDIRECT &&
+ op->dest.type == OP_DEST_REG &&
+ op->dest.reg == state->drap_reg &&
+ state->drap_offset == -state->stack_size) {
+
+ /* drap: pop %drap */
+ cfa->base = state->drap_reg;
+ cfa->offset = 0;
+ state->drap_offset = -1;
+
+ } else if (regs[op->dest.reg].offset == -state->stack_size) {
+
+ /* pop %reg */
+ restore_reg(state, op->dest.reg);
+ }
+
+ state->stack_size -= 8;
+ if (cfa->base == CFI_SP)
+ cfa->offset -= 8;
+
+ break;
+
+ case OP_SRC_REG_INDIRECT:
+ if (state->drap && op->src.reg == CFI_BP &&
+ op->src.offset == state->drap_offset) {
+
+ /* drap: mov disp(%rbp), %drap */
+ cfa->base = state->drap_reg;
+ cfa->offset = 0;
+ state->drap_offset = -1;
+ }
+
+ if (state->drap && op->src.reg == CFI_BP &&
+ op->src.offset == regs[op->dest.reg].offset) {
+
+ /* drap: mov disp(%rbp), %reg */
+ restore_reg(state, op->dest.reg);
+
+ } else if (op->src.reg == cfa->base &&
+ op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
+
+ /* mov disp(%rbp), %reg */
+ /* mov disp(%rsp), %reg */
+ restore_reg(state, op->dest.reg);
+ }
+
+ break;
+
+ default:
+ WARN_FUNC("unknown stack-related instruction",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ break;
+
+ case OP_DEST_PUSH:
+ state->stack_size += 8;
+ if (cfa->base == CFI_SP)
+ cfa->offset += 8;
+
+ if (op->src.type != OP_SRC_REG)
+ break;
+
+ if (state->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+ /* drap: push %drap */
+ cfa->base = CFI_BP_INDIRECT;
+ cfa->offset = -state->stack_size;
+
+ /* save drap so we know when to restore it */
+ state->drap_offset = -state->stack_size;
+
+ } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+
+ /* drap: push %rbp */
+ state->stack_size = 0;
+
+ } else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+ /* drap: push %reg */
+ save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+ }
+
+ } else {
+
+ /* push %reg */
+ save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+ }
+
+ /* detect when asm code uses rbp as a scratch register */
+ if (!no_fp && insn->func && op->src.reg == CFI_BP &&
+ cfa->base != CFI_BP)
+ state->bp_scratch = true;
+ break;
+
+ case OP_DEST_REG_INDIRECT:
+
+ if (state->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+
+ /* drap: mov %drap, disp(%rbp) */
+ cfa->base = CFI_BP_INDIRECT;
+ cfa->offset = op->dest.offset;
+
+ /* save drap offset so we know when to restore it */
+ state->drap_offset = op->dest.offset;
+ }
+
+ else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+
+ /* drap: mov reg, disp(%rbp) */
+ save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+ }
+
+ } else if (op->dest.reg == cfa->base) {
+
+ /* mov reg, disp(%rbp) */
+ /* mov reg, disp(%rsp) */
+ save_reg(state, op->src.reg, CFI_CFA,
+ op->dest.offset - state->cfa.offset);
+ }
+
+ break;
+
+ case OP_DEST_LEAVE:
+ if ((!state->drap && cfa->base != CFI_BP) ||
+ (state->drap && cfa->base != state->drap_reg)) {
+ WARN_FUNC("leave instruction with modified stack frame",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /* leave (mov %rbp, %rsp; pop %rbp) */
+
+ state->stack_size = -state->regs[CFI_BP].offset - 8;
+ restore_reg(state, CFI_BP);
+
+ if (!state->drap) {
+ cfa->base = CFI_SP;
+ cfa->offset -= 8;
+ }
+
+ break;
+
+ case OP_DEST_MEM:
+ if (op->src.type != OP_SRC_POP) {
+ WARN_FUNC("unknown stack-related memory operation",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /* pop mem */
+ state->stack_size -= 8;
+ if (cfa->base == CFI_SP)
+ cfa->offset -= 8;
+
+ break;
+
+ default:
+ WARN_FUNC("unknown stack-related instruction",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ return 0;
+}
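+
+/*
+ * Worked example of the state machine above, for a typical frame pointer
+ * prologue/epilogue (initial state: stack_size == 8, cfa == { CFI_SP, 8 }):
+ *
+ *	push %rbp	stack_size/cfa.offset -> 16, rbp saved at CFA-16
+ *	mov %rsp, %rbp	cfa.base -> CFI_BP
+ *	...
+ *	pop %rbp	cfa.base -> CFI_SP, regs[CFI_BP] restored,
+ *			stack_size/cfa.offset -> 8
+ *	ret
+ */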
+
+static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+{
+ struct insn_state *state1 = &insn->state, *state2 = state;
+ int i;
+
+ if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+ WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
+ insn->sec, insn->offset,
+ state1->cfa.base, state1->cfa.offset,
+ state2->cfa.base, state2->cfa.offset);
+
+ } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ if (!memcmp(&state1->regs[i], &state2->regs[i],
+ sizeof(struct cfi_reg)))
+ continue;
+
+ WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
+ insn->sec, insn->offset,
+ i, state1->regs[i].base, state1->regs[i].offset,
+ i, state2->regs[i].base, state2->regs[i].offset);
+ break;
+ }
+
+ } else if (state1->type != state2->type) {
+ WARN_FUNC("stack state mismatch: type1=%d type2=%d",
+ insn->sec, insn->offset, state1->type, state2->type);
+
+ } else if (state1->drap != state2->drap ||
+ (state1->drap && state1->drap_reg != state2->drap_reg) ||
+ (state1->drap && state1->drap_offset != state2->drap_offset)) {
+ WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
+ insn->sec, insn->offset,
+ state1->drap, state1->drap_reg, state1->drap_offset,
+ state2->drap, state2->drap_reg, state2->drap_offset);
+
+ } else
+ return true;
+
+ return false;
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps). Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file, struct instruction *first,
+ struct insn_state state)
+{
+ struct alternative *alt;
+ struct instruction *insn, *next_insn;
+ struct section *sec;
+ struct symbol *func = NULL;
+ int ret;
+
+ insn = first;
+ sec = insn->sec;
+
+ if (insn->alt_group && list_empty(&insn->alts)) {
+ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+ sec, insn->offset);
+ return 1;
+ }
+
+ while (1) {
+ next_insn = next_insn_same_sec(file, insn);
+
+ if (file->c_file && func && insn->func && func != insn->func->pfunc) {
+ WARN("%s() falls through to next function %s()",
+ func->name, insn->func->name);
+ return 1;
+ }
+
+ if (insn->func)
+ func = insn->func->pfunc;
+
+ if (func && insn->ignore) {
+ WARN_FUNC("BUG: why am I validating an ignored function?",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (insn->visited) {
+ if (!insn->hint && !insn_state_match(insn, &state))
+ return 1;
+
+ return 0;
+ }
+
+ if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+ func_for_each_insn_continue_reverse(file, insn->func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
+ }
+ }
+
+ if (!save_insn) {
+ WARN_FUNC("no corresponding CFI save for CFI restore",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (!save_insn->visited) {
+ /*
+ * Oops, no state to copy yet.
+ * Hopefully we can reach this
+ * instruction from another branch
+ * after the save insn has been
+ * visited.
+ */
+ if (insn == first)
+ return 0;
+
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
+ sec, insn->offset);
+ return 1;
+ }
+
+ insn->state = save_insn->state;
+ }
+
+ state = insn->state;
+
+ } else
+ insn->state = state;
+
+ insn->visited = true;
+
+ if (!insn->ignore_alts) {
+ list_for_each_entry(alt, &insn->alts, list) {
+ ret = validate_branch(file, alt->insn, state);
+ if (ret)
+ return 1;
+ }
+ }
+
+ switch (insn->type) {
+
+ case INSN_RETURN:
+ if (func && has_modified_stack_frame(&state)) {
+ WARN_FUNC("return with modified stack frame",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (state.bp_scratch) {
+ WARN("%s uses BP as a scratch register",
+ insn->func->name);
+ return 1;
+ }
+
+ return 0;
+
+ case INSN_CALL:
+ if (is_fentry_call(insn))
+ break;
+
+ ret = dead_end_function(file, insn->call_dest);
+ if (ret == 1)
+ return 0;
+ if (ret == -1)
+ return 1;
+
+ /* fallthrough */
+ case INSN_CALL_DYNAMIC:
+ if (!no_fp && func && !has_valid_stack_frame(&state)) {
+ WARN_FUNC("call without frame pointer save/setup",
+ sec, insn->offset);
+ return 1;
+ }
+ break;
+
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+ if (insn->jump_dest &&
+ (!func || !insn->jump_dest->func ||
+ insn->jump_dest->func->pfunc == func)) {
+ ret = validate_branch(file, insn->jump_dest,
+ state);
+ if (ret)
+ return 1;
+
+ } else if (func && has_modified_stack_frame(&state)) {
+ WARN_FUNC("sibling call from callable instruction with modified stack frame",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+
+ case INSN_JUMP_DYNAMIC:
+ if (func && list_empty(&insn->alts) &&
+ has_modified_stack_frame(&state)) {
+ WARN_FUNC("sibling call from callable instruction with modified stack frame",
+ sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+
+ case INSN_CONTEXT_SWITCH:
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_FUNC("unsupported instruction in callable function",
+ sec, insn->offset);
+ return 1;
+ }
+ return 0;
+
+ case INSN_STACK:
+ if (update_insn_state(insn, &state))
+ return 1;
+
+ break;
+
+ default:
+ break;
+ }
+
+ if (insn->dead_end)
+ return 0;
+
+ if (!next_insn) {
+ if (state.cfa.base == CFI_UNDEFINED)
+ return 0;
+ WARN("%s: unexpected end of section", sec->name);
+ return 1;
+ }
+
+ insn = next_insn;
+ }
+
+ return 0;
+}
+
+static int validate_unwind_hints(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int ret, warnings = 0;
+ struct insn_state state;
+
+ if (!file->hints)
+ return 0;
+
+ clear_insn_state(&state);
+
+ for_each_insn(file, insn) {
+ if (insn->hint && !insn->visited) {
+ ret = validate_branch(file, insn, state);
+ warnings += ret;
+ }
+ }
+
+ return warnings;
+}
+
+static int validate_retpoline(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC)
+ continue;
+
+ if (insn->retpoline_safe)
+ continue;
+
+		/*
+		 * .init.text code is run before userspace and thus doesn't
+		 * strictly need retpolines, except for modules: those are
+		 * loaded late, so they very much do need retpolines in
+		 * their .init.text.
+		 */
+ if (!strcmp(insn->sec->name, ".init.text") && !module)
+ continue;
+
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+
+ warnings++;
+ }
+
+ return warnings;
+}
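+
+/*
+ * Examples of what the above flags in a retpoline build: "jmp *%rax"
+ * decodes as INSN_JUMP_DYNAMIC and "call *%rax" as INSN_CALL_DYNAMIC.
+ * Code is expected to go through the __x86_indirect_thunk_* retpolines
+ * (or be annotated retpoline_safe) instead of using raw indirect
+ * branches.
+ */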
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+ return (insn->type == INSN_CALL &&
+ !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+ return (insn->type == INSN_CALL &&
+ !strcmp(insn->call_dest->name,
+ "__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct instruction *insn)
+{
+ int i;
+
+ if (insn->ignore || insn->type == INSN_NOP)
+ return true;
+
+ /*
+ * Ignore any unused exceptions. This can happen when a whitelisted
+ * function has an exception table entry.
+ *
+ * Also ignore alternative replacement instructions. This can happen
+ * when a whitelisted function uses one of the ALTERNATIVE macros.
+ */
+ if (!strcmp(insn->sec->name, ".fixup") ||
+ !strcmp(insn->sec->name, ".altinstr_replacement") ||
+ !strcmp(insn->sec->name, ".altinstr_aux"))
+ return true;
+
+ if (!insn->func)
+ return false;
+
+ /*
+ * CONFIG_UBSAN_TRAP inserts a UD2 when it sees
+ * __builtin_unreachable(). The BUG() macro has an unreachable() after
+ * the UD2, which causes GCC's undefined trap logic to emit another UD2
+ * (or occasionally a JMP to UD2).
+ */
+ if (list_prev_entry(insn, list)->dead_end &&
+ (insn->type == INSN_BUG ||
+ (insn->type == INSN_JUMP_UNCONDITIONAL &&
+ insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
+ return true;
+
+ /*
+ * Check if this (or a subsequent) instruction is related to
+ * CONFIG_UBSAN or CONFIG_KASAN.
+ *
+ * End the search at 5 instructions to avoid going into the weeds.
+ */
+ for (i = 0; i < 5; i++) {
+
+ if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+ return true;
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ if (insn->jump_dest &&
+ insn->jump_dest->func == insn->func) {
+ insn = insn->jump_dest;
+ continue;
+ }
+
+ break;
+ }
+
+ if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
+ break;
+
+ insn = list_next_entry(insn, list);
+ }
+
+ return false;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ struct instruction *insn;
+ struct insn_state state;
+ int ret, warnings = 0;
+
+ clear_insn_state(&state);
+
+ state.cfa = initial_func_cfi.cfa;
+ memcpy(&state.regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state.stack_size = initial_func_cfi.cfa.offset;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC || func->pfunc != func)
+ continue;
+
+ insn = find_insn(file, sec, func->offset);
+ if (!insn || insn->ignore)
+ continue;
+
+ ret = validate_branch(file, insn, state);
+ warnings += ret;
+ }
+ }
+
+ return warnings;
+}
+
+static int validate_reachable_instructions(struct objtool_file *file)
+{
+ struct instruction *insn;
+
+ if (file->ignore_unreachables)
+ return 0;
+
+ for_each_insn(file, insn) {
+ if (insn->visited || ignore_unreachable_insn(insn))
+ continue;
+
+ WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+ struct instruction *insn, *tmpinsn;
+ struct alternative *alt, *tmpalt;
+
+ list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+ list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+ list_del(&alt->list);
+ free(alt);
+ }
+ list_del(&insn->list);
+ hash_del(&insn->hash);
+ free(insn);
+ }
+ elf_close(file->elf);
+}
+
+static struct objtool_file file;
+
+int check(const char *_objname, bool orc)
+{
+ int ret, warnings = 0;
+
+ objname = _objname;
+
+ file.elf = elf_open(objname, orc ? O_RDWR : O_RDONLY);
+ if (!file.elf)
+ return 1;
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard");
+ file.c_file = find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = no_unreachable;
+ file.hints = false;
+
+ arch_initial_func_cfi_state(&initial_func_cfi);
+
+ ret = decode_sections(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ if (list_empty(&file.insn_list))
+ goto out;
+
+ if (retpoline) {
+ ret = validate_retpoline(&file);
+ if (ret < 0)
+ return ret;
+ warnings += ret;
+ }
+
+ ret = validate_functions(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ ret = validate_unwind_hints(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ if (!warnings) {
+ ret = validate_reachable_instructions(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
+ if (orc) {
+ ret = create_orc(&file);
+ if (ret < 0)
+ goto out;
+
+ ret = create_orc_sections(&file);
+ if (ret < 0)
+ goto out;
+
+ ret = elf_write(file.elf);
+ if (ret < 0)
+ goto out;
+ }
+
+out:
+ cleanup(&file);
+
+	/*
+	 * Ignore warnings for now until we get all the code cleaned up; the
+	 * check is kept in place so these failures can easily be made fatal
+	 * later.
+	 */
+	if (ret || warnings)
+		return 0;
+ return 0;
+}
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
new file mode 100644
index 000000000..e6e8a655b
--- /dev/null
+++ b/tools/objtool/check.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _CHECK_H
+#define _CHECK_H
+
+#include <stdbool.h>
+#include "elf.h"
+#include "cfi.h"
+#include "arch.h"
+#include "orc.h"
+#include <linux/hashtable.h>
+
+struct insn_state {
+ struct cfi_reg cfa;
+ struct cfi_reg regs[CFI_NUM_REGS];
+ int stack_size;
+ unsigned char type;
+ bool bp_scratch;
+ bool drap, end;
+ int drap_reg, drap_offset;
+ struct cfi_reg vals[CFI_NUM_REGS];
+};
+
+struct instruction {
+ struct list_head list;
+ struct hlist_node hash;
+ struct section *sec;
+ unsigned long offset;
+ unsigned int len;
+ unsigned char type;
+ unsigned long immediate;
+ bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+ bool retpoline_safe;
+ struct symbol *call_dest;
+ struct instruction *jump_dest;
+ struct instruction *first_jump_src;
+ struct list_head alts;
+ struct symbol *func;
+ struct stack_op stack_op;
+ struct insn_state state;
+ struct orc_entry orc;
+};
+
+struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 16);
+ struct section *whitelist;
+ bool ignore_unreachables, c_file, hints, rodata;
+};
+
+int check(const char *objname, bool orc);
+
+struct instruction *find_insn(struct objtool_file *file,
+ struct section *sec, unsigned long offset);
+
+#define for_each_insn(file, insn) \
+ list_for_each_entry(insn, &file->insn_list, list)
+
+#define sec_for_each_insn(file, sec, insn) \
+ for (insn = find_insn(file, sec, 0); \
+ insn && &insn->list != &file->insn_list && \
+ insn->sec == sec; \
+ insn = list_next_entry(insn, list))
+
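+/*
+ * Typical usage (see orc_gen.c):
+ *
+ *	struct instruction *insn;
+ *
+ *	sec_for_each_insn(file, sec, insn)
+ *		...
+ */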
+
+#endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
new file mode 100644
index 000000000..264d49fea
--- /dev/null
+++ b/tools/objtool/elf.c
@@ -0,0 +1,693 @@
+/*
+ * elf.c - ELF access library
+ *
+ * Adapted from kpatch (https://github.com/dynup/kpatch):
+ * Copyright (C) 2013-2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "elf.h"
+#include "warn.h"
+
+#define MAX_NAME_LEN 128
+
+struct section *find_section_by_name(struct elf *elf, const char *name)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ if (!strcmp(sec->name, name))
+ return sec;
+
+ return NULL;
+}
+
+static struct section *find_section_by_index(struct elf *elf,
+ unsigned int idx)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ if (sec->idx == idx)
+ return sec;
+
+ return NULL;
+}
+
+static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
+{
+ struct section *sec;
+ struct symbol *sym;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ hash_for_each_possible(sec->symbol_hash, sym, hash, idx)
+ if (sym->idx == idx)
+ return sym;
+
+ return NULL;
+}
+
+struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
+{
+ struct symbol *sym;
+
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (sym->type != STT_SECTION &&
+ sym->offset == offset)
+ return sym;
+
+ return NULL;
+}
+
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+{
+ struct section *sec;
+ struct symbol *sym;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (!strcmp(sym->name, name))
+ return sym;
+
+ return NULL;
+}
+
+struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+{
+ struct symbol *sym;
+
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (sym->type != STT_SECTION &&
+ offset >= sym->offset && offset < sym->offset + sym->len)
+ return sym;
+
+ return NULL;
+}
+
+struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
+ unsigned int len)
+{
+ struct rela *rela;
+ unsigned long o;
+
+ if (!sec->rela)
+ return NULL;
+
+ for (o = offset; o < offset + len; o++)
+ hash_for_each_possible(sec->rela->rela_hash, rela, hash, o)
+ if (rela->offset == o)
+ return rela;
+
+ return NULL;
+}
+
+struct rela *find_rela_by_dest(struct section *sec, unsigned long offset)
+{
+ return find_rela_by_dest_range(sec, offset, 1);
+}
+
+struct symbol *find_containing_func(struct section *sec, unsigned long offset)
+{
+ struct symbol *func;
+
+ list_for_each_entry(func, &sec->symbol_list, list)
+ if (func->type == STT_FUNC && offset >= func->offset &&
+ offset < func->offset + func->len)
+ return func;
+
+ return NULL;
+}
+
+static int read_sections(struct elf *elf)
+{
+ Elf_Scn *s = NULL;
+ struct section *sec;
+ size_t shstrndx, sections_nr;
+ int i;
+
+ if (elf_getshdrnum(elf->elf, &sections_nr)) {
+ WARN_ELF("elf_getshdrnum");
+ return -1;
+ }
+
+ if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
+ WARN_ELF("elf_getshdrstrndx");
+ return -1;
+ }
+
+ for (i = 0; i < sections_nr; i++) {
+ sec = malloc(sizeof(*sec));
+ if (!sec) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec, 0, sizeof(*sec));
+
+ INIT_LIST_HEAD(&sec->symbol_list);
+ INIT_LIST_HEAD(&sec->rela_list);
+ hash_init(sec->rela_hash);
+ hash_init(sec->symbol_hash);
+
+ list_add_tail(&sec->list, &elf->sections);
+
+ s = elf_getscn(elf->elf, i);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ sec->idx = elf_ndxscn(s);
+
+ if (!gelf_getshdr(s, &sec->sh)) {
+ WARN_ELF("gelf_getshdr");
+ return -1;
+ }
+
+ sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
+ if (!sec->name) {
+ WARN_ELF("elf_strptr");
+ return -1;
+ }
+
+ if (sec->sh.sh_size != 0) {
+ sec->data = elf_getdata(s, NULL);
+ if (!sec->data) {
+ WARN_ELF("elf_getdata");
+ return -1;
+ }
+ if (sec->data->d_off != 0 ||
+ sec->data->d_size != sec->sh.sh_size) {
+ WARN("unexpected data attributes for %s",
+ sec->name);
+ return -1;
+ }
+ }
+ sec->len = sec->sh.sh_size;
+ }
+
+ /* sanity check, one more call to elf_nextscn() should return NULL */
+ if (elf_nextscn(elf->elf, s)) {
+ WARN("section entry mismatch");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_symbols(struct elf *elf)
+{
+ struct section *symtab, *sec;
+ struct symbol *sym, *pfunc;
+ struct list_head *entry, *tmp;
+ int symbols_nr, i;
+ char *coldstr;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
+ /*
+ * A missing symbol table is actually possible if it's an empty
+ * .o file. This can happen for thunk_64.o.
+ */
+ return 0;
+ }
+
+ symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+
+ for (i = 0; i < symbols_nr; i++) {
+ sym = malloc(sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sym, 0, sizeof(*sym));
+
+ sym->idx = i;
+
+ if (!gelf_getsym(symtab->data, i, &sym->sym)) {
+ WARN_ELF("gelf_getsym");
+ goto err;
+ }
+
+ sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
+ sym->sym.st_name);
+ if (!sym->name) {
+ WARN_ELF("elf_strptr");
+ goto err;
+ }
+
+ sym->type = GELF_ST_TYPE(sym->sym.st_info);
+ sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+ if (sym->sym.st_shndx > SHN_UNDEF &&
+ sym->sym.st_shndx < SHN_LORESERVE) {
+ sym->sec = find_section_by_index(elf,
+ sym->sym.st_shndx);
+ if (!sym->sec) {
+ WARN("couldn't find section for symbol %s",
+ sym->name);
+ goto err;
+ }
+ if (sym->type == STT_SECTION) {
+ sym->name = sym->sec->name;
+ sym->sec->sym = sym;
+ }
+ } else
+ sym->sec = find_section_by_index(elf, 0);
+
+ sym->offset = sym->sym.st_value;
+ sym->len = sym->sym.st_size;
+
+ /* sorted insert into a per-section list */
+ entry = &sym->sec->symbol_list;
+ list_for_each_prev(tmp, &sym->sec->symbol_list) {
+ struct symbol *s;
+
+ s = list_entry(tmp, struct symbol, list);
+
+ if (sym->offset > s->offset) {
+ entry = tmp;
+ break;
+ }
+
+ if (sym->offset == s->offset && sym->len >= s->len) {
+ entry = tmp;
+ break;
+ }
+ }
+ list_add(&sym->list, entry);
+ hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
+ }
+
+ /* Create parent/child links for any cold subfunctions */
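+	/*
+	 * E.g. GCC may split "foo" into "foo" and "foo.cold.1"; linking
+	 * them via pfunc/cfunc lets validation treat them as one function.
+	 */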
+ list_for_each_entry(sec, &elf->sections, list) {
+ list_for_each_entry(sym, &sec->symbol_list, list) {
+ char pname[MAX_NAME_LEN + 1];
+ size_t pnamelen;
+ if (sym->type != STT_FUNC)
+ continue;
+ sym->pfunc = sym->cfunc = sym;
+ coldstr = strstr(sym->name, ".cold");
+ if (!coldstr)
+ continue;
+
+ pnamelen = coldstr - sym->name;
+ if (pnamelen > MAX_NAME_LEN) {
+ WARN("%s(): parent function name exceeds maximum length of %d characters",
+ sym->name, MAX_NAME_LEN);
+ return -1;
+ }
+
+ strncpy(pname, sym->name, pnamelen);
+ pname[pnamelen] = '\0';
+ pfunc = find_symbol_by_name(elf, pname);
+
+ if (!pfunc) {
+ WARN("%s(): can't find parent function",
+ sym->name);
+ return -1;
+ }
+
+ sym->pfunc = pfunc;
+ pfunc->cfunc = sym;
+
+			/*
+			 * Unfortunately, -fno-reorder-functions puts the
+			 * child inside the parent.  Remove the overlap so we
+			 * can have sane assumptions.
+			 *
+			 * Note that pfunc->len now no longer matches
+			 * pfunc->sym.st_size.
+			 */
+ if (sym->sec == pfunc->sec &&
+ sym->offset >= pfunc->offset &&
+ sym->offset + sym->len == pfunc->offset + pfunc->len) {
+ pfunc->len -= sym->len;
+ }
+ }
+ }
+
+ return 0;
+
+err:
+ free(sym);
+ return -1;
+}
+
+static int read_relas(struct elf *elf)
+{
+ struct section *sec;
+ struct rela *rela;
+ int i;
+ unsigned int symndx;
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->sh.sh_type != SHT_RELA)
+ continue;
+
+ sec->base = find_section_by_name(elf, sec->name + 5);
+ if (!sec->base) {
+ WARN("can't find base section for rela section %s",
+ sec->name);
+ return -1;
+ }
+
+ sec->base->rela = sec;
+
+ for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
+ rela = malloc(sizeof(*rela));
+ if (!rela) {
+ perror("malloc");
+ return -1;
+ }
+ memset(rela, 0, sizeof(*rela));
+
+ if (!gelf_getrela(sec->data, i, &rela->rela)) {
+ WARN_ELF("gelf_getrela");
+ return -1;
+ }
+
+ rela->type = GELF_R_TYPE(rela->rela.r_info);
+ rela->addend = rela->rela.r_addend;
+ rela->offset = rela->rela.r_offset;
+ symndx = GELF_R_SYM(rela->rela.r_info);
+ rela->sym = find_symbol_by_index(elf, symndx);
+ rela->rela_sec = sec;
+ if (!rela->sym) {
+ WARN("can't find rela entry symbol %d for %s",
+ symndx, sec->name);
+ return -1;
+ }
+
+ list_add_tail(&rela->list, &sec->rela_list);
+ hash_add(sec->rela_hash, &rela->hash, rela->offset);
+
+ }
+ }
+
+ return 0;
+}
+
+struct elf *elf_open(const char *name, int flags)
+{
+ struct elf *elf;
+ Elf_Cmd cmd;
+
+ elf_version(EV_CURRENT);
+
+ elf = malloc(sizeof(*elf));
+ if (!elf) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(elf, 0, sizeof(*elf));
+
+ INIT_LIST_HEAD(&elf->sections);
+
+ elf->fd = open(name, flags);
+ if (elf->fd == -1) {
+ fprintf(stderr, "objtool: Can't open '%s': %s\n",
+ name, strerror(errno));
+ goto err;
+ }
+
+ if ((flags & O_ACCMODE) == O_RDONLY)
+ cmd = ELF_C_READ_MMAP;
+ else if ((flags & O_ACCMODE) == O_RDWR)
+ cmd = ELF_C_RDWR;
+ else /* O_WRONLY */
+ cmd = ELF_C_WRITE;
+
+ elf->elf = elf_begin(elf->fd, cmd, NULL);
+ if (!elf->elf) {
+ WARN_ELF("elf_begin");
+ goto err;
+ }
+
+ if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
+ WARN_ELF("gelf_getehdr");
+ goto err;
+ }
+
+ if (read_sections(elf))
+ goto err;
+
+ if (read_symbols(elf))
+ goto err;
+
+ if (read_relas(elf))
+ goto err;
+
+ return elf;
+
+err:
+ elf_close(elf);
+ return NULL;
+}
+
+struct section *elf_create_section(struct elf *elf, const char *name,
+ size_t entsize, int nr)
+{
+ struct section *sec, *shstrtab;
+ size_t size = entsize * nr;
+ struct Elf_Scn *s;
+ Elf_Data *data;
+
+ sec = malloc(sizeof(*sec));
+ if (!sec) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(sec, 0, sizeof(*sec));
+
+ INIT_LIST_HEAD(&sec->symbol_list);
+ INIT_LIST_HEAD(&sec->rela_list);
+ hash_init(sec->rela_hash);
+ hash_init(sec->symbol_hash);
+
+ list_add_tail(&sec->list, &elf->sections);
+
+ s = elf_newscn(elf->elf);
+ if (!s) {
+ WARN_ELF("elf_newscn");
+ return NULL;
+ }
+
+ sec->name = strdup(name);
+ if (!sec->name) {
+ perror("strdup");
+ return NULL;
+ }
+
+ sec->idx = elf_ndxscn(s);
+ sec->len = size;
+ sec->changed = true;
+
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ WARN_ELF("elf_newdata");
+ return NULL;
+ }
+
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
+
+ if (size) {
+ sec->data->d_buf = malloc(size);
+ if (!sec->data->d_buf) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(sec->data->d_buf, 0, size);
+ }
+
+ if (!gelf_getshdr(s, &sec->sh)) {
+ WARN_ELF("gelf_getshdr");
+ return NULL;
+ }
+
+ sec->sh.sh_size = size;
+ sec->sh.sh_entsize = entsize;
+ sec->sh.sh_type = SHT_PROGBITS;
+ sec->sh.sh_addralign = 1;
+ sec->sh.sh_flags = SHF_ALLOC;
+
+
+ /* Add section name to .shstrtab (or .strtab for Clang) */
+ shstrtab = find_section_by_name(elf, ".shstrtab");
+ if (!shstrtab)
+ shstrtab = find_section_by_name(elf, ".strtab");
+ if (!shstrtab) {
+ WARN("can't find .shstrtab or .strtab section");
+ return NULL;
+ }
+
+ s = elf_getscn(elf->elf, shstrtab->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return NULL;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return NULL;
+ }
+
+ data->d_buf = sec->name;
+ data->d_size = strlen(name) + 1;
+ data->d_align = 1;
+
+ sec->sh.sh_name = shstrtab->len;
+
+ shstrtab->len += strlen(name) + 1;
+ shstrtab->changed = true;
+
+ return sec;
+}
+
+struct section *elf_create_rela_section(struct elf *elf, struct section *base)
+{
+ char *relaname;
+ struct section *sec;
+
+ relaname = malloc(strlen(base->name) + strlen(".rela") + 1);
+ if (!relaname) {
+ perror("malloc");
+ return NULL;
+ }
+ strcpy(relaname, ".rela");
+ strcat(relaname, base->name);
+
+ sec = elf_create_section(elf, relaname, sizeof(GElf_Rela), 0);
+ free(relaname);
+ if (!sec)
+ return NULL;
+
+ base->rela = sec;
+ sec->base = base;
+
+ sec->sh.sh_type = SHT_RELA;
+ sec->sh.sh_addralign = 8;
+ sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+ sec->sh.sh_info = base->idx;
+ sec->sh.sh_flags = SHF_INFO_LINK;
+
+ return sec;
+}
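+
+/*
+ * Example: create_orc_sections() creates ".orc_unwind_ip" and then calls
+ * this to pair it with a ".rela.orc_unwind_ip" section.
+ */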
+
+int elf_rebuild_rela_section(struct section *sec)
+{
+ struct rela *rela;
+ int nr, idx = 0, size;
+ GElf_Rela *relas;
+
+ nr = 0;
+ list_for_each_entry(rela, &sec->rela_list, list)
+ nr++;
+
+ size = nr * sizeof(*relas);
+ relas = malloc(size);
+ if (!relas) {
+ perror("malloc");
+ return -1;
+ }
+
+ sec->data->d_buf = relas;
+ sec->data->d_size = size;
+
+ sec->sh.sh_size = size;
+
+ idx = 0;
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ relas[idx].r_offset = rela->offset;
+ relas[idx].r_addend = rela->addend;
+ relas[idx].r_info = GELF_R_INFO(rela->sym->idx, rela->type);
+ idx++;
+ }
+
+ return 0;
+}
+
+int elf_write(struct elf *elf)
+{
+ struct section *sec;
+ Elf_Scn *s;
+
+ /* Update section headers for changed sections: */
+ list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->changed) {
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+ if (!gelf_update_shdr(s, &sec->sh)) {
+ WARN_ELF("gelf_update_shdr");
+ return -1;
+ }
+ }
+ }
+
+ /* Make sure the new section header entries get updated properly. */
+ elf_flagelf(elf->elf, ELF_C_SET, ELF_F_DIRTY);
+
+ /* Write all changes to the file. */
+ if (elf_update(elf->elf, ELF_C_WRITE) < 0) {
+ WARN_ELF("elf_update");
+ return -1;
+ }
+
+ return 0;
+}
+
+void elf_close(struct elf *elf)
+{
+ struct section *sec, *tmpsec;
+ struct symbol *sym, *tmpsym;
+ struct rela *rela, *tmprela;
+
+ if (elf->elf)
+ elf_end(elf->elf);
+
+ if (elf->fd > 0)
+ close(elf->fd);
+
+ list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
+ list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
+ list_del(&sym->list);
+ hash_del(&sym->hash);
+ free(sym);
+ }
+ list_for_each_entry_safe(rela, tmprela, &sec->rela_list, list) {
+ list_del(&rela->list);
+ hash_del(&rela->hash);
+ free(rela);
+ }
+ list_del(&sec->list);
+ free(sec);
+ }
+
+ free(elf);
+}
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
new file mode 100644
index 000000000..bc97ed86b
--- /dev/null
+++ b/tools/objtool/elf.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_ELF_H
+#define _OBJTOOL_ELF_H
+
+#include <stdio.h>
+#include <gelf.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+
+#ifdef LIBELF_USE_DEPRECATED
+# define elf_getshdrnum elf_getshnum
+# define elf_getshdrstrndx elf_getshstrndx
+#endif
+
+/*
+ * Fallback for systems without this "read, mmapping if possible" cmd.
+ */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+struct section {
+ struct list_head list;
+ GElf_Shdr sh;
+ struct list_head symbol_list;
+ DECLARE_HASHTABLE(symbol_hash, 8);
+ struct list_head rela_list;
+ DECLARE_HASHTABLE(rela_hash, 16);
+ struct section *base, *rela;
+ struct symbol *sym;
+ Elf_Data *data;
+ char *name;
+ int idx;
+ unsigned int len;
+ bool changed, text, rodata;
+};
+
+struct symbol {
+ struct list_head list;
+ struct hlist_node hash;
+ GElf_Sym sym;
+ struct section *sec;
+ char *name;
+ unsigned int idx;
+ unsigned char bind, type;
+ unsigned long offset;
+ unsigned int len;
+ struct symbol *pfunc, *cfunc;
+};
+
+struct rela {
+ struct list_head list;
+ struct hlist_node hash;
+ GElf_Rela rela;
+ struct section *rela_sec;
+ struct symbol *sym;
+ unsigned int type;
+ unsigned long offset;
+ int addend;
+};
+
+struct elf {
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ int fd;
+ char *name;
+ struct list_head sections;
+ DECLARE_HASHTABLE(rela_hash, 16);
+};
+
+
+struct elf *elf_open(const char *name, int flags);
+struct section *find_section_by_name(struct elf *elf, const char *name);
+struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
+struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
+ unsigned int len);
+struct symbol *find_containing_func(struct section *sec, unsigned long offset);
+struct section *elf_create_section(struct elf *elf, const char *name, size_t
+ entsize, int nr);
+struct section *elf_create_rela_section(struct elf *elf, struct section *base);
+int elf_rebuild_rela_section(struct section *sec);
+int elf_write(struct elf *elf);
+void elf_close(struct elf *elf);
+
+#define for_each_sec(file, sec) \
+ list_for_each_entry(sec, &file->elf->sections, list)
+
+#endif /* _OBJTOOL_ELF_H */
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
new file mode 100644
index 000000000..07f329919
--- /dev/null
+++ b/tools/objtool/objtool.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * objtool:
+ *
+ * The 'check' subcmd analyzes every .o file and ensures the validity of its
+ * stack trace metadata. It enforces a set of rules on asm code and C inline
+ * assembly code so that stack traces can be reliable.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdlib.h>
+#include <subcmd/exec-cmd.h>
+#include <subcmd/pager.h>
+#include <linux/kernel.h>
+
+#include "builtin.h"
+
+struct cmd_struct {
+ const char *name;
+ int (*fn)(int, const char **);
+ const char *help;
+};
+
+static const char objtool_usage_string[] =
+ "objtool COMMAND [ARGS]";
+
+static struct cmd_struct objtool_cmds[] = {
+ {"check", cmd_check, "Perform stack metadata validation on an object file" },
+ {"orc", cmd_orc, "Generate in-place ORC unwind tables for an object file" },
+};
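+
+/*
+ * Example invocations:
+ *
+ *	objtool check foo.o
+ *	objtool orc generate foo.o
+ *	objtool orc dump foo.o
+ */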
+
+bool help;
+
+static void cmd_usage(void)
+{
+ unsigned int i, longest = 0;
+
+ printf("\n usage: %s\n\n", objtool_usage_string);
+
+ for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
+ if (longest < strlen(objtool_cmds[i].name))
+ longest = strlen(objtool_cmds[i].name);
+ }
+
+ puts(" Commands:");
+ for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
+ printf(" %-*s ", longest, objtool_cmds[i].name);
+ puts(objtool_cmds[i].help);
+ }
+
+ printf("\n");
+
+ exit(129);
+}
+
+static void handle_options(int *argc, const char ***argv)
+{
+ while (*argc > 0) {
+ const char *cmd = (*argv)[0];
+
+ if (cmd[0] != '-')
+ break;
+
+ if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) {
+ help = true;
+ break;
+ } else {
+ fprintf(stderr, "Unknown option: %s\n", cmd);
+ cmd_usage();
+ }
+
+ (*argv)++;
+ (*argc)--;
+ }
+}
+
+static void handle_internal_command(int argc, const char **argv)
+{
+ const char *cmd = argv[0];
+ unsigned int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
+ struct cmd_struct *p = objtool_cmds+i;
+
+ if (strcmp(p->name, cmd))
+ continue;
+
+ ret = p->fn(argc, argv);
+
+ exit(ret);
+ }
+
+ cmd_usage();
+}
+
+int main(int argc, const char **argv)
+{
+ static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
+
+ /* libsubcmd init */
+ exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
+ pager_init(UNUSED);
+
+ argv++;
+ argc--;
+ handle_options(&argc, &argv);
+
+ if (!argc || help)
+ cmd_usage();
+
+ handle_internal_command(argc, argv);
+
+ return 0;
+}
diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
new file mode 100644
index 000000000..b0e92a6d0
--- /dev/null
+++ b/tools/objtool/orc.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ORC_H
+#define _ORC_H
+
+#include <asm/orc_types.h>
+
+struct objtool_file;
+
+int create_orc(struct objtool_file *file);
+int create_orc_sections(struct objtool_file *file);
+
+int orc_dump(const char *objname);
+
+#endif /* _ORC_H */
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
new file mode 100644
index 000000000..1a3e77494
--- /dev/null
+++ b/tools/objtool/orc_dump.c
@@ -0,0 +1,224 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <unistd.h>
+#include "orc.h"
+#include "warn.h"
+
+static const char *reg_name(unsigned int reg)
+{
+ switch (reg) {
+ case ORC_REG_PREV_SP:
+ return "prevsp";
+ case ORC_REG_DX:
+ return "dx";
+ case ORC_REG_DI:
+ return "di";
+ case ORC_REG_BP:
+ return "bp";
+ case ORC_REG_SP:
+ return "sp";
+ case ORC_REG_R10:
+ return "r10";
+ case ORC_REG_R13:
+ return "r13";
+ case ORC_REG_BP_INDIRECT:
+ return "bp(ind)";
+ case ORC_REG_SP_INDIRECT:
+ return "sp(ind)";
+ default:
+ return "?";
+ }
+}
+
+static const char *orc_type_name(unsigned int type)
+{
+ switch (type) {
+ case ORC_TYPE_CALL:
+ return "call";
+ case ORC_TYPE_REGS:
+ return "regs";
+ case ORC_TYPE_REGS_IRET:
+ return "iret";
+ default:
+ return "?";
+ }
+}
+
+static void print_reg(unsigned int reg, int offset)
+{
+ if (reg == ORC_REG_BP_INDIRECT)
+ printf("(bp%+d)", offset);
+ else if (reg == ORC_REG_SP_INDIRECT)
+ printf("(sp%+d)", offset);
+ else if (reg == ORC_REG_UNDEFINED)
+ printf("(und)");
+ else
+ printf("%s%+d", reg_name(reg), offset);
+}
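+
+/*
+ * Together these helpers print one line per ORC entry, e.g.:
+ *
+ *	.text+58: sp:sp+16 bp:(und) type:call end:0
+ */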
+
+int orc_dump(const char *_objname)
+{
+ int fd, nr_entries, i, *orc_ip = NULL, orc_size = 0;
+ struct orc_entry *orc = NULL;
+ char *name;
+ size_t nr_sections;
+ Elf64_Addr orc_ip_addr = 0;
+ size_t shstrtab_idx, strtab_idx = 0;
+ Elf *elf;
+ Elf_Scn *scn;
+ GElf_Shdr sh;
+ GElf_Rela rela;
+ GElf_Sym sym;
+ Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
+
+
+ objname = _objname;
+
+ elf_version(EV_CURRENT);
+
+ fd = open(objname, O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ return -1;
+ }
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ WARN_ELF("elf_begin");
+ return -1;
+ }
+
+ if (elf_getshdrnum(elf, &nr_sections)) {
+ WARN_ELF("elf_getshdrnum");
+ return -1;
+ }
+
+ if (elf_getshdrstrndx(elf, &shstrtab_idx)) {
+ WARN_ELF("elf_getshdrstrndx");
+ return -1;
+ }
+
+ for (i = 0; i < nr_sections; i++) {
+ scn = elf_getscn(elf, i);
+ if (!scn) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ if (!gelf_getshdr(scn, &sh)) {
+ WARN_ELF("gelf_getshdr");
+ return -1;
+ }
+
+ name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
+ if (!name) {
+ WARN_ELF("elf_strptr");
+ return -1;
+ }
+
+ data = elf_getdata(scn, NULL);
+ if (!data) {
+ WARN_ELF("elf_getdata");
+ return -1;
+ }
+
+ if (!strcmp(name, ".symtab")) {
+ symtab = data;
+ } else if (!strcmp(name, ".strtab")) {
+ strtab_idx = i;
+ } else if (!strcmp(name, ".orc_unwind")) {
+ orc = data->d_buf;
+ orc_size = sh.sh_size;
+ } else if (!strcmp(name, ".orc_unwind_ip")) {
+ orc_ip = data->d_buf;
+ orc_ip_addr = sh.sh_addr;
+ } else if (!strcmp(name, ".rela.orc_unwind_ip")) {
+ rela_orc_ip = data;
+ }
+ }
+
+ if (!symtab || !strtab_idx || !orc || !orc_ip)
+ return 0;
+
+ if (orc_size % sizeof(*orc) != 0) {
+ WARN("bad .orc_unwind section size");
+ return -1;
+ }
+
+ nr_entries = orc_size / sizeof(*orc);
+ for (i = 0; i < nr_entries; i++) {
+ if (rela_orc_ip) {
+ if (!gelf_getrela(rela_orc_ip, i, &rela)) {
+ WARN_ELF("gelf_getrela");
+ return -1;
+ }
+
+ if (!gelf_getsym(symtab, GELF_R_SYM(rela.r_info), &sym)) {
+ WARN_ELF("gelf_getsym");
+ return -1;
+ }
+
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) {
+ scn = elf_getscn(elf, sym.st_shndx);
+ if (!scn) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ if (!gelf_getshdr(scn, &sh)) {
+ WARN_ELF("gelf_getshdr");
+ return -1;
+ }
+
+ name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
+ if (!name) {
+ WARN_ELF("elf_strptr");
+ return -1;
+ }
+ } else {
+ name = elf_strptr(elf, strtab_idx, sym.st_name);
+ if (!name) {
+ WARN_ELF("elf_strptr");
+ return -1;
+ }
+ }
+
+ printf("%s+%llx:", name, (unsigned long long)rela.r_addend);
+
+ } else {
+ printf("%llx:", (unsigned long long)(orc_ip_addr + (i * sizeof(int)) + orc_ip[i]));
+ }
+
+
+ printf(" sp:");
+
+ print_reg(orc[i].sp_reg, orc[i].sp_offset);
+
+ printf(" bp:");
+
+ print_reg(orc[i].bp_reg, orc[i].bp_offset);
+
+ printf(" type:%s end:%d\n",
+ orc_type_name(orc[i].type), orc[i].end);
+ }
+
+ elf_end(elf);
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
new file mode 100644
index 000000000..0b1ba8e7d
--- /dev/null
+++ b/tools/objtool/orc_gen.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "orc.h"
+#include "check.h"
+#include "warn.h"
+
+int create_orc(struct objtool_file *file)
+{
+ struct instruction *insn;
+
+ for_each_insn(file, insn) {
+ struct orc_entry *orc = &insn->orc;
+ struct cfi_reg *cfa = &insn->state.cfa;
+ struct cfi_reg *bp = &insn->state.regs[CFI_BP];
+
+ orc->end = insn->state.end;
+
+ if (cfa->base == CFI_UNDEFINED) {
+ orc->sp_reg = ORC_REG_UNDEFINED;
+ continue;
+ }
+
+ switch (cfa->base) {
+ case CFI_SP:
+ orc->sp_reg = ORC_REG_SP;
+ break;
+ case CFI_SP_INDIRECT:
+ orc->sp_reg = ORC_REG_SP_INDIRECT;
+ break;
+ case CFI_BP:
+ orc->sp_reg = ORC_REG_BP;
+ break;
+ case CFI_BP_INDIRECT:
+ orc->sp_reg = ORC_REG_BP_INDIRECT;
+ break;
+ case CFI_R10:
+ orc->sp_reg = ORC_REG_R10;
+ break;
+ case CFI_R13:
+ orc->sp_reg = ORC_REG_R13;
+ break;
+ case CFI_DI:
+ orc->sp_reg = ORC_REG_DI;
+ break;
+ case CFI_DX:
+ orc->sp_reg = ORC_REG_DX;
+ break;
+ default:
+ WARN_FUNC("unknown CFA base reg %d",
+ insn->sec, insn->offset, cfa->base);
+ return -1;
+ }
+
+ switch(bp->base) {
+ case CFI_UNDEFINED:
+ orc->bp_reg = ORC_REG_UNDEFINED;
+ break;
+ case CFI_CFA:
+ orc->bp_reg = ORC_REG_PREV_SP;
+ break;
+ case CFI_BP:
+ orc->bp_reg = ORC_REG_BP;
+ break;
+ default:
+ WARN_FUNC("unknown BP base reg %d",
+ insn->sec, insn->offset, bp->base);
+ return -1;
+ }
+
+ orc->sp_offset = cfa->offset;
+ orc->bp_offset = bp->offset;
+ orc->type = insn->state.type;
+ }
+
+ return 0;
+}
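+
+/*
+ * Example of the CFI -> ORC mapping above: an instruction where the CFA
+ * is rsp+16 and rbp is saved at CFA-16 gets the entry
+ * { sp_reg: ORC_REG_SP, sp_offset: 16, bp_reg: ORC_REG_PREV_SP,
+ *   bp_offset: -16, type: ORC_TYPE_CALL }.
+ */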
+
+static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
+ unsigned int idx, struct section *insn_sec,
+ unsigned long insn_off, struct orc_entry *o)
+{
+ struct orc_entry *orc;
+ struct rela *rela;
+
+ /* populate ORC data */
+ orc = (struct orc_entry *)u_sec->data->d_buf + idx;
+ memcpy(orc, o, sizeof(*orc));
+
+ /* populate rela for ip */
+ rela = malloc(sizeof(*rela));
+ if (!rela) {
+ perror("malloc");
+ return -1;
+ }
+ memset(rela, 0, sizeof(*rela));
+
+ if (insn_sec->sym) {
+ rela->sym = insn_sec->sym;
+ rela->addend = insn_off;
+ } else {
+ /*
+ * The Clang assembler doesn't produce section symbols, so we
+ * have to reference the function symbol instead:
+ */
+ rela->sym = find_symbol_containing(insn_sec, insn_off);
+ if (!rela->sym) {
+ /*
+ * Hack alert. This happens when we need to reference
+ * the NOP pad insn immediately after the function.
+ */
+ rela->sym = find_symbol_containing(insn_sec,
+ insn_off - 1);
+ }
+ if (!rela->sym) {
+ WARN("missing symbol for insn at offset 0x%lx\n",
+ insn_off);
+ return -1;
+ }
+
+ rela->addend = insn_off - rela->sym->offset;
+ }
+
+ rela->type = R_X86_64_PC32;
+ rela->offset = idx * sizeof(int);
+
+ list_add_tail(&rela->list, &ip_relasec->rela_list);
+ hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset);
+
+ return 0;
+}
+
+int create_orc_sections(struct objtool_file *file)
+{
+ struct instruction *insn, *prev_insn;
+ struct section *sec, *u_sec, *ip_relasec;
+ unsigned int idx;
+
+ struct orc_entry empty = {
+ .sp_reg = ORC_REG_UNDEFINED,
+ .bp_reg = ORC_REG_UNDEFINED,
+ .type = ORC_TYPE_CALL,
+ };
+
+ sec = find_section_by_name(file->elf, ".orc_unwind");
+ if (sec) {
+ WARN("file already has .orc_unwind section, skipping");
+ return -1;
+ }
+
+ /* count the number of needed orcs */
+ idx = 0;
+ for_each_sec(file, sec) {
+ if (!sec->text)
+ continue;
+
+ prev_insn = NULL;
+ sec_for_each_insn(file, sec, insn) {
+ if (!prev_insn ||
+ memcmp(&insn->orc, &prev_insn->orc,
+ sizeof(struct orc_entry))) {
+ idx++;
+ }
+ prev_insn = insn;
+ }
+
+ /* section terminator */
+ if (prev_insn)
+ idx++;
+ }
+ if (!idx)
+ return -1;
+
+
+ /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+ if (!sec)
+ return -1;
+
+ ip_relasec = elf_create_rela_section(file->elf, sec);
+ if (!ip_relasec)
+ return -1;
+
+	/* create .orc_unwind section */
+	u_sec = elf_create_section(file->elf, ".orc_unwind",
+				   sizeof(struct orc_entry), idx);
+	if (!u_sec)
+		return -1;
+
+ /* populate sections */
+ idx = 0;
+ for_each_sec(file, sec) {
+ if (!sec->text)
+ continue;
+
+ prev_insn = NULL;
+ sec_for_each_insn(file, sec, insn) {
+ if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
+ sizeof(struct orc_entry))) {
+
+ if (create_orc_entry(u_sec, ip_relasec, idx,
+ insn->sec, insn->offset,
+ &insn->orc))
+ return -1;
+
+ idx++;
+ }
+ prev_insn = insn;
+ }
+
+ /* section terminator */
+ if (prev_insn) {
+ if (create_orc_entry(u_sec, ip_relasec, idx,
+ prev_insn->sec,
+ prev_insn->offset + prev_insn->len,
+ &empty))
+ return -1;
+
+ idx++;
+ }
+ }
+
+ if (elf_rebuild_rela_section(ip_relasec))
+ return -1;
+
+ return 0;
+}
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
new file mode 100644
index 000000000..84f001d52
--- /dev/null
+++ b/tools/objtool/special.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This file reads all the special sections which have alternate instructions
+ * which can be patched in or redirected to at runtime.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "special.h"
+#include "warn.h"
+
+#define EX_ENTRY_SIZE 12
+#define EX_ORIG_OFFSET 0
+#define EX_NEW_OFFSET 4
+
+#define JUMP_ENTRY_SIZE 24
+#define JUMP_ORIG_OFFSET 0
+#define JUMP_NEW_OFFSET 8
+
+#define ALT_ENTRY_SIZE 13
+#define ALT_ORIG_OFFSET 0
+#define ALT_NEW_OFFSET 4
+#define ALT_FEATURE_OFFSET 8
+#define ALT_ORIG_LEN_OFFSET 10
+#define ALT_NEW_LEN_OFFSET 11
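+
+/*
+ * The ALT_* offsets above mirror x86's struct alt_instr:
+ *
+ *	s32 instr_offset;	-  0: ALT_ORIG_OFFSET
+ *	s32 repl_offset;	-  4: ALT_NEW_OFFSET
+ *	u16 cpuid;		-  8: ALT_FEATURE_OFFSET
+ *	u8  instrlen;		- 10: ALT_ORIG_LEN_OFFSET
+ *	u8  replacementlen;	- 11: ALT_NEW_LEN_OFFSET
+ *	u8  padlen;		- 12: (not used by objtool)
+ */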
+
+#define X86_FEATURE_POPCNT (4*32+23)
+
+struct special_entry {
+ const char *sec;
+ bool group, jump_or_nop;
+ unsigned char size, orig, new;
+ unsigned char orig_len, new_len; /* group only */
+ unsigned char feature; /* ALTERNATIVE macro CPU feature */
+};
+
+struct special_entry entries[] = {
+ {
+ .sec = ".altinstructions",
+ .group = true,
+ .size = ALT_ENTRY_SIZE,
+ .orig = ALT_ORIG_OFFSET,
+ .orig_len = ALT_ORIG_LEN_OFFSET,
+ .new = ALT_NEW_OFFSET,
+ .new_len = ALT_NEW_LEN_OFFSET,
+ .feature = ALT_FEATURE_OFFSET,
+ },
+ {
+ .sec = "__jump_table",
+ .jump_or_nop = true,
+ .size = JUMP_ENTRY_SIZE,
+ .orig = JUMP_ORIG_OFFSET,
+ .new = JUMP_NEW_OFFSET,
+ },
+ {
+ .sec = "__ex_table",
+ .size = EX_ENTRY_SIZE,
+ .orig = EX_ORIG_OFFSET,
+ .new = EX_NEW_OFFSET,
+ },
+ {},
+};
+
+static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+ struct section *sec, int idx,
+ struct special_alt *alt)
+{
+ struct rela *orig_rela, *new_rela;
+ unsigned long offset;
+
+ offset = idx * entry->size;
+
+ alt->group = entry->group;
+ alt->jump_or_nop = entry->jump_or_nop;
+
+ if (alt->group) {
+ alt->orig_len = *(unsigned char *)(sec->data->d_buf + offset +
+ entry->orig_len);
+ alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
+ entry->new_len);
+ }
+
+ if (entry->feature) {
+ unsigned short feature;
+
+ feature = *(unsigned short *)(sec->data->d_buf + offset +
+ entry->feature);
+
+ /*
+ * It has been requested that we don't validate the !POPCNT
+ * feature path which is a "very very small percentage of
+ * machines".
+ */
+ if (feature == X86_FEATURE_POPCNT)
+ alt->skip_orig = true;
+ }
+
+ orig_rela = find_rela_by_dest(sec, offset + entry->orig);
+ if (!orig_rela) {
+ WARN_FUNC("can't find orig rela", sec, offset + entry->orig);
+ return -1;
+ }
+ if (orig_rela->sym->type != STT_SECTION) {
+ WARN_FUNC("don't know how to handle non-section rela symbol %s",
+ sec, offset + entry->orig, orig_rela->sym->name);
+ return -1;
+ }
+
+ alt->orig_sec = orig_rela->sym->sec;
+ alt->orig_off = orig_rela->addend;
+
+ if (!entry->group || alt->new_len) {
+ new_rela = find_rela_by_dest(sec, offset + entry->new);
+ if (!new_rela) {
+ WARN_FUNC("can't find new rela",
+ sec, offset + entry->new);
+ return -1;
+ }
+
+ alt->new_sec = new_rela->sym->sec;
+ alt->new_off = (unsigned int)new_rela->addend;
+
+ /* _ASM_EXTABLE_EX hack */
+ if (alt->new_off >= 0x7ffffff0)
+ alt->new_off -= 0x7ffffff0;
+ }
+
+ return 0;
+}
+
+/*
+ * Read all the special sections and create a list of special_alt structs which
+ * describe all the alternate instructions which can be patched in or
+ * redirected to at runtime.
+ */
+int special_get_alts(struct elf *elf, struct list_head *alts)
+{
+ struct special_entry *entry;
+ struct section *sec;
+ unsigned int nr_entries;
+ struct special_alt *alt;
+ int idx, ret;
+
+ INIT_LIST_HEAD(alts);
+
+ for (entry = entries; entry->sec; entry++) {
+ sec = find_section_by_name(elf, entry->sec);
+ if (!sec)
+ continue;
+
+ if (sec->len % entry->size != 0) {
+ WARN("%s size not a multiple of %d",
+ sec->name, entry->size);
+ return -1;
+ }
+
+ nr_entries = sec->len / entry->size;
+
+ for (idx = 0; idx < nr_entries; idx++) {
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(alt, 0, sizeof(*alt));
+
+			ret = get_alt_entry(elf, entry, sec, idx, alt);
+			if (ret) {
+				free(alt);
+				return ret;
+			}
+
+ list_add_tail(&alt->list, alts);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
new file mode 100644
index 000000000..fad1d092f
--- /dev/null
+++ b/tools/objtool/special.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPECIAL_H
+#define _SPECIAL_H
+
+#include <stdbool.h>
+#include "elf.h"
+
+struct special_alt {
+ struct list_head list;
+
+ bool group;
+ bool skip_orig;
+ bool jump_or_nop;
+
+ struct section *orig_sec;
+ unsigned long orig_off;
+
+ struct section *new_sec;
+ unsigned long new_off;
+
+ unsigned int orig_len, new_len; /* group only */
+};
+
+int special_get_alts(struct elf *elf, struct list_head *alts);
+
+#endif /* _SPECIAL_H */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
new file mode 100755
index 000000000..1470e74e9
--- /dev/null
+++ b/tools/objtool/sync-check.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+FILES='
+arch/x86/lib/insn.c
+arch/x86/lib/inat.c
+arch/x86/lib/x86-opcode-map.txt
+arch/x86/tools/gen-insn-attr-x86.awk
+arch/x86/include/asm/insn.h
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/inat_types.h
+arch/x86/include/asm/orc_types.h
+'
+
+check()
+{
+ local file=$1
+
+ diff $file ../../$file > /dev/null ||
+ echo "Warning: synced file at 'tools/objtool/$file' differs from latest kernel version at '$file'"
+}
+
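+# Skip the check entirely when not run from inside a kernel source tree.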
+if [ ! -d ../../kernel ] || [ ! -d ../../tools ] || [ ! -d ../objtool ]; then
+ exit 0
+fi
+
+for i in $FILES; do
+ check $i
+done
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
new file mode 100644
index 000000000..afd9f7a05
--- /dev/null
+++ b/tools/objtool/warn.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _WARN_H
+#define _WARN_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "elf.h"
+
+extern const char *objname;
+
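+/*
+ * Build a "function()+0x<offset>" (or "section+0x<offset>") description
+ * of a section offset.  The returned string is heap-allocated; callers
+ * such as WARN_FUNC() are responsible for freeing it.
+ */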
+static inline char *offstr(struct section *sec, unsigned long offset)
+{
+ struct symbol *func;
+ char *name, *str;
+ unsigned long name_off;
+
+ func = find_containing_func(sec, offset);
+ if (func) {
+ name = func->name;
+ name_off = offset - func->offset;
+ } else {
+ name = sec->name;
+ name_off = offset;
+ }
+
+ str = malloc(strlen(name) + 20);
+
+ if (func)
+ sprintf(str, "%s()+0x%lx", name, name_off);
+ else
+ sprintf(str, "%s+0x%lx", name, name_off);
+
+ return str;
+}
+
+#define WARN(format, ...) \
+ fprintf(stderr, \
+ "%s: warning: objtool: " format "\n", \
+ objname, ##__VA_ARGS__)
+
+#define WARN_FUNC(format, sec, offset, ...) \
+({ \
+ char *_str = offstr(sec, offset); \
+ WARN("%s: " format, _str, ##__VA_ARGS__); \
+ free(_str); \
+})
+
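+/* Like WARN(), but appends the most recent libelf error message. */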
+#define WARN_ELF(format, ...) \
+ WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1))
+
+#endif /* _WARN_H */
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
new file mode 100644
index 000000000..4c5be77c2
--- /dev/null
+++ b/tools/pci/pcitest.c
@@ -0,0 +1,233 @@
+/**
+ * Userspace PCI Endpoint Test Module
+ *
+ * Copyright (C) 2017 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 of
+ * the License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <linux/pcitest.h>
+
+#define BILLION 1E9
+
+static char *result[] = { "NOT OKAY", "OKAY" };
+static char *irq[] = { "LEGACY", "MSI", "MSI-X" };
+
+struct pci_test {
+ char *device;
+ char barnum;
+ bool legacyirq;
+ unsigned int msinum;
+ unsigned int msixnum;
+ int irqtype;
+ bool set_irqtype;
+ bool get_irqtype;
+ bool read;
+ bool write;
+ bool copy;
+ unsigned long size;
+};
+
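+/*
+ * Issue the requested PCITEST_* ioctls against the endpoint test device
+ * and print a result line for each selected test.
+ */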
+static int run_test(struct pci_test *test)
+{
+ int ret = -EINVAL;
+ int fd;
+
+ fd = open(test->device, O_RDWR);
+ if (fd < 0) {
+ perror("can't open PCI Endpoint Test device");
+ return -ENODEV;
+ }
+
+ if (test->barnum >= 0 && test->barnum <= 5) {
+ ret = ioctl(fd, PCITEST_BAR, test->barnum);
+ fprintf(stdout, "BAR%d:\t\t", test->barnum);
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->set_irqtype) {
+ ret = ioctl(fd, PCITEST_SET_IRQTYPE, test->irqtype);
+ fprintf(stdout, "SET IRQ TYPE TO %s:\t\t", irq[test->irqtype]);
+ if (ret < 0)
+ fprintf(stdout, "FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->get_irqtype) {
+ ret = ioctl(fd, PCITEST_GET_IRQTYPE);
+ fprintf(stdout, "GET IRQ TYPE:\t\t");
+ if (ret < 0)
+ fprintf(stdout, "FAILED\n");
+ else
+ fprintf(stdout, "%s\n", irq[ret]);
+ }
+
+ if (test->legacyirq) {
+ ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0);
+ fprintf(stdout, "LEGACY IRQ:\t");
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->msinum > 0 && test->msinum <= 32) {
+ ret = ioctl(fd, PCITEST_MSI, test->msinum);
+ fprintf(stdout, "MSI%d:\t\t", test->msinum);
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->msixnum > 0 && test->msixnum <= 2048) {
+ ret = ioctl(fd, PCITEST_MSIX, test->msixnum);
+ fprintf(stdout, "MSI-X%d:\t\t", test->msixnum);
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->write) {
+ ret = ioctl(fd, PCITEST_WRITE, test->size);
+ fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size);
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->read) {
+ ret = ioctl(fd, PCITEST_READ, test->size);
+ fprintf(stdout, "READ (%7ld bytes):\t\t", test->size);
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+ if (test->copy) {
+ ret = ioctl(fd, PCITEST_COPY, test->size);
+ fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size);
+ if (ret < 0)
+ fprintf(stdout, "TEST FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
+	fflush(stdout);
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int c;
+ struct pci_test *test;
+
+ test = calloc(1, sizeof(*test));
+ if (!test) {
+		perror("Failed to allocate memory for pci_test");
+ return -ENOMEM;
+ }
+
+ /* since '0' is a valid BAR number, initialize it to -1 */
+ test->barnum = -1;
+
+ /* set default size as 100KB */
+ test->size = 0x19000;
+
+ /* set default endpoint device */
+ test->device = "/dev/pci-endpoint-test.0";
+
+ while ((c = getopt(argc, argv, "D:b:m:x:i:Ilrwcs:")) != EOF)
+ switch (c) {
+ case 'D':
+ test->device = optarg;
+ continue;
+ case 'b':
+ test->barnum = atoi(optarg);
+ if (test->barnum < 0 || test->barnum > 5)
+ goto usage;
+ continue;
+ case 'l':
+ test->legacyirq = true;
+ continue;
+ case 'm':
+ test->msinum = atoi(optarg);
+ if (test->msinum < 1 || test->msinum > 32)
+ goto usage;
+ continue;
+ case 'x':
+ test->msixnum = atoi(optarg);
+ if (test->msixnum < 1 || test->msixnum > 2048)
+ goto usage;
+ continue;
+ case 'i':
+ test->irqtype = atoi(optarg);
+ if (test->irqtype < 0 || test->irqtype > 2)
+ goto usage;
+ test->set_irqtype = true;
+ continue;
+ case 'I':
+ test->get_irqtype = true;
+ continue;
+ case 'r':
+ test->read = true;
+ continue;
+ case 'w':
+ test->write = true;
+ continue;
+ case 'c':
+ test->copy = true;
+ continue;
+ case 's':
+ test->size = strtoul(optarg, NULL, 0);
+ continue;
+ case '?':
+ case 'h':
+ default:
+usage:
+ fprintf(stderr,
+ "usage: %s [options]\n"
+ "Options:\n"
+ "\t-D <dev> PCI endpoint test device {default: /dev/pci-endpoint-test.0}\n"
+ "\t-b <bar num> BAR test (bar number between 0..5)\n"
+ "\t-m <msi num> MSI test (msi number between 1..32)\n"
+ "\t-x <msix num> \tMSI-X test (msix number between 1..2048)\n"
+ "\t-i <irq type> \tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
+ "\t-I Get current IRQ type configured\n"
+ "\t-l Legacy IRQ test\n"
+ "\t-r Read buffer test\n"
+ "\t-w Write buffer test\n"
+ "\t-c Copy buffer test\n"
+ "\t-s <size> Size of buffer {default: 100KB}\n",
+ argv[0]);
+ return -EINVAL;
+ }
+
+ run_test(test);
+ return 0;
+}
diff --git a/tools/pci/pcitest.sh b/tools/pci/pcitest.sh
new file mode 100644
index 000000000..75ed48ff2
--- /dev/null
+++ b/tools/pci/pcitest.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "BAR tests"
+echo
+
+bar=0
+
+while [ $bar -lt 6 ]
+do
+ pcitest -b $bar
+ bar=`expr $bar + 1`
+done
+echo
+
+echo "Interrupt tests"
+echo
+
+pcitest -i 0
+pcitest -l
+
+pcitest -i 1
+msi=1
+
+while [ $msi -lt 33 ]
+do
+ pcitest -m $msi
+ msi=`expr $msi + 1`
+done
+echo
+
+pcitest -i 2
+msix=1
+
+while [ $msix -lt 2049 ]
+do
+ pcitest -x $msix
+ msix=`expr $msix + 1`
+done
+echo
+
+echo "Read Tests"
+echo
+
+pcitest -i 1
+
+pcitest -r -s 1
+pcitest -r -s 1024
+pcitest -r -s 1025
+pcitest -r -s 1024000
+pcitest -r -s 1024001
+echo
+
+echo "Write Tests"
+echo
+
+pcitest -w -s 1
+pcitest -w -s 1024
+pcitest -w -s 1025
+pcitest -w -s 1024000
+pcitest -w -s 1024001
+echo
+
+echo "Copy Tests"
+echo
+
+pcitest -c -s 1
+pcitest -c -s 1024
+pcitest -c -s 1025
+pcitest -c -s 1024000
+pcitest -c -s 1024001
+echo
diff --git a/tools/pcmcia/.gitignore b/tools/pcmcia/.gitignore
new file mode 100644
index 000000000..53d081336
--- /dev/null
+++ b/tools/pcmcia/.gitignore
@@ -0,0 +1 @@
+crc32hash
diff --git a/tools/pcmcia/Makefile b/tools/pcmcia/Makefile
new file mode 100644
index 000000000..eae24df1d
--- /dev/null
+++ b/tools/pcmcia/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CC := $(CROSS_COMPILE)gcc
+CFLAGS := -I../../usr/include
+
+PROGS := crc32hash
+
+all: $(PROGS)
+
+clean:
+ rm -fr $(PROGS)
diff --git a/tools/pcmcia/crc32hash.c b/tools/pcmcia/crc32hash.c
new file mode 100644
index 000000000..44f8beea7
--- /dev/null
+++ b/tools/pcmcia/crc32hash.c
@@ -0,0 +1,32 @@
+/* crc32hash.c - derived from linux/lib/crc32.c, GNU GPL v2 */
+/* Usage example:
+$ ./crc32hash "Dual Speed"
+*/
+
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <stdlib.h>
+
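+/* Bit-reflected CRC-32 (polynomial 0xedb88320) with a zero initial value
+ * and no final inversion; this matches the hash applied to PCMCIA product
+ * ID strings. */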
+static unsigned int crc32(unsigned char const *p, unsigned int len)
+{
+ int i;
+ unsigned int crc = 0;
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+ }
+ return crc;
+}
+
+int main(int argc, char **argv) {
+ unsigned int result;
+ if (argc != 2) {
+ printf("no string passed as argument\n");
+ return -1;
+ }
+ result = crc32((unsigned char const *)argv[1], strlen(argv[1]));
+ printf("0x%x\n", result);
+ return 0;
+}
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
new file mode 100644
index 000000000..3e5135dde
--- /dev/null
+++ b/tools/perf/.gitignore
@@ -0,0 +1,36 @@
+PERF-CFLAGS
+PERF-GUI-VARS
+PERF-VERSION-FILE
+FEATURE-DUMP
+perf
+perf-read-vdso32
+perf-read-vdsox32
+perf-help
+perf-record
+perf-report
+perf-stat
+perf-top
+perf*.1
+perf*.xml
+perf*.html
+common-cmds.h
+perf.data
+perf.data.old
+output.svg
+perf-archive
+perf-with-kcore
+tags
+TAGS
+cscope*
+config.mak
+config.mak.autogen
+*-bison.*
+*-flex.*
+*.pyc
+*.pyo
+.config-detected
+util/intel-pt-decoder/inat-tables.c
+arch/*/include/generated/
+trace/beauty/generated/
+pmu-events/pmu-events.c
+pmu-events/jevents
diff --git a/tools/perf/Build b/tools/perf/Build
new file mode 100644
index 000000000..e5232d567
--- /dev/null
+++ b/tools/perf/Build
@@ -0,0 +1,55 @@
+perf-y += builtin-bench.o
+perf-y += builtin-annotate.o
+perf-y += builtin-config.o
+perf-y += builtin-diff.o
+perf-y += builtin-evlist.o
+perf-y += builtin-ftrace.o
+perf-y += builtin-help.o
+perf-y += builtin-sched.o
+perf-y += builtin-buildid-list.o
+perf-y += builtin-buildid-cache.o
+perf-y += builtin-kallsyms.o
+perf-y += builtin-list.o
+perf-y += builtin-record.o
+perf-y += builtin-report.o
+perf-y += builtin-stat.o
+perf-y += builtin-timechart.o
+perf-y += builtin-top.o
+perf-y += builtin-script.o
+perf-y += builtin-kmem.o
+perf-y += builtin-lock.o
+perf-y += builtin-kvm.o
+perf-y += builtin-inject.o
+perf-y += builtin-mem.o
+perf-y += builtin-data.o
+perf-y += builtin-version.o
+perf-y += builtin-c2c.o
+
+perf-$(CONFIG_TRACE) += builtin-trace.o
+perf-$(CONFIG_LIBELF) += builtin-probe.o
+
+perf-y += bench/
+perf-y += tests/
+
+perf-y += perf.o
+
+paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))"
+paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))"
+paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))"
+
+CFLAGS_builtin-help.o += $(paths)
+CFLAGS_builtin-timechart.o += $(paths)
+CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" \
+ -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" \
+ -DPREFIX="BUILD_STR($(prefix_SQ))"
+CFLAGS_builtin-trace.o += -DSTRACE_GROUPS_DIR="BUILD_STR($(STRACE_GROUPS_DIR_SQ))"
+CFLAGS_builtin-report.o += -DTIPDIR="BUILD_STR($(tipdir_SQ))"
+CFLAGS_builtin-report.o += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
+
+libperf-y += util/
+libperf-y += arch/
+libperf-y += ui/
+libperf-y += scripts/
+libperf-$(CONFIG_TRACE) += trace/beauty/
+
+gtk-y += ui/gtk/
diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS
new file mode 100644
index 000000000..c2ddcb3ac
--- /dev/null
+++ b/tools/perf/CREDITS
@@ -0,0 +1,30 @@
+Most of the infrastructure that 'perf' uses here has been reused
+from the Git project, as of version:
+
+ 66996ec: Sync with 1.6.2.4
+
+Here is an (incomplete!) list of main contributors to those files
+in util/* and elsewhere:
+
+ Alex Riesen
+ Christian Couder
+ Dmitry Potapov
+ Jeff King
+ Johannes Schindelin
+ Johannes Sixt
+ Junio C Hamano
+ Linus Torvalds
+ Matthias Kestenholz
+ Michal Ostrowski
+ Miklos Vajna
+ Petr Baudis
+ Pierre Habouzit
+ René Scharfe
+ Samuel Tardieu
+ Shawn O. Pearce
+ Steffen Prohaska
+ Steve Haslam
+
+Thanks guys!
+
+The full history of the files can be found in the upstream Git commits.
diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt
new file mode 100644
index 000000000..f6fc6507b
--- /dev/null
+++ b/tools/perf/Documentation/Build.txt
@@ -0,0 +1,49 @@
+
+1) perf build
+=============
+The perf build process consists of several separate building blocks,
+which are linked together to form the perf binary:
+ - libperf library (static)
+ - perf builtin commands
+ - traceevent library (static)
+ - GTK ui library
+
+Several makefiles govern the perf build:
+
+ - Makefile
+ top level Makefile working as a wrapper that calls the main
+ Makefile.perf with a -j option to do parallel builds.
+
+ - Makefile.perf
+ main makefile that triggers build of all perf objects including
+ installation and documentation processing.
+
+ - tools/build/Makefile.build
+ main makefile of the build framework
+
+ - tools/build/Build.include
+ build framework generic definitions
+
+  - Build makefiles
+    makefiles that define build objects
+
+Please refer to tools/build/Documentation/Build.txt for more
+information about the build framework.
+
+
+2) perf build process
+=====================
+The Makefile.perf triggers the build framework for build objects:
+ perf, libperf, gtk
+
+resulting in the following objects:
+ $ ls *-in.o
+ gtk-in.o libperf-in.o perf-in.o
+
+Those objects are then used in final linking:
+ libperf-gtk.so <- gtk-in.o libperf-in.o
+ perf <- perf-in.o libperf-in.o
+
+
+NOTE this description omits other libraries involved, focusing only
+     on the build framework outcomes.
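+
+For example, an illustrative verbose build that leaves the *-in.o
+objects in a separate output directory (the output directory must
+already exist):
+
+  $ make -C tools/perf O=/tmp/perf-build V=1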
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
new file mode 100644
index 000000000..ac841bc5c
--- /dev/null
+++ b/tools/perf/Documentation/Makefile
@@ -0,0 +1,362 @@
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak
+
+MAN1_TXT= \
+ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
+ $(wildcard perf-*.txt)) \
+ perf.txt
+MAN5_TXT=
+MAN7_TXT=
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
+
+ARTICLES =
+# Special articles with their own formatting rules.
+SP_ARTICLES =
+API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
+SP_ARTICLES += $(API_DOCS)
+SP_ARTICLES += technical/api-index
+
+_DOC_HTML = $(_MAN_HTML)
+_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
+
+_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
+DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
+DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/perf-doc
+pdfdir?=$(prefix)/share/doc/perf-doc
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+man7dir=$(mandir)/man7
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_HTML = xhtml11
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+DOC_REF = origin/man
+HTML_REF = origin/html
+
+ifdef USE_ASCIIDOCTOR
+ASCIIDOC = asciidoctor
+ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
+ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
+ASCIIDOC_HTML = xhtml5
+endif
+
+infodir?=$(prefix)/share/info
+MAKEINFO=makeinfo
+INSTALL_INFO=install-info
+DOCBOOK2X_TEXI=docbook2x-texi
+DBLATEX=dblatex
+XMLTO=xmlto
+ifndef PERL_PATH
+ PERL_PATH = /usr/bin/perl
+endif
+
+-include ../config.mak.autogen
+-include ../config.mak
+
+_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
+ifeq ($(_tmp_tool_path),)
+ missing_tools = $(ASCIIDOC)
+endif
+
+ifndef USE_ASCIIDOCTOR
+_tmp_tool_path := $(call get-executable,$(XMLTO))
+ifeq ($(_tmp_tool_path),)
+ missing_tools += $(XMLTO)
+endif
+endif
+
+#
+# For asciidoc ...
+# -7.1.2, no extra settings are needed.
+# 8.0-, set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+# 1.69.0, no extra settings are needed?
+# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
+# 1.71.1, no extra settings are needed?
+# 1.72.0, set DOCBOOK_XSL_172.
+# 1.73.0-, set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+ ifdef ASCIIDOC_NO_ROFF
+ # docbook-xsl after 1.72 needs the regular XSL, but will not
+ # pass-thru raw roff codes from asciidoc.conf, so turn them off.
+ ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+ endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+# http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1 =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifneq ($(V),1)
+ QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
+ QUIET_XMLTO = @echo ' XMLTO '$@;
+ QUIET_DB2TEXI = @echo ' DB2TEXI '$@;
+ QUIET_MAKEINFO = @echo ' MAKEINFO '$@;
+ QUIET_DBLATEX = @echo ' DBLATEX '$@;
+ QUIET_XSLTPROC = @echo ' XSLTPROC '$@;
+ QUIET_GEN = @echo ' GEN '$@;
+ QUIET_STDERR = 2> /dev/null
+ QUIET_SUBDIR0 = +@subdir=
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
+ echo ' SUBDIR ' $$subdir; \
+ $(MAKE) $(PRINT_DIR) -C $$subdir
+ export V
+endif
+endif
+
+all: html man
+
+html: $(DOC_HTML)
+
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
+
+man: man1 man5 man7
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+man7: $(DOC_MAN7)
+
+info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
+
+pdf: $(OUTPUT)user-manual.pdf
+
+install: install-man
+
+check-man-tools:
+ifdef missing_tools
+ $(error "You need to install $(missing_tools) for man pages")
+endif
+
+do-install-man: man
+ $(call QUIET_INSTALL, Documentation-man) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+ $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
+# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
+# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-man: check-man-tools man do-install-man
+
+ifdef missing_tools
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+ DO_INSTALL_MAN = do-install-man
+endif
+
+try-install-man: $(DO_INSTALL_MAN)
+
+install-info: info
+ $(call QUIET_INSTALL, Documentation-info) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
+ $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
+ if test -r $(DESTDIR)$(infodir)/dir; then \
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+ else \
+ echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
+ fi
+
+install-pdf: pdf
+ $(call QUIET_INSTALL, Documentation-pdf) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
+ $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
+
+#install-html: html
+# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+
+
+#
+# Determine "include::" file references in asciidoc files.
+#
+$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
+ $(QUIET_GEN)$(RM) $@+ $@ && \
+ $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+ mv $@+ $@
+
+-include $(OUTPUT)doc.dep
+
+_cmds_txt = cmds-ancillaryinterrogators.txt \
+ cmds-ancillarymanipulators.txt \
+ cmds-mainporcelain.txt \
+ cmds-plumbinginterrogators.txt \
+ cmds-plumbingmanipulators.txt \
+ cmds-synchingrepositories.txt \
+ cmds-synchelpers.txt \
+ cmds-purehelpers.txt \
+ cmds-foreignscminterface.txt
+cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
+
+$(cmds_txt): $(OUTPUT)cmd-list.made
+
+$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+ $(QUIET_GEN)$(RM) $@ && \
+ $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
+ date >$@
+
+CLEAN_FILES = \
+ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
+ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
+ $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \
+ $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \
+ $(OUTPUT)perf.info $(OUTPUT)perfman.info \
+ $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \
+ $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \
+ $(cmds_txt) $(OUTPUT)*.made
+clean:
+ $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+ifdef USE_ASCIIDOCTOR
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b manpage -d manpage \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ mv $@+ $@
+endif
+
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
+ $(QUIET_XMLTO)$(RM) $@ && \
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+$(OUTPUT)%.xml : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b docbook -d manpage \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+XSLT = docbook.xsl
+XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
+
+$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
+ $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
+
+$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
+ $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
+
+$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
+ $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+ $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+ $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
+ rm $@++ && \
+ mv $@+ $@
+
+$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
+ $(QUIET_DBLATEX)$(RM) $@+ $@ && \
+ $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
+ mv $@+ $@
+
+$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
+ $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+ ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
+ --to-stdout $(xml) &&) true) > $@++ && \
+ $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
+ rm $@++ && \
+ mv $@+ $@
+
+$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
+ $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+
+$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
+ $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+ $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+ mv $@+ $@
+
+howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+ $(QUIET_GEN)$(RM) $@+ $@ && \
+ '$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
+ mv $@+ $@
+
+$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
+ $(QUIET_ASCIIDOC)$(ASCIIDOC) -b $(ASCIIDOC_HTML) $*.txt
+
+WEBDOC_DEST = /pub/software/tools/perf/docs
+
+$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b $(ASCIIDOC_HTML) - >$@+ && \
+ mv $@+ $@
+
+# UNIMPLEMENTED
+#install-webdoc : html
+# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+
+# quick-install: quick-install-man
+
+# quick-install-man:
+# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+
+#quick-install-html:
+# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt
new file mode 100644
index 000000000..24a59998f
--- /dev/null
+++ b/tools/perf/Documentation/android.txt
@@ -0,0 +1,78 @@
+How to compile perf for Android
+=========================================
+
+I. Set the Android NDK environment
+------------------------------------------------
+
+(a). Use the Android NDK
+------------------------------------------------
+1. You need to download and install the Android Native Development Kit (NDK).
+Set the NDK variable to point to the path where you installed the NDK:
+ export NDK=/path/to/android-ndk
+
+2. Set cross-compiling environment variables for NDK toolchain and sysroot.
+For arm:
+ export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-
+ export NDK_SYSROOT=${NDK}/platforms/android-24/arch-arm
+For x86:
+ export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.9/prebuilt/linux-x86_64/bin/i686-linux-android-
+ export NDK_SYSROOT=${NDK}/platforms/android-24/arch-x86
+
+This method is only tested for Android NDK versions Revision 11b and later.
+perf uses some bionic enhancements that are not included in prior NDK versions.
+You can use method (b) described below instead.
+
+(b). Use the Android source tree
+-----------------------------------------------
+1. Download the master branch of the Android source tree.
+Set the environment for the target you want using:
+ source build/envsetup.sh
+ lunch
+
+2. Build your own NDK sysroot to contain latest bionic changes and set the
+NDK sysroot environment variable.
+ cd ${ANDROID_BUILD_TOP}/ndk
+For arm:
+ ./build/tools/build-ndk-sysroot.sh --abi=arm
+ export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
+For x86:
+ ./build/tools/build-ndk-sysroot.sh --abi=x86
+ export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
+
+3. Set the NDK toolchain environment variable.
+For arm:
+ export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
+For x86:
+ export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
+
+II. Compile perf for Android
+------------------------------------------------
+You need to run make with the NDK toolchain and sysroot defined above:
+For arm:
+ make WERROR=0 ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
+For x86:
+ make WERROR=0 ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
+
+III. Install perf
+-----------------------------------------------
+You need to connect to your Android device/emulator using adb.
+Install perf using:
+ adb push perf /data/perf
+
+If you also want to use perf-archive you need busybox tools for Android.
+For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
+ sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive > /tmp/perf-archive
+ chmod +x /tmp/perf-archive
+ adb push /tmp/perf-archive /data/perf-archive
+
+IV. Environment settings for running perf
+------------------------------------------------
+Some perf features need environment variables to run properly.
+You need to set these before running perf on the target:
+ adb shell
+ # PERF_PAGER=cat
+
+V. Run perf
+------------------------------------------------
+Run perf on your device/emulator to which you previously connected using adb:
+ # ./data/perf
diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf
new file mode 100644
index 000000000..356b23a40
--- /dev/null
+++ b/tools/perf/Documentation/asciidoc.conf
@@ -0,0 +1,91 @@
+## linkperf: macro
+#
+# Usage: linkperf:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show PERF link as: <command>(<section>) if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linkperf-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::perf-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::perf-asciidoc-no-roff[]
+
+ifdef::perf-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::perf-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">perf</refmiscinfo>
+<refmiscinfo class="version">{perf_version}</refmiscinfo>
+<refmiscinfo class="manual">perf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+ <refname>{manname}</refname>
+ <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linkperf-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/tools/perf/Documentation/asciidoctor-extensions.rb b/tools/perf/Documentation/asciidoctor-extensions.rb
new file mode 100644
index 000000000..d148fe95c
--- /dev/null
+++ b/tools/perf/Documentation/asciidoctor-extensions.rb
@@ -0,0 +1,29 @@
+require 'asciidoctor'
+require 'asciidoctor/extensions'
+
+module Perf
+ module Documentation
+ class LinkPerfProcessor < Asciidoctor::Extensions::InlineMacroProcessor
+ use_dsl
+
+ named :chrome
+
+ def process(parent, target, attrs)
+ if parent.document.basebackend? 'html'
+ %(<a href="#{target}.html">#{target}(#{attrs[1]})</a>\n)
+ elsif parent.document.basebackend? 'manpage'
+ "#{target}(#{attrs[1]})"
+ elsif parent.document.basebackend? 'docbook'
+ "<citerefentry>\n" \
+ "<refentrytitle>#{target}</refentrytitle>" \
+ "<manvolnum>#{attrs[1]}</manvolnum>\n" \
+ "</citerefentry>\n"
+ end
+ end
+ end
+ end
+end
+
+Asciidoctor::Extensions.register do
+ inline_macro Perf::Documentation::LinkPerfProcessor, :linkperf
+end
diff --git a/tools/perf/Documentation/callchain-overhead-calculation.txt b/tools/perf/Documentation/callchain-overhead-calculation.txt
new file mode 100644
index 000000000..1a7579271
--- /dev/null
+++ b/tools/perf/Documentation/callchain-overhead-calculation.txt
@@ -0,0 +1,108 @@
+Overhead calculation
+--------------------
+The overhead can be shown in two columns as 'Children' and 'Self' when
+perf collects callchains. The 'self' overhead is simply calculated by
+adding all period values of the entry - usually a function (symbol).
+This is the value that perf shows traditionally and sum of all the
+'self' overhead values should be 100%.
+
+The 'children' overhead is calculated by adding all period values of
+the child functions so that it can show the total overhead of the
+higher level functions even if they don't directly execute much.
+'Children' here means functions that are called from another (parent)
+function.
+
+It might be confusing that the sum of all the 'children' overhead
+values exceeds 100% since each of them is already an accumulation of
+'self' overhead of its child functions. But with this enabled, users
+can find which function has the most overhead even if samples are
+spread over the children.
+
+Consider the following example; there are three functions like below.
+
+-----------------------
+void foo(void) {
+ /* do something */
+}
+
+void bar(void) {
+ /* do something */
+ foo();
+}
+
+int main(void) {
+	bar();
+ return 0;
+}
+-----------------------
+
+In this case 'foo' is a child of 'bar', and 'bar' is an immediate
+child of 'main' so 'foo' also is a child of 'main'. In other words,
+'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
+
+Suppose all samples are recorded in 'foo' and 'bar' only. When it's
+recorded with callchains the output will show something like below
+in the usual (self-overhead-only) output of perf report:
+
+----------------------------------
+Overhead Symbol
+........ .....................
+ 60.00% foo
+ |
+ --- foo
+ bar
+ main
+ __libc_start_main
+
+ 40.00% bar
+ |
+ --- bar
+ main
+ __libc_start_main
+----------------------------------
+
+When the --children option is enabled, the 'self' overhead values of
+child functions (i.e. 'foo' and 'bar') are added to the parents to
+calculate the 'children' overhead. In this case the report could be
+displayed as:
+
+-------------------------------------------
+Children Self Symbol
+........ ........ ....................
+ 100.00% 0.00% __libc_start_main
+ |
+ --- __libc_start_main
+
+ 100.00% 0.00% main
+ |
+ --- main
+ __libc_start_main
+
+ 100.00% 40.00% bar
+ |
+ --- bar
+ main
+ __libc_start_main
+
+ 60.00% 60.00% foo
+ |
+ --- foo
+ bar
+ main
+ __libc_start_main
+-------------------------------------------
+
+In the above output, the 'self' overhead of 'foo' (60%) was added to the
+'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
+Likewise, the 'self' overhead of 'bar' (40%) was added to the
+'children' overhead of 'main' and '\_\_libc_start_main'.
+
+So '\_\_libc_start_main' and 'main' are shown first since they have the
+same (100%) 'children' overhead (even though they have zero 'self'
+overhead) and they are the parents of 'foo' and 'bar'.
+
+Since v3.16 the 'children' overhead is shown by default and the output
+is sorted by its values. The 'children' overhead can be disabled by
+specifying the --no-children option on the command line or by adding
+'report.children = false' or 'top.children = false' to the perf config
+file.
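+
+For example, one illustrative way to make that the default (the same can
+be done by editing the 'report' section of ~/.perfconfig directly):
+
+  $ perf config report.children=false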
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
new file mode 100644
index 000000000..a4e392156
--- /dev/null
+++ b/tools/perf/Documentation/examples.txt
@@ -0,0 +1,225 @@
+
+ ------------------------------
+ ****** perf by examples ******
+ ------------------------------
+
+[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
+
+
+First, discovery/enumeration of available counters can be done via
+'perf list':
+
+titan:~> perf list
+ [...]
+ kmem:kmalloc [Tracepoint event]
+ kmem:kmem_cache_alloc [Tracepoint event]
+ kmem:kmalloc_node [Tracepoint event]
+ kmem:kmem_cache_alloc_node [Tracepoint event]
+ kmem:kfree [Tracepoint event]
+ kmem:kmem_cache_free [Tracepoint event]
+ kmem:mm_page_free [Tracepoint event]
+ kmem:mm_page_free_batched [Tracepoint event]
+ kmem:mm_page_alloc [Tracepoint event]
+ kmem:mm_page_alloc_zone_locked [Tracepoint event]
+ kmem:mm_page_pcpu_drain [Tracepoint event]
+ kmem:mm_page_alloc_extfrag [Tracepoint event]
+
+Then any (or all) of the above event sources can be activated and
+measured. For example the page alloc/free properties of a 'hackbench
+run' are:
+
+ titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
+ -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10
+ Time: 0.575
+
+ Performance counter stats for './hackbench 10':
+
+ 13857 kmem:mm_page_pcpu_drain
+ 27576 kmem:mm_page_alloc
+ 6025 kmem:mm_page_free_batched
+ 20934 kmem:mm_page_free
+
+ 0.613972165 seconds time elapsed
+
+You can observe the statistical properties as well, by using the
+'repeat the workload N times' feature of perf stat:
+
+ titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
+ kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+ kmem:mm_page_free ./hackbench 10
+ Time: 0.627
+ Time: 0.644
+ Time: 0.564
+ Time: 0.559
+ Time: 0.626
+
+ Performance counter stats for './hackbench 10' (5 runs):
+
+ 12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
+ 25035 kmem:mm_page_alloc ( +- 3.783% )
+ 6104 kmem:mm_page_free_batched ( +- 0.934% )
+ 18376 kmem:mm_page_free ( +- 4.941% )
+
+ 0.643954516 seconds time elapsed ( +- 2.363% )
+
+Furthermore, these tracepoints can be used to sample the workload as
+well. For example the page allocations done by a 'git gc' can be
+captured the following way:
+
+ titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
+
+To check which functions generated page allocations:
+
+ titan:~/git> perf report
+ # Samples: 10646
+ #
+ # Overhead Command Shared Object
+ # ........ ............... ..........................
+ #
+ 23.57% git-repack /lib64/libc-2.5.so
+ 21.81% git /lib64/libc-2.5.so
+ 14.59% git ./git
+ 11.79% git-repack ./git
+ 7.12% git /lib64/ld-2.5.so
+ 3.16% git-repack /lib64/libpthread-2.5.so
+ 2.09% git-repack /bin/bash
+ 1.97% rm /lib64/libc-2.5.so
+ 1.39% mv /lib64/ld-2.5.so
+ 1.37% mv /lib64/libc-2.5.so
+ 1.12% git-repack /lib64/ld-2.5.so
+ 0.95% rm /lib64/ld-2.5.so
+ 0.90% git-update-serv /lib64/libc-2.5.so
+ 0.73% git-update-serv /lib64/ld-2.5.so
+ 0.68% perf /lib64/libpthread-2.5.so
+ 0.64% git-repack /usr/lib64/libz.so.1.2.3
+
+Or to see it on a more finegrained level:
+
+titan:~/git> perf report --sort comm,dso,symbol
+# Samples: 10646
+#
+# Overhead Command Shared Object Symbol
+# ........ ............... .......................... ......
+#
+ 9.35% git-repack ./git [.] insert_obj_hash
+ 9.12% git ./git [.] insert_obj_hash
+ 7.31% git /lib64/libc-2.5.so [.] memcpy
+ 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
+ 6.24% git-repack /lib64/libc-2.5.so [.] memcpy
+ 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
+ 5.47% git /lib64/libc-2.5.so [.] _int_malloc
+ 2.99% git /lib64/libc-2.5.so [.] memset
+
+Furthermore, call-graph sampling can be done too, of page
+allocations - to see precisely what kind of page allocations there
+are:
+
+ titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
+
+ titan:~/git> perf report -g
+ # Samples: 10686
+ #
+ # Overhead Command Shared Object
+ # ........ ............... ..........................
+ #
+ 23.25% git-repack /lib64/libc-2.5.so
+ |
+ |--50.00%-- _int_free
+ |
+ |--37.50%-- __GI___fork
+ | make_child
+ |
+ |--12.50%-- ptmalloc_unlock_all2
+ | make_child
+ |
+ --6.25%-- __GI_strcpy
+ 21.61% git /lib64/libc-2.5.so
+ |
+ |--30.00%-- __GI_read
+ | |
+ | --83.33%-- git_config_from_file
+ | git_config
+ | |
+ [...]
+
+Or you can observe the whole system's page allocations for 10
+seconds:
+
+titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
+kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+kmem:mm_page_free sleep 10
+
+ Performance counter stats for 'sleep 10':
+
+ 171585 kmem:mm_page_pcpu_drain
+ 322114 kmem:mm_page_alloc
+ 73623 kmem:mm_page_free_batched
+ 254115 kmem:mm_page_free
+
+ 10.000591410 seconds time elapsed
+
+Or observe how fluctuating the page allocations are, via statistical
+analysis done over ten 1-second intervals:
+
+ titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
+ kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+ kmem:mm_page_free sleep 1
+
+ Performance counter stats for 'sleep 1' (10 runs):
+
+ 17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
+ 34394 kmem:mm_page_alloc ( +- 4.617% )
+ 7509 kmem:mm_page_free_batched ( +- 4.820% )
+ 25653 kmem:mm_page_free ( +- 3.672% )
+
+ 1.058135029 seconds time elapsed ( +- 3.089% )
+
+Or you can annotate the recorded 'git gc' run on a per symbol basis
+and check which instructions/source-code generated page allocations:
+
+ titan:~/git> perf annotate __GI___fork
+ ------------------------------------------------
+ Percent | Source code & Disassembly of libc-2.5.so
+ ------------------------------------------------
+ :
+ :
+ : Disassembly of section .plt:
+ : Disassembly of section .text:
+ :
+ : 00000031a2e95560 <__fork>:
+ [...]
+ 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
+ 0.00 : 31a2e95607: 0f 05 syscall
+ 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
+ 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
+ 0.00 : 31a2e95615: 85 c0 test %eax,%eax
+
+( this shows that 83.42% of __GI___fork's page allocations come from
+ the 0x38 system call it performs. )
+
+etc. etc. - a lot more is possible. I could list a dozen other
+different use cases straight away - none of which is possible
+via /proc/vmstat.
+
+/proc/vmstat is not in the same league really, in terms of
+expressive power of system analysis and performance
+analysis.
+
+All that the above results needed were those new tracepoints
+in include/tracing/events/kmem.h.
+
+ Ingo
+
+
diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
new file mode 100644
index 000000000..8bdc93bd7
--- /dev/null
+++ b/tools/perf/Documentation/intel-bts.txt
@@ -0,0 +1,86 @@
+Intel Branch Trace Store
+========================
+
+Overview
+========
+
+Intel BTS could be regarded as a predecessor to Intel PT and has some
+similarities because it can also identify every branch a program takes. A
+notable difference is that Intel BTS has no timing information and as a
+consequence the present implementation is limited to per-thread recording.
+
+While decoding Intel BTS does not require walking the object code, the object
+code is still needed to pair up calls and returns correctly, consequently much
+of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
+documentation and consider that the PMU 'intel_bts' can usually be used in
+place of 'intel_pt' in the examples provided, with the proviso that per-thread
+recording must also be stipulated i.e. the --per-thread option for
+'perf record'.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
+option is:
+
+ -e intel_bts//
+
+Currently Intel BTS is limited to per-thread tracing so the --per-thread option
+is also needed.
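+
+For example, an illustrative per-thread capture of 'ls':
+
+  perf record --per-thread -e intel_bts// ls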
+
+
+snapshot option
+---------------
+
+The snapshot option is the same as Intel PT (refer Intel PT documentation).
+
+
+auxtrace mmap size option
+-------------------------
+
+The mmap size option is the same as Intel PT (refer Intel PT documentation).
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by option --itrace. The --itrace option is
+the same as Intel PT (refer Intel PT documentation) except that neither
+"instructions" events nor "transactions" events (and consequently call
+chains) are supported.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel BTS packets are displayed.
+
+To disable the display of Intel BTS packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
new file mode 100644
index 000000000..76971d2e4
--- /dev/null
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -0,0 +1,891 @@
+Intel Processor Trace
+=====================
+
+Overview
+========
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly. The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded. Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'. See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution. Intel PT can be used to understand why
+and how software got to a certain point, or behaved a certain way. The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modifying code, a challenge. Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+==========
+
+It is important to start small. That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+ perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+ perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code. A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture. A script perf-with-kcore can do that, but beware that the
+script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
+locally from the source tree you can do:
+
+ ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
+
+which will create a directory named 'pt_ls' and put the perf.data file and
+copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
+'perf report' becomes:
+
+ ~/libexec/perf-core/perf-with-kcore report pt_ls
+
+Because samples are synthesized after-the-fact, the sampling period can be
+selected for reporting. e.g. sample every microsecond
+
+ ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
+
+See the sections below for more information about the --itrace option.
+
+Beware: the smaller the period, the more samples are produced, and the
+longer they take to process.
+
+Also note that the coarseness of Intel PT timing information will start to
+distort the statistical value of the sampling as the sampling period becomes
+smaller.
+
+To represent software control flow, "branches" samples are produced. By default
+a branch sample is synthesized for every single branch. To get an idea what
+data is available you can use the 'perf script' tool with no parameters, which
+will list all the samples.
+
+ perf record -e intel_pt//u ls
+ perf script
+
+An interesting field that is not printed by default is 'flags' which can be
+displayed as follows:
+
+ perf script -Fcomm,tid,pid,time,cpu,event,trace,ip,sym,dso,addr,symoff,flags
+
+The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
+in transaction, respectively.
+
+While it is possible to create scripts to analyze the data, an alternative
+approach is available to export the data to a sqlite or postgresql database.
+Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
+and to script call-graph-from-sql.py for an example of using the database.
+
+There is also script intel-pt-events.py which provides an example of how to
+unpack the raw data for power events and PTWRITE.
+
+As mentioned above, it is easy to capture too much data. One way to limit the
+data captured is to use 'snapshot' mode which is explained further below.
+Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
+
+Another problem that will be experienced is decoder errors. They can be caused
+by inability to access the executed image, self-modified or JIT-ed code, or the
+inability to match side-band information (such as context switches and mmaps)
+which results in the decoder not knowing what code was executed.
+
+There is also the problem of perf not being able to copy the data fast enough,
+resulting in data lost because the buffer was full. See 'Buffer handling' below
+for more details.
+
+
+perf record
+===========
+
+new event
+---------
+
+The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
+selected by providing the PMU name followed by the "config" separated by slashes.
+An enhancement has been made to allow default "config" e.g. the option
+
+ -e intel_pt//
+
+will use a default config value. Currently that is the same as
+
+ -e intel_pt/tsc,noretcomp=0/
+
+which is the same as
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+Note there are now new config terms - see section 'config terms' further below.
+
+The config terms are listed in /sys/devices/intel_pt/format. They are bit
+fields within the config member of the struct perf_event_attr which is
+passed to the kernel by the perf_event_open system call. They correspond to bit
+fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
+
+ $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+ /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+ /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+ /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+ /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+ /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+ /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+ /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
+
+Note that the default config must be overridden for each term i.e.
+
+ -e intel_pt/noretcomp=0/
+
+is the same as:
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+So, to disable TSC packets use:
+
+ -e intel_pt/tsc=0/
+
+It is also possible to specify the config value explicitly:
+
+ -e intel_pt/config=0x400/
+
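+The config value is simply the OR of the format bit fields listed above. As an
+illustration (a sketch, not perf source code), the following C snippet builds
+the default config (tsc=1 is format bit 10, hence 0x400) and passes it to the
+kernel via perf_event_open(); the helper name and the minimal error handling
+are for this example only:
+
+ #include <linux/perf_event.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include <sys/syscall.h>
+ #include <unistd.h>
+
+ int open_intel_pt_event(void)
+ {
+     struct perf_event_attr attr;
+     FILE *f;
+     int type = -1;
+
+     /* PMU type of the intel_pt PMU, e.g. 6 */
+     f = fopen("/sys/bus/event_source/devices/intel_pt/type", "r");
+     if (f) {
+         if (fscanf(f, "%d", &type) != 1)
+             type = -1;
+         fclose(f);
+     }
+
+     memset(&attr, 0, sizeof(attr));
+     attr.size = sizeof(attr);
+     attr.type = type;
+     attr.config = 1ULL << 10;   /* tsc=1 (format: config:10) == 0x400 */
+     attr.exclude_kernel = 1;    /* the 'u' modifier */
+
+     /* trace the calling thread on any cpu */
+     return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+ }
+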
+Note that, as with all events, the event is suffixed with event modifiers:
+
+ u userspace
+ k kernel
+ h hypervisor
+ G guest
+ H host
+ p precise ip
+
+'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
+meaningful for Intel PT.
+
+perf_event_attr is displayed if the -vv option is used e.g.
+
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+
+
+config terms
+------------
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflected in new config terms. All the config terms are
+described below.
+
+tsc Always supported. Produces TSC timestamp packets to provide
+ timing information. In some cases it is possible to decode
+ without timing information, for example a per-thread context
+ that does not overlap executable memory maps.
+
+ The default config selects tsc (i.e. tsc=1).
+
+noretcomp Always supported. Disables "return compression" so a TIP packet
+ is produced when a function returns. Causes more packets to be
+ produced but might make decoding more reliable.
+
+ The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period Allows the frequency of PSB packets to be specified.
+
+ The PSB packet is a synchronization packet that provides a
+ starting point for decoding or recovery from errors.
+
+ Support for psb_period is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and "0"
+ otherwise.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The psb_period value is converted to the approximate number of
+ trace bytes between PSB packets as:
+
+ 2 ^ (value + 11)
+
+		e.g. value 3 means 16KiB between PSBs
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/psb_period=15/u uname
+ Invalid psb_period for intel_pt. Valid values are: 0-5
+
+ If MTC packets are selected, the default config selects a value
+ of 3 (i.e. psb_period=3) or the nearest lower value that is
+ supported (0 is always supported). Otherwise the default is 0.
+
+ If decoding is expected to be reliable and the buffer is large
+ then a large PSB period can be used.
+
+ Because a TSC packet is produced with PSB, the PSB period can
+		also affect the granularity of timing information in the absence
+ of MTC or CYC.
+
+mtc Produces MTC timing packets.
+
+ MTC packets provide finer grain timestamp information than TSC
+ packets. MTC packets record time using the hardware crystal
+ clock (CTC) which is related to TSC packets using a TMA packet.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The frequency of MTC packets can also be specified - see
+ mtc_period below.
+
+mtc_period Specifies how frequently MTC packets are produced - see mtc
+ above for how to determine if MTC packets are supported.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The mtc_period value is converted to the MTC frequency as:
+
+ CTC-frequency / (2 ^ value)
+
+ e.g. value 3 means one eighth of CTC-frequency
+
+ Where CTC is the hardware crystal clock, the frequency of which
+ can be related to TSC via values provided in cpuid leaf 0x15.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/mtc_period=15/u uname
+ Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+ The default value is 3 or the nearest lower value
+ that is supported (0 is always supported).
+
+cyc Produces CYC timing packets.
+
+ CYC packets provide even finer grain timestamp information than
+ MTC and TSC packets. A CYC packet contains the number of CPU
+ cycles since the last CYC packet. Unlike MTC and TSC packets,
+ CYC packets are only sent when another packet is also sent.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The number of CYC packets produced can be reduced by specifying
+ a threshold - see cyc_thresh below.
+
+cyc_thresh Specifies how frequently CYC packets are produced - see cyc
+ above for how to determine if CYC packets are supported.
+
+ Valid cyc_thresh values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The cyc_thresh value represents the minimum number of CPU cycles
+ that must have passed before a CYC packet can be sent. The
+ number of CPU cycles is:
+
+ 2 ^ (value - 1)
+
+ e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+ can be sent. Note a CYC packet is still only sent when another
+		packet is sent, not, for example, at every 8 CPU cycles.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+ Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+ CYC packets are not requested by default.
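+
+To summarize, the three timing-related conversions above can be written as a
+small C sketch (illustrative only; the function names are made up for this
+example):
+
+ #include <stdint.h>
+
+ /* psb_period: approximate trace bytes between PSB packets */
+ static uint64_t psb_period_bytes(unsigned int value)
+ {
+     return 1ULL << (value + 11);            /* value 3 -> 16KiB */
+ }
+
+ /* mtc_period: MTC packet frequency derived from the CTC frequency */
+ static double mtc_frequency(double ctc_freq, unsigned int value)
+ {
+     return ctc_freq / (double)(1ULL << value);  /* value 3 -> CTC/8 */
+ }
+
+ /* cyc_thresh: minimum CPU cycles before another CYC packet can be sent */
+ static uint64_t cyc_threshold_cycles(unsigned int value)
+ {
+     return value ? 1ULL << (value - 1) : 0;     /* value 4 -> 8 cycles */
+ }
+
+ /* a period/threshold value is valid if its bit is set in the hexadecimal
+  * mask read from the corresponding caps file */
+ static int value_is_valid(uint32_t caps, unsigned int value)
+ {
+     return (caps >> value) & 1;
+ }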
+
+pt Specifies pass-through which enables the 'branch' config term.
+
+ The default config selects 'pt' if it is available, so a user will
+ never need to specify this term.
+
+branch Enable branch tracing. Branch tracing is enabled by default so to
+ disable branch tracing use 'branch=0'.
+
+ The default config selects 'branch' if it is available.
+
+ptw Enable PTWRITE packets which are produced when a ptwrite instruction
+ is executed.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
+ provides the address of the ptwrite instruction. In the absence of
+ fup_on_ptw, the decoder will use the address of the previous branch
+ if branch tracing is enabled, otherwise the address will be zero.
+ Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt Enable power events. The power events provide information about
+ changes to the CPU C-state.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+
+new snapshot option
+-------------------
+
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
+To select snapshot mode a new option has been added:
+
+ -S
+
+Optionally it can be followed by the snapshot size e.g.
+
+ -S0x100000
+
+The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
+nor snapshot size is specified, then the default is 4MiB for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced as described in the 'new auxtrace mmap size option' section below.
+
+The snapshot size is displayed if the option -vv is used e.g.
+
+ Intel PT snapshot size: %zu
+
+
+new auxtrace mmap size option
+-----------------------------
+
+Intel PT buffer size is specified by an addition to the -m option e.g.
+
+ -m,16
+
+selects a buffer size of 16 pages i.e. 64KiB.
+
+Note that the existing functionality of -m is unchanged. The auxtrace mmap size
+is specified by the optional addition of a comma and the value.
+
+The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits). Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages. In snapshot mode, it is the same but the minimum size is
+1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+ mmap length 528384
+ auxtrace mmap length 4198400
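+
+For reference, the auxtrace buffer is the AUX area of the perf mmap. The
+following C sketch (not perf source; the sizes are examples and error handling
+is minimal) shows how such a mapping is set up: the base ring buffer is mapped
+first, aux_offset and aux_size are written into the perf_event_mmap_page, and
+the AUX area is then mapped at that offset:
+
+ #include <linux/perf_event.h>
+ #include <sys/mman.h>
+ #include <sys/types.h>
+ #include <unistd.h>
+
+ void *map_aux_area(int fd, size_t aux_size)
+ {
+     size_t ps = sysconf(_SC_PAGESIZE);
+     size_t base_size = (1 + 512) * ps;  /* 1 header page + 512 data pages */
+     struct perf_event_mmap_page *pc;
+     void *aux;
+
+     pc = mmap(NULL, base_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+     if (pc == MAP_FAILED)
+         return NULL;
+
+     /* tell the kernel where the AUX area lives and how big it is */
+     pc->aux_offset = base_size;
+     pc->aux_size = aux_size;
+
+     /* the kernel advances pc->aux_head; after copying data out, the
+      * tool advances pc->aux_tail (full-trace mode) */
+     aux = mmap(NULL, aux_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
+                (off_t)pc->aux_offset);
+     return aux == MAP_FAILED ? NULL : aux;
+ }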
+
+
+Intel PT modes of operation
+---------------------------
+
+Intel PT can be used in 2 modes:
+ full-trace mode
+ snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+ perf record -e intel_pt//u uname
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+ perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+ [1] 11435
+ kill -USR2 11435
+ Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The 2 modes cannot be used together.
+
+
+Buffer handling
+---------------
+
+There may be buffer limitations (i.e. single ToPA entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated. However an interrupt must be used to switch
+between buffers. That has two potential problems:
+ a) the interrupt may not be handled in time so that the current buffer
+ becomes full and some trace data is lost.
+ b) the interrupts may slow the system and affect the performance
+ results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around. If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
+wait, the intel_pt event gets disabled. Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
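+
+In the event stream, lost data shows up as a PERF_RECORD_AUX record with the
+PERF_AUX_FLAG_TRUNCATED flag set (both from linux/perf_event.h). A minimal C
+sketch of the check, assuming the record layout without the trailing
+sample_id fields:
+
+ #include <linux/perf_event.h>
+ #include <stdint.h>
+ #include <stdio.h>
+
+ struct aux_record {
+     struct perf_event_header header;
+     uint64_t aux_offset;
+     uint64_t aux_size;
+     uint64_t flags;
+     /* followed by sample_id fields */
+ };
+
+ static void check_aux_record(const struct aux_record *rec)
+ {
+     if (rec->header.type == PERF_RECORD_AUX &&
+         (rec->flags & PERF_AUX_FLAG_TRUNCATED))
+         fprintf(stderr, "AUX data was truncated\n");
+ }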
+
+
+Intel PT and build ids
+----------------------
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled. Deliberately, Intel PT is not
+decoded for that purpose (it would take too long). Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+ perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+ perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+---------------------------------
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
+collection of side-band information. In order to prevent that, a dummy
+software event has been introduced that permits tracking events (like mmaps) to
+continue to be recorded while intel_pt is disabled. That is important to ensure
+there is complete side-band information to allow the decoding of subsequent
+snapshots.
+
+A test has been created for that. To find the test:
+
+ perf test list
+ ...
+ 23: Test using a dummy software event to keep tracking
+
+To run the test:
+
+ perf test 23
+ 23: Test using a dummy software event to keep tracking : Ok
+
+
+perf record modes (nothing new here)
+------------------------------------
+
+perf record essentially operates in one of three modes:
+ per thread
+ per cpu
+ workload only
+
+"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
+workload).
+"per cpu" is selected by -C or -a.
+"workload only" mode is selected by not using the other options but providing a
+command to run (i.e. the workload).
+
+In per-thread mode an exact list of threads is traced. There is no inheritance.
+Each thread has its own event buffer.
+
+In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
+option, or processes selected with -p or -u) are traced. Each cpu has its own
+buffer. Inheritance is allowed.
+
+In workload-only mode, the workload is traced but with per-cpu buffers.
+Inheritance is allowed. Note that you can now trace a workload in per-thread
+mode by using the --per-thread option.
+
+
+Privileged vs non-privileged users
+----------------------------------
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
+have memory limits imposed upon them. That affects what buffer sizes they can
+have as outlined above.
+
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, just not by whom. That information is provided by
+PERF_RECORD_SWITCH_CPU_WIDE, which is only accessible in a system-wide context
+and in turn requires CAP_SYS_ADMIN.
+
+For further info, please see commit 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH
+to indicate context switches"), which introduced these metadata events.
+
+When working with kernels < v4.2, the following considerations must be taken
+into account, as the sched:sched_switch tracepoints will be used to receive
+such information:
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
+not permitted to use tracepoints which means there is insufficient side-band
+information to decode Intel PT in per-cpu mode, and potentially workload-only
+mode too if the workload creates new processes.
+
+Note also, that to use tracepoints, read-access to debugfs is required. So if
+debugfs is not mounted or the user does not have read-access, it will again not
+be possible to decode Intel PT in per-cpu mode.
+
+
+sched_switch tracepoint
+-----------------------
+
+The sched_switch tracepoint is used to provide side-band data for Intel PT
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
+
+ $ perf record -vv -e intel_pt//u uname
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 2
+ size 112
+ config 0x108
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
+ read_format ID
+ inherit 1
+ sample_id_all 1
+ exclude_guest 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 1
+ size 112
+ config 0x9
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ mmap 1
+ comm 1
+ enable_on_exec 1
+ task 1
+ sample_id_all 1
+ mmap2 1
+ comm_exec 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ mmap size 528384B
+ AUX area mmap length 4194304
+ perf event ring buffer mmapped per cpu
+ Synthesizing auxtrace information
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.042 MB perf.data ]
+
+Note, the sched_switch event is only added if the user is permitted to use it
+and only in per-cpu mode.
+
+Note also, the sched_switch event is only added if TSC packets are requested.
+That is because, in the absence of timing information, the sched_switch events
+cannot be matched against the Intel PT trace.
+
+
+perf script
+===========
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace.
+
+
+New --itrace option
+-------------------
+
+Having no option is the same as
+
+ --itrace
+
+which, in turn, is the same as
+
+ --itrace=ibxwpe
+
+The letters are:
+
+ i synthesize "instructions" events
+ b synthesize "branches" events
+ x synthesize "transactions" events
+ w synthesize "ptwrite" events
+ p synthesize "power" events
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
+ e synthesize tracing error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+ l synthesize last branch entries (use with i or x)
+ s skip initial number of events
+
+"Instructions" events look like they were recorded by "perf record -e
+instructions".
+
+"Branches" events look like they were recorded by "perf record -e branches". "c"
+and "r" can be combined to get calls and returns.
+
+"Transactions" events correspond to the start or end of transactions. The
+'flags' field can be used in perf script to determine whether the event is a
+transaction start, commit or abort.
+
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0". Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used. Refer to the config terms
+section above. perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets. While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used. Refer to
+the config terms section above. The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency. perf script
+"event,synth" fields display information like this:
+ cbr: cbr: 22 freq: 2189 MHz (200%)
+ mwait: hints: 0x60 extensions: 0x1
+ pwre: hw: 0 cstate: 2 sub-cstate: 0
+ exstop: ip: 1
+ pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+ "cbr" includes the frequency and the percentage of maximum non-turbo
+ "mwait" shows mwait hints and extensions
+ "pwre" shows C-state transitions (to a C-state deeper than C0) and
+ whether initiated by hardware
+ "exstop" indicates execution stopped and whether the IP was recorded
+ exactly,
+ "pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace. Error events
+are quite important. Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions. Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+ --itrace=i10us
+
+sets the period to 10us i.e. one instruction sample is synthesized for each 10
+microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction. Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction. The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=ig32
+ --itrace=xg32
+
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=il10
+ --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+ --itrace=i0nss1000000
+
+skips the first million instructions.
+
+dump option
+-----------
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel PT packets are displayed. The packet decoder does not
+pay attention to PSB packets, but just decodes the bytes - so the packets seen
+by the actual decoder may not be identical in places where the data is corrupt.
+One example of that would be when the buffer-switching interrupt has been too
+slow, and the buffer has been filled completely. In that case, the last packet
+in the buffer might be truncated and immediately followed by a PSB as the trace
+continues in the next buffer.
+
+To disable the display of Intel PT packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+===========
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script, with the exception that the default is --itrace=igxe.
+
+
+perf inject
+===========
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with autofdo. It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5. The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+ $ gcc-5 -O3 sort.c -o sort_optimized
+ $ ./sort_optimized 30000
+ Bubble sorting array of 30000 elements
+ 2254 ms
+
+ $ cat ~/.perfconfig
+ [intel-pt]
+ mispred-all = on
+
+ $ perf record -e intel_pt//u ./sort 3000
+ Bubble sorting array of 3000 elements
+ 58 ms
+ [ perf record: Woken up 2 times to write data ]
+ [ perf record: Captured and wrote 3.939 MB perf.data ]
+ $ perf inject -i perf.data -o inj --itrace=i100usle --strip
+ $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+ $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+ $ ./sort_autofdo 30000
+ Bubble sorting array of 30000 elements
+ 2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
new file mode 100644
index 000000000..a3abe04c7
--- /dev/null
+++ b/tools/perf/Documentation/itrace.txt
@@ -0,0 +1,36 @@
+ i synthesize instructions events
+ b synthesize branches events
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
+ x synthesize transactions events
+ w synthesize ptwrite events
+ p synthesize power events
+ e synthesize error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+ l synthesize last branch entries (use with i or x)
+ s skip initial number of events
+
+ The default is all events i.e. the same as --itrace=ibxwpe
+
+ In addition, the period (default 100000) for instructions events
+ can be specified in units of:
+
+ i instructions
+ t ticks
+ ms milliseconds
+ us microseconds
+ ns nanoseconds (default)
+
+ Also the call chain size (default 16, max. 1024) for instructions or
+ transactions events can be specified.
+
+ Also the number of last branch entries (default 64, max. 1024) for
+ instructions or transactions events can be specified.
+
+ It is also possible to skip events generated (instructions, branches, transactions,
+ ptwrite, power) at the beginning. This is useful to ignore initialization code.
+
+ --itrace=i0nss1000000
+
+ skips the first million instructions.
diff --git a/tools/perf/Documentation/jit-interface.txt b/tools/perf/Documentation/jit-interface.txt
new file mode 100644
index 000000000..a8656f564
--- /dev/null
+++ b/tools/perf/Documentation/jit-interface.txt
@@ -0,0 +1,40 @@
+perf supports a simple JIT interface to resolve symbols for dynamic code generated
+by a JIT.
+
+The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file
+
+This is a text file.
+
+Each line has the following format, fields separated with spaces:
+
+START SIZE symbolname
+
+START and SIZE are hex numbers without 0x.
+symbolname is the rest of the line, so it could contain special characters.
+
+The ownership of the file has to match the process.
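+
+For illustration, here is a minimal C sketch of a JIT emitting such a map
+file (the helper names are made up for this example; error handling is
+omitted):
+
+ #include <inttypes.h>
+ #include <stdio.h>
+ #include <unistd.h>
+
+ static FILE *open_perf_map(void)
+ {
+     char path[64];
+
+     /* written by the JIT process itself so the pid and ownership match */
+     snprintf(path, sizeof(path), "/tmp/perf-%d.map", getpid());
+     return fopen(path, "w");
+ }
+
+ static void emit_symbol(FILE *map, uint64_t start, uint64_t size,
+                         const char *name)
+ {
+     /* START SIZE symbolname - hex numbers without 0x */
+     fprintf(map, "%" PRIx64 " %" PRIx64 " %s\n", start, size, name);
+     fflush(map);
+ }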
diff --git a/tools/perf/Documentation/jitdump-specification.txt b/tools/perf/Documentation/jitdump-specification.txt
new file mode 100644
index 000000000..4c62b0713
--- /dev/null
+++ b/tools/perf/Documentation/jitdump-specification.txt
@@ -0,0 +1,257 @@
+JITDUMP specification version 2
+Last Revised: 09/15/2016
+Author: Stephane Eranian <eranian@gmail.com>
+
+--------------------------------------------------------
+| Revision | Date | Description |
+--------------------------------------------------------
+| 1 | 09/07/2016 | Initial revision |
+--------------------------------------------------------
+| 2 | 09/15/2016 | Add JIT_CODE_UNWINDING_INFO |
+--------------------------------------------------------
+
+
+I/ Introduction
+
+
+This document describes the jitdump file format. The file is generated by Just-In-Time compiler runtimes to save meta-data about the generated code, such as the address, size, and name of generated functions, the native code generated, and the source line information. The data may then be used by performance tools, such as Linux perf, to generate function and assembly level profiles.
+
+The format is not specific to any particular programming language. It can be extended as need be.
+
+The format of the file is binary. It is self-describing in terms of endianness and is portable across multiple processor architectures.
+
+
+II/ Overview of the format
+
+
+The format requires only sequential accesses, i.e., append only mode. The file starts with a fixed size file header describing the version of the specification and the endianness.
+
+The header is followed by a series of records, each starting with a fixed size header describing the type of record and its size. It is, itself, followed by the payload for the record. Records can have a variable size even for a given type.
+
+Each entry in the file is timestamped. All timestamps must use the same clock source. The CLOCK_MONOTONIC clock source is recommended.
+
+
+III/ Jitdump file header format
+
+Each jitdump file starts with a fixed size header containing the following fields in order:
+
+
+* uint32_t magic : a magic number tagging the file type. The value is 4-byte long and represents the string "JiTD" in ASCII form. It is 0x4A695444 or 0x4454694a depending on the endianness. The field can be used to detect the endianness of the file
+* uint32_t version : a 4-byte value representing the format version. It is currently set to 2
+* uint32_t total_size: size in bytes of file header
+* uint32_t elf_mach : ELF architecture encoding (ELF e_machine value as specified in /usr/include/elf.h)
+* uint32_t pad1 : padding. Reserved for future use
+* uint32_t pid : JIT runtime process identification (OS specific)
+* uint64_t timestamp : timestamp of when the file was created
+* uint64_t flags : a bitmask of flags
+
+The flags currently defined are as follows:
+ * bit 0: JITDUMP_FLAGS_ARCH_TIMESTAMP : set if the jitdump file is using an architecture-specific timestamp clock source. For instance, on x86, one could use TSC directly
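+
+For reference, the header corresponds to the following C layout (a sketch
+consistent with the field list above; the struct name is illustrative):
+
+ #include <stdint.h>
+
+ struct jitdump_file_header {
+     uint32_t magic;      /* "JiTD": 0x4A695444, endian-dependent */
+     uint32_t version;    /* currently 2 */
+     uint32_t total_size; /* size in bytes of the file header */
+     uint32_t elf_mach;   /* ELF e_machine value */
+     uint32_t pad1;       /* reserved */
+     uint32_t pid;        /* JIT runtime process id */
+     uint64_t timestamp;  /* file creation timestamp */
+     uint64_t flags;      /* bit 0: JITDUMP_FLAGS_ARCH_TIMESTAMP */
+ };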
+
+IV/ Record header
+
+The file header is immediately followed by records. Each record starts with a fixed size header describing the record that follows.
+
+The record header is specified in order as follows:
+* uint32_t id : a value identifying the record type (see below)
+* uint32_t total_size: the size in bytes of the record including the header.
+* uint64_t timestamp : a timestamp of when the record was created.
+
+The following record types are defined:
+ * Value 0 : JIT_CODE_LOAD : record describing a jitted function
+ * Value 1 : JIT_CODE_MOVE : record describing an already jitted function which is moved
+ * Value 2 : JIT_CODE_DEBUG_INFO: record describing the debug information for a jitted function
+ * Value 3 : JIT_CODE_CLOSE : record marking the end of the jit runtime (optional)
+ * Value 4 : JIT_CODE_UNWINDING_INFO: record describing a function's unwinding information
+
+ The payload of the record must immediately follow the record header without padding.
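+
+In C terms, every record therefore begins with the following prefix (a
+sketch; the type names are illustrative):
+
+ #include <stdint.h>
+
+ enum jitdump_record_id {
+     JIT_CODE_LOAD           = 0,
+     JIT_CODE_MOVE           = 1,
+     JIT_CODE_DEBUG_INFO     = 2,
+     JIT_CODE_CLOSE          = 3,
+     JIT_CODE_UNWINDING_INFO = 4,
+ };
+
+ struct jitdump_record_header {
+     uint32_t id;         /* one of enum jitdump_record_id */
+     uint32_t total_size; /* size in bytes, including this header */
+     uint64_t timestamp;  /* same clock source as the file header */
+ };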
+
+V/ JIT_CODE_LOAD record
+
+
+ The record has the following fields following the fixed-size record header in order:
+ * uint32_t pid: OS process id of the runtime generating the jitted code
+ * uint32_t tid: OS thread identification of the runtime thread generating the jitted code
+ * uint64_t vma: virtual address of jitted code start
+ * uint64_t code_addr: code start address for the jitted code. By default vma = code_addr
+ * uint64_t code_size: size in bytes of the generated jitted code
+ * uint64_t code_index: unique identifier for the jitted code (see below)
+ * char[n]: function name in ASCII including the null termination
+ * native code: raw byte encoding of the jitted code
+
+ The record header total_size field is inclusive of all components:
+ * record header
+ * fixed-sized fields
+ * function name string, including termination
+ * native code length
+ * record specific variable data (e.g., array of data entries)
+
+The code_index is used to uniquely identify each jitted function. The index can be a monotonically increasing 64-bit value. Each time a function is jitted it gets a new number. This value is used in case the code for a function is moved and avoids having to issue another JIT_CODE_LOAD record.
+
+The format supports empty functions with no native code.
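+
+As a sketch, emitting a JIT_CODE_LOAD record with the illustrative types
+above amounts to writing the fixed fields, the null-terminated name and the
+raw code back to back (assuming the structs have no compiler padding):
+
+ #include <stdint.h>
+ #include <stdio.h>
+ #include <string.h>
+
+ static void emit_code_load(FILE *fp, uint64_t timestamp,
+                            uint32_t pid, uint32_t tid,
+                            uint64_t addr, uint64_t code_size,
+                            uint64_t code_index,
+                            const char *name, const void *code)
+ {
+     struct jitdump_record_header hdr = {
+         .id = JIT_CODE_LOAD,
+         .timestamp = timestamp,
+     };
+     uint32_t ids[2] = { pid, tid };
+     uint64_t fields[4] = { addr /* vma */, addr /* code_addr */,
+                            code_size, code_index };
+
+     hdr.total_size = sizeof(hdr) + sizeof(ids) + sizeof(fields) +
+                      strlen(name) + 1 + code_size;
+
+     fwrite(&hdr, sizeof(hdr), 1, fp);
+     fwrite(ids, sizeof(ids), 1, fp);
+     fwrite(fields, sizeof(fields), 1, fp);
+     fwrite(name, strlen(name) + 1, 1, fp);  /* include the NUL */
+     fwrite(code, code_size, 1, fp);
+ }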
+
+
+VI/ JIT_CODE_MOVE record
+
+ The record type is optional.
+
+ The record has the following fields following the fixed-size record header in order:
+ * uint32_t pid : OS process id of the runtime generating the jitted code
+ * uint32_t tid : OS thread identification of the runtime thread generating the jitted code
+ * uint64_t vma : new virtual address of jitted code start
+ * uint64_t old_code_addr: previous code address for the same function
+ * uint64_t new_code_addr: alternate new code start address for the jitted code. By default it should be equal to the vma address.
+ * uint64_t code_size : size in bytes of the jitted code
+ * uint64_t code_index : index referring to the JIT_CODE_LOAD code_index record of when the function was initially jitted
+
+
+The MOVE record can be used in case an already jitted function is simply moved by the runtime inside the code cache.
+
+The JIT_CODE_MOVE record cannot come before the JIT_CODE_LOAD record for the same function name. The function cannot have changed name, otherwise a new JIT_CODE_LOAD record must be emitted.
+
+The code size of the function cannot change.
+
+
+VII/ JIT_CODE_DEBUG_INFO record
+
+The record type is optional.
+
+The record contains source lines debug information, i.e., a way to map a code address back to a source line. This information may be used by the performance tool.
+
+The record has the following fields following the fixed-size record header in order:
+ * uint64_t code_addr: address of function for which the debug information is generated
+ * uint64_t nr_entry : number of debug entries for the function
+ * debug_entry[n]: array of nr_entry debug entries for the function
+
+The debug_entry describes the source line information. It is defined as follows in order:
+* uint64_t code_addr: address of function for which the debug information is generated
+* uint32_t line : source file line number (starting at 1)
+* uint32_t discrim : column discriminator, 0 is default
+* char name[n] : source file name in ASCII, including null termination
+
+The debug_entry entries are saved in sequence but given that they have variable sizes due to the file name string, they cannot be indexed directly.
+They need to be walked sequentially. The next debug_entry is found at sizeof(debug_entry) + strlen(name) + 1.
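+
+For example, a parser might walk the entries like this (a sketch; the fixed
+part of an entry is 16 bytes, followed by the name string):
+
+ #include <stdint.h>
+ #include <string.h>
+
+ struct debug_entry {
+     uint64_t code_addr;  /* address the debug info applies to */
+     uint32_t line;       /* source line number (starting at 1) */
+     uint32_t discrim;    /* column discriminator, 0 is default */
+     char     name[];     /* null-terminated source file name */
+ };
+
+ static struct debug_entry *next_debug_entry(struct debug_entry *e)
+ {
+     size_t skip = sizeof(*e) + strlen(e->name) + 1;
+
+     return (struct debug_entry *)((char *)e + skip);
+ }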
+
+IMPORTANT:
+ The JIT_CODE_DEBUG_INFO record for a given function must always be generated BEFORE the JIT_CODE_LOAD record for the function. This greatly simplifies the parser for the jitdump file.
+
+
+VIII/ JIT_CODE_CLOSE record
+
+
+The record type is optional.
+
+The record is used as a marker for the end of the jitted runtime. It can be replaced by the end of the file.
+
+The JIT_CODE_CLOSE record does not have any specific fields, the record header contains all the information needed.
+
+
+IX/ JIT_CODE_UNWINDING_INFO
+
+
+The record type is optional.
+
+The record is used to describe the unwinding information for a jitted function.
+
+The record has the following fields following the fixed-size record header in order:
+
+uint64_t unwind_data_size : the size in bytes of the unwinding data table at the end of the record
+uint64_t eh_frame_hdr_size : the size in bytes of the DWARF EH Frame Header at the start of the unwinding data table at the end of the record
+uint64_t mapped_size : the size of the unwinding data mapped in memory
+const char unwinding_data[n]: an array of unwinding data, consisting of the EH Frame Header, followed by the actual EH Frame
+
+
+The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html
+
+
+The EH Frame follows the LSB specification as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html
+
+
+NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information.
diff --git a/tools/perf/Documentation/manpage-1.72.xsl b/tools/perf/Documentation/manpage-1.72.xsl
new file mode 100644
index 000000000..b4d315cb8
--- /dev/null
+++ b/tools/perf/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+ needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-base.xsl b/tools/perf/Documentation/manpage-base.xsl
new file mode 100644
index 000000000..a264fa616
--- /dev/null
+++ b/tools/perf/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+ git.docbook.backslash and git.docbook.dot params
+ must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB(',
+ substring-after(@id,'-'),')',
+ $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>sp&#10;</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB',
+ substring-after(@arearefs,'-'),
+ '. ',$git.docbook.backslash,'fR')"/>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-bold-literal.xsl b/tools/perf/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 000000000..608eb5df6
--- /dev/null
+++ b/tools/perf/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+ this makes literal text easier to distinguish in manpages
+ viewed on a tty -->
+<xsl:template match="literal">
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fB</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-normal.xsl b/tools/perf/Documentation/manpage-normal.xsl
new file mode 100644
index 000000000..a48f5b11f
--- /dev/null
+++ b/tools/perf/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot" >.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-suppress-sp.xsl b/tools/perf/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 000000000..a63c7632a
--- /dev/null
+++ b/tools/perf/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles erroneous, inline .sp in manpage output of some
+ versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+ that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+ <xsl:variable name="content">
+ <xsl:apply-templates/>
+ </xsl:variable>
+ <xsl:value-of select="normalize-space($content)"/>
+ <xsl:if test="not(ancestor::authorblurb) and
+ not(ancestor::personblurb)">
+ <xsl:text>&#10;&#10;</xsl:text>
+ </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
new file mode 100644
index 000000000..e8c972f89
--- /dev/null
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -0,0 +1,132 @@
+perf-annotate(1)
+================
+
+NAME
+----
+perf-annotate - Read perf.data (created by perf record) and display annotated code
+
+SYNOPSIS
+--------
+[verse]
+'perf annotate' [-i <file> | --input=file] [symbol_name]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays an annotated version of the
+code. If the object file has debug symbols then the source code will be
+displayed alongside assembly code.
+
+If there is no debug info in the object, then annotated assembly is displayed.
+
+OPTIONS
+-------
+-i::
+--input=<file>::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-d::
+--dsos=<dso[,dso...]>::
+ Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+ Symbol to annotate.
+
+-f::
+--force::
+ Don't do ownership validation.
+
+-v::
+--verbose::
+ Be more verbose. (Show symbol address, etc)
+
+-q::
+--quiet::
+ Do not show any message. (Suppress -v)
+
+-n::
+--show-nr-samples::
+ Show the number of samples for each symbol
+
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+ vmlinux pathname.
+
+--ignore-vmlinux::
+ Ignore vmlinux files.
+
+-m::
+--modules::
+ Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+ Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+ Don't shorten the displayed pathnames.
+
+--stdio:: Use the stdio interface.
+
+--stdio2:: Use the stdio2 interface, non-interactive, uses the TUI formatting.
+
+--stdio-color=<mode>::
+ 'always', 'never' or 'auto', allowing configuring color output
+ via the command line, in addition to via "color.ui" .perfconfig.
+ Use '--stdio-color always' to generate color even when redirecting
+ to a pipe or file. Using just '--stdio-color' is equivalent to
+ using 'always'.
+
+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
+ present, as when piping to other commands, the stdio interface is
+	used. This interface starts by centering on the line with the most
+	samples; TAB/UNTAB cycles through the lines with the most samples.
+
+--gtk:: Use the GTK interface.
+
+-C::
+--cpu=<cpu>:: Only report samples for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma-separated list with no space: 0,1. Ranges of
+ CPUs are specified with -: 0-2. Default is to report samples on all
+ CPUs.
+
+--asm-raw::
+ Show raw instruction encoding of assembly instructions.
+
+--show-total-period:: Show a column with the sum of periods.
+
+--source::
+ Interleave source code with assembly code. Enabled by default,
+ disable with --no-source.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--objdump=<path>::
+ Path to objdump binary.
+
+--skip-missing::
+ Skip symbols that cannot be annotated.
+
+--group::
+ Show event group information together
+
+--percent-type::
+ Set annotation percent type from following choices:
+ global-period, local-period, global-hits, local-hits
+
+ The local/global keywords set if the percentage is computed
+ in the scope of the function (local) or the whole data (global).
+ The period/hits keywords set the base the percentage is computed
+ on - the samples period or the number of samples (hits).
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
new file mode 100644
index 000000000..ac6ecbb3e
--- /dev/null
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -0,0 +1,22 @@
+perf-archive(1)
+===============
+
+NAME
+----
+perf-archive - Create archive with object files with build-ids found in perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf archive' [file]
+
+DESCRIPTION
+-----------
+This command runs perf-buildid-list --with-hits, and collects the files with the
+buildids found so that analysis of perf.data contents can be possible on another
+machine.
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
new file mode 100644
index 000000000..34750fc32
--- /dev/null
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -0,0 +1,209 @@
+perf-bench(1)
+=============
+
+NAME
+----
+perf-bench - General framework for benchmark suites
+
+SYNOPSIS
+--------
+[verse]
+'perf bench' [<common options>] <subsystem> <suite> [<options>]
+
+DESCRIPTION
+-----------
+This 'perf bench' command is a general framework for benchmark suites.
+
+COMMON OPTIONS
+--------------
+-r::
+--repeat=::
+Specify the number of times to repeat the run (default 10).
+
+-f::
+--format=::
+Specify format style.
+Current available format styles are:
+
+'default'::
+Default style. This is mainly for human reading.
+---------------------
+% perf bench sched pipe # with no style specified
+(executing 1000000 pipe operations between two tasks)
+ Total time:5.855 sec
+ 5.855061 usecs/op
+ 170792 ops/sec
+---------------------
+
+'simple'::
+This simple style is friendly for automated
+processing by scripts.
+---------------------
+% perf bench --format=simple sched pipe # specified simple
+5.988
+---------------------
+
+SUBSYSTEM
+---------
+
+'sched'::
+ Scheduler and IPC mechanisms.
+
+'mem'::
+ Memory access performance.
+
+'numa'::
+ NUMA scheduling and MM benchmarks.
+
+'futex'::
+ Futex stressing benchmarks.
+
+'all'::
+ All benchmark subsystems.
+
+SUITES FOR 'sched'
+~~~~~~~~~~~~~~~~~~
+*messaging*::
+Suite for evaluating performance of scheduler and IPC mechanisms.
+Based on hackbench by Rusty Russell.
+
+Options of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+-p::
+--pipe::
+Use pipe() instead of socketpair()
+
+-t::
+--thread::
+Be multi thread instead of multi process
+
+-g::
+--group=::
+Specify number of groups
+
+-l::
+--nr_loops=::
+Specify number of loops
+
+Example of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched messaging # run with default
+options (20 sender and receiver processes per group)
+(10 groups == 400 processes run)
+
+ Total time:0.308 sec
+
+% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups
+(20 sender and receiver threads per group)
+(20 groups == 800 threads run)
+
+ Total time:0.582 sec
+---------------------
+
+*pipe*::
+Suite for pipe() system call.
+Based on pipe-test-1m.c by Ingo Molnar.
+
+Options of *pipe*
+^^^^^^^^^^^^^^^^^
+-l::
+--loop=::
+Specify number of loops.
+
+Example of *pipe*
+^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched pipe
+(executing 1000000 pipe operations between two tasks)
+
+ Total time:8.091 sec
+ 8.091833 usecs/op
+ 123581 ops/sec
+
+% perf bench sched pipe -l 1000 # loop 1000
+(executing 1000 pipe operations between two tasks)
+
+ Total time:0.016 sec
+ 16.948000 usecs/op
+ 59004 ops/sec
+---------------------
+
+SUITES FOR 'mem'
+~~~~~~~~~~~~~~~~
+*memcpy*::
+Suite for evaluating performance of simple memory copy in various ways.
+
+Options of *memcpy*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--size::
+Specify size of memory to copy (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-f::
+--function::
+Specify function to copy (default: default).
+Available functions depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
+
+-l::
+--nr_loops::
+Repeat memcpy invocation this number of times.
+
+-c::
+--cycles::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+*memset*::
+Suite for evaluating performance of simple memory set in various ways.
+
+Options of *memset*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--size::
+Specify size of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-f::
+--function::
+Specify function to set (default: default).
+Available functions depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
+
+-l::
+--nr_loops::
+Repeat memset invocation this number of times.
+
+-c::
+--cycles::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+SUITES FOR 'numa'
+~~~~~~~~~~~~~~~~~
+*mem*::
+Suite for evaluating NUMA workloads.
+
+SUITES FOR 'futex'
+~~~~~~~~~~~~~~~~~~
+*hash*::
+Suite for evaluating hash tables.
+
+*wake*::
+Suite for evaluating wake calls.
+
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
+*requeue*::
+Suite for evaluating requeue calls.
+
+*lock-pi*::
+Suite for evaluating futex lock_pi calls.
+
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
new file mode 100644
index 000000000..f6de0952f
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -0,0 +1,79 @@
+perf-buildid-cache(1)
+=====================
+
+NAME
+----
+perf-buildid-cache - Manage build-id cache.
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-cache <options>'
+
+DESCRIPTION
+-----------
+This command manages the build-id cache. It can add, remove, update and purge
+files to/from the cache. In the future it may also set upper limits for
+the space used by the cache, etc.
+This also scans the target binary for SDT (Statically Defined Tracing) and
+records it along with the buildid cache, which will be used by perf-probe.
+For more details, see linkperf:perf-probe[1].
+
+OPTIONS
+-------
+-a::
+--add=::
+ Add specified file to the cache.
+-f::
+--force::
+ Don't complain, do it.
+-k::
+--kcore::
+ Add specified kcore file to the cache. For the current host that is
+ /proc/kcore which requires root permissions to read. Be aware that
+ running 'perf buildid-cache' as root may update root's build-id cache
+ not the user's. Use the -v option to see where the file is created.
+ Note that the copied file contains only code sections not the whole core
+ image. Note also that files "kallsyms" and "modules" must also be in the
+ same directory and are also copied. All 3 files are created with read
+ permissions for root only. kcore will not be added if there is already a
+ kcore in the cache (with the same build-id) that has the same modules at
+ the same addresses. Use the -v option to see if a copy of kcore is
+ actually made.
+-r::
+--remove=::
+	Remove a cached binary which has the same build-id as the specified
+	file from the cache.
+-p::
+--purge=::
+ Purge all cached binaries including older caches which have specified
+ path from the cache.
+-P::
+--purge-all::
+ Purge all cached binaries. This will flush out entire cache.
+-M::
+--missing=::
+ List missing build ids in the cache for the specified file.
+-u::
+--update=::
+	Update the specified file in the cache. Note that this doesn't remove
+	older entries since those may still be needed for annotating old
+	(or remote) perf.data. An existing cache entry is replaced by the new
+	one only if it has exactly the same build-id. It can be used
+	to update kallsyms and the kernel dso to vmlinux in order to support
+	annotation.
+-l::
+--list::
+ List all valid binaries from cache.
+-v::
+--verbose::
+ Be more verbose.
+
+--target-ns=PID::
+ Obtain mount namespace information from the target pid. This is
+ used when creating a uprobe for a process that resides in a
+ different mount namespace from the perf(1) utility.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
new file mode 100644
index 000000000..25c52efcc
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -0,0 +1,43 @@
+perf-buildid-list(1)
+====================
+
+NAME
+----
+perf-buildid-list - List the buildids in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-list <options>'
+
+DESCRIPTION
+-----------
+This command displays the buildids found in a perf.data file, so that other
+tools can be used to fetch packages with matching symbol tables for use by
+perf report.
+
+It can also be used to show the build id of the running kernel or in an ELF
+file using -i/--input.
+
+OPTIONS
+-------
+-H::
+--with-hits::
+ Show only DSOs with hits.
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+-f::
+--force::
+ Don't do ownership validation.
+-k::
+--kernel::
+ Show running kernel build id.
+-v::
+--verbose::
+ Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-top[1],
+linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
new file mode 100644
index 000000000..095aebdc5
--- /dev/null
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -0,0 +1,290 @@
+perf-c2c(1)
+===========
+
+NAME
+----
+perf-c2c - Shared Data C2C/HITM Analyzer.
+
+SYNOPSIS
+--------
+[verse]
+'perf c2c record' [<options>] <command>
+'perf c2c record' [<options>] -- [<record command options>] <command>
+'perf c2c report' [<options>]
+
+DESCRIPTION
+-----------
+C2C stands for Cache To Cache.
+
+The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
+you to track down cacheline contention.
+
+The tool is based on x86's load latency and precise store facility events
+provided by Intel CPUs. These events provide:
+ - memory address of the access
+ - type of the access (load and store details)
+ - latency (in cycles) of the load access
+
+The c2c tool provides means to record this data and report back access details
+for the cachelines with the highest contention - the highest number of HITM accesses.
+
+The basic workflow with this tool follows the standard record/report phases.
+The user uses the record command to record event data and the report command
+to display it.
+
+
+RECORD OPTIONS
+--------------
+-e::
+--event=::
+ Select the PMU event. Use 'perf mem record -e list'
+ to list available events.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-l::
+--ldlat::
+ Configure mem-loads latency.
+
+-k::
+--all-kernel::
+ Configure all used events to run in kernel space.
+
+-u::
+--all-user::
+ Configure all used events to run in user space.
+
+REPORT OPTIONS
+--------------
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-i::
+--input::
+ Specify the input file to process.
+
+-N::
+--node-info::
+ Show extra node info in report (see NODE INFO section)
+
+-c::
+--coalesce::
+ Specify sorting fields for single cacheline display.
+ Following fields are available: tid,pid,iaddr,dso
+ (see COALESCE)
+
+-g::
+--call-graph::
+ Set up callchain parameters.
+ Please refer to the perf-report man page for details.
+
+--stdio::
+ Force the stdio output (see STDIO OUTPUT)
+
+--stats::
+ Display only statistic tables and force stdio mode.
+
+--full-symbols::
+ Display full length of symbols.
+
+--no-source::
+ Do not display Source:Line column.
+
+--show-all::
+ Show all captured HITM lines, regardless of the HITM % 0.0005 limit.
+
+-f::
+--force::
+ Don't do ownership validation.
+
+-d::
+--display::
+ Select the HITM type (rmt, lcl) to display and sort on. Total HITMs
+ are used by default.
+
+C2C RECORD
+----------
+The perf c2c record command sets up options related to HITM cacheline
+analysis and calls the standard perf record command.
+
+The following perf record options are configured by default:
+(check perf record man page for details)
+
+ -W,-d,--phys-data,--sample-cpu
+
+Unless specified otherwise with the '-e' option, the following events
+are monitored by default:
+
+ cpu/mem-loads,ldlat=30/P
+ cpu/mem-stores/P
+
+The user can pass any 'perf record' option after the '--' mark, for
+example (to enable callchains and system-wide monitoring):
+
+ $ perf c2c record -- -g -a
+
+Please check RECORD OPTIONS section for specific c2c record options.
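+
+For illustration, a complete session could look like this (the
+workload 'sleep 10' is just a placeholder):
+
+  $ perf c2c record -- -g -a sleep 10
+  $ perf c2c report --stdio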
+
+C2C REPORT
+----------
+The perf c2c report command displays shared data analysis. It comes in two
+display modes: stdio and tui (default).
+
+The report command workflow is as follows:
+ - sort all the data based on the cacheline address
+ - store access details for each cacheline
+ - sort all cachelines based on user settings
+ - display data
+
+In general the perf report output consists of 2 basic views:
+ 1) most expensive cachelines list
+ 2) offsets details for each cacheline
+
+For each cacheline in the 1) list we display the following data:
+(Both stdio and TUI modes follow the same fields output)
+
+ Index
+ - zero based index to identify the cacheline
+
+ Cacheline
+ - cacheline address (hex number)
+
+ Total records
+ - sum of all cacheline accesses
+
+ Rmt/Lcl Hitm
+ - cacheline percentage of all Remote/Local HITM accesses
+
+ LLC Load Hitm - Total, Lcl, Rmt
+ - count of Total/Local/Remote load HITMs
+
+ Store Reference - Total, L1Hit, L1Miss
+ Total - all store accesses
+ L1Hit - store accesses that hit L1
+ L1Miss - store accesses that missed L1
+
+ Load Dram
+ - count of local and remote DRAM accesses
+
+ LLC Ld Miss
+ - count of all accesses that missed LLC
+
+ Total Loads
+ - sum of all load accesses
+
+ Core Load Hit - FB, L1, L2
+ - count of load hits in FB (Fill Buffer), L1 and L2 cache
+
+ LLC Load Hit - Llc, Rmt
+ - count of LLC and Remote load hits
+
+For each offset in the 2) list we display the following data:
+
+ HITM - Rmt, Lcl
+ - % of Remote/Local HITM accesses for given offset within cacheline
+
+ Store Refs - L1 Hit, L1 Miss
+ - % of store accesses that hit/missed L1 for given offset within cacheline
+
+ Data address - Offset
+ - offset address
+
+ Pid
+ - pid of the process responsible for the accesses
+
+ Tid
+ - tid of the process responsible for the accesses
+
+ Code address
+ - code address responsible for the accesses
+
+ cycles - rmt hitm, lcl hitm, load
+ - sum of cycles for given accesses - Remote/Local HITM and generic load
+
+ cpu cnt
+ - number of cpus that participated in the access
+
+ Symbol
+ - code symbol related to the 'Code address' value
+
+ Shared Object
+ - shared object name related to the 'Code address' value
+
+ Source:Line
+ - source information related to the 'Code address' value
+
+ Node
+ - nodes participating in the access (see NODE INFO section)
+
+NODE INFO
+---------
+The 'Node' field displays the nodes that access the given cacheline
+offset. Its output comes in 3 flavors:
+ - node IDs separated by ','
+ - node IDs with stats for each ID, in the following format:
+ Node{cpus %hitms %stores}
+ - node IDs with the list of affected CPUs, in the following format:
+ Node{cpu list}
+
+The user can switch between the above flavors with the -N option, or
+use the 'n' key to switch interactively in TUI mode.
+
+COALESCE
+--------
+The user can specify how to sort offsets for a cacheline.
+
+The following fields are available and govern the final set of
+output fields for the cacheline offsets output:
+
+ tid - coalesced by process TIDs
+ pid - coalesced by process PIDs
+ iaddr - coalesced by code address, following fields are displayed:
+ Code address, Code symbol, Shared Object, Source line
+ dso - coalesced by shared object
+
+By default the coalescing is set up with 'pid,iaddr'.
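+
+For example, to coalesce offsets by thread and code address (an
+illustrative invocation):
+
+  $ perf c2c report -c tid,iaddr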
+
+STDIO OUTPUT
+------------
+The stdio output displays data on standard output.
+
+The following tables are displayed:
+ Trace Event Information
+ - overall statistics of memory accesses
+
+ Global Shared Cache Line Event Information
+ - overall statistics on shared cachelines
+
+ Shared Data Cache Line Table
+ - list of most expensive cachelines
+
+ Shared Cache Line Distribution Pareto
+ - list of all accessed offsets for each cacheline
+
+TUI OUTPUT
+----------
+The TUI output provides an interactive interface to navigate
+through the cachelines list and to display offset details.
+
+For details please refer to the help window by pressing the '?' key.
+
+CREDITS
+-------
+Although Don Zickus, Dick Fowles and Joe Mario worked together
+to get this implemented, we got lots of early help from Arnaldo
+Carvalho de Melo, Stephane Eranian, Jiri Olsa and Andi Kleen.
+
+C2C BLOG
+--------
+Check Joe's blog on c2c tool for detailed use case explanation:
+ https://joemario.github.io/blog/2016/09/01/c2c-blog/
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-mem[1]
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
new file mode 100644
index 000000000..05c10eb56
--- /dev/null
+++ b/tools/perf/Documentation/perf-config.txt
@@ -0,0 +1,520 @@
+perf-config(1)
+==============
+
+NAME
+----
+perf-config - Get and set variables in a configuration file.
+
+SYNOPSIS
+--------
+[verse]
+'perf config' [<file-option>] [section.name[=value] ...]
+or
+'perf config' [<file-option>] -l | --list
+
+DESCRIPTION
+-----------
+You can manage variables in a configuration file with this command.
+
+OPTIONS
+-------
+
+-l::
+--list::
+ Show current config variables, name and value, for all sections.
+
+--user::
+ For writing and reading options: write to user
+ '$HOME/.perfconfig' file or read it.
+
+--system::
+ For writing and reading options: write to system-wide
+ '$(sysconfdir)/perfconfig' or read it.
+
+CONFIGURATION FILE
+------------------
+
+The perf configuration file contains many variables to change various
+aspects of each of its tools, including output, disk usage, etc.
+The '$HOME/.perfconfig' file is used to store a per-user configuration.
+The file '$(sysconfdir)/perfconfig' can be used to
+store a system-wide default configuration.
+
+When reading or writing, the values are read from the system and user
+configuration files by default, and options '--system' and '--user'
+can be used to tell the command to read from or write to only that location.
+
+Syntax
+~~~~~~
+
+The file consists of sections. A section starts with its name
+surrounded by square brackets and continues till the next section
+begins. Each variable must be in a section, and have the form
+'name = value', for example:
+
+ [section]
+ name1 = value1
+ name2 = value2
+
+Section names are case sensitive and can contain any characters except
+newline (double quote `"` and backslash have to be escaped as `\"` and `\\`,
+respectively). Section headers can't span multiple lines.
+
+Example
+~~~~~~~
+
+Given a $HOME/.perfconfig like this:
+
+#
+# This is the config file, and
+# a '#' and ';' character indicates a comment
+#
+
+ [colors]
+ # Color variables
+ top = red, default
+ medium = green, default
+ normal = lightgray, default
+ selected = white, lightgray
+ jump_arrows = blue, default
+ addr = magenta, default
+ root = white, blue
+
+ [tui]
+ # Defaults if linked with libslang
+ report = on
+ annotate = on
+ top = on
+
+ [buildid]
+ # Default, disable using /dev/null
+ dir = ~/.debug
+
+ [annotate]
+ # Defaults
+ hide_src_code = false
+ use_offset = true
+ jump_arrows = true
+ show_nr_jumps = false
+
+ [help]
+ # Format can be man, info, web or html
+ format = man
+ autocorrect = 0
+
+ [ui]
+ show-headers = true
+
+ [call-graph]
+ # fp (framepointer), dwarf
+ record-mode = fp
+ print-type = graph
+ order = caller
+ sort-key = function
+
+ [report]
+ # Defaults
+ sort_order = comm,dso,symbol
+ percent-limit = 0
+ queue-size = 0
+ children = true
+ group = true
+
+You can hide the source code shown by the annotate feature by setting the config to true with
+
+ % perf config annotate.hide_src_code=true
+
+If you want to add or modify several config items, you can do so like this:
+
+ % perf config ui.show-headers=false kmem.default=slab
+
+To modify the sort order of the report functionality in the user config file (i.e. `~/.perfconfig`), do
+
+ % perf config --user report.sort-order=srcline
+
+To change the colors of the selected line to other foreground and
+background colors in the system config file (i.e. `$(sysconfdir)/perfconfig`), do
+
+ % perf config --system colors.selected=yellow,green
+
+To query the record mode of call graph, do
+
+ % perf config call-graph.record-mode
+
+If you want to query multiple config key/value pairs, you can do so like this:
+
+ % perf config report.queue-size call-graph.order report.children
+
+To query the config value of the call graph sort order in the user config file (i.e. `~/.perfconfig`), do
+
+ % perf config --user call-graph.sort-order
+
+To query the config value of the buildid directory in the system config file (i.e. `$(sysconfdir)/perfconfig`), do
+
+ % perf config --system buildid.dir
+
+Variables
+~~~~~~~~~
+
+colors.*::
+ The variables for customizing the colors used in the output for the
+ 'report', 'top' and 'annotate' in the TUI. They should specify the
+ foreground and background colors, separated by a comma, for example:
+
+ medium = green, lightgray
+
+ If you want to use the color configured for your terminal, just leave it
+ as 'default', for example:
+
+ medium = default, lightgray
+
+ Available colors:
+ red, yellow, green, cyan, gray, black, blue,
+ white, default, magenta, lightgray
+
+ colors.top::
+ 'top' means an overhead percentage which is more than 5%,
+ and the values of this variable specify the percentage colors.
+ The basic key values are foreground-color 'red' and
+ background-color 'default'.
+ colors.medium::
+ 'medium' means an overhead percentage of more than 0.5%.
+ Default values are 'green' and 'default'.
+ colors.normal::
+ 'normal' means the rest of the overhead percentages
+ except 'top', 'medium' and 'selected'.
+ Default values are 'lightgray' and 'default'.
+ colors.selected::
+ This selects the colors for the current entry in a list of entries
+ from sub-commands (top, report, annotate).
+ Default values are 'black' and 'lightgray'.
+ colors.jump_arrows::
+ Colors for jump arrows on assembly code listings
+ such as 'jns', 'jmp', 'jne', etc.
+ Default values are 'blue', 'default'.
+ colors.addr::
+ This selects colors for addresses from 'annotate'.
+ Default values are 'magenta', 'default'.
+ colors.root::
+ Colors for headers in the output of sub-commands (top, report).
+ Default values are 'white', 'blue'.
+
+tui.*, gtk.*::
+ Subcommands that can be configured here are 'top', 'report' and 'annotate'.
+ These values are booleans, for example:
+
+ [tui]
+ top = true
+
+ will make the TUI the default for the 'top' subcommand. Those will be
+ available if the required libs were detected at tool build time.
+
+buildid.*::
+ buildid.dir::
+ Each executable and shared library in modern distributions comes
+ with a content-based identifier that, if available, will be inserted
+ in a 'perf.data' file header to find, at analysis time, what is
+ needed to do symbol resolution, code annotation, etc.
+
+ The recording tools also store a hard link or copy of the binaries,
+ shared libraries, /proc/kallsyms and /proc/kcore files in a per-user
+ directory, $HOME/.debug/, to be used at analysis time.
+
+ The buildid.dir variable can be used to either change this directory
+ cache location, or to disable it altogether. If you want to disable
+ it, set buildid.dir to /dev/null. The default is $HOME/.debug.
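+
+ For example, to move the cache to another location, or to disable
+ it (the path shown is illustrative):
+
+ % perf config --user buildid.dir=/var/tmp/.debug
+ % perf config --user buildid.dir=/dev/null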
+
+annotate.*::
+ These options work only in the TUI. They control the display of
+ addresses, jump arrows and source code in the lines of assembly code
+ shown for a specific program.
+
+ annotate.hide_src_code::
+ If a program which is analyzed has source code,
+ this option lets 'annotate' print a list of assembly code with the source code.
+ For example, let's see a part of a program. There're four lines.
+ If this option is 'true', they can be printed
+ without source code from a program as below.
+
+ │ push %rbp
+ │ mov %rsp,%rbp
+ │ sub $0x10,%rsp
+ │ mov (%rdi),%rdx
+
+ But if this option is 'false', the source code of that part is
+ also printed, as below. Default is 'false'.
+
+ │ struct rb_node *rb_next(const struct rb_node *node)
+ │ {
+ │ push %rbp
+ │ mov %rsp,%rbp
+ │ sub $0x10,%rsp
+ │ struct rb_node *parent;
+ │
+ │ if (RB_EMPTY_NODE(node))
+ │ mov (%rdi),%rdx
+ │ return n;
+
+ annotate.use_offset::
+ Based on the first address of a loaded function, an offset can be
+ used: instead of the original addresses of the assembly code,
+ addresses relative to that base address can be printed.
+ To illustrate with an example,
+ if the base address is 0xffffffff81624d50 as below,
+
+ ffffffff81624d50 <load0>
+
+ an instruction in the assembly code has its absolute address shown, as below,
+
+ ffffffff816250b8:│ mov 0x8(%r14),%rdi
+
+ but if use_offset is 'true', the offset from the base address is
+ printed instead. Default is 'true'. This option is only applied to the TUI.
+
+ 368:│ mov 0x8(%r14),%rdi
+
+ annotate.jump_arrows::
+ There can be jump instructions in the assembly code. Depending on
+ the boolean value of jump_arrows, arrows representing where the
+ instructions jump to can be printed, as below.
+
+ │ ┌──jmp 1333
+ │ │ xchg %ax,%ax
+ │1330:│ mov %r15,%r10
+ │1333:└─→cmp %r15,%r14
+
+ If jump_arrows is 'false', the arrows aren't printed, as below.
+ Default is 'true'.
+
+ │ ↓ jmp 1333
+ │ xchg %ax,%ax
+ │1330: mov %r15,%r10
+ │1333: cmp %r15,%r14
+
+ annotate.show_linenr::
+ When showing source code, if this option is 'true',
+ line numbers are printed as below.
+
+ │1628 if (type & PERF_SAMPLE_IDENTIFIER) {
+ │ ↓ jne 508
+ │1628 data->id = *array;
+ │1629 array++;
+ │1630 }
+
+ However, if this option is 'false', they aren't printed, as below.
+ Default is 'false'.
+
+ │ if (type & PERF_SAMPLE_IDENTIFIER) {
+ │ ↓ jne 508
+ │ data->id = *array;
+ │ array++;
+ │ }
+
+ annotate.show_nr_jumps::
+ Let's see a part of assembly code.
+
+ │1382: movb $0x1,-0x270(%rbp)
+
+ If this option is 'true', the number of branches jumping to that
+ address is printed as below. Default is 'false'.
+
+ │1 1382: movb $0x1,-0x270(%rbp)
+
+ annotate.show_total_period::
+ To compare two records on an instruction basis, this option
+ displays the total number of samples that belong to a line of
+ assembly code. If this option is 'true', total periods are printed
+ instead of percent values, as below.
+
+ 302 │ mov %eax,%eax
+
+ But if this option is 'false', percent values for overhead are
+ printed instead, as below. Default is 'false'.
+
+ 99.93 │ mov %eax,%eax
+
+ annotate.offset_level::
+ Default is '1', meaning just jump targets will have offsets shown
+ right beside the instruction. When set to '2', 'call' instructions
+ will also have their offsets shown; 3 or higher will show offsets
+ for all instructions.
+
+hist.*::
+ hist.percentage::
+ This option controls the way the overhead of filtered entries is
+ calculated - that means the value of this option is effective only
+ if there's a filter (by comm, dso or symbol name). Suppose the
+ following example:
+
+ Overhead Symbols
+ ........ .......
+ 33.33% foo
+ 33.33% bar
+ 33.33% baz
+
+ This is the original overhead, and we'll filter out the first 'foo'
+ entry. The value 'relative' would increase the overhead of 'bar'
+ and 'baz' to 50.00% each, while 'absolute' would show their
+ current overhead (33.33%).
+
+ui.*::
+ ui.show-headers::
+ This option controls display of column headers (like 'Overhead' and 'Symbol')
+ in 'report' and 'top'. If this option is false, they are hidden.
+ This option is only applied to TUI.
+
+call-graph.*::
+ When the sub-commands 'top' and 'report' work with -g/--children,
+ these options control the call-graph behavior.
+
+ call-graph.record-mode::
+ The record-mode can be 'fp' (frame pointer), 'dwarf' or 'lbr'.
+ The value 'dwarf' is effective only if perf detects the needed
+ library (libunwind or a recent version of libdw).
+ 'lbr' only works for CPUs that support it.
+
+ call-graph.dump-size::
+ The size of the stack to dump in order to do post-unwinding.
+ Default is 8192 (bytes). When 'dwarf' is used as the record-mode,
+ the default size is used if this option is omitted.
+
+ call-graph.print-type::
+ The print-type can be graph (graph absolute), fractal (graph
+ relative), flat or folded. This option controls the way overhead is
+ shown for each callchain entry. Suppose the following example.
+
+ Overhead Symbols
+ ........ .......
+ 40.00% foo
+ |
+ ---foo
+ |
+ |--50.00%--bar
+ | main
+ |
+ --50.00%--baz
+ main
+
+ This output is in the 'fractal' format. The 'foo' came from 'bar'
+ and 'baz' exactly half and half, so 'fractal' shows 50.00% for each
+ (meaning that it assumes 100% total overhead of 'foo').
+
+ The 'graph' uses the absolute overhead value of 'foo' as the total,
+ so each of the 'bar' and 'baz' callchains will have 20.00% of
+ overhead. If 'flat' is used, call chains are shown in a single
+ column with linear exposure. 'folded' means call chains are
+ displayed on one line, separated by semicolons.
+
+ call-graph.order::
+ This option controls print order of callchains. The default is
+ 'callee' which means callee is printed at top and then followed by its
+ caller and so on. The 'caller' prints it in reverse order.
+
+ If this option is not set and report.children or top.children is
+ set to true (or the equivalent command line option is given),
+ the default value of this option is changed to 'caller' for the
+ execution of 'perf report' or 'perf top'. Other commands will
+ still default to 'callee'.
+
+ call-graph.sort-key::
+ Callchains are merged if they contain the same information.
+ The sort-key option determines the way the callchains are compared.
+ The value of 'sort-key' can be 'function' or 'address'.
+ The default is 'function'.
+
+ call-graph.threshold::
+ When there are many callchains, printing all of them would produce
+ tons of lines, so perf omits small callchains below a certain
+ overhead (threshold), and this option controls that threshold.
+ Default is 0.5 (%). How the overhead is calculated depends on
+ call-graph.print-type.
+
+ call-graph.print-limit::
+ This is a maximum number of lines of callchain printed for a single
+ histogram entry. Default is 0 which means no limitation.
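+
+ As a sketch, several of the above settings can be changed in one
+ invocation (the values are illustrative):
+
+ % perf config call-graph.record-mode=dwarf call-graph.order=caller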
+
+report.*::
+ report.sort_order::
+ Allows changing the default sort order from "comm,dso,symbol" to
+ some other default, for instance "sym,dso" may be more fitting for
+ kernel developers.
+ report.percent-limit::
+ This one is mostly the same as call-graph.threshold but works for
+ histogram entries. Entries having an overhead lower than this
+ percentage will not be printed. Default is '0'. If percent-limit
+ is '10', only entries which have more than 10% of overhead will be
+ printed.
+
+ report.queue-size::
+ This option sets up the maximum allocation size of the internal
+ event queue for ordering events. Default is 0, meaning no limit.
+
+ report.children::
+ 'Children' means functions called from another function.
+ If this option is true, 'perf report' accumulates the callchains of
+ children and shows the (accumulated) total overhead as well as the
+ 'Self' overhead.
+ Please refer to the 'perf report' manual. The default is 'true'.
+
+ report.group::
+ This option shows event group information together.
+ Example output with this turned on; notice that there is one column
+ per event in the group, ref-cycles and cycles:
+
+ # group: {ref-cycles,cycles}
+ # ========
+ #
+ # Samples: 7K of event 'anon group { ref-cycles, cycles }'
+ # Event count (approx.): 6876107743
+ #
+ # Overhead Command Shared Object Symbol
+ # ................ ....... ................. ...................
+ #
+ 99.84% 99.76% noploop noploop [.] main
+ 0.07% 0.00% noploop ld-2.15.so [.] strcmp
+ 0.03% 0.00% noploop [kernel.kallsyms] [k] timerqueue_del
+
+top.*::
+ top.children::
+ Same as 'report.children'. So if it is enabled, the output of 'top'
+ command will have 'Children' overhead column as well as 'Self' overhead
+ column by default.
+ The default is 'true'.
+
+man.*::
+ man.viewer::
+ This option can assign a tool to view manual pages when the 'help'
+ subcommand is invoked. Supported tools are 'man', 'woman'
+ (with emacs client) and 'konqueror'. Default is 'man'.
+
+ A new man viewer tool can also be added using 'man.<tool>.cmd',
+ or a different path can be used via the 'man.<tool>.path' config
+ option.
+
+pager.*::
+ pager.<subcommand>::
+ When the subcommand is run on stdio, this value determines whether
+ it uses a pager or not. Default is 'unspecified'.
+
+kmem.*::
+ kmem.default::
+ This option decides which allocator is to be analyzed if neither
+ '--slab' nor '--page' option is used. Default is 'slab'.
+
+record.*::
+ record.build-id::
+ This option can be 'cache', 'no-cache' or 'skip'.
+ 'cache' is to post-process data and save/update the binaries into
+ the build-id cache (in ~/.debug). This is the default.
+ But if this option is 'no-cache', it will not update the build-id cache.
+ 'skip' skips post-processing and does not update the cache.
+
+diff.*::
+ diff.order::
+ This option sets the column number by which to sort the result.
+ The default is 0, which means sorting by baseline.
+ Setting it to 1 will sort the result by delta (or another
+ selected compute method).
+
+ diff.compute::
+ This option sets the method for computing the diff result.
+ Possible values are 'delta', 'delta-abs', 'ratio' and
+ 'wdiff'. Default is 'delta'.
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
new file mode 100644
index 000000000..c87180764
--- /dev/null
+++ b/tools/perf/Documentation/perf-data.txt
@@ -0,0 +1,48 @@
+perf-data(1)
+============
+
+NAME
+----
+perf-data - Data file related processing
+
+SYNOPSIS
+--------
+[verse]
+'perf data' [<common options>] <command> [<options>]
+
+DESCRIPTION
+-----------
+Data file related processing.
+
+COMMANDS
+--------
+convert::
+ Converts a perf data file into another format (only the CTF [1]
+ format is supported for now). It's possible to set the data-convert
+ debug variable to get debug messages from the conversion, like:
+ perf --debug data-convert data convert ...
+
+OPTIONS for 'convert'
+---------------------
+--to-ctf::
+ Triggers the CTF conversion, specify the path of CTF data directory.
+
+-i::
+ Specify input perf data file path.
+
+-f::
+--force::
+ Don't complain, do it.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+--all::
+ Convert all events, including non-sample events (comm, fork, ...), to output.
+ Default is off, only convert samples.
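+
+For example, a sketch of converting an existing perf.data file into a
+CTF directory (paths are illustrative):
+
+  perf data convert --to-ctf=./ctf-data -i perf.data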
+
+SEE ALSO
+--------
+linkperf:perf[1]
+[1] Common Trace Format - http://www.efficios.com/ctf
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
new file mode 100644
index 000000000..a79c84ae6
--- /dev/null
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -0,0 +1,227 @@
+perf-diff(1)
+============
+
+NAME
+----
+perf-diff - Read perf.data files and display the differential profile
+
+SYNOPSIS
+--------
+[verse]
+'perf diff' [baseline file] [data file1] [[data file2] ... ]
+
+DESCRIPTION
+-----------
+This command displays the performance difference amongst two or more perf.data
+files captured via perf record.
+
+If no parameters are passed it will assume perf.data.old and perf.data.
+
+The differential profile is displayed only for events matching both
+specified perf.data files.
+
+If no parameters are passed the samples will be sorted by dso and symbol.
+As the perf.data files could come from different binaries, the symbol
+addresses could vary. So perf diff bases the comparison on the file
+and symbol names.
+
+OPTIONS
+-------
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+-m::
+--modules::
+ Load module symbols. WARNING: use only with -k and LIVE kernel
+
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
+
+-C::
+--comms=::
+ Only consider symbols in these comms. CSV that understands
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
+
+-S::
+--symbols=::
+ Only consider these symbols. CSV that understands
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
+
+-s::
+--sort=::
+ Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
+ Please see description of --sort in the perf-report man page.
+
+-t::
+--field-separator=::
+
+ Use a special separator character and don't pad with spaces,
+ replacing all occurrences of this separator in symbol names (and
+ other output) with a '.' character, so that it is the only
+ non-valid separator.
+
+-v::
+--verbose::
+ Be verbose, for instance, show the raw counts in addition to the
+ diff.
+
+-q::
+--quiet::
+ Do not show any message. (Suppress -v)
+
+-f::
+--force::
+ Don't do ownership validation.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-b::
+--baseline-only::
+ Show only items with a match in the baseline.
+
+-c::
+--compute::
+ Differential computation selection - delta, ratio, wdiff, delta-abs
+ (default is delta-abs). Default can be changed using diff.compute
+ config option. See COMPARISON METHODS section for more info.
+
+-p::
+--period::
+ Show period values for both compared hist entries.
+
+-F::
+--formula::
+ Show formula for given computation.
+
+-o::
+--order::
+ Specify the compute sorting column number. 0 means sorting by
+ baseline overhead and 1 (default) means sorting by the computed
+ value of column 1 (data from the first file other than the
+ baseline). Values greater than 1 can be used only if enough data
+ files are provided.
+ The default value can be set using the diff.order config option.
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options.
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
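+
+For example, a sketch comparing two recordings with the 'ratio' method
+and sorting by the computed column (file names are illustrative):
+
+  perf diff -c ratio -o 1 perf.data.old perf.data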
+
+COMPARISON
+----------
+The comparison is governed by the baseline file. The baseline
+perf.data file is iterated for samples. All other perf.data files
+specified on the command line are searched for the baseline sample
+pair. If the pair is found, the specified computation is made and the
+result is displayed.
+
+All samples from non-baseline perf.data files that do not match any
+baseline entry are displayed with empty space within the baseline
+column and possible computation results (delta) in their related
+column.
+
+Example files samples:
+- file A with samples f1, f2, f3, f4, f6
+- file B with samples f2, f4, f5
+- file C with samples f1, f2, f5
+
+Example output:
+ x - computation takes place for pair
+ b - baseline sample percentage
+
+- perf diff A B C
+
+ baseline/A compute/B compute/C samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b f3
+ b x f4
+ b f6
+ x x f5
+
+- perf diff B A C
+
+ baseline/B compute/A compute/C samples
+ ---------------------------------------
+ b x x f2
+ b x f4
+ b x f5
+ x x f1
+ x f3
+ x f6
+
+- perf diff C B A
+
+ baseline/C compute/B compute/A samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b x f5
+ x f3
+ x x f4
+ x f6
+
+COMPARISON METHODS
+------------------
+delta
+~~~~~
+If specified, the 'Delta' column is displayed with the value 'd' computed as:
+
+ d = A->period_percent - B->period_percent
+
+with:
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period_percent being the % of the hist entry period value within
+ single data file
+
+ - with filtering by -C, -d and/or -S, period_percent might be changed
+ relative to how entries are filtered. Use --percentage=absolute to
+ prevent such fluctuation.
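+
+As a worked example with illustrative numbers: if an entry accounts
+for 30.00% of the period in the baseline file and 25.00% in the data
+file, then d = 25.00 - 30.00 = -5.00.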
+
+delta-abs
+~~~~~~~~~
+Same as the 'delta' method, but sorts the result by the absolute values.
+
+ratio
+~~~~~
+If specified, the 'Ratio' column is displayed with the value 'r' computed as:
+
+ r = A->period / B->period
+
+with:
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period being the hist entry period value
+
+wdiff:WEIGHT-B,WEIGHT-A
+~~~~~~~~~~~~~~~~~~~~~~~
+If specified, the 'Weighted diff' column is displayed with the value 'd' computed as:
+
+ d = B->period * WEIGHT-A - A->period * WEIGHT-B
+
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period being the hist entry period value
+
+ - WEIGHT-A/WEIGHT-B being user-supplied weights in the '-c' option
+ behind the ':' separator, like '-c wdiff:1,2'.
+ - WEIGHT-A being the weight of the data file
+ - WEIGHT-B being the weight of the baseline data file
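+
+As a worked example with illustrative numbers: with '-c wdiff:1,2'
+(WEIGHT-B = 1, WEIGHT-A = 2), a baseline period of 100 and a data file
+period of 60 give d = 100 * 2 - 60 * 1 = 140.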
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
new file mode 100644
index 000000000..c0a66400a
--- /dev/null
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -0,0 +1,45 @@
+perf-evlist(1)
+==============
+
+NAME
+----
+perf-evlist - List the event names in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf evlist <options>'
+
+DESCRIPTION
+-----------
+This command displays the names of events sampled in a perf.data file.
+
+OPTIONS
+-------
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-f::
+--force::
+ Don't complain, do it.
+
+-F::
+--freq=::
+ Show just the sample frequency used for each event.
+
+-v::
+--verbose=::
+ Show all fields.
+
+-g::
+--group::
+ Show event group information.
+
+--trace-fields::
+ Show tracepoint field names.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-list[1],
+linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
new file mode 100644
index 000000000..b80c84307
--- /dev/null
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -0,0 +1,87 @@
+perf-ftrace(1)
+==============
+
+NAME
+----
+perf-ftrace - simple wrapper for kernel's ftrace functionality
+
+
+SYNOPSIS
+--------
+[verse]
+'perf ftrace' <command>
+
+DESCRIPTION
+-----------
+The 'perf ftrace' command is a simple wrapper around the kernel's
+ftrace functionality. It currently supports only single-thread
+tracing and just reads trace_pipe as text, writing it to stdout.
+
+The following options apply to perf ftrace.
+
+OPTIONS
+-------
+
+-t::
+--tracer=::
+ Tracer to use: function_graph or function.
+
+-v::
+--verbose=::
+ Verbosity level.
+
+-p::
+--pid=::
+ Trace on existing process id (comma separated list).
+
+-a::
+--all-cpus::
+ Force system-wide collection. Scripts run without a <command>
+ normally use -a by default, while scripts run with a <command>
+ normally don't - this option allows the latter to be run in
+ system-wide mode.
+
+-C::
+--cpu=::
+ Only trace for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma separated list with no space like: 0,1.
+ Ranges of CPUs are specified with -: 0-2.
+ Default is to trace on all online CPUs.
+
+-T::
+--trace-funcs=::
+ Only trace functions given by the argument. Multiple functions
+ can be given by using this option more than once. The function
+ argument also can be a glob pattern. It will be passed to
+ 'set_ftrace_filter' in tracefs.
+
+-N::
+--notrace-funcs=::
+ Do not trace functions given by the argument. Like -T option,
+ this can be used more than once to specify multiple functions
+ (or glob patterns). It will be passed to 'set_ftrace_notrace'
+ in tracefs.
+
+-G::
+--graph-funcs=::
+ Set graph filter on the given function (or a glob pattern).
+ This is useful for the function_graph tracer only and enables
+ tracing for functions executed from the given function.
+ This can be used more than once to specify multiple functions.
+ It will be passed to 'set_graph_function' in tracefs.
+
+-g::
+--nograph-funcs=::
+ Set graph notrace filter on the given function (or a glob pattern).
+ Like -G option, this is useful for the function_graph tracer only
+ and disables tracing for functions executed from the given function.
+ This can be used more than once to specify multiple functions.
+ It will be passed to 'set_graph_notrace' in tracefs.
+
+-D::
+--graph-depth=::
+ Set the max depth for the function graph tracer to follow.
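+
+For example, a sketch of tracing one workload with the function_graph
+tracer limited to a depth of 3 (the workload is a placeholder):
+
+  perf ftrace -t function_graph -D 3 sleep 1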
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-trace[1]
diff --git a/tools/perf/Documentation/perf-help.txt b/tools/perf/Documentation/perf-help.txt
new file mode 100644
index 000000000..514391818
--- /dev/null
+++ b/tools/perf/Documentation/perf-help.txt
@@ -0,0 +1,38 @@
+perf-help(1)
+============
+
+NAME
+----
+perf-help - display help information about perf
+
+SYNOPSIS
+--------
+'perf help' [-a|--all] [COMMAND]
+
+DESCRIPTION
+-----------
+
+With no options and no COMMAND given, the synopsis of the 'perf'
+command and a list of the most commonly used perf commands are printed
+on the standard output.
+
+If the option '--all' or '-a' is given, then all available commands are
+printed on the standard output.
+
+If a perf command is named, a manual page for that command is brought
+up. The 'man' program is used by default for this purpose, but this
+can be overridden by other options or configuration variables.
+
+Note that `perf --help ...` is identical to `perf help ...` because the
+former is internally converted into the latter.
+
+OPTIONS
+-------
+-a::
+--all::
+ Prints all the available commands on the standard output. This
+ option supersedes any other option.
+
+PERF
+----
+Part of the linkperf:perf[1] suite
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
new file mode 100644
index 000000000..a64d65884
--- /dev/null
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -0,0 +1,69 @@
+perf-inject(1)
+==============
+
+NAME
+----
+perf-inject - Filter to augment the events stream with additional information
+
+SYNOPSIS
+--------
+[verse]
+'perf inject <options>'
+
+DESCRIPTION
+-----------
+perf-inject reads a perf-record event stream and repipes it to stdout. At any
+point the processing code can inject other events into the event stream - in
+this case build-ids (-b option) are read and injected as needed into the event
+stream.
+
+Build-ids are just the first user of perf-inject - potentially anything that
+needs userspace processing to augment the events stream with additional
+information could make use of this facility.
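+
+As a sketch, a typical use is to fold build-ids into a recording
+before shipping it to another machine (file names are illustrative):
+
+  perf inject -b -i perf.data -o perf.data.with-buildids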
+
+OPTIONS
+-------
+-b::
+--build-ids=::
+ Inject build-ids into the output stream
+-v::
+--verbose::
+ Be more verbose.
+-i::
+--input=::
+ Input file name. (default: stdin)
+-o::
+--output=::
+ Output file name. (default: stdout)
+-s::
+--sched-stat::
+ Merge sched_stat and sched_switch to get events showing where and
+ how long tasks slept. sched_switch contains the callchain where a
+ task slept and sched_stat contains the timeslice of how long a task
+ slept.
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+--itrace::
+ Decode Instruction Tracing data, replacing it with synthesized events.
+ Options are:
+
+include::itrace.txt[]
+
+--strip::
+ Use with --itrace to strip out non-synthesized events.
+
+-j::
+--jit::
+ Process jitdump files by injecting the mmap records corresponding to
+ jitted functions. This option also generates the ELF images for each
+ jitted function found in the jitdump files captured in the input
+ perf.data file. Use this option if you are monitoring an environment
+ that uses JIT runtimes, such as Java, DART or V8.
+
+-f::
+--force::
+ Don't complain, do it.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
new file mode 100644
index 000000000..f3c620951
--- /dev/null
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -0,0 +1,24 @@
+perf-kallsyms(1)
+================
+
+NAME
+----
+perf-kallsyms - Searches running kernel for symbols
+
+SYNOPSIS
+--------
+[verse]
+'perf kallsyms' [<options>] symbol_name[,symbol_name...]
+
+DESCRIPTION
+-----------
+This command searches the running kernel's kallsyms file for the given
+symbol(s) and prints information about them, including the DSO, the
+kallsyms begin/end addresses and the addresses in the ELF kallsyms
+symbol table (for symbols in modules).
+
+OPTIONS
+-------
+-v::
+--verbose=::
+ Increase verbosity level, showing details about symbol table loading, etc.
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 000000000..85b8ac695
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,77 @@
+perf-kmem(1)
+============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record|stat} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+ 'perf kmem record <command>' to record the kmem events
+ of an arbitrary workload.
+
+ 'perf kmem stat' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+ Select the input file (default: perf.data unless stdin is a fifo)
+
+-f::
+--force::
+ Don't do ownership validation
+
+-v::
+--verbose::
+ Be more verbose. (show symbol address, etc)
+
+--caller::
+ Show per-callsite statistics
+
+--alloc::
+ Show per-allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+ Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
+ for page). Available sort keys are 'ptr, callsite, bytes, hit,
+ pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
+ migtype, gfp' for page. This option should be preceded by one of the
+ mode selection options - i.e. --slab, --page, --alloc and/or --caller.
+
+-l <num>::
+--line=<num>::
+ Print n lines only
+
+--raw-ip::
+ Print raw ip instead of symbol
+
+--slab::
+ Analyze SLAB allocator events.
+
+--page::
+ Analyze page allocator events
+
+--live::
+ Show live page stats. perf kmem shows total allocation stats by
+ default, but this option shows live (currently allocated) pages
+ instead. (This option works only with the --page option.)
+
+--time=<start>,<stop>::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e.,
+ the time string is ',x.y') then analysis starts at the beginning of
+ the file. If the stop time is not given (i.e., the time string is
+ 'x.y,') then analysis goes to the end of the file.
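+
+For example, a sketch of recording slab events for a workload and
+showing the top call sites by bytes (the workload is a placeholder):
+
+  perf kmem record sleep 5
+  perf kmem stat --slab --caller -s bytes -l 10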
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
new file mode 100644
index 000000000..6a5bb2b17
--- /dev/null
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -0,0 +1,164 @@
+perf-kvm(1)
+===========
+
+NAME
+----
+perf-kvm - Tool to trace/measure kvm guest os
+
+SYNOPSIS
+--------
+[verse]
+'perf kvm' [--host] [--guest] [--guestmount=<path>
+ [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
+ {top|record|report|diff|buildid-list} [<options>]
+'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
+ | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>]
+'perf kvm stat' [record|report|live] [<options>]
+
+DESCRIPTION
+-----------
+There are a couple of variants of perf kvm:
+
+ 'perf kvm [options] top <command>' to generate and display a
+ performance counter profile of the guest OS in realtime for an
+ arbitrary workload.
+
+ 'perf kvm record <command>' to record the performance counter profile
+ of an arbitrary workload and save it into a perf data file. The
+ default behavior of perf kvm is --guest, so if neither --host nor
+ --guest is given, the perf data file name is perf.data.guest. If
+ --host is given, the perf data file name is perf.data.kvm. If you
+ want to record data into perf.data.host, pass --host --no-guest.
+ The behaviors are as follows:
+ Default('') -> perf.data.guest
+ --host -> perf.data.kvm
+ --guest -> perf.data.guest
+ --host --guest -> perf.data.kvm
+ --host --no-guest -> perf.data.host
+
+ 'perf kvm report' to display the performance counter profile information
+ recorded via perf kvm record.
+
+ 'perf kvm diff' to display the performance difference between two
+ perf.data files captured via perf record.
+
+ 'perf kvm buildid-list' to display the buildids found in a perf data file,
+ so that other tools can be used to fetch packages with matching symbol tables
+ for use by perf report. As the buildid is read from /sys/kernel/notes
+ in the guest OS, if you want to list the buildid for a guest, make
+ sure your perf data file was captured with --guestmount in perf kvm
+ record.
+
+ 'perf kvm stat <command>' to run a command and gather performance counter
+ statistics.
+ In particular, 'perf kvm stat record/report' generates a statistical
+ analysis of KVM events. Currently, vmexit, mmio (x86 only) and
+ ioport (x86 only) events are supported. 'perf kvm stat record
+ <command>' records kvm events while <command> runs, between its
+ start and end, and produces a file which contains the tracing
+ results of the kvm events.
+
+ 'perf kvm stat report' reports statistical data which includes event
+ handling times, samples, and so on.
+
+ 'perf kvm stat live' reports statistical data in a live mode (similar to
+ record + report but with statistical data updated live at a given display
+ rate).
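+
+ For example, a sketch of a basic vmexit analysis of a running guest
+ (the pid is a placeholder):
+
+ # perf kvm stat record -p 8888 sleep 10
+ # perf kvm stat report --event=vmexit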
+
+OPTIONS
+-------
+-i::
+--input=<path>::
+ Input file name.
+-o::
+--output=<path>::
+ Output file name.
+--host::
+ Collect host side performance profile.
+--guest::
+ Collect guest side performance profile.
+--guestmount=<path>::
+ Guest OS root file system mount directory. The user mounts guest OS
+ root directories under <path> via a specific filesystem access
+ method, typically sshfs. For example, start 2 guest OSes; one's pid
+ is 8888 and the other's is 9999.
+ #mkdir ~/guestmount; cd ~/guestmount
+ #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
+ #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
+ #perf kvm --host --guest --guestmount=~/guestmount top
+--guestkallsyms=<path>::
+ Guest OS /proc/kallsyms file copy. 'perf kvm' reads it to get guest
+ kernel symbols. Users copy it out from the guest OS.
+--guestmodules=<path>::
+ Guest OS /proc/modules file copy. 'perf kvm' reads it to get guest
+ kernel module information. Users copy it out from the guest OS.
+--guestvmlinux=<path>::
+ Guest OS kernel vmlinux.
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+STAT REPORT OPTIONS
+-------------------
+--vcpu=<value>::
+ analyze events which occur on this vcpu. (default: all vcpus)
+
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio (x86 only),
+ ioport (x86 only). (default: vmexit)
+-k::
+--key=<value>::
+ Sorting key. Possible values: sample (default, sort by samples
+ number), time (sort by average time).
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+STAT LIVE OPTIONS
+-----------------
+-d::
+--display::
+ Time in seconds between display updates
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or a size
+ specification with an appended unit character - B/K/M/G. The size
+ is rounded up to the nearest power-of-two number of pages.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+--vcpu=<value>::
+ analyze events which occur on this vcpu. (default: all vcpus)
+
+
+--event=<value>::
+ event to be analyzed. Possible values: vmexit,
+ mmio (x86 only), ioport (x86 only).
+ (default: vmexit)
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: sample (default, sort by samples
+ number), time (sort by average time).
+
+--duration=<value>::
+ Show events other than HLT (x86 only) or Wait state (s390 only)
+ that take longer than duration usecs.
+
+--proc-map-timeout::
+ When processing pre-existing threads' /proc/XXX/mmap files, it may
+ take a long time, because the files may be huge. A timeout is
+ needed in such cases.
+ This option sets the timeout limit. The default value is 500 ms.
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-diff[1], linkperf:perf-buildid-list[1],
+linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
new file mode 100644
index 000000000..236b9b97d
--- /dev/null
+++ b/tools/perf/Documentation/perf-list.txt
@@ -0,0 +1,291 @@
+perf-list(1)
+============
+
+NAME
+----
+perf-list - List all symbolic event types
+
+SYNOPSIS
+--------
+[verse]
+'perf list' [--no-desc] [--long-desc]
+ [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]
+
+DESCRIPTION
+-----------
+This command displays the symbolic event types which can be selected in the
+various perf commands with the -e option.
+
+OPTIONS
+-------
+-d::
+--desc::
+Print extra event descriptions. (default)
+
+--no-desc::
+Don't print descriptions.
+
+-v::
+--long-desc::
+Print longer event descriptions.
+
+--debug::
+Enable debugging output.
+
+--details::
+Print how named events are resolved internally into perf events, and also
+any extra expressions computed by perf stat.
+
+[[EVENT_MODIFIERS]]
+EVENT MODIFIERS
+---------------
+
+Events can optionally have a modifier by appending a colon and one or
+more modifiers. Modifiers allow the user to restrict the events to be
+counted. The following modifiers exist:
+
+ u - user-space counting
+ k - kernel counting
+ h - hypervisor counting
+ I - non idle counting
+ G - guest counting (in KVM guests)
+ H - host counting (not in KVM guests)
+ p - precise level
+ P - use maximum detected precise level
+ S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
+ W - group is weak and will fallback to non-group if not schedulable,
+ only supported in 'perf stat' for now.
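+
+For example, to count cycles only in user space, or to sample
+instructions at the maximum detected precise level (both commands are
+sketches):
+
+ perf stat -e cycles:u ...
+ perf record -e instructions:P ...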
+
+The 'p' modifier can be used for specifying how precise the instruction
+address should be. The 'p' modifier can be specified multiple times:
+
+ 0 - SAMPLE_IP can have arbitrary skid
+ 1 - SAMPLE_IP must have constant skid
+ 2 - SAMPLE_IP requested to have 0 skid
+ 3 - SAMPLE_IP must have 0 skid, or uses randomization to avoid
+ sample shadowing effects.
+
+For Intel systems precise event sampling is implemented with PEBS,
+which supports up to precise-level 2, and precise level 3 for some
+special cases.
+
+On AMD systems it is implemented using IBS (up to precise-level 2).
+The precise modifier works with event types 0x76 (cpu-cycles, CPU
+clocks not halted) and 0xC1 (micro-ops retired). Both events map to
+IBS execution sampling (IBS op) with the IBS Op Counter Control bit
+(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s
+Manual Volume 2: System Programming, 13.3 Instruction-Based
+Sampling). Examples to use IBS:
+
+ perf record -a -e cpu-cycles:p ... # use ibs op counting cycles
+ perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+
+RAW HARDWARE EVENT DESCRIPTOR
+-----------------------------
+Even when an event is not available in a symbolic form within perf right now,
+it can be encoded in a processor-specific way.
+
+For instance, for x86 CPUs, NNN represents the raw register encoding with the
+layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
+of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344,
+Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
+
+Note: Only the following bit fields can be set in x86 counter
+registers: event, umask, edge, inv, cmask. In particular, guest/host
+only and OS/user mode flags must be set up using <<EVENT_MODIFIERS,
+EVENT MODIFIERS>>.
+
+Example:
+
+If the Intel docs for a QM720 Core i7 describe an event as:
+
+ Event Umask Event Mask
+ Num. Value Mnemonic Description Comment
+
+ A8H 01H LSD.UOPS Counts the number of micro-ops Use cmask=1 and
+ delivered by loop stream detector invert to count
+ cycles
+
+raw encoding of 0x1A8 can be used:
+
+ perf stat -e r1a8 -a sleep 1
+ perf record -e r1a8 ...
+
+You should refer to the processor-specific documentation for these
+details. Some of them are referenced in the SEE ALSO section below.
+
+ARBITRARY PMUS
+--------------
+
+perf also supports an extended syntax for specifying raw parameters
+to PMUs. Using this typically requires looking up the specific event
+in the CPU vendor specific documentation.
+
+The available PMUs and their raw parameters can be listed with
+
+ ls /sys/devices/*/format
+
+For example the "LSD.UOPS" core PMU raw event above could
+be specified as
+
+ perf stat -e cpu/event=0xa8,umask=0x1,name=LSD.UOPS_CYCLES,cmask=0x1/ ...
+
+ or using extended name syntax
+
+ perf stat -e cpu/event=0xa8,umask=0x1,cmask=0x1,name=\'LSD.UOPS_CYCLES:cmask=0x1\'/ ...
+
+PER SOCKET PMUS
+---------------
+
+Some PMUs are not associated with a core, but with a whole CPU socket.
+Events on these PMUs generally cannot be sampled, but only counted globally
+with perf stat -a. They can be bound to one logical CPU, but will measure
+all the CPUs in the same socket.
+
+This example measures memory bandwidth every second
+on the first memory controller on socket 0 of an Intel Xeon system
+
+ perf stat -C 0 -a uncore_imc_0/cas_count_read/,uncore_imc_0/cas_count_write/ -I 1000 ...
+
+Each memory controller has its own PMU. Measuring the complete system
+bandwidth would require specifying all imc PMUs (see perf list output),
+and adding the values together. To simplify creation of multiple events,
+prefix and glob matching is supported in the PMU name, and the prefix
+'uncore_' is also ignored when performing the match. So the command above
+can be expanded to all memory controllers by using the syntaxes:
+
+ perf stat -C 0 -a imc/cas_count_read/,imc/cas_count_write/ -I 1000 ...
+ perf stat -C 0 -a *imc*/cas_count_read/,*imc*/cas_count_write/ -I 1000 ...
+
+This example measures the combined core power every second
+
+ perf stat -I 1000 -e power/energy-cores/ -a
+
+ACCESS RESTRICTIONS
+-------------------
+
+For non-root users, generally only context-switched PMU events are
+available. These are normally only the events in the cpu PMU, the
+predefined events like cycles and instructions, and some software
+events.
+
+Other PMUs and global measurements are normally root only.
+Some event qualifiers, such as "any", are also root only.
+
+This can be overridden by setting the kernel.perf_event_paranoid
+sysctl to -1, which allows non-root users to use these events.
+
+For accessing trace point events perf needs to have read access to
+/sys/kernel/debug/tracing, even when perf_event_paranoid is in a relaxed
+setting.
+
+TRACING
+-------
+
+Some PMUs control advanced hardware tracing capabilities, such as
+Intel PT, that allow low-overhead execution tracing. These are
+described in a separate intel-pt.txt document.
+
+PARAMETERIZED EVENTS
+--------------------
+
+Some PMU events listed by 'perf-list' will be displayed with '?' in them. For
+example:
+
+ hv_gpci/dtbp_ptitc,phys_processor_idx=?/
+
+This means that when provided as an event, a value for '?' must
+also be supplied. For example:
+
+ perf stat -C 0 -e 'hv_gpci/dtbp_ptitc,phys_processor_idx=0x2/' ...
+
+EVENT GROUPS
+------------
+
+Perf supports time based multiplexing of events, when the number of events
+active exceeds the number of hardware performance counters. Multiplexing
+can cause measurement errors when the workload changes its execution
+profile.
+
+When metrics are computed using formulas from event counts, it is useful to
+ensure some events are always measured together as a group to minimize multiplexing
+errors. Event groups can be specified using { }.
+
+ perf stat -e '{instructions,cycles}' ...
+
+The number of available performance counters depends on the CPU. A
+group cannot contain more events than available counters.
+For example Intel Core CPUs typically have four generic performance counters
+for the core, plus three fixed counters for instructions, cycles and
+ref-cycles. Some special events have restrictions on which counter they
+can schedule, and may not support multiple instances in a single group.
+When too many events are specified in the group some of them will not
+be measured.
+
+Globally pinned events can limit the number of counters available for
+other groups. On x86 systems, the NMI watchdog pins a counter by
+default. The NMI watchdog can be disabled as root with
+
+ echo 0 > /proc/sys/kernel/nmi_watchdog
+
+Events from multiple different PMUs cannot be mixed in a group, with
+some exceptions for software events.
+
+LEADER SAMPLING
+---------------
+
+perf also supports group leader sampling using the :S specifier.
+
+ perf record -e '{cycles,instructions}:S' ...
+ perf report --group
+
+Normally all events in an event group sample, but with :S only
+the first event (the leader) samples, and it only reads the values of the
+other events in the group.
+
+OPTIONS
+-------
+
+Without options all known events will be listed.
+
+To limit the list use:
+
+. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
+
+. 'sw' or 'software' to list software events such as context switches, etc.
+
+. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
+
+. 'tracepoint' to list all tracepoint events, alternatively use
+ 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
+ block, etc.
+
+. 'pmu' to print the kernel supplied PMU events.
+
+. 'sdt' to list all Statically Defined Tracepoint events.
+
+. 'metric' to list metrics
+
+. 'metricgroup' to list metricgroups with metrics.
+
+. If none of the above is matched, it will apply the supplied glob to all
+ events, printing the ones that match.
+
+. As a last resort, it will do a substring search in all event names.
+
+One or more types can be used at the same time, listing the events for the
+types specified.
+
+Raw format is supported:
+
+. '--raw-dump', shows the raw-dump of all the events.
+. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
+ a certain kind of events.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1],
+http://www.intel.com/sdm/[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
new file mode 100644
index 000000000..74d774592
--- /dev/null
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -0,0 +1,70 @@
+perf-lock(1)
+============
+
+NAME
+----
+perf-lock - Analyze lock events
+
+SYNOPSIS
+--------
+[verse]
+'perf lock' {record|report|script|info}
+
+DESCRIPTION
+-----------
+You can analyze various lock behaviours
+and statistics with this 'perf lock' command.
+
+ 'perf lock record <command>' records lock events
+ while <command> runs, and produces the file "perf.data"
+ which contains the tracing results of lock events.
+
+ 'perf lock report' reports statistical data.
+
+ 'perf lock script' shows raw lock events.
+
+ 'perf lock info' shows metadata like threads or addresses
+ of lock instances.
+
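+A minimal end-to-end session might look like this (a sketch; any workload
+can stand in for 'sleep 1'):
+
+  perf lock record -- sleep 1
+  perf lock report
+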
+COMMON OPTIONS
+--------------
+
+-i::
+--input=<file>::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+ Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+-f::
+--force::
+	Don't complain, do it.
+
+REPORT OPTIONS
+--------------
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: acquired (default), contended,
+ avg_wait, wait_total, wait_max, wait_min.
+
+INFO OPTIONS
+------------
+
+-t::
+--threads::
+	Dump the thread list in perf.data.
+
+-m::
+--map::
+	Dump the map of lock instances (address:name table).
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
new file mode 100644
index 000000000..f8d2167cf
--- /dev/null
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -0,0 +1,92 @@
+perf-mem(1)
+===========
+
+NAME
+----
+perf-mem - Profile memory accesses
+
+SYNOPSIS
+--------
+[verse]
+'perf mem' [<options>] (record [<command>] | report)
+
+DESCRIPTION
+-----------
+"perf mem record" runs a command and gathers memory operation data
+from it, into perf.data. Perf record options are accepted and are passed through.
+
+"perf mem report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile. By default, loads
+and stores are sampled. Use the -t option to limit to loads or stores.
+
+Note that on Intel systems the memory latency reported is the use-latency,
+not the pure load (or store) latency. Use-latency includes any pipeline
+queueing delays in addition to the memory subsystem latency.
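+
+A typical session records a workload and then views the profile (a sketch;
+'./workload' stands in for any command):
+
+  perf mem record -- ./workload
+  perf mem report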
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-i::
+--input=<file>::
+ Input file name.
+
+-f::
+--force::
+ Don't do ownership validation
+
+-t::
+--type=<type>::
+ Select the memory operation type: load or store (default: load,store)
+
+-D::
+--dump-raw-samples::
+ Dump the raw decoded samples on the screen in a format that is easy to parse with
+ one sample per line.
+
+-x::
+--field-separator=<separator>::
+	Specify the field separator used when dumping raw samples (-D option). By
+	default, the separator is the space character.
+
+-C::
+--cpu=<cpu>::
+	Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+	comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+	Default is to monitor all CPUs.
+
+-U::
+--hide-unresolved::
+ Only display entries resolved to a symbol.
+
+-p::
+--phys-data::
+ Record/Report sample physical addresses
+
+RECORD OPTIONS
+--------------
+-e::
+--event <event>::
+ Event selector. Use 'perf mem record -e list' to list available events.
+
+-K::
+--all-kernel::
+ Configure all used events to run in kernel space.
+
+-U::
+--all-user::
+ Configure all used events to run in user space.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc)
+
+--ldlat <n>::
+ Specify desired latency for loads event.
+
+In addition, all 'perf report' options are valid for report, and all
+'perf record' options are valid for record.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
new file mode 100644
index 000000000..b6866a05e
--- /dev/null
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -0,0 +1,299 @@
+perf-probe(1)
+=============
+
+NAME
+----
+perf-probe - Define new dynamic tracepoints
+
+SYNOPSIS
+--------
+[verse]
+'perf probe' [options] --add='PROBE' [...]
+or
+'perf probe' [options] PROBE
+or
+'perf probe' [options] --del='[GROUP:]EVENT' [...]
+or
+'perf probe' --list[=[GROUP:]EVENT]
+or
+'perf probe' [options] --line='LINE'
+or
+'perf probe' [options] --vars='PROBEPOINT'
+or
+'perf probe' [options] --funcs
+or
+'perf probe' [options] --definition='PROBE' [...]
+
+DESCRIPTION
+-----------
+This command defines dynamic tracepoint events, by symbol and registers
+without debuginfo, or by C expressions (C line numbers, C function names,
+and C local variables) with debuginfo.
+
+
+OPTIONS
+-------
+-k::
+--vmlinux=PATH::
+	Specify the vmlinux path which has debuginfo (DWARF binary).
+	Only when used with --definition can an offline
+	vmlinux file be given.
+
+-m::
+--module=MODNAME|PATH::
+	Specify the module name in which perf-probe searches for probe points
+	or lines. If the path of a module file is passed, perf-probe
+	treats it as an offline module (this means you can add a probe on
+	a module which has not been loaded yet).
+
+-s::
+--source=PATH::
+ Specify path to kernel source.
+
+-v::
+--verbose::
+ Be more verbose (show parsed arguments, etc).
+ Can not use with -q.
+
+-q::
+--quiet::
+ Be quiet (do not show any messages including errors).
+ Can not use with -v.
+
+-a::
+--add=::
+ Define a probe event (see PROBE SYNTAX for detail).
+
+-d::
+--del=::
+ Delete probe events. This accepts glob wildcards('*', '?') and character
+ classes(e.g. [a-z], [!A-Z]).
+
+-l::
+--list[=[GROUP:]EVENT]::
+	List current probe events. This can also accept filtering patterns of
+	event names.
+ When this is used with --cache, perf shows all cached probes instead of
+ the live probes.
+
+-L::
+--line=::
+ Show source code lines which can be probed. This needs an argument
+ which specifies a range of the source code. (see LINE SYNTAX for detail)
+
+-V::
+--vars=::
+	Show available local variables at the given probe point. The argument
+	syntax is the same as PROBE SYNTAX, but with no ARGs.
+
+--externs::
+	(Only for --vars) Show externally defined variables in addition to local
+	variables.
+
+--no-inlines::
+ (Only for --add) Search only for non-inlined functions. The functions
+ which do not have instances are ignored.
+
+-F::
+--funcs[=FILTER]::
+	Show available functions in the given module or kernel. With -x/--exec,
+	it can also list functions in a user space executable / shared library.
+	It can also accept a FILTER rule argument.
+
+-D::
+--definition=::
+	Show the trace-event definition converted from the given probe-event
+	instead of writing it into tracing/[k,u]probe_events.
+
+--filter=FILTER::
+	(Only for --vars and --funcs) Set a filter. FILTER is a combination of glob
+	patterns; see FILTER PATTERN for detail.
+ Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
+ for --funcs.
+ If several filters are specified, only the last filter is used.
+
+-f::
+--force::
+	Forcibly add events with an existing name.
+
+-n::
+--dry-run::
+	Dry run. With this option, --add and --del don't execute the actual
+	adding and removal operations.
+
+--cache::
+	(With --add) Cache the probes. Any events which are successfully added
+ are also stored in the cache file.
+ (With --list) Show cached probes.
+ (With --del) Remove cached probes.
+
+--max-probes=NUM::
+ Set the maximum number of probe points for an event. Default is 128.
+
+--target-ns=PID::
+ Obtain mount namespace information from the target pid. This is
+ used when creating a uprobe for a process that resides in a
+ different mount namespace from the perf(1) utility.
+
+-x::
+--exec=PATH::
+ Specify path to the executable or shared library file for user
+ space tracing. Can also be used with --funcs option.
+
+--demangle::
+ Demangle application symbols. --no-demangle is also available
+ for disabling demangling.
+
+--demangle-kernel::
+ Demangle kernel symbols. --no-demangle-kernel is also available
+ for disabling kernel demangling.
+
+In the absence of the -m/-x options, perf probe checks if the first argument after
+the options is an absolute path name. If it's an absolute path, perf probe
+uses it as the target module/target user space binary to probe.
+
+PROBE SYNTAX
+------------
+Probe points are defined by the following syntax.
+
+ 1) Define event based on function name
+ [[GROUP:]EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...]
+
+ 2) Define event based on source file with line number
+ [[GROUP:]EVENT=]SRC:ALN [ARG ...]
+
+ 3) Define event based on source file with lazy pattern
+ [[GROUP:]EVENT=]SRC;PTN [ARG ...]
+
+ 4) Pre-defined SDT events or cached event with name
+ %[sdt_PROVIDER:]SDTEVENT
+ or,
+ sdt_PROVIDER:SDTEVENT
+
+'EVENT' specifies the name of the new event; if omitted, it will be set to the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name with 'GROUP'; if omitted, 'probe' is used for kprobes and 'probe_<bin>' is used for uprobes.
+Note that using an existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the
+modules.
+'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function.
+It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
+'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
+'SDTEVENT' and 'PROVIDER' are the pre-defined event name and provider defined by user SDT (Statically Defined Tracing), or the event name of a pre-cached probe.
+Note that before using the SDT event, the target binary (on which SDT events are defined) must be scanned by linkperf:perf-buildid-cache[1] to make SDT events as cached events.
+
+For details of the SDT, see below.
+https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html
+
+ESCAPED CHARACTER
+-----------------
+
+In the probe syntax, '=', '@', '+', ':' and ';' are treated as special characters. You can use a backslash ('\') to escape them.
+This is useful if you need to probe a specific versioned symbol, like the @GLIBC_... suffixes, or if you need to specify a source file which includes special characters.
+Note that usually a single backslash is consumed by the shell, so you might need to pass a double backslash (\\) or wrap the argument in single quotes (\'AAA\@BBB').
+See EXAMPLES for how this is used.
+
+PROBE ARGUMENT
+--------------
+Each probe argument follows the syntax below.
+
+ [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
+
+'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
+'$vars' and '$params' special arguments are also available for NAME; '$vars' is expanded to the local variables (including function parameters) which can be accessed at the given probe point, while '$params' is expanded to only the function parameters.
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically sets the type based on debuginfo (*). Currently, basic types (u8/u16/u32/u64/s8/s16/s32/s64), hexadecimal integers (x/x8/x16/x32/x64), signedness casting (u/s), "string" and bitfield are supported. (see TYPES for detail)
+On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
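+
+For example, the following sketch probes vfs_read and records all of its
+function parameters via '$params' (this assumes your kernel's debuginfo is
+available):
+
+  ./perf probe 'vfs_read $params'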
+
+TYPES
+-----
+Basic types (u8/u16/u32/u64/s8/s16/s32/s64) and hexadecimal integers (x8/x16/x32/x64) are integer types. The prefixes 's' and 'u' mean those types are signed and unsigned respectively, and 'x' means the value is shown in hexadecimal format. Traced arguments are shown in decimal (sNN/uNN) or hex (xNN). You can also use 's' or 'u' to specify only the signedness and leave its size auto-detected by perf probe. Moreover, you can use 'x' to explicitly request hexadecimal output (the size is also auto-detected).
+String type is a special type, which fetches a "null-terminated" string from kernel space. This means it will fail and store NULL if the string container has been paged out. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+Bitfield is another special type, which takes 3 parameters: bit-width, bit-offset, and container-size (usually 32). The syntax is:
+
+ b<bit-width>@<bit-offset>/<container-size>
+
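+For example, 'b8@16/32' would fetch an 8-bit field starting at bit offset 16
+within a 32-bit container; the function and member names below are purely
+illustrative:
+
+  ./perf probe 'some_func flags:b8@16/32'
+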
+LINE SYNTAX
+-----------
+A line range is described by the following syntax.
+
+ "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
+
+FUNC specifies the function whose lines are shown. 'RLN' is the start line
+number from the function entry line, and 'RLN2' is the end line number. As in the
+probe syntax, 'SRC' means the source file path, 'ALN' is the start line number,
+and 'ALN2' is the end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'. Moreover, the 'FUNC@SRC' combination is good
+for searching for a specific function when several functions share the same name.
+So, "source.c:100-120" shows lines 100 to 120 of the source.c file, and "func:10+20" shows 20 lines starting from the 10th line of the func() function.
+
+LAZY MATCHING
+-------------
+ The lazy line matching is similar to glob matching, but it ignores spaces in both the pattern and the target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
+
+e.g.
+ 'a=*' can match 'a=b', 'a = b', 'a == b' and so on.
+
+This provides some flexibility and robustness to probe point definitions against minor code changes. For example, the actual 10th line of schedule() can easily move when schedule() is modified, but a line matching 'rq=cpu_rq*' may still exist in the function.
+
+FILTER PATTERN
+--------------
+ The filter pattern is a glob matching pattern(s) to filter variables.
+ In addition, you can use "!" to specify a filter-out rule. You can also give several rules combined with "&" or "|", and fold those rules into one rule by using "(" ")".
+
+e.g.
+ With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
+ With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
+
+EXAMPLES
+--------
+Display which lines in schedule() can be probed:
+
+ ./perf probe --line schedule
+
+Add a probe on the 12th line of the schedule() function, recording the cpu local variable:
+
+ ./perf probe schedule:12 cpu
+ or
+ ./perf probe --add='schedule:12 cpu'
+
+Add one or more probes whose names start with "schedule".
+
+ ./perf probe schedule*
+ or
+ ./perf probe --add='schedule*'
+
+Add probes on lines in the schedule() function which call update_rq_clock().
+
+ ./perf probe 'schedule;update_rq_clock*'
+ or
+ ./perf probe --add='schedule;update_rq_clock*'
+
+Delete all probes on schedule().
+
+ ./perf probe --del='schedule*'
+
+Add probes at zfree() function on /bin/zsh
+
+ ./perf probe -x /bin/zsh zfree
+ or
+ ./perf probe /bin/zsh zfree
+
+Add probes at malloc() function on libc
+
+ ./perf probe -x /lib/libc.so.6 malloc
+ or
+ ./perf probe /lib/libc.so.6 malloc
+
+Add a uprobe to a target process running in a different mount namespace
+
+ ./perf probe --target-ns <target pid> -x /lib64/libc.so.6 malloc
+
+Add a USDT probe to a target process running in a different mount namespace
+
+ ./perf probe --target-ns <target pid> -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end
+
+Add a probe on specific versioned symbol by backslash escape
+
+ ./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5'
+
+Add a probe in a source file using special characters by backslash escape
+
+ ./perf probe -x /opt/test/a.out 'foo\+bar.c:4'
+
+
+SEE ALSO
+--------
+linkperf:perf-trace[1], linkperf:perf-record[1], linkperf:perf-buildid-cache[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
new file mode 100644
index 000000000..edf2be251
--- /dev/null
+++ b/tools/perf/Documentation/perf-record.txt
@@ -0,0 +1,509 @@
+perf-record(1)
+==============
+
+NAME
+----
+perf-record - Run a command and record its profile into perf.data
+
+SYNOPSIS
+--------
+[verse]
+'perf record' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf record' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it, into perf.data - without displaying anything.
+
+This file can then be inspected later on, using 'perf report'.
+
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-e::
+--event=::
+ Select the PMU event. Selection can be:
+
+ - a symbolic event name (use 'perf list' to list all events)
+
+ - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
+ - a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
+ 'param1', 'param2', etc are defined as formats for the PMU in
+ /sys/bus/event_source/devices/<pmu>/format/*.
+
+ - a symbolically formed event like 'pmu/config=M,config1=N,config3=K/'
+
+ where M, N, K are numbers (in decimal, hex, octal format). Acceptable
+ values for each of 'config', 'config1' and 'config2' are defined by
+ corresponding entries in /sys/bus/event_source/devices/<pmu>/format/*
+ param1 and param2 are defined as formats for the PMU in:
+ /sys/bus/event_source/devices/<pmu>/format/*
+
+ There are also some parameters which are not defined in .../<pmu>/format/*.
+ These params can be used to overload default config values per event.
+ Here are some common parameters:
+ - 'period': Set event sampling period
+ - 'freq': Set event sampling frequency
+	  - 'time': Disable/enable time stamping. Acceptable values are 1 for
+	    enabling time stamping and 0 for disabling it. The default is 1.
+	  - 'call-graph': Disable/enable callgraph. Acceptable values are "fp" for
+	    FP mode, "dwarf" for DWARF mode, "lbr" for LBR mode and
+	    "no" to disable the callgraph.
+ - 'stack-size': user stack size for dwarf mode
+ - 'name' : User defined event name. Single quotes (') may be used to
+ escape symbols in the name from parsing by shell and tool
+ like this: name=\'CPU_CLK_UNHALTED.THREAD:cmask=0x1\'.
+
+ See the linkperf:perf-list[1] man page for more parameters.
+
+	  Note: If the user explicitly sets options which conflict with the params,
+	  the value set by the params will be overridden.
+
+ Also not defined in .../<pmu>/format/* are PMU driver specific
+ configuration parameters. Any configuration parameter preceded by
+ the letter '@' is not interpreted in user space and sent down directly
+ to the PMU driver. For example:
+
+ perf record -e some_event/@cfg1,@cfg2=config/ ...
+
+ will see 'cfg1' and 'cfg2=config' pushed to the PMU driver associated
+ with the event for further processing. There is no restriction on
+ what the configuration parameters are, as long as their semantic is
+ understood and supported by the PMU driver.
+
+ - a hardware breakpoint event in the form of '\mem:addr[/len][:access]'
+ where addr is the address in memory you want to break in.
+	  Access is the memory access type (read, write, execute); it can
+	  be passed as follows: '\mem:addr[:[r][w][x]]'. len is the range,
+ number of bytes from specified addr, which the breakpoint will cover.
+ If you want to profile read-write accesses in 0x1000, just set
+ 'mem:0x1000:rw'.
+ If you want to profile write accesses in [0x1000~1008), just set
+ 'mem:0x1000/8:w'.
+
+	- a group of events surrounded by a pair of braces ("{event1,event2,...}").
+	  Each event is separated by commas and the group should be quoted to
+	  prevent shell interpretation. You also need to use --group on
+ "perf report" to view group events together.
+
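+	  For example, recording a group and viewing it grouped might look
+	  like this (a sketch; './workload' stands in for any command):
+
+	    perf record -e '{cycles,instructions}' -- ./workload
+	    perf report --group
+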
+--filter=<filter>::
+ Event filter. This option should follow an event selector (-e) which
+ selects either tracepoint event(s) or a hardware trace PMU
+ (e.g. Intel PT or CoreSight).
+
+ - tracepoint filters
+
+ In the case of tracepoints, multiple '--filter' options are combined
+ using '&&'.
+
+ - address filters
+
+ A hardware trace PMU advertises its ability to accept a number of
+ address filters by specifying a non-zero value in
+ /sys/bus/event_source/devices/<pmu>/nr_addr_filters.
+
+ Address filters have the format:
+
+ filter|start|stop|tracestop <start> [/ <size>] [@<file name>]
+
+ Where:
+ - 'filter': defines a region that will be traced.
+ - 'start': defines an address at which tracing will begin.
+ - 'stop': defines an address at which tracing will stop.
+ - 'tracestop': defines a region in which tracing will stop.
+
+ <file name> is the name of the object file, <start> is the offset to the
+ code to trace in that file, and <size> is the size of the region to
+ trace. 'start' and 'stop' filters need not specify a <size>.
+
+ If no object file is specified then the kernel is assumed, in which case
+ the start address must be a current kernel memory address.
+
+ <start> can also be specified by providing the name of a symbol. If the
+ symbol name is not unique, it can be disambiguated by inserting #n where
+ 'n' selects the n'th symbol in address order. Alternately #0, #g or #G
+ select only a global symbol. <size> can also be specified by providing
+ the name of a symbol, in which case the size is calculated to the end
+ of that symbol. For 'filter' and 'tracestop' filters, if <size> is
+ omitted and <start> is a symbol, then the size is calculated to the end
+ of that symbol.
+
+ If <size> is omitted and <start> is '*', then the start and size will
+ be calculated from the first and last symbols, i.e. to trace the whole
+ file.
+
+ If symbol names (or '*') are provided, they must be surrounded by white
+ space.
+
+ The filter passed to the kernel is not necessarily the same as entered.
+ To see the filter that is passed, use the -v option.
+
+ The kernel may not be able to configure a trace region if it is not
+ within a single mapping. MMAP events (or /proc/<pid>/maps) can be
+ examined to determine if that is a possibility.
+
+ Multiple filters can be separated with space or comma.
+
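+	For example, a sketch of tracing only one function's code with Intel PT
+	(the symbol and binary path are placeholders):
+
+	  perf record -e intel_pt// --filter 'filter main @ /usr/bin/ls' -- ls
+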
+--exclude-perf::
+ Don't record events issued by perf itself. This option should follow
+ an event selector (-e) which selects tracepoint event(s). It adds a
+ filter expression 'common_pid != $PERFPID' to filters. If other
+ '--filter' exists, the new filter expression will be combined with
+ them by '&&'.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs (default if no target is specified).
+
+-p::
+--pid=::
+ Record events on existing process ID (comma separated list).
+
+-t::
+--tid=::
+ Record events on existing thread ID (comma separated list).
+ This option also disables inheritance by default. Enable it by adding
+ --inherit.
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
+-r::
+--realtime=::
+ Collect data with this RT SCHED_FIFO priority.
+
+--no-buffering::
+ Collect data without buffering.
+
+-c::
+--count=::
+ Event period to sample.
+
+-o::
+--output=::
+ Output file name.
+
+-i::
+--no-inherit::
+ Child tasks do not inherit counters.
+
+-F::
+--freq=::
+	Profile at this frequency. Use 'max' to use the current maximum
+	allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
+	sysctl. It will throttle down to the current maximum allowed frequency.
+ See --strict-freq.
+
+--strict-freq::
+ Fail if the specified frequency can't be used.
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+	specification with appended unit character - B/K/M/G. The
+	size is rounded up to the nearest power-of-two number of pages.
+ Also, by adding a comma, the number of mmap pages for AUX
+ area tracing can be specified.
+
+--group::
+ Put all events in a single event group. This precedes the --event
+ option and remains only for backward compatibility. See --event.
+
+-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
+--call-graph::
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g. Default is "fp".
+
+ Allows specifying "fp" (frame pointer) or "dwarf"
+ (DWARF's CFI - Call Frame Information) or "lbr"
+ (Hardware Last Branch Record facility) as the method to collect
+ the information used to show the call graphs.
+
+	On some systems, where binaries are built with gcc's
+	-fomit-frame-pointer option, using the "fp" method will produce bogus
+	call graphs; "dwarf", if available (perf tools linked to
+	the libunwind or libdw library), should be used instead.
+ Using the "lbr" method doesn't require any compiler options. It
+ will produce call graphs from the hardware LBR registers. The
+ main limitation is that it is only available on new Intel
+	platforms, such as Haswell. It can only get the user call chain. It
+ doesn't work with branch stack sampling at the same time.
+
+ When "dwarf" recording is used, perf also records (user) stack dump
+ when sampled. Default size of the stack dump is 8192 (bytes).
+ User can change the size by passing the size after comma like
+ "--call-graph dwarf,4096".
+
+-q::
+--quiet::
+ Don't print any message, useful for scripting.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-s::
+--stat::
+ Record per-thread event counts. Use it with 'perf report -T' to see
+ the values.
+
+-d::
+--data::
+ Record the sample virtual addresses.
+
+--phys-data::
+ Record the sample physical addresses.
+
+-T::
+--timestamp::
+ Record the sample timestamps. Use it with 'perf report -D' to see the
+ timestamps, for instance.
+
+-P::
+--period::
+ Record the sample period.
+
+--sample-cpu::
+ Record the sample cpu.
+
+-n::
+--no-samples::
+ Don't sample.
+
+-R::
+--raw-samples::
+Collect raw sample records from all opened counters (default for tracepoint counters).
+
+-C::
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), samples are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+-B::
+--no-buildid::
+Do not save the build ids of binaries in the perf.data files. This skips
+post processing after recording, which sometimes makes the final step in
+the recording process take a long time, as it needs to process all
+events looking for mmap records. The downside is that it can misresolve
+symbols if the workload binaries used when recording get locally rebuilt
+or upgraded, because the only key available in this case is the
+pathname. You can also set the "record.build-id" config variable to
+'skip' to have this behaviour permanently.
+
+-N::
+--no-buildid-cache::
+Do not update the buildid cache. This saves some overhead in situations
+where the information in the perf.data file (which includes buildids)
+is sufficient. You can also set the "record.build-id" config variable to
+'no-cache' to have the same effect.
+
+-G name,...::
+--cgroup name,...::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line. If the user wants to track multiple events for a specific cgroup, the user can
+use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
+
+If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
+command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
+
+-b::
+--branch-any::
+Enable taken branch stack sampling. Any type of taken branch may be sampled.
+This is a shortcut for --branch-filter any. See --branch-filter for more info.
+
+-j::
+--branch-filter::
+Enable taken branch stack sampling. Each sample captures a series of consecutive
+taken branches. The number of branches captured with each sample depends on the
+underlying hardware, the type of branches of interest, and the executed code.
+It is possible to select the types of branches captured by enabling filters. The
+following filters are defined:
+
+ - any: any type of branches
+ - any_call: any function call or system call
+ - any_ret: any function return or system call return
+ - ind_call: any indirect branch
+ - call: direct calls, including far (to/from kernel) calls
+ - u: only when the branch target is at the user level
+ - k: only when the branch target is in the kernel
+ - hv: only when the target is at the hypervisor level
+ - in_tx: only when the target is in a hardware transaction
+ - no_tx: only when the target is not in a hardware transaction
+ - abort_tx: only when the target is a hardware transaction abort
+ - cond: conditional branches
+ - save_type: save branch type during sampling in case binary is not available later
+
++
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+The privilege levels may be omitted, in which case, the privilege levels of the associated
+event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+levels are subject to permissions. When sampling on multiple events, branch stack sampling
+is enabled for all the sampling events. The sampled branch type is the same for all events.
+The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+Note that this feature may not be available on all processors.
+
+--weight::
+Enable weighted sampling. An additional weight is recorded per sample and can be
+displayed with the weight and local_weight sort keys. This currently works for TSX
+abort events and some memory events in precise mode on modern Intel CPUs.
+
+--namespaces::
+Record events of type PERF_RECORD_NAMESPACES.
+
+--transaction::
+Record transaction flags for transaction related events.
+
+--per-thread::
+Use per-thread mmaps. By default per-cpu mmaps are created. This option
+overrides that and uses per-thread mmaps. A side-effect of that is that
+inheritance is automatically disabled. --per-thread is ignored with a warning
+if combined with -a or -C options.
+
+-D::
+--delay=::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-I::
+--intr-regs::
+Capture machine state (registers) at interrupt, i.e., on counter overflows for
+each sample. List of captured registers depends on the architecture. This option
+is off by default. It is possible to select the registers to sample using their
+symbolic names, e.g. on x86, ax, si. To list the available registers use
+--intr-regs=\?. To name registers, pass a comma separated list such as
+--intr-regs=ax,bx. The list of registers is architecture dependent.
+
+--user-regs::
+Capture user registers at sample time. Same arguments as -I.
+
+--running-time::
+Record running and enabled time for read events (:S)
+
+-k::
+--clockid::
+Sets the clock id to use for the various time fields in the perf_event_type
+records. See clock_gettime(). In particular CLOCK_MONOTONIC and
+CLOCK_MONOTONIC_RAW are supported, some events might also allow
+CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
+
+-S::
+--snapshot::
+Select AUX area tracing Snapshot Mode. This option is valid only with an
+AUX area tracing event. Optionally the number of bytes to capture per
+snapshot can be specified. In Snapshot Mode, trace data is captured only when
+signal SIGUSR2 is received.
+
+--proc-map-timeout::
+When processing the /proc/XXX/maps of pre-existing threads, it may take a
+long time, because the files may be huge. A timeout is needed in such cases.
+This option sets the timeout limit. The default value is 500 ms.
+
+--switch-events::
+Record context switch events i.e. events of type PERF_RECORD_SWITCH or
+PERF_RECORD_SWITCH_CPU_WIDE.
+
+--clang-path=PATH::
+Path to clang binary to use for compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--clang-opt=OPTIONS::
+Options passed to clang when compiling BPF scriptlets.
+(enabled when BPF support is on)
+
+--vmlinux=PATH::
+Specify vmlinux path which has debuginfo.
+(enabled when BPF prologue is on)
+
+--buildid-all::
+Record build-id of all DSOs regardless of whether they are actually hit or not.
+
+--all-kernel::
+Configure all used events to run in kernel space.
+
+--all-user::
+Configure all used events to run in user space.
+
+--timestamp-filename::
+Append timestamp to output file name.
+
+--timestamp-boundary::
+Record timestamp boundary (time of first/last samples).
+
+--switch-output[=mode]::
+Generate multiple perf.data files, timestamp prefixed, switching to a new one
+based on 'mode' value:
+ "signal" - when receiving a SIGUSR2 (default value) or
+ <size> - when reaching the size threshold, size is expected to
+ be a number with appended unit character - B/K/M/G
+	       <time> - when reaching the time threshold, time is expected to
+	                be a number with appended unit character - s/m/h/d
+
+ Note: the precision of the size threshold hugely depends
+ on your configuration - the number and size of your ring
+ buffers (-m). It is generally more precise for higher sizes
+ (like >5M), for lower values expect different sizes.
+
+A possible use case is to, given an external event, slice the perf.data file
+that then gets processed, possibly via a perf script, to decide if that
+particular perf.data snapshot should be kept or not.
+
+Implies --timestamp-filename, --no-buildid and --no-buildid-cache.
+The reason for the latter two is to reduce the data file switching
+overhead. You can still switch them on with:
+
+ --switch-output --no-no-buildid --no-no-buildid-cache
+
+--dry-run::
+Parse options then exit. --dry-run can be used to detect errors in cmdline
+options.
+
+'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
+in config file is set to true.
+
+--tail-synthesize::
+Instead of collecting non-sample events (for example, fork, comm, mmap) at
+the beginning of record, collect them during finalizing an output file.
+The collected non-sample events reflect the status of the system when
+record is finished.
+
+--overwrite::
+Makes all events use an overwritable ring buffer. An overwritable ring
+buffer works like a flight recorder: when it gets full, the kernel will
+overwrite the oldest records, which thus will never make it to the
+perf.data file.
+
+When '--overwrite' and '--switch-output' are used, perf records and drops
+events until it receives a signal, meaning that something unusual was
+detected that warrants taking a snapshot of the most current events,
+those fitting in the ring buffer at that moment.
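+
+For example, a flight-recorder style session might look like this (a sketch;
+the SIGUSR2 signal is sent from another terminal when something interesting
+happens, and <perf-pid> is the pid of the running perf process):
+
+  perf record -a --overwrite --switch-output -e cycles
+  kill -USR2 <perf-pid>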
+
+'overwrite' attribute can also be set or canceled for an event using
+config terms. For example: 'cycles/overwrite/' and 'instructions/no-overwrite/'.
+
+Implies --tail-synthesize.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
new file mode 100644
index 000000000..474a4941f
--- /dev/null
+++ b/tools/perf/Documentation/perf-report.txt
@@ -0,0 +1,493 @@
+perf-report(1)
+==============
+
+NAME
+----
+perf-report - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf report' [-i <file> | --input=file]
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf record.
+
+OPTIONS
+-------
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+ Be more verbose. (show symbol address, etc)
+
+-q::
+--quiet::
+ Do not show any message. (Suppress -v)
+
+-n::
+--show-nr-samples::
+ Show the number of samples for each symbol
+
+--show-cpu-utilization::
+ Show sample percentage for different cpu modes.
+
+-T::
+--threads::
+ Show per-thread event counters. The input data file should be recorded
+ with -s option.
+
+-c::
+--comms=::
+ Only consider symbols in these comms. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+
+--pid=::
+ Only show events for given process ID (comma separated list).
+
+--tid=::
+ Only show events for given thread ID (comma separated list).
+
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+
+-S::
+--symbols=::
+ Only consider these symbols. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+
+--symbol-filter=::
+ Only show symbols that match (partially) with this filter.
+
+-U::
+--hide-unresolved::
+ Only display entries resolved to a symbol.
+
+-s::
+--sort=::
+ Sort histogram entries by given key(s) - multiple keys can be specified
+ in CSV format. Following sort keys are available:
+ pid, comm, dso, symbol, parent, cpu, socket, srcline, weight,
+ local_weight, cgroup_id.
+
+ Each key has following meaning:
+
+ - comm: command (name) of the task which can be read via /proc/<pid>/comm
+ - pid: command and tid of the task
+ - dso: name of library or module executed at the time of sample
+ - dso_size: size of library or module executed at the time of sample
+ - symbol: name of function executed at the time of sample
+ - symbol_size: size of function executed at the time of sample
+ - parent: name of function matched to the parent regex filter. Unmatched
+ entries are displayed as "[other]".
+ - cpu: cpu number the task ran at the time of sample
+ - socket: processor socket number the task ran at the time of sample
+ - srcline: filename and line number executed at the time of sample. The
+ DWARF debugging info must be provided.
+ - srcfile: file name of the source file of the same. Requires dwarf
+ information.
+ - weight: Event specific weight, e.g. memory latency or transaction
+ abort cost. This is the global weight.
+ - local_weight: Local weight version of the weight above.
+ - cgroup_id: ID derived from cgroup namespace device and inode numbers.
+ - transaction: Transaction abort flags.
+ - overhead: Overhead percentage of sample
+ - overhead_sys: Overhead percentage of sample running in system mode
+ - overhead_us: Overhead percentage of sample running in user mode
+ - overhead_guest_sys: Overhead percentage of sample running in system mode
+ on guest machine
+ - overhead_guest_us: Overhead percentage of sample running in user mode on
+ guest machine
+	- sample: Number of samples
+ - period: Raw number of event count of sample
+
+ By default, comm, dso and symbol keys are used.
+ (i.e. --sort comm,dso,symbol)
+
+ If --branch-stack option is used, following sort keys are also
+ available:
+
+ - dso_from: name of library or module branched from
+ - dso_to: name of library or module branched to
+ - symbol_from: name of function branched from
+ - symbol_to: name of function branched to
+ - srcline_from: source file and line branched from
+ - srcline_to: source file and line branched to
+ - mispredict: "N" for predicted branch, "Y" for mispredicted branch
+ - in_tx: branch in TSX transaction
+ - abort: TSX transaction abort.
+ - cycles: Cycles in basic block
+
+ And default sort keys are changed to comm, dso_from, symbol_from, dso_to
+ and symbol_to, see '--branch-stack'.
+
+ If the --mem-mode option is used, the following sort keys are also available
+ (incompatible with --branch-stack):
+ symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+ - symbol_daddr: name of data symbol being executed on at the time of sample
+ - dso_daddr: name of library or module containing the data being executed
+ on at the time of the sample
+ - locked: whether the bus was locked at the time of the sample
+ - tlb: type of tlb access for the data at the time of the sample
+ - mem: type of memory access for the data at the time of the sample
+ - snoop: type of snoop (if any) for the data at the time of the sample
+ - dcacheline: the cacheline the data address is on at the time of the sample
+ - phys_daddr: physical address of data being executed on at the time of sample
+
+ And the default sort keys are changed to local_weight, mem, sym, dso,
+ symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
+ If the data file has tracepoint event(s), following (dynamic) sort keys
+ are also available:
+ trace, trace_fields, [<event>.]<field>[/raw]
+
+ - trace: pretty printed trace output in a single column
+ - trace_fields: fields in tracepoints in separate columns
+ - <field name>: optional event and field name for a specific field
+
+	The last form consists of event and field names. If the event name is
+	omitted, it searches all events for a matching field name. The matched
+	field will be shown only for the events that have the field. The event
+	name supports substring matching, so the user doesn't need to specify the
+	full subsystem and event name every time. For example, the 'sched:sched_switch'
+	event can be shortened to 'switch' as long as it's not ambiguous. Also an
+	event can be specified by its index (starting from 1) preceded by the '%'.
+ So '%1' is the first event, '%2' is the second, and so on.
+
+ The field name can have '/raw' suffix which disables pretty printing
+ and shows raw field value like hex numbers. The --raw-trace option
+ has the same effect for all dynamic sort keys.
+
+ The default sort keys are changed to 'trace' if all events in the data
+ file are tracepoint.
+
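+	For example, to sort by command and shared object only (a sketch):
+
+	  perf report --sort comm,dso
+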
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+	By default, every sort key not specified in -F will be appended
+	automatically.
+
+	If the keys start with a prefix '+', then it will append the specified
+ field(s) to the default field order. For example: perf report -F +period,sample.
+
+-p::
+--parent=<regex>::
+ A regex filter to identify parent. The parent is a caller of this
+ function and searched through the callchain, thus it requires callchain
+ information recorded. The pattern is in the extended regex format and
+ defaults to "\^sys_|^do_page_fault", see '--sort parent'.
+
+-x::
+--exclude-other::
+ Only display entries with parent-match.
+
+-w::
+--column-widths=<width[,width...]>::
+ Force each column width to the provided list, for large terminal
+ readability. 0 means no limit (default behavior).
+
+-t::
+--field-separator=::
+	Use a special separator character and don't pad with spaces, replacing
+	all occurrences of this separator in symbol names (and other output)
+	with a '.' character, making it the only non-valid separator.
+
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+-g::
+--call-graph=<print_type,threshold[,print_limit],order,sort_key[,branch],value>::
+ Display call chains using type, min percent threshold, print limit,
+ call order, sort key, optional branch and value. Note that ordering
+ is not fixed so any parameter can be given in an arbitrary order.
+ One exception is the print_limit which should be preceded by threshold.
+
+ print_type can be either:
+ - flat: single column, linear exposure of call chains.
+ - graph: use a graph tree, displaying absolute overhead rates. (default)
+ - fractal: like graph, but displays relative rates. Each branch of
+ the tree is considered as a new profiled object.
+ - folded: call chains are displayed in a line, separated by semicolons
+ - none: disable call chain display.
+
+ threshold is a percentage value which specifies a minimum percent to be
+ included in the output call graph. Default is 0.5 (%).
+
+	print_limit is only applied when the stdio interface is used. It limits
+	the number of call graph entries in a single hist entry. Note that it needs
+	to be given after threshold (but not necessarily consecutively).
+ Default is 0 (unlimited).
+
+ order can be either:
+ - callee: callee based call graph.
+ - caller: inverted caller based call graph.
+ Default is 'caller' when --children is used, otherwise 'callee'.
+
+ sort_key can be:
+ - function: compare on functions (default)
+ - address: compare on individual code addresses
+ - srcline: compare on source filename and line number
+
+ branch can be:
+ - branch: include last branch information in callgraph when available.
+ Usually more convenient to use --branch-history for this.
+
+ value can be:
+	- percent: display overhead percent (default)
+ - period: display event period
+ - count: display event count
+
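+	For example, a caller-ordered graph with a 5% threshold might be
+	requested like this (a sketch; parameters may be given in any order):
+
+	  perf report -g graph,5,caller
+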
+--children::
+	Accumulate the callchains of children into the parent entry so that they
+	can show up in the output. The output will have a new "Children" column
+	and will be sorted on the data. It requires that callchains be recorded.
+ See the `overhead calculation' section for more details. Enabled by
+ default, disable with --no-children.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+ Note that when using the --itrace option the synthesized callchain size
+ will override this value if the synthesized callchain size is bigger.
+
+ Default: 127
+
+-G::
+--inverted::
+	Alias for an inverted caller-based call graph.
+
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
+--pretty=<key>::
+ Pretty printing style. key: normal, raw
+
+--stdio:: Use the stdio interface.
+
+--stdio-color::
+	'always', 'never' or 'auto', allowing configuration of color output
+ via the command line, in addition to via "color.ui" .perfconfig.
+ Use '--stdio-color always' to generate color even when redirecting
+ to a pipe or file. Using just '--stdio-color' is equivalent to
+ using 'always'.
+
+--tui:: Use the TUI interface, which is integrated with annotate and allows
+ zooming into DSOs or threads, among other features. Use of --tui
+ requires a tty, if one is not present, as when piping to other
+ commands, the stdio interface is used.
+
+--gtk:: Use the GTK2 interface.
+
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+--ignore-vmlinux::
+ Ignore vmlinux files.
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+-m::
+--modules::
+ Load module symbols. WARNING: This should only be used with -k and
+ a LIVE kernel.
+
+-f::
+--force::
+ Don't do ownership validation.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma-separated list with no space: 0,1. Ranges of
+ CPUs are specified with -: 0-2. Default is to report samples on all
+ CPUs.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--source::
+ Interleave source code with assembly code. Enabled by default,
+ disable with --no-source.
+
+--asm-raw::
+ Show raw instruction encoding of assembly instructions.
+
+--show-total-period:: Show a column with the sum of periods.
+
+-I::
+--show-info::
+ Display extended information about the perf.data file. This adds
+ information which may be very large and thus may clutter the display.
+ It currently includes: cpu and numa topology of the host system.
+
+-b::
+--branch-stack::
+ Use the addresses of sampled taken branches instead of the instruction
+ address to build the histograms. To generate meaningful output, the
+ perf.data file must have been obtained using perf record -b or
+ perf record --branch-filter xxx where xxx is a branch filter option.
+ perf report is able to auto-detect whether a perf.data file contains
+ branch stacks and it will automatically switch to the branch view mode,
+ unless --no-branch-stack is used.
+
+--branch-history::
+ Add the addresses of sampled taken branches to the callstack.
+	This allows examining the path the program took to each sample.
+ The data collection must have used -b (or -j) and -g.
+
+--objdump=<path>::
+ Path to objdump binary.
+
+--group::
+ Show event group information together. It forces group output also
+ if there are no groups defined in data file.
+
+--demangle::
+ Demangle symbol names to human readable form. It's enabled by default,
+ disable with --no-demangle.
+
+--demangle-kernel::
+ Demangle kernel symbol names to human readable form (for C++ kernels).
+
+--mem-mode::
+ Use the data addresses of samples in addition to instruction addresses
+ to build the histograms. To generate meaningful output, the perf.data
+ file must have been obtained using perf record -d -W and using a
+ special event -e cpu/mem-loads/p or -e cpu/mem-stores/p. See
+ 'perf mem' for simpler access.
+
+--percent-limit::
+ Do not show entries which have an overhead under that percent.
+ (Default: 0). Note that this option also sets the percent limit (threshold)
+	of callchains. However, the default value of the callchain threshold is
+	different from the default value for hist entries. Please see the
+ --call-graph option for details.
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+--header::
+ Show header information in the perf.data file. This includes
+ various information like hostname, OS and perf version, cpu/mem
+ info, perf command line, event list and so on. Currently only
+ --stdio output supports this feature.
+
+--header-only::
+ Show only perf.data header (forces --stdio).
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+ stop time is not given (i.e, time string is 'x.y,') then analysis goes
+ to end of file.
+
+	Time percentages with multiple time ranges are also supported. The time
+	string is 'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
+
+ For example:
+ Select the second 10% time slice:
+
+ perf report --time 10%/2
+
+ Select from 0% to 10% time slice:
+
+ perf report --time 0%-10%
+
+ Select the first and second 10% time slices:
+
+ perf report --time 10%/1,10%/2
+
+ Select from 0% to 10% and 30% to 40% slices:
+
+ perf report --time 0%-10%,30%-40%
+
+--itrace::
+ Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+ To disable decoding entirely, use --no-itrace.
+
+--full-source-path::
+ Show the full path for source files for srcline output.
+
+--show-ref-call-graph::
+ When multiple events are sampled, it may not be needed to collect
+ callgraphs for all of them. The sample sites are usually nearby,
+ and it's enough to collect the callgraphs on a reference event.
+ So user can use "call-graph=no" event modifier to disable callgraph
+ for other events to reduce the overhead.
+	However, perf report cannot show callgraphs for the events which
+	disabled the callgraph.
+	This option extends perf report to show the reference callgraphs,
+	collected by the reference event, for events without callgraphs.
+
+--socket-filter::
+ Only report the samples on the processor socket that match with this filter
+
+--raw-trace::
+ When displaying traceevent output, do not use print fmt or plugins.
+
+--hierarchy::
+ Enable hierarchical output.
+
+--inline::
+ If a callgraph address belongs to an inlined function, the inline stack
+ will be printed. Each entry is function name or file/line. Enabled by
+ default, disable with --no-inline.
+
+--mmaps::
+ Show --tasks output plus mmap information in a format similar to
+ /proc/<PID>/maps.
+
+	Please note that not all mmaps are stored; options affecting which ones
+	are stored include 'perf record --data', for instance.
+
+--stats::
+ Display overall events statistics without any further processing.
+ (like the one at the end of the perf report -D command)
+
+--tasks::
+	Display monitored tasks stored in perf data. Displays pid/tid/ppid
+	plus the command string, aligned to distinguish parent and child tasks.
+
+--percent-type::
+ Set annotation percent type from following choices:
+ global-period, local-period, global-hits, local-hits
+
+ The local/global keywords set if the percentage is computed
+ in the scope of the function (local) or the whole data (global).
+ The period/hits keywords set the base the percentage is computed
+ on - the samples period or the number of samples (hits).
+
+include::callchain-overhead-calculation.txt[]
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
new file mode 100644
index 000000000..63f938b88
--- /dev/null
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -0,0 +1,167 @@
+perf-sched(1)
+=============
+
+NAME
+----
+perf-sched - Tool to trace/measure scheduler properties (latencies)
+
+SYNOPSIS
+--------
+[verse]
+'perf sched' {record|latency|map|replay|script|timehist}
+
+DESCRIPTION
+-----------
+There are several variants of 'perf sched':
+
+ 'perf sched record <command>' to record the scheduling events
+ of an arbitrary workload.
+
+ 'perf sched latency' to report the per task scheduling latencies
+ and other scheduling properties of the workload.
+
+ 'perf sched script' to see a detailed trace of the workload that
+ was recorded (aliased to 'perf script' for now).
+
+ 'perf sched replay' to simulate the workload that was recorded
+ via perf sched record. (this is done by starting up mockup threads
+ that mimic the workload based on the events in the trace. These
+ threads can then replay the timings (CPU runtime and sleep patterns)
+ of the workload as it occurred when it was recorded - and can repeat
+ it a number of times, measuring its performance.)
+
+ 'perf sched map' to print a textual context-switching outline of
+ a workload captured via perf sched record. Columns stand for
+ individual CPUs, and the two-letter shortcuts stand for tasks that
+ are running on a CPU. A '*' denotes the CPU that had the event, and
+ a dot signals an idle CPU.
+
+ 'perf sched timehist' provides an analysis of scheduling events.
+
+ Example usage:
+ perf sched record -- sleep 1
+ perf sched timehist
+
+ By default it shows the individual schedule events, including the wait
+ time (time between sched-out and next sched-in events for the task), the
+ task scheduling delay (time between wakeup and actually running) and run
+ time for the task:
+
+ time cpu task name wait time sch delay run time
+ [tid/pid] (msec) (msec) (msec)
+ -------------- ------ -------------------- --------- --------- ---------
+ 79371.874569 [0011] gcc[31949] 0.014 0.000 1.148
+ 79371.874591 [0010] gcc[31951] 0.000 0.000 0.024
+ 79371.874603 [0010] migration/10[59] 3.350 0.004 0.011
+ 79371.874604 [0011] <idle> 1.148 0.000 0.035
+ 79371.874723 [0005] <idle> 0.016 0.000 1.383
+ 79371.874746 [0005] gcc[31949] 0.153 0.078 0.022
+ ...
+
+ Times are in msec.usec.
+
+OPTIONS
+-------
+-i::
+--input=<file>::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+ Be more verbose. (show symbol address, etc)
+
+-D::
+--dump-raw-trace=::
+ Display verbose dump of the sched data.
+
+-f::
+--force::
+ Don't complain, do it.
+
+OPTIONS for 'perf sched map'
+----------------------------
+
+--compact::
+	Show only CPUs with activity. Helps visualization on systems
+	with a high core count.
+
+--cpus::
+ Show just entries with activities for the given CPUs.
+
+--color-cpus::
+ Highlight the given cpus.
+
+--color-pids::
+ Highlight the given pids.
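+
+	For example, to show a compact map with two tasks highlighted
+	(hypothetical pids):
+
+	  perf sched map --compact --color-pids 31949,31951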
+
+OPTIONS for 'perf sched timehist'
+---------------------------------
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+-g::
+--call-graph::
+ Display call chains if present (default on).
+
+--max-stack::
+ Maximum number of functions to display in backtrace, default 5.
+
+-p=::
+--pid=::
+ Only show events for given process ID (comma separated list).
+
+-t=::
+--tid=::
+ Only show events for given thread ID (comma separated list).
+
+-s::
+--summary::
+ Show only a summary of scheduling by thread with min, max, and average
+ run times (in sec) and relative stddev.
+
+-S::
+--with-summary::
+ Show all scheduling events followed by a summary by thread with min,
+ max, and average run times (in sec) and relative stddev.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-V::
+--cpu-visual::
+ Show visual aid for sched switches by CPU: 'i' marks idle time,
+ 's' are scheduler events.
+
+-w::
+--wakeups::
+ Show wakeup events.
+
+-M::
+--migrations::
+ Show migration events.
+
+-n::
+--next::
+ Show next task.
+
+-I::
+--idle-hist::
+ Show idle-related events only.
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+	stop time is not given (i.e., time string is 'x.y,') then analysis goes
+ to end of file.
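+
+	For example, to restrict analysis to a one-millisecond window
+	(times are illustrative):
+
+	  perf sched timehist --time 79371.874,79371.875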
+
+--state::
+ Show task state when it switched out.
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
new file mode 100644
index 000000000..5a1f68122
--- /dev/null
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -0,0 +1,216 @@
+perf-script-perl(1)
+===================
+
+NAME
+----
+perf-script-perl - Process trace data with a Perl script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Perl]:script[.pl] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Perl interpreter. It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Perl script, if any.
+
+STARTER SCRIPTS
+---------------
+
+You can avoid reading the rest of this document by running 'perf script
+-g perl' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/perl for typical examples showing how to
+do basic things like aggregate event data, print results, etc. Also,
+the check-perf-script.pl script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace. If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+ field:unsigned short common_type;
+ field:unsigned char common_flags;
+ field:unsigned char common_preempt_count;
+ field:int common_pid;
+
+ field:char comm[TASK_COMM_LEN];
+ field:pid_t pid;
+ field:int prio;
+ field:int success;
+ field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+sub sched::sched_wakeup
+{
+ my ($event_name, $context, $common_cpu, $common_secs,
+ $common_nsecs, $common_pid, $common_comm,
+ $comm, $pid, $prio, $success, $target_cpu) = @_;
+}
+----
+
+The handler function takes the form subsystem::event_name.
+
+The $common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ $event_name the name of the event as text
+ $context an opaque 'cookie' used in calls back into perf
+ $common_cpu the cpu the event occurred on
+ $common_secs the secs portion of the event timestamp
+ $common_nsecs the nsecs portion of the event timestamp
+ $common_pid the pid of the current task
+ $common_comm the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script. The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Perl script should start by setting up a Perl module
+search path and 'use'ing a few support modules (see module
+descriptions below):
+
+----
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+ use lib "./Perf-Trace-Util/lib";
+ use Perf::Trace::Core;
+ use Perf::Trace::Context;
+ use Perf::Trace::Util;
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+ sub trace_begin
+ {
+ }
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+sub trace_end
+{
+}
+----
+
+*trace_unhandled*, if defined, is called for any event that
+ doesn't have a handler explicitly defined for it. The standard set
+ of common arguments are passed into it:
+
+----
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs,
+ $common_nsecs, $common_pid, $common_comm) = @_;
+}
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Perl modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various Perf::Trace::* Perl modules. To use the functions and
+variables from the given module, add the corresponding 'use
+Perf::Trace::XXX' line to your perf script script.
+
+Perf::Trace::Core Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields. These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+ flag_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the flag field $field_name of event $event_name
+ symbol_str($event_name, $field_name, $field_value) - returns the string representation corresponding to $field_value for the symbolic field $field_name of event $event_name
+
+Perf::Trace::Context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+Perf::Trace::Context defines a set of functions that can be used to
+access this data in the context of the current event. Each of these
+functions expects a $context variable, which is the same as the
+$context variable passed into every event handler as the second
+argument.
+
+ common_pc($context) - returns common_preempt count for the current event
+ common_flags($context) - returns common_flags for the current event
+ common_lock_depth($context) - returns common_lock_depth for the current event
+
+Perf::Trace::Util Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+ nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
+ nsecs_secs($nsecs) - returns whole secs portion given nsecs
+ nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
+ nsecs_str($nsecs) - returns printable string in the form secs.nsecs
+ avg($total, $n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
new file mode 100644
index 000000000..0fb9eda3c
--- /dev/null
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -0,0 +1,641 @@
+perf-script-python(1)
+=====================
+
+NAME
+----
+perf-script-python - Process trace data with a Python script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Python]:script[.py] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Python interpreter. It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Python script, if any.
+
+A QUICK EXAMPLE
+---------------
+
+This section shows the process, start to finish, of creating a working
+Python script that aggregates and extracts useful information from a
+raw perf script stream. You can avoid reading the rest of this
+document if an example is enough for you; the rest of the document
+provides more details on each step and lists the library functions
+available to script writers.
+
+This example actually details the steps that were used to create the
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'. As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
+scripts listed by that command.
+
+The syscall-counts script is a simple script, but demonstrates all the
+basic ideas necessary to create a useful script. Here's an example
+of its output (syscall names are not yet supported, they will appear
+as numbers):
+
+----
+syscall events:
+
+event count
+---------------------------------------- -----------
+sys_write 455067
+sys_getdents 4072
+sys_close 3037
+sys_swapoff 1769
+sys_read 923
+sys_sched_setparam 826
+sys_open 331
+sys_newfstat 326
+sys_mmap 217
+sys_munmap 216
+sys_futex 141
+sys_select 102
+sys_poll 84
+sys_setitimer 12
+sys_writev 8
+15 8
+sys_lseek 7
+sys_rt_sigprocmask 6
+sys_wait4 3
+sys_ioctl 3
+sys_set_robust_list 1
+sys_exit 1
+56 1
+sys_access 1
+----
+
+Basically our task is to keep a per-syscall tally that gets updated
+every time a system call occurs in the system. Our script will do
+that, but first we need to record the data that will be processed by
+that script. Theoretically, there are a couple of ways we could do
+that:
+
+- we could enable every event under the tracing/events/syscalls
+ directory, but this is over 600 syscalls, well beyond the number
+ allowable by perf. These individual syscall events will however be
+ useful if we want to later use the guidance we get from the
+ general-purpose scripts to drill down and get more detail about
+ individual syscalls of interest.
+
+- we can enable the sys_enter and/or sys_exit syscalls found under
+ tracing/events/raw_syscalls. These are called for all syscalls; the
+ 'id' field can be used to distinguish between individual syscall
+ numbers.
+
+For this script, we only need to know that a syscall was entered; we
+don't care how it exited, so we'll use 'perf record' to record only
+the sys_enter events:
+
+----
+# perf record -a -e raw_syscalls:sys_enter
+
+^C[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
+----
+
+The options basically say to collect data for every syscall event
+system-wide and multiplex the per-cpu output into a single stream.
+That single stream will be recorded in a file in the current directory
+called perf.data.
+
+Once we have a perf.data file containing our data, we can use the -g
+'perf script' option to generate a Python script that will contain a
+callback handler for each event type found in the perf.data trace
+stream (for more details, see the STARTER SCRIPTS section).
+
+----
+# perf script -g python
+generated Python script: perf-script.py
+----
+
+The output file, created in the current directory, is named
+perf-script.py. Here's the file in its entirety:
+
+----
+# perf script event handlers, generated by perf script -g python
+# Licensed under the terms of the GNU GPL License version 2
+
+# The common_* event handler fields are the most useful fields common to
+# all events. They don't necessarily correspond to the 'common_*' fields
+# in the format files. Those fields not available as handler params can
+# be retrieved using Python functions of the form common_*(context).
+# See the perf-script-python Documentation for the list of available functions.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_begin():
+ print "in trace_begin"
+
+def trace_end():
+ print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ print_header(event_name, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm)
+
+ print "id=%d, args=%s\n" % \
+ (id, args),
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+ print "%-20s %5u %05u.%09u %8u %-20s " % \
+ (event_name, cpu, secs, nsecs, pid, comm),
+----
+
+At the top is a comment block followed by some import statements and a
+path append which every perf script script should include.
+
+Following that are a couple generated functions, trace_begin() and
+trace_end(), which are called at the beginning and the end of the
+script respectively (for more details, see the SCRIPT_LAYOUT section
+below).
+
+Following those are the 'event handler' functions, generated one for
+every event in the 'perf record' output. The handler functions take
+the form subsystem__event_name, and contain named parameters, one for
+each field in the event; in this case, there's only one event,
+raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for
+more info on event handlers).
+
+The final couple of functions are, like the begin and end functions,
+generated for every script. The first, trace_unhandled(), is called
+every time the script finds an event in the perf.data file that
+doesn't correspond to any event handler in the script. This could
+mean either that the record step recorded event types that it wasn't
+really interested in, or the script was run against a trace file that
+doesn't correspond to the script.
+
+The script generated by the -g option simply prints a line for each
+event found in the trace stream, i.e. it basically just dumps the event
+and its parameter values to stdout. The print_header() function is
+simply a utility function used for that purpose. Let's rename the
+script and run it to see the default output:
+
+----
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
+
+raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
+raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
+raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args=
+raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args=
+raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args=
+raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args=
+raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args=
+raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args=
+.
+.
+.
+----
+
+Of course, for this script, we're not interested in printing every
+trace event, but rather aggregating it in a useful way. So we'll get
+rid of everything to do with printing as well as the trace_begin() and
+trace_unhandled() functions, which we won't be using. That leaves us
+with this minimalistic skeleton:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_end():
+ print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+----
+
+In trace_end(), we'll simply print the results, but first we need to
+generate some results to print. To do that we need to have our
+sys_enter() handler do the necessary tallying until all events have
+been counted. A hash table indexed by syscall id is a good way to
+store that information; every time the sys_enter() handler is called,
+we simply increment a count associated with that hash entry indexed by
+that syscall id:
+
+----
+ syscalls = autodict()
+
+ try:
+ syscalls[id] += 1
+ except TypeError:
+ syscalls[id] = 1
+----
+
+The syscalls 'autodict' object is a special kind of Python dictionary
+(implemented in Core.py) that implements Perl's 'autovivifying' hashes
+in Python, i.e. with autovivifying hashes, you can assign nested hash
+values without having to go to the trouble of creating intermediate
+levels if they don't exist, e.g. syscalls[comm][pid][id] = 1 will create
+the intermediate hash levels and finally assign the value 1 to the
+hash entry for 'id'. (Because the value being assigned isn't a hash
+object itself, the initial value is assigned in the TypeError
+exception handler. There may be a better way to do this in Python, but
+that's what works for now.)
+
+Putting that code into the raw_syscalls__sys_enter() handler, we
+effectively end up with a single-level dictionary keyed on syscall id
+and having the counts we've tallied as values.
+
+The print_syscall_totals() function iterates over the entries in the
+dictionary and displays a line for each entry containing the syscall
+name (the dictionary keys contain the syscall ids, which are passed to
+the Util function syscall_name(), which translates the raw syscall
+numbers to the corresponding syscall name strings). The output is
+displayed after all the events in the trace have been processed, by
+calling the print_syscall_totals() function from the trace_end()
+handler called at the end of script processing.
+
+The final script producing the output shown above is shown in its
+entirety below (syscall_name() helper is not yet available, you can
+only deal with id's for now):
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+syscalls = autodict()
+
+# no comm filtering in this minimal version
+for_comm = None
+
+def trace_end():
+ print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ try:
+ syscalls[id] += 1
+ except TypeError:
+ syscalls[id] = 1
+
+def print_syscall_totals():
+ if for_comm is not None:
+ print "\nsyscall events for %s:\n\n" % (for_comm),
+ else:
+ print "\nsyscall events:\n\n",
+
+ print "%-40s %10s\n" % ("event", "count"),
+ print "%-40s %10s\n" % ("----------------------------------------", \
+ "-----------"),
+
+ for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+ reverse = True):
+ print "%-40s %10d\n" % (syscall_name(id), val),
+----
+
+The script can be run just as before:
+
+ # perf script -s syscall-counts.py
+
+So those are the essential steps in writing and running a script. The
+process can be generalized to any tracepoint or set of tracepoints
+you're interested in - basically find the tracepoint(s) you're
+interested in by looking at the list of available events shown by
+'perf list' and/or look in /sys/kernel/debug/tracing/events/ for
+detailed event and field info, record the corresponding trace data
+using 'perf record', passing it the list of interesting events,
+generate a skeleton script using 'perf script -g python' and modify the
+code to aggregate and display it for your particular needs.
+
+After you've done that you may end up with a general-purpose script
+that you want to keep around and have available for future use. By
+writing a couple of very simple shell scripts and putting them in the
+right place, you can have your script listed alongside the other
+scripts listed by the 'perf script -l' command e.g.:
+
+----
+# perf script -l
+List of available trace scripts:
+ wakeup-latency system-wide min/max/avg wakeup latency
+ rw-by-file <comm> r/w activity for a program, by file
+ rw-by-pid system-wide r/w activity
+----
+
+A nice side effect of doing this is that you also then capture the
+probably lengthy 'perf record' command needed to record the events for
+the script.
+
+To have the script appear as a 'built-in' script, you write two simple
+scripts, one for recording and one for 'reporting'.
+
+The 'record' script is a shell script with the same base name as your
+script, but with -record appended. The shell script should be put
+into the perf/scripts/python/bin directory in the kernel source tree.
+In that script, you write the 'perf record' command-line needed for
+your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
+
+#!/bin/bash
+perf record -a -e raw_syscalls:sys_enter
+----
+
+The 'report' script is also a shell script with the same base name as
+your script, but with -report appended. It should also be located in
+the perf/scripts/python/bin directory. In that script, you write the
+'perf script -s' command-line needed for running your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
+
+#!/bin/bash
+# description: system-wide syscall counts
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+----
+
+Note that the location of the Python script given in the shell script
+is in the libexec/perf-core/scripts/python directory - this is where
+the script will be copied by 'make install' when you install perf.
+For the installation to install your script there, your script needs
+to be located in the perf/scripts/python directory in the kernel
+source tree:
+
+----
+# ls -al kernel-source/tools/perf/scripts/python
+total 32
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
+drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
+----
+
+Once you've done that (don't forget to do a new 'make install',
+otherwise your script won't show up at run-time), 'perf script -l'
+should show a new entry for your script:
+
+----
+# perf script -l
+List of available trace scripts:
+ wakeup-latency system-wide min/max/avg wakeup latency
+ rw-by-file <comm> r/w activity for a program, by file
+ rw-by-pid system-wide r/w activity
+ syscall-counts system-wide syscall counts
+----
+
+You can now perform the record step via 'perf script record':
+
+ # perf script record syscall-counts
+
+and display the output using 'perf script report':
+
+ # perf script report syscall-counts
+
+STARTER SCRIPTS
+---------------
+
+You can quickly get started writing a script for a particular set of
+trace data by generating a skeleton script using 'perf script -g
+python' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/python for typical examples showing how to
+do basic things like aggregate event data, print results, etc. Also,
+the check-perf-script.py script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace. If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_unhandled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakeup event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+ field:unsigned short common_type;
+ field:unsigned char common_flags;
+ field:unsigned char common_preempt_count;
+ field:int common_pid;
+
+ field:char comm[TASK_COMM_LEN];
+ field:pid_t pid;
+ field:int prio;
+ field:int success;
+ field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
+ common_nsecs, common_pid, common_comm,
+ comm, pid, prio, success, target_cpu):
+ pass
+----
+
+The handler function takes the form subsystem__event_name.
+
+The common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ event_name the name of the event as text
+ context an opaque 'cookie' used in calls back into perf
+ common_cpu the cpu the event occurred on
+ common_secs the secs portion of the event timestamp
+ common_nsecs the nsecs portion of the event timestamp
+ common_pid the pid of the current task
+ common_comm the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script. The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Python script should start by setting up a Python
+module search path and 'import'ing a few support modules (see module
+descriptions below):
+
+----
+ import os
+ import sys
+
+ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+ from perf_trace_context import *
+ from Core import *
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+def trace_begin():
+ pass
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+def trace_end():
+ pass
+----
+
+*trace_unhandled*, if defined, is called for any event that
+ doesn't have a handler explicitly defined for it. The standard set
+ of common arguments are passed into it:
+
+----
+def trace_unhandled(event_name, context, event_fields_dict):
+ pass
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Python modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various perf script Python modules. To use the functions and
+variables from the given module, add the corresponding 'from XXXX
+import' line to your perf script script.
+
+Core.py Module
+~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields. These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+ flag_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the flag field field_name of event event_name
+ symbol_str(event_name, field_name, field_value) - returns the string representation corresponding to field_value for the symbolic field field_name of event event_name
+
+The *autodict* function returns a special kind of Python
+dictionary that implements Perl's 'autovivifying' hashes in Python
+i.e. with autovivifying hashes, you can assign nested hash values
+without having to go to the trouble of creating intermediate levels if
+they don't exist.
+
+ autodict() - returns an autovivifying dictionary instance
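+
+For example (a minimal sketch):
+
+    counts = autodict()
+    counts['gcc'][31949][1] = 1    # intermediate levels created automatically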
+
+
+perf_trace_context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+perf_trace_context defines a set of functions that can be used to
+access this data in the context of the current event. Each of these
+functions expects a context variable, which is the same as the
+context variable passed into every event handler as the second
+argument.
+
+ common_pc(context) - returns common_preempt count for the current event
+ common_flags(context) - returns common_flags for the current event
+ common_lock_depth(context) - returns common_lock_depth for the current event
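+
+For example, a handler can use the context cookie to fetch common_flags
+(a minimal sketch; the remaining sched_switch fields are elided via *args):
+
+----
+def sched__sched_switch(event_name, context, common_cpu, common_secs,
+	common_nsecs, common_pid, common_comm, *args):
+	print "flags=%d" % common_flags(context)
+----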
+
+Util.py Module
+~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+ nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
+ nsecs_secs(nsecs) - returns whole secs portion given nsecs
+ nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
+ nsecs_str(nsecs) - returns printable string in the form secs.nsecs
+ avg(total, n) - returns average given a sum and a total number of values
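+
+For example, inside a handler the timestamp helpers compose naturally:
+
+    print "time: %s" % nsecs_str(nsecs(common_secs, common_nsecs))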
+
+SUPPORTED FIELDS
+----------------
+
+Currently supported fields:
+
+ev_name, comm, pid, tid, cpu, ip, time, period, phys_addr, addr,
+symbol, dso, time_enabled, time_running, values, callchain,
+brstack, brstacksym, datasrc, datasrc_decode, iregs, uregs,
+weight, transaction, raw_buf, attr.
+
+Some fields have sub items:
+
+brstack:
+ from, to, from_dsoname, to_dsoname, mispred,
+ predicted, in_tx, abort, cycles.
+
+brstacksym:
+ items: from, to, pred, in_tx, abort (converted string)
+
+For example, we can use the following code to print the brstack "from",
+"to" and "cycles" fields:
+
+----
+if 'brstack' in dict:
+        for entry in dict['brstack']:
+                print "from %s, to %s, cycles %s" % (entry["from"], entry["to"], entry["cycles"])
+----
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
new file mode 100644
index 000000000..afdafe211
--- /dev/null
+++ b/tools/perf/Documentation/perf-script.txt
@@ -0,0 +1,389 @@
+perf-script(1)
+==============
+
+NAME
+----
+perf-script - Read perf.data (created by perf record) and display trace output
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays the trace recorded.
+
+There are several variants of perf script:
+
+ 'perf script' to see a detailed trace of the workload that was
+ recorded.
+
+ You can also run a set of pre-canned scripts that aggregate and
+ summarize the raw trace data in various ways (the list of scripts is
+ available via 'perf script -l'). The following variants allow you to
+ record and run those scripts:
+
+ 'perf script record <script> <command>' to record the events required
+ for 'perf script report'. <script> is the name displayed in the
+ output of 'perf script --list' i.e. the actual script name minus any
+ language extension. If <command> is not specified, the events are
+ recorded using the -a (system-wide) 'perf record' option.
+
+ 'perf script report <script> [args]' to run and display the results
+ of <script>. <script> is the name displayed in the output of 'perf
+ script --list' i.e. the actual script name minus any language
+ extension. The perf.data output from a previous run of 'perf script
+ record <script>' is used and should be present for this command to
+ succeed. [args] refers to the (mainly optional) args expected by
+ the script.
+
+ 'perf script <script> <required-script-args> <command>' to both
+ record the events required for <script> and to run the <script>
+ using 'live-mode' i.e. without writing anything to disk. <script>
+ is the name displayed in the output of 'perf script --list' i.e. the
+ actual script name minus any language extension. If <command> is
+ not specified, the events are recorded using the -a (system-wide)
+ 'perf record' option. If <script> has any required args, they
+ should be specified before <command>. This mode doesn't allow for
+ optional script args to be specified; if optional script args are
+ desired, they can be specified using separate 'perf script record'
+ and 'perf script report' commands, with the stdout of the record step
+ piped to the stdin of the report script, using the '-o -' and '-i -'
+ options of the corresponding commands.
+
+ 'perf script <top-script>' to both record the events required for
+ <top-script> and to run the <top-script> using 'live-mode'
+ i.e. without writing anything to disk. <top-script> is the name
+ displayed in the output of 'perf script --list' i.e. the actual
+ script name minus any language extension; a <top-script> is defined
+ as any script name ending with the string 'top'.
+
+ [<record-options>] can be passed to the record steps of 'perf script
+ record' and 'live-mode' variants; this isn't possible however for
+ <top-script> 'live-mode' or 'perf script report' variants.
+
+ See the 'SEE ALSO' section for links to language-specific
+ information on how to write and run your own trace scripts.
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-D::
+--dump-raw-trace=::
+ Display verbose dump of the trace data.
+
+-L::
+--Latency=::
+ Show latency attributes (irqs/preemption disabled, etc).
+
+-l::
+--list=::
+ Display a list of available trace scripts.
+
+-s ['lang']::
+--script=::
+ Process trace data with the given script ([lang]:script[.ext]).
+ If the string 'lang' is specified in place of a script name, a
+ list of supported languages will be displayed instead.
+
+-g::
+--gen-script=::
+ Generate perf-script.[ext] starter script for given language,
+ using current perf.data.
+
+-a::
+ Force system-wide collection. Scripts run without a <command>
+ normally use -a by default, while scripts run with a <command>
+ normally don't - this option allows the latter to be run in
+ system-wide mode.
+
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-d::
+--debug-mode::
+ Do various checks like samples ordering and lost events.
+
+-F::
+--fields::
+ Comma separated list of fields to print. Options are:
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
+ srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn,
+ brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc.
+ Field list can be prepended with the type, trace, sw or hw,
+ to indicate to which event type the field list applies.
+ e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace
+
+ perf script -F <fields>
+
+ is equivalent to:
+
+ perf script -F trace:<fields> -F sw:<fields> -F hw:<fields>
+
+ i.e., the specified fields apply to all event types if the type string
+ is not given.
+
+ In addition to overriding fields, it is also possible to add or remove
+ fields from the defaults. For example
+
+ -F -cpu,+insn
+
+ removes the cpu field and adds the insn field. Adding/removing fields
+ cannot be mixed with normal overriding.
+
+ The arguments are processed in the order received. A later usage can
+ reset a prior request. e.g.:
+
+ -F trace: -F comm,tid,time,ip,sym
+
+ The first -F suppresses trace events (field list is ""), but then the
+ second invocation sets the fields to comm,tid,time,ip,sym. In this case a
+ warning is given to the user:
+
+ "Overriding previous field request for all events."
+
+ Alternatively, consider the order:
+
+ -F comm,tid,time,ip,sym -F trace:
+
+ The first -F sets the fields for all events and the second -F
+ suppresses trace events. The user is given a warning message about
+ the override, and the result of the above is that only S/W and H/W
+ events are displayed with the given fields.
+
+ For the 'wildcard' option if a user selected field is invalid for an
+ event type, a message is displayed to the user that the option is
+ ignored for that type. For example:
+
+ $ perf script -F comm,tid,trace
+ 'trace' not valid for hardware events. Ignoring.
+ 'trace' not valid for software events. Ignoring.
+
+ Alternatively, if the type is given an invalid field is specified it
+ is an error. For example:
+
+ perf script -v -F sw:comm,tid,trace
+ 'trace' not valid for software events.
+
+ At this point usage is displayed, and perf-script exits.
+
+	The flags field is synthesized and may have a value when doing
+	Instruction Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+ call, return, conditional, system, asynchronous, interrupt,
+ transaction abort, trace begin, trace end, and in transaction,
+ respectively. Known combinations of flags are printed more nicely e.g.
+ "call" for "bc", "return" for "br", "jcc" for "bo", "jmp" for "b",
+ "int" for "bci", "iret" for "bri", "syscall" for "bcs", "sysret" for "brs",
+ "async" for "by", "hw int" for "bcyi", "tx abrt" for "bA", "tr strt" for "bB",
+ "tr end" for "bE". However the "x" flag will be display separately in those
+ cases e.g. "jcc (x)" for a condition branch within a transaction.
+
+	The callindent field is synthesized and may have a value when doing
+	Instruction Trace decoding. For calls and returns, it will display the
+ name of the symbol indented with spaces to reflect the stack depth.
+
+	When doing instruction trace decoding, insn and insnlen give the
+ instruction bytes and the instruction length of the current
+ instruction.
+
+	The synth field is used by synthesized events which may be created
+	when doing Instruction Trace decoding.
+
+	Finally, a user may not set fields to none for all event types,
+	i.e., -F "" is not allowed.
+
+ The brstack output includes branch related information with raw addresses using the
+ /v/v/v/v/cycles syntax in the following order:
+ FROM: branch source instruction
+ TO : branch target instruction
+ M/P/-: M=branch target mispredicted or branch direction was mispredicted, P=target predicted or direction predicted, -=not supported
+ X/- : X=branch inside a transactional region, -=not in transaction region or not supported
+ A/- : A=TSX abort entry, -=not aborted region or not supported
+ cycles
+
+ The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
+
+	When brstackinsn is specified, the full assembler sequences of branch sequences for each sample
+	are printed. This is the full execution path leading to the sample. This is only supported when the
+ sample was recorded with perf record -b or -j any.
+
+ The brstackoff field will print an offset into a specific dso/binary.
+
+ With the metric option perf script can compute metrics for
+ sampling periods, similar to perf stat. This requires
+ specifying a group with multiple metrics with the :S option
+ for perf record. perf will sample on the first event, and
+ compute metrics for all the events in the group. Please note
+ that the metric computed is averaged over the whole sampling
+ period, not just for the sample point.
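+
+	For example (a sketch; the group must be recorded with the :S
+	modifier):
+
+	  perf record -e '{cycles,instructions}:S' -a -- sleep 1
+	  perf script -F +metric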
+
+ For sample events it's possible to display misc field with -F +misc option,
+ following letters are displayed for each bit:
+
+ PERF_RECORD_MISC_KERNEL K
+ PERF_RECORD_MISC_USER U
+ PERF_RECORD_MISC_HYPERVISOR H
+ PERF_RECORD_MISC_GUEST_KERNEL G
+ PERF_RECORD_MISC_GUEST_USER g
+ PERF_RECORD_MISC_MMAP_DATA* M
+ PERF_RECORD_MISC_COMM_EXEC E
+ PERF_RECORD_MISC_SWITCH_OUT S
+ PERF_RECORD_MISC_SWITCH_OUT_PREEMPT Sp
+
+ $ perf script -F +misc ...
+ sched-messaging 1414 K 28690.636582: 4590 cycles ...
+ sched-messaging 1407 U 28690.636600: 325620 cycles ...
+ sched-messaging 1414 K 28690.636608: 19473 cycles ...
+ misc field ___________/
+
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-G::
+--hide-call-graph::
+ When printing symbols do not display call chain.
+
+--stop-bt::
+	Stop display of callgraph at these symbols.
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma-separated list with no space: 0,1. Ranges of
+ CPUs are specified with -: 0-2. Default is to report samples on all
+ CPUs.
+
+-c::
+--comms=::
+	Only display events for these comms. A comma-separated list that
+	supports file://filename entries.
+
+--pid=::
+ Only show events for given process ID (comma separated list).
+
+--tid=::
+ Only show events for given thread ID (comma separated list).
+
+-I::
+--show-info::
+ Display extended information about the perf.data file. This adds
+ information which may be very large and thus may clutter the display.
+ It currently includes: cpu and numa topology of the host system.
+ It can only be used with the perf script report mode.
+
+--show-kernel-path::
+ Try to resolve the path of [kernel.kallsyms]
+
+--show-task-events::
+	Display task related events (e.g. FORK, COMM, EXIT).
+
+--show-mmap-events::
+	Display mmap related events (e.g. MMAP, MMAP2).
+
+--show-namespace-events::
+	Display namespace events, i.e. events of type PERF_RECORD_NAMESPACES.
+
+--show-switch-events::
+	Display context switch events, i.e. events of type PERF_RECORD_SWITCH or
+	PERF_RECORD_SWITCH_CPU_WIDE.
+
+--show-lost-events::
+	Display lost events, i.e. events of type PERF_RECORD_LOST.
+
+--show-round-events::
+	Display finished round events, i.e. events of type PERF_RECORD_FINISHED_ROUND.
+
+--demangle::
+ Demangle symbol names to human readable form. It's enabled by default,
+ disable with --no-demangle.
+
+--demangle-kernel::
+ Demangle kernel symbol names to human readable form (for C++ kernels).
+
+--header::
+	Show perf.data header.
+
+--header-only::
+	Show only perf.data header.
+
+--itrace::
+ Options for decoding instruction tracing data. The options are:
+
+include::itrace.txt[]
+
+ To disable decoding entirely, use --no-itrace.
+
+--full-source-path::
+ Show the full path for source files for srcline output.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+ Note that when using the --itrace option the synthesized callchain size
+ will override this value if the synthesized callchain size is bigger.
+
+ Default: 127
+
+--ns::
+ Use 9 decimal places when displaying time (i.e. show the nanoseconds)
+
+-f::
+--force::
+ Don't do ownership validation.
+
+--time::
+ Only analyze samples within given time window: <start>,<stop>. Times
+ have the format seconds.microseconds. If start is not given (i.e., time
+ string is ',x.y') then analysis starts at the beginning of the file. If
+ stop time is not given (i.e, time string is 'x.y,') then analysis goes
+ to end of file.
+
+	Also supports time percentages with multiple time ranges. The time
+	string is 'a%/n,b%/m,...' or 'a%-b%,c%-d%,...'.
+
+ For example:
+ Select the second 10% time slice:
+ perf script --time 10%/2
+
+ Select from 0% to 10% time slice:
+ perf script --time 0%-10%
+
+ Select the first and second 10% time slices:
+ perf script --time 10%/1,10%/2
+
+ Select from 0% to 10% and 30% to 40% slices:
+ perf script --time 0%-10%,30%-40%
+
+--max-blocks::
+	Set the maximum number of program blocks to print with brstackinsn for
+ each sample.
+
+--per-event-dump::
+ Create per event files with a "perf.data.EVENT.dump" name instead of
+ printing to stdout, useful, for instance, for generating flamegraphs.
+
+--inline::
+ If a callgraph address belongs to an inlined function, the inline stack
+ will be printed. Each entry has function name and file/line. Enabled by
+ default, disable with --no-inline.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
new file mode 100644
index 000000000..239af8f71
--- /dev/null
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -0,0 +1,378 @@
+perf-stat(1)
+============
+
+NAME
+----
+perf-stat - Run a command and gather performance counter statistics
+
+SYNOPSIS
+--------
+[verse]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] record [-o file] -- <command> [<options>]
+'perf stat' report [-i file]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers performance counter statistics
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+record::
+ See STAT RECORD.
+
+report::
+ See STAT REPORT.
+
+-e::
+--event=::
+ Select the PMU event. Selection can be:
+
+ - a symbolic event name (use 'perf list' to list all events)
+
+ - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
+ - a symbolically formed event like 'pmu/param1=0x3,param2/' where
+ param1 and param2 are defined as formats for the PMU in
+ /sys/bus/event_source/devices/<pmu>/format/*
+
+ - a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
+ where M, N, K are numbers (in decimal, hex, octal format).
+ Acceptable values for each of 'config', 'config1' and 'config2'
+ parameters are defined by corresponding entries in
+ /sys/bus/event_source/devices/<pmu>/format/*
+
+ Note that the last two syntaxes support prefix and glob matching in
+	the PMU name to simplify creation of events across multiple instances
+ of the same type of PMU in large systems (e.g. memory controller PMUs).
+ Multiple PMU instances are typical for uncore PMUs, so the prefix
+ 'uncore_' is also ignored when performing this match.
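+
+	For example, a symbolic event alongside a raw one (the raw event
+	number is illustrative):
+
+	  perf stat -e cycles -e r003c -a -- sleep 1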
+
+
+-i::
+--no-inherit::
+	child tasks do not inherit counters
+
+-p::
+--pid=<pid>::
+ stat events on existing process id (comma separated list)
+
+-t::
+--tid=<tid>::
+ stat events on existing thread id (comma separated list)
+
+
+-a::
+--all-cpus::
+ system-wide collection from all CPUs (default if no target is specified)
+
+-c::
+--scale::
+ scale/normalize counter values
+
+-d::
+--detailed::
+ print more detailed statistics, can be specified up to 3 times
+
+ -d: detailed events, L1 and LLC data cache
+ -d -d: more detailed events, dTLB and iTLB events
+ -d -d -d: very detailed events, adding prefetch events
+
+-r::
+--repeat=<n>::
+ repeat command and print average + stddev (max: 100). 0 means forever.
+
+-B::
+--big-num::
+ print large numbers with thousands' separators according to locale
+
+-C::
+--cpu=::
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode, this option is ignored. The -a option is still necessary
+to activate system-wide monitoring. Default is to count on all CPUs.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+-n::
+--null::
+ null run - don't start any counters
+
+-v::
+--verbose::
+ be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
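+
+For example:
+
+  perf stat -x \; -o stats.csv -- sleep 1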
+
+--table:: Display time for each run (-r option), in a table format, e.g.:
+
+ $ perf stat --null -r 5 --table perf bench sched pipe
+
+ Performance counter stats for 'perf bench sched pipe' (5 runs):
+
+ # Table of individual measurements:
+ 5.189 (-0.293) #
+ 5.189 (-0.294) #
+ 5.186 (-0.296) #
+ 5.663 (+0.181) ##
+ 6.186 (+0.703) ####
+
+ # Final result:
+ 5.483 +- 0.198 seconds time elapsed ( +- 3.62% )
+
+-G name::
+--cgroup name::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line. If the user wants to track multiple events for a specific cgroup, the user can
+use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
+
+If wanting to monitor, say, 'cycles' for a cgroup and also for system wide, this
+command line can be used: 'perf stat -e cycles -G cgroup_name -a -e cycles'.
+
+-o file::
+--output file::
+Print the output into the designated file.
+
+--append::
+Append to the output file designated with the -o option. Ignored if -o is not specified.
+
+--log-fd::
+Log output to fd, instead of stderr. Complementary to --output, and mutually exclusive
+with it. --append may be used here. Examples:
+ 3>results perf stat --log-fd 3 -- $cmd
+ 3>>results perf stat --log-fd 3 --append -- $cmd
+
+--pre::
+--post::
+ Pre and post measurement hooks, e.g.:
+
+perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+
+-I msecs::
+--interval-print msecs::
+Print count deltas every N milliseconds (minimum: 1ms)
+The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
+ example: 'perf stat -I 1000 -e cycles -a sleep 5'
+
+--interval-count times::
+Print count deltas for fixed number of times.
+This option should be used together with "-I" option.
+ example: 'perf stat -I 1000 --interval-count 2 -e cycles -a'
+
+--interval-clear::
+Clear the screen before next interval.
+
+--timeout msecs::
+Stop the 'perf stat' session and print count deltas after N milliseconds (minimum: 10 ms).
+This option is not supported with the "-I" option.
+	example: 'perf stat --timeout 2000 -e cycles -a'
+
+--metric-only::
+Only print computed metrics. Print them in a single line.
+Don't show any raw values. Not supported with --per-thread.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements. This
+is a useful mode to detect imbalance between sockets. To enable this mode,
+use --per-socket in addition to -a. (system-wide). The output includes the
+socket number and the number of online processors on that socket. This is
+useful to gauge the amount of aggregation.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements. This
+is a useful mode to detect imbalance between physical cores. To enable this mode,
+use --per-core in addition to -a. (system-wide). The output includes the
+core number and the number of online logical processors on that physical processor.
+
+--per-thread::
+Aggregate counts per monitored thread, when monitoring threads (-t option)
+or processes (-p option).
+
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
+STAT RECORD
+-----------
+Stores stat data into perf data file.
+
+-o file::
+--output file::
+Output file name.
+
+STAT REPORT
+-----------
+Reads and reports stat data from perf data file.
+
+-i file::
+--input file::
+Input file name.
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements.
+
+-M::
+--metrics::
+Print metrics or metricgroups specified in a comma separated list.
+For a group all metrics from the group are added.
+The events from the metrics are automatically measured.
+See perf list output for the possible metrics and metricgroups.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs.
+
+--topdown::
+Print top down level 1 metrics if supported by the CPU. This allows one to
+determine bottlenecks in the CPU pipeline for CPU bound workloads,
+by breaking the cycles consumed down into frontend bound, backend bound,
+bad speculation and retiring.
+
+Frontend bound means that the CPU cannot fetch and decode instructions fast
+enough. Backend bound means that computation or memory access is the
+bottleneck. Bad Speculation means that the CPU wasted cycles due to branch
+mispredictions and similar issues. Retiring means that the CPU computed without
+an apparent bottleneck. The bottleneck is only the real bottleneck
+if the workload is actually bound by the CPU and not by something else.
+
+For best results it is usually a good idea to use it with interval
+mode like -I 1000, as the bottleneck of workloads can change often.
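+
+For example:
+
+  perf stat --topdown -a -I 1000 -- sleep 10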
+
+The top down metrics are collected per core instead of per
+CPU thread. Per core mode is automatically enabled
+and -a (global monitoring) is needed, requiring root rights or
+kernel.perf_event_paranoid=-1.
+
+Topdown uses the full Performance Monitoring Unit, and needs
+disabling of the NMI watchdog (as root):
+echo 0 > /proc/sys/kernel/nmi_watchdog
+for best results. Otherwise the bottlenecks may be inconsistent
+on workload with changing phases.
+
+This enables --metric-only, unless overridden with --no-metric-only.
+
+To interpret the results it is usually necessary to know which
+CPUs the workload runs on. If needed, the CPUs can be forced using
+taskset.
+
+--no-merge::
+Do not merge results from same PMUs.
+
+When multiple events are created from a single event specification,
+stat will, by default, aggregate the event counts and show the result
+in a single row. This option disables that behavior and shows
+the individual events and counts.
+
+Multiple events are created from a single event specification when:
+1. Prefix or glob matching is used for the PMU name.
+2. Aliases, which are listed immediately after the Kernel PMU events
+ by perf list, are used.
+
+--smi-cost::
+Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
+
+During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
+freeze core counters on SMI.
+The aperf counter will not be affected by the setting.
+The cost of SMI can be measured by (aperf - unhalted core cycles).
+
+In practice, the percentage of SMI cycles is very useful for
+performance-oriented analysis. --metric-only will be applied by default.
+The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf.
+
+Users who want the actual counter values can apply --no-metric-only.
+
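+For example (requires msr/aperf/ and msr/smi/ support; the workload is
+illustrative):
+
+  # perf stat -a --smi-cost -- sleep 10
+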
+EXAMPLES
+--------
+
+$ perf stat -- make
+
+ Performance counter stats for 'make':
+
+ 83723.452481 task-clock:u (msec) # 1.004 CPUs utilized
+ 0 context-switches:u # 0.000 K/sec
+ 0 cpu-migrations:u # 0.000 K/sec
+ 3,228,188 page-faults:u # 0.039 M/sec
+ 229,570,665,834 cycles:u # 2.742 GHz
+ 313,163,853,778 instructions:u # 1.36 insn per cycle
+ 69,704,684,856 branches:u # 832.559 M/sec
+ 2,078,861,393 branch-misses:u # 2.98% of all branches
+
+ 83.409183620 seconds time elapsed
+
+ 74.684747000 seconds user
+ 8.739217000 seconds sys
+
+TIMINGS
+-------
+As shown in the example above, perf stat can display 3 types of timings.
+We always display the time the counters were enabled/alive:
+
+ 83.409183620 seconds time elapsed
+
+For workload sessions we also display the time the workload spent in
+user/system space:
+
+ 74.684747000 seconds user
+ 8.739217000 seconds sys
+
+Those times are the same as those displayed by the 'time' tool.
+
+CSV FORMAT
+----------
+
+With -x, perf stat is able to output a not-quite-CSV format.
+Commas in the output are not quoted with "". To make the output easy to
+parse, it is recommended to use a different separator character, like -x \;.
+
+The fields are in this order:
+
+ - optional usec time stamp in fractions of second (with -I xxx)
+ - optional CPU, core, or socket identifier
+ - optional number of logical CPUs aggregated
+ - counter value
+ - unit of the counter value or empty
+ - event name
+ - run time of counter
+ - percentage of measurement time the counter was running
+ - optional variance if multiple values are collected with -r
+ - optional metric value
+ - optional unit of metric
+
+Additional metrics may be printed with all earlier fields being empty.
+
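+For example, with ';' as the separator, the output resembles (the counter
+values shown are illustrative):
+
+  $ perf stat -x \; -e cycles -- sleep 1
+  847813;;cycles:u;482763;100.00;;
+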
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
new file mode 100644
index 000000000..b329c65d7
--- /dev/null
+++ b/tools/perf/Documentation/perf-test.txt
@@ -0,0 +1,36 @@
+perf-test(1)
+============
+
+NAME
+----
+perf-test - Runs sanity tests.
+
+SYNOPSIS
+--------
+[verse]
+'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
+
+DESCRIPTION
+-----------
+This command runs assorted sanity tests, initially through linked routines, but
+it will also look for a directory with more tests in the form of scripts.
+
+To get a list of available tests, use 'perf test list'; specifying a test name
+fragment will show all tests that contain it.
+
+To run only specific tests, pass test name fragments or the numbers obtained
+from 'perf test list'.
+
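+For example (test numbers and names vary between perf versions):
+
+  $ perf test list
+  $ perf test 1 2
+  $ perf test vmlinux
+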
+OPTIONS
+-------
+-s::
+--skip::
+ Tests to skip (comma separated numeric list).
+
+-v::
+--verbose::
+ Be more verbose.
+
+-F::
+--dont-fork::
+ Do not fork child for each test, run all tests within single process.
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
new file mode 100644
index 000000000..ef0c7565b
--- /dev/null
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -0,0 +1,128 @@
+perf-timechart(1)
+=================
+
+NAME
+----
+perf-timechart - Tool to visualize total system behavior during a workload
+
+SYNOPSIS
+--------
+[verse]
+'perf timechart' [<timechart options>] {record} [<record options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf timechart:
+
+ 'perf timechart record <command>' to record the system level events
+ of an arbitrary workload. By default timechart records only scheduler
+ and CPU events (task switches, running times, CPU power states, etc),
+ but it's possible to record IO (disk, network) activity using the -I argument.
+
+ 'perf timechart' to turn a trace into a Scalable Vector Graphics file
+ that can be viewed with popular SVG viewers such as 'Inkscape'. Depending
+ on the events in the perf.data file, timechart will contain scheduler/cpu
+ events or IO events.
+
+ In IO mode, every bar has two charts: upper and lower.
+ The upper bar shows incoming events (disk reads, ingress network packets).
+ The lower bar shows outgoing events (disk writes, egress network packets).
+ There are also poll bars, which show how much time the application spent
+ in poll/epoll/select syscalls.
+
+TIMECHART OPTIONS
+-----------------
+-o::
+--output=::
+ Select the output file (default: output.svg)
+-i::
+--input=::
+ Select the input file (default: perf.data unless stdin is a fifo)
+-w::
+--width=::
+ Select the width of the SVG file (default: 1000)
+-P::
+--power-only::
+ Only output the CPU power section of the diagram
+-T::
+--tasks-only::
+ Don't output processor state transitions
+-p::
+--process::
+ Select the processes to display, by name or PID
+-f::
+--force::
+ Don't complain, do it.
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+-n::
+--proc-num::
+	Print task info for at least the given number of tasks.
+-t::
+--topology::
+ Sort CPUs according to topology.
+--highlight=<duration_nsecs|task_name>::
+	Highlight tasks (using a different color) that run more than the given
+	duration or tasks with the given name. If a number is given it's
+	interpreted as a number of nanoseconds. If a non-numeric string is
+	given it's interpreted as a task name.
+--io-skip-eagain::
+ Don't draw EAGAIN IO events.
+--io-min-time=<nsecs>::
+ Draw small events as if they lasted min-time. Useful when you need
+	to see very small and fast IO. It's possible to use an ms or us
+	suffix to specify the time in milliseconds or microseconds.
+ Default value is 1ms.
+--io-merge-dist=<nsecs>::
+ Merge events that are merge-dist nanoseconds apart.
+ Reduces number of figures on the SVG and makes it more render-friendly.
+	It's possible to use an ms or us suffix to specify the time in
+ milliseconds or microseconds.
+ Default value is 1us.
+
+RECORD OPTIONS
+--------------
+-P::
+--power-only::
+ Record only power-related events
+-T::
+--tasks-only::
+ Record only tasks-related events
+-I::
+--io-only::
+ Record only io-related events
+-g::
+--callchain::
+ Do call-graph (stack chain/backtrace) recording
+
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+ [ perf record: Woken up 13 times to write data ]
+ [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+ Written 10.2 seconds of trace to output.svg.
+
+Record system-wide timechart:
+
+ $ perf timechart record
+
+ then generate timechart and highlight 'gcc' tasks:
+
+ $ perf timechart --highlight gcc
+
+Record system-wide IO events:
+
+ $ perf timechart record -I
+
+ then generate timechart:
+
+ $ perf timechart
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
new file mode 100644
index 000000000..114fda12a
--- /dev/null
+++ b/tools/perf/Documentation/perf-top.txt
@@ -0,0 +1,294 @@
+perf-top(1)
+===========
+
+NAME
+----
+perf-top - System profiling tool.
+
+SYNOPSIS
+--------
+[verse]
+'perf top' [-e <EVENT> | --event=EVENT] [<options>]
+
+DESCRIPTION
+-----------
+This command generates and displays a performance counter profile in real time.
+
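+For example, to profile an existing process at 99 Hz, showing the hottest
+functions live (the PID and frequency are illustrative):
+
+  $ perf top -F 99 -p 1234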
+
+OPTIONS
+-------
+-a::
+--all-cpus::
+ System-wide collection. (default)
+
+-c <count>::
+--count=<count>::
+ Event period to sample.
+
+-C <cpu-list>::
+--cpu=<cpu>::
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Default is to monitor all CPUS.
+
+-d <seconds>::
+--delay=<seconds>::
+ Number of seconds to delay between refreshes.
+
+-e <event>::
+--event=<event>::
+ Select the PMU event. Selection can be a symbolic event name
+ (use 'perf list' to list all events) or a raw PMU
+ event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+-E <entries>::
+--entries=<entries>::
+ Display this many functions.
+
+-f <count>::
+--count-filter=<count>::
+ Only display functions with more events than this.
+
+--group::
+ Put the counters into a counter group.
+
+-F <freq>::
+--freq=<freq>::
+	Profile at this frequency. Use 'max' to use the current maximum
+	allowed frequency, i.e. the value in the kernel.perf_event_max_sample_rate
+ sysctl.
+
+-i::
+--inherit::
+ Child tasks do not inherit counters.
+
+-k <path>::
+--vmlinux=<path>::
+ Path to vmlinux. Required for annotation functionality.
+
+--ignore-vmlinux::
+ Ignore vmlinux files.
+
+-m <pages>::
+--mmap-pages=<pages>::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+	size is rounded up to the nearest power-of-two number of pages.
+
+-p <pid>::
+--pid=<pid>::
+ Profile events on existing Process ID (comma separated list).
+
+-t <tid>::
+--tid=<tid>::
+ Profile events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
+-r <priority>::
+--realtime=<priority>::
+ Collect data with this RT SCHED_FIFO priority.
+
+--sym-annotate=<symbol>::
+ Annotate this symbol.
+
+-K::
+--hide_kernel_symbols::
+ Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+ Hide user symbols.
+
+--demangle-kernel::
+ Demangle kernel symbols.
+
+-D::
+--dump-symtab::
+ Dump the symbol table used for profiling.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-z::
+--zero::
+ Zero history across display updates.
+
+-s::
+--sort::
+ Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+ local_weight, abort, in_tx, transaction, overhead, sample, period.
+ Please see description of --sort in the perf-report man page.
+
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+	The following fields are available:
+	overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+	It can also contain any sort key(s).
+
+	By default, every sort key not specified in --fields will be appended
+ automatically.
+
+-n::
+--show-nr-samples::
+ Show a column with the number of samples.
+
+--show-total-period::
+ Show a column with the sum of periods.
+
+--dsos::
+ Only consider symbols in these dsos. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
+
+--comms::
+ Only consider symbols in these comms. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
+
+--symbols::
+ Only consider these symbols. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--source::
+ Interleave source code with assembly code. Enabled by default,
+ disable with --no-source.
+
+--asm-raw::
+ Show raw instruction encoding of assembly instructions.
+
+-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g. See `--call-graph` section in perf-record and
+ perf-report man pages for details.
+
+--children::
+	Accumulate callchains of children to the parent entry so that they can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires -g/--call-graph option
+ enabled. See the `overhead calculation' section for more details.
+ Enabled by default, disable with --no-children.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: /proc/sys/kernel/perf_event_max_stack when present, 127 otherwise.
+
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
+--percent-limit::
+	Do not show entries which have an overhead under the given percent.
+ (Default: 0).
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+-w::
+--column-widths=<width[,width...]>::
+ Force each column width to the provided list, for large terminal
+ readability. 0 means no limit (default behavior).
+
+--proc-map-timeout::
+	Processing the /proc/XXX/mmap files of pre-existing threads may take
+	a long time, because the files may be huge. A timeout is needed
+	in such cases.
+	This option sets the timeout limit. The default value is 500 ms.
+
+
+-b::
+--branch-any::
+ Enable taken branch stack sampling. Any type of taken branch may be sampled.
+	This is a shortcut for --branch-filter any. See --branch-filter for more info.
+
+-j::
+--branch-filter::
+ Enable taken branch stack sampling. Each sample captures a series of consecutive
+ taken branches. The number of branches captured with each sample depends on the
+ underlying hardware, the type of branches of interest, and the executed code.
+ It is possible to select the types of branches captured by enabling filters.
+ For a full list of modifiers please see the perf record manpage.
+
+ The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+ The privilege levels may be omitted, in which case, the privilege levels of the associated
+ event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+ levels are subject to permissions. When sampling on multiple events, branch stack sampling
+ is enabled for all the sampling events. The sampled branch type is the same for all events.
+ The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+ Note that this feature may not be available on all processors.
+
+--raw-trace::
+ When displaying traceevent output, do not use print fmt or plugins.
+
+--hierarchy::
+ Enable hierarchy output.
+
+--force::
+ Don't do ownership validation.
+
+--num-thread-synthesize::
+ The number of threads to run when synthesizing events for existing processes.
+	By default, the number of threads equals the number of online CPUs.
+
+INTERACTIVE PROMPTING KEYS
+--------------------------
+
+[d]::
+ Display refresh delay.
+
+[e]::
+ Number of entries to display.
+
+[E]::
+ Event to display when multiple counters are active.
+
+[f]::
+ Profile display filter (>= hit count).
+
+[F]::
+ Annotation display filter (>= % of total).
+
+[s]::
+ Annotate symbol.
+
+[S]::
+ Stop annotation, return to full profile display.
+
+[K]::
+ Hide kernel symbols.
+
+[U]::
+ Hide user symbols.
+
+[z]::
+ Toggle event count zeroing across display updates.
+
+[qQ]::
+ Quit.
+
+Pressing any unmapped key displays a menu, and prompts for input.
+
+include::callchain-overhead-calculation.txt[]
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
new file mode 100644
index 000000000..115db9e06
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -0,0 +1,243 @@
+perf-trace(1)
+=============
+
+NAME
+----
+perf-trace - strace inspired tool
+
+SYNOPSIS
+--------
+[verse]
+'perf trace'
+'perf trace record'
+
+DESCRIPTION
+-----------
+This command will show the events associated with the target, initially
+syscalls, but also other system events like pagefaults, task lifetime events,
+scheduling events, etc.
+
+This is a live mode tool in addition to working with perf.data files like
+the other perf tools. Files can be generated using the 'perf record' command
+but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
+Alternatively, 'perf trace record' can be used as a shortcut to
+automatically include the raw_syscalls events when writing events to a file.
+
+The following options apply to perf trace; options to perf trace record are
+found in the perf record man page.
+
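+For example, to watch just the nanosleep syscalls of a short workload (the
+event choice is illustrative):
+
+  $ perf trace -e nanosleep sleep 1
+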
+OPTIONS
+-------
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-e::
+--expr::
+--event::
+ List of syscalls and other perf events (tracepoints, HW cache events,
+ etc) to show. Globbing is supported, e.g.: "epoll_*", "*msg*", etc.
+ See 'perf list' for a complete list of events.
+ Prefixing with ! shows all syscalls but the ones specified. You may
+ need to escape it.
+
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-o::
+--output=::
+ Output file name.
+
+-p::
+--pid=::
+ Record events on existing process ID (comma separated list).
+
+-t::
+--tid=::
+ Record events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
+-G::
+--cgroup::
+ Record events in threads in a cgroup.
+
+ Look for cgroups to set at the /sys/fs/cgroup/perf_event directory, then
+ remove the /sys/fs/cgroup/perf_event/ part and try:
+
+ perf trace -G A -e sched:*switch
+
+ Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+ _and_ sched:sched_switch to the 'A' cgroup, while:
+
+ perf trace -e sched:*switch -G A
+
+ will only set the sched:sched_switch event to the 'A' cgroup, all the
+	other events (raw_syscalls:sys_{enter,exit}, etc) are left "without"
+ a cgroup (on the root cgroup, sys wide, etc).
+
+ Multiple cgroups:
+
+ perf trace -G A -e sched:*switch -G B
+
+ the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+ to the 'B' cgroup.
+
+--filter-pids=::
+ Filter out events for these pids and for 'trace' itself (comma separated list).
+
+-v::
+--verbose=::
+ Verbosity level.
+
+--no-inherit::
+ Child tasks do not inherit counters.
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+	size is rounded up to the nearest power-of-two number of pages.
+
+-C::
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), events are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+--duration::
+ Show only events that had a duration greater than N.M ms.
+
+--sched::
+ Accrue thread runtime and provide a summary at the end of the session.
+
+--failure::
+ Show only syscalls that failed, i.e. that returned < 0.
+
+-i::
+--input::
+ Process events from a given perf data file.
+
+-T::
+--time::
+	Print the full timestamp rather than the time relative to the first sample.
+
+--comm::
+ Show process COMM right beside its ID, on by default, disable with --no-comm.
+
+-s::
+--summary::
+ Show only a summary of syscalls by thread with min, max, and average times
+ (in msec) and relative stddev.
+
+-S::
+--with-summary::
+ Show all syscalls followed by a summary by thread with min, max, and
+ average times (in msec) and relative stddev.
+
+--tool_stats::
+	Show tool stats such as the number of times fd->pathname was discovered through
+ hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+
+-f::
+--force::
+ Don't complain, do it.
+
+-F=[all|min|maj]::
+--pf=[all|min|maj]::
+ Trace pagefaults. Optionally, you can specify whether you want minor,
+ major or all pagefaults. Default value is maj.
+
+--syscalls::
+	Trace system calls. This option is enabled by default, disable with
+ --no-syscalls.
+
+--call-graph [mode,type,min[,limit],order[,key][,branch]]::
+ Setup and enable call-graph (stack chain/backtrace) recording.
+ See `--call-graph` section in perf-record and perf-report
+ man pages for details. The ones that are most useful in 'perf trace'
+	are 'dwarf' and 'lbr', where available; try: 'perf trace --call-graph dwarf'.
+
+ Using this will, for the root user, bump the value of --mmap-pages to 4
+ times the maximum for non-root users, based on the kernel.perf_event_mlock_kb
+ sysctl. This is done only if the user doesn't specify a --mmap-pages value.
+
+--kernel-syscall-graph::
+ Show the kernel callchains on the syscall exit path.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. Note that at this point
+	this only affects the presentation, i.e. the kernel is still
+	not limiting anything; the overhead of callchains needs to be set via the
+ knobs in --call-graph dwarf.
+
+ Implies '--call-graph dwarf' when --call-graph not present on the
+ command line, on systems where DWARF unwinding was built in.
+
+ Default: /proc/sys/kernel/perf_event_max_stack when present for
+ live sessions (without --input/-i), 127 otherwise.
+
+--min-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ below the specified depth will be ignored. Disabled by default.
+
+ Implies '--call-graph dwarf' when --call-graph not present on the
+ command line, on systems where DWARF unwinding was built in.
+
+--print-sample::
+ Print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info for the
+ raw_syscalls:sys_{enter,exit} tracepoints, for debugging.
+
+--proc-map-timeout::
+	Processing the /proc/XXX/mmap files of pre-existing threads may take a long
+	time, because the files may be huge. A timeout is needed in such cases.
+	This option sets the timeout limit. The default value is 500 ms.
+
+PAGEFAULTS
+----------
+
+When tracing pagefaults, the format of the trace is as follows:
+
+<min|maj>fault [<ip.symbol>+<ip.offset>] => <addr.dso@addr.offset> (<map type><addr level>).
+
+- min/maj indicates whether fault event is minor or major;
+- ip.symbol shows symbol for instruction pointer (the code that generated the
+  fault); if no debug symbols are available, perf trace will print the raw IP;
+- addr.dso shows DSO for the faulted address;
+- map type is either 'd' for non-executable maps or 'x' for executable maps;
+- addr level is either 'k' for kernel dso or '.' for user dso.
+
+For symbol resolution you may need to install debugging symbols.
+
+Please be aware that the duration is currently always 0 and doesn't reflect
+the actual time it took for the fault to be handled!
+
+When --verbose is specified, perf trace tries to print all available information
+for both IP and fault address in the form of dso@symbol+offset.
+
+EXAMPLES
+--------
+
+Trace only major pagefaults:
+
+ $ perf trace --no-syscalls -F
+
+Trace syscalls, major and minor pagefaults:
+
+ $ perf trace -F all
+
+ 1416.547 ( 0.000 ms): python/20235 majfault [CRYPTO_push_info_+0x0] => /lib/x86_64-linux-gnu/libcrypto.so.1.0.0@0x61be0 (x.)
+
+ As you can see, there was a major pagefault in the python process, from
+ the CRYPTO_push_info_ routine, which faulted somewhere in libcrypto.so.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-version.txt b/tools/perf/Documentation/perf-version.txt
new file mode 100644
index 000000000..e207b7cfc
--- /dev/null
+++ b/tools/perf/Documentation/perf-version.txt
@@ -0,0 +1,24 @@
+perf-version(1)
+===============
+
+NAME
+----
+perf-version - display the version of perf binary
+
+SYNOPSIS
+--------
+'perf version' [--build-options]
+
+DESCRIPTION
+-----------
+With no options given, 'perf version' prints the perf version
+on the standard output.
+
+If the option '--build-options' is given, then the status of
+compiled-in libraries is printed on the standard output.
+
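+For example (the set of libraries shown varies with the build):
+
+  $ perf version --build-options
+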
+OPTIONS
+-------
+--build-options::
+ Prints the status of compiled-in libraries on the
+ standard output.
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
new file mode 100644
index 000000000..dfb218fea
--- /dev/null
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -0,0 +1,497 @@
+perf.data format
+
+Up to date as of v4.7
+
+This document describes the on-disk perf.data format, generated by perf record
+or perf inject and consumed by the other perf tools.
+
+On a high level perf.data contains the events generated by the PMUs, plus metadata.
+
+All fields are in the native endianness of the machine that generated the perf.data file.
+
+When perf is writing to a pipe it uses a special version of the file
+format that does not rely on seeking to adjust data offsets. This
+format is described in the "Pipe-mode data" section. The pipe data version can be
+augmented with additional events using perf inject.
+
+The file starts with a perf_header:
+
+struct perf_header {
+ char magic[8]; /* PERFILE2 */
+ uint64_t size; /* size of the header */
+ uint64_t attr_size; /* size of an attribute in attrs */
+ struct perf_file_section attrs;
+ struct perf_file_section data;
+ struct perf_file_section event_types;
+ uint64_t flags;
+ uint64_t flags1[3];
+};
+
+The magic number identifies the perf file and the version. Current perf versions
+use PERFILE2. Old perf versions generated a version 1 format (PERFFILE). Version 1
+is not described here. The magic number also identifies the endianness. When the
+64-bit magic value appears byte-swapped, the file is in non-native
+endianness.
+
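+As a minimal sketch (not part of the format definition), a reader could
+validate the magic and detect the byte order like this, assuming glibc's
+byteswap.h:
+
+#include <stdint.h>
+#include <string.h>
+#include <byteswap.h>
+
+/* Returns 0 on success and sets *needs_swap, -1 if not a PERFILE2 file. */
+static int check_magic(const char magic[8], int *needs_swap)
+{
+	uint64_t native, found;
+
+	memcpy(&native, "PERFILE2", 8);
+	memcpy(&found, magic, 8);
+	if (found == native) {
+		*needs_swap = 0;	/* file written in native endianness */
+		return 0;
+	}
+	if (bswap_64(found) == native) {
+		*needs_swap = 1;	/* every other field must be byte-swapped */
+		return 0;
+	}
+	return -1;
+}
+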
+A perf_file_section contains a pointer to another section of the perf file.
+The header contains three such pointers: for attributes, data and event types.
+
+struct perf_file_section {
+ uint64_t offset; /* offset from start of file */
+ uint64_t size; /* size of the section */
+};
+
+Flags section:
+
+The header is followed by different optional headers, described by the bits set
+in flags. Only headers for which the bit is set are included. Each header
+consists of a perf_file_section located after the initial header.
+The respective perf_file_section points to the data of the additional
+header and defines its size.
+
+Some headers consist of strings, which are defined like this:
+
+struct perf_header_string {
+ uint32_t len;
+ char string[len]; /* zero terminated */
+};
+
+Some headers consist of a sequence of strings, which start with a
+
+struct perf_header_string_list {
+ uint32_t nr;
+ struct perf_header_string strings[nr]; /* variable length records */
+};
+
+The bits are the flag bits in a 256-bit bitmap starting with
+flags. These define the valid bits:
+
+ HEADER_RESERVED = 0, /* always cleared */
+ HEADER_FIRST_FEATURE = 1,
+ HEADER_TRACING_DATA = 1,
+
+Describe me.
+
+ HEADER_BUILD_ID = 2,
+
+The header consists of a sequence of build_id_events. The size of each record
+is defined by header.size (see perf_event.h). Each event defines an ELF build id
+for an executable file name for a pid. An ELF build id is a unique identifier
+assigned by the linker to an executable.
+
+struct build_id_event {
+ struct perf_event_header header;
+ pid_t pid;
+ uint8_t build_id[24];
+ char filename[header.size - offsetof(struct build_id_event, filename)];
+};
+
+ HEADER_HOSTNAME = 3,
+
+A perf_header_string with the hostname where the data was collected
+(uname -n)
+
+ HEADER_OSRELEASE = 4,
+
+A perf_header_string with the os release where the data was collected
+(uname -r)
+
+ HEADER_VERSION = 5,
+
+A perf_header_string with the perf user tool version where the
+data was collected. This is the same as the version of the source tree
+the perf tool was built from.
+
+ HEADER_ARCH = 6,
+
+A perf_header_string with the CPU architecture (uname -m)
+
+ HEADER_NRCPUS = 7,
+
+A structure defining the number of CPUs.
+
+struct nr_cpus {
+ uint32_t nr_cpus_available; /* CPUs not yet onlined */
+ uint32_t nr_cpus_online;
+};
+
+ HEADER_CPUDESC = 8,
+
+A perf_header_string with description of the CPU. On x86 this is the model name
+in /proc/cpuinfo
+
+ HEADER_CPUID = 9,
+
+A perf_header_string with the exact CPU type. On x86 this is
+vendor,family,model,stepping. For example: GenuineIntel,6,69,1
+
+ HEADER_TOTAL_MEM = 10,
+
+A uint64_t with the total memory in bytes.
+
+ HEADER_CMDLINE = 11,
+
+A perf_header_string with the perf command line used to collect the data.
+
+ HEADER_EVENT_DESC = 12,
+
+Another description of the perf_event_attrs, more detailed than header.attrs,
+including IDs and names. See perf_event.h or the man page for a description
+of a struct perf_event_attr.
+
+struct {
+ uint32_t nr; /* number of events */
+ uint32_t attr_size; /* size of each perf_event_attr */
+ struct {
+ struct perf_event_attr attr; /* size of attr_size */
+ uint32_t nr_ids;
+ struct perf_header_string event_string;
+ uint64_t ids[nr_ids];
+ } events[nr]; /* Variable length records */
+};
+
+ HEADER_CPU_TOPOLOGY = 13,
+
+String lists defining the core and CPU threads topology.
+The string lists are followed by a variable length array
+which contains core_id and socket_id of each cpu.
+The number of entries can be determined by the size of the
+section minus the sizes of both string lists.
+
+struct {
+ struct perf_header_string_list cores; /* Variable length */
+ struct perf_header_string_list threads; /* Variable length */
+ struct {
+ uint32_t core_id;
+ uint32_t socket_id;
+ } cpus[nr]; /* Variable length records */
+};
+
+Example:
+ sibling cores : 0-3
+ sibling threads : 0-1
+ sibling threads : 2-3
+
+ HEADER_NUMA_TOPOLOGY = 14,
+
+ A list of NUMA node descriptions
+
+struct {
+ uint32_t nr;
+ struct {
+ uint32_t nodenr;
+ uint64_t mem_total;
+ uint64_t mem_free;
+ struct perf_header_string cpus;
+ } nodes[nr]; /* Variable length records */
+};
+
+ HEADER_BRANCH_STACK = 15,
+
+Not implemented in perf.
+
+ HEADER_PMU_MAPPINGS = 16,
+
+ A list of PMU structures, defining the different PMUs supported by perf.
+
+struct {
+ uint32_t nr;
+ struct pmu {
+ uint32_t pmu_type;
+ struct perf_header_string pmu_name;
+ } [nr]; /* Variable length records */
+};
+
+ HEADER_GROUP_DESC = 17,
+
+ Description of counter groups ({...} in perf syntax)
+
+struct {
+ uint32_t nr;
+ struct {
+ struct perf_header_string string;
+ uint32_t leader_idx;
+ uint32_t nr_members;
+ } [nr]; /* Variable length records */
+};
+
+ HEADER_AUXTRACE = 18,
+
+Define additional auxtrace areas in the perf.data. auxtrace is used to store
+undecoded hardware tracing information, such as Intel Processor Trace data.
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ * perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+ u64 file_offset;
+ u64 sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ * file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+ struct list_head list;
+ size_t nr;
+ struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+ HEADER_STAT = 19,
+
+This is merely a flag signifying that the data section contains data
+recorded from perf stat record.
+
+ HEADER_CACHE = 20,
+
+Description of the cache hierarchy. Based on the Linux sysfs format
+in /sys/devices/system/cpu/cpu*/cache/
+
+ u32 version Currently always 1
+ u32 number_of_cache_levels
+
+struct {
+ u32 level;
+ u32 line_size;
+ u32 sets;
+ u32 ways;
+ struct perf_header_string type;
+ struct perf_header_string size;
+ struct perf_header_string map;
+}[number_of_cache_levels];
+
+ HEADER_SAMPLE_TIME = 21,
+
+Two uint64_t for the time of first sample and the time of last sample.
+
+	other bits are reserved and should be ignored for now
+ HEADER_FEAT_BITS = 256,
+
+Attributes
+
+This is an array of perf_event_attrs, each attr_size bytes long, which defines
+each event collected. See perf_event.h or the man page for a detailed
+description.
+
+Data
+
+This section is the bulk of the file. It consists of a stream of perf_events
+describing events. This matches the format generated by the kernel.
+See perf_event.h or the manpage for a detailed description.
+
+Some notes on parsing:
+
+Ordering
+
+The events are not necessarily in time stamp order, as they can be
+collected in parallel on different CPUs. If the events should be
+processed in time order they need to be sorted first. It is possible
+to only do a partial sort using the FINISHED_ROUND event header (see
+below). perf record guarantees that there is no reordering over a
+FINISHED_ROUND.
+
+ID vs IDENTIFIER
+
+When the event stream contains multiple events, each event is identified
+by an ID. This can be either through the PERF_SAMPLE_ID or the
+PERF_SAMPLE_IDENTIFIER header. The PERF_SAMPLE_IDENTIFIER header is
+at a fixed offset from the event header, which allows reliable
+parsing of the header. Relying on ID may be ambiguous.
+IDENTIFIER is only supported by newer Linux kernels.
+
+Perf record specific events:
+
+In addition to the kernel-generated event types, perf record adds its
+own event types (in addition, it also synthesizes some kernel events,
+for example MMAP events).
+
+ PERF_RECORD_USER_TYPE_START = 64,
+ PERF_RECORD_HEADER_ATTR = 64,
+
+struct attr_event {
+ struct perf_event_header header;
+ struct perf_event_attr attr;
+ uint64_t id[];
+};
+
+ PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
+
+#define MAX_EVENT_NAME 64
+
+struct perf_trace_event_type {
+ uint64_t event_id;
+ char name[MAX_EVENT_NAME];
+};
+
+struct event_type_event {
+ struct perf_event_header header;
+ struct perf_trace_event_type event_type;
+};
+
+
+ PERF_RECORD_HEADER_TRACING_DATA = 66,
+
+Describe me
+
+struct tracing_data_event {
+ struct perf_event_header header;
+ uint32_t size;
+};
+
+ PERF_RECORD_HEADER_BUILD_ID = 67,
+
+Define an ELF build ID for a referenced executable.
+
+ struct build_id_event; /* See above */
+
+ PERF_RECORD_FINISHED_ROUND = 68,
+
+No event reordering over this header. No payload.
+
+ PERF_RECORD_ID_INDEX = 69,
+
+Map event ids to CPUs and TIDs.
+
+struct id_index_entry {
+ uint64_t id;
+ uint64_t idx;
+ uint64_t cpu;
+ uint64_t tid;
+};
+
+struct id_index_event {
+ struct perf_event_header header;
+ uint64_t nr;
+ struct id_index_entry entries[nr];
+};
+
+ PERF_RECORD_AUXTRACE_INFO = 70,
+
+Auxtrace type specific information. Describe me
+
+struct auxtrace_info_event {
+ struct perf_event_header header;
+ uint32_t type;
+ uint32_t reserved__; /* For alignment */
+ uint64_t priv[];
+};
+
+ PERF_RECORD_AUXTRACE = 71,
+
+Defines auxtrace data. Followed by the actual data. The contents of
+the auxtrace data depend on the event and the CPU. For example,
+for Intel Processor Trace it contains Processor Trace data generated
+by the CPU.
+
+struct auxtrace_event {
+ struct perf_event_header header;
+ uint64_t size;
+ uint64_t offset;
+ uint64_t reference;
+ uint32_t idx;
+ uint32_t tid;
+ uint32_t cpu;
+ uint32_t reserved__; /* For alignment */
+};
+
+struct aux_event {
+ struct perf_event_header header;
+ uint64_t aux_offset;
+ uint64_t aux_size;
+ uint64_t flags;
+};
+
+ PERF_RECORD_AUXTRACE_ERROR = 72,
+
+Describes an error in hardware tracing
+
+enum auxtrace_error_type {
+ PERF_AUXTRACE_ERROR_ITRACE = 1,
+ PERF_AUXTRACE_ERROR_MAX
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+ struct perf_event_header header;
+ uint32_t type;
+ uint32_t code;
+ uint32_t cpu;
+ uint32_t pid;
+ uint32_t tid;
+ uint32_t reserved__; /* For alignment */
+ uint64_t ip;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+ PERF_RECORD_HEADER_FEATURE = 80,
+
+Describes a header feature. These are records used in pipe-mode that
+contain information that otherwise would be in the perf.data file's header.
+
+Event types
+
+Define the event attributes with their IDs.
+
+An array bound by the perf_file_section size.
+
+ struct {
+ struct perf_event_attr attr; /* Size defined by header.attr_size */
+ struct perf_file_section ids;
+ }
+
+ids points to an array of uint64_t defining the ids for the event attr attr.
+
+Pipe-mode data
+
+Pipe-mode avoids seeks in the file by removing the perf_file_section and flags
+from the struct perf_header. The trimmed header is:
+
+struct perf_pipe_file_header {
+ u64 magic;
+ u64 size;
+};
+
+The information about attrs, data, and event_types is instead in the
+synthesized events PERF_RECORD_ATTR, PERF_RECORD_HEADER_TRACING_DATA,
+PERF_RECORD_HEADER_EVENT_TYPE, and PERF_RECORD_HEADER_FEATURE
+that are generated by perf record in pipe-mode.
+
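+For example, pipe mode is exercised when chaining tools (the options shown
+are illustrative):
+
+  perf record -o - -- sleep 1 | perf report -i -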
+
+References:
+
+include/uapi/linux/perf_event.h
+
+This is the canonical description of the kernel-generated perf_events
+and the perf_event_attrs.
+
+perf_events manpage
+
+A manpage describing perf_event and perf_event_attr is here:
+http://web.eece.maine.edu/~vweaver/projects/perf_events/programming.html
+This tends to be slightly behind the kernel include, but has better
+descriptions. A (typically older) version of the man page may be
+included with the standard Linux man pages, available with "man
+perf_events"
+
+pmu-tools
+
+https://github.com/andikleen/pmu-tools/tree/master/parser
+
+A definition of the perf.data format in the python "construct" format is
+available in the pmu-tools parser. This allows reading perf.data from python
+and dumping it.
+
+quipper
+
+The quipper C++ parser is available at
+http://github.com/google/perf_data_converter/tree/master/src/quipper
+
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
new file mode 100644
index 000000000..864e37597
--- /dev/null
+++ b/tools/perf/Documentation/perf.txt
@@ -0,0 +1,49 @@
+perf(1)
+=======
+
+NAME
+----
+perf - Performance analysis tools for Linux
+
+SYNOPSIS
+--------
+[verse]
+'perf' [--version] [--help] [OPTIONS] COMMAND [ARGS]
+
+OPTIONS
+-------
+--debug::
+	Set a debug variable (see the list below) to a value in the
+	range (0, 10). Use like:
+ --debug verbose # sets verbose = 1
+ --debug verbose=2 # sets verbose = 2
+
+	List of debug variables that can be set:
+ verbose - general debug messages
+ ordered-events - ordered events object debug messages
+ data-convert - data convert command debug messages
+
+--buildid-dir::
+	Set up the buildid cache directory. It has higher priority than
+	the buildid.dir config file option.
+
+-v::
+--version::
+ Display perf version.
+
+-h::
+--help::
+ Run perf help command.
+
+DESCRIPTION
+-----------
+Performance counters for Linux are a kernel-based subsystem
+that provides a framework for all things performance analysis. It
+covers hardware-level features (CPU/PMU, Performance Monitoring Unit)
+as well as software features (software counters, tracepoints).
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
new file mode 100644
index 000000000..2b477c1d1
--- /dev/null
+++ b/tools/perf/Documentation/perfconfig.example
@@ -0,0 +1,38 @@
+[colors]
+
+ # These were the old defaults
+ top = red, lightgray
+ medium = green, lightgray
+ normal = black, lightgray
+ selected = lightgray, magenta
+ jump_arrows = blue, lightgray
+ addr = magenta, lightgray
+
+[tui]
+
+ # Defaults if linked with libslang
+ report = on
+ annotate = on
+ top = on
+
+[buildid]
+
+ # Default, disable using /dev/null
+ dir = /root/.debug
+
+[annotate]
+
+ # Defaults
+ hide_src_code = false
+ use_offset = true
+ jump_arrows = true
+ show_nr_jumps = false
+
+[report]
+
+ # Defaults
+ sort-order = comm,dso,symbol
+ percent-limit = 0
+ queue-size = 0
+ children = true
+ group = true
diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt
new file mode 100644
index 000000000..849599f39
--- /dev/null
+++ b/tools/perf/Documentation/tips.txt
@@ -0,0 +1,36 @@
+For a higher level overview, try: perf report --sort comm,dso
+Sample related events with: perf record -e '{cycles,instructions}:S'
+Compare performance results with: perf diff [<old file> <new file>]
+Boolean options have negative forms, e.g.: perf report --no-children
+Customize output of perf script with: perf script -F event,ip,sym
+Generate a script for your data: perf script -g <lang>
+Save output of perf stat using: perf stat record <target workload>
+Create an archive with symtabs to analyse on another machine: perf archive
+Search options using a keyword: perf report -h <keyword>
+Use parent filter to see specific call path: perf report -p <regex>
+List events using substring match: perf list <keyword>
+To see list of saved events and attributes: perf evlist -v
+Use --symfs <dir> if your symbol files are in non-standard locations
+To see callchains in a more compact form: perf report -g folded
+Show individual samples with: perf script
+Limit to show entries above 5% only: perf report --percent-limit 5
+Profiling branch (mis)predictions with: perf record -b / perf report
+Treat branches as callchains: perf report --branch-history
+To count events every 1000 msec: perf stat -I 1000
+Print event counts in CSV format with: perf stat -x,
+If you have debuginfo enabled, try: perf report -s sym,srcline
+For memory address profiling, try: perf mem record / perf mem report
+For tracepoint events, try: perf report -s trace_fields
+To record callchains for each sample: perf record -g
+To record every process run by a user: perf record -u <user>
+Skip collecting build-id when recording: perf record -B
+To change sampling frequency to 100 Hz: perf record -F 100
+See assembly instructions with percentage: perf annotate <symbol>
+If you prefer Intel style assembly, try: perf annotate -M intel
+For hierarchical output, try: perf report --hierarchy
+Order by the overhead of source file name and line number: perf report -s srcline
+System-wide collection from all CPUs: perf record -a
+Show current config key-value pairs: perf config --list
+Show user configuration overrides: perf config --user --list
+To add Node.js USDT (User-Level Statically Defined Tracing): perf buildid-cache --add `which node`
+To report cacheline events from previous recording: perf c2c report
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
new file mode 100644
index 000000000..627b7cada
--- /dev/null
+++ b/tools/perf/MANIFEST
@@ -0,0 +1,18 @@
+tools/perf
+tools/arch
+tools/scripts
+tools/build
+tools/include
+tools/lib/traceevent
+tools/lib/api
+tools/lib/bpf
+tools/lib/subcmd
+tools/lib/hweight.c
+tools/lib/rbtree.c
+tools/lib/string.c
+tools/lib/symbol/kallsyms.c
+tools/lib/symbol/kallsyms.h
+tools/lib/find_bit.c
+tools/lib/bitmap.c
+tools/lib/str_error_r.c
+tools/lib/vsprintf.c
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
new file mode 100644
index 000000000..b8fc7d972
--- /dev/null
+++ b/tools/perf/Makefile
@@ -0,0 +1,112 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is a simple wrapper Makefile that calls the main Makefile.perf
+# with a -j option to do parallel builds
+#
+# If you want to invoke the perf build in some non-standard way then
+# you can use the 'make -f Makefile.perf' method to invoke it.
+#
+
+#
+# Clear out the built-in rules GNU make defines by default (such as .o targets),
+# so that we pass through all targets to Makefile.perf:
+#
+.SUFFIXES:
+
+#
+# We don't want to pass along options like -j:
+#
+unexport MAKEFLAGS
+
+#
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on an 8-CPU system, etc.
+#
+# (To override it, run 'make JOBS=1' and similar.)
+#
+ifeq ($(JOBS),)
+ JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+ ifeq ($(JOBS),0)
+ JOBS := 1
+ endif
+endif
+
+#
+# Only pass canonical directory names as the output directory:
+#
+ifneq ($(O),)
+ FULL_O := $(shell cd $(PWD); readlink -f $(O) || echo $(O))
+endif
+
+#
+# Only accept the 'DEBUG' variable from the command line:
+#
+ifeq ("$(origin DEBUG)", "command line")
+ ifeq ($(DEBUG),)
+ override DEBUG = 0
+ else
+ SET_DEBUG = "DEBUG=$(DEBUG)"
+ endif
+else
+ override DEBUG = 0
+endif
+
+define print_msg
+ @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+endef
+
+define make
+ @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
+endef
+
+#
+# Needed if no target specified:
+# (Except for tags and TAGS targets. The reason is that the
+# Makefile does not treat tags/TAGS as targets but as files
+# and thus won't rebuild them once they are in place.)
+#
+all tags TAGS:
+ $(print_msg)
+ $(make)
+
+ifdef MAKECMDGOALS
+has_clean := 0
+ifneq ($(filter clean,$(MAKECMDGOALS)),)
+ has_clean := 1
+endif # clean
+
+ifeq ($(has_clean),1)
+ rest := $(filter-out clean,$(MAKECMDGOALS))
+ ifneq ($(rest),)
+$(rest): clean
+ endif # rest
+endif # has_clean
+endif # MAKECMDGOALS
+
+#
+# Explicitly disable parallelism for the clean target.
+#
+clean:
+ $(make) -j1
+
+#
+# The build-test target is not really parallel, don't print the jobs info,
+# it also uses only the tests/make targets that don't pollute the source
+# repository, i.e. that use O= or build the tarpkg outside the source
+# repo directories.
+#
+# For a full test, use:
+#
+# make -C tools/perf -f tests/make
+#
+build-test:
+ @$(MAKE) SHUF=1 -f tests/make REUSE_FEATURES_DUMP=1 MK=Makefile SET_PARALLEL=1 --no-print-directory tarpkg out
+
+#
+# All other targets get passed through:
+#
+%: FORCE
+ $(print_msg)
+ $(make)
+
+.PHONY: tags TAGS FORCE Makefile
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
new file mode 100644
index 000000000..878e49849
--- /dev/null
+++ b/tools/perf/Makefile.config
@@ -0,0 +1,1048 @@
+
+ifeq ($(src-perf),)
+src-perf := $(srctree)/tools/perf
+endif
+
+ifeq ($(obj-perf),)
+obj-perf := $(OUTPUT)
+endif
+
+ifneq ($(obj-perf),)
+obj-perf := $(abspath $(obj-perf))/
+endif
+
+$(shell printf "" > $(OUTPUT).config-detected)
+detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
+detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
+
+CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+
+include $(srctree)/tools/scripts/Makefile.arch
+
+$(call detected_var,SRCARCH)
+
+NO_PERF_REGS := 1
+NO_SYSCALL_TABLE := 1
+
+# Additional ARCH settings for ppc
+ifeq ($(SRCARCH),powerpc)
+ NO_PERF_REGS := 0
+ NO_SYSCALL_TABLE := 0
+ CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
+ LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
+endif
+
+# Additional ARCH settings for x86
+ifeq ($(SRCARCH),x86)
+ $(call detected,CONFIG_X86)
+ ifeq (${IS_64_BIT}, 1)
+ NO_SYSCALL_TABLE := 0
+ CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated
+ ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
+ LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
+ $(call detected,CONFIG_X86_64)
+ else
+ LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
+ endif
+ NO_PERF_REGS := 0
+endif
+
+ifeq ($(SRCARCH),arm)
+ NO_PERF_REGS := 0
+ LIBUNWIND_LIBS = -lunwind -lunwind-arm
+endif
+
+ifeq ($(SRCARCH),arm64)
+ NO_PERF_REGS := 0
+ NO_SYSCALL_TABLE := 0
+ CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
+ LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
+endif
+
+ifeq ($(ARCH),s390)
+ NO_PERF_REGS := 0
+ NO_SYSCALL_TABLE := 0
+ CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
+endif
+
+ifeq ($(NO_PERF_REGS),0)
+ $(call detected,CONFIG_PERF_REGS)
+endif
+
+ifneq ($(NO_SYSCALL_TABLE),1)
+ CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
+endif
+
+# So far there's only x86 and arm libdw unwind support merged in perf.
+# Disable it on all other architectures in case libdw unwind
+# support is detected in system. Add supported architectures
+# to the check.
+ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390))
+ NO_LIBDW_DWARF_UNWIND := 1
+endif
+
+ifeq ($(LIBUNWIND_LIBS),)
+ NO_LIBUNWIND := 1
+endif
+#
+# For linking with debug library, run like:
+#
+# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
+#
+
+libunwind_arch_set_flags = $(eval $(libunwind_arch_set_flags_code))
+define libunwind_arch_set_flags_code
+ FEATURE_CHECK_CFLAGS-libunwind-$(1) = -I$(LIBUNWIND_DIR)/include
+ FEATURE_CHECK_LDFLAGS-libunwind-$(1) = -L$(LIBUNWIND_DIR)/lib
+endef
+
+ifdef LIBUNWIND_DIR
+ LIBUNWIND_CFLAGS = -I$(LIBUNWIND_DIR)/include
+ LIBUNWIND_LDFLAGS = -L$(LIBUNWIND_DIR)/lib
+ LIBUNWIND_ARCHS = x86 x86_64 arm aarch64 debug-frame-arm debug-frame-aarch64
+ $(foreach libunwind_arch,$(LIBUNWIND_ARCHS),$(call libunwind_arch_set_flags,$(libunwind_arch)))
+endif
+
+# Set per-feature check compilation flags
+FEATURE_CHECK_CFLAGS-libunwind = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
+FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS)
+
+ifdef CSINCLUDES
+ LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
+endif
+OPENCSDLIBS := -lopencsd_c_api -lopencsd
+ifdef CSLIBS
+ LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
+endif
+FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS)
+
+ifeq ($(NO_PERF_REGS),0)
+ CFLAGS += -DHAVE_PERF_REGS_SUPPORT
+endif
+
+# for linking with debug library, run like:
+# make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ifdef LIBDW_DIR
+ LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
+ LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+endif
+DWARFLIBS := -ldw
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+ DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2
+endif
+FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS)
+
+# for linking with debug library, run like:
+# make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/
+ifdef LIBBABELTRACE_DIR
+ LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include
+ LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib
+endif
+FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS)
+FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf
+
+FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi -I$(srctree)/tools/include/uapi
+# include ARCH specific config
+-include $(src-perf)/arch/$(SRCARCH)/Makefile
+
+ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+ CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+endif
+
+include $(srctree)/tools/scripts/utilities.mak
+
+ifeq ($(call get-executable,$(FLEX)),)
+ dummy := $(error Error: $(FLEX) is missing on this system, please install it)
+endif
+
+ifeq ($(call get-executable,$(BISON)),)
+ dummy := $(error Error: $(BISON) is missing on this system, please install it)
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+ CFLAGS += -Werror
+ CXXFLAGS += -Werror
+endif
+
+ifndef DEBUG
+ DEBUG := 0
+endif
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -O3
+else
+ CFLAGS += -O6
+endif
+endif
+
+ifdef PARSER_DEBUG
+ PARSER_DEBUG_BISON := -t
+ PARSER_DEBUG_FLEX := -d
+ CFLAGS += -DPARSER_DEBUG
+ $(call detected_var,PARSER_DEBUG_BISON)
+ $(call detected_var,PARSER_DEBUG_FLEX)
+endif
+
+# Try different combinations to accommodate systems that only have
+# python[2][-config] in weird combinations, always preferring
+# python2 and python2-config as per pep-0394. If we catch a
+# python[-config] in version 3, the version check will kill it.
+PYTHON2 := $(if $(call get-executable,python2),python2,python)
+override PYTHON := $(call get-executable-or-default,PYTHON,$(PYTHON2))
+PYTHON2_CONFIG := \
+ $(if $(call get-executable,$(PYTHON)-config),$(PYTHON)-config,python-config)
+override PYTHON_CONFIG := \
+ $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON2_CONFIG))
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+
+# Python 3.8 changed the output of `python-config --ldflags` to not include the
+# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for
+# libpython fails if that flag is not included in LDFLAGS
+ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0)
+ PYTHON_CONFIG_LDFLAGS := --ldflags --embed
+else
+ PYTHON_CONFIG_LDFLAGS := --ldflags
+endif
+
+ifdef PYTHON_CONFIG
+ PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
+ PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
+ PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
+ PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
+ FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
+endif
+
+FEATURE_CHECK_CFLAGS-libpython := $(PYTHON_EMBED_CCOPTS)
+FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
+
+CFLAGS += -fno-omit-frame-pointer
+CFLAGS += -ggdb3
+CFLAGS += -funwind-tables
+CFLAGS += -Wall
+CFLAGS += -Wextra
+CFLAGS += -std=gnu99
+
+CXXFLAGS += -std=gnu++11 -fno-exceptions -fno-rtti
+CXXFLAGS += -Wall
+CXXFLAGS += -fno-omit-frame-pointer
+CXXFLAGS += -ggdb3
+CXXFLAGS += -funwind-tables
+CXXFLAGS += -Wno-strict-aliasing
+
+# Enforce a non-executable stack, as we may regress (again) in the future by
+# adding assembler files missing the .GNU-stack linker note.
+LDFLAGS += -Wl,-z,noexecstack
+
+EXTLIBS = -lpthread -lrt -lm -ldl
+
+ifeq ($(FEATURES_DUMP),)
+include $(srctree)/tools/build/Makefile.feature
+else
+include $(FEATURES_DUMP)
+endif
+
+ifeq ($(feature-stackprotector-all), 1)
+ CFLAGS += -fstack-protector-all
+endif
+
+ifeq ($(DEBUG),0)
+ ifeq ($(feature-fortify-source), 1)
+ CFLAGS += -D_FORTIFY_SOURCE=2
+ endif
+endif
+
+INC_FLAGS += -I$(src-perf)/util/include
+INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
+INC_FLAGS += -I$(srctree)/tools/include/uapi
+INC_FLAGS += -I$(srctree)/tools/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/uapi
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/include/
+INC_FLAGS += -I$(srctree)/tools/arch/$(SRCARCH)/
+
+# $(obj-perf) for generated common-cmds.h
+# $(obj-perf)/util for generated bison/flex headers
+ifneq ($(OUTPUT),)
+INC_FLAGS += -I$(obj-perf)/util
+INC_FLAGS += -I$(obj-perf)
+endif
+
+INC_FLAGS += -I$(src-perf)/util
+INC_FLAGS += -I$(src-perf)
+INC_FLAGS += -I$(srctree)/tools/lib/
+
+CFLAGS += $(INC_FLAGS)
+CXXFLAGS += $(INC_FLAGS)
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
+
+ifeq ($(feature-sync-compare-and-swap), 1)
+ CFLAGS += -DHAVE_SYNC_COMPARE_AND_SWAP_SUPPORT
+endif
+
+ifeq ($(feature-pthread-attr-setaffinity-np), 1)
+ CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP
+endif
+
+ifeq ($(feature-pthread-barrier), 1)
+ CFLAGS += -DHAVE_PTHREAD_BARRIER
+endif
+
+ifndef NO_BIONIC
+ $(call feature_check,bionic)
+ ifeq ($(feature-bionic), 1)
+ BIONIC := 1
+ CFLAGS += -DLACKS_SIGQUEUE_PROTOTYPE
+ CFLAGS += -DLACKS_OPEN_MEMSTREAM_PROTOTYPE
+ EXTLIBS := $(filter-out -lrt,$(EXTLIBS))
+ EXTLIBS := $(filter-out -lpthread,$(EXTLIBS))
+ endif
+endif
+
+ifeq ($(feature-eventfd), 1)
+ CFLAGS += -DHAVE_EVENTFD
+endif
+
+ifeq ($(feature-get_current_dir_name), 1)
+ CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
+endif
+
+ifeq ($(feature-gettid), 1)
+ CFLAGS += -DHAVE_GETTID
+endif
+
+ifdef NO_LIBELF
+ NO_DWARF := 1
+ NO_DEMANGLE := 1
+ NO_LIBUNWIND := 1
+ NO_LIBDW_DWARF_UNWIND := 1
+ NO_LIBBPF := 1
+ NO_JVMTI := 1
+else
+ ifeq ($(feature-libelf), 0)
+ ifeq ($(feature-glibc), 1)
+ LIBC_SUPPORT := 1
+ endif
+ ifeq ($(BIONIC),1)
+ LIBC_SUPPORT := 1
+ endif
+ ifeq ($(LIBC_SUPPORT),1)
+ msg := $(warning No libelf found. Disables 'probe' tool, jvmti and BPF support in 'perf record'. Please install libelf-dev, libelf-devel or elfutils-libelf-devel);
+
+ NO_LIBELF := 1
+ NO_DWARF := 1
+ NO_DEMANGLE := 1
+ NO_LIBUNWIND := 1
+ NO_LIBDW_DWARF_UNWIND := 1
+ NO_LIBBPF := 1
+ NO_JVMTI := 1
+ else
+ ifneq ($(filter s% -static%,$(LDFLAGS),),)
+ msg := $(error No static glibc found, please install glibc-static);
+ else
+ msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
+ endif
+ endif
+ else
+ ifndef NO_LIBDW_DWARF_UNWIND
+ ifneq ($(feature-libdw-dwarf-unwind),1)
+ NO_LIBDW_DWARF_UNWIND := 1
+      msg := $(warning No libdw DWARF unwind found; please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR);
+ endif
+ endif
+ ifneq ($(feature-dwarf), 1)
+ ifndef NO_DWARF
+      msg := $(warning No libdw.h found, or libdw.h is too old (elfutils is older than 0.138); disabling DWARF support. Please install a newer elfutils-devel/libdw-dev);
+ NO_DWARF := 1
+ endif
+ else
+ ifneq ($(feature-dwarf_getlocations), 1)
+      msg := $(warning Old libdw.h: finding variables at a given 'perf probe' point will not work; install elfutils-devel/libdw-dev >= 0.157);
+ else
+ CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT
+ endif # dwarf_getlocations
+ endif # Dwarf support
+ endif # libelf support
+endif # NO_LIBELF
+
+ifeq ($(feature-glibc), 1)
+ CFLAGS += -DHAVE_GLIBC_SUPPORT
+endif
+
+ifdef NO_DWARF
+ NO_LIBDW_DWARF_UNWIND := 1
+endif
+
+ifeq ($(feature-sched_getcpu), 1)
+ CFLAGS += -DHAVE_SCHED_GETCPU_SUPPORT
+endif
+
+ifeq ($(feature-setns), 1)
+ CFLAGS += -DHAVE_SETNS_SUPPORT
+ $(call detected,CONFIG_SETNS)
+endif
+
+ifndef NO_CORESIGHT
+ ifeq ($(feature-libopencsd), 1)
+ CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS)
+ LDFLAGS += $(LIBOPENCSD_LDFLAGS)
+ EXTLIBS += $(OPENCSDLIBS)
+ $(call detected,CONFIG_LIBOPENCSD)
+ ifdef CSTRACE_RAW
+ CFLAGS += -DCS_DEBUG_RAW
+ ifeq (${CSTRACE_RAW}, packed)
+ CFLAGS += -DCS_RAW_PACKED
+ endif
+ endif
+ endif
+endif
+
+ifndef NO_LIBELF
+ CFLAGS += -DHAVE_LIBELF_SUPPORT
+ EXTLIBS += -lelf
+ $(call detected,CONFIG_LIBELF)
+
+ ifeq ($(feature-libelf-mmap), 1)
+ CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT
+ endif
+
+ ifeq ($(feature-libelf-getphdrnum), 1)
+ CFLAGS += -DHAVE_ELF_GETPHDRNUM_SUPPORT
+ endif
+
+ ifeq ($(feature-libelf-gelf_getnote), 1)
+ CFLAGS += -DHAVE_GELF_GETNOTE_SUPPORT
+ else
+    msg := $(warning gelf_getnote() not found in libelf, SDT support disabled);
+ endif
+
+ ifeq ($(feature-libelf-getshdrstrndx), 1)
+ CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
+ endif
+
+ ifndef NO_DWARF
+ ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
+ msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
+ NO_DWARF := 1
+ else
+ CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS)
+ LDFLAGS += $(LIBDW_LDFLAGS)
+ EXTLIBS += ${DWARFLIBS}
+ $(call detected,CONFIG_DWARF)
+ endif # PERF_HAVE_DWARF_REGS
+ endif # NO_DWARF
+
+ ifndef NO_LIBBPF
+ ifeq ($(feature-bpf), 1)
+ CFLAGS += -DHAVE_LIBBPF_SUPPORT
+ $(call detected,CONFIG_LIBBPF)
+ endif
+
+ ifndef NO_DWARF
+ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+ CFLAGS += -DHAVE_BPF_PROLOGUE
+ $(call detected,CONFIG_BPF_PROLOGUE)
+ else
+ msg := $(warning BPF prologue is not supported by architecture $(SRCARCH), missing regs_query_register_offset());
+ endif
+ else
+ msg := $(warning DWARF support is off, BPF prologue is disabled);
+ endif
+
+ endif # NO_LIBBPF
+endif # NO_LIBELF
+
+ifndef NO_SDT
+ ifneq ($(feature-sdt), 1)
+ msg := $(warning No sys/sdt.h found, no SDT events are defined, please install systemtap-sdt-devel or systemtap-sdt-dev);
+    NO_SDT := 1
+ else
+ CFLAGS += -DHAVE_SDT_EVENT
+ $(call detected,CONFIG_SDT_EVENT)
+ endif
+endif
+
+ifdef PERF_HAVE_JITDUMP
+ ifndef NO_LIBELF
+ $(call detected,CONFIG_JITDUMP)
+ CFLAGS += -DHAVE_JITDUMP
+ endif
+endif
+
+ifeq ($(SRCARCH),powerpc)
+ ifndef NO_DWARF
+ CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX
+ endif
+endif
+
+ifndef NO_LIBUNWIND
+ have_libunwind :=
+
+ ifeq ($(feature-libunwind-x86), 1)
+ $(call detected,CONFIG_LIBUNWIND_X86)
+ CFLAGS += -DHAVE_LIBUNWIND_X86_SUPPORT
+ LDFLAGS += -lunwind-x86
+ EXTLIBS_LIBUNWIND += -lunwind-x86
+ have_libunwind = 1
+ endif
+
+ ifeq ($(feature-libunwind-aarch64), 1)
+ $(call detected,CONFIG_LIBUNWIND_AARCH64)
+ CFLAGS += -DHAVE_LIBUNWIND_AARCH64_SUPPORT
+ LDFLAGS += -lunwind-aarch64
+ EXTLIBS_LIBUNWIND += -lunwind-aarch64
+ have_libunwind = 1
+ $(call feature_check,libunwind-debug-frame-aarch64)
+ ifneq ($(feature-libunwind-debug-frame-aarch64), 1)
+ msg := $(warning No debug_frame support found in libunwind-aarch64);
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME_AARCH64
+ endif
+ endif
+
+ ifneq ($(feature-libunwind), 1)
+ msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
+ NO_LOCAL_LIBUNWIND := 1
+ else
+ have_libunwind := 1
+ $(call detected,CONFIG_LOCAL_LIBUNWIND)
+ endif
+
+ ifneq ($(have_libunwind), 1)
+ NO_LIBUNWIND := 1
+ endif
+else
+ NO_LOCAL_LIBUNWIND := 1
+endif
+
+ifndef NO_LIBBPF
+ ifneq ($(feature-bpf), 1)
+ msg := $(warning BPF API too old. Please install recent kernel headers. BPF support in 'perf record' is disabled.)
+ NO_LIBBPF := 1
+ endif
+endif
+
+dwarf-post-unwind := 1
+dwarf-post-unwind-text := BUG
+
+# setup DWARF post unwinder
+ifdef NO_LIBUNWIND
+ ifdef NO_LIBDW_DWARF_UNWIND
+ msg := $(warning Disabling post unwind, no support found.);
+ dwarf-post-unwind := 0
+ else
+ dwarf-post-unwind-text := libdw
+ $(call detected,CONFIG_LIBDW_DWARF_UNWIND)
+ endif
+else
+ dwarf-post-unwind-text := libunwind
+ $(call detected,CONFIG_LIBUNWIND)
+  # libunwind is the default, so disable the libdw unwinder in that case.
+ ifndef NO_LIBDW_DWARF_UNWIND
+ NO_LIBDW_DWARF_UNWIND := 1
+ endif
+endif
+
+ifeq ($(dwarf-post-unwind),1)
+ CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT
+ $(call detected,CONFIG_DWARF_UNWIND)
+else
+ NO_DWARF_UNWIND := 1
+endif
+
+ifndef NO_LOCAL_LIBUNWIND
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),arm arm64))
+ $(call feature_check,libunwind-debug-frame)
+ ifneq ($(feature-libunwind-debug-frame), 1)
+ msg := $(warning No debug_frame support found in libunwind);
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+ endif
+ else
+ # non-ARM has no dwarf_find_debug_frame() function:
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+ endif
+ EXTLIBS += $(LIBUNWIND_LIBS)
+ LDFLAGS += $(LIBUNWIND_LIBS)
+endif
+ifeq ($(findstring -static,${LDFLAGS}),-static)
+  # gcc -static links libgcc_eh, which contains a piece of libunwind
+ LIBUNWIND_LDFLAGS += -Wl,--allow-multiple-definition
+endif
+
+ifndef NO_LIBUNWIND
+ CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
+ CFLAGS += $(LIBUNWIND_CFLAGS)
+ LDFLAGS += $(LIBUNWIND_LDFLAGS)
+ EXTLIBS += $(EXTLIBS_LIBUNWIND)
+endif
+
+ifeq ($(NO_SYSCALL_TABLE),0)
+ $(call detected,CONFIG_TRACE)
+else
+ ifndef NO_LIBAUDIT
+ ifneq ($(feature-libaudit), 1)
+ msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
+ NO_LIBAUDIT := 1
+ else
+ CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
+ EXTLIBS += -laudit
+ $(call detected,CONFIG_TRACE)
+ endif
+ endif
+endif
+
+ifndef NO_LIBCRYPTO
+ ifneq ($(feature-libcrypto), 1)
+ msg := $(warning No libcrypto.h found, disables jitted code injection, please install libssl-devel or libssl-dev);
+ NO_LIBCRYPTO := 1
+ else
+ CFLAGS += -DHAVE_LIBCRYPTO_SUPPORT
+ EXTLIBS += -lcrypto
+ $(call detected,CONFIG_CRYPTO)
+ endif
+endif
+
+ifdef NO_NEWT
+ NO_SLANG=1
+endif
+
+ifndef NO_SLANG
+ ifneq ($(feature-libslang), 1)
+ msg := $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev);
+ NO_SLANG := 1
+ else
+    # Fedora has /usr/include/slang/slang.h, but Ubuntu has /usr/include/slang.h
+ CFLAGS += -I/usr/include/slang
+ CFLAGS += -DHAVE_SLANG_SUPPORT
+ EXTLIBS += -lslang
+ $(call detected,CONFIG_SLANG)
+ endif
+endif
+
+ifndef NO_GTK2
+ FLAGS_GTK2=$(CFLAGS) $(LDFLAGS) $(EXTLIBS) $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null)
+ ifneq ($(feature-gtk2), 1)
+ msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
+ NO_GTK2 := 1
+ else
+ ifeq ($(feature-gtk2-infobar), 1)
+ GTK_CFLAGS := -DHAVE_GTK_INFO_BAR_SUPPORT
+ endif
+ CFLAGS += -DHAVE_GTK2_SUPPORT
+ GTK_CFLAGS += $(shell $(PKG_CONFIG) --cflags gtk+-2.0 2>/dev/null)
+ GTK_LIBS := $(shell $(PKG_CONFIG) --libs gtk+-2.0 2>/dev/null)
+ EXTLIBS += -ldl
+ endif
+endif
+
+ifdef NO_LIBPERL
+ CFLAGS += -DNO_LIBPERL
+else
+ PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
+ PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
+ PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
+ PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
+ PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
+ FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+
+ ifneq ($(feature-libperl), 1)
+ CFLAGS += -DNO_LIBPERL
+ NO_LIBPERL := 1
+ msg := $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev);
+ else
+ LDFLAGS += $(PERL_EMBED_LDFLAGS)
+ EXTLIBS += $(PERL_EMBED_LIBADD)
+ CFLAGS += -DHAVE_LIBPERL_SUPPORT
+ ifeq ($(CC_NO_CLANG), 0)
+ CFLAGS += -Wno-compound-token-split-by-macro
+ endif
+ $(call detected,CONFIG_LIBPERL)
+ endif
+endif
+
+ifeq ($(feature-timerfd), 1)
+ CFLAGS += -DHAVE_TIMERFD_SUPPORT
+else
+ msg := $(warning No timerfd support. Disables 'perf kvm stat live');
+endif
+
+disable-python = $(eval $(disable-python_code))
+define disable-python_code
+ CFLAGS += -DNO_LIBPYTHON
+ $(warning $1)
+ NO_LIBPYTHON := 1
+endef
+
+ifdef NO_LIBPYTHON
+ $(call disable-python,Python support disabled by user)
+else
+
+ ifndef PYTHON
+ $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev)
+ else
+ PYTHON_WORD := $(call shell-wordify,$(PYTHON))
+
+ ifndef PYTHON_CONFIG
+ $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev)
+ else
+
+ ifneq ($(feature-libpython), 1)
+ $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev)
+ else
+ LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
+ EXTLIBS += $(PYTHON_EMBED_LIBADD)
+ LANG_BINDINGS += $(obj-perf)python/perf.so
+ CFLAGS += -DHAVE_LIBPYTHON_SUPPORT
+ $(call detected,CONFIG_LIBPYTHON)
+ endif
+ endif
+ endif
+endif
+
+ifeq ($(feature-libbfd), 1)
+ EXTLIBS += -lbfd
+else
+ # we are on a system that requires -liberty and (maybe) -lz
+ # to link against -lbfd; test each case individually here
+
+ # call all detections now so we get correct
+ # status in VF output
+ $(call feature_check,libbfd-liberty)
+ $(call feature_check,libbfd-liberty-z)
+
+ ifeq ($(feature-libbfd-liberty), 1)
+ EXTLIBS += -lbfd -liberty
+ else
+ ifeq ($(feature-libbfd-liberty-z), 1)
+ EXTLIBS += -lbfd -liberty -lz
+ endif
+ endif
+endif
+
+ifdef NO_DEMANGLE
+ CFLAGS += -DNO_DEMANGLE
+else
+ ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
+ EXTLIBS += -liberty
+ else
+ ifeq ($(filter -liberty,$(EXTLIBS)),)
+ $(call feature_check,cplus-demangle)
+
+      # we have neither HAVE_CPLUS_DEMANGLE_SUPPORT
+      # nor any of the 'bfd iberty z' trinity
+ ifeq ($(feature-cplus-demangle), 1)
+ EXTLIBS += -liberty
+ else
+ msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling)
+ CFLAGS += -DNO_DEMANGLE
+ endif
+ endif
+ endif
+
+ ifneq ($(filter -liberty,$(EXTLIBS)),)
+ CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
+ endif
+endif
+
+ifneq ($(filter -lbfd,$(EXTLIBS)),)
+ CFLAGS += -DHAVE_LIBBFD_SUPPORT
+endif
+
+ifndef NO_ZLIB
+ ifeq ($(feature-zlib), 1)
+ CFLAGS += -DHAVE_ZLIB_SUPPORT
+ EXTLIBS += -lz
+ $(call detected,CONFIG_ZLIB)
+ else
+ NO_ZLIB := 1
+ endif
+endif
+
+ifndef NO_LZMA
+ ifeq ($(feature-lzma), 1)
+ CFLAGS += -DHAVE_LZMA_SUPPORT
+ EXTLIBS += -llzma
+ $(call detected,CONFIG_LZMA)
+ else
+ msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev);
+ NO_LZMA := 1
+ endif
+endif
+
+ifndef NO_BACKTRACE
+ ifeq ($(feature-backtrace), 1)
+ CFLAGS += -DHAVE_BACKTRACE_SUPPORT
+ endif
+endif
+
+ifndef NO_LIBNUMA
+ ifeq ($(feature-libnuma), 0)
+ msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev);
+ NO_LIBNUMA := 1
+ else
+ ifeq ($(feature-numa_num_possible_cpus), 0)
+ msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8);
+ NO_LIBNUMA := 1
+ else
+ CFLAGS += -DHAVE_LIBNUMA_SUPPORT
+ EXTLIBS += -lnuma
+ $(call detected,CONFIG_NUMA)
+ endif
+ endif
+endif
+
+ifdef HAVE_KVM_STAT_SUPPORT
+ CFLAGS += -DHAVE_KVM_STAT_SUPPORT
+endif
+
+ifeq (${IS_64_BIT}, 1)
+ ifndef NO_PERF_READ_VDSO32
+ $(call feature_check,compile-32)
+ ifeq ($(feature-compile-32), 1)
+ CFLAGS += -DHAVE_PERF_READ_VDSO32
+ else
+ NO_PERF_READ_VDSO32 := 1
+ endif
+ endif
+ ifneq ($(SRCARCH), x86)
+ NO_PERF_READ_VDSOX32 := 1
+ endif
+ ifndef NO_PERF_READ_VDSOX32
+ $(call feature_check,compile-x32)
+ ifeq ($(feature-compile-x32), 1)
+ CFLAGS += -DHAVE_PERF_READ_VDSOX32
+ else
+ NO_PERF_READ_VDSOX32 := 1
+ endif
+ endif
+else
+ NO_PERF_READ_VDSO32 := 1
+ NO_PERF_READ_VDSOX32 := 1
+endif
+
+ifndef NO_LIBBABELTRACE
+ $(call feature_check,libbabeltrace)
+ ifeq ($(feature-libbabeltrace), 1)
+ CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS)
+ LDFLAGS += $(LIBBABELTRACE_LDFLAGS)
+ EXTLIBS += -lbabeltrace-ctf
+ $(call detected,CONFIG_LIBBABELTRACE)
+ else
+ msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev);
+ endif
+endif
+
+ifndef NO_AUXTRACE
+ ifeq ($(SRCARCH),x86)
+ ifeq ($(feature-get_cpuid), 0)
+ msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
+ NO_AUXTRACE := 1
+ endif
+ endif
+ ifndef NO_AUXTRACE
+ $(call detected,CONFIG_AUXTRACE)
+ CFLAGS += -DHAVE_AUXTRACE_SUPPORT
+ endif
+endif
+
+ifndef NO_JVMTI
+ ifneq (,$(wildcard /usr/sbin/update-java-alternatives))
+ JDIR=$(shell /usr/sbin/update-java-alternatives -l | head -1 | awk '{print $$3}')
+ else
+ ifneq (,$(wildcard /usr/sbin/alternatives))
+ JDIR=$(shell /usr/sbin/alternatives --display java | tail -1 | cut -d' ' -f 5 | sed -e 's%/jre/bin/java.%%g' -e 's%/bin/java.%%g')
+ endif
+ endif
+ ifndef JDIR
+ $(warning No alternatives command found, you need to set JDIR= to point to the root of your Java directory)
+ NO_JVMTI := 1
+ endif
+endif
+
+ifndef NO_JVMTI
+ FEATURE_CHECK_CFLAGS-jvmti := -I$(JDIR)/include -I$(JDIR)/include/linux
+ $(call feature_check,jvmti)
+ ifeq ($(feature-jvmti), 1)
+ $(call detected_var,JDIR)
+ else
+ $(warning No openjdk development package found, please install JDK package, e.g. openjdk-8-jdk, java-1.8.0-openjdk-devel)
+ NO_JVMTI := 1
+ endif
+endif
+
+USE_CXX = 0
+USE_CLANGLLVM = 0
+ifdef LIBCLANGLLVM
+ $(call feature_check,cxx)
+ ifneq ($(feature-cxx), 1)
+    msg := $(warning No g++ found, disabling clang and llvm support. Please install g++)
+ else
+ $(call feature_check,llvm)
+ $(call feature_check,llvm-version)
+ ifneq ($(feature-llvm), 1)
+ msg := $(warning No suitable libLLVM found, disabling builtin clang and LLVM support. Please install llvm-dev(el) (>= 3.9.0))
+ else
+ $(call feature_check,clang)
+ ifneq ($(feature-clang), 1)
+ msg := $(warning No suitable libclang found, disabling builtin clang and LLVM support. Please install libclang-dev(el) (>= 3.9.0))
+ else
+ CFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT
+ CXXFLAGS += -DHAVE_LIBCLANGLLVM_SUPPORT -I$(shell $(LLVM_CONFIG) --includedir)
+ $(call detected,CONFIG_CXX)
+ $(call detected,CONFIG_CLANGLLVM)
+ USE_CXX = 1
+ USE_LLVM = 1
+ USE_CLANG = 1
+ ifneq ($(feature-llvm-version),1)
+ msg := $(warning This version of LLVM is not tested. May cause build errors)
+ endif
+ endif
+ endif
+ endif
+endif
+
+# Among the variables below, these:
+# perfexecdir
+# perf_include_dir
+# perf_examples_dir
+# template_dir
+# mandir
+# infodir
+# htmldir
+# ETC_PERFCONFIG (but not sysconfdir)
+# can be specified as a relative path some/where/else;
+# this is interpreted as relative to $(prefix), and "perf" figures out
+# at runtime where they are based on the path to the executable.
+# This helps install the suite in a relocatable way.
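+# For example (values are illustrative only):
+#   make prefix=/opt/perf perfexecdir=libexec/perf-core install
+# puts the helpers under /opt/perf/libexec/perf-core, and a perf binary
+# relocated together with that tree still finds them at runtime.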
+
+# Make the path relative to DESTDIR, not to prefix
+ifndef DESTDIR
+prefix ?= $(HOME)
+endif
+bindir_relative = bin
+bindir = $(abspath $(prefix)/$(bindir_relative))
+mandir = share/man
+infodir = share/info
+perfexecdir = libexec/perf-core
+perf_include_dir = lib/perf/include
+perf_examples_dir = lib/perf/examples
+sharedir = $(prefix)/share
+template_dir = share/perf-core/templates
+STRACE_GROUPS_DIR = share/perf-core/strace/groups
+htmldir = share/doc/perf-doc
+tipdir = share/doc/perf-tip
+srcdir = $(srctree)/tools/perf
+ifeq ($(prefix),/usr)
+sysconfdir = /etc
+ETC_PERFCONFIG = $(sysconfdir)/perfconfig
+else
+sysconfdir = $(prefix)/etc
+ETC_PERFCONFIG = etc/perfconfig
+endif
+ifndef lib
+ifeq ($(SRCARCH)$(IS_64_BIT), x861)
+lib = lib64
+else
+lib = lib
+endif
+endif # lib
+libdir = $(prefix)/$(lib)
+
+# Shell quote (do not use $(call) to accommodate ancient setups);
+ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
+STRACE_GROUPS_DIR_SQ = $(subst ','\'',$(STRACE_GROUPS_DIR))
+DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
+bindir_SQ = $(subst ','\'',$(bindir))
+mandir_SQ = $(subst ','\'',$(mandir))
+infodir_SQ = $(subst ','\'',$(infodir))
+perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
+perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
+perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
+template_dir_SQ = $(subst ','\'',$(template_dir))
+htmldir_SQ = $(subst ','\'',$(htmldir))
+tipdir_SQ = $(subst ','\'',$(tipdir))
+prefix_SQ = $(subst ','\'',$(prefix))
+sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
+libdir_SQ = $(subst ','\'',$(libdir))
+srcdir_SQ = $(subst ','\'',$(srcdir))
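+
+# E.g. a hypothetical prefix of /opt/o'brien becomes /opt/o'\''brien in
+# prefix_SQ, which can then be pasted safely between single quotes in the
+# shell commands further down.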
+
+ifneq ($(filter /%,$(firstword $(perfexecdir))),)
+perfexec_instdir = $(perfexecdir)
+perf_include_instdir = $(perf_include_dir)
+perf_examples_instdir = $(perf_examples_dir)
+STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
+tip_instdir = $(tipdir)
+else
+perfexec_instdir = $(prefix)/$(perfexecdir)
+perf_include_instdir = $(prefix)/$(perf_include_dir)
+perf_examples_instdir = $(prefix)/$(perf_examples_dir)
+STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
+tip_instdir = $(prefix)/$(tipdir)
+endif
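+# E.g. the default, relative perfexecdir=libexec/perf-core yields
+# perfexec_instdir = $(prefix)/libexec/perf-core, while an absolute
+# perfexecdir=/usr/libexec/perf-core (hypothetical) is taken as-is.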
+perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
+perf_include_instdir_SQ = $(subst ','\'',$(perf_include_instdir))
+perf_examples_instdir_SQ = $(subst ','\'',$(perf_examples_instdir))
+STRACE_GROUPS_INSTDIR_SQ = $(subst ','\'',$(STRACE_GROUPS_INSTDIR))
+tip_instdir_SQ = $(subst ','\'',$(tip_instdir))
+
+# If we install to $(HOME) we keep the traceevent default:
+# $(HOME)/.traceevent/plugins
+# Otherwise we install plugins into the global $(libdir).
+ifdef DESTDIR
+plugindir=$(libdir)/traceevent/plugins
+plugindir_SQ= $(subst ','\'',$(plugindir))
+endif
+
+print_var = $(eval $(print_var_code)) $(info $(MSG))
+define print_var_code
+ MSG = $(shell printf '...%30s: %s' $(1) $($(1)))
+endef
+
+ifeq ($(VF),1)
+  # Display EXTRA features which are detected manually
+  # from here with the feature_check call and thus cannot
+  # be part of the global state output.
+ $(foreach feat,$(FEATURE_TESTS_EXTRA),$(call feature_print_status,$(feat),))
+ $(call print_var,prefix)
+ $(call print_var,bindir)
+ $(call print_var,libdir)
+ $(call print_var,sysconfdir)
+ $(call print_var,LIBUNWIND_DIR)
+ $(call print_var,LIBDW_DIR)
+ $(call print_var,JDIR)
+
+ ifeq ($(dwarf-post-unwind),1)
+ $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
+ endif
+ $(info )
+endif
+
+$(call detected_var,bindir_SQ)
+$(call detected_var,PYTHON_WORD)
+ifneq ($(OUTPUT),)
+$(call detected_var,OUTPUT)
+endif
+$(call detected_var,htmldir_SQ)
+$(call detected_var,infodir_SQ)
+$(call detected_var,mandir_SQ)
+$(call detected_var,ETC_PERFCONFIG_SQ)
+$(call detected_var,STRACE_GROUPS_DIR_SQ)
+$(call detected_var,prefix_SQ)
+$(call detected_var,perfexecdir_SQ)
+$(call detected_var,perf_include_dir_SQ)
+$(call detected_var,perf_examples_dir_SQ)
+$(call detected_var,tipdir_SQ)
+$(call detected_var,srcdir_SQ)
+$(call detected_var,LIBDIR)
+$(call detected_var,GTK_CFLAGS)
+$(call detected_var,PERL_EMBED_CCOPTS)
+$(call detected_var,PYTHON_EMBED_CCOPTS)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
new file mode 100644
index 000000000..678aa7feb
--- /dev/null
+++ b/tools/perf/Makefile.perf
@@ -0,0 +1,901 @@
+include ../scripts/Makefile.include
+
+# The default target of this Makefile is...
+all:
+
+include ../scripts/utilities.mak
+
+# Define V to have a more verbose compile.
+#
+# Define VF to have a more verbose feature check output.
+#
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
+#
+# Define EXCLUDE_EXTLIBS=-lmylib to exclude libmylib from the auto-generated
+# EXTLIBS.
+#
+# Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS.
+#
+# Define NO_DWARF if you do not want debug-info analysis feature at all.
+#
+# Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support. (deprecated)
+#
+# Define NO_SLANG if you do not want TUI support.
+#
+# Define NO_GTK2 if you do not want GTK+ GUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+#
+# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+#
+# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
+# backtrace post unwind.
+#
+# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
+#
+# Define NO_LIBAUDIT if you do not want libaudit support
+#
+# Define NO_BIONIC if you do not want bionic support
+#
+# Define NO_LIBCRYPTO if you do not want libcrypto (openssl) support
+# used for generating build-ids for ELFs generated by jitdump.
+#
+# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
+# for dwarf backtrace post unwind.
+#
+# Define NO_PERF_READ_VDSO32 if you do not want to build perf-read-vdso32
+# for reading the 32-bit compatibility VDSO in 64-bit mode
+#
+# Define NO_PERF_READ_VDSOX32 if you do not want to build perf-read-vdsox32
+# for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode
+#
+# Define NO_ZLIB if you do not want to support compressed kernel modules
+#
+# Define NO_LIBBABELTRACE if you do not want libbabeltrace support
+# for CTF data format.
+#
+# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
+#
+# Define NO_AUXTRACE if you do not want AUX area tracing support
+#
+# Define NO_LIBBPF if you do not want BPF support
+#
+# Define NO_SDT if you do not want to define SDT events in perf tools;
+# note that it doesn't disable SDT scanning support.
+#
+# Define FEATURES_DUMP to provide features detection dump file
+# and bypass the feature detection
+#
+# Define NO_JVMTI if you do not want jvmti agent built
+#
+# Define LIBCLANGLLVM if you DO want builtin clang and llvm support.
+# When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if
+# llvm-config is not in $PATH.
+
+# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding.
+
+# As per kernel Makefile, avoid funny character set dependencies
+unexport LC_ALL
+LC_COLLATE=C
+LC_NUMERIC=C
+export LC_COLLATE LC_NUMERIC
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+# Adding $(OUTPUT) as a directory to look for source files,
+# because we use generated output files as source dependencies
+# for the flex/bison parsers.
+VPATH += $(OUTPUT)
+export VPATH
+endif
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
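+
+# A typical call would be (illustrative; the real defaults may differ):
+#   $(call allow-override,CC,$(CROSS_COMPILE)gcc)
+# which keeps a CC set in the environment or on the command line and only
+# installs the cross-compile default otherwise.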
+
+LD += $(EXTRA_LDFLAGS)
+
+PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+LLVM_CONFIG ?= llvm-config
+
+RM = rm -f
+LN = ln -f
+MKDIR = mkdir
+FIND = find
+INSTALL = install
+FLEX ?= flex
+BISON ?= bison
+STRIP = strip
+AWK = awk
+
+# include Makefile.config by default and rule out
+# non-config cases
+config := 1
+
+NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
+
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+ config := 0
+endif
+endif
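+
+# E.g. 'make clean' consists solely of NON_CONFIG_TARGETS, so config drops
+# to 0 and Makefile.config is skipped; 'make clean all' keeps config at 1.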
+
+# The fixdep build - we force the fixdep tool to be built as
+# the first target in a separate make session so that it is not
+# disturbed by any parallel make jobs. Once fixdep is done
+# we issue the requested build with the FIXDEP=1 variable.
+#
+# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
+# targets, because it's not necessary.
+
+ifdef FIXDEP
+ force_fixdep := 0
+else
+ force_fixdep := $(config)
+endif
+
+export srctree OUTPUT RM CC CXX LD AR CFLAGS CXXFLAGS V BISON FLEX AWK
+export HOSTCC HOSTLD HOSTAR
+
+include $(srctree)/tools/build/Makefile.include
+
+ifeq ($(force_fixdep),1)
+goals := $(filter-out all sub-make, $(MAKECMDGOALS))
+
+$(goals) all: sub-make
+
+sub-make: fixdep
+ @./check-headers.sh
+ $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
+
+else # force_fixdep
+
+LIB_DIR = $(srctree)/tools/lib/api/
+TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+BPF_DIR = $(srctree)/tools/lib/bpf/
+SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
+
+# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
+# Without this setting the output feature dump file misses some features, for
+# example, liberty. Select all checkers so we won't get an incomplete feature
+# dump file.
+ifeq ($(config),1)
+ifdef MAKECMDGOALS
+ifeq ($(filter feature-dump,$(MAKECMDGOALS)),feature-dump)
+FEATURE_TESTS := all
+endif
+endif
+include Makefile.config
+endif
+
+ifeq ($(config),0)
+include $(srctree)/tools/scripts/Makefile.arch
+-include arch/$(SRCARCH)/Makefile
+endif
+
+# FEATURE_DUMP_EXPORT holds the location of the actual
+# FEATURE-DUMP file to be used to bypass feature detection
+# (for bpf or any other subproject).
+ifeq ($(FEATURES_DUMP),)
+FEATURE_DUMP_EXPORT := $(realpath $(OUTPUT)FEATURE-DUMP)
+else
+FEATURE_DUMP_EXPORT := $(realpath $(FEATURES_DUMP))
+endif
+
+export prefix bindir sharedir sysconfdir DESTDIR
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for your architecture.
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+# Guard against environment variables
+PYRF_OBJS =
+SCRIPT_SH =
+
+SCRIPT_SH += perf-archive.sh
+SCRIPT_SH += perf-with-kcore.sh
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
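+# E.g. $(call grep-libs,-L/usr/lib -lperl -lm) yields '-lperl -lm', while
+# strip-libs keeps the remaining '-L/usr/lib' linker flags.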
+
+ifneq ($(OUTPUT),)
+ TE_PATH=$(OUTPUT)
+ BPF_PATH=$(OUTPUT)
+ SUBCMD_PATH=$(OUTPUT)
+ifneq ($(subdir),)
+ API_PATH=$(OUTPUT)/../lib/api/
+else
+ API_PATH=$(OUTPUT)
+endif
+else
+ TE_PATH=$(TRACE_EVENT_DIR)
+ API_PATH=$(LIB_DIR)
+ BPF_PATH=$(BPF_DIR)
+ SUBCMD_PATH=$(SUBCMD_DIR)
+endif
+
+LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+export LIBTRACEEVENT
+
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+
+#
+# The static build has no dynsym table, so this does not work for
+# static builds. The linker has started to complain about that
+# (in Fedora 26), so we switch it off for static builds.
+DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
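+# i.e. $(DYNAMIC_LIST_LDFLAGS) is dropped whenever -static appears in LDFLAGS.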
+
+LIBAPI = $(API_PATH)libapi.a
+export LIBAPI
+
+LIBBPF = $(BPF_PATH)libbpf.a
+
+LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
+
+# python extension build directories
+PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so
+
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+
+PROGRAMS += $(OUTPUT)perf
+
+ifndef NO_PERF_READ_VDSO32
+PROGRAMS += $(OUTPUT)perf-read-vdso32
+endif
+
+ifndef NO_PERF_READ_VDSOX32
+PROGRAMS += $(OUTPUT)perf-read-vdsox32
+endif
+
+LIBJVMTI = libperf-jvmti.so
+
+ifndef NO_JVMTI
+PROGRAMS += $(OUTPUT)$(LIBJVMTI)
+endif
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = $(OUTPUT)perf
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+ SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+ PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+LIB_FILE=$(OUTPUT)libperf.a
+
+PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) $(LIBSUBCMD)
+ifndef NO_LIBBPF
+ PERFLIBS += $(LIBBPF)
+endif
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain. If
+# we had "elif" things would have been much nicer...
+
+ifneq ($(OUTPUT),)
+ CFLAGS += -I$(OUTPUT)
+endif
+
+ifndef NO_GTK2
+ ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
+ GTK_IN := $(OUTPUT)gtk-in.o
+endif
+
+ifdef ASCIIDOC8
+ export ASCIIDOC8
+endif
+
+EXTLIBS := $(call filter-out,$(EXCLUDE_EXTLIBS),$(EXTLIBS))
+LIBS = -Wl,--whole-archive $(PERFLIBS) $(EXTRA_PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
+ifeq ($(USE_CLANG), 1)
+ CLANGLIBS_LIST = AST Basic CodeGen Driver Frontend Lex Tooling Edit Sema Analysis Parse Serialization
+ CLANGLIBS_NOEXT_LIST = $(foreach l,$(CLANGLIBS_LIST),$(shell $(LLVM_CONFIG) --libdir)/libclang$(l))
+ LIBCLANG = $(foreach l,$(CLANGLIBS_NOEXT_LIST),$(wildcard $(l).a $(l).so))
+ LIBS += -Wl,--start-group $(LIBCLANG) -Wl,--end-group
+endif
+
+ifeq ($(USE_LLVM), 1)
+ LIBLLVM = $(shell $(LLVM_CONFIG) --libs all) $(shell $(LLVM_CONFIG) --system-libs)
+ LIBS += -L$(shell $(LLVM_CONFIG) --libdir) $(LIBLLVM)
+endif
+
+ifeq ($(USE_CXX), 1)
+ LIBS += -lstdc++
+endif
+
+export INSTALL SHELL_PATH
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+linux_uapi_dir := $(srctree)/tools/include/uapi/linux
+
+beauty_outdir := $(OUTPUT)trace/beauty/generated
+beauty_ioctl_outdir := $(beauty_outdir)/ioctl
+drm_ioctl_array := $(beauty_ioctl_outdir)/drm_ioctl_array.c
+drm_hdr_dir := $(srctree)/tools/include/uapi/drm
+drm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/drm_ioctl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(beauty_ioctl_outdir)' ] || mkdir -p '$(beauty_ioctl_outdir)')
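+# (The $(shell ...) in a := assignment runs at parse time, so the directory
+# exists before any of the generated-array rules below are considered.)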
+
+$(drm_ioctl_array): $(drm_hdr_dir)/drm.h $(drm_hdr_dir)/i915_drm.h $(drm_ioctl_tbl)
+ $(Q)$(SHELL) '$(drm_ioctl_tbl)' $(drm_hdr_dir) > $@
+
+pkey_alloc_access_rights_array := $(beauty_outdir)/pkey_alloc_access_rights_array.c
+asm_generic_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
+pkey_alloc_access_rights_tbl := $(srctree)/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+
+$(pkey_alloc_access_rights_array): $(asm_generic_hdr_dir)/mman-common.h $(pkey_alloc_access_rights_tbl)
+ $(Q)$(SHELL) '$(pkey_alloc_access_rights_tbl)' $(asm_generic_hdr_dir) > $@
+
+sndrv_ctl_ioctl_array := $(beauty_ioctl_outdir)/sndrv_ctl_ioctl_array.c
+sndrv_ctl_hdr_dir := $(srctree)/tools/include/uapi/sound
+sndrv_ctl_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
+
+$(sndrv_ctl_ioctl_array): $(sndrv_ctl_hdr_dir)/asound.h $(sndrv_ctl_ioctl_tbl)
+ $(Q)$(SHELL) '$(sndrv_ctl_ioctl_tbl)' $(sndrv_ctl_hdr_dir) > $@
+
+sndrv_pcm_ioctl_array := $(beauty_ioctl_outdir)/sndrv_pcm_ioctl_array.c
+sndrv_pcm_hdr_dir := $(srctree)/tools/include/uapi/sound
+sndrv_pcm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
+
+$(sndrv_pcm_ioctl_array): $(sndrv_pcm_hdr_dir)/asound.h $(sndrv_pcm_ioctl_tbl)
+ $(Q)$(SHELL) '$(sndrv_pcm_ioctl_tbl)' $(sndrv_pcm_hdr_dir) > $@
+
+kcmp_type_array := $(beauty_outdir)/kcmp_type_array.c
+kcmp_hdr_dir := $(srctree)/tools/include/uapi/linux/
+kcmp_type_tbl := $(srctree)/tools/perf/trace/beauty/kcmp_type.sh
+
+$(kcmp_type_array): $(kcmp_hdr_dir)/kcmp.h $(kcmp_type_tbl)
+ $(Q)$(SHELL) '$(kcmp_type_tbl)' $(kcmp_hdr_dir) > $@
+
+kvm_ioctl_array := $(beauty_ioctl_outdir)/kvm_ioctl_array.c
+kvm_hdr_dir := $(srctree)/tools/include/uapi/linux
+kvm_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/kvm_ioctl.sh
+
+$(kvm_ioctl_array): $(kvm_hdr_dir)/kvm.h $(kvm_ioctl_tbl)
+ $(Q)$(SHELL) '$(kvm_ioctl_tbl)' $(kvm_hdr_dir) > $@
+
+socket_ipproto_array := $(beauty_outdir)/socket_ipproto_array.c
+socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
+
+$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
+ $(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
+
+vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
+vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
+vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+
+$(vhost_virtio_ioctl_array): $(vhost_virtio_hdr_dir)/vhost.h $(vhost_virtio_ioctl_tbl)
+ $(Q)$(SHELL) '$(vhost_virtio_ioctl_tbl)' $(vhost_virtio_hdr_dir) > $@
+
+perf_ioctl_array := $(beauty_ioctl_outdir)/perf_ioctl_array.c
+perf_hdr_dir := $(srctree)/tools/include/uapi/linux
+perf_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/perf_ioctl.sh
+
+$(perf_ioctl_array): $(perf_hdr_dir)/perf_event.h $(perf_ioctl_tbl)
+ $(Q)$(SHELL) '$(perf_ioctl_tbl)' $(perf_hdr_dir) > $@
+
+madvise_behavior_array := $(beauty_outdir)/madvise_behavior_array.c
+madvise_hdr_dir := $(srctree)/tools/include/uapi/asm-generic/
+madvise_behavior_tbl := $(srctree)/tools/perf/trace/beauty/madvise_behavior.sh
+
+$(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_tbl)
+ $(Q)$(SHELL) '$(madvise_behavior_tbl)' $(madvise_hdr_dir) > $@
+
+prctl_option_array := $(beauty_outdir)/prctl_option_array.c
+prctl_hdr_dir := $(srctree)/tools/include/uapi/linux/
+prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh
+
+$(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl)
+ $(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@
+
+arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c
+arch_errno_hdr_dir := $(srctree)/tools
+arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
+
+$(arch_errno_name_array): $(arch_errno_tbl)
+ $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
+
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
+ $(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
+ CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+ $(PYTHON_WORD) util/setup.py \
+ --quiet build_ext; \
+ mkdir -p $(OUTPUT)python && \
+ cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+ $(Q)$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) $(OUTPUT)perf
+ $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
+
+PERF_IN := $(OUTPUT)perf-in.o
+
+JEVENTS := $(OUTPUT)pmu-events/jevents
+JEVENTS_IN := $(OUTPUT)pmu-events/jevents-in.o
+
+PMU_EVENTS_IN := $(OUTPUT)pmu-events/pmu-events-in.o
+
+export JEVENTS
+
+build := -f $(srctree)/tools/build/Makefile.build dir=. obj
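+# so that '$(MAKE) $(build)=perf' below expands to
+# '$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=. obj=perf'.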
+
+$(PERF_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=perf
+
+$(JEVENTS_IN): FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=jevents
+
+$(JEVENTS): $(JEVENTS_IN)
+ $(QUIET_LINK)$(HOSTCC) $(JEVENTS_IN) -o $@
+
+$(PMU_EVENTS_IN): $(JEVENTS) FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
+
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
+
+$(GTK_IN): FORCE
+ $(Q)$(MAKE) $(build)=gtk
+
+$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+
+$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
+ $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(SCRIPTS) : % : %.sh
+ $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
+
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
+ $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+ $(Q)touch $(OUTPUT)PERF-VERSION-FILE
+
+# These can record PERF_VERSION
+perf.spec $(SCRIPTS) \
+ : $(OUTPUT)PERF-VERSION-FILE
+
+.SUFFIXES:
+
+#
+# If a target does not match any of the later rules then prefix it by $(OUTPUT)
+# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
+#
+ifneq ($(OUTPUT),)
+%.o: $(OUTPUT)%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+pmu-events/%.o: $(OUTPUT)pmu-events/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+util/%.o: $(OUTPUT)util/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+bench/%.o: $(OUTPUT)bench/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+tests/%.o: $(OUTPUT)tests/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+endif
+
+# These two need to be here so that when O= is not used they take precedence
+# over the general rule for .o
+
+# get relative building directory (to $(OUTPUT))
+# and '.' if it's $(OUTPUT) itself
+__build-dir = $(subst $(OUTPUT),,$(dir $@))
+build-dir = $(if $(__build-dir),$(__build-dir),.)
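+# E.g. with a non-empty $(OUTPUT), a hypothetical target $(OUTPUT)util/map.o
+# gives build-dir = util/, while $(OUTPUT)perf.o falls back to '.'.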
+
+prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioctl_array) \
+ $(pkey_alloc_access_rights_array) \
+ $(sndrv_pcm_ioctl_array) \
+ $(sndrv_ctl_ioctl_array) \
+ $(kcmp_type_array) \
+ $(kvm_ioctl_array) \
+ $(socket_ipproto_array) \
+ $(vhost_virtio_ioctl_array) \
+ $(madvise_behavior_array) \
+ $(perf_ioctl_array) \
+ $(prctl_option_array) \
+ $(arch_errno_name_array)
+
+$(OUTPUT)%.o: %.c prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.i: %.c prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.s: %.c prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%-bison.o: %.c prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%-flex.o: %.c prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.o: %.S prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)%.i: %.S prepare FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
+
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+ifndef NO_PERF_READ_VDSO32
+$(OUTPUT)perf-read-vdso32: perf-read-vdso.c util/find-vdso-map.c
+ $(QUIET_CC)$(CC) -m32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
+endif
+
+ifndef NO_PERF_READ_VDSOX32
+$(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c
+ $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c
+endif
+
+ifndef NO_JVMTI
+LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
+
+$(LIBJVMTI_IN): FORCE
+ $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
+
+$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
+ $(QUIET_LINK)$(CC) -shared -Wl,-soname -Wl,$(LIBJVMTI) -o $@ $<
+endif
+
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
+
+LIBPERF_IN := $(OUTPUT)libperf-in.o
+
+$(LIBPERF_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=libperf
+
+$(LIB_FILE): $(LIBPERF_IN)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS)
+
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)'
+
+$(LIBTRACEEVENT): FORCE
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
+
+libtraceevent_plugins: FORCE
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
+
+$(LIBTRACEEVENT)-clean:
+ $(call QUIET_CLEAN, libtraceevent)
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+
+install-traceevent-plugins: libtraceevent_plugins
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
+
+$(LIBAPI): FORCE
+ $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+
+$(LIBAPI)-clean:
+ $(call QUIET_CLEAN, libapi)
+ $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+
+$(LIBBPF): FORCE
+ $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) $(OUTPUT)libbpf.a FEATURES_DUMP=$(FEATURE_DUMP_EXPORT)
+
+$(LIBBPF)-clean:
+ $(call QUIET_CLEAN, libbpf)
+ $(Q)$(MAKE) -C $(BPF_DIR) O=$(OUTPUT) clean >/dev/null
+
+$(LIBSUBCMD): FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
+
+$(LIBSUBCMD)-clean:
+ $(call QUIET_CLEAN, libsubcmd)
+ $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
+
+help:
+ @echo 'Perf make targets:'
+ @echo ' doc - make *all* documentation (see below)'
+ @echo ' man - make manpage documentation (access with man <foo>)'
+ @echo ' html - make html documentation'
+ @echo ' info - make GNU info documentation (access with info <foo>)'
+ @echo ' pdf - make pdf documentation'
+ @echo ' TAGS - use etags to make tag information for source browsing'
+ @echo ' tags - use ctags to make tag information for source browsing'
+ @echo ' cscope - use cscope to make interactive browsing database'
+ @echo ''
+ @echo 'Perf install targets:'
+ @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+ @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
+ @echo ' path like "make prefix=/usr/local install install-doc"'
+ @echo ' install - install compiled binaries'
+ @echo ' install-doc - install *all* documentation'
+ @echo ' install-man - install manpage documentation'
+ @echo ' install-html - install html documentation'
+ @echo ' install-info - install GNU info documentation'
+ @echo ' install-pdf - install pdf documentation'
+ @echo ''
+ @echo ' quick-install-doc - alias for quick-install-man'
+ @echo ' quick-install-man - install the documentation quickly'
+ @echo ' quick-install-html - install the html documentation quickly'
+ @echo ''
+ @echo 'Perf maintainer targets:'
+ @echo ' clean - clean all binary objects and build output'
+
+
+DOC_TARGETS := doc man html info pdf
+
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
+
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+
+TAG_FOLDERS= . ../lib ../include
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
+TAGS:
+ $(QUIET_GEN)$(RM) TAGS; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs etags -a $(TAG_FILES)
+
+tags:
+ $(QUIET_GEN)$(RM) tags; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs ctags -a $(TAG_FILES)
+
+cscope:
+ $(QUIET_GEN)$(RM) cscope*; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print -o -name '*.cpp' -print | xargs cscope -b $(TAG_FILES)
+
+### Testing rules
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+check: $(OUTPUT)common-cmds.h
+ if sparse; \
+ then \
+ for i in *.c */*.c; \
+ do \
+ sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+ done; \
+ else \
+ exit 1; \
+ fi
+
+### Installation rules
+
+ifndef NO_GTK2
+install-gtk: $(OUTPUT)libperf-gtk.so
+ $(call QUIET_INSTALL, 'GTK UI') \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+ $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
+else
+install-gtk:
+endif
+
+install-tools: all install-gtk
+ $(call QUIET_INSTALL, binaries) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+ifndef NO_PERF_READ_VDSO32
+ $(call QUIET_INSTALL, perf-read-vdso32) \
+ $(INSTALL) $(OUTPUT)perf-read-vdso32 '$(DESTDIR_SQ)$(bindir_SQ)';
+endif
+ifndef NO_PERF_READ_VDSOX32
+ $(call QUIET_INSTALL, perf-read-vdsox32) \
+ $(INSTALL) $(OUTPUT)perf-read-vdsox32 '$(DESTDIR_SQ)$(bindir_SQ)';
+endif
+ifndef NO_JVMTI
+ $(call QUIET_INSTALL, $(LIBJVMTI)) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+ $(INSTALL) $(OUTPUT)$(LIBJVMTI) '$(DESTDIR_SQ)$(libdir_SQ)';
+endif
+ $(call QUIET_INSTALL, libexec) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBBPF
+ $(call QUIET_INSTALL, bpf-headers) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
+ $(INSTALL) include/bpf/*.h -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'
+ $(call QUIET_INSTALL, bpf-examples) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
+ $(INSTALL) examples/bpf/*.c -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
+endif
+ $(call QUIET_INSTALL, perf-archive) \
+ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ $(call QUIET_INSTALL, perf-with-kcore) \
+ $(INSTALL) $(OUTPUT)perf-with-kcore -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBAUDIT
+ $(call QUIET_INSTALL, strace/groups) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'; \
+ $(INSTALL) trace/strace/groups/* -t '$(DESTDIR_SQ)$(STRACE_GROUPS_INSTDIR_SQ)'
+endif
+ifndef NO_LIBPERL
+ $(call QUIET_INSTALL, perl-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
+ $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
+ $(call QUIET_INSTALL, python-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
+ $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
+ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
+ $(call QUIET_INSTALL, perf_completion-script) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
+ $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(call QUIET_INSTALL, perf-tip) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(tip_instdir_SQ)'; \
+ $(INSTALL) Documentation/tips.txt -t '$(DESTDIR_SQ)$(tip_instdir_SQ)'
+
+install-tests: all install-gtk
+ $(call QUIET_INSTALL, tests) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
+ $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \
+ $(INSTALL) tests/shell/lib/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'
+
+install-bin: install-tools install-tests install-traceevent-plugins
+
+install: install-bin try-install-man
+
+install-python_ext:
+ $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+
+### Cleaning rules
+
+#
+# This is here, not in Makefile.config, because Makefile.config does
+# not get included for the clean target:
+#
+config-clean:
+ $(call QUIET_CLEAN, config)
+ $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
+
+python-clean:
+ $(python-clean)
+
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean config-clean fixdep-clean python-clean
+ $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
+ $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)$(RM) $(OUTPUT).config-detected
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 $(OUTPUT)pmu-events/jevents $(OUTPUT)$(LIBJVMTI).so
+ $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
+ $(OUTPUT)util/intel-pt-decoder/inat-tables.c \
+ $(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
+ $(OUTPUT)pmu-events/pmu-events.c \
+ $(OUTPUT)$(madvise_behavior_array) \
+ $(OUTPUT)$(drm_ioctl_array) \
+ $(OUTPUT)$(pkey_alloc_access_rights_array) \
+ $(OUTPUT)$(sndrv_ctl_ioctl_array) \
+ $(OUTPUT)$(sndrv_pcm_ioctl_array) \
+ $(OUTPUT)$(kvm_ioctl_array) \
+ $(OUTPUT)$(kcmp_type_array) \
+ $(OUTPUT)$(socket_ipproto_array) \
+ $(OUTPUT)$(vhost_virtio_ioctl_array) \
+ $(OUTPUT)$(perf_ioctl_array) \
+ $(OUTPUT)$(prctl_option_array) \
+ $(OUTPUT)$(arch_errno_name_array)
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+
+#
+# Copy FEATURE-DUMP into the $(FEATURE_DUMP_COPY) file,
+# if defined, with no further action.
+feature-dump:
+ifdef FEATURE_DUMP_COPY
+ @cp $(OUTPUT)FEATURE-DUMP $(FEATURE_DUMP_COPY)
+ @echo "FEATURE-DUMP file copied into $(FEATURE_DUMP_COPY)"
+else
+ @echo "FEATURE-DUMP file available in $(OUTPUT)FEATURE-DUMP"
+endif
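+
+# E.g. (with an illustrative path) 'make feature-dump FEATURE_DUMP_COPY=/tmp/fd'
+# saves the dump so that a later 'make FEATURES_DUMP=/tmp/fd' can reuse it.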
+
+#
+# Trick: if ../../.git does not exist - e.g. we are building out of tree -
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+ GIT-HEAD-PHONY = ../../.git/HEAD
+else
+ GIT-HEAD-PHONY =
+endif
+
+FORCE:
+
+.PHONY: all install clean config-clean strip install-gtk
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE prepare
+.PHONY: libtraceevent_plugins archheaders
+
+endif # force_fixdep
diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build
new file mode 100644
index 000000000..d9b6af837
--- /dev/null
+++ b/tools/perf/arch/Build
@@ -0,0 +1,2 @@
+libperf-y += common.o
+libperf-y += $(SRCARCH)/
diff --git a/tools/perf/arch/alpha/Build b/tools/perf/arch/alpha/Build
new file mode 100644
index 000000000..1bb8bf6d7
--- /dev/null
+++ b/tools/perf/arch/alpha/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build
new file mode 100644
index 000000000..41bf61da4
--- /dev/null
+++ b/tools/perf/arch/arm/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
new file mode 100644
index 000000000..18b13518d
--- /dev/null
+++ b/tools/perf/arch/arm/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+PERF_HAVE_JITDUMP := 1
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c
new file mode 100644
index 000000000..f64516d5b
--- /dev/null
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <regex.h>
+
+struct arm_annotate {
+ regex_t call_insn,
+ jump_insn;
+};
+
+static struct ins_ops *arm__associate_instruction_ops(struct arch *arch, const char *name)
+{
+ struct arm_annotate *arm = arch->priv;
+ struct ins_ops *ops;
+ regmatch_t match[2];
+
+ if (!regexec(&arm->call_insn, name, 2, match, 0))
+ ops = &call_ops;
+ else if (!regexec(&arm->jump_insn, name, 2, match, 0))
+ ops = &jump_ops;
+ else
+ return NULL;
+
+ arch__associate_ins_ops(arch, name, ops);
+ return ops;
+}
+
+static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+ struct arm_annotate *arm;
+ int err;
+
+ if (arch->initialized)
+ return 0;
+
+ arm = zalloc(sizeof(*arm));
+ if (!arm)
+ return -1;
+
+#define ARM_CONDS "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)"
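+	/*
+	 * With the optional ARM_CONDS suffix the call regex matches e.g.
+	 * "bl", "blx" and conditional forms like "blne"; the jump regex
+	 * below matches e.g. "b", "bx" and "beq".
+	 */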
+ err = regcomp(&arm->call_insn, "^blx?" ARM_CONDS "?$", REG_EXTENDED);
+ if (err)
+ goto out_free_arm;
+ err = regcomp(&arm->jump_insn, "^bx?" ARM_CONDS "?$", REG_EXTENDED);
+ if (err)
+ goto out_free_call;
+#undef ARM_CONDS
+
+ arch->initialized = true;
+ arch->priv = arm;
+ arch->associate_instruction_ops = arm__associate_instruction_ops;
+ arch->objdump.comment_char = ';';
+ arch->objdump.skip_functions_char = '+';
+ return 0;
+
+out_free_call:
+ regfree(&arm->call_insn);
+out_free_arm:
+ free(arm);
+ return -1;
+}
diff --git a/tools/perf/arch/arm/include/arch-tests.h b/tools/perf/arch/arm/include/arch-tests.h
new file mode 100644
index 000000000..90ec4c8cb
--- /dev/null
+++ b/tools/perf/arch/arm/include/arch-tests.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/arm/include/dwarf-regs-table.h b/tools/perf/arch/arm/include/dwarf-regs-table.h
new file mode 100644
index 000000000..5a45046fa
--- /dev/null
+++ b/tools/perf/arch/arm/include/dwarf-regs-table.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const arm_regstr_tbl[] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4",
+ "%r5", "%r6", "%r7", "%r8", "%r9", "%r10",
+ "%fp", "%ip", "%sp", "%lr", "%pc",
+};
+#endif
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
new file mode 100644
index 000000000..ed20e0253
--- /dev/null
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_ARM_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+
+#define PERF_REG_IP PERF_REG_ARM_PC
+#define PERF_REG_SP PERF_REG_ARM_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build
new file mode 100644
index 000000000..883c57ff0
--- /dev/null
+++ b/tools/perf/arch/arm/tests/Build
@@ -0,0 +1,4 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/arm/tests/arch-tests.c b/tools/perf/arch/arm/tests/arch-tests.c
new file mode 100644
index 000000000..5b1543c98
--- /dev/null
+++ b/tools/perf/arch/arm/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "DWARF unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+};
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 000000000..9a0242e74
--- /dev/null
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM_SP];
+
+ map = map_groups__find(thread->mg, (u64)sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
new file mode 100644
index 000000000..6e2495cc4
--- /dev/null
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+#define R0 0x00
+#define R1 0x08
+#define R2 0x10
+#define R3 0x18
+#define R4 0x20
+#define R5 0x28
+#define R6 0x30
+#define R7 0x38
+#define R8 0x40
+#define R9 0x48
+#define SL 0x50
+#define FP 0x58
+#define IP 0x60
+#define SP 0x68
+#define LR 0x70
+#define PC 0x78
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This function fills in the 'regs' buffer from the actual register values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time of the call to this function. Since this function
+ * is called using a bl instruction, the PC value is taken from LR.
+ * The built-in unwinding test then unwinds the call stack from the dwarf
+ * information in unwind__get_entries.
+ *
+ * Notes:
+ * - the 8 byte stride in the register offsets comes from the fact
+ * that the registers are stored in a u64 array (u64 *regs),
+ * - the regs buffer needs to be zeroed before the call to this function,
+ * in this case using a calloc in dwarf-unwind.c.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+ str r0, [r0, #R0]
+ str r1, [r0, #R1]
+ str r2, [r0, #R2]
+ str r3, [r0, #R3]
+ str r4, [r0, #R4]
+ str r5, [r0, #R5]
+ str r6, [r0, #R6]
+ str r7, [r0, #R7]
+ str r8, [r0, #R8]
+ str r9, [r0, #R9]
+ str sl, [r0, #SL]
+ str fp, [r0, #FP]
+ str ip, [r0, #IP]
+ str sp, [r0, #SP]
+ str lr, [r0, #LR]
+ str lr, [r0, #PC] // store lr in the pc slot in order to skip the call
+ // to this function
+ mov pc, lr
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
new file mode 100644
index 000000000..e64c5f216
--- /dev/null
+++ b/tools/perf/arch/arm/util/Build
@@ -0,0 +1,6 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
new file mode 100644
index 000000000..1ce6bdbda
--- /dev/null
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <stdbool.h>
+#include <linux/coresight-pmu.h>
+
+#include "../../util/auxtrace.h"
+#include "../../util/evlist.h"
+#include "../../util/pmu.h"
+#include "cs-etm.h"
+#include "arm-spe.h"
+
+static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err)
+{
+ struct perf_pmu **arm_spe_pmus = NULL;
+ int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF);
+ /* arm_spe_xxxxxxxxx\0 */
+ char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10];
+
+ arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus);
+ if (!arm_spe_pmus) {
+ pr_err("spes alloc failed\n");
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i);
+ if (ret < 0) {
+ pr_err("sprintf failed\n");
+ free(arm_spe_pmus);
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name);
+ if (arm_spe_pmus[*nr_spes]) {
+ pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n",
+ __func__, __LINE__, *nr_spes,
+ arm_spe_pmus[*nr_spes]->type,
+ arm_spe_pmus[*nr_spes]->name);
+ (*nr_spes)++;
+ }
+ }
+
+ return arm_spe_pmus;
+}
+
+struct auxtrace_record
+*auxtrace_record__init(struct perf_evlist *evlist, int *err)
+{
+ struct perf_pmu *cs_etm_pmu;
+ struct perf_evsel *evsel;
+ bool found_etm = false;
+ bool found_spe = false;
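+	/* Cache the SPE PMU list in statics so it is only scanned once */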
+ static struct perf_pmu **arm_spe_pmus = NULL;
+ static int nr_spes = 0;
+ int i = 0;
+
+ if (!evlist)
+ return NULL;
+
+ cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+ if (!arm_spe_pmus)
+ arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (cs_etm_pmu &&
+ evsel->attr.type == cs_etm_pmu->type)
+ found_etm = true;
+
+ if (!nr_spes)
+ continue;
+
+ for (i = 0; i < nr_spes; i++) {
+ if (evsel->attr.type == arm_spe_pmus[i]->type) {
+ found_spe = true;
+ break;
+ }
+ }
+ }
+
+ if (found_etm && found_spe) {
+ pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
+ *err = -EOPNOTSUPP;
+ return NULL;
+ }
+
+ if (found_etm)
+ return cs_etm_record_init(err);
+
+#if defined(__aarch64__)
+ if (found_spe)
+ return arm_spe_recording_init(err, arm_spe_pmus[i]);
+#endif
+
+ /*
+ * Clear 'err' even if we haven't found an event - that way perf
+ * record can still be used even if tracers aren't present. The NULL
+ * return value will take care of telling the infrastructure HW tracing
+ * isn't available.
+ */
+ *err = 0;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
new file mode 100644
index 000000000..16af6c3b1
--- /dev/null
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -0,0 +1,770 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <api/fs/fs.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/coresight-pmu.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+
+#include "cs-etm.h"
+#include "../../perf.h"
+#include "../../util/auxtrace.h"
+#include "../../util/cpumap.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/pmu.h"
+#include "../../util/thread_map.h"
+#include "../../util/cs-etm.h"
+
+#include <stdlib.h>
+#include <sys/stat.h>
+
+#define ENABLE_SINK_MAX 128
+#define CS_BUS_DEVICE_PATH "/bus/coresight/devices/"
+
+struct cs_etm_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *cs_etm_pmu;
+ struct perf_evlist *evlist;
+ int wrapped_cnt;
+ bool *wrapped;
+ bool snapshot_mode;
+ size_t snapshot_size;
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu);
+
+static int cs_etm_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+ ptr->snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+static int cs_etm_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+ struct perf_evsel *evsel, *cs_etm_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = (geteuid() == 0 || perf_event_paranoid() < 0);
+
+ ptr->evlist = evlist;
+ ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == cs_etm_pmu->type) {
+ if (cs_etm_evsel) {
+ pr_err("There may be only one %s event\n",
+ CORESIGHT_ETM_PMU_NAME);
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ cs_etm_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ /* No need to continue if no event of interest was found */
+ if (!cs_etm_evsel)
+ return 0;
+
+ if (opts->use_clockid) {
+ pr_err("Cannot use clockid (-k option) with %s\n",
+ CORESIGHT_ETM_PMU_NAME);
+ return -EINVAL;
+ }
+
+ /* we are in snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ /*
+ * No size was given to '-S' or '-m,', so go with
+ * the default
+ */
+ if (!opts->auxtrace_snapshot_size &&
+ !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages =
+ KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+
+ /*
+ * '-m,xyz' was specified but no snapshot size, so make the
+ * snapshot size as big as the auxtrace mmap area.
+ */
+ if (!opts->auxtrace_snapshot_size) {
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ }
+
+ /*
+ * -Sxyz was specified but no auxtrace mmap area, so make the
+ * auxtrace mmap area big enough to fit the requested snapshot
+ * size.
+ */
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+
+ /* Snapshot size can't be bigger than the auxtrace area */
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+
+ /* Something went wrong somewhere - this shouldn't happen */
+ if (!opts->auxtrace_snapshot_size ||
+ !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ }
+
+ /* We are in full trace mode but '-m,xyz' wasn't specified */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+
+ }
+
+ /* Validate auxtrace_mmap_pages provided by user */
+ if (opts->auxtrace_mmap_pages) {
+ unsigned int max_page = (KiB(128) / page_size);
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+
+ if (!privileged &&
+ opts->auxtrace_mmap_pages > max_page) {
+ opts->auxtrace_mmap_pages = max_page;
+ pr_err("auxtrace too big, truncating to %d\n",
+ max_page);
+ }
+
+ if (!is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for %s: must be a power of 2\n",
+ CORESIGHT_ETM_PMU_NAME);
+ return -EINVAL;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode)
+ pr_debug2("%s snapshot size: %zu\n", CORESIGHT_ETM_PMU_NAME,
+ opts->auxtrace_snapshot_size);
+
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace
+ * event must come first.
+ */
+ perf_evlist__to_front(evlist, cs_etm_evsel);
+
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ }
+
+ return 0;
+}
+
+static u64 cs_etm_get_config(struct auxtrace_record *itr)
+{
+ u64 config = 0;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+ struct perf_evlist *evlist = ptr->evlist;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == cs_etm_pmu->type) {
+ /*
+ * Variable perf_event_attr::config is assigned to
+ * ETMv3/PTM. The bit fields have been made to match
+ * the ETMv3.5 ETMCR register specification. See the
+ * PMU_FORMAT_ATTR() declarations in
+ * drivers/hwtracing/coresight/coresight-etm-perf.c
+ * for details.
+ */
+ config = evsel->attr.config;
+ break;
+ }
+ }
+
+ return config;
+}
+
+#ifndef BIT
+#define BIT(N) (1UL << (N))
+#endif
+
+static u64 cs_etmv4_get_config(struct auxtrace_record *itr)
+{
+ u64 config = 0;
+ u64 config_opts = 0;
+
+ /*
+ * The perf event variable config bits represent both
+ * the command line options and register programming
+ * bits in ETMv3/PTM. For ETMv4 we must remap options
+ * to real bits
+ */
+ config_opts = cs_etm_get_config(itr);
+ if (config_opts & BIT(ETM_OPT_CYCACC))
+ config |= BIT(ETM4_CFG_BIT_CYCACC);
+ if (config_opts & BIT(ETM_OPT_TS))
+ config |= BIT(ETM4_CFG_BIT_TS);
+ if (config_opts & BIT(ETM_OPT_RETSTK))
+ config |= BIT(ETM4_CFG_BIT_RETSTK);
+
+ return config;
+}
+
+static size_t
+cs_etm_info_priv_size(struct auxtrace_record *itr,
+ struct perf_evlist *evlist)
+{
+ int i;
+ int etmv3 = 0, etmv4 = 0;
+ struct cpu_map *event_cpus = evlist->cpus;
+ struct cpu_map *online_cpus = cpu_map__new(NULL);
+
+ /* cpu map is not empty, we have specific CPUs to work with */
+ if (!cpu_map__empty(event_cpus)) {
+ for (i = 0; i < cpu__max_cpu(); i++) {
+ if (!cpu_map__has(event_cpus, i) ||
+ !cpu_map__has(online_cpus, i))
+ continue;
+
+ if (cs_etm_is_etmv4(itr, i))
+ etmv4++;
+ else
+ etmv3++;
+ }
+ } else {
+ /* get configuration for all CPUs in the system */
+ for (i = 0; i < cpu__max_cpu(); i++) {
+ if (!cpu_map__has(online_cpus, i))
+ continue;
+
+ if (cs_etm_is_etmv4(itr, i))
+ etmv4++;
+ else
+ etmv3++;
+ }
+ }
+
+ cpu_map__put(online_cpus);
+
+ return (CS_ETM_HEADER_SIZE +
+ (etmv4 * CS_ETMV4_PRIV_SIZE) +
+ (etmv3 * CS_ETMV3_PRIV_SIZE));
+}
+
+static const char *metadata_etmv3_ro[CS_ETM_PRIV_MAX] = {
+ [CS_ETM_ETMCCER] = "mgmt/etmccer",
+ [CS_ETM_ETMIDR] = "mgmt/etmidr",
+};
+
+static const char *metadata_etmv4_ro[CS_ETMV4_PRIV_MAX] = {
+ [CS_ETMV4_TRCIDR0] = "trcidr/trcidr0",
+ [CS_ETMV4_TRCIDR1] = "trcidr/trcidr1",
+ [CS_ETMV4_TRCIDR2] = "trcidr/trcidr2",
+ [CS_ETMV4_TRCIDR8] = "trcidr/trcidr8",
+ [CS_ETMV4_TRCAUTHSTATUS] = "mgmt/trcauthstatus",
+};
+
+static bool cs_etm_is_etmv4(struct auxtrace_record *itr, int cpu)
+{
+ bool ret = false;
+ char path[PATH_MAX];
+ int scan;
+ unsigned int val;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+ /* Take any of the RO files for ETMv4 and see if it is present */
+ snprintf(path, PATH_MAX, "cpu%d/%s",
+ cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+ scan = perf_pmu__scan_file(cs_etm_pmu, path, "%x", &val);
+
+ /* The file was read successfully, we have a winner */
+ if (scan == 1)
+ ret = true;
+
+ return ret;
+}
+
+static int cs_etm_get_ro(struct perf_pmu *pmu, int cpu, const char *path)
+{
+ char pmu_path[PATH_MAX];
+ int scan;
+ unsigned int val = 0;
+
+ /* Get RO metadata from sysfs */
+ snprintf(pmu_path, PATH_MAX, "cpu%d/%s", cpu, path);
+
+ scan = perf_pmu__scan_file(pmu, pmu_path, "%x", &val);
+ if (scan != 1)
+ pr_err("%s: error reading: %s\n", __func__, pmu_path);
+
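+	/* On a failed read, val is left at 0 and returned as the value */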
+ return val;
+}
+
+static void cs_etm_get_metadata(int cpu, u32 *offset,
+ struct auxtrace_record *itr,
+ struct auxtrace_info_event *info)
+{
+ u32 increment;
+ u64 magic;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+ /* First see what kind of tracer this CPU is affined to */
+ if (cs_etm_is_etmv4(itr, cpu)) {
+ magic = __perf_cs_etmv4_magic;
+ /* Get trace configuration register */
+ info->priv[*offset + CS_ETMV4_TRCCONFIGR] =
+ cs_etmv4_get_config(itr);
+ /* Get traceID from the framework */
+ info->priv[*offset + CS_ETMV4_TRCTRACEIDR] =
+ coresight_get_trace_id(cpu);
+ /* Get read-only information from sysfs */
+ info->priv[*offset + CS_ETMV4_TRCIDR0] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR0]);
+ info->priv[*offset + CS_ETMV4_TRCIDR1] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR1]);
+ info->priv[*offset + CS_ETMV4_TRCIDR2] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR2]);
+ info->priv[*offset + CS_ETMV4_TRCIDR8] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro[CS_ETMV4_TRCIDR8]);
+ info->priv[*offset + CS_ETMV4_TRCAUTHSTATUS] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv4_ro
+ [CS_ETMV4_TRCAUTHSTATUS]);
+
+ /* How much space was used */
+ increment = CS_ETMV4_PRIV_MAX;
+ } else {
+ magic = __perf_cs_etmv3_magic;
+ /* Get configuration register */
+ info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
+ /* Get traceID from the framework */
+ info->priv[*offset + CS_ETM_ETMTRACEIDR] =
+ coresight_get_trace_id(cpu);
+ /* Get read-only information from sysfs */
+ info->priv[*offset + CS_ETM_ETMCCER] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv3_ro[CS_ETM_ETMCCER]);
+ info->priv[*offset + CS_ETM_ETMIDR] =
+ cs_etm_get_ro(cs_etm_pmu, cpu,
+ metadata_etmv3_ro[CS_ETM_ETMIDR]);
+
+ /* How much space was used */
+ increment = CS_ETM_PRIV_MAX;
+ }
+
+ /* Build generic header portion */
+ info->priv[*offset + CS_ETM_MAGIC] = magic;
+ info->priv[*offset + CS_ETM_CPU] = cpu;
+ /* Where the next CPU entry should start from */
+ *offset += increment;
+}
+
+static int cs_etm_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *info,
+ size_t priv_size)
+{
+ int i;
+ u32 offset;
+ u64 nr_cpu, type;
+ struct cpu_map *cpu_map;
+ struct cpu_map *event_cpus = session->evlist->cpus;
+ struct cpu_map *online_cpus = cpu_map__new(NULL);
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_pmu *cs_etm_pmu = ptr->cs_etm_pmu;
+
+ if (priv_size != cs_etm_info_priv_size(itr, session->evlist))
+ return -EINVAL;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ /* If the cpu_map is empty all online CPUs are involved */
+ if (cpu_map__empty(event_cpus)) {
+ cpu_map = online_cpus;
+ } else {
+ /* Make sure all specified CPUs are online */
+ for (i = 0; i < cpu_map__nr(event_cpus); i++) {
+ if (cpu_map__has(event_cpus, i) &&
+ !cpu_map__has(online_cpus, i))
+ return -EINVAL;
+ }
+
+ cpu_map = event_cpus;
+ }
+
+ nr_cpu = cpu_map__nr(cpu_map);
+ /* Get PMU type as dynamically assigned by the core */
+ type = cs_etm_pmu->type;
+
+ /* First fill out the session header */
+ info->type = PERF_AUXTRACE_CS_ETM;
+ info->priv[CS_HEADER_VERSION_0] = 0;
+ info->priv[CS_PMU_TYPE_CPUS] = type << 32;
+ info->priv[CS_PMU_TYPE_CPUS] |= nr_cpu;
+ info->priv[CS_ETM_SNAPSHOT] = ptr->snapshot_mode;
+
+ offset = CS_ETM_SNAPSHOT + 1;
+
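+	/* Append one metadata block per mapped CPU after the session header */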
+ for (i = 0; i < cpu__max_cpu() && offset < priv_size; i++)
+ if (cpu_map__has(cpu_map, i))
+ cs_etm_get_metadata(i, &offset, itr, info);
+
+ cpu_map__put(online_cpus);
+
+ return 0;
+}
+
+static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
+{
+ bool *wrapped;
+ int cnt = ptr->wrapped_cnt;
+
+ /* Make @ptr->wrapped as big as @idx */
+ while (cnt <= idx)
+ cnt++;
+
+ /*
+ * Freed in cs_etm_recording_free(). Using realloc() to avoid
+ * cross-compilation problems where the host's system supports
+ * reallocarray() but not the target.
+ */
+ wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
+ if (!wrapped)
+ return -ENOMEM;
+
+ wrapped[cnt - 1] = false;
+ ptr->wrapped_cnt = cnt;
+ ptr->wrapped = wrapped;
+
+ return 0;
+}
+
+static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
+ size_t buffer_size, u64 head)
+{
+ u64 i, watermark;
+ u64 *buf = (u64 *)buffer;
+ size_t buf_size = buffer_size;
+
+ /*
+ * We want to look at the very last 512 bytes (chosen arbitrarily) in
+ * the ring buffer.
+ */
+ watermark = buf_size - 512;
+
+ /*
+ * @head is continuously increasing - if its value is equal to or greater
+ * than the size of the ring buffer, it has wrapped around.
+ */
+ if (head >= buffer_size)
+ return true;
+
+ /*
+ * The value of @head is somewhere within the size of the ring buffer.
+ * This can be that there hasn't been enough data to fill the ring
+ * buffer yet or the trace time was so long that @head has numerically
+ * wrapped around. To find out, we need to check if we have data at the very
+ * end of the ring buffer. We can reliably do this because mmap'ed
+ * pages are zeroed out and there is a fresh mapping with every new
+ * session.
+ */
+
+ /* @head is less than 512 bytes from the end of the ring buffer */
+ if (head > watermark)
+ watermark = head;
+
+ /*
+ * Speed things up by using 64 bit transactions (see "u64 *buf" above)
+ */
+ watermark >>= 3;
+ buf_size >>= 3;
+
+ /*
+ * If we find trace data at the end of the ring buffer, @head has
+ * been there and has numerically wrapped around at least once.
+ */
+ for (i = watermark; i < buf_size; i++)
+ if (buf[i])
+ return true;
+
+ return false;
+}
+
+static int cs_etm_find_snapshot(struct auxtrace_record *itr,
+ int idx, struct auxtrace_mmap *mm,
+ unsigned char *data,
+ u64 *head, u64 *old)
+{
+ int err;
+ bool wrapped;
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+
+ /*
+ * Allocate memory to keep track of wrapping if this is the first
+ * time we deal with this *mm.
+ */
+ if (idx >= ptr->wrapped_cnt) {
+ err = cs_etm_alloc_wrapped_array(ptr, idx);
+ if (err)
+ return err;
+ }
+
+ /*
+ * Check to see if *head has wrapped around. If it hasn't, only the
+ * amount of data between *head and *old is snapshot'ed to avoid
+ * bloating the perf.data file with zeros. But as soon as *head has
+ * wrapped around, the entire size of the AUX ring buffer is taken.
+ */
+ wrapped = ptr->wrapped[idx];
+ if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
+ wrapped = true;
+ ptr->wrapped[idx] = true;
+ }
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head, mm->len);
+
+ /* No wrap has occurred, we can just use *head and *old. */
+ if (!wrapped)
+ return 0;
+
+ /*
+ * *head has wrapped around - adjust *head and *old to pick up the
+ * entire content of the AUX buffer.
+ */
+ if (*head >= mm->len) {
+ *old = *head - mm->len;
+ } else {
+ *head += mm->len;
+ *old = *head - mm->len;
+ }
+
+ return 0;
+}
+
+static int cs_etm_snapshot_start(struct auxtrace_record *itr)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->cs_etm_pmu->type)
+ return perf_evsel__disable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int cs_etm_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->cs_etm_pmu->type)
+ return perf_evsel__enable(evsel);
+ }
+ return -EINVAL;
+}
+
+static u64 cs_etm_reference(struct auxtrace_record *itr __maybe_unused)
+{
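+	/* Build a 64-bit random reference from two 32-bit rand() values */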
+ return (((u64) rand() << 0) & 0x00000000FFFFFFFFull) |
+ (((u64) rand() << 32) & 0xFFFFFFFF00000000ull);
+}
+
+static void cs_etm_recording_free(struct auxtrace_record *itr)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+
+ zfree(&ptr->wrapped);
+ free(ptr);
+}
+
+static int cs_etm_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct cs_etm_recording *ptr =
+ container_of(itr, struct cs_etm_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->cs_etm_pmu->type)
+ return perf_evlist__enable_event_idx(ptr->evlist,
+ evsel, idx);
+ }
+
+ return -EINVAL;
+}
+
+struct auxtrace_record *cs_etm_record_init(int *err)
+{
+ struct perf_pmu *cs_etm_pmu;
+ struct cs_etm_recording *ptr;
+
+ cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
+
+ if (!cs_etm_pmu) {
+ *err = -EINVAL;
+ goto out;
+ }
+
+ ptr = zalloc(sizeof(struct cs_etm_recording));
+ if (!ptr) {
+ *err = -ENOMEM;
+ goto out;
+ }
+
+ ptr->cs_etm_pmu = cs_etm_pmu;
+ ptr->itr.parse_snapshot_options = cs_etm_parse_snapshot_options;
+ ptr->itr.recording_options = cs_etm_recording_options;
+ ptr->itr.info_priv_size = cs_etm_info_priv_size;
+ ptr->itr.info_fill = cs_etm_info_fill;
+ ptr->itr.find_snapshot = cs_etm_find_snapshot;
+ ptr->itr.snapshot_start = cs_etm_snapshot_start;
+ ptr->itr.snapshot_finish = cs_etm_snapshot_finish;
+ ptr->itr.reference = cs_etm_reference;
+ ptr->itr.free = cs_etm_recording_free;
+ ptr->itr.read_finish = cs_etm_read_finish;
+
+ *err = 0;
+ return &ptr->itr;
+out:
+ return NULL;
+}
+
+static FILE *cs_device__open_file(const char *name)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs;
+
+ sysfs = sysfs__mountpoint();
+ if (!sysfs)
+ return NULL;
+
+ snprintf(path, PATH_MAX,
+ "%s" CS_BUS_DEVICE_PATH "%s", sysfs, name);
+
+ if (stat(path, &st) < 0)
+ return NULL;
+
+ return fopen(path, "w");
+}
+
+static int __printf(2, 3) cs_device__print_file(const char *name, const char *fmt, ...)
+{
+ va_list args;
+ FILE *file;
+ int ret = -EINVAL;
+
+ va_start(args, fmt);
+ file = cs_device__open_file(name);
+ if (file) {
+ ret = vfprintf(file, fmt, args);
+ fclose(file);
+ }
+ va_end(args);
+ return ret;
+}
+
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term)
+{
+ int ret;
+ char enable_sink[ENABLE_SINK_MAX];
+
+ snprintf(enable_sink, ENABLE_SINK_MAX, "%s/%s",
+ term->val.drv_cfg, "enable_sink");
+
+ ret = cs_device__print_file(enable_sink, "%d", 1);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
diff --git a/tools/perf/arch/arm/util/cs-etm.h b/tools/perf/arch/arm/util/cs-etm.h
new file mode 100644
index 000000000..1a12e64f5
--- /dev/null
+++ b/tools/perf/arch/arm/util/cs-etm.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__PERF_CS_ETM_H__
+#define INCLUDE__PERF_CS_ETM_H__
+
+#include "../../util/evsel.h"
+
+struct auxtrace_record *cs_etm_record_init(int *err);
+int cs_etm_set_drv_config(struct perf_evsel_config_term *term);
+
+#endif
diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c
new file mode 100644
index 000000000..8bb176a37
--- /dev/null
+++ b/tools/perf/arch/arm/util/dwarf-regs.c
@@ -0,0 +1,64 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <linux/stringify.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = __stringify(%r##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ REG_DWARFNUM_NAME("%fp", 11),
+ REG_DWARFNUM_NAME("%ip", 12),
+ REG_DWARFNUM_NAME("%sp", 13),
+ REG_DWARFNUM_NAME("%lr", 14),
+ REG_DWARFNUM_NAME("%pc", 15),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - look up register name from its DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from its DWARF register number. If the register is not
+ * found in the table, this returns NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
new file mode 100644
index 000000000..e047571e6
--- /dev/null
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <string.h>
+#include <linux/coresight-pmu.h>
+#include <linux/perf_event.h>
+
+#include "cs-etm.h"
+#include "arm-spe.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr
+*perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+ if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
+ /* add ETM default config here */
+ pmu->selectable = true;
+ pmu->set_drv_config = cs_etm_set_drv_config;
+#if defined(__aarch64__)
+ } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
+ return arm_spe_pmu_default_config(pmu);
+#endif
+ }
+
+#endif
+ return NULL;
+}
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
new file mode 100644
index 000000000..36ba4c69c
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(FP);
+ dwarf_regs[12] = REG(IP);
+ dwarf_regs[13] = REG(SP);
+ dwarf_regs[14] = REG(LR);
+ dwarf_regs[15] = REG(PC);
+
+ return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
+ dwarf_regs);
+}
diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c
new file mode 100644
index 000000000..3a550225d
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libunwind.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_ARM_R0:
+ return PERF_REG_ARM_R0;
+ case UNW_ARM_R1:
+ return PERF_REG_ARM_R1;
+ case UNW_ARM_R2:
+ return PERF_REG_ARM_R2;
+ case UNW_ARM_R3:
+ return PERF_REG_ARM_R3;
+ case UNW_ARM_R4:
+ return PERF_REG_ARM_R4;
+ case UNW_ARM_R5:
+ return PERF_REG_ARM_R5;
+ case UNW_ARM_R6:
+ return PERF_REG_ARM_R6;
+ case UNW_ARM_R7:
+ return PERF_REG_ARM_R7;
+ case UNW_ARM_R8:
+ return PERF_REG_ARM_R8;
+ case UNW_ARM_R9:
+ return PERF_REG_ARM_R9;
+ case UNW_ARM_R10:
+ return PERF_REG_ARM_R10;
+ case UNW_ARM_R11:
+ return PERF_REG_ARM_FP;
+ case UNW_ARM_R12:
+ return PERF_REG_ARM_IP;
+ case UNW_ARM_R13:
+ return PERF_REG_ARM_SP;
+ case UNW_ARM_R14:
+ return PERF_REG_ARM_LR;
+ case UNW_ARM_R15:
+ return PERF_REG_ARM_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
new file mode 100644
index 000000000..41bf61da4
--- /dev/null
+++ b/tools/perf/arch/arm64/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
new file mode 100644
index 000000000..dbef716a1
--- /dev/null
+++ b/tools/perf/arch/arm64/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+PERF_HAVE_JITDUMP := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+
+#
+# Syscall table generation for perf
+#
+
+out := $(OUTPUT)arch/arm64/include/generated/asm
+header := $(out)/syscalls.c
+incpath := $(srctree)/tools
+sysdef := $(srctree)/tools/arch/arm64/include/uapi/asm/unistd.h
+sysprf := $(srctree)/tools/perf/arch/arm64/entry/syscalls/
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sysdef) $(systbl)
+ $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@
+
+clean::
+ $(call QUIET_CLEAN, arm64) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
new file mode 100644
index 000000000..6688977e4
--- /dev/null
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <regex.h>
+
+struct arm64_annotate {
+ regex_t call_insn,
+ jump_insn;
+};
+
+static struct ins_ops *arm64__associate_instruction_ops(struct arch *arch, const char *name)
+{
+ struct arm64_annotate *arm = arch->priv;
+ struct ins_ops *ops;
+ regmatch_t match[2];
+
+ if (!regexec(&arm->jump_insn, name, 2, match, 0))
+ ops = &jump_ops;
+ else if (!regexec(&arm->call_insn, name, 2, match, 0))
+ ops = &call_ops;
+ else if (!strcmp(name, "ret"))
+ ops = &ret_ops;
+ else
+ return NULL;
+
+ arch__associate_ins_ops(arch, name, ops);
+ return ops;
+}
+
+static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+ struct arm64_annotate *arm;
+ int err;
+
+ if (arch->initialized)
+ return 0;
+
+ arm = zalloc(sizeof(*arm));
+ if (!arm)
+ return -1;
+
+ /* bl, blr */
+ err = regcomp(&arm->call_insn, "^blr?$", REG_EXTENDED);
+ if (err)
+ goto out_free_arm;
+ /* b, b.cond, br, cbz/cbnz, tbz/tbnz */
+ err = regcomp(&arm->jump_insn, "^[ct]?br?\\.?(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl)?n?z?$",
+ REG_EXTENDED);
+ if (err)
+ goto out_free_call;
+
+ arch->initialized = true;
+ arch->priv = arm;
+ arch->associate_instruction_ops = arm64__associate_instruction_ops;
+ arch->objdump.comment_char = '/';
+ arch->objdump.skip_functions_char = '+';
+ return 0;
+
+out_free_call:
+ regfree(&arm->call_insn);
+out_free_arm:
+ free(arm);
+ return -1;
+}
diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
new file mode 100755
index 000000000..2dbb8cade
--- /dev/null
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# powerpc script.
+#
+# Copyright IBM Corp. 2017
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+# Changed by: Kim Phillips <kim.phillips@arm.com>
+
+gcc=$1
+hostcc=$2
+incpath=$3
+input=$4
+
+if ! test -r $input; then
+ echo "Could not read input file" >&2
+ exit 1
+fi
+
+create_table_from_c()
+{
+ local sc nr last_sc
+
+ create_table_exe=`mktemp /tmp/create-table-XXXXXX`
+
+ {
+
+ cat <<-_EoHEADER
+ #include <stdio.h>
+ #include "$input"
+ int main(int argc, char *argv[])
+ {
+ _EoHEADER
+
+ while read sc nr; do
+ printf "%s\n" " printf(\"\\t[%d] = \\\"$sc\\\",\\n\", __NR_$sc);"
+ last_sc=$sc
+ done
+
+ printf "%s\n" " printf(\"#define SYSCALLTBL_ARM64_MAX_ID %d\\n\", __NR_$last_sc);"
+ printf "}\n"
+
+ } | $hostcc -I $incpath/include/uapi -o $create_table_exe -x c -
+
+ $create_table_exe
+
+ rm -f $create_table_exe
+}
+
+create_table()
+{
+ echo "static const char *syscalltbl_arm64[] = {"
+ create_table_from_c
+ echo "};"
+}
+
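+# Dump the __NR_* macro definitions from the input header, sort them
+# numerically by syscall number and feed the list to create_table.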
+$gcc -E -dM -x c $input \
+ |sed -ne 's/^#define __NR_//p' \
+ |sort -t' ' -k2 -nu \
+ |create_table
diff --git a/tools/perf/arch/arm64/include/arch-tests.h b/tools/perf/arch/arm64/include/arch-tests.h
new file mode 100644
index 000000000..90ec4c8cb
--- /dev/null
+++ b/tools/perf/arch/arm64/include/arch-tests.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/arm64/include/dwarf-regs-table.h b/tools/perf/arch/arm64/include/dwarf-regs-table.h
new file mode 100644
index 000000000..177b2855f
--- /dev/null
+++ b/tools/perf/arch/arm64/include/dwarf-regs-table.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const aarch64_regstr_tbl[] = {
+ "%x0", "%x1", "%x2", "%x3", "%x4",
+ "%x5", "%x6", "%x7", "%x8", "%x9",
+ "%x10", "%x11", "%x12", "%x13", "%x14",
+ "%x15", "%x16", "%x17", "%x18", "%x19",
+ "%x20", "%x21", "%x22", "%x23", "%x24",
+ "%x25", "%x26", "%x27", "%x28", "%x29",
+ "%lr", "%sp",
+};
+#endif
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
new file mode 100644
index 000000000..baaa5e64a
--- /dev/null
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_ARM64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_ARM64_PC
+#define PERF_REG_SP PERF_REG_ARM64_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
new file mode 100644
index 000000000..883c57ff0
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/Build
@@ -0,0 +1,4 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/arm64/tests/arch-tests.c b/tools/perf/arch/arm64/tests/arch-tests.c
new file mode 100644
index 000000000..5b1543c98
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "DWARF unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+};
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
new file mode 100644
index 000000000..5522ce384
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM64_SP];
+
+ map = map_groups__find(thread->mg, (u64)sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
new file mode 100644
index 000000000..07042511d
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/regs_load.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+.text
+.type perf_regs_load,%function
+#define STR_REG(r) str x##r, [x0, 8 * r]
+#define LDR_REG(r) ldr x##r, [x0, 8 * r]
+#define SP (8 * 31)
+#define PC (8 * 32)
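+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * Each register is stored at an 8 byte offset in the u64 buffer. Since
+ * this function is called with a bl instruction, the pc slot is filled
+ * with the value of lr (x30) so that the built-in unwinding test skips
+ * the call to this function.
+ */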
+ENTRY(perf_regs_load)
+ STR_REG(0)
+ STR_REG(1)
+ STR_REG(2)
+ STR_REG(3)
+ STR_REG(4)
+ STR_REG(5)
+ STR_REG(6)
+ STR_REG(7)
+ STR_REG(8)
+ STR_REG(9)
+ STR_REG(10)
+ STR_REG(11)
+ STR_REG(12)
+ STR_REG(13)
+ STR_REG(14)
+ STR_REG(15)
+ STR_REG(16)
+ STR_REG(17)
+ STR_REG(18)
+ STR_REG(19)
+ STR_REG(20)
+ STR_REG(21)
+ STR_REG(22)
+ STR_REG(23)
+ STR_REG(24)
+ STR_REG(25)
+ STR_REG(26)
+ STR_REG(27)
+ STR_REG(28)
+ STR_REG(29)
+ STR_REG(30)
+ mov x1, sp
+ str x1, [x0, #SP]
+ str x30, [x0, #PC]
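+	/* restore x1, which was clobbered above to copy out the stack pointer */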
+ LDR_REG(1)
+ ret
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
new file mode 100644
index 000000000..68f8a8eb3
--- /dev/null
+++ b/tools/perf/arch/arm64/util/Build
@@ -0,0 +1,10 @@
+libperf-y += header.o
+libperf-y += sym-handling.o
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
+ ../../arm/util/auxtrace.o \
+ ../../arm/util/cs-etm.o \
+ arm-spe.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
new file mode 100644
index 000000000..5ccfce87e
--- /dev/null
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <time.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/arm-spe.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+struct arm_spe_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *arm_spe_pmu;
+ struct perf_evlist *evlist;
+};
+
+static size_t
+arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return ARM_SPE_AUXTRACE_PRIV_SIZE;
+}
+
+static int arm_spe_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+
+ if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ auxtrace_info->type = PERF_AUXTRACE_ARM_SPE;
+ auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type;
+
+ return 0;
+}
+
+static int arm_spe_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu;
+ struct perf_evsel *evsel, *arm_spe_evsel = NULL;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ sper->evlist = evlist;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == arm_spe_pmu->type) {
+ if (arm_spe_evsel) {
+ pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ arm_spe_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ /* We are in full trace mode but '-m,xyz' wasn't specified */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ perf_evlist__to_front(evlist, arm_spe_evsel);
+
+ perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
+ perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
+ perf_evsel__set_sample_bit(arm_spe_evsel, TID);
+
+ /* Add dummy event to keep tracking */
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+
+ return 0;
+}
+
+static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ struct timespec ts;
+
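+	/* Derive a quasi-unique reference from a monotonic raw timestamp */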
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
+
+ return ts.tv_sec ^ ts.tv_nsec;
+}
+
+static void arm_spe_recording_free(struct auxtrace_record *itr)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+
+ free(sper);
+}
+
+static int arm_spe_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct arm_spe_recording *sper =
+ container_of(itr, struct arm_spe_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(sper->evlist, evsel) {
+ if (evsel->attr.type == sper->arm_spe_pmu->type)
+ return perf_evlist__enable_event_idx(sper->evlist,
+ evsel, idx);
+ }
+ return -EINVAL;
+}
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+ struct perf_pmu *arm_spe_pmu)
+{
+ struct arm_spe_recording *sper;
+
+ if (!arm_spe_pmu) {
+ *err = -ENODEV;
+ return NULL;
+ }
+
+ sper = zalloc(sizeof(struct arm_spe_recording));
+ if (!sper) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ sper->arm_spe_pmu = arm_spe_pmu;
+ sper->itr.recording_options = arm_spe_recording_options;
+ sper->itr.info_priv_size = arm_spe_info_priv_size;
+ sper->itr.info_fill = arm_spe_info_fill;
+ sper->itr.free = arm_spe_recording_free;
+ sper->itr.reference = arm_spe_reference;
+ sper->itr.read_finish = arm_spe_read_finish;
+ sper->itr.alignment = 0;
+
+ *err = 0;
+ return &sper->itr;
+}
+
+struct perf_event_attr
+*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
+{
+ struct perf_event_attr *attr;
+
+ attr = zalloc(sizeof(struct perf_event_attr));
+ if (!attr) {
+ pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
+ return NULL;
+ }
+
+ /*
+ * If the kernel driver doesn't advertise a minimum,
+ * use max allowable by PMSIDR_EL1.INTERVAL
+ */
+ if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
+ &attr->sample_period) != 1) {
+ pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
+ attr->sample_period = 4096;
+ }
+
+ arm_spe_pmu->selectable = true;
+ arm_spe_pmu->is_uncore = false;
+
+ return attr;
+}
diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c
new file mode 100644
index 000000000..cd764a9fd
--- /dev/null
+++ b/tools/perf/arch/arm64/util/dwarf-regs.c
@@ -0,0 +1,96 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <string.h>
+#include <dwarf-regs.h>
+#include <linux/ptrace.h> /* for struct user_pt_regs */
+#include <linux/stringify.h>
+#include "util.h"
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = __stringify(%x##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
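+/* Byte offset of general purpose register @index within struct user_pt_regs */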
+#define DWARFNUM2OFFSET(index) \
+ (index * sizeof((struct user_pt_regs *)0)->regs[0])
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ REG_DWARFNUM_NAME("%lr", 30),
+ REG_DWARFNUM_NAME("%sp", 31),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - look up register name from its DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from its DWARF register number. If the register is not
+ * found in the table, this returns NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
+
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_dwarfnum *roff;
+
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return DWARFNUM2OFFSET(roff->dwarfnum);
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c
new file mode 100644
index 000000000..534cd2507
--- /dev/null
+++ b/tools/perf/arch/arm64/util/header.c
@@ -0,0 +1,65 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include "header.h"
+
+#define MIDR "/regs/identification/midr_el1"
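+/* MIDR_SIZE: "0x" plus 16 hex digits plus the terminating NUL */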
+#define MIDR_SIZE 19
+#define MIDR_REVISION_MASK 0xf
+#define MIDR_VARIANT_SHIFT 20
+#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT)
+
+char *get_cpuid_str(struct perf_pmu *pmu)
+{
+ char *buf = NULL;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ int cpu;
+ u64 midr = 0;
+ struct cpu_map *cpus;
+ FILE *file;
+
+ if (!sysfs || !pmu || !pmu->cpus)
+ return NULL;
+
+ buf = malloc(MIDR_SIZE);
+ if (!buf)
+ return NULL;
+
+ /* read midr from list of cpus mapped to this pmu */
+ cpus = cpu_map__get(pmu->cpus);
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR,
+ sysfs, cpus->map[cpu]);
+
+ file = fopen(path, "r");
+ if (!file) {
+ pr_debug("fopen failed for file %s\n", path);
+ continue;
+ }
+
+ if (!fgets(buf, MIDR_SIZE, file)) {
+ fclose(file);
+ continue;
+ }
+ fclose(file);
+
+ /* Ignore/clear Variant[23:20] and
+ * Revision[3:0] of MIDR
+ */
+ midr = strtoul(buf, NULL, 16);
+ midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK));
+ scnprintf(buf, MIDR_SIZE, "0x%016lx", midr);
+ /* got the MIDR, break out of the loop */
+ break;
+ }
+
+ if (!midr) {
+ pr_err("failed to get cpuid string for PMU %s\n", pmu->name);
+ free(buf);
+ buf = NULL;
+ }
+
+ cpu_map__put(cpus);
+ return buf;
+}
diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c
new file mode 100644
index 000000000..0051b1ee8
--- /dev/null
+++ b/tools/perf/arch/arm64/util/sym-handling.c
@@ -0,0 +1,22 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+#include "map.h"
+#include "probe-event.h"
+#include "probe-file.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+ return ehdr.e_type == ET_EXEC ||
+ ehdr.e_type == ET_REL ||
+ ehdr.e_type == ET_DYN;
+}
+#endif
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
new file mode 100644
index 000000000..7623d85e7
--- /dev/null
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[PERF_REG_ARM64_MAX], dwarf_pc;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_ARM64_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(X0);
+ dwarf_regs[1] = REG(X1);
+ dwarf_regs[2] = REG(X2);
+ dwarf_regs[3] = REG(X3);
+ dwarf_regs[4] = REG(X4);
+ dwarf_regs[5] = REG(X5);
+ dwarf_regs[6] = REG(X6);
+ dwarf_regs[7] = REG(X7);
+ dwarf_regs[8] = REG(X8);
+ dwarf_regs[9] = REG(X9);
+ dwarf_regs[10] = REG(X10);
+ dwarf_regs[11] = REG(X11);
+ dwarf_regs[12] = REG(X12);
+ dwarf_regs[13] = REG(X13);
+ dwarf_regs[14] = REG(X14);
+ dwarf_regs[15] = REG(X15);
+ dwarf_regs[16] = REG(X16);
+ dwarf_regs[17] = REG(X17);
+ dwarf_regs[18] = REG(X18);
+ dwarf_regs[19] = REG(X19);
+ dwarf_regs[20] = REG(X20);
+ dwarf_regs[21] = REG(X21);
+ dwarf_regs[22] = REG(X22);
+ dwarf_regs[23] = REG(X23);
+ dwarf_regs[24] = REG(X24);
+ dwarf_regs[25] = REG(X25);
+ dwarf_regs[26] = REG(X26);
+ dwarf_regs[27] = REG(X27);
+ dwarf_regs[28] = REG(X28);
+ dwarf_regs[29] = REG(X29);
+ dwarf_regs[30] = REG(LR);
+ dwarf_regs[31] = REG(SP);
+
+ if (!dwfl_thread_state_registers(thread, 0, PERF_REG_ARM64_MAX,
+ dwarf_regs))
+ return false;
+
+ dwarf_pc = REG(PC);
+ dwfl_thread_state_register_pc(thread, dwarf_pc);
+
+ return true;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
new file mode 100644
index 000000000..002520d40
--- /dev/null
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum)
+{
+ switch (regnum) {
+ case UNW_AARCH64_X0:
+ return PERF_REG_ARM64_X0;
+ case UNW_AARCH64_X1:
+ return PERF_REG_ARM64_X1;
+ case UNW_AARCH64_X2:
+ return PERF_REG_ARM64_X2;
+ case UNW_AARCH64_X3:
+ return PERF_REG_ARM64_X3;
+ case UNW_AARCH64_X4:
+ return PERF_REG_ARM64_X4;
+ case UNW_AARCH64_X5:
+ return PERF_REG_ARM64_X5;
+ case UNW_AARCH64_X6:
+ return PERF_REG_ARM64_X6;
+ case UNW_AARCH64_X7:
+ return PERF_REG_ARM64_X7;
+ case UNW_AARCH64_X8:
+ return PERF_REG_ARM64_X8;
+ case UNW_AARCH64_X9:
+ return PERF_REG_ARM64_X9;
+ case UNW_AARCH64_X10:
+ return PERF_REG_ARM64_X10;
+ case UNW_AARCH64_X11:
+ return PERF_REG_ARM64_X11;
+ case UNW_AARCH64_X12:
+ return PERF_REG_ARM64_X12;
+ case UNW_AARCH64_X13:
+ return PERF_REG_ARM64_X13;
+ case UNW_AARCH64_X14:
+ return PERF_REG_ARM64_X14;
+ case UNW_AARCH64_X15:
+ return PERF_REG_ARM64_X15;
+ case UNW_AARCH64_X16:
+ return PERF_REG_ARM64_X16;
+ case UNW_AARCH64_X17:
+ return PERF_REG_ARM64_X17;
+ case UNW_AARCH64_X18:
+ return PERF_REG_ARM64_X18;
+ case UNW_AARCH64_X19:
+ return PERF_REG_ARM64_X19;
+ case UNW_AARCH64_X20:
+ return PERF_REG_ARM64_X20;
+ case UNW_AARCH64_X21:
+ return PERF_REG_ARM64_X21;
+ case UNW_AARCH64_X22:
+ return PERF_REG_ARM64_X22;
+ case UNW_AARCH64_X23:
+ return PERF_REG_ARM64_X23;
+ case UNW_AARCH64_X24:
+ return PERF_REG_ARM64_X24;
+ case UNW_AARCH64_X25:
+ return PERF_REG_ARM64_X25;
+ case UNW_AARCH64_X26:
+ return PERF_REG_ARM64_X26;
+ case UNW_AARCH64_X27:
+ return PERF_REG_ARM64_X27;
+ case UNW_AARCH64_X28:
+ return PERF_REG_ARM64_X28;
+ case UNW_AARCH64_X29:
+ return PERF_REG_ARM64_X29;
+ case UNW_AARCH64_X30:
+ return PERF_REG_ARM64_LR;
+ case UNW_AARCH64_SP:
+ return PERF_REG_ARM64_SP;
+ case UNW_AARCH64_PC:
+ return PERF_REG_ARM64_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
new file mode 100644
index 000000000..5f69fd0b7
--- /dev/null
+++ b/tools/perf/arch/common.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "common.h"
+#include "../util/env.h"
+#include "../util/util.h"
+#include "../util/debug.h"
+
+const char *const arm_triplets[] = {
+ "arm-eabi-",
+ "arm-linux-androideabi-",
+ "arm-unknown-linux-",
+ "arm-unknown-linux-gnu-",
+ "arm-unknown-linux-gnueabi-",
+ "arm-linux-gnu-",
+ "arm-linux-gnueabihf-",
+ "arm-none-eabi-",
+ NULL
+};
+
+const char *const arm64_triplets[] = {
+ "aarch64-linux-android-",
+ "aarch64-linux-gnu-",
+ NULL
+};
+
+const char *const powerpc_triplets[] = {
+ "powerpc-unknown-linux-gnu-",
+ "powerpc-linux-gnu-",
+ "powerpc64-unknown-linux-gnu-",
+ "powerpc64-linux-gnu-",
+ "powerpc64le-linux-gnu-",
+ NULL
+};
+
+const char *const s390_triplets[] = {
+ "s390-ibm-linux-",
+ "s390x-linux-gnu-",
+ NULL
+};
+
+const char *const sh_triplets[] = {
+ "sh-unknown-linux-gnu-",
+ "sh64-unknown-linux-gnu-",
+ "sh-linux-gnu-",
+ "sh64-linux-gnu-",
+ NULL
+};
+
+const char *const sparc_triplets[] = {
+ "sparc-unknown-linux-gnu-",
+ "sparc64-unknown-linux-gnu-",
+ "sparc64-linux-gnu-",
+ NULL
+};
+
+const char *const x86_triplets[] = {
+ "x86_64-pc-linux-gnu-",
+ "x86_64-unknown-linux-gnu-",
+ "i686-pc-linux-gnu-",
+ "i586-pc-linux-gnu-",
+ "i486-pc-linux-gnu-",
+ "i386-pc-linux-gnu-",
+ "i686-linux-android-",
+ "i686-android-linux-",
+ "x86_64-linux-gnu-",
+ "i586-linux-gnu-",
+ NULL
+};
+
+const char *const mips_triplets[] = {
+ "mips-unknown-linux-gnu-",
+ "mipsel-linux-android-",
+ "mips-linux-gnu-",
+ "mips64-linux-gnu-",
+ "mips64el-linux-gnuabi64-",
+ "mips64-linux-gnuabi64-",
+ "mipsel-linux-gnu-",
+ NULL
+};
+
+static bool lookup_path(char *name)
+{
+ bool found = false;
+ char *path, *tmp = NULL;
+ char buf[PATH_MAX];
+ char *env = getenv("PATH");
+
+ if (!env)
+ return false;
+
+ env = strdup(env);
+ if (!env)
+ return false;
+
+ path = strtok_r(env, ":", &tmp);
+ while (path) {
+ scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+ if (access(buf, F_OK) == 0) {
+ found = true;
+ break;
+ }
+ path = strtok_r(NULL, ":", &tmp);
+ }
+ free(env);
+ return found;
+}
+
+static int lookup_triplets(const char *const *triplets, const char *name)
+{
+ int i;
+ char buf[PATH_MAX];
+
+ for (i = 0; triplets[i] != NULL; i++) {
+ scnprintf(buf, sizeof(buf), "%s%s", triplets[i], name);
+ if (lookup_path(buf))
+ return i;
+ }
+ return -1;
+}
+
+static int perf_env__lookup_binutils_path(struct perf_env *env,
+ const char *name, const char **path)
+{
+ int idx;
+ const char *arch = perf_env__arch(env), *cross_env;
+ const char *const *path_list;
+ char *buf = NULL;
+
+ /*
+ * We don't need to find an objdump path for the native system.
+ * Just use the default binutils name (e.g. "objdump").
+ */
+ if (!strcmp(perf_env__arch(NULL), arch))
+ goto out;
+
+ cross_env = getenv("CROSS_COMPILE");
+ if (cross_env) {
+ if (asprintf(&buf, "%s%s", cross_env, name) < 0)
+ goto out_error;
+ if (buf[0] == '/') {
+ if (access(buf, F_OK) == 0)
+ goto out;
+ goto out_error;
+ }
+ if (lookup_path(buf))
+ goto out;
+ zfree(&buf);
+ }
+
+ if (!strcmp(arch, "arm"))
+ path_list = arm_triplets;
+ else if (!strcmp(arch, "arm64"))
+ path_list = arm64_triplets;
+ else if (!strcmp(arch, "powerpc"))
+ path_list = powerpc_triplets;
+ else if (!strcmp(arch, "sh"))
+ path_list = sh_triplets;
+ else if (!strcmp(arch, "s390"))
+ path_list = s390_triplets;
+ else if (!strcmp(arch, "sparc"))
+ path_list = sparc_triplets;
+ else if (!strcmp(arch, "x86"))
+ path_list = x86_triplets;
+ else if (!strcmp(arch, "mips"))
+ path_list = mips_triplets;
+ else {
+ ui__error("binutils for %s not supported.\n", arch);
+ goto out_error;
+ }
+
+ idx = lookup_triplets(path_list, name);
+ if (idx < 0) {
+ ui__error("Please install %s for %s.\n"
+ "You can add it to PATH, set CROSS_COMPILE or "
+ "override the default using --%s.\n",
+ name, arch, name);
+ goto out_error;
+ }
+
+ if (asprintf(&buf, "%s%s", path_list[idx], name) < 0)
+ goto out_error;
+
+out:
+ *path = buf;
+ return 0;
+out_error:
+ free(buf);
+ *path = NULL;
+ return -1;
+}
+
+int perf_env__lookup_objdump(struct perf_env *env, const char **path)
+{
+ /*
+ * For live mode, env->arch will be NULL and we can use
+ * the native objdump tool.
+ */
+ if (env->arch == NULL)
+ return 0;
+
+ return perf_env__lookup_binutils_path(env, "objdump", path);
+}
+
+/*
+ * Some architectures have a single address space for kernel and user
+ * addresses, which makes it possible to determine if an address is in kernel
+ * space or user space. sparc is the notable exception: it keeps separate
+ * address spaces, so the check below returns false for it.
+ */
+bool perf_env__single_address_space(struct perf_env *env)
+{
+ return strcmp(perf_env__arch(env), "sparc");
+}
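
A minimal sketch of how a caller consumes perf_env__lookup_objdump() (assumed linkage against the helpers above; pr_* as in perf's debug.h):

    const char *path = NULL;

    if (perf_env__lookup_objdump(env, &path) < 0)
    	pr_err("no suitable cross objdump found\n");
    else if (path)
    	pr_debug("cross objdump: %s\n", path);	/* e.g. "aarch64-linux-gnu-objdump" */
    else
    	pr_debug("native objdump is sufficient\n");	/* live mode or same arch */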
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
new file mode 100644
index 000000000..c298a446d
--- /dev/null
+++ b/tools/perf/arch/common.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_COMMON_H
+#define ARCH_PERF_COMMON_H
+
+#include "../util/env.h"
+
+int perf_env__lookup_objdump(struct perf_env *env, const char **path);
+bool perf_env__single_address_space(struct perf_env *env);
+
+#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/mips/Build b/tools/perf/arch/mips/Build
new file mode 100644
index 000000000..1bb8bf6d7
--- /dev/null
+++ b/tools/perf/arch/mips/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/parisc/Build b/tools/perf/arch/parisc/Build
new file mode 100644
index 000000000..1bb8bf6d7
--- /dev/null
+++ b/tools/perf/arch/parisc/Build
@@ -0,0 +1 @@
+# empty
diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
new file mode 100644
index 000000000..db52fa22d
--- /dev/null
+++ b/tools/perf/arch/powerpc/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-y += tests/
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
new file mode 100644
index 000000000..a111239df
--- /dev/null
+++ b/tools/perf/arch/powerpc/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
+
+#
+# Syscall table generation for perf
+#
+
+out := $(OUTPUT)arch/powerpc/include/generated/asm
+header32 := $(out)/syscalls_32.c
+header64 := $(out)/syscalls_64.c
+sysdef := $(srctree)/tools/arch/powerpc/include/uapi/asm/unistd.h
+sysprf := $(srctree)/tools/perf/arch/powerpc/entry/syscalls/
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header64): $(sysdef) $(systbl)
+ $(Q)$(SHELL) '$(systbl)' '64' '$(CC)' $(sysdef) > $@
+
+$(header32): $(sysdef) $(systbl)
+ $(Q)$(SHELL) '$(systbl)' '32' '$(CC)' $(sysdef) > $@
+
+clean::
+ $(call QUIET_CLEAN, powerpc) $(RM) $(header32) $(header64)
+
+archheaders: $(header32) $(header64)
diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c
new file mode 100644
index 000000000..a3f423c27
--- /dev/null
+++ b/tools/perf/arch/powerpc/annotate/instructions.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+
+static struct ins_ops *powerpc__associate_instruction_ops(struct arch *arch, const char *name)
+{
+ int i;
+ struct ins_ops *ops;
+
+ /*
+ * - Only interested if the instruction starts with 'b'.
+ * - A few instructions start with 'b' but aren't branches.
+ */
+ if (name[0] != 'b' ||
+ !strncmp(name, "bcd", 3) ||
+ !strncmp(name, "brinc", 5) ||
+ !strncmp(name, "bper", 4))
+ return NULL;
+
+ ops = &jump_ops;
+
+ i = strlen(name) - 1;
+ if (i < 0)
+ return NULL;
+
+ /* ignore optional hints at the end of the instructions */
+ if (name[i] == '+' || name[i] == '-')
+ i--;
+
+ if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+ /*
+ * If the instruction ends with 'l' or 'la', it is
+ * considered a 'call' since it updates the LR.
+ * ... except for 'bnl', which is branch-if-not-less-than,
+ * and 'bnla', its absolute form.
+ */
+ if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+ strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+ strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+ ops = &call_ops;
+ }
+ if (name[i] == 'r' && name[i-1] == 'l')
+ /*
+ * Instructions ending with 'lr' are considered
+ * return instructions.
+ */
+ ops = &ret_ops;
+
+ arch__associate_ins_ops(arch, name, ops);
+ return ops;
+}
+
+static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+ if (!arch->initialized) {
+ arch->initialized = true;
+ arch->associate_instruction_ops = powerpc__associate_instruction_ops;
+ arch->objdump.comment_char = '#';
+ }
+
+ return 0;
+}
diff --git a/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
new file mode 100755
index 000000000..ef52e1dd6
--- /dev/null
+++ b/tools/perf/arch/powerpc/entry/syscalls/mksyscalltbl
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf. Derived from
+# s390 script.
+#
+# Copyright IBM Corp. 2017
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com>
+
+wordsize=$1
+gcc=$2
+input=$3
+
+if ! test -r $input; then
+ echo "Could not read input file" >&2
+ exit 1
+fi
+
+create_table()
+{
+ local wordsize=$1
+ local max_nr
+
+ echo "static const char *syscalltbl_powerpc_${wordsize}[] = {"
+ while read sc nr; do
+ printf '\t[%d] = "%s",\n' $nr $sc
+ max_nr=$nr
+ done
+ echo '};'
+ echo "#define SYSCALLTBL_POWERPC_${wordsize}_MAX_ID $max_nr"
+}
+
+$gcc -m${wordsize} -E -dM -x c $input \
+ |sed -ne 's/^#define __NR_//p' \
+ |sort -t' ' -k2 -nu \
+ |create_table ${wordsize}
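
For reference, the generated header is plain C; its shape (entries elided, and the exact numbers depend on the unistd.h fed in) is assumed to look like:

    static const char *syscalltbl_powerpc_64[] = {
    	[0] = "restart_syscall",
    	[1] = "exit",
    	[2] = "fork",
    	/* ... one slot per __NR_* value in the input header ... */
    };
    #define SYSCALLTBL_POWERPC_64_MAX_ID 363	/* illustrative; the real value is
    						 * the largest nr in the input */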
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h b/tools/perf/arch/powerpc/include/arch-tests.h
new file mode 100644
index 000000000..1c7be75cb
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/dwarf-regs-table.h b/tools/perf/arch/powerpc/include/dwarf-regs-table.h
new file mode 100644
index 000000000..66dc015a7
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/dwarf-regs-table.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ * http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf
+ */
+#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg
+
+static const char * const powerpc_regstr_tbl[] = {
+ "%gpr0", "%gpr1", "%gpr2", "%gpr3", "%gpr4",
+ "%gpr5", "%gpr6", "%gpr7", "%gpr8", "%gpr9",
+ "%gpr10", "%gpr11", "%gpr12", "%gpr13", "%gpr14",
+ "%gpr15", "%gpr16", "%gpr17", "%gpr18", "%gpr19",
+ "%gpr20", "%gpr21", "%gpr22", "%gpr23", "%gpr24",
+ "%gpr25", "%gpr26", "%gpr27", "%gpr28", "%gpr29",
+ "%gpr30", "%gpr31",
+ REG_DWARFNUM_NAME(msr, 66),
+ REG_DWARFNUM_NAME(ctr, 109),
+ REG_DWARFNUM_NAME(link, 108),
+ REG_DWARFNUM_NAME(xer, 101),
+ REG_DWARFNUM_NAME(dar, 119),
+ REG_DWARFNUM_NAME(dsisr, 118),
+};
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000000000..00e37b106
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_POWERPC_MAX
+#ifdef __powerpc64__
+ #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+#else
+ #define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+#endif
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_R1
+
+static const char *reg_names[] = {
+ [PERF_REG_POWERPC_R0] = "r0",
+ [PERF_REG_POWERPC_R1] = "r1",
+ [PERF_REG_POWERPC_R2] = "r2",
+ [PERF_REG_POWERPC_R3] = "r3",
+ [PERF_REG_POWERPC_R4] = "r4",
+ [PERF_REG_POWERPC_R5] = "r5",
+ [PERF_REG_POWERPC_R6] = "r6",
+ [PERF_REG_POWERPC_R7] = "r7",
+ [PERF_REG_POWERPC_R8] = "r8",
+ [PERF_REG_POWERPC_R9] = "r9",
+ [PERF_REG_POWERPC_R10] = "r10",
+ [PERF_REG_POWERPC_R11] = "r11",
+ [PERF_REG_POWERPC_R12] = "r12",
+ [PERF_REG_POWERPC_R13] = "r13",
+ [PERF_REG_POWERPC_R14] = "r14",
+ [PERF_REG_POWERPC_R15] = "r15",
+ [PERF_REG_POWERPC_R16] = "r16",
+ [PERF_REG_POWERPC_R17] = "r17",
+ [PERF_REG_POWERPC_R18] = "r18",
+ [PERF_REG_POWERPC_R19] = "r19",
+ [PERF_REG_POWERPC_R20] = "r20",
+ [PERF_REG_POWERPC_R21] = "r21",
+ [PERF_REG_POWERPC_R22] = "r22",
+ [PERF_REG_POWERPC_R23] = "r23",
+ [PERF_REG_POWERPC_R24] = "r24",
+ [PERF_REG_POWERPC_R25] = "r25",
+ [PERF_REG_POWERPC_R26] = "r26",
+ [PERF_REG_POWERPC_R27] = "r27",
+ [PERF_REG_POWERPC_R28] = "r28",
+ [PERF_REG_POWERPC_R29] = "r29",
+ [PERF_REG_POWERPC_R30] = "r30",
+ [PERF_REG_POWERPC_R31] = "r31",
+ [PERF_REG_POWERPC_NIP] = "nip",
+ [PERF_REG_POWERPC_MSR] = "msr",
+ [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+ [PERF_REG_POWERPC_CTR] = "ctr",
+ [PERF_REG_POWERPC_LINK] = "link",
+ [PERF_REG_POWERPC_XER] = "xer",
+ [PERF_REG_POWERPC_CCR] = "ccr",
+ [PERF_REG_POWERPC_SOFTE] = "softe",
+ [PERF_REG_POWERPC_TRAP] = "trap",
+ [PERF_REG_POWERPC_DAR] = "dar",
+ [PERF_REG_POWERPC_DSISR] = "dsisr"
+};
+
+static inline const char *perf_reg_name(int id)
+{
+ return reg_names[id];
+}
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/tests/Build b/tools/perf/arch/powerpc/tests/Build
new file mode 100644
index 000000000..d827ef384
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c b/tools/perf/arch/powerpc/tests/arch-tests.c
new file mode 100644
index 000000000..8c3fbd4af
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/arch-tests.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "Test dwarf unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+};
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
new file mode 100644
index 000000000..5f39efef0
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
+
+ map = map_groups__find(thread->mg, (u64)sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/powerpc/tests/regs_load.S b/tools/perf/arch/powerpc/tests/regs_load.S
new file mode 100644
index 000000000..36a20b003
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/regs_load.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+/* Offset is based on macros from arch/powerpc/include/uapi/asm/ptrace.h. */
+#define R0 0
+#define R1 1 * 8
+#define R2 2 * 8
+#define R3 3 * 8
+#define R4 4 * 8
+#define R5 5 * 8
+#define R6 6 * 8
+#define R7 7 * 8
+#define R8 8 * 8
+#define R9 9 * 8
+#define R10 10 * 8
+#define R11 11 * 8
+#define R12 12 * 8
+#define R13 13 * 8
+#define R14 14 * 8
+#define R15 15 * 8
+#define R16 16 * 8
+#define R17 17 * 8
+#define R18 18 * 8
+#define R19 19 * 8
+#define R20 20 * 8
+#define R21 21 * 8
+#define R22 22 * 8
+#define R23 23 * 8
+#define R24 24 * 8
+#define R25 25 * 8
+#define R26 26 * 8
+#define R27 27 * 8
+#define R28 28 * 8
+#define R29 29 * 8
+#define R30 30 * 8
+#define R31 31 * 8
+#define NIP 32 * 8
+#define CTR 35 * 8
+#define LINK 36 * 8
+#define XER 37 * 8
+
+.globl perf_regs_load
+perf_regs_load:
+ std 0, R0(3)
+ std 1, R1(3)
+ std 2, R2(3)
+ std 3, R3(3)
+ std 4, R4(3)
+ std 5, R5(3)
+ std 6, R6(3)
+ std 7, R7(3)
+ std 8, R8(3)
+ std 9, R9(3)
+ std 10, R10(3)
+ std 11, R11(3)
+ std 12, R12(3)
+ std 13, R13(3)
+ std 14, R14(3)
+ std 15, R15(3)
+ std 16, R16(3)
+ std 17, R17(3)
+ std 18, R18(3)
+ std 19, R19(3)
+ std 20, R20(3)
+ std 21, R21(3)
+ std 22, R22(3)
+ std 23, R23(3)
+ std 24, R24(3)
+ std 25, R25(3)
+ std 26, R26(3)
+ std 27, R27(3)
+ std 28, R28(3)
+ std 29, R29(3)
+ std 30, R30(3)
+ std 31, R31(3)
+
+ /* Store the LR (the return address of this call) as the NIP */
+ mflr 4
+ std 4, NIP(3)
+
+ /* Store the same value as the LR */
+ std 4, LINK(3)
+
+ /* Store XER */
+ mfxer 4
+ std 4, XER(3)
+
+ /* Store CTR */
+ mfctr 4
+ std 4, CTR(3)
+
+ /* Restore original value of r4 */
+ ld 4, R4(3)
+
+ blr
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
new file mode 100644
index 000000000..2e6595310
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/Build
@@ -0,0 +1,10 @@
+libperf-y += header.o
+libperf-y += sym-handling.o
+libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
+
+libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
new file mode 100644
index 000000000..54cfa0530
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_BOOK3S_HV_HCALLS_H
+#define ARCH_PERF_BOOK3S_HV_HCALLS_H
+
+/*
+ * PowerPC HCALL codes: hcall code to name mapping
+ */
+#define kvm_trace_symbol_hcall \
+ {0x4, "H_REMOVE"}, \
+ {0x8, "H_ENTER"}, \
+ {0xc, "H_READ"}, \
+ {0x10, "H_CLEAR_MOD"}, \
+ {0x14, "H_CLEAR_REF"}, \
+ {0x18, "H_PROTECT"}, \
+ {0x1c, "H_GET_TCE"}, \
+ {0x20, "H_PUT_TCE"}, \
+ {0x24, "H_SET_SPRG0"}, \
+ {0x28, "H_SET_DABR"}, \
+ {0x2c, "H_PAGE_INIT"}, \
+ {0x30, "H_SET_ASR"}, \
+ {0x34, "H_ASR_ON"}, \
+ {0x38, "H_ASR_OFF"}, \
+ {0x3c, "H_LOGICAL_CI_LOAD"}, \
+ {0x40, "H_LOGICAL_CI_STORE"}, \
+ {0x44, "H_LOGICAL_CACHE_LOAD"}, \
+ {0x48, "H_LOGICAL_CACHE_STORE"}, \
+ {0x4c, "H_LOGICAL_ICBI"}, \
+ {0x50, "H_LOGICAL_DCBF"}, \
+ {0x54, "H_GET_TERM_CHAR"}, \
+ {0x58, "H_PUT_TERM_CHAR"}, \
+ {0x5c, "H_REAL_TO_LOGICAL"}, \
+ {0x60, "H_HYPERVISOR_DATA"}, \
+ {0x64, "H_EOI"}, \
+ {0x68, "H_CPPR"}, \
+ {0x6c, "H_IPI"}, \
+ {0x70, "H_IPOLL"}, \
+ {0x74, "H_XIRR"}, \
+ {0x78, "H_MIGRATE_DMA"}, \
+ {0x7c, "H_PERFMON"}, \
+ {0xdc, "H_REGISTER_VPA"}, \
+ {0xe0, "H_CEDE"}, \
+ {0xe4, "H_CONFER"}, \
+ {0xe8, "H_PROD"}, \
+ {0xec, "H_GET_PPP"}, \
+ {0xf0, "H_SET_PPP"}, \
+ {0xf4, "H_PURR"}, \
+ {0xf8, "H_PIC"}, \
+ {0xfc, "H_REG_CRQ"}, \
+ {0x100, "H_FREE_CRQ"}, \
+ {0x104, "H_VIO_SIGNAL"}, \
+ {0x108, "H_SEND_CRQ"}, \
+ {0x110, "H_COPY_RDMA"}, \
+ {0x114, "H_REGISTER_LOGICAL_LAN"}, \
+ {0x118, "H_FREE_LOGICAL_LAN"}, \
+ {0x11c, "H_ADD_LOGICAL_LAN_BUFFER"}, \
+ {0x120, "H_SEND_LOGICAL_LAN"}, \
+ {0x124, "H_BULK_REMOVE"}, \
+ {0x130, "H_MULTICAST_CTRL"}, \
+ {0x134, "H_SET_XDABR"}, \
+ {0x138, "H_STUFF_TCE"}, \
+ {0x13c, "H_PUT_TCE_INDIRECT"}, \
+ {0x14c, "H_CHANGE_LOGICAL_LAN_MAC"}, \
+ {0x150, "H_VTERM_PARTNER_INFO"}, \
+ {0x154, "H_REGISTER_VTERM"}, \
+ {0x158, "H_FREE_VTERM"}, \
+ {0x15c, "H_RESET_EVENTS"}, \
+ {0x160, "H_ALLOC_RESOURCE"}, \
+ {0x164, "H_FREE_RESOURCE"}, \
+ {0x168, "H_MODIFY_QP"}, \
+ {0x16c, "H_QUERY_QP"}, \
+ {0x170, "H_REREGISTER_PMR"}, \
+ {0x174, "H_REGISTER_SMR"}, \
+ {0x178, "H_QUERY_MR"}, \
+ {0x17c, "H_QUERY_MW"}, \
+ {0x180, "H_QUERY_HCA"}, \
+ {0x184, "H_QUERY_PORT"}, \
+ {0x188, "H_MODIFY_PORT"}, \
+ {0x18c, "H_DEFINE_AQP1"}, \
+ {0x190, "H_GET_TRACE_BUFFER"}, \
+ {0x194, "H_DEFINE_AQP0"}, \
+ {0x198, "H_RESIZE_MR"}, \
+ {0x19c, "H_ATTACH_MCQP"}, \
+ {0x1a0, "H_DETACH_MCQP"}, \
+ {0x1a4, "H_CREATE_RPT"}, \
+ {0x1a8, "H_REMOVE_RPT"}, \
+ {0x1ac, "H_REGISTER_RPAGES"}, \
+ {0x1b0, "H_DISABLE_AND_GETC"}, \
+ {0x1b4, "H_ERROR_DATA"}, \
+ {0x1b8, "H_GET_HCA_INFO"}, \
+ {0x1bc, "H_GET_PERF_COUNT"}, \
+ {0x1c0, "H_MANAGE_TRACE"}, \
+ {0x1d4, "H_FREE_LOGICAL_LAN_BUFFER"}, \
+ {0x1d8, "H_POLL_PENDING"}, \
+ {0x1e4, "H_QUERY_INT_STATE"}, \
+ {0x244, "H_ILLAN_ATTRIBUTES"}, \
+ {0x250, "H_MODIFY_HEA_QP"}, \
+ {0x254, "H_QUERY_HEA_QP"}, \
+ {0x258, "H_QUERY_HEA"}, \
+ {0x25c, "H_QUERY_HEA_PORT"}, \
+ {0x260, "H_MODIFY_HEA_PORT"}, \
+ {0x264, "H_REG_BCMC"}, \
+ {0x268, "H_DEREG_BCMC"}, \
+ {0x26c, "H_REGISTER_HEA_RPAGES"}, \
+ {0x270, "H_DISABLE_AND_GET_HEA"}, \
+ {0x274, "H_GET_HEA_INFO"}, \
+ {0x278, "H_ALLOC_HEA_RESOURCE"}, \
+ {0x284, "H_ADD_CONN"}, \
+ {0x288, "H_DEL_CONN"}, \
+ {0x298, "H_JOIN"}, \
+ {0x2a4, "H_VASI_STATE"}, \
+ {0x2b0, "H_ENABLE_CRQ"}, \
+ {0x2b8, "H_GET_EM_PARMS"}, \
+ {0x2d0, "H_SET_MPP"}, \
+ {0x2d4, "H_GET_MPP"}, \
+ {0x2ec, "H_HOME_NODE_ASSOCIATIVITY"}, \
+ {0x2f4, "H_BEST_ENERGY"}, \
+ {0x2fc, "H_XIRR_X"}, \
+ {0x300, "H_RANDOM"}, \
+ {0x304, "H_COP"}, \
+ {0x314, "H_GET_MPP_X"}, \
+ {0x31c, "H_SET_MODE"}, \
+ {0xf000, "H_RTAS"} \
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/book3s_hv_exits.h b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
new file mode 100644
index 000000000..853b95d1e
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/book3s_hv_exits.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_BOOK3S_HV_EXITS_H
+#define ARCH_PERF_BOOK3S_HV_EXITS_H
+
+/*
+ * PowerPC Interrupt vectors: exit code to name mapping
+ */
+
+#define kvm_trace_symbol_exit \
+ {0x0, "RETURN_TO_HOST"}, \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x501, "EXTERNAL_LEVEL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+
+#endif
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
new file mode 100644
index 000000000..98ac87052
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -0,0 +1,105 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Ian Munsie, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <errno.h>
+#include <string.h>
+#include <dwarf-regs.h>
+#include <linux/ptrace.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+#include "util.h"
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+ unsigned int ptregs_offset;
+};
+
+#define REG_DWARFNUM_NAME(r, num) \
+ {.name = __stringify(%)__stringify(r), .dwarfnum = num, \
+ .ptregs_offset = offsetof(struct pt_regs, r)}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = __stringify(%gpr##num), .dwarfnum = num, \
+ .ptregs_offset = offsetof(struct pt_regs, gpr[num])}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0}
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ GPR_DWARFNUM_NAME(30),
+ GPR_DWARFNUM_NAME(31),
+ REG_DWARFNUM_NAME(msr, 66),
+ REG_DWARFNUM_NAME(ctr, 109),
+ REG_DWARFNUM_NAME(link, 108),
+ REG_DWARFNUM_NAME(xer, 101),
+ REG_DWARFNUM_NAME(dar, 119),
+ REG_DWARFNUM_NAME(dsisr, 118),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - look up a register name from its DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in the
+ * regdwarfnum_table from its DWARF register number. If the register is not
+ * found in the table, this returns NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
+
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_dwarfnum *roff;
+
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->ptregs_offset;
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
new file mode 100644
index 000000000..e46be9ef5
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/stringify.h>
+#include "header.h"
+#include "util.h"
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," __stringify(rn) \
+ : "=r" (rval)); rval; })
+
+#define SPRN_PVR 0x11F /* Processor Version Register */
+#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revision field */
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+ unsigned long pvr;
+ int nb;
+
+ pvr = mfspr(SPRN_PVR);
+
+ nb = scnprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr));
+
+ /* Look for the end marker to ensure the entire string fit */
+ if (strchr(buffer, '$')) {
+ buffer[nb-1] = '\0';
+ return 0;
+ }
+ return ENOBUFS;
+}
+
+char *
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ char *bufp;
+
+ if (asprintf(&bufp, "%.8lx", mfspr(SPRN_PVR)) < 0)
+ bufp = NULL;
+
+ return bufp;
+}
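
Worked example for the two helpers above, under an assumed POWER9-style PVR of 0x004e1202:

    /* PVR_VER(0x004e1202) == 0x004e == 78, PVR_REV(0x004e1202) == 0x1202 == 4610,
     * so get_cpuid() fills the buffer with "78,4610$" and, once the '$' end
     * marker confirms nothing was truncated, overwrites it with the NUL,
     * yielding "78,4610". get_cpuid_str() would instead return "004e1202". */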
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
new file mode 100644
index 000000000..596ad6aed
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include "util/kvm-stat.h"
+#include "util/parse-events.h"
+#include "util/debug.h"
+
+#include "book3s_hv_exits.h"
+#include "book3s_hcalls.h"
+
+#define NR_TPS 4
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 40;
+const char *kvm_entry_trace = "kvm_hv:kvm_guest_enter";
+const char *kvm_exit_trace = "kvm_hv:kvm_guest_exit";
+
+define_exit_reasons_table(hv_exit_reasons, kvm_trace_symbol_exit);
+define_exit_reasons_table(hcall_reasons, kvm_trace_symbol_hcall);
+
+/* Tracepoints specific to ppc_book3s_hv */
+const char *ppc_book3s_hv_kvm_tp[] = {
+ "kvm_hv:kvm_guest_enter",
+ "kvm_hv:kvm_guest_exit",
+ "kvm_hv:kvm_hcall_enter",
+ "kvm_hv:kvm_hcall_exit",
+ NULL,
+};
+
+/* 1 extra placeholder for NULL */
+const char *kvm_events_tp[NR_TPS + 1];
+const char *kvm_exit_reason;
+
+static void hcall_event_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->info = 0;
+ key->key = perf_evsel__intval(evsel, sample, "req");
+}
+
+static const char *get_hcall_exit_reason(u64 exit_code)
+{
+ struct exit_reasons_table *tbl = hcall_reasons;
+
+ while (tbl->reason != NULL) {
+ if (tbl->exit_code == exit_code)
+ return tbl->reason;
+ tbl++;
+ }
+
+ pr_debug("Unknown hcall code: %lld\n",
+ (unsigned long long)exit_code);
+ return "UNKNOWN";
+}
+
+static bool hcall_event_end(struct perf_evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return (!strcmp(evsel->name, kvm_events_tp[3]));
+}
+
+static bool hcall_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+ if (!strcmp(evsel->name, kvm_events_tp[2])) {
+ hcall_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+static void hcall_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+ struct event_key *key,
+ char *decode)
+{
+ const char *hcall_reason = get_hcall_exit_reason(key->key);
+
+ scnprintf(decode, decode_str_len, "%s", hcall_reason);
+}
+
+static struct kvm_events_ops hcall_events = {
+ .is_begin_event = hcall_event_begin,
+ .is_end_event = hcall_event_end,
+ .decode_key = hcall_event_decode_key,
+ .name = "HCALL-EVENT",
+};
+
+static struct kvm_events_ops exit_events = {
+ .is_begin_event = exit_event_begin,
+ .is_end_event = exit_event_end,
+ .decode_key = exit_event_decode_key,
+ .name = "VM-EXIT"
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+ { .name = "vmexit", .ops = &exit_events },
+ { .name = "hcall", .ops = &hcall_events },
+ { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+ NULL,
+};
+
+static int is_tracepoint_available(const char *str, struct perf_evlist *evlist)
+{
+ struct parse_events_error err;
+ int ret;
+
+ err.str = NULL;
+ ret = parse_events(evlist, str, &err);
+ if (err.str)
+ pr_err("%s : %s\n", str, err.str);
+ return ret;
+}
+
+static int ppc__setup_book3s_hv(struct perf_kvm_stat *kvm,
+ struct perf_evlist *evlist)
+{
+ const char **events_ptr;
+ int i, nr_tp = 0, err = -1;
+
+ /* Check for book3s_hv tracepoints */
+ for (events_ptr = ppc_book3s_hv_kvm_tp; *events_ptr; events_ptr++) {
+ err = is_tracepoint_available(*events_ptr, evlist);
+ if (err)
+ return -1;
+ nr_tp++;
+ }
+
+ for (i = 0; i < nr_tp; i++)
+ kvm_events_tp[i] = ppc_book3s_hv_kvm_tp[i];
+
+ kvm_events_tp[i] = NULL;
+ kvm_exit_reason = "trap";
+ kvm->exit_reasons = hv_exit_reasons;
+ kvm->exit_reasons_isa = "HV";
+
+ return 0;
+}
+
+/* Wrapper to setup kvm tracepoints */
+static int ppc__setup_kvm_tp(struct perf_kvm_stat *kvm)
+{
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ /* Right now, only supported on book3s_hv */
+ return ppc__setup_book3s_hv(kvm, evlist);
+}
+
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm)
+{
+ return ppc__setup_kvm_tp(kvm);
+}
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid __maybe_unused)
+{
+ int ret;
+
+ ret = ppc__setup_kvm_tp(kvm);
+ if (ret) {
+ kvm->exit_reasons = NULL;
+ kvm->exit_reasons_isa = NULL;
+ }
+
+ return ret;
+}
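
A sketch of the decode path wired up above (hypothetical key value; hcall_event_decode_key() is static here, so this is illustrative rather than callable from outside):

    char decode[40];	/* decode_str_len */
    struct event_key key = { .key = 0xe0 };	/* assumed "req" field from a sample */

    hcall_event_decode_key(NULL, &key, decode);
    /* decode now holds "H_CEDE", per the table in book3s_hcalls.h */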
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000000000..ec50939b0
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include <regex.h>
+
+#include "../../perf.h"
+#include "../../util/util.h"
+#include "../../util/perf_regs.h"
+#include "../../util/debug.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG(r0, PERF_REG_POWERPC_R0),
+ SMPL_REG(r1, PERF_REG_POWERPC_R1),
+ SMPL_REG(r2, PERF_REG_POWERPC_R2),
+ SMPL_REG(r3, PERF_REG_POWERPC_R3),
+ SMPL_REG(r4, PERF_REG_POWERPC_R4),
+ SMPL_REG(r5, PERF_REG_POWERPC_R5),
+ SMPL_REG(r6, PERF_REG_POWERPC_R6),
+ SMPL_REG(r7, PERF_REG_POWERPC_R7),
+ SMPL_REG(r8, PERF_REG_POWERPC_R8),
+ SMPL_REG(r9, PERF_REG_POWERPC_R9),
+ SMPL_REG(r10, PERF_REG_POWERPC_R10),
+ SMPL_REG(r11, PERF_REG_POWERPC_R11),
+ SMPL_REG(r12, PERF_REG_POWERPC_R12),
+ SMPL_REG(r13, PERF_REG_POWERPC_R13),
+ SMPL_REG(r14, PERF_REG_POWERPC_R14),
+ SMPL_REG(r15, PERF_REG_POWERPC_R15),
+ SMPL_REG(r16, PERF_REG_POWERPC_R16),
+ SMPL_REG(r17, PERF_REG_POWERPC_R17),
+ SMPL_REG(r18, PERF_REG_POWERPC_R18),
+ SMPL_REG(r19, PERF_REG_POWERPC_R19),
+ SMPL_REG(r20, PERF_REG_POWERPC_R20),
+ SMPL_REG(r21, PERF_REG_POWERPC_R21),
+ SMPL_REG(r22, PERF_REG_POWERPC_R22),
+ SMPL_REG(r23, PERF_REG_POWERPC_R23),
+ SMPL_REG(r24, PERF_REG_POWERPC_R24),
+ SMPL_REG(r25, PERF_REG_POWERPC_R25),
+ SMPL_REG(r26, PERF_REG_POWERPC_R26),
+ SMPL_REG(r27, PERF_REG_POWERPC_R27),
+ SMPL_REG(r28, PERF_REG_POWERPC_R28),
+ SMPL_REG(r29, PERF_REG_POWERPC_R29),
+ SMPL_REG(r30, PERF_REG_POWERPC_R30),
+ SMPL_REG(r31, PERF_REG_POWERPC_R31),
+ SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+ SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+ SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+ SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+ SMPL_REG(link, PERF_REG_POWERPC_LINK),
+ SMPL_REG(xer, PERF_REG_POWERPC_XER),
+ SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+ SMPL_REG(softe, PERF_REG_POWERPC_SOFTE),
+ SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+ SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+ SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+ SMPL_REG_END
+};
+
+/* REG or %rREG */
+#define SDT_OP_REGEX1 "^(%r)?([1-2]?[0-9]|3[0-1])$"
+
+/* -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) */
+#define SDT_OP_REGEX2 "^(\\-)?([0-9]+)\\((%r)?([1-2]?[0-9]|3[0-1])\\)$"
+
+static regex_t sdt_op_regex1, sdt_op_regex2;
+
+static int sdt_init_op_regex(void)
+{
+ static int initialized;
+ int ret = 0;
+
+ if (initialized)
+ return 0;
+
+ ret = regcomp(&sdt_op_regex1, SDT_OP_REGEX1, REG_EXTENDED);
+ if (ret)
+ goto error;
+
+ ret = regcomp(&sdt_op_regex2, SDT_OP_REGEX2, REG_EXTENDED);
+ if (ret)
+ goto free_regex1;
+
+ initialized = 1;
+ return 0;
+
+free_regex1:
+ regfree(&sdt_op_regex1);
+error:
+ pr_debug4("Regex compilation error.\n");
+ return ret;
+}
+
+/*
+ * Parse OP and convert it into uprobe format, which is, +/-NUM(%gprREG).
+ * Possible variants of OP are:
+ * Format Example
+ * -------------------------
+ * NUM(REG) 48(18)
+ * -NUM(REG) -48(18)
+ * NUM(%rREG) 48(%r18)
+ * -NUM(%rREG) -48(%r18)
+ * REG 18
+ * %rREG %r18
+ * iNUM i0
+ * i-NUM i-1
+ *
+ * SDT marker arguments on PowerPC use the %rREG form with the -mregnames
+ * flag and the REG form with -mno-regnames. Here REG is a general purpose
+ * register in the 0 to 31 range.
+ */
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+ int ret, new_len;
+ regmatch_t rm[5];
+ char prefix;
+
+ /* Constant argument. Uprobes do not support it */
+ if (old_op[0] == 'i') {
+ pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+ return SDT_ARG_SKIP;
+ }
+
+ ret = sdt_init_op_regex();
+ if (ret < 0)
+ return ret;
+
+ if (!regexec(&sdt_op_regex1, old_op, 3, rm, 0)) {
+ /* REG or %rREG --> %gprREG */
+
+ new_len = 5; /* % g p r NULL */
+ new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ scnprintf(*new_op, new_len, "%%gpr%.*s",
+ (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so);
+ } else if (!regexec(&sdt_op_regex2, old_op, 5, rm, 0)) {
+ /*
+ * -NUM(REG) or NUM(REG) or -NUM(%rREG) or NUM(%rREG) -->
+ * +/-NUM(%gprREG)
+ */
+ prefix = (rm[1].rm_so == -1) ? '+' : '-';
+
+ new_len = 8; /* +/- ( % g p r ) NULL */
+ new_len += (int)(rm[2].rm_eo - rm[2].rm_so);
+ new_len += (int)(rm[4].rm_eo - rm[4].rm_so);
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ scnprintf(*new_op, new_len, "%c%.*s(%%gpr%.*s)", prefix,
+ (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+ (int)(rm[4].rm_eo - rm[4].rm_so), old_op + rm[4].rm_so);
+ } else {
+ pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+ return SDT_ARG_SKIP;
+ }
+
+ return SDT_ARG_VALID;
+}
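
Illustrative conversions the parser above performs (sketch only; return values and frees of new_op omitted for brevity):

    char op1[] = "%r18", op2[] = "-48(18)", op3[] = "48(%r18)", op4[] = "i0";
    char *new_op = NULL;

    arch_sdt_arg_parse_op(op1, &new_op);	/* new_op = "%gpr18" */
    arch_sdt_arg_parse_op(op2, &new_op);	/* new_op = "-48(%gpr18)" */
    arch_sdt_arg_parse_op(op3, &new_op);	/* new_op = "+48(%gpr18)" */
    arch_sdt_arg_parse_op(op4, &new_op);	/* returns SDT_ARG_SKIP: constant */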
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
new file mode 100644
index 000000000..7c6eeb463
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -0,0 +1,283 @@
+/*
+ * Use DWARF Debug information to skip unnecessary callchain entries.
+ *
+ * Copyright (C) 2014 Sukadev Bhattiprolu, IBM Corporation.
+ * Copyright (C) 2014 Ulrich Weigand, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <inttypes.h>
+#include <dwarf.h>
+#include <elfutils/libdwfl.h>
+
+#include "util/thread.h"
+#include "util/callchain.h"
+#include "util/debug.h"
+
+/*
+ * When saving the callchain on Power, the kernel conservatively saves
+ * excess entries in the callchain. A few of these entries are needed
+ * in some cases but not others. If the unnecessary entries are not
+ * ignored, we end up with duplicate arcs in the call-graphs. Use
+ * DWARF debug information to skip over any unnecessary callchain
+ * entries.
+ *
+ * See function header for arch_adjust_callchain() below for more details.
+ *
+ * The libdwfl code in this file is based on code from elfutils
+ * (libdwfl/argp-std.c, libdwfl/tests/addrcfi.c, etc).
+ */
+static char *debuginfo_path;
+
+static const Dwfl_Callbacks offline_callbacks = {
+ .debuginfo_path = &debuginfo_path,
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .section_address = dwfl_offline_section_address,
+};
+
+
+/*
+ * Use the DWARF expression for the Call-frame-address and determine
+ * if return address is in LR and if a new frame was allocated.
+ */
+static int check_return_reg(int ra_regno, Dwarf_Frame *frame)
+{
+ Dwarf_Op ops_mem[2];
+ Dwarf_Op dummy;
+ Dwarf_Op *ops = &dummy;
+ size_t nops;
+ int result;
+
+ result = dwarf_frame_register(frame, ra_regno, ops_mem, &ops, &nops);
+ if (result < 0) {
+ pr_debug("dwarf_frame_register() %s\n", dwarf_errmsg(-1));
+ return -1;
+ }
+
+ /*
+ * Check if return address is on the stack. If return address
+ * is in a register (typically R0), it is yet to be saved on
+ * the stack.
+ */
+ if ((nops != 0 || ops != NULL) &&
+ !(nops == 1 && ops[0].atom == DW_OP_regx &&
+ ops[0].number2 == 0 && ops[0].offset == 0))
+ return 0;
+
+ /*
+ * Return address is in LR. Check if a frame was allocated
+ * but not-yet used.
+ */
+ result = dwarf_frame_cfa(frame, &ops, &nops);
+ if (result < 0) {
+ pr_debug("dwarf_frame_cfa() returns %d, %s\n", result,
+ dwarf_errmsg(-1));
+ return -1;
+ }
+
+ /*
+ * If call frame address is in r1, no new frame was allocated.
+ */
+ if (nops == 1 && ops[0].atom == DW_OP_bregx && ops[0].number == 1 &&
+ ops[0].number2 == 0)
+ return 1;
+
+ /*
+ * A new frame was allocated but has not yet been used.
+ */
+ return 2;
+}
+
+/*
+ * Get the DWARF frame from the .eh_frame section.
+ */
+static Dwarf_Frame *get_eh_frame(Dwfl_Module *mod, Dwarf_Addr pc)
+{
+ int result;
+ Dwarf_Addr bias;
+ Dwarf_CFI *cfi;
+ Dwarf_Frame *frame;
+
+ cfi = dwfl_module_eh_cfi(mod, &bias);
+ if (!cfi) {
+ pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
+ return NULL;
+ }
+
+ result = dwarf_cfi_addrframe(cfi, pc-bias, &frame);
+ if (result) {
+ pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1));
+ return NULL;
+ }
+
+ return frame;
+}
+
+/*
+ * Get the DWARF frame from the .debug_frame section.
+ */
+static Dwarf_Frame *get_dwarf_frame(Dwfl_Module *mod, Dwarf_Addr pc)
+{
+ Dwarf_CFI *cfi;
+ Dwarf_Addr bias;
+ Dwarf_Frame *frame;
+ int result;
+
+ cfi = dwfl_module_dwarf_cfi(mod, &bias);
+ if (!cfi) {
+ pr_debug("%s(): no CFI - %s\n", __func__, dwfl_errmsg(-1));
+ return NULL;
+ }
+
+ result = dwarf_cfi_addrframe(cfi, pc-bias, &frame);
+ if (result) {
+ pr_debug("%s(): %s\n", __func__, dwfl_errmsg(-1));
+ return NULL;
+ }
+
+ return frame;
+}
+
+/*
+ * Return:
+ * 0 if return address for the program counter @pc is on stack
+ * 1 if return address is in LR and no new stack frame was allocated
+ * 2 if return address is in LR and a new frame was allocated (but not
+ * yet used)
+ * -1 in case of errors
+ */
+static int check_return_addr(struct dso *dso, u64 map_start, Dwarf_Addr pc)
+{
+ int rc = -1;
+ Dwfl *dwfl;
+ Dwfl_Module *mod;
+ Dwarf_Frame *frame;
+ int ra_regno;
+ Dwarf_Addr start = pc;
+ Dwarf_Addr end = pc;
+ bool signalp;
+ const char *exec_file = dso->long_name;
+
+ dwfl = dso->dwfl;
+
+ if (!dwfl) {
+ dwfl = dwfl_begin(&offline_callbacks);
+ if (!dwfl) {
+ pr_debug("dwfl_begin() failed: %s\n", dwarf_errmsg(-1));
+ return -1;
+ }
+
+ mod = dwfl_report_elf(dwfl, exec_file, exec_file, -1,
+ map_start, false);
+ if (!mod) {
+ pr_debug("dwfl_report_elf() failed %s\n",
+ dwarf_errmsg(-1));
+ /*
+ * We normally cache the DWARF debug info and never
+ * call dwfl_end(). But to prevent fd leak, free in
+ * case of error.
+ */
+ dwfl_end(dwfl);
+ goto out;
+ }
+ dso->dwfl = dwfl;
+ }
+
+ mod = dwfl_addrmodule(dwfl, pc);
+ if (!mod) {
+ pr_debug("dwfl_addrmodule() failed, %s\n", dwarf_errmsg(-1));
+ goto out;
+ }
+
+ /*
+ * To work with split debug info files (eg: glibc), check both
+ * .eh_frame and .debug_frame sections of the ELF header.
+ */
+ frame = get_eh_frame(mod, pc);
+ if (!frame) {
+ frame = get_dwarf_frame(mod, pc);
+ if (!frame)
+ goto out;
+ }
+
+ ra_regno = dwarf_frame_info(frame, &start, &end, &signalp);
+ if (ra_regno < 0) {
+ pr_debug("Return address register unavailable: %s\n",
+ dwarf_errmsg(-1));
+ goto out;
+ }
+
+ rc = check_return_reg(ra_regno, frame);
+
+out:
+ return rc;
+}
+
+/*
+ * The callchain saved by the kernel always includes the link register (LR).
+ *
+ * 0: PERF_CONTEXT_USER
+ * 1: Program counter (Next instruction pointer)
+ * 2: LR value
+ * 3: Caller's caller
+ * 4: ...
+ *
+ * The value in LR is only needed when it holds a return address. If the
+ * return address is on the stack, we should ignore the LR value.
+ *
+ * Further, when the return address is in the LR, if a new frame was just
+ * allocated but the LR was not saved into it, then the LR contains the
+ * caller; slot 4 contains the caller's caller, and the contents of slot 3
+ * (chain->ips[3]) are undefined and must be ignored.
+ *
+ * Use DWARF debug information to determine if any entries need to be skipped.
+ *
+ * Return:
+ * index: of callchain entry that needs to be ignored (if any)
+ * -1 if no entry needs to be ignored or in case of errors
+ */
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
+{
+ struct addr_location al;
+ struct dso *dso = NULL;
+ int rc;
+ u64 ip;
+ u64 skip_slot = -1;
+
+ if (!chain || chain->nr < 3)
+ return skip_slot;
+
+ ip = chain->ips[1];
+
+ thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
+
+ if (al.map)
+ dso = al.map->dso;
+
+ if (!dso) {
+ pr_debug("%" PRIx64 " dso is NULL\n", ip);
+ return skip_slot;
+ }
+
+ rc = check_return_addr(dso, al.map->start, ip);
+
+ pr_debug("[DSO %s, sym %s, ip 0x%" PRIx64 "] rc %d\n",
+ dso->long_name, al.sym->name, ip, rc);
+
+ if (rc == 0) {
+ /*
+ * Return address on stack. Ignore LR value in callchain
+ */
+ skip_slot = 2;
+ } else if (rc == 2) {
+ /*
+ * New frame allocated but return address still in LR.
+ * Ignore the caller's caller entry in callchain.
+ */
+ skip_slot = 3;
+ }
+ return skip_slot;
+}
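
A worked example of the two skip cases (values hypothetical):

    /*
     * chain->ips = { PERF_CONTEXT_USER, pc, lr, x, ... }
     *
     * - return address already saved on the stack: rc == 0, skip_slot = 2,
     *   i.e. the stale LR entry is dropped;
     * - frame allocated but LR not yet saved into it: rc == 2, skip_slot = 3,
     *   since ips[3] is undefined in that window.
     */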
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
new file mode 100644
index 000000000..10a44e946
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -0,0 +1,155 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+#include "map.h"
+#include "probe-event.h"
+#include "probe-file.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+ return ehdr.e_type == ET_EXEC ||
+ ehdr.e_type == ET_REL ||
+ ehdr.e_type == ET_DYN;
+}
+
+#endif
+
+int arch__choose_best_symbol(struct symbol *syma,
+ struct symbol *symb __maybe_unused)
+{
+ char *sym = syma->name;
+
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+ /* Skip over any initial dot */
+ if (*sym == '.')
+ sym++;
+#endif
+
+ /* Avoid "SyS" kernel syscall aliases */
+ if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
+ return SYMBOL_B;
+ if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10))
+ return SYMBOL_B;
+
+ return SYMBOL_A;
+}
+
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+/* Allow matching against dot variants */
+int arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+ /* Skip over initial dot */
+ if (*namea == '.')
+ namea++;
+ if (*nameb == '.')
+ nameb++;
+
+ return strcmp(namea, nameb);
+}
+
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+ unsigned int n)
+{
+ /* Skip over initial dot */
+ if (*namea == '.')
+ namea++;
+ if (*nameb == '.')
+ nameb++;
+
+ return strncmp(namea, nameb, n);
+}
+
+const char *arch__normalize_symbol_name(const char *name)
+{
+ /* Skip over initial dot */
+ if (name && *name == '.')
+ name++;
+ return name;
+}
+#endif
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__sym_update(struct symbol *s, GElf_Sym *sym)
+{
+ s->arch_sym = sym->st_other;
+}
+#endif
+
+#define PPC64LE_LEP_OFFSET 8
+
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym)
+{
+ int lep_offset;
+
+ /*
+ * When probing at a function entry point, we almost always want the
+ * LEP since that catches calls to the function through both the GEP and
+ * the LEP. Hence, we would like to probe at an offset of 8 bytes if
+ * the user only specified the function entry.
+ *
+ * However, if the user specifies an offset, we fall back to using the
+ * GEP since all userspace applications (objdump/readelf) show function
+ * disassembly with offsets from the GEP.
+ */
+ if (pev->point.offset || !map || !sym)
+ return;
+
+ /* For kretprobes, add an offset only if the kernel supports it */
+ if (!pev->uprobes && pev->point.retprobe) {
+#ifdef HAVE_LIBELF_SUPPORT
+ if (!kretprobe_offset_is_supported())
+#endif
+ return;
+ }
+
+ lep_offset = PPC64_LOCAL_ENTRY_OFFSET(sym->arch_sym);
+
+ if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS)
+ tev->point.offset += PPC64LE_LEP_OFFSET;
+ else if (lep_offset) {
+ if (pev->uprobes)
+ tev->point.address += lep_offset;
+ else
+ tev->point.offset += lep_offset;
+ }
+}
+
+#ifdef HAVE_LIBELF_SUPPORT
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs)
+{
+ struct probe_trace_event *tev;
+ struct map *map;
+ struct symbol *sym = NULL;
+ struct rb_node *tmp;
+ int i = 0;
+
+ map = get_target_map(pev->target, pev->nsi, pev->uprobes);
+ if (!map || map__load(map) < 0)
+ return;
+
+ for (i = 0; i < ntevs; i++) {
+ tev = &pev->tevs[i];
+ map__for_each_symbol(map, sym, tmp) {
+ if (map->unmap_ip(map, sym->start) == tev->point.address) {
+ arch__fix_tev_from_maps(pev, tev, map, sym);
+ break;
+ }
+ }
+ }
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
+#endif
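
Concrete effect of the GEP/LEP fixup above (hypothetical ppc64le function foo with a non-zero local entry offset):

    /*
     * perf probe foo      -> trace point placed at foo+8 (the LEP), catching
     *                        calls that enter through either the GEP or LEP;
     * perf probe foo+4    -> an explicit offset was given, so the GEP is kept
     *                        to match objdump/readelf disassembly offsets.
     */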
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
new file mode 100644
index 000000000..7a1f05ef2
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
+static const int special_regs[3][2] = {
+ { 65, PERF_REG_POWERPC_LINK },
+ { 101, PERF_REG_POWERPC_XER },
+ { 109, PERF_REG_POWERPC_CTR },
+};
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[32], dwarf_nip;
+ size_t i;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_POWERPC_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+ dwarf_regs[16] = REG(R16);
+ dwarf_regs[17] = REG(R17);
+ dwarf_regs[18] = REG(R18);
+ dwarf_regs[19] = REG(R19);
+ dwarf_regs[20] = REG(R20);
+ dwarf_regs[21] = REG(R21);
+ dwarf_regs[22] = REG(R22);
+ dwarf_regs[23] = REG(R23);
+ dwarf_regs[24] = REG(R24);
+ dwarf_regs[25] = REG(R25);
+ dwarf_regs[26] = REG(R26);
+ dwarf_regs[27] = REG(R27);
+ dwarf_regs[28] = REG(R28);
+ dwarf_regs[29] = REG(R29);
+ dwarf_regs[30] = REG(R30);
+ dwarf_regs[31] = REG(R31);
+ if (!dwfl_thread_state_registers(thread, 0, 32, dwarf_regs))
+ return false;
+
+ dwarf_nip = REG(NIP);
+ dwfl_thread_state_register_pc(thread, dwarf_nip);
+ for (i = 0; i < ARRAY_SIZE(special_regs); i++) {
+ Dwarf_Word val = 0;
+ perf_reg_value(&val, user_regs, special_regs[i][1]);
+ if (!dwfl_thread_state_registers(thread,
+ special_regs[i][0], 1,
+ &val))
+ return false;
+ }
+
+ return true;
+}
diff --git a/tools/perf/arch/powerpc/util/unwind-libunwind.c b/tools/perf/arch/powerpc/util/unwind-libunwind.c
new file mode 100644
index 000000000..9e15f92ae
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/unwind-libunwind.c
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2016 Chandan Kumar, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <errno.h>
+#include <libunwind.h>
+#include <asm/perf_regs.h>
+#include "../../util/unwind.h"
+#include "../../util/debug.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_PPC64_R0:
+ return PERF_REG_POWERPC_R0;
+ case UNW_PPC64_R1:
+ return PERF_REG_POWERPC_R1;
+ case UNW_PPC64_R2:
+ return PERF_REG_POWERPC_R2;
+ case UNW_PPC64_R3:
+ return PERF_REG_POWERPC_R3;
+ case UNW_PPC64_R4:
+ return PERF_REG_POWERPC_R4;
+ case UNW_PPC64_R5:
+ return PERF_REG_POWERPC_R5;
+ case UNW_PPC64_R6:
+ return PERF_REG_POWERPC_R6;
+ case UNW_PPC64_R7:
+ return PERF_REG_POWERPC_R7;
+ case UNW_PPC64_R8:
+ return PERF_REG_POWERPC_R8;
+ case UNW_PPC64_R9:
+ return PERF_REG_POWERPC_R9;
+ case UNW_PPC64_R10:
+ return PERF_REG_POWERPC_R10;
+ case UNW_PPC64_R11:
+ return PERF_REG_POWERPC_R11;
+ case UNW_PPC64_R12:
+ return PERF_REG_POWERPC_R12;
+ case UNW_PPC64_R13:
+ return PERF_REG_POWERPC_R13;
+ case UNW_PPC64_R14:
+ return PERF_REG_POWERPC_R14;
+ case UNW_PPC64_R15:
+ return PERF_REG_POWERPC_R15;
+ case UNW_PPC64_R16:
+ return PERF_REG_POWERPC_R16;
+ case UNW_PPC64_R17:
+ return PERF_REG_POWERPC_R17;
+ case UNW_PPC64_R18:
+ return PERF_REG_POWERPC_R18;
+ case UNW_PPC64_R19:
+ return PERF_REG_POWERPC_R19;
+ case UNW_PPC64_R20:
+ return PERF_REG_POWERPC_R20;
+ case UNW_PPC64_R21:
+ return PERF_REG_POWERPC_R21;
+ case UNW_PPC64_R22:
+ return PERF_REG_POWERPC_R22;
+ case UNW_PPC64_R23:
+ return PERF_REG_POWERPC_R23;
+ case UNW_PPC64_R24:
+ return PERF_REG_POWERPC_R24;
+ case UNW_PPC64_R25:
+ return PERF_REG_POWERPC_R25;
+ case UNW_PPC64_R26:
+ return PERF_REG_POWERPC_R26;
+ case UNW_PPC64_R27:
+ return PERF_REG_POWERPC_R27;
+ case UNW_PPC64_R28:
+ return PERF_REG_POWERPC_R28;
+ case UNW_PPC64_R29:
+ return PERF_REG_POWERPC_R29;
+ case UNW_PPC64_R30:
+ return PERF_REG_POWERPC_R30;
+ case UNW_PPC64_R31:
+ return PERF_REG_POWERPC_R31;
+ case UNW_PPC64_LR:
+ return PERF_REG_POWERPC_LINK;
+ case UNW_PPC64_CTR:
+ return PERF_REG_POWERPC_CTR;
+ case UNW_PPC64_XER:
+ return PERF_REG_POWERPC_XER;
+ case UNW_PPC64_NIP:
+ return PERF_REG_POWERPC_NIP;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+}
diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build
new file mode 100644
index 000000000..54afe4a46
--- /dev/null
+++ b/tools/perf/arch/s390/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
new file mode 100644
index 000000000..dfa6e3103
--- /dev/null
+++ b/tools/perf/arch/s390/Makefile
@@ -0,0 +1,30 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+
+#
+# Syscall table generation for perf
+#
+
+out := $(OUTPUT)arch/s390/include/generated/asm
+header := $(out)/syscalls_64.c
+syskrn := $(srctree)/arch/s390/kernel/syscalls/syscall.tbl
+sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls
+sysdef := $(sysprf)/syscall.tbl
+systbl := $(sysprf)/mksyscalltbl
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sysdef) $(systbl)
+ @(test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ (diff -B $(sysdef) $(syskrn) >/dev/null) \
+ || echo "Warning: Kernel ABI header at '$(sysdef)' differs from latest version at '$(syskrn)'" >&2 )) || true
+ $(Q)$(SHELL) '$(systbl)' $(sysdef) > $@
+
+clean::
+ $(call QUIET_CLEAN, s390) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c
new file mode 100644
index 000000000..cee4e2f7c
--- /dev/null
+++ b/tools/perf/arch/s390/annotate/instructions.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+
+static int s390_call__parse(struct arch *arch, struct ins_operands *ops,
+ struct map_symbol *ms)
+{
+ char *endptr, *tok, *name;
+ struct map *map = ms->map;
+ struct addr_map_symbol target = {
+ .map = map,
+ };
+
+ tok = strchr(ops->raw, ',');
+ if (!tok)
+ return -1;
+
+ ops->target.addr = strtoull(tok + 1, &endptr, 16);
+
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ return -1;
+
+ name++;
+
+ if (arch->objdump.skip_functions_char &&
+ strchr(name, arch->objdump.skip_functions_char))
+ return -1;
+
+ tok = strchr(name, '>');
+ if (tok == NULL)
+ return -1;
+
+ *tok = '\0';
+ ops->target.name = strdup(name);
+ *tok = '>';
+
+ if (ops->target.name == NULL)
+ return -1;
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+
+ if (map_groups__find_ams(&target) == 0 &&
+ map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.sym;
+
+ return 0;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops);
+
+static struct ins_ops s390_call_ops = {
+ .parse = s390_call__parse,
+ .scnprintf = call__scnprintf,
+};
+
+static int s390_mov__parse(struct arch *arch __maybe_unused,
+ struct ins_operands *ops,
+ struct map_symbol *ms __maybe_unused)
+{
+ char *s = strchr(ops->raw, ','), *target, *endptr;
+
+ if (s == NULL)
+ return -1;
+
+ *s = '\0';
+ ops->source.raw = strdup(ops->raw);
+ *s = ',';
+
+ if (ops->source.raw == NULL)
+ return -1;
+
+ target = ++s;
+ ops->target.raw = strdup(target);
+ if (ops->target.raw == NULL)
+ goto out_free_source;
+
+ ops->target.addr = strtoull(target, &endptr, 16);
+ if (endptr == target)
+ goto out_free_target;
+
+ s = strchr(endptr, '<');
+ if (s == NULL)
+ goto out_free_target;
+ endptr = strchr(s + 1, '>');
+ if (endptr == NULL)
+ goto out_free_target;
+
+ *endptr = '\0';
+ ops->target.name = strdup(s + 1);
+ *endptr = '>';
+ if (ops->target.name == NULL)
+ goto out_free_target;
+
+ return 0;
+
+out_free_target:
+ zfree(&ops->target.raw);
+out_free_source:
+ zfree(&ops->source.raw);
+ return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops);
+
+static struct ins_ops s390_mov_ops = {
+ .parse = s390_mov__parse,
+ .scnprintf = mov__scnprintf,
+};
+
+static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *name)
+{
+ struct ins_ops *ops = NULL;
+
+	/* catch all kinds of jumps */
+ if (strchr(name, 'j') ||
+ !strncmp(name, "bct", 3) ||
+ !strncmp(name, "br", 2))
+ ops = &jump_ops;
+ /* override call/returns */
+ if (!strcmp(name, "bras") ||
+ !strcmp(name, "brasl") ||
+ !strcmp(name, "basr"))
+ ops = &s390_call_ops;
+ if (!strcmp(name, "br"))
+ ops = &ret_ops;
+ /* override load/store relative to PC */
+ if (!strcmp(name, "lrl") ||
+ !strcmp(name, "lgrl") ||
+ !strcmp(name, "lgfrl") ||
+ !strcmp(name, "llgfrl") ||
+ !strcmp(name, "strl") ||
+ !strcmp(name, "stgrl"))
+ ops = &s390_mov_ops;
+
+ if (ops)
+ arch__associate_ins_ops(arch, name, ops);
+ return ops;
+}
+
+static int s390__cpuid_parse(struct arch *arch, char *cpuid)
+{
+ unsigned int family;
+ char model[16], model_c[16], cpumf_v[16], cpumf_a[16];
+ int ret;
+
+ /*
+ * cpuid string format:
+ * "IBM,family,model-capacity,model[,cpum_cf-version,cpum_cf-authorization]"
+ */
+ ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%s", &family, model_c,
+ model, cpumf_v, cpumf_a);
+ if (ret >= 2) {
+ arch->family = family;
+ arch->model = 0;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused)
+{
+ int err = 0;
+
+ if (!arch->initialized) {
+ arch->initialized = true;
+ arch->associate_instruction_ops = s390__associate_ins_ops;
+ if (cpuid)
+ err = s390__cpuid_parse(arch, cpuid);
+ }
+
+ return err;
+}
diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
new file mode 100755
index 000000000..72ecbb676
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl
@@ -0,0 +1,32 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate system call table for perf
+#
+# Copyright IBM Corp. 2017, 2018
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+#
+
+SYSCALL_TBL=$1
+
+if ! test -r "$SYSCALL_TBL"; then
+ echo "Could not read input file" >&2
+ exit 1
+fi
+
+create_table()
+{
+ local max_nr nr abi sc discard
+
+ echo 'static const char *syscalltbl_s390_64[] = {'
+ while read nr abi sc discard; do
+ printf '\t[%d] = "%s",\n' $nr $sc
+ max_nr=$nr
+ done
+ echo '};'
+ echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr"
+}
+
+grep -E "^[[:digit:]]+[[:space:]]+(common|64)" "$SYSCALL_TBL" \
+ |sort -k1 -n \
+ |create_table
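
As a sketch of the script's output: fed the common/64 lines of the
syscall.tbl below, create_table() emits a C array indexed by syscall
number, roughly as follows (illustrative excerpt only; the real
syscalls_64.c is generated at build time):

	static const char *syscalltbl_s390_64[] = {
		[1] = "exit",
		[2] = "fork",
		[3] = "read",
		/* ... one entry per common/64 table line ... */
		[380] = "s390_sthyi",
	};
	#define SYSCALLTBL_S390_64_MAX_ID 380
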
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
new file mode 100644
index 000000000..b38d48464
--- /dev/null
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -0,0 +1,390 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+#
+# System call table for s390
+#
+# Format:
+#
+# <nr> <abi> <syscall> <entry-64bit> <compat-entry>
+#
+# where <abi> can be common, 64, or 32
+
+1 common exit sys_exit sys_exit
+2 common fork sys_fork sys_fork
+3 common read sys_read compat_sys_s390_read
+4 common write sys_write compat_sys_s390_write
+5 common open sys_open compat_sys_open
+6 common close sys_close sys_close
+7 common restart_syscall sys_restart_syscall sys_restart_syscall
+8 common creat sys_creat compat_sys_creat
+9 common link sys_link compat_sys_link
+10 common unlink sys_unlink compat_sys_unlink
+11 common execve sys_execve compat_sys_execve
+12 common chdir sys_chdir compat_sys_chdir
+13 32 time - compat_sys_time
+14 common mknod sys_mknod compat_sys_mknod
+15 common chmod sys_chmod compat_sys_chmod
+16 32 lchown - compat_sys_s390_lchown16
+19 common lseek sys_lseek compat_sys_lseek
+20 common getpid sys_getpid sys_getpid
+21 common mount sys_mount compat_sys_mount
+22 common umount sys_oldumount compat_sys_oldumount
+23 32 setuid - compat_sys_s390_setuid16
+24 32 getuid - compat_sys_s390_getuid16
+25 32 stime - compat_sys_stime
+26 common ptrace sys_ptrace compat_sys_ptrace
+27 common alarm sys_alarm sys_alarm
+29 common pause sys_pause sys_pause
+30 common utime sys_utime compat_sys_utime
+33 common access sys_access compat_sys_access
+34 common nice sys_nice sys_nice
+36 common sync sys_sync sys_sync
+37 common kill sys_kill sys_kill
+38 common rename sys_rename compat_sys_rename
+39 common mkdir sys_mkdir compat_sys_mkdir
+40 common rmdir sys_rmdir compat_sys_rmdir
+41 common dup sys_dup sys_dup
+42 common pipe sys_pipe compat_sys_pipe
+43 common times sys_times compat_sys_times
+45 common brk sys_brk compat_sys_brk
+46 32 setgid - compat_sys_s390_setgid16
+47 32 getgid - compat_sys_s390_getgid16
+48 common signal sys_signal compat_sys_signal
+49 32 geteuid - compat_sys_s390_geteuid16
+50 32 getegid - compat_sys_s390_getegid16
+51 common acct sys_acct compat_sys_acct
+52 common umount2 sys_umount compat_sys_umount
+54 common ioctl sys_ioctl compat_sys_ioctl
+55 common fcntl sys_fcntl compat_sys_fcntl
+57 common setpgid sys_setpgid sys_setpgid
+60 common umask sys_umask sys_umask
+61 common chroot sys_chroot compat_sys_chroot
+62 common ustat sys_ustat compat_sys_ustat
+63 common dup2 sys_dup2 sys_dup2
+64 common getppid sys_getppid sys_getppid
+65 common getpgrp sys_getpgrp sys_getpgrp
+66 common setsid sys_setsid sys_setsid
+67 common sigaction sys_sigaction compat_sys_sigaction
+70 32 setreuid - compat_sys_s390_setreuid16
+71 32 setregid - compat_sys_s390_setregid16
+72 common sigsuspend sys_sigsuspend compat_sys_sigsuspend
+73 common sigpending sys_sigpending compat_sys_sigpending
+74 common sethostname sys_sethostname compat_sys_sethostname
+75 common setrlimit sys_setrlimit compat_sys_setrlimit
+76 32 getrlimit - compat_sys_old_getrlimit
+77 common getrusage sys_getrusage compat_sys_getrusage
+78 common gettimeofday sys_gettimeofday compat_sys_gettimeofday
+79 common settimeofday sys_settimeofday compat_sys_settimeofday
+80 32 getgroups - compat_sys_s390_getgroups16
+81 32 setgroups - compat_sys_s390_setgroups16
+83 common symlink sys_symlink compat_sys_symlink
+85 common readlink sys_readlink compat_sys_readlink
+86 common uselib sys_uselib compat_sys_uselib
+87 common swapon sys_swapon compat_sys_swapon
+88 common reboot sys_reboot compat_sys_reboot
+89 common readdir - compat_sys_old_readdir
+90 common mmap sys_old_mmap compat_sys_s390_old_mmap
+91 common munmap sys_munmap compat_sys_munmap
+92 common truncate sys_truncate compat_sys_truncate
+93 common ftruncate sys_ftruncate compat_sys_ftruncate
+94 common fchmod sys_fchmod sys_fchmod
+95 32 fchown - compat_sys_s390_fchown16
+96 common getpriority sys_getpriority sys_getpriority
+97 common setpriority sys_setpriority sys_setpriority
+99 common statfs sys_statfs compat_sys_statfs
+100 common fstatfs sys_fstatfs compat_sys_fstatfs
+101 32 ioperm - -
+102 common socketcall sys_socketcall compat_sys_socketcall
+103 common syslog sys_syslog compat_sys_syslog
+104 common setitimer sys_setitimer compat_sys_setitimer
+105 common getitimer sys_getitimer compat_sys_getitimer
+106 common stat sys_newstat compat_sys_newstat
+107 common lstat sys_newlstat compat_sys_newlstat
+108 common fstat sys_newfstat compat_sys_newfstat
+110 common lookup_dcookie sys_lookup_dcookie compat_sys_lookup_dcookie
+111 common vhangup sys_vhangup sys_vhangup
+112 common idle - -
+114 common wait4 sys_wait4 compat_sys_wait4
+115 common swapoff sys_swapoff compat_sys_swapoff
+116 common sysinfo sys_sysinfo compat_sys_sysinfo
+117 common ipc sys_s390_ipc compat_sys_s390_ipc
+118 common fsync sys_fsync sys_fsync
+119 common sigreturn sys_sigreturn compat_sys_sigreturn
+120 common clone sys_clone compat_sys_clone
+121 common setdomainname sys_setdomainname compat_sys_setdomainname
+122 common uname sys_newuname compat_sys_newuname
+124 common adjtimex sys_adjtimex compat_sys_adjtimex
+125 common mprotect sys_mprotect compat_sys_mprotect
+126 common sigprocmask sys_sigprocmask compat_sys_sigprocmask
+127 common create_module - -
+128 common init_module sys_init_module compat_sys_init_module
+129 common delete_module sys_delete_module compat_sys_delete_module
+130 common get_kernel_syms - -
+131 common quotactl sys_quotactl compat_sys_quotactl
+132 common getpgid sys_getpgid sys_getpgid
+133 common fchdir sys_fchdir sys_fchdir
+134 common bdflush sys_bdflush compat_sys_bdflush
+135 common sysfs sys_sysfs compat_sys_sysfs
+136 common personality sys_s390_personality sys_s390_personality
+137 common afs_syscall - -
+138 32 setfsuid - compat_sys_s390_setfsuid16
+139 32 setfsgid - compat_sys_s390_setfsgid16
+140 32 _llseek - compat_sys_llseek
+141 common getdents sys_getdents compat_sys_getdents
+142 32 _newselect - compat_sys_select
+142 64 select sys_select -
+143 common flock sys_flock sys_flock
+144 common msync sys_msync compat_sys_msync
+145 common readv sys_readv compat_sys_readv
+146 common writev sys_writev compat_sys_writev
+147 common getsid sys_getsid sys_getsid
+148 common fdatasync sys_fdatasync sys_fdatasync
+149 common _sysctl sys_sysctl compat_sys_sysctl
+150 common mlock sys_mlock compat_sys_mlock
+151 common munlock sys_munlock compat_sys_munlock
+152 common mlockall sys_mlockall sys_mlockall
+153 common munlockall sys_munlockall sys_munlockall
+154 common sched_setparam sys_sched_setparam compat_sys_sched_setparam
+155 common sched_getparam sys_sched_getparam compat_sys_sched_getparam
+156 common sched_setscheduler sys_sched_setscheduler compat_sys_sched_setscheduler
+157 common sched_getscheduler sys_sched_getscheduler sys_sched_getscheduler
+158 common sched_yield sys_sched_yield sys_sched_yield
+159 common sched_get_priority_max sys_sched_get_priority_max sys_sched_get_priority_max
+160 common sched_get_priority_min sys_sched_get_priority_min sys_sched_get_priority_min
+161 common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval
+162 common nanosleep sys_nanosleep compat_sys_nanosleep
+163 common mremap sys_mremap compat_sys_mremap
+164 32 setresuid - compat_sys_s390_setresuid16
+165 32 getresuid - compat_sys_s390_getresuid16
+167 common query_module - -
+168 common poll sys_poll compat_sys_poll
+169 common nfsservctl - -
+170 32 setresgid - compat_sys_s390_setresgid16
+171 32 getresgid - compat_sys_s390_getresgid16
+172 common prctl sys_prctl compat_sys_prctl
+173 common rt_sigreturn sys_rt_sigreturn compat_sys_rt_sigreturn
+174 common rt_sigaction sys_rt_sigaction compat_sys_rt_sigaction
+175 common rt_sigprocmask sys_rt_sigprocmask compat_sys_rt_sigprocmask
+176 common rt_sigpending sys_rt_sigpending compat_sys_rt_sigpending
+177 common rt_sigtimedwait sys_rt_sigtimedwait compat_sys_rt_sigtimedwait
+178 common rt_sigqueueinfo sys_rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+179 common rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
+180 common pread64 sys_pread64 compat_sys_s390_pread64
+181 common pwrite64 sys_pwrite64 compat_sys_s390_pwrite64
+182 32 chown - compat_sys_s390_chown16
+183 common getcwd sys_getcwd compat_sys_getcwd
+184 common capget sys_capget compat_sys_capget
+185 common capset sys_capset compat_sys_capset
+186 common sigaltstack sys_sigaltstack compat_sys_sigaltstack
+187 common sendfile sys_sendfile64 compat_sys_sendfile
+188 common getpmsg - -
+189 common putpmsg - -
+190 common vfork sys_vfork sys_vfork
+191 32 ugetrlimit - compat_sys_getrlimit
+191 64 getrlimit sys_getrlimit -
+192 32 mmap2 - compat_sys_s390_mmap2
+193 32 truncate64 - compat_sys_s390_truncate64
+194 32 ftruncate64 - compat_sys_s390_ftruncate64
+195 32 stat64 - compat_sys_s390_stat64
+196 32 lstat64 - compat_sys_s390_lstat64
+197 32 fstat64 - compat_sys_s390_fstat64
+198 32 lchown32 - compat_sys_lchown
+198 64 lchown sys_lchown -
+199 32 getuid32 - sys_getuid
+199 64 getuid sys_getuid -
+200 32 getgid32 - sys_getgid
+200 64 getgid sys_getgid -
+201 32 geteuid32 - sys_geteuid
+201 64 geteuid sys_geteuid -
+202 32 getegid32 - sys_getegid
+202 64 getegid sys_getegid -
+203 32 setreuid32 - sys_setreuid
+203 64 setreuid sys_setreuid -
+204 32 setregid32 - sys_setregid
+204 64 setregid sys_setregid -
+205 32 getgroups32 - compat_sys_getgroups
+205 64 getgroups sys_getgroups -
+206 32 setgroups32 - compat_sys_setgroups
+206 64 setgroups sys_setgroups -
+207 32 fchown32 - sys_fchown
+207 64 fchown sys_fchown -
+208 32 setresuid32 - sys_setresuid
+208 64 setresuid sys_setresuid -
+209 32 getresuid32 - compat_sys_getresuid
+209 64 getresuid sys_getresuid -
+210 32 setresgid32 - sys_setresgid
+210 64 setresgid sys_setresgid -
+211 32 getresgid32 - compat_sys_getresgid
+211 64 getresgid sys_getresgid -
+212 32 chown32 - compat_sys_chown
+212 64 chown sys_chown -
+213 32 setuid32 - sys_setuid
+213 64 setuid sys_setuid -
+214 32 setgid32 - sys_setgid
+214 64 setgid sys_setgid -
+215 32 setfsuid32 - sys_setfsuid
+215 64 setfsuid sys_setfsuid -
+216 32 setfsgid32 - sys_setfsgid
+216 64 setfsgid sys_setfsgid -
+217 common pivot_root sys_pivot_root compat_sys_pivot_root
+218 common mincore sys_mincore compat_sys_mincore
+219 common madvise sys_madvise compat_sys_madvise
+220 common getdents64 sys_getdents64 compat_sys_getdents64
+221 32 fcntl64 - compat_sys_fcntl64
+222 common readahead sys_readahead compat_sys_s390_readahead
+223 32 sendfile64 - compat_sys_sendfile64
+224 common setxattr sys_setxattr compat_sys_setxattr
+225 common lsetxattr sys_lsetxattr compat_sys_lsetxattr
+226 common fsetxattr sys_fsetxattr compat_sys_fsetxattr
+227 common getxattr sys_getxattr compat_sys_getxattr
+228 common lgetxattr sys_lgetxattr compat_sys_lgetxattr
+229 common fgetxattr sys_fgetxattr compat_sys_fgetxattr
+230 common listxattr sys_listxattr compat_sys_listxattr
+231 common llistxattr sys_llistxattr compat_sys_llistxattr
+232 common flistxattr sys_flistxattr compat_sys_flistxattr
+233 common removexattr sys_removexattr compat_sys_removexattr
+234 common lremovexattr sys_lremovexattr compat_sys_lremovexattr
+235 common fremovexattr sys_fremovexattr compat_sys_fremovexattr
+236 common gettid sys_gettid sys_gettid
+237 common tkill sys_tkill sys_tkill
+238 common futex sys_futex compat_sys_futex
+239 common sched_setaffinity sys_sched_setaffinity compat_sys_sched_setaffinity
+240 common sched_getaffinity sys_sched_getaffinity compat_sys_sched_getaffinity
+241 common tgkill sys_tgkill sys_tgkill
+243 common io_setup sys_io_setup compat_sys_io_setup
+244 common io_destroy sys_io_destroy compat_sys_io_destroy
+245 common io_getevents sys_io_getevents compat_sys_io_getevents
+246 common io_submit sys_io_submit compat_sys_io_submit
+247 common io_cancel sys_io_cancel compat_sys_io_cancel
+248 common exit_group sys_exit_group sys_exit_group
+249 common epoll_create sys_epoll_create sys_epoll_create
+250 common epoll_ctl sys_epoll_ctl compat_sys_epoll_ctl
+251 common epoll_wait sys_epoll_wait compat_sys_epoll_wait
+252 common set_tid_address sys_set_tid_address compat_sys_set_tid_address
+253 common fadvise64 sys_fadvise64_64 compat_sys_s390_fadvise64
+254 common timer_create sys_timer_create compat_sys_timer_create
+255 common timer_settime sys_timer_settime compat_sys_timer_settime
+256 common timer_gettime sys_timer_gettime compat_sys_timer_gettime
+257 common timer_getoverrun sys_timer_getoverrun sys_timer_getoverrun
+258 common timer_delete sys_timer_delete sys_timer_delete
+259 common clock_settime sys_clock_settime compat_sys_clock_settime
+260 common clock_gettime sys_clock_gettime compat_sys_clock_gettime
+261 common clock_getres sys_clock_getres compat_sys_clock_getres
+262 common clock_nanosleep sys_clock_nanosleep compat_sys_clock_nanosleep
+264 32 fadvise64_64 - compat_sys_s390_fadvise64_64
+265 common statfs64 sys_statfs64 compat_sys_statfs64
+266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64
+267 common remap_file_pages sys_remap_file_pages compat_sys_remap_file_pages
+268 common mbind sys_mbind compat_sys_mbind
+269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy
+270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy
+271 common mq_open sys_mq_open compat_sys_mq_open
+272 common mq_unlink sys_mq_unlink compat_sys_mq_unlink
+273 common mq_timedsend sys_mq_timedsend compat_sys_mq_timedsend
+274 common mq_timedreceive sys_mq_timedreceive compat_sys_mq_timedreceive
+275 common mq_notify sys_mq_notify compat_sys_mq_notify
+276 common mq_getsetattr sys_mq_getsetattr compat_sys_mq_getsetattr
+277 common kexec_load sys_kexec_load compat_sys_kexec_load
+278 common add_key sys_add_key compat_sys_add_key
+279 common request_key sys_request_key compat_sys_request_key
+280 common keyctl sys_keyctl compat_sys_keyctl
+281 common waitid sys_waitid compat_sys_waitid
+282 common ioprio_set sys_ioprio_set sys_ioprio_set
+283 common ioprio_get sys_ioprio_get sys_ioprio_get
+284 common inotify_init sys_inotify_init sys_inotify_init
+285 common inotify_add_watch sys_inotify_add_watch compat_sys_inotify_add_watch
+286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch
+287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages
+288 common openat sys_openat compat_sys_openat
+289 common mkdirat sys_mkdirat compat_sys_mkdirat
+290 common mknodat sys_mknodat compat_sys_mknodat
+291 common fchownat sys_fchownat compat_sys_fchownat
+292 common futimesat sys_futimesat compat_sys_futimesat
+293 32 fstatat64 - compat_sys_s390_fstatat64
+293 64 newfstatat sys_newfstatat -
+294 common unlinkat sys_unlinkat compat_sys_unlinkat
+295 common renameat sys_renameat compat_sys_renameat
+296 common linkat sys_linkat compat_sys_linkat
+297 common symlinkat sys_symlinkat compat_sys_symlinkat
+298 common readlinkat sys_readlinkat compat_sys_readlinkat
+299 common fchmodat sys_fchmodat compat_sys_fchmodat
+300 common faccessat sys_faccessat compat_sys_faccessat
+301 common pselect6 sys_pselect6 compat_sys_pselect6
+302 common ppoll sys_ppoll compat_sys_ppoll
+303 common unshare sys_unshare compat_sys_unshare
+304 common set_robust_list sys_set_robust_list compat_sys_set_robust_list
+305 common get_robust_list sys_get_robust_list compat_sys_get_robust_list
+306 common splice sys_splice compat_sys_splice
+307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
+308 common tee sys_tee compat_sys_tee
+309 common vmsplice sys_vmsplice compat_sys_vmsplice
+310 common move_pages sys_move_pages compat_sys_move_pages
+311 common getcpu sys_getcpu compat_sys_getcpu
+312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
+313 common utimes sys_utimes compat_sys_utimes
+314 common fallocate sys_fallocate compat_sys_s390_fallocate
+315 common utimensat sys_utimensat compat_sys_utimensat
+316 common signalfd sys_signalfd compat_sys_signalfd
+317 common timerfd - -
+318 common eventfd sys_eventfd sys_eventfd
+319 common timerfd_create sys_timerfd_create sys_timerfd_create
+320 common timerfd_settime sys_timerfd_settime compat_sys_timerfd_settime
+321 common timerfd_gettime sys_timerfd_gettime compat_sys_timerfd_gettime
+322 common signalfd4 sys_signalfd4 compat_sys_signalfd4
+323 common eventfd2 sys_eventfd2 sys_eventfd2
+324 common inotify_init1 sys_inotify_init1 sys_inotify_init1
+325 common pipe2 sys_pipe2 compat_sys_pipe2
+326 common dup3 sys_dup3 sys_dup3
+327 common epoll_create1 sys_epoll_create1 sys_epoll_create1
+328 common preadv sys_preadv compat_sys_preadv
+329 common pwritev sys_pwritev compat_sys_pwritev
+330 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+331 common perf_event_open sys_perf_event_open compat_sys_perf_event_open
+332 common fanotify_init sys_fanotify_init sys_fanotify_init
+333 common fanotify_mark sys_fanotify_mark compat_sys_fanotify_mark
+334 common prlimit64 sys_prlimit64 compat_sys_prlimit64
+335 common name_to_handle_at sys_name_to_handle_at compat_sys_name_to_handle_at
+336 common open_by_handle_at sys_open_by_handle_at compat_sys_open_by_handle_at
+337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
+338 common syncfs sys_syncfs sys_syncfs
+339 common setns sys_setns sys_setns
+340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
+343 common kcmp sys_kcmp compat_sys_kcmp
+344 common finit_module sys_finit_module compat_sys_finit_module
+345 common sched_setattr sys_sched_setattr compat_sys_sched_setattr
+346 common sched_getattr sys_sched_getattr compat_sys_sched_getattr
+347 common renameat2 sys_renameat2 compat_sys_renameat2
+348 common seccomp sys_seccomp compat_sys_seccomp
+349 common getrandom sys_getrandom compat_sys_getrandom
+350 common memfd_create sys_memfd_create compat_sys_memfd_create
+351 common bpf sys_bpf compat_sys_bpf
+352 common s390_pci_mmio_write sys_s390_pci_mmio_write compat_sys_s390_pci_mmio_write
+353 common s390_pci_mmio_read sys_s390_pci_mmio_read compat_sys_s390_pci_mmio_read
+354 common execveat sys_execveat compat_sys_execveat
+355 common userfaultfd sys_userfaultfd sys_userfaultfd
+356 common membarrier sys_membarrier sys_membarrier
+357 common recvmmsg sys_recvmmsg compat_sys_recvmmsg
+358 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
+359 common socket sys_socket sys_socket
+360 common socketpair sys_socketpair compat_sys_socketpair
+361 common bind sys_bind compat_sys_bind
+362 common connect sys_connect compat_sys_connect
+363 common listen sys_listen sys_listen
+364 common accept4 sys_accept4 compat_sys_accept4
+365 common getsockopt sys_getsockopt compat_sys_getsockopt
+366 common setsockopt sys_setsockopt compat_sys_setsockopt
+367 common getsockname sys_getsockname compat_sys_getsockname
+368 common getpeername sys_getpeername compat_sys_getpeername
+369 common sendto sys_sendto compat_sys_sendto
+370 common sendmsg sys_sendmsg compat_sys_sendmsg
+371 common recvfrom sys_recvfrom compat_sys_recvfrom
+372 common recvmsg sys_recvmsg compat_sys_recvmsg
+373 common shutdown sys_shutdown sys_shutdown
+374 common mlock2 sys_mlock2 compat_sys_mlock2
+375 common copy_file_range sys_copy_file_range compat_sys_copy_file_range
+376 common preadv2 sys_preadv2 compat_sys_preadv2
+377 common pwritev2 sys_pwritev2 compat_sys_pwritev2
+378 common s390_guarded_storage sys_s390_guarded_storage compat_sys_s390_guarded_storage
+379 common statx sys_statx compat_sys_statx
+380 common s390_sthyi sys_s390_sthyi compat_sys_s390_sthyi
diff --git a/tools/perf/arch/s390/include/dwarf-regs-table.h b/tools/perf/arch/s390/include/dwarf-regs-table.h
new file mode 100644
index 000000000..671553525
--- /dev/null
+++ b/tools/perf/arch/s390/include/dwarf-regs-table.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef S390_DWARF_REGS_TABLE_H
+#define S390_DWARF_REGS_TABLE_H
+
+#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg
+
+/*
+ * For reference, see DWARF register mapping:
+ * http://refspecs.linuxfoundation.org/ELF/zSeries/lzsabi0_s390/x1542.html
+ */
+static const char * const s390_dwarf_regs[] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+ REG_DWARFNUM_NAME(f0, 16),
+ REG_DWARFNUM_NAME(f1, 20),
+ REG_DWARFNUM_NAME(f2, 17),
+ REG_DWARFNUM_NAME(f3, 21),
+ REG_DWARFNUM_NAME(f4, 18),
+ REG_DWARFNUM_NAME(f5, 22),
+ REG_DWARFNUM_NAME(f6, 19),
+ REG_DWARFNUM_NAME(f7, 23),
+ REG_DWARFNUM_NAME(f8, 24),
+ REG_DWARFNUM_NAME(f9, 28),
+ REG_DWARFNUM_NAME(f10, 25),
+ REG_DWARFNUM_NAME(f11, 29),
+ REG_DWARFNUM_NAME(f12, 26),
+ REG_DWARFNUM_NAME(f13, 30),
+ REG_DWARFNUM_NAME(f14, 27),
+ REG_DWARFNUM_NAME(f15, 31),
+ REG_DWARFNUM_NAME(c0, 32),
+ REG_DWARFNUM_NAME(c1, 33),
+ REG_DWARFNUM_NAME(c2, 34),
+ REG_DWARFNUM_NAME(c3, 35),
+ REG_DWARFNUM_NAME(c4, 36),
+ REG_DWARFNUM_NAME(c5, 37),
+ REG_DWARFNUM_NAME(c6, 38),
+ REG_DWARFNUM_NAME(c7, 39),
+ REG_DWARFNUM_NAME(c8, 40),
+ REG_DWARFNUM_NAME(c9, 41),
+ REG_DWARFNUM_NAME(c10, 42),
+ REG_DWARFNUM_NAME(c11, 43),
+ REG_DWARFNUM_NAME(c12, 44),
+ REG_DWARFNUM_NAME(c13, 45),
+ REG_DWARFNUM_NAME(c14, 46),
+ REG_DWARFNUM_NAME(c15, 47),
+ REG_DWARFNUM_NAME(a0, 48),
+ REG_DWARFNUM_NAME(a1, 49),
+ REG_DWARFNUM_NAME(a2, 50),
+ REG_DWARFNUM_NAME(a3, 51),
+ REG_DWARFNUM_NAME(a4, 52),
+ REG_DWARFNUM_NAME(a5, 53),
+ REG_DWARFNUM_NAME(a6, 54),
+ REG_DWARFNUM_NAME(a7, 55),
+ REG_DWARFNUM_NAME(a8, 56),
+ REG_DWARFNUM_NAME(a9, 57),
+ REG_DWARFNUM_NAME(a10, 58),
+ REG_DWARFNUM_NAME(a11, 59),
+ REG_DWARFNUM_NAME(a12, 60),
+ REG_DWARFNUM_NAME(a13, 61),
+ REG_DWARFNUM_NAME(a14, 62),
+ REG_DWARFNUM_NAME(a15, 63),
+ REG_DWARFNUM_NAME(pswm, 64),
+ REG_DWARFNUM_NAME(pswa, 65),
+};
+
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+#define s390_regstr_tbl s390_dwarf_regs
+
+#endif /* DEFINE_DWARF_REGSTR_TABLE */
+#endif /* S390_DWARF_REGS_TABLE_H */
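
Note that the floating-point registers above are deliberately interleaved:
per the zSeries ELF ABI mapping cited in the comment, %f0/%f2/%f4/%f6 take
DWARF numbers 16-19 while %f1/%f3/%f5/%f7 take 20-23. A hypothetical
standalone sanity check (assuming the header is on the include path):

	#include <assert.h>
	#include <string.h>
	#include "dwarf-regs-table.h"	/* defines s390_dwarf_regs */

	int main(void)
	{
		assert(!strcmp(s390_dwarf_regs[16], "%f0"));	/* f0/f2/f4/f6 -> 16..19 */
		assert(!strcmp(s390_dwarf_regs[20], "%f1"));	/* f1/f3/f5/f7 -> 20..23 */
		assert(!strcmp(s390_dwarf_regs[65], "%pswa"));	/* PSW address */
		return 0;
	}
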
diff --git a/tools/perf/arch/s390/include/perf_regs.h b/tools/perf/arch/s390/include/perf_regs.h
new file mode 100644
index 000000000..bcfbaed78
--- /dev/null
+++ b/tools/perf/arch/s390/include/perf_regs.h
@@ -0,0 +1,95 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_S390_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_S390_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_S390_PC
+#define PERF_REG_SP PERF_REG_S390_R15
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_S390_R0:
+ return "R0";
+ case PERF_REG_S390_R1:
+ return "R1";
+ case PERF_REG_S390_R2:
+ return "R2";
+ case PERF_REG_S390_R3:
+ return "R3";
+ case PERF_REG_S390_R4:
+ return "R4";
+ case PERF_REG_S390_R5:
+ return "R5";
+ case PERF_REG_S390_R6:
+ return "R6";
+ case PERF_REG_S390_R7:
+ return "R7";
+ case PERF_REG_S390_R8:
+ return "R8";
+ case PERF_REG_S390_R9:
+ return "R9";
+ case PERF_REG_S390_R10:
+ return "R10";
+ case PERF_REG_S390_R11:
+ return "R11";
+ case PERF_REG_S390_R12:
+ return "R12";
+ case PERF_REG_S390_R13:
+ return "R13";
+ case PERF_REG_S390_R14:
+ return "R14";
+ case PERF_REG_S390_R15:
+ return "R15";
+ case PERF_REG_S390_FP0:
+ return "FP0";
+ case PERF_REG_S390_FP1:
+ return "FP1";
+ case PERF_REG_S390_FP2:
+ return "FP2";
+ case PERF_REG_S390_FP3:
+ return "FP3";
+ case PERF_REG_S390_FP4:
+ return "FP4";
+ case PERF_REG_S390_FP5:
+ return "FP5";
+ case PERF_REG_S390_FP6:
+ return "FP6";
+ case PERF_REG_S390_FP7:
+ return "FP7";
+ case PERF_REG_S390_FP8:
+ return "FP8";
+ case PERF_REG_S390_FP9:
+ return "FP9";
+ case PERF_REG_S390_FP10:
+ return "FP10";
+ case PERF_REG_S390_FP11:
+ return "FP11";
+ case PERF_REG_S390_FP12:
+ return "FP12";
+ case PERF_REG_S390_FP13:
+ return "FP13";
+ case PERF_REG_S390_FP14:
+ return "FP14";
+ case PERF_REG_S390_FP15:
+ return "FP15";
+ case PERF_REG_S390_MASK:
+ return "MASK";
+ case PERF_REG_S390_PC:
+ return "PC";
+ default:
+ return NULL;
+ }
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
new file mode 100644
index 000000000..4a233683c
--- /dev/null
+++ b/tools/perf/arch/s390/util/Build
@@ -0,0 +1,9 @@
+libperf-y += header.o
+libperf-y += kvm-stat.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-y += machine.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c
new file mode 100644
index 000000000..44c857388
--- /dev/null
+++ b/tools/perf/arch/s390/util/auxtrace.c
@@ -0,0 +1,120 @@
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/evlist.h"
+#include "../../util/auxtrace.h"
+#include "../../util/evsel.h"
+
+#define PERF_EVENT_CPUM_SF 0xB0000 /* Event: Basic-sampling */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
+#define DEFAULT_AUX_PAGES 128
+#define DEFAULT_FREQ 4000
+
+static void cpumsf_free(struct auxtrace_record *itr)
+{
+ free(itr);
+}
+
+static size_t cpumsf_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static int
+cpumsf_info_fill(struct auxtrace_record *itr __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ struct auxtrace_info_event *auxtrace_info __maybe_unused,
+ size_t priv_size __maybe_unused)
+{
+ auxtrace_info->type = PERF_AUXTRACE_S390_CPUMSF;
+ return 0;
+}
+
+static unsigned long
+cpumsf_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return 0;
+}
+
+static int
+cpumsf_recording_options(struct auxtrace_record *ar __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ struct record_opts *opts)
+{
+ unsigned int factor = 1;
+ unsigned int pages;
+
+ opts->full_auxtrace = true;
+
+	/*
+	 * If the AUX buffer size is not set explicitly, choose a sane
+	 * default to avoid losing samples to overflow. DEFAULT_AUX_PAGES
+	 * is a suitable size when the sampling frequency is DEFAULT_FREQ;
+	 * it is expected to hold about 1/2 second of sampling data. The
+	 * AUX buffer size scales with the ratio of the requested
+	 * frequency to DEFAULT_FREQ.
+	 */
+ if (!opts->auxtrace_mmap_pages) {
+ if (opts->user_freq != UINT_MAX)
+ factor = (opts->user_freq + DEFAULT_FREQ
+ - 1) / DEFAULT_FREQ;
+ pages = DEFAULT_AUX_PAGES * factor;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(pages);
+ }
+
+ return 0;
+}
+
+static int
+cpumsf_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+ struct record_opts *opts __maybe_unused,
+ const char *str __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * auxtrace_record__init is called when perf record starts, to check
+ * whether the event actually needs auxtrace.
+ */
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+ int *err)
+{
+ struct auxtrace_record *aux;
+ struct perf_evsel *pos;
+ int diagnose = 0;
+
+ *err = 0;
+ if (evlist->nr_entries == 0)
+ return NULL;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (pos->attr.config == PERF_EVENT_CPUM_SF_DIAG) {
+ diagnose = 1;
+ break;
+ }
+ }
+
+ if (!diagnose)
+ return NULL;
+
+	/* sampling in diagnostic mode: allocate an AUX buffer */
+ aux = zalloc(sizeof(*aux));
+ if (aux == NULL) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ aux->parse_snapshot_options = cpumsf_parse_snapshot_options;
+ aux->recording_options = cpumsf_recording_options;
+ aux->info_priv_size = cpumsf_info_priv_size;
+ aux->info_fill = cpumsf_info_fill;
+ aux->free = cpumsf_free;
+ aux->reference = cpumsf_reference;
+
+ return aux;
+}
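
To make the buffer sizing in cpumsf_recording_options() above concrete,
take a hypothetical requested frequency of 10000 Hz:

	/* factor = (10000 + 4000 - 1) / 4000            = 3
	 * pages  = DEFAULT_AUX_PAGES * factor = 128 * 3 = 384
	 * opts->auxtrace_mmap_pages = roundup_pow_of_two(384) = 512
	 */
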
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
new file mode 100644
index 000000000..a8ace5cc6
--- /dev/null
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright IBM Corp. 2010, 2017
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ * Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ *
+ */
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+#include <asm/ptrace.h>
+#include <string.h>
+#include <dwarf-regs.h>
+#include "dwarf-regs-table.h"
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n];
+}
+
+/*
+ * Convert the register name into an offset to struct pt_regs (kernel).
+ * This is required by the BPF prologue generator. The BPF
+ * program is called in the BPF overflow handler in the perf
+ * core.
+ */
+int regs_query_register_offset(const char *name)
+{
+ unsigned long gpr;
+
+ if (!name || strncmp(name, "%r", 2))
+ return -EINVAL;
+
+ errno = 0;
+ gpr = strtoul(name + 2, NULL, 10);
+ if (errno || gpr >= 16)
+ return -EINVAL;
+
+ return offsetof(user_pt_regs, gprs) + 8 * gpr;
+}
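
regs_query_register_offset() resolves only general-purpose register names
of the form "%r0" .. "%r15"; anything else yields -EINVAL. A hypothetical
usage sketch, assuming the 8-byte gprs[] layout of user_pt_regs:

	int off = regs_query_register_offset("%r11");
	/* off == offsetof(user_pt_regs, gprs) + 8 * 11 */
	int bad = regs_query_register_offset("%f0");	/* -EINVAL: not "%rN" */
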
diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c
new file mode 100644
index 000000000..cc72554c3
--- /dev/null
+++ b/tools/perf/arch/s390/util/header.c
@@ -0,0 +1,149 @@
+/*
+ * Implementation of get_cpuid().
+ *
+ * Copyright IBM Corp. 2014, 2018
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ * Thomas Richter <tmricht@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <sys/types.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "../../util/header.h"
+#include "../../util/util.h"
+
+#define SYSINFO_MANU "Manufacturer:"
+#define SYSINFO_TYPE "Type:"
+#define SYSINFO_MODEL "Model:"
+#define SRVLVL_CPUMF "CPU-MF:"
+#define SRVLVL_VERSION "version="
+#define SRVLVL_AUTHORIZATION "authorization="
+#define SYSINFO "/proc/sysinfo"
+#define SRVLVL "/proc/service_levels"
+
+int get_cpuid(char *buffer, size_t sz)
+{
+ char *cp, *line = NULL, *line2;
+ char type[8], model[33], version[8], manufacturer[32], authorization[8];
+ int tpsize = 0, mdsize = 0, vssize = 0, mfsize = 0, atsize = 0;
+ int read;
+ unsigned long line_sz;
+ size_t nbytes;
+ FILE *sysinfo;
+
+ /*
+ * Scan /proc/sysinfo line by line and read out values for
+ * Manufacturer:, Type: and Model:, for example:
+ * Manufacturer: IBM
+ * Type: 2964
+ * Model: 702 N96
+ * The first word is the Model Capacity and the second word is
+ * Model (can be omitted). Both words have a maximum size of 16
+ * bytes.
+ */
+ memset(manufacturer, 0, sizeof(manufacturer));
+ memset(type, 0, sizeof(type));
+ memset(model, 0, sizeof(model));
+ memset(version, 0, sizeof(version));
+ memset(authorization, 0, sizeof(authorization));
+
+ sysinfo = fopen(SYSINFO, "r");
+ if (sysinfo == NULL)
+ return errno;
+
+ while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
+ if (!strncmp(line, SYSINFO_MANU, strlen(SYSINFO_MANU))) {
+ line2 = line + strlen(SYSINFO_MANU);
+
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ mfsize += scnprintf(manufacturer + mfsize,
+ sizeof(manufacturer) - mfsize, "%s", cp);
+ }
+ }
+
+ if (!strncmp(line, SYSINFO_TYPE, strlen(SYSINFO_TYPE))) {
+ line2 = line + strlen(SYSINFO_TYPE);
+
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ tpsize += scnprintf(type + tpsize,
+ sizeof(type) - tpsize, "%s", cp);
+ }
+ }
+
+ if (!strncmp(line, SYSINFO_MODEL, strlen(SYSINFO_MODEL))) {
+ line2 = line + strlen(SYSINFO_MODEL);
+
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ mdsize += scnprintf(model + mdsize, sizeof(model) - mdsize,
+ "%s%s", model[0] ? "," : "", cp);
+ }
+ break;
+ }
+ }
+ fclose(sysinfo);
+
+ /* Missing manufacturer, type or model information should not happen */
+ if (!manufacturer[0] || !type[0] || !model[0])
+ return EINVAL;
+
+ /*
+ * Scan /proc/service_levels and return the CPU-MF counter facility
+ * version number and authorization level.
+ * Optional, does not exist on z/VM guests.
+ */
+ sysinfo = fopen(SRVLVL, "r");
+ if (sysinfo == NULL)
+ goto skip_sysinfo;
+ while ((read = getline(&line, &line_sz, sysinfo)) != -1) {
+ if (strncmp(line, SRVLVL_CPUMF, strlen(SRVLVL_CPUMF)))
+ continue;
+
+ line2 = line + strlen(SRVLVL_CPUMF);
+ while ((cp = strtok_r(line2, "\n ", &line2))) {
+ if (!strncmp(cp, SRVLVL_VERSION,
+ strlen(SRVLVL_VERSION))) {
+ char *sep = strchr(cp, '=');
+
+ vssize += scnprintf(version + vssize,
+ sizeof(version) - vssize, "%s", sep + 1);
+ }
+ if (!strncmp(cp, SRVLVL_AUTHORIZATION,
+ strlen(SRVLVL_AUTHORIZATION))) {
+ char *sep = strchr(cp, '=');
+
+ atsize += scnprintf(authorization + atsize,
+ sizeof(authorization) - atsize, "%s", sep + 1);
+ }
+ }
+ }
+ fclose(sysinfo);
+
+skip_sysinfo:
+ free(line);
+
+	if (version[0] && authorization[0])
+ nbytes = snprintf(buffer, sz, "%s,%s,%s,%s,%s",
+ manufacturer, type, model, version,
+ authorization);
+ else
+ nbytes = snprintf(buffer, sz, "%s,%s,%s", manufacturer, type,
+ model);
+ return (nbytes >= sz) ? ENOBUFS : 0;
+}
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ char *buf = malloc(128);
+
+ if (buf && get_cpuid(buf, 128))
+ zfree(&buf);
+ return buf;
+}
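
Tying this back to s390__cpuid_parse() earlier in this patch: with the
/proc/sysinfo sample from the comment above, get_cpuid() fills the buffer
roughly as follows (the CPU-MF version/authorization values shown are
hypothetical; they are omitted on z/VM guests, where /proc/service_levels
does not exist):

	/* with /proc/service_levels:  "IBM,2964,702,N96,3,f0"
	 * without (short form):       "IBM,2964,702,N96"
	 */
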
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
new file mode 100644
index 000000000..aaabab5e2
--- /dev/null
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -0,0 +1,112 @@
+/*
+ * Arch specific functions for perf kvm stat.
+ *
+ * Copyright 2014 IBM Corp.
+ * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License (version 2 only)
+ * as published by the Free Software Foundation.
+ */
+
+#include <errno.h>
+#include "../../util/kvm-stat.h"
+#include <asm/sie.h>
+
+define_exit_reasons_table(sie_exit_reasons, sie_intercept_code);
+define_exit_reasons_table(sie_icpt_insn_codes, icpt_insn_codes);
+define_exit_reasons_table(sie_sigp_order_codes, sigp_order_codes);
+define_exit_reasons_table(sie_diagnose_codes, diagnose_codes);
+define_exit_reasons_table(sie_icpt_prog_codes, icpt_prog_codes);
+
+const char *vcpu_id_str = "id";
+const int decode_str_len = 40;
+const char *kvm_exit_reason = "icptcode";
+const char *kvm_entry_trace = "kvm:kvm_s390_sie_enter";
+const char *kvm_exit_trace = "kvm:kvm_s390_sie_exit";
+
+static void event_icpt_insn_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ unsigned long insn;
+
+ insn = perf_evsel__intval(evsel, sample, "instruction");
+ key->key = icpt_insn_decoder(insn);
+ key->exit_reasons = sie_icpt_insn_codes;
+}
+
+static void event_sigp_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = perf_evsel__intval(evsel, sample, "order_code");
+ key->exit_reasons = sie_sigp_order_codes;
+}
+
+static void event_diag_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = perf_evsel__intval(evsel, sample, "code");
+ key->exit_reasons = sie_diagnose_codes;
+}
+
+static void event_icpt_prog_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = perf_evsel__intval(evsel, sample, "code");
+ key->exit_reasons = sie_icpt_prog_codes;
+}
+
+static struct child_event_ops child_events[] = {
+ { .name = "kvm:kvm_s390_intercept_instruction",
+ .get_key = event_icpt_insn_get_key },
+ { .name = "kvm:kvm_s390_handle_sigp",
+ .get_key = event_sigp_get_key },
+ { .name = "kvm:kvm_s390_handle_diag",
+ .get_key = event_diag_get_key },
+ { .name = "kvm:kvm_s390_intercept_prog",
+ .get_key = event_icpt_prog_get_key },
+ { NULL, NULL },
+};
+
+static struct kvm_events_ops exit_events = {
+ .is_begin_event = exit_event_begin,
+ .is_end_event = exit_event_end,
+ .child_ops = child_events,
+ .decode_key = exit_event_decode_key,
+ .name = "VM-EXIT"
+};
+
+const char *kvm_events_tp[] = {
+ "kvm:kvm_s390_sie_enter",
+ "kvm:kvm_s390_sie_exit",
+ "kvm:kvm_s390_intercept_instruction",
+ "kvm:kvm_s390_handle_sigp",
+ "kvm:kvm_s390_handle_diag",
+ "kvm:kvm_s390_intercept_prog",
+ NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+ { .name = "vmexit", .ops = &exit_events },
+ { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+ "Wait state",
+ NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
+{
+ if (strstr(cpuid, "IBM")) {
+ kvm->exit_reasons = sie_exit_reasons;
+ kvm->exit_reasons_isa = "SIE";
+ } else
+ return -ENOTSUP;
+
+ return 0;
+}
diff --git a/tools/perf/arch/s390/util/machine.c b/tools/perf/arch/s390/util/machine.c
new file mode 100644
index 000000000..c8c86a0c9
--- /dev/null
+++ b/tools/perf/arch/s390/util/machine.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "machine.h"
+#include "api/fs/fs.h"
+#include "debug.h"
+#include "symbol.h"
+
+int arch__fix_module_text_start(u64 *start, u64 *size, const char *name)
+{
+ u64 m_start = *start;
+ char path[PATH_MAX];
+
+ snprintf(path, PATH_MAX, "module/%.*s/sections/.text",
+ (int)strlen(name) - 2, name + 1);
+ if (sysfs__read_ull(path, (unsigned long long *)start) < 0) {
+ pr_debug2("Using module %s start:%#lx\n", path, m_start);
+ *start = m_start;
+ } else {
+		/* Successfully read the module's text segment start
+		 * address. Calculate the difference between the module
+		 * start address in memory and the module text segment
+		 * start address.
+ * For example module load address is 0x3ff8011b000
+ * (from /proc/modules) and module text segment start
+ * address is 0x3ff8011b870 (from file above).
+ *
+ * Adjust the module size and subtract the GOT table
+ * size located at the beginning of the module.
+ */
+ *size -= (*start - m_start);
+ }
+
+ return 0;
+}
+
+/* On s390 the kernel text segment starts at a very low memory address,
+ * for example 0x10000, while modules are located at very high memory
+ * addresses, for example 0x3ff xxxx xxxx. The gap between the end of the
+ * kernel text segment and the beginning of the first module's text
+ * segment is very big. Therefore do not fill this gap and do not assign
+ * it to the kernel dso map.
+ */
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+ if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
+ /* Last kernel symbol mapped to end of page */
+ p->end = roundup(p->end, page_size);
+ else
+ p->end = c->start;
+ pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+}
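
For the example addresses in arch__fix_module_text_start()'s comment, the
size adjustment works out as follows (illustrative arithmetic only):

	/* m_start = 0x3ff8011b000   module load address (/proc/modules)
	 * *start  = 0x3ff8011b870   .text start read from sysfs
	 * *size  -= 0x3ff8011b870 - 0x3ff8011b000, i.e. *size -= 0x870
	 */
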
diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c
new file mode 100644
index 000000000..387c698cd
--- /dev/null
+++ b/tools/perf/arch/s390/util/unwind-libdw.c
@@ -0,0 +1,63 @@
+#include <linux/kernel.h>
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+#include "dwarf-regs-table.h"
+
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[ARRAY_SIZE(s390_dwarf_regs)];
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_S390_##r); \
+ val; \
+})
+ /*
+ * For DWARF register mapping details,
+ * see also perf/arch/s390/include/dwarf-regs-table.h
+ */
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+
+ dwarf_regs[16] = REG(FP0);
+ dwarf_regs[17] = REG(FP2);
+ dwarf_regs[18] = REG(FP4);
+ dwarf_regs[19] = REG(FP6);
+ dwarf_regs[20] = REG(FP1);
+ dwarf_regs[21] = REG(FP3);
+ dwarf_regs[22] = REG(FP5);
+ dwarf_regs[23] = REG(FP7);
+ dwarf_regs[24] = REG(FP8);
+ dwarf_regs[25] = REG(FP10);
+ dwarf_regs[26] = REG(FP12);
+ dwarf_regs[27] = REG(FP14);
+ dwarf_regs[28] = REG(FP9);
+ dwarf_regs[29] = REG(FP11);
+ dwarf_regs[30] = REG(FP13);
+ dwarf_regs[31] = REG(FP15);
+
+ dwarf_regs[64] = REG(MASK);
+ dwarf_regs[65] = REG(PC);
+
+ dwfl_thread_state_register_pc(thread, dwarf_regs[65]);
+ return dwfl_thread_state_registers(thread, 0, 32, dwarf_regs);
+}
diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build
new file mode 100644
index 000000000..54afe4a46
--- /dev/null
+++ b/tools/perf/arch/sh/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile
new file mode 100644
index 000000000..7fbca1750
--- /dev/null
+++ b/tools/perf/arch/sh/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/sh/include/dwarf-regs-table.h b/tools/perf/arch/sh/include/dwarf-regs-table.h
new file mode 100644
index 000000000..900e69619
--- /dev/null
+++ b/tools/perf/arch/sh/include/dwarf-regs-table.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+const char * const sh_regstr_tbl[] = {
+ "r0",
+ "r1",
+ "r2",
+ "r3",
+ "r4",
+ "r5",
+ "r6",
+ "r7",
+ "r8",
+ "r9",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+ "pc",
+ "pr",
+};
+
+#endif
diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build
new file mode 100644
index 000000000..954e287bb
--- /dev/null
+++ b/tools/perf/arch/sh/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c
new file mode 100644
index 000000000..f8dfa8969
--- /dev/null
+++ b/tools/perf/arch/sh/util/dwarf-regs.c
@@ -0,0 +1,55 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+/*
+ * Generic dwarf analysis helpers
+ */
+
+#define SH_MAX_REGS 18
+const char *sh_regs_table[SH_MAX_REGS] = {
+ "r0",
+ "r1",
+ "r2",
+ "r3",
+ "r4",
+ "r5",
+ "r6",
+ "r7",
+ "r8",
+ "r9",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+ "pc",
+ "pr",
+};
+
+/* Return the architecture-dependent register string (for kprobe-tracer) */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL;
+}
diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build
new file mode 100644
index 000000000..54afe4a46
--- /dev/null
+++ b/tools/perf/arch/sparc/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
new file mode 100644
index 000000000..7fbca1750
--- /dev/null
+++ b/tools/perf/arch/sparc/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/sparc/include/dwarf-regs-table.h b/tools/perf/arch/sparc/include/dwarf-regs-table.h
new file mode 100644
index 000000000..35ede84a6
--- /dev/null
+++ b/tools/perf/arch/sparc/include/dwarf-regs-table.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const sparc_regstr_tbl[] = {
+ "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+ "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+ "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+ "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+ "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+ "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+ "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+ "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+#endif
diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build
new file mode 100644
index 000000000..954e287bb
--- /dev/null
+++ b/tools/perf/arch/sparc/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c
new file mode 100644
index 000000000..b704fdb92
--- /dev/null
+++ b/tools/perf/arch/sparc/util/dwarf-regs.c
@@ -0,0 +1,43 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define SPARC_MAX_REGS 96
+
+const char *sparc_regs_table[SPARC_MAX_REGS] = {
+ "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+ "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+ "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+ "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+ "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+ "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+ "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+ "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+
+/**
+ * get_arch_regstr() - look up a register name by its DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the register name from sparc_regs_table for
+ * the given DWARF register number. If the register is not found in the
+ * table, this returns NULL.
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
+}
diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build
new file mode 100644
index 000000000..db52fa22d
--- /dev/null
+++ b/tools/perf/arch/x86/Build
@@ -0,0 +1,2 @@
+libperf-y += util/
+libperf-y += tests/
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
new file mode 100644
index 000000000..8cc6642fc
--- /dev/null
+++ b/tools/perf/arch/x86/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
+HAVE_KVM_STAT_SUPPORT := 1
+PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1
+PERF_HAVE_JITDUMP := 1
+
+###
+# Syscall table generation
+#
+
+out := $(OUTPUT)arch/x86/include/generated/asm
+header := $(out)/syscalls_64.c
+sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+
+$(header): $(sys)/syscall_64.tbl $(systbl)
+ $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
+
+clean::
+ $(call QUIET_CLEAN, x86) $(RM) $(header)
+
+archheaders: $(header)
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
new file mode 100644
index 000000000..44f5aba78
--- /dev/null
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+static struct ins x86__instructions[] = {
+ { .name = "adc", .ops = &mov_ops, },
+ { .name = "adcb", .ops = &mov_ops, },
+ { .name = "adcl", .ops = &mov_ops, },
+ { .name = "add", .ops = &mov_ops, },
+ { .name = "addl", .ops = &mov_ops, },
+ { .name = "addq", .ops = &mov_ops, },
+ { .name = "addsd", .ops = &mov_ops, },
+ { .name = "addw", .ops = &mov_ops, },
+ { .name = "and", .ops = &mov_ops, },
+ { .name = "andb", .ops = &mov_ops, },
+ { .name = "andl", .ops = &mov_ops, },
+ { .name = "andpd", .ops = &mov_ops, },
+ { .name = "andps", .ops = &mov_ops, },
+ { .name = "andq", .ops = &mov_ops, },
+ { .name = "andw", .ops = &mov_ops, },
+ { .name = "bsr", .ops = &mov_ops, },
+ { .name = "bt", .ops = &mov_ops, },
+ { .name = "btr", .ops = &mov_ops, },
+ { .name = "bts", .ops = &mov_ops, },
+ { .name = "btsq", .ops = &mov_ops, },
+ { .name = "call", .ops = &call_ops, },
+ { .name = "callq", .ops = &call_ops, },
+ { .name = "cmovbe", .ops = &mov_ops, },
+ { .name = "cmove", .ops = &mov_ops, },
+ { .name = "cmovae", .ops = &mov_ops, },
+ { .name = "cmp", .ops = &mov_ops, },
+ { .name = "cmpb", .ops = &mov_ops, },
+ { .name = "cmpl", .ops = &mov_ops, },
+ { .name = "cmpq", .ops = &mov_ops, },
+ { .name = "cmpw", .ops = &mov_ops, },
+ { .name = "cmpxch", .ops = &mov_ops, },
+ { .name = "cmpxchg", .ops = &mov_ops, },
+ { .name = "cs", .ops = &mov_ops, },
+ { .name = "dec", .ops = &dec_ops, },
+ { .name = "decl", .ops = &dec_ops, },
+ { .name = "divsd", .ops = &mov_ops, },
+ { .name = "divss", .ops = &mov_ops, },
+ { .name = "gs", .ops = &mov_ops, },
+ { .name = "imul", .ops = &mov_ops, },
+ { .name = "inc", .ops = &dec_ops, },
+ { .name = "incl", .ops = &dec_ops, },
+ { .name = "ja", .ops = &jump_ops, },
+ { .name = "jae", .ops = &jump_ops, },
+ { .name = "jb", .ops = &jump_ops, },
+ { .name = "jbe", .ops = &jump_ops, },
+ { .name = "jc", .ops = &jump_ops, },
+ { .name = "jcxz", .ops = &jump_ops, },
+ { .name = "je", .ops = &jump_ops, },
+ { .name = "jecxz", .ops = &jump_ops, },
+ { .name = "jg", .ops = &jump_ops, },
+ { .name = "jge", .ops = &jump_ops, },
+ { .name = "jl", .ops = &jump_ops, },
+ { .name = "jle", .ops = &jump_ops, },
+ { .name = "jmp", .ops = &jump_ops, },
+ { .name = "jmpq", .ops = &jump_ops, },
+ { .name = "jna", .ops = &jump_ops, },
+ { .name = "jnae", .ops = &jump_ops, },
+ { .name = "jnb", .ops = &jump_ops, },
+ { .name = "jnbe", .ops = &jump_ops, },
+ { .name = "jnc", .ops = &jump_ops, },
+ { .name = "jne", .ops = &jump_ops, },
+ { .name = "jng", .ops = &jump_ops, },
+ { .name = "jnge", .ops = &jump_ops, },
+ { .name = "jnl", .ops = &jump_ops, },
+ { .name = "jnle", .ops = &jump_ops, },
+ { .name = "jno", .ops = &jump_ops, },
+ { .name = "jnp", .ops = &jump_ops, },
+ { .name = "jns", .ops = &jump_ops, },
+ { .name = "jnz", .ops = &jump_ops, },
+ { .name = "jo", .ops = &jump_ops, },
+ { .name = "jp", .ops = &jump_ops, },
+ { .name = "jpe", .ops = &jump_ops, },
+ { .name = "jpo", .ops = &jump_ops, },
+ { .name = "jrcxz", .ops = &jump_ops, },
+ { .name = "js", .ops = &jump_ops, },
+ { .name = "jz", .ops = &jump_ops, },
+ { .name = "lea", .ops = &mov_ops, },
+ { .name = "lock", .ops = &lock_ops, },
+ { .name = "mov", .ops = &mov_ops, },
+ { .name = "movapd", .ops = &mov_ops, },
+ { .name = "movaps", .ops = &mov_ops, },
+ { .name = "movb", .ops = &mov_ops, },
+ { .name = "movdqa", .ops = &mov_ops, },
+ { .name = "movdqu", .ops = &mov_ops, },
+ { .name = "movl", .ops = &mov_ops, },
+ { .name = "movq", .ops = &mov_ops, },
+ { .name = "movsd", .ops = &mov_ops, },
+ { .name = "movslq", .ops = &mov_ops, },
+ { .name = "movss", .ops = &mov_ops, },
+ { .name = "movupd", .ops = &mov_ops, },
+ { .name = "movups", .ops = &mov_ops, },
+ { .name = "movw", .ops = &mov_ops, },
+ { .name = "movzbl", .ops = &mov_ops, },
+ { .name = "movzwl", .ops = &mov_ops, },
+ { .name = "mulsd", .ops = &mov_ops, },
+ { .name = "mulss", .ops = &mov_ops, },
+ { .name = "nop", .ops = &nop_ops, },
+ { .name = "nopl", .ops = &nop_ops, },
+ { .name = "nopw", .ops = &nop_ops, },
+ { .name = "or", .ops = &mov_ops, },
+ { .name = "orb", .ops = &mov_ops, },
+ { .name = "orl", .ops = &mov_ops, },
+ { .name = "orps", .ops = &mov_ops, },
+ { .name = "orq", .ops = &mov_ops, },
+ { .name = "pand", .ops = &mov_ops, },
+ { .name = "paddq", .ops = &mov_ops, },
+ { .name = "pcmpeqb", .ops = &mov_ops, },
+ { .name = "por", .ops = &mov_ops, },
+ { .name = "rclb", .ops = &mov_ops, },
+ { .name = "rcll", .ops = &mov_ops, },
+ { .name = "retq", .ops = &ret_ops, },
+ { .name = "sbb", .ops = &mov_ops, },
+ { .name = "sbbl", .ops = &mov_ops, },
+ { .name = "sete", .ops = &mov_ops, },
+ { .name = "sub", .ops = &mov_ops, },
+ { .name = "subl", .ops = &mov_ops, },
+ { .name = "subq", .ops = &mov_ops, },
+ { .name = "subsd", .ops = &mov_ops, },
+ { .name = "subw", .ops = &mov_ops, },
+ { .name = "test", .ops = &mov_ops, },
+ { .name = "testb", .ops = &mov_ops, },
+ { .name = "testl", .ops = &mov_ops, },
+ { .name = "ucomisd", .ops = &mov_ops, },
+ { .name = "ucomiss", .ops = &mov_ops, },
+ { .name = "vaddsd", .ops = &mov_ops, },
+ { .name = "vandpd", .ops = &mov_ops, },
+ { .name = "vmovdqa", .ops = &mov_ops, },
+ { .name = "vmovq", .ops = &mov_ops, },
+ { .name = "vmovsd", .ops = &mov_ops, },
+ { .name = "vmulsd", .ops = &mov_ops, },
+ { .name = "vorpd", .ops = &mov_ops, },
+ { .name = "vsubsd", .ops = &mov_ops, },
+ { .name = "vucomisd", .ops = &mov_ops, },
+ { .name = "xadd", .ops = &mov_ops, },
+ { .name = "xbeginl", .ops = &jump_ops, },
+ { .name = "xbeginq", .ops = &jump_ops, },
+ { .name = "xchg", .ops = &mov_ops, },
+ { .name = "xor", .ops = &mov_ops, },
+ { .name = "xorb", .ops = &mov_ops, },
+ { .name = "xorpd", .ops = &mov_ops, },
+ { .name = "xorps", .ops = &mov_ops, },
+};
+
+static bool x86__ins_is_fused(struct arch *arch, const char *ins1,
+ const char *ins2)
+{
+ if (arch->family != 6 || arch->model < 0x1e || strstr(ins2, "jmp"))
+ return false;
+
+ if (arch->model == 0x1e) {
+ /* Nehalem */
+ if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+ strstr(ins1, "test")) {
+ return true;
+ }
+ } else {
+ /* Newer platform */
+ if ((strstr(ins1, "cmp") && !strstr(ins1, "xchg")) ||
+ strstr(ins1, "test") ||
+ strstr(ins1, "add") ||
+ strstr(ins1, "sub") ||
+ strstr(ins1, "and") ||
+ strstr(ins1, "inc") ||
+ strstr(ins1, "dec")) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static int x86__cpuid_parse(struct arch *arch, char *cpuid)
+{
+ unsigned int family, model, stepping;
+ int ret;
+
+ /*
+ * cpuid = "GenuineIntel,family,model,stepping"
+ */
+ ret = sscanf(cpuid, "%*[^,],%u,%u,%u", &family, &model, &stepping);
+ if (ret == 3) {
+ arch->family = family;
+ arch->model = model;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int x86__annotate_init(struct arch *arch, char *cpuid)
+{
+ int err = 0;
+
+ if (arch->initialized)
+ return 0;
+
+ if (cpuid)
+ err = x86__cpuid_parse(arch, cpuid);
+
+ arch->initialized = true;
+ return err;
+}
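
x86__ins_is_fused() above is static, so the standalone sketch below re-implements
the same macro-fusion predicate for illustration: on Nehalem (family 6, model
0x1e) only cmp/test fuse with a following non-jmp jump, while later models also
fuse add/sub/and/inc/dec:

	#include <stdbool.h>
	#include <stdio.h>
	#include <string.h>

	/* Mirrors the logic of x86__ins_is_fused(), not the function itself. */
	static bool is_fused(int family, int model, const char *i1, const char *i2)
	{
		if (family != 6 || model < 0x1e || strstr(i2, "jmp"))
			return false;

		/* cmp (but not cmpxchg) and test fuse on Nehalem and newer. */
		if ((strstr(i1, "cmp") && !strstr(i1, "xchg")) || strstr(i1, "test"))
			return true;

		/* The wider set only fuses on models newer than Nehalem. */
		return model > 0x1e &&
		       (strstr(i1, "add") || strstr(i1, "sub") ||
			strstr(i1, "and") || strstr(i1, "inc") ||
			strstr(i1, "dec"));
	}

	int main(void)
	{
		printf("%d\n", is_fused(6, 0x1e, "cmpl", "je"));	/* 1: fused */
		printf("%d\n", is_fused(6, 0x1e, "addl", "je"));	/* 0: Nehalem */
		printf("%d\n", is_fused(6, 0x3d, "addl", "je"));	/* 1: newer model */
		return 0;
	}
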
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
new file mode 100644
index 000000000..f0b1709a5
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -0,0 +1,388 @@
+#
+# 64-bit system call numbers and entry vectors
+#
+# The format is:
+# <number> <abi> <name> <entry point>
+#
+# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
+#
+# The abi is "common", "64" or "x32" for this file.
+#
+0 common read __x64_sys_read
+1 common write __x64_sys_write
+2 common open __x64_sys_open
+3 common close __x64_sys_close
+4 common stat __x64_sys_newstat
+5 common fstat __x64_sys_newfstat
+6 common lstat __x64_sys_newlstat
+7 common poll __x64_sys_poll
+8 common lseek __x64_sys_lseek
+9 common mmap __x64_sys_mmap
+10 common mprotect __x64_sys_mprotect
+11 common munmap __x64_sys_munmap
+12 common brk __x64_sys_brk
+13 64 rt_sigaction __x64_sys_rt_sigaction
+14 common rt_sigprocmask __x64_sys_rt_sigprocmask
+15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
+16 64 ioctl __x64_sys_ioctl
+17 common pread64 __x64_sys_pread64
+18 common pwrite64 __x64_sys_pwrite64
+19 64 readv __x64_sys_readv
+20 64 writev __x64_sys_writev
+21 common access __x64_sys_access
+22 common pipe __x64_sys_pipe
+23 common select __x64_sys_select
+24 common sched_yield __x64_sys_sched_yield
+25 common mremap __x64_sys_mremap
+26 common msync __x64_sys_msync
+27 common mincore __x64_sys_mincore
+28 common madvise __x64_sys_madvise
+29 common shmget __x64_sys_shmget
+30 common shmat __x64_sys_shmat
+31 common shmctl __x64_sys_shmctl
+32 common dup __x64_sys_dup
+33 common dup2 __x64_sys_dup2
+34 common pause __x64_sys_pause
+35 common nanosleep __x64_sys_nanosleep
+36 common getitimer __x64_sys_getitimer
+37 common alarm __x64_sys_alarm
+38 common setitimer __x64_sys_setitimer
+39 common getpid __x64_sys_getpid
+40 common sendfile __x64_sys_sendfile64
+41 common socket __x64_sys_socket
+42 common connect __x64_sys_connect
+43 common accept __x64_sys_accept
+44 common sendto __x64_sys_sendto
+45 64 recvfrom __x64_sys_recvfrom
+46 64 sendmsg __x64_sys_sendmsg
+47 64 recvmsg __x64_sys_recvmsg
+48 common shutdown __x64_sys_shutdown
+49 common bind __x64_sys_bind
+50 common listen __x64_sys_listen
+51 common getsockname __x64_sys_getsockname
+52 common getpeername __x64_sys_getpeername
+53 common socketpair __x64_sys_socketpair
+54 64 setsockopt __x64_sys_setsockopt
+55 64 getsockopt __x64_sys_getsockopt
+56 common clone __x64_sys_clone/ptregs
+57 common fork __x64_sys_fork/ptregs
+58 common vfork __x64_sys_vfork/ptregs
+59 64 execve __x64_sys_execve/ptregs
+60 common exit __x64_sys_exit
+61 common wait4 __x64_sys_wait4
+62 common kill __x64_sys_kill
+63 common uname __x64_sys_newuname
+64 common semget __x64_sys_semget
+65 common semop __x64_sys_semop
+66 common semctl __x64_sys_semctl
+67 common shmdt __x64_sys_shmdt
+68 common msgget __x64_sys_msgget
+69 common msgsnd __x64_sys_msgsnd
+70 common msgrcv __x64_sys_msgrcv
+71 common msgctl __x64_sys_msgctl
+72 common fcntl __x64_sys_fcntl
+73 common flock __x64_sys_flock
+74 common fsync __x64_sys_fsync
+75 common fdatasync __x64_sys_fdatasync
+76 common truncate __x64_sys_truncate
+77 common ftruncate __x64_sys_ftruncate
+78 common getdents __x64_sys_getdents
+79 common getcwd __x64_sys_getcwd
+80 common chdir __x64_sys_chdir
+81 common fchdir __x64_sys_fchdir
+82 common rename __x64_sys_rename
+83 common mkdir __x64_sys_mkdir
+84 common rmdir __x64_sys_rmdir
+85 common creat __x64_sys_creat
+86 common link __x64_sys_link
+87 common unlink __x64_sys_unlink
+88 common symlink __x64_sys_symlink
+89 common readlink __x64_sys_readlink
+90 common chmod __x64_sys_chmod
+91 common fchmod __x64_sys_fchmod
+92 common chown __x64_sys_chown
+93 common fchown __x64_sys_fchown
+94 common lchown __x64_sys_lchown
+95 common umask __x64_sys_umask
+96 common gettimeofday __x64_sys_gettimeofday
+97 common getrlimit __x64_sys_getrlimit
+98 common getrusage __x64_sys_getrusage
+99 common sysinfo __x64_sys_sysinfo
+100 common times __x64_sys_times
+101 64 ptrace __x64_sys_ptrace
+102 common getuid __x64_sys_getuid
+103 common syslog __x64_sys_syslog
+104 common getgid __x64_sys_getgid
+105 common setuid __x64_sys_setuid
+106 common setgid __x64_sys_setgid
+107 common geteuid __x64_sys_geteuid
+108 common getegid __x64_sys_getegid
+109 common setpgid __x64_sys_setpgid
+110 common getppid __x64_sys_getppid
+111 common getpgrp __x64_sys_getpgrp
+112 common setsid __x64_sys_setsid
+113 common setreuid __x64_sys_setreuid
+114 common setregid __x64_sys_setregid
+115 common getgroups __x64_sys_getgroups
+116 common setgroups __x64_sys_setgroups
+117 common setresuid __x64_sys_setresuid
+118 common getresuid __x64_sys_getresuid
+119 common setresgid __x64_sys_setresgid
+120 common getresgid __x64_sys_getresgid
+121 common getpgid __x64_sys_getpgid
+122 common setfsuid __x64_sys_setfsuid
+123 common setfsgid __x64_sys_setfsgid
+124 common getsid __x64_sys_getsid
+125 common capget __x64_sys_capget
+126 common capset __x64_sys_capset
+127 64 rt_sigpending __x64_sys_rt_sigpending
+128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
+130 common rt_sigsuspend __x64_sys_rt_sigsuspend
+131 64 sigaltstack __x64_sys_sigaltstack
+132 common utime __x64_sys_utime
+133 common mknod __x64_sys_mknod
+134 64 uselib
+135 common personality __x64_sys_personality
+136 common ustat __x64_sys_ustat
+137 common statfs __x64_sys_statfs
+138 common fstatfs __x64_sys_fstatfs
+139 common sysfs __x64_sys_sysfs
+140 common getpriority __x64_sys_getpriority
+141 common setpriority __x64_sys_setpriority
+142 common sched_setparam __x64_sys_sched_setparam
+143 common sched_getparam __x64_sys_sched_getparam
+144 common sched_setscheduler __x64_sys_sched_setscheduler
+145 common sched_getscheduler __x64_sys_sched_getscheduler
+146 common sched_get_priority_max __x64_sys_sched_get_priority_max
+147 common sched_get_priority_min __x64_sys_sched_get_priority_min
+148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
+149 common mlock __x64_sys_mlock
+150 common munlock __x64_sys_munlock
+151 common mlockall __x64_sys_mlockall
+152 common munlockall __x64_sys_munlockall
+153 common vhangup __x64_sys_vhangup
+154 common modify_ldt __x64_sys_modify_ldt
+155 common pivot_root __x64_sys_pivot_root
+156 64 _sysctl __x64_sys_sysctl
+157 common prctl __x64_sys_prctl
+158 common arch_prctl __x64_sys_arch_prctl
+159 common adjtimex __x64_sys_adjtimex
+160 common setrlimit __x64_sys_setrlimit
+161 common chroot __x64_sys_chroot
+162 common sync __x64_sys_sync
+163 common acct __x64_sys_acct
+164 common settimeofday __x64_sys_settimeofday
+165 common mount __x64_sys_mount
+166 common umount2 __x64_sys_umount
+167 common swapon __x64_sys_swapon
+168 common swapoff __x64_sys_swapoff
+169 common reboot __x64_sys_reboot
+170 common sethostname __x64_sys_sethostname
+171 common setdomainname __x64_sys_setdomainname
+172 common iopl __x64_sys_iopl/ptregs
+173 common ioperm __x64_sys_ioperm
+174 64 create_module
+175 common init_module __x64_sys_init_module
+176 common delete_module __x64_sys_delete_module
+177 64 get_kernel_syms
+178 64 query_module
+179 common quotactl __x64_sys_quotactl
+180 64 nfsservctl
+181 common getpmsg
+182 common putpmsg
+183 common afs_syscall
+184 common tuxcall
+185 common security
+186 common gettid __x64_sys_gettid
+187 common readahead __x64_sys_readahead
+188 common setxattr __x64_sys_setxattr
+189 common lsetxattr __x64_sys_lsetxattr
+190 common fsetxattr __x64_sys_fsetxattr
+191 common getxattr __x64_sys_getxattr
+192 common lgetxattr __x64_sys_lgetxattr
+193 common fgetxattr __x64_sys_fgetxattr
+194 common listxattr __x64_sys_listxattr
+195 common llistxattr __x64_sys_llistxattr
+196 common flistxattr __x64_sys_flistxattr
+197 common removexattr __x64_sys_removexattr
+198 common lremovexattr __x64_sys_lremovexattr
+199 common fremovexattr __x64_sys_fremovexattr
+200 common tkill __x64_sys_tkill
+201 common time __x64_sys_time
+202 common futex __x64_sys_futex
+203 common sched_setaffinity __x64_sys_sched_setaffinity
+204 common sched_getaffinity __x64_sys_sched_getaffinity
+205 64 set_thread_area
+206 64 io_setup __x64_sys_io_setup
+207 common io_destroy __x64_sys_io_destroy
+208 common io_getevents __x64_sys_io_getevents
+209 64 io_submit __x64_sys_io_submit
+210 common io_cancel __x64_sys_io_cancel
+211 64 get_thread_area
+212 common lookup_dcookie __x64_sys_lookup_dcookie
+213 common epoll_create __x64_sys_epoll_create
+214 64 epoll_ctl_old
+215 64 epoll_wait_old
+216 common remap_file_pages __x64_sys_remap_file_pages
+217 common getdents64 __x64_sys_getdents64
+218 common set_tid_address __x64_sys_set_tid_address
+219 common restart_syscall __x64_sys_restart_syscall
+220 common semtimedop __x64_sys_semtimedop
+221 common fadvise64 __x64_sys_fadvise64
+222 64 timer_create __x64_sys_timer_create
+223 common timer_settime __x64_sys_timer_settime
+224 common timer_gettime __x64_sys_timer_gettime
+225 common timer_getoverrun __x64_sys_timer_getoverrun
+226 common timer_delete __x64_sys_timer_delete
+227 common clock_settime __x64_sys_clock_settime
+228 common clock_gettime __x64_sys_clock_gettime
+229 common clock_getres __x64_sys_clock_getres
+230 common clock_nanosleep __x64_sys_clock_nanosleep
+231 common exit_group __x64_sys_exit_group
+232 common epoll_wait __x64_sys_epoll_wait
+233 common epoll_ctl __x64_sys_epoll_ctl
+234 common tgkill __x64_sys_tgkill
+235 common utimes __x64_sys_utimes
+236 64 vserver
+237 common mbind __x64_sys_mbind
+238 common set_mempolicy __x64_sys_set_mempolicy
+239 common get_mempolicy __x64_sys_get_mempolicy
+240 common mq_open __x64_sys_mq_open
+241 common mq_unlink __x64_sys_mq_unlink
+242 common mq_timedsend __x64_sys_mq_timedsend
+243 common mq_timedreceive __x64_sys_mq_timedreceive
+244 64 mq_notify __x64_sys_mq_notify
+245 common mq_getsetattr __x64_sys_mq_getsetattr
+246 64 kexec_load __x64_sys_kexec_load
+247 64 waitid __x64_sys_waitid
+248 common add_key __x64_sys_add_key
+249 common request_key __x64_sys_request_key
+250 common keyctl __x64_sys_keyctl
+251 common ioprio_set __x64_sys_ioprio_set
+252 common ioprio_get __x64_sys_ioprio_get
+253 common inotify_init __x64_sys_inotify_init
+254 common inotify_add_watch __x64_sys_inotify_add_watch
+255 common inotify_rm_watch __x64_sys_inotify_rm_watch
+256 common migrate_pages __x64_sys_migrate_pages
+257 common openat __x64_sys_openat
+258 common mkdirat __x64_sys_mkdirat
+259 common mknodat __x64_sys_mknodat
+260 common fchownat __x64_sys_fchownat
+261 common futimesat __x64_sys_futimesat
+262 common newfstatat __x64_sys_newfstatat
+263 common unlinkat __x64_sys_unlinkat
+264 common renameat __x64_sys_renameat
+265 common linkat __x64_sys_linkat
+266 common symlinkat __x64_sys_symlinkat
+267 common readlinkat __x64_sys_readlinkat
+268 common fchmodat __x64_sys_fchmodat
+269 common faccessat __x64_sys_faccessat
+270 common pselect6 __x64_sys_pselect6
+271 common ppoll __x64_sys_ppoll
+272 common unshare __x64_sys_unshare
+273 64 set_robust_list __x64_sys_set_robust_list
+274 64 get_robust_list __x64_sys_get_robust_list
+275 common splice __x64_sys_splice
+276 common tee __x64_sys_tee
+277 common sync_file_range __x64_sys_sync_file_range
+278 64 vmsplice __x64_sys_vmsplice
+279 64 move_pages __x64_sys_move_pages
+280 common utimensat __x64_sys_utimensat
+281 common epoll_pwait __x64_sys_epoll_pwait
+282 common signalfd __x64_sys_signalfd
+283 common timerfd_create __x64_sys_timerfd_create
+284 common eventfd __x64_sys_eventfd
+285 common fallocate __x64_sys_fallocate
+286 common timerfd_settime __x64_sys_timerfd_settime
+287 common timerfd_gettime __x64_sys_timerfd_gettime
+288 common accept4 __x64_sys_accept4
+289 common signalfd4 __x64_sys_signalfd4
+290 common eventfd2 __x64_sys_eventfd2
+291 common epoll_create1 __x64_sys_epoll_create1
+292 common dup3 __x64_sys_dup3
+293 common pipe2 __x64_sys_pipe2
+294 common inotify_init1 __x64_sys_inotify_init1
+295 64 preadv __x64_sys_preadv
+296 64 pwritev __x64_sys_pwritev
+297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
+298 common perf_event_open __x64_sys_perf_event_open
+299 64 recvmmsg __x64_sys_recvmmsg
+300 common fanotify_init __x64_sys_fanotify_init
+301 common fanotify_mark __x64_sys_fanotify_mark
+302 common prlimit64 __x64_sys_prlimit64
+303 common name_to_handle_at __x64_sys_name_to_handle_at
+304 common open_by_handle_at __x64_sys_open_by_handle_at
+305 common clock_adjtime __x64_sys_clock_adjtime
+306 common syncfs __x64_sys_syncfs
+307 64 sendmmsg __x64_sys_sendmmsg
+308 common setns __x64_sys_setns
+309 common getcpu __x64_sys_getcpu
+310 64 process_vm_readv __x64_sys_process_vm_readv
+311 64 process_vm_writev __x64_sys_process_vm_writev
+312 common kcmp __x64_sys_kcmp
+313 common finit_module __x64_sys_finit_module
+314 common sched_setattr __x64_sys_sched_setattr
+315 common sched_getattr __x64_sys_sched_getattr
+316 common renameat2 __x64_sys_renameat2
+317 common seccomp __x64_sys_seccomp
+318 common getrandom __x64_sys_getrandom
+319 common memfd_create __x64_sys_memfd_create
+320 common kexec_file_load __x64_sys_kexec_file_load
+321 common bpf __x64_sys_bpf
+322 64 execveat __x64_sys_execveat/ptregs
+323 common userfaultfd __x64_sys_userfaultfd
+324 common membarrier __x64_sys_membarrier
+325 common mlock2 __x64_sys_mlock2
+326 common copy_file_range __x64_sys_copy_file_range
+327 64 preadv2 __x64_sys_preadv2
+328 64 pwritev2 __x64_sys_pwritev2
+329 common pkey_mprotect __x64_sys_pkey_mprotect
+330 common pkey_alloc __x64_sys_pkey_alloc
+331 common pkey_free __x64_sys_pkey_free
+332 common statx __x64_sys_statx
+333 common io_pgetevents __x64_sys_io_pgetevents
+334 common rseq __x64_sys_rseq
+
+#
+# x32-specific system call numbers start at 512 to avoid cache impact
+# for native 64-bit operation. The __x32_compat_sys stubs are created
+# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
+# is defined.
+#
+512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
+513 x32 rt_sigreturn sys32_x32_rt_sigreturn
+514 x32 ioctl __x32_compat_sys_ioctl
+515 x32 readv __x32_compat_sys_readv
+516 x32 writev __x32_compat_sys_writev
+517 x32 recvfrom __x32_compat_sys_recvfrom
+518 x32 sendmsg __x32_compat_sys_sendmsg
+519 x32 recvmsg __x32_compat_sys_recvmsg
+520 x32 execve __x32_compat_sys_execve/ptregs
+521 x32 ptrace __x32_compat_sys_ptrace
+522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait
+524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack __x32_compat_sys_sigaltstack
+526 x32 timer_create __x32_compat_sys_timer_create
+527 x32 mq_notify __x32_compat_sys_mq_notify
+528 x32 kexec_load __x32_compat_sys_kexec_load
+529 x32 waitid __x32_compat_sys_waitid
+530 x32 set_robust_list __x32_compat_sys_set_robust_list
+531 x32 get_robust_list __x32_compat_sys_get_robust_list
+532 x32 vmsplice __x32_compat_sys_vmsplice
+533 x32 move_pages __x32_compat_sys_move_pages
+534 x32 preadv __x32_compat_sys_preadv64
+535 x32 pwritev __x32_compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg __x32_compat_sys_recvmmsg
+538 x32 sendmmsg __x32_compat_sys_sendmmsg
+539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
+540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
+541 x32 setsockopt __x32_compat_sys_setsockopt
+542 x32 getsockopt __x32_compat_sys_getsockopt
+543 x32 io_setup __x32_compat_sys_io_setup
+544 x32 io_submit __x32_compat_sys_io_submit
+545 x32 execveat __x32_compat_sys_execveat/ptregs
+546 x32 preadv2 __x32_compat_sys_preadv64v2
+547 x32 pwritev2 __x32_compat_sys_pwritev64v2
diff --git a/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
new file mode 100755
index 000000000..029a72c20
--- /dev/null
+++ b/tools/perf/arch/x86/entry/syscalls/syscalltbl.sh
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+in="$1"
+arch="$2"
+
+syscall_macro() {
+ nr="$1"
+ name="$2"
+
+ echo " [$nr] = \"$name\","
+}
+
+emit() {
+ nr="$1"
+ entry="$2"
+
+ syscall_macro "$nr" "$entry"
+}
+
+echo "static const char *syscalltbl_${arch}[] = {"
+
+sorted_table=$(mktemp /tmp/syscalltbl.XXXXXX)
+grep '^[0-9]' "$in" | sort -n > $sorted_table
+
+max_nr=0
+while read nr abi name entry compat; do
+	if [ $nr -ge 512 ] ; then # discard compat syscalls
+ break
+ fi
+
+ emit "$nr" "$name"
+ max_nr=$nr
+done < $sorted_table
+
+rm -f $sorted_table
+
+echo "};"
+
+echo "#define SYSCALLTBL_${arch}_MAX_ID ${max_nr}"
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
new file mode 100644
index 000000000..613709cfb
--- /dev/null
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#include <linux/compiler.h>
+struct test;
+
+/* Tests */
+int test__rdpmc(struct test *test __maybe_unused, int subtest);
+int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest);
+int test__insn_x86(struct test *test __maybe_unused, int subtest);
+int test__bp_modify(struct test *test, int subtest);
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/x86/include/dwarf-regs-table.h b/tools/perf/arch/x86/include/dwarf-regs-table.h
new file mode 100644
index 000000000..b9bd5dc9d
--- /dev/null
+++ b/tools/perf/arch/x86/include/dwarf-regs-table.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const x86_32_regstr_tbl[] = {
+	"%ax", "%cx", "%dx", "%bx", "$stack", /* Stack address instead of %sp */
+ "%bp", "%si", "%di",
+};
+
+static const char * const x86_64_regstr_tbl[] = {
+ "%ax", "%dx", "%cx", "%bx", "%si", "%di",
+ "%bp", "%sp", "%r8", "%r9", "%r10", "%r11",
+ "%r12", "%r13", "%r14", "%r15",
+};
+#endif
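
A sketch of the consuming side, assuming it mirrors the perf/util/dwarf-regs.c
arrangement named in the comment above: defining the guard macro before the
include is what materializes the string tables, so other includers pay nothing.

	#define DEFINE_DWARF_REGSTR_TABLE
	#include "dwarf-regs-table.h"	/* x86_32_regstr_tbl / x86_64_regstr_tbl now defined */
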
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
new file mode 100644
index 000000000..7f6d538f8
--- /dev/null
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#ifndef HAVE_ARCH_X86_64_SUPPORT
+#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_X86_32_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+#else
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+ (1ULL << PERF_REG_X86_ES) | \
+ (1ULL << PERF_REG_X86_FS) | \
+ (1ULL << PERF_REG_X86_GS))
+#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
+#define PERF_REGS_MAX PERF_REG_X86_64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+#endif
+#define PERF_REG_IP PERF_REG_X86_IP
+#define PERF_REG_SP PERF_REG_X86_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
+	default:
+		return NULL;
+	}
+}
+
+#endif /* ARCH_PERF_REGS_H */
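
A hypothetical dumper using the definitions above (not part of this patch),
assuming the caller holds a sampled register mask and the matching packed value
array; perf stores one u64 per set mask bit, in ascending bit order:

	#include <stdio.h>
	#include "perf_regs.h"

	static void dump_sample_regs(u64 mask, const u64 *vals)
	{
		int i, n = 0;

		for (i = 0; i < PERF_REGS_MAX; i++) {
			if (!(mask & (1ULL << i)))
				continue;	/* register was not sampled */
			printf("%-5s = %#llx\n", perf_reg_name(i),
			       (unsigned long long)vals[n++]);
		}
	}
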
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
new file mode 100644
index 000000000..586849ff8
--- /dev/null
+++ b/tools/perf/arch/x86/tests/Build
@@ -0,0 +1,8 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
+libperf-y += rdpmc.o
+libperf-y += perf-time-to-tsc.o
+libperf-$(CONFIG_AUXTRACE) += insn-x86.o
+libperf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
new file mode 100644
index 000000000..d47d3f8e3
--- /dev/null
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+ {
+ .desc = "x86 rdpmc",
+ .func = test__rdpmc,
+ },
+ {
+ .desc = "Convert perf time to TSC",
+ .func = test__perf_time_to_tsc,
+ },
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "DWARF unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+#ifdef HAVE_AUXTRACE_SUPPORT
+ {
+ .desc = "x86 instruction decoder - new instructions",
+ .func = test__insn_x86,
+ },
+#endif
+#if defined(__x86_64__)
+ {
+ .desc = "x86 bp modify",
+ .func = test__bp_modify,
+ },
+#endif
+ {
+ .func = NULL,
+ },
+
+};
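
An illustrative runner loop for the NULL-terminated array above (an assumption:
the generic perf test driver walks it the same way; struct test is abbreviated
here to the two fields initialized above, the real definition lives in
tests/tests.h):

	#include <stdio.h>

	struct test {
		const char *desc;
		int (*func)(struct test *test, int subtest);
	};

	extern struct test arch_tests[];

	static void list_arch_tests(void)
	{
		struct test *t;

		for (t = arch_tests; t->func; t++)	/* stop at the .func == NULL sentinel */
			printf("arch test: %s\n", t->desc);
	}
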
diff --git a/tools/perf/arch/x86/tests/bp-modify.c b/tools/perf/arch/x86/tests/bp-modify.c
new file mode 100644
index 000000000..f53e44067
--- /dev/null
+++ b/tools/perf/arch/x86/tests/bp-modify.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <asm/ptrace.h>
+#include <errno.h>
+#include <signal.h>	/* raise() */
+#include <string.h>	/* strerror() */
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+static noinline int bp_1(void)
+{
+ pr_debug("in %s\n", __func__);
+ return 0;
+}
+
+static noinline int bp_2(void)
+{
+ pr_debug("in %s\n", __func__);
+ return 0;
+}
+
+static int spawn_child(void)
+{
+ int child = fork();
+
+ if (child == 0) {
+		/*
+		 * The child sets itself up as a tracee and
+		 * waits in a signal for the parent to trace it;
+		 * it then calls bp_1 and exits.
+		 */
+ int err = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+
+ if (err) {
+ pr_debug("failed to PTRACE_TRACEME\n");
+ exit(1);
+ }
+
+ raise(SIGCONT);
+ bp_1();
+ exit(0);
+ }
+
+ return child;
+}
+
+/*
+ * This test creates a HW breakpoint, tries to
+ * change it, and checks that it was properly changed.
+ */
+static int bp_modify1(void)
+{
+ pid_t child;
+ int status;
+ unsigned long rip = 0, dr7 = 1;
+
+ child = spawn_child();
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 1\n");
+ return TEST_FAIL;
+ }
+
+ /*
+	 * The parent performs the following steps:
+	 * - creates a new breakpoint (id 0) for the bp_2 function
+	 * - changes that breakpoint to the bp_1 function
+	 * - waits for the breakpoint to hit and checks that
+	 *   rip points at bp_1
+	 * - detaches the child
+ */
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), bp_2)) {
+ pr_debug("failed to set breakpoint, 1st time: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), bp_1)) {
+ pr_debug("failed to set breakpoint, 2nd time: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[7]), dr7)) {
+ pr_debug("failed to set dr7: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+ pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+ goto out;
+ }
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 2\n");
+ return TEST_FAIL;
+ }
+
+ rip = ptrace(PTRACE_PEEKUSER, child,
+ offsetof(struct user_regs_struct, rip), NULL);
+ if (rip == (unsigned long) -1) {
+ pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+ if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_DETACH: %s\n", strerror(errno));
+ return TEST_FAIL;
+ }
+
+ return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This test creates a HW breakpoint, tries to
+ * change it to a bogus value, and checks that
+ * the original breakpoint is hit.
+ */
+static int bp_modify2(void)
+{
+ pid_t child;
+ int status;
+ unsigned long rip = 0, dr7 = 1;
+
+ child = spawn_child();
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 1\n");
+ return TEST_FAIL;
+ }
+
+ /*
+	 * The parent performs the following steps:
+	 * - creates a new breakpoint (id 0) for the bp_1 function
+	 * - tries to change that breakpoint to the (-1) address
+	 * - waits for the breakpoint to hit and checks that
+	 *   rip points at bp_1
+ * - detaches the child
+ */
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), bp_1)) {
+ pr_debug("failed to set breakpoint: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ if (ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[7]), dr7)) {
+ pr_debug("failed to set dr7: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (!ptrace(PTRACE_POKEUSER, child,
+ offsetof(struct user, u_debugreg[0]), (unsigned long) (-1))) {
+ pr_debug("failed, breakpoint set to bogus address\n");
+ goto out;
+ }
+
+ if (ptrace(PTRACE_CONT, child, NULL, NULL)) {
+ pr_debug("failed to PTRACE_CONT: %s\n", strerror(errno));
+ goto out;
+ }
+
+ waitpid(child, &status, 0);
+ if (WIFEXITED(status)) {
+ pr_debug("tracee exited prematurely 2\n");
+ return TEST_FAIL;
+ }
+
+ rip = ptrace(PTRACE_PEEKUSER, child,
+ offsetof(struct user_regs_struct, rip), NULL);
+ if (rip == (unsigned long) -1) {
+ pr_debug("failed to PTRACE_PEEKUSER: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ pr_debug("rip %lx, bp_1 %p\n", rip, bp_1);
+
+out:
+ if (ptrace(PTRACE_DETACH, child, NULL, NULL)) {
+		pr_debug("failed to PTRACE_DETACH: %s\n", strerror(errno));
+ return TEST_FAIL;
+ }
+
+ return rip == (unsigned long) bp_1 ? TEST_OK : TEST_FAIL;
+}
+
+int test__bp_modify(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("modify test 1 failed\n", !bp_modify1());
+ TEST_ASSERT_VAL("modify test 2 failed\n", !bp_modify2());
+
+ return 0;
+}
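
Why dr7 = 1 arms the breakpoint in both tests above (an assumption based on the
standard x86 debug-register layout, not something this patch defines): bit 0 of
DR7 is L0, the local-enable bit for the address in DR0, and leaving R/W0 and
LEN0 at zero selects a 1-byte instruction-execution breakpoint.

	#include <stdio.h>

	int main(void)
	{
		unsigned long l0  = 1UL << 0;	/* L0: locally enable DR0 */
		unsigned long dr7 = l0;		/* R/W0 = LEN0 = 0: execute, 1 byte */

		printf("dr7 = %#lx\n", dr7);	/* 0x1, the value poked above */
		return 0;
	}
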
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
new file mode 100644
index 000000000..7879df345
--- /dev/null
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>	/* malloc(), free() */
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_X86_SP];
+
+ map = map_groups__find(thread->mg, (u64)sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = malloc(sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk
new file mode 100644
index 000000000..a21454835
--- /dev/null
+++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.awk
@@ -0,0 +1,75 @@
+#!/bin/awk -f
+# gen-insn-x86-dat.awk: script to convert data for the insn-x86 test
+# Copyright (c) 2015, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+BEGIN {
+ print "/*"
+ print " * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk"
+ print " * from insn-x86-dat-src.c for inclusion by insn-x86.c"
+ print " * Do not change this code."
+ print "*/\n"
+ op = ""
+ branch = ""
+ rel = 0
+ going = 0
+}
+
+/ Start here / {
+ going = 1
+}
+
+/ Stop here / {
+ going = 0
+}
+
+/^\s*[0-9a-fA-F]+\:/ {
+ if (going) {
+ colon_pos = index($0, ":")
+ useful_line = substr($0, colon_pos + 1)
+ first_pos = match(useful_line, "[0-9a-fA-F]")
+ useful_line = substr(useful_line, first_pos)
+ gsub("\t", "\\t", useful_line)
+ printf "{{"
+ len = 0
+ for (i = 2; i <= NF; i++) {
+ if (match($i, "^[0-9a-fA-F][0-9a-fA-F]$")) {
+ printf "0x%s, ", $i
+ len += 1
+ } else {
+ break
+ }
+ }
+ printf "}, %d, %s, \"%s\", \"%s\",", len, rel, op, branch
+ printf "\n\"%s\",},\n", useful_line
+ op = ""
+ branch = ""
+ rel = 0
+ }
+}
+
+/ Expecting: / {
+ expecting_str = " Expecting: "
+ expecting_len = length(expecting_str)
+ expecting_pos = index($0, expecting_str)
+ useful_line = substr($0, expecting_pos + expecting_len)
+ for (i = 1; i <= NF; i++) {
+ if ($i == "Expecting:") {
+ i++
+ op = $i
+ i++
+ branch = $i
+ i++
+ rel = $i
+ break
+ }
+ }
+}
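
Concretely, for an objdump line inside a "Start here"/"Stop here" region such as
(the address here is hypothetical):

	3d:	0f 31                	rdtsc 

the script emits the two-line array entry that opens insn-x86-dat-32.c below:

	{{0x0f, 0x31, }, 2, 0, "", "",
	"0f 31 \trdtsc ",},

with length 2, rel 0, and empty op/branch strings because no "Expecting:"
annotation preceded the line.
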
diff --git a/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh
new file mode 100755
index 000000000..2d4ef94cf
--- /dev/null
+++ b/tools/perf/arch/x86/tests/gen-insn-x86-dat.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# gen-insn-x86-dat: generate data for the insn-x86 test
+# Copyright (c) 2015, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+set -e
+
+if [ "$(uname -m)" != "x86_64" ]; then
+ echo "ERROR: This script only works on x86_64"
+ exit 1
+fi
+
+cd $(dirname $0)
+
+trap 'echo "Might need a more recent version of binutils"' EXIT
+
+echo "Compiling insn-x86-dat-src.c to 64-bit object"
+
+gcc -g -c insn-x86-dat-src.c
+
+objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-64.c
+
+rm -f insn-x86-dat-src.o
+
+echo "Compiling insn-x86-dat-src.c to 32-bit object"
+
+gcc -g -c -m32 insn-x86-dat-src.c
+
+objdump -dSw insn-x86-dat-src.o | awk -f gen-insn-x86-dat.awk > insn-x86-dat-32.c
+
+rm -f insn-x86-dat-src.o
+
+trap - EXIT
+
+echo "Done (use git diff to see the changes)"
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
new file mode 100644
index 000000000..fab3c6de7
--- /dev/null
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -0,0 +1,1679 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk
+ * from insn-x86-dat-src.c for inclusion by insn-x86.c
+ * Do not change this code.
+*/
+
+{{0x0f, 0x31, }, 2, 0, "", "",
+"0f 31 \trdtsc ",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 81 78 56 34 12 \tbound %eax,0x12345678(%ecx)",},
+{{0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 88 78 56 34 12 \tbound %ecx,0x12345678(%eax)",},
+{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 90 78 56 34 12 \tbound %edx,0x12345678(%eax)",},
+{{0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 98 78 56 34 12 \tbound %ebx,0x12345678(%eax)",},
+{{0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 a0 78 56 34 12 \tbound %esp,0x12345678(%eax)",},
+{{0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 a8 78 56 34 12 \tbound %ebp,0x12345678(%eax)",},
+{{0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 b0 78 56 34 12 \tbound %esi,0x12345678(%eax)",},
+{{0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 b8 78 56 34 12 \tbound %edi,0x12345678(%eax)",},
+{{0x62, 0x08, }, 2, 0, "", "",
+"62 08 \tbound %ecx,(%eax)",},
+{{0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 05 78 56 34 12 \tbound %eax,0x12345678",},
+{{0x62, 0x14, 0x01, }, 3, 0, "", "",
+"62 14 01 \tbound %edx,(%ecx,%eax,1)",},
+{{0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 14 05 78 56 34 12 \tbound %edx,0x12345678(,%eax,1)",},
+{{0x62, 0x14, 0x08, }, 3, 0, "", "",
+"62 14 08 \tbound %edx,(%eax,%ecx,1)",},
+{{0x62, 0x14, 0xc8, }, 3, 0, "", "",
+"62 14 c8 \tbound %edx,(%eax,%ecx,8)",},
+{{0x62, 0x50, 0x12, }, 3, 0, "", "",
+"62 50 12 \tbound %edx,0x12(%eax)",},
+{{0x62, 0x55, 0x12, }, 3, 0, "", "",
+"62 55 12 \tbound %edx,0x12(%ebp)",},
+{{0x62, 0x54, 0x01, 0x12, }, 4, 0, "", "",
+"62 54 01 12 \tbound %edx,0x12(%ecx,%eax,1)",},
+{{0x62, 0x54, 0x05, 0x12, }, 4, 0, "", "",
+"62 54 05 12 \tbound %edx,0x12(%ebp,%eax,1)",},
+{{0x62, 0x54, 0x08, 0x12, }, 4, 0, "", "",
+"62 54 08 12 \tbound %edx,0x12(%eax,%ecx,1)",},
+{{0x62, 0x54, 0xc8, 0x12, }, 4, 0, "", "",
+"62 54 c8 12 \tbound %edx,0x12(%eax,%ecx,8)",},
+{{0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 90 78 56 34 12 \tbound %edx,0x12345678(%eax)",},
+{{0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "", "",
+"62 95 78 56 34 12 \tbound %edx,0x12345678(%ebp)",},
+{{0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 01 78 56 34 12 \tbound %edx,0x12345678(%ecx,%eax,1)",},
+{{0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 05 78 56 34 12 \tbound %edx,0x12345678(%ebp,%eax,1)",},
+{{0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 08 78 56 34 12 \tbound %edx,0x12345678(%eax,%ecx,1)",},
+{{0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"62 94 c8 78 56 34 12 \tbound %edx,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x81, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 81 78 56 34 12 \tbound %ax,0x12345678(%ecx)",},
+{{0x66, 0x62, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 88 78 56 34 12 \tbound %cx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 90 78 56 34 12 \tbound %dx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x98, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 98 78 56 34 12 \tbound %bx,0x12345678(%eax)",},
+{{0x66, 0x62, 0xa0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 a0 78 56 34 12 \tbound %sp,0x12345678(%eax)",},
+{{0x66, 0x62, 0xa8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 a8 78 56 34 12 \tbound %bp,0x12345678(%eax)",},
+{{0x66, 0x62, 0xb0, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 b0 78 56 34 12 \tbound %si,0x12345678(%eax)",},
+{{0x66, 0x62, 0xb8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 b8 78 56 34 12 \tbound %di,0x12345678(%eax)",},
+{{0x66, 0x62, 0x08, }, 3, 0, "", "",
+"66 62 08 \tbound %cx,(%eax)",},
+{{0x66, 0x62, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 05 78 56 34 12 \tbound %ax,0x12345678",},
+{{0x66, 0x62, 0x14, 0x01, }, 4, 0, "", "",
+"66 62 14 01 \tbound %dx,(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x14, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 14 05 78 56 34 12 \tbound %dx,0x12345678(,%eax,1)",},
+{{0x66, 0x62, 0x14, 0x08, }, 4, 0, "", "",
+"66 62 14 08 \tbound %dx,(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x14, 0xc8, }, 4, 0, "", "",
+"66 62 14 c8 \tbound %dx,(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x50, 0x12, }, 4, 0, "", "",
+"66 62 50 12 \tbound %dx,0x12(%eax)",},
+{{0x66, 0x62, 0x55, 0x12, }, 4, 0, "", "",
+"66 62 55 12 \tbound %dx,0x12(%ebp)",},
+{{0x66, 0x62, 0x54, 0x01, 0x12, }, 5, 0, "", "",
+"66 62 54 01 12 \tbound %dx,0x12(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x54, 0x05, 0x12, }, 5, 0, "", "",
+"66 62 54 05 12 \tbound %dx,0x12(%ebp,%eax,1)",},
+{{0x66, 0x62, 0x54, 0x08, 0x12, }, 5, 0, "", "",
+"66 62 54 08 12 \tbound %dx,0x12(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x54, 0xc8, 0x12, }, 5, 0, "", "",
+"66 62 54 c8 12 \tbound %dx,0x12(%eax,%ecx,8)",},
+{{0x66, 0x62, 0x90, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 90 78 56 34 12 \tbound %dx,0x12345678(%eax)",},
+{{0x66, 0x62, 0x95, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"66 62 95 78 56 34 12 \tbound %dx,0x12345678(%ebp)",},
+{{0x66, 0x62, 0x94, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 01 78 56 34 12 \tbound %dx,0x12345678(%ecx,%eax,1)",},
+{{0x66, 0x62, 0x94, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 05 78 56 34 12 \tbound %dx,0x12345678(%ebp,%eax,1)",},
+{{0x66, 0x62, 0x94, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 08 78 56 34 12 \tbound %dx,0x12345678(%eax,%ecx,1)",},
+{{0x66, 0x62, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 62 94 c8 78 56 34 12 \tbound %dx,0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0x41, 0xd8, }, 3, 0, "", "",
+"0f 41 d8 \tcmovno %eax,%ebx",},
+{{0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%ecx",},
+{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%eax),%cx",},
+{{0x0f, 0x44, 0xd8, }, 3, 0, "", "",
+"0f 44 d8 \tcmove %eax,%ebx",},
+{{0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 44 88 78 56 34 12 \tcmove 0x12345678(%eax),%ecx",},
+{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 44 88 78 56 34 12 \tcmove 0x12345678(%eax),%cx",},
+{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 90 80 78 56 34 12 \tseto 0x12345678(%eax)",},
+{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 91 80 78 56 34 12 \tsetno 0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb 0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb 0x12345678(%eax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb 0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae 0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae 0x12345678(%eax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae 0x12345678(%eax)",},
+{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 98 80 78 56 34 12 \tsets 0x12345678(%eax)",},
+{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 99 80 78 56 34 12 \tsetns 0x12345678(%eax)",},
+{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cc 41 ef \tkandw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 41 ef \tkandq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cd 41 ef \tkandb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 41 ef \tkandd %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cc 42 ef \tkandnw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 42 ef \tkandnq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cd 42 ef \tkandnb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 42 ef \tkandnd %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f8 44 f7 \tknotw %k7,%k6",},
+{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f8 44 f7 \tknotq %k7,%k6",},
+{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f9 44 f7 \tknotb %k7,%k6",},
+{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f9 44 f7 \tknotd %k7,%k6",},
+{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cc 45 ef \tkorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 45 ef \tkorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cd 45 ef \tkorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 45 ef \tkord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cc 46 ef \tkxnorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 46 ef \tkxnorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cd 46 ef \tkxnorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 46 ef \tkxnord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cc 47 ef \tkxorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 47 ef \tkxorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cd 47 ef \tkxorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 47 ef \tkxord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cc 4a ef \tkaddw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4a ef \tkaddq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cd 4a ef \tkaddb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 4a ef \tkaddd %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cd 4b ef \tkunpckbw %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cc 4b ef \tkunpckwd %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4b ef \tkunpckdq %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f8 90 ee \tkmovw %k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f8 90 29 \tkmovw (%ecx),%k5",},
+{{0xc5, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f8 90 ac c8 23 01 00 00 \tkmovw 0x123(%eax,%ecx,8),%k5",},
+{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f8 91 29 \tkmovw %k5,(%ecx)",},
+{{0xc5, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f8 91 ac c8 23 01 00 00 \tkmovw %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f8 92 e8 \tkmovw %eax,%k5",},
+{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f8 92 ed \tkmovw %ebp,%k5",},
+{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f8 93 c5 \tkmovw %k5,%eax",},
+{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f8 93 ed \tkmovw %k5,%ebp",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 90 ee \tkmovq %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 90 29 \tkmovq (%ecx),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f8 90 ac c8 23 01 00 00 \tkmovq 0x123(%eax,%ecx,8),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 91 29 \tkmovq %k5,(%ecx)",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f8 91 ac c8 23 01 00 00 \tkmovq %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f9 90 ee \tkmovb %k6,%k5",},
+{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f9 90 29 \tkmovb (%ecx),%k5",},
+{{0xc5, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f9 90 ac c8 23 01 00 00 \tkmovb 0x123(%eax,%ecx,8),%k5",},
+{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f9 91 29 \tkmovb %k5,(%ecx)",},
+{{0xc5, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 9, 0, "", "",
+"c5 f9 91 ac c8 23 01 00 00 \tkmovb %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f9 92 e8 \tkmovb %eax,%k5",},
+{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f9 92 ed \tkmovb %ebp,%k5",},
+{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f9 93 c5 \tkmovb %k5,%eax",},
+{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f9 93 ed \tkmovb %k5,%ebp",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 90 ee \tkmovd %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 90 29 \tkmovd (%ecx),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f9 90 ac c8 23 01 00 00 \tkmovd 0x123(%eax,%ecx,8),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 91 29 \tkmovd %k5,(%ecx)",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 e1 f9 91 ac c8 23 01 00 00 \tkmovd %k5,0x123(%eax,%ecx,8)",},
+{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 fb 92 e8 \tkmovd %eax,%k5",},
+{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "",
+"c5 fb 92 ed \tkmovd %ebp,%k5",},
+{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 fb 93 c5 \tkmovd %k5,%eax",},
+{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "",
+"c5 fb 93 ed \tkmovd %k5,%ebp",},
+{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f8 98 ee \tkortestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 98 ee \tkortestq %k6,%k5",},
+{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f9 98 ee \tkortestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 98 ee \tkortestd %k6,%k5",},
+{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f8 99 ee \tktestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 99 ee \tktestq %k6,%k5",},
+{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f9 99 ee \tktestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 99 ee \tktestd %k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 30 ee 12 \tkshiftrw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 31 ee 5b \tkshiftrq $0x5b,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 32 ee 12 \tkshiftlw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 33 ee 5b \tkshiftlq $0x5b,%k6,%k5",},
+{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f8 5b f5 \tvcvtdq2ps %xmm5,%xmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 5b f5 \tvcvtqq2ps %zmm5,%ymm6{%k7}",},
+{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f9 5b f5 \tvcvtps2dq %xmm5,%xmm6",},
+{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 fa 5b f5 \tvcvttps2dq %xmm5,%xmm6",},
+{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "",
+"0f 6f e0 \tmovq %mm0,%mm4",},
+{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fd 6f f4 \tvmovdqa %ymm4,%ymm6",},
+{{0x62, 0xf1, 0x7d, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 48 6f f5 \tvmovdqa32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 6f f5 \tvmovdqa64 %zmm5,%zmm6",},
+{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fe 6f f4 \tvmovdqu %ymm4,%ymm6",},
+{{0x62, 0xf1, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 48 6f f5 \tvmovdqu32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 6f f5 \tvmovdqu64 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 7f 48 6f f5 \tvmovdqu8 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 f1 ff 48 6f f5 \tvmovdqu16 %zmm5,%zmm6",},
+{{0x0f, 0x78, 0xc3, }, 3, 0, "", "",
+"0f 78 c3 \tvmread %eax,%ebx",},
+{{0x62, 0xf1, 0x7c, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 7c 48 78 f5 \tvcvttps2udq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 78 f5 \tvcvttpd2udq %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7f, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 7f 08 78 c6 \tvcvttsd2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7e, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 7e 08 78 c6 \tvcvttss2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 78 f5 \tvcvttps2uqq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 78 f5 \tvcvttpd2uqq %zmm5,%zmm6",},
+{{0x0f, 0x79, 0xd8, }, 3, 0, "", "",
+"0f 79 d8 \tvmwrite %eax,%ebx",},
+{{0x62, 0xf1, 0x7c, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 7c 48 79 f5 \tvcvtps2udq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 fc 4f 79 f5 \tvcvtpd2udq %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7f, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 7f 08 79 c6 \tvcvtsd2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7e, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 7e 08 79 c6 \tvcvtss2usi %xmm6,%eax",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 79 f5 \tvcvtps2uqq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 79 f5 \tvcvtpd2uqq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7e, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 4f 7a f5 \tvcvtudq2pd %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 7a f5 \tvcvtuqq2pd %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7f 48 7a f5 \tvcvtudq2ps %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 ff 4f 7a f5 \tvcvtuqq2ps %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 7a f5 \tvcvttps2qq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 7a f5 \tvcvttpd2qq %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 57 08 7b f0 \tvcvtusi2sd %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 56 08 7b f0 \tvcvtusi2ss %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x7d, 0x4f, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 f1 7d 4f 7b f5 \tvcvtps2qq %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 f1 fd 48 7b f5 \tvcvtpd2qq %zmm5,%zmm6",},
+{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "",
+"0f 7f c4 \tmovq %mm0,%mm4",},
+{{0xc5, 0xfd, 0x7f, 0xee, }, 4, 0, "", "",
+"c5 fd 7f ee \tvmovdqa %ymm5,%ymm6",},
+{{0x62, 0xf1, 0x7d, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7d 48 7f ee \tvmovdqa32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfd, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 fd 48 7f ee \tvmovdqa64 %zmm5,%zmm6",},
+{{0xc5, 0xfe, 0x7f, 0xee, }, 4, 0, "", "",
+"c5 fe 7f ee \tvmovdqu %ymm5,%ymm6",},
+{{0x62, 0xf1, 0x7e, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7e 48 7f ee \tvmovdqu32 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xfe, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 fe 48 7f ee \tvmovdqu64 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0x7f, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 7f 48 7f ee \tvmovdqu8 %zmm5,%zmm6",},
+{{0x62, 0xf1, 0xff, 0x48, 0x7f, 0xee, }, 6, 0, "", "",
+"62 f1 ff 48 7f ee \tvmovdqu16 %zmm5,%zmm6",},
+{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "",
+"0f db d1 \tpand %mm1,%mm2",},
+{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "",
+"66 0f db d1 \tpand %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "",
+"c5 cd db d4 \tvpand %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xdb, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 db f4 \tvpandd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xdb, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 db f4 \tvpandq %zmm4,%zmm5,%zmm6",},
+{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "",
+"0f df d1 \tpandn %mm1,%mm2",},
+{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "",
+"66 0f df d1 \tpandn %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "",
+"c5 cd df d4 \tvpandn %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xdf, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 df f4 \tvpandnd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xdf, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 df f4 \tvpandnq %zmm4,%zmm5,%zmm6",},
+{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 f9 e6 d1 \tvcvttpd2dq %xmm1,%xmm2",},
+{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "",
+"c5 fa e6 f5 \tvcvtdq2pd %xmm5,%xmm6",},
+{{0x62, 0xf1, 0x7e, 0x4f, 0xe6, 0xf5, }, 6, 0, "", "",
+"62 f1 7e 4f e6 f5 \tvcvtdq2pd %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xfe, 0x48, 0xe6, 0xf5, }, 6, 0, "", "",
+"62 f1 fe 48 e6 f5 \tvcvtqq2pd %zmm5,%zmm6",},
+{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 fb e6 d1 \tvcvtpd2dq %xmm1,%xmm2",},
+{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "",
+"0f eb f4 \tpor %mm4,%mm6",},
+{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "",
+"c5 cd eb d4 \tvpor %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xeb, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 eb f4 \tvpord %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xeb, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 eb f4 \tvporq %zmm4,%zmm5,%zmm6",},
+{{0x0f, 0xef, 0xf4, }, 3, 0, "", "",
+"0f ef f4 \tpxor %mm4,%mm6",},
+{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "",
+"c5 cd ef d4 \tvpxor %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x55, 0x48, 0xef, 0xf4, }, 6, 0, "", "",
+"62 f1 55 48 ef f4 \tvpxord %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0xef, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 ef f4 \tvpxorq %zmm4,%zmm5,%zmm6",},
+{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "",
+"66 0f 38 10 c1 \tpblendvb %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x10, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 10 f4 \tvpsrlvw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x10, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 10 ee \tvpmovuswb %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x11, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 11 ee \tvpmovusdb %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x11, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 11 f4 \tvpsravw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x12, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 12 ee \tvpmovusqb %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x12, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 12 f4 \tvpsllvw %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0xf2, 0x7d, 0x4f, 0x13, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 4f 13 f5 \tvcvtph2ps %ymm5,%zmm6{%k7}",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x13, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 13 ee \tvpmovusdw %zmm5,%ymm6{%k7}",},
+{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "",
+"66 0f 38 14 c1 \tblendvps %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x14, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 14 ee \tvpmovusqw %zmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0x55, 0x48, 0x14, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 14 f4 \tvprorvd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x14, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 14 f4 \tvprorvq %zmm4,%zmm5,%zmm6",},
+{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "",
+"66 0f 38 15 c1 \tblendvpd %xmm0,%xmm1,%xmm0",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x15, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 15 ee \tvpmovusqd %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x55, 0x48, 0x15, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 15 f4 \tvprolvd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x15, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 15 f4 \tvprolvq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 16 d4 \tvpermps %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x4d, 0x2f, 0x16, 0xd4, }, 6, 0, "", "",
+"62 f2 4d 2f 16 d4 \tvpermps %ymm4,%ymm6,%ymm2{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x2f, 0x16, 0xd4, }, 6, 0, "", "",
+"62 f2 cd 2f 16 d4 \tvpermpd %ymm4,%ymm6,%ymm2{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 19 f4 \tvbroadcastsd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x19, 0xf7, }, 6, 0, "", "",
+"62 f2 7d 48 19 f7 \tvbroadcastf32x2 %xmm7,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 1a 21 \tvbroadcastf128 (%ecx),%ymm4",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x1a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 1a 31 \tvbroadcastf32x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 1a 31 \tvbroadcastf64x2 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x1b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 1b 31 \tvbroadcastf32x8 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 1b 31 \tvbroadcastf64x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x1f, 0xf4, }, 6, 0, "", "",
+"62 f2 fd 48 1f f4 \tvpabsq %zmm4,%zmm6",},
+{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "",
+"c4 e2 79 20 ec \tvpmovsxbw %xmm4,%xmm5",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x20, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 20 ee \tvpmovswb %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 21 f4 \tvpmovsxbd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x21, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 21 ee \tvpmovsdb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 22 e4 \tvpmovsxbq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x22, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 22 ee \tvpmovsqb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 23 e4 \tvpmovsxwd %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x23, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 23 ee \tvpmovsdw %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 24 f4 \tvpmovsxwq %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x24, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 24 ee \tvpmovsqw %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 25 e4 \tvpmovsxdq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x25, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 25 ee \tvpmovsqd %zmm5,%ymm6{%k7}",},
+{{0x62, 0xf2, 0x4d, 0x48, 0x26, 0xed, }, 6, 0, "", "",
+"62 f2 4d 48 26 ed \tvptestmb %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0xcd, 0x48, 0x26, 0xed, }, 6, 0, "", "",
+"62 f2 cd 48 26 ed \tvptestmw %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0x56, 0x48, 0x26, 0xec, }, 6, 0, "", "",
+"62 f2 56 48 26 ec \tvptestnmb %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0xd6, 0x48, 0x26, 0xec, }, 6, 0, "", "",
+"62 f2 d6 48 26 ec \tvptestnmw %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0x4d, 0x48, 0x27, 0xed, }, 6, 0, "", "",
+"62 f2 4d 48 27 ed \tvptestmd %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0xcd, 0x48, 0x27, 0xed, }, 6, 0, "", "",
+"62 f2 cd 48 27 ed \tvptestmq %zmm5,%zmm6,%k5",},
+{{0x62, 0xf2, 0x56, 0x48, 0x27, 0xec, }, 6, 0, "", "",
+"62 f2 56 48 27 ec \tvptestnmd %zmm4,%zmm5,%k5",},
+{{0x62, 0xf2, 0xd6, 0x48, 0x27, 0xec, }, 6, 0, "", "",
+"62 f2 d6 48 27 ec \tvptestnmq %zmm4,%zmm5,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 28 d4 \tvpmuldq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x28, 0xf5, }, 6, 0, "", "",
+"62 f2 7e 48 28 f5 \tvpmovm2b %k5,%zmm6",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x28, 0xf5, }, 6, 0, "", "",
+"62 f2 fe 48 28 f5 \tvpmovm2w %k5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 29 d4 \tvpcmpeqq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x29, 0xee, }, 6, 0, "", "",
+"62 f2 7e 48 29 ee \tvpmovb2m %zmm6,%k5",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x29, 0xee, }, 6, 0, "", "",
+"62 f2 fe 48 29 ee \tvpmovw2m %zmm6,%k5",},
+{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 2a 21 \tvmovntdqa (%ecx),%ymm4",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x2a, 0xce, }, 6, 0, "", "",
+"62 f2 fe 48 2a ce \tvpbroadcastmb2q %k6,%zmm1",},
+{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2c 31 \tvmaskmovps (%ecx),%ymm4,%ymm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x2c, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 2c f4 \tvscalefps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x2c, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 2c f4 \tvscalefpd %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2d 31 \tvmaskmovpd (%ecx),%ymm4,%ymm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 2d f4 \tvscalefss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x2d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 2d f4 \tvscalefsd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 30 e4 \tvpmovzxbw %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x30, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 30 ee \tvpmovwb %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 31 f4 \tvpmovzxbd %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x31, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 31 ee \tvpmovdb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 32 e4 \tvpmovzxbq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x32, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 32 ee \tvpmovqb %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 33 e4 \tvpmovzxwd %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x33, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 33 ee \tvpmovdw %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 34 f4 \tvpmovzxwq %xmm4,%ymm6",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x34, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 34 ee \tvpmovqw %zmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 35 e4 \tvpmovzxdq %xmm4,%ymm4",},
+{{0x62, 0xf2, 0x7e, 0x4f, 0x35, 0xee, }, 6, 0, "", "",
+"62 f2 7e 4f 35 ee \tvpmovqd %zmm5,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 36 d4 \tvpermd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x4d, 0x2f, 0x36, 0xd4, }, 6, 0, "", "",
+"62 f2 4d 2f 36 d4 \tvpermd %ymm4,%ymm6,%ymm2{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x2f, 0x36, 0xd4, }, 6, 0, "", "",
+"62 f2 cd 2f 36 d4 \tvpermq %ymm4,%ymm6,%ymm2{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 38 d4 \tvpminsb %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x38, 0xf5, }, 6, 0, "", "",
+"62 f2 7e 48 38 f5 \tvpmovm2d %k5,%zmm6",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x38, 0xf5, }, 6, 0, "", "",
+"62 f2 fe 48 38 f5 \tvpmovm2q %k5,%zmm6",},
+{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "",
+"c4 e2 69 39 d9 \tvpminsd %xmm1,%xmm2,%xmm3",},
+{{0x62, 0xf2, 0x55, 0x48, 0x39, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 39 f4 \tvpminsd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x39, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 39 f4 \tvpminsq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x39, 0xee, }, 6, 0, "", "",
+"62 f2 7e 48 39 ee \tvpmovd2m %zmm6,%k5",},
+{{0x62, 0xf2, 0xfe, 0x48, 0x39, 0xee, }, 6, 0, "", "",
+"62 f2 fe 48 39 ee \tvpmovq2m %zmm6,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3a d4 \tvpminuw %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x7e, 0x48, 0x3a, 0xf6, }, 6, 0, "", "",
+"62 f2 7e 48 3a f6 \tvpbroadcastmw2d %k6,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3b d4 \tvpminud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3b, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3b f4 \tvpminud %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3b, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3b f4 \tvpminuq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3d d4 \tvpmaxsd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3d f4 \tvpmaxsd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3d f4 \tvpmaxsq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3f d4 \tvpmaxud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x3f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 3f f4 \tvpmaxud %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x3f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 3f f4 \tvpmaxuq %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 40 d4 \tvpmulld %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x40, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 40 f4 \tvpmulld %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x40, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 40 f4 \tvpmullq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x42, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 42 f5 \tvgetexpps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x42, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 42 f5 \tvgetexppd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x43, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 43 f4 \tvgetexpss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xe5, 0x0f, 0x43, 0xe2, }, 6, 0, "", "",
+"62 f2 e5 0f 43 e2 \tvgetexpsd %xmm2,%xmm3,%xmm4{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x44, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 44 f5 \tvplzcntd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x44, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 44 f5 \tvplzcntq %zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 46 d4 \tvpsravd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf2, 0x55, 0x48, 0x46, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 46 f4 \tvpsravd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x46, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 46 f4 \tvpsravq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x4c, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 4c f5 \tvrcp14ps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x4c, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 4c f5 \tvrcp14pd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 4d f4 \tvrcp14ss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x4d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 4d f4 \tvrcp14sd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x4e, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 4e f5 \tvrsqrt14ps %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x4e, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 4e f5 \tvrsqrt14pd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 0f 4f f4 \tvrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}",},
+{{0x62, 0xf2, 0xd5, 0x0f, 0x4f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 0f 4f f4 \tvrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "",
+"c4 e2 79 59 f4 \tvpbroadcastq %xmm4,%xmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x59, 0xf7, }, 6, 0, "", "",
+"62 f2 7d 48 59 f7 \tvbroadcasti32x2 %xmm7,%zmm6",},
+{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 5a 21 \tvbroadcasti128 (%ecx),%ymm4",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x5a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 5a 31 \tvbroadcasti32x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x5a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 5a 31 \tvbroadcasti64x2 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x5b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 5b 31 \tvbroadcasti32x8 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x5b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 5b 31 \tvbroadcasti64x4 (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x64, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 64 f4 \tvpblendmd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x64, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 64 f4 \tvpblendmq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x65, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 65 f4 \tvblendmps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x65, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 65 f4 \tvblendmpd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x66, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 66 f4 \tvpblendmb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x66, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 66 f4 \tvpblendmw %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x75, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 75 f4 \tvpermi2b %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x75, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 75 f4 \tvpermi2w %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x76, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 76 f4 \tvpermi2d %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x76, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 76 f4 \tvpermi2q %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x77, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 77 f4 \tvpermi2ps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x77, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 77 f4 \tvpermi2pd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7a, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7a d8 \tvpbroadcastb %eax,%xmm3",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7b, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7b d8 \tvpbroadcastw %eax,%xmm3",},
+{{0x62, 0xf2, 0x7d, 0x08, 0x7c, 0xd8, }, 6, 0, "", "",
+"62 f2 7d 08 7c d8 \tvpbroadcastd %eax,%xmm3",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7d f4 \tvpermt2b %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7d f4 \tvpermt2w %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7e, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7e f4 \tvpermt2d %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7e, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7e f4 \tvpermt2q %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x55, 0x48, 0x7f, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 7f f4 \tvpermt2ps %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x7f, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 7f f4 \tvpermt2pd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x83, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 83 f4 \tvpmultishiftqb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x88, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 88 31 \tvexpandps (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x88, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 88 31 \tvexpandpd (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x89, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 89 31 \tvpexpandd (%ecx),%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x89, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 89 31 \tvpexpandq (%ecx),%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x8a, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 8a 31 \tvcompressps %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x8a, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 8a 31 \tvcompresspd %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0x7d, 0x48, 0x8b, 0x31, }, 6, 0, "", "",
+"62 f2 7d 48 8b 31 \tvpcompressd %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0xfd, 0x48, 0x8b, 0x31, }, 6, 0, "", "",
+"62 f2 fd 48 8b 31 \tvpcompressq %zmm6,(%ecx)",},
+{{0x62, 0xf2, 0x55, 0x48, 0x8d, 0xf4, }, 6, 0, "", "",
+"62 f2 55 48 8d f4 \tvpermb %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0x8d, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 8d f4 \tvpermw %zmm4,%zmm5,%zmm6",},
+{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "",
+"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%ebp,%xmm7,2),%xmm1",},
+{{0x62, 0xf2, 0x7d, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 90 b4 fd 7b 00 00 00 \tvpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0x90, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 90 b4 fd 7b 00 00 00 \tvpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}",},
+{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%ebp,%xmm7,2),%xmm1",},
+{{0x62, 0xf2, 0x7d, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 91 b4 fd 7b 00 00 00 \tvpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0x91, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 91 b4 fd 7b 00 00 00 \tvpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a0 b4 fd 7b 00 00 00 \tvpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa0, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a0 b4 fd 7b 00 00 00 \tvpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a1 b4 fd 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x29, 0xa1, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 29 a1 b4 fd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a2 b4 fd 7b 00 00 00 \tvscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa2, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a2 b4 fd 7b 00 00 00 \tvscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 a3 b4 fd 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xa3, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 a3 b4 fd 7b 00 00 00 \tvscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xd5, 0x48, 0xb4, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 b4 f4 \tvpmadd52luq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0xd5, 0x48, 0xb5, 0xf4, }, 6, 0, "", "",
+"62 f2 d5 48 b5 f4 \tvpmadd52huq %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xc4, 0xf5, }, 6, 0, "", "",
+"62 f2 7d 48 c4 f5 \tvpconflictd %zmm5,%zmm6",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xc4, 0xf5, }, 6, 0, "", "",
+"62 f2 fd 48 c4 f5 \tvpconflictq %zmm5,%zmm6",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xc8, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 c8 fe \tvexp2ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xc8, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 c8 fe \tvexp2pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xca, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 ca fe \tvrcp28ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xca, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 ca fe \tvrcp28pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x4d, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "",
+"62 f2 4d 0f cb fd \tvrcp28ss %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x0f, 0xcb, 0xfd, }, 6, 0, "", "",
+"62 f2 cd 0f cb fd \tvrcp28sd %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0x7d, 0x48, 0xcc, 0xfe, }, 6, 0, "", "",
+"62 f2 7d 48 cc fe \tvrsqrt28ps %zmm6,%zmm7",},
+{{0x62, 0xf2, 0xfd, 0x48, 0xcc, 0xfe, }, 6, 0, "", "",
+"62 f2 fd 48 cc fe \tvrsqrt28pd %zmm6,%zmm7",},
+{{0x62, 0xf2, 0x4d, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "",
+"62 f2 4d 0f cd fd \tvrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf2, 0xcd, 0x0f, 0xcd, 0xfd, }, 6, 0, "", "",
+"62 f2 cd 0f cd fd \tvrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 03 fd 12 \tvalignd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x03, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 03 fd 12 \tvalignq $0x12,%zmm5,%zmm6,%zmm7",},
+{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 08 d6 05 \tvroundps $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x08, 0xf5, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 08 f5 12 \tvrndscaleps $0x12,%zmm5,%zmm6",},
+{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 09 d6 05 \tvroundpd $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x09, 0xf5, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 09 f5 12 \tvrndscalepd $0x12,%zmm5,%zmm6",},
+{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0a d4 05 \tvroundss $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0xf3, 0x55, 0x0f, 0x0a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 0f 0a f4 12 \tvrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0b d4 05 \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0xf3, 0xd5, 0x0f, 0x0b, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 0f 0b f4 12 \tvrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 18 f4 05 \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0xf3, 0x55, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 4f 18 f4 12 \tvinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf3, 0xd5, 0x4f, 0x18, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 4f 18 f4 12 \tvinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 19 e4 05 \tvextractf128 $0x5,%ymm4,%xmm4",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 19 ee 12 \tvextractf32x4 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x19, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 19 ee 12 \tvextractf64x2 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 4f 1a fd 12 \tvinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x4f, 0x1a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 4f 1a fd 12 \tvinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 1b f7 12 \tvextractf32x8 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x1b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 1b f7 12 \tvextractf64x4 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0x45, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 1e ee 12 \tvpcmpud $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x1e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 1e ee 12 \tvpcmpuq $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x45, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 1f ee 12 \tvpcmpd $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x1f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 1f ee 12 \tvpcmpq $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 23 fd 12 \tvshuff32x4 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x23, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 23 fd 12 \tvshuff64x2 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 25 fd 12 \tvpternlogd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x25, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 25 fd 12 \tvpternlogq $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 26 fe 12 \tvgetmantps $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x26, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 26 fe 12 \tvgetmantpd $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 0f 27 fd 12 \tvgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x0f, 0x27, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 0f 27 fd 12 \tvgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 38 f4 05 \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0xf3, 0x55, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 4f 38 f4 12 \tvinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf3, 0xd5, 0x4f, 0x38, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 d5 4f 38 f4 12 \tvinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 39 e6 05 \tvextracti128 $0x5,%ymm4,%xmm6",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 39 ee 12 \tvextracti32x4 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x39, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 39 ee 12 \tvextracti64x2 $0x12,%zmm5,%xmm6{%k7}",},
+{{0x62, 0xf3, 0x4d, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 4f 3a fd 12 \tvinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x4f, 0x3a, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 4f 3a fd 12 \tvinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 7d 4f 3b f7 12 \tvextracti32x8 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0xfd, 0x4f, 0x3b, 0xf7, 0x12, }, 7, 0, "", "",
+"62 f3 fd 4f 3b f7 12 \tvextracti64x4 $0x12,%zmm6,%ymm7{%k7}",},
+{{0x62, 0xf3, 0x45, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 3e ee 12 \tvpcmpub $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x3e, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 3e ee 12 \tvpcmpuw $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0x45, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 45 48 3f ee 12 \tvpcmpb $0x12,%zmm6,%zmm7,%k5",},
+{{0x62, 0xf3, 0xc5, 0x48, 0x3f, 0xee, 0x12, }, 7, 0, "", "",
+"62 f3 c5 48 3f ee 12 \tvpcmpw $0x12,%zmm6,%zmm7,%k5",},
+{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 4d 42 d4 05 \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 43 fd 12 \tvshufi32x4 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x43, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 43 fd 12 \tvshufi64x2 $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 50 fd 12 \tvrangeps $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x50, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 50 fd 12 \tvrangepd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 08 51 fd 12 \tvrangess $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0xcd, 0x08, 0x51, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 08 51 fd 12 \tvrangesd $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0x4d, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 48 54 fd 12 \tvfixupimmps $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xcd, 0x48, 0x54, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 48 54 fd 12 \tvfixupimmpd $0x12,%zmm5,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 0f 55 fd 12 \tvfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0xcd, 0x0f, 0x55, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 0f 55 fd 12 \tvfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 56 fe 12 \tvreduceps $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x56, 0xfe, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 56 fe 12 \tvreducepd $0x12,%zmm6,%zmm7",},
+{{0x62, 0xf3, 0x4d, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 4d 08 57 fd 12 \tvreducess $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0xcd, 0x08, 0x57, 0xfd, 0x12, }, 7, 0, "", "",
+"62 f3 cd 08 57 fd 12 \tvreducesd $0x12,%xmm5,%xmm6,%xmm7",},
+{{0x62, 0xf3, 0x7d, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 7d 48 66 ef 12 \tvfpclassps $0x12,%zmm7,%k5",},
+{{0x62, 0xf3, 0xfd, 0x48, 0x66, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 fd 48 66 ef 12 \tvfpclasspd $0x12,%zmm7,%k5",},
+{{0x62, 0xf3, 0x7d, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 7d 08 67 ef 12 \tvfpclassss $0x12,%xmm7,%k5",},
+{{0x62, 0xf3, 0xfd, 0x08, 0x67, 0xef, 0x12, }, 7, 0, "", "",
+"62 f3 fd 08 67 ef 12 \tvfpclasssd $0x12,%xmm7,%k5",},
+{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "",
+"62 f1 4d 48 72 c5 12 \tvprord $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xc5, 0x12, }, 7, 0, "", "",
+"62 f1 cd 48 72 c5 12 \tvprorq $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0x4d, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 f1 4d 48 72 cd 12 \tvprold $0x12,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xcd, 0x48, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 f1 cd 48 72 cd 12 \tvprolq $0x12,%zmm5,%zmm6",},
+{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "",
+"0f 72 e6 02 \tpsrad $0x2,%mm6",},
+{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "",
+"c5 ed 72 e6 05 \tvpsrad $0x5,%ymm6,%ymm2",},
+{{0x62, 0xf1, 0x6d, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "",
+"62 f1 6d 48 72 e6 05 \tvpsrad $0x5,%zmm6,%zmm2",},
+{{0x62, 0xf1, 0xed, 0x48, 0x72, 0xe6, 0x05, }, 7, 0, "", "",
+"62 f1 ed 48 72 e6 05 \tvpsraq $0x5,%zmm6,%zmm2",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 8c fd 7b 00 00 00 \tvgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 94 fd 7b 00 00 00 \tvgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 ac fd 7b 00 00 00 \tvscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc6, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c6 b4 fd 7b 00 00 00 \tvscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x8c, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 8c fd 7b 00 00 00 \tvgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0x94, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 94 fd 7b 00 00 00 \tvgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xac, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 ac fd 7b 00 00 00 \tvscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0x7d, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 7d 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf2, 0xfd, 0x49, 0xc7, 0xb4, 0xfd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 f2 fd 49 c7 b4 fd 7b 00 00 00 \tvscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 48 58 f4 \tvaddpd %zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x4f, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 4f 58 f4 \tvaddpd %zmm4,%zmm5,%zmm6{%k7}",},
+{{0x62, 0xf1, 0xd5, 0xcf, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 cf 58 f4 \tvaddpd %zmm4,%zmm5,%zmm6{%k7}{z}",},
+{{0x62, 0xf1, 0xd5, 0x18, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 18 58 f4 \tvaddpd {rn-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 58 58 f4 \tvaddpd {ru-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x38, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 38 58 f4 \tvaddpd {rd-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x78, 0x58, 0xf4, }, 6, 0, "", "",
+"62 f1 d5 78 58 f4 \tvaddpd {rz-sae},%zmm4,%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x31, }, 6, 0, "", "",
+"62 f1 d5 48 58 31 \tvaddpd (%ecx),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0xb4, 0xc8, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "",
+"62 f1 d5 48 58 b4 c8 23 01 00 00 \tvaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x31, }, 6, 0, "", "",
+"62 f1 d5 58 58 31 \tvaddpd (%ecx){1to8},%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x48, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 f1 d5 48 58 72 7f \tvaddpd 0x1fc0(%edx),%zmm5,%zmm6",},
+{{0x62, 0xf1, 0xd5, 0x58, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 f1 d5 58 58 72 7f \tvaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6",},
+{{0x62, 0xf1, 0x4c, 0x58, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "",
+"62 f1 4c 58 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5",},
+{{0x62, 0xf1, 0xe7, 0x0f, 0xc2, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "",
+"62 f1 e7 0f c2 ac c8 23 01 00 00 01 \tvcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}",},
+{{0x62, 0xf1, 0xd7, 0x1f, 0xc2, 0xec, 0x02, }, 7, 0, "", "",
+"62 f1 d7 1f c2 ec 02 \tvcmplesd {sae},%xmm4,%xmm5,%k5{%k7}",},
+{{0x62, 0xf3, 0x5d, 0x0f, 0x27, 0xac, 0xc8, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "",
+"62 f3 5d 0f 27 ac c8 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}",},
+{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f3 0f 1b 00 \tbndmk (%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 05 78 56 34 12 \tbndmk 0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f3 0f 1b 18 \tbndmk (%eax),%bnd3",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1b 04 01 \tbndmk (%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 04 05 78 56 34 12 \tbndmk 0x12345678(,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1b 04 08 \tbndmk (%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1b 04 c8 \tbndmk (%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 40 12 \tbndmk 0x12(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 45 12 \tbndmk 0x12(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 01 12 \tbndmk 0x12(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 05 12 \tbndmk 0x12(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 08 12 \tbndmk 0x12(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 c8 12 \tbndmk 0x12(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 80 78 56 34 12 \tbndmk 0x12345678(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 85 78 56 34 12 \tbndmk 0x12345678(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 01 78 56 34 12 \tbndmk 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 05 78 56 34 12 \tbndmk 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 08 78 56 34 12 \tbndmk 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 c8 78 56 34 12 \tbndmk 0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f3 0f 1a 00 \tbndcl (%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 05 78 56 34 12 \tbndcl 0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f3 0f 1a 18 \tbndcl (%eax),%bnd3",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1a 04 01 \tbndcl (%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 04 05 78 56 34 12 \tbndcl 0x12345678(,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1a 04 08 \tbndcl (%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1a 04 c8 \tbndcl (%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 40 12 \tbndcl 0x12(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 45 12 \tbndcl 0x12(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 01 12 \tbndcl 0x12(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 05 12 \tbndcl 0x12(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 08 12 \tbndcl 0x12(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 c8 12 \tbndcl 0x12(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 80 78 56 34 12 \tbndcl 0x12345678(%eax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 85 78 56 34 12 \tbndcl 0x12345678(%ebp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 01 78 56 34 12 \tbndcl 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 05 78 56 34 12 \tbndcl 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 08 78 56 34 12 \tbndcl 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 c8 78 56 34 12 \tbndcl 0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f3 0f 1a c0 \tbndcl %eax,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f2 0f 1a 00 \tbndcu (%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 05 78 56 34 12 \tbndcu 0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f2 0f 1a 18 \tbndcu (%eax),%bnd3",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1a 04 01 \tbndcu (%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 04 05 78 56 34 12 \tbndcu 0x12345678(,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1a 04 08 \tbndcu (%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1a 04 c8 \tbndcu (%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 40 12 \tbndcu 0x12(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 45 12 \tbndcu 0x12(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 01 12 \tbndcu 0x12(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 05 12 \tbndcu 0x12(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 08 12 \tbndcu 0x12(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 c8 12 \tbndcu 0x12(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 80 78 56 34 12 \tbndcu 0x12345678(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 85 78 56 34 12 \tbndcu 0x12345678(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 01 78 56 34 12 \tbndcu 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 05 78 56 34 12 \tbndcu 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 08 78 56 34 12 \tbndcu 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 c8 78 56 34 12 \tbndcu 0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f2 0f 1a c0 \tbndcu %eax,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f2 0f 1b 00 \tbndcn (%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 05 78 56 34 12 \tbndcn 0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f2 0f 1b 18 \tbndcn (%eax),%bnd3",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1b 04 01 \tbndcn (%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 04 05 78 56 34 12 \tbndcn 0x12345678(,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1b 04 08 \tbndcn (%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1b 04 c8 \tbndcn (%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 40 12 \tbndcn 0x12(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 45 12 \tbndcn 0x12(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 01 12 \tbndcn 0x12(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 05 12 \tbndcn 0x12(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 08 12 \tbndcn 0x12(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 c8 12 \tbndcn 0x12(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 80 78 56 34 12 \tbndcn 0x12345678(%eax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 85 78 56 34 12 \tbndcn 0x12345678(%ebp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 01 78 56 34 12 \tbndcn 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 05 78 56 34 12 \tbndcn 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 08 78 56 34 12 \tbndcn 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 c8 78 56 34 12 \tbndcn 0x12345678(%eax,%ecx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "",
+"f2 0f 1b c0 \tbndcn %eax,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"66 0f 1a 00 \tbndmov (%eax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 05 78 56 34 12 \tbndmov 0x12345678,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"66 0f 1a 18 \tbndmov (%eax),%bnd3",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1a 04 01 \tbndmov (%ecx,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1a 04 08 \tbndmov (%eax,%ecx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1a 04 c8 \tbndmov (%eax,%ecx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1a 40 12 \tbndmov 0x12(%eax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1a 45 12 \tbndmov 0x12(%ebp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 01 12 \tbndmov 0x12(%ecx,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 05 12 \tbndmov 0x12(%ebp,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 08 12 \tbndmov 0x12(%eax,%ecx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 c8 12 \tbndmov 0x12(%eax,%ecx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%eax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%ebp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%eax,%ecx,8),%bnd0",},
+{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"66 0f 1b 00 \tbndmov %bnd0,(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 05 78 56 34 12 \tbndmov %bnd0,0x12345678",},
+{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"66 0f 1b 18 \tbndmov %bnd3,(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1b 04 01 \tbndmov %bnd0,(%ecx,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1b 04 08 \tbndmov %bnd0,(%eax,%ecx,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1b 04 c8 \tbndmov %bnd0,(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1b 40 12 \tbndmov %bnd0,0x12(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1b 45 12 \tbndmov %bnd0,0x12(%ebp)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 01 12 \tbndmov %bnd0,0x12(%ecx,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 05 12 \tbndmov %bnd0,0x12(%ebp,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 08 12 \tbndmov %bnd0,0x12(%eax,%ecx,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 c8 12 \tbndmov %bnd0,0x12(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax)",},
+{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%ecx,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%ebp,%eax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "",
+"66 0f 1a c8 \tbndmov %bnd0,%bnd1",},
+{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "",
+"66 0f 1a c1 \tbndmov %bnd1,%bnd0",},
+{{0x0f, 0x1a, 0x00, }, 3, 0, "", "",
+"0f 1a 00 \tbndldx (%eax),%bnd0",},
+{{0x0f, 0x1a, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 05 78 56 34 12 \tbndldx 0x12345678,%bnd0",},
+{{0x0f, 0x1a, 0x18, }, 3, 0, "", "",
+"0f 1a 18 \tbndldx (%eax),%bnd3",},
+{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1a 04 01 \tbndldx (%ecx,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1a 04 08 \tbndldx (%eax,%ecx,1),%bnd0",},
+{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1a 40 12 \tbndldx 0x12(%eax),%bnd0",},
+{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1a 45 12 \tbndldx 0x12(%ebp),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1a 44 01 12 \tbndldx 0x12(%ecx,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1a 44 05 12 \tbndldx 0x12(%ebp,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1a 44 08 12 \tbndldx 0x12(%eax,%ecx,1),%bnd0",},
+{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%eax),%bnd0",},
+{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%ebp),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%ecx,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 05 78 56 34 12 \tbndldx 0x12345678(%ebp,%eax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%eax,%ecx,1),%bnd0",},
+{{0x0f, 0x1b, 0x00, }, 3, 0, "", "",
+"0f 1b 00 \tbndstx %bnd0,(%eax)",},
+{{0x0f, 0x1b, 0x05, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 05 78 56 34 12 \tbndstx %bnd0,0x12345678",},
+{{0x0f, 0x1b, 0x18, }, 3, 0, "", "",
+"0f 1b 18 \tbndstx %bnd3,(%eax)",},
+{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1b 04 01 \tbndstx %bnd0,(%ecx,%eax,1)",},
+{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%eax,1)",},
+{{0x0f, 0x1b, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1b 04 08 \tbndstx %bnd0,(%eax,%ecx,1)",},
+{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1b 40 12 \tbndstx %bnd0,0x12(%eax)",},
+{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1b 45 12 \tbndstx %bnd0,0x12(%ebp)",},
+{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1b 44 01 12 \tbndstx %bnd0,0x12(%ecx,%eax,1)",},
+{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1b 44 05 12 \tbndstx %bnd0,0x12(%ebp,%eax,1)",},
+{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1b 44 08 12 \tbndstx %bnd0,0x12(%eax,%ecx,1)",},
+{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax)",},
+{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp)",},
+{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%ecx,%eax,1)",},
+{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%ebp,%eax,1)",},
+{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%eax,%ecx,1)",},
+{{0xf2, 0xe8, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "call", "unconditional",
+"f2 e8 fc ff ff ff \tbnd call fce <main+0xfce>",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10 \tbnd call *(%eax)",},
+{{0xf2, 0xc3, }, 2, 0, "ret", "indirect",
+"f2 c3 \tbnd ret ",},
+{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional",
+"f2 e9 fc ff ff ff \tbnd jmp fd9 <main+0xfd9>",},
+{{0xf2, 0xe9, 0xfc, 0xff, 0xff, 0xff, }, 6, 0xfffffffc, "jmp", "unconditional",
+"f2 e9 fc ff ff ff \tbnd jmp fdf <main+0xfdf>",},
+{{0xf2, 0xff, 0x21, }, 3, 0, "jmp", "indirect",
+"f2 ff 21 \tbnd jmp *(%ecx)",},
+{{0xf2, 0x0f, 0x85, 0xfc, 0xff, 0xff, 0xff, }, 7, 0xfffffffc, "jcc", "conditional",
+"f2 0f 85 fc ff ff ff \tbnd jne fe9 <main+0xfe9>",},
+{{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "",
+"0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "",
+"0f 3a cc d7 91 \tsha1rnds4 $0x91,%xmm7,%xmm2",},
+{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "",
+"0f 3a cc 00 91 \tsha1rnds4 $0x91,(%eax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "",
+"0f 3a cc 18 91 \tsha1rnds4 $0x91,(%eax),%xmm3",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 01 91 \tsha1rnds4 $0x91,(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 08 91 \tsha1rnds4 $0x91,(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 c8 91 \tsha1rnds4 $0x91,(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 40 12 91 \tsha1rnds4 $0x91,0x12(%eax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 45 12 91 \tsha1rnds4 $0x91,0x12(%ebp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "",
+"0f 38 c8 c1 \tsha1nexte %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "",
+"0f 38 c8 d7 \tsha1nexte %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "",
+"0f 38 c8 00 \tsha1nexte (%eax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 05 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "",
+"0f 38 c8 18 \tsha1nexte (%eax),%xmm3",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c8 04 01 \tsha1nexte (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c8 04 08 \tsha1nexte (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c8 04 c8 \tsha1nexte (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c8 40 12 \tsha1nexte 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c8 45 12 \tsha1nexte 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 01 12 \tsha1nexte 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 05 12 \tsha1nexte 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 08 12 \tsha1nexte 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 c8 12 \tsha1nexte 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "",
+"0f 38 c9 c1 \tsha1msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "",
+"0f 38 c9 d7 \tsha1msg1 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "",
+"0f 38 c9 00 \tsha1msg1 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 05 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x18, }, 4, 0, "", "",
+"0f 38 c9 18 \tsha1msg1 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c9 04 01 \tsha1msg1 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c9 04 08 \tsha1msg1 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c9 04 c8 \tsha1msg1 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c9 40 12 \tsha1msg1 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c9 45 12 \tsha1msg1 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 01 12 \tsha1msg1 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 05 12 \tsha1msg1 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 08 12 \tsha1msg1 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 c8 12 \tsha1msg1 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 c8 78 56 34 12 \tsha1msg1 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "",
+"0f 38 ca c1 \tsha1msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "",
+"0f 38 ca d7 \tsha1msg2 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "",
+"0f 38 ca 00 \tsha1msg2 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 05 78 56 34 12 \tsha1msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "",
+"0f 38 ca 18 \tsha1msg2 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 ca 04 01 \tsha1msg2 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 ca 04 08 \tsha1msg2 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 ca 04 c8 \tsha1msg2 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 ca 40 12 \tsha1msg2 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 ca 45 12 \tsha1msg2 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 01 12 \tsha1msg2 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 05 12 \tsha1msg2 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 08 12 \tsha1msg2 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 c8 12 \tsha1msg2 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "",
+"0f 38 cb cc \tsha256rnds2 %xmm0,%xmm4,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "",
+"0f 38 cb d7 \tsha256rnds2 %xmm0,%xmm7,%xmm2",},
+{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "",
+"0f 38 cb 08 \tsha256rnds2 %xmm0,(%eax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 0d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "",
+"0f 38 cb 18 \tsha256rnds2 %xmm0,(%eax),%xmm3",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "",
+"0f 38 cb 0c 01 \tsha256rnds2 %xmm0,(%ecx,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "",
+"0f 38 cb 0c 08 \tsha256rnds2 %xmm0,(%eax,%ecx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "",
+"0f 38 cb 0c c8 \tsha256rnds2 %xmm0,(%eax,%ecx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "",
+"0f 38 cb 48 12 \tsha256rnds2 %xmm0,0x12(%eax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "",
+"0f 38 cb 4d 12 \tsha256rnds2 %xmm0,0x12(%ebp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 01 12 \tsha256rnds2 %xmm0,0x12(%ecx,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 05 12 \tsha256rnds2 %xmm0,0x12(%ebp,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 08 12 \tsha256rnds2 %xmm0,0x12(%eax,%ecx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c c8 12 \tsha256rnds2 %xmm0,0x12(%eax,%ecx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ecx,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%ebp,%eax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%eax,%ecx,8),%xmm1",},
+{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "",
+"0f 38 cc c1 \tsha256msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "",
+"0f 38 cc d7 \tsha256msg1 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "",
+"0f 38 cc 00 \tsha256msg1 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 05 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "",
+"0f 38 cc 18 \tsha256msg1 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cc 04 01 \tsha256msg1 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cc 04 08 \tsha256msg1 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cc 04 c8 \tsha256msg1 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cc 40 12 \tsha256msg1 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cc 45 12 \tsha256msg1 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 01 12 \tsha256msg1 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 05 12 \tsha256msg1 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 08 12 \tsha256msg1 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 c8 12 \tsha256msg1 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "",
+"0f 38 cd c1 \tsha256msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "",
+"0f 38 cd d7 \tsha256msg2 %xmm7,%xmm2",},
+{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "",
+"0f 38 cd 00 \tsha256msg2 (%eax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 05 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "",
+"0f 38 cd 18 \tsha256msg2 (%eax),%xmm3",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cd 04 01 \tsha256msg2 (%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cd 04 08 \tsha256msg2 (%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cd 04 c8 \tsha256msg2 (%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cd 40 12 \tsha256msg2 0x12(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cd 45 12 \tsha256msg2 0x12(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 01 12 \tsha256msg2 0x12(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 05 12 \tsha256msg2 0x12(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 08 12 \tsha256msg2 0x12(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 c8 12 \tsha256msg2 0x12(%eax,%ecx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%eax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%ebp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%ecx,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 0x12345678(%ebp,%eax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%eax,%ecx,8),%xmm0",},
+{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "",
+"66 0f ae 38 \tclflushopt (%eax)",},
+{{0x66, 0x0f, 0xae, 0x3d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f ae 3d 78 56 34 12 \tclflushopt 0x12345678",},
+{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0x38, }, 3, 0, "", "",
+"0f ae 38 \tclflush (%eax)",},
+{{0x0f, 0xae, 0xf8, }, 3, 0, "", "",
+"0f ae f8 \tsfence ",},
+{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"66 0f ae 30 \tclwb (%eax)",},
+{{0x66, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f ae 35 78 56 34 12 \tclwb 0x12345678",},
+{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0x30, }, 3, 0, "", "",
+"0f ae 30 \txsaveopt (%eax)",},
+{{0x0f, 0xae, 0xf0, }, 3, 0, "", "",
+"0f ae f0 \tmfence ",},
+{{0x0f, 0xc7, 0x20, }, 3, 0, "", "",
+"0f c7 20 \txsavec (%eax)",},
+{{0x0f, 0xc7, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f c7 25 78 56 34 12 \txsavec 0x12345678",},
+{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xc7, 0x28, }, 3, 0, "", "",
+"0f c7 28 \txsaves (%eax)",},
+{{0x0f, 0xc7, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f c7 2d 78 56 34 12 \txsaves 0x12345678",},
+{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xc7, 0x18, }, 3, 0, "", "",
+"0f c7 18 \txrstors (%eax)",},
+{{0x0f, 0xc7, 0x1d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f c7 1d 78 56 34 12 \txrstors 0x12345678",},
+{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20 \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20 \tptwritel (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%eax,%ecx,8)",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
new file mode 100644
index 000000000..c57f34603
--- /dev/null
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -0,0 +1,1729 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generated by gen-insn-x86-dat.sh and gen-insn-x86-dat.awk
+ * from insn-x86-dat-src.c for inclusion by insn-x86.c
+ * Do not change this code.
+ */
+
+{{0x0f, 0x31, }, 2, 0, "", "",
+"0f 31 \trdtsc ",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",},
+{{0x48, 0x0f, 0x41, 0xd8, }, 4, 0, "", "",
+"48 0f 41 d8 \tcmovno %rax,%rbx",},
+{{0x48, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"48 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%rcx",},
+{{0x66, 0x0f, 0x41, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 41 88 78 56 34 12 \tcmovno 0x12345678(%rax),%cx",},
+{{0x48, 0x0f, 0x44, 0xd8, }, 4, 0, "", "",
+"48 0f 44 d8 \tcmove %rax,%rbx",},
+{{0x48, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"48 0f 44 88 78 56 34 12 \tcmove 0x12345678(%rax),%rcx",},
+{{0x66, 0x0f, 0x44, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 44 88 78 56 34 12 \tcmove 0x12345678(%rax),%cx",},
+{{0x0f, 0x90, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 90 80 78 56 34 12 \tseto 0x12345678(%rax)",},
+{{0x0f, 0x91, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 91 80 78 56 34 12 \tsetno 0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb 0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb 0x12345678(%rax)",},
+{{0x0f, 0x92, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 92 80 78 56 34 12 \tsetb 0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae 0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae 0x12345678(%rax)",},
+{{0x0f, 0x93, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 93 80 78 56 34 12 \tsetae 0x12345678(%rax)",},
+{{0x0f, 0x98, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 98 80 78 56 34 12 \tsets 0x12345678(%rax)",},
+{{0x0f, 0x99, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 99 80 78 56 34 12 \tsetns 0x12345678(%rax)",},
+{{0xc5, 0xcc, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cc 41 ef \tkandw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 41 ef \tkandq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x41, 0xef, }, 4, 0, "", "",
+"c5 cd 41 ef \tkandb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x41, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 41 ef \tkandd %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cc 42 ef \tkandnw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 42 ef \tkandnq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x42, 0xef, }, 4, 0, "", "",
+"c5 cd 42 ef \tkandnb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x42, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 42 ef \tkandnd %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f8 44 f7 \tknotw %k7,%k6",},
+{{0xc4, 0xe1, 0xf8, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f8 44 f7 \tknotq %k7,%k6",},
+{{0xc5, 0xf9, 0x44, 0xf7, }, 4, 0, "", "",
+"c5 f9 44 f7 \tknotb %k7,%k6",},
+{{0xc4, 0xe1, 0xf9, 0x44, 0xf7, }, 5, 0, "", "",
+"c4 e1 f9 44 f7 \tknotd %k7,%k6",},
+{{0xc5, 0xcc, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cc 45 ef \tkorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 45 ef \tkorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x45, 0xef, }, 4, 0, "", "",
+"c5 cd 45 ef \tkorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x45, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 45 ef \tkord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cc 46 ef \tkxnorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 46 ef \tkxnorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x46, 0xef, }, 4, 0, "", "",
+"c5 cd 46 ef \tkxnorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x46, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 46 ef \tkxnord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cc 47 ef \tkxorw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 47 ef \tkxorq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x47, 0xef, }, 4, 0, "", "",
+"c5 cd 47 ef \tkxorb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x47, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 47 ef \tkxord %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cc 4a ef \tkaddw %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4a ef \tkaddq %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4a, 0xef, }, 4, 0, "", "",
+"c5 cd 4a ef \tkaddb %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcd, 0x4a, 0xef, }, 5, 0, "", "",
+"c4 e1 cd 4a ef \tkaddd %k7,%k6,%k5",},
+{{0xc5, 0xcd, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cd 4b ef \tkunpckbw %k7,%k6,%k5",},
+{{0xc5, 0xcc, 0x4b, 0xef, }, 4, 0, "", "",
+"c5 cc 4b ef \tkunpckwd %k7,%k6,%k5",},
+{{0xc4, 0xe1, 0xcc, 0x4b, 0xef, }, 5, 0, "", "",
+"c4 e1 cc 4b ef \tkunpckdq %k7,%k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f8 90 ee \tkmovw %k6,%k5",},
+{{0xc5, 0xf8, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f8 90 29 \tkmovw (%rcx),%k5",},
+{{0xc4, 0xa1, 0x78, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 78 90 ac f0 23 01 00 00 \tkmovw 0x123(%rax,%r14,8),%k5",},
+{{0xc5, 0xf8, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f8 91 29 \tkmovw %k5,(%rcx)",},
+{{0xc4, 0xa1, 0x78, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 78 91 ac f0 23 01 00 00 \tkmovw %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xf8, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f8 92 e8 \tkmovw %eax,%k5",},
+{{0xc5, 0xf8, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f8 92 ed \tkmovw %ebp,%k5",},
+{{0xc4, 0xc1, 0x78, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 78 92 ed \tkmovw %r13d,%k5",},
+{{0xc5, 0xf8, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f8 93 c5 \tkmovw %k5,%eax",},
+{{0xc5, 0xf8, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f8 93 ed \tkmovw %k5,%ebp",},
+{{0xc5, 0x78, 0x93, 0xed, }, 4, 0, "", "",
+"c5 78 93 ed \tkmovw %k5,%r13d",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 90 ee \tkmovq %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 90 29 \tkmovq (%rcx),%k5",},
+{{0xc4, 0xa1, 0xf8, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f8 90 ac f0 23 01 00 00 \tkmovq 0x123(%rax,%r14,8),%k5",},
+{{0xc4, 0xe1, 0xf8, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f8 91 29 \tkmovq %k5,(%rcx)",},
+{{0xc4, 0xa1, 0xf8, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f8 91 ac f0 23 01 00 00 \tkmovq %k5,0x123(%rax,%r14,8)",},
+{{0xc4, 0xe1, 0xfb, 0x92, 0xe8, }, 5, 0, "", "",
+"c4 e1 fb 92 e8 \tkmovq %rax,%k5",},
+{{0xc4, 0xe1, 0xfb, 0x92, 0xed, }, 5, 0, "", "",
+"c4 e1 fb 92 ed \tkmovq %rbp,%k5",},
+{{0xc4, 0xc1, 0xfb, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 fb 92 ed \tkmovq %r13,%k5",},
+{{0xc4, 0xe1, 0xfb, 0x93, 0xc5, }, 5, 0, "", "",
+"c4 e1 fb 93 c5 \tkmovq %k5,%rax",},
+{{0xc4, 0xe1, 0xfb, 0x93, 0xed, }, 5, 0, "", "",
+"c4 e1 fb 93 ed \tkmovq %k5,%rbp",},
+{{0xc4, 0x61, 0xfb, 0x93, 0xed, }, 5, 0, "", "",
+"c4 61 fb 93 ed \tkmovq %k5,%r13",},
+{{0xc5, 0xf9, 0x90, 0xee, }, 4, 0, "", "",
+"c5 f9 90 ee \tkmovb %k6,%k5",},
+{{0xc5, 0xf9, 0x90, 0x29, }, 4, 0, "", "",
+"c5 f9 90 29 \tkmovb (%rcx),%k5",},
+{{0xc4, 0xa1, 0x79, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 79 90 ac f0 23 01 00 00 \tkmovb 0x123(%rax,%r14,8),%k5",},
+{{0xc5, 0xf9, 0x91, 0x29, }, 4, 0, "", "",
+"c5 f9 91 29 \tkmovb %k5,(%rcx)",},
+{{0xc4, 0xa1, 0x79, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 79 91 ac f0 23 01 00 00 \tkmovb %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xf9, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 f9 92 e8 \tkmovb %eax,%k5",},
+{{0xc5, 0xf9, 0x92, 0xed, }, 4, 0, "", "",
+"c5 f9 92 ed \tkmovb %ebp,%k5",},
+{{0xc4, 0xc1, 0x79, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 79 92 ed \tkmovb %r13d,%k5",},
+{{0xc5, 0xf9, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 f9 93 c5 \tkmovb %k5,%eax",},
+{{0xc5, 0xf9, 0x93, 0xed, }, 4, 0, "", "",
+"c5 f9 93 ed \tkmovb %k5,%ebp",},
+{{0xc5, 0x79, 0x93, 0xed, }, 4, 0, "", "",
+"c5 79 93 ed \tkmovb %k5,%r13d",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 90 ee \tkmovd %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x90, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 90 29 \tkmovd (%rcx),%k5",},
+{{0xc4, 0xa1, 0xf9, 0x90, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f9 90 ac f0 23 01 00 00 \tkmovd 0x123(%rax,%r14,8),%k5",},
+{{0xc4, 0xe1, 0xf9, 0x91, 0x29, }, 5, 0, "", "",
+"c4 e1 f9 91 29 \tkmovd %k5,(%rcx)",},
+{{0xc4, 0xa1, 0xf9, 0x91, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 10, 0, "", "",
+"c4 a1 f9 91 ac f0 23 01 00 00 \tkmovd %k5,0x123(%rax,%r14,8)",},
+{{0xc5, 0xfb, 0x92, 0xe8, }, 4, 0, "", "",
+"c5 fb 92 e8 \tkmovd %eax,%k5",},
+{{0xc5, 0xfb, 0x92, 0xed, }, 4, 0, "", "",
+"c5 fb 92 ed \tkmovd %ebp,%k5",},
+{{0xc4, 0xc1, 0x7b, 0x92, 0xed, }, 5, 0, "", "",
+"c4 c1 7b 92 ed \tkmovd %r13d,%k5",},
+{{0xc5, 0xfb, 0x93, 0xc5, }, 4, 0, "", "",
+"c5 fb 93 c5 \tkmovd %k5,%eax",},
+{{0xc5, 0xfb, 0x93, 0xed, }, 4, 0, "", "",
+"c5 fb 93 ed \tkmovd %k5,%ebp",},
+{{0xc5, 0x7b, 0x93, 0xed, }, 4, 0, "", "",
+"c5 7b 93 ed \tkmovd %k5,%r13d",},
+{{0xc5, 0xf8, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f8 98 ee \tkortestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 98 ee \tkortestq %k6,%k5",},
+{{0xc5, 0xf9, 0x98, 0xee, }, 4, 0, "", "",
+"c5 f9 98 ee \tkortestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x98, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 98 ee \tkortestd %k6,%k5",},
+{{0xc5, 0xf8, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f8 99 ee \tktestw %k6,%k5",},
+{{0xc4, 0xe1, 0xf8, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f8 99 ee \tktestq %k6,%k5",},
+{{0xc5, 0xf9, 0x99, 0xee, }, 4, 0, "", "",
+"c5 f9 99 ee \tktestb %k6,%k5",},
+{{0xc4, 0xe1, 0xf9, 0x99, 0xee, }, 5, 0, "", "",
+"c4 e1 f9 99 ee \tktestd %k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x30, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 30 ee 12 \tkshiftrw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x31, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 31 ee 5b \tkshiftrq $0x5b,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x32, 0xee, 0x12, }, 6, 0, "", "",
+"c4 e3 f9 32 ee 12 \tkshiftlw $0x12,%k6,%k5",},
+{{0xc4, 0xe3, 0xf9, 0x33, 0xee, 0x5b, }, 6, 0, "", "",
+"c4 e3 f9 33 ee 5b \tkshiftlq $0x5b,%k6,%k5",},
+{{0xc5, 0xf8, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f8 5b f5 \tvcvtdq2ps %xmm5,%xmm6",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x5b, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 5b f5 \tvcvtqq2ps %zmm29,%ymm6{%k7}",},
+{{0xc5, 0xf9, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 f9 5b f5 \tvcvtps2dq %xmm5,%xmm6",},
+{{0xc5, 0xfa, 0x5b, 0xf5, }, 4, 0, "", "",
+"c5 fa 5b f5 \tvcvttps2dq %xmm5,%xmm6",},
+{{0x0f, 0x6f, 0xe0, }, 3, 0, "", "",
+"0f 6f e0 \tmovq %mm0,%mm4",},
+{{0xc5, 0xfd, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fd 6f f4 \tvmovdqa %ymm4,%ymm6",},
+{{0x62, 0x01, 0x7d, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 7d 48 6f d1 \tvmovdqa32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfd, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 fd 48 6f d1 \tvmovdqa64 %zmm25,%zmm26",},
+{{0xc5, 0xfe, 0x6f, 0xf4, }, 4, 0, "", "",
+"c5 fe 6f f4 \tvmovdqu %ymm4,%ymm6",},
+{{0x62, 0x01, 0x7e, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 01 7e 48 6f f5 \tvmovdqu32 %zmm29,%zmm30",},
+{{0x62, 0x01, 0xfe, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 6f d1 \tvmovdqu64 %zmm25,%zmm26",},
+{{0x62, 0x01, 0x7f, 0x48, 0x6f, 0xf5, }, 6, 0, "", "",
+"62 01 7f 48 6f f5 \tvmovdqu8 %zmm29,%zmm30",},
+{{0x62, 0x01, 0xff, 0x48, 0x6f, 0xd1, }, 6, 0, "", "",
+"62 01 ff 48 6f d1 \tvmovdqu16 %zmm25,%zmm26",},
+{{0x0f, 0x78, 0xc3, }, 3, 0, "", "",
+"0f 78 c3 \tvmread %rax,%rbx",},
+{{0x62, 0x01, 0x7c, 0x48, 0x78, 0xd1, }, 6, 0, "", "",
+"62 01 7c 48 78 d1 \tvcvttps2udq %zmm25,%zmm26",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x78, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 78 f5 \tvcvttpd2udq %zmm29,%ymm6{%k7}",},
+{{0x62, 0xf1, 0xff, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 ff 08 78 c6 \tvcvttsd2usi %xmm6,%rax",},
+{{0x62, 0xf1, 0xfe, 0x08, 0x78, 0xc6, }, 6, 0, "", "",
+"62 f1 fe 08 78 c6 \tvcvttss2usi %xmm6,%rax",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x78, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 78 d5 \tvcvttps2uqq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x78, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 78 f5 \tvcvttpd2uqq %zmm29,%zmm30",},
+{{0x0f, 0x79, 0xd8, }, 3, 0, "", "",
+"0f 79 d8 \tvmwrite %rax,%rbx",},
+{{0x62, 0x01, 0x7c, 0x48, 0x79, 0xd1, }, 6, 0, "", "",
+"62 01 7c 48 79 d1 \tvcvtps2udq %zmm25,%zmm26",},
+{{0x62, 0x91, 0xfc, 0x4f, 0x79, 0xf5, }, 6, 0, "", "",
+"62 91 fc 4f 79 f5 \tvcvtpd2udq %zmm29,%ymm6{%k7}",},
+{{0x62, 0xf1, 0xff, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 ff 08 79 c6 \tvcvtsd2usi %xmm6,%rax",},
+{{0x62, 0xf1, 0xfe, 0x08, 0x79, 0xc6, }, 6, 0, "", "",
+"62 f1 fe 08 79 c6 \tvcvtss2usi %xmm6,%rax",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x79, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 79 d5 \tvcvtps2uqq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x79, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 79 f5 \tvcvtpd2uqq %zmm29,%zmm30",},
+{{0x62, 0x61, 0x7e, 0x4f, 0x7a, 0xed, }, 6, 0, "", "",
+"62 61 7e 4f 7a ed \tvcvtudq2pd %ymm5,%zmm29{%k7}",},
+{{0x62, 0x01, 0xfe, 0x48, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 7a d1 \tvcvtuqq2pd %zmm25,%zmm26",},
+{{0x62, 0x01, 0x7f, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 01 7f 48 7a f5 \tvcvtudq2ps %zmm29,%zmm30",},
+{{0x62, 0x01, 0xff, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 ff 4f 7a d1 \tvcvtuqq2ps %zmm25,%ymm26{%k7}",},
+{{0x62, 0x01, 0x7d, 0x4f, 0x7a, 0xd1, }, 6, 0, "", "",
+"62 01 7d 4f 7a d1 \tvcvttps2qq %ymm25,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7a, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 7a f5 \tvcvttpd2qq %zmm29,%zmm30",},
+{{0x62, 0xf1, 0x57, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 57 08 7b f0 \tvcvtusi2sd %eax,%xmm5,%xmm6",},
+{{0x62, 0xf1, 0x56, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 f1 56 08 7b f0 \tvcvtusi2ss %eax,%xmm5,%xmm6",},
+{{0x62, 0x61, 0x7d, 0x4f, 0x7b, 0xd5, }, 6, 0, "", "",
+"62 61 7d 4f 7b d5 \tvcvtps2qq %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7b, 0xf5, }, 6, 0, "", "",
+"62 01 fd 48 7b f5 \tvcvtpd2qq %zmm29,%zmm30",},
+{{0x0f, 0x7f, 0xc4, }, 3, 0, "", "",
+"0f 7f c4 \tmovq %mm0,%mm4",},
+{{0xc5, 0x7d, 0x7f, 0xc6, }, 4, 0, "", "",
+"c5 7d 7f c6 \tvmovdqa %ymm8,%ymm6",},
+{{0x62, 0x01, 0x7d, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 7d 48 7f ca \tvmovdqa32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfd, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 fd 48 7f ca \tvmovdqa64 %zmm25,%zmm26",},
+{{0xc5, 0x7e, 0x7f, 0xc6, }, 4, 0, "", "",
+"c5 7e 7f c6 \tvmovdqu %ymm8,%ymm6",},
+{{0x62, 0x01, 0x7e, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 7e 48 7f ca \tvmovdqu32 %zmm25,%zmm26",},
+{{0x62, 0x01, 0xfe, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 fe 48 7f ca \tvmovdqu64 %zmm25,%zmm26",},
+{{0x62, 0x61, 0x7f, 0x48, 0x7f, 0x31, }, 6, 0, "", "",
+"62 61 7f 48 7f 31 \tvmovdqu8 %zmm30,(%rcx)",},
+{{0x62, 0x01, 0xff, 0x48, 0x7f, 0xca, }, 6, 0, "", "",
+"62 01 ff 48 7f ca \tvmovdqu16 %zmm25,%zmm26",},
+{{0x0f, 0xdb, 0xd1, }, 3, 0, "", "",
+"0f db d1 \tpand %mm1,%mm2",},
+{{0x66, 0x0f, 0xdb, 0xd1, }, 4, 0, "", "",
+"66 0f db d1 \tpand %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdb, 0xd4, }, 4, 0, "", "",
+"c5 cd db d4 \tvpand %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xdb, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 db d0 \tvpandd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xdb, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 db d0 \tvpandq %zmm24,%zmm25,%zmm26",},
+{{0x0f, 0xdf, 0xd1, }, 3, 0, "", "",
+"0f df d1 \tpandn %mm1,%mm2",},
+{{0x66, 0x0f, 0xdf, 0xd1, }, 4, 0, "", "",
+"66 0f df d1 \tpandn %xmm1,%xmm2",},
+{{0xc5, 0xcd, 0xdf, 0xd4, }, 4, 0, "", "",
+"c5 cd df d4 \tvpandn %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xdf, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 df d0 \tvpandnd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xdf, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 df d0 \tvpandnq %zmm24,%zmm25,%zmm26",},
+{{0xc5, 0xf9, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 f9 e6 d1 \tvcvttpd2dq %xmm1,%xmm2",},
+{{0xc5, 0xfa, 0xe6, 0xf5, }, 4, 0, "", "",
+"c5 fa e6 f5 \tvcvtdq2pd %xmm5,%xmm6",},
+{{0x62, 0x61, 0x7e, 0x4f, 0xe6, 0xd5, }, 6, 0, "", "",
+"62 61 7e 4f e6 d5 \tvcvtdq2pd %ymm5,%zmm26{%k7}",},
+{{0x62, 0x01, 0xfe, 0x48, 0xe6, 0xd1, }, 6, 0, "", "",
+"62 01 fe 48 e6 d1 \tvcvtqq2pd %zmm25,%zmm26",},
+{{0xc5, 0xfb, 0xe6, 0xd1, }, 4, 0, "", "",
+"c5 fb e6 d1 \tvcvtpd2dq %xmm1,%xmm2",},
+{{0x0f, 0xeb, 0xf4, }, 3, 0, "", "",
+"0f eb f4 \tpor %mm4,%mm6",},
+{{0xc5, 0xcd, 0xeb, 0xd4, }, 4, 0, "", "",
+"c5 cd eb d4 \tvpor %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xeb, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 eb d0 \tvpord %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xeb, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 eb d0 \tvporq %zmm24,%zmm25,%zmm26",},
+{{0x0f, 0xef, 0xf4, }, 3, 0, "", "",
+"0f ef f4 \tpxor %mm4,%mm6",},
+{{0xc5, 0xcd, 0xef, 0xd4, }, 4, 0, "", "",
+"c5 cd ef d4 \tvpxor %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x01, 0x35, 0x40, 0xef, 0xd0, }, 6, 0, "", "",
+"62 01 35 40 ef d0 \tvpxord %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x01, 0xb5, 0x40, 0xef, 0xd0, }, 6, 0, "", "",
+"62 01 b5 40 ef d0 \tvpxorq %zmm24,%zmm25,%zmm26",},
+{{0x66, 0x0f, 0x38, 0x10, 0xc1, }, 5, 0, "", "",
+"66 0f 38 10 c1 \tpblendvb %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x02, 0x9d, 0x40, 0x10, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 10 eb \tvpsrlvw %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x10, 0xe6, }, 6, 0, "", "",
+"62 62 7e 4f 10 e6 \tvpmovuswb %zmm28,%ymm6{%k7}",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x11, 0xe6, }, 6, 0, "", "",
+"62 62 7e 4f 11 e6 \tvpmovusdb %zmm28,%xmm6{%k7}",},
+{{0x62, 0x02, 0x9d, 0x40, 0x11, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 11 eb \tvpsravw %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x12, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 12 de \tvpmovusqb %zmm27,%xmm6{%k7}",},
+{{0x62, 0x02, 0x9d, 0x40, 0x12, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 12 eb \tvpsllvw %zmm27,%zmm28,%zmm29",},
+{{0xc4, 0xe2, 0x7d, 0x13, 0xeb, }, 5, 0, "", "",
+"c4 e2 7d 13 eb \tvcvtph2ps %xmm3,%ymm5",},
+{{0x62, 0x62, 0x7d, 0x4f, 0x13, 0xdd, }, 6, 0, "", "",
+"62 62 7d 4f 13 dd \tvcvtph2ps %ymm5,%zmm27{%k7}",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x13, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 13 de \tvpmovusdw %zmm27,%ymm6{%k7}",},
+{{0x66, 0x0f, 0x38, 0x14, 0xc1, }, 5, 0, "", "",
+"66 0f 38 14 c1 \tblendvps %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x14, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 14 de \tvpmovusqw %zmm27,%xmm6{%k7}",},
+{{0x62, 0x02, 0x1d, 0x40, 0x14, 0xeb, }, 6, 0, "", "",
+"62 02 1d 40 14 eb \tvprorvd %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x02, 0x9d, 0x40, 0x14, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 14 eb \tvprorvq %zmm27,%zmm28,%zmm29",},
+{{0x66, 0x0f, 0x38, 0x15, 0xc1, }, 5, 0, "", "",
+"66 0f 38 15 c1 \tblendvpd %xmm0,%xmm1,%xmm0",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x15, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 15 de \tvpmovusqd %zmm27,%ymm6{%k7}",},
+{{0x62, 0x02, 0x1d, 0x40, 0x15, 0xeb, }, 6, 0, "", "",
+"62 02 1d 40 15 eb \tvprolvd %zmm27,%zmm28,%zmm29",},
+{{0x62, 0x02, 0x9d, 0x40, 0x15, 0xeb, }, 6, 0, "", "",
+"62 02 9d 40 15 eb \tvprolvq %zmm27,%zmm28,%zmm29",},
+{{0xc4, 0xe2, 0x4d, 0x16, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 16 d4 \tvpermps %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x82, 0x2d, 0x27, 0x16, 0xf0, }, 6, 0, "", "",
+"62 82 2d 27 16 f0 \tvpermps %ymm24,%ymm26,%ymm22{%k7}",},
+{{0x62, 0x82, 0xad, 0x27, 0x16, 0xf0, }, 6, 0, "", "",
+"62 82 ad 27 16 f0 \tvpermpd %ymm24,%ymm26,%ymm22{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x19, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 19 f4 \tvbroadcastsd %xmm4,%ymm6",},
+{{0x62, 0x02, 0x7d, 0x48, 0x19, 0xd3, }, 6, 0, "", "",
+"62 02 7d 48 19 d3 \tvbroadcastf32x2 %xmm27,%zmm26",},
+{{0xc4, 0xe2, 0x7d, 0x1a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 1a 21 \tvbroadcastf128 (%rcx),%ymm4",},
+{{0x62, 0x62, 0x7d, 0x48, 0x1a, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 1a 11 \tvbroadcastf32x4 (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x1a, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 1a 11 \tvbroadcastf64x2 (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x1b, 0x19, }, 6, 0, "", "",
+"62 62 7d 48 1b 19 \tvbroadcastf32x8 (%rcx),%zmm27",},
+{{0x62, 0x62, 0xfd, 0x48, 0x1b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 1b 11 \tvbroadcastf64x4 (%rcx),%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x1f, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 1f e3 \tvpabsq %zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x79, 0x20, 0xec, }, 5, 0, "", "",
+"c4 e2 79 20 ec \tvpmovsxbw %xmm4,%xmm5",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x20, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 20 de \tvpmovswb %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x21, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 21 f4 \tvpmovsxbd %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x21, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 21 de \tvpmovsdb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x22, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 22 e4 \tvpmovsxbq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x22, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 22 de \tvpmovsqb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x23, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 23 e4 \tvpmovsxwd %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x23, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 23 de \tvpmovsdw %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x24, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 24 f4 \tvpmovsxwq %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x24, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 24 de \tvpmovsqw %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x25, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 25 e4 \tvpmovsxdq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x25, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 25 de \tvpmovsqd %zmm27,%ymm6{%k7}",},
+{{0x62, 0x92, 0x1d, 0x40, 0x26, 0xeb, }, 6, 0, "", "",
+"62 92 1d 40 26 eb \tvptestmb %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x9d, 0x40, 0x26, 0xeb, }, 6, 0, "", "",
+"62 92 9d 40 26 eb \tvptestmw %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x26, 0x40, 0x26, 0xea, }, 6, 0, "", "",
+"62 92 26 40 26 ea \tvptestnmb %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0xa6, 0x40, 0x26, 0xea, }, 6, 0, "", "",
+"62 92 a6 40 26 ea \tvptestnmw %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0x1d, 0x40, 0x27, 0xeb, }, 6, 0, "", "",
+"62 92 1d 40 27 eb \tvptestmd %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x9d, 0x40, 0x27, 0xeb, }, 6, 0, "", "",
+"62 92 9d 40 27 eb \tvptestmq %zmm27,%zmm28,%k5",},
+{{0x62, 0x92, 0x26, 0x40, 0x27, 0xea, }, 6, 0, "", "",
+"62 92 26 40 27 ea \tvptestnmd %zmm26,%zmm27,%k5",},
+{{0x62, 0x92, 0xa6, 0x40, 0x27, 0xea, }, 6, 0, "", "",
+"62 92 a6 40 27 ea \tvptestnmq %zmm26,%zmm27,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x28, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 28 d4 \tvpmuldq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x28, 0xe5, }, 6, 0, "", "",
+"62 62 7e 48 28 e5 \tvpmovm2b %k5,%zmm28",},
+{{0x62, 0x62, 0xfe, 0x48, 0x28, 0xe5, }, 6, 0, "", "",
+"62 62 fe 48 28 e5 \tvpmovm2w %k5,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x29, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 29 d4 \tvpcmpeqq %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x92, 0x7e, 0x48, 0x29, 0xec, }, 6, 0, "", "",
+"62 92 7e 48 29 ec \tvpmovb2m %zmm28,%k5",},
+{{0x62, 0x92, 0xfe, 0x48, 0x29, 0xec, }, 6, 0, "", "",
+"62 92 fe 48 29 ec \tvpmovw2m %zmm28,%k5",},
+{{0xc4, 0xe2, 0x7d, 0x2a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 2a 21 \tvmovntdqa (%rcx),%ymm4",},
+{{0x62, 0x62, 0xfe, 0x48, 0x2a, 0xf6, }, 6, 0, "", "",
+"62 62 fe 48 2a f6 \tvpbroadcastmb2q %k6,%zmm30",},
+{{0xc4, 0xe2, 0x5d, 0x2c, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2c 31 \tvmaskmovps (%rcx),%ymm4,%ymm6",},
+{{0x62, 0x02, 0x35, 0x40, 0x2c, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 2c d0 \tvscalefps %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x2c, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 2c d0 \tvscalefpd %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x5d, 0x2d, 0x31, }, 5, 0, "", "",
+"c4 e2 5d 2d 31 \tvmaskmovpd (%rcx),%ymm4,%ymm6",},
+{{0x62, 0x02, 0x35, 0x07, 0x2d, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 2d d0 \tvscalefss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x2d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 2d d0 \tvscalefsd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x30, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 30 e4 \tvpmovzxbw %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x30, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 30 de \tvpmovwb %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x31, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 31 f4 \tvpmovzxbd %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x31, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 31 de \tvpmovdb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x32, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 32 e4 \tvpmovzxbq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x32, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 32 de \tvpmovqb %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x33, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 33 e4 \tvpmovzxwd %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x33, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 33 de \tvpmovdw %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x34, 0xf4, }, 5, 0, "", "",
+"c4 e2 7d 34 f4 \tvpmovzxwq %xmm4,%ymm6",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x34, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 34 de \tvpmovqw %zmm27,%xmm6{%k7}",},
+{{0xc4, 0xe2, 0x7d, 0x35, 0xe4, }, 5, 0, "", "",
+"c4 e2 7d 35 e4 \tvpmovzxdq %xmm4,%ymm4",},
+{{0x62, 0x62, 0x7e, 0x4f, 0x35, 0xde, }, 6, 0, "", "",
+"62 62 7e 4f 35 de \tvpmovqd %zmm27,%ymm6{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x36, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 36 d4 \tvpermd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x82, 0x2d, 0x27, 0x36, 0xf0, }, 6, 0, "", "",
+"62 82 2d 27 36 f0 \tvpermd %ymm24,%ymm26,%ymm22{%k7}",},
+{{0x62, 0x82, 0xad, 0x27, 0x36, 0xf0, }, 6, 0, "", "",
+"62 82 ad 27 36 f0 \tvpermq %ymm24,%ymm26,%ymm22{%k7}",},
+{{0xc4, 0xe2, 0x4d, 0x38, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 38 d4 \tvpminsb %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x38, 0xe5, }, 6, 0, "", "",
+"62 62 7e 48 38 e5 \tvpmovm2d %k5,%zmm28",},
+{{0x62, 0x62, 0xfe, 0x48, 0x38, 0xe5, }, 6, 0, "", "",
+"62 62 fe 48 38 e5 \tvpmovm2q %k5,%zmm28",},
+{{0xc4, 0xe2, 0x69, 0x39, 0xd9, }, 5, 0, "", "",
+"c4 e2 69 39 d9 \tvpminsd %xmm1,%xmm2,%xmm3",},
+{{0x62, 0x02, 0x35, 0x40, 0x39, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 39 d0 \tvpminsd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x39, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 39 d0 \tvpminsq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x92, 0x7e, 0x48, 0x39, 0xec, }, 6, 0, "", "",
+"62 92 7e 48 39 ec \tvpmovd2m %zmm28,%k5",},
+{{0x62, 0x92, 0xfe, 0x48, 0x39, 0xec, }, 6, 0, "", "",
+"62 92 fe 48 39 ec \tvpmovq2m %zmm28,%k5",},
+{{0xc4, 0xe2, 0x4d, 0x3a, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3a d4 \tvpminuw %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x62, 0x7e, 0x48, 0x3a, 0xe6, }, 6, 0, "", "",
+"62 62 7e 48 3a e6 \tvpbroadcastmw2d %k6,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x3b, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3b d4 \tvpminud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3b, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3b d0 \tvpminud %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3b, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3b d0 \tvpminuq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x3d, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3d d4 \tvpmaxsd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3d, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3d d0 \tvpmaxsd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3d d0 \tvpmaxsq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x3f, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 3f d4 \tvpmaxud %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x3f, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 3f d0 \tvpmaxud %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x3f, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 3f d0 \tvpmaxuq %zmm24,%zmm25,%zmm26",},
+{{0xc4, 0xe2, 0x4d, 0x40, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 40 d4 \tvpmulld %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x40, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 40 d0 \tvpmulld %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x40, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 40 d0 \tvpmullq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0x7d, 0x48, 0x42, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 42 d1 \tvgetexpps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x42, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 42 e3 \tvgetexppd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x43, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 43 d0 \tvgetexpss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0x95, 0x07, 0x43, 0xf4, }, 6, 0, "", "",
+"62 02 95 07 43 f4 \tvgetexpsd %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0x44, 0xe3, }, 6, 0, "", "",
+"62 02 7d 48 44 e3 \tvplzcntd %zmm27,%zmm28",},
+{{0x62, 0x02, 0xfd, 0x48, 0x44, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 44 e3 \tvplzcntq %zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x4d, 0x46, 0xd4, }, 5, 0, "", "",
+"c4 e2 4d 46 d4 \tvpsravd %ymm4,%ymm6,%ymm2",},
+{{0x62, 0x02, 0x35, 0x40, 0x46, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 46 d0 \tvpsravd %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xb5, 0x40, 0x46, 0xd0, }, 6, 0, "", "",
+"62 02 b5 40 46 d0 \tvpsravq %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0x7d, 0x48, 0x4c, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 4c d1 \tvrcp14ps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x4c, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 4c e3 \tvrcp14pd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x4d, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 4d d0 \tvrcp14ss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x4d, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 4d d0 \tvrcp14sd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0x4e, 0xd1, }, 6, 0, "", "",
+"62 02 7d 48 4e d1 \tvrsqrt14ps %zmm25,%zmm26",},
+{{0x62, 0x02, 0xfd, 0x48, 0x4e, 0xe3, }, 6, 0, "", "",
+"62 02 fd 48 4e e3 \tvrsqrt14pd %zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x07, 0x4f, 0xd0, }, 6, 0, "", "",
+"62 02 35 07 4f d0 \tvrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}",},
+{{0x62, 0x02, 0xb5, 0x07, 0x4f, 0xd0, }, 6, 0, "", "",
+"62 02 b5 07 4f d0 \tvrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe2, 0x79, 0x59, 0xf4, }, 5, 0, "", "",
+"c4 e2 79 59 f4 \tvpbroadcastq %xmm4,%xmm6",},
+{{0x62, 0x02, 0x7d, 0x48, 0x59, 0xd3, }, 6, 0, "", "",
+"62 02 7d 48 59 d3 \tvbroadcasti32x2 %xmm27,%zmm26",},
+{{0xc4, 0xe2, 0x7d, 0x5a, 0x21, }, 5, 0, "", "",
+"c4 e2 7d 5a 21 \tvbroadcasti128 (%rcx),%ymm4",},
+{{0x62, 0x62, 0x7d, 0x48, 0x5a, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 5a 11 \tvbroadcasti32x4 (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x5a, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 5a 11 \tvbroadcasti64x2 (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x5b, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 5b 21 \tvbroadcasti32x8 (%rcx),%zmm28",},
+{{0x62, 0x62, 0xfd, 0x48, 0x5b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 5b 11 \tvbroadcasti64x4 (%rcx),%zmm26",},
+{{0x62, 0x02, 0x25, 0x40, 0x64, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 64 e2 \tvpblendmd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x64, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 64 e2 \tvpblendmq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x40, 0x65, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 65 d0 \tvblendmps %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xa5, 0x40, 0x65, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 65 e2 \tvblendmpd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x66, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 66 e2 \tvpblendmb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x66, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 66 e2 \tvpblendmw %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x35, 0x40, 0x75, 0xd0, }, 6, 0, "", "",
+"62 02 35 40 75 d0 \tvpermi2b %zmm24,%zmm25,%zmm26",},
+{{0x62, 0x02, 0xa5, 0x40, 0x75, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 75 e2 \tvpermi2w %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x76, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 76 e2 \tvpermi2d %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x76, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 76 e2 \tvpermi2q %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x77, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 77 e2 \tvpermi2ps %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x77, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 77 e2 \tvpermi2pd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7a, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7a f0 \tvpbroadcastb %eax,%xmm30",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7b, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7b f0 \tvpbroadcastw %eax,%xmm30",},
+{{0x62, 0x62, 0x7d, 0x08, 0x7c, 0xf0, }, 6, 0, "", "",
+"62 62 7d 08 7c f0 \tvpbroadcastd %eax,%xmm30",},
+{{0x62, 0x62, 0xfd, 0x48, 0x7c, 0xf0, }, 6, 0, "", "",
+"62 62 fd 48 7c f0 \tvpbroadcastq %rax,%zmm30",},
+{{0x62, 0x02, 0x25, 0x40, 0x7d, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7d e2 \tvpermt2b %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7d, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7d e2 \tvpermt2w %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x7e, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7e e2 \tvpermt2d %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7e, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7e e2 \tvpermt2q %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x25, 0x40, 0x7f, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 7f e2 \tvpermt2ps %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x7f, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 7f e2 \tvpermt2pd %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x83, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 83 e2 \tvpmultishiftqb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x62, 0x7d, 0x48, 0x88, 0x11, }, 6, 0, "", "",
+"62 62 7d 48 88 11 \tvexpandps (%rcx),%zmm26",},
+{{0x62, 0x62, 0xfd, 0x48, 0x88, 0x21, }, 6, 0, "", "",
+"62 62 fd 48 88 21 \tvexpandpd (%rcx),%zmm28",},
+{{0x62, 0x62, 0x7d, 0x48, 0x89, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 89 21 \tvpexpandd (%rcx),%zmm28",},
+{{0x62, 0x62, 0xfd, 0x48, 0x89, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 89 11 \tvpexpandq (%rcx),%zmm26",},
+{{0x62, 0x62, 0x7d, 0x48, 0x8a, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 8a 21 \tvcompressps %zmm28,(%rcx)",},
+{{0x62, 0x62, 0xfd, 0x48, 0x8a, 0x21, }, 6, 0, "", "",
+"62 62 fd 48 8a 21 \tvcompresspd %zmm28,(%rcx)",},
+{{0x62, 0x62, 0x7d, 0x48, 0x8b, 0x21, }, 6, 0, "", "",
+"62 62 7d 48 8b 21 \tvpcompressd %zmm28,(%rcx)",},
+{{0x62, 0x62, 0xfd, 0x48, 0x8b, 0x11, }, 6, 0, "", "",
+"62 62 fd 48 8b 11 \tvpcompressq %zmm26,(%rcx)",},
+{{0x62, 0x02, 0x25, 0x40, 0x8d, 0xe2, }, 6, 0, "", "",
+"62 02 25 40 8d e2 \tvpermb %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0x8d, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 8d e2 \tvpermw %zmm26,%zmm27,%zmm28",},
+{{0xc4, 0xe2, 0x69, 0x90, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 90 4c 7d 02 \tvpgatherdd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x90, 0x4c, 0x7d, 0x04, }, 7, 0, "", "",
+"c4 e2 e9 90 4c 7d 04 \tvpgatherdq %xmm2,0x4(%rbp,%xmm7,2),%xmm1",},
+{{0x62, 0x22, 0x7d, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 90 94 dd 7b 00 00 00 \tvpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0x90, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 90 94 dd 7b 00 00 00 \tvpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}",},
+{{0xc4, 0xe2, 0x69, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 69 91 4c 7d 02 \tvpgatherqd %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0xc4, 0xe2, 0xe9, 0x91, 0x4c, 0x7d, 0x02, }, 7, 0, "", "",
+"c4 e2 e9 91 4c 7d 02 \tvpgatherqq %xmm2,0x2(%rbp,%xmm7,2),%xmm1",},
+{{0x62, 0x22, 0x7d, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 91 94 dd 7b 00 00 00 \tvpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0x91, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 91 94 dd 7b 00 00 00 \tvpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}",},
+{{0x62, 0x22, 0x7d, 0x41, 0xa0, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 a0 a4 ed 7b 00 00 00 \tvpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa0, 0x94, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a0 94 dd 7b 00 00 00 \tvpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0xb2, 0x7d, 0x41, 0xa1, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 7d 41 a1 b4 ed 7b 00 00 00 \tvpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0xb2, 0xfd, 0x21, 0xa1, 0xb4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 fd 21 a1 b4 dd 7b 00 00 00 \tvpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0x22, 0x7d, 0x41, 0xa2, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 7d 41 a2 a4 ed 7b 00 00 00 \tvscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa2, 0xa4, 0xdd, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a2 a4 dd 7b 00 00 00 \tvscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}",},
+{{0x62, 0xb2, 0x7d, 0x41, 0xa3, 0xb4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 b2 7d 41 a3 b4 ed 7b 00 00 00 \tvscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x22, 0xfd, 0x41, 0xa3, 0xa4, 0xed, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 22 fd 41 a3 a4 ed 7b 00 00 00 \tvscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}",},
+{{0x62, 0x02, 0xa5, 0x40, 0xb4, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 b4 e2 \tvpmadd52luq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0xa5, 0x40, 0xb5, 0xe2, }, 6, 0, "", "",
+"62 02 a5 40 b5 e2 \tvpmadd52huq %zmm26,%zmm27,%zmm28",},
+{{0x62, 0x02, 0x7d, 0x48, 0xc4, 0xda, }, 6, 0, "", "",
+"62 02 7d 48 c4 da \tvpconflictd %zmm26,%zmm27",},
+{{0x62, 0x02, 0xfd, 0x48, 0xc4, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 c4 da \tvpconflictq %zmm26,%zmm27",},
+{{0x62, 0x02, 0x7d, 0x48, 0xc8, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 c8 f5 \tvexp2ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xc8, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 c8 da \tvexp2pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x7d, 0x48, 0xca, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 ca f5 \tvrcp28ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xca, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 ca da \tvrcp28pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x15, 0x07, 0xcb, 0xf4, }, 6, 0, "", "",
+"62 02 15 07 cb f4 \tvrcp28ss %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0xad, 0x07, 0xcb, 0xd9, }, 6, 0, "", "",
+"62 02 ad 07 cb d9 \tvrcp28sd %xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x02, 0x7d, 0x48, 0xcc, 0xf5, }, 6, 0, "", "",
+"62 02 7d 48 cc f5 \tvrsqrt28ps %zmm29,%zmm30",},
+{{0x62, 0x02, 0xfd, 0x48, 0xcc, 0xda, }, 6, 0, "", "",
+"62 02 fd 48 cc da \tvrsqrt28pd %zmm26,%zmm27",},
+{{0x62, 0x02, 0x15, 0x07, 0xcd, 0xf4, }, 6, 0, "", "",
+"62 02 15 07 cd f4 \tvrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x02, 0xad, 0x07, 0xcd, 0xd9, }, 6, 0, "", "",
+"62 02 ad 07 cd d9 \tvrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x15, 0x40, 0x03, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 03 f4 12 \tvalignd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x03, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 03 d9 12 \tvalignq $0x12,%zmm25,%zmm26,%zmm27",},
+{{0xc4, 0xe3, 0x7d, 0x08, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 08 d6 05 \tvroundps $0x5,%ymm6,%ymm2",},
+{{0x62, 0x03, 0x7d, 0x48, 0x08, 0xd1, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 08 d1 12 \tvrndscaleps $0x12,%zmm25,%zmm26",},
+{{0xc4, 0xe3, 0x7d, 0x09, 0xd6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 09 d6 05 \tvroundpd $0x5,%ymm6,%ymm2",},
+{{0x62, 0x03, 0xfd, 0x48, 0x09, 0xd1, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 09 d1 12 \tvrndscalepd $0x12,%zmm25,%zmm26",},
+{{0xc4, 0xe3, 0x49, 0x0a, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0a d4 05 \tvroundss $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0x03, 0x35, 0x07, 0x0a, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 07 0a d0 12 \tvrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe3, 0x49, 0x0b, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 49 0b d4 05 \tvroundsd $0x5,%xmm4,%xmm6,%xmm2",},
+{{0x62, 0x03, 0xb5, 0x07, 0x0b, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 07 0b d0 12 \tvrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x18, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 18 f4 05 \tvinsertf128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0x03, 0x35, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 47 18 d0 12 \tvinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0x62, 0x03, 0xb5, 0x47, 0x18, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 47 18 d0 12 \tvinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x19, 0xe4, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 19 e4 05 \tvextractf128 $0x5,%ymm4,%xmm4",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 19 ca 12 \tvextractf32x4 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x19, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 19 ca 12 \tvextractf64x2 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0x2d, 0x47, 0x1a, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 47 1a d9 12 \tvinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}",},
+{{0x62, 0x03, 0x95, 0x47, 0x1a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 47 1a f4 12 \tvinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x1b, 0xee, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 1b ee 12 \tvextractf32x8 $0x12,%zmm29,%ymm30{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x1b, 0xd3, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 1b d3 12 \tvextractf64x4 $0x12,%zmm26,%ymm27{%k7}",},
+{{0x62, 0x93, 0x0d, 0x40, 0x1e, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 1e ed 12 \tvpcmpud $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x1e, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 1e ea 12 \tvpcmpuq $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x93, 0x0d, 0x40, 0x1f, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 1f ed 12 \tvpcmpd $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x1f, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 1f ea 12 \tvpcmpq $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x03, 0x15, 0x40, 0x23, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 23 f4 12 \tvshuff32x4 $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x23, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 23 d9 12 \tvshuff64x2 $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x15, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 25 f4 12 \tvpternlogd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x95, 0x40, 0x25, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 25 f4 12 \tvpternlogq $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x7d, 0x48, 0x26, 0xda, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 26 da 12 \tvgetmantps $0x12,%zmm26,%zmm27",},
+{{0x62, 0x03, 0xfd, 0x48, 0x26, 0xf5, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 26 f5 12 \tvgetmantpd $0x12,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x07, 0x27, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 07 27 d9 12 \tvgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x95, 0x07, 0x27, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 07 27 f4 12 \tvgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}",},
+{{0xc4, 0xe3, 0x5d, 0x38, 0xf4, 0x05, }, 6, 0, "", "",
+"c4 e3 5d 38 f4 05 \tvinserti128 $0x5,%xmm4,%ymm4,%ymm6",},
+{{0x62, 0x03, 0x35, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 35 47 38 d0 12 \tvinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0x62, 0x03, 0xb5, 0x47, 0x38, 0xd0, 0x12, }, 7, 0, "", "",
+"62 03 b5 47 38 d0 12 \tvinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}",},
+{{0xc4, 0xe3, 0x7d, 0x39, 0xe6, 0x05, }, 6, 0, "", "",
+"c4 e3 7d 39 e6 05 \tvextracti128 $0x5,%ymm4,%xmm6",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 39 ca 12 \tvextracti32x4 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x39, 0xca, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 39 ca 12 \tvextracti64x2 $0x12,%zmm25,%xmm26{%k7}",},
+{{0x62, 0x03, 0x15, 0x47, 0x3a, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 47 3a f4 12 \tvinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x03, 0xad, 0x47, 0x3a, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 47 3a d9 12 \tvinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}",},
+{{0x62, 0x03, 0x7d, 0x4f, 0x3b, 0xee, 0x12, }, 7, 0, "", "",
+"62 03 7d 4f 3b ee 12 \tvextracti32x8 $0x12,%zmm29,%ymm30{%k7}",},
+{{0x62, 0x03, 0xfd, 0x4f, 0x3b, 0xd3, 0x12, }, 7, 0, "", "",
+"62 03 fd 4f 3b d3 12 \tvextracti64x4 $0x12,%zmm26,%ymm27{%k7}",},
+{{0x62, 0x93, 0x0d, 0x40, 0x3e, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 3e ed 12 \tvpcmpub $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x3e, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 3e ea 12 \tvpcmpuw $0x12,%zmm26,%zmm27,%k5",},
+{{0x62, 0x93, 0x0d, 0x40, 0x3f, 0xed, 0x12, }, 7, 0, "", "",
+"62 93 0d 40 3f ed 12 \tvpcmpb $0x12,%zmm29,%zmm30,%k5",},
+{{0x62, 0x93, 0xa5, 0x40, 0x3f, 0xea, 0x12, }, 7, 0, "", "",
+"62 93 a5 40 3f ea 12 \tvpcmpw $0x12,%zmm26,%zmm27,%k5",},
+{{0xc4, 0xe3, 0x4d, 0x42, 0xd4, 0x05, }, 6, 0, "", "",
+"c4 e3 4d 42 d4 05 \tvmpsadbw $0x5,%ymm4,%ymm6,%ymm2",},
+{{0x62, 0xf3, 0x55, 0x48, 0x42, 0xf4, 0x12, }, 7, 0, "", "",
+"62 f3 55 48 42 f4 12 \tvdbpsadbw $0x12,%zmm4,%zmm5,%zmm6",},
+{{0x62, 0x03, 0x2d, 0x40, 0x43, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 40 43 d9 12 \tvshufi32x4 $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x95, 0x40, 0x43, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 43 f4 12 \tvshufi64x2 $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x40, 0x50, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 40 50 d9 12 \tvrangeps $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x95, 0x40, 0x50, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 40 50 f4 12 \tvrangepd $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x00, 0x51, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 00 51 d9 12 \tvrangess $0x12,%xmm25,%xmm26,%xmm27",},
+{{0x62, 0x03, 0x95, 0x00, 0x51, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 00 51 f4 12 \tvrangesd $0x12,%xmm28,%xmm29,%xmm30",},
+{{0x62, 0x03, 0x15, 0x40, 0x54, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 40 54 f4 12 \tvfixupimmps $0x12,%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x03, 0xad, 0x40, 0x54, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 40 54 d9 12 \tvfixupimmpd $0x12,%zmm25,%zmm26,%zmm27",},
+{{0x62, 0x03, 0x15, 0x07, 0x55, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 15 07 55 f4 12 \tvfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}",},
+{{0x62, 0x03, 0xad, 0x07, 0x55, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 ad 07 55 d9 12 \tvfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}",},
+{{0x62, 0x03, 0x7d, 0x48, 0x56, 0xda, 0x12, }, 7, 0, "", "",
+"62 03 7d 48 56 da 12 \tvreduceps $0x12,%zmm26,%zmm27",},
+{{0x62, 0x03, 0xfd, 0x48, 0x56, 0xf5, 0x12, }, 7, 0, "", "",
+"62 03 fd 48 56 f5 12 \tvreducepd $0x12,%zmm29,%zmm30",},
+{{0x62, 0x03, 0x2d, 0x00, 0x57, 0xd9, 0x12, }, 7, 0, "", "",
+"62 03 2d 00 57 d9 12 \tvreducess $0x12,%xmm25,%xmm26,%xmm27",},
+{{0x62, 0x03, 0x95, 0x00, 0x57, 0xf4, 0x12, }, 7, 0, "", "",
+"62 03 95 00 57 f4 12 \tvreducesd $0x12,%xmm28,%xmm29,%xmm30",},
+{{0x62, 0x93, 0x7d, 0x48, 0x66, 0xeb, 0x12, }, 7, 0, "", "",
+"62 93 7d 48 66 eb 12 \tvfpclassps $0x12,%zmm27,%k5",},
+{{0x62, 0x93, 0xfd, 0x48, 0x66, 0xee, 0x12, }, 7, 0, "", "",
+"62 93 fd 48 66 ee 12 \tvfpclasspd $0x12,%zmm30,%k5",},
+{{0x62, 0x93, 0x7d, 0x08, 0x67, 0xeb, 0x12, }, 7, 0, "", "",
+"62 93 7d 08 67 eb 12 \tvfpclassss $0x12,%xmm27,%k5",},
+{{0x62, 0x93, 0xfd, 0x08, 0x67, 0xee, 0x12, }, 7, 0, "", "",
+"62 93 fd 08 67 ee 12 \tvfpclasssd $0x12,%xmm30,%k5",},
+{{0x62, 0x91, 0x2d, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "",
+"62 91 2d 40 72 c1 12 \tvprord $0x12,%zmm25,%zmm26",},
+{{0x62, 0x91, 0xad, 0x40, 0x72, 0xc1, 0x12, }, 7, 0, "", "",
+"62 91 ad 40 72 c1 12 \tvprorq $0x12,%zmm25,%zmm26",},
+{{0x62, 0x91, 0x0d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 91 0d 40 72 cd 12 \tvprold $0x12,%zmm29,%zmm30",},
+{{0x62, 0x91, 0x8d, 0x40, 0x72, 0xcd, 0x12, }, 7, 0, "", "",
+"62 91 8d 40 72 cd 12 \tvprolq $0x12,%zmm29,%zmm30",},
+{{0x0f, 0x72, 0xe6, 0x02, }, 4, 0, "", "",
+"0f 72 e6 02 \tpsrad $0x2,%mm6",},
+{{0xc5, 0xed, 0x72, 0xe6, 0x05, }, 5, 0, "", "",
+"c5 ed 72 e6 05 \tvpsrad $0x5,%ymm6,%ymm2",},
+{{0x62, 0x91, 0x4d, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "",
+"62 91 4d 40 72 e2 05 \tvpsrad $0x5,%zmm26,%zmm22",},
+{{0x62, 0x91, 0xcd, 0x40, 0x72, 0xe2, 0x05, }, 7, 0, "", "",
+"62 91 cd 40 72 e2 05 \tvpsraq $0x5,%zmm26,%zmm22",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 8c fe 7b 00 00 00 \tvgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 94 fe 7b 00 00 00 \tvgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 ac fe 7b 00 00 00 \tvscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc6, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c6 b4 fe 7b 00 00 00 \tvscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x8c, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 8c fe 7b 00 00 00 \tvgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0x94, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 94 fe 7b 00 00 00 \tvgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xac, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 ac fe 7b 00 00 00 \tvscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0x7d, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 7d 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x92, 0xfd, 0x41, 0xc7, 0xb4, 0xfe, 0x7b, 0x00, 0x00, 0x00, }, 11, 0, "", "",
+"62 92 fd 41 c7 b4 fe 7b 00 00 00 \tvscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}",},
+{{0x62, 0x01, 0x95, 0x40, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 40 58 f4 \tvaddpd %zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x47, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 47 58 f4 \tvaddpd %zmm28,%zmm29,%zmm30{%k7}",},
+{{0x62, 0x01, 0x95, 0xc7, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 c7 58 f4 \tvaddpd %zmm28,%zmm29,%zmm30{%k7}{z}",},
+{{0x62, 0x01, 0x95, 0x10, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 10 58 f4 \tvaddpd {rn-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x50, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 50 58 f4 \tvaddpd {ru-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x30, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 30 58 f4 \tvaddpd {rd-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x01, 0x95, 0x70, 0x58, 0xf4, }, 6, 0, "", "",
+"62 01 95 70 58 f4 \tvaddpd {rz-sae},%zmm28,%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x40, 0x58, 0x31, }, 6, 0, "", "",
+"62 61 95 40 58 31 \tvaddpd (%rcx),%zmm29,%zmm30",},
+{{0x62, 0x21, 0x95, 0x40, 0x58, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, }, 11, 0, "", "",
+"62 21 95 40 58 b4 f0 23 01 00 00 \tvaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x50, 0x58, 0x31, }, 6, 0, "", "",
+"62 61 95 50 58 31 \tvaddpd (%rcx){1to8},%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x40, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 61 95 40 58 72 7f \tvaddpd 0x1fc0(%rdx),%zmm29,%zmm30",},
+{{0x62, 0x61, 0x95, 0x50, 0x58, 0x72, 0x7f, }, 7, 0, "", "",
+"62 61 95 50 58 72 7f \tvaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30",},
+{{0x62, 0xf1, 0x0c, 0x50, 0xc2, 0x6a, 0x7f, 0x08, }, 8, 0, "", "",
+"62 f1 0c 50 c2 6a 7f 08 \tvcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5",},
+{{0x62, 0xb1, 0x97, 0x07, 0xc2, 0xac, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x01, }, 12, 0, "", "",
+"62 b1 97 07 c2 ac f0 23 01 00 00 01 \tvcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}",},
+{{0x62, 0x91, 0x97, 0x17, 0xc2, 0xec, 0x02, }, 7, 0, "", "",
+"62 91 97 17 c2 ec 02 \tvcmplesd {sae},%xmm28,%xmm29,%k5{%k7}",},
+{{0x62, 0x23, 0x15, 0x07, 0x27, 0xb4, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x5b, }, 12, 0, "", "",
+"62 23 15 07 27 b4 f0 23 01 00 00 5b \tvgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}",},
+{{0xf3, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f3 0f 1b 00 \tbndmk (%rax),%bnd0",},
+{{0xf3, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
+"f3 41 0f 1b 00 \tbndmk (%r8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 04 25 78 56 34 12 \tbndmk 0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f3 0f 1b 18 \tbndmk (%rax),%bnd3",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1b 04 01 \tbndmk (%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 04 05 78 56 34 12 \tbndmk 0x12345678(,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1b 04 08 \tbndmk (%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1b 04 c8 \tbndmk (%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 40 12 \tbndmk 0x12(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1b 45 12 \tbndmk 0x12(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 01 12 \tbndmk 0x12(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 05 12 \tbndmk 0x12(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 08 12 \tbndmk 0x12(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1b 44 c8 12 \tbndmk 0x12(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 80 78 56 34 12 \tbndmk 0x12345678(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1b 85 78 56 34 12 \tbndmk 0x12345678(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 01 78 56 34 12 \tbndmk 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 05 78 56 34 12 \tbndmk 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 08 78 56 34 12 \tbndmk 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1b 84 c8 78 56 34 12 \tbndmk 0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f3 0f 1a 00 \tbndcl (%rax),%bnd0",},
+{{0xf3, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "",
+"f3 41 0f 1a 00 \tbndcl (%r8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 04 25 78 56 34 12 \tbndcl 0x12345678,%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f3 0f 1a 18 \tbndcl (%rax),%bnd3",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f3 0f 1a 04 01 \tbndcl (%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 04 05 78 56 34 12 \tbndcl 0x12345678(,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f3 0f 1a 04 08 \tbndcl (%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f3 0f 1a 04 c8 \tbndcl (%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 40 12 \tbndcl 0x12(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f3 0f 1a 45 12 \tbndcl 0x12(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 01 12 \tbndcl 0x12(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 05 12 \tbndcl 0x12(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 08 12 \tbndcl 0x12(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f3 0f 1a 44 c8 12 \tbndcl 0x12(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 80 78 56 34 12 \tbndcl 0x12345678(%rax),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 1a 85 78 56 34 12 \tbndcl 0x12345678(%rbp),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 01 78 56 34 12 \tbndcl 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 05 78 56 34 12 \tbndcl 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 08 78 56 34 12 \tbndcl 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 1a 84 c8 78 56 34 12 \tbndcl 0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf3, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f3 0f 1a c0 \tbndcl %rax,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"f2 0f 1a 00 \tbndcu (%rax),%bnd0",},
+{{0xf2, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "",
+"f2 41 0f 1a 00 \tbndcu (%r8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 04 25 78 56 34 12 \tbndcu 0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"f2 0f 1a 18 \tbndcu (%rax),%bnd3",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1a 04 01 \tbndcu (%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 04 05 78 56 34 12 \tbndcu 0x12345678(,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1a 04 08 \tbndcu (%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1a 04 c8 \tbndcu (%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 40 12 \tbndcu 0x12(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1a 45 12 \tbndcu 0x12(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 01 12 \tbndcu 0x12(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 05 12 \tbndcu 0x12(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 08 12 \tbndcu 0x12(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1a 44 c8 12 \tbndcu 0x12(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 80 78 56 34 12 \tbndcu 0x12345678(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1a 85 78 56 34 12 \tbndcu 0x12345678(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 01 78 56 34 12 \tbndcu 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 05 78 56 34 12 \tbndcu 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 08 78 56 34 12 \tbndcu 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1a 84 c8 78 56 34 12 \tbndcu 0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1a, 0xc0, }, 4, 0, "", "",
+"f2 0f 1a c0 \tbndcu %rax,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"f2 0f 1b 00 \tbndcn (%rax),%bnd0",},
+{{0xf2, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
+"f2 41 0f 1b 00 \tbndcn (%r8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 04 25 78 56 34 12 \tbndcn 0x12345678,%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"f2 0f 1b 18 \tbndcn (%rax),%bnd3",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"f2 0f 1b 04 01 \tbndcn (%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 04 05 78 56 34 12 \tbndcn 0x12345678(,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"f2 0f 1b 04 08 \tbndcn (%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"f2 0f 1b 04 c8 \tbndcn (%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 40 12 \tbndcn 0x12(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"f2 0f 1b 45 12 \tbndcn 0x12(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 01 12 \tbndcn 0x12(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 05 12 \tbndcn 0x12(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 08 12 \tbndcn 0x12(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"f2 0f 1b 44 c8 12 \tbndcn 0x12(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 80 78 56 34 12 \tbndcn 0x12345678(%rax),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f2 0f 1b 85 78 56 34 12 \tbndcn 0x12345678(%rbp),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 01 78 56 34 12 \tbndcn 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 05 78 56 34 12 \tbndcn 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 08 78 56 34 12 \tbndcn 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f2 0f 1b 84 c8 78 56 34 12 \tbndcn 0x12345678(%rax,%rcx,8),%bnd0",},
+{{0xf2, 0x0f, 0x1b, 0xc0, }, 4, 0, "", "",
+"f2 0f 1b c0 \tbndcn %rax,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"66 0f 1a 00 \tbndmov (%rax),%bnd0",},
+{{0x66, 0x41, 0x0f, 0x1a, 0x00, }, 5, 0, "", "",
+"66 41 0f 1a 00 \tbndmov (%r8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 04 25 78 56 34 12 \tbndmov 0x12345678,%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x18, }, 4, 0, "", "",
+"66 0f 1a 18 \tbndmov (%rax),%bnd3",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1a 04 01 \tbndmov (%rcx,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 04 05 78 56 34 12 \tbndmov 0x12345678(,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1a 04 08 \tbndmov (%rax,%rcx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1a 04 c8 \tbndmov (%rax,%rcx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1a 40 12 \tbndmov 0x12(%rax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1a 45 12 \tbndmov 0x12(%rbp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 01 12 \tbndmov 0x12(%rcx,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 05 12 \tbndmov 0x12(%rbp,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 08 12 \tbndmov 0x12(%rax,%rcx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1a 44 c8 12 \tbndmov 0x12(%rax,%rcx,8),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 80 78 56 34 12 \tbndmov 0x12345678(%rax),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1a 85 78 56 34 12 \tbndmov 0x12345678(%rbp),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 01 78 56 34 12 \tbndmov 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 05 78 56 34 12 \tbndmov 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 08 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0x66, 0x0f, 0x1a, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1a 84 c8 78 56 34 12 \tbndmov 0x12345678(%rax,%rcx,8),%bnd0",},
+{{0x66, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"66 0f 1b 00 \tbndmov %bnd0,(%rax)",},
+{{0x66, 0x41, 0x0f, 0x1b, 0x00, }, 5, 0, "", "",
+"66 41 0f 1b 00 \tbndmov %bnd0,(%r8)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 04 25 78 56 34 12 \tbndmov %bnd0,0x12345678",},
+{{0x66, 0x0f, 0x1b, 0x18, }, 4, 0, "", "",
+"66 0f 1b 18 \tbndmov %bnd3,(%rax)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x01, }, 5, 0, "", "",
+"66 0f 1b 04 01 \tbndmov %bnd0,(%rcx,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 04 05 78 56 34 12 \tbndmov %bnd0,0x12345678(,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0x08, }, 5, 0, "", "",
+"66 0f 1b 04 08 \tbndmov %bnd0,(%rax,%rcx,1)",},
+{{0x66, 0x0f, 0x1b, 0x04, 0xc8, }, 5, 0, "", "",
+"66 0f 1b 04 c8 \tbndmov %bnd0,(%rax,%rcx,8)",},
+{{0x66, 0x0f, 0x1b, 0x40, 0x12, }, 5, 0, "", "",
+"66 0f 1b 40 12 \tbndmov %bnd0,0x12(%rax)",},
+{{0x66, 0x0f, 0x1b, 0x45, 0x12, }, 5, 0, "", "",
+"66 0f 1b 45 12 \tbndmov %bnd0,0x12(%rbp)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 01 12 \tbndmov %bnd0,0x12(%rcx,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 05 12 \tbndmov %bnd0,0x12(%rbp,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 08 12 \tbndmov %bnd0,0x12(%rax,%rcx,1)",},
+{{0x66, 0x0f, 0x1b, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"66 0f 1b 44 c8 12 \tbndmov %bnd0,0x12(%rax,%rcx,8)",},
+{{0x66, 0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 80 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax)",},
+{{0x66, 0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"66 0f 1b 85 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 01 78 56 34 12 \tbndmov %bnd0,0x12345678(%rcx,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 05 78 56 34 12 \tbndmov %bnd0,0x12345678(%rbp,%rax,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 08 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,1)",},
+{{0x66, 0x0f, 0x1b, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 1b 84 c8 78 56 34 12 \tbndmov %bnd0,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x0f, 0x1a, 0xc8, }, 4, 0, "", "",
+"66 0f 1a c8 \tbndmov %bnd0,%bnd1",},
+{{0x66, 0x0f, 0x1a, 0xc1, }, 4, 0, "", "",
+"66 0f 1a c1 \tbndmov %bnd1,%bnd0",},
+{{0x0f, 0x1a, 0x00, }, 3, 0, "", "",
+"0f 1a 00 \tbndldx (%rax),%bnd0",},
+{{0x41, 0x0f, 0x1a, 0x00, }, 4, 0, "", "",
+"41 0f 1a 00 \tbndldx (%r8),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 04 25 78 56 34 12 \tbndldx 0x12345678,%bnd0",},
+{{0x0f, 0x1a, 0x18, }, 3, 0, "", "",
+"0f 1a 18 \tbndldx (%rax),%bnd3",},
+{{0x0f, 0x1a, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1a 04 01 \tbndldx (%rcx,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 04 05 78 56 34 12 \tbndldx 0x12345678(,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1a 04 08 \tbndldx (%rax,%rcx,1),%bnd0",},
+{{0x0f, 0x1a, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1a 40 12 \tbndldx 0x12(%rax),%bnd0",},
+{{0x0f, 0x1a, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1a 45 12 \tbndldx 0x12(%rbp),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1a 44 01 12 \tbndldx 0x12(%rcx,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1a 44 05 12 \tbndldx 0x12(%rbp,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1a 44 08 12 \tbndldx 0x12(%rax,%rcx,1),%bnd0",},
+{{0x0f, 0x1a, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 80 78 56 34 12 \tbndldx 0x12345678(%rax),%bnd0",},
+{{0x0f, 0x1a, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1a 85 78 56 34 12 \tbndldx 0x12345678(%rbp),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 01 78 56 34 12 \tbndldx 0x12345678(%rcx,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 05 78 56 34 12 \tbndldx 0x12345678(%rbp,%rax,1),%bnd0",},
+{{0x0f, 0x1a, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1a 84 08 78 56 34 12 \tbndldx 0x12345678(%rax,%rcx,1),%bnd0",},
+{{0x0f, 0x1b, 0x00, }, 3, 0, "", "",
+"0f 1b 00 \tbndstx %bnd0,(%rax)",},
+{{0x41, 0x0f, 0x1b, 0x00, }, 4, 0, "", "",
+"41 0f 1b 00 \tbndstx %bnd0,(%r8)",},
+{{0x0f, 0x1b, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 04 25 78 56 34 12 \tbndstx %bnd0,0x12345678",},
+{{0x0f, 0x1b, 0x18, }, 3, 0, "", "",
+"0f 1b 18 \tbndstx %bnd3,(%rax)",},
+{{0x0f, 0x1b, 0x04, 0x01, }, 4, 0, "", "",
+"0f 1b 04 01 \tbndstx %bnd0,(%rcx,%rax,1)",},
+{{0x0f, 0x1b, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 04 05 78 56 34 12 \tbndstx %bnd0,0x12345678(,%rax,1)",},
+{{0x0f, 0x1b, 0x04, 0x08, }, 4, 0, "", "",
+"0f 1b 04 08 \tbndstx %bnd0,(%rax,%rcx,1)",},
+{{0x0f, 0x1b, 0x40, 0x12, }, 4, 0, "", "",
+"0f 1b 40 12 \tbndstx %bnd0,0x12(%rax)",},
+{{0x0f, 0x1b, 0x45, 0x12, }, 4, 0, "", "",
+"0f 1b 45 12 \tbndstx %bnd0,0x12(%rbp)",},
+{{0x0f, 0x1b, 0x44, 0x01, 0x12, }, 5, 0, "", "",
+"0f 1b 44 01 12 \tbndstx %bnd0,0x12(%rcx,%rax,1)",},
+{{0x0f, 0x1b, 0x44, 0x05, 0x12, }, 5, 0, "", "",
+"0f 1b 44 05 12 \tbndstx %bnd0,0x12(%rbp,%rax,1)",},
+{{0x0f, 0x1b, 0x44, 0x08, 0x12, }, 5, 0, "", "",
+"0f 1b 44 08 12 \tbndstx %bnd0,0x12(%rax,%rcx,1)",},
+{{0x0f, 0x1b, 0x80, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 80 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax)",},
+{{0x0f, 0x1b, 0x85, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f 1b 85 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp)",},
+{{0x0f, 0x1b, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 01 78 56 34 12 \tbndstx %bnd0,0x12345678(%rcx,%rax,1)",},
+{{0x0f, 0x1b, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 05 78 56 34 12 \tbndstx %bnd0,0x12345678(%rbp,%rax,1)",},
+{{0x0f, 0x1b, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 1b 84 08 78 56 34 12 \tbndstx %bnd0,0x12345678(%rax,%rcx,1)",},
+{{0xf2, 0xe8, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "call", "unconditional",
+"f2 e8 00 00 00 00 \tbnd callq f22 <main+0xf22>",},
+{{0x67, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"67 f2 ff 10 \tbnd callq *(%eax)",},
+{{0xf2, 0xc3, }, 2, 0, "ret", "indirect",
+"f2 c3 \tbnd retq ",},
+{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional",
+"f2 e9 00 00 00 00 \tbnd jmpq f2e <main+0xf2e>",},
+{{0xf2, 0xe9, 0x00, 0x00, 0x00, 0x00, }, 6, 0, "jmp", "unconditional",
+"f2 e9 00 00 00 00 \tbnd jmpq f34 <main+0xf34>",},
+{{0x67, 0xf2, 0xff, 0x21, }, 4, 0, "jmp", "indirect",
+"67 f2 ff 21 \tbnd jmpq *(%ecx)",},
+{{0xf2, 0x0f, 0x85, 0x00, 0x00, 0x00, 0x00, }, 7, 0, "jcc", "conditional",
+"f2 0f 85 00 00 00 00 \tbnd jne f3f <main+0xf3f>",},
+{{0x0f, 0x3a, 0xcc, 0xc1, 0x00, }, 5, 0, "", "",
+"0f 3a cc c1 00 \tsha1rnds4 $0x0,%xmm1,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0xd7, 0x91, }, 5, 0, "", "",
+"0f 3a cc d7 91 \tsha1rnds4 $0x91,%xmm7,%xmm2",},
+{{0x41, 0x0f, 0x3a, 0xcc, 0xc0, 0x91, }, 6, 0, "", "",
+"41 0f 3a cc c0 91 \tsha1rnds4 $0x91,%xmm8,%xmm0",},
+{{0x44, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "",
+"44 0f 3a cc c7 91 \tsha1rnds4 $0x91,%xmm7,%xmm8",},
+{{0x45, 0x0f, 0x3a, 0xcc, 0xc7, 0x91, }, 6, 0, "", "",
+"45 0f 3a cc c7 91 \tsha1rnds4 $0x91,%xmm15,%xmm8",},
+{{0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 5, 0, "", "",
+"0f 3a cc 00 91 \tsha1rnds4 $0x91,(%rax),%xmm0",},
+{{0x41, 0x0f, 0x3a, 0xcc, 0x00, 0x91, }, 6, 0, "", "",
+"41 0f 3a cc 00 91 \tsha1rnds4 $0x91,(%r8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 04 25 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678,%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x18, 0x91, }, 5, 0, "", "",
+"0f 3a cc 18 91 \tsha1rnds4 $0x91,(%rax),%xmm3",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x01, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 01 91 \tsha1rnds4 $0x91,(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 04 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0x08, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 08 91 \tsha1rnds4 $0x91,(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x04, 0xc8, 0x91, }, 6, 0, "", "",
+"0f 3a cc 04 c8 91 \tsha1rnds4 $0x91,(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x40, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 40 12 91 \tsha1rnds4 $0x91,0x12(%rax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x45, 0x12, 0x91, }, 6, 0, "", "",
+"0f 3a cc 45 12 91 \tsha1rnds4 $0x91,0x12(%rbp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x01, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 01 12 91 \tsha1rnds4 $0x91,0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x05, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 05 12 91 \tsha1rnds4 $0x91,0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0x08, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 08 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x44, 0xc8, 0x12, 0x91, }, 7, 0, "", "",
+"0f 3a cc 44 c8 12 91 \tsha1rnds4 $0x91,0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 80 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, 0x91, }, 9, 0, "", "",
+"0f 3a cc 85 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 01 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 05 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 08 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x3a, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 10, 0, "", "",
+"0f 3a cc 84 c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x3a, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, 0x91, }, 11, 0, "", "",
+"44 0f 3a cc bc c8 78 56 34 12 91 \tsha1rnds4 $0x91,0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xc8, 0xc1, }, 4, 0, "", "",
+"0f 38 c8 c1 \tsha1nexte %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0xd7, }, 4, 0, "", "",
+"0f 38 c8 d7 \tsha1nexte %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xc8, 0xc0, }, 5, 0, "", "",
+"41 0f 38 c8 c0 \tsha1nexte %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "",
+"44 0f 38 c8 c7 \tsha1nexte %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xc8, 0xc7, }, 5, 0, "", "",
+"45 0f 38 c8 c7 \tsha1nexte %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xc8, 0x00, }, 4, 0, "", "",
+"0f 38 c8 00 \tsha1nexte (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xc8, 0x00, }, 5, 0, "", "",
+"41 0f 38 c8 00 \tsha1nexte (%r8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 04 25 78 56 34 12 \tsha1nexte 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x18, }, 4, 0, "", "",
+"0f 38 c8 18 \tsha1nexte (%rax),%xmm3",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c8 04 01 \tsha1nexte (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 04 05 78 56 34 12 \tsha1nexte 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c8 04 08 \tsha1nexte (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c8 04 c8 \tsha1nexte (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c8 40 12 \tsha1nexte 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c8 45 12 \tsha1nexte 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 01 12 \tsha1nexte 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 05 12 \tsha1nexte 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 08 12 \tsha1nexte 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c8 44 c8 12 \tsha1nexte 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 80 78 56 34 12 \tsha1nexte 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c8 85 78 56 34 12 \tsha1nexte 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 01 78 56 34 12 \tsha1nexte 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 05 78 56 34 12 \tsha1nexte 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 08 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc8, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c8 84 c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc8, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 c8 bc c8 78 56 34 12 \tsha1nexte 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xc9, 0xc1, }, 4, 0, "", "",
+"0f 38 c9 c1 \tsha1msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0xd7, }, 4, 0, "", "",
+"0f 38 c9 d7 \tsha1msg1 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xc9, 0xc0, }, 5, 0, "", "",
+"41 0f 38 c9 c0 \tsha1msg1 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "",
+"44 0f 38 c9 c7 \tsha1msg1 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xc9, 0xc7, }, 5, 0, "", "",
+"45 0f 38 c9 c7 \tsha1msg1 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xc9, 0x00, }, 4, 0, "", "",
+"0f 38 c9 00 \tsha1msg1 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xc9, 0x00, }, 5, 0, "", "",
+"41 0f 38 c9 00 \tsha1msg1 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 04 25 78 56 34 12 \tsha1msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x18, }, 4, 0, "", "",
+"0f 38 c9 18 \tsha1msg1 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 c9 04 01 \tsha1msg1 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 04 05 78 56 34 12 \tsha1msg1 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 c9 04 08 \tsha1msg1 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 c9 04 c8 \tsha1msg1 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 c9 40 12 \tsha1msg1 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 c9 45 12 \tsha1msg1 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 01 12 \tsha1msg1 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 05 12 \tsha1msg1 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 08 12 \tsha1msg1 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 c9 44 c8 12 \tsha1msg1 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 80 78 56 34 12 \tsha1msg1 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 c9 85 78 56 34 12 \tsha1msg1 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 01 78 56 34 12 \tsha1msg1 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 05 78 56 34 12 \tsha1msg1 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 08 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xc9, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 c9 84 c8 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xc9, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 c9 bc c8 78 56 34 12 \tsha1msg1 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xca, 0xc1, }, 4, 0, "", "",
+"0f 38 ca c1 \tsha1msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xca, 0xd7, }, 4, 0, "", "",
+"0f 38 ca d7 \tsha1msg2 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xca, 0xc0, }, 5, 0, "", "",
+"41 0f 38 ca c0 \tsha1msg2 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "",
+"44 0f 38 ca c7 \tsha1msg2 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xca, 0xc7, }, 5, 0, "", "",
+"45 0f 38 ca c7 \tsha1msg2 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xca, 0x00, }, 4, 0, "", "",
+"0f 38 ca 00 \tsha1msg2 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xca, 0x00, }, 5, 0, "", "",
+"41 0f 38 ca 00 \tsha1msg2 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 04 25 78 56 34 12 \tsha1msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xca, 0x18, }, 4, 0, "", "",
+"0f 38 ca 18 \tsha1msg2 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xca, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 ca 04 01 \tsha1msg2 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 04 05 78 56 34 12 \tsha1msg2 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 ca 04 08 \tsha1msg2 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 ca 04 c8 \tsha1msg2 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 ca 40 12 \tsha1msg2 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 ca 45 12 \tsha1msg2 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 01 12 \tsha1msg2 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 05 12 \tsha1msg2 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 08 12 \tsha1msg2 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 ca 44 c8 12 \tsha1msg2 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 80 78 56 34 12 \tsha1msg2 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 ca 85 78 56 34 12 \tsha1msg2 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 01 78 56 34 12 \tsha1msg2 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 05 78 56 34 12 \tsha1msg2 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 08 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xca, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 ca 84 c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xca, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 ca bc c8 78 56 34 12 \tsha1msg2 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xcb, 0xcc, }, 4, 0, "", "",
+"0f 38 cb cc \tsha256rnds2 %xmm0,%xmm4,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0xd7, }, 4, 0, "", "",
+"0f 38 cb d7 \tsha256rnds2 %xmm0,%xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xcb, 0xc8, }, 5, 0, "", "",
+"41 0f 38 cb c8 \tsha256rnds2 %xmm0,%xmm8,%xmm1",},
+{{0x44, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "",
+"44 0f 38 cb c7 \tsha256rnds2 %xmm0,%xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xcb, 0xc7, }, 5, 0, "", "",
+"45 0f 38 cb c7 \tsha256rnds2 %xmm0,%xmm15,%xmm8",},
+{{0x0f, 0x38, 0xcb, 0x08, }, 4, 0, "", "",
+"0f 38 cb 08 \tsha256rnds2 %xmm0,(%rax),%xmm1",},
+{{0x41, 0x0f, 0x38, 0xcb, 0x08, }, 5, 0, "", "",
+"41 0f 38 cb 08 \tsha256rnds2 %xmm0,(%r8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 0c 25 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678,%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x18, }, 4, 0, "", "",
+"0f 38 cb 18 \tsha256rnds2 %xmm0,(%rax),%xmm3",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x01, }, 5, 0, "", "",
+"0f 38 cb 0c 01 \tsha256rnds2 %xmm0,(%rcx,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 0c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0x08, }, 5, 0, "", "",
+"0f 38 cb 0c 08 \tsha256rnds2 %xmm0,(%rax,%rcx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x0c, 0xc8, }, 5, 0, "", "",
+"0f 38 cb 0c c8 \tsha256rnds2 %xmm0,(%rax,%rcx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x48, 0x12, }, 5, 0, "", "",
+"0f 38 cb 48 12 \tsha256rnds2 %xmm0,0x12(%rax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4d, 0x12, }, 5, 0, "", "",
+"0f 38 cb 4d 12 \tsha256rnds2 %xmm0,0x12(%rbp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 01 12 \tsha256rnds2 %xmm0,0x12(%rcx,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 05 12 \tsha256rnds2 %xmm0,0x12(%rbp,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c 08 12 \tsha256rnds2 %xmm0,0x12(%rax,%rcx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x4c, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cb 4c c8 12 \tsha256rnds2 %xmm0,0x12(%rax,%rcx,8),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x88, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 88 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cb 8d 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 01 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rcx,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 05 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rbp,%rax,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c 08 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,1),%xmm1",},
+{{0x0f, 0x38, 0xcb, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cb 8c c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm1",},
+{{0x44, 0x0f, 0x38, 0xcb, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 cb bc c8 78 56 34 12 \tsha256rnds2 %xmm0,0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xcc, 0xc1, }, 4, 0, "", "",
+"0f 38 cc c1 \tsha256msg1 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0xd7, }, 4, 0, "", "",
+"0f 38 cc d7 \tsha256msg1 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xcc, 0xc0, }, 5, 0, "", "",
+"41 0f 38 cc c0 \tsha256msg1 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "",
+"44 0f 38 cc c7 \tsha256msg1 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xcc, 0xc7, }, 5, 0, "", "",
+"45 0f 38 cc c7 \tsha256msg1 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xcc, 0x00, }, 4, 0, "", "",
+"0f 38 cc 00 \tsha256msg1 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xcc, 0x00, }, 5, 0, "", "",
+"41 0f 38 cc 00 \tsha256msg1 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 04 25 78 56 34 12 \tsha256msg1 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x18, }, 4, 0, "", "",
+"0f 38 cc 18 \tsha256msg1 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cc 04 01 \tsha256msg1 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 04 05 78 56 34 12 \tsha256msg1 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cc 04 08 \tsha256msg1 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cc 04 c8 \tsha256msg1 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cc 40 12 \tsha256msg1 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cc 45 12 \tsha256msg1 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 01 12 \tsha256msg1 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 05 12 \tsha256msg1 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 08 12 \tsha256msg1 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cc 44 c8 12 \tsha256msg1 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 80 78 56 34 12 \tsha256msg1 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cc 85 78 56 34 12 \tsha256msg1 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 01 78 56 34 12 \tsha256msg1 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 05 78 56 34 12 \tsha256msg1 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 08 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcc, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cc 84 c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcc, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 cc bc c8 78 56 34 12 \tsha256msg1 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x0f, 0x38, 0xcd, 0xc1, }, 4, 0, "", "",
+"0f 38 cd c1 \tsha256msg2 %xmm1,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0xd7, }, 4, 0, "", "",
+"0f 38 cd d7 \tsha256msg2 %xmm7,%xmm2",},
+{{0x41, 0x0f, 0x38, 0xcd, 0xc0, }, 5, 0, "", "",
+"41 0f 38 cd c0 \tsha256msg2 %xmm8,%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "",
+"44 0f 38 cd c7 \tsha256msg2 %xmm7,%xmm8",},
+{{0x45, 0x0f, 0x38, 0xcd, 0xc7, }, 5, 0, "", "",
+"45 0f 38 cd c7 \tsha256msg2 %xmm15,%xmm8",},
+{{0x0f, 0x38, 0xcd, 0x00, }, 4, 0, "", "",
+"0f 38 cd 00 \tsha256msg2 (%rax),%xmm0",},
+{{0x41, 0x0f, 0x38, 0xcd, 0x00, }, 5, 0, "", "",
+"41 0f 38 cd 00 \tsha256msg2 (%r8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 04 25 78 56 34 12 \tsha256msg2 0x12345678,%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x18, }, 4, 0, "", "",
+"0f 38 cd 18 \tsha256msg2 (%rax),%xmm3",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x01, }, 5, 0, "", "",
+"0f 38 cd 04 01 \tsha256msg2 (%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 04 05 78 56 34 12 \tsha256msg2 0x12345678(,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0x08, }, 5, 0, "", "",
+"0f 38 cd 04 08 \tsha256msg2 (%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x04, 0xc8, }, 5, 0, "", "",
+"0f 38 cd 04 c8 \tsha256msg2 (%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x40, 0x12, }, 5, 0, "", "",
+"0f 38 cd 40 12 \tsha256msg2 0x12(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x45, 0x12, }, 5, 0, "", "",
+"0f 38 cd 45 12 \tsha256msg2 0x12(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x01, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 01 12 \tsha256msg2 0x12(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x05, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 05 12 \tsha256msg2 0x12(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0x08, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 08 12 \tsha256msg2 0x12(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x44, 0xc8, 0x12, }, 6, 0, "", "",
+"0f 38 cd 44 c8 12 \tsha256msg2 0x12(%rax,%rcx,8),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x80, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 80 78 56 34 12 \tsha256msg2 0x12345678(%rax),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x85, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 cd 85 78 56 34 12 \tsha256msg2 0x12345678(%rbp),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x01, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 01 78 56 34 12 \tsha256msg2 0x12345678(%rcx,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x05, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 05 78 56 34 12 \tsha256msg2 0x12345678(%rbp,%rax,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0x08, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 08 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,1),%xmm0",},
+{{0x0f, 0x38, 0xcd, 0x84, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 cd 84 c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm0",},
+{{0x44, 0x0f, 0x38, 0xcd, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"44 0f 38 cd bc c8 78 56 34 12 \tsha256msg2 0x12345678(%rax,%rcx,8),%xmm15",},
+{{0x66, 0x0f, 0xae, 0x38, }, 4, 0, "", "",
+"66 0f ae 38 \tclflushopt (%rax)",},
+{{0x66, 0x41, 0x0f, 0xae, 0x38, }, 5, 0, "", "",
+"66 41 0f ae 38 \tclflushopt (%r8)",},
+{{0x66, 0x0f, 0xae, 0x3c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae 3c 25 78 56 34 12 \tclflushopt 0x12345678",},
+{{0x66, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0xae, 0xbc, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 41 0f ae bc c8 78 56 34 12 \tclflushopt 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0x38, }, 3, 0, "", "",
+"0f ae 38 \tclflush (%rax)",},
+{{0x41, 0x0f, 0xae, 0x38, }, 4, 0, "", "",
+"41 0f ae 38 \tclflush (%r8)",},
+{{0x0f, 0xae, 0xf8, }, 3, 0, "", "",
+"0f ae f8 \tsfence ",},
+{{0x66, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"66 0f ae 30 \tclwb (%rax)",},
+{{0x66, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "",
+"66 41 0f ae 30 \tclwb (%r8)",},
+{{0x66, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae 34 25 78 56 34 12 \tclwb 0x12345678",},
+{{0x66, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 41 0f ae b4 c8 78 56 34 12 \tclwb 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0x30, }, 3, 0, "", "",
+"0f ae 30 \txsaveopt (%rax)",},
+{{0x41, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"41 0f ae 30 \txsaveopt (%r8)",},
+{{0x0f, 0xae, 0xf0, }, 3, 0, "", "",
+"0f ae f0 \tmfence ",},
+{{0x0f, 0xc7, 0x20, }, 3, 0, "", "",
+"0f c7 20 \txsavec (%rax)",},
+{{0x41, 0x0f, 0xc7, 0x20, }, 4, 0, "", "",
+"41 0f c7 20 \txsavec (%r8)",},
+{{0x0f, 0xc7, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 24 25 78 56 34 12 \txsavec 0x12345678",},
+{{0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xc7, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f c7 a4 c8 78 56 34 12 \txsavec 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xc7, 0x28, }, 3, 0, "", "",
+"0f c7 28 \txsaves (%rax)",},
+{{0x41, 0x0f, 0xc7, 0x28, }, 4, 0, "", "",
+"41 0f c7 28 \txsaves (%r8)",},
+{{0x0f, 0xc7, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 2c 25 78 56 34 12 \txsaves 0x12345678",},
+{{0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xc7, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f c7 ac c8 78 56 34 12 \txsaves 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xc7, 0x18, }, 3, 0, "", "",
+"0f c7 18 \txrstors (%rax)",},
+{{0x41, 0x0f, 0xc7, 0x18, }, 4, 0, "", "",
+"41 0f c7 18 \txrstors (%r8)",},
+{{0x0f, 0xc7, 0x1c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 1c 25 78 56 34 12 \txrstors 0x12345678",},
+{{0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xc7, 0x9c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f c7 9c c8 78 56 34 12 \txrstors 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20 \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20 \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0xae, 0x20, }, 4, 0, "", "",
+"f3 0f ae 20 \tptwritel (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 41 0f ae 20 \tptwritel (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 24 25 78 56 34 12 \tptwritel 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae a4 c8 78 56 34 12 \tptwritel 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 48 0f ae 20 \tptwriteq (%rax)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0x20, }, 5, 0, "", "",
+"f3 49 0f ae 20 \tptwriteq (%r8)",},
+{{0xf3, 0x48, 0x0f, 0xae, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae 24 25 78 56 34 12 \tptwriteq 0x12345678",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 48 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 49 0f ae a4 c8 78 56 34 12 \tptwriteq 0x12345678(%r8,%rcx,8)",},
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
new file mode 100644
index 000000000..891415b10
--- /dev/null
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -0,0 +1,2693 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file contains the instructions used by the test titled:
+ *
+ * "Test x86 instruction decoder - new instructions"
+ *
+ * Note that the 'Expecting' comment lines are consumed by the
+ * gen-insn-x86-dat.awk script and have the format:
+ *
+ * Expecting: <op> <branch> <rel>
+ *
+ * If this file is changed, remember to run the gen-insn-x86-dat.sh
+ * script and commit the result.
+ *
+ * Refer to insn-x86.c for more details.
+ */
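+
+/*
+ * For example, an annotated branch test later in this file reads:
+ *
+ *	asm volatile("bnd call label1");	Expecting: call unconditional 0
+ *
+ * where "call" is <op>, "unconditional" is <branch> and 0 is <rel>.
+ */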
+
+int main(void)
+{
+ /* Following line is a marker for the awk script - do not change */
+ asm volatile("rdtsc"); /* Start here */
+
+ /* Test fix for vcvtph2ps in x86-opcode-map.txt */
+
+ asm volatile("vcvtph2ps %xmm3,%ymm5");
+
+#ifdef __x86_64__
+
+ /* AVX-512: Instructions with the same op codes as Mask Instructions */
+
+ asm volatile("cmovno %rax,%rbx");
+ asm volatile("cmovno 0x12345678(%rax),%rcx");
+ asm volatile("cmovno 0x12345678(%rax),%cx");
+
+ asm volatile("cmove %rax,%rbx");
+ asm volatile("cmove 0x12345678(%rax),%rcx");
+ asm volatile("cmove 0x12345678(%rax),%cx");
+
+ asm volatile("seto 0x12345678(%rax)");
+ asm volatile("setno 0x12345678(%rax)");
+ asm volatile("setb 0x12345678(%rax)");
+ asm volatile("setc 0x12345678(%rax)");
+ asm volatile("setnae 0x12345678(%rax)");
+ asm volatile("setae 0x12345678(%rax)");
+ asm volatile("setnb 0x12345678(%rax)");
+ asm volatile("setnc 0x12345678(%rax)");
+ asm volatile("sets 0x12345678(%rax)");
+ asm volatile("setns 0x12345678(%rax)");
+
+ /* AVX-512: Mask Instructions */
+
+ asm volatile("kandw %k7,%k6,%k5");
+ asm volatile("kandq %k7,%k6,%k5");
+ asm volatile("kandb %k7,%k6,%k5");
+ asm volatile("kandd %k7,%k6,%k5");
+
+ asm volatile("kandnw %k7,%k6,%k5");
+ asm volatile("kandnq %k7,%k6,%k5");
+ asm volatile("kandnb %k7,%k6,%k5");
+ asm volatile("kandnd %k7,%k6,%k5");
+
+ asm volatile("knotw %k7,%k6");
+ asm volatile("knotq %k7,%k6");
+ asm volatile("knotb %k7,%k6");
+ asm volatile("knotd %k7,%k6");
+
+ asm volatile("korw %k7,%k6,%k5");
+ asm volatile("korq %k7,%k6,%k5");
+ asm volatile("korb %k7,%k6,%k5");
+ asm volatile("kord %k7,%k6,%k5");
+
+ asm volatile("kxnorw %k7,%k6,%k5");
+ asm volatile("kxnorq %k7,%k6,%k5");
+ asm volatile("kxnorb %k7,%k6,%k5");
+ asm volatile("kxnord %k7,%k6,%k5");
+
+ asm volatile("kxorw %k7,%k6,%k5");
+ asm volatile("kxorq %k7,%k6,%k5");
+ asm volatile("kxorb %k7,%k6,%k5");
+ asm volatile("kxord %k7,%k6,%k5");
+
+ asm volatile("kaddw %k7,%k6,%k5");
+ asm volatile("kaddq %k7,%k6,%k5");
+ asm volatile("kaddb %k7,%k6,%k5");
+ asm volatile("kaddd %k7,%k6,%k5");
+
+ asm volatile("kunpckbw %k7,%k6,%k5");
+ asm volatile("kunpckwd %k7,%k6,%k5");
+ asm volatile("kunpckdq %k7,%k6,%k5");
+
+ asm volatile("kmovw %k6,%k5");
+ asm volatile("kmovw (%rcx),%k5");
+ asm volatile("kmovw 0x123(%rax,%r14,8),%k5");
+ asm volatile("kmovw %k5,(%rcx)");
+ asm volatile("kmovw %k5,0x123(%rax,%r14,8)");
+ asm volatile("kmovw %eax,%k5");
+ asm volatile("kmovw %ebp,%k5");
+ asm volatile("kmovw %r13d,%k5");
+ asm volatile("kmovw %k5,%eax");
+ asm volatile("kmovw %k5,%ebp");
+ asm volatile("kmovw %k5,%r13d");
+
+ asm volatile("kmovq %k6,%k5");
+ asm volatile("kmovq (%rcx),%k5");
+ asm volatile("kmovq 0x123(%rax,%r14,8),%k5");
+ asm volatile("kmovq %k5,(%rcx)");
+ asm volatile("kmovq %k5,0x123(%rax,%r14,8)");
+ asm volatile("kmovq %rax,%k5");
+ asm volatile("kmovq %rbp,%k5");
+ asm volatile("kmovq %r13,%k5");
+ asm volatile("kmovq %k5,%rax");
+ asm volatile("kmovq %k5,%rbp");
+ asm volatile("kmovq %k5,%r13");
+
+ asm volatile("kmovb %k6,%k5");
+ asm volatile("kmovb (%rcx),%k5");
+ asm volatile("kmovb 0x123(%rax,%r14,8),%k5");
+ asm volatile("kmovb %k5,(%rcx)");
+ asm volatile("kmovb %k5,0x123(%rax,%r14,8)");
+ asm volatile("kmovb %eax,%k5");
+ asm volatile("kmovb %ebp,%k5");
+ asm volatile("kmovb %r13d,%k5");
+ asm volatile("kmovb %k5,%eax");
+ asm volatile("kmovb %k5,%ebp");
+ asm volatile("kmovb %k5,%r13d");
+
+ asm volatile("kmovd %k6,%k5");
+ asm volatile("kmovd (%rcx),%k5");
+ asm volatile("kmovd 0x123(%rax,%r14,8),%k5");
+ asm volatile("kmovd %k5,(%rcx)");
+ asm volatile("kmovd %k5,0x123(%rax,%r14,8)");
+ asm volatile("kmovd %eax,%k5");
+ asm volatile("kmovd %ebp,%k5");
+ asm volatile("kmovd %r13d,%k5");
+ asm volatile("kmovd %k5,%eax");
+ asm volatile("kmovd %k5,%ebp");
+ asm volatile("kmovd %k5,%r13d");
+
+ asm volatile("kortestw %k6,%k5");
+ asm volatile("kortestq %k6,%k5");
+ asm volatile("kortestb %k6,%k5");
+ asm volatile("kortestd %k6,%k5");
+
+ asm volatile("ktestw %k6,%k5");
+ asm volatile("ktestq %k6,%k5");
+ asm volatile("ktestb %k6,%k5");
+ asm volatile("ktestd %k6,%k5");
+
+ asm volatile("kshiftrw $0x12,%k6,%k5");
+ asm volatile("kshiftrq $0x5b,%k6,%k5");
+ asm volatile("kshiftlw $0x12,%k6,%k5");
+ asm volatile("kshiftlq $0x5b,%k6,%k5");
+
+ /* AVX-512: Op code 0f 5b */
+ asm volatile("vcvtdq2ps %xmm5,%xmm6");
+ asm volatile("vcvtqq2ps %zmm29,%ymm6{%k7}");
+ asm volatile("vcvtps2dq %xmm5,%xmm6");
+ asm volatile("vcvttps2dq %xmm5,%xmm6");
+
+ /* AVX-512: Op code 0f 6f */
+
+ asm volatile("movq %mm0,%mm4");
+ asm volatile("vmovdqa %ymm4,%ymm6");
+ asm volatile("vmovdqa32 %zmm25,%zmm26");
+ asm volatile("vmovdqa64 %zmm25,%zmm26");
+ asm volatile("vmovdqu %ymm4,%ymm6");
+ asm volatile("vmovdqu32 %zmm29,%zmm30");
+ asm volatile("vmovdqu64 %zmm25,%zmm26");
+ asm volatile("vmovdqu8 %zmm29,%zmm30");
+ asm volatile("vmovdqu16 %zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 78 */
+
+ asm volatile("vmread %rax,%rbx");
+ asm volatile("vcvttps2udq %zmm25,%zmm26");
+ asm volatile("vcvttpd2udq %zmm29,%ymm6{%k7}");
+ asm volatile("vcvttsd2usi %xmm6,%rax");
+ asm volatile("vcvttss2usi %xmm6,%rax");
+ asm volatile("vcvttps2uqq %ymm5,%zmm26{%k7}");
+ asm volatile("vcvttpd2uqq %zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 79 */
+
+ asm volatile("vmwrite %rax,%rbx");
+ asm volatile("vcvtps2udq %zmm25,%zmm26");
+ asm volatile("vcvtpd2udq %zmm29,%ymm6{%k7}");
+ asm volatile("vcvtsd2usi %xmm6,%rax");
+ asm volatile("vcvtss2usi %xmm6,%rax");
+ asm volatile("vcvtps2uqq %ymm5,%zmm26{%k7}");
+ asm volatile("vcvtpd2uqq %zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 7a */
+
+ asm volatile("vcvtudq2pd %ymm5,%zmm29{%k7}");
+ asm volatile("vcvtuqq2pd %zmm25,%zmm26");
+ asm volatile("vcvtudq2ps %zmm29,%zmm30");
+ asm volatile("vcvtuqq2ps %zmm25,%ymm26{%k7}");
+ asm volatile("vcvttps2qq %ymm25,%zmm26{%k7}");
+ asm volatile("vcvttpd2qq %zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 7b */
+
+ asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6");
+ asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6");
+ asm volatile("vcvtps2qq %ymm5,%zmm26{%k7}");
+ asm volatile("vcvtpd2qq %zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 7f */
+
+ asm volatile("movq.s %mm0,%mm4");
+ asm volatile("vmovdqa %ymm8,%ymm6");
+ asm volatile("vmovdqa32.s %zmm25,%zmm26");
+ asm volatile("vmovdqa64.s %zmm25,%zmm26");
+ asm volatile("vmovdqu %ymm8,%ymm6");
+ asm volatile("vmovdqu32.s %zmm25,%zmm26");
+ asm volatile("vmovdqu64.s %zmm25,%zmm26");
+ asm volatile("vmovdqu8.s %zmm30,(%rcx)");
+ asm volatile("vmovdqu16.s %zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f db */
+
+ asm volatile("pand %mm1,%mm2");
+ asm volatile("pand %xmm1,%xmm2");
+ asm volatile("vpand %ymm4,%ymm6,%ymm2");
+ asm volatile("vpandd %zmm24,%zmm25,%zmm26");
+ asm volatile("vpandq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f df */
+
+ asm volatile("pandn %mm1,%mm2");
+ asm volatile("pandn %xmm1,%xmm2");
+ asm volatile("vpandn %ymm4,%ymm6,%ymm2");
+ asm volatile("vpandnd %zmm24,%zmm25,%zmm26");
+ asm volatile("vpandnq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f e6 */
+
+ asm volatile("vcvttpd2dq %xmm1,%xmm2");
+ asm volatile("vcvtdq2pd %xmm5,%xmm6");
+ asm volatile("vcvtdq2pd %ymm5,%zmm26{%k7}");
+ asm volatile("vcvtqq2pd %zmm25,%zmm26");
+ asm volatile("vcvtpd2dq %xmm1,%xmm2");
+
+ /* AVX-512: Op code 0f eb */
+
+ asm volatile("por %mm4,%mm6");
+ asm volatile("vpor %ymm4,%ymm6,%ymm2");
+ asm volatile("vpord %zmm24,%zmm25,%zmm26");
+ asm volatile("vporq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f ef */
+
+ asm volatile("pxor %mm4,%mm6");
+ asm volatile("vpxor %ymm4,%ymm6,%ymm2");
+ asm volatile("vpxord %zmm24,%zmm25,%zmm26");
+ asm volatile("vpxorq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 10 */
+
+ asm volatile("pblendvb %xmm1,%xmm0");
+ asm volatile("vpsrlvw %zmm27,%zmm28,%zmm29");
+ asm volatile("vpmovuswb %zmm28,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 11 */
+
+ asm volatile("vpmovusdb %zmm28,%xmm6{%k7}");
+ asm volatile("vpsravw %zmm27,%zmm28,%zmm29");
+
+ /* AVX-512: Op code 0f 38 12 */
+
+ asm volatile("vpmovusqb %zmm27,%xmm6{%k7}");
+ asm volatile("vpsllvw %zmm27,%zmm28,%zmm29");
+
+ /* AVX-512: Op code 0f 38 13 */
+
+ asm volatile("vcvtph2ps %xmm3,%ymm5");
+ asm volatile("vcvtph2ps %ymm5,%zmm27{%k7}");
+ asm volatile("vpmovusdw %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 14 */
+
+ asm volatile("blendvps %xmm1,%xmm0");
+ asm volatile("vpmovusqw %zmm27,%xmm6{%k7}");
+ asm volatile("vprorvd %zmm27,%zmm28,%zmm29");
+ asm volatile("vprorvq %zmm27,%zmm28,%zmm29");
+
+ /* AVX-512: Op code 0f 38 15 */
+
+ asm volatile("blendvpd %xmm1,%xmm0");
+ asm volatile("vpmovusqd %zmm27,%ymm6{%k7}");
+ asm volatile("vprolvd %zmm27,%zmm28,%zmm29");
+ asm volatile("vprolvq %zmm27,%zmm28,%zmm29");
+
+ /* AVX-512: Op code 0f 38 16 */
+
+ asm volatile("vpermps %ymm4,%ymm6,%ymm2");
+ asm volatile("vpermps %ymm24,%ymm26,%ymm22{%k7}");
+ asm volatile("vpermpd %ymm24,%ymm26,%ymm22{%k7}");
+
+ /* AVX-512: Op code 0f 38 19 */
+
+ asm volatile("vbroadcastsd %xmm4,%ymm6");
+ asm volatile("vbroadcastf32x2 %xmm27,%zmm26");
+
+ /* AVX-512: Op code 0f 38 1a */
+
+ asm volatile("vbroadcastf128 (%rcx),%ymm4");
+ asm volatile("vbroadcastf32x4 (%rcx),%zmm26");
+ asm volatile("vbroadcastf64x2 (%rcx),%zmm26");
+
+ /* AVX-512: Op code 0f 38 1b */
+
+ asm volatile("vbroadcastf32x8 (%rcx),%zmm27");
+ asm volatile("vbroadcastf64x4 (%rcx),%zmm26");
+
+ /* AVX-512: Op code 0f 38 1f */
+
+ asm volatile("vpabsq %zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 20 */
+
+ asm volatile("vpmovsxbw %xmm4,%xmm5");
+ asm volatile("vpmovswb %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 21 */
+
+ asm volatile("vpmovsxbd %xmm4,%ymm6");
+ asm volatile("vpmovsdb %zmm27,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 22 */
+
+ asm volatile("vpmovsxbq %xmm4,%ymm4");
+ asm volatile("vpmovsqb %zmm27,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 23 */
+
+ asm volatile("vpmovsxwd %xmm4,%ymm4");
+ asm volatile("vpmovsdw %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 24 */
+
+ asm volatile("vpmovsxwq %xmm4,%ymm6");
+ asm volatile("vpmovsqw %zmm27,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 25 */
+
+ asm volatile("vpmovsxdq %xmm4,%ymm4");
+ asm volatile("vpmovsqd %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 26 */
+
+ asm volatile("vptestmb %zmm27,%zmm28,%k5");
+ asm volatile("vptestmw %zmm27,%zmm28,%k5");
+ asm volatile("vptestnmb %zmm26,%zmm27,%k5");
+ asm volatile("vptestnmw %zmm26,%zmm27,%k5");
+
+ /* AVX-512: Op code 0f 38 27 */
+
+ asm volatile("vptestmd %zmm27,%zmm28,%k5");
+ asm volatile("vptestmq %zmm27,%zmm28,%k5");
+ asm volatile("vptestnmd %zmm26,%zmm27,%k5");
+ asm volatile("vptestnmq %zmm26,%zmm27,%k5");
+
+ /* AVX-512: Op code 0f 38 28 */
+
+ asm volatile("vpmuldq %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmovm2b %k5,%zmm28");
+ asm volatile("vpmovm2w %k5,%zmm28");
+
+ /* AVX-512: Op code 0f 38 29 */
+
+ asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmovb2m %zmm28,%k5");
+ asm volatile("vpmovw2m %zmm28,%k5");
+
+ /* AVX-512: Op code 0f 38 2a */
+
+ asm volatile("vmovntdqa (%rcx),%ymm4");
+ asm volatile("vpbroadcastmb2q %k6,%zmm30");
+
+ /* AVX-512: Op code 0f 38 2c */
+
+ asm volatile("vmaskmovps (%rcx),%ymm4,%ymm6");
+ asm volatile("vscalefps %zmm24,%zmm25,%zmm26");
+ asm volatile("vscalefpd %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 2d */
+
+ asm volatile("vmaskmovpd (%rcx),%ymm4,%ymm6");
+ asm volatile("vscalefss %xmm24,%xmm25,%xmm26{%k7}");
+ asm volatile("vscalefsd %xmm24,%xmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 38 30 */
+
+ asm volatile("vpmovzxbw %xmm4,%ymm4");
+ asm volatile("vpmovwb %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 31 */
+
+ asm volatile("vpmovzxbd %xmm4,%ymm6");
+ asm volatile("vpmovdb %zmm27,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 32 */
+
+ asm volatile("vpmovzxbq %xmm4,%ymm4");
+ asm volatile("vpmovqb %zmm27,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 33 */
+
+ asm volatile("vpmovzxwd %xmm4,%ymm4");
+ asm volatile("vpmovdw %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 34 */
+
+ asm volatile("vpmovzxwq %xmm4,%ymm6");
+ asm volatile("vpmovqw %zmm27,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 35 */
+
+ asm volatile("vpmovzxdq %xmm4,%ymm4");
+ asm volatile("vpmovqd %zmm27,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 36 */
+
+ asm volatile("vpermd %ymm4,%ymm6,%ymm2");
+ asm volatile("vpermd %ymm24,%ymm26,%ymm22{%k7}");
+ asm volatile("vpermq %ymm24,%ymm26,%ymm22{%k7}");
+
+ /* AVX-512: Op code 0f 38 38 */
+
+ asm volatile("vpminsb %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmovm2d %k5,%zmm28");
+ asm volatile("vpmovm2q %k5,%zmm28");
+
+ /* AVX-512: Op code 0f 38 39 */
+
+ asm volatile("vpminsd %xmm1,%xmm2,%xmm3");
+ asm volatile("vpminsd %zmm24,%zmm25,%zmm26");
+ asm volatile("vpminsq %zmm24,%zmm25,%zmm26");
+ asm volatile("vpmovd2m %zmm28,%k5");
+ asm volatile("vpmovq2m %zmm28,%k5");
+
+ /* AVX-512: Op code 0f 38 3a */
+
+ asm volatile("vpminuw %ymm4,%ymm6,%ymm2");
+ asm volatile("vpbroadcastmw2d %k6,%zmm28");
+
+ /* AVX-512: Op code 0f 38 3b */
+
+ asm volatile("vpminud %ymm4,%ymm6,%ymm2");
+ asm volatile("vpminud %zmm24,%zmm25,%zmm26");
+ asm volatile("vpminuq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 3d */
+
+ asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmaxsd %zmm24,%zmm25,%zmm26");
+ asm volatile("vpmaxsq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 3f */
+
+ asm volatile("vpmaxud %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmaxud %zmm24,%zmm25,%zmm26");
+ asm volatile("vpmaxuq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 40 */
+
+ asm volatile("vpmulld %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmulld %zmm24,%zmm25,%zmm26");
+ asm volatile("vpmullq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 42 */
+
+ asm volatile("vgetexpps %zmm25,%zmm26");
+ asm volatile("vgetexppd %zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 43 */
+
+ asm volatile("vgetexpss %xmm24,%xmm25,%xmm26{%k7}");
+ asm volatile("vgetexpsd %xmm28,%xmm29,%xmm30{%k7}");
+
+ /* AVX-512: Op code 0f 38 44 */
+
+ asm volatile("vplzcntd %zmm27,%zmm28");
+ asm volatile("vplzcntq %zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 46 */
+
+ asm volatile("vpsravd %ymm4,%ymm6,%ymm2");
+ asm volatile("vpsravd %zmm24,%zmm25,%zmm26");
+ asm volatile("vpsravq %zmm24,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 38 4c */
+
+ asm volatile("vrcp14ps %zmm25,%zmm26");
+ asm volatile("vrcp14pd %zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 4d */
+
+ asm volatile("vrcp14ss %xmm24,%xmm25,%xmm26{%k7}");
+ asm volatile("vrcp14sd %xmm24,%xmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 38 4e */
+
+ asm volatile("vrsqrt14ps %zmm25,%zmm26");
+ asm volatile("vrsqrt14pd %zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 4f */
+
+ asm volatile("vrsqrt14ss %xmm24,%xmm25,%xmm26{%k7}");
+ asm volatile("vrsqrt14sd %xmm24,%xmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 38 59 */
+
+ asm volatile("vpbroadcastq %xmm4,%xmm6");
+ asm volatile("vbroadcasti32x2 %xmm27,%zmm26");
+
+ /* AVX-512: Op code 0f 38 5a */
+
+ asm volatile("vbroadcasti128 (%rcx),%ymm4");
+ asm volatile("vbroadcasti32x4 (%rcx),%zmm26");
+ asm volatile("vbroadcasti64x2 (%rcx),%zmm26");
+
+ /* AVX-512: Op code 0f 38 5b */
+
+ asm volatile("vbroadcasti32x8 (%rcx),%zmm28");
+ asm volatile("vbroadcasti64x4 (%rcx),%zmm26");
+
+ /* AVX-512: Op code 0f 38 64 */
+
+ asm volatile("vpblendmd %zmm26,%zmm27,%zmm28");
+ asm volatile("vpblendmq %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 65 */
+
+ asm volatile("vblendmps %zmm24,%zmm25,%zmm26");
+ asm volatile("vblendmpd %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 66 */
+
+ asm volatile("vpblendmb %zmm26,%zmm27,%zmm28");
+ asm volatile("vpblendmw %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 75 */
+
+ asm volatile("vpermi2b %zmm24,%zmm25,%zmm26");
+ asm volatile("vpermi2w %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 76 */
+
+ asm volatile("vpermi2d %zmm26,%zmm27,%zmm28");
+ asm volatile("vpermi2q %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 77 */
+
+ asm volatile("vpermi2ps %zmm26,%zmm27,%zmm28");
+ asm volatile("vpermi2pd %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 7a */
+
+ asm volatile("vpbroadcastb %eax,%xmm30");
+
+ /* AVX-512: Op code 0f 38 7b */
+
+ asm volatile("vpbroadcastw %eax,%xmm30");
+
+ /* AVX-512: Op code 0f 38 7c */
+
+ asm volatile("vpbroadcastd %eax,%xmm30");
+ asm volatile("vpbroadcastq %rax,%zmm30");
+
+ /* AVX-512: Op code 0f 38 7d */
+
+ asm volatile("vpermt2b %zmm26,%zmm27,%zmm28");
+ asm volatile("vpermt2w %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 7e */
+
+ asm volatile("vpermt2d %zmm26,%zmm27,%zmm28");
+ asm volatile("vpermt2q %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 7f */
+
+ asm volatile("vpermt2ps %zmm26,%zmm27,%zmm28");
+ asm volatile("vpermt2pd %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 83 */
+
+ asm volatile("vpmultishiftqb %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 88 */
+
+ asm volatile("vexpandps (%rcx),%zmm26");
+ asm volatile("vexpandpd (%rcx),%zmm28");
+
+ /* AVX-512: Op code 0f 38 89 */
+
+ asm volatile("vpexpandd (%rcx),%zmm28");
+ asm volatile("vpexpandq (%rcx),%zmm26");
+
+ /* AVX-512: Op code 0f 38 8a */
+
+ asm volatile("vcompressps %zmm28,(%rcx)");
+ asm volatile("vcompresspd %zmm28,(%rcx)");
+
+ /* AVX-512: Op code 0f 38 8b */
+
+ asm volatile("vpcompressd %zmm28,(%rcx)");
+ asm volatile("vpcompressq %zmm26,(%rcx)");
+
+ /* AVX-512: Op code 0f 38 8d */
+
+ asm volatile("vpermb %zmm26,%zmm27,%zmm28");
+ asm volatile("vpermw %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 90 */
+
+ asm volatile("vpgatherdd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherdq %xmm2,0x04(%rbp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherdd 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
+ asm volatile("vpgatherdq 0x7b(%rbp,%ymm27,8),%zmm26{%k1}");
+
+ /* AVX-512: Op code 0f 38 91 */
+
+ asm volatile("vpgatherqd %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherqq %xmm2,0x02(%rbp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherqd 0x7b(%rbp,%zmm27,8),%ymm26{%k1}");
+ asm volatile("vpgatherqq 0x7b(%rbp,%zmm27,8),%zmm26{%k1}");
+
+ /* AVX-512: Op code 0f 38 a0 */
+
+ asm volatile("vpscatterdd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+ asm volatile("vpscatterdq %zmm26,0x7b(%rbp,%ymm27,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 a1 */
+
+ asm volatile("vpscatterqd %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
+ asm volatile("vpscatterqq %ymm6,0x7b(%rbp,%ymm27,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 a2 */
+
+ asm volatile("vscatterdps %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+ asm volatile("vscatterdpd %zmm28,0x7b(%rbp,%ymm27,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 a3 */
+
+ asm volatile("vscatterqps %ymm6,0x7b(%rbp,%zmm29,8){%k1}");
+ asm volatile("vscatterqpd %zmm28,0x7b(%rbp,%zmm29,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 b4 */
+
+ asm volatile("vpmadd52luq %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 b5 */
+
+ asm volatile("vpmadd52huq %zmm26,%zmm27,%zmm28");
+
+ /* AVX-512: Op code 0f 38 c4 */
+
+ asm volatile("vpconflictd %zmm26,%zmm27");
+ asm volatile("vpconflictq %zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 38 c8 */
+
+ asm volatile("vexp2ps %zmm29,%zmm30");
+ asm volatile("vexp2pd %zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 38 ca */
+
+ asm volatile("vrcp28ps %zmm29,%zmm30");
+ asm volatile("vrcp28pd %zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 38 cb */
+
+ asm volatile("vrcp28ss %xmm28,%xmm29,%xmm30{%k7}");
+ asm volatile("vrcp28sd %xmm25,%xmm26,%xmm27{%k7}");
+
+ /* AVX-512: Op code 0f 38 cc */
+
+ asm volatile("vrsqrt28ps %zmm29,%zmm30");
+ asm volatile("vrsqrt28pd %zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 38 cd */
+
+ asm volatile("vrsqrt28ss %xmm28,%xmm29,%xmm30{%k7}");
+ asm volatile("vrsqrt28sd %xmm25,%xmm26,%xmm27{%k7}");
+
+ /* AVX-512: Op code 0f 3a 03 */
+
+ asm volatile("valignd $0x12,%zmm28,%zmm29,%zmm30");
+ asm volatile("valignq $0x12,%zmm25,%zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 3a 08 */
+
+ asm volatile("vroundps $0x5,%ymm6,%ymm2");
+ asm volatile("vrndscaleps $0x12,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 3a 09 */
+
+ asm volatile("vroundpd $0x5,%ymm6,%ymm2");
+ asm volatile("vrndscalepd $0x12,%zmm25,%zmm26");
+
+ /* AVX-512: Op code 0f 3a 0a */
+
+ asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2");
+ asm volatile("vrndscaless $0x12,%xmm24,%xmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 3a 0b */
+
+ asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2");
+ asm volatile("vrndscalesd $0x12,%xmm24,%xmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 3a 18 */
+
+ asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6");
+ asm volatile("vinsertf32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+ asm volatile("vinsertf64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+
+ /* AVX-512: Op code 0f 3a 19 */
+
+ asm volatile("vextractf128 $0x5,%ymm4,%xmm4");
+ asm volatile("vextractf32x4 $0x12,%zmm25,%xmm26{%k7}");
+ asm volatile("vextractf64x2 $0x12,%zmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 3a 1a */
+
+ asm volatile("vinsertf32x8 $0x12,%ymm25,%zmm26,%zmm27{%k7}");
+ asm volatile("vinsertf64x4 $0x12,%ymm28,%zmm29,%zmm30{%k7}");
+
+ /* AVX-512: Op code 0f 3a 1b */
+
+ asm volatile("vextractf32x8 $0x12,%zmm29,%ymm30{%k7}");
+ asm volatile("vextractf64x4 $0x12,%zmm26,%ymm27{%k7}");
+
+ /* AVX-512: Op code 0f 3a 1e */
+
+ asm volatile("vpcmpud $0x12,%zmm29,%zmm30,%k5");
+ asm volatile("vpcmpuq $0x12,%zmm26,%zmm27,%k5");
+
+ /* AVX-512: Op code 0f 3a 1f */
+
+ asm volatile("vpcmpd $0x12,%zmm29,%zmm30,%k5");
+ asm volatile("vpcmpq $0x12,%zmm26,%zmm27,%k5");
+
+ /* AVX-512: Op code 0f 3a 23 */
+
+ asm volatile("vshuff32x4 $0x12,%zmm28,%zmm29,%zmm30");
+ asm volatile("vshuff64x2 $0x12,%zmm25,%zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 3a 25 */
+
+ asm volatile("vpternlogd $0x12,%zmm28,%zmm29,%zmm30");
+ asm volatile("vpternlogq $0x12,%zmm28,%zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 3a 26 */
+
+ asm volatile("vgetmantps $0x12,%zmm26,%zmm27");
+ asm volatile("vgetmantpd $0x12,%zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 3a 27 */
+
+ asm volatile("vgetmantss $0x12,%xmm25,%xmm26,%xmm27{%k7}");
+ asm volatile("vgetmantsd $0x12,%xmm28,%xmm29,%xmm30{%k7}");
+
+ /* AVX-512: Op code 0f 3a 38 */
+
+ asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6");
+ asm volatile("vinserti32x4 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+ asm volatile("vinserti64x2 $0x12,%xmm24,%zmm25,%zmm26{%k7}");
+
+ /* AVX-512: Op code 0f 3a 39 */
+
+ asm volatile("vextracti128 $0x5,%ymm4,%xmm6");
+ asm volatile("vextracti32x4 $0x12,%zmm25,%xmm26{%k7}");
+ asm volatile("vextracti64x2 $0x12,%zmm25,%xmm26{%k7}");
+
+ /* AVX-512: Op code 0f 3a 3a */
+
+ asm volatile("vinserti32x8 $0x12,%ymm28,%zmm29,%zmm30{%k7}");
+ asm volatile("vinserti64x4 $0x12,%ymm25,%zmm26,%zmm27{%k7}");
+
+ /* AVX-512: Op code 0f 3a 3b */
+
+ asm volatile("vextracti32x8 $0x12,%zmm29,%ymm30{%k7}");
+ asm volatile("vextracti64x4 $0x12,%zmm26,%ymm27{%k7}");
+
+ /* AVX-512: Op code 0f 3a 3e */
+
+ asm volatile("vpcmpub $0x12,%zmm29,%zmm30,%k5");
+ asm volatile("vpcmpuw $0x12,%zmm26,%zmm27,%k5");
+
+ /* AVX-512: Op code 0f 3a 3f */
+
+ asm volatile("vpcmpb $0x12,%zmm29,%zmm30,%k5");
+ asm volatile("vpcmpw $0x12,%zmm26,%zmm27,%k5");
+
+ /* AVX-512: Op code 0f 3a 42 */
+
+ asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2");
+ asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 3a 43 */
+
+ asm volatile("vshufi32x4 $0x12,%zmm25,%zmm26,%zmm27");
+ asm volatile("vshufi64x2 $0x12,%zmm28,%zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 3a 50 */
+
+ asm volatile("vrangeps $0x12,%zmm25,%zmm26,%zmm27");
+ asm volatile("vrangepd $0x12,%zmm28,%zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 3a 51 */
+
+ asm volatile("vrangess $0x12,%xmm25,%xmm26,%xmm27");
+ asm volatile("vrangesd $0x12,%xmm28,%xmm29,%xmm30");
+
+ /* AVX-512: Op code 0f 3a 54 */
+
+ asm volatile("vfixupimmps $0x12,%zmm28,%zmm29,%zmm30");
+ asm volatile("vfixupimmpd $0x12,%zmm25,%zmm26,%zmm27");
+
+ /* AVX-512: Op code 0f 3a 55 */
+
+ asm volatile("vfixupimmss $0x12,%xmm28,%xmm29,%xmm30{%k7}");
+ asm volatile("vfixupimmsd $0x12,%xmm25,%xmm26,%xmm27{%k7}");
+
+ /* AVX-512: Op code 0f 3a 56 */
+
+ asm volatile("vreduceps $0x12,%zmm26,%zmm27");
+ asm volatile("vreducepd $0x12,%zmm29,%zmm30");
+
+ /* AVX-512: Op code 0f 3a 57 */
+
+ asm volatile("vreducess $0x12,%xmm25,%xmm26,%xmm27");
+ asm volatile("vreducesd $0x12,%xmm28,%xmm29,%xmm30");
+
+ /* AVX-512: Op code 0f 3a 66 */
+
+ asm volatile("vfpclassps $0x12,%zmm27,%k5");
+ asm volatile("vfpclasspd $0x12,%zmm30,%k5");
+
+ /* AVX-512: Op code 0f 3a 67 */
+
+ asm volatile("vfpclassss $0x12,%xmm27,%k5");
+ asm volatile("vfpclasssd $0x12,%xmm30,%k5");
+
+ /* AVX-512: Op code 0f 72 (Grp13) */
+
+ asm volatile("vprord $0x12,%zmm25,%zmm26");
+ asm volatile("vprorq $0x12,%zmm25,%zmm26");
+ asm volatile("vprold $0x12,%zmm29,%zmm30");
+ asm volatile("vprolq $0x12,%zmm29,%zmm30");
+ asm volatile("psrad $0x2,%mm6");
+ asm volatile("vpsrad $0x5,%ymm6,%ymm2");
+ asm volatile("vpsrad $0x5,%zmm26,%zmm22");
+ asm volatile("vpsraq $0x5,%zmm26,%zmm22");
+
+ /* AVX-512: Op code 0f 38 c6 (Grp18) */
+
+ asm volatile("vgatherpf0dps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vgatherpf0dpd 0x7b(%r14,%ymm31,8){%k1}");
+ asm volatile("vgatherpf1dps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vgatherpf1dpd 0x7b(%r14,%ymm31,8){%k1}");
+ asm volatile("vscatterpf0dps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vscatterpf0dpd 0x7b(%r14,%ymm31,8){%k1}");
+ asm volatile("vscatterpf1dps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vscatterpf1dpd 0x7b(%r14,%ymm31,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 c7 (Grp19) */
+
+ asm volatile("vgatherpf0qps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vgatherpf0qpd 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vgatherpf1qps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vgatherpf1qpd 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vscatterpf0qps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vscatterpf0qpd 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vscatterpf1qps 0x7b(%r14,%zmm31,8){%k1}");
+ asm volatile("vscatterpf1qpd 0x7b(%r14,%zmm31,8){%k1}");
+
+ /* AVX-512: Examples */
+
+ asm volatile("vaddpd %zmm28,%zmm29,%zmm30");
+ asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}");
+ asm volatile("vaddpd %zmm28,%zmm29,%zmm30{%k7}{z}");
+ asm volatile("vaddpd {rn-sae},%zmm28,%zmm29,%zmm30");
+ asm volatile("vaddpd {ru-sae},%zmm28,%zmm29,%zmm30");
+ asm volatile("vaddpd {rd-sae},%zmm28,%zmm29,%zmm30");
+ asm volatile("vaddpd {rz-sae},%zmm28,%zmm29,%zmm30");
+ asm volatile("vaddpd (%rcx),%zmm29,%zmm30");
+ asm volatile("vaddpd 0x123(%rax,%r14,8),%zmm29,%zmm30");
+ asm volatile("vaddpd (%rcx){1to8},%zmm29,%zmm30");
+ asm volatile("vaddpd 0x1fc0(%rdx),%zmm29,%zmm30");
+ asm volatile("vaddpd 0x3f8(%rdx){1to8},%zmm29,%zmm30");
+ asm volatile("vcmpeq_uqps 0x1fc(%rdx){1to16},%zmm30,%k5");
+ asm volatile("vcmpltsd 0x123(%rax,%r14,8),%xmm29,%k5{%k7}");
+ asm volatile("vcmplesd {sae},%xmm28,%xmm29,%k5{%k7}");
+ asm volatile("vgetmantss $0x5b,0x123(%rax,%r14,8),%xmm29,%xmm30{%k7}");
+
+ /* bndmk m64, bnd */
+
+ asm volatile("bndmk (%rax), %bnd0");
+ asm volatile("bndmk (%r8), %bnd0");
+ asm volatile("bndmk (0x12345678), %bnd0");
+ asm volatile("bndmk (%rax), %bnd3");
+ asm volatile("bndmk (%rcx,%rax,1), %bnd0");
+ asm volatile("bndmk 0x12345678(,%rax,1), %bnd0");
+ asm volatile("bndmk (%rax,%rcx,1), %bnd0");
+ asm volatile("bndmk (%rax,%rcx,8), %bnd0");
+ asm volatile("bndmk 0x12(%rax), %bnd0");
+ asm volatile("bndmk 0x12(%rbp), %bnd0");
+ asm volatile("bndmk 0x12(%rcx,%rax,1), %bnd0");
+ asm volatile("bndmk 0x12(%rbp,%rax,1), %bnd0");
+ asm volatile("bndmk 0x12(%rax,%rcx,1), %bnd0");
+ asm volatile("bndmk 0x12(%rax,%rcx,8), %bnd0");
+ asm volatile("bndmk 0x12345678(%rax), %bnd0");
+ asm volatile("bndmk 0x12345678(%rbp), %bnd0");
+ asm volatile("bndmk 0x12345678(%rcx,%rax,1), %bnd0");
+ asm volatile("bndmk 0x12345678(%rbp,%rax,1), %bnd0");
+ asm volatile("bndmk 0x12345678(%rax,%rcx,1), %bnd0");
+ asm volatile("bndmk 0x12345678(%rax,%rcx,8), %bnd0");
+
+ /* bndcl r/m64, bnd */
+
+ asm volatile("bndcl (%rax), %bnd0");
+ asm volatile("bndcl (%r8), %bnd0");
+ asm volatile("bndcl (0x12345678), %bnd0");
+ asm volatile("bndcl (%rax), %bnd3");
+ asm volatile("bndcl (%rcx,%rax,1), %bnd0");
+ asm volatile("bndcl 0x12345678(,%rax,1), %bnd0");
+ asm volatile("bndcl (%rax,%rcx,1), %bnd0");
+ asm volatile("bndcl (%rax,%rcx,8), %bnd0");
+ asm volatile("bndcl 0x12(%rax), %bnd0");
+ asm volatile("bndcl 0x12(%rbp), %bnd0");
+ asm volatile("bndcl 0x12(%rcx,%rax,1), %bnd0");
+ asm volatile("bndcl 0x12(%rbp,%rax,1), %bnd0");
+ asm volatile("bndcl 0x12(%rax,%rcx,1), %bnd0");
+ asm volatile("bndcl 0x12(%rax,%rcx,8), %bnd0");
+ asm volatile("bndcl 0x12345678(%rax), %bnd0");
+ asm volatile("bndcl 0x12345678(%rbp), %bnd0");
+ asm volatile("bndcl 0x12345678(%rcx,%rax,1), %bnd0");
+ asm volatile("bndcl 0x12345678(%rbp,%rax,1), %bnd0");
+ asm volatile("bndcl 0x12345678(%rax,%rcx,1), %bnd0");
+ asm volatile("bndcl 0x12345678(%rax,%rcx,8), %bnd0");
+ asm volatile("bndcl %rax, %bnd0");
+
+ /* bndcu r/m64, bnd */
+
+ asm volatile("bndcu (%rax), %bnd0");
+ asm volatile("bndcu (%r8), %bnd0");
+ asm volatile("bndcu (0x12345678), %bnd0");
+ asm volatile("bndcu (%rax), %bnd3");
+ asm volatile("bndcu (%rcx,%rax,1), %bnd0");
+ asm volatile("bndcu 0x12345678(,%rax,1), %bnd0");
+ asm volatile("bndcu (%rax,%rcx,1), %bnd0");
+ asm volatile("bndcu (%rax,%rcx,8), %bnd0");
+ asm volatile("bndcu 0x12(%rax), %bnd0");
+ asm volatile("bndcu 0x12(%rbp), %bnd0");
+ asm volatile("bndcu 0x12(%rcx,%rax,1), %bnd0");
+ asm volatile("bndcu 0x12(%rbp,%rax,1), %bnd0");
+ asm volatile("bndcu 0x12(%rax,%rcx,1), %bnd0");
+ asm volatile("bndcu 0x12(%rax,%rcx,8), %bnd0");
+ asm volatile("bndcu 0x12345678(%rax), %bnd0");
+ asm volatile("bndcu 0x12345678(%rbp), %bnd0");
+ asm volatile("bndcu 0x12345678(%rcx,%rax,1), %bnd0");
+ asm volatile("bndcu 0x12345678(%rbp,%rax,1), %bnd0");
+ asm volatile("bndcu 0x12345678(%rax,%rcx,1), %bnd0");
+ asm volatile("bndcu 0x12345678(%rax,%rcx,8), %bnd0");
+ asm volatile("bndcu %rax, %bnd0");
+
+ /* bndcn r/m64, bnd */
+
+ asm volatile("bndcn (%rax), %bnd0");
+ asm volatile("bndcn (%r8), %bnd0");
+ asm volatile("bndcn (0x12345678), %bnd0");
+ asm volatile("bndcn (%rax), %bnd3");
+ asm volatile("bndcn (%rcx,%rax,1), %bnd0");
+ asm volatile("bndcn 0x12345678(,%rax,1), %bnd0");
+ asm volatile("bndcn (%rax,%rcx,1), %bnd0");
+ asm volatile("bndcn (%rax,%rcx,8), %bnd0");
+ asm volatile("bndcn 0x12(%rax), %bnd0");
+ asm volatile("bndcn 0x12(%rbp), %bnd0");
+ asm volatile("bndcn 0x12(%rcx,%rax,1), %bnd0");
+ asm volatile("bndcn 0x12(%rbp,%rax,1), %bnd0");
+ asm volatile("bndcn 0x12(%rax,%rcx,1), %bnd0");
+ asm volatile("bndcn 0x12(%rax,%rcx,8), %bnd0");
+ asm volatile("bndcn 0x12345678(%rax), %bnd0");
+ asm volatile("bndcn 0x12345678(%rbp), %bnd0");
+ asm volatile("bndcn 0x12345678(%rcx,%rax,1), %bnd0");
+ asm volatile("bndcn 0x12345678(%rbp,%rax,1), %bnd0");
+ asm volatile("bndcn 0x12345678(%rax,%rcx,1), %bnd0");
+ asm volatile("bndcn 0x12345678(%rax,%rcx,8), %bnd0");
+ asm volatile("bndcn %rax, %bnd0");
+
+ /* bndmov m128, bnd */
+
+ asm volatile("bndmov (%rax), %bnd0");
+ asm volatile("bndmov (%r8), %bnd0");
+ asm volatile("bndmov (0x12345678), %bnd0");
+ asm volatile("bndmov (%rax), %bnd3");
+ asm volatile("bndmov (%rcx,%rax,1), %bnd0");
+ asm volatile("bndmov 0x12345678(,%rax,1), %bnd0");
+ asm volatile("bndmov (%rax,%rcx,1), %bnd0");
+ asm volatile("bndmov (%rax,%rcx,8), %bnd0");
+ asm volatile("bndmov 0x12(%rax), %bnd0");
+ asm volatile("bndmov 0x12(%rbp), %bnd0");
+ asm volatile("bndmov 0x12(%rcx,%rax,1), %bnd0");
+ asm volatile("bndmov 0x12(%rbp,%rax,1), %bnd0");
+ asm volatile("bndmov 0x12(%rax,%rcx,1), %bnd0");
+ asm volatile("bndmov 0x12(%rax,%rcx,8), %bnd0");
+ asm volatile("bndmov 0x12345678(%rax), %bnd0");
+ asm volatile("bndmov 0x12345678(%rbp), %bnd0");
+ asm volatile("bndmov 0x12345678(%rcx,%rax,1), %bnd0");
+ asm volatile("bndmov 0x12345678(%rbp,%rax,1), %bnd0");
+ asm volatile("bndmov 0x12345678(%rax,%rcx,1), %bnd0");
+ asm volatile("bndmov 0x12345678(%rax,%rcx,8), %bnd0");
+
+ /* bndmov bnd, m128 */
+
+ asm volatile("bndmov %bnd0, (%rax)");
+ asm volatile("bndmov %bnd0, (%r8)");
+ asm volatile("bndmov %bnd0, (0x12345678)");
+ asm volatile("bndmov %bnd3, (%rax)");
+ asm volatile("bndmov %bnd0, (%rcx,%rax,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(,%rax,1)");
+ asm volatile("bndmov %bnd0, (%rax,%rcx,1)");
+ asm volatile("bndmov %bnd0, (%rax,%rcx,8)");
+ asm volatile("bndmov %bnd0, 0x12(%rax)");
+ asm volatile("bndmov %bnd0, 0x12(%rbp)");
+ asm volatile("bndmov %bnd0, 0x12(%rcx,%rax,1)");
+ asm volatile("bndmov %bnd0, 0x12(%rbp,%rax,1)");
+ asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,1)");
+ asm volatile("bndmov %bnd0, 0x12(%rax,%rcx,8)");
+ asm volatile("bndmov %bnd0, 0x12345678(%rax)");
+ asm volatile("bndmov %bnd0, 0x12345678(%rbp)");
+ asm volatile("bndmov %bnd0, 0x12345678(%rcx,%rax,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(%rbp,%rax,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(%rax,%rcx,8)");
+
+ /* bndmov bnd2, bnd1 */
+
+ asm volatile("bndmov %bnd0, %bnd1");
+ asm volatile("bndmov %bnd1, %bnd0");
+
+ /* bndldx mib, bnd */
+
+ asm volatile("bndldx (%rax), %bnd0");
+ asm volatile("bndldx (%r8), %bnd0");
+ asm volatile("bndldx (0x12345678), %bnd0");
+ asm volatile("bndldx (%rax), %bnd3");
+ asm volatile("bndldx (%rcx,%rax,1), %bnd0");
+ asm volatile("bndldx 0x12345678(,%rax,1), %bnd0");
+ asm volatile("bndldx (%rax,%rcx,1), %bnd0");
+ asm volatile("bndldx 0x12(%rax), %bnd0");
+ asm volatile("bndldx 0x12(%rbp), %bnd0");
+ asm volatile("bndldx 0x12(%rcx,%rax,1), %bnd0");
+ asm volatile("bndldx 0x12(%rbp,%rax,1), %bnd0");
+ asm volatile("bndldx 0x12(%rax,%rcx,1), %bnd0");
+ asm volatile("bndldx 0x12345678(%rax), %bnd0");
+ asm volatile("bndldx 0x12345678(%rbp), %bnd0");
+ asm volatile("bndldx 0x12345678(%rcx,%rax,1), %bnd0");
+ asm volatile("bndldx 0x12345678(%rbp,%rax,1), %bnd0");
+ asm volatile("bndldx 0x12345678(%rax,%rcx,1), %bnd0");
+
+ /* bndstx bnd, mib */
+
+ asm volatile("bndstx %bnd0, (%rax)");
+ asm volatile("bndstx %bnd0, (%r8)");
+ asm volatile("bndstx %bnd0, (0x12345678)");
+ asm volatile("bndstx %bnd3, (%rax)");
+ asm volatile("bndstx %bnd0, (%rcx,%rax,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(,%rax,1)");
+ asm volatile("bndstx %bnd0, (%rax,%rcx,1)");
+ asm volatile("bndstx %bnd0, 0x12(%rax)");
+ asm volatile("bndstx %bnd0, 0x12(%rbp)");
+ asm volatile("bndstx %bnd0, 0x12(%rcx,%rax,1)");
+ asm volatile("bndstx %bnd0, 0x12(%rbp,%rax,1)");
+ asm volatile("bndstx %bnd0, 0x12(%rax,%rcx,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(%rax)");
+ asm volatile("bndstx %bnd0, 0x12345678(%rbp)");
+ asm volatile("bndstx %bnd0, 0x12345678(%rcx,%rax,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(%rbp,%rax,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(%rax,%rcx,1)");
+
+ /* bnd prefix on call, ret, jmp and all jcc */
+
+ asm volatile("bnd call label1"); /* Expecting: call unconditional 0 */
+ asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd ret"); /* Expecting: ret indirect 0 */
+ asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0 */
+ asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0 */
+ asm volatile("bnd jmp *(%ecx)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jne label1"); /* Expecting: jcc conditional 0 */
+
+ /* sha1rnds4 imm8, xmm2/m128, xmm1 */
+
+ asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0");
+ asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2");
+ asm volatile("sha1rnds4 $0x91, %xmm8, %xmm0");
+ asm volatile("sha1rnds4 $0x91, %xmm7, %xmm8");
+ asm volatile("sha1rnds4 $0x91, %xmm15, %xmm8");
+ asm volatile("sha1rnds4 $0x91, (%rax), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%r8), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%rax), %xmm3");
+ asm volatile("sha1rnds4 $0x91, (%rcx,%rax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(,%rax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%rax,%rcx,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%rax,%rcx,8), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%rax), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%rbp), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rax), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* sha1nexte xmm2/m128, xmm1 */
+
+ asm volatile("sha1nexte %xmm1, %xmm0");
+ asm volatile("sha1nexte %xmm7, %xmm2");
+ asm volatile("sha1nexte %xmm8, %xmm0");
+ asm volatile("sha1nexte %xmm7, %xmm8");
+ asm volatile("sha1nexte %xmm15, %xmm8");
+ asm volatile("sha1nexte (%rax), %xmm0");
+ asm volatile("sha1nexte (%r8), %xmm0");
+ asm volatile("sha1nexte (0x12345678), %xmm0");
+ asm volatile("sha1nexte (%rax), %xmm3");
+ asm volatile("sha1nexte (%rcx,%rax,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(,%rax,1), %xmm0");
+ asm volatile("sha1nexte (%rax,%rcx,1), %xmm0");
+ asm volatile("sha1nexte (%rax,%rcx,8), %xmm0");
+ asm volatile("sha1nexte 0x12(%rax), %xmm0");
+ asm volatile("sha1nexte 0x12(%rbp), %xmm0");
+ asm volatile("sha1nexte 0x12(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1nexte 0x12(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1nexte 0x12(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1nexte 0x12(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rax), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rbp), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* sha1msg1 xmm2/m128, xmm1 */
+
+ asm volatile("sha1msg1 %xmm1, %xmm0");
+ asm volatile("sha1msg1 %xmm7, %xmm2");
+ asm volatile("sha1msg1 %xmm8, %xmm0");
+ asm volatile("sha1msg1 %xmm7, %xmm8");
+ asm volatile("sha1msg1 %xmm15, %xmm8");
+ asm volatile("sha1msg1 (%rax), %xmm0");
+ asm volatile("sha1msg1 (%r8), %xmm0");
+ asm volatile("sha1msg1 (0x12345678), %xmm0");
+ asm volatile("sha1msg1 (%rax), %xmm3");
+ asm volatile("sha1msg1 (%rcx,%rax,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(,%rax,1), %xmm0");
+ asm volatile("sha1msg1 (%rax,%rcx,1), %xmm0");
+ asm volatile("sha1msg1 (%rax,%rcx,8), %xmm0");
+ asm volatile("sha1msg1 0x12(%rax), %xmm0");
+ asm volatile("sha1msg1 0x12(%rbp), %xmm0");
+ asm volatile("sha1msg1 0x12(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1msg1 0x12(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1msg1 0x12(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1msg1 0x12(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rax), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rbp), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* sha1msg2 xmm2/m128, xmm1 */
+
+ asm volatile("sha1msg2 %xmm1, %xmm0");
+ asm volatile("sha1msg2 %xmm7, %xmm2");
+ asm volatile("sha1msg2 %xmm8, %xmm0");
+ asm volatile("sha1msg2 %xmm7, %xmm8");
+ asm volatile("sha1msg2 %xmm15, %xmm8");
+ asm volatile("sha1msg2 (%rax), %xmm0");
+ asm volatile("sha1msg2 (%r8), %xmm0");
+ asm volatile("sha1msg2 (0x12345678), %xmm0");
+ asm volatile("sha1msg2 (%rax), %xmm3");
+ asm volatile("sha1msg2 (%rcx,%rax,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(,%rax,1), %xmm0");
+ asm volatile("sha1msg2 (%rax,%rcx,1), %xmm0");
+ asm volatile("sha1msg2 (%rax,%rcx,8), %xmm0");
+ asm volatile("sha1msg2 0x12(%rax), %xmm0");
+ asm volatile("sha1msg2 0x12(%rbp), %xmm0");
+ asm volatile("sha1msg2 0x12(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1msg2 0x12(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1msg2 0x12(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1msg2 0x12(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rax), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rbp), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rcx,%rax,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rbp,%rax,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rax,%rcx,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */
+ /* Note sha256rnds2 has an implicit operand 'xmm0' */
+
+ asm volatile("sha256rnds2 %xmm4, %xmm1");
+ asm volatile("sha256rnds2 %xmm7, %xmm2");
+ asm volatile("sha256rnds2 %xmm8, %xmm1");
+ asm volatile("sha256rnds2 %xmm7, %xmm8");
+ asm volatile("sha256rnds2 %xmm15, %xmm8");
+ asm volatile("sha256rnds2 (%rax), %xmm1");
+ asm volatile("sha256rnds2 (%r8), %xmm1");
+ asm volatile("sha256rnds2 (0x12345678), %xmm1");
+ asm volatile("sha256rnds2 (%rax), %xmm3");
+ asm volatile("sha256rnds2 (%rcx,%rax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(,%rax,1), %xmm1");
+ asm volatile("sha256rnds2 (%rax,%rcx,1), %xmm1");
+ asm volatile("sha256rnds2 (%rax,%rcx,8), %xmm1");
+ asm volatile("sha256rnds2 0x12(%rax), %xmm1");
+ asm volatile("sha256rnds2 0x12(%rbp), %xmm1");
+ asm volatile("sha256rnds2 0x12(%rcx,%rax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12(%rbp,%rax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12(%rax,%rcx,1), %xmm1");
+ asm volatile("sha256rnds2 0x12(%rax,%rcx,8), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rax), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rbp), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rcx,%rax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rbp,%rax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rax,%rcx,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rax,%rcx,8), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* sha256msg1 xmm2/m128, xmm1 */
+
+ asm volatile("sha256msg1 %xmm1, %xmm0");
+ asm volatile("sha256msg1 %xmm7, %xmm2");
+ asm volatile("sha256msg1 %xmm8, %xmm0");
+ asm volatile("sha256msg1 %xmm7, %xmm8");
+ asm volatile("sha256msg1 %xmm15, %xmm8");
+ asm volatile("sha256msg1 (%rax), %xmm0");
+ asm volatile("sha256msg1 (%r8), %xmm0");
+ asm volatile("sha256msg1 (0x12345678), %xmm0");
+ asm volatile("sha256msg1 (%rax), %xmm3");
+ asm volatile("sha256msg1 (%rcx,%rax,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(,%rax,1), %xmm0");
+ asm volatile("sha256msg1 (%rax,%rcx,1), %xmm0");
+ asm volatile("sha256msg1 (%rax,%rcx,8), %xmm0");
+ asm volatile("sha256msg1 0x12(%rax), %xmm0");
+ asm volatile("sha256msg1 0x12(%rbp), %xmm0");
+ asm volatile("sha256msg1 0x12(%rcx,%rax,1), %xmm0");
+ asm volatile("sha256msg1 0x12(%rbp,%rax,1), %xmm0");
+ asm volatile("sha256msg1 0x12(%rax,%rcx,1), %xmm0");
+ asm volatile("sha256msg1 0x12(%rax,%rcx,8), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rax), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rbp), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rcx,%rax,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rbp,%rax,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rax,%rcx,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* sha256msg2 xmm2/m128, xmm1 */
+
+ asm volatile("sha256msg2 %xmm1, %xmm0");
+ asm volatile("sha256msg2 %xmm7, %xmm2");
+ asm volatile("sha256msg2 %xmm8, %xmm0");
+ asm volatile("sha256msg2 %xmm7, %xmm8");
+ asm volatile("sha256msg2 %xmm15, %xmm8");
+ asm volatile("sha256msg2 (%rax), %xmm0");
+ asm volatile("sha256msg2 (%r8), %xmm0");
+ asm volatile("sha256msg2 (0x12345678), %xmm0");
+ asm volatile("sha256msg2 (%rax), %xmm3");
+ asm volatile("sha256msg2 (%rcx,%rax,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(,%rax,1), %xmm0");
+ asm volatile("sha256msg2 (%rax,%rcx,1), %xmm0");
+ asm volatile("sha256msg2 (%rax,%rcx,8), %xmm0");
+ asm volatile("sha256msg2 0x12(%rax), %xmm0");
+ asm volatile("sha256msg2 0x12(%rbp), %xmm0");
+ asm volatile("sha256msg2 0x12(%rcx,%rax,1), %xmm0");
+ asm volatile("sha256msg2 0x12(%rbp,%rax,1), %xmm0");
+ asm volatile("sha256msg2 0x12(%rax,%rcx,1), %xmm0");
+ asm volatile("sha256msg2 0x12(%rax,%rcx,8), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rax), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rbp), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rcx,%rax,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rbp,%rax,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rax,%rcx,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%rax,%rcx,8), %xmm15");
+
+ /* clflushopt m8 */
+
+ asm volatile("clflushopt (%rax)");
+ asm volatile("clflushopt (%r8)");
+ asm volatile("clflushopt (0x12345678)");
+ asm volatile("clflushopt 0x12345678(%rax,%rcx,8)");
+ asm volatile("clflushopt 0x12345678(%r8,%rcx,8)");
+ /* Also check instructions in the same group encoding as clflushopt */
+ asm volatile("clflush (%rax)");
+ asm volatile("clflush (%r8)");
+ asm volatile("sfence");
+
+ /* clwb m8 */
+
+ asm volatile("clwb (%rax)");
+ asm volatile("clwb (%r8)");
+ asm volatile("clwb (0x12345678)");
+ asm volatile("clwb 0x12345678(%rax,%rcx,8)");
+ asm volatile("clwb 0x12345678(%r8,%rcx,8)");
+ /* Also check instructions in the same group encoding as clwb */
+ asm volatile("xsaveopt (%rax)");
+ asm volatile("xsaveopt (%r8)");
+ asm volatile("mfence");
+
+ /* xsavec mem */
+
+ asm volatile("xsavec (%rax)");
+ asm volatile("xsavec (%r8)");
+ asm volatile("xsavec (0x12345678)");
+ asm volatile("xsavec 0x12345678(%rax,%rcx,8)");
+ asm volatile("xsavec 0x12345678(%r8,%rcx,8)");
+
+ /* xsaves mem */
+
+ asm volatile("xsaves (%rax)");
+ asm volatile("xsaves (%r8)");
+ asm volatile("xsaves (0x12345678)");
+ asm volatile("xsaves 0x12345678(%rax,%rcx,8)");
+ asm volatile("xsaves 0x12345678(%r8,%rcx,8)");
+
+ /* xrstors mem */
+
+ asm volatile("xrstors (%rax)");
+ asm volatile("xrstors (%r8)");
+ asm volatile("xrstors (0x12345678)");
+ asm volatile("xrstors 0x12345678(%rax,%rcx,8)");
+ asm volatile("xrstors 0x12345678(%r8,%rcx,8)");
+
+ /* ptwrite */
+
+ asm volatile("ptwrite (%rax)");
+ asm volatile("ptwrite (%r8)");
+ asm volatile("ptwrite (0x12345678)");
+ asm volatile("ptwrite 0x12345678(%rax,%rcx,8)");
+ asm volatile("ptwrite 0x12345678(%r8,%rcx,8)");
+
+ asm volatile("ptwritel (%rax)");
+ asm volatile("ptwritel (%r8)");
+ asm volatile("ptwritel (0x12345678)");
+ asm volatile("ptwritel 0x12345678(%rax,%rcx,8)");
+ asm volatile("ptwritel 0x12345678(%r8,%rcx,8)");
+
+ asm volatile("ptwriteq (%rax)");
+ asm volatile("ptwriteq (%r8)");
+ asm volatile("ptwriteq (0x12345678)");
+ asm volatile("ptwriteq 0x12345678(%rax,%rcx,8)");
+ asm volatile("ptwriteq 0x12345678(%r8,%rcx,8)");
+
+#else /* #ifdef __x86_64__ */
+
+ /* bound r32, mem (same op code as EVEX prefix) */
+
+ asm volatile("bound %eax, 0x12345678(%ecx)");
+ asm volatile("bound %ecx, 0x12345678(%eax)");
+ asm volatile("bound %edx, 0x12345678(%eax)");
+ asm volatile("bound %ebx, 0x12345678(%eax)");
+ asm volatile("bound %esp, 0x12345678(%eax)");
+ asm volatile("bound %ebp, 0x12345678(%eax)");
+ asm volatile("bound %esi, 0x12345678(%eax)");
+ asm volatile("bound %edi, 0x12345678(%eax)");
+ asm volatile("bound %ecx, (%eax)");
+ asm volatile("bound %eax, (0x12345678)");
+ asm volatile("bound %edx, (%ecx,%eax,1)");
+ asm volatile("bound %edx, 0x12345678(,%eax,1)");
+ asm volatile("bound %edx, (%eax,%ecx,1)");
+ asm volatile("bound %edx, (%eax,%ecx,8)");
+ asm volatile("bound %edx, 0x12(%eax)");
+ asm volatile("bound %edx, 0x12(%ebp)");
+ asm volatile("bound %edx, 0x12(%ecx,%eax,1)");
+ asm volatile("bound %edx, 0x12(%ebp,%eax,1)");
+ asm volatile("bound %edx, 0x12(%eax,%ecx,1)");
+ asm volatile("bound %edx, 0x12(%eax,%ecx,8)");
+ asm volatile("bound %edx, 0x12345678(%eax)");
+ asm volatile("bound %edx, 0x12345678(%ebp)");
+ asm volatile("bound %edx, 0x12345678(%ecx,%eax,1)");
+ asm volatile("bound %edx, 0x12345678(%ebp,%eax,1)");
+ asm volatile("bound %edx, 0x12345678(%eax,%ecx,1)");
+ asm volatile("bound %edx, 0x12345678(%eax,%ecx,8)");
+
+ /* bound r16, mem (same op code as EVEX prefix) */
+
+ asm volatile("bound %ax, 0x12345678(%ecx)");
+ asm volatile("bound %cx, 0x12345678(%eax)");
+ asm volatile("bound %dx, 0x12345678(%eax)");
+ asm volatile("bound %bx, 0x12345678(%eax)");
+ asm volatile("bound %sp, 0x12345678(%eax)");
+ asm volatile("bound %bp, 0x12345678(%eax)");
+ asm volatile("bound %si, 0x12345678(%eax)");
+ asm volatile("bound %di, 0x12345678(%eax)");
+ asm volatile("bound %cx, (%eax)");
+ asm volatile("bound %ax, (0x12345678)");
+ asm volatile("bound %dx, (%ecx,%eax,1)");
+ asm volatile("bound %dx, 0x12345678(,%eax,1)");
+ asm volatile("bound %dx, (%eax,%ecx,1)");
+ asm volatile("bound %dx, (%eax,%ecx,8)");
+ asm volatile("bound %dx, 0x12(%eax)");
+ asm volatile("bound %dx, 0x12(%ebp)");
+ asm volatile("bound %dx, 0x12(%ecx,%eax,1)");
+ asm volatile("bound %dx, 0x12(%ebp,%eax,1)");
+ asm volatile("bound %dx, 0x12(%eax,%ecx,1)");
+ asm volatile("bound %dx, 0x12(%eax,%ecx,8)");
+ asm volatile("bound %dx, 0x12345678(%eax)");
+ asm volatile("bound %dx, 0x12345678(%ebp)");
+ asm volatile("bound %dx, 0x12345678(%ecx,%eax,1)");
+ asm volatile("bound %dx, 0x12345678(%ebp,%eax,1)");
+ asm volatile("bound %dx, 0x12345678(%eax,%ecx,1)");
+ asm volatile("bound %dx, 0x12345678(%eax,%ecx,8)");
+
+ /* AVX-512: Instructions with the same op codes as Mask Instructions */
+
+ asm volatile("cmovno %eax,%ebx");
+ asm volatile("cmovno 0x12345678(%eax),%ecx");
+ asm volatile("cmovno 0x12345678(%eax),%cx");
+
+ asm volatile("cmove %eax,%ebx");
+ asm volatile("cmove 0x12345678(%eax),%ecx");
+ asm volatile("cmove 0x12345678(%eax),%cx");
+
+ asm volatile("seto 0x12345678(%eax)");
+ asm volatile("setno 0x12345678(%eax)");
+ asm volatile("setb 0x12345678(%eax)");
+ asm volatile("setc 0x12345678(%eax)");
+ asm volatile("setnae 0x12345678(%eax)");
+ asm volatile("setae 0x12345678(%eax)");
+ asm volatile("setnb 0x12345678(%eax)");
+ asm volatile("setnc 0x12345678(%eax)");
+ asm volatile("sets 0x12345678(%eax)");
+ asm volatile("setns 0x12345678(%eax)");
+
+ /* AVX-512: Mask Instructions */
+
+ asm volatile("kandw %k7,%k6,%k5");
+ asm volatile("kandq %k7,%k6,%k5");
+ asm volatile("kandb %k7,%k6,%k5");
+ asm volatile("kandd %k7,%k6,%k5");
+
+ asm volatile("kandnw %k7,%k6,%k5");
+ asm volatile("kandnq %k7,%k6,%k5");
+ asm volatile("kandnb %k7,%k6,%k5");
+ asm volatile("kandnd %k7,%k6,%k5");
+
+ asm volatile("knotw %k7,%k6");
+ asm volatile("knotq %k7,%k6");
+ asm volatile("knotb %k7,%k6");
+ asm volatile("knotd %k7,%k6");
+
+ asm volatile("korw %k7,%k6,%k5");
+ asm volatile("korq %k7,%k6,%k5");
+ asm volatile("korb %k7,%k6,%k5");
+ asm volatile("kord %k7,%k6,%k5");
+
+ asm volatile("kxnorw %k7,%k6,%k5");
+ asm volatile("kxnorq %k7,%k6,%k5");
+ asm volatile("kxnorb %k7,%k6,%k5");
+ asm volatile("kxnord %k7,%k6,%k5");
+
+ asm volatile("kxorw %k7,%k6,%k5");
+ asm volatile("kxorq %k7,%k6,%k5");
+ asm volatile("kxorb %k7,%k6,%k5");
+ asm volatile("kxord %k7,%k6,%k5");
+
+ asm volatile("kaddw %k7,%k6,%k5");
+ asm volatile("kaddq %k7,%k6,%k5");
+ asm volatile("kaddb %k7,%k6,%k5");
+ asm volatile("kaddd %k7,%k6,%k5");
+
+ asm volatile("kunpckbw %k7,%k6,%k5");
+ asm volatile("kunpckwd %k7,%k6,%k5");
+ asm volatile("kunpckdq %k7,%k6,%k5");
+
+ asm volatile("kmovw %k6,%k5");
+ asm volatile("kmovw (%ecx),%k5");
+ asm volatile("kmovw 0x123(%eax,%ecx,8),%k5");
+ asm volatile("kmovw %k5,(%ecx)");
+ asm volatile("kmovw %k5,0x123(%eax,%ecx,8)");
+ asm volatile("kmovw %eax,%k5");
+ asm volatile("kmovw %ebp,%k5");
+ asm volatile("kmovw %k5,%eax");
+ asm volatile("kmovw %k5,%ebp");
+
+ asm volatile("kmovq %k6,%k5");
+ asm volatile("kmovq (%ecx),%k5");
+ asm volatile("kmovq 0x123(%eax,%ecx,8),%k5");
+ asm volatile("kmovq %k5,(%ecx)");
+ asm volatile("kmovq %k5,0x123(%eax,%ecx,8)");
+
+ asm volatile("kmovb %k6,%k5");
+ asm volatile("kmovb (%ecx),%k5");
+ asm volatile("kmovb 0x123(%eax,%ecx,8),%k5");
+ asm volatile("kmovb %k5,(%ecx)");
+ asm volatile("kmovb %k5,0x123(%eax,%ecx,8)");
+ asm volatile("kmovb %eax,%k5");
+ asm volatile("kmovb %ebp,%k5");
+ asm volatile("kmovb %k5,%eax");
+ asm volatile("kmovb %k5,%ebp");
+
+ asm volatile("kmovd %k6,%k5");
+ asm volatile("kmovd (%ecx),%k5");
+ asm volatile("kmovd 0x123(%eax,%ecx,8),%k5");
+ asm volatile("kmovd %k5,(%ecx)");
+ asm volatile("kmovd %k5,0x123(%eax,%ecx,8)");
+ asm volatile("kmovd %eax,%k5");
+ asm volatile("kmovd %ebp,%k5");
+ asm volatile("kmovd %k5,%eax");
+ asm volatile("kmovd %k5,%ebp");
+
+ asm volatile("kortestw %k6,%k5");
+ asm volatile("kortestq %k6,%k5");
+ asm volatile("kortestb %k6,%k5");
+ asm volatile("kortestd %k6,%k5");
+
+ asm volatile("ktestw %k6,%k5");
+ asm volatile("ktestq %k6,%k5");
+ asm volatile("ktestb %k6,%k5");
+ asm volatile("ktestd %k6,%k5");
+
+ asm volatile("kshiftrw $0x12,%k6,%k5");
+ asm volatile("kshiftrq $0x5b,%k6,%k5");
+ asm volatile("kshiftlw $0x12,%k6,%k5");
+ asm volatile("kshiftlq $0x5b,%k6,%k5");
+
+ /* AVX-512: Op code 0f 5b */
+ asm volatile("vcvtdq2ps %xmm5,%xmm6");
+ asm volatile("vcvtqq2ps %zmm5,%ymm6{%k7}");
+ asm volatile("vcvtps2dq %xmm5,%xmm6");
+ asm volatile("vcvttps2dq %xmm5,%xmm6");
+
+ /* AVX-512: Op code 0f 6f */
+
+ asm volatile("movq %mm0,%mm4");
+ asm volatile("vmovdqa %ymm4,%ymm6");
+ asm volatile("vmovdqa32 %zmm5,%zmm6");
+ asm volatile("vmovdqa64 %zmm5,%zmm6");
+ asm volatile("vmovdqu %ymm4,%ymm6");
+ asm volatile("vmovdqu32 %zmm5,%zmm6");
+ asm volatile("vmovdqu64 %zmm5,%zmm6");
+ asm volatile("vmovdqu8 %zmm5,%zmm6");
+ asm volatile("vmovdqu16 %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 78 */
+
+ asm volatile("vmread %eax,%ebx");
+ asm volatile("vcvttps2udq %zmm5,%zmm6");
+ asm volatile("vcvttpd2udq %zmm5,%ymm6{%k7}");
+ asm volatile("vcvttsd2usi %xmm6,%eax");
+ asm volatile("vcvttss2usi %xmm6,%eax");
+ asm volatile("vcvttps2uqq %ymm5,%zmm6{%k7}");
+ asm volatile("vcvttpd2uqq %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 79 */
+
+ asm volatile("vmwrite %eax,%ebx");
+ asm volatile("vcvtps2udq %zmm5,%zmm6");
+ asm volatile("vcvtpd2udq %zmm5,%ymm6{%k7}");
+ asm volatile("vcvtsd2usi %xmm6,%eax");
+ asm volatile("vcvtss2usi %xmm6,%eax");
+ asm volatile("vcvtps2uqq %ymm5,%zmm6{%k7}");
+ asm volatile("vcvtpd2uqq %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 7a */
+
+ asm volatile("vcvtudq2pd %ymm5,%zmm6{%k7}");
+ asm volatile("vcvtuqq2pd %zmm5,%zmm6");
+ asm volatile("vcvtudq2ps %zmm5,%zmm6");
+ asm volatile("vcvtuqq2ps %zmm5,%ymm6{%k7}");
+ asm volatile("vcvttps2qq %ymm5,%zmm6{%k7}");
+ asm volatile("vcvttpd2qq %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 7b */
+
+ asm volatile("vcvtusi2sd %eax,%xmm5,%xmm6");
+ asm volatile("vcvtusi2ss %eax,%xmm5,%xmm6");
+ asm volatile("vcvtps2qq %ymm5,%zmm6{%k7}");
+ asm volatile("vcvtpd2qq %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 7f */
+
+ asm volatile("movq.s %mm0,%mm4");
+ asm volatile("vmovdqa.s %ymm5,%ymm6");
+ asm volatile("vmovdqa32.s %zmm5,%zmm6");
+ asm volatile("vmovdqa64.s %zmm5,%zmm6");
+ asm volatile("vmovdqu.s %ymm5,%ymm6");
+ asm volatile("vmovdqu32.s %zmm5,%zmm6");
+ asm volatile("vmovdqu64.s %zmm5,%zmm6");
+ asm volatile("vmovdqu8.s %zmm5,%zmm6");
+ asm volatile("vmovdqu16.s %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f db */
+
+ asm volatile("pand %mm1,%mm2");
+ asm volatile("pand %xmm1,%xmm2");
+ asm volatile("vpand %ymm4,%ymm6,%ymm2");
+ asm volatile("vpandd %zmm4,%zmm5,%zmm6");
+ asm volatile("vpandq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f df */
+
+ asm volatile("pandn %mm1,%mm2");
+ asm volatile("pandn %xmm1,%xmm2");
+ asm volatile("vpandn %ymm4,%ymm6,%ymm2");
+ asm volatile("vpandnd %zmm4,%zmm5,%zmm6");
+ asm volatile("vpandnq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f e6 */
+
+ asm volatile("vcvttpd2dq %xmm1,%xmm2");
+ asm volatile("vcvtdq2pd %xmm5,%xmm6");
+ asm volatile("vcvtdq2pd %ymm5,%zmm6{%k7}");
+ asm volatile("vcvtqq2pd %zmm5,%zmm6");
+ asm volatile("vcvtpd2dq %xmm1,%xmm2");
+
+ /* AVX-512: Op code 0f eb */
+
+ asm volatile("por %mm4,%mm6");
+ asm volatile("vpor %ymm4,%ymm6,%ymm2");
+ asm volatile("vpord %zmm4,%zmm5,%zmm6");
+ asm volatile("vporq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f ef */
+
+ asm volatile("pxor %mm4,%mm6");
+ asm volatile("vpxor %ymm4,%ymm6,%ymm2");
+ asm volatile("vpxord %zmm4,%zmm5,%zmm6");
+ asm volatile("vpxorq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 10 */
+
+ asm volatile("pblendvb %xmm1,%xmm0");
+ asm volatile("vpsrlvw %zmm4,%zmm5,%zmm6");
+ asm volatile("vpmovuswb %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 11 */
+
+ asm volatile("vpmovusdb %zmm5,%xmm6{%k7}");
+ asm volatile("vpsravw %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 12 */
+
+ asm volatile("vpmovusqb %zmm5,%xmm6{%k7}");
+ asm volatile("vpsllvw %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 13 */
+
+ asm volatile("vcvtph2ps %xmm3,%ymm5");
+ asm volatile("vcvtph2ps %ymm5,%zmm6{%k7}");
+ asm volatile("vpmovusdw %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 14 */
+
+ asm volatile("blendvps %xmm1,%xmm0");
+ asm volatile("vpmovusqw %zmm5,%xmm6{%k7}");
+ asm volatile("vprorvd %zmm4,%zmm5,%zmm6");
+ asm volatile("vprorvq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 15 */
+
+ asm volatile("blendvpd %xmm1,%xmm0");
+ asm volatile("vpmovusqd %zmm5,%ymm6{%k7}");
+ asm volatile("vprolvd %zmm4,%zmm5,%zmm6");
+ asm volatile("vprolvq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 16 */
+
+ asm volatile("vpermps %ymm4,%ymm6,%ymm2");
+ asm volatile("vpermps %ymm4,%ymm6,%ymm2{%k7}");
+ asm volatile("vpermpd %ymm4,%ymm6,%ymm2{%k7}");
+
+ /* AVX-512: Op code 0f 38 19 */
+
+ asm volatile("vbroadcastsd %xmm4,%ymm6");
+ asm volatile("vbroadcastf32x2 %xmm7,%zmm6");
+
+ /* AVX-512: Op code 0f 38 1a */
+
+ asm volatile("vbroadcastf128 (%ecx),%ymm4");
+ asm volatile("vbroadcastf32x4 (%ecx),%zmm6");
+ asm volatile("vbroadcastf64x2 (%ecx),%zmm6");
+
+ /* AVX-512: Op code 0f 38 1b */
+
+ asm volatile("vbroadcastf32x8 (%ecx),%zmm6");
+ asm volatile("vbroadcastf64x4 (%ecx),%zmm6");
+
+ /* AVX-512: Op code 0f 38 1f */
+
+ asm volatile("vpabsq %zmm4,%zmm6");
+
+ /* AVX-512: Op code 0f 38 20 */
+
+ asm volatile("vpmovsxbw %xmm4,%xmm5");
+ asm volatile("vpmovswb %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 21 */
+
+ asm volatile("vpmovsxbd %xmm4,%ymm6");
+ asm volatile("vpmovsdb %zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 22 */
+
+ asm volatile("vpmovsxbq %xmm4,%ymm4");
+ asm volatile("vpmovsqb %zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 23 */
+
+ asm volatile("vpmovsxwd %xmm4,%ymm4");
+ asm volatile("vpmovsdw %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 24 */
+
+ asm volatile("vpmovsxwq %xmm4,%ymm6");
+ asm volatile("vpmovsqw %zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 25 */
+
+ asm volatile("vpmovsxdq %xmm4,%ymm4");
+ asm volatile("vpmovsqd %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 26 */
+
+ asm volatile("vptestmb %zmm5,%zmm6,%k5");
+ asm volatile("vptestmw %zmm5,%zmm6,%k5");
+ asm volatile("vptestnmb %zmm4,%zmm5,%k5");
+ asm volatile("vptestnmw %zmm4,%zmm5,%k5");
+
+ /* AVX-512: Op code 0f 38 27 */
+
+ asm volatile("vptestmd %zmm5,%zmm6,%k5");
+ asm volatile("vptestmq %zmm5,%zmm6,%k5");
+ asm volatile("vptestnmd %zmm4,%zmm5,%k5");
+ asm volatile("vptestnmq %zmm4,%zmm5,%k5");
+
+ /* AVX-512: Op code 0f 38 28 */
+
+ asm volatile("vpmuldq %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmovm2b %k5,%zmm6");
+ asm volatile("vpmovm2w %k5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 29 */
+
+ asm volatile("vpcmpeqq %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmovb2m %zmm6,%k5");
+ asm volatile("vpmovw2m %zmm6,%k5");
+
+ /* AVX-512: Op code 0f 38 2a */
+
+ asm volatile("vmovntdqa (%ecx),%ymm4");
+ asm volatile("vpbroadcastmb2q %k6,%zmm1");
+
+ /* AVX-512: Op code 0f 38 2c */
+
+ asm volatile("vmaskmovps (%ecx),%ymm4,%ymm6");
+ asm volatile("vscalefps %zmm4,%zmm5,%zmm6");
+ asm volatile("vscalefpd %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 2d */
+
+ asm volatile("vmaskmovpd (%ecx),%ymm4,%ymm6");
+ asm volatile("vscalefss %xmm4,%xmm5,%xmm6{%k7}");
+ asm volatile("vscalefsd %xmm4,%xmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 30 */
+
+ asm volatile("vpmovzxbw %xmm4,%ymm4");
+ asm volatile("vpmovwb %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 31 */
+
+ asm volatile("vpmovzxbd %xmm4,%ymm6");
+ asm volatile("vpmovdb %zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 32 */
+
+ asm volatile("vpmovzxbq %xmm4,%ymm4");
+ asm volatile("vpmovqb %zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 33 */
+
+ asm volatile("vpmovzxwd %xmm4,%ymm4");
+ asm volatile("vpmovdw %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 34 */
+
+ asm volatile("vpmovzxwq %xmm4,%ymm6");
+ asm volatile("vpmovqw %zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 35 */
+
+ asm volatile("vpmovzxdq %xmm4,%ymm4");
+ asm volatile("vpmovqd %zmm5,%ymm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 36 */
+
+ asm volatile("vpermd %ymm4,%ymm6,%ymm2");
+ asm volatile("vpermd %ymm4,%ymm6,%ymm2{%k7}");
+ asm volatile("vpermq %ymm4,%ymm6,%ymm2{%k7}");
+
+ /* AVX-512: Op code 0f 38 38 */
+
+ asm volatile("vpminsb %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmovm2d %k5,%zmm6");
+ asm volatile("vpmovm2q %k5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 39 */
+
+ asm volatile("vpminsd %xmm1,%xmm2,%xmm3");
+ asm volatile("vpminsd %zmm4,%zmm5,%zmm6");
+ asm volatile("vpminsq %zmm4,%zmm5,%zmm6");
+ asm volatile("vpmovd2m %zmm6,%k5");
+ asm volatile("vpmovq2m %zmm6,%k5");
+
+ /* AVX-512: Op code 0f 38 3a */
+
+ asm volatile("vpminuw %ymm4,%ymm6,%ymm2");
+ asm volatile("vpbroadcastmw2d %k6,%zmm6");
+
+ /* AVX-512: Op code 0f 38 3b */
+
+ asm volatile("vpminud %ymm4,%ymm6,%ymm2");
+ asm volatile("vpminud %zmm4,%zmm5,%zmm6");
+ asm volatile("vpminuq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 3d */
+
+ asm volatile("vpmaxsd %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmaxsd %zmm4,%zmm5,%zmm6");
+ asm volatile("vpmaxsq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 3f */
+
+ asm volatile("vpmaxud %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmaxud %zmm4,%zmm5,%zmm6");
+ asm volatile("vpmaxuq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 40 */
+
+ asm volatile("vpmulld %ymm4,%ymm6,%ymm2");
+ asm volatile("vpmulld %zmm4,%zmm5,%zmm6");
+ asm volatile("vpmullq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 42 */
+
+ asm volatile("vgetexpps %zmm5,%zmm6");
+ asm volatile("vgetexppd %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 43 */
+
+ asm volatile("vgetexpss %xmm4,%xmm5,%xmm6{%k7}");
+ asm volatile("vgetexpsd %xmm2,%xmm3,%xmm4{%k7}");
+
+ /* AVX-512: Op code 0f 38 44 */
+
+ asm volatile("vplzcntd %zmm5,%zmm6");
+ asm volatile("vplzcntq %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 46 */
+
+ asm volatile("vpsravd %ymm4,%ymm6,%ymm2");
+ asm volatile("vpsravd %zmm4,%zmm5,%zmm6");
+ asm volatile("vpsravq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 4c */
+
+ asm volatile("vrcp14ps %zmm5,%zmm6");
+ asm volatile("vrcp14pd %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 4d */
+
+ asm volatile("vrcp14ss %xmm4,%xmm5,%xmm6{%k7}");
+ asm volatile("vrcp14sd %xmm4,%xmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 4e */
+
+ asm volatile("vrsqrt14ps %zmm5,%zmm6");
+ asm volatile("vrsqrt14pd %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 4f */
+
+ asm volatile("vrsqrt14ss %xmm4,%xmm5,%xmm6{%k7}");
+ asm volatile("vrsqrt14sd %xmm4,%xmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 38 59 */
+
+ asm volatile("vpbroadcastq %xmm4,%xmm6");
+ asm volatile("vbroadcasti32x2 %xmm7,%zmm6");
+
+ /* AVX-512: Op code 0f 38 5a */
+
+ asm volatile("vbroadcasti128 (%ecx),%ymm4");
+ asm volatile("vbroadcasti32x4 (%ecx),%zmm6");
+ asm volatile("vbroadcasti64x2 (%ecx),%zmm6");
+
+ /* AVX-512: Op code 0f 38 5b */
+
+ asm volatile("vbroadcasti32x8 (%ecx),%zmm6");
+ asm volatile("vbroadcasti64x4 (%ecx),%zmm6");
+
+ /* AVX-512: Op code 0f 38 64 */
+
+ asm volatile("vpblendmd %zmm4,%zmm5,%zmm6");
+ asm volatile("vpblendmq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 65 */
+
+ asm volatile("vblendmps %zmm4,%zmm5,%zmm6");
+ asm volatile("vblendmpd %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 66 */
+
+ asm volatile("vpblendmb %zmm4,%zmm5,%zmm6");
+ asm volatile("vpblendmw %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 75 */
+
+ asm volatile("vpermi2b %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermi2w %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 76 */
+
+ asm volatile("vpermi2d %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermi2q %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 77 */
+
+ asm volatile("vpermi2ps %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermi2pd %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 7a */
+
+ asm volatile("vpbroadcastb %eax,%xmm3");
+
+ /* AVX-512: Op code 0f 38 7b */
+
+ asm volatile("vpbroadcastw %eax,%xmm3");
+
+ /* AVX-512: Op code 0f 38 7c */
+
+ asm volatile("vpbroadcastd %eax,%xmm3");
+
+ /* AVX-512: Op code 0f 38 7d */
+
+ asm volatile("vpermt2b %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermt2w %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 7e */
+
+ asm volatile("vpermt2d %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermt2q %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 7f */
+
+ asm volatile("vpermt2ps %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermt2pd %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 83 */
+
+ asm volatile("vpmultishiftqb %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 88 */
+
+ asm volatile("vexpandps (%ecx),%zmm6");
+ asm volatile("vexpandpd (%ecx),%zmm6");
+
+ /* AVX-512: Op code 0f 38 89 */
+
+ asm volatile("vpexpandd (%ecx),%zmm6");
+ asm volatile("vpexpandq (%ecx),%zmm6");
+
+ /* AVX-512: Op code 0f 38 8a */
+
+ asm volatile("vcompressps %zmm6,(%ecx)");
+ asm volatile("vcompresspd %zmm6,(%ecx)");
+
+ /* AVX-512: Op code 0f 38 8b */
+
+ asm volatile("vpcompressd %zmm6,(%ecx)");
+ asm volatile("vpcompressq %zmm6,(%ecx)");
+
+ /* AVX-512: Op code 0f 38 8d */
+
+ asm volatile("vpermb %zmm4,%zmm5,%zmm6");
+ asm volatile("vpermw %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 90 */
+
+ asm volatile("vpgatherdd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherdq %xmm2,0x04(%ebp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherdd 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
+ asm volatile("vpgatherdq 0x7b(%ebp,%ymm7,8),%zmm6{%k1}");
+
+ /* AVX-512: Op code 0f 38 91 */
+
+ asm volatile("vpgatherqd %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherqq %xmm2,0x02(%ebp,%xmm7,2),%xmm1");
+ asm volatile("vpgatherqd 0x7b(%ebp,%zmm7,8),%ymm6{%k1}");
+ asm volatile("vpgatherqq 0x7b(%ebp,%zmm7,8),%zmm6{%k1}");
+
+ /* AVX-512: Op code 0f 38 a0 */
+
+ asm volatile("vpscatterdd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vpscatterdq %zmm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 a1 */
+
+ asm volatile("vpscatterqd %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vpscatterqq %ymm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 a2 */
+
+ asm volatile("vscatterdps %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterdpd %zmm6,0x7b(%ebp,%ymm7,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 a3 */
+
+ asm volatile("vscatterqps %ymm6,0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterqpd %zmm6,0x7b(%ebp,%zmm7,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 b4 */
+
+ asm volatile("vpmadd52luq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 b5 */
+
+ asm volatile("vpmadd52huq %zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 c4 */
+
+ asm volatile("vpconflictd %zmm5,%zmm6");
+ asm volatile("vpconflictq %zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 38 c8 */
+
+ asm volatile("vexp2ps %zmm6,%zmm7");
+ asm volatile("vexp2pd %zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 38 ca */
+
+ asm volatile("vrcp28ps %zmm6,%zmm7");
+ asm volatile("vrcp28pd %zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 38 cb */
+
+ asm volatile("vrcp28ss %xmm5,%xmm6,%xmm7{%k7}");
+ asm volatile("vrcp28sd %xmm5,%xmm6,%xmm7{%k7}");
+
+ /* AVX-512: Op code 0f 38 cc */
+
+ asm volatile("vrsqrt28ps %zmm6,%zmm7");
+ asm volatile("vrsqrt28pd %zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 38 cd */
+
+ asm volatile("vrsqrt28ss %xmm5,%xmm6,%xmm7{%k7}");
+ asm volatile("vrsqrt28sd %xmm5,%xmm6,%xmm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 03 */
+
+ asm volatile("valignd $0x12,%zmm5,%zmm6,%zmm7");
+ asm volatile("valignq $0x12,%zmm5,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 08 */
+
+ asm volatile("vroundps $0x5,%ymm6,%ymm2");
+ asm volatile("vrndscaleps $0x12,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 3a 09 */
+
+ asm volatile("vroundpd $0x5,%ymm6,%ymm2");
+ asm volatile("vrndscalepd $0x12,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 3a 0a */
+
+ asm volatile("vroundss $0x5,%xmm4,%xmm6,%xmm2");
+ asm volatile("vrndscaless $0x12,%xmm4,%xmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 3a 0b */
+
+ asm volatile("vroundsd $0x5,%xmm4,%xmm6,%xmm2");
+ asm volatile("vrndscalesd $0x12,%xmm4,%xmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 3a 18 */
+
+ asm volatile("vinsertf128 $0x5,%xmm4,%ymm4,%ymm6");
+ asm volatile("vinsertf32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+ asm volatile("vinsertf64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+
+ /* AVX-512: Op code 0f 3a 19 */
+
+ asm volatile("vextractf128 $0x5,%ymm4,%xmm4");
+ asm volatile("vextractf32x4 $0x12,%zmm5,%xmm6{%k7}");
+ asm volatile("vextractf64x2 $0x12,%zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 3a 1a */
+
+ asm volatile("vinsertf32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+ asm volatile("vinsertf64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 1b */
+
+ asm volatile("vextractf32x8 $0x12,%zmm6,%ymm7{%k7}");
+ asm volatile("vextractf64x4 $0x12,%zmm6,%ymm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 1e */
+
+ asm volatile("vpcmpud $0x12,%zmm6,%zmm7,%k5");
+ asm volatile("vpcmpuq $0x12,%zmm6,%zmm7,%k5");
+
+ /* AVX-512: Op code 0f 3a 1f */
+
+ asm volatile("vpcmpd $0x12,%zmm6,%zmm7,%k5");
+ asm volatile("vpcmpq $0x12,%zmm6,%zmm7,%k5");
+
+ /* AVX-512: Op code 0f 3a 23 */
+
+ asm volatile("vshuff32x4 $0x12,%zmm5,%zmm6,%zmm7");
+ asm volatile("vshuff64x2 $0x12,%zmm5,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 25 */
+
+ asm volatile("vpternlogd $0x12,%zmm5,%zmm6,%zmm7");
+ asm volatile("vpternlogq $0x12,%zmm5,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 26 */
+
+ asm volatile("vgetmantps $0x12,%zmm6,%zmm7");
+ asm volatile("vgetmantpd $0x12,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 27 */
+
+ asm volatile("vgetmantss $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+ asm volatile("vgetmantsd $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 38 */
+
+ asm volatile("vinserti128 $0x5,%xmm4,%ymm4,%ymm6");
+ asm volatile("vinserti32x4 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+ asm volatile("vinserti64x2 $0x12,%xmm4,%zmm5,%zmm6{%k7}");
+
+ /* AVX-512: Op code 0f 3a 39 */
+
+ asm volatile("vextracti128 $0x5,%ymm4,%xmm6");
+ asm volatile("vextracti32x4 $0x12,%zmm5,%xmm6{%k7}");
+ asm volatile("vextracti64x2 $0x12,%zmm5,%xmm6{%k7}");
+
+ /* AVX-512: Op code 0f 3a 3a */
+
+ asm volatile("vinserti32x8 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+ asm volatile("vinserti64x4 $0x12,%ymm5,%zmm6,%zmm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 3b */
+
+ asm volatile("vextracti32x8 $0x12,%zmm6,%ymm7{%k7}");
+ asm volatile("vextracti64x4 $0x12,%zmm6,%ymm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 3e */
+
+ asm volatile("vpcmpub $0x12,%zmm6,%zmm7,%k5");
+ asm volatile("vpcmpuw $0x12,%zmm6,%zmm7,%k5");
+
+ /* AVX-512: Op code 0f 3a 3f */
+
+ asm volatile("vpcmpb $0x12,%zmm6,%zmm7,%k5");
+ asm volatile("vpcmpw $0x12,%zmm6,%zmm7,%k5");
+
+ /* AVX-512: Op code 0f 3a 42 */
+
+ asm volatile("vmpsadbw $0x5,%ymm4,%ymm6,%ymm2");
+ asm volatile("vdbpsadbw $0x12,%zmm4,%zmm5,%zmm6");
+
+ /* AVX-512: Op code 0f 3a 43 */
+
+ asm volatile("vshufi32x4 $0x12,%zmm5,%zmm6,%zmm7");
+ asm volatile("vshufi64x2 $0x12,%zmm5,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 50 */
+
+ asm volatile("vrangeps $0x12,%zmm5,%zmm6,%zmm7");
+ asm volatile("vrangepd $0x12,%zmm5,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 51 */
+
+ asm volatile("vrangess $0x12,%xmm5,%xmm6,%xmm7");
+ asm volatile("vrangesd $0x12,%xmm5,%xmm6,%xmm7");
+
+ /* AVX-512: Op code 0f 3a 54 */
+
+ asm volatile("vfixupimmps $0x12,%zmm5,%zmm6,%zmm7");
+ asm volatile("vfixupimmpd $0x12,%zmm5,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 55 */
+
+ asm volatile("vfixupimmss $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+ asm volatile("vfixupimmsd $0x12,%xmm5,%xmm6,%xmm7{%k7}");
+
+ /* AVX-512: Op code 0f 3a 56 */
+
+ asm volatile("vreduceps $0x12,%zmm6,%zmm7");
+ asm volatile("vreducepd $0x12,%zmm6,%zmm7");
+
+ /* AVX-512: Op code 0f 3a 57 */
+
+ asm volatile("vreducess $0x12,%xmm5,%xmm6,%xmm7");
+ asm volatile("vreducesd $0x12,%xmm5,%xmm6,%xmm7");
+
+ /* AVX-512: Op code 0f 3a 66 */
+
+ asm volatile("vfpclassps $0x12,%zmm7,%k5");
+ asm volatile("vfpclasspd $0x12,%zmm7,%k5");
+
+ /* AVX-512: Op code 0f 3a 67 */
+
+ asm volatile("vfpclassss $0x12,%xmm7,%k5");
+ asm volatile("vfpclasssd $0x12,%xmm7,%k5");
+
+ /* AVX-512: Op code 0f 72 (Grp13) */
+
+ asm volatile("vprord $0x12,%zmm5,%zmm6");
+ asm volatile("vprorq $0x12,%zmm5,%zmm6");
+ asm volatile("vprold $0x12,%zmm5,%zmm6");
+ asm volatile("vprolq $0x12,%zmm5,%zmm6");
+ asm volatile("psrad $0x2,%mm6");
+ asm volatile("vpsrad $0x5,%ymm6,%ymm2");
+ asm volatile("vpsrad $0x5,%zmm6,%zmm2");
+ asm volatile("vpsraq $0x5,%zmm6,%zmm2");
+
+ /* AVX-512: Op code 0f 38 c6 (Grp18) */
+
+ asm volatile("vgatherpf0dps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vgatherpf0dpd 0x7b(%ebp,%ymm7,8){%k1}");
+ asm volatile("vgatherpf1dps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vgatherpf1dpd 0x7b(%ebp,%ymm7,8){%k1}");
+ asm volatile("vscatterpf0dps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterpf0dpd 0x7b(%ebp,%ymm7,8){%k1}");
+ asm volatile("vscatterpf1dps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterpf1dpd 0x7b(%ebp,%ymm7,8){%k1}");
+
+ /* AVX-512: Op code 0f 38 c7 (Grp19) */
+
+ asm volatile("vgatherpf0qps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vgatherpf0qpd 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vgatherpf1qps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vgatherpf1qpd 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterpf0qps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterpf0qpd 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterpf1qps 0x7b(%ebp,%zmm7,8){%k1}");
+ asm volatile("vscatterpf1qpd 0x7b(%ebp,%zmm7,8){%k1}");
+
+ /* AVX-512: Examples */
+
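+	/*
+	 * The forms below exercise EVEX features: {%k7} masking, {z} zeroing,
+	 * {rn,ru,rd,rz-sae} embedded rounding and {1toN} memory broadcast.
+	 */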
+ asm volatile("vaddpd %zmm4,%zmm5,%zmm6");
+ asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}");
+ asm volatile("vaddpd %zmm4,%zmm5,%zmm6{%k7}{z}");
+ asm volatile("vaddpd {rn-sae},%zmm4,%zmm5,%zmm6");
+ asm volatile("vaddpd {ru-sae},%zmm4,%zmm5,%zmm6");
+ asm volatile("vaddpd {rd-sae},%zmm4,%zmm5,%zmm6");
+ asm volatile("vaddpd {rz-sae},%zmm4,%zmm5,%zmm6");
+ asm volatile("vaddpd (%ecx),%zmm5,%zmm6");
+ asm volatile("vaddpd 0x123(%eax,%ecx,8),%zmm5,%zmm6");
+ asm volatile("vaddpd (%ecx){1to8},%zmm5,%zmm6");
+ asm volatile("vaddpd 0x1fc0(%edx),%zmm5,%zmm6");
+ asm volatile("vaddpd 0x3f8(%edx){1to8},%zmm5,%zmm6");
+ asm volatile("vcmpeq_uqps 0x1fc(%edx){1to16},%zmm6,%k5");
+ asm volatile("vcmpltsd 0x123(%eax,%ecx,8),%xmm3,%k5{%k7}");
+ asm volatile("vcmplesd {sae},%xmm4,%xmm5,%k5{%k7}");
+ asm volatile("vgetmantss $0x5b,0x123(%eax,%ecx,8),%xmm4,%xmm5{%k7}");
+
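+	/*
+	 * MPX: each bnd* instruction below is repeated across the 32-bit ModRM
+	 * addressing forms (bndldx/bndstx take a mib operand, so the scale-8
+	 * forms are omitted there).
+	 */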
+ /* bndmk m32, bnd */
+
+ asm volatile("bndmk (%eax), %bnd0");
+ asm volatile("bndmk (0x12345678), %bnd0");
+ asm volatile("bndmk (%eax), %bnd3");
+ asm volatile("bndmk (%ecx,%eax,1), %bnd0");
+ asm volatile("bndmk 0x12345678(,%eax,1), %bnd0");
+ asm volatile("bndmk (%eax,%ecx,1), %bnd0");
+ asm volatile("bndmk (%eax,%ecx,8), %bnd0");
+ asm volatile("bndmk 0x12(%eax), %bnd0");
+ asm volatile("bndmk 0x12(%ebp), %bnd0");
+ asm volatile("bndmk 0x12(%ecx,%eax,1), %bnd0");
+ asm volatile("bndmk 0x12(%ebp,%eax,1), %bnd0");
+ asm volatile("bndmk 0x12(%eax,%ecx,1), %bnd0");
+ asm volatile("bndmk 0x12(%eax,%ecx,8), %bnd0");
+ asm volatile("bndmk 0x12345678(%eax), %bnd0");
+ asm volatile("bndmk 0x12345678(%ebp), %bnd0");
+ asm volatile("bndmk 0x12345678(%ecx,%eax,1), %bnd0");
+ asm volatile("bndmk 0x12345678(%ebp,%eax,1), %bnd0");
+ asm volatile("bndmk 0x12345678(%eax,%ecx,1), %bnd0");
+ asm volatile("bndmk 0x12345678(%eax,%ecx,8), %bnd0");
+
+ /* bndcl r/m32, bnd */
+
+ asm volatile("bndcl (%eax), %bnd0");
+ asm volatile("bndcl (0x12345678), %bnd0");
+ asm volatile("bndcl (%eax), %bnd3");
+ asm volatile("bndcl (%ecx,%eax,1), %bnd0");
+ asm volatile("bndcl 0x12345678(,%eax,1), %bnd0");
+ asm volatile("bndcl (%eax,%ecx,1), %bnd0");
+ asm volatile("bndcl (%eax,%ecx,8), %bnd0");
+ asm volatile("bndcl 0x12(%eax), %bnd0");
+ asm volatile("bndcl 0x12(%ebp), %bnd0");
+ asm volatile("bndcl 0x12(%ecx,%eax,1), %bnd0");
+ asm volatile("bndcl 0x12(%ebp,%eax,1), %bnd0");
+ asm volatile("bndcl 0x12(%eax,%ecx,1), %bnd0");
+ asm volatile("bndcl 0x12(%eax,%ecx,8), %bnd0");
+ asm volatile("bndcl 0x12345678(%eax), %bnd0");
+ asm volatile("bndcl 0x12345678(%ebp), %bnd0");
+ asm volatile("bndcl 0x12345678(%ecx,%eax,1), %bnd0");
+ asm volatile("bndcl 0x12345678(%ebp,%eax,1), %bnd0");
+ asm volatile("bndcl 0x12345678(%eax,%ecx,1), %bnd0");
+ asm volatile("bndcl 0x12345678(%eax,%ecx,8), %bnd0");
+ asm volatile("bndcl %eax, %bnd0");
+
+ /* bndcu r/m32, bnd */
+
+ asm volatile("bndcu (%eax), %bnd0");
+ asm volatile("bndcu (0x12345678), %bnd0");
+ asm volatile("bndcu (%eax), %bnd3");
+ asm volatile("bndcu (%ecx,%eax,1), %bnd0");
+ asm volatile("bndcu 0x12345678(,%eax,1), %bnd0");
+ asm volatile("bndcu (%eax,%ecx,1), %bnd0");
+ asm volatile("bndcu (%eax,%ecx,8), %bnd0");
+ asm volatile("bndcu 0x12(%eax), %bnd0");
+ asm volatile("bndcu 0x12(%ebp), %bnd0");
+ asm volatile("bndcu 0x12(%ecx,%eax,1), %bnd0");
+ asm volatile("bndcu 0x12(%ebp,%eax,1), %bnd0");
+ asm volatile("bndcu 0x12(%eax,%ecx,1), %bnd0");
+ asm volatile("bndcu 0x12(%eax,%ecx,8), %bnd0");
+ asm volatile("bndcu 0x12345678(%eax), %bnd0");
+ asm volatile("bndcu 0x12345678(%ebp), %bnd0");
+ asm volatile("bndcu 0x12345678(%ecx,%eax,1), %bnd0");
+ asm volatile("bndcu 0x12345678(%ebp,%eax,1), %bnd0");
+ asm volatile("bndcu 0x12345678(%eax,%ecx,1), %bnd0");
+ asm volatile("bndcu 0x12345678(%eax,%ecx,8), %bnd0");
+ asm volatile("bndcu %eax, %bnd0");
+
+ /* bndcn r/m32, bnd */
+
+ asm volatile("bndcn (%eax), %bnd0");
+ asm volatile("bndcn (0x12345678), %bnd0");
+ asm volatile("bndcn (%eax), %bnd3");
+ asm volatile("bndcn (%ecx,%eax,1), %bnd0");
+ asm volatile("bndcn 0x12345678(,%eax,1), %bnd0");
+ asm volatile("bndcn (%eax,%ecx,1), %bnd0");
+ asm volatile("bndcn (%eax,%ecx,8), %bnd0");
+ asm volatile("bndcn 0x12(%eax), %bnd0");
+ asm volatile("bndcn 0x12(%ebp), %bnd0");
+ asm volatile("bndcn 0x12(%ecx,%eax,1), %bnd0");
+ asm volatile("bndcn 0x12(%ebp,%eax,1), %bnd0");
+ asm volatile("bndcn 0x12(%eax,%ecx,1), %bnd0");
+ asm volatile("bndcn 0x12(%eax,%ecx,8), %bnd0");
+ asm volatile("bndcn 0x12345678(%eax), %bnd0");
+ asm volatile("bndcn 0x12345678(%ebp), %bnd0");
+ asm volatile("bndcn 0x12345678(%ecx,%eax,1), %bnd0");
+ asm volatile("bndcn 0x12345678(%ebp,%eax,1), %bnd0");
+ asm volatile("bndcn 0x12345678(%eax,%ecx,1), %bnd0");
+ asm volatile("bndcn 0x12345678(%eax,%ecx,8), %bnd0");
+ asm volatile("bndcn %eax, %bnd0");
+
+ /* bndmov m64, bnd */
+
+ asm volatile("bndmov (%eax), %bnd0");
+ asm volatile("bndmov (0x12345678), %bnd0");
+ asm volatile("bndmov (%eax), %bnd3");
+ asm volatile("bndmov (%ecx,%eax,1), %bnd0");
+ asm volatile("bndmov 0x12345678(,%eax,1), %bnd0");
+ asm volatile("bndmov (%eax,%ecx,1), %bnd0");
+ asm volatile("bndmov (%eax,%ecx,8), %bnd0");
+ asm volatile("bndmov 0x12(%eax), %bnd0");
+ asm volatile("bndmov 0x12(%ebp), %bnd0");
+ asm volatile("bndmov 0x12(%ecx,%eax,1), %bnd0");
+ asm volatile("bndmov 0x12(%ebp,%eax,1), %bnd0");
+ asm volatile("bndmov 0x12(%eax,%ecx,1), %bnd0");
+ asm volatile("bndmov 0x12(%eax,%ecx,8), %bnd0");
+ asm volatile("bndmov 0x12345678(%eax), %bnd0");
+ asm volatile("bndmov 0x12345678(%ebp), %bnd0");
+ asm volatile("bndmov 0x12345678(%ecx,%eax,1), %bnd0");
+ asm volatile("bndmov 0x12345678(%ebp,%eax,1), %bnd0");
+ asm volatile("bndmov 0x12345678(%eax,%ecx,1), %bnd0");
+ asm volatile("bndmov 0x12345678(%eax,%ecx,8), %bnd0");
+
+ /* bndmov bnd, m64 */
+
+ asm volatile("bndmov %bnd0, (%eax)");
+ asm volatile("bndmov %bnd0, (0x12345678)");
+ asm volatile("bndmov %bnd3, (%eax)");
+ asm volatile("bndmov %bnd0, (%ecx,%eax,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(,%eax,1)");
+ asm volatile("bndmov %bnd0, (%eax,%ecx,1)");
+ asm volatile("bndmov %bnd0, (%eax,%ecx,8)");
+ asm volatile("bndmov %bnd0, 0x12(%eax)");
+ asm volatile("bndmov %bnd0, 0x12(%ebp)");
+ asm volatile("bndmov %bnd0, 0x12(%ecx,%eax,1)");
+ asm volatile("bndmov %bnd0, 0x12(%ebp,%eax,1)");
+ asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,1)");
+ asm volatile("bndmov %bnd0, 0x12(%eax,%ecx,8)");
+ asm volatile("bndmov %bnd0, 0x12345678(%eax)");
+ asm volatile("bndmov %bnd0, 0x12345678(%ebp)");
+ asm volatile("bndmov %bnd0, 0x12345678(%ecx,%eax,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(%ebp,%eax,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,1)");
+ asm volatile("bndmov %bnd0, 0x12345678(%eax,%ecx,8)");
+
+ /* bndmov bnd2, bnd1 */
+
+ asm volatile("bndmov %bnd0, %bnd1");
+ asm volatile("bndmov %bnd1, %bnd0");
+
+ /* bndldx mib, bnd */
+
+ asm volatile("bndldx (%eax), %bnd0");
+ asm volatile("bndldx (0x12345678), %bnd0");
+ asm volatile("bndldx (%eax), %bnd3");
+ asm volatile("bndldx (%ecx,%eax,1), %bnd0");
+ asm volatile("bndldx 0x12345678(,%eax,1), %bnd0");
+ asm volatile("bndldx (%eax,%ecx,1), %bnd0");
+ asm volatile("bndldx 0x12(%eax), %bnd0");
+ asm volatile("bndldx 0x12(%ebp), %bnd0");
+ asm volatile("bndldx 0x12(%ecx,%eax,1), %bnd0");
+ asm volatile("bndldx 0x12(%ebp,%eax,1), %bnd0");
+ asm volatile("bndldx 0x12(%eax,%ecx,1), %bnd0");
+ asm volatile("bndldx 0x12345678(%eax), %bnd0");
+ asm volatile("bndldx 0x12345678(%ebp), %bnd0");
+ asm volatile("bndldx 0x12345678(%ecx,%eax,1), %bnd0");
+ asm volatile("bndldx 0x12345678(%ebp,%eax,1), %bnd0");
+ asm volatile("bndldx 0x12345678(%eax,%ecx,1), %bnd0");
+
+ /* bndstx bnd, mib */
+
+ asm volatile("bndstx %bnd0, (%eax)");
+ asm volatile("bndstx %bnd0, (0x12345678)");
+ asm volatile("bndstx %bnd3, (%eax)");
+ asm volatile("bndstx %bnd0, (%ecx,%eax,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(,%eax,1)");
+ asm volatile("bndstx %bnd0, (%eax,%ecx,1)");
+ asm volatile("bndstx %bnd0, 0x12(%eax)");
+ asm volatile("bndstx %bnd0, 0x12(%ebp)");
+ asm volatile("bndstx %bnd0, 0x12(%ecx,%eax,1)");
+ asm volatile("bndstx %bnd0, 0x12(%ebp,%eax,1)");
+ asm volatile("bndstx %bnd0, 0x12(%eax,%ecx,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(%eax)");
+ asm volatile("bndstx %bnd0, 0x12345678(%ebp)");
+ asm volatile("bndstx %bnd0, 0x12345678(%ecx,%eax,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(%ebp,%eax,1)");
+ asm volatile("bndstx %bnd0, 0x12345678(%eax,%ecx,1)");
+
+ /* bnd prefix on call, ret, jmp and all jcc */
+
+ asm volatile("bnd call label1"); /* Expecting: call unconditional 0xfffffffc */
+ asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd ret"); /* Expecting: ret indirect 0 */
+ asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0xfffffffc */
+ asm volatile("bnd jmp label1"); /* Expecting: jmp unconditional 0xfffffffc */
+ asm volatile("bnd jmp *(%ecx)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jne label1"); /* Expecting: jcc conditional 0xfffffffc */
+
+ /* sha1rnds4 imm8, xmm2/m128, xmm1 */
+
+ asm volatile("sha1rnds4 $0x0, %xmm1, %xmm0");
+ asm volatile("sha1rnds4 $0x91, %xmm7, %xmm2");
+ asm volatile("sha1rnds4 $0x91, (%eax), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (0x12345678), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%eax), %xmm3");
+ asm volatile("sha1rnds4 $0x91, (%ecx,%eax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(,%eax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%eax,%ecx,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, (%eax,%ecx,8), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%eax), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%ebp), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12(%eax,%ecx,8), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%eax), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1rnds4 $0x91, 0x12345678(%eax,%ecx,8), %xmm0");
+
+ /* sha1nexte xmm2/m128, xmm1 */
+
+ asm volatile("sha1nexte %xmm1, %xmm0");
+ asm volatile("sha1nexte %xmm7, %xmm2");
+ asm volatile("sha1nexte (%eax), %xmm0");
+ asm volatile("sha1nexte (0x12345678), %xmm0");
+ asm volatile("sha1nexte (%eax), %xmm3");
+ asm volatile("sha1nexte (%ecx,%eax,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(,%eax,1), %xmm0");
+ asm volatile("sha1nexte (%eax,%ecx,1), %xmm0");
+ asm volatile("sha1nexte (%eax,%ecx,8), %xmm0");
+ asm volatile("sha1nexte 0x12(%eax), %xmm0");
+ asm volatile("sha1nexte 0x12(%ebp), %xmm0");
+ asm volatile("sha1nexte 0x12(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1nexte 0x12(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1nexte 0x12(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1nexte 0x12(%eax,%ecx,8), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%eax), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%ebp), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1nexte 0x12345678(%eax,%ecx,8), %xmm0");
+
+ /* sha1msg1 xmm2/m128, xmm1 */
+
+ asm volatile("sha1msg1 %xmm1, %xmm0");
+ asm volatile("sha1msg1 %xmm7, %xmm2");
+ asm volatile("sha1msg1 (%eax), %xmm0");
+ asm volatile("sha1msg1 (0x12345678), %xmm0");
+ asm volatile("sha1msg1 (%eax), %xmm3");
+ asm volatile("sha1msg1 (%ecx,%eax,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(,%eax,1), %xmm0");
+ asm volatile("sha1msg1 (%eax,%ecx,1), %xmm0");
+ asm volatile("sha1msg1 (%eax,%ecx,8), %xmm0");
+ asm volatile("sha1msg1 0x12(%eax), %xmm0");
+ asm volatile("sha1msg1 0x12(%ebp), %xmm0");
+ asm volatile("sha1msg1 0x12(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1msg1 0x12(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1msg1 0x12(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1msg1 0x12(%eax,%ecx,8), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%eax), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%ebp), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1msg1 0x12345678(%eax,%ecx,8), %xmm0");
+
+ /* sha1msg2 xmm2/m128, xmm1 */
+
+ asm volatile("sha1msg2 %xmm1, %xmm0");
+ asm volatile("sha1msg2 %xmm7, %xmm2");
+ asm volatile("sha1msg2 (%eax), %xmm0");
+ asm volatile("sha1msg2 (0x12345678), %xmm0");
+ asm volatile("sha1msg2 (%eax), %xmm3");
+ asm volatile("sha1msg2 (%ecx,%eax,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(,%eax,1), %xmm0");
+ asm volatile("sha1msg2 (%eax,%ecx,1), %xmm0");
+ asm volatile("sha1msg2 (%eax,%ecx,8), %xmm0");
+ asm volatile("sha1msg2 0x12(%eax), %xmm0");
+ asm volatile("sha1msg2 0x12(%ebp), %xmm0");
+ asm volatile("sha1msg2 0x12(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1msg2 0x12(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1msg2 0x12(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1msg2 0x12(%eax,%ecx,8), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%eax), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%ebp), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%ecx,%eax,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%ebp,%eax,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%eax,%ecx,1), %xmm0");
+ asm volatile("sha1msg2 0x12345678(%eax,%ecx,8), %xmm0");
+
+ /* sha256rnds2 <XMM0>, xmm2/m128, xmm1 */
+ /* Note sha256rnds2 has an implicit operand 'xmm0' */
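+	/* (The encoding thus names only two operands even though three are consumed.) */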
+
+ asm volatile("sha256rnds2 %xmm4, %xmm1");
+ asm volatile("sha256rnds2 %xmm7, %xmm2");
+ asm volatile("sha256rnds2 (%eax), %xmm1");
+ asm volatile("sha256rnds2 (0x12345678), %xmm1");
+ asm volatile("sha256rnds2 (%eax), %xmm3");
+ asm volatile("sha256rnds2 (%ecx,%eax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(,%eax,1), %xmm1");
+ asm volatile("sha256rnds2 (%eax,%ecx,1), %xmm1");
+ asm volatile("sha256rnds2 (%eax,%ecx,8), %xmm1");
+ asm volatile("sha256rnds2 0x12(%eax), %xmm1");
+ asm volatile("sha256rnds2 0x12(%ebp), %xmm1");
+ asm volatile("sha256rnds2 0x12(%ecx,%eax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12(%ebp,%eax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12(%eax,%ecx,1), %xmm1");
+ asm volatile("sha256rnds2 0x12(%eax,%ecx,8), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%eax), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%ebp), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%ecx,%eax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%ebp,%eax,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%eax,%ecx,1), %xmm1");
+ asm volatile("sha256rnds2 0x12345678(%eax,%ecx,8), %xmm1");
+
+ /* sha256msg1 xmm2/m128, xmm1 */
+
+ asm volatile("sha256msg1 %xmm1, %xmm0");
+ asm volatile("sha256msg1 %xmm7, %xmm2");
+ asm volatile("sha256msg1 (%eax), %xmm0");
+ asm volatile("sha256msg1 (0x12345678), %xmm0");
+ asm volatile("sha256msg1 (%eax), %xmm3");
+ asm volatile("sha256msg1 (%ecx,%eax,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(,%eax,1), %xmm0");
+ asm volatile("sha256msg1 (%eax,%ecx,1), %xmm0");
+ asm volatile("sha256msg1 (%eax,%ecx,8), %xmm0");
+ asm volatile("sha256msg1 0x12(%eax), %xmm0");
+ asm volatile("sha256msg1 0x12(%ebp), %xmm0");
+ asm volatile("sha256msg1 0x12(%ecx,%eax,1), %xmm0");
+ asm volatile("sha256msg1 0x12(%ebp,%eax,1), %xmm0");
+ asm volatile("sha256msg1 0x12(%eax,%ecx,1), %xmm0");
+ asm volatile("sha256msg1 0x12(%eax,%ecx,8), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%eax), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%ebp), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%ecx,%eax,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%ebp,%eax,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%eax,%ecx,1), %xmm0");
+ asm volatile("sha256msg1 0x12345678(%eax,%ecx,8), %xmm0");
+
+ /* sha256msg2 xmm2/m128, xmm1 */
+
+ asm volatile("sha256msg2 %xmm1, %xmm0");
+ asm volatile("sha256msg2 %xmm7, %xmm2");
+ asm volatile("sha256msg2 (%eax), %xmm0");
+ asm volatile("sha256msg2 (0x12345678), %xmm0");
+ asm volatile("sha256msg2 (%eax), %xmm3");
+ asm volatile("sha256msg2 (%ecx,%eax,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(,%eax,1), %xmm0");
+ asm volatile("sha256msg2 (%eax,%ecx,1), %xmm0");
+ asm volatile("sha256msg2 (%eax,%ecx,8), %xmm0");
+ asm volatile("sha256msg2 0x12(%eax), %xmm0");
+ asm volatile("sha256msg2 0x12(%ebp), %xmm0");
+ asm volatile("sha256msg2 0x12(%ecx,%eax,1), %xmm0");
+ asm volatile("sha256msg2 0x12(%ebp,%eax,1), %xmm0");
+ asm volatile("sha256msg2 0x12(%eax,%ecx,1), %xmm0");
+ asm volatile("sha256msg2 0x12(%eax,%ecx,8), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%eax), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%ebp), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%ecx,%eax,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%ebp,%eax,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%eax,%ecx,1), %xmm0");
+ asm volatile("sha256msg2 0x12345678(%eax,%ecx,8), %xmm0");
+
+ /* clflushopt m8 */
+
+ asm volatile("clflushopt (%eax)");
+ asm volatile("clflushopt (0x12345678)");
+ asm volatile("clflushopt 0x12345678(%eax,%ecx,8)");
+ /* Also check instructions in the same group encoding as clflushopt */
+ asm volatile("clflush (%eax)");
+ asm volatile("sfence");
+
+ /* clwb m8 */
+
+ asm volatile("clwb (%eax)");
+ asm volatile("clwb (0x12345678)");
+ asm volatile("clwb 0x12345678(%eax,%ecx,8)");
+ /* Also check instructions in the same group encoding as clwb */
+ asm volatile("xsaveopt (%eax)");
+ asm volatile("mfence");
+
+ /* xsavec mem */
+
+ asm volatile("xsavec (%eax)");
+ asm volatile("xsavec (0x12345678)");
+ asm volatile("xsavec 0x12345678(%eax,%ecx,8)");
+
+ /* xsaves mem */
+
+ asm volatile("xsaves (%eax)");
+ asm volatile("xsaves (0x12345678)");
+ asm volatile("xsaves 0x12345678(%eax,%ecx,8)");
+
+ /* xrstors mem */
+
+ asm volatile("xrstors (%eax)");
+ asm volatile("xrstors (0x12345678)");
+ asm volatile("xrstors 0x12345678(%eax,%ecx,8)");
+
+ /* ptwrite */
+
+ asm volatile("ptwrite (%eax)");
+ asm volatile("ptwrite (0x12345678)");
+ asm volatile("ptwrite 0x12345678(%eax,%ecx,8)");
+
+ asm volatile("ptwritel (%eax)");
+ asm volatile("ptwritel (0x12345678)");
+ asm volatile("ptwritel 0x12345678(%eax,%ecx,8)");
+
+#endif /* #ifndef __x86_64__ */
+
+ /* Following line is a marker for the awk script - do not change */
+ asm volatile("rdtsc"); /* Stop here */
+
+ return 0;
+}
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
new file mode 100644
index 000000000..a5d24ae58
--- /dev/null
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#include "intel-pt-decoder/insn.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+
+struct test_data {
+ u8 data[MAX_INSN_SIZE];
+ int expected_length;
+ int expected_rel;
+ const char *expected_op_str;
+ const char *expected_branch_str;
+ const char *asm_rep;
+};
+
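+/*
+ * Each entry pairs raw opcode bytes with the expected decode: length,
+ * relative-branch value, Intel PT op/branch strings, and a readable dump.
+ * The rdpkru/wrpkru rows below supplement the generated tables; a zeroed
+ * entry terminates each array.
+ */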
+struct test_data test_data_32[] = {
+#include "insn-x86-dat-32.c"
+ {{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"},
+ {{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"},
+ {{0}, 0, 0, NULL, NULL, NULL},
+};
+
+struct test_data test_data_64[] = {
+#include "insn-x86-dat-64.c"
+ {{0x0f, 0x01, 0xee}, 3, 0, NULL, NULL, "0f 01 ee \trdpkru"},
+ {{0x0f, 0x01, 0xef}, 3, 0, NULL, NULL, "0f 01 ef \twrpkru"},
+ {{0}, 0, 0, NULL, NULL, NULL},
+};
+
+static int get_op(const char *op_str)
+{
+ struct val_data {
+ const char *name;
+ int val;
+ } vals[] = {
+ {"other", INTEL_PT_OP_OTHER},
+ {"call", INTEL_PT_OP_CALL},
+ {"ret", INTEL_PT_OP_RET},
+ {"jcc", INTEL_PT_OP_JCC},
+ {"jmp", INTEL_PT_OP_JMP},
+ {"loop", INTEL_PT_OP_LOOP},
+ {"iret", INTEL_PT_OP_IRET},
+ {"int", INTEL_PT_OP_INT},
+ {"syscall", INTEL_PT_OP_SYSCALL},
+ {"sysret", INTEL_PT_OP_SYSRET},
+ {NULL, 0},
+ };
+ struct val_data *val;
+
+ if (!op_str || !strlen(op_str))
+ return 0;
+
+ for (val = vals; val->name; val++) {
+ if (!strcmp(val->name, op_str))
+ return val->val;
+ }
+
+ pr_debug("Failed to get op\n");
+
+ return -1;
+}
+
+static int get_branch(const char *branch_str)
+{
+ struct val_data {
+ const char *name;
+ int val;
+ } vals[] = {
+ {"no_branch", INTEL_PT_BR_NO_BRANCH},
+ {"indirect", INTEL_PT_BR_INDIRECT},
+ {"conditional", INTEL_PT_BR_CONDITIONAL},
+ {"unconditional", INTEL_PT_BR_UNCONDITIONAL},
+ {NULL, 0},
+ };
+ struct val_data *val;
+
+ if (!branch_str || !strlen(branch_str))
+ return 0;
+
+ for (val = vals; val->name; val++) {
+ if (!strcmp(val->name, branch_str))
+ return val->val;
+ }
+
+ pr_debug("Failed to get branch\n");
+
+ return -1;
+}
+
+static int test_data_item(struct test_data *dat, int x86_64)
+{
+ struct intel_pt_insn intel_pt_insn;
+ struct insn insn;
+ int op, branch;
+
+ insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64);
+ insn_get_length(&insn);
+
+ if (!insn_complete(&insn)) {
+ pr_debug("Failed to decode: %s\n", dat->asm_rep);
+ return -1;
+ }
+
+ if (insn.length != dat->expected_length) {
+ pr_debug("Failed to decode length (%d vs expected %d): %s\n",
+ insn.length, dat->expected_length, dat->asm_rep);
+ return -1;
+ }
+
+ op = get_op(dat->expected_op_str);
+ branch = get_branch(dat->expected_branch_str);
+
+ if (intel_pt_get_insn(dat->data, MAX_INSN_SIZE, x86_64, &intel_pt_insn)) {
+ pr_debug("Intel PT failed to decode: %s\n", dat->asm_rep);
+ return -1;
+ }
+
+ if ((int)intel_pt_insn.op != op) {
+ pr_debug("Failed to decode 'op' value (%d vs expected %d): %s\n",
+ intel_pt_insn.op, op, dat->asm_rep);
+ return -1;
+ }
+
+ if ((int)intel_pt_insn.branch != branch) {
+ pr_debug("Failed to decode 'branch' value (%d vs expected %d): %s\n",
+ intel_pt_insn.branch, branch, dat->asm_rep);
+ return -1;
+ }
+
+ if (intel_pt_insn.rel != dat->expected_rel) {
+ pr_debug("Failed to decode 'rel' value (%#x vs expected %#x): %s\n",
+ intel_pt_insn.rel, dat->expected_rel, dat->asm_rep);
+ return -1;
+ }
+
+ pr_debug("Decoded ok: %s\n", dat->asm_rep);
+
+ return 0;
+}
+
+static int test_data_set(struct test_data *dat_set, int x86_64)
+{
+ struct test_data *dat;
+ int ret = 0;
+
+ for (dat = dat_set; dat->expected_length; dat++) {
+ if (test_data_item(dat, x86_64))
+ ret = -1;
+ }
+
+ return ret;
+}
+
+/**
+ * test__insn_x86 - test x86 instruction decoder - new instructions.
+ *
+ * This function implements a test that decodes a selection of instructions and
+ * checks the results. The Intel PT function that further categorizes
+ * instructions (i.e. intel_pt_get_insn()) is also checked.
+ *
+ * The instructions are originally in insn-x86-dat-src.c which has been
+ * processed by scripts gen-insn-x86-dat.sh and gen-insn-x86-dat.awk to produce
+ * insn-x86-dat-32.c and insn-x86-dat-64.c, which are included in this program.
+ * That is, to add new instructions to the test, edit insn-x86-dat-src.c, run
+ * the gen-insn-x86-dat.sh script, make perf, and then run the test.
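+ *
+ * A typical refresh (a sketch; exact paths and script options may differ):
+ *
+ *   cd tools/perf/arch/x86/tests
+ *   ./gen-insn-x86-dat.sh
+ *   make -C tools/perf
+ *   perf test -v "x86 instruction decoder"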
+ *
+ * If the test passes %0 is returned, otherwise %-1 is returned. Use the
+ * verbose (-v) option to see all the instructions and whether or not they
+ * decoded successfully.
+ */
+int test__insn_x86(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret = 0;
+
+ if (test_data_set(test_data_32, 0))
+ ret = -1;
+
+ if (test_data_set(test_data_64, 1))
+ ret = -1;
+
+ return ret;
+}
diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c
new file mode 100644
index 000000000..90a4a8c58
--- /dev/null
+++ b/tools/perf/arch/x86/tests/intel-cqm.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests/tests.h"
+#include "perf.h"
+#include "cloexec.h"
+#include "debug.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "arch-tests.h"
+
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+static pid_t spawn(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid)
+ return pid;
+
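+	/* Child: idle until the parent kills it after the measurement. */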
+	while (1)
+ sleep(5);
+ return 0;
+}
+
+/*
+ * Create an event group that contains both a sampled hardware
+ * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then
+ * wait for the hardware perf counter to overflow and generate a PMI,
+ * which triggers an event read for both of the events in the group.
+ *
+ * Since reading Intel CQM event counters requires sending SMP IPIs, the
+ * CQM pmu needs to handle the above situation gracefully, and return
+ * the last read counter value to avoid triggering a WARN_ON_ONCE() in
+ * smp_call_function_many() caused by sending IPIs from NMI context.
+ */
+int test__intel_cqm_count_nmi_context(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel = NULL;
+ struct perf_event_attr pe;
+ int i, fd[2], flag, ret;
+ size_t mmap_len;
+ void *event;
+ pid_t pid;
+ int err = TEST_FAIL;
+
+ flag = perf_event_open_cloexec_flag();
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("perf_evlist__new failed\n");
+ return TEST_FAIL;
+ }
+
+ ret = parse_events(evlist, "intel_cqm/llc_occupancy/", NULL);
+ if (ret) {
+ pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n");
+ err = TEST_SKIP;
+ goto out;
+ }
+
+ evsel = perf_evlist__first(evlist);
+ if (!evsel) {
+ pr_debug("perf_evlist__first failed\n");
+ goto out;
+ }
+
+ memset(&pe, 0, sizeof(pe));
+ pe.size = sizeof(pe);
+
+ pe.type = PERF_TYPE_HARDWARE;
+ pe.config = PERF_COUNT_HW_CPU_CYCLES;
+ pe.read_format = PERF_FORMAT_GROUP;
+
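+	/*
+	 * A small period makes the cycles counter overflow quickly, forcing
+	 * the NMI-context group read described above.
+	 */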
+ pe.sample_period = 128;
+ pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ;
+
+ pid = spawn();
+
+ fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag);
+ if (fd[0] < 0) {
+ pr_debug("failed to open event\n");
+ goto out;
+ }
+
+ memset(&pe, 0, sizeof(pe));
+ pe.size = sizeof(pe);
+
+ pe.type = evsel->attr.type;
+ pe.config = evsel->attr.config;
+
+ fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag);
+ if (fd[1] < 0) {
+ pr_debug("failed to open event\n");
+ goto out;
+ }
+
+ /*
+ * Pick a power-of-two number of pages + 1 for the meta-data
+ * page (struct perf_event_mmap_page). See tools/perf/design.txt.
+ */
+ mmap_len = page_size * 65;
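+	/* i.e. one meta-data page followed by 64 data pages. */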
+
+ event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0);
+	if (event == MAP_FAILED) {
+ pr_debug("failed to mmap %d\n", errno);
+ goto out;
+ }
+
+ sleep(1);
+
+ err = TEST_OK;
+
+ munmap(event, mmap_len);
+
+ for (i = 0; i < 2; i++)
+ close(fd[i]);
+
+ kill(pid, SIGKILL);
+ wait(NULL);
+out:
+ perf_evlist__delete(evlist);
+ return err;
+}
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
new file mode 100644
index 000000000..7a7721604
--- /dev/null
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/prctl.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tsc.h"
+#include "tests/tests.h"
+
+#include "arch-tests.h"
+
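+/*
+ * Both helpers evaluate x exactly once; on failure they print the failing
+ * expression and jump to the cleanup label, so the while never loops.
+ */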
+#define CHECK__(x) { \
+ while ((x) < 0) { \
+ pr_debug(#x " failed!\n"); \
+ goto out_err; \
+ } \
+}
+
+#define CHECK_NOT_NULL__(x) { \
+ while ((x) == NULL) { \
+ pr_debug(#x " failed!\n"); \
+ goto out_err; \
+ } \
+}
+
+/**
+ * test__perf_time_to_tsc - test converting perf time to TSC.
+ *
+ * This function implements a test that checks that the conversion of perf time
+ * to and from TSC is consistent with the order of events. If the test passes
+ * %0 is returned, otherwise %-1 is returned. If TSC conversion is not
+ * supported then the test passes but " (not supported)" is printed.
+ */
+int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct record_opts opts = {
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .target = {
+ .uses_mmap = true,
+ },
+ .sample_time = true,
+ };
+ struct thread_map *threads = NULL;
+ struct cpu_map *cpus = NULL;
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel = NULL;
+ int err = -1, ret, i;
+ const char *comm1, *comm2;
+ struct perf_tsc_conversion tc;
+ struct perf_event_mmap_page *pc;
+ union perf_event *event;
+ u64 test_tsc, comm1_tsc, comm2_tsc;
+ u64 test_time, comm1_time = 0, comm2_time = 0;
+ struct perf_mmap *md;
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ CHECK_NOT_NULL__(threads);
+
+ cpus = cpu_map__new(NULL);
+ CHECK_NOT_NULL__(cpus);
+
+ evlist = perf_evlist__new();
+ CHECK_NOT_NULL__(evlist);
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ CHECK__(parse_events(evlist, "cycles:u", NULL));
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ evsel = perf_evlist__first(evlist);
+
+ evsel->attr.comm = 1;
+ evsel->attr.disabled = 1;
+ evsel->attr.enable_on_exec = 0;
+
+ CHECK__(perf_evlist__open(evlist));
+
+ CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+
+ pc = evlist->mmap[0].base;
+ ret = perf_read_tsc_conversion(pc, &tc);
+ if (ret) {
+ if (ret == -EOPNOTSUPP) {
+ fprintf(stderr, " (not supported)");
+ return 0;
+ }
+ goto out_err;
+ }
+
+ perf_evlist__enable(evlist);
+
+ comm1 = "Test COMM 1";
+ CHECK__(prctl(PR_SET_NAME, (unsigned long)comm1, 0, 0, 0));
+
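+	/*
+	 * test_tsc is sampled between the two comm events; the perf-time and
+	 * TSC comparisons at the end must both bracket it.
+	 */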
+ test_tsc = rdtsc();
+
+ comm2 = "Test COMM 2";
+ CHECK__(prctl(PR_SET_NAME, (unsigned long)comm2, 0, 0, 0));
+
+ perf_evlist__disable(evlist);
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ struct perf_sample sample;
+
+ if (event->header.type != PERF_RECORD_COMM ||
+ (pid_t)event->comm.pid != getpid() ||
+ (pid_t)event->comm.tid != getpid())
+ goto next_event;
+
+ if (strcmp(event->comm.comm, comm1) == 0) {
+ CHECK__(perf_evsel__parse_sample(evsel, event,
+ &sample));
+ comm1_time = sample.time;
+ }
+ if (strcmp(event->comm.comm, comm2) == 0) {
+ CHECK__(perf_evsel__parse_sample(evsel, event,
+ &sample));
+ comm2_time = sample.time;
+ }
+next_event:
+ perf_mmap__consume(md);
+ }
+ perf_mmap__read_done(md);
+ }
+
+ if (!comm1_time || !comm2_time)
+ goto out_err;
+
+ test_time = tsc_to_perf_time(test_tsc, &tc);
+ comm1_tsc = perf_time_to_tsc(comm1_time, &tc);
+ comm2_tsc = perf_time_to_tsc(comm2_time, &tc);
+
+ pr_debug("1st event perf time %"PRIu64" tsc %"PRIu64"\n",
+ comm1_time, comm1_tsc);
+ pr_debug("rdtsc time %"PRIu64" tsc %"PRIu64"\n",
+ test_time, test_tsc);
+ pr_debug("2nd event perf time %"PRIu64" tsc %"PRIu64"\n",
+ comm2_time, comm2_tsc);
+
+ if (test_time <= comm1_time ||
+ test_time >= comm2_time)
+ goto out_err;
+
+ if (test_tsc <= comm1_tsc ||
+ test_tsc >= comm2_tsc)
+ goto out_err;
+
+ err = 0;
+
+out_err:
+ perf_evlist__delete(evlist);
+ return err;
+}
diff --git a/tools/perf/arch/x86/tests/rdpmc.c b/tools/perf/arch/x86/tests/rdpmc.c
new file mode 100644
index 000000000..7a11f02d6
--- /dev/null
+++ b/tools/perf/arch/x86/tests/rdpmc.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <linux/types.h>
+#include "perf.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "cloexec.h"
+#include "util.h"
+#include "arch-tests.h"
+
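+/*
+ * RDPMC returns the selected performance counter split across EDX:EAX;
+ * reassemble it into a 64-bit value.
+ */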
+static u64 rdpmc(unsigned int counter)
+{
+ unsigned int low, high;
+
+ asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
+
+ return low | ((u64)high) << 32;
+}
+
+static u64 rdtsc(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+
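+/*
+ * Self-monitoring read following the seqlock protocol documented for
+ * struct perf_event_mmap_page: retry until pc->lock is stable, then
+ * scale the count by time_enabled/time_running if it was multiplexed.
+ */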
+static u64 mmap_read_self(void *addr)
+{
+ struct perf_event_mmap_page *pc = addr;
+ u32 seq, idx, time_mult = 0, time_shift = 0;
+ u64 count, cyc = 0, time_offset = 0, enabled, running, delta;
+
+ do {
+ seq = pc->lock;
+ barrier();
+
+ enabled = pc->time_enabled;
+ running = pc->time_running;
+
+ if (enabled != running) {
+ cyc = rdtsc();
+ time_mult = pc->time_mult;
+ time_shift = pc->time_shift;
+ time_offset = pc->time_offset;
+ }
+
+ idx = pc->index;
+ count = pc->offset;
+ if (idx)
+ count += rdpmc(idx - 1);
+
+ barrier();
+ } while (pc->lock != seq);
+
+ if (enabled != running) {
+ u64 quot, rem;
+
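+		/*
+		 * Compute time_offset + (cyc * time_mult) >> time_shift in
+		 * two halves so the multiplication cannot overflow 64 bits.
+		 */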
+ quot = (cyc >> time_shift);
+ rem = cyc & (((u64)1 << time_shift) - 1);
+ delta = time_offset + quot * time_mult +
+ ((rem * time_mult) >> time_shift);
+
+ enabled += delta;
+ if (idx)
+ running += delta;
+
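+		/* Scale count by enabled/running, again split to avoid overflow. */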
+ quot = count / running;
+ rem = count % running;
+ count = quot * enabled + (rem * enabled) / running;
+ }
+
+ return count;
+}
+
+/*
+ * If the RDPMC instruction faults then signal this back to the test parent task:
+ */
+static void segfault_handler(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ exit(-1);
+}
+
+static int __test__rdpmc(void)
+{
+ volatile int tmp = 0;
+ u64 i, loops = 1000;
+ int n;
+ int fd;
+ void *addr;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .exclude_kernel = 1,
+ };
+ u64 delta_sum = 0;
+ struct sigaction sa;
+ char sbuf[STRERR_BUFSIZE];
+
+ sigfillset(&sa.sa_mask);
+ sa.sa_sigaction = segfault_handler;
+ sa.sa_flags = 0;
+ sigaction(SIGSEGV, &sa, NULL);
+
+ fd = sys_perf_event_open(&attr, 0, -1, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_err("Error: sys_perf_event_open() syscall returned "
+ "with %d (%s)\n", fd,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -1;
+ }
+
+ addr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (addr == MAP_FAILED) {
+ pr_err("Error: mmap() syscall returned with (%s)\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_close;
+ }
+
+ for (n = 0; n < 6; n++) {
+ u64 stamp, now, delta;
+
+ stamp = mmap_read_self(addr);
+
+ for (i = 0; i < loops; i++)
+ tmp++;
+
+ now = mmap_read_self(addr);
+ loops *= 10;
+
+ delta = now - stamp;
+ pr_debug("%14d: %14Lu\n", n, (long long)delta);
+
+ delta_sum += delta;
+ }
+
+ munmap(addr, page_size);
+ pr_debug(" ");
+out_close:
+ close(fd);
+
+ if (!delta_sum)
+ return -1;
+
+ return 0;
+}
+
+int test__rdpmc(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int status = 0;
+ int wret = 0;
+ int ret;
+ int pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (!pid) {
+ ret = __test__rdpmc();
+
+ exit(ret);
+ }
+
+ wret = waitpid(pid, &status, 0);
+ if (wret < 0 || status)
+ return -1;
+
+ return 0;
+}
diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S
new file mode 100644
index 000000000..bbe5a0d16
--- /dev/null
+++ b/tools/perf/arch/x86/tests/regs_load.S
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
+#define AX 0
+#define BX 1 * 8
+#define CX 2 * 8
+#define DX 3 * 8
+#define SI 4 * 8
+#define DI 5 * 8
+#define BP 6 * 8
+#define SP 7 * 8
+#define IP 8 * 8
+#define FLAGS 9 * 8
+#define CS 10 * 8
+#define SS 11 * 8
+#define DS 12 * 8
+#define ES 13 * 8
+#define FS 14 * 8
+#define GS 15 * 8
+#define R8 16 * 8
+#define R9 17 * 8
+#define R10 18 * 8
+#define R11 19 * 8
+#define R12 20 * 8
+#define R13 21 * 8
+#define R14 22 * 8
+#define R15 23 * 8
+
+.text
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ENTRY(perf_regs_load)
+ movq %rax, AX(%rdi)
+ movq %rbx, BX(%rdi)
+ movq %rcx, CX(%rdi)
+ movq %rdx, DX(%rdi)
+ movq %rsi, SI(%rdi)
+ movq %rdi, DI(%rdi)
+ movq %rbp, BP(%rdi)
+
+ leaq 8(%rsp), %rax /* exclude this call. */
+ movq %rax, SP(%rdi)
+
+ movq 0(%rsp), %rax
+ movq %rax, IP(%rdi)
+
+ movq $0, FLAGS(%rdi)
+ movq $0, CS(%rdi)
+ movq $0, SS(%rdi)
+ movq $0, DS(%rdi)
+ movq $0, ES(%rdi)
+ movq $0, FS(%rdi)
+ movq $0, GS(%rdi)
+
+ movq %r8, R8(%rdi)
+ movq %r9, R9(%rdi)
+ movq %r10, R10(%rdi)
+ movq %r11, R11(%rdi)
+ movq %r12, R12(%rdi)
+ movq %r13, R13(%rdi)
+ movq %r14, R14(%rdi)
+ movq %r15, R15(%rdi)
+ ret
+ENDPROC(perf_regs_load)
+#else
+ENTRY(perf_regs_load)
+ push %edi
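+	/* The caller's %edi is recovered via the pop below and stored as DI. */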
+ movl 8(%esp), %edi
+ movl %eax, AX(%edi)
+ movl %ebx, BX(%edi)
+ movl %ecx, CX(%edi)
+ movl %edx, DX(%edi)
+ movl %esi, SI(%edi)
+ pop %eax
+ movl %eax, DI(%edi)
+ movl %ebp, BP(%edi)
+
+ leal 4(%esp), %eax /* exclude this call. */
+ movl %eax, SP(%edi)
+
+ movl 0(%esp), %eax
+ movl %eax, IP(%edi)
+
+ movl $0, FLAGS(%edi)
+ movl $0, CS(%edi)
+ movl $0, SS(%edi)
+ movl $0, DS(%edi)
+ movl $0, ES(%edi)
+ movl $0, FS(%edi)
+ movl $0, GS(%edi)
+ ret
+ENDPROC(perf_regs_load)
+#endif
+
+/*
+ * We need to provide a .note.GNU-stack section stating that we do NOT
+ * want an executable stack. Otherwise the final link will assume that
+ * the ELF stack should not be restricted at all and mark it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
new file mode 100644
index 000000000..844b8f335
--- /dev/null
+++ b/tools/perf/arch/x86/util/Build
@@ -0,0 +1,18 @@
+libperf-y += header.o
+libperf-y += tsc.o
+libperf-y += pmu.o
+libperf-y += kvm-stat.o
+libperf-y += perf_regs.o
+libperf-y += group.o
+libperf-y += machine.o
+libperf-y += event.o
+
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+libperf-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o
+
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
new file mode 100644
index 000000000..b135af620
--- /dev/null
+++ b/tools/perf/arch/x86/util/auxtrace.c
@@ -0,0 +1,80 @@
+/*
+ * auxtrace.c: AUX area tracing support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+
+#include "../../util/header.h"
+#include "../../util/debug.h"
+#include "../../util/pmu.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/evlist.h"
+
+static
+struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
+ int *err)
+{
+ struct perf_pmu *intel_pt_pmu;
+ struct perf_pmu *intel_bts_pmu;
+ struct perf_evsel *evsel;
+ bool found_pt = false;
+ bool found_bts = false;
+
+ intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (intel_pt_pmu && evsel->attr.type == intel_pt_pmu->type)
+ found_pt = true;
+ if (intel_bts_pmu && evsel->attr.type == intel_bts_pmu->type)
+ found_bts = true;
+ }
+
+ if (found_pt && found_bts) {
+ pr_err("intel_pt and intel_bts may not be used together\n");
+ *err = -EINVAL;
+ return NULL;
+ }
+
+ if (found_pt)
+ return intel_pt_recording_init(err);
+
+ if (found_bts)
+ return intel_bts_recording_init(err);
+
+ return NULL;
+}
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+ int *err)
+{
+ char buffer[64];
+ int ret;
+
+ *err = 0;
+
+ ret = get_cpuid(buffer, sizeof(buffer));
+ if (ret) {
+ *err = ret;
+ return NULL;
+ }
+
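+ /* get_cpuid() formats the id as "<vendor>,<family>,<model>,<stepping>" */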
+ if (!strncmp(buffer, "GenuineIntel,", 13))
+ return auxtrace_record__init_intel(evlist, err);
+
+ return NULL;
+}
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
new file mode 100644
index 000000000..1f86ee8fb
--- /dev/null
+++ b/tools/perf/arch/x86/util/dwarf-regs.c
@@ -0,0 +1,129 @@
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ * Extracted from probe-finder.c
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stddef.h>
+#include <errno.h> /* for EINVAL */
+#include <string.h> /* for strcmp */
+#include <linux/ptrace.h> /* for struct pt_regs */
+#include <linux/kernel.h> /* for offsetof */
+#include <dwarf-regs.h>
+
+/*
+ * See arch/x86/kernel/ptrace.c.
+ * Different from it:
+ *
+ * - Since struct pt_regs is defined differently for user and kernel,
+ *   but we want to use 'ax, bx' instead of 'rax, rbx' (which are the
+ *   field names of the user-space pt_regs), we make REG_OFFSET_NAME
+ *   accept both a string name and a reg field name.
+ *
+ * - Since accessing x86_32's pt_regs from an x86_64 build is difficult
+ *   and vice versa, we simply fill the offset with -1, so
+ *   get_arch_regstr() still works but regs_query_register_offset()
+ *   returns an error.
+ *   The only inconvenience this causes is that we are not allowed to
+ *   generate a BPF prologue for an x86_64 kernel if perf is built for
+ *   x86_32. This is really a rare use case.
+ *
+ * - The order differs from the kernel's ptrace.c in get_arch_regstr();
+ *   use the order defined by DWARF.
+ */
+
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+#ifdef __x86_64__
+# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
+# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1}
+#else
+# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1}
+# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)}
+#endif
+
+/* TODO: switch by DWARF address size */
+#ifndef __x86_64__
+static const struct pt_regs_offset x86_32_regoffset_table[] = {
+ REG_OFFSET_NAME_32("%ax", eax),
+ REG_OFFSET_NAME_32("%cx", ecx),
+ REG_OFFSET_NAME_32("%dx", edx),
+ REG_OFFSET_NAME_32("%bx", ebx),
+ REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */
+ REG_OFFSET_NAME_32("%bp", ebp),
+ REG_OFFSET_NAME_32("%si", esi),
+ REG_OFFSET_NAME_32("%di", edi),
+ REG_OFFSET_END,
+};
+
+#define regoffset_table x86_32_regoffset_table
+#else
+static const struct pt_regs_offset x86_64_regoffset_table[] = {
+ REG_OFFSET_NAME_64("%ax", rax),
+ REG_OFFSET_NAME_64("%dx", rdx),
+ REG_OFFSET_NAME_64("%cx", rcx),
+ REG_OFFSET_NAME_64("%bx", rbx),
+ REG_OFFSET_NAME_64("%si", rsi),
+ REG_OFFSET_NAME_64("%di", rdi),
+ REG_OFFSET_NAME_64("%bp", rbp),
+ REG_OFFSET_NAME_64("%sp", rsp),
+ REG_OFFSET_NAME_64("%r8", r8),
+ REG_OFFSET_NAME_64("%r9", r9),
+ REG_OFFSET_NAME_64("%r10", r10),
+ REG_OFFSET_NAME_64("%r11", r11),
+ REG_OFFSET_NAME_64("%r12", r12),
+ REG_OFFSET_NAME_64("%r13", r13),
+ REG_OFFSET_NAME_64("%r14", r14),
+ REG_OFFSET_NAME_64("%r15", r15),
+ REG_OFFSET_END,
+};
+
+#define regoffset_table x86_64_regoffset_table
+#endif
+
+/* Minus 1 for the ending REG_OFFSET_END */
+#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL;
+}
+
+/* Reuse code from arch/x86/kernel/ptrace.c */
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
new file mode 100644
index 000000000..675a02130
--- /dev/null
+++ b/tools/perf/arch/x86/util/event.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/machine.h"
+#include "../../util/tool.h"
+#include "../../util/map.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+
+#if defined(__x86_64__)
+
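+/*
+ * Synthesize mmap events for the extra kernel maps, i.e. the x86_64 KPTI
+ * entry trampoline maps, so that tools can resolve samples in them.
+ */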
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int rc = 0;
+ struct map *pos;
+ struct map_groups *kmaps = &machine->kmaps;
+ struct maps *maps = &kmaps->maps;
+ union perf_event *event = zalloc(sizeof(event->mmap) +
+ machine->id_hdr_size);
+
+ if (!event) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for extra kernel maps\n");
+ return -1;
+ }
+
+ for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+ struct kmap *kmap;
+ size_t size;
+
+ if (!__map__is_extra_kernel_map(pos))
+ continue;
+
+ kmap = map__kmap(pos);
+
+ size = sizeof(event->mmap) - sizeof(event->mmap.filename) +
+ PERF_ALIGN(strlen(kmap->name) + 1, sizeof(u64)) +
+ machine->id_hdr_size;
+
+ memset(event, 0, size);
+
+ event->mmap.header.type = PERF_RECORD_MMAP;
+
+ /*
+ * kernel uses 0 for user space maps, see kernel/perf_event.c
+ * __perf_event_mmap
+ */
+ if (machine__is_host(machine))
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+ event->mmap.header.size = size;
+
+ event->mmap.start = pos->start;
+ event->mmap.len = pos->end - pos->start;
+ event->mmap.pgoff = pos->pgoff;
+ event->mmap.pid = machine->pid;
+
+ strlcpy(event->mmap.filename, kmap->name, PATH_MAX);
+
+ if (perf_tool__process_synth_event(tool, event, machine,
+ process) != 0) {
+ rc = -1;
+ break;
+ }
+ }
+
+ free(event);
+ return rc;
+}
+
+#endif
diff --git a/tools/perf/arch/x86/util/group.c b/tools/perf/arch/x86/util/group.c
new file mode 100644
index 000000000..e2f8034b8
--- /dev/null
+++ b/tools/perf/arch/x86/util/group.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "api/fs/fs.h"
+#include "util/group.h"
+
+/*
+ * Check whether we can use a group for top down.
+ * Without a group, we may get bad results due to multiplexing.
+ * The nmi_watchdog permanently occupies a hardware counter, so the
+ * full group cannot be scheduled while it is enabled.
+ */
+bool arch_topdown_check_group(bool *warn)
+{
+ int n;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &n) < 0)
+ return false;
+ if (n > 0) {
+ *warn = true;
+ return false;
+ }
+ return true;
+}
+
+void arch_topdown_group_warn(void)
+{
+ fprintf(stderr,
+ "nmi_watchdog enabled with topdown. May give wrong results.\n"
+ "Disable with echo 0 > /proc/sys/kernel/nmi_watchdog\n");
+}
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
new file mode 100644
index 000000000..2a5daec6f
--- /dev/null
+++ b/tools/perf/arch/x86/util/header.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../util/header.h"
+
+static inline void
+cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
+ unsigned int *d)
+{
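+ /*
+ * The ".byte 0x53"/".byte 0x5b" pair is push %ebx/pop %ebx: EBX is
+ * saved around CPUID because it may be reserved (e.g. as the PIC
+ * register on 32-bit builds), so the value is returned through ESI.
+ */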
+ __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
+ "movl %%ebx, %%esi\n\t.byte 0x5b"
+ : "=a" (*a),
+ "=S" (*b),
+ "=c" (*c),
+ "=d" (*d)
+ : "a" (op));
+}
+
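+/*
+ * Fill @buffer with the vendor, family, model and stepping per @fmt,
+ * which must end in a '$' marker, e.g. "%s,%u,%u,%u$" might produce
+ * "GenuineIntel,6,158,10$". If the marker survives scnprintf(), the
+ * whole string fitted and the '$' is overwritten with the NUL.
+ */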
+static int
+__get_cpuid(char *buffer, size_t sz, const char *fmt)
+{
+ unsigned int a, b, c, d, lvl;
+ int family = -1, model = -1, step = -1;
+ int nb;
+ char vendor[16];
+
+ cpuid(0, &lvl, &b, &c, &d);
+ strncpy(&vendor[0], (char *)(&b), 4);
+ strncpy(&vendor[4], (char *)(&d), 4);
+ strncpy(&vendor[8], (char *)(&c), 4);
+ vendor[12] = '\0';
+
+ if (lvl >= 1) {
+ cpuid(1, &a, &b, &c, &d);
+
+ family = (a >> 8) & 0xf; /* bits 11 - 8 */
+ model = (a >> 4) & 0xf; /* Bits 7 - 4 */
+ step = a & 0xf;
+
+ /* extended family */
+ if (family == 0xf)
+ family += (a >> 20) & 0xff;
+
+ /* extended model */
+ if (family >= 0x6)
+ model += ((a >> 16) & 0xf) << 4;
+ }
+ nb = scnprintf(buffer, sz, fmt, vendor, family, model, step);
+
+ /* look for end marker to ensure the entire data fit */
+ if (strchr(buffer, '$')) {
+ buffer[nb-1] = '\0';
+ return 0;
+ }
+ return -ENOBUFS; /* negative errno so callers can test with '< 0' */
+}
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+ return __get_cpuid(buffer, sz, "%s,%u,%u,%u$");
+}
+
+char *
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ char *buf = malloc(128);
+
+ if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) {
+ free(buf);
+ return NULL;
+ }
+ return buf;
+}
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
new file mode 100644
index 000000000..781df40b2
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -0,0 +1,462 @@
+/*
+ * intel-bts.c: Intel Branch Trace Store support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../../util/cpumap.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/session.h"
+#include "../../util/util.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/tsc.h"
+#include "../../util/auxtrace.h"
+#include "../../util/intel-bts.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+struct intel_bts_snapshot_ref {
+ void *ref_buf;
+ size_t ref_offset;
+ bool wrapped;
+};
+
+struct intel_bts_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *intel_bts_pmu;
+ struct perf_evlist *evlist;
+ bool snapshot_mode;
+ size_t snapshot_size;
+ int snapshot_ref_cnt;
+ struct intel_bts_snapshot_ref *snapshot_refs;
+};
+
+struct branch {
+ u64 from;
+ u64 to;
+ u64 misc;
+};
+
+static size_t
+intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return INTEL_BTS_AUXTRACE_PRIV_SIZE;
+}
+
+static int intel_bts_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+ struct perf_event_mmap_page *pc;
+ struct perf_tsc_conversion tc = { .time_mult = 0, };
+ bool cap_user_time_zero = false;
+ int err;
+
+ if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
+ return -EINVAL;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ pc = session->evlist->mmap[0].base;
+ if (pc) {
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err) {
+ if (err != -EOPNOTSUPP)
+ return err;
+ } else {
+ cap_user_time_zero = tc.time_mult != 0;
+ }
+ if (!cap_user_time_zero)
+ ui__warning("Intel BTS: TSC not available\n");
+ }
+
+ auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
+ auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
+ auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
+ auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
+ auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
+ auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
+ auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
+
+ return 0;
+}
+
+static int intel_bts_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
+ struct perf_evsel *evsel, *intel_bts_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+
+ btsr->evlist = evlist;
+ btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == intel_bts_pmu->type) {
+ if (intel_bts_evsel) {
+ pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ intel_bts_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+ pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
+ return -EINVAL;
+ }
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
+ pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
+ return -EINVAL;
+ }
+
+ /* Set default sizes for snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ if (!opts->auxtrace_snapshot_size)
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ pr_debug2("Intel BTS snapshot size: %zu\n",
+ opts->auxtrace_snapshot_size);
+ }
+
+ /* Set default sizes for full trace mode */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz;
+
+ if (opts->auxtrace_snapshot_mode)
+ min_sz = KiB(4);
+ else
+ min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ if (intel_bts_evsel) {
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace event
+ * must come first.
+ */
+ perf_evlist__to_front(evlist, intel_bts_evsel);
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+ }
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+ int err;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+ }
+
+ return 0;
+}
+
+static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+
+ btsr->snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return rdtsc();
+}
+
+static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
+ int idx)
+{
+ const size_t sz = sizeof(struct intel_bts_snapshot_ref);
+ int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
+ struct intel_bts_snapshot_ref *refs;
+
+ if (!new_cnt)
+ new_cnt = 16;
+
+ while (new_cnt <= idx)
+ new_cnt *= 2;
+
+ refs = calloc(new_cnt, sz);
+ if (!refs)
+ return -ENOMEM;
+
+ memcpy(refs, btsr->snapshot_refs, cnt * sz);
+ free(btsr->snapshot_refs); /* release the old, smaller array */
+
+ btsr->snapshot_refs = refs;
+ btsr->snapshot_ref_cnt = new_cnt;
+
+ return 0;
+}
+
+static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
+{
+ int i;
+
+ for (i = 0; i < btsr->snapshot_ref_cnt; i++)
+ zfree(&btsr->snapshot_refs[i].ref_buf);
+ zfree(&btsr->snapshot_refs);
+}
+
+static void intel_bts_recording_free(struct auxtrace_record *itr)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+
+ intel_bts_free_snapshot_refs(btsr);
+ free(btsr);
+}
+
+static int intel_bts_snapshot_start(struct auxtrace_record *itr)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(btsr->evlist, evsel) {
+ if (evsel->attr.type == btsr->intel_bts_pmu->type)
+ return perf_evsel__disable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(btsr->evlist, evsel) {
+ if (evsel->attr.type == btsr->intel_bts_pmu->type)
+ return perf_evsel__enable(evsel);
+ }
+ return -EINVAL;
+}
+
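+/*
+ * Heuristic wrap detection: if any of the last 512 u64 words (the final
+ * 4KiB) of the AUX buffer are non-zero, the hardware has already written
+ * that far, i.e. the buffer has wrapped at least once.
+ */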
+static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
+{
+ int i, a, b;
+
+ b = buf_size >> 3;
+ a = b - 512;
+ if (a < 0)
+ a = 0;
+
+ for (i = a; i < b; i++) {
+ if (data[i])
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ bool wrapped;
+ int err;
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head);
+
+ if (idx >= btsr->snapshot_ref_cnt) {
+ err = intel_bts_alloc_snapshot_refs(btsr, idx);
+ if (err)
+ goto out_err;
+ }
+
+ wrapped = btsr->snapshot_refs[idx].wrapped;
+ if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
+ btsr->snapshot_refs[idx].wrapped = true;
+ wrapped = true;
+ }
+
+ /*
+ * In full trace mode 'head' continually increases. However in snapshot
+ * mode 'head' is an offset within the buffer. Here 'old' and 'head'
+ * are adjusted to match the full trace case which expects that 'old' is
+ * always less than 'head'.
+ */
+ if (wrapped) {
+ *old = *head;
+ *head += mm->len;
+ } else {
+ if (mm->mask)
+ *old &= mm->mask;
+ else
+ *old %= mm->len;
+ if (*old > *head)
+ *head += mm->len;
+ }
+
+ pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+ __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+ return 0;
+
+out_err:
+ pr_err("%s: failed, error %d\n", __func__, err);
+ return err;
+}
+
+static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct intel_bts_recording *btsr =
+ container_of(itr, struct intel_bts_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(btsr->evlist, evsel) {
+ if (evsel->attr.type == btsr->intel_bts_pmu->type)
+ return perf_evlist__enable_event_idx(btsr->evlist,
+ evsel, idx);
+ }
+ return -EINVAL;
+}
+
+struct auxtrace_record *intel_bts_recording_init(int *err)
+{
+ struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
+ struct intel_bts_recording *btsr;
+
+ if (!intel_bts_pmu)
+ return NULL;
+
+ if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+ *err = -errno;
+ return NULL;
+ }
+
+ btsr = zalloc(sizeof(struct intel_bts_recording));
+ if (!btsr) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ btsr->intel_bts_pmu = intel_bts_pmu;
+ btsr->itr.recording_options = intel_bts_recording_options;
+ btsr->itr.info_priv_size = intel_bts_info_priv_size;
+ btsr->itr.info_fill = intel_bts_info_fill;
+ btsr->itr.free = intel_bts_recording_free;
+ btsr->itr.snapshot_start = intel_bts_snapshot_start;
+ btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
+ btsr->itr.find_snapshot = intel_bts_find_snapshot;
+ btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
+ btsr->itr.reference = intel_bts_reference;
+ btsr->itr.read_finish = intel_bts_read_finish;
+ btsr->itr.alignment = sizeof(struct branch);
+ return &btsr->itr;
+}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
new file mode 100644
index 000000000..ba8ecaf52
--- /dev/null
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -0,0 +1,1122 @@
+/*
+ * intel-pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <cpuid.h>
+
+#include "../../perf.h"
+#include "../../util/session.h"
+#include "../../util/event.h"
+#include "../../util/evlist.h"
+#include "../../util/evsel.h"
+#include "../../util/cpumap.h"
+#include <subcmd/parse-options.h>
+#include "../../util/parse-events.h"
+#include "../../util/pmu.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/tsc.h"
+#include "../../util/intel-pt.h"
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+#define KiB_MASK(x) (KiB(x) - 1)
+#define MiB_MASK(x) (MiB(x) - 1)
+
+#define INTEL_PT_PSB_PERIOD_NEAR 256
+
+struct intel_pt_snapshot_ref {
+ void *ref_buf;
+ size_t ref_offset;
+ bool wrapped;
+};
+
+struct intel_pt_recording {
+ struct auxtrace_record itr;
+ struct perf_pmu *intel_pt_pmu;
+ int have_sched_switch;
+ struct perf_evlist *evlist;
+ bool snapshot_mode;
+ bool snapshot_init_done;
+ size_t snapshot_size;
+ size_t snapshot_ref_buf_size;
+ int snapshot_ref_cnt;
+ struct intel_pt_snapshot_ref *snapshot_refs;
+ size_t priv_size;
+};
+
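+/*
+ * Parse a comma-separated term string such as "tsc,mtc,mtc_period=3"
+ * against the PMU's format definitions and merge the result into
+ * *config on top of its current value.
+ */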
+static int intel_pt_parse_terms_with_default(struct list_head *formats,
+ const char *str,
+ u64 *config)
+{
+ struct list_head *terms;
+ struct perf_event_attr attr = { .size = 0, };
+ int err;
+
+ terms = malloc(sizeof(struct list_head));
+ if (!terms)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(terms);
+
+ err = parse_events_terms(terms, str);
+ if (err)
+ goto out_free;
+
+ attr.config = *config;
+ err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
+ if (err)
+ goto out_free;
+
+ *config = attr.config;
+out_free:
+ parse_events_terms__delete(terms);
+ return err;
+}
+
+static int intel_pt_parse_terms(struct list_head *formats, const char *str,
+ u64 *config)
+{
+ *config = 0;
+ return intel_pt_parse_terms_with_default(formats, str, config);
+}
+
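+/*
+ * Gather the bits of @bits selected by @mask into one contiguous value,
+ * e.g. mask 0xf000 with bits 0x3000 yields 0x3.
+ */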
+static u64 intel_pt_masked_bits(u64 mask, u64 bits)
+{
+ const u64 top_bit = 1ULL << 63;
+ u64 res = 0;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (mask & top_bit) {
+ res <<= 1;
+ if (bits & top_bit)
+ res |= 1;
+ }
+ mask <<= 1;
+ bits <<= 1;
+ }
+
+ return res;
+}
+
+static int intel_pt_read_config(struct perf_pmu *intel_pt_pmu, const char *str,
+ struct perf_evlist *evlist, u64 *res)
+{
+ struct perf_evsel *evsel;
+ u64 mask;
+
+ *res = 0;
+
+ mask = perf_pmu__format_bits(&intel_pt_pmu->format, str);
+ if (!mask)
+ return -EINVAL;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type) {
+ *res = intel_pt_masked_bits(mask, evsel->attr.config);
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+static size_t intel_pt_psb_period(struct perf_pmu *intel_pt_pmu,
+ struct perf_evlist *evlist)
+{
+ u64 val;
+ int err, topa_multiple_entries;
+ size_t psb_period;
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/topa_multiple_entries",
+ "%d", &topa_multiple_entries) != 1)
+ topa_multiple_entries = 0;
+
+ /*
+ * The absence of caps/topa_multiple_entries indicates early hardware
+ * that had extra frequent PSBs, so assume the 256-byte PSB period.
+ */
+ if (!topa_multiple_entries) {
+ psb_period = 256;
+ goto out;
+ }
+
+ err = intel_pt_read_config(intel_pt_pmu, "psb_period", evlist, &val);
+ if (err)
+ val = 0;
+
+ psb_period = 1 << (val + 11);
+out:
+ pr_debug2("%s psb_period %zu\n", intel_pt_pmu->name, psb_period);
+ return psb_period;
+}
+
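+/*
+ * Pick a set bit position from @bits: the highest set bit at or below
+ * @target, else the lowest set bit above it, e.g. bits 0x9 with target 3
+ * picks 3, while bits 0x10 picks 4.
+ */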
+static int intel_pt_pick_bit(int bits, int target)
+{
+ int pos, pick = -1;
+
+ for (pos = 0; bits; bits >>= 1, pos++) {
+ if (bits & 1) {
+ if (pos <= target || pick < 0)
+ pick = pos;
+ if (pos >= target)
+ break;
+ }
+ }
+
+ return pick;
+}
+
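+/*
+ * Build the default config as a term string from the PMU capabilities,
+ * e.g. "tsc,mtc,mtc_period=3,psb_period=3,pt,branch" on typical
+ * hardware, and parse it into attr.config bits.
+ */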
+static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
+{
+ char buf[256];
+ int mtc, mtc_periods = 0, mtc_period;
+ int psb_cyc, psb_periods, psb_period;
+ int pos = 0;
+ u64 config;
+ char c;
+
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, "tsc");
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc", "%d",
+ &mtc) != 1)
+ mtc = 1;
+
+ if (mtc) {
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/mtc_periods", "%x",
+ &mtc_periods) != 1)
+ mtc_periods = 0;
+ if (mtc_periods) {
+ mtc_period = intel_pt_pick_bit(mtc_periods, 3);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos,
+ ",mtc,mtc_period=%d", mtc_period);
+ }
+ }
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_cyc", "%d",
+ &psb_cyc) != 1)
+ psb_cyc = 1;
+
+ if (psb_cyc && mtc_periods) {
+ if (perf_pmu__scan_file(intel_pt_pmu, "caps/psb_periods", "%x",
+ &psb_periods) != 1)
+ psb_periods = 0;
+ if (psb_periods) {
+ psb_period = intel_pt_pick_bit(psb_periods, 3);
+ pos += scnprintf(buf + pos, sizeof(buf) - pos,
+ ",psb_period=%d", psb_period);
+ }
+ }
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
+ perf_pmu__scan_file(intel_pt_pmu, "format/branch", "%c", &c) == 1)
+ pos += scnprintf(buf + pos, sizeof(buf) - pos, ",pt,branch");
+
+ pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+
+ return config;
+}
+
+static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ unsigned long long snapshot_size = 0;
+ char *endptr;
+
+ if (str) {
+ snapshot_size = strtoull(str, &endptr, 0);
+ if (*endptr || snapshot_size > SIZE_MAX)
+ return -1;
+ }
+
+ opts->auxtrace_snapshot_mode = true;
+ opts->auxtrace_snapshot_size = snapshot_size;
+
+ ptr->snapshot_size = snapshot_size;
+
+ return 0;
+}
+
+struct perf_event_attr *
+intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
+{
+ struct perf_event_attr *attr;
+
+ attr = zalloc(sizeof(struct perf_event_attr));
+ if (!attr)
+ return NULL;
+
+ attr->config = intel_pt_default_config(intel_pt_pmu);
+
+ intel_pt_pmu->selectable = true;
+
+ return attr;
+}
+
+static const char *intel_pt_find_filter(struct perf_evlist *evlist,
+ struct perf_pmu *intel_pt_pmu)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type)
+ return evsel->filter;
+ }
+
+ return NULL;
+}
+
+static size_t intel_pt_filter_bytes(const char *filter)
+{
+ size_t len = filter ? strlen(filter) : 0;
+
+ return len ? roundup(len + 1, 8) : 0;
+}
+
+static size_t
+intel_pt_info_priv_size(struct auxtrace_record *itr, struct perf_evlist *evlist)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ const char *filter = intel_pt_find_filter(evlist, ptr->intel_pt_pmu);
+
+ ptr->priv_size = (INTEL_PT_AUXTRACE_PRIV_MAX * sizeof(u64)) +
+ intel_pt_filter_bytes(filter);
+
+ return ptr->priv_size;
+}
+
+static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+
+ __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ *n = ebx;
+ *d = eax;
+}
+
+static int intel_pt_info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+ struct perf_event_mmap_page *pc;
+ struct perf_tsc_conversion tc = { .time_mult = 0, };
+ bool cap_user_time_zero = false, per_cpu_mmaps;
+ u64 tsc_bit, mtc_bit, mtc_freq_bits, cyc_bit, noretcomp_bit;
+ u32 tsc_ctc_ratio_n, tsc_ctc_ratio_d;
+ unsigned long max_non_turbo_ratio;
+ size_t filter_str_len;
+ const char *filter;
+ u64 *info;
+ int err;
+
+ if (priv_size != ptr->priv_size)
+ return -EINVAL;
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+ intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
+ &noretcomp_bit);
+ intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+ mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
+ "mtc_period");
+ intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+
+ intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
+
+ if (perf_pmu__scan_file(intel_pt_pmu, "max_nonturbo_ratio",
+ "%lu", &max_non_turbo_ratio) != 1)
+ max_non_turbo_ratio = 0;
+
+ filter = intel_pt_find_filter(session->evlist, ptr->intel_pt_pmu);
+ filter_str_len = filter ? strlen(filter) : 0;
+
+ if (!session->evlist->nr_mmaps)
+ return -EINVAL;
+
+ pc = session->evlist->mmap[0].base;
+ if (pc) {
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err) {
+ if (err != -EOPNOTSUPP)
+ return err;
+ } else {
+ cap_user_time_zero = tc.time_mult != 0;
+ }
+ if (!cap_user_time_zero)
+ ui__warning("Intel Processor Trace: TSC not available\n");
+ }
+
+ per_cpu_mmaps = !cpu_map__empty(session->evlist->cpus);
+
+ auxtrace_info->type = PERF_AUXTRACE_INTEL_PT;
+ auxtrace_info->priv[INTEL_PT_PMU_TYPE] = intel_pt_pmu->type;
+ auxtrace_info->priv[INTEL_PT_TIME_SHIFT] = tc.time_shift;
+ auxtrace_info->priv[INTEL_PT_TIME_MULT] = tc.time_mult;
+ auxtrace_info->priv[INTEL_PT_TIME_ZERO] = tc.time_zero;
+ auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO] = cap_user_time_zero;
+ auxtrace_info->priv[INTEL_PT_TSC_BIT] = tsc_bit;
+ auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT] = noretcomp_bit;
+ auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH] = ptr->have_sched_switch;
+ auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE] = ptr->snapshot_mode;
+ auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS] = per_cpu_mmaps;
+ auxtrace_info->priv[INTEL_PT_MTC_BIT] = mtc_bit;
+ auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS] = mtc_freq_bits;
+ auxtrace_info->priv[INTEL_PT_TSC_CTC_N] = tsc_ctc_ratio_n;
+ auxtrace_info->priv[INTEL_PT_TSC_CTC_D] = tsc_ctc_ratio_d;
+ auxtrace_info->priv[INTEL_PT_CYC_BIT] = cyc_bit;
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO] = max_non_turbo_ratio;
+ auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] = filter_str_len;
+
+ info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+
+ if (filter_str_len) {
+ size_t len = intel_pt_filter_bytes(filter);
+
+ strncpy((char *)info, filter, len);
+ info += len >> 3;
+ }
+
+ return 0;
+}
+
+static int intel_pt_track_switches(struct perf_evlist *evlist)
+{
+ const char *sched_switch = "sched:sched_switch";
+ struct perf_evsel *evsel;
+ int err;
+
+ if (!perf_evlist__can_select_event(evlist, sched_switch))
+ return -EPERM;
+
+ err = parse_events(evlist, sched_switch, NULL);
+ if (err) {
+ pr_debug2("%s: failed to parse %s, error %d\n",
+ __func__, sched_switch, err);
+ return err;
+ }
+
+ evsel = perf_evlist__last(evlist);
+
+ perf_evsel__set_sample_bit(evsel, CPU);
+ perf_evsel__set_sample_bit(evsel, TIME);
+
+ evsel->system_wide = true;
+ evsel->no_aux_samples = true;
+ evsel->immediate = true;
+
+ return 0;
+}
+
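+/*
+ * Render the @valid bitmask as a human-readable list of values and
+ * ranges, e.g. valid = 0x1d (bits 0, 2, 3, 4) becomes "0,2-4".
+ */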
+static void intel_pt_valid_str(char *str, size_t len, u64 valid)
+{
+ unsigned int val, last = 0, state = 1;
+ int p = 0;
+
+ str[0] = '\0';
+
+ for (val = 0; val <= 64; val++, valid >>= 1) {
+ if (valid & 1) {
+ last = val;
+ switch (state) {
+ case 0:
+ p += scnprintf(str + p, len - p, ",");
+ /* Fall through */
+ case 1:
+ p += scnprintf(str + p, len - p, "%u", val);
+ state = 2;
+ break;
+ case 2:
+ state = 3;
+ break;
+ case 3:
+ state = 4;
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (state) {
+ case 3:
+ p += scnprintf(str + p, len - p, ",%u", last);
+ state = 0;
+ break;
+ case 4:
+ p += scnprintf(str + p, len - p, "-%u", last);
+ state = 0;
+ break;
+ default:
+ break;
+ }
+ if (state != 1)
+ state = 0;
+ }
+ }
+}
+
+static int intel_pt_val_config_term(struct perf_pmu *intel_pt_pmu,
+ const char *caps, const char *name,
+ const char *supported, u64 config)
+{
+ char valid_str[256];
+ unsigned int shift;
+ unsigned long long valid;
+ u64 bits;
+ int ok;
+
+ if (perf_pmu__scan_file(intel_pt_pmu, caps, "%llx", &valid) != 1)
+ valid = 0;
+
+ if (supported &&
+ perf_pmu__scan_file(intel_pt_pmu, supported, "%d", &ok) == 1 && !ok)
+ valid = 0;
+
+ valid |= 1;
+
+ bits = perf_pmu__format_bits(&intel_pt_pmu->format, name);
+
+ config &= bits;
+
+ for (shift = 0; bits && !(bits & 1); shift++)
+ bits >>= 1;
+
+ config >>= shift;
+
+ if (config > 63)
+ goto out_err;
+
+ if (valid & (1ULL << config))
+ return 0;
+out_err:
+ intel_pt_valid_str(valid_str, sizeof(valid_str), valid);
+ pr_err("Invalid %s for %s. Valid values are: %s\n",
+ name, INTEL_PT_PMU_NAME, valid_str);
+ return -EINVAL;
+}
+
+static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
+ struct perf_evsel *evsel)
+{
+ int err;
+ char c;
+
+ if (!evsel)
+ return 0;
+
+ /*
+ * If supported, force pass-through config term (pt=1) even if user
+ * sets pt=0, which avoids senseless kernel errors.
+ */
+ if (perf_pmu__scan_file(intel_pt_pmu, "format/pt", "%c", &c) == 1 &&
+ !(evsel->attr.config & 1)) {
+ pr_warning("pt=0 doesn't make sense, forcing pt=1\n");
+ evsel->attr.config |= 1;
+ }
+
+ err = intel_pt_val_config_term(intel_pt_pmu, "caps/cycle_thresholds",
+ "cyc_thresh", "caps/psb_cyc",
+ evsel->attr.config);
+ if (err)
+ return err;
+
+ err = intel_pt_val_config_term(intel_pt_pmu, "caps/mtc_periods",
+ "mtc_period", "caps/mtc",
+ evsel->attr.config);
+ if (err)
+ return err;
+
+ return intel_pt_val_config_term(intel_pt_pmu, "caps/psb_periods",
+ "psb_period", "caps/psb_cyc",
+ evsel->attr.config);
+}
+
+static int intel_pt_recording_options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu;
+ bool have_timing_info, need_immediate = false;
+ struct perf_evsel *evsel, *intel_pt_evsel = NULL;
+ const struct cpu_map *cpus = evlist->cpus;
+ bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
+ u64 tsc_bit;
+ int err;
+
+ ptr->evlist = evlist;
+ ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == intel_pt_pmu->type) {
+ if (intel_pt_evsel) {
+ pr_err("There may be only one " INTEL_PT_PMU_NAME " event\n");
+ return -EINVAL;
+ }
+ evsel->attr.freq = 0;
+ evsel->attr.sample_period = 1;
+ intel_pt_evsel = evsel;
+ opts->full_auxtrace = true;
+ }
+ }
+
+ if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
+ pr_err("Snapshot mode (-S option) requires " INTEL_PT_PMU_NAME " PMU event (-e " INTEL_PT_PMU_NAME ")\n");
+ return -EINVAL;
+ }
+
+ if (opts->use_clockid) {
+ pr_err("Cannot use clockid (-k option) with " INTEL_PT_PMU_NAME "\n");
+ return -EINVAL;
+ }
+
+ if (!opts->full_auxtrace)
+ return 0;
+
+ err = intel_pt_validate_config(intel_pt_pmu, intel_pt_evsel);
+ if (err)
+ return err;
+
+ /* Set default sizes for snapshot mode */
+ if (opts->auxtrace_snapshot_mode) {
+ size_t psb_period = intel_pt_psb_period(intel_pt_pmu, evlist);
+
+ if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ } else if (!opts->auxtrace_mmap_pages && !privileged &&
+ opts->mmap_pages == UINT_MAX) {
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ if (!opts->auxtrace_snapshot_size)
+ opts->auxtrace_snapshot_size =
+ opts->auxtrace_mmap_pages * (size_t)page_size;
+ if (!opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_snapshot_size;
+
+ sz = round_up(sz, page_size) / page_size;
+ opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
+ }
+ if (opts->auxtrace_snapshot_size >
+ opts->auxtrace_mmap_pages * (size_t)page_size) {
+ pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
+ opts->auxtrace_snapshot_size,
+ opts->auxtrace_mmap_pages * (size_t)page_size);
+ return -EINVAL;
+ }
+ if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
+ pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
+ return -EINVAL;
+ }
+ pr_debug2("Intel PT snapshot size: %zu\n",
+ opts->auxtrace_snapshot_size);
+ if (psb_period &&
+ opts->auxtrace_snapshot_size <= psb_period +
+ INTEL_PT_PSB_PERIOD_NEAR)
+ ui__warning("Intel PT snapshot size (%zu) may be too small for PSB period (%zu)\n",
+ opts->auxtrace_snapshot_size, psb_period);
+ }
+
+ /* Set default sizes for full trace mode */
+ if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
+ if (privileged) {
+ opts->auxtrace_mmap_pages = MiB(4) / page_size;
+ } else {
+ opts->auxtrace_mmap_pages = KiB(128) / page_size;
+ if (opts->mmap_pages == UINT_MAX)
+ opts->mmap_pages = KiB(256) / page_size;
+ }
+ }
+
+ /* Validate auxtrace_mmap_pages */
+ if (opts->auxtrace_mmap_pages) {
+ size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
+ size_t min_sz;
+
+ if (opts->auxtrace_snapshot_mode)
+ min_sz = KiB(4);
+ else
+ min_sz = KiB(8);
+
+ if (sz < min_sz || !is_power_of_2(sz)) {
+ pr_err("Invalid mmap size for Intel Processor Trace: must be at least %zuKiB and a power of 2\n",
+ min_sz / 1024);
+ return -EINVAL;
+ }
+ }
+
+ intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+
+ if (opts->full_auxtrace && (intel_pt_evsel->attr.config & tsc_bit))
+ have_timing_info = true;
+ else
+ have_timing_info = false;
+
+ /*
+ * Per-cpu recording needs sched_switch events to distinguish different
+ * threads.
+ */
+ if (have_timing_info && !cpu_map__empty(cpus)) {
+ if (perf_can_record_switch_events()) {
+ bool cpu_wide = !target__none(&opts->target) &&
+ !target__has_task(&opts->target);
+
+ if (!cpu_wide && perf_can_record_cpu_wide()) {
+ struct perf_evsel *switch_evsel;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ switch_evsel = perf_evlist__last(evlist);
+
+ switch_evsel->attr.freq = 0;
+ switch_evsel->attr.sample_period = 1;
+ switch_evsel->attr.context_switch = 1;
+
+ switch_evsel->system_wide = true;
+ switch_evsel->no_aux_samples = true;
+ switch_evsel->immediate = true;
+
+ perf_evsel__set_sample_bit(switch_evsel, TID);
+ perf_evsel__set_sample_bit(switch_evsel, TIME);
+ perf_evsel__set_sample_bit(switch_evsel, CPU);
+ perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
+
+ opts->record_switch_events = false;
+ ptr->have_sched_switch = 3;
+ } else {
+ opts->record_switch_events = true;
+ need_immediate = true;
+ if (cpu_wide)
+ ptr->have_sched_switch = 3;
+ else
+ ptr->have_sched_switch = 2;
+ }
+ } else {
+ err = intel_pt_track_switches(evlist);
+ if (err == -EPERM)
+ pr_debug2("Unable to select sched:sched_switch\n");
+ else if (err)
+ return err;
+ else
+ ptr->have_sched_switch = 1;
+ }
+ }
+
+ if (intel_pt_evsel) {
+ /*
+ * To obtain the auxtrace buffer file descriptor, the auxtrace
+ * event must come first.
+ */
+ perf_evlist__to_front(evlist, intel_pt_evsel);
+ /*
+ * In the case of per-cpu mmaps, we need the CPU on the
+ * AUX event.
+ */
+ if (!cpu_map__empty(cpus))
+ perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
+ }
+
+ /* Add dummy event to keep tracking */
+ if (opts->full_auxtrace) {
+ struct perf_evsel *tracking_evsel;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ tracking_evsel = perf_evlist__last(evlist);
+
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+
+ tracking_evsel->no_aux_samples = true;
+ if (need_immediate)
+ tracking_evsel->immediate = true;
+
+ /* In per-cpu case, always need the time of mmap events etc */
+ if (!cpu_map__empty(cpus)) {
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ /* And the CPU for switch events */
+ perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ }
+ perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ }
+
+ /*
+ * Warn the user when we do not have enough information to decode,
+ * i.e. per-cpu with no sched_switch (except workload-only).
+ */
+ if (!ptr->have_sched_switch && !cpu_map__empty(cpus) &&
+ !target__none(&opts->target))
+ ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
+
+ return 0;
+}
+
+static int intel_pt_snapshot_start(struct auxtrace_record *itr)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->intel_pt_pmu->type)
+ return perf_evsel__disable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int intel_pt_snapshot_finish(struct auxtrace_record *itr)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->intel_pt_pmu->type)
+ return perf_evsel__enable(evsel);
+ }
+ return -EINVAL;
+}
+
+static int intel_pt_alloc_snapshot_refs(struct intel_pt_recording *ptr, int idx)
+{
+ const size_t sz = sizeof(struct intel_pt_snapshot_ref);
+ int cnt = ptr->snapshot_ref_cnt, new_cnt = cnt * 2;
+ struct intel_pt_snapshot_ref *refs;
+
+ if (!new_cnt)
+ new_cnt = 16;
+
+ while (new_cnt <= idx)
+ new_cnt *= 2;
+
+ refs = calloc(new_cnt, sz);
+ if (!refs)
+ return -ENOMEM;
+
+ memcpy(refs, ptr->snapshot_refs, cnt * sz);
+ free(ptr->snapshot_refs); /* release the old, smaller array */
+
+ ptr->snapshot_refs = refs;
+ ptr->snapshot_ref_cnt = new_cnt;
+
+ return 0;
+}
+
+static void intel_pt_free_snapshot_refs(struct intel_pt_recording *ptr)
+{
+ int i;
+
+ for (i = 0; i < ptr->snapshot_ref_cnt; i++)
+ zfree(&ptr->snapshot_refs[i].ref_buf);
+ zfree(&ptr->snapshot_refs);
+}
+
+static void intel_pt_recording_free(struct auxtrace_record *itr)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+
+ intel_pt_free_snapshot_refs(ptr);
+ free(ptr);
+}
+
+static int intel_pt_alloc_snapshot_ref(struct intel_pt_recording *ptr, int idx,
+ size_t snapshot_buf_size)
+{
+ size_t ref_buf_size = ptr->snapshot_ref_buf_size;
+ void *ref_buf;
+
+ ref_buf = zalloc(ref_buf_size);
+ if (!ref_buf)
+ return -ENOMEM;
+
+ ptr->snapshot_refs[idx].ref_buf = ref_buf;
+ ptr->snapshot_refs[idx].ref_offset = snapshot_buf_size - ref_buf_size;
+
+ return 0;
+}
+
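+/*
+ * Size the wrap-detection reference buffer to roughly two PSB periods,
+ * capped at 256KiB. Return 0 (no reference buffer) for small snapshots,
+ * where the zero-tail heuristic below is used instead.
+ */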
+static size_t intel_pt_snapshot_ref_buf_size(struct intel_pt_recording *ptr,
+ size_t snapshot_buf_size)
+{
+ const size_t max_size = 256 * 1024;
+ size_t buf_size = 0, psb_period;
+
+ if (ptr->snapshot_size <= 64 * 1024)
+ return 0;
+
+ psb_period = intel_pt_psb_period(ptr->intel_pt_pmu, ptr->evlist);
+ if (psb_period)
+ buf_size = psb_period * 2;
+
+ if (!buf_size || buf_size > max_size)
+ buf_size = max_size;
+
+ if (buf_size >= snapshot_buf_size)
+ return 0;
+
+ if (buf_size >= ptr->snapshot_size / 2)
+ return 0;
+
+ return buf_size;
+}
+
+static int intel_pt_snapshot_init(struct intel_pt_recording *ptr,
+ size_t snapshot_buf_size)
+{
+ if (ptr->snapshot_init_done)
+ return 0;
+
+ ptr->snapshot_init_done = true;
+
+ ptr->snapshot_ref_buf_size = intel_pt_snapshot_ref_buf_size(ptr,
+ snapshot_buf_size);
+
+ return 0;
+}
+
+/**
+ * intel_pt_compare_buffers - compare bytes in a buffer to a circular buffer.
+ * @buf1: first buffer
+ * @compare_size: number of bytes to compare
+ * @buf2: second buffer (a circular buffer)
+ * @offs2: offset in second buffer
+ * @buf2_size: size of second buffer
+ *
+ * The comparison allows for the possibility that the bytes to compare in the
+ * circular buffer are not contiguous. It is assumed that @compare_size <=
+ * @buf2_size. This function returns %false if the bytes are identical, %true
+ * otherwise.
+ */
+static bool intel_pt_compare_buffers(void *buf1, size_t compare_size,
+ void *buf2, size_t offs2, size_t buf2_size)
+{
+ size_t end2 = offs2 + compare_size, part_size;
+
+ if (end2 <= buf2_size)
+ return memcmp(buf1, buf2 + offs2, compare_size);
+
+ part_size = end2 - buf2_size;
+ if (memcmp(buf1, buf2 + offs2, part_size))
+ return true;
+
+ compare_size -= part_size;
+
+ return memcmp(buf1 + part_size, buf2, compare_size);
+}
+
+static bool intel_pt_compare_ref(void *ref_buf, size_t ref_offset,
+ size_t ref_size, size_t buf_size,
+ void *data, size_t head)
+{
+ size_t ref_end = ref_offset + ref_size;
+
+ if (ref_end > buf_size) {
+ if (head > ref_offset || head < ref_end - buf_size)
+ return true;
+ } else if (head > ref_offset && head < ref_end) {
+ return true;
+ }
+
+ return intel_pt_compare_buffers(ref_buf, ref_size, data, ref_offset,
+ buf_size);
+}
+
+static void intel_pt_copy_ref(void *ref_buf, size_t ref_size, size_t buf_size,
+ void *data, size_t head)
+{
+ if (head >= ref_size) {
+ memcpy(ref_buf, data + head - ref_size, ref_size);
+ } else {
+ memcpy(ref_buf, data, head);
+ ref_size -= head;
+ memcpy(ref_buf + head, data + buf_size - ref_size, ref_size);
+ }
+}
+
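+/*
+ * Detect a wrap by checking whether the reference bytes saved on the
+ * previous snapshot have since been overwritten, then refresh the
+ * reference copy for the next comparison.
+ */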
+static bool intel_pt_wrapped(struct intel_pt_recording *ptr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 head)
+{
+ struct intel_pt_snapshot_ref *ref = &ptr->snapshot_refs[idx];
+ bool wrapped;
+
+ wrapped = intel_pt_compare_ref(ref->ref_buf, ref->ref_offset,
+ ptr->snapshot_ref_buf_size, mm->len,
+ data, head);
+
+ intel_pt_copy_ref(ref->ref_buf, ptr->snapshot_ref_buf_size, mm->len,
+ data, head);
+
+ return wrapped;
+}
+
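+/*
+ * Same zero-tail heuristic as intel_bts_first_wrap(): a non-zero word in
+ * the last 4KiB means the buffer has wrapped at least once.
+ */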
+static bool intel_pt_first_wrap(u64 *data, size_t buf_size)
+{
+ int i, a, b;
+
+ b = buf_size >> 3;
+ a = b - 512;
+ if (a < 0)
+ a = 0;
+
+ for (i = a; i < b; i++) {
+ if (data[i])
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_pt_find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ bool wrapped;
+ int err;
+
+ pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
+ __func__, idx, (size_t)*old, (size_t)*head);
+
+ err = intel_pt_snapshot_init(ptr, mm->len);
+ if (err)
+ goto out_err;
+
+ if (idx >= ptr->snapshot_ref_cnt) {
+ err = intel_pt_alloc_snapshot_refs(ptr, idx);
+ if (err)
+ goto out_err;
+ }
+
+ if (ptr->snapshot_ref_buf_size) {
+ if (!ptr->snapshot_refs[idx].ref_buf) {
+ err = intel_pt_alloc_snapshot_ref(ptr, idx, mm->len);
+ if (err)
+ goto out_err;
+ }
+ wrapped = intel_pt_wrapped(ptr, idx, mm, data, *head);
+ } else {
+ wrapped = ptr->snapshot_refs[idx].wrapped;
+ if (!wrapped && intel_pt_first_wrap((u64 *)data, mm->len)) {
+ ptr->snapshot_refs[idx].wrapped = true;
+ wrapped = true;
+ }
+ }
+
+ /*
+ * In full trace mode 'head' continually increases. However in snapshot
+ * mode 'head' is an offset within the buffer. Here 'old' and 'head'
+ * are adjusted to match the full trace case which expects that 'old' is
+ * always less than 'head'.
+ */
+ if (wrapped) {
+ *old = *head;
+ *head += mm->len;
+ } else {
+ if (mm->mask)
+ *old &= mm->mask;
+ else
+ *old %= mm->len;
+ if (*old > *head)
+ *head += mm->len;
+ }
+
+ pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
+ __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
+
+ return 0;
+
+out_err:
+ pr_err("%s: failed, error %d\n", __func__, err);
+ return err;
+}
+
+static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
+{
+ return rdtsc();
+}
+
+static int intel_pt_read_finish(struct auxtrace_record *itr, int idx)
+{
+ struct intel_pt_recording *ptr =
+ container_of(itr, struct intel_pt_recording, itr);
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(ptr->evlist, evsel) {
+ if (evsel->attr.type == ptr->intel_pt_pmu->type)
+ return perf_evlist__enable_event_idx(ptr->evlist, evsel,
+ idx);
+ }
+ return -EINVAL;
+}
+
+struct auxtrace_record *intel_pt_recording_init(int *err)
+{
+ struct perf_pmu *intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
+ struct intel_pt_recording *ptr;
+
+ if (!intel_pt_pmu)
+ return NULL;
+
+ if (setenv("JITDUMP_USE_ARCH_TIMESTAMP", "1", 1)) {
+ *err = -errno;
+ return NULL;
+ }
+
+ ptr = zalloc(sizeof(struct intel_pt_recording));
+ if (!ptr) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+
+ ptr->intel_pt_pmu = intel_pt_pmu;
+ ptr->itr.recording_options = intel_pt_recording_options;
+ ptr->itr.info_priv_size = intel_pt_info_priv_size;
+ ptr->itr.info_fill = intel_pt_info_fill;
+ ptr->itr.free = intel_pt_recording_free;
+ ptr->itr.snapshot_start = intel_pt_snapshot_start;
+ ptr->itr.snapshot_finish = intel_pt_snapshot_finish;
+ ptr->itr.find_snapshot = intel_pt_find_snapshot;
+ ptr->itr.parse_snapshot_options = intel_pt_parse_snapshot_options;
+ ptr->itr.reference = intel_pt_reference;
+ ptr->itr.read_finish = intel_pt_read_finish;
+ return &ptr->itr;
+}
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
new file mode 100644
index 000000000..081353d7b
--- /dev/null
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include "../../util/kvm-stat.h"
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
+
+define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
+define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
+
+static struct kvm_events_ops exit_events = {
+ .is_begin_event = exit_event_begin,
+ .is_end_event = exit_event_end,
+ .decode_key = exit_event_decode_key,
+ .name = "VM-EXIT"
+};
+
+const char *vcpu_id_str = "vcpu_id";
+const int decode_str_len = 20;
+const char *kvm_exit_reason = "exit_reason";
+const char *kvm_entry_trace = "kvm:kvm_entry";
+const char *kvm_exit_trace = "kvm:kvm_exit";
+
+/*
+ * For the mmio events, we treat:
+ * the time of an MMIO write as kvm_mmio(KVM_TRACE_MMIO_WRITE...) -> kvm_entry,
+ * the time of an MMIO read as kvm_exit -> kvm_mmio(KVM_TRACE_MMIO_READ...).
+ */
+static void mmio_event_get_key(struct perf_evsel *evsel, struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = perf_evsel__intval(evsel, sample, "gpa");
+ key->info = perf_evsel__intval(evsel, sample, "type");
+}
+
+#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
+#define KVM_TRACE_MMIO_READ 1
+#define KVM_TRACE_MMIO_WRITE 2
+
+static bool mmio_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+ /* MMIO read begin event in kernel. */
+ if (kvm_exit_event(evsel))
+ return true;
+
+ /* MMIO write begin event in kernel. */
+ if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+ perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
+ mmio_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
+ struct event_key *key)
+{
+ /* MMIO write end event in kernel. */
+ if (kvm_entry_event(evsel))
+ return true;
+
+ /* MMIO read end event in kernel.*/
+ if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
+ perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
+ mmio_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
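+/* Decode as "<gpa>:<R|W>", e.g. "0xfee00300:W" for a write to the APIC page. */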
+static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+ struct event_key *key,
+ char *decode)
+{
+ scnprintf(decode, decode_str_len, "%#lx:%s",
+ (unsigned long)key->key,
+ key->info == KVM_TRACE_MMIO_WRITE ? "W" : "R");
+}
+
+static struct kvm_events_ops mmio_events = {
+ .is_begin_event = mmio_event_begin,
+ .is_end_event = mmio_event_end,
+ .decode_key = mmio_event_decode_key,
+ .name = "MMIO Access"
+};
+
+/* The time of an emulated PIO access is from kvm_pio to kvm_entry. */
+static void ioport_event_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->key = perf_evsel__intval(evsel, sample, "port");
+ key->info = perf_evsel__intval(evsel, sample, "rw");
+}
+
+static bool ioport_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ if (!strcmp(evsel->name, "kvm:kvm_pio")) {
+ ioport_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+static bool ioport_event_end(struct perf_evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return kvm_entry_event(evsel);
+}
+
+static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
+ struct event_key *key,
+ char *decode)
+{
+ scnprintf(decode, decode_str_len, "%#llx:%s",
+ (unsigned long long)key->key,
+ key->info ? "POUT" : "PIN");
+}
+
+static struct kvm_events_ops ioport_events = {
+ .is_begin_event = ioport_event_begin,
+ .is_end_event = ioport_event_end,
+ .decode_key = ioport_event_decode_key,
+ .name = "IO Port Access"
+};
+
+const char *kvm_events_tp[] = {
+ "kvm:kvm_entry",
+ "kvm:kvm_exit",
+ "kvm:kvm_mmio",
+ "kvm:kvm_pio",
+ NULL,
+};
+
+struct kvm_reg_events_ops kvm_reg_events_ops[] = {
+ { .name = "vmexit", .ops = &exit_events },
+ { .name = "mmio", .ops = &mmio_events },
+ { .name = "ioport", .ops = &ioport_events },
+ { NULL, NULL },
+};
+
+const char * const kvm_skip_events[] = {
+ "HLT",
+ NULL,
+};
+
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
+{
+ if (strstr(cpuid, "Intel")) {
+ kvm->exit_reasons = vmx_exit_reasons;
+ kvm->exit_reasons_isa = "VMX";
+ } else if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon")) {
+ kvm->exit_reasons = svm_exit_reasons;
+ kvm->exit_reasons_isa = "SVM";
+ } else
+ return -ENOTSUP;
+
+ return 0;
+}
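
For context, perf's x86 cpuid string embeds the CPU vendor identifier, so the strstr() checks in cpu_isa_init() match values such as "GenuineIntel-..." or "AuthenticAMD-..." (the exact suffix format varies across perf versions). A minimal stand-alone sketch of the same vendor dispatch, illustrative only and with a hypothetical helper name:

	#include <stdio.h>
	#include <string.h>

	/* Sketch of the strstr()-based ISA dispatch in cpu_isa_init(). */
	static const char *exit_reasons_isa(const char *cpuid)
	{
		if (strstr(cpuid, "Intel"))
			return "VMX";
		if (strstr(cpuid, "AMD") || strstr(cpuid, "Hygon"))
			return "SVM";
		return NULL;	/* cpu_isa_init() maps this case to -ENOTSUP */
	}

	int main(void)
	{
		printf("%s\n", exit_reasons_isa("GenuineIntel-6-55-4"));  /* VMX */
		printf("%s\n", exit_reasons_isa("AuthenticAMD-23-49-0")); /* SVM */
		return 0;
	}
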
diff --git a/tools/perf/arch/x86/util/machine.c b/tools/perf/arch/x86/util/machine.c
new file mode 100644
index 000000000..4520ac53c
--- /dev/null
+++ b/tools/perf/arch/x86/util/machine.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "../../util/machine.h"
+#include "../../util/map.h"
+#include "../../util/symbol.h"
+#include "../../util/sane_ctype.h"
+
+#include <symbol/kallsyms.h>
+
+#if defined(__x86_64__)
+
+struct extra_kernel_map_info {
+ int cnt;
+ int max_cnt;
+ struct extra_kernel_map *maps;
+ bool get_entry_trampolines;
+ u64 entry_trampoline;
+};
+
+static int add_extra_kernel_map(struct extra_kernel_map_info *mi, u64 start,
+ u64 end, u64 pgoff, const char *name)
+{
+ if (mi->cnt >= mi->max_cnt) {
+ void *buf;
+ size_t sz;
+
+ mi->max_cnt = mi->max_cnt ? mi->max_cnt * 2 : 32;
+ sz = sizeof(struct extra_kernel_map) * mi->max_cnt;
+ buf = realloc(mi->maps, sz);
+ if (!buf)
+ return -1;
+ mi->maps = buf;
+ }
+
+ mi->maps[mi->cnt].start = start;
+ mi->maps[mi->cnt].end = end;
+ mi->maps[mi->cnt].pgoff = pgoff;
+ strlcpy(mi->maps[mi->cnt].name, name, KMAP_NAME_LEN);
+
+ mi->cnt += 1;
+
+ return 0;
+}
+
+static int find_extra_kernel_maps(void *arg, const char *name, char type,
+ u64 start)
+{
+ struct extra_kernel_map_info *mi = arg;
+
+ if (!mi->entry_trampoline && kallsyms2elf_binding(type) == STB_GLOBAL &&
+ !strcmp(name, "_entry_trampoline")) {
+ mi->entry_trampoline = start;
+ return 0;
+ }
+
+ if (is_entry_trampoline(name)) {
+ u64 end = start + page_size;
+
+ return add_extra_kernel_map(mi, start, end, 0, name);
+ }
+
+ return 0;
+}
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+ struct dso *kernel)
+{
+ struct extra_kernel_map_info mi = { .cnt = 0, };
+ char filename[PATH_MAX];
+ int ret;
+ int i;
+
+ machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return 0;
+
+ ret = kallsyms__parse(filename, &mi, find_extra_kernel_maps);
+ if (ret)
+ goto out_free;
+
+ if (!mi.entry_trampoline)
+ goto out_free;
+
+ for (i = 0; i < mi.cnt; i++) {
+ struct extra_kernel_map *xm = &mi.maps[i];
+
+ xm->pgoff = mi.entry_trampoline;
+ ret = machine__create_extra_kernel_map(machine, kernel, xm);
+ if (ret)
+ goto out_free;
+ }
+
+ machine->trampolines_mapped = mi.cnt;
+out_free:
+ free(mi.maps);
+ return ret;
+}
+
+#endif
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
new file mode 100644
index 000000000..fead6b3b4
--- /dev/null
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <string.h>
+#include <regex.h>
+
+#include "../../perf.h"
+#include "../../util/util.h"
+#include "../../util/perf_regs.h"
+#include "../../util/debug.h"
+
+const struct sample_reg sample_reg_masks[] = {
+ SMPL_REG(AX, PERF_REG_X86_AX),
+ SMPL_REG(BX, PERF_REG_X86_BX),
+ SMPL_REG(CX, PERF_REG_X86_CX),
+ SMPL_REG(DX, PERF_REG_X86_DX),
+ SMPL_REG(SI, PERF_REG_X86_SI),
+ SMPL_REG(DI, PERF_REG_X86_DI),
+ SMPL_REG(BP, PERF_REG_X86_BP),
+ SMPL_REG(SP, PERF_REG_X86_SP),
+ SMPL_REG(IP, PERF_REG_X86_IP),
+ SMPL_REG(FLAGS, PERF_REG_X86_FLAGS),
+ SMPL_REG(CS, PERF_REG_X86_CS),
+ SMPL_REG(SS, PERF_REG_X86_SS),
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ SMPL_REG(R8, PERF_REG_X86_R8),
+ SMPL_REG(R9, PERF_REG_X86_R9),
+ SMPL_REG(R10, PERF_REG_X86_R10),
+ SMPL_REG(R11, PERF_REG_X86_R11),
+ SMPL_REG(R12, PERF_REG_X86_R12),
+ SMPL_REG(R13, PERF_REG_X86_R13),
+ SMPL_REG(R14, PERF_REG_X86_R14),
+ SMPL_REG(R15, PERF_REG_X86_R15),
+#endif
+ SMPL_REG_END
+};
+
+struct sdt_name_reg {
+ const char *sdt_name;
+ const char *uprobe_name;
+};
+#define SDT_NAME_REG(n, m) {.sdt_name = "%" #n, .uprobe_name = "%" #m}
+#define SDT_NAME_REG_END {.sdt_name = NULL, .uprobe_name = NULL}
+
+static const struct sdt_name_reg sdt_reg_tbl[] = {
+ SDT_NAME_REG(eax, ax),
+ SDT_NAME_REG(rax, ax),
+ SDT_NAME_REG(al, ax),
+ SDT_NAME_REG(ah, ax),
+ SDT_NAME_REG(ebx, bx),
+ SDT_NAME_REG(rbx, bx),
+ SDT_NAME_REG(bl, bx),
+ SDT_NAME_REG(bh, bx),
+ SDT_NAME_REG(ecx, cx),
+ SDT_NAME_REG(rcx, cx),
+ SDT_NAME_REG(cl, cx),
+ SDT_NAME_REG(ch, cx),
+ SDT_NAME_REG(edx, dx),
+ SDT_NAME_REG(rdx, dx),
+ SDT_NAME_REG(dl, dx),
+ SDT_NAME_REG(dh, dx),
+ SDT_NAME_REG(esi, si),
+ SDT_NAME_REG(rsi, si),
+ SDT_NAME_REG(sil, si),
+ SDT_NAME_REG(edi, di),
+ SDT_NAME_REG(rdi, di),
+ SDT_NAME_REG(dil, di),
+ SDT_NAME_REG(ebp, bp),
+ SDT_NAME_REG(rbp, bp),
+ SDT_NAME_REG(bpl, bp),
+ SDT_NAME_REG(rsp, sp),
+ SDT_NAME_REG(esp, sp),
+ SDT_NAME_REG(spl, sp),
+
+ /* rNN registers */
+ SDT_NAME_REG(r8b, r8),
+ SDT_NAME_REG(r8w, r8),
+ SDT_NAME_REG(r8d, r8),
+ SDT_NAME_REG(r9b, r9),
+ SDT_NAME_REG(r9w, r9),
+ SDT_NAME_REG(r9d, r9),
+ SDT_NAME_REG(r10b, r10),
+ SDT_NAME_REG(r10w, r10),
+ SDT_NAME_REG(r10d, r10),
+ SDT_NAME_REG(r11b, r11),
+ SDT_NAME_REG(r11w, r11),
+ SDT_NAME_REG(r11d, r11),
+ SDT_NAME_REG(r12b, r12),
+ SDT_NAME_REG(r12w, r12),
+ SDT_NAME_REG(r12d, r12),
+ SDT_NAME_REG(r13b, r13),
+ SDT_NAME_REG(r13w, r13),
+ SDT_NAME_REG(r13d, r13),
+ SDT_NAME_REG(r14b, r14),
+ SDT_NAME_REG(r14w, r14),
+ SDT_NAME_REG(r14d, r14),
+ SDT_NAME_REG(r15b, r15),
+ SDT_NAME_REG(r15w, r15),
+ SDT_NAME_REG(r15d, r15),
+ SDT_NAME_REG_END,
+};
+
+/*
+ * Perf only supports an OP in the +/-NUM(REG) form.
+ * The plus/minus sign, NUM and parentheses are optional;
+ * only REG is mandatory.
+ *
+ * SDT events also support indirect addressing with a
+ * symbol as offset, scaled mode, and constants in OP, but
+ * perf does not support these yet. A few examples:
+ *
+ * OP with scaled mode:
+ * (%rax,%rsi,8)
+ *	10(%rax,%rsi,8)
+ *
+ * OP with indirect addressing mode:
+ * check_action(%rip)
+ * mp_+52(%rip)
+ * 44+mp_(%rip)
+ *
+ * OP with constant values:
+ * $0
+ * $123
+ * $-1
+ */
+#define SDT_OP_REGEX "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$"
+
+static regex_t sdt_op_regex;
+
+static int sdt_init_op_regex(void)
+{
+ static int initialized;
+ int ret = 0;
+
+ if (initialized)
+ return 0;
+
+ ret = regcomp(&sdt_op_regex, SDT_OP_REGEX, REG_EXTENDED);
+ if (ret < 0) {
+ pr_debug4("Regex compilation error.\n");
+ return ret;
+ }
+
+ initialized = 1;
+ return 0;
+}
+
+/*
+ * The maximum x86 register name length is 5 (e.g. %r15d), so the
+ * 6th char always holds the NUL terminator. This lets strlen() find
+ * the register name length instead of maintaining one more variable.
+ */
+#define SDT_REG_NAME_SIZE 6
+
+/*
+ * The uprobe parser does not support all gas register names,
+ * so we have to replace them (e.g. for x86_64: %rax -> %ax).
+ * Note: if a register does not require renaming, it is copied
+ * as is; it is never left empty.
+ */
+static void sdt_rename_register(char *sdt_reg, int sdt_len, char *uprobe_reg)
+{
+ int i = 0;
+
+ for (i = 0; sdt_reg_tbl[i].sdt_name != NULL; i++) {
+ if (!strncmp(sdt_reg_tbl[i].sdt_name, sdt_reg, sdt_len)) {
+ strcpy(uprobe_reg, sdt_reg_tbl[i].uprobe_name);
+ return;
+ }
+ }
+
+ strncpy(uprobe_reg, sdt_reg, sdt_len);
+}
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op)
+{
+ char new_reg[SDT_REG_NAME_SIZE] = {0};
+ int new_len = 0, ret;
+ /*
+ * rm[0]: +/-NUM(REG)
+ * rm[1]: +/-
+ * rm[2]: NUM
+ * rm[3]: (
+ * rm[4]: REG
+ * rm[5]: )
+ */
+ regmatch_t rm[6];
+ /*
+	 * The maximum prefix length is 2, as it may contain a sign (+/-)
+	 * and a displacement of 0 (both are optional, so the prefix may
+	 * be empty). One extra character holds the trailing NUL so that
+	 * strlen() can find the prefix length instead of maintaining one
+	 * more variable.
+ */
+ char prefix[3] = {0};
+
+ ret = sdt_init_op_regex();
+ if (ret < 0)
+ return ret;
+
+ /*
+	 * Skip the argument if it is unsupported, does not match
+	 * the regex, or the register name is too long.
+ */
+ if (strchr(old_op, ',') || strchr(old_op, '$') ||
+ regexec(&sdt_op_regex, old_op, 6, rm, 0) ||
+ rm[4].rm_eo - rm[4].rm_so > SDT_REG_NAME_SIZE) {
+ pr_debug4("Skipping unsupported SDT argument: %s\n", old_op);
+ return SDT_ARG_SKIP;
+ }
+
+ /*
+ * Prepare prefix.
+	 * If the SDT OP has parentheses but does not provide a
+	 * displacement, add a displacement of 0.
+ * SDT Uprobe Prefix
+ * -----------------------------
+ * +24(%rdi) +24(%di) +
+ * 24(%rdi) +24(%di) +
+ * %rdi %di
+ * (%rdi) +0(%di) +0
+ * -80(%rbx) -80(%bx) -
+ */
+ if (rm[3].rm_so != rm[3].rm_eo) {
+ if (rm[1].rm_so != rm[1].rm_eo)
+ prefix[0] = *(old_op + rm[1].rm_so);
+ else if (rm[2].rm_so != rm[2].rm_eo)
+ prefix[0] = '+';
+ else
+ scnprintf(prefix, sizeof(prefix), "+0");
+ }
+
+ /* Rename register */
+ sdt_rename_register(old_op + rm[4].rm_so, rm[4].rm_eo - rm[4].rm_so,
+ new_reg);
+
+ /* Prepare final OP which should be valid for uprobe_events */
+ new_len = strlen(prefix) +
+ (rm[2].rm_eo - rm[2].rm_so) +
+ (rm[3].rm_eo - rm[3].rm_so) +
+ strlen(new_reg) +
+ (rm[5].rm_eo - rm[5].rm_so) +
+ 1; /* NULL */
+
+ *new_op = zalloc(new_len);
+ if (!*new_op)
+ return -ENOMEM;
+
+ scnprintf(*new_op, new_len, "%.*s%.*s%.*s%.*s%.*s",
+		  (int)strlen(prefix), prefix,
+		  (int)(rm[2].rm_eo - rm[2].rm_so), old_op + rm[2].rm_so,
+		  (int)(rm[3].rm_eo - rm[3].rm_so), old_op + rm[3].rm_so,
+		  (int)strlen(new_reg), new_reg,
+ (int)(rm[5].rm_eo - rm[5].rm_so), old_op + rm[5].rm_so);
+
+ return SDT_ARG_VALID;
+}
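
To make the capture groups concrete, here is a small stand-alone sketch, illustrative only, that runs the same SDT_OP_REGEX pattern against a typical SDT operand and prints the register submatch (rm[4]):

	#include <regex.h>
	#include <stdio.h>

	int main(void)
	{
		const char *op = "+24(%rdi)";
		regex_t re;
		regmatch_t rm[6];

		if (regcomp(&re, "^([+\\-]?)([0-9]*)(\\(?)(%[a-z][a-z0-9]+)(\\)?)$",
			    REG_EXTENDED))
			return 1;
		if (!regexec(&re, op, 6, rm, 0)) {
			/* rm[1]="+", rm[2]="24", rm[3]="(", rm[4]="%rdi", rm[5]=")" */
			printf("reg: %.*s\n",
			       (int)(rm[4].rm_eo - rm[4].rm_so), op + rm[4].rm_so);
		}
		regfree(&re);
		return 0;
	}

Fed through arch_sdt_arg_parse_op(), "+24(%rdi)" would come out as "+24(%di)", per the renaming table above.
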
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
new file mode 100644
index 000000000..e33ef5bc3
--- /dev/null
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+
+#include "../../util/intel-pt.h"
+#include "../../util/intel-bts.h"
+#include "../../util/pmu.h"
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+#ifdef HAVE_AUXTRACE_SUPPORT
+ if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
+ return intel_pt_pmu_default_config(pmu);
+ if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
+ pmu->selectable = true;
+#endif
+ return NULL;
+}
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
new file mode 100644
index 000000000..950539f9a
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <errno.h>
+
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+
+#include "../../perf.h"
+#include <linux/types.h>
+#include "../../util/debug.h"
+#include "../../util/tsc.h"
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ bool cap_user_time_zero;
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ cap_user_time_zero = pc->cap_user_time_zero;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
+u64 rdtsc(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((u64)high) << 32;
+}
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event = {
+ .time_conv = {
+ .header = {
+ .type = PERF_RECORD_TIME_CONV,
+ .size = sizeof(struct time_conv_event),
+ },
+ },
+ };
+ struct perf_tsc_conversion tc;
+ int err;
+
+ if (!pc)
+ return 0;
+ err = perf_read_tsc_conversion(pc, &tc);
+ if (err == -EOPNOTSUPP)
+ return 0;
+ if (err)
+ return err;
+
+ pr_debug2("Synthesizing TSC conversion information\n");
+
+ event.time_conv.time_mult = tc.time_mult;
+ event.time_conv.time_shift = tc.time_shift;
+ event.time_conv.time_zero = tc.time_zero;
+
+ return process(tool, &event, NULL, machine);
+}
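
The time_mult/time_shift/time_zero fields captured above are sufficient to convert a raw TSC value into a perf timestamp; the generic helper in tools/perf/util/tsc.c implements the fixed-point formula documented in the perf_event_mmap_page comment, essentially as follows (sketch):

	/* Sketch of the documented mult/shift fixed-point TSC conversion. */
	static u64 tsc_to_perf_time(u64 cyc, const struct perf_tsc_conversion *tc)
	{
		u64 quot = cyc >> tc->time_shift;
		u64 rem  = cyc & (((u64)1 << tc->time_shift) - 1);

		return tc->time_zero + quot * tc->time_mult +
		       ((rem * tc->time_mult) >> tc->time_shift);
	}
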
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
new file mode 100644
index 000000000..fda8f4206
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+#include "../../util/event.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[17];
+ unsigned nregs;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \
+ val; \
+})
+
+ if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(CX);
+ dwarf_regs[2] = REG(DX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SP);
+ dwarf_regs[5] = REG(BP);
+ dwarf_regs[6] = REG(SI);
+ dwarf_regs[7] = REG(DI);
+ dwarf_regs[8] = REG(IP);
+ nregs = 9;
+ } else {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(DX);
+ dwarf_regs[2] = REG(CX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SI);
+ dwarf_regs[5] = REG(DI);
+ dwarf_regs[6] = REG(BP);
+ dwarf_regs[7] = REG(SP);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+ dwarf_regs[16] = REG(IP);
+ nregs = 17;
+ }
+
+ return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
+}
diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
new file mode 100644
index 000000000..47357973b
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include "../../util/debug.h"
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+#endif
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+int LIBUNWIND__ARCH_REG_ID(int regnum)
+{
+ int id;
+
+ switch (regnum) {
+ case UNW_X86_64_RAX:
+ id = PERF_REG_X86_AX;
+ break;
+ case UNW_X86_64_RDX:
+ id = PERF_REG_X86_DX;
+ break;
+ case UNW_X86_64_RCX:
+ id = PERF_REG_X86_CX;
+ break;
+ case UNW_X86_64_RBX:
+ id = PERF_REG_X86_BX;
+ break;
+ case UNW_X86_64_RSI:
+ id = PERF_REG_X86_SI;
+ break;
+ case UNW_X86_64_RDI:
+ id = PERF_REG_X86_DI;
+ break;
+ case UNW_X86_64_RBP:
+ id = PERF_REG_X86_BP;
+ break;
+ case UNW_X86_64_RSP:
+ id = PERF_REG_X86_SP;
+ break;
+ case UNW_X86_64_R8:
+ id = PERF_REG_X86_R8;
+ break;
+ case UNW_X86_64_R9:
+ id = PERF_REG_X86_R9;
+ break;
+ case UNW_X86_64_R10:
+ id = PERF_REG_X86_R10;
+ break;
+ case UNW_X86_64_R11:
+ id = PERF_REG_X86_R11;
+ break;
+ case UNW_X86_64_R12:
+ id = PERF_REG_X86_R12;
+ break;
+ case UNW_X86_64_R13:
+ id = PERF_REG_X86_R13;
+ break;
+ case UNW_X86_64_R14:
+ id = PERF_REG_X86_R14;
+ break;
+ case UNW_X86_64_R15:
+ id = PERF_REG_X86_R15;
+ break;
+ case UNW_X86_64_RIP:
+ id = PERF_REG_X86_IP;
+ break;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return id;
+}
+#else
+int LIBUNWIND__ARCH_REG_ID(int regnum)
+{
+ int id;
+
+ switch (regnum) {
+ case UNW_X86_EAX:
+ id = PERF_REG_X86_AX;
+ break;
+ case UNW_X86_EDX:
+ id = PERF_REG_X86_DX;
+ break;
+ case UNW_X86_ECX:
+ id = PERF_REG_X86_CX;
+ break;
+ case UNW_X86_EBX:
+ id = PERF_REG_X86_BX;
+ break;
+ case UNW_X86_ESI:
+ id = PERF_REG_X86_SI;
+ break;
+ case UNW_X86_EDI:
+ id = PERF_REG_X86_DI;
+ break;
+ case UNW_X86_EBP:
+ id = PERF_REG_X86_BP;
+ break;
+ case UNW_X86_ESP:
+ id = PERF_REG_X86_SP;
+ break;
+ case UNW_X86_EIP:
+ id = PERF_REG_X86_IP;
+ break;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return id;
+}
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build
new file mode 100644
index 000000000..54afe4a46
--- /dev/null
+++ b/tools/perf/arch/xtensa/Build
@@ -0,0 +1 @@
+libperf-y += util/
diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile
new file mode 100644
index 000000000..7fbca1750
--- /dev/null
+++ b/tools/perf/arch/xtensa/Makefile
@@ -0,0 +1,3 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+endif
diff --git a/tools/perf/arch/xtensa/include/dwarf-regs-table.h b/tools/perf/arch/xtensa/include/dwarf-regs-table.h
new file mode 100644
index 000000000..d7c9f1fb4
--- /dev/null
+++ b/tools/perf/arch/xtensa/include/dwarf-regs-table.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef DEFINE_DWARF_REGSTR_TABLE
+/* This is included in perf/util/dwarf-regs.c */
+
+static const char * const xtensa_regstr_tbl[] = {
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+#endif
diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build
new file mode 100644
index 000000000..954e287bb
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c
new file mode 100644
index 000000000..4dba76bfb
--- /dev/null
+++ b/tools/perf/arch/xtensa/util/dwarf-regs.c
@@ -0,0 +1,25 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (c) 2015 Cadence Design Systems Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define XTENSA_MAX_REGS 16
+
+const char *xtensa_regs_table[XTENSA_MAX_REGS] = {
+ "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
+ "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL;
+}
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
new file mode 100644
index 000000000..eafce1a13
--- /dev/null
+++ b/tools/perf/bench/Build
@@ -0,0 +1,14 @@
+perf-y += sched-messaging.o
+perf-y += sched-pipe.o
+perf-y += mem-functions.o
+perf-y += futex-hash.o
+perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
+perf-y += futex-requeue.o
+perf-y += futex-lock-pi.o
+
+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
+perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
+perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
+
+perf-$(CONFIG_NUMA) += numa.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
new file mode 100644
index 000000000..b3e418afc
--- /dev/null
+++ b/tools/perf/bench/bench.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BENCH_H
+#define BENCH_H
+
+#include <sys/time.h>
+
+extern struct timeval bench__start, bench__end, bench__runtime;
+
+/*
+ * The madvise transparent hugepage constants were added in glibc
+ * 2.13. For compatibility with older versions of glibc, define these
+ * tokens if they are not already defined.
+ *
+ * PA-RISC uses different madvise values from other architectures and
+ * needs to be special-cased.
+ */
+#ifdef __hppa__
+# ifndef MADV_HUGEPAGE
+# define MADV_HUGEPAGE 67
+# endif
+# ifndef MADV_NOHUGEPAGE
+# define MADV_NOHUGEPAGE 68
+# endif
+#else
+# ifndef MADV_HUGEPAGE
+# define MADV_HUGEPAGE 14
+# endif
+# ifndef MADV_NOHUGEPAGE
+# define MADV_NOHUGEPAGE 15
+# endif
+#endif
+
+int bench_numa(int argc, const char **argv);
+int bench_sched_messaging(int argc, const char **argv);
+int bench_sched_pipe(int argc, const char **argv);
+int bench_mem_memcpy(int argc, const char **argv);
+int bench_mem_memset(int argc, const char **argv);
+int bench_futex_hash(int argc, const char **argv);
+int bench_futex_wake(int argc, const char **argv);
+int bench_futex_wake_parallel(int argc, const char **argv);
+int bench_futex_requeue(int argc, const char **argv);
+/* pi futexes */
+int bench_futex_lock_pi(int argc, const char **argv);
+
+#define BENCH_FORMAT_DEFAULT_STR "default"
+#define BENCH_FORMAT_DEFAULT 0
+#define BENCH_FORMAT_SIMPLE_STR "simple"
+#define BENCH_FORMAT_SIMPLE 1
+
+#define BENCH_FORMAT_UNKNOWN -1
+
+extern int bench_format;
+extern unsigned int bench_repeat;
+
+#endif
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
new file mode 100644
index 000000000..ee9b28065
--- /dev/null
+++ b/tools/perf/bench/futex-hash.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
+ *
+ * This program is particularly useful for measuring the kernel's futex hash
+ * table/function implementation. In order for it to make sense, use with as
+ * many threads and futexes as possible.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <sys/time.h>
+
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+
+static unsigned int nthreads = 0;
+static unsigned int nsecs = 10;
+/* number of futexes per thread */
+static unsigned int nfutexes = 1024;
+static bool fshared = false, done = false, silent = false;
+static int futex_flag = 0;
+
+struct timeval bench__start, bench__end, bench__runtime;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
+struct worker {
+ int tid;
+ u_int32_t *futex;
+ pthread_t thread;
+ unsigned long ops;
+};
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_hash_usage[] = {
+ "perf bench futex hash <options>",
+ NULL
+};
+
+static void *workerfn(void *arg)
+{
+ int ret;
+ struct worker *w = (struct worker *) arg;
+ unsigned int i;
+ unsigned long ops = w->ops; /* avoid cacheline bouncing */
+
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ do {
+ for (i = 0; i < nfutexes; i++, ops++) {
+ /*
+ * We want the futex calls to fail in order to stress
+ * the hashing of uaddr and not measure other steps,
+ * such as internal waitqueue handling, thus enlarging
+ * the critical region protected by hb->lock.
+ */
+ ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
+			if (!silent &&
+			    (!ret || (errno != EAGAIN && errno != EWOULDBLOCK)))
+				warn("Unexpected futex return call");
+ }
+ } while (!done);
+
+ w->ops = ops;
+ return NULL;
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
+}
+
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int)bench__runtime.tv_sec);
+}
+
+int bench_futex_hash(int argc, const char **argv)
+{
+ int ret = 0;
+ cpu_set_t cpuset;
+ struct sigaction act;
+ unsigned int i;
+ pthread_attr_t thread_attr;
+ struct worker *worker = NULL;
+ struct cpu_map *cpu;
+
+ argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_hash_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ goto errmem;
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads) /* default to the number of CPUs */
+ nthreads = cpu->nr;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ goto errmem;
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+ getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+
+ init_stats(&throughput_stats);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ threads_starting = nthreads;
+ pthread_attr_init(&thread_attr);
+ gettimeofday(&bench__start, NULL);
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+ worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+ if (!worker[i].futex)
+ goto errmem;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
+				     &worker[i]);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_create");
+
+ }
+ pthread_attr_destroy(&thread_attr);
+
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ sleep(nsecs);
+ toggle_done(0, NULL, NULL);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i].thread, NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+
+ for (i = 0; i < nthreads; i++) {
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ update_stats(&throughput_stats, t);
+ if (!silent) {
+ if (nfutexes == 1)
+ printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0], t);
+ else
+ printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0],
+ &worker[i].futex[nfutexes-1], t);
+ }
+
+ free(worker[i].futex);
+ }
+
+ print_summary();
+
+ free(worker);
+ free(cpu);
+ return ret;
+errmem:
+ err(EXIT_FAILURE, "calloc");
+}
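
A typical invocation, using the options defined above (the output shape depends on the machine):

	# perf bench futex hash -t 4 -f 1024 -r 10
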
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
new file mode 100644
index 000000000..017609ae3
--- /dev/null
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+struct worker {
+ int tid;
+ u_int32_t *futex;
+ pthread_t thread;
+ unsigned long ops;
+};
+
+static u_int32_t global_futex = 0;
+static struct worker *worker;
+static unsigned int nsecs = 10;
+static bool silent = false, multi = false;
+static bool done = false, fshared = false;
+static unsigned int nthreads = 0;
+static int futex_flag = 0;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_lock_pi_usage[] = {
+ "perf bench futex lock-pi <options>",
+ NULL
+};
+
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int)bench__runtime.tv_sec);
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&bench__end, NULL);
+ timersub(&bench__end, &bench__start, &bench__runtime);
+}
+
+static void *workerfn(void *arg)
+{
+ struct worker *w = (struct worker *) arg;
+ unsigned long ops = w->ops;
+
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ do {
+ int ret;
+ again:
+ ret = futex_lock_pi(w->futex, NULL, futex_flag);
+
+ if (ret) { /* handle lock acquisition */
+ if (!silent)
+ warn("thread %d: Could not lock pi-lock for %p (%d)",
+ w->tid, w->futex, ret);
+ if (done)
+ break;
+
+ goto again;
+ }
+
+ usleep(1);
+ ret = futex_unlock_pi(w->futex, futex_flag);
+ if (ret && !silent)
+ warn("thread %d: Could not unlock pi-lock for %p (%d)",
+ w->tid, w->futex, ret);
+ ops++; /* account for thread's share of work */
+ } while (!done);
+
+ w->ops = ops;
+ return NULL;
+}
+
+static void create_threads(struct worker *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
+{
+ cpu_set_t cpuset;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+
+ if (multi) {
+ worker[i].futex = calloc(1, sizeof(u_int32_t));
+ if (!worker[i].futex)
+ err(EXIT_FAILURE, "calloc");
+ } else
+ worker[i].futex = &global_futex;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+int bench_futex_lock_pi(int argc, const char **argv)
+{
+ int ret = 0;
+ unsigned int i;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
+
+ argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
+ if (argc)
+ goto err;
+
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = cpu->nr;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
+ getpid(), nthreads, nsecs);
+
+ init_stats(&throughput_stats);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ threads_starting = nthreads;
+ pthread_attr_init(&thread_attr);
+ gettimeofday(&bench__start, NULL);
+
+ create_threads(worker, thread_attr, cpu);
+ pthread_attr_destroy(&thread_attr);
+
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ sleep(nsecs);
+ toggle_done(0, NULL, NULL);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i].thread, NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+
+ for (i = 0; i < nthreads; i++) {
+ unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+
+ update_stats(&throughput_stats, t);
+ if (!silent)
+ printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, worker[i].futex, t);
+
+ if (multi)
+ free(worker[i].futex);
+ }
+
+ print_summary();
+
+ free(worker);
+ return ret;
+err:
+ usage_with_options(bench_futex_lock_pi_usage, options);
+ exit(EXIT_FAILURE);
+}
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
new file mode 100644
index 000000000..fc692efa0
--- /dev/null
+++ b/tools/perf/bench/futex-requeue.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-requeue: Block a bunch of threads on futex1 and requeue them
+ * on futex2, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread
+ * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+static u_int32_t futex1 = 0, futex2 = 0;
+
+/*
+ * How many tasks to requeue at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nrequeue = 1;
+
+static pthread_t *worker;
+static bool done = false, silent = false, fshared = false;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats requeuetime_stats, requeued_stats;
+static unsigned int threads_starting, nthreads = 0;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_requeue_usage[] = {
+ "perf bench futex requeue <options>",
+ NULL
+};
+
+static void print_summary(void)
+{
+ double requeuetime_avg = avg_stats(&requeuetime_stats);
+ double requeuetime_stddev = stddev_stats(&requeuetime_stats);
+ unsigned int requeued_avg = avg_stats(&requeued_stats);
+
+ printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ requeued_avg,
+ nthreads,
+ requeuetime_avg / USEC_PER_MSEC,
+ rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+}
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, futex_flag);
+ return NULL;
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr, struct cpu_map *cpu)
+{
+ cpu_set_t cpuset;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_requeue(int argc, const char **argv)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
+
+ argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
+ if (argc)
+ goto err;
+
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+ err(EXIT_FAILURE, "cpu_map__new");
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = cpu->nr;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ if (nrequeue > nthreads)
+ nrequeue = nthreads;
+
+ printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
+ "%d at a time.\n\n", getpid(), nthreads,
+ fshared ? "shared":"private", &futex1, &futex2, nrequeue);
+
+ init_stats(&requeued_stats);
+ init_stats(&requeuetime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < bench_repeat && !done; j++) {
+ unsigned int nrequeued = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr, cpu);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start requeueing */
+ gettimeofday(&start, NULL);
+ while (nrequeued < nthreads) {
+ /*
+ * Do not wakeup any tasks blocked on futex1, allowing
+ * us to really measure futex_wait functionality.
+ */
+ nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0,
+ nrequeue, futex_flag);
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&requeued_stats, nrequeued);
+ update_stats(&requeuetime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
+ j + 1, nrequeued, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+ }
+
+ /* everybody should be blocked on futex2, wake'em up */
+ nrequeued = futex_wake(&futex2, nrequeued, futex_flag);
+ if (nthreads != nrequeued)
+ warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+err:
+ usage_with_options(bench_futex_requeue_usage, options);
+ exit(EXIT_FAILURE);
+}
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644
index 000000000..69d8fdc87
--- /dev/null
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wake up an
+ * equal number of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+#include "bench.h"
+#include <linux/compiler.h>
+#include "../util/debug.h"
+
+#ifndef HAVE_PTHREAD_BARRIER
+int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+ pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__);
+ return 0;
+}
+#else /* HAVE_PTHREAD_BARRIER */
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+struct thread_data {
+ pthread_t worker;
+ unsigned int nwoken;
+ struct timeval runtime;
+};
+
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false, silent = false, fshared = false;
+static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static pthread_barrier_t barrier;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int threads_starting;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_wake_parallel_usage[] = {
+ "perf bench futex wake-parallel <options>",
+ NULL
+};
+
+static void *waking_workerfn(void *arg)
+{
+ struct thread_data *waker = (struct thread_data *) arg;
+ struct timeval start, end;
+
+ pthread_barrier_wait(&barrier);
+
+ gettimeofday(&start, NULL);
+
+ waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
+ if (waker->nwoken != nwakes)
+ warnx("couldn't wakeup all tasks (%d/%d)",
+ waker->nwoken, nwakes);
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &waker->runtime);
+
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+ unsigned int i;
+
+ pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+ pthread_barrier_init(&barrier, NULL, nwaking_threads + 1);
+
+ /* create and block all threads */
+ for (i = 0; i < nwaking_threads; i++) {
+ /*
+ * Thread creation order will impact per-thread latency
+		 * as it affects the order in which the hb spinlock is acquired.
+ * For now let the scheduler decide.
+ */
+ if (pthread_create(&td[i].worker, &thread_attr,
+ waking_workerfn, (void *)&td[i]))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+
+ pthread_barrier_wait(&barrier);
+
+ for (i = 0; i < nwaking_threads; i++)
+ if (pthread_join(td[i].worker, NULL))
+ err(EXIT_FAILURE, "pthread_join");
+
+ pthread_barrier_destroy(&barrier);
+}
+
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+	while (1) { /* handle spurious wakeups: retry only on EINTR */
+		if (!futex_wait(&futex, 0, NULL, futex_flag) || errno != EINTR)
+			break;
+ }
+
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr,
+ struct cpu_map *cpu)
+{
+ cpu_set_t cpuset;
+ unsigned int i;
+
+ threads_starting = nblocked_threads;
+
+ /* create and block all threads */
+ for (i = 0; i < nblocked_threads; i++) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+ unsigned int i, wakeup_avg;
+ double waketime_avg, waketime_stddev;
+ struct stats __waketime_stats, __wakeup_stats;
+
+ init_stats(&__wakeup_stats);
+ init_stats(&__waketime_stats);
+
+ for (i = 0; i < nwaking_threads; i++) {
+ update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
+ update_stats(&__wakeup_stats, waking_worker[i].nwoken);
+ }
+
+ waketime_avg = avg_stats(&__waketime_stats);
+ waketime_stddev = stddev_stats(&__waketime_stats);
+ wakeup_avg = avg_stats(&__wakeup_stats);
+
+ printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+ "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
+ nblocked_threads, waketime_avg / USEC_PER_MSEC,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void print_summary(void)
+{
+ unsigned int wakeup_avg;
+ double waketime_avg, waketime_stddev;
+
+ waketime_avg = avg_stats(&waketime_stats);
+ waketime_stddev = stddev_stats(&waketime_stats);
+ wakeup_avg = avg_stats(&wakeup_stats);
+
+ printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nblocked_threads,
+ waketime_avg / USEC_PER_MSEC,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+
+static void do_run_stats(struct thread_data *waking_worker)
+{
+ unsigned int i;
+
+ for (i = 0; i < nwaking_threads; i++) {
+ update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
+ update_stats(&wakeup_stats, waking_worker[i].nwoken);
+ }
+
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_wake_parallel(int argc, const char **argv)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+ struct thread_data *waking_worker;
+ struct cpu_map *cpu;
+
+ argc = parse_options(argc, argv, options,
+ bench_futex_wake_parallel_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_wake_parallel_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
+
+ if (!nblocked_threads)
+ nblocked_threads = cpu->nr;
+
+ /* some sanity checks */
+ if (nwaking_threads > nblocked_threads || !nwaking_threads)
+ nwaking_threads = nblocked_threads;
+
+ if (nblocked_threads % nwaking_threads)
+ errx(EXIT_FAILURE, "Must be perfectly divisible");
+ /*
+ * Each thread will wakeup nwakes tasks in
+ * a single futex_wait call.
+ */
+ nwakes = nblocked_threads/nwaking_threads;
+
+ blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+ if (!blocked_worker)
+ err(EXIT_FAILURE, "calloc");
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+ "futex %p), %d threads waking up %d at a time.\n\n",
+ getpid(), nblocked_threads, fshared ? "shared":"private",
+ &futex, nwaking_threads, nwakes);
+
+ init_stats(&wakeup_stats);
+ init_stats(&waketime_stats);
+
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < bench_repeat && !done; j++) {
+ waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+ if (!waking_worker)
+ err(EXIT_FAILURE, "calloc");
+
+ /* create, launch & block all threads */
+ block_threads(blocked_worker, thread_attr, cpu);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ wakeup_threads(waking_worker, thread_attr);
+
+ for (i = 0; i < nblocked_threads; i++) {
+ ret = pthread_join(blocked_worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ do_run_stats(waking_worker);
+ if (!silent)
+ print_run(waking_worker, j);
+
+ free(waking_worker);
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(blocked_worker);
+ return ret;
+}
+#endif /* HAVE_PTHREAD_BARRIER */
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
new file mode 100644
index 000000000..e62006342
--- /dev/null
+++ b/tools/perf/bench/futex-wake.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread wakeups
+ * in non-error situations: all waiters are queued and all wake calls wake up
+ * one or more tasks, and thus the waitqueue is never empty.
+ */
+
+/* For the CLR_() macros */
+#include <string.h>
+#include <pthread.h>
+
+#include <signal.h>
+#include "../util/stat.h"
+#include <subcmd/parse-options.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include "bench.h"
+#include "futex.h"
+#include "cpumap.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+/* all threads will block on the same futex */
+static u_int32_t futex1 = 0;
+
+/*
+ * How many wakeups to do at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nwakes = 1;
+
+pthread_t *worker;
+static bool done = false, silent = false, fshared = false;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int threads_starting, nthreads = 0;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_wake_usage[] = {
+ "perf bench futex wake <options>",
+ NULL
+};
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+	while (1) { /* handle spurious wakeups: retry only on EINTR */
+		if (!futex_wait(&futex1, 0, NULL, futex_flag) || errno != EINTR)
+			break;
+ }
+
+ pthread_exit(NULL);
+ return NULL;
+}
+
+static void print_summary(void)
+{
+ double waketime_avg = avg_stats(&waketime_stats);
+ double waketime_stddev = stddev_stats(&waketime_stats);
+ unsigned int wakeup_avg = avg_stats(&wakeup_stats);
+
+ printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nthreads,
+ waketime_avg / USEC_PER_MSEC,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr, struct cpu_map *cpu)
+{
+ cpu_set_t cpuset;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu->map[i % cpu->nr], &cpuset);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_wake(int argc, const char **argv)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+ struct cpu_map *cpu;
+
+ argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_wake_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ cpu = cpu_map__new(NULL);
+ if (!cpu)
+		err(EXIT_FAILURE, "cpu_map__new");
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = cpu->nr;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ if (!fshared)
+ futex_flag = FUTEX_PRIVATE_FLAG;
+
+ printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
+ "waking up %d at a time.\n\n",
+ getpid(), nthreads, fshared ? "shared":"private", &futex1, nwakes);
+
+ init_stats(&wakeup_stats);
+ init_stats(&waketime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < bench_repeat && !done; j++) {
+ unsigned int nwoken = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr, cpu);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ gettimeofday(&start, NULL);
+ while (nwoken != nthreads)
+ nwoken += futex_wake(&futex1, nwakes, futex_flag);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&wakeup_stats, nwoken);
+ update_stats(&waketime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
+ j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
new file mode 100644
index 000000000..db4853f20
--- /dev/null
+++ b/tools/perf/bench/futex.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Glibc independent futex library for testing kernel functionality.
+ * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com>
+ * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/
+ */
+
+#ifndef _FUTEX_H
+#define _FUTEX_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2:	address of second futex for some ops
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of a static inline function
+ * because some of the types are overloaded (timeout is used for nr_requeue,
+ * for example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
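+
+/*
+ * A minimal usage sketch of the two wrappers above, assuming a single
+ * futex word shared by one waiter and one waker (illustrative only):
+ *
+ *	u_int32_t ftx = 0;
+ *
+ *	// waiter: blocks only while ftx still holds the expected value 0
+ *	futex_wait(&ftx, 0, NULL, FUTEX_PRIVATE_FLAG);
+ *
+ *	// waker: publish the new value, then wake up to one waiter
+ *	ftx = 1;
+ *	futex_wake(&ftx, 1, FUTEX_PRIVATE_FLAG);
+ */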
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ */
+static inline int
+futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, 0, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(u_int32_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+#ifndef HAVE_PTHREAD_ATTR_SETAFFINITY_NP
+#include <pthread.h>
+#include <linux/compiler.h>
+static inline int pthread_attr_setaffinity_np(pthread_attr_t *attr __maybe_unused,
+ size_t cpusetsize __maybe_unused,
+ cpu_set_t *cpuset __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+#endif /* _FUTEX_H */
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
new file mode 100644
index 000000000..4864fc67d
--- /dev/null
+++ b/tools/perf/bench/mem-functions.c
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * mem-memcpy.c
+ *
+ * Simple memcpy() and memset() benchmarks
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+
+#include "debug.h"
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../util/header.h"
+#include "../util/cloexec.h"
+#include "../util/string2.h"
+#include "bench.h"
+#include "mem-memcpy-arch.h"
+#include "mem-memset-arch.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+#include <linux/time64.h>
+
+#define K 1024
+
+static const char *size_str = "1MB";
+static const char *function_str = "all";
+static int nr_loops = 1;
+static bool use_cycles;
+static int cycles_fd;
+
+static const struct option options[] = {
+ OPT_STRING('s', "size", &size_str, "1MB",
+ "Specify the size of the memory buffers. "
+ "Available units: B, KB, MB, GB and TB (case insensitive)"),
+
+ OPT_STRING('f', "function", &function_str, "all",
+ "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
+
+ OPT_INTEGER('l', "nr_loops", &nr_loops,
+ "Specify the number of loops to run. (default: 1)"),
+
+ OPT_BOOLEAN('c', "cycles", &use_cycles,
+ "Use a cycles event instead of gettimeofday() to measure performance"),
+
+ OPT_END()
+};
+
+typedef void *(*memcpy_t)(void *, const void *, size_t);
+typedef void *(*memset_t)(void *, int, size_t);
+
+struct function {
+ const char *name;
+ const char *desc;
+ union {
+ memcpy_t memcpy;
+ memset_t memset;
+ } fn;
+};
+
+static struct perf_event_attr cycle_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+static int init_cycles(void)
+{
+ cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
+
+ if (cycles_fd < 0 && errno == ENOSYS) {
+ pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+ return -1;
+ }
+
+ return cycles_fd;
+}
+
+static u64 get_cycles(void)
+{
+ int ret;
+ u64 clk;
+
+ ret = read(cycles_fd, &clk, sizeof(u64));
+ BUG_ON(ret != sizeof(u64));
+
+ return clk;
+}
+
+static double timeval2double(struct timeval *ts)
+{
+ return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
+}
+
+#define print_bps(x) do { \
+ if (x < K) \
+ printf(" %14lf bytes/sec\n", x); \
+ else if (x < K * K) \
+		printf(" %14lf KB/sec\n", x / K);	\
+ else if (x < K * K * K) \
+ printf(" %14lf MB/sec\n", x / K / K); \
+ else \
+ printf(" %14lf GB/sec\n", x / K / K / K); \
+ } while (0)
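+
+/*
+ * For reference, the banding above maps raw bytes/sec values like this
+ * (illustrative): 512 prints as bytes/sec, 8 * K as 8 KB/sec,
+ * 3 * K * K as 3 MB/sec and 2 * K * K * K as 2 GB/sec.
+ */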
+
+struct bench_mem_info {
+ const struct function *functions;
+ u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
+ double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
+ const char *const *usage;
+ bool alloc_src;
+};
+
+static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
+{
+ const struct function *r = &info->functions[r_idx];
+ double result_bps = 0.0;
+ u64 result_cycles = 0;
+ void *src = NULL, *dst = zalloc(size);
+
+ printf("# function '%s' (%s)\n", r->name, r->desc);
+
+ if (dst == NULL)
+ goto out_alloc_failed;
+
+ if (info->alloc_src) {
+ src = zalloc(size);
+ if (src == NULL)
+ goto out_alloc_failed;
+ }
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Copying %s bytes ...\n\n", size_str);
+
+ if (use_cycles) {
+ result_cycles = info->do_cycles(r, size, src, dst);
+ } else {
+ result_bps = info->do_gettimeofday(r, size, src, dst);
+ }
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ if (use_cycles) {
+ printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
+ } else {
+ print_bps(result_bps);
+ }
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ if (use_cycles) {
+ printf("%lf\n", (double)result_cycles/size_total);
+ } else {
+ printf("%lf\n", result_bps);
+ }
+ break;
+
+ default:
+ BUG_ON(1);
+ break;
+ }
+
+out_free:
+ free(src);
+ free(dst);
+ return;
+out_alloc_failed:
+ printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+ goto out_free;
+}
+
+static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
+{
+ int i;
+ size_t size;
+ double size_total;
+
+ argc = parse_options(argc, argv, options, info->usage, 0);
+
+ if (use_cycles) {
+ i = init_cycles();
+ if (i < 0) {
+ fprintf(stderr, "Failed to open cycles counter\n");
+ return i;
+ }
+ }
+
+ size = (size_t)perf_atoll((char *)size_str);
+ size_total = (double)size * nr_loops;
+
+ if ((s64)size <= 0) {
+ fprintf(stderr, "Invalid size:%s\n", size_str);
+ return 1;
+ }
+
+ if (!strncmp(function_str, "all", 3)) {
+ for (i = 0; info->functions[i].name; i++)
+ __bench_mem_function(info, i, size, size_total);
+ return 0;
+ }
+
+ for (i = 0; info->functions[i].name; i++) {
+ if (!strcmp(info->functions[i].name, function_str))
+ break;
+ }
+ if (!info->functions[i].name) {
+ if (strcmp(function_str, "help") && strcmp(function_str, "h"))
+ printf("Unknown function: %s\n", function_str);
+ printf("Available functions:\n");
+ for (i = 0; info->functions[i].name; i++) {
+ printf("\t%s ... %s\n",
+ info->functions[i].name, info->functions[i].desc);
+ }
+ return 1;
+ }
+
+ __bench_mem_function(info, i, size, size_total);
+
+ return 0;
+}
+
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
+{
+ /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
+ memset(src, 0, size);
+
+ /*
+ * We prefault the freshly allocated memory range here,
+ * to not measure page fault overhead:
+ */
+ fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ memcpy_t fn = r->fn.memcpy;
+ int i;
+
+ memcpy_prefault(fn, size, src, dst);
+
+ cycle_start = get_cycles();
+ for (i = 0; i < nr_loops; ++i)
+ fn(dst, src, size);
+ cycle_end = get_cycles();
+
+ return cycle_end - cycle_start;
+}
+
+static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
+{
+ struct timeval tv_start, tv_end, tv_diff;
+ memcpy_t fn = r->fn.memcpy;
+ int i;
+
+ memcpy_prefault(fn, size, src, dst);
+
+ BUG_ON(gettimeofday(&tv_start, NULL));
+ for (i = 0; i < nr_loops; ++i)
+ fn(dst, src, size);
+ BUG_ON(gettimeofday(&tv_end, NULL));
+
+ timersub(&tv_end, &tv_start, &tv_diff);
+
+ return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+}
+
+struct function memcpy_functions[] = {
+ { .name = "default",
+ .desc = "Default memcpy() provided by glibc",
+ .fn.memcpy = memcpy },
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
+# include "mem-memcpy-x86-64-asm-def.h"
+# undef MEMCPY_FN
+#endif
+
+ { .name = NULL, }
+};
+
+static const char * const bench_mem_memcpy_usage[] = {
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+int bench_mem_memcpy(int argc, const char **argv)
+{
+ struct bench_mem_info info = {
+ .functions = memcpy_functions,
+ .do_cycles = do_memcpy_cycles,
+ .do_gettimeofday = do_memcpy_gettimeofday,
+ .usage = bench_mem_memcpy_usage,
+ .alloc_src = true,
+ };
+
+ return bench_mem_common(argc, argv, &info);
+}
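+
+/*
+ * Typical invocations (illustrative; the x86-64 function names are only
+ * available when built with HAVE_ARCH_X86_64_SUPPORT):
+ *
+ *	perf bench mem memcpy -s 4MB -l 10 -f all
+ *	perf bench mem memcpy -f x86-64-movsb -c
+ */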
+
+static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+{
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ memset_t fn = r->fn.memset;
+ int i;
+
+ /*
+ * We prefault the freshly allocated memory range here,
+ * to not measure page fault overhead:
+ */
+ fn(dst, -1, size);
+
+ cycle_start = get_cycles();
+ for (i = 0; i < nr_loops; ++i)
+ fn(dst, i, size);
+ cycle_end = get_cycles();
+
+ return cycle_end - cycle_start;
+}
+
+static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+{
+ struct timeval tv_start, tv_end, tv_diff;
+ memset_t fn = r->fn.memset;
+ int i;
+
+ /*
+ * We prefault the freshly allocated memory range here,
+ * to not measure page fault overhead:
+ */
+ fn(dst, -1, size);
+
+ BUG_ON(gettimeofday(&tv_start, NULL));
+ for (i = 0; i < nr_loops; ++i)
+ fn(dst, i, size);
+ BUG_ON(gettimeofday(&tv_end, NULL));
+
+ timersub(&tv_end, &tv_start, &tv_diff);
+
+ return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+}
+
+static const char * const bench_mem_memset_usage[] = {
+ "perf bench mem memset <options>",
+ NULL
+};
+
+static const struct function memset_functions[] = {
+ { .name = "default",
+ .desc = "Default memset() provided by glibc",
+ .fn.memset = memset },
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+# include "mem-memset-x86-64-asm-def.h"
+# undef MEMSET_FN
+#endif
+
+ { .name = NULL, }
+};
+
+int bench_mem_memset(int argc, const char **argv)
+{
+ struct bench_mem_info info = {
+ .functions = memset_functions,
+ .do_cycles = do_memset_cycles,
+ .do_gettimeofday = do_memset_gettimeofday,
+ .usage = bench_mem_memset_usage,
+ };
+
+ return bench_mem_common(argc, argv, &info);
+}
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
new file mode 100644
index 000000000..5bcaec560
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+#define MEMCPY_FN(fn, name, desc) \
+ void *fn(void *, const void *, size_t);
+
+#include "mem-memcpy-x86-64-asm-def.h"
+
+#undef MEMCPY_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
new file mode 100644
index 000000000..50ae8bd58
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+MEMCPY_FN(memcpy_orig,
+ "x86-64-unrolled",
+ "unrolled memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(__memcpy,
+ "x86-64-movsq",
+ "movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
+
+MEMCPY_FN(memcpy_erms,
+ "x86-64-movsb",
+ "movsb-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S
new file mode 100644
index 000000000..9ad015a1e
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Various wrappers to make the kernel .S file build in user-space: */
+
+#define memcpy MEMCPY /* don't hide glibc's memcpy() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#define _ASM_EXTABLE_FAULT(x, y)
+#define _ASM_EXTABLE(x, y)
+
+#include "../../arch/x86/lib/memcpy_64.S"
+/*
+ * We need to provide a .note.GNU-stack section, stating that we do not
+ * want an executable stack. Otherwise the final link will assume the
+ * ELF stack should not be restricted at all and will mark it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
new file mode 100644
index 000000000..4130734dd
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy-x86-64-lib.c
@@ -0,0 +1,24 @@
+/*
+ * From code in arch/x86/lib/usercopy_64.c, copied here to keep the
+ * tools/ copy of the kernel's arch/x86/lib/memcpy_64.S, used in
+ * 'perf bench mem memcpy', happy.
+ */
+#include <linux/types.h>
+
+unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
+
+unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
+{
+ for (; len; --len, to++, from++) {
+ /*
+ * Call the assembly routine back directly since
+		 * memcpy_mcsafe() may silently fall back to memcpy.
+ */
+ unsigned long rem = __memcpy_mcsafe(to, from, 1);
+
+ if (rem)
+ break;
+ }
+ return len;
+}
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
new file mode 100644
index 000000000..53f454826
--- /dev/null
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+
+#define MEMSET_FN(fn, name, desc) \
+ void *fn(void *, int, size_t);
+
+#include "mem-memset-x86-64-asm-def.h"
+
+#undef MEMSET_FN
+
+#endif
+
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
new file mode 100644
index 000000000..dac6d2b7c
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+MEMSET_FN(memset_orig,
+ "x86-64-unrolled",
+ "unrolled memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(__memset,
+ "x86-64-stosq",
+	"stosq-based memset() in arch/x86/lib/memset_64.S")
+
+MEMSET_FN(memset_erms,
+ "x86-64-stosb",
+	"stosb-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S
new file mode 100644
index 000000000..d550bd526
--- /dev/null
+++ b/tools/perf/bench/mem-memset-x86-64-asm.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define memset MEMSET /* don't hide glibc's memset() */
+#define altinstr_replacement text
+#define globl p2align 4; .globl
+#include "../../arch/x86/lib/memset_64.S"
+
+/*
+ * We need to provide a .note.GNU-stack section, stating that we do not
+ * want an executable stack. Otherwise the final link will assume the
+ * ELF stack should not be restricted at all and will mark it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
new file mode 100644
index 000000000..e7fde88a0
--- /dev/null
+++ b/tools/perf/bench/numa.c
@@ -0,0 +1,1844 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * numa.c
+ *
+ * numa: Simulate NUMA-sensitive workloads and measure their NUMA performance
+ */
+
+#include <inttypes.h>
+/* For the CLR_() macros */
+#include <pthread.h>
+
+#include "../perf.h"
+#include "../builtin.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../util/cloexec.h"
+
+#include "bench.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+#include <numa.h>
+#include <numaif.h>
+
+#ifndef RUSAGE_THREAD
+# define RUSAGE_THREAD 1
+#endif
+
+/*
+ * Regular printout to the terminal, suppressed if -q is specified:
+ */
+#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
+
+/*
+ * Debug printf:
+ */
+#undef dprintf
+#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
+
+struct thread_data {
+ int curr_cpu;
+ cpu_set_t bind_cpumask;
+ int bind_node;
+ u8 *process_data;
+ int process_nr;
+ int thread_nr;
+ int task_nr;
+ unsigned int loops_done;
+ u64 val;
+ u64 runtime_ns;
+ u64 system_time_ns;
+ u64 user_time_ns;
+ double speed_gbs;
+ pthread_mutex_t *process_lock;
+};
+
+/* Parameters set by options: */
+
+struct params {
+ /* Startup synchronization: */
+ bool serialize_startup;
+
+ /* Task hierarchy: */
+ int nr_proc;
+ int nr_threads;
+
+ /* Working set sizes: */
+ const char *mb_global_str;
+ const char *mb_proc_str;
+ const char *mb_proc_locked_str;
+ const char *mb_thread_str;
+
+ double mb_global;
+ double mb_proc;
+ double mb_proc_locked;
+ double mb_thread;
+
+ /* Access patterns to the working set: */
+ bool data_reads;
+ bool data_writes;
+ bool data_backwards;
+ bool data_zero_memset;
+ bool data_rand_walk;
+ u32 nr_loops;
+ u32 nr_secs;
+ u32 sleep_usecs;
+
+ /* Working set initialization: */
+ bool init_zero;
+ bool init_random;
+ bool init_cpu0;
+
+ /* Misc options: */
+ int show_details;
+ int run_all;
+ int thp;
+
+ long bytes_global;
+ long bytes_process;
+ long bytes_process_locked;
+ long bytes_thread;
+
+ int nr_tasks;
+ bool show_quiet;
+
+ bool show_convergence;
+ bool measure_convergence;
+
+ int perturb_secs;
+ int nr_cpus;
+ int nr_nodes;
+
+ /* Affinity options -C and -N: */
+ char *cpu_list_str;
+ char *node_list_str;
+};
+
+
+/* Global, read-writable area, accessible to all processes and threads: */
+
+struct global_info {
+ u8 *data;
+
+ pthread_mutex_t startup_mutex;
+ int nr_tasks_started;
+
+ pthread_mutex_t startup_done_mutex;
+
+ pthread_mutex_t start_work_mutex;
+ int nr_tasks_working;
+
+ pthread_mutex_t stop_work_mutex;
+ u64 bytes_done;
+
+ struct thread_data *threads;
+
+ /* Convergence latency measurement: */
+ bool all_converged;
+ bool stop_work;
+
+ int print_once;
+
+ struct params p;
+};
+
+static struct global_info *g = NULL;
+
+static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
+static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
+
+struct params p0;
+
+static const struct option options[] = {
+ OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
+ OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"),
+
+ OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"),
+ OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
+ OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
+ OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"),
+
+ OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run (default: unlimited)"),
+ OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"),
+ OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
+
+ OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"),
+ OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
+ OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
+ OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
+ OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"),
+
+
+ OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"),
+ OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"),
+ OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"),
+ OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"),
+
+ OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"),
+ OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"),
+ OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
+ OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
+ "convergence is reached when each process (all its threads) is running on a single NUMA node."),
+ OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
+ OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"),
+ OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
+
+ /* Special option string parsing callbacks: */
+ OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
+ "bind the first N tasks to these specific cpus (the rest is unbound)",
+ parse_cpus_opt),
+ OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
+ "bind the first N tasks to these specific memory nodes (the rest is unbound)",
+ parse_nodes_opt),
+ OPT_END()
+};
+
+static const char * const bench_numa_usage[] = {
+ "perf bench numa <options>",
+ NULL
+};
+
+static const char * const numa_usage[] = {
+ "perf bench numa mem [<options>]",
+ NULL
+};
+
+/*
+ * Return the number of NUMA nodes present.
+ */
+static int nr_numa_nodes(void)
+{
+ int i, nr_nodes = 0;
+
+ for (i = 0; i < g->p.nr_nodes; i++) {
+ if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+ nr_nodes++;
+ }
+
+ return nr_nodes;
+}
+
+/*
+ * Check whether the given NUMA node is present.
+ */
+static int is_node_present(int node)
+{
+ return numa_bitmask_isbitset(numa_nodes_ptr, node);
+}
+
+/*
+ * Check whether the given NUMA node has any CPUs.
+ */
+static bool node_has_cpus(int node)
+{
+	struct bitmask *cpu = numa_allocate_cpumask();
+	bool ret = false; /* fall back to no-CPUs safely */
+	unsigned int i;
+
+	if (cpu && !numa_node_to_cpus(node, cpu)) {
+		for (i = 0; i < cpu->size; i++) {
+			if (numa_bitmask_isbitset(cpu, i)) {
+				ret = true;
+				break;
+			}
+		}
+	}
+	if (cpu)
+		numa_free_cpumask(cpu);
+
+	return ret;
+}
+
+static cpu_set_t bind_to_cpu(int target_cpu)
+{
+ cpu_set_t orig_mask, mask;
+ int ret;
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_cpu == -1) {
+ int cpu;
+
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
+ CPU_SET(target_cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static cpu_set_t bind_to_node(int target_node)
+{
+ int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
+ cpu_set_t orig_mask, mask;
+ int cpu;
+ int ret;
+
+ BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
+ BUG_ON(!cpus_per_node);
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_node == -1) {
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ int cpu_start = (target_node + 0) * cpus_per_node;
+ int cpu_stop = (target_node + 1) * cpus_per_node;
+
+ BUG_ON(cpu_stop > g->p.nr_cpus);
+
+ for (cpu = cpu_start; cpu < cpu_stop; cpu++)
+ CPU_SET(cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static void bind_to_cpumask(cpu_set_t mask)
+{
+ int ret;
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+}
+
+static void mempol_restore(void)
+{
+ int ret;
+
+ ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
+
+ BUG_ON(ret);
+}
+
+static void bind_to_memnode(int node)
+{
+ unsigned long nodemask;
+ int ret;
+
+ if (node == -1)
+ return;
+
+ BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
+ nodemask = 1L << node;
+
+ ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+
+ BUG_ON(ret);
+}
+
+#define HPSIZE (2*1024*1024)
+
+#define set_taskname(fmt...) \
+do { \
+ char name[20]; \
+ \
+ snprintf(name, 20, fmt); \
+ prctl(PR_SET_NAME, name); \
+} while (0)
+
+static u8 *alloc_data(ssize_t bytes0, int map_flags,
+ int init_zero, int init_cpu0, int thp, int init_random)
+{
+ cpu_set_t orig_mask;
+ ssize_t bytes;
+ u8 *buf;
+ int ret;
+
+ if (!bytes0)
+ return NULL;
+
+ /* Allocate and initialize all memory on CPU#0: */
+ if (init_cpu0) {
+ int node = numa_node_of_cpu(0);
+
+ orig_mask = bind_to_node(node);
+ bind_to_memnode(node);
+ }
+
+ bytes = bytes0 + HPSIZE;
+
+ buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
+ BUG_ON(buf == (void *)-1);
+
+ if (map_flags == MAP_PRIVATE) {
+ if (thp > 0) {
+ ret = madvise(buf, bytes, MADV_HUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
+ }
+ }
+ if (thp < 0) {
+ ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
+ }
+ }
+ }
+
+ if (init_zero) {
+ bzero(buf, bytes);
+ } else {
+ /* Initialize random contents, different in each word: */
+ if (init_random) {
+ u64 *wbuf = (void *)buf;
+ long off = rand();
+ long i;
+
+ for (i = 0; i < bytes/8; i++)
+ wbuf[i] = i + off;
+ }
+ }
+
+ /* Align to 2MB boundary: */
+ buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
+
+ /* Restore affinity: */
+ if (init_cpu0) {
+ bind_to_cpumask(orig_mask);
+ mempol_restore();
+ }
+
+ return buf;
+}
+
+static void free_data(void *data, ssize_t bytes)
+{
+ int ret;
+
+ if (!data)
+ return;
+
+ ret = munmap(data, bytes);
+ BUG_ON(ret);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes, zeroed:
+ */
+static void * zalloc_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes:
+ */
+static void * setup_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Allocate process-local memory - this will either be shared between
+ * threads of this process, or only be accessed by this thread:
+ */
+static void * setup_private_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Return a process-shared (global) mutex:
+ */
+static void init_global_mutex(pthread_mutex_t *mutex)
+{
+ pthread_mutexattr_t attr;
+
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_mutex_init(mutex, &attr);
+}
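+
+/*
+ * Note: a PTHREAD_PROCESS_SHARED mutex only synchronizes across fork()ed
+ * processes if the mutex object itself lives in shared memory, which is
+ * why the g->*_mutex fields sit in the MAP_SHARED global_info area. A
+ * minimal sketch, assuming a caller that wants its own such mutex:
+ *
+ *	pthread_mutex_t *m = zalloc_shared_data(sizeof(*m));
+ *	init_global_mutex(m);
+ */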
+
+static int parse_cpu_list(const char *arg)
+{
+ p0.cpu_list_str = strdup(arg);
+
+ dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
+
+ return 0;
+}
+
+static int parse_setup_cpu_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.cpu_list_str)
+ return 0;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.cpu_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to CPUs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_cpu, bind_cpu_0, bind_cpu_1;
+ char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
+ int bind_len;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single CPU specified: */
+ bind_cpu_0 = bind_cpu_1 = atol(tok);
+ } else {
+ /* CPU range specified (for example: "5-11"): */
+ bind_cpu_0 = atol(tok);
+ bind_cpu_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_cpus);
+ }
+
+ /*
+ * Mask length.
+ * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
+ * where the _4 means the next 4 CPUs are allowed.
+ */
+ bind_len = 1;
+ tok_len = strstr(tok, "_");
+ if (tok_len) {
+ bind_len = atol(tok_len + 1);
+ BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
+ }
+
+		/* Multiplier shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
+
+ if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
+ printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
+ return -1;
+ }
+
+ BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
+ BUG_ON(bind_cpu_0 > bind_cpu_1);
+
+ for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ int cpu;
+
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (t)
+ tprintf(",");
+ if (bind_len > 1) {
+ tprintf("%2d/%d", bind_cpu, bind_len);
+ } else {
+ tprintf("%2d", bind_cpu);
+ }
+
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
+ BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+ return 0;
+}
+
+static int parse_cpus_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_cpu_list(arg);
+}
+
+static int parse_node_list(const char *arg)
+{
+ p0.node_list_str = strdup(arg);
+
+ dprintf("got NODE list: {%s}\n", p0.node_list_str);
+
+ return 0;
+}
+
+static int parse_setup_node_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.node_list_str)
+ return 0;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.node_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to NODEs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_node, bind_node_0, bind_node_1;
+ char *tok, *tok_end, *tok_step, *tok_mul;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single NODE specified: */
+ bind_node_0 = bind_node_1 = atol(tok);
+ } else {
+ /* NODE range specified (for example: "5-11"): */
+ bind_node_0 = atol(tok);
+ bind_node_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_nodes);
+ }
+
+		/* Multiplier shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
+
+ if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
+ printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
+ return -1;
+ }
+
+ BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
+ BUG_ON(bind_node_0 > bind_node_1);
+
+ for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
+ printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (!t)
+ tprintf(" %2d", bind_node);
+ else
+ tprintf(",%2d", bind_node);
+
+ td->bind_node = bind_node;
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+ return 0;
+}
+
+static int parse_nodes_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_node_list(arg);
+}
+
+#define BIT(x) (1ul << (x))
+
+static inline uint32_t lfsr_32(uint32_t lfsr)
+{
+ const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
+ return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
+}
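+
+/*
+ * A sketch of how the LFSR above drives the random-walk access pattern
+ * (illustrative only; seed, steps, data and words stand in for the real
+ * variables used in do_work() below):
+ *
+ *	u32 lfsr = seed;			// any non-zero seed works
+ *	for (i = 0; i < steps; i++) {
+ *		lfsr = lfsr_32(lfsr);		// next pseudo-random state
+ *		val = access_data(data + lfsr % words, val);
+ *	}
+ */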
+
+/*
+ * Make sure there's real data dependency to RAM (when read
+ * accesses are enabled), so the compiler, the CPU and the
+ * kernel (KSM, zero page, etc.) cannot optimize away RAM
+ * accesses:
+ */
+static inline u64 access_data(u64 *data, u64 val)
+{
+ if (g->p.data_reads)
+ val += *data;
+ if (g->p.data_writes)
+ *data = val + 1;
+ return val;
+}
+
+/*
+ * The worker process does two types of work, a forwards going
+ * loop and a backwards going loop.
+ *
+ * We do this so that on multiprocessor systems we do not create
+ * a 'train' of processing, with highly synchronized processes,
+ * skewing the whole benchmark.
+ */
+static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
+{
+ long words = bytes/sizeof(u64);
+ u64 *data = (void *)__data;
+ long chunk_0, chunk_1;
+ u64 *d0, *d, *d1;
+ long off;
+ long i;
+
+ BUG_ON(!data && words);
+ BUG_ON(data && !words);
+
+ if (!data)
+ return val;
+
+ /* Very simple memset() work variant: */
+ if (g->p.data_zero_memset && !g->p.data_rand_walk) {
+ bzero(data, bytes);
+ return val;
+ }
+
+ /* Spread out by PID/TID nr and by loop nr: */
+ chunk_0 = words/nr_max;
+ chunk_1 = words/g->p.nr_loops;
+ off = nr*chunk_0 + loop*chunk_1;
+
+ while (off >= words)
+ off -= words;
+
+ if (g->p.data_rand_walk) {
+ u32 lfsr = nr + loop + val;
+ int j;
+
+ for (i = 0; i < words/1024; i++) {
+ long start, end;
+
+ lfsr = lfsr_32(lfsr);
+
+ start = lfsr % words;
+ end = min(start + 1024, words-1);
+
+ if (g->p.data_zero_memset) {
+ bzero(data + start, (end-start) * sizeof(u64));
+ } else {
+ for (j = start; j < end; j++)
+ val = access_data(data + j, val);
+ }
+ }
+ } else if (!g->p.data_backwards || (nr + loop) & 1) {
+
+ d0 = data + off;
+ d = data + off + 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d >= d1))
+ d = data;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d++;
+ }
+ } else {
+		d0 = data + off;
+		d = data + off - 1;
+		d1 = data + words;
+
+		/* Process data backwards: */
+ for (;;) {
+ if (unlikely(d < data))
+ d = data + words-1;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d--;
+ }
+ }
+
+ return val;
+}
+
+static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
+{
+ unsigned int cpu;
+
+ cpu = sched_getcpu();
+
+ g->threads[task_nr].curr_cpu = cpu;
+ prctl(0, bytes_worked);
+}
+
+#define MAX_NR_NODES 64
+
+/*
+ * Count the number of nodes a process's threads
+ * are spread out on.
+ *
+ * A count of 1 means that the process is compressed
+ * to a single node. A count of g->p.nr_nodes means it's
+ * spread out on the whole system.
+ */
+static int count_process_nodes(int process_nr)
+{
+ char node_present[MAX_NR_NODES] = { 0, };
+ int nodes;
+ int n, t;
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int node;
+
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ node = numa_node_of_cpu(td->curr_cpu);
+ if (node < 0) /* curr_cpu was likely still -1 */
+ return 0;
+
+ node_present[node] = 1;
+ }
+
+ nodes = 0;
+
+ for (n = 0; n < MAX_NR_NODES; n++)
+ nodes += node_present[n];
+
+ return nodes;
+}
+
+/*
+ * Count the number of distinct process-threads a node contains.
+ *
+ * A count of 1 means that the node contains only a single
+ * process. If all nodes on the system contain at most one
+ * process then we are well-converged.
+ */
+static int count_node_processes(int node)
+{
+ int processes = 0;
+ int t, p;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int n;
+
+ task_nr = p*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ n = numa_node_of_cpu(td->curr_cpu);
+ if (n == node) {
+ processes++;
+ break;
+ }
+ }
+ }
+
+ return processes;
+}
+
+static void calc_convergence_compression(int *strong)
+{
+ unsigned int nodes_min, nodes_max;
+ int p;
+
+ nodes_min = -1;
+ nodes_max = 0;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ unsigned int nodes = count_process_nodes(p);
+
+ if (!nodes) {
+ *strong = 0;
+ return;
+ }
+
+ nodes_min = min(nodes, nodes_min);
+ nodes_max = max(nodes, nodes_max);
+ }
+
+ /* Strong convergence: all threads compress on a single node: */
+ if (nodes_min == 1 && nodes_max == 1) {
+ *strong = 1;
+ } else {
+ *strong = 0;
+ tprintf(" {%d-%d}", nodes_min, nodes_max);
+ }
+}
+
+static void calc_convergence(double runtime_ns_max, double *convergence)
+{
+ unsigned int loops_done_min, loops_done_max;
+ int process_groups;
+ int nodes[MAX_NR_NODES];
+ int distance;
+ int nr_min;
+ int nr_max;
+ int strong;
+ int sum;
+ int nr;
+ int node;
+ int cpu;
+ int t;
+
+ if (!g->p.show_convergence && !g->p.measure_convergence)
+ return;
+
+ for (node = 0; node < g->p.nr_nodes; node++)
+ nodes[node] = 0;
+
+ loops_done_min = -1;
+ loops_done_max = 0;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ unsigned int loops_done;
+
+ cpu = td->curr_cpu;
+
+ /* Not all threads have written it yet: */
+ if (cpu < 0)
+ continue;
+
+ node = numa_node_of_cpu(cpu);
+
+ nodes[node]++;
+
+ loops_done = td->loops_done;
+ loops_done_min = min(loops_done, loops_done_min);
+ loops_done_max = max(loops_done, loops_done_max);
+ }
+
+ nr_max = 0;
+ nr_min = g->p.nr_tasks;
+ sum = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ if (!is_node_present(node))
+ continue;
+ nr = nodes[node];
+ nr_min = min(nr, nr_min);
+ nr_max = max(nr, nr_max);
+ sum += nr;
+ }
+ BUG_ON(nr_min > nr_max);
+
+ BUG_ON(sum > g->p.nr_tasks);
+
+ if (0 && (sum < g->p.nr_tasks))
+ return;
+
+ /*
+ * Count the number of distinct process groups present
+ * on nodes - when we are converged this will decrease
+ * to g->p.nr_proc:
+ */
+ process_groups = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ int processes;
+
+ if (!is_node_present(node))
+ continue;
+ processes = count_node_processes(node);
+ nr = nodes[node];
+ tprintf(" %2d/%-2d", nr, processes);
+
+ process_groups += processes;
+ }
+
+ distance = nr_max - nr_min;
+
+ tprintf(" [%2d/%-2d]", distance, process_groups);
+
+ tprintf(" l:%3d-%-3d (%3d)",
+ loops_done_min, loops_done_max, loops_done_max-loops_done_min);
+
+ if (loops_done_min && loops_done_max) {
+ double skew = 1.0 - (double)loops_done_min/loops_done_max;
+
+ tprintf(" [%4.1f%%]", skew * 100.0);
+ }
+
+ calc_convergence_compression(&strong);
+
+ if (strong && process_groups == g->p.nr_proc) {
+ if (!*convergence) {
+ *convergence = runtime_ns_max;
+ tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
+ if (g->p.measure_convergence) {
+ g->all_converged = true;
+ g->stop_work = true;
+ }
+ }
+ } else {
+ if (*convergence) {
+ tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
+ *convergence = 0;
+ }
+ tprintf("\n");
+ }
+}
+
+static void show_summary(double runtime_ns_max, int l, double *convergence)
+{
+ tprintf("\r # %5.1f%% [%.1f mins]",
+ (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
+
+ calc_convergence(runtime_ns_max, convergence);
+
+ if (g->p.show_details >= 0)
+ fflush(stdout);
+}
+
+static void *worker_thread(void *__tdata)
+{
+ struct thread_data *td = __tdata;
+ struct timeval start0, start, stop, diff;
+ int process_nr = td->process_nr;
+ int thread_nr = td->thread_nr;
+ unsigned long last_perturbance;
+ int task_nr = td->task_nr;
+ int details = g->p.show_details;
+ int first_task, last_task;
+ double convergence = 0;
+ u64 val = td->val;
+ double runtime_ns_max;
+ u8 *global_data;
+ u8 *process_data;
+ u8 *thread_data;
+ u64 bytes_done, secs;
+ long work_done;
+ u32 l;
+ struct rusage rusage;
+
+ bind_to_cpumask(td->bind_cpumask);
+ bind_to_memnode(td->bind_node);
+
+ set_taskname("thread %d/%d", process_nr, thread_nr);
+
+ global_data = g->data;
+ process_data = td->process_data;
+ thread_data = setup_private_data(g->p.bytes_thread);
+
+ bytes_done = 0;
+
+ last_task = 0;
+ if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
+ last_task = 1;
+
+ first_task = 0;
+ if (process_nr == 0 && thread_nr == 0)
+ first_task = 1;
+
+ if (details >= 2) {
+ printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
+ process_nr, thread_nr, global_data, process_data, thread_data);
+ }
+
+ if (g->p.serialize_startup) {
+ pthread_mutex_lock(&g->startup_mutex);
+ g->nr_tasks_started++;
+ pthread_mutex_unlock(&g->startup_mutex);
+
+ /* Here we will wait for the main process to start us all at once: */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->nr_tasks_working++;
+
+		/* The last one wakes the main process: */
+ if (g->nr_tasks_working == g->p.nr_tasks)
+ pthread_mutex_unlock(&g->startup_done_mutex);
+
+ pthread_mutex_unlock(&g->start_work_mutex);
+ }
+
+ gettimeofday(&start0, NULL);
+
+ start = stop = start0;
+ last_perturbance = start.tv_sec;
+
+ for (l = 0; l < g->p.nr_loops; l++) {
+ start = stop;
+
+ if (g->stop_work)
+ break;
+
+ val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
+ val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
+ val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
+
+ if (g->p.sleep_usecs) {
+ pthread_mutex_lock(td->process_lock);
+ usleep(g->p.sleep_usecs);
+ pthread_mutex_unlock(td->process_lock);
+ }
+ /*
+ * Amount of work to be done under a process-global lock:
+ */
+ if (g->p.bytes_process_locked) {
+ pthread_mutex_lock(td->process_lock);
+ val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
+ pthread_mutex_unlock(td->process_lock);
+ }
+
+ work_done = g->p.bytes_global + g->p.bytes_process +
+ g->p.bytes_process_locked + g->p.bytes_thread;
+
+ update_curr_cpu(task_nr, work_done);
+ bytes_done += work_done;
+
+ if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
+ continue;
+
+ td->loops_done = l;
+
+ gettimeofday(&stop, NULL);
+
+ /* Check whether our max runtime timed out: */
+ if (g->p.nr_secs) {
+ timersub(&stop, &start0, &diff);
+ if ((u32)diff.tv_sec >= g->p.nr_secs) {
+ g->stop_work = true;
+ break;
+ }
+ }
+
+ /* Update the summary at most once per second: */
+ if (start.tv_sec == stop.tv_sec)
+ continue;
+
+ /*
+ * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
+ * by migrating to CPU#0:
+ */
+ if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
+ cpu_set_t orig_mask;
+ int target_cpu;
+ int this_cpu;
+
+ last_perturbance = stop.tv_sec;
+
+ /*
+ * Depending on where we are running, move into
+ * the other half of the system, to create some
+ * real disturbance:
+ */
+ this_cpu = g->threads[task_nr].curr_cpu;
+ if (this_cpu < g->p.nr_cpus/2)
+ target_cpu = g->p.nr_cpus-1;
+ else
+ target_cpu = 0;
+
+ orig_mask = bind_to_cpu(target_cpu);
+
+ /* Here we are running on the target CPU already */
+ if (details >= 1)
+				printf(" (injecting perturbance, moved to CPU#%d)\n", target_cpu);
+
+ bind_to_cpumask(orig_mask);
+ }
+
+ if (details >= 3) {
+ timersub(&stop, &start, &diff);
+ runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+ runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
+
+ if (details >= 0) {
+ printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
+ process_nr, thread_nr, runtime_ns_max / bytes_done, val);
+ }
+ fflush(stdout);
+ }
+ if (!last_task)
+ continue;
+
+ timersub(&stop, &start0, &diff);
+ runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
+ runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
+
+ show_summary(runtime_ns_max, l, &convergence);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start0, &diff);
+ td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
+ td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
+ secs = td->runtime_ns / NSEC_PER_SEC;
+ td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0;
+
+ getrusage(RUSAGE_THREAD, &rusage);
+ td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
+ td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
+ td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
+ td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
+
+ free_data(thread_data, g->p.bytes_thread);
+
+ pthread_mutex_lock(&g->stop_work_mutex);
+ g->bytes_done += bytes_done;
+ pthread_mutex_unlock(&g->stop_work_mutex);
+
+ return NULL;
+}
+
+/*
+ * A worker process creates and runs its per-process worker threads:
+ */
+static void worker_process(int process_nr)
+{
+ pthread_mutex_t process_lock;
+ struct thread_data *td;
+ pthread_t *pthreads;
+ u8 *process_data;
+ int task_nr;
+ int ret;
+ int t;
+
+ pthread_mutex_init(&process_lock, NULL);
+ set_taskname("process %d", process_nr);
+
+ /*
+ * Pick up the memory policy and the CPU binding of our first thread,
+ * so that we initialize memory accordingly:
+ */
+ task_nr = process_nr*g->p.nr_threads;
+ td = g->threads + task_nr;
+
+ bind_to_memnode(td->bind_node);
+ bind_to_cpumask(td->bind_cpumask);
+
+ pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
+ process_data = setup_private_data(g->p.bytes_process);
+
+ if (g->p.show_details >= 3) {
+ printf(" # process %2d global mem: %p, process mem: %p\n",
+ process_nr, g->data, process_data);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ td->process_data = process_data;
+ td->process_nr = process_nr;
+ td->thread_nr = t;
+ td->task_nr = task_nr;
+ td->val = rand();
+ td->curr_cpu = -1;
+ td->process_lock = &process_lock;
+
+ ret = pthread_create(pthreads + t, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ ret = pthread_join(pthreads[t], NULL);
+ BUG_ON(ret);
+ }
+
+ free_data(process_data, g->p.bytes_process);
+ free(pthreads);
+}
+
+static void print_summary(void)
+{
+ if (g->p.show_details < 0)
+ return;
+
+ printf("\n ###\n");
+ printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
+ g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
+ printf(" # %5dx %5ldMB global shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_global/1024/1024);
+ printf(" # %5dx %5ldMB process shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_process/1024/1024);
+ printf(" # %5dx %5ldMB thread local mem operations\n",
+ g->p.nr_loops, g->p.bytes_thread/1024/1024);
+
+ printf(" ###\n");
+
+ printf("\n ###\n"); fflush(stdout);
+}
+
+static void init_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ g->threads = zalloc_shared_data(size);
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ int cpu;
+
+ /* Allow all nodes by default: */
+ td->bind_node = -1;
+
+ /* Allow all CPUs by default: */
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+}
+
+static void deinit_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+
+ free_data(g->threads, size);
+}
+
+static int init(void)
+{
+ g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
+
+ /* Copy over options: */
+ g->p = p0;
+
+ g->p.nr_cpus = numa_num_configured_cpus();
+
+ g->p.nr_nodes = numa_max_node() + 1;
+
+ /* char array in count_process_nodes(): */
+ BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+
+ if (g->p.show_quiet && !g->p.show_details)
+ g->p.show_details = -1;
+
+ /* Some memory should be specified: */
+ if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
+ return -1;
+
+ if (g->p.mb_global_str) {
+ g->p.mb_global = atof(g->p.mb_global_str);
+ BUG_ON(g->p.mb_global < 0);
+ }
+
+ if (g->p.mb_proc_str) {
+ g->p.mb_proc = atof(g->p.mb_proc_str);
+ BUG_ON(g->p.mb_proc < 0);
+ }
+
+ if (g->p.mb_proc_locked_str) {
+ g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
+ BUG_ON(g->p.mb_proc_locked < 0);
+ BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
+ }
+
+ if (g->p.mb_thread_str) {
+ g->p.mb_thread = atof(g->p.mb_thread_str);
+ BUG_ON(g->p.mb_thread < 0);
+ }
+
+ BUG_ON(g->p.nr_threads <= 0);
+ BUG_ON(g->p.nr_proc <= 0);
+
+ g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
+
+ g->p.bytes_global = g->p.mb_global *1024L*1024L;
+ g->p.bytes_process = g->p.mb_proc *1024L*1024L;
+ g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
+ g->p.bytes_thread = g->p.mb_thread *1024L*1024L;
+
+ g->data = setup_shared_data(g->p.bytes_global);
+
+ /* Startup serialization: */
+ init_global_mutex(&g->start_work_mutex);
+ init_global_mutex(&g->startup_mutex);
+ init_global_mutex(&g->startup_done_mutex);
+ init_global_mutex(&g->stop_work_mutex);
+
+ init_thread_data();
+
+ tprintf("#\n");
+ if (parse_setup_cpu_list() || parse_setup_node_list())
+ return -1;
+ tprintf("#\n");
+
+ print_summary();
+
+ return 0;
+}
+
+static void deinit(void)
+{
+ free_data(g->data, g->p.bytes_global);
+ g->data = NULL;
+
+ deinit_thread_data();
+
+ free_data(g, sizeof(*g));
+ g = NULL;
+}
+
+/*
+ * Print a short or long result, depending on the verbosity setting:
+ */
+static void print_res(const char *name, double val,
+ const char *txt_unit, const char *txt_short, const char *txt_long)
+{
+ if (!name)
+ name = "main,";
+
+ if (!g->p.show_quiet)
+ printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
+ else
+ printf(" %14.3f %s\n", val, txt_long);
+}
+
+static int __bench_numa(const char *name)
+{
+ struct timeval start, stop, diff;
+ u64 runtime_ns_min, runtime_ns_sum;
+ pid_t *pids, pid, wpid;
+ double delta_runtime;
+ double runtime_avg;
+ double runtime_sec_max;
+ double runtime_sec_min;
+ int wait_stat;
+ double bytes;
+ int i, t, p;
+
+ if (init())
+ return -1;
+
+ pids = zalloc(g->p.nr_proc * sizeof(*pids));
+ pid = -1;
+
+	/* All threads try to acquire it; this way we can wait for them to start up: */
+ pthread_mutex_lock(&g->start_work_mutex);
+
+ if (g->p.serialize_startup) {
+ tprintf(" #\n");
+ tprintf(" # Startup synchronization: ..."); fflush(stdout);
+ }
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ pid = fork();
+ dprintf(" # process %2d: PID %d\n", i, pid);
+
+ BUG_ON(pid < 0);
+ if (!pid) {
+ /* Child process: */
+ worker_process(i);
+
+ exit(0);
+ }
+ pids[i] = pid;
+	}
+
+	/* Wait for all the threads to start up: */
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ usleep(USEC_PER_MSEC);
+
+ BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
+
+ if (g->p.serialize_startup) {
+ double startup_sec;
+
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ /* This will start all threads: */
+ pthread_mutex_unlock(&g->start_work_mutex);
+
+ /* This mutex is locked - the last started thread will wake us: */
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ gettimeofday(&stop, NULL);
+
+ timersub(&stop, &start, &diff);
+
+ startup_sec = diff.tv_sec * NSEC_PER_SEC;
+ startup_sec += diff.tv_usec * NSEC_PER_USEC;
+ startup_sec /= NSEC_PER_SEC;
+
+ tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
+ tprintf(" #\n");
+
+ start = stop;
+ pthread_mutex_unlock(&g->startup_done_mutex);
+ } else {
+ gettimeofday(&start, NULL);
+ }
+
+ /* Parent process: */
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ wpid = waitpid(pids[i], &wait_stat, 0);
+ BUG_ON(wpid < 0);
+ BUG_ON(!WIFEXITED(wait_stat));
+	}
+
+ runtime_ns_sum = 0;
+ runtime_ns_min = -1LL;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ u64 thread_runtime_ns = g->threads[t].runtime_ns;
+
+ runtime_ns_sum += thread_runtime_ns;
+ runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
+
+ tprintf("\n ###\n");
+ tprintf("\n");
+
+ runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
+ runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
+ runtime_sec_max /= NSEC_PER_SEC;
+
+ runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
+
+ bytes = g->bytes_done;
+ runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
+
+ if (g->p.measure_convergence) {
+ print_res(name, runtime_sec_max,
+ "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
+ }
+
+ print_res(name, runtime_sec_max,
+ "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");
+
+ print_res(name, runtime_sec_min,
+ "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");
+
+ print_res(name, runtime_avg,
+ "secs,", "runtime-avg/thread", "secs average thread-runtime");
+
+ delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
+ print_res(name, delta_runtime / runtime_sec_max * 100.0,
+ "%,", "spread-runtime/thread", "% difference between max/avg runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9,
+ "GB,", "data/thread", "GB data processed, per thread");
+
+ print_res(name, bytes / 1e9,
+ "GB,", "data-total", "GB data processed, total");
+
+ print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
+ "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
+ "GB/sec,", "thread-speed", "GB/sec/thread speed");
+
+ print_res(name, bytes / runtime_sec_max / 1e9,
+ "GB/sec,", "total-speed", "GB/sec total speed");
+
+ if (g->p.show_details >= 2) {
+ char tname[14 + 2 * 11 + 1];
+ struct thread_data *td;
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+ memset(tname, 0, sizeof(tname));
+ td = g->threads + p*g->p.nr_threads + t;
+ snprintf(tname, sizeof(tname), "process%d:thread%d", p, t);
+ print_res(tname, td->speed_gbs,
+ "GB/sec", "thread-speed", "GB/sec/thread speed");
+ print_res(tname, td->system_time_ns / NSEC_PER_SEC,
+ "secs", "thread-system-time", "system CPU time/thread");
+ print_res(tname, td->user_time_ns / NSEC_PER_SEC,
+ "secs", "thread-user-time", "user CPU time/thread");
+ }
+ }
+ }
+
+ free(pids);
+
+ deinit();
+
+ return 0;
+}
+
+#define MAX_ARGS 50
+
+static int command_size(const char **argv)
+{
+ int size = 0;
+
+ while (*argv) {
+ size++;
+ argv++;
+ }
+
+ BUG_ON(size >= MAX_ARGS);
+
+ return size;
+}
+
+static void init_params(struct params *p, const char *name, int argc, const char **argv)
+{
+ int i;
+
+ printf("\n # Running %s \"perf bench numa", name);
+
+ for (i = 0; i < argc; i++)
+ printf(" %s", argv[i]);
+
+ printf("\"\n");
+
+ memset(p, 0, sizeof(*p));
+
+ /* Initialize nonzero defaults: */
+
+ p->serialize_startup = 1;
+ p->data_reads = true;
+ p->data_writes = true;
+ p->data_backwards = true;
+ p->data_rand_walk = true;
+ p->nr_loops = -1;
+ p->init_random = true;
+ p->mb_global_str = "1";
+ p->nr_proc = 1;
+ p->nr_threads = 1;
+ p->nr_secs = 5;
+ p->run_all = argc == 1;
+}
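+
+/*
+ * With the defaults above, a bare "perf bench numa mem" run is roughly
+ * equivalent to this explicit invocation (illustrative expansion):
+ *
+ *	perf bench numa mem -p 1 -t 1 -G 1 -s 5 -R -W -B -r -I -S
+ *
+ * i.e. one process with one thread, 1 MB of global memory, a 5 second
+ * cap (loops unlimited), read+write, backwards and random-walk access,
+ * randomized initial contents and serialized startup.
+ */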
+
+static int run_bench_numa(const char *name, const char **argv)
+{
+ int argc = command_size(argv);
+
+ init_params(&p0, name, argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (__bench_numa(name))
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
+
+#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
+#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"
+
+#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
+#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"
+
+#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
+#define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
+
+/*
+ * The built-in test-suite executed by "perf bench numa -a".
+ *
+ * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
+ */
+static const char *tests[][MAX_ARGS] = {
+ /* Basic single-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM },
+ { "RAM-bw-local-NOTHP,",
+ "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "1", OPT_BW_RAM },
+
+ /* 2-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
+ { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
+
+ /* Cross-stream NUMA bandwidth measurement: */
+ { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
+
+ /* Convergence latency measurements: */
+ { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
+ { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
+ { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
+ { " 4x4-convergence-NOTHP,",
+ "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
+ { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
+ { " 8x4-convergence-NOTHP,",
+ "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
+ { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
+ { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
+ { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
+ { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
+
+ /* Various NUMA process/thread layout bandwidth measurements: */
+ { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
+ { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
+ { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
+ { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
+ { " 8x1-bw-process-NOTHP,",
+ "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
+ { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
+
+ { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+
+ { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread-NOTHP,",
+ "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
+ { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+
+ { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
+ { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
+ { "numa01-bw-thread-NOTHP,",
+ "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
+};
+
+static int bench_all(void)
+{
+ int nr = ARRAY_SIZE(tests);
+ int ret;
+ int i;
+
+ ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
+ BUG_ON(ret < 0);
+
+ for (i = 0; i < nr; i++) {
+ run_bench_numa(tests[i][0], tests[i] + 1);
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
+int bench_numa(int argc, const char **argv)
+{
+ init_params(&p0, "main,", argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (p0.run_all)
+ return bench_all();
+
+ if (__bench_numa(NULL))
+ goto err;
+
+ return 0;
+
+err:
+ usage_with_options(numa_usage, options);
+ return -1;
+}
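
The tests[][MAX_ARGS] table above drives the "-a" suite: each row is an
argv-style vector whose first entry is a display name, the unused tail of
the fixed-size row is implicitly NULL, and command_size() recovers argc by
walking to that NULL. Keeping the options as literal strings lets every row
reuse the same parse_options() path as the interactive command line. A
minimal standalone sketch of the pattern, with a hypothetical run_one()
standing in for run_bench_numa():

    #include <stdio.h>

    #define MAX_ARGS 50

    /* Each row: display name, then argv-style options; the rest is NULL. */
    static const char *tests[][MAX_ARGS] = {
        { "small,", "mem", "-p", "1", "-t", "1" },
        { "large,", "mem", "-p", "8", "-t", "4" },
    };

    /* Count entries up to the NULL terminator, like command_size(). */
    static int command_size(const char **argv)
    {
        int size = 0;

        while (*argv) {
            size++;
            argv++;
        }
        return size;
    }

    /* Hypothetical runner: just show what a real bench would parse. */
    static int run_one(const char *name, const char **argv)
    {
        int i, argc = command_size(argv);

        printf("# %s argc=%d:", name, argc);
        for (i = 0; i < argc; i++)
            printf(" %s", argv[i]);
        printf("\n");
        return 0;
    }

    int main(void)
    {
        unsigned long i;

        for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
            run_one(tests[i][0], tests[i] + 1);
        return 0;
    }
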
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
new file mode 100644
index 000000000..f9d7641ae
--- /dev/null
+++ b/tools/perf/bench/sched-messaging.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * sched-messaging.c
+ *
+ * messaging: Benchmark for scheduler and IPC mechanisms
+ *
+ * Based on hackbench by Rusty Russell <rusty@rustcorp.com.au>
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ *
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+/* Test groups of 20 processes spraying to 20 receivers */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <poll.h>
+#include <limits.h>
+#include <err.h>
+#include <linux/time64.h>
+
+#define DATASIZE 100
+
+static bool use_pipes = false;
+static unsigned int nr_loops = 100;
+static bool thread_mode = false;
+static unsigned int num_groups = 10;
+
+struct sender_context {
+ unsigned int num_fds;
+ int ready_out;
+ int wakefd;
+ int out_fds[0];
+};
+
+struct receiver_context {
+ unsigned int num_packets;
+ int in_fds[2];
+ int ready_out;
+ int wakefd;
+};
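
struct sender_context above ends in "int out_fds[0]", the pre-C99 spelling
of a flexible array member: group() below sizes the header and the trailing
fd table with a single malloc(). A minimal sketch of the idiom in C99
syntax, with a hypothetical make_ctx():

    #include <stdio.h>
    #include <stdlib.h>

    struct fd_ctx {
        unsigned int num_fds;
        int fds[];              /* C99 flexible array member */
    };

    /* One allocation covers the struct plus its trailing fd table. */
    static struct fd_ctx *make_ctx(unsigned int n)
    {
        struct fd_ctx *ctx = malloc(sizeof(*ctx) + n * sizeof(int));

        if (!ctx)
            return NULL;
        ctx->num_fds = n;
        return ctx;
    }

    int main(void)
    {
        struct fd_ctx *ctx = make_ctx(4);
        unsigned int i;

        if (!ctx)
            return 1;
        for (i = 0; i < ctx->num_fds; i++)
            ctx->fds[i] = -1;   /* not wired up yet */
        printf("allocated %u trailing fds\n", ctx->num_fds);
        free(ctx);
        return 0;
    }
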
+
+static void fdpair(int fds[2])
+{
+ if (use_pipes) {
+ if (pipe(fds) == 0)
+ return;
+ } else {
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) == 0)
+ return;
+ }
+
+ err(EXIT_FAILURE, use_pipes ? "pipe()" : "socketpair()");
+}
+
+/* Block until we're ready to go */
+static void ready(int ready_out, int wakefd)
+{
+ char dummy;
+ struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
+
+ /* Tell them we're ready. */
+ if (write(ready_out, &dummy, 1) != 1)
+ err(EXIT_FAILURE, "CLIENT: ready write");
+
+ /* Wait for "GO" signal */
+ if (poll(&pollfd, 1, -1) != 1)
+ err(EXIT_FAILURE, "poll");
+}
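
ready() is one half of a two-pipe start barrier: every worker writes a byte
to the shared ready pipe and then poll()s the wake pipe; once the parent
has collected one byte per worker, a single byte written to the wake pipe
releases all of them at once, because nobody ever reads it off. A minimal
standalone sketch of that barrier, assuming only POSIX pipes, poll(2) and
fork(2):

    #include <err.h>
    #include <poll.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #define NR_WORKERS 4

    static void wait_for_go(int ready_out, int wakefd)
    {
        struct pollfd pfd = { .fd = wakefd, .events = POLLIN };
        char dummy = 'r';

        if (write(ready_out, &dummy, 1) != 1)   /* tell parent we're up */
            err(EXIT_FAILURE, "ready write");
        if (poll(&pfd, 1, -1) != 1)             /* block until the "go" byte */
            err(EXIT_FAILURE, "poll");
    }

    int main(void)
    {
        int readyfds[2], wakefds[2], i;
        char dummy;

        if (pipe(readyfds) || pipe(wakefds))
            err(EXIT_FAILURE, "pipe");

        for (i = 0; i < NR_WORKERS; i++) {
            switch (fork()) {
            case -1:
                err(EXIT_FAILURE, "fork");
            case 0:
                wait_for_go(readyfds[1], wakefds[0]);
                printf("worker %d released\n", i);
                exit(0);
            }
        }

        for (i = 0; i < NR_WORKERS; i++)        /* one ready byte per worker */
            if (read(readyfds[0], &dummy, 1) != 1)
                err(EXIT_FAILURE, "ready read");

        if (write(wakefds[1], &dummy, 1) != 1)  /* release everyone */
            err(EXIT_FAILURE, "go write");

        for (i = 0; i < NR_WORKERS; i++)
            wait(NULL);
        return 0;
    }
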
+
+/* Sender sprays nr_loops messages down each file descriptor */
+static void *sender(struct sender_context *ctx)
+{
+ char data[DATASIZE];
+ unsigned int i, j;
+
+ ready(ctx->ready_out, ctx->wakefd);
+
+ /* Now pump to every receiver. */
+ for (i = 0; i < nr_loops; i++) {
+ for (j = 0; j < ctx->num_fds; j++) {
+ int ret, done = 0;
+
+again:
+ ret = write(ctx->out_fds[j], data + done,
+ sizeof(data)-done);
+ if (ret < 0)
+ err(EXIT_FAILURE, "SENDER: write");
+ done += ret;
+ if (done < DATASIZE)
+ goto again;
+ }
+ }
+
+ return NULL;
+}
+
+/* One receiver per fd */
+static void *receiver(struct receiver_context* ctx)
+{
+ unsigned int i;
+
+ if (!thread_mode)
+ close(ctx->in_fds[1]);
+
+ /* Wait for start... */
+ ready(ctx->ready_out, ctx->wakefd);
+
+ /* Receive them all */
+ for (i = 0; i < ctx->num_packets; i++) {
+ char data[DATASIZE];
+ int ret, done = 0;
+
+again:
+ ret = read(ctx->in_fds[0], data + done, DATASIZE - done);
+ if (ret < 0)
+ err(EXIT_FAILURE, "SERVER: read");
+ done += ret;
+ if (done < DATASIZE)
+ goto again;
+ }
+
+ return NULL;
+}
+
+static pthread_t create_worker(void *ctx, void *(*func)(void *))
+{
+ pthread_attr_t attr;
+ pthread_t childid;
+ int ret;
+
+ if (!thread_mode) {
+ /* process mode */
+ /* Fork the receiver. */
+ switch (fork()) {
+ case -1:
+ err(EXIT_FAILURE, "fork()");
+ break;
+ case 0:
+ (*func) (ctx);
+ exit(0);
+ break;
+ default:
+ break;
+ }
+
+ return (pthread_t)0;
+ }
+
+ if (pthread_attr_init(&attr) != 0)
+ err(EXIT_FAILURE, "pthread_attr_init:");
+
+#ifndef __ia64__
+ if (pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN) != 0)
+ err(EXIT_FAILURE, "pthread_attr_setstacksize");
+#endif
+
+ ret = pthread_create(&childid, &attr, func, ctx);
+ if (ret != 0)
+ err(EXIT_FAILURE, "pthread_create failed");
+
+ return childid;
+}
+
+static void reap_worker(pthread_t id)
+{
+ int proc_status;
+ void *thread_status;
+
+ if (!thread_mode) {
+ /* process mode */
+ wait(&proc_status);
+ if (!WIFEXITED(proc_status))
+ exit(1);
+ } else {
+ pthread_join(id, &thread_status);
+ }
+}
+
+/* One group of senders and receivers */
+static unsigned int group(pthread_t *pth,
+ unsigned int num_fds,
+ int ready_out,
+ int wakefd)
+{
+ unsigned int i;
+ struct sender_context *snd_ctx = malloc(sizeof(struct sender_context)
+ + num_fds * sizeof(int));
+
+ if (!snd_ctx)
+ err(EXIT_FAILURE, "malloc()");
+
+ for (i = 0; i < num_fds; i++) {
+ int fds[2];
+ struct receiver_context *ctx = malloc(sizeof(*ctx));
+
+ if (!ctx)
+ err(EXIT_FAILURE, "malloc()");
+
+ /* Create the pipe between client and server */
+ fdpair(fds);
+
+ ctx->num_packets = num_fds * nr_loops;
+ ctx->in_fds[0] = fds[0];
+ ctx->in_fds[1] = fds[1];
+ ctx->ready_out = ready_out;
+ ctx->wakefd = wakefd;
+
+ pth[i] = create_worker(ctx, (void *)receiver);
+
+ snd_ctx->out_fds[i] = fds[1];
+ if (!thread_mode)
+ close(fds[0]);
+ }
+
+ /* Now we have all the fds, fork the senders */
+ for (i = 0; i < num_fds; i++) {
+ snd_ctx->ready_out = ready_out;
+ snd_ctx->wakefd = wakefd;
+ snd_ctx->num_fds = num_fds;
+
+ pth[num_fds+i] = create_worker(snd_ctx, (void *)sender);
+ }
+
+ /* Close the fds we have left */
+ if (!thread_mode)
+ for (i = 0; i < num_fds; i++)
+ close(snd_ctx->out_fds[i]);
+
+ /* Return number of children to reap */
+ return num_fds * 2;
+}
+
+static const struct option options[] = {
+ OPT_BOOLEAN('p', "pipe", &use_pipes,
+ "Use pipe() instead of socketpair()"),
+ OPT_BOOLEAN('t', "thread", &thread_mode,
+ "Be multi thread instead of multi process"),
+ OPT_UINTEGER('g', "group", &num_groups, "Specify number of groups"),
+ OPT_UINTEGER('l', "nr_loops", &nr_loops, "Specify the number of loops to run (default: 100)"),
+ OPT_END()
+};
+
+static const char * const bench_sched_message_usage[] = {
+ "perf bench sched messaging <options>",
+ NULL
+};
+
+int bench_sched_messaging(int argc, const char **argv)
+{
+ unsigned int i, total_children;
+ struct timeval start, stop, diff;
+ unsigned int num_fds = 20;
+ int readyfds[2], wakefds[2];
+ char dummy;
+ pthread_t *pth_tab;
+
+ argc = parse_options(argc, argv, options,
+ bench_sched_message_usage, 0);
+
+ pth_tab = malloc(num_fds * 2 * num_groups * sizeof(pthread_t));
+ if (!pth_tab)
+ err(EXIT_FAILURE, "main:malloc()");
+
+ fdpair(readyfds);
+ fdpair(wakefds);
+
+ total_children = 0;
+ for (i = 0; i < num_groups; i++)
+ total_children += group(pth_tab+total_children, num_fds,
+ readyfds[1], wakefds[0]);
+
+ /* Wait for everyone to be ready */
+ for (i = 0; i < total_children; i++)
+ if (read(readyfds[0], &dummy, 1) != 1)
+ err(EXIT_FAILURE, "Reading for readyfds");
+
+ gettimeofday(&start, NULL);
+
+ /* Kick them off */
+ if (write(wakefds[1], &dummy, 1) != 1)
+ err(EXIT_FAILURE, "Writing to start them");
+
+ /* Reap them all */
+ for (i = 0; i < total_children; i++)
+ reap_worker(pth_tab[i]);
+
+ gettimeofday(&stop, NULL);
+
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# %d sender and receiver %s per group\n",
+ num_fds, thread_mode ? "threads" : "processes");
+ printf("# %d groups == %d %s run\n\n",
+ num_groups, num_groups * 2 * num_fds,
+ thread_mode ? "threads" : "processes");
+ printf(" %14s: %lu.%03lu [sec]\n", "Total time",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+ break;
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n", diff.tv_sec,
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+ break;
+ default:
+ /* reaching here is a disaster; should never happen */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ free(pth_tab);
+
+ return 0;
+}
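
The result printing above leans on timersub() and on tv_usec being a
sub-second microsecond count, so dividing it by USEC_PER_MSEC (1000) yields
the millisecond fraction. A minimal sketch of the same seconds.milliseconds
timing pattern, with usleep() standing in for the benchmark workload:

    #define _DEFAULT_SOURCE         /* timersub() on glibc */
    #include <stdio.h>
    #include <sys/time.h>
    #include <unistd.h>

    int main(void)
    {
        struct timeval start, stop, diff;

        gettimeofday(&start, NULL);
        usleep(123 * 1000);                     /* stand-in workload */
        gettimeofday(&stop, NULL);

        timersub(&stop, &start, &diff);         /* diff = stop - start */

        /* tv_usec is 0..999999; /1000 gives the millisecond fraction */
        printf("Total time: %lu.%03lu [sec]\n",
               (unsigned long)diff.tv_sec,
               (unsigned long)(diff.tv_usec / 1000));
        return 0;
    }
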
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
new file mode 100644
index 000000000..0591be008
--- /dev/null
+++ b/tools/perf/bench/sched-pipe.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * sched-pipe.c
+ *
+ * pipe: Benchmark for pipe()
+ *
+ * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
+ * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
+ * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/time64.h>
+
+#include <pthread.h>
+
+struct thread_data {
+ int nr;
+ int pipe_read;
+ int pipe_write;
+ pthread_t pthread;
+};
+
+#define LOOPS_DEFAULT 1000000
+static int loops = LOOPS_DEFAULT;
+
+/* Use processes by default: */
+static bool threaded;
+
+static const struct option options[] = {
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"),
+ OPT_END()
+};
+
+static const char * const bench_sched_pipe_usage[] = {
+ "perf bench sched pipe <options>",
+ NULL
+};
+
+static void *worker_thread(void *__tdata)
+{
+ struct thread_data *td = __tdata;
+ int m = 0, i;
+ int ret;
+
+ for (i = 0; i < loops; i++) {
+ if (!td->nr) {
+ ret = read(td->pipe_read, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ } else {
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = read(td->pipe_read, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ }
+ }
+
+ return NULL;
+}
+
+int bench_sched_pipe(int argc, const char **argv)
+{
+ struct thread_data threads[2], *td;
+ int pipe_1[2], pipe_2[2];
+ struct timeval start, stop, diff;
+ unsigned long long result_usec = 0;
+ int nr_threads = 2;
+ int t;
+
+ /*
+ * why does "ret" exist?
+ * discarding returned value of read(), write()
+ * causes error in building environment for perf
+ */
+ int __maybe_unused ret, wait_stat;
+ pid_t pid, retpid __maybe_unused;
+
+ argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
+
+ BUG_ON(pipe(pipe_1));
+ BUG_ON(pipe(pipe_2));
+
+ gettimeofday(&start, NULL);
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ td->nr = t;
+
+ if (t == 0) {
+ td->pipe_read = pipe_1[0];
+ td->pipe_write = pipe_2[1];
+ } else {
+ td->pipe_write = pipe_1[1];
+ td->pipe_read = pipe_2[0];
+ }
+ }
+
+ if (threaded) {
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ ret = pthread_create(&td->pthread, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ ret = pthread_join(td->pthread, NULL);
+ BUG_ON(ret);
+ }
+
+ } else {
+ pid = fork();
+ assert(pid >= 0);
+
+ if (!pid) {
+ worker_thread(threads + 0);
+ exit(0);
+ } else {
+ worker_thread(threads + 1);
+ }
+
+ retpid = waitpid(pid, &wait_stat, 0);
+ assert((retpid == pid) && WIFEXITED(wait_stat));
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Executed %d pipe operations between two %s\n\n",
+ loops, threaded ? "threads" : "processes");
+
+ result_usec = diff.tv_sec * USEC_PER_SEC;
+ result_usec += diff.tv_usec;
+
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / (double)loops);
+ printf(" %14d ops/sec\n",
+ (int)((double)loops /
+ ((double)result_usec / (double)USEC_PER_SEC)));
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+ break;
+
+ default:
+ /* reaching here is a disaster; should never happen */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
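
bench_sched_pipe() cross-wires two pipes so each task reads what the other
wrote: task 0 reads pipe_1 and writes pipe_2, task 1 does the opposite, and
one int bounces back and forth per loop iteration. A minimal sketch of a
single such round trip between parent and child:

    #include <err.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/wait.h>
    #include <unistd.h>

    int main(void)
    {
        int pipe_1[2], pipe_2[2];   /* pipe_1: parent->child, pipe_2: child->parent */
        int token = 42;
        pid_t pid;

        if (pipe(pipe_1) || pipe(pipe_2))
            err(EXIT_FAILURE, "pipe");

        pid = fork();
        if (pid < 0)
            err(EXIT_FAILURE, "fork");

        if (!pid) {
            /* child: read the token from pipe_1, echo it back on pipe_2 */
            int m;

            if (read(pipe_1[0], &m, sizeof(m)) != sizeof(m))
                err(EXIT_FAILURE, "child read");
            if (write(pipe_2[1], &m, sizeof(m)) != sizeof(m))
                err(EXIT_FAILURE, "child write");
            _exit(0);
        }

        /* parent: send the token, wait for the echo - one round trip */
        if (write(pipe_1[1], &token, sizeof(token)) != sizeof(token))
            err(EXIT_FAILURE, "parent write");
        if (read(pipe_2[0], &token, sizeof(token)) != sizeof(token))
            err(EXIT_FAILURE, "parent read");

        printf("round trip done, token=%d\n", token);
        waitpid(pid, NULL, 0);
        return 0;
    }
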
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
new file mode 100644
index 000000000..830481b8d
--- /dev/null
+++ b/tools/perf/builtin-annotate.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-annotate.c
+ *
+ * Builtin annotate command: Analyze the perf.data input file,
+ * look up and read DSOs and symbol information and display
+ * a histogram of results, along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+#include "util/color.h"
+#include <linux/list.h>
+#include "util/cache.h"
+#include <linux/rbtree.h>
+#include "util/symbol.h"
+
+#include "perf.h"
+#include "util/debug.h"
+
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/annotate.h"
+#include "util/event.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/data.h"
+#include "arch/common.h"
+#include "util/block-range.h"
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+
+struct perf_annotate {
+ struct perf_tool tool;
+ struct perf_session *session;
+ struct annotation_options opts;
+ bool use_tui, use_stdio, use_stdio2, use_gtk;
+ bool skip_missing;
+ bool has_br_stack;
+ bool group_set;
+ const char *sym_hist_filter;
+ const char *cpu_list;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
+
+/*
+ * Given one basic block:
+ *
+ * from to branch_i
+ * * ----> *
+ * |
+ * | block
+ * v
+ * * ----> *
+ * from to branch_i+1
+ *
+ * where the horizontal are the branches and the vertical is the executed
+ * block of instructions.
+ *
+ * We count, for each 'instruction', the number of blocks that covered it, as
+ * well as how often each branch is taken.
+ *
+ * We can do this without knowing the actual instruction stream by keeping
+ * track of the address ranges. We break down ranges such that there is no
+ * overlap and iterate from the start until the end.
+ *
+ * @acme: once we parse the objdump output _before_ processing the samples,
+ * we can easily fold the branch.cycles IPC bits in.
+ */
+static void process_basic_block(struct addr_map_symbol *start,
+ struct addr_map_symbol *end,
+ struct branch_flags *flags)
+{
+ struct symbol *sym = start->sym;
+ struct annotation *notes = sym ? symbol__annotation(sym) : NULL;
+ struct block_range_iter iter;
+ struct block_range *entry;
+
+ /*
+ * Sanity; NULL isn't executable and the CPU cannot execute backwards
+ */
+ if (!start->addr || start->addr > end->addr)
+ return;
+
+ iter = block_range__create(start->addr, end->addr);
+ if (!block_range_iter__valid(&iter))
+ return;
+
+ /*
+ * First block in range is a branch target.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_target);
+ entry->entry++;
+
+ do {
+ entry = block_range_iter(&iter);
+
+ entry->coverage++;
+ entry->sym = sym;
+
+ if (notes)
+ notes->max_coverage = max(notes->max_coverage, entry->coverage);
+
+ } while (block_range_iter__next(&iter));
+
+ /*
+ * Last block in range is a branch.
+ */
+ entry = block_range_iter(&iter);
+ assert(entry->is_branch);
+ entry->taken++;
+ if (flags->predicted)
+ entry->pred++;
+}
+
+static void process_branch_stack(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ struct addr_map_symbol *prev = NULL;
+ struct branch_info *bi;
+ int i;
+
+ if (!bs || !bs->nr)
+ return;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (!bi)
+ return;
+
+ for (i = bs->nr - 1; i >= 0; i--) {
+ /*
+ * XXX filter against symbol
+ */
+ if (prev)
+ process_basic_block(prev, &bi[i].from, &bi[i].flags);
+ prev = &bi[i].to;
+ }
+
+ free(bi);
+}
+
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused,
+ bool single __maybe_unused,
+ void *arg __maybe_unused)
+{
+ struct hist_entry *he = iter->he;
+ struct branch_info *bi;
+ struct perf_sample *sample = iter->sample;
+ struct perf_evsel *evsel = iter->evsel;
+ int err;
+
+ hist__account_cycles(sample->branch_stack, al, sample, false);
+
+ bi = he->branch_info;
+ err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
+
+ if (err)
+ goto out;
+
+ err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
+
+out:
+ return err;
+}
+
+static int process_branch_callback(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct addr_location *al __maybe_unused,
+ struct perf_annotate *ann,
+ struct machine *machine)
+{
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = sample,
+ .add_entry_cb = hist_iter__branch_callback,
+ .hide_unresolved = symbol_conf.hide_unresolved,
+ .ops = &hist_iter_branch,
+ };
+
+ struct addr_location a;
+ int ret;
+
+ if (machine__resolve(machine, &a, sample) < 0)
+ return -1;
+
+ if (a.sym == NULL)
+ return 0;
+
+ if (a.map != NULL)
+ a.map->dso->hit = 1;
+
+ ret = hist_entry_iter__add(&iter, &a, PERF_MAX_STACK_DEPTH, ann);
+ return ret;
+}
+
+static bool has_annotation(struct perf_annotate *ann)
+{
+ return ui__has_annotation() || ann->use_stdio2;
+}
+
+static int perf_evsel__add_sample(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct addr_location *al,
+ struct perf_annotate *ann,
+ struct machine *machine)
+{
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he;
+ int ret;
+
+ if ((!ann->has_br_stack || !has_annotation(ann)) &&
+ ann->sym_hist_filter != NULL &&
+ (al->sym == NULL ||
+ strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
+ /* We're only interested in a symbol named sym_hist_filter */
+ /*
+ * FIXME: why isn't this done in the symbol_filter when loading
+ * the DSO?
+ */
+ if (al->sym != NULL) {
+ rb_erase(&al->sym->rb_node,
+ &al->map->dso->symbols);
+ symbol__delete(al->sym);
+ dso__reset_find_symbol_cache(al->map->dso);
+ }
+ return 0;
+ }
+
+ /*
+ * XXX filtered samples can still have branch entries pointing into our
+ * symbol, and those are missed.
+ */
+ process_branch_stack(sample->branch_stack, al, sample);
+
+ if (ann->has_br_stack && has_annotation(ann))
+ return process_branch_callback(evsel, sample, al, ann, machine);
+
+ he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ ret = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
+ hists__inc_nr_samples(hists, true);
+ return ret;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
+ struct addr_location al;
+ int ret = 0;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_warning("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
+ goto out_put;
+
+ if (!al.filtered &&
+ perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
+ pr_warning("problem incrementing symbol count, "
+ "skipping event\n");
+ ret = -1;
+ }
+out_put:
+ addr_location__put(&al);
+ return ret;
+}
+
+static int process_feature_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ if (event->feat.feat_id < HEADER_LAST_FEATURE)
+ return perf_event__process_feature(tool, event, session);
+ return 0;
+}
+
+static int hist_entry__tty_annotate(struct hist_entry *he,
+ struct perf_evsel *evsel,
+ struct perf_annotate *ann)
+{
+ if (!ann->use_stdio2)
+ return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel, &ann->opts);
+
+ return symbol__tty_annotate2(he->ms.sym, he->ms.map, evsel, &ann->opts);
+}
+
+static void hists__find_annotations(struct hists *hists,
+ struct perf_evsel *evsel,
+ struct perf_annotate *ann)
+{
+ struct rb_node *nd = rb_first(&hists->entries), *next;
+ int key = K_RIGHT;
+
+ while (nd) {
+ struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+ struct annotation *notes;
+
+ if (he->ms.sym == NULL || he->ms.map->dso->annotate_warned)
+ goto find_next;
+
+ if (ann->sym_hist_filter &&
+ (strcmp(he->ms.sym->name, ann->sym_hist_filter) != 0))
+ goto find_next;
+
+ notes = symbol__annotation(he->ms.sym);
+ if (notes->src == NULL) {
+find_next:
+ if (key == K_LEFT)
+ nd = rb_prev(nd);
+ else
+ nd = rb_next(nd);
+ continue;
+ }
+
+ if (use_browser == 2) {
+ int ret;
+ int (*annotate)(struct hist_entry *he,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt);
+
+ annotate = dlsym(perf_gtk_handle,
+ "hist_entry__gtk_annotate");
+ if (annotate == NULL) {
+ ui__error("GTK browser not found!\n");
+ return;
+ }
+
+ ret = annotate(he, evsel, NULL);
+ if (!ret || !ann->skip_missing)
+ return;
+
+ /* skip missing symbols */
+ nd = rb_next(nd);
+ } else if (use_browser == 1) {
+ key = hist_entry__tui_annotate(he, evsel, NULL, &ann->opts);
+
+ switch (key) {
+ case -1:
+ if (!ann->skip_missing)
+ return;
+ /* fall through */
+ case K_RIGHT:
+ next = rb_next(nd);
+ break;
+ case K_LEFT:
+ next = rb_prev(nd);
+ break;
+ default:
+ return;
+ }
+
+ if (next != NULL)
+ nd = next;
+ } else {
+ hist_entry__tty_annotate(he, evsel, ann);
+ nd = rb_next(nd);
+ /*
+ * Since we have a hist_entry per IP for the same
+ * symbol, free notes->src to signal we already
+ * processed this symbol.
+ */
+ zfree(&notes->src->cycles_hist);
+ zfree(&notes->src);
+ }
+ }
+}
+
+static int __cmd_annotate(struct perf_annotate *ann)
+{
+ int ret;
+ struct perf_session *session = ann->session;
+ struct perf_evsel *pos;
+ u64 total_nr_samples;
+
+ if (ann->cpu_list) {
+ ret = perf_session__cpu_bitmap(session, ann->cpu_list,
+ ann->cpu_bitmap);
+ if (ret)
+ goto out;
+ }
+
+ if (!ann->opts.objdump_path) {
+ ret = perf_env__lookup_objdump(&session->header.env,
+ &ann->opts.objdump_path);
+ if (ret)
+ goto out;
+ }
+
+ ret = perf_session__process_events(session);
+ if (ret)
+ goto out;
+
+ if (dump_trace) {
+ perf_session__fprintf_nr_events(session, stdout);
+ perf_evlist__fprintf_nr_events(session->evlist, stdout);
+ goto out;
+ }
+
+ if (verbose > 3)
+ perf_session__fprintf(session, stdout);
+
+ if (verbose > 2)
+ perf_session__fprintf_dsos(session, stdout);
+
+ total_nr_samples = 0;
+ evlist__for_each_entry(session->evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+ u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+
+ if (nr_samples > 0) {
+ total_nr_samples += nr_samples;
+ hists__collapse_resort(hists, NULL);
+ /* Don't sort callchain */
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+ perf_evsel__output_resort(pos, NULL);
+
+ if (symbol_conf.event_group &&
+ !perf_evsel__is_group_leader(pos))
+ continue;
+
+ hists__find_annotations(hists, pos, ann);
+ }
+ }
+
+ if (total_nr_samples == 0) {
+ ui__error("The %s file has no samples!\n", session->data->file.path);
+ goto out;
+ }
+
+ if (use_browser == 2) {
+ void (*show_annotations)(void);
+
+ show_annotations = dlsym(perf_gtk_handle,
+ "perf_gtk__show_annotations");
+ if (show_annotations == NULL) {
+ ui__error("GTK browser not found!\n");
+ goto out;
+ }
+ show_annotations();
+ }
+
+out:
+ return ret;
+}
+
+static const char * const annotate_usage[] = {
+ "perf annotate [<options>]",
+ NULL
+};
+
+int cmd_annotate(int argc, const char **argv)
+{
+ struct perf_annotate annotate = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .namespaces = perf_event__process_namespaces,
+ .attr = perf_event__process_attr,
+ .build_id = perf_event__process_build_id,
+ .tracing_data = perf_event__process_tracing_data,
+ .feature = process_feature_event,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ .opts = annotation__default_options,
+ };
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ struct option options[] = {
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+ "only consider symbols in these dsos"),
+ OPT_STRING('s', "symbol", &annotate.sym_hist_filter, "symbol",
+ "symbol to annotate"),
+ OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
+ OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
+ OPT_BOOLEAN(0, "stdio2", &annotate.use_stdio2, "Use the stdio interface"),
+ OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+ "don't load vmlinux even if found"),
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+ "load module symbols - WARNING: use only with -k and LIVE kernel"),
+ OPT_BOOLEAN('l', "print-line", &annotate.opts.print_lines,
+ "print matching source lines (may be slow)"),
+ OPT_BOOLEAN('P', "full-paths", &annotate.opts.full_path,
+ "Don't shorten the displayed pathnames"),
+ OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
+ "Skip symbols that cannot be annotated"),
+ OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group,
+ &annotate.group_set,
+ "Show event group information together"),
+ OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
+ OPT_CALLBACK(0, "symfs", NULL, "directory",
+ "Look for files with symbols relative to this directory",
+ symbol__config_symfs),
+ OPT_BOOLEAN(0, "source", &annotate.opts.annotate_src,
+ "Interleave source code with assembly code (default)"),
+ OPT_BOOLEAN(0, "asm-raw", &annotate.opts.show_asm_raw,
+ "Display raw encoding of assembly instructions (default)"),
+ OPT_STRING('M', "disassembler-style", &annotate.opts.disassembler_style, "disassembler style",
+ "Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING(0, "objdump", &annotate.opts.objdump_path, "path",
+ "objdump binary to use for disassembly and annotations"),
+ OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
+ "Show event group information together"),
+ OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+ "Show a column with the sum of periods"),
+ OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
+ "Show a column with the number of samples"),
+ OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
+ "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
+ stdio__config_color, "always"),
+ OPT_CALLBACK(0, "percent-type", &annotate.opts, "local-period",
+ "Set percent type local/global-period/hits",
+ annotate_parse_percent_type),
+
+ OPT_END()
+ };
+ int ret;
+
+ set_option_flag(options, 0, "show-total-period", PARSE_OPT_EXCLUSIVE);
+ set_option_flag(options, 0, "show-nr-samples", PARSE_OPT_EXCLUSIVE);
+
+ ret = hists__init();
+ if (ret < 0)
+ return ret;
+
+ argc = parse_options(argc, argv, options, annotate_usage, 0);
+ if (argc) {
+ /*
+ * Special case: if there's an argument left then assume that
+ * it's a symbol filter:
+ */
+ if (argc > 1)
+ usage_with_options(annotate_usage, options);
+
+ annotate.sym_hist_filter = argv[0];
+ }
+
+ if (symbol_conf.show_nr_samples && annotate.use_gtk) {
+ pr_err("--show-nr-samples is not available in --gtk mode at this time\n");
+ return ret;
+ }
+
+ if (quiet)
+ perf_quiet_option();
+
+ data.file.path = input_name;
+
+ annotate.session = perf_session__new(&data, false, &annotate.tool);
+ if (annotate.session == NULL)
+ return -1;
+
+ annotate.has_br_stack = perf_header__has_feat(&annotate.session->header,
+ HEADER_BRANCH_STACK);
+
+ if (annotate.group_set)
+ perf_evlist__force_leader(annotate.session->evlist);
+
+ ret = symbol__annotation_init();
+ if (ret < 0)
+ goto out_delete;
+
+ annotation_config__init();
+
+ symbol_conf.try_vmlinux_path = true;
+
+ ret = symbol__init(&annotate.session->header.env);
+ if (ret < 0)
+ goto out_delete;
+
+ if (annotate.use_stdio || annotate.use_stdio2)
+ use_browser = 0;
+ else if (annotate.use_tui)
+ use_browser = 1;
+ else if (annotate.use_gtk)
+ use_browser = 2;
+
+ setup_browser(true);
+
+ if ((use_browser == 1 || annotate.use_stdio2) && annotate.has_br_stack) {
+ sort__mode = SORT_MODE__BRANCH;
+ if (setup_sorting(annotate.session->evlist) < 0)
+ usage_with_options(annotate_usage, options);
+ } else {
+ if (setup_sorting(NULL) < 0)
+ usage_with_options(annotate_usage, options);
+ }
+
+ ret = __cmd_annotate(&annotate);
+
+out_delete:
+ /*
+ * Speed up the exit process, for large files this can
+ * take quite a while.
+ *
+ * XXX Enable this when using valgrind or if we ever
+ * librarize this command.
+ *
+ * Also experiment with obstacks to see how much speed
+ * up we'll get here.
+ *
+ * perf_session__delete(session);
+ */
+ return ret;
+}
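
process_basic_block() above turns each executed range [branch target, next
branch] into per-address coverage counts; perf does this over
non-overlapping block_range intervals so it never needs the instruction
stream itself. A simplified sketch of the counting, with the interval tree
replaced by a flat array over a tiny fake address space:

    #include <stdio.h>

    #define ADDR_SPACE 16

    /* Executed blocks as inclusive [start, end] address pairs. */
    static const struct { unsigned int start, end; } blocks[] = {
        { 2, 7 }, { 4, 9 }, { 4, 7 },
    };

    int main(void)
    {
        unsigned int coverage[ADDR_SPACE] = { 0 };
        unsigned long i;
        unsigned int a;

        for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
            /* sanity: NULL isn't executable, CPUs don't run backwards */
            if (!blocks[i].start || blocks[i].start > blocks[i].end)
                continue;
            for (a = blocks[i].start; a <= blocks[i].end; a++)
                coverage[a]++;
        }

        for (a = 0; a < ADDR_SPACE; a++)
            if (coverage[a])
                printf("addr %2u covered %u times\n", a, coverage[a]);
        return 0;
    }
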
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
new file mode 100644
index 000000000..17a6bcd01
--- /dev/null
+++ b/tools/perf/builtin-bench.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-bench.c
+ *
+ * General benchmarking collections provided by perf
+ *
+ * Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+
+/*
+ * Available benchmark collection list:
+ *
+ * sched ... scheduler and IPC performance
+ * mem ... memory access performance
+ * numa ... NUMA scheduling and MM performance
+ * futex ... Futex performance
+ */
+#include "perf.h"
+#include "util/util.h"
+#include <subcmd/parse-options.h>
+#include "builtin.h"
+#include "bench/bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+typedef int (*bench_fn_t)(int argc, const char **argv);
+
+struct bench {
+ const char *name;
+ const char *summary;
+ bench_fn_t fn;
+};
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+static struct bench numa_benchmarks[] = {
+ { "mem", "Benchmark for NUMA workloads", bench_numa },
+ { "all", "Run all NUMA benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+#endif
+
+static struct bench sched_benchmarks[] = {
+ { "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
+ { "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
+ { "all", "Run all scheduler benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+
+static struct bench mem_benchmarks[] = {
+ { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
+ { "memset", "Benchmark for memset() functions", bench_mem_memset },
+ { "all", "Run all memory access benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+
+static struct bench futex_benchmarks[] = {
+ { "hash", "Benchmark for futex hash table", bench_futex_hash },
+ { "wake", "Benchmark for futex wake calls", bench_futex_wake },
+ { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
+ { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
+ /* pi-futexes */
+ { "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
+ { "all", "Run all futex benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+
+struct collection {
+ const char *name;
+ const char *summary;
+ struct bench *benchmarks;
+};
+
+static struct collection collections[] = {
+ { "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "mem", "Memory access benchmarks", mem_benchmarks },
+#ifdef HAVE_LIBNUMA_SUPPORT
+ { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
+#endif
+ {"futex", "Futex stressing benchmarks", futex_benchmarks },
+ { "all", "All benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+
+/* Iterate over all benchmark collections: */
+#define for_each_collection(coll) \
+ for (coll = collections; coll->name; coll++)
+
+/* Iterate over all benchmarks within a collection: */
+#define for_each_bench(coll, bench) \
+ for (bench = coll->benchmarks; bench && bench->name; bench++)
+
+static void dump_benchmarks(struct collection *coll)
+{
+ struct bench *bench;
+
+ printf("\n # List of available benchmarks for collection '%s':\n\n", coll->name);
+
+ for_each_bench(coll, bench)
+ printf("%14s: %s\n", bench->name, bench->summary);
+
+ printf("\n");
+}
+
+static const char *bench_format_str;
+
+/* Output/formatting style, exported to benchmark modules: */
+int bench_format = BENCH_FORMAT_DEFAULT;
+unsigned int bench_repeat = 10; /* default number of times to repeat the run */
+
+static const struct option bench_options[] = {
+ OPT_STRING('f', "format", &bench_format_str, "default|simple", "Specify the output formatting style"),
+ OPT_UINTEGER('r', "repeat", &bench_repeat, "Specify amount of times to repeat the run"),
+ OPT_END()
+};
+
+static const char * const bench_usage[] = {
+ "perf bench [<common options>] <collection> <benchmark> [<options>]",
+ NULL
+};
+
+static void print_usage(void)
+{
+ struct collection *coll;
+ int i;
+
+ printf("Usage: \n");
+ for (i = 0; bench_usage[i]; i++)
+ printf("\t%s\n", bench_usage[i]);
+ printf("\n");
+
+ printf(" # List of all available benchmark collections:\n\n");
+
+ for_each_collection(coll)
+ printf("%14s: %s\n", coll->name, coll->summary);
+ printf("\n");
+}
+
+static int bench_str2int(const char *str)
+{
+ if (!str)
+ return BENCH_FORMAT_DEFAULT;
+
+ if (!strcmp(str, BENCH_FORMAT_DEFAULT_STR))
+ return BENCH_FORMAT_DEFAULT;
+ else if (!strcmp(str, BENCH_FORMAT_SIMPLE_STR))
+ return BENCH_FORMAT_SIMPLE;
+
+ return BENCH_FORMAT_UNKNOWN;
+}
+
+/*
+ * Run a specific benchmark but first rename the running task's ->comm[]
+ * to something meaningful:
+ */
+static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
+ int argc, const char **argv)
+{
+ int size;
+ char *name;
+ int ret;
+
+ size = strlen(coll_name) + 1 + strlen(bench_name) + 1;
+
+ name = zalloc(size);
+ BUG_ON(!name);
+
+ scnprintf(name, size, "%s-%s", coll_name, bench_name);
+
+ prctl(PR_SET_NAME, name);
+ argv[0] = name;
+
+ ret = fn(argc, argv);
+
+ free(name);
+
+ return ret;
+}
+
+static void run_collection(struct collection *coll)
+{
+ struct bench *bench;
+ const char *argv[2];
+
+ argv[1] = NULL;
+ /*
+ * TODO:
+ *
+ * Providing preset parameters for
+ * embedded systems, ordinary PCs, HPC machines, etc.
+ * would be helpful.
+ */
+ for_each_bench(coll, bench) {
+ if (!bench->fn)
+ break;
+ printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
+ fflush(stdout);
+
+ argv[1] = bench->name;
+ run_bench(coll->name, bench->name, bench->fn, 1, argv);
+ printf("\n");
+ }
+}
+
+static void run_all_collections(void)
+{
+ struct collection *coll;
+
+ for_each_collection(coll)
+ run_collection(coll);
+}
+
+int cmd_bench(int argc, const char **argv)
+{
+ struct collection *coll;
+ int ret = 0;
+
+ if (argc < 2) {
+ /* No collection specified. */
+ print_usage();
+ goto end;
+ }
+
+ argc = parse_options(argc, argv, bench_options, bench_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ bench_format = bench_str2int(bench_format_str);
+ if (bench_format == BENCH_FORMAT_UNKNOWN) {
+ printf("Unknown format descriptor: '%s'\n", bench_format_str);
+ goto end;
+ }
+
+ if (bench_repeat == 0) {
+ printf("Invalid repeat option: Must specify a positive value\n");
+ goto end;
+ }
+
+ if (argc < 1) {
+ print_usage();
+ goto end;
+ }
+
+ if (!strcmp(argv[0], "all")) {
+ run_all_collections();
+ goto end;
+ }
+
+ for_each_collection(coll) {
+ struct bench *bench;
+
+ if (strcmp(coll->name, argv[0]))
+ continue;
+
+ if (argc < 2) {
+ /* No bench specified. */
+ dump_benchmarks(coll);
+ goto end;
+ }
+
+ if (!strcmp(argv[1], "all")) {
+ run_collection(coll);
+ goto end;
+ }
+
+ for_each_bench(coll, bench) {
+ if (strcmp(bench->name, argv[1]))
+ continue;
+
+ if (bench_format == BENCH_FORMAT_DEFAULT)
+ printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
+ fflush(stdout);
+ ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1);
+ goto end;
+ }
+
+ if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
+ dump_benchmarks(coll);
+ goto end;
+ }
+
+ printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]);
+ ret = 1;
+ goto end;
+ }
+
+ printf("Unknown collection: '%s'\n", argv[0]);
+ ret = 1;
+
+end:
+ return ret;
+}
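
run_bench() above renames the running task with prctl(PR_SET_NAME) so a
benchmark shows up as, say, "sched-messaging" in ps/top and in perf's own
samples. A minimal sketch of that rename; note the comm name is capped at
15 characters plus the terminating NUL:

    #include <stdio.h>
    #include <string.h>
    #include <sys/prctl.h>

    int main(void)
    {
        char name[16];          /* TASK_COMM_LEN: 15 chars + NUL */

        snprintf(name, sizeof(name), "%s-%s", "sched", "pipe");

        if (prctl(PR_SET_NAME, (unsigned long)name))
            perror("prctl(PR_SET_NAME)");

        /* read it back to confirm the rename */
        memset(name, 0, sizeof(name));
        if (prctl(PR_GET_NAME, (unsigned long)name))
            perror("prctl(PR_GET_NAME)");
        printf("comm is now \"%s\"\n", name);
        return 0;
    }
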
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
new file mode 100644
index 000000000..115110a47
--- /dev/null
+++ b/tools/perf/builtin-buildid-cache.c
@@ -0,0 +1,527 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-buildid-cache.c
+ *
+ * Builtin buildid-cache command: Manages build-id cache
+ *
+ * Copyright (C) 2010, Red Hat Inc.
+ * Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+#include <dirent.h>
+#include <errno.h>
+#include <unistd.h>
+#include "builtin.h"
+#include "perf.h"
+#include "namespaces.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include "util/header.h"
+#include <subcmd/parse-options.h>
+#include "util/strlist.h"
+#include "util/build-id.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/time-utils.h"
+#include "util/probe-file.h"
+
+static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
+{
+ char root_dir[PATH_MAX];
+ char *p;
+
+ strlcpy(root_dir, proc_dir, sizeof(root_dir));
+
+ p = strrchr(root_dir, '/');
+ if (!p)
+ return -1;
+ *p = '\0';
+ return sysfs__sprintf_build_id(root_dir, sbuildid);
+}
+
+static int build_id_cache__kcore_dir(char *dir, size_t sz)
+{
+ return fetch_current_timestamp(dir, sz);
+}
+
+static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ const char *name;
+ u64 addr1 = 0, addr2 = 0;
+ int i, err = -1;
+
+ scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
+ scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
+
+ for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+ err = kallsyms__get_function_start(from, name, &addr1);
+ if (!err)
+ break;
+ }
+
+ if (err)
+ return false;
+
+ if (kallsyms__get_function_start(to, name, &addr2))
+ return false;
+
+ return addr1 == addr2;
+}
+
+static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
+ size_t to_dir_sz)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ char to_subdir[PATH_MAX];
+ struct dirent *dent;
+ int ret = -1;
+ DIR *d;
+
+ d = opendir(to_dir);
+ if (!d)
+ return -1;
+
+ scnprintf(from, sizeof(from), "%s/modules", from_dir);
+
+ while (1) {
+ dent = readdir(d);
+ if (!dent)
+ break;
+ if (dent->d_type != DT_DIR)
+ continue;
+ scnprintf(to, sizeof(to), "%s/%s/modules", to_dir,
+ dent->d_name);
+ scnprintf(to_subdir, sizeof(to_subdir), "%s/%s",
+ to_dir, dent->d_name);
+ if (!compare_proc_modules(from, to) &&
+ same_kallsyms_reloc(from_dir, to_subdir)) {
+ strlcpy(to_dir, to_subdir, to_dir_sz);
+ ret = 0;
+ break;
+ }
+ }
+
+ closedir(d);
+
+ return ret;
+}
+
+static int build_id_cache__add_kcore(const char *filename, bool force)
+{
+ char dir[32], sbuildid[SBUILD_ID_SIZE];
+ char from_dir[PATH_MAX], to_dir[PATH_MAX];
+ char *p;
+
+ strlcpy(from_dir, filename, sizeof(from_dir));
+
+ p = strrchr(from_dir, '/');
+ if (!p || strcmp(p + 1, "kcore"))
+ return -1;
+ *p = '\0';
+
+ if (build_id_cache__kcore_buildid(from_dir, sbuildid) < 0)
+ return -1;
+
+ scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s",
+ buildid_dir, DSO__NAME_KCORE, sbuildid);
+
+ if (!force &&
+ !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
+ pr_debug("same kcore found in %s\n", to_dir);
+ return 0;
+ }
+
+ if (build_id_cache__kcore_dir(dir, sizeof(dir)))
+ return -1;
+
+ scnprintf(to_dir, sizeof(to_dir), "%s/%s/%s/%s",
+ buildid_dir, DSO__NAME_KCORE, sbuildid, dir);
+
+ if (mkdir_p(to_dir, 0755))
+ return -1;
+
+ if (kcore_copy(from_dir, to_dir)) {
+ /* Remove YYYYmmddHHMMSShh directory */
+ if (!rmdir(to_dir)) {
+ p = strrchr(to_dir, '/');
+ if (p)
+ *p = '\0';
+ /* Try to remove buildid directory */
+ if (!rmdir(to_dir)) {
+ p = strrchr(to_dir, '/');
+ if (p)
+ *p = '\0';
+ /* Try to remove [kernel.kcore] directory */
+ rmdir(to_dir);
+ }
+ }
+ return -1;
+ }
+
+ pr_debug("kcore added to build-id cache directory %s\n", to_dir);
+
+ return 0;
+}
+
+static int build_id_cache__add_file(const char *filename, struct nsinfo *nsi)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+ u8 build_id[BUILD_ID_SIZE];
+ int err;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ nsinfo__mountns_exit(&nsc);
+ if (err < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__add_s(sbuild_id, filename, nsi,
+ false, false);
+ pr_debug("Adding %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+ return err;
+}
+
+static int build_id_cache__remove_file(const char *filename, struct nsinfo *nsi)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ char sbuild_id[SBUILD_ID_SIZE];
+ struct nscookie nsc;
+
+ int err;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ nsinfo__mountns_exit(&nsc);
+ if (err < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__remove_s(sbuild_id);
+ pr_debug("Removing %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+
+ return err;
+}
+
+static int build_id_cache__purge_path(const char *pathname, struct nsinfo *nsi)
+{
+ struct strlist *list;
+ struct str_node *pos;
+ int err;
+
+ err = build_id_cache__list_build_ids(pathname, nsi, &list);
+ if (err)
+ goto out;
+
+ strlist__for_each_entry(pos, list) {
+ err = build_id_cache__remove_s(pos->s);
+ pr_debug("Removing %s %s: %s\n", pos->s, pathname,
+ err ? "FAIL" : "Ok");
+ if (err)
+ break;
+ }
+ strlist__delete(list);
+
+out:
+ pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok");
+
+ return err;
+}
+
+static int build_id_cache__purge_all(void)
+{
+ struct strlist *list;
+ struct str_node *pos;
+ int err = 0;
+ char *buf;
+
+ list = build_id_cache__list_all(false);
+ if (!list) {
+ pr_debug("Failed to get buildids: -%d\n", errno);
+ return -EINVAL;
+ }
+
+ strlist__for_each_entry(pos, list) {
+ buf = build_id_cache__origname(pos->s);
+ err = build_id_cache__remove_s(pos->s);
+ pr_debug("Removing %s (%s): %s\n", buf, pos->s,
+ err ? "FAIL" : "Ok");
+ free(buf);
+ if (err)
+ break;
+ }
+ strlist__delete(list);
+
+ pr_debug("Purged all: %s\n", err ? "FAIL" : "Ok");
+ return err;
+}
+
+static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
+{
+ char filename[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+
+ if (dso__build_id_filename(dso, filename, sizeof(filename), false) &&
+ filename__read_build_id(filename, build_id,
+ sizeof(build_id)) != sizeof(build_id)) {
+ if (errno == ENOENT)
+ return false;
+
+ pr_warning("Problems with %s file, consider removing it from the cache\n",
+ filename);
+ } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
+ pr_warning("Problems with %s file, consider removing it from the cache\n",
+ filename);
+ }
+
+ return true;
+}
+
+static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *fp)
+{
+ perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0);
+ return 0;
+}
+
+static int build_id_cache__update_file(const char *filename, struct nsinfo *nsi)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ char sbuild_id[SBUILD_ID_SIZE];
+ struct nscookie nsc;
+
+ int err;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ nsinfo__mountns_exit(&nsc);
+ if (err < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+ err = 0;
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ if (build_id_cache__cached(sbuild_id))
+ err = build_id_cache__remove_s(sbuild_id);
+
+ if (!err)
+ err = build_id_cache__add_s(sbuild_id, filename, nsi, false,
+ false);
+
+ pr_debug("Updating %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+
+ return err;
+}
+
+static int build_id_cache__show_all(void)
+{
+ struct strlist *bidlist;
+ struct str_node *nd;
+ char *buf;
+
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ pr_debug("Failed to get buildids: -%d\n", errno);
+ return -1;
+ }
+ strlist__for_each_entry(nd, bidlist) {
+ buf = build_id_cache__origname(nd->s);
+ fprintf(stdout, "%s %s\n", nd->s, buf);
+ free(buf);
+ }
+ strlist__delete(bidlist);
+ return 0;
+}
+
+int cmd_buildid_cache(int argc, const char **argv)
+{
+ struct strlist *list;
+ struct str_node *pos;
+ int ret = 0;
+ int ns_id = -1;
+ bool force = false;
+ bool list_files = false;
+ bool opts_flag = false;
+ bool purge_all = false;
+ char const *add_name_list_str = NULL,
+ *remove_name_list_str = NULL,
+ *purge_name_list_str = NULL,
+ *missing_filename = NULL,
+ *update_name_list_str = NULL,
+ *kcore_filename = NULL;
+ char sbuf[STRERR_BUFSIZE];
+
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ struct perf_session *session = NULL;
+ struct nsinfo *nsi = NULL;
+
+ const struct option buildid_cache_options[] = {
+ OPT_STRING('a', "add", &add_name_list_str,
+ "file list", "file(s) to add"),
+ OPT_STRING('k', "kcore", &kcore_filename,
+ "file", "kcore file to add"),
+ OPT_STRING('r', "remove", &remove_name_list_str, "file list",
+ "file(s) to remove"),
+ OPT_STRING('p', "purge", &purge_name_list_str, "file list",
+ "file(s) to remove (remove old caches too)"),
+ OPT_BOOLEAN('P', "purge-all", &purge_all, "purge all cached files"),
+ OPT_BOOLEAN('l', "list", &list_files, "list all cached files"),
+ OPT_STRING('M', "missing", &missing_filename, "file",
+ "to find missing build ids in the cache"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_STRING('u', "update", &update_name_list_str, "file list",
+ "file(s) to update"),
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_INTEGER(0, "target-ns", &ns_id, "target pid for namespace context"),
+ OPT_END()
+ };
+ const char * const buildid_cache_usage[] = {
+ "perf buildid-cache [<options>]",
+ NULL
+ };
+
+ argc = parse_options(argc, argv, buildid_cache_options,
+ buildid_cache_usage, 0);
+
+ opts_flag = add_name_list_str || kcore_filename ||
+ remove_name_list_str || purge_name_list_str ||
+ missing_filename || update_name_list_str ||
+ purge_all;
+
+ if (argc || !(list_files || opts_flag))
+ usage_with_options(buildid_cache_usage, buildid_cache_options);
+
+ /* -l is exclusive. It can not be used with other options. */
+ if (list_files && opts_flag) {
+ usage_with_options_msg(buildid_cache_usage,
+ buildid_cache_options, "-l is exclusive.\n");
+ }
+
+ if (ns_id > 0)
+ nsi = nsinfo__new(ns_id);
+
+ if (missing_filename) {
+ data.file.path = missing_filename;
+ data.force = force;
+
+ session = perf_session__new(&data, false, NULL);
+ if (session == NULL)
+ return -1;
+ }
+
+ if (symbol__init(session ? &session->header.env : NULL) < 0)
+ goto out;
+
+ setup_pager();
+
+ if (list_files) {
+ ret = build_id_cache__show_all();
+ goto out;
+ }
+
+ if (add_name_list_str) {
+ list = strlist__new(add_name_list_str, NULL);
+ if (list) {
+ strlist__for_each_entry(pos, list)
+ if (build_id_cache__add_file(pos->s, nsi)) {
+ if (errno == EEXIST) {
+ pr_debug("%s already in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't add %s: %s\n",
+ pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ if (remove_name_list_str) {
+ list = strlist__new(remove_name_list_str, NULL);
+ if (list) {
+ strlist__for_each_entry(pos, list)
+ if (build_id_cache__remove_file(pos->s, nsi)) {
+ if (errno == ENOENT) {
+ pr_debug("%s wasn't in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't remove %s: %s\n",
+ pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ if (purge_name_list_str) {
+ list = strlist__new(purge_name_list_str, NULL);
+ if (list) {
+ strlist__for_each_entry(pos, list)
+ if (build_id_cache__purge_path(pos->s, nsi)) {
+ if (errno == ENOENT) {
+ pr_debug("%s wasn't in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't remove %s: %s\n",
+ pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ if (purge_all) {
+ if (build_id_cache__purge_all()) {
+ pr_warning("Couldn't remove some caches. Error: %s.\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+ }
+
+ if (missing_filename)
+ ret = build_id_cache__fprintf_missing(session, stdout);
+
+ if (update_name_list_str) {
+ list = strlist__new(update_name_list_str, NULL);
+ if (list) {
+ strlist__for_each_entry(pos, list)
+ if (build_id_cache__update_file(pos->s, nsi)) {
+ if (errno == ENOENT) {
+ pr_debug("%s wasn't in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't update %s: %s\n",
+ pos->s, str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force))
+ pr_warning("Couldn't add %s\n", kcore_filename);
+
+out:
+ perf_session__delete(session);
+ nsinfo__zput(nsi);
+
+ return ret;
+}
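
Everything in this file keys the cache on the printable form of a build-id:
the raw BUILD_ID_SIZE (20) bytes are rendered into an SBUILD_ID_SIZE
(2*20+1) hex string before being added, removed or looked up. A minimal
sketch of that conversion, mirroring what build_id__sprintf() does; the
sample bytes are arbitrary:

    #include <stdio.h>

    #define BUILD_ID_SIZE  20
    #define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)

    /* Render raw build-id bytes as a NUL-terminated hex string. */
    static void build_id_to_hex(const unsigned char *bid, size_t len, char *out)
    {
        size_t i;

        for (i = 0; i < len; i++)
            sprintf(out + i * 2, "%02x", bid[i]);
        out[len * 2] = '\0';
    }

    int main(void)
    {
        const unsigned char bid[BUILD_ID_SIZE] = {
            0x76, 0xcb, 0x84, 0x1c, 0xb8, 0x86, 0xee, 0xf6, 0xb3, 0xbe,
            0xe3, 0x41, 0xa2, 0x26, 0x6c, 0x76, 0x57, 0x87, 0x24, 0xad,
        };
        char sbuild_id[SBUILD_ID_SIZE];

        build_id_to_hex(bid, sizeof(bid), sbuild_id);
        printf("%s\n", sbuild_id);
        return 0;
    }
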
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
new file mode 100644
index 000000000..78abbe8d9
--- /dev/null
+++ b/tools/perf/builtin-buildid-list.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-buildid-list.c
+ *
+ * Builtin buildid-list command: list buildids in perf.data, in the running
+ * kernel and in ELF files.
+ *
+ * Copyright (C) 2009, Red Hat Inc.
+ * Copyright (C) 2009, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+#include "perf.h"
+#include "util/build-id.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/data.h"
+#include <errno.h>
+
+static int sysfs__fprintf_build_id(FILE *fp)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+ int ret;
+
+ ret = sysfs__sprintf_build_id("/", sbuild_id);
+ if (ret != sizeof(sbuild_id))
+ return ret < 0 ? ret : -EINVAL;
+
+ return fprintf(fp, "%s\n", sbuild_id);
+}
+
+static int filename__fprintf_build_id(const char *name, FILE *fp)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+ int ret;
+
+ ret = filename__sprintf_build_id(name, sbuild_id);
+ if (ret != sizeof(sbuild_id))
+ return ret < 0 ? ret : -EINVAL;
+
+ return fprintf(fp, "%s\n", sbuild_id);
+}
+
+static bool dso__skip_buildid(struct dso *dso, int with_hits)
+{
+ return with_hits && !dso->hit;
+}
+
+static int perf_session__list_build_ids(bool force, bool with_hits)
+{
+ struct perf_session *session;
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = force,
+ };
+
+ symbol__elf_init();
+ /*
+ * See if this is an ELF file first:
+ */
+ if (filename__fprintf_build_id(input_name, stdout) > 0)
+ goto out;
+
+ session = perf_session__new(&data, false, &build_id__mark_dso_hit_ops);
+ if (session == NULL)
+ return -1;
+
+ /*
+ * We take all buildids when the file contains AUX area tracing data
+ * because we do not decode the trace, as decoding would take too long.
+ */
+ if (!perf_data__is_pipe(&data) &&
+ perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+ with_hits = false;
+
+ /*
+ * In pipe mode, the only way to get the buildids is to parse
+ * the record stream; buildids are stored as RECORD_HEADER_BUILD_ID records.
+ */
+ if (with_hits || perf_data__is_pipe(&data))
+ perf_session__process_events(session);
+
+ perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
+ perf_session__delete(session);
+out:
+ return 0;
+}
+
+int cmd_buildid_list(int argc, const char **argv)
+{
+ bool show_kernel = false;
+ bool with_hits = false;
+ bool force = false;
+ const struct option options[] = {
+ OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_BOOLEAN('k', "kernel", &show_kernel, "Show current kernel build id"),
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_END()
+ };
+ const char * const buildid_list_usage[] = {
+ "perf buildid-list [<options>]",
+ NULL
+ };
+
+ argc = parse_options(argc, argv, options, buildid_list_usage, 0);
+ setup_pager();
+
+ if (show_kernel)
+ return !(sysfs__fprintf_build_id(stdout) > 0);
+
+ return perf_session__list_build_ids(force, with_hits);
+}
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
new file mode 100644
index 000000000..f3c142bd1
--- /dev/null
+++ b/tools/perf/builtin-c2c.c
@@ -0,0 +1,2987 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is a rewrite of the original c2c tool introduced here:
+ * http://lwn.net/Articles/588866/
+ *
+ * The original tool was changed to fit the current perf state.
+ *
+ * Original authors:
+ * Don Zickus <dzickus@redhat.com>
+ * Dick Fowles <fowles@inreach.com>
+ * Joe Mario <jmario@redhat.com>
+ */
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+#include <asm/bug.h>
+#include <sys/param.h>
+#include "util.h"
+#include "debug.h"
+#include "builtin.h"
+#include <subcmd/parse-options.h>
+#include "mem-events.h"
+#include "session.h"
+#include "hist.h"
+#include "sort.h"
+#include "tool.h"
+#include "data.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "ui/browsers/hists.h"
+#include "thread.h"
+#include "mem2node.h"
+
+struct c2c_hists {
+ struct hists hists;
+ struct perf_hpp_list list;
+ struct c2c_stats stats;
+};
+
+struct compute_stats {
+ struct stats lcl_hitm;
+ struct stats rmt_hitm;
+ struct stats load;
+};
+
+struct c2c_hist_entry {
+ struct c2c_hists *hists;
+ struct c2c_stats stats;
+ unsigned long *cpuset;
+ unsigned long *nodeset;
+ struct c2c_stats *node_stats;
+ unsigned int cacheline_idx;
+
+ struct compute_stats cstats;
+
+ unsigned long paddr;
+ unsigned long paddr_cnt;
+ bool paddr_zero;
+ char *nodestr;
+
+ /*
+ * Must be at the end, because hist_entry may carry a
+ * dynamically sized callchain area behind it.
+ */
+ struct hist_entry he;
+};
+
+static char const *coalesce_default = "pid,iaddr";
+
+struct perf_c2c {
+ struct perf_tool tool;
+ struct c2c_hists hists;
+ struct mem2node mem2node;
+
+ unsigned long **nodes;
+ int nodes_cnt;
+ int cpus_cnt;
+ int *cpu2node;
+ int node_info;
+
+ bool show_src;
+ bool show_all;
+ bool use_stdio;
+ bool stats_only;
+ bool symbol_full;
+
+ /* HITM shared clines stats */
+ struct c2c_stats hitm_stats;
+ int shared_clines;
+
+ int display;
+
+ const char *coalesce;
+ char *cl_sort;
+ char *cl_resort;
+ char *cl_output;
+};
+
+enum {
+ DISPLAY_LCL,
+ DISPLAY_RMT,
+ DISPLAY_TOT,
+ DISPLAY_MAX,
+};
+
+static const char *display_str[DISPLAY_MAX] = {
+ [DISPLAY_LCL] = "Local",
+ [DISPLAY_RMT] = "Remote",
+ [DISPLAY_TOT] = "Total",
+};
+
+static const struct option c2c_options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
+ OPT_END()
+};
+
+static struct perf_c2c c2c;
+
+static void *c2c_he_zalloc(size_t size)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = zalloc(size + sizeof(*c2c_he));
+ if (!c2c_he)
+ return NULL;
+
+ c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+ if (!c2c_he->cpuset)
+ return NULL;
+
+ c2c_he->nodeset = bitmap_alloc(c2c.nodes_cnt);
+ if (!c2c_he->nodeset)
+ return NULL;
+
+ c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats));
+ if (!c2c_he->node_stats)
+ return NULL;
+
+ init_stats(&c2c_he->cstats.lcl_hitm);
+ init_stats(&c2c_he->cstats.rmt_hitm);
+ init_stats(&c2c_he->cstats.load);
+
+ return &c2c_he->he;
+}
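+/*
+ * Note on the allocation scheme above: the hists code asks for 'size'
+ * bytes (a hist_entry plus its optional callchain tail) and c2c prepends
+ * its own fields in the same block, which is why 'he' must stay the last
+ * member of struct c2c_hist_entry.
+ */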
+
+static void c2c_he_free(void *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists) {
+ hists__delete_entries(&c2c_he->hists->hists);
+ free(c2c_he->hists);
+ }
+
+ free(c2c_he->cpuset);
+ free(c2c_he->nodeset);
+ free(c2c_he->nodestr);
+ free(c2c_he->node_stats);
+ free(c2c_he);
+}
+
+static struct hist_entry_ops c2c_entry_ops = {
+ .new = c2c_he_zalloc,
+ .free = c2c_he_free,
+};
+
+static int c2c_hists__init(struct c2c_hists *hists,
+ const char *sort,
+ int nr_header_lines);
+
+static struct c2c_hists*
+he__get_c2c_hists(struct hist_entry *he,
+ const char *sort,
+ int nr_header_lines)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *hists;
+ int ret;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (c2c_he->hists)
+ return c2c_he->hists;
+
+ hists = c2c_he->hists = zalloc(sizeof(*hists));
+ if (!hists)
+ return NULL;
+
+ ret = c2c_hists__init(hists, sort, nr_header_lines);
+ if (ret) {
+ free(hists);
+ return NULL;
+ }
+
+ return hists;
+}
+
+static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
+ struct perf_sample *sample)
+{
+ if (WARN_ONCE(sample->cpu == (unsigned int) -1,
+ "WARNING: no sample cpu value"))
+ return;
+
+ set_bit(sample->cpu, c2c_he->cpuset);
+}
+
+static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
+ struct perf_sample *sample)
+{
+ int node;
+
+ if (!sample->phys_addr) {
+ c2c_he->paddr_zero = true;
+ return;
+ }
+
+ node = mem2node__node(&c2c.mem2node, sample->phys_addr);
+ if (WARN_ONCE(node < 0, "WARNING: failed to find node\n"))
+ return;
+
+ set_bit(node, c2c_he->nodeset);
+
+ if (c2c_he->paddr != sample->phys_addr) {
+ c2c_he->paddr_cnt++;
+ c2c_he->paddr = sample->phys_addr;
+ }
+}
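+/*
+ * paddr/paddr_cnt above approximate the number of distinct physical
+ * addresses behind a cacheline by counting transitions between
+ * consecutive samples; interleaved addresses may therefore be counted
+ * more than once.
+ */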
+
+static void compute_stats(struct c2c_hist_entry *c2c_he,
+ struct c2c_stats *stats,
+ u64 weight)
+{
+ struct compute_stats *cstats = &c2c_he->cstats;
+
+ if (stats->rmt_hitm)
+ update_stats(&cstats->rmt_hitm, weight);
+ else if (stats->lcl_hitm)
+ update_stats(&cstats->lcl_hitm, weight);
+ else if (stats->load)
+ update_stats(&cstats->load, weight);
+}
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct c2c_hists *c2c_hists = &c2c.hists;
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_stats stats = { .nr_entries = 0, };
+ struct hist_entry *he;
+ struct addr_location al;
+ struct mem_info *mi, *mi_dup;
+ int ret;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
+ evsel, &al, sysctl_perf_event_max_stack);
+ if (ret)
+ goto out;
+
+ mi = sample__resolve_mem(sample, &al);
+ if (mi == NULL)
+ return -ENOMEM;
+
+ /*
+ * The mi object is released inside hists__add_entry_ops if it gets
+ * merged into an existing entry, so take an extra reference now.
+ */
+ mi_dup = mem_info__get(mi);
+
+ c2c_decode_stats(&stats, mi);
+
+ he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL)
+ goto free_mi;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_add_stats(&c2c_he->stats, &stats);
+ c2c_add_stats(&c2c_hists->stats, &stats);
+
+ c2c_he__set_cpu(c2c_he, sample);
+ c2c_he__set_node(c2c_he, sample);
+
+ hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+
+ if (!ret) {
+ /*
+ * A warning about the missing sample cpu value has already
+ * been issued; in that case account everything to node 0
+ * without warning again.
+ *
+ * Node stats are gathered only for single callchain data.
+ */
+ int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
+ int node = c2c.cpu2node[cpu];
+
+ mi = mi_dup;
+
+ c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2);
+ if (!c2c_hists)
+ goto free_mi;
+
+ he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
+ &al, NULL, NULL, mi,
+ sample, true);
+ if (he == NULL)
+ goto free_mi;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_add_stats(&c2c_he->stats, &stats);
+ c2c_add_stats(&c2c_hists->stats, &stats);
+ c2c_add_stats(&c2c_he->node_stats[node], &stats);
+
+ compute_stats(c2c_he, &stats, sample->weight);
+
+ c2c_he__set_cpu(c2c_he, sample);
+ c2c_he__set_node(c2c_he, sample);
+
+ hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
+ ret = hist_entry__append_callchain(he, sample);
+ }
+
+out:
+ addr_location__put(&al);
+ return ret;
+
+free_mi:
+ mem_info__put(mi_dup);
+ mem_info__put(mi);
+ ret = -ENOMEM;
+ goto out;
+}
+
+static struct perf_c2c c2c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+};
+
+static const char * const c2c_usage[] = {
+ "perf c2c {record|report}",
+ NULL
+};
+
+static const char * const __usage_report[] = {
+ "perf c2c report",
+ NULL
+};
+
+static const char * const *report_c2c_usage = __usage_report;
+
+#define C2C_HEADER_MAX 2
+
+struct c2c_header {
+ struct {
+ const char *text;
+ int span;
+ } line[C2C_HEADER_MAX];
+};
+
+struct c2c_dimension {
+ struct c2c_header header;
+ const char *name;
+ int width;
+ struct sort_entry *se;
+
+ int64_t (*cmp)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *, struct hist_entry *);
+ int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he);
+ int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he);
+};
+
+struct c2c_fmt {
+ struct perf_hpp_fmt fmt;
+ struct c2c_dimension *dim;
+};
+
+#define SYMBOL_WIDTH 30
+
+static struct c2c_dimension dim_symbol;
+static struct c2c_dimension dim_srcline;
+
+static int symbol_width(struct hists *hists, struct sort_entry *se)
+{
+ int width = hists__col_len(hists, se->se_width_idx);
+
+ if (!c2c.symbol_full)
+ width = MIN(width, SYMBOL_WIDTH);
+
+ return width;
+}
+
+static int c2c_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists)
+{
+ struct c2c_fmt *c2c_fmt;
+ struct c2c_dimension *dim;
+
+ c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ dim = c2c_fmt->dim;
+
+ if (dim == &dim_symbol || dim == &dim_srcline)
+ return symbol_width(hists, dim->se);
+
+ return dim->se ? hists__col_len(hists, dim->se->se_width_idx) :
+ c2c_fmt->dim->width;
+}
+
+static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists, int line, int *span)
+{
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+ struct c2c_fmt *c2c_fmt;
+ struct c2c_dimension *dim;
+ const char *text = NULL;
+ int width = c2c_width(fmt, hpp, hists);
+
+ c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ dim = c2c_fmt->dim;
+
+ if (dim->se) {
+ text = dim->header.line[line].text;
+ /* Use the last line from sort_entry if not defined. */
+ if (!text && (line == hpp_list->nr_header_lines - 1))
+ text = dim->se->se_header;
+ } else {
+ text = dim->header.line[line].text;
+
+ if (*span) {
+ (*span)--;
+ return 0;
+ } else {
+ *span = dim->header.line[line].span;
+ }
+ }
+
+ if (text == NULL)
+ text = "";
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, text);
+}
+
+#define HEX_STR(__s, __v) \
+({ \
+ scnprintf(__s, sizeof(__s), "0x%" PRIx64, __v); \
+ __s; \
+})
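+/*
+ * HEX_STR is a GNU statement expression, so it can be used inline:
+ * HEX_STR(buf, 0x1234ULL) fills buf with "0x1234" and evaluates to buf.
+ */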
+
+static int64_t
+dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__dcacheline_cmp(left, right);
+}
+
+static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[20];
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
+}
+
+static int
+dcacheline_node_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ if (WARN_ON_ONCE(!c2c_he->nodestr))
+ return 0;
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, c2c_he->nodestr);
+}
+
+static int
+dcacheline_node_count(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ return scnprintf(hpp->buf, hpp->size, "%*lu", width, c2c_he->paddr_cnt);
+}
+
+static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[20];
+
+ if (he->mem_info)
+ addr = cl_offset(he->mem_info->daddr.al_addr);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
+}
+
+static int64_t
+offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = cl_offset(left->mem_info->daddr.addr);
+ if (right->mem_info)
+ r = cl_offset(right->mem_info->daddr.addr);
+
+ return (int64_t)(r - l);
+}
+
+static int
+iaddr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ uint64_t addr = 0;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[20];
+
+ if (he->mem_info)
+ addr = he->mem_info->iaddr.addr;
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr));
+}
+
+static int64_t
+iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return sort__iaddr_cmp(left, right);
+}
+
+static int
+tot_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ unsigned int tot_hitm;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ tot_hitm = c2c_he->stats.lcl_hitm + c2c_he->stats.rmt_hitm;
+
+ return scnprintf(hpp->buf, hpp->size, "%*u", width, tot_hitm);
+}
+
+static int64_t
+tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ uint64_t tot_hitm_left;
+ uint64_t tot_hitm_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ tot_hitm_left = c2c_left->stats.lcl_hitm + c2c_left->stats.rmt_hitm;
+ tot_hitm_right = c2c_right->stats.lcl_hitm + c2c_right->stats.rmt_hitm;
+
+ return tot_hitm_left - tot_hitm_right;
+}
+
+#define STAT_FN_ENTRY(__f) \
+static int \
+__f ## _entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ struct c2c_hist_entry *c2c_he; \
+ int width = c2c_width(fmt, hpp, he->hists); \
+ \
+ c2c_he = container_of(he, struct c2c_hist_entry, he); \
+ return scnprintf(hpp->buf, hpp->size, "%*u", width, \
+ c2c_he->stats.__f); \
+}
+
+#define STAT_FN_CMP(__f) \
+static int64_t \
+__f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *left, struct hist_entry *right) \
+{ \
+ struct c2c_hist_entry *c2c_left, *c2c_right; \
+ \
+ c2c_left = container_of(left, struct c2c_hist_entry, he); \
+ c2c_right = container_of(right, struct c2c_hist_entry, he); \
+ return (uint64_t) c2c_left->stats.__f - \
+ (uint64_t) c2c_right->stats.__f; \
+}
+
+#define STAT_FN(__f) \
+ STAT_FN_ENTRY(__f) \
+ STAT_FN_CMP(__f)
+
+STAT_FN(rmt_hitm)
+STAT_FN(lcl_hitm)
+STAT_FN(store)
+STAT_FN(st_l1hit)
+STAT_FN(st_l1miss)
+STAT_FN(ld_fbhit)
+STAT_FN(ld_l1hit)
+STAT_FN(ld_l2hit)
+STAT_FN(ld_llchit)
+STAT_FN(rmt_hit)
+
+static uint64_t llc_miss(struct c2c_stats *stats)
+{
+ uint64_t llcmiss;
+
+ llcmiss = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hitm +
+ stats->rmt_hit;
+
+ return llcmiss;
+}
+
+static int
+ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ return scnprintf(hpp->buf, hpp->size, "%*lu", width,
+ llc_miss(&c2c_he->stats));
+}
+
+static int64_t
+ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ return (uint64_t) llc_miss(&c2c_left->stats) -
+ (uint64_t) llc_miss(&c2c_right->stats);
+}
+
+static uint64_t total_records(struct c2c_stats *stats)
+{
+ uint64_t lclmiss, ldcnt, total;
+
+ lclmiss = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hitm +
+ stats->rmt_hit;
+
+ ldcnt = lclmiss +
+ stats->ld_fbhit +
+ stats->ld_l1hit +
+ stats->ld_l2hit +
+ stats->ld_llchit +
+ stats->lcl_hitm;
+
+ total = ldcnt +
+ stats->st_l1hit +
+ stats->st_l1miss;
+
+ return total;
+}
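+/*
+ * total_records() folds everything together: LLC misses (local/remote
+ * DRAM, remote HIT and HITM) plus all load hit levels (FB, L1, L2, LLC,
+ * local HITM) plus both store outcomes. For example, 10 L1 load hits,
+ * 2 remote HITMs and 3 L1 store hits give a total of 15.
+ */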
+
+static int
+tot_recs_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ uint64_t tot_recs;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ tot_recs = total_records(&c2c_he->stats);
+
+ return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs);
+}
+
+static int64_t
+tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ uint64_t tot_recs_left;
+ uint64_t tot_recs_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ tot_recs_left = total_records(&c2c_left->stats);
+ tot_recs_right = total_records(&c2c_right->stats);
+
+ return tot_recs_left - tot_recs_right;
+}
+
+static uint64_t total_loads(struct c2c_stats *stats)
+{
+ uint64_t lclmiss, ldcnt;
+
+ lclmiss = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hitm +
+ stats->rmt_hit;
+
+ ldcnt = lclmiss +
+ stats->ld_fbhit +
+ stats->ld_l1hit +
+ stats->ld_l2hit +
+ stats->ld_llchit +
+ stats->lcl_hitm;
+
+ return ldcnt;
+}
+
+static int
+tot_loads_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ uint64_t tot_recs;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ tot_recs = total_loads(&c2c_he->stats);
+
+ return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs);
+}
+
+static int64_t
+tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ uint64_t tot_recs_left;
+ uint64_t tot_recs_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ tot_recs_left = total_loads(&c2c_left->stats);
+ tot_recs_right = total_loads(&c2c_right->stats);
+
+ return tot_recs_left - tot_recs_right;
+}
+
+typedef double (get_percent_cb)(struct c2c_hist_entry *);
+
+static int
+percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, get_percent_cb get_percent)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ per = get_percent(c2c_he);
+
+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser)
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%", width - 1, per);
+#endif
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per);
+}
+
+static double percent_hitm(struct c2c_hist_entry *c2c_he)
+{
+ struct c2c_hists *hists;
+ struct c2c_stats *stats;
+ struct c2c_stats *total;
+ int tot = 0, st = 0;
+ double p;
+
+ hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
+ stats = &c2c_he->stats;
+ total = &hists->stats;
+
+ switch (c2c.display) {
+ case DISPLAY_RMT:
+ st = stats->rmt_hitm;
+ tot = total->rmt_hitm;
+ break;
+ case DISPLAY_LCL:
+ st = stats->lcl_hitm;
+ tot = total->lcl_hitm;
+ break;
+ case DISPLAY_TOT:
+ st = stats->tot_hitm;
+ tot = total->tot_hitm;
+ default:
+ break;
+ }
+
+ p = tot ? (double) st / tot : 0;
+
+ return 100 * p;
+}
+
+#define PERC_STR(__s, __v) \
+({ \
+ scnprintf(__s, sizeof(__s), "%.2F%%", __v); \
+ __s; \
+})
+
+static int
+percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+ double per;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ per = percent_hitm(c2c_he);
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_hitm);
+}
+
+static int64_t
+percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct c2c_hist_entry *c2c_left;
+ struct c2c_hist_entry *c2c_right;
+ double per_left;
+ double per_right;
+
+ c2c_left = container_of(left, struct c2c_hist_entry, he);
+ c2c_right = container_of(right, struct c2c_hist_entry, he);
+
+ per_left = percent_hitm(c2c_left);
+ per_right = percent_hitm(c2c_right);
+
+ return per_left - per_right;
+}
+
+static struct c2c_stats *he_stats(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ return &c2c_he->stats;
+}
+
+static struct c2c_stats *total_stats(struct hist_entry *he)
+{
+ struct c2c_hists *hists;
+
+ hists = container_of(he->hists, struct c2c_hists, hists);
+ return &hists->stats;
+}
+
+static double percent(int st, int tot)
+{
+ return tot ? 100. * (double) st / (double) tot : 0;
+}
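+/* For example, percent(25, 200) yields 12.5; a zero total yields 0. */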
+
+#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f)
+
+#define PERCENT_FN(__f) \
+static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \
+{ \
+ struct c2c_hists *hists; \
+ \
+ hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); \
+ return percent(c2c_he->stats.__f, hists->stats.__f); \
+}
+
+PERCENT_FN(rmt_hitm)
+PERCENT_FN(lcl_hitm)
+PERCENT_FN(st_l1hit)
+PERCENT_FN(st_l1miss)
+
+static int
+percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, rmt_hitm);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_rmt_hitm);
+}
+
+static int64_t
+percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, rmt_hitm);
+ per_right = PERCENT(right, rmt_hitm);
+
+ return per_left - per_right;
+}
+
+static int
+percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, lcl_hitm);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_lcl_hitm);
+}
+
+static int64_t
+percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, lcl_hitm);
+ per_right = PERCENT(right, lcl_hitm);
+
+ return per_left - per_right;
+}
+
+static int
+percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, st_l1hit);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_st_l1hit);
+}
+
+static int64_t
+percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, st_l1hit);
+ per_right = PERCENT(right, st_l1hit);
+
+ return per_left - per_right;
+}
+
+static int
+percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ double per = PERCENT(he, st_l1miss);
+ char buf[10];
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per));
+}
+
+static int
+percent_stores_l1miss_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ return percent_color(fmt, hpp, he, percent_st_l1miss);
+}
+
+static int64_t
+percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ double per_left;
+ double per_right;
+
+ per_left = PERCENT(left, st_l1miss);
+ per_right = PERCENT(right, st_l1miss);
+
+ return per_left - per_right;
+}
+
+STAT_FN(lcl_dram)
+STAT_FN(rmt_dram)
+
+static int
+pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ return scnprintf(hpp->buf, hpp->size, "%*d", width, he->thread->pid_);
+}
+
+static int64_t
+pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return left->thread->pid_ - right->thread->pid_;
+}
+
+static int64_t
+empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+static int
+node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ bool first = true;
+ int node;
+ int ret = 0;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ for (node = 0; node < c2c.nodes_cnt; node++) {
+ DECLARE_BITMAP(set, c2c.cpus_cnt);
+
+ bitmap_zero(set, c2c.cpus_cnt);
+ bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt);
+
+ if (!bitmap_weight(set, c2c.cpus_cnt)) {
+ if (c2c.node_info == 1) {
+ ret = scnprintf(hpp->buf, hpp->size, "%21s", " ");
+ advance_hpp(hpp, ret);
+ }
+ continue;
+ }
+
+ if (!first) {
+ ret = scnprintf(hpp->buf, hpp->size, " ");
+ advance_hpp(hpp, ret);
+ }
+
+ switch (c2c.node_info) {
+ case 0:
+ ret = scnprintf(hpp->buf, hpp->size, "%2d", node);
+ advance_hpp(hpp, ret);
+ break;
+ case 1:
+ {
+ int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt);
+ struct c2c_stats *stats = &c2c_he->node_stats[node];
+
+ ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num);
+ advance_hpp(hpp, ret);
+
+ #define DISPLAY_HITM(__h) \
+ if (c2c_he->stats.__h> 0) { \
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \
+ percent(stats->__h, c2c_he->stats.__h));\
+ } else { \
+ ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \
+ }
+
+ switch (c2c.display) {
+ case DISPLAY_RMT:
+ DISPLAY_HITM(rmt_hitm);
+ break;
+ case DISPLAY_LCL:
+ DISPLAY_HITM(lcl_hitm);
+ break;
+ case DISPLAY_TOT:
+ DISPLAY_HITM(tot_hitm);
+ break;
+ default:
+ break;
+ }
+
+ #undef DISPLAY_HITM
+
+ advance_hpp(hpp, ret);
+
+ if (c2c_he->stats.store > 0) {
+ ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}",
+ percent(stats->store, c2c_he->stats.store));
+ } else {
+ ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a");
+ }
+
+ advance_hpp(hpp, ret);
+ break;
+ }
+ case 2:
+ ret = scnprintf(hpp->buf, hpp->size, "%2d{", node);
+ advance_hpp(hpp, ret);
+
+ ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size);
+ advance_hpp(hpp, ret);
+
+ ret = scnprintf(hpp->buf, hpp->size, "}");
+ advance_hpp(hpp, ret);
+ break;
+ default:
+ break;
+ }
+
+ first = false;
+ }
+
+ return 0;
+}
+
+static int
+mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, double mean)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ scnprintf(buf, 10, "%6.0f", mean);
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+#define MEAN_ENTRY(__func, __val) \
+static int \
+__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ struct c2c_hist_entry *c2c_he; \
+ c2c_he = container_of(he, struct c2c_hist_entry, he); \
+ return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val)); \
+}
+
+MEAN_ENTRY(mean_rmt_entry, rmt_hitm);
+MEAN_ENTRY(mean_lcl_entry, lcl_hitm);
+MEAN_ENTRY(mean_load_entry, load);
+
+static int
+cpucnt_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ scnprintf(buf, 10, "%d", bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt));
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+cl_idx_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ int width = c2c_width(fmt, hpp, he->hists);
+ char buf[10];
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ scnprintf(buf, 10, "%u", c2c_he->cacheline_idx);
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, buf);
+}
+
+static int
+cl_idx_empty_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ int width = c2c_width(fmt, hpp, he->hists);
+
+ return scnprintf(hpp->buf, hpp->size, "%*s", width, "");
+}
+
+#define HEADER_LOW(__h) \
+ { \
+ .line[1] = { \
+ .text = __h, \
+ }, \
+ }
+
+#define HEADER_BOTH(__h0, __h1) \
+ { \
+ .line[0] = { \
+ .text = __h0, \
+ }, \
+ .line[1] = { \
+ .text = __h1, \
+ }, \
+ }
+
+#define HEADER_SPAN(__h0, __h1, __s) \
+ { \
+ .line[0] = { \
+ .text = __h0, \
+ .span = __s, \
+ }, \
+ .line[1] = { \
+ .text = __h1, \
+ }, \
+ }
+
+#define HEADER_SPAN_LOW(__h) \
+ { \
+ .line[1] = { \
+ .text = __h, \
+ }, \
+ }
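+/*
+ * Rough illustration of the two-line headers built below: with
+ * HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2) followed by two
+ * HEADER_SPAN_LOW columns, c2c_header() renders approximately:
+ *
+ *   ----- LLC Load Hitm -----
+ *     Total      Lcl      Rmt
+ */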
+
+static struct c2c_dimension dim_dcacheline = {
+ .header = HEADER_SPAN("--- Cacheline ----", "Address", 2),
+ .name = "dcacheline",
+ .cmp = dcacheline_cmp,
+ .entry = dcacheline_entry,
+ .width = 18,
+};
+
+static struct c2c_dimension dim_dcacheline_node = {
+ .header = HEADER_LOW("Node"),
+ .name = "dcacheline_node",
+ .cmp = empty_cmp,
+ .entry = dcacheline_node_entry,
+ .width = 4,
+};
+
+static struct c2c_dimension dim_dcacheline_count = {
+ .header = HEADER_LOW("PA cnt"),
+ .name = "dcacheline_count",
+ .cmp = empty_cmp,
+ .entry = dcacheline_node_count,
+ .width = 6,
+};
+
+static struct c2c_header header_offset_tui = HEADER_SPAN("-----", "Off", 2);
+
+static struct c2c_dimension dim_offset = {
+ .header = HEADER_SPAN("--- Data address -", "Offset", 2),
+ .name = "offset",
+ .cmp = offset_cmp,
+ .entry = offset_entry,
+ .width = 18,
+};
+
+static struct c2c_dimension dim_offset_node = {
+ .header = HEADER_LOW("Node"),
+ .name = "offset_node",
+ .cmp = empty_cmp,
+ .entry = dcacheline_node_entry,
+ .width = 4,
+};
+
+static struct c2c_dimension dim_iaddr = {
+ .header = HEADER_LOW("Code address"),
+ .name = "iaddr",
+ .cmp = iaddr_cmp,
+ .entry = iaddr_entry,
+ .width = 18,
+};
+
+static struct c2c_dimension dim_tot_hitm = {
+ .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2),
+ .name = "tot_hitm",
+ .cmp = tot_hitm_cmp,
+ .entry = tot_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "lcl_hitm",
+ .cmp = lcl_hitm_cmp,
+ .entry = lcl_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_rmt_hitm = {
+ .header = HEADER_SPAN_LOW("Rmt"),
+ .name = "rmt_hitm",
+ .cmp = rmt_hitm_cmp,
+ .entry = rmt_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_rmt_hitm = {
+ .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .name = "cl_rmt_hitm",
+ .cmp = rmt_hitm_cmp,
+ .entry = rmt_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "cl_lcl_hitm",
+ .cmp = lcl_hitm_cmp,
+ .entry = lcl_hitm_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_stores = {
+ .header = HEADER_SPAN("---- Store Reference ----", "Total", 2),
+ .name = "stores",
+ .cmp = store_cmp,
+ .entry = store_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_stores_l1hit = {
+ .header = HEADER_SPAN_LOW("L1Hit"),
+ .name = "stores_l1hit",
+ .cmp = st_l1hit_cmp,
+ .entry = st_l1hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1Miss"),
+ .name = "stores_l1miss",
+ .cmp = st_l1miss_cmp,
+ .entry = st_l1miss_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_stores_l1hit = {
+ .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+ .name = "cl_stores_l1hit",
+ .cmp = st_l1hit_cmp,
+ .entry = st_l1hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_cl_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1 Miss"),
+ .name = "cl_stores_l1miss",
+ .cmp = st_l1miss_cmp,
+ .entry = st_l1miss_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_ld_fbhit = {
+ .header = HEADER_SPAN("----- Core Load Hit -----", "FB", 2),
+ .name = "ld_fbhit",
+ .cmp = ld_fbhit_cmp,
+ .entry = ld_fbhit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_ld_l1hit = {
+ .header = HEADER_SPAN_LOW("L1"),
+ .name = "ld_l1hit",
+ .cmp = ld_l1hit_cmp,
+ .entry = ld_l1hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_ld_l2hit = {
+ .header = HEADER_SPAN_LOW("L2"),
+ .name = "ld_l2hit",
+ .cmp = ld_l2hit_cmp,
+ .entry = ld_l2hit_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_ld_llchit = {
+ .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1),
+ .name = "ld_lclhit",
+ .cmp = ld_llchit_cmp,
+ .entry = ld_llchit_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_ld_rmthit = {
+ .header = HEADER_SPAN_LOW("Rmt"),
+ .name = "ld_rmthit",
+ .cmp = rmt_hit_cmp,
+ .entry = rmt_hit_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_ld_llcmiss = {
+ .header = HEADER_BOTH("LLC", "Ld Miss"),
+ .name = "ld_llcmiss",
+ .cmp = ld_llcmiss_cmp,
+ .entry = ld_llcmiss_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_tot_recs = {
+ .header = HEADER_BOTH("Total", "records"),
+ .name = "tot_recs",
+ .cmp = tot_recs_cmp,
+ .entry = tot_recs_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_tot_loads = {
+ .header = HEADER_BOTH("Total", "Loads"),
+ .name = "tot_loads",
+ .cmp = tot_loads_cmp,
+ .entry = tot_loads_entry,
+ .width = 7,
+};
+
+static struct c2c_header percent_hitm_header[] = {
+ [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"),
+ [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"),
+ [DISPLAY_TOT] = HEADER_BOTH("Tot", "Hitm"),
+};
+
+static struct c2c_dimension dim_percent_hitm = {
+ .name = "percent_hitm",
+ .cmp = percent_hitm_cmp,
+ .entry = percent_hitm_entry,
+ .color = percent_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_rmt_hitm = {
+ .header = HEADER_SPAN("----- HITM -----", "Rmt", 1),
+ .name = "percent_rmt_hitm",
+ .cmp = percent_rmt_hitm_cmp,
+ .entry = percent_rmt_hitm_entry,
+ .color = percent_rmt_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_lcl_hitm = {
+ .header = HEADER_SPAN_LOW("Lcl"),
+ .name = "percent_lcl_hitm",
+ .cmp = percent_lcl_hitm_cmp,
+ .entry = percent_lcl_hitm_entry,
+ .color = percent_lcl_hitm_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1hit = {
+ .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1),
+ .name = "percent_stores_l1hit",
+ .cmp = percent_stores_l1hit_cmp,
+ .entry = percent_stores_l1hit_entry,
+ .color = percent_stores_l1hit_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_percent_stores_l1miss = {
+ .header = HEADER_SPAN_LOW("L1 Miss"),
+ .name = "percent_stores_l1miss",
+ .cmp = percent_stores_l1miss_cmp,
+ .entry = percent_stores_l1miss_entry,
+ .color = percent_stores_l1miss_color,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_dram_lcl = {
+ .header = HEADER_SPAN("--- Load Dram ----", "Lcl", 1),
+ .name = "dram_lcl",
+ .cmp = lcl_dram_cmp,
+ .entry = lcl_dram_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_dram_rmt = {
+ .header = HEADER_SPAN_LOW("Rmt"),
+ .name = "dram_rmt",
+ .cmp = rmt_dram_cmp,
+ .entry = rmt_dram_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_pid = {
+ .header = HEADER_LOW("Pid"),
+ .name = "pid",
+ .cmp = pid_cmp,
+ .entry = pid_entry,
+ .width = 7,
+};
+
+static struct c2c_dimension dim_tid = {
+ .header = HEADER_LOW("Tid"),
+ .name = "tid",
+ .se = &sort_thread,
+};
+
+static struct c2c_dimension dim_symbol = {
+ .name = "symbol",
+ .se = &sort_sym,
+};
+
+static struct c2c_dimension dim_dso = {
+ .header = HEADER_BOTH("Shared", "Object"),
+ .name = "dso",
+ .se = &sort_dso,
+};
+
+static struct c2c_header header_node[3] = {
+ HEADER_LOW("Node"),
+ HEADER_LOW("Node{cpus %hitms %stores}"),
+ HEADER_LOW("Node{cpu list}"),
+};
+
+static struct c2c_dimension dim_node = {
+ .name = "node",
+ .cmp = empty_cmp,
+ .entry = node_entry,
+ .width = 4,
+};
+
+static struct c2c_dimension dim_mean_rmt = {
+ .header = HEADER_SPAN("---------- cycles ----------", "rmt hitm", 2),
+ .name = "mean_rmt",
+ .cmp = empty_cmp,
+ .entry = mean_rmt_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_lcl = {
+ .header = HEADER_SPAN_LOW("lcl hitm"),
+ .name = "mean_lcl",
+ .cmp = empty_cmp,
+ .entry = mean_lcl_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_mean_load = {
+ .header = HEADER_SPAN_LOW("load"),
+ .name = "mean_load",
+ .cmp = empty_cmp,
+ .entry = mean_load_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_cpucnt = {
+ .header = HEADER_BOTH("cpu", "cnt"),
+ .name = "cpucnt",
+ .cmp = empty_cmp,
+ .entry = cpucnt_entry,
+ .width = 8,
+};
+
+static struct c2c_dimension dim_srcline = {
+ .name = "cl_srcline",
+ .se = &sort_srcline,
+};
+
+static struct c2c_dimension dim_dcacheline_idx = {
+ .header = HEADER_LOW("Index"),
+ .name = "cl_idx",
+ .cmp = empty_cmp,
+ .entry = cl_idx_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num = {
+ .header = HEADER_LOW("Num"),
+ .name = "cl_num",
+ .cmp = empty_cmp,
+ .entry = cl_idx_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension dim_dcacheline_num_empty = {
+ .header = HEADER_LOW("Num"),
+ .name = "cl_num_empty",
+ .cmp = empty_cmp,
+ .entry = cl_idx_empty_entry,
+ .width = 5,
+};
+
+static struct c2c_dimension *dimensions[] = {
+ &dim_dcacheline,
+ &dim_dcacheline_node,
+ &dim_dcacheline_count,
+ &dim_offset,
+ &dim_offset_node,
+ &dim_iaddr,
+ &dim_tot_hitm,
+ &dim_lcl_hitm,
+ &dim_rmt_hitm,
+ &dim_cl_lcl_hitm,
+ &dim_cl_rmt_hitm,
+ &dim_stores,
+ &dim_stores_l1hit,
+ &dim_stores_l1miss,
+ &dim_cl_stores_l1hit,
+ &dim_cl_stores_l1miss,
+ &dim_ld_fbhit,
+ &dim_ld_l1hit,
+ &dim_ld_l2hit,
+ &dim_ld_llchit,
+ &dim_ld_rmthit,
+ &dim_ld_llcmiss,
+ &dim_tot_recs,
+ &dim_tot_loads,
+ &dim_percent_hitm,
+ &dim_percent_rmt_hitm,
+ &dim_percent_lcl_hitm,
+ &dim_percent_stores_l1hit,
+ &dim_percent_stores_l1miss,
+ &dim_dram_lcl,
+ &dim_dram_rmt,
+ &dim_pid,
+ &dim_tid,
+ &dim_symbol,
+ &dim_dso,
+ &dim_node,
+ &dim_mean_rmt,
+ &dim_mean_lcl,
+ &dim_mean_load,
+ &dim_cpucnt,
+ &dim_srcline,
+ &dim_dcacheline_idx,
+ &dim_dcacheline_num,
+ &dim_dcacheline_num_empty,
+ NULL,
+};
+
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+ struct c2c_fmt *c2c_fmt;
+
+ c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ free(c2c_fmt);
+}
+
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ struct c2c_fmt *c2c_a = container_of(a, struct c2c_fmt, fmt);
+ struct c2c_fmt *c2c_b = container_of(b, struct c2c_fmt, fmt);
+
+ return c2c_a->dim == c2c_b->dim;
+}
+
+static struct c2c_dimension *get_dimension(const char *name)
+{
+ unsigned int i;
+
+ for (i = 0; dimensions[i]; i++) {
+ struct c2c_dimension *dim = dimensions[i];
+
+ if (!strcmp(dim->name, name))
+ return dim;
+ }
+
+ return NULL;
+}
+
+static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+ size_t len = fmt->user_len;
+
+ if (!len) {
+ len = hists__col_len(he->hists, dim->se->se_width_idx);
+
+ if (dim == &dim_symbol || dim == &dim_srcline)
+ len = symbol_width(he->hists, dim->se);
+ }
+
+ return dim->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+
+ return dim->se->se_cmp(a, b);
+}
+
+static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt);
+ struct c2c_dimension *dim = c2c_fmt->dim;
+ int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
+
+ collapse_fn = dim->se->se_collapse ?: dim->se->se_cmp;
+ return collapse_fn(a, b);
+}
+
+static struct c2c_fmt *get_format(const char *name)
+{
+ struct c2c_dimension *dim = get_dimension(name);
+ struct c2c_fmt *c2c_fmt;
+ struct perf_hpp_fmt *fmt;
+
+ if (!dim)
+ return NULL;
+
+ c2c_fmt = zalloc(sizeof(*c2c_fmt));
+ if (!c2c_fmt)
+ return NULL;
+
+ c2c_fmt->dim = dim;
+
+ fmt = &c2c_fmt->fmt;
+ INIT_LIST_HEAD(&fmt->list);
+ INIT_LIST_HEAD(&fmt->sort_list);
+
+ fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->sort = dim->se ? c2c_se_cmp : dim->cmp;
+ fmt->color = dim->se ? NULL : dim->color;
+ fmt->entry = dim->se ? c2c_se_entry : dim->entry;
+ fmt->header = c2c_header;
+ fmt->width = c2c_width;
+ fmt->collapse = dim->se ? c2c_se_collapse : dim->cmp;
+ fmt->equal = fmt_equal;
+ fmt->free = fmt_free;
+
+ return c2c_fmt;
+}
+
+static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name)
+{
+ struct c2c_fmt *c2c_fmt = get_format(name);
+
+ if (!c2c_fmt) {
+ reset_dimensions();
+ return output_field_add(hpp_list, name);
+ }
+
+ perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt);
+ return 0;
+}
+
+static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name)
+{
+ struct c2c_fmt *c2c_fmt = get_format(name);
+ struct c2c_dimension *dim;
+
+ if (!c2c_fmt) {
+ reset_dimensions();
+ return sort_dimension__add(hpp_list, name, NULL, 0);
+ }
+
+ dim = c2c_fmt->dim;
+ if (dim == &dim_dso)
+ hpp_list->dso = 1;
+
+ perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
+ return 0;
+}
+
+#define PARSE_LIST(_list, _fn) \
+ do { \
+ char *tmp, *tok; \
+ ret = 0; \
+ \
+ if (!_list) \
+ break; \
+ \
+ for (tok = strtok_r((char *)_list, ", ", &tmp); \
+ tok; tok = strtok_r(NULL, ", ", &tmp)) { \
+ ret = _fn(hpp_list, tok); \
+ if (ret == -EINVAL) { \
+ pr_err("Invalid --fields key: `%s'", tok); \
+ break; \
+ } else if (ret == -ESRCH) { \
+ pr_err("Unknown --fields key: `%s'", tok); \
+ break; \
+ } \
+ } \
+ } while (0)
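+/*
+ * For illustration: with output = strdup("pid,iaddr"),
+ * PARSE_LIST(output, c2c_hists__init_output) invokes
+ * c2c_hists__init_output(hpp_list, "pid") and then
+ * c2c_hists__init_output(hpp_list, "iaddr"), stopping early when a key
+ * is invalid (-EINVAL) or unknown (-ESRCH).
+ */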
+
+static int hpp_list__parse(struct perf_hpp_list *hpp_list,
+ const char *output_,
+ const char *sort_)
+{
+ char *output = output_ ? strdup(output_) : NULL;
+ char *sort = sort_ ? strdup(sort_) : NULL;
+ int ret;
+
+ PARSE_LIST(output, c2c_hists__init_output);
+ PARSE_LIST(sort, c2c_hists__init_sort);
+
+ /* copy sort keys to output fields */
+ perf_hpp__setup_output_field(hpp_list);
+
+ /*
+ * We don't need any sorting keys other than those already
+ * specified. Copying output fields into sort keys would also
+ * slow down processing a lot with a big number of output
+ * fields, so it is switched off for c2c.
+ */
+
+#if 0
+ /* and then copy output fields to sort keys */
+ perf_hpp__append_sort_keys(&hists->list);
+#endif
+
+ free(output);
+ free(sort);
+ return ret;
+}
+
+static int c2c_hists__init(struct c2c_hists *hists,
+ const char *sort,
+ int nr_header_lines)
+{
+ __hists__init(&hists->hists, &hists->list);
+
+ /*
+ * Initialize with sort fields only; we need to resort later
+ * anyway, and that is where the output fields are added as well.
+ */
+ perf_hpp_list__init(&hists->list);
+
+ /* Overload the number of header lines. */
+ hists->list.nr_header_lines = nr_header_lines;
+
+ return hpp_list__parse(&hists->list, NULL, sort);
+}
+
+static int c2c_hists__reinit(struct c2c_hists *c2c_hists,
+ const char *output,
+ const char *sort)
+{
+ perf_hpp__reset_output_field(&c2c_hists->list);
+ return hpp_list__parse(&c2c_hists->list, output, sort);
+}
+
+#define DISPLAY_LINE_LIMIT 0.0005
+
+static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
+{
+ struct c2c_hist_entry *c2c_he;
+ double ld_dist;
+
+ if (c2c.show_all)
+ return true;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+#define FILTER_HITM(__h) \
+ if (stats->__h) { \
+ ld_dist = ((double)c2c_he->stats.__h / stats->__h); \
+ if (ld_dist < DISPLAY_LINE_LIMIT) \
+ he->filtered = HIST_FILTER__C2C; \
+ } else { \
+ he->filtered = HIST_FILTER__C2C; \
+ }
+
+ switch (c2c.display) {
+ case DISPLAY_LCL:
+ FILTER_HITM(lcl_hitm);
+ break;
+ case DISPLAY_RMT:
+ FILTER_HITM(rmt_hitm);
+ break;
+ case DISPLAY_TOT:
+ FILTER_HITM(tot_hitm);
+ break;
+ default:
+ break;
+ }
+
+#undef FILTER_HITM
+
+ return he->filtered == 0;
+}
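+/*
+ * With DISPLAY_LINE_LIMIT at 0.0005 a cacheline survives only when it
+ * carries at least 0.05% of the displayed HITM total, e.g. 5 of 10000
+ * remote HITMs in DISPLAY_RMT mode.
+ */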
+
+static inline int valid_hitm_or_store(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ bool has_hitm;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ has_hitm = c2c.display == DISPLAY_TOT ? c2c_he->stats.tot_hitm :
+ c2c.display == DISPLAY_LCL ? c2c_he->stats.lcl_hitm :
+ c2c_he->stats.rmt_hitm;
+ return has_hitm || c2c_he->stats.store;
+}
+
+static void set_node_width(struct c2c_hist_entry *c2c_he, int len)
+{
+ struct c2c_dimension *dim;
+
+ dim = &c2c.hists == c2c_he->hists ?
+ &dim_dcacheline_node : &dim_offset_node;
+
+ if (len > dim->width)
+ dim->width = len;
+}
+
+static int set_nodestr(struct c2c_hist_entry *c2c_he)
+{
+ char buf[30];
+ int len;
+
+ if (c2c_he->nodestr)
+ return 0;
+
+ if (bitmap_weight(c2c_he->nodeset, c2c.nodes_cnt)) {
+ len = bitmap_scnprintf(c2c_he->nodeset, c2c.nodes_cnt,
+ buf, sizeof(buf));
+ } else {
+ len = scnprintf(buf, sizeof(buf), "N/A");
+ }
+
+ set_node_width(c2c_he, len);
+ c2c_he->nodestr = strdup(buf);
+ return c2c_he->nodestr ? 0 : -ENOMEM;
+}
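+/*
+ * The resulting node string is the nodeset rendered by bitmap_scnprintf()
+ * (e.g. "0" or "0-1"), or "N/A" when no node was resolved; the column
+ * width is grown to fit the longest string seen.
+ */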
+
+static void calc_width(struct c2c_hist_entry *c2c_he)
+{
+ struct c2c_hists *c2c_hists;
+
+ c2c_hists = container_of(c2c_he->he.hists, struct c2c_hists, hists);
+ hists__calc_col_len(&c2c_hists->hists, &c2c_he->he);
+ set_nodestr(c2c_he);
+}
+
+static int filter_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ if (c2c.show_src && !he->srcline)
+ he->srcline = hist_entry__srcline(he);
+
+ calc_width(c2c_he);
+
+ if (!valid_hitm_or_store(he))
+ he->filtered = HIST_FILTER__C2C;
+
+ return 0;
+}
+
+static int resort_cl_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+ bool display = he__display(he, &c2c.hitm_stats);
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ if (display && c2c_hists) {
+ static unsigned int idx;
+
+ c2c_he->cacheline_idx = idx++;
+ calc_width(c2c_he);
+
+ c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort);
+
+ hists__collapse_resort(&c2c_hists->hists, NULL);
+ hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb);
+ }
+
+ return 0;
+}
+
+static void setup_nodes_header(void)
+{
+ dim_node.header = header_node[c2c.node_info];
+}
+
+static int setup_nodes(struct perf_session *session)
+{
+ struct numa_node *n;
+ unsigned long **nodes;
+ int node, cpu;
+ int *cpu2node;
+
+ if (c2c.node_info > 2)
+ c2c.node_info = 2;
+
+ c2c.nodes_cnt = session->header.env.nr_numa_nodes;
+ c2c.cpus_cnt = session->header.env.nr_cpus_online;
+
+ n = session->header.env.numa_nodes;
+ if (!n)
+ return -EINVAL;
+
+ nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt);
+ if (!nodes)
+ return -ENOMEM;
+
+ c2c.nodes = nodes;
+
+ cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt);
+ if (!cpu2node)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < c2c.cpus_cnt; cpu++)
+ cpu2node[cpu] = -1;
+
+ c2c.cpu2node = cpu2node;
+
+ for (node = 0; node < c2c.nodes_cnt; node++) {
+ struct cpu_map *map = n[node].map;
+ unsigned long *set;
+
+ set = bitmap_alloc(c2c.cpus_cnt);
+ if (!set)
+ return -ENOMEM;
+
+ nodes[node] = set;
+
+ /* empty node, skip */
+ if (cpu_map__empty(map))
+ continue;
+
+ for (cpu = 0; cpu < map->nr; cpu++) {
+ set_bit(map->map[cpu], set);
+
+ if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug"))
+ return -EINVAL;
+
+ cpu2node[map->map[cpu]] = node;
+ }
+ }
+
+ setup_nodes_header();
+ return 0;
+}
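+/*
+ * For illustration: on a two-node machine with CPUs 0-3 on node 0 and
+ * CPUs 4-7 on node 1, c2c.nodes[0] gets bits 0-3 set, c2c.nodes[1] bits
+ * 4-7, and c2c.cpu2node maps 0..3 -> 0 and 4..7 -> 1.
+ */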
+
+#define HAS_HITMS(__h) ((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm)
+
+static int resort_hitm_cb(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+
+ if (HAS_HITMS(c2c_he)) {
+ c2c.shared_clines++;
+ c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats);
+ }
+
+ return 0;
+}
+
+static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb)
+{
+ struct rb_node *next = rb_first(&hists->entries);
+ int ret = 0;
+
+ while (next) {
+ struct hist_entry *he;
+
+ he = rb_entry(next, struct hist_entry, rb_node);
+ ret = cb(he);
+ if (ret)
+ break;
+ next = rb_next(&he->rb_node);
+ }
+
+ return ret;
+}
+
+static void print_c2c__display_stats(FILE *out)
+{
+ int llc_misses;
+ struct c2c_stats *stats = &c2c.hists.stats;
+
+ llc_misses = stats->lcl_dram +
+ stats->rmt_dram +
+ stats->rmt_hit +
+ stats->rmt_hitm;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Trace Event Information \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Total records : %10d\n", stats->nr_entries);
+ fprintf(out, " Locked Load/Store Operations : %10d\n", stats->locks);
+ fprintf(out, " Load Operations : %10d\n", stats->load);
+ fprintf(out, " Loads - uncacheable : %10d\n", stats->ld_uncache);
+ fprintf(out, " Loads - IO : %10d\n", stats->ld_io);
+ fprintf(out, " Loads - Miss : %10d\n", stats->ld_miss);
+ fprintf(out, " Loads - no mapping : %10d\n", stats->ld_noadrs);
+ fprintf(out, " Load Fill Buffer Hit : %10d\n", stats->ld_fbhit);
+ fprintf(out, " Load L1D hit : %10d\n", stats->ld_l1hit);
+ fprintf(out, " Load L2D hit : %10d\n", stats->ld_l2hit);
+ fprintf(out, " Load LLC hit : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+ fprintf(out, " Load Local HITM : %10d\n", stats->lcl_hitm);
+ fprintf(out, " Load Remote HITM : %10d\n", stats->rmt_hitm);
+ fprintf(out, " Load Remote HIT : %10d\n", stats->rmt_hit);
+ fprintf(out, " Load Local DRAM : %10d\n", stats->lcl_dram);
+ fprintf(out, " Load Remote DRAM : %10d\n", stats->rmt_dram);
+ fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl);
+ fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared);
+ fprintf(out, " Load LLC Misses : %10d\n", llc_misses);
+ fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.);
+ fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.);
+ fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.);
+ fprintf(out, " LLC Misses to Remote cache (HITM) : %10.1f%%\n", ((double)stats->rmt_hitm/(double)llc_misses) * 100.);
+ fprintf(out, " Store Operations : %10d\n", stats->store);
+ fprintf(out, " Store - uncacheable : %10d\n", stats->st_uncache);
+ fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs);
+ fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit);
+ fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss);
+ fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap);
+ fprintf(out, " Unable to parse data source : %10d\n", stats->noparse);
+}
+
+static void print_shared_cacheline_info(FILE *out)
+{
+ struct c2c_stats *stats = &c2c.hitm_stats;
+ int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Global Shared Cache Line Event Information \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Total Shared Cache Lines : %10d\n", c2c.shared_clines);
+ fprintf(out, " Load HITs on shared lines : %10d\n", stats->load);
+ fprintf(out, " Fill Buffer Hits on shared lines : %10d\n", stats->ld_fbhit);
+ fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit);
+ fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit);
+ fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm);
+ fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks);
+ fprintf(out, " Store HITs on shared lines : %10d\n", stats->store);
+ fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit);
+ fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store);
+}
+
+static void print_cacheline(struct c2c_hists *c2c_hists,
+ struct hist_entry *he_cl,
+ struct perf_hpp_list *hpp_list,
+ FILE *out)
+{
+ char bf[1000];
+ struct perf_hpp hpp = {
+ .buf = bf,
+ .size = 1000,
+ };
+ static bool once;
+
+ if (!once) {
+ hists__fprintf_headers(&c2c_hists->hists, out);
+ once = true;
+ } else {
+ fprintf(out, "\n");
+ }
+
+ fprintf(out, " -------------------------------------------------------------\n");
+ __hist_entry__snprintf(he_cl, &hpp, hpp_list);
+ fprintf(out, "%s\n", bf);
+ fprintf(out, " -------------------------------------------------------------\n");
+
+ hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, false);
+}
+
+static void print_pareto(FILE *out)
+{
+ struct perf_hpp_list hpp_list;
+ struct rb_node *nd;
+ int ret;
+
+ perf_hpp_list__init(&hpp_list);
+ ret = hpp_list__parse(&hpp_list,
+ "cl_num,"
+ "cl_rmt_hitm,"
+ "cl_lcl_hitm,"
+ "cl_stores_l1hit,"
+ "cl_stores_l1miss,"
+ "dcacheline",
+ NULL);
+
+ if (WARN_ONCE(ret, "failed to setup sort entries\n"))
+ return;
+
+ nd = rb_first(&c2c.hists.hists.entries);
+
+ for (; nd; nd = rb_next(nd)) {
+ struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+ struct c2c_hist_entry *c2c_he;
+
+ if (he->filtered)
+ continue;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ print_cacheline(c2c_he->hists, he, &hpp_list, out);
+ }
+}
+
+static void print_c2c_info(FILE *out, struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ bool first = true;
+
+ fprintf(out, "=================================================\n");
+ fprintf(out, " c2c details \n");
+ fprintf(out, "=================================================\n");
+
+ evlist__for_each_entry(evlist, evsel) {
+ fprintf(out, "%-36s: %s\n", first ? " Events" : "",
+ perf_evsel__name(evsel));
+ first = false;
+ }
+ fprintf(out, " Cachelines sort on : %s HITMs\n",
+ display_str[c2c.display]);
+ fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort);
+}
+
+static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
+{
+ setup_pager();
+
+ print_c2c__display_stats(out);
+ fprintf(out, "\n");
+ print_shared_cacheline_info(out);
+ fprintf(out, "\n");
+ print_c2c_info(out, session);
+
+ if (c2c.stats_only)
+ return;
+
+ fprintf(out, "\n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Shared Data Cache Line Table \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, "#\n");
+
+ hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, true);
+
+ fprintf(out, "\n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, " Shared Cache Line Distribution Pareto \n");
+ fprintf(out, "=================================================\n");
+ fprintf(out, "#\n");
+
+ print_pareto(out);
+}
+
+#ifdef HAVE_SLANG_SUPPORT
+static void c2c_browser__update_nr_entries(struct hist_browser *hb)
+{
+ u64 nr_entries = 0;
+ struct rb_node *nd = rb_first(&hb->hists->entries);
+
+ while (nd) {
+ struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node);
+
+ if (!he->filtered)
+ nr_entries++;
+
+ nd = rb_next(nd);
+ }
+
+ hb->nr_non_filtered_entries = nr_entries;
+}
+
+struct c2c_cacheline_browser {
+ struct hist_browser hb;
+ struct hist_entry *he;
+};
+
+static int
+perf_c2c_cacheline_browser__title(struct hist_browser *browser,
+ char *bf, size_t size)
+{
+ struct c2c_cacheline_browser *cl_browser;
+ struct hist_entry *he;
+ uint64_t addr = 0;
+
+ cl_browser = container_of(browser, struct c2c_cacheline_browser, hb);
+ he = cl_browser->he;
+
+ if (he->mem_info)
+ addr = cl_address(he->mem_info->daddr.addr);
+
+ scnprintf(bf, size, "Cacheline 0x%lx", addr);
+ return 0;
+}
+
+static struct c2c_cacheline_browser*
+c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he)
+{
+ struct c2c_cacheline_browser *browser;
+
+ browser = zalloc(sizeof(*browser));
+ if (browser) {
+ hist_browser__init(&browser->hb, hists);
+ browser->hb.c2c_filter = true;
+ browser->hb.title = perf_c2c_cacheline_browser__title;
+ browser->he = he;
+ }
+
+ return browser;
+}
+
+static int perf_c2c__browse_cacheline(struct hist_entry *he)
+{
+ struct c2c_hist_entry *c2c_he;
+ struct c2c_hists *c2c_hists;
+ struct c2c_cacheline_browser *cl_browser;
+ struct hist_browser *browser;
+ int key = -1;
+ const char help[] =
+ " ENTER Toggle callchains (if present) \n"
+ " n Toggle Node details info \n"
+ " s Toggle full length of symbol and source line columns \n"
+ " q Return back to cacheline list \n";
+
+ if (!he)
+ return 0;
+
+ /* Display compact version first. */
+ c2c.symbol_full = false;
+
+ c2c_he = container_of(he, struct c2c_hist_entry, he);
+ c2c_hists = c2c_he->hists;
+
+ cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he);
+ if (cl_browser == NULL)
+ return -1;
+
+ browser = &cl_browser->hb;
+
+ /* Reset the abort key so that Ctrl-C is delivered as a normal key. */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ c2c_browser__update_nr_entries(browser);
+
+ while (1) {
+ key = hist_browser__run(browser, "? - help", true);
+
+ switch (key) {
+ case 's':
+ c2c.symbol_full = !c2c.symbol_full;
+ break;
+ case 'n':
+ c2c.node_info = (c2c.node_info + 1) % 3;
+ setup_nodes_header();
+ break;
+ case 'q':
+ goto out;
+ case '?':
+ ui_browser__help_window(&browser->b, help);
+ break;
+ default:
+ break;
+ }
+ }
+
+out:
+ free(cl_browser);
+ return 0;
+}
+
+static int perf_c2c_browser__title(struct hist_browser *browser,
+ char *bf, size_t size)
+{
+ scnprintf(bf, size,
+ "Shared Data Cache Line Table "
+ "(%lu entries, sorted on %s HITMs)",
+ browser->nr_non_filtered_entries,
+ display_str[c2c.display]);
+ return 0;
+}
+
+static struct hist_browser*
+perf_c2c_browser__new(struct hists *hists)
+{
+ struct hist_browser *browser = hist_browser__new(hists);
+
+ if (browser) {
+ browser->title = perf_c2c_browser__title;
+ browser->c2c_filter = true;
+ }
+
+ return browser;
+}
+
+static int perf_c2c__hists_browse(struct hists *hists)
+{
+ struct hist_browser *browser;
+ int key = -1;
+ const char help[] =
+ " d Display cacheline details \n"
+ " ENTER Toggle callchains (if present) \n"
+ " q Quit \n";
+
+ browser = perf_c2c_browser__new(hists);
+ if (browser == NULL)
+ return -1;
+
+ /* Reset the abort key so that Ctrl-C is delivered as a normal key. */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ c2c_browser__update_nr_entries(browser);
+
+ while (1) {
+ key = hist_browser__run(browser, "? - help", true);
+
+ switch (key) {
+ case 'q':
+ goto out;
+ case 'd':
+ perf_c2c__browse_cacheline(browser->he_selection);
+ break;
+ case '?':
+ ui_browser__help_window(&browser->b, help);
+ break;
+ default:
+ break;
+ }
+ }
+
+out:
+ hist_browser__delete(browser);
+ return 0;
+}
+
+static void perf_c2c_display(struct perf_session *session)
+{
+ if (use_browser == 0)
+ perf_c2c__hists_fprintf(stdout, session);
+ else
+ perf_c2c__hists_browse(&c2c.hists.hists);
+}
+#else
+static void perf_c2c_display(struct perf_session *session)
+{
+ use_browser = 0;
+ perf_c2c__hists_fprintf(stdout, session);
+}
+#endif /* HAVE_SLANG_SUPPORT */
+
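+/*
+ * Build a header line of 'len' characters that centers 'orig' within a
+ * run of dashes, e.g. fill_line("Cacheline", 20) yields
+ * "----- Cacheline ----".
+ */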
+static char *fill_line(const char *orig, int len)
+{
+ int i, j, olen = strlen(orig);
+ char *buf;
+
+ buf = zalloc(len + 1);
+ if (!buf)
+ return NULL;
+
+ j = len / 2 - olen / 2;
+
+ for (i = 0; i < j - 1; i++)
+ buf[i] = '-';
+
+ buf[i++] = ' ';
+
+ strcpy(buf + i, orig);
+
+ i += olen;
+
+ buf[i++] = ' ';
+
+ for (; i < len; i++)
+ buf[i] = '-';
+
+ return buf;
+}
+
+static int ui_quirks(void)
+{
+ const char *nodestr = "Data address";
+ char *buf;
+
+ if (!c2c.use_stdio) {
+ dim_offset.width = 5;
+ dim_offset.header = header_offset_tui;
+ nodestr = "CL";
+ }
+
+ dim_percent_hitm.header = percent_hitm_header[c2c.display];
+
+ /* Fix line 0 of the header for the dcacheline column. */
+ buf = fill_line("Cacheline", dim_dcacheline.width +
+ dim_dcacheline_node.width +
+ dim_dcacheline_count.width + 4);
+ if (!buf)
+ return -ENOMEM;
+
+ dim_dcacheline.header.line[0].text = buf;
+
+ /* Fix line 0 of the header for the offset column. */
+ buf = fill_line(nodestr, dim_offset.width +
+ dim_offset_node.width +
+ dim_dcacheline_count.width + 4);
+ if (!buf)
+ return -ENOMEM;
+
+ dim_offset.header.line[0].text = buf;
+
+ return 0;
+}
+
+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
+
+const char callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+ CALLCHAIN_REPORT_HELP
+ "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = !unset;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ symbol_conf.use_callchain = false;
+ callchain->mode = CHAIN_NONE;
+ return 0;
+ }
+
+ return parse_callchain_report_opt(arg);
+}
+
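+/*
+ * Pick the callchain mode from the recorded sample type: user registers
+ * plus user stack mean DWARF unwinding, a branch stack means LBR, and a
+ * plain callchain means frame pointers.
+ */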
+static int setup_callchain(struct perf_evlist *evlist)
+{
+ u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ enum perf_call_graph_mode mode = CALLCHAIN_NONE;
+
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
+ mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ mode = CALLCHAIN_LBR;
+ else if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ mode = CALLCHAIN_FP;
+
+ if (!callchain_param.enabled &&
+ callchain_param.mode != CHAIN_NONE &&
+ mode != CALLCHAIN_NONE) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ ui__error("Can't register callchain params.\n");
+ return -EINVAL;
+ }
+ }
+
+ callchain_param.record_mode = mode;
+ callchain_param.min_percent = 0;
+ return 0;
+}
+
+static int setup_display(const char *str)
+{
+ const char *display = str ?: "tot";
+
+ if (!strcmp(display, "tot"))
+ c2c.display = DISPLAY_TOT;
+ else if (!strcmp(display, "rmt"))
+ c2c.display = DISPLAY_RMT;
+ else if (!strcmp(display, "lcl"))
+ c2c.display = DISPLAY_LCL;
+ else {
+ pr_err("failed: unknown display type: %s\n", str);
+ return -1;
+ }
+
+ return 0;
+}
+
+#define for_each_token(__tok, __buf, __sep, __tmp) \
+ for (__tok = strtok_r(__buf, __sep, &__tmp); __tok; \
+ __tok = strtok_r(NULL, __sep, &__tmp))
+
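+/*
+ * Build the output field list for the cacheline details table from the
+ * coalesce ('cl_sort') tokens, e.g. coalescing on "iaddr" pulls in the
+ * symbol, dso and (unless suppressed) source line columns.
+ */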
+static int build_cl_output(char *cl_sort, bool no_source)
+{
+ char *tok, *tmp, *buf = strdup(cl_sort);
+ bool add_pid = false;
+ bool add_tid = false;
+ bool add_iaddr = false;
+ bool add_sym = false;
+ bool add_dso = false;
+ bool add_src = false;
+ int ret = 0;
+
+ if (!buf)
+ return -ENOMEM;
+
+ for_each_token(tok, buf, ",", tmp) {
+ if (!strcmp(tok, "tid")) {
+ add_tid = true;
+ } else if (!strcmp(tok, "pid")) {
+ add_pid = true;
+ } else if (!strcmp(tok, "iaddr")) {
+ add_iaddr = true;
+ add_sym = true;
+ add_dso = true;
+ add_src = !no_source;
+ } else if (!strcmp(tok, "dso")) {
+ add_dso = true;
+ } else if (strcmp(tok, "offset")) {
+ pr_err("unrecognized sort token: %s\n", tok);
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+
+ if (asprintf(&c2c.cl_output,
+ "%s%s%s%s%s%s%s%s%s%s",
+ c2c.use_stdio ? "cl_num_empty," : "",
+ "percent_rmt_hitm,"
+ "percent_lcl_hitm,"
+ "percent_stores_l1hit,"
+ "percent_stores_l1miss,"
+ "offset,offset_node,dcacheline_count,",
+ add_pid ? "pid," : "",
+ add_tid ? "tid," : "",
+ add_iaddr ? "iaddr," : "",
+ "mean_rmt,"
+ "mean_lcl,"
+ "mean_load,"
+ "tot_recs,"
+ "cpucnt,",
+ add_sym ? "symbol," : "",
+ add_dso ? "dso," : "",
+ add_src ? "cl_srcline," : "",
+ "node") < 0) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ c2c.show_src = add_src;
+err:
+ free(buf);
+ return ret;
+}
+
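+/*
+ * Cacheline details are always keyed on the in-cacheline offset, plus
+ * the user supplied (or default) coalesce fields.
+ */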
+static int setup_coalesce(const char *coalesce, bool no_source)
+{
+ const char *c = coalesce ?: coalesce_default;
+
+ if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0)
+ return -ENOMEM;
+
+ if (build_cl_output(c2c.cl_sort, no_source))
+ return -1;
+
+ if (asprintf(&c2c.cl_resort, "offset,%s",
+ c2c.display == DISPLAY_TOT ?
+ "tot_hitm" :
+ c2c.display == DISPLAY_RMT ?
+ "rmt_hitm,lcl_hitm" :
+ "lcl_hitm,rmt_hitm") < 0)
+ return -ENOMEM;
+
+ pr_debug("coalesce sort fields: %s\n", c2c.cl_sort);
+ pr_debug("coalesce resort fields: %s\n", c2c.cl_resort);
+ pr_debug("coalesce output fields: %s\n", c2c.cl_output);
+ return 0;
+}
+
+static int perf_c2c__report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct ui_progress prog;
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
+ const char *display = NULL;
+ const char *coalesce = NULL;
+ bool no_source = false;
+ const struct option options[] = {
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "the input file to process"),
+ OPT_INCR('N', "node-info", &c2c.node_info,
+ "show extra node info in report (repeat for more info)"),
+ OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"),
+ OPT_BOOLEAN(0, "stats", &c2c.stats_only,
+ "Display only statistic tables (implies --stdio)"),
+ OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full,
+ "Display full length of symbols"),
+ OPT_BOOLEAN(0, "no-source", &no_source,
+ "Do not display Source Line column"),
+ OPT_BOOLEAN(0, "show-all", &c2c.show_all,
+ "Show all captured HITM lines."),
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+ callchain_help, &parse_callchain_opt,
+ callchain_default_opt),
+ OPT_STRING('d', "display", &display, "Switch HITM output type", "lcl,rmt"),
+ OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
+ "coalesce fields: pid,tid,iaddr,dso"),
+ OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_PARENT(c2c_options),
+ OPT_END()
+ };
+ int err = 0;
+
+ argc = parse_options(argc, argv, options, report_c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc)
+ usage_with_options(report_c2c_usage, options);
+
+#ifndef HAVE_SLANG_SUPPORT
+ c2c.use_stdio = true;
+#endif
+
+ if (c2c.stats_only)
+ c2c.use_stdio = true;
+
+ if (!input_name || !strlen(input_name))
+ input_name = "perf.data";
+
+ data.file.path = input_name;
+ data.force = symbol_conf.force;
+
+ err = setup_display(display);
+ if (err)
+ goto out;
+
+ err = setup_coalesce(coalesce, no_source);
+ if (err) {
+ pr_debug("Failed to initialize hists\n");
+ goto out;
+ }
+
+ err = c2c_hists__init(&c2c.hists, "dcacheline", 2);
+ if (err) {
+ pr_debug("Failed to initialize hists\n");
+ goto out;
+ }
+
+ session = perf_session__new(&data, 0, &c2c.tool);
+ if (session == NULL) {
+ pr_debug("No memory for session\n");
+ goto out;
+ }
+
+ err = setup_nodes(session);
+ if (err) {
+ pr_err("Failed setup nodes\n");
+ goto out;
+ }
+
+ err = mem2node__init(&c2c.mem2node, &session->header.env);
+ if (err)
+ goto out_session;
+
+ err = setup_callchain(session->evlist);
+ if (err)
+ goto out_mem2node;
+
+ if (symbol__init(&session->header.env) < 0)
+ goto out_mem2node;
+
+ /* No pipe support at the moment. */
+ if (perf_data__is_pipe(session->data)) {
+ pr_debug("No pipe support at the moment.\n");
+ goto out_mem2node;
+ }
+
+ if (c2c.use_stdio)
+ use_browser = 0;
+ else
+ use_browser = 1;
+
+ setup_browser(false);
+
+ err = perf_session__process_events(session);
+ if (err) {
+ pr_err("failed to process sample\n");
+ goto out_mem2node;
+ }
+
+ c2c_hists__reinit(&c2c.hists,
+ "cl_idx,"
+ "dcacheline,"
+ "dcacheline_node,"
+ "dcacheline_count,"
+ "tot_recs,"
+ "percent_hitm,"
+ "tot_hitm,lcl_hitm,rmt_hitm,"
+ "stores,stores_l1hit,stores_l1miss,"
+ "dram_lcl,dram_rmt,"
+ "ld_llcmiss,"
+ "tot_loads,"
+ "ld_fbhit,ld_l1hit,ld_l2hit,"
+ "ld_lclhit,ld_rmthit",
+ c2c.display == DISPLAY_TOT ? "tot_hitm" :
+ c2c.display == DISPLAY_LCL ? "lcl_hitm" : "rmt_hitm"
+ );
+
+ ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting...");
+
+ hists__collapse_resort(&c2c.hists.hists, NULL);
+ hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb);
+ hists__iterate_cb(&c2c.hists.hists, resort_cl_cb);
+
+ ui_progress__finish();
+
+ if (ui_quirks()) {
+ pr_err("failed to setup UI\n");
+ goto out_mem2node;
+ }
+
+ perf_c2c_display(session);
+
+out_mem2node:
+ mem2node__exit(&c2c.mem2node);
+out_session:
+ perf_session__delete(session);
+out:
+ return err;
+}
+
+static int parse_record_events(const struct option *opt,
+ const char *str, int unset __maybe_unused)
+{
+ bool *event_set = (bool *) opt->value;
+
+ *event_set = true;
+ return perf_mem_events__parse(str);
+}
+
+
+static const char * const __usage_record[] = {
+ "perf c2c record [<options>] [<command>]",
+ "perf c2c record [<options>] -- <command> [<options>]",
+ NULL
+};
+
+static const char * const *record_mem_usage = __usage_record;
+
+static int perf_c2c__record(int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+ int ret;
+ bool all_user = false, all_kernel = false;
+ bool event_set = false;
+ struct option options[] = {
+ OPT_CALLBACK('e', "event", &event_set, "event",
+ "event selector. Use 'perf mem record -e list' to list available events",
+ parse_record_events),
+ OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"),
+ OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"),
+ OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"),
+ OPT_PARENT(c2c_options),
+ OPT_END()
+ };
+
+ if (perf_mem_events__init()) {
+ pr_err("failed: memory events not supported\n");
+ return -1;
+ }
+
+ argc = parse_options(argc, argv, options, record_mem_usage,
+ PARSE_OPT_KEEP_UNKNOWN);
+
+ rec_argc = argc + 11; /* max number of fixed arguments added below */
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -1;
+
+ rec_argv[i++] = "record";
+
+ if (!event_set) {
+ perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+ perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+ }
+
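+ /* -W/--weight carries the load latency in each sample. */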
+ if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+ rec_argv[i++] = "-W";
+
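+ /* Record data addresses (-d), plus the physical address and CPU of each sample. */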
+ rec_argv[i++] = "-d";
+ rec_argv[i++] = "--phys-data";
+ rec_argv[i++] = "--sample-cpu";
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ if (!perf_mem_events[j].record)
+ continue;
+
+ if (!perf_mem_events[j].supported) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events[j].name);
+ free(rec_argv);
+ return -1;
+ }
+
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = perf_mem_events__name(j);
+ }
+
+ if (all_user)
+ rec_argv[i++] = "--all-user";
+
+ if (all_kernel)
+ rec_argv[i++] = "--all-kernel";
+
+ for (j = 0; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ if (verbose > 0) {
+ pr_debug("calling: ");
+
+ j = 0;
+
+ while (rec_argv[j]) {
+ pr_debug("%s ", rec_argv[j]);
+ j++;
+ }
+ pr_debug("\n");
+ }
+
+ ret = cmd_record(i, rec_argv);
+ free(rec_argv);
+ return ret;
+}
+
+int cmd_c2c(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, c2c_options, c2c_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (!argc)
+ usage_with_options(c2c_usage, c2c_options);
+
+ if (!strncmp(argv[0], "rec", 3)) {
+ return perf_c2c__record(argc, argv);
+ } else if (!strncmp(argv[0], "rep", 3)) {
+ return perf_c2c__report(argc, argv);
+ } else {
+ usage_with_options(c2c_usage, c2c_options);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-config.c b/tools/perf/builtin-config.c
new file mode 100644
index 000000000..514f70f95
--- /dev/null
+++ b/tools/perf/builtin-config.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-config.c
+ *
+ * Copyright (C) 2015, Taeung Song <treeze.taeung@gmail.com>
+ *
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/cache.h"
+#include <subcmd/parse-options.h>
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/config.h"
+#include <linux/string.h>
+
+static bool use_system_config, use_user_config;
+
+static const char * const config_usage[] = {
+ "perf config [<file-option>] [options] [section.name[=value] ...]",
+ NULL
+};
+
+enum actions {
+ ACTION_LIST = 1
+} actions;
+
+static struct option config_options[] = {
+ OPT_SET_UINT('l', "list", &actions,
+ "show current config variables", ACTION_LIST),
+ OPT_BOOLEAN(0, "system", &use_system_config, "use system config file"),
+ OPT_BOOLEAN(0, "user", &use_user_config, "use user config file"),
+ OPT_END()
+};
+
+static int set_config(struct perf_config_set *set, const char *file_name)
+{
+ struct perf_config_section *section = NULL;
+ struct perf_config_item *item = NULL;
+ const char *first_line = "# this file is auto-generated.";
+ FILE *fp;
+
+ if (set == NULL)
+ return -1;
+
+ fp = fopen(file_name, "w");
+ if (!fp)
+ return -1;
+
+ fprintf(fp, "%s\n", first_line);
+
+ /* overwrite config variables */
+ perf_config_items__for_each_entry(&set->sections, section) {
+ if (!use_system_config && section->from_system_config)
+ continue;
+ fprintf(fp, "[%s]\n", section->name);
+
+ perf_config_items__for_each_entry(&section->items, item) {
+ if (!use_system_config && item->from_system_config)
+ continue;
+ if (item->value)
+ fprintf(fp, "\t%s = %s\n",
+ item->name, item->value);
+ }
+ }
+ fclose(fp);
+
+ return 0;
+}
+
+static int show_spec_config(struct perf_config_set *set, const char *var)
+{
+ struct perf_config_section *section;
+ struct perf_config_item *item;
+
+ if (set == NULL)
+ return -1;
+
+ perf_config_items__for_each_entry(&set->sections, section) {
+ if (!strstarts(var, section->name))
+ continue;
+
+ perf_config_items__for_each_entry(&section->items, item) {
+ const char *name = var + strlen(section->name) + 1;
+
+ if (strcmp(name, item->name) == 0) {
+ char *value = item->value;
+
+ if (value) {
+ printf("%s=%s\n", var, value);
+ return 0;
+ }
+ }
+
+ }
+ }
+
+ return 0;
+}
+
+static int show_config(struct perf_config_set *set)
+{
+ struct perf_config_section *section;
+ struct perf_config_item *item;
+
+ if (set == NULL)
+ return -1;
+
+ perf_config_set__for_each_entry(set, section, item) {
+ char *value = item->value;
+
+ if (value)
+ printf("%s.%s=%s\n", section->name,
+ item->name, value);
+ }
+
+ return 0;
+}
+
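+/*
+ * Split a "section.name[=value]" argument into its variable and value
+ * parts; with no '=' the value stays NULL, which makes the caller show
+ * the current setting instead of changing it.
+ */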
+static int parse_config_arg(char *arg, char **var, char **value)
+{
+ const char *last_dot = strchr(arg, '.');
+
+ /*
+ * Since "var" actually contains the section name and the real
+ * config variable name separated by a dot, we have to know where the dot is.
+ */
+ if (last_dot == NULL || last_dot == arg) {
+ pr_err("The config variable does not contain a section name: %s\n", arg);
+ return -1;
+ }
+ if (!last_dot[1]) {
+ pr_err("The config variable does not contain a variable name: %s\n", arg);
+ return -1;
+ }
+
+ *value = strchr(arg, '=');
+ if (*value == NULL)
+ *var = arg;
+ else if (!strcmp(*value, "=")) {
+ pr_err("The config variable does not contain a value: %s\n", arg);
+ return -1;
+ } else {
+ *value = *value + 1; /* skip the leading '=' */
+ *var = strsep(&arg, "=");
+ if (*var[0] == '\0') {
+ pr_err("invalid config variable: %s\n", arg);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int cmd_config(int argc, const char **argv)
+{
+ int i, ret = -1;
+ struct perf_config_set *set;
+ char *user_config = mkpath("%s/.perfconfig", getenv("HOME"));
+ const char *config_filename;
+ bool changed = false;
+
+ argc = parse_options(argc, argv, config_options, config_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (use_system_config && use_user_config) {
+ pr_err("Error: only one config file at a time\n");
+ parse_options_usage(config_usage, config_options, "user", 0);
+ parse_options_usage(NULL, config_options, "system", 0);
+ return -1;
+ }
+
+ if (use_system_config)
+ config_exclusive_filename = perf_etc_perfconfig();
+ else if (use_user_config)
+ config_exclusive_filename = user_config;
+
+ if (!config_exclusive_filename)
+ config_filename = user_config;
+ else
+ config_filename = config_exclusive_filename;
+
+ /*
+ * The 'config' sub-command uses its own config set here because the
+ * config file location may have been changed by the options above.
+ */
+ set = perf_config_set__new();
+ if (!set)
+ goto out_err;
+
+ switch (actions) {
+ case ACTION_LIST:
+ if (argc) {
+ pr_err("Error: takes no arguments\n");
+ parse_options_usage(config_usage, config_options, "l", 1);
+ } else {
+ if (show_config(set) < 0) {
+ pr_err("Nothing configured, "
+ "please check your %s \n", config_filename);
+ goto out_err;
+ }
+ }
+ break;
+ default:
+ if (!argc) {
+ usage_with_options(config_usage, config_options);
+ break;
+ }
+
+ for (i = 0; argv[i]; i++) {
+ char *var, *value;
+ char *arg = strdup(argv[i]);
+
+ if (!arg) {
+ pr_err("%s: strdup failed\n", __func__);
+ goto out_err;
+ }
+
+ if (parse_config_arg(arg, &var, &value) < 0) {
+ free(arg);
+ goto out_err;
+ }
+
+ if (value == NULL) {
+ if (show_spec_config(set, var) < 0) {
+ pr_err("%s is not configured: %s\n",
+ var, config_filename);
+ free(arg);
+ goto out_err;
+ }
+ } else {
+ if (perf_config_set__collect(set, config_filename,
+ var, value) < 0) {
+ pr_err("Failed to add '%s=%s'\n",
+ var, value);
+ free(arg);
+ goto out_err;
+ }
+ changed = true;
+ }
+ free(arg);
+ }
+
+ if (!changed)
+ break;
+
+ if (set_config(set, config_filename) < 0) {
+ pr_err("Failed to set the configs on %s\n",
+ config_filename);
+ goto out_err;
+ }
+ }
+
+ ret = 0;
+out_err:
+ perf_config_set__delete(set);
+ return ret;
+}
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
new file mode 100644
index 000000000..dde25d4ca
--- /dev/null
+++ b/tools/perf/builtin-data.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "builtin.h"
+#include "perf.h"
+#include "debug.h"
+#include <subcmd/parse-options.h>
+#include "data-convert.h"
+#include "data-convert-bt.h"
+
+typedef int (*data_cmd_fn_t)(int argc, const char **argv);
+
+struct data_cmd {
+ const char *name;
+ const char *summary;
+ data_cmd_fn_t fn;
+};
+
+static struct data_cmd data_cmds[];
+
+#define for_each_cmd(cmd) \
+ for (cmd = data_cmds; cmd && cmd->name; cmd++)
+
+static const struct option data_options[] = {
+ OPT_END()
+};
+
+static const char * const data_subcommands[] = { "convert", NULL };
+
+static const char *data_usage[] = {
+ "perf data [<common options>] <command> [<options>]",
+ NULL
+};
+
+static void print_usage(void)
+{
+ struct data_cmd *cmd;
+
+ printf("Usage:\n");
+ printf("\t%s\n\n", data_usage[0]);
+ printf("\tAvailable commands:\n");
+
+ for_each_cmd(cmd) {
+ printf("\t %s\t- %s\n", cmd->name, cmd->summary);
+ }
+
+ printf("\n");
+}
+
+static const char * const data_convert_usage[] = {
+ "perf data convert [<options>]",
+ NULL
+};
+
+static int cmd_data_convert(int argc, const char **argv)
+{
+ const char *to_ctf = NULL;
+ struct perf_data_convert_opts opts = {
+ .force = false,
+ .all = false,
+ };
+ const struct option options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+ OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+#endif
+ OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"),
+ OPT_END()
+ };
+
+#ifndef HAVE_LIBBABELTRACE_SUPPORT
+ pr_err("No conversion support compiled in. perf should be compiled with environment variables LIBBABELTRACE=1 and LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
+ return -1;
+#endif
+
+ argc = parse_options(argc, argv, options,
+ data_convert_usage, 0);
+ if (argc) {
+ usage_with_options(data_convert_usage, options);
+ return -1;
+ }
+
+ if (to_ctf) {
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+ return bt_convert__perf2ctf(input_name, to_ctf, &opts);
+#else
+ pr_err("The libbabeltrace support is not compiled in.\n");
+ return -1;
+#endif
+ }
+
+ return 0;
+}
+
+static struct data_cmd data_cmds[] = {
+ { "convert", "converts data file between formats", cmd_data_convert },
+ { .name = NULL, },
+};
+
+int cmd_data(int argc, const char **argv)
+{
+ struct data_cmd *cmd;
+ const char *cmdstr;
+
+ /* No command specified. */
+ if (argc < 2)
+ goto usage;
+
+ argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc < 1)
+ goto usage;
+
+ cmdstr = argv[0];
+
+ for_each_cmd(cmd) {
+ if (strcmp(cmd->name, cmdstr))
+ continue;
+
+ return cmd->fn(argc, argv);
+ }
+
+ pr_err("Unknown command: %s\n", cmdstr);
+usage:
+ print_usage();
+ return -1;
+}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
new file mode 100644
index 000000000..39db2ee32
--- /dev/null
+++ b/tools/perf/builtin-diff.c
@@ -0,0 +1,1363 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-diff.c
+ *
+ * Builtin diff command: Analyze two perf.data input files, look up and read
+ * DSOs and symbol information, sort them and produce a diff.
+ */
+#include "builtin.h"
+
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/hist.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/sort.h"
+#include "util/symbol.h"
+#include "util/util.h"
+#include "util/data.h"
+#include "util/config.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <math.h>
+
+/* Diff command specific HPP columns. */
+enum {
+ PERF_HPP_DIFF__BASELINE,
+ PERF_HPP_DIFF__PERIOD,
+ PERF_HPP_DIFF__PERIOD_BASELINE,
+ PERF_HPP_DIFF__DELTA,
+ PERF_HPP_DIFF__RATIO,
+ PERF_HPP_DIFF__WEIGHTED_DIFF,
+ PERF_HPP_DIFF__FORMULA,
+ PERF_HPP_DIFF__DELTA_ABS,
+
+ PERF_HPP_DIFF__MAX_INDEX
+};
+
+struct diff_hpp_fmt {
+ struct perf_hpp_fmt fmt;
+ int idx;
+ char *header;
+ int header_width;
+};
+
+struct data__file {
+ struct perf_session *session;
+ struct perf_data data;
+ int idx;
+ struct hists *hists;
+ struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX];
+};
+
+static struct data__file *data__files;
+static int data__files_cnt;
+
+#define data__for_each_file_start(i, d, s) \
+ for (i = s, d = &data__files[s]; \
+ i < data__files_cnt; \
+ i++, d = &data__files[i])
+
+#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
+#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
+
+static bool force;
+static bool show_period;
+static bool show_formula;
+static bool show_baseline_only;
+static unsigned int sort_compute = 1;
+
+static s64 compute_wdiff_w1;
+static s64 compute_wdiff_w2;
+
+enum {
+ COMPUTE_DELTA,
+ COMPUTE_RATIO,
+ COMPUTE_WEIGHTED_DIFF,
+ COMPUTE_DELTA_ABS,
+ COMPUTE_MAX,
+};
+
+const char *compute_names[COMPUTE_MAX] = {
+ [COMPUTE_DELTA] = "delta",
+ [COMPUTE_DELTA_ABS] = "delta-abs",
+ [COMPUTE_RATIO] = "ratio",
+ [COMPUTE_WEIGHTED_DIFF] = "wdiff",
+};
+
+static int compute = COMPUTE_DELTA_ABS;
+
+static int compute_2_hpp[COMPUTE_MAX] = {
+ [COMPUTE_DELTA] = PERF_HPP_DIFF__DELTA,
+ [COMPUTE_DELTA_ABS] = PERF_HPP_DIFF__DELTA_ABS,
+ [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO,
+ [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF,
+};
+
+#define MAX_COL_WIDTH 70
+
+static struct header_column {
+ const char *name;
+ int width;
+} columns[PERF_HPP_DIFF__MAX_INDEX] = {
+ [PERF_HPP_DIFF__BASELINE] = {
+ .name = "Baseline",
+ },
+ [PERF_HPP_DIFF__PERIOD] = {
+ .name = "Period",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__PERIOD_BASELINE] = {
+ .name = "Base period",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__DELTA] = {
+ .name = "Delta",
+ .width = 7,
+ },
+ [PERF_HPP_DIFF__DELTA_ABS] = {
+ .name = "Delta Abs",
+ .width = 7,
+ },
+ [PERF_HPP_DIFF__RATIO] = {
+ .name = "Ratio",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__WEIGHTED_DIFF] = {
+ .name = "Weighted diff",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__FORMULA] = {
+ .name = "Formula",
+ .width = MAX_COL_WIDTH,
+ }
+};
+
+static int setup_compute_opt_wdiff(char *opt)
+{
+ char *w1_str = opt;
+ char *w2_str;
+
+ int ret = -EINVAL;
+
+ if (!opt)
+ goto out;
+
+ w2_str = strchr(opt, ',');
+ if (!w2_str)
+ goto out;
+
+ *w2_str++ = 0x0;
+ if (!*w2_str)
+ goto out;
+
+ compute_wdiff_w1 = strtol(w1_str, NULL, 10);
+ compute_wdiff_w2 = strtol(w2_str, NULL, 10);
+
+ if (!compute_wdiff_w1 || !compute_wdiff_w2)
+ goto out;
+
+ pr_debug("compute wdiff w1(%" PRId64 ") w2(%" PRId64 ")\n",
+ compute_wdiff_w1, compute_wdiff_w2);
+
+ ret = 0;
+
+ out:
+ if (ret)
+ pr_err("Failed: wrong weight data, use 'wdiff:w1,w2'\n");
+
+ return ret;
+}
+
+static int setup_compute_opt(char *opt)
+{
+ if (compute == COMPUTE_WEIGHTED_DIFF)
+ return setup_compute_opt_wdiff(opt);
+
+ if (opt) {
+ pr_err("Failed: extra option specified '%s'", opt);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int setup_compute(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ int *cp = (int *) opt->value;
+ char *cstr = (char *) str;
+ char buf[50];
+ unsigned i;
+ char *option;
+
+ if (!str) {
+ *cp = COMPUTE_DELTA;
+ return 0;
+ }
+
+ option = strchr(str, ':');
+ if (option) {
+ unsigned len = option++ - str;
+
+ /*
+ * The str data is not writable, so we need
+ * to use another buffer.
+ */
+
+ /* Reject option names that do not fit into the buffer. */
+ if (len >= sizeof(buf))
+ return -EINVAL;
+
+ strncpy(buf, str, len);
+ buf[len] = 0x0;
+ cstr = buf;
+ }
+
+ for (i = 0; i < COMPUTE_MAX; i++)
+ if (!strcmp(cstr, compute_names[i])) {
+ *cp = i;
+ return setup_compute_opt(option);
+ }
+
+ pr_err("Failed: '%s' is not computation method "
+ "(use 'delta','ratio' or 'wdiff')\n", str);
+ return -EINVAL;
+}
+
+static double period_percent(struct hist_entry *he, u64 period)
+{
+ u64 total = hists__total_period(he->hists);
+
+ return (period * 100.0) / total;
+}
+
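+/*
+ * Delta: difference between the pair's and the baseline's shares of
+ * their respective total periods, in percentage points.
+ */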
+static double compute_delta(struct hist_entry *he, struct hist_entry *pair)
+{
+ double old_percent = period_percent(he, he->stat.period);
+ double new_percent = period_percent(pair, pair->stat.period);
+
+ pair->diff.period_ratio_delta = new_percent - old_percent;
+ pair->diff.computed = true;
+ return pair->diff.period_ratio_delta;
+}
+
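+/* Ratio: pair period over baseline period (an empty baseline counts as 1). */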
+static double compute_ratio(struct hist_entry *he, struct hist_entry *pair)
+{
+ double old_period = he->stat.period ?: 1;
+ double new_period = pair->stat.period;
+
+ pair->diff.computed = true;
+ pair->diff.period_ratio = new_period / old_period;
+ return pair->diff.period_ratio;
+}
+
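+/* Weighted diff: new * w2 - old * w1, with the weights from 'wdiff:w1,w2'. */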
+static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
+{
+ u64 old_period = he->stat.period;
+ u64 new_period = pair->stat.period;
+
+ pair->diff.computed = true;
+ pair->diff.wdiff = new_period * compute_wdiff_w2 -
+ old_period * compute_wdiff_w1;
+
+ return pair->diff.wdiff;
+}
+
+static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ u64 he_total = he->hists->stats.total_period;
+ u64 pair_total = pair->hists->stats.total_period;
+
+ if (symbol_conf.filter_relative) {
+ he_total = he->hists->stats.total_non_filtered_period;
+ pair_total = pair->hists->stats.total_non_filtered_period;
+ }
+ return scnprintf(buf, size,
+ "(%" PRIu64 " * 100 / %" PRIu64 ") - "
+ "(%" PRIu64 " * 100 / %" PRIu64 ")",
+ pair->stat.period, pair_total,
+ he->stat.period, he_total);
+}
+
+static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ double old_period = he->stat.period;
+ double new_period = pair->stat.period;
+
+ return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
+}
+
+static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ u64 old_period = he->stat.period;
+ u64 new_period = pair->stat.period;
+
+ return scnprintf(buf, size,
+ "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
+ new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
+}
+
+static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ switch (compute) {
+ case COMPUTE_DELTA:
+ case COMPUTE_DELTA_ABS:
+ return formula_delta(he, pair, buf, size);
+ case COMPUTE_RATIO:
+ return formula_ratio(he, pair, buf, size);
+ case COMPUTE_WEIGHTED_DIFF:
+ return formula_wdiff(he, pair, buf, size);
+ default:
+ BUG_ON(1);
+ }
+
+ return -1;
+}
+
+static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct addr_location al;
+ struct hists *hists = evsel__hists(evsel);
+ int ret = -1;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_warning("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) {
+ pr_warning("problem incrementing symbol period, skipping event\n");
+ goto out_put;
+ }
+
+ /*
+ * The total_period is updated here before going to the output
+ * tree since normally only the baseline hists will call
+ * hists__output_resort() and precompute needs the total
+ * period in order to sort entries by percentage delta.
+ */
+ hists->stats.total_period += sample->period;
+ if (!al.filtered)
+ hists->stats.total_non_filtered_period += sample->period;
+ ret = 0;
+out_put:
+ addr_location__put(&al);
+ return ret;
+}
+
+static struct perf_tool tool = {
+ .sample = diff__process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+};
+
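+/* Find the event in 'evlist' that corresponds to the baseline event. */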
+static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *e;
+
+ evlist__for_each_entry(evlist, e) {
+ if (perf_evsel__match2(evsel, e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct hists *hists = evsel__hists(evsel);
+
+ hists__collapse_resort(hists, NULL);
+ }
+}
+
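+/*
+ * The diff_hpp_fmt lives at index 'idx' of the data__file's fmt[] array,
+ * so stepping back 'idx' elements yields the array base, from which
+ * container_of() recovers the owning data__file.
+ */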
+static struct data__file *fmt_to_data_file(struct perf_hpp_fmt *fmt)
+{
+ struct diff_hpp_fmt *dfmt = container_of(fmt, struct diff_hpp_fmt, fmt);
+ void *ptr = dfmt - dfmt->idx;
+ struct data__file *d = container_of(ptr, struct data__file, fmt);
+
+ return d;
+}
+
+static struct hist_entry*
+get_pair_data(struct hist_entry *he, struct data__file *d)
+{
+ if (hist_entry__has_pairs(he)) {
+ struct hist_entry *pair;
+
+ list_for_each_entry(pair, &he->pairs.head, pairs.node)
+ if (pair->hists == d->hists)
+ return pair;
+ }
+
+ return NULL;
+}
+
+static struct hist_entry*
+get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt)
+{
+ struct data__file *d = fmt_to_data_file(&dfmt->fmt);
+
+ return get_pair_data(he, d);
+}
+
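+/* Drop the baseline entries that have no pair in any other data file. */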
+static void hists__baseline_only(struct hists *hists)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first(root);
+ while (next != NULL) {
+ struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
+
+ next = rb_next(&he->rb_node_in);
+ if (!hist_entry__next_pair(he)) {
+ rb_erase(&he->rb_node_in, root);
+ hist_entry__delete(he);
+ }
+ }
+}
+
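+/*
+ * Walk all entries once and compute the selected differential value for
+ * each baseline/pair couple, so sorting can use the cached results.
+ */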
+static void hists__precompute(struct hists *hists)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first(root);
+ while (next != NULL) {
+ struct hist_entry *he, *pair;
+ struct data__file *d;
+ int i;
+
+ he = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&he->rb_node_in);
+
+ data__for_each_file_new(i, d) {
+ pair = get_pair_data(he, d);
+ if (!pair)
+ continue;
+
+ switch (compute) {
+ case COMPUTE_DELTA:
+ case COMPUTE_DELTA_ABS:
+ compute_delta(he, pair);
+ break;
+ case COMPUTE_RATIO:
+ compute_ratio(he, pair);
+ break;
+ case COMPUTE_WEIGHTED_DIFF:
+ compute_wdiff(he, pair);
+ break;
+ default:
+ BUG_ON(1);
+ }
+ }
+ }
+}
+
+static int64_t cmp_doubles(double l, double r)
+{
+ if (l > r)
+ return -1;
+ else if (l < r)
+ return 1;
+ else
+ return 0;
+}
+
+static int64_t
+__hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+ int c)
+{
+ switch (c) {
+ case COMPUTE_DELTA:
+ {
+ double l = left->diff.period_ratio_delta;
+ double r = right->diff.period_ratio_delta;
+
+ return cmp_doubles(l, r);
+ }
+ case COMPUTE_DELTA_ABS:
+ {
+ double l = fabs(left->diff.period_ratio_delta);
+ double r = fabs(right->diff.period_ratio_delta);
+
+ return cmp_doubles(l, r);
+ }
+ case COMPUTE_RATIO:
+ {
+ double l = left->diff.period_ratio;
+ double r = right->diff.period_ratio;
+
+ return cmp_doubles(l, r);
+ }
+ case COMPUTE_WEIGHTED_DIFF:
+ {
+ s64 l = left->diff.wdiff;
+ s64 r = right->diff.wdiff;
+
+ return r - l;
+ }
+ default:
+ BUG_ON(1);
+ }
+
+ return 0;
+}
+
+static int64_t
+hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+ int c, int sort_idx)
+{
+ bool pairs_left = hist_entry__has_pairs(left);
+ bool pairs_right = hist_entry__has_pairs(right);
+ struct hist_entry *p_right, *p_left;
+
+ if (!pairs_left && !pairs_right)
+ return 0;
+
+ if (!pairs_left || !pairs_right)
+ return pairs_left ? -1 : 1;
+
+ p_left = get_pair_data(left, &data__files[sort_idx]);
+ p_right = get_pair_data(right, &data__files[sort_idx]);
+
+ if (!p_left && !p_right)
+ return 0;
+
+ if (!p_left || !p_right)
+ return p_left ? -1 : 1;
+
+ /*
+ * We have two entries of the same kind,
+ * so compare their data.
+ */
+ return __hist_entry__cmp_compute(p_left, p_right, c);
+}
+
+static int64_t
+hist_entry__cmp_compute_idx(struct hist_entry *left, struct hist_entry *right,
+ int c, int sort_idx)
+{
+ struct hist_entry *p_right, *p_left;
+
+ p_left = get_pair_data(left, &data__files[sort_idx]);
+ p_right = get_pair_data(right, &data__files[sort_idx]);
+
+ if (!p_left && !p_right)
+ return 0;
+
+ if (!p_left || !p_right)
+ return p_left ? -1 : 1;
+
+ if (c != COMPUTE_DELTA && c != COMPUTE_DELTA_ABS) {
+ /*
+ * The delta can be computed without the baseline, but
+ * the other methods cannot. Put the entries which have
+ * no values below.
+ */
+ if (left->dummy && right->dummy)
+ return 0;
+
+ if (left->dummy || right->dummy)
+ return left->dummy ? 1 : -1;
+ }
+
+ return __hist_entry__cmp_compute(p_left, p_right, c);
+}
+
+static int64_t
+hist_entry__cmp_nop(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left __maybe_unused,
+ struct hist_entry *right __maybe_unused)
+{
+ return 0;
+}
+
+static int64_t
+hist_entry__cmp_baseline(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ if (left->stat.period == right->stat.period)
+ return 0;
+ return left->stat.period > right->stat.period ? 1 : -1;
+}
+
+static int64_t
+hist_entry__cmp_delta(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct data__file *d = fmt_to_data_file(fmt);
+
+ return hist_entry__cmp_compute(right, left, COMPUTE_DELTA, d->idx);
+}
+
+static int64_t
+hist_entry__cmp_delta_abs(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct data__file *d = fmt_to_data_file(fmt);
+
+ return hist_entry__cmp_compute(right, left, COMPUTE_DELTA_ABS, d->idx);
+}
+
+static int64_t
+hist_entry__cmp_ratio(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct data__file *d = fmt_to_data_file(fmt);
+
+ return hist_entry__cmp_compute(right, left, COMPUTE_RATIO, d->idx);
+}
+
+static int64_t
+hist_entry__cmp_wdiff(struct perf_hpp_fmt *fmt,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ struct data__file *d = fmt_to_data_file(fmt);
+
+ return hist_entry__cmp_compute(right, left, COMPUTE_WEIGHTED_DIFF, d->idx);
+}
+
+static int64_t
+hist_entry__cmp_delta_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA,
+ sort_compute);
+}
+
+static int64_t
+hist_entry__cmp_delta_abs_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return hist_entry__cmp_compute_idx(right, left, COMPUTE_DELTA_ABS,
+ sort_compute);
+}
+
+static int64_t
+hist_entry__cmp_ratio_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return hist_entry__cmp_compute_idx(right, left, COMPUTE_RATIO,
+ sort_compute);
+}
+
+static int64_t
+hist_entry__cmp_wdiff_idx(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return hist_entry__cmp_compute_idx(right, left, COMPUTE_WEIGHTED_DIFF,
+ sort_compute);
+}
+
+static void hists__process(struct hists *hists)
+{
+ if (show_baseline_only)
+ hists__baseline_only(hists);
+
+ hists__precompute(hists);
+ hists__output_resort(hists, NULL);
+
+ hists__fprintf(hists, !quiet, 0, 0, 0, stdout,
+ !symbol_conf.use_callchain);
+}
+
+static void data__fprintf(void)
+{
+ struct data__file *d;
+ int i;
+
+ fprintf(stdout, "# Data files:\n");
+
+ data__for_each_file(i, d)
+ fprintf(stdout, "# [%d] %s %s\n",
+ d->idx, d->data.file.path,
+ !d->idx ? "(Baseline)" : "");
+
+ fprintf(stdout, "#\n");
+}
+
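+/*
+ * For each baseline event, find the matching event in every other data
+ * file, pair up their hist entries and print the resulting diff.
+ */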
+static void data_process(void)
+{
+ struct perf_evlist *evlist_base = data__files[0].session->evlist;
+ struct perf_evsel *evsel_base;
+ bool first = true;
+
+ evlist__for_each_entry(evlist_base, evsel_base) {
+ struct hists *hists_base = evsel__hists(evsel_base);
+ struct data__file *d;
+ int i;
+
+ data__for_each_file_new(i, d) {
+ struct perf_evlist *evlist = d->session->evlist;
+ struct perf_evsel *evsel;
+ struct hists *hists;
+
+ evsel = evsel_match(evsel_base, evlist);
+ if (!evsel)
+ continue;
+
+ hists = evsel__hists(evsel);
+ d->hists = hists;
+
+ hists__match(hists_base, hists);
+
+ if (!show_baseline_only)
+ hists__link(hists_base, hists);
+ }
+
+ if (!quiet) {
+ fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+ perf_evsel__name(evsel_base));
+ }
+
+ first = false;
+
+ if (verbose > 0 || ((data__files_cnt > 2) && !quiet))
+ data__fprintf();
+
+ /* Don't sort callchain for perf diff */
+ perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+
+ hists__process(hists_base);
+ }
+}
+
+static void data__free(struct data__file *d)
+{
+ int col;
+
+ for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) {
+ struct diff_hpp_fmt *fmt = &d->fmt[col];
+
+ zfree(&fmt->header);
+ }
+}
+
+static int __cmd_diff(void)
+{
+ struct data__file *d;
+ int ret = -EINVAL, i;
+
+ data__for_each_file(i, d) {
+ d->session = perf_session__new(&d->data, false, &tool);
+ if (!d->session) {
+ pr_err("Failed to open %s\n", d->data.file.path);
+ ret = -1;
+ goto out_delete;
+ }
+
+ ret = perf_session__process_events(d->session);
+ if (ret) {
+ pr_err("Failed to process %s\n", d->data.file.path);
+ goto out_delete;
+ }
+
+ perf_evlist__collapse_resort(d->session->evlist);
+ }
+
+ data_process();
+
+ out_delete:
+ data__for_each_file(i, d) {
+ perf_session__delete(d->session);
+ data__free(d);
+ }
+
+ free(data__files);
+ return ret;
+}
+
+static const char * const diff_usage[] = {
+ "perf diff [<options>] [old_file] [new_file]",
+ NULL,
+};
+
+static const struct option options[] = {
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+ OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
+ "Show only items with match in baseline"),
+ OPT_CALLBACK('c', "compute", &compute,
+ "delta,delta-abs,ratio,wdiff:w1,w2 (default delta-abs)",
+ "Entries differential computation selection",
+ setup_compute),
+ OPT_BOOLEAN('p', "period", &show_period,
+ "Show period values."),
+ OPT_BOOLEAN('F', "formula", &show_formula,
+ "Show formula."),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+ "file", "kallsyms pathname"),
+ OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+ "load module symbols - WARNING: use only with -k and LIVE kernel"),
+ OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+ "only consider symbols in these dsos"),
+ OPT_STRING('C', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+ "only consider symbols in these comms"),
+ OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+ "only consider these symbols"),
+ OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ " Please refer the man page for the complete list."),
+ OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
+ "separator for columns, no spaces will be added between "
+ "columns '.' is reserved."),
+ OPT_CALLBACK(0, "symfs", NULL, "directory",
+ "Look for files with symbols relative to this directory",
+ symbol__config_symfs),
+ OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
+ OPT_END()
+};
+
+static double baseline_percent(struct hist_entry *he)
+{
+ u64 total = hists__total_period(he->hists);
+
+ return 100.0 * he->stat.period / total;
+}
+
+static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+ double percent = baseline_percent(he);
+ char pfmt[20] = " ";
+
+ if (!he->dummy) {
+ scnprintf(pfmt, 20, "%%%d.2f%%%%", dfmt->header_width - 1);
+ return percent_color_snprintf(hpp->buf, hpp->size,
+ pfmt, percent);
+ } else
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, pfmt);
+}
+
+static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size)
+{
+ double percent = baseline_percent(he);
+ const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+ int ret = 0;
+
+ if (!he->dummy)
+ ret = scnprintf(buf, size, fmt, percent);
+
+ return ret;
+}
+
+static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he,
+ int comparison_method)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+ struct hist_entry *pair = get_pair_fmt(he, dfmt);
+ double diff;
+ s64 wdiff;
+ char pfmt[20] = " ";
+
+ if (!pair)
+ goto no_print;
+
+ switch (comparison_method) {
+ case COMPUTE_DELTA:
+ if (pair->diff.computed)
+ diff = pair->diff.period_ratio_delta;
+ else
+ diff = compute_delta(he, pair);
+
+ scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1);
+ return percent_color_snprintf(hpp->buf, hpp->size,
+ pfmt, diff);
+ case COMPUTE_RATIO:
+ if (he->dummy)
+ goto dummy_print;
+ if (pair->diff.computed)
+ diff = pair->diff.period_ratio;
+ else
+ diff = compute_ratio(he, pair);
+
+ scnprintf(pfmt, 20, "%%%d.6f", dfmt->header_width);
+ return value_color_snprintf(hpp->buf, hpp->size,
+ pfmt, diff);
+ case COMPUTE_WEIGHTED_DIFF:
+ if (he->dummy)
+ goto dummy_print;
+ if (pair->diff.computed)
+ wdiff = pair->diff.wdiff;
+ else
+ wdiff = compute_wdiff(he, pair);
+
+ scnprintf(pfmt, 20, "%%14ld", dfmt->header_width);
+ return color_snprintf(hpp->buf, hpp->size,
+ get_percent_color(wdiff),
+ pfmt, wdiff);
+ default:
+ BUG_ON(1);
+ }
+dummy_print:
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, "N/A");
+no_print:
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, pfmt);
+}
+
+static int hpp__color_delta(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ return __hpp__color_compare(fmt, hpp, he, COMPUTE_DELTA);
+}
+
+static int hpp__color_ratio(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ return __hpp__color_compare(fmt, hpp, he, COMPUTE_RATIO);
+}
+
+static int hpp__color_wdiff(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF);
+}
+
+static void
+hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
+{
+ switch (idx) {
+ case PERF_HPP_DIFF__PERIOD_BASELINE:
+ scnprintf(buf, size, "%" PRIu64, he->stat.period);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
+ int idx, char *buf, size_t size)
+{
+ double diff;
+ double ratio;
+ s64 wdiff;
+
+ switch (idx) {
+ case PERF_HPP_DIFF__DELTA:
+ case PERF_HPP_DIFF__DELTA_ABS:
+ if (pair->diff.computed)
+ diff = pair->diff.period_ratio_delta;
+ else
+ diff = compute_delta(he, pair);
+
+ scnprintf(buf, size, "%+4.2F%%", diff);
+ break;
+
+ case PERF_HPP_DIFF__RATIO:
+ /* No point in a ratio number if we are a dummy entry. */
+ if (he->dummy) {
+ scnprintf(buf, size, "N/A");
+ break;
+ }
+
+ if (pair->diff.computed)
+ ratio = pair->diff.period_ratio;
+ else
+ ratio = compute_ratio(he, pair);
+
+ if (ratio > 0.0)
+ scnprintf(buf, size, "%14.6F", ratio);
+ break;
+
+ case PERF_HPP_DIFF__WEIGHTED_DIFF:
+ /* No point in a wdiff number if we are a dummy entry. */
+ if (he->dummy) {
+ scnprintf(buf, size, "N/A");
+ break;
+ }
+
+ if (pair->diff.computed)
+ wdiff = pair->diff.wdiff;
+ else
+ wdiff = compute_wdiff(he, pair);
+
+ if (wdiff != 0)
+ scnprintf(buf, size, "%14ld", wdiff);
+ break;
+
+ case PERF_HPP_DIFF__FORMULA:
+ formula_fprintf(he, pair, buf, size);
+ break;
+
+ case PERF_HPP_DIFF__PERIOD:
+ scnprintf(buf, size, "%" PRIu64, pair->stat.period);
+ break;
+
+ default:
+ BUG_ON(1);
+ }
+}
+
+static void
+__hpp__entry_global(struct hist_entry *he, struct diff_hpp_fmt *dfmt,
+ char *buf, size_t size)
+{
+ struct hist_entry *pair = get_pair_fmt(he, dfmt);
+ int idx = dfmt->idx;
+
+ /* baseline is special */
+ if (idx == PERF_HPP_DIFF__BASELINE)
+ hpp__entry_baseline(he, buf, size);
+ else {
+ if (pair)
+ hpp__entry_pair(he, pair, idx, buf, size);
+ else
+ hpp__entry_unpair(he, idx, buf, size);
+ }
+}
+
+static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(_fmt, struct diff_hpp_fmt, fmt);
+ char buf[MAX_COL_WIDTH] = " ";
+
+ __hpp__entry_global(he, dfmt, buf, MAX_COL_WIDTH);
+
+ if (symbol_conf.field_sep)
+ return scnprintf(hpp->buf, hpp->size, "%s", buf);
+ else
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, buf);
+}
+
+static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+
+ BUG_ON(!dfmt->header);
+ return scnprintf(hpp->buf, hpp->size, dfmt->header);
+}
+
+static int hpp__width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+
+ BUG_ON(dfmt->header_width <= 0);
+ return dfmt->header_width;
+}
+
+static void init_header(struct data__file *d, struct diff_hpp_fmt *dfmt)
+{
+#define MAX_HEADER_NAME 100
+ char buf_indent[MAX_HEADER_NAME];
+ char buf[MAX_HEADER_NAME];
+ const char *header = NULL;
+ int width = 0;
+
+ BUG_ON(dfmt->idx >= PERF_HPP_DIFF__MAX_INDEX);
+ header = columns[dfmt->idx].name;
+ width = columns[dfmt->idx].width;
+
+ /* Only our defined HPP fmts should appear here. */
+ BUG_ON(!header);
+
+ if (data__files_cnt > 2)
+ scnprintf(buf, MAX_HEADER_NAME, "%s/%d", header, d->idx);
+
+#define NAME (data__files_cnt > 2 ? buf : header)
+ dfmt->header_width = width;
+ width = (int) strlen(NAME);
+ if (dfmt->header_width < width)
+ dfmt->header_width = width;
+
+ scnprintf(buf_indent, MAX_HEADER_NAME, "%*s",
+ dfmt->header_width, NAME);
+
+ dfmt->header = strdup(buf_indent);
+#undef MAX_HEADER_NAME
+#undef NAME
+}
+
+static void data__hpp_register(struct data__file *d, int idx)
+{
+ struct diff_hpp_fmt *dfmt = &d->fmt[idx];
+ struct perf_hpp_fmt *fmt = &dfmt->fmt;
+
+ dfmt->idx = idx;
+
+ fmt->header = hpp__header;
+ fmt->width = hpp__width;
+ fmt->entry = hpp__entry_global;
+ fmt->cmp = hist_entry__cmp_nop;
+ fmt->collapse = hist_entry__cmp_nop;
+
+ /* TODO more colors */
+ switch (idx) {
+ case PERF_HPP_DIFF__BASELINE:
+ fmt->color = hpp__color_baseline;
+ fmt->sort = hist_entry__cmp_baseline;
+ break;
+ case PERF_HPP_DIFF__DELTA:
+ fmt->color = hpp__color_delta;
+ fmt->sort = hist_entry__cmp_delta;
+ break;
+ case PERF_HPP_DIFF__RATIO:
+ fmt->color = hpp__color_ratio;
+ fmt->sort = hist_entry__cmp_ratio;
+ break;
+ case PERF_HPP_DIFF__WEIGHTED_DIFF:
+ fmt->color = hpp__color_wdiff;
+ fmt->sort = hist_entry__cmp_wdiff;
+ break;
+ case PERF_HPP_DIFF__DELTA_ABS:
+ fmt->color = hpp__color_delta;
+ fmt->sort = hist_entry__cmp_delta_abs;
+ break;
+ default:
+ fmt->sort = hist_entry__cmp_nop;
+ break;
+ }
+
+ init_header(d, dfmt);
+ perf_hpp__column_register(fmt);
+ perf_hpp__register_sort_field(fmt);
+}
+
+static int ui_init(void)
+{
+ struct data__file *d;
+ struct perf_hpp_fmt *fmt;
+ int i;
+
+ data__for_each_file(i, d) {
+
+ /*
+ * Baseline or compute related columns:
+ *
+ * PERF_HPP_DIFF__BASELINE
+ * PERF_HPP_DIFF__DELTA
+ * PERF_HPP_DIFF__RATIO
+ * PERF_HPP_DIFF__WEIGHTED_DIFF
+ */
+ data__hpp_register(d, i ? compute_2_hpp[compute] :
+ PERF_HPP_DIFF__BASELINE);
+
+ /*
+ * And the rest:
+ *
+ * PERF_HPP_DIFF__FORMULA
+ * PERF_HPP_DIFF__PERIOD
+ * PERF_HPP_DIFF__PERIOD_BASELINE
+ */
+ if (show_formula && i)
+ data__hpp_register(d, PERF_HPP_DIFF__FORMULA);
+
+ if (show_period)
+ data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD :
+ PERF_HPP_DIFF__PERIOD_BASELINE);
+ }
+
+ if (!sort_compute)
+ return 0;
+
+ /*
+ * Prepend an fmt to sort on columns at 'sort_compute' first.
+ * This fmt is added only to the sort list but not to the
+ * output fields list.
+ *
+ * Note that this column (data) can be compared twice: once
+ * for this 'sort_compute' fmt and again for the normal
+ * diff_hpp_fmt. But it shouldn't be a problem, as most entries
+ * will be sorted out by the first try or by the baseline, and
+ * comparing is not a costly operation.
+ */
+ fmt = zalloc(sizeof(*fmt));
+ if (fmt == NULL) {
+ pr_err("Memory allocation failed\n");
+ return -1;
+ }
+
+ fmt->cmp = hist_entry__cmp_nop;
+ fmt->collapse = hist_entry__cmp_nop;
+
+ switch (compute) {
+ case COMPUTE_DELTA:
+ fmt->sort = hist_entry__cmp_delta_idx;
+ break;
+ case COMPUTE_RATIO:
+ fmt->sort = hist_entry__cmp_ratio_idx;
+ break;
+ case COMPUTE_WEIGHTED_DIFF:
+ fmt->sort = hist_entry__cmp_wdiff_idx;
+ break;
+ case COMPUTE_DELTA_ABS:
+ fmt->sort = hist_entry__cmp_delta_abs_idx;
+ break;
+ default:
+ BUG_ON(1);
+ }
+
+ perf_hpp__prepend_sort_field(fmt);
+ return 0;
+}
+
+static int data_init(int argc, const char **argv)
+{
+ struct data__file *d;
+ static const char *defaults[] = {
+ "perf.data.old",
+ "perf.data",
+ };
+ bool use_default = true;
+ int i;
+
+ data__files_cnt = 2;
+
+ if (argc) {
+ if (argc == 1)
+ defaults[1] = argv[0];
+ else {
+ data__files_cnt = argc;
+ use_default = false;
+ }
+ } else if (perf_guest) {
+ defaults[0] = "perf.data.host";
+ defaults[1] = "perf.data.guest";
+ }
+
+ if (sort_compute >= (unsigned int) data__files_cnt) {
+ pr_err("Order option out of limit.\n");
+ return -EINVAL;
+ }
+
+ data__files = zalloc(sizeof(*data__files) * data__files_cnt);
+ if (!data__files)
+ return -ENOMEM;
+
+ data__for_each_file(i, d) {
+ struct perf_data *data = &d->data;
+
+ data->file.path = use_default ? defaults[i] : argv[i];
+ data->mode = PERF_DATA_MODE_READ,
+ data->force = force,
+
+ d->idx = i;
+ }
+
+ return 0;
+}
+
+static int diff__config(const char *var, const char *value,
+ void *cb __maybe_unused)
+{
+ if (!strcmp(var, "diff.order")) {
+ int ret;
+ if (perf_config_int(&ret, var, value) < 0)
+ return -1;
+ sort_compute = ret;
+ return 0;
+ }
+ if (!strcmp(var, "diff.compute")) {
+ if (!strcmp(value, "delta")) {
+ compute = COMPUTE_DELTA;
+ } else if (!strcmp(value, "delta-abs")) {
+ compute = COMPUTE_DELTA_ABS;
+ } else if (!strcmp(value, "ratio")) {
+ compute = COMPUTE_RATIO;
+ } else if (!strcmp(value, "wdiff")) {
+ compute = COMPUTE_WEIGHTED_DIFF;
+ } else {
+ pr_err("Invalid compute method: %s\n", value);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int cmd_diff(int argc, const char **argv)
+{
+ int ret = hists__init();
+
+ if (ret < 0)
+ return ret;
+
+ perf_config(diff__config, NULL);
+
+ argc = parse_options(argc, argv, options, diff_usage, 0);
+
+ if (quiet)
+ perf_quiet_option();
+
+ if (symbol__init(NULL) < 0)
+ return -1;
+
+ if (data_init(argc, argv) < 0)
+ return -1;
+
+ if (ui_init() < 0)
+ return -1;
+
+ sort__mode = SORT_MODE__DIFF;
+
+ if (setup_sorting(NULL) < 0)
+ usage_with_options(diff_usage, options);
+
+ setup_pager();
+
+ sort__setup_elide(NULL);
+
+ return __cmd_diff();
+}
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
new file mode 100644
index 000000000..e06e822ce
--- /dev/null
+++ b/tools/perf/builtin-evlist.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Builtin evlist command: Show the list of event selectors present
+ * in a perf.data file.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include <linux/list.h>
+
+#include "perf.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include <subcmd/parse-options.h>
+#include "util/session.h"
+#include "util/data.h"
+#include "util/debug.h"
+
+static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
+{
+ struct perf_session *session;
+ struct perf_evsel *pos;
+ struct perf_data data = {
+ .file = {
+ .path = file_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = details->force,
+ };
+ bool has_tracepoint = false;
+
+ session = perf_session__new(&data, 0, NULL);
+ if (session == NULL)
+ return -1;
+
+ evlist__for_each_entry(session->evlist, pos) {
+ perf_evsel__fprintf(pos, details, stdout);
+
+ if (pos->attr.type == PERF_TYPE_TRACEPOINT)
+ has_tracepoint = true;
+ }
+
+ if (has_tracepoint && !details->trace_fields)
+ printf("# Tip: use 'perf evlist --trace-fields' to show fields for tracepoint events\n");
+
+ perf_session__delete(session);
+ return 0;
+}
+
+int cmd_evlist(int argc, const char **argv)
+{
+ struct perf_attr_details details = { .verbose = false, };
+ const struct option options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "Input file name"),
+ OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
+ OPT_BOOLEAN('v', "verbose", &details.verbose,
+ "Show all event attr details"),
+ OPT_BOOLEAN('g', "group", &details.event_group,
+ "Show event group information"),
+ OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "trace-fields", &details.trace_fields, "Show tracepoint fields"),
+ OPT_END()
+ };
+ const char * const evlist_usage[] = {
+ "perf evlist [<options>]",
+ NULL
+ };
+
+ argc = parse_options(argc, argv, options, evlist_usage, 0);
+ if (argc)
+ usage_with_options(evlist_usage, options);
+
+ if (details.event_group && (details.verbose || details.freq)) {
+ usage_with_options_msg(evlist_usage, options,
+ "--group option is not compatible with other options\n");
+ }
+
+ return __cmd_evlist(input_name, &details);
+}
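+
+/*
+ * Usage sketch: 'perf evlist -i perf.data' prints one line per event
+ * selector; '-v' dumps the full attr details and '--trace-fields'
+ * lists tracepoint fields, per the option table above.
+ */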
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
new file mode 100644
index 000000000..137955197
--- /dev/null
+++ b/tools/perf/builtin-ftrace.c
@@ -0,0 +1,521 @@
+/*
+ * builtin-ftrace.c
+ *
+ * Copyright (c) 2013 LG Electronics, Namhyung Kim <namhyung@kernel.org>
+ *
+ * Released under the GPL v2.
+ */
+
+#include "builtin.h"
+#include "perf.h"
+
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <poll.h>
+
+#include "debug.h"
+#include <subcmd/parse-options.h>
+#include <api/fs/tracing_path.h>
+#include "evlist.h"
+#include "target.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "util/config.h"
+
+
+#define DEFAULT_TRACER "function_graph"
+
+struct perf_ftrace {
+ struct perf_evlist *evlist;
+ struct target target;
+ const char *tracer;
+ struct list_head filters;
+ struct list_head notrace;
+ struct list_head graph_funcs;
+ struct list_head nograph_funcs;
+ int graph_depth;
+};
+
+struct filter_entry {
+ struct list_head list;
+ char name[];
+};
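+
+/*
+ * 'name' is a flexible array member: each entry is allocated as
+ * malloc(sizeof(*entry) + strlen(str) + 1) so the function name is
+ * stored inline right after the list node (see parse_filter_func()).
+ */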
+
+static bool done;
+
+static void sig_handler(int sig __maybe_unused)
+{
+ done = true;
+}
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails, since
+ * we asked for that by setting its exec_error to the function below,
+ * ftrace__workload_exec_failed_signal.
+ *
+ * XXX We need to handle this more appropriately, emitting an error, etc.
+ */
+static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *ucontext __maybe_unused)
+{
+ /* workload_exec_errno = info->si_value.sival_int; */
+ done = true;
+}
+
+static int __write_tracing_file(const char *name, const char *val, bool append)
+{
+ char *file;
+ int fd, ret = -1;
+ ssize_t size = strlen(val);
+ int flags = O_WRONLY;
+ char errbuf[512];
+ char *val_copy;
+
+ file = get_tracing_file(name);
+ if (!file) {
+ pr_debug("cannot get tracing file: %s\n", name);
+ return -1;
+ }
+
+ if (append)
+ flags |= O_APPEND;
+ else
+ flags |= O_TRUNC;
+
+ fd = open(file, flags);
+ if (fd < 0) {
+ pr_debug("cannot open tracing file: %s: %s\n",
+ name, str_error_r(errno, errbuf, sizeof(errbuf)));
+ goto out;
+ }
+
+ /*
+	 * Copy the original value and append a '\n'. Without the
+	 * trailing newline, the kernel can hide possible errors.
+ */
+ val_copy = strdup(val);
+ if (!val_copy)
+ goto out_close;
+ val_copy[size] = '\n';
+
+ if (write(fd, val_copy, size + 1) == size + 1)
+ ret = 0;
+ else
+ pr_debug("write '%s' to tracing/%s failed: %s\n",
+ val, name, str_error_r(errno, errbuf, sizeof(errbuf)));
+
+ free(val_copy);
+out_close:
+ close(fd);
+out:
+ put_tracing_file(file);
+ return ret;
+}
+
+static int write_tracing_file(const char *name, const char *val)
+{
+ return __write_tracing_file(name, val, false);
+}
+
+static int append_tracing_file(const char *name, const char *val)
+{
+ return __write_tracing_file(name, val, true);
+}
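+
+/*
+ * For example, write_tracing_file("current_tracer", "nop") truncates
+ * the file before writing (O_TRUNC), while
+ * append_tracing_file("set_ftrace_pid", "1234") keeps the existing
+ * contents (O_APPEND), which is how multiple pids and filters
+ * accumulate.
+ */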
+
+static int reset_tracing_cpu(void);
+static void reset_tracing_filters(void);
+
+static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
+{
+ if (write_tracing_file("tracing_on", "0") < 0)
+ return -1;
+
+ if (write_tracing_file("current_tracer", "nop") < 0)
+ return -1;
+
+ if (write_tracing_file("set_ftrace_pid", " ") < 0)
+ return -1;
+
+ if (reset_tracing_cpu() < 0)
+ return -1;
+
+ if (write_tracing_file("max_graph_depth", "0") < 0)
+ return -1;
+
+ reset_tracing_filters();
+ return 0;
+}
+
+static int set_tracing_pid(struct perf_ftrace *ftrace)
+{
+ int i;
+ char buf[16];
+
+ if (target__has_cpu(&ftrace->target))
+ return 0;
+
+ for (i = 0; i < thread_map__nr(ftrace->evlist->threads); i++) {
+ scnprintf(buf, sizeof(buf), "%d",
+ ftrace->evlist->threads->map[i]);
+ if (append_tracing_file("set_ftrace_pid", buf) < 0)
+ return -1;
+ }
+ return 0;
+}
+
+static int set_tracing_cpumask(struct cpu_map *cpumap)
+{
+ char *cpumask;
+ size_t mask_size;
+ int ret;
+ int last_cpu;
+
+ last_cpu = cpu_map__cpu(cpumap, cpumap->nr - 1);
+	mask_size = last_cpu / 4 + 2; /* one more byte for EOS */
+	mask_size += last_cpu / 32; /* ',' is needed for every 32 cpus */
+
+ cpumask = malloc(mask_size);
+ if (cpumask == NULL) {
+ pr_debug("failed to allocate cpu mask\n");
+ return -1;
+ }
+
+ cpu_map__snprint_mask(cpumap, cpumask, mask_size);
+
+ ret = write_tracing_file("tracing_cpumask", cpumask);
+
+ free(cpumask);
+ return ret;
+}
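+
+/*
+ * Sizing example for the buffer above: with last_cpu == 79 the mask
+ * text is "ffff,ffffffff,ffffffff" - 80 bits need 20 hex digits
+ * (covered by 79/4 + 2 == 21, including the NUL) plus 79/32 == 2
+ * separator commas, since the cpumask format groups digits in 32-bit
+ * chunks.
+ */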
+
+static int set_tracing_cpu(struct perf_ftrace *ftrace)
+{
+ struct cpu_map *cpumap = ftrace->evlist->cpus;
+
+ if (!target__has_cpu(&ftrace->target))
+ return 0;
+
+ return set_tracing_cpumask(cpumap);
+}
+
+static int reset_tracing_cpu(void)
+{
+ struct cpu_map *cpumap = cpu_map__new(NULL);
+ int ret;
+
+ ret = set_tracing_cpumask(cpumap);
+ cpu_map__put(cpumap);
+ return ret;
+}
+
+static int __set_tracing_filter(const char *filter_file, struct list_head *funcs)
+{
+ struct filter_entry *pos;
+
+ list_for_each_entry(pos, funcs, list) {
+ if (append_tracing_file(filter_file, pos->name) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int set_tracing_filters(struct perf_ftrace *ftrace)
+{
+ int ret;
+
+ ret = __set_tracing_filter("set_ftrace_filter", &ftrace->filters);
+ if (ret < 0)
+ return ret;
+
+ ret = __set_tracing_filter("set_ftrace_notrace", &ftrace->notrace);
+ if (ret < 0)
+ return ret;
+
+ ret = __set_tracing_filter("set_graph_function", &ftrace->graph_funcs);
+ if (ret < 0)
+ return ret;
+
+ /* old kernels do not have this filter */
+ __set_tracing_filter("set_graph_notrace", &ftrace->nograph_funcs);
+
+ return ret;
+}
+
+static void reset_tracing_filters(void)
+{
+ write_tracing_file("set_ftrace_filter", " ");
+ write_tracing_file("set_ftrace_notrace", " ");
+ write_tracing_file("set_graph_function", " ");
+ write_tracing_file("set_graph_notrace", " ");
+}
+
+static int set_tracing_depth(struct perf_ftrace *ftrace)
+{
+ char buf[16];
+
+ if (ftrace->graph_depth == 0)
+ return 0;
+
+ if (ftrace->graph_depth < 0) {
+ pr_err("invalid graph depth: %d\n", ftrace->graph_depth);
+ return -1;
+ }
+
+ snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth);
+
+ if (write_tracing_file("max_graph_depth", buf) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
+{
+ char *trace_file;
+ int trace_fd;
+ char buf[4096];
+ struct pollfd pollfd = {
+ .events = POLLIN,
+ };
+
+ if (geteuid() != 0) {
+ pr_err("ftrace only works for root!\n");
+ return -1;
+ }
+
+ signal(SIGINT, sig_handler);
+ signal(SIGUSR1, sig_handler);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGPIPE, sig_handler);
+
+ if (reset_tracing_files(ftrace) < 0) {
+ pr_err("failed to reset ftrace\n");
+ goto out;
+ }
+
+ /* reset ftrace buffer */
+ if (write_tracing_file("trace", "0") < 0)
+ goto out;
+
+ if (argc && perf_evlist__prepare_workload(ftrace->evlist,
+ &ftrace->target, argv, false,
+ ftrace__workload_exec_failed_signal) < 0) {
+ goto out;
+ }
+
+ if (set_tracing_pid(ftrace) < 0) {
+ pr_err("failed to set ftrace pid\n");
+ goto out_reset;
+ }
+
+ if (set_tracing_cpu(ftrace) < 0) {
+ pr_err("failed to set tracing cpumask\n");
+ goto out_reset;
+ }
+
+ if (set_tracing_filters(ftrace) < 0) {
+ pr_err("failed to set tracing filters\n");
+ goto out_reset;
+ }
+
+ if (set_tracing_depth(ftrace) < 0) {
+ pr_err("failed to set graph depth\n");
+ goto out_reset;
+ }
+
+ if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
+ pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
+ goto out_reset;
+ }
+
+ setup_pager();
+
+ trace_file = get_tracing_file("trace_pipe");
+ if (!trace_file) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ trace_fd = open(trace_file, O_RDONLY);
+
+ put_tracing_file(trace_file);
+
+ if (trace_fd < 0) {
+ pr_err("failed to open trace_pipe\n");
+ goto out_reset;
+ }
+
+ fcntl(trace_fd, F_SETFL, O_NONBLOCK);
+ pollfd.fd = trace_fd;
+
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
+
+ perf_evlist__start_workload(ftrace->evlist);
+
+ while (!done) {
+ if (poll(&pollfd, 1, -1) < 0)
+ break;
+
+ if (pollfd.revents & POLLIN) {
+ int n = read(trace_fd, buf, sizeof(buf));
+ if (n < 0)
+ break;
+ if (fwrite(buf, n, 1, stdout) != 1)
+ break;
+ }
+ }
+
+ write_tracing_file("tracing_on", "0");
+
+ /* read remaining buffer contents */
+ while (true) {
+ int n = read(trace_fd, buf, sizeof(buf));
+ if (n <= 0)
+ break;
+ if (fwrite(buf, n, 1, stdout) != 1)
+ break;
+ }
+
+out_close_fd:
+ close(trace_fd);
+out_reset:
+ reset_tracing_files(ftrace);
+out:
+ return done ? 0 : -1;
+}
+
+static int perf_ftrace_config(const char *var, const char *value, void *cb)
+{
+ struct perf_ftrace *ftrace = cb;
+
+ if (!strstarts(var, "ftrace."))
+ return 0;
+
+ if (strcmp(var, "ftrace.tracer"))
+ return -1;
+
+ if (!strcmp(value, "function_graph") ||
+ !strcmp(value, "function")) {
+ ftrace->tracer = value;
+ return 0;
+ }
+
+ pr_err("Please select \"function_graph\" (default) or \"function\"\n");
+ return -1;
+}
+
+static int parse_filter_func(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct list_head *head = opt->value;
+ struct filter_entry *entry;
+
+ entry = malloc(sizeof(*entry) + strlen(str) + 1);
+ if (entry == NULL)
+ return -ENOMEM;
+
+ strcpy(entry->name, str);
+ list_add_tail(&entry->list, head);
+
+ return 0;
+}
+
+static void delete_filter_func(struct list_head *head)
+{
+ struct filter_entry *pos, *tmp;
+
+ list_for_each_entry_safe(pos, tmp, head, list) {
+ list_del(&pos->list);
+ free(pos);
+ }
+}
+
+int cmd_ftrace(int argc, const char **argv)
+{
+ int ret;
+ struct perf_ftrace ftrace = {
+ .tracer = DEFAULT_TRACER,
+ .target = { .uid = UINT_MAX, },
+ };
+ const char * const ftrace_usage[] = {
+ "perf ftrace [<options>] [<command>]",
+ "perf ftrace [<options>] -- <command> [<options>]",
+ NULL
+ };
+ const struct option ftrace_options[] = {
+ OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
+ "tracer to use: function_graph(default) or function"),
+ OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
+ "trace on existing process id"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose"),
+ OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
+ "list of cpus to monitor"),
+ OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
+ "trace given functions only", parse_filter_func),
+ OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
+ "do not trace given functions", parse_filter_func),
+ OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
+ "Set graph filter on given functions", parse_filter_func),
+ OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
+ "Set nograph filter on given functions", parse_filter_func),
+ OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
+ "Max depth for function graph tracer"),
+ OPT_END()
+ };
+
+ INIT_LIST_HEAD(&ftrace.filters);
+ INIT_LIST_HEAD(&ftrace.notrace);
+ INIT_LIST_HEAD(&ftrace.graph_funcs);
+ INIT_LIST_HEAD(&ftrace.nograph_funcs);
+
+ ret = perf_config(perf_ftrace_config, &ftrace);
+ if (ret < 0)
+ return -1;
+
+ argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc && target__none(&ftrace.target))
+ usage_with_options(ftrace_usage, ftrace_options);
+
+ ret = target__validate(&ftrace.target);
+ if (ret) {
+ char errbuf[512];
+
+ target__strerror(&ftrace.target, ret, errbuf, 512);
+ pr_err("%s\n", errbuf);
+ goto out_delete_filters;
+ }
+
+ ftrace.evlist = perf_evlist__new();
+ if (ftrace.evlist == NULL) {
+ ret = -ENOMEM;
+ goto out_delete_filters;
+ }
+
+ ret = perf_evlist__create_maps(ftrace.evlist, &ftrace.target);
+ if (ret < 0)
+ goto out_delete_evlist;
+
+ ret = __cmd_ftrace(&ftrace, argc, argv);
+
+out_delete_evlist:
+ perf_evlist__delete(ftrace.evlist);
+
+out_delete_filters:
+ delete_filter_func(&ftrace.filters);
+ delete_filter_func(&ftrace.notrace);
+ delete_filter_func(&ftrace.graph_funcs);
+ delete_filter_func(&ftrace.nograph_funcs);
+
+ return ret;
+}
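+
+/*
+ * Usage sketch: 'perf ftrace -- <command>' runs <command> under the
+ * function_graph tracer by default; '-t', '-T'/'-N', '-G'/'-g' and
+ * '-D' map onto the current_tracer, set_ftrace_*, set_graph_* and
+ * max_graph_depth tracefs files written above.
+ */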
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
new file mode 100644
index 000000000..3d29d0524
--- /dev/null
+++ b/tools/perf/builtin-help.c
@@ -0,0 +1,493 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-help.c
+ *
+ * Builtin help command
+ */
+#include "perf.h"
+#include "util/config.h"
+#include "builtin.h"
+#include <subcmd/exec-cmd.h>
+#include "common-cmds.h"
+#include <subcmd/parse-options.h>
+#include <subcmd/run-command.h>
+#include <subcmd/help.h>
+#include "util/debug.h"
+#include <linux/kernel.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+static struct man_viewer_list {
+ struct man_viewer_list *next;
+ char name[0];
+} *man_viewer_list;
+
+static struct man_viewer_info_list {
+ struct man_viewer_info_list *next;
+ const char *info;
+ char name[0];
+} *man_viewer_info_list;
+
+enum help_format {
+ HELP_FORMAT_NONE,
+ HELP_FORMAT_MAN,
+ HELP_FORMAT_INFO,
+ HELP_FORMAT_WEB,
+};
+
+static enum help_format parse_help_format(const char *format)
+{
+ if (!strcmp(format, "man"))
+ return HELP_FORMAT_MAN;
+ if (!strcmp(format, "info"))
+ return HELP_FORMAT_INFO;
+ if (!strcmp(format, "web") || !strcmp(format, "html"))
+ return HELP_FORMAT_WEB;
+
+ pr_err("unrecognized help format '%s'", format);
+ return HELP_FORMAT_NONE;
+}
+
+static const char *get_man_viewer_info(const char *name)
+{
+ struct man_viewer_info_list *viewer;
+
+ for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) {
+ if (!strcasecmp(name, viewer->name))
+ return viewer->info;
+ }
+ return NULL;
+}
+
+static int check_emacsclient_version(void)
+{
+ struct strbuf buffer = STRBUF_INIT;
+ struct child_process ec_process;
+ const char *argv_ec[] = { "emacsclient", "--version", NULL };
+ int version;
+ int ret = -1;
+
+ /* emacsclient prints its version number on stderr */
+ memset(&ec_process, 0, sizeof(ec_process));
+ ec_process.argv = argv_ec;
+ ec_process.err = -1;
+ ec_process.stdout_to_stderr = 1;
+ if (start_command(&ec_process)) {
+ fprintf(stderr, "Failed to start emacsclient.\n");
+ return -1;
+ }
+ if (strbuf_read(&buffer, ec_process.err, 20) < 0) {
+ fprintf(stderr, "Failed to read emacsclient version\n");
+ goto out;
+ }
+ close(ec_process.err);
+
+	/*
+	 * Don't bother checking the return value, because "emacsclient
+	 * --version" seems to always exit with code 1.
+	 */
+ finish_command(&ec_process);
+
+ if (!strstarts(buffer.buf, "emacsclient")) {
+ fprintf(stderr, "Failed to parse emacsclient version.\n");
+ goto out;
+ }
+
+ version = atoi(buffer.buf + strlen("emacsclient"));
+
+ if (version < 22) {
+ fprintf(stderr,
+ "emacsclient version '%d' too old (< 22).\n",
+ version);
+ } else
+ ret = 0;
+out:
+ strbuf_release(&buffer);
+ return ret;
+}
+
+static void exec_failed(const char *cmd)
+{
+ char sbuf[STRERR_BUFSIZE];
+ pr_warning("failed to exec '%s': %s", cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+}
+
+static void exec_woman_emacs(const char *path, const char *page)
+{
+ if (!check_emacsclient_version()) {
+ /* This works only with emacsclient version >= 22. */
+ char *man_page;
+
+ if (!path)
+ path = "emacsclient";
+ if (asprintf(&man_page, "(woman \"%s\")", page) > 0) {
+ execlp(path, "emacsclient", "-e", man_page, NULL);
+ free(man_page);
+ }
+ exec_failed(path);
+ }
+}
+
+static void exec_man_konqueror(const char *path, const char *page)
+{
+ const char *display = getenv("DISPLAY");
+
+ if (display && *display) {
+ char *man_page;
+ const char *filename = "kfmclient";
+
+ /* It's simpler to launch konqueror using kfmclient. */
+ if (path) {
+ const char *file = strrchr(path, '/');
+ if (file && !strcmp(file + 1, "konqueror")) {
+ char *new = strdup(path);
+ char *dest = strrchr(new, '/');
+
+ /* strlen("konqueror") == strlen("kfmclient") */
+ strcpy(dest + 1, "kfmclient");
+ path = new;
+ }
+ if (file)
+ filename = file;
+ } else
+ path = "kfmclient";
+ if (asprintf(&man_page, "man:%s(1)", page) > 0) {
+ execlp(path, filename, "newTab", man_page, NULL);
+ free(man_page);
+ }
+ exec_failed(path);
+ }
+}
+
+static void exec_man_man(const char *path, const char *page)
+{
+ if (!path)
+ path = "man";
+ execlp(path, "man", page, NULL);
+ exec_failed(path);
+}
+
+static void exec_man_cmd(const char *cmd, const char *page)
+{
+ char *shell_cmd;
+
+ if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
+ execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
+ free(shell_cmd);
+ }
+ exec_failed(cmd);
+}
+
+static void add_man_viewer(const char *name)
+{
+ struct man_viewer_list **p = &man_viewer_list;
+ size_t len = strlen(name);
+
+ while (*p)
+ p = &((*p)->next);
+ *p = zalloc(sizeof(**p) + len + 1);
+ strcpy((*p)->name, name);
+}
+
+static int supported_man_viewer(const char *name, size_t len)
+{
+ return (!strncasecmp("man", name, len) ||
+ !strncasecmp("woman", name, len) ||
+ !strncasecmp("konqueror", name, len));
+}
+
+static void do_add_man_viewer_info(const char *name,
+ size_t len,
+ const char *value)
+{
+ struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1);
+
+ strncpy(new->name, name, len);
+ new->info = strdup(value);
+ new->next = man_viewer_info_list;
+ man_viewer_info_list = new;
+}
+
+static void unsupported_man_viewer(const char *name, const char *var)
+{
+ pr_warning("'%s': path for unsupported man viewer.\n"
+ "Please consider using 'man.<tool>.%s' instead.", name, var);
+}
+
+static int add_man_viewer_path(const char *name,
+ size_t len,
+ const char *value)
+{
+ if (supported_man_viewer(name, len))
+ do_add_man_viewer_info(name, len, value);
+ else
+ unsupported_man_viewer(name, "cmd");
+
+ return 0;
+}
+
+static int add_man_viewer_cmd(const char *name,
+ size_t len,
+ const char *value)
+{
+ if (supported_man_viewer(name, len))
+ unsupported_man_viewer(name, "path");
+ else
+ do_add_man_viewer_info(name, len, value);
+
+ return 0;
+}
+
+static int add_man_viewer_info(const char *var, const char *value)
+{
+ const char *name = var + 4;
+ const char *subkey = strrchr(name, '.');
+
+ if (!subkey) {
+ pr_err("Config with no key for man viewer: %s", name);
+ return -1;
+ }
+
+ if (!strcmp(subkey, ".path")) {
+ if (!value)
+ return config_error_nonbool(var);
+ return add_man_viewer_path(name, subkey - name, value);
+ }
+ if (!strcmp(subkey, ".cmd")) {
+ if (!value)
+ return config_error_nonbool(var);
+ return add_man_viewer_cmd(name, subkey - name, value);
+ }
+
+ pr_warning("'%s': unsupported man viewer sub key.", subkey);
+ return 0;
+}
+
+static int perf_help_config(const char *var, const char *value, void *cb)
+{
+ enum help_format *help_formatp = cb;
+
+ if (!strcmp(var, "help.format")) {
+ if (!value)
+ return config_error_nonbool(var);
+ *help_formatp = parse_help_format(value);
+ if (*help_formatp == HELP_FORMAT_NONE)
+ return -1;
+ return 0;
+ }
+ if (!strcmp(var, "man.viewer")) {
+ if (!value)
+ return config_error_nonbool(var);
+ add_man_viewer(value);
+ return 0;
+ }
+ if (strstarts(var, "man."))
+ return add_man_viewer_info(var, value);
+
+ return 0;
+}
+
+static struct cmdnames main_cmds, other_cmds;
+
+void list_common_cmds_help(void)
+{
+ unsigned int i, longest = 0;
+
+ for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+ if (longest < strlen(common_cmds[i].name))
+ longest = strlen(common_cmds[i].name);
+ }
+
+ puts(" The most commonly used perf commands are:");
+ for (i = 0; i < ARRAY_SIZE(common_cmds); i++) {
+ printf(" %-*s ", longest, common_cmds[i].name);
+ puts(common_cmds[i].help);
+ }
+}
+
+static const char *cmd_to_page(const char *perf_cmd)
+{
+ char *s;
+
+ if (!perf_cmd)
+ return "perf";
+ else if (strstarts(perf_cmd, "perf"))
+ return perf_cmd;
+
+ return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;
+}
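+
+/*
+ * Examples: cmd_to_page(NULL) yields "perf", cmd_to_page("perf-top")
+ * is returned as-is, and cmd_to_page("record") allocates "perf-record"
+ * (deliberately leaked, since a viewer is exec'ed right afterwards).
+ */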
+
+static void setup_man_path(void)
+{
+ char *new_path;
+ const char *old_path = getenv("MANPATH");
+
+	/*
+	 * We should always put ':' after our path. If there is no
+	 * old_path, the ':' at the end will let 'man' try system-wide
+	 * paths after ours to find the manual page. If there is an
+	 * old_path, we need ':' as the delimiter.
+	 */
+ if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) {
+ setenv("MANPATH", new_path, 1);
+ free(new_path);
+ } else {
+ pr_err("Unable to setup man path");
+ }
+}
+
+static void exec_viewer(const char *name, const char *page)
+{
+ const char *info = get_man_viewer_info(name);
+
+ if (!strcasecmp(name, "man"))
+ exec_man_man(info, page);
+ else if (!strcasecmp(name, "woman"))
+ exec_woman_emacs(info, page);
+ else if (!strcasecmp(name, "konqueror"))
+ exec_man_konqueror(info, page);
+ else if (info)
+ exec_man_cmd(info, page);
+ else
+ pr_warning("'%s': unknown man viewer.", name);
+}
+
+static int show_man_page(const char *perf_cmd)
+{
+ struct man_viewer_list *viewer;
+ const char *page = cmd_to_page(perf_cmd);
+ const char *fallback = getenv("PERF_MAN_VIEWER");
+
+ setup_man_path();
+ for (viewer = man_viewer_list; viewer; viewer = viewer->next)
+ exec_viewer(viewer->name, page); /* will return when unable */
+
+ if (fallback)
+ exec_viewer(fallback, page);
+ exec_viewer("man", page);
+
+ pr_err("no man viewer handled the request");
+ return -1;
+}
+
+static int show_info_page(const char *perf_cmd)
+{
+ const char *page = cmd_to_page(perf_cmd);
+ setenv("INFOPATH", system_path(PERF_INFO_PATH), 1);
+ execlp("info", "info", "perfman", page, NULL);
+ return -1;
+}
+
+static int get_html_page_path(char **page_path, const char *page)
+{
+ struct stat st;
+ const char *html_path = system_path(PERF_HTML_PATH);
+
+ /* Check that we have a perf documentation directory. */
+ if (stat(mkpath("%s/perf.html", html_path), &st)
+ || !S_ISREG(st.st_mode)) {
+ pr_err("'%s': not a documentation directory.", html_path);
+ return -1;
+ }
+
+ return asprintf(page_path, "%s/%s.html", html_path, page);
+}
+
+/*
+ * If open_html is not defined in a platform-specific way (see for
+ * example compat/mingw.h), we use the script web--browse to display
+ * HTML.
+ */
+#ifndef open_html
+static void open_html(const char *path)
+{
+ execl_cmd("web--browse", "-c", "help.browser", path, NULL);
+}
+#endif
+
+static int show_html_page(const char *perf_cmd)
+{
+ const char *page = cmd_to_page(perf_cmd);
+	char *page_path; /* it leaks but we exec below */
+
+ if (get_html_page_path(&page_path, page) < 0)
+ return -1;
+
+ open_html(page_path);
+
+ return 0;
+}
+
+int cmd_help(int argc, const char **argv)
+{
+ bool show_all = false;
+ enum help_format help_format = HELP_FORMAT_MAN;
+ struct option builtin_help_options[] = {
+ OPT_BOOLEAN('a', "all", &show_all, "print all available commands"),
+ OPT_SET_UINT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN),
+ OPT_SET_UINT('w', "web", &help_format, "show manual in web browser",
+ HELP_FORMAT_WEB),
+ OPT_SET_UINT('i', "info", &help_format, "show info page",
+ HELP_FORMAT_INFO),
+ OPT_END(),
+ };
+ const char * const builtin_help_subcommands[] = {
+ "buildid-cache", "buildid-list", "diff", "evlist", "help", "list",
+ "record", "report", "bench", "stat", "timechart", "top", "annotate",
+ "script", "sched", "kallsyms", "kmem", "lock", "kvm", "test", "inject", "mem", "data",
+#ifdef HAVE_LIBELF_SUPPORT
+ "probe",
+#endif
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+ "trace",
+#endif
+ NULL };
+ const char *builtin_help_usage[] = {
+ "perf help [--all] [--man|--web|--info] [command]",
+ NULL
+ };
+ int rc;
+
+ load_command_list("perf-", &main_cmds, &other_cmds);
+
+ rc = perf_config(perf_help_config, &help_format);
+ if (rc)
+ return rc;
+
+ argc = parse_options_subcommand(argc, argv, builtin_help_options,
+ builtin_help_subcommands, builtin_help_usage, 0);
+
+ if (show_all) {
+ printf("\n Usage: %s\n\n", perf_usage_string);
+ list_commands("perf commands", &main_cmds, &other_cmds);
+ printf(" %s\n\n", perf_more_info_string);
+ return 0;
+ }
+
+ if (!argv[0]) {
+ printf("\n usage: %s\n\n", perf_usage_string);
+ list_common_cmds_help();
+ printf("\n %s\n\n", perf_more_info_string);
+ return 0;
+ }
+
+ switch (help_format) {
+ case HELP_FORMAT_MAN:
+ rc = show_man_page(argv[0]);
+ break;
+ case HELP_FORMAT_INFO:
+ rc = show_info_page(argv[0]);
+ break;
+ case HELP_FORMAT_WEB:
+ rc = show_html_page(argv[0]);
+ break;
+ case HELP_FORMAT_NONE:
+ /* fall-through */
+ default:
+ rc = -1;
+ break;
+ }
+
+ return rc;
+}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
new file mode 100644
index 000000000..a3b346359
--- /dev/null
+++ b/tools/perf/builtin-inject.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-inject.c
+ *
+ * Builtin inject command: Examine the live mode (stdin) event stream
+ * and repipe it to stdout while optionally injecting additional
+ * events into it.
+ */
+#include "builtin.h"
+
+#include "perf.h"
+#include "util/color.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/debug.h"
+#include "util/build-id.h"
+#include "util/data.h"
+#include "util/auxtrace.h"
+#include "util/jit.h"
+#include "util/thread.h"
+
+#include <subcmd/parse-options.h>
+
+#include <linux/list.h>
+#include <errno.h>
+#include <signal.h>
+
+struct perf_inject {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool build_ids;
+ bool sched_stat;
+ bool have_auxtrace;
+ bool strip;
+ bool jit_mode;
+ const char *input_name;
+ struct perf_data output;
+ u64 bytes_written;
+ u64 aux_id;
+ struct list_head samples;
+ struct itrace_synth_opts itrace_synth_opts;
+};
+
+struct event_entry {
+ struct list_head node;
+ u32 tid;
+ union perf_event event[0];
+};
+
+static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
+{
+ ssize_t size;
+
+ size = perf_data__write(&inject->output, buf, sz);
+ if (size < 0)
+ return -errno;
+
+ inject->bytes_written += size;
+ return 0;
+}
+
+static int perf_event__repipe_synth(struct perf_tool *tool,
+ union perf_event *event)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject,
+ tool);
+
+ return output_bytes(inject, event, event->header.size);
+}
+
+static int perf_event__repipe_oe_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct ordered_events *oe __maybe_unused)
+{
+ return perf_event__repipe_synth(tool, event);
+}
+
+#ifdef HAVE_JITDUMP
+static int perf_event__drop_oe(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct ordered_events *oe __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+static int perf_event__repipe_op2_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session
+ __maybe_unused)
+{
+ return perf_event__repipe_synth(tool, event);
+}
+
+static int perf_event__repipe_attr(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evlist **pevlist)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject,
+ tool);
+ int ret;
+
+ ret = perf_event__process_attr(tool, event, pevlist);
+ if (ret)
+ return ret;
+
+ if (!inject->output.is_pipe)
+ return 0;
+
+ return perf_event__repipe_synth(tool, event);
+}
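+
+/*
+ * Note: attr events are repiped only when the output is a pipe; when
+ * writing to a regular file the updated attrs end up in the header
+ * that __cmd_inject() rewrites via perf_session__write_header().
+ */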
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
+{
+ char buf[4096];
+ ssize_t ssz;
+ int ret;
+
+ while (size > 0) {
+ ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
+ if (ssz < 0)
+ return -errno;
+ ret = output_bytes(inject, buf, ssz);
+ if (ret)
+ return ret;
+ size -= ssz;
+ }
+
+ return 0;
+}
+
+static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject,
+ tool);
+ int ret;
+
+ inject->have_auxtrace = true;
+
+ if (!inject->output.is_pipe) {
+ off_t offset;
+
+ offset = lseek(inject->output.file.fd, 0, SEEK_CUR);
+ if (offset == -1)
+ return -errno;
+ ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
+ event, offset);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (perf_data__is_pipe(session->data) || !session->one_mmap) {
+ ret = output_bytes(inject, event, event->header.size);
+ if (ret < 0)
+ return ret;
+ ret = copy_bytes(inject, perf_data__fd(session->data),
+ event->auxtrace.size);
+ } else {
+ ret = output_bytes(inject, event,
+ event->header.size + event->auxtrace.size);
+ }
+ if (ret < 0)
+ return ret;
+
+ return event->auxtrace.size;
+}
+
+#else
+
+static s64
+perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+#endif
+
+static int perf_event__repipe(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_event__repipe_synth(tool, event);
+}
+
+static int perf_event__drop(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+static int perf_event__drop_aux(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct machine *machine __maybe_unused)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+ if (!inject->aux_id)
+ inject->aux_id = sample->id;
+
+ return 0;
+}
+
+typedef int (*inject_handler)(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine);
+
+static int perf_event__repipe_sample(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ if (evsel->handler) {
+ inject_handler f = evsel->handler;
+ return f(tool, event, sample, evsel, machine);
+ }
+
+ build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+
+ return perf_event__repipe_synth(tool, event);
+}
+
+static int perf_event__repipe_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_mmap(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ u64 n = 0;
+ int ret;
+
+ /*
+	 * If this is a JIT marker, inject the JIT mmaps and generate ELF images.
+ */
+ ret = jit_process(inject->session, &inject->output, machine,
+ event->mmap.filename, sample->pid, &n);
+ if (ret < 0)
+ return ret;
+ if (ret) {
+ inject->bytes_written += n;
+ return 0;
+ }
+ return perf_event__repipe_mmap(tool, event, sample, machine);
+}
+#endif
+
+static int perf_event__repipe_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_mmap2(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+#ifdef HAVE_JITDUMP
+static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ u64 n = 0;
+ int ret;
+
+ /*
+	 * If this is a JIT marker, inject the JIT mmaps and generate ELF images.
+ */
+ ret = jit_process(inject->session, &inject->output, machine,
+ event->mmap2.filename, sample->pid, &n);
+ if (ret < 0)
+ return ret;
+ if (ret) {
+ inject->bytes_written += n;
+ return 0;
+ }
+ return perf_event__repipe_mmap2(tool, event, sample, machine);
+}
+#endif
+
+static int perf_event__repipe_fork(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_fork(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+static int perf_event__repipe_comm(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_comm(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+static int perf_event__repipe_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err = perf_event__process_namespaces(tool, event, sample, machine);
+
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+static int perf_event__repipe_exit(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_exit(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+static int perf_event__repipe_tracing_data(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ int err;
+
+ perf_event__repipe_synth(tool, event);
+ err = perf_event__process_tracing_data(tool, event, session);
+
+ return err;
+}
+
+static int perf_event__repipe_id_index(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ int err;
+
+ perf_event__repipe_synth(tool, event);
+ err = perf_event__process_id_index(tool, event, session);
+
+ return err;
+}
+
+static int dso__read_build_id(struct dso *dso)
+{
+ if (dso->has_build_id)
+ return 0;
+
+ if (filename__read_build_id(dso->long_name, dso->build_id,
+ sizeof(dso->build_id)) > 0) {
+ dso->has_build_id = true;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
+ struct machine *machine)
+{
+ u16 misc = PERF_RECORD_MISC_USER;
+ int err;
+
+ if (dso__read_build_id(dso) < 0) {
+ pr_debug("no build_id found for %s\n", dso->long_name);
+ return -1;
+ }
+
+ if (dso->kernel)
+ misc = PERF_RECORD_MISC_KERNEL;
+
+ err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe,
+ machine);
+ if (err) {
+ pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int perf_event__inject_buildid(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct addr_location al;
+ struct thread *thread;
+
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ goto repipe;
+ }
+
+ if (thread__find_map(thread, sample->cpumode, sample->ip, &al)) {
+ if (!al.map->dso->hit) {
+ al.map->dso->hit = 1;
+ if (map__load(al.map) >= 0) {
+ dso__inject_build_id(al.map->dso, tool, machine);
+ /*
+ * If this fails, too bad, let the other side
+ * account this as unresolved.
+ */
+ } else {
+#ifdef HAVE_LIBELF_SUPPORT
+ pr_warning("no symbols found in %s, maybe "
+ "install a debug package?\n",
+ al.map->dso->long_name);
+#endif
+ }
+ }
+ }
+
+ thread__put(thread);
+repipe:
+ perf_event__repipe(tool, event, sample, machine);
+ return 0;
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct event_entry *ent;
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (sample->tid == ent->tid) {
+ list_del_init(&ent->node);
+ free(ent);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct event_entry *ent;
+
+ perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
+
+ ent = malloc(event->header.size + sizeof(struct event_entry));
+ if (ent == NULL) {
+ color_fprintf(stderr, PERF_COLOR_RED,
+ "Not enough memory to process sched switch event!");
+ return -1;
+ }
+
+ ent->tid = sample->tid;
+ memcpy(&ent->event, event, event->header.size);
+ list_add(&ent->node, &inject->samples);
+ return 0;
+}
+
+static int perf_inject__sched_stat(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct event_entry *ent;
+ union perf_event *event_sw;
+ struct perf_sample sample_sw;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ u32 pid = perf_evsel__intval(evsel, sample, "pid");
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (pid == ent->tid)
+ goto found;
+ }
+
+ return 0;
+found:
+ event_sw = &ent->event[0];
+ perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+ sample_sw.period = sample->period;
+ sample_sw.time = sample->time;
+ perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+ evsel->attr.read_format, &sample_sw);
+ build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+ return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static void sig_handler(int sig __maybe_unused)
+{
+ session_done = 1;
+}
+
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+ u64 sample_type, const char *sample_msg)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ const char *name = perf_evsel__name(evsel);
+
+ if (!(attr->sample_type & sample_type)) {
+		pr_err("Samples for %s event do not have %s attribute set.\n",
+ name, sample_msg);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int drop_sample(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+static void strip_init(struct perf_inject *inject)
+{
+ struct perf_evlist *evlist = inject->session->evlist;
+ struct perf_evsel *evsel;
+
+ inject->tool.context_switch = perf_event__drop;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel->handler = drop_sample;
+}
+
+static bool has_tracking(struct perf_evsel *evsel)
+{
+ return evsel->attr.mmap || evsel->attr.mmap2 || evsel->attr.comm ||
+ evsel->attr.task;
+}
+
+#define COMPAT_MASK (PERF_SAMPLE_ID | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+		     PERF_SAMPLE_CPU | PERF_SAMPLE_IDENTIFIER)
+
+/*
+ * For the perf.data file to remain parsable, tracking events like MMAP need
+ * their selected event to exist, unless there is only one selected event left
+ * and it has a compatible sample type.
+ */
+static bool ok_to_remove(struct perf_evlist *evlist,
+ struct perf_evsel *evsel_to_remove)
+{
+ struct perf_evsel *evsel;
+ int cnt = 0;
+ bool ok = false;
+
+ if (!has_tracking(evsel_to_remove))
+ return true;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->handler != drop_sample) {
+ cnt += 1;
+ if ((evsel->attr.sample_type & COMPAT_MASK) ==
+ (evsel_to_remove->attr.sample_type & COMPAT_MASK))
+ ok = true;
+ }
+ }
+
+ return ok && cnt == 1;
+}
+
+static void strip_fini(struct perf_inject *inject)
+{
+ struct perf_evlist *evlist = inject->session->evlist;
+ struct perf_evsel *evsel, *tmp;
+
+ /* Remove non-synthesized evsels if possible */
+ evlist__for_each_entry_safe(evlist, tmp, evsel) {
+ if (evsel->handler == drop_sample &&
+ ok_to_remove(evlist, evsel)) {
+ pr_debug("Deleting %s\n", perf_evsel__name(evsel));
+ perf_evlist__remove(evlist, evsel);
+ perf_evsel__delete(evsel);
+ }
+ }
+}
+
+static int __cmd_inject(struct perf_inject *inject)
+{
+ int ret = -EINVAL;
+ struct perf_session *session = inject->session;
+ struct perf_data *data_out = &inject->output;
+ int fd = perf_data__fd(data_out);
+ u64 output_data_offset;
+
+ signal(SIGINT, sig_handler);
+
+ if (inject->build_ids || inject->sched_stat ||
+ inject->itrace_synth_opts.set) {
+ inject->tool.mmap = perf_event__repipe_mmap;
+ inject->tool.mmap2 = perf_event__repipe_mmap2;
+ inject->tool.fork = perf_event__repipe_fork;
+ inject->tool.tracing_data = perf_event__repipe_tracing_data;
+ }
+
+ output_data_offset = session->header.data_offset;
+
+ if (inject->build_ids) {
+ inject->tool.sample = perf_event__inject_buildid;
+ } else if (inject->sched_stat) {
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "sched:sched_switch")) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__sched_switch;
+ } else if (!strcmp(name, "sched:sched_process_exit"))
+ evsel->handler = perf_inject__sched_process_exit;
+ else if (!strncmp(name, "sched:sched_stat_", 17))
+ evsel->handler = perf_inject__sched_stat;
+ }
+ } else if (inject->itrace_synth_opts.set) {
+ session->itrace_synth_opts = &inject->itrace_synth_opts;
+ inject->itrace_synth_opts.inject = true;
+ inject->tool.comm = perf_event__repipe_comm;
+ inject->tool.namespaces = perf_event__repipe_namespaces;
+ inject->tool.exit = perf_event__repipe_exit;
+ inject->tool.id_index = perf_event__repipe_id_index;
+ inject->tool.auxtrace_info = perf_event__process_auxtrace_info;
+ inject->tool.auxtrace = perf_event__process_auxtrace;
+ inject->tool.aux = perf_event__drop_aux;
+		inject->tool.itrace_start = perf_event__drop_aux;
+ inject->tool.ordered_events = true;
+ inject->tool.ordering_requires_timestamps = true;
+ /* Allow space in the header for new attributes */
+ output_data_offset = 4096;
+ if (inject->strip)
+ strip_init(inject);
+ }
+
+ if (!inject->itrace_synth_opts.set)
+ auxtrace_index__free(&session->auxtrace_index);
+
+ if (!data_out->is_pipe)
+ lseek(fd, output_data_offset, SEEK_SET);
+
+ ret = perf_session__process_events(session);
+ if (ret)
+ return ret;
+
+ if (!data_out->is_pipe) {
+ if (inject->build_ids)
+ perf_header__set_feat(&session->header,
+ HEADER_BUILD_ID);
+ /*
+ * Keep all buildids when there is unprocessed AUX data because
+ * it is not known which ones the AUX trace hits.
+ */
+ if (perf_header__has_feat(&session->header, HEADER_BUILD_ID) &&
+ inject->have_auxtrace && !inject->itrace_synth_opts.set)
+ dsos__hit_all(session);
+ /*
+ * The AUX areas have been removed and replaced with
+ * synthesized hardware events, so clear the feature flag and
+ * remove the evsel.
+ */
+ if (inject->itrace_synth_opts.set) {
+ struct perf_evsel *evsel;
+
+ perf_header__clear_feat(&session->header,
+ HEADER_AUXTRACE);
+ if (inject->itrace_synth_opts.last_branch)
+ perf_header__set_feat(&session->header,
+ HEADER_BRANCH_STACK);
+ evsel = perf_evlist__id2evsel_strict(session->evlist,
+ inject->aux_id);
+ if (evsel) {
+ pr_debug("Deleting %s\n",
+ perf_evsel__name(evsel));
+ perf_evlist__remove(session->evlist, evsel);
+ perf_evsel__delete(evsel);
+ }
+ if (inject->strip)
+ strip_fini(inject);
+ }
+ session->header.data_offset = output_data_offset;
+ session->header.data_size = inject->bytes_written;
+ perf_session__write_header(session, session->evlist, fd, true);
+ }
+
+ return ret;
+}
+
+int cmd_inject(int argc, const char **argv)
+{
+ struct perf_inject inject = {
+ .tool = {
+ .sample = perf_event__repipe_sample,
+ .mmap = perf_event__repipe,
+ .mmap2 = perf_event__repipe,
+ .comm = perf_event__repipe,
+ .fork = perf_event__repipe,
+ .exit = perf_event__repipe,
+ .lost = perf_event__repipe,
+ .lost_samples = perf_event__repipe,
+ .aux = perf_event__repipe,
+ .itrace_start = perf_event__repipe,
+ .context_switch = perf_event__repipe,
+ .read = perf_event__repipe_sample,
+ .throttle = perf_event__repipe,
+ .unthrottle = perf_event__repipe,
+ .attr = perf_event__repipe_attr,
+ .tracing_data = perf_event__repipe_op2_synth,
+ .auxtrace_info = perf_event__repipe_op2_synth,
+ .auxtrace = perf_event__repipe_auxtrace,
+ .auxtrace_error = perf_event__repipe_op2_synth,
+ .time_conv = perf_event__repipe_op2_synth,
+ .finished_round = perf_event__repipe_oe_synth,
+ .build_id = perf_event__repipe_op2_synth,
+ .id_index = perf_event__repipe_op2_synth,
+ .feature = perf_event__repipe_op2_synth,
+ },
+ .input_name = "-",
+ .samples = LIST_HEAD_INIT(inject.samples),
+ .output = {
+ .file = {
+ .path = "-",
+ },
+ .mode = PERF_DATA_MODE_WRITE,
+ },
+ };
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ int ret;
+
+ struct option options[] = {
+ OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
+ "Inject build-ids into the output stream"),
+ OPT_STRING('i', "input", &inject.input_name, "file",
+ "input file name"),
+ OPT_STRING('o', "output", &inject.output.file.path, "file",
+ "output file name"),
+ OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
+ "Merge sched-stat and sched-switch for getting events "
+ "where and how long tasks slept"),
+#ifdef HAVE_JITDUMP
+ OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
+#endif
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show build ids, etc)"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
+ "kallsyms pathname"),
+ OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
+ OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
+ NULL, "opts", "Instruction Tracing options",
+ itrace_parse_synth_opts),
+ OPT_BOOLEAN(0, "strip", &inject.strip,
+ "strip non-synthesized events (use with --itrace)"),
+ OPT_END()
+ };
+ const char * const inject_usage[] = {
+ "perf inject [<options>]",
+ NULL
+ };
+#ifndef HAVE_JITDUMP
+ set_option_nobuild(options, 'j', "jit", "NO_LIBELF=1", true);
+#endif
+ argc = parse_options(argc, argv, options, inject_usage, 0);
+
+ /*
+ * Any (unrecognized) arguments left?
+ */
+ if (argc)
+ usage_with_options(inject_usage, options);
+
+ if (inject.strip && !inject.itrace_synth_opts.set) {
+ pr_err("--strip option requires --itrace option\n");
+ return -1;
+ }
+
+ if (perf_data__open(&inject.output)) {
+ perror("failed to create output file");
+ return -1;
+ }
+
+ inject.tool.ordered_events = inject.sched_stat;
+
+ data.file.path = inject.input_name;
+ inject.session = perf_session__new(&data, true, &inject.tool);
+ if (inject.session == NULL)
+ return -1;
+
+ if (inject.build_ids) {
+ /*
+ * to make sure the mmap records are ordered correctly
+		 * Make sure the mmap records are ordered correctly,
+		 * especially the mmaps for jitted code. We cannot
+		 * generate the buildid hit list and inject the jit
+		 * mmaps at the same time for now.
+ inject.tool.ordered_events = true;
+ inject.tool.ordering_requires_timestamps = true;
+ }
+#ifdef HAVE_JITDUMP
+ if (inject.jit_mode) {
+ inject.tool.mmap2 = perf_event__jit_repipe_mmap2;
+ inject.tool.mmap = perf_event__jit_repipe_mmap;
+ inject.tool.ordered_events = true;
+ inject.tool.ordering_requires_timestamps = true;
+ /*
+ * JIT MMAP injection injects all MMAP events in one go, so it
+ * does not obey finished_round semantics.
+ */
+ inject.tool.finished_round = perf_event__drop_oe;
+ }
+#endif
+ ret = symbol__init(&inject.session->header.env);
+ if (ret < 0)
+ goto out_delete;
+
+ ret = __cmd_inject(&inject);
+
+out_delete:
+ perf_session__delete(inject.session);
+ return ret;
+}
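+
+/*
+ * Usage sketch: 'perf inject -b -i perf.data -o perf.data.bid'
+ * repipes the event stream while synthesizing build-id events for hit
+ * DSOs; with '--itrace' the AUX areas are replaced by synthesized
+ * events as configured in __cmd_inject().
+ */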
diff --git a/tools/perf/builtin-kallsyms.c b/tools/perf/builtin-kallsyms.c
new file mode 100644
index 000000000..90d1a2305
--- /dev/null
+++ b/tools/perf/builtin-kallsyms.c
@@ -0,0 +1,68 @@
+/*
+ * builtin-kallsyms.c
+ *
+ * Builtin command: Look for a symbol in the running kernel and its modules
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include <inttypes.h>
+#include "builtin.h"
+#include <linux/compiler.h>
+#include <subcmd/parse-options.h>
+#include "debug.h"
+#include "machine.h"
+#include "symbol.h"
+
+static int __cmd_kallsyms(int argc, const char **argv)
+{
+ int i;
+ struct machine *machine = machine__new_kallsyms();
+
+ if (machine == NULL) {
+ pr_err("Couldn't read /proc/kallsyms\n");
+ return -1;
+ }
+
+ for (i = 0; i < argc; ++i) {
+ struct map *map;
+ struct symbol *symbol = machine__find_kernel_symbol_by_name(machine, argv[i], &map);
+
+ if (symbol == NULL) {
+ printf("%s: not found\n", argv[i]);
+ continue;
+ }
+
+ printf("%s: %s %s %#" PRIx64 "-%#" PRIx64 " (%#" PRIx64 "-%#" PRIx64")\n",
+ symbol->name, map->dso->short_name, map->dso->long_name,
+ map->unmap_ip(map, symbol->start), map->unmap_ip(map, symbol->end),
+ symbol->start, symbol->end);
+ }
+
+ machine__delete(machine);
+ return 0;
+}
+
+int cmd_kallsyms(int argc, const char **argv)
+{
+ const struct option options[] = {
+ OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"),
+ OPT_END()
+ };
+ const char * const kallsyms_usage[] = {
+ "perf kallsyms [<options>] symbol_name",
+ NULL
+ };
+
+ argc = parse_options(argc, argv, options, kallsyms_usage, 0);
+ if (argc < 1)
+ usage_with_options(kallsyms_usage, options);
+
+ symbol_conf.sort_by_name = true;
+ symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+ if (symbol__init(NULL) < 0)
+ return -1;
+
+ return __cmd_kallsyms(argc, argv);
+}
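+
+/*
+ * Example: 'perf kallsyms vfs_read' prints the symbol name, the DSO
+ * it resolved to, and both address ranges emitted by the printf in
+ * __cmd_kallsyms() above.
+ */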
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
new file mode 100644
index 000000000..56dd5d147
--- /dev/null
+++ b/tools/perf/builtin-kmem.c
@@ -0,0 +1,2020 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/util.h"
+#include "util/config.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/callchain.h"
+#include "util/time-utils.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+#include "util/data.h"
+#include "util/cpumap.h"
+
+#include "util/debug.h"
+
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <regex.h>
+
+#include "sane_ctype.h"
+
+static int kmem_slab;
+static int kmem_page;
+
+static long kmem_page_size;
+static enum {
+ KMEM_SLAB,
+ KMEM_PAGE,
+} kmem_default = KMEM_SLAB; /* for backward compatibility */
+
+struct alloc_stat;
+typedef int (*sort_fn_t)(void *, void *);
+
+static int alloc_flag;
+static int caller_flag;
+
+static int alloc_lines = -1;
+static int caller_lines = -1;
+
+static bool raw_ip;
+
+struct alloc_stat {
+ u64 call_site;
+ u64 ptr;
+ u64 bytes_req;
+ u64 bytes_alloc;
+ u64 last_alloc;
+ u32 hit;
+ u32 pingpong;
+
+ short alloc_cpu;
+
+ struct rb_node node;
+};
+
+static struct rb_root root_alloc_stat;
+static struct rb_root root_alloc_sorted;
+static struct rb_root root_caller_stat;
+static struct rb_root root_caller_sorted;
+
+static unsigned long total_requested, total_allocated, total_freed;
+static unsigned long nr_allocs, nr_cross_allocs;
+
+/* filters controlling the start and stop time of the analysis */
+static struct perf_time_interval ptime;
+const char *time_str;
+
+static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+ int bytes_req, int bytes_alloc, int cpu)
+{
+ struct rb_node **node = &root_alloc_stat.rb_node;
+ struct rb_node *parent = NULL;
+ struct alloc_stat *data = NULL;
+
+ while (*node) {
+ parent = *node;
+ data = rb_entry(*node, struct alloc_stat, node);
+
+ if (ptr > data->ptr)
+ node = &(*node)->rb_right;
+ else if (ptr < data->ptr)
+ node = &(*node)->rb_left;
+ else
+ break;
+ }
+
+ if (data && data->ptr == ptr) {
+ data->hit++;
+ data->bytes_req += bytes_req;
+ data->bytes_alloc += bytes_alloc;
+ } else {
+ data = malloc(sizeof(*data));
+ if (!data) {
+ pr_err("%s: malloc failed\n", __func__);
+ return -1;
+ }
+ data->ptr = ptr;
+ data->pingpong = 0;
+ data->hit = 1;
+ data->bytes_req = bytes_req;
+ data->bytes_alloc = bytes_alloc;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &root_alloc_stat);
+ }
+ data->call_site = call_site;
+ data->alloc_cpu = cpu;
+ data->last_alloc = bytes_alloc;
+
+ return 0;
+}
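+
+/*
+ * The tree above is keyed by the allocated pointer: an address that
+ * is freed and handed out again folds into the same node,
+ * accumulating hit/bytes_req/bytes_alloc, while call_site, alloc_cpu
+ * and last_alloc always track the most recent allocation.
+ */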
+
+static int insert_caller_stat(unsigned long call_site,
+ int bytes_req, int bytes_alloc)
+{
+ struct rb_node **node = &root_caller_stat.rb_node;
+ struct rb_node *parent = NULL;
+ struct alloc_stat *data = NULL;
+
+ while (*node) {
+ parent = *node;
+ data = rb_entry(*node, struct alloc_stat, node);
+
+ if (call_site > data->call_site)
+ node = &(*node)->rb_right;
+ else if (call_site < data->call_site)
+ node = &(*node)->rb_left;
+ else
+ break;
+ }
+
+ if (data && data->call_site == call_site) {
+ data->hit++;
+ data->bytes_req += bytes_req;
+ data->bytes_alloc += bytes_alloc;
+ } else {
+ data = malloc(sizeof(*data));
+ if (!data) {
+ pr_err("%s: malloc failed\n", __func__);
+ return -1;
+ }
+ data->call_site = call_site;
+ data->pingpong = 0;
+ data->hit = 1;
+ data->bytes_req = bytes_req;
+ data->bytes_alloc = bytes_alloc;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &root_caller_stat);
+ }
+
+ return 0;
+}
+
+static int perf_evsel__process_alloc_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
+ call_site = perf_evsel__intval(evsel, sample, "call_site");
+ int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
+ bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
+
+ if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
+ insert_caller_stat(call_site, bytes_req, bytes_alloc))
+ return -1;
+
+ total_requested += bytes_req;
+ total_allocated += bytes_alloc;
+
+ nr_allocs++;
+ return 0;
+}
+
+static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ int ret = perf_evsel__process_alloc_event(evsel, sample);
+
+ if (!ret) {
+ int node1 = cpu__get_node(sample->cpu),
+ node2 = perf_evsel__intval(evsel, sample, "node");
+
+ if (node1 != node2)
+ nr_cross_allocs++;
+ }
+
+ return ret;
+}
+
+static int ptr_cmp(void *, void *);
+static int slab_callsite_cmp(void *, void *);
+
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+ unsigned long call_site,
+ struct rb_root *root,
+ sort_fn_t sort_fn)
+{
+ struct rb_node *node = root->rb_node;
+ struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+
+ while (node) {
+ struct alloc_stat *data;
+ int cmp;
+
+ data = rb_entry(node, struct alloc_stat, node);
+
+ cmp = sort_fn(&key, data);
+ if (cmp < 0)
+ node = node->rb_left;
+ else if (cmp > 0)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
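+/*
+ * A free issued on a different CPU than the one that allocated the
+ * object counts as a "ping-pong": the object bounced between CPUs,
+ * which hurts cache locality.
+ */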
+static int perf_evsel__process_free_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
+ struct alloc_stat *s_alloc, *s_caller;
+
+ s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+ if (!s_alloc)
+ return 0;
+
+ total_freed += s_alloc->last_alloc;
+
+ if ((short)sample->cpu != s_alloc->alloc_cpu) {
+ s_alloc->pingpong++;
+
+ s_caller = search_alloc_stat(0, s_alloc->call_site,
+ &root_caller_stat,
+ slab_callsite_cmp);
+ if (!s_caller)
+ return -1;
+ s_caller->pingpong++;
+ }
+ s_alloc->alloc_cpu = -1;
+
+ return 0;
+}
+
+static u64 total_page_alloc_bytes;
+static u64 total_page_free_bytes;
+static u64 total_page_nomatch_bytes;
+static u64 total_page_fail_bytes;
+static unsigned long nr_page_allocs;
+static unsigned long nr_page_frees;
+static unsigned long nr_page_fails;
+static unsigned long nr_page_nomatch;
+
+static bool use_pfn;
+static bool live_page;
+static struct perf_session *kmem_session;
+
+#define MAX_MIGRATE_TYPES 6
+#define MAX_PAGE_ORDER 11
+
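+/* allocation counts per page order and migrate type, for the summary table */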
+static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
+
+struct page_stat {
+ struct rb_node node;
+ u64 page;
+ u64 callsite;
+ int order;
+ unsigned gfp_flags;
+ unsigned migrate_type;
+ u64 alloc_bytes;
+ u64 free_bytes;
+ int nr_alloc;
+ int nr_free;
+};
+
+static struct rb_root page_live_tree;
+static struct rb_root page_alloc_tree;
+static struct rb_root page_alloc_sorted;
+static struct rb_root page_caller_tree;
+static struct rb_root page_caller_sorted;
+
+struct alloc_func {
+ u64 start;
+ u64 end;
+ char *name;
+};
+
+static int nr_alloc_funcs;
+static struct alloc_func *alloc_func_list;
+
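+/*
+ * funcmp() orders alloc functions by start address for qsort();
+ * callcmp() is the bsearch() comparator that matches when the key
+ * address falls inside a function's [start, end) range.
+ */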
+static int funcmp(const void *a, const void *b)
+{
+ const struct alloc_func *fa = a;
+ const struct alloc_func *fb = b;
+
+ if (fa->start > fb->start)
+ return 1;
+ else
+ return -1;
+}
+
+static int callcmp(const void *a, const void *b)
+{
+ const struct alloc_func *fa = a;
+ const struct alloc_func *fb = b;
+
+ if (fb->start <= fa->start && fa->end < fb->end)
+ return 0;
+
+ if (fa->start > fb->start)
+ return 1;
+ else
+ return -1;
+}
+
+static int build_alloc_func_list(void)
+{
+ int ret;
+ struct map *kernel_map;
+ struct symbol *sym;
+ struct rb_node *node;
+ struct alloc_func *func;
+ struct machine *machine = &kmem_session->machines.host;
+ regex_t alloc_func_regex;
+ const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
+
+ ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
+ if (ret) {
+ char err[BUFSIZ];
+
+ regerror(ret, &alloc_func_regex, err, sizeof(err));
+ pr_err("Invalid regex: %s\n%s", pattern, err);
+ return -EINVAL;
+ }
+
+ kernel_map = machine__kernel_map(machine);
+ if (map__load(kernel_map) < 0) {
+ pr_err("cannot load kernel map\n");
+ regfree(&alloc_func_regex);
+ return -ENOENT;
+ }
+
+ map__for_each_symbol(kernel_map, sym, node) {
+ if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
+ continue;
+
+ func = realloc(alloc_func_list,
+ (nr_alloc_funcs + 1) * sizeof(*func));
+ if (func == NULL) {
+ regfree(&alloc_func_regex);
+ return -ENOMEM;
+ }
+
+ pr_debug("alloc func: %s\n", sym->name);
+ func[nr_alloc_funcs].start = sym->start;
+ func[nr_alloc_funcs].end = sym->end;
+ func[nr_alloc_funcs].name = sym->name;
+
+ alloc_func_list = func;
+ nr_alloc_funcs++;
+ }
+
+ qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
+
+ regfree(&alloc_func_regex);
+ return 0;
+}
+
+/*
+ * Find the first non-allocation function in the callchain; the known
+ * allocation functions are kept in 'alloc_func_list'.
+ */
+static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+ struct addr_location al;
+ struct machine *machine = &kmem_session->machines.host;
+ struct callchain_cursor_node *node;
+
+ if (alloc_func_list == NULL) {
+ if (build_alloc_func_list() < 0)
+ goto out;
+ }
+
+ al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ sample__resolve_callchain(sample, &callchain_cursor, NULL, evsel, &al, 16);
+
+ callchain_cursor_commit(&callchain_cursor);
+ while (true) {
+ struct alloc_func key, *caller;
+ u64 addr;
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (node == NULL)
+ break;
+
+ key.start = key.end = node->ip;
+ caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
+ sizeof(key), callcmp);
+ if (!caller) {
+ /* found the callsite: first frame outside the allocator */
+ if (node->map)
+ addr = map__unmap_ip(node->map, node->ip);
+ else
+ addr = node->ip;
+
+ return addr;
+ } else
+ pr_debug3("skipping alloc function: %s\n", caller->name);
+
+ callchain_cursor_advance(&callchain_cursor);
+ }
+
+out:
+ pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
+ return sample->ip;
+}
+
+struct sort_dimension {
+ const char name[20];
+ sort_fn_t cmp;
+ struct list_head list;
+};
+
+static LIST_HEAD(page_alloc_sort_input);
+static LIST_HEAD(page_caller_sort_input);
+
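+/*
+ * Page statistics are kept in three trees: page_live_tree is keyed by
+ * the raw page/pfn and holds currently allocated pages, while
+ * page_alloc_tree and page_caller_tree aggregate totals using the keys
+ * configured in the *_sort_input lists above.
+ */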
+static struct page_stat *
+__page_stat__findnew_page(struct page_stat *pstat, bool create)
+{
+ struct rb_node **node = &page_live_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct page_stat *data;
+
+ while (*node) {
+ s64 cmp;
+
+ parent = *node;
+ data = rb_entry(*node, struct page_stat, node);
+
+ cmp = data->page - pstat->page;
+ if (cmp < 0)
+ node = &parent->rb_left;
+ else if (cmp > 0)
+ node = &parent->rb_right;
+ else
+ return data;
+ }
+
+ if (!create)
+ return NULL;
+
+ data = zalloc(sizeof(*data));
+ if (data != NULL) {
+ data->page = pstat->page;
+ data->order = pstat->order;
+ data->gfp_flags = pstat->gfp_flags;
+ data->migrate_type = pstat->migrate_type;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &page_live_tree);
+ }
+
+ return data;
+}
+
+static struct page_stat *page_stat__find_page(struct page_stat *pstat)
+{
+ return __page_stat__findnew_page(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
+{
+ return __page_stat__findnew_page(pstat, true);
+}
+
+static struct page_stat *
+__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
+{
+ struct rb_node **node = &page_alloc_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct page_stat *data;
+ struct sort_dimension *sort;
+
+ while (*node) {
+ int cmp = 0;
+
+ parent = *node;
+ data = rb_entry(*node, struct page_stat, node);
+
+ list_for_each_entry(sort, &page_alloc_sort_input, list) {
+ cmp = sort->cmp(pstat, data);
+ if (cmp)
+ break;
+ }
+
+ if (cmp < 0)
+ node = &parent->rb_left;
+ else if (cmp > 0)
+ node = &parent->rb_right;
+ else
+ return data;
+ }
+
+ if (!create)
+ return NULL;
+
+ data = zalloc(sizeof(*data));
+ if (data != NULL) {
+ data->page = pstat->page;
+ data->order = pstat->order;
+ data->gfp_flags = pstat->gfp_flags;
+ data->migrate_type = pstat->migrate_type;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &page_alloc_tree);
+ }
+
+ return data;
+}
+
+static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
+{
+ return __page_stat__findnew_alloc(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
+{
+ return __page_stat__findnew_alloc(pstat, true);
+}
+
+static struct page_stat *
+__page_stat__findnew_caller(struct page_stat *pstat, bool create)
+{
+ struct rb_node **node = &page_caller_tree.rb_node;
+ struct rb_node *parent = NULL;
+ struct page_stat *data;
+ struct sort_dimension *sort;
+
+ while (*node) {
+ int cmp = 0;
+
+ parent = *node;
+ data = rb_entry(*node, struct page_stat, node);
+
+ list_for_each_entry(sort, &page_caller_sort_input, list) {
+ cmp = sort->cmp(pstat, data);
+ if (cmp)
+ break;
+ }
+
+ if (cmp < 0)
+ node = &parent->rb_left;
+ else if (cmp > 0)
+ node = &parent->rb_right;
+ else
+ return data;
+ }
+
+ if (!create)
+ return NULL;
+
+ data = zalloc(sizeof(*data));
+ if (data != NULL) {
+ data->callsite = pstat->callsite;
+ data->order = pstat->order;
+ data->gfp_flags = pstat->gfp_flags;
+ data->migrate_type = pstat->migrate_type;
+
+ rb_link_node(&data->node, parent, node);
+ rb_insert_color(&data->node, &page_caller_tree);
+ }
+
+ return data;
+}
+
+static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
+{
+ return __page_stat__findnew_caller(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
+{
+ return __page_stat__findnew_caller(pstat, true);
+}
+
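+/*
+ * A failed allocation reports an all-ones pfn with PFN-based events,
+ * or a NULL struct page pointer otherwise.
+ */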
+static bool valid_page(u64 pfn_or_page)
+{
+ if (use_pfn && pfn_or_page == -1UL)
+ return false;
+ if (!use_pfn && pfn_or_page == 0)
+ return false;
+ return true;
+}
+
+struct gfp_flag {
+ unsigned int flags;
+ char *compact_str;
+ char *human_readable;
+};
+
+static struct gfp_flag *gfps;
+static int nr_gfps;
+
+static int gfpcmp(const void *a, const void *b)
+{
+ const struct gfp_flag *fa = a;
+ const struct gfp_flag *fb = b;
+
+ return fa->flags - fb->flags;
+}
+
+/* see include/trace/events/mmflags.h */
+static const struct {
+ const char *original;
+ const char *compact;
+} gfp_compact_table[] = {
+ { "GFP_TRANSHUGE", "THP" },
+ { "GFP_TRANSHUGE_LIGHT", "THL" },
+ { "GFP_HIGHUSER_MOVABLE", "HUM" },
+ { "GFP_HIGHUSER", "HU" },
+ { "GFP_USER", "U" },
+ { "GFP_KERNEL_ACCOUNT", "KAC" },
+ { "GFP_KERNEL", "K" },
+ { "GFP_NOFS", "NF" },
+ { "GFP_ATOMIC", "A" },
+ { "GFP_NOIO", "NI" },
+ { "GFP_NOWAIT", "NW" },
+ { "GFP_DMA", "D" },
+ { "__GFP_HIGHMEM", "HM" },
+ { "GFP_DMA32", "D32" },
+ { "__GFP_HIGH", "H" },
+ { "__GFP_ATOMIC", "_A" },
+ { "__GFP_IO", "I" },
+ { "__GFP_FS", "F" },
+ { "__GFP_NOWARN", "NWR" },
+ { "__GFP_RETRY_MAYFAIL", "R" },
+ { "__GFP_NOFAIL", "NF" },
+ { "__GFP_NORETRY", "NR" },
+ { "__GFP_COMP", "C" },
+ { "__GFP_ZERO", "Z" },
+ { "__GFP_NOMEMALLOC", "NMA" },
+ { "__GFP_MEMALLOC", "MA" },
+ { "__GFP_HARDWALL", "HW" },
+ { "__GFP_THISNODE", "TN" },
+ { "__GFP_RECLAIMABLE", "RC" },
+ { "__GFP_MOVABLE", "M" },
+ { "__GFP_ACCOUNT", "AC" },
+ { "__GFP_WRITE", "WR" },
+ { "__GFP_RECLAIM", "R" },
+ { "__GFP_DIRECT_RECLAIM", "DR" },
+ { "__GFP_KSWAPD_RECLAIM", "KR" },
+};
+
+static size_t max_gfp_len;
+
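+/*
+ * Convert a "GFP_KERNEL|__GFP_NOWARN" style string into its compact
+ * "K|NWR" form using gfp_compact_table, remembering the longest result
+ * in max_gfp_len for column alignment.
+ */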
+static char *compact_gfp_flags(char *gfp_flags)
+{
+ char *orig_flags = strdup(gfp_flags);
+ char *new_flags = NULL;
+ char *str, *pos = NULL;
+ size_t len = 0;
+
+ if (orig_flags == NULL)
+ return NULL;
+
+ str = strtok_r(orig_flags, "|", &pos);
+ while (str) {
+ size_t i;
+ char *new;
+ const char *cpt;
+
+ for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
+ if (strcmp(gfp_compact_table[i].original, str))
+ continue;
+
+ cpt = gfp_compact_table[i].compact;
+ new = realloc(new_flags, len + strlen(cpt) + 2);
+ if (new == NULL) {
+ free(new_flags);
+ free(orig_flags);
+ return NULL;
+ }
+
+ new_flags = new;
+
+ if (!len) {
+ strcpy(new_flags, cpt);
+ } else {
+ strcat(new_flags, "|");
+ strcat(new_flags, cpt);
+ len++;
+ }
+
+ len += strlen(cpt);
+ }
+
+ str = strtok_r(NULL, "|", &pos);
+ }
+
+ if (max_gfp_len < len)
+ max_gfp_len = len;
+
+ free(orig_flags);
+ return new_flags;
+}
+
+static char *compact_gfp_string(unsigned long gfp_flags)
+{
+ struct gfp_flag key = {
+ .flags = gfp_flags,
+ };
+ struct gfp_flag *gfp;
+
+ gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+ if (gfp)
+ return gfp->compact_str;
+
+ return NULL;
+}
+
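+/*
+ * Lazily build the gfps table: the first time a gfp_flags value is
+ * seen, pretty-print the raw record, pull out its "gfp_flags=" token
+ * and cache both the human readable and the compact string.
+ */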
+static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
+ unsigned int gfp_flags)
+{
+ struct tep_record record = {
+ .cpu = sample->cpu,
+ .data = sample->raw_data,
+ .size = sample->raw_size,
+ };
+ struct trace_seq seq;
+ char *str, *pos = NULL;
+
+ if (nr_gfps) {
+ struct gfp_flag key = {
+ .flags = gfp_flags,
+ };
+
+ if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
+ return 0;
+ }
+
+ trace_seq_init(&seq);
+ tep_event_info(&seq, evsel->tp_format, &record);
+
+ str = strtok_r(seq.buffer, " ", &pos);
+ while (str) {
+ if (!strncmp(str, "gfp_flags=", 10)) {
+ struct gfp_flag *new;
+
+ new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
+ if (new == NULL) {
+ trace_seq_destroy(&seq);
+ return -ENOMEM;
+ }
+
+ gfps = new;
+ new += nr_gfps++;
+
+ new->flags = gfp_flags;
+ new->human_readable = strdup(str + 10);
+ new->compact_str = compact_gfp_flags(str + 10);
+ if (!new->human_readable || !new->compact_str) {
+ trace_seq_destroy(&seq);
+ return -ENOMEM;
+ }
+
+ qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+ }
+
+ str = strtok_r(NULL, " ", &pos);
+ }
+
+ trace_seq_destroy(&seq);
+ return 0;
+}
+
+static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ u64 page;
+ unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
+ unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+ "migratetype");
+ u64 bytes = kmem_page_size << order;
+ u64 callsite;
+ struct page_stat *pstat;
+ struct page_stat this = {
+ .order = order,
+ .gfp_flags = gfp_flags,
+ .migrate_type = migrate_type,
+ };
+
+ if (use_pfn)
+ page = perf_evsel__intval(evsel, sample, "pfn");
+ else
+ page = perf_evsel__intval(evsel, sample, "page");
+
+ nr_page_allocs++;
+ total_page_alloc_bytes += bytes;
+
+ if (!valid_page(page)) {
+ nr_page_fails++;
+ total_page_fail_bytes += bytes;
+
+ return 0;
+ }
+
+ if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
+ return -1;
+
+ callsite = find_callsite(evsel, sample);
+
+ /*
+ * Track the live page so the matching free event can look up
+ * its gfp flags and migrate type.
+ */
+ this.page = page;
+ pstat = page_stat__findnew_page(&this);
+ if (pstat == NULL)
+ return -ENOMEM;
+
+ pstat->nr_alloc++;
+ pstat->alloc_bytes += bytes;
+ pstat->callsite = callsite;
+
+ if (!live_page) {
+ pstat = page_stat__findnew_alloc(&this);
+ if (pstat == NULL)
+ return -ENOMEM;
+
+ pstat->nr_alloc++;
+ pstat->alloc_bytes += bytes;
+ pstat->callsite = callsite;
+ }
+
+ this.callsite = callsite;
+ pstat = page_stat__findnew_caller(&this);
+ if (pstat == NULL)
+ return -ENOMEM;
+
+ pstat->nr_alloc++;
+ pstat->alloc_bytes += bytes;
+
+ order_stats[order][migrate_type]++;
+
+ return 0;
+}
+
+static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ u64 page;
+ unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ u64 bytes = kmem_page_size << order;
+ struct page_stat *pstat;
+ struct page_stat this = {
+ .order = order,
+ };
+
+ if (use_pfn)
+ page = perf_evsel__intval(evsel, sample, "pfn");
+ else
+ page = perf_evsel__intval(evsel, sample, "page");
+
+ nr_page_frees++;
+ total_page_free_bytes += bytes;
+
+ this.page = page;
+ pstat = page_stat__find_page(&this);
+ if (pstat == NULL) {
+ pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
+ page, order);
+
+ nr_page_nomatch++;
+ total_page_nomatch_bytes += bytes;
+
+ return 0;
+ }
+
+ this.gfp_flags = pstat->gfp_flags;
+ this.migrate_type = pstat->migrate_type;
+ this.callsite = pstat->callsite;
+
+ rb_erase(&pstat->node, &page_live_tree);
+ free(pstat);
+
+ if (live_page) {
+ order_stats[this.order][this.migrate_type]--;
+ } else {
+ pstat = page_stat__find_alloc(&this);
+ if (pstat == NULL)
+ return -ENOMEM;
+
+ pstat->nr_free++;
+ pstat->free_bytes += bytes;
+ }
+
+ pstat = page_stat__find_caller(&this);
+ if (pstat == NULL)
+ return -ENOENT;
+
+ pstat->nr_free++;
+ pstat->free_bytes += bytes;
+
+ if (live_page) {
+ pstat->nr_alloc--;
+ pstat->alloc_bytes -= bytes;
+
+ if (pstat->nr_alloc == 0) {
+ rb_erase(&pstat->node, &page_caller_tree);
+ free(pstat);
+ }
+ }
+
+ return 0;
+}
+
+static bool perf_kmem__skip_sample(struct perf_sample *sample)
+{
+ /* skip samples outside the requested time window */
+ if (perf_time__skip_sample(&ptime, sample->time))
+ return true;
+
+ return false;
+}
+
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+ struct perf_sample *sample);
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ int err = 0;
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+
+ if (thread == NULL) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (perf_kmem__skip_sample(sample))
+ return 0;
+
+ dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+
+ if (evsel->handler != NULL) {
+ tracepoint_handler f = evsel->handler;
+ err = f(evsel, sample);
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
+static struct perf_tool perf_kmem = {
+ .sample = process_sample_event,
+ .comm = perf_event__process_comm,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+};
+
+static double fragmentation(unsigned long n_req, unsigned long n_alloc)
+{
+ if (n_alloc == 0)
+ return 0.0;
+ else
+ return 100.0 - (100.0 * n_req / n_alloc);
+}
+
+static void __print_slab_result(struct rb_root *root,
+ struct perf_session *session,
+ int n_lines, int is_caller)
+{
+ struct rb_node *next;
+ struct machine *machine = &session->machines.host;
+
+ printf("%.105s\n", graph_dotted_line);
+ printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
+ printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
+ printf("%.105s\n", graph_dotted_line);
+
+ next = rb_first(root);
+
+ while (next && n_lines--) {
+ struct alloc_stat *data = rb_entry(next, struct alloc_stat,
+ node);
+ struct symbol *sym = NULL;
+ struct map *map;
+ char buf[BUFSIZ];
+ u64 addr;
+
+ if (is_caller) {
+ addr = data->call_site;
+ if (!raw_ip)
+ sym = machine__find_kernel_symbol(machine, addr, &map);
+ } else
+ addr = data->ptr;
+
+ if (sym != NULL)
+ snprintf(buf, sizeof(buf), "%s+%" PRIx64 "", sym->name,
+ addr - map->unmap_ip(map, sym->start));
+ else
+ snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr);
+ printf(" %-34s |", buf);
+
+ printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n",
+ (unsigned long long)data->bytes_alloc,
+ (unsigned long)data->bytes_alloc / data->hit,
+ (unsigned long long)data->bytes_req,
+ (unsigned long)data->bytes_req / data->hit,
+ (unsigned long)data->hit,
+ (unsigned long)data->pingpong,
+ fragmentation(data->bytes_req, data->bytes_alloc));
+
+ next = rb_next(next);
+ }
+
+ if (n_lines == -1)
+ printf(" ... | ... | ... | ... | ... | ... \n");
+
+ printf("%.105s\n", graph_dotted_line);
+}
+
+static const char * const migrate_type_str[] = {
+ "UNMOVABL",
+ "RECLAIM",
+ "MOVABLE",
+ "RESERVED",
+ "CMA/ISLT",
+ "UNKNOWN",
+};
+
+static void __print_page_alloc_result(struct perf_session *session, int n_lines)
+{
+ struct rb_node *next = rb_first(&page_alloc_sorted);
+ struct machine *machine = &session->machines.host;
+ const char *format;
+ int gfp_len = max(strlen("GFP flags"), max_gfp_len);
+
+ printf("\n%.105s\n", graph_dotted_line);
+ printf(" %-16s | %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n",
+ use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
+ gfp_len, "GFP flags");
+ printf("%.105s\n", graph_dotted_line);
+
+ if (use_pfn)
+ format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
+ else
+ format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
+
+ while (next && n_lines--) {
+ struct page_stat *data;
+ struct symbol *sym;
+ struct map *map;
+ char buf[32];
+ char *caller = buf;
+
+ data = rb_entry(next, struct page_stat, node);
+ sym = machine__find_kernel_symbol(machine, data->callsite, &map);
+ if (sym)
+ caller = sym->name;
+ else
+ scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+
+ printf(format, (unsigned long long)data->page,
+ (unsigned long long)data->alloc_bytes / 1024,
+ data->nr_alloc, data->order,
+ migrate_type_str[data->migrate_type],
+ gfp_len, compact_gfp_string(data->gfp_flags), caller);
+
+ next = rb_next(next);
+ }
+
+ if (n_lines == -1) {
+ printf(" ... | ... | ... | ... | ... | %-*s | ...\n",
+ gfp_len, "...");
+ }
+
+ printf("%.105s\n", graph_dotted_line);
+}
+
+static void __print_page_caller_result(struct perf_session *session, int n_lines)
+{
+ struct rb_node *next = rb_first(&page_caller_sorted);
+ struct machine *machine = &session->machines.host;
+ int gfp_len = max(strlen("GFP flags"), max_gfp_len);
+
+ printf("\n%.105s\n", graph_dotted_line);
+ printf(" %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n",
+ live_page ? "Live" : "Total", gfp_len, "GFP flags");
+ printf("%.105s\n", graph_dotted_line);
+
+ while (next && n_lines--) {
+ struct page_stat *data;
+ struct symbol *sym;
+ struct map *map;
+ char buf[32];
+ char *caller = buf;
+
+ data = rb_entry(next, struct page_stat, node);
+ sym = machine__find_kernel_symbol(machine, data->callsite, &map);
+ if (sym)
+ caller = sym->name;
+ else
+ scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+
+ printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
+ (unsigned long long)data->alloc_bytes / 1024,
+ data->nr_alloc, data->order,
+ migrate_type_str[data->migrate_type],
+ gfp_len, compact_gfp_string(data->gfp_flags), caller);
+
+ next = rb_next(next);
+ }
+
+ if (n_lines == -1) {
+ printf(" ... | ... | ... | ... | %-*s | ...\n",
+ gfp_len, "...");
+ }
+
+ printf("%.105s\n", graph_dotted_line);
+}
+
+static void print_gfp_flags(void)
+{
+ int i;
+
+ printf("#\n");
+ printf("# GFP flags\n");
+ printf("# ---------\n");
+ for (i = 0; i < nr_gfps; i++) {
+ printf("# %08x: %*s: %s\n", gfps[i].flags,
+ (int) max_gfp_len, gfps[i].compact_str,
+ gfps[i].human_readable);
+ }
+}
+
+static void print_slab_summary(void)
+{
+ printf("\nSUMMARY (SLAB allocator)");
+ printf("\n========================\n");
+ printf("Total bytes requested: %'lu\n", total_requested);
+ printf("Total bytes allocated: %'lu\n", total_allocated);
+ printf("Total bytes freed: %'lu\n", total_freed);
+ if (total_allocated > total_freed) {
+ printf("Net total bytes allocated: %'lu\n",
+ total_allocated - total_freed);
+ }
+ printf("Total bytes wasted on internal fragmentation: %'lu\n",
+ total_allocated - total_requested);
+ printf("Internal fragmentation: %f%%\n",
+ fragmentation(total_requested, total_allocated));
+ printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs);
+}
+
+static void print_page_summary(void)
+{
+ int o, m;
+ u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch;
+ u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes;
+
+ printf("\nSUMMARY (page allocator)");
+ printf("\n========================\n");
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests",
+ nr_page_allocs, total_page_alloc_bytes / 1024);
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests",
+ nr_page_frees, total_page_free_bytes / 1024);
+ printf("\n");
+
+ printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests",
+ nr_alloc_freed, (total_alloc_freed_bytes) / 1024);
+ printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests",
+ nr_page_allocs - nr_alloc_freed,
+ (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024);
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests",
+ nr_page_nomatch, total_page_nomatch_bytes / 1024);
+ printf("\n");
+
+ printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures",
+ nr_page_fails, total_page_fail_bytes / 1024);
+ printf("\n");
+
+ printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable",
+ "Reclaimable", "Movable", "Reserved", "CMA/Isolated");
+ printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line,
+ graph_dotted_line, graph_dotted_line, graph_dotted_line,
+ graph_dotted_line, graph_dotted_line);
+
+ for (o = 0; o < MAX_PAGE_ORDER; o++) {
+ printf("%5d", o);
+ for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) {
+ if (order_stats[o][m])
+ printf(" %'12d", order_stats[o][m]);
+ else
+ printf(" %12c", '.');
+ }
+ printf("\n");
+ }
+}
+
+static void print_slab_result(struct perf_session *session)
+{
+ if (caller_flag)
+ __print_slab_result(&root_caller_sorted, session, caller_lines, 1);
+ if (alloc_flag)
+ __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0);
+ print_slab_summary();
+}
+
+static void print_page_result(struct perf_session *session)
+{
+ if (caller_flag || alloc_flag)
+ print_gfp_flags();
+ if (caller_flag)
+ __print_page_caller_result(session, caller_lines);
+ if (alloc_flag)
+ __print_page_alloc_result(session, alloc_lines);
+ print_page_summary();
+}
+
+static void print_result(struct perf_session *session)
+{
+ if (kmem_slab)
+ print_slab_result(session);
+ if (kmem_page)
+ print_page_result(session);
+}
+
+static LIST_HEAD(slab_caller_sort);
+static LIST_HEAD(slab_alloc_sort);
+static LIST_HEAD(page_caller_sort);
+static LIST_HEAD(page_alloc_sort);
+
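+/*
+ * Move entries into a sorted tree, comparing with each configured sort
+ * dimension in turn until one of them reports a difference.
+ */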
+static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
+ struct list_head *sort_list)
+{
+ struct rb_node **new = &(root->rb_node);
+ struct rb_node *parent = NULL;
+ struct sort_dimension *sort;
+
+ while (*new) {
+ struct alloc_stat *this;
+ int cmp = 0;
+
+ this = rb_entry(*new, struct alloc_stat, node);
+ parent = *new;
+
+ list_for_each_entry(sort, sort_list, list) {
+ cmp = sort->cmp(data, this);
+ if (cmp)
+ break;
+ }
+
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted,
+ struct list_head *sort_list)
+{
+ struct rb_node *node;
+ struct alloc_stat *data;
+
+ for (;;) {
+ node = rb_first(root);
+ if (!node)
+ break;
+
+ rb_erase(node, root);
+ data = rb_entry(node, struct alloc_stat, node);
+ sort_slab_insert(root_sorted, data, sort_list);
+ }
+}
+
+static void sort_page_insert(struct rb_root *root, struct page_stat *data,
+ struct list_head *sort_list)
+{
+ struct rb_node **new = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct sort_dimension *sort;
+
+ while (*new) {
+ struct page_stat *this;
+ int cmp = 0;
+
+ this = rb_entry(*new, struct page_stat, node);
+ parent = *new;
+
+ list_for_each_entry(sort, sort_list, list) {
+ cmp = sort->cmp(data, this);
+ if (cmp)
+ break;
+ }
+
+ if (cmp > 0)
+ new = &parent->rb_left;
+ else
+ new = &parent->rb_right;
+ }
+
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
+ struct list_head *sort_list)
+{
+ struct rb_node *node;
+ struct page_stat *data;
+
+ for (;;) {
+ node = rb_first(root);
+ if (!node)
+ break;
+
+ rb_erase(node, root);
+ data = rb_entry(node, struct page_stat, node);
+ sort_page_insert(root_sorted, data, sort_list);
+ }
+}
+
+static void sort_result(void)
+{
+ if (kmem_slab) {
+ __sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
+ &slab_alloc_sort);
+ __sort_slab_result(&root_caller_stat, &root_caller_sorted,
+ &slab_caller_sort);
+ }
+ if (kmem_page) {
+ if (live_page)
+ __sort_page_result(&page_live_tree, &page_alloc_sorted,
+ &page_alloc_sort);
+ else
+ __sort_page_result(&page_alloc_tree, &page_alloc_sorted,
+ &page_alloc_sort);
+
+ __sort_page_result(&page_caller_tree, &page_caller_sorted,
+ &page_caller_sort);
+ }
+}
+
+static int __cmd_kmem(struct perf_session *session)
+{
+ int err = -EINVAL;
+ struct perf_evsel *evsel;
+ const struct perf_evsel_str_handler kmem_tracepoints[] = {
+ /* slab allocator */
+ { "kmem:kmalloc", perf_evsel__process_alloc_event, },
+ { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
+ { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
+ { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
+ { "kmem:kfree", perf_evsel__process_free_event, },
+ { "kmem:kmem_cache_free", perf_evsel__process_free_event, },
+ /* page allocator */
+ { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, },
+ { "kmem:mm_page_free", perf_evsel__process_page_free_event, },
+ };
+
+ if (!perf_session__has_traces(session, "kmem record"))
+ goto out;
+
+ if (perf_session__set_tracepoints_handlers(session, kmem_tracepoints)) {
+ pr_err("Initializing perf session tracepoint handlers failed\n");
+ goto out;
+ }
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
+ perf_evsel__field(evsel, "pfn")) {
+ use_pfn = true;
+ break;
+ }
+ }
+
+ setup_pager();
+ err = perf_session__process_events(session);
+ if (err != 0) {
+ pr_err("error during process events: %d\n", err);
+ goto out;
+ }
+ sort_result();
+ print_result(session);
+out:
+ return err;
+}
+
+/* slab sort keys */
+static int ptr_cmp(void *a, void *b)
+{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
+ if (l->ptr < r->ptr)
+ return -1;
+ else if (l->ptr > r->ptr)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension ptr_sort_dimension = {
+ .name = "ptr",
+ .cmp = ptr_cmp,
+};
+
+static int slab_callsite_cmp(void *a, void *b)
+{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
+ if (l->call_site < r->call_site)
+ return -1;
+ else if (l->call_site > r->call_site)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension callsite_sort_dimension = {
+ .name = "callsite",
+ .cmp = slab_callsite_cmp,
+};
+
+static int hit_cmp(void *a, void *b)
+{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
+ if (l->hit < r->hit)
+ return -1;
+ else if (l->hit > r->hit)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension hit_sort_dimension = {
+ .name = "hit",
+ .cmp = hit_cmp,
+};
+
+static int bytes_cmp(void *a, void *b)
+{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
+ if (l->bytes_alloc < r->bytes_alloc)
+ return -1;
+ else if (l->bytes_alloc > r->bytes_alloc)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension bytes_sort_dimension = {
+ .name = "bytes",
+ .cmp = bytes_cmp,
+};
+
+static int frag_cmp(void *a, void *b)
+{
+ double x, y;
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
+ x = fragmentation(l->bytes_req, l->bytes_alloc);
+ y = fragmentation(r->bytes_req, r->bytes_alloc);
+
+ if (x < y)
+ return -1;
+ else if (x > y)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension frag_sort_dimension = {
+ .name = "frag",
+ .cmp = frag_cmp,
+};
+
+static int pingpong_cmp(void *a, void *b)
+{
+ struct alloc_stat *l = a;
+ struct alloc_stat *r = b;
+
+ if (l->pingpong < r->pingpong)
+ return -1;
+ else if (l->pingpong > r->pingpong)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension pingpong_sort_dimension = {
+ .name = "pingpong",
+ .cmp = pingpong_cmp,
+};
+
+/* page sort keys */
+static int page_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->page < r->page)
+ return -1;
+ else if (l->page > r->page)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_sort_dimension = {
+ .name = "page",
+ .cmp = page_cmp,
+};
+
+static int page_callsite_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->callsite < r->callsite)
+ return -1;
+ else if (l->callsite > r->callsite)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_callsite_sort_dimension = {
+ .name = "callsite",
+ .cmp = page_callsite_cmp,
+};
+
+static int page_hit_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->nr_alloc < r->nr_alloc)
+ return -1;
+ else if (l->nr_alloc > r->nr_alloc)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_hit_sort_dimension = {
+ .name = "hit",
+ .cmp = page_hit_cmp,
+};
+
+static int page_bytes_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->alloc_bytes < r->alloc_bytes)
+ return -1;
+ else if (l->alloc_bytes > r->alloc_bytes)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_bytes_sort_dimension = {
+ .name = "bytes",
+ .cmp = page_bytes_cmp,
+};
+
+static int page_order_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ if (l->order < r->order)
+ return -1;
+ else if (l->order > r->order)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension page_order_sort_dimension = {
+ .name = "order",
+ .cmp = page_order_cmp,
+};
+
+static int migrate_type_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ /* a migrate_type of -1 is an internal wildcard used to match a freed page */
+ if (l->migrate_type == -1U)
+ return 0;
+
+ if (l->migrate_type < r->migrate_type)
+ return -1;
+ else if (l->migrate_type > r->migrate_type)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension migrate_type_sort_dimension = {
+ .name = "migtype",
+ .cmp = migrate_type_cmp,
+};
+
+static int gfp_flags_cmp(void *a, void *b)
+{
+ struct page_stat *l = a;
+ struct page_stat *r = b;
+
+ /* a gfp_flags of -1 is an internal wildcard used to match a freed page */
+ if (l->gfp_flags == -1U)
+ return 0;
+
+ if (l->gfp_flags < r->gfp_flags)
+ return -1;
+ else if (l->gfp_flags > r->gfp_flags)
+ return 1;
+ return 0;
+}
+
+static struct sort_dimension gfp_flags_sort_dimension = {
+ .name = "gfp",
+ .cmp = gfp_flags_cmp,
+};
+
+static struct sort_dimension *slab_sorts[] = {
+ &ptr_sort_dimension,
+ &callsite_sort_dimension,
+ &hit_sort_dimension,
+ &bytes_sort_dimension,
+ &frag_sort_dimension,
+ &pingpong_sort_dimension,
+};
+
+static struct sort_dimension *page_sorts[] = {
+ &page_sort_dimension,
+ &page_callsite_sort_dimension,
+ &page_hit_sort_dimension,
+ &page_bytes_sort_dimension,
+ &page_order_sort_dimension,
+ &migrate_type_sort_dimension,
+ &gfp_flags_sort_dimension,
+};
+
+static int slab_sort_dimension__add(const char *tok, struct list_head *list)
+{
+ struct sort_dimension *sort;
+ int i;
+
+ for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
+ if (!strcmp(slab_sorts[i]->name, tok)) {
+ sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
+ if (!sort) {
+ pr_err("%s: memdup failed\n", __func__);
+ return -1;
+ }
+ list_add_tail(&sort->list, list);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static int page_sort_dimension__add(const char *tok, struct list_head *list)
+{
+ struct sort_dimension *sort;
+ int i;
+
+ for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
+ if (!strcmp(page_sorts[i]->name, tok)) {
+ sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
+ if (!sort) {
+ pr_err("%s: memdup failed\n", __func__);
+ return -1;
+ }
+ list_add_tail(&sort->list, list);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
+{
+ char *tok;
+ char *str = strdup(arg);
+ char *pos = str;
+
+ if (!str) {
+ pr_err("%s: strdup failed\n", __func__);
+ return -1;
+ }
+
+ while (true) {
+ tok = strsep(&pos, ",");
+ if (!tok)
+ break;
+ if (slab_sort_dimension__add(tok, sort_list) < 0) {
+ pr_err("Unknown slab --sort key: '%s'", tok);
+ free(str);
+ return -1;
+ }
+ }
+
+ free(str);
+ return 0;
+}
+
+static int setup_page_sorting(struct list_head *sort_list, const char *arg)
+{
+ char *tok;
+ char *str = strdup(arg);
+ char *pos = str;
+
+ if (!str) {
+ pr_err("%s: strdup failed\n", __func__);
+ return -1;
+ }
+
+ while (true) {
+ tok = strsep(&pos, ",");
+ if (!tok)
+ break;
+ if (page_sort_dimension__add(tok, sort_list) < 0) {
+ pr_err("Unknown page --sort key: '%s'", tok);
+ free(str);
+ return -1;
+ }
+ }
+
+ free(str);
+ return 0;
+}
+
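+/*
+ * The parse_{caller,alloc,slab,page}_opt callbacks assign each flag a
+ * value one greater than its counterpart, so comparing the flags tells
+ * which option appeared last on the command line; --sort then applies
+ * to that mode and view.
+ */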
+static int parse_sort_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ if (kmem_page > kmem_slab ||
+ (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
+ if (caller_flag > alloc_flag)
+ return setup_page_sorting(&page_caller_sort, arg);
+ else
+ return setup_page_sorting(&page_alloc_sort, arg);
+ } else {
+ if (caller_flag > alloc_flag)
+ return setup_slab_sorting(&slab_caller_sort, arg);
+ else
+ return setup_slab_sorting(&slab_alloc_sort, arg);
+ }
+
+ return 0;
+}
+
+static int parse_caller_opt(const struct option *opt __maybe_unused,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ caller_flag = (alloc_flag + 1);
+ return 0;
+}
+
+static int parse_alloc_opt(const struct option *opt __maybe_unused,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ alloc_flag = (caller_flag + 1);
+ return 0;
+}
+
+static int parse_slab_opt(const struct option *opt __maybe_unused,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ kmem_slab = (kmem_page + 1);
+ return 0;
+}
+
+static int parse_page_opt(const struct option *opt __maybe_unused,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ kmem_page = (kmem_slab + 1);
+ return 0;
+}
+
+static int parse_line_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ int lines;
+
+ if (!arg)
+ return -1;
+
+ lines = strtoul(arg, NULL, 10);
+
+ if (caller_flag > alloc_flag)
+ caller_lines = lines;
+ else
+ alloc_lines = lines;
+
+ return 0;
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+ const char * const record_args[] = {
+ "record", "-a", "-R", "-c", "1",
+ };
+ const char * const slab_events[] = {
+ "-e", "kmem:kmalloc",
+ "-e", "kmem:kmalloc_node",
+ "-e", "kmem:kfree",
+ "-e", "kmem:kmem_cache_alloc",
+ "-e", "kmem:kmem_cache_alloc_node",
+ "-e", "kmem:kmem_cache_free",
+ };
+ const char * const page_events[] = {
+ "-e", "kmem:mm_page_alloc",
+ "-e", "kmem:mm_page_free",
+ };
+ unsigned int rec_argc, i, j;
+ const char **rec_argv;
+
+ rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ if (kmem_slab)
+ rec_argc += ARRAY_SIZE(slab_events);
+ if (kmem_page)
+ rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
+
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ if (rec_argv == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[i] = strdup(record_args[i]);
+
+ if (kmem_slab) {
+ for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++)
+ rec_argv[i] = strdup(slab_events[j]);
+ }
+ if (kmem_page) {
+ rec_argv[i++] = strdup("-g");
+
+ for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
+ rec_argv[i] = strdup(page_events[j]);
+ }
+
+ for (j = 1; j < (unsigned int)argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ return cmd_record(i, rec_argv);
+}
+
+static int kmem_config(const char *var, const char *value, void *cb __maybe_unused)
+{
+ if (!strcmp(var, "kmem.default")) {
+ if (!strcmp(value, "slab"))
+ kmem_default = KMEM_SLAB;
+ else if (!strcmp(value, "page"))
+ kmem_default = KMEM_PAGE;
+ else
+ pr_err("invalid default value ('slab' or 'page' required): %s\n",
+ value);
+ return 0;
+ }
+
+ return 0;
+}
+
+int cmd_kmem(int argc, const char **argv)
+{
+ const char * const default_slab_sort = "frag,hit,bytes";
+ const char * const default_page_sort = "bytes,hit";
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ const struct option kmem_options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
+ "show per-callsite statistics", parse_caller_opt),
+ OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
+ "show per-allocation statistics", parse_alloc_opt),
+ OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
+ "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
+ "page, order, migtype, gfp", parse_sort_opt),
+ OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
+ OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
+ OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
+ OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator",
+ parse_slab_opt),
+ OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
+ parse_page_opt),
+ OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
+ OPT_STRING(0, "time", &time_str, "str",
+ "Time span of interest (start,stop)"),
+ OPT_END()
+ };
+ const char *const kmem_subcommands[] = { "record", "stat", NULL };
+ const char *kmem_usage[] = {
+ NULL,
+ NULL
+ };
+ struct perf_session *session;
+ const char errmsg[] = "No %s allocation events found. Have you run 'perf kmem record --%s'?\n";
+ int ret = perf_config(kmem_config, NULL);
+
+ if (ret)
+ return ret;
+
+ argc = parse_options_subcommand(argc, argv, kmem_options,
+ kmem_subcommands, kmem_usage, 0);
+
+ if (!argc)
+ usage_with_options(kmem_usage, kmem_options);
+
+ if (kmem_slab == 0 && kmem_page == 0) {
+ if (kmem_default == KMEM_SLAB)
+ kmem_slab = 1;
+ else
+ kmem_page = 1;
+ }
+
+ if (!strncmp(argv[0], "rec", 3)) {
+ symbol__init(NULL);
+ return __cmd_record(argc, argv);
+ }
+
+ data.file.path = input_name;
+
+ kmem_session = session = perf_session__new(&data, false, &perf_kmem);
+ if (session == NULL)
+ return -1;
+
+ ret = -1;
+
+ if (kmem_slab) {
+ if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+ "kmem:kmalloc")) {
+ pr_err(errmsg, "slab", "slab");
+ goto out_delete;
+ }
+ }
+
+ if (kmem_page) {
+ struct perf_evsel *evsel;
+
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+ "kmem:mm_page_alloc");
+ if (evsel == NULL) {
+ pr_err(errmsg, "page", "page");
+ goto out_delete;
+ }
+
+ kmem_page_size = tep_get_page_size(evsel->tp_format->pevent);
+ symbol_conf.use_callchain = true;
+ }
+
+ symbol__init(&session->header.env);
+
+ if (perf_time__parse_str(&ptime, time_str) != 0) {
+ pr_err("Invalid time string\n");
+ ret = -EINVAL;
+ goto out_delete;
+ }
+
+ if (!strcmp(argv[0], "stat")) {
+ setlocale(LC_ALL, "");
+
+ if (cpu__setup_cpunode_map())
+ goto out_delete;
+
+ if (list_empty(&slab_caller_sort))
+ setup_slab_sorting(&slab_caller_sort, default_slab_sort);
+ if (list_empty(&slab_alloc_sort))
+ setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
+ if (list_empty(&page_caller_sort))
+ setup_page_sorting(&page_caller_sort, default_page_sort);
+ if (list_empty(&page_alloc_sort))
+ setup_page_sorting(&page_alloc_sort, default_page_sort);
+
+ if (kmem_page) {
+ setup_page_sorting(&page_alloc_sort_input,
+ "page,order,migtype,gfp");
+ setup_page_sorting(&page_caller_sort_input,
+ "callsite,order,migtype,gfp");
+ }
+ ret = __cmd_kmem(session);
+ } else
+ usage_with_options(kmem_usage, kmem_options);
+
+out_delete:
+ perf_session__delete(session);
+
+ return ret;
+}
+
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
new file mode 100644
index 000000000..952e2228f
--- /dev/null
+++ b/tools/perf/builtin-kvm.c
@@ -0,0 +1,1641 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/term.h"
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/intlist.h"
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+#include "util/debug.h"
+#include "util/tool.h"
+#include "util/stat.h"
+#include "util/top.h"
+#include "util/data.h"
+#include "util/ordered-events.h"
+
+#include <sys/prctl.h>
+#ifdef HAVE_TIMERFD_SUPPORT
+#include <sys/timerfd.h>
+#endif
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <linux/kernel.h>
+#include <linux/time64.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <termios.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <math.h>
+
+static const char *get_filename_for_perf_kvm(void)
+{
+ const char *filename;
+
+ if (perf_host && !perf_guest)
+ filename = strdup("perf.data.host");
+ else if (!perf_host && perf_guest)
+ filename = strdup("perf.data.guest");
+ else
+ filename = strdup("perf.data.kvm");
+
+ return filename;
+}
+
+#ifdef HAVE_KVM_STAT_SUPPORT
+#include "util/kvm-stat.h"
+
+void exit_event_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ key->info = 0;
+ key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
+}
+
+bool kvm_exit_event(struct perf_evsel *evsel)
+{
+ return !strcmp(evsel->name, kvm_exit_trace);
+}
+
+bool exit_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key)
+{
+ if (kvm_exit_event(evsel)) {
+ exit_event_get_key(evsel, sample, key);
+ return true;
+ }
+
+ return false;
+}
+
+bool kvm_entry_event(struct perf_evsel *evsel)
+{
+ return !strcmp(evsel->name, kvm_entry_trace);
+}
+
+bool exit_event_end(struct perf_evsel *evsel,
+ struct perf_sample *sample __maybe_unused,
+ struct event_key *key __maybe_unused)
+{
+ return kvm_entry_event(evsel);
+}
+
+static const char *get_exit_reason(struct perf_kvm_stat *kvm,
+ struct exit_reasons_table *tbl,
+ u64 exit_code)
+{
+ while (tbl->reason != NULL) {
+ if (tbl->exit_code == exit_code)
+ return tbl->reason;
+ tbl++;
+ }
+
+ pr_err("unknown kvm exit code:%lld on %s\n",
+ (unsigned long long)exit_code, kvm->exit_reasons_isa);
+ return "UNKNOWN";
+}
+
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+ struct event_key *key,
+ char *decode)
+{
+ const char *exit_reason = get_exit_reason(kvm, key->exit_reasons,
+ key->key);
+
+ scnprintf(decode, decode_str_len, "%s", exit_reason);
+}
+
+static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
+{
+ struct kvm_reg_events_ops *events_ops;
+
+ for (events_ops = kvm_reg_events_ops; events_ops->name; events_ops++) {
+ if (!strcmp(events_ops->name, kvm->report_event)) {
+ kvm->events_ops = events_ops->ops;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+struct vcpu_event_record {
+ int vcpu_id;
+ u64 start_time;
+ struct kvm_event *last_event;
+};
+
+
+static void init_kvm_event_record(struct perf_kvm_stat *kvm)
+{
+ unsigned int i;
+
+ for (i = 0; i < EVENTS_CACHE_SIZE; i++)
+ INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+static void clear_events_cache_stats(struct list_head *kvm_events_cache)
+{
+ struct list_head *head;
+ struct kvm_event *event;
+ unsigned int i;
+ int j;
+
+ for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+ head = &kvm_events_cache[i];
+ list_for_each_entry(event, head, hash_entry) {
+ /* reset stats for event */
+ event->total.time = 0;
+ init_stats(&event->total.stats);
+
+ for (j = 0; j < event->max_vcpu; ++j) {
+ event->vcpu[j].time = 0;
+ init_stats(&event->vcpu[j].stats);
+ }
+ }
+ }
+}
+#endif
+
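+/* EVENTS_CACHE_SIZE must be a power of two for this mask to hash correctly */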
+static int kvm_events_hash_fn(u64 key)
+{
+ return key & (EVENTS_CACHE_SIZE - 1);
+}
+
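+/* grow the per-vcpu stats array in DEFAULT_VCPU_NUM steps, zeroing new slots */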
+static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
+{
+ int old_max_vcpu = event->max_vcpu;
+ void *prev;
+
+ if (vcpu_id < event->max_vcpu)
+ return true;
+
+ while (event->max_vcpu <= vcpu_id)
+ event->max_vcpu += DEFAULT_VCPU_NUM;
+
+ prev = event->vcpu;
+ event->vcpu = realloc(event->vcpu,
+ event->max_vcpu * sizeof(*event->vcpu));
+ if (!event->vcpu) {
+ free(prev);
+ pr_err("Not enough memory\n");
+ return false;
+ }
+
+ memset(event->vcpu + old_max_vcpu, 0,
+ (event->max_vcpu - old_max_vcpu) * sizeof(*event->vcpu));
+ return true;
+}
+
+static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
+{
+ struct kvm_event *event;
+
+ event = zalloc(sizeof(*event));
+ if (!event) {
+ pr_err("Not enough memory\n");
+ return NULL;
+ }
+
+ event->key = *key;
+ init_stats(&event->total.stats);
+ return event;
+}
+
+static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
+ struct event_key *key)
+{
+ struct kvm_event *event;
+ struct list_head *head;
+
+ BUG_ON(key->key == INVALID_KEY);
+
+ head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
+ list_for_each_entry(event, head, hash_entry) {
+ if (event->key.key == key->key && event->key.info == key->info)
+ return event;
+ }
+
+ event = kvm_alloc_init_event(key);
+ if (!event)
+ return NULL;
+
+ list_add(&event->hash_entry, head);
+ return event;
+}
+
+static bool handle_begin_event(struct perf_kvm_stat *kvm,
+ struct vcpu_event_record *vcpu_record,
+ struct event_key *key, u64 timestamp)
+{
+ struct kvm_event *event = NULL;
+
+ if (key->key != INVALID_KEY)
+ event = find_create_kvm_event(kvm, key);
+
+ vcpu_record->last_event = event;
+ vcpu_record->start_time = timestamp;
+ return true;
+}
+
+static void
+kvm_update_event_stats(struct kvm_event_stats *kvm_stats, u64 time_diff)
+{
+ kvm_stats->time += time_diff;
+ update_stats(&kvm_stats->stats, time_diff);
+}
+
+static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
+{
+ struct kvm_event_stats *kvm_stats = &event->total;
+
+ if (vcpu_id != -1)
+ kvm_stats = &event->vcpu[vcpu_id];
+
+ return rel_stddev_stats(stddev_stats(&kvm_stats->stats),
+ avg_stats(&kvm_stats->stats));
+}
+
+static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
+ u64 time_diff)
+{
+ if (vcpu_id == -1) {
+ kvm_update_event_stats(&event->total, time_diff);
+ return true;
+ }
+
+ if (!kvm_event_expand(event, vcpu_id))
+ return false;
+
+ kvm_update_event_stats(&event->vcpu[vcpu_id], time_diff);
+ return true;
+}
+
+static bool is_child_event(struct perf_kvm_stat *kvm,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key)
+{
+ struct child_event_ops *child_ops;
+
+ child_ops = kvm->events_ops->child_ops;
+
+ if (!child_ops)
+ return false;
+
+ for (; child_ops->name; child_ops++) {
+ if (!strcmp(evsel->name, child_ops->name)) {
+ child_ops->get_key(evsel, sample, key);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool handle_child_event(struct perf_kvm_stat *kvm,
+ struct vcpu_event_record *vcpu_record,
+ struct event_key *key,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct kvm_event *event = NULL;
+
+ if (key->key != INVALID_KEY)
+ event = find_create_kvm_event(kvm, key);
+
+ vcpu_record->last_event = event;
+
+ return true;
+}
+
+static bool skip_event(const char *event)
+{
+ const char * const *skip_events;
+
+ for (skip_events = kvm_skip_events; *skip_events; skip_events++)
+ if (!strcmp(event, *skip_events))
+ return true;
+
+ return false;
+}
+
+static bool handle_end_event(struct perf_kvm_stat *kvm,
+ struct vcpu_event_record *vcpu_record,
+ struct event_key *key,
+ struct perf_sample *sample)
+{
+ struct kvm_event *event;
+ u64 time_begin, time_diff;
+ int vcpu;
+
+ if (kvm->trace_vcpu == -1)
+ vcpu = -1;
+ else
+ vcpu = vcpu_record->vcpu_id;
+
+ event = vcpu_record->last_event;
+ time_begin = vcpu_record->start_time;
+
+ /* The begin event was not captured. */
+ if (!time_begin)
+ return true;
+
+ /*
+ * In some cases the 'begin event' only records the start timestamp;
+ * the actual event is recognized in the 'end event' (e.g. mmio-event).
+ */
+
+ /* Both begin and end events did not get the key. */
+ if (!event && key->key == INVALID_KEY)
+ return true;
+
+ if (!event)
+ event = find_create_kvm_event(kvm, key);
+
+ if (!event)
+ return false;
+
+ vcpu_record->last_event = NULL;
+ vcpu_record->start_time = 0;
+
+ /* seems to happen once in a while during live mode */
+ if (sample->time < time_begin) {
+ pr_debug("End time before begin time; skipping event.\n");
+ return true;
+ }
+
+ time_diff = sample->time - time_begin;
+
+ if (kvm->duration && time_diff > kvm->duration) {
+ char decode[decode_str_len];
+
+ kvm->events_ops->decode_key(kvm, &event->key, decode);
+ if (!skip_event(decode)) {
+ pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
+ sample->time, sample->pid, vcpu_record->vcpu_id,
+ decode, time_diff / NSEC_PER_USEC);
+ }
+ }
+
+ return update_kvm_event(event, vcpu, time_diff);
+}
+
+static
+struct vcpu_event_record *per_vcpu_record(struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ /* Only kvm_entry records vcpu id. */
+ if (!thread__priv(thread) && kvm_entry_event(evsel)) {
+ struct vcpu_event_record *vcpu_record;
+
+ vcpu_record = zalloc(sizeof(*vcpu_record));
+ if (!vcpu_record) {
+ pr_err("%s: Not enough memory\n", __func__);
+ return NULL;
+ }
+
+ vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
+ vcpu_id_str);
+ thread__set_priv(thread, vcpu_record);
+ }
+
+ return thread__priv(thread);
+}
+
+static bool handle_kvm_event(struct perf_kvm_stat *kvm,
+ struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct vcpu_event_record *vcpu_record;
+ struct event_key key = { .key = INVALID_KEY,
+ .exit_reasons = kvm->exit_reasons };
+
+ vcpu_record = per_vcpu_record(thread, evsel, sample);
+ if (!vcpu_record)
+ return true;
+
+ /* only process events for vcpus user cares about */
+ if ((kvm->trace_vcpu != -1) &&
+ (kvm->trace_vcpu != vcpu_record->vcpu_id))
+ return true;
+
+ if (kvm->events_ops->is_begin_event(evsel, sample, &key))
+ return handle_begin_event(kvm, vcpu_record, &key, sample->time);
+
+ if (is_child_event(kvm, evsel, sample, &key))
+ return handle_child_event(kvm, vcpu_record, &key, sample);
+
+ if (kvm->events_ops->is_end_event(evsel, sample, &key))
+ return handle_end_event(kvm, vcpu_record, &key, sample);
+
+ return true;
+}
+
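+/*
+ * Generate accessors: get_event_<func>() returns the total or the
+ * per-vcpu value of a field, and compare_kvm_event_<func>() orders two
+ * events by it.
+ */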
+#define GET_EVENT_KEY(func, field) \
+static u64 get_event_ ##func(struct kvm_event *event, int vcpu) \
+{ \
+ if (vcpu == -1) \
+ return event->total.field; \
+ \
+ if (vcpu >= event->max_vcpu) \
+ return 0; \
+ \
+ return event->vcpu[vcpu].field; \
+}
+
+#define COMPARE_EVENT_KEY(func, field) \
+GET_EVENT_KEY(func, field) \
+static int compare_kvm_event_ ## func(struct kvm_event *one, \
+ struct kvm_event *two, int vcpu)\
+{ \
+ return get_event_ ##func(one, vcpu) > \
+ get_event_ ##func(two, vcpu); \
+}
+
+GET_EVENT_KEY(time, time);
+COMPARE_EVENT_KEY(count, stats.n);
+COMPARE_EVENT_KEY(mean, stats.mean);
+GET_EVENT_KEY(max, stats.max);
+GET_EVENT_KEY(min, stats.min);
+
+#define DEF_SORT_NAME_KEY(name, compare_key) \
+ { #name, compare_kvm_event_ ## compare_key }
+
+static struct kvm_event_key keys[] = {
+ DEF_SORT_NAME_KEY(sample, count),
+ DEF_SORT_NAME_KEY(time, mean),
+ { NULL, NULL }
+};
+
+static bool select_key(struct perf_kvm_stat *kvm)
+{
+ int i;
+
+ for (i = 0; keys[i].name; i++) {
+ if (!strcmp(keys[i].name, kvm->sort_key)) {
+ kvm->compare = keys[i].key;
+ return true;
+ }
+ }
+
+ pr_err("Unknown compare key:%s\n", kvm->sort_key);
+ return false;
+}
+
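+/* insert into an rb-tree ordered biggest-first, so rb_first() pops the top entry */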
+static void insert_to_result(struct rb_root *result, struct kvm_event *event,
+ key_cmp_fun bigger, int vcpu)
+{
+ struct rb_node **rb = &result->rb_node;
+ struct rb_node *parent = NULL;
+ struct kvm_event *p;
+
+ while (*rb) {
+ p = container_of(*rb, struct kvm_event, rb);
+ parent = *rb;
+
+ if (bigger(event, p, vcpu))
+ rb = &(*rb)->rb_left;
+ else
+ rb = &(*rb)->rb_right;
+ }
+
+ rb_link_node(&event->rb, parent, rb);
+ rb_insert_color(&event->rb, result);
+}
+
+static void
+update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
+{
+ int vcpu = kvm->trace_vcpu;
+
+ kvm->total_count += get_event_count(event, vcpu);
+ kvm->total_time += get_event_time(event, vcpu);
+}
+
+static bool event_is_valid(struct kvm_event *event, int vcpu)
+{
+ return !!get_event_count(event, vcpu);
+}
+
+static void sort_result(struct perf_kvm_stat *kvm)
+{
+ unsigned int i;
+ int vcpu = kvm->trace_vcpu;
+ struct kvm_event *event;
+
+ for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+ list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
+ if (event_is_valid(event, vcpu)) {
+ update_total_count(kvm, event);
+ insert_to_result(&kvm->result, event,
+ kvm->compare, vcpu);
+ }
+ }
+ }
+}
+
+/* returns left most element of result, and erase it */
+static struct kvm_event *pop_from_result(struct rb_root *result)
+{
+ struct rb_node *node = rb_first(result);
+
+ if (!node)
+ return NULL;
+
+ rb_erase(node, result);
+ return container_of(node, struct kvm_event, rb);
+}
+
+static void print_vcpu_info(struct perf_kvm_stat *kvm)
+{
+ int vcpu = kvm->trace_vcpu;
+
+ pr_info("Analyze events for ");
+
+ if (kvm->opts.target.system_wide)
+ pr_info("all VMs, ");
+ else if (kvm->opts.target.pid)
+ pr_info("pid(s) %s, ", kvm->opts.target.pid);
+ else
+ pr_info("dazed and confused on what is monitored, ");
+
+ if (vcpu == -1)
+ pr_info("all VCPUs:\n\n");
+ else
+ pr_info("VCPU %d:\n\n", vcpu);
+}
+
+static void show_timeofday(void)
+{
+ char date[64];
+ struct timeval tv;
+ struct tm ltime;
+
+ gettimeofday(&tv, NULL);
+ if (localtime_r(&tv.tv_sec, &ltime)) {
+ strftime(date, sizeof(date), "%H:%M:%S", &ltime);
+ pr_info("%s.%06ld", date, tv.tv_usec);
+ } else
+ pr_info("00:00:00.000000");
+}
+
+static void print_result(struct perf_kvm_stat *kvm)
+{
+ char decode[decode_str_len];
+ struct kvm_event *event;
+ int vcpu = kvm->trace_vcpu;
+
+ if (kvm->live) {
+ puts(CONSOLE_CLEAR);
+ show_timeofday();
+ }
+
+ pr_info("\n\n");
+ print_vcpu_info(kvm);
+ pr_info("%*s ", decode_str_len, kvm->events_ops->name);
+ pr_info("%10s ", "Samples");
+ pr_info("%9s ", "Samples%");
+
+ pr_info("%9s ", "Time%");
+ pr_info("%11s ", "Min Time");
+ pr_info("%11s ", "Max Time");
+ pr_info("%16s ", "Avg time");
+ pr_info("\n\n");
+
+ while ((event = pop_from_result(&kvm->result))) {
+ u64 ecount, etime, max, min;
+
+ ecount = get_event_count(event, vcpu);
+ etime = get_event_time(event, vcpu);
+ max = get_event_max(event, vcpu);
+ min = get_event_min(event, vcpu);
+
+ kvm->events_ops->decode_key(kvm, &event->key, decode);
+ pr_info("%*s ", decode_str_len, decode);
+ pr_info("%10llu ", (unsigned long long)ecount);
+ pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
+ pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
+ pr_info("%9.2fus ", (double)min / NSEC_PER_USEC);
+ pr_info("%9.2fus ", (double)max / NSEC_PER_USEC);
+ pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount / NSEC_PER_USEC,
+ kvm_event_rel_stddev(vcpu, event));
+ pr_info("\n");
+ }
+
+ pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
+ kvm->total_count, kvm->total_time / (double)NSEC_PER_USEC);
+
+ if (kvm->lost_events)
+ pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+static int process_lost_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
+
+ kvm->lost_events++;
+ return 0;
+}
+#endif
+
+static bool skip_sample(struct perf_kvm_stat *kvm,
+ struct perf_sample *sample)
+{
+ if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
+ return true;
+
+ return false;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ int err = 0;
+ struct thread *thread;
+ struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
+ tool);
+
+ if (skip_sample(kvm, sample))
+ return 0;
+
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (!handle_kvm_event(kvm, thread, evsel, sample))
+ err = -1;
+
+ thread__put(thread);
+ return err;
+}
+
+static int cpu_isa_config(struct perf_kvm_stat *kvm)
+{
+ char buf[128], *cpuid;
+ int err;
+
+ if (kvm->live) {
+ err = get_cpuid(buf, sizeof(buf));
+ if (err != 0) {
+ pr_err("Failed to look up CPU type: %s\n",
+ str_error_r(err, buf, sizeof(buf)));
+ return -err;
+ }
+ cpuid = buf;
+ } else {
+ cpuid = kvm->session->header.env.cpuid;
+ }
+
+ if (!cpuid) {
+ pr_err("Failed to look up CPU type\n");
+ return -EINVAL;
+ }
+
+ err = cpu_isa_init(kvm, cpuid);
+ if (err == -ENOTSUP)
+ pr_err("CPU %s is not supported.\n", cpuid);
+
+ return err;
+}
+
+static bool verify_vcpu(int vcpu)
+{
+ if (vcpu != -1 && vcpu < 0) {
+ pr_err("Invalid vcpu:%d.\n", vcpu);
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+/*
+ * Keep the max events per pass at a modest level to keep
+ * the processing of samples per mmap smooth.
+ */
+#define PERF_KVM__MAX_EVENTS_PER_MMAP 25
+
+static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
+ u64 *mmap_time)
+{
+ struct perf_evlist *evlist = kvm->evlist;
+ union perf_event *event;
+ struct perf_mmap *md;
+ u64 timestamp;
+ s64 n = 0;
+ int err;
+
+ *mmap_time = ULLONG_MAX;
+ md = &evlist->mmap[idx];
+ err = perf_mmap__read_init(md);
+ if (err < 0)
+ return (err == -EAGAIN) ? 0 : -1;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ err = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+ if (err) {
+ perf_mmap__consume(md);
+ pr_err("Failed to parse sample\n");
+ return -1;
+ }
+
+ err = perf_session__queue_event(kvm->session, event, timestamp, 0);
+ /*
+ * FIXME: Here we can't consume the event, as perf_session__queue_event will
+ * point to it, and it may get overwritten by the kernel.
+ */
+ perf_mmap__consume(md);
+
+ if (err) {
+ pr_err("Failed to enqueue sample: %d\n", err);
+ return -1;
+ }
+
+ /* save time stamp of our first sample for this mmap */
+ if (n == 0)
+ *mmap_time = timestamp;
+
+ /* limit the number of events handled from each mmap in one pass */
+ n++;
+ if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+ break;
+ }
+
+ perf_mmap__read_done(md);
+ return n;
+}
+
+static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
+{
+ int i, err, throttled = 0;
+ s64 n, ntotal = 0;
+ u64 flush_time = ULLONG_MAX, mmap_time;
+
+ for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+ n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
+ if (n < 0)
+ return -1;
+
+ /*
+ * flush time is going to be the minimum of all the individual
+ * mmap times. Essentially, we flush all the samples queued up
+ * from the last pass under our minimal start time -- that leaves
+ * a very small race for samples to come in with a lower timestamp.
+ * The ioctl to return the perf_clock timestamp should close the
+ * race entirely.
+ */
+ if (mmap_time < flush_time)
+ flush_time = mmap_time;
+
+ ntotal += n;
+ if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+ throttled = 1;
+ }
+
+ /* flush queue after each round in which we processed events */
+ if (ntotal) {
+ struct ordered_events *oe = &kvm->session->ordered_events;
+
+ oe->next_flush = flush_time;
+ err = ordered_events__flush(oe, OE_FLUSH__ROUND);
+ if (err) {
+ if (kvm->lost_events)
+ pr_info("\nLost events: %" PRIu64 "\n\n",
+ kvm->lost_events);
+ return err;
+ }
+ }
+
+ return throttled;
+}
+
+static volatile int done;
+
+static void sig_handler(int sig __maybe_unused)
+{
+ done = 1;
+}
+
+static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
+{
+ struct itimerspec new_value;
+ int rc = -1;
+
+ kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+ if (kvm->timerfd < 0) {
+ pr_err("timerfd_create failed\n");
+ goto out;
+ }
+
+ new_value.it_value.tv_sec = kvm->display_time;
+ new_value.it_value.tv_nsec = 0;
+ new_value.it_interval.tv_sec = kvm->display_time;
+ new_value.it_interval.tv_nsec = 0;
+
+ if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
+ pr_err("timerfd_settime failed: %d\n", errno);
+ close(kvm->timerfd);
+ goto out;
+ }
+
+ rc = 0;
+out:
+ return rc;
+}
+
+static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
+{
+ uint64_t c;
+ int rc;
+
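+ /* a timerfd read returns the number of timer expirations since the last read, as a u64 */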
+ rc = read(kvm->timerfd, &c, sizeof(uint64_t));
+ if (rc < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ pr_err("Failed to read timer fd: %d\n", errno);
+ return -1;
+ }
+
+ if (rc != sizeof(uint64_t)) {
+ pr_err("Error reading timer fd - invalid size returned\n");
+ return -1;
+ }
+
+ if (c != 1)
+ pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
+
+ /* update display */
+ sort_result(kvm);
+ print_result(kvm);
+
+ /* reset counts */
+ clear_events_cache_stats(kvm->kvm_events_cache);
+ kvm->total_count = 0;
+ kvm->total_time = 0;
+ kvm->lost_events = 0;
+
+ return 0;
+}
+
+static int fd_set_nonblock(int fd)
+{
+ long arg = 0;
+
+ arg = fcntl(fd, F_GETFL);
+ if (arg < 0) {
+ pr_err("Failed to get current flags for fd %d\n", fd);
+ return -1;
+ }
+
+ if (fcntl(fd, F_SETFL, arg | O_NONBLOCK) < 0) {
+ pr_err("Failed to set non-block option on fd %d\n", fd);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int perf_kvm__handle_stdin(void)
+{
+ int c;
+
+ c = getc(stdin);
+ if (c == 'q')
+ return 1;
+
+ return 0;
+}
+
+static int kvm_events_live_report(struct perf_kvm_stat *kvm)
+{
+ int nr_stdin, ret, err = -EINVAL;
+ struct termios save;
+
+ /* live flag must be set first */
+ kvm->live = true;
+
+ ret = cpu_isa_config(kvm);
+ if (ret < 0)
+ return ret;
+
+ if (!verify_vcpu(kvm->trace_vcpu) ||
+ !select_key(kvm) ||
+ !register_kvm_events_ops(kvm)) {
+ goto out;
+ }
+
+ set_term_quiet_input(&save);
+ init_kvm_event_record(kvm);
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+
+ /* add timer fd */
+ if (perf_kvm__timerfd_create(kvm) < 0) {
+ err = -1;
+ goto out;
+ }
+
+ if (perf_evlist__add_pollfd(kvm->evlist, kvm->timerfd) < 0)
+ goto out;
+
+ nr_stdin = perf_evlist__add_pollfd(kvm->evlist, fileno(stdin));
+ if (nr_stdin < 0)
+ goto out;
+
+ if (fd_set_nonblock(fileno(stdin)) != 0)
+ goto out;
+
+ /* everything is good - enable the events and process */
+ perf_evlist__enable(kvm->evlist);
+
+ while (!done) {
+ struct fdarray *fda = &kvm->evlist->pollfd;
+ int rc;
+
+ rc = perf_kvm__mmap_read(kvm);
+ if (rc < 0)
+ break;
+
+ err = perf_kvm__handle_timerfd(kvm);
+ if (err)
+ goto out;
+
+ if (fda->entries[nr_stdin].revents & POLLIN)
+ done = perf_kvm__handle_stdin();
+
+ if (!rc && !done)
+ err = fdarray__poll(fda, 100);
+ }
+
+ perf_evlist__disable(kvm->evlist);
+
+ if (err == 0) {
+ sort_result(kvm);
+ print_result(kvm);
+ }
+
+out:
+ if (kvm->timerfd >= 0)
+ close(kvm->timerfd);
+
+ tcsetattr(0, TCSAFLUSH, &save);
+ return err;
+}
+
+static int kvm_live_open_events(struct perf_kvm_stat *kvm)
+{
+ int err, rc = -1;
+ struct perf_evsel *pos;
+ struct perf_evlist *evlist = kvm->evlist;
+ char sbuf[STRERR_BUFSIZE];
+
+ perf_evlist__config(evlist, &kvm->opts, NULL);
+
+ /*
+ * Note: exclude_{guest,host} do not apply here.
+ * This command processes KVM tracepoints from host only
+ */
+ evlist__for_each_entry(evlist, pos) {
+ struct perf_event_attr *attr = &pos->attr;
+
+ /* make sure these *are* set */
+ perf_evsel__set_sample_bit(pos, TID);
+ perf_evsel__set_sample_bit(pos, TIME);
+ perf_evsel__set_sample_bit(pos, CPU);
+ perf_evsel__set_sample_bit(pos, RAW);
+ /* make sure these are *not*; want as small a sample as possible */
+ perf_evsel__reset_sample_bit(pos, PERIOD);
+ perf_evsel__reset_sample_bit(pos, IP);
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+ perf_evsel__reset_sample_bit(pos, ADDR);
+ perf_evsel__reset_sample_bit(pos, READ);
+ attr->mmap = 0;
+ attr->comm = 0;
+ attr->task = 0;
+
+ attr->sample_period = 1;
+
+ attr->watermark = 0;
+ attr->wakeup_events = 1000;
+
+ /* will enable all once we are ready */
+ attr->disabled = 1;
+ }
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ printf("Couldn't create the events: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out;
+ }
+
+ if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) {
+ ui__error("Failed to mmap the events: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ perf_evlist__close(evlist);
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+ return rc;
+}
+#endif
+
+static int read_events(struct perf_kvm_stat *kvm)
+{
+ int ret;
+
+ struct perf_tool eops = {
+ .sample = process_sample_event,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+ };
+ struct perf_data file = {
+ .file = {
+ .path = kvm->file_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = kvm->force,
+ };
+
+ kvm->tool = eops;
+ kvm->session = perf_session__new(&file, false, &kvm->tool);
+ if (!kvm->session) {
+ pr_err("Initializing perf session failed\n");
+ return -1;
+ }
+
+ symbol__init(&kvm->session->header.env);
+
+ if (!perf_session__has_traces(kvm->session, "kvm record")) {
+ ret = -EINVAL;
+ goto out_delete;
+ }
+
+ /*
+ * Do not use the 'isa' field recorded in the kvm_exit tracepoint,
+ * since it is not traced in old kernels.
+ */
+ ret = cpu_isa_config(kvm);
+ if (ret < 0)
+ goto out_delete;
+
+ ret = perf_session__process_events(kvm->session);
+
+out_delete:
+ perf_session__delete(kvm->session);
+ return ret;
+}
+
+static int parse_target_str(struct perf_kvm_stat *kvm)
+{
+ if (kvm->opts.target.pid) {
+ kvm->pid_list = intlist__new(kvm->opts.target.pid);
+ if (kvm->pid_list == NULL) {
+ pr_err("Error parsing process id string\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
+{
+ int ret = -EINVAL;
+ int vcpu = kvm->trace_vcpu;
+
+ if (parse_target_str(kvm) != 0)
+ goto exit;
+
+ if (!verify_vcpu(vcpu))
+ goto exit;
+
+ if (!select_key(kvm))
+ goto exit;
+
+ if (!register_kvm_events_ops(kvm))
+ goto exit;
+
+ init_kvm_event_record(kvm);
+ setup_pager();
+
+ ret = read_events(kvm);
+ if (ret)
+ goto exit;
+
+ sort_result(kvm);
+ print_result(kvm);
+
+exit:
+ return ret;
+}
+
+#define STRDUP_FAIL_EXIT(s) \
+ ({ char *_p; \
+ _p = strdup(s); \
+ if (!_p) \
+ return -ENOMEM; \
+ _p; \
+ })
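+/*
+ * Note: the GNU statement expression above makes the *enclosing* function
+ * return -ENOMEM when strdup() fails.
+ */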
+
+int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
+{
+ return 0;
+}
+
+static int
+kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
+{
+ unsigned int rec_argc, i, j, events_tp_size;
+ const char **rec_argv;
+ const char * const record_args[] = {
+ "record",
+ "-R",
+ "-m", "1024",
+ "-c", "1",
+ };
+ const char * const kvm_stat_record_usage[] = {
+ "perf kvm stat record [<options>]",
+ NULL
+ };
+ const char * const *events_tp;
+ int ret;
+
+ events_tp_size = 0;
+ ret = setup_kvm_events_tp(kvm);
+ if (ret < 0) {
+ pr_err("Unable to setup the kvm tracepoints\n");
+ return ret;
+ }
+
+ for (events_tp = kvm_events_tp; *events_tp; events_tp++)
+ events_tp_size++;
+
+ rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
+ 2 * events_tp_size;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ if (rec_argv == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
+
+ for (j = 0; j < events_tp_size; j++) {
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
+ }
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
+
+ for (j = 1; j < (unsigned int)argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
+ set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
+ set_option_flag(record_options, 'R', "raw-samples", PARSE_OPT_HIDDEN);
+
+ set_option_flag(record_options, 'F', "freq", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 0, "group", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'g', NULL, PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 0, "call-graph", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'd', "data", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'T', "timestamp", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'P', "period", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'n', "no-samples", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'N', "no-buildid-cache", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'B', "no-buildid", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'G', "cgroup", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'b', "branch-any", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'j', "branch-filter", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 'W', "weight", PARSE_OPT_DISABLED);
+ set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);
+
+ record_usage = kvm_stat_record_usage;
+ return cmd_record(i, rec_argv);
+}
+
+static int
+kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
+{
+ const struct option kvm_events_report_options[] = {
+ OPT_STRING(0, "event", &kvm->report_event, "report event",
+ "event for reporting: vmexit, "
+ "mmio (x86 only), ioport (x86 only)"),
+ OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+ "vcpu id to report"),
+ OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+ "key for sorting: sample(sort by samples number)"
+ " time (sort by avg time)"),
+ OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+ "analyze events only for given process id(s)"),
+ OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"),
+ OPT_END()
+ };
+
+ const char * const kvm_events_report_usage[] = {
+ "perf kvm stat report [<options>]",
+ NULL
+ };
+
+ if (argc) {
+ argc = parse_options(argc, argv,
+ kvm_events_report_options,
+ kvm_events_report_usage, 0);
+ if (argc)
+ usage_with_options(kvm_events_report_usage,
+ kvm_events_report_options);
+ }
+
+ if (!kvm->opts.target.pid)
+ kvm->opts.target.system_wide = true;
+
+ return kvm_events_report_vcpu(kvm);
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+static struct perf_evlist *kvm_live_event_list(void)
+{
+ struct perf_evlist *evlist;
+ char *tp, *name, *sys;
+ int err = -1;
+ const char * const *events_tp;
+
+ evlist = perf_evlist__new();
+ if (evlist == NULL)
+ return NULL;
+
+ for (events_tp = kvm_events_tp; *events_tp; events_tp++) {
+
+ tp = strdup(*events_tp);
+ if (tp == NULL)
+ goto out;
+
+ /* split tracepoint into subsystem and name */
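+ /* e.g. "kvm:kvm_entry" -> sys = "kvm", name = "kvm_entry" */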
+ sys = tp;
+ name = strchr(tp, ':');
+ if (name == NULL) {
+ pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
+ *events_tp);
+ free(tp);
+ goto out;
+ }
+ *name = '\0';
+ name++;
+
+ if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+ pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
+ free(tp);
+ goto out;
+ }
+
+ free(tp);
+ }
+
+ err = 0;
+
+out:
+ if (err) {
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
+static int kvm_events_live(struct perf_kvm_stat *kvm,
+ int argc, const char **argv)
+{
+ char errbuf[BUFSIZ];
+ int err;
+
+ const struct option live_options[] = {
+ OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+ "record events on existing process id"),
+ OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
+ "number of mmap data pages",
+ perf_evlist__parse_mmap_pages),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_UINTEGER('d', "display", &kvm->display_time,
+ "time in seconds between display updates"),
+ OPT_STRING(0, "event", &kvm->report_event, "report event",
+ "event for reporting: "
+ "vmexit, mmio (x86 only), ioport (x86 only)"),
+ OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+ "vcpu id to report"),
+ OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+ "key for sorting: sample(sort by samples number)"
+ " time (sort by avg time)"),
+ OPT_U64(0, "duration", &kvm->duration,
+ "show events other than"
+ " HLT (x86 only) or Wait state (s390 only)"
+ " that take longer than duration usecs"),
+ OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
+ OPT_END()
+ };
+ const char * const live_usage[] = {
+ "perf kvm stat live [<options>]",
+ NULL
+ };
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_WRITE,
+ };
+
+
+ /* event handling */
+ kvm->tool.sample = process_sample_event;
+ kvm->tool.comm = perf_event__process_comm;
+ kvm->tool.exit = perf_event__process_exit;
+ kvm->tool.fork = perf_event__process_fork;
+ kvm->tool.lost = process_lost_event;
+ kvm->tool.namespaces = perf_event__process_namespaces;
+ kvm->tool.ordered_events = true;
+ perf_tool__fill_defaults(&kvm->tool);
+
+ /* set defaults */
+ kvm->display_time = 1;
+ kvm->opts.user_interval = 1;
+ kvm->opts.mmap_pages = 512;
+ kvm->opts.target.uses_mmap = false;
+ kvm->opts.target.uid_str = NULL;
+ kvm->opts.target.uid = UINT_MAX;
+ kvm->opts.proc_map_timeout = 500;
+
+ symbol__init(NULL);
+ disable_buildid_cache();
+
+ use_browser = 0;
+
+ if (argc) {
+ argc = parse_options(argc, argv, live_options,
+ live_usage, 0);
+ if (argc)
+ usage_with_options(live_usage, live_options);
+ }
+
+ kvm->duration *= NSEC_PER_USEC; /* convert usec to nsec */
+
+ /*
+ * target related setups
+ */
+ err = target__validate(&kvm->opts.target);
+ if (err) {
+ target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
+ ui__warning("%s", errbuf);
+ }
+
+ if (target__none(&kvm->opts.target))
+ kvm->opts.target.system_wide = true;
+
+
+ /*
+ * generate the event list
+ */
+ err = setup_kvm_events_tp(kvm);
+ if (err < 0) {
+ pr_err("Unable to setup the kvm tracepoints\n");
+ return err;
+ }
+
+ kvm->evlist = kvm_live_event_list();
+ if (kvm->evlist == NULL) {
+ err = -1;
+ goto out;
+ }
+
+ if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+ usage_with_options(live_usage, live_options);
+
+ /*
+ * perf session
+ */
+ kvm->session = perf_session__new(&data, false, &kvm->tool);
+ if (kvm->session == NULL) {
+ err = -1;
+ goto out;
+ }
+ kvm->session->evlist = kvm->evlist;
+ perf_session__set_id_hdr_size(kvm->session);
+ ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
+ machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
+ kvm->evlist->threads, false,
+ kvm->opts.proc_map_timeout, 1);
+ err = kvm_live_open_events(kvm);
+ if (err)
+ goto out;
+
+ err = kvm_events_live_report(kvm);
+
+out:
+ perf_session__delete(kvm->session);
+ kvm->session = NULL;
+ perf_evlist__delete(kvm->evlist);
+
+ return err;
+}
+#endif
+
+static void print_kvm_stat_usage(void)
+{
+ printf("Usage: perf kvm stat <command>\n\n");
+
+ printf("# Available commands:\n");
+ printf("\trecord: record kvm events\n");
+ printf("\treport: report statistical data of kvm events\n");
+ printf("\tlive: live reporting of statistical data of kvm events\n");
+
+ printf("\nOtherwise, it is the alias of 'perf stat':\n");
+}
+
+static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
+{
+ struct perf_kvm_stat kvm = {
+ .file_name = file_name,
+
+ .trace_vcpu = -1,
+ .report_event = "vmexit",
+ .sort_key = "sample",
+
+ };
+
+ if (argc == 1) {
+ print_kvm_stat_usage();
+ goto perf_stat;
+ }
+
+ if (!strncmp(argv[1], "rec", 3))
+ return kvm_events_record(&kvm, argc - 1, argv + 1);
+
+ if (!strncmp(argv[1], "rep", 3))
+ return kvm_events_report(&kvm, argc - 1 , argv + 1);
+
+#ifdef HAVE_TIMERFD_SUPPORT
+ if (!strncmp(argv[1], "live", 4))
+ return kvm_events_live(&kvm, argc - 1 , argv + 1);
+#endif
+
+perf_stat:
+ return cmd_stat(argc, argv);
+}
+#endif /* HAVE_KVM_STAT_SUPPORT */
+
+static int __cmd_record(const char *file_name, int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+
+ rec_argc = argc + 2;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (rec_argv == NULL)
+ return -ENOMEM;
+ rec_argv[i++] = strdup("record");
+ rec_argv[i++] = strdup("-o");
+ rec_argv[i++] = strdup(file_name);
+ for (j = 1; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ BUG_ON(i != rec_argc);
+
+ return cmd_record(i, rec_argv);
+}
+
+static int __cmd_report(const char *file_name, int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+
+ rec_argc = argc + 2;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (rec_argv == NULL)
+ return -ENOMEM;
+ rec_argv[i++] = strdup("report");
+ rec_argv[i++] = strdup("-i");
+ rec_argv[i++] = strdup(file_name);
+ for (j = 1; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ BUG_ON(i != rec_argc);
+
+ return cmd_report(i, rec_argv);
+}
+
+static int
+__cmd_buildid_list(const char *file_name, int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+
+ rec_argc = argc + 2;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (rec_argv == NULL)
+ return -ENOMEM;
+ rec_argv[i++] = strdup("buildid-list");
+ rec_argv[i++] = strdup("-i");
+ rec_argv[i++] = strdup(file_name);
+ for (j = 1; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ BUG_ON(i != rec_argc);
+
+ return cmd_buildid_list(i, rec_argv);
+}
+
+int cmd_kvm(int argc, const char **argv)
+{
+ const char *file_name = NULL;
+ const struct option kvm_options[] = {
+ OPT_STRING('i', "input", &file_name, "file",
+ "Input file name"),
+ OPT_STRING('o', "output", &file_name, "file",
+ "Output file name"),
+ OPT_BOOLEAN(0, "guest", &perf_guest,
+ "Collect guest os data"),
+ OPT_BOOLEAN(0, "host", &perf_host,
+ "Collect host os data"),
+ OPT_STRING(0, "guestmount", &symbol_conf.guestmount, "directory",
+ "guest mount directory under which every guest os"
+ " instance has a subdir"),
+ OPT_STRING(0, "guestvmlinux", &symbol_conf.default_guest_vmlinux_name,
+ "file", "file saving guest os vmlinux"),
+ OPT_STRING(0, "guestkallsyms", &symbol_conf.default_guest_kallsyms,
+ "file", "file saving guest os /proc/kallsyms"),
+ OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
+ "file", "file saving guest os /proc/modules"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_END()
+ };
+
+ const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
+ "buildid-list", "stat", NULL };
+ const char *kvm_usage[] = { NULL, NULL };
+
+ perf_host = 0;
+ perf_guest = 1;
+
+ argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc)
+ usage_with_options(kvm_usage, kvm_options);
+
+ if (!perf_host)
+ perf_guest = 1;
+
+ if (!file_name) {
+ file_name = get_filename_for_perf_kvm();
+
+ if (!file_name) {
+ pr_err("Failed to allocate memory for filename\n");
+ return -ENOMEM;
+ }
+ }
+
+ if (!strncmp(argv[0], "rec", 3))
+ return __cmd_record(file_name, argc, argv);
+ else if (!strncmp(argv[0], "rep", 3))
+ return __cmd_report(file_name, argc, argv);
+ else if (!strncmp(argv[0], "diff", 4))
+ return cmd_diff(argc, argv);
+ else if (!strncmp(argv[0], "top", 3))
+ return cmd_top(argc, argv);
+ else if (!strncmp(argv[0], "buildid-list", 12))
+ return __cmd_buildid_list(file_name, argc, argv);
+#ifdef HAVE_KVM_STAT_SUPPORT
+ else if (!strncmp(argv[0], "stat", 4))
+ return kvm_cmd_stat(file_name, argc, argv);
+#endif
+ else
+ usage_with_options(kvm_usage, kvm_options);
+
+ return 0;
+}
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
new file mode 100644
index 000000000..ead221e49
--- /dev/null
+++ b/tools/perf/builtin-list.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-list.c
+ *
+ * Builtin list command: list all event types
+ *
+ * Copyright (C) 2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/parse-events.h"
+#include "util/cache.h"
+#include "util/pmu.h"
+#include "util/debug.h"
+#include "util/metricgroup.h"
+#include <subcmd/parse-options.h>
+
+static bool desc_flag = true;
+static bool details_flag;
+
+int cmd_list(int argc, const char **argv)
+{
+ int i;
+ bool raw_dump = false;
+ bool long_desc_flag = false;
+ struct option list_options[] = {
+ OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
+ OPT_BOOLEAN('d', "desc", &desc_flag,
+ "Print extra event descriptions. --no-desc to not print."),
+ OPT_BOOLEAN('v', "long-desc", &long_desc_flag,
+ "Print longer event descriptions."),
+ OPT_BOOLEAN(0, "details", &details_flag,
+ "Print information on the perf event names and expressions used internally by events."),
+ OPT_INCR(0, "debug", &verbose,
+ "Enable debugging output"),
+ OPT_END()
+ };
+ const char * const list_usage[] = {
+ "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+ NULL
+ };
+
+ set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN);
+
+ argc = parse_options(argc, argv, list_options, list_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ setup_pager();
+
+ if (!raw_dump && pager_in_use())
+ printf("\nList of pre-defined events (to be used in -e):\n\n");
+
+ if (argc == 0) {
+ print_events(NULL, raw_dump, !desc_flag, long_desc_flag,
+ details_flag);
+ return 0;
+ }
+
+ for (i = 0; i < argc; ++i) {
+ char *sep, *s;
+
+ if (strcmp(argv[i], "tracepoint") == 0)
+ print_tracepoint_events(NULL, NULL, raw_dump);
+ else if (strcmp(argv[i], "hw") == 0 ||
+ strcmp(argv[i], "hardware") == 0)
+ print_symbol_events(NULL, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
+ else if (strcmp(argv[i], "sw") == 0 ||
+ strcmp(argv[i], "software") == 0)
+ print_symbol_events(NULL, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
+ else if (strcmp(argv[i], "cache") == 0 ||
+ strcmp(argv[i], "hwcache") == 0)
+ print_hwcache_events(NULL, raw_dump);
+ else if (strcmp(argv[i], "pmu") == 0)
+ print_pmu_events(NULL, raw_dump, !desc_flag,
+ long_desc_flag, details_flag);
+ else if (strcmp(argv[i], "sdt") == 0)
+ print_sdt_events(NULL, NULL, raw_dump);
+ else if (strcmp(argv[i], "metric") == 0)
+ metricgroup__print(true, false, NULL, raw_dump);
+ else if (strcmp(argv[i], "metricgroup") == 0)
+ metricgroup__print(false, true, NULL, raw_dump);
+ else if ((sep = strchr(argv[i], ':')) != NULL) {
+ int sep_idx;
+
+ sep_idx = sep - argv[i];
+ s = strdup(argv[i]);
+ if (s == NULL)
+ return -1;
+
+ s[sep_idx] = '\0';
+ print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
+ print_sdt_events(s, s + sep_idx + 1, raw_dump);
+ metricgroup__print(true, true, s, raw_dump);
+ free(s);
+ } else {
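+ /* not a "sys:name" spec; match the argument as a substring glob, e.g. "br" becomes "*br*" */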
+ if (asprintf(&s, "*%s*", argv[i]) < 0) {
+ printf("Critical: Not enough memory! Trying to continue...\n");
+ continue;
+ }
+ print_symbol_events(s, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
+ print_symbol_events(s, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
+ print_hwcache_events(s, raw_dump);
+ print_pmu_events(s, raw_dump, !desc_flag,
+ long_desc_flag,
+ details_flag);
+ print_tracepoint_events(NULL, s, raw_dump);
+ print_sdt_events(NULL, s, raw_dump);
+ metricgroup__print(true, true, NULL, raw_dump);
+ free(s);
+ }
+ }
+ return 0;
+}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
new file mode 100644
index 000000000..0cb7f7b73
--- /dev/null
+++ b/tools/perf/builtin-lock.c
@@ -0,0 +1,1034 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/util.h"
+#include "util/cache.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/data.h"
+
+#include <sys/types.h>
+#include <sys/prctl.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
+#include <limits.h>
+
+#include <linux/list.h>
+#include <linux/hash.h>
+#include <linux/kernel.h>
+
+static struct perf_session *session;
+
+/* based on kernel/lockdep.c */
+#define LOCKHASH_BITS 12
+#define LOCKHASH_SIZE (1UL << LOCKHASH_BITS)
+
+static struct list_head lockhash_table[LOCKHASH_SIZE];
+
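+/* hash a lock address into one of the LOCKHASH_SIZE buckets above */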
+#define __lockhashfn(key) hash_long((unsigned long)key, LOCKHASH_BITS)
+#define lockhashentry(key) (lockhash_table + __lockhashfn((key)))
+
+struct lock_stat {
+ struct list_head hash_entry;
+ struct rb_node rb; /* used for sorting */
+
+ /*
+ * FIXME: perf_evsel__intval() returns u64,
+ * so the address of lockdep_map has to be handled as a 64-bit value.
+ * Is there a better solution?
+ */
+ void *addr; /* address of lockdep_map, used as ID */
+ char *name; /* for strcpy(), we cannot use const */
+
+ unsigned int nr_acquire;
+ unsigned int nr_acquired;
+ unsigned int nr_contended;
+ unsigned int nr_release;
+
+ unsigned int nr_readlock;
+ unsigned int nr_trylock;
+
+ /* these times are in nano sec. */
+ u64 avg_wait_time;
+ u64 wait_time_total;
+ u64 wait_time_min;
+ u64 wait_time_max;
+
+ int discard; /* flag of blacklist */
+};
+
+/*
+ * States of lock_seq_stat
+ *
+ * UNINITIALIZED is required for detecting the first acquire event.
+ * Given the nature of lock events, there is no guarantee
+ * that the first event seen for a lock is acquire;
+ * it can be acquired, contended or release.
+ */
+#define SEQ_STATE_UNINITIALIZED 0 /* initial state */
+#define SEQ_STATE_RELEASED 1
+#define SEQ_STATE_ACQUIRING 2
+#define SEQ_STATE_ACQUIRED 3
+#define SEQ_STATE_READ_ACQUIRED 4
+#define SEQ_STATE_CONTENDED 5
+
+/*
+ * MAX_LOCK_DEPTH
+ * Imported from include/linux/sched.h.
+ * Should this be synchronized?
+ */
+#define MAX_LOCK_DEPTH 48
+
+/*
+ * struct lock_seq_stat:
+ * Tracks the state of one lock sequence, e.g.:
+ * 1) acquire -> acquired -> release
+ * 2) acquire -> contended -> acquired -> release
+ * 3) acquire (with read or try) -> release
+ * 4) Are there other patterns?
+ */
+struct lock_seq_stat {
+ struct list_head list;
+ int state;
+ u64 prev_event_time;
+ void *addr;
+
+ int read_count;
+};
+
+struct thread_stat {
+ struct rb_node rb;
+
+ u32 tid;
+ struct list_head seq_list;
+};
+
+static struct rb_root thread_stats;
+
+static struct thread_stat *thread_stat_find(u32 tid)
+{
+ struct rb_node *node;
+ struct thread_stat *st;
+
+ node = thread_stats.rb_node;
+ while (node) {
+ st = container_of(node, struct thread_stat, rb);
+ if (st->tid == tid)
+ return st;
+ else if (tid < st->tid)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
+ }
+
+ return NULL;
+}
+
+static void thread_stat_insert(struct thread_stat *new)
+{
+ struct rb_node **rb = &thread_stats.rb_node;
+ struct rb_node *parent = NULL;
+ struct thread_stat *p;
+
+ while (*rb) {
+ p = container_of(*rb, struct thread_stat, rb);
+ parent = *rb;
+
+ if (new->tid < p->tid)
+ rb = &(*rb)->rb_left;
+ else if (new->tid > p->tid)
+ rb = &(*rb)->rb_right;
+ else
+ BUG_ON("inserting invalid thread_stat\n");
+ }
+
+ rb_link_node(&new->rb, parent, rb);
+ rb_insert_color(&new->rb, &thread_stats);
+}
+
+static struct thread_stat *thread_stat_findnew_after_first(u32 tid)
+{
+ struct thread_stat *st;
+
+ st = thread_stat_find(tid);
+ if (st)
+ return st;
+
+ st = zalloc(sizeof(struct thread_stat));
+ if (!st) {
+ pr_err("memory allocation failed\n");
+ return NULL;
+ }
+
+ st->tid = tid;
+ INIT_LIST_HEAD(&st->seq_list);
+
+ thread_stat_insert(st);
+
+ return st;
+}
+
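+/*
+ * thread_stat_findnew is a function pointer: the very first call runs a
+ * fast path that seeds the empty rb-tree, then swaps itself for the
+ * general find-or-insert variant above.
+ */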
+static struct thread_stat *thread_stat_findnew_first(u32 tid);
+static struct thread_stat *(*thread_stat_findnew)(u32 tid) =
+ thread_stat_findnew_first;
+
+static struct thread_stat *thread_stat_findnew_first(u32 tid)
+{
+ struct thread_stat *st;
+
+ st = zalloc(sizeof(struct thread_stat));
+ if (!st) {
+ pr_err("memory allocation failed\n");
+ return NULL;
+ }
+ st->tid = tid;
+ INIT_LIST_HEAD(&st->seq_list);
+
+ rb_link_node(&st->rb, NULL, &thread_stats.rb_node);
+ rb_insert_color(&st->rb, &thread_stats);
+
+ thread_stat_findnew = thread_stat_findnew_after_first;
+ return st;
+}
+
+/* build a simple key function that returns whether one is bigger than two */
+#define SINGLE_KEY(member) \
+ static int lock_stat_key_ ## member(struct lock_stat *one, \
+ struct lock_stat *two) \
+ { \
+ return one->member > two->member; \
+ }
+
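+/*
+ * e.g. SINGLE_KEY(nr_acquired) expands to lock_stat_key_nr_acquired(),
+ * which compares two lock_stat entries by their nr_acquired counts.
+ */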
+SINGLE_KEY(nr_acquired)
+SINGLE_KEY(nr_contended)
+SINGLE_KEY(avg_wait_time)
+SINGLE_KEY(wait_time_total)
+SINGLE_KEY(wait_time_max)
+
+static int lock_stat_key_wait_time_min(struct lock_stat *one,
+ struct lock_stat *two)
+{
+ u64 s1 = one->wait_time_min;
+ u64 s2 = two->wait_time_min;
+ if (s1 == ULLONG_MAX)
+ s1 = 0;
+ if (s2 == ULLONG_MAX)
+ s2 = 0;
+ return s1 > s2;
+}
+
+struct lock_key {
+ /*
+ * name: the value specified by the user;
+ * it should be simpler than the raw member name,
+ * e.g. nr_acquired -> acquired, wait_time_total -> wait_total
+ */
+ const char *name;
+ int (*key)(struct lock_stat*, struct lock_stat*);
+};
+
+static const char *sort_key = "acquired";
+
+static int (*compare)(struct lock_stat *, struct lock_stat *);
+
+static struct rb_root result; /* place to store sorted data */
+
+#define DEF_KEY_LOCK(name, fn_suffix) \
+ { #name, lock_stat_key_ ## fn_suffix }
+struct lock_key keys[] = {
+ DEF_KEY_LOCK(acquired, nr_acquired),
+ DEF_KEY_LOCK(contended, nr_contended),
+ DEF_KEY_LOCK(avg_wait, avg_wait_time),
+ DEF_KEY_LOCK(wait_total, wait_time_total),
+ DEF_KEY_LOCK(wait_min, wait_time_min),
+ DEF_KEY_LOCK(wait_max, wait_time_max),
+
+ /* more complicated extra comparisons should go here */
+
+ { NULL, NULL }
+};
+
+static int select_key(void)
+{
+ int i;
+
+ for (i = 0; keys[i].name; i++) {
+ if (!strcmp(keys[i].name, sort_key)) {
+ compare = keys[i].key;
+ return 0;
+ }
+ }
+
+ pr_err("Unknown compare key: %s\n", sort_key);
+
+ return -1;
+}
+
+static void insert_to_result(struct lock_stat *st,
+ int (*bigger)(struct lock_stat *, struct lock_stat *))
+{
+ struct rb_node **rb = &result.rb_node;
+ struct rb_node *parent = NULL;
+ struct lock_stat *p;
+
+ while (*rb) {
+ p = container_of(*rb, struct lock_stat, rb);
+ parent = *rb;
+
+ if (bigger(st, p))
+ rb = &(*rb)->rb_left;
+ else
+ rb = &(*rb)->rb_right;
+ }
+
+ rb_link_node(&st->rb, parent, rb);
+ rb_insert_color(&st->rb, &result);
+}
+
+/* return the leftmost element of result and erase it */
+static struct lock_stat *pop_from_result(void)
+{
+ struct rb_node *node = result.rb_node;
+
+ if (!node)
+ return NULL;
+
+ while (node->rb_left)
+ node = node->rb_left;
+
+ rb_erase(node, &result);
+ return container_of(node, struct lock_stat, rb);
+}
+
+static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
+{
+ struct list_head *entry = lockhashentry(addr);
+ struct lock_stat *ret, *new;
+
+ list_for_each_entry(ret, entry, hash_entry) {
+ if (ret->addr == addr)
+ return ret;
+ }
+
+ new = zalloc(sizeof(struct lock_stat));
+ if (!new)
+ goto alloc_failed;
+
+ new->addr = addr;
+ new->name = zalloc(sizeof(char) * strlen(name) + 1);
+ if (!new->name) {
+ free(new);
+ goto alloc_failed;
+ }
+
+ strcpy(new->name, name);
+ new->wait_time_min = ULLONG_MAX;
+
+ list_add(&new->hash_entry, entry);
+ return new;
+
+alloc_failed:
+ pr_err("memory allocation failed\n");
+ return NULL;
+}
+
+struct trace_lock_handler {
+ int (*acquire_event)(struct perf_evsel *evsel,
+ struct perf_sample *sample);
+
+ int (*acquired_event)(struct perf_evsel *evsel,
+ struct perf_sample *sample);
+
+ int (*contended_event)(struct perf_evsel *evsel,
+ struct perf_sample *sample);
+
+ int (*release_event)(struct perf_evsel *evsel,
+ struct perf_sample *sample);
+};
+
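+/* find or create the lock_seq_stat tracking this thread's sequence for the given lock address */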
+static struct lock_seq_stat *get_seq(struct thread_stat *ts, void *addr)
+{
+ struct lock_seq_stat *seq;
+
+ list_for_each_entry(seq, &ts->seq_list, list) {
+ if (seq->addr == addr)
+ return seq;
+ }
+
+ seq = zalloc(sizeof(struct lock_seq_stat));
+ if (!seq) {
+ pr_err("memory allocation failed\n");
+ return NULL;
+ }
+ seq->state = SEQ_STATE_UNINITIALIZED;
+ seq->addr = addr;
+
+ list_add(&seq->list, &ts->seq_list);
+ return seq;
+}
+
+enum broken_state {
+ BROKEN_ACQUIRE,
+ BROKEN_ACQUIRED,
+ BROKEN_CONTENDED,
+ BROKEN_RELEASE,
+ BROKEN_MAX,
+};
+
+static int bad_hist[BROKEN_MAX];
+
+enum acquire_flags {
+ TRY_LOCK = 1,
+ READ_LOCK = 2,
+};
+
+static int report_lock_acquire_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ void *addr;
+ struct lock_stat *ls;
+ struct thread_stat *ts;
+ struct lock_seq_stat *seq;
+ const char *name = perf_evsel__strval(evsel, sample, "name");
+ u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ int flag = perf_evsel__intval(evsel, sample, "flag");
+
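+ /* recover the lockdep_map pointer from the u64 tracepoint field (see the FIXME in struct lock_stat) */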
+ memcpy(&addr, &tmp, sizeof(void *));
+
+ ls = lock_stat_findnew(addr, name);
+ if (!ls)
+ return -ENOMEM;
+ if (ls->discard)
+ return 0;
+
+ ts = thread_stat_findnew(sample->tid);
+ if (!ts)
+ return -ENOMEM;
+
+ seq = get_seq(ts, addr);
+ if (!seq)
+ return -ENOMEM;
+
+ switch (seq->state) {
+ case SEQ_STATE_UNINITIALIZED:
+ case SEQ_STATE_RELEASED:
+ if (!flag) {
+ seq->state = SEQ_STATE_ACQUIRING;
+ } else {
+ if (flag & TRY_LOCK)
+ ls->nr_trylock++;
+ if (flag & READ_LOCK)
+ ls->nr_readlock++;
+ seq->state = SEQ_STATE_READ_ACQUIRED;
+ seq->read_count = 1;
+ ls->nr_acquired++;
+ }
+ break;
+ case SEQ_STATE_READ_ACQUIRED:
+ if (flag & READ_LOCK) {
+ seq->read_count++;
+ ls->nr_acquired++;
+ goto end;
+ } else {
+ goto broken;
+ }
+ break;
+ case SEQ_STATE_ACQUIRED:
+ case SEQ_STATE_ACQUIRING:
+ case SEQ_STATE_CONTENDED:
+broken:
+ /* broken lock sequence, discard it */
+ ls->discard = 1;
+ bad_hist[BROKEN_ACQUIRE]++;
+ list_del(&seq->list);
+ free(seq);
+ goto end;
+ default:
+ BUG_ON("Unknown state of lock sequence found!\n");
+ break;
+ }
+
+ ls->nr_acquire++;
+ seq->prev_event_time = sample->time;
+end:
+ return 0;
+}
+
+static int report_lock_acquired_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ void *addr;
+ struct lock_stat *ls;
+ struct thread_stat *ts;
+ struct lock_seq_stat *seq;
+ u64 contended_term;
+ const char *name = perf_evsel__strval(evsel, sample, "name");
+ u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+
+ memcpy(&addr, &tmp, sizeof(void *));
+
+ ls = lock_stat_findnew(addr, name);
+ if (!ls)
+ return -ENOMEM;
+ if (ls->discard)
+ return 0;
+
+ ts = thread_stat_findnew(sample->tid);
+ if (!ts)
+ return -ENOMEM;
+
+ seq = get_seq(ts, addr);
+ if (!seq)
+ return -ENOMEM;
+
+ switch (seq->state) {
+ case SEQ_STATE_UNINITIALIZED:
+ /* orphan event, do nothing */
+ return 0;
+ case SEQ_STATE_ACQUIRING:
+ break;
+ case SEQ_STATE_CONTENDED:
+ contended_term = sample->time - seq->prev_event_time;
+ ls->wait_time_total += contended_term;
+ if (contended_term < ls->wait_time_min)
+ ls->wait_time_min = contended_term;
+ if (ls->wait_time_max < contended_term)
+ ls->wait_time_max = contended_term;
+ break;
+ case SEQ_STATE_RELEASED:
+ case SEQ_STATE_ACQUIRED:
+ case SEQ_STATE_READ_ACQUIRED:
+ /* broken lock sequence, discard it */
+ ls->discard = 1;
+ bad_hist[BROKEN_ACQUIRED]++;
+ list_del(&seq->list);
+ free(seq);
+ goto end;
+ default:
+ BUG_ON("Unknown state of lock sequence found!\n");
+ break;
+ }
+
+ seq->state = SEQ_STATE_ACQUIRED;
+ ls->nr_acquired++;
+ ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
+ seq->prev_event_time = sample->time;
+end:
+ return 0;
+}
+
+static int report_lock_contended_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ void *addr;
+ struct lock_stat *ls;
+ struct thread_stat *ts;
+ struct lock_seq_stat *seq;
+ const char *name = perf_evsel__strval(evsel, sample, "name");
+ u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+
+ memcpy(&addr, &tmp, sizeof(void *));
+
+ ls = lock_stat_findnew(addr, name);
+ if (!ls)
+ return -ENOMEM;
+ if (ls->discard)
+ return 0;
+
+ ts = thread_stat_findnew(sample->tid);
+ if (!ts)
+ return -ENOMEM;
+
+ seq = get_seq(ts, addr);
+ if (!seq)
+ return -ENOMEM;
+
+ switch (seq->state) {
+ case SEQ_STATE_UNINITIALIZED:
+ /* orphan event, do nothing */
+ return 0;
+ case SEQ_STATE_ACQUIRING:
+ break;
+ case SEQ_STATE_RELEASED:
+ case SEQ_STATE_ACQUIRED:
+ case SEQ_STATE_READ_ACQUIRED:
+ case SEQ_STATE_CONTENDED:
+ /* broken lock sequence, discard it */
+ ls->discard = 1;
+ bad_hist[BROKEN_CONTENDED]++;
+ list_del(&seq->list);
+ free(seq);
+ goto end;
+ default:
+ BUG_ON("Unknown state of lock sequence found!\n");
+ break;
+ }
+
+ seq->state = SEQ_STATE_CONTENDED;
+ ls->nr_contended++;
+ ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
+ seq->prev_event_time = sample->time;
+end:
+ return 0;
+}
+
+static int report_lock_release_event(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ void *addr;
+ struct lock_stat *ls;
+ struct thread_stat *ts;
+ struct lock_seq_stat *seq;
+ const char *name = perf_evsel__strval(evsel, sample, "name");
+ u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+
+ memcpy(&addr, &tmp, sizeof(void *));
+
+ ls = lock_stat_findnew(addr, name);
+ if (!ls)
+ return -ENOMEM;
+ if (ls->discard)
+ return 0;
+
+ ts = thread_stat_findnew(sample->tid);
+ if (!ts)
+ return -ENOMEM;
+
+ seq = get_seq(ts, addr);
+ if (!seq)
+ return -ENOMEM;
+
+ switch (seq->state) {
+ case SEQ_STATE_UNINITIALIZED:
+ goto end;
+ case SEQ_STATE_ACQUIRED:
+ break;
+ case SEQ_STATE_READ_ACQUIRED:
+ seq->read_count--;
+ BUG_ON(seq->read_count < 0);
+ if (seq->read_count) {
+ ls->nr_release++;
+ goto end;
+ }
+ break;
+ case SEQ_STATE_ACQUIRING:
+ case SEQ_STATE_CONTENDED:
+ case SEQ_STATE_RELEASED:
+ /* broken lock sequence, discard it */
+ ls->discard = 1;
+ bad_hist[BROKEN_RELEASE]++;
+ goto free_seq;
+ default:
+ BUG_ON("Unknown state of lock sequence found!\n");
+ break;
+ }
+
+ ls->nr_release++;
+free_seq:
+ list_del(&seq->list);
+ free(seq);
+end:
+ return 0;
+}
+
+/* lock oriented handlers */
+/* TODO: handlers for CPU oriented, thread oriented */
+static struct trace_lock_handler report_lock_ops = {
+ .acquire_event = report_lock_acquire_event,
+ .acquired_event = report_lock_acquired_event,
+ .contended_event = report_lock_contended_event,
+ .release_event = report_lock_release_event,
+};
+
+static struct trace_lock_handler *trace_handler;
+
+static int perf_evsel__process_lock_acquire(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ if (trace_handler->acquire_event)
+ return trace_handler->acquire_event(evsel, sample);
+ return 0;
+}
+
+static int perf_evsel__process_lock_acquired(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ if (trace_handler->acquired_event)
+ return trace_handler->acquired_event(evsel, sample);
+ return 0;
+}
+
+static int perf_evsel__process_lock_contended(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ if (trace_handler->contended_event)
+ return trace_handler->contended_event(evsel, sample);
+ return 0;
+}
+
+static int perf_evsel__process_lock_release(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ if (trace_handler->release_event)
+ return trace_handler->release_event(evsel, sample);
+ return 0;
+}
+
+static void print_bad_events(int bad, int total)
+{
+ /* Output for debugging; this has to be removed eventually */
+ int i;
+ const char *name[4] =
+ { "acquire", "acquired", "contended", "release" };
+
+ pr_info("\n=== output for debug===\n\n");
+ pr_info("bad: %d, total: %d\n", bad, total);
+ pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
+ pr_info("histogram of events caused bad sequence\n");
+ for (i = 0; i < BROKEN_MAX; i++)
+ pr_info(" %10s: %d\n", name[i], bad_hist[i]);
+}
+
+/* TODO: various way to print, coloring, nano or milli sec */
+static void print_result(void)
+{
+ struct lock_stat *st;
+ char cut_name[20];
+ int bad, total;
+
+ pr_info("%20s ", "Name");
+ pr_info("%10s ", "acquired");
+ pr_info("%10s ", "contended");
+
+ pr_info("%15s ", "avg wait (ns)");
+ pr_info("%15s ", "total wait (ns)");
+ pr_info("%15s ", "max wait (ns)");
+ pr_info("%15s ", "min wait (ns)");
+
+ pr_info("\n\n");
+
+ bad = total = 0;
+ while ((st = pop_from_result())) {
+ total++;
+ if (st->discard) {
+ bad++;
+ continue;
+ }
+ bzero(cut_name, 20);
+
+ if (strlen(st->name) < 16) {
+ /* output raw name */
+ pr_info("%20s ", st->name);
+ } else {
+ strncpy(cut_name, st->name, 16);
+ cut_name[16] = '.';
+ cut_name[17] = '.';
+ cut_name[18] = '.';
+ cut_name[19] = '\0';
+ /* truncate the name to keep the output columns aligned */
+ pr_info("%20s ", cut_name);
+ }
+
+ pr_info("%10u ", st->nr_acquired);
+ pr_info("%10u ", st->nr_contended);
+
+ pr_info("%15" PRIu64 " ", st->avg_wait_time);
+ pr_info("%15" PRIu64 " ", st->wait_time_total);
+ pr_info("%15" PRIu64 " ", st->wait_time_max);
+ pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
+ 0 : st->wait_time_min);
+ pr_info("\n");
+ }
+
+ print_bad_events(bad, total);
+}
+
+static bool info_threads, info_map;
+
+static void dump_threads(void)
+{
+ struct thread_stat *st;
+ struct rb_node *node;
+ struct thread *t;
+
+ pr_info("%10s: comm\n", "Thread ID");
+
+ node = rb_first(&thread_stats);
+ while (node) {
+ st = container_of(node, struct thread_stat, rb);
+ t = perf_session__findnew(session, st->tid);
+ pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
+ node = rb_next(node);
+ thread__put(t);
+ }
+}
+
+static void dump_map(void)
+{
+ unsigned int i;
+ struct lock_stat *st;
+
+ pr_info("Address of instance: name of class\n");
+ for (i = 0; i < LOCKHASH_SIZE; i++) {
+ list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ pr_info(" %p: %s\n", st->addr, st->name);
+ }
+ }
+}
+
+static int dump_info(void)
+{
+ int rc = 0;
+
+ if (info_threads)
+ dump_threads();
+ else if (info_map)
+ dump_map();
+ else {
+ rc = -1;
+ pr_err("Unknown type of information\n");
+ }
+
+ return rc;
+}
+
+typedef int (*tracepoint_handler)(struct perf_evsel *evsel,
+ struct perf_sample *sample);
+
+static int process_sample_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ int err = 0;
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+
+ if (thread == NULL) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (evsel->handler != NULL) {
+ tracepoint_handler f = evsel->handler;
+ err = f(evsel, sample);
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
+static void sort_result(void)
+{
+ unsigned int i;
+ struct lock_stat *st;
+
+ for (i = 0; i < LOCKHASH_SIZE; i++) {
+ list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ insert_to_result(st, compare);
+ }
+ }
+}
+
+static const struct perf_evsel_str_handler lock_tracepoints[] = {
+ { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
+};
+
+static bool force;
+
+static int __cmd_report(bool display_info)
+{
+ int err = -EINVAL;
+ struct perf_tool eops = {
+ .sample = process_sample_event,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+ };
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = force,
+ };
+
+ session = perf_session__new(&data, false, &eops);
+ if (!session) {
+ pr_err("Initializing perf session failed\n");
+ return -1;
+ }
+
+ symbol__init(&session->header.env);
+
+ if (!perf_session__has_traces(session, "lock record"))
+ goto out_delete;
+
+ if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
+ pr_err("Initializing perf session tracepoint handlers failed\n");
+ goto out_delete;
+ }
+
+ if (select_key())
+ goto out_delete;
+
+ err = perf_session__process_events(session);
+ if (err)
+ goto out_delete;
+
+ setup_pager();
+ if (display_info) /* used for info subcommand */
+ err = dump_info();
+ else {
+ sort_result();
+ print_result();
+ }
+
+out_delete:
+ perf_session__delete(session);
+ return err;
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+ const char *record_args[] = {
+ "record", "-R", "-m", "1024", "-c", "1",
+ };
+ unsigned int rec_argc, i, j;
+ int ret;
+ const char **rec_argv;
+
+ for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
+ if (!is_valid_tracepoint(lock_tracepoints[i].name)) {
+ pr_err("tracepoint %s is not enabled. "
+ "Are CONFIG_LOCKDEP and CONFIG_LOCK_STAT enabled?\n",
+ lock_tracepoints[i].name);
+ return 1;
+ }
+ }
+
+ rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ /* factor of 2 is for -e in front of each tracepoint */
+ rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
+
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[i] = strdup(record_args[i]);
+
+ for (j = 0; j < ARRAY_SIZE(lock_tracepoints); j++) {
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = strdup(lock_tracepoints[j].name);
+ }
+
+ for (j = 1; j < (unsigned int)argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ BUG_ON(i != rec_argc);
+
+ ret = cmd_record(i, rec_argv);
+ free(rec_argv);
+ return ret;
+}
+
+int cmd_lock(int argc, const char **argv)
+{
+ const struct option lock_options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_END()
+ };
+
+ const struct option info_options[] = {
+ OPT_BOOLEAN('t', "threads", &info_threads,
+ "dump thread list in perf.data"),
+ OPT_BOOLEAN('m', "map", &info_map,
+ "map of lock instances (address:name table)"),
+ OPT_PARENT(lock_options)
+ };
+
+ const struct option report_options[] = {
+ OPT_STRING('k', "key", &sort_key, "acquired",
+ "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
+ /* TODO: type */
+ OPT_PARENT(lock_options)
+ };
+
+ const char * const info_usage[] = {
+ "perf lock info [<options>]",
+ NULL
+ };
+ const char *const lock_subcommands[] = { "record", "report", "script",
+ "info", NULL };
+ const char *lock_usage[] = {
+ NULL,
+ NULL
+ };
+ const char * const report_usage[] = {
+ "perf lock report [<options>]",
+ NULL
+ };
+ unsigned int i;
+ int rc = 0;
+
+ for (i = 0; i < LOCKHASH_SIZE; i++)
+ INIT_LIST_HEAD(lockhash_table + i);
+
+ argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
+ lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc)
+ usage_with_options(lock_usage, lock_options);
+
+ if (!strncmp(argv[0], "rec", 3)) {
+ return __cmd_record(argc, argv);
+ } else if (!strncmp(argv[0], "report", 6)) {
+ trace_handler = &report_lock_ops;
+ if (argc) {
+ argc = parse_options(argc, argv,
+ report_options, report_usage, 0);
+ if (argc)
+ usage_with_options(report_usage, report_options);
+ }
+ rc = __cmd_report(false);
+ } else if (!strcmp(argv[0], "script")) {
+ /* Aliased to 'perf script' */
+ return cmd_script(argc, argv);
+ } else if (!strcmp(argv[0], "info")) {
+ if (argc) {
+ argc = parse_options(argc, argv,
+ info_options, info_usage, 0);
+ if (argc)
+ usage_with_options(info_usage, info_options);
+ }
+ /* recycling report_lock_ops */
+ trace_handler = &report_lock_ops;
+ rc = __cmd_report(true);
+ } else {
+ usage_with_options(lock_usage, lock_options);
+ }
+
+ return rc;
+}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
new file mode 100644
index 000000000..57393e94d
--- /dev/null
+++ b/tools/perf/builtin-mem.c
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "builtin.h"
+#include "perf.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+#include "util/tool.h"
+#include "util/session.h"
+#include "util/data.h"
+#include "util/mem-events.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+
+#define MEM_OPERATION_LOAD 0x1
+#define MEM_OPERATION_STORE 0x2
+
+struct perf_mem {
+ struct perf_tool tool;
+ char const *input_name;
+ bool hide_unresolved;
+ bool dump_raw;
+ bool force;
+ bool phys_addr;
+ int operation;
+ const char *cpu_list;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
+
+static int parse_record_events(const struct option *opt,
+ const char *str, int unset __maybe_unused)
+{
+ struct perf_mem *mem = *(struct perf_mem **)opt->value;
+ int j;
+
+ if (strcmp(str, "list")) {
+ if (!perf_mem_events__parse(str)) {
+ mem->operation = 0;
+ return 0;
+ }
+ exit(-1);
+ }
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = &perf_mem_events[j];
+
+ fprintf(stderr, "%-13s%-*s%s\n",
+ e->tag,
+ verbose > 0 ? 25 : 0,
+ verbose > 0 ? perf_mem_events__name(j) : "",
+ e->supported ? ": available" : "");
+ }
+ exit(0);
+}
+
+static const char * const __usage[] = {
+ "perf mem record [<options>] [<command>]",
+ "perf mem record [<options>] -- <command> [<options>]",
+ NULL
+};
+
+static const char * const *record_mem_usage = __usage;
+
+static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+ int ret;
+ bool all_user = false, all_kernel = false;
+ struct option options[] = {
+ OPT_CALLBACK('e', "event", &mem, "event",
+ "event selector. use 'perf mem record -e list' to list available events",
+ parse_record_events),
+ OPT_UINTEGER(0, "ldlat", &perf_mem_events__loads_ldlat, "mem-loads latency"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('U', "all-user", &all_user, "collect only user level data"),
+ OPT_BOOLEAN('K', "all-kernel", &all_kernel, "collect only kernel level data"),
+ OPT_END()
+ };
+
+ argc = parse_options(argc, argv, options, record_mem_usage,
+ PARSE_OPT_KEEP_UNKNOWN);
+
+ rec_argc = argc + 10; /* max number of fixed arguments added below */
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -1;
+
+ rec_argv[i++] = "record";
+
+ if (mem->operation & MEM_OPERATION_LOAD)
+ perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true;
+
+ if (mem->operation & MEM_OPERATION_STORE)
+ perf_mem_events[PERF_MEM_EVENTS__STORE].record = true;
+
+ if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record)
+ rec_argv[i++] = "-W";
+
+ rec_argv[i++] = "-d";
+
+ if (mem->phys_addr)
+ rec_argv[i++] = "--phys-data";
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ if (!perf_mem_events[j].record)
+ continue;
+
+ if (!perf_mem_events[j].supported) {
+ pr_err("failed: event '%s' not supported\n",
+ perf_mem_events__name(j));
+ free(rec_argv);
+ return -1;
+ }
+
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = perf_mem_events__name(j);
+ }
+
+ if (all_user)
+ rec_argv[i++] = "--all-user";
+
+ if (all_kernel)
+ rec_argv[i++] = "--all-kernel";
+
+ for (j = 0; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ if (verbose > 0) {
+ pr_debug("calling: record ");
+
+ /* print the argv we just built; rec_argv[0] is "record", already printed above */
+ for (j = 1; rec_argv[j]; j++)
+ pr_debug("%s ", rec_argv[j]);
+ pr_debug("\n");
+ }
+
+ ret = cmd_record(i, rec_argv);
+ free(rec_argv);
+ return ret;
+}
+
+static int
+dump_raw_samples(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
+ struct addr_location al;
+ const char *fmt;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ fprintf(stderr, "problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
+ goto out_put;
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
+
+ if (mem->phys_addr) {
+ if (symbol_conf.field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s0x%016"PRIx64
+ "%s%"PRIu64"%s0x%"PRIx64"%s%s:%s\n";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+ "%s0x%016"PRIx64"%s%5"PRIu64"%s0x%06"PRIx64
+ "%s%s:%s\n";
+ symbol_conf.field_sep = " ";
+ }
+
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep,
+ sample->phys_addr,
+ symbol_conf.field_sep,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
+ } else {
+ if (symbol_conf.field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
+ "%s0x%"PRIx64"%s%s:%s\n";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+ "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+ symbol_conf.field_sep = " ";
+ }
+
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
+ }
+out_put:
+ addr_location__put(&al);
+ return 0;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ return dump_raw_samples(tool, event, sample, machine);
+}
+
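+/* Walk the perf.data file and dump each sample via dump_raw_samples(). */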
+static int report_raw_events(struct perf_mem *mem)
+{
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = mem->force,
+ };
+ int ret;
+ struct perf_session *session = perf_session__new(&data, false,
+ &mem->tool);
+
+ if (session == NULL)
+ return -1;
+
+ if (mem->cpu_list) {
+ ret = perf_session__cpu_bitmap(session, mem->cpu_list,
+ mem->cpu_bitmap);
+ if (ret < 0)
+ goto out_delete;
+ }
+
+ ret = symbol__init(&session->header.env);
+ if (ret < 0)
+ goto out_delete;
+
+ if (mem->phys_addr)
+ printf("# PID, TID, IP, ADDR, PHYS ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+ else
+ printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+
+ ret = perf_session__process_events(session);
+
+out_delete:
+ perf_session__delete(session);
+ return ret;
+}
+
+static int report_events(int argc, const char **argv, struct perf_mem *mem)
+{
+ const char **rep_argv;
+ int ret, i = 0, j, rep_argc;
+
+ if (mem->dump_raw)
+ return report_raw_events(mem);
+
+ rep_argc = argc + 3;
+ rep_argv = calloc(rep_argc + 1, sizeof(char *));
+ if (!rep_argv)
+ return -1;
+
+ rep_argv[i++] = "report";
+ rep_argv[i++] = "--mem-mode";
+ rep_argv[i++] = "-n"; /* display number of samples */
+
+ /*
+ * there is no weight (cost) associated with stores, so don't print
+ * the column
+ */
+ if (!(mem->operation & MEM_OPERATION_LOAD)) {
+ if (mem->phys_addr)
+ rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+ "dso_daddr,tlb,locked,phys_daddr";
+ else
+ rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
+ "dso_daddr,tlb,locked";
+ } else if (mem->phys_addr)
+ rep_argv[i++] = "--sort=local_weight,mem,sym,dso,symbol_daddr,"
+ "dso_daddr,snoop,tlb,locked,phys_daddr";
+
+ for (j = 1; j < argc; j++, i++)
+ rep_argv[i] = argv[j];
+
+ ret = cmd_report(i, rep_argv);
+ free(rep_argv);
+ return ret;
+}
+
+struct mem_mode {
+ const char *name;
+ int mode;
+};
+
+#define MEM_OPT(n, m) \
+ { .name = n, .mode = (m) }
+
+#define MEM_END { .name = NULL }
+
+static const struct mem_mode mem_modes[] = {
+ MEM_OPT("load", MEM_OPERATION_LOAD),
+ MEM_OPT("store", MEM_OPERATION_STORE),
+ MEM_END
+};
+
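+/*
+ * Parse the comma-separated '-t' argument ("load", "store"); an empty
+ * result defaults to load sampling.
+ */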
+static int
+parse_mem_ops(const struct option *opt, const char *str, int unset)
+{
+ int *mode = (int *)opt->value;
+ const struct mem_mode *m;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+
+ if (unset)
+ return 0;
+
+ /* str may be NULL in case no arg is passed to -t */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ /* reset mode */
+ *mode = 0;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ for (m = mem_modes; m->name; m++) {
+ if (!strcasecmp(s, m->name))
+ break;
+ }
+ if (!m->name) {
+ fprintf(stderr, "unknown sampling op %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= m->mode;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ if (*mode == 0)
+ *mode = MEM_OPERATION_LOAD;
+error:
+ free(os);
+ return ret;
+}
+
+int cmd_mem(int argc, const char **argv)
+{
+ struct stat st;
+ struct perf_mem mem = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .lost = perf_event__process_lost,
+ .fork = perf_event__process_fork,
+ .build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+ },
+ .input_name = "perf.data",
+ /*
+ * default to both load and store sampling
+ */
+ .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
+ };
+ const struct option mem_options[] = {
+ OPT_CALLBACK('t', "type", &mem.operation,
+ "type", "memory operations(load,store) Default load,store",
+ parse_mem_ops),
+ OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
+ "dump raw samples in ASCII"),
+ OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
+ "Only display entries resolved to a symbol"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep,
+ "separator",
+ "separator for columns, no spaces will be added"
+ " between columns '.' is reserved."),
+ OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"),
+ OPT_BOOLEAN('p', "phys-data", &mem.phys_addr, "Record/Report sample physical addresses"),
+ OPT_END()
+ };
+ const char *const mem_subcommands[] = { "record", "report", NULL };
+ const char *mem_usage[] = {
+ NULL,
+ NULL
+ };
+
+ if (perf_mem_events__init()) {
+ pr_err("failed: memory events not supported\n");
+ return -1;
+ }
+
+ argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
+ mem_usage, PARSE_OPT_KEEP_UNKNOWN);
+
+ if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
+ usage_with_options(mem_usage, mem_options);
+
+ if (!mem.input_name || !strlen(mem.input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ mem.input_name = "-";
+ else
+ mem.input_name = "perf.data";
+ }
+
+ if (!strncmp(argv[0], "rec", 3))
+ return __cmd_record(argc, argv, &mem);
+ else if (!strncmp(argv[0], "rep", 3))
+ return report_events(argc, argv, &mem);
+ else
+ usage_with_options(mem_usage, mem_options);
+
+ return 0;
+}
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
new file mode 100644
index 000000000..215da628d
--- /dev/null
+++ b/tools/perf/builtin-probe.c
@@ -0,0 +1,748 @@
+/*
+ * builtin-probe.c
+ *
+ * Builtin probe command: Set up probe events by C expression
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "perf.h"
+#include "builtin.h"
+#include "util/util.h"
+#include "util/strlist.h"
+#include "util/strfilter.h"
+#include "util/symbol.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/probe-finder.h"
+#include "util/probe-event.h"
+#include "util/probe-file.h"
+
+#define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
+#define DEFAULT_FUNC_FILTER "!_*"
+#define DEFAULT_LIST_FILTER "*"
+
+/* Session management structure */
+static struct {
+ int command; /* Command short_name */
+ bool list_events;
+ bool uprobes;
+ bool quiet;
+ bool target_used;
+ int nevents;
+ struct perf_probe_event events[MAX_PROBES];
+ struct line_range line_range;
+ char *target;
+ struct strfilter *filter;
+ struct nsinfo *nsi;
+} params;
+
+/* Parse an event definition. Note that any error here is fatal. */
+static int parse_probe_event(const char *str)
+{
+ struct perf_probe_event *pev = &params.events[params.nevents];
+ int ret;
+
+ pr_debug("probe-definition(%d): %s\n", params.nevents, str);
+ if (++params.nevents == MAX_PROBES) {
+ pr_err("Too many probes (> %d) were specified.", MAX_PROBES);
+ return -1;
+ }
+
+ pev->uprobes = params.uprobes;
+ if (params.target) {
+ pev->target = strdup(params.target);
+ if (!pev->target)
+ return -ENOMEM;
+ params.target_used = true;
+ }
+
+ pev->nsi = nsinfo__get(params.nsi);
+
+ /* Parse a perf-probe command into event */
+ ret = parse_perf_probe_command(str, pev);
+ pr_debug("%d arguments\n", pev->nargs);
+
+ return ret;
+}
+
+static int params_add_filter(const char *str)
+{
+ const char *err = NULL;
+ int ret = 0;
+
+ pr_debug2("Add filter: %s\n", str);
+ if (!params.filter) {
+ params.filter = strfilter__new(str, &err);
+ if (!params.filter)
+ ret = err ? -EINVAL : -ENOMEM;
+ } else
+ ret = strfilter__or(params.filter, str, &err);
+
+ if (ret == -EINVAL) {
+ pr_err("Filter parse error at %td.\n", err - str + 1);
+ pr_err("Source: \"%s\"\n", str);
+ pr_err(" %*c\n", (int)(err - str + 1), '^');
+ }
+
+ return ret;
+}
+
+static int set_target(const char *ptr)
+{
+ int found = 0;
+ const char *buf;
+
+ /*
+ * The first argument after options can be an absolute path
+ * to an executable / library or kernel module.
+ *
+ * TODO: Support relative path, and $PATH, $LD_LIBRARY_PATH,
+ * short module name.
+ */
+ if (!params.target && ptr && *ptr == '/') {
+ params.target = strdup(ptr);
+ if (!params.target)
+ return -ENOMEM;
+ params.target_used = false;
+
+ found = 1;
+ buf = ptr + (strlen(ptr) - 3);
+
+ if (strcmp(buf, ".ko"))
+ params.uprobes = true;
+
+ }
+
+ return found;
+}
+
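+/*
+ * Check whether the first argument is a probe target, then join the
+ * remaining arguments into a single probe definition and parse it.
+ */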
+static int parse_probe_event_argv(int argc, const char **argv)
+{
+ int i, len, ret, found_target;
+ char *buf;
+
+ found_target = set_target(argv[0]);
+ if (found_target < 0)
+ return found_target;
+
+ if (found_target && argc == 1)
+ return 0;
+
+ /* Concatenate the remaining arguments */
+ len = 0;
+ for (i = 0; i < argc; i++) {
+ if (i == 0 && found_target)
+ continue;
+
+ len += strlen(argv[i]) + 1;
+ }
+ buf = zalloc(len + 1);
+ if (buf == NULL)
+ return -ENOMEM;
+ len = 0;
+ for (i = 0; i < argc; i++) {
+ if (i == 0 && found_target)
+ continue;
+
+ len += sprintf(&buf[len], "%s ", argv[i]);
+ }
+ ret = parse_probe_event(buf);
+ free(buf);
+ return ret;
+}
+
+static int opt_set_target(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ int ret = -ENOENT;
+ char *tmp;
+
+ if (str) {
+ if (!strcmp(opt->long_name, "exec"))
+ params.uprobes = true;
+ else if (!strcmp(opt->long_name, "module"))
+ params.uprobes = false;
+ else
+ return ret;
+
+ /* Expand the given path to an absolute path, except for a module name */
+ if (params.uprobes || strchr(str, '/')) {
+ tmp = nsinfo__realpath(str, params.nsi);
+ if (!tmp) {
+ pr_warning("Failed to get the absolute path of %s: %m\n", str);
+ return ret;
+ }
+ } else {
+ tmp = strdup(str);
+ if (!tmp)
+ return -ENOMEM;
+ }
+ free(params.target);
+ params.target = tmp;
+ params.target_used = false;
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int opt_set_target_ns(const struct option *opt __maybe_unused,
+ const char *str, int unset __maybe_unused)
+{
+ int ret = -ENOENT;
+ pid_t ns_pid;
+ struct nsinfo *nsip;
+
+ if (str) {
+ errno = 0;
+ ns_pid = (pid_t)strtol(str, NULL, 10);
+ if (errno != 0) {
+ ret = -errno;
+ pr_warning("Failed to parse %s as a pid: %s\n", str,
+ strerror(errno));
+ return ret;
+ }
+ nsip = nsinfo__new(ns_pid);
+ if (nsip && nsip->need_setns)
+ params.nsi = nsinfo__get(nsip);
+ nsinfo__put(nsip);
+
+ ret = 0;
+ }
+
+ return ret;
+}
+
+
+/* Command option callbacks */
+
+#ifdef HAVE_DWARF_SUPPORT
+static int opt_show_lines(const struct option *opt,
+ const char *str, int unset __maybe_unused)
+{
+ int ret = 0;
+
+ if (!str)
+ return 0;
+
+ if (params.command == 'L') {
+ pr_warning("Warning: more than one --line options are"
+ " detected. Only the first one is valid.\n");
+ return 0;
+ }
+
+ params.command = opt->short_name;
+ ret = parse_line_range_desc(str, &params.line_range);
+
+ return ret;
+}
+
+static int opt_show_vars(const struct option *opt,
+ const char *str, int unset __maybe_unused)
+{
+ struct perf_probe_event *pev = &params.events[params.nevents];
+ int ret;
+
+ if (!str)
+ return 0;
+
+ ret = parse_probe_event(str);
+ if (!ret && pev->nargs != 0) {
+ pr_err(" Error: '--vars' doesn't accept arguments.\n");
+ return -EINVAL;
+ }
+ params.command = opt->short_name;
+
+ return ret;
+}
+#else
+# define opt_show_lines NULL
+# define opt_show_vars NULL
+#endif
+
+static int opt_add_probe_event(const struct option *opt,
+ const char *str, int unset __maybe_unused)
+{
+ if (str) {
+ params.command = opt->short_name;
+ return parse_probe_event(str);
+ }
+
+ return 0;
+}
+
+static int opt_set_filter_with_command(const struct option *opt,
+ const char *str, int unset)
+{
+ if (!unset)
+ params.command = opt->short_name;
+
+ if (str)
+ return params_add_filter(str);
+
+ return 0;
+}
+
+static int opt_set_filter(const struct option *opt __maybe_unused,
+ const char *str, int unset __maybe_unused)
+{
+ if (str)
+ return params_add_filter(str);
+
+ return 0;
+}
+
+static int init_params(void)
+{
+ return line_range__init(&params.line_range);
+}
+
+static void cleanup_params(void)
+{
+ int i;
+
+ for (i = 0; i < params.nevents; i++)
+ clear_perf_probe_event(params.events + i);
+ line_range__clear(&params.line_range);
+ free(params.target);
+ strfilter__delete(params.filter);
+ nsinfo__put(params.nsi);
+ memset(&params, 0, sizeof(params));
+}
+
+static void pr_err_with_code(const char *msg, int err)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ pr_err("%s", msg);
+ pr_debug(" Reason: %s (Code: %d)",
+ str_error_r(-err, sbuf, sizeof(sbuf)), err);
+ pr_err("\n");
+}
+
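+/*
+ * Convert the parsed probe events into trace events and apply them to the
+ * kernel (or just print their definitions for --definition), then show how
+ * to use the new events.
+ */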
+static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int ret;
+ int i, k;
+ const char *event = NULL, *group = NULL;
+
+ ret = init_probe_symbol_maps(pevs->uprobes);
+ if (ret < 0)
+ return ret;
+
+ ret = convert_perf_probe_events(pevs, npevs);
+ if (ret < 0)
+ goto out_cleanup;
+
+ if (params.command == 'D') { /* just show the definition */
+ ret = show_probe_trace_events(pevs, npevs);
+ goto out_cleanup;
+ }
+
+ ret = apply_perf_probe_events(pevs, npevs);
+ if (ret < 0)
+ goto out_cleanup;
+
+ for (i = k = 0; i < npevs; i++)
+ k += pevs[i].ntevs;
+
+ pr_info("Added new event%s\n", (k > 1) ? "s:" : ":");
+ for (i = 0; i < npevs; i++) {
+ struct perf_probe_event *pev = &pevs[i];
+
+ for (k = 0; k < pev->ntevs; k++) {
+ struct probe_trace_event *tev = &pev->tevs[k];
+ /* Skipped events have no event name */
+ if (!tev->event)
+ continue;
+
+ /* We use tev's name for showing new events */
+ show_perf_probe_event(tev->group, tev->event, pev,
+ tev->point.module, false);
+
+ /* Save the last valid name */
+ event = tev->event;
+ group = tev->group;
+ }
+ }
+
+ /* Note that it is possible to skip all events because of the blacklist */
+ if (event) {
+ /* Show how to use the event. */
+ pr_info("\nYou can now use it in all perf tools, such as:\n\n");
+ pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
+ }
+
+out_cleanup:
+ cleanup_perf_probe_events(pevs, npevs);
+ exit_probe_symbol_maps();
+ return ret;
+}
+
+static int del_perf_probe_caches(struct strfilter *filter)
+{
+ struct probe_cache *cache;
+ struct strlist *bidlist;
+ struct str_node *nd;
+ int ret;
+
+ bidlist = build_id_cache__list_all(false);
+ if (!bidlist) {
+ ret = -errno;
+ pr_debug("Failed to get buildids: %d\n", ret);
+ return ret ?: -ENOMEM;
+ }
+
+ strlist__for_each_entry(nd, bidlist) {
+ cache = probe_cache__new(nd->s, NULL);
+ if (!cache)
+ continue;
+ if (probe_cache__filter_purge(cache, filter) < 0 ||
+ probe_cache__commit(cache) < 0)
+ pr_warning("Failed to remove entries for %s\n", nd->s);
+ probe_cache__delete(cache);
+ }
+ return 0;
+}
+
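+/*
+ * Delete the kprobe and uprobe events matching the filter (or the matching
+ * probe cache entries when --cache is given).
+ */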
+static int perf_del_probe_events(struct strfilter *filter)
+{
+ int ret, ret2, ufd = -1, kfd = -1;
+ char *str = strfilter__string(filter);
+ struct strlist *klist = NULL, *ulist = NULL;
+ struct str_node *ent;
+
+ if (!str)
+ return -EINVAL;
+
+ pr_debug("Delete filter: \'%s\'\n", str);
+
+ if (probe_conf.cache)
+ return del_perf_probe_caches(filter);
+
+ /* Get current event names */
+ ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+ if (ret < 0)
+ goto out;
+
+ klist = strlist__new(NULL, NULL);
+ ulist = strlist__new(NULL, NULL);
+ if (!klist || !ulist) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = probe_file__get_events(kfd, filter, klist);
+ if (ret == 0) {
+ strlist__for_each_entry(ent, klist)
+ pr_info("Removed event: %s\n", ent->s);
+
+ ret = probe_file__del_strlist(kfd, klist);
+ if (ret < 0)
+ goto error;
+ }
+
+ ret2 = probe_file__get_events(ufd, filter, ulist);
+ if (ret2 == 0) {
+ strlist__for_each_entry(ent, ulist)
+ pr_info("Removed event: %s\n", ent->s);
+
+ ret2 = probe_file__del_strlist(ufd, ulist);
+ if (ret2 < 0)
+ goto error;
+ }
+
+ if (ret == -ENOENT && ret2 == -ENOENT)
+ pr_warning("\"%s\" does not hit any event.\n", str);
+ else
+ ret = 0;
+
+error:
+ if (kfd >= 0)
+ close(kfd);
+ if (ufd >= 0)
+ close(ufd);
+out:
+ strlist__delete(klist);
+ strlist__delete(ulist);
+ free(str);
+
+ return ret;
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+#define PROBEDEF_STR \
+ "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]"
+#else
+#define PROBEDEF_STR "[EVENT=]FUNC[+OFF|%return] [[NAME=]ARG ...]"
+#endif
+
+
+static int
+__cmd_probe(int argc, const char **argv)
+{
+ const char * const probe_usage[] = {
+ "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
+ "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
+ "perf probe [<options>] --del '[GROUP:]EVENT' ...",
+ "perf probe --list [GROUP:]EVENT ...",
+#ifdef HAVE_DWARF_SUPPORT
+ "perf probe [<options>] --line 'LINEDESC'",
+ "perf probe [<options>] --vars 'PROBEPOINT'",
+#endif
+ "perf probe [<options>] --funcs",
+ NULL
+ };
+ struct option options[] = {
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show parsed arguments, etc)"),
+ OPT_BOOLEAN('q', "quiet", &params.quiet,
+ "be quiet (do not show any messages)"),
+ OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
+ "list up probe events",
+ opt_set_filter_with_command, DEFAULT_LIST_FILTER),
+ OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
+ opt_set_filter_with_command),
+ OPT_CALLBACK('a', "add", NULL, PROBEDEF_STR,
+ "probe point definition, where\n"
+ "\t\tGROUP:\tGroup name (optional)\n"
+ "\t\tEVENT:\tEvent name\n"
+ "\t\tFUNC:\tFunction name\n"
+ "\t\tOFF:\tOffset from function entry (in byte)\n"
+ "\t\t%return:\tPut the probe at function return\n"
+#ifdef HAVE_DWARF_SUPPORT
+ "\t\tSRC:\tSource code path\n"
+ "\t\tRL:\tRelative line number from function entry.\n"
+ "\t\tAL:\tAbsolute line number in file.\n"
+ "\t\tPT:\tLazy expression of line code.\n"
+ "\t\tARG:\tProbe argument (local variable name or\n"
+ "\t\t\tkprobe-tracer argument format.)\n",
+#else
+ "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
+#endif
+ opt_add_probe_event),
+ OPT_CALLBACK('D', "definition", NULL, PROBEDEF_STR,
+ "Show trace event definition of given traceevent for k/uprobe_events.",
+ opt_add_probe_event),
+ OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
+ " with existing name"),
+ OPT_CALLBACK('L', "line", NULL,
+ "FUNC[:RLN[+NUM|-RLN2]]|SRC:ALN[+NUM|-ALN2]",
+ "Show source code lines.", opt_show_lines),
+ OPT_CALLBACK('V', "vars", NULL,
+ "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
+ "Show accessible variables on PROBEDEF", opt_show_vars),
+ OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
+ "Show external variables too (with --vars only)"),
+ OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range,
+ "Show variables location range in scope (with --vars only)"),
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_STRING('s', "source", &symbol_conf.source_prefix,
+ "directory", "path to kernel source"),
+ OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
+ "Don't search inlined functions"),
+ OPT__DRY_RUN(&probe_event_dry_run),
+ OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
+ "Set how many probe points can be found for a probe."),
+ OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+ "Show potential probe-able functions.",
+ opt_set_filter_with_command, DEFAULT_FUNC_FILTER),
+ OPT_CALLBACK('\0', "filter", NULL,
+ "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
+ "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
+ "\t\t\t \"" DEFAULT_FUNC_FILTER "\" for --funcs)",
+ opt_set_filter),
+ OPT_CALLBACK('x', "exec", NULL, "executable|path",
+ "target executable name or path", opt_set_target),
+ OPT_CALLBACK('m', "module", NULL, "modname|path",
+ "target module name (for online) or path (for offline)",
+ opt_set_target),
+ OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+ "Enable symbol demangling"),
+ OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+ "Enable kernel symbol demangling"),
+ OPT_BOOLEAN(0, "cache", &probe_conf.cache, "Manipulate probe cache"),
+ OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+ "Look for files with symbols relative to this directory"),
+ OPT_CALLBACK(0, "target-ns", NULL, "pid",
+ "target pid for namespace contexts", opt_set_target_ns),
+ OPT_END()
+ };
+ int ret;
+
+ set_option_flag(options, 'a', "add", PARSE_OPT_EXCLUSIVE);
+ set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE);
+ set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE);
+ set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE);
+#ifdef HAVE_DWARF_SUPPORT
+ set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
+ set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
+#else
+# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c)
+ set_nobuild('L', "line", false);
+ set_nobuild('V', "vars", false);
+ set_nobuild('\0', "externs", false);
+ set_nobuild('\0', "range", false);
+ set_nobuild('k', "vmlinux", true);
+ set_nobuild('s', "source", true);
+ set_nobuild('\0', "no-inlines", true);
+# undef set_nobuild
+#endif
+ set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
+
+ argc = parse_options(argc, argv, options, probe_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (argc > 0) {
+ if (strcmp(argv[0], "-") == 0) {
+ usage_with_options_msg(probe_usage, options,
+ "'-' is not supported.\n");
+ }
+ if (params.command && params.command != 'a') {
+ usage_with_options_msg(probe_usage, options,
+ "another command except --add is set.\n");
+ }
+ ret = parse_probe_event_argv(argc, argv);
+ if (ret < 0) {
+ pr_err_with_code(" Error: Command Parse Error.", ret);
+ return ret;
+ }
+ params.command = 'a';
+ }
+
+ if (params.quiet) {
+ if (verbose != 0) {
+ pr_err(" Error: -v and -q are exclusive.\n");
+ return -EINVAL;
+ }
+ verbose = -1;
+ }
+
+ if (probe_conf.max_probes == 0)
+ probe_conf.max_probes = MAX_PROBES;
+
+ /*
+ * Only consider the user's kernel image path if given.
+ */
+ symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+
+ /*
+ * Except for --list, --del and --add, the other commands neither
+ * depend on nor change the running kernel. So if the user gives an
+ * offline vmlinux, ignore its buildid.
+ */
+ if (!strchr("lda", params.command) && symbol_conf.vmlinux_name)
+ symbol_conf.ignore_vmlinux_buildid = true;
+
+ switch (params.command) {
+ case 'l':
+ if (params.uprobes) {
+ pr_err(" Error: Don't use --list with --exec.\n");
+ parse_options_usage(probe_usage, options, "l", true);
+ parse_options_usage(NULL, options, "x", true);
+ return -EINVAL;
+ }
+ ret = show_perf_probe_events(params.filter);
+ if (ret < 0)
+ pr_err_with_code(" Error: Failed to show event list.", ret);
+ return ret;
+ case 'F':
+ ret = show_available_funcs(params.target, params.nsi,
+ params.filter, params.uprobes);
+ if (ret < 0)
+ pr_err_with_code(" Error: Failed to show functions.", ret);
+ return ret;
+#ifdef HAVE_DWARF_SUPPORT
+ case 'L':
+ ret = show_line_range(&params.line_range, params.target,
+ params.nsi, params.uprobes);
+ if (ret < 0)
+ pr_err_with_code(" Error: Failed to show lines.", ret);
+ return ret;
+ case 'V':
+ if (!params.filter)
+ params.filter = strfilter__new(DEFAULT_VAR_FILTER,
+ NULL);
+
+ ret = show_available_vars(params.events, params.nevents,
+ params.filter);
+ if (ret < 0)
+ pr_err_with_code(" Error: Failed to show vars.", ret);
+ return ret;
+#endif
+ case 'd':
+ ret = perf_del_probe_events(params.filter);
+ if (ret < 0) {
+ pr_err_with_code(" Error: Failed to delete events.", ret);
+ return ret;
+ }
+ break;
+ case 'D':
+ case 'a':
+
+ /* Ensure the last given target is used */
+ if (params.target && !params.target_used) {
+ pr_err(" Error: -x/-m must follow the probe definitions.\n");
+ parse_options_usage(probe_usage, options, "m", true);
+ parse_options_usage(NULL, options, "x", true);
+ return -EINVAL;
+ }
+
+ ret = perf_add_probe_events(params.events, params.nevents);
+ if (ret < 0) {
+
+ /*
+ * When perf_add_probe_events() fails it calls
+ * cleanup_perf_probe_events(pevs, npevs), i.e.
+ * cleanup_perf_probe_events(params.events, params.nevents), which
+ * will call clear_perf_probe_event(), so set nevents to zero
+ * to avoid cleanup_params() to call clear_perf_probe_event() again
+ * on the same pevs.
+ */
+ params.nevents = 0;
+ pr_err_with_code(" Error: Failed to add events.", ret);
+ return ret;
+ }
+ break;
+ default:
+ usage_with_options(probe_usage, options);
+ }
+ return 0;
+}
+
+int cmd_probe(int argc, const char **argv)
+{
+ int ret;
+
+ ret = init_params();
+ if (!ret) {
+ ret = __cmd_probe(argc, argv);
+ cleanup_params();
+ }
+
+ return ret < 0 ? ret : 0;
+}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
new file mode 100644
index 000000000..22ebeb92a
--- /dev/null
+++ b/tools/perf/builtin-record.c
@@ -0,0 +1,1902 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-record.c
+ *
+ * Builtin record command: Record the profile of a workload
+ * (or a CPU, or a PID) into the perf.data output file - for
+ * later analysis via perf report.
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/build-id.h"
+#include "util/util.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/config.h"
+
+#include "util/callchain.h"
+#include "util/cgroup.h"
+#include "util/header.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/debug.h"
+#include "util/drv_configs.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/symbol.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/data.h"
+#include "util/perf_regs.h"
+#include "util/auxtrace.h"
+#include "util/tsc.h"
+#include "util/parse-branch-options.h"
+#include "util/parse-regs-options.h"
+#include "util/llvm-utils.h"
+#include "util/bpf-loader.h"
+#include "util/trigger.h"
+#include "util/perf-hooks.h"
+#include "util/time-utils.h"
+#include "util/units.h"
+#include "asm/bug.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <poll.h>
+#include <unistd.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <linux/time64.h>
+
+struct switch_output {
+ bool enabled;
+ bool signal;
+ unsigned long size;
+ unsigned long time;
+ const char *str;
+ bool set;
+};
+
+struct record {
+ struct perf_tool tool;
+ struct record_opts opts;
+ u64 bytes_written;
+ struct perf_data data;
+ struct auxtrace_record *itr;
+ struct perf_evlist *evlist;
+ struct perf_session *session;
+ int realtime_prio;
+ bool no_buildid;
+ bool no_buildid_set;
+ bool no_buildid_cache;
+ bool no_buildid_cache_set;
+ bool buildid_all;
+ bool timestamp_filename;
+ bool timestamp_boundary;
+ struct switch_output switch_output;
+ unsigned long long samples;
+};
+
+static volatile int auxtrace_record__snapshot_started;
+static DEFINE_TRIGGER(auxtrace_snapshot_trigger);
+static DEFINE_TRIGGER(switch_output_trigger);
+
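+/*
+ * --switch-output helpers: decide whether a new output file should be
+ * started on SIGUSR2, on accumulated output size or on a timer.
+ */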
+static bool switch_output_signal(struct record *rec)
+{
+ return rec->switch_output.signal &&
+ trigger_is_ready(&switch_output_trigger);
+}
+
+static bool switch_output_size(struct record *rec)
+{
+ return rec->switch_output.size &&
+ trigger_is_ready(&switch_output_trigger) &&
+ (rec->bytes_written >= rec->switch_output.size);
+}
+
+static bool switch_output_time(struct record *rec)
+{
+ return rec->switch_output.time &&
+ trigger_is_ready(&switch_output_trigger);
+}
+
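+/*
+ * Write a block of bytes to the output file, keeping track of the amount
+ * written for the --switch-output size trigger.
+ */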
+static int record__write(struct record *rec, void *bf, size_t size)
+{
+ if (perf_data__write(rec->session->data, bf, size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return -1;
+ }
+
+ rec->bytes_written += size;
+
+ if (switch_output_size(rec))
+ trigger_hit(&switch_output_trigger);
+
+ return 0;
+}
+
+static int process_synthesized_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct record *rec = container_of(tool, struct record, tool);
+ return record__write(rec, event, event->header.size);
+}
+
+static int record__pushfn(void *to, void *bf, size_t size)
+{
+ struct record *rec = to;
+
+ rec->samples++;
+ return record__write(rec, bf, size);
+}
+
+static volatile int done;
+static volatile int signr = -1;
+static volatile int child_finished;
+
+static void sig_handler(int sig)
+{
+ if (sig == SIGCHLD)
+ child_finished = 1;
+ else
+ signr = sig;
+
+ done = 1;
+}
+
+static void sigsegv_handler(int sig)
+{
+ perf_hooks__recover();
+ sighandler_dump_stack(sig);
+}
+
+static void record__sig_exit(void)
+{
+ if (signr == -1)
+ return;
+
+ signal(signr, SIG_DFL);
+ raise(signr);
+}
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
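+/*
+ * Append an AUX area trace chunk (possibly split in two by a ring buffer
+ * wrap-around) to the output, padded to an 8-byte boundary.
+ */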
+static int record__process_auxtrace(struct perf_tool *tool,
+ union perf_event *event, void *data1,
+ size_t len1, void *data2, size_t len2)
+{
+ struct record *rec = container_of(tool, struct record, tool);
+ struct perf_data *data = &rec->data;
+ size_t padding;
+ u8 pad[8] = {0};
+
+ if (!perf_data__is_pipe(data)) {
+ off_t file_offset;
+ int fd = perf_data__fd(data);
+ int err;
+
+ file_offset = lseek(fd, 0, SEEK_CUR);
+ if (file_offset == -1)
+ return -1;
+ err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
+ event, file_offset);
+ if (err)
+ return err;
+ }
+
+ /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
+ padding = (len1 + len2) & 7;
+ if (padding)
+ padding = 8 - padding;
+
+ record__write(rec, event, event->header.size);
+ record__write(rec, data1, len1);
+ if (len2)
+ record__write(rec, data2, len2);
+ record__write(rec, &pad, padding);
+
+ return 0;
+}
+
+static int record__auxtrace_mmap_read(struct record *rec,
+ struct auxtrace_mmap *mm)
+{
+ int ret;
+
+ ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+ record__process_auxtrace);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ rec->samples++;
+
+ return 0;
+}
+
+static int record__auxtrace_mmap_read_snapshot(struct record *rec,
+ struct auxtrace_mmap *mm)
+{
+ int ret;
+
+ ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+ record__process_auxtrace,
+ rec->opts.auxtrace_snapshot_size);
+ if (ret < 0)
+ return ret;
+
+ if (ret)
+ rec->samples++;
+
+ return 0;
+}
+
+static int record__auxtrace_read_snapshot_all(struct record *rec)
+{
+ int i;
+ int rc = 0;
+
+ for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+ struct auxtrace_mmap *mm =
+ &rec->evlist->mmap[i].auxtrace_mmap;
+
+ if (!mm->base)
+ continue;
+
+ if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
+ rc = -1;
+ goto out;
+ }
+ }
+out:
+ return rc;
+}
+
+static void record__read_auxtrace_snapshot(struct record *rec)
+{
+ pr_debug("Recording AUX area tracing snapshot\n");
+ if (record__auxtrace_read_snapshot_all(rec) < 0) {
+ trigger_error(&auxtrace_snapshot_trigger);
+ } else {
+ if (auxtrace_record__snapshot_finish(rec->itr))
+ trigger_error(&auxtrace_snapshot_trigger);
+ else
+ trigger_ready(&auxtrace_snapshot_trigger);
+ }
+}
+
+static int record__auxtrace_init(struct record *rec)
+{
+ int err;
+
+ if (!rec->itr) {
+ rec->itr = auxtrace_record__init(rec->evlist, &err);
+ if (err)
+ return err;
+ }
+
+ err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+ rec->opts.auxtrace_snapshot_opts);
+ if (err)
+ return err;
+
+ return auxtrace_parse_filters(rec->evlist);
+}
+
+#else
+
+static inline
+int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
+ struct auxtrace_mmap *mm __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
+{
+ return 0;
+}
+
+static int record__auxtrace_init(struct record *rec __maybe_unused)
+{
+ return 0;
+}
+
+#endif
+
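+/*
+ * mmap the ring buffers for all events; on EPERM, point the user at
+ * perf_event_mlock_kb and the -m/--mmap_pages option.
+ */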
+static int record__mmap_evlist(struct record *rec,
+ struct perf_evlist *evlist)
+{
+ struct record_opts *opts = &rec->opts;
+ char msg[512];
+
+ if (perf_evlist__mmap_ex(evlist, opts->mmap_pages,
+ opts->auxtrace_mmap_pages,
+ opts->auxtrace_snapshot_mode) < 0) {
+ if (errno == EPERM) {
+ pr_err("Permission error mapping pages.\n"
+ "Consider increasing "
+ "/proc/sys/kernel/perf_event_mlock_kb,\n"
+ "or try again with a smaller value of -m/--mmap_pages.\n"
+ "(current value: %u,%u)\n",
+ opts->mmap_pages, opts->auxtrace_mmap_pages);
+ return -errno;
+ } else {
+ pr_err("failed to mmap with %d (%s)\n", errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ if (errno)
+ return -errno;
+ else
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
+static int record__mmap(struct record *rec)
+{
+ return record__mmap_evlist(rec, rec->evlist);
+}
+
+static int record__open(struct record *rec)
+{
+ char msg[BUFSIZ];
+ struct perf_evsel *pos;
+ struct perf_evlist *evlist = rec->evlist;
+ struct perf_session *session = rec->session;
+ struct record_opts *opts = &rec->opts;
+ struct perf_evsel_config_term *err_term;
+ int rc = 0;
+
+ /*
+ * For initial_delay we need to add a dummy event so that we can track
+ * PERF_RECORD_MMAP while we wait for the initial delay to enable the
+ * real events, the ones asked by the user.
+ */
+ if (opts->initial_delay) {
+ if (perf_evlist__add_dummy(evlist))
+ return -ENOMEM;
+
+ pos = perf_evlist__first(evlist);
+ pos->tracking = 0;
+ pos = perf_evlist__last(evlist);
+ pos->tracking = 1;
+ pos->attr.enable_on_exec = 1;
+ }
+
+ perf_evlist__config(evlist, opts, &callchain_param);
+
+ evlist__for_each_entry(evlist, pos) {
+try_again:
+ if (perf_evsel__open(pos, pos->cpus, pos->threads) < 0) {
+ if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
+ if (verbose > 0)
+ ui__warning("%s\n", msg);
+ goto try_again;
+ }
+
+ rc = -errno;
+ perf_evsel__open_strerror(pos, &opts->target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+ goto out;
+ }
+
+ pos->supported = true;
+ }
+
+ if (perf_evlist__apply_filters(evlist, &pos)) {
+ pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
+ pos->filter, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ rc = -1;
+ goto out;
+ }
+
+ if (perf_evlist__apply_drv_configs(evlist, &pos, &err_term)) {
+ pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ rc = -1;
+ goto out;
+ }
+
+ rc = record__mmap(rec);
+ if (rc)
+ goto out;
+
+ session->evlist = evlist;
+ perf_session__set_id_hdr_size(session);
+out:
+ return rc;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct record *rec = container_of(tool, struct record, tool);
+
+ if (rec->evlist->first_sample_time == 0)
+ rec->evlist->first_sample_time = sample->time;
+
+ rec->evlist->last_sample_time = sample->time;
+
+ if (rec->buildid_all)
+ return 0;
+
+ rec->samples++;
+ return build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+}
+
+static int process_buildids(struct record *rec)
+{
+ struct perf_data *data = &rec->data;
+ struct perf_session *session = rec->session;
+
+ if (data->size == 0)
+ return 0;
+
+ /*
+ * During this process, it'll load the kernel map and replace the
+ * dso->long_name with a real pathname it found. In this case
+ * we prefer the vmlinux path like
+ * /lib/modules/3.16.4/build/vmlinux
+ *
+ * rather than build-id path (in debug directory).
+ * $HOME/.debug/.build-id/f0/6e17aa50adf4d00b88925e03775de107611551
+ */
+ symbol_conf.ignore_vmlinux_buildid = true;
+
+ /*
+ * If --buildid-all is given, it marks all DSOs regardless of hits,
+ * so no need to process samples. But if timestamp_boundary is enabled,
+ * it still needs to walk on all samples to get the timestamps of
+ * first/last samples.
+ */
+ if (rec->buildid_all && !rec->timestamp_boundary)
+ rec->tool.sample = NULL;
+
+ return perf_session__process_events(session);
+}
+
+static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
+{
+ int err;
+ struct perf_tool *tool = data;
+ /*
+ *As for guest kernel when processing subcommand record&report,
+ *we arrange module mmap prior to guest kernel mmap and trigger
+ *a preload dso because default guest module symbols are loaded
+ *from guest kallsyms instead of /lib/modules/XXX/XXX. This
+ *method is used to avoid symbol missing when the first addr is
+ *in module instead of in guest kernel.
+ */
+ err = perf_event__synthesize_modules(tool, process_synthesized_event,
+ machine);
+ if (err < 0)
+ pr_err("Couldn't record guest kernel [%d]'s reference"
+ " relocation symbol.\n", machine->pid);
+
+ /*
+ * We use _stext for the guest kernel because the guest kernel's
+ * /proc/kallsyms sometimes has no _text.
+ */
+ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+ machine);
+ if (err < 0)
+ pr_err("Couldn't record guest kernel [%d]'s reference"
+ " relocation symbol.\n", machine->pid);
+}
+
+static struct perf_event_header finished_round_event = {
+ .size = sizeof(struct perf_event_header),
+ .type = PERF_RECORD_FINISHED_ROUND,
+};
+
+static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist,
+ bool overwrite)
+{
+ u64 bytes_written = rec->bytes_written;
+ int i;
+ int rc = 0;
+ struct perf_mmap *maps;
+
+ if (!evlist)
+ return 0;
+
+ maps = overwrite ? evlist->overwrite_mmap : evlist->mmap;
+ if (!maps)
+ return 0;
+
+ if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING)
+ return 0;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap;
+
+ if (maps[i].base) {
+ if (perf_mmap__push(&maps[i], rec, record__pushfn) != 0) {
+ rc = -1;
+ goto out;
+ }
+ }
+
+ if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
+ record__auxtrace_mmap_read(rec, mm) != 0) {
+ rc = -1;
+ goto out;
+ }
+ }
+
+ /*
+ * Mark the round finished in case we wrote
+ * at least one event.
+ */
+ if (bytes_written != rec->bytes_written)
+ rc = record__write(rec, &finished_round_event, sizeof(finished_round_event));
+
+ if (overwrite)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+out:
+ return rc;
+}
+
+static int record__mmap_read_all(struct record *rec)
+{
+ int err;
+
+ err = record__mmap_read_evlist(rec, rec->evlist, false);
+ if (err)
+ return err;
+
+ return record__mmap_read_evlist(rec, rec->evlist, true);
+}
+
+static void record__init_features(struct record *rec)
+{
+ struct perf_session *session = rec->session;
+ int feat;
+
+ for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+ perf_header__set_feat(&session->header, feat);
+
+ if (rec->no_buildid)
+ perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+
+ if (!have_tracepoints(&rec->evlist->entries))
+ perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+
+ if (!rec->opts.branch_stack)
+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
+ if (!rec->opts.full_auxtrace)
+ perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+
+ perf_header__clear_feat(&session->header, HEADER_STAT);
+}
+
+static void
+record__finish_output(struct record *rec)
+{
+ struct perf_data *data = &rec->data;
+ int fd = perf_data__fd(data);
+
+ if (data->is_pipe)
+ return;
+
+ rec->session->header.data_size += rec->bytes_written;
+ data->size = lseek(perf_data__fd(data), 0, SEEK_CUR);
+
+ if (!rec->no_buildid) {
+ process_buildids(rec);
+
+ if (rec->buildid_all)
+ dsos__hit_all(rec->session);
+ }
+ perf_session__write_header(rec->session, rec->evlist, fd, true);
+}
+
+static int record__synthesize_workload(struct record *rec, bool tail)
+{
+ int err;
+ struct thread_map *thread_map;
+
+ if (rec->opts.tail_synthesize != tail)
+ return 0;
+
+ thread_map = thread_map__new_by_tid(rec->evlist->workload.pid);
+ if (thread_map == NULL)
+ return -1;
+
+ err = perf_event__synthesize_thread_map(&rec->tool, thread_map,
+ process_synthesized_event,
+ &rec->session->machines.host,
+ rec->opts.sample_address,
+ rec->opts.proc_map_timeout);
+ thread_map__put(thread_map);
+ return err;
+}
+
+static int record__synthesize(struct record *rec, bool tail);
+
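+/*
+ * Finish the current output file and dump it as <path>.<timestamp>;
+ * unless called at exit, recording then continues into a fresh file.
+ */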
+static int
+record__switch_output(struct record *rec, bool at_exit)
+{
+ struct perf_data *data = &rec->data;
+ int fd, err;
+
+ /* Same Size: "2015122520103046"*/
+ char timestamp[] = "InvalidTimestamp";
+
+ record__synthesize(rec, true);
+ if (target__none(&rec->opts.target))
+ record__synthesize_workload(rec, true);
+
+ rec->samples = 0;
+ record__finish_output(rec);
+ err = fetch_current_timestamp(timestamp, sizeof(timestamp));
+ if (err) {
+ pr_err("Failed to get current timestamp\n");
+ return -EINVAL;
+ }
+
+ fd = perf_data__switch(data, timestamp,
+ rec->session->header.data_offset,
+ at_exit);
+ if (fd >= 0 && !at_exit) {
+ rec->bytes_written = 0;
+ rec->session->header.data_size = 0;
+ }
+
+ if (!quiet)
+ fprintf(stderr, "[ perf record: Dump %s.%s ]\n",
+ data->file.path, timestamp);
+
+ /* Output tracking events */
+ if (!at_exit) {
+ record__synthesize(rec, false);
+
+ /*
+ * In 'perf record --switch-output' without -a,
+ * record__synthesize() in record__switch_output() won't
+ * generate tracking events because there's no thread_map
+ * in the evlist, so the newly created perf.data won't
+ * contain map and comm information.
+ * Create a fake thread_map and directly call
+ * perf_event__synthesize_thread_map() for those events.
+ */
+ if (target__none(&rec->opts.target))
+ record__synthesize_workload(rec, false);
+ }
+ return fd;
+}
+
+static volatile int workload_exec_errno;
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1
+ * if the fork fails, since we asked for it by setting its
+ * want_signal to true.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused,
+ siginfo_t *info,
+ void *ucontext __maybe_unused)
+{
+ workload_exec_errno = info->si_value.sival_int;
+ done = 1;
+ child_finished = 1;
+}
+
+static void snapshot_sig_handler(int sig);
+static void alarm_sig_handler(int sig);
+
+int __weak
+perf_event__synth_time_conv(const struct perf_event_mmap_page *pc __maybe_unused,
+ struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+static const struct perf_event_mmap_page *
+perf_evlist__pick_pc(struct perf_evlist *evlist)
+{
+ if (evlist) {
+ if (evlist->mmap && evlist->mmap[0].base)
+ return evlist->mmap[0].base;
+ if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base)
+ return evlist->overwrite_mmap[0].base;
+ }
+ return NULL;
+}
+
+static const struct perf_event_mmap_page *record__pick_pc(struct record *rec)
+{
+ const struct perf_event_mmap_page *pc;
+
+ pc = perf_evlist__pick_pc(rec->evlist);
+ if (pc)
+ return pc;
+ return NULL;
+}
+
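+/*
+ * Synthesize the side-band events (event attributes, tracing data, kernel
+ * and module mmaps, thread and cpu maps) that the report side needs to
+ * make sense of the samples.
+ */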
+static int record__synthesize(struct record *rec, bool tail)
+{
+ struct perf_session *session = rec->session;
+ struct machine *machine = &session->machines.host;
+ struct perf_data *data = &rec->data;
+ struct record_opts *opts = &rec->opts;
+ struct perf_tool *tool = &rec->tool;
+ int fd = perf_data__fd(data);
+ int err = 0;
+
+ if (rec->opts.tail_synthesize != tail)
+ return 0;
+
+ if (data->is_pipe) {
+ /*
+ * We need to synthesize events first, because some
+ * features work on top of them (on the report side).
+ */
+ err = perf_event__synthesize_attrs(tool, session,
+ process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ goto out;
+ }
+
+ err = perf_event__synthesize_features(tool, session, rec->evlist,
+ process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize features.\n");
+ return err;
+ }
+
+ if (have_tracepoints(&rec->evlist->entries)) {
+ /*
+ * FIXME err <= 0 here actually means that
+ * there were no tracepoints, so it's not really
+ * an error, just that we don't need to
+ * synthesize anything. We really have to
+ * return this more properly and also
+ * propagate the errors that currently call die()
+ */
+ err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
+ process_synthesized_event);
+ if (err <= 0) {
+ pr_err("Couldn't record tracing data.\n");
+ goto out;
+ }
+ rec->bytes_written += err;
+ }
+ }
+
+ err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
+ process_synthesized_event, machine);
+ if (err)
+ goto out;
+
+ if (rec->opts.full_auxtrace) {
+ err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+ session, process_synthesized_event);
+ if (err)
+ goto out;
+ }
+
+ if (!perf_evlist__exclude_kernel(rec->evlist)) {
+ err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
+ machine);
+ WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n"
+ "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+ "Check /proc/kallsyms permission or run as root.\n");
+
+ err = perf_event__synthesize_modules(tool, process_synthesized_event,
+ machine);
+ WARN_ONCE(err < 0, "Couldn't record kernel module information.\n"
+ "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
+ "Check /proc/modules permission or run as root.\n");
+ }
+
+ if (perf_guest) {
+ machines__process_guests(&session->machines,
+ perf_event__synthesize_guest_os, tool);
+ }
+
+ err = perf_event__synthesize_extra_attr(&rec->tool,
+ rec->evlist,
+ process_synthesized_event,
+ data->is_pipe);
+ if (err)
+ goto out;
+
+ err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads,
+ process_synthesized_event,
+ NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize cpu map.\n");
+ return err;
+ }
+
+ err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
+ process_synthesized_event, opts->sample_address,
+ opts->proc_map_timeout, 1);
+out:
+ return err;
+}
+
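+/*
+ * The main recording loop: set up signal handlers, open the events,
+ * synthesize the metadata, start the workload if one was given, then read
+ * the mmaps until the workload exits or recording is interrupted.
+ */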
+static int __cmd_record(struct record *rec, int argc, const char **argv)
+{
+ int err;
+ int status = 0;
+ unsigned long waking = 0;
+ const bool forks = argc > 0;
+ struct perf_tool *tool = &rec->tool;
+ struct record_opts *opts = &rec->opts;
+ struct perf_data *data = &rec->data;
+ struct perf_session *session;
+ bool disabled = false, draining = false;
+ int fd;
+
+ atexit(record__sig_exit);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+ signal(SIGSEGV, sigsegv_handler);
+
+ if (rec->opts.record_namespaces)
+ tool->namespace_events = true;
+
+ if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
+ signal(SIGUSR2, snapshot_sig_handler);
+ if (rec->opts.auxtrace_snapshot_mode)
+ trigger_on(&auxtrace_snapshot_trigger);
+ if (rec->switch_output.enabled)
+ trigger_on(&switch_output_trigger);
+ } else {
+ signal(SIGUSR2, SIG_IGN);
+ }
+
+ session = perf_session__new(data, false, tool);
+ if (session == NULL) {
+ pr_err("Perf session creation failed.\n");
+ return -1;
+ }
+
+ fd = perf_data__fd(data);
+ rec->session = session;
+
+ record__init_features(rec);
+
+ if (forks) {
+ err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
+ argv, data->is_pipe,
+ workload_exec_failed_signal);
+ if (err < 0) {
+ pr_err("Couldn't run the workload!\n");
+ status = err;
+ goto out_delete_session;
+ }
+ }
+
+ /*
+ * If we have just a single event and are sending data
+ * through a pipe, we need to force the id allocation,
+ * because we synthesize the event name through the pipe
+ * and need the id for that.
+ */
+ if (data->is_pipe && rec->evlist->nr_entries == 1)
+ rec->opts.sample_id = true;
+
+ if (record__open(rec) != 0) {
+ err = -1;
+ goto out_child;
+ }
+
+ err = bpf__apply_obj_config();
+ if (err) {
+ char errbuf[BUFSIZ];
+
+ bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+ pr_err("ERROR: Apply config to BPF failed: %s\n",
+ errbuf);
+ goto out_child;
+ }
+
+ /*
+ * Normally perf_session__new would do this, but it doesn't have the
+ * evlist.
+ */
+ if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+ pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
+ rec->tool.ordered_events = false;
+ }
+
+ if (!rec->evlist->nr_groups)
+ perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
+
+ if (data->is_pipe) {
+ err = perf_header__write_pipe(fd);
+ if (err < 0)
+ goto out_child;
+ } else {
+ err = perf_session__write_header(session, rec->evlist, fd, false);
+ if (err < 0)
+ goto out_child;
+ }
+
+ if (!rec->no_buildid
+ && !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
+ pr_err("Couldn't generate buildids. "
+ "Use --no-buildid to profile anyway.\n");
+ err = -1;
+ goto out_child;
+ }
+
+ err = record__synthesize(rec, false);
+ if (err < 0)
+ goto out_child;
+
+ if (rec->realtime_prio) {
+ struct sched_param param;
+
+ param.sched_priority = rec->realtime_prio;
+ if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+ pr_err("Could not set realtime priority.\n");
+ err = -1;
+ goto out_child;
+ }
+ }
+
+ /*
+ * When perf is starting the traced process, all the events
+ * (apart from group members) have enable_on_exec=1 set,
+ * so don't spoil it by prematurely enabling them.
+ */
+ if (!target__none(&opts->target) && !opts->initial_delay)
+ perf_evlist__enable(rec->evlist);
+
+ /*
+ * Let the child rip
+ */
+ if (forks) {
+ struct machine *machine = &session->machines.host;
+ union perf_event *event;
+ pid_t tgid;
+
+ event = malloc(sizeof(event->comm) + machine->id_hdr_size);
+ if (event == NULL) {
+ err = -ENOMEM;
+ goto out_child;
+ }
+
+ /*
+ * Some H/W events are generated before the COMM event,
+ * which is emitted during exec(), so perf script
+ * cannot see a correct process name for those events.
+ * Synthesize a COMM event to prevent it.
+ */
+ tgid = perf_event__synthesize_comm(tool, event,
+ rec->evlist->workload.pid,
+ process_synthesized_event,
+ machine);
+ free(event);
+
+ if (tgid == -1)
+ goto out_child;
+
+ event = malloc(sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (event == NULL) {
+ err = -ENOMEM;
+ goto out_child;
+ }
+
+ /*
+ * Synthesize NAMESPACES event for the command specified.
+ */
+ perf_event__synthesize_namespaces(tool, event,
+ rec->evlist->workload.pid,
+ tgid, process_synthesized_event,
+ machine);
+ free(event);
+
+ perf_evlist__start_workload(rec->evlist);
+ }
+
+ if (opts->initial_delay) {
+ usleep(opts->initial_delay * USEC_PER_MSEC);
+ perf_evlist__enable(rec->evlist);
+ }
+
+ trigger_ready(&auxtrace_snapshot_trigger);
+ trigger_ready(&switch_output_trigger);
+ perf_hooks__invoke_record_start();
+ for (;;) {
+ unsigned long long hits = rec->samples;
+
+ /*
+ * rec->evlist->bkw_mmap_state may be BKW_MMAP_EMPTY here:
+ * when done == true and hits != rec->samples in the
+ * previous round.
+ *
+ * perf_evlist__toggle_bkw_mmap ensures we never
+ * convert BKW_MMAP_EMPTY to BKW_MMAP_DATA_PENDING.
+ */
+ if (trigger_is_hit(&switch_output_trigger) || done || draining)
+ perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_DATA_PENDING);
+
+ if (record__mmap_read_all(rec) < 0) {
+ trigger_error(&auxtrace_snapshot_trigger);
+ trigger_error(&switch_output_trigger);
+ err = -1;
+ goto out_child;
+ }
+
+ if (auxtrace_record__snapshot_started) {
+ auxtrace_record__snapshot_started = 0;
+ if (!trigger_is_error(&auxtrace_snapshot_trigger))
+ record__read_auxtrace_snapshot(rec);
+ if (trigger_is_error(&auxtrace_snapshot_trigger)) {
+ pr_err("AUX area tracing snapshot failed\n");
+ err = -1;
+ goto out_child;
+ }
+ }
+
+ if (trigger_is_hit(&switch_output_trigger)) {
+ /*
+ * If switch_output_trigger is hit, the data in
+ * overwritable ring buffer should have been collected,
+ * so bkw_mmap_state should be set to BKW_MMAP_EMPTY.
+ *
+ * If SIGUSR2 was raised after or during record__mmap_read_all(),
+ * record__mmap_read_all() didn't collect data from the
+ * overwritable ring buffer. Read again.
+ */
+ if (rec->evlist->bkw_mmap_state == BKW_MMAP_RUNNING)
+ continue;
+ trigger_ready(&switch_output_trigger);
+
+ /*
+ * Reenable events in overwrite ring buffer after
+ * record__mmap_read_all(): we should have collected
+ * data from it.
+ */
+ perf_evlist__toggle_bkw_mmap(rec->evlist, BKW_MMAP_RUNNING);
+
+ if (!quiet)
+ fprintf(stderr, "[ perf record: dump data: Woken up %ld times ]\n",
+ waking);
+ waking = 0;
+ fd = record__switch_output(rec, false);
+ if (fd < 0) {
+ pr_err("Failed to switch to new file\n");
+ trigger_error(&switch_output_trigger);
+ err = fd;
+ goto out_child;
+ }
+
+ /* re-arm the alarm */
+ if (rec->switch_output.time)
+ alarm(rec->switch_output.time);
+ }
+
+ if (hits == rec->samples) {
+ if (done || draining)
+ break;
+ err = perf_evlist__poll(rec->evlist, -1);
+ /*
+ * Propagate error, only if there's any. Ignore positive
+ * number of returned events and interrupt error.
+ */
+ if (err > 0 || (err < 0 && errno == EINTR))
+ err = 0;
+ waking++;
+
+ if (perf_evlist__filter_pollfd(rec->evlist, POLLERR | POLLHUP) == 0)
+ draining = true;
+ }
+
+ /*
+ * When perf is starting the traced process, the events die
+ * with the process at the end and we wait for that, so there
+ * is no need to disable events in this case.
+ */
+ if (done && !disabled && !target__none(&opts->target)) {
+ trigger_off(&auxtrace_snapshot_trigger);
+ perf_evlist__disable(rec->evlist);
+ disabled = true;
+ }
+ }
+ trigger_off(&auxtrace_snapshot_trigger);
+ trigger_off(&switch_output_trigger);
+
+ if (forks && workload_exec_errno) {
+ char msg[STRERR_BUFSIZE];
+ const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
+ pr_err("Workload failed: %s\n", emsg);
+ err = -1;
+ goto out_child;
+ }
+
+ if (!quiet)
+ fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
+
+ if (target__none(&rec->opts.target))
+ record__synthesize_workload(rec, true);
+
+out_child:
+ if (forks) {
+ int exit_status;
+
+ if (!child_finished)
+ kill(rec->evlist->workload.pid, SIGTERM);
+
+ wait(&exit_status);
+
+ if (err < 0)
+ status = err;
+ else if (WIFEXITED(exit_status))
+ status = WEXITSTATUS(exit_status);
+ else if (WIFSIGNALED(exit_status))
+ signr = WTERMSIG(exit_status);
+ } else
+ status = err;
+
+ record__synthesize(rec, true);
+ /* this will be recalculated during process_buildids() */
+ rec->samples = 0;
+
+ if (!err) {
+ if (!rec->timestamp_filename) {
+ record__finish_output(rec);
+ } else {
+ fd = record__switch_output(rec, true);
+ if (fd < 0) {
+ status = fd;
+ goto out_delete_session;
+ }
+ }
+ }
+
+ perf_hooks__invoke_record_end();
+
+ if (!err && !quiet) {
+ char samples[128];
+ const char *postfix = rec->timestamp_filename ?
+ ".<timestamp>" : "";
+
+ if (rec->samples && !rec->opts.full_auxtrace)
+ scnprintf(samples, sizeof(samples),
+ " (%" PRIu64 " samples)", rec->samples);
+ else
+ samples[0] = '\0';
+
+ fprintf(stderr, "[ perf record: Captured and wrote %.3f MB %s%s%s ]\n",
+ perf_data__size(data) / 1024.0 / 1024.0,
+ data->file.path, postfix, samples);
+ }
+
+out_delete_session:
+ perf_session__delete(session);
+ return status;
+}
+
+static void callchain_debug(struct callchain_param *callchain)
+{
+ static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
+
+ pr_debug("callchain: type %s\n", str[callchain->record_mode]);
+
+ if (callchain->record_mode == CALLCHAIN_DWARF)
+ pr_debug("callchain: stack dump size %d\n",
+ callchain->dump_size);
+}
+
+int record_opts__parse_callchain(struct record_opts *record,
+ struct callchain_param *callchain,
+ const char *arg, bool unset)
+{
+ int ret;
+ callchain->enabled = !unset;
+
+ /* --no-call-graph */
+ if (unset) {
+ callchain->record_mode = CALLCHAIN_NONE;
+ pr_debug("callchain: disabled\n");
+ return 0;
+ }
+
+ ret = parse_callchain_record_opt(arg, callchain);
+ if (!ret) {
+ /* Enable data address sampling for DWARF unwind. */
+ if (callchain->record_mode == CALLCHAIN_DWARF)
+ record->sample_address = true;
+ callchain_debug(callchain);
+ }
+
+ return ret;
+}
+
+int record_parse_callchain_opt(const struct option *opt,
+ const char *arg,
+ int unset)
+{
+ return record_opts__parse_callchain(opt->value, &callchain_param, arg, unset);
+}
+
+int record_callchain_opt(const struct option *opt,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = true;
+
+ if (callchain->record_mode == CALLCHAIN_NONE)
+ callchain->record_mode = CALLCHAIN_FP;
+
+ callchain_debug(callchain);
+ return 0;
+}
+
+static int perf_record_config(const char *var, const char *value, void *cb)
+{
+ struct record *rec = cb;
+
+ if (!strcmp(var, "record.build-id")) {
+ if (!strcmp(value, "cache"))
+ rec->no_buildid_cache = false;
+ else if (!strcmp(value, "no-cache"))
+ rec->no_buildid_cache = true;
+ else if (!strcmp(value, "skip"))
+ rec->no_buildid = true;
+ else
+ return -1;
+ return 0;
+ }
+ if (!strcmp(var, "record.call-graph")) {
+ var = "call-graph.record-mode";
+ return perf_default_config(var, value, cb);
+ }
+
+ return 0;
+}
+
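+/*
+ * Map the clockid names accepted by -k/--clockid to the numeric ids passed
+ * to the kernel, see parse_clockid() below.
+ */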
+struct clockid_map {
+ const char *name;
+ int clockid;
+};
+
+#define CLOCKID_MAP(n, c) \
+ { .name = n, .clockid = (c), }
+
+#define CLOCKID_END { .name = NULL, }
+
+/*
+ * Define the clockids missing from older system headers; we need to build
+ * on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+ /* available for all events, NMI safe */
+ CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+ CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+ /* available for some events */
+ CLOCKID_MAP("realtime", CLOCK_REALTIME),
+ CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+ CLOCKID_MAP("tai", CLOCK_TAI),
+
+ /* available for the lazy */
+ CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+ CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+ CLOCKID_MAP("real", CLOCK_REALTIME),
+ CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+ CLOCKID_END,
+};
+
+static int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+ const struct clockid_map *cm;
+ const char *ostr = str;
+
+ if (unset) {
+ opts->use_clockid = 0;
+ return 0;
+ }
+
+ /* no arg passed */
+ if (!str)
+ return 0;
+
+	/* do not allow setting it twice */
+ if (opts->use_clockid)
+ return -1;
+
+ opts->use_clockid = true;
+
+	/* if it's a number, we're done */
+ if (sscanf(str, "%d", &opts->clockid) == 1)
+ return 0;
+
+ /* allow a "CLOCK_" prefix to the name */
+ if (!strncasecmp(str, "CLOCK_", 6))
+ str += 6;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (!strcasecmp(str, cm->name)) {
+ opts->clockid = cm->clockid;
+ return 0;
+ }
+ }
+
+ opts->use_clockid = false;
+ ui__warning("unknown clockid %s, check man page\n", ostr);
+ return -1;
+}
+
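+/*
+ * Parse the -m/--mmap-pages argument "pages[,pages]": the first value sizes
+ * the data mmaps, the optional second one the AUX area tracing mmaps.
+ */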
+static int record__parse_mmap_pages(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ struct record_opts *opts = opt->value;
+ char *s, *p;
+ unsigned int mmap_pages;
+ int ret;
+
+ if (!str)
+ return -EINVAL;
+
+ s = strdup(str);
+ if (!s)
+ return -ENOMEM;
+
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ if (*s) {
+ ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
+ if (ret)
+ goto out_free;
+ opts->mmap_pages = mmap_pages;
+ }
+
+ if (!p) {
+ ret = 0;
+ goto out_free;
+ }
+
+ ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
+ if (ret)
+ goto out_free;
+
+ opts->auxtrace_mmap_pages = mmap_pages;
+
+out_free:
+ free(s);
+ return ret;
+}
+
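+/*
+ * Warn when the --switch-output size threshold is smaller than half the
+ * mmap buffer: output is flushed in wakeup-sized chunks, so the resulting
+ * perf.data files will overshoot the threshold.
+ */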
+static void switch_output_size_warn(struct record *rec)
+{
+ u64 wakeup_size = perf_evlist__mmap_size(rec->opts.mmap_pages);
+ struct switch_output *s = &rec->switch_output;
+
+ wakeup_size /= 2;
+
+ if (s->size < wakeup_size) {
+ char buf[100];
+
+ unit_number__scnprintf(buf, sizeof(buf), wakeup_size);
+		pr_warning("WARNING: switch-output data size is lower than the "
+			   "wakeup kernel buffer size (%s); "
+			   "expect bigger perf.data sizes\n", buf);
+ }
+}
+
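+/*
+ * Parse the --switch-output argument: "signal", a size tagged B/K/M/G, or a
+ * time tagged s/m/h/d. Any of these implies timestamped output file names.
+ */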
+static int switch_output_setup(struct record *rec)
+{
+ struct switch_output *s = &rec->switch_output;
+ static struct parse_tag tags_size[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+ static struct parse_tag tags_time[] = {
+ { .tag = 's', .mult = 1 },
+ { .tag = 'm', .mult = 60 },
+ { .tag = 'h', .mult = 60*60 },
+ { .tag = 'd', .mult = 60*60*24 },
+ { .tag = 0 },
+ };
+ unsigned long val;
+
+ if (!s->set)
+ return 0;
+
+ if (!strcmp(s->str, "signal")) {
+ s->signal = true;
+ pr_debug("switch-output with SIGUSR2 signal\n");
+ goto enabled;
+ }
+
+ val = parse_tag_value(s->str, tags_size);
+ if (val != (unsigned long) -1) {
+ s->size = val;
+ pr_debug("switch-output with %s size threshold\n", s->str);
+ goto enabled;
+ }
+
+ val = parse_tag_value(s->str, tags_time);
+ if (val != (unsigned long) -1) {
+ s->time = val;
+ pr_debug("switch-output with %s time threshold (%lu seconds)\n",
+ s->str, s->time);
+ goto enabled;
+ }
+
+ return -1;
+
+enabled:
+ rec->timestamp_filename = true;
+ s->enabled = true;
+
+ if (s->size && !rec->opts.no_buffering)
+ switch_output_size_warn(rec);
+
+ return 0;
+}
+
+static const char * const __record_usage[] = {
+ "perf record [<options>] [<command>]",
+ "perf record [<options>] -- <command> [<options>]",
+ NULL
+};
+const char * const *record_usage = __record_usage;
+
+/*
+ * XXX Ideally this would be local to cmd_record() and passed to a record__new,
+ * because we need access to it in record__exit(), which is called after
+ * cmd_record() exits; but since record_options needs to be accessible to
+ * builtin-script, leave it here.
+ *
+ * At least we don't touch it in all the other functions here directly.
+ *
+ * Just say no to tons of global variables, sigh.
+ */
+static struct record record = {
+ .opts = {
+ .sample_time = true,
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 4000,
+ .target = {
+ .uses_mmap = true,
+ .default_per_cpu = true,
+ },
+ .proc_map_timeout = 500,
+ },
+ .tool = {
+ .sample = process_sample_event,
+ .fork = perf_event__process_fork,
+ .exit = perf_event__process_exit,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .ordered_events = true,
+ },
+};
+
+const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
+ "\n\t\t\t\tDefault: fp";
+
+static bool dry_run;
+
+/*
+ * XXX Will stay a global variable till we fix builtin-script.c to stop messing
+ * with it and switch to using the library functions in perf_evlist that came
+ * from builtin-record.c, i.e. use record_opts,
+ * perf_evlist__prepare_workload, etc. instead of fork+exec'ing 'perf record'
+ * using pipes, etc.
+ */
+static struct option __record_options[] = {
+ OPT_CALLBACK('e', "event", &record.evlist, "event",
+ "event selector. use 'perf list' to list available events",
+ parse_events_option),
+ OPT_CALLBACK(0, "filter", &record.evlist, "filter",
+ "event filter", parse_filter),
+ OPT_CALLBACK_NOOPT(0, "exclude-perf", &record.evlist,
+ NULL, "don't record events from perf itself",
+ exclude_perf),
+ OPT_STRING('p', "pid", &record.opts.target.pid, "pid",
+ "record events on existing process id"),
+ OPT_STRING('t', "tid", &record.opts.target.tid, "tid",
+ "record events on existing thread id"),
+ OPT_INTEGER('r', "realtime", &record.realtime_prio,
+ "collect data with this RT SCHED_FIFO priority"),
+ OPT_BOOLEAN(0, "no-buffering", &record.opts.no_buffering,
+ "collect data without buffering"),
+ OPT_BOOLEAN('R', "raw-samples", &record.opts.raw_samples,
+ "collect raw sample records from all opened counters"),
+ OPT_BOOLEAN('a', "all-cpus", &record.opts.target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &record.opts.target.cpu_list, "cpu",
+ "list of cpus to monitor"),
+ OPT_U64('c', "count", &record.opts.user_interval, "event period to sample"),
+ OPT_STRING('o', "output", &record.data.file.path, "file",
+ "output file name"),
+ OPT_BOOLEAN_SET('i', "no-inherit", &record.opts.no_inherit,
+ &record.opts.no_inherit_set,
+ "child tasks do not inherit counters"),
+ OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
+ "synthesize non-sample events at the end of output"),
+ OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
+ OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
+ "Fail if the specified frequency can't be used"),
+ OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
+ "profile at this frequency",
+ record__parse_freq),
+ OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
+ "number of mmap data pages and AUX area tracing mmap pages",
+ record__parse_mmap_pages),
+ OPT_BOOLEAN(0, "group", &record.opts.group,
+ "put the counters into a counter group"),
+ OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+ NULL, "enables call-graph recording" ,
+ &record_callchain_opt),
+ OPT_CALLBACK(0, "call-graph", &record.opts,
+ "record_mode[,record_size]", record_callchain_help,
+ &record_parse_callchain_opt),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any messages"),
+ OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
+ "per thread counts"),
+ OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+ OPT_BOOLEAN(0, "phys-data", &record.opts.sample_phys_addr,
+ "Record the sample physical addresses"),
+ OPT_BOOLEAN(0, "sample-cpu", &record.opts.sample_cpu, "Record the sample cpu"),
+ OPT_BOOLEAN_SET('T', "timestamp", &record.opts.sample_time,
+ &record.opts.sample_time_set,
+ "Record the sample timestamps"),
+ OPT_BOOLEAN_SET('P', "period", &record.opts.period, &record.opts.period_set,
+ "Record the sample period"),
+ OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
+ "don't sample"),
+ OPT_BOOLEAN_SET('N', "no-buildid-cache", &record.no_buildid_cache,
+ &record.no_buildid_cache_set,
+ "do not update the buildid cache"),
+ OPT_BOOLEAN_SET('B', "no-buildid", &record.no_buildid,
+ &record.no_buildid_set,
+ "do not collect buildids in perf.data"),
+ OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
+ "monitor event in cgroup name only",
+ parse_cgroups),
+ OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
+ "ms to wait before starting measurement after program start"),
+ OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
+ "user to profile"),
+
+ OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
+ "branch any", "sample any taken branches",
+ parse_branch_stack),
+
+ OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
+ "branch filter mask", "branch stack filter modes",
+ parse_branch_stack),
+ OPT_BOOLEAN('W', "weight", &record.opts.sample_weight,
+ "sample by weight (on special events only)"),
+ OPT_BOOLEAN(0, "transaction", &record.opts.sample_transaction,
+ "sample transaction flags (special events only)"),
+ OPT_BOOLEAN(0, "per-thread", &record.opts.target.per_thread,
+ "use per-thread mmaps"),
+ OPT_CALLBACK_OPTARG('I', "intr-regs", &record.opts.sample_intr_regs, NULL, "any register",
+ "sample selected machine registers on interrupt,"
+ " use -I ? to list register names", parse_regs),
+ OPT_CALLBACK_OPTARG(0, "user-regs", &record.opts.sample_user_regs, NULL, "any register",
+		    "sample selected machine registers in user space,"
+		    " use --user-regs=? to list register names", parse_regs),
+ OPT_BOOLEAN(0, "running-time", &record.opts.running_time,
+ "Record running/enabled time of read (:S) events"),
+ OPT_CALLBACK('k', "clockid", &record.opts,
+ "clockid", "clockid to use for events, see clock_gettime()",
+ parse_clockid),
+ OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
+			  "opts", "AUX area tracing snapshot mode", ""),
+ OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
+ OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
+ "Record namespaces events"),
+ OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
+ "Record context switch events"),
+ OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
+ "Configure all used events to run in kernel space.",
+ PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN_FLAG(0, "all-user", &record.opts.all_user,
+ "Configure all used events to run in user space.",
+ PARSE_OPT_EXCLUSIVE),
+ OPT_STRING(0, "clang-path", &llvm_param.clang_path, "clang path",
+ "clang binary to use for compiling BPF scriptlets"),
+ OPT_STRING(0, "clang-opt", &llvm_param.clang_opt, "clang options",
+ "options passed to clang when compiling BPF scriptlets"),
+ OPT_STRING(0, "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_BOOLEAN(0, "buildid-all", &record.buildid_all,
+ "Record build-id of all DSOs regardless of hits"),
+ OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename,
+ "append timestamp to output filename"),
+ OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary,
+ "Record timestamp boundary (time of first/last samples)"),
+ OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str,
+ &record.switch_output.set, "signal,size,time",
+			  "Switch output when receiving SIGUSR2 or when crossing the size or time threshold",
+ "signal"),
+ OPT_BOOLEAN(0, "dry-run", &dry_run,
+ "Parse options then exit"),
+ OPT_END()
+};
+
+struct option *record_options = __record_options;
+
+int cmd_record(int argc, const char **argv)
+{
+ int err;
+ struct record *rec = &record;
+ char errbuf[BUFSIZ];
+
+ setlocale(LC_ALL, "");
+
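+	/*
+	 * Mark options whose build-time dependencies (libbpf, DWARF) are
+	 * missing, so that using them reports why they are unavailable
+	 * instead of failing obscurely.
+	 */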
+#ifndef HAVE_LIBBPF_SUPPORT
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, "NO_LIBBPF=1", c)
+ set_nobuild('\0', "clang-path", true);
+ set_nobuild('\0', "clang-opt", true);
+# undef set_nobuild
+#endif
+
+#ifndef HAVE_BPF_PROLOGUE
+# if !defined (HAVE_DWARF_SUPPORT)
+# define REASON "NO_DWARF=1"
+# elif !defined (HAVE_LIBBPF_SUPPORT)
+# define REASON "NO_LIBBPF=1"
+# else
+# define REASON "this architecture doesn't support BPF prologue"
+# endif
+# define set_nobuild(s, l, c) set_option_nobuild(record_options, s, l, REASON, c)
+ set_nobuild('\0', "vmlinux", true);
+# undef set_nobuild
+# undef REASON
+#endif
+
+ rec->evlist = perf_evlist__new();
+ if (rec->evlist == NULL)
+ return -ENOMEM;
+
+ err = perf_config(perf_record_config, rec);
+ if (err)
+ return err;
+
+ argc = parse_options(argc, argv, record_options, record_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ if (quiet)
+ perf_quiet_option();
+
+ /* Make system wide (-a) the default target. */
+ if (!argc && target__none(&rec->opts.target))
+ rec->opts.target.system_wide = true;
+
+	if (nr_cgroups && !rec->opts.target.system_wide) {
+		usage_with_options_msg(record_usage, record_options,
+			"cgroup monitoring only available in system-wide mode");
+	}
+ if (rec->opts.record_switch_events &&
+ !perf_can_record_switch_events()) {
+ ui__error("kernel does not support recording context switch events\n");
+ parse_options_usage(record_usage, record_options, "switch-events", 0);
+ return -EINVAL;
+ }
+
+ if (switch_output_setup(rec)) {
+ parse_options_usage(record_usage, record_options, "switch-output", 0);
+ return -EINVAL;
+ }
+
+ if (rec->switch_output.time) {
+ signal(SIGALRM, alarm_sig_handler);
+ alarm(rec->switch_output.time);
+ }
+
+ /*
+ * Allow aliases to facilitate the lookup of symbols for address
+ * filters. Refer to auxtrace_parse_filters().
+ */
+ symbol_conf.allow_aliases = true;
+
+ symbol__init(NULL);
+
+ err = record__auxtrace_init(rec);
+ if (err)
+ goto out;
+
+ if (dry_run)
+ goto out;
+
+ err = bpf__setup_stdout(rec->evlist);
+ if (err) {
+ bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
+		pr_err("ERROR: Setting up BPF stdout failed: %s\n",
+		       errbuf);
+ goto out;
+ }
+
+ err = -ENOMEM;
+
+ if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist))
+ pr_warning(
+"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
+"check /proc/sys/kernel/kptr_restrict.\n\n"
+"Samples in kernel functions may not be resolved if a suitable vmlinux\n"
+"file is not found in the buildid cache or in the vmlinux path.\n\n"
+"Samples in kernel modules won't be resolved at all.\n\n"
+"If some relocation was applied (e.g. kexec) symbols may be misresolved\n"
+"even with a suitable vmlinux or kallsyms file.\n\n");
+
+ if (rec->no_buildid_cache || rec->no_buildid) {
+ disable_buildid_cache();
+ } else if (rec->switch_output.enabled) {
+		/*
+		 * In 'perf record --switch-output', disable buildid
+		 * generation by default to reduce data file switching
+		 * overhead. Still generate buildids if they are explicitly
+		 * requested, using:
+		 *
+		 *  perf record --switch-output --no-no-buildid \
+		 *               --no-no-buildid-cache
+		 *
+		 * The following code is equivalent to:
+		 *
+		 *  if ((rec->no_buildid || !rec->no_buildid_set) &&
+		 *      (rec->no_buildid_cache || !rec->no_buildid_cache_set))
+		 *		disable_buildid_cache();
+		 */
+ bool disable = true;
+
+ if (rec->no_buildid_set && !rec->no_buildid)
+ disable = false;
+ if (rec->no_buildid_cache_set && !rec->no_buildid_cache)
+ disable = false;
+ if (disable) {
+ rec->no_buildid = true;
+ rec->no_buildid_cache = true;
+ disable_buildid_cache();
+ }
+ }
+
+ if (record.opts.overwrite)
+ record.opts.tail_synthesize = true;
+
+ if (rec->evlist->nr_entries == 0 &&
+ __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
+ pr_err("Not enough memory for event selector list\n");
+ goto out;
+ }
+
+ if (rec->opts.target.tid && !rec->opts.no_inherit_set)
+ rec->opts.no_inherit = true;
+
+ err = target__validate(&rec->opts.target);
+ if (err) {
+ target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
+ ui__warning("%s\n", errbuf);
+ }
+
+ err = target__parse_uid(&rec->opts.target);
+ if (err) {
+ int saved_errno = errno;
+
+ target__strerror(&rec->opts.target, err, errbuf, BUFSIZ);
+ ui__error("%s", errbuf);
+
+ err = -saved_errno;
+ goto out;
+ }
+
+	/* Enable ignoring missing threads when the -u or -p option is given. */
+ rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
+
+ err = -ENOMEM;
+ if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
+ usage_with_options(record_usage, record_options);
+
+ err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
+ if (err)
+ goto out;
+
+	/*
+	 * We take all buildids when the file contains AUX area tracing data,
+	 * because we do not decode the trace to find out which DSOs were
+	 * actually hit: that would take too long.
+	 */
+ if (rec->opts.full_auxtrace)
+ rec->buildid_all = true;
+
+ if (record_opts__config(&rec->opts)) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = __cmd_record(&record, argc, argv);
+out:
+ perf_evlist__delete(rec->evlist);
+ symbol__exit();
+ auxtrace_record__free(rec->itr);
+ return err;
+}
+
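+/*
+ * SIGUSR2 handler: start an AUX area snapshot if snapshot mode is armed,
+ * and/or trigger an output file switch for --switch-output=signal.
+ */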
+static void snapshot_sig_handler(int sig __maybe_unused)
+{
+ struct record *rec = &record;
+
+ if (trigger_is_ready(&auxtrace_snapshot_trigger)) {
+ trigger_hit(&auxtrace_snapshot_trigger);
+ auxtrace_record__snapshot_started = 1;
+ if (auxtrace_record__snapshot_start(record.itr))
+ trigger_error(&auxtrace_snapshot_trigger);
+ }
+
+ if (switch_output_signal(rec))
+ trigger_hit(&switch_output_trigger);
+}
+
+static void alarm_sig_handler(int sig __maybe_unused)
+{
+ struct record *rec = &record;
+
+ if (switch_output_time(rec))
+ trigger_hit(&switch_output_trigger);
+}
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
new file mode 100644
index 000000000..05eae94d0
--- /dev/null
+++ b/tools/perf/builtin-report.c
@@ -0,0 +1,1404 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-report.c
+ *
+ * Builtin report command: Analyze the perf.data input file, look up and
+ * read DSOs and symbol information, and display a histogram of results
+ * along various sorting keys.
+ */
+#include "builtin.h"
+
+#include "util/util.h"
+#include "util/config.h"
+
+#include "util/annotate.h"
+#include "util/color.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/err.h>
+#include "util/symbol.h"
+#include "util/callchain.h"
+#include "util/values.h"
+
+#include "perf.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/tool.h"
+
+#include <subcmd/parse-options.h>
+#include <subcmd/exec-cmd.h>
+#include "util/parse-events.h"
+
+#include "util/thread.h"
+#include "util/sort.h"
+#include "util/hist.h"
+#include "util/data.h"
+#include "arch/common.h"
+#include "util/time-utils.h"
+#include "util/auxtrace.h"
+#include "util/units.h"
+#include "util/branch.h"
+
+#include <dlfcn.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <signal.h>
+#include <linux/bitmap.h>
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/mman.h>
+
+struct report {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool use_tui, use_gtk, use_stdio;
+ bool show_full_info;
+ bool show_threads;
+ bool inverted_callchain;
+ bool mem_mode;
+ bool stats_mode;
+ bool tasks_mode;
+ bool mmaps_mode;
+ bool header;
+ bool header_only;
+ bool nonany_branch_mode;
+ bool group_set;
+ int max_stack;
+ struct perf_read_values show_threads_values;
+ struct annotation_options annotation_opts;
+ const char *pretty_printing_style;
+ const char *cpu_list;
+ const char *symbol_filter_str;
+ const char *time_str;
+ struct perf_time_interval *ptime_range;
+ int range_size;
+ int range_num;
+ float min_percent;
+ u64 nr_entries;
+ u64 queue_size;
+ int socket_filter;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+ struct branch_type_stat brtype_stat;
+};
+
+static int report__config(const char *var, const char *value, void *cb)
+{
+ struct report *rep = cb;
+
+ if (!strcmp(var, "report.group")) {
+ symbol_conf.event_group = perf_config_bool(var, value);
+ return 0;
+ }
+ if (!strcmp(var, "report.percent-limit")) {
+ double pcnt = strtof(value, NULL);
+
+ rep->min_percent = pcnt;
+ callchain_param.min_percent = pcnt;
+ return 0;
+ }
+ if (!strcmp(var, "report.children")) {
+ symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+ return 0;
+ }
+ if (!strcmp(var, "report.queue-size"))
+ return perf_config_u64(&rep->queue_size, var, value);
+
+ if (!strcmp(var, "report.sort_order")) {
+ default_sort_order = strdup(value);
+ return 0;
+ }
+
+ return 0;
+}
+
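+/*
+ * Per-entry callback of hist_entry_iter: when annotation is in use, account
+ * branch cycles and feed the sample's branch, memory or plain address info
+ * into the annotation counters.
+ */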
+static int hist_iter__report_callback(struct hist_entry_iter *iter,
+ struct addr_location *al, bool single,
+ void *arg)
+{
+ int err = 0;
+ struct report *rep = arg;
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct mem_info *mi;
+ struct branch_info *bi;
+
+ if (!ui__has_annotation())
+ return 0;
+
+ hist__account_cycles(sample->branch_stack, al, sample,
+ rep->nonany_branch_mode);
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+ bi = he->branch_info;
+ err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
+ if (err)
+ goto out;
+
+ err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
+
+ } else if (rep->mem_mode) {
+ mi = he->mem_info;
+ err = addr_map_symbol__inc_samples(&mi->daddr, sample, evsel);
+ if (err)
+ goto out;
+
+ err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
+
+ } else if (symbol_conf.cumulate_callchain) {
+ if (single)
+ err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
+ } else {
+ err = hist_entry__inc_addr_samples(he, sample, evsel, al->addr);
+ }
+
+out:
+ return err;
+}
+
+static int hist_iter__branch_callback(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused,
+ bool single __maybe_unused,
+ void *arg)
+{
+ struct hist_entry *he = iter->he;
+ struct report *rep = arg;
+ struct branch_info *bi;
+ struct perf_sample *sample = iter->sample;
+ struct perf_evsel *evsel = iter->evsel;
+ int err;
+
+ if (!ui__has_annotation())
+ return 0;
+
+ hist__account_cycles(sample->branch_stack, al, sample,
+ rep->nonany_branch_mode);
+
+ bi = he->branch_info;
+ err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
+ if (err)
+ goto out;
+
+ err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
+
+ branch_type_count(&rep->brtype_stat, &bi->flags,
+ bi->from.addr, bi->to.addr);
+
+out:
+ return err;
+}
+
+static void setup_forced_leader(struct report *report,
+ struct perf_evlist *evlist)
+{
+ if (report->group_set)
+ perf_evlist__force_leader(evlist);
+}
+
+static int process_feature_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct report *rep = container_of(tool, struct report, tool);
+
+ if (event->feat.feat_id < HEADER_LAST_FEATURE)
+ return perf_event__process_feature(tool, event, session);
+
+ if (event->feat.feat_id != HEADER_LAST_FEATURE) {
+ pr_err("failed: wrong feature ID: %" PRIu64 "\n",
+ event->feat.feat_id);
+ return -1;
+ }
+
+	/*
+	 * (feat_id = HEADER_LAST_FEATURE) is the end marker, which means
+	 * all features have been received; now we can force the group if
+	 * needed.
+	 */
+ setup_forced_leader(rep, session->evlist);
+ return 0;
+}
+
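+/*
+ * Resolve each sample to a thread/map/symbol, apply the time, symbol and
+ * cpu filters, and add it to the histogram using the iterator ops that
+ * match the current mode (branch, mem, cumulative or normal).
+ */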
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct report *rep = container_of(tool, struct report, tool);
+ struct addr_location al;
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = sample,
+ .hide_unresolved = symbol_conf.hide_unresolved,
+ .add_entry_cb = hist_iter__report_callback,
+ };
+ int ret = 0;
+
+ if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num,
+ sample->time)) {
+ return 0;
+ }
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_debug("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
+ goto out_put;
+
+ if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
+ goto out_put;
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+ /*
+ * A non-synthesized event might not have a branch stack if
+ * branch stacks have been synthesized (using itrace options).
+ */
+ if (!sample->branch_stack)
+ goto out_put;
+
+ iter.add_entry_cb = hist_iter__branch_callback;
+ iter.ops = &hist_iter_branch;
+ } else if (rep->mem_mode) {
+ iter.ops = &hist_iter_mem;
+ } else if (symbol_conf.cumulate_callchain) {
+ iter.ops = &hist_iter_cumulative;
+ } else {
+ iter.ops = &hist_iter_normal;
+ }
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
+
+ ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
+ if (ret < 0)
+ pr_debug("problem adding hist entry, skipping event\n");
+out_put:
+ addr_location__put(&al);
+ return ret;
+}
+
+static int process_read_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_evsel *evsel,
+ struct machine *machine __maybe_unused)
+{
+ struct report *rep = container_of(tool, struct report, tool);
+
+ if (rep->show_threads) {
+ const char *name = evsel ? perf_evsel__name(evsel) : "unknown";
+ int err = perf_read_values_add_value(&rep->show_threads_values,
+ event->read.pid, event->read.tid,
+ evsel->idx,
+ name,
+ event->read.value);
+
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/* For pipe mode, sample_type is not currently set */
+static int report__setup_sample_type(struct report *rep)
+{
+ struct perf_session *session = rep->session;
+ u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ bool is_pipe = perf_data__is_pipe(session->data);
+
+ if (session->itrace_synth_opts->callchain ||
+ (!is_pipe &&
+ perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
+ !session->itrace_synth_opts->set))
+ sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+ if (session->itrace_synth_opts->last_branch)
+ sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+ if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ if (perf_hpp_list.parent) {
+ ui__error("Selected --sort parent, but no "
+ "callchain data. Did you call "
+ "'perf record' without -g?\n");
+ return -EINVAL;
+ }
+ if (symbol_conf.use_callchain &&
+ !symbol_conf.show_branchflag_count) {
+			ui__error("Selected -g or --branch-history, but no "
+				  "callchain or branch data was found.\n"
+				  "Did you call 'perf record' without -g or -b?\n");
+ return -1;
+ }
+ } else if (!callchain_param.enabled &&
+ callchain_param.mode != CHAIN_NONE &&
+ !symbol_conf.use_callchain) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ ui__error("Can't register callchain params.\n");
+ return -EINVAL;
+ }
+ }
+
+ if (symbol_conf.cumulate_callchain) {
+ /* Silently ignore if callchain is missing */
+ if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ symbol_conf.cumulate_callchain = false;
+ perf_hpp__cancel_cumulate();
+ }
+ }
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+ if (!is_pipe &&
+ !(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+ ui__error("Selected -b but no branch data. "
+ "Did you call perf record without -b?\n");
+ return -1;
+ }
+ }
+
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ else
+ callchain_param.record_mode = CALLCHAIN_FP;
+ }
+
+ /* ??? handle more cases than just ANY? */
+ if (!(perf_evlist__combined_branch_type(session->evlist) &
+ PERF_SAMPLE_BRANCH_ANY))
+ rep->nonany_branch_mode = true;
+
+#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT)
+ if (dwarf_callchain_users) {
+		ui__warning("Please install libunwind or libdw development "
+			    "packages before building perf.\n");
+ }
+#endif
+
+ return 0;
+}
+
+static void sig_handler(int sig __maybe_unused)
+{
+ session_done = 1;
+}
+
+static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report *rep,
+ const char *evname, FILE *fp)
+{
+ size_t ret;
+ char unit;
+ unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ u64 nr_events = hists->stats.total_period;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ char buf[512];
+ size_t size = sizeof(buf);
+	int socket_id = hists->socket_filter;
+
+ if (quiet)
+ return 0;
+
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
+ if (perf_evsel__is_group_event(evsel)) {
+ struct perf_evsel *pos;
+
+ perf_evsel__group_desc(evsel, buf, size);
+ evname = buf;
+
+ for_each_group_member(pos, evsel) {
+ const struct hists *pos_hists = evsel__hists(pos);
+
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos_hists->stats.nr_non_filtered_samples;
+ nr_events += pos_hists->stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos_hists->stats.total_period;
+ }
+ }
+ }
+
+ nr_samples = convert_unit(nr_samples, &unit);
+ ret = fprintf(fp, "# Samples: %lu%c", nr_samples, unit);
+ if (evname != NULL) {
+ ret += fprintf(fp, " of event%s '%s'",
+ evsel->nr_members > 1 ? "s" : "", evname);
+ }
+
+ if (rep->time_str)
+ ret += fprintf(fp, " (time slices: %s)", rep->time_str);
+
+ if (symbol_conf.show_ref_callgraph && evname && strstr(evname, "call-graph=no")) {
+ ret += fprintf(fp, ", show reference callgraph");
+ }
+
+ if (rep->mem_mode) {
+ ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events);
+ ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order);
+ } else
+ ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events);
+
+	if (socket_id > -1)
+		ret += fprintf(fp, "\n# Processor Socket: %d", socket_id);
+
+ return ret + fprintf(fp, "\n#\n");
+}
+
+static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
+ struct report *rep,
+ const char *help)
+{
+ struct perf_evsel *pos;
+
+ if (!quiet) {
+ fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n",
+ evlist->stats.total_lost_samples);
+ }
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+ const char *evname = perf_evsel__name(pos);
+
+ if (symbol_conf.event_group &&
+ !perf_evsel__is_group_leader(pos))
+ continue;
+
+ hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
+ hists__fprintf(hists, !quiet, 0, 0, rep->min_percent, stdout,
+ !(symbol_conf.use_callchain ||
+ symbol_conf.show_branchflag_count));
+ fprintf(stdout, "\n\n");
+ }
+
+ if (!quiet)
+ fprintf(stdout, "#\n# (%s)\n#\n", help);
+
+ if (rep->show_threads) {
+ bool style = !strcmp(rep->pretty_printing_style, "raw");
+ perf_read_values_display(stdout, &rep->show_threads_values,
+ style);
+ perf_read_values_destroy(&rep->show_threads_values);
+ }
+
+ if (sort__mode == SORT_MODE__BRANCH)
+ branch_type_stat_display(stdout, &rep->brtype_stat);
+
+ return 0;
+}
+
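+/*
+ * Warn when kernel samples cannot be resolved, typically because
+ * kptr_restrict hid kernel addresses at record time.
+ */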
+static void report__warn_kptr_restrict(const struct report *rep)
+{
+ struct map *kernel_map = machine__kernel_map(&rep->session->machines.host);
+ struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL;
+
+ if (perf_evlist__exclude_kernel(rep->session->evlist))
+ return;
+
+ if (kernel_map == NULL ||
+ (kernel_map->dso->hit &&
+ (kernel_kmap->ref_reloc_sym == NULL ||
+ kernel_kmap->ref_reloc_sym->addr == 0))) {
+ const char *desc =
+		    "As no suitable kallsyms or vmlinux was found, kernel samples\n"
+ "can't be resolved.";
+
+ if (kernel_map && map__has_symbols(kernel_map)) {
+ desc = "If some relocation was applied (e.g. "
+ "kexec) symbols may be misresolved.";
+ }
+
+ ui__warning(
+"Kernel address maps (/proc/{kallsyms,modules}) were restricted.\n\n"
+"Check /proc/sys/kernel/kptr_restrict before running 'perf record'.\n\n%s\n\n"
+"Samples in kernel modules can't be resolved as well.\n\n",
+ desc);
+ }
+}
+
+static int report__gtk_browse_hists(struct report *rep, const char *help)
+{
+ int (*hist_browser)(struct perf_evlist *evlist, const char *help,
+ struct hist_browser_timer *timer, float min_pcnt);
+
+ hist_browser = dlsym(perf_gtk_handle, "perf_evlist__gtk_browse_hists");
+
+ if (hist_browser == NULL) {
+ ui__error("GTK browser not found!\n");
+ return -1;
+ }
+
+ return hist_browser(rep->session->evlist, help, NULL, rep->min_percent);
+}
+
+static int report__browse_hists(struct report *rep)
+{
+ int ret;
+ struct perf_session *session = rep->session;
+ struct perf_evlist *evlist = session->evlist;
+ const char *help = perf_tip(system_path(TIPDIR));
+
+ if (help == NULL) {
+ /* fallback for people who don't install perf ;-) */
+ help = perf_tip(DOCDIR);
+ if (help == NULL)
+ help = "Cannot load tips.txt file, please install perf!";
+ }
+
+ switch (use_browser) {
+ case 1:
+ ret = perf_evlist__tui_browse_hists(evlist, help, NULL,
+ rep->min_percent,
+ &session->header.env,
+ true, &rep->annotation_opts);
+ /*
+ * Usually "ret" is the last pressed key, and we only
+ * care if the key notifies us to switch data file.
+ */
+ if (ret != K_SWITCH_INPUT_DATA)
+ ret = 0;
+ break;
+ case 2:
+ ret = report__gtk_browse_hists(rep, help);
+ break;
+ default:
+ ret = perf_evlist__tty_browse_hists(evlist, rep, help);
+ break;
+ }
+
+ return ret;
+}
+
+static int report__collapse_hists(struct report *rep)
+{
+ struct ui_progress prog;
+ struct perf_evsel *pos;
+ int ret = 0;
+
+ ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
+
+ evlist__for_each_entry(rep->session->evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+
+ if (pos->idx == 0)
+ hists->symbol_filter_str = rep->symbol_filter_str;
+
+ hists->socket_filter = rep->socket_filter;
+
+ ret = hists__collapse_resort(hists, &prog);
+ if (ret < 0)
+ break;
+
+		/* A non-group event is considered to be its own leader */
+ if (symbol_conf.event_group &&
+ !perf_evsel__is_group_leader(pos)) {
+ struct hists *leader_hists = evsel__hists(pos->leader);
+
+ hists__match(leader_hists, hists);
+ hists__link(leader_hists, hists);
+ }
+ }
+
+ ui_progress__finish();
+ return ret;
+}
+
+static void report__output_resort(struct report *rep)
+{
+ struct ui_progress prog;
+ struct perf_evsel *pos;
+
+ ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
+
+ evlist__for_each_entry(rep->session->evlist, pos)
+ perf_evsel__output_resort(pos, &prog);
+
+ ui_progress__finish();
+}
+
+static void stats_setup(struct report *rep)
+{
+ memset(&rep->tool, 0, sizeof(rep->tool));
+ rep->tool.no_warn = true;
+}
+
+static int stats_print(struct report *rep)
+{
+ struct perf_session *session = rep->session;
+
+ perf_session__fprintf_nr_events(session, stdout);
+ return 0;
+}
+
+static void tasks_setup(struct report *rep)
+{
+ memset(&rep->tool, 0, sizeof(rep->tool));
+ rep->tool.ordered_events = true;
+ if (rep->mmaps_mode) {
+ rep->tool.mmap = perf_event__process_mmap;
+ rep->tool.mmap2 = perf_event__process_mmap2;
+ }
+ rep->tool.comm = perf_event__process_comm;
+ rep->tool.exit = perf_event__process_exit;
+ rep->tool.fork = perf_event__process_fork;
+ rep->tool.no_warn = true;
+}
+
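+/*
+ * Tree node used by --tasks: 'list' links a task into its parent's
+ * 'children' (or into the root list), mirroring the thread hierarchy.
+ */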
+struct task {
+ struct thread *thread;
+ struct list_head list;
+ struct list_head children;
+};
+
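+/*
+ * Link @task into its parent's children list and walk up the parent chain.
+ * Returns the topmost not-yet-listed ancestor so the caller can add it to
+ * the root list, NULL if an ancestor was already processed, or ERR_PTR()
+ * when a parent thread cannot be found.
+ */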
+static struct task *tasks_list(struct task *task, struct machine *machine)
+{
+ struct thread *parent_thread, *thread = task->thread;
+ struct task *parent_task;
+
+ /* Already listed. */
+ if (!list_empty(&task->list))
+ return NULL;
+
+ /* Last one in the chain. */
+ if (thread->ppid == -1)
+ return task;
+
+ parent_thread = machine__find_thread(machine, -1, thread->ppid);
+ if (!parent_thread)
+ return ERR_PTR(-ENOENT);
+
+ parent_task = thread__priv(parent_thread);
+ list_add_tail(&task->list, &parent_task->children);
+ return tasks_list(parent_task, machine);
+}
+
+static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp)
+{
+ size_t printed = 0;
+ struct rb_node *nd;
+
+ for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
+ struct map *map = rb_entry(nd, struct map, rb_node);
+
+ printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n",
+ indent, "", map->start, map->end,
+ map->prot & PROT_READ ? 'r' : '-',
+ map->prot & PROT_WRITE ? 'w' : '-',
+ map->prot & PROT_EXEC ? 'x' : '-',
+ map->flags & MAP_SHARED ? 's' : 'p',
+ map->pgoff,
+ map->ino, map->dso->name);
+ }
+
+ return printed;
+}
+
+static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp)
+{
+ return maps__fprintf_task(&mg->maps, indent, fp);
+}
+
+static void task__print_level(struct task *task, FILE *fp, int level)
+{
+ struct thread *thread = task->thread;
+ struct task *child;
+ int comm_indent = fprintf(fp, " %8d %8d %8d |%*s",
+ thread->pid_, thread->tid, thread->ppid,
+ level, "");
+
+ fprintf(fp, "%s\n", thread__comm_str(thread));
+
+ map_groups__fprintf_task(thread->mg, comm_indent, fp);
+
+ if (!list_empty(&task->children)) {
+ list_for_each_entry(child, &task->children, list)
+ task__print_level(child, fp, level + 1);
+ }
+}
+
+static int tasks_print(struct report *rep, FILE *fp)
+{
+ struct perf_session *session = rep->session;
+ struct machine *machine = &session->machines.host;
+ struct task *tasks, *task;
+ unsigned int nr = 0, itask = 0, i;
+ struct rb_node *nd;
+ LIST_HEAD(list);
+
+	/*
+	 * No locking is needed while accessing machine->threads,
+	 * because --tasks is a single-threaded command.
+	 */
+
+ /* Count all the threads. */
+ for (i = 0; i < THREADS__TABLE_SIZE; i++)
+ nr += machine->threads[i].nr;
+
+ tasks = malloc(sizeof(*tasks) * nr);
+ if (!tasks)
+ return -ENOMEM;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ task = tasks + itask++;
+
+ task->thread = rb_entry(nd, struct thread, rb_node);
+ INIT_LIST_HEAD(&task->children);
+ INIT_LIST_HEAD(&task->list);
+ thread__set_priv(task->thread, task);
+ }
+ }
+
+	/*
+	 * Iterate over every task, walking up to its first unprocessed
+	 * parent, and link each task into its parent's children list.
+	 * Tasks with no parent are added to 'list'.
+	 */
+ for (itask = 0; itask < nr; itask++) {
+ task = tasks + itask;
+
+ if (!list_empty(&task->list))
+ continue;
+
+ task = tasks_list(task, machine);
+ if (IS_ERR(task)) {
+ pr_err("Error: failed to process tasks\n");
+ free(tasks);
+ return PTR_ERR(task);
+ }
+
+ if (task)
+ list_add_tail(&task->list, &list);
+ }
+
+ fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm");
+
+ list_for_each_entry(task, &list, list)
+ task__print_level(task, fp, 0);
+
+ free(tasks);
+ return 0;
+}
+
+static int __cmd_report(struct report *rep)
+{
+ int ret;
+ struct perf_session *session = rep->session;
+ struct perf_evsel *pos;
+ struct perf_data *data = session->data;
+
+ signal(SIGINT, sig_handler);
+
+ if (rep->cpu_list) {
+ ret = perf_session__cpu_bitmap(session, rep->cpu_list,
+ rep->cpu_bitmap);
+ if (ret) {
+ ui__error("failed to set cpu bitmap\n");
+ return ret;
+ }
+ session->itrace_synth_opts->cpu_bitmap = rep->cpu_bitmap;
+ }
+
+ if (rep->show_threads) {
+ ret = perf_read_values_init(&rep->show_threads_values);
+ if (ret)
+ return ret;
+ }
+
+ ret = report__setup_sample_type(rep);
+ if (ret) {
+		/* report__setup_sample_type() already showed an error message */
+ return ret;
+ }
+
+ if (rep->stats_mode)
+ stats_setup(rep);
+
+ if (rep->tasks_mode)
+ tasks_setup(rep);
+
+ ret = perf_session__process_events(session);
+ if (ret) {
+ ui__error("failed to process sample\n");
+ return ret;
+ }
+
+ if (rep->stats_mode)
+ return stats_print(rep);
+
+ if (rep->tasks_mode)
+ return tasks_print(rep, stdout);
+
+ report__warn_kptr_restrict(rep);
+
+ evlist__for_each_entry(session->evlist, pos)
+ rep->nr_entries += evsel__hists(pos)->nr_entries;
+
+ if (use_browser == 0) {
+ if (verbose > 3)
+ perf_session__fprintf(session, stdout);
+
+ if (verbose > 2)
+ perf_session__fprintf_dsos(session, stdout);
+
+ if (dump_trace) {
+ perf_session__fprintf_nr_events(session, stdout);
+ perf_evlist__fprintf_nr_events(session->evlist, stdout);
+ return 0;
+ }
+ }
+
+ ret = report__collapse_hists(rep);
+ if (ret) {
+ ui__error("failed to process hist entry\n");
+ return ret;
+ }
+
+ if (session_done())
+ return 0;
+
+	/*
+	 * Recalculate the number of entries after collapsing, since it
+	 * might have changed during the collapse phase.
+	 */
+ rep->nr_entries = 0;
+ evlist__for_each_entry(session->evlist, pos)
+ rep->nr_entries += evsel__hists(pos)->nr_entries;
+
+ if (rep->nr_entries == 0) {
+ ui__error("The %s file has no samples!\n", data->file.path);
+ return 0;
+ }
+
+ report__output_resort(rep);
+
+ return report__browse_hists(rep);
+}
+
+static int
+report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = !unset;
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ symbol_conf.use_callchain = false;
+ callchain->mode = CHAIN_NONE;
+ return 0;
+ }
+
+ return parse_callchain_report_opt(arg);
+}
+
+int
+report_parse_ignore_callees_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (arg) {
+ int err = regcomp(&ignore_callees_regex, arg, REG_EXTENDED);
+ if (err) {
+ char buf[BUFSIZ];
+ regerror(err, &ignore_callees_regex, buf, sizeof(buf));
+ pr_err("Invalid --ignore-callees regex: %s\n%s", arg, buf);
+ return -1;
+ }
+ have_ignore_callees = 1;
+ }
+
+ return 0;
+}
+
+static int
+parse_branch_mode(const struct option *opt,
+ const char *str __maybe_unused, int unset)
+{
+ int *branch_mode = opt->value;
+
+ *branch_mode = !unset;
+ return 0;
+}
+
+static int
+parse_percent_limit(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct report *rep = opt->value;
+ double pcnt = strtof(str, NULL);
+
+ rep->min_percent = pcnt;
+ callchain_param.min_percent = pcnt;
+ return 0;
+}
+
+int cmd_report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
+ struct stat st;
+ bool has_br_stack = false;
+ int branch_mode = -1;
+ int last_key = 0;
+ bool branch_call_mode = false;
+#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent"
+ const char report_callchain_help[] = "Display call graph (stack chain/backtrace):\n\n"
+ CALLCHAIN_REPORT_HELP
+ "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT;
+ char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT;
+ const char * const report_usage[] = {
+ "perf report [<options>]",
+ NULL
+ };
+ struct report report = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .read = process_read_event,
+ .attr = perf_event__process_attr,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_event__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .event_update = perf_event__process_event_update,
+ .feature = process_feature_event,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ .max_stack = PERF_MAX_STACK_DEPTH,
+ .pretty_printing_style = "normal",
+ .socket_filter = -1,
+ .annotation_opts = annotation__default_options,
+ };
+ const struct option options[] = {
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
+ OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"),
+ OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"),
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+ "don't load vmlinux even if found"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+ "file", "kallsyms pathname"),
+ OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
+ "load module symbols - WARNING: use only with -k and LIVE kernel"),
+ OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
+ "Show a column with the number of samples"),
+ OPT_BOOLEAN('T', "threads", &report.show_threads,
+ "Show per-thread event counters"),
+ OPT_STRING(0, "pretty", &report.pretty_printing_style, "key",
+ "pretty printing style key: normal raw"),
+ OPT_BOOLEAN(0, "tui", &report.use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "gtk", &report.use_gtk, "Use the GTK2 interface"),
+ OPT_BOOLEAN(0, "stdio", &report.use_stdio,
+ "Use the stdio interface"),
+ OPT_BOOLEAN(0, "header", &report.header, "Show data header."),
+ OPT_BOOLEAN(0, "header-only", &report.header_only,
+ "Show only data header."),
+ OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+		   " Please refer to the man page for the complete list."),
+ OPT_STRING('F', "fields", &field_order, "key[,keys...]",
+ "output field(s): overhead, period, sample plus all of sort keys"),
+ OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization,
+ "Show sample percentage for different cpu modes"),
+ OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
+ "Show sample percentage for different cpu modes", PARSE_OPT_HIDDEN),
+ OPT_STRING('p', "parent", &parent_pattern, "regex",
+ "regex filter to identify parent, see: '--sort parent'"),
+ OPT_BOOLEAN('x', "exclude-other", &symbol_conf.exclude_other,
+ "Only display entries with parent-match"),
+ OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param,
+ "print_type,threshold[,print_limit],order,sort_key[,branch],value",
+ report_callchain_help, &report_parse_callchain_opt,
+ callchain_default_opt),
+ OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+ "Accumulate callchains of children and show total overhead as well"),
+ OPT_INTEGER(0, "max-stack", &report.max_stack,
+ "Set the maximum stack depth when parsing the callchain, "
+ "anything beyond the specified depth will be ignored. "
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+ OPT_BOOLEAN('G', "inverted", &report.inverted_callchain,
+ "alias for inverted call graph"),
+ OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+ "ignore callees of these functions in call graphs",
+ report_parse_ignore_callees_opt),
+ OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+ "only consider symbols in these dsos"),
+ OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+ "only consider symbols in these comms"),
+ OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+ "only consider symbols in these pids"),
+ OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+ "only consider symbols in these tids"),
+ OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+ "only consider these symbols"),
+ OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter",
+ "only show symbols that (partially) match with this filter"),
+ OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
+ "width[,width...]",
+ "don't try to adjust column width, use these fixed values"),
+ OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator",
+		   "separator for columns; no spaces will be added between "
+		   "columns; '.' is reserved."),
+ OPT_BOOLEAN('U', "hide-unresolved", &symbol_conf.hide_unresolved,
+ "Only display entries resolved to a symbol"),
+ OPT_CALLBACK(0, "symfs", NULL, "directory",
+ "Look for files with symbols relative to this directory",
+ symbol__config_symfs),
+ OPT_STRING('C', "cpu", &report.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_BOOLEAN('I', "show-info", &report.show_full_info,
+ "Display extended information about perf.data file"),
+ OPT_BOOLEAN(0, "source", &report.annotation_opts.annotate_src,
+ "Interleave source code with assembly code (default)"),
+ OPT_BOOLEAN(0, "asm-raw", &report.annotation_opts.show_asm_raw,
+ "Display raw encoding of assembly instructions (default)"),
+ OPT_STRING('M', "disassembler-style", &report.annotation_opts.disassembler_style, "disassembler style",
+ "Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+ "Show a column with the sum of periods"),
+ OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set,
+ "Show event group information together"),
+ OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
+ "use branch records for per branch histogram filling",
+ parse_branch_mode),
+ OPT_BOOLEAN(0, "branch-history", &branch_call_mode,
+ "add last branch records to call history"),
+ OPT_STRING(0, "objdump", &report.annotation_opts.objdump_path, "path",
+ "objdump binary to use for disassembly and annotations"),
+ OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+ "Disable symbol demangling"),
+ OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+ "Enable kernel symbol demangling"),
+ OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
+ OPT_CALLBACK(0, "percent-limit", &report, "percent",
+ "Don't show entries under that percent", parse_percent_limit),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "how to display percentage of filtered entries", parse_filter_percentage),
+ OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+ "Instruction Tracing options",
+ itrace_parse_synth_opts),
+ OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
+ "Show full source file name path for source lines"),
+ OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
+ "Show callgraph from reference event"),
+ OPT_INTEGER(0, "socket-filter", &report.socket_filter,
+		    "only show the processor socket that matches this filter"),
+ OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+ "Show raw trace event output (do not use print fmt or plugins)"),
+ OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+ "Show entries in a hierarchy"),
+ OPT_CALLBACK_DEFAULT(0, "stdio-color", NULL, "mode",
+ "'always' (default), 'never' or 'auto' only applicable to --stdio mode",
+ stdio__config_color, "always"),
+ OPT_STRING(0, "time", &report.time_str, "str",
+ "Time span of interest (start,stop)"),
+ OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
+ "Show inline function"),
+ OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period",
+ "Set percent type local/global-period/hits",
+ annotate_parse_percent_type),
+ OPT_END()
+ };
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ int ret = hists__init();
+
+ if (ret < 0)
+ return ret;
+
+ ret = perf_config(report__config, &report);
+ if (ret)
+ return ret;
+
+ argc = parse_options(argc, argv, options, report_usage, 0);
+ if (argc) {
+ /*
+ * Special case: if there's an argument left then assume that
+ * it's a symbol filter:
+ */
+ if (argc > 1)
+ usage_with_options(report_usage, options);
+
+ report.symbol_filter_str = argv[0];
+ }
+
+ if (report.mmaps_mode)
+ report.tasks_mode = true;
+
+ if (quiet)
+ perf_quiet_option();
+
+ if (symbol_conf.vmlinux_name &&
+ access(symbol_conf.vmlinux_name, R_OK)) {
+ pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name);
+ return -EINVAL;
+ }
+ if (symbol_conf.kallsyms_name &&
+ access(symbol_conf.kallsyms_name, R_OK)) {
+ pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name);
+ return -EINVAL;
+ }
+
+ if (report.inverted_callchain)
+ callchain_param.order = ORDER_CALLER;
+ if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
+ callchain_param.order = ORDER_CALLER;
+
+ if (itrace_synth_opts.callchain &&
+ (int)itrace_synth_opts.callchain_sz > report.max_stack)
+ report.max_stack = itrace_synth_opts.callchain_sz;
+
+ if (!input_name || !strlen(input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ input_name = "-";
+ else
+ input_name = "perf.data";
+ }
+
+ data.file.path = input_name;
+ data.force = symbol_conf.force;
+
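+	/*
+	 * The TUI hists browser can ask to switch to another perf.data file
+	 * (K_SWITCH_INPUT_DATA), in which case we come back here to reopen
+	 * the session with the new file.
+	 */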
+repeat:
+ session = perf_session__new(&data, false, &report.tool);
+ if (session == NULL)
+ return -1;
+
+ if (report.queue_size) {
+ ordered_events__set_alloc_size(&session->ordered_events,
+ report.queue_size);
+ }
+
+ session->itrace_synth_opts = &itrace_synth_opts;
+
+ report.session = session;
+
+ has_br_stack = perf_header__has_feat(&session->header,
+ HEADER_BRANCH_STACK);
+
+ setup_forced_leader(&report, session->evlist);
+
+ if (itrace_synth_opts.last_branch)
+ has_br_stack = true;
+
+ if (has_br_stack && branch_call_mode)
+ symbol_conf.show_branchflag_count = true;
+
+ memset(&report.brtype_stat, 0, sizeof(struct branch_type_stat));
+
+ /*
+ * Branch mode is a tristate:
+ * -1 means default, so decide based on the file having branch data.
+ * 0/1 means the user chose a mode.
+ */
+ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) &&
+ !branch_call_mode) {
+ sort__mode = SORT_MODE__BRANCH;
+ symbol_conf.cumulate_callchain = false;
+ }
+ if (branch_call_mode) {
+ callchain_param.key = CCKEY_ADDRESS;
+ callchain_param.branch_callstack = 1;
+ symbol_conf.use_callchain = true;
+ callchain_register_param(&callchain_param);
+ if (sort_order == NULL)
+ sort_order = "srcline,symbol,dso";
+ }
+
+ if (report.mem_mode) {
+ if (sort__mode == SORT_MODE__BRANCH) {
+ pr_err("branch and mem mode incompatible\n");
+ goto error;
+ }
+ sort__mode = SORT_MODE__MEMORY;
+ symbol_conf.cumulate_callchain = false;
+ }
+
+ if (symbol_conf.report_hierarchy) {
+ /* disable incompatible options */
+ symbol_conf.cumulate_callchain = false;
+
+ if (field_order) {
+ pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+ parse_options_usage(report_usage, options, "F", 1);
+ parse_options_usage(NULL, options, "hierarchy", 0);
+ goto error;
+ }
+
+ perf_hpp_list.need_collapse = true;
+ }
+
+ if (report.use_stdio)
+ use_browser = 0;
+ else if (report.use_tui)
+ use_browser = 1;
+ else if (report.use_gtk)
+ use_browser = 2;
+
+ /* Force tty output for header output and per-thread stat. */
+ if (report.header || report.header_only || report.show_threads)
+ use_browser = 0;
+ if (report.header || report.header_only)
+ report.tool.show_feat_hdr = SHOW_FEAT_HEADER;
+ if (report.show_full_info)
+ report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
+ if (report.stats_mode || report.tasks_mode)
+ use_browser = 0;
+ if (report.stats_mode && report.tasks_mode) {
+ pr_err("Error: --tasks and --mmaps can't be used together with --stats\n");
+ goto error;
+ }
+
+ if (strcmp(input_name, "-") != 0)
+ setup_browser(true);
+ else
+ use_browser = 0;
+
+ if ((last_key != K_SWITCH_INPUT_DATA) &&
+ (setup_sorting(session->evlist) < 0)) {
+ if (sort_order)
+ parse_options_usage(report_usage, options, "s", 1);
+ if (field_order)
+ parse_options_usage(sort_order ? NULL : report_usage,
+ options, "F", 1);
+ goto error;
+ }
+
+ if ((report.header || report.header_only) && !quiet) {
+ perf_session__fprintf_info(session, stdout,
+ report.show_full_info);
+ if (report.header_only) {
+ ret = 0;
+ goto error;
+ }
+ } else if (use_browser == 0 && !quiet &&
+ !report.stats_mode && !report.tasks_mode) {
+ fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n",
+ stdout);
+ }
+
+ /*
+ * Only the TUI browser does integrated annotation, so don't
+ * allocate extra space that won't be used in the stdio
+ * implementation.
+ */
+ if (ui__has_annotation()) {
+ ret = symbol__annotation_init();
+ if (ret < 0)
+ goto error;
+ /*
+ * For searching by name in the "Browse map details" view.
+ * Provide it only in verbose mode so as not to bloat
+ * struct symbol too much.
+ */
+ if (verbose > 0) {
+ /*
+ * XXX: Need to provide a less kludgy way to ask for
+ * more space per symbol, the u32 is for the index on
+ * the ui browser.
+ * See symbol__browser_index.
+ */
+ symbol_conf.priv_size += sizeof(u32);
+ symbol_conf.sort_by_name = true;
+ }
+ annotation_config__init();
+ }
+
+ if (symbol__init(&session->header.env) < 0)
+ goto error;
+
+ report.ptime_range = perf_time__range_alloc(report.time_str,
+ &report.range_size);
+ if (!report.ptime_range) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) {
+ if (session->evlist->first_sample_time == 0 &&
+ session->evlist->last_sample_time == 0) {
+ pr_err("HINT: no first/last sample time found in perf data.\n"
+ "Please use latest perf binary to execute 'perf record'\n"
+ "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
+ ret = -EINVAL;
+ goto error;
+ }
+
+ report.range_num = perf_time__percent_parse_str(
+ report.ptime_range, report.range_size,
+ report.time_str,
+ session->evlist->first_sample_time,
+ session->evlist->last_sample_time);
+
+ if (report.range_num < 0) {
+ pr_err("Invalid time string\n");
+ ret = -EINVAL;
+ goto error;
+ }
+ } else {
+ report.range_num = 1;
+ }
+
+ if (session->tevent.pevent &&
+ tep_set_function_resolver(session->tevent.pevent,
+ machine__resolve_kernel_addr,
+ &session->machines.host) < 0) {
+ pr_err("%s: failed to set libtraceevent function resolver\n",
+ __func__);
+ return -1;
+ }
+
+ sort__setup_elide(stdout);
+
+ ret = __cmd_report(&report);
+ if (ret == K_SWITCH_INPUT_DATA) {
+ perf_session__delete(session);
+ last_key = K_SWITCH_INPUT_DATA;
+ goto repeat;
+ } else
+ ret = 0;
+
+error:
+ zfree(&report.ptime_range);
+
+ perf_session__delete(session);
+ return ret;
+}
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
new file mode 100644
index 000000000..cbf39dab1
--- /dev/null
+++ b/tools/perf/builtin-sched.c
@@ -0,0 +1,3532 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/util.h"
+#include "util/evlist.h"
+#include "util/cache.h"
+#include "util/evsel.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/header.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/cloexec.h"
+#include "util/thread_map.h"
+#include "util/color.h"
+#include "util/stat.h"
+#include "util/callchain.h"
+#include "util/time-utils.h"
+
+#include <subcmd/parse-options.h>
+#include "util/trace-event.h"
+
+#include "util/debug.h"
+
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <inttypes.h>
+
+#include <errno.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <math.h>
+#include <api/fs/fs.h>
+#include <linux/time64.h>
+
+#include "sane_ctype.h"
+
+#define PR_SET_NAME 15 /* Set process name */
+#define MAX_CPUS 4096
+#define COMM_LEN 20
+#define SYM_LEN 129
+#define MAX_PID 1024000
+
+struct sched_atom;
+
+struct task_desc {
+ unsigned long nr;
+ unsigned long pid;
+ char comm[COMM_LEN];
+
+ unsigned long nr_events;
+ unsigned long curr_event;
+ struct sched_atom **atoms;
+
+ pthread_t thread;
+ sem_t sleep_sem;
+
+ sem_t ready_for_work;
+ sem_t work_done_sem;
+
+ u64 cpu_usage;
+};
+
+enum sched_event_type {
+ SCHED_EVENT_RUN,
+ SCHED_EVENT_SLEEP,
+ SCHED_EVENT_WAKEUP,
+ SCHED_EVENT_MIGRATION,
+};
+
+struct sched_atom {
+ enum sched_event_type type;
+ int specific_wait;
+ u64 timestamp;
+ u64 duration;
+ unsigned long nr;
+ sem_t *wait_sem;
+ struct task_desc *wakee;
+};
+
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
+
+/* task state bitmask, copied from include/linux/sched.h */
+#define TASK_RUNNING 0
+#define TASK_INTERRUPTIBLE 1
+#define TASK_UNINTERRUPTIBLE 2
+#define __TASK_STOPPED 4
+#define __TASK_TRACED 8
+/* in tsk->exit_state */
+#define EXIT_DEAD 16
+#define EXIT_ZOMBIE 32
+#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
+/* in tsk->state again */
+#define TASK_DEAD 64
+#define TASK_WAKEKILL 128
+#define TASK_WAKING 256
+#define TASK_PARKED 512
+
+enum thread_state {
+ THREAD_SLEEPING = 0,
+ THREAD_WAIT_CPU,
+ THREAD_SCHED_IN,
+ THREAD_IGNORE
+};
+
+struct work_atom {
+ struct list_head list;
+ enum thread_state state;
+ u64 sched_out_time;
+ u64 wake_up_time;
+ u64 sched_in_time;
+ u64 runtime;
+};
+
+struct work_atoms {
+ struct list_head work_list;
+ struct thread *thread;
+ struct rb_node node;
+ u64 max_lat;
+ u64 max_lat_at;
+ u64 total_lat;
+ u64 nb_atoms;
+ u64 total_runtime;
+ int num_merged;
+};
+
+typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
+
+struct perf_sched;
+
+struct trace_sched_handler {
+ int (*switch_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ int (*runtime_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ int (*wakeup_event)(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine);
+
+ /* PERF_RECORD_FORK event, not sched_process_fork tracepoint */
+ int (*fork_event)(struct perf_sched *sched, union perf_event *event,
+ struct machine *machine);
+
+ int (*migrate_task_event)(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine);
+};
+
+#define COLOR_PIDS PERF_COLOR_BLUE
+#define COLOR_CPUS PERF_COLOR_BG_RED
+
+struct perf_sched_map {
+ DECLARE_BITMAP(comp_cpus_mask, MAX_CPUS);
+ int *comp_cpus;
+ bool comp;
+ struct thread_map *color_pids;
+ const char *color_pids_str;
+ struct cpu_map *color_cpus;
+ const char *color_cpus_str;
+ struct cpu_map *cpus;
+ const char *cpus_str;
+};
+
+struct perf_sched {
+ struct perf_tool tool;
+ const char *sort_order;
+ unsigned long nr_tasks;
+ struct task_desc **pid_to_task;
+ struct task_desc **tasks;
+ const struct trace_sched_handler *tp_handler;
+ pthread_mutex_t start_work_mutex;
+ pthread_mutex_t work_done_wait_mutex;
+ int profile_cpu;
+/*
+ * Track the current task - that way we can know whether there are any
+ * weird events, such as a task being switched away that is not current.
+ */
+ int max_cpu;
+ u32 curr_pid[MAX_CPUS];
+ struct thread *curr_thread[MAX_CPUS];
+ char next_shortname1;
+ char next_shortname2;
+ unsigned int replay_repeat;
+ unsigned long nr_run_events;
+ unsigned long nr_sleep_events;
+ unsigned long nr_wakeup_events;
+ unsigned long nr_sleep_corrections;
+ unsigned long nr_run_events_optimized;
+ unsigned long targetless_wakeups;
+ unsigned long multitarget_wakeups;
+ unsigned long nr_runs;
+ unsigned long nr_timestamps;
+ unsigned long nr_unordered_timestamps;
+ unsigned long nr_context_switch_bugs;
+ unsigned long nr_events;
+ unsigned long nr_lost_chunks;
+ unsigned long nr_lost_events;
+ u64 run_measurement_overhead;
+ u64 sleep_measurement_overhead;
+ u64 start_time;
+ u64 cpu_usage;
+ u64 runavg_cpu_usage;
+ u64 parent_cpu_usage;
+ u64 runavg_parent_cpu_usage;
+ u64 sum_runtime;
+ u64 sum_fluct;
+ u64 run_avg;
+ u64 all_runtime;
+ u64 all_count;
+ u64 cpu_last_switched[MAX_CPUS];
+ struct rb_root atom_root, sorted_atom_root, merged_atom_root;
+ struct list_head sort_list, cmp_pid;
+ bool force;
+ bool skip_merge;
+ struct perf_sched_map map;
+
+ /* options for timehist command */
+ bool summary;
+ bool summary_only;
+ bool idle_hist;
+ bool show_callchain;
+ unsigned int max_stack;
+ bool show_cpu_visual;
+ bool show_wakeups;
+ bool show_next;
+ bool show_migrations;
+ bool show_state;
+ u64 skipped_samples;
+ const char *time_str;
+ struct perf_time_interval ptime;
+ struct perf_time_interval hist_time;
+};
+
+/* per thread run time data */
+struct thread_runtime {
+ u64 last_time; /* time of previous sched in/out event */
+ u64 dt_run; /* run time */
+ u64 dt_sleep; /* time between CPU access by sleep (off cpu) */
+ u64 dt_iowait; /* time between CPU access by iowait (off cpu) */
+ u64 dt_preempt; /* time between CPU access by preempt (off cpu) */
+ u64 dt_delay; /* time between wakeup and sched-in */
+ u64 ready_to_run; /* time of wakeup */
+
+ struct stats run_stats;
+ u64 total_run_time;
+ u64 total_sleep_time;
+ u64 total_iowait_time;
+ u64 total_preempt_time;
+ u64 total_delay_time;
+
+ int last_state;
+
+ char shortname[3];
+ bool comm_changed;
+
+ u64 migrations;
+};
+
+/* per event run time data */
+struct evsel_runtime {
+ u64 *last_time; /* time this event was last seen per cpu */
+ u32 ncpu; /* highest cpu slot allocated */
+};
+
+/* per cpu idle time data */
+struct idle_thread_runtime {
+ struct thread_runtime tr;
+ struct thread *last_thread;
+ struct rb_root sorted_root;
+ struct callchain_root callchain;
+ struct callchain_cursor cursor;
+};
+
+/* track idle times per cpu */
+static struct thread **idle_threads;
+static int idle_max_cpu;
+static char idle_comm[] = "<idle>";
+
+static u64 get_nsecs(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
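+/*
+ * Busy-wait for roughly 'nsecs' while compensating for the measured
+ * overhead of the timing loop itself. Illustrative numbers (not from
+ * any real run): with run_measurement_overhead = 100 nsecs, a request
+ * to burn 1000 nsecs spins only until T1 - T0 >= 900 nsecs, so loop
+ * overhead plus spin time add up to about the requested duration.
+ */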
+static void burn_nsecs(struct perf_sched *sched, u64 nsecs)
+{
+ u64 T0 = get_nsecs(), T1;
+
+ do {
+ T1 = get_nsecs();
+ } while (T1 + sched->run_measurement_overhead < T0 + nsecs);
+}
+
+static void sleep_nsecs(u64 nsecs)
+{
+ struct timespec ts;
+
+ ts.tv_nsec = nsecs % NSEC_PER_SEC;
+ ts.tv_sec = nsecs / NSEC_PER_SEC;
+
+ nanosleep(&ts, NULL);
+}
+
+static void calibrate_run_measurement_overhead(struct perf_sched *sched)
+{
+ u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ T0 = get_nsecs();
+ burn_nsecs(sched, 0);
+ T1 = get_nsecs();
+ delta = T1-T0;
+ min_delta = min(min_delta, delta);
+ }
+ sched->run_measurement_overhead = min_delta;
+
+ printf("run measurement overhead: %" PRIu64 " nsecs\n", min_delta);
+}
+
+static void calibrate_sleep_measurement_overhead(struct perf_sched *sched)
+{
+ u64 T0, T1, delta, min_delta = NSEC_PER_SEC;
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ T0 = get_nsecs();
+ sleep_nsecs(10000);
+ T1 = get_nsecs();
+ delta = T1-T0;
+ min_delta = min(min_delta, delta);
+ }
+ min_delta -= 10000;
+ sched->sleep_measurement_overhead = min_delta;
+
+ printf("sleep measurement overhead: %" PRIu64 " nsecs\n", min_delta);
+}
+
+static struct sched_atom *
+get_new_event(struct task_desc *task, u64 timestamp)
+{
+ struct sched_atom *event = zalloc(sizeof(*event));
+ unsigned long idx = task->nr_events;
+ size_t size;
+
+ event->timestamp = timestamp;
+ event->nr = idx;
+
+ task->nr_events++;
+ size = sizeof(struct sched_atom *) * task->nr_events;
+ task->atoms = realloc(task->atoms, size);
+ BUG_ON(!task->atoms);
+
+ task->atoms[idx] = event;
+
+ return event;
+}
+
+static struct sched_atom *last_event(struct task_desc *task)
+{
+ if (!task->nr_events)
+ return NULL;
+
+ return task->atoms[task->nr_events - 1];
+}
+
+static void add_sched_event_run(struct perf_sched *sched, struct task_desc *task,
+ u64 timestamp, u64 duration)
+{
+ struct sched_atom *event, *curr_event = last_event(task);
+
+ /*
+ * optimize an existing RUN event by merging this one
+ * into it:
+ */
+ if (curr_event && curr_event->type == SCHED_EVENT_RUN) {
+ sched->nr_run_events_optimized++;
+ curr_event->duration += duration;
+ return;
+ }
+
+ event = get_new_event(task, timestamp);
+
+ event->type = SCHED_EVENT_RUN;
+ event->duration = duration;
+
+ sched->nr_run_events++;
+}
+
+static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *task,
+ u64 timestamp, struct task_desc *wakee)
+{
+ struct sched_atom *event, *wakee_event;
+
+ event = get_new_event(task, timestamp);
+ event->type = SCHED_EVENT_WAKEUP;
+ event->wakee = wakee;
+
+ wakee_event = last_event(wakee);
+ if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) {
+ sched->targetless_wakeups++;
+ return;
+ }
+ if (wakee_event->wait_sem) {
+ sched->multitarget_wakeups++;
+ return;
+ }
+
+ wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem));
+ sem_init(wakee_event->wait_sem, 0, 0);
+ wakee_event->specific_wait = 1;
+ event->wait_sem = wakee_event->wait_sem;
+
+ sched->nr_wakeup_events++;
+}
+
+static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
+ u64 timestamp, u64 task_state __maybe_unused)
+{
+ struct sched_atom *event = get_new_event(task, timestamp);
+
+ event->type = SCHED_EVENT_SLEEP;
+
+ sched->nr_sleep_events++;
+}
+
+static struct task_desc *register_pid(struct perf_sched *sched,
+ unsigned long pid, const char *comm)
+{
+ struct task_desc *task;
+ static int pid_max;
+
+ if (sched->pid_to_task == NULL) {
+ if (sysctl__read_int("kernel/pid_max", &pid_max) < 0)
+ pid_max = MAX_PID;
+ BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL);
+ }
+ if (pid >= (unsigned long)pid_max) {
+ BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) *
+ sizeof(struct task_desc *))) == NULL);
+ while (pid >= (unsigned long)pid_max)
+ sched->pid_to_task[pid_max++] = NULL;
+ }
+
+ task = sched->pid_to_task[pid];
+
+ if (task)
+ return task;
+
+ task = zalloc(sizeof(*task));
+ task->pid = pid;
+ task->nr = sched->nr_tasks;
+ strcpy(task->comm, comm);
+ /*
+ * every task starts in sleeping state - this gets ignored
+ * if there's no wakeup pointing to this sleep state:
+ */
+ add_sched_event_sleep(sched, task, 0, 0);
+
+ sched->pid_to_task[pid] = task;
+ sched->nr_tasks++;
+ sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *));
+ BUG_ON(!sched->tasks);
+ sched->tasks[task->nr] = task;
+
+ if (verbose > 0)
+ printf("registered task #%ld, PID %ld (%s)\n", sched->nr_tasks, pid, comm);
+
+ return task;
+}
+
+
+static void print_task_traces(struct perf_sched *sched)
+{
+ struct task_desc *task;
+ unsigned long i;
+
+ for (i = 0; i < sched->nr_tasks; i++) {
+ task = sched->tasks[i];
+ printf("task %6ld (%20s:%10ld), nr_events: %ld\n",
+ task->nr, task->comm, task->pid, task->nr_events);
+ }
+}
+
+static void add_cross_task_wakeups(struct perf_sched *sched)
+{
+ struct task_desc *task1, *task2;
+ unsigned long i, j;
+
+ for (i = 0; i < sched->nr_tasks; i++) {
+ task1 = sched->tasks[i];
+ j = i + 1;
+ if (j == sched->nr_tasks)
+ j = 0;
+ task2 = sched->tasks[j];
+ add_sched_event_wakeup(sched, task1, 0, task2);
+ }
+}
+
+static void perf_sched__process_event(struct perf_sched *sched,
+ struct sched_atom *atom)
+{
+ int ret = 0;
+
+ switch (atom->type) {
+ case SCHED_EVENT_RUN:
+ burn_nsecs(sched, atom->duration);
+ break;
+ case SCHED_EVENT_SLEEP:
+ if (atom->wait_sem)
+ ret = sem_wait(atom->wait_sem);
+ BUG_ON(ret);
+ break;
+ case SCHED_EVENT_WAKEUP:
+ if (atom->wait_sem)
+ ret = sem_post(atom->wait_sem);
+ BUG_ON(ret);
+ break;
+ case SCHED_EVENT_MIGRATION:
+ break;
+ default:
+ BUG_ON(1);
+ }
+}
+
+static u64 get_cpu_usage_nsec_parent(void)
+{
+ struct rusage ru;
+ u64 sum;
+ int err;
+
+ err = getrusage(RUSAGE_SELF, &ru);
+ BUG_ON(err);
+
+ sum = ru.ru_utime.tv_sec * NSEC_PER_SEC + ru.ru_utime.tv_usec * NSEC_PER_USEC;
+ sum += ru.ru_stime.tv_sec * NSEC_PER_SEC + ru.ru_stime.tv_usec * NSEC_PER_USEC;
+
+ return sum;
+}
+
+static int self_open_counters(struct perf_sched *sched, unsigned long cur_task)
+{
+ struct perf_event_attr attr;
+ char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE];
+ int fd;
+ struct rlimit limit;
+ bool need_privilege = false;
+
+ memset(&attr, 0, sizeof(attr));
+
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_TASK_CLOCK;
+
+force_again:
+ fd = sys_perf_event_open(&attr, 0, -1, -1,
+ perf_event_open_cloexec_flag());
+
+ if (fd < 0) {
+ if (errno == EMFILE) {
+ if (sched->force) {
+ BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1);
+ limit.rlim_cur += sched->nr_tasks - cur_task;
+ if (limit.rlim_cur > limit.rlim_max) {
+ limit.rlim_max = limit.rlim_cur;
+ need_privilege = true;
+ }
+ if (setrlimit(RLIMIT_NOFILE, &limit) == -1) {
+ if (need_privilege && errno == EPERM)
+ strcpy(info, "Need privilege\n");
+ } else
+ goto force_again;
+ } else
+ strcpy(info, "Have a try with -f option\n");
+ }
+ pr_err("Error: sys_perf_event_open() syscall returned "
+ "with %d (%s)\n%s", fd,
+ str_error_r(errno, sbuf, sizeof(sbuf)), info);
+ exit(EXIT_FAILURE);
+ }
+ return fd;
+}
+
+static u64 get_cpu_usage_nsec_self(int fd)
+{
+ u64 runtime;
+ int ret;
+
+ ret = read(fd, &runtime, sizeof(runtime));
+ BUG_ON(ret != sizeof(runtime));
+
+ return runtime;
+}
+
+struct sched_thread_parms {
+ struct task_desc *task;
+ struct perf_sched *sched;
+ int fd;
+};
+
+static void *thread_func(void *ctx)
+{
+ struct sched_thread_parms *parms = ctx;
+ struct task_desc *this_task = parms->task;
+ struct perf_sched *sched = parms->sched;
+ u64 cpu_usage_0, cpu_usage_1;
+ unsigned long i, ret;
+ char comm2[22];
+ int fd = parms->fd;
+
+ zfree(&parms);
+
+ sprintf(comm2, ":%s", this_task->comm);
+ prctl(PR_SET_NAME, comm2);
+ if (fd < 0)
+ return NULL;
+again:
+ ret = sem_post(&this_task->ready_for_work);
+ BUG_ON(ret);
+ ret = pthread_mutex_lock(&sched->start_work_mutex);
+ BUG_ON(ret);
+ ret = pthread_mutex_unlock(&sched->start_work_mutex);
+ BUG_ON(ret);
+
+ cpu_usage_0 = get_cpu_usage_nsec_self(fd);
+
+ for (i = 0; i < this_task->nr_events; i++) {
+ this_task->curr_event = i;
+ perf_sched__process_event(sched, this_task->atoms[i]);
+ }
+
+ cpu_usage_1 = get_cpu_usage_nsec_self(fd);
+ this_task->cpu_usage = cpu_usage_1 - cpu_usage_0;
+ ret = sem_post(&this_task->work_done_sem);
+ BUG_ON(ret);
+
+ ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
+ BUG_ON(ret);
+ ret = pthread_mutex_unlock(&sched->work_done_wait_mutex);
+ BUG_ON(ret);
+
+ goto again;
+}
+
+static void create_tasks(struct perf_sched *sched)
+{
+ struct task_desc *task;
+ pthread_attr_t attr;
+ unsigned long i;
+ int err;
+
+ err = pthread_attr_init(&attr);
+ BUG_ON(err);
+ err = pthread_attr_setstacksize(&attr,
+ (size_t) max(16 * 1024, PTHREAD_STACK_MIN));
+ BUG_ON(err);
+ err = pthread_mutex_lock(&sched->start_work_mutex);
+ BUG_ON(err);
+ err = pthread_mutex_lock(&sched->work_done_wait_mutex);
+ BUG_ON(err);
+ for (i = 0; i < sched->nr_tasks; i++) {
+ struct sched_thread_parms *parms = malloc(sizeof(*parms));
+ BUG_ON(parms == NULL);
+ parms->task = task = sched->tasks[i];
+ parms->sched = sched;
+ parms->fd = self_open_counters(sched, i);
+ sem_init(&task->sleep_sem, 0, 0);
+ sem_init(&task->ready_for_work, 0, 0);
+ sem_init(&task->work_done_sem, 0, 0);
+ task->curr_event = 0;
+ err = pthread_create(&task->thread, &attr, thread_func, parms);
+ BUG_ON(err);
+ }
+}
+
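+/*
+ * start_work_mutex and work_done_wait_mutex implement a crude barrier:
+ * create_tasks() takes both before spawning workers, so each
+ * thread_func() posts ready_for_work and then blocks acquiring
+ * start_work_mutex. Per round, wait_for_tasks() briefly drops
+ * work_done_wait_mutex so workers from the previous round can loop
+ * around, re-takes it, then drops start_work_mutex to start the
+ * measurement; once every work_done_sem has been posted it re-takes
+ * start_work_mutex for the next round.
+ */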
+static void wait_for_tasks(struct perf_sched *sched)
+{
+ u64 cpu_usage_0, cpu_usage_1;
+ struct task_desc *task;
+ unsigned long i, ret;
+
+ sched->start_time = get_nsecs();
+ sched->cpu_usage = 0;
+ pthread_mutex_unlock(&sched->work_done_wait_mutex);
+
+ for (i = 0; i < sched->nr_tasks; i++) {
+ task = sched->tasks[i];
+ ret = sem_wait(&task->ready_for_work);
+ BUG_ON(ret);
+ sem_init(&task->ready_for_work, 0, 0);
+ }
+ ret = pthread_mutex_lock(&sched->work_done_wait_mutex);
+ BUG_ON(ret);
+
+ cpu_usage_0 = get_cpu_usage_nsec_parent();
+
+ pthread_mutex_unlock(&sched->start_work_mutex);
+
+ for (i = 0; i < sched->nr_tasks; i++) {
+ task = sched->tasks[i];
+ ret = sem_wait(&task->work_done_sem);
+ BUG_ON(ret);
+ sem_init(&task->work_done_sem, 0, 0);
+ sched->cpu_usage += task->cpu_usage;
+ task->cpu_usage = 0;
+ }
+
+ cpu_usage_1 = get_cpu_usage_nsec_parent();
+ if (!sched->runavg_cpu_usage)
+ sched->runavg_cpu_usage = sched->cpu_usage;
+ sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat;
+
+ sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0;
+ if (!sched->runavg_parent_cpu_usage)
+ sched->runavg_parent_cpu_usage = sched->parent_cpu_usage;
+ sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) +
+ sched->parent_cpu_usage)/sched->replay_repeat;
+
+ ret = pthread_mutex_lock(&sched->start_work_mutex);
+ BUG_ON(ret);
+
+ for (i = 0; i < sched->nr_tasks; i++) {
+ task = sched->tasks[i];
+ sem_init(&task->sleep_sem, 0, 0);
+ task->curr_event = 0;
+ }
+}
+
+static void run_one_test(struct perf_sched *sched)
+{
+ u64 T0, T1, delta, avg_delta, fluct;
+
+ T0 = get_nsecs();
+ wait_for_tasks(sched);
+ T1 = get_nsecs();
+
+ delta = T1 - T0;
+ sched->sum_runtime += delta;
+ sched->nr_runs++;
+
+ avg_delta = sched->sum_runtime / sched->nr_runs;
+ if (delta < avg_delta)
+ fluct = avg_delta - delta;
+ else
+ fluct = delta - avg_delta;
+ sched->sum_fluct += fluct;
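+ /*
+ * run_avg below is an exponentially weighted moving average with
+ * weight 1/replay_repeat. Worked example (illustrative values):
+ * replay_repeat = 10, run_avg = 5 ms and delta = 15 ms yield a
+ * new run_avg of (5 * 9 + 15) / 10 = 6 ms.
+ */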
+ if (!sched->run_avg)
+ sched->run_avg = delta;
+ sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat;
+
+ printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / NSEC_PER_MSEC);
+
+ printf("ravg: %0.2f, ", (double)sched->run_avg / NSEC_PER_MSEC);
+
+ printf("cpu: %0.2f / %0.2f",
+ (double)sched->cpu_usage / NSEC_PER_MSEC, (double)sched->runavg_cpu_usage / NSEC_PER_MSEC);
+
+#if 0
+ /*
+ * rusage statistics done by the parent, these are less
+ * accurate than the sched->sum_exec_runtime based statistics:
+ */
+ printf(" [%0.2f / %0.2f]",
+ (double)sched->parent_cpu_usage / NSEC_PER_MSEC,
+ (double)sched->runavg_parent_cpu_usage / NSEC_PER_MSEC);
+#endif
+
+ printf("\n");
+
+ if (sched->nr_sleep_corrections)
+ printf(" (%ld sleep corrections)\n", sched->nr_sleep_corrections);
+ sched->nr_sleep_corrections = 0;
+}
+
+static void test_calibrations(struct perf_sched *sched)
+{
+ u64 T0, T1;
+
+ T0 = get_nsecs();
+ burn_nsecs(sched, NSEC_PER_MSEC);
+ T1 = get_nsecs();
+
+ printf("the run test took %" PRIu64 " nsecs\n", T1 - T0);
+
+ T0 = get_nsecs();
+ sleep_nsecs(NSEC_PER_MSEC);
+ T1 = get_nsecs();
+
+ printf("the sleep test took %" PRIu64 " nsecs\n", T1 - T0);
+}
+
+static int
+replay_wakeup_event(struct perf_sched *sched,
+ struct perf_evsel *evsel, struct perf_sample *sample,
+ struct machine *machine __maybe_unused)
+{
+ const char *comm = perf_evsel__strval(evsel, sample, "comm");
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ struct task_desc *waker, *wakee;
+
+ if (verbose > 0) {
+ printf("sched_wakeup event %p\n", evsel);
+
+ printf(" ... pid %d woke up %s/%d\n", sample->tid, comm, pid);
+ }
+
+ waker = register_pid(sched, sample->tid, "<unknown>");
+ wakee = register_pid(sched, pid, comm);
+
+ add_sched_event_wakeup(sched, waker, sample->time, wakee);
+ return 0;
+}
+
+static int replay_switch_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine __maybe_unused)
+{
+ const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"),
+ *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
+ const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ struct task_desc *prev, __maybe_unused *next;
+ u64 timestamp0, timestamp = sample->time;
+ int cpu = sample->cpu;
+ s64 delta;
+
+ if (verbose > 0)
+ printf("sched_switch event %p\n", evsel);
+
+ if (cpu >= MAX_CPUS || cpu < 0)
+ return 0;
+
+ timestamp0 = sched->cpu_last_switched[cpu];
+ if (timestamp0)
+ delta = timestamp - timestamp0;
+ else
+ delta = 0;
+
+ if (delta < 0) {
+ pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+ return -1;
+ }
+
+ pr_debug(" ... switch from %s/%d to %s/%d [ran %" PRIu64 " nsecs]\n",
+ prev_comm, prev_pid, next_comm, next_pid, delta);
+
+ prev = register_pid(sched, prev_pid, prev_comm);
+ next = register_pid(sched, next_pid, next_comm);
+
+ sched->cpu_last_switched[cpu] = timestamp;
+
+ add_sched_event_run(sched, prev, timestamp, delta);
+ add_sched_event_sleep(sched, prev, timestamp, prev_state);
+
+ return 0;
+}
+
+static int replay_fork_event(struct perf_sched *sched,
+ union perf_event *event,
+ struct machine *machine)
+{
+ struct thread *child, *parent;
+
+ child = machine__findnew_thread(machine, event->fork.pid,
+ event->fork.tid);
+ parent = machine__findnew_thread(machine, event->fork.ppid,
+ event->fork.ptid);
+
+ if (child == NULL || parent == NULL) {
+ pr_debug("thread does not exist on fork event: child %p, parent %p\n",
+ child, parent);
+ goto out_put;
+ }
+
+ if (verbose > 0) {
+ printf("fork event\n");
+ printf("... parent: %s/%d\n", thread__comm_str(parent), parent->tid);
+ printf("... child: %s/%d\n", thread__comm_str(child), child->tid);
+ }
+
+ register_pid(sched, parent->tid, thread__comm_str(parent));
+ register_pid(sched, child->tid, thread__comm_str(child));
+out_put:
+ thread__put(child);
+ thread__put(parent);
+ return 0;
+}
+
+struct sort_dimension {
+ const char *name;
+ sort_fn_t cmp;
+ struct list_head list;
+};
+
+/*
+ * handle runtime stats saved per thread
+ */
+static struct thread_runtime *thread__init_runtime(struct thread *thread)
+{
+ struct thread_runtime *r;
+
+ r = zalloc(sizeof(struct thread_runtime));
+ if (!r)
+ return NULL;
+
+ init_stats(&r->run_stats);
+ thread__set_priv(thread, r);
+
+ return r;
+}
+
+static struct thread_runtime *thread__get_runtime(struct thread *thread)
+{
+ struct thread_runtime *tr;
+
+ tr = thread__priv(thread);
+ if (tr == NULL) {
+ tr = thread__init_runtime(thread);
+ if (tr == NULL)
+ pr_debug("Failed to malloc memory for runtime data.\n");
+ }
+
+ return tr;
+}
+
+static int
+thread_lat_cmp(struct list_head *list, struct work_atoms *l, struct work_atoms *r)
+{
+ struct sort_dimension *sort;
+ int ret = 0;
+
+ BUG_ON(list_empty(list));
+
+ list_for_each_entry(sort, list, list) {
+ ret = sort->cmp(l, r);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static struct work_atoms *
+thread_atoms_search(struct rb_root *root, struct thread *thread,
+ struct list_head *sort_list)
+{
+ struct rb_node *node = root->rb_node;
+ struct work_atoms key = { .thread = thread };
+
+ while (node) {
+ struct work_atoms *atoms;
+ int cmp;
+
+ atoms = container_of(node, struct work_atoms, node);
+
+ cmp = thread_lat_cmp(sort_list, &key, atoms);
+ if (cmp > 0)
+ node = node->rb_left;
+ else if (cmp < 0)
+ node = node->rb_right;
+ else {
+ BUG_ON(thread != atoms->thread);
+ return atoms;
+ }
+ }
+ return NULL;
+}
+
+static void
+__thread_latency_insert(struct rb_root *root, struct work_atoms *data,
+ struct list_head *sort_list)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ while (*new) {
+ struct work_atoms *this;
+ int cmp;
+
+ this = container_of(*new, struct work_atoms, node);
+ parent = *new;
+
+ cmp = thread_lat_cmp(sort_list, data, this);
+
+ if (cmp > 0)
+ new = &((*new)->rb_left);
+ else
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
+{
+ struct work_atoms *atoms = zalloc(sizeof(*atoms));
+ if (!atoms) {
+ pr_err("No memory at %s\n", __func__);
+ return -1;
+ }
+
+ atoms->thread = thread__get(thread);
+ INIT_LIST_HEAD(&atoms->work_list);
+ __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid);
+ return 0;
+}
+
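+/*
+ * Map a raw prev_state value to a state letter by indexing directly
+ * into TASK_STATE_TO_CHAR_STR: e.g. TASK_RUNNING (0) -> 'R',
+ * TASK_INTERRUPTIBLE (1) -> 'S', TASK_UNINTERRUPTIBLE (2) -> 'D'.
+ * Direct indexing is only exact for these low values; composite
+ * bitmask states would land past the intended letter.
+ */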
+static char sched_out_state(u64 prev_state)
+{
+ const char *str = TASK_STATE_TO_CHAR_STR;
+
+ return str[prev_state];
+}
+
+static int
+add_sched_out_event(struct work_atoms *atoms,
+ char run_state,
+ u64 timestamp)
+{
+ struct work_atom *atom = zalloc(sizeof(*atom));
+ if (!atom) {
+ pr_err("Non memory at %s", __func__);
+ return -1;
+ }
+
+ atom->sched_out_time = timestamp;
+
+ if (run_state == 'R') {
+ atom->state = THREAD_WAIT_CPU;
+ atom->wake_up_time = atom->sched_out_time;
+ }
+
+ list_add_tail(&atom->list, &atoms->work_list);
+ return 0;
+}
+
+static void
+add_runtime_event(struct work_atoms *atoms, u64 delta,
+ u64 timestamp __maybe_unused)
+{
+ struct work_atom *atom;
+
+ BUG_ON(list_empty(&atoms->work_list));
+
+ atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+
+ atom->runtime += delta;
+ atoms->total_runtime += delta;
+}
+
+static void
+add_sched_in_event(struct work_atoms *atoms, u64 timestamp)
+{
+ struct work_atom *atom;
+ u64 delta;
+
+ if (list_empty(&atoms->work_list))
+ return;
+
+ atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+
+ if (atom->state != THREAD_WAIT_CPU)
+ return;
+
+ if (timestamp < atom->wake_up_time) {
+ atom->state = THREAD_IGNORE;
+ return;
+ }
+
+ atom->state = THREAD_SCHED_IN;
+ atom->sched_in_time = timestamp;
+
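+ /*
+ * Wakeup-to-schedule-in latency: e.g. wake_up_time = 100 and
+ * sched_in_time = 130 add 30 to total_lat and may update max_lat.
+ */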
+ delta = atom->sched_in_time - atom->wake_up_time;
+ atoms->total_lat += delta;
+ if (delta > atoms->max_lat) {
+ atoms->max_lat = delta;
+ atoms->max_lat_at = timestamp;
+ }
+ atoms->nb_atoms++;
+}
+
+static int latency_switch_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ struct work_atoms *out_events, *in_events;
+ struct thread *sched_out, *sched_in;
+ u64 timestamp0, timestamp = sample->time;
+ int cpu = sample->cpu, err = -1;
+ s64 delta;
+
+ BUG_ON(cpu >= MAX_CPUS || cpu < 0);
+
+ timestamp0 = sched->cpu_last_switched[cpu];
+ sched->cpu_last_switched[cpu] = timestamp;
+ if (timestamp0)
+ delta = timestamp - timestamp0;
+ else
+ delta = 0;
+
+ if (delta < 0) {
+ pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+ return -1;
+ }
+
+ sched_out = machine__findnew_thread(machine, -1, prev_pid);
+ sched_in = machine__findnew_thread(machine, -1, next_pid);
+ if (sched_out == NULL || sched_in == NULL)
+ goto out_put;
+
+ out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
+ if (!out_events) {
+ if (thread_atoms_insert(sched, sched_out))
+ goto out_put;
+ out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
+ if (!out_events) {
+ pr_err("out-event: Internal tree error");
+ goto out_put;
+ }
+ }
+ if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
+ return -1;
+
+ in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
+ if (!in_events) {
+ if (thread_atoms_insert(sched, sched_in))
+ goto out_put;
+ in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
+ if (!in_events) {
+ pr_err("in-event: Internal tree error");
+ goto out_put;
+ }
+ /*
+ * Task came in that we have not heard about yet,
+ * add an initial atom in runnable state:
+ */
+ if (add_sched_out_event(in_events, 'R', timestamp))
+ goto out_put;
+ }
+ add_sched_in_event(in_events, timestamp);
+ err = 0;
+out_put:
+ thread__put(sched_out);
+ thread__put(sched_in);
+ return err;
+}
+
+static int latency_runtime_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ struct thread *thread = machine__findnew_thread(machine, -1, pid);
+ struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
+ u64 timestamp = sample->time;
+ int cpu = sample->cpu, err = -1;
+
+ if (thread == NULL)
+ return -1;
+
+ BUG_ON(cpu >= MAX_CPUS || cpu < 0);
+ if (!atoms) {
+ if (thread_atoms_insert(sched, thread))
+ goto out_put;
+ atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
+ if (!atoms) {
+ pr_err("in-event: Internal tree error");
+ goto out_put;
+ }
+ if (add_sched_out_event(atoms, 'R', timestamp))
+ goto out_put;
+ }
+
+ add_runtime_event(atoms, runtime, timestamp);
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int latency_wakeup_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ struct work_atoms *atoms;
+ struct work_atom *atom;
+ struct thread *wakee;
+ u64 timestamp = sample->time;
+ int err = -1;
+
+ wakee = machine__findnew_thread(machine, -1, pid);
+ if (wakee == NULL)
+ return -1;
+ atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
+ if (!atoms) {
+ if (thread_atoms_insert(sched, wakee))
+ goto out_put;
+ atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
+ if (!atoms) {
+ pr_err("wakeup-event: Internal tree error");
+ goto out_put;
+ }
+ if (add_sched_out_event(atoms, 'S', timestamp))
+ goto out_put;
+ }
+
+ BUG_ON(list_empty(&atoms->work_list));
+
+ atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+
+ /*
+ * A wakeup event is not guaranteed to happen while the task is
+ * off the run queue; it can also occur while the task is still
+ * on the run queue, where the wakeup only changes ->state to
+ * TASK_RUNNING. In that case we must not set ->wake_up_time for
+ * a task that is already runnable.
+ *
+ * You WILL be missing events if you've recorded only
+ * one CPU, or are looking at only one, so don't
+ * skip in this case.
+ */
+ if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
+ goto out_ok;
+
+ sched->nr_timestamps++;
+ if (atom->sched_out_time > timestamp) {
+ sched->nr_unordered_timestamps++;
+ goto out_ok;
+ }
+
+ atom->state = THREAD_WAIT_CPU;
+ atom->wake_up_time = timestamp;
+out_ok:
+ err = 0;
+out_put:
+ thread__put(wakee);
+ return err;
+}
+
+static int latency_migrate_task_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ u64 timestamp = sample->time;
+ struct work_atoms *atoms;
+ struct work_atom *atom;
+ struct thread *migrant;
+ int err = -1;
+
+ /*
+ * Only need to worry about migration when profiling one CPU.
+ */
+ if (sched->profile_cpu == -1)
+ return 0;
+
+ migrant = machine__findnew_thread(machine, -1, pid);
+ if (migrant == NULL)
+ return -1;
+ atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
+ if (!atoms) {
+ if (thread_atoms_insert(sched, migrant))
+ goto out_put;
+ register_pid(sched, migrant->tid, thread__comm_str(migrant));
+ atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
+ if (!atoms) {
+ pr_err("migration-event: Internal tree error");
+ goto out_put;
+ }
+ if (add_sched_out_event(atoms, 'R', timestamp))
+ goto out_put;
+ }
+
+ BUG_ON(list_empty(&atoms->work_list));
+
+ atom = list_entry(atoms->work_list.prev, struct work_atom, list);
+ atom->sched_in_time = atom->sched_out_time = atom->wake_up_time = timestamp;
+
+ sched->nr_timestamps++;
+
+ if (atom->sched_out_time > timestamp)
+ sched->nr_unordered_timestamps++;
+ err = 0;
+out_put:
+ thread__put(migrant);
+ return err;
+}
+
+static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
+{
+ int i;
+ int ret;
+ u64 avg;
+ char max_lat_at[32];
+
+ if (!work_list->nb_atoms)
+ return;
+ /*
+ * Ignore idle threads:
+ */
+ if (!strcmp(thread__comm_str(work_list->thread), "swapper"))
+ return;
+
+ sched->all_runtime += work_list->total_runtime;
+ sched->all_count += work_list->nb_atoms;
+
+ if (work_list->num_merged > 1)
+ ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
+ else
+ ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+
+ for (i = 0; i < 24 - ret; i++)
+ printf(" ");
+
+ avg = work_list->total_lat / work_list->nb_atoms;
+ timestamp__scnprintf_usec(work_list->max_lat_at, max_lat_at, sizeof(max_lat_at));
+
+ printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13s s\n",
+ (double)work_list->total_runtime / NSEC_PER_MSEC,
+ work_list->nb_atoms, (double)avg / NSEC_PER_MSEC,
+ (double)work_list->max_lat / NSEC_PER_MSEC,
+ max_lat_at);
+}
+
+static int pid_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+ if (l->thread == r->thread)
+ return 0;
+ if (l->thread->tid < r->thread->tid)
+ return -1;
+ if (l->thread->tid > r->thread->tid)
+ return 1;
+ return (int)(l->thread - r->thread);
+}
+
+static int avg_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+ u64 avgl, avgr;
+
+ if (!l->nb_atoms)
+ return -1;
+
+ if (!r->nb_atoms)
+ return 1;
+
+ avgl = l->total_lat / l->nb_atoms;
+ avgr = r->total_lat / r->nb_atoms;
+
+ if (avgl < avgr)
+ return -1;
+ if (avgl > avgr)
+ return 1;
+
+ return 0;
+}
+
+static int max_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+ if (l->max_lat < r->max_lat)
+ return -1;
+ if (l->max_lat > r->max_lat)
+ return 1;
+
+ return 0;
+}
+
+static int switch_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+ if (l->nb_atoms < r->nb_atoms)
+ return -1;
+ if (l->nb_atoms > r->nb_atoms)
+ return 1;
+
+ return 0;
+}
+
+static int runtime_cmp(struct work_atoms *l, struct work_atoms *r)
+{
+ if (l->total_runtime < r->total_runtime)
+ return -1;
+ if (l->total_runtime > r->total_runtime)
+ return 1;
+
+ return 0;
+}
+
+static int sort_dimension__add(const char *tok, struct list_head *list)
+{
+ size_t i;
+ static struct sort_dimension avg_sort_dimension = {
+ .name = "avg",
+ .cmp = avg_cmp,
+ };
+ static struct sort_dimension max_sort_dimension = {
+ .name = "max",
+ .cmp = max_cmp,
+ };
+ static struct sort_dimension pid_sort_dimension = {
+ .name = "pid",
+ .cmp = pid_cmp,
+ };
+ static struct sort_dimension runtime_sort_dimension = {
+ .name = "runtime",
+ .cmp = runtime_cmp,
+ };
+ static struct sort_dimension switch_sort_dimension = {
+ .name = "switch",
+ .cmp = switch_cmp,
+ };
+ struct sort_dimension *available_sorts[] = {
+ &pid_sort_dimension,
+ &avg_sort_dimension,
+ &max_sort_dimension,
+ &switch_sort_dimension,
+ &runtime_sort_dimension,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(available_sorts); i++) {
+ if (!strcmp(available_sorts[i]->name, tok)) {
+ list_add_tail(&available_sorts[i]->list, list);
+
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
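+/*
+ * Drain every work_atoms node out of atom_root (and then, on the
+ * second pass via the 'again' label, out of merged_atom_root) and
+ * re-insert it into sorted_atom_root ordered by the user-selected
+ * sort keys.
+ */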
+static void perf_sched__sort_lat(struct perf_sched *sched)
+{
+ struct rb_node *node;
+ struct rb_root *root = &sched->atom_root;
+again:
+ for (;;) {
+ struct work_atoms *data;
+ node = rb_first(root);
+ if (!node)
+ break;
+
+ rb_erase(node, root);
+ data = rb_entry(node, struct work_atoms, node);
+ __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
+ }
+ if (root == &sched->atom_root) {
+ root = &sched->merged_atom_root;
+ goto again;
+ }
+}
+
+static int process_sched_wakeup_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ if (sched->tp_handler->wakeup_event)
+ return sched->tp_handler->wakeup_event(sched, evsel, sample, machine);
+
+ return 0;
+}
+
+union map_priv {
+ void *ptr;
+ bool color;
+};
+
+static bool thread__has_color(struct thread *thread)
+{
+ union map_priv priv = {
+ .ptr = thread__priv(thread),
+ };
+
+ return priv.color;
+}
+
+static struct thread*
+map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
+{
+ struct thread *thread = machine__findnew_thread(machine, pid, tid);
+ union map_priv priv = {
+ .color = false,
+ };
+
+ if (!sched->map.color_pids || !thread || thread__priv(thread))
+ return thread;
+
+ if (thread_map__has(sched->map.color_pids, tid))
+ priv.color = true;
+
+ thread__set_priv(thread, priv.ptr);
+ return thread;
+}
+
+static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct machine *machine)
+{
+ const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ struct thread *sched_in;
+ struct thread_runtime *tr;
+ int new_shortname;
+ u64 timestamp0, timestamp = sample->time;
+ s64 delta;
+ int i, this_cpu = sample->cpu;
+ int cpus_nr;
+ bool new_cpu = false;
+ const char *color = PERF_COLOR_NORMAL;
+ char stimestamp[32];
+
+ BUG_ON(this_cpu >= MAX_CPUS || this_cpu < 0);
+
+ if (this_cpu > sched->max_cpu)
+ sched->max_cpu = this_cpu;
+
+ if (sched->map.comp) {
+ cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
+ if (!test_and_set_bit(this_cpu, sched->map.comp_cpus_mask)) {
+ sched->map.comp_cpus[cpus_nr++] = this_cpu;
+ new_cpu = true;
+ }
+ } else
+ cpus_nr = sched->max_cpu;
+
+ timestamp0 = sched->cpu_last_switched[this_cpu];
+ sched->cpu_last_switched[this_cpu] = timestamp;
+ if (timestamp0)
+ delta = timestamp - timestamp0;
+ else
+ delta = 0;
+
+ if (delta < 0) {
+ pr_err("hm, delta: %" PRIu64 " < 0 ?\n", delta);
+ return -1;
+ }
+
+ sched_in = map__findnew_thread(sched, machine, -1, next_pid);
+ if (sched_in == NULL)
+ return -1;
+
+ tr = thread__get_runtime(sched_in);
+ if (tr == NULL) {
+ thread__put(sched_in);
+ return -1;
+ }
+
+ sched->curr_thread[this_cpu] = thread__get(sched_in);
+
+ printf(" ");
+
+ new_shortname = 0;
+ if (!tr->shortname[0]) {
+ if (!strcmp(thread__comm_str(sched_in), "swapper")) {
+ /*
+ * Don't allocate a letter-number for swapper:0
+ * as a shortname. Instead, we use '.' for it.
+ */
+ tr->shortname[0] = '.';
+ tr->shortname[1] = ' ';
+ } else {
+ tr->shortname[0] = sched->next_shortname1;
+ tr->shortname[1] = sched->next_shortname2;
+
+ if (sched->next_shortname1 < 'Z') {
+ sched->next_shortname1++;
+ } else {
+ sched->next_shortname1 = 'A';
+ if (sched->next_shortname2 < '9')
+ sched->next_shortname2++;
+ else
+ sched->next_shortname2 = '0';
+ }
+ }
+ new_shortname = 1;
+ }
+
+ for (i = 0; i < cpus_nr; i++) {
+ int cpu = sched->map.comp ? sched->map.comp_cpus[i] : i;
+ struct thread *curr_thread = sched->curr_thread[cpu];
+ struct thread_runtime *curr_tr;
+ const char *pid_color = color;
+ const char *cpu_color = color;
+
+ if (curr_thread && thread__has_color(curr_thread))
+ pid_color = COLOR_PIDS;
+
+ if (sched->map.cpus && !cpu_map__has(sched->map.cpus, cpu))
+ continue;
+
+ if (sched->map.color_cpus && cpu_map__has(sched->map.color_cpus, cpu))
+ cpu_color = COLOR_CPUS;
+
+ if (cpu != this_cpu)
+ color_fprintf(stdout, color, " ");
+ else
+ color_fprintf(stdout, cpu_color, "*");
+
+ if (sched->curr_thread[cpu]) {
+ curr_tr = thread__get_runtime(sched->curr_thread[cpu]);
+ if (curr_tr == NULL) {
+ thread__put(sched_in);
+ return -1;
+ }
+ color_fprintf(stdout, pid_color, "%2s ", curr_tr->shortname);
+ } else
+ color_fprintf(stdout, color, " ");
+ }
+
+ if (sched->map.cpus && !cpu_map__has(sched->map.cpus, this_cpu))
+ goto out;
+
+ timestamp__scnprintf_usec(timestamp, stimestamp, sizeof(stimestamp));
+ color_fprintf(stdout, color, " %12s secs ", stimestamp);
+ if (new_shortname || tr->comm_changed || (verbose > 0 && sched_in->tid)) {
+ const char *pid_color = color;
+
+ if (thread__has_color(sched_in))
+ pid_color = COLOR_PIDS;
+
+ color_fprintf(stdout, pid_color, "%s => %s:%d",
+ tr->shortname, thread__comm_str(sched_in), sched_in->tid);
+ tr->comm_changed = false;
+ }
+
+ if (sched->map.comp && new_cpu)
+ color_fprintf(stdout, color, " (CPU %d)", this_cpu);
+
+out:
+ color_fprintf(stdout, color, "\n");
+
+ thread__put(sched_in);
+
+ return 0;
+}
+
+static int process_sched_switch_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+ int this_cpu = sample->cpu, err = 0;
+ u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+
+ if (sched->curr_pid[this_cpu] != (u32)-1) {
+ /*
+ * Are we trying to switch away a PID that is
+ * not current?
+ */
+ if (sched->curr_pid[this_cpu] != prev_pid)
+ sched->nr_context_switch_bugs++;
+ }
+
+ if (sched->tp_handler->switch_event)
+ err = sched->tp_handler->switch_event(sched, evsel, sample, machine);
+
+ sched->curr_pid[this_cpu] = next_pid;
+ return err;
+}
+
+static int process_sched_runtime_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ if (sched->tp_handler->runtime_event)
+ return sched->tp_handler->runtime_event(sched, evsel, sample, machine);
+
+ return 0;
+}
+
+static int perf_sched__process_fork_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ /* run the fork event through the perf machinery */
+ perf_event__process_fork(tool, event, sample, machine);
+
+ /* and then run additional processing needed for this command */
+ if (sched->tp_handler->fork_event)
+ return sched->tp_handler->fork_event(sched, event, machine);
+
+ return 0;
+}
+
+static int process_sched_migrate_task_event(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+
+ if (sched->tp_handler->migrate_task_event)
+ return sched->tp_handler->migrate_task_event(sched, evsel, sample, machine);
+
+ return 0;
+}
+
+typedef int (*tracepoint_handler)(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine);
+
+static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ int err = 0;
+
+ if (evsel->handler != NULL) {
+ tracepoint_handler f = evsel->handler;
+ err = f(tool, evsel, sample, machine);
+ }
+
+ return err;
+}
+
+static int perf_sched__process_comm(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct thread_runtime *tr;
+ int err;
+
+ err = perf_event__process_comm(tool, event, sample, machine);
+ if (err)
+ return err;
+
+ thread = machine__find_thread(machine, sample->pid, sample->tid);
+ if (!thread) {
+ pr_err("Internal error: can't find thread\n");
+ return -1;
+ }
+
+ tr = thread__get_runtime(thread);
+ if (tr == NULL) {
+ thread__put(thread);
+ return -1;
+ }
+
+ tr->comm_changed = true;
+ thread__put(thread);
+
+ return 0;
+}
+
+static int perf_sched__read_events(struct perf_sched *sched)
+{
+ const struct perf_evsel_str_handler handlers[] = {
+ { "sched:sched_switch", process_sched_switch_event, },
+ { "sched:sched_stat_runtime", process_sched_runtime_event, },
+ { "sched:sched_wakeup", process_sched_wakeup_event, },
+ { "sched:sched_wakeup_new", process_sched_wakeup_event, },
+ { "sched:sched_migrate_task", process_sched_migrate_task_event, },
+ };
+ struct perf_session *session;
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = sched->force,
+ };
+ int rc = -1;
+
+ session = perf_session__new(&data, false, &sched->tool);
+ if (session == NULL) {
+ pr_debug("No Memory for session\n");
+ return -1;
+ }
+
+ symbol__init(&session->header.env);
+
+ if (perf_session__set_tracepoints_handlers(session, handlers))
+ goto out_delete;
+
+ if (perf_session__has_traces(session, "record -R")) {
+ int err = perf_session__process_events(session);
+ if (err) {
+ pr_err("Failed to process events, error %d", err);
+ goto out_delete;
+ }
+
+ sched->nr_events = session->evlist->stats.nr_events[0];
+ sched->nr_lost_events = session->evlist->stats.total_lost;
+ sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST];
+ }
+
+ rc = 0;
+out_delete:
+ perf_session__delete(session);
+ return rc;
+}
+
+/*
+ * scheduling times are printed as msec.usec
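+ * e.g. print_sched_time(1234567, 6) prints "     1.234 "
+ * (1 msec 234 usec, msec field padded to width 6)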
+ */
+static inline void print_sched_time(unsigned long long nsecs, int width)
+{
+ unsigned long msecs;
+ unsigned long usecs;
+
+ msecs = nsecs / NSEC_PER_MSEC;
+ nsecs -= msecs * NSEC_PER_MSEC;
+ usecs = nsecs / NSEC_PER_USEC;
+ printf("%*lu.%03lu ", width, msecs, usecs);
+}
+
+/*
+ * returns runtime data for event, allocating memory for it the
+ * first time it is used.
+ */
+static struct evsel_runtime *perf_evsel__get_runtime(struct perf_evsel *evsel)
+{
+ struct evsel_runtime *r = evsel->priv;
+
+ if (r == NULL) {
+ r = zalloc(sizeof(struct evsel_runtime));
+ evsel->priv = r;
+ }
+
+ return r;
+}
+
+/*
+ * save last time event was seen per cpu
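+ * The per-cpu array grows in powers of two: e.g. the first sample
+ * seen on cpu 5 allocates __roundup_pow_of_two(6) == 8 slots and
+ * zero-fills the newly added ones.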
+ */
+static void perf_evsel__save_time(struct perf_evsel *evsel,
+ u64 timestamp, u32 cpu)
+{
+ struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+
+ if (r == NULL)
+ return;
+
+ if ((cpu >= r->ncpu) || (r->last_time == NULL)) {
+ int i, n = __roundup_pow_of_two(cpu+1);
+ void *p = r->last_time;
+
+ p = realloc(r->last_time, n * sizeof(u64));
+ if (!p)
+ return;
+
+ r->last_time = p;
+ for (i = r->ncpu; i < n; ++i)
+ r->last_time[i] = (u64) 0;
+
+ r->ncpu = n;
+ }
+
+ r->last_time[cpu] = timestamp;
+}
+
+/* returns last time this event was seen on the given cpu */
+static u64 perf_evsel__get_time(struct perf_evsel *evsel, u32 cpu)
+{
+ struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+
+ if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
+ return 0;
+
+ return r->last_time[cpu];
+}
+
+static int comm_width = 30;
+
+static char *timehist_get_commstr(struct thread *thread)
+{
+ static char str[32];
+ const char *comm = thread__comm_str(thread);
+ pid_t tid = thread->tid;
+ pid_t pid = thread->pid_;
+ int n;
+
+ if (pid == 0)
+ n = scnprintf(str, sizeof(str), "%s", comm);
+
+ else if (tid != pid)
+ n = scnprintf(str, sizeof(str), "%s[%d/%d]", comm, tid, pid);
+
+ else
+ n = scnprintf(str, sizeof(str), "%s[%d]", comm, tid);
+
+ if (n > comm_width)
+ comm_width = n;
+
+ return str;
+}
+
+static void timehist_header(struct perf_sched *sched)
+{
+ u32 ncpus = sched->max_cpu + 1;
+ u32 i, j;
+
+ printf("%15s %6s ", "time", "cpu");
+
+ if (sched->show_cpu_visual) {
+ printf(" ");
+ for (i = 0, j = 0; i < ncpus; ++i) {
+ printf("%x", j++);
+ if (j > 15)
+ j = 0;
+ }
+ printf(" ");
+ }
+
+ printf(" %-*s %9s %9s %9s", comm_width,
+ "task name", "wait time", "sch delay", "run time");
+
+ if (sched->show_state)
+ printf(" %s", "state");
+
+ printf("\n");
+
+ /*
+ * units row
+ */
+ printf("%15s %-6s ", "", "");
+
+ if (sched->show_cpu_visual)
+ printf(" %*s ", ncpus, "");
+
+ printf(" %-*s %9s %9s %9s", comm_width,
+ "[tid/pid]", "(msec)", "(msec)", "(msec)");
+
+ if (sched->show_state)
+ printf(" %5s", "");
+
+ printf("\n");
+
+ /*
+ * separator
+ */
+ printf("%.15s %.6s ", graph_dotted_line, graph_dotted_line);
+
+ if (sched->show_cpu_visual)
+ printf(" %.*s ", ncpus, graph_dotted_line);
+
+ printf(" %.*s %.9s %.9s %.9s", comm_width,
+ graph_dotted_line, graph_dotted_line, graph_dotted_line,
+ graph_dotted_line);
+
+ if (sched->show_state)
+ printf(" %.5s", graph_dotted_line);
+
+ printf("\n");
+}
+
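+/*
+ * e.g. state 1 (TASK_INTERRUPTIBLE) has ffs() == 1 and maps to 'S',
+ * state 2 (TASK_UNINTERRUPTIBLE) has ffs() == 2 and maps to 'D';
+ * the idle task (tid 0) is always reported as 'I'.
+ */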
+static char task_state_char(struct thread *thread, int state)
+{
+ static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
+ unsigned bit = state ? ffs(state) : 0;
+
+ /* 'I' for idle */
+ if (thread->tid == 0)
+ return 'I';
+
+ return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
+}
+
+static void timehist_print_sample(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct addr_location *al,
+ struct thread *thread,
+ u64 t, int state)
+{
+ struct thread_runtime *tr = thread__priv(thread);
+ const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
+ const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ u32 max_cpus = sched->max_cpu + 1;
+ char tstr[64];
+ char nstr[30];
+ u64 wait_time;
+
+ timestamp__scnprintf_usec(t, tstr, sizeof(tstr));
+ printf("%15s [%04d] ", tstr, sample->cpu);
+
+ if (sched->show_cpu_visual) {
+ u32 i;
+ char c;
+
+ printf(" ");
+ for (i = 0; i < max_cpus; ++i) {
+ /* flag idle times with 'i'; others are sched events */
+ if (i == sample->cpu)
+ c = (thread->tid == 0) ? 'i' : 's';
+ else
+ c = ' ';
+ printf("%c", c);
+ }
+ printf(" ");
+ }
+
+ printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+ wait_time = tr->dt_sleep + tr->dt_iowait + tr->dt_preempt;
+ print_sched_time(wait_time, 6);
+
+ print_sched_time(tr->dt_delay, 6);
+ print_sched_time(tr->dt_run, 6);
+
+ if (sched->show_state)
+ printf(" %5c ", task_state_char(thread, state));
+
+ if (sched->show_next) {
+ snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
+ printf(" %-*s", comm_width, nstr);
+ }
+
+ if (sched->show_wakeups && !sched->show_next)
+ printf(" %-*s", comm_width, "");
+
+ if (thread->tid == 0)
+ goto out;
+
+ if (sched->show_callchain)
+ printf(" ");
+
+ sample__fprintf_sym(sample, al, 0,
+ EVSEL__PRINT_SYM | EVSEL__PRINT_ONELINE |
+ EVSEL__PRINT_CALLCHAIN_ARROW |
+ EVSEL__PRINT_SKIP_IGNORED,
+ &callchain_cursor, stdout);
+
+out:
+ printf("\n");
+}
+
+/*
+ * Explanation of delta-time stats:
+ *
+ * t = time of current schedule out event
+ * tprev = time of previous sched out event
+ * also time of schedule-in event for current task
+ * last_time = time of last sched change event for current task
+ * (i.e, time process was last scheduled out)
+ * ready_to_run = time of wakeup for current task
+ *
+ * -----|------------|------------|------------|------
+ * last ready tprev t
+ * time to run
+ *
+ * |-------- dt_wait --------|
+ * |- dt_delay -|-- dt_run --|
+ *
+ * dt_run = run time of current task
+ * dt_wait = time between last schedule out event for task and tprev
+ * represents time spent off the cpu
+ * dt_delay = time between wakeup and schedule-in of task
+ */
+
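+/*
+ * Worked example (illustrative values): last_time = 100, ready_to_run =
+ * 150, tprev = 180 and t = 230 give dt_run = 50 and dt_delay = 30; the
+ * 80-unit wait (tprev - last_time) is booked as dt_preempt, dt_iowait
+ * or dt_sleep depending on last_state.
+ */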
+static void timehist_update_runtime_stats(struct thread_runtime *r,
+ u64 t, u64 tprev)
+{
+ r->dt_delay = 0;
+ r->dt_sleep = 0;
+ r->dt_iowait = 0;
+ r->dt_preempt = 0;
+ r->dt_run = 0;
+
+ if (tprev) {
+ r->dt_run = t - tprev;
+ if (r->ready_to_run) {
+ if (r->ready_to_run > tprev)
+ pr_debug("time travel: wakeup time for task > previous sched_switch event\n");
+ else
+ r->dt_delay = tprev - r->ready_to_run;
+ }
+
+ if (r->last_time > tprev)
+ pr_debug("time travel: last sched out time for task > previous sched_switch event\n");
+ else if (r->last_time) {
+ u64 dt_wait = tprev - r->last_time;
+
+ if (r->last_state == TASK_RUNNING)
+ r->dt_preempt = dt_wait;
+ else if (r->last_state == TASK_UNINTERRUPTIBLE)
+ r->dt_iowait = dt_wait;
+ else
+ r->dt_sleep = dt_wait;
+ }
+ }
+
+ update_stats(&r->run_stats, r->dt_run);
+
+ r->total_run_time += r->dt_run;
+ r->total_delay_time += r->dt_delay;
+ r->total_sleep_time += r->dt_sleep;
+ r->total_iowait_time += r->dt_iowait;
+ r->total_preempt_time += r->dt_preempt;
+}
+
+static bool is_idle_sample(struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ /* pid 0 == swapper == idle task */
+ if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
+ return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
+
+ return sample->pid == 0;
+}
+
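+/*
+ * Resolve and cache the callchain for the sampled task, marking the
+ * scheduler entry points (schedule, __schedule, preempt_schedule) as
+ * ignored so they are skipped when the chain is printed.
+ */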
+static void save_task_callchain(struct perf_sched *sched,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct callchain_cursor *cursor = &callchain_cursor;
+ struct thread *thread;
+
+ /* want the main thread for the process - it has the maps */
+ thread = machine__findnew_thread(machine, sample->pid, sample->pid);
+ if (thread == NULL) {
+ pr_debug("Failed to get thread for pid %d.\n", sample->pid);
+ return;
+ }
+
+ if (!sched->show_callchain || sample->callchain == NULL)
+ return;
+
+ if (thread__resolve_callchain(thread, cursor, evsel, sample,
+ NULL, NULL, sched->max_stack + 2) != 0) {
+ if (verbose > 0)
+ pr_err("Failed to resolve callchain. Skipping\n");
+
+ return;
+ }
+
+ callchain_cursor_commit(cursor);
+
+ while (true) {
+ struct callchain_cursor_node *node;
+ struct symbol *sym;
+
+ node = callchain_cursor_current(cursor);
+ if (node == NULL)
+ break;
+
+ sym = node->sym;
+ if (sym) {
+ if (!strcmp(sym->name, "schedule") ||
+ !strcmp(sym->name, "__schedule") ||
+ !strcmp(sym->name, "preempt_schedule"))
+ sym->ignore = 1;
+ }
+
+ callchain_cursor_advance(cursor);
+ }
+}
+
+static int init_idle_thread(struct thread *thread)
+{
+ struct idle_thread_runtime *itr;
+
+ thread__set_comm(thread, idle_comm, 0);
+
+ itr = zalloc(sizeof(*itr));
+ if (itr == NULL)
+ return -ENOMEM;
+
+ init_stats(&itr->tr.run_stats);
+ callchain_init(&itr->callchain);
+ callchain_cursor_reset(&itr->cursor);
+ thread__set_priv(thread, itr);
+
+ return 0;
+}
+
+/*
+ * Track idle stats per cpu by maintaining a local thread
+ * struct for the idle task on each cpu.
+ */
+static int init_idle_threads(int ncpu)
+{
+ int i, ret;
+
+ idle_threads = zalloc(ncpu * sizeof(struct thread *));
+ if (!idle_threads)
+ return -ENOMEM;
+
+ idle_max_cpu = ncpu;
+
+ /* allocate and initialize a thread struct for each cpu */
+ for (i = 0; i < ncpu; ++i) {
+ idle_threads[i] = thread__new(0, 0);
+ if (idle_threads[i] == NULL)
+ return -ENOMEM;
+
+ ret = init_idle_thread(idle_threads[i]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void free_idle_threads(void)
+{
+ int i;
+
+ if (idle_threads == NULL)
+ return;
+
+ for (i = 0; i < idle_max_cpu; ++i) {
+ if (idle_threads[i])
+ thread__delete(idle_threads[i]);
+ }
+
+ free(idle_threads);
+}
+
+static struct thread *get_idle_thread(int cpu)
+{
+ /*
+ * expand/allocate array of pointers to local thread
+ * structs if needed
+ */
+ if ((cpu >= idle_max_cpu) || (idle_threads == NULL)) {
+ int i, j = __roundup_pow_of_two(cpu+1);
+ void *p;
+
+ p = realloc(idle_threads, j * sizeof(struct thread *));
+ if (!p)
+ return NULL;
+
+ idle_threads = (struct thread **) p;
+ for (i = idle_max_cpu; i < j; ++i)
+ idle_threads[i] = NULL;
+
+ idle_max_cpu = j;
+ }
+
+ /* allocate a new thread struct if needed */
+ if (idle_threads[cpu] == NULL) {
+ idle_threads[cpu] = thread__new(0, 0);
+
+ if (idle_threads[cpu]) {
+ if (init_idle_thread(idle_threads[cpu]) < 0)
+ return NULL;
+ }
+ }
+
+ return idle_threads[cpu];
+}
+
+static void save_idle_callchain(struct perf_sched *sched,
+ struct idle_thread_runtime *itr,
+ struct perf_sample *sample)
+{
+ if (!sched->show_callchain || sample->callchain == NULL)
+ return;
+
+ callchain_cursor__copy(&itr->cursor, &callchain_cursor);
+}
+
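+/*
+ * Map a sample to its thread. Idle samples resolve to the per-cpu local
+ * idle thread; with --idle-hist the cpu's idle thread also remembers the
+ * task that ran last (and its callchain when switching to idle) so idle
+ * time can be attributed back to it.
+ */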
+static struct thread *timehist_get_thread(struct perf_sched *sched,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct perf_evsel *evsel)
+{
+ struct thread *thread;
+
+ if (is_idle_sample(sample, evsel)) {
+ thread = get_idle_thread(sample->cpu);
+ if (thread == NULL)
+ pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
+
+ } else {
+ /* there were samples with tid 0 but non-zero pid */
+ thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid ?: sample->pid);
+ if (thread == NULL) {
+ pr_debug("Failed to get thread for tid %d. skipping sample.\n",
+ sample->tid);
+ }
+
+ save_task_callchain(sched, sample, evsel, machine);
+ if (sched->idle_hist) {
+ struct thread *idle;
+ struct idle_thread_runtime *itr;
+
+ idle = get_idle_thread(sample->cpu);
+ if (idle == NULL) {
+ pr_err("Failed to get idle thread for cpu %d.\n", sample->cpu);
+ return NULL;
+ }
+
+ itr = thread__priv(idle);
+ if (itr == NULL)
+ return NULL;
+
+ itr->last_thread = thread;
+
+ /* copy the task's callchain when switching to idle */
+ if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
+ save_idle_callchain(sched, itr, sample);
+ }
+ }
+
+ return thread;
+}
+
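+/*
+ * A sample is skipped when its thread is filtered out or, with
+ * --idle-hist, when it is not a sched_switch to or from the idle task.
+ */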
+static bool timehist_skip_sample(struct perf_sched *sched,
+ struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ bool rc = false;
+
+ if (thread__is_filtered(thread)) {
+ rc = true;
+ sched->skipped_samples++;
+ }
+
+ if (sched->idle_hist) {
+ if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
+ rc = true;
+ else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
+ perf_evsel__intval(evsel, sample, "next_pid") != 0)
+ rc = true;
+ }
+
+ return rc;
+}
+
+static void timehist_print_wakeup_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *awakened)
+{
+ struct thread *thread;
+ char tstr[64];
+
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL)
+ return;
+
+ /* show the wakeup unless both the awakened and the waking threads are filtered */
+ if (timehist_skip_sample(sched, thread, evsel, sample) &&
+ timehist_skip_sample(sched, awakened, evsel, sample)) {
+ return;
+ }
+
+ timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+ printf("%15s [%04d] ", tstr, sample->cpu);
+ if (sched->show_cpu_visual)
+ printf(" %*s ", sched->max_cpu + 1, "");
+
+ printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+ /* dt spacer */
+ printf(" %9s %9s %9s ", "", "", "");
+
+ printf("awakened: %s", timehist_get_commstr(awakened));
+
+ printf("\n");
+}
+
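+/*
+ * sched_wakeup handler: record the first wakeup time of the awakened
+ * task (later used to compute dt_delay) and optionally print the event.
+ */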
+static int timehist_sched_wakeup_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+ struct thread *thread;
+ struct thread_runtime *tr = NULL;
+ /* want pid of awakened task not pid in sample */
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+
+ thread = machine__findnew_thread(machine, 0, pid);
+ if (thread == NULL)
+ return -1;
+
+ tr = thread__get_runtime(thread);
+ if (tr == NULL)
+ return -1;
+
+ if (tr->ready_to_run == 0)
+ tr->ready_to_run = sample->time;
+
+ /* show wakeups if requested */
+ if (sched->show_wakeups &&
+ !perf_time__skip_sample(&sched->ptime, sample->time))
+ timehist_print_wakeup_event(sched, evsel, sample, machine, thread);
+
+ return 0;
+}
+
+static void timehist_print_migration_event(struct perf_sched *sched,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine,
+ struct thread *migrated)
+{
+ struct thread *thread;
+ char tstr[64];
+ u32 max_cpus = sched->max_cpu + 1;
+ u32 ocpu, dcpu;
+
+ if (sched->summary_only)
+ return;
+
+ ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
+ dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
+
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+ if (thread == NULL)
+ return;
+
+ if (timehist_skip_sample(sched, thread, evsel, sample) &&
+ timehist_skip_sample(sched, migrated, evsel, sample)) {
+ return;
+ }
+
+ timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+ printf("%15s [%04d] ", tstr, sample->cpu);
+
+ if (sched->show_cpu_visual) {
+ u32 i;
+ char c;
+
+ printf(" ");
+ for (i = 0; i < max_cpus; ++i) {
+ c = (i == sample->cpu) ? 'm' : ' ';
+ printf("%c", c);
+ }
+ printf(" ");
+ }
+
+ printf(" %-*s ", comm_width, timehist_get_commstr(thread));
+
+ /* dt spacer */
+ printf(" %9s %9s %9s ", "", "", "");
+
+ printf("migrated: %s", timehist_get_commstr(migrated));
+ printf(" cpu %d => %d", ocpu, dcpu);
+
+ printf("\n");
+}
+
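+/*
+ * sched_migrate_task handler: bump the migration count for the migrated
+ * task and print the event unless only the summary was requested.
+ */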
+static int timehist_migrate_task_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+ struct thread *thread;
+ struct thread_runtime *tr = NULL;
+ /* want pid of migrated task not pid in sample */
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+
+ thread = machine__findnew_thread(machine, 0, pid);
+ if (thread == NULL)
+ return -1;
+
+ tr = thread__get_runtime(thread);
+ if (tr == NULL)
+ return -1;
+
+ tr->migrations++;
+
+ /* show migrations if requested */
+ timehist_print_migration_event(sched, evsel, sample, machine, thread);
+
+ return 0;
+}
+
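+/*
+ * Core sched_switch handler: clamp the [tprev, t] interval to the --time
+ * window, update the outgoing task's delta-time stats, print the sample
+ * line and record the per-task state needed for the next event.
+ */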
+static int timehist_sched_change_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+ struct perf_time_interval *ptime = &sched->ptime;
+ struct addr_location al;
+ struct thread *thread;
+ struct thread_runtime *tr = NULL;
+ u64 tprev, t = sample->time;
+ int rc = 0;
+ int state = perf_evsel__intval(evsel, sample, "prev_state");
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("problem processing %d event. skipping it\n",
+ event->header.type);
+ rc = -1;
+ goto out;
+ }
+
+ thread = timehist_get_thread(sched, sample, machine, evsel);
+ if (thread == NULL) {
+ rc = -1;
+ goto out;
+ }
+
+ if (timehist_skip_sample(sched, thread, evsel, sample))
+ goto out;
+
+ tr = thread__get_runtime(thread);
+ if (tr == NULL) {
+ rc = -1;
+ goto out;
+ }
+
+ tprev = perf_evsel__get_time(evsel, sample->cpu);
+
+ /*
+ * If a start time is given:
+ * - if the sample time is before the window of interest, skip the sample
+ * - if tprev is before the window, reset it to the start of the window
+ */
+ if (ptime->start && ptime->start > t)
+ goto out;
+
+ if (tprev && ptime->start > tprev)
+ tprev = ptime->start;
+
+ /*
+ * If an end time is given:
+ * - if the previous sched event is already past the window, we are done
+ * - if the sample time is past the window, clamp it to the window end
+ * so stats are closed out at the time window boundary
+ */
+ if (ptime->end) {
+ if (tprev > ptime->end)
+ goto out;
+
+ if (t > ptime->end)
+ t = ptime->end;
+ }
+
+ if (!sched->idle_hist || thread->tid == 0) {
+ timehist_update_runtime_stats(tr, t, tprev);
+
+ if (sched->idle_hist) {
+ struct idle_thread_runtime *itr = (void *)tr;
+ struct thread_runtime *last_tr;
+
+ BUG_ON(thread->tid != 0);
+
+ if (itr->last_thread == NULL)
+ goto out;
+
+ /* add current idle time as last thread's runtime */
+ last_tr = thread__get_runtime(itr->last_thread);
+ if (last_tr == NULL)
+ goto out;
+
+ timehist_update_runtime_stats(last_tr, t, tprev);
+ /*
+ * Clear the last thread's delta times: they are not updated here
+ * and would otherwise show stale values next time. Only the total
+ * run time and the run stats matter for the idle histogram.
+ */
+ last_tr->dt_run = 0;
+ last_tr->dt_delay = 0;
+ last_tr->dt_sleep = 0;
+ last_tr->dt_iowait = 0;
+ last_tr->dt_preempt = 0;
+
+ if (itr->cursor.nr)
+ callchain_append(&itr->callchain, &itr->cursor, t - tprev);
+
+ itr->last_thread = NULL;
+ }
+ }
+
+ if (!sched->summary_only)
+ timehist_print_sample(sched, evsel, sample, &al, thread, t, state);
+
+out:
+ if (sched->hist_time.start == 0 && t >= ptime->start)
+ sched->hist_time.start = t;
+ if (ptime->end == 0 || t <= ptime->end)
+ sched->hist_time.end = t;
+
+ if (tr) {
+ /* time of this sched_switch event becomes last time task seen */
+ tr->last_time = sample->time;
+
+ /* last state is used to determine where to account wait time */
+ tr->last_state = state;
+
+ /* sched out event for task so reset ready to run time */
+ tr->ready_to_run = 0;
+ }
+
+ perf_evsel__save_time(evsel, sample->time, sample->cpu);
+
+ return rc;
+}
+
+static int timehist_sched_switch_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine __maybe_unused)
+{
+ return timehist_sched_change_event(tool, event, evsel, sample, machine);
+}
+
+static int process_lost(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine __maybe_unused)
+{
+ char tstr[64];
+
+ timestamp__scnprintf_usec(sample->time, tstr, sizeof(tstr));
+ printf("%15s ", tstr);
+ printf("lost %" PRIu64 " events on cpu %d\n", event->lost.lost, sample->cpu);
+
+ return 0;
+}
+
+static void print_thread_runtime(struct thread *t,
+ struct thread_runtime *r)
+{
+ double mean = avg_stats(&r->run_stats);
+ float stddev;
+
+ printf("%*s %5d %9" PRIu64 " ",
+ comm_width, timehist_get_commstr(t), t->ppid,
+ (u64) r->run_stats.n);
+
+ print_sched_time(r->total_run_time, 8);
+ stddev = rel_stddev_stats(stddev_stats(&r->run_stats), mean);
+ print_sched_time(r->run_stats.min, 6);
+ printf(" ");
+ print_sched_time((u64) mean, 6);
+ printf(" ");
+ print_sched_time(r->run_stats.max, 6);
+ printf(" ");
+ printf("%5.2f", stddev);
+ printf(" %5" PRIu64, r->migrations);
+ printf("\n");
+}
+
+static void print_thread_waittime(struct thread *t,
+ struct thread_runtime *r)
+{
+ printf("%*s %5d %9" PRIu64 " ",
+ comm_width, timehist_get_commstr(t), t->ppid,
+ (u64) r->run_stats.n);
+
+ print_sched_time(r->total_run_time, 8);
+ print_sched_time(r->total_sleep_time, 6);
+ printf(" ");
+ print_sched_time(r->total_iowait_time, 6);
+ printf(" ");
+ print_sched_time(r->total_preempt_time, 6);
+ printf(" ");
+ print_sched_time(r->total_delay_time, 6);
+ printf("\n");
+}
+
+struct total_run_stats {
+ struct perf_sched *sched;
+ u64 sched_count;
+ u64 task_count;
+ u64 total_run_time;
+};
+
+static int __show_thread_runtime(struct thread *t, void *priv)
+{
+ struct total_run_stats *stats = priv;
+ struct thread_runtime *r;
+
+ if (thread__is_filtered(t))
+ return 0;
+
+ r = thread__priv(t);
+ if (r && r->run_stats.n) {
+ stats->task_count++;
+ stats->sched_count += r->run_stats.n;
+ stats->total_run_time += r->total_run_time;
+
+ if (stats->sched->show_state)
+ print_thread_waittime(t, r);
+ else
+ print_thread_runtime(t, r);
+ }
+
+ return 0;
+}
+
+static int show_thread_runtime(struct thread *t, void *priv)
+{
+ if (t->dead)
+ return 0;
+
+ return __show_thread_runtime(t, priv);
+}
+
+static int show_deadthread_runtime(struct thread *t, void *priv)
+{
+ if (!t->dead)
+ return 0;
+
+ return __show_thread_runtime(t, priv);
+}
+
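+/*
+ * Print a callchain on a single folded line, entries separated by
+ * " <- ", skipping scheduler-ignored and context marker entries.
+ */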
+static size_t callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+ const char *sep = " <- ";
+ struct callchain_list *chain;
+ size_t ret = 0;
+ char bf[1024];
+ bool first;
+
+ if (node == NULL)
+ return 0;
+
+ ret = callchain__fprintf_folded(fp, node->parent);
+ first = (ret == 0);
+
+ list_for_each_entry(chain, &node->val, list) {
+ if (chain->ip >= PERF_CONTEXT_MAX)
+ continue;
+ if (chain->ms.sym && chain->ms.sym->ignore)
+ continue;
+ ret += fprintf(fp, "%s%s", first ? "" : sep,
+ callchain_list__sym_name(chain, bf, sizeof(bf),
+ false));
+ first = false;
+ }
+
+ return ret;
+}
+
+static size_t timehist_print_idlehist_callchain(struct rb_root *root)
+{
+ size_t ret = 0;
+ FILE *fp = stdout;
+ struct callchain_node *chain;
+ struct rb_node *rb_node = rb_first(root);
+
+ printf(" %16s %8s %s\n", "Idle time (msec)", "Count", "Callchains");
+ printf(" %.16s %.8s %.50s\n", graph_dotted_line, graph_dotted_line,
+ graph_dotted_line);
+
+ while (rb_node) {
+ chain = rb_entry(rb_node, struct callchain_node, rb_node);
+ rb_node = rb_next(rb_node);
+
+ ret += fprintf(fp, " ");
+ print_sched_time(chain->hit, 12);
+ ret += 16; /* print_sched_time prints (2nd arg + 4) chars */
+ ret += fprintf(fp, " %8d ", chain->count);
+ ret += callchain__fprintf_folded(fp, chain);
+ ret += fprintf(fp, "\n");
+ }
+
+ return ret;
+}
+
+static void timehist_print_summary(struct perf_sched *sched,
+ struct perf_session *session)
+{
+ struct machine *m = &session->machines.host;
+ struct total_run_stats totals;
+ u64 task_count;
+ struct thread *t;
+ struct thread_runtime *r;
+ int i;
+ u64 hist_time = sched->hist_time.end - sched->hist_time.start;
+
+ memset(&totals, 0, sizeof(totals));
+ totals.sched = sched;
+
+ if (sched->idle_hist) {
+ printf("\nIdle-time summary\n");
+ printf("%*s parent sched-out ", comm_width, "comm");
+ printf(" idle-time min-idle avg-idle max-idle stddev migrations\n");
+ } else if (sched->show_state) {
+ printf("\nWait-time summary\n");
+ printf("%*s parent sched-in ", comm_width, "comm");
+ printf(" run-time sleep iowait preempt delay\n");
+ } else {
+ printf("\nRuntime summary\n");
+ printf("%*s parent sched-in ", comm_width, "comm");
+ printf(" run-time min-run avg-run max-run stddev migrations\n");
+ }
+ printf("%*s (count) ", comm_width, "");
+ printf(" (msec) (msec) (msec) (msec) %s\n",
+ sched->show_state ? "(msec)" : "%");
+ printf("%.117s\n", graph_dotted_line);
+
+ machine__for_each_thread(m, show_thread_runtime, &totals);
+ task_count = totals.task_count;
+ if (!task_count)
+ printf("<no still running tasks>\n");
+
+ printf("\nTerminated tasks:\n");
+ machine__for_each_thread(m, show_deadthread_runtime, &totals);
+ if (task_count == totals.task_count)
+ printf("<no terminated tasks>\n");
+
+ /* CPU idle stats not tracked when samples were skipped */
+ if (sched->skipped_samples && !sched->idle_hist)
+ return;
+
+ printf("\nIdle stats:\n");
+ for (i = 0; i < idle_max_cpu; ++i) {
+ t = idle_threads[i];
+ if (!t)
+ continue;
+
+ r = thread__priv(t);
+ if (r && r->run_stats.n) {
+ totals.sched_count += r->run_stats.n;
+ printf(" CPU %2d idle for ", i);
+ print_sched_time(r->total_run_time, 6);
+ printf(" msec (%6.2f%%)\n", 100.0 * r->total_run_time / hist_time);
+ } else
+ printf(" CPU %2d idle entire time window\n", i);
+ }
+
+ if (sched->idle_hist && sched->show_callchain) {
+ callchain_param.mode = CHAIN_FOLDED;
+ callchain_param.value = CCVAL_PERIOD;
+
+ callchain_register_param(&callchain_param);
+
+ printf("\nIdle stats by callchain:\n");
+ for (i = 0; i < idle_max_cpu; ++i) {
+ struct idle_thread_runtime *itr;
+
+ t = idle_threads[i];
+ if (!t)
+ continue;
+
+ itr = thread__priv(t);
+ if (itr == NULL)
+ continue;
+
+ callchain_param.sort(&itr->sorted_root, &itr->callchain,
+ 0, &callchain_param);
+
+ printf(" CPU %2d:", i);
+ print_sched_time(itr->tr.total_run_time, 6);
+ printf(" msec\n");
+ timehist_print_idlehist_callchain(&itr->sorted_root);
+ printf("\n");
+ }
+ }
+
+ printf("\n"
+ " Total number of unique tasks: %" PRIu64 "\n"
+ "Total number of context switches: %" PRIu64 "\n",
+ totals.task_count, totals.sched_count);
+
+ printf(" Total run time (msec): ");
+ print_sched_time(totals.total_run_time, 2);
+ printf("\n");
+
+ printf(" Total scheduling time (msec): ");
+ print_sched_time(hist_time, 2);
+ printf(" (x %d)\n", sched->max_cpu);
+}
+
+typedef int (*sched_handler)(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine);
+
+static int perf_timehist__process_sample(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
+ int err = 0;
+ int this_cpu = sample->cpu;
+
+ if (this_cpu > sched->max_cpu)
+ sched->max_cpu = this_cpu;
+
+ if (evsel->handler != NULL) {
+ sched_handler f = evsel->handler;
+
+ err = f(tool, event, evsel, sample, machine);
+ }
+
+ return err;
+}
+
+static int timehist_check_attr(struct perf_sched *sched,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ struct evsel_runtime *er;
+
+ list_for_each_entry(evsel, &evlist->entries, node) {
+ er = perf_evsel__get_runtime(evsel);
+ if (er == NULL) {
+ pr_err("Failed to allocate memory for evsel runtime data\n");
+ return -1;
+ }
+
+ if (sched->show_callchain && !evsel__has_callchain(evsel)) {
+ pr_info("Samples do not have callchains.\n");
+ sched->show_callchain = 0;
+ symbol_conf.use_callchain = 0;
+ }
+ }
+
+ return 0;
+}
+
+static int perf_sched__timehist(struct perf_sched *sched)
+{
+ const struct perf_evsel_str_handler handlers[] = {
+ { "sched:sched_switch", timehist_sched_switch_event, },
+ { "sched:sched_wakeup", timehist_sched_wakeup_event, },
+ { "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
+ };
+ const struct perf_evsel_str_handler migrate_handlers[] = {
+ { "sched:sched_migrate_task", timehist_migrate_task_event, },
+ };
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = sched->force,
+ };
+
+ struct perf_session *session;
+ struct perf_evlist *evlist;
+ int err = -1;
+
+ /*
+ * event handlers for timehist option
+ */
+ sched->tool.sample = perf_timehist__process_sample;
+ sched->tool.mmap = perf_event__process_mmap;
+ sched->tool.comm = perf_event__process_comm;
+ sched->tool.exit = perf_event__process_exit;
+ sched->tool.fork = perf_event__process_fork;
+ sched->tool.lost = process_lost;
+ sched->tool.attr = perf_event__process_attr;
+ sched->tool.tracing_data = perf_event__process_tracing_data;
+ sched->tool.build_id = perf_event__process_build_id;
+
+ sched->tool.ordered_events = true;
+ sched->tool.ordering_requires_timestamps = true;
+
+ symbol_conf.use_callchain = sched->show_callchain;
+
+ session = perf_session__new(&data, false, &sched->tool);
+ if (session == NULL)
+ return -ENOMEM;
+
+ evlist = session->evlist;
+
+ symbol__init(&session->header.env);
+
+ if (perf_time__parse_str(&sched->ptime, sched->time_str) != 0) {
+ pr_err("Invalid time string\n");
+ return -EINVAL;
+ }
+
+ if (timehist_check_attr(sched, evlist) != 0)
+ goto out;
+
+ setup_pager();
+
+ /* setup per-evsel handlers */
+ if (perf_session__set_tracepoints_handlers(session, handlers))
+ goto out;
+
+ /* sched_switch event at a minimum needs to exist */
+ if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+ "sched:sched_switch")) {
+ pr_err("No sched_switch events found. Have you run 'perf sched record'?\n");
+ goto out;
+ }
+
+ if (sched->show_migrations &&
+ perf_session__set_tracepoints_handlers(session, migrate_handlers))
+ goto out;
+
+ /* pre-allocate struct for per-CPU idle stats */
+ sched->max_cpu = session->header.env.nr_cpus_online;
+ if (sched->max_cpu == 0)
+ sched->max_cpu = 4;
+ if (init_idle_threads(sched->max_cpu))
+ goto out;
+
+ /* summary_only implies summary option, but don't overwrite summary if set */
+ if (sched->summary_only)
+ sched->summary = sched->summary_only;
+
+ if (!sched->summary_only)
+ timehist_header(sched);
+
+ err = perf_session__process_events(session);
+ if (err) {
+ pr_err("Failed to process events, error %d", err);
+ goto out;
+ }
+
+ sched->nr_events = evlist->stats.nr_events[0];
+ sched->nr_lost_events = evlist->stats.total_lost;
+ sched->nr_lost_chunks = evlist->stats.nr_events[PERF_RECORD_LOST];
+
+ if (sched->summary)
+ timehist_print_summary(sched, session);
+
+out:
+ free_idle_threads();
+ perf_session__delete(session);
+
+ return err;
+}
+
+static void print_bad_events(struct perf_sched *sched)
+{
+ if (sched->nr_unordered_timestamps && sched->nr_timestamps) {
+ printf(" INFO: %.3f%% unordered timestamps (%ld out of %ld)\n",
+ (double)sched->nr_unordered_timestamps/(double)sched->nr_timestamps*100.0,
+ sched->nr_unordered_timestamps, sched->nr_timestamps);
+ }
+ if (sched->nr_lost_events && sched->nr_events) {
+ printf(" INFO: %.3f%% lost events (%ld out of %ld, in %ld chunks)\n",
+ (double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
+ sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
+ }
+ if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
+ printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
+ (double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
+ sched->nr_context_switch_bugs, sched->nr_timestamps);
+ if (sched->nr_lost_events)
+ printf(" (due to lost events?)");
+ printf("\n");
+ }
+}
+
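+/*
+ * Merge work atoms that share a comm into a single rb-tree node keyed
+ * by comm, accumulating runtime, latency and switch counts.
+ */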
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+ struct work_atoms *this;
+ const char *comm = thread__comm_str(data->thread), *this_comm;
+
+ while (*new) {
+ int cmp;
+
+ this = container_of(*new, struct work_atoms, node);
+ parent = *new;
+
+ this_comm = thread__comm_str(this->thread);
+ cmp = strcmp(comm, this_comm);
+ if (cmp > 0) {
+ new = &((*new)->rb_left);
+ } else if (cmp < 0) {
+ new = &((*new)->rb_right);
+ } else {
+ this->num_merged++;
+ this->total_runtime += data->total_runtime;
+ this->nb_atoms += data->nb_atoms;
+ this->total_lat += data->total_lat;
+ list_splice(&data->work_list, &this->work_list);
+ if (this->max_lat < data->max_lat) {
+ this->max_lat = data->max_lat;
+ this->max_lat_at = data->max_lat_at;
+ }
+ zfree(&data);
+ return;
+ }
+ }
+
+ data->num_merged++;
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void perf_sched__merge_lat(struct perf_sched *sched)
+{
+ struct work_atoms *data;
+ struct rb_node *node;
+
+ if (sched->skip_merge)
+ return;
+
+ while ((node = rb_first(&sched->atom_root))) {
+ rb_erase(node, &sched->atom_root);
+ data = rb_entry(node, struct work_atoms, node);
+ __merge_work_atoms(&sched->merged_atom_root, data);
+ }
+}
+
+static int perf_sched__lat(struct perf_sched *sched)
+{
+ struct rb_node *next;
+
+ setup_pager();
+
+ if (perf_sched__read_events(sched))
+ return -1;
+
+ perf_sched__merge_lat(sched);
+ perf_sched__sort_lat(sched);
+
+ printf("\n -----------------------------------------------------------------------------------------------------------------\n");
+ printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
+ printf(" -----------------------------------------------------------------------------------------------------------------\n");
+
+ next = rb_first(&sched->sorted_atom_root);
+
+ while (next) {
+ struct work_atoms *work_list;
+
+ work_list = rb_entry(next, struct work_atoms, node);
+ output_lat_thread(sched, work_list);
+ next = rb_next(next);
+ thread__zput(work_list->thread);
+ }
+
+ printf(" -----------------------------------------------------------------------------------------------------------------\n");
+ printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
+ (double)sched->all_runtime / NSEC_PER_MSEC, sched->all_count);
+
+ printf(" ---------------------------------------------------\n");
+
+ print_bad_events(sched);
+ printf("\n");
+
+ return 0;
+}
+
+static int setup_map_cpus(struct perf_sched *sched)
+{
+ struct cpu_map *map;
+
+ sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF);
+
+ if (sched->map.comp) {
+ sched->map.comp_cpus = zalloc(sched->max_cpu * sizeof(int));
+ if (!sched->map.comp_cpus)
+ return -1;
+ }
+
+ if (!sched->map.cpus_str)
+ return 0;
+
+ map = cpu_map__new(sched->map.cpus_str);
+ if (!map) {
+ pr_err("failed to get cpus map from %s\n", sched->map.cpus_str);
+ return -1;
+ }
+
+ sched->map.cpus = map;
+ return 0;
+}
+
+static int setup_color_pids(struct perf_sched *sched)
+{
+ struct thread_map *map;
+
+ if (!sched->map.color_pids_str)
+ return 0;
+
+ map = thread_map__new_by_tid_str(sched->map.color_pids_str);
+ if (!map) {
+ pr_err("failed to get thread map from %s\n", sched->map.color_pids_str);
+ return -1;
+ }
+
+ sched->map.color_pids = map;
+ return 0;
+}
+
+static int setup_color_cpus(struct perf_sched *sched)
+{
+ struct cpu_map *map;
+
+ if (!sched->map.color_cpus_str)
+ return 0;
+
+ map = cpu_map__new(sched->map.color_cpus_str);
+ if (!map) {
+ pr_err("failed to get thread map from %s\n", sched->map.color_cpus_str);
+ return -1;
+ }
+
+ sched->map.color_cpus = map;
+ return 0;
+}
+
+static int perf_sched__map(struct perf_sched *sched)
+{
+ if (setup_map_cpus(sched))
+ return -1;
+
+ if (setup_color_pids(sched))
+ return -1;
+
+ if (setup_color_cpus(sched))
+ return -1;
+
+ setup_pager();
+ if (perf_sched__read_events(sched))
+ return -1;
+ print_bad_events(sched);
+ return 0;
+}
+
+static int perf_sched__replay(struct perf_sched *sched)
+{
+ unsigned long i;
+
+ calibrate_run_measurement_overhead(sched);
+ calibrate_sleep_measurement_overhead(sched);
+
+ test_calibrations(sched);
+
+ if (perf_sched__read_events(sched))
+ return -1;
+
+ printf("nr_run_events: %ld\n", sched->nr_run_events);
+ printf("nr_sleep_events: %ld\n", sched->nr_sleep_events);
+ printf("nr_wakeup_events: %ld\n", sched->nr_wakeup_events);
+
+ if (sched->targetless_wakeups)
+ printf("target-less wakeups: %ld\n", sched->targetless_wakeups);
+ if (sched->multitarget_wakeups)
+ printf("multi-target wakeups: %ld\n", sched->multitarget_wakeups);
+ if (sched->nr_run_events_optimized)
+ printf("run atoms optimized: %ld\n",
+ sched->nr_run_events_optimized);
+
+ print_task_traces(sched);
+ add_cross_task_wakeups(sched);
+
+ create_tasks(sched);
+ printf("------------------------------------------------------------\n");
+ for (i = 0; i < sched->replay_repeat; i++)
+ run_one_test(sched);
+
+ return 0;
+}
+
+static void setup_sorting(struct perf_sched *sched, const struct option *options,
+ const char * const usage_msg[])
+{
+ char *tmp, *tok, *str = strdup(sched->sort_order);
+
+ for (tok = strtok_r(str, ", ", &tmp);
+ tok; tok = strtok_r(NULL, ", ", &tmp)) {
+ if (sort_dimension__add(tok, &sched->sort_list) < 0) {
+ usage_with_options_msg(usage_msg, options,
+ "Unknown --sort key: `%s'", tok);
+ }
+ }
+
+ free(str);
+
+ sort_dimension__add("pid", &sched->cmp_pid);
+}
+
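+/*
+ * 'perf sched record' is a thin wrapper around 'perf record' with the
+ * scheduler tracepoints below appended, plus any user-supplied options.
+ */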
+static int __cmd_record(int argc, const char **argv)
+{
+ unsigned int rec_argc, i, j;
+ const char **rec_argv;
+ const char * const record_args[] = {
+ "record",
+ "-a",
+ "-R",
+ "-m", "1024",
+ "-c", "1",
+ "-e", "sched:sched_switch",
+ "-e", "sched:sched_stat_wait",
+ "-e", "sched:sched_stat_sleep",
+ "-e", "sched:sched_stat_iowait",
+ "-e", "sched:sched_stat_runtime",
+ "-e", "sched:sched_process_fork",
+ "-e", "sched:sched_wakeup",
+ "-e", "sched:sched_wakeup_new",
+ "-e", "sched:sched_migrate_task",
+ };
+
+ rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ if (rec_argv == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[i] = strdup(record_args[i]);
+
+ for (j = 1; j < (unsigned int)argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ BUG_ON(i != rec_argc);
+
+ return cmd_record(i, rec_argv);
+}
+
+int cmd_sched(int argc, const char **argv)
+{
+ const char default_sort_order[] = "avg, max, switch, runtime";
+ struct perf_sched sched = {
+ .tool = {
+ .sample = perf_sched__process_tracepoint_sample,
+ .comm = perf_sched__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .lost = perf_event__process_lost,
+ .fork = perf_sched__process_fork_event,
+ .ordered_events = true,
+ },
+ .cmp_pid = LIST_HEAD_INIT(sched.cmp_pid),
+ .sort_list = LIST_HEAD_INIT(sched.sort_list),
+ .start_work_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER,
+ .sort_order = default_sort_order,
+ .replay_repeat = 10,
+ .profile_cpu = -1,
+ .next_shortname1 = 'A',
+ .next_shortname2 = '0',
+ .skip_merge = 0,
+ .show_callchain = 1,
+ .max_stack = 5,
+ };
+ const struct option sched_options[] = {
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"),
+ OPT_END()
+ };
+ const struct option latency_options[] = {
+ OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
+ "sort by key(s): runtime, switch, avg, max"),
+ OPT_INTEGER('C', "CPU", &sched.profile_cpu,
+ "CPU to profile on"),
+ OPT_BOOLEAN('p', "pids", &sched.skip_merge,
+ "latency stats per pid instead of per comm"),
+ OPT_PARENT(sched_options)
+ };
+ const struct option replay_options[] = {
+ OPT_UINTEGER('r', "repeat", &sched.replay_repeat,
+ "repeat the workload replay N times (-1: infinite)"),
+ OPT_PARENT(sched_options)
+ };
+ const struct option map_options[] = {
+ OPT_BOOLEAN(0, "compact", &sched.map.comp,
+ "map output in compact mode"),
+ OPT_STRING(0, "color-pids", &sched.map.color_pids_str, "pids",
+ "highlight given pids in map"),
+ OPT_STRING(0, "color-cpus", &sched.map.color_cpus_str, "cpus",
+ "highlight given CPUs in map"),
+ OPT_STRING(0, "cpus", &sched.map.cpus_str, "cpus",
+ "display given CPUs in map"),
+ OPT_PARENT(sched_options)
+ };
+ const struct option timehist_options[] = {
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+ "file", "kallsyms pathname"),
+ OPT_BOOLEAN('g', "call-graph", &sched.show_callchain,
+ "Display call chains if present (default on)"),
+ OPT_UINTEGER(0, "max-stack", &sched.max_stack,
+ "Maximum number of functions to display backtrace."),
+ OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
+ "Look for files with symbols relative to this directory"),
+ OPT_BOOLEAN('s', "summary", &sched.summary_only,
+ "Show only syscall summary with statistics"),
+ OPT_BOOLEAN('S', "with-summary", &sched.summary,
+ "Show all syscalls and summary with statistics"),
+ OPT_BOOLEAN('w', "wakeups", &sched.show_wakeups, "Show wakeup events"),
+ OPT_BOOLEAN('n', "next", &sched.show_next, "Show next task"),
+ OPT_BOOLEAN('M', "migrations", &sched.show_migrations, "Show migration events"),
+ OPT_BOOLEAN('V', "cpu-visual", &sched.show_cpu_visual, "Add CPU visual"),
+ OPT_BOOLEAN('I', "idle-hist", &sched.idle_hist, "Show idle events only"),
+ OPT_STRING(0, "time", &sched.time_str, "str",
+ "Time span for analysis (start,stop)"),
+ OPT_BOOLEAN(0, "state", &sched.show_state, "Show task state when sched-out"),
+ OPT_STRING('p', "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+ "analyze events only for given process id(s)"),
+ OPT_STRING('t', "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+ "analyze events only for given thread id(s)"),
+ OPT_PARENT(sched_options)
+ };
+
+ const char * const latency_usage[] = {
+ "perf sched latency [<options>]",
+ NULL
+ };
+ const char * const replay_usage[] = {
+ "perf sched replay [<options>]",
+ NULL
+ };
+ const char * const map_usage[] = {
+ "perf sched map [<options>]",
+ NULL
+ };
+ const char * const timehist_usage[] = {
+ "perf sched timehist [<options>]",
+ NULL
+ };
+ const char *const sched_subcommands[] = { "record", "latency", "map",
+ "replay", "script",
+ "timehist", NULL };
+ const char *sched_usage[] = {
+ NULL,
+ NULL
+ };
+ struct trace_sched_handler lat_ops = {
+ .wakeup_event = latency_wakeup_event,
+ .switch_event = latency_switch_event,
+ .runtime_event = latency_runtime_event,
+ .migrate_task_event = latency_migrate_task_event,
+ };
+ struct trace_sched_handler map_ops = {
+ .switch_event = map_switch_event,
+ };
+ struct trace_sched_handler replay_ops = {
+ .wakeup_event = replay_wakeup_event,
+ .switch_event = replay_switch_event,
+ .fork_event = replay_fork_event,
+ };
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
+ sched.curr_pid[i] = -1;
+
+ argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
+ sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+ if (!argc)
+ usage_with_options(sched_usage, sched_options);
+
+ /*
+ * Aliased to 'perf script' for now:
+ */
+ if (!strcmp(argv[0], "script"))
+ return cmd_script(argc, argv);
+
+ if (!strncmp(argv[0], "rec", 3)) {
+ return __cmd_record(argc, argv);
+ } else if (!strncmp(argv[0], "lat", 3)) {
+ sched.tp_handler = &lat_ops;
+ if (argc > 1) {
+ argc = parse_options(argc, argv, latency_options, latency_usage, 0);
+ if (argc)
+ usage_with_options(latency_usage, latency_options);
+ }
+ setup_sorting(&sched, latency_options, latency_usage);
+ return perf_sched__lat(&sched);
+ } else if (!strcmp(argv[0], "map")) {
+ if (argc) {
+ argc = parse_options(argc, argv, map_options, map_usage, 0);
+ if (argc)
+ usage_with_options(map_usage, map_options);
+ }
+ sched.tp_handler = &map_ops;
+ setup_sorting(&sched, latency_options, latency_usage);
+ return perf_sched__map(&sched);
+ } else if (!strncmp(argv[0], "rep", 3)) {
+ sched.tp_handler = &replay_ops;
+ if (argc) {
+ argc = parse_options(argc, argv, replay_options, replay_usage, 0);
+ if (argc)
+ usage_with_options(replay_usage, replay_options);
+ }
+ return perf_sched__replay(&sched);
+ } else if (!strcmp(argv[0], "timehist")) {
+ if (argc) {
+ argc = parse_options(argc, argv, timehist_options,
+ timehist_usage, 0);
+ if (argc)
+ usage_with_options(timehist_usage, timehist_options);
+ }
+ if ((sched.show_wakeups || sched.show_next) &&
+ sched.summary_only) {
+ pr_err(" Error: -s and -[n|w] are mutually exclusive.\n");
+ parse_options_usage(timehist_usage, timehist_options, "s", true);
+ if (sched.show_wakeups)
+ parse_options_usage(NULL, timehist_options, "w", true);
+ if (sched.show_next)
+ parse_options_usage(NULL, timehist_options, "n", true);
+ return -EINVAL;
+ }
+
+ return perf_sched__timehist(&sched);
+ } else {
+ usage_with_options(sched_usage, sched_options);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
new file mode 100644
index 000000000..1200973c7
--- /dev/null
+++ b/tools/perf/builtin-script.c
@@ -0,0 +1,3542 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+
+#include "perf.h"
+#include "util/cache.h"
+#include "util/debug.h"
+#include <subcmd/exec-cmd.h>
+#include "util/header.h"
+#include <subcmd/parse-options.h>
+#include "util/perf_regs.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/trace-event.h"
+#include "util/util.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/sort.h"
+#include "util/data.h"
+#include "util/auxtrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
+#include "util/color.h"
+#include "util/string2.h"
+#include "util/thread-stack.h"
+#include "util/time-utils.h"
+#include "util/path.h"
+#include "print_binary.h"
+#include <linux/bitmap.h>
+#include <linux/kernel.h>
+#include <linux/stringify.h>
+#include <linux/time64.h>
+#include "asm/bug.h"
+#include "util/mem-events.h"
+#include "util/dump-insn.h"
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "sane_ctype.h"
+
+static char const *script_name;
+static char const *generate_script_lang;
+static bool debug_mode;
+static u64 last_timestamp;
+static u64 nr_unordered;
+static bool no_callchain;
+static bool latency_format;
+static bool system_wide;
+static bool print_flags;
+static bool nanosecs;
+static const char *cpu_list;
+static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+static struct perf_stat_config stat_config;
+static int max_blocks;
+
+unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
+
+enum perf_output_field {
+ PERF_OUTPUT_COMM = 1U << 0,
+ PERF_OUTPUT_TID = 1U << 1,
+ PERF_OUTPUT_PID = 1U << 2,
+ PERF_OUTPUT_TIME = 1U << 3,
+ PERF_OUTPUT_CPU = 1U << 4,
+ PERF_OUTPUT_EVNAME = 1U << 5,
+ PERF_OUTPUT_TRACE = 1U << 6,
+ PERF_OUTPUT_IP = 1U << 7,
+ PERF_OUTPUT_SYM = 1U << 8,
+ PERF_OUTPUT_DSO = 1U << 9,
+ PERF_OUTPUT_ADDR = 1U << 10,
+ PERF_OUTPUT_SYMOFFSET = 1U << 11,
+ PERF_OUTPUT_SRCLINE = 1U << 12,
+ PERF_OUTPUT_PERIOD = 1U << 13,
+ PERF_OUTPUT_IREGS = 1U << 14,
+ PERF_OUTPUT_BRSTACK = 1U << 15,
+ PERF_OUTPUT_BRSTACKSYM = 1U << 16,
+ PERF_OUTPUT_DATA_SRC = 1U << 17,
+ PERF_OUTPUT_WEIGHT = 1U << 18,
+ PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
+ PERF_OUTPUT_CALLINDENT = 1U << 20,
+ PERF_OUTPUT_INSN = 1U << 21,
+ PERF_OUTPUT_INSNLEN = 1U << 22,
+ PERF_OUTPUT_BRSTACKINSN = 1U << 23,
+ PERF_OUTPUT_BRSTACKOFF = 1U << 24,
+ PERF_OUTPUT_SYNTH = 1U << 25,
+ PERF_OUTPUT_PHYS_ADDR = 1U << 26,
+ PERF_OUTPUT_UREGS = 1U << 27,
+ PERF_OUTPUT_METRIC = 1U << 28,
+ PERF_OUTPUT_MISC = 1U << 29,
+};
+
+struct output_option {
+ const char *str;
+ enum perf_output_field field;
+} all_output_options[] = {
+ {.str = "comm", .field = PERF_OUTPUT_COMM},
+ {.str = "tid", .field = PERF_OUTPUT_TID},
+ {.str = "pid", .field = PERF_OUTPUT_PID},
+ {.str = "time", .field = PERF_OUTPUT_TIME},
+ {.str = "cpu", .field = PERF_OUTPUT_CPU},
+ {.str = "event", .field = PERF_OUTPUT_EVNAME},
+ {.str = "trace", .field = PERF_OUTPUT_TRACE},
+ {.str = "ip", .field = PERF_OUTPUT_IP},
+ {.str = "sym", .field = PERF_OUTPUT_SYM},
+ {.str = "dso", .field = PERF_OUTPUT_DSO},
+ {.str = "addr", .field = PERF_OUTPUT_ADDR},
+ {.str = "symoff", .field = PERF_OUTPUT_SYMOFFSET},
+ {.str = "srcline", .field = PERF_OUTPUT_SRCLINE},
+ {.str = "period", .field = PERF_OUTPUT_PERIOD},
+ {.str = "iregs", .field = PERF_OUTPUT_IREGS},
+ {.str = "uregs", .field = PERF_OUTPUT_UREGS},
+ {.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
+ {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+ {.str = "data_src", .field = PERF_OUTPUT_DATA_SRC},
+ {.str = "weight", .field = PERF_OUTPUT_WEIGHT},
+ {.str = "bpf-output", .field = PERF_OUTPUT_BPF_OUTPUT},
+ {.str = "callindent", .field = PERF_OUTPUT_CALLINDENT},
+ {.str = "insn", .field = PERF_OUTPUT_INSN},
+ {.str = "insnlen", .field = PERF_OUTPUT_INSNLEN},
+ {.str = "brstackinsn", .field = PERF_OUTPUT_BRSTACKINSN},
+ {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF},
+ {.str = "synth", .field = PERF_OUTPUT_SYNTH},
+ {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR},
+ {.str = "metric", .field = PERF_OUTPUT_METRIC},
+ {.str = "misc", .field = PERF_OUTPUT_MISC},
+};
+
+enum {
+ OUTPUT_TYPE_SYNTH = PERF_TYPE_MAX,
+ OUTPUT_TYPE_MAX
+};
+
+/* default set to maintain compatibility with current format */
+static struct {
+ bool user_set;
+ bool wildcard_set;
+ unsigned int print_ip_opts;
+ u64 fields;
+ u64 invalid_fields;
+} output[OUTPUT_TYPE_MAX] = {
+
+ [PERF_TYPE_HARDWARE] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+ },
+
+ [PERF_TYPE_SOFTWARE] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+ PERF_OUTPUT_BPF_OUTPUT,
+
+ .invalid_fields = PERF_OUTPUT_TRACE,
+ },
+
+ [PERF_TYPE_TRACEPOINT] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_TRACE
+ },
+
+ [PERF_TYPE_HW_CACHE] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+ },
+
+ [PERF_TYPE_RAW] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
+ PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
+ PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR,
+
+ .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+ },
+
+ [PERF_TYPE_BREAKPOINT] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD,
+
+ .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+ },
+
+ [OUTPUT_TYPE_SYNTH] = {
+ .user_set = false,
+
+ .fields = PERF_OUTPUT_COMM | PERF_OUTPUT_TID |
+ PERF_OUTPUT_CPU | PERF_OUTPUT_TIME |
+ PERF_OUTPUT_EVNAME | PERF_OUTPUT_IP |
+ PERF_OUTPUT_SYM | PERF_OUTPUT_SYMOFFSET |
+ PERF_OUTPUT_DSO | PERF_OUTPUT_SYNTH,
+
+ .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
+ },
+};
+
+struct perf_evsel_script {
+ char *filename;
+ FILE *fp;
+ u64 samples;
+ /* For metric output */
+ u64 val;
+ int gnum;
+};
+
+static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel)
+{
+ return (struct perf_evsel_script *)evsel->priv;
+}
+
+static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel,
+ struct perf_data *data)
+{
+ struct perf_evsel_script *es = zalloc(sizeof(*es));
+
+ if (es != NULL) {
+ if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
+ goto out_free;
+ es->fp = fopen(es->filename, "w");
+ if (es->fp == NULL)
+ goto out_free_filename;
+ }
+
+ return es;
+out_free_filename:
+ zfree(&es->filename);
+out_free:
+ free(es);
+ return NULL;
+}
+
+static void perf_evsel_script__delete(struct perf_evsel_script *es)
+{
+ zfree(&es->filename);
+ fclose(es->fp);
+ es->fp = NULL;
+ free(es);
+}
+
+static int perf_evsel_script__fprintf(struct perf_evsel_script *es, FILE *fp)
+{
+ struct stat st;
+
+ fstat(fileno(es->fp), &st);
+ return fprintf(fp, "[ perf script: Wrote %.3f MB %s (%" PRIu64 " samples) ]\n",
+ st.st_size / 1024.0 / 1024.0, es->filename, es->samples);
+}
+
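+/*
+ * Synthesized (e.g. Intel PT) events carry a type beyond PERF_TYPE_MAX;
+ * map between the attr type space and the output[] index space.
+ */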
+static inline int output_type(unsigned int type)
+{
+ switch (type) {
+ case PERF_TYPE_SYNTH:
+ return OUTPUT_TYPE_SYNTH;
+ default:
+ return type;
+ }
+}
+
+static inline unsigned int attr_type(unsigned int type)
+{
+ switch (type) {
+ case OUTPUT_TYPE_SYNTH:
+ return PERF_TYPE_SYNTH;
+ default:
+ return type;
+ }
+}
+
+static bool output_set_by_user(void)
+{
+ int j;
+ for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+ if (output[j].user_set)
+ return true;
+ }
+ return false;
+}
+
+static const char *output_field2str(enum perf_output_field field)
+{
+ int i, imax = ARRAY_SIZE(all_output_options);
+ const char *str = "";
+
+ for (i = 0; i < imax; ++i) {
+ if (all_output_options[i].field == field) {
+ str = all_output_options[i].str;
+ break;
+ }
+ }
+ return str;
+}
+
+#define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
+
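+/*
+ * Check that samples carry the data a field needs: fail when the user
+ * asked for the field explicitly, otherwise quietly drop the field from
+ * the default list for this event type.
+ */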
+static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
+ u64 sample_type, const char *sample_msg,
+ enum perf_output_field field,
+ bool allow_user_set)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ int type = output_type(attr->type);
+ const char *evname;
+
+ if (attr->sample_type & sample_type)
+ return 0;
+
+ if (output[type].user_set) {
+ if (allow_user_set)
+ return 0;
+ evname = perf_evsel__name(evsel);
+ pr_err("Samples for '%s' event do not have %s attribute set. "
+ "Cannot print '%s' field.\n",
+ evname, sample_msg, output_field2str(field));
+ return -1;
+ }
+
+ /* user did not ask for it explicitly so remove from the default list */
+ output[type].fields &= ~field;
+ evname = perf_evsel__name(evsel);
+ pr_debug("Samples for '%s' event do not have %s attribute set. "
+ "Skipping '%s' field.\n",
+ evname, sample_msg, output_field2str(field));
+
+ return 0;
+}
+
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+ u64 sample_type, const char *sample_msg,
+ enum perf_output_field field)
+{
+ return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
+ false);
+}
+
+static int perf_evsel__check_attr(struct perf_evsel *evsel,
+ struct perf_session *session)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ bool allow_user_set;
+
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
+ allow_user_set = perf_header__has_feat(&session->header,
+ HEADER_AUXTRACE);
+
+ if (PRINT_FIELD(TRACE) &&
+ !perf_session__has_traces(session, "record -R"))
+ return -EINVAL;
+
+ if (PRINT_FIELD(IP)) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
+ PERF_OUTPUT_IP))
+ return -EINVAL;
+ }
+
+ if (PRINT_FIELD(ADDR) &&
+ perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+ PERF_OUTPUT_ADDR, allow_user_set))
+ return -EINVAL;
+
+ if (PRINT_FIELD(DATA_SRC) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
+ PERF_OUTPUT_DATA_SRC))
+ return -EINVAL;
+
+ if (PRINT_FIELD(WEIGHT) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
+ PERF_OUTPUT_WEIGHT))
+ return -EINVAL;
+
+ if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
+ pr_err("Display of symbols requested but neither sample IP nor "
+ "sample address\nis selected. Hence, no addresses to convert "
+ "to symbols.\n");
+ return -EINVAL;
+ }
+ if (PRINT_FIELD(SYMOFFSET) && !PRINT_FIELD(SYM)) {
+ pr_err("Display of offsets requested but symbol is not"
+ "selected.\n");
+ return -EINVAL;
+ }
+ if (PRINT_FIELD(DSO) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR) &&
+ !PRINT_FIELD(BRSTACK) && !PRINT_FIELD(BRSTACKSYM) && !PRINT_FIELD(BRSTACKOFF)) {
+ pr_err("Display of DSO requested but no address to convert. Select\n"
+ "sample IP, sample address, brstack, brstacksym, or brstackoff.\n");
+ return -EINVAL;
+ }
+ if (PRINT_FIELD(SRCLINE) && !PRINT_FIELD(IP)) {
+ pr_err("Display of source line number requested but sample IP is not\n"
+ "selected. Hence, no address to lookup the source line number.\n");
+ return -EINVAL;
+ }
+ if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set &&
+ !(perf_evlist__combined_branch_type(session->evlist) &
+ PERF_SAMPLE_BRANCH_ANY)) {
+ pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
+ "Hint: run 'perf record -b ...'\n");
+ return -EINVAL;
+ }
+ if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
+ PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+ return -EINVAL;
+
+ if (PRINT_FIELD(TIME) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
+ PERF_OUTPUT_TIME))
+ return -EINVAL;
+
+ if (PRINT_FIELD(CPU) &&
+ perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+ PERF_OUTPUT_CPU, allow_user_set))
+ return -EINVAL;
+
+ if (PRINT_FIELD(IREGS) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
+ PERF_OUTPUT_IREGS))
+ return -EINVAL;
+
+ if (PRINT_FIELD(UREGS) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
+ PERF_OUTPUT_UREGS))
+ return -EINVAL;
+
+ if (PRINT_FIELD(PHYS_ADDR) &&
+ perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
+ PERF_OUTPUT_PHYS_ADDR))
+ return -EINVAL;
+
+ return 0;
+}
+
+static void set_print_ip_opts(struct perf_event_attr *attr)
+{
+ unsigned int type = output_type(attr->type);
+
+ output[type].print_ip_opts = 0;
+ if (PRINT_FIELD(IP))
+ output[type].print_ip_opts |= EVSEL__PRINT_IP;
+
+ if (PRINT_FIELD(SYM))
+ output[type].print_ip_opts |= EVSEL__PRINT_SYM;
+
+ if (PRINT_FIELD(DSO))
+ output[type].print_ip_opts |= EVSEL__PRINT_DSO;
+
+ if (PRINT_FIELD(SYMOFFSET))
+ output[type].print_ip_opts |= EVSEL__PRINT_SYMOFFSET;
+
+ if (PRINT_FIELD(SRCLINE))
+ output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE;
+}
+
+/*
+ * verify all user requested events exist and the samples
+ * have the expected data
+ */
+static int perf_session__check_output_opt(struct perf_session *session)
+{
+ unsigned int j;
+ struct perf_evsel *evsel;
+
+ for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+ evsel = perf_session__find_first_evtype(session, attr_type(j));
+
+ /*
+ * even if fields is set to 0 (i.e., show nothing), the event must
+ * exist if the user explicitly included it on the command line
+ */
+ if (!evsel && output[j].user_set && !output[j].wildcard_set &&
+ j != OUTPUT_TYPE_SYNTH) {
+ pr_err("%s events do not exist. "
+ "Remove corresponding -F option to proceed.\n",
+ event_type(j));
+ return -1;
+ }
+
+ if (evsel && output[j].fields &&
+ perf_evsel__check_attr(evsel, session))
+ return -1;
+
+ if (evsel == NULL)
+ continue;
+
+ set_print_ip_opts(&evsel->attr);
+ }
+
+ if (!no_callchain) {
+ bool use_callchain = false;
+ bool not_pipe = false;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ not_pipe = true;
+ if (evsel__has_callchain(evsel)) {
+ use_callchain = true;
+ break;
+ }
+ }
+ if (not_pipe && !use_callchain)
+ symbol_conf.use_callchain = false;
+ }
+
+ /*
+ * set default for tracepoints to print symbols only
+ * if callchains are present
+ */
+ if (symbol_conf.use_callchain &&
+ !output[PERF_TYPE_TRACEPOINT].user_set) {
+ j = PERF_TYPE_TRACEPOINT;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.type != j)
+ continue;
+
+ if (evsel__has_callchain(evsel)) {
+ output[j].fields |= PERF_OUTPUT_IP;
+ output[j].fields |= PERF_OUTPUT_SYM;
+ output[j].fields |= PERF_OUTPUT_SYMOFFSET;
+ output[j].fields |= PERF_OUTPUT_DSO;
+ set_print_ip_opts(&evsel->attr);
+ goto out;
+ }
+ }
+ }
+
+out:
+ return 0;
+}
+
+static int perf_sample__fprintf_iregs(struct perf_sample *sample,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct regs_dump *regs = &sample->intr_regs;
+ uint64_t mask = attr->sample_regs_intr;
+ unsigned i = 0, r;
+ int printed = 0;
+
+ if (!regs)
+ return 0;
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+ printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ }
+
+ return printed;
+}
+
+static int perf_sample__fprintf_uregs(struct perf_sample *sample,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ uint64_t mask = attr->sample_regs_user;
+ unsigned i = 0, r;
+ int printed = 0;
+
+ if (!regs || !regs->regs)
+ return 0;
+
+ printed += fprintf(fp, " ABI:%" PRIu64 " ", regs->abi);
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+ printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
+ }
+
+ return printed;
+}
+
+static int perf_sample__fprintf_start(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_evsel *evsel,
+ u32 type, FILE *fp)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ unsigned long secs;
+ unsigned long long nsecs;
+ int printed = 0;
+
+ if (PRINT_FIELD(COMM)) {
+ if (latency_format)
+ printed += fprintf(fp, "%8.8s ", thread__comm_str(thread));
+ else if (PRINT_FIELD(IP) && evsel__has_callchain(evsel) && symbol_conf.use_callchain)
+ printed += fprintf(fp, "%s ", thread__comm_str(thread));
+ else
+ printed += fprintf(fp, "%16s ", thread__comm_str(thread));
+ }
+
+ if (PRINT_FIELD(PID) && PRINT_FIELD(TID))
+ printed += fprintf(fp, "%5d/%-5d ", sample->pid, sample->tid);
+ else if (PRINT_FIELD(PID))
+ printed += fprintf(fp, "%5d ", sample->pid);
+ else if (PRINT_FIELD(TID))
+ printed += fprintf(fp, "%5d ", sample->tid);
+
+ if (PRINT_FIELD(CPU)) {
+ if (latency_format)
+ printed += fprintf(fp, "%3d ", sample->cpu);
+ else
+ printed += fprintf(fp, "[%03d] ", sample->cpu);
+ }
+
+ if (PRINT_FIELD(MISC)) {
+ int ret = 0;
+
+ #define has(m) \
+ (sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m
+
+ if (has(KERNEL))
+ ret += fprintf(fp, "K");
+ if (has(USER))
+ ret += fprintf(fp, "U");
+ if (has(HYPERVISOR))
+ ret += fprintf(fp, "H");
+ if (has(GUEST_KERNEL))
+ ret += fprintf(fp, "G");
+ if (has(GUEST_USER))
+ ret += fprintf(fp, "g");
+
+ switch (type) {
+ case PERF_RECORD_MMAP:
+ case PERF_RECORD_MMAP2:
+ if (has(MMAP_DATA))
+ ret += fprintf(fp, "M");
+ break;
+ case PERF_RECORD_COMM:
+ if (has(COMM_EXEC))
+ ret += fprintf(fp, "E");
+ break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ if (has(SWITCH_OUT)) {
+ ret += fprintf(fp, "S");
+ if (sample->misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT)
+ ret += fprintf(fp, "p");
+ }
+ break;
+ default:
+ break;
+ }
+
+ #undef has
+
+ ret += fprintf(fp, "%*s", 6 - ret, " ");
+ printed += ret;
+ }
+
+ if (PRINT_FIELD(TIME)) {
+ nsecs = sample->time;
+ secs = nsecs / NSEC_PER_SEC;
+ nsecs -= secs * NSEC_PER_SEC;
+
+ if (nanosecs)
+ printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs);
+ else {
+ char sample_time[32];
+ timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time));
+ printed += fprintf(fp, "%12s: ", sample_time);
+ }
+ }
+
+ return printed;
+}
+
+static inline char
+mispred_str(struct branch_entry *br)
+{
+ if (!(br->flags.mispred || br->flags.predicted))
+ return '-';
+
+ return br->flags.predicted ? 'P' : 'M';
+}
+
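+/*
+ * Print the raw branch stack as from/to address pairs followed by the
+ * mispredict, in-transaction and abort flags and the cycle count, e.g.
+ * (illustrative) "0x4004f6/0x4004fb/P/-/-/0".
+ */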
+static int perf_sample__fprintf_brstack(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct branch_stack *br = sample->branch_stack;
+ struct addr_location alf, alt;
+ u64 i, from, to;
+ int printed = 0;
+
+ if (!(br && br->nr))
+ return 0;
+
+ for (i = 0; i < br->nr; i++) {
+ from = br->entries[i].from;
+ to = br->entries[i].to;
+
+ if (PRINT_FIELD(DSO)) {
+ memset(&alf, 0, sizeof(alf));
+ memset(&alt, 0, sizeof(alt));
+ thread__find_map_fb(thread, sample->cpumode, from, &alf);
+ thread__find_map_fb(thread, sample->cpumode, to, &alt);
+ }
+
+ printed += fprintf(fp, " 0x%"PRIx64, from);
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, "(");
+ printed += map__fprintf_dsoname(alf.map, fp);
+ printed += fprintf(fp, ")");
+ }
+
+ printed += fprintf(fp, "/0x%"PRIx64, to);
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, "(");
+ printed += map__fprintf_dsoname(alt.map, fp);
+ printed += fprintf(fp, ")");
+ }
+
+ printed += fprintf(fp, "/%c/%c/%c/%d ",
+ mispred_str( br->entries + i),
+ br->entries[i].flags.in_tx? 'X' : '-',
+ br->entries[i].flags.abort? 'A' : '-',
+ br->entries[i].flags.cycles);
+ }
+
+ return printed;
+}
+
+static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct branch_stack *br = sample->branch_stack;
+ struct addr_location alf, alt;
+ u64 i, from, to;
+ int printed = 0;
+
+ if (!(br && br->nr))
+ return 0;
+
+ for (i = 0; i < br->nr; i++) {
+
+ memset(&alf, 0, sizeof(alf));
+ memset(&alt, 0, sizeof(alt));
+ from = br->entries[i].from;
+ to = br->entries[i].to;
+
+ thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
+ thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
+
+ printed += symbol__fprintf_symname_offs(alf.sym, &alf, fp);
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, "(");
+ printed += map__fprintf_dsoname(alf.map, fp);
+ printed += fprintf(fp, ")");
+ }
+ printed += fprintf(fp, "%c", '/');
+ printed += symbol__fprintf_symname_offs(alt.sym, &alt, fp);
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, "(");
+ printed += map__fprintf_dsoname(alt.map, fp);
+ printed += fprintf(fp, ")");
+ }
+ printed += fprintf(fp, "/%c/%c/%c/%d ",
+ mispred_str( br->entries + i),
+ br->entries[i].flags.in_tx? 'X' : '-',
+ br->entries[i].flags.abort? 'A' : '-',
+ br->entries[i].flags.cycles);
+ }
+
+ return printed;
+}
+
+static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct branch_stack *br = sample->branch_stack;
+ struct addr_location alf, alt;
+ u64 i, from, to;
+ int printed = 0;
+
+ if (!(br && br->nr))
+ return 0;
+
+ for (i = 0; i < br->nr; i++) {
+
+ memset(&alf, 0, sizeof(alf));
+ memset(&alt, 0, sizeof(alt));
+ from = br->entries[i].from;
+ to = br->entries[i].to;
+
+ if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
+ !alf.map->dso->adjust_symbols)
+ from = map__map_ip(alf.map, from);
+
+ if (thread__find_map_fb(thread, sample->cpumode, to, &alt) &&
+ !alt.map->dso->adjust_symbols)
+ to = map__map_ip(alt.map, to);
+
+ printed += fprintf(fp, " 0x%"PRIx64, from);
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, "(");
+ printed += map__fprintf_dsoname(alf.map, fp);
+ printed += fprintf(fp, ")");
+ }
+ printed += fprintf(fp, "/0x%"PRIx64, to);
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, "(");
+ printed += map__fprintf_dsoname(alt.map, fp);
+ printed += fprintf(fp, ")");
+ }
+ printed += fprintf(fp, "/%c/%c/%c/%d ",
+ mispred_str(br->entries + i),
+ br->entries[i].flags.in_tx ? 'X' : '-',
+ br->entries[i].flags.abort ? 'A' : '-',
+ br->entries[i].flags.cycles);
+ }
+
+ return printed;
+}
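+
+/* Maximum size, in bytes, of a basic block of code fetched in one go. */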
+#define MAXBB 16384UL
+
+static int grab_bb(u8 *buffer, u64 start, u64 end,
+ struct machine *machine, struct thread *thread,
+ bool *is64bit, u8 *cpumode, bool last)
+{
+ long offset, len;
+ struct addr_location al;
+ bool kernel;
+
+ if (!start || !end)
+ return 0;
+
+ kernel = machine__kernel_ip(machine, start);
+ if (kernel)
+ *cpumode = PERF_RECORD_MISC_KERNEL;
+ else
+ *cpumode = PERF_RECORD_MISC_USER;
+
+ /*
+ * Block overlaps between kernel and user.
+ * This can happen due to ring filtering:
+ * on Intel CPUs the entry into the kernel is filtered,
+ * but the exit is not. Let the caller patch it up.
+ */
+ if (kernel != machine__kernel_ip(machine, end)) {
+ pr_debug("\tblock %" PRIx64 "-%" PRIx64 " transfers between kernel and user\n", start, end);
+ return -ENXIO;
+ }
+
+ memset(&al, 0, sizeof(al));
+ if (end - start > MAXBB - MAXINSN) {
+ if (last)
+ pr_debug("\tbrstack does not reach to final jump (%" PRIx64 "-%" PRIx64 ")\n", start, end);
+ else
+ pr_debug("\tblock %" PRIx64 "-%" PRIx64 " (%" PRIu64 ") too long to dump\n", start, end, end - start);
+ return 0;
+ }
+
+ if (!thread__find_map(thread, *cpumode, start, &al) || !al.map->dso) {
+ pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
+ return 0;
+ }
+ if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR) {
+ pr_debug("\tcannot resolve %" PRIx64 "-%" PRIx64 "\n", start, end);
+ return 0;
+ }
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map);
+
+ offset = al.map->map_ip(al.map, start);
+ len = dso__data_read_offset(al.map->dso, machine, offset, (u8 *)buffer,
+ end - start + MAXINSN);
+
+ *is64bit = al.map->dso->is_64_bit;
+ if (len <= 0)
+ pr_debug("\tcannot fetch code for block at %" PRIx64 "-%" PRIx64 "\n",
+ start, end);
+ return len;
+}
+
+static int ip__fprintf_jump(uint64_t ip, struct branch_entry *en,
+ struct perf_insn *x, u8 *inbuf, int len,
+ int insn, FILE *fp)
+{
+ int printed = fprintf(fp, "\t%016" PRIx64 "\t%-30s\t#%s%s%s%s", ip,
+ dump_insn(x, ip, inbuf, len, NULL),
+ en->flags.predicted ? " PRED" : "",
+ en->flags.mispred ? " MISPRED" : "",
+ en->flags.in_tx ? " INTX" : "",
+ en->flags.abort ? " ABORT" : "");
+ if (en->flags.cycles) {
+ printed += fprintf(fp, " %d cycles", en->flags.cycles);
+ if (insn)
+ printed += fprintf(fp, " %.2f IPC", (float)insn / en->flags.cycles);
+ }
+ return printed + fprintf(fp, "\n");
+}
+
+static int ip__fprintf_sym(uint64_t addr, struct thread *thread,
+ u8 cpumode, int cpu, struct symbol **lastsym,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct addr_location al;
+ int off, printed = 0;
+
+ memset(&al, 0, sizeof(al));
+
+ thread__find_map(thread, cpumode, addr, &al);
+
+ if ((*lastsym) && al.addr >= (*lastsym)->start && al.addr < (*lastsym)->end)
+ return 0;
+
+ al.cpu = cpu;
+ al.sym = NULL;
+ if (al.map)
+ al.sym = map__find_symbol(al.map, al.addr);
+
+ if (!al.sym)
+ return 0;
+
+ if (al.addr < al.sym->end)
+ off = al.addr - al.sym->start;
+ else
+ off = al.addr - al.map->start - al.sym->start;
+ printed += fprintf(fp, "\t%s", al.sym->name);
+ if (off)
+ printed += fprintf(fp, "%+d", off);
+ printed += fprintf(fp, ":");
+ if (PRINT_FIELD(SRCLINE))
+ printed += map__fprintf_srcline(al.map, al.addr, "\t", fp);
+ printed += fprintf(fp, "\n");
+ *lastsym = al.sym;
+
+ return printed;
+}
+
+static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_event_attr *attr,
+ struct machine *machine, FILE *fp)
+{
+ struct branch_stack *br = sample->branch_stack;
+ u64 start, end;
+ int i, insn, len, nr, ilen, printed = 0;
+ struct perf_insn x;
+ u8 buffer[MAXBB];
+ unsigned off;
+ struct symbol *lastsym = NULL;
+
+ if (!(br && br->nr))
+ return 0;
+ nr = br->nr;
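+ /* N blocks lie between N + 1 branch entries, hence the + 1. */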
+ if (max_blocks && nr > max_blocks + 1)
+ nr = max_blocks + 1;
+
+ x.thread = thread;
+ x.cpu = sample->cpu;
+
+ printed += fprintf(fp, "%c", '\n');
+
+ /* Handle the first 'from' jump, for which we don't know the entry point. */
+ len = grab_bb(buffer, br->entries[nr-1].from,
+ br->entries[nr-1].from,
+ machine, thread, &x.is64bit, &x.cpumode, false);
+ if (len > 0) {
+ printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
+ x.cpumode, x.cpu, &lastsym, attr, fp);
+ printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+ &x, buffer, len, 0, fp);
+ }
+
+ /* Print all blocks */
+ for (i = nr - 2; i >= 0; i--) {
+ if (br->entries[i].from || br->entries[i].to)
+ pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
+ br->entries[i].from,
+ br->entries[i].to);
+ start = br->entries[i + 1].to;
+ end = br->entries[i].from;
+
+ len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
+ /* Patch up missing kernel transfers due to ring filters */
+ if (len == -ENXIO && i > 0) {
+ end = br->entries[--i].from;
+ pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
+ len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
+ }
+ if (len <= 0)
+ continue;
+
+ insn = 0;
+ for (off = 0; off < (unsigned)len; off += ilen) {
+ uint64_t ip = start + off;
+
+ printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
+ if (ip == end) {
+ printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp);
+ break;
+ } else {
+ ilen = 0;
+ printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", ip,
+ dump_insn(&x, ip, buffer + off, len - off, &ilen));
+ if (ilen == 0)
+ break;
+ insn++;
+ }
+ }
+ if (off != end - start)
+ printed += fprintf(fp, "\tmismatch of LBR data and executable\n");
+ }
+
+ /*
+ * Hit the branch? In this case we are already done, and the target
+ * has not been executed yet.
+ */
+ if (br->entries[0].from == sample->ip)
+ goto out;
+ if (br->entries[0].flags.abort)
+ goto out;
+
+ /*
+ * Print the final block, up to the sample.
+ */
+ start = br->entries[0].to;
+ end = sample->ip;
+ len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, true);
+ printed += ip__fprintf_sym(start, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
+ if (len <= 0) {
+ /* Print at least the last IP if fetching the basic block did not work */
+ len = grab_bb(buffer, sample->ip, sample->ip,
+ machine, thread, &x.is64bit, &x.cpumode, false);
+ if (len <= 0)
+ goto out;
+
+ printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", sample->ip,
+ dump_insn(&x, sample->ip, buffer, len, NULL));
+ goto out;
+ }
+ for (off = 0; off <= end - start; off += ilen) {
+ ilen = 0;
+ printed += fprintf(fp, "\t%016" PRIx64 "\t%s\n", start + off,
+ dump_insn(&x, start + off, buffer + off, len - off, &ilen));
+ if (ilen == 0)
+ break;
+ }
+out:
+ return printed;
+}
+
+static int perf_sample__fprintf_addr(struct perf_sample *sample,
+ struct thread *thread,
+ struct perf_event_attr *attr, FILE *fp)
+{
+ struct addr_location al;
+ int printed = fprintf(fp, "%16" PRIx64, sample->addr);
+
+ if (!sample_addr_correlates_sym(attr))
+ goto out;
+
+ thread__resolve(thread, &al, sample);
+
+ if (PRINT_FIELD(SYM)) {
+ printed += fprintf(fp, " ");
+ if (PRINT_FIELD(SYMOFFSET))
+ printed += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ else
+ printed += symbol__fprintf_symname(al.sym, fp);
+ }
+
+ if (PRINT_FIELD(DSO)) {
+ printed += fprintf(fp, " (");
+ printed += map__fprintf_dsoname(al.map, fp);
+ printed += fprintf(fp, ")");
+ }
+out:
+ return printed;
+}
+
+static int perf_sample__fprintf_callindent(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct addr_location *al, FILE *fp)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ size_t depth = thread_stack__depth(thread);
+ struct addr_location addr_al;
+ const char *name = NULL;
+ static int spacing;
+ int len = 0;
+ u64 ip = 0;
+
+ /*
+ * The 'return' has already been popped off the stack so the depth has
+ * to be adjusted to match the 'call'.
+ */
+ if (thread->ts && sample->flags & PERF_IP_FLAG_RETURN)
+ depth += 1;
+
+ if (sample->flags & (PERF_IP_FLAG_CALL | PERF_IP_FLAG_TRACE_BEGIN)) {
+ if (sample_addr_correlates_sym(attr)) {
+ thread__resolve(thread, &addr_al, sample);
+ if (addr_al.sym)
+ name = addr_al.sym->name;
+ else
+ ip = sample->addr;
+ } else {
+ ip = sample->addr;
+ }
+ } else if (sample->flags & (PERF_IP_FLAG_RETURN | PERF_IP_FLAG_TRACE_END)) {
+ if (al->sym)
+ name = al->sym->name;
+ else
+ ip = sample->ip;
+ }
+
+ if (name)
+ len = fprintf(fp, "%*s%s", (int)depth * 4, "", name);
+ else if (ip)
+ len = fprintf(fp, "%*s%16" PRIx64, (int)depth * 4, "", ip);
+
+ if (len < 0)
+ return len;
+
+ /*
+ * Try to keep the output length from changing frequently so that the
+ * output lines up more nicely.
+ */
+ if (len > spacing || (len && len < spacing - 52))
+ spacing = round_up(len + 4, 32);
+
+ if (len < spacing)
+ len += fprintf(fp, "%*s", spacing - len, "");
+
+ return len;
+}
+
+static int perf_sample__fprintf_insn(struct perf_sample *sample,
+ struct perf_event_attr *attr,
+ struct thread *thread,
+ struct machine *machine, FILE *fp)
+{
+ int printed = 0;
+
+ if (PRINT_FIELD(INSNLEN))
+ printed += fprintf(fp, " ilen: %d", sample->insn_len);
+ if (PRINT_FIELD(INSN)) {
+ int i;
+
+ printed += fprintf(fp, " insn:");
+ for (i = 0; i < sample->insn_len; i++)
+ printed += fprintf(fp, " %02x", (unsigned char)sample->insn[i]);
+ }
+ if (PRINT_FIELD(BRSTACKINSN))
+ printed += perf_sample__fprintf_brstackinsn(sample, thread, attr, machine, fp);
+
+ return printed;
+}
+
+static int perf_sample__fprintf_bts(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct addr_location *al,
+ struct machine *machine, FILE *fp)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ unsigned int type = output_type(attr->type);
+ bool print_srcline_last = false;
+ int printed = 0;
+
+ if (PRINT_FIELD(CALLINDENT))
+ printed += perf_sample__fprintf_callindent(sample, evsel, thread, al, fp);
+
+ /* print branch_from information */
+ if (PRINT_FIELD(IP)) {
+ unsigned int print_opts = output[type].print_ip_opts;
+ struct callchain_cursor *cursor = NULL;
+
+ if (symbol_conf.use_callchain && sample->callchain &&
+ thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, scripting_max_stack) == 0)
+ cursor = &callchain_cursor;
+
+ if (cursor == NULL) {
+ printed += fprintf(fp, " ");
+ if (print_opts & EVSEL__PRINT_SRCLINE) {
+ print_srcline_last = true;
+ print_opts &= ~EVSEL__PRINT_SRCLINE;
+ }
+ } else
+ printed += fprintf(fp, "\n");
+
+ printed += sample__fprintf_sym(sample, al, 0, print_opts, cursor, fp);
+ }
+
+ /* print branch_to information */
+ if (PRINT_FIELD(ADDR) ||
+ ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ !output[type].user_set)) {
+ printed += fprintf(fp, " => ");
+ printed += perf_sample__fprintf_addr(sample, thread, attr, fp);
+ }
+
+ if (print_srcline_last)
+ printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
+
+ printed += perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+ return printed + fprintf(fp, "\n");
+}
+
+static struct {
+ u32 flags;
+ const char *name;
+} sample_flags[] = {
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "jcc"},
+ {PERF_IP_FLAG_BRANCH, "jmp"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT, "int"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT, "iret"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET, "syscall"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET, "sysret"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "async"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC | PERF_IP_FLAG_INTERRUPT, "hw int"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "tx abrt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "tr strt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"},
+ {0, NULL}
+};
+
+static int perf_sample__fprintf_flags(u32 flags, FILE *fp)
+{
+ const char *chars = PERF_IP_FLAG_CHARS;
+ const int n = strlen(PERF_IP_FLAG_CHARS);
+ bool in_tx = flags & PERF_IP_FLAG_IN_TX;
+ const char *name = NULL;
+ char str[33];
+ int i, pos = 0;
+
+ for (i = 0; sample_flags[i].name ; i++) {
+ if (sample_flags[i].flags == (flags & ~PERF_IP_FLAG_IN_TX)) {
+ name = sample_flags[i].name;
+ break;
+ }
+ }
+
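+ /*
+ * Build the raw one-character-per-flag string from PERF_IP_FLAG_CHARS;
+ * it is printed only when the combination has no known name above.
+ * Set bits beyond the named ones show up as '?'.
+ */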
+ for (i = 0; i < n; i++, flags >>= 1) {
+ if (flags & 1)
+ str[pos++] = chars[i];
+ }
+ for (; i < 32; i++, flags >>= 1) {
+ if (flags & 1)
+ str[pos++] = '?';
+ }
+ str[pos] = 0;
+
+ if (name)
+ return fprintf(fp, " %-7s%4s ", name, in_tx ? "(x)" : "");
+
+ return fprintf(fp, " %-11s ", str);
+}
+
+struct printer_data {
+ int line_no;
+ bool hit_nul;
+ bool is_printable;
+};
+
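+/*
+ * binary__fprintf() callback: hex-dump the BPF output buffer while
+ * tracking whether the payload looks like a printable, NUL-terminated
+ * string, so that the caller can also print it as text.
+ */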
+static int sample__fprintf_bpf_output(enum binary_printer_ops op,
+ unsigned int val,
+ void *extra, FILE *fp)
+{
+ unsigned char ch = (unsigned char)val;
+ struct printer_data *printer_data = extra;
+ int printed = 0;
+
+ switch (op) {
+ case BINARY_PRINT_DATA_BEGIN:
+ printed += fprintf(fp, "\n");
+ break;
+ case BINARY_PRINT_LINE_BEGIN:
+ printed += fprintf(fp, "%17s", !printer_data->line_no ? "BPF output:" :
+ " ");
+ break;
+ case BINARY_PRINT_ADDR:
+ printed += fprintf(fp, " %04x:", val);
+ break;
+ case BINARY_PRINT_NUM_DATA:
+ printed += fprintf(fp, " %02x", val);
+ break;
+ case BINARY_PRINT_NUM_PAD:
+ printed += fprintf(fp, " ");
+ break;
+ case BINARY_PRINT_SEP:
+ printed += fprintf(fp, " ");
+ break;
+ case BINARY_PRINT_CHAR_DATA:
+ if (printer_data->hit_nul && ch)
+ printer_data->is_printable = false;
+
+ if (!isprint(ch)) {
+ printed += fprintf(fp, "%c", '.');
+
+ if (!printer_data->is_printable)
+ break;
+
+ if (ch == '\0')
+ printer_data->hit_nul = true;
+ else
+ printer_data->is_printable = false;
+ } else {
+ printed += fprintf(fp, "%c", ch);
+ }
+ break;
+ case BINARY_PRINT_CHAR_PAD:
+ printed += fprintf(fp, " ");
+ break;
+ case BINARY_PRINT_LINE_END:
+ printed += fprintf(fp, "\n");
+ printer_data->line_no++;
+ break;
+ case BINARY_PRINT_DATA_END:
+ default:
+ break;
+ }
+
+ return printed;
+}
+
+static int perf_sample__fprintf_bpf_output(struct perf_sample *sample, FILE *fp)
+{
+ unsigned int nr_bytes = sample->raw_size;
+ struct printer_data printer_data = {0, false, true};
+ int printed = binary__fprintf(sample->raw_data, nr_bytes, 8,
+ sample__fprintf_bpf_output, &printer_data, fp);
+
+ if (printer_data.is_printable && printer_data.hit_nul)
+ printed += fprintf(fp, "%17s \"%s\"\n", "BPF string:", (char *)(sample->raw_data));
+
+ return printed;
+}
+
+static int perf_sample__fprintf_spacing(int len, int spacing, FILE *fp)
+{
+ if (len > 0 && len < spacing)
+ return fprintf(fp, "%*s", spacing - len, "");
+
+ return 0;
+}
+
+static int perf_sample__fprintf_pt_spacing(int len, FILE *fp)
+{
+ return perf_sample__fprintf_spacing(len, 34, fp);
+}
+
+static int perf_sample__fprintf_synth_ptwrite(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " IP: %u payload: %#" PRIx64 " ",
+ data->ip, le64_to_cpu(data->payload));
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth_mwait(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_mwait *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " hints: %#x extensions: %#x ",
+ data->hints, data->extensions);
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth_pwre(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_pwre *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " hw: %u cstate: %u sub-cstate: %u ",
+ data->hw, data->cstate, data->subcstate);
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth_exstop(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_exstop *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " IP: %u ", data->ip);
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth_pwrx(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_pwrx *data = perf_sample__synth_ptr(sample);
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
+ len = fprintf(fp, " deepest cstate: %u last cstate: %u wake reason: %#x ",
+ data->deepest_cstate, data->last_cstate,
+ data->wake_reason);
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth_cbr(struct perf_sample *sample, FILE *fp)
+{
+ struct perf_synth_intel_cbr *data = perf_sample__synth_ptr(sample);
+ unsigned int percent, freq;
+ int len;
+
+ if (perf_sample__bad_synth_size(sample, *data))
+ return 0;
+
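+ /* The cbr payload reports the frequency in kHz; round to the nearest MHz. */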
+ freq = (le32_to_cpu(data->freq) + 500) / 1000;
+ len = fprintf(fp, " cbr: %2u freq: %4u MHz ", data->cbr, freq);
+ if (data->max_nonturbo) {
+ percent = (5 + (1000 * data->cbr) / data->max_nonturbo) / 10;
+ len += fprintf(fp, "(%3u%%) ", percent);
+ }
+ return len + perf_sample__fprintf_pt_spacing(len, fp);
+}
+
+static int perf_sample__fprintf_synth(struct perf_sample *sample,
+ struct perf_evsel *evsel, FILE *fp)
+{
+ switch (evsel->attr.config) {
+ case PERF_SYNTH_INTEL_PTWRITE:
+ return perf_sample__fprintf_synth_ptwrite(sample, fp);
+ case PERF_SYNTH_INTEL_MWAIT:
+ return perf_sample__fprintf_synth_mwait(sample, fp);
+ case PERF_SYNTH_INTEL_PWRE:
+ return perf_sample__fprintf_synth_pwre(sample, fp);
+ case PERF_SYNTH_INTEL_EXSTOP:
+ return perf_sample__fprintf_synth_exstop(sample, fp);
+ case PERF_SYNTH_INTEL_PWRX:
+ return perf_sample__fprintf_synth_pwrx(sample, fp);
+ case PERF_SYNTH_INTEL_CBR:
+ return perf_sample__fprintf_synth_cbr(sample, fp);
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+struct perf_script {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool show_task_events;
+ bool show_mmap_events;
+ bool show_switch_events;
+ bool show_namespace_events;
+ bool show_lost_events;
+ bool show_round_events;
+ bool allocated;
+ bool per_event_dump;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ int name_width;
+ const char *time_str;
+ struct perf_time_interval *ptime_range;
+ int range_size;
+ int range_num;
+};
+
+static int perf_evlist__max_name_len(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int max = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ int len = strlen(perf_evsel__name(evsel));
+
+ max = MAX(len, max);
+ }
+
+ return max;
+}
+
+static int data_src__fprintf(u64 data_src, FILE *fp)
+{
+ struct mem_info mi = { .data_src.val = data_src };
+ char decode[100];
+ char out[100];
+ static int maxlen;
+ int len;
+
+ perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+ len = scnprintf(out, 100, "%16" PRIx64 " %s", data_src, decode);
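+ /* Grow the column to the widest string seen so far to keep output aligned. */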
+ if (maxlen < len)
+ maxlen = len;
+
+ return fprintf(fp, "%-*s", maxlen, out);
+}
+
+struct metric_ctx {
+ struct perf_sample *sample;
+ struct thread *thread;
+ struct perf_evsel *evsel;
+ FILE *fp;
+};
+
+static void script_print_metric(void *ctx, const char *color,
+ const char *fmt,
+ const char *unit, double val)
+{
+ struct metric_ctx *mctx = ctx;
+
+ if (!fmt)
+ return;
+ perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ PERF_RECORD_SAMPLE, mctx->fp);
+ fputs("\tmetric: ", mctx->fp);
+ if (color)
+ color_fprintf(mctx->fp, color, fmt, val);
+ else
+ fprintf(mctx->fp, fmt, val);
+ fprintf(mctx->fp, " %s\n", unit);
+}
+
+static void script_new_line(void *ctx)
+{
+ struct metric_ctx *mctx = ctx;
+
+ perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ PERF_RECORD_SAMPLE, mctx->fp);
+ fputs("\tmetric: ", mctx->fp);
+}
+
+static void perf_sample__fprint_metric(struct perf_script *script,
+ struct thread *thread,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ FILE *fp)
+{
+ struct perf_stat_output_ctx ctx = {
+ .print_metric = script_print_metric,
+ .new_line = script_new_line,
+ .ctx = &(struct metric_ctx) {
+ .sample = sample,
+ .thread = thread,
+ .evsel = evsel,
+ .fp = fp,
+ },
+ .force_header = false,
+ };
+ struct perf_evsel *ev2;
+ u64 val;
+
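+ /*
+ * Metrics are computed per event group: accumulate a shadow stat for
+ * each member and, once all of the leader's group members have
+ * reported (gnum reaches nr_members), print the derived metrics.
+ */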
+ if (!evsel->stats)
+ perf_evlist__alloc_stats(script->session->evlist, false);
+ if (evsel_script(evsel->leader)->gnum++ == 0)
+ perf_stat__reset_shadow_stats();
+ val = sample->period * evsel->scale;
+ perf_stat__update_shadow_stats(evsel,
+ val,
+ sample->cpu,
+ &rt_stat);
+ evsel_script(evsel)->val = val;
+ if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) {
+ for_each_group_member (ev2, evsel->leader) {
+ perf_stat__print_shadow_stats(ev2,
+ evsel_script(ev2)->val,
+ sample->cpu,
+ &ctx,
+ NULL,
+ &rt_stat);
+ }
+ evsel_script(evsel->leader)->gnum = 0;
+ }
+}
+
+static void process_event(struct perf_script *script,
+ struct perf_sample *sample, struct perf_evsel *evsel,
+ struct addr_location *al,
+ struct machine *machine)
+{
+ struct thread *thread = al->thread;
+ struct perf_event_attr *attr = &evsel->attr;
+ unsigned int type = output_type(attr->type);
+ struct perf_evsel_script *es = evsel->priv;
+ FILE *fp = es->fp;
+
+ if (output[type].fields == 0)
+ return;
+
+ ++es->samples;
+
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_SAMPLE, fp);
+
+ if (PRINT_FIELD(PERIOD))
+ fprintf(fp, "%10" PRIu64 " ", sample->period);
+
+ if (PRINT_FIELD(EVNAME)) {
+ const char *evname = perf_evsel__name(evsel);
+
+ if (!script->name_width)
+ script->name_width = perf_evlist__max_name_len(script->session->evlist);
+
+ fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
+ }
+
+ if (print_flags)
+ perf_sample__fprintf_flags(sample->flags, fp);
+
+ if (is_bts_event(attr)) {
+ perf_sample__fprintf_bts(sample, evsel, thread, al, machine, fp);
+ return;
+ }
+
+ if (PRINT_FIELD(TRACE) && sample->raw_data) {
+ event_format__fprintf(evsel->tp_format, sample->cpu,
+ sample->raw_data, sample->raw_size, fp);
+ }
+
+ if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
+ perf_sample__fprintf_synth(sample, evsel, fp);
+
+ if (PRINT_FIELD(ADDR))
+ perf_sample__fprintf_addr(sample, thread, attr, fp);
+
+ if (PRINT_FIELD(DATA_SRC))
+ data_src__fprintf(sample->data_src, fp);
+
+ if (PRINT_FIELD(WEIGHT))
+ fprintf(fp, "%16" PRIu64, sample->weight);
+
+ if (PRINT_FIELD(IP)) {
+ struct callchain_cursor *cursor = NULL;
+
+ if (symbol_conf.use_callchain && sample->callchain &&
+ thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, scripting_max_stack) == 0)
+ cursor = &callchain_cursor;
+
+ fputc(cursor ? '\n' : ' ', fp);
+ sample__fprintf_sym(sample, al, 0, output[type].print_ip_opts, cursor, fp);
+ }
+
+ if (PRINT_FIELD(IREGS))
+ perf_sample__fprintf_iregs(sample, attr, fp);
+
+ if (PRINT_FIELD(UREGS))
+ perf_sample__fprintf_uregs(sample, attr, fp);
+
+ if (PRINT_FIELD(BRSTACK))
+ perf_sample__fprintf_brstack(sample, thread, attr, fp);
+ else if (PRINT_FIELD(BRSTACKSYM))
+ perf_sample__fprintf_brstacksym(sample, thread, attr, fp);
+ else if (PRINT_FIELD(BRSTACKOFF))
+ perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
+
+ if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+ perf_sample__fprintf_bpf_output(sample, fp);
+ perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
+
+ if (PRINT_FIELD(PHYS_ADDR))
+ fprintf(fp, "%16" PRIx64, sample->phys_addr);
+ fprintf(fp, "\n");
+
+ if (PRINT_FIELD(METRIC))
+ perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+}
+
+static struct scripting_ops *scripting_ops;
+
+static void __process_stat(struct perf_evsel *counter, u64 tstamp)
+{
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = perf_evsel__nr_cpus(counter);
+ int cpu, thread;
+ static int header_printed;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ if (!header_printed) {
+ printf("%3s %8s %15s %15s %15s %15s %s\n",
+ "CPU", "THREAD", "VAL", "ENA", "RUN", "TIME", "EVENT");
+ header_printed = 1;
+ }
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ struct perf_counts_values *counts;
+
+ counts = perf_counts(counter->counts, cpu, thread);
+
+ printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
+ counter->cpus->map[cpu],
+ thread_map__pid(counter->threads, thread),
+ counts->val,
+ counts->ena,
+ counts->run,
+ tstamp,
+ perf_evsel__name(counter));
+ }
+ }
+}
+
+static void process_stat(struct perf_evsel *counter, u64 tstamp)
+{
+ if (scripting_ops && scripting_ops->process_stat)
+ scripting_ops->process_stat(&stat_config, counter, tstamp);
+ else
+ __process_stat(counter, tstamp);
+}
+
+static void process_stat_interval(u64 tstamp)
+{
+ if (scripting_ops && scripting_ops->process_stat_interval)
+ scripting_ops->process_stat_interval(tstamp);
+}
+
+static void setup_scripting(void)
+{
+ setup_perl_scripting();
+ setup_python_scripting();
+}
+
+static int flush_scripting(void)
+{
+ return scripting_ops ? scripting_ops->flush_script() : 0;
+}
+
+static int cleanup_scripting(void)
+{
+ pr_debug("\nperf script stopped\n");
+
+ return scripting_ops ? scripting_ops->stop_script() : 0;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
+ struct addr_location al;
+
+ if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
+ sample->time)) {
+ return 0;
+ }
+
+ if (debug_mode) {
+ if (sample->time < last_timestamp) {
+ pr_err("Samples misordered, previous: %" PRIu64
+ " this: %" PRIu64 "\n", last_timestamp,
+ sample->time);
+ nr_unordered++;
+ }
+ last_timestamp = sample->time;
+ return 0;
+ }
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (al.filtered)
+ goto out_put;
+
+ if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
+ goto out_put;
+
+ if (scripting_ops)
+ scripting_ops->process_event(event, sample, evsel, &al);
+ else
+ process_event(scr, sample, evsel, &al, machine);
+
+out_put:
+ addr_location__put(&al);
+ return 0;
+}
+
+static int process_attr(struct perf_tool *tool, union perf_event *event,
+ struct perf_evlist **pevlist)
+{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel, *pos;
+ int err;
+ static struct perf_evsel_script *es;
+
+ err = perf_event__process_attr(tool, event, pevlist);
+ if (err)
+ return err;
+
+ evlist = *pevlist;
+ evsel = perf_evlist__last(*pevlist);
+
+ if (!evsel->priv) {
+ if (scr->per_event_dump) {
+ evsel->priv = perf_evsel_script__new(evsel,
+ scr->session->data);
+ } else {
+ es = zalloc(sizeof(*es));
+ if (!es)
+ return -ENOMEM;
+ es->fp = stdout;
+ evsel->priv = es;
+ }
+ }
+
+ if (evsel->attr.type >= PERF_TYPE_MAX &&
+ evsel->attr.type != PERF_TYPE_SYNTH)
+ return 0;
+
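+ /*
+ * The per-type setup below only needs doing once per attr->type; skip
+ * it if another evsel of the same type has already been seen.
+ */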
+ evlist__for_each_entry(evlist, pos) {
+ if (pos->attr.type == evsel->attr.type && pos != evsel)
+ return 0;
+ }
+
+ set_print_ip_opts(&evsel->attr);
+
+ if (evsel->attr.sample_type)
+ err = perf_evsel__check_attr(evsel, scr->session);
+
+ return err;
+}
+
+static int process_comm_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+ int ret = -1;
+
+ thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing COMM event, skipping it.\n");
+ return -1;
+ }
+
+ if (perf_event__process_comm(tool, event, sample, machine) < 0)
+ goto out;
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ sample->tid = event->comm.tid;
+ sample->pid = event->comm.pid;
+ }
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_COMM, stdout);
+ perf_event__fprintf(event, stdout);
+ ret = 0;
+out:
+ thread__put(thread);
+ return ret;
+}
+
+static int process_namespaces_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+ int ret = -1;
+
+ thread = machine__findnew_thread(machine, event->namespaces.pid,
+ event->namespaces.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing NAMESPACES event, skipping it.\n");
+ return -1;
+ }
+
+ if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
+ goto out;
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ sample->tid = event->namespaces.tid;
+ sample->pid = event->namespaces.pid;
+ }
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_NAMESPACES, stdout);
+ perf_event__fprintf(event, stdout);
+ ret = 0;
+out:
+ thread__put(thread);
+ return ret;
+}
+
+static int process_fork_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ if (perf_event__process_fork(tool, event, sample, machine) < 0)
+ return -1;
+
+ thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing FORK event, skipping it.\n");
+ return -1;
+ }
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = event->fork.time;
+ sample->tid = event->fork.tid;
+ sample->pid = event->fork.pid;
+ }
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_FORK, stdout);
+ perf_event__fprintf(event, stdout);
+ thread__put(thread);
+
+ return 0;
+}
+
+static int process_exit_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err = 0;
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing EXIT event, skipping it.\n");
+ return -1;
+ }
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ sample->tid = event->fork.tid;
+ sample->pid = event->fork.pid;
+ }
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_EXIT, stdout);
+ perf_event__fprintf(event, stdout);
+
+ if (perf_event__process_exit(tool, event, sample, machine) < 0)
+ err = -1;
+
+ thread__put(thread);
+ return err;
+}
+
+static int process_mmap_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ if (perf_event__process_mmap(tool, event, sample, machine) < 0)
+ return -1;
+
+ thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing MMAP event, skipping it.\n");
+ return -1;
+ }
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ sample->tid = event->mmap.tid;
+ sample->pid = event->mmap.pid;
+ }
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_MMAP, stdout);
+ perf_event__fprintf(event, stdout);
+ thread__put(thread);
+ return 0;
+}
+
+static int process_mmap2_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
+ return -1;
+
+ thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid);
+ if (thread == NULL) {
+ pr_debug("problem processing MMAP2 event, skipping it.\n");
+ return -1;
+ }
+
+ if (!evsel->attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = 0;
+ sample->tid = event->mmap2.tid;
+ sample->pid = event->mmap2.pid;
+ }
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_MMAP2, stdout);
+ perf_event__fprintf(event, stdout);
+ thread__put(thread);
+ return 0;
+}
+
+static int process_switch_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct thread *thread;
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+
+ if (perf_event__process_switch(tool, event, sample, machine) < 0)
+ return -1;
+
+ thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+ if (thread == NULL) {
+ pr_debug("problem processing SWITCH event, skipping it.\n");
+ return -1;
+ }
+
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_SWITCH, stdout);
+ perf_event__fprintf(event, stdout);
+ thread__put(thread);
+ return 0;
+}
+
+static int
+process_lost_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+ struct perf_session *session = script->session;
+ struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
+ struct thread *thread;
+
+ thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+ if (thread == NULL)
+ return -1;
+
+ perf_sample__fprintf_start(sample, thread, evsel,
+ PERF_RECORD_LOST, stdout);
+ perf_event__fprintf(event, stdout);
+ thread__put(thread);
+ return 0;
+}
+
+static int
+process_finished_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct ordered_events *oe __maybe_unused)
+
+{
+ perf_event__fprintf(event, stdout);
+ return 0;
+}
+
+static void sig_handler(int sig __maybe_unused)
+{
+ session_done = 1;
+}
+
+static void perf_script__fclose_per_event_dump(struct perf_script *script)
+{
+ struct perf_evlist *evlist = script->session->evlist;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel->priv)
+ break;
+ perf_evsel_script__delete(evsel->priv);
+ evsel->priv = NULL;
+ }
+}
+
+static int perf_script__fopen_per_event_dump(struct perf_script *script)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(script->session->evlist, evsel) {
+ /*
+ * Already set up? I.e. we may be called twice in cases like
+ * Intel PT, once for the intel_pt// and dummy events, then
+ * for the evsels synthesized from the auxtrace info.
+ *
+ * See perf_script__process_auxtrace_info.
+ */
+ if (evsel->priv != NULL)
+ continue;
+
+ evsel->priv = perf_evsel_script__new(evsel, script->session->data);
+ if (evsel->priv == NULL)
+ goto out_err_fclose;
+ }
+
+ return 0;
+
+out_err_fclose:
+ perf_script__fclose_per_event_dump(script);
+ return -1;
+}
+
+static int perf_script__setup_per_event_dump(struct perf_script *script)
+{
+ struct perf_evsel *evsel;
+ static struct perf_evsel_script es_stdout;
+
+ if (script->per_event_dump)
+ return perf_script__fopen_per_event_dump(script);
+
+ es_stdout.fp = stdout;
+
+ evlist__for_each_entry(script->session->evlist, evsel)
+ evsel->priv = &es_stdout;
+
+ return 0;
+}
+
+static void perf_script__exit_per_event_dump_stats(struct perf_script *script)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(script->session->evlist, evsel) {
+ struct perf_evsel_script *es = evsel->priv;
+
+ perf_evsel_script__fprintf(es, stdout);
+ perf_evsel_script__delete(es);
+ evsel->priv = NULL;
+ }
+}
+
+static int __cmd_script(struct perf_script *script)
+{
+ int ret;
+
+ signal(SIGINT, sig_handler);
+
+ perf_stat__init_shadow_stats();
+
+ /* override event processing functions */
+ if (script->show_task_events) {
+ script->tool.comm = process_comm_event;
+ script->tool.fork = process_fork_event;
+ script->tool.exit = process_exit_event;
+ }
+ if (script->show_mmap_events) {
+ script->tool.mmap = process_mmap_event;
+ script->tool.mmap2 = process_mmap2_event;
+ }
+ if (script->show_switch_events)
+ script->tool.context_switch = process_switch_event;
+ if (script->show_namespace_events)
+ script->tool.namespaces = process_namespaces_event;
+ if (script->show_lost_events)
+ script->tool.lost = process_lost_event;
+ if (script->show_round_events) {
+ script->tool.ordered_events = false;
+ script->tool.finished_round = process_finished_round_event;
+ }
+
+ if (perf_script__setup_per_event_dump(script)) {
+ pr_err("Couldn't create the per event dump files\n");
+ return -1;
+ }
+
+ ret = perf_session__process_events(script->session);
+
+ if (script->per_event_dump)
+ perf_script__exit_per_event_dump_stats(script);
+
+ if (debug_mode)
+ pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered);
+
+ return ret;
+}
+
+struct script_spec {
+ struct list_head node;
+ struct scripting_ops *ops;
+ char spec[0];
+};
+
+static LIST_HEAD(script_specs);
+
+static struct script_spec *script_spec__new(const char *spec,
+ struct scripting_ops *ops)
+{
+ struct script_spec *s = malloc(sizeof(*s) + strlen(spec) + 1);
+
+ if (s != NULL) {
+ strcpy(s->spec, spec);
+ s->ops = ops;
+ }
+
+ return s;
+}
+
+static void script_spec__add(struct script_spec *s)
+{
+ list_add_tail(&s->node, &script_specs);
+}
+
+static struct script_spec *script_spec__find(const char *spec)
+{
+ struct script_spec *s;
+
+ list_for_each_entry(s, &script_specs, node)
+ if (strcasecmp(s->spec, spec) == 0)
+ return s;
+ return NULL;
+}
+
+int script_spec_register(const char *spec, struct scripting_ops *ops)
+{
+ struct script_spec *s;
+
+ s = script_spec__find(spec);
+ if (s)
+ return -1;
+
+ s = script_spec__new(spec, ops);
+ if (!s)
+ return -1;
+
+ script_spec__add(s);
+
+ return 0;
+}
+
+static struct scripting_ops *script_spec__lookup(const char *spec)
+{
+ struct script_spec *s = script_spec__find(spec);
+ if (!s)
+ return NULL;
+
+ return s->ops;
+}
+
+static void list_available_languages(void)
+{
+ struct script_spec *s;
+
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Scripting language extensions (used in "
+ "perf script -s [spec:]script.[spec]):\n\n");
+
+ list_for_each_entry(s, &script_specs, node)
+ fprintf(stderr, " %-42s [%s]\n", s->spec, s->ops->name);
+
+ fprintf(stderr, "\n");
+}
+
+static int parse_scriptname(const struct option *opt __maybe_unused,
+ const char *str, int unset __maybe_unused)
+{
+ char spec[PATH_MAX];
+ const char *script, *ext;
+ int len;
+
+ if (strcmp(str, "lang") == 0) {
+ list_available_languages();
+ exit(0);
+ }
+
+ script = strchr(str, ':');
+ if (script) {
+ len = script - str;
+ if (len >= PATH_MAX) {
+ fprintf(stderr, "invalid language specifier");
+ return -1;
+ }
+ strncpy(spec, str, len);
+ spec[len] = '\0';
+ scripting_ops = script_spec__lookup(spec);
+ if (!scripting_ops) {
+ fprintf(stderr, "invalid language specifier");
+ return -1;
+ }
+ script++;
+ } else {
+ script = str;
+ ext = strrchr(script, '.');
+ if (!ext) {
+ fprintf(stderr, "invalid script extension");
+ return -1;
+ }
+ scripting_ops = script_spec__lookup(++ext);
+ if (!scripting_ops) {
+ fprintf(stderr, "invalid script extension");
+ return -1;
+ }
+ }
+
+ script_name = strdup(script);
+
+ return 0;
+}
+
+static int parse_output_fields(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ char *tok, *strtok_saveptr = NULL;
+ int i, imax = ARRAY_SIZE(all_output_options);
+ int j;
+ int rc = 0;
+ char *str = strdup(arg);
+ int type = -1;
+ enum { DEFAULT, SET, ADD, REMOVE } change = DEFAULT;
+
+ if (!str)
+ return -ENOMEM;
+
+ /*
+ * The first word can state for which event type the user is specifying
+ * the fields. If no type is given, the specified fields apply to all
+ * event types found in the file, minus the fields invalid for each type.
+ */
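+ /*
+ * e.g. "-F trace:time,cpu" applies only to tracepoint events, while
+ * "-F +srcline" adds a field on top of every type's defaults.
+ */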
+ tok = strchr(str, ':');
+ if (tok) {
+ *tok = '\0';
+ tok++;
+ if (!strcmp(str, "hw"))
+ type = PERF_TYPE_HARDWARE;
+ else if (!strcmp(str, "sw"))
+ type = PERF_TYPE_SOFTWARE;
+ else if (!strcmp(str, "trace"))
+ type = PERF_TYPE_TRACEPOINT;
+ else if (!strcmp(str, "raw"))
+ type = PERF_TYPE_RAW;
+ else if (!strcmp(str, "break"))
+ type = PERF_TYPE_BREAKPOINT;
+ else if (!strcmp(str, "synth"))
+ type = OUTPUT_TYPE_SYNTH;
+ else {
+ fprintf(stderr, "Invalid event type in field string.\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (output[type].user_set)
+ pr_warning("Overriding previous field request for %s events.\n",
+ event_type(type));
+
+ output[type].fields = 0;
+ output[type].user_set = true;
+ output[type].wildcard_set = false;
+
+ } else {
+ tok = str;
+ if (strlen(str) == 0) {
+ fprintf(stderr,
+ "Cannot set fields to 'none' for all event types.\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Don't override defaults for +- */
+ if (strchr(str, '+') || strchr(str, '-'))
+ goto parse;
+
+ if (output_set_by_user())
+ pr_warning("Overriding previous field request for all events.\n");
+
+ for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+ output[j].fields = 0;
+ output[j].user_set = true;
+ output[j].wildcard_set = true;
+ }
+ }
+
+parse:
+ for (tok = strtok_r(tok, ",", &strtok_saveptr); tok; tok = strtok_r(NULL, ",", &strtok_saveptr)) {
+ if (*tok == '+') {
+ if (change == SET)
+ goto out_badmix;
+ change = ADD;
+ tok++;
+ } else if (*tok == '-') {
+ if (change == SET)
+ goto out_badmix;
+ change = REMOVE;
+ tok++;
+ } else {
+ if (change != SET && change != DEFAULT)
+ goto out_badmix;
+ change = SET;
+ }
+
+ for (i = 0; i < imax; ++i) {
+ if (strcmp(tok, all_output_options[i].str) == 0)
+ break;
+ }
+ if (i == imax && strcmp(tok, "flags") == 0) {
+ print_flags = change == REMOVE ? false : true;
+ continue;
+ }
+ if (i == imax) {
+ fprintf(stderr, "Invalid field requested.\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (type == -1) {
+ /* add user option to all events types for
+ * which it is valid
+ */
+ for (j = 0; j < OUTPUT_TYPE_MAX; ++j) {
+ if (output[j].invalid_fields & all_output_options[i].field) {
+ pr_warning("\'%s\' not valid for %s events. Ignoring.\n",
+ all_output_options[i].str, event_type(j));
+ } else {
+ if (change == REMOVE)
+ output[j].fields &= ~all_output_options[i].field;
+ else
+ output[j].fields |= all_output_options[i].field;
+ }
+ }
+ } else {
+ if (output[type].invalid_fields & all_output_options[i].field) {
+ fprintf(stderr, "\'%s\' not valid for %s events.\n",
+ all_output_options[i].str, event_type(type));
+
+ rc = -EINVAL;
+ goto out;
+ }
+ output[type].fields |= all_output_options[i].field;
+ }
+ }
+
+ if (type >= 0) {
+ if (output[type].fields == 0) {
+ pr_debug("No fields requested for %s type. "
+ "Events will not be displayed.\n", event_type(type));
+ }
+ }
+ goto out;
+
+out_badmix:
+ fprintf(stderr, "Cannot mix +-field with overridden fields\n");
+ rc = -EINVAL;
+out:
+ free(str);
+ return rc;
+}
+
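+/*
+ * Iterate the per-language subdirectories of the scripts directory,
+ * skipping "." and ".." and using is_directory() where readdir()
+ * reports DT_UNKNOWN.
+ */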
+#define for_each_lang(scripts_path, scripts_dir, lang_dirent) \
+ while ((lang_dirent = readdir(scripts_dir)) != NULL) \
+ if ((lang_dirent->d_type == DT_DIR || \
+ (lang_dirent->d_type == DT_UNKNOWN && \
+ is_directory(scripts_path, lang_dirent))) && \
+ (strcmp(lang_dirent->d_name, ".")) && \
+ (strcmp(lang_dirent->d_name, "..")))
+
+#define for_each_script(lang_path, lang_dir, script_dirent) \
+ while ((script_dirent = readdir(lang_dir)) != NULL) \
+ if (script_dirent->d_type != DT_DIR && \
+ (script_dirent->d_type != DT_UNKNOWN || \
+ !is_directory(lang_path, script_dirent)))
+
+
+#define RECORD_SUFFIX "-record"
+#define REPORT_SUFFIX "-report"
+
+struct script_desc {
+ struct list_head node;
+ char *name;
+ char *half_liner;
+ char *args;
+};
+
+static LIST_HEAD(script_descs);
+
+static struct script_desc *script_desc__new(const char *name)
+{
+ struct script_desc *s = zalloc(sizeof(*s));
+
+ if (s != NULL && name)
+ s->name = strdup(name);
+
+ return s;
+}
+
+static void script_desc__delete(struct script_desc *s)
+{
+ zfree(&s->name);
+ zfree(&s->half_liner);
+ zfree(&s->args);
+ free(s);
+}
+
+static void script_desc__add(struct script_desc *s)
+{
+ list_add_tail(&s->node, &script_descs);
+}
+
+static struct script_desc *script_desc__find(const char *name)
+{
+ struct script_desc *s;
+
+ list_for_each_entry(s, &script_descs, node)
+ if (strcasecmp(s->name, name) == 0)
+ return s;
+ return NULL;
+}
+
+static struct script_desc *script_desc__findnew(const char *name)
+{
+ struct script_desc *s = script_desc__find(name);
+
+ if (s)
+ return s;
+
+ s = script_desc__new(name);
+ if (!s)
+ return NULL;
+
+ script_desc__add(s);
+
+ return s;
+}
+
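+/*
+ * Return a pointer to where @suffix starts within @str if @str is longer
+ * than @suffix and ends with it, NULL otherwise.
+ */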
+static const char *ends_with(const char *str, const char *suffix)
+{
+ size_t suffix_len = strlen(suffix);
+ const char *p = str;
+
+ if (strlen(str) > suffix_len) {
+ p = str + strlen(str) - suffix_len;
+ if (!strncmp(p, suffix, suffix_len))
+ return p;
+ }
+
+ return NULL;
+}
+
+static int read_script_info(struct script_desc *desc, const char *filename)
+{
+ char line[BUFSIZ], *p;
+ FILE *fp;
+
+ fp = fopen(filename, "r");
+ if (!fp)
+ return -1;
+
+ while (fgets(line, sizeof(line), fp)) {
+ p = ltrim(line);
+ if (strlen(p) == 0)
+ continue;
+ if (*p != '#')
+ continue;
+ p++;
+ if (strlen(p) && *p == '!')
+ continue;
+
+ p = ltrim(p);
+ if (strlen(p) && p[strlen(p) - 1] == '\n')
+ p[strlen(p) - 1] = '\0';
+
+ if (!strncmp(p, "description:", strlen("description:"))) {
+ p += strlen("description:");
+ desc->half_liner = strdup(ltrim(p));
+ continue;
+ }
+
+ if (!strncmp(p, "args:", strlen("args:"))) {
+ p += strlen("args:");
+ desc->args = strdup(ltrim(p));
+ continue;
+ }
+ }
+
+ fclose(fp);
+
+ return 0;
+}
+
+static char *get_script_root(struct dirent *script_dirent, const char *suffix)
+{
+ char *script_root, *str;
+
+ script_root = strdup(script_dirent->d_name);
+ if (!script_root)
+ return NULL;
+
+ str = (char *)ends_with(script_root, suffix);
+ if (!str) {
+ free(script_root);
+ return NULL;
+ }
+
+ *str = '\0';
+ return script_root;
+}
+
+static int list_available_scripts(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused,
+ int unset __maybe_unused)
+{
+ struct dirent *script_dirent, *lang_dirent;
+ char scripts_path[MAXPATHLEN];
+ DIR *scripts_dir, *lang_dir;
+ char script_path[MAXPATHLEN];
+ char lang_path[MAXPATHLEN];
+ struct script_desc *desc;
+ char first_half[BUFSIZ];
+ char *script_root;
+
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
+
+ scripts_dir = opendir(scripts_path);
+ if (!scripts_dir) {
+ fprintf(stdout,
+ "open(%s) failed.\n"
+ "Check \"PERF_EXEC_PATH\" env to set scripts dir.\n",
+ scripts_path);
+ exit(-1);
+ }
+
+ for_each_lang(scripts_path, scripts_dir, lang_dirent) {
+ scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
+ lang_dirent->d_name);
+ lang_dir = opendir(lang_path);
+ if (!lang_dir)
+ continue;
+
+ for_each_script(lang_path, lang_dir, script_dirent) {
+ script_root = get_script_root(script_dirent, REPORT_SUFFIX);
+ if (script_root) {
+ desc = script_desc__findnew(script_root);
+ scnprintf(script_path, MAXPATHLEN, "%s/%s",
+ lang_path, script_dirent->d_name);
+ read_script_info(desc, script_path);
+ free(script_root);
+ }
+ }
+ }
+
+ fprintf(stdout, "List of available trace scripts:\n");
+ list_for_each_entry(desc, &script_descs, node) {
+ sprintf(first_half, "%s %s", desc->name,
+ desc->args ? desc->args : "");
+ fprintf(stdout, " %-36s %s\n", first_half,
+ desc->half_liner ? desc->half_liner : "");
+ }
+
+ exit(0);
+}
+
+/*
+ * Some scripts specify their required events in an "xxx-record" file;
+ * this function checks whether the events in perf.data match those
+ * mentioned there.
+ *
+ * FIXME: all existing "xxx-record" files use the simple "-e event"
+ * format, which is covered well now. New parsing code should be added
+ * to handle future, more complex formats such as event groups.
+ */
+static int check_ev_match(char *dir_name, char *scriptname,
+ struct perf_session *session)
+{
+ char filename[MAXPATHLEN], evname[128];
+ char line[BUFSIZ], *p;
+ struct perf_evsel *pos;
+ int match, len;
+ FILE *fp;
+
+ scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname);
+
+ fp = fopen(filename, "r");
+ if (!fp)
+ return -1;
+
+ while (fgets(line, sizeof(line), fp)) {
+ p = ltrim(line);
+ if (*p == '#')
+ continue;
+
+ while (strlen(p)) {
+ p = strstr(p, "-e");
+ if (!p)
+ break;
+
+ p += 2;
+ p = ltrim(p);
+ len = strcspn(p, " \t");
+ if (!len)
+ break;
+
+ snprintf(evname, len + 1, "%s", p);
+
+ match = 0;
+ evlist__for_each_entry(session->evlist, pos) {
+ if (!strcmp(perf_evsel__name(pos), evname)) {
+ match = 1;
+ break;
+ }
+ }
+
+ if (!match) {
+ fclose(fp);
+ return -1;
+ }
+ }
+ }
+
+ fclose(fp);
+ return 0;
+}
+
+/*
+ * Return -1 if none is found, otherwise the actual scripts number.
+ *
+ * Currently the only user of this function is the script browser, which
+ * will list all statically runnable scripts, select one, execute it and
+ * show the output in a perf browser.
+ */
+int find_scripts(char **scripts_array, char **scripts_path_array)
+{
+ struct dirent *script_dirent, *lang_dirent;
+ char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
+ DIR *scripts_dir, *lang_dir;
+ struct perf_session *session;
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ };
+ char *temp;
+ int i = 0;
+
+ session = perf_session__new(&data, false, NULL);
+ if (!session)
+ return -1;
+
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
+
+ scripts_dir = opendir(scripts_path);
+ if (!scripts_dir) {
+ perf_session__delete(session);
+ return -1;
+ }
+
+ for_each_lang(scripts_path, scripts_dir, lang_dirent) {
+ scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
+ lang_dirent->d_name);
+#ifndef HAVE_LIBPERL_SUPPORT
+ if (strstr(lang_path, "perl"))
+ continue;
+#endif
+#ifndef HAVE_LIBPYTHON_SUPPORT
+ if (strstr(lang_path, "python"))
+ continue;
+#endif
+
+ lang_dir = opendir(lang_path);
+ if (!lang_dir)
+ continue;
+
+ for_each_script(lang_path, lang_dir, script_dirent) {
+ /* Skip the real-time scripts: xxxtop.p[yl] */
+ if (strstr(script_dirent->d_name, "top."))
+ continue;
+ sprintf(scripts_path_array[i], "%s/%s", lang_path,
+ script_dirent->d_name);
+ temp = strchr(script_dirent->d_name, '.');
+ snprintf(scripts_array[i],
+ (temp - script_dirent->d_name) + 1,
+ "%s", script_dirent->d_name);
+
+ if (check_ev_match(lang_path,
+ scripts_array[i], session))
+ continue;
+
+ i++;
+ }
+ closedir(lang_dir);
+ }
+
+ closedir(scripts_dir);
+ perf_session__delete(session);
+ return i;
+}
+
+static char *get_script_path(const char *script_root, const char *suffix)
+{
+ struct dirent *script_dirent, *lang_dirent;
+ char scripts_path[MAXPATHLEN];
+ char script_path[MAXPATHLEN];
+ DIR *scripts_dir, *lang_dir;
+ char lang_path[MAXPATHLEN];
+ char *__script_root;
+
+ snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
+
+ scripts_dir = opendir(scripts_path);
+ if (!scripts_dir)
+ return NULL;
+
+ for_each_lang(scripts_path, scripts_dir, lang_dirent) {
+ scnprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path,
+ lang_dirent->d_name);
+ lang_dir = opendir(lang_path);
+ if (!lang_dir)
+ continue;
+
+ for_each_script(lang_path, lang_dir, script_dirent) {
+ __script_root = get_script_root(script_dirent, suffix);
+ if (__script_root && !strcmp(script_root, __script_root)) {
+ free(__script_root);
+ closedir(lang_dir);
+ closedir(scripts_dir);
+ scnprintf(script_path, MAXPATHLEN, "%s/%s",
+ lang_path, script_dirent->d_name);
+ return strdup(script_path);
+ }
+ free(__script_root);
+ }
+ closedir(lang_dir);
+ }
+ closedir(scripts_dir);
+
+ return NULL;
+}
+
+static bool is_top_script(const char *script_path)
+{
+ return ends_with(script_path, "top") == NULL ? false : true;
+}
+
+static int has_required_arg(char *script_path)
+{
+ struct script_desc *desc;
+ int n_args = 0;
+ char *p;
+
+ desc = script_desc__new(NULL);
+
+ if (read_script_info(desc, script_path))
+ goto out;
+
+ if (!desc->args)
+ goto out;
+
+ for (p = desc->args; *p; p++)
+ if (*p == '<')
+ n_args++;
+out:
+ script_desc__delete(desc);
+
+ return n_args;
+}
+
+static int have_cmd(int argc, const char **argv)
+{
+ char **__argv = malloc(sizeof(const char *) * argc);
+
+ if (!__argv) {
+ pr_err("malloc failed\n");
+ return -1;
+ }
+
+ memcpy(__argv, argv, sizeof(const char *) * argc);
+ argc = parse_options(argc, (const char **)__argv, record_options,
+ NULL, PARSE_OPT_STOP_AT_NON_OPTION);
+ free(__argv);
+
+ system_wide = (argc == 0);
+
+ return 0;
+}
+
+static void script__setup_sample_type(struct perf_script *script)
+{
+ struct perf_session *session = script->session;
+ u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ else
+ callchain_param.record_mode = CALLCHAIN_FP;
+ }
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *round = &event->stat_round;
+ struct perf_evsel *counter;
+
+ evlist__for_each_entry(session->evlist, counter) {
+ perf_stat_process_counter(&stat_config, counter);
+ process_stat(counter, round->time);
+ }
+
+ process_stat_interval(round->time);
+ return 0;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+ return 0;
+}
+
+static int set_maps(struct perf_script *script)
+{
+ struct perf_evlist *evlist = script->session->evlist;
+
+ if (!script->cpus || !script->threads)
+ return 0;
+
+ if (WARN_ONCE(script->allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evlist, script->cpus, script->threads);
+
+ if (perf_evlist__alloc_stats(evlist, true))
+ return -ENOMEM;
+
+ script->allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ script->threads = thread_map__new_event(&event->thread_map);
+ if (!script->threads)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ script->cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!script->cpus)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
+static int process_feature_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ if (event->feat.feat_id < HEADER_LAST_FEATURE)
+ return perf_event__process_feature(tool, event, session);
+ return 0;
+}
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+static int perf_script__process_auxtrace_info(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ int ret = perf_event__process_auxtrace_info(tool, event, session);
+
+ if (ret == 0) {
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ ret = perf_script__setup_per_event_dump(script);
+ }
+
+ return ret;
+}
+#else
+#define perf_script__process_auxtrace_info 0
+#endif
+
+int cmd_script(int argc, const char **argv)
+{
+ bool show_full_info = false;
+ bool header = false;
+ bool header_only = false;
+ bool script_started = false;
+ char *rec_script_path = NULL;
+ char *rep_script_path = NULL;
+ struct perf_session *session;
+ struct itrace_synth_opts itrace_synth_opts = { .set = false, };
+ char *script_path = NULL;
+ const char **__argv;
+ int i, j, err = 0;
+ struct perf_script script = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .namespaces = perf_event__process_namespaces,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .attr = process_attr,
+ .event_update = perf_event__process_event_update,
+ .tracing_data = perf_event__process_tracing_data,
+ .feature = process_feature_event,
+ .build_id = perf_event__process_build_id,
+ .id_index = perf_event__process_id_index,
+ .auxtrace_info = perf_script__process_auxtrace_info,
+ .auxtrace = perf_event__process_auxtrace,
+ .auxtrace_error = perf_event__process_auxtrace_error,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
+ .stat_config = process_stat_config_event,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ };
+ struct perf_data data = {
+ .mode = PERF_DATA_MODE_READ,
+ };
+ const struct option options[] = {
+ OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
+ "dump raw trace in ASCII"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('L', "Latency", &latency_format,
+ "show latency attributes (irqs/preemption disabled, etc)"),
+ OPT_CALLBACK_NOOPT('l', "list", NULL, NULL, "list available scripts",
+ list_available_scripts),
+ OPT_CALLBACK('s', "script", NULL, "name",
+ "script file name (lang:script name, script name, or *)",
+ parse_scriptname),
+ OPT_STRING('g', "gen-script", &generate_script_lang, "lang",
+ "generate perf-script.xx script in specified language"),
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_BOOLEAN('d', "debug-mode", &debug_mode,
+ "do various checks like samples ordering and lost events"),
+ OPT_BOOLEAN(0, "header", &header, "Show data header."),
+ OPT_BOOLEAN(0, "header-only", &header_only, "Show only data header."),
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
+ "file", "kallsyms pathname"),
+ OPT_BOOLEAN('G', "hide-call-graph", &no_callchain,
+ "When printing symbols do not display call chain"),
+ OPT_CALLBACK(0, "symfs", NULL, "directory",
+ "Look for files with symbols relative to this directory",
+ symbol__config_symfs),
+ OPT_CALLBACK('F', "fields", NULL, "str",
+ "comma separated output fields prepend with 'type:'. "
+ "+field to add and -field to remove."
+ "Valid types: hw,sw,trace,raw,synth. "
+ "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
+ "addr,symoff,srcline,period,iregs,uregs,brstack,"
+ "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
+ "callindent,insn,insnlen,synth,phys_addr,metric,misc",
+ parse_output_fields),
+ OPT_BOOLEAN('a', "all-cpus", &system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+ "only consider these symbols"),
+ OPT_STRING(0, "stop-bt", &symbol_conf.bt_stop_list_str, "symbol[,symbol...]",
+ "Stop display of callgraph at these symbols"),
+ OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"),
+ OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+ "only display events for these comms"),
+ OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]",
+ "only consider symbols in these pids"),
+ OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]",
+ "only consider symbols in these tids"),
+ OPT_UINTEGER(0, "max-stack", &scripting_max_stack,
+ "Set the maximum stack depth when parsing the callchain, "
+ "anything beyond the specified depth will be ignored. "
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+ OPT_BOOLEAN('I', "show-info", &show_full_info,
+ "display extended information from perf.data file"),
+ OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
+ "Show the path of [kernel.kallsyms]"),
+ OPT_BOOLEAN('\0', "show-task-events", &script.show_task_events,
+ "Show the fork/comm/exit events"),
+ OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
+ "Show the mmap events"),
+ OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events,
+ "Show context switch events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
+ "Show namespace events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
+ "Show lost events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
+ "Show round events (if recorded)"),
+ OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
+ "Dump trace output to files named by the monitored events"),
+ OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_INTEGER(0, "max-blocks", &max_blocks,
+ "Maximum number of code blocks to dump with brstackinsn"),
+ OPT_BOOLEAN(0, "ns", &nanosecs,
+ "Use 9 decimal places when displaying time"),
+ OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+ "Instruction Tracing options",
+ itrace_parse_synth_opts),
+ OPT_BOOLEAN(0, "full-source-path", &srcline_full_filename,
+ "Show full source file name path for source lines"),
+ OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle,
+ "Enable symbol demangling"),
+ OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+ "Enable kernel symbol demangling"),
+ OPT_STRING(0, "time", &script.time_str, "str",
+ "Time span of interest (start,stop)"),
+ OPT_BOOLEAN(0, "inline", &symbol_conf.inline_name,
+ "Show inline function"),
+ OPT_END()
+ };
+ const char * const script_subcommands[] = { "record", "report", NULL };
+ const char *script_usage[] = {
+ "perf script [<options>]",
+ "perf script [<options>] record <script> [<record-options>] <command>",
+ "perf script [<options>] report <script> [script-args]",
+ "perf script [<options>] <script> [<record-options>] <command>",
+ "perf script [<options>] <top-script> [script-args]",
+ NULL
+ };
+
+ perf_set_singlethreaded();
+
+ setup_scripting();
+
+ argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ data.file.path = input_name;
+ data.force = symbol_conf.force;
+
+ if (argc > 1 && !strncmp(argv[0], "rec", strlen("rec"))) {
+ rec_script_path = get_script_path(argv[1], RECORD_SUFFIX);
+ if (!rec_script_path)
+ return cmd_record(argc, argv);
+ }
+
+ if (argc > 1 && !strncmp(argv[0], "rep", strlen("rep"))) {
+ rep_script_path = get_script_path(argv[1], REPORT_SUFFIX);
+ if (!rep_script_path) {
+ fprintf(stderr,
+ "Please specify a valid report script"
+ "(see 'perf script -l' for listing)\n");
+ return -1;
+ }
+ }
+
+ if (itrace_synth_opts.callchain &&
+ itrace_synth_opts.callchain_sz > scripting_max_stack)
+ scripting_max_stack = itrace_synth_opts.callchain_sz;
+
+ /* make sure PERF_EXEC_PATH is set for scripts */
+ set_argv_exec_path(get_argv_exec_path());
+
+ if (argc && !script_name && !rec_script_path && !rep_script_path) {
+ int live_pipe[2];
+ int rep_args;
+ pid_t pid;
+
+ rec_script_path = get_script_path(argv[0], RECORD_SUFFIX);
+ rep_script_path = get_script_path(argv[0], REPORT_SUFFIX);
+
+ if (!rec_script_path && !rep_script_path) {
+ usage_with_options_msg(script_usage, options,
+ "Couldn't find script `%s'\n\n See perf"
+ " script -l for available scripts.\n", argv[0]);
+ }
+
+ if (is_top_script(argv[0])) {
+ rep_args = argc - 1;
+ } else {
+ int rec_args;
+
+ rep_args = has_required_arg(rep_script_path);
+ rec_args = (argc - 1) - rep_args;
+ if (rec_args < 0) {
+ usage_with_options_msg(script_usage, options,
+ "`%s' script requires options."
+ "\n\n See perf script -l for available "
+ "scripts and options.\n", argv[0]);
+ }
+ }
+
+ if (pipe(live_pipe) < 0) {
+ perror("failed to create pipe");
+ return -1;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ perror("failed to fork");
+ return -1;
+ }
+
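+		/*
+		 * Live mode: the child execs the record script with stdout on
+		 * the pipe ("-o -") while the parent execs the report script
+		 * reading the stream from stdin ("-i -").
+		 */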
+ if (!pid) {
+ j = 0;
+
+ dup2(live_pipe[1], 1);
+ close(live_pipe[0]);
+
+ if (is_top_script(argv[0])) {
+ system_wide = true;
+ } else if (!system_wide) {
+ if (have_cmd(argc - rep_args, &argv[rep_args]) != 0) {
+ err = -1;
+ goto out;
+ }
+ }
+
+ __argv = malloc((argc + 6) * sizeof(const char *));
+ if (!__argv) {
+ pr_err("malloc failed\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ __argv[j++] = "/bin/sh";
+ __argv[j++] = rec_script_path;
+ if (system_wide)
+ __argv[j++] = "-a";
+ __argv[j++] = "-q";
+ __argv[j++] = "-o";
+ __argv[j++] = "-";
+ for (i = rep_args + 1; i < argc; i++)
+ __argv[j++] = argv[i];
+ __argv[j++] = NULL;
+
+ execvp("/bin/sh", (char **)__argv);
+ free(__argv);
+ exit(-1);
+ }
+
+ dup2(live_pipe[0], 0);
+ close(live_pipe[1]);
+
+ __argv = malloc((argc + 4) * sizeof(const char *));
+ if (!__argv) {
+ pr_err("malloc failed\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ j = 0;
+ __argv[j++] = "/bin/sh";
+ __argv[j++] = rep_script_path;
+ for (i = 1; i < rep_args + 1; i++)
+ __argv[j++] = argv[i];
+ __argv[j++] = "-i";
+ __argv[j++] = "-";
+ __argv[j++] = NULL;
+
+ execvp("/bin/sh", (char **)__argv);
+ free(__argv);
+ exit(-1);
+ }
+
+ if (rec_script_path)
+ script_path = rec_script_path;
+ if (rep_script_path)
+ script_path = rep_script_path;
+
+ if (script_path) {
+ j = 0;
+
+ if (!rec_script_path)
+ system_wide = false;
+ else if (!system_wide) {
+ if (have_cmd(argc - 1, &argv[1]) != 0) {
+ err = -1;
+ goto out;
+ }
+ }
+
+ __argv = malloc((argc + 2) * sizeof(const char *));
+ if (!__argv) {
+ pr_err("malloc failed\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ __argv[j++] = "/bin/sh";
+ __argv[j++] = script_path;
+ if (system_wide)
+ __argv[j++] = "-a";
+ for (i = 2; i < argc; i++)
+ __argv[j++] = argv[i];
+ __argv[j++] = NULL;
+
+ execvp("/bin/sh", (char **)__argv);
+ free(__argv);
+ exit(-1);
+ }
+
+ if (!script_name)
+ setup_pager();
+
+ session = perf_session__new(&data, false, &script.tool);
+ if (session == NULL)
+ return -1;
+
+ if (header || header_only) {
+ script.tool.show_feat_hdr = SHOW_FEAT_HEADER;
+ perf_session__fprintf_info(session, stdout, show_full_info);
+ if (header_only)
+ goto out_delete;
+ }
+ if (show_full_info)
+ script.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO;
+
+ if (symbol__init(&session->header.env) < 0)
+ goto out_delete;
+
+ script.session = session;
+ script__setup_sample_type(&script);
+
+ if (output[PERF_TYPE_HARDWARE].fields & PERF_OUTPUT_CALLINDENT)
+ itrace_synth_opts.thread_stack = true;
+
+ session->itrace_synth_opts = &itrace_synth_opts;
+
+ if (cpu_list) {
+ err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
+ if (err < 0)
+ goto out_delete;
+ itrace_synth_opts.cpu_bitmap = cpu_bitmap;
+ }
+
+ if (!no_callchain)
+ symbol_conf.use_callchain = true;
+ else
+ symbol_conf.use_callchain = false;
+
+ if (session->tevent.pevent &&
+ tep_set_function_resolver(session->tevent.pevent,
+ machine__resolve_kernel_addr,
+ &session->machines.host) < 0) {
+ pr_err("%s: failed to set libtraceevent function resolver\n", __func__);
+ err = -1;
+ goto out_delete;
+ }
+
+ if (generate_script_lang) {
+ struct stat perf_stat;
+ int input;
+
+ if (output_set_by_user()) {
+ fprintf(stderr,
+ "custom fields not supported for generated scripts");
+ err = -EINVAL;
+ goto out_delete;
+ }
+
+ input = open(data.file.path, O_RDONLY); /* input_name */
+ if (input < 0) {
+ err = -errno;
+ perror("failed to open file");
+ goto out_delete;
+ }
+
+ err = fstat(input, &perf_stat);
+ if (err < 0) {
+ perror("failed to stat file");
+ goto out_delete;
+ }
+
+ if (!perf_stat.st_size) {
+ fprintf(stderr, "zero-sized file, nothing to do!\n");
+ goto out_delete;
+ }
+
+ scripting_ops = script_spec__lookup(generate_script_lang);
+ if (!scripting_ops) {
+ fprintf(stderr, "invalid language specifier");
+ err = -ENOENT;
+ goto out_delete;
+ }
+
+ err = scripting_ops->generate_script(session->tevent.pevent,
+ "perf-script");
+ goto out_delete;
+ }
+
+ if (script_name) {
+ err = scripting_ops->start_script(script_name, argc, argv);
+ if (err)
+ goto out_delete;
+ pr_debug("perf script started with script %s\n\n", script_name);
+ script_started = true;
+ }
+
+
+ err = perf_session__check_output_opt(session);
+ if (err < 0)
+ goto out_delete;
+
+ script.ptime_range = perf_time__range_alloc(script.time_str,
+ &script.range_size);
+ if (!script.ptime_range) {
+ err = -ENOMEM;
+ goto out_delete;
+ }
+
+ /* needs to be parsed after looking up reference time */
+ if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) {
+ if (session->evlist->first_sample_time == 0 &&
+ session->evlist->last_sample_time == 0) {
+ pr_err("HINT: no first/last sample time found in perf data.\n"
+ "Please use latest perf binary to execute 'perf record'\n"
+ "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n");
+ err = -EINVAL;
+ goto out_delete;
+ }
+
+ script.range_num = perf_time__percent_parse_str(
+ script.ptime_range, script.range_size,
+ script.time_str,
+ session->evlist->first_sample_time,
+ session->evlist->last_sample_time);
+
+ if (script.range_num < 0) {
+ pr_err("Invalid time string\n");
+ err = -EINVAL;
+ goto out_delete;
+ }
+ } else {
+ script.range_num = 1;
+ }
+
+ err = __cmd_script(&script);
+
+ flush_scripting();
+
+out_delete:
+ zfree(&script.ptime_range);
+
+ perf_evlist__free_stats(session->evlist);
+ perf_session__delete(session);
+
+ if (script_started)
+ cleanup_scripting();
+out:
+ return err;
+}
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
new file mode 100644
index 000000000..adabe9d4d
--- /dev/null
+++ b/tools/perf/builtin-stat.c
@@ -0,0 +1,3154 @@
+/*
+ * builtin-stat.c
+ *
+ * Builtin stat command: give a precise summary of the performance
+ * counters for any workload, CPU or specific PID.
+ *
+ * Sample output:
+
+ $ perf stat ./hackbench 10
+
+ Time: 0.118
+
+ Performance counter stats for './hackbench 10':
+
+ 1708.761321 task-clock # 11.037 CPUs utilized
+ 41,190 context-switches # 0.024 M/sec
+ 6,735 CPU-migrations # 0.004 M/sec
+ 17,318 page-faults # 0.010 M/sec
+ 5,205,202,243 cycles # 3.046 GHz
+ 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle
+ 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle
+ 2,603,501,247 instructions # 0.50 insns per cycle
+ # 1.48 stalled cycles per insn
+ 484,357,498 branches # 283.455 M/sec
+ 6,388,934 branch-misses # 1.32% of all branches
+
+ 0.154822978 seconds time elapsed
+
+ *
+ * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Yanmin Zhang <yanmin.zhang@intel.com>
+ * Wu Fengguang <fengguang.wu@intel.com>
+ * Mike Galbraith <efault@gmx.de>
+ * Paul Mackerras <paulus@samba.org>
+ * Jaswinder Singh Rajput <jaswinder@kernel.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "perf.h"
+#include "builtin.h"
+#include "util/cgroup.h"
+#include "util/util.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/pmu.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/debug.h"
+#include "util/drv_configs.h"
+#include "util/color.h"
+#include "util/stat.h"
+#include "util/header.h"
+#include "util/cpumap.h"
+#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/counts.h"
+#include "util/group.h"
+#include "util/session.h"
+#include "util/tool.h"
+#include "util/string2.h"
+#include "util/metricgroup.h"
+#include "util/top.h"
+#include "asm/bug.h"
+
+#include <linux/time64.h>
+#include <api/fs/fs.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <inttypes.h>
+#include <locale.h>
+#include <math.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/wait.h>
+
+#include "sane_ctype.h"
+
+#define DEFAULT_SEPARATOR " "
+#define CNTR_NOT_SUPPORTED "<not supported>"
+#define CNTR_NOT_COUNTED "<not counted>"
+#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
+
+static void print_counters(struct timespec *ts, int argc, const char **argv);
+
+/* Default events used for perf stat -T */
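+/* The {...} braces define one event group, scheduled together on the PMU. */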
+static const char *transaction_attrs = {
+ "task-clock,"
+ "{"
+ "instructions,"
+ "cycles,"
+ "cpu/cycles-t/,"
+ "cpu/tx-start/,"
+ "cpu/el-start/,"
+ "cpu/cycles-ct/"
+ "}"
+};
+
+/* More limited version when the CPU does not have all events. */
+static const char *transaction_limited_attrs = {
+ "task-clock,"
+ "{"
+ "instructions,"
+ "cycles,"
+ "cpu/cycles-t/,"
+ "cpu/tx-start/"
+ "}"
+};
+
+static const char *topdown_attrs[] = {
+ "topdown-total-slots",
+ "topdown-slots-retired",
+ "topdown-recovery-bubbles",
+ "topdown-fetch-bubbles",
+ "topdown-slots-issued",
+ NULL,
+};
+
+static const char *smi_cost_attrs = {
+ "{"
+ "msr/aperf/,"
+ "msr/smi/,"
+ "cycles"
+ "}"
+};
+
+static struct perf_evlist *evsel_list;
+
+static struct rblist metric_events;
+
+static struct target target = {
+ .uid = UINT_MAX,
+};
+
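+/* Callback mapping a cpu in @m to the id of the socket/core it belongs to. */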
+typedef int (*aggr_get_id_t)(struct cpu_map *m, int cpu);
+
+#define METRIC_ONLY_LEN 20
+
+static int run_count = 1;
+static bool no_inherit = false;
+static volatile pid_t child_pid = -1;
+static bool null_run = false;
+static int detailed_run = 0;
+static bool transaction_run;
+static bool topdown_run = false;
+static bool smi_cost = false;
+static bool smi_reset = false;
+static bool big_num = true;
+static int big_num_opt = -1;
+static const char *csv_sep = NULL;
+static bool csv_output = false;
+static bool group = false;
+static const char *pre_cmd = NULL;
+static const char *post_cmd = NULL;
+static bool sync_run = false;
+static unsigned int initial_delay = 0;
+static unsigned int unit_width = 4; /* strlen("unit") */
+static bool forever = false;
+static bool metric_only = false;
+static bool force_metric_only = false;
+static bool no_merge = false;
+static bool walltime_run_table = false;
+static struct timespec ref_time;
+static struct cpu_map *aggr_map;
+static aggr_get_id_t aggr_get_id;
+static bool append_file;
+static bool interval_count;
+static bool interval_clear;
+static const char *output_name;
+static int output_fd;
+static int print_free_counters_hint;
+static int print_mixed_hw_group_error;
+static u64 *walltime_run;
+static bool ru_display = false;
+static struct rusage ru_data;
+static unsigned int metric_only_len = METRIC_ONLY_LEN;
+
+struct perf_stat {
+ bool record;
+ struct perf_data data;
+ struct perf_session *session;
+ u64 bytes_written;
+ struct perf_tool tool;
+ bool maps_allocated;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ enum aggr_mode aggr_mode;
+};
+
+static struct perf_stat perf_stat;
+#define STAT_RECORD perf_stat.record
+
+static volatile int done = 0;
+
+static struct perf_stat_config stat_config = {
+ .aggr_mode = AGGR_GLOBAL,
+ .scale = true,
+};
+
+static bool is_duration_time(struct perf_evsel *evsel)
+{
+ return !strcmp(evsel->name, "duration_time");
+}
+
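+/*
+ * Compute r = a - b for timespecs, borrowing a second when the
+ * nanoseconds underflow: e.g. a = {2, 100000000} minus
+ * b = {1, 900000000} gives r = {0, 200000000}, i.e. 0.2s.
+ */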
+static inline void diff_timespec(struct timespec *r, struct timespec *a,
+ struct timespec *b)
+{
+ r->tv_sec = a->tv_sec - b->tv_sec;
+ if (a->tv_nsec < b->tv_nsec) {
+ r->tv_nsec = a->tv_nsec + NSEC_PER_SEC - b->tv_nsec;
+ r->tv_sec--;
+ } else {
+		r->tv_nsec = a->tv_nsec - b->tv_nsec;
+ }
+}
+
+static void perf_stat__reset_stats(void)
+{
+ int i;
+
+ perf_evlist__reset_stats(evsel_list);
+ perf_stat__reset_shadow_stats();
+
+ for (i = 0; i < stat_config.stats_num; i++)
+ perf_stat__reset_shadow_per_stat(&stat_config.stats[i]);
+}
+
+static int create_perf_stat_counter(struct perf_evsel *evsel)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ struct perf_evsel *leader = evsel->leader;
+
+ if (stat_config.scale) {
+ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+ }
+
+ /*
+	 * The event is part of a non-trivial group; enable the
+	 * group read (for the leader) and ID retrieval for all
+	 * members.
+ */
+ if (leader->nr_members > 1)
+ attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+ attr->inherit = !no_inherit;
+
+ /*
+ * Some events get initialized with sample_(period/type) set,
+	 * like tracepoints. Clear them for counting.
+ */
+ attr->sample_period = 0;
+
+ /*
+	 * But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
+	 * and avoids confusing messages from older tools.
+ *
+ * However for pipe sessions we need to keep it zero,
+ * because script's perf_evsel__check_attr is triggered
+ * by attr->sample_type != 0, and we can't run it on
+ * stat sessions.
+ */
+ if (!(STAT_RECORD && perf_stat.data.is_pipe))
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+ /*
+	 * Disable all counters initially; they will be enabled
+	 * either manually by us or by the kernel via enable_on_exec
+	 * set later.
+ */
+ if (perf_evsel__is_group_leader(evsel)) {
+ attr->disabled = 1;
+
+ /*
+ * In case of initial_delay we enable tracee
+ * events manually.
+ */
+ if (target__none(&target) && !initial_delay)
+ attr->enable_on_exec = 1;
+ }
+
+ if (target__has_cpu(&target) && !target__has_per_thread(&target))
+ return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel));
+
+ return perf_evsel__open_per_thread(evsel, evsel_list->threads);
+}
+
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ if (perf_data__write(&perf_stat.data, event, event->header.size) < 0) {
+ pr_err("failed to write perf data, error: %m\n");
+ return -1;
+ }
+
+ perf_stat.bytes_written += event->header.size;
+ return 0;
+}
+
+static int write_stat_round_event(u64 tm, u64 type)
+{
+ return perf_event__synthesize_stat_round(NULL, tm, type,
+ process_synthesized_event,
+ NULL);
+}
+
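+/*
+ * Token-pasting helper: WRITE_STAT_ROUND_EVENT(t, INTERVAL) expands to
+ * write_stat_round_event(t, PERF_STAT_ROUND_TYPE__INTERVAL).
+ */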
+#define WRITE_STAT_ROUND_EVENT(time, interval) \
+ write_stat_round_event(time, PERF_STAT_ROUND_TYPE__ ## interval)
+
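+/* SID(e, x, y): perf_sample_id entry of event e for cpu index x, thread y. */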
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+static int
+perf_evsel__write_stat_event(struct perf_evsel *counter, u32 cpu, u32 thread,
+ struct perf_counts_values *count)
+{
+ struct perf_sample_id *sid = SID(counter, cpu, thread);
+
+ return perf_event__synthesize_stat(NULL, cpu, thread, sid->id, count,
+ process_synthesized_event, NULL);
+}
+
+/*
+ * Read out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static int read_counter(struct perf_evsel *counter)
+{
+ int nthreads = thread_map__nr(evsel_list->threads);
+ int ncpus, cpu, thread;
+
+ if (target__has_cpu(&target) && !target__has_per_thread(&target))
+ ncpus = perf_evsel__nr_cpus(counter);
+ else
+ ncpus = 1;
+
+ if (!counter->supported)
+ return -ENOENT;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ struct perf_counts_values *count;
+
+ count = perf_counts(counter->counts, cpu, thread);
+
+ /*
+ * The leader's group read loads data into its group members
+			 * (via perf_evsel__read_counter) and sets their count->loaded.
+ */
+ if (!count->loaded &&
+ perf_evsel__read_counter(counter, cpu, thread)) {
+ counter->counts->scaled = -1;
+ perf_counts(counter->counts, cpu, thread)->ena = 0;
+ perf_counts(counter->counts, cpu, thread)->run = 0;
+ return -1;
+ }
+
+ count->loaded = false;
+
+ if (STAT_RECORD) {
+ if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ pr_err("failed to write stat event\n");
+ return -1;
+ }
+ }
+
+ if (verbose > 1) {
+ fprintf(stat_config.output,
+ "%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter),
+ cpu,
+ count->val, count->ena, count->run);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static void read_counters(void)
+{
+ struct perf_evsel *counter;
+ int ret;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ ret = read_counter(counter);
+ if (ret)
+ pr_debug("failed to read counter %s\n", counter->name);
+
+ if (ret == 0 && perf_stat_process_counter(&stat_config, counter))
+ pr_warning("failed to process counter %s\n", counter->name);
+ }
+}
+
+static void process_interval(void)
+{
+ struct timespec ts, rs;
+
+ read_counters();
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ diff_timespec(&rs, &ts, &ref_time);
+
+ if (STAT_RECORD) {
+ if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
+ pr_err("failed to write stat round event\n");
+ }
+
+ init_stats(&walltime_nsecs_stats);
+ update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
+ print_counters(&rs, 0, NULL);
+}
+
+static void enable_counters(void)
+{
+ if (initial_delay)
+ usleep(initial_delay * USEC_PER_MSEC);
+
+ /*
+ * We need to enable counters only if:
+	 * - we are attaching to an existing task or cpu (no forked tracee), or
+	 * - an initial delay is configured, in which case the tracee starts disabled
+ */
+ if (!target__none(&target) || initial_delay)
+ perf_evlist__enable(evsel_list);
+}
+
+static void disable_counters(void)
+{
+ /*
+ * If we don't have tracee (attaching to task or cpu), counters may
+ * still be running. To get accurate group ratios, we must stop groups
+ * from counting before reading their constituent counters.
+ */
+ if (!target__none(&target))
+ perf_evlist__disable(evsel_list);
+}
+
+static volatile int workload_exec_errno;
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1
+ * if the fork fails, since we asked for it by setting
+ * want_signal to true.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *info,
+ void *ucontext __maybe_unused)
+{
+ workload_exec_errno = info->si_value.sival_int;
+}
+
+static int perf_stat_synthesize_config(bool is_pipe)
+{
+ int err;
+
+ if (is_pipe) {
+ err = perf_event__synthesize_attrs(NULL, perf_stat.session,
+ process_synthesized_event);
+ if (err < 0) {
+ pr_err("Couldn't synthesize attrs.\n");
+ return err;
+ }
+ }
+
+	err = perf_event__synthesize_extra_attr(NULL,
+						evsel_list,
+						process_synthesized_event,
+						is_pipe);
+	if (err < 0) {
+		pr_err("Couldn't synthesize extra attrs.\n");
+		return err;
+	}
+
+ err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads,
+ process_synthesized_event,
+ NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_cpu_map(NULL, evsel_list->cpus,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize thread map.\n");
+ return err;
+ }
+
+ err = perf_event__synthesize_stat_config(NULL, &stat_config,
+ process_synthesized_event, NULL);
+ if (err < 0) {
+ pr_err("Couldn't synthesize config.\n");
+ return err;
+ }
+
+ return 0;
+}
+
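+/* FD(e, x, y): file descriptor of event e for cpu index x, thread y. */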
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+static int __store_counter_ids(struct perf_evsel *counter)
+{
+ int cpu, thread;
+
+ for (cpu = 0; cpu < xyarray__max_x(counter->fd); cpu++) {
+ for (thread = 0; thread < xyarray__max_y(counter->fd);
+ thread++) {
+ int fd = FD(counter, cpu, thread);
+
+ if (perf_evlist__id_add_fd(evsel_list, counter,
+ cpu, thread, fd) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int store_counter_ids(struct perf_evsel *counter)
+{
+ struct cpu_map *cpus = counter->cpus;
+ struct thread_map *threads = counter->threads;
+
+ if (perf_evsel__alloc_id(counter, cpus->nr, threads->nr))
+ return -ENOMEM;
+
+ return __store_counter_ids(counter);
+}
+
+static bool perf_evsel__should_store_id(struct perf_evsel *counter)
+{
+ return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
+}
+
+static struct perf_evsel *perf_evsel__reset_weak_group(struct perf_evsel *evsel)
+{
+ struct perf_evsel *c2, *leader;
+ bool is_open = true;
+
+ leader = evsel->leader;
+ pr_debug("Weak group for %s/%d failed\n",
+ leader->name, leader->nr_members);
+
+ /*
+ * for_each_group_member doesn't work here because it doesn't
+ * include the first entry.
+ */
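+	/*
+	 * Members seen before the failing event were opened successfully
+	 * and must be closed; the failing one and those after it were
+	 * never opened. Every member then becomes its own leader.
+	 */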
+ evlist__for_each_entry(evsel_list, c2) {
+ if (c2 == evsel)
+ is_open = false;
+ if (c2->leader == leader) {
+ if (is_open)
+ perf_evsel__close(c2);
+ c2->leader = c2;
+ c2->nr_members = 0;
+ }
+ }
+ return leader;
+}
+
+static int __run_perf_stat(int argc, const char **argv, int run_idx)
+{
+ int interval = stat_config.interval;
+ int times = stat_config.times;
+ int timeout = stat_config.timeout;
+ char msg[BUFSIZ];
+ unsigned long long t0, t1;
+ struct perf_evsel *counter;
+ struct timespec ts;
+ size_t l;
+ int status = 0;
+ const bool forks = (argc > 0);
+ bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
+ struct perf_evsel_config_term *err_term;
+
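+	/*
+	 * interval and timeout are in milliseconds; split them into the
+	 * whole seconds and nanoseconds that nanosleep() expects.
+	 */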
+ if (interval) {
+ ts.tv_sec = interval / USEC_PER_MSEC;
+ ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
+ } else if (timeout) {
+ ts.tv_sec = timeout / USEC_PER_MSEC;
+ ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
+ } else {
+ ts.tv_sec = 1;
+ ts.tv_nsec = 0;
+ }
+
+ if (forks) {
+ if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
+ workload_exec_failed_signal) < 0) {
+ perror("failed to prepare workload");
+ return -1;
+ }
+ child_pid = evsel_list->workload.pid;
+ }
+
+ if (group)
+ perf_evlist__set_leader(evsel_list);
+
+ evlist__for_each_entry(evsel_list, counter) {
+try_again:
+ if (create_perf_stat_counter(counter) < 0) {
+
+ /* Weak group failed. Reset the group. */
+ if ((errno == EINVAL || errno == EBADF) &&
+ counter->leader != counter &&
+ counter->weak_group) {
+ counter = perf_evsel__reset_weak_group(counter);
+ goto try_again;
+ }
+
+ /*
+ * PPC returns ENXIO for HW counters until 2.6.37
+ * (behavior changed with commit b0a873e).
+ */
+ if (errno == EINVAL || errno == ENOSYS ||
+ errno == ENOENT || errno == EOPNOTSUPP ||
+ errno == ENXIO) {
+ if (verbose > 0)
+ ui__warning("%s event is not supported by the kernel.\n",
+ perf_evsel__name(counter));
+ counter->supported = false;
+
+ if ((counter->leader != counter) ||
+ !(counter->leader->nr_members > 1))
+ continue;
+ } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (verbose > 0)
+ ui__warning("%s\n", msg);
+ goto try_again;
+ } else if (target__has_per_thread(&target) &&
+ evsel_list->threads &&
+ evsel_list->threads->err_thread != -1) {
+ /*
+				 * In the global --per-thread case, remove the
+				 * failing thread from the map and retry.
+ */
+ if (!thread_map__remove(evsel_list->threads,
+ evsel_list->threads->err_thread)) {
+ evsel_list->threads->err_thread = -1;
+ goto try_again;
+ }
+ }
+
+ perf_evsel__open_strerror(counter, &target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM);
+
+ return -1;
+ }
+ counter->supported = true;
+
+ l = strlen(counter->unit);
+ if (l > unit_width)
+ unit_width = l;
+
+ if (perf_evsel__should_store_id(counter) &&
+ store_counter_ids(counter))
+ return -1;
+ }
+
+ if (perf_evlist__apply_filters(evsel_list, &counter)) {
+ pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
+ counter->filter, perf_evsel__name(counter), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ return -1;
+ }
+
+ if (perf_evlist__apply_drv_configs(evsel_list, &counter, &err_term)) {
+ pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(counter), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ return -1;
+ }
+
+ if (STAT_RECORD) {
+ int err, fd = perf_data__fd(&perf_stat.data);
+
+ if (is_pipe) {
+ err = perf_header__write_pipe(perf_data__fd(&perf_stat.data));
+ } else {
+ err = perf_session__write_header(perf_stat.session, evsel_list,
+ fd, false);
+ }
+
+ if (err < 0)
+ return err;
+
+ err = perf_stat_synthesize_config(is_pipe);
+ if (err < 0)
+ return err;
+ }
+
+ /*
+ * Enable counters and exec the command:
+ */
+ t0 = rdclock();
+ clock_gettime(CLOCK_MONOTONIC, &ref_time);
+
+ if (forks) {
+ perf_evlist__start_workload(evsel_list);
+ enable_counters();
+
+ if (interval || timeout) {
+ while (!waitpid(child_pid, &status, WNOHANG)) {
+ nanosleep(&ts, NULL);
+ if (timeout)
+ break;
+ process_interval();
+ if (interval_count && !(--times))
+ break;
+ }
+ }
+ wait4(child_pid, &status, 0, &ru_data);
+
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
+ pr_err("Workload failed: %s\n", emsg);
+ return -1;
+ }
+
+ if (WIFSIGNALED(status))
+ psignal(WTERMSIG(status), argv[0]);
+ } else {
+ enable_counters();
+ while (!done) {
+ nanosleep(&ts, NULL);
+ if (timeout)
+ break;
+ if (interval) {
+ process_interval();
+ if (interval_count && !(--times))
+ break;
+ }
+ }
+ }
+
+ disable_counters();
+
+ t1 = rdclock();
+
+ if (walltime_run_table)
+ walltime_run[run_idx] = t1 - t0;
+
+ update_stats(&walltime_nsecs_stats, t1 - t0);
+
+ /*
+ * Closing a group leader splits the group, and as we only disable
+ * group leaders, results in remaining events becoming enabled. To
+ * avoid arbitrary skew, we must read all counters before closing any
+ * group leaders.
+ */
+ read_counters();
+ perf_evlist__close(evsel_list);
+
+ return WEXITSTATUS(status);
+}
+
+static int run_perf_stat(int argc, const char **argv, int run_idx)
+{
+ int ret;
+
+ if (pre_cmd) {
+ ret = system(pre_cmd);
+ if (ret)
+ return ret;
+ }
+
+ if (sync_run)
+ sync();
+
+ ret = __run_perf_stat(argc, argv, run_idx);
+ if (ret)
+ return ret;
+
+ if (post_cmd) {
+ ret = system(post_cmd);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static void print_running(u64 run, u64 ena)
+{
+ if (csv_output) {
+ fprintf(stat_config.output, "%s%" PRIu64 "%s%.2f",
+ csv_sep,
+ run,
+ csv_sep,
+ ena ? 100.0 * run / ena : 100.0);
+ } else if (run != ena) {
+ fprintf(stat_config.output, " (%.2f%%)", 100.0 * run / ena);
+ }
+}
+
+static void print_noise_pct(double total, double avg)
+{
+ double pct = rel_stddev_stats(total, avg);
+
+ if (csv_output)
+ fprintf(stat_config.output, "%s%.2f%%", csv_sep, pct);
+ else if (pct)
+ fprintf(stat_config.output, " ( +-%6.2f%% )", pct);
+}
+
+static void print_noise(struct perf_evsel *evsel, double avg)
+{
+ struct perf_stat_evsel *ps;
+
+ if (run_count == 1)
+ return;
+
+ ps = evsel->stats;
+ print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
+}
+
+static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
+{
+ switch (stat_config.aggr_mode) {
+ case AGGR_CORE:
+ fprintf(stat_config.output, "S%d-C%*d%s%*d%s",
+ cpu_map__id_to_socket(id),
+ csv_output ? 0 : -8,
+ cpu_map__id_to_cpu(id),
+ csv_sep,
+ csv_output ? 0 : 4,
+ nr,
+ csv_sep);
+ break;
+ case AGGR_SOCKET:
+ fprintf(stat_config.output, "S%*d%s%*d%s",
+ csv_output ? 0 : -5,
+ id,
+ csv_sep,
+ csv_output ? 0 : 4,
+ nr,
+ csv_sep);
+ break;
+ case AGGR_NONE:
+ fprintf(stat_config.output, "CPU%*d%s",
+ csv_output ? 0 : -4,
+ perf_evsel__cpus(evsel)->map[id], csv_sep);
+ break;
+ case AGGR_THREAD:
+ fprintf(stat_config.output, "%*s-%*d%s",
+ csv_output ? 0 : 16,
+ thread_map__comm(evsel->threads, id),
+ csv_output ? 0 : -8,
+ thread_map__pid(evsel->threads, id),
+ csv_sep);
+ break;
+ case AGGR_GLOBAL:
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+}
+
+struct outstate {
+ FILE *fh;
+ bool newline;
+ const char *prefix;
+ int nfields;
+ int id, nr;
+ struct perf_evsel *evsel;
+};
+
+#define METRIC_LEN 35
+
+static void new_line_std(void *ctx)
+{
+ struct outstate *os = ctx;
+
+ os->newline = true;
+}
+
+static void do_new_line_std(struct outstate *os)
+{
+ fputc('\n', os->fh);
+ fputs(os->prefix, os->fh);
+ aggr_printout(os->evsel, os->id, os->nr);
+ if (stat_config.aggr_mode == AGGR_NONE)
+ fprintf(os->fh, " ");
+ fprintf(os->fh, " ");
+}
+
+static void print_metric_std(void *ctx, const char *color, const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ int n;
+ bool newline = os->newline;
+
+ os->newline = false;
+
+ if (unit == NULL || fmt == NULL) {
+ fprintf(out, "%-*s", METRIC_LEN, "");
+ return;
+ }
+
+ if (newline)
+ do_new_line_std(os);
+
+ n = fprintf(out, " # ");
+ if (color)
+ n += color_fprintf(out, color, fmt, val);
+ else
+ n += fprintf(out, fmt, val);
+ fprintf(out, " %-*s", METRIC_LEN - n - 1, unit);
+}
+
+static void new_line_csv(void *ctx)
+{
+ struct outstate *os = ctx;
+ int i;
+
+ fputc('\n', os->fh);
+ if (os->prefix)
+ fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+ aggr_printout(os->evsel, os->id, os->nr);
+ for (i = 0; i < os->nfields; i++)
+ fputs(csv_sep, os->fh);
+}
+
+static void print_metric_csv(void *ctx,
+ const char *color __maybe_unused,
+ const char *fmt, const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[64], *vals, *ends;
+
+ if (unit == NULL || fmt == NULL) {
+ fprintf(out, "%s%s", csv_sep, csv_sep);
+ return;
+ }
+ snprintf(buf, sizeof(buf), fmt, val);
+ ends = vals = ltrim(buf);
+ while (isdigit(*ends) || *ends == '.')
+ ends++;
+ *ends = 0;
+ while (isspace(*unit))
+ unit++;
+ fprintf(out, "%s%s%s%s", csv_sep, vals, csv_sep, unit);
+}
+
+/* Filter out some columns that don't work well in metric-only mode */
+
+static bool valid_only_metric(const char *unit)
+{
+ if (!unit)
+ return false;
+ if (strstr(unit, "/sec") ||
+ strstr(unit, "hz") ||
+ strstr(unit, "Hz") ||
+ strstr(unit, "CPUs utilized"))
+ return false;
+ return true;
+}
+
+static const char *fixunit(char *buf, struct perf_evsel *evsel,
+ const char *unit)
+{
+ if (!strncmp(unit, "of all", 6)) {
+ snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+ unit);
+ return buf;
+ }
+ return unit;
+}
+
+static void print_metric_only(void *ctx, const char *color, const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[1024], str[1024];
+ unsigned mlen = metric_only_len;
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(buf, os->evsel, unit);
+ if (mlen < strlen(unit))
+ mlen = strlen(unit) + 1;
+
+ if (color)
+ mlen += strlen(color) + sizeof(PERF_COLOR_RESET) - 1;
+
+ color_snprintf(str, sizeof(str), color ?: "", fmt, val);
+ fprintf(out, "%*s ", mlen, str);
+}
+
+static void print_metric_only_csv(void *ctx, const char *color __maybe_unused,
+ const char *fmt,
+ const char *unit, double val)
+{
+ struct outstate *os = ctx;
+ FILE *out = os->fh;
+ char buf[64], *vals, *ends;
+ char tbuf[1024];
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(tbuf, os->evsel, unit);
+ snprintf(buf, sizeof buf, fmt, val);
+ ends = vals = ltrim(buf);
+ while (isdigit(*ends) || *ends == '.')
+ ends++;
+ *ends = 0;
+ fprintf(out, "%s%s", vals, csv_sep);
+}
+
+static void new_line_metric(void *ctx __maybe_unused)
+{
+}
+
+static void print_metric_header(void *ctx, const char *color __maybe_unused,
+ const char *fmt __maybe_unused,
+ const char *unit, double val __maybe_unused)
+{
+ struct outstate *os = ctx;
+ char tbuf[1024];
+
+ if (!valid_only_metric(unit))
+ return;
+ unit = fixunit(tbuf, os->evsel, unit);
+ if (csv_output)
+ fprintf(os->fh, "%s%s", unit, csv_sep);
+ else
+ fprintf(os->fh, "%*s ", metric_only_len, unit);
+}
+
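+/*
+ * Shadow (metric) stats are kept per cpu: for an aggregation id, pick
+ * the first cpu of the evsel that maps to that id so the metrics are
+ * always read from one consistent slot.
+ */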
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+ int i;
+
+ if (!aggr_get_id)
+ return 0;
+
+ if (stat_config.aggr_mode == AGGR_NONE)
+ return id;
+
+ if (stat_config.aggr_mode == AGGR_GLOBAL)
+ return 0;
+
+ for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+ int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+ if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+ return cpu2;
+ }
+ return 0;
+}
+
+static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
+{
+ FILE *output = stat_config.output;
+ double sc = evsel->scale;
+ const char *fmt;
+
+ if (csv_output) {
+ fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
+ } else {
+ if (big_num)
+ fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
+ else
+ fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
+ }
+
+ aggr_printout(evsel, id, nr);
+
+ fprintf(output, fmt, avg, csv_sep);
+
+ if (evsel->unit)
+ fprintf(output, "%-*s%s",
+ csv_output ? 0 : unit_width,
+ evsel->unit, csv_sep);
+
+ fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
+
+ if (evsel->cgrp)
+ fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
+}
+
+static bool is_mixed_hw_group(struct perf_evsel *counter)
+{
+ struct perf_evlist *evlist = counter->evlist;
+ u32 pmu_type = counter->attr.type;
+ struct perf_evsel *pos;
+
+ if (counter->nr_members < 2)
+ return false;
+
+ evlist__for_each_entry(evlist, pos) {
+ /* software events can be part of any hardware group */
+ if (pos->attr.type == PERF_TYPE_SOFTWARE)
+ continue;
+ if (pmu_type == PERF_TYPE_SOFTWARE) {
+ pmu_type = pos->attr.type;
+ continue;
+ }
+ if (pmu_type != pos->attr.type)
+ return true;
+ }
+
+ return false;
+}
+
+static void printout(int id, int nr, struct perf_evsel *counter, double uval,
+ char *prefix, u64 run, u64 ena, double noise,
+ struct runtime_stat *st)
+{
+ struct perf_stat_output_ctx out;
+ struct outstate os = {
+ .fh = stat_config.output,
+ .prefix = prefix ? prefix : "",
+ .id = id,
+ .nr = nr,
+ .evsel = counter,
+ };
+ print_metric_t pm = print_metric_std;
+ void (*nl)(void *);
+
+ if (metric_only) {
+ nl = new_line_metric;
+ if (csv_output)
+ pm = print_metric_only_csv;
+ else
+ pm = print_metric_only;
+ } else
+ nl = new_line_std;
+
+ if (csv_output && !metric_only) {
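+		/*
+		 * Extra CSV columns the aggregation prefix adds per mode;
+		 * new_line_csv() pads a continued line with nfields
+		 * separators (3 base fields plus these, plus cgroup).
+		 */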
+ static int aggr_fields[] = {
+ [AGGR_GLOBAL] = 0,
+ [AGGR_THREAD] = 1,
+ [AGGR_NONE] = 1,
+ [AGGR_SOCKET] = 2,
+ [AGGR_CORE] = 2,
+ };
+
+ pm = print_metric_csv;
+ nl = new_line_csv;
+ os.nfields = 3;
+ os.nfields += aggr_fields[stat_config.aggr_mode];
+ if (counter->cgrp)
+ os.nfields++;
+ }
+ if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
+ if (metric_only) {
+ pm(&os, NULL, "", "", 0);
+ return;
+ }
+ aggr_printout(counter, id, nr);
+
+ fprintf(stat_config.output, "%*s%s",
+ csv_output ? 0 : 18,
+ counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+ csv_sep);
+
+ if (counter->supported) {
+ print_free_counters_hint = 1;
+ if (is_mixed_hw_group(counter))
+ print_mixed_hw_group_error = 1;
+ }
+
+ fprintf(stat_config.output, "%-*s%s",
+ csv_output ? 0 : unit_width,
+ counter->unit, csv_sep);
+
+ fprintf(stat_config.output, "%*s",
+ csv_output ? 0 : -25,
+ perf_evsel__name(counter));
+
+ if (counter->cgrp)
+ fprintf(stat_config.output, "%s%s",
+ csv_sep, counter->cgrp->name);
+
+ if (!csv_output)
+ pm(&os, NULL, NULL, "", 0);
+ print_noise(counter, noise);
+ print_running(run, ena);
+ if (csv_output)
+ pm(&os, NULL, NULL, "", 0);
+ return;
+ }
+
+ if (!metric_only)
+ abs_printout(id, nr, counter, uval);
+
+ out.print_metric = pm;
+ out.new_line = nl;
+ out.ctx = &os;
+ out.force_header = false;
+
+ if (csv_output && !metric_only) {
+ print_noise(counter, noise);
+ print_running(run, ena);
+ }
+
+ perf_stat__print_shadow_stats(counter, uval,
+ first_shadow_cpu(counter, id),
+ &out, &metric_events, st);
+ if (!csv_output && !metric_only) {
+ print_noise(counter, noise);
+ print_running(run, ena);
+ }
+}
+
+static void aggr_update_shadow(void)
+{
+ int cpu, s2, id, s;
+ u64 val;
+ struct perf_evsel *counter;
+
+ for (s = 0; s < aggr_map->nr; s++) {
+ id = aggr_map->map[s];
+ evlist__for_each_entry(evsel_list, counter) {
+ val = 0;
+ for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ s2 = aggr_get_id(evsel_list->cpus, cpu);
+ if (s2 != id)
+ continue;
+ val += perf_counts(counter->counts, cpu, 0)->val;
+ }
+ perf_stat__update_shadow_stats(counter, val,
+ first_shadow_cpu(counter, id),
+ &rt_stat);
+ }
+ }
+}
+
+static void uniquify_event_name(struct perf_evsel *counter)
+{
+ char *new_name;
+ char *config;
+
+ if (counter->uniquified_name ||
+ !counter->pmu_name || !strncmp(counter->name, counter->pmu_name,
+ strlen(counter->pmu_name)))
+ return;
+
+ config = strchr(counter->name, '/');
+ if (config) {
+ if (asprintf(&new_name,
+ "%s%s", counter->pmu_name, config) > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ }
+ } else {
+ if (asprintf(&new_name,
+ "%s [%s]", counter->name, counter->pmu_name) > 0) {
+ free(counter->name);
+ counter->name = new_name;
+ }
+ }
+
+ counter->uniquified_name = true;
+}
+
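+/*
+ * Successive evlist entries with the same name, unit, scale and cgroup
+ * (e.g. one instance per uncore PMU) are aliases of one logical event;
+ * fold their counts into the first entry.
+ */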
+static void collect_all_aliases(struct perf_evsel *counter,
+ void (*cb)(struct perf_evsel *counter, void *data,
+ bool first),
+ void *data)
+{
+ struct perf_evsel *alias;
+
+ alias = list_prepare_entry(counter, &(evsel_list->entries), node);
+ list_for_each_entry_continue (alias, &evsel_list->entries, node) {
+ if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+ alias->scale != counter->scale ||
+ alias->cgrp != counter->cgrp ||
+ strcmp(alias->unit, counter->unit) ||
+ perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter))
+ break;
+ alias->merged_stat = true;
+ cb(alias, data, false);
+ }
+}
+
+static bool collect_data(struct perf_evsel *counter,
+ void (*cb)(struct perf_evsel *counter, void *data,
+ bool first),
+ void *data)
+{
+ if (counter->merged_stat)
+ return false;
+ cb(counter, data, true);
+ if (no_merge)
+ uniquify_event_name(counter);
+ else if (counter->auto_merge_stats)
+ collect_all_aliases(counter, cb, data);
+ return true;
+}
+
+struct aggr_data {
+ u64 ena, run, val;
+ int id;
+ int nr;
+ int cpu;
+};
+
+static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
+{
+ struct aggr_data *ad = data;
+ int cpu, s2;
+
+ for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ struct perf_counts_values *counts;
+
+ s2 = aggr_get_id(perf_evsel__cpus(counter), cpu);
+ if (s2 != ad->id)
+ continue;
+ if (first)
+ ad->nr++;
+ counts = perf_counts(counter->counts, cpu, 0);
+ /*
+		 * When any result is bad, mark them all bad to give
+		 * consistent output in interval mode.
+ */
+ if (counts->ena == 0 || counts->run == 0 ||
+ counter->counts->scaled == -1) {
+ ad->ena = 0;
+ ad->run = 0;
+ break;
+ }
+ ad->val += counts->val;
+ ad->ena += counts->ena;
+ ad->run += counts->run;
+ }
+}
+
+static void print_aggr(char *prefix)
+{
+ FILE *output = stat_config.output;
+ struct perf_evsel *counter;
+ int s, id, nr;
+ double uval;
+ u64 ena, run, val;
+ bool first;
+
+ if (!(aggr_map || aggr_get_id))
+ return;
+
+ aggr_update_shadow();
+
+ /*
+	 * With metric_only everything is on a single line;
+	 * without it, each counter gets its own line.
+ */
+ for (s = 0; s < aggr_map->nr; s++) {
+ struct aggr_data ad;
+ if (prefix && metric_only)
+ fprintf(output, "%s", prefix);
+
+ ad.id = id = aggr_map->map[s];
+ first = true;
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+
+ ad.val = ad.ena = ad.run = 0;
+ ad.nr = 0;
+ if (!collect_data(counter, aggr_cb, &ad))
+ continue;
+ nr = ad.nr;
+ ena = ad.ena;
+ run = ad.run;
+ val = ad.val;
+ if (first && metric_only) {
+ first = false;
+ aggr_printout(counter, id, nr);
+ }
+ if (prefix && !metric_only)
+ fprintf(output, "%s", prefix);
+
+ uval = val * counter->scale;
+ printout(id, nr, counter, uval, prefix, run, ena, 1.0,
+ &rt_stat);
+ if (!metric_only)
+ fputc('\n', output);
+ }
+ if (metric_only)
+ fputc('\n', output);
+ }
+}
+
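+/* qsort() comparator: sorts the per-thread values in descending order. */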
+static int cmp_val(const void *a, const void *b)
+{
+ return ((struct perf_aggr_thread_value *)b)->val -
+ ((struct perf_aggr_thread_value *)a)->val;
+}
+
+static struct perf_aggr_thread_value *sort_aggr_thread(
+ struct perf_evsel *counter,
+ int nthreads, int ncpus,
+ int *ret)
+{
+ int cpu, thread, i = 0;
+ double uval;
+ struct perf_aggr_thread_value *buf;
+
+ buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
+ if (!buf)
+ return NULL;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ u64 ena = 0, run = 0, val = 0;
+
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ val += perf_counts(counter->counts, cpu, thread)->val;
+ ena += perf_counts(counter->counts, cpu, thread)->ena;
+ run += perf_counts(counter->counts, cpu, thread)->run;
+ }
+
+ uval = val * counter->scale;
+
+ /*
+		 * Skip zero values when --per-thread is enabled globally,
+		 * otherwise the output is flooded with zero lines.
+ */
+ if (uval == 0.0 && target__has_per_thread(&target))
+ continue;
+
+ buf[i].counter = counter;
+ buf[i].id = thread;
+ buf[i].uval = uval;
+ buf[i].val = val;
+ buf[i].run = run;
+ buf[i].ena = ena;
+ i++;
+ }
+
+ qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
+
+ if (ret)
+ *ret = i;
+
+ return buf;
+}
+
+static void print_aggr_thread(struct perf_evsel *counter, char *prefix)
+{
+ FILE *output = stat_config.output;
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = cpu_map__nr(counter->cpus);
+ int thread, sorted_threads, id;
+ struct perf_aggr_thread_value *buf;
+
+ buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads);
+ if (!buf) {
+ perror("cannot sort aggr thread");
+ return;
+ }
+
+ for (thread = 0; thread < sorted_threads; thread++) {
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
+ id = buf[thread].id;
+ if (stat_config.stats)
+ printout(id, 0, buf[thread].counter, buf[thread].uval,
+ prefix, buf[thread].run, buf[thread].ena, 1.0,
+ &stat_config.stats[id]);
+ else
+ printout(id, 0, buf[thread].counter, buf[thread].uval,
+ prefix, buf[thread].run, buf[thread].ena, 1.0,
+ &rt_stat);
+ fputc('\n', output);
+ }
+
+ free(buf);
+}
+
+struct caggr_data {
+ double avg, avg_enabled, avg_running;
+};
+
+static void counter_aggr_cb(struct perf_evsel *counter, void *data,
+ bool first __maybe_unused)
+{
+ struct caggr_data *cd = data;
+ struct perf_stat_evsel *ps = counter->stats;
+
+ cd->avg += avg_stats(&ps->res_stats[0]);
+ cd->avg_enabled += avg_stats(&ps->res_stats[1]);
+ cd->avg_running += avg_stats(&ps->res_stats[2]);
+}
+
+/*
+ * Print out the results of a single counter:
+ * aggregated counts in system-wide mode
+ */
+static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
+{
+ FILE *output = stat_config.output;
+ double uval;
+ struct caggr_data cd = { .avg = 0.0 };
+
+ if (!collect_data(counter, counter_aggr_cb, &cd))
+ return;
+
+ if (prefix && !metric_only)
+ fprintf(output, "%s", prefix);
+
+ uval = cd.avg * counter->scale;
+ printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled,
+ cd.avg, &rt_stat);
+ if (!metric_only)
+ fprintf(output, "\n");
+}
+
+static void counter_cb(struct perf_evsel *counter, void *data,
+ bool first __maybe_unused)
+{
+ struct aggr_data *ad = data;
+
+ ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
+ ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
+ ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+}
+
+/*
+ * Print out the results of a single counter:
+ * do not aggregate counts across CPUs in system-wide mode
+ */
+static void print_counter(struct perf_evsel *counter, char *prefix)
+{
+ FILE *output = stat_config.output;
+ u64 ena, run, val;
+ double uval;
+ int cpu;
+
+ for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ struct aggr_data ad = { .cpu = cpu };
+
+ if (!collect_data(counter, counter_cb, &ad))
+ return;
+ val = ad.val;
+ ena = ad.ena;
+ run = ad.run;
+
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
+ uval = val * counter->scale;
+ printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
+ &rt_stat);
+
+ fputc('\n', output);
+ }
+}
+
+static void print_no_aggr_metric(char *prefix)
+{
+ int cpu;
+ int nrcpus = 0;
+ struct perf_evsel *counter;
+ u64 ena, run, val;
+ double uval;
+
+ nrcpus = evsel_list->cpus->nr;
+ for (cpu = 0; cpu < nrcpus; cpu++) {
+ bool first = true;
+
+ if (prefix)
+ fputs(prefix, stat_config.output);
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+ if (first) {
+ aggr_printout(counter, cpu, 0);
+ first = false;
+ }
+ val = perf_counts(counter->counts, cpu, 0)->val;
+ ena = perf_counts(counter->counts, cpu, 0)->ena;
+ run = perf_counts(counter->counts, cpu, 0)->run;
+
+ uval = val * counter->scale;
+ printout(cpu, 0, counter, uval, prefix, run, ena, 1.0,
+ &rt_stat);
+ }
+ fputc('\n', stat_config.output);
+ }
+}
+
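+/*
+ * Width of the per-mode prefix printed by aggr_printout(), used to
+ * indent the metric-only header so it lines up with the data rows.
+ */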
+static int aggr_header_lens[] = {
+ [AGGR_CORE] = 18,
+ [AGGR_SOCKET] = 12,
+ [AGGR_NONE] = 6,
+ [AGGR_THREAD] = 24,
+ [AGGR_GLOBAL] = 0,
+};
+
+static const char *aggr_header_csv[] = {
+ [AGGR_CORE] = "core,cpus,",
+ [AGGR_SOCKET] = "socket,cpus",
+ [AGGR_NONE] = "cpu,",
+ [AGGR_THREAD] = "comm-pid,",
+ [AGGR_GLOBAL] = ""
+};
+
+static void print_metric_headers(const char *prefix, bool no_indent)
+{
+ struct perf_stat_output_ctx out;
+ struct perf_evsel *counter;
+ struct outstate os = {
+ .fh = stat_config.output
+ };
+
+ if (prefix)
+ fprintf(stat_config.output, "%s", prefix);
+
+ if (!csv_output && !no_indent)
+ fprintf(stat_config.output, "%*s",
+ aggr_header_lens[stat_config.aggr_mode], "");
+ if (csv_output) {
+ if (stat_config.interval)
+ fputs("time,", stat_config.output);
+ fputs(aggr_header_csv[stat_config.aggr_mode],
+ stat_config.output);
+ }
+
+ /* Print metrics headers only */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+ os.evsel = counter;
+ out.ctx = &os;
+ out.print_metric = print_metric_header;
+ out.new_line = new_line_metric;
+ out.force_header = true;
+ os.evsel = counter;
+ perf_stat__print_shadow_stats(counter, 0,
+ 0,
+ &out,
+ &metric_events,
+ &rt_stat);
+ }
+ fputc('\n', stat_config.output);
+}
+
+static void print_interval(char *prefix, struct timespec *ts)
+{
+ FILE *output = stat_config.output;
+ static int num_print_interval;
+
+ if (interval_clear)
+ puts(CONSOLE_CLEAR);
+
+ sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep);
+
+ if ((num_print_interval == 0 && !csv_output) || interval_clear) {
+ switch (stat_config.aggr_mode) {
+ case AGGR_SOCKET:
+ fprintf(output, "# time socket cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_CORE:
+ fprintf(output, "# time core cpus");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_NONE:
+ fprintf(output, "# time CPU ");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_THREAD:
+ fprintf(output, "# time comm-pid");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ break;
+ case AGGR_GLOBAL:
+ default:
+ fprintf(output, "# time");
+ if (!metric_only)
+ fprintf(output, " counts %*s events\n", unit_width, "unit");
+ case AGGR_UNSET:
+ break;
+ }
+ }
+
+ if ((num_print_interval == 0 || interval_clear) && metric_only)
+ print_metric_headers(" ", true);
+ if (++num_print_interval == 25)
+ num_print_interval = 0;
+}
+
+static void print_header(int argc, const char **argv)
+{
+ FILE *output = stat_config.output;
+ int i;
+
+ fflush(stdout);
+
+ if (!csv_output) {
+ fprintf(output, "\n");
+ fprintf(output, " Performance counter stats for ");
+ if (target.system_wide)
+ fprintf(output, "\'system wide");
+ else if (target.cpu_list)
+ fprintf(output, "\'CPU(s) %s", target.cpu_list);
+ else if (!target__has_task(&target)) {
+ fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+ for (i = 1; argv && (i < argc); i++)
+ fprintf(output, " %s", argv[i]);
+ } else if (target.pid)
+ fprintf(output, "process id \'%s", target.pid);
+ else
+ fprintf(output, "thread id \'%s", target.tid);
+
+ fprintf(output, "\'");
+ if (run_count > 1)
+ fprintf(output, " (%d runs)", run_count);
+ fprintf(output, ":\n\n");
+ }
+}
+
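+/*
+ * Number of decimal places needed so that the first significant digit
+ * of 'num' is still visible.  Illustrative example: for a stddev of
+ * 0.0123 this returns ceil(-log10(0.0123)) = 2, and the caller below
+ * adds two more digits for display.
+ */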
+static int get_precision(double num)
+{
+ if (num > 1)
+ return 0;
+
+ return lround(ceil(-log10(num)));
+}
+
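+/*
+ * Print one line per run with its deviation from the average,
+ * followed by a crude histogram: roughly one '#' per 5% of
+ * relative deviation.  Illustrative example:
+ *
+ *           0.5500 (+0.0500) ##
+ */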
+static void print_table(FILE *output, int precision, double avg)
+{
+ char tmp[64];
+ int idx, indent = 0;
+
+ scnprintf(tmp, 64, " %17.*f", precision, avg);
+ while (tmp[indent] == ' ')
+ indent++;
+
+ fprintf(output, "%*s# Table of individual measurements:\n", indent, "");
+
+ for (idx = 0; idx < run_count; idx++) {
+ double run = (double) walltime_run[idx] / NSEC_PER_SEC;
+ int h, n = 1 + abs((int) (100.0 * (run - avg)/run) / 5);
+
+ fprintf(output, " %17.*f (%+.*f) ",
+ precision, run, precision, run - avg);
+
+ for (h = 0; h < n; h++)
+ fprintf(output, "#");
+
+ fprintf(output, "\n");
+ }
+
+ fprintf(output, "\n%*s# Final result:\n", indent, "");
+}
+
+static double timeval2double(struct timeval *t)
+{
+ return t->tv_sec + (double) t->tv_usec/USEC_PER_SEC;
+}
+
+static void print_footer(void)
+{
+ double avg = avg_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
+ FILE *output = stat_config.output;
+ int n;
+
+ if (!null_run)
+ fprintf(output, "\n");
+
+ if (run_count == 1) {
+ fprintf(output, " %17.9f seconds time elapsed", avg);
+
+ if (ru_display) {
+ double ru_utime = timeval2double(&ru_data.ru_utime);
+ double ru_stime = timeval2double(&ru_data.ru_stime);
+
+ fprintf(output, "\n\n");
+ fprintf(output, " %17.9f seconds user\n", ru_utime);
+ fprintf(output, " %17.9f seconds sys\n", ru_stime);
+ }
+ } else {
+ double sd = stddev_stats(&walltime_nsecs_stats) / NSEC_PER_SEC;
+ /*
+ * Display at most 2 more significant
+ * digits than the stddev inaccuracy.
+ */
+ int precision = get_precision(sd) + 2;
+
+ if (walltime_run_table)
+ print_table(output, precision, avg);
+
+ fprintf(output, " %17.*f +- %.*f seconds time elapsed",
+ precision, avg, precision, sd);
+
+ print_noise_pct(sd, avg);
+ }
+ fprintf(output, "\n\n");
+
+ if (print_free_counters_hint &&
+ sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
+ n > 0)
+ fprintf(output,
+"Some events weren't counted. Try disabling the NMI watchdog:\n"
+" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+" perf stat ...\n"
+" echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+
+ if (print_mixed_hw_group_error)
+ fprintf(output,
+ "The events in group usually have to be from "
+ "the same PMU. Try reorganizing the group.\n");
+}
+
+static void print_counters(struct timespec *ts, int argc, const char **argv)
+{
+ int interval = stat_config.interval;
+ struct perf_evsel *counter;
+ char buf[64], *prefix = NULL;
+
+ /* Do not print anything if we record to the pipe. */
+ if (STAT_RECORD && perf_stat.data.is_pipe)
+ return;
+
+ if (interval)
+ print_interval(prefix = buf, ts);
+ else
+ print_header(argc, argv);
+
+ if (metric_only) {
+ static int num_print_iv;
+
+ if (num_print_iv == 0 && !interval)
+ print_metric_headers(prefix, false);
+ if (num_print_iv++ == 25)
+ num_print_iv = 0;
+ if (stat_config.aggr_mode == AGGR_GLOBAL && prefix)
+ fprintf(stat_config.output, "%s", prefix);
+ }
+
+ switch (stat_config.aggr_mode) {
+ case AGGR_CORE:
+ case AGGR_SOCKET:
+ print_aggr(prefix);
+ break;
+ case AGGR_THREAD:
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+ print_aggr_thread(counter, prefix);
+ }
+ break;
+ case AGGR_GLOBAL:
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+ print_counter_aggr(counter, prefix);
+ }
+ if (metric_only)
+ fputc('\n', stat_config.output);
+ break;
+ case AGGR_NONE:
+ if (metric_only)
+ print_no_aggr_metric(prefix);
+ else {
+ evlist__for_each_entry(evsel_list, counter) {
+ if (is_duration_time(counter))
+ continue;
+ print_counter(counter, prefix);
+ }
+ }
+ break;
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ if (!interval && !csv_output)
+ print_footer();
+
+ fflush(stat_config.output);
+}
+
+static volatile int signr = -1;
+
+static void skip_signal(int signo)
+{
+ if ((child_pid == -1) || stat_config.interval)
+ done = 1;
+
+ signr = signo;
+ /*
+ * Render child_pid harmless: this way we won't send SIGTERM to
+ * a random process in case of a race with fast PID recycling.
+ */
+ child_pid = -1;
+}
+
+static void sig_atexit(void)
+{
+ sigset_t set, oset;
+
+ /*
+ * Avoid a race with the SIGCHLD handler in skip_signal(), which
+ * modifies child_pid; the goal is to avoid sending SIGTERM to a
+ * random process.
+ */
+ sigemptyset(&set);
+ sigaddset(&set, SIGCHLD);
+ sigprocmask(SIG_BLOCK, &set, &oset);
+
+ if (child_pid != -1)
+ kill(child_pid, SIGTERM);
+
+ sigprocmask(SIG_SETMASK, &oset, NULL);
+
+ if (signr == -1)
+ return;
+
+ signal(signr, SIG_DFL);
+ kill(getpid(), signr);
+}
+
+static int stat__set_big_num(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused, int unset)
+{
+ big_num_opt = unset ? 0 : 1;
+ return 0;
+}
+
+static int enable_metric_only(const struct option *opt __maybe_unused,
+ const char *s __maybe_unused, int unset)
+{
+ force_metric_only = true;
+ metric_only = !unset;
+ return 0;
+}
+
+static int parse_metric_groups(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ return metricgroup__parse_groups(opt, str, &metric_events);
+}
+
+static struct option stat_options[] = {
+ OPT_BOOLEAN('T', "transaction", &transaction_run,
+ "hardware transaction statistics"),
+ OPT_CALLBACK('e', "event", &evsel_list, "event",
+ "event selector. use 'perf list' to list available events",
+ parse_events_option),
+ OPT_CALLBACK(0, "filter", &evsel_list, "filter",
+ "event filter", parse_filter),
+ OPT_BOOLEAN('i', "no-inherit", &no_inherit,
+ "child tasks do not inherit counters"),
+ OPT_STRING('p', "pid", &target.pid, "pid",
+ "stat events on existing process id"),
+ OPT_STRING('t', "tid", &target.tid, "tid",
+ "stat events on existing thread id"),
+ OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_BOOLEAN('g', "group", &group,
+ "put the counters into a counter group"),
+ OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_INTEGER('r', "repeat", &run_count,
+ "repeat command and print average + stddev (max: 100, forever: 0)"),
+ OPT_BOOLEAN(0, "table", &walltime_run_table,
+ "display details about each run (only with -r option)"),
+ OPT_BOOLEAN('n', "null", &null_run,
+ "null run - dont start any counters"),
+ OPT_INCR('d', "detailed", &detailed_run,
+ "detailed run - start a lot of events"),
+ OPT_BOOLEAN('S', "sync", &sync_run,
+ "call sync() before starting a run"),
+ OPT_CALLBACK_NOOPT('B', "big-num", NULL, NULL,
+ "print large numbers with thousands\' separators",
+ stat__set_big_num),
+ OPT_STRING('C', "cpu", &target.cpu_list, "cpu",
+ "list of cpus to monitor in system-wide"),
+ OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
+ "disable CPU count aggregation", AGGR_NONE),
+ OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
+ OPT_STRING('x', "field-separator", &csv_sep, "separator",
+ "print counts with custom separator"),
+ OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
+ "monitor event in cgroup name only", parse_cgroups),
+ OPT_STRING('o', "output", &output_name, "file", "output file name"),
+ OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
+ OPT_INTEGER(0, "log-fd", &output_fd,
+ "log output to fd, instead of stderr"),
+ OPT_STRING(0, "pre", &pre_cmd, "command",
+ "command to run prior to the measured command"),
+ OPT_STRING(0, "post", &post_cmd, "command",
+ "command to run after to the measured command"),
+ OPT_UINTEGER('I', "interval-print", &stat_config.interval,
+ "print counts at regular interval in ms "
+ "(overhead is possible for values <= 100ms)"),
+ OPT_INTEGER(0, "interval-count", &stat_config.times,
+ "print counts for fixed number of times"),
+ OPT_BOOLEAN(0, "interval-clear", &interval_clear,
+ "clear screen in between new interval"),
+ OPT_UINTEGER(0, "timeout", &stat_config.timeout,
+ "stop workload and print counts after a timeout period in ms (>= 10ms)"),
+ OPT_SET_UINT(0, "per-socket", &stat_config.aggr_mode,
+ "aggregate counts per processor socket", AGGR_SOCKET),
+ OPT_SET_UINT(0, "per-core", &stat_config.aggr_mode,
+ "aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT(0, "per-thread", &stat_config.aggr_mode,
+ "aggregate counts per thread", AGGR_THREAD),
+ OPT_UINTEGER('D', "delay", &initial_delay,
+ "ms to wait before starting measurement after program start"),
+ OPT_CALLBACK_NOOPT(0, "metric-only", &metric_only, NULL,
+ "Only print computed metrics. No raw values", enable_metric_only),
+ OPT_BOOLEAN(0, "topdown", &topdown_run,
+ "measure topdown level 1 statistics"),
+ OPT_BOOLEAN(0, "smi-cost", &smi_cost,
+ "measure SMI cost"),
+ OPT_CALLBACK('M', "metrics", &evsel_list, "metric/metric group list",
+ "monitor specified metrics or metric groups (separated by ,)",
+ parse_metric_groups),
+ OPT_END()
+};
+
+static int perf_stat__get_socket(struct cpu_map *map, int cpu)
+{
+ return cpu_map__get_socket(map, cpu, NULL);
+}
+
+static int perf_stat__get_core(struct cpu_map *map, int cpu)
+{
+ return cpu_map__get_core(map, cpu, NULL);
+}
+
+static int cpu_map__get_max(struct cpu_map *map)
+{
+ int i, max = -1;
+
+ for (i = 0; i < map->nr; i++) {
+ if (map->map[i] > max)
+ max = map->map[i];
+ }
+
+ return max;
+}
+
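+/*
+ * Cache of aggregation ids indexed by logical CPU number, filled
+ * lazily by perf_stat__get_aggr() so that the topology lookup behind
+ * get_id() runs at most once per CPU.
+ */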
+static struct cpu_map *cpus_aggr_map;
+
+static int perf_stat__get_aggr(aggr_get_id_t get_id, struct cpu_map *map, int idx)
+{
+ int cpu;
+
+ if (idx >= map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ if (cpus_aggr_map->map[cpu] == -1)
+ cpus_aggr_map->map[cpu] = get_id(map, idx);
+
+ return cpus_aggr_map->map[cpu];
+}
+
+static int perf_stat__get_socket_cached(struct cpu_map *map, int idx)
+{
+ return perf_stat__get_aggr(perf_stat__get_socket, map, idx);
+}
+
+static int perf_stat__get_core_cached(struct cpu_map *map, int idx)
+{
+ return perf_stat__get_aggr(perf_stat__get_core, map, idx);
+}
+
+static int perf_stat_init_aggr_mode(void)
+{
+ int nr;
+
+ switch (stat_config.aggr_mode) {
+ case AGGR_SOCKET:
+ if (cpu_map__build_socket_map(evsel_list->cpus, &aggr_map)) {
+ perror("cannot build socket map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_socket_cached;
+ break;
+ case AGGR_CORE:
+ if (cpu_map__build_core_map(evsel_list->cpus, &aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_core_cached;
+ break;
+ case AGGR_NONE:
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ /*
+ * The evsel_list->cpus is the base we operate on; the highest
+ * cpu number in it determines the size of the aggregation
+ * translation cpumap.
+ */
+ nr = cpu_map__get_max(evsel_list->cpus);
+ cpus_aggr_map = cpu_map__empty_new(nr + 1);
+ return cpus_aggr_map ? 0 : -ENOMEM;
+}
+
+static void perf_stat__exit_aggr_mode(void)
+{
+ cpu_map__put(aggr_map);
+ cpu_map__put(cpus_aggr_map);
+ aggr_map = NULL;
+ cpus_aggr_map = NULL;
+}
+
+static inline int perf_env__get_cpu(struct perf_env *env, struct cpu_map *map, int idx)
+{
+ int cpu;
+
+ if (idx >= map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ if (cpu >= env->nr_cpus_avail)
+ return -1;
+
+ return cpu;
+}
+
+static int perf_env__get_socket(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int cpu = perf_env__get_cpu(env, map, idx);
+
+ return cpu == -1 ? -1 : env->cpu[cpu].socket_id;
+}
+
+static int perf_env__get_core(struct cpu_map *map, int idx, void *data)
+{
+ struct perf_env *env = data;
+ int core = -1, cpu = perf_env__get_cpu(env, map, idx);
+
+ if (cpu != -1) {
+ int socket_id = env->cpu[cpu].socket_id;
+
+ /*
+ * Encode the socket id in the upper 16 bits: core_id is
+ * relative to its socket, and we need a global id, so we
+ * combine the socket and core ids.
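+ * For example (illustrative), socket 1 / core 3 is
+ * encoded as (1 << 16) | 3 = 0x10003.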
+ */
+ core = (socket_id << 16) | (env->cpu[cpu].core_id & 0xffff);
+ }
+
+ return core;
+}
+
+static int perf_env__build_socket_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **sockp)
+{
+ return cpu_map__build_map(cpus, sockp, perf_env__get_socket, env);
+}
+
+static int perf_env__build_core_map(struct perf_env *env, struct cpu_map *cpus,
+ struct cpu_map **corep)
+{
+ return cpu_map__build_map(cpus, corep, perf_env__get_core, env);
+}
+
+static int perf_stat__get_socket_file(struct cpu_map *map, int idx)
+{
+ return perf_env__get_socket(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat__get_core_file(struct cpu_map *map, int idx)
+{
+ return perf_env__get_core(map, idx, &perf_stat.session->header.env);
+}
+
+static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
+{
+ struct perf_env *env = &st->session->header.env;
+
+ switch (stat_config.aggr_mode) {
+ case AGGR_SOCKET:
+ if (perf_env__build_socket_map(env, evsel_list->cpus, &aggr_map)) {
+ perror("cannot build socket map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_socket_file;
+ break;
+ case AGGR_CORE:
+ if (perf_env__build_core_map(env, evsel_list->cpus, &aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ aggr_get_id = perf_stat__get_core_file;
+ break;
+ case AGGR_NONE:
+ case AGGR_GLOBAL:
+ case AGGR_THREAD:
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
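+/*
+ * Keep only the topdown events that the "cpu" PMU supports and join
+ * them into a single comma-separated string, wrapped in {} when they
+ * should be scheduled as one group.  Illustrative example: the list
+ * {"topdown-total-slots", "topdown-slots-retired"} with use_group set
+ * becomes "{topdown-total-slots,topdown-slots-retired}".
+ */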
+static int topdown_filter_events(const char **attr, char **str, bool use_group)
+{
+ int off = 0;
+ int i;
+ int len = 0;
+ char *s;
+
+ for (i = 0; attr[i]; i++) {
+ if (pmu_have_event("cpu", attr[i])) {
+ len += strlen(attr[i]) + 1;
+ attr[i - off] = attr[i];
+ } else
+ off++;
+ }
+ attr[i - off] = NULL;
+
+ *str = malloc(len + 1 + 2);
+ if (!*str)
+ return -1;
+ s = *str;
+ if (i - off == 0) {
+ *s = 0;
+ return 0;
+ }
+ if (use_group)
+ *s++ = '{';
+ for (i = 0; attr[i]; i++) {
+ strcpy(s, attr[i]);
+ s += strlen(s);
+ *s++ = ',';
+ }
+ if (use_group) {
+ s[-1] = '}';
+ *s = 0;
+ } else
+ s[-1] = 0;
+ return 0;
+}
+
+__weak bool arch_topdown_check_group(bool *warn)
+{
+ *warn = false;
+ return false;
+}
+
+__weak void arch_topdown_group_warn(void)
+{
+}
+
+/*
+ * Add default attributes, if there were no attributes specified or
+ * if -d/--detailed, -d -d or -d -d -d is used:
+ */
+static int add_default_attributes(void)
+{
+ int err;
+ struct perf_event_attr default_attrs0[] = {
+
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS },
+ { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS },
+
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES },
+};
+ struct perf_event_attr frontend_attrs[] = {
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
+};
+ struct perf_event_attr backend_attrs[] = {
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
+};
+ struct perf_event_attr default_attrs1[] = {
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
+ { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES },
+
+};
+
+/*
+ * Detailed stats (-d), covering the L1 and last level data caches:
+ */
+ struct perf_event_attr detailed_attrs[] = {
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_LL << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_LL << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+};
+
+/*
+ * Very detailed stats (-d -d), covering the instruction cache and the TLB caches:
+ */
+ struct perf_event_attr very_detailed_attrs[] = {
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1I << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1I << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_DTLB << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_DTLB << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_ITLB << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_ITLB << 0 |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+
+};
+
+/*
+ * Very, very detailed stats (-d -d -d), adding prefetch events:
+ */
+ struct perf_event_attr very_very_detailed_attrs[] = {
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D << 0 |
+ (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) },
+
+ { .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D << 0 |
+ (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) },
+};
+ struct parse_events_error errinfo;
+
+ /* Set attrs if no event is selected and !null_run: */
+ if (null_run)
+ return 0;
+
+ if (transaction_run) {
+ /*
+ * Handle -T as -M transaction. Once platform-specific metrics
+ * support has been added to the json files, all architectures
+ * will use this approach. To determine transaction support
+ * on an architecture, test for such a metric name.
+ */
+ if (metricgroup__has_metric("transaction")) {
+ struct option opt = { .value = &evsel_list };
+
+ return metricgroup__parse_groups(&opt, "transaction",
+ &metric_events);
+ }
+
+ if (pmu_have_event("cpu", "cycles-ct") &&
+ pmu_have_event("cpu", "el-start"))
+ err = parse_events(evsel_list, transaction_attrs,
+ &errinfo);
+ else
+ err = parse_events(evsel_list,
+ transaction_limited_attrs,
+ &errinfo);
+ if (err) {
+ fprintf(stderr, "Cannot set up transaction events\n");
+ parse_events_print_error(&errinfo, transaction_attrs);
+ return -1;
+ }
+ return 0;
+ }
+
+ if (smi_cost) {
+ int smi;
+
+ if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
+ fprintf(stderr, "freeze_on_smi is not supported.\n");
+ return -1;
+ }
+
+ if (!smi) {
+ if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
+ fprintf(stderr, "Failed to set freeze_on_smi.\n");
+ return -1;
+ }
+ smi_reset = true;
+ }
+
+ if (pmu_have_event("msr", "aperf") &&
+ pmu_have_event("msr", "smi")) {
+ if (!force_metric_only)
+ metric_only = true;
+ err = parse_events(evsel_list, smi_cost_attrs, &errinfo);
+ } else {
+ fprintf(stderr, "To measure SMI cost, it needs "
+ "msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
+ parse_events_print_error(&errinfo, smi_cost_attrs);
+ return -1;
+ }
+ if (err) {
+ fprintf(stderr, "Cannot set up SMI cost events\n");
+ return -1;
+ }
+ return 0;
+ }
+
+ if (topdown_run) {
+ char *str = NULL;
+ bool warn = false;
+
+ if (stat_config.aggr_mode != AGGR_GLOBAL &&
+ stat_config.aggr_mode != AGGR_CORE) {
+ pr_err("top down event configuration requires --per-core mode\n");
+ return -1;
+ }
+ stat_config.aggr_mode = AGGR_CORE;
+ if (nr_cgroups || !target__has_cpu(&target)) {
+ pr_err("top down event configuration requires system-wide mode (-a)\n");
+ return -1;
+ }
+
+ if (!force_metric_only)
+ metric_only = true;
+ if (topdown_filter_events(topdown_attrs, &str,
+ arch_topdown_check_group(&warn)) < 0) {
+ pr_err("Out of memory\n");
+ return -1;
+ }
+ if (topdown_attrs[0] && str) {
+ if (warn)
+ arch_topdown_group_warn();
+ err = parse_events(evsel_list, str, &errinfo);
+ if (err) {
+ fprintf(stderr,
+ "Cannot set up top down events %s: %d\n",
+ str, err);
+ parse_events_print_error(&errinfo, str);
+ free(str);
+ return -1;
+ }
+ } else {
+ fprintf(stderr, "System does not support topdown\n");
+ return -1;
+ }
+ free(str);
+ }
+
+ if (!evsel_list->nr_entries) {
+ if (target__has_cpu(&target))
+ default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
+
+ if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+ return -1;
+ if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
+ if (perf_evlist__add_default_attrs(evsel_list,
+ frontend_attrs) < 0)
+ return -1;
+ }
+ if (pmu_have_event("cpu", "stalled-cycles-backend")) {
+ if (perf_evlist__add_default_attrs(evsel_list,
+ backend_attrs) < 0)
+ return -1;
+ }
+ if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+ return -1;
+ }
+
+ /* Detailed events get appended to the event list: */
+
+ if (detailed_run < 1)
+ return 0;
+
+ /* Append detailed run extra attributes: */
+ if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+ return -1;
+
+ if (detailed_run < 2)
+ return 0;
+
+ /* Append very detailed run extra attributes: */
+ if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+ return -1;
+
+ if (detailed_run < 3)
+ return 0;
+
+ /* Append very, very detailed run extra attributes: */
+ return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+}
+
+static const char * const stat_record_usage[] = {
+ "perf stat record [<options>]",
+ NULL,
+};
+
+static void init_features(struct perf_session *session)
+{
+ int feat;
+
+ for (feat = HEADER_FIRST_FEATURE; feat < HEADER_LAST_FEATURE; feat++)
+ perf_header__set_feat(&session->header, feat);
+
+ perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
+ perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+ perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
+}
+
+static int __cmd_record(int argc, const char **argv)
+{
+ struct perf_session *session;
+ struct perf_data *data = &perf_stat.data;
+
+ argc = parse_options(argc, argv, stat_options, stat_record_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (output_name)
+ data->file.path = output_name;
+
+ if (run_count != 1 || forever) {
+ pr_err("Cannot use -r option with perf stat record.\n");
+ return -1;
+ }
+
+ session = perf_session__new(data, false, NULL);
+ if (session == NULL) {
+ pr_err("Perf session creation failed.\n");
+ return -1;
+ }
+
+ init_features(session);
+
+ session->evlist = evsel_list;
+ perf_stat.session = session;
+ perf_stat.record = true;
+ return argc;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *stat_round = &event->stat_round;
+ struct perf_evsel *counter;
+ struct timespec tsh, *ts = NULL;
+ const char **argv = session->header.env.cmdline_argv;
+ int argc = session->header.env.nr_cmdline;
+
+ evlist__for_each_entry(evsel_list, counter)
+ perf_stat_process_counter(&stat_config, counter);
+
+ if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
+ update_stats(&walltime_nsecs_stats, stat_round->time);
+
+ if (stat_config.interval && stat_round->time) {
+ tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
+ tsh.tv_nsec = stat_round->time % NSEC_PER_SEC;
+ ts = &tsh;
+ }
+
+ print_counters(ts, argc, argv);
+ return 0;
+}
+
+static
+int process_stat_config_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+
+ if (cpu_map__empty(st->cpus)) {
+ if (st->aggr_mode != AGGR_UNSET)
+ pr_warning("warning: processing task data, aggregation mode not set\n");
+ return 0;
+ }
+
+ if (st->aggr_mode != AGGR_UNSET)
+ stat_config.aggr_mode = st->aggr_mode;
+
+ if (perf_stat.data.is_pipe)
+ perf_stat_init_aggr_mode();
+ else
+ perf_stat_init_aggr_mode_file(st);
+
+ return 0;
+}
+
+static int set_maps(struct perf_stat *st)
+{
+ if (!st->cpus || !st->threads)
+ return 0;
+
+ if (WARN_ONCE(st->maps_allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evsel_list, st->cpus, st->threads);
+
+ if (perf_evlist__alloc_stats(evsel_list, true))
+ return -ENOMEM;
+
+ st->maps_allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+
+ if (st->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ st->threads = thread_map__new_event(&event->thread_map);
+ if (!st->threads)
+ return -ENOMEM;
+
+ return set_maps(st);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_stat *st = container_of(tool, struct perf_stat, tool);
+ struct cpu_map *cpus;
+
+ if (st->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!cpus)
+ return -ENOMEM;
+
+ st->cpus = cpus;
+ return set_maps(st);
+}
+
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+ int i;
+
+ config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+ if (!config->stats)
+ return -1;
+
+ config->stats_num = nthreads;
+
+ for (i = 0; i < nthreads; i++)
+ runtime_stat__init(&config->stats[i]);
+
+ return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+ int i;
+
+ if (!config->stats)
+ return;
+
+ for (i = 0; i < config->stats_num; i++)
+ runtime_stat__exit(&config->stats[i]);
+
+ free(config->stats);
+}
+
+static const char * const stat_report_usage[] = {
+ "perf stat report [<options>]",
+ NULL,
+};
+
+static struct perf_stat perf_stat = {
+ .tool = {
+ .attr = perf_event__process_attr,
+ .event_update = perf_event__process_event_update,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
+ .stat_config = process_stat_config_event,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
+ },
+ .aggr_mode = AGGR_UNSET,
+};
+
+static int __cmd_report(int argc, const char **argv)
+{
+ struct perf_session *session;
+ const struct option options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_SET_UINT(0, "per-socket", &perf_stat.aggr_mode,
+ "aggregate counts per processor socket", AGGR_SOCKET),
+ OPT_SET_UINT(0, "per-core", &perf_stat.aggr_mode,
+ "aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT('A', "no-aggr", &perf_stat.aggr_mode,
+ "disable CPU count aggregation", AGGR_NONE),
+ OPT_END()
+ };
+ struct stat st;
+ int ret;
+
+ argc = parse_options(argc, argv, options, stat_report_usage, 0);
+
+ if (!input_name || !strlen(input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ input_name = "-";
+ else
+ input_name = "perf.data";
+ }
+
+ perf_stat.data.file.path = input_name;
+ perf_stat.data.mode = PERF_DATA_MODE_READ;
+
+ session = perf_session__new(&perf_stat.data, false, &perf_stat.tool);
+ if (session == NULL)
+ return -1;
+
+ perf_stat.session = session;
+ stat_config.output = stderr;
+ evsel_list = session->evlist;
+
+ ret = perf_session__process_events(session);
+ if (ret)
+ return ret;
+
+ perf_session__delete(session);
+ return 0;
+}
+
+static void setup_system_wide(int forks)
+{
+ /*
+ * Make system wide (-a) the default target if
+ * no target was specified and one of the following
+ * conditions is met:
+ *
+ * - there's no workload specified
+ * - there is a workload specified but all requested
+ * events are system wide events
+ */
+ if (!target__none(&target))
+ return;
+
+ if (!forks)
+ target.system_wide = true;
+ else {
+ struct perf_evsel *counter;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->system_wide)
+ return;
+ }
+
+ if (evsel_list->nr_entries)
+ target.system_wide = true;
+ }
+}
+
+int cmd_stat(int argc, const char **argv)
+{
+ const char * const stat_usage[] = {
+ "perf stat [<options>] [<command>]",
+ NULL
+ };
+ int status = -EINVAL, run_idx;
+ const char *mode;
+ FILE *output = stderr;
+ unsigned int interval, timeout;
+ const char * const stat_subcommands[] = { "record", "report" };
+
+ setlocale(LC_ALL, "");
+
+ evsel_list = perf_evlist__new();
+ if (evsel_list == NULL)
+ return -ENOMEM;
+
+ parse_events__shrink_config_terms();
+
+ /* String-parsing callback-based options would segfault when negated */
+ set_option_flag(stat_options, 'e', "event", PARSE_OPT_NONEG);
+ set_option_flag(stat_options, 'M', "metrics", PARSE_OPT_NONEG);
+ set_option_flag(stat_options, 'G', "cgroup", PARSE_OPT_NONEG);
+
+ argc = parse_options_subcommand(argc, argv, stat_options, stat_subcommands,
+ (const char **) stat_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+ perf_stat__collect_metric_expr(evsel_list);
+ perf_stat__init_shadow_stats();
+
+ if (csv_sep) {
+ csv_output = true;
+ if (!strcmp(csv_sep, "\\t"))
+ csv_sep = "\t";
+ } else
+ csv_sep = DEFAULT_SEPARATOR;
+
+ if (argc && !strncmp(argv[0], "rec", 3)) {
+ argc = __cmd_record(argc, argv);
+ if (argc < 0)
+ return -1;
+ } else if (argc && !strncmp(argv[0], "rep", 3))
+ return __cmd_report(argc, argv);
+
+ interval = stat_config.interval;
+ timeout = stat_config.timeout;
+
+ /*
+ * For record command the -o is already taken care of.
+ */
+ if (!STAT_RECORD && output_name && strcmp(output_name, "-"))
+ output = NULL;
+
+ if (output_name && output_fd) {
+ fprintf(stderr, "cannot use both --output and --log-fd\n");
+ parse_options_usage(stat_usage, stat_options, "o", 1);
+ parse_options_usage(NULL, stat_options, "log-fd", 0);
+ goto out;
+ }
+
+ if (metric_only && stat_config.aggr_mode == AGGR_THREAD) {
+ fprintf(stderr, "--metric-only is not supported with --per-thread\n");
+ goto out;
+ }
+
+ if (metric_only && run_count > 1) {
+ fprintf(stderr, "--metric-only is not supported with -r\n");
+ goto out;
+ }
+
+ if (walltime_run_table && run_count <= 1) {
+ fprintf(stderr, "--table is only supported with -r\n");
+ parse_options_usage(stat_usage, stat_options, "r", 1);
+ parse_options_usage(NULL, stat_options, "table", 0);
+ goto out;
+ }
+
+ if (output_fd < 0) {
+ fprintf(stderr, "argument to --log-fd must be a > 0\n");
+ parse_options_usage(stat_usage, stat_options, "log-fd", 0);
+ goto out;
+ }
+
+ if (!output) {
+ struct timespec tm;
+ mode = append_file ? "a" : "w";
+
+ output = fopen(output_name, mode);
+ if (!output) {
+ perror("failed to create output file");
+ return -1;
+ }
+ clock_gettime(CLOCK_REALTIME, &tm);
+ fprintf(output, "# started on %s\n", ctime(&tm.tv_sec));
+ } else if (output_fd > 0) {
+ mode = append_file ? "a" : "w";
+ output = fdopen(output_fd, mode);
+ if (!output) {
+ perror("Failed opening logfd");
+ return -errno;
+ }
+ }
+
+ stat_config.output = output;
+
+ /*
+ * let the spreadsheet do the pretty-printing
+ */
+ if (csv_output) {
+ /* User explicitly passed -B? */
+ if (big_num_opt == 1) {
+ fprintf(stderr, "-B option not supported with -x\n");
+ parse_options_usage(stat_usage, stat_options, "B", 1);
+ parse_options_usage(NULL, stat_options, "x", 1);
+ goto out;
+ } else /* Nope, so disable big number formatting */
+ big_num = false;
+ } else if (big_num_opt == 0) /* User passed --no-big-num */
+ big_num = false;
+
+ setup_system_wide(argc);
+
+ /*
+ * Display user/system times only for single
+ * run and when there's specified tracee.
+ */
+ if ((run_count == 1) && target__none(&target))
+ ru_display = true;
+
+ if (run_count < 0) {
+ pr_err("Run count must be a positive number\n");
+ parse_options_usage(stat_usage, stat_options, "r", 1);
+ goto out;
+ } else if (run_count == 0) {
+ forever = true;
+ run_count = 1;
+ }
+
+ if (walltime_run_table) {
+ walltime_run = zalloc(run_count * sizeof(walltime_run[0]));
+ if (!walltime_run) {
+ pr_err("failed to setup -r option");
+ goto out;
+ }
+ }
+
+ if ((stat_config.aggr_mode == AGGR_THREAD) &&
+ !target__has_task(&target)) {
+ if (!target.system_wide || target.cpu_list) {
+ fprintf(stderr, "The --per-thread option is only "
+ "available when monitoring via -p -t -a "
+ "options or only --per-thread.\n");
+ parse_options_usage(NULL, stat_options, "p", 1);
+ parse_options_usage(NULL, stat_options, "t", 1);
+ goto out;
+ }
+ }
+
+ /*
+ * no_aggr and cgroup are for system-wide mode only;
+ * --per-thread is aggregated per thread, so we don't mix it
+ * with the cpu mode.
+ */
+ if (((stat_config.aggr_mode != AGGR_GLOBAL &&
+ stat_config.aggr_mode != AGGR_THREAD) || nr_cgroups) &&
+ !target__has_cpu(&target)) {
+ fprintf(stderr, "both cgroup and no-aggregation "
+ "modes only available in system-wide mode\n");
+
+ parse_options_usage(stat_usage, stat_options, "G", 1);
+ parse_options_usage(NULL, stat_options, "A", 1);
+ parse_options_usage(NULL, stat_options, "a", 1);
+ goto out;
+ }
+
+ if (add_default_attributes())
+ goto out;
+
+ target__validate(&target);
+
+ if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide))
+ target.per_thread = true;
+
+ if (perf_evlist__create_maps(evsel_list, &target) < 0) {
+ if (target__has_task(&target)) {
+ pr_err("Problems finding threads of monitor\n");
+ parse_options_usage(stat_usage, stat_options, "p", 1);
+ parse_options_usage(NULL, stat_options, "t", 1);
+ } else if (target__has_cpu(&target)) {
+ perror("failed to parse CPUs map");
+ parse_options_usage(stat_usage, stat_options, "C", 1);
+ parse_options_usage(NULL, stat_options, "a", 1);
+ }
+ goto out;
+ }
+
+ /*
+ * Initialize thread_map with comm names,
+ * so we could print it out on output.
+ */
+ if (stat_config.aggr_mode == AGGR_THREAD) {
+ thread_map__read_comms(evsel_list->threads);
+ if (target.system_wide) {
+ if (runtime_stat_new(&stat_config,
+ thread_map__nr(evsel_list->threads))) {
+ goto out;
+ }
+ }
+ }
+
+ if (stat_config.times && interval)
+ interval_count = true;
+ else if (stat_config.times && !interval) {
+ pr_err("interval-count option should be used together with "
+ "interval-print.\n");
+ parse_options_usage(stat_usage, stat_options, "interval-count", 0);
+ parse_options_usage(stat_usage, stat_options, "I", 1);
+ goto out;
+ }
+
+ if (timeout && timeout < 100) {
+ if (timeout < 10) {
+ pr_err("timeout must be >= 10ms.\n");
+ parse_options_usage(stat_usage, stat_options, "timeout", 0);
+ goto out;
+ } else
+ pr_warning("timeout < 100ms. "
+ "The overhead percentage could be high in some cases. "
+ "Please proceed with caution.\n");
+ }
+ if (timeout && interval) {
+ pr_err("timeout option is not supported with interval-print.\n");
+ parse_options_usage(stat_usage, stat_options, "timeout", 0);
+ parse_options_usage(stat_usage, stat_options, "I", 1);
+ goto out;
+ }
+
+ if (perf_evlist__alloc_stats(evsel_list, interval))
+ goto out;
+
+ if (perf_stat_init_aggr_mode())
+ goto out;
+
+ /*
+ * We don't want to block the signals - that would cause
+ * child tasks to inherit the blocked mask and Ctrl-C would
+ * not work. What we want is for Ctrl-C to work in the
+ * exec()-ed task, but to be ignored by perf stat itself:
+ */
+ atexit(sig_atexit);
+ if (!forever)
+ signal(SIGINT, skip_signal);
+ signal(SIGCHLD, skip_signal);
+ signal(SIGALRM, skip_signal);
+ signal(SIGABRT, skip_signal);
+
+ status = 0;
+ for (run_idx = 0; forever || run_idx < run_count; run_idx++) {
+ if (run_count != 1 && verbose > 0)
+ fprintf(output, "[ perf stat: executing run #%d ... ]\n",
+ run_idx + 1);
+
+ if (run_idx != 0)
+ perf_evlist__reset_prev_raw_counts(evsel_list);
+
+ status = run_perf_stat(argc, argv, run_idx);
+ if (forever && status != -1 && !interval) {
+ print_counters(NULL, argc, argv);
+ perf_stat__reset_stats();
+ }
+ }
+
+ if (!forever && status != -1 && !interval)
+ print_counters(NULL, argc, argv);
+
+ if (STAT_RECORD) {
+ /*
+ * We synthesize the kernel mmap record just so that older tools
+ * don't emit warnings about not being able to resolve symbols
+ * due to /proc/sys/kernel/kptr_restrict settings and instead provide
+ * a saner message about no samples being in the perf.data file.
+ *
+ * This also serves to suppress a warning about f_header.data.size == 0
+ * in header.c at the moment 'perf stat record' gets introduced, which
+ * is not really needed once we start adding the stat specific PERF_RECORD_
+ * records, but the need to suppress the kptr_restrict messages in older
+ * tools remains -acme
+ */
+ int fd = perf_data__fd(&perf_stat.data);
+ int err = perf_event__synthesize_kernel_mmap((void *)&perf_stat,
+ process_synthesized_event,
+ &perf_stat.session->machines.host);
+ if (err) {
+ pr_warning("Couldn't synthesize the kernel mmap record, harmless, "
+ "older tools may produce warnings about this file\n.");
+ }
+
+ if (!interval) {
+ if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+ pr_err("failed to write stat round event\n");
+ }
+
+ if (!perf_stat.data.is_pipe) {
+ perf_stat.session->header.data_size += perf_stat.bytes_written;
+ perf_session__write_header(perf_stat.session, evsel_list, fd, true);
+ }
+
+ perf_session__delete(perf_stat.session);
+ }
+
+ perf_stat__exit_aggr_mode();
+ perf_evlist__free_stats(evsel_list);
+out:
+ free(walltime_run);
+
+ if (smi_cost && smi_reset)
+ sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
+
+ perf_evlist__delete(evsel_list);
+
+ runtime_stat_delete(&stat_config);
+
+ return status;
+}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
new file mode 100644
index 000000000..775b99833
--- /dev/null
+++ b/tools/perf/builtin-timechart.c
@@ -0,0 +1,2018 @@
+/*
+ * builtin-timechart.c - make an svg timechart of system activity
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <traceevent/event-parse.h>
+
+#include "builtin.h"
+
+#include "util/util.h"
+
+#include "util/color.h"
+#include <linux/list.h>
+#include "util/cache.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/time64.h>
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/callchain.h"
+
+#include "perf.h"
+#include "util/header.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/event.h"
+#include "util/session.h"
+#include "util/svghelper.h"
+#include "util/tool.h"
+#include "util/data.h"
+#include "util/debug.h"
+
+#ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
+FILE *open_memstream(char **ptr, size_t *sizeloc);
+#endif
+
+#define SUPPORT_OLD_POWER_EVENTS 1
+#define PWR_EVENT_EXIT -1
+
+struct per_pid;
+struct power_event;
+struct wake_event;
+
+struct timechart {
+ struct perf_tool tool;
+ struct per_pid *all_data;
+ struct power_event *power_events;
+ struct wake_event *wake_events;
+ int proc_num;
+ unsigned int numcpus;
+ u64 min_freq, /* Lowest CPU frequency seen */
+ max_freq, /* Highest CPU frequency seen */
+ turbo_frequency,
+ first_time, last_time;
+ bool power_only,
+ tasks_only,
+ with_backtrace,
+ topology;
+ bool force;
+ /* IO related settings */
+ bool io_only,
+ skip_eagain;
+ u64 io_events;
+ u64 min_time,
+ merge_dist;
+};
+
+struct per_pidcomm;
+struct cpu_sample;
+struct io_sample;
+
+/*
+ * Data structure layout:
+ * We keep a list of "pid"s, matching the kernel's notion of a task struct.
+ * Each "pid" entry has a list of "comm"s.
+ * This is because we want to track different programs differently, while
+ * exec reuses the original pid (by design).
+ * Each comm has a list of samples that will be used to draw the
+ * final graph.
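+ * Illustrative example: a shell (one pid) that execs "ls" ends up as
+ * one per_pid entry with two per_pidcomm entries, "sh" and "ls", each
+ * with its own sample list.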
+ */
+
+struct per_pid {
+ struct per_pid *next;
+
+ int pid;
+ int ppid;
+
+ u64 start_time;
+ u64 end_time;
+ u64 total_time;
+ u64 total_bytes;
+ int display;
+
+ struct per_pidcomm *all;
+ struct per_pidcomm *current;
+};
+
+
+struct per_pidcomm {
+ struct per_pidcomm *next;
+
+ u64 start_time;
+ u64 end_time;
+ u64 total_time;
+ u64 max_bytes;
+ u64 total_bytes;
+
+ int Y;
+ int display;
+
+ long state;
+ u64 state_since;
+
+ char *comm;
+
+ struct cpu_sample *samples;
+ struct io_sample *io_samples;
+};
+
+struct sample_wrapper {
+ struct sample_wrapper *next;
+
+ u64 timestamp;
+ unsigned char data[0];
+};
+
+#define TYPE_NONE 0
+#define TYPE_RUNNING 1
+#define TYPE_WAITING 2
+#define TYPE_BLOCKED 3
+
+struct cpu_sample {
+ struct cpu_sample *next;
+
+ u64 start_time;
+ u64 end_time;
+ int type;
+ int cpu;
+ const char *backtrace;
+};
+
+enum {
+ IOTYPE_READ,
+ IOTYPE_WRITE,
+ IOTYPE_SYNC,
+ IOTYPE_TX,
+ IOTYPE_RX,
+ IOTYPE_POLL,
+};
+
+struct io_sample {
+ struct io_sample *next;
+
+ u64 start_time;
+ u64 end_time;
+ u64 bytes;
+ int type;
+ int fd;
+ int err;
+ int merges;
+};
+
+#define CSTATE 1
+#define PSTATE 2
+
+struct power_event {
+ struct power_event *next;
+ int type;
+ int state;
+ u64 start_time;
+ u64 end_time;
+ int cpu;
+};
+
+struct wake_event {
+ struct wake_event *next;
+ int waker;
+ int wakee;
+ u64 time;
+ const char *backtrace;
+};
+
+struct process_filter {
+ char *name;
+ int pid;
+ struct process_filter *next;
+};
+
+static struct process_filter *process_filter;
+
+
+static struct per_pid *find_create_pid(struct timechart *tchart, int pid)
+{
+ struct per_pid *cursor = tchart->all_data;
+
+ while (cursor) {
+ if (cursor->pid == pid)
+ return cursor;
+ cursor = cursor->next;
+ }
+ cursor = zalloc(sizeof(*cursor));
+ assert(cursor != NULL);
+ cursor->pid = pid;
+ cursor->next = tchart->all_data;
+ tchart->all_data = cursor;
+ return cursor;
+}
+
+static void pid_set_comm(struct timechart *tchart, int pid, char *comm)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ p = find_create_pid(tchart, pid);
+ c = p->all;
+ while (c) {
+ if (c->comm && strcmp(c->comm, comm) == 0) {
+ p->current = c;
+ return;
+ }
+ if (!c->comm) {
+ c->comm = strdup(comm);
+ p->current = c;
+ return;
+ }
+ c = c->next;
+ }
+ c = zalloc(sizeof(*c));
+ assert(c != NULL);
+ c->comm = strdup(comm);
+ p->current = c;
+ c->next = p->all;
+ p->all = c;
+}
+
+static void pid_fork(struct timechart *tchart, int pid, int ppid, u64 timestamp)
+{
+ struct per_pid *p, *pp;
+ p = find_create_pid(tchart, pid);
+ pp = find_create_pid(tchart, ppid);
+ p->ppid = ppid;
+ if (pp->current && pp->current->comm && !p->current)
+ pid_set_comm(tchart, pid, pp->current->comm);
+
+ p->start_time = timestamp;
+ if (p->current && !p->current->start_time) {
+ p->current->start_time = timestamp;
+ p->current->state_since = timestamp;
+ }
+}
+
+static void pid_exit(struct timechart *tchart, int pid, u64 timestamp)
+{
+ struct per_pid *p;
+ p = find_create_pid(tchart, pid);
+ p->end_time = timestamp;
+ if (p->current)
+ p->current->end_time = timestamp;
+}
+
+static void pid_put_sample(struct timechart *tchart, int pid, int type,
+ unsigned int cpu, u64 start, u64 end,
+ const char *backtrace)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ struct cpu_sample *sample;
+
+ p = find_create_pid(tchart, pid);
+ c = p->current;
+ if (!c) {
+ c = zalloc(sizeof(*c));
+ assert(c != NULL);
+ p->current = c;
+ c->next = p->all;
+ p->all = c;
+ }
+
+ sample = zalloc(sizeof(*sample));
+ assert(sample != NULL);
+ sample->start_time = start;
+ sample->end_time = end;
+ sample->type = type;
+ sample->next = c->samples;
+ sample->cpu = cpu;
+ sample->backtrace = backtrace;
+ c->samples = sample;
+
+ if (sample->type == TYPE_RUNNING && end > start && start > 0) {
+ c->total_time += (end-start);
+ p->total_time += (end-start);
+ }
+
+ if (c->start_time == 0 || c->start_time > start)
+ c->start_time = start;
+ if (p->start_time == 0 || p->start_time > start)
+ p->start_time = start;
+}
+
+#define MAX_CPUS 4096
+
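+/*
+ * Per-CPU tracking of the C-state/P-state interval currently open:
+ * the *_start_times arrays hold the timestamp of the last state
+ * entry, the *_state arrays the state itself (a C-state number,
+ * resp. a frequency for P-states), both indexed by CPU id.
+ */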
+static u64 cpus_cstate_start_times[MAX_CPUS];
+static int cpus_cstate_state[MAX_CPUS];
+static u64 cpus_pstate_start_times[MAX_CPUS];
+static u64 cpus_pstate_state[MAX_CPUS];
+
+static int process_comm_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct timechart *tchart = container_of(tool, struct timechart, tool);
+ pid_set_comm(tchart, event->comm.tid, event->comm.comm);
+ return 0;
+}
+
+static int process_fork_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct timechart *tchart = container_of(tool, struct timechart, tool);
+ pid_fork(tchart, event->fork.pid, event->fork.ppid, event->fork.time);
+ return 0;
+}
+
+static int process_exit_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct timechart *tchart = container_of(tool, struct timechart, tool);
+ pid_exit(tchart, event->fork.pid, event->fork.time);
+ return 0;
+}
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static int use_old_power_events;
+#endif
+
+static void c_state_start(int cpu, u64 timestamp, int state)
+{
+ cpus_cstate_start_times[cpu] = timestamp;
+ cpus_cstate_state[cpu] = state;
+}
+
+static void c_state_end(struct timechart *tchart, int cpu, u64 timestamp)
+{
+ struct power_event *pwr = zalloc(sizeof(*pwr));
+
+ if (!pwr)
+ return;
+
+ pwr->state = cpus_cstate_state[cpu];
+ pwr->start_time = cpus_cstate_start_times[cpu];
+ pwr->end_time = timestamp;
+ pwr->cpu = cpu;
+ pwr->type = CSTATE;
+ pwr->next = tchart->power_events;
+
+ tchart->power_events = pwr;
+}
+
+static void p_state_change(struct timechart *tchart, int cpu, u64 timestamp, u64 new_freq)
+{
+ struct power_event *pwr;
+
+ if (new_freq > 8000000) /* detect invalid data */
+ return;
+
+ pwr = zalloc(sizeof(*pwr));
+ if (!pwr)
+ return;
+
+ pwr->state = cpus_pstate_state[cpu];
+ pwr->start_time = cpus_pstate_start_times[cpu];
+ pwr->end_time = timestamp;
+ pwr->cpu = cpu;
+ pwr->type = PSTATE;
+ pwr->next = tchart->power_events;
+
+ if (!pwr->start_time)
+ pwr->start_time = tchart->first_time;
+
+ tchart->power_events = pwr;
+
+ cpus_pstate_state[cpu] = new_freq;
+ cpus_pstate_start_times[cpu] = timestamp;
+
+ if ((u64)new_freq > tchart->max_freq)
+ tchart->max_freq = new_freq;
+
+ if (new_freq < tchart->min_freq || tchart->min_freq == 0)
+ tchart->min_freq = new_freq;
+
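+ /*
+ * cpufreq typically lists the turbo range as a single extra
+ * step 1000 kHz above the highest nominal frequency, so seeing
+ * a frequency exactly 1000 kHz below the maximum suggests the
+ * maximum is the turbo frequency.
+ */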
+ if (new_freq == tchart->max_freq - 1000)
+ tchart->turbo_frequency = tchart->max_freq;
+}
+
+static void sched_wakeup(struct timechart *tchart, int cpu, u64 timestamp,
+ int waker, int wakee, u8 flags, const char *backtrace)
+{
+ struct per_pid *p;
+ struct wake_event *we = zalloc(sizeof(*we));
+
+ if (!we)
+ return;
+
+ we->time = timestamp;
+ we->waker = waker;
+ we->backtrace = backtrace;
+
+ if ((flags & TRACE_FLAG_HARDIRQ) || (flags & TRACE_FLAG_SOFTIRQ))
+ we->waker = -1;
+
+ we->wakee = wakee;
+ we->next = tchart->wake_events;
+ tchart->wake_events = we;
+ p = find_create_pid(tchart, we->wakee);
+
+ if (p && p->current && p->current->state == TYPE_NONE) {
+ p->current->state_since = timestamp;
+ p->current->state = TYPE_WAITING;
+ }
+ if (p && p->current && p->current->state == TYPE_BLOCKED) {
+ pid_put_sample(tchart, p->pid, p->current->state, cpu,
+ p->current->state_since, timestamp, NULL);
+ p->current->state_since = timestamp;
+ p->current->state = TYPE_WAITING;
+ }
+}
+
+static void sched_switch(struct timechart *tchart, int cpu, u64 timestamp,
+ int prev_pid, int next_pid, u64 prev_state,
+ const char *backtrace)
+{
+ struct per_pid *p = NULL, *prev_p;
+
+ prev_p = find_create_pid(tchart, prev_pid);
+
+ p = find_create_pid(tchart, next_pid);
+
+ if (prev_p->current && prev_p->current->state != TYPE_NONE)
+ pid_put_sample(tchart, prev_pid, TYPE_RUNNING, cpu,
+ prev_p->current->state_since, timestamp,
+ backtrace);
+ if (p && p->current) {
+ if (p->current->state != TYPE_NONE)
+ pid_put_sample(tchart, next_pid, p->current->state, cpu,
+ p->current->state_since, timestamp,
+ backtrace);
+
+ p->current->state_since = timestamp;
+ p->current->state = TYPE_RUNNING;
+ }
+
+ if (prev_p->current) {
+ prev_p->current->state = TYPE_NONE;
+ prev_p->current->state_since = timestamp;
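+ /*
+ * Decode the scheduler state: bit 1 is TASK_UNINTERRUPTIBLE
+ * (blocked, e.g. on I/O), while 0 is TASK_RUNNING, i.e. the
+ * task was preempted and is still waiting for a CPU.
+ */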
+ if (prev_state & 2)
+ prev_p->current->state = TYPE_BLOCKED;
+ if (prev_state == 0)
+ prev_p->current->state = TYPE_WAITING;
+ }
+}
+
+static const char *cat_backtrace(union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct addr_location al;
+ unsigned int i;
+ char *p = NULL;
+ size_t p_len;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ struct addr_location tal;
+ struct ip_callchain *chain = sample->callchain;
+ FILE *f = open_memstream(&p, &p_len);
+
+ if (!f) {
+ perror("open_memstream error");
+ return NULL;
+ }
+
+ if (!chain)
+ goto exit;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ fprintf(stderr, "problem processing %d event, skipping it.\n",
+ event->header.type);
+ goto exit;
+ }
+
+ for (i = 0; i < chain->nr; i++) {
+ u64 ip;
+
+ if (callchain_param.order == ORDER_CALLEE)
+ ip = chain->ips[i];
+ else
+ ip = chain->ips[chain->nr - i - 1];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: "
+ "%"PRId64"\n", (s64) ip);
+
+ /*
+ * It seems the callchain is corrupted.
+ * Discard all.
+ */
+ zfree(&p);
+ goto exit_put;
+ }
+ continue;
+ }
+
+ tal.filtered = 0;
+ if (thread__find_symbol(al.thread, cpumode, ip, &tal))
+ fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
+ else
+ fprintf(f, "..... %016" PRIx64 "\n", ip);
+ }
+exit_put:
+ addr_location__put(&al);
+exit:
+ fclose(f);
+
+ return p;
+}
+
+typedef int (*tracepoint_handler)(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace);
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct timechart *tchart = container_of(tool, struct timechart, tool);
+
+ if (evsel->attr.sample_type & PERF_SAMPLE_TIME) {
+ if (!tchart->first_time || tchart->first_time > sample->time)
+ tchart->first_time = sample->time;
+ if (tchart->last_time < sample->time)
+ tchart->last_time = sample->time;
+ }
+
+ if (evsel->handler != NULL) {
+ tracepoint_handler f = evsel->handler;
+ return f(tchart, evsel, sample,
+ cat_backtrace(event, sample, machine));
+ }
+
+ return 0;
+}
+
+static int
+process_sample_cpu_idle(struct timechart *tchart __maybe_unused,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace __maybe_unused)
+{
+ u32 state = perf_evsel__intval(evsel, sample, "state");
+ u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+
+ if (state == (u32)PWR_EVENT_EXIT)
+ c_state_end(tchart, cpu_id, sample->time);
+ else
+ c_state_start(cpu_id, sample->time, state);
+ return 0;
+}
+
+static int
+process_sample_cpu_frequency(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace __maybe_unused)
+{
+ u32 state = perf_evsel__intval(evsel, sample, "state");
+ u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+
+ p_state_change(tchart, cpu_id, sample->time, state);
+ return 0;
+}
+
+static int
+process_sample_sched_wakeup(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace)
+{
+ u8 flags = perf_evsel__intval(evsel, sample, "common_flags");
+ int waker = perf_evsel__intval(evsel, sample, "common_pid");
+ int wakee = perf_evsel__intval(evsel, sample, "pid");
+
+ sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace);
+ return 0;
+}
+
+static int
+process_sample_sched_switch(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace)
+{
+ int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
+ int next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+
+ sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid,
+ prev_state, backtrace);
+ return 0;
+}
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+static int
+process_sample_power_start(struct timechart *tchart __maybe_unused,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace __maybe_unused)
+{
+ u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u64 value = perf_evsel__intval(evsel, sample, "value");
+
+ c_state_start(cpu_id, sample->time, value);
+ return 0;
+}
+
+static int
+process_sample_power_end(struct timechart *tchart,
+ struct perf_evsel *evsel __maybe_unused,
+ struct perf_sample *sample,
+ const char *backtrace __maybe_unused)
+{
+ c_state_end(tchart, sample->cpu, sample->time);
+ return 0;
+}
+
+static int
+process_sample_power_frequency(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *backtrace __maybe_unused)
+{
+ u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u64 value = perf_evsel__intval(evsel, sample, "value");
+
+ p_state_change(tchart, cpu_id, sample->time, value);
+ return 0;
+}
+#endif /* SUPPORT_OLD_POWER_EVENTS */
+
+/*
+ * After the last sample we need to wrap up the current C/P state
+ * and close out each CPU for these.
+ */
+static void end_sample_processing(struct timechart *tchart)
+{
+ u64 cpu;
+ struct power_event *pwr;
+
+ for (cpu = 0; cpu <= tchart->numcpus; cpu++) {
+ /* C state */
+#if 0
+ pwr = zalloc(sizeof(*pwr));
+ if (!pwr)
+ return;
+
+ pwr->state = cpus_cstate_state[cpu];
+ pwr->start_time = cpus_cstate_start_times[cpu];
+ pwr->end_time = tchart->last_time;
+ pwr->cpu = cpu;
+ pwr->type = CSTATE;
+ pwr->next = tchart->power_events;
+
+ tchart->power_events = pwr;
+#endif
+ /* P state */
+
+ pwr = zalloc(sizeof(*pwr));
+ if (!pwr)
+ return;
+
+ pwr->state = cpus_pstate_state[cpu];
+ pwr->start_time = cpus_pstate_start_times[cpu];
+ pwr->end_time = tchart->last_time;
+ pwr->cpu = cpu;
+ pwr->type = PSTATE;
+ pwr->next = tchart->power_events;
+
+ if (!pwr->start_time)
+ pwr->start_time = tchart->first_time;
+ if (!pwr->state)
+ pwr->state = tchart->min_freq;
+ tchart->power_events = pwr;
+ }
+}
+
+static int pid_begin_io_sample(struct timechart *tchart, int pid, int type,
+ u64 start, int fd)
+{
+ struct per_pid *p = find_create_pid(tchart, pid);
+ struct per_pidcomm *c = p->current;
+ struct io_sample *sample;
+ struct io_sample *prev;
+
+ if (!c) {
+ c = zalloc(sizeof(*c));
+ if (!c)
+ return -ENOMEM;
+ p->current = c;
+ c->next = p->all;
+ p->all = c;
+ }
+
+ prev = c->io_samples;
+
+ if (prev && prev->start_time && !prev->end_time) {
+ pr_warning("Skip invalid start event: "
+ "previous event already started!\n");
+
+ /* remove the previously started event, as we are not
+ * sure we will ever get an end for it */
+ c->io_samples = prev->next;
+ free(prev);
+ return 0;
+ }
+
+ sample = zalloc(sizeof(*sample));
+ if (!sample)
+ return -ENOMEM;
+ sample->start_time = start;
+ sample->type = type;
+ sample->fd = fd;
+ sample->next = c->io_samples;
+ c->io_samples = sample;
+
+ if (c->start_time == 0 || c->start_time > start)
+ c->start_time = start;
+
+ return 0;
+}
+
+static int pid_end_io_sample(struct timechart *tchart, int pid, int type,
+ u64 end, long ret)
+{
+ struct per_pid *p = find_create_pid(tchart, pid);
+ struct per_pidcomm *c = p->current;
+ struct io_sample *sample, *prev;
+
+ if (!c) {
+ pr_warning("Invalid pidcomm!\n");
+ return -1;
+ }
+
+ sample = c->io_samples;
+
+ if (!sample) /* skip partially captured events */
+ return 0;
+
+ if (sample->end_time) {
+ pr_warning("Skip invalid end event: "
+ "previous event already ended!\n");
+ return 0;
+ }
+
+ if (sample->type != type) {
+ pr_warning("Skip invalid end event: invalid event type!\n");
+ return 0;
+ }
+
+ sample->end_time = end;
+ prev = sample->next;
+
+ /* we want to be able to see small and fast transfers, so make them
+ * at least min_time long, but don't overlap them */
+ if (sample->end_time - sample->start_time < tchart->min_time)
+ sample->end_time = sample->start_time + tchart->min_time;
+ if (prev && sample->start_time < prev->end_time) {
+ if (prev->err) /* try to make errors more visible */
+ sample->start_time = prev->end_time;
+ else
+ prev->end_time = sample->start_time;
+ }
+
+ if (ret < 0) {
+ sample->err = ret;
+ } else if (type == IOTYPE_READ || type == IOTYPE_WRITE ||
+ type == IOTYPE_TX || type == IOTYPE_RX) {
+
+ if ((u64)ret > c->max_bytes)
+ c->max_bytes = ret;
+
+ c->total_bytes += ret;
+ p->total_bytes += ret;
+ sample->bytes = ret;
+ }
+
+ /* merge two requests to make svg smaller and render-friendly */
+ if (prev &&
+ prev->type == sample->type &&
+ prev->err == sample->err &&
+ prev->fd == sample->fd &&
+ prev->end_time + tchart->merge_dist >= sample->start_time) {
+
+ sample->bytes += prev->bytes;
+ sample->merges += prev->merges + 1;
+
+ sample->start_time = prev->start_time;
+ sample->next = prev->next;
+ free(prev);
+
+ if (!sample->err && sample->bytes > c->max_bytes)
+ c->max_bytes = sample->bytes;
+ }
+
+ tchart->io_events++;
+
+ return 0;
+}
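+
+/*
+ * Worked example of the merge logic above, assuming the default
+ * merge_dist of 1000ns: a read on fd 3 covering [1000, 1500] followed
+ * by another read on fd 3 starting at 2200 collapses into one sample
+ * spanning from 1000 with merges == 1, since 1500 + 1000 >= 2200.
+ * A read on a different fd, or one starting at 2600, stays separate.
+ */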
+
+static int
+process_enter_read(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long fd = perf_evsel__intval(evsel, sample, "fd");
+ return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ,
+ sample->time, fd);
+}
+
+static int
+process_exit_read(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long ret = perf_evsel__intval(evsel, sample, "ret");
+ return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ,
+ sample->time, ret);
+}
+
+static int
+process_enter_write(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long fd = perf_evsel__intval(evsel, sample, "fd");
+ return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE,
+ sample->time, fd);
+}
+
+static int
+process_exit_write(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long ret = perf_evsel__intval(evsel, sample, "ret");
+ return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE,
+ sample->time, ret);
+}
+
+static int
+process_enter_sync(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long fd = perf_evsel__intval(evsel, sample, "fd");
+ return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC,
+ sample->time, fd);
+}
+
+static int
+process_exit_sync(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long ret = perf_evsel__intval(evsel, sample, "ret");
+ return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC,
+ sample->time, ret);
+}
+
+static int
+process_enter_tx(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long fd = perf_evsel__intval(evsel, sample, "fd");
+ return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX,
+ sample->time, fd);
+}
+
+static int
+process_exit_tx(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long ret = perf_evsel__intval(evsel, sample, "ret");
+ return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX,
+ sample->time, ret);
+}
+
+static int
+process_enter_rx(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long fd = perf_evsel__intval(evsel, sample, "fd");
+ return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX,
+ sample->time, fd);
+}
+
+static int
+process_exit_rx(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long ret = perf_evsel__intval(evsel, sample, "ret");
+ return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX,
+ sample->time, ret);
+}
+
+static int
+process_enter_poll(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long fd = perf_evsel__intval(evsel, sample, "fd");
+ return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL,
+ sample->time, fd);
+}
+
+static int
+process_exit_poll(struct timechart *tchart,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ long ret = perf_evsel__intval(evsel, sample, "ret");
+ return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL,
+ sample->time, ret);
+}
+
+/*
+ * Sort the pid data structure
+ */
+static void sort_pids(struct timechart *tchart)
+{
+ struct per_pid *new_list, *p, *cursor, *prev;
+ /* sort by ppid first, then by pid, lowest to highest */
+
+ new_list = NULL;
+
+ while (tchart->all_data) {
+ p = tchart->all_data;
+ tchart->all_data = p->next;
+ p->next = NULL;
+
+ if (new_list == NULL) {
+ new_list = p;
+ p->next = NULL;
+ continue;
+ }
+ prev = NULL;
+ cursor = new_list;
+ while (cursor) {
+ if (cursor->ppid > p->ppid ||
+ (cursor->ppid == p->ppid && cursor->pid > p->pid)) {
+ /* must insert before */
+ if (prev) {
+ p->next = prev->next;
+ prev->next = p;
+ cursor = NULL;
+ continue;
+ } else {
+ p->next = new_list;
+ new_list = p;
+ cursor = NULL;
+ continue;
+ }
+ }
+
+ prev = cursor;
+ cursor = cursor->next;
+ if (!cursor)
+ prev->next = p;
+ }
+ }
+ tchart->all_data = new_list;
+}
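+
+/*
+ * Example of the resulting order: entries with (ppid, pid) pairs
+ * (1, 300), (0, 2), (1, 40) come out as (0, 2), (1, 40), (1, 300),
+ * i.e. an insertion sort keyed on ppid first, then pid, ascending.
+ */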
+
+
+static void draw_c_p_states(struct timechart *tchart)
+{
+ struct power_event *pwr;
+ pwr = tchart->power_events;
+
+ /*
+ * Two-pass drawing so that the P-state bars end up on top of the
+ * C-state blocks.
+ */
+ while (pwr) {
+ if (pwr->type == CSTATE)
+ svg_cstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
+ pwr = pwr->next;
+ }
+
+ pwr = tchart->power_events;
+ while (pwr) {
+ if (pwr->type == PSTATE) {
+ if (!pwr->state)
+ pwr->state = tchart->min_freq;
+ svg_pstate(pwr->cpu, pwr->start_time, pwr->end_time, pwr->state);
+ }
+ pwr = pwr->next;
+ }
+}
+
+static void draw_wakeups(struct timechart *tchart)
+{
+ struct wake_event *we;
+ struct per_pid *p;
+ struct per_pidcomm *c;
+
+ we = tchart->wake_events;
+ while (we) {
+ int from = 0, to = 0;
+ char *task_from = NULL, *task_to = NULL;
+
+ /* locate the column of the waker and wakee */
+ p = tchart->all_data;
+ while (p) {
+ if (p->pid == we->waker || p->pid == we->wakee) {
+ c = p->all;
+ while (c) {
+ if (c->Y && c->start_time <= we->time && c->end_time >= we->time) {
+ if (p->pid == we->waker && !from) {
+ from = c->Y;
+ task_from = strdup(c->comm);
+ }
+ if (p->pid == we->wakee && !to) {
+ to = c->Y;
+ task_to = strdup(c->comm);
+ }
+ }
+ c = c->next;
+ }
+ c = p->all;
+ while (c) {
+ if (p->pid == we->waker && !from) {
+ from = c->Y;
+ task_from = strdup(c->comm);
+ }
+ if (p->pid == we->wakee && !to) {
+ to = c->Y;
+ task_to = strdup(c->comm);
+ }
+ c = c->next;
+ }
+ }
+ p = p->next;
+ }
+
+ if (!task_from) {
+ task_from = malloc(40);
+ sprintf(task_from, "[%i]", we->waker);
+ }
+ if (!task_to) {
+ task_to = malloc(40);
+ sprintf(task_to, "[%i]", we->wakee);
+ }
+
+ if (we->waker == -1)
+ svg_interrupt(we->time, to, we->backtrace);
+ else if (from && to && abs(from - to) == 1)
+ svg_wakeline(we->time, from, to, we->backtrace);
+ else
+ svg_partial_wakeline(we->time, from, task_from, to,
+ task_to, we->backtrace);
+ we = we->next;
+
+ free(task_from);
+ free(task_to);
+ }
+}
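+
+/*
+ * Rendering rules, as implemented above: waker == -1 draws an
+ * interrupt marker, a wakeup between two adjacent rows
+ * (abs(from - to) == 1) a full wake line, and anything else a
+ * partial wake line labelled with the task names (or "[pid]" when
+ * no comm was recorded for the pid).
+ */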
+
+static void draw_cpu_usage(struct timechart *tchart)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ struct cpu_sample *sample;
+ p = tchart->all_data;
+ while (p) {
+ c = p->all;
+ while (c) {
+ sample = c->samples;
+ while (sample) {
+ if (sample->type == TYPE_RUNNING) {
+ svg_process(sample->cpu,
+ sample->start_time,
+ sample->end_time,
+ p->pid,
+ c->comm,
+ sample->backtrace);
+ }
+
+ sample = sample->next;
+ }
+ c = c->next;
+ }
+ p = p->next;
+ }
+}
+
+static void draw_io_bars(struct timechart *tchart)
+{
+ const char *suf;
+ double bytes;
+ char comm[256];
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ struct io_sample *sample;
+ int Y = 1;
+
+ p = tchart->all_data;
+ while (p) {
+ c = p->all;
+ while (c) {
+ if (!c->display) {
+ c->Y = 0;
+ c = c->next;
+ continue;
+ }
+
+ svg_box(Y, c->start_time, c->end_time, "process3");
+ for (sample = c->io_samples; sample; sample = sample->next) {
+ double h = (double)sample->bytes / c->max_bytes;
+
+ if (tchart->skip_eagain &&
+ sample->err == -EAGAIN)
+ continue;
+
+ if (sample->err)
+ h = 1;
+
+ if (sample->type == IOTYPE_SYNC)
+ svg_fbox(Y,
+ sample->start_time,
+ sample->end_time,
+ 1,
+ sample->err ? "error" : "sync",
+ sample->fd,
+ sample->err,
+ sample->merges);
+ else if (sample->type == IOTYPE_POLL)
+ svg_fbox(Y,
+ sample->start_time,
+ sample->end_time,
+ 1,
+ sample->err ? "error" : "poll",
+ sample->fd,
+ sample->err,
+ sample->merges);
+ else if (sample->type == IOTYPE_READ)
+ svg_ubox(Y,
+ sample->start_time,
+ sample->end_time,
+ h,
+ sample->err ? "error" : "disk",
+ sample->fd,
+ sample->err,
+ sample->merges);
+ else if (sample->type == IOTYPE_WRITE)
+ svg_lbox(Y,
+ sample->start_time,
+ sample->end_time,
+ h,
+ sample->err ? "error" : "disk",
+ sample->fd,
+ sample->err,
+ sample->merges);
+ else if (sample->type == IOTYPE_RX)
+ svg_ubox(Y,
+ sample->start_time,
+ sample->end_time,
+ h,
+ sample->err ? "error" : "net",
+ sample->fd,
+ sample->err,
+ sample->merges);
+ else if (sample->type == IOTYPE_TX)
+ svg_lbox(Y,
+ sample->start_time,
+ sample->end_time,
+ h,
+ sample->err ? "error" : "net",
+ sample->fd,
+ sample->err,
+ sample->merges);
+ }
+
+ suf = "";
+ bytes = c->total_bytes;
+ if (bytes > 1024) {
+ bytes = bytes / 1024;
+ suf = "K";
+ }
+ if (bytes > 1024) {
+ bytes = bytes / 1024;
+ suf = "M";
+ }
+ if (bytes > 1024) {
+ bytes = bytes / 1024;
+ suf = "G";
+ }
+
+
+ sprintf(comm, "%s:%i (%3.1f %sbytes)", c->comm ?: "", p->pid, bytes, suf);
+ svg_text(Y, c->start_time, comm);
+
+ c->Y = Y;
+ Y++;
+ c = c->next;
+ }
+ p = p->next;
+ }
+}
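+
+/*
+ * The suffix scaling above divides by 1024 up to three times, so e.g.
+ * c->total_bytes == 3500000 prints as "3.3 Mbytes", and anything past
+ * gigabytes is still shown with a "G" suffix (there is no "T" step).
+ */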
+
+static void draw_process_bars(struct timechart *tchart)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ struct cpu_sample *sample;
+ int Y = 0;
+
+ Y = 2 * tchart->numcpus + 2;
+
+ p = tchart->all_data;
+ while (p) {
+ c = p->all;
+ while (c) {
+ if (!c->display) {
+ c->Y = 0;
+ c = c->next;
+ continue;
+ }
+
+ svg_box(Y, c->start_time, c->end_time, "process");
+ sample = c->samples;
+ while (sample) {
+ if (sample->type == TYPE_RUNNING)
+ svg_running(Y, sample->cpu,
+ sample->start_time,
+ sample->end_time,
+ sample->backtrace);
+ if (sample->type == TYPE_BLOCKED)
+ svg_blocked(Y, sample->cpu,
+ sample->start_time,
+ sample->end_time,
+ sample->backtrace);
+ if (sample->type == TYPE_WAITING)
+ svg_waiting(Y, sample->cpu,
+ sample->start_time,
+ sample->end_time,
+ sample->backtrace);
+ sample = sample->next;
+ }
+
+ if (c->comm) {
+ char comm[256];
+ if (c->total_time > 5000000000) /* 5 seconds */
+ sprintf(comm, "%s:%i (%2.2fs)", c->comm, p->pid, c->total_time / (double)NSEC_PER_SEC);
+ else
+ sprintf(comm, "%s:%i (%3.1fms)", c->comm, p->pid, c->total_time / (double)NSEC_PER_MSEC);
+
+ svg_text(Y, c->start_time, comm);
+ }
+ c->Y = Y;
+ Y++;
+ c = c->next;
+ }
+ p = p->next;
+ }
+}
+
+static void add_process_filter(const char *string)
+{
+ int pid = strtoull(string, NULL, 10);
+ struct process_filter *filt = malloc(sizeof(*filt));
+
+ if (!filt)
+ return;
+
+ filt->name = strdup(string);
+ filt->pid = pid;
+ filt->next = process_filter;
+
+ process_filter = filt;
+}
+
+static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
+{
+ struct process_filter *filt;
+ if (!process_filter)
+ return 1;
+
+ filt = process_filter;
+ while (filt) {
+ if (filt->pid && p->pid == filt->pid)
+ return 1;
+ if (strcmp(filt->name, c->comm) == 0)
+ return 1;
+ filt = filt->next;
+ }
+ return 0;
+}
+
+static int determine_display_tasks_filtered(struct timechart *tchart)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ int count = 0;
+
+ p = tchart->all_data;
+ while (p) {
+ p->display = 0;
+ if (p->start_time == 1)
+ p->start_time = tchart->first_time;
+
+ /* no exit marker, task kept running to the end */
+ if (p->end_time == 0)
+ p->end_time = tchart->last_time;
+
+ c = p->all;
+
+ while (c) {
+ c->display = 0;
+
+ if (c->start_time == 1)
+ c->start_time = tchart->first_time;
+
+ if (passes_filter(p, c)) {
+ c->display = 1;
+ p->display = 1;
+ count++;
+ }
+
+ if (c->end_time == 0)
+ c->end_time = tchart->last_time;
+
+ c = c->next;
+ }
+ p = p->next;
+ }
+ return count;
+}
+
+static int determine_display_tasks(struct timechart *tchart, u64 threshold)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ int count = 0;
+
+ p = tchart->all_data;
+ while (p) {
+ p->display = 0;
+ if (p->start_time == 1)
+ p->start_time = tchart->first_time;
+
+ /* no exit marker, task kept running to the end */
+ if (p->end_time == 0)
+ p->end_time = tchart->last_time;
+ if (p->total_time >= threshold)
+ p->display = 1;
+
+ c = p->all;
+
+ while (c) {
+ c->display = 0;
+
+ if (c->start_time == 1)
+ c->start_time = tchart->first_time;
+
+ if (c->total_time >= threshold) {
+ c->display = 1;
+ count++;
+ }
+
+ if (c->end_time == 0)
+ c->end_time = tchart->last_time;
+
+ c = c->next;
+ }
+ p = p->next;
+ }
+ return count;
+}
+
+static int determine_display_io_tasks(struct timechart *timechart, u64 threshold)
+{
+ struct per_pid *p;
+ struct per_pidcomm *c;
+ int count = 0;
+
+ p = timechart->all_data;
+ while (p) {
+ /* no exit marker, task kept running to the end */
+ if (p->end_time == 0)
+ p->end_time = timechart->last_time;
+
+ c = p->all;
+
+ while (c) {
+ c->display = 0;
+
+ if (c->total_bytes >= threshold) {
+ c->display = 1;
+ count++;
+ }
+
+ if (c->end_time == 0)
+ c->end_time = timechart->last_time;
+
+ c = c->next;
+ }
+ p = p->next;
+ }
+ return count;
+}
+
+#define BYTES_THRESH (1 * 1024 * 1024)
+#define TIME_THRESH 10000000
+
+static void write_svg_file(struct timechart *tchart, const char *filename)
+{
+ u64 i;
+ int count;
+ int thresh = tchart->io_events ? BYTES_THRESH : TIME_THRESH;
+
+ if (tchart->power_only)
+ tchart->proc_num = 0;
+
+ /* We'd like to show at least proc_num tasks;
+ * be less picky if we have fewer */
+ do {
+ if (process_filter)
+ count = determine_display_tasks_filtered(tchart);
+ else if (tchart->io_events)
+ count = determine_display_io_tasks(tchart, thresh);
+ else
+ count = determine_display_tasks(tchart, thresh);
+ thresh /= 10;
+ } while (!process_filter && thresh && count < tchart->proc_num);
+
+ if (!tchart->proc_num)
+ count = 0;
+
+ if (tchart->io_events) {
+ open_svg(filename, 0, count, tchart->first_time, tchart->last_time);
+
+ svg_time_grid(0.5);
+ svg_io_legenda();
+
+ draw_io_bars(tchart);
+ } else {
+ open_svg(filename, tchart->numcpus, count, tchart->first_time, tchart->last_time);
+
+ svg_time_grid(0);
+
+ svg_legenda();
+
+ for (i = 0; i < tchart->numcpus; i++)
+ svg_cpu_box(i, tchart->max_freq, tchart->turbo_frequency);
+
+ draw_cpu_usage(tchart);
+ if (tchart->proc_num)
+ draw_process_bars(tchart);
+ if (!tchart->tasks_only)
+ draw_c_p_states(tchart);
+ if (tchart->proc_num)
+ draw_wakeups(tchart);
+ }
+
+ svg_close();
+}
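+
+/*
+ * Example of the threshold relaxation loop above: with the default
+ * proc_num of 15 and no IO events, the first pass uses TIME_THRESH
+ * (10ms) and retries at 1ms, 100us, ... until at least 15 tasks
+ * qualify or the threshold reaches zero.
+ */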
+
+static int process_header(struct perf_file_section *section __maybe_unused,
+ struct perf_header *ph,
+ int feat,
+ int fd __maybe_unused,
+ void *data)
+{
+ struct timechart *tchart = data;
+
+ switch (feat) {
+ case HEADER_NRCPUS:
+ tchart->numcpus = ph->env.nr_cpus_avail;
+ break;
+
+ case HEADER_CPU_TOPOLOGY:
+ if (!tchart->topology)
+ break;
+
+ if (svg_build_topology_map(ph->env.sibling_cores,
+ ph->env.nr_sibling_cores,
+ ph->env.sibling_threads,
+ ph->env.nr_sibling_threads))
+ fprintf(stderr, "problem building topology\n");
+ break;
+
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int __cmd_timechart(struct timechart *tchart, const char *output_name)
+{
+ const struct perf_evsel_str_handler power_tracepoints[] = {
+ { "power:cpu_idle", process_sample_cpu_idle },
+ { "power:cpu_frequency", process_sample_cpu_frequency },
+ { "sched:sched_wakeup", process_sample_sched_wakeup },
+ { "sched:sched_switch", process_sample_sched_switch },
+#ifdef SUPPORT_OLD_POWER_EVENTS
+ { "power:power_start", process_sample_power_start },
+ { "power:power_end", process_sample_power_end },
+ { "power:power_frequency", process_sample_power_frequency },
+#endif
+
+ { "syscalls:sys_enter_read", process_enter_read },
+ { "syscalls:sys_enter_pread64", process_enter_read },
+ { "syscalls:sys_enter_readv", process_enter_read },
+ { "syscalls:sys_enter_preadv", process_enter_read },
+ { "syscalls:sys_enter_write", process_enter_write },
+ { "syscalls:sys_enter_pwrite64", process_enter_write },
+ { "syscalls:sys_enter_writev", process_enter_write },
+ { "syscalls:sys_enter_pwritev", process_enter_write },
+ { "syscalls:sys_enter_sync", process_enter_sync },
+ { "syscalls:sys_enter_sync_file_range", process_enter_sync },
+ { "syscalls:sys_enter_fsync", process_enter_sync },
+ { "syscalls:sys_enter_msync", process_enter_sync },
+ { "syscalls:sys_enter_recvfrom", process_enter_rx },
+ { "syscalls:sys_enter_recvmmsg", process_enter_rx },
+ { "syscalls:sys_enter_recvmsg", process_enter_rx },
+ { "syscalls:sys_enter_sendto", process_enter_tx },
+ { "syscalls:sys_enter_sendmsg", process_enter_tx },
+ { "syscalls:sys_enter_sendmmsg", process_enter_tx },
+ { "syscalls:sys_enter_epoll_pwait", process_enter_poll },
+ { "syscalls:sys_enter_epoll_wait", process_enter_poll },
+ { "syscalls:sys_enter_poll", process_enter_poll },
+ { "syscalls:sys_enter_ppoll", process_enter_poll },
+ { "syscalls:sys_enter_pselect6", process_enter_poll },
+ { "syscalls:sys_enter_select", process_enter_poll },
+
+ { "syscalls:sys_exit_read", process_exit_read },
+ { "syscalls:sys_exit_pread64", process_exit_read },
+ { "syscalls:sys_exit_readv", process_exit_read },
+ { "syscalls:sys_exit_preadv", process_exit_read },
+ { "syscalls:sys_exit_write", process_exit_write },
+ { "syscalls:sys_exit_pwrite64", process_exit_write },
+ { "syscalls:sys_exit_writev", process_exit_write },
+ { "syscalls:sys_exit_pwritev", process_exit_write },
+ { "syscalls:sys_exit_sync", process_exit_sync },
+ { "syscalls:sys_exit_sync_file_range", process_exit_sync },
+ { "syscalls:sys_exit_fsync", process_exit_sync },
+ { "syscalls:sys_exit_msync", process_exit_sync },
+ { "syscalls:sys_exit_recvfrom", process_exit_rx },
+ { "syscalls:sys_exit_recvmmsg", process_exit_rx },
+ { "syscalls:sys_exit_recvmsg", process_exit_rx },
+ { "syscalls:sys_exit_sendto", process_exit_tx },
+ { "syscalls:sys_exit_sendmsg", process_exit_tx },
+ { "syscalls:sys_exit_sendmmsg", process_exit_tx },
+ { "syscalls:sys_exit_epoll_pwait", process_exit_poll },
+ { "syscalls:sys_exit_epoll_wait", process_exit_poll },
+ { "syscalls:sys_exit_poll", process_exit_poll },
+ { "syscalls:sys_exit_ppoll", process_exit_poll },
+ { "syscalls:sys_exit_pselect6", process_exit_poll },
+ { "syscalls:sys_exit_select", process_exit_poll },
+ };
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = tchart->force,
+ };
+
+ struct perf_session *session = perf_session__new(&data, false,
+ &tchart->tool);
+ int ret = -EINVAL;
+
+ if (session == NULL)
+ return -1;
+
+ symbol__init(&session->header.env);
+
+ (void)perf_header__process_sections(&session->header,
+ perf_data__fd(session->data),
+ tchart,
+ process_header);
+
+ if (!perf_session__has_traces(session, "timechart record"))
+ goto out_delete;
+
+ if (perf_session__set_tracepoints_handlers(session,
+ power_tracepoints)) {
+ pr_err("Initializing session tracepoint handlers failed\n");
+ goto out_delete;
+ }
+
+ ret = perf_session__process_events(session);
+ if (ret)
+ goto out_delete;
+
+ end_sample_processing(tchart);
+
+ sort_pids(tchart);
+
+ write_svg_file(tchart, output_name);
+
+ pr_info("Written %2.1f seconds of trace to %s.\n",
+ (tchart->last_time - tchart->first_time) / (double)NSEC_PER_SEC, output_name);
+out_delete:
+ perf_session__delete(session);
+ return ret;
+}
+
+static int timechart__io_record(int argc, const char **argv)
+{
+ unsigned int rec_argc, i;
+ const char **rec_argv;
+ const char **p;
+ char *filter = NULL;
+
+ const char * const common_args[] = {
+ "record", "-a", "-R", "-c", "1",
+ };
+ unsigned int common_args_nr = ARRAY_SIZE(common_args);
+
+ const char * const disk_events[] = {
+ "syscalls:sys_enter_read",
+ "syscalls:sys_enter_pread64",
+ "syscalls:sys_enter_readv",
+ "syscalls:sys_enter_preadv",
+ "syscalls:sys_enter_write",
+ "syscalls:sys_enter_pwrite64",
+ "syscalls:sys_enter_writev",
+ "syscalls:sys_enter_pwritev",
+ "syscalls:sys_enter_sync",
+ "syscalls:sys_enter_sync_file_range",
+ "syscalls:sys_enter_fsync",
+ "syscalls:sys_enter_msync",
+
+ "syscalls:sys_exit_read",
+ "syscalls:sys_exit_pread64",
+ "syscalls:sys_exit_readv",
+ "syscalls:sys_exit_preadv",
+ "syscalls:sys_exit_write",
+ "syscalls:sys_exit_pwrite64",
+ "syscalls:sys_exit_writev",
+ "syscalls:sys_exit_pwritev",
+ "syscalls:sys_exit_sync",
+ "syscalls:sys_exit_sync_file_range",
+ "syscalls:sys_exit_fsync",
+ "syscalls:sys_exit_msync",
+ };
+ unsigned int disk_events_nr = ARRAY_SIZE(disk_events);
+
+ const char * const net_events[] = {
+ "syscalls:sys_enter_recvfrom",
+ "syscalls:sys_enter_recvmmsg",
+ "syscalls:sys_enter_recvmsg",
+ "syscalls:sys_enter_sendto",
+ "syscalls:sys_enter_sendmsg",
+ "syscalls:sys_enter_sendmmsg",
+
+ "syscalls:sys_exit_recvfrom",
+ "syscalls:sys_exit_recvmmsg",
+ "syscalls:sys_exit_recvmsg",
+ "syscalls:sys_exit_sendto",
+ "syscalls:sys_exit_sendmsg",
+ "syscalls:sys_exit_sendmmsg",
+ };
+ unsigned int net_events_nr = ARRAY_SIZE(net_events);
+
+ const char * const poll_events[] = {
+ "syscalls:sys_enter_epoll_pwait",
+ "syscalls:sys_enter_epoll_wait",
+ "syscalls:sys_enter_poll",
+ "syscalls:sys_enter_ppoll",
+ "syscalls:sys_enter_pselect6",
+ "syscalls:sys_enter_select",
+
+ "syscalls:sys_exit_epoll_pwait",
+ "syscalls:sys_exit_epoll_wait",
+ "syscalls:sys_exit_poll",
+ "syscalls:sys_exit_ppoll",
+ "syscalls:sys_exit_pselect6",
+ "syscalls:sys_exit_select",
+ };
+ unsigned int poll_events_nr = ARRAY_SIZE(poll_events);
+
+ rec_argc = common_args_nr +
+ disk_events_nr * 4 +
+ net_events_nr * 4 +
+ poll_events_nr * 4 +
+ argc;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ if (rec_argv == NULL)
+ return -ENOMEM;
+
+ if (asprintf(&filter, "common_pid != %d", getpid()) < 0) {
+ free(rec_argv);
+ return -ENOMEM;
+ }
+
+ p = rec_argv;
+ for (i = 0; i < common_args_nr; i++)
+ *p++ = strdup(common_args[i]);
+
+ for (i = 0; i < disk_events_nr; i++) {
+ if (!is_valid_tracepoint(disk_events[i])) {
+ rec_argc -= 4;
+ continue;
+ }
+
+ *p++ = "-e";
+ *p++ = strdup(disk_events[i]);
+ *p++ = "--filter";
+ *p++ = filter;
+ }
+ for (i = 0; i < net_events_nr; i++) {
+ if (!is_valid_tracepoint(net_events[i])) {
+ rec_argc -= 4;
+ continue;
+ }
+
+ *p++ = "-e";
+ *p++ = strdup(net_events[i]);
+ *p++ = "--filter";
+ *p++ = filter;
+ }
+ for (i = 0; i < poll_events_nr; i++) {
+ if (!is_valid_tracepoint(poll_events[i])) {
+ rec_argc -= 4;
+ continue;
+ }
+
+ *p++ = "-e";
+ *p++ = strdup(poll_events[i]);
+ *p++ = "--filter";
+ *p++ = filter;
+ }
+
+ for (i = 0; i < (unsigned int)argc; i++)
+ *p++ = argv[i];
+
+ return cmd_record(rec_argc, rec_argv);
+}
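+
+/*
+ * The command line assembled above is roughly (the pid varies):
+ *
+ *   perf record -a -R -c 1 \
+ *           -e syscalls:sys_enter_read --filter "common_pid != <pid>" \
+ *           -e syscalls:sys_exit_read --filter "common_pid != <pid>" \
+ *           ...
+ *
+ * where the filter keeps perf's own IO out of the trace and any
+ * tracepoint missing on the running kernel is silently dropped.
+ */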
+
+
+static int timechart__record(struct timechart *tchart, int argc, const char **argv)
+{
+ unsigned int rec_argc, i, j;
+ const char **rec_argv;
+ const char **p;
+ unsigned int record_elems;
+
+ const char * const common_args[] = {
+ "record", "-a", "-R", "-c", "1",
+ };
+ unsigned int common_args_nr = ARRAY_SIZE(common_args);
+
+ const char * const backtrace_args[] = {
+ "-g",
+ };
+ unsigned int backtrace_args_no = ARRAY_SIZE(backtrace_args);
+
+ const char * const power_args[] = {
+ "-e", "power:cpu_frequency",
+ "-e", "power:cpu_idle",
+ };
+ unsigned int power_args_nr = ARRAY_SIZE(power_args);
+
+ const char * const old_power_args[] = {
+#ifdef SUPPORT_OLD_POWER_EVENTS
+ "-e", "power:power_start",
+ "-e", "power:power_end",
+ "-e", "power:power_frequency",
+#endif
+ };
+ unsigned int old_power_args_nr = ARRAY_SIZE(old_power_args);
+
+ const char * const tasks_args[] = {
+ "-e", "sched:sched_wakeup",
+ "-e", "sched:sched_switch",
+ };
+ unsigned int tasks_args_nr = ARRAY_SIZE(tasks_args);
+
+#ifdef SUPPORT_OLD_POWER_EVENTS
+ if (!is_valid_tracepoint("power:cpu_idle") &&
+ is_valid_tracepoint("power:power_start")) {
+ use_old_power_events = 1;
+ power_args_nr = 0;
+ } else {
+ old_power_args_nr = 0;
+ }
+#endif
+
+ if (tchart->power_only)
+ tasks_args_nr = 0;
+
+ if (tchart->tasks_only) {
+ power_args_nr = 0;
+ old_power_args_nr = 0;
+ }
+
+ if (!tchart->with_backtrace)
+ backtrace_args_no = 0;
+
+ record_elems = common_args_nr + tasks_args_nr +
+ power_args_nr + old_power_args_nr + backtrace_args_no;
+
+ rec_argc = record_elems + argc;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ if (rec_argv == NULL)
+ return -ENOMEM;
+
+ p = rec_argv;
+ for (i = 0; i < common_args_nr; i++)
+ *p++ = strdup(common_args[i]);
+
+ for (i = 0; i < backtrace_args_no; i++)
+ *p++ = strdup(backtrace_args[i]);
+
+ for (i = 0; i < tasks_args_nr; i++)
+ *p++ = strdup(tasks_args[i]);
+
+ for (i = 0; i < power_args_nr; i++)
+ *p++ = strdup(power_args[i]);
+
+ for (i = 0; i < old_power_args_nr; i++)
+ *p++ = strdup(old_power_args[i]);
+
+ for (j = 0; j < (unsigned int)argc; j++)
+ *p++ = argv[j];
+
+ return cmd_record(rec_argc, rec_argv);
+}
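+
+/*
+ * For a default invocation this builds roughly:
+ *
+ *   perf record -a -R -c 1 [-g]
+ *           -e sched:sched_wakeup -e sched:sched_switch
+ *           -e power:cpu_frequency -e power:cpu_idle
+ *
+ * falling back to the older power:power_start/power_end/
+ * power_frequency tracepoints when power:cpu_idle is unavailable
+ * and SUPPORT_OLD_POWER_EVENTS is defined.
+ */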
+
+static int
+parse_process(const struct option *opt __maybe_unused, const char *arg,
+ int __maybe_unused unset)
+{
+ if (arg)
+ add_process_filter(arg);
+ return 0;
+}
+
+static int
+parse_highlight(const struct option *opt __maybe_unused, const char *arg,
+ int __maybe_unused unset)
+{
+ unsigned long duration = strtoul(arg, NULL, 0);
+
+ if (svg_highlight || svg_highlight_name)
+ return -1;
+
+ if (duration)
+ svg_highlight = duration;
+ else
+ svg_highlight_name = strdup(arg);
+
+ return 0;
+}
+
+static int
+parse_time(const struct option *opt, const char *arg, int __maybe_unused unset)
+{
+ char unit = 'n';
+ u64 *value = opt->value;
+
+ if (sscanf(arg, "%" PRIu64 "%cs", value, &unit) > 0) {
+ switch (unit) {
+ case 'm':
+ *value *= NSEC_PER_MSEC;
+ break;
+ case 'u':
+ *value *= NSEC_PER_USEC;
+ break;
+ case 'n':
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ return 0;
+}
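+
+/*
+ * Example: "--io-min-time 1ms" stores 1000000 (ns) in the target,
+ * "500us" stores 500000, and a bare "2000" is taken as nanoseconds;
+ * any other unit suffix makes the parse fail with -1.
+ */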
+
+int cmd_timechart(int argc, const char **argv)
+{
+ struct timechart tchart = {
+ .tool = {
+ .comm = process_comm_event,
+ .fork = process_fork_event,
+ .exit = process_exit_event,
+ .sample = process_sample_event,
+ .ordered_events = true,
+ },
+ .proc_num = 15,
+ .min_time = NSEC_PER_MSEC,
+ .merge_dist = 1000,
+ };
+ const char *output_name = "output.svg";
+ const struct option timechart_common_options[] = {
+ OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"),
+ OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"),
+ OPT_END()
+ };
+ const struct option timechart_options[] = {
+ OPT_STRING('i', "input", &input_name, "file", "input file name"),
+ OPT_STRING('o', "output", &output_name, "file", "output file name"),
+ OPT_INTEGER('w', "width", &svg_page_width, "page width"),
+ OPT_CALLBACK(0, "highlight", NULL, "duration or task name",
+ "highlight tasks. Pass duration in ns or process name.",
+ parse_highlight),
+ OPT_CALLBACK('p', "process", NULL, "process",
+ "process selector. Pass a pid or process name.",
+ parse_process),
+ OPT_CALLBACK(0, "symfs", NULL, "directory",
+ "Look for files with symbols relative to this directory",
+ symbol__config_symfs),
+ OPT_INTEGER('n', "proc-num", &tchart.proc_num,
+ "min. number of tasks to print"),
+ OPT_BOOLEAN('t', "topology", &tchart.topology,
+ "sort CPUs according to topology"),
+ OPT_BOOLEAN(0, "io-skip-eagain", &tchart.skip_eagain,
+ "skip EAGAIN errors"),
+ OPT_CALLBACK(0, "io-min-time", &tchart.min_time, "time",
+ "all IO faster than min-time will visually appear longer",
+ parse_time),
+ OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time",
+ "merge events that are merge-dist us apart",
+ parse_time),
+ OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"),
+ OPT_PARENT(timechart_common_options),
+ };
+ const char * const timechart_subcommands[] = { "record", NULL };
+ const char *timechart_usage[] = {
+ "perf timechart [<options>] {record}",
+ NULL
+ };
+ const struct option timechart_record_options[] = {
+ OPT_BOOLEAN('I', "io-only", &tchart.io_only,
+ "record only IO data"),
+ OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"),
+ OPT_PARENT(timechart_common_options),
+ };
+ const char * const timechart_record_usage[] = {
+ "perf timechart record [<options>]",
+ NULL
+ };
+ argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands,
+ timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (tchart.power_only && tchart.tasks_only) {
+ pr_err("-P and -T options cannot be used at the same time.\n");
+ return -1;
+ }
+
+ if (argc && !strncmp(argv[0], "rec", 3)) {
+ argc = parse_options(argc, argv, timechart_record_options,
+ timechart_record_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (tchart.power_only && tchart.tasks_only) {
+ pr_err("-P and -T options cannot be used at the same time.\n");
+ return -1;
+ }
+
+ if (tchart.io_only)
+ return timechart__io_record(argc, argv);
+ else
+ return timechart__record(&tchart, argc, argv);
+ } else if (argc)
+ usage_with_options(timechart_usage, timechart_options);
+
+ setup_pager();
+
+ return __cmd_timechart(&tchart, output_name);
+}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
new file mode 100644
index 000000000..9caab84c6
--- /dev/null
+++ b/tools/perf/builtin-top.c
@@ -0,0 +1,1518 @@
+/*
+ * builtin-top.c
+ *
+ * Builtin top command: Display a continuously updated profile of
+ * any workload, CPU or specific PID.
+ *
+ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar <mingo@redhat.com>
+ * 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Improvements and fixes by:
+ *
+ * Arjan van de Ven <arjan@linux.intel.com>
+ * Yanmin Zhang <yanmin.zhang@intel.com>
+ * Wu Fengguang <fengguang.wu@intel.com>
+ * Mike Galbraith <efault@gmx.de>
+ * Paul Mackerras <paulus@samba.org>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "builtin.h"
+
+#include "perf.h"
+
+#include "util/annotate.h"
+#include "util/config.h"
+#include "util/color.h"
+#include "util/drv_configs.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/event.h"
+#include "util/machine.h"
+#include "util/session.h"
+#include "util/symbol.h"
+#include "util/thread.h"
+#include "util/thread_map.h"
+#include "util/top.h"
+#include <linux/rbtree.h>
+#include <subcmd/parse-options.h>
+#include "util/parse-events.h"
+#include "util/cpumap.h"
+#include "util/xyarray.h"
+#include "util/sort.h"
+#include "util/term.h"
+#include "util/intlist.h"
+#include "util/parse-branch-options.h"
+#include "arch/common.h"
+
+#include "util/debug.h"
+
+#include <assert.h>
+#include <elf.h>
+#include <fcntl.h>
+
+#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include <errno.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <poll.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/mman.h>
+
+#include <linux/stringify.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+#include "sane_ctype.h"
+
+static volatile int done;
+static volatile int resize;
+
+#define HEADER_LINE_NR 5
+
+static void perf_top__update_print_entries(struct perf_top *top)
+{
+ top->print_entries = top->winsize.ws_row - HEADER_LINE_NR;
+}
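+
+/*
+ * With HEADER_LINE_NR == 5 this leaves, e.g., 19 entry lines on a
+ * 24-row terminal; perf_top__resize() below re-reads the terminal
+ * size before recomputing this.
+ */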
+
+static void winch_sig(int sig __maybe_unused)
+{
+ resize = 1;
+}
+
+static void perf_top__resize(struct perf_top *top)
+{
+ get_term_dimensions(&top->winsize);
+ perf_top__update_print_entries(top);
+}
+
+static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he)
+{
+ struct perf_evsel *evsel;
+ struct symbol *sym;
+ struct annotation *notes;
+ struct map *map;
+ int err = -1;
+
+ if (!he || !he->ms.sym)
+ return -1;
+
+ evsel = hists_to_evsel(he->hists);
+
+ sym = he->ms.sym;
+ map = he->ms.map;
+
+ /*
+ * We can't annotate with just /proc/kallsyms
+ */
+ if (map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+ !dso__is_kcore(map->dso)) {
+ pr_err("Can't annotate %s: No vmlinux file was found in the "
+ "path\n", sym->name);
+ sleep(1);
+ return -1;
+ }
+
+ notes = symbol__annotation(sym);
+ pthread_mutex_lock(&notes->lock);
+
+ if (!symbol__hists(sym, top->evlist->nr_entries)) {
+ pthread_mutex_unlock(&notes->lock);
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ sleep(1);
+ return err;
+ }
+
+ err = symbol__annotate(sym, map, evsel, 0, &top->annotation_opts, NULL);
+ if (err == 0) {
+ top->sym_filter_entry = he;
+ } else {
+ char msg[BUFSIZ];
+ symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+ pr_err("Couldn't annotate %s: %s\n", sym->name, msg);
+ }
+
+ pthread_mutex_unlock(&notes->lock);
+ return err;
+}
+
+static void __zero_source_counters(struct hist_entry *he)
+{
+ struct symbol *sym = he->ms.sym;
+ symbol__annotate_zero_histograms(sym);
+}
+
+static void ui__warn_map_erange(struct map *map, struct symbol *sym, u64 ip)
+{
+ struct utsname uts;
+ int err = uname(&uts);
+
+ ui__warning("Out of bounds address found:\n\n"
+ "Addr: %" PRIx64 "\n"
+ "DSO: %s %c\n"
+ "Map: %" PRIx64 "-%" PRIx64 "\n"
+ "Symbol: %" PRIx64 "-%" PRIx64 " %c %s\n"
+ "Arch: %s\n"
+ "Kernel: %s\n"
+ "Tools: %s\n\n"
+ "Not all samples will be on the annotation output.\n\n"
+ "Please report to linux-kernel@vger.kernel.org\n",
+ ip, map->dso->long_name, dso__symtab_origin(map->dso),
+ map->start, map->end, sym->start, sym->end,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w', sym->name,
+ err ? "[unknown]" : uts.machine,
+ err ? "[unknown]" : uts.release, perf_version_string);
+ if (use_browser <= 0)
+ sleep(5);
+
+ map->erange_warned = true;
+}
+
+static void perf_top__record_precise_ip(struct perf_top *top,
+ struct hist_entry *he,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel, u64 ip)
+{
+ struct annotation *notes;
+ struct symbol *sym = he->ms.sym;
+ int err = 0;
+
+ if (sym == NULL || (use_browser == 0 &&
+ (top->sym_filter_entry == NULL ||
+ top->sym_filter_entry->ms.sym != sym)))
+ return;
+
+ notes = symbol__annotation(sym);
+
+ if (pthread_mutex_trylock(&notes->lock))
+ return;
+
+ err = hist_entry__inc_addr_samples(he, sample, evsel, ip);
+
+ pthread_mutex_unlock(&notes->lock);
+
+ if (unlikely(err)) {
+ /*
+ * This function is now called with he->hists->lock held.
+ * Release it before going to sleep.
+ */
+ pthread_mutex_unlock(&he->hists->lock);
+
+ if (err == -ERANGE && !he->ms.map->erange_warned)
+ ui__warn_map_erange(he->ms.map, sym, ip);
+ else if (err == -ENOMEM) {
+ pr_err("Not enough memory for annotating '%s' symbol!\n",
+ sym->name);
+ sleep(1);
+ }
+
+ pthread_mutex_lock(&he->hists->lock);
+ }
+}
+
+static void perf_top__show_details(struct perf_top *top)
+{
+ struct hist_entry *he = top->sym_filter_entry;
+ struct perf_evsel *evsel;
+ struct annotation *notes;
+ struct symbol *symbol;
+ int more;
+
+ if (!he)
+ return;
+
+ evsel = hists_to_evsel(he->hists);
+
+ symbol = he->ms.sym;
+ notes = symbol__annotation(symbol);
+
+ pthread_mutex_lock(&notes->lock);
+
+ symbol__calc_percent(symbol, evsel);
+
+ if (notes->src == NULL)
+ goto out_unlock;
+
+ printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
+ printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
+
+ more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, &top->annotation_opts);
+
+ if (top->evlist->enabled) {
+ if (top->zero)
+ symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
+ else
+ symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+ }
+ if (more != 0)
+ printf("%d lines not displayed, maybe increase display entries [e]\n", more);
+out_unlock:
+ pthread_mutex_unlock(&notes->lock);
+}
+
+static void perf_top__print_sym_table(struct perf_top *top)
+{
+ char bf[160];
+ int printed = 0;
+ const int win_width = top->winsize.ws_col - 1;
+ struct perf_evsel *evsel = top->sym_evsel;
+ struct hists *hists = evsel__hists(evsel);
+
+ puts(CONSOLE_CLEAR);
+
+ perf_top__header_snprintf(top, bf, sizeof(bf));
+ printf("%s\n", bf);
+
+ perf_top__reset_sample_counters(top);
+
+ printf("%-*.*s\n", win_width, win_width, graph_dotted_line);
+
+ if (!top->record_opts.overwrite &&
+ (hists->stats.nr_lost_warned !=
+ hists->stats.nr_events[PERF_RECORD_LOST])) {
+ hists->stats.nr_lost_warned =
+ hists->stats.nr_events[PERF_RECORD_LOST];
+ color_fprintf(stdout, PERF_COLOR_RED,
+ "WARNING: LOST %d chunks, Check IO/CPU overload",
+ hists->stats.nr_lost_warned);
+ ++printed;
+ }
+
+ if (top->sym_filter_entry) {
+ perf_top__show_details(top);
+ return;
+ }
+
+ if (top->evlist->enabled) {
+ if (top->zero) {
+ hists__delete_entries(hists);
+ } else {
+ hists__decay_entries(hists, top->hide_user_symbols,
+ top->hide_kernel_symbols);
+ }
+ }
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ hists__output_recalc_col_len(hists, top->print_entries - printed);
+ putchar('\n');
+ hists__fprintf(hists, false, top->print_entries - printed, win_width,
+ top->min_percent, stdout, !symbol_conf.use_callchain);
+}
+
+static void prompt_integer(int *target, const char *msg)
+{
+ char *buf = malloc(0), *p;
+ size_t dummy = 0;
+ int tmp;
+
+ fprintf(stdout, "\n%s: ", msg);
+ if (getline(&buf, &dummy, stdin) < 0)
+ return;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = 0;
+
+ p = buf;
+ while (*p) {
+ if (!isdigit(*p))
+ goto out_free;
+ p++;
+ }
+ tmp = strtoul(buf, NULL, 10);
+ *target = tmp;
+out_free:
+ free(buf);
+}
+
+static void prompt_percent(int *target, const char *msg)
+{
+ int tmp = 0;
+
+ prompt_integer(&tmp, msg);
+ if (tmp >= 0 && tmp <= 100)
+ *target = tmp;
+}
+
+static void perf_top__prompt_symbol(struct perf_top *top, const char *msg)
+{
+ char *buf = malloc(0), *p;
+ struct hist_entry *syme = top->sym_filter_entry, *n, *found = NULL;
+ struct hists *hists = evsel__hists(top->sym_evsel);
+ struct rb_node *next;
+ size_t dummy = 0;
+
+ /* zero counters of active symbol */
+ if (syme) {
+ __zero_source_counters(syme);
+ top->sym_filter_entry = NULL;
+ }
+
+ fprintf(stdout, "\n%s: ", msg);
+ if (getline(&buf, &dummy, stdin) < 0)
+ goto out_free;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = 0;
+
+ next = rb_first(&hists->entries);
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ if (n->ms.sym && !strcmp(buf, n->ms.sym->name)) {
+ found = n;
+ break;
+ }
+ next = rb_next(&n->rb_node);
+ }
+
+ if (!found) {
+ fprintf(stderr, "Sorry, %s is not active.\n", buf);
+ sleep(1);
+ } else
+ perf_top__parse_source(top, found);
+
+out_free:
+ free(buf);
+}
+
+static void perf_top__print_mapped_keys(struct perf_top *top)
+{
+ char *name = NULL;
+
+ if (top->sym_filter_entry) {
+ struct symbol *sym = top->sym_filter_entry->ms.sym;
+ name = sym->name;
+ }
+
+ fprintf(stdout, "\nMapped keys:\n");
+ fprintf(stdout, "\t[d] display refresh delay. \t(%d)\n", top->delay_secs);
+ fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
+
+ if (top->evlist->nr_entries > 1)
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel));
+
+ fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
+
+ fprintf(stdout, "\t[F] annotate display filter (percent). \t(%d%%)\n", top->annotation_opts.min_pcnt);
+ fprintf(stdout, "\t[s] annotate symbol. \t(%s)\n", name?: "NULL");
+ fprintf(stdout, "\t[S] stop annotation.\n");
+
+ fprintf(stdout,
+ "\t[K] hide kernel symbols. \t(%s)\n",
+ top->hide_kernel_symbols ? "yes" : "no");
+ fprintf(stdout,
+ "\t[U] hide user symbols. \t(%s)\n",
+ top->hide_user_symbols ? "yes" : "no");
+ fprintf(stdout, "\t[z] toggle sample zeroing. \t(%d)\n", top->zero ? 1 : 0);
+ fprintf(stdout, "\t[qQ] quit.\n");
+}
+
+static int perf_top__key_mapped(struct perf_top *top, int c)
+{
+ switch (c) {
+ case 'd':
+ case 'e':
+ case 'f':
+ case 'z':
+ case 'q':
+ case 'Q':
+ case 'K':
+ case 'U':
+ case 'F':
+ case 's':
+ case 'S':
+ return 1;
+ case 'E':
+ return top->evlist->nr_entries > 1 ? 1 : 0;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static bool perf_top__handle_keypress(struct perf_top *top, int c)
+{
+ bool ret = true;
+
+ if (!perf_top__key_mapped(top, c)) {
+ struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+ struct termios save;
+
+ perf_top__print_mapped_keys(top);
+ fprintf(stdout, "\nEnter selection, or unmapped key to continue: ");
+ fflush(stdout);
+
+ set_term_quiet_input(&save);
+
+ poll(&stdin_poll, 1, -1);
+ c = getc(stdin);
+
+ tcsetattr(0, TCSAFLUSH, &save);
+ if (!perf_top__key_mapped(top, c))
+ return ret;
+ }
+
+ switch (c) {
+ case 'd':
+ prompt_integer(&top->delay_secs, "Enter display delay");
+ if (top->delay_secs < 1)
+ top->delay_secs = 1;
+ break;
+ case 'e':
+ prompt_integer(&top->print_entries, "Enter display entries (lines)");
+ if (top->print_entries == 0) {
+ perf_top__resize(top);
+ signal(SIGWINCH, winch_sig);
+ } else {
+ signal(SIGWINCH, SIG_DFL);
+ }
+ break;
+ case 'E':
+ if (top->evlist->nr_entries > 1) {
+ /* Select 0 as the default event: */
+ int counter = 0;
+
+ fprintf(stderr, "\nAvailable events:");
+
+ evlist__for_each_entry(top->evlist, top->sym_evsel)
+ fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
+
+ prompt_integer(&counter, "Enter details event counter");
+
+ if (counter >= top->evlist->nr_entries) {
+ top->sym_evsel = perf_evlist__first(top->evlist);
+ fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
+ sleep(1);
+ break;
+ }
+ evlist__for_each_entry(top->evlist, top->sym_evsel)
+ if (top->sym_evsel->idx == counter)
+ break;
+ } else
+ top->sym_evsel = perf_evlist__first(top->evlist);
+ break;
+ case 'f':
+ prompt_integer(&top->count_filter, "Enter display event count filter");
+ break;
+ case 'F':
+ prompt_percent(&top->annotation_opts.min_pcnt,
+ "Enter details display event filter (percent)");
+ break;
+ case 'K':
+ top->hide_kernel_symbols = !top->hide_kernel_symbols;
+ break;
+ case 'q':
+ case 'Q':
+ printf("exiting.\n");
+ if (top->dump_symtab)
+ perf_session__fprintf_dsos(top->session, stderr);
+ ret = false;
+ break;
+ case 's':
+ perf_top__prompt_symbol(top, "Enter details symbol");
+ break;
+ case 'S':
+ if (!top->sym_filter_entry)
+ break;
+ else {
+ struct hist_entry *syme = top->sym_filter_entry;
+
+ top->sym_filter_entry = NULL;
+ __zero_source_counters(syme);
+ }
+ break;
+ case 'U':
+ top->hide_user_symbols = !top->hide_user_symbols;
+ break;
+ case 'z':
+ top->zero = !top->zero;
+ break;
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+static void perf_top__sort_new_samples(void *arg)
+{
+ struct perf_top *t = arg;
+ struct perf_evsel *evsel = t->sym_evsel;
+ struct hists *hists;
+
+ perf_top__reset_sample_counters(t);
+
+ if (t->evlist->selected != NULL)
+ t->sym_evsel = t->evlist->selected;
+
+ hists = evsel__hists(evsel);
+
+ if (t->evlist->enabled) {
+ if (t->zero) {
+ hists__delete_entries(hists);
+ } else {
+ hists__decay_entries(hists, t->hide_user_symbols,
+ t->hide_kernel_symbols);
+ }
+ }
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+}
+
+static void *display_thread_tui(void *arg)
+{
+ struct perf_evsel *pos;
+ struct perf_top *top = arg;
+ const char *help = "For a higher level overview, try: perf top --sort comm,dso";
+ struct hist_browser_timer hbt = {
+ .timer = perf_top__sort_new_samples,
+ .arg = top,
+ .refresh = top->delay_secs,
+ };
+
+ /* In order to read symbols from other namespaces perf needs to call
+ * setns(2). This isn't permitted if the fs_struct has multiple users.
+ * unshare(2) the fs so that we may continue to setns into namespaces
+ * that we're observing.
+ */
+ unshare(CLONE_FS);
+
+ perf_top__sort_new_samples(top);
+
+ /*
+ * Initialize the uid_filter_str; in the future the TUI will allow
+ * zooming in/out on UIDs. For now just use whatever the user passed
+ * via --uid.
+ */
+ evlist__for_each_entry(top->evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+ hists->uid_filter_str = top->record_opts.target.uid_str;
+ }
+
+ perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+ top->min_percent,
+ &top->session->header.env,
+ !top->record_opts.overwrite,
+ &top->annotation_opts);
+
+ done = 1;
+ return NULL;
+}
+
+static void display_sig(int sig __maybe_unused)
+{
+ done = 1;
+}
+
+static void display_setup_sig(void)
+{
+ signal(SIGSEGV, sighandler_dump_stack);
+ signal(SIGFPE, sighandler_dump_stack);
+ signal(SIGINT, display_sig);
+ signal(SIGQUIT, display_sig);
+ signal(SIGTERM, display_sig);
+}
+
+static void *display_thread(void *arg)
+{
+ struct pollfd stdin_poll = { .fd = 0, .events = POLLIN };
+ struct termios save;
+ struct perf_top *top = arg;
+ int delay_msecs, c;
+
+ /* In order to read symbols from other namespaces perf needs to call
+ * setns(2). This isn't permitted if the fs_struct has multiple users.
+ * unshare(2) the fs so that we may continue to setns into namespaces
+ * that we're observing.
+ */
+ unshare(CLONE_FS);
+
+ display_setup_sig();
+ pthread__unblock_sigwinch();
+repeat:
+ delay_msecs = top->delay_secs * MSEC_PER_SEC;
+ set_term_quiet_input(&save);
+ /* trash return */
+ clearerr(stdin);
+ if (poll(&stdin_poll, 1, 0) > 0)
+ getc(stdin);
+
+ while (!done) {
+ perf_top__print_sym_table(top);
+ /*
+ * Either the timeout expired or we got an EINTR due to SIGWINCH;
+ * refresh the screen in both cases.
+ */
+ switch (poll(&stdin_poll, 1, delay_msecs)) {
+ case 0:
+ continue;
+ case -1:
+ if (errno == EINTR)
+ continue;
+ __fallthrough;
+ default:
+ c = getc(stdin);
+ tcsetattr(0, TCSAFLUSH, &save);
+
+ if (perf_top__handle_keypress(top, c))
+ goto repeat;
+ done = 1;
+ }
+ }
+
+ tcsetattr(0, TCSAFLUSH, &save);
+ return NULL;
+}
+
+static int hist_iter__top_callback(struct hist_entry_iter *iter,
+ struct addr_location *al, bool single,
+ void *arg)
+{
+ struct perf_top *top = arg;
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+
+ if (perf_hpp_list.sym && single)
+ perf_top__record_precise_ip(top, he, iter->sample, evsel, al->addr);
+
+ hist__account_cycles(iter->sample->branch_stack, al, iter->sample,
+ !(top->record_opts.branch_stack & PERF_SAMPLE_BRANCH_ANY));
+ return 0;
+}
+
+static void perf_event__process_sample(struct perf_tool *tool,
+ const union perf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_top *top = container_of(tool, struct perf_top, tool);
+ struct addr_location al;
+ int err;
+
+ if (!machine && perf_guest) {
+ static struct intlist *seen;
+
+ if (!seen)
+ seen = intlist__new(NULL);
+
+ if (!intlist__has_entry(seen, sample->pid)) {
+ pr_err("Can't find guest [%d]'s kernel information\n",
+ sample->pid);
+ intlist__add(seen, sample->pid);
+ }
+ return;
+ }
+
+ if (!machine) {
+ pr_err("%u unprocessable samples recorded.\r",
+ top->session->evlist->stats.nr_unprocessable_samples++);
+ return;
+ }
+
+ if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
+ top->exact_samples++;
+
+ if (machine__resolve(machine, &al, sample) < 0)
+ return;
+
+ if (!machine->kptr_restrict_warned &&
+ symbol_conf.kptr_restrict &&
+ al.cpumode == PERF_RECORD_MISC_KERNEL) {
+ if (!perf_evlist__exclude_kernel(top->session->evlist)) {
+ ui__warning(
+"Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
+"Check /proc/sys/kernel/kptr_restrict.\n\n"
+"Kernel%s samples will not be resolved.\n",
+ al.map && map__has_symbols(al.map) ?
+ " modules" : "");
+ if (use_browser <= 0)
+ sleep(5);
+ }
+ machine->kptr_restrict_warned = true;
+ }
+
+ if (al.sym == NULL && al.map != NULL) {
+ const char *msg = "Kernel samples will not be resolved.\n";
+ /*
+ * As we do lazy loading of symtabs we will only know if the
+ * specified vmlinux file is invalid when we actually have a
+ * hit in kernel space and then try to load it. So if we get
+ * here and there are _no_ symbols in the DSO backing the
+ * kernel map, bail out.
+ *
+ * We may never get here, for instance, if we use -K/
+ * --hide-kernel-symbols, even if the user specifies an
+ * invalid --vmlinux ;-)
+ */
+ if (!machine->kptr_restrict_warned && !top->vmlinux_warned &&
+ __map__is_kernel(al.map) && map__has_symbols(al.map)) {
+ if (symbol_conf.vmlinux_name) {
+ char serr[256];
+ dso__strerror_load(al.map->dso, serr, sizeof(serr));
+ ui__warning("The %s file can't be used: %s\n%s",
+ symbol_conf.vmlinux_name, serr, msg);
+ } else {
+ ui__warning("A vmlinux file was not found.\n%s",
+ msg);
+ }
+
+ if (use_browser <= 0)
+ sleep(5);
+ top->vmlinux_warned = true;
+ }
+ }
+
+ if (al.sym == NULL || !al.sym->idle) {
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = sample,
+ .add_entry_cb = hist_iter__top_callback,
+ };
+
+ if (symbol_conf.cumulate_callchain)
+ iter.ops = &hist_iter_cumulative;
+ else
+ iter.ops = &hist_iter_normal;
+
+ pthread_mutex_lock(&hists->lock);
+
+ err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
+ if (err < 0)
+ pr_err("Problem incrementing symbol period, skipping event\n");
+
+ pthread_mutex_unlock(&hists->lock);
+ }
+
+ addr_location__put(&al);
+}
+
+static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_sample sample;
+ struct perf_evsel *evsel;
+ struct perf_mmap *md;
+ struct perf_session *session = top->session;
+ union perf_event *event;
+ struct machine *machine;
+ int ret;
+
+ md = opts->overwrite ? &evlist->overwrite_mmap[idx] : &evlist->mmap[idx];
+ if (perf_mmap__read_init(md) < 0)
+ return;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ ret = perf_evlist__parse_sample(evlist, event, &sample);
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ goto next_event;
+ }
+
+ evsel = perf_evlist__id2evsel(session->evlist, sample.id);
+ assert(evsel != NULL);
+
+ if (event->header.type == PERF_RECORD_SAMPLE)
+ ++top->samples;
+
+ switch (sample.cpumode) {
+ case PERF_RECORD_MISC_USER:
+ ++top->us_samples;
+ if (top->hide_user_symbols)
+ goto next_event;
+ machine = &session->machines.host;
+ break;
+ case PERF_RECORD_MISC_KERNEL:
+ ++top->kernel_samples;
+ if (top->hide_kernel_symbols)
+ goto next_event;
+ machine = &session->machines.host;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ ++top->guest_kernel_samples;
+ machine = perf_session__find_machine(session,
+ sample.pid);
+ break;
+ case PERF_RECORD_MISC_GUEST_USER:
+ ++top->guest_us_samples;
+ /*
+ * TODO: we don't process guest user samples from the
+ * host side beyond simple counting.
+ */
+ goto next_event;
+ default:
+ if (event->header.type == PERF_RECORD_SAMPLE)
+ goto next_event;
+ machine = &session->machines.host;
+ break;
+ }
+
+
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ perf_event__process_sample(&top->tool, event, evsel,
+ &sample, machine);
+ } else if (event->header.type < PERF_RECORD_MAX) {
+ hists__inc_nr_events(evsel__hists(evsel), event->header.type);
+ machine__process_event(machine, event, &sample);
+ } else
+ ++session->evlist->stats.nr_unknown_events;
+next_event:
+ perf_mmap__consume(md);
+ }
+
+ perf_mmap__read_done(md);
+}
+
+static void perf_top__mmap_read(struct perf_top *top)
+{
+ bool overwrite = top->record_opts.overwrite;
+ struct perf_evlist *evlist = top->evlist;
+ unsigned long long start, end;
+ int i;
+
+ start = rdclock();
+ if (overwrite)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_DATA_PENDING);
+
+ for (i = 0; i < top->evlist->nr_mmaps; i++)
+ perf_top__mmap_read_idx(top, i);
+
+ if (overwrite) {
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY);
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+ }
+ end = rdclock();
+
+ if ((end - start) > (unsigned long long)top->delay_secs * NSEC_PER_SEC)
+ ui__warning("Too slow to read ring buffer.\n"
+ "Please try increasing the period (-c) or\n"
+ "decreasing the freq (-F) or\n"
+ "limiting the number of CPUs (-C)\n");
+}
+
+/*
+ * Check the per-event overwrite term.
+ * perf top requires a consistent term across all events.
+ * - No event has a per-event term
+ * E.g. "cpu/cpu-cycles/,cpu/instructions/"
+ * Nothing changes, return 0.
+ * - All events have the same per-event term
+ * E.g. "cpu/cpu-cycles,no-overwrite/,cpu/instructions,no-overwrite/"
+ * Use the per-event setting to replace opts->overwrite if they
+ * differ, then return 0.
+ * - Events have different per-event terms
+ * E.g. "cpu/cpu-cycles,overwrite/,cpu/instructions,no-overwrite/"
+ * Return -1.
+ * - Some events set a per-event term, but some do not.
+ * E.g. "cpu/cpu-cycles/,cpu/instructions,no-overwrite/"
+ * Return -1.
+ */
+static int perf_top__overwrite_check(struct perf_top *top)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_evsel_config_term *term;
+ struct list_head *config_terms;
+ struct perf_evsel *evsel;
+ int set, overwrite = -1;
+
+ evlist__for_each_entry(evlist, evsel) {
+ set = -1;
+ config_terms = &evsel->config_terms;
+ list_for_each_entry(term, config_terms, list) {
+ if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+ set = term->val.overwrite ? 1 : 0;
+ }
+
+ /* no term for current and previous event (likely) */
+ if ((overwrite < 0) && (set < 0))
+ continue;
+
+ /* has term for both current and previous event, compare */
+ if ((overwrite >= 0) && (set >= 0) && (overwrite != set))
+ return -1;
+
+ /* no term for current event but has term for previous one */
+ if ((overwrite >= 0) && (set < 0))
+ return -1;
+
+ /* has term for current event */
+ if ((overwrite < 0) && (set >= 0)) {
+ /* if it's first event, set overwrite */
+ if (evsel == perf_evlist__first(evlist))
+ overwrite = set;
+ else
+ return -1;
+ }
+ }
+
+ if ((overwrite >= 0) && (opts->overwrite != overwrite))
+ opts->overwrite = overwrite;
+
+ return 0;
+}
+
+static int perf_top_overwrite_fallback(struct perf_top *top,
+ struct perf_evsel *evsel)
+{
+ struct record_opts *opts = &top->record_opts;
+ struct perf_evlist *evlist = top->evlist;
+ struct perf_evsel *counter;
+
+ if (!opts->overwrite)
+ return 0;
+
+ /* only fall back when first event fails */
+ if (evsel != perf_evlist__first(evlist))
+ return 0;
+
+ evlist__for_each_entry(evlist, counter)
+ counter->attr.write_backward = false;
+ opts->overwrite = false;
+ pr_debug2("fall back to non-overwrite mode\n");
+ return 1;
+}
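+
+/*
+ * Example: assuming overwrite mode is enabled, on a kernel without
+ * write_backward support the open of the first event fails, every
+ * counter's write_backward bit is cleared here, opts->overwrite goes
+ * false, and perf_top__start_counters() below retries the open.
+ */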
+
+static int perf_top__start_counters(struct perf_top *top)
+{
+ char msg[BUFSIZ];
+ struct perf_evsel *counter;
+ struct perf_evlist *evlist = top->evlist;
+ struct record_opts *opts = &top->record_opts;
+
+ if (perf_top__overwrite_check(top)) {
+ ui__error("perf top only support consistent per-event "
+ "overwrite setting for all events\n");
+ goto out_err;
+ }
+
+ perf_evlist__config(evlist, opts, &callchain_param);
+
+ evlist__for_each_entry(evlist, counter) {
+try_again:
+ if (perf_evsel__open(counter, top->evlist->cpus,
+ top->evlist->threads) < 0) {
+
+ /*
+ * Handle the overwrite fallback specially.
+ * perf top is the only tool that has overwrite
+ * mode on by default, supports both overwrite
+ * and non-overwrite mode, and requires a
+ * consistent mode for all events.
+ *
+ * This may move to generic code once more
+ * tools have a similar attribute.
+ */
+ if (perf_missing_features.write_backward &&
+ perf_top_overwrite_fallback(top, counter))
+ goto try_again;
+
+ if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (verbose > 0)
+ ui__warning("%s\n", msg);
+ goto try_again;
+ }
+
+ perf_evsel__open_strerror(counter, &opts->target,
+ errno, msg, sizeof(msg));
+ ui__error("%s\n", msg);
+ goto out_err;
+ }
+ }
+
+ if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) {
+ ui__error("Failed to mmap with %d (%s)\n",
+ errno, str_error_r(errno, msg, sizeof(msg)));
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ return -1;
+}
+
+static int callchain_param__setup_sample_type(struct callchain_param *callchain)
+{
+ if (!perf_hpp_list.sym) {
+ if (callchain->enabled) {
+ ui__error("Selected -g but \"sym\" not present in --sort/-s.");
+ return -EINVAL;
+ }
+ } else if (callchain->mode != CHAIN_NONE) {
+ if (callchain_register_param(callchain) < 0) {
+ ui__error("Can't register callchain params.\n");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int __cmd_top(struct perf_top *top)
+{
+ char msg[512];
+ struct perf_evsel *pos;
+ struct perf_evsel_config_term *err_term;
+ struct perf_evlist *evlist = top->evlist;
+ struct record_opts *opts = &top->record_opts;
+ pthread_t thread;
+ int ret;
+
+ top->session = perf_session__new(NULL, false, NULL);
+ if (top->session == NULL)
+ return -1;
+
+ if (!top->annotation_opts.objdump_path) {
+ ret = perf_env__lookup_objdump(&top->session->header.env,
+ &top->annotation_opts.objdump_path);
+ if (ret)
+ goto out_delete;
+ }
+
+ ret = callchain_param__setup_sample_type(&callchain_param);
+ if (ret)
+ goto out_delete;
+
+ if (perf_session__register_idle_thread(top->session) < 0)
+ goto out_delete;
+
+ if (top->nr_threads_synthesize > 1)
+ perf_set_multithreaded();
+
+ machine__synthesize_threads(&top->session->machines.host, &opts->target,
+ top->evlist->threads, false,
+ opts->proc_map_timeout,
+ top->nr_threads_synthesize);
+
+ if (top->nr_threads_synthesize > 1)
+ perf_set_singlethreaded();
+
+ if (perf_hpp_list.socket) {
+ ret = perf_env__read_cpu_topology_map(&perf_env);
+ if (ret < 0)
+ goto out_err_cpu_topo;
+ }
+
+ ret = perf_top__start_counters(top);
+ if (ret)
+ goto out_delete;
+
+ ret = perf_evlist__apply_drv_configs(evlist, &pos, &err_term);
+ if (ret) {
+ pr_err("failed to set config \"%s\" on event %s with %d (%s)\n",
+ err_term->val.drv_cfg, perf_evsel__name(pos), errno,
+ str_error_r(errno, msg, sizeof(msg)));
+ goto out_delete;
+ }
+
+ top->session->evlist = top->evlist;
+ perf_session__set_id_hdr_size(top->session);
+
+ /*
+ * When perf is starting the traced process, all the events (apart from
+ * group members) have enable_on_exec=1 set, so don't spoil it by
+ * prematurely enabling them.
+ *
+ * XXX 'top' still doesn't start workloads like record, trace, but should,
+ * so leave the check here.
+ */
+ if (!target__none(&opts->target))
+ perf_evlist__enable(top->evlist);
+
+ /* Wait for a minimal set of events before starting the snapshot */
+ perf_evlist__poll(top->evlist, 100);
+
+ perf_top__mmap_read(top);
+
+ ret = -1;
+ if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui :
+ display_thread), top)) {
+ ui__error("Could not create display thread.\n");
+ goto out_delete;
+ }
+
+ if (top->realtime_prio) {
+ struct sched_param param;
+
+ param.sched_priority = top->realtime_prio;
+ if (sched_setscheduler(0, SCHED_FIFO, &param)) {
+ ui__error("Could not set realtime priority.\n");
+ goto out_join;
+ }
+ }
+
+ while (!done) {
+ u64 hits = top->samples;
+
+ perf_top__mmap_read(top);
+
+ if (opts->overwrite || (hits == top->samples))
+ ret = perf_evlist__poll(top->evlist, 100);
+
+ if (resize) {
+ perf_top__resize(top);
+ resize = 0;
+ }
+ }
+
+ ret = 0;
+out_join:
+ pthread_join(thread, NULL);
+out_delete:
+ perf_session__delete(top->session);
+ top->session = NULL;
+
+ return ret;
+
+out_err_cpu_topo: {
+ char errbuf[BUFSIZ];
+ const char *err = str_error_r(-ret, errbuf, sizeof(errbuf));
+
+ ui__error("Could not read the CPU topology map: %s\n", err);
+ goto out_delete;
+}
+}
+
+static int
+callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ symbol_conf.use_callchain = true;
+ return record_callchain_opt(opt, arg, unset);
+}
+
+static int
+parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+{
+ struct callchain_param *callchain = opt->value;
+
+ callchain->enabled = !unset;
+ callchain->record_mode = CALLCHAIN_FP;
+
+ /*
+ * --no-call-graph
+ */
+ if (unset) {
+ symbol_conf.use_callchain = false;
+ callchain->record_mode = CALLCHAIN_NONE;
+ return 0;
+ }
+
+ return parse_callchain_top_opt(arg);
+}
+
+static int perf_top_config(const char *var, const char *value, void *cb __maybe_unused)
+{
+ if (!strcmp(var, "top.call-graph")) {
+ var = "call-graph.record-mode";
+ return perf_default_config(var, value, cb);
+ }
+ if (!strcmp(var, "top.children")) {
+ symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int
+parse_percent_limit(const struct option *opt, const char *arg,
+ int unset __maybe_unused)
+{
+ struct perf_top *top = opt->value;
+
+ top->min_percent = strtof(arg, NULL);
+ return 0;
+}
+
+const char top_callchain_help[] = CALLCHAIN_RECORD_HELP CALLCHAIN_REPORT_HELP
+ "\n\t\t\t\tDefault: fp,graph,0.5,caller,function";
+
+int cmd_top(int argc, const char **argv)
+{
+ char errbuf[BUFSIZ];
+ struct perf_top top = {
+ .count_filter = 5,
+ .delay_secs = 2,
+ .record_opts = {
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 4000, /* 4 kHz */
+ .target = {
+ .uses_mmap = true,
+ },
+ .proc_map_timeout = 500,
+ .overwrite = 1,
+ },
+ .max_stack = sysctl__max_stack(),
+ .annotation_opts = annotation__default_options,
+ .nr_threads_synthesize = UINT_MAX,
+ };
+ struct record_opts *opts = &top.record_opts;
+ struct target *target = &opts->target;
+ const struct option options[] = {
+ OPT_CALLBACK('e', "event", &top.evlist, "event",
+ "event selector. use 'perf list' to list available events",
+ parse_events_option),
+ OPT_U64('c', "count", &opts->user_interval, "event period to sample"),
+ OPT_STRING('p', "pid", &target->pid, "pid",
+ "profile events on existing process id"),
+ OPT_STRING('t', "tid", &target->tid, "tid",
+ "profile events on existing thread id"),
+ OPT_BOOLEAN('a', "all-cpus", &target->system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &target->cpu_list, "cpu",
+ "list of cpus to monitor"),
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_BOOLEAN(0, "ignore-vmlinux", &symbol_conf.ignore_vmlinux,
+ "don't load vmlinux even if found"),
+ OPT_BOOLEAN('K', "hide_kernel_symbols", &top.hide_kernel_symbols,
+ "hide kernel symbols"),
+ OPT_CALLBACK('m', "mmap-pages", &opts->mmap_pages, "pages",
+ "number of mmap data pages",
+ perf_evlist__parse_mmap_pages),
+ OPT_INTEGER('r', "realtime", &top.realtime_prio,
+ "collect data with this RT SCHED_FIFO priority"),
+ OPT_INTEGER('d', "delay", &top.delay_secs,
+ "number of seconds to delay between refreshes"),
+ OPT_BOOLEAN('D', "dump-symtab", &top.dump_symtab,
+ "dump the symbol table used for profiling"),
+ OPT_INTEGER('f', "count-filter", &top.count_filter,
+ "only display functions with more events than this"),
+ OPT_BOOLEAN(0, "group", &opts->group,
+ "put the counters into a counter group"),
+ OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
+ "child tasks do not inherit counters"),
+ OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
+ "symbol to annotate"),
+ OPT_BOOLEAN('z', "zero", &top.zero, "zero history across updates"),
+ OPT_CALLBACK('F', "freq", &top.record_opts, "freq or 'max'",
+ "profile at this frequency",
+ record__parse_freq),
+ OPT_INTEGER('E', "entries", &top.print_entries,
+ "display this many functions"),
+ OPT_BOOLEAN('U', "hide_user_symbols", &top.hide_user_symbols,
+ "hide user symbols"),
+ OPT_BOOLEAN(0, "tui", &top.use_tui, "Use the TUI interface"),
+ OPT_BOOLEAN(0, "stdio", &top.use_stdio, "Use the stdio interface"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ " Please refer the man page for the complete list."),
+ OPT_STRING(0, "fields", &field_order, "key[,keys...]",
+ "output field(s): overhead, period, sample plus all of sort keys"),
+ OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
+ "Show a column with the number of samples"),
+ OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
+ NULL, "enables call-graph recording and display",
+ &callchain_opt),
+ OPT_CALLBACK(0, "call-graph", &callchain_param,
+ "record_mode[,record_size],print_type,threshold[,print_limit],order,sort_key[,branch]",
+ top_callchain_help, &parse_callchain_opt),
+ OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+ "Accumulate callchains of children and show total overhead as well"),
+ OPT_INTEGER(0, "max-stack", &top.max_stack,
+ "Set the maximum stack depth when parsing the callchain. "
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+ OPT_CALLBACK(0, "ignore-callees", NULL, "regex",
+ "ignore callees of these functions in call graphs",
+ report_parse_ignore_callees_opt),
+ OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+ "Show a column with the sum of periods"),
+ OPT_STRING(0, "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
+ "only consider symbols in these dsos"),
+ OPT_STRING(0, "comms", &symbol_conf.comm_list_str, "comm[,comm...]",
+ "only consider symbols in these comms"),
+ OPT_STRING(0, "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
+ "only consider these symbols"),
+ OPT_BOOLEAN(0, "source", &top.annotation_opts.annotate_src,
+ "Interleave source code with assembly code (default)"),
+ OPT_BOOLEAN(0, "asm-raw", &top.annotation_opts.show_asm_raw,
+ "Display raw encoding of assembly instructions (default)"),
+ OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel,
+ "Enable kernel symbol demangling"),
+ OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path",
+ "objdump binary to use for disassembly and annotations"),
+ OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style",
+ "Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
+ OPT_CALLBACK(0, "percent-limit", &top, "percent",
+ "Don't show entries under that percent", parse_percent_limit),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
+ OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
+ "width[,width...]",
+ "don't try to adjust column width, use these fixed values"),
+ OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
+ OPT_CALLBACK_NOOPT('b', "branch-any", &opts->branch_stack,
+ "branch any", "sample any taken branches",
+ parse_branch_stack),
+ OPT_CALLBACK('j', "branch-filter", &opts->branch_stack,
+ "branch filter mask", "branch stack filter modes",
+ parse_branch_stack),
+ OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
+ "Show raw trace event output (do not use print fmt or plugins)"),
+ OPT_BOOLEAN(0, "hierarchy", &symbol_conf.report_hierarchy,
+ "Show entries in a hierarchy"),
+ OPT_BOOLEAN(0, "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_UINTEGER(0, "num-thread-synthesize", &top.nr_threads_synthesize,
+ "number of thread to run event synthesize"),
+ OPT_END()
+ };
+ const char * const top_usage[] = {
+ "perf top [<options>]",
+ NULL
+ };
+ int status = hists__init();
+
+ if (status < 0)
+ return status;
+
+ top.annotation_opts.min_pcnt = 5;
+ top.annotation_opts.context = 4;
+
+ top.evlist = perf_evlist__new();
+ if (top.evlist == NULL)
+ return -ENOMEM;
+
+ status = perf_config(perf_top_config, &top);
+ if (status)
+ return status;
+
+ argc = parse_options(argc, argv, options, top_usage, 0);
+ if (argc)
+ usage_with_options(top_usage, options);
+
+ if (!top.evlist->nr_entries &&
+ perf_evlist__add_default(top.evlist) < 0) {
+ pr_err("Not enough memory for event selector list\n");
+ goto out_delete_evlist;
+ }
+
+ if (symbol_conf.report_hierarchy) {
+ /* disable incompatible options */
+ symbol_conf.event_group = false;
+ symbol_conf.cumulate_callchain = false;
+
+ if (field_order) {
+ pr_err("Error: --hierarchy and --fields options cannot be used together\n");
+ parse_options_usage(top_usage, options, "fields", 0);
+ parse_options_usage(NULL, options, "hierarchy", 0);
+ goto out_delete_evlist;
+ }
+ }
+
+ sort__mode = SORT_MODE__TOP;
+ /* display thread wants entries to be collapsed in a different tree */
+ perf_hpp_list.need_collapse = 1;
+
+ if (top.use_stdio)
+ use_browser = 0;
+ else if (top.use_tui)
+ use_browser = 1;
+
+ setup_browser(false);
+
+ if (setup_sorting(top.evlist) < 0) {
+ if (sort_order)
+ parse_options_usage(top_usage, options, "s", 1);
+ if (field_order)
+ parse_options_usage(sort_order ? NULL : top_usage,
+ options, "fields", 0);
+ goto out_delete_evlist;
+ }
+
+ status = target__validate(target);
+ if (status) {
+ target__strerror(target, status, errbuf, BUFSIZ);
+ ui__warning("%s\n", errbuf);
+ }
+
+ status = target__parse_uid(target);
+ if (status) {
+ int saved_errno = errno;
+
+ target__strerror(target, status, errbuf, BUFSIZ);
+ ui__error("%s\n", errbuf);
+
+ status = -saved_errno;
+ goto out_delete_evlist;
+ }
+
+ if (target__none(target))
+ target->system_wide = true;
+
+ if (perf_evlist__create_maps(top.evlist, target) < 0) {
+ ui__error("Couldn't create thread/CPU maps: %s\n",
+ errno == ENOENT ? "No such process" : str_error_r(errno, errbuf, sizeof(errbuf)));
+ goto out_delete_evlist;
+ }
+
+ if (top.delay_secs < 1)
+ top.delay_secs = 1;
+
+ if (record_opts__config(opts)) {
+ status = -EINVAL;
+ goto out_delete_evlist;
+ }
+
+ top.sym_evsel = perf_evlist__first(top.evlist);
+
+ if (!callchain_param.enabled) {
+ symbol_conf.cumulate_callchain = false;
+ perf_hpp__cancel_cumulate();
+ }
+
+ if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
+ callchain_param.order = ORDER_CALLER;
+
+ status = symbol__annotation_init();
+ if (status < 0)
+ goto out_delete_evlist;
+
+ annotation_config__init();
+
+ symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
+ status = symbol__init(NULL);
+ if (status < 0)
+ goto out_delete_evlist;
+
+ sort__setup_elide(stdout);
+
+ get_term_dimensions(&top.winsize);
+ if (top.print_entries == 0) {
+ perf_top__update_print_entries(&top);
+ signal(SIGWINCH, winch_sig);
+ }
+
+ status = __cmd_top(&top);
+
+out_delete_evlist:
+ perf_evlist__delete(top.evlist);
+
+ return status;
+}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
new file mode 100644
index 000000000..3f43aedb3
--- /dev/null
+++ b/tools/perf/builtin-trace.c
@@ -0,0 +1,3387 @@
+/*
+ * builtin-trace.c
+ *
+ * Builtin 'trace' command:
+ *
+ * Display a continuously updated trace of any workload, CPU, specific PID,
+ * system wide, etc. Default format is loosely strace like, but any other
+ * event may be specified using --event.
+ *
+ * Copyright (C) 2012, 2013, 2014, 2015 Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Initially based on the 'trace' prototype by Thomas Gleixner:
+ *
+ * http://lwn.net/Articles/415728/ ("Announcing a new utility: 'trace'")
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <traceevent/event-parse.h>
+#include <api/fs/tracing_path.h>
+#include "builtin.h"
+#include "util/cgroup.h"
+#include "util/color.h"
+#include "util/debug.h"
+#include "util/env.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include <subcmd/exec-cmd.h>
+#include "util/machine.h"
+#include "util/path.h"
+#include "util/session.h"
+#include "util/thread.h"
+#include <subcmd/parse-options.h>
+#include "util/strlist.h"
+#include "util/intlist.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
+#include "trace/beauty/beauty.h"
+#include "trace-event.h"
+#include "util/parse-events.h"
+#include "util/bpf-loader.h"
+#include "callchain.h"
+#include "print_binary.h"
+#include "string2.h"
+#include "syscalltbl.h"
+#include "rb_resort.h"
+
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/err.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <linux/random.h>
+#include <linux/stringify.h>
+#include <linux/time64.h>
+#include <fcntl.h>
+
+#include "sane_ctype.h"
+
+#ifndef O_CLOEXEC
+# define O_CLOEXEC 02000000
+#endif
+
+#ifndef F_LINUX_SPECIFIC_BASE
+# define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+struct trace {
+ struct perf_tool tool;
+ struct syscalltbl *sctbl;
+ struct {
+ int max;
+ struct syscall *table;
+ struct {
+ struct perf_evsel *sys_enter,
+ *sys_exit,
+ *augmented;
+ } events;
+ } syscalls;
+ struct record_opts opts;
+ struct perf_evlist *evlist;
+ struct machine *host;
+ struct thread *current;
+ struct cgroup *cgroup;
+ u64 base_time;
+ FILE *output;
+ unsigned long nr_events;
+ struct strlist *ev_qualifier;
+ struct {
+ size_t nr;
+ int *entries;
+ } ev_qualifier_ids;
+ struct {
+ size_t nr;
+ pid_t *entries;
+ } filter_pids;
+ double duration_filter;
+ double runtime_ms;
+ struct {
+ u64 vfs_getname,
+ proc_getname;
+ } stats;
+ unsigned int max_stack;
+ unsigned int min_stack;
+ bool not_ev_qualifier;
+ bool live;
+ bool full_time;
+ bool sched;
+ bool multiple_threads;
+ bool summary;
+ bool summary_only;
+ bool failure_only;
+ bool show_comm;
+ bool print_sample;
+ bool show_tool_stats;
+ bool trace_syscalls;
+ bool kernel_syscallchains;
+ bool force;
+ bool vfs_getname;
+ int trace_pgfaults;
+};
+
+struct tp_field {
+ int offset;
+ union {
+ u64 (*integer)(struct tp_field *field, struct perf_sample *sample);
+ void *(*pointer)(struct tp_field *field, struct perf_sample *sample);
+ };
+};
+
+#define TP_UINT_FIELD(bits) \
+static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \
+{ \
+ u##bits value; \
+ memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
+ return value; \
+}
+
+TP_UINT_FIELD(8);
+TP_UINT_FIELD(16);
+TP_UINT_FIELD(32);
+TP_UINT_FIELD(64);
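+
+/*
+ * Expansion sketch (editor's note): TP_UINT_FIELD(32) above generates
+ *
+ *	static u64 tp_field__u32(struct tp_field *field, struct perf_sample *sample)
+ *	{
+ *		u32 value;
+ *		memcpy(&value, sample->raw_data + field->offset, sizeof(value));
+ *		return value;
+ *	}
+ *
+ * i.e. one fixed-width reader per size, each widened to u64 on return.
+ */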
+
+#define TP_UINT_FIELD__SWAPPED(bits) \
+static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \
+{ \
+ u##bits value; \
+ memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \
+ return bswap_##bits(value);\
+}
+
+TP_UINT_FIELD__SWAPPED(16);
+TP_UINT_FIELD__SWAPPED(32);
+TP_UINT_FIELD__SWAPPED(64);
+
+static int __tp_field__init_uint(struct tp_field *field, int size, int offset, bool needs_swap)
+{
+ field->offset = offset;
+
+ switch (size) {
+ case 1:
+ field->integer = tp_field__u8;
+ break;
+ case 2:
+ field->integer = needs_swap ? tp_field__swapped_u16 : tp_field__u16;
+ break;
+ case 4:
+ field->integer = needs_swap ? tp_field__swapped_u32 : tp_field__u32;
+ break;
+ case 8:
+ field->integer = needs_swap ? tp_field__swapped_u64 : tp_field__u64;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static int tp_field__init_uint(struct tp_field *field, struct format_field *format_field, bool needs_swap)
+{
+ return __tp_field__init_uint(field, format_field->size, format_field->offset, needs_swap);
+}
+
+static void *tp_field__ptr(struct tp_field *field, struct perf_sample *sample)
+{
+ return sample->raw_data + field->offset;
+}
+
+static int __tp_field__init_ptr(struct tp_field *field, int offset)
+{
+ field->offset = offset;
+ field->pointer = tp_field__ptr;
+ return 0;
+}
+
+static int tp_field__init_ptr(struct tp_field *field, struct format_field *format_field)
+{
+ return __tp_field__init_ptr(field, format_field->offset);
+}
+
+struct syscall_tp {
+ struct tp_field id;
+ union {
+ struct tp_field args, ret;
+ };
+};
+
+static int perf_evsel__init_tp_uint_field(struct perf_evsel *evsel,
+ struct tp_field *field,
+ const char *name)
+{
+ struct format_field *format_field = perf_evsel__field(evsel, name);
+
+ if (format_field == NULL)
+ return -1;
+
+ return tp_field__init_uint(field, format_field, evsel->needs_swap);
+}
+
+#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
+ ({ struct syscall_tp *sc = evsel->priv;\
+ perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
+
+static int perf_evsel__init_tp_ptr_field(struct perf_evsel *evsel,
+ struct tp_field *field,
+ const char *name)
+{
+ struct format_field *format_field = perf_evsel__field(evsel, name);
+
+ if (format_field == NULL)
+ return -1;
+
+ return tp_field__init_ptr(field, format_field);
+}
+
+#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
+ ({ struct syscall_tp *sc = evsel->priv;\
+ perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
+
+static void perf_evsel__delete_priv(struct perf_evsel *evsel)
+{
+ zfree(&evsel->priv);
+ perf_evsel__delete(evsel);
+}
+
+static int perf_evsel__init_syscall_tp(struct perf_evsel *evsel)
+{
+ struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
+
+ if (evsel->priv != NULL) {
+ if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr"))
+ goto out_delete;
+ return 0;
+ }
+
+ return -ENOMEM;
+out_delete:
+ zfree(&evsel->priv);
+ return -ENOENT;
+}
+
+static int perf_evsel__init_augmented_syscall_tp(struct perf_evsel *evsel)
+{
+ struct syscall_tp *sc = evsel->priv = malloc(sizeof(struct syscall_tp));
+
+ if (evsel->priv != NULL) { /* field, sizeof_field, offsetof_field */
+ if (__tp_field__init_uint(&sc->id, sizeof(long), sizeof(long long), evsel->needs_swap))
+ goto out_delete;
+
+ return 0;
+ }
+
+ return -ENOMEM;
+out_delete:
+ zfree(&evsel->priv);
+ return -EINVAL;
+}
+
+static int perf_evsel__init_augmented_syscall_tp_args(struct perf_evsel *evsel)
+{
+ struct syscall_tp *sc = evsel->priv;
+
+ return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
+}
+
+static int perf_evsel__init_raw_syscall_tp(struct perf_evsel *evsel, void *handler)
+{
+ evsel->priv = malloc(sizeof(struct syscall_tp));
+ if (evsel->priv != NULL) {
+ if (perf_evsel__init_sc_tp_uint_field(evsel, id))
+ goto out_delete;
+
+ evsel->handler = handler;
+ return 0;
+ }
+
+ return -ENOMEM;
+
+out_delete:
+ zfree(&evsel->priv);
+ return -ENOENT;
+}
+
+static struct perf_evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
+{
+ struct perf_evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+
+ /* older kernels (e.g., RHEL6) use syscalls:{enter,exit} */
+ if (IS_ERR(evsel))
+ evsel = perf_evsel__newtp("syscalls", direction);
+
+ if (IS_ERR(evsel))
+ return NULL;
+
+ if (perf_evsel__init_raw_syscall_tp(evsel, handler))
+ goto out_delete;
+
+ return evsel;
+
+out_delete:
+ perf_evsel__delete_priv(evsel);
+ return NULL;
+}
+
+#define perf_evsel__sc_tp_uint(evsel, name, sample) \
+ ({ struct syscall_tp *fields = evsel->priv; \
+ fields->name.integer(&fields->name, sample); })
+
+#define perf_evsel__sc_tp_ptr(evsel, name, sample) \
+ ({ struct syscall_tp *fields = evsel->priv; \
+ fields->name.pointer(&fields->name, sample); })
+
+size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val)
+{
+ int idx = val - sa->offset;
+
+ if (idx < 0 || idx >= sa->nr_entries || sa->entries[idx] == NULL)
+ return scnprintf(bf, size, intfmt, val);
+
+ return scnprintf(bf, size, "%s", sa->entries[idx]);
+}
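+
+/*
+ * Example (editor's note): with the epoll_ctl_ops strarray defined below
+ * ({ "ADD", "DEL", "MOD" }, offset 1), the call
+ *
+ *	strarray__scnprintf(&strarray__epoll_ctl_ops, bf, size, "%d", 2)
+ *
+ * formats "DEL" (idx = 2 - 1), while an out-of-range value such as 9
+ * falls back to intfmt and formats "9".
+ */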
+
+static size_t __syscall_arg__scnprintf_strarray(char *bf, size_t size,
+ const char *intfmt,
+ struct syscall_arg *arg)
+{
+ return strarray__scnprintf(arg->parm, bf, size, intfmt, arg->val);
+}
+
+static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ return __syscall_arg__scnprintf_strarray(bf, size, "%d", arg);
+}
+
+#define SCA_STRARRAY syscall_arg__scnprintf_strarray
+
+struct strarrays {
+ int nr_entries;
+ struct strarray **entries;
+};
+
+#define DEFINE_STRARRAYS(array) struct strarrays strarrays__##array = { \
+ .nr_entries = ARRAY_SIZE(array), \
+ .entries = array, \
+}
+
+size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ struct strarrays *sas = arg->parm;
+ int i;
+
+ for (i = 0; i < sas->nr_entries; ++i) {
+ struct strarray *sa = sas->entries[i];
+ int idx = arg->val - sa->offset;
+
+ if (idx >= 0 && idx < sa->nr_entries) {
+ if (sa->entries[idx] == NULL)
+ break;
+ return scnprintf(bf, size, "%s", sa->entries[idx]);
+ }
+ }
+
+ return scnprintf(bf, size, "%d", arg->val);
+}
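+
+/*
+ * Example (editor's note): fcntl commands are resolved through two
+ * strarrays (strarrays__fcntl_cmds_arrays below): val 1 hits the base
+ * array and formats "GETFD", while val 1024 (F_LINUX_SPECIFIC_BASE)
+ * misses it and is resolved by the offset-1024 array to "SETLEASE".
+ */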
+
+#ifndef AT_FDCWD
+#define AT_FDCWD -100
+#endif
+
+static size_t syscall_arg__scnprintf_fd_at(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int fd = arg->val;
+
+ if (fd == AT_FDCWD)
+ return scnprintf(bf, size, "CWD");
+
+ return syscall_arg__scnprintf_fd(bf, size, arg);
+}
+
+#define SCA_FDAT syscall_arg__scnprintf_fd_at
+
+static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
+ struct syscall_arg *arg);
+
+#define SCA_CLOSE_FD syscall_arg__scnprintf_close_fd
+
+size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg)
+{
+ return scnprintf(bf, size, "%#lx", arg->val);
+}
+
+size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg)
+{
+ return scnprintf(bf, size, "%d", arg->val);
+}
+
+size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg)
+{
+ return scnprintf(bf, size, "%ld", arg->val);
+}
+
+static const char *bpf_cmd[] = {
+ "MAP_CREATE", "MAP_LOOKUP_ELEM", "MAP_UPDATE_ELEM", "MAP_DELETE_ELEM",
+ "MAP_GET_NEXT_KEY", "PROG_LOAD",
+};
+static DEFINE_STRARRAY(bpf_cmd);
+
+static const char *epoll_ctl_ops[] = { "ADD", "DEL", "MOD", };
+static DEFINE_STRARRAY_OFFSET(epoll_ctl_ops, 1);
+
+static const char *itimers[] = { "REAL", "VIRTUAL", "PROF", };
+static DEFINE_STRARRAY(itimers);
+
+static const char *keyctl_options[] = {
+ "GET_KEYRING_ID", "JOIN_SESSION_KEYRING", "UPDATE", "REVOKE", "CHOWN",
+ "SETPERM", "DESCRIBE", "CLEAR", "LINK", "UNLINK", "SEARCH", "READ",
+ "INSTANTIATE", "NEGATE", "SET_REQKEY_KEYRING", "SET_TIMEOUT",
+ "ASSUME_AUTHORITY", "GET_SECURITY", "SESSION_TO_PARENT", "REJECT",
+ "INSTANTIATE_IOV", "INVALIDATE", "GET_PERSISTENT",
+};
+static DEFINE_STRARRAY(keyctl_options);
+
+static const char *whences[] = { "SET", "CUR", "END",
+#ifdef SEEK_DATA
+"DATA",
+#endif
+#ifdef SEEK_HOLE
+"HOLE",
+#endif
+};
+static DEFINE_STRARRAY(whences);
+
+static const char *fcntl_cmds[] = {
+ "DUPFD", "GETFD", "SETFD", "GETFL", "SETFL", "GETLK", "SETLK",
+ "SETLKW", "SETOWN", "GETOWN", "SETSIG", "GETSIG", "GETLK64",
+ "SETLK64", "SETLKW64", "SETOWN_EX", "GETOWN_EX",
+ "GETOWNER_UIDS",
+};
+static DEFINE_STRARRAY(fcntl_cmds);
+
+static const char *fcntl_linux_specific_cmds[] = {
+ "SETLEASE", "GETLEASE", "NOTIFY", [5] = "CANCELLK", "DUPFD_CLOEXEC",
+ "SETPIPE_SZ", "GETPIPE_SZ", "ADD_SEALS", "GET_SEALS",
+ "GET_RW_HINT", "SET_RW_HINT", "GET_FILE_RW_HINT", "SET_FILE_RW_HINT",
+};
+
+static DEFINE_STRARRAY_OFFSET(fcntl_linux_specific_cmds, F_LINUX_SPECIFIC_BASE);
+
+static struct strarray *fcntl_cmds_arrays[] = {
+ &strarray__fcntl_cmds,
+ &strarray__fcntl_linux_specific_cmds,
+};
+
+static DEFINE_STRARRAYS(fcntl_cmds_arrays);
+
+static const char *rlimit_resources[] = {
+ "CPU", "FSIZE", "DATA", "STACK", "CORE", "RSS", "NPROC", "NOFILE",
+ "MEMLOCK", "AS", "LOCKS", "SIGPENDING", "MSGQUEUE", "NICE", "RTPRIO",
+ "RTTIME",
+};
+static DEFINE_STRARRAY(rlimit_resources);
+
+static const char *sighow[] = { "BLOCK", "UNBLOCK", "SETMASK", };
+static DEFINE_STRARRAY(sighow);
+
+static const char *clockid[] = {
+ "REALTIME", "MONOTONIC", "PROCESS_CPUTIME_ID", "THREAD_CPUTIME_ID",
+ "MONOTONIC_RAW", "REALTIME_COARSE", "MONOTONIC_COARSE", "BOOTTIME",
+ "REALTIME_ALARM", "BOOTTIME_ALARM", "SGI_CYCLE", "TAI"
+};
+static DEFINE_STRARRAY(clockid);
+
+static const char *socket_families[] = {
+ "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
+ "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
+ "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
+ "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
+ "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
+ "ALG", "NFC", "VSOCK",
+};
+static DEFINE_STRARRAY(socket_families);
+
+static size_t syscall_arg__scnprintf_access_mode(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ size_t printed = 0;
+ int mode = arg->val;
+
+ if (mode == F_OK) /* 0 */
+ return scnprintf(bf, size, "F");
+#define P_MODE(n) \
+ if (mode & n##_OK) { \
+ printed += scnprintf(bf + printed, size - printed, "%s", #n); \
+ mode &= ~n##_OK; \
+ }
+
+ P_MODE(R);
+ P_MODE(W);
+ P_MODE(X);
+#undef P_MODE
+
+ if (mode)
+ printed += scnprintf(bf + printed, size - printed, "|%#x", mode);
+
+ return printed;
+}
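+
+/*
+ * Example output (editor's note): mode == R_OK|W_OK formats as "RW",
+ * F_OK alone as "F", and any unknown residue is appended in hex, e.g.
+ * R_OK plus a hypothetical 0x8 bit would format as "R|0x8".
+ */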
+
+#define SCA_ACCMODE syscall_arg__scnprintf_access_mode
+
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+ struct syscall_arg *arg);
+
+#define SCA_FILENAME syscall_arg__scnprintf_filename
+
+static size_t syscall_arg__scnprintf_pipe_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_FLAG(n) \
+ if (flags & O_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~O_##n; \
+ }
+
+ P_FLAG(CLOEXEC);
+ P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_PIPE_FLAGS syscall_arg__scnprintf_pipe_flags
+
+#ifndef GRND_NONBLOCK
+#define GRND_NONBLOCK 0x0001
+#endif
+#ifndef GRND_RANDOM
+#define GRND_RANDOM 0x0002
+#endif
+
+static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_FLAG(n) \
+ if (flags & GRND_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~GRND_##n; \
+ }
+
+ P_FLAG(RANDOM);
+ P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_GETRANDOM_FLAGS syscall_arg__scnprintf_getrandom_flags
+
+#define STRARRAY(name, array) \
+ { .scnprintf = SCA_STRARRAY, \
+ .parm = &strarray__##array, }
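+
+/*
+ * Expansion sketch (editor's note): STRARRAY(op, epoll_ctl_ops) in the
+ * syscall_fmts table below becomes
+ *
+ *	{ .scnprintf = SCA_STRARRAY, .parm = &strarray__epoll_ctl_ops, }
+ *
+ * the first macro argument only documents the syscall argument's name.
+ */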
+
+#include "trace/beauty/arch_errno_names.c"
+#include "trace/beauty/eventfd.c"
+#include "trace/beauty/futex_op.c"
+#include "trace/beauty/futex_val3.c"
+#include "trace/beauty/mmap.c"
+#include "trace/beauty/mode_t.c"
+#include "trace/beauty/msg_flags.c"
+#include "trace/beauty/open_flags.c"
+#include "trace/beauty/perf_event_open.c"
+#include "trace/beauty/pid.c"
+#include "trace/beauty/sched_policy.c"
+#include "trace/beauty/seccomp.c"
+#include "trace/beauty/signum.c"
+#include "trace/beauty/socket_type.c"
+#include "trace/beauty/waitid_options.c"
+
+struct syscall_arg_fmt {
+ size_t (*scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+ void *parm;
+ const char *name;
+ bool show_zero;
+};
+
+static struct syscall_fmt {
+ const char *name;
+ const char *alias;
+ struct syscall_arg_fmt arg[6];
+ u8 nr_args;
+ bool errpid;
+ bool timeout;
+ bool hexret;
+} syscall_fmts[] = {
+ { .name = "access",
+ .arg = { [1] = { .scnprintf = SCA_ACCMODE, /* mode */ }, }, },
+ { .name = "bpf",
+ .arg = { [0] = STRARRAY(cmd, bpf_cmd), }, },
+ { .name = "brk", .hexret = true,
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* brk */ }, }, },
+ { .name = "clock_gettime",
+ .arg = { [0] = STRARRAY(clk_id, clockid), }, },
+ { .name = "clone", .errpid = true, .nr_args = 5,
+ .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, },
+ [1] = { .name = "child_stack", .scnprintf = SCA_HEX, },
+ [2] = { .name = "parent_tidptr", .scnprintf = SCA_HEX, },
+ [3] = { .name = "child_tidptr", .scnprintf = SCA_HEX, },
+ [4] = { .name = "tls", .scnprintf = SCA_HEX, }, }, },
+ { .name = "close",
+ .arg = { [0] = { .scnprintf = SCA_CLOSE_FD, /* fd */ }, }, },
+ { .name = "epoll_ctl",
+ .arg = { [1] = STRARRAY(op, epoll_ctl_ops), }, },
+ { .name = "eventfd2",
+ .arg = { [1] = { .scnprintf = SCA_EFD_FLAGS, /* flags */ }, }, },
+ { .name = "fchmodat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+ { .name = "fchownat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+ { .name = "fcntl",
+ .arg = { [1] = { .scnprintf = SCA_FCNTL_CMD, /* cmd */
+ .parm = &strarrays__fcntl_cmds_arrays,
+ .show_zero = true, },
+ [2] = { .scnprintf = SCA_FCNTL_ARG, /* arg */ }, }, },
+ { .name = "flock",
+ .arg = { [1] = { .scnprintf = SCA_FLOCK, /* cmd */ }, }, },
+ { .name = "fstat", .alias = "newfstat", },
+ { .name = "fstatat", .alias = "newfstatat", },
+ { .name = "futex",
+ .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ },
+ [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, },
+ { .name = "futimesat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+ { .name = "getitimer",
+ .arg = { [0] = STRARRAY(which, itimers), }, },
+ { .name = "getpid", .errpid = true, },
+ { .name = "getpgid", .errpid = true, },
+ { .name = "getppid", .errpid = true, },
+ { .name = "getrandom",
+ .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, },
+ { .name = "getrlimit",
+ .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+ { .name = "gettid", .errpid = true, },
+ { .name = "ioctl",
+ .arg = {
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * FIXME: Make this available to all arches.
+ */
+ [1] = { .scnprintf = SCA_IOCTL_CMD, /* cmd */ },
+ [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
+#else
+ [2] = { .scnprintf = SCA_HEX, /* arg */ }, }, },
+#endif
+ { .name = "kcmp", .nr_args = 5,
+ .arg = { [0] = { .name = "pid1", .scnprintf = SCA_PID, },
+ [1] = { .name = "pid2", .scnprintf = SCA_PID, },
+ [2] = { .name = "type", .scnprintf = SCA_KCMP_TYPE, },
+ [3] = { .name = "idx1", .scnprintf = SCA_KCMP_IDX, },
+ [4] = { .name = "idx2", .scnprintf = SCA_KCMP_IDX, }, }, },
+ { .name = "keyctl",
+ .arg = { [0] = STRARRAY(option, keyctl_options), }, },
+ { .name = "kill",
+ .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "linkat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+ { .name = "lseek",
+ .arg = { [2] = STRARRAY(whence, whences), }, },
+ { .name = "lstat", .alias = "newlstat", },
+ { .name = "madvise",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
+ [2] = { .scnprintf = SCA_MADV_BHV, /* behavior */ }, }, },
+ { .name = "mkdirat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+ { .name = "mknodat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, },
+ { .name = "mlock",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+ { .name = "mlockall",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+ { .name = "mmap", .hexret = true,
+/* The standard mmap maps to old_mmap on s390x */
+#if defined(__s390x__)
+ .alias = "old_mmap",
+#endif
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
+ [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
+ [3] = { .scnprintf = SCA_MMAP_FLAGS, /* flags */ }, }, },
+ { .name = "mprotect",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
+ [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ }, }, },
+ { .name = "mq_unlink",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* u_name */ }, }, },
+ { .name = "mremap", .hexret = true,
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ },
+ [3] = { .scnprintf = SCA_MREMAP_FLAGS, /* flags */ },
+ [4] = { .scnprintf = SCA_HEX, /* new_addr */ }, }, },
+ { .name = "munlock",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+ { .name = "munmap",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* addr */ }, }, },
+ { .name = "name_to_handle_at",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "newfstatat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "open",
+ .arg = { [1] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
+ { .name = "open_by_handle_at",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
+ [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
+ { .name = "openat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ },
+ [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
+ { .name = "perf_event_open",
+ .arg = { [2] = { .scnprintf = SCA_INT, /* cpu */ },
+ [3] = { .scnprintf = SCA_FD, /* group_fd */ },
+ [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
+ { .name = "pipe2",
+ .arg = { [1] = { .scnprintf = SCA_PIPE_FLAGS, /* flags */ }, }, },
+ { .name = "pkey_alloc",
+ .arg = { [1] = { .scnprintf = SCA_PKEY_ALLOC_ACCESS_RIGHTS, /* access_rights */ }, }, },
+ { .name = "pkey_free",
+ .arg = { [0] = { .scnprintf = SCA_INT, /* key */ }, }, },
+ { .name = "pkey_mprotect",
+ .arg = { [0] = { .scnprintf = SCA_HEX, /* start */ },
+ [2] = { .scnprintf = SCA_MMAP_PROT, /* prot */ },
+ [3] = { .scnprintf = SCA_INT, /* pkey */ }, }, },
+ { .name = "poll", .timeout = true, },
+ { .name = "ppoll", .timeout = true, },
+ { .name = "prctl", .alias = "arch_prctl",
+ .arg = { [0] = { .scnprintf = SCA_PRCTL_OPTION, /* option */ },
+ [1] = { .scnprintf = SCA_PRCTL_ARG2, /* arg2 */ },
+ [2] = { .scnprintf = SCA_PRCTL_ARG3, /* arg3 */ }, }, },
+ { .name = "pread", .alias = "pread64", },
+ { .name = "preadv", .alias = "pread", },
+ { .name = "prlimit64",
+ .arg = { [1] = STRARRAY(resource, rlimit_resources), }, },
+ { .name = "pwrite", .alias = "pwrite64", },
+ { .name = "readlinkat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "recvfrom",
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ { .name = "recvmmsg",
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ { .name = "recvmsg",
+ .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ { .name = "renameat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "rt_sigaction",
+ .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "rt_sigprocmask",
+ .arg = { [0] = STRARRAY(how, sighow), }, },
+ { .name = "rt_sigqueueinfo",
+ .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "rt_tgsigqueueinfo",
+ .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "sched_setscheduler",
+ .arg = { [1] = { .scnprintf = SCA_SCHED_POLICY, /* policy */ }, }, },
+ { .name = "seccomp",
+ .arg = { [0] = { .scnprintf = SCA_SECCOMP_OP, /* op */ },
+ [1] = { .scnprintf = SCA_SECCOMP_FLAGS, /* flags */ }, }, },
+ { .name = "select", .timeout = true, },
+ { .name = "sendmmsg",
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ { .name = "sendmsg",
+ .arg = { [2] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ { .name = "sendto",
+ .arg = { [3] = { .scnprintf = SCA_MSG_FLAGS, /* flags */ }, }, },
+ { .name = "set_tid_address", .errpid = true, },
+ { .name = "setitimer",
+ .arg = { [0] = STRARRAY(which, itimers), }, },
+ { .name = "setrlimit",
+ .arg = { [0] = STRARRAY(resource, rlimit_resources), }, },
+ { .name = "socket",
+ .arg = { [0] = STRARRAY(family, socket_families),
+ [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+ [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
+ { .name = "socketpair",
+ .arg = { [0] = STRARRAY(family, socket_families),
+ [1] = { .scnprintf = SCA_SK_TYPE, /* type */ },
+ [2] = { .scnprintf = SCA_SK_PROTO, /* protocol */ }, }, },
+ { .name = "stat", .alias = "newstat", },
+ { .name = "statx",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* fdat */ },
+ [2] = { .scnprintf = SCA_STATX_FLAGS, /* flags */ },
+ [3] = { .scnprintf = SCA_STATX_MASK, /* mask */ }, }, },
+ { .name = "swapoff",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+ { .name = "swapon",
+ .arg = { [0] = { .scnprintf = SCA_FILENAME, /* specialfile */ }, }, },
+ { .name = "symlinkat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "tgkill",
+ .arg = { [2] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "tkill",
+ .arg = { [1] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, },
+ { .name = "uname", .alias = "newuname", },
+ { .name = "unlinkat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, }, },
+ { .name = "utimensat",
+ .arg = { [0] = { .scnprintf = SCA_FDAT, /* dirfd */ }, }, },
+ { .name = "wait4", .errpid = true,
+ .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
+ { .name = "waitid", .errpid = true,
+ .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, },
+};
+
+static int syscall_fmt__cmp(const void *name, const void *fmtp)
+{
+ const struct syscall_fmt *fmt = fmtp;
+ return strcmp(name, fmt->name);
+}
+
+static struct syscall_fmt *syscall_fmt__find(const char *name)
+{
+ const int nmemb = ARRAY_SIZE(syscall_fmts);
+ return bsearch(name, syscall_fmts, nmemb, sizeof(struct syscall_fmt), syscall_fmt__cmp);
+}
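+
+/*
+ * Note (editor's): bsearch() only works because syscall_fmts[] is kept
+ * sorted by .name; e.g. syscall_fmt__find("open") binary-searches down
+ * to the entry that attaches SCA_OPEN_FLAGS to the flags argument.
+ */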
+
+/*
+ * is_exit: is this "exit" or "exit_group"?
+ * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter.
+ */
+struct syscall {
+ struct event_format *tp_format;
+ int nr_args;
+ bool is_exit;
+ bool is_open;
+ struct format_field *args;
+ const char *name;
+ struct syscall_fmt *fmt;
+ struct syscall_arg_fmt *arg_fmt;
+};
+
+/*
+ * We need to have this 'calculated' boolean because in some cases we
+ * really don't know the duration of a syscall, for instance, when we
+ * start a session and some threads are waiting for a syscall to finish,
+ * say 'poll', in which case all we can do is print "( ? )" for the
+ * duration and for the start timestamp.
+ */
+static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp)
+{
+ double duration = (double)t / NSEC_PER_MSEC;
+ size_t printed = fprintf(fp, "(");
+
+ if (!calculated)
+ printed += fprintf(fp, " ");
+ else if (duration >= 1.0)
+ printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration);
+ else if (duration >= 0.01)
+ printed += color_fprintf(fp, PERF_COLOR_YELLOW, "%6.3f ms", duration);
+ else
+ printed += color_fprintf(fp, PERF_COLOR_NORMAL, "%6.3f ms", duration);
+ return printed + fprintf(fp, "): ");
+}
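+
+/*
+ * Worked example (editor's note): t = 2500000ns prints "( 2.500 ms): "
+ * in red, t = 50000ns prints "( 0.050 ms): " in yellow, anything below
+ * 0.01ms uses the normal color, and !calculated prints just "( ): ".
+ */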
+
+/**
+ * filename.ptr: The filename char pointer that will be vfs_getname'd
+ * filename.entry_str_pos: Where to insert the string translated from
+ * filename.ptr by the vfs_getname tracepoint/kprobe.
+ * ret_scnprintf: syscall args may set this to a different syscall return
+ * formatter, for instance, fcntl may return fds, file flags, etc.
+ */
+struct thread_trace {
+ u64 entry_time;
+ bool entry_pending;
+ unsigned long nr_events;
+ unsigned long pfmaj, pfmin;
+ char *entry_str;
+ double runtime_ms;
+ size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg);
+ struct {
+ unsigned long ptr;
+ short int entry_str_pos;
+ bool pending_open;
+ unsigned int namelen;
+ char *name;
+ } filename;
+ struct {
+ int max;
+ char **table;
+ } paths;
+
+ struct intlist *syscall_stats;
+};
+
+static struct thread_trace *thread_trace__new(void)
+{
+ struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace));
+
+ /* don't dereference ttrace if zalloc() failed */
+ if (ttrace) {
+ ttrace->paths.max = -1;
+ ttrace->syscall_stats = intlist__new(NULL);
+ }
+
+ return ttrace;
+}
+
+static struct thread_trace *thread__trace(struct thread *thread, FILE *fp)
+{
+ struct thread_trace *ttrace;
+
+ if (thread == NULL)
+ goto fail;
+
+ if (thread__priv(thread) == NULL)
+ thread__set_priv(thread, thread_trace__new());
+
+ if (thread__priv(thread) == NULL)
+ goto fail;
+
+ ttrace = thread__priv(thread);
+ ++ttrace->nr_events;
+
+ return ttrace;
+fail:
+ color_fprintf(fp, PERF_COLOR_RED,
+ "WARNING: not enough memory, dropping samples!\n");
+ return NULL;
+}
+
+
+void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
+ size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg))
+{
+ struct thread_trace *ttrace = thread__priv(arg->thread);
+
+ ttrace->ret_scnprintf = ret_scnprintf;
+}
+
+#define TRACE_PFMAJ (1 << 0)
+#define TRACE_PFMIN (1 << 1)
+
+static const size_t trace__entry_str_size = 2048;
+
+static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
+{
+ struct thread_trace *ttrace = thread__priv(thread);
+
+ if (fd > ttrace->paths.max) {
+ char **npath = realloc(ttrace->paths.table, (fd + 1) * sizeof(char *));
+
+ if (npath == NULL)
+ return -1;
+
+ if (ttrace->paths.max != -1) {
+ memset(npath + ttrace->paths.max + 1, 0,
+ (fd - ttrace->paths.max) * sizeof(char *));
+ } else {
+ memset(npath, 0, (fd + 1) * sizeof(char *));
+ }
+
+ ttrace->paths.table = npath;
+ ttrace->paths.max = fd;
+ }
+
+ ttrace->paths.table[fd] = strdup(pathname);
+
+ return ttrace->paths.table[fd] != NULL ? 0 : -1;
+}
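+
+/*
+ * Example (editor's note): starting from paths.max == -1, an fd of 5
+ * grows the table to 6 zeroed slots and strdup()s the pathname into
+ * slot 5; a later fd of 2 skips the growth path and just fills slot 2.
+ */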
+
+static int thread__read_fd_path(struct thread *thread, int fd)
+{
+ char linkname[PATH_MAX], pathname[PATH_MAX];
+ struct stat st;
+ int ret;
+
+ if (thread->pid_ == thread->tid) {
+ scnprintf(linkname, sizeof(linkname),
+ "/proc/%d/fd/%d", thread->pid_, fd);
+ } else {
+ scnprintf(linkname, sizeof(linkname),
+ "/proc/%d/task/%d/fd/%d", thread->pid_, thread->tid, fd);
+ }
+
+ if (lstat(linkname, &st) < 0 || st.st_size + 1 > (off_t)sizeof(pathname))
+ return -1;
+
+ ret = readlink(linkname, pathname, sizeof(pathname));
+
+ if (ret < 0 || ret > st.st_size)
+ return -1;
+
+ pathname[ret] = '\0';
+ return trace__set_fd_pathname(thread, fd, pathname);
+}
+
+static const char *thread__fd_path(struct thread *thread, int fd,
+ struct trace *trace)
+{
+ struct thread_trace *ttrace = thread__priv(thread);
+
+ if (ttrace == NULL)
+ return NULL;
+
+ if (fd < 0)
+ return NULL;
+
+ if (fd > ttrace->paths.max || ttrace->paths.table[fd] == NULL) {
+ if (!trace->live)
+ return NULL;
+ ++trace->stats.proc_getname;
+ if (thread__read_fd_path(thread, fd))
+ return NULL;
+ }
+
+ return ttrace->paths.table[fd];
+}
+
+size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int fd = arg->val;
+ size_t printed = scnprintf(bf, size, "%d", fd);
+ const char *path = thread__fd_path(arg->thread, fd, arg->trace);
+
+ if (path)
+ printed += scnprintf(bf + printed, size - printed, "<%s>", path);
+
+ return printed;
+}
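+
+/*
+ * Example output (editor's note): for fd 3 whose path is known, or
+ * resolvable via /proc during a live session, this formats
+ * "3</etc/passwd>" (path hypothetical); if the path cannot be resolved
+ * it stays a bare "3".
+ */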
+
+size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size)
+{
+ size_t printed = scnprintf(bf, size, "%d", fd);
+ struct thread *thread = machine__find_thread(trace->host, pid, pid);
+
+ if (thread) {
+ const char *path = thread__fd_path(thread, fd, trace);
+
+ if (path)
+ printed += scnprintf(bf + printed, size - printed, "<%s>", path);
+
+ thread__put(thread);
+ }
+
+ return printed;
+}
+
+static size_t syscall_arg__scnprintf_close_fd(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int fd = arg->val;
+ size_t printed = syscall_arg__scnprintf_fd(bf, size, arg);
+ struct thread_trace *ttrace = thread__priv(arg->thread);
+
+ if (ttrace && fd >= 0 && fd <= ttrace->paths.max)
+ zfree(&ttrace->paths.table[fd]);
+
+ return printed;
+}
+
+static void thread__set_filename_pos(struct thread *thread, const char *bf,
+ unsigned long ptr)
+{
+ struct thread_trace *ttrace = thread__priv(thread);
+
+ ttrace->filename.ptr = ptr;
+ ttrace->filename.entry_str_pos = bf - ttrace->entry_str;
+}
+
+static size_t syscall_arg__scnprintf_filename(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ unsigned long ptr = arg->val;
+
+ if (!arg->trace->vfs_getname)
+ return scnprintf(bf, size, "%#x", ptr);
+
+ thread__set_filename_pos(arg->thread, bf, ptr);
+ return 0;
+}
+
+static bool trace__filter_duration(struct trace *trace, double t)
+{
+ return t < (trace->duration_filter * NSEC_PER_MSEC);
+}
+
+static size_t __trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+ double ts = (double)(tstamp - trace->base_time) / NSEC_PER_MSEC;
+
+ return fprintf(fp, "%10.3f ", ts);
+}
+
+/*
+ * We're handling tstamp=0 as an undefined tstamp, i.e. like when we are
+ * using ttrace->entry_time for a thread that receives a sys_exit without
+ * first having received a sys_enter ("poll" issued before the tracing
+ * session starts, or a sys_enter lost due to ring buffer overflow).
+ */
+static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
+{
+ if (tstamp > 0)
+ return __trace__fprintf_tstamp(trace, tstamp, fp);
+
+ return fprintf(fp, " ? ");
+}
+
+static bool done = false;
+static bool interrupted = false;
+
+static void sig_handler(int sig)
+{
+ done = true;
+ interrupted = sig == SIGINT;
+}
+
+static size_t trace__fprintf_entry_head(struct trace *trace, struct thread *thread,
+ u64 duration, bool duration_calculated, u64 tstamp, FILE *fp)
+{
+ size_t printed = trace__fprintf_tstamp(trace, tstamp, fp);
+ printed += fprintf_duration(duration, duration_calculated, fp);
+
+ if (trace->multiple_threads) {
+ if (trace->show_comm)
+ printed += fprintf(fp, "%.14s/", thread__comm_str(thread));
+ printed += fprintf(fp, "%d ", thread->tid);
+ }
+
+ return printed;
+}
+
+static int trace__process_event(struct trace *trace, struct machine *machine,
+ union perf_event *event, struct perf_sample *sample)
+{
+ int ret = 0;
+
+ switch (event->header.type) {
+ case PERF_RECORD_LOST:
+ color_fprintf(trace->output, PERF_COLOR_RED,
+ "LOST %" PRIu64 " events!\n", event->lost.lost);
+ ret = machine__process_lost_event(machine, event, sample);
+ break;
+ default:
+ ret = machine__process_event(machine, event, sample);
+ break;
+ }
+
+ return ret;
+}
+
+static int trace__tool_process(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct trace *trace = container_of(tool, struct trace, tool);
+ return trace__process_event(trace, machine, event, sample);
+}
+
+static char *trace__machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+ struct machine *machine = vmachine;
+
+ if (machine->kptr_restrict_warned)
+ return NULL;
+
+ if (symbol_conf.kptr_restrict) {
+ pr_warning("Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n"
+ "Check /proc/sys/kernel/kptr_restrict.\n\n"
+ "Kernel samples will not be resolved.\n");
+ machine->kptr_restrict_warned = true;
+ return NULL;
+ }
+
+ return machine__resolve_kernel_addr(vmachine, addrp, modp);
+}
+
+static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
+{
+ int err = symbol__init(NULL);
+
+ if (err)
+ return err;
+
+ trace->host = machine__new_host();
+ if (trace->host == NULL)
+ return -ENOMEM;
+
+ err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr);
+ if (err < 0)
+ goto out;
+
+ err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
+ evlist->threads, trace__tool_process, false,
+ trace->opts.proc_map_timeout, 1);
+out:
+ if (err)
+ symbol__exit();
+
+ return err;
+}
+
+static void trace__symbols__exit(struct trace *trace)
+{
+ machine__exit(trace->host);
+ trace->host = NULL;
+
+ symbol__exit();
+}
+
+static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
+{
+ int idx;
+
+ if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
+ nr_args = sc->fmt->nr_args;
+
+ sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
+ if (sc->arg_fmt == NULL)
+ return -1;
+
+ for (idx = 0; idx < nr_args; ++idx) {
+ if (sc->fmt)
+ sc->arg_fmt[idx] = sc->fmt->arg[idx];
+ }
+
+ sc->nr_args = nr_args;
+ return 0;
+}
+
+static int syscall__set_arg_fmts(struct syscall *sc)
+{
+ struct format_field *field;
+ int idx = 0, len;
+
+ for (field = sc->args; field; field = field->next, ++idx) {
+ if (sc->fmt && sc->fmt->arg[idx].scnprintf)
+ continue;
+
+ if (strcmp(field->type, "const char *") == 0 &&
+ (strcmp(field->name, "filename") == 0 ||
+ strcmp(field->name, "path") == 0 ||
+ strcmp(field->name, "pathname") == 0))
+ sc->arg_fmt[idx].scnprintf = SCA_FILENAME;
+ else if (field->flags & FIELD_IS_POINTER)
+ sc->arg_fmt[idx].scnprintf = syscall_arg__scnprintf_hex;
+ else if (strcmp(field->type, "pid_t") == 0)
+ sc->arg_fmt[idx].scnprintf = SCA_PID;
+ else if (strcmp(field->type, "umode_t") == 0)
+ sc->arg_fmt[idx].scnprintf = SCA_MODE_T;
+ else if ((strcmp(field->type, "int") == 0 ||
+ strcmp(field->type, "unsigned int") == 0 ||
+ strcmp(field->type, "long") == 0) &&
+ (len = strlen(field->name)) >= 2 &&
+ strcmp(field->name + len - 2, "fd") == 0) {
+ /*
+ * /sys/kernel/tracing/events/syscalls/sys_enter*
+ * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
+ * 65 int
+ * 23 unsigned int
+ * 7 unsigned long
+ */
+ sc->arg_fmt[idx].scnprintf = SCA_FD;
+ }
+ }
+
+ return 0;
+}
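+
+/*
+ * Example (editor's note): for a syscall with no explicit syscall_fmts[]
+ * entry the heuristics above apply: a "const char *filename" argument
+ * gets SCA_FILENAME, a "pid_t" gets SCA_PID, and an int field whose name
+ * ends in "fd" (e.g. fsync's "fd") gets SCA_FD with its resolved path.
+ */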
+
+static int trace__read_syscall_info(struct trace *trace, int id)
+{
+ char tp_name[128];
+ struct syscall *sc;
+ const char *name = syscalltbl__name(trace->sctbl, id);
+
+ if (name == NULL)
+ return -1;
+
+ if (id > trace->syscalls.max) {
+ struct syscall *nsyscalls = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+ if (nsyscalls == NULL)
+ return -1;
+
+ if (trace->syscalls.max != -1) {
+ memset(nsyscalls + trace->syscalls.max + 1, 0,
+ (id - trace->syscalls.max) * sizeof(*sc));
+ } else {
+ memset(nsyscalls, 0, (id + 1) * sizeof(*sc));
+ }
+
+ trace->syscalls.table = nsyscalls;
+ trace->syscalls.max = id;
+ }
+
+ sc = trace->syscalls.table + id;
+ sc->name = name;
+
+ sc->fmt = syscall_fmt__find(sc->name);
+
+ snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->name);
+ sc->tp_format = trace_event__tp_format("syscalls", tp_name);
+
+ if (IS_ERR(sc->tp_format) && sc->fmt && sc->fmt->alias) {
+ snprintf(tp_name, sizeof(tp_name), "sys_enter_%s", sc->fmt->alias);
+ sc->tp_format = trace_event__tp_format("syscalls", tp_name);
+ }
+
+ if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
+ return -1;
+
+ if (IS_ERR(sc->tp_format))
+ return -1;
+
+ sc->args = sc->tp_format->format.fields;
+ /*
+ * We need to check and discard the first field, '__syscall_nr' or
+ * 'nr', which holds the syscall number and is redundant here. Drop
+ * it if present; note that it does not exist on older kernels.
+ */
+ if (sc->args && (!strcmp(sc->args->name, "__syscall_nr") || !strcmp(sc->args->name, "nr"))) {
+ sc->args = sc->args->next;
+ --sc->nr_args;
+ }
+
+ sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
+ sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
+
+ return syscall__set_arg_fmts(sc);
+}
+
+static int trace__validate_ev_qualifier(struct trace *trace)
+{
+ int err = 0, i;
+ size_t nr_allocated;
+ struct str_node *pos;
+
+ trace->ev_qualifier_ids.nr = strlist__nr_entries(trace->ev_qualifier);
+ trace->ev_qualifier_ids.entries = malloc(trace->ev_qualifier_ids.nr *
+ sizeof(trace->ev_qualifier_ids.entries[0]));
+
+ if (trace->ev_qualifier_ids.entries == NULL) {
+ fputs("Error:\tNot enough memory for allocating events qualifier ids\n",
+ trace->output);
+ err = -EINVAL;
+ goto out;
+ }
+
+ nr_allocated = trace->ev_qualifier_ids.nr;
+ i = 0;
+
+ strlist__for_each_entry(pos, trace->ev_qualifier) {
+ const char *sc = pos->s;
+ int id = syscalltbl__id(trace->sctbl, sc), match_next = -1;
+
+ if (id < 0) {
+ id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next);
+ if (id >= 0)
+ goto matches;
+
+ if (err == 0) {
+ fputs("Error:\tInvalid syscall ", trace->output);
+ err = -EINVAL;
+ } else {
+ fputs(", ", trace->output);
+ }
+
+ fputs(sc, trace->output);
+ }
+matches:
+ trace->ev_qualifier_ids.entries[i++] = id;
+ if (match_next == -1)
+ continue;
+
+ while (1) {
+ id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next);
+ if (id < 0)
+ break;
+ if (nr_allocated == trace->ev_qualifier_ids.nr) {
+ void *entries;
+
+ nr_allocated += 8;
+ entries = realloc(trace->ev_qualifier_ids.entries,
+ nr_allocated * sizeof(trace->ev_qualifier_ids.entries[0]));
+ if (entries == NULL) {
+ err = -ENOMEM;
+ fputs("\nError:\t Not enough memory for parsing\n", trace->output);
+ goto out_free;
+ }
+ trace->ev_qualifier_ids.entries = entries;
+ }
+ trace->ev_qualifier_ids.nr++;
+ trace->ev_qualifier_ids.entries[i++] = id;
+ }
+ }
+
+ if (err < 0) {
+ fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'"
+ "\nHint:\tand: 'man syscalls'\n", trace->output);
+out_free:
+ zfree(&trace->ev_qualifier_ids.entries);
+ trace->ev_qualifier_ids.nr = 0;
+ }
+out:
+ return err;
+}
+
+/*
+ * args is to be interpreted as a series of longs but we need to handle
+ * 8-byte unaligned accesses. args points to raw_data within the event
+ * and raw_data is guaranteed to be 8-byte unaligned because it is
+ * preceded by raw_size which is a u32. So we need to copy args to a temp
+ * variable to read it. Most notably this avoids extended load instructions
+ * on unaligned addresses
+ */
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx)
+{
+ unsigned long val;
+ unsigned char *p = arg->args + sizeof(unsigned long) * idx;
+
+ memcpy(&val, p, sizeof(val));
+ return val;
+}
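+
+/*
+ * Editor's sketch of why the memcpy() matters: the direct read
+ *
+ *	return *(unsigned long *)(arg->args + sizeof(unsigned long) * idx);
+ *
+ * dereferences a pointer that may only be 4-byte aligned (raw_data
+ * follows the u32 raw_size), which is undefined behaviour and a trap or
+ * slow path on several architectures; copying into a local is always
+ * safe.
+ */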
+
+static size_t syscall__scnprintf_name(struct syscall *sc, char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ if (sc->arg_fmt && sc->arg_fmt[arg->idx].name)
+ return scnprintf(bf, size, "%s: ", sc->arg_fmt[arg->idx].name);
+
+ return scnprintf(bf, size, "arg%d: ", arg->idx);
+}
+
+static size_t syscall__scnprintf_val(struct syscall *sc, char *bf, size_t size,
+ struct syscall_arg *arg, unsigned long val)
+{
+ if (sc->arg_fmt && sc->arg_fmt[arg->idx].scnprintf) {
+ arg->val = val;
+ if (sc->arg_fmt[arg->idx].parm)
+ arg->parm = sc->arg_fmt[arg->idx].parm;
+ return sc->arg_fmt[arg->idx].scnprintf(bf, size, arg);
+ }
+ return scnprintf(bf, size, "%ld", val);
+}
+
+static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size,
+ unsigned char *args, struct trace *trace,
+ struct thread *thread)
+{
+ size_t printed = 0;
+ unsigned long val;
+ u8 bit = 1;
+ struct syscall_arg arg = {
+ .args = args,
+ .idx = 0,
+ .mask = 0,
+ .trace = trace,
+ .thread = thread,
+ };
+ struct thread_trace *ttrace = thread__priv(thread);
+
+	/*
+	 * Syscalls like fcntl set this in their 'cmd' formatter to pick the
+	 * right formatter for the return value (an fd? file flags?); it is
+	 * not needed for syscalls that always return a given type, say an fd.
+	 */
+ ttrace->ret_scnprintf = NULL;
+
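+	/*
+	 * arg.mask carries one bit per argument: a formatter may set bits
+	 * for arguments it already consumed, and those are skipped below.
+	 */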
+ if (sc->args != NULL) {
+ struct format_field *field;
+
+ for (field = sc->args; field;
+ field = field->next, ++arg.idx, bit <<= 1) {
+ if (arg.mask & bit)
+ continue;
+
+ val = syscall_arg__val(&arg, arg.idx);
+
+			/*
+			 * Suppress this argument if its value is zero and
+			 * we don't have a string associated with it in a
+			 * strarray.
+			 */
+ if (val == 0 &&
+ !(sc->arg_fmt &&
+ (sc->arg_fmt[arg.idx].show_zero ||
+ sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAY ||
+ sc->arg_fmt[arg.idx].scnprintf == SCA_STRARRAYS) &&
+ sc->arg_fmt[arg.idx].parm))
+ continue;
+
+ printed += scnprintf(bf + printed, size - printed,
+ "%s%s: ", printed ? ", " : "", field->name);
+ printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+ }
+ } else if (IS_ERR(sc->tp_format)) {
+ /*
+ * If we managed to read the tracepoint /format file, then we
+ * may end up not having any args, like with gettid(), so only
+ * print the raw args when we didn't manage to read it.
+ */
+ while (arg.idx < sc->nr_args) {
+ if (arg.mask & bit)
+ goto next_arg;
+ val = syscall_arg__val(&arg, arg.idx);
+ if (printed)
+ printed += scnprintf(bf + printed, size - printed, ", ");
+ printed += syscall__scnprintf_name(sc, bf + printed, size - printed, &arg);
+ printed += syscall__scnprintf_val(sc, bf + printed, size - printed, &arg, val);
+next_arg:
+ ++arg.idx;
+ bit <<= 1;
+ }
+ }
+
+ return printed;
+}
+
+typedef int (*tracepoint_handler)(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event,
+ struct perf_sample *sample);
+
+static struct syscall *trace__syscall_info(struct trace *trace,
+ struct perf_evsel *evsel, int id)
+{
+
+ if (id < 0) {
+
+ /*
+ * XXX: Noticed on x86_64, reproduced as far back as 3.0.36, haven't tried
+ * before that, leaving at a higher verbosity level till that is
+ * explained. Reproduced with plain ftrace with:
+ *
+ * echo 1 > /t/events/raw_syscalls/sys_exit/enable
+ * grep "NR -1 " /t/trace_pipe
+ *
+ * After generating some load on the machine.
+ */
+ if (verbose > 1) {
+ static u64 n;
+ fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
+ id, perf_evsel__name(evsel), ++n);
+ }
+ return NULL;
+ }
+
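+	/*
+	 * The syscall table is filled lazily: read this syscall's tracepoint
+	 * format on first use.
+	 */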
+ if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL) &&
+ trace__read_syscall_info(trace, id))
+ goto out_cant_read;
+
+ if ((id > trace->syscalls.max || trace->syscalls.table[id].name == NULL))
+ goto out_cant_read;
+
+ return &trace->syscalls.table[id];
+
+out_cant_read:
+ if (verbose > 0) {
+ fprintf(trace->output, "Problems reading syscall %d", id);
+ if (id <= trace->syscalls.max && trace->syscalls.table[id].name != NULL)
+ fprintf(trace->output, "(%s)", trace->syscalls.table[id].name);
+ fputs(" information\n", trace->output);
+ }
+ return NULL;
+}
+
+static void thread__update_stats(struct thread_trace *ttrace,
+ int id, struct perf_sample *sample)
+{
+ struct int_node *inode;
+ struct stats *stats;
+ u64 duration = 0;
+
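+	/* One stats struct per syscall id, allocated on first use. */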
+ inode = intlist__findnew(ttrace->syscall_stats, id);
+ if (inode == NULL)
+ return;
+
+ stats = inode->priv;
+ if (stats == NULL) {
+ stats = malloc(sizeof(struct stats));
+ if (stats == NULL)
+ return;
+ init_stats(stats);
+ inode->priv = stats;
+ }
+
+ if (ttrace->entry_time && sample->time > ttrace->entry_time)
+ duration = sample->time - ttrace->entry_time;
+
+ update_stats(stats, duration);
+}
+
+static int trace__printf_interrupted_entry(struct trace *trace)
+{
+ struct thread_trace *ttrace;
+ size_t printed;
+
+ if (trace->failure_only || trace->current == NULL)
+ return 0;
+
+ ttrace = thread__priv(trace->current);
+
+ if (!ttrace->entry_pending)
+ return 0;
+
+ printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output);
+ printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str);
+ ttrace->entry_pending = false;
+
+ return printed;
+}
+
+static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel,
+ struct perf_sample *sample, struct thread *thread)
+{
+ int printed = 0;
+
+ if (trace->print_sample) {
+ double ts = (double)sample->time / NSEC_PER_MSEC;
+
+ printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
+ perf_evsel__name(evsel), ts,
+ thread__comm_str(thread),
+ sample->pid, sample->tid, sample->cpu);
+ }
+
+ return printed;
+}
+
+static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ char *msg;
+ void *args;
+ size_t printed = 0;
+ struct thread *thread;
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+ struct syscall *sc = trace__syscall_info(trace, evsel, id);
+ struct thread_trace *ttrace;
+
+ if (sc == NULL)
+ return -1;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ ttrace = thread__trace(thread, trace->output);
+ if (ttrace == NULL)
+ goto out_put;
+
+ trace__fprintf_sample(trace, evsel, sample, thread);
+
+ args = perf_evsel__sc_tp_ptr(evsel, args, sample);
+
+ if (ttrace->entry_str == NULL) {
+ ttrace->entry_str = malloc(trace__entry_str_size);
+ if (!ttrace->entry_str)
+ goto out_put;
+ }
+
+ if (!(trace->duration_filter || trace->summary_only || trace->min_stack))
+ trace__printf_interrupted_entry(trace);
+
+ ttrace->entry_time = sample->time;
+ msg = ttrace->entry_str;
+ printed += scnprintf(msg + printed, trace__entry_str_size - printed, "%s(", sc->name);
+
+ printed += syscall__scnprintf_args(sc, msg + printed, trace__entry_str_size - printed,
+ args, trace, thread);
+
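+	/*
+	 * exit/exit_group do not return, so print the entry right away;
+	 * for everything else defer printing to the matching sys_exit,
+	 * where the duration can be added.
+	 */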
+ if (sc->is_exit) {
+ if (!(trace->duration_filter || trace->summary_only || trace->failure_only || trace->min_stack)) {
+ trace__fprintf_entry_head(trace, thread, 0, false, ttrace->entry_time, trace->output);
+ fprintf(trace->output, "%-70s)\n", ttrace->entry_str);
+ }
+ } else {
+ ttrace->entry_pending = true;
+ /* See trace__vfs_getname & trace__sys_exit */
+ ttrace->filename.pending_open = false;
+ }
+
+ if (trace->current != thread) {
+ thread__put(trace->current);
+ trace->current = thread__get(thread);
+ }
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int trace__fprintf_sys_enter(struct trace *trace, struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct thread_trace *ttrace;
+ struct thread *thread;
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
+ struct syscall *sc = trace__syscall_info(trace, evsel, id);
+ char msg[1024];
+ void *args;
+
+ if (sc == NULL)
+ return -1;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ ttrace = thread__trace(thread, trace->output);
+	/*
+	 * We need to get ttrace just to make sure it is there when
+	 * syscall__scnprintf_args() and the rest of the beautifiers touch it
+	 * via struct syscall_arg.
+	 */
+ if (ttrace == NULL)
+ goto out_put;
+
+ args = perf_evsel__sc_tp_ptr(evsel, args, sample);
+ syscall__scnprintf_args(sc, msg, sizeof(msg), args, trace, thread);
+ fprintf(trace->output, "%s", msg);
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct callchain_cursor *cursor)
+{
+ struct addr_location al;
+ int max_stack = evsel->attr.sample_max_stack ?
+ evsel->attr.sample_max_stack :
+ trace->max_stack;
+
+ if (machine__resolve(trace->host, &al, sample) < 0 ||
+ thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, max_stack))
+ return -1;
+
+ return 0;
+}
+
+static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sample)
+{
+ /* TODO: user-configurable print_opts */
+ const unsigned int print_opts = EVSEL__PRINT_SYM |
+ EVSEL__PRINT_DSO |
+ EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+ return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output);
+}
+
+static const char *errno_to_name(struct perf_evsel *evsel, int err)
+{
+ struct perf_env *env = perf_evsel__env(evsel);
+ const char *arch_name = perf_env__arch(env);
+
+ return arch_syscalls__strerrno(arch_name, err);
+}
+
+static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ long ret;
+ u64 duration = 0;
+ bool duration_calculated = false;
+ struct thread *thread;
+ int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0;
+ struct syscall *sc = trace__syscall_info(trace, evsel, id);
+ struct thread_trace *ttrace;
+
+ if (sc == NULL)
+ return -1;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ ttrace = thread__trace(thread, trace->output);
+ if (ttrace == NULL)
+ goto out_put;
+
+ trace__fprintf_sample(trace, evsel, sample, thread);
+
+ if (trace->summary)
+ thread__update_stats(ttrace, id, sample);
+
+ ret = perf_evsel__sc_tp_uint(evsel, ret, sample);
+
+ if (sc->is_open && ret >= 0 && ttrace->filename.pending_open) {
+ trace__set_fd_pathname(thread, ret, ttrace->filename.name);
+ ttrace->filename.pending_open = false;
+ ++trace->stats.vfs_getname;
+ }
+
+ if (ttrace->entry_time) {
+ duration = sample->time - ttrace->entry_time;
+ if (trace__filter_duration(trace, duration))
+ goto out;
+ duration_calculated = true;
+ } else if (trace->duration_filter)
+ goto out;
+
+ if (sample->callchain) {
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out;
+ callchain_ret = 1;
+ }
+ }
+
+ if (trace->summary_only || (ret >= 0 && trace->failure_only))
+ goto out;
+
+ trace__fprintf_entry_head(trace, thread, duration, duration_calculated, ttrace->entry_time, trace->output);
+
+ if (ttrace->entry_pending) {
+ fprintf(trace->output, "%-70s", ttrace->entry_str);
+ } else {
+ fprintf(trace->output, " ... [");
+ color_fprintf(trace->output, PERF_COLOR_YELLOW, "continued");
+ fprintf(trace->output, "]: %s()", sc->name);
+ }
+
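+	/*
+	 * Format the return value: negative values print as -1 with the
+	 * errno name and description, otherwise a syscall-specific formatter
+	 * (timeout, hex, child pid) is tried before falling back to a plain
+	 * signed long.
+	 */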
+ if (sc->fmt == NULL) {
+ if (ret < 0)
+ goto errno_print;
+signed_print:
+ fprintf(trace->output, ") = %ld", ret);
+ } else if (ret < 0) {
+errno_print: {
+ char bf[STRERR_BUFSIZE];
+ const char *emsg = str_error_r(-ret, bf, sizeof(bf)),
+ *e = errno_to_name(evsel, -ret);
+
+ fprintf(trace->output, ") = -1 %s %s", e, emsg);
+ }
+ } else if (ret == 0 && sc->fmt->timeout)
+ fprintf(trace->output, ") = 0 Timeout");
+ else if (ttrace->ret_scnprintf) {
+ char bf[1024];
+ struct syscall_arg arg = {
+ .val = ret,
+ .thread = thread,
+ .trace = trace,
+ };
+ ttrace->ret_scnprintf(bf, sizeof(bf), &arg);
+ ttrace->ret_scnprintf = NULL;
+ fprintf(trace->output, ") = %s", bf);
+ } else if (sc->fmt->hexret)
+ fprintf(trace->output, ") = %#lx", ret);
+ else if (sc->fmt->errpid) {
+ struct thread *child = machine__find_thread(trace->host, ret, ret);
+
+ if (child != NULL) {
+ fprintf(trace->output, ") = %ld", ret);
+ if (child->comm_set)
+ fprintf(trace->output, " (%s)", thread__comm_str(child));
+ thread__put(child);
+ }
+ } else
+ goto signed_print;
+
+ fputc('\n', trace->output);
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
+ ttrace->entry_pending = false;
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ struct thread_trace *ttrace;
+ size_t filename_len, entry_str_len, to_move;
+ ssize_t remaining_space;
+ char *pos;
+ const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
+
+ if (!thread)
+ goto out;
+
+ ttrace = thread__priv(thread);
+ if (!ttrace)
+ goto out_put;
+
+ filename_len = strlen(filename);
+ if (filename_len == 0)
+ goto out_put;
+
+ if (ttrace->filename.namelen < filename_len) {
+ char *f = realloc(ttrace->filename.name, filename_len + 1);
+
+ if (f == NULL)
+ goto out_put;
+
+ ttrace->filename.namelen = filename_len;
+ ttrace->filename.name = f;
+ }
+
+ strcpy(ttrace->filename.name, filename);
+ ttrace->filename.pending_open = true;
+
+ if (!ttrace->filename.ptr)
+ goto out_put;
+
+ entry_str_len = strlen(ttrace->entry_str);
+ remaining_space = trace__entry_str_size - entry_str_len - 1; /* \0 */
+ if (remaining_space <= 0)
+ goto out_put;
+
+ if (filename_len > (size_t)remaining_space) {
+ filename += filename_len - remaining_space;
+ filename_len = remaining_space;
+ }
+
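+	/*
+	 * Splice the pathname into the pending entry string at the position
+	 * recorded for the pointer argument, shifting the tail to make room.
+	 */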
+ to_move = entry_str_len - ttrace->filename.entry_str_pos + 1; /* \0 */
+ pos = ttrace->entry_str + ttrace->filename.entry_str_pos;
+ memmove(pos + filename_len, pos, to_move);
+ memcpy(pos, filename, filename_len);
+
+ ttrace->filename.ptr = 0;
+ ttrace->filename.entry_str_pos = 0;
+out_put:
+ thread__put(thread);
+out:
+ return 0;
+}
+
+static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ double runtime_ms = (double)runtime / NSEC_PER_MSEC;
+ struct thread *thread = machine__findnew_thread(trace->host,
+ sample->pid,
+ sample->tid);
+ struct thread_trace *ttrace = thread__trace(thread, trace->output);
+
+ if (ttrace == NULL)
+ goto out_dump;
+
+ ttrace->runtime_ms += runtime_ms;
+ trace->runtime_ms += runtime_ms;
+out_put:
+ thread__put(thread);
+ return 0;
+
+out_dump:
+	fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 "\n",
+ evsel->name,
+ perf_evsel__strval(evsel, sample, "comm"),
+ (pid_t)perf_evsel__intval(evsel, sample, "pid"),
+ runtime,
+ perf_evsel__intval(evsel, sample, "vruntime"));
+ goto out_put;
+}
+
+static int bpf_output__printer(enum binary_printer_ops op,
+ unsigned int val, void *extra __maybe_unused, FILE *fp)
+{
+ unsigned char ch = (unsigned char)val;
+
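+	/* Print only character data; all other ops are layout no-ops here. */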
+ switch (op) {
+ case BINARY_PRINT_CHAR_DATA:
+ return fprintf(fp, "%c", isprint(ch) ? ch : '.');
+ case BINARY_PRINT_DATA_BEGIN:
+ case BINARY_PRINT_LINE_BEGIN:
+ case BINARY_PRINT_ADDR:
+ case BINARY_PRINT_NUM_DATA:
+ case BINARY_PRINT_NUM_PAD:
+ case BINARY_PRINT_SEP:
+ case BINARY_PRINT_CHAR_PAD:
+ case BINARY_PRINT_LINE_END:
+ case BINARY_PRINT_DATA_END:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static void bpf_output__fprintf(struct trace *trace,
+ struct perf_sample *sample)
+{
+ binary__fprintf(sample->raw_data, sample->raw_size, 8,
+ bpf_output__printer, NULL, trace->output);
+}
+
+static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ int callchain_ret = 0;
+
+ if (sample->callchain) {
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out;
+ callchain_ret = 1;
+ }
+ }
+
+ trace__printf_interrupted_entry(trace);
+ trace__fprintf_tstamp(trace, sample->time, trace->output);
+
+ if (trace->trace_syscalls)
+ fprintf(trace->output, "( ): ");
+
+ fprintf(trace->output, "%s:", evsel->name);
+
+ if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel == trace->syscalls.events.augmented)
+ trace__fprintf_sys_enter(trace, evsel, sample);
+ else
+ bpf_output__fprintf(trace, sample);
+ } else if (evsel->tp_format) {
+ if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
+ trace__fprintf_sys_enter(trace, evsel, sample)) {
+ event_format__fprintf(evsel->tp_format, sample->cpu,
+ sample->raw_data, sample->raw_size,
+ trace->output);
+ }
+ }
+
+ fprintf(trace->output, "\n");
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
+ return 0;
+}
+
+static void print_location(FILE *f, struct perf_sample *sample,
+ struct addr_location *al,
+ bool print_dso, bool print_sym)
+{
+
+ if ((verbose > 0 || print_dso) && al->map)
+ fprintf(f, "%s@", al->map->dso->long_name);
+
+ if ((verbose > 0 || print_sym) && al->sym)
+ fprintf(f, "%s+0x%" PRIx64, al->sym->name,
+ al->addr - al->sym->start);
+ else if (al->map)
+ fprintf(f, "0x%" PRIx64, al->addr);
+ else
+ fprintf(f, "0x%" PRIx64, sample->addr);
+}
+
+static int trace__pgfault(struct trace *trace,
+ struct perf_evsel *evsel,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct addr_location al;
+ char map_type = 'd';
+ struct thread_trace *ttrace;
+ int err = -1;
+ int callchain_ret = 0;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+
+ if (sample->callchain) {
+ callchain_ret = trace__resolve_callchain(trace, evsel, sample, &callchain_cursor);
+ if (callchain_ret == 0) {
+ if (callchain_cursor.nr < trace->min_stack)
+ goto out_put;
+ callchain_ret = 1;
+ }
+ }
+
+ ttrace = thread__trace(thread, trace->output);
+ if (ttrace == NULL)
+ goto out_put;
+
+ if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
+ ttrace->pfmaj++;
+ else
+ ttrace->pfmin++;
+
+ if (trace->summary_only)
+ goto out;
+
+ thread__find_symbol(thread, sample->cpumode, sample->ip, &al);
+
+ trace__fprintf_entry_head(trace, thread, 0, true, sample->time, trace->output);
+
+ fprintf(trace->output, "%sfault [",
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
+ "maj" : "min");
+
+ print_location(trace->output, sample, &al, false, true);
+
+ fprintf(trace->output, "] => ");
+
+ thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
+
+ if (!al.map) {
+ thread__find_symbol(thread, sample->cpumode, sample->addr, &al);
+
+ if (al.map)
+ map_type = 'x';
+ else
+ map_type = '?';
+ }
+
+ print_location(trace->output, sample, &al, true, false);
+
+ fprintf(trace->output, " (%c%c)\n", map_type, al.level);
+
+ if (callchain_ret > 0)
+ trace__fprintf_callchain(trace, sample);
+ else if (callchain_ret < 0)
+ pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+out:
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static void trace__set_base_time(struct trace *trace,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+	/*
+	 * BPF events were not setting PERF_SAMPLE_TIME, so be robust and
+	 * don't use sample->time unconditionally. We may end up having some
+	 * other event in the future without PERF_SAMPLE_TIME for a good
+	 * reason, i.e. we may not be interested in its timestamps, just in
+	 * it taking place, picking some piece of information when it
+	 * appears in our event stream (vfs_getname comes to mind).
+	 */
+ if (trace->base_time == 0 && !trace->full_time &&
+ (evsel->attr.sample_type & PERF_SAMPLE_TIME))
+ trace->base_time = sample->time;
+}
+
+static int trace__process_sample(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine __maybe_unused)
+{
+ struct trace *trace = container_of(tool, struct trace, tool);
+ struct thread *thread;
+ int err = 0;
+
+ tracepoint_handler handler = evsel->handler;
+
+ thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
+ if (thread && thread__is_filtered(thread))
+ goto out;
+
+ trace__set_base_time(trace, evsel, sample);
+
+ if (handler) {
+ ++trace->nr_events;
+ handler(trace, evsel, event, sample);
+ }
+out:
+ thread__put(thread);
+ return err;
+}
+
+static int trace__record(struct trace *trace, int argc, const char **argv)
+{
+ unsigned int rec_argc, i, j;
+ const char **rec_argv;
+ const char * const record_args[] = {
+ "record",
+ "-R",
+ "-m", "1024",
+ "-c", "1",
+ };
+
+ const char * const sc_args[] = { "-e", };
+ unsigned int sc_args_nr = ARRAY_SIZE(sc_args);
+ const char * const majpf_args[] = { "-e", "major-faults" };
+ unsigned int majpf_args_nr = ARRAY_SIZE(majpf_args);
+ const char * const minpf_args[] = { "-e", "minor-faults" };
+ unsigned int minpf_args_nr = ARRAY_SIZE(minpf_args);
+
+ /* +1 is for the event string below */
+ rec_argc = ARRAY_SIZE(record_args) + sc_args_nr + 1 +
+ majpf_args_nr + minpf_args_nr + argc;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+
+ if (rec_argv == NULL)
+ return -ENOMEM;
+
+ j = 0;
+ for (i = 0; i < ARRAY_SIZE(record_args); i++)
+ rec_argv[j++] = record_args[i];
+
+ if (trace->trace_syscalls) {
+ for (i = 0; i < sc_args_nr; i++)
+ rec_argv[j++] = sc_args[i];
+
+ /* event string may be different for older kernels - e.g., RHEL6 */
+ if (is_valid_tracepoint("raw_syscalls:sys_enter"))
+ rec_argv[j++] = "raw_syscalls:sys_enter,raw_syscalls:sys_exit";
+ else if (is_valid_tracepoint("syscalls:sys_enter"))
+ rec_argv[j++] = "syscalls:sys_enter,syscalls:sys_exit";
+ else {
+ pr_err("Neither raw_syscalls nor syscalls events exist.\n");
+ free(rec_argv);
+ return -1;
+ }
+ }
+
+ if (trace->trace_pgfaults & TRACE_PFMAJ)
+ for (i = 0; i < majpf_args_nr; i++)
+ rec_argv[j++] = majpf_args[i];
+
+ if (trace->trace_pgfaults & TRACE_PFMIN)
+ for (i = 0; i < minpf_args_nr; i++)
+ rec_argv[j++] = minpf_args[i];
+
+ for (i = 0; i < (unsigned int)argc; i++)
+ rec_argv[j++] = argv[i];
+
+ return cmd_record(j, rec_argv);
+}
+
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
+
+static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
+{
+ bool found = false;
+ struct perf_evsel *evsel, *tmp;
+ struct parse_events_error err = { .idx = 0, };
+ int ret = parse_events(evlist, "probe:vfs_getname*", &err);
+
+ if (ret)
+ return false;
+
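+	/*
+	 * Keep only the probe:vfs_getname variants that provide a "pathname"
+	 * field; remove the others from the evlist.
+	 */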
+ evlist__for_each_entry_safe(evlist, evsel, tmp) {
+ if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+ continue;
+
+ if (perf_evsel__field(evsel, "pathname")) {
+ evsel->handler = trace__vfs_getname;
+ found = true;
+ continue;
+ }
+
+ list_del_init(&evsel->node);
+ evsel->evlist = NULL;
+ perf_evsel__delete(evsel);
+ }
+
+ return found;
+}
+
+static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
+{
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .mmap_data = 1,
+ };
+
+ attr.config = config;
+ attr.sample_period = 1;
+
+ event_attr_init(&attr);
+
+ evsel = perf_evsel__new(&attr);
+ if (evsel)
+ evsel->handler = trace__pgfault;
+
+ return evsel;
+}
+
+static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample)
+{
+ const u32 type = event->header.type;
+ struct perf_evsel *evsel;
+
+ if (type != PERF_RECORD_SAMPLE) {
+ trace__process_event(trace, trace->host, event, sample);
+ return;
+ }
+
+ evsel = perf_evlist__id2evsel(trace->evlist, sample->id);
+ if (evsel == NULL) {
+ fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id);
+ return;
+ }
+
+ trace__set_base_time(trace, evsel, sample);
+
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+ sample->raw_data == NULL) {
+ fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
+ perf_evsel__name(evsel), sample->tid,
+ sample->cpu, sample->raw_size);
+ } else {
+ tracepoint_handler handler = evsel->handler;
+ handler(trace, evsel, event, sample);
+ }
+}
+
+static int trace__add_syscall_newtp(struct trace *trace)
+{
+ int ret = -1;
+ struct perf_evlist *evlist = trace->evlist;
+ struct perf_evsel *sys_enter, *sys_exit;
+
+ sys_enter = perf_evsel__raw_syscall_newtp("sys_enter", trace__sys_enter);
+ if (sys_enter == NULL)
+ goto out;
+
+ if (perf_evsel__init_sc_tp_ptr_field(sys_enter, args))
+ goto out_delete_sys_enter;
+
+ sys_exit = perf_evsel__raw_syscall_newtp("sys_exit", trace__sys_exit);
+ if (sys_exit == NULL)
+ goto out_delete_sys_enter;
+
+ if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
+ goto out_delete_sys_exit;
+
+ perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
+ perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
+
+ perf_evlist__add(evlist, sys_enter);
+ perf_evlist__add(evlist, sys_exit);
+
+ if (callchain_param.enabled && !trace->kernel_syscallchains) {
+		/*
+		 * We're interested only in the user space callchain leading
+		 * to the syscall; allow overriding that for debugging reasons
+		 * via --kernel-syscall-graph.
+		 */
+ sys_exit->attr.exclude_callchain_kernel = 1;
+ }
+
+ trace->syscalls.events.sys_enter = sys_enter;
+ trace->syscalls.events.sys_exit = sys_exit;
+
+ ret = 0;
+out:
+ return ret;
+
+out_delete_sys_exit:
+ perf_evsel__delete_priv(sys_exit);
+out_delete_sys_enter:
+ perf_evsel__delete_priv(sys_enter);
+ goto out;
+}
+
+static int trace__set_ev_qualifier_filter(struct trace *trace)
+{
+ int err = -1;
+ struct perf_evsel *sys_exit;
+ char *filter = asprintf_expr_inout_ints("id", !trace->not_ev_qualifier,
+ trace->ev_qualifier_ids.nr,
+ trace->ev_qualifier_ids.entries);
+
+ if (filter == NULL)
+ goto out_enomem;
+
+ if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
+ filter)) {
+ sys_exit = trace->syscalls.events.sys_exit;
+ err = perf_evsel__append_tp_filter(sys_exit, filter);
+ }
+
+ free(filter);
+out:
+ return err;
+out_enomem:
+ errno = ENOMEM;
+ goto out;
+}
+
+static int trace__set_filter_loop_pids(struct trace *trace)
+{
+ unsigned int nr = 1;
+ pid_t pids[32] = {
+ getpid(),
+ };
+ struct thread *thread = machine__find_thread(trace->host, pids[0], pids[0]);
+
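+	/*
+	 * Always filter our own pid; when running over ssh, also walk up to
+	 * the sshd ancestor and filter it, to avoid tracing the feedback
+	 * loop created by sshd delivering our own output.
+	 */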
+ while (thread && nr < ARRAY_SIZE(pids)) {
+ struct thread *parent = machine__find_thread(trace->host, thread->ppid, thread->ppid);
+
+ if (parent == NULL)
+ break;
+
+ if (!strcmp(thread__comm_str(parent), "sshd")) {
+ pids[nr++] = parent->tid;
+ break;
+ }
+ thread = parent;
+ }
+
+ return perf_evlist__set_filter_pids(trace->evlist, nr, pids);
+}
+
+static int trace__run(struct trace *trace, int argc, const char **argv)
+{
+ struct perf_evlist *evlist = trace->evlist;
+ struct perf_evsel *evsel, *pgfault_maj = NULL, *pgfault_min = NULL;
+ int err = -1, i;
+ unsigned long before;
+ const bool forks = argc > 0;
+ bool draining = false;
+
+ trace->live = true;
+
+ if (trace->trace_syscalls && trace__add_syscall_newtp(trace))
+ goto out_error_raw_syscalls;
+
+ if (trace->trace_syscalls)
+ trace->vfs_getname = perf_evlist__add_vfs_getname(evlist);
+
+ if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
+ pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+ if (pgfault_maj == NULL)
+ goto out_error_mem;
+ perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+ perf_evlist__add(evlist, pgfault_maj);
+ }
+
+ if ((trace->trace_pgfaults & TRACE_PFMIN)) {
+ pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+ if (pgfault_min == NULL)
+ goto out_error_mem;
+ perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+ perf_evlist__add(evlist, pgfault_min);
+ }
+
+ if (trace->sched &&
+ perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
+ trace__sched_stat_runtime))
+ goto out_error_sched_stat_runtime;
+
+ /*
+ * If a global cgroup was set, apply it to all the events without an
+ * explicit cgroup. I.e.:
+ *
+ * trace -G A -e sched:*switch
+ *
+ * Will set all raw_syscalls:sys_{enter,exit}, pgfault, vfs_getname, etc
+ * _and_ sched:sched_switch to the 'A' cgroup, while:
+ *
+ * trace -e sched:*switch -G A
+ *
+ * will only set the sched:sched_switch event to the 'A' cgroup, all the
+	 * other events (raw_syscalls:sys_{enter,exit}, etc) are left "without"
+ * a cgroup (on the root cgroup, sys wide, etc).
+ *
+ * Multiple cgroups:
+ *
+ * trace -G A -e sched:*switch -G B
+ *
+ * the syscall ones go to the 'A' cgroup, the sched:sched_switch goes
+ * to the 'B' cgroup.
+ *
+ * evlist__set_default_cgroup() grabs a reference of the passed cgroup
+ * only for the evsels still without a cgroup, i.e. evsel->cgroup == NULL.
+ */
+ if (trace->cgroup)
+ evlist__set_default_cgroup(trace->evlist, trace->cgroup);
+
+ err = perf_evlist__create_maps(evlist, &trace->opts.target);
+ if (err < 0) {
+ fprintf(trace->output, "Problems parsing the target to trace, check your options!\n");
+ goto out_delete_evlist;
+ }
+
+ err = trace__symbols_init(trace, evlist);
+ if (err < 0) {
+ fprintf(trace->output, "Problems initializing symbol libraries!\n");
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__config(evlist, &trace->opts, &callchain_param);
+
+ signal(SIGCHLD, sig_handler);
+ signal(SIGINT, sig_handler);
+
+ if (forks) {
+ err = perf_evlist__prepare_workload(evlist, &trace->opts.target,
+ argv, false, NULL);
+ if (err < 0) {
+ fprintf(trace->output, "Couldn't run the workload!\n");
+ goto out_delete_evlist;
+ }
+ }
+
+ err = perf_evlist__open(evlist);
+ if (err < 0)
+ goto out_error_open;
+
+ err = bpf__apply_obj_config();
+ if (err) {
+ char errbuf[BUFSIZ];
+
+ bpf__strerror_apply_obj_config(err, errbuf, sizeof(errbuf));
+ pr_err("ERROR: Apply config to BPF failed: %s\n",
+ errbuf);
+ goto out_error_open;
+ }
+
+ /*
+ * Better not use !target__has_task() here because we need to cover the
+ * case where no threads were specified in the command line, but a
+ * workload was, and in that case we will fill in the thread_map when
+ * we fork the workload in perf_evlist__prepare_workload.
+ */
+ if (trace->filter_pids.nr > 0)
+ err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries);
+ else if (thread_map__pid(evlist->threads, 0) == -1)
+ err = trace__set_filter_loop_pids(trace);
+
+ if (err < 0)
+ goto out_error_mem;
+
+ if (trace->ev_qualifier_ids.nr > 0) {
+ err = trace__set_ev_qualifier_filter(trace);
+ if (err < 0)
+ goto out_errno;
+
+ pr_debug("event qualifier tracepoint filter: %s\n",
+ trace->syscalls.events.sys_exit->filter);
+ }
+
+ err = perf_evlist__apply_filters(evlist, &evsel);
+ if (err < 0)
+ goto out_error_apply_filters;
+
+ err = perf_evlist__mmap(evlist, trace->opts.mmap_pages);
+ if (err < 0)
+ goto out_error_mmap;
+
+ if (!target__none(&trace->opts.target) && !trace->opts.initial_delay)
+ perf_evlist__enable(evlist);
+
+ if (forks)
+ perf_evlist__start_workload(evlist);
+
+ if (trace->opts.initial_delay) {
+ usleep(trace->opts.initial_delay * 1000);
+ perf_evlist__enable(evlist);
+ }
+
+ trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 ||
+ evlist->threads->nr > 1 ||
+ perf_evlist__first(evlist)->attr.inherit;
+
+ /*
+	 * Now that we already used evsel->attr to ask the kernel to set up the
+	 * events, let's reuse evsel->attr.sample_max_stack as the limit in
+	 * trace__resolve_callchain(), allowing per-event max-stack settings
+	 * to override an explicitly set --max-stack global setting.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel__has_callchain(evsel) &&
+ evsel->attr.sample_max_stack == 0)
+ evsel->attr.sample_max_stack = trace->max_stack;
+ }
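+	/*
+	 * Main event loop: drain every mmap ring buffer, then poll for more;
+	 * when done (workload exited or we were interrupted), disable the
+	 * events and drain what is left before exiting.
+	 */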
+again:
+ before = trace->nr_events;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ union perf_event *event;
+ struct perf_mmap *md;
+
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ struct perf_sample sample;
+
+ ++trace->nr_events;
+
+ err = perf_evlist__parse_sample(evlist, event, &sample);
+ if (err) {
+ fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err);
+ goto next_event;
+ }
+
+ trace__handle_event(trace, event, &sample);
+next_event:
+ perf_mmap__consume(md);
+
+ if (interrupted)
+ goto out_disable;
+
+ if (done && !draining) {
+ perf_evlist__disable(evlist);
+ draining = true;
+ }
+ }
+ perf_mmap__read_done(md);
+ }
+
+ if (trace->nr_events == before) {
+ int timeout = done ? 100 : -1;
+
+ if (!draining && perf_evlist__poll(evlist, timeout) > 0) {
+ if (perf_evlist__filter_pollfd(evlist, POLLERR | POLLHUP) == 0)
+ draining = true;
+
+ goto again;
+ }
+ } else {
+ goto again;
+ }
+
+out_disable:
+ thread__zput(trace->current);
+
+ perf_evlist__disable(evlist);
+
+ if (!err) {
+ if (trace->summary)
+ trace__fprintf_thread_summary(trace, trace->output);
+
+ if (trace->show_tool_stats) {
+ fprintf(trace->output, "Stats:\n "
+ " vfs_getname : %" PRIu64 "\n"
+ " proc_getname: %" PRIu64 "\n",
+ trace->stats.vfs_getname,
+ trace->stats.proc_getname);
+ }
+ }
+
+out_delete_evlist:
+ trace__symbols__exit(trace);
+
+ perf_evlist__delete(evlist);
+ cgroup__put(trace->cgroup);
+ trace->evlist = NULL;
+ trace->live = false;
+ return err;
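+	/*
+	 * The block below groups the error labels so they can share one
+	 * errbuf; it is only ever entered via goto.
+	 */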
+{
+ char errbuf[BUFSIZ];
+
+out_error_sched_stat_runtime:
+ tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "sched", "sched_stat_runtime");
+ goto out_error;
+
+out_error_raw_syscalls:
+ tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "raw_syscalls", "sys_(enter|exit)");
+ goto out_error;
+
+out_error_mmap:
+ perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+ goto out_error;
+
+out_error_open:
+ perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+
+out_error:
+ fprintf(trace->output, "%s\n", errbuf);
+ goto out_delete_evlist;
+
+out_error_apply_filters:
+ fprintf(trace->output,
+ "Failed to set filter \"%s\" on event %s with %d (%s)\n",
+ evsel->filter, perf_evsel__name(evsel), errno,
+ str_error_r(errno, errbuf, sizeof(errbuf)));
+ goto out_delete_evlist;
+}
+out_error_mem:
+ fprintf(trace->output, "Not enough memory to run!\n");
+ goto out_delete_evlist;
+
+out_errno:
+ fprintf(trace->output, "errno=%d,%s\n", errno, strerror(errno));
+ goto out_delete_evlist;
+}
+
+static int trace__replay(struct trace *trace)
+{
+ const struct perf_evsel_str_handler handlers[] = {
+ { "probe:vfs_getname", trace__vfs_getname, },
+ };
+ struct perf_data data = {
+ .file = {
+ .path = input_name,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ .force = trace->force,
+ };
+ struct perf_session *session;
+ struct perf_evsel *evsel;
+ int err = -1;
+
+ trace->tool.sample = trace__process_sample;
+ trace->tool.mmap = perf_event__process_mmap;
+ trace->tool.mmap2 = perf_event__process_mmap2;
+ trace->tool.comm = perf_event__process_comm;
+ trace->tool.exit = perf_event__process_exit;
+ trace->tool.fork = perf_event__process_fork;
+ trace->tool.attr = perf_event__process_attr;
+ trace->tool.tracing_data = perf_event__process_tracing_data;
+ trace->tool.build_id = perf_event__process_build_id;
+ trace->tool.namespaces = perf_event__process_namespaces;
+
+ trace->tool.ordered_events = true;
+ trace->tool.ordering_requires_timestamps = true;
+
+ /* add tid to output */
+ trace->multiple_threads = true;
+
+ session = perf_session__new(&data, false, &trace->tool);
+ if (session == NULL)
+ return -1;
+
+ if (trace->opts.target.pid)
+ symbol_conf.pid_list_str = strdup(trace->opts.target.pid);
+
+ if (trace->opts.target.tid)
+ symbol_conf.tid_list_str = strdup(trace->opts.target.tid);
+
+ if (symbol__init(&session->header.env) < 0)
+ goto out;
+
+ trace->host = &session->machines.host;
+
+ err = perf_session__set_tracepoints_handlers(session, handlers);
+ if (err)
+ goto out;
+
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+ "raw_syscalls:sys_enter");
+	/* older kernels have the syscalls:* tracepoints instead of raw_syscalls:* */
+ if (evsel == NULL)
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+ "syscalls:sys_enter");
+
+ if (evsel &&
+ (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
+ perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
+		pr_err("Error initializing the raw_syscalls:sys_enter event\n");
+ goto out;
+ }
+
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+ "raw_syscalls:sys_exit");
+ if (evsel == NULL)
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+ "syscalls:sys_exit");
+ if (evsel &&
+ (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
+ perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
+		pr_err("Error initializing the raw_syscalls:sys_exit event\n");
+ goto out;
+ }
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.type == PERF_TYPE_SOFTWARE &&
+ (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ||
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+ evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS))
+ evsel->handler = trace__pgfault;
+ }
+
+ setup_pager();
+
+ err = perf_session__process_events(session);
+ if (err)
+		pr_err("Failed to process events, error %d", err);
+	else if (trace->summary)
+ trace__fprintf_thread_summary(trace, trace->output);
+
+out:
+ perf_session__delete(session);
+
+ return err;
+}
+
+static size_t trace__fprintf_threads_header(FILE *fp)
+{
+ size_t printed;
+
+ printed = fprintf(fp, "\n Summary of events:\n\n");
+
+ return printed;
+}
+
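+/*
+ * Sort the per-thread syscall stats by the msecs spent in each syscall
+ * (most time-consuming first); the block below fills each sorted entry.
+ */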
+DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs,
+ struct stats *stats;
+ double msecs;
+ int syscall;
+)
+{
+ struct int_node *source = rb_entry(nd, struct int_node, rb_node);
+ struct stats *stats = source->priv;
+
+ entry->syscall = source->i;
+ entry->stats = stats;
+ entry->msecs = stats ? (u64)stats->n * (avg_stats(stats) / NSEC_PER_MSEC) : 0;
+}
+
+static size_t thread__dump_stats(struct thread_trace *ttrace,
+ struct trace *trace, FILE *fp)
+{
+ size_t printed = 0;
+ struct syscall *sc;
+ struct rb_node *nd;
+ DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats);
+
+ if (syscall_stats == NULL)
+ return 0;
+
+ printed += fprintf(fp, "\n");
+
+ printed += fprintf(fp, " syscall calls total min avg max stddev\n");
+ printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n");
+ printed += fprintf(fp, " --------------- -------- --------- --------- --------- --------- ------\n");
+
+ resort_rb__for_each_entry(nd, syscall_stats) {
+ struct stats *stats = syscall_stats_entry->stats;
+ if (stats) {
+ double min = (double)(stats->min) / NSEC_PER_MSEC;
+ double max = (double)(stats->max) / NSEC_PER_MSEC;
+ double avg = avg_stats(stats);
+ double pct;
+ u64 n = (u64) stats->n;
+
+ pct = avg ? 100.0 * stddev_stats(stats)/avg : 0.0;
+ avg /= NSEC_PER_MSEC;
+
+ sc = &trace->syscalls.table[syscall_stats_entry->syscall];
+ printed += fprintf(fp, " %-15s", sc->name);
+ printed += fprintf(fp, " %8" PRIu64 " %9.3f %9.3f %9.3f",
+ n, syscall_stats_entry->msecs, min, avg);
+ printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct);
+ }
+ }
+
+ resort_rb__delete(syscall_stats);
+ printed += fprintf(fp, "\n\n");
+
+ return printed;
+}
+
+static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace)
+{
+ size_t printed = 0;
+ struct thread_trace *ttrace = thread__priv(thread);
+ double ratio;
+
+ if (ttrace == NULL)
+ return 0;
+
+ ratio = (double)ttrace->nr_events / trace->nr_events * 100.0;
+
+ printed += fprintf(fp, " %s (%d), ", thread__comm_str(thread), thread->tid);
+ printed += fprintf(fp, "%lu events, ", ttrace->nr_events);
+ printed += fprintf(fp, "%.1f%%", ratio);
+ if (ttrace->pfmaj)
+ printed += fprintf(fp, ", %lu majfaults", ttrace->pfmaj);
+ if (ttrace->pfmin)
+ printed += fprintf(fp, ", %lu minfaults", ttrace->pfmin);
+ if (trace->sched)
+ printed += fprintf(fp, ", %.3f msec\n", ttrace->runtime_ms);
+ else if (fputc('\n', fp) != EOF)
+ ++printed;
+
+ printed += thread__dump_stats(ttrace, trace, fp);
+
+ return printed;
+}
+
+static unsigned long thread__nr_events(struct thread_trace *ttrace)
+{
+ return ttrace ? ttrace->nr_events : 0;
+}
+
+DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_events(b->thread->priv)),
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
+{
+ size_t printed = trace__fprintf_threads_header(fp);
+ struct rb_node *nd;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
+
+ if (threads == NULL) {
+ fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+ return 0;
+ }
+
+ resort_rb__for_each_entry(nd, threads)
+ printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+
+ resort_rb__delete(threads);
+ }
+ return printed;
+}
+
+static int trace__set_duration(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct trace *trace = opt->value;
+
+ trace->duration_filter = atof(str);
+ return 0;
+}
+
+static int trace__set_filter_pids(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ int ret = -1;
+ size_t i;
+ struct trace *trace = opt->value;
+ /*
+	 * FIXME: introduce an intarray class, plain parse the csv and create a
+ * { int nr, int entries[] } struct...
+ */
+ struct intlist *list = intlist__new(str);
+
+ if (list == NULL)
+ return -1;
+
+ i = trace->filter_pids.nr = intlist__nr_entries(list) + 1;
+ trace->filter_pids.entries = calloc(i, sizeof(pid_t));
+
+ if (trace->filter_pids.entries == NULL)
+ goto out;
+
+ trace->filter_pids.entries[0] = getpid();
+
+ for (i = 1; i < trace->filter_pids.nr; ++i)
+ trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i;
+
+ intlist__delete(list);
+ ret = 0;
+out:
+ return ret;
+}
+
+static int trace__open_output(struct trace *trace, const char *filename)
+{
+ struct stat st;
+
+ if (!stat(filename, &st) && st.st_size) {
+ char oldname[PATH_MAX];
+
+ scnprintf(oldname, sizeof(oldname), "%s.old", filename);
+ unlink(oldname);
+ rename(filename, oldname);
+ }
+
+ trace->output = fopen(filename, "w");
+
+ return trace->output == NULL ? -errno : 0;
+}
+
+static int parse_pagefaults(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ int *trace_pgfaults = opt->value;
+
+ if (strcmp(str, "all") == 0)
+ *trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
+ else if (strcmp(str, "maj") == 0)
+ *trace_pgfaults |= TRACE_PFMAJ;
+ else if (strcmp(str, "min") == 0)
+ *trace_pgfaults |= TRACE_PFMIN;
+ else
+ return -1;
+
+ return 0;
+}
+
+static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel->handler = handler;
+}
+
+static int evlist__set_syscall_tp_fields(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->priv || !evsel->tp_format)
+ continue;
+
+ if (strcmp(evsel->tp_format->system, "syscalls"))
+ continue;
+
+ if (perf_evsel__init_syscall_tp(evsel))
+ return -1;
+
+ if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
+ struct syscall_tp *sc = evsel->priv;
+
+ if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64)))
+ return -1;
+ } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) {
+ struct syscall_tp *sc = evsel->priv;
+
+ if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * XXX: Hackish, just splitting the combined -e+--event (syscalls
+ * (raw_syscalls:{sys_{enter,exit}}) + events (tracepoints, HW, SW, etc)) to
+ * use existing facilities unchanged (trace->ev_qualifier + parse_options()).
+ *
+ * It'd be better to introduce a parse_options() variant that would return a
+ * list with the terms it didn't match to an event...
+ */
+static int trace__parse_events_option(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct trace *trace = (struct trace *)opt->value;
+ const char *s = str;
+ char *sep = NULL, *lists[2] = { NULL, NULL, };
+ int len = strlen(str) + 1, err = -1, list, idx;
+ char *strace_groups_dir = system_path(STRACE_GROUPS_DIR);
+ char group_name[PATH_MAX];
+
+ if (strace_groups_dir == NULL)
+ return -1;
+
+ if (*s == '!') {
+ ++s;
+ trace->not_ev_qualifier = true;
+ }
+
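+	/*
+	 * Walk the comma separated list: names that resolve to a syscall, a
+	 * glob of syscalls or a strace-style group file go to lists[1], the
+	 * event qualifier; everything else goes to lists[0], handed later to
+	 * parse_events_option().
+	 */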
+ while (1) {
+ if ((sep = strchr(s, ',')) != NULL)
+ *sep = '\0';
+
+ list = 0;
+ if (syscalltbl__id(trace->sctbl, s) >= 0 ||
+ syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) {
+ list = 1;
+ } else {
+ path__join(group_name, sizeof(group_name), strace_groups_dir, s);
+ if (access(group_name, R_OK) == 0)
+ list = 1;
+ }
+
+ if (lists[list]) {
+ sprintf(lists[list] + strlen(lists[list]), ",%s", s);
+ } else {
+ lists[list] = malloc(len);
+ if (lists[list] == NULL)
+ goto out;
+ strcpy(lists[list], s);
+ }
+
+ if (!sep)
+ break;
+
+ *sep = ',';
+ s = sep + 1;
+ }
+
+ if (lists[1] != NULL) {
+ struct strlist_config slist_config = {
+ .dirname = strace_groups_dir,
+ };
+
+ trace->ev_qualifier = strlist__new(lists[1], &slist_config);
+ if (trace->ev_qualifier == NULL) {
+ fputs("Not enough memory to parse event qualifier", trace->output);
+ goto out;
+ }
+
+ if (trace__validate_ev_qualifier(trace))
+ goto out;
+ trace->trace_syscalls = true;
+ }
+
+ err = 0;
+
+ if (lists[0]) {
+ struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
+ "event selector. use 'perf list' to list available events",
+ parse_events_option);
+ err = parse_events_option(&o, lists[0], 0);
+ }
+out:
+ if (sep)
+ *sep = ',';
+
+ return err;
+}
+
+static int trace__parse_cgroups(const struct option *opt, const char *str, int unset)
+{
+ struct trace *trace = opt->value;
+
+ if (!list_empty(&trace->evlist->entries))
+ return parse_cgroups(opt, str, unset);
+
+ trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
+
+ return 0;
+}
+
+int cmd_trace(int argc, const char **argv)
+{
+ const char *trace_usage[] = {
+ "perf trace [<options>] [<command>]",
+ "perf trace [<options>] -- <command> [<options>]",
+ "perf trace record [<options>] [<command>]",
+ "perf trace record [<options>] -- <command> [<options>]",
+ NULL
+ };
+ struct trace trace = {
+ .syscalls = {
+			.max = -1,
+ },
+ .opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .no_buffering = true,
+ .mmap_pages = UINT_MAX,
+ .proc_map_timeout = 500,
+ },
+ .output = stderr,
+ .show_comm = true,
+ .trace_syscalls = false,
+ .kernel_syscallchains = false,
+ .max_stack = UINT_MAX,
+ };
+ const char *output_name = NULL;
+ const struct option trace_options[] = {
+ OPT_CALLBACK('e', "event", &trace, "event",
+ "event/syscall selector. use 'perf list' to list available events",
+ trace__parse_events_option),
+ OPT_BOOLEAN(0, "comm", &trace.show_comm,
+ "show the thread COMM next to its id"),
+ OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
+ OPT_CALLBACK(0, "expr", &trace, "expr", "list of syscalls/events to trace",
+ trace__parse_events_option),
+ OPT_STRING('o', "output", &output_name, "file", "output file name"),
+ OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
+ OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",
+ "trace events on existing process id"),
+ OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
+ "trace events on existing thread id"),
+ OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
+ "pids to filter (by the kernel)", trace__set_filter_pids),
+ OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",
+ "list of cpus to monitor"),
+ OPT_BOOLEAN(0, "no-inherit", &trace.opts.no_inherit,
+ "child tasks do not inherit counters"),
+ OPT_CALLBACK('m', "mmap-pages", &trace.opts.mmap_pages, "pages",
+ "number of mmap data pages",
+ perf_evlist__parse_mmap_pages),
+ OPT_STRING('u', "uid", &trace.opts.target.uid_str, "user",
+ "user to profile"),
+ OPT_CALLBACK(0, "duration", &trace, "float",
+ "show only events with duration > N.M ms",
+ trace__set_duration),
+ OPT_BOOLEAN(0, "sched", &trace.sched, "show blocking scheduler events"),
+ OPT_INCR('v', "verbose", &verbose, "be more verbose"),
+ OPT_BOOLEAN('T', "time", &trace.full_time,
+ "Show full timestamp, not time relative to first start"),
+ OPT_BOOLEAN(0, "failure", &trace.failure_only,
+ "Show only syscalls that failed"),
+ OPT_BOOLEAN('s', "summary", &trace.summary_only,
+ "Show only syscall summary with statistics"),
+ OPT_BOOLEAN('S', "with-summary", &trace.summary,
+ "Show all syscalls and summary with statistics"),
+ OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
+ "Trace pagefaults", parse_pagefaults, "maj"),
+ OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
+ OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+ OPT_CALLBACK(0, "call-graph", &trace.opts,
+ "record_mode[,record_size]", record_callchain_help,
+ &record_parse_callchain_opt),
+ OPT_BOOLEAN(0, "kernel-syscall-graph", &trace.kernel_syscallchains,
+ "Show the kernel callchains on the syscall exit path"),
+ OPT_UINTEGER(0, "min-stack", &trace.min_stack,
+ "Set the minimum stack depth when parsing the callchain, "
+ "anything below the specified depth will be ignored."),
+ OPT_UINTEGER(0, "max-stack", &trace.max_stack,
+ "Set the maximum stack depth when parsing the callchain, "
+ "anything beyond the specified depth will be ignored. "
+ "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
+ OPT_BOOLEAN(0, "print-sample", &trace.print_sample,
+ "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"),
+ OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+ "per thread proc mmap processing timeout in ms"),
+ OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
+ trace__parse_cgroups),
+ OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+ "ms to wait before starting measurement after program "
+ "start"),
+ OPT_END()
+ };
+ bool __maybe_unused max_stack_user_set = true;
+ bool mmap_pages_user_set = true;
+ struct perf_evsel *evsel;
+ const char * const trace_subcommands[] = { "record", NULL };
+ int err = -1;
+ char bf[BUFSIZ];
+
+ signal(SIGSEGV, sighandler_dump_stack);
+ signal(SIGFPE, sighandler_dump_stack);
+
+ trace.evlist = perf_evlist__new();
+ trace.sctbl = syscalltbl__new();
+
+ if (trace.evlist == NULL || trace.sctbl == NULL) {
+ pr_err("Not enough memory to run!\n");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands,
+ trace_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if ((nr_cgroups || trace.cgroup) && !trace.opts.target.system_wide) {
+ usage_with_options_msg(trace_usage, trace_options,
+ "cgroup monitoring only available in system-wide mode");
+ }
+
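+	/*
+	 * A BPF object may have set up an "__augmented_syscalls__" output
+	 * event; if so, use it so syscall args can come augmented from BPF.
+	 */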
+ evsel = bpf__setup_output_event(trace.evlist, "__augmented_syscalls__");
+ if (IS_ERR(evsel)) {
+ bpf__strerror_setup_output_event(trace.evlist, PTR_ERR(evsel), bf, sizeof(bf));
+ pr_err("ERROR: Setup trace syscalls enter failed: %s\n", bf);
+ goto out;
+ }
+
+ if (evsel) {
+ if (perf_evsel__init_augmented_syscall_tp(evsel) ||
+ perf_evsel__init_augmented_syscall_tp_args(evsel))
+ goto out;
+ trace.syscalls.events.augmented = evsel;
+ }
+
+ err = bpf__setup_stdout(trace.evlist);
+ if (err) {
+ bpf__strerror_setup_stdout(trace.evlist, err, bf, sizeof(bf));
+ pr_err("ERROR: Setup BPF stdout failed: %s\n", bf);
+ goto out;
+ }
+
+ err = -1;
+
+ if (trace.trace_pgfaults) {
+ trace.opts.sample_address = true;
+ trace.opts.sample_time = true;
+ }
+
+ if (trace.opts.mmap_pages == UINT_MAX)
+ mmap_pages_user_set = false;
+
+ if (trace.max_stack == UINT_MAX) {
+ trace.max_stack = input_name ? PERF_MAX_STACK_DEPTH : sysctl__max_stack();
+ max_stack_user_set = false;
+ }
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) {
+ record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false);
+ }
+#endif
+
+ if (callchain_param.enabled) {
+ if (!mmap_pages_user_set && geteuid() == 0)
+ trace.opts.mmap_pages = perf_event_mlock_kb_in_pages() * 4;
+
+ symbol_conf.use_callchain = true;
+ }
+
+ if (trace.evlist->nr_entries > 0) {
+ evlist__set_evsel_handler(trace.evlist, trace__event_handler);
+ if (evlist__set_syscall_tp_fields(trace.evlist)) {
+ perror("failed to set syscalls:* tracepoint fields");
+ goto out;
+ }
+ }
+
+ if ((argc >= 1) && (strcmp(argv[0], "record") == 0))
+ return trace__record(&trace, argc-1, &argv[1]);
+
+ /* summary_only implies summary option, but don't overwrite summary if set */
+ if (trace.summary_only)
+ trace.summary = trace.summary_only;
+
+ if (!trace.trace_syscalls && !trace.trace_pgfaults &&
+	    trace.evlist->nr_entries == 0 /* Was -e/--event used? */) {
+ trace.trace_syscalls = true;
+ }
+
+ if (output_name != NULL) {
+ err = trace__open_output(&trace, output_name);
+ if (err < 0) {
+ perror("failed to create output file");
+ goto out;
+ }
+ }
+
+ err = target__validate(&trace.opts.target);
+ if (err) {
+ target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+ fprintf(trace.output, "%s", bf);
+ goto out_close;
+ }
+
+ err = target__parse_uid(&trace.opts.target);
+ if (err) {
+ target__strerror(&trace.opts.target, err, bf, sizeof(bf));
+ fprintf(trace.output, "%s", bf);
+ goto out_close;
+ }
+
+ if (!argc && target__none(&trace.opts.target))
+ trace.opts.target.system_wide = true;
+
+ if (input_name)
+ err = trace__replay(&trace);
+ else
+ err = trace__run(&trace, argc, argv);
+
+out_close:
+ if (output_name != NULL)
+ fclose(trace.output);
+out:
+ return err;
+}
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
new file mode 100644
index 000000000..b02c96104
--- /dev/null
+++ b/tools/perf/builtin-version.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "builtin.h"
+#include "perf.h"
+#include "color.h"
+#include <linux/compiler.h>
+#include <tools/config.h>
+#include <stdio.h>
+#include <string.h>
+#include <subcmd/parse-options.h>
+
+int version_verbose;
+
+struct version {
+ bool build_options;
+};
+
+static struct version version;
+
+static struct option version_options[] = {
+ OPT_BOOLEAN(0, "build-options", &version.build_options,
+ "display the build options"),
+ OPT_END(),
+};
+
+static const char * const version_usage[] = {
+ "perf version [<options>]",
+ NULL
+};
+
+static void on_off_print(const char *status)
+{
+ printf("[ ");
+
+ if (!strcmp(status, "OFF"))
+ color_fprintf(stdout, PERF_COLOR_RED, "%-3s", status);
+ else
+ color_fprintf(stdout, PERF_COLOR_GREEN, "%-3s", status);
+
+ printf(" ]");
+}
+
+static void status_print(const char *name, const char *macro,
+ const char *status)
+{
+ printf("%22s: ", name);
+ on_off_print(status);
+ printf(" # %s\n", macro);
+}
+
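+/* Emit one feature line: name, colored on/OFF state and the config macro. */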
+#define STATUS(__d, __m) \
+do { \
+ if (IS_BUILTIN(__d)) \
+ status_print(#__m, #__d, "on"); \
+ else \
+ status_print(#__m, #__d, "OFF"); \
+} while (0)
+
+static void library_status(void)
+{
+ STATUS(HAVE_DWARF_SUPPORT, dwarf);
+ STATUS(HAVE_DWARF_GETLOCATIONS_SUPPORT, dwarf_getlocations);
+ STATUS(HAVE_GLIBC_SUPPORT, glibc);
+ STATUS(HAVE_GTK2_SUPPORT, gtk2);
+#ifndef HAVE_SYSCALL_TABLE_SUPPORT
+ STATUS(HAVE_LIBAUDIT_SUPPORT, libaudit);
+#endif
+ STATUS(HAVE_SYSCALL_TABLE_SUPPORT, syscall_table);
+ STATUS(HAVE_LIBBFD_SUPPORT, libbfd);
+ STATUS(HAVE_LIBELF_SUPPORT, libelf);
+ STATUS(HAVE_LIBNUMA_SUPPORT, libnuma);
+ STATUS(HAVE_LIBNUMA_SUPPORT, numa_num_possible_cpus);
+ STATUS(HAVE_LIBPERL_SUPPORT, libperl);
+ STATUS(HAVE_LIBPYTHON_SUPPORT, libpython);
+ STATUS(HAVE_SLANG_SUPPORT, libslang);
+ STATUS(HAVE_LIBCRYPTO_SUPPORT, libcrypto);
+ STATUS(HAVE_LIBUNWIND_SUPPORT, libunwind);
+ STATUS(HAVE_DWARF_SUPPORT, libdw-dwarf-unwind);
+ STATUS(HAVE_ZLIB_SUPPORT, zlib);
+ STATUS(HAVE_LZMA_SUPPORT, lzma);
+ STATUS(HAVE_AUXTRACE_SUPPORT, get_cpuid);
+ STATUS(HAVE_LIBBPF_SUPPORT, bpf);
+}
+
+int cmd_version(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, version_options, version_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
+ printf("perf version %s\n", perf_version_string);
+
+ if (version.build_options || version_verbose == 1)
+ library_status();
+
+ return 0;
+}
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
new file mode 100644
index 000000000..05745f3ce
--- /dev/null
+++ b/tools/perf/builtin.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUILTIN_H
+#define BUILTIN_H
+
+#include "util/util.h"
+
+extern const char perf_usage_string[];
+extern const char perf_more_info_string[];
+
+void list_common_cmds_help(void);
+const char *help_unknown_cmd(const char *cmd);
+
+int cmd_annotate(int argc, const char **argv);
+int cmd_bench(int argc, const char **argv);
+int cmd_buildid_cache(int argc, const char **argv);
+int cmd_buildid_list(int argc, const char **argv);
+int cmd_config(int argc, const char **argv);
+int cmd_c2c(int argc, const char **argv);
+int cmd_diff(int argc, const char **argv);
+int cmd_evlist(int argc, const char **argv);
+int cmd_help(int argc, const char **argv);
+int cmd_sched(int argc, const char **argv);
+int cmd_kallsyms(int argc, const char **argv);
+int cmd_list(int argc, const char **argv);
+int cmd_record(int argc, const char **argv);
+int cmd_report(int argc, const char **argv);
+int cmd_stat(int argc, const char **argv);
+int cmd_timechart(int argc, const char **argv);
+int cmd_top(int argc, const char **argv);
+int cmd_script(int argc, const char **argv);
+int cmd_version(int argc, const char **argv);
+int cmd_probe(int argc, const char **argv);
+int cmd_kmem(int argc, const char **argv);
+int cmd_lock(int argc, const char **argv);
+int cmd_kvm(int argc, const char **argv);
+int cmd_test(int argc, const char **argv);
+int cmd_trace(int argc, const char **argv);
+int cmd_inject(int argc, const char **argv);
+int cmd_mem(int argc, const char **argv);
+int cmd_data(int argc, const char **argv);
+int cmd_ftrace(int argc, const char **argv);
+
+int find_scripts(char **scripts_array, char **scripts_path_array);
+#endif
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
new file mode 100755
index 000000000..c72cc73a6
--- /dev/null
+++ b/tools/perf/check-headers.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+HEADERS='
+include/uapi/drm/drm.h
+include/uapi/drm/i915_drm.h
+include/uapi/linux/fcntl.h
+include/uapi/linux/kcmp.h
+include/uapi/linux/kvm.h
+include/uapi/linux/in.h
+include/uapi/linux/perf_event.h
+include/uapi/linux/prctl.h
+include/uapi/linux/sched.h
+include/uapi/linux/stat.h
+include/uapi/linux/vhost.h
+include/uapi/sound/asound.h
+include/linux/bits.h
+include/linux/hash.h
+include/uapi/linux/hw_breakpoint.h
+arch/x86/include/asm/disabled-features.h
+arch/x86/include/asm/required-features.h
+arch/x86/include/asm/cpufeatures.h
+arch/arm/include/uapi/asm/perf_regs.h
+arch/arm64/include/uapi/asm/perf_regs.h
+arch/powerpc/include/uapi/asm/perf_regs.h
+arch/s390/include/uapi/asm/perf_regs.h
+arch/x86/include/uapi/asm/perf_regs.h
+arch/x86/include/uapi/asm/kvm.h
+arch/x86/include/uapi/asm/kvm_perf.h
+arch/x86/include/uapi/asm/svm.h
+arch/x86/include/uapi/asm/unistd.h
+arch/x86/include/uapi/asm/vmx.h
+arch/powerpc/include/uapi/asm/kvm.h
+arch/s390/include/uapi/asm/kvm.h
+arch/s390/include/uapi/asm/kvm_perf.h
+arch/s390/include/uapi/asm/ptrace.h
+arch/s390/include/uapi/asm/sie.h
+arch/arm/include/uapi/asm/kvm.h
+arch/arm64/include/uapi/asm/kvm.h
+arch/arm64/include/uapi/asm/unistd.h
+arch/alpha/include/uapi/asm/errno.h
+arch/mips/include/asm/errno.h
+arch/mips/include/uapi/asm/errno.h
+arch/parisc/include/uapi/asm/errno.h
+arch/powerpc/include/uapi/asm/errno.h
+arch/sparc/include/uapi/asm/errno.h
+arch/x86/include/uapi/asm/errno.h
+arch/powerpc/include/uapi/asm/unistd.h
+include/asm-generic/bitops/arch_hweight.h
+include/asm-generic/bitops/const_hweight.h
+include/asm-generic/bitops/__fls.h
+include/asm-generic/bitops/fls.h
+include/asm-generic/bitops/fls64.h
+include/linux/coresight-pmu.h
+include/uapi/asm-generic/errno.h
+include/uapi/asm-generic/errno-base.h
+include/uapi/asm-generic/ioctls.h
+include/uapi/asm-generic/mman-common.h
+include/uapi/asm-generic/unistd.h
+'
+
+check_2 () {
+ file1=$1
+ file2=$2
+
+ shift
+ shift
+
+ cmd="diff $* $file1 $file2 > /dev/null"
+
+ test -f $file2 && {
+ eval $cmd || {
+ echo "Warning: Kernel ABI header at '$file1' differs from latest version at '$file2'" >&2
+ echo diff -u $file1 $file2
+ }
+ }
+}
+
+check () {
+ file=$1
+
+ shift
+
+ check_2 tools/$file $file $*
+}
+
+# Check if we have the kernel headers (tools/perf/../../include), else
+# we're probably on a detached tarball, so no point in trying to check
+# differences.
+test -d ../../include || exit 0
+
+cd ../..
+
+# simple diff check
+for i in $HEADERS; do
+ check $i -B
+done
+
+# diff with extra ignore lines
+check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>"'
+check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"'
+check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
+
+# diff non-symmetric files
+check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+
+cd tools/perf
diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt
new file mode 100644
index 000000000..2d0caf20f
--- /dev/null
+++ b/tools/perf/command-list.txt
@@ -0,0 +1,32 @@
+#
+# List of known perf commands.
+# command name category [deprecated] [common]
+#
+perf-annotate mainporcelain common
+perf-archive mainporcelain common
+perf-bench mainporcelain common
+perf-buildid-cache mainporcelain common
+perf-buildid-list mainporcelain common
+perf-data mainporcelain common
+perf-diff mainporcelain common
+perf-c2c mainporcelain common
+perf-config mainporcelain common
+perf-evlist mainporcelain common
+perf-ftrace mainporcelain common
+perf-inject mainporcelain common
+perf-kallsyms mainporcelain common
+perf-kmem mainporcelain common
+perf-kvm mainporcelain common
+perf-list mainporcelain common
+perf-lock mainporcelain common
+perf-mem mainporcelain common
+perf-probe mainporcelain full
+perf-record mainporcelain common
+perf-report mainporcelain common
+perf-sched mainporcelain common
+perf-script mainporcelain common
+perf-stat mainporcelain common
+perf-test mainporcelain common
+perf-timechart mainporcelain common
+perf-top mainporcelain common
+perf-trace mainporcelain audit
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
new file mode 100644
index 000000000..a28dca258
--- /dev/null
+++ b/tools/perf/design.txt
@@ -0,0 +1,462 @@
+
+Performance Counters for Linux
+------------------------------
+
+Performance counters are special hardware registers available on most modern
+CPUs. These registers count the number of certain types of hw events, such
+as instructions executed, cache misses suffered, or branches mis-predicted -
+without slowing down the kernel or applications. These registers can also
+trigger interrupts when a threshold number of events have passed - and can
+thus be used to profile the code that runs on that CPU.
+
+The Linux Performance Counter subsystem provides an abstraction of these
+hardware capabilities. It provides per task and per CPU counters, counter
+groups, and it provides event capabilities on top of those. It
+provides "virtual" 64-bit counters, regardless of the width of the
+underlying hardware counters.
+
+Performance counters are accessed via special file descriptors.
+There's one file descriptor per virtual counter used.
+
+The special file descriptor is opened via the sys_perf_event_open()
+system call:
+
+ int sys_perf_event_open(struct perf_event_attr *hw_event_uptr,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags);
+
+The syscall returns the new fd. The fd can be used via the normal
+VFS system calls: read() can be used to read the counter, fcntl()
+can be used to set the blocking mode, etc.
+
+Multiple counters can be kept open at a time, and the counters
+can be poll()ed.
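+
+For example, a minimal sketch of counting the instructions executed by a
+workload in the current task (note: this uses today's <linux/perf_event.h>
+spelling of the attribute fields, which differs in places from the
+historical structure shown below):
+
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+int main(void)
+{
+	struct perf_event_attr attr;
+	unsigned long long count;
+	int fd;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_HARDWARE;
+	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+	attr.disabled = 1;		/* start off, enable via ioctl below */
+	attr.exclude_kernel = 1;	/* count user mode only */
+
+	/* pid == 0, cpu == -1: this task, any CPU; group_fd == -1: own group */
+	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+	if (fd < 0) {
+		perror("perf_event_open");
+		return 1;
+	}
+
+	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+	/* ... run the workload to be measured ... */
+	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+
+	if (read(fd, &count, sizeof(count)) == sizeof(count))
+		printf("instructions: %llu\n", count);
+	close(fd);
+	return 0;
+}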
+
+When creating a new counter fd, 'perf_event_attr' is:
+
+struct perf_event_attr {
+ /*
+ * The MSB of the config word signifies if the rest contains cpu
+	 * specific (raw) counter configuration data; if unset, the next
+ * 7 bits are an event type and the rest of the bits are the event
+ * identifier.
+ */
+ __u64 config;
+
+ __u64 irq_period;
+ __u32 record_type;
+ __u32 read_format;
+
+ __u64 disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ munmap : 1, /* include munmap data */
+ comm : 1, /* include comm data */
+
+ __reserved_1 : 52;
+
+ __u32 extra_config_len;
+ __u32 wakeup_events; /* wakeup every n events */
+
+ __u64 __reserved_2;
+ __u64 __reserved_3;
+};
+
+The 'config' field specifies what the counter should count. It
+is divided into 3 bit-fields:
+
+raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000
+type: 7 bits (next most significant) 0x7f00_0000_0000_0000
+event_id: 56 bits (least significant) 0x00ff_ffff_ffff_ffff
+
+If 'raw_type' is 1, then the counter will count a hardware event
+specified by the remaining 63 bits of 'config'. The encoding is
+machine-specific.
+
+If 'raw_type' is 0, then the 'type' field says what kind of counter
+this is, with the following encoding:
+
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+};
+
+A counter of PERF_TYPE_HARDWARE will count the hardware event
+specified by 'event_id':
+
+/*
+ * Generalized performance counter event types, used by the hw_event.event_id
+ * parameter of the sys_perf_event_open() syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+};
+
+These are standardized types of events that work relatively uniformly
+on all CPUs that implement Performance Counters support under Linux,
+although there may be variations (e.g., different CPUs might count
+cache references and misses at different levels of the cache hierarchy).
+If a CPU is not able to count the selected event, then the system call
+will return -EINVAL.
+
+More hw_event_types are supported as well, but they are CPU-specific
+and accessed as raw events. For example, to count "External bus
+cycles while bus lock signal asserted" events on Intel Core CPUs, pass
+in a 0x4064 event_id value and set hw_event.raw_type to 1.
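+
+With current headers, the same request is a sketch along these lines (modern
+kernels spell the raw scheme as an explicit type rather than an MSB flag):
+
+	attr.type = PERF_TYPE_RAW;
+	attr.config = 0x4064;	/* model-specific encoding, Intel Core example above */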
+
+A counter of type PERF_TYPE_SOFTWARE will count one of the available
+software events, selected by 'event_id':
+
+/*
+ * Special "software" counters provided by the kernel, even if the hardware
+ * does not support performance counters. These counters measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
+};
+
+Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event
+tracer is available, and event_id values can be obtained from
+/debug/tracing/events/*/*/id
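+
+For example (a sketch, with 'id' holding the number read from that file):
+
+	attr.type = PERF_TYPE_TRACEPOINT;
+	attr.config = id;	/* numeric tracepoint id from debugfs */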
+
+
+Counters come in two flavours: counting counters and sampling
+counters. A "counting" counter is one that is used for counting the
+number of events that occur, and is characterised by having
+irq_period = 0.
+
+
+A read() on a counter returns the current value of the counter and possible
+additional values as specified by 'read_format'; each value is a u64 (8 bytes)
+in size.
+
+/*
+ * Bits that can be set in hw_event.read_format to request that
+ * reads on the counter should return the indicated quantities,
+ * in increasing order of bit value, after the counter value.
+ */
+enum perf_event_read_format {
+ PERF_FORMAT_TOTAL_TIME_ENABLED = 1,
+ PERF_FORMAT_TOTAL_TIME_RUNNING = 2,
+};
+
+Using these additional values one can establish the overcommit ratio for a
+particular counter, allowing one to take the round-robin scheduling effect
+into account.
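+
+A sketch of scaling a count when the counter was time-shared, assuming both
+bits above were set in 'read_format':
+
+	struct {
+		__u64 value;
+		__u64 time_enabled;
+		__u64 time_running;
+	} r;
+
+	read(fd, &r, sizeof(r));
+	if (r.time_running)
+		estimate = r.value * (double)r.time_enabled / r.time_running;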
+
+
+A "sampling" counter is one that is set up to generate an interrupt
+every N events, where N is given by 'irq_period'. A sampling counter
+has irq_period > 0. The record_type controls what data is recorded on each
+interrupt:
+
+/*
+ * Bits that can be set in hw_event.record_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_record_format {
+ PERF_RECORD_IP = 1U << 0,
+ PERF_RECORD_TID = 1U << 1,
+ PERF_RECORD_TIME = 1U << 2,
+ PERF_RECORD_ADDR = 1U << 3,
+ PERF_RECORD_GROUP = 1U << 4,
+ PERF_RECORD_CALLCHAIN = 1U << 5,
+};
+
+Such (and other) events will be recorded in a ring-buffer, which is
+available to user-space using mmap() (see below).
+
+The 'disabled' bit specifies whether the counter starts out disabled
+or enabled. If it is initially disabled, it can be enabled by ioctl
+or prctl (see below).
+
+The 'inherit' bit, if set, specifies that this counter should count
+events on descendant tasks as well as the task specified. This only
+applies to new descendants, not to any existing descendants at the
+time the counter is created (nor to any new descendants of existing
+descendants).
+
+The 'pinned' bit, if set, specifies that the counter should always be
+on the CPU if at all possible. It only applies to hardware counters
+and only to group leaders. If a pinned counter cannot be put onto the
+CPU (e.g. because there are not enough hardware counters or because of
+a conflict with some other event), then the counter goes into an
+'error' state, where reads return end-of-file (i.e. read() returns 0)
+until the counter is subsequently enabled or disabled.
+
+The 'exclusive' bit, if set, specifies that when this counter's group
+is on the CPU, it should be the only group using the CPU's counters.
+In future, this will allow sophisticated monitoring programs to supply
+extra configuration information via 'extra_config_len' to exploit
+advanced features of the CPU's Performance Monitor Unit (PMU) that are
+not otherwise accessible and that might disrupt other hardware
+counters.
+
+The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a
+way to request that counting of events be restricted to times when the
+CPU is in user, kernel and/or hypervisor mode.
+
+The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap
+operations; these can be used to relate userspace IP addresses to actual
+code, even after the mapping (or even the whole process) is gone. These
+events are recorded in the ring-buffer (see below).
+
+The 'comm' bit allows tracking of process comm data on process creation.
+This too is recorded in the ring-buffer (see below).
+
+The 'pid' parameter to the sys_perf_event_open() system call allows the
+counter to be specific to a task:
+
+ pid == 0: if the pid parameter is zero, the counter is attached to the
+ current task.
+
+ pid > 0: the counter is attached to a specific task (if the current task
+ has sufficient privilege to do so)
+
+ pid < 0: all tasks are counted (per cpu counters)
+
+The 'cpu' parameter allows a counter to be made specific to a CPU:
+
+ cpu >= 0: the counter is restricted to a specific CPU
+ cpu == -1: the counter counts on all CPUs
+
+(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.)
+
+A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts
+events of that task and 'follows' that task to whatever CPU the task
+gets scheduled to. Per task counters can be created by any user, for
+their own tasks.
+
+A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
+all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+
+The 'flags' parameter is currently unused and must be zero.
+
+The 'group_fd' parameter allows counter "groups" to be set up. A
+counter group has one counter which is the group "leader". The leader
+is created first, with group_fd = -1 in the sys_perf_event_open call
+that creates it. The rest of the group members are created
+subsequently, with group_fd giving the fd of the group leader.
+(A single counter on its own is created with group_fd = -1 and is
+considered to be a group with only 1 member.)
+
+A counter group is scheduled onto the CPU as a unit, that is, it will
+only be put onto the CPU if all of the counters in the group can be
+put onto the CPU. This means that the values of the member counters
+can be meaningfully compared, added, divided (to get ratios), etc.,
+with each other, since they have counted events for the same set of
+executed instructions.
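+
+A sketch of a two-counter group, reusing the attr setup from the earlier
+example (cycles as the leader, instructions as a member):
+
+	attr.config = PERF_COUNT_HW_CPU_CYCLES;
+	leader = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+
+	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+	member = syscall(__NR_perf_event_open, &attr, 0, -1, leader, 0);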
+
+
+As stated above, asynchronous events, such as counter overflow or PROT_EXEC
+mmap tracking, are logged into a ring-buffer. This ring-buffer is created and
+accessed through mmap().
+
+The mmap size should be 1+2^n pages, where the first page is a meta-data page
+(struct perf_event_mmap_page) that contains various bits of information such
+as where the ring-buffer head is.
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+ __u32 version; /* version number of this structure */
+ __u32 compat_version; /* lowest version this is compat with */
+
+ /*
+ * Bits needed to read the hw counters in user-space.
+ *
+ * u32 seq;
+ * s64 count;
+ *
+ * do {
+ * seq = pc->lock;
+ *
+ * barrier()
+ * if (pc->index) {
+ * count = pmc_read(pc->index - 1);
+ * count += pc->offset;
+ * } else
+ * goto regular_read;
+ *
+ * barrier();
+ * } while (pc->lock != seq);
+ *
+	 * NOTE: for obvious reasons this only works on self-monitoring
+ * processes.
+ */
+ __u32 lock; /* seqlock for synchronization */
+ __u32 index; /* hardware counter identifier */
+ __s64 offset; /* add to hardware counter value */
+
+ /*
+ * Control data for the mmap() data buffer.
+ *
+ * User-space reading this value should issue an rmb(), on SMP capable
+ * platforms, after reading this value -- see perf_event_wakeup().
+ */
+ __u32 data_head; /* head in the data section */
+};
+
+NOTE: the hw-counter userspace bits are arch specific and are currently only
+ implemented on powerpc.
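+
+A sketch of mapping the buffer, with 'fd' from sys_perf_event_open() and
+n == 3 (so an 8 page data area):
+
+	page_size = sysconf(_SC_PAGESIZE);
+	base = mmap(NULL, (1 + 8) * page_size, PROT_READ | PROT_WRITE,
+		    MAP_SHARED, fd, 0);
+	meta = (struct perf_event_mmap_page *)base;
+	/* the data section starts at base + page_size; meta->data_head
+	 * tells how far the kernel has written into it */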
+
+The following 2^n pages are the ring-buffer which contains events of the form:
+
+#define PERF_RECORD_MISC_KERNEL (1 << 0)
+#define PERF_RECORD_MISC_USER (1 << 1)
+#define PERF_RECORD_MISC_OVERFLOW (1 << 2)
+
+struct perf_event_header {
+ __u32 type;
+ __u16 misc;
+ __u16 size;
+};
+
+enum perf_event_type {
+
+ /*
+ * The MMAP events record the PROT_EXEC mappings so that we can
+ * correlate userspace IPs to code. They have the following structure:
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * char filename[];
+ * };
+ */
+ PERF_RECORD_MMAP = 1,
+ PERF_RECORD_MUNMAP = 2,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * char comm[];
+ * };
+ */
+ PERF_RECORD_COMM = 3,
+
+ /*
+ * When header.misc & PERF_RECORD_MISC_OVERFLOW the event_type field
+ * will be PERF_RECORD_*
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * { u64 ip; } && PERF_RECORD_IP
+ * { u32 pid, tid; } && PERF_RECORD_TID
+ * { u64 time; } && PERF_RECORD_TIME
+ * { u64 addr; } && PERF_RECORD_ADDR
+ *
+ * { u64 nr;
+ * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP
+ *
+ * { u16 nr,
+ * hv,
+ * kernel,
+ * user;
+ * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN
+ * };
+ */
+};
+
+NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented
+ on x86.
+
+Notification of new events is possible through poll()/select()/epoll() and
+fcntl() managing signals.
+
+Normally a notification is generated for every page filled, however one can
+additionally set perf_event_attr.wakeup_events to generate one every
+so many counter overflow events.
+
+Future work will include a splice() interface to the ring-buffer.
+
+
+Counters can be enabled and disabled in two ways: via ioctl and via
+prctl. When a counter is disabled, it doesn't count or generate
+events but does continue to exist and maintain its count value.
+
+An individual counter can be enabled with
+
+ ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+
+or disabled with
+
+ ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+
+For a counter group, pass PERF_IOC_FLAG_GROUP as the third argument.
+Enabling or disabling the leader of a group enables or disables the
+whole group; that is, while the group leader is disabled, none of the
+counters in the group will count. Enabling or disabling a member of a
+group other than the leader only affects that counter - disabling a
+non-leader stops that counter from counting but doesn't affect any
+other counter.
+
+Additionally, non-inherited overflow counters can use
+
+ ioctl(fd, PERF_EVENT_IOC_REFRESH, nr);
+
+to enable a counter for 'nr' events, after which it gets disabled again.
+
+A process can enable or disable all the counter groups that are
+attached to it, using prctl:
+
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+This applies to all counters on the current process, whether created
+by this process or by another, and doesn't affect any counters that
+this process has created on other processes. It only enables or
+disables the group leaders, not any other members in the groups.
+
+
+Arch requirements
+-----------------
+
+If your architecture does not have hardware performance metrics, you can
+still use the generic software counters based on hrtimers for sampling.
+
+So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you
+will need at least this:
+ - asm/perf_event.h - a basic stub will suffice at first
+ - support for atomic64 types (and associated helper functions)
+
+If your architecture does have hardware capabilities, you can override the
+weak stub hw_perf_event_init() to register hardware counters.
+
+Architectures that have d-cache aliasing issues, such as Sparc and ARM,
+should select PERF_USE_VMALLOC in order to avoid these for perf mmap().
diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
new file mode 100644
index 000000000..b9c203219
--- /dev/null
+++ b/tools/perf/examples/bpf/5sec.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ Description:
+
+   . Disable strace-like syscall tracing (--no-syscalls), or try tracing
+ just some (-e *sleep).
+
+ . Attach a filter function to a kernel function, returning when it should
+ be considered, i.e. appear on the output.
+
+   . Run it system wide, so that any sleep of >= 5 seconds and less than 6
+     seconds gets caught.
+
+ . Ask for callgraphs using DWARF info, so that userspace can be unwound
+
+ . While this is running, run something like "sleep 5s".
+
+ . If we decide to add tv_nsec as well, then it becomes:
+
+ int probe(hrtimer_nanosleep, rqtp->tv_sec rqtp->tv_nsec)(void *ctx, int err, long sec, long nsec)
+
+ I.e. add where it comes from (rqtp->tv_nsec) and where it will be
+ accessible in the function body (nsec)
+
+ # perf trace --no-syscalls -e tools/perf/examples/bpf/5sec.c/call-graph=dwarf/
+ 0.000 perf_bpf_probe:func:(ffffffff9811b5f0) tv_sec=5
+ hrtimer_nanosleep ([kernel.kallsyms])
+ __x64_sys_nanosleep ([kernel.kallsyms])
+ do_syscall_64 ([kernel.kallsyms])
+ entry_SYSCALL_64 ([kernel.kallsyms])
+ __GI___nanosleep (/usr/lib64/libc-2.26.so)
+ rpl_nanosleep (/usr/bin/sleep)
+ xnanosleep (/usr/bin/sleep)
+ main (/usr/bin/sleep)
+ __libc_start_main (/usr/lib64/libc-2.26.so)
+ _start (/usr/bin/sleep)
+ ^C#
+
+ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
+*/
+
+#include <bpf.h>
+
+int probe(hrtimer_nanosleep, rqtp->tv_sec)(void *ctx, int err, long sec)
+{
+ return sec == 5;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
new file mode 100644
index 000000000..69a31386d
--- /dev/null
+++ b/tools/perf/examples/bpf/augmented_syscalls.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Augment the openat syscall with the contents of the filename pointer argument.
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linker, and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * This matches what is marshalled into the raw_syscall:sys_enter payload
+ * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
+ * which will be done as that feature is implemented in the next csets. For now
+ * it will appear in a dump done by the default tracepoint handler in 'perf trace',
+ * that uses bpf_output__fprintf() to just dump those contents, as done with
+ * the bpf-output event associated with the __bpf_output__ map declared in
+ * tools/perf/include/bpf/stdio.h.
+ */
+
+#include <stdio.h>
+
+struct bpf_map SEC("maps") __augmented_syscalls__ = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = __NR_CPUS__,
+};
+
+struct syscall_enter_openat_args {
+ unsigned long long common_tp_fields;
+ long syscall_nr;
+ long dfd;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+struct augmented_enter_openat_args {
+ struct syscall_enter_openat_args args;
+ char filename[64];
+};
+
+int syscall_enter(openat)(struct syscall_enter_openat_args *args)
+{
+ struct augmented_enter_openat_args augmented_args;
+
+ probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+ probe_read_str(&augmented_args.filename, sizeof(augmented_args.filename), args->filename_ptr);
+ perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU,
+ &augmented_args, sizeof(augmented_args));
+ return 1;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
new file mode 100644
index 000000000..3776d26db
--- /dev/null
+++ b/tools/perf/examples/bpf/empty.c
@@ -0,0 +1,3 @@
+#include <bpf.h>
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c
new file mode 100644
index 000000000..cf3c2fdc7
--- /dev/null
+++ b/tools/perf/examples/bpf/hello.c
@@ -0,0 +1,9 @@
+#include <stdio.h>
+
+int syscall_enter(openat)(void *args)
+{
+ puts("Hello, world\n");
+ return 0;
+}
+
+license(GPL);
diff --git a/tools/perf/examples/bpf/sys_enter_openat.c b/tools/perf/examples/bpf/sys_enter_openat.c
new file mode 100644
index 000000000..9cd124b09
--- /dev/null
+++ b/tools/perf/examples/bpf/sys_enter_openat.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hook into 'openat' syscall entry tracepoint
+ *
+ * Test it with:
+ *
+ * perf trace -e tools/perf/examples/bpf/sys_enter_openat.c cat /etc/passwd > /dev/null
+ *
+ * It'll catch some openat syscalls related to the dynamic linker, and
+ * the last one should be the one for '/etc/passwd'.
+ *
+ * The syscall_enter_openat_args structure can be used to get the syscall
+ * fields and use them to filter calls, i.e. in the expressions that
+ * compute the return value.
+ */
+
+#include <bpf.h>
+
+struct syscall_enter_openat_args {
+ unsigned long long unused;
+ long syscall_nr;
+ long dfd;
+ char *filename_ptr;
+ long flags;
+ long mode;
+};
+
+int syscall_enter(openat)(struct syscall_enter_openat_args *args)
+{
+ return 1;
+}
+
+license(GPL);
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
new file mode 100644
index 000000000..47897d65e
--- /dev/null
+++ b/tools/perf/include/bpf/bpf.h
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _PERF_BPF_H
+#define _PERF_BPF_H
+
+#include <uapi/linux/bpf.h>
+
+/*
+ * A helper structure used by eBPF C program to describe map attributes to
+ * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h:
+ */
+struct bpf_map {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+ unsigned int inner_map_idx;
+ unsigned int numa_node;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#define probe(function, vars) \
+ SEC(#function "=" #function " " #vars) function
+
+#define syscall_enter(name) \
+ SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name
+
+#define license(name) \
+char _license[] SEC("license") = #name; \
+int _version SEC("version") = LINUX_VERSION_CODE;
+
+static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read;
+static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str;
+
+#endif /* _PERF_BPF_H */
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
new file mode 100644
index 000000000..2899cb7bf
--- /dev/null
+++ b/tools/perf/include/bpf/stdio.h
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf.h>
+
+struct bpf_map SEC("maps") __bpf_stdout__ = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(u32),
+ .max_entries = __NR_CPUS__,
+};
+
+static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) =
+ (void *)BPF_FUNC_perf_event_output;
+
+#define puts(from) \
+ ({ const int __len = sizeof(from); \
+ char __from[__len] = from; \
+ perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
+ &__from, __len & (sizeof(from) - 1)); })
diff --git a/tools/perf/jvmti/Build b/tools/perf/jvmti/Build
new file mode 100644
index 000000000..eaeb8cb53
--- /dev/null
+++ b/tools/perf/jvmti/Build
@@ -0,0 +1,8 @@
+jvmti-y += libjvmti.o
+jvmti-y += jvmti_agent.o
+
+CFLAGS_jvmti = -fPIC -DPIC -I$(JDIR)/include -I$(JDIR)/include/linux
+CFLAGS_REMOVE_jvmti = -Wmissing-declarations
+CFLAGS_REMOVE_jvmti += -Wstrict-prototypes
+CFLAGS_REMOVE_jvmti += -Wextra
+CFLAGS_REMOVE_jvmti += -Wwrite-strings
diff --git a/tools/perf/jvmti/jvmti_agent.c b/tools/perf/jvmti/jvmti_agent.c
new file mode 100644
index 000000000..88108598d
--- /dev/null
+++ b/tools/perf/jvmti/jvmti_agent.c
@@ -0,0 +1,496 @@
+/*
+ * jvmti_agent.c: JVMTI agent interface
+ *
+ * Adapted from the Oprofile code in opagent.c:
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#include <sys/types.h>
+#include <sys/stat.h> /* for mkdir() */
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <syscall.h> /* for gettid() */
+#include <err.h>
+#include <linux/kernel.h>
+
+#include "jvmti_agent.h"
+#include "../util/jitdump.h"
+
+#define JIT_LANG "java"
+
+static char jit_path[PATH_MAX];
+static void *marker_addr;
+
+#ifndef HAVE_GETTID
+static inline pid_t gettid(void)
+{
+ return (pid_t)syscall(__NR_gettid);
+}
+#endif
+
+static int get_e_machine(struct jitheader *hdr)
+{
+ ssize_t sret;
+ char id[16];
+ int fd, ret = -1;
+ struct {
+ uint16_t e_type;
+ uint16_t e_machine;
+ } info;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ sret = read(fd, id, sizeof(id));
+ if (sret != sizeof(id))
+ goto error;
+
+ /* check ELF signature */
+ if (id[0] != 0x7f || id[1] != 'E' || id[2] != 'L' || id[3] != 'F')
+ goto error;
+
+ sret = read(fd, &info, sizeof(info));
+ if (sret != sizeof(info))
+ goto error;
+
+ hdr->elf_mach = info.e_machine;
+ ret = 0;
+error:
+ close(fd);
+ return ret;
+}
+
+static int use_arch_timestamp;
+
+static inline uint64_t
+get_arch_timestamp(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((uint64_t)high) << 32;
+#else
+ return 0;
+#endif
+}
+
+#define NSEC_PER_SEC 1000000000
+static int perf_clk_id = CLOCK_MONOTONIC;
+
+static inline uint64_t
+timespec_to_ns(const struct timespec *ts)
+{
+ return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static inline uint64_t
+perf_get_timestamp(void)
+{
+ struct timespec ts;
+ int ret;
+
+ if (use_arch_timestamp)
+ return get_arch_timestamp();
+
+ ret = clock_gettime(perf_clk_id, &ts);
+ if (ret)
+ return 0;
+
+ return timespec_to_ns(&ts);
+}
+
+static int
+create_jit_cache_dir(void)
+{
+ char str[32];
+ char *base, *p;
+ struct tm tm;
+ time_t t;
+ int ret;
+
+ time(&t);
+ localtime_r(&t, &tm);
+
+ base = getenv("JITDUMPDIR");
+ if (!base)
+ base = getenv("HOME");
+ if (!base)
+ base = ".";
+
+ strftime(str, sizeof(str), JIT_LANG"-jit-%Y%m%d", &tm);
+
+ ret = snprintf(jit_path, PATH_MAX, "%s/.debug/", base);
+ if (ret >= PATH_MAX) {
+ warnx("jvmti: cannot generate jit cache dir because %s/.debug/"
+ " is too long, please check the cwd, JITDUMPDIR, and"
+ " HOME variables", base);
+ return -1;
+ }
+ ret = mkdir(jit_path, 0755);
+ if (ret == -1) {
+ if (errno != EEXIST) {
+ warn("jvmti: cannot create jit cache dir %s", jit_path);
+ return -1;
+ }
+ }
+
+ ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit", base);
+ if (ret >= PATH_MAX) {
+ warnx("jvmti: cannot generate jit cache dir because"
+ " %s/.debug/jit is too long, please check the cwd,"
+ " JITDUMPDIR, and HOME variables", base);
+ return -1;
+ }
+ ret = mkdir(jit_path, 0755);
+ if (ret == -1) {
+ if (errno != EEXIST) {
+ warn("jvmti: cannot create jit cache dir %s", jit_path);
+ return -1;
+ }
+ }
+
+ ret = snprintf(jit_path, PATH_MAX, "%s/.debug/jit/%s.XXXXXXXX", base, str);
+ if (ret >= PATH_MAX) {
+ warnx("jvmti: cannot generate jit cache dir because"
+ " %s/.debug/jit/%s.XXXXXXXX is too long, please check"
+ " the cwd, JITDUMPDIR, and HOME variables",
+ base, str);
+ return -1;
+ }
+ p = mkdtemp(jit_path);
+ if (p != jit_path) {
+ warn("jvmti: cannot create jit cache dir %s", jit_path);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+perf_open_marker_file(int fd)
+{
+ long pgsz;
+
+ pgsz = sysconf(_SC_PAGESIZE);
+ if (pgsz == -1)
+ return -1;
+
+ /*
+ * we mmap the jitdump to create an MMAP RECORD in perf.data file.
+ * The mmap is captured either live (perf record running when we mmap)
+ * or in deferred mode, via /proc/PID/maps
+ * the MMAP record is used as a marker of a jitdump file for more meta
+ * data info about the jitted code. Perf report/annotate detect this
+ * special filename and process the jitdump file.
+ *
+ * mapping must be PROT_EXEC to ensure it is captured by perf record
+ * even when not using -d option
+ */
+ marker_addr = mmap(NULL, pgsz, PROT_READ|PROT_EXEC, MAP_PRIVATE, fd, 0);
+ return (marker_addr == MAP_FAILED) ? -1 : 0;
+}
+
+static void
+perf_close_marker_file(void)
+{
+ long pgsz;
+
+ if (!marker_addr)
+ return;
+
+ pgsz = sysconf(_SC_PAGESIZE);
+ if (pgsz == -1)
+ return;
+
+ munmap(marker_addr, pgsz);
+}
+
+static void
+init_arch_timestamp(void)
+{
+ char *str = getenv("JITDUMP_USE_ARCH_TIMESTAMP");
+
+ if (!str || !*str || !strcmp(str, "0"))
+ return;
+
+ use_arch_timestamp = 1;
+}
+
+void *jvmti_open(void)
+{
+ char dump_path[PATH_MAX];
+ struct jitheader header;
+ int fd, ret;
+ FILE *fp;
+
+ init_arch_timestamp();
+
+ /*
+ * check if clockid is supported
+ */
+ if (!perf_get_timestamp()) {
+ if (use_arch_timestamp)
+ warnx("jvmti: arch timestamp not supported");
+ else
+ warnx("jvmti: kernel does not support %d clock id", perf_clk_id);
+ }
+
+ memset(&header, 0, sizeof(header));
+
+ /*
+ * jitdump file dir
+ */
+ if (create_jit_cache_dir() < 0)
+ return NULL;
+
+ /*
+ * jitdump file name
+ */
+ ret = snprintf(dump_path, PATH_MAX, "%s/jit-%i.dump", jit_path, getpid());
+ if (ret >= PATH_MAX) {
+ warnx("jvmti: cannot generate jitdump file full path because"
+ " %s/jit-%i.dump is too long, please check the cwd,"
+ " JITDUMPDIR, and HOME variables", jit_path, getpid());
+ return NULL;
+ }
+
+ fd = open(dump_path, O_CREAT|O_TRUNC|O_RDWR, 0666);
+ if (fd == -1)
+ return NULL;
+
+ /*
+	 * create perf.data marker for the jitdump file
+ */
+	if (perf_open_marker_file(fd)) {
+		warnx("jvmti: failed to create marker file");
+		close(fd);
+		return NULL;
+	}
+
+ fp = fdopen(fd, "w+");
+ if (!fp) {
+ warn("jvmti: cannot create %s", dump_path);
+ close(fd);
+		return NULL;
+ }
+
+ warnx("jvmti: jitdump in %s", dump_path);
+
+ if (get_e_machine(&header)) {
+ warn("get_e_machine failed\n");
+ goto error;
+ }
+
+ header.magic = JITHEADER_MAGIC;
+ header.version = JITHEADER_VERSION;
+ header.total_size = sizeof(header);
+ header.pid = getpid();
+
+ header.timestamp = perf_get_timestamp();
+
+ if (use_arch_timestamp)
+ header.flags |= JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
+ if (!fwrite(&header, sizeof(header), 1, fp)) {
+ warn("jvmti: cannot write dumpfile header");
+ goto error;
+ }
+ return fp;
+error:
+ fclose(fp);
+ return NULL;
+}
+
+int
+jvmti_close(void *agent)
+{
+ struct jr_code_close rec;
+ FILE *fp = agent;
+
+ if (!fp) {
+ warnx("jvmti: invalid fd in close_agent");
+ return -1;
+ }
+
+ rec.p.id = JIT_CODE_CLOSE;
+ rec.p.total_size = sizeof(rec);
+
+ rec.p.timestamp = perf_get_timestamp();
+
+ if (!fwrite(&rec, sizeof(rec), 1, fp))
+ return -1;
+
+ fclose(fp);
+
+ fp = NULL;
+
+ perf_close_marker_file();
+
+ return 0;
+}
+
+int
+jvmti_write_code(void *agent, char const *sym,
+ uint64_t vma, void const *code, unsigned int const size)
+{
+ static int code_generation = 1;
+ struct jr_code_load rec;
+ size_t sym_len;
+ FILE *fp = agent;
+ int ret = -1;
+
+ /* don't care about 0 length function, no samples */
+ if (size == 0)
+ return 0;
+
+ if (!fp) {
+ warnx("jvmti: invalid fd in write_native_code");
+ return -1;
+ }
+
+ sym_len = strlen(sym) + 1;
+
+ rec.p.id = JIT_CODE_LOAD;
+ rec.p.total_size = sizeof(rec) + sym_len;
+ rec.p.timestamp = perf_get_timestamp();
+
+ rec.code_size = size;
+ rec.vma = vma;
+ rec.code_addr = vma;
+ rec.pid = getpid();
+ rec.tid = gettid();
+
+ if (code)
+ rec.p.total_size += size;
+
+ /*
+	 * If JVM is multi-threaded, multiple concurrent calls to agent
+ * may be possible, so protect file writes
+ */
+ flockfile(fp);
+
+ /*
+ * get code index inside lock to avoid race condition
+ */
+ rec.code_index = code_generation++;
+
+ ret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+ fwrite_unlocked(sym, sym_len, 1, fp);
+
+ if (code)
+ fwrite_unlocked(code, size, 1, fp);
+
+ funlockfile(fp);
+
+ ret = 0;
+
+ return ret;
+}
+
+int
+jvmti_write_debug_info(void *agent, uint64_t code,
+ int nr_lines, jvmti_line_info_t *li,
+ const char * const * file_names)
+{
+ struct jr_code_debug_info rec;
+ size_t sret, len, size, flen = 0;
+ uint64_t addr;
+ FILE *fp = agent;
+ int i;
+
+ /*
+ * no entry to write
+ */
+ if (!nr_lines)
+ return 0;
+
+ if (!fp) {
+ warnx("jvmti: invalid fd in write_debug_info");
+ return -1;
+ }
+
+ for (i = 0; i < nr_lines; ++i) {
+ flen += strlen(file_names[i]) + 1;
+ }
+
+ rec.p.id = JIT_CODE_DEBUG_INFO;
+ size = sizeof(rec);
+ rec.p.timestamp = perf_get_timestamp();
+ rec.code_addr = (uint64_t)(uintptr_t)code;
+ rec.nr_entry = nr_lines;
+
+ /*
+ * on disk source line info layout:
+ * uint64_t : addr
+ * int : line number
+ * int : column discriminator
+ * file[] : source file name
+ */
+ size += nr_lines * sizeof(struct debug_entry);
+ size += flen;
+ rec.p.total_size = size;
+
+ /*
+	 * If JVM is multi-threaded, multiple concurrent calls to agent
+ * may be possible, so protect file writes
+ */
+ flockfile(fp);
+
+ sret = fwrite_unlocked(&rec, sizeof(rec), 1, fp);
+ if (sret != 1)
+ goto error;
+
+ for (i = 0; i < nr_lines; i++) {
+
+ addr = (uint64_t)li[i].pc;
+ len = sizeof(addr);
+ sret = fwrite_unlocked(&addr, len, 1, fp);
+ if (sret != 1)
+ goto error;
+
+ len = sizeof(li[0].line_number);
+ sret = fwrite_unlocked(&li[i].line_number, len, 1, fp);
+ if (sret != 1)
+ goto error;
+
+ len = sizeof(li[0].discrim);
+ sret = fwrite_unlocked(&li[i].discrim, len, 1, fp);
+ if (sret != 1)
+ goto error;
+
+ sret = fwrite_unlocked(file_names[i], strlen(file_names[i]) + 1, 1, fp);
+ if (sret != 1)
+ goto error;
+ }
+ funlockfile(fp);
+ return 0;
+error:
+ funlockfile(fp);
+ return -1;
+}
diff --git a/tools/perf/jvmti/jvmti_agent.h b/tools/perf/jvmti/jvmti_agent.h
new file mode 100644
index 000000000..6ed82f6c0
--- /dev/null
+++ b/tools/perf/jvmti/jvmti_agent.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JVMTI_AGENT_H__
+#define __JVMTI_AGENT_H__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <jvmti.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+typedef struct {
+ unsigned long pc;
+ int line_number;
+ int discrim; /* discriminator -- 0 for now */
+ jmethodID methodID;
+} jvmti_line_info_t;
+
+void *jvmti_open(void);
+int jvmti_close(void *agent);
+int jvmti_write_code(void *agent, char const *symbol_name,
+ uint64_t vma, void const *code,
+ const unsigned int code_size);
+
+int jvmti_write_debug_info(void *agent, uint64_t code, int nr_lines,
+ jvmti_line_info_t *li,
+ const char * const * file_names);
+
+#if defined(__cplusplus)
+}
+
+#endif
+#endif /* __JVMTI_AGENT_H__ */
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
new file mode 100644
index 000000000..3361d98a4
--- /dev/null
+++ b/tools/perf/jvmti/libjvmti.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <err.h>
+#include <jvmti.h>
+#include <jvmticmlr.h>
+#include <limits.h>
+
+#include "jvmti_agent.h"
+
+static int has_line_numbers;
+void *jvmti_agent;
+
+static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
+{
+ char *err_msg = NULL;
+ jvmtiError err;
+ err = (*jvmti)->GetErrorName(jvmti, ret, &err_msg);
+ if (err == JVMTI_ERROR_NONE) {
+ warnx("%s failed with %s", msg, err_msg);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)err_msg);
+ } else {
+ warnx("%s failed with an unknown error %d", msg, ret);
+ }
+}
+
+static jvmtiError
+do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+ jvmti_line_info_t *tab, jint *nr)
+{
+ jint i, lines = 0;
+ jint nr_lines = 0;
+ jvmtiLineNumberEntry *loc_tab = NULL;
+ jvmtiError ret;
+
+ ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetLineNumberTable", ret);
+ return ret;
+ }
+
+ for (i = 0; i < nr_lines; i++) {
+ if (loc_tab[i].start_location < bci) {
+ tab[lines].pc = (unsigned long)pc;
+ tab[lines].line_number = loc_tab[i].line_number;
+ tab[lines].discrim = 0; /* not yet used */
+ tab[lines].methodID = m;
+ lines++;
+ } else {
+ break;
+ }
+ }
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
+ *nr = lines;
+ return JVMTI_ERROR_NONE;
+}
+
+static jvmtiError
+get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **tab, int *nr_lines)
+{
+ const jvmtiCompiledMethodLoadRecordHeader *hdr;
+ jvmtiCompiledMethodLoadInlineRecord *rec;
+ jvmtiLineNumberEntry *lne = NULL;
+ PCStackInfo *c;
+ jint nr, ret;
+ int nr_total = 0;
+ int i, lines_total = 0;
+
+ if (!(tab && nr_lines))
+ return JVMTI_ERROR_NULL_POINTER;
+
+ /*
+ * Phase 1 -- get the number of lines necessary
+ */
+ for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+ if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+ rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+ for (i = 0; i < rec->numpcs; i++) {
+ c = rec->pcinfo + i;
+ nr = 0;
+ /*
+				 * unfortunately, we need a table to get the number of lines!
+ */
+ ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
+ if (ret == JVMTI_ERROR_NONE) {
+ /* free what was allocated for nothing */
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
+ nr_total += (int)nr;
+ } else {
+ print_error(jvmti, "GetLineNumberTable", ret);
+ }
+ }
+ }
+ }
+
+ if (nr_total == 0)
+ return JVMTI_ERROR_NOT_FOUND;
+
+ /*
+ * Phase 2 -- allocate big enough line table
+ */
+ *tab = malloc(nr_total * sizeof(**tab));
+ if (!*tab)
+ return JVMTI_ERROR_OUT_OF_MEMORY;
+
+ for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
+ if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
+ rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
+ for (i = 0; i < rec->numpcs; i++) {
+ c = rec->pcinfo + i;
+ nr = 0;
+ ret = do_get_line_numbers(jvmti, c->pc,
+ c->methods[0],
+ c->bcis[0],
+ *tab + lines_total,
+ &nr);
+ if (ret == JVMTI_ERROR_NONE)
+ lines_total += nr;
+ }
+ }
+ }
+ *nr_lines = lines_total;
+ return JVMTI_ERROR_NONE;
+}
+
+static void
+copy_class_filename(const char * class_sign, const char * file_name, char * result, size_t max_length)
+{
+ /*
+	 * Assume the path name mirrors the class hierarchy; this is a common practice with Java programs
+ */
+ if (*class_sign == 'L') {
+ int j, i = 0;
+ char *p = strrchr(class_sign, '/');
+ if (p) {
+ /* drop the 'L' prefix and copy up to the final '/' */
+ for (i = 0; i < (p - class_sign); i++)
+ result[i] = class_sign[i+1];
+ }
+ /*
+ * append file name, we use loops and not string ops to avoid modifying
+ * class_sign which is used later for the symbol name
+ */
+ for (j = 0; i < (max_length - 1) && file_name && j < strlen(file_name); j++, i++)
+ result[i] = file_name[j];
+
+ result[i] = '\0';
+ } else {
+ /* fallback case */
+ strlcpy(result, file_name, max_length);
+ }
+}
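+
+/*
+ * Illustration (hypothetical values):
+ *
+ *	char fn[PATH_MAX];
+ *
+ *	copy_class_filename("Ljava/util/ArrayList;", "ArrayList.java",
+ *			    fn, sizeof(fn));
+ *
+ * leaves fn holding "java/util/ArrayList.java": the 'L' prefix is dropped,
+ * the package path up to the final '/' is kept, and the source file name
+ * is appended.
+ */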
+
+static jvmtiError
+get_source_filename(jvmtiEnv *jvmti, jmethodID methodID, char ** buffer)
+{
+ jvmtiError ret;
+ jclass decl_class;
+ char *file_name = NULL;
+ char *class_sign = NULL;
+ char fn[PATH_MAX];
+ size_t len;
+
+ ret = (*jvmti)->GetMethodDeclaringClass(jvmti, methodID, &decl_class);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetMethodDeclaringClass", ret);
+ return ret;
+ }
+
+ ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetSourceFileName", ret);
+ return ret;
+ }
+
+ ret = (*jvmti)->GetClassSignature(jvmti, decl_class, &class_sign, NULL);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetClassSignature", ret);
+ goto free_file_name_error;
+ }
+
+ copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+ len = strlen(fn);
+ *buffer = malloc((len + 1) * sizeof(char));
+ if (!*buffer) {
+ print_error(jvmti, "GetClassSignature", ret);
+ ret = JVMTI_ERROR_OUT_OF_MEMORY;
+ goto free_class_sign_error;
+ }
+ strcpy(*buffer, fn);
+ ret = JVMTI_ERROR_NONE;
+
+free_class_sign_error:
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+free_file_name_error:
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+
+ return ret;
+}
+
+static jvmtiError
+fill_source_filenames(jvmtiEnv *jvmti, int nr_lines,
+ const jvmti_line_info_t * line_tab,
+ char ** file_names)
+{
+ int index;
+ jvmtiError ret;
+
+ for (index = 0; index < nr_lines; ++index) {
+ ret = get_source_filename(jvmti, line_tab[index].methodID, &(file_names[index]));
+ if (ret != JVMTI_ERROR_NONE)
+ return ret;
+ }
+
+ return JVMTI_ERROR_NONE;
+}
+
+static void JNICALL
+compiled_method_load_cb(jvmtiEnv *jvmti,
+ jmethodID method,
+ jint code_size,
+ void const *code_addr,
+ jint map_length,
+ jvmtiAddrLocationMap const *map,
+ const void *compile_info)
+{
+ jvmti_line_info_t *line_tab = NULL;
+ char ** line_file_names = NULL;
+ jclass decl_class;
+ char *class_sign = NULL;
+ char *func_name = NULL;
+ char *func_sign = NULL;
+ char *file_name = NULL;
+ char fn[PATH_MAX];
+ uint64_t addr = (uint64_t)(uintptr_t)code_addr;
+ jvmtiError ret;
+ int nr_lines = 0; /* in line_tab[] */
+ size_t len;
+ int output_debug_info = 0;
+
+ ret = (*jvmti)->GetMethodDeclaringClass(jvmti, method,
+ &decl_class);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetMethodDeclaringClass", ret);
+ return;
+ }
+
+ if (has_line_numbers && map && map_length) {
+ ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
+ if (ret != JVMTI_ERROR_NONE) {
+ warnx("jvmti: cannot get line table for method");
+ nr_lines = 0;
+ } else if (nr_lines > 0) {
+ line_file_names = malloc(sizeof(char*) * nr_lines);
+ if (!line_file_names) {
+ warnx("jvmti: cannot allocate space for line table method names");
+ } else {
+ memset(line_file_names, 0, sizeof(char*) * nr_lines);
+ ret = fill_source_filenames(jvmti, nr_lines, line_tab, line_file_names);
+ if (ret != JVMTI_ERROR_NONE) {
+ warnx("jvmti: fill_source_filenames failed");
+ } else {
+ output_debug_info = 1;
+ }
+ }
+ }
+ }
+
+ ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetSourceFileName", ret);
+ goto error;
+ }
+
+ ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
+ &class_sign, NULL);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetClassSignature", ret);
+ goto error;
+ }
+
+ ret = (*jvmti)->GetMethodName(jvmti, method, &func_name,
+ &func_sign, NULL);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "GetMethodName", ret);
+ goto error;
+ }
+
+ copy_class_filename(class_sign, file_name, fn, PATH_MAX);
+
+ /*
+ * write source line info record if we have it
+ */
+ if (output_debug_info)
+ if (jvmti_write_debug_info(jvmti_agent, addr, nr_lines, line_tab, (const char * const *) line_file_names))
+ warnx("jvmti: write_debug_info() failed");
+
+ len = strlen(func_name) + strlen(class_sign) + strlen(func_sign) + 2;
+ {
+ char str[len];
+ snprintf(str, len, "%s%s%s", class_sign, func_name, func_sign);
+
+ if (jvmti_write_code(jvmti_agent, str, addr, code_addr, code_size))
+ warnx("jvmti: write_code() failed");
+ }
+error:
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
+ (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
+ free(line_tab);
+ while (line_file_names && (nr_lines > 0)) {
+ if (line_file_names[nr_lines - 1]) {
+ free(line_file_names[nr_lines - 1]);
+ }
+ nr_lines -= 1;
+ }
+ free(line_file_names);
+}
+
+static void JNICALL
+code_generated_cb(jvmtiEnv *jvmti,
+ char const *name,
+ void const *code_addr,
+ jint code_size)
+{
+ uint64_t addr = (uint64_t)(unsigned long)code_addr;
+ int ret;
+
+ ret = jvmti_write_code(jvmti_agent, name, addr, code_addr, code_size);
+ if (ret)
+ warnx("jvmti: write_code() failed for code_generated");
+}
+
+JNIEXPORT jint JNICALL
+Agent_OnLoad(JavaVM *jvm, char *options, void *reserved __maybe_unused)
+{
+ jvmtiEventCallbacks cb;
+ jvmtiCapabilities caps1;
+ jvmtiJlocationFormat format;
+ jvmtiEnv *jvmti = NULL;
+ jint ret;
+
+ jvmti_agent = jvmti_open();
+ if (!jvmti_agent) {
+ warnx("jvmti: open_agent failed");
+ return -1;
+ }
+
+ /*
+ * Request a JVMTI interface version 1 environment
+ */
+ ret = (*jvm)->GetEnv(jvm, (void *)&jvmti, JVMTI_VERSION_1);
+ if (ret != JNI_OK) {
+ warnx("jvmti: jvmti version 1 not supported");
+ return -1;
+ }
+
+ /*
+ * acquire method_load capability, we require it
+ * request line numbers (optional)
+ */
+ memset(&caps1, 0, sizeof(caps1));
+ caps1.can_generate_compiled_method_load_events = 1;
+
+ ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "AddCapabilities", ret);
+ return -1;
+ }
+ ret = (*jvmti)->GetJLocationFormat(jvmti, &format);
+ if (ret == JVMTI_ERROR_NONE && format == JVMTI_JLOCATION_JVMBCI) {
+ memset(&caps1, 0, sizeof(caps1));
+ caps1.can_get_line_numbers = 1;
+ caps1.can_get_source_file_name = 1;
+ ret = (*jvmti)->AddCapabilities(jvmti, &caps1);
+ if (ret == JVMTI_ERROR_NONE)
+ has_line_numbers = 1;
+ } else if (ret != JVMTI_ERROR_NONE)
+ print_error(jvmti, "GetJLocationFormat", ret);
+
+
+ memset(&cb, 0, sizeof(cb));
+
+ cb.CompiledMethodLoad = compiled_method_load_cb;
+ cb.DynamicCodeGenerated = code_generated_cb;
+
+ ret = (*jvmti)->SetEventCallbacks(jvmti, &cb, sizeof(cb));
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "SetEventCallbacks", ret);
+ return -1;
+ }
+
+ ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+ JVMTI_EVENT_COMPILED_METHOD_LOAD, NULL);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "SetEventNotificationMode(METHOD_LOAD)", ret);
+ return -1;
+ }
+
+ ret = (*jvmti)->SetEventNotificationMode(jvmti, JVMTI_ENABLE,
+ JVMTI_EVENT_DYNAMIC_CODE_GENERATED, NULL);
+ if (ret != JVMTI_ERROR_NONE) {
+ print_error(jvmti, "SetEventNotificationMode(CODE_GENERATED)", ret);
+ return -1;
+ }
+ return 0;
+}
+
+JNIEXPORT void JNICALL
+Agent_OnUnload(JavaVM *jvm __maybe_unused)
+{
+ int ret;
+
+ ret = jvmti_close(jvmti_agent);
+ if (ret)
+ errx(1, "Error: op_close_agent()");
+}
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
new file mode 100644
index 000000000..0cfb3e2ce
--- /dev/null
+++ b/tools/perf/perf-archive.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# perf archive
+# Arnaldo Carvalho de Melo <acme@redhat.com>
+
+PERF_DATA=perf.data
+if [ $# -ne 0 ] ; then
+ PERF_DATA=$1
+fi
+
+#
+# PERF_BUILDID_DIR environment variable set by perf
+# path to buildid directory, default to $HOME/.debug
+#
+if [ -z $PERF_BUILDID_DIR ]; then
+ PERF_BUILDID_DIR=~/.debug/
+else
+ # append / to make substitutions work
+ PERF_BUILDID_DIR=$PERF_BUILDID_DIR/
+fi
+
+BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
+NOBUILDID=0000000000000000000000000000000000000000
+
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
+if [ ! -s $BUILDIDS ] ; then
+ echo "perf archive: no build-ids found"
+ rm $BUILDIDS || true
+ exit 1
+fi
+
+MANIFEST=$(mktemp /tmp/perf-archive-manifest.XXXXXX)
+PERF_BUILDID_LINKDIR=$(readlink -f $PERF_BUILDID_DIR)/
+
+cut -d ' ' -f 1 $BUILDIDS | \
+while read build_id ; do
+ linkname=$PERF_BUILDID_DIR.build-id/${build_id:0:2}/${build_id:2}
+ filename=$(readlink -f $linkname)
+ echo ${linkname#$PERF_BUILDID_DIR} >> $MANIFEST
+ echo ${filename#$PERF_BUILDID_LINKDIR} >> $MANIFEST
+done
+
+tar cjf $PERF_DATA.tar.bz2 -C $PERF_BUILDID_DIR -T $MANIFEST
+rm $MANIFEST $BUILDIDS || true
+echo -e "Now please run:\n"
+echo -e "$ tar xvf $PERF_DATA.tar.bz2 -C ~/.debug\n"
+echo "wherever you need to run 'perf report' on."
+exit 0
diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
new file mode 100644
index 000000000..fdf75d45e
--- /dev/null
+++ b/tools/perf/perf-completion.sh
@@ -0,0 +1,298 @@
+# perf bash and zsh completion
+# SPDX-License-Identifier: GPL-2.0
+
+# Taken from git.git's completion script.
+__my_reassemble_comp_words_by_ref()
+{
+ local exclude i j first
+ # Which word separators to exclude?
+ exclude="${1//[^$COMP_WORDBREAKS]}"
+ cword_=$COMP_CWORD
+ if [ -z "$exclude" ]; then
+ words_=("${COMP_WORDS[@]}")
+ return
+ fi
+ # List of word completion separators has shrunk;
+ # re-assemble words to complete.
+ for ((i=0, j=0; i < ${#COMP_WORDS[@]}; i++, j++)); do
+ # Append each nonempty word consisting of just
+ # word separator characters to the current word.
+ first=t
+ while
+ [ $i -gt 0 ] &&
+ [ -n "${COMP_WORDS[$i]}" ] &&
+ # word consists of excluded word separators
+ [ "${COMP_WORDS[$i]//[^$exclude]}" = "${COMP_WORDS[$i]}" ]
+ do
+ # Attach to the previous token,
+ # unless the previous token is the command name.
+ if [ $j -ge 2 ] && [ -n "$first" ]; then
+ ((j--))
+ fi
+ first=
+ words_[$j]=${words_[j]}${COMP_WORDS[i]}
+ if [ $i = $COMP_CWORD ]; then
+ cword_=$j
+ fi
+ if (($i < ${#COMP_WORDS[@]} - 1)); then
+ ((i++))
+ else
+ # Done.
+ return
+ fi
+ done
+ words_[$j]=${words_[j]}${COMP_WORDS[i]}
+ if [ $i = $COMP_CWORD ]; then
+ cword_=$j
+ fi
+ done
+}
+
+# Define preload_get_comp_words_by_ref="false", if the function
+# __perf_get_comp_words_by_ref() is required instead.
+preload_get_comp_words_by_ref="true"
+
+if [ $preload_get_comp_words_by_ref = "true" ]; then
+ type _get_comp_words_by_ref &>/dev/null ||
+ preload_get_comp_words_by_ref="false"
+fi
+[ $preload_get_comp_words_by_ref = "true" ] ||
+__perf_get_comp_words_by_ref()
+{
+ local exclude cur_ words_ cword_
+ if [ "$1" = "-n" ]; then
+ exclude=$2
+ shift 2
+ fi
+ __my_reassemble_comp_words_by_ref "$exclude"
+ cur_=${words_[cword_]}
+ while [ $# -gt 0 ]; do
+ case "$1" in
+ cur)
+ cur=$cur_
+ ;;
+ prev)
+ prev=${words_[$cword_-1]}
+ ;;
+ words)
+ words=("${words_[@]}")
+ ;;
+ cword)
+ cword=$cword_
+ ;;
+ esac
+ shift
+ done
+}
+
+# Define preload__ltrim_colon_completions="false", if the function
+# __perf__ltrim_colon_completions() is required instead.
+preload__ltrim_colon_completions="true"
+
+if [ $preload__ltrim_colon_completions = "true" ]; then
+ type __ltrim_colon_completions &>/dev/null ||
+ preload__ltrim_colon_completions="false"
+fi
+[ $preload__ltrim_colon_completions = "true" ] ||
+__perf__ltrim_colon_completions()
+{
+ if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then
+ # Remove colon-word prefix from COMPREPLY items
+ local colon_word=${1%"${1##*:}"}
+ local i=${#COMPREPLY[*]}
+ while [[ $((--i)) -ge 0 ]]; do
+ COMPREPLY[$i]=${COMPREPLY[$i]#"$colon_word"}
+ done
+ fi
+}
+
+__perfcomp ()
+{
+ COMPREPLY=( $( compgen -W "$1" -- "$2" ) )
+}
+
+__perfcomp_colon ()
+{
+ __perfcomp "$1" "$2"
+ if [ $preload__ltrim_colon_completions = "true" ]; then
+ __ltrim_colon_completions $cur
+ else
+ __perf__ltrim_colon_completions $cur
+ fi
+}
+
+__perf_prev_skip_opts ()
+{
+ local i cmd_ cmds_
+
+ let i=cword-1
+ cmds_=$($cmd $1 --list-cmds)
+ prev_skip_opts=()
+ while [ $i -ge 0 ]; do
+ if [[ ${words[i]} == $1 ]]; then
+ return
+ fi
+ for cmd_ in $cmds_; do
+ if [[ ${words[i]} == $cmd_ ]]; then
+ prev_skip_opts=${words[i]}
+ return
+ fi
+ done
+ ((i--))
+ done
+}
+
+__perf_main ()
+{
+ local cmd
+
+ cmd=${words[0]}
+ COMPREPLY=()
+
+ # Skip options backward and find the last perf command
+ __perf_prev_skip_opts
+ # List perf subcommands or long options
+ if [ -z $prev_skip_opts ]; then
+ if [[ $cur == --* ]]; then
+ cmds=$($cmd --list-opts)
+ else
+ cmds=$($cmd --list-cmds)
+ fi
+ __perfcomp "$cmds" "$cur"
+ # List possible events for -e option
+ elif [[ $prev == @("-e"|"--event") &&
+ $prev_skip_opts == @(record|stat|top) ]]; then
+
+ local cur1=${COMP_WORDS[COMP_CWORD]}
+ local raw_evts=$($cmd list --raw-dump)
+ local arr s tmp result
+
+ if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then
+ OLD_IFS="$IFS"
+ IFS=" "
+ arr=($raw_evts)
+ IFS="$OLD_IFS"
+
+ for s in ${arr[@]}
+ do
+ if [[ "$s" == *cpu/* ]]; then
+ tmp=${s#*cpu/}
+ result=$result" ""cpu/"${tmp^^}
+ else
+ result=$result" "$s
+ fi
+ done
+
+ evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events)
+ else
+ evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events)
+ fi
+
+ if [[ "$cur1" == , ]]; then
+ __perfcomp_colon "$evts" ""
+ else
+ __perfcomp_colon "$evts" "$cur1"
+ fi
+ else
+ # List subcommands for perf commands
+ if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched|
+ |data|help|script|test|timechart|trace) ]]; then
+ subcmds=$($cmd $prev_skip_opts --list-cmds)
+ __perfcomp_colon "$subcmds" "$cur"
+ fi
+ # List long option names
+ if [[ $cur == --* ]]; then
+ subcmd=$prev_skip_opts
+ __perf_prev_skip_opts $subcmd
+ subcmd=$subcmd" "$prev_skip_opts
+ opts=$($cmd $subcmd --list-opts)
+ __perfcomp "$opts" "$cur"
+ fi
+ fi
+}
+
+if [[ -n ${ZSH_VERSION-} ]]; then
+ autoload -U +X compinit && compinit
+
+ __perfcomp ()
+ {
+ emulate -L zsh
+
+ local c IFS=$' \t\n'
+ local -a array
+
+ for c in ${=1}; do
+ case $c in
+ --*=*|*.) ;;
+ *) c="$c " ;;
+ esac
+ array[${#array[@]}+1]="$c"
+ done
+
+ compset -P '*[=:]'
+ compadd -Q -S '' -a -- array && _ret=0
+ }
+
+ __perfcomp_colon ()
+ {
+ emulate -L zsh
+
+ local cur_="${2-$cur}"
+ local c IFS=$' \t\n'
+ local -a array
+
+ if [[ "$cur_" == *:* ]]; then
+ local colon_word=${cur_%"${cur_##*:}"}
+ fi
+
+ for c in ${=1}; do
+ case $c in
+ --*=*|*.) ;;
+ *) c="$c " ;;
+ esac
+ array[$#array+1]=${c#"$colon_word"}
+ done
+
+ compset -P '*[=:]'
+ compadd -Q -S '' -a -- array && _ret=0
+ }
+
+ _perf ()
+ {
+ local _ret=1 cur cword prev
+ cur=${words[CURRENT]}
+ prev=${words[CURRENT-1]}
+ let cword=CURRENT-1
+ emulate ksh -c __perf_main
+ let _ret && _default && _ret=0
+ return _ret
+ }
+
+ compdef _perf perf
+ return
+fi
+
+type perf &>/dev/null &&
+_perf()
+{
+ if [[ "$COMP_WORDBREAKS" != *,* ]]; then
+ COMP_WORDBREAKS="${COMP_WORDBREAKS},"
+ export COMP_WORDBREAKS
+ fi
+
+ if [[ "$COMP_WORDBREAKS" == *:* ]]; then
+ COMP_WORDBREAKS="${COMP_WORDBREAKS/:/}"
+ export COMP_WORDBREAKS
+ fi
+
+ local cur words cword prev
+ if [ $preload_get_comp_words_by_ref = "true" ]; then
+ _get_comp_words_by_ref -n =:, cur words cword prev
+ else
+ __perf_get_comp_words_by_ref -n =:, cur words cword prev
+ fi
+ __perf_main
+} &&
+
+complete -o bashdefault -o default -o nospace -F _perf perf 2>/dev/null \
+ || complete -o default -o nospace -F _perf perf
diff --git a/tools/perf/perf-read-vdso.c b/tools/perf/perf-read-vdso.c
new file mode 100644
index 000000000..8c0ca0cc4
--- /dev/null
+++ b/tools/perf/perf-read-vdso.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+/*
+ * Include definition of find_vdso_map() also used in util/vdso.c for
+ * building perf.
+ */
+#include "util/find-vdso-map.c"
+
+int main(void)
+{
+ void *start, *end;
+ size_t size, written;
+
+ if (find_vdso_map(&start, &end))
+ return 1;
+
+ size = end - start;
+
+ while (size) {
+ written = fwrite(start, 1, size, stdout);
+ if (!written)
+ return 1;
+ start += written;
+ size -= written;
+ }
+
+ if (fflush(stdout))
+ return 1;
+
+ return 0;
+}
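The copy loop above retries short fwrite()s until the whole vDSO image has
been written to stdout. find_vdso_map() itself comes from
util/find-vdso-map.c, which is not part of this hunk; below is a minimal
sketch, assuming the usual /proc/self/maps layout, of how such a lookup can
be implemented (names are local to the sketch, not perf's):

    #include <stdio.h>
    #include <string.h>

    /* Return 0 and fill start/end if a "[vdso]" mapping is found. */
    static int find_vdso_map_sketch(void **start, void **end)
    {
        unsigned long s, e;
        char line[512];
        FILE *maps = fopen("/proc/self/maps", "r");
        int ret = 1;    /* non-zero means "not found", as in main() above */

        if (!maps)
            return 1;

        while (fgets(line, sizeof(line), maps)) {
            if (!strstr(line, "[vdso]"))
                continue;
            /* each maps line begins "start-end perms offset ..." */
            if (sscanf(line, "%lx-%lx", &s, &e) == 2) {
                *start = (void *)s;
                *end = (void *)e;
                ret = 0;
            }
            break;
        }

        fclose(maps);
        return ret;
    }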
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
new file mode 100644
index 000000000..3eb7a3916
--- /dev/null
+++ b/tools/perf/perf-sys.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_SYS_H
+#define _PERF_SYS_H
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <asm/barrier.h>
+
+#ifdef __powerpc__
+#define CPUINFO_PROC {"cpu"}
+#endif
+
+#ifdef __s390__
+#define CPUINFO_PROC {"vendor_id"}
+#endif
+
+#ifdef __sh__
+#define CPUINFO_PROC {"cpu type"}
+#endif
+
+#ifdef __hppa__
+#define CPUINFO_PROC {"cpu"}
+#endif
+
+#ifdef __sparc__
+#define CPUINFO_PROC {"cpu"}
+#endif
+
+#ifdef __alpha__
+#define CPUINFO_PROC {"cpu model"}
+#endif
+
+#ifdef __arm__
+#define CPUINFO_PROC {"model name", "Processor"}
+#endif
+
+#ifdef __mips__
+#define CPUINFO_PROC {"cpu model"}
+#endif
+
+#ifdef __arc__
+#define CPUINFO_PROC {"Processor"}
+#endif
+
+#ifdef __xtensa__
+#define CPUINFO_PROC {"core ID"}
+#endif
+
+#ifndef CPUINFO_PROC
+#define CPUINFO_PROC { "model name", }
+#endif
+
+static inline int
+sys_perf_event_open(struct perf_event_attr *attr,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags)
+{
+ int fd;
+
+ fd = syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+
+#ifdef HAVE_ATTR_TEST
+ if (unlikely(test_attr__enabled))
+ test_attr__open(attr, pid, cpu, fd, group_fd, flags);
+#endif
+ return fd;
+}
+
+#endif /* _PERF_SYS_H */
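sys_perf_event_open() exists because glibc provides no wrapper for the
perf_event_open syscall, so every perf tool funnels through this helper
(and, with HAVE_ATTR_TEST, mirrors each open to the attr test machinery).
A minimal, self-contained sketch of driving the same syscall directly to
count retired instructions for an in-process workload; error handling is
deliberately thin:

    #include <linux/perf_event.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        struct perf_event_attr attr;
        long long count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        /* self-monitoring: pid 0, any cpu (-1), no group, no flags */
        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
            perror("perf_event_open");
            return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);

        for (volatile int i = 0; i < 1000000; i++)
            ;    /* stand-in workload */

        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
            printf("instructions: %lld\n", count);

        close(fd);
        return 0;
    }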
diff --git a/tools/perf/perf-with-kcore.sh b/tools/perf/perf-with-kcore.sh
new file mode 100644
index 000000000..7e47a7cbc
--- /dev/null
+++ b/tools/perf/perf-with-kcore.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# perf-with-kcore: use perf with a copy of kcore
+# Copyright (c) 2014, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+set -e
+
+usage()
+{
+ echo "Usage: perf-with-kcore <perf sub-command> <perf.data directory> [<sub-command options> [ -- <workload>]]" >&2
+ echo " <perf sub-command> can be record, script, report or inject" >&2
+ echo " or: perf-with-kcore fix_buildid_cache_permissions" >&2
+ exit 1
+}
+
+find_perf()
+{
+ if [ -n "$PERF" ] ; then
+ return
+ fi
+ PERF=`which perf || true`
+ if [ -z "$PERF" ] ; then
+ echo "Failed to find perf" >&2
+ exit 1
+ fi
+ if [ ! -x "$PERF" ] ; then
+ echo "Failed to find perf" >&2
+ exit 1
+ fi
+ echo "Using $PERF"
+ "$PERF" version
+}
+
+copy_kcore()
+{
+ echo "Copying kcore"
+
+ if [ $EUID -eq 0 ] ; then
+ SUDO=""
+ else
+ SUDO="sudo"
+ fi
+
+ rm -f perf.data.junk
+ ("$PERF" record -o perf.data.junk "${PERF_OPTIONS[@]}" -- sleep 60) >/dev/null 2>/dev/null &
+ PERF_PID=$!
+
+ # Need to make sure that perf has started
+ sleep 1
+
+ KCORE=$(($SUDO "$PERF" buildid-cache -v -f -k /proc/kcore >/dev/null) 2>&1)
+ case "$KCORE" in
+ "kcore added to build-id cache directory "*)
+ KCORE_DIR=${KCORE#"kcore added to build-id cache directory "}
+ ;;
+ *)
+ kill $PERF_PID
+ wait >/dev/null 2>/dev/null || true
+ rm perf.data.junk
+ echo "$KCORE"
+ echo "Failed to find kcore" >&2
+ exit 1
+ ;;
+ esac
+
+ kill $PERF_PID
+ wait >/dev/null 2>/dev/null || true
+ rm perf.data.junk
+
+ $SUDO cp -a "$KCORE_DIR" "$(pwd)/$PERF_DATA_DIR"
+ $SUDO rm -f "$KCORE_DIR/kcore"
+ $SUDO rm -f "$KCORE_DIR/kallsyms"
+ $SUDO rm -f "$KCORE_DIR/modules"
+ $SUDO rmdir "$KCORE_DIR"
+
+ KCORE_DIR_BASENAME=$(basename "$KCORE_DIR")
+ KCORE_DIR="$(pwd)/$PERF_DATA_DIR/$KCORE_DIR_BASENAME"
+
+ $SUDO chown $UID "$KCORE_DIR"
+ $SUDO chown $UID "$KCORE_DIR/kcore"
+ $SUDO chown $UID "$KCORE_DIR/kallsyms"
+ $SUDO chown $UID "$KCORE_DIR/modules"
+
+ $SUDO chgrp $GROUPS "$KCORE_DIR"
+ $SUDO chgrp $GROUPS "$KCORE_DIR/kcore"
+ $SUDO chgrp $GROUPS "$KCORE_DIR/kallsyms"
+ $SUDO chgrp $GROUPS "$KCORE_DIR/modules"
+
+ ln -s "$KCORE_DIR_BASENAME" "$PERF_DATA_DIR/kcore_dir"
+}
+
+fix_buildid_cache_permissions()
+{
+ if [ $EUID -ne 0 ] ; then
+ echo "This script must be run as root via sudo " >&2
+ exit 1
+ fi
+
+ if [ -z "$SUDO_USER" ] ; then
+ echo "This script must be run via sudo" >&2
+ exit 1
+ fi
+
+ USER_HOME=$(bash <<< "echo ~$SUDO_USER")
+
+ if [ "$HOME" != "$USER_HOME" ] ; then
+ echo "Fix unnecessary because root has a home: $HOME" >&2
+ exit 1
+ fi
+
+ echo "Fixing buildid cache permissions"
+
+ find "$USER_HOME/.debug" -xdev -type d ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \;
+ find "$USER_HOME/.debug" -xdev -type f -links 1 ! -user "$SUDO_USER" -ls -exec chown "$SUDO_USER" \{\} \;
+ find "$USER_HOME/.debug" -xdev -type l ! -user "$SUDO_USER" -ls -exec chown -h "$SUDO_USER" \{\} \;
+
+ if [ -n "$SUDO_GID" ] ; then
+ find "$USER_HOME/.debug" -xdev -type d ! -group "$SUDO_GID" -ls -exec chgrp "$SUDO_GID" \{\} \;
+ find "$USER_HOME/.debug" -xdev -type f -links 1 ! -group "$SUDO_GID" -ls -exec chgrp "$SUDO_GID" \{\} \;
+ find "$USER_HOME/.debug" -xdev -type l ! -group "$SUDO_GID" -ls -exec chgrp -h "$SUDO_GID" \{\} \;
+ fi
+
+ echo "Done"
+}
+
+check_buildid_cache_permissions()
+{
+ if [ $EUID -eq 0 ] ; then
+ return
+ fi
+
+ PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type d ! -user "$USER" -print -quit)
+ PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type f -links 1 ! -user "$USER" -print -quit)
+ PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type l ! -user "$USER" -print -quit)
+
+ PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type d ! -group "$GROUPS" -print -quit)
+ PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type f -links 1 ! -group "$GROUPS" -print -quit)
+ PERMISSIONS_OK+=$(find "$HOME/.debug" -xdev -type l ! -group "$GROUPS" -print -quit)
+
+ if [ -n "$PERMISSIONS_OK" ] ; then
+ echo "*** WARNING *** buildid cache permissions may need fixing" >&2
+ fi
+}
+
+record()
+{
+ echo "Recording"
+
+ if [ $EUID -ne 0 ] ; then
+
+ if [ "$(cat /proc/sys/kernel/kptr_restrict)" -ne 0 ] ; then
+ echo "*** WARNING *** /proc/sys/kernel/kptr_restrict prevents access to kernel addresses" >&2
+ fi
+
+ if echo "${PERF_OPTIONS[@]}" | grep -q ' -a \|^-a \| -a$\|^-a$\| --all-cpus \|^--all-cpus \| --all-cpus$\|^--all-cpus$' ; then
+ echo "*** WARNING *** system-wide tracing without root access will not be able to read all necessary information from /proc" >&2
+ fi
+
+ if echo "${PERF_OPTIONS[@]}" | grep -q 'intel_pt\|intel_bts\| -I\|^-I' ; then
+ if [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt -1 ] ; then
+ echo "*** WARNING *** /proc/sys/kernel/perf_event_paranoid restricts buffer size and tracepoint (sched_switch) use" >&2
+ fi
+
+ if echo "${PERF_OPTIONS[@]}" | grep -q ' --per-thread \|^--per-thread \| --per-thread$\|^--per-thread$' ; then
+ true
+ elif echo "${PERF_OPTIONS[@]}" | grep -q ' -t \|^-t \| -t$\|^-t$' ; then
+ true
+ elif [ ! -r /sys/kernel/debug -o ! -x /sys/kernel/debug ] ; then
+ echo "*** WARNING *** /sys/kernel/debug permissions prevent tracepoint (sched_switch) use" >&2
+ fi
+ fi
+ fi
+
+ if [ -z "$1" ] ; then
+ echo "Workload is required for recording" >&2
+ usage
+ fi
+
+ if [ -e "$PERF_DATA_DIR" ] ; then
+ echo "'$PERF_DATA_DIR' exists" >&2
+ exit 1
+ fi
+
+ find_perf
+
+ mkdir "$PERF_DATA_DIR"
+
+ echo "$PERF record -o $PERF_DATA_DIR/perf.data ${PERF_OPTIONS[@]} -- $@"
+ "$PERF" record -o "$PERF_DATA_DIR/perf.data" "${PERF_OPTIONS[@]}" -- "$@" || true
+
+ if rmdir "$PERF_DATA_DIR" > /dev/null 2>/dev/null ; then
+ exit 1
+ fi
+
+ copy_kcore
+
+ echo "Done"
+}
+
+subcommand()
+{
+ find_perf
+ check_buildid_cache_permissions
+ echo "$PERF $PERF_SUB_COMMAND -i $PERF_DATA_DIR/perf.data --kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms $@"
+ "$PERF" $PERF_SUB_COMMAND -i "$PERF_DATA_DIR/perf.data" "--kallsyms=$PERF_DATA_DIR/kcore_dir/kallsyms" "$@"
+}
+
+if [ "$1" = "fix_buildid_cache_permissions" ] ; then
+ fix_buildid_cache_permissions
+ exit 0
+fi
+
+PERF_SUB_COMMAND=$1
+PERF_DATA_DIR=$2
+shift || true
+shift || true
+
+if [ -z "$PERF_SUB_COMMAND" ] ; then
+ usage
+fi
+
+if [ -z "$PERF_DATA_DIR" ] ; then
+ usage
+fi
+
+case "$PERF_SUB_COMMAND" in
+"record")
+ while [ "$1" != "--" ] ; do
+ PERF_OPTIONS+=("$1")
+ shift || break
+ done
+ if [ "$1" != "--" ] ; then
+ echo "Options and workload are required for recording" >&2
+ usage
+ fi
+ shift
+ record "$@"
+;;
+"script")
+ subcommand "$@"
+;;
+"report")
+ subcommand "$@"
+;;
+"inject")
+ subcommand "$@"
+;;
+*)
+ usage
+;;
+esac
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
new file mode 100644
index 000000000..80f8ae8b1
--- /dev/null
+++ b/tools/perf/perf.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf.c
+ *
+ * Performance analysis utility.
+ *
+ * This is the main hub from which the sub-commands (perf stat,
+ * perf top, perf record, perf report, etc.) are started.
+ */
+#include "builtin.h"
+
+#include "util/env.h"
+#include <subcmd/exec-cmd.h>
+#include "util/config.h"
+#include <subcmd/run-command.h>
+#include "util/parse-events.h"
+#include <subcmd/parse-options.h>
+#include "util/bpf-loader.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <errno.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/kernel.h>
+
+const char perf_usage_string[] =
+ "perf [--version] [--help] [OPTIONS] COMMAND [ARGS]";
+
+const char perf_more_info_string[] =
+ "See 'perf help COMMAND' for more information on a specific command.";
+
+static int use_pager = -1;
+const char *input_name;
+
+struct cmd_struct {
+ const char *cmd;
+ int (*fn)(int, const char **);
+ int option;
+};
+
+static struct cmd_struct commands[] = {
+ { "buildid-cache", cmd_buildid_cache, 0 },
+ { "buildid-list", cmd_buildid_list, 0 },
+ { "config", cmd_config, 0 },
+ { "c2c", cmd_c2c, 0 },
+ { "diff", cmd_diff, 0 },
+ { "evlist", cmd_evlist, 0 },
+ { "help", cmd_help, 0 },
+ { "kallsyms", cmd_kallsyms, 0 },
+ { "list", cmd_list, 0 },
+ { "record", cmd_record, 0 },
+ { "report", cmd_report, 0 },
+ { "bench", cmd_bench, 0 },
+ { "stat", cmd_stat, 0 },
+ { "timechart", cmd_timechart, 0 },
+ { "top", cmd_top, 0 },
+ { "annotate", cmd_annotate, 0 },
+ { "version", cmd_version, 0 },
+ { "script", cmd_script, 0 },
+ { "sched", cmd_sched, 0 },
+#ifdef HAVE_LIBELF_SUPPORT
+ { "probe", cmd_probe, 0 },
+#endif
+ { "kmem", cmd_kmem, 0 },
+ { "lock", cmd_lock, 0 },
+ { "kvm", cmd_kvm, 0 },
+ { "test", cmd_test, 0 },
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+ { "trace", cmd_trace, 0 },
+#endif
+ { "inject", cmd_inject, 0 },
+ { "mem", cmd_mem, 0 },
+ { "data", cmd_data, 0 },
+ { "ftrace", cmd_ftrace, 0 },
+};
+
+struct pager_config {
+ const char *cmd;
+ int val;
+};
+
+static int pager_command_config(const char *var, const char *value, void *data)
+{
+ struct pager_config *c = data;
+ if (strstarts(var, "pager.") && !strcmp(var + 6, c->cmd))
+ c->val = perf_config_bool(var, value);
+ return 0;
+}
+
+/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
+static int check_pager_config(const char *cmd)
+{
+ int err;
+ struct pager_config c;
+ c.cmd = cmd;
+ c.val = -1;
+ err = perf_config(pager_command_config, &c);
+ return err ?: c.val;
+}
+
+static int browser_command_config(const char *var, const char *value, void *data)
+{
+ struct pager_config *c = data;
+ if (strstarts(var, "tui.") && !strcmp(var + 4, c->cmd))
+ c->val = perf_config_bool(var, value);
+ if (strstarts(var, "gtk.") && !strcmp(var + 4, c->cmd))
+ c->val = perf_config_bool(var, value) ? 2 : 0;
+ return 0;
+}
+
+/*
+ * returns 0 for "no tui", 1 for "use tui", 2 for "use gtk",
+ * and -1 for "not specified"
+ */
+static int check_browser_config(const char *cmd)
+{
+ int err;
+ struct pager_config c;
+ c.cmd = cmd;
+ c.val = -1;
+ err = perf_config(browser_command_config, &c);
+ return err ?: c.val;
+}
+
+static void commit_pager_choice(void)
+{
+ switch (use_pager) {
+ case 0:
+ setenv(PERF_PAGER_ENVIRONMENT, "cat", 1);
+ break;
+ case 1:
+ /* setup_pager(); */
+ break;
+ default:
+ break;
+ }
+}
+
+struct option options[] = {
+ OPT_ARGUMENT("help", "help"),
+ OPT_ARGUMENT("version", "version"),
+ OPT_ARGUMENT("exec-path", "exec-path"),
+ OPT_ARGUMENT("html-path", "html-path"),
+ OPT_ARGUMENT("paginate", "paginate"),
+ OPT_ARGUMENT("no-pager", "no-pager"),
+ OPT_ARGUMENT("debugfs-dir", "debugfs-dir"),
+ OPT_ARGUMENT("buildid-dir", "buildid-dir"),
+ OPT_ARGUMENT("list-cmds", "list-cmds"),
+ OPT_ARGUMENT("list-opts", "list-opts"),
+ OPT_ARGUMENT("debug", "debug"),
+ OPT_END()
+};
+
+static int handle_options(const char ***argv, int *argc, int *envchanged)
+{
+ int handled = 0;
+
+ while (*argc > 0) {
+ const char *cmd = (*argv)[0];
+ if (cmd[0] != '-')
+ break;
+
+ /*
+ * For legacy reasons, the "version" and "help"
+ * commands can be written with "--" prepended
+ * to make them look like flags.
+ */
+ if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version"))
+ break;
+
+ /*
+ * Shortcut for '-h' and '-v' options to invoke help
+ * and version command.
+ */
+ if (!strcmp(cmd, "-h")) {
+ (*argv)[0] = "--help";
+ break;
+ }
+
+ if (!strcmp(cmd, "-v")) {
+ (*argv)[0] = "--version";
+ break;
+ }
+
+ if (!strcmp(cmd, "-vv")) {
+ (*argv)[0] = "version";
+ version_verbose = 1;
+ break;
+ }
+
+ /*
+ * Check remaining flags.
+ */
+ if (strstarts(cmd, CMD_EXEC_PATH)) {
+ cmd += strlen(CMD_EXEC_PATH);
+ if (*cmd == '=')
+ set_argv_exec_path(cmd + 1);
+ else {
+ puts(get_argv_exec_path());
+ exit(0);
+ }
+ } else if (!strcmp(cmd, "--html-path")) {
+ puts(system_path(PERF_HTML_PATH));
+ exit(0);
+ } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) {
+ use_pager = 1;
+ } else if (!strcmp(cmd, "--no-pager")) {
+ use_pager = 0;
+ if (envchanged)
+ *envchanged = 1;
+ } else if (!strcmp(cmd, "--debugfs-dir")) {
+ if (*argc < 2) {
+ fprintf(stderr, "No directory given for --debugfs-dir.\n");
+ usage(perf_usage_string);
+ }
+ tracing_path_set((*argv)[1]);
+ if (envchanged)
+ *envchanged = 1;
+ (*argv)++;
+ (*argc)--;
+ } else if (!strcmp(cmd, "--buildid-dir")) {
+ if (*argc < 2) {
+ fprintf(stderr, "No directory given for --buildid-dir.\n");
+ usage(perf_usage_string);
+ }
+ set_buildid_dir((*argv)[1]);
+ if (envchanged)
+ *envchanged = 1;
+ (*argv)++;
+ (*argc)--;
+ } else if (strstarts(cmd, CMD_DEBUGFS_DIR)) {
+ tracing_path_set(cmd + strlen(CMD_DEBUGFS_DIR));
+ fprintf(stderr, "dir: %s\n", tracing_path_mount());
+ if (envchanged)
+ *envchanged = 1;
+ } else if (!strcmp(cmd, "--list-cmds")) {
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(commands); i++) {
+ struct cmd_struct *p = commands+i;
+ printf("%s ", p->cmd);
+ }
+ putchar('\n');
+ exit(0);
+ } else if (!strcmp(cmd, "--list-opts")) {
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(options)-1; i++) {
+ struct option *p = options+i;
+ printf("--%s ", p->long_name);
+ }
+ putchar('\n');
+ exit(0);
+ } else if (!strcmp(cmd, "--debug")) {
+ if (*argc < 2) {
+ fprintf(stderr, "No variable specified for --debug.\n");
+ usage(perf_usage_string);
+ }
+ if (perf_debug_option((*argv)[1]))
+ usage(perf_usage_string);
+
+ (*argv)++;
+ (*argc)--;
+ } else {
+ fprintf(stderr, "Unknown option: %s\n", cmd);
+ usage(perf_usage_string);
+ }
+
+ (*argv)++;
+ (*argc)--;
+ handled++;
+ }
+ return handled;
+}
+
+#define RUN_SETUP (1<<0)
+#define USE_PAGER (1<<1)
+
+static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
+{
+ int status;
+ struct stat st;
+ char sbuf[STRERR_BUFSIZE];
+
+ if (use_browser == -1)
+ use_browser = check_browser_config(p->cmd);
+
+ if (use_pager == -1 && p->option & RUN_SETUP)
+ use_pager = check_pager_config(p->cmd);
+ if (use_pager == -1 && p->option & USE_PAGER)
+ use_pager = 1;
+ commit_pager_choice();
+
+ perf_env__set_cmdline(&perf_env, argc, argv);
+ status = p->fn(argc, argv);
+ perf_config__exit();
+ exit_browser(status);
+ perf_env__exit(&perf_env);
+ bpf__clear();
+
+ if (status)
+ return status & 0xff;
+
+ /* Somebody closed stdout? */
+ if (fstat(fileno(stdout), &st))
+ return 0;
+ /* Ignore write errors for pipes and sockets.. */
+ if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode))
+ return 0;
+
+ status = 1;
+ /* Check for ENOSPC and EIO errors.. */
+ if (fflush(stdout)) {
+ fprintf(stderr, "write failure on standard output: %s",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out;
+ }
+ if (ferror(stdout)) {
+ fprintf(stderr, "unknown write failure on standard output");
+ goto out;
+ }
+ if (fclose(stdout)) {
+ fprintf(stderr, "close failed on standard output: %s",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out;
+ }
+ status = 0;
+out:
+ return status;
+}
+
+static void handle_internal_command(int argc, const char **argv)
+{
+ const char *cmd = argv[0];
+ unsigned int i;
+
+ /* Turn "perf cmd --help" into "perf help cmd" */
+ if (argc > 1 && !strcmp(argv[1], "--help")) {
+ argv[1] = argv[0];
+ argv[0] = cmd = "help";
+ }
+
+ for (i = 0; i < ARRAY_SIZE(commands); i++) {
+ struct cmd_struct *p = commands+i;
+ if (strcmp(p->cmd, cmd))
+ continue;
+ exit(run_builtin(p, argc, argv));
+ }
+}
+
+static void execv_dashed_external(const char **argv)
+{
+ char *cmd;
+ const char *tmp;
+ int status;
+
+ if (asprintf(&cmd, "perf-%s", argv[0]) < 0)
+ goto do_die;
+
+ /*
+ * argv[0] must be the perf command, but the argv array
+ * belongs to the caller, and may be reused in
+ * subsequent loop iterations. Save argv[0] and
+ * restore it on error.
+ */
+ tmp = argv[0];
+ argv[0] = cmd;
+
+ /*
+ * if we fail because the command is not found, it is
+ * OK to return. Otherwise, we just pass along the status code.
+ */
+ status = run_command_v_opt(argv, 0);
+ if (status != -ERR_RUN_COMMAND_EXEC) {
+ if (IS_RUN_COMMAND_ERR(status)) {
+do_die:
+ pr_err("FATAL: unable to run '%s'", argv[0]);
+ status = -128;
+ }
+ exit(-status);
+ }
+ errno = ENOENT; /* as if we called execvp */
+
+ argv[0] = tmp;
+ zfree(&cmd);
+}
+
+static int run_argv(int *argcp, const char ***argv)
+{
+ /* See if it's an internal command */
+ handle_internal_command(*argcp, *argv);
+
+ /* .. then try the external ones */
+ execv_dashed_external(*argv);
+ return 0;
+}
+
+static void pthread__block_sigwinch(void)
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGWINCH);
+ pthread_sigmask(SIG_BLOCK, &set, NULL);
+}
+
+void pthread__unblock_sigwinch(void)
+{
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGWINCH);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+}
+
+int main(int argc, const char **argv)
+{
+ int err;
+ const char *cmd;
+ char sbuf[STRERR_BUFSIZE];
+
+ /* libsubcmd init */
+ exec_cmd_init("perf", PREFIX, PERF_EXEC_PATH, EXEC_PATH_ENVIRONMENT);
+ pager_init(PERF_PAGER_ENVIRONMENT);
+
+ /* The page_size is placed in util object. */
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ cmd = extract_argv0_path(argv[0]);
+ if (!cmd)
+ cmd = "perf-help";
+
+ srandom(time(NULL));
+
+ /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+ config_exclusive_filename = getenv("PERF_CONFIG");
+
+ err = perf_config(perf_default_config, NULL);
+ if (err)
+ return err;
+ set_buildid_dir(NULL);
+
+ /*
+ * "perf-xxxx" is the same as "perf xxxx", but we obviously:
+ *
+ * - cannot take flags in between the "perf" and the "xxxx".
+ * - cannot execute it externally (since it would just do
+ * the same thing over again)
+ *
+ * So we just directly call the internal command handler. If that one
+ * fails to handle this, then maybe we just run a renamed perf binary
+ * that contains a dash in its name. To handle this scenario, we just
+ * fall through and ignore the "xxxx" part of the command string.
+ */
+ if (strstarts(cmd, "perf-")) {
+ cmd += 5;
+ argv[0] = cmd;
+ handle_internal_command(argc, argv);
+ /*
+ * If the command is handled, the above function does not
+ * return. Otherwise, undo the changes and fall through.
+ */
+ cmd -= 5;
+ argv[0] = cmd;
+ }
+ if (strstarts(cmd, "trace")) {
+#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+ setup_path();
+ argv[0] = "trace";
+ return cmd_trace(argc, argv);
+#else
+ fprintf(stderr,
+ "trace command not available: missing audit-libs devel package at build time.\n");
+ goto out;
+#endif
+ }
+ /* Look for flags.. */
+ argv++;
+ argc--;
+ handle_options(&argv, &argc, NULL);
+ commit_pager_choice();
+
+ if (argc > 0) {
+ if (strstarts(argv[0], "--"))
+ argv[0] += 2;
+ } else {
+ /* The user didn't specify a command; give them help */
+ printf("\n usage: %s\n\n", perf_usage_string);
+ list_common_cmds_help();
+ printf("\n %s\n\n", perf_more_info_string);
+ goto out;
+ }
+ cmd = argv[0];
+
+ test_attr__init();
+
+ /*
+ * We use PATH to find perf commands, but we prepend some higher
+ * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH
+ * environment, and the $(perfexecdir) from the Makefile at build
+ * time.
+ */
+ setup_path();
+ /*
+ * Block SIGWINCH notifications so that the thread that wants it can
+ * unblock and get syscalls like select interrupted instead of waiting
+ * forever while the signal goes to some other non interested thread.
+ */
+ pthread__block_sigwinch();
+
+ perf_debug_setup();
+
+ while (1) {
+ static int done_help;
+
+ run_argv(&argc, &argv);
+
+ if (errno != ENOENT)
+ break;
+
+ if (!done_help) {
+ cmd = argv[0] = help_unknown_cmd(cmd);
+ done_help = 1;
+ } else
+ break;
+ }
+
+ fprintf(stderr, "Failed to run command '%s': %s\n",
+ cmd, str_error_r(errno, sbuf, sizeof(sbuf)));
+out:
+ return 1;
+}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
new file mode 100644
index 000000000..19d435a96
--- /dev/null
+++ b/tools/perf/perf.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PERF_H
+#define _PERF_PERF_H
+
+#include <time.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+
+extern bool test_attr__enabled;
+void test_attr__ready(void);
+void test_attr__init(void);
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags);
+
+#define HAVE_ATTR_TEST
+#include "perf-sys.h"
+
+static inline unsigned long long rdclock(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
+}
+
+#ifndef MAX_NR_CPUS
+#define MAX_NR_CPUS 2048
+#endif
+
+extern const char *input_name;
+extern bool perf_host, perf_guest;
+extern const char perf_version_string[];
+
+void pthread__unblock_sigwinch(void);
+
+#include "util/target.h"
+
+struct record_opts {
+ struct target target;
+ bool group;
+ bool inherit_stat;
+ bool no_buffering;
+ bool no_inherit;
+ bool no_inherit_set;
+ bool no_samples;
+ bool raw_samples;
+ bool sample_address;
+ bool sample_phys_addr;
+ bool sample_weight;
+ bool sample_time;
+ bool sample_time_set;
+ bool sample_cpu;
+ bool period;
+ bool period_set;
+ bool running_time;
+ bool full_auxtrace;
+ bool auxtrace_snapshot_mode;
+ bool record_namespaces;
+ bool record_switch_events;
+ bool all_kernel;
+ bool all_user;
+ bool tail_synthesize;
+ bool overwrite;
+ bool ignore_missing_thread;
+ bool strict_freq;
+ bool sample_id;
+ unsigned int freq;
+ unsigned int mmap_pages;
+ unsigned int auxtrace_mmap_pages;
+ unsigned int user_freq;
+ u64 branch_stack;
+ u64 sample_intr_regs;
+ u64 sample_user_regs;
+ u64 default_interval;
+ u64 user_interval;
+ size_t auxtrace_snapshot_size;
+ const char *auxtrace_snapshot_opts;
+ bool sample_transaction;
+ unsigned initial_delay;
+ bool use_clockid;
+ clockid_t clockid;
+ unsigned int proc_map_timeout;
+};
+
+struct option;
+extern const char * const *record_usage;
+extern struct option *record_options;
+extern int version_verbose;
+
+int record__parse_freq(const struct option *opt, const char *str, int unset);
+#endif
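rdclock() above folds a CLOCK_MONOTONIC timestamp into a single nanosecond
count, so measuring elapsed time becomes plain subtraction. A usage
fragment, assuming perf.h and <stdio.h> are included and do_some_work()
stands in for any workload (it is not a perf symbol):

    unsigned long long t0, t1;

    t0 = rdclock();
    do_some_work();    /* hypothetical workload */
    t1 = rdclock();
    fprintf(stderr, "workload took %llu ns\n", t1 - t0);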
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
new file mode 100644
index 000000000..215ba30b8
--- /dev/null
+++ b/tools/perf/pmu-events/Build
@@ -0,0 +1,15 @@
+hostprogs := jevents
+
+jevents-y += json.o jsmn.o jevents.o
+HOSTCFLAGS_jevents.o = -I$(srctree)/tools/include
+pmu-events-y += pmu-events.o
+JDIR = pmu-events/arch/$(SRCARCH)
+JSON = $(shell [ -d $(JDIR) ] && \
+ find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+
+#
+# Locate/process JSON files in pmu-events/arch/
+# directory and create tables in pmu-events.c.
+#
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JEVENTS)
+ $(Q)$(call echo-cmd,gen)$(JEVENTS) $(SRCARCH) pmu-events/arch $(OUTPUT)pmu-events/pmu-events.c $(V)
diff --git a/tools/perf/pmu-events/README b/tools/perf/pmu-events/README
new file mode 100644
index 000000000..e62b09b6a
--- /dev/null
+++ b/tools/perf/pmu-events/README
@@ -0,0 +1,152 @@
+
+The contents of this directory allow users to specify PMU events for their
+CPUs by their symbolic names rather than raw event codes (see example below).
+
+The main program in this directory is 'jevents', which is built and
+executed _BEFORE_ the perf binary itself is built.
+
+The 'jevents' program tries to locate and process JSON files in the directory
+tree tools/perf/pmu-events/arch/foo.
+
+ - Regular files with '.json' extension in the name are assumed to be
+ JSON files, each of which describes a set of PMU events.
+
+ - The CSV file that maps a specific CPU to its set of PMU events is to
+ be named 'mapfile.csv' (see below for mapfile format).
+
+ - Directories are traversed, but all other files are ignored.
+
+ - To reduce JSON event duplication per architecture, platform JSONs may
+ use the "ArchStdEvent" keyword to reference an "architecture standard
+ event" defined in the architecture standard JSONs.
+ Architecture standard JSONs must be located in the architecture root
+ folder. Matching is based on the "EventName" field.
+
+The PMU events supported by a CPU model are expected to be grouped into topics
+such as Pipelining, Cache, Memory, Floating-point etc. All events for a topic
+should be placed in a separate JSON file - where the file name identifies
+the topic. Eg: "Floating-point.json".
+
+All the topic JSON files for a CPU model/family should be in a separate
+sub directory. Thus for the Silvermont X86 CPU:
+
+ $ ls tools/perf/pmu-events/arch/x86/Silvermont_core
+ Cache.json Memory.json Virtual-Memory.json
+ Frontend.json Pipeline.json
+
+The JSONs folder for a CPU model/family may be placed in the root arch
+folder, or may be placed in a vendor sub-folder under the arch folder
+for instances where the arch and vendor are not the same.
+
+Using the JSON files and the mapfile, 'jevents' generates the C source file,
+'pmu-events.c', which encodes the two sets of tables:
+
+ - Set of 'PMU events tables' for all known CPUs in the architecture,
+ (one table like the following, per JSON file; table name 'pme_power8'
+ is derived from JSON file name, 'power8.json').
+
+ struct pmu_event pme_power8[] = {
+
+ ...
+
+ {
+ .name = "pm_1plus_ppc_cmpl",
+ .event = "event=0x100f2",
+ .desc = "1 or more ppc insts finished,",
+ },
+
+ ...
+ }
+
+ - A 'mapping table' that maps each CPU of the architecture, to its
+ 'PMU events table'
+
+ struct pmu_events_map pmu_events_map[] = {
+ {
+ .cpuid = "004b0000",
+ .version = "1",
+ .type = "core",
+ .table = pme_power8
+ },
+ ...
+
+ };
+
+After the 'pmu-events.c' is generated, it is compiled and the resulting
+'pmu-events.o' is added to 'libperf.a' which is then used to build perf.
+
+NOTES:
+ 1. Several CPUs can support the same set of events and hence use a
+ common JSON file. Several entries in the pmu_events_map[] can
+ therefore map to a single 'PMU events table'.
+
+ 2. The 'pmu-events.h' has an extern declaration for the mapping table
+ and the generated 'pmu-events.c' defines this table.
+
+ 3. _All_ known CPU tables for an architecture are included in the perf
+ binary.
+
+At run time, perf determines the actual CPU it is running on, finds the
+matching events table and builds aliases for those events. This allows
+users to specify events by their name:
+
+ $ perf stat -e pm_1plus_ppc_cmpl sleep 1
+
+where 'pm_1plus_ppc_cmpl' is a Power8 PMU event.
+
+However, some errors in processing the JSON files may cause the perf build to fail.
+
+Mapfile format
+===============
+
+The mapfile enables multiple CPU models to share a single set of PMU events.
+It is required even if the mapping is 1:1.
+
+The mapfile.csv format is expected to be:
+
+ Header line
+ CPUID,Version,Dir/path/name,Type
+
+where:
+
+ Comma:
+ is the required field delimiter (i.e. other fields cannot
+ have commas within them).
+
+ Comments:
+ Lines in which the first character is either '\n' or '#'
+ are ignored.
+
+ Header line:
+ The header line is the first line in the file, which is
+ always _IGNORED_. It can be empty.
+
+ CPUID:
+ CPUID is an arch-specific char string that can be used
+ to identify the CPU (and associate it with a set of PMU events
+ it supports). Multiple CPUIDS can point to the same
+ File/path/name.json.
+
+ Example:
+ CPUID == 'GenuineIntel-6-2E' (on x86).
+ CPUID == '004b0100' (PVR value in Powerpc)
+ Version:
+ is the Version of the mapfile.
+
+ Dir/path/name:
+ is the pathname to the directory containing the CPU's JSON
+ files, relative to the directory containing the mapfile.csv
+
+ Type:
+ indicates whether the events are "core" or "uncore" events.
+
+
+ Eg:
+
+ $ grep Silvermont tools/perf/pmu-events/arch/x86/mapfile.csv
+ GenuineIntel-6-37,V13,Silvermont_core,core
+ GenuineIntel-6-4D,V13,Silvermont_core,core
+ GenuineIntel-6-4C,V13,Silvermont_core,core
+
+ i.e. the three CPU models use the JSON files (i.e. PMU events) listed
+ in the directory 'tools/perf/pmu-events/arch/x86/Silvermont_core'.
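A sketch of the run-time lookup described above: walk the generated
pmu_events_map[] until the entry whose cpuid matches the running CPU is
found. The struct layout follows the mapping-table example earlier in this
README; the sketch assumes the generated array ends with an all-zero
sentinel entry, and the cpuid comparison in perf proper is arch-specific
rather than necessarily a plain strcmp():

    #include <string.h>

    struct pmu_event;

    struct pmu_events_map {
        const char *cpuid;
        const char *version;
        const char *type;
        struct pmu_event *table;
    };

    /* defined in the generated pmu-events.c */
    extern struct pmu_events_map pmu_events_map[];

    static struct pmu_event *find_events_table(const char *cpuid)
    {
        struct pmu_events_map *map;

        for (map = pmu_events_map; map->cpuid; map++) {
            if (!strcmp(map->cpuid, cpuid))
                return map->table;
        }
        return NULL;    /* unknown CPU: no event aliases */
    }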
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json
new file mode 100644
index 000000000..bc03c06c3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/core-imp-def.json
@@ -0,0 +1,32 @@
+[
+ {
+ "ArchStdEvent": "L1D_CACHE_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR",
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD",
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR",
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json
new file mode 100644
index 000000000..0b0e6b266
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/branch.json
@@ -0,0 +1,25 @@
+[
+ {
+ "ArchStdEvent": "BR_INDIRECT_SPEC",
+ },
+ {
+ "EventCode": "0xC9",
+ "EventName": "BR_COND",
+ "BriefDescription": "Conditional branch executed"
+ },
+ {
+ "EventCode": "0xCA",
+ "EventName": "BR_INDIRECT_MISPRED",
+ "BriefDescription": "Indirect branch mispredicted"
+ },
+ {
+ "EventCode": "0xCB",
+ "EventName": "BR_INDIRECT_MISPRED_ADDR",
+ "BriefDescription": "Indirect branch mispredicted because of address miscompare"
+ },
+ {
+ "EventCode": "0xCC",
+ "EventName": "BR_COND_MISPRED",
+ "BriefDescription": "Conditional branch mispredicted"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json
new file mode 100644
index 000000000..ce33b2553
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/bus.json
@@ -0,0 +1,8 @@
+[
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD",
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR",
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json
new file mode 100644
index 000000000..5dfbec43c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/cache.json
@@ -0,0 +1,27 @@
+[
+ {
+ "EventCode": "0xC2",
+ "EventName": "PREFETCH_LINEFILL",
+ "BriefDescription": "Linefill because of prefetch"
+ },
+ {
+ "EventCode": "0xC3",
+ "EventName": "PREFETCH_LINEFILL_DROP",
+ "BriefDescription": "Instruction Cache Throttle occurred"
+ },
+ {
+ "EventCode": "0xC4",
+ "EventName": "READ_ALLOC_ENTER",
+ "BriefDescription": "Entering read allocate mode"
+ },
+ {
+ "EventCode": "0xC5",
+ "EventName": "READ_ALLOC",
+ "BriefDescription": "Read allocate mode"
+ },
+ {
+ "EventCode": "0xC8",
+ "EventName": "EXT_SNOOP",
+ "BriefDescription": "SCU Snooped data from another CPU for this CPU"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json
new file mode 100644
index 000000000..25ae642ba
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/memory.json
@@ -0,0 +1,12 @@
+[
+ {
+ "EventCode": "0xC0",
+ "EventName": "EXT_MEM_REQ",
+ "BriefDescription": "External memory request"
+ },
+ {
+ "EventCode": "0xC1",
+ "EventName": "EXT_MEM_REQ_NC",
+ "BriefDescription": "Non-cacheable external memory request"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json
new file mode 100644
index 000000000..6cc6cbd7b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/other.json
@@ -0,0 +1,28 @@
+[
+ {
+ "ArchStdEvent": "EXC_IRQ",
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ",
+ },
+ {
+ "EventCode": "0xC6",
+ "EventName": "PRE_DECODE_ERR",
+ "BriefDescription": "Pre-decode error"
+ },
+ {
+ "EventCode": "0xD0",
+ "EventName": "L1I_CACHE_ERR",
+ "BriefDescription": "L1 Instruction Cache (data or tag) memory error"
+ },
+ {
+ "EventCode": "0xD1",
+ "EventName": "L1D_CACHE_ERR",
+ "BriefDescription": "L1 Data Cache (data, tag or dirty) memory error, correctable or non-correctable"
+ },
+ {
+ "EventCode": "0xD2",
+ "EventName": "TLB_ERR",
+ "BriefDescription": "TLB memory error"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json
new file mode 100644
index 000000000..f45a6b5d0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/arm/cortex-a53/pipeline.json
@@ -0,0 +1,52 @@
+[
+ {
+ "EventCode": "0xC7",
+ "EventName": "STALL_SB_FULL",
+ "BriefDescription": "Data Write operation that stalls the pipeline because the store buffer is full"
+ },
+ {
+ "EventCode": "0xE0",
+ "EventName": "OTHER_IQ_DEP_STALL",
+ "BriefDescription": "Cycles that the DPU IQ is empty and that is not because of a recent micro-TLB miss, instruction cache miss or pre-decode error"
+ },
+ {
+ "EventCode": "0xE1",
+ "EventName": "IC_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction cache miss being processed"
+ },
+ {
+ "EventCode": "0xE2",
+ "EventName": "IUTLB_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is an instruction micro-TLB miss being processed"
+ },
+ {
+ "EventCode": "0xE3",
+ "EventName": "DECODE_DEP_STALL",
+ "BriefDescription": "Cycles the DPU IQ is empty and there is a pre-decode error being processed"
+ },
+ {
+ "EventCode": "0xE4",
+ "EventName": "OTHER_INTERLOCK_STALL",
+ "BriefDescription": "Cycles there is an interlock other than Advanced SIMD/Floating-point instructions or load/store instruction"
+ },
+ {
+ "EventCode": "0xE5",
+ "EventName": "AGU_DEP_STALL",
+ "BriefDescription": "Cycles there is an interlock for a load/store instruction waiting for data to calculate the address in the AGU"
+ },
+ {
+ "EventCode": "0xE6",
+ "EventName": "SIMD_DEP_STALL",
+ "BriefDescription": "Cycles there is an interlock for an Advanced SIMD/Floating-point operation."
+ },
+ {
+ "EventCode": "0xE7",
+ "EventName": "LD_DEP_STALL",
+ "BriefDescription": "Cycles there is a stall in the Wr stage because of a load miss"
+ },
+ {
+ "EventCode": "0xE8",
+ "EventName": "ST_DEP_STALL",
+ "BriefDescription": "Cycles there is a stall in the Wr stage because of a store"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/armv8-recommended.json b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json
new file mode 100644
index 000000000..6328828c0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/armv8-recommended.json
@@ -0,0 +1,452 @@
+[
+ {
+ "PublicDescription": "Attributable Level 1 data cache access, read",
+ "EventCode": "0x40",
+ "EventName": "L1D_CACHE_RD",
+ "BriefDescription": "L1D cache access, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache access, write",
+ "EventCode": "0x41",
+ "EventName": "L1D_CACHE_WR",
+ "BriefDescription": "L1D cache access, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, read",
+ "EventCode": "0x42",
+ "EventName": "L1D_CACHE_REFILL_RD",
+ "BriefDescription": "L1D cache refill, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, write",
+ "EventCode": "0x43",
+ "EventName": "L1D_CACHE_REFILL_WR",
+ "BriefDescription": "L1D cache refill, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, inner",
+ "EventCode": "0x44",
+ "EventName": "L1D_CACHE_REFILL_INNER",
+ "BriefDescription": "L1D cache refill, inner"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache refill, outer",
+ "EventCode": "0x45",
+ "EventName": "L1D_CACHE_REFILL_OUTER",
+ "BriefDescription": "L1D cache refill, outer"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache Write-Back, victim",
+ "EventCode": "0x46",
+ "EventName": "L1D_CACHE_WB_VICTIM",
+ "BriefDescription": "L1D cache Write-Back, victim"
+ },
+ {
+ "PublicDescription": "Level 1 data cache Write-Back, cleaning and coherency",
+ "EventCode": "0x47",
+ "EventName": "L1D_CACHE_WB_CLEAN",
+ "BriefDescription": "L1D cache Write-Back, cleaning and coherency"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data cache invalidate",
+ "EventCode": "0x48",
+ "EventName": "L1D_CACHE_INVAL",
+ "BriefDescription": "L1D cache invalidate"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill, read",
+ "EventCode": "0x4C",
+ "EventName": "L1D_TLB_REFILL_RD",
+ "BriefDescription": "L1D tlb refill, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data TLB refill, write",
+ "EventCode": "0x4D",
+ "EventName": "L1D_TLB_REFILL_WR",
+ "BriefDescription": "L1D tlb refill, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access, read",
+ "EventCode": "0x4E",
+ "EventName": "L1D_TLB_RD",
+ "BriefDescription": "L1D tlb access, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 1 data or unified TLB access, write",
+ "EventCode": "0x4F",
+ "EventName": "L1D_TLB_WR",
+ "BriefDescription": "L1D tlb access, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache access, read",
+ "EventCode": "0x50",
+ "EventName": "L2D_CACHE_RD",
+ "BriefDescription": "L2D cache access, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache access, write",
+ "EventCode": "0x51",
+ "EventName": "L2D_CACHE_WR",
+ "BriefDescription": "L2D cache access, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache refill, read",
+ "EventCode": "0x52",
+ "EventName": "L2D_CACHE_REFILL_RD",
+ "BriefDescription": "L2D cache refill, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache refill, write",
+ "EventCode": "0x53",
+ "EventName": "L2D_CACHE_REFILL_WR",
+ "BriefDescription": "L2D cache refill, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache Write-Back, victim",
+ "EventCode": "0x56",
+ "EventName": "L2D_CACHE_WB_VICTIM",
+ "BriefDescription": "L2D cache Write-Back, victim"
+ },
+ {
+ "PublicDescription": "Level 2 data cache Write-Back, cleaning and coherency",
+ "EventCode": "0x57",
+ "EventName": "L2D_CACHE_WB_CLEAN",
+ "BriefDescription": "L2D cache Write-Back, cleaning and coherency"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data cache invalidate",
+ "EventCode": "0x58",
+ "EventName": "L2D_CACHE_INVAL",
+ "BriefDescription": "L2D cache invalidate"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data or unified TLB refill, read",
+ "EventCode": "0x5c",
+ "EventName": "L2D_TLB_REFILL_RD",
+ "BriefDescription": "L2D cache refill, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data or unified TLB refill, write",
+ "EventCode": "0x5d",
+ "EventName": "L2D_TLB_REFILL_WR",
+ "BriefDescription": "L2D cache refill, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data or unified TLB access, read",
+ "EventCode": "0x5e",
+ "EventName": "L2D_TLB_RD",
+ "BriefDescription": "L2D cache access, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 2 data or unified TLB access, write",
+ "EventCode": "0x5f",
+ "EventName": "L2D_TLB_WR",
+ "BriefDescription": "L2D cache access, write"
+ },
+ {
+ "PublicDescription": "Bus access read",
+ "EventCode": "0x60",
+ "EventName": "BUS_ACCESS_RD",
+ "BriefDescription": "Bus access read"
+ },
+ {
+ "PublicDescription": "Bus access write",
+ "EventCode": "0x61",
+ "EventName": "BUS_ACCESS_WR",
+ "BriefDescription": "Bus access write"
+ },
+ {
+ "PublicDescription": "Bus access, Normal, Cacheable, Shareable",
+ "EventCode": "0x62",
+ "EventName": "BUS_ACCESS_SHARED",
+ "BriefDescription": "Bus access, Normal, Cacheable, Shareable"
+ },
+ {
+ "PublicDescription": "Bus access, not Normal, Cacheable, Shareable",
+ "EventCode": "0x63",
+ "EventName": "BUS_ACCESS_NOT_SHARED",
+ "BriefDescription": "Bus access, not Normal, Cacheable, Shareable"
+ },
+ {
+ "PublicDescription": "Bus access, Normal",
+ "EventCode": "0x64",
+ "EventName": "BUS_ACCESS_NORMAL",
+ "BriefDescription": "Bus access, Normal"
+ },
+ {
+ "PublicDescription": "Bus access, peripheral",
+ "EventCode": "0x65",
+ "EventName": "BUS_ACCESS_PERIPH",
+ "BriefDescription": "Bus access, peripheral"
+ },
+ {
+ "PublicDescription": "Data memory access, read",
+ "EventCode": "0x66",
+ "EventName": "MEM_ACCESS_RD",
+ "BriefDescription": "Data memory access, read"
+ },
+ {
+ "PublicDescription": "Data memory access, write",
+ "EventCode": "0x67",
+ "EventName": "MEM_ACCESS_WR",
+ "BriefDescription": "Data memory access, write"
+ },
+ {
+ "PublicDescription": "Unaligned access, read",
+ "EventCode": "0x68",
+ "EventName": "UNALIGNED_LD_SPEC",
+ "BriefDescription": "Unaligned access, read"
+ },
+ {
+ "PublicDescription": "Unaligned access, write",
+ "EventCode": "0x69",
+ "EventName": "UNALIGNED_ST_SPEC",
+ "BriefDescription": "Unaligned access, write"
+ },
+ {
+ "PublicDescription": "Unaligned access",
+ "EventCode": "0x6a",
+ "EventName": "UNALIGNED_LDST_SPEC",
+ "BriefDescription": "Unaligned access"
+ },
+ {
+ "PublicDescription": "Exclusive operation speculatively executed, LDREX or LDX",
+ "EventCode": "0x6c",
+ "EventName": "LDREX_SPEC",
+ "BriefDescription": "Exclusive operation speculatively executed, LDREX or LDX"
+ },
+ {
+ "PublicDescription": "Exclusive operation speculatively executed, STREX or STX pass",
+ "EventCode": "0x6d",
+ "EventName": "STREX_PASS_SPEC",
+ "BriefDescription": "Exclusive operation speculatively executed, STREX or STX pass"
+ },
+ {
+ "PublicDescription": "Exclusive operation speculatively executed, STREX or STX fail",
+ "EventCode": "0x6e",
+ "EventName": "STREX_FAIL_SPEC",
+ "BriefDescription": "Exclusive operation speculatively executed, STREX or STX fail"
+ },
+ {
+ "PublicDescription": "Exclusive operation speculatively executed, STREX or STX",
+ "EventCode": "0x6f",
+ "EventName": "STREX_SPEC",
+ "BriefDescription": "Exclusive operation speculatively executed, STREX or STX"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, load",
+ "EventCode": "0x70",
+ "EventName": "LD_SPEC",
+ "BriefDescription": "Operation speculatively executed, load"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, store",
+ "EventCode": "0x71",
+ "EventName": "ST_SPEC",
+ "BriefDescription": "Operation speculatively executed, store"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, load or store",
+ "EventCode": "0x72",
+ "EventName": "LDST_SPEC",
+ "BriefDescription": "Operation speculatively executed, load or store"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, integer data processing",
+ "EventCode": "0x73",
+ "EventName": "DP_SPEC",
+ "BriefDescription": "Operation speculatively executed, integer data processing"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, Advanced SIMD instruction",
+ "EventCode": "0x74",
+ "EventName": "ASE_SPEC",
+ "BriefDescription": "Operation speculatively executed, Advanced SIMD instruction"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, floating-point instruction",
+ "EventCode": "0x75",
+ "EventName": "VFP_SPEC",
+ "BriefDescription": "Operation speculatively executed, floating-point instruction"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, software change of the PC",
+ "EventCode": "0x76",
+ "EventName": "PC_WRITE_SPEC",
+ "BriefDescription": "Operation speculatively executed, software change of the PC"
+ },
+ {
+ "PublicDescription": "Operation speculatively executed, Cryptographic instruction",
+ "EventCode": "0x77",
+ "EventName": "CRYPTO_SPEC",
+ "BriefDescription": "Operation speculatively executed, Cryptographic instruction"
+ },
+ {
+ "PublicDescription": "Branch speculatively executed, immediate branch",
+ "EventCode": "0x78",
+ "EventName": "BR_IMMED_SPEC",
+ "BriefDescription": "Branch speculatively executed, immediate branch"
+ },
+ {
+ "PublicDescription": "Branch speculatively executed, procedure return",
+ "EventCode": "0x79",
+ "EventName": "BR_RETURN_SPEC",
+ "BriefDescription": "Branch speculatively executed, procedure return"
+ },
+ {
+ "PublicDescription": "Branch speculatively executed, indirect branch",
+ "EventCode": "0x7a",
+ "EventName": "BR_INDIRECT_SPEC",
+ "BriefDescription": "Branch speculatively executed, indirect branch"
+ },
+ {
+ "PublicDescription": "Barrier speculatively executed, ISB",
+ "EventCode": "0x7c",
+ "EventName": "ISB_SPEC",
+ "BriefDescription": "Barrier speculatively executed, ISB"
+ },
+ {
+ "PublicDescription": "Barrier speculatively executed, DSB",
+ "EventCode": "0x7d",
+ "EventName": "DSB_SPEC",
+ "BriefDescription": "Barrier speculatively executed, DSB"
+ },
+ {
+ "PublicDescription": "Barrier speculatively executed, DMB",
+ "EventCode": "0x7e",
+ "EventName": "DMB_SPEC",
+ "BriefDescription": "Barrier speculatively executed, DMB"
+ },
+ {
+ "PublicDescription": "Exception taken, Other synchronous",
+ "EventCode": "0x81",
+ "EventName": "EXC_UNDEF",
+ "BriefDescription": "Exception taken, Other synchronous"
+ },
+ {
+ "PublicDescription": "Exception taken, Supervisor Call",
+ "EventCode": "0x82",
+ "EventName": "EXC_SVC",
+ "BriefDescription": "Exception taken, Supervisor Call"
+ },
+ {
+ "PublicDescription": "Exception taken, Instruction Abort",
+ "EventCode": "0x83",
+ "EventName": "EXC_PABORT",
+ "BriefDescription": "Exception taken, Instruction Abort"
+ },
+ {
+ "PublicDescription": "Exception taken, Data Abort and SError",
+ "EventCode": "0x84",
+ "EventName": "EXC_DABORT",
+ "BriefDescription": "Exception taken, Data Abort and SError"
+ },
+ {
+ "PublicDescription": "Exception taken, IRQ",
+ "EventCode": "0x86",
+ "EventName": "EXC_IRQ",
+ "BriefDescription": "Exception taken, IRQ"
+ },
+ {
+ "PublicDescription": "Exception taken, FIQ",
+ "EventCode": "0x87",
+ "EventName": "EXC_FIQ",
+ "BriefDescription": "Exception taken, FIQ"
+ },
+ {
+ "PublicDescription": "Exception taken, Secure Monitor Call",
+ "EventCode": "0x88",
+ "EventName": "EXC_SMC",
+ "BriefDescription": "Exception taken, Secure Monitor Call"
+ },
+ {
+ "PublicDescription": "Exception taken, Hypervisor Call",
+ "EventCode": "0x8a",
+ "EventName": "EXC_HVC",
+ "BriefDescription": "Exception taken, Hypervisor Call"
+ },
+ {
+ "PublicDescription": "Exception taken, Instruction Abort not taken locally",
+ "EventCode": "0x8b",
+ "EventName": "EXC_TRAP_PABORT",
+ "BriefDescription": "Exception taken, Instruction Abort not taken locally"
+ },
+ {
+ "PublicDescription": "Exception taken, Data Abort or SError not taken locally",
+ "EventCode": "0x8c",
+ "EventName": "EXC_TRAP_DABORT",
+ "BriefDescription": "Exception taken, Data Abort or SError not taken locally"
+ },
+ {
+ "PublicDescription": "Exception taken, Other traps not taken locally",
+ "EventCode": "0x8d",
+ "EventName": "EXC_TRAP_OTHER",
+ "BriefDescription": "Exception taken, Other traps not taken locally"
+ },
+ {
+ "PublicDescription": "Exception taken, IRQ not taken locally",
+ "EventCode": "0x8e",
+ "EventName": "EXC_TRAP_IRQ",
+ "BriefDescription": "Exception taken, IRQ not taken locally"
+ },
+ {
+ "PublicDescription": "Exception taken, FIQ not taken locally",
+ "EventCode": "0x8f",
+ "EventName": "EXC_TRAP_FIQ",
+ "BriefDescription": "Exception taken, FIQ not taken locally"
+ },
+ {
+ "PublicDescription": "Release consistency operation speculatively executed, Load-Acquire",
+ "EventCode": "0x90",
+ "EventName": "RC_LD_SPEC",
+ "BriefDescription": "Release consistency operation speculatively executed, Load-Acquire"
+ },
+ {
+ "PublicDescription": "Release consistency operation speculatively executed, Store-Release",
+ "EventCode": "0x91",
+ "EventName": "RC_ST_SPEC",
+ "BriefDescription": "Release consistency operation speculatively executed, Store-Release"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache access, read",
+ "EventCode": "0xa0",
+ "EventName": "L3D_CACHE_RD",
+ "BriefDescription": "Attributable Level 3 data or unified cache access, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache access, write",
+ "EventCode": "0xa1",
+ "EventName": "L3D_CACHE_WR",
+ "BriefDescription": "Attributable Level 3 data or unified cache access, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache refill, read",
+ "EventCode": "0xa2",
+ "EventName": "L3D_CACHE_REFILL_RD",
+ "BriefDescription": "Attributable Level 3 data or unified cache refill, read"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache refill, write",
+ "EventCode": "0xa3",
+ "EventName": "L3D_CACHE_REFILL_WR",
+ "BriefDescription": "Attributable Level 3 data or unified cache refill, write"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, victim",
+ "EventCode": "0xa6",
+ "EventName": "L3D_CACHE_WB_VICTIM",
+ "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, victim"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean",
+ "EventCode": "0xa7",
+ "EventName": "L3D_CACHE_WB_CLEAN",
+ "BriefDescription": "Attributable Level 3 data or unified cache Write-Back, cache clean"
+ },
+ {
+ "PublicDescription": "Attributable Level 3 data or unified cache access, invalidate",
+ "EventCode": "0xa8",
+ "EventName": "L3D_CACHE_INVAL",
+ "BriefDescription": "Attributable Level 3 data or unified cache access, invalidate"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
new file mode 100644
index 000000000..752e47eb6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2/core-imp-def.json
@@ -0,0 +1,113 @@
+[
+ {
+ "ArchStdEvent": "L1D_CACHE_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_INNER",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_OUTER",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR",
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_RD",
+ },
+ {
+ "ArchStdEvent": "L2D_TLB_WR",
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_RD",
+ },
+ {
+ "ArchStdEvent": "BUS_ACCESS_WR",
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_RD",
+ },
+ {
+ "ArchStdEvent": "MEM_ACCESS_WR",
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LD_SPEC",
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_ST_SPEC",
+ },
+ {
+ "ArchStdEvent": "UNALIGNED_LDST_SPEC",
+ },
+ {
+ "ArchStdEvent": "EXC_UNDEF",
+ },
+ {
+ "ArchStdEvent": "EXC_SVC",
+ },
+ {
+ "ArchStdEvent": "EXC_PABORT",
+ },
+ {
+ "ArchStdEvent": "EXC_DABORT",
+ },
+ {
+ "ArchStdEvent": "EXC_IRQ",
+ },
+ {
+ "ArchStdEvent": "EXC_FIQ",
+ },
+ {
+ "ArchStdEvent": "EXC_SMC",
+ },
+ {
+ "ArchStdEvent": "EXC_HVC",
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_PABORT",
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_DABORT",
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_OTHER",
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_IRQ",
+ },
+ {
+ "ArchStdEvent": "EXC_TRAP_FIQ",
+ }
+]
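
Note: the vendor file above carries no event codes of its own. Each "ArchStdEvent" entry is resolved by perf's jevents tooling at build time, which looks the name up in the architecture-common events file added earlier in this patch and copies that event's code and descriptions, with any fields the vendor entry states explicitly taking precedence. A minimal Python sketch of that resolution step, for illustration only (not perf's actual C implementation; the file paths are hypothetical, and the files are assumed to parse as strict JSON, whereas jevents' own parser also tolerates the relaxed comma placement used in these vendor files):

    import json

    # Hypothetical paths, relative to tools/perf/pmu-events/arch/arm64/.
    with open("armv8-recommended.json") as f:
        common = {e["EventName"]: e for e in json.load(f)}
    with open("cavium/thunderx2/core-imp-def.json") as f:
        vendor = json.load(f)

    # Inherit EventCode and descriptions from the common table; fields
    # given explicitly in a vendor entry (none here) would override them.
    resolved = []
    for e in vendor:
        merged = dict(common[e.pop("ArchStdEvent")])
        merged.update(e)
        resolved.append(merged)
    print(resolved[0]["EventName"])  # -> L1D_CACHE_RD
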
diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json
new file mode 100644
index 000000000..9f0f15d15
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/core-imp-def.json
@@ -0,0 +1,122 @@
+[
+ {
+ "ArchStdEvent": "L1D_CACHE_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_VICTIM",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_WB_CLEAN",
+ },
+ {
+ "ArchStdEvent": "L1D_CACHE_INVAL",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_RD",
+ },
+ {
+ "ArchStdEvent": "L1D_TLB_WR",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_RD",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WR",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_RD",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_REFILL_WR",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_VICTIM",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_WB_CLEAN",
+ },
+ {
+ "ArchStdEvent": "L2D_CACHE_INVAL",
+ },
+ {
+ "PublicDescription": "Level 1 instruction cache prefetch access count",
+ "EventCode": "0x102e",
+ "EventName": "L1I_CACHE_PRF",
+ "BriefDescription": "L1I cache prefetch access count",
+ },
+ {
+ "PublicDescription": "Level 1 instruction cache miss due to prefetch access count",
+ "EventCode": "0x102f",
+ "EventName": "L1I_CACHE_PRF_REFILL",
+ "BriefDescription": "L1I cache miss due to prefetch access count",
+ },
+ {
+ "PublicDescription": "Instruction queue is empty",
+ "EventCode": "0x1043",
+ "EventName": "IQ_IS_EMPTY",
+ "BriefDescription": "Instruction queue is empty",
+ },
+ {
+ "PublicDescription": "Instruction fetch stall cycles",
+ "EventCode": "0x1044",
+ "EventName": "IF_IS_STALL",
+ "BriefDescription": "Instruction fetch stall cycles",
+ },
+ {
+ "PublicDescription": "Instructions can receive, but not send",
+ "EventCode": "0x2014",
+ "EventName": "FETCH_BUBBLE",
+ "BriefDescription": "Instructions can receive, but not send",
+ },
+ {
+ "PublicDescription": "Prefetch request from LSU",
+ "EventCode": "0x6013",
+ "EventName": "PRF_REQ",
+ "BriefDescription": "Prefetch request from LSU",
+ },
+ {
+ "PublicDescription": "Hit on prefetched data",
+ "EventCode": "0x6014",
+ "EventName": "HIT_ON_PRF",
+ "BriefDescription": "Hit on prefetched data",
+ },
+ {
+ "PublicDescription": "Cycles of that the number of issuing micro operations are less than 4",
+ "EventCode": "0x7001",
+ "EventName": "EXE_STALL_CYCLE",
+ "BriefDescription": "Cycles of that the number of issue ups are less than 4",
+ },
+ {
+ "PublicDescription": "No any micro operation is issued and meanwhile any load operation is not resolved",
+ "EventCode": "0x7004",
+ "EventName": "MEM_STALL_ANYLOAD",
+ "BriefDescription": "No any micro operation is issued and meanwhile any load operation is not resolved",
+ },
+ {
+ "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill",
+ "EventCode": "0x7006",
+ "EventName": "MEM_STALL_L1MISS",
+ "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing L1 cache and pending data refill",
+ },
+ {
+ "PublicDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache",
+ "EventCode": "0x7007",
+ "EventName": "MEM_STALL_L2MISS",
+ "BriefDescription": "No any micro operation is issued and meanwhile there is any load operation missing both L1 and L2 cache and pending data refill from L3 cache",
+ },
+]
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
new file mode 100644
index 000000000..59cd8604b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -0,0 +1,19 @@
+# Format:
+# MIDR,Version,JSON/file/pathname,Type
+#
+# where
+# MIDR Processor version
+# Variant[23:20] and Revision [3:0] should be zero.
+# Version could be used to track the version of the JSON file,
+# but it is currently unused.
+# JSON/file/pathname is the path to JSON file, relative
+# to tools/perf/pmu-events/arch/arm64/.
+# Type is core, uncore etc
+#
+#
+#Family-model,Version,Filename,EventType
+0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core
+0x00000000420f5160,v1,cavium/thunderx2,core
+0x00000000430f0af0,v1,cavium/thunderx2,core
+0x00000000480fd010,v1,hisilicon/hip08,core
+0x00000000500f0000,v1,ampere/emag,core
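
For context on how this table is consumed: the first column is treated as a regular expression (hence the POSIX [[:xdigit:]] class) and is matched against the running CPU's MIDR; the first matching row selects the event-JSON directory named in the third column. A rough, self-contained Python illustration of that lookup, not perf's actual implementation (the MIDR value used is hypothetical):

    import csv, io, re

    MAPFILE = ("0x00000000410fd03[[:xdigit:]],v1,arm/cortex-a53,core\n"
               "0x00000000420f5160,v1,cavium/thunderx2,core\n")

    def lookup(midr):
        for pattern, _version, path, _type in csv.reader(io.StringIO(MAPFILE)):
            # Python's re lacks POSIX classes, so translate [[:xdigit:]] first.
            regex = pattern.replace("[[:xdigit:]]", "[0-9a-fA-F]")
            if re.fullmatch(regex, midr):
                return path
        return None

    print(lookup("0x00000000410fd030"))  # -> arm/cortex-a53
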
diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
new file mode 100644
index 000000000..229150e7a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv
@@ -0,0 +1,17 @@
+# Format:
+# PVR,Version,JSON/file/pathname,Type
+#
+# where
+# PVR Processor version
+# Version could be used to track the version of the JSON file,
+# but it is currently unused.
+# JSON/file/pathname is the path to JSON file, relative
+# to tools/perf/pmu-events/arch/powerpc/.
+# Type is core, uncore etc
+#
+# Multiple PVRs could map to a single JSON file.
+#
+
+# Power8 and Power9 entries
+004[bcd][[:xdigit:]]{4},1,power8,core
+004e[[:xdigit:]]{4},1,power9,core
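
The PVR patterns here work the same first-match regex way as the arm64 MIDR patterns above: PVR families 0x004b, 0x004c and 0x004d (the POWER8 variants) all share the power8 directory, while 0x004e selects power9. A quick hypothetical check of the two rows in Python (the PVR values are made up for illustration):

    import re

    XDIGIT = "[0-9a-fA-F]"  # stand-in for POSIX [[:xdigit:]]
    power8 = re.compile("004[bcd]" + XDIGIT + "{4}")
    power9 = re.compile("004e" + XDIGIT + "{4}")

    assert power8.fullmatch("004d0200")      # hypothetical POWER8 PVR
    assert power9.fullmatch("004e1200")      # hypothetical POWER9 PVR
    assert not power8.fullmatch("004e1200")  # POWER9 skips the power8 row
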
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/cache.json b/tools/perf/pmu-events/arch/powerpc/power8/cache.json
new file mode 100644
index 000000000..4a3daa6b4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/cache.json
@@ -0,0 +1,176 @@
+[
+ {,
+ "EventCode": "0x4c048",
+ "EventName": "PM_DATA_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x3c048",
+ "EventName": "PM_DATA_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x3c04c",
+ "EventName": "PM_DATA_FROM_DL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x1c042",
+ "EventName": "PM_DATA_FROM_L2",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x200fe",
+ "EventName": "PM_DATA_FROM_L2MISS",
+ "BriefDescription": "Demand LD - L2 Miss (not L2 hit)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1c04e",
+ "EventName": "PM_DATA_FROM_L2MISS_MOD",
+ "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x3c040",
+ "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x4c040",
+ "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c040",
+ "EventName": "PM_DATA_FROM_L2_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x1c040",
+ "EventName": "PM_DATA_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x4c042",
+ "EventName": "PM_DATA_FROM_L3",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x300fe",
+ "EventName": "PM_DATA_FROM_L3MISS",
+ "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c04e",
+ "EventName": "PM_DATA_FROM_L3MISS_MOD",
+ "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x3c042",
+ "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c042",
+ "EventName": "PM_DATA_FROM_L3_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x1c044",
+ "EventName": "PM_DATA_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x1c04c",
+ "EventName": "PM_DATA_FROM_LL4",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x4c04a",
+ "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x1c048",
+ "EventName": "PM_DATA_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c046",
+ "EventName": "PM_DATA_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x1c04a",
+ "EventName": "PM_DATA_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x3001a",
+ "EventName": "PM_DATA_TABLEWALK_CYC",
+ "BriefDescription": "Tablwalk Cycles (could be 1 or 2 active)",
+ "PublicDescription": "Data Tablewalk Active"
+ },
+ {,
+ "EventCode": "0x4e04e",
+ "EventName": "PM_DPTEG_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xd094",
+ "EventName": "PM_DSLB_MISS",
+ "BriefDescription": "Data SLB Miss - Total of all segment sizes",
+ "PublicDescription": "Data SLB Miss - Total of all segment sizesData SLB misses"
+ },
+ {,
+ "EventCode": "0x1002c",
+ "EventName": "PM_L1_DCACHE_RELOADED_ALL",
+ "BriefDescription": "L1 data cache reloaded for demand or prefetch",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x300f6",
+ "EventName": "PM_L1_DCACHE_RELOAD_VALID",
+ "BriefDescription": "DL1 reloaded due to Demand Load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3e054",
+ "EventName": "PM_LD_MISS_L1",
+ "BriefDescription": "Load Missed L1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x100ee",
+ "EventName": "PM_LD_REF_L1",
+ "BriefDescription": "All L1 D cache load references counted at finish, gated by reject",
+ "PublicDescription": "Load Ref count combined for all units"
+ },
+ {,
+ "EventCode": "0x300f0",
+ "EventName": "PM_ST_MISS_L1",
+ "BriefDescription": "Store Missed L1",
+ "PublicDescription": ""
+ },
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/floating-point.json b/tools/perf/pmu-events/arch/powerpc/power8/floating-point.json
new file mode 100644
index 000000000..5f1bb9fca
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/floating-point.json
@@ -0,0 +1,14 @@
+[
+ {,
+ "EventCode": "0x2000e",
+ "EventName": "PM_FXU_BUSY",
+ "BriefDescription": "fxu0 busy and fxu1 busy",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1000e",
+ "EventName": "PM_FXU_IDLE",
+ "BriefDescription": "fxu0 idle and fxu1 idle",
+ "PublicDescription": ""
+ },
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/frontend.json b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
new file mode 100644
index 000000000..04c5f1b7b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/frontend.json
@@ -0,0 +1,470 @@
+[
+ {,
+ "EventCode": "0x2505e",
+ "EventName": "PM_BACK_BR_CMPL",
+ "BriefDescription": "Branch instruction completed with a target address less than current instruction address",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10068",
+ "EventName": "PM_BRU_FIN",
+ "BriefDescription": "Branch Instruction Finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20036",
+ "EventName": "PM_BR_2PATH",
+ "BriefDescription": "two path branch",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40060",
+ "EventName": "PM_BR_CMPL",
+ "BriefDescription": "Branch Instruction completed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x400f6",
+ "EventName": "PM_BR_MPRED_CMPL",
+ "BriefDescription": "Number of Branch Mispredicts",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x200fa",
+ "EventName": "PM_BR_TAKEN_CMPL",
+ "BriefDescription": "New event for Branch Taken",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10018",
+ "EventName": "PM_IC_DEMAND_CYC",
+ "BriefDescription": "Cycles when a demand ifetch was pending",
+ "PublicDescription": "Demand ifetch pending"
+ },
+ {,
+ "EventCode": "0x100f6",
+ "EventName": "PM_IERAT_RELOAD",
+ "BriefDescription": "Number of I-ERAT reloads",
+ "PublicDescription": "IERAT Reloaded (Miss)"
+ },
+ {,
+ "EventCode": "0x4006a",
+ "EventName": "PM_IERAT_RELOAD_16M",
+ "BriefDescription": "IERAT Reloaded (Miss) for a 16M page",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20064",
+ "EventName": "PM_IERAT_RELOAD_4K",
+ "BriefDescription": "IERAT Miss (Not implemented as DI on POWER6)",
+ "PublicDescription": "IERAT Reloaded (Miss) for a 4k page"
+ },
+ {,
+ "EventCode": "0x3006a",
+ "EventName": "PM_IERAT_RELOAD_64K",
+ "BriefDescription": "IERAT Reloaded (Miss) for a 64k page",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x14050",
+ "EventName": "PM_INST_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x2",
+ "EventName": "PM_INST_CMPL",
+ "BriefDescription": "Number of PowerPC Instructions that completed",
+ "PublicDescription": "PPC Instructions Finished (completed)"
+ },
+ {,
+ "EventCode": "0x200f2",
+ "EventName": "PM_INST_DISP",
+ "BriefDescription": "PPC Dispatched",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x44048",
+ "EventName": "PM_INST_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x34048",
+ "EventName": "PM_INST_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x3404c",
+ "EventName": "PM_INST_FROM_DL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x4404c",
+ "EventName": "PM_INST_FROM_DMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x14042",
+ "EventName": "PM_INST_FROM_L2",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x1404e",
+ "EventName": "PM_INST_FROM_L2MISS",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x34040",
+ "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x44040",
+ "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x24040",
+ "EventName": "PM_INST_FROM_L2_MEPF",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x14040",
+ "EventName": "PM_INST_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x44042",
+ "EventName": "PM_INST_FROM_L3",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x300fa",
+ "EventName": "PM_INST_FROM_L3MISS",
+ "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet",
+ "PublicDescription": "Inst from L3 miss"
+ },
+ {,
+ "EventCode": "0x4404e",
+ "EventName": "PM_INST_FROM_L3MISS_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x34042",
+ "EventName": "PM_INST_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x24042",
+ "EventName": "PM_INST_FROM_L3_MEPF",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x14044",
+ "EventName": "PM_INST_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x1404c",
+ "EventName": "PM_INST_FROM_LL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x24048",
+ "EventName": "PM_INST_FROM_LMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x2404c",
+ "EventName": "PM_INST_FROM_MEMORY",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x4404a",
+ "EventName": "PM_INST_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x14048",
+ "EventName": "PM_INST_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x24046",
+ "EventName": "PM_INST_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x1404a",
+ "EventName": "PM_INST_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x2404a",
+ "EventName": "PM_INST_FROM_RL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x3404a",
+ "EventName": "PM_INST_FROM_RMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {,
+ "EventCode": "0x24050",
+ "EventName": "PM_INST_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x24052",
+ "EventName": "PM_INST_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+ },
+ {,
+ "EventCode": "0x14052",
+ "EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pumpfor an instruction fetch"
+ },
+ {,
+ "EventCode": "0x1003a",
+ "EventName": "PM_INST_IMC_MATCH_CMPL",
+ "BriefDescription": "IMC Match Count ( Not architected in P8)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x14054",
+ "EventName": "PM_INST_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch",
+ "PublicDescription": "Pump prediction correct. Counts across all types of pumpsfor an instruction fetch"
+ },
+ {,
+ "EventCode": "0x44052",
+ "EventName": "PM_INST_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch",
+ "PublicDescription": "Pump Mis prediction Counts across all types of pumpsfor an instruction fetch"
+ },
+ {,
+ "EventCode": "0x34050",
+ "EventName": "PM_INST_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x34052",
+ "EventName": "PM_INST_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+ },
+ {,
+ "EventCode": "0x44050",
+ "EventName": "PM_INST_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x45048",
+ "EventName": "PM_IPTEG_FROM_DL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x35048",
+ "EventName": "PM_IPTEG_FROM_DL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3504c",
+ "EventName": "PM_IPTEG_FROM_DL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4504c",
+ "EventName": "PM_IPTEG_FROM_DMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15042",
+ "EventName": "PM_IPTEG_FROM_L2",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1504e",
+ "EventName": "PM_IPTEG_FROM_L2MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x25040",
+ "EventName": "PM_IPTEG_FROM_L2_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15040",
+ "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x45042",
+ "EventName": "PM_IPTEG_FROM_L3",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4504e",
+ "EventName": "PM_IPTEG_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x35042",
+ "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x25042",
+ "EventName": "PM_IPTEG_FROM_L3_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15044",
+ "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1504c",
+ "EventName": "PM_IPTEG_FROM_LL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x25048",
+ "EventName": "PM_IPTEG_FROM_LMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2504c",
+ "EventName": "PM_IPTEG_FROM_MEMORY",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4504a",
+ "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15048",
+ "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x25046",
+ "EventName": "PM_IPTEG_FROM_RL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1504a",
+ "EventName": "PM_IPTEG_FROM_RL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2504a",
+ "EventName": "PM_IPTEG_FROM_RL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3504a",
+ "EventName": "PM_IPTEG_FROM_RMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xd096",
+ "EventName": "PM_ISLB_MISS",
+ "BriefDescription": "I SLB Miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x400fc",
+ "EventName": "PM_ITLB_MISS",
+ "BriefDescription": "ITLB Reloaded (always zero on POWER6)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x200fd",
+ "EventName": "PM_L1_ICACHE_MISS",
+ "BriefDescription": "Demand iCache Miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40012",
+ "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+ "BriefDescription": "Counts all Icache reloads includes demand, prefetchm prefetch turned into demand and demand turned into prefetch",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30068",
+ "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+ "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x300f4",
+ "EventName": "PM_THRD_CONC_RUN_INST",
+ "BriefDescription": "PPC Instructions Finished when both threads in run_cycles",
+ "PublicDescription": "Concurrent Run Instructions"
+ },
+ {,
+ "EventCode": "0x30060",
+ "EventName": "PM_TM_TRANS_RUN_INST",
+ "BriefDescription": "Instructions completed in transactional state",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e014",
+ "EventName": "PM_TM_TX_PASS_RUN_INST",
+ "BriefDescription": "run instructions spent in successful transactions",
+ "PublicDescription": ""
+ },
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/marked.json b/tools/perf/pmu-events/arch/powerpc/power8/marked.json
new file mode 100644
index 000000000..dcdcede3c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/marked.json
@@ -0,0 +1,794 @@
+[
+ {,
+ "EventCode": "0x3515e",
+ "EventName": "PM_MRK_BACK_BR_CMPL",
+ "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2013a",
+ "EventName": "PM_MRK_BRU_FIN",
+ "BriefDescription": "bru marked instr finish",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1016e",
+ "EventName": "PM_MRK_BR_CMPL",
+ "BriefDescription": "Branch Instruction completed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x301e4",
+ "EventName": "PM_MRK_BR_MPRED_CMPL",
+ "BriefDescription": "Marked Branch Mispredicted",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x101e2",
+ "EventName": "PM_MRK_BR_TAKEN_CMPL",
+ "BriefDescription": "Marked Branch Taken completed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d148",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d128",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d148",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c128",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d14c",
+ "EventName": "PM_MRK_DATA_FROM_DL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c12c",
+ "EventName": "PM_MRK_DATA_FROM_DL4_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d14c",
+ "EventName": "PM_MRK_DATA_FROM_DMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d12c",
+ "EventName": "PM_MRK_DATA_FROM_DMEM_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d142",
+ "EventName": "PM_MRK_DATA_FROM_L2",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d14e",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS",
+ "BriefDescription": "Data cache reload L2 miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c12e",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC",
+ "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L2 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c122",
+ "EventName": "PM_MRK_DATA_FROM_L2_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d140",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c120",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d140",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d120",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d140",
+ "EventName": "PM_MRK_DATA_FROM_L2_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d120",
+ "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d140",
+ "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c120",
+ "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d142",
+ "EventName": "PM_MRK_DATA_FROM_L3",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x201e4",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d12e",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC",
+ "BriefDescription": "Duration in cycles to reload from a localtion other than the local core's L3 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d122",
+ "EventName": "PM_MRK_DATA_FROM_L3_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d142",
+ "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c122",
+ "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d142",
+ "EventName": "PM_MRK_DATA_FROM_L3_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d122",
+ "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d144",
+ "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c124",
+ "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d14c",
+ "EventName": "PM_MRK_DATA_FROM_LL4",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c12c",
+ "EventName": "PM_MRK_DATA_FROM_LL4_CYC",
+ "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d148",
+ "EventName": "PM_MRK_DATA_FROM_LMEM",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d128",
+ "EventName": "PM_MRK_DATA_FROM_LMEM_CYC",
+ "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d14c",
+ "EventName": "PM_MRK_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d12c",
+ "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC",
+ "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d14a",
+ "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d12a",
+ "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC",
+ "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d148",
+ "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c128",
+ "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC",
+ "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d146",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d126",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d14a",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c12a",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d14a",
+ "EventName": "PM_MRK_DATA_FROM_RL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d12a",
+ "EventName": "PM_MRK_DATA_FROM_RL4_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d14a",
+ "EventName": "PM_MRK_DATA_FROM_RMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c12a",
+ "EventName": "PM_MRK_DATA_FROM_RMEM_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40118",
+ "EventName": "PM_MRK_DCACHE_RELOAD_INTV",
+ "BriefDescription": "Combined Intervention event",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x301e6",
+ "EventName": "PM_MRK_DERAT_MISS",
+ "BriefDescription": "Erat Miss (TLB Access) All page sizes",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d154",
+ "EventName": "PM_MRK_DERAT_MISS_16G",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d154",
+ "EventName": "PM_MRK_DERAT_MISS_16M",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d156",
+ "EventName": "PM_MRK_DERAT_MISS_4K",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d154",
+ "EventName": "PM_MRK_DERAT_MISS_64K",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20132",
+ "EventName": "PM_MRK_DFU_FIN",
+ "BriefDescription": "Decimal Unit marked Instruction Finish",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f148",
+ "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f148",
+ "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f14c",
+ "EventName": "PM_MRK_DPTEG_FROM_DL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f14c",
+ "EventName": "PM_MRK_DPTEG_FROM_DMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f142",
+ "EventName": "PM_MRK_DPTEG_FROM_L2",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f14e",
+ "EventName": "PM_MRK_DPTEG_FROM_L2MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f140",
+ "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f140",
+ "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f142",
+ "EventName": "PM_MRK_DPTEG_FROM_L3",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f14e",
+ "EventName": "PM_MRK_DPTEG_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L3 due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f142",
+ "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f142",
+ "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f144",
+ "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f14c",
+ "EventName": "PM_MRK_DPTEG_FROM_LL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f148",
+ "EventName": "PM_MRK_DPTEG_FROM_LMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f14c",
+ "EventName": "PM_MRK_DPTEG_FROM_MEMORY",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f14a",
+ "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f148",
+ "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f146",
+ "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f14a",
+ "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f14a",
+ "EventName": "PM_MRK_DPTEG_FROM_RL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f14a",
+ "EventName": "PM_MRK_DPTEG_FROM_RMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x401e4",
+ "EventName": "PM_MRK_DTLB_MISS",
+ "BriefDescription": "Marked dtlb miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d158",
+ "EventName": "PM_MRK_DTLB_MISS_16G",
+ "BriefDescription": "Marked Data TLB Miss page size 16G",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d156",
+ "EventName": "PM_MRK_DTLB_MISS_16M",
+ "BriefDescription": "Marked Data TLB Miss page size 16M",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d156",
+ "EventName": "PM_MRK_DTLB_MISS_4K",
+ "BriefDescription": "Marked Data TLB Miss page size 4k",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d156",
+ "EventName": "PM_MRK_DTLB_MISS_64K",
+ "BriefDescription": "Marked Data TLB Miss page size 64K",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40154",
+ "EventName": "PM_MRK_FAB_RSP_BKILL",
+ "BriefDescription": "Marked store had to do a bkill",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f150",
+ "EventName": "PM_MRK_FAB_RSP_BKILL_CYC",
+ "BriefDescription": "cycles L2 RC took for a bkill",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3015e",
+ "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY",
+ "BriefDescription": "Sampled store did a rwitm and got a rty",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30154",
+ "EventName": "PM_MRK_FAB_RSP_DCLAIM",
+ "BriefDescription": "Marked store had to do a dclaim",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f152",
+ "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC",
+ "BriefDescription": "cycles L2 RC took for a dclaim",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4015e",
+ "EventName": "PM_MRK_FAB_RSP_RD_RTY",
+ "BriefDescription": "Sampled L2 reads retry count",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1015e",
+ "EventName": "PM_MRK_FAB_RSP_RD_T_INTV",
+ "BriefDescription": "Sampled Read got a T intervention",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f150",
+ "EventName": "PM_MRK_FAB_RSP_RWITM_CYC",
+ "BriefDescription": "cycles L2 RC took for a rwitm",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2015e",
+ "EventName": "PM_MRK_FAB_RSP_RWITM_RTY",
+ "BriefDescription": "Sampled store did a rwitm and got a rty",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20134",
+ "EventName": "PM_MRK_FXU_FIN",
+ "BriefDescription": "fxu marked instr finish",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x401e0",
+ "EventName": "PM_MRK_INST_CMPL",
+ "BriefDescription": "marked instruction completed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20130",
+ "EventName": "PM_MRK_INST_DECODED",
+ "BriefDescription": "marked instruction decoded",
+ "PublicDescription": "marked instruction decoded. Name from ISU?"
+ },
+ {,
+ "EventCode": "0x101e0",
+ "EventName": "PM_MRK_INST_DISP",
+ "BriefDescription": "The thread has dispatched a randomly sampled marked instruction",
+ "PublicDescription": "Marked Instruction dispatched"
+ },
+ {,
+ "EventCode": "0x30130",
+ "EventName": "PM_MRK_INST_FIN",
+ "BriefDescription": "marked instruction finished",
+ "PublicDescription": "marked instr finish any unit"
+ },
+ {,
+ "EventCode": "0x401e6",
+ "EventName": "PM_MRK_INST_FROM_L3MISS",
+ "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet",
+ "PublicDescription": "n/a"
+ },
+ {,
+ "EventCode": "0x10132",
+ "EventName": "PM_MRK_INST_ISSUED",
+ "BriefDescription": "Marked instruction issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40134",
+ "EventName": "PM_MRK_INST_TIMEO",
+ "BriefDescription": "marked Instruction finish timeout (instruction lost)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x101e4",
+ "EventName": "PM_MRK_L1_ICACHE_MISS",
+ "BriefDescription": "sampled Instruction suffered an icache Miss",
+ "PublicDescription": "Marked L1 Icache Miss"
+ },
+ {,
+ "EventCode": "0x101ea",
+ "EventName": "PM_MRK_L1_RELOAD_VALID",
+ "BriefDescription": "Marked demand reload",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20114",
+ "EventName": "PM_MRK_L2_RC_DISP",
+ "BriefDescription": "Marked Instruction RC dispatched in L2",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3012a",
+ "EventName": "PM_MRK_L2_RC_DONE",
+ "BriefDescription": "Marked RC done",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40116",
+ "EventName": "PM_MRK_LARX_FIN",
+ "BriefDescription": "Larx finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1013e",
+ "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC",
+ "BriefDescription": "Marked Load exposed Miss cycles",
+ "PublicDescription": "Marked Load exposed Miss (use edge detect to count #)"
+ },
+ {,
+ "EventCode": "0x201e2",
+ "EventName": "PM_MRK_LD_MISS_L1",
+ "BriefDescription": "Marked DL1 Demand Miss counted at exec time",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4013e",
+ "EventName": "PM_MRK_LD_MISS_L1_CYC",
+ "BriefDescription": "Marked ld latency",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40132",
+ "EventName": "PM_MRK_LSU_FIN",
+ "BriefDescription": "lsu marked instr finish",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20112",
+ "EventName": "PM_MRK_NTF_FIN",
+ "BriefDescription": "Marked next to finish instruction finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d15e",
+ "EventName": "PM_MRK_RUN_CYC",
+ "BriefDescription": "Marked run cycles",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3013e",
+ "EventName": "PM_MRK_STALL_CMPLU_CYC",
+ "BriefDescription": "Marked Group completion Stall",
+ "PublicDescription": "Marked Group Completion Stall cycles (use edge detect to count #)"
+ },
+ {,
+ "EventCode": "0x3e158",
+ "EventName": "PM_MRK_STCX_FAIL",
+ "BriefDescription": "marked stcx failed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10134",
+ "EventName": "PM_MRK_ST_CMPL",
+ "BriefDescription": "marked store completed and sent to nest",
+ "PublicDescription": "Marked store completed"
+ },
+ {,
+ "EventCode": "0x30134",
+ "EventName": "PM_MRK_ST_CMPL_INT",
+ "BriefDescription": "marked store finished with intervention",
+ "PublicDescription": "marked store complete (data home) with intervention"
+ },
+ {,
+ "EventCode": "0x3f150",
+ "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC",
+ "BriefDescription": "cycles to drain st from core to L2",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3012c",
+ "EventName": "PM_MRK_ST_FWD",
+ "BriefDescription": "Marked st forwards",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f150",
+ "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC",
+ "BriefDescription": "cycles from L2 rc disp to l2 rc completion",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20138",
+ "EventName": "PM_MRK_ST_NEST",
+ "BriefDescription": "Marked store sent to nest",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30132",
+ "EventName": "PM_MRK_VSU_FIN",
+ "BriefDescription": "VSU marked instr finish",
+ "PublicDescription": "vsu (fpu) marked instr finish"
+ },
+ {,
+ "EventCode": "0x3d15e",
+ "EventName": "PM_MULT_MRK",
+ "BriefDescription": "mult marked instr",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15152",
+ "EventName": "PM_SYNC_MRK_BR_LINK",
+ "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1515c",
+ "EventName": "PM_SYNC_MRK_BR_MPRED",
+ "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15156",
+ "EventName": "PM_SYNC_MRK_FX_DIVIDE",
+ "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15158",
+ "EventName": "PM_SYNC_MRK_L2HIT",
+ "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1515a",
+ "EventName": "PM_SYNC_MRK_L2MISS",
+ "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15154",
+ "EventName": "PM_SYNC_MRK_L3MISS",
+ "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x15150",
+ "EventName": "PM_SYNC_MRK_PROBE_NOP",
+ "BriefDescription": "Marked probeNops which can cause synchronous interrupts",
+ "PublicDescription": ""
+ },
+]
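
Each of the per-topic files added by this patch follows the same four-field schema: EventCode (the raw code programmed into the PMU), EventName, BriefDescription, and an often-empty PublicDescription; these entries are what let perf resolve symbolic names such as pm_mem_read on POWER8 hardware. Note that the files open each object with "{," and leave a trailing comma before the closing "]"; the in-tree jevents generator tolerates this, but a strict JSON parser does not, so a standalone reader has to normalize the text first. A minimal sketch in Python, where the file name "marked.json" is only a placeholder for a local copy of one of these files:

    import json
    import re

    def load_power8_events(path):
        # Load one of the pmu-events files above. The files open each
        # object with "{," and leave a trailing comma before "]";
        # strict json.loads() rejects both, so normalize them first.
        with open(path) as f:
            text = f.read()
        text = re.sub(r"\{\s*,", "{", text)         # "{,"        -> "{"
        text = re.sub(r",(\s*[\]}])", r"\1", text)  # ",]" / ",}" -> "]" / "}"
        return json.loads(text)

    if __name__ == "__main__":
        for ev in load_power8_events("marked.json"):  # placeholder path
            print(ev["EventCode"], ev["EventName"], "-", ev["BriefDescription"])

Run against any file from this patch, the loop prints the code/name/description triples; the same loader is reused in the sketch after memory.json below.
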
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/memory.json b/tools/perf/pmu-events/arch/powerpc/power8/memory.json
new file mode 100644
index 000000000..87cdaadba
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/memory.json
@@ -0,0 +1,212 @@
+[
+ {,
+ "EventCode": "0x10050",
+ "EventName": "PM_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for all data types ( demand load,data,inst prefetch,inst fetch,xlate (I or d)"
+ },
+ {,
+ "EventCode": "0x1c050",
+ "EventName": "PM_DATA_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for a demand load"
+ },
+ {,
+ "EventCode": "0x4c04c",
+ "EventName": "PM_DATA_FROM_DMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c048",
+ "EventName": "PM_DATA_FROM_LMEM",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from the local chip's Memory due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c04c",
+ "EventName": "PM_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c04a",
+ "EventName": "PM_DATA_FROM_RL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x3c04a",
+ "EventName": "PM_DATA_FROM_RMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x2c050",
+ "EventName": "PM_DATA_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for a demand load"
+ },
+ {,
+ "EventCode": "0x2c052",
+ "EventName": "PM_DATA_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+ },
+ {,
+ "EventCode": "0x1c052",
+ "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pumpfor a demand load"
+ },
+ {,
+ "EventCode": "0x1c054",
+ "EventName": "PM_DATA_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c052",
+ "EventName": "PM_DATA_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load",
+ "PublicDescription": "Pump Mis prediction Counts across all types of pumpsfor a demand load"
+ },
+ {,
+ "EventCode": "0x3c050",
+ "EventName": "PM_DATA_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for a demand load"
+ },
+ {,
+ "EventCode": "0x3c052",
+ "EventName": "PM_DATA_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+ },
+ {,
+ "EventCode": "0x4c050",
+ "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for a demand load"
+ },
+ {,
+ "EventCode": "0x3e04c",
+ "EventName": "PM_DPTEG_FROM_DL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e04c",
+ "EventName": "PM_DPTEG_FROM_DMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3e04a",
+ "EventName": "PM_DPTEG_FROM_RMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20050",
+ "EventName": "PM_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20052",
+ "EventName": "PM_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+ },
+ {,
+ "EventCode": "0x10052",
+ "EventName": "PM_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pumpfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x18082",
+ "EventName": "PM_L3_CO_MEPF",
+ "BriefDescription": "L3 CO of line in Mep state ( includes casthrough",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c058",
+ "EventName": "PM_MEM_CO",
+ "BriefDescription": "Memory castouts from this lpar",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10058",
+ "EventName": "PM_MEM_LOC_THRESH_IFU",
+ "BriefDescription": "Local Memory above threshold for IFU speculation control",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40056",
+ "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
+ "BriefDescription": "Local memory above threshold for LSU medium",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1c05e",
+ "EventName": "PM_MEM_LOC_THRESH_LSU_MED",
+ "BriefDescription": "Local memory above theshold for data prefetch",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c058",
+ "EventName": "PM_MEM_PREF",
+ "BriefDescription": "Memory prefetch for this lpar. Includes L4",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10056",
+ "EventName": "PM_MEM_READ",
+ "BriefDescription": "Reads from Memory from this lpar (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3c05e",
+ "EventName": "PM_MEM_RWITM",
+ "BriefDescription": "Memory rwitm for this lpar",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3006e",
+ "EventName": "PM_NEST_REF_CLK",
+ "BriefDescription": "Multiply by 4 to obtain the number of PB cycles",
+ "PublicDescription": "Nest reference clocks"
+ },
+ {,
+ "EventCode": "0x10054",
+ "EventName": "PM_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Pump prediction correct. Counts across all types of pumpsfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x40052",
+ "EventName": "PM_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Pump Mis prediction Counts across all types of pumpsfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x30050",
+ "EventName": "PM_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x30052",
+ "EventName": "PM_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+ },
+ {,
+ "EventCode": "0x40050",
+ "EventName": "PM_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+]
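
One detail worth calling out in memory.json: codes such as 0x1c054, 0x2c058, 0x3c050 and 0x4c052 differ only in the leading hex digit, which sits in bits 16-19 of the EventCode and appears to select which hardware counter (PMC 1-4) the event is counted on; the 0x6xxxxx codes in other.json below keep that same nibble underneath an extra prefix. Under that assumption (an inference from the codes above, not something the patch states), a short sketch that buckets events by counter, reusing load_power8_events() from the earlier sketch:

    from collections import defaultdict

    def events_by_pmc(events):
        # Bucket events by the assumed PMC selector in EventCode bits
        # 16-19 (the leading 1..4 of codes like 0x1c054 vs 0x4c052).
        # pmc == 0 would mean the code pins no specific counter.
        by_pmc = defaultdict(list)
        for ev in events:
            pmc = (int(ev["EventCode"], 16) >> 16) & 0xF
            by_pmc[pmc].append(ev["EventName"])
        return by_pmc

    # load_power8_events() is the loader sketched after the previous
    # file; "memory.json" is a placeholder for a local copy of it.
    for pmc, names in sorted(events_by_pmc(load_power8_events("memory.json")).items()):
        print("PMC%d: %d events" % (pmc, len(names)))

Events that decode to the same PMC compete for that counter, which is one reason perf may have to multiplex when several of them are enabled in one session.
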
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json
new file mode 100644
index 000000000..704302c3e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json
@@ -0,0 +1,4064 @@
+[
+ {,
+ "EventCode": "0x1f05e",
+ "EventName": "PM_1LPAR_CYC",
+ "BriefDescription": "Number of cycles in single lpar mode. All threads in the core are assigned to the same lpar",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2006e",
+ "EventName": "PM_2LPAR_CYC",
+ "BriefDescription": "Cycles in 2-lpar mode. Threads 0-3 belong to Lpar0 and threads 4-7 belong to Lpar1",
+ "PublicDescription": "Number of cycles in 2 lpar mode"
+ },
+ {,
+ "EventCode": "0x4e05e",
+ "EventName": "PM_4LPAR_CYC",
+ "BriefDescription": "Number of cycles in 4 LPAR mode. Threads 0-1 belong to lpar0, threads 2-3 belong to lpar1, threads 4-5 belong to lpar2, and threads 6-7 belong to lpar3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x610050",
+ "EventName": "PM_ALL_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for all data types ( demand load,data,inst prefetch,inst fetch,xlate (I or d)"
+ },
+ {,
+ "EventCode": "0x520050",
+ "EventName": "PM_ALL_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x620052",
+ "EventName": "PM_ALL_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+ },
+ {,
+ "EventCode": "0x610052",
+ "EventName": "PM_ALL_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pumpfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x610054",
+ "EventName": "PM_ALL_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Pump prediction correct. Counts across all types of pumpsfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x640052",
+ "EventName": "PM_ALL_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Pump Mis prediction Counts across all types of pumpsfor all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x630050",
+ "EventName": "PM_ALL_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x630052",
+ "EventName": "PM_ALL_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+ },
+ {,
+ "EventCode": "0x640050",
+ "EventName": "PM_ALL_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types (demand load,data prefetch,inst prefetch,inst fetch,xlate)",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x4082",
+ "EventName": "PM_BANK_CONFLICT",
+ "BriefDescription": "Read blocked due to interleave conflict. The ifar logic will detect an interleave conflict and kill the data that was read that cycle",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x5086",
+ "EventName": "PM_BR_BC_8",
+ "BriefDescription": "Pairable BC+8 branch that has not been converted to a Resolve Finished in the BRU pipeline",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x5084",
+ "EventName": "PM_BR_BC_8_CONV",
+ "BriefDescription": "Pairable BC+8 branch that was converted to a Resolve Finished in the BRU pipeline",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40ac",
+ "EventName": "PM_BR_MPRED_CCACHE",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40b8",
+ "EventName": "PM_BR_MPRED_CR",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the BHT Direction Prediction (taken/not taken)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40ae",
+ "EventName": "PM_BR_MPRED_LSTACK",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40ba",
+ "EventName": "PM_BR_MPRED_TA",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack. Only XL-form branches that resolved Taken set this event",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10138",
+ "EventName": "PM_BR_MRK_2PATH",
+ "BriefDescription": "marked two path branch",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x409c",
+ "EventName": "PM_BR_PRED_BR0",
+ "BriefDescription": "Conditional Branch Completed on BR0 (1st branch in group) in which the HW predicted the Direction or Target",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x409e",
+ "EventName": "PM_BR_PRED_BR1",
+ "BriefDescription": "Conditional Branch Completed on BR1 (2nd branch in group) in which the HW predicted the Direction or Target. Note: BR1 can only be used in Single Thread Mode. In all of the SMT modes, only one branch can complete, thus BR1 is unused",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x489c",
+ "EventName": "PM_BR_PRED_BR_CMPL",
+ "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0) OR if_pc_br0_br_pred(1)",
+ "PublicDescription": "IFU"
+ },
+ {,
+ "EventCode": "0x40a4",
+ "EventName": "PM_BR_PRED_CCACHE_BR0",
+ "BriefDescription": "Conditional Branch Completed on BR0 that used the Count Cache for Target Prediction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40a6",
+ "EventName": "PM_BR_PRED_CCACHE_BR1",
+ "BriefDescription": "Conditional Branch Completed on BR1 that used the Count Cache for Target Prediction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x48a4",
+ "EventName": "PM_BR_PRED_CCACHE_CMPL",
+ "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0) AND if_pc_br0_pred_type",
+ "PublicDescription": "IFU"
+ },
+ {,
+ "EventCode": "0x40b0",
+ "EventName": "PM_BR_PRED_CR_BR0",
+ "BriefDescription": "Conditional Branch Completed on BR0 that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40b2",
+ "EventName": "PM_BR_PRED_CR_BR1",
+ "BriefDescription": "Conditional Branch Completed on BR1 that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x48b0",
+ "EventName": "PM_BR_PRED_CR_CMPL",
+ "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(1)='1'",
+ "PublicDescription": "IFU"
+ },
+ {,
+ "EventCode": "0x40a8",
+ "EventName": "PM_BR_PRED_LSTACK_BR0",
+ "BriefDescription": "Conditional Branch Completed on BR0 that used the Link Stack for Target Prediction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40aa",
+ "EventName": "PM_BR_PRED_LSTACK_BR1",
+ "BriefDescription": "Conditional Branch Completed on BR1 that used the Link Stack for Target Prediction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x48a8",
+ "EventName": "PM_BR_PRED_LSTACK_CMPL",
+ "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0) AND (not if_pc_br0_pred_type)",
+ "PublicDescription": "IFU"
+ },
+ {,
+ "EventCode": "0x40b4",
+ "EventName": "PM_BR_PRED_TA_BR0",
+ "BriefDescription": "Conditional Branch Completed on BR0 that had its target address predicted. Only XL-form branches set this event",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40b6",
+ "EventName": "PM_BR_PRED_TA_BR1",
+ "BriefDescription": "Conditional Branch Completed on BR1 that had its target address predicted. Only XL-form branches set this event",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x48b4",
+ "EventName": "PM_BR_PRED_TA_CMPL",
+ "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred(0)='1'",
+ "PublicDescription": "IFU"
+ },
+ {,
+ "EventCode": "0x40a0",
+ "EventName": "PM_BR_UNCOND_BR0",
+ "BriefDescription": "Unconditional Branch Completed on BR0. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was coverted to a Resolve",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40a2",
+ "EventName": "PM_BR_UNCOND_BR1",
+ "BriefDescription": "Unconditional Branch Completed on BR1. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was coverted to a Resolve",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x48a0",
+ "EventName": "PM_BR_UNCOND_CMPL",
+ "BriefDescription": "Completion Time Event. This event can also be calculated from the direct bus as follows: if_pc_br0_br_pred=00 AND if_pc_br0_completed",
+ "PublicDescription": "IFU"
+ },
+ {,
+ "EventCode": "0x3094",
+ "EventName": "PM_CASTOUT_ISSUED",
+ "BriefDescription": "Castouts issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3096",
+ "EventName": "PM_CASTOUT_ISSUED_GPR",
+ "BriefDescription": "Castouts issued GPR",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2090",
+ "EventName": "PM_CLB_HELD",
+ "BriefDescription": "CLB Hold: Any Reason",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d018",
+ "EventName": "PM_CMPLU_STALL_BRU_CRU",
+ "BriefDescription": "Completion stall due to IFU",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30026",
+ "EventName": "PM_CMPLU_STALL_COQ_FULL",
+ "BriefDescription": "Completion stall due to CO q full",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30038",
+ "EventName": "PM_CMPLU_STALL_FLUSH",
+ "BriefDescription": "completion stall due to flush by own thread",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30028",
+ "EventName": "PM_CMPLU_STALL_MEM_ECC_DELAY",
+ "BriefDescription": "Completion stall due to mem ECC delay",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e01c",
+ "EventName": "PM_CMPLU_STALL_NO_NTF",
+ "BriefDescription": "Completion stall due to nop",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e01e",
+ "EventName": "PM_CMPLU_STALL_NTCG_FLUSH",
+ "BriefDescription": "Completion stall due to ntcg flush",
+ "PublicDescription": "Completion stall due to reject (load hit store)"
+ },
+ {,
+ "EventCode": "0x4c010",
+ "EventName": "PM_CMPLU_STALL_REJECT",
+ "BriefDescription": "Completion stall due to LSU reject",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c01a",
+ "EventName": "PM_CMPLU_STALL_REJECT_LHS",
+ "BriefDescription": "Completion stall due to reject (load hit store)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c014",
+ "EventName": "PM_CMPLU_STALL_REJ_LMQ_FULL",
+ "BriefDescription": "Completion stall due to LSU reject LMQ full",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d010",
+ "EventName": "PM_CMPLU_STALL_SCALAR",
+ "BriefDescription": "Completion stall due to VSU scalar instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d010",
+ "EventName": "PM_CMPLU_STALL_SCALAR_LONG",
+ "BriefDescription": "Completion stall due to VSU scalar long latency instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c014",
+ "EventName": "PM_CMPLU_STALL_STORE",
+ "BriefDescription": "Completion stall by stores this includes store agen finishes in pipe LS0/LS1 and store data finishes in LS2/LS3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d014",
+ "EventName": "PM_CMPLU_STALL_VECTOR",
+ "BriefDescription": "Completion stall due to VSU vector instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d012",
+ "EventName": "PM_CMPLU_STALL_VECTOR_LONG",
+ "BriefDescription": "Completion stall due to VSU vector long instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d012",
+ "EventName": "PM_CMPLU_STALL_VSU",
+ "BriefDescription": "Completion stall due to VSU instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x16083",
+ "EventName": "PM_CO0_ALLOC",
+ "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+ "PublicDescription": "0.0"
+ },
+ {,
+ "EventCode": "0x16082",
+ "EventName": "PM_CO0_BUSY",
+ "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x517082",
+ "EventName": "PM_CO_DISP_FAIL",
+ "BriefDescription": "CO dispatch failed due to all CO machines being busy",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x527084",
+ "EventName": "PM_CO_TM_SC_FOOTPRINT",
+ "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3608a",
+ "EventName": "PM_CO_USAGE",
+ "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40066",
+ "EventName": "PM_CRU_FIN",
+ "BriefDescription": "IFU Finished a (non-branch) instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x61c050",
+ "EventName": "PM_DATA_ALL_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for either demand loads or data prefetch",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for a demand load"
+ },
+ {,
+ "EventCode": "0x64c048",
+ "EventName": "PM_DATA_ALL_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c048",
+ "EventName": "PM_DATA_ALL_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c04c",
+ "EventName": "PM_DATA_ALL_FROM_DL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c04c",
+ "EventName": "PM_DATA_ALL_FROM_DMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c042",
+ "EventName": "PM_DATA_ALL_FROM_L2",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c046",
+ "EventName": "PM_DATA_ALL_FROM_L21_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c046",
+ "EventName": "PM_DATA_ALL_FROM_L21_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c04e",
+ "EventName": "PM_DATA_ALL_FROM_L2MISS_MOD",
+ "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L2 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c040",
+ "EventName": "PM_DATA_ALL_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c040",
+ "EventName": "PM_DATA_ALL_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c040",
+ "EventName": "PM_DATA_ALL_FROM_L2_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c040",
+ "EventName": "PM_DATA_ALL_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c042",
+ "EventName": "PM_DATA_ALL_FROM_L3",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c044",
+ "EventName": "PM_DATA_ALL_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c044",
+ "EventName": "PM_DATA_ALL_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c044",
+ "EventName": "PM_DATA_ALL_FROM_L31_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c046",
+ "EventName": "PM_DATA_ALL_FROM_L31_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c04e",
+ "EventName": "PM_DATA_ALL_FROM_L3MISS_MOD",
+ "BriefDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a localtion other than the local core's L3 due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c042",
+ "EventName": "PM_DATA_ALL_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c042",
+ "EventName": "PM_DATA_ALL_FROM_L3_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c044",
+ "EventName": "PM_DATA_ALL_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c04c",
+ "EventName": "PM_DATA_ALL_FROM_LL4",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c048",
+ "EventName": "PM_DATA_ALL_FROM_LMEM",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from the local chip's Memory due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c04c",
+ "EventName": "PM_DATA_ALL_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x64c04a",
+ "EventName": "PM_DATA_ALL_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c048",
+ "EventName": "PM_DATA_ALL_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c046",
+ "EventName": "PM_DATA_ALL_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x61c04a",
+ "EventName": "PM_DATA_ALL_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c04a",
+ "EventName": "PM_DATA_ALL_FROM_RL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x63c04a",
+ "EventName": "PM_DATA_ALL_FROM_RMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either demand loads or data prefetch",
+ "PublicDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {,
+ "EventCode": "0x62c050",
+ "EventName": "PM_DATA_ALL_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for either demand loads or data prefetch",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for a demand load"
+ },
+ {,
+ "EventCode": "0x62c052",
+ "EventName": "PM_DATA_ALL_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for either demand loads or data prefetch",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+ },
+ {,
+ "EventCode": "0x61c052",
+ "EventName": "PM_DATA_ALL_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for either demand loads or data prefetch",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pumpfor a demand load"
+ },
+ {
+ "EventCode": "0x61c054",
+ "EventName": "PM_DATA_ALL_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for either demand loads or data prefetch",
+ "PublicDescription": "Pump prediction correct. Counts across all types of pumps for a demand load"
+ },
+ {
+ "EventCode": "0x64c052",
+ "EventName": "PM_DATA_ALL_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for either demand loads or data prefetch",
+ "PublicDescription": "Pump Mis prediction Counts across all types of pumpsfor a demand load"
+ },
+ {
+ "EventCode": "0x63c050",
+ "EventName": "PM_DATA_ALL_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for either demand loads or data prefetch",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for a demand load"
+ },
+ {
+ "EventCode": "0x63c052",
+ "EventName": "PM_DATA_ALL_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for either demand loads or data prefetch",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+ },
+ {
+ "EventCode": "0x64c050",
+ "EventName": "PM_DATA_ALL_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for either demand loads or data prefetch",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for a demand load"
+ },
+ {
+ "EventCode": "0x4c046",
+ "EventName": "PM_DATA_FROM_L21_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {
+ "EventCode": "0x3c046",
+ "EventName": "PM_DATA_FROM_L21_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {
+ "EventCode": "0x4c044",
+ "EventName": "PM_DATA_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {
+ "EventCode": "0x3c044",
+ "EventName": "PM_DATA_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {
+ "EventCode": "0x2c044",
+ "EventName": "PM_DATA_FROM_L31_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {
+ "EventCode": "0x1c046",
+ "EventName": "PM_DATA_FROM_L31_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load",
+ "PublicDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either only demand loads or demand loads plus prefetches if MMCR1[16] is 1"
+ },
+ {
+ "EventCode": "0x400fe",
+ "EventName": "PM_DATA_FROM_MEM",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load",
+ "PublicDescription": "Data cache reload from memory (including L4)"
+ },
+ {
+ "EventCode": "0xe0bc",
+ "EventName": "PM_DC_COLLISIONS",
+ "BriefDescription": "DATA Cache collisions",
+ "PublicDescription": "DATA Cache collisions42"
+ },
+ {
+ "EventCode": "0x1e050",
+ "EventName": "PM_DC_PREF_STREAM_ALLOC",
+ "BriefDescription": "Stream marked valid. The stream could have been allocated through the hardware prefetch mechanism or through software. This is combined ls0 and ls1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2e050",
+ "EventName": "PM_DC_PREF_STREAM_CONF",
+ "BriefDescription": "A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Combine up + down",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4e050",
+ "EventName": "PM_DC_PREF_STREAM_FUZZY_CONF",
+ "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3e050",
+ "EventName": "PM_DC_PREF_STREAM_STRIDED_CONF",
+ "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb0ba",
+ "EventName": "PM_DFU",
+ "BriefDescription": "Finish DFU (all finish)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb0be",
+ "EventName": "PM_DFU_DCFFIX",
+ "BriefDescription": "Convert from fixed opcode finish (dcffix,dcffixq)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb0bc",
+ "EventName": "PM_DFU_DENBCD",
+ "BriefDescription": "BCD->DPD opcode finish (denbcd, denbcdq)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb0b8",
+ "EventName": "PM_DFU_MC",
+ "BriefDescription": "Finish DFU multicycle",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2092",
+ "EventName": "PM_DISP_CLB_HELD_BAL",
+ "BriefDescription": "Dispatch/CLB Hold: Balance",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2094",
+ "EventName": "PM_DISP_CLB_HELD_RES",
+ "BriefDescription": "Dispatch/CLB Hold: Resource",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20a8",
+ "EventName": "PM_DISP_CLB_HELD_SB",
+ "BriefDescription": "Dispatch/CLB Hold: Scoreboard",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2098",
+ "EventName": "PM_DISP_CLB_HELD_SYNC",
+ "BriefDescription": "Dispatch/CLB Hold: Sync type instruction",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2096",
+ "EventName": "PM_DISP_CLB_HELD_TLBIE",
+ "BriefDescription": "Dispatch Hold: Due to TLBIE",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_HELD_IQ_FULL",
+ "BriefDescription": "Dispatch held due to Issue q full",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x1002a",
+ "EventName": "PM_DISP_HELD_MAP_FULL",
+ "BriefDescription": "Dispatch for this thread was held because the Mappers were full",
+ "PublicDescription": "Dispatch held due to Mapper full"
+ },
+ {
+ "EventCode": "0x30018",
+ "EventName": "PM_DISP_HELD_SRQ_FULL",
+ "BriefDescription": "Dispatch held due SRQ no room",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30a6",
+ "EventName": "PM_DISP_HOLD_GCT_FULL",
+ "BriefDescription": "Dispatch Hold Due to no space in the GCT",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30008",
+ "EventName": "PM_DISP_WT",
+ "BriefDescription": "Dispatched Starved",
+ "PublicDescription": "Dispatched Starved (not held, nothing to dispatch)"
+ },
+ {
+ "EventCode": "0x4e046",
+ "EventName": "PM_DPTEG_FROM_L21_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3e046",
+ "EventName": "PM_DPTEG_FROM_L21_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3e040",
+ "EventName": "PM_DPTEG_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4e040",
+ "EventName": "PM_DPTEG_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4e044",
+ "EventName": "PM_DPTEG_FROM_L31_ECO_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3e044",
+ "EventName": "PM_DPTEG_FROM_L31_ECO_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2e044",
+ "EventName": "PM_DPTEG_FROM_L31_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x1e046",
+ "EventName": "PM_DPTEG_FROM_L31_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50a8",
+ "EventName": "PM_EAT_FORCE_MISPRED",
+ "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issue",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4084",
+ "EventName": "PM_EAT_FULL_CYC",
+ "BriefDescription": "Cycles No room in EAT",
+ "PublicDescription": "Cycles No room in EATSet on bank conflict and case where no ibuffers available"
+ },
+ {
+ "EventCode": "0x2080",
+ "EventName": "PM_EE_OFF_EXT_INT",
+ "BriefDescription": "Ee off and external interrupt",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20b4",
+ "EventName": "PM_FAV_TBEGIN",
+ "BriefDescription": "Dispatch time Favored tbegin",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x100f4",
+ "EventName": "PM_FLOP",
+ "BriefDescription": "Floating Point Operation Finished",
+ "PublicDescription": "Floating Point Operations Finished"
+ },
+ {
+ "EventCode": "0xa0ae",
+ "EventName": "PM_FLOP_SUM_SCALAR",
+ "BriefDescription": "flops summary scalar instructions",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0ac",
+ "EventName": "PM_FLOP_SUM_VEC",
+ "BriefDescription": "flops summary vector instructions",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2084",
+ "EventName": "PM_FLUSH_BR_MPRED",
+ "BriefDescription": "Flush caused by branch mispredict",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2082",
+ "EventName": "PM_FLUSH_DISP",
+ "BriefDescription": "Dispatch flush",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x208c",
+ "EventName": "PM_FLUSH_DISP_SB",
+ "BriefDescription": "Dispatch Flush: Scoreboard",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2088",
+ "EventName": "PM_FLUSH_DISP_SYNC",
+ "BriefDescription": "Dispatch Flush: Sync",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x208a",
+ "EventName": "PM_FLUSH_DISP_TLBIE",
+ "BriefDescription": "Dispatch Flush: TLBIE",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x208e",
+ "EventName": "PM_FLUSH_LSU",
+ "BriefDescription": "Flush initiated by LSU",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2086",
+ "EventName": "PM_FLUSH_PARTIAL",
+ "BriefDescription": "Partial flush",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0b0",
+ "EventName": "PM_FPU0_FCONV",
+ "BriefDescription": "Convert instruction executed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0b8",
+ "EventName": "PM_FPU0_FEST",
+ "BriefDescription": "Estimate instruction executed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0b4",
+ "EventName": "PM_FPU0_FRSP",
+ "BriefDescription": "Round to single precision instruction executed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0b2",
+ "EventName": "PM_FPU1_FCONV",
+ "BriefDescription": "Convert instruction executed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0ba",
+ "EventName": "PM_FPU1_FEST",
+ "BriefDescription": "Estimate instruction executed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xa0b6",
+ "EventName": "PM_FPU1_FRSP",
+ "BriefDescription": "Round to single precision instruction executed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50b0",
+ "EventName": "PM_FUSION_TOC_GRP0_1",
+ "BriefDescription": "One pair of instructions fused with TOC in Group0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50ae",
+ "EventName": "PM_FUSION_TOC_GRP0_2",
+ "BriefDescription": "Two pairs of instructions fused with TOCin Group0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50ac",
+ "EventName": "PM_FUSION_TOC_GRP0_3",
+ "BriefDescription": "Three pairs of instructions fused with TOC in Group0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50b2",
+ "EventName": "PM_FUSION_TOC_GRP1_1",
+ "BriefDescription": "One pair of instructions fused with TOX in Group1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50b8",
+ "EventName": "PM_FUSION_VSX_GRP0_1",
+ "BriefDescription": "One pair of instructions fused with VSX in Group0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50b6",
+ "EventName": "PM_FUSION_VSX_GRP0_2",
+ "BriefDescription": "Two pairs of instructions fused with VSX in Group0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50b4",
+ "EventName": "PM_FUSION_VSX_GRP0_3",
+ "BriefDescription": "Three pairs of instructions fused with VSX in Group0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50ba",
+ "EventName": "PM_FUSION_VSX_GRP1_1",
+ "BriefDescription": "One pair of instructions fused with VSX in Group1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3000e",
+ "EventName": "PM_FXU0_BUSY_FXU1_IDLE",
+ "BriefDescription": "fxu0 busy and fxu1 idle",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x10004",
+ "EventName": "PM_FXU0_FIN",
+ "BriefDescription": "The fixed point unit Unit 0 finished an instruction. Instructions that finish may not necessary complete",
+ "PublicDescription": "FXU0 Finished"
+ },
+ {
+ "EventCode": "0x4000e",
+ "EventName": "PM_FXU1_BUSY_FXU0_IDLE",
+ "BriefDescription": "fxu0 idle and fxu1 busy",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x40004",
+ "EventName": "PM_FXU1_FIN",
+ "BriefDescription": "FXU1 Finished",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20008",
+ "EventName": "PM_GCT_EMPTY_CYC",
+ "BriefDescription": "No itags assigned either thread (GCT Empty)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30a4",
+ "EventName": "PM_GCT_MERGE",
+ "BriefDescription": "Group dispatched on a merged GCT empty. GCT entries can be merged only within the same thread",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4d01e",
+ "EventName": "PM_GCT_NOSLOT_BR_MPRED",
+ "BriefDescription": "Gct empty for this thread due to branch mispred",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4d01a",
+ "EventName": "PM_GCT_NOSLOT_BR_MPRED_ICMISS",
+ "BriefDescription": "Gct empty for this thread due to Icache Miss and branch mispred",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x100f8",
+ "EventName": "PM_GCT_NOSLOT_CYC",
+ "BriefDescription": "No itags assigned",
+ "PublicDescription": "Pipeline empty (No itags assigned , no GCT slots used)"
+ },
+ {
+ "EventCode": "0x2d01e",
+ "EventName": "PM_GCT_NOSLOT_DISP_HELD_ISSQ",
+ "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to Issue q full",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4d01c",
+ "EventName": "PM_GCT_NOSLOT_DISP_HELD_MAP",
+ "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to Mapper full",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2e010",
+ "EventName": "PM_GCT_NOSLOT_DISP_HELD_OTHER",
+ "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to sync",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2d01c",
+ "EventName": "PM_GCT_NOSLOT_DISP_HELD_SRQ",
+ "BriefDescription": "Gct empty for this thread due to dispatch hold on this thread due to SRQ full",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4e010",
+ "EventName": "PM_GCT_NOSLOT_IC_L3MISS",
+ "BriefDescription": "Gct empty for this thread due to icach l3 miss",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2d01a",
+ "EventName": "PM_GCT_NOSLOT_IC_MISS",
+ "BriefDescription": "Gct empty for this thread due to Icache Miss",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20a2",
+ "EventName": "PM_GCT_UTIL_11_14_ENTRIES",
+ "BriefDescription": "GCT Utilization 11-14 entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20a4",
+ "EventName": "PM_GCT_UTIL_15_17_ENTRIES",
+ "BriefDescription": "GCT Utilization 15-17 entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20a6",
+ "EventName": "PM_GCT_UTIL_18_ENTRIES",
+ "BriefDescription": "GCT Utilization 18+ entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x209c",
+ "EventName": "PM_GCT_UTIL_1_2_ENTRIES",
+ "BriefDescription": "GCT Utilization 1-2 entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x209e",
+ "EventName": "PM_GCT_UTIL_3_6_ENTRIES",
+ "BriefDescription": "GCT Utilization 3-6 entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x20a0",
+ "EventName": "PM_GCT_UTIL_7_10_ENTRIES",
+ "BriefDescription": "GCT Utilization 7-10 entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x1000a",
+ "EventName": "PM_GRP_BR_MPRED_NONSPEC",
+ "BriefDescription": "Group experienced non-speculative branch redirect",
+ "PublicDescription": "Group experienced Non-speculative br mispredicct"
+ },
+ {
+ "EventCode": "0x30004",
+ "EventName": "PM_GRP_CMPL",
+ "BriefDescription": "group completed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3000a",
+ "EventName": "PM_GRP_DISP",
+ "BriefDescription": "group dispatch",
+ "PublicDescription": "dispatch_success (Group Dispatched)"
+ },
+ {
+ "EventCode": "0x1000c",
+ "EventName": "PM_GRP_IC_MISS_NONSPEC",
+ "BriefDescription": "Group experienced non-speculative I cache miss",
+ "PublicDescription": "Group experi enced Non-specu lative I cache miss"
+ },
+ {
+ "EventCode": "0x10130",
+ "EventName": "PM_GRP_MRK",
+ "BriefDescription": "Instruction Marked",
+ "PublicDescription": "Instruction marked in idu"
+ },
+ {
+ "EventCode": "0x509c",
+ "EventName": "PM_GRP_NON_FULL_GROUP",
+ "BriefDescription": "GROUPs where we did not have 6 non branch instructions in the group(ST mode), in SMT mode 3 non branches",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50a4",
+ "EventName": "PM_GRP_TERM_2ND_BRANCH",
+ "BriefDescription": "There were enough instructions in the Ibuffer, but 2nd branch ends group",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50a6",
+ "EventName": "PM_GRP_TERM_FPU_AFTER_BR",
+ "BriefDescription": "There were enough instructions in the Ibuffer, but FPU OP IN same group after a branch terminates a group, cant do partial flushes",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x509e",
+ "EventName": "PM_GRP_TERM_NOINST",
+ "BriefDescription": "Do not fill every slot in the group, Not enough instructions in the Ibuffer. This includes cases where the group started with enough instructions, but some got knocked out by a cache miss or branch redirect (which would also empty the Ibuffer)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50a0",
+ "EventName": "PM_GRP_TERM_OTHER",
+ "BriefDescription": "There were enough instructions in the Ibuffer, but the group terminated early for some other reason, most likely due to a First or Last",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x50a2",
+ "EventName": "PM_GRP_TERM_SLOT_LIMIT",
+ "BriefDescription": "There were enough instructions in the Ibuffer, but 3 src RA/RB/RC , 2 way crack caused a group termination",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4086",
+ "EventName": "PM_IBUF_FULL_CYC",
+ "BriefDescription": "Cycles No room in ibuff",
+ "PublicDescription": "Cycles No room in ibufffully qualified transfer (if5 valid)"
+ },
+ {
+ "EventCode": "0x4098",
+ "EventName": "PM_IC_DEMAND_L2_BHT_REDIRECT",
+ "BriefDescription": "L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x409a",
+ "EventName": "PM_IC_DEMAND_L2_BR_REDIRECT",
+ "BriefDescription": "L2 I cache demand request due to branch Mispredict ( 15 cycle path)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4088",
+ "EventName": "PM_IC_DEMAND_REQ",
+ "BriefDescription": "Demand Instruction fetch request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x508a",
+ "EventName": "PM_IC_INVALIDATE",
+ "BriefDescription": "Ic line invalidated",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4092",
+ "EventName": "PM_IC_PREF_CANCEL_HIT",
+ "BriefDescription": "Prefetch Canceled due to icache hit",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4094",
+ "EventName": "PM_IC_PREF_CANCEL_L2",
+ "BriefDescription": "L2 Squashed request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4090",
+ "EventName": "PM_IC_PREF_CANCEL_PAGE",
+ "BriefDescription": "Prefetch Canceled due to page boundary",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x408a",
+ "EventName": "PM_IC_PREF_REQ",
+ "BriefDescription": "Instruction prefetch requests",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x408e",
+ "EventName": "PM_IC_PREF_WRITE",
+ "BriefDescription": "Instruction prefetch written into IL1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4096",
+ "EventName": "PM_IC_RELOAD_PRIVATE",
+ "BriefDescription": "Reloading line was brought in private for a specific thread. Most lines are brought in shared for all eight thrreads. If RA does not match then invalidates and then brings it shared to other thread. In P7 line brought in private , then line was invalidat",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x5088",
+ "EventName": "PM_IFU_L2_TOUCH",
+ "BriefDescription": "L2 touch to update MRU on a line",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x514050",
+ "EventName": "PM_INST_ALL_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for instruction fetches and prefetches",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was chip pump (prediction=correct) for an instruction fetch"
+ },
+ {
+ "EventCode": "0x544048",
+ "EventName": "PM_INST_ALL_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x534048",
+ "EventName": "PM_INST_ALL_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x53404c",
+ "EventName": "PM_INST_ALL_FROM_DL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x54404c",
+ "EventName": "PM_INST_ALL_FROM_DMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x514042",
+ "EventName": "PM_INST_ALL_FROM_L2",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x544046",
+ "EventName": "PM_INST_ALL_FROM_L21_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x534046",
+ "EventName": "PM_INST_ALL_FROM_L21_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x51404e",
+ "EventName": "PM_INST_ALL_FROM_L2MISS",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L2 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x534040",
+ "EventName": "PM_INST_ALL_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x544040",
+ "EventName": "PM_INST_ALL_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x524040",
+ "EventName": "PM_INST_ALL_FROM_L2_MEPF",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x514040",
+ "EventName": "PM_INST_ALL_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x544042",
+ "EventName": "PM_INST_ALL_FROM_L3",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x544044",
+ "EventName": "PM_INST_ALL_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x534044",
+ "EventName": "PM_INST_ALL_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x524044",
+ "EventName": "PM_INST_ALL_FROM_L31_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x514046",
+ "EventName": "PM_INST_ALL_FROM_L31_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x54404e",
+ "EventName": "PM_INST_ALL_FROM_L3MISS_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to a instruction fetch",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a localtion other than the local core's L3 due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x534042",
+ "EventName": "PM_INST_ALL_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x524042",
+ "EventName": "PM_INST_ALL_FROM_L3_MEPF",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x514044",
+ "EventName": "PM_INST_ALL_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x51404c",
+ "EventName": "PM_INST_ALL_FROM_LL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x524048",
+ "EventName": "PM_INST_ALL_FROM_LMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x52404c",
+ "EventName": "PM_INST_ALL_FROM_MEMORY",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x54404a",
+ "EventName": "PM_INST_ALL_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x514048",
+ "EventName": "PM_INST_ALL_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x524046",
+ "EventName": "PM_INST_ALL_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x51404a",
+ "EventName": "PM_INST_ALL_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x52404a",
+ "EventName": "PM_INST_ALL_FROM_RL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x53404a",
+ "EventName": "PM_INST_ALL_FROM_RMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to instruction fetches and prefetches",
+ "PublicDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x524050",
+ "EventName": "PM_INST_ALL_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for instruction fetches and prefetches",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for an instruction fetch"
+ },
+ {
+ "EventCode": "0x524052",
+ "EventName": "PM_INST_ALL_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for instruction fetches and prefetches",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope OR Final Pump Scope(Group) got data from source that was at smaller scope(Chip) Final pump was group pump and initial pump was chip or final and initial pump was gro"
+ },
+ {
+ "EventCode": "0x514052",
+ "EventName": "PM_INST_ALL_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for instruction fetches and prefetches",
+ "PublicDescription": "Final Pump Scope(Group) to get data sourced, ended up larger than Initial Pump Scope (Chip) Final pump was group pump and initial pump was chip pumpfor an instruction fetch"
+ },
+ {
+ "EventCode": "0x514054",
+ "EventName": "PM_INST_ALL_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for instruction fetches and prefetches",
+ "PublicDescription": "Pump prediction correct. Counts across all types of pumpsfor an instruction fetch"
+ },
+ {
+ "EventCode": "0x544052",
+ "EventName": "PM_INST_ALL_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for instruction fetches and prefetches",
+ "PublicDescription": "Pump Mis prediction Counts across all types of pumpsfor an instruction fetch"
+ },
+ {
+ "EventCode": "0x534050",
+ "EventName": "PM_INST_ALL_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for instruction fetches and prefetches",
+ "PublicDescription": "Initial and Final Pump Scope and data sourced across this scope was system pump for an instruction fetch"
+ },
+ {
+ "EventCode": "0x534052",
+ "EventName": "PM_INST_ALL_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for instruction fetches and prefetches",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope(Chip/Group) OR Final Pump Scope(system) got data from source that was at smaller scope(Chip/group) Final pump was system pump and initial pump was chip or group or"
+ },
+ {
+ "EventCode": "0x544050",
+ "EventName": "PM_INST_ALL_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for instruction fetches and prefetches",
+ "PublicDescription": "Final Pump Scope(system) to get data sourced, ended up larger than Initial Pump Scope (Chip or Group) for an instruction fetch"
+ },
+ {
+ "EventCode": "0x4080",
+ "EventName": "PM_INST_FROM_L1",
+ "BriefDescription": "Instruction fetches from L1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x44046",
+ "EventName": "PM_INST_FROM_L21_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x34046",
+ "EventName": "PM_INST_FROM_L21_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x44044",
+ "EventName": "PM_INST_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x34044",
+ "EventName": "PM_INST_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x24044",
+ "EventName": "PM_INST_FROM_L31_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x14046",
+ "EventName": "PM_INST_FROM_L31_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)",
+ "PublicDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to either an instruction fetch or instruction fetch plus prefetch if MMCR1[17] is 1"
+ },
+ {
+ "EventCode": "0x30016",
+ "EventName": "PM_INST_IMC_MATCH_DISP",
+ "BriefDescription": "Matched Instructions Dispatched",
+ "PublicDescription": "IMC Matches dispatched"
+ },
+ {
+ "EventCode": "0x30014",
+ "EventName": "PM_IOPS_DISP",
+ "BriefDescription": "Internal Operations dispatched",
+ "PublicDescription": "IOPS dispatched"
+ },
+ {
+ "EventCode": "0x45046",
+ "EventName": "PM_IPTEG_FROM_L21_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x35046",
+ "EventName": "PM_IPTEG_FROM_L21_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x35040",
+ "EventName": "PM_IPTEG_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x45040",
+ "EventName": "PM_IPTEG_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x45044",
+ "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x35044",
+ "EventName": "PM_IPTEG_FROM_L31_ECO_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x25044",
+ "EventName": "PM_IPTEG_FROM_L31_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x15046",
+ "EventName": "PM_IPTEG_FROM_L31_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x617082",
+ "EventName": "PM_ISIDE_DISP",
+ "BriefDescription": "All i-side dispatch attempts",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x627084",
+ "EventName": "PM_ISIDE_DISP_FAIL",
+ "BriefDescription": "All i-side dispatch attempts that failed due to a addr collision with another machine",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x627086",
+ "EventName": "PM_ISIDE_DISP_FAIL_OTHER",
+ "BriefDescription": "All i-side dispatch attempts that failed due to a reason other than addrs collision",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4608e",
+ "EventName": "PM_ISIDE_L2MEMACC",
+ "BriefDescription": "valid when first beat of data comes in for an i-side fetch where data came from mem(or L4)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x44608e",
+ "EventName": "PM_ISIDE_MRU_TOUCH",
+ "BriefDescription": "Iside L2 MRU touch",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30ac",
+ "EventName": "PM_ISU_REF_FX0",
+ "BriefDescription": "FX0 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30ae",
+ "EventName": "PM_ISU_REF_FX1",
+ "BriefDescription": "FX1 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x38ac",
+ "EventName": "PM_ISU_REF_FXU",
+ "BriefDescription": "FXU ISU reject from either pipe",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30b0",
+ "EventName": "PM_ISU_REF_LS0",
+ "BriefDescription": "LS0 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30b2",
+ "EventName": "PM_ISU_REF_LS1",
+ "BriefDescription": "LS1 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30b4",
+ "EventName": "PM_ISU_REF_LS2",
+ "BriefDescription": "LS2 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30b6",
+ "EventName": "PM_ISU_REF_LS3",
+ "BriefDescription": "LS3 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x309c",
+ "EventName": "PM_ISU_REJECTS_ALL",
+ "BriefDescription": "All isu rejects could be more than 1 per cycle",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30a2",
+ "EventName": "PM_ISU_REJECT_RES_NA",
+ "BriefDescription": "ISU reject due to resource not available",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x309e",
+ "EventName": "PM_ISU_REJECT_SAR_BYPASS",
+ "BriefDescription": "Reject because of SAR bypass",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30a0",
+ "EventName": "PM_ISU_REJECT_SRC_NA",
+ "BriefDescription": "ISU reject due to source not available",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30a8",
+ "EventName": "PM_ISU_REJ_VS0",
+ "BriefDescription": "VS0 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30aa",
+ "EventName": "PM_ISU_REJ_VS1",
+ "BriefDescription": "VS1 ISU reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x38a8",
+ "EventName": "PM_ISU_REJ_VSU",
+ "BriefDescription": "VSU ISU reject from either pipe",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x30b8",
+ "EventName": "PM_ISYNC",
+ "BriefDescription": "Isync count per thread",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x200301ea",
+ "EventName": "PM_L1MISS_LAT_EXC_1024",
+ "BriefDescription": "L1 misses that took longer than 1024 cyles to resolve (miss to reload)",
+ "PublicDescription": "Reload latency exceeded 1024 cyc"
+ },
+ {
+ "EventCode": "0x200401ec",
+ "EventName": "PM_L1MISS_LAT_EXC_2048",
+ "BriefDescription": "L1 misses that took longer than 2048 cyles to resolve (miss to reload)",
+ "PublicDescription": "Reload latency exceeded 2048 cyc"
+ },
+ {
+ "EventCode": "0x200101e8",
+ "EventName": "PM_L1MISS_LAT_EXC_256",
+ "BriefDescription": "L1 misses that took longer than 256 cyles to resolve (miss to reload)",
+ "PublicDescription": "Reload latency exceeded 256 cyc"
+ },
+ {
+ "EventCode": "0x200201e6",
+ "EventName": "PM_L1MISS_LAT_EXC_32",
+ "BriefDescription": "L1 misses that took longer than 32 cyles to resolve (miss to reload)",
+ "PublicDescription": "Reload latency exceeded 32 cyc"
+ },
+ {
+ "EventCode": "0x26086",
+ "EventName": "PM_L1PF_L2MEMACC",
+ "BriefDescription": "valid when first beat of data comes in for an L1pref where data came from mem(or L4)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x408c",
+ "EventName": "PM_L1_DEMAND_WRITE",
+ "BriefDescription": "Instruction Demand sectors wriittent into IL1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x417080",
+ "EventName": "PM_L2_CASTOUT_MOD",
+ "BriefDescription": "L2 Castouts - Modified (M, Mu, Me)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x417082",
+ "EventName": "PM_L2_CASTOUT_SHR",
+ "BriefDescription": "L2 Castouts - Shared (T, Te, Si, S)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x27084",
+ "EventName": "PM_L2_CHIP_PUMP",
+ "BriefDescription": "RC requests that were local on chip pump attempts",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x427086",
+ "EventName": "PM_L2_DC_INV",
+ "BriefDescription": "Dcache invalidates from L2",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x44608c",
+ "EventName": "PM_L2_DISP_ALL_L2MISS",
+ "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2miss",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x27086",
+ "EventName": "PM_L2_GROUP_PUMP",
+ "BriefDescription": "RC requests that were on Node Pump attempts",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x626084",
+ "EventName": "PM_L2_GRP_GUESS_CORRECT",
+ "BriefDescription": "L2 guess grp and guess was correct (data intra-6chip AND ^on-chip)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x626086",
+ "EventName": "PM_L2_GRP_GUESS_WRONG",
+ "BriefDescription": "L2 guess grp and guess was not correct (ie data on-chip OR beyond-6chip)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x427084",
+ "EventName": "PM_L2_IC_INV",
+ "BriefDescription": "Icache Invalidates from L2",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x436088",
+ "EventName": "PM_L2_INST",
+ "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x43608a",
+ "EventName": "PM_L2_INST_MISS",
+ "BriefDescription": "All successful i-side dispatches that were an L2miss for this thread (excludes i_l2mru_tch reqs)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x416080",
+ "EventName": "PM_L2_LD",
+ "BriefDescription": "All successful D-side Load dispatches for this thread",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x437088",
+ "EventName": "PM_L2_LD_DISP",
+ "BriefDescription": "All successful load dispatches",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x43708a",
+ "EventName": "PM_L2_LD_HIT",
+ "BriefDescription": "All successful load dispatches that were L2 hits",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x426084",
+ "EventName": "PM_L2_LD_MISS",
+ "BriefDescription": "All successful D-Side Load dispatches that were an L2miss for this thread",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x616080",
+ "EventName": "PM_L2_LOC_GUESS_CORRECT",
+ "BriefDescription": "L2 guess loc and guess was correct (ie data local)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x616082",
+ "EventName": "PM_L2_LOC_GUESS_WRONG",
+ "BriefDescription": "L2 guess loc and guess was not correct (ie data not on chip)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x516080",
+ "EventName": "PM_L2_RCLD_DISP",
+ "BriefDescription": "L2 RC load dispatch attempt",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x516082",
+ "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR",
+ "BriefDescription": "L2 RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x526084",
+ "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER",
+ "BriefDescription": "L2 RC load dispatch attempt failed due to other reasons",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x536088",
+ "EventName": "PM_L2_RCST_DISP",
+ "BriefDescription": "L2 RC store dispatch attempt",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x53608a",
+ "EventName": "PM_L2_RCST_DISP_FAIL_ADDR",
+ "BriefDescription": "L2 RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x54608c",
+ "EventName": "PM_L2_RCST_DISP_FAIL_OTHER",
+ "BriefDescription": "L2 RC store dispatch attempt failed due to other reasons",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x537088",
+ "EventName": "PM_L2_RC_ST_DONE",
+ "BriefDescription": "RC did st to line that was Tx or Sx",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x63708a",
+ "EventName": "PM_L2_RTY_LD",
+ "BriefDescription": "RC retries on PB for any load from core",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3708a",
+ "EventName": "PM_L2_RTY_ST",
+ "BriefDescription": "RC retries on PB for any store from core",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x54708c",
+ "EventName": "PM_L2_SN_M_RD_DONE",
+ "BriefDescription": "SNP dispatched for a read and was M",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x54708e",
+ "EventName": "PM_L2_SN_M_WR_DONE",
+ "BriefDescription": "SNP dispatched for a write and was M",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x53708a",
+ "EventName": "PM_L2_SN_SX_I_DONE",
+ "BriefDescription": "SNP dispatched and went from Sx or Tx to Ix",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x17080",
+ "EventName": "PM_L2_ST",
+ "BriefDescription": "All successful D-side store dispatches for this thread",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x44708c",
+ "EventName": "PM_L2_ST_DISP",
+ "BriefDescription": "All successful store dispatches",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x44708e",
+ "EventName": "PM_L2_ST_HIT",
+ "BriefDescription": "All successful store dispatches that were L2Hits",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x17082",
+ "EventName": "PM_L2_ST_MISS",
+ "BriefDescription": "All successful D-side store dispatches for this thread that were L2 Miss",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x636088",
+ "EventName": "PM_L2_SYS_GUESS_CORRECT",
+ "BriefDescription": "L2 guess sys and guess was correct (i.e. data beyond-6chip)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x63608a",
+ "EventName": "PM_L2_SYS_GUESS_WRONG",
+ "BriefDescription": "L2 guess sys and guess was not correct (i.e. data not beyond-6chip)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x617080",
+ "EventName": "PM_L2_SYS_PUMP",
+ "BriefDescription": "RC requests that were system pump attempts",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e05e",
+ "EventName": "PM_L2_TM_REQ_ABORT",
+ "BriefDescription": "TM abort",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3e05c",
+ "EventName": "PM_L2_TM_ST_ABORT_SISTER",
+ "BriefDescription": "TM marked store abort",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x23808a",
+ "EventName": "PM_L3_CINJ",
+ "BriefDescription": "l3 ci of cache inject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x128084",
+ "EventName": "PM_L3_CI_HIT",
+ "BriefDescription": "L3 Castins Hit (total count)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x128086",
+ "EventName": "PM_L3_CI_MISS",
+ "BriefDescription": "L3 castins miss (total count)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x819082",
+ "EventName": "PM_L3_CI_USAGE",
+ "BriefDescription": "rotating sample of 16 CI or CO actives",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x438088",
+ "EventName": "PM_L3_CO",
+ "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x83908b",
+ "EventName": "PM_L3_CO0_ALLOC",
+ "BriefDescription": "lifetime, sample of CO machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x83908a",
+ "EventName": "PM_L3_CO0_BUSY",
+ "BriefDescription": "lifetime, sample of CO machine 0 valid",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x28086",
+ "EventName": "PM_L3_CO_L31",
+ "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 (lossy)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x238088",
+ "EventName": "PM_L3_CO_LCO",
+ "BriefDescription": "Total L3 castouts occurred on LCO",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x28084",
+ "EventName": "PM_L3_CO_MEM",
+ "BriefDescription": "L3 CO to memory OR of port 0 and 1 (lossy)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb19082",
+ "EventName": "PM_L3_GRP_GUESS_CORRECT",
+ "BriefDescription": "Initial scope=group and data from same group (near) (pred successful)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb3908a",
+ "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH",
+ "BriefDescription": "Initial scope=group but data from local node. Prediction too high",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb39088",
+ "EventName": "PM_L3_GRP_GUESS_WRONG_LOW",
+ "BriefDescription": "Initial scope=group but data from outside group (far or rem). Prediction too low",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x218080",
+ "EventName": "PM_L3_HIT",
+ "BriefDescription": "L3 Hits",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x138088",
+ "EventName": "PM_L3_L2_CO_HIT",
+ "BriefDescription": "L2 castout hits",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x13808a",
+ "EventName": "PM_L3_L2_CO_MISS",
+ "BriefDescription": "L2 castout miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x14808c",
+ "EventName": "PM_L3_LAT_CI_HIT",
+ "BriefDescription": "L3 Lateral Castins Hit",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x14808e",
+ "EventName": "PM_L3_LAT_CI_MISS",
+ "BriefDescription": "L3 Lateral Castins Miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x228084",
+ "EventName": "PM_L3_LD_HIT",
+ "BriefDescription": "L3 demand LD Hits",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x228086",
+ "EventName": "PM_L3_LD_MISS",
+ "BriefDescription": "L3 demand LD Miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e052",
+ "EventName": "PM_L3_LD_PREF",
+ "BriefDescription": "L3 Load Prefetches",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb19080",
+ "EventName": "PM_L3_LOC_GUESS_CORRECT",
+ "BriefDescription": "initial scope=node/chip and data from local node (local) (pred successful)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb29086",
+ "EventName": "PM_L3_LOC_GUESS_WRONG",
+ "BriefDescription": "Initial scope=node but data from outside the local node (near, far or rem). Prediction too low",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x218082",
+ "EventName": "PM_L3_MISS",
+ "BriefDescription": "L3 Misses",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x54808c",
+ "EventName": "PM_L3_P0_CO_L31",
+ "BriefDescription": "l3 CO to L3.1 (lco) port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x538088",
+ "EventName": "PM_L3_P0_CO_MEM",
+ "BriefDescription": "l3 CO to memory port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x929084",
+ "EventName": "PM_L3_P0_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa29084",
+ "EventName": "PM_L3_P0_GRP_PUMP",
+ "BriefDescription": "L3 pf sent with grp scope port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x528084",
+ "EventName": "PM_L3_P0_LCO_DATA",
+ "BriefDescription": "lco sent with data port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x518080",
+ "EventName": "PM_L3_P0_LCO_NO_DATA",
+ "BriefDescription": "dataless l3 lco sent port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa4908c",
+ "EventName": "PM_L3_P0_LCO_RTY",
+ "BriefDescription": "L3 LCO received retry port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa19080",
+ "EventName": "PM_L3_P0_NODE_PUMP",
+ "BriefDescription": "L3 pf sent with nodal scope port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x919080",
+ "EventName": "PM_L3_P0_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x939088",
+ "EventName": "PM_L3_P0_SN_HIT",
+ "BriefDescription": "L3 snoop hit port 0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x118080",
+ "EventName": "PM_L3_P0_SN_INV",
+ "BriefDescription": "Port0 snooper detects someone doing a store to a line that is Sx",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x94908c",
+ "EventName": "PM_L3_P0_SN_MISS",
+ "BriefDescription": "L3 snoop miss port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa39088",
+ "EventName": "PM_L3_P0_SYS_PUMP",
+ "BriefDescription": "L3 pf sent with sys scope port 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x54808e",
+ "EventName": "PM_L3_P1_CO_L31",
+ "BriefDescription": "l3 CO to L3.1 (lco) port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x53808a",
+ "EventName": "PM_L3_P1_CO_MEM",
+ "BriefDescription": "l3 CO to memory port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x929086",
+ "EventName": "PM_L3_P1_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa29086",
+ "EventName": "PM_L3_P1_GRP_PUMP",
+ "BriefDescription": "L3 pf sent with grp scope port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x528086",
+ "EventName": "PM_L3_P1_LCO_DATA",
+ "BriefDescription": "lco sent with data port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x518082",
+ "EventName": "PM_L3_P1_LCO_NO_DATA",
+ "BriefDescription": "dataless l3 lco sent port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa4908e",
+ "EventName": "PM_L3_P1_LCO_RTY",
+ "BriefDescription": "L3 LCO received retry port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa19082",
+ "EventName": "PM_L3_P1_NODE_PUMP",
+ "BriefDescription": "L3 pf sent with nodal scope port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x919082",
+ "EventName": "PM_L3_P1_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x93908a",
+ "EventName": "PM_L3_P1_SN_HIT",
+ "BriefDescription": "L3 snoop hit port 1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x118082",
+ "EventName": "PM_L3_P1_SN_INV",
+ "BriefDescription": "Port1 snooper detects someone doing a store to a line that is Sx",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x94908e",
+ "EventName": "PM_L3_P1_SN_MISS",
+ "BriefDescription": "L3 snoop miss port 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa3908a",
+ "EventName": "PM_L3_P1_SYS_PUMP",
+ "BriefDescription": "L3 pf sent with sys scope port 1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x84908d",
+ "EventName": "PM_L3_PF0_ALLOC",
+ "BriefDescription": "lifetime, sample of PF machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x84908c",
+ "EventName": "PM_L3_PF0_BUSY",
+ "BriefDescription": "lifetime, sample of PF machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x428084",
+ "EventName": "PM_L3_PF_HIT_L3",
+ "BriefDescription": "l3 pf hit in l3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x18080",
+ "EventName": "PM_L3_PF_MISS_L3",
+ "BriefDescription": "L3 Prefetch missed in L3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3808a",
+ "EventName": "PM_L3_PF_OFF_CHIP_CACHE",
+ "BriefDescription": "L3 Prefetch from Off chip cache",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4808e",
+ "EventName": "PM_L3_PF_OFF_CHIP_MEM",
+ "BriefDescription": "L3 Prefetch from Off chip memory",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x38088",
+ "EventName": "PM_L3_PF_ON_CHIP_CACHE",
+ "BriefDescription": "L3 Prefetch from On chip cache",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4808c",
+ "EventName": "PM_L3_PF_ON_CHIP_MEM",
+ "BriefDescription": "L3 Prefetch from On chip memory",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x829084",
+ "EventName": "PM_L3_PF_USAGE",
+ "BriefDescription": "rotating sample of 32 PF actives",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e052",
+ "EventName": "PM_L3_PREF_ALL",
+ "BriefDescription": "Total HW L3 prefetches(Load+store)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x84908f",
+ "EventName": "PM_L3_RD0_ALLOC",
+ "BriefDescription": "lifetime, sample of RD machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x84908e",
+ "EventName": "PM_L3_RD0_BUSY",
+ "BriefDescription": "lifetime, sample of RD machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x829086",
+ "EventName": "PM_L3_RD_USAGE",
+ "BriefDescription": "rotating sample of 16 RD actives",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x839089",
+ "EventName": "PM_L3_SN0_ALLOC",
+ "BriefDescription": "lifetime, sample of snooper machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x839088",
+ "EventName": "PM_L3_SN0_BUSY",
+ "BriefDescription": "lifetime, sample of snooper machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x819080",
+ "EventName": "PM_L3_SN_USAGE",
+ "BriefDescription": "rotating sample of 8 snoop valids",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e052",
+ "EventName": "PM_L3_ST_PREF",
+ "BriefDescription": "L3 store Prefetches",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3e052",
+ "EventName": "PM_L3_SW_PREF",
+ "BriefDescription": "Data stream touch to L3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb29084",
+ "EventName": "PM_L3_SYS_GUESS_CORRECT",
+ "BriefDescription": "Initial scope=system and data from outside group (far or rem)(pred successful)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xb4908c",
+ "EventName": "PM_L3_SYS_GUESS_WRONG",
+ "BriefDescription": "Initial scope=system but data from local or near. Prediction too high",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x24808e",
+ "EventName": "PM_L3_TRANS_PF",
+ "BriefDescription": "L3 Transient prefetch",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x18081",
+ "EventName": "PM_L3_WI0_ALLOC",
+ "BriefDescription": "lifetime, sample of Write Inject machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x418080",
+ "EventName": "PM_L3_WI0_BUSY",
+ "BriefDescription": "lifetime, sample of Write Inject machine 0 valid",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x418082",
+ "EventName": "PM_L3_WI_USAGE",
+ "BriefDescription": "rotating sample of 8 WI actives",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc080",
+ "EventName": "PM_LD_REF_L1_LSU0",
+ "BriefDescription": "LS0 L1 D cache load references counted at finish, gated by reject",
+ "PublicDescription": "LSU0 L1 D cache load references"
+ },
+ {
+ "EventCode": "0xc082",
+ "EventName": "PM_LD_REF_L1_LSU1",
+ "BriefDescription": "LS1 L1 D cache load references counted at finish, gated by reject",
+ "PublicDescription": "LSU1 L1 D cache load references"
+ },
+ {
+ "EventCode": "0xc094",
+ "EventName": "PM_LD_REF_L1_LSU2",
+ "BriefDescription": "LS2 L1 D cache load references counted at finish, gated by reject",
+ "PublicDescription": "LS2 L1 D cache load references counted at finish, gated by reject"
+ },
+ {
+ "EventCode": "0xc096",
+ "EventName": "PM_LD_REF_L1_LSU3",
+ "BriefDescription": "LS3 L1 D cache load references counted at finish, gated by reject",
+ "PublicDescription": "LS3 L1 D cache load references counted at finish, gated by reject"
+ },
+ {
+ "EventCode": "0x509a",
+ "EventName": "PM_LINK_STACK_INVALID_PTR",
+ "BriefDescription": "A flush where the LS ptr is invalid; results in a pop. A lot of interrupts between pushes and pops",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x5098",
+ "EventName": "PM_LINK_STACK_WRONG_ADD_PRED",
+ "BriefDescription": "Link stack predicts wrong address, because of a link stack design limitation",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe080",
+ "EventName": "PM_LS0_ERAT_MISS_PREF",
+ "BriefDescription": "LS0 Erat miss due to prefetch",
+ "PublicDescription": "LS0 Erat miss due to prefetch"
+ },
+ {
+ "EventCode": "0xd0b8",
+ "EventName": "PM_LS0_L1_PREF",
+ "BriefDescription": "LS0 L1 cache data prefetches",
+ "PublicDescription": "LS0 L1 cache data prefetches"
+ },
+ {
+ "EventCode": "0xc098",
+ "EventName": "PM_LS0_L1_SW_PREF",
+ "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches",
+ "PublicDescription": "Software L1 Prefetches, including SW Transient Prefetches"
+ },
+ {
+ "EventCode": "0xe082",
+ "EventName": "PM_LS1_ERAT_MISS_PREF",
+ "BriefDescription": "LS1 Erat miss due to prefetch",
+ "PublicDescription": "LS1 Erat miss due to prefetch"
+ },
+ {
+ "EventCode": "0xd0ba",
+ "EventName": "PM_LS1_L1_PREF",
+ "BriefDescription": "LS1 L1 cache data prefetches",
+ "PublicDescription": "LS1 L1 cache data prefetches"
+ },
+ {
+ "EventCode": "0xc09a",
+ "EventName": "PM_LS1_L1_SW_PREF",
+ "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches",
+ "PublicDescription": "Software L1 Prefetches, including SW Transient Prefetches"
+ },
+ {
+ "EventCode": "0xc0b0",
+ "EventName": "PM_LSU0_FLUSH_LRQ",
+ "BriefDescription": "LS0 Flush: LRQ",
+ "PublicDescription": "LSU0 LRQ flushes"
+ },
+ {
+ "EventCode": "0xc0b8",
+ "EventName": "PM_LSU0_FLUSH_SRQ",
+ "BriefDescription": "LS0 Flush: SRQ",
+ "PublicDescription": "LSU0 SRQ lhs flushes"
+ },
+ {
+ "EventCode": "0xc0a4",
+ "EventName": "PM_LSU0_FLUSH_ULD",
+ "BriefDescription": "LS0 Flush: Unaligned Load",
+ "PublicDescription": "LSU0 unaligned load flushes"
+ },
+ {
+ "EventCode": "0xc0ac",
+ "EventName": "PM_LSU0_FLUSH_UST",
+ "BriefDescription": "LS0 Flush: Unaligned Store",
+ "PublicDescription": "LSU0 unaligned store flushes"
+ },
+ {
+ "EventCode": "0xf088",
+ "EventName": "PM_LSU0_L1_CAM_CANCEL",
+ "BriefDescription": "ls0 l1 tm cam cancel",
+ "PublicDescription": "ls0 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0x1e056",
+ "EventName": "PM_LSU0_LARX_FIN",
+ "BriefDescription": "Larx finished in LSU pipe0",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd08c",
+ "EventName": "PM_LSU0_LMQ_LHR_MERGE",
+ "BriefDescription": "LS0 Load Merged with another cacheline request",
+ "PublicDescription": "LS0 Load Merged with another cacheline request"
+ },
+ {
+ "EventCode": "0xc08c",
+ "EventName": "PM_LSU0_NCLD",
+ "BriefDescription": "LS0 Non-cacheable Loads counted at finish",
+ "PublicDescription": "LSU0 non-cacheable loads"
+ },
+ {
+ "EventCode": "0xe090",
+ "EventName": "PM_LSU0_PRIMARY_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit",
+ "PublicDescription": "Primary ERAT hit"
+ },
+ {,
+ "EventCode": "0x1e05a",
+ "EventName": "PM_LSU0_REJECT",
+ "BriefDescription": "LSU0 reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc09c",
+ "EventName": "PM_LSU0_SRQ_STFWD",
+ "BriefDescription": "LS0 SRQ forwarded data to a load",
+ "PublicDescription": "LSU0 SRQ store forwarded"
+ },
+ {
+ "EventCode": "0xf084",
+ "EventName": "PM_LSU0_STORE_REJECT",
+ "BriefDescription": "ls0 store reject",
+ "PublicDescription": "ls0 store reject"
+ },
+ {
+ "EventCode": "0xe0a8",
+ "EventName": "PM_LSU0_TMA_REQ_L2",
+ "BriefDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding",
+ "PublicDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding"
+ },
+ {
+ "EventCode": "0xe098",
+ "EventName": "PM_LSU0_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1",
+ "PublicDescription": "Load tm hit in L1"
+ },
+ {
+ "EventCode": "0xe0a0",
+ "EventName": "PM_LSU0_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss",
+ "PublicDescription": "Load tm L1 miss"
+ },
+ {
+ "EventCode": "0xc0b2",
+ "EventName": "PM_LSU1_FLUSH_LRQ",
+ "BriefDescription": "LS1 Flush: LRQ",
+ "PublicDescription": "LSU1 LRQ flushes"
+ },
+ {
+ "EventCode": "0xc0ba",
+ "EventName": "PM_LSU1_FLUSH_SRQ",
+ "BriefDescription": "LS1 Flush: SRQ",
+ "PublicDescription": "LSU1 SRQ lhs flushes"
+ },
+ {
+ "EventCode": "0xc0a6",
+ "EventName": "PM_LSU1_FLUSH_ULD",
+ "BriefDescription": "LS1 Flush: Unaligned Load",
+ "PublicDescription": "LSU1 unaligned load flushes"
+ },
+ {
+ "EventCode": "0xc0ae",
+ "EventName": "PM_LSU1_FLUSH_UST",
+ "BriefDescription": "LS1 Flush: Unaligned Store",
+ "PublicDescription": "LSU1 unaligned store flushes"
+ },
+ {
+ "EventCode": "0xf08a",
+ "EventName": "PM_LSU1_L1_CAM_CANCEL",
+ "BriefDescription": "ls1 l1 tm cam cancel",
+ "PublicDescription": "ls1 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0x2e056",
+ "EventName": "PM_LSU1_LARX_FIN",
+ "BriefDescription": "Larx finished in LSU pipe1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd08e",
+ "EventName": "PM_LSU1_LMQ_LHR_MERGE",
+ "BriefDescription": "LS1 Load Merge with another cacheline request",
+ "PublicDescription": "LS1 Load Merge with another cacheline request"
+ },
+ {
+ "EventCode": "0xc08e",
+ "EventName": "PM_LSU1_NCLD",
+ "BriefDescription": "LS1 Non-cacheable Loads counted at finish",
+ "PublicDescription": "LSU1 non-cacheable loads"
+ },
+ {
+ "EventCode": "0xe092",
+ "EventName": "PM_LSU1_PRIMARY_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit",
+ "PublicDescription": "Primary ERAT hit"
+ },
+ {,
+ "EventCode": "0x2e05a",
+ "EventName": "PM_LSU1_REJECT",
+ "BriefDescription": "LSU1 reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc09e",
+ "EventName": "PM_LSU1_SRQ_STFWD",
+ "BriefDescription": "LS1 SRQ forwarded data to a load",
+ "PublicDescription": "LSU1 SRQ store forwarded"
+ },
+ {
+ "EventCode": "0xf086",
+ "EventName": "PM_LSU1_STORE_REJECT",
+ "BriefDescription": "ls1 store reject",
+ "PublicDescription": "ls1 store reject"
+ },
+ {
+ "EventCode": "0xe0aa",
+ "EventName": "PM_LSU1_TMA_REQ_L2",
+ "BriefDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding",
+ "PublicDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding"
+ },
+ {
+ "EventCode": "0xe09a",
+ "EventName": "PM_LSU1_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1",
+ "PublicDescription": "Load tm hit in L1"
+ },
+ {
+ "EventCode": "0xe0a2",
+ "EventName": "PM_LSU1_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss",
+ "PublicDescription": "Load tm L1 miss"
+ },
+ {
+ "EventCode": "0xc0b4",
+ "EventName": "PM_LSU2_FLUSH_LRQ",
+ "BriefDescription": "LS2 Flush: LRQ",
+ "PublicDescription": "LS2 Flush: LRQ"
+ },
+ {
+ "EventCode": "0xc0bc",
+ "EventName": "PM_LSU2_FLUSH_SRQ",
+ "BriefDescription": "LS2 Flush: SRQ",
+ "PublicDescription": "LS2 Flush: SRQ"
+ },
+ {
+ "EventCode": "0xc0a8",
+ "EventName": "PM_LSU2_FLUSH_ULD",
+ "BriefDescription": "LS2 Flush: Unaligned Load",
+ "PublicDescription": "LS2 Flush: Unaligned Load"
+ },
+ {
+ "EventCode": "0xf08c",
+ "EventName": "PM_LSU2_L1_CAM_CANCEL",
+ "BriefDescription": "ls2 l1 tm cam cancel",
+ "PublicDescription": "ls2 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0x3e056",
+ "EventName": "PM_LSU2_LARX_FIN",
+ "BriefDescription": "Larx finished in LSU pipe2",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc084",
+ "EventName": "PM_LSU2_LDF",
+ "BriefDescription": "LS2 Scalar Loads",
+ "PublicDescription": "LS2 Scalar Loads"
+ },
+ {
+ "EventCode": "0xc088",
+ "EventName": "PM_LSU2_LDX",
+ "BriefDescription": "LS2 Vector Loads",
+ "PublicDescription": "LS2 Vector Loads"
+ },
+ {
+ "EventCode": "0xd090",
+ "EventName": "PM_LSU2_LMQ_LHR_MERGE",
+ "BriefDescription": "LS2 Load Merged with another cacheline request",
+ "PublicDescription": "LS2 Load Merged with another cacheline request"
+ },
+ {
+ "EventCode": "0xe094",
+ "EventName": "PM_LSU2_PRIMARY_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit",
+ "PublicDescription": "Primary ERAT hit"
+ },
+ {
+ "EventCode": "0x3e05a",
+ "EventName": "PM_LSU2_REJECT",
+ "BriefDescription": "LSU2 reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc0a0",
+ "EventName": "PM_LSU2_SRQ_STFWD",
+ "BriefDescription": "LS2 SRQ forwarded data to a load",
+ "PublicDescription": "LS2 SRQ forwarded data to a load"
+ },
+ {
+ "EventCode": "0xe0ac",
+ "EventName": "PM_LSU2_TMA_REQ_L2",
+ "BriefDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding",
+ "PublicDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding"
+ },
+ {
+ "EventCode": "0xe09c",
+ "EventName": "PM_LSU2_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1",
+ "PublicDescription": "Load tm hit in L1"
+ },
+ {
+ "EventCode": "0xe0a4",
+ "EventName": "PM_LSU2_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss",
+ "PublicDescription": "Load tm L1 miss"
+ },
+ {
+ "EventCode": "0xc0b6",
+ "EventName": "PM_LSU3_FLUSH_LRQ",
+ "BriefDescription": "LS3 Flush: LRQ",
+ "PublicDescription": "LS3 Flush: LRQ"
+ },
+ {
+ "EventCode": "0xc0be",
+ "EventName": "PM_LSU3_FLUSH_SRQ",
+ "BriefDescription": "LS3 Flush: SRQ",
+ "PublicDescription": "LS3 Flush: SRQ"
+ },
+ {
+ "EventCode": "0xc0aa",
+ "EventName": "PM_LSU3_FLUSH_ULD",
+ "BriefDescription": "LS3 Flush: Unaligned Load",
+ "PublicDescription": "LS3 Flush: Unaligned Load"
+ },
+ {
+ "EventCode": "0xf08e",
+ "EventName": "PM_LSU3_L1_CAM_CANCEL",
+ "BriefDescription": "ls3 l1 tm cam cancel",
+ "PublicDescription": "ls3 l1 tm cam cancel"
+ },
+ {
+ "EventCode": "0x4e056",
+ "EventName": "PM_LSU3_LARX_FIN",
+ "BriefDescription": "Larx finished in LSU pipe3",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc086",
+ "EventName": "PM_LSU3_LDF",
+ "BriefDescription": "LS3 Scalar Loads",
+ "PublicDescription": "LS3 Scalar Loads"
+ },
+ {
+ "EventCode": "0xc08a",
+ "EventName": "PM_LSU3_LDX",
+ "BriefDescription": "LS3 Vector Loads",
+ "PublicDescription": "LS3 Vector Loads"
+ },
+ {
+ "EventCode": "0xd092",
+ "EventName": "PM_LSU3_LMQ_LHR_MERGE",
+ "BriefDescription": "LS3 Load Merge with another cacheline request",
+ "PublicDescription": "LS3 Load Merge with another cacheline request"
+ },
+ {
+ "EventCode": "0xe096",
+ "EventName": "PM_LSU3_PRIMARY_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit",
+ "PublicDescription": "Primary ERAT hit"
+ },
+ {
+ "EventCode": "0x4e05a",
+ "EventName": "PM_LSU3_REJECT",
+ "BriefDescription": "LSU3 reject",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc0a2",
+ "EventName": "PM_LSU3_SRQ_STFWD",
+ "BriefDescription": "LS3 SRQ forwarded data to a load",
+ "PublicDescription": "LS3 SRQ forwarded data to a load"
+ },
+ {
+ "EventCode": "0xe0ae",
+ "EventName": "PM_LSU3_TMA_REQ_L2",
+ "BriefDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding",
+ "PublicDescription": "Address-only req to L2, only on the first one; indication that the Load footprint is not expanding"
+ },
+ {
+ "EventCode": "0xe09e",
+ "EventName": "PM_LSU3_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1",
+ "PublicDescription": "Load tm hit in L1"
+ },
+ {
+ "EventCode": "0xe0a6",
+ "EventName": "PM_LSU3_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss",
+ "PublicDescription": "Load tm L1 miss"
+ },
+ {
+ "EventCode": "0xe880",
+ "EventName": "PM_LSU_ERAT_MISS_PREF",
+ "BriefDescription": "Erat miss due to prefetch, on either pipe",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc8ac",
+ "EventName": "PM_LSU_FLUSH_UST",
+ "BriefDescription": "Unaligned Store Flush on either pipe",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd0a4",
+ "EventName": "PM_LSU_FOUR_TABLEWALK_CYC",
+ "BriefDescription": "Cycles when four tablewalks pending on this thread",
+ "PublicDescription": "Cycles when four tablewalks pending on this thread"
+ },
+ {
+ "EventCode": "0x10066",
+ "EventName": "PM_LSU_FX_FIN",
+ "BriefDescription": "LSU Finished a FX operation (up to 2 per cycle)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd8b8",
+ "EventName": "PM_LSU_L1_PREF",
+ "BriefDescription": "HW initiated; includes SW streaming forms as well; includes SW streams as a separate event",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc898",
+ "EventName": "PM_LSU_L1_SW_PREF",
+ "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches, on both pipes",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc884",
+ "EventName": "PM_LSU_LDF",
+ "BriefDescription": "FPU loads only on LS2/LS3, i.e. LU0/LU1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc888",
+ "EventName": "PM_LSU_LDX",
+ "BriefDescription": "Vector loads can issue only on LS2/LS3",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd0a2",
+ "EventName": "PM_LSU_LMQ_FULL_CYC",
+ "BriefDescription": "LMQ full",
+ "PublicDescription": "Cycles LMQ full"
+ },
+ {
+ "EventCode": "0xd0a1",
+ "EventName": "PM_LSU_LMQ_S0_ALLOC",
+ "BriefDescription": "Per thread - use edge detect to count allocates. On a per thread basis, level signal indicating Slot 0 is valid. By instrumenting a single slot we can calculate service time for that slot. Previous machines required a separate signal indicating the slot was allocated. Because any signal can be routed to any counter in P8, we can count level in one PMC and edge detect in another PMC using the same signal",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd0a0",
+ "EventName": "PM_LSU_LMQ_S0_VALID",
+ "BriefDescription": "Slot 0 of LMQ valid",
+ "PublicDescription": "LMQ slot 0 valid"
+ },
+ {
+ "EventCode": "0x3001c",
+ "EventName": "PM_LSU_LMQ_SRQ_EMPTY_ALL_CYC",
+ "BriefDescription": "All threads LSU empty (LMQ and SRQ empty)",
+ "PublicDescription": "All threads LSU empty (LMQ and SRQ empty). Issue HW016541"
+ },
+ {
+ "EventCode": "0xd09f",
+ "EventName": "PM_LSU_LRQ_S0_ALLOC",
+ "BriefDescription": "Per thread - use edge detect to count allocates. On a per thread basis, level signal indicating Slot 0 is valid. By instrumenting a single slot we can calculate service time for that slot. Previous machines required a separate signal indicating the slot was allocated. Because any signal can be routed to any counter in P8, we can count level in one PMC and edge detect in another PMC using the same signal",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd09e",
+ "EventName": "PM_LSU_LRQ_S0_VALID",
+ "BriefDescription": "Slot 0 of LRQ valid",
+ "PublicDescription": "LRQ slot 0 valid"
+ },
+ {
+ "EventCode": "0xf091",
+ "EventName": "PM_LSU_LRQ_S43_ALLOC",
+ "BriefDescription": "LRQ slot 43 was released",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xf090",
+ "EventName": "PM_LSU_LRQ_S43_VALID",
+ "BriefDescription": "LRQ slot 43 was busy",
+ "PublicDescription": "LRQ slot 43 was busy"
+ },
+ {,
+ "EventCode": "0x30162",
+ "EventName": "PM_LSU_MRK_DERAT_MISS",
+ "BriefDescription": "DERAT Reloaded (Miss)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc88c",
+ "EventName": "PM_LSU_NCLD",
+ "BriefDescription": "Count at finish, so can return only on LS0 or LS1",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc092",
+ "EventName": "PM_LSU_NCST",
+ "BriefDescription": "Non-cacheable Stores sent to nest",
+ "PublicDescription": "Non-cacheable Stores sent to nest"
+ },
+ {,
+ "EventCode": "0x10064",
+ "EventName": "PM_LSU_REJECT",
+ "BriefDescription": "LSU Reject (up to 4 per cycle)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd082",
+ "EventName": "PM_LSU_SET_MPRED",
+ "BriefDescription": "Line already in cache at reload time",
+ "PublicDescription": "Line already in cache at reload time"
+ },
+ {
+ "EventCode": "0x40008",
+ "EventName": "PM_LSU_SRQ_EMPTY_CYC",
+ "BriefDescription": "All threads SRQ empty",
+ "PublicDescription": "All threads SRQ empty"
+ },
+ {
+ "EventCode": "0xd09d",
+ "EventName": "PM_LSU_SRQ_S0_ALLOC",
+ "BriefDescription": "Per thread - use edge detect to count allocates. On a per thread basis, level signal indicating Slot 0 is valid. By instrumenting a single slot we can calculate service time for that slot. Previous machines required a separate signal indicating the slot was allocated. Because any signal can be routed to any counter in P8, we can count level in one PMC and edge detect in another PMC using the same signal",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd09c",
+ "EventName": "PM_LSU_SRQ_S0_VALID",
+ "BriefDescription": "Slot 0 of SRQ valid",
+ "PublicDescription": "SRQ slot 0 valid"
+ },
+ {
+ "EventCode": "0xf093",
+ "EventName": "PM_LSU_SRQ_S39_ALLOC",
+ "BriefDescription": "SRQ slot 39 was released",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xf092",
+ "EventName": "PM_LSU_SRQ_S39_VALID",
+ "BriefDescription": "SRQ slot 39 was busy",
+ "PublicDescription": "SRQ slot 39 was busy"
+ },
+ {
+ "EventCode": "0xd09b",
+ "EventName": "PM_LSU_SRQ_SYNC",
+ "BriefDescription": "A sync in the SRQ ended",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd09a",
+ "EventName": "PM_LSU_SRQ_SYNC_CYC",
+ "BriefDescription": "A sync is in the SRQ (edge detect to count)",
+ "PublicDescription": "SRQ sync duration"
+ },
+ {
+ "EventCode": "0xf084",
+ "EventName": "PM_LSU_STORE_REJECT",
+ "BriefDescription": "Store reject on either pipe",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd0a6",
+ "EventName": "PM_LSU_TWO_TABLEWALK_CYC",
+ "BriefDescription": "Cycles when two tablewalks pending on this thread",
+ "PublicDescription": "Cycles when two tablewalks pending on this thread"
+ },
+ {,
+ "EventCode": "0x5094",
+ "EventName": "PM_LWSYNC",
+ "BriefDescription": "threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x209a",
+ "EventName": "PM_LWSYNC_HELD",
+ "BriefDescription": "LWSYNC held at dispatch",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3013a",
+ "EventName": "PM_MRK_CRU_FIN",
+ "BriefDescription": "IFU non-branch finished",
+ "PublicDescription": "IFU non-branch marked instruction finished"
+ },
+ {,
+ "EventCode": "0x4d146",
+ "EventName": "PM_MRK_DATA_FROM_L21_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d126",
+ "EventName": "PM_MRK_DATA_FROM_L21_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d146",
+ "EventName": "PM_MRK_DATA_FROM_L21_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c126",
+ "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d144",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d124",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d144",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c124",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d144",
+ "EventName": "PM_MRK_DATA_FROM_L31_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d124",
+ "EventName": "PM_MRK_DATA_FROM_L31_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d146",
+ "EventName": "PM_MRK_DATA_FROM_L31_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c126",
+ "EventName": "PM_MRK_DATA_FROM_L31_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x201e0",
+ "EventName": "PM_MRK_DATA_FROM_MEM",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location, including L4 from local, remote or distant, due to a marked load",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f146",
+ "EventName": "PM_MRK_DPTEG_FROM_L21_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f146",
+ "EventName": "PM_MRK_DPTEG_FROM_L21_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f140",
+ "EventName": "PM_MRK_DPTEG_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with load hit store conflict due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f140",
+ "EventName": "PM_MRK_DPTEG_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 with dispatch conflict due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f144",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3f144",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2f144",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1f146",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30156",
+ "EventName": "PM_MRK_FAB_RSP_MATCH",
+ "BriefDescription": "ttype and cresp matched as specified in MMCR1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4f152",
+ "EventName": "PM_MRK_FAB_RSP_MATCH_CYC",
+ "BriefDescription": "cresp/ttype match cycles",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2013c",
+ "EventName": "PM_MRK_FILT_MATCH",
+ "BriefDescription": "Marked filter Match",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x1013c",
+ "EventName": "PM_MRK_FIN_STALL_CYC",
+ "BriefDescription": "Marked instruction Finish Stall cycles (marked finish after NTC) (use edge detect to count)",
+ "PublicDescription": "Marked instruction Finish Stall cycles (marked finish after NTC) (use edge detect to count #)"
+ },
+ {,
+ "EventCode": "0x40130",
+ "EventName": "PM_MRK_GRP_CMPL",
+ "BriefDescription": "marked instruction finished (completed)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4013a",
+ "EventName": "PM_MRK_GRP_IC_MISS",
+ "BriefDescription": "Marked Group experienced I cache miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3013c",
+ "EventName": "PM_MRK_GRP_NTC",
+ "BriefDescription": "Marked group ntc cycles",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1013f",
+ "EventName": "PM_MRK_LD_MISS_EXPOSED",
+ "BriefDescription": "Marked Load exposed Miss (exposed period ended)",
+ "PublicDescription": "Marked Load exposed Miss (use edge detect to count #)"
+ },
+ {
+ "EventCode": "0xd180",
+ "EventName": "PM_MRK_LSU_FLUSH",
+ "BriefDescription": "Flush: (marked) : All Cases",
+ "PublicDescription": "Flush: (marked) : All Cases"
+ },
+ {
+ "EventCode": "0xd188",
+ "EventName": "PM_MRK_LSU_FLUSH_LRQ",
+ "BriefDescription": "Flush: (marked) LRQ",
+ "PublicDescription": "Marked LRQ flushes"
+ },
+ {
+ "EventCode": "0xd18a",
+ "EventName": "PM_MRK_LSU_FLUSH_SRQ",
+ "BriefDescription": "Flush: (marked) SRQ",
+ "PublicDescription": "Marked SRQ lhs flushes"
+ },
+ {
+ "EventCode": "0xd184",
+ "EventName": "PM_MRK_LSU_FLUSH_ULD",
+ "BriefDescription": "Flush: (marked) Unaligned Load",
+ "PublicDescription": "Marked unaligned load flushes"
+ },
+ {
+ "EventCode": "0xd186",
+ "EventName": "PM_MRK_LSU_FLUSH_UST",
+ "BriefDescription": "Flush: (marked) Unaligned Store",
+ "PublicDescription": "Marked unaligned store flushes"
+ },
+ {,
+ "EventCode": "0x40164",
+ "EventName": "PM_MRK_LSU_REJECT",
+ "BriefDescription": "LSU marked reject (up to 2 per cycle)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30164",
+ "EventName": "PM_MRK_LSU_REJECT_ERAT_MISS",
+ "BriefDescription": "LSU marked reject due to ERAT (up to 2 per cycle)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d15a",
+ "EventName": "PM_MRK_SRC_PREF_TRACK_EFF",
+ "BriefDescription": "Marked src pref track was effective",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d15a",
+ "EventName": "PM_MRK_SRC_PREF_TRACK_INEFF",
+ "BriefDescription": "Prefetch tracked was ineffective for marked src",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d15c",
+ "EventName": "PM_MRK_SRC_PREF_TRACK_MOD",
+ "BriefDescription": "Prefetch tracked was moderate for marked src",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1d15c",
+ "EventName": "PM_MRK_SRC_PREF_TRACK_MOD_L2",
+ "BriefDescription": "Marked src Prefetch Tracked was moderate (source L2)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3d15c",
+ "EventName": "PM_MRK_SRC_PREF_TRACK_MOD_L3",
+ "BriefDescription": "Prefetch tracked was moderate (L3 hit) for marked src",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1c15a",
+ "EventName": "PM_MRK_TGT_PREF_TRACK_EFF",
+ "BriefDescription": "Marked target pref track was effective",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3c15a",
+ "EventName": "PM_MRK_TGT_PREF_TRACK_INEFF",
+ "BriefDescription": "Prefetch tracked was ineffective for marked target",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c15c",
+ "EventName": "PM_MRK_TGT_PREF_TRACK_MOD",
+ "BriefDescription": "Prefetch tracked was moderate for marked target",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1c15c",
+ "EventName": "PM_MRK_TGT_PREF_TRACK_MOD_L2",
+ "BriefDescription": "Marked target Prefetch Tracked was moderate (source L2)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3c15c",
+ "EventName": "PM_MRK_TGT_PREF_TRACK_MOD_L3",
+ "BriefDescription": "Prefetch tracked was moderate (L3 hit) for marked target",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20b0",
+ "EventName": "PM_NESTED_TEND",
+ "BriefDescription": "Completion time nested tend",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20b6",
+ "EventName": "PM_NON_FAV_TBEGIN",
+ "BriefDescription": "Dispatch time non favored tbegin",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x328084",
+ "EventName": "PM_NON_TM_RST_SC",
+ "BriefDescription": "non tm snp rst tm sc",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2001a",
+ "EventName": "PM_NTCG_ALL_FIN",
+ "BriefDescription": "Cycles after all instructions have finished to group completed",
+ "PublicDescription": "Cycles after all instructions have finished to group completed"
+ },
+ {,
+ "EventCode": "0x20ac",
+ "EventName": "PM_OUTER_TBEGIN",
+ "BriefDescription": "Completion time outer tbegin",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20ae",
+ "EventName": "PM_OUTER_TEND",
+ "BriefDescription": "Completion time outer tend",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2005a",
+ "EventName": "PM_PREF_TRACKED",
+ "BriefDescription": "Total number of Prefetch Operations that were tracked",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1005a",
+ "EventName": "PM_PREF_TRACK_EFF",
+ "BriefDescription": "Prefetch Tracked was effective",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3005a",
+ "EventName": "PM_PREF_TRACK_INEFF",
+ "BriefDescription": "Prefetch tracked was ineffective",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4005a",
+ "EventName": "PM_PREF_TRACK_MOD",
+ "BriefDescription": "Prefetch tracked was moderate",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1005c",
+ "EventName": "PM_PREF_TRACK_MOD_L2",
+ "BriefDescription": "Prefetch Tracked was moderate (source L2)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3005c",
+ "EventName": "PM_PREF_TRACK_MOD_L3",
+ "BriefDescription": "Prefetch tracked was moderate (L3)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe084",
+ "EventName": "PM_PTE_PREFETCH",
+ "BriefDescription": "PTE prefetches",
+ "PublicDescription": "PTE prefetches"
+ },
+ {
+ "EventCode": "0x16081",
+ "EventName": "PM_RC0_ALLOC",
+ "BriefDescription": "RC mach 0 Busy. Used by PMU to sample average RC lifetime (mach0 used as sample point)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x16080",
+ "EventName": "PM_RC0_BUSY",
+ "BriefDescription": "RC mach 0 Busy. Used by PMU to sample average RC lifetime (mach0 used as sample point)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x200301ea",
+ "EventName": "PM_RC_LIFETIME_EXC_1024",
+ "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 1024 cycles",
+ "PublicDescription": "Reload latency exceeded 1024 cyc"
+ },
+ {,
+ "EventCode": "0x200401ec",
+ "EventName": "PM_RC_LIFETIME_EXC_2048",
+ "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 2048 cycles",
+ "PublicDescription": "Threshold counter exceeded a value of 2048"
+ },
+ {
+ "EventCode": "0x200101e8",
+ "EventName": "PM_RC_LIFETIME_EXC_256",
+ "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 256 cycles",
+ "PublicDescription": "Threshold counter exceeded a count of 256"
+ },
+ {,
+ "EventCode": "0x200201e6",
+ "EventName": "PM_RC_LIFETIME_EXC_32",
+ "BriefDescription": "Number of times the RC machine for a sampled instruction was active for more than 32 cycles",
+ "PublicDescription": "Reload latency exceeded 32 cyc"
+ },
+ {
+ "EventCode": "0x36088",
+ "EventName": "PM_RC_USAGE",
+ "BriefDescription": "Continuous 16-cycle (2:1) window where this signal rotates through sampling each L2 RC machine busy. The PMU uses this wave to then do a 16-cycle count to sample the total number of machines running",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x34808e",
+ "EventName": "PM_RD_CLEARING_SC",
+ "BriefDescription": "rd clearing sc",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x34808c",
+ "EventName": "PM_RD_FORMING_SC",
+ "BriefDescription": "rd forming sc",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x428086",
+ "EventName": "PM_RD_HIT_PF",
+ "BriefDescription": "rd machine hit l3 pf machine",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20004",
+ "EventName": "PM_REAL_SRQ_FULL",
+ "BriefDescription": "Out of real srq entries",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x2006a",
+ "EventName": "PM_RUN_CYC_SMT2_SHRD_MODE",
+ "BriefDescription": "Cycles this thread's run latch is set and the core is in SMT2 shared mode",
+ "PublicDescription": "Cycles run latch is set and core is in SMT2-shared mode"
+ },
+ {,
+ "EventCode": "0x1006a",
+ "EventName": "PM_RUN_CYC_SMT2_SPLIT_MODE",
+ "BriefDescription": "Cycles run latch is set and core is in SMT2-split mode",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4006c",
+ "EventName": "PM_RUN_CYC_SMT8_MODE",
+ "BriefDescription": "Cycles run latch is set and core is in SMT8 mode",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xf082",
+ "EventName": "PM_SEC_ERAT_HIT",
+ "BriefDescription": "secondary ERAT Hit",
+ "PublicDescription": "secondary ERAT Hit"
+ },
+ {,
+ "EventCode": "0x508c",
+ "EventName": "PM_SHL_CREATED",
+ "BriefDescription": "Store-Hit-Load Table Entry Created",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x508e",
+ "EventName": "PM_SHL_ST_CONVERT",
+ "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x5090",
+ "EventName": "PM_SHL_ST_DISABLE",
+ "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled because it was shown not to prevent the flush)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x26085",
+ "EventName": "PM_SN0_ALLOC",
+ "BriefDescription": "SN mach 0 Busy. Used by PMU to sample average SN lifetime (mach0 used as sample point)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x26084",
+ "EventName": "PM_SN0_BUSY",
+ "BriefDescription": "SN mach 0 Busy. Used by PMU to sample average SN lifetime (mach0 used as sample point)",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xd0b2",
+ "EventName": "PM_SNOOP_TLBIE",
+ "BriefDescription": "TLBIE snoop",
+ "PublicDescription": "Snoop TLBIE"
+ },
+ {,
+ "EventCode": "0x338088",
+ "EventName": "PM_SNP_TM_HIT_M",
+ "BriefDescription": "snp tm st hit m mu",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x33808a",
+ "EventName": "PM_SNP_TM_HIT_T",
+ "BriefDescription": "snp tm_st_hit t tn te",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x4608c",
+ "EventName": "PM_SN_USAGE",
+ "BriefDescription": "Continuous 16-cycle (2:1) window where this signal rotates through sampling each L2 SN machine busy. The PMU uses this wave to then do a 16-cycle count to sample the total number of machines running",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10028",
+ "EventName": "PM_STALL_END_GCT_EMPTY",
+ "BriefDescription": "Count ended because GCT went empty",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xc090",
+ "EventName": "PM_STCX_LSU",
+ "BriefDescription": "STCX executed reported at sent to nest",
+ "PublicDescription": "STCX executed reported at sent to nest"
+ },
+ {,
+ "EventCode": "0x717080",
+ "EventName": "PM_ST_CAUSED_FAIL",
+ "BriefDescription": "Non TM St caused any thread to fail",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3090",
+ "EventName": "PM_SWAP_CANCEL",
+ "BriefDescription": "SWAP cancel; rtag not available",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x3092",
+ "EventName": "PM_SWAP_CANCEL_GPR",
+ "BriefDescription": "SWAP cancel; rtag not available for gpr",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x308c",
+ "EventName": "PM_SWAP_COMPLETE",
+ "BriefDescription": "swap cast in completed",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x308e",
+ "EventName": "PM_SWAP_COMPLETE_GPR",
+ "BriefDescription": "swap cast in completed for gpr",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe086",
+ "EventName": "PM_TABLEWALK_CYC_PREF",
+ "BriefDescription": "tablewalk qualified for pte prefetches",
+ "PublicDescription": "tablewalk qualified for pte prefetches"
+ },
+ {,
+ "EventCode": "0x20b2",
+ "EventName": "PM_TABORT_TRECLAIM",
+ "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe0ba",
+ "EventName": "PM_TEND_PEND_CYC",
+ "BriefDescription": "TEND latency per thread",
+ "PublicDescription": "TEND latency per thread"
+ },
+ {,
+ "EventCode": "0x10012",
+ "EventName": "PM_THRD_GRP_CMPL_BOTH_CYC",
+ "BriefDescription": "Cycles group completed on both completion slots by any thread",
+ "PublicDescription": "Two threads finished same cycle (gated by run latch)"
+ },
+ {,
+ "EventCode": "0x40bc",
+ "EventName": "PM_THRD_PRIO_0_1_CYC",
+ "BriefDescription": "Cycles thread running at priority level 0 or 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x40be",
+ "EventName": "PM_THRD_PRIO_2_3_CYC",
+ "BriefDescription": "Cycles thread running at priority level 2 or 3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x5080",
+ "EventName": "PM_THRD_PRIO_4_5_CYC",
+ "BriefDescription": "Cycles thread running at priority level 4 or 5",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x5082",
+ "EventName": "PM_THRD_PRIO_6_7_CYC",
+ "BriefDescription": "Cycles thread running at priority level 6 or 7",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3098",
+ "EventName": "PM_THRD_REBAL_CYC",
+ "BriefDescription": "cycles rebalance was active",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20b8",
+ "EventName": "PM_TM_BEGIN_ALL",
+ "BriefDescription": "Tm any tbegin",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x318082",
+ "EventName": "PM_TM_CAM_OVERFLOW",
+ "BriefDescription": "l3 tm cam overflow during L2 co of SC",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0x74708c",
+ "EventName": "PM_TM_CAP_OVERFLOW",
+ "BriefDescription": "TM Footprint Capacity Overflow",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20ba",
+ "EventName": "PM_TM_END_ALL",
+ "BriefDescription": "Tm any tend",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3086",
+ "EventName": "PM_TM_FAIL_CONF_NON_TM",
+ "BriefDescription": "TEXAS fail reason @ completion",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3088",
+ "EventName": "PM_TM_FAIL_CON_TM",
+ "BriefDescription": "TEXAS fail reason @ completion",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe0b2",
+ "EventName": "PM_TM_FAIL_DISALLOW",
+ "BriefDescription": "TM fail disallow",
+ "PublicDescription": "TM fail disallow"
+ },
+ {,
+ "EventCode": "0x3084",
+ "EventName": "PM_TM_FAIL_FOOTPRINT_OVERFLOW",
+ "BriefDescription": "TEXAS fail reason @ completion",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe0b8",
+ "EventName": "PM_TM_FAIL_NON_TX_CONFLICT",
+ "BriefDescription": "Non-transactional conflict from LSU, whatever gets reported to TEXAS",
+ "PublicDescription": "Non-transactional conflict from LSU, whatever gets reported to TEXAS"
+ },
+ {,
+ "EventCode": "0x308a",
+ "EventName": "PM_TM_FAIL_SELF",
+ "BriefDescription": "TEXAS fail reason @ completion",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe0b4",
+ "EventName": "PM_TM_FAIL_TLBIE",
+ "BriefDescription": "TLBIE hit bloom filter",
+ "PublicDescription": "TLBIE hit bloom filter"
+ },
+ {
+ "EventCode": "0xe0b6",
+ "EventName": "PM_TM_FAIL_TX_CONFLICT",
+ "BriefDescription": "Transactional conflict from LSU, whatever gets reported to TEXAS",
+ "PublicDescription": "Transactional conflict from LSU, whatever gets reported to TEXAS"
+ },
+ {,
+ "EventCode": "0x727086",
+ "EventName": "PM_TM_FAV_CAUSED_FAIL",
+ "BriefDescription": "TM Load (fav) caused another thread to fail",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x717082",
+ "EventName": "PM_TM_LD_CAUSED_FAIL",
+ "BriefDescription": "Non TM Ld caused any thread to fail",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x727084",
+ "EventName": "PM_TM_LD_CONF",
+ "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x328086",
+ "EventName": "PM_TM_RST_SC",
+ "BriefDescription": "tm snp rst tm sc",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x318080",
+ "EventName": "PM_TM_SC_CO",
+ "BriefDescription": "l3 castout tm Sc line",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x73708a",
+ "EventName": "PM_TM_ST_CAUSED_FAIL",
+ "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x737088",
+ "EventName": "PM_TM_ST_CONF",
+ "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20bc",
+ "EventName": "PM_TM_TBEGIN",
+ "BriefDescription": "Tm nested tbegin",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3080",
+ "EventName": "PM_TM_TRESUME",
+ "BriefDescription": "Tm resume",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20be",
+ "EventName": "PM_TM_TSUSPEND",
+ "BriefDescription": "Tm suspend",
+ "PublicDescription": ""
+ },
+ {
+ "EventCode": "0xe08c",
+ "EventName": "PM_UP_PREF_L3",
+ "BriefDescription": "Micropartition prefetch",
+ "PublicDescription": "Micropartition prefetch"
+ },
+ {
+ "EventCode": "0xe08e",
+ "EventName": "PM_UP_PREF_POINTER",
+ "BriefDescription": "Micropartition pointer prefetches",
+ "PublicDescription": "Micropartition pointer prefetches"
+ },
+ {
+ "EventCode": "0xa0a4",
+ "EventName": "PM_VSU0_16FLOP",
+ "BriefDescription": "Sixteen flops operation (SP vector versions of fdiv, fsqrt)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa080",
+ "EventName": "PM_VSU0_1FLOP",
+ "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished",
+ "PublicDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finishedDecode into 1,2,4 FLOP according to instr IOP, multiplied by #vector elements according to route( eg x1, x2, x4) Only if instr sends finish to ISU"
+ },
+ {,
+ "EventCode": "0xa098",
+ "EventName": "PM_VSU0_2FLOP",
+ "BriefDescription": "two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa09c",
+ "EventName": "PM_VSU0_4FLOP",
+ "BriefDescription": "four flops operation (scalar fdiv, fsqrt, DP vector version of fmadd, fnmadd, fmsub, fnmsub, SP vector versions of single flop instructions)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0a0",
+ "EventName": "PM_VSU0_8FLOP",
+ "BriefDescription": "eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0a4",
+ "EventName": "PM_VSU0_COMPLEX_ISSUED",
+ "BriefDescription": "Complex VMX instruction issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0b4",
+ "EventName": "PM_VSU0_CY_ISSUED",
+ "BriefDescription": "Cryptographic instruction RFC02196 Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0a8",
+ "EventName": "PM_VSU0_DD_ISSUED",
+ "BriefDescription": "64BIT Decimal Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa08c",
+ "EventName": "PM_VSU0_DP_2FLOP",
+ "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa090",
+ "EventName": "PM_VSU0_DP_FMA",
+ "BriefDescription": "DP vector version of fmadd,fnmadd,fmsub,fnmsub",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa094",
+ "EventName": "PM_VSU0_DP_FSQRT_FDIV",
+ "BriefDescription": "DP vector versions of fdiv,fsqrt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0ac",
+ "EventName": "PM_VSU0_DQ_ISSUED",
+ "BriefDescription": "128BIT Decimal Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0b0",
+ "EventName": "PM_VSU0_EX_ISSUED",
+ "BriefDescription": "Direct move 32/64b VRFtoGPR RFC02206 Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0bc",
+ "EventName": "PM_VSU0_FIN",
+ "BriefDescription": "VSU0 Finished an instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa084",
+ "EventName": "PM_VSU0_FMA",
+ "BriefDescription": "two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only!",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb098",
+ "EventName": "PM_VSU0_FPSCR",
+ "BriefDescription": "Move to/from FPSCR type instruction issued on Pipe 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa088",
+ "EventName": "PM_VSU0_FSQRT_FDIV",
+ "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only!",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb090",
+ "EventName": "PM_VSU0_PERMUTE_ISSUED",
+ "BriefDescription": "Permute VMX Instruction Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb088",
+ "EventName": "PM_VSU0_SCALAR_DP_ISSUED",
+ "BriefDescription": "Double Precision scalar instruction issued on Pipe0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb094",
+ "EventName": "PM_VSU0_SIMPLE_ISSUED",
+ "BriefDescription": "Simple VMX instruction issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0a8",
+ "EventName": "PM_VSU0_SINGLE",
+ "BriefDescription": "FPU single precision",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb09c",
+ "EventName": "PM_VSU0_SQ",
+ "BriefDescription": "Store Vector Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb08c",
+ "EventName": "PM_VSU0_STF",
+ "BriefDescription": "FPU store (SP or DP) issued on Pipe0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb080",
+ "EventName": "PM_VSU0_VECTOR_DP_ISSUED",
+ "BriefDescription": "Double Precision vector instruction issued on Pipe0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb084",
+ "EventName": "PM_VSU0_VECTOR_SP_ISSUED",
+ "BriefDescription": "Single Precision vector instruction issued (executed)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0a6",
+ "EventName": "PM_VSU1_16FLOP",
+ "BriefDescription": "Sixteen flops operation (SP vector versions of fdiv,fsqrt)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa082",
+ "EventName": "PM_VSU1_1FLOP",
+ "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa09a",
+ "EventName": "PM_VSU1_2FLOP",
+ "BriefDescription": "two flops operation (scalar fmadd, fnmadd, fmsub, fnmsub and DP vector versions of single flop instructions)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa09e",
+ "EventName": "PM_VSU1_4FLOP",
+ "BriefDescription": "four flops operation (scalar fdiv, fsqrt, DP vector version of fmadd, fnmadd, fmsub, fnmsub, SP vector versions of single flop instructions)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0a2",
+ "EventName": "PM_VSU1_8FLOP",
+ "BriefDescription": "eight flops operation (DP vector versions of fdiv,fsqrt and SP vector versions of fmadd,fnmadd,fmsub,fnmsub)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0a6",
+ "EventName": "PM_VSU1_COMPLEX_ISSUED",
+ "BriefDescription": "Complex VMX instruction issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0b6",
+ "EventName": "PM_VSU1_CY_ISSUED",
+ "BriefDescription": "Cryptographic instruction RFC02196 Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0aa",
+ "EventName": "PM_VSU1_DD_ISSUED",
+ "BriefDescription": "64BIT Decimal Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa08e",
+ "EventName": "PM_VSU1_DP_2FLOP",
+ "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa092",
+ "EventName": "PM_VSU1_DP_FMA",
+ "BriefDescription": "DP vector version of fmadd,fnmadd,fmsub,fnmsub",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa096",
+ "EventName": "PM_VSU1_DP_FSQRT_FDIV",
+ "BriefDescription": "DP vector versions of fdiv,fsqrt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0ae",
+ "EventName": "PM_VSU1_DQ_ISSUED",
+ "BriefDescription": "128BIT Decimal Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb0b2",
+ "EventName": "PM_VSU1_EX_ISSUED",
+ "BriefDescription": "Direct move 32/64b VRFtoGPR RFC02206 Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0be",
+ "EventName": "PM_VSU1_FIN",
+ "BriefDescription": "VSU1 Finished an instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa086",
+ "EventName": "PM_VSU1_FMA",
+ "BriefDescription": "two flops operation (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only!",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb09a",
+ "EventName": "PM_VSU1_FPSCR",
+ "BriefDescription": "Move to/from FPSCR type instruction issued on Pipe 0",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa08a",
+ "EventName": "PM_VSU1_FSQRT_FDIV",
+ "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only!",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb092",
+ "EventName": "PM_VSU1_PERMUTE_ISSUED",
+ "BriefDescription": "Permute VMX Instruction Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb08a",
+ "EventName": "PM_VSU1_SCALAR_DP_ISSUED",
+ "BriefDescription": "Double Precision scalar instruction issued on Pipe1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb096",
+ "EventName": "PM_VSU1_SIMPLE_ISSUED",
+ "BriefDescription": "Simple VMX instruction issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xa0aa",
+ "EventName": "PM_VSU1_SINGLE",
+ "BriefDescription": "FPU single precision",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb09e",
+ "EventName": "PM_VSU1_SQ",
+ "BriefDescription": "Store Vector Issued",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb08e",
+ "EventName": "PM_VSU1_STF",
+ "BriefDescription": "FPU store (SP or DP) issued on Pipe1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb082",
+ "EventName": "PM_VSU1_VECTOR_DP_ISSUED",
+ "BriefDescription": "Double Precision vector instruction issued on Pipe1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0xb086",
+ "EventName": "PM_VSU1_VECTOR_SP_ISSUED",
+ "BriefDescription": "Single Precision vector instruction issued (executed)",
+ "PublicDescription": ""
+ },
+]
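
The PM_VSU0_nFLOP/PM_VSU1_nFLOP events above are weighted counters: each increment represents 1, 2, 4, 8 or 16 floating-point operations, as the PM_VSU0_1FLOP PublicDescription explains, so a total FLOP count is the weighted sum across both VSU pipes. A minimal sketch of that arithmetic in C (illustrative only, not part of this patch; the zeroed counts are placeholders for values read back from perf):

/*
 * Illustrative only: total flops derived from the weighted
 * PM_VSU{0,1}_{1,2,4,8,16}FLOP counters documented above.
 */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint64_t weights[] = { 1, 2, 4, 8, 16 };
	/* counts[i] = PM_VSU0_<w>FLOP + PM_VSU1_<w>FLOP; fill from perf */
	uint64_t counts[] = { 0, 0, 0, 0, 0 };
	uint64_t flops = 0;
	int i;

	for (i = 0; i < 5; i++)
		flops += weights[i] * counts[i];
	printf("total flops: %llu\n", (unsigned long long)flops);
	return 0;
}
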
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power8/pipeline.json
new file mode 100644
index 000000000..293f3a4c6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/pipeline.json
@@ -0,0 +1,350 @@
+[
+ {,
+ "EventCode": "0x100f2",
+ "EventName": "PM_1PLUS_PPC_CMPL",
+ "BriefDescription": "1 or more ppc insts finished",
+ "PublicDescription": "1 or more ppc insts finished (completed)"
+ },
+ {,
+ "EventCode": "0x400f2",
+ "EventName": "PM_1PLUS_PPC_DISP",
+ "BriefDescription": "Cycles at least one Instr Dispatched",
+ "PublicDescription": "Cycles at least one Instr Dispatched. Could be a group with only microcode. Issue HW016521"
+ },
+ {,
+ "EventCode": "0x100fa",
+ "EventName": "PM_ANY_THRD_RUN_CYC",
+ "BriefDescription": "One of threads in run_cycles",
+ "PublicDescription": "Any thread in run_cycles (was one thread in run_cycles)"
+ },
+ {,
+ "EventCode": "0x4000a",
+ "EventName": "PM_CMPLU_STALL",
+ "BriefDescription": "Completion stall",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d018",
+ "EventName": "PM_CMPLU_STALL_BRU",
+ "BriefDescription": "Completion stall due to a Branch Unit",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c012",
+ "EventName": "PM_CMPLU_STALL_DCACHE_MISS",
+ "BriefDescription": "Completion stall by Dcache miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c018",
+ "EventName": "PM_CMPLU_STALL_DMISS_L21_L31",
+ "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c016",
+ "EventName": "PM_CMPLU_STALL_DMISS_L2L3",
+ "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c016",
+ "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT",
+ "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict",
+ "PublicDescription": "Completion stall due to cache miss resolving in core's L2/L3 with a conflict"
+ },
+ {,
+ "EventCode": "0x4c01a",
+ "EventName": "PM_CMPLU_STALL_DMISS_L3MISS",
+ "BriefDescription": "Completion stall due to cache miss resolving missed the L3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c018",
+ "EventName": "PM_CMPLU_STALL_DMISS_LMEM",
+ "BriefDescription": "Completion stall due to cache miss that resolves in local memory",
+ "PublicDescription": "Completion stall due to cache miss resolving in core's Local Memory"
+ },
+ {,
+ "EventCode": "0x2c01c",
+ "EventName": "PM_CMPLU_STALL_DMISS_REMOTE",
+ "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)",
+ "PublicDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)"
+ },
+ {,
+ "EventCode": "0x4c012",
+ "EventName": "PM_CMPLU_STALL_ERAT_MISS",
+ "BriefDescription": "Completion stall due to LSU reject ERAT miss",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d016",
+ "EventName": "PM_CMPLU_STALL_FXLONG",
+ "BriefDescription": "Completion stall due to a long latency fixed point instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2d016",
+ "EventName": "PM_CMPLU_STALL_FXU",
+ "BriefDescription": "Completion stall due to FXU",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30036",
+ "EventName": "PM_CMPLU_STALL_HWSYNC",
+ "BriefDescription": "completion stall due to hwsync",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4d014",
+ "EventName": "PM_CMPLU_STALL_LOAD_FINISH",
+ "BriefDescription": "Completion stall due to a Load finish",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c010",
+ "EventName": "PM_CMPLU_STALL_LSU",
+ "BriefDescription": "Completion stall by LSU instruction",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10036",
+ "EventName": "PM_CMPLU_STALL_LWSYNC",
+ "BriefDescription": "completion stall due to isync/lwsync",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30006",
+ "EventName": "PM_CMPLU_STALL_OTHER_CMPL",
+ "BriefDescription": "Instructions core completed while this tread was stalled",
+ "PublicDescription": "Instructions core completed while this thread was stalled"
+ },
+ {,
+ "EventCode": "0x4c01c",
+ "EventName": "PM_CMPLU_STALL_ST_FWD",
+ "BriefDescription": "Completion stall due to store forward",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1001c",
+ "EventName": "PM_CMPLU_STALL_THRD",
+ "BriefDescription": "Completion Stalled due to thread conflict. Group ready to complete but it was another thread's turn",
+ "PublicDescription": "Completion stall due to thread conflict"
+ },
+ {,
+ "EventCode": "0x1e",
+ "EventName": "PM_CYC",
+ "BriefDescription": "Cycles",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_HELD",
+ "BriefDescription": "Dispatch Held",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4003c",
+ "EventName": "PM_DISP_HELD_SYNC_HOLD",
+ "BriefDescription": "Dispatch held due to SYNC hold",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x200f8",
+ "EventName": "PM_EXT_INT",
+ "BriefDescription": "external interrupt",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x400f8",
+ "EventName": "PM_FLUSH",
+ "BriefDescription": "Flush (any type)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30012",
+ "EventName": "PM_FLUSH_COMPLETION",
+ "BriefDescription": "Completion Flush",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3000c",
+ "EventName": "PM_FREQ_DOWN",
+ "BriefDescription": "Power Management: Below Threshold B",
+ "PublicDescription": "Frequency is being slewed down due to Power Management"
+ },
+ {,
+ "EventCode": "0x4000c",
+ "EventName": "PM_FREQ_UP",
+ "BriefDescription": "Power Management: Above Threshold A",
+ "PublicDescription": "Frequency is being slewed up due to Power Management"
+ },
+ {,
+ "EventCode": "0x2000a",
+ "EventName": "PM_HV_CYC",
+ "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration",
+ "PublicDescription": "cycles in hypervisor mode"
+ },
+ {,
+ "EventCode": "0x3405e",
+ "EventName": "PM_IFETCH_THROTTLE",
+ "BriefDescription": "Cycles in which Instruction fetch throttle was active",
+ "PublicDescription": "Cycles instruction fecth was throttled in IFU"
+ },
+ {,
+ "EventCode": "0x10014",
+ "EventName": "PM_IOPS_CMPL",
+ "BriefDescription": "Internal Operations completed",
+ "PublicDescription": "IOPS Completed"
+ },
+ {,
+ "EventCode": "0x3c058",
+ "EventName": "PM_LARX_FIN",
+ "BriefDescription": "Larx finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1002e",
+ "EventName": "PM_LD_CMPL",
+ "BriefDescription": "count of Loads completed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10062",
+ "EventName": "PM_LD_L3MISS_PEND_CYC",
+ "BriefDescription": "Cycles L3 miss was pending for this thread",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30066",
+ "EventName": "PM_LSU_FIN",
+ "BriefDescription": "LSU Finished an instruction (up to 2 per cycle)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2003e",
+ "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC",
+ "BriefDescription": "LSU empty (lmq and srq empty)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e05c",
+ "EventName": "PM_LSU_REJECT_ERAT_MISS",
+ "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e05c",
+ "EventName": "PM_LSU_REJECT_LHS",
+ "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e05c",
+ "EventName": "PM_LSU_REJECT_LMQ_FULL",
+ "BriefDescription": "LSU reject due to LMQ full ( 4 per cycle)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1001a",
+ "EventName": "PM_LSU_SRQ_FULL_CYC",
+ "BriefDescription": "Storage Queue is full and is blocking dispatch",
+ "PublicDescription": "SRQ is Full"
+ },
+ {,
+ "EventCode": "0x40014",
+ "EventName": "PM_PROBE_NOP_DISP",
+ "BriefDescription": "ProbeNops dispatched",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x600f4",
+ "EventName": "PM_RUN_CYC",
+ "BriefDescription": "Run_cycles",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3006c",
+ "EventName": "PM_RUN_CYC_SMT2_MODE",
+ "BriefDescription": "Cycles run latch is set and core is in SMT2 mode",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2006c",
+ "EventName": "PM_RUN_CYC_SMT4_MODE",
+ "BriefDescription": "cycles this threads run latch is set and the core is in SMT4 mode",
+ "PublicDescription": "Cycles run latch is set and core is in SMT4 mode"
+ },
+ {,
+ "EventCode": "0x1006c",
+ "EventName": "PM_RUN_CYC_ST_MODE",
+ "BriefDescription": "Cycles run latch is set and core is in ST mode",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x500fa",
+ "EventName": "PM_RUN_INST_CMPL",
+ "BriefDescription": "Run_Instructions",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e058",
+ "EventName": "PM_STCX_FAIL",
+ "BriefDescription": "stcx failed",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x20016",
+ "EventName": "PM_ST_CMPL",
+ "BriefDescription": "Store completion count",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x200f0",
+ "EventName": "PM_ST_FIN",
+ "BriefDescription": "Store Instructions Finished",
+ "PublicDescription": "Store Instructions Finished (store sent to nest)"
+ },
+ {,
+ "EventCode": "0x20018",
+ "EventName": "PM_ST_FWD",
+ "BriefDescription": "Store forwards that finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10026",
+ "EventName": "PM_TABLEWALK_CYC",
+ "BriefDescription": "Cycles when a tablewalk (I or D) is active",
+ "PublicDescription": "Tablewalk Active"
+ },
+ {,
+ "EventCode": "0x300f8",
+ "EventName": "PM_TB_BIT_TRANS",
+ "BriefDescription": "timebase event",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2000c",
+ "EventName": "PM_THRD_ALL_RUN_CYC",
+ "BriefDescription": "All Threads in Run_cycles (was both threads in run_cycles)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30058",
+ "EventName": "PM_TLBIE_FIN",
+ "BriefDescription": "tlbie finished",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10060",
+ "EventName": "PM_TM_TRANS_RUN_CYC",
+ "BriefDescription": "run cycles in transactional state",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e012",
+ "EventName": "PM_TM_TX_PASS_RUN_CYC",
+ "BriefDescription": "cycles spent in successful transactions",
+ "PublicDescription": "run cycles spent in successful transactions"
+ },
+]
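
The EventCode values in these tables are raw PMU codes, so a counter can be programmed even without the generated event tables. A minimal sketch (not part of this patch), assuming a POWER8 host where 0x100f2 (PM_1PLUS_PPC_CMPL above) is a valid code, using the perf_event_open(2) syscall with PERF_TYPE_RAW:

/*
 * Illustrative only: count PM_1PLUS_PPC_CMPL (raw code 0x100f2,
 * a POWER8-specific assumption) over a workload in this thread.
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;	/* raw hardware event code */
	attr.config = 0x100f2;		/* PM_1PLUS_PPC_CMPL */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);	/* this thread, any CPU */
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("PM_1PLUS_PPC_CMPL: %" PRIu64 "\n", count);
	close(fd);
	return 0;
}

Once jevents compiles these JSON files into perf, the same counter is typically reachable by name as well, e.g. perf stat -e pm_1plus_ppc_cmpl -- ./workload (perf lowercases the EventName).
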
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/pmc.json b/tools/perf/pmu-events/arch/powerpc/power8/pmc.json
new file mode 100644
index 000000000..583e4d937
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/pmc.json
@@ -0,0 +1,140 @@
+[
+ {,
+ "EventCode": "0x20010",
+ "EventName": "PM_PMC1_OVERFLOW",
+ "BriefDescription": "Overflow from counter 1",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30010",
+ "EventName": "PM_PMC2_OVERFLOW",
+ "BriefDescription": "Overflow from counter 2",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30020",
+ "EventName": "PM_PMC2_REWIND",
+ "BriefDescription": "PMC2 Rewind Event (did not match condition)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10022",
+ "EventName": "PM_PMC2_SAVED",
+ "BriefDescription": "PMC2 Rewind Value saved",
+ "PublicDescription": "PMC2 Rewind Value saved (matched condition)"
+ },
+ {,
+ "EventCode": "0x40010",
+ "EventName": "PM_PMC3_OVERFLOW",
+ "BriefDescription": "Overflow from counter 3",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10010",
+ "EventName": "PM_PMC4_OVERFLOW",
+ "BriefDescription": "Overflow from counter 4",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10020",
+ "EventName": "PM_PMC4_REWIND",
+ "BriefDescription": "PMC4 Rewind Event",
+ "PublicDescription": "PMC4 Rewind Event (did not match condition)"
+ },
+ {,
+ "EventCode": "0x30022",
+ "EventName": "PM_PMC4_SAVED",
+ "BriefDescription": "PMC4 Rewind Value saved (matched condition)",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10024",
+ "EventName": "PM_PMC5_OVERFLOW",
+ "BriefDescription": "Overflow from counter 5",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x30024",
+ "EventName": "PM_PMC6_OVERFLOW",
+ "BriefDescription": "Overflow from counter 6",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x400f4",
+ "EventName": "PM_RUN_PURR",
+ "BriefDescription": "Run_PURR",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x10008",
+ "EventName": "PM_RUN_SPURR",
+ "BriefDescription": "Run SPURR",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x0",
+ "EventName": "PM_SUSPENDED",
+ "BriefDescription": "Counter OFF",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x301ea",
+ "EventName": "PM_THRESH_EXC_1024",
+ "BriefDescription": "Threshold counter exceeded a value of 1024",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x401ea",
+ "EventName": "PM_THRESH_EXC_128",
+ "BriefDescription": "Threshold counter exceeded a value of 128",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x401ec",
+ "EventName": "PM_THRESH_EXC_2048",
+ "BriefDescription": "Threshold counter exceeded a value of 2048",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x101e8",
+ "EventName": "PM_THRESH_EXC_256",
+ "BriefDescription": "Threshold counter exceed a count of 256",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x201e6",
+ "EventName": "PM_THRESH_EXC_32",
+ "BriefDescription": "Threshold counter exceeded a value of 32",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x101e6",
+ "EventName": "PM_THRESH_EXC_4096",
+ "BriefDescription": "Threshold counter exceed a count of 4096",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x201e8",
+ "EventName": "PM_THRESH_EXC_512",
+ "BriefDescription": "Threshold counter exceeded a value of 512",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x301e8",
+ "EventName": "PM_THRESH_EXC_64",
+ "BriefDescription": "IFU non-branch finished",
+ "PublicDescription": "Threshold counter exceeded a value of 64"
+ },
+ {,
+ "EventCode": "0x101ec",
+ "EventName": "PM_THRESH_MET",
+ "BriefDescription": "threshold exceeded",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4016e",
+ "EventName": "PM_THRESH_NOT_MET",
+ "BriefDescription": "Threshold counter did not meet threshold",
+ "PublicDescription": ""
+ },
+]
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/translation.json b/tools/perf/pmu-events/arch/powerpc/power8/translation.json
new file mode 100644
index 000000000..e47a55459
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power8/translation.json
@@ -0,0 +1,176 @@
+[
+ {,
+ "EventCode": "0x4c054",
+ "EventName": "PM_DERAT_MISS_16G",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3c054",
+ "EventName": "PM_DERAT_MISS_16M",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1c056",
+ "EventName": "PM_DERAT_MISS_4K",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c054",
+ "EventName": "PM_DERAT_MISS_64K",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e048",
+ "EventName": "PM_DPTEG_FROM_DL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3e048",
+ "EventName": "PM_DPTEG_FROM_DL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e042",
+ "EventName": "PM_DPTEG_FROM_L2",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e04e",
+ "EventName": "PM_DPTEG_FROM_L2MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a localtion other than the local core's L2 due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e040",
+ "EventName": "PM_DPTEG_FROM_L2_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e040",
+ "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e042",
+ "EventName": "PM_DPTEG_FROM_L3",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3e042",
+ "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e042",
+ "EventName": "PM_DPTEG_FROM_L3_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e044",
+ "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e04c",
+ "EventName": "PM_DPTEG_FROM_LL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e048",
+ "EventName": "PM_DPTEG_FROM_LMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e04c",
+ "EventName": "PM_DPTEG_FROM_MEMORY",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4e04a",
+ "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e048",
+ "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e046",
+ "EventName": "PM_DPTEG_FROM_RL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x1e04a",
+ "EventName": "PM_DPTEG_FROM_RL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2e04a",
+ "EventName": "PM_DPTEG_FROM_RL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x300fc",
+ "EventName": "PM_DTLB_MISS",
+ "BriefDescription": "Data PTEG reload",
+ "PublicDescription": "Data PTEG Reloaded (DTLB Miss)"
+ },
+ {,
+ "EventCode": "0x1c058",
+ "EventName": "PM_DTLB_MISS_16G",
+ "BriefDescription": "Data TLB Miss page size 16G",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x4c056",
+ "EventName": "PM_DTLB_MISS_16M",
+ "BriefDescription": "Data TLB Miss page size 16M",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x2c056",
+ "EventName": "PM_DTLB_MISS_4K",
+ "BriefDescription": "Data TLB Miss page size 4k",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x3c056",
+ "EventName": "PM_DTLB_MISS_64K",
+ "BriefDescription": "Data TLB Miss page size 64K",
+ "PublicDescription": ""
+ },
+ {,
+ "EventCode": "0x200f6",
+ "EventName": "PM_LSU_DERAT_MISS",
+ "BriefDescription": "DERAT Reloaded due to a DERAT miss",
+ "PublicDescription": "DERAT Reloaded (Miss)"
+ },
+ {,
+ "EventCode": "0x20066",
+ "EventName": "PM_TLB_MISS",
+ "BriefDescription": "TLB Miss (I + D)",
+ "PublicDescription": ""
+ },
+]
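
One hypothetical use of the translation events above: dividing PM_DTLB_MISS (0x300fc) by PM_RUN_INST_CMPL (0x500fa, from pipeline.json) yields DTLB misses per instruction. A sketch (again illustrative, not part of this patch; raw codes are POWER8 assumptions) that opens both codes as one perf event group so they count over the identical interval:

/*
 * Illustrative only: PM_DTLB_MISS and PM_RUN_INST_CMPL read as a
 * single group via PERF_FORMAT_GROUP.
 */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

struct group_read {
	uint64_t nr;		/* number of events in the group */
	uint64_t values[2];	/* one count per event, leader first */
};

static int open_raw(uint64_t code, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = code;
	attr.disabled = (group_fd == -1);	/* only the leader starts disabled */
	attr.read_format = PERF_FORMAT_GROUP;
	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	struct group_read gr;
	int leader, member;

	leader = open_raw(0x300fc, -1);		/* PM_DTLB_MISS */
	member = open_raw(0x500fa, leader);	/* PM_RUN_INST_CMPL */
	if (leader < 0 || member < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
	/* ... workload under measurement ... */
	ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

	if (read(leader, &gr, sizeof(gr)) > 0 && gr.nr == 2)
		printf("DTLB misses per instruction: %f\n",
		       (double)gr.values[0] / (double)gr.values[1]);
	return 0;
}
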
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json
new file mode 100644
index 000000000..851072105
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json
@@ -0,0 +1,107 @@
+[
+ {,
+ "EventCode": "0x300F4",
+ "EventName": "PM_THRD_CONC_RUN_INST",
+ "BriefDescription": "PPC Instructions Finished by this thread when all threads in the core had the run-latch set"
+ },
+ {,
+ "EventCode": "0x1E056",
+ "EventName": "PM_CMPLU_STALL_FLUSH_ANY_THREAD",
+ "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because any of the 4 threads in the same core suffered a flush, which blocks completion"
+ },
+ {,
+ "EventCode": "0x4D016",
+ "EventName": "PM_CMPLU_STALL_FXLONG",
+ "BriefDescription": "Completion stall due to a long latency scalar fixed point instruction (division, square root)"
+ },
+ {,
+ "EventCode": "0x2D016",
+ "EventName": "PM_CMPLU_STALL_FXU",
+ "BriefDescription": "Finish stall due to a scalar fixed point or CR instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes"
+ },
+ {,
+ "EventCode": "0x4D12A",
+ "EventName": "PM_MRK_DATA_FROM_RL4_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's L4 on the same Node or Group ( Remote) due to a marked load"
+ },
+ {,
+ "EventCode": "0x1003C",
+ "EventName": "PM_CMPLU_STALL_DMISS_L2L3",
+ "BriefDescription": "Completion stall by Dcache miss which resolved in L2/L3"
+ },
+ {,
+ "EventCode": "0x4C014",
+ "EventName": "PM_CMPLU_STALL_LMQ_FULL",
+ "BriefDescription": "Finish stall because the NTF instruction was a load that missed in the L1 and the LMQ was unable to accept this load miss request because it was full"
+ },
+ {,
+ "EventCode": "0x14048",
+ "EventName": "PM_INST_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x4D014",
+ "EventName": "PM_CMPLU_STALL_LOAD_FINISH",
+ "BriefDescription": "Finish stall because the NTF instruction was a load instruction with all its dependencies satisfied just going through the LSU pipe to finish"
+ },
+ {,
+ "EventCode": "0x2404A",
+ "EventName": "PM_INST_FROM_RL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x1404A",
+ "EventName": "PM_INST_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x401EA",
+ "EventName": "PM_THRESH_EXC_128",
+ "BriefDescription": "Threshold counter exceeded a value of 128"
+ },
+ {,
+ "EventCode": "0x400F6",
+ "EventName": "PM_BR_MPRED_CMPL",
+ "BriefDescription": "Number of Branch Mispredicts"
+ },
+ {,
+ "EventCode": "0x2F140",
+ "EventName": "PM_MRK_DPTEG_FROM_L2_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x101E6",
+ "EventName": "PM_THRESH_EXC_4096",
+ "BriefDescription": "Threshold counter exceed a count of 4096"
+ },
+ {,
+ "EventCode": "0x3F14A",
+ "EventName": "PM_MRK_DPTEG_FROM_RMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4C016",
+ "EventName": "PM_CMPLU_STALL_DMISS_L2L3_CONFLICT",
+ "BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 with a conflict"
+ },
+ {,
+ "EventCode": "0x2C01A",
+ "EventName": "PM_CMPLU_STALL_LHS",
+ "BriefDescription": "Finish stall because the NTF instruction was a load that hit on an older store and it was waiting for store data"
+ },
+ {,
+ "EventCode": "0x401E4",
+ "EventName": "PM_MRK_DTLB_MISS",
+ "BriefDescription": "Marked dtlb miss"
+ },
+ {,
+ "EventCode": "0x24046",
+ "EventName": "PM_INST_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x1002A",
+ "EventName": "PM_CMPLU_STALL_LARX",
+ "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json b/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json
new file mode 100644
index 000000000..8a83bca26
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/floating-point.json
@@ -0,0 +1,32 @@
+[
+ {,
+ "EventCode": "0x1415A",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 with load hit store conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x10058",
+ "EventName": "PM_MEM_LOC_THRESH_IFU",
+ "BriefDescription": "Local Memory above threshold for IFU speculation control"
+ },
+ {,
+ "EventCode": "0x2D028",
+ "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L2",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L2 data cache"
+ },
+ {,
+ "EventCode": "0x30012",
+ "EventName": "PM_FLUSH_COMPLETION",
+ "BriefDescription": "The instruction that was next to complete did not complete because it suffered a flush"
+ },
+ {,
+ "EventCode": "0x2D154",
+ "EventName": "PM_MRK_DERAT_MISS_64K",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 64K"
+ },
+ {,
+ "EventCode": "0x4016E",
+ "EventName": "PM_THRESH_NOT_MET",
+ "BriefDescription": "Threshold counter did not meet threshold"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
new file mode 100644
index 000000000..f9fa84b16
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json
@@ -0,0 +1,357 @@
+[
+ {,
+ "EventCode": "0x25044",
+ "EventName": "PM_IPTEG_FROM_L31_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x101E8",
+ "EventName": "PM_THRESH_EXC_256",
+ "BriefDescription": "Threshold counter exceed a count of 256"
+ },
+ {,
+ "EventCode": "0x4504E",
+ "EventName": "PM_IPTEG_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x1006A",
+ "EventName": "PM_NTC_ISSUE_HELD_DARQ_FULL",
+ "BriefDescription": "The NTC instruction is being held at dispatch because there are no slots in the DARQ for it"
+ },
+ {,
+ "EventCode": "0x4E016",
+ "EventName": "PM_CMPLU_STALL_LSAQ_ARB",
+ "BriefDescription": "Finish stall because the NTF instruction was a load or store that was held in LSAQ because an older instruction from SRQ or LRQ won arbitration to the LSU pipe when this instruction tried to launch"
+ },
+ {,
+ "EventCode": "0x1001A",
+ "EventName": "PM_LSU_SRQ_FULL_CYC",
+ "BriefDescription": "Cycles in which the Store Queue is full on all 4 slices. This is event is not per thread. All the threads will see the same count for this core resource"
+ },
+ {,
+ "EventCode": "0x1E15E",
+ "EventName": "PM_MRK_L2_TM_REQ_ABORT",
+ "BriefDescription": "TM abort"
+ },
+ {,
+ "EventCode": "0x34052",
+ "EventName": "PM_INST_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x20114",
+ "EventName": "PM_MRK_L2_RC_DISP",
+ "BriefDescription": "Marked Instruction RC dispatched in L2"
+ },
+ {,
+ "EventCode": "0x4C044",
+ "EventName": "PM_DATA_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x1C044",
+ "EventName": "PM_DATA_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a demand load"
+ },
+ {,
+ "EventCode": "0x44050",
+ "EventName": "PM_INST_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x30154",
+ "EventName": "PM_MRK_FAB_RSP_DCLAIM",
+ "BriefDescription": "Marked store had to do a dclaim"
+ },
+ {,
+ "EventCode": "0x30014",
+ "EventName": "PM_CMPLU_STALL_STORE_FIN_ARB",
+ "BriefDescription": "Finish stall because the NTF instruction was a store waiting for a slot in the store finish pipe. This means the instruction is ready to finish but there are instructions ahead of it, using the finish pipe"
+ },
+ {,
+ "EventCode": "0x3E054",
+ "EventName": "PM_LD_MISS_L1",
+ "BriefDescription": "Load Missed L1, counted at execution time (can be greater than loads finished). LMQ merges are not included in this count. i.e. if a load instruction misses on an address that is already allocated on the LMQ, this event will not increment for that load). Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+ },
+ {,
+ "EventCode": "0x2E01A",
+ "EventName": "PM_CMPLU_STALL_LSU_FLUSH_NEXT",
+ "BriefDescription": "Completion stall of one cycle because the LSU requested to flush the next iop in the sequence. It takes 1 cycle for the ISU to process this request before the LSU instruction is allowed to complete"
+ },
+ {,
+ "EventCode": "0x2D01C",
+ "EventName": "PM_CMPLU_STALL_STCX",
+ "BriefDescription": "Finish stall because the NTF instruction was a stcx waiting for response from L2"
+ },
+ {,
+ "EventCode": "0x2C010",
+ "EventName": "PM_CMPLU_STALL_LSU",
+ "BriefDescription": "Completion stall by LSU instruction"
+ },
+ {,
+ "EventCode": "0x2C042",
+ "EventName": "PM_DATA_FROM_L3_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state due to a demand load"
+ },
+ {,
+ "EventCode": "0x4E012",
+ "EventName": "PM_CMPLU_STALL_MTFPSCR",
+ "BriefDescription": "Completion stall because the ISU is updating the register and notifying the Effective Address Table (EAT)"
+ },
+ {,
+ "EventCode": "0x100F2",
+ "EventName": "PM_1PLUS_PPC_CMPL",
+ "BriefDescription": "1 or more ppc insts finished"
+ },
+ {,
+ "EventCode": "0x3001C",
+ "EventName": "PM_LSU_REJECT_LMQ_FULL",
+ "BriefDescription": "LSU Reject due to LMQ full (up to 4 per cycles)"
+ },
+ {,
+ "EventCode": "0x15046",
+ "EventName": "PM_IPTEG_FROM_L31_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x1015E",
+ "EventName": "PM_MRK_FAB_RSP_RD_T_INTV",
+ "BriefDescription": "Sampled Read got a T intervention"
+ },
+ {,
+ "EventCode": "0x101EC",
+ "EventName": "PM_THRESH_MET",
+ "BriefDescription": "threshold exceeded"
+ },
+ {,
+ "EventCode": "0x10020",
+ "EventName": "PM_PMC4_REWIND",
+ "BriefDescription": "PMC4 Rewind Event"
+ },
+ {,
+ "EventCode": "0x301EA",
+ "EventName": "PM_THRESH_EXC_1024",
+ "BriefDescription": "Threshold counter exceeded a value of 1024"
+ },
+ {,
+ "EventCode": "0x34056",
+ "EventName": "PM_CMPLU_STALL_LSU_MFSPR",
+ "BriefDescription": "Finish stall because the NTF instruction was a mfspr instruction targeting an LSU SPR and it was waiting for the register data to be returned"
+ },
+ {,
+ "EventCode": "0x44056",
+ "EventName": "PM_VECTOR_ST_CMPL",
+ "BriefDescription": "Number of vector store instructions completed"
+ },
+ {,
+ "EventCode": "0x2C124",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x4C12A",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x30060",
+ "EventName": "PM_TM_TRANS_RUN_INST",
+ "BriefDescription": "Run instructions completed in transactional state (gated by the run latch)"
+ },
+ {,
+ "EventCode": "0x2C014",
+ "EventName": "PM_CMPLU_STALL_STORE_FINISH",
+ "BriefDescription": "Finish stall because the NTF instruction was a store with all its dependencies met, just waiting to go through the LSU pipe to finish"
+ },
+ {,
+ "EventCode": "0x3515A",
+ "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE_CYC",
+ "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x34050",
+ "EventName": "PM_INST_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x3015E",
+ "EventName": "PM_MRK_FAB_RSP_CLAIM_RTY",
+ "BriefDescription": "Sampled store did a rwitm and got a rty"
+ },
+ {,
+ "EventCode": "0x0",
+ "EventName": "PM_SUSPENDED",
+ "BriefDescription": "Counter OFF"
+ },
+ {,
+ "EventCode": "0x10010",
+ "EventName": "PM_PMC4_OVERFLOW",
+ "BriefDescription": "Overflow from counter 4"
+ },
+ {,
+ "EventCode": "0x3E04A",
+ "EventName": "PM_DPTEG_FROM_RMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x2F152",
+ "EventName": "PM_MRK_FAB_RSP_DCLAIM_CYC",
+ "BriefDescription": "cycles L2 RC took for a dclaim"
+ },
+ {,
+ "EventCode": "0x10004",
+ "EventName": "PM_CMPLU_STALL_LRQ_OTHER",
+ "BriefDescription": "Finish stall due to LRQ miscellaneous reasons, lost arbitration to LMQ slot, bank collisions, set prediction cleanup, set prediction multihit and others"
+ },
+ {,
+ "EventCode": "0x4F150",
+ "EventName": "PM_MRK_FAB_RSP_RWITM_CYC",
+ "BriefDescription": "cycles L2 RC took for a rwitm"
+ },
+ {,
+ "EventCode": "0x4E042",
+ "EventName": "PM_DPTEG_FROM_L3",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x1F054",
+ "EventName": "PM_TLB_HIT",
+ "BriefDescription": "Number of times the TLB had the data required by the instruction. Applies to both HPT and RPT"
+ },
+ {,
+ "EventCode": "0x2C01E",
+ "EventName": "PM_CMPLU_STALL_SYNC_PMU_INT",
+ "BriefDescription": "Cycles in which the NTC instruction is waiting for a synchronous PMU interrupt"
+ },
+ {,
+ "EventCode": "0x24050",
+ "EventName": "PM_IOPS_CMPL",
+ "BriefDescription": "Internal Operations completed"
+ },
+ {,
+ "EventCode": "0x1515C",
+ "EventName": "PM_SYNC_MRK_BR_MPRED",
+ "BriefDescription": "Marked Branch mispredict that can cause a synchronous interrupt"
+ },
+ {,
+ "EventCode": "0x300FA",
+ "EventName": "PM_INST_FROM_L3MISS",
+ "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet"
+ },
+ {,
+ "EventCode": "0x15044",
+ "EventName": "PM_IPTEG_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x15152",
+ "EventName": "PM_SYNC_MRK_BR_LINK",
+ "BriefDescription": "Marked Branch and link branch that can cause a synchronous interrupt"
+ },
+ {,
+ "EventCode": "0x1E050",
+ "EventName": "PM_CMPLU_STALL_TEND",
+ "BriefDescription": "Finish stall because the NTF instruction was a tend instruction awaiting response from L2"
+ },
+ {,
+ "EventCode": "0x1013E",
+ "EventName": "PM_MRK_LD_MISS_EXPOSED_CYC",
+ "BriefDescription": "Marked Load exposed Miss (use edge detect to count #)"
+ },
+ {,
+ "EventCode": "0x25042",
+ "EventName": "PM_IPTEG_FROM_L3_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x14054",
+ "EventName": "PM_INST_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x4015E",
+ "EventName": "PM_MRK_FAB_RSP_RD_RTY",
+ "BriefDescription": "Sampled L2 reads retry count"
+ },
+ {,
+ "EventCode": "0x45048",
+ "EventName": "PM_IPTEG_FROM_DL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x44052",
+ "EventName": "PM_INST_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for an instruction fetch"
+ },
+ {,
+ "EventCode": "0x30026",
+ "EventName": "PM_CMPLU_STALL_STORE_DATA",
+ "BriefDescription": "Finish stall because the next to finish instruction was a store waiting on data"
+ },
+ {,
+ "EventCode": "0x301E6",
+ "EventName": "PM_MRK_DERAT_MISS",
+ "BriefDescription": "Erat Miss (TLB Access) All page sizes"
+ },
+ {,
+ "EventCode": "0x24154",
+ "EventName": "PM_THRESH_ACC",
+ "BriefDescription": "This event increments every time the threshold event counter ticks. Thresholding must be enabled (via MMCRA) and the thresholding start event must occur for this counter to increment. It will stop incrementing when the thresholding stop event occurs or when thresholding is disabled, until the next time a configured thresholding start event occurs."
+ },
+ {,
+ "EventCode": "0x2015E",
+ "EventName": "PM_MRK_FAB_RSP_RWITM_RTY",
+ "BriefDescription": "Sampled store did a rwitm and got a rty"
+ },
+ {,
+ "EventCode": "0x200FA",
+ "EventName": "PM_BR_TAKEN_CMPL",
+ "BriefDescription": "New event for Branch Taken"
+ },
+ {,
+ "EventCode": "0x35044",
+ "EventName": "PM_IPTEG_FROM_L31_ECO_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x4C010",
+ "EventName": "PM_CMPLU_STALL_STORE_PIPE_ARB",
+ "BriefDescription": "Finish stall because the NTF instruction was a store waiting for the next relaunch opportunity after an internal reject. This means the instruction is ready to relaunch and tried once but lost arbitration"
+ },
+ {,
+ "EventCode": "0x4C01C",
+ "EventName": "PM_CMPLU_STALL_ST_FWD",
+ "BriefDescription": "Completion stall due to store forward"
+ },
+ {,
+ "EventCode": "0x3515C",
+ "EventName": "PM_MRK_DATA_FROM_RL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a marked load"
+ },
+ {,
+ "EventCode": "0x2D14C",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x40116",
+ "EventName": "PM_MRK_LARX_FIN",
+ "BriefDescription": "Larx finished"
+ },
+ {,
+ "EventCode": "0x1003A",
+ "EventName": "PM_CMPLU_STALL_LSU_FIN",
+ "BriefDescription": "Finish stall because the NTF instruction was an LSU op (other than a load or a store) with all its dependencies met and just going through the LSU pipe to finish"
+ },
+ {,
+ "EventCode": "0x3012A",
+ "EventName": "PM_MRK_L2_RC_DONE",
+ "BriefDescription": "Marked RC done"
+ },
+ {,
+ "EventCode": "0x45044",
+ "EventName": "PM_IPTEG_FROM_L31_ECO_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json
new file mode 100644
index 000000000..b1954c38b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json
@@ -0,0 +1,627 @@
+[
+ {,
+ "EventCode": "0x3013E",
+ "EventName": "PM_MRK_STALL_CMPLU_CYC",
+ "BriefDescription": "Number of cycles the marked instruction is experiencing a stall while it is next to complete (NTC)"
+ },
+ {,
+ "EventCode": "0x4F056",
+ "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3MISS",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from beyond the core's L3 data cache. The source could be local/remote/distant memory or another core's cache"
+ },
+ {,
+ "EventCode": "0x24158",
+ "EventName": "PM_MRK_INST",
+ "BriefDescription": "An instruction was marked. Includes both Random Instruction Sampling (RIS) at decode time and Random Event Sampling (RES) at the time the configured event happens"
+ },
+ {,
+ "EventCode": "0x1E046",
+ "EventName": "PM_DPTEG_FROM_L31_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x3C04A",
+ "EventName": "PM_DATA_FROM_RMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a demand load"
+ },
+ {,
+ "EventCode": "0x2C01C",
+ "EventName": "PM_CMPLU_STALL_DMISS_REMOTE",
+ "BriefDescription": "Completion stall by Dcache miss which resolved from remote chip (cache or memory)"
+ },
+ {,
+ "EventCode": "0x44040",
+ "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with dispatch conflict due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x2E050",
+ "EventName": "PM_DARQ0_7_9_ENTRIES",
+ "BriefDescription": "Cycles in which 7,8, or 9 DARQ entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x2D02E",
+ "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L2",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L2 data cache. This implies that a level 4 PWC access was not necessary for this translation"
+ },
+ {,
+ "EventCode": "0x3F05E",
+ "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation"
+ },
+ {,
+ "EventCode": "0x2E01E",
+ "EventName": "PM_CMPLU_STALL_NTC_FLUSH",
+ "BriefDescription": "Completion stall due to ntc flush"
+ },
+ {,
+ "EventCode": "0x1F14C",
+ "EventName": "PM_MRK_DPTEG_FROM_LL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x20130",
+ "EventName": "PM_MRK_INST_DECODED",
+ "BriefDescription": "An instruction was marked at decode time. Random Instruction Sampling (RIS) only"
+ },
+ {,
+ "EventCode": "0x3F144",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4D058",
+ "EventName": "PM_VECTOR_FLOP_CMPL",
+ "BriefDescription": "Vector FP instruction completed"
+ },
+ {,
+ "EventCode": "0x14040",
+ "EventName": "PM_INST_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 without conflict due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x4404E",
+ "EventName": "PM_INST_FROM_L3MISS_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L3 due to a instruction fetch"
+ },
+ {,
+ "EventCode": "0x3003A",
+ "EventName": "PM_CMPLU_STALL_EXCEPTION",
+ "BriefDescription": "Cycles in which the NTC instruction is not allowed to complete because it was interrupted by ANY exception, which has to be serviced before the instruction can complete"
+ },
+ {,
+ "EventCode": "0x4F144",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_ECO_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x3E044",
+ "EventName": "PM_DPTEG_FROM_L31_ECO_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x300F6",
+ "EventName": "PM_L1_DCACHE_RELOAD_VALID",
+ "BriefDescription": "DL1 reloaded due to Demand Load"
+ },
+ {,
+ "EventCode": "0x1415E",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS_CYC",
+ "BriefDescription": "Duration in cycles to reload from a location other than the local core's L3 due to a marked load"
+ },
+ {,
+ "EventCode": "0x1E052",
+ "EventName": "PM_CMPLU_STALL_SLB",
+ "BriefDescription": "Finish stall because the NTF instruction was awaiting L2 response for an SLB"
+ },
+ {,
+ "EventCode": "0x4404C",
+ "EventName": "PM_INST_FROM_DMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group (Distant) due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x3000E",
+ "EventName": "PM_FXU_1PLUS_BUSY",
+ "BriefDescription": "At least one of the 4 FXU units is busy"
+ },
+ {,
+ "EventCode": "0x2C048",
+ "EventName": "PM_DATA_FROM_LMEM",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a demand load"
+ },
+ {,
+ "EventCode": "0x3000A",
+ "EventName": "PM_CMPLU_STALL_PM",
+ "BriefDescription": "Finish stall because the NTF instruction was issued to the Permute execution pipe and waiting to finish. Includes permute and decimal fixed point instructions (128 bit BCD arithmetic) + a few 128 bit fixpoint add/subtract instructions with carry. Not qualified by vector or multicycle"
+ },
+ {,
+ "EventCode": "0x1504E",
+ "EventName": "PM_IPTEG_FROM_L2MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x1C052",
+ "EventName": "PM_DATA_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for a demand load"
+ },
+ {,
+ "EventCode": "0x30008",
+ "EventName": "PM_DISP_STARVED",
+ "BriefDescription": "Dispatched Starved"
+ },
+ {,
+ "EventCode": "0x14042",
+ "EventName": "PM_INST_FROM_L2",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x4000C",
+ "EventName": "PM_FREQ_UP",
+ "BriefDescription": "Power Management: Above Threshold A"
+ },
+ {,
+ "EventCode": "0x3C050",
+ "EventName": "PM_DATA_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump (prediction=correct) for a demand load"
+ },
+ {,
+ "EventCode": "0x25040",
+ "EventName": "PM_IPTEG_FROM_L2_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x10132",
+ "EventName": "PM_MRK_INST_ISSUED",
+ "BriefDescription": "Marked instruction issued"
+ },
+ {,
+ "EventCode": "0x1C046",
+ "EventName": "PM_DATA_FROM_L31_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x2C044",
+ "EventName": "PM_DATA_FROM_L31_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x2C04A",
+ "EventName": "PM_DATA_FROM_RL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on the same Node or Group ( Remote) due to a demand load"
+ },
+ {,
+ "EventCode": "0x24044",
+ "EventName": "PM_INST_FROM_L31_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x4C050",
+ "EventName": "PM_DATA_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for a demand load"
+ },
+ {,
+ "EventCode": "0x2C052",
+ "EventName": "PM_DATA_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for a demand load"
+ },
+ {,
+ "EventCode": "0x2F148",
+ "EventName": "PM_MRK_DPTEG_FROM_LMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4D01A",
+ "EventName": "PM_CMPLU_STALL_EIEIO",
+ "BriefDescription": "Finish stall because the NTF instruction is an EIEIO waiting for response from L2"
+ },
+ {,
+ "EventCode": "0x4F14E",
+ "EventName": "PM_MRK_DPTEG_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4F05A",
+ "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L3 data cache. This is the deepest level of PWC possible for a translation"
+ },
+ {,
+ "EventCode": "0x1F05A",
+ "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L2",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from the core's L2 data cache. This is the deepest level of PWC possible for a translation"
+ },
+ {,
+ "EventCode": "0x30068",
+ "EventName": "PM_L1_ICACHE_RELOADED_PREF",
+ "BriefDescription": "Counts all Icache prefetch reloads ( includes demand turned into prefetch)"
+ },
+ {,
+ "EventCode": "0x4C04A",
+ "EventName": "PM_DATA_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a demand load"
+ },
+ {,
+ "EventCode": "0x400FE",
+ "EventName": "PM_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a demand load"
+ },
+ {,
+ "EventCode": "0x3F058",
+ "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache"
+ },
+ {,
+ "EventCode": "0x3C052",
+ "EventName": "PM_DATA_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load"
+ },
+ {,
+ "EventCode": "0x4D142",
+ "EventName": "PM_MRK_DATA_FROM_L3",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a marked load"
+ },
+ {,
+ "EventCode": "0x30050",
+ "EventName": "PM_SYS_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was system pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x30028",
+ "EventName": "PM_CMPLU_STALL_SPEC_FINISH",
+ "BriefDescription": "Finish stall while waiting for the non-speculative finish of either a stcx waiting for its result or a load waiting for non-critical sectors of data and ECC"
+ },
+ {,
+ "EventCode": "0x400F4",
+ "EventName": "PM_RUN_PURR",
+ "BriefDescription": "Run_PURR"
+ },
+ {,
+ "EventCode": "0x3404C",
+ "EventName": "PM_INST_FROM_DL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x3D05A",
+ "EventName": "PM_NTC_ISSUE_HELD_OTHER",
+ "BriefDescription": "The NTC instruction is being held at dispatch during regular pipeline cycles, or because the VSU is busy with multi-cycle instructions, or because of a write-back collision with VSU"
+ },
+ {,
+ "EventCode": "0x2E048",
+ "EventName": "PM_DPTEG_FROM_LMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x2D02A",
+ "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L2",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L2 data cache"
+ },
+ {,
+ "EventCode": "0x1F05C",
+ "EventName": "PM_RADIX_PWC_L3_PDE_FROM_L3",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 3 page walk cache from the core's L3 data cache"
+ },
+ {,
+ "EventCode": "0x4D04A",
+ "EventName": "PM_DARQ0_0_3_ENTRIES",
+ "BriefDescription": "Cycles in which 3 or less DARQ entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x1404C",
+ "EventName": "PM_INST_FROM_LL4",
+ "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's L4 cache due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x200FD",
+ "EventName": "PM_L1_ICACHE_MISS",
+ "BriefDescription": "Demand iCache Miss"
+ },
+ {,
+ "EventCode": "0x34040",
+ "EventName": "PM_INST_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 with load hit store conflict due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x20138",
+ "EventName": "PM_MRK_ST_NEST",
+ "BriefDescription": "Marked store sent to nest"
+ },
+ {,
+ "EventCode": "0x44048",
+ "EventName": "PM_INST_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x35046",
+ "EventName": "PM_IPTEG_FROM_L21_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x4C04E",
+ "EventName": "PM_DATA_FROM_L3MISS_MOD",
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a demand load"
+ },
+ {,
+ "EventCode": "0x401E0",
+ "EventName": "PM_MRK_INST_CMPL",
+ "BriefDescription": "marked instruction completed"
+ },
+ {,
+ "EventCode": "0x2C128",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x34044",
+ "EventName": "PM_INST_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x4E018",
+ "EventName": "PM_CMPLU_STALL_NTC_DISP_FIN",
+ "BriefDescription": "Finish stall because the NTF instruction was one that must finish at dispatch."
+ },
+ {,
+ "EventCode": "0x2E05E",
+ "EventName": "PM_LMQ_EMPTY_CYC",
+ "BriefDescription": "Cycles in which the LMQ has no pending load misses for this thread"
+ },
+ {,
+ "EventCode": "0x4C122",
+ "EventName": "PM_DARQ1_0_3_ENTRIES",
+ "BriefDescription": "Cycles in which 3 or fewer DARQ1 entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x4F058",
+ "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation"
+ },
+ {,
+ "EventCode": "0x14046",
+ "EventName": "PM_INST_FROM_L31_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L3 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x3012C",
+ "EventName": "PM_MRK_ST_FWD",
+ "BriefDescription": "Marked st forwards"
+ },
+ {,
+ "EventCode": "0x101E0",
+ "EventName": "PM_MRK_INST_DISP",
+ "BriefDescription": "The thread has dispatched a randomly sampled marked instruction"
+ },
+ {,
+ "EventCode": "0x1D058",
+ "EventName": "PM_DARQ0_10_12_ENTRIES",
+ "BriefDescription": "Cycles in which 10 or more DARQ entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x300FE",
+ "EventName": "PM_DATA_FROM_L3MISS",
+ "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)"
+ },
+ {,
+ "EventCode": "0x30006",
+ "EventName": "PM_CMPLU_STALL_OTHER_CMPL",
+ "BriefDescription": "Instructions the core completed while this tread was stalled"
+ },
+ {,
+ "EventCode": "0x1005C",
+ "EventName": "PM_CMPLU_STALL_DP",
+ "BriefDescription": "Finish stall because the NTF instruction was a scalar instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by NOT vector"
+ },
+ {,
+ "EventCode": "0x1E042",
+ "EventName": "PM_DPTEG_FROM_L2",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x1016E",
+ "EventName": "PM_MRK_BR_CMPL",
+ "BriefDescription": "Branch Instruction completed"
+ },
+ {,
+ "EventCode": "0x2013A",
+ "EventName": "PM_MRK_BRU_FIN",
+ "BriefDescription": "bru marked instr finish"
+ },
+ {,
+ "EventCode": "0x4F05E",
+ "EventName": "PM_RADIX_PWC_L3_PTE_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 3 page walk cache from beyond the core's L3 data cache. This implies that a level 4 PWC access was not necessary for this translation. The source could be local/remote/distant memory or another core's cache"
+ },
+ {,
+ "EventCode": "0x400FC",
+ "EventName": "PM_ITLB_MISS",
+ "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed"
+ },
+ {,
+ "EventCode": "0x1E044",
+ "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4D05A",
+ "EventName": "PM_NON_MATH_FLOP_CMPL",
+ "BriefDescription": "Non FLOP operation completed"
+ },
+ {,
+ "EventCode": "0x101E2",
+ "EventName": "PM_MRK_BR_TAKEN_CMPL",
+ "BriefDescription": "Marked Branch Taken completed"
+ },
+ {,
+ "EventCode": "0x3E158",
+ "EventName": "PM_MRK_STCX_FAIL",
+ "BriefDescription": "marked stcx failed"
+ },
+ {,
+ "EventCode": "0x1C048",
+ "EventName": "PM_DATA_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x1C054",
+ "EventName": "PM_DATA_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for a demand load"
+ },
+ {,
+ "EventCode": "0x4405E",
+ "EventName": "PM_DARQ_STORE_REJECT",
+ "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry but It was rejected. Divide by PM_DARQ_STORE_XMIT to get reject ratio"
+ },
+ {,
+ "EventCode": "0x1C042",
+ "EventName": "PM_DATA_FROM_L2",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a demand load"
+ },
+ {,
+ "EventCode": "0x1D14C",
+ "EventName": "PM_MRK_DATA_FROM_LL4",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a marked load"
+ },
+ {,
+ "EventCode": "0x1006C",
+ "EventName": "PM_RUN_CYC_ST_MODE",
+ "BriefDescription": "Cycles run latch is set and core is in ST mode"
+ },
+ {,
+ "EventCode": "0x3C044",
+ "EventName": "PM_DATA_FROM_L31_ECO_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's ECO L3 on the same chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x4C052",
+ "EventName": "PM_DATA_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for a demand load"
+ },
+ {,
+ "EventCode": "0x20050",
+ "EventName": "PM_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope and data sourced across this scope was group pump for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x1F150",
+ "EventName": "PM_MRK_ST_L2DISP_TO_CMPL_CYC",
+ "BriefDescription": "cycles from L2 rc disp to l2 rc completion"
+ },
+ {,
+ "EventCode": "0x4505A",
+ "EventName": "PM_SP_FLOP_CMPL",
+ "BriefDescription": "SP instruction completed"
+ },
+ {,
+ "EventCode": "0x4000A",
+ "EventName": "PM_ISQ_36_44_ENTRIES",
+ "BriefDescription": "Cycles in which 36 or more Issue Queue entries are in use. This is a shared event, not per thread. There are 44 issue queue entries across 4 slices in the whole core"
+ },
+ {,
+ "EventCode": "0x2C12E",
+ "EventName": "PM_MRK_DATA_FROM_LL4_CYC",
+ "BriefDescription": "Duration in cycles to reload from the local chip's L4 cache due to a marked load"
+ },
+ {,
+ "EventCode": "0x2C058",
+ "EventName": "PM_MEM_PREF",
+ "BriefDescription": "Memory prefetch for this thread. Includes L4"
+ },
+ {,
+ "EventCode": "0x40012",
+ "EventName": "PM_L1_ICACHE_RELOADED_ALL",
+ "BriefDescription": "Counts all Icache reloads includes demand, prefetch, prefetch turned into demand and demand turned into prefetch"
+ },
+ {,
+ "EventCode": "0x3003C",
+ "EventName": "PM_CMPLU_STALL_NESTED_TEND",
+ "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tend and decrement the TEXASR nested level. This is a short delay"
+ },
+ {,
+ "EventCode": "0x3D05C",
+ "EventName": "PM_DISP_HELD_HB_FULL",
+ "BriefDescription": "Dispatch held due to History Buffer full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)"
+ },
+ {,
+ "EventCode": "0x30052",
+ "EventName": "PM_SYS_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x2E044",
+ "EventName": "PM_DPTEG_FROM_L31_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x34048",
+ "EventName": "PM_INST_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x45042",
+ "EventName": "PM_IPTEG_FROM_L3",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x15042",
+ "EventName": "PM_IPTEG_FROM_L2",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x1C05E",
+ "EventName": "PM_MEM_LOC_THRESH_LSU_MED",
+ "BriefDescription": "Local memory above threshold for data prefetch"
+ },
+ {,
+ "EventCode": "0x40134",
+ "EventName": "PM_MRK_INST_TIMEO",
+ "BriefDescription": "marked Instruction finish timeout (instruction lost)"
+ },
+ {,
+ "EventCode": "0x1002C",
+ "EventName": "PM_L1_DCACHE_RELOADED_ALL",
+ "BriefDescription": "L1 data cache reloaded for demand. If MMCR1[16] is 1, prefetches will be included as well"
+ },
+ {,
+ "EventCode": "0x30130",
+ "EventName": "PM_MRK_INST_FIN",
+ "BriefDescription": "marked instruction finished"
+ },
+ {,
+ "EventCode": "0x1F14A",
+ "EventName": "PM_MRK_DPTEG_FROM_RL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x3504E",
+ "EventName": "PM_DARQ0_4_6_ENTRIES",
+ "BriefDescription": "Cycles in which 4, 5, or 6 DARQ entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x30064",
+ "EventName": "PM_DARQ_STORE_XMIT",
+ "BriefDescription": "The DARQ attempted to transmit a store into an LSAQ or SRQ entry. Includes rejects. Not qualified by thread, so it includes counts for the whole core"
+ },
+ {,
+ "EventCode": "0x45046",
+ "EventName": "PM_IPTEG_FROM_L21_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x2C016",
+ "EventName": "PM_CMPLU_STALL_PASTE",
+ "BriefDescription": "Finish stall because the NTF instruction was a paste waiting for response from L2"
+ },
+ {,
+ "EventCode": "0x24156",
+ "EventName": "PM_MRK_STCX_FIN",
+ "BriefDescription": "Number of marked stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed"
+ },
+ {,
+ "EventCode": "0x15150",
+ "EventName": "PM_SYNC_MRK_PROBE_NOP",
+ "BriefDescription": "Marked probeNops which can cause synchronous interrupts"
+ },
+ {,
+ "EventCode": "0x301E4",
+ "EventName": "PM_MRK_BR_MPRED_CMPL",
+ "BriefDescription": "Marked Branch Mispredicted"
+ }
+] \ No newline at end of file
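Note that every object in these event files opens with a stray "{," token; the jevents build step in this tree evidently tolerates it, but a strict JSON parser will reject the files as-is. A minimal sketch (Python assumed; the path is taken from the hunk header below) that normalizes the stray comma before decoding, for ad-hoc inspection of the entries:

    import json
    import re

    def load_pmu_events(path):
        # Each object opens with "{," in these files; drop the stray comma
        # so a strict JSON decoder accepts the rest of the document.
        with open(path) as f:
            text = f.read()
        return json.loads(re.sub(r"\{\s*,", "{", text))

    # Index the entries by event name, e.g. from the memory.json hunk below.
    events = {e["EventName"]: e for e in load_pmu_events(
        "tools/perf/pmu-events/arch/powerpc/power9/memory.json")}
    print(events["PM_LSU_DERAT_MISS"]["BriefDescription"])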
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/memory.json b/tools/perf/pmu-events/arch/powerpc/power9/memory.json
new file mode 100644
index 000000000..2e2ebc700
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/memory.json
@@ -0,0 +1,127 @@
+[
+ {,
+ "EventCode": "0x3006E",
+ "EventName": "PM_NEST_REF_CLK",
+ "BriefDescription": "Multiply by 4 to obtain the number of PB cycles"
+ },
+ {,
+ "EventCode": "0x20010",
+ "EventName": "PM_PMC1_OVERFLOW",
+ "BriefDescription": "Overflow from counter 1"
+ },
+ {,
+ "EventCode": "0x2005A",
+ "EventName": "PM_DARQ1_7_9_ENTRIES",
+ "BriefDescription": "Cycles in which 7 to 9 DARQ1 entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x3C048",
+ "EventName": "PM_DATA_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x10008",
+ "EventName": "PM_RUN_SPURR",
+ "BriefDescription": "Run SPURR"
+ },
+ {,
+ "EventCode": "0x200F6",
+ "EventName": "PM_LSU_DERAT_MISS",
+ "BriefDescription": "DERAT Reloaded due to a DERAT miss"
+ },
+ {,
+ "EventCode": "0x4C048",
+ "EventName": "PM_DATA_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x1D15E",
+ "EventName": "PM_MRK_RUN_CYC",
+ "BriefDescription": "Run cycles in which a marked instruction is in the pipeline"
+ },
+ {,
+ "EventCode": "0x4003E",
+ "EventName": "PM_LD_CMPL",
+ "BriefDescription": "count of Loads completed"
+ },
+ {,
+ "EventCode": "0x4C042",
+ "EventName": "PM_DATA_FROM_L3",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 due to a demand load"
+ },
+ {,
+ "EventCode": "0x4D02C",
+ "EventName": "PM_PMC1_REWIND",
+ "BriefDescription": ""
+ },
+ {,
+ "EventCode": "0x15158",
+ "EventName": "PM_SYNC_MRK_L2HIT",
+ "BriefDescription": "Marked L2 Hits that can throw a synchronous interrupt"
+ },
+ {,
+ "EventCode": "0x3404A",
+ "EventName": "PM_INST_FROM_RMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x301E2",
+ "EventName": "PM_MRK_ST_CMPL",
+ "BriefDescription": "Marked store completed and sent to nest"
+ },
+ {,
+ "EventCode": "0x1C050",
+ "EventName": "PM_DATA_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for a demand load"
+ },
+ {,
+ "EventCode": "0x4C040",
+ "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_OTHER",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with dispatch conflict due to a demand load"
+ },
+ {,
+ "EventCode": "0x2E05C",
+ "EventName": "PM_LSU_REJECT_ERAT_MISS",
+ "BriefDescription": "LSU Reject due to ERAT (up to 4 per cycles)"
+ },
+ {,
+ "EventCode": "0x1000A",
+ "EventName": "PM_PMC3_REWIND",
+ "BriefDescription": "PMC3 rewind event. A rewind happens when a speculative event (such as latency or CPI stack) is selected on PMC3 and the stall reason or reload source did not match the one programmed in PMC3. When this occurs, the count in PMC3 will not change."
+ },
+ {,
+ "EventCode": "0x3C058",
+ "EventName": "PM_LARX_FIN",
+ "BriefDescription": "Larx finished"
+ },
+ {,
+ "EventCode": "0x1C040",
+ "EventName": "PM_DATA_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a demand load"
+ },
+ {,
+ "EventCode": "0x2C040",
+ "EventName": "PM_DATA_FROM_L2_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state due to a demand load"
+ },
+ {,
+ "EventCode": "0x2E05A",
+ "EventName": "PM_LRQ_REJECT",
+ "BriefDescription": "Internal LSU reject from LRQ. Rejects cause the load to go back to LRQ, but it stays contained within the LSU once it gets issued. This event counts the number of times the LRQ attempts to relaunch an instruction after a reject. Any load can suffer multiple rejects"
+ },
+ {,
+ "EventCode": "0x2C05C",
+ "EventName": "PM_INST_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for an instruction fetch (demand only)"
+ },
+ {,
+ "EventCode": "0x4D056",
+ "EventName": "PM_NON_FMA_FLOP_CMPL",
+ "BriefDescription": "Non FMA instruction completed"
+ },
+ {,
+ "EventCode": "0x3E050",
+ "EventName": "PM_DARQ1_4_6_ENTRIES",
+ "BriefDescription": "Cycles in which 4, 5, or 6 DARQ1 entries (out of 12) are in use"
+ }
+] \ No newline at end of file
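Two of the descriptions above spell out derived values rather than raw counts: PM_NEST_REF_CLK must be multiplied by 4 to obtain PB cycles, and PM_DARQ_STORE_REJECT is meant to be divided by PM_DARQ_STORE_XMIT to get a reject ratio. A minimal sketch of that arithmetic, assuming the raw counter values have already been collected (for example with perf stat):

    def pb_cycles(pm_nest_ref_clk: int) -> int:
        # PM_NEST_REF_CLK: "Multiply by 4 to obtain the number of PB cycles"
        return 4 * pm_nest_ref_clk

    def darq_store_reject_ratio(rejects: int, xmits: int) -> float:
        # PM_DARQ_STORE_REJECT / PM_DARQ_STORE_XMIT ("Divide by
        # PM_DARQ_STORE_XMIT to get reject ratio"), guarding against zero
        return rejects / xmits if xmits else 0.0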
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json
new file mode 100644
index 000000000..48cf4f920
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json
@@ -0,0 +1,2337 @@
+[
+ {,
+ "EventCode": "0x3084",
+ "EventName": "PM_ISU1_ISS_HOLD_ALL",
+ "BriefDescription": "All ISU rejects"
+ },
+ {,
+ "EventCode": "0xF880",
+ "EventName": "PM_SNOOP_TLBIE",
+ "BriefDescription": "TLBIE snoop"
+ },
+ {,
+ "EventCode": "0x4088",
+ "EventName": "PM_IC_DEMAND_REQ",
+ "BriefDescription": "Demand Instruction fetch request"
+ },
+ {,
+ "EventCode": "0x20A4",
+ "EventName": "PM_TM_TRESUME",
+ "BriefDescription": "TM resume instruction completed"
+ },
+ {,
+ "EventCode": "0x40008",
+ "EventName": "PM_SRQ_EMPTY_CYC",
+ "BriefDescription": "Cycles in which the SRQ has at least one (out of four) empty slice"
+ },
+ {,
+ "EventCode": "0x20064",
+ "EventName": "PM_IERAT_RELOAD_4K",
+ "BriefDescription": "IERAT reloaded (after a miss) for 4K pages"
+ },
+ {,
+ "EventCode": "0x260B4",
+ "EventName": "PM_L3_P2_LCO_RTY",
+ "BriefDescription": "L3 initiated LCO received retry on port 2 (can try 4 times)"
+ },
+ {,
+ "EventCode": "0x20006",
+ "EventName": "PM_DISP_HELD_ISSQ_FULL",
+ "BriefDescription": "Dispatch held due to Issue q full. Includes issue queue and branch queue"
+ },
+ {,
+ "EventCode": "0x201E4",
+ "EventName": "PM_MRK_DATA_FROM_L3MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L3 due to a marked load"
+ },
+ {,
+ "EventCode": "0x4E044",
+ "EventName": "PM_DPTEG_FROM_L31_ECO_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x40B8",
+ "EventName": "PM_BR_MPRED_TAKEN_CR",
+ "BriefDescription": "A Conditional Branch that resolved to taken was mispredicted as not taken (due to the BHT Direction Prediction)."
+ },
+ {,
+ "EventCode": "0xF8AC",
+ "EventName": "PM_DC_DEALLOC_NO_CONF",
+ "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)"
+ },
+ {,
+ "EventCode": "0xD090",
+ "EventName": "PM_LS0_DC_COLLISIONS",
+ "BriefDescription": "Read-write data cache collisions"
+ },
+ {,
+ "EventCode": "0x40BC",
+ "EventName": "PM_THRD_PRIO_0_1_CYC",
+ "BriefDescription": "Cycles thread running at priority level 0 or 1"
+ },
+ {,
+ "EventCode": "0x4C054",
+ "EventName": "PM_DERAT_MISS_16G_1G",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) or 1G (radix mode)"
+ },
+ {,
+ "EventCode": "0x2084",
+ "EventName": "PM_FLUSH_HB_RESTORE_CYC",
+ "BriefDescription": "Cycles in which no new instructions can be dispatched to the ICT after a flush. History buffer recovery"
+ },
+ {,
+ "EventCode": "0x4F054",
+ "EventName": "PM_RADIX_PWC_MISS",
+ "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache."
+ },
+ {,
+ "EventCode": "0x26882",
+ "EventName": "PM_L2_DC_INV",
+ "BriefDescription": "D-cache invalidates sent over the reload bus to the core"
+ },
+ {,
+ "EventCode": "0x24048",
+ "EventName": "PM_INST_FROM_LMEM",
+ "BriefDescription": "The processor's Instruction cache was reloaded from the local chip's Memory due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0xD8B4",
+ "EventName": "PM_LSU0_LRQ_S0_VALID_CYC",
+ "BriefDescription": "Slot 0 of LRQ valid"
+ },
+ {,
+ "EventCode": "0x2E052",
+ "EventName": "PM_TM_PASSED",
+ "BriefDescription": "Number of TM transactions that passed"
+ },
+ {,
+ "EventCode": "0xF088",
+ "EventName": "PM_LSU0_STORE_REJECT",
+ "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+ },
+ {,
+ "EventCode": "0x360B2",
+ "EventName": "PM_L3_GRP_GUESS_WRONG_LOW",
+ "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was LNS"
+ },
+ {,
+ "EventCode": "0x168A6",
+ "EventName": "PM_TM_CAM_OVERFLOW",
+ "BriefDescription": "L3 TM CAM is full when a L2 castout of TM_SC line occurs. Line is pushed to memory"
+ },
+ {,
+ "EventCode": "0xE8B0",
+ "EventName": "PM_TEND_PEND_CYC",
+ "BriefDescription": "TEND latency per thread"
+ },
+ {,
+ "EventCode": "0x4884",
+ "EventName": "PM_IBUF_FULL_CYC",
+ "BriefDescription": "Cycles No room in ibuff"
+ },
+ {,
+ "EventCode": "0xD08C",
+ "EventName": "PM_LSU2_LDMX_FIN",
+ "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+ },
+ {,
+ "EventCode": "0x300F8",
+ "EventName": "PM_TB_BIT_TRANS",
+ "BriefDescription": "timebase event"
+ },
+ {,
+ "EventCode": "0x3C040",
+ "EventName": "PM_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a demand load"
+ },
+ {,
+ "EventCode": "0xE0BC",
+ "EventName": "PM_LS0_PTE_TABLEWALK_CYC",
+ "BriefDescription": "Cycles when a tablewalk is pending on this thread on table 0"
+ },
+ {,
+ "EventCode": "0x3884",
+ "EventName": "PM_ISU3_ISS_HOLD_ALL",
+ "BriefDescription": "All ISU rejects"
+ },
+ {,
+ "EventCode": "0x468A0",
+ "EventName": "PM_L3_PF_OFF_CHIP_MEM",
+ "BriefDescription": "L3 PF from Off chip memory"
+ },
+ {,
+ "EventCode": "0x268AA",
+ "EventName": "PM_L3_P1_LCO_DATA",
+ "BriefDescription": "LCO sent with data port 1"
+ },
+ {,
+ "EventCode": "0xE894",
+ "EventName": "PM_LSU1_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1"
+ },
+ {,
+ "EventCode": "0x5888",
+ "EventName": "PM_IC_INVALIDATE",
+ "BriefDescription": "Ic line invalidated"
+ },
+ {,
+ "EventCode": "0x2890",
+ "EventName": "PM_DISP_CLB_HELD_TLBIE",
+ "BriefDescription": "Dispatch Hold: Due to TLBIE"
+ },
+ {,
+ "EventCode": "0x1001C",
+ "EventName": "PM_CMPLU_STALL_THRD",
+ "BriefDescription": "Completion Stalled because the thread was blocked"
+ },
+ {,
+ "EventCode": "0x368A6",
+ "EventName": "PM_SNP_TM_HIT_T",
+ "BriefDescription": "TM snoop that is a store hits line in L3 in T, Tn or Te state (shared modified)"
+ },
+ {,
+ "EventCode": "0x3001A",
+ "EventName": "PM_DATA_TABLEWALK_CYC",
+ "BriefDescription": "Data Tablewalk Cycles. Could be 1 or 2 active tablewalks. Includes data prefetches."
+ },
+ {,
+ "EventCode": "0xD894",
+ "EventName": "PM_LS3_DC_COLLISIONS",
+ "BriefDescription": "Read-write data cache collisions"
+ },
+ {,
+ "EventCode": "0x35158",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0xF0B4",
+ "EventName": "PM_DC_PREF_CONS_ALLOC",
+ "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch. The sum of this pair subtracted from the total number of allocs will give the total allocs in normal phase"
+ },
+ {,
+ "EventCode": "0xF894",
+ "EventName": "PM_LSU3_L1_CAM_CANCEL",
+ "BriefDescription": "ls3 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0x2888",
+ "EventName": "PM_FLUSH_DISP_TLBIE",
+ "BriefDescription": "Dispatch Flush: TLBIE"
+ },
+ {,
+ "EventCode": "0x4E11E",
+ "EventName": "PM_MRK_DATA_FROM_DMEM_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load"
+ },
+ {,
+ "EventCode": "0x14156",
+ "EventName": "PM_MRK_DATA_FROM_L2_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 due to a marked load"
+ },
+ {,
+ "EventCode": "0x468A6",
+ "EventName": "PM_RD_CLEARING_SC",
+ "BriefDescription": "Core TM load hits line in L3 in TM_SC state and causes it to be invalidated"
+ },
+ {,
+ "EventCode": "0xD0B0",
+ "EventName": "PM_HWSYNC",
+ "BriefDescription": ""
+ },
+ {,
+ "EventCode": "0x168B0",
+ "EventName": "PM_L3_P1_NODE_PUMP",
+ "BriefDescription": "L3 PF sent with nodal scope port 1, counts even retried requests"
+ },
+ {,
+ "EventCode": "0xD0BC",
+ "EventName": "PM_LSU0_1_LRQF_FULL_CYC",
+ "BriefDescription": "Counts the number of cycles the LRQF is full. LRQF is the queue that holds loads between finish and completion. If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ"
+ },
+ {,
+ "EventCode": "0x2D148",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x468AE",
+ "EventName": "PM_L3_P3_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted"
+ },
+ {,
+ "EventCode": "0x460A8",
+ "EventName": "PM_SN_HIT",
+ "BriefDescription": "Any port snooper hit L3. Up to 4 can happen in a cycle but we only count 1"
+ },
+ {,
+ "EventCode": "0x360AA",
+ "EventName": "PM_L3_P0_CO_MEM",
+ "BriefDescription": "L3 CO to memory port 0 with or without data"
+ },
+ {,
+ "EventCode": "0xF0A4",
+ "EventName": "PM_DC_PREF_HW_ALLOC",
+ "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism"
+ },
+ {,
+ "EventCode": "0xF0BC",
+ "EventName": "PM_LS2_UNALIGNED_ST",
+ "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0xD0AC",
+ "EventName": "PM_SRQ_SYNC_CYC",
+ "BriefDescription": "A sync is in the S2Q (edge detect to count)"
+ },
+ {,
+ "EventCode": "0x401E6",
+ "EventName": "PM_MRK_INST_FROM_L3MISS",
+ "BriefDescription": "Marked instruction was reloaded from a location beyond the local chiplet"
+ },
+ {,
+ "EventCode": "0x58A8",
+ "EventName": "PM_DECODE_HOLD_ICT_FULL",
+ "BriefDescription": "Counts the number of cycles in which the IFU was not able to decode and transmit one or more instructions because all itags were in use. This means the ICT is full for this thread"
+ },
+ {,
+ "EventCode": "0x26082",
+ "EventName": "PM_L2_IC_INV",
+ "BriefDescription": "I-cache Invalidates sent over the realod bus to the core"
+ },
+ {,
+ "EventCode": "0xC8AC",
+ "EventName": "PM_LSU_FLUSH_RELAUNCH_MISS",
+ "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent"
+ },
+ {,
+ "EventCode": "0x260A4",
+ "EventName": "PM_L3_LD_HIT",
+ "BriefDescription": "L3 Hits for demand LDs"
+ },
+ {,
+ "EventCode": "0xF0A0",
+ "EventName": "PM_DATA_STORE",
+ "BriefDescription": "All ops that drain from s2q to L2 containing data"
+ },
+ {,
+ "EventCode": "0x1D148",
+ "EventName": "PM_MRK_DATA_FROM_RMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group ( Remote) due to a marked load"
+ },
+ {,
+ "EventCode": "0x16088",
+ "EventName": "PM_L2_LOC_GUESS_CORRECT",
+ "BriefDescription": "L2 guess local (LNS) and guess was correct (ie data local)"
+ },
+ {,
+ "EventCode": "0x160A4",
+ "EventName": "PM_L3_HIT",
+ "BriefDescription": "L3 Hits (L2 miss hitting L3, including data/instrn/xlate)"
+ },
+ {,
+ "EventCode": "0xE09C",
+ "EventName": "PM_LSU0_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss"
+ },
+ {,
+ "EventCode": "0x168B4",
+ "EventName": "PM_L3_P1_LCO_RTY",
+ "BriefDescription": "L3 initiated LCO received retry on port 1 (can try 4 times)"
+ },
+ {,
+ "EventCode": "0x268AC",
+ "EventName": "PM_L3_RD_USAGE",
+ "BriefDescription": "Rotating sample of 16 RD actives"
+ },
+ {,
+ "EventCode": "0x1415C",
+ "EventName": "PM_MRK_DATA_FROM_L3_MEPF_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 without dispatch conflicts hit on Mepf state due to a marked load"
+ },
+ {,
+ "EventCode": "0xE880",
+ "EventName": "PM_L1_SW_PREF",
+ "BriefDescription": "Software L1 Prefetches, including SW Transient Prefetches"
+ },
+ {,
+ "EventCode": "0x288C",
+ "EventName": "PM_DISP_CLB_HELD_BAL",
+ "BriefDescription": "Dispatch/CLB Hold: Balance Flush"
+ },
+ {,
+ "EventCode": "0x101EA",
+ "EventName": "PM_MRK_L1_RELOAD_VALID",
+ "BriefDescription": "Marked demand reload"
+ },
+ {,
+ "EventCode": "0x1D156",
+ "EventName": "PM_MRK_LD_MISS_L1_CYC",
+ "BriefDescription": "Marked ld latency"
+ },
+ {,
+ "EventCode": "0x4C01A",
+ "EventName": "PM_CMPLU_STALL_DMISS_L3MISS",
+ "BriefDescription": "Completion stall due to cache miss resolving missed the L3"
+ },
+ {,
+ "EventCode": "0x2006C",
+ "EventName": "PM_RUN_CYC_SMT4_MODE",
+ "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode"
+ },
+ {,
+ "EventCode": "0x1D14E",
+ "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC",
+ "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load"
+ },
+ {,
+ "EventCode": "0xF888",
+ "EventName": "PM_LSU1_STORE_REJECT",
+ "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+ },
+ {,
+ "EventCode": "0xC098",
+ "EventName": "PM_LS2_UNALIGNED_LD",
+ "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0x20058",
+ "EventName": "PM_DARQ1_10_12_ENTRIES",
+ "BriefDescription": "Cycles in which 10 or more DARQ1 entries (out of 12) are in use"
+ },
+ {,
+ "EventCode": "0x360A6",
+ "EventName": "PM_SNP_TM_HIT_M",
+ "BriefDescription": "TM snoop that is a store hits line in L3 in M or Mu state (exclusive modified)"
+ },
+ {,
+ "EventCode": "0x5898",
+ "EventName": "PM_LINK_STACK_INVALID_PTR",
+ "BriefDescription": "It is most often caused by certain types of flush where the pointer is not available. Can result in the data in the link stack becoming unusable."
+ },
+ {,
+ "EventCode": "0x46088",
+ "EventName": "PM_L2_CHIP_PUMP",
+ "BriefDescription": "RC requests that were local (aka chip) pump attempts"
+ },
+ {,
+ "EventCode": "0x28A0",
+ "EventName": "PM_TM_TSUSPEND",
+ "BriefDescription": "TM suspend instruction completed"
+ },
+ {,
+ "EventCode": "0x20054",
+ "EventName": "PM_L1_PREF",
+ "BriefDescription": "A data line was written to the L1 due to a hardware or software prefetch"
+ },
+ {,
+ "EventCode": "0x2608E",
+ "EventName": "PM_TM_LD_CONF",
+ "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)"
+ },
+ {,
+ "EventCode": "0x1D144",
+ "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x400FA",
+ "EventName": "PM_RUN_INST_CMPL",
+ "BriefDescription": "Run_Instructions"
+ },
+ {,
+ "EventCode": "0x15154",
+ "EventName": "PM_SYNC_MRK_L3MISS",
+ "BriefDescription": "Marked L3 misses that can throw a synchronous interrupt"
+ },
+ {,
+ "EventCode": "0xE0B4",
+ "EventName": "PM_LS0_TM_DISALLOW",
+ "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+ },
+ {,
+ "EventCode": "0x26884",
+ "EventName": "PM_DSIDE_MRU_TOUCH",
+ "BriefDescription": "D-side L2 MRU touch commands sent to the L2"
+ },
+ {,
+ "EventCode": "0x30134",
+ "EventName": "PM_MRK_ST_CMPL_INT",
+ "BriefDescription": "marked store finished with intervention"
+ },
+ {,
+ "EventCode": "0xC0B8",
+ "EventName": "PM_LSU_FLUSH_SAO",
+ "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush"
+ },
+ {,
+ "EventCode": "0x50A8",
+ "EventName": "PM_EAT_FORCE_MISPRED",
+ "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued"
+ },
+ {,
+ "EventCode": "0xC094",
+ "EventName": "PM_LS0_UNALIGNED_LD",
+ "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0xF8BC",
+ "EventName": "PM_LS3_UNALIGNED_ST",
+ "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0x460AE",
+ "EventName": "PM_L3_P2_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted"
+ },
+ {,
+ "EventCode": "0x58B0",
+ "EventName": "PM_BTAC_GOOD_RESULT",
+ "BriefDescription": "BTAC predicts a taken branch and the BHT agrees, and the target address is correct"
+ },
+ {,
+ "EventCode": "0x1C04C",
+ "EventName": "PM_DATA_FROM_LL4",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's L4 cache due to a demand load"
+ },
+ {,
+ "EventCode": "0x3608E",
+ "EventName": "PM_TM_ST_CONF",
+ "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)"
+ },
+ {,
+ "EventCode": "0xF8A0",
+ "EventName": "PM_NON_DATA_STORE",
+ "BriefDescription": "All ops that drain from s2q to L2 and contain no data"
+ },
+ {,
+ "EventCode": "0x3F146",
+ "EventName": "PM_MRK_DPTEG_FROM_L21_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x40A0",
+ "EventName": "PM_BR_UNCOND",
+ "BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve."
+ },
+ {,
+ "EventCode": "0xF8A8",
+ "EventName": "PM_DC_PREF_FUZZY_CONF",
+ "BriefDescription": "A demand load referenced a line in an active fuzzy prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software.Fuzzy stream confirm (out of order effects, or pf cant keep up)"
+ },
+ {,
+ "EventCode": "0xF8A4",
+ "EventName": "PM_DC_PREF_SW_ALLOC",
+ "BriefDescription": "Prefetch stream allocated by software prefetching"
+ },
+ {,
+ "EventCode": "0xE0A0",
+ "EventName": "PM_LSU2_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss"
+ },
+ {,
+ "EventCode": "0xC880",
+ "EventName": "PM_LS1_LD_VECTOR_FIN",
+ "BriefDescription": "LS1 finished load vector op"
+ },
+ {,
+ "EventCode": "0x2894",
+ "EventName": "PM_TM_OUTER_TEND",
+ "BriefDescription": "Completion time outer tend"
+ },
+ {,
+ "EventCode": "0xF098",
+ "EventName": "PM_XLATE_HPT_MODE",
+ "BriefDescription": "LSU reports every cycle the thread is in HPT translation mode (as opposed to radix mode)"
+ },
+ {,
+ "EventCode": "0x2C04E",
+ "EventName": "PM_LD_MISS_L1_FIN",
+ "BriefDescription": "Number of load instructions that finished with an L1 miss. Note that even if a load spans multiple slices this event will increment only once per load op."
+ },
+ {,
+ "EventCode": "0x30162",
+ "EventName": "PM_MRK_LSU_DERAT_MISS",
+ "BriefDescription": "Marked derat reload (miss) for any page size"
+ },
+ {,
+ "EventCode": "0x160A0",
+ "EventName": "PM_L3_PF_MISS_L3",
+ "BriefDescription": "L3 PF missed in L3"
+ },
+ {,
+ "EventCode": "0x1C04A",
+ "EventName": "PM_DATA_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load"
+ },
+ {,
+ "EventCode": "0x268B0",
+ "EventName": "PM_L3_P1_GRP_PUMP",
+ "BriefDescription": "L3 PF sent with grp scope port 1, counts even retried requests"
+ },
+ {,
+ "EventCode": "0x30016",
+ "EventName": "PM_CMPLU_STALL_SRQ_FULL",
+ "BriefDescription": "Finish stall because the NTF instruction was a store that was held in LSAQ because the SRQ was full"
+ },
+ {,
+ "EventCode": "0x40B4",
+ "EventName": "PM_BR_PRED_TA",
+ "BriefDescription": "Conditional Branch Completed that had its target address predicted. Only XL-form branches set this event. This equal the sum of CCACHE, LSTACK, and PCACHE"
+ },
+ {,
+ "EventCode": "0x40AC",
+ "EventName": "PM_BR_MPRED_CCACHE",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Count Cache Target Prediction"
+ },
+ {,
+ "EventCode": "0x3688A",
+ "EventName": "PM_L2_RTY_LD",
+ "BriefDescription": "RC retries on PB for any load from core (excludes DCBFs)"
+ },
+ {,
+ "EventCode": "0xE08C",
+ "EventName": "PM_LSU0_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit. There is no secondary ERAT"
+ },
+ {,
+ "EventCode": "0xE088",
+ "EventName": "PM_LS2_ERAT_MISS_PREF",
+ "BriefDescription": "LS0 Erat miss due to prefetch"
+ },
+ {,
+ "EventCode": "0xF0A8",
+ "EventName": "PM_DC_PREF_CONF",
+ "BriefDescription": "A demand load referenced a line in an active prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software. Includes forwards and backwards streams"
+ },
+ {,
+ "EventCode": "0x16888",
+ "EventName": "PM_L2_LOC_GUESS_WRONG",
+ "BriefDescription": "L2 guess local (LNS) and guess was not correct (ie data not on chip)"
+ },
+ {,
+ "EventCode": "0xC888",
+ "EventName": "PM_LSU_DTLB_MISS_64K",
+ "BriefDescription": "Data TLB Miss page size 64K"
+ },
+ {,
+ "EventCode": "0xE0A4",
+ "EventName": "PM_TMA_REQ_L2",
+ "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding"
+ },
+ {,
+ "EventCode": "0xC088",
+ "EventName": "PM_LSU_DTLB_MISS_4K",
+ "BriefDescription": "Data TLB Miss page size 4K"
+ },
+ {,
+ "EventCode": "0x3C042",
+ "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 with dispatch conflict due to a demand load"
+ },
+ {,
+ "EventCode": "0x168AA",
+ "EventName": "PM_L3_P1_LCO_NO_DATA",
+ "BriefDescription": "Dataless L3 LCO sent port 1"
+ },
+ {,
+ "EventCode": "0x3D140",
+ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_OTHER_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 with dispatch conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0xC89C",
+ "EventName": "PM_LS1_LAUNCH_HELD_PREF",
+ "BriefDescription": "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle"
+ },
+ {,
+ "EventCode": "0x4894",
+ "EventName": "PM_IC_RELOAD_PRIVATE",
+ "BriefDescription": "Reloading line was brought in private for a specific thread. Most lines are brought in shared for all eight threads. If RA does not match then invalidates and then brings it shared to other thread. In P7 line brought in private , then line was invalidat"
+ },
+ {,
+ "EventCode": "0x1688E",
+ "EventName": "PM_TM_LD_CAUSED_FAIL",
+ "BriefDescription": "Non-TM Load caused any thread to fail"
+ },
+ {,
+ "EventCode": "0x26084",
+ "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER",
+ "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflicts with an L2 machines (e.g. Read-Claim/Snoop machine not available)"
+ },
+ {,
+ "EventCode": "0x101E4",
+ "EventName": "PM_MRK_L1_ICACHE_MISS",
+ "BriefDescription": "sampled Instruction suffered an icache Miss"
+ },
+ {,
+ "EventCode": "0x20A0",
+ "EventName": "PM_TM_NESTED_TBEGIN",
+ "BriefDescription": "Completion Tm nested tbegin"
+ },
+ {,
+ "EventCode": "0x368AA",
+ "EventName": "PM_L3_P1_CO_MEM",
+ "BriefDescription": "L3 CO to memory port 1 with or without data"
+ },
+ {,
+ "EventCode": "0xC8A4",
+ "EventName": "PM_LSU3_FALSE_LHS",
+ "BriefDescription": "False LHS match detected"
+ },
+ {,
+ "EventCode": "0xF0B0",
+ "EventName": "PM_L3_LD_PREF",
+ "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest"
+ },
+ {,
+ "EventCode": "0x4D012",
+ "EventName": "PM_PMC3_SAVED",
+ "BriefDescription": "PMC3 Rewind Value saved"
+ },
+ {,
+ "EventCode": "0xE888",
+ "EventName": "PM_LS3_ERAT_MISS_PREF",
+ "BriefDescription": "LS1 Erat miss due to prefetch"
+ },
+ {,
+ "EventCode": "0x368B4",
+ "EventName": "PM_L3_RD0_BUSY",
+ "BriefDescription": "Lifetime, sample of RD machine 0 valid"
+ },
+ {,
+ "EventCode": "0x46080",
+ "EventName": "PM_L2_DISP_ALL_L2MISS",
+ "BriefDescription": "All successful D-side-Ld/St or I-side-instruction-fetch dispatches for this thread that were an L2 miss"
+ },
+ {,
+ "EventCode": "0xF8B8",
+ "EventName": "PM_LS1_UNALIGNED_ST",
+ "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0x408C",
+ "EventName": "PM_L1_DEMAND_WRITE",
+ "BriefDescription": "Instruction Demand sectors written into IL1"
+ },
+ {,
+ "EventCode": "0x368A8",
+ "EventName": "PM_SN_INVL",
+ "BriefDescription": "Any port snooper detects a store to a line in the Sx state and invalidates the line. Up to 4 can happen in a cycle but we only count 1"
+ },
+ {,
+ "EventCode": "0x160B2",
+ "EventName": "PM_L3_LOC_GUESS_CORRECT",
+ "BriefDescription": "Prefetch scope predictor selected LNS and was correct"
+ },
+ {,
+ "EventCode": "0x48B4",
+ "EventName": "PM_DECODE_FUSION_CONST_GEN",
+ "BriefDescription": "32-bit constant generation"
+ },
+ {,
+ "EventCode": "0x4D146",
+ "EventName": "PM_MRK_DATA_FROM_L21_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0xE080",
+ "EventName": "PM_S2Q_FULL",
+ "BriefDescription": "Cycles during which the S2Q is full"
+ },
+ {,
+ "EventCode": "0x268B4",
+ "EventName": "PM_L3_P3_LCO_RTY",
+ "BriefDescription": "L3 initiated LCO received retry on port 3 (can try 4 times)"
+ },
+ {,
+ "EventCode": "0xD8B8",
+ "EventName": "PM_LSU0_LMQ_S0_VALID",
+ "BriefDescription": "Slot 0 of LMQ valid"
+ },
+ {,
+ "EventCode": "0x2098",
+ "EventName": "PM_TM_NESTED_TEND",
+ "BriefDescription": "Completion time nested tend"
+ },
+ {,
+ "EventCode": "0x368A0",
+ "EventName": "PM_L3_PF_OFF_CHIP_CACHE",
+ "BriefDescription": "L3 PF from Off chip cache"
+ },
+ {,
+ "EventCode": "0x20056",
+ "EventName": "PM_TAKEN_BR_MPRED_CMPL",
+ "BriefDescription": "Total number of taken branches that were incorrectly predicted as not-taken. This event counts branches completed and does not include speculative instructions"
+ },
+ {,
+ "EventCode": "0x4688A",
+ "EventName": "PM_L2_SYS_PUMP",
+ "BriefDescription": "RC requests that were system pump attempts"
+ },
+ {,
+ "EventCode": "0xE090",
+ "EventName": "PM_LSU2_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit. There is no secondary ERAT"
+ },
+ {,
+ "EventCode": "0x4001C",
+ "EventName": "PM_INST_IMC_MATCH_CMPL",
+ "BriefDescription": "IMC Match Count"
+ },
+ {,
+ "EventCode": "0x40A8",
+ "EventName": "PM_BR_PRED_LSTACK",
+ "BriefDescription": "Conditional Branch Completed that used the Link Stack for Target Prediction"
+ },
+ {,
+ "EventCode": "0x268A2",
+ "EventName": "PM_L3_CI_MISS",
+ "BriefDescription": "L3 castins miss (total count)"
+ },
+ {,
+ "EventCode": "0x289C",
+ "EventName": "PM_TM_NON_FAV_TBEGIN",
+ "BriefDescription": "Dispatch time non favored tbegin"
+ },
+ {,
+ "EventCode": "0xF08C",
+ "EventName": "PM_LSU2_STORE_REJECT",
+ "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+ },
+ {,
+ "EventCode": "0x360A0",
+ "EventName": "PM_L3_PF_ON_CHIP_CACHE",
+ "BriefDescription": "L3 PF from On chip cache"
+ },
+ {,
+ "EventCode": "0x35152",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS_CYC",
+ "BriefDescription": "Duration in cycles to reload from a location other than the local core's L2 due to a marked load"
+ },
+ {,
+ "EventCode": "0x160AC",
+ "EventName": "PM_L3_SN_USAGE",
+ "BriefDescription": "Rotating sample of 16 snoop valids"
+ },
+ {,
+ "EventCode": "0x1608C",
+ "EventName": "PM_RC0_BUSY",
+ "BriefDescription": "RC mach 0 Busy. Used by PMU to sample ave RC lifetime (mach0 used as sample point)"
+ },
+ {,
+ "EventCode": "0x36082",
+ "EventName": "PM_L2_LD_DISP",
+ "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread"
+ },
+ {,
+ "EventCode": "0xF8B0",
+ "EventName": "PM_L3_SW_PREF",
+ "BriefDescription": "L3 load prefetch, sourced from a software prefetch stream, was sent to the nest"
+ },
+ {,
+ "EventCode": "0xF884",
+ "EventName": "PM_TABLEWALK_CYC_PREF",
+ "BriefDescription": "tablewalk qualified for pte prefetches"
+ },
+ {,
+ "EventCode": "0x4D144",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x16884",
+ "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR",
+ "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflicts with an L2 machines already working on this line (e.g. ld-hit-stq or Read-claim/Castout/Snoop machines)"
+ },
+ {,
+ "EventCode": "0x460A0",
+ "EventName": "PM_L3_PF_ON_CHIP_MEM",
+ "BriefDescription": "L3 PF from On chip memory"
+ },
+ {,
+ "EventCode": "0xF084",
+ "EventName": "PM_PTE_PREFETCH",
+ "BriefDescription": "PTE prefetches"
+ },
+ {,
+ "EventCode": "0x2D026",
+ "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L2",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L2 data cache"
+ },
+ {,
+ "EventCode": "0x48B0",
+ "EventName": "PM_BR_MPRED_PCACHE",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to pattern cache prediction"
+ },
+ {,
+ "EventCode": "0x2C126",
+ "EventName": "PM_MRK_DATA_FROM_L2",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 due to a marked load"
+ },
+ {,
+ "EventCode": "0xE0AC",
+ "EventName": "PM_TM_FAIL_TLBIE",
+ "BriefDescription": "Transaction failed because there was a TLBIE hit in the bloom filter"
+ },
+ {,
+ "EventCode": "0x260AA",
+ "EventName": "PM_L3_P0_LCO_DATA",
+ "BriefDescription": "LCO sent with data port 0"
+ },
+ {,
+ "EventCode": "0x4888",
+ "EventName": "PM_IC_PREF_REQ",
+ "BriefDescription": "Instruction prefetch requests"
+ },
+ {,
+ "EventCode": "0xC898",
+ "EventName": "PM_LS3_UNALIGNED_LD",
+ "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0x488C",
+ "EventName": "PM_IC_PREF_WRITE",
+ "BriefDescription": "Instruction prefetch written into IL1"
+ },
+ {,
+ "EventCode": "0xF89C",
+ "EventName": "PM_XLATE_MISS",
+ "BriefDescription": "The LSU requested a line from L2 for translation. It may be satisfied from any source beyond L2. Includes speculative instructions. Includes instruction, prefetch and demand"
+ },
+ {,
+ "EventCode": "0x14158",
+ "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 without conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x35156",
+ "EventName": "PM_MRK_DATA_FROM_L31_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0xC88C",
+ "EventName": "PM_LSU_DTLB_MISS_16G_1G",
+ "BriefDescription": "Data TLB Miss page size 16G (HPT) or 1G (Radix)"
+ },
+ {,
+ "EventCode": "0x268A6",
+ "EventName": "PM_TM_RST_SC",
+ "BriefDescription": "TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated"
+ },
+ {,
+ "EventCode": "0x468A4",
+ "EventName": "PM_L3_TRANS_PF",
+ "BriefDescription": "L3 Transient prefetch received from L2"
+ },
+ {,
+ "EventCode": "0x4094",
+ "EventName": "PM_IC_PREF_CANCEL_L2",
+ "BriefDescription": "L2 Squashed a demand or prefetch request"
+ },
+ {,
+ "EventCode": "0x48AC",
+ "EventName": "PM_BR_MPRED_LSTACK",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Link Stack Target Prediction"
+ },
+ {,
+ "EventCode": "0xE88C",
+ "EventName": "PM_LSU1_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit. There is no secondary ERAT"
+ },
+ {,
+ "EventCode": "0xC0B4",
+ "EventName": "PM_LSU_FLUSH_WRK_ARND",
+ "BriefDescription": "LSU workaround flush. These flushes are setup with programmable scan only latches to perform various actions when the flush macro receives a trigger from the dbg macros. These actions include things like flushing the next op encountered for a particular thread or flushing the next op that is NTC op that is encountered on a particular slice. The kind of flush that the workaround is setup to perform is highly variable."
+ },
+ {,
+ "EventCode": "0x34054",
+ "EventName": "PM_PARTIAL_ST_FIN",
+ "BriefDescription": "Any store finished by an LSU slice"
+ },
+ {,
+ "EventCode": "0x5880",
+ "EventName": "PM_THRD_PRIO_6_7_CYC",
+ "BriefDescription": "Cycles thread running at priority level 6 or 7"
+ },
+ {,
+ "EventCode": "0x4898",
+ "EventName": "PM_IC_DEMAND_L2_BR_REDIRECT",
+ "BriefDescription": "L2 I cache demand request due to branch Mispredict ( 15 cycle path)"
+ },
+ {,
+ "EventCode": "0x4880",
+ "EventName": "PM_BANK_CONFLICT",
+ "BriefDescription": "Read blocked due to interleave conflict. The ifar logic will detect an interleave conflict and kill the data that was read that cycle."
+ },
+ {,
+ "EventCode": "0x360B0",
+ "EventName": "PM_L3_P0_SYS_PUMP",
+ "BriefDescription": "L3 PF sent with sys scope port 0, counts even retried requests"
+ },
+ {,
+ "EventCode": "0x3006A",
+ "EventName": "PM_IERAT_RELOAD_64K",
+ "BriefDescription": "IERAT Reloaded (Miss) for a 64k page"
+ },
+ {,
+ "EventCode": "0xD8BC",
+ "EventName": "PM_LSU2_3_LRQF_FULL_CYC",
+ "BriefDescription": "Counts the number of cycles the LRQF is full. LRQF is the queue that holds loads between finish and completion. If it fills up, instructions stay in LRQ until completion, potentially backing up the LRQ"
+ },
+ {,
+ "EventCode": "0x46086",
+ "EventName": "PM_L2_SN_M_RD_DONE",
+ "BriefDescription": "Snoop dispatched for a read and was M (true M)"
+ },
+ {,
+ "EventCode": "0x40154",
+ "EventName": "PM_MRK_FAB_RSP_BKILL",
+ "BriefDescription": "Marked store had to do a bkill"
+ },
+ {,
+ "EventCode": "0xF094",
+ "EventName": "PM_LSU2_L1_CAM_CANCEL",
+ "BriefDescription": "ls2 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0x2D014",
+ "EventName": "PM_CMPLU_STALL_LRQ_FULL",
+ "BriefDescription": "Finish stall because the NTF instruction was a load that was held in LSAQ (load-store address queue) because the LRQ (load-reorder queue) was full"
+ },
+ {,
+ "EventCode": "0x3E05E",
+ "EventName": "PM_L3_CO_MEPF",
+ "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request"
+ },
+ {,
+ "EventCode": "0x460A2",
+ "EventName": "PM_L3_LAT_CI_HIT",
+ "BriefDescription": "L3 Lateral Castins Hit"
+ },
+ {,
+ "EventCode": "0x3D14E",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x3D15E",
+ "EventName": "PM_MULT_MRK",
+ "BriefDescription": "mult marked instr"
+ },
+ {,
+ "EventCode": "0x4084",
+ "EventName": "PM_EAT_FULL_CYC",
+ "BriefDescription": "Cycles No room in EAT"
+ },
+ {,
+ "EventCode": "0x5098",
+ "EventName": "PM_LINK_STACK_WRONG_ADD_PRED",
+ "BriefDescription": "Link stack predicts wrong address, because of link stack design limitation or software violating the coding conventions"
+ },
+ {,
+ "EventCode": "0x2C050",
+ "EventName": "PM_DATA_GRP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was group pump (prediction=correct) for a demand load"
+ },
+ {,
+ "EventCode": "0xC0A4",
+ "EventName": "PM_LSU2_FALSE_LHS",
+ "BriefDescription": "False LHS match detected"
+ },
+ {,
+ "EventCode": "0x58A0",
+ "EventName": "PM_LINK_STACK_CORRECT",
+ "BriefDescription": "Link stack predicts right address"
+ },
+ {,
+ "EventCode": "0x36886",
+ "EventName": "PM_L2_SN_SX_I_DONE",
+ "BriefDescription": "Snoop dispatched and went from Sx to Ix"
+ },
+ {,
+ "EventCode": "0x4E04A",
+ "EventName": "PM_DPTEG_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x2C12C",
+ "EventName": "PM_MRK_DATA_FROM_DL4_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's L4 on a different Node or Group (Distant) due to a marked load"
+ },
+ {,
+ "EventCode": "0x4080",
+ "EventName": "PM_INST_FROM_L1",
+ "BriefDescription": "Instruction fetches from L1. L1 instruction hit"
+ },
+ {,
+ "EventCode": "0xE898",
+ "EventName": "PM_LSU3_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1"
+ },
+ {,
+ "EventCode": "0x260A0",
+ "EventName": "PM_L3_CO_MEM",
+ "BriefDescription": "L3 CO to memory OR of port 0 and 1 (lossy = may undercount if two cresp come in the same cyc)"
+ },
+ {,
+ "EventCode": "0x16082",
+ "EventName": "PM_L2_CASTOUT_MOD",
+ "BriefDescription": "L2 Castouts - Modified (M,Mu,Me)"
+ },
+ {,
+ "EventCode": "0xC09C",
+ "EventName": "PM_LS0_LAUNCH_HELD_PREF",
+ "BriefDescription": "Number of times a load or store instruction was unable to launch/relaunch because a high priority prefetch used that relaunch cycle"
+ },
+ {,
+ "EventCode": "0xC8B8",
+ "EventName": "PM_LSU_FLUSH_LARX_STCX",
+ "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches"
+ },
+ {,
+ "EventCode": "0x260A6",
+ "EventName": "PM_NON_TM_RST_SC",
+ "BriefDescription": "Non-TM snoop hits line in L3 that is TM_SC state and causes it to be invalidated"
+ },
+ {,
+ "EventCode": "0x3608A",
+ "EventName": "PM_L2_RTY_ST",
+ "BriefDescription": "RC retries on PB for any store from core (excludes DCBFs)"
+ },
+ {,
+ "EventCode": "0x24040",
+ "EventName": "PM_INST_FROM_L2_MEPF",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x209C",
+ "EventName": "PM_TM_FAV_TBEGIN",
+ "BriefDescription": "Dispatch time Favored tbegin"
+ },
+ {,
+ "EventCode": "0x2D01E",
+ "EventName": "PM_ICT_NOSLOT_DISP_HELD_ISSQ",
+ "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full"
+ },
+ {,
+ "EventCode": "0x50A4",
+ "EventName": "PM_FLUSH_MPRED",
+ "BriefDescription": "Branch mispredict flushes. Includes target and address misprecition"
+ },
+ {,
+ "EventCode": "0x1504C",
+ "EventName": "PM_IPTEG_FROM_LL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x268A4",
+ "EventName": "PM_L3_LD_MISS",
+ "BriefDescription": "L3 Misses for demand LDs"
+ },
+ {,
+ "EventCode": "0x26088",
+ "EventName": "PM_L2_GRP_GUESS_CORRECT",
+ "BriefDescription": "L2 guess grp (GS or NNS) and guess was correct (data intra-group AND ^on-chip)"
+ },
+ {,
+ "EventCode": "0xD088",
+ "EventName": "PM_LSU0_LDMX_FIN",
+ "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+ },
+ {,
+ "EventCode": "0xE8B4",
+ "EventName": "PM_LS1_TM_DISALLOW",
+ "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+ },
+ {,
+ "EventCode": "0x1688C",
+ "EventName": "PM_RC_USAGE",
+ "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
+ },
+ {,
+ "EventCode": "0x3F054",
+ "EventName": "PM_RADIX_PWC_L4_PTE_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 4 page walk cache from beyond the core's L3 data cache. This is the deepest level of PWC possible for a translation. The source could be local/remote/distant memory or another core's cache"
+ },
+ {,
+ "EventCode": "0x2608A",
+ "EventName": "PM_ISIDE_DISP_FAIL_ADDR",
+ "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to an address collision conflict with an L2 machine already working on this line (e.g. ld-hit-stq or RC/CO/SN machines)"
+ },
+ {,
+ "EventCode": "0x50B4",
+ "EventName": "PM_TAGE_CORRECT_TAKEN_CMPL",
+ "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct. Counted at completion for taken branches only"
+ },
+ {,
+ "EventCode": "0x2090",
+ "EventName": "PM_DISP_CLB_HELD_SB",
+ "BriefDescription": "Dispatch/CLB Hold: Scoreboard"
+ },
+ {,
+ "EventCode": "0xE0B0",
+ "EventName": "PM_TM_FAIL_NON_TX_CONFLICT",
+ "BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR"
+ },
+ {,
+ "EventCode": "0x201E0",
+ "EventName": "PM_MRK_DATA_FROM_MEMORY",
+ "BriefDescription": "The processor's data cache was reloaded from a memory location including L4 from local remote or distant due to a marked load"
+ },
+ {,
+ "EventCode": "0x368A2",
+ "EventName": "PM_L3_L2_CO_MISS",
+ "BriefDescription": "L2 CO miss"
+ },
+ {,
+ "EventCode": "0x3608C",
+ "EventName": "PM_CO0_BUSY",
+ "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave CO lifetime (mach0 used as sample point)"
+ },
+ {,
+ "EventCode": "0x2C122",
+ "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 with dispatch conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x35154",
+ "EventName": "PM_MRK_DATA_FROM_L3_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 due to a marked load"
+ },
+ {,
+ "EventCode": "0x1D140",
+ "EventName": "PM_MRK_DATA_FROM_L31_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x4404A",
+ "EventName": "PM_INST_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's Instruction cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x28AC",
+ "EventName": "PM_TM_FAIL_SELF",
+ "BriefDescription": "TM aborted because a self-induced conflict occurred in Suspended state, due to one of the following: a store to a storage location that was previously accessed transactionally; a dcbf, dcbi, or icbi specify- ing a block that was previously accessed transactionally; a dcbst specifying a block that was previously written transactionally; or a tlbie that specifies a translation that was pre- viously used transactionally"
+ },
+ {,
+ "EventCode": "0x45056",
+ "EventName": "PM_SCALAR_FLOP_CMPL",
+ "BriefDescription": "Scalar flop operation completed"
+ },
+ {,
+ "EventCode": "0x16092",
+ "EventName": "PM_L2_LD_MISS_128B",
+ "BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)"
+ },
+ {,
+ "EventCode": "0x2E014",
+ "EventName": "PM_STCX_FIN",
+ "BriefDescription": "Number of stcx instructions finished. This includes instructions in the speculative path of a branch that may be flushed"
+ },
+ {,
+ "EventCode": "0xD8AC",
+ "EventName": "PM_LWSYNC",
+ "BriefDescription": ""
+ },
+ {,
+ "EventCode": "0x2094",
+ "EventName": "PM_TM_OUTER_TBEGIN",
+ "BriefDescription": "Completion time outer tbegin"
+ },
+ {,
+ "EventCode": "0x160B4",
+ "EventName": "PM_L3_P0_LCO_RTY",
+ "BriefDescription": "L3 initiated LCO received retry on port 0 (can try 4 times)"
+ },
+ {,
+ "EventCode": "0x36892",
+ "EventName": "PM_DSIDE_OTHER_64B_L2MEMACC",
+ "BriefDescription": "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory that was for hpc_read64, (RC had to fetch other 64B of a line from MC) i.e., number of times RC had to go to memory to get 'missing' 64B"
+ },
+ {,
+ "EventCode": "0x20A8",
+ "EventName": "PM_TM_FAIL_FOOTPRINT_OVERFLOW",
+ "BriefDescription": "TM aborted because the tracking limit for transactional storage accesses was exceeded.. Asynchronous"
+ },
+ {,
+ "EventCode": "0x30018",
+ "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL",
+ "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)"
+ },
+ {,
+ "EventCode": "0xC894",
+ "EventName": "PM_LS1_UNALIGNED_LD",
+ "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0x360A2",
+ "EventName": "PM_L3_L2_CO_HIT",
+ "BriefDescription": "L2 CO hits"
+ },
+ {,
+ "EventCode": "0x36092",
+ "EventName": "PM_DSIDE_L2MEMACC",
+ "BriefDescription": "Valid when first beat of data comes in for an D-side fetch where data came EXCLUSIVELY from memory (excluding hpcread64 accesses), i.e., total memory accesses by RCs"
+ },
+ {,
+ "EventCode": "0x10138",
+ "EventName": "PM_MRK_BR_2PATH",
+ "BriefDescription": "marked branches which are not strongly biased"
+ },
+ {,
+ "EventCode": "0x2884",
+ "EventName": "PM_ISYNC",
+ "BriefDescription": "Isync completion count per thread"
+ },
+ {,
+ "EventCode": "0x16882",
+ "EventName": "PM_L2_CASTOUT_SHR",
+ "BriefDescription": "L2 Castouts - Shared (Tx,Sx)"
+ },
+ {,
+ "EventCode": "0x26092",
+ "EventName": "PM_L2_LD_MISS_64B",
+ "BriefDescription": "All successful D-side load dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B(i.e., M=1)"
+ },
+ {,
+ "EventCode": "0x26080",
+ "EventName": "PM_L2_LD_MISS",
+ "BriefDescription": "All successful D-Side Load dispatches that were an L2 miss for this thread"
+ },
+ {,
+ "EventCode": "0x3D14C",
+ "EventName": "PM_MRK_DATA_FROM_DMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a marked load"
+ },
+ {,
+ "EventCode": "0x100FA",
+ "EventName": "PM_ANY_THRD_RUN_CYC",
+ "BriefDescription": "Cycles in which at least one thread has the run latch set"
+ },
+ {,
+ "EventCode": "0x2C12A",
+ "EventName": "PM_MRK_DATA_FROM_RMEM_CYC",
+ "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group ( Remote) due to a marked load"
+ },
+ {,
+ "EventCode": "0x25048",
+ "EventName": "PM_IPTEG_FROM_LMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's Memory due to a instruction side request"
+ },
+ {,
+ "EventCode": "0xD8A8",
+ "EventName": "PM_ISLB_MISS",
+ "BriefDescription": "Instruction SLB Miss - Total of all segment sizes"
+ },
+ {,
+ "EventCode": "0x368AE",
+ "EventName": "PM_L3_P1_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted"
+ },
+ {,
+ "EventCode": "0x260A2",
+ "EventName": "PM_L3_CI_HIT",
+ "BriefDescription": "L3 Castins Hit (total count)"
+ },
+ {,
+ "EventCode": "0x44054",
+ "EventName": "PM_VECTOR_LD_CMPL",
+ "BriefDescription": "Number of vector load instructions completed"
+ },
+ {,
+ "EventCode": "0x1E05C",
+ "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN",
+ "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT"
+ },
+ {,
+ "EventCode": "0xC084",
+ "EventName": "PM_LS2_LD_VECTOR_FIN",
+ "BriefDescription": "LS2 finished load vector op"
+ },
+ {,
+ "EventCode": "0x1608E",
+ "EventName": "PM_ST_CAUSED_FAIL",
+ "BriefDescription": "Non-TM Store caused any thread to fail"
+ },
+ {,
+ "EventCode": "0x3080",
+ "EventName": "PM_ISU0_ISS_HOLD_ALL",
+ "BriefDescription": "All ISU rejects"
+ },
+ {,
+ "EventCode": "0x1515A",
+ "EventName": "PM_SYNC_MRK_L2MISS",
+ "BriefDescription": "Marked L2 Miss that can throw a synchronous interrupt"
+ },
+ {,
+ "EventCode": "0x26892",
+ "EventName": "PM_L2_ST_MISS_64B",
+ "BriefDescription": "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 64B (i.e., M=1)"
+ },
+ {,
+ "EventCode": "0x2688C",
+ "EventName": "PM_CO_USAGE",
+ "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
+ },
+ {,
+ "EventCode": "0x48B8",
+ "EventName": "PM_BR_MPRED_TAKEN_TA",
+ "BriefDescription": "Conditional Branch Completed that was Mispredicted due to the Target Address Prediction from the Count Cache or Link Stack. Only XL-form branches that resolved Taken set this event."
+ },
+ {,
+ "EventCode": "0x50B0",
+ "EventName": "PM_BTAC_BAD_RESULT",
+ "BriefDescription": "BTAC thinks branch will be taken but it is either predicted not-taken by the BHT, or the target address is wrong (less common). In both cases, a redirect will happen"
+ },
+ {,
+ "EventCode": "0xD888",
+ "EventName": "PM_LSU1_LDMX_FIN",
+ "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+ },
+ {,
+ "EventCode": "0x58B4",
+ "EventName": "PM_TAGE_CORRECT",
+ "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct. Includes taken and not taken and is counted at execution time"
+ },
+ {,
+ "EventCode": "0x3688C",
+ "EventName": "PM_SN_USAGE",
+ "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each SN machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running"
+ },
+ {,
+ "EventCode": "0x36084",
+ "EventName": "PM_L2_RCST_DISP",
+ "BriefDescription": "All D-side store dispatch attempts for this thread"
+ },
+ {,
+ "EventCode": "0x46084",
+ "EventName": "PM_L2_RCST_DISP_FAIL_OTHER",
+ "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to reason other than address collision"
+ },
+ {,
+ "EventCode": "0xF0AC",
+ "EventName": "PM_DC_PREF_STRIDED_CONF",
+ "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software."
+ },
+ {,
+ "EventCode": "0x45054",
+ "EventName": "PM_FMA_CMPL",
+ "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. "
+ },
+ {,
+ "EventCode": "0x201E8",
+ "EventName": "PM_THRESH_EXC_512",
+ "BriefDescription": "Threshold counter exceeded a value of 512"
+ },
+ {,
+ "EventCode": "0x36080",
+ "EventName": "PM_L2_INST",
+ "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread"
+ },
+ {,
+ "EventCode": "0x3504C",
+ "EventName": "PM_IPTEG_FROM_DL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a instruction side request"
+ },
+ {,
+ "EventCode": "0xD890",
+ "EventName": "PM_LS1_DC_COLLISIONS",
+ "BriefDescription": "Read-write data cache collisions"
+ },
+ {,
+ "EventCode": "0x1688A",
+ "EventName": "PM_ISIDE_DISP",
+ "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread"
+ },
+ {,
+ "EventCode": "0x468AA",
+ "EventName": "PM_L3_P1_CO_L31",
+ "BriefDescription": "L3 CO to L3.1 (LCO) port 1 with or without data"
+ },
+ {,
+ "EventCode": "0x28B0",
+ "EventName": "PM_DISP_HELD_TBEGIN",
+ "BriefDescription": "This outer tbegin transaction cannot be dispatched until the previous tend instruction completes"
+ },
+ {,
+ "EventCode": "0xE8A0",
+ "EventName": "PM_LSU3_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss"
+ },
+ {,
+ "EventCode": "0x2C05E",
+ "EventName": "PM_INST_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for an instruction fetch (demand only)"
+ },
+ {,
+ "EventCode": "0xC8BC",
+ "EventName": "PM_STCX_SUCCESS_CMPL",
+ "BriefDescription": "Number of stcx instructions that completed successfully"
+ },
+ {,
+ "EventCode": "0xE098",
+ "EventName": "PM_LSU2_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1"
+ },
+ {,
+ "EventCode": "0xE0B8",
+ "EventName": "PM_LS2_TM_DISALLOW",
+ "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+ },
+ {,
+ "EventCode": "0x44044",
+ "EventName": "PM_INST_FROM_L31_ECO_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's ECO L3 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x16886",
+ "EventName": "PM_CO_DISP_FAIL",
+ "BriefDescription": "CO dispatch failed due to all CO machines being busy"
+ },
+ {,
+ "EventCode": "0x3D146",
+ "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x16892",
+ "EventName": "PM_L2_ST_MISS_128B",
+ "BriefDescription": "All successful D-side store dispatches that were an L2 miss (NOT Sx,Tx,Mx) for this thread and the RC calculated the request should be for 128B (i.e., M=0)"
+ },
+ {,
+ "EventCode": "0x26890",
+ "EventName": "PM_ISIDE_L2MEMACC",
+ "BriefDescription": "Valid when first beat of data comes in for an I-side fetch where data came from memory"
+ },
+ {,
+ "EventCode": "0xD094",
+ "EventName": "PM_LS2_DC_COLLISIONS",
+ "BriefDescription": "Read-write data cache collisions"
+ },
+ {,
+ "EventCode": "0x3C05E",
+ "EventName": "PM_MEM_RWITM",
+ "BriefDescription": "Memory Read With Intent to Modify for this thread"
+ },
+ {,
+ "EventCode": "0xC090",
+ "EventName": "PM_LSU_STCX",
+ "BriefDescription": "STCX sent to nest, i.e. total"
+ },
+ {,
+ "EventCode": "0x2C120",
+ "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 without conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x36086",
+ "EventName": "PM_L2_RC_ST_DONE",
+ "BriefDescription": "Read-claim machine did store to line that was in Tx or Sx (Tagged or Shared state)"
+ },
+ {,
+ "EventCode": "0xE8AC",
+ "EventName": "PM_TM_FAIL_TX_CONFLICT",
+ "BriefDescription": "Transactional conflict from LSU, gets reported to TEXASR"
+ },
+ {,
+ "EventCode": "0x48A8",
+ "EventName": "PM_DECODE_FUSION_LD_ST_DISP",
+ "BriefDescription": "32-bit displacement D-form and 16-bit displacement X-form"
+ },
+ {,
+ "EventCode": "0x3D144",
+ "EventName": "PM_MRK_DATA_FROM_L2_MEPF_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load"
+ },
+ {,
+ "EventCode": "0x44046",
+ "EventName": "PM_INST_FROM_L21_MOD",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Modified (M) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {,
+ "EventCode": "0x40B0",
+ "EventName": "PM_BR_PRED_TAKEN_CR",
+ "BriefDescription": "Conditional Branch that had its direction predicted. I-form branches do not set this event. In addition, B-form branches which do not use the BHT do not set this event - these are branches with BO-field set to 'always taken' and branches"
+ },
+ {,
+ "EventCode": "0x15040",
+ "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x460A6",
+ "EventName": "PM_RD_FORMING_SC",
+ "BriefDescription": "Doesn't occur"
+ },
+ {,
+ "EventCode": "0x35042",
+ "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a instruction side request"
+ },
+ {,
+ "EventCode": "0xF898",
+ "EventName": "PM_XLATE_RADIX_MODE",
+ "BriefDescription": "LSU reports every cycle the thread is in radix translation mode (as opposed to HPT mode)"
+ },
+ {,
+ "EventCode": "0x2D142",
+ "EventName": "PM_MRK_DATA_FROM_L3_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked load"
+ },
+ {,
+ "EventCode": "0x160B0",
+ "EventName": "PM_L3_P0_NODE_PUMP",
+ "BriefDescription": "L3 PF sent with nodal scope port 0, counts even retried requests"
+ },
+ {,
+ "EventCode": "0xD88C",
+ "EventName": "PM_LSU3_LDMX_FIN",
+ "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])."
+ },
+ {,
+ "EventCode": "0x36882",
+ "EventName": "PM_L2_LD_HIT",
+ "BriefDescription": "All successful D-side-Ld or I-side-instruction-fetch dispatches for this thread that were L2 hits"
+ },
+ {,
+ "EventCode": "0x168AC",
+ "EventName": "PM_L3_CI_USAGE",
+ "BriefDescription": "Rotating sample of 16 CI or CO actives"
+ },
+ {,
+ "EventCode": "0x20134",
+ "EventName": "PM_MRK_FXU_FIN",
+ "BriefDescription": "fxu marked instr finish"
+ },
+ {,
+ "EventCode": "0x4608E",
+ "EventName": "PM_TM_CAP_OVERFLOW",
+ "BriefDescription": "TM Footprint Capacity Overflow"
+ },
+ {,
+ "EventCode": "0x4F05C",
+ "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from beyond the core's L3 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation. The source could be local/remote/distant memory or another core's cache"
+ },
+ {,
+ "EventCode": "0x40014",
+ "EventName": "PM_PROBE_NOP_DISP",
+ "BriefDescription": "ProbeNops dispatched"
+ },
+ {,
+ "EventCode": "0x10052",
+ "EventName": "PM_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {,
+ "EventCode": "0x2505E",
+ "EventName": "PM_BACK_BR_CMPL",
+ "BriefDescription": "Branch instruction completed with a target address less than current instruction address"
+ },
+ {,
+ "EventCode": "0x2688A",
+ "EventName": "PM_ISIDE_DISP_FAIL_OTHER",
+ "BriefDescription": "All I-side-instruction-fetch dispatch attempts for this thread that failed due to reasons other than an address collision conflict with an L2 machine (e.g. no available RC/CO machines)"
+ },
+ {,
+ "EventCode": "0x2001A",
+ "EventName": "PM_NTC_ALL_FIN",
+ "BriefDescription": "Cycles after instruction finished to instruction completed."
+ },
+ {,
+ "EventCode": "0x3005A",
+ "EventName": "PM_ISQ_0_8_ENTRIES",
+ "BriefDescription": "Cycles in which 8 or less Issue Queue entries are in use. This is a shared event, not per thread"
+ },
+ {,
+ "EventCode": "0x3515E",
+ "EventName": "PM_MRK_BACK_BR_CMPL",
+ "BriefDescription": "Marked branch instruction completed with a target address less than current instruction address"
+ },
+ {,
+ "EventCode": "0xF890",
+ "EventName": "PM_LSU1_L1_CAM_CANCEL",
+ "BriefDescription": "ls1 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0x268AE",
+ "EventName": "PM_L3_P3_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 3, every retry counted"
+ },
+ {,
+ "EventCode": "0xE884",
+ "EventName": "PM_LS1_ERAT_MISS_PREF",
+ "BriefDescription": "LS1 Erat miss due to prefetch"
+ },
+ {,
+ "EventCode": "0xE89C",
+ "EventName": "PM_LSU1_TM_L1_MISS",
+ "BriefDescription": "Load tm L1 miss"
+ },
+ {,
+ "EventCode": "0x28A8",
+ "EventName": "PM_TM_FAIL_CONF_NON_TM",
+ "BriefDescription": "TM aborted because a conflict occurred with a non-transactional access by another processor"
+ },
+ {,
+ "EventCode": "0x16890",
+ "EventName": "PM_L1PF_L2MEMACC",
+ "BriefDescription": "Valid when first beat of data comes in for an L1PF where data came from memory"
+ },
+ {,
+ "EventCode": "0x4504C",
+ "EventName": "PM_IPTEG_FROM_DMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x1002E",
+ "EventName": "PM_LMQ_MERGE",
+ "BriefDescription": "A demand miss collides with a prefetch for the same line"
+ },
+ {,
+ "EventCode": "0x160B6",
+ "EventName": "PM_L3_WI0_BUSY",
+ "BriefDescription": "Rotating sample of 8 WI valid (duplicate)"
+ },
+ {,
+ "EventCode": "0x368AC",
+ "EventName": "PM_L3_CO0_BUSY",
+ "BriefDescription": "Lifetime, sample of CO machine 0 valid"
+ },
+ {,
+ "EventCode": "0x2E040",
+ "EventName": "PM_DPTEG_FROM_L2_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 hit without dispatch conflicts on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x1D152",
+ "EventName": "PM_MRK_DATA_FROM_DL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a marked load"
+ },
+ {,
+ "EventCode": "0x46880",
+ "EventName": "PM_ISIDE_MRU_TOUCH",
+ "BriefDescription": "I-side L2 MRU touch sent to L2 for this thread I-side L2 MRU touch commands sent to the L2 for this thread"
+ },
+ {,
+ "EventCode": "0x508C",
+ "EventName": "PM_SHL_CREATED",
+ "BriefDescription": "Store-Hit-Load Table Entry Created"
+ },
+ {,
+ "EventCode": "0x50B8",
+ "EventName": "PM_TAGE_OVERRIDE_WRONG",
+ "BriefDescription": "The TAGE overrode BHT direction prediction but it was incorrect. Counted at completion for taken branches only"
+ },
+ {,
+ "EventCode": "0x160AE",
+ "EventName": "PM_L3_P0_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 0, every retry counted"
+ },
+ {,
+ "EventCode": "0x268B2",
+ "EventName": "PM_L3_LOC_GUESS_WRONG",
+ "BriefDescription": "Prefetch scope predictor selected LNS, but was wrong"
+ },
+ {,
+ "EventCode": "0x36088",
+ "EventName": "PM_L2_SYS_GUESS_CORRECT",
+ "BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)"
+ },
+ {,
+ "EventCode": "0x260AE",
+ "EventName": "PM_L3_P2_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 2, every retry counted"
+ },
+ {,
+ "EventCode": "0xD8B0",
+ "EventName": "PM_PTESYNC",
+ "BriefDescription": ""
+ },
+ {,
+ "EventCode": "0x26086",
+ "EventName": "PM_CO_TM_SC_FOOTPRINT",
+ "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3) OR L2 TM_store hit dirty HPC line and L3 indicated SC line formed in L3 on RDR bus"
+ },
+ {,
+ "EventCode": "0x1E05A",
+ "EventName": "PM_CMPLU_STALL_ANY_SYNC",
+ "BriefDescription": "Cycles in which the NTC sync instruction (isync, lwsync or hwsync) is not allowed to complete"
+ },
+ {,
+ "EventCode": "0xF090",
+ "EventName": "PM_LSU0_L1_CAM_CANCEL",
+ "BriefDescription": "ls0 l1 tm cam cancel"
+ },
+ {,
+ "EventCode": "0xC0A8",
+ "EventName": "PM_LSU_FLUSH_CI",
+ "BriefDescription": "Load was not issued to LSU as a cache inhibited (non-cacheable) load but it was later determined to be cache inhibited"
+ },
+ {,
+ "EventCode": "0x20AC",
+ "EventName": "PM_TM_FAIL_CONF_TM",
+ "BriefDescription": "TM aborted because a conflict occurred with another transaction."
+ },
+ {,
+ "EventCode": "0x588C",
+ "EventName": "PM_SHL_ST_DEP_CREATED",
+ "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled"
+ },
+ {,
+ "EventCode": "0x46882",
+ "EventName": "PM_L2_ST_HIT",
+ "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits"
+ },
+ {,
+ "EventCode": "0x360AC",
+ "EventName": "PM_L3_SN0_BUSY",
+ "BriefDescription": "Lifetime, sample of snooper machine 0 valid"
+ },
+ {,
+ "EventCode": "0x3005C",
+ "EventName": "PM_BFU_BUSY",
+ "BriefDescription": "Cycles in which all 4 Binary Floating Point units are busy. The BFU is running at capacity"
+ },
+ {,
+ "EventCode": "0x48A0",
+ "EventName": "PM_BR_PRED_PCACHE",
+ "BriefDescription": "Conditional branch completed that used pattern cache prediction"
+ },
+ {,
+ "EventCode": "0x26880",
+ "EventName": "PM_L2_ST_MISS",
+ "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread"
+ },
+ {,
+ "EventCode": "0xF8B4",
+ "EventName": "PM_DC_PREF_XCONS_ALLOC",
+ "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch"
+ },
+ {,
+ "EventCode": "0x35048",
+ "EventName": "PM_IPTEG_FROM_DL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a instruction side request"
+ },
+ {,
+ "EventCode": "0x260A8",
+ "EventName": "PM_L3_PF_HIT_L3",
+ "BriefDescription": "L3 PF hit in L3 (abandoned)"
+ },
+ {,
+ "EventCode": "0x360B4",
+ "EventName": "PM_L3_PF0_BUSY",
+ "BriefDescription": "Lifetime, sample of PF machine 0 valid"
+ },
+ {,
+ "EventCode": "0xC0B0",
+ "EventName": "PM_LSU_FLUSH_UE",
+ "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time"
+ },
+ {,
+ "EventCode": "0x4013A",
+ "EventName": "PM_MRK_IC_MISS",
+ "BriefDescription": "Marked instruction experienced I cache miss"
+ },
+ {,
+ "EventCode": "0x2088",
+ "EventName": "PM_FLUSH_DISP_SB",
+ "BriefDescription": "Dispatch Flush: Scoreboard"
+ },
+ {,
+ "EventCode": "0x401E8",
+ "EventName": "PM_MRK_DATA_FROM_L2MISS",
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a marked load"
+ },
+ {,
+ "EventCode": "0x3688E",
+ "EventName": "PM_TM_ST_CAUSED_FAIL",
+ "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail"
+ },
+ {,
+ "EventCode": "0x460B2",
+ "EventName": "PM_L3_SYS_GUESS_WRONG",
+ "BriefDescription": "Prefetch scope predictor selected VGS or RNS, but was wrong"
+ },
+ {,
+ "EventCode": "0x58B8",
+ "EventName": "PM_TAGE_OVERRIDE_WRONG_SPEC",
+ "BriefDescription": "The TAGE overrode BHT direction prediction and it was correct. Includes taken and not taken and is counted at execution time"
+ },
+ {,
+ "EventCode": "0xE890",
+ "EventName": "PM_LSU3_ERAT_HIT",
+ "BriefDescription": "Primary ERAT hit. There is no secondary ERAT"
+ },
+ {,
+ "EventCode": "0x2898",
+ "EventName": "PM_TM_TABORT_TRECLAIM",
+ "BriefDescription": "Completion time tabortnoncd, tabortcd, treclaim"
+ },
+ {,
+ "EventCode": "0x268A0",
+ "EventName": "PM_L3_CO_L31",
+ "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 (lossy = may undercount if two cresps come in the same cyc)"
+ },
+ {,
+ "EventCode": "0x5080",
+ "EventName": "PM_THRD_PRIO_4_5_CYC",
+ "BriefDescription": "Cycles thread running at priority level 4 or 5"
+ },
+ {,
+ "EventCode": "0x2505C",
+ "EventName": "PM_VSU_FIN",
+ "BriefDescription": "VSU instruction finished. Up to 4 per cycle"
+ },
+ {,
+ "EventCode": "0x40A4",
+ "EventName": "PM_BR_PRED_CCACHE",
+ "BriefDescription": "Conditional Branch Completed that used the Count Cache for Target Prediction"
+ },
+ {,
+ "EventCode": "0x2E04A",
+ "EventName": "PM_DPTEG_FROM_RL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4D12E",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+ },
+ {,
+ "EventCode": "0xC8B4",
+ "EventName": "PM_LSU_FLUSH_LHL_SHL",
+ "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)."
+ },
+ {,
+ "EventCode": "0x58A4",
+ "EventName": "PM_FLUSH_LSU",
+ "BriefDescription": "LSU flushes. Includes all lsu flushes"
+ },
+ {,
+ "EventCode": "0x1D150",
+ "EventName": "PM_MRK_DATA_FROM_DL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked load"
+ },
+ {,
+ "EventCode": "0xC8A0",
+ "EventName": "PM_LSU1_FALSE_LHS",
+ "BriefDescription": "False LHS match detected"
+ },
+ {,
+ "EventCode": "0x48BC",
+ "EventName": "PM_THRD_PRIO_2_3_CYC",
+ "BriefDescription": "Cycles thread running at priority level 2 or 3"
+ },
+ {,
+ "EventCode": "0x368B2",
+ "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH",
+ "BriefDescription": "Prefetch scope predictor selected GS or NNS, but was wrong because scope was VGS or RNS"
+ },
+ {,
+ "EventCode": "0xE8BC",
+ "EventName": "PM_LS1_PTE_TABLEWALK_CYC",
+ "BriefDescription": "Cycles when a tablewalk is pending on this thread on table 1"
+ },
+ {,
+ "EventCode": "0x1F152",
+ "EventName": "PM_MRK_FAB_RSP_BKILL_CYC",
+ "BriefDescription": "cycles L2 RC took for a bkill"
+ },
+ {,
+ "EventCode": "0x4C124",
+ "EventName": "PM_MRK_DATA_FROM_L3_NO_CONFLICT_CYC",
+ "BriefDescription": "Duration in cycles to reload from local core's L3 without conflict due to a marked load"
+ },
+ {,
+ "EventCode": "0x2F14A",
+ "EventName": "PM_MRK_DPTEG_FROM_RL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x26888",
+ "EventName": "PM_L2_GRP_GUESS_WRONG",
+ "BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)"
+ },
+ {,
+ "EventCode": "0xC0AC",
+ "EventName": "PM_LSU_FLUSH_EMSH",
+ "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address"
+ },
+ {,
+ "EventCode": "0x260B2",
+ "EventName": "PM_L3_SYS_GUESS_CORRECT",
+ "BriefDescription": "Prefetch scope predictor selected VGS or RNS and was correct"
+ },
+ {,
+ "EventCode": "0x1D146",
+ "EventName": "PM_MRK_DATA_FROM_MEMORY_CYC",
+ "BriefDescription": "Duration in cycles to reload from a memory location including L4 from local remote or distant due to a marked load"
+ },
+ {,
+ "EventCode": "0xE094",
+ "EventName": "PM_LSU0_TM_L1_HIT",
+ "BriefDescription": "Load tm hit in L1"
+ },
+ {,
+ "EventCode": "0x46888",
+ "EventName": "PM_L2_GROUP_PUMP",
+ "BriefDescription": "RC requests that were on group (aka nodel) pump attempts"
+ },
+ {,
+ "EventCode": "0xC08C",
+ "EventName": "PM_LSU_DTLB_MISS_16M_2M",
+ "BriefDescription": "Data TLB Miss page size 16M (HPT) or 2M (Radix)"
+ },
+ {,
+ "EventCode": "0x16080",
+ "EventName": "PM_L2_LD",
+ "BriefDescription": "All successful D-side Load dispatches for this thread (L2 miss + L2 hits)"
+ },
+ {,
+ "EventCode": "0x4505C",
+ "EventName": "PM_MATH_FLOP_CMPL",
+ "BriefDescription": "Math flop instruction completed"
+ },
+ {,
+ "EventCode": "0xC080",
+ "EventName": "PM_LS0_LD_VECTOR_FIN",
+ "BriefDescription": "LS0 finished load vector op"
+ },
+ {,
+ "EventCode": "0x368B0",
+ "EventName": "PM_L3_P1_SYS_PUMP",
+ "BriefDescription": "L3 PF sent with sys scope port 1, counts even retried requests"
+ },
+ {,
+ "EventCode": "0x1F146",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x2000C",
+ "EventName": "PM_THRD_ALL_RUN_CYC",
+ "BriefDescription": "Cycles in which all the threads have the run latch set"
+ },
+ {,
+ "EventCode": "0xC0BC",
+ "EventName": "PM_LSU_FLUSH_OTHER",
+ "BriefDescription": "Other LSU flushes including: Sync (sync ack from L2 caused search of LRQ for oldest snooped load, This will either signal a Precise Flush of the oldest snooped loa or a Flush Next PPC); Data Valid Flush Next (several cases of this, one example is store and reload are lined up such that a store-hit-reload scenario exists and the CDF has already launched and has gotten bad/stale data); Bad Data Valid Flush Next (might be a few cases of this, one example is a larxa (D$ hit) return data and dval but can't allocate to LMQ (LMQ full or other reason). Already gave dval but can't watch it for snoop_hit_larx. Need to take the “bad dval” back and flush all younger ops)"
+ },
+ {,
+ "EventCode": "0x5094",
+ "EventName": "PM_IC_MISS_ICBI",
+ "BriefDescription": "threaded version, IC Misses where we got EA dir hit but no sector valids were on. ICBI took line out"
+ },
+ {,
+ "EventCode": "0xC8A8",
+ "EventName": "PM_LSU_FLUSH_ATOMIC",
+ "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed"
+ },
+ {,
+ "EventCode": "0x1E04E",
+ "EventName": "PM_DPTEG_FROM_L2MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x4D05E",
+ "EventName": "PM_BR_CMPL",
+ "BriefDescription": "Any Branch instruction completed"
+ },
+ {,
+ "EventCode": "0x260B0",
+ "EventName": "PM_L3_P0_GRP_PUMP",
+ "BriefDescription": "L3 PF sent with grp scope port 0, counts even retried requests"
+ },
+ {,
+ "EventCode": "0x30132",
+ "EventName": "PM_MRK_VSU_FIN",
+ "BriefDescription": "VSU marked instr finish"
+ },
+ {,
+ "EventCode": "0x2D120",
+ "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load"
+ },
+ {,
+ "EventCode": "0x1E048",
+ "EventName": "PM_DPTEG_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {,
+ "EventCode": "0x16086",
+ "EventName": "PM_L2_SN_M_WR_DONE",
+ "BriefDescription": "SNP dispatched for a write and was M (true M); for DMA cacheinj this will pulse if rty/push is required (won't pulse if cacheinj is accepted)"
+ },
+ {,
+ "EventCode": "0x489C",
+ "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL",
+ "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time"
+ },
+ {,
+ "EventCode": "0xF0B8",
+ "EventName": "PM_LS0_UNALIGNED_ST",
+ "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty"
+ },
+ {,
+ "EventCode": "0x20132",
+ "EventName": "PM_MRK_DFU_FIN",
+ "BriefDescription": "Decimal Unit marked Instruction Finish"
+ },
+ {,
+ "EventCode": "0x160A6",
+ "EventName": "PM_TM_SC_CO",
+ "BriefDescription": "L3 castout of line that was StoreCopy (original value of speculatively written line) in a Transaction"
+ },
+ {,
+ "EventCode": "0xC8B0",
+ "EventName": "PM_LSU_FLUSH_LHS",
+ "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed"
+ },
+ {,
+ "EventCode": "0x16084",
+ "EventName": "PM_L2_RCLD_DISP",
+ "BriefDescription": "All D-side-Ld or I-side-instruction-fetch dispatch attempts for this thread"
+ },
+ {,
+ "EventCode": "0x3F150",
+ "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC",
+ "BriefDescription": "cycles to drain st from core to L2"
+ },
+ {,
+ "EventCode": "0x168A4",
+ "EventName": "PM_L3_MISS",
+ "BriefDescription": "L3 Misses (L2 miss also missing L3, including data/instrn/xlate)"
+ },
+ {,
+ "EventCode": "0xF080",
+ "EventName": "PM_LSU_STCX_FAIL",
+ "BriefDescription": ""
+ },
+ {,
+ "EventCode": "0x30038",
+ "EventName": "PM_CMPLU_STALL_DMISS_LMEM",
+ "BriefDescription": "Completion stall due to cache miss that resolves in local memory"
+ },
+ {,
+ "EventCode": "0x28A4",
+ "EventName": "PM_MRK_TEND_FAIL",
+ "BriefDescription": "Nested or not nested tend failed for a marked tend instruction"
+ },
+ {,
+ "EventCode": "0x100FC",
+ "EventName": "PM_LD_REF_L1",
+ "BriefDescription": "All L1 D cache load references counted at finish, gated by reject"
+ },
+ {,
+ "EventCode": "0xC0A0",
+ "EventName": "PM_LSU0_FALSE_LHS",
+ "BriefDescription": "False LHS match detected"
+ },
+ {,
+ "EventCode": "0x468A8",
+ "EventName": "PM_SN_MISS",
+ "BriefDescription": "Any port snooper L3 miss or collision. Up to 4 can happen in a cycle but we only count 1"
+ },
+ {,
+ "EventCode": "0x36888",
+ "EventName": "PM_L2_SYS_GUESS_WRONG",
+ "BriefDescription": "L2 guess system (VGS or RNS) and guess was not correct (ie data ^beyond-group)"
+ },
+ {,
+ "EventCode": "0x2080",
+ "EventName": "PM_EE_OFF_EXT_INT",
+ "BriefDescription": "CyclesMSR[EE] is off and external interrupts are active"
+ },
+ {,
+ "EventCode": "0xE8B8",
+ "EventName": "PM_LS3_TM_DISALLOW",
+ "BriefDescription": "A TM-ineligible instruction tries to execute inside a transaction and the LSU disallows it"
+ },
+ {,
+ "EventCode": "0x2688E",
+ "EventName": "PM_TM_FAV_CAUSED_FAIL",
+ "BriefDescription": "TM Load (fav) caused another thread to fail"
+ },
+ {,
+ "EventCode": "0x16090",
+ "EventName": "PM_SN0_BUSY",
+ "BriefDescription": "SN mach 0 Busy. Used by PMU to sample ave SN lifetime (mach0 used as sample point)"
+ },
+ {,
+ "EventCode": "0x360AE",
+ "EventName": "PM_L3_P0_CO_RTY",
+ "BriefDescription": "L3 CO received retry port 0 (memory only), every retry counted"
+ },
+ {,
+ "EventCode": "0x168A8",
+ "EventName": "PM_L3_WI_USAGE",
+ "BriefDescription": "Lifetime, sample of Write Inject machine 0 valid"
+ },
+ {,
+ "EventCode": "0x468A2",
+ "EventName": "PM_L3_LAT_CI_MISS",
+ "BriefDescription": "L3 Lateral Castins Miss"
+ },
+ {,
+ "EventCode": "0x4090",
+ "EventName": "PM_IC_PREF_CANCEL_PAGE",
+ "BriefDescription": "Prefetch Canceled due to page boundary"
+ },
+ {,
+ "EventCode": "0x460AA",
+ "EventName": "PM_L3_P0_CO_L31",
+ "BriefDescription": "L3 CO to L3.1 (LCO) port 0 with or without data"
+ },
+ {,
+ "EventCode": "0x2880",
+ "EventName": "PM_FLUSH_DISP",
+ "BriefDescription": "Dispatch flush"
+ },
+ {,
+ "EventCode": "0x168AE",
+ "EventName": "PM_L3_P1_PF_RTY",
+ "BriefDescription": "L3 PF received retry port 1, every retry counted"
+ },
+ {,
+ "EventCode": "0x46082",
+ "EventName": "PM_L2_ST_DISP",
+ "BriefDescription": "All successful D-side store dispatches for this thread"
+ },
+ {,
+ "EventCode": "0x36880",
+ "EventName": "PM_L2_INST_MISS",
+ "BriefDescription": "All successful I-side-instruction-fetch (e.g. i-demand, i-prefetch) dispatches for this thread that were an L2 miss"
+ },
+ {,
+ "EventCode": "0xE084",
+ "EventName": "PM_LS0_ERAT_MISS_PREF",
+ "BriefDescription": "LS0 Erat miss due to prefetch"
+ },
+ {,
+ "EventCode": "0x409C",
+ "EventName": "PM_BR_PRED",
+ "BriefDescription": "Conditional Branch Executed in which the HW predicted the Direction or Target. Includes taken and not taken and is counted at execution time"
+ },
+ {,
+ "EventCode": "0x2D144",
+ "EventName": "PM_MRK_DATA_FROM_L31_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L3 on the same chip due to a marked load"
+ },
+ {,
+ "EventCode": "0x360A4",
+ "EventName": "PM_L3_CO_LCO",
+ "BriefDescription": "Total L3 COs occurred on LCO L3.1 (good cresp, may end up in mem on a retry)"
+ },
+ {,
+ "EventCode": "0x4890",
+ "EventName": "PM_IC_PREF_CANCEL_HIT",
+ "BriefDescription": "Prefetch Canceled due to icache hit"
+ },
+ {,
+ "EventCode": "0x268A8",
+ "EventName": "PM_RD_HIT_PF",
+ "BriefDescription": "RD machine hit L3 PF machine"
+ },
+ {,
+ "EventCode": "0x16880",
+ "EventName": "PM_L2_ST",
+ "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)"
+ },
+ {,
+ "EventCode": "0x4098",
+ "EventName": "PM_IC_DEMAND_L2_BHT_REDIRECT",
+ "BriefDescription": "L2 I cache demand request due to BHT redirect, branch redirect ( 2 bubbles 3 cycles)"
+ },
+ {,
+ "EventCode": "0xD0B4",
+ "EventName": "PM_LSU0_SRQ_S0_VALID_CYC",
+ "BriefDescription": "Slot 0 of SRQ valid"
+ },
+ {,
+ "EventCode": "0x160AA",
+ "EventName": "PM_L3_P0_LCO_NO_DATA",
+ "BriefDescription": "Dataless L3 LCO sent port 0"
+ },
+ {,
+ "EventCode": "0x208C",
+ "EventName": "PM_CLB_HELD",
+ "BriefDescription": "CLB (control logic block - indicates quadword fetch block) Hold: Any Reason"
+ },
+ {,
+ "EventCode": "0xF88C",
+ "EventName": "PM_LSU3_STORE_REJECT",
+ "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met"
+ },
+ {,
+ "EventCode": "0x200F2",
+ "EventName": "PM_INST_DISP",
+ "BriefDescription": "# PPC Dispatched"
+ },
+ {,
+ "EventCode": "0x4E05E",
+ "EventName": "PM_TM_OUTER_TBEGIN_DISP",
+ "BriefDescription": "Number of outer tbegin instructions dispatched. The dispatch unit determines whether the tbegin instruction is outer or nested. This is a speculative count, which includes flushed instructions"
+ },
+ {,
+ "EventCode": "0x2D018",
+ "EventName": "PM_CMPLU_STALL_EXEC_UNIT",
+ "BriefDescription": "Completion stall due to execution units (FXU/VSU/CRU)"
+ },
+ {,
+ "EventCode": "0x20B0",
+ "EventName": "PM_LSU_FLUSH_NEXT",
+ "BriefDescription": "LSU flush next reported at flush time. Sometimes these also come with an exception"
+ },
+ {,
+ "EventCode": "0x3880",
+ "EventName": "PM_ISU2_ISS_HOLD_ALL",
+ "BriefDescription": "All ISU rejects"
+ },
+ {,
+ "EventCode": "0xC884",
+ "EventName": "PM_LS3_LD_VECTOR_FIN",
+ "BriefDescription": "LS3 finished load vector op"
+ },
+ {,
+ "EventCode": "0x360A8",
+ "EventName": "PM_L3_CO",
+ "BriefDescription": "L3 castout occurring (does not include casthrough or log writes (cinj/dmaw))"
+ },
+ {,
+ "EventCode": "0x368A4",
+ "EventName": "PM_L3_CINJ",
+ "BriefDescription": "L3 castin of cache inject"
+ },
+ {,
+ "EventCode": "0xC890",
+ "EventName": "PM_LSU_NCST",
+ "BriefDescription": "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1"
+ },
+ {,
+ "EventCode": "0xD0B8",
+ "EventName": "PM_LSU_LMQ_FULL_CYC",
+ "BriefDescription": "Counts the number of cycles the LMQ is full"
+ },
+ {,
+ "EventCode": "0x168B2",
+ "EventName": "PM_L3_GRP_GUESS_CORRECT",
+ "BriefDescription": "Prefetch scope predictor selected GS or NNS and was correct"
+ },
+ {,
+ "EventCode": "0x48A4",
+ "EventName": "PM_STOP_FETCH_PENDING_CYC",
+ "BriefDescription": "Fetching is stopped due to an incoming instruction that will result in a flush"
+ },
+ {,
+ "EventCode": "0x36884",
+ "EventName": "PM_L2_RCST_DISP_FAIL_ADDR",
+ "BriefDescription": "All D-side store dispatch attempts for this thread that failed due to address collision with RC/CO/SN/SQ"
+ },
+ {,
+ "EventCode": "0x260AC",
+ "EventName": "PM_L3_PF_USAGE",
+ "BriefDescription": "Rotating sample of 32 PF actives"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
new file mode 100644
index 000000000..b4772f54a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json
@@ -0,0 +1,532 @@
+[
+ {,
+ "EventCode": "0x4D04C",
+ "EventName": "PM_DFU_BUSY",
+ "BriefDescription": "Cycles in which all 4 Decimal Floating Point units are busy. The DFU is running at capacity"
+ },
+ {
+ "EventCode": "0x100F6",
+ "EventName": "PM_IERAT_RELOAD",
+ "BriefDescription": "Number of I-ERAT reloads"
+ },
+ {
+ "EventCode": "0x201E2",
+ "EventName": "PM_MRK_LD_MISS_L1",
+ "BriefDescription": "Marked DL1 Demand Miss counted at exec time. Note that this count is per slice, so if a load spans multiple slices this event will increment multiple times for a single load."
+ },
+ {
+ "EventCode": "0x40010",
+ "EventName": "PM_PMC3_OVERFLOW",
+ "BriefDescription": "Overflow from counter 3"
+ },
+ {
+ "EventCode": "0x1005A",
+ "EventName": "PM_CMPLU_STALL_DFLONG",
+ "BriefDescription": "Finish stall because the NTF instruction was a multi-cycle instruction issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Qualified by multicycle"
+ },
+ {
+ "EventCode": "0x4D140",
+ "EventName": "PM_MRK_DATA_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "The processor's data cache was reloaded either shared or modified data from another core's L2/L3 on the same chip due to a marked load"
+ },
+ {
+ "EventCode": "0x3F14C",
+ "EventName": "PM_MRK_DPTEG_FROM_DL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x1E040",
+ "EventName": "PM_DPTEG_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x24052",
+ "EventName": "PM_FXU_IDLE",
+ "BriefDescription": "Cycles in which FXU0, FXU1, FXU2, and FXU3 are all idle"
+ },
+ {
+ "EventCode": "0x1E054",
+ "EventName": "PM_CMPLU_STALL",
+ "BriefDescription": "Nothing completed and ICT not empty"
+ },
+ {
+ "EventCode": "0x2",
+ "EventName": "PM_INST_CMPL",
+ "BriefDescription": "Number of PowerPC Instructions that completed."
+ },
+ {
+ "EventCode": "0x3D058",
+ "EventName": "PM_VSU_DP_FSQRT_FDIV",
+ "BriefDescription": "vector versions of fdiv,fsqrt"
+ },
+ {
+ "EventCode": "0x10006",
+ "EventName": "PM_DISP_HELD",
+ "BriefDescription": "Dispatch Held"
+ },
+ {
+ "EventCode": "0x200F8",
+ "EventName": "PM_EXT_INT",
+ "BriefDescription": "external interrupt"
+ },
+ {
+ "EventCode": "0x20008",
+ "EventName": "PM_ICT_EMPTY_CYC",
+ "BriefDescription": "Cycles in which the ICT is completely empty. No itags are assigned to any thread"
+ },
+ {
+ "EventCode": "0x4F146",
+ "EventName": "PM_MRK_DPTEG_FROM_L21_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x10056",
+ "EventName": "PM_MEM_READ",
+ "BriefDescription": "Reads from Memory from this thread (includes data/inst/xlate/l1prefetch/inst prefetch). Includes L4"
+ },
+ {
+ "EventCode": "0x3C04C",
+ "EventName": "PM_DATA_FROM_DL4",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's L4 on a different Node or Group (Distant) due to a demand load"
+ },
+ {
+ "EventCode": "0x4E046",
+ "EventName": "PM_DPTEG_FROM_L21_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x2E016",
+ "EventName": "PM_NTC_ISSUE_HELD_ARB",
+ "BriefDescription": "The NTC instruction is being held at dispatch because it lost arbitration onto the issue pipe to another instruction (from the same thread or a different thread)"
+ },
+ {
+ "EventCode": "0x15156",
+ "EventName": "PM_SYNC_MRK_FX_DIVIDE",
+ "BriefDescription": "Marked fixed point divide that can cause a synchronous interrupt"
+ },
+ {
+ "EventCode": "0x1C056",
+ "EventName": "PM_DERAT_MISS_4K",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 4K"
+ },
+ {
+ "EventCode": "0x2F142",
+ "EventName": "PM_MRK_DPTEG_FROM_L3_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x4C15C",
+ "EventName": "PM_MRK_DERAT_MISS_16G_1G",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16G (hpt mode) and 1G (radix mode)"
+ },
+ {
+ "EventCode": "0x10024",
+ "EventName": "PM_PMC5_OVERFLOW",
+ "BriefDescription": "Overflow from counter 5"
+ },
+ {
+ "EventCode": "0x4505E",
+ "EventName": "PM_FLOP_CMPL",
+ "BriefDescription": "Floating Point Operation Finished"
+ },
+ {
+ "EventCode": "0x2C018",
+ "EventName": "PM_CMPLU_STALL_DMISS_L21_L31",
+ "BriefDescription": "Completion stall by Dcache miss which resolved on chip ( excluding local L2/L3)"
+ },
+ {
+ "EventCode": "0x4006A",
+ "EventName": "PM_IERAT_RELOAD_16M",
+ "BriefDescription": "IERAT Reloaded (Miss) for a 16M page"
+ },
+ {
+ "EventCode": "0x4E010",
+ "EventName": "PM_ICT_NOSLOT_IC_L3MISS",
+ "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache"
+ },
+ {
+ "EventCode": "0x4D01C",
+ "EventName": "PM_ICT_NOSLOT_DISP_HELD_SYNC",
+ "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch"
+ },
+ {
+ "EventCode": "0x2D01A",
+ "EventName": "PM_ICT_NOSLOT_IC_MISS",
+ "BriefDescription": "Ict empty for this thread due to Icache Miss"
+ },
+ {
+ "EventCode": "0x4F14A",
+ "EventName": "PM_MRK_DPTEG_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x30058",
+ "EventName": "PM_TLBIE_FIN",
+ "BriefDescription": "tlbie finished"
+ },
+ {
+ "EventCode": "0x100F8",
+ "EventName": "PM_ICT_NOSLOT_CYC",
+ "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread"
+ },
+ {
+ "EventCode": "0x3E042",
+ "EventName": "PM_DPTEG_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x1F140",
+ "EventName": "PM_MRK_DPTEG_FROM_L2_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x1F058",
+ "EventName": "PM_RADIX_PWC_L2_PTE_FROM_L2",
+ "BriefDescription": "A Page Table Entry was reloaded to a level 2 page walk cache from the core's L2 data cache. This implies that level 3 and level 4 PWC accesses were not necessary for this translation"
+ },
+ {
+ "EventCode": "0x1D14A",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+ },
+ {
+ "EventCode": "0x10050",
+ "EventName": "PM_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {
+ "EventCode": "0x45058",
+ "EventName": "PM_IC_MISS_CMPL",
+ "BriefDescription": "Non-speculative icache miss, counted at completion"
+ },
+ {
+ "EventCode": "0x2D150",
+ "EventName": "PM_MRK_DERAT_MISS_4K",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 4K"
+ },
+ {
+ "EventCode": "0x34058",
+ "EventName": "PM_ICT_NOSLOT_BR_MPRED_ICMISS",
+ "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred"
+ },
+ {
+ "EventCode": "0x10022",
+ "EventName": "PM_PMC2_SAVED",
+ "BriefDescription": "PMC2 Rewind Value saved"
+ },
+ {
+ "EventCode": "0x2000A",
+ "EventName": "PM_HV_CYC",
+ "BriefDescription": "Cycles in which msr_hv is high. Note that this event does not take msr_pr into consideration"
+ },
+ {
+ "EventCode": "0x1F144",
+ "EventName": "PM_MRK_DPTEG_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x300FC",
+ "EventName": "PM_DTLB_MISS",
+ "BriefDescription": "Data PTEG reload"
+ },
+ {
+ "EventCode": "0x2C046",
+ "EventName": "PM_DATA_FROM_RL2L3_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load"
+ },
+ {
+ "EventCode": "0x20052",
+ "EventName": "PM_GRP_PUMP_MPRED",
+ "BriefDescription": "Final Pump Scope (Group) ended up either larger or smaller than Initial Pump Scope for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {
+ "EventCode": "0x3F05A",
+ "EventName": "PM_RADIX_PWC_L2_PDE_FROM_L3",
+ "BriefDescription": "A Page Directory Entry was reloaded to a level 2 page walk cache from the core's L3 data cache"
+ },
+ {
+ "EventCode": "0x1E04A",
+ "EventName": "PM_DPTEG_FROM_RL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x10064",
+ "EventName": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN",
+ "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch"
+ },
+ {
+ "EventCode": "0x2E046",
+ "EventName": "PM_DPTEG_FROM_RL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x4F14C",
+ "EventName": "PM_MRK_DPTEG_FROM_DMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x2E042",
+ "EventName": "PM_DPTEG_FROM_L3_MEPF",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 without dispatch conflicts hit on Mepf state. due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x2D012",
+ "EventName": "PM_CMPLU_STALL_DFU",
+ "BriefDescription": "Finish stall because the NTF instruction was issued to the Decimal Floating Point execution pipe and waiting to finish. Includes decimal floating point instructions + 128 bit binary floating point instructions. Not qualified by multicycle"
+ },
+ {
+ "EventCode": "0x3C054",
+ "EventName": "PM_DERAT_MISS_16M_2M",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 16M (HPT mode) or 2M (Radix mode)"
+ },
+ {
+ "EventCode": "0x4C04C",
+ "EventName": "PM_DATA_FROM_DMEM",
+ "BriefDescription": "The processor's data cache was reloaded from another chip's memory on the same Node or Group (Distant) due to a demand load"
+ },
+ {
+ "EventCode": "0x30022",
+ "EventName": "PM_PMC4_SAVED",
+ "BriefDescription": "PMC4 Rewind Value saved (matched condition)"
+ },
+ {
+ "EventCode": "0x200F4",
+ "EventName": "PM_RUN_CYC",
+ "BriefDescription": "Run_cycles"
+ },
+ {
+ "EventCode": "0x400F2",
+ "EventName": "PM_1PLUS_PPC_DISP",
+ "BriefDescription": "Cycles at least one Instr Dispatched"
+ },
+ {
+ "EventCode": "0x3D148",
+ "EventName": "PM_MRK_DATA_FROM_L21_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's L2 on the same chip due to a marked load"
+ },
+ {
+ "EventCode": "0x2F146",
+ "EventName": "PM_MRK_DPTEG_FROM_RL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x4E01A",
+ "EventName": "PM_ICT_NOSLOT_DISP_HELD",
+ "BriefDescription": "Cycles in which the NTC instruction is held at dispatch for any reason"
+ },
+ {
+ "EventCode": "0x401EC",
+ "EventName": "PM_THRESH_EXC_2048",
+ "BriefDescription": "Threshold counter exceeded a value of 2048"
+ },
+ {
+ "EventCode": "0x35150",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+ },
+ {
+ "EventCode": "0x3E052",
+ "EventName": "PM_ICT_NOSLOT_IC_L3",
+ "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3"
+ },
+ {
+ "EventCode": "0x2405A",
+ "EventName": "PM_NTC_FIN",
+ "BriefDescription": "Cycles in which the oldest instruction in the pipeline (NTC) finishes. This event is used to account for cycles in which work is being completed in the CPI stack"
+ },
+ {
+ "EventCode": "0x40052",
+ "EventName": "PM_PUMP_MPRED",
+ "BriefDescription": "Pump misprediction. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {
+ "EventCode": "0x30056",
+ "EventName": "PM_TM_ABORTS",
+ "BriefDescription": "Number of TM transactions aborted"
+ },
+ {
+ "EventCode": "0x2404C",
+ "EventName": "PM_INST_FROM_MEMORY",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a memory location including L4 from local remote or distant due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x30024",
+ "EventName": "PM_PMC6_OVERFLOW",
+ "BriefDescription": "Overflow from counter 6"
+ },
+ {
+ "EventCode": "0x10068",
+ "EventName": "PM_BRU_FIN",
+ "BriefDescription": "Branch Instruction Finished"
+ },
+ {
+ "EventCode": "0x3D154",
+ "EventName": "PM_MRK_DERAT_MISS_16M_2M",
+ "BriefDescription": "Marked Data ERAT Miss (Data TLB Access) page size 16M (hpt mode) or 2M (radix mode)"
+ },
+ {
+ "EventCode": "0x30020",
+ "EventName": "PM_PMC2_REWIND",
+ "BriefDescription": "PMC2 Rewind Event (did not match condition)"
+ },
+ {
+ "EventCode": "0x40064",
+ "EventName": "PM_DUMMY2_REMOVE_ME",
+ "BriefDescription": "Space holder for LS_PC_RELOAD_RA"
+ },
+ {
+ "EventCode": "0x3F148",
+ "EventName": "PM_MRK_DPTEG_FROM_DL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x4D01E",
+ "EventName": "PM_ICT_NOSLOT_BR_MPRED",
+ "BriefDescription": "Ict empty for this thread due to branch mispred"
+ },
+ {
+ "EventCode": "0x1F148",
+ "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x3E046",
+ "EventName": "PM_DPTEG_FROM_L21_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L2 on the same chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x2F144",
+ "EventName": "PM_MRK_DPTEG_FROM_L31_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's L3 on the same chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x14052",
+ "EventName": "PM_INST_GRP_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (Group) ended up larger than Initial Pump Scope (Chip) for an instruction fetch"
+ },
+ {
+ "EventCode": "0xD0A8",
+ "EventName": "PM_DSLB_MISS",
+ "BriefDescription": "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))"
+ },
+ {
+ "EventCode": "0x4C058",
+ "EventName": "PM_MEM_CO",
+ "BriefDescription": "Memory castouts from this thread"
+ },
+ {
+ "EventCode": "0x40004",
+ "EventName": "PM_FXU_FIN",
+ "BriefDescription": "The fixed point unit Unit finished an instruction. Instructions that finish may not necessary complete."
+ },
+ {
+ "EventCode": "0x2C054",
+ "EventName": "PM_DERAT_MISS_64K",
+ "BriefDescription": "Data ERAT Miss (Data TLB Access) page size 64K"
+ },
+ {
+ "EventCode": "0x10018",
+ "EventName": "PM_IC_DEMAND_CYC",
+ "BriefDescription": "Icache miss demand cycles"
+ },
+ {
+ "EventCode": "0x2D14E",
+ "EventName": "PM_MRK_DATA_FROM_L21_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a marked load"
+ },
+ {
+ "EventCode": "0x3405C",
+ "EventName": "PM_CMPLU_STALL_DPLONG",
+ "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle"
+ },
+ {
+ "EventCode": "0x4D052",
+ "EventName": "PM_2FLOP_CMPL",
+ "BriefDescription": "DP vector version of fmul, fsub, fcmp, fsel, fabs, fnabs, fres ,fsqrte, fneg "
+ },
+ {
+ "EventCode": "0x1F142",
+ "EventName": "PM_MRK_DPTEG_FROM_L2",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x40062",
+ "EventName": "PM_DUMMY1_REMOVE_ME",
+ "BriefDescription": "Space holder for L2_PC_PM_MK_LDST_SCOPE_PRED_STATUS"
+ },
+ {
+ "EventCode": "0x4C012",
+ "EventName": "PM_CMPLU_STALL_ERAT_MISS",
+ "BriefDescription": "Finish stall because the NTF instruction was a load or store that suffered a translation miss"
+ },
+ {
+ "EventCode": "0x4D050",
+ "EventName": "PM_VSU_NON_FLOP_CMPL",
+ "BriefDescription": "Non FLOP operation completed"
+ },
+ {
+ "EventCode": "0x2E012",
+ "EventName": "PM_TM_TX_PASS_RUN_CYC",
+ "BriefDescription": "cycles spent in successful transactions"
+ },
+ {
+ "EventCode": "0x4D04E",
+ "EventName": "PM_VSU_FSQRT_FDIV",
+ "BriefDescription": "four flops operation (fdiv,fsqrt) Scalar Instructions only"
+ },
+ {
+ "EventCode": "0x4C120",
+ "EventName": "PM_MRK_DATA_FROM_L2_MEPF",
+ "BriefDescription": "The processor's data cache was reloaded from local core's L2 hit without dispatch conflicts on Mepf state. due to a marked load"
+ },
+ {
+ "EventCode": "0x10062",
+ "EventName": "PM_LD_L3MISS_PEND_CYC",
+ "BriefDescription": "Cycles L3 miss was pending for this thread"
+ },
+ {
+ "EventCode": "0x2F14C",
+ "EventName": "PM_MRK_DPTEG_FROM_MEMORY",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x14050",
+ "EventName": "PM_INST_CHIP_PUMP_CPRED",
+ "BriefDescription": "Initial and Final Pump Scope was chip pump (prediction=correct) for an instruction fetch"
+ },
+ {
+ "EventCode": "0x2000E",
+ "EventName": "PM_FXU_BUSY",
+ "BriefDescription": "Cycles in which all 4 FXUs are busy. The FXU is running at capacity"
+ },
+ {
+ "EventCode": "0x20066",
+ "EventName": "PM_TLB_MISS",
+ "BriefDescription": "TLB Miss (I + D)"
+ },
+ {
+ "EventCode": "0x10054",
+ "EventName": "PM_PUMP_CPRED",
+ "BriefDescription": "Pump prediction correct. Counts across all types of pumps for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {
+ "EventCode": "0x4D124",
+ "EventName": "PM_MRK_DATA_FROM_L31_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L3 on the same chip due to a marked load"
+ },
+ {
+ "EventCode": "0x400F8",
+ "EventName": "PM_FLUSH",
+ "BriefDescription": "Flush (any type)"
+ },
+ {
+ "EventCode": "0x30004",
+ "EventName": "PM_CMPLU_STALL_EMQ_FULL",
+ "BriefDescription": "Finish stall because the next to finish instruction suffered an ERAT miss and the EMQ was full"
+ },
+ {
+ "EventCode": "0x1D154",
+ "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
new file mode 100644
index 000000000..8b3b0f3be
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json
@@ -0,0 +1,117 @@
+[
+ {
+ "EventCode": "0x20036",
+ "EventName": "PM_BR_2PATH",
+ "BriefDescription": "Branches that are not strongly biased"
+ },
+ {
+ "EventCode": "0x40056",
+ "EventName": "PM_MEM_LOC_THRESH_LSU_HIGH",
+ "BriefDescription": "Local memory above threshold for LSU medium"
+ },
+ {
+ "EventCode": "0x40118",
+ "EventName": "PM_MRK_DCACHE_RELOAD_INTV",
+ "BriefDescription": "Combined Intervention event"
+ },
+ {
+ "EventCode": "0x4F148",
+ "EventName": "PM_MRK_DPTEG_FROM_DL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x301E8",
+ "EventName": "PM_THRESH_EXC_64",
+ "BriefDescription": "Threshold counter exceeded a value of 64"
+ },
+ {
+ "EventCode": "0x4E04E",
+ "EventName": "PM_DPTEG_FROM_L3MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L3 due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x40050",
+ "EventName": "PM_SYS_PUMP_MPRED_RTY",
+ "BriefDescription": "Final Pump Scope (system) ended up larger than Initial Pump Scope (Chip/Group) for all data types excluding data prefetch (demand load,inst prefetch,inst fetch,xlate)"
+ },
+ {
+ "EventCode": "0x1F14E",
+ "EventName": "PM_MRK_DPTEG_FROM_L2MISS",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a location other than the local core's L2 due to a marked data side request.. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x4D018",
+ "EventName": "PM_CMPLU_STALL_BRU",
+ "BriefDescription": "Completion stall due to a Branch Unit"
+ },
+ {
+ "EventCode": "0x45052",
+ "EventName": "PM_4FLOP_CMPL",
+ "BriefDescription": "4 FLOP instruction completed"
+ },
+ {
+ "EventCode": "0x3D142",
+ "EventName": "PM_MRK_DATA_FROM_LMEM",
+ "BriefDescription": "The processor's data cache was reloaded from the local chip's Memory due to a marked load"
+ },
+ {
+ "EventCode": "0x4C01E",
+ "EventName": "PM_CMPLU_STALL_CRYPTO",
+ "BriefDescription": "Finish stall because the NTF instruction was routed to the crypto execution pipe and was waiting to finish"
+ },
+ {
+ "EventCode": "0x3000C",
+ "EventName": "PM_FREQ_DOWN",
+ "BriefDescription": "Power Management: Below Threshold B"
+ },
+ {
+ "EventCode": "0x4D128",
+ "EventName": "PM_MRK_DATA_FROM_LMEM_CYC",
+ "BriefDescription": "Duration in cycles to reload from the local chip's Memory due to a marked load"
+ },
+ {
+ "EventCode": "0x4D054",
+ "EventName": "PM_8FLOP_CMPL",
+ "BriefDescription": "8 FLOP instruction completed"
+ },
+ {
+ "EventCode": "0x10026",
+ "EventName": "PM_TABLEWALK_CYC",
+ "BriefDescription": "Cycles when an instruction tablewalk is active"
+ },
+ {
+ "EventCode": "0x2C012",
+ "EventName": "PM_CMPLU_STALL_DCACHE_MISS",
+ "BriefDescription": "Finish stall because the NTF instruction was a load that missed the L1 and was waiting for the data to return from the nest"
+ },
+ {
+ "EventCode": "0x2E04C",
+ "EventName": "PM_DPTEG_FROM_MEMORY",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x3F142",
+ "EventName": "PM_MRK_DPTEG_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 with dispatch conflict due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x4F142",
+ "EventName": "PM_MRK_DPTEG_FROM_L3",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L3 due to a marked data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x10060",
+ "EventName": "PM_TM_TRANS_RUN_CYC",
+ "BriefDescription": "run cycles in transactional state"
+ },
+ {
+ "EventCode": "0x1E04C",
+ "EventName": "PM_DPTEG_FROM_LL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from the local chip's L4 cache due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x45050",
+ "EventName": "PM_1FLOP_CMPL",
+ "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json
new file mode 100644
index 000000000..b27642676
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json
@@ -0,0 +1,227 @@
+[
+ {
+ "EventCode": "0x1E",
+ "EventName": "PM_CYC",
+ "BriefDescription": "Processor cycles"
+ },
+ {
+ "EventCode": "0x30010",
+ "EventName": "PM_PMC2_OVERFLOW",
+ "BriefDescription": "Overflow from counter 2"
+ },
+ {
+ "EventCode": "0x3C046",
+ "EventName": "PM_DATA_FROM_L21_SHR",
+ "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another core's L2 on the same chip due to a demand load"
+ },
+ {
+ "EventCode": "0x4D05C",
+ "EventName": "PM_DP_QP_FLOP_CMPL",
+ "BriefDescription": "Double-Precion or Quad-Precision instruction completed"
+ },
+ {
+ "EventCode": "0x4E04C",
+ "EventName": "PM_DPTEG_FROM_DMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x20016",
+ "EventName": "PM_ST_FIN",
+ "BriefDescription": "Store finish count. Includes speculative activity"
+ },
+ {
+ "EventCode": "0x1504A",
+ "EventName": "PM_IPTEG_FROM_RL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request"
+ },
+ {
+ "EventCode": "0x40132",
+ "EventName": "PM_MRK_LSU_FIN",
+ "BriefDescription": "lsu marked instr PPC finish"
+ },
+ {
+ "EventCode": "0x3C05C",
+ "EventName": "PM_CMPLU_STALL_VFXU",
+ "BriefDescription": "Finish stall due to a vector fixed point instruction in the execution pipeline. These instructions get routed to the ALU, ALU2, and DIV pipes"
+ },
+ {
+ "EventCode": "0x30066",
+ "EventName": "PM_LSU_FIN",
+ "BriefDescription": "LSU Finished a PPC instruction (up to 4 per cycle)"
+ },
+ {
+ "EventCode": "0x2011C",
+ "EventName": "PM_MRK_NTC_CYC",
+ "BriefDescription": "Cycles during which the marked instruction is next to complete (completion is held up because the marked instruction hasn't completed yet)"
+ },
+ {
+ "EventCode": "0x3E048",
+ "EventName": "PM_DPTEG_FROM_DL2L3_SHR",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x2E018",
+ "EventName": "PM_CMPLU_STALL_VFXLONG",
+ "BriefDescription": "Completion stall due to a long latency vector fixed point instruction (division, square root)"
+ },
+ {
+ "EventCode": "0x1C04E",
+ "EventName": "PM_DATA_FROM_L2MISS_MOD",
+ "BriefDescription": "The processor's data cache was reloaded from a location other than the local core's L2 due to a demand load"
+ },
+ {
+ "EventCode": "0x15048",
+ "EventName": "PM_IPTEG_FROM_ON_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on the same chip due to a instruction side request"
+ },
+ {
+ "EventCode": "0x34046",
+ "EventName": "PM_INST_FROM_L21_SHR",
+ "BriefDescription": "The processor's Instruction cache was reloaded with Shared (S) data from another core's L2 on the same chip due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x1E058",
+ "EventName": "PM_STCX_FAIL",
+ "BriefDescription": "stcx failed"
+ },
+ {
+ "EventCode": "0x300F0",
+ "EventName": "PM_ST_MISS_L1",
+ "BriefDescription": "Store Missed L1"
+ },
+ {
+ "EventCode": "0x4C046",
+ "EventName": "PM_DATA_FROM_L21_MOD",
+ "BriefDescription": "The processor's data cache was reloaded with Modified (M) data from another core's L2 on the same chip due to a demand load"
+ },
+ {
+ "EventCode": "0x2504A",
+ "EventName": "PM_IPTEG_FROM_RL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on the same Node or Group ( Remote) due to a instruction side request"
+ },
+ {
+ "EventCode": "0x2003E",
+ "EventName": "PM_LSU_LMQ_SRQ_EMPTY_CYC",
+ "BriefDescription": "Cycles in which the LSU is empty for all threads (lmq and srq are completely empty)"
+ },
+ {
+ "EventCode": "0x201E6",
+ "EventName": "PM_THRESH_EXC_32",
+ "BriefDescription": "Threshold counter exceeded a value of 32"
+ },
+ {
+ "EventCode": "0x4405C",
+ "EventName": "PM_CMPLU_STALL_VDP",
+ "BriefDescription": "Finish stall because the NTF instruction was a vector instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Not qualified multicycle. Qualified by vector"
+ },
+ {
+ "EventCode": "0x4D010",
+ "EventName": "PM_PMC1_SAVED",
+ "BriefDescription": "PMC1 Rewind Value saved"
+ },
+ {
+ "EventCode": "0x44042",
+ "EventName": "PM_INST_FROM_L3",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x200FE",
+ "EventName": "PM_DATA_FROM_L2MISS",
+ "BriefDescription": "Demand LD - L2 Miss (not L2 hit)"
+ },
+ {
+ "EventCode": "0x2D14A",
+ "EventName": "PM_MRK_DATA_FROM_RL2L3_MOD_CYC",
+ "BriefDescription": "Duration in cycles to reload with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a marked load"
+ },
+ {
+ "EventCode": "0x10028",
+ "EventName": "PM_STALL_END_ICT_EMPTY",
+ "BriefDescription": "The number a times the core transitioned from a stall to ICT-empty for this thread"
+ },
+ {
+ "EventCode": "0x2504C",
+ "EventName": "PM_IPTEG_FROM_MEMORY",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from a memory location including L4 from local remote or distant due to a instruction side request"
+ },
+ {
+ "EventCode": "0x4504A",
+ "EventName": "PM_IPTEG_FROM_OFF_CHIP_CACHE",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a instruction side request"
+ },
+ {
+ "EventCode": "0x1404E",
+ "EventName": "PM_INST_FROM_L2MISS",
+ "BriefDescription": "The processor's Instruction cache was reloaded from a location other than the local core's L2 due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x34042",
+ "EventName": "PM_INST_FROM_L3_DISP_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 with dispatch conflict due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x4E048",
+ "EventName": "PM_DPTEG_FROM_DL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on a different Node or Group (Distant), as this chip due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x200F0",
+ "EventName": "PM_ST_CMPL",
+ "BriefDescription": "Stores completed from S2Q (2nd-level store queue)."
+ },
+ {
+ "EventCode": "0x4E05C",
+ "EventName": "PM_LSU_REJECT_LHS",
+ "BriefDescription": "LSU Reject due to LHS (up to 4 per cycle)"
+ },
+ {
+ "EventCode": "0x14044",
+ "EventName": "PM_INST_FROM_L3_NO_CONFLICT",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without conflict due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x3E04C",
+ "EventName": "PM_DPTEG_FROM_DL4",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's L4 on a different Node or Group (Distant) due to a data side request. When using Radix Page Translation, this count excludes PDE reloads. Only PTE reloads are included"
+ },
+ {
+ "EventCode": "0x1F15E",
+ "EventName": "PM_MRK_PROBE_NOP_CMPL",
+ "BriefDescription": "Marked probeNops completed"
+ },
+ {
+ "EventCode": "0x20018",
+ "EventName": "PM_ST_FWD",
+ "BriefDescription": "Store forwards that finished"
+ },
+ {
+ "EventCode": "0x1D142",
+ "EventName": "PM_MRK_DATA_FROM_L31_ECO_SHR_CYC",
+ "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's ECO L3 on the same chip due to a marked load"
+ },
+ {
+ "EventCode": "0x24042",
+ "EventName": "PM_INST_FROM_L3_MEPF",
+ "BriefDescription": "The processor's Instruction cache was reloaded from local core's L3 without dispatch conflicts hit on Mepf state. due to an instruction fetch (not prefetch)"
+ },
+ {
+ "EventCode": "0x25046",
+ "EventName": "PM_IPTEG_FROM_RL2L3_MOD",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a instruction side request"
+ },
+ {
+ "EventCode": "0x3504A",
+ "EventName": "PM_IPTEG_FROM_RMEM",
+ "BriefDescription": "A Page Table Entry was loaded into the TLB from another chip's memory on the same Node or Group ( Remote) due to a instruction side request"
+ },
+ {
+ "EventCode": "0x3C05A",
+ "EventName": "PM_CMPLU_STALL_VDPLONG",
+ "BriefDescription": "Finish stall because the NTF instruction was a scalar multi-cycle instruction issued to the Double Precision execution pipe and waiting to finish. Includes binary floating point instructions in 32 and 64 bit binary floating point format. Qualified by NOT vector AND multicycle"
+ },
+ {
+ "EventCode": "0x2E01C",
+ "EventName": "PM_CMPLU_STALL_TLBIE",
+ "BriefDescription": "Finish stall because the NTF instruction was a tlbie waiting for response from L2"
+ }
+]
\ No newline at end of file
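
To make these names usable, the build maps the running CPU to a model directory through the per-arch mapfile.csv (tools/perf/pmu-events/arch/powerpc/mapfile.csv) and jevents then turns each topic file into a C array of struct pmu_event entries, which is what lets a user write perf stat -e pm_cyc instead of a raw event code. The sketch below illustrates that translation step conceptually; it is not the real generator (that is the C program tools/perf/pmu-events/jevents.c), and the exact struct pmu_event field set may differ:

    # Conceptual sketch of the jevents translation step (illustrative
    # only). Field names follow struct pmu_event in pmu-events.h; the
    # real generator is C and handles many more optional fields.
    import json

    def to_c_table(path, var):
        lines = [f"struct pmu_event {var}[] = {{"]
        for ev in json.load(open(path)):
            desc = ev["BriefDescription"].replace('"', '\\"')
            lines.append('\t{ .name = "%s", .event = "event=%s", .desc = "%s" },'
                         % (ev["EventName"].lower(), ev["EventCode"].lower(), desc))
        lines.append("\t{ 0 },  /* zeroed sentinel terminates the table */")
        lines.append("};")
        return "\n".join(lines)

    print(to_c_table("pipeline.json", "power9_pipeline"))
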
diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/basic.json b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json
new file mode 100644
index 000000000..2dd8dafff
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z10/basic.json
@@ -0,0 +1,86 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "0",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "CPU Cycles",
+ "PublicDescription": "Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "1",
+ "EventName": "INSTRUCTIONS",
+ "BriefDescription": "Instructions",
+ "PublicDescription": "Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "2",
+ "EventName": "L1I_DIR_WRITES",
+ "BriefDescription": "L1I Directory Writes",
+ "PublicDescription": "Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "3",
+ "EventName": "L1I_PENALTY_CYCLES",
+ "BriefDescription": "L1I Penalty Cycles",
+ "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "4",
+ "EventName": "L1D_DIR_WRITES",
+ "BriefDescription": "L1D Directory Writes",
+ "PublicDescription": "Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "5",
+ "EventName": "L1D_PENALTY_CYCLES",
+ "BriefDescription": "L1D Penalty Cycles",
+ "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "32",
+ "EventName": "PROBLEM_STATE_CPU_CYCLES",
+ "BriefDescription": "Problem-State CPU Cycles",
+ "PublicDescription": "Problem-State Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "33",
+ "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+ "BriefDescription": "Problem-State Instructions",
+ "PublicDescription": "Problem-State Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "34",
+ "EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+ "BriefDescription": "Problem-State L1I Directory Writes",
+ "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "35",
+ "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1I Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "36",
+ "EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+ "BriefDescription": "Problem-State L1D Directory Writes",
+ "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "37",
+ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1D Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json
new file mode 100644
index 000000000..db286f19e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z10/crypto.json
@@ -0,0 +1,114 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "64",
+ "EventName": "PRNG_FUNCTIONS",
+ "BriefDescription": "PRNG Functions",
+ "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "65",
+ "EventName": "PRNG_CYCLES",
+ "BriefDescription": "PRNG Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "66",
+ "EventName": "PRNG_BLOCKED_FUNCTIONS",
+ "BriefDescription": "PRNG Blocked Functions",
+ "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "67",
+ "EventName": "PRNG_BLOCKED_CYCLES",
+ "BriefDescription": "PRNG Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "68",
+ "EventName": "SHA_FUNCTIONS",
+ "BriefDescription": "SHA Functions",
+ "PublicDescription": "Total number of SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "69",
+ "EventName": "SHA_CYCLES",
+ "BriefDescription": "SHA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "70",
+ "EventName": "SHA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "SHA Blocked Functions",
+ "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "71",
+ "EventName": "SHA_BLOCKED_CYCLES",
+ "BriefDescription": "SHA Bloced Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "72",
+ "EventName": "DEA_FUNCTIONS",
+ "BriefDescription": "DEA Functions",
+ "PublicDescription": "Total number of the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "73",
+ "EventName": "DEA_CYCLES",
+ "BriefDescription": "DEA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "74",
+ "EventName": "DEA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "DEA Blocked Functions",
+ "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "75",
+ "EventName": "DEA_BLOCKED_CYCLES",
+ "BriefDescription": "DEA Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "76",
+ "EventName": "AES_FUNCTIONS",
+ "BriefDescription": "AES Functions",
+ "PublicDescription": "Total number of AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "77",
+ "EventName": "AES_CYCLES",
+ "BriefDescription": "AES Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "78",
+ "EventName": "AES_BLOCKED_FUNCTIONS",
+ "BriefDescription": "AES Blocked Functions",
+ "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "79",
+ "EventName": "AES_BLOCKED_CYCLES",
+ "BriefDescription": "AES Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z10/extended.json b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json
new file mode 100644
index 000000000..b6b7f29ca
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z10/extended.json
@@ -0,0 +1,128 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "128",
+ "EventName": "L1I_L2_SOURCED_WRITES",
+ "BriefDescription": "L1I L2 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 (L1.5) cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "129",
+ "EventName": "L1D_L2_SOURCED_WRITES",
+ "BriefDescription": "L1D L2 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from the Level-2 (L1.5) cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "130",
+ "EventName": "L1I_L3_LOCAL_WRITES",
+ "BriefDescription": "L1I L3 Local Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from the Level-3 cache that is on the same book as the Instruction cache (Local L2 cache)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "131",
+ "EventName": "L1D_L3_LOCAL_WRITES",
+ "BriefDescription": "L1D L3 Local Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installtion cache line was source from the Level-3 cache that is on the same book as the Data cache (Local L2 cache)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "132",
+ "EventName": "L1I_L3_REMOTE_WRITES",
+ "BriefDescription": "L1I L3 Remote Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Instruction cache (Remote L2 cache)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "133",
+ "EventName": "L1D_L3_REMOTE_WRITES",
+ "BriefDescription": "L1D L3 Remote Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from a Level-3 cache that is not on the same book as the Data cache (Remote L2 cache)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "134",
+ "EventName": "L1D_LMEM_SOURCED_WRITES",
+ "BriefDescription": "L1D Local Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "135",
+ "EventName": "L1I_LMEM_SOURCED_WRITES",
+ "BriefDescription": "L1I Local Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the s ame book as the Instruction cache (Local Memory)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "136",
+ "EventName": "L1D_RO_EXCL_WRITES",
+ "BriefDescription": "L1D Read-only Exclusive Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "137",
+ "EventName": "L1I_CACHELINE_INVALIDATES",
+ "BriefDescription": "L1I Cacheline Invalidates",
+ "PublicDescription": "A cache line in the Level-1 I-Cache has been invalidated by a store on the same CPU as the Level-1 I-Cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "138",
+ "EventName": "ITLB1_WRITES",
+ "BriefDescription": "ITLB1 Writes",
+ "PublicDescription": "A translation entry has been written into the Level-1 Instruction Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "139",
+ "EventName": "DTLB1_WRITES",
+ "BriefDescription": "DTLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "140",
+ "EventName": "TLB2_PTE_WRITES",
+ "BriefDescription": "TLB2 PTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "141",
+ "EventName": "TLB2_CRSTE_WRITES",
+ "BriefDescription": "TLB2 CRSTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "142",
+ "EventName": "TLB2_CRSTE_HPAGE_WRITES",
+ "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "145",
+ "EventName": "ITLB1_MISSES",
+ "BriefDescription": "ITLB1 Misses",
+ "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "146",
+ "EventName": "DTLB1_MISSES",
+ "BriefDescription": "DTLB1 Misses",
+ "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle an DTLB1 miss is in progress"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "147",
+ "EventName": "L2C_STORES_SENT",
+ "BriefDescription": "L2C Stores Sent",
+ "PublicDescription": "Incremented by one for every store sent to Level-2 (L1.5) cache"
+ }
+]
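
The s390 files differ from the powerpc ones in two ways: each event carries a "Unit": "CPU-M-CF" field binding it to the CPU-Measurement Counter Facility PMU plus a longer "PublicDescription", and "EventCode" is the facility's decimal counter number rather than a hex encoding. Counter numbers fall into architectural counter sets (roughly: basic below 32, problem-state from 32, crypto from 64, and the model-dependent extended set from 128), which is why cf_z10 and cf_z13 can share identical basic.json and crypto.json files while each ships its own extended.json. A small bucketing sketch under that numbering assumption (an assumed helper, not part of the tree):

    # Assumed helper (not in the kernel tree): bucket s390 CPU-MF events
    # by counter set, using the approximate counter-number ranges above.
    import json

    def counter_set(code):
        n = int(code)
        if n < 32:
            return "basic"
        if n < 64:
            return "problem-state"
        if n < 128:
            return "crypto"
        return "extended"

    for ev in json.load(open("extended.json")):  # any cf_* topic file
        print(f'{counter_set(ev["EventCode"]):<13} {ev["EventCode"]:>4} {ev["EventName"]}')
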
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/basic.json b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json
new file mode 100644
index 000000000..2dd8dafff
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/basic.json
@@ -0,0 +1,86 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "0",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "CPU Cycles",
+ "PublicDescription": "Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "1",
+ "EventName": "INSTRUCTIONS",
+ "BriefDescription": "Instructions",
+ "PublicDescription": "Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "2",
+ "EventName": "L1I_DIR_WRITES",
+ "BriefDescription": "L1I Directory Writes",
+ "PublicDescription": "Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "3",
+ "EventName": "L1I_PENALTY_CYCLES",
+ "BriefDescription": "L1I Penalty Cycles",
+ "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "4",
+ "EventName": "L1D_DIR_WRITES",
+ "BriefDescription": "L1D Directory Writes",
+ "PublicDescription": "Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "5",
+ "EventName": "L1D_PENALTY_CYCLES",
+ "BriefDescription": "L1D Penalty Cycles",
+ "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "32",
+ "EventName": "PROBLEM_STATE_CPU_CYCLES",
+ "BriefDescription": "Problem-State CPU Cycles",
+ "PublicDescription": "Problem-State Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "33",
+ "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+ "BriefDescription": "Problem-State Instructions",
+ "PublicDescription": "Problem-State Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "34",
+ "EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+ "BriefDescription": "Problem-State L1I Directory Writes",
+ "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "35",
+ "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1I Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "36",
+ "EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+ "BriefDescription": "Problem-State L1D Directory Writes",
+ "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "37",
+ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1D Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json
new file mode 100644
index 000000000..db286f19e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/crypto.json
@@ -0,0 +1,114 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "64",
+ "EventName": "PRNG_FUNCTIONS",
+ "BriefDescription": "PRNG Functions",
+ "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "65",
+ "EventName": "PRNG_CYCLES",
+ "BriefDescription": "PRNG Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "66",
+ "EventName": "PRNG_BLOCKED_FUNCTIONS",
+ "BriefDescription": "PRNG Blocked Functions",
+ "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "67",
+ "EventName": "PRNG_BLOCKED_CYCLES",
+ "BriefDescription": "PRNG Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "68",
+ "EventName": "SHA_FUNCTIONS",
+ "BriefDescription": "SHA Functions",
+ "PublicDescription": "Total number of SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "69",
+ "EventName": "SHA_CYCLES",
+ "BriefDescription": "SHA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "70",
+ "EventName": "SHA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "SHA Blocked Functions",
+ "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "71",
+ "EventName": "SHA_BLOCKED_CYCLES",
+ "BriefDescription": "SHA Bloced Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "72",
+ "EventName": "DEA_FUNCTIONS",
+ "BriefDescription": "DEA Functions",
+ "PublicDescription": "Total number of the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "73",
+ "EventName": "DEA_CYCLES",
+ "BriefDescription": "DEA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "74",
+ "EventName": "DEA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "DEA Blocked Functions",
+ "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "75",
+ "EventName": "DEA_BLOCKED_CYCLES",
+ "BriefDescription": "DEA Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "76",
+ "EventName": "AES_FUNCTIONS",
+ "BriefDescription": "AES Functions",
+ "PublicDescription": "Total number of AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "77",
+ "EventName": "AES_CYCLES",
+ "BriefDescription": "AES Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "78",
+ "EventName": "AES_BLOCKED_FUNCTIONS",
+ "BriefDescription": "AES Blocked Functions",
+ "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "79",
+ "EventName": "AES_BLOCKED_CYCLES",
+ "BriefDescription": "AES Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+  }
+]
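
The crypto counters above come in FUNCTIONS/CYCLES pairs with matching BLOCKED variants, so the share of time a CPU's crypto work sat waiting on a coprocessor owned by another CPU falls out of simple arithmetic. A minimal sketch in Python, assuming the counter values have already been collected (for example with perf stat on the s390 cpum_cf PMU); the sample numbers are illustrative only, not part of this patch:

    # Derive coprocessor contention from the FUNCTIONS/CYCLES counter
    # pairs defined above. The counts stand in for values read via perf
    # (hypothetical data, not measurements).
    counts = {
        "SHA_CYCLES": 120_000,
        "SHA_BLOCKED_CYCLES": 30_000,
        "AES_CYCLES": 450_000,
        "AES_BLOCKED_CYCLES": 9_000,
    }

    def blocked_ratio(prefix):
        """Fraction of coprocessor cycles spent blocked on another CPU."""
        busy = counts[prefix + "_CYCLES"]
        blocked = counts[prefix + "_BLOCKED_CYCLES"]
        return blocked / (busy + blocked) if busy + blocked else 0.0

    for unit in ("SHA", "AES"):
        print(unit, "blocked ratio:", format(blocked_ratio(unit), ".1%"))
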
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/extended.json b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
new file mode 100644
index 000000000..436ce33f1
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/extended.json
@@ -0,0 +1,394 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "128",
+ "EventName": "L1D_RO_EXCL_WRITES",
+ "BriefDescription": "L1D Read-only Exclusive Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "129",
+ "EventName": "DTLB1_WRITES",
+ "BriefDescription": "DTLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "130",
+ "EventName": "DTLB1_MISSES",
+ "BriefDescription": "DTLB1 Misses",
+ "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "131",
+ "EventName": "DTLB1_HPAGE_WRITES",
+ "BriefDescription": "DTLB1 One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "132",
+ "EventName": "DTLB1_GPAGE_WRITES",
+ "BriefDescription": "DTLB1 Two-Gigabyte Page Writes",
+ "PublicDescription": "Counter:132 Name:DTLB1_GPAGE_WRITES A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a two-gigabyte page."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "133",
+ "EventName": "L1D_L2D_SOURCED_WRITES",
+ "BriefDescription": "L1D L2D Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "134",
+ "EventName": "ITLB1_WRITES",
+ "BriefDescription": "ITLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "135",
+ "EventName": "ITLB1_MISSES",
+ "BriefDescription": "ITLB1 Misses",
+ "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle an ITLB1 miss is in progress"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "136",
+ "EventName": "L1I_L2I_SOURCED_WRITES",
+ "BriefDescription": "L1I L2I Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "137",
+ "EventName": "TLB2_PTE_WRITES",
+ "BriefDescription": "TLB2 PTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "138",
+ "EventName": "TLB2_CRSTE_HPAGE_WRITES",
+ "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays for a one-megabyte large page translation"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "139",
+ "EventName": "TLB2_CRSTE_WRITES",
+ "BriefDescription": "TLB2 CRSTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Combined Region Segment Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "140",
+ "EventName": "TX_C_TEND",
+ "BriefDescription": "Completed TEND instructions in constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "141",
+ "EventName": "TX_NC_TEND",
+ "BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "143",
+ "EventName": "L1C_TLB1_MISSES",
+ "BriefDescription": "L1C TLB1 Misses",
+ "PublicDescription": "Increments by one for any cycle where a Level-1 cache or Level-1 TLB miss is in progress."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "144",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "145",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "146",
+ "EventName": "L1D_ONNODE_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Node L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "147",
+ "EventName": "L1D_ONNODE_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Node L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "148",
+ "EventName": "L1D_ONNODE_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Node L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "149",
+ "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "150",
+ "EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Drawer L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "151",
+ "EventName": "L1D_ONDRAWER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Drawer L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "152",
+ "EventName": "L1D_OFFDRAWER_SCOL_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Same-Column L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "153",
+ "EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "154",
+ "EventName": "L1D_OFFDRAWER_SCOL_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Same-Column L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "155",
+ "EventName": "L1D_OFFDRAWER_FCOL_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "156",
+ "EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "157",
+ "EventName": "L1D_OFFDRAWER_FCOL_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Far-Column L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "158",
+ "EventName": "L1D_ONNODE_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Node Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Node memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "159",
+ "EventName": "L1D_ONDRAWER_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "160",
+ "EventName": "L1D_OFFDRAWER_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "161",
+ "EventName": "L1D_ONCHIP_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "162",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "163",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "164",
+ "EventName": "L1I_ONNODE_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "165",
+ "EventName": "L1I_ONNODE_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Node L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "166",
+ "EventName": "L1I_ONNODE_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Node L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Node Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "167",
+ "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "168",
+ "EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Drawer L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "169",
+ "EventName": "L1I_ONDRAWER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Drawer L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Drawer Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "170",
+ "EventName": "L1I_OFFDRAWER_SCOL_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Same-Column L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "171",
+ "EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "172",
+ "EventName": "L1I_OFFDRAWER_SCOL_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Same-Column L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Same-Column Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "173",
+ "EventName": "L1I_OFFDRAWER_FCOL_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Far-Column L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "174",
+ "EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "175",
+ "EventName": "L1I_OFFDRAWER_FCOL_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Far-Column L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Far-Column Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "176",
+ "EventName": "L1I_ONNODE_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Node Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Node memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "177",
+ "EventName": "L1I_ONDRAWER_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "178",
+ "EventName": "L1I_OFFDRAWER_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "179",
+ "EventName": "L1I_ONCHIP_MEM_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Chip memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "218",
+ "EventName": "TX_NC_TABORT",
+ "BriefDescription": "Aborted transactions in non-constrained TX mode",
+ "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "219",
+ "EventName": "TX_C_TABORT_NO_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "220",
+ "EventName": "TX_C_TABORT_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "448",
+ "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+ "BriefDescription": "Cycle count with one thread active",
+ "PublicDescription": "Cycle count with one thread active"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "449",
+ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+ "BriefDescription": "Cycle count with two threads active",
+ "PublicDescription": "Cycle count with two threads active"
+  }
+]
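
The z13 "sourced writes" events split L1 directory writes by where the line was fetched from in the machine topology: Level-2, on-chip L3, on-node or on-drawer L4, same-column or far-column off-drawer caches, and memory at each level. Summing one group therefore gives a demand-fill source breakdown. A sketch under the assumption that the relevant counters were collected together; the values below are made up:

    # Break L1D fills down by source using a subset of the cf_z13
    # "sourced writes" counters above (illustrative values only).
    l1d_sources = {
        "L1D_L2D_SOURCED_WRITES": 800_000,
        "L1D_ONCHIP_L3_SOURCED_WRITES": 200_000,
        "L1D_ONNODE_L4_SOURCED_WRITES": 40_000,
        "L1D_ONDRAWER_L4_SOURCED_WRITES": 12_000,
        "L1D_ONNODE_MEM_SOURCED_WRITES": 3_000,
    }

    total = sum(l1d_sources.values())
    for event, count in sorted(l1d_sources.items(), key=lambda kv: -kv[1]):
        print(f"{event:35} {count / total:6.1%}")
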
diff --git a/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json
new file mode 100644
index 000000000..1a0034f79
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z13/transaction.json
@@ -0,0 +1,7 @@
+[
+ {
+ "BriefDescription": "Transaction count",
+ "MetricName": "transaction",
+ "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+ }
+]
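
The MetricExpr above is plain addition over the five TEND/TABORT events, yielding a transaction count (completed TENDs plus aborts); perf's metric support (perf stat -M transaction) evaluates it after collecting the named events. The same arithmetic done by hand, as a sketch over assumed counts:

    # Evaluate the "transaction" MetricExpr over collected counter
    # values (sample numbers only).
    counts = {
        "TX_C_TEND": 5_000,
        "TX_NC_TEND": 42_000,
        "TX_NC_TABORT": 1_100,
        "TX_C_TABORT_SPECIAL": 3,
        "TX_C_TABORT_NO_SPECIAL": 97,
    }

    expr = ("TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + "
            "TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL")
    transaction = sum(counts[name.strip()] for name in expr.split("+"))
    print("transaction =", transaction)
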
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/basic.json b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json
new file mode 100644
index 000000000..17fb52419
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/basic.json
@@ -0,0 +1,58 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "0",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "CPU Cycles",
+ "PublicDescription": "Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "1",
+ "EventName": "INSTRUCTIONS",
+ "BriefDescription": "Instructions",
+ "PublicDescription": "Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "2",
+ "EventName": "L1I_DIR_WRITES",
+ "BriefDescription": "L1I Directory Writes",
+ "PublicDescription": "Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "3",
+ "EventName": "L1I_PENALTY_CYCLES",
+ "BriefDescription": "L1I Penalty Cycles",
+ "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "4",
+ "EventName": "L1D_DIR_WRITES",
+ "BriefDescription": "L1D Directory Writes",
+ "PublicDescription": "Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "5",
+ "EventName": "L1D_PENALTY_CYCLES",
+ "BriefDescription": "L1D Penalty Cycles",
+ "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "32",
+ "EventName": "PROBLEM_STATE_CPU_CYCLES",
+ "BriefDescription": "Problem-State CPU Cycles",
+ "PublicDescription": "Problem-State Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "33",
+ "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+ "BriefDescription": "Problem-State Instructions",
+ "PublicDescription": "Problem-State Instruction Count"
+  }
+]
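
The basic set has the same shape on every counter-facility machine in this patch: cycles and instructions plus their problem-state (user-mode) subsets, which is enough for IPC and a user/supervisor split. A small sketch with placeholder counts (not measurements):

    # IPC and problem-state share from the basic counter set
    # (placeholder values only).
    c = {
        "CPU_CYCLES": 10_000_000,
        "INSTRUCTIONS": 7_500_000,
        "PROBLEM_STATE_CPU_CYCLES": 6_000_000,
        "PROBLEM_STATE_INSTRUCTIONS": 5_200_000,
    }

    print(f"IPC overall    : {c['INSTRUCTIONS'] / c['CPU_CYCLES']:.2f}")
    print(f"user cycles    : {c['PROBLEM_STATE_CPU_CYCLES'] / c['CPU_CYCLES']:.1%}")
    print(f"IPC, user only : {c['PROBLEM_STATE_INSTRUCTIONS'] / c['PROBLEM_STATE_CPU_CYCLES']:.2f}")
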
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json
new file mode 100644
index 000000000..db286f19e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/crypto.json
@@ -0,0 +1,114 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "64",
+ "EventName": "PRNG_FUNCTIONS",
+ "BriefDescription": "PRNG Functions",
+ "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "65",
+ "EventName": "PRNG_CYCLES",
+ "BriefDescription": "PRNG Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "66",
+ "EventName": "PRNG_BLOCKED_FUNCTIONS",
+ "BriefDescription": "PRNG Blocked Functions",
+ "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "67",
+ "EventName": "PRNG_BLOCKED_CYCLES",
+ "BriefDescription": "PRNG Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "68",
+ "EventName": "SHA_FUNCTIONS",
+ "BriefDescription": "SHA Functions",
+ "PublicDescription": "Total number of SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "69",
+ "EventName": "SHA_CYCLES",
+ "BriefDescription": "SHA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "70",
+ "EventName": "SHA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "SHA Blocked Functions",
+ "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "71",
+ "EventName": "SHA_BLOCKED_CYCLES",
+ "BriefDescription": "SHA Bloced Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "72",
+ "EventName": "DEA_FUNCTIONS",
+ "BriefDescription": "DEA Functions",
+ "PublicDescription": "Total number of the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "73",
+ "EventName": "DEA_CYCLES",
+ "BriefDescription": "DEA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "74",
+ "EventName": "DEA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "DEA Blocked Functions",
+ "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "75",
+ "EventName": "DEA_BLOCKED_CYCLES",
+ "BriefDescription": "DEA Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "76",
+ "EventName": "AES_FUNCTIONS",
+ "BriefDescription": "AES Functions",
+ "PublicDescription": "Total number of AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "77",
+ "EventName": "AES_CYCLES",
+ "BriefDescription": "AES Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "78",
+ "EventName": "AES_BLOCKED_FUNCTIONS",
+ "BriefDescription": "AES Blocked Functions",
+ "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "79",
+ "EventName": "AES_BLOCKED_CYCLES",
+ "BriefDescription": "AES Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/extended.json b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
new file mode 100644
index 000000000..e7a3524b7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/extended.json
@@ -0,0 +1,373 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "128",
+ "EventName": "L1D_RO_EXCL_WRITES",
+ "BriefDescription": "L1D Read-only Exclusive Writes",
+ "PublicDescription": "Counter:128 Name:L1D_RO_EXCL_WRITES A directory write to the Level-1 Data cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "129",
+ "EventName": "DTLB2_WRITES",
+ "BriefDescription": "DTLB2 Writes",
+ "PublicDescription": "A translation has been written into The Translation Lookaside Buffer 2 (TLB2) and the request was made by the data cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "130",
+ "EventName": "DTLB2_MISSES",
+ "BriefDescription": "DTLB2 Misses",
+ "PublicDescription": "A TLB2 miss is in progress for a request made by the data cache. Incremented by one for every TLB2 miss in progress for the Level-1 Data cache on this cycle"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "131",
+ "EventName": "DTLB2_HPAGE_WRITES",
+ "BriefDescription": "DTLB2 One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "132",
+ "EventName": "DTLB2_GPAGE_WRITES",
+ "BriefDescription": "DTLB2 Two-Gigabyte Page Writes",
+ "PublicDescription": "A translation entry for a two-gigabyte page was written into the Level-2 TLB"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "133",
+ "EventName": "L1D_L2D_SOURCED_WRITES",
+ "BriefDescription": "L1D L2D Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "134",
+ "EventName": "ITLB2_WRITES",
+ "BriefDescription": "ITLB2 Writes",
+ "PublicDescription": "A translation entry has been written into the Translation Lookaside Buffer 2 (TLB2) and the request was made by the instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "135",
+ "EventName": "ITLB2_MISSES",
+ "BriefDescription": "ITLB2 Misses",
+ "PublicDescription": "A TLB2 miss is in progress for a request made by the instruction cache. Incremented by one for every TLB2 miss in progress for the Level-1 Instruction cache in a cycle"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "136",
+ "EventName": "L1I_L2I_SOURCED_WRITES",
+ "BriefDescription": "L1I L2I Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "137",
+ "EventName": "TLB2_PTE_WRITES",
+ "BriefDescription": "TLB2 PTE Writes",
+ "PublicDescription": "A translation entry was written into the Page Table Entry array in the Level-2 TLB"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "138",
+ "EventName": "TLB2_CRSTE_WRITES",
+ "BriefDescription": "TLB2 CRSTE Writes",
+ "PublicDescription": "Translation entries were written into the Combined Region and Segment Table Entry array and the Page Table Entry array in the Level-2 TLB"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "139",
+ "EventName": "TLB2_ENGINES_BUSY",
+ "BriefDescription": "TLB2 Engines Busy",
+ "PublicDescription": "The number of Level-2 TLB translation engines busy in a cycle"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "140",
+ "EventName": "TX_C_TEND",
+ "BriefDescription": "Completed TEND instructions in constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "141",
+ "EventName": "TX_NC_TEND",
+ "BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a non-constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "143",
+ "EventName": "L1C_TLB2_MISSES",
+ "BriefDescription": "L1C TLB2 Misses",
+ "PublicDescription": "Increments by one for any cycle where a level-1 cache or level-2 TLB miss is in progress"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "144",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "145",
+ "EventName": "L1D_ONCHIP_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "146",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "147",
+ "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Cluster Level-3 cache withountervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "148",
+ "EventName": "L1D_ONCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "149",
+ "EventName": "L1D_ONCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "150",
+ "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "151",
+ "EventName": "L1D_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "152",
+ "EventName": "L1D_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "153",
+ "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "154",
+ "EventName": "L1D_OFFDRAWER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "155",
+ "EventName": "L1D_OFFDRAWER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Drawer L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "156",
+ "EventName": "L1D_ONDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "157",
+ "EventName": "L1D_OFFDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "158",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_RO",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes read-only",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from On-Chip L3 but a read-only invalidate was done to remove other copies of the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "162",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "163",
+ "EventName": "L1I_ONCHIP_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from On-Chip memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "164",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache ine was sourced from an On-Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "165",
+ "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "166",
+ "EventName": "L1I_ONCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "167",
+ "EventName": "L1I_ONCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "168",
+ "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Cluster L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "169",
+ "EventName": "L1I_OFFCLUSTER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Cluster Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Cluster memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "170",
+ "EventName": "L1I_OFFCLUSTER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Cluster L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Cluster Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "171",
+ "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "172",
+ "EventName": "L1I_OFFDRAWER_MEMORY_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer memory"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "173",
+ "EventName": "L1I_OFFDRAWER_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Drawer L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off-Drawer Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "174",
+ "EventName": "L1I_ONDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from On-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "175",
+ "EventName": "L1I_OFFDRAWER_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Drawer L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from Off-Drawer Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "224",
+ "EventName": "BCD_DFP_EXECUTION_SLOTS",
+ "BriefDescription": "BCD DFP Execution Slots",
+ "PublicDescription": "Count of floating point execution slots used for finished Binary Coded Decimal to Decimal Floating Point conversions. Instructions: CDZT, CXZT, CZDT, CZXT"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "225",
+ "EventName": "VX_BCD_EXECUTION_SLOTS",
+ "BriefDescription": "VX BCD Execution Slots",
+ "PublicDescription": "Count of floating point execution slots used for finished vector arithmetic Binary Coded Decimal instructions. Instructions: VAP, VSP, VMPVMSP, VDP, VSDP, VRP, VLIP, VSRP, VPSOPVCP, VTP, VPKZ, VUPKZ, VCVB, VCVBG, VCVDVCVDG"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "226",
+ "EventName": "DECIMAL_INSTRUCTIONS",
+ "BriefDescription": "Decimal Instructions",
+ "PublicDescription": "Decimal instructions dispatched. Instructions: CVB, CVD, AP, CP, DP, ED, EDMK, MP, SRP, SP, ZAP"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "232",
+ "EventName": "LAST_HOST_TRANSLATIONS",
+ "BriefDescription": "Last host translation done",
+ "PublicDescription": "Last Host Translation done"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "243",
+ "EventName": "TX_NC_TABORT",
+ "BriefDescription": "Aborted transactions in non-constrained TX mode",
+ "PublicDescription": "A transaction abort has occurred in a non-constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "244",
+ "EventName": "TX_C_TABORT_NO_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "245",
+ "EventName": "TX_C_TABORT_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "448",
+ "EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
+ "BriefDescription": "Cycle count with one thread active",
+ "PublicDescription": "Cycle count with one thread active"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "449",
+ "EventName": "MT_DIAG_CYCLES_TWO_THR_ACTIVE",
+ "BriefDescription": "Cycle count with two threads active",
+ "PublicDescription": "Cycle count with two threads active"
+  }
+]
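
MT_DIAG_CYCLES_ONE_THR_ACTIVE and MT_DIAG_CYCLES_TWO_THR_ACTIVE (present in the z13 file as well) partition core cycles by the number of active SMT threads, so a weighted average yields effective thread occupancy. A sketch over assumed counts:

    # Average number of active SMT threads per core cycle, from the
    # MT_DIAG cycle counters above (illustrative values).
    one_thr = 3_000_000   # MT_DIAG_CYCLES_ONE_THR_ACTIVE
    two_thr = 9_000_000   # MT_DIAG_CYCLES_TWO_THR_ACTIVE

    total = one_thr + two_thr
    avg_threads = (1 * one_thr + 2 * two_thr) / total
    print(f"average active threads: {avg_threads:.2f} over {total} cycles")
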
diff --git a/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json
new file mode 100644
index 000000000..1a0034f79
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z14/transaction.json
@@ -0,0 +1,7 @@
+[
+ {
+ "BriefDescription": "Transaction count",
+ "MetricName": "transaction",
+ "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/basic.json b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json
new file mode 100644
index 000000000..2dd8dafff
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z196/basic.json
@@ -0,0 +1,86 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "0",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "CPU Cycles",
+ "PublicDescription": "Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "1",
+ "EventName": "INSTRUCTIONS",
+ "BriefDescription": "Instructions",
+ "PublicDescription": "Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "2",
+ "EventName": "L1I_DIR_WRITES",
+ "BriefDescription": "L1I Directory Writes",
+ "PublicDescription": "Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "3",
+ "EventName": "L1I_PENALTY_CYCLES",
+ "BriefDescription": "L1I Penalty Cycles",
+ "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "4",
+ "EventName": "L1D_DIR_WRITES",
+ "BriefDescription": "L1D Directory Writes",
+ "PublicDescription": "Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "5",
+ "EventName": "L1D_PENALTY_CYCLES",
+ "BriefDescription": "L1D Penalty Cycles",
+ "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "32",
+ "EventName": "PROBLEM_STATE_CPU_CYCLES",
+ "BriefDescription": "Problem-State CPU Cycles",
+ "PublicDescription": "Problem-State Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "33",
+ "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+ "BriefDescription": "Problem-State Instructions",
+ "PublicDescription": "Problem-State Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "34",
+ "EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+ "BriefDescription": "Problem-State L1I Directory Writes",
+ "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "35",
+ "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1I Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "36",
+ "EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+ "BriefDescription": "Problem-State L1D Directory Writes",
+ "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "37",
+ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1D Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json
new file mode 100644
index 000000000..db286f19e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z196/crypto.json
@@ -0,0 +1,114 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "64",
+ "EventName": "PRNG_FUNCTIONS",
+ "BriefDescription": "PRNG Functions",
+ "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "65",
+ "EventName": "PRNG_CYCLES",
+ "BriefDescription": "PRNG Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "66",
+ "EventName": "PRNG_BLOCKED_FUNCTIONS",
+ "BriefDescription": "PRNG Blocked Functions",
+ "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "67",
+ "EventName": "PRNG_BLOCKED_CYCLES",
+ "BriefDescription": "PRNG Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "68",
+ "EventName": "SHA_FUNCTIONS",
+ "BriefDescription": "SHA Functions",
+ "PublicDescription": "Total number of SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "69",
+ "EventName": "SHA_CYCLES",
+ "BriefDescription": "SHA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "70",
+ "EventName": "SHA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "SHA Blocked Functions",
+ "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "71",
+ "EventName": "SHA_BLOCKED_CYCLES",
+ "BriefDescription": "SHA Bloced Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "72",
+ "EventName": "DEA_FUNCTIONS",
+ "BriefDescription": "DEA Functions",
+ "PublicDescription": "Total number of the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "73",
+ "EventName": "DEA_CYCLES",
+ "BriefDescription": "DEA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "74",
+ "EventName": "DEA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "DEA Blocked Functions",
+ "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "75",
+ "EventName": "DEA_BLOCKED_CYCLES",
+ "BriefDescription": "DEA Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "76",
+ "EventName": "AES_FUNCTIONS",
+ "BriefDescription": "AES Functions",
+ "PublicDescription": "Total number of AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "77",
+ "EventName": "AES_CYCLES",
+ "BriefDescription": "AES Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "78",
+ "EventName": "AES_BLOCKED_FUNCTIONS",
+ "BriefDescription": "AES Blocked Functions",
+ "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "79",
+ "EventName": "AES_BLOCKED_CYCLES",
+ "BriefDescription": "AES Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z196/extended.json b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json
new file mode 100644
index 000000000..b7b42a870
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_z196/extended.json
@@ -0,0 +1,170 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "128",
+ "EventName": "L1D_L2_SOURCED_WRITES",
+ "BriefDescription": "L1D L2 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from the Level-2 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "129",
+ "EventName": "L1I_L2_SOURCED_WRITES",
+ "BriefDescription": "L1I L2 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from the Level-2 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "130",
+ "EventName": "DTLB1_MISSES",
+ "BriefDescription": "DTLB1 Misses",
+ "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "131",
+ "EventName": "ITLB1_MISSES",
+ "BriefDescription": "ITLB1 Misses",
+ "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle a ITLB1 miss is in progress."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "133",
+ "EventName": "L2C_STORES_SENT",
+ "BriefDescription": "L2C Stores Sent",
+ "PublicDescription": "Incremented by one for every store sent to Level-2 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "134",
+ "EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Book L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "135",
+ "EventName": "L1D_ONBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "136",
+ "EventName": "L1I_ONBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "137",
+ "EventName": "L1D_RO_EXCL_WRITES",
+ "BriefDescription": "L1D Read-only Exclusive Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "138",
+ "EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "139",
+ "EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "140",
+ "EventName": "DTLB1_HPAGE_WRITES",
+ "BriefDescription": "DTLB1 One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "141",
+ "EventName": "L1D_LMEM_SOURCED_WRITES",
+ "BriefDescription": "L1D Local Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "142",
+ "EventName": "L1I_LMEM_SOURCED_WRITES",
+ "BriefDescription": "L1I Local Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "143",
+ "EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Book L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Book Level-3 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "144",
+ "EventName": "DTLB1_WRITES",
+ "BriefDescription": "DTLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "145",
+ "EventName": "ITLB1_WRITES",
+ "BriefDescription": "ITLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "146",
+ "EventName": "TLB2_PTE_WRITES",
+ "BriefDescription": "TLB2 PTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "147",
+ "EventName": "TLB2_CRSTE_HPAGE_WRITES",
+ "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "148",
+ "EventName": "TLB2_CRSTE_WRITES",
+ "BriefDescription": "TLB2 CRSTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "150",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "152",
+ "EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "153",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an On Chip Level-3 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "155",
+ "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 I-Cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache"
+ }
+]
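These s390 tables are consumed by perf's jevents generator at build time; at runtime the symbolic names resolve through the s390 CPU-Measurement Counter Facility PMU (cpum_cf). As a minimal sketch of how the files can be inspected directly — assuming the kernel tree layout added by this patch and a strict JSON parser — the following Python prints the event selectors one file defines:

    #!/usr/bin/env python3
    # Minimal sketch: print perf-style selectors for one s390 event table.
    # The path assumes a checkout of the tree this patch applies to.
    import json

    PATH = "tools/perf/pmu-events/arch/s390/cf_zec12/extended.json"

    with open(PATH) as f:
        events = json.load(f)

    for ev in events:
        # Each entry pairs a decimal EventCode with a symbolic EventName.
        print(f"perf stat -e cpum_cf/{ev['EventName']}/  # code {ev['EventCode']}")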
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json
new file mode 100644
index 000000000..2dd8dafff
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/basic.json
@@ -0,0 +1,86 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "0",
+ "EventName": "CPU_CYCLES",
+ "BriefDescription": "CPU Cycles",
+ "PublicDescription": "Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "1",
+ "EventName": "INSTRUCTIONS",
+ "BriefDescription": "Instructions",
+ "PublicDescription": "Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "2",
+ "EventName": "L1I_DIR_WRITES",
+ "BriefDescription": "L1I Directory Writes",
+ "PublicDescription": "Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "3",
+ "EventName": "L1I_PENALTY_CYCLES",
+ "BriefDescription": "L1I Penalty Cycles",
+ "PublicDescription": "Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "4",
+ "EventName": "L1D_DIR_WRITES",
+ "BriefDescription": "L1D Directory Writes",
+ "PublicDescription": "Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "5",
+ "EventName": "L1D_PENALTY_CYCLES",
+ "BriefDescription": "L1D Penalty Cycles",
+ "PublicDescription": "Level-1 D-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "32",
+ "EventName": "PROBLEM_STATE_CPU_CYCLES",
+ "BriefDescription": "Problem-State CPU Cycles",
+ "PublicDescription": "Problem-State Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "33",
+ "EventName": "PROBLEM_STATE_INSTRUCTIONS",
+ "BriefDescription": "Problem-State Instructions",
+ "PublicDescription": "Problem-State Instruction Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "34",
+ "EventName": "PROBLEM_STATE_L1I_DIR_WRITES",
+ "BriefDescription": "Problem-State L1I Directory Writes",
+ "PublicDescription": "Problem-State Level-1 I-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "35",
+ "EventName": "PROBLEM_STATE_L1I_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1I Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 I-Cache Penalty Cycle Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "36",
+ "EventName": "PROBLEM_STATE_L1D_DIR_WRITES",
+ "BriefDescription": "Problem-State L1D Directory Writes",
+ "PublicDescription": "Problem-State Level-1 D-Cache Directory Write Count"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "37",
+ "EventName": "PROBLEM_STATE_L1D_PENALTY_CYCLES",
+ "BriefDescription": "Problem-State L1D Penalty Cycles",
+ "PublicDescription": "Problem-State Level-1 D-Cache Penalty Cycle Count"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json
new file mode 100644
index 000000000..db286f19e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/crypto.json
@@ -0,0 +1,114 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "64",
+ "EventName": "PRNG_FUNCTIONS",
+ "BriefDescription": "PRNG Functions",
+ "PublicDescription": "Total number of the PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "65",
+ "EventName": "PRNG_CYCLES",
+ "BriefDescription": "PRNG Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing PRNG functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "66",
+ "EventName": "PRNG_BLOCKED_FUNCTIONS",
+ "BriefDescription": "PRNG Blocked Functions",
+ "PublicDescription": "Total number of the PRNG functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "67",
+ "EventName": "PRNG_BLOCKED_CYCLES",
+ "BriefDescription": "PRNG Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the PRNG functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "68",
+ "EventName": "SHA_FUNCTIONS",
+ "BriefDescription": "SHA Functions",
+ "PublicDescription": "Total number of SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "69",
+ "EventName": "SHA_CYCLES",
+ "BriefDescription": "SHA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the SHA coprocessor is busy performing the SHA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "70",
+ "EventName": "SHA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "SHA Blocked Functions",
+ "PublicDescription": "Total number of the SHA functions that are issued by the CPU and are blocked because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "71",
+ "EventName": "SHA_BLOCKED_CYCLES",
+ "BriefDescription": "SHA Bloced Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the SHA functions issued by the CPU because the SHA coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "72",
+ "EventName": "DEA_FUNCTIONS",
+ "BriefDescription": "DEA Functions",
+ "PublicDescription": "Total number of the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "73",
+ "EventName": "DEA_CYCLES",
+ "BriefDescription": "DEA Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the DEA functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "74",
+ "EventName": "DEA_BLOCKED_FUNCTIONS",
+ "BriefDescription": "DEA Blocked Functions",
+ "PublicDescription": "Total number of the DEA functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "75",
+ "EventName": "DEA_BLOCKED_CYCLES",
+ "BriefDescription": "DEA Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the DEA functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "76",
+ "EventName": "AES_FUNCTIONS",
+ "BriefDescription": "AES Functions",
+ "PublicDescription": "Total number of AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "77",
+ "EventName": "AES_CYCLES",
+ "BriefDescription": "AES Cycles",
+ "PublicDescription": "Total number of CPU cycles when the DEA/AES coprocessor is busy performing the AES functions issued by the CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "78",
+ "EventName": "AES_BLOCKED_FUNCTIONS",
+ "BriefDescription": "AES Blocked Functions",
+ "PublicDescription": "Total number of AES functions that are issued by the CPU and are blocked because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "79",
+ "EventName": "AES_BLOCKED_CYCLES",
+ "BriefDescription": "AES Blocked Cycles",
+ "PublicDescription": "Total number of CPU cycles blocked for the AES functions issued by the CPU because the DEA/AES coprocessor is busy performing a function issued by another CPU"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json
new file mode 100644
index 000000000..162251037
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/extended.json
@@ -0,0 +1,247 @@
+[
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "128",
+ "EventName": "DTLB1_MISSES",
+ "BriefDescription": "DTLB1 Misses",
+ "PublicDescription": "Level-1 Data TLB miss in progress. Incremented by one for every cycle a DTLB1 miss is in progress."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "129",
+ "EventName": "ITLB1_MISSES",
+ "BriefDescription": "ITLB1 Misses",
+ "PublicDescription": "Level-1 Instruction TLB miss in progress. Incremented by one for every cycle a ITLB1 miss is in progress."
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "130",
+ "EventName": "L1D_L2I_SOURCED_WRITES",
+ "BriefDescription": "L1D L2I Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "131",
+ "EventName": "L1I_L2I_SOURCED_WRITES",
+ "BriefDescription": "L1I L2I Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from the Level-2 Instruction cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "132",
+ "EventName": "L1D_L2D_SOURCED_WRITES",
+ "BriefDescription": "L1D L2D Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from the Level-2 Data cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "133",
+ "EventName": "DTLB1_WRITES",
+ "BriefDescription": "DTLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "135",
+ "EventName": "L1D_LMEM_SOURCED_WRITES",
+ "BriefDescription": "L1D Local Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache where the installed cache line was sourced from memory that is attached to the same book as the Data cache (Local Memory)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "137",
+ "EventName": "L1I_LMEM_SOURCED_WRITES",
+ "BriefDescription": "L1I Local Memory Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache where the installed cache line was sourced from memory that is attached to the same book as the Instruction cache (Local Memory)"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "138",
+ "EventName": "L1D_RO_EXCL_WRITES",
+ "BriefDescription": "L1D Read-only Exclusive Writes",
+ "PublicDescription": "A directory write to the Level-1 D-Cache where the line was originally in a Read-Only state in the cache but has been updated to be in the Exclusive state that allows stores to the cache line"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "139",
+ "EventName": "DTLB1_HPAGE_WRITES",
+ "BriefDescription": "DTLB1 One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Data Translation Lookaside Buffer for a one-megabyte page"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "140",
+ "EventName": "ITLB1_WRITES",
+ "BriefDescription": "ITLB1 Writes",
+ "PublicDescription": "A translation entry has been written to the Level-1 Instruction Translation Lookaside Buffer"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "141",
+ "EventName": "TLB2_PTE_WRITES",
+ "BriefDescription": "TLB2 PTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Page Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "142",
+ "EventName": "TLB2_CRSTE_HPAGE_WRITES",
+ "BriefDescription": "TLB2 CRSTE One-Megabyte Page Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays for a one-megabyte large page translation"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "143",
+ "EventName": "TLB2_CRSTE_WRITES",
+ "BriefDescription": "TLB2 CRSTE Writes",
+ "PublicDescription": "A translation entry has been written to the Level-2 TLB Common Region Segment Table Entry arrays"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "144",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "145",
+ "EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "146",
+ "EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Book L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "147",
+ "EventName": "L1D_ONBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D On-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "148",
+ "EventName": "L1D_OFFBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1D Off-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "149",
+ "EventName": "TX_NC_TEND",
+ "BriefDescription": "Completed TEND instructions in non-constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a nonconstrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "150",
+ "EventName": "L1D_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from a On Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "151",
+ "EventName": "L1D_OFFCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "152",
+ "EventName": "L1D_OFFBOOK_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1D Off-Book L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Data cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "153",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "154",
+ "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Chip L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "155",
+ "EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Book L3 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache without intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "156",
+ "EventName": "L1I_ONBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I On-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "157",
+ "EventName": "L1I_OFFBOOK_L4_SOURCED_WRITES",
+ "BriefDescription": "L1I Off-Book L4 Sourced Writes",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-4 cache"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "158",
+ "EventName": "TX_C_TEND",
+ "BriefDescription": "Completed TEND instructions in constrained TX mode",
+ "PublicDescription": "A TEND instruction has completed in a constrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "159",
+ "EventName": "L1I_ONCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I On-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an On Chip Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "160",
+ "EventName": "L1I_OFFCHIP_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Chip L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Chip/On Book Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "161",
+ "EventName": "L1I_OFFBOOK_L3_SOURCED_WRITES_IV",
+ "BriefDescription": "L1I Off-Book L3 Sourced Writes with Intervention",
+ "PublicDescription": "A directory write to the Level-1 Instruction cache directory where the returned cache line was sourced from an Off Book Level-3 cache with intervention"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "177",
+ "EventName": "TX_NC_TABORT",
+ "BriefDescription": "Aborted transactions in non-constrained TX mode",
+ "PublicDescription": "A transaction abort has occurred in a nonconstrained transactional-execution mode"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "178",
+ "EventName": "TX_C_TABORT_NO_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode not using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is not using any special logic to allow the transaction to complete"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "179",
+ "EventName": "TX_C_TABORT_SPECIAL",
+ "BriefDescription": "Aborted transactions in constrained TX mode using special completion logic",
+ "PublicDescription": "A transaction abort has occurred in a constrained transactional-execution mode and the CPU is using special logic to allow the transaction to complete"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json b/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json
new file mode 100644
index 000000000..1a0034f79
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/cf_zec12/transaction.json
@@ -0,0 +1,7 @@
+[
+ {
+ "BriefDescription": "Transaction count",
+ "MetricName": "transaction",
+ "MetricExpr": "TX_C_TEND + TX_NC_TEND + TX_NC_TABORT + TX_C_TABORT_SPECIAL + TX_C_TABORT_NO_SPECIAL"
+ }
+]
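The metric is a plain sum of the five transactional-execution counters defined in extended.json, which perf evaluates when invoked as perf stat -M transaction. A sketch of the arithmetic, with made-up counter values standing in for real measurements:

    # Sketch: evaluate the "transaction" MetricExpr by hand.
    # The counts below are placeholders, not measured values.
    counts = {
        "TX_C_TEND": 120,
        "TX_NC_TEND": 3400,
        "TX_NC_TABORT": 57,
        "TX_C_TABORT_SPECIAL": 2,
        "TX_C_TABORT_NO_SPECIAL": 9,
    }
    transaction = sum(counts.values())
    print("transaction =", transaction)  # 3588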
diff --git a/tools/perf/pmu-events/arch/s390/mapfile.csv b/tools/perf/pmu-events/arch/s390/mapfile.csv
new file mode 100644
index 000000000..78bcf7f8e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/s390/mapfile.csv
@@ -0,0 +1,6 @@
+Family-model,Version,Filename,EventType
+^IBM.209[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z10,core
+^IBM.281[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z196,core
+^IBM.282[78].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_zec12,core
+^IBM.296[45].*[13]\.[1-5].[[:xdigit:]]+$,1,cf_z13,core
+^IBM.390[67].*[13]\.[1-5].[[:xdigit:]]+$,3,cf_z14,core
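Each row maps the CPUID string perf reads on s390 (manufacturer, machine type, and so on) to an event directory via a POSIX extended regex on the Family-model column. A sketch of the zEC12 match in Python — note that [[:xdigit:]] is a POSIX class, rewritten here for the re module, and that the CPUID string is a made-up placeholder in the expected shape, not output from real hardware:

    import re

    # mapfile.csv row for zEC12 (machine types 2827/2828), with the
    # POSIX [[:xdigit:]] class rewritten for Python's re module.
    pattern = r"^IBM.282[78].*[13]\.[1-5].[0-9a-fA-F]+$"

    cpuid = "IBM,2827,704,M03,3.5,002f"  # hypothetical CPUID string

    if re.match(pattern, cpuid):
        print("use cf_zec12 event tables")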
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/cache.json b/tools/perf/pmu-events/arch/x86/bonnell/cache.json
new file mode 100644
index 000000000..ffab90c58
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/cache.json
@@ -0,0 +1,746 @@
+[
+ {
+ "EventCode": "0x21",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_ADS.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cycles L2 address bus is in use."
+ },
+ {
+ "EventCode": "0x22",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_DBUS_BUSY.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cycles the L2 cache data bus is busy."
+ },
+ {
+ "EventCode": "0x23",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_DBUS_BUSY_RD.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cycles the L2 transfers data to the core."
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1",
+ "UMask": "0x70",
+ "EventName": "L2_LINES_IN.SELF.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache misses."
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_LINES_IN.SELF.DEMAND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache misses."
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1",
+ "UMask": "0x50",
+ "EventName": "L2_LINES_IN.SELF.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache misses."
+ },
+ {
+ "EventCode": "0x25",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_M_LINES_IN.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache line modifications."
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1",
+ "UMask": "0x70",
+ "EventName": "L2_LINES_OUT.SELF.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache lines evicted."
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_LINES_OUT.SELF.DEMAND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache lines evicted."
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1",
+ "UMask": "0x50",
+ "EventName": "L2_LINES_OUT.SELF.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache lines evicted."
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1",
+ "UMask": "0x70",
+ "EventName": "L2_M_LINES_OUT.SELF.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Modified lines evicted from the L2 cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_M_LINES_OUT.SELF.DEMAND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Modified lines evicted from the L2 cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1",
+ "UMask": "0x50",
+ "EventName": "L2_M_LINES_OUT.SELF.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Modified lines evicted from the L2 cache"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_IFETCH.SELF.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cacheable instruction fetch requests"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_IFETCH.SELF.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cacheable instruction fetch requests"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_IFETCH.SELF.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cacheable instruction fetch requests"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_IFETCH.SELF.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cacheable instruction fetch requests"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_IFETCH.SELF.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cacheable instruction fetch requests"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x74",
+ "EventName": "L2_LD.SELF.ANY.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x71",
+ "EventName": "L2_LD.SELF.ANY.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x78",
+ "EventName": "L2_LD.SELF.ANY.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x72",
+ "EventName": "L2_LD.SELF.ANY.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x7f",
+ "EventName": "L2_LD.SELF.ANY.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_LD.SELF.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_LD.SELF.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_LD.SELF.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_LD.SELF.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_LD.SELF.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x54",
+ "EventName": "L2_LD.SELF.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x51",
+ "EventName": "L2_LD.SELF.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x58",
+ "EventName": "L2_LD.SELF.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x52",
+ "EventName": "L2_LD.SELF.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x29",
+ "Counter": "0,1",
+ "UMask": "0x5f",
+ "EventName": "L2_LD.SELF.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache reads"
+ },
+ {
+ "EventCode": "0x2A",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_ST.SELF.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 store requests"
+ },
+ {
+ "EventCode": "0x2A",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_ST.SELF.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 store requests"
+ },
+ {
+ "EventCode": "0x2A",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_ST.SELF.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 store requests"
+ },
+ {
+ "EventCode": "0x2A",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_ST.SELF.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 store requests"
+ },
+ {
+ "EventCode": "0x2A",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_ST.SELF.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 store requests"
+ },
+ {
+ "EventCode": "0x2B",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_LOCK.SELF.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 locked accesses"
+ },
+ {
+ "EventCode": "0x2B",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_LOCK.SELF.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 locked accesses"
+ },
+ {
+ "EventCode": "0x2B",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_LOCK.SELF.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 locked accesses"
+ },
+ {
+ "EventCode": "0x2B",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_LOCK.SELF.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 locked accesses"
+ },
+ {
+ "EventCode": "0x2B",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_LOCK.SELF.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 locked accesses"
+ },
+ {
+ "EventCode": "0x2C",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_DATA_RQSTS.SELF.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All data requests from the L1 data cache"
+ },
+ {
+ "EventCode": "0x2C",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_DATA_RQSTS.SELF.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All data requests from the L1 data cache"
+ },
+ {
+ "EventCode": "0x2C",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_DATA_RQSTS.SELF.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All data requests from the L1 data cache"
+ },
+ {
+ "EventCode": "0x2C",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_DATA_RQSTS.SELF.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All data requests from the L1 data cache"
+ },
+ {
+ "EventCode": "0x2C",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_DATA_RQSTS.SELF.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All data requests from the L1 data cache"
+ },
+ {
+ "EventCode": "0x2D",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_LD_IFETCH.SELF.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All read requests from L1 instruction and data caches"
+ },
+ {
+ "EventCode": "0x2D",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_LD_IFETCH.SELF.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All read requests from L1 instruction and data caches"
+ },
+ {
+ "EventCode": "0x2D",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_LD_IFETCH.SELF.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All read requests from L1 instruction and data caches"
+ },
+ {
+ "EventCode": "0x2D",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_LD_IFETCH.SELF.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All read requests from L1 instruction and data caches"
+ },
+ {
+ "EventCode": "0x2D",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_LD_IFETCH.SELF.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All read requests from L1 instruction and data caches"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x74",
+ "EventName": "L2_RQSTS.SELF.ANY.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x71",
+ "EventName": "L2_RQSTS.SELF.ANY.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x78",
+ "EventName": "L2_RQSTS.SELF.ANY.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x72",
+ "EventName": "L2_RQSTS.SELF.ANY.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x7f",
+ "EventName": "L2_RQSTS.SELF.ANY.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_RQSTS.SELF.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_RQSTS.SELF.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_RQSTS.SELF.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x54",
+ "EventName": "L2_RQSTS.SELF.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x51",
+ "EventName": "L2_RQSTS.SELF.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x58",
+ "EventName": "L2_RQSTS.SELF.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x52",
+ "EventName": "L2_RQSTS.SELF.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x5f",
+ "EventName": "L2_RQSTS.SELF.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_RQSTS.SELF.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache demand requests from this core that missed the L2"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_RQSTS.SELF.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 cache demand requests from this core"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x74",
+ "EventName": "L2_REJECT_BUSQ.SELF.ANY.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x71",
+ "EventName": "L2_REJECT_BUSQ.SELF.ANY.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x78",
+ "EventName": "L2_REJECT_BUSQ.SELF.ANY.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x72",
+ "EventName": "L2_REJECT_BUSQ.SELF.ANY.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x7f",
+ "EventName": "L2_REJECT_BUSQ.SELF.ANY.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x44",
+ "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x42",
+ "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_REJECT_BUSQ.SELF.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x54",
+ "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x51",
+ "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x58",
+ "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x52",
+ "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x5f",
+ "EventName": "L2_REJECT_BUSQ.SELF.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Rejected L2 cache requests"
+ },
+ {
+ "EventCode": "0x32",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "L2_NO_REQ.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cycles no L2 cache requests are pending"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0xa1",
+ "EventName": "L1D_CACHE.LD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 Cacheable Data Reads"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0xa2",
+ "EventName": "L1D_CACHE.ST",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 Cacheable Data Writes"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x83",
+ "EventName": "L1D_CACHE.ALL_REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 Data reads and writes"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0xa3",
+ "EventName": "L1D_CACHE.ALL_CACHE_REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 Data Cacheable reads and writes"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE.REPL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1 Data line replacements"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x48",
+ "EventName": "L1D_CACHE.REPLM",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Modified cache lines allocated in the L1 data cache"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "L1D_CACHE.EVICT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Modified cache lines evicted from the L1 data cache"
+ },
+ {
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that hit the L2 cache (precise event)."
+ },
+ {
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Retired loads that miss the L2 cache"
+ }
+] \ No newline at end of file
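Every Bonnell entry pairs an EventCode with a UMask, so each event can also be requested without the generated tables through perf's raw x86 encoding, in which bits 0-7 carry the event select and bits 8-15 the unit mask. A sketch using L2_LINES_IN.SELF.ANY from the table above:

    # Sketch: build the raw perf selector for an entry in cache.json.
    # Standard x86 raw layout: event select in bits 0-7, umask in 8-15.
    event_code = 0x24  # L2_LINES_IN.SELF.ANY
    umask = 0x70

    raw = (umask << 8) | event_code
    print(f"perf stat -e r{raw:x}")  # -> perf stat -e r7024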
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json b/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json
new file mode 100644
index 000000000..f0e090cdb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/floating-point.json
@@ -0,0 +1,261 @@
+[
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "X87_COMP_OPS_EXE.ANY.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Floating point computational micro-ops executed."
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0x10",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "X87_COMP_OPS_EXE.ANY.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Floating point computational micro-ops retired."
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "X87_COMP_OPS_EXE.FXCH.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FXCH uops executed."
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0x10",
+ "Counter": "0,1",
+ "UMask": "0x82",
+ "EventName": "X87_COMP_OPS_EXE.FXCH.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FXCH uops retired."
+ },
+ {
+ "EventCode": "0x11",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "FP_ASSIST.S",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Floating point assists."
+ },
+ {
+ "EventCode": "0x11",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "FP_ASSIST.AR",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Floating point assists for retired operations."
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "SIMD_UOPS_EXEC.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD micro-ops executed (excluding stores)."
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB0",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "SIMD_UOPS_EXEC.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD micro-ops retired (excluding stores)."
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "SIMD_SAT_UOP_EXEC.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD saturated arithmetic micro-ops executed."
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "SIMD_SAT_UOP_EXEC.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD saturated arithmetic micro-ops retired."
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "SIMD_UOP_TYPE_EXEC.MUL.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed multiply micro-ops executed"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "SIMD_UOP_TYPE_EXEC.MUL.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed multiply micro-ops retired"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "SIMD_UOP_TYPE_EXEC.SHIFT.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed shift micro-ops executed"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x82",
+ "EventName": "SIMD_UOP_TYPE_EXEC.SHIFT.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed shift micro-ops retired"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "SIMD_UOP_TYPE_EXEC.PACK.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed micro-ops executed"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x84",
+ "EventName": "SIMD_UOP_TYPE_EXEC.PACK.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed micro-ops retired"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "SIMD_UOP_TYPE_EXEC.UNPACK.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD unpacked micro-ops executed"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x88",
+ "EventName": "SIMD_UOP_TYPE_EXEC.UNPACK.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD unpacked micro-ops retired"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "SIMD_UOP_TYPE_EXEC.LOGICAL.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed logical micro-ops executed"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x90",
+ "EventName": "SIMD_UOP_TYPE_EXEC.LOGICAL.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed logical micro-ops retired"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "SIMD_UOP_TYPE_EXEC.ARITHMETIC.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed arithmetic micro-ops executed"
+ },
+ {
+ "EventCode": "0xB3",
+ "Counter": "0,1",
+ "UMask": "0xa0",
+ "EventName": "SIMD_UOP_TYPE_EXEC.ARITHMETIC.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD packed arithmetic micro-ops retired"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "SIMD_INST_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired Streaming SIMD Extensions (SSE) packed-single instructions."
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "SIMD_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired Streaming SIMD Extensions (SSE) scalar-single instructions."
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "SIMD_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired Streaming SIMD Extensions 2 (SSE2) scalar-double instructions."
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "SIMD_INST_RETIRED.VECTOR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired Streaming SIMD Extensions 2 (SSE2) vector instructions."
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "SIMD_COMP_INST_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired computational Streaming SIMD Extensions (SSE) packed-single instructions."
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "SIMD_COMP_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired computational Streaming SIMD Extensions (SSE) scalar-single instructions."
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired computational Streaming SIMD Extensions 2 (SSE2) scalar-double instructions."
+ },
+ {
+ "EventCode": "0xCD",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "SIMD_ASSIST",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "SIMD assists invoked."
+ },
+ {
+ "EventCode": "0xCE",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "SIMD_INSTR_RETIRED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SIMD Instructions retired."
+ },
+ {
+ "EventCode": "0xCF",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "SIMD_SAT_INSTR_RETIRED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Saturated arithmetic instructions retired."
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/frontend.json b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
new file mode 100644
index 000000000..935b7dcf0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/frontend.json
@@ -0,0 +1,83 @@
+[
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "ICACHE.ACCESSES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Instruction fetches."
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Icache hit"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Icache miss"
+ },
+ {
+ "EventCode": "0x86",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CYCLES_ICACHE_MEM_STALLED.ICACHE_MEM_STALLED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles during which instruction fetches are stalled."
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "DECODE_STALL.PFB_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Decode stall due to PFB empty"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "DECODE_STALL.IQ_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Decode stall due to IQ full"
+ },
+ {
+ "EventCode": "0xAA",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.NON_CISC_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Non-CISC nacro instructions decoded"
+ },
+ {
+ "EventCode": "0xAA",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "MACRO_INSTS.CISC_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "CISC macro instructions decoded"
+ },
+ {
+ "EventCode": "0xAA",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "MACRO_INSTS.ALL_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All Instructions decoded"
+ },
+ {
+ "EventCode": "0xA9",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "UOPS.MS_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "This event counts the cycles where 1 or more uops are issued by the micro-sequencer (MS), including microcode assists and inserted flows, and written to the IQ. ",
+ "CounterMask": "1"
+ }
+] \ No newline at end of file
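The last entry, UOPS.MS_CYCLES, carries CounterMask 1, which turns the counter into a count of cycles in which at least one MS uop was issued. In the raw x86 encoding the counter mask occupies bits 24-31, extending the sketch above:

    # Sketch: fold CounterMask into the raw encoding (cmask in bits 24-31).
    event_code = 0xA9  # UOPS.MS_CYCLES
    umask = 0x1
    cmask = 0x1        # count cycles with >= 1 uop issued by the MS

    raw = (cmask << 24) | (umask << 8) | event_code
    print(f"perf stat -e r{raw:x}")  # -> perf stat -e r10001a9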
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/memory.json b/tools/perf/pmu-events/arch/x86/bonnell/memory.json
new file mode 100644
index 000000000..3ae843b20
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/memory.json
@@ -0,0 +1,154 @@
+[
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0xf",
+ "EventName": "MISALIGN_MEM_REF.SPLIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory references that cross an 8-byte boundary."
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x9",
+ "EventName": "MISALIGN_MEM_REF.LD_SPLIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load splits"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0xa",
+ "EventName": "MISALIGN_MEM_REF.ST_SPLIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Store splits"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x8f",
+ "EventName": "MISALIGN_MEM_REF.SPLIT.AR",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory references that cross an 8-byte boundary (At Retirement)"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x89",
+ "EventName": "MISALIGN_MEM_REF.LD_SPLIT.AR",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load splits (At Retirement)"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x8a",
+ "EventName": "MISALIGN_MEM_REF.ST_SPLIT.AR",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Store splits (Ar Retirement)"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x8c",
+ "EventName": "MISALIGN_MEM_REF.RMW_SPLIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ld-op-st splits"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x97",
+ "EventName": "MISALIGN_MEM_REF.BUBBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Nonzero segbase 1 bubble"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x91",
+ "EventName": "MISALIGN_MEM_REF.LD_BUBBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Nonzero segbase load 1 bubble"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x92",
+ "EventName": "MISALIGN_MEM_REF.ST_BUBBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Nonzero segbase store 1 bubble"
+ },
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1",
+ "UMask": "0x94",
+ "EventName": "MISALIGN_MEM_REF.RMW_BUBBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Nonzero segbase ld-op-st 1 bubble"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "PREFETCH.PREFETCHT0",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT0 instructions executed."
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x82",
+ "EventName": "PREFETCH.PREFETCHT1",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT1 instructions executed."
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x84",
+ "EventName": "PREFETCH.PREFETCHT2",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT2 instructions executed."
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x86",
+ "EventName": "PREFETCH.SW_L2",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Streaming SIMD Extensions (SSE) PrefetchT1 and PrefetchT2 instructions executed"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x88",
+ "EventName": "PREFETCH.PREFETCHNTA",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Streaming SIMD Extensions (SSE) Prefetch NTA instructions executed"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "PREFETCH.HW_PREFETCH",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 hardware prefetch request"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0xf",
+ "EventName": "PREFETCH.SOFTWARE_PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Any Software prefetch"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1",
+ "UMask": "0x8f",
+ "EventName": "PREFETCH.SOFTWARE_PREFETCH.AR",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Any Software prefetch"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/other.json b/tools/perf/pmu-events/arch/x86/bonnell/other.json
new file mode 100644
index 000000000..4bc1c582d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/other.json
@@ -0,0 +1,450 @@
+[
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "SEGMENT_REG_LOADS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of segment register loads."
+ },
+ {
+ "EventCode": "0x9",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "DISPATCH_BLOCKED.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason"
+ },
+ {
+ "EventCode": "0x3A",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "EIST_TRANS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions"
+ },
+ {
+ "EventCode": "0x3B",
+ "Counter": "0,1",
+ "UMask": "0xc0",
+ "EventName": "THERMAL_TRIP",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of thermal trips"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_REQUEST_OUTSTANDING.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Outstanding cacheable data read bus requests duration."
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_REQUEST_OUTSTANDING.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Outstanding cacheable data read bus requests duration."
+ },
+ {
+ "EventCode": "0x61",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "BUS_BNR_DRV.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of Bus Not Ready signals asserted."
+ },
+ {
+ "EventCode": "0x61",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BUS_BNR_DRV.THIS_AGENT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of Bus Not Ready signals asserted."
+ },
+ {
+ "EventCode": "0x62",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "BUS_DRDY_CLOCKS.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus cycles when data is sent on the bus."
+ },
+ {
+ "EventCode": "0x62",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BUS_DRDY_CLOCKS.THIS_AGENT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus cycles when data is sent on the bus."
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_LOCK_CLOCKS.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus cycles when a LOCK signal is asserted."
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_LOCK_CLOCKS.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus cycles when a LOCK signal is asserted."
+ },
+ {
+ "EventCode": "0x64",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_DATA_RCV.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus cycles while processor receives data."
+ },
+ {
+ "EventCode": "0x65",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_BRD.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Burst read bus transactions."
+ },
+ {
+ "EventCode": "0x65",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_BRD.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Burst read bus transactions."
+ },
+ {
+ "EventCode": "0x66",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_RFO.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "RFO bus transactions."
+ },
+ {
+ "EventCode": "0x66",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_RFO.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "RFO bus transactions."
+ },
+ {
+ "EventCode": "0x67",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_WB.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Explicit writeback bus transactions."
+ },
+ {
+ "EventCode": "0x67",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_WB.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Explicit writeback bus transactions."
+ },
+ {
+ "EventCode": "0x68",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_IFETCH.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Instruction-fetch bus transactions."
+ },
+ {
+ "EventCode": "0x68",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_IFETCH.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Instruction-fetch bus transactions."
+ },
+ {
+ "EventCode": "0x69",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_INVAL.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Invalidate bus transactions."
+ },
+ {
+ "EventCode": "0x69",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_INVAL.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Invalidate bus transactions."
+ },
+ {
+ "EventCode": "0x6A",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_PWR.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Partial write bus transaction."
+ },
+ {
+ "EventCode": "0x6A",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_PWR.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Partial write bus transaction."
+ },
+ {
+ "EventCode": "0x6B",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_P.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Partial bus transactions."
+ },
+ {
+ "EventCode": "0x6B",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_P.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Partial bus transactions."
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_IO.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "IO bus transactions."
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_IO.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "IO bus transactions."
+ },
+ {
+ "EventCode": "0x6D",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_DEF.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Deferred bus transactions."
+ },
+ {
+ "EventCode": "0x6D",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_DEF.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Deferred bus transactions."
+ },
+ {
+ "EventCode": "0x6E",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_BURST.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Burst (full cache-line) bus transactions."
+ },
+ {
+ "EventCode": "0x6E",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_BURST.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Burst (full cache-line) bus transactions."
+ },
+ {
+ "EventCode": "0x6F",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_MEM.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory bus transactions."
+ },
+ {
+ "EventCode": "0x6F",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_MEM.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory bus transactions."
+ },
+ {
+ "EventCode": "0x70",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "BUS_TRANS_ANY.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All bus transactions."
+ },
+ {
+ "EventCode": "0x70",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_TRANS_ANY.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All bus transactions."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0xb",
+ "EventName": "EXT_SNOOP.THIS_AGENT.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "EXT_SNOOP.THIS_AGENT.CLEAN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "EXT_SNOOP.THIS_AGENT.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "EXT_SNOOP.THIS_AGENT.HITM",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x2b",
+ "EventName": "EXT_SNOOP.ALL_AGENTS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x21",
+ "EventName": "EXT_SNOOP.ALL_AGENTS.CLEAN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x22",
+ "EventName": "EXT_SNOOP.ALL_AGENTS.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x77",
+ "Counter": "0,1",
+ "UMask": "0x28",
+ "EventName": "EXT_SNOOP.ALL_AGENTS.HITM",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "External snoops."
+ },
+ {
+ "EventCode": "0x7A",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "BUS_HIT_DRV.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "HIT signal asserted."
+ },
+ {
+ "EventCode": "0x7A",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BUS_HIT_DRV.THIS_AGENT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "HIT signal asserted."
+ },
+ {
+ "EventCode": "0x7B",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "BUS_HITM_DRV.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "HITM signal asserted."
+ },
+ {
+ "EventCode": "0x7B",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BUS_HITM_DRV.THIS_AGENT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "HITM signal asserted."
+ },
+ {
+ "EventCode": "0x7D",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUSQ_EMPTY.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus queue is empty."
+ },
+ {
+ "EventCode": "0x7E",
+ "Counter": "0,1",
+ "UMask": "0xe0",
+ "EventName": "SNOOP_STALL_DRV.ALL_AGENTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus stalled for snoops."
+ },
+ {
+ "EventCode": "0x7E",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "SNOOP_STALL_DRV.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus stalled for snoops."
+ },
+ {
+ "EventCode": "0x7F",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "BUS_IO_WAIT.SELF",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "IO requests waiting in the bus queue."
+ },
+ {
+ "EventCode": "0xC6",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CYCLES_INT_MASKED.CYCLES_INT_MASKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles during which interrupts are disabled."
+ },
+ {
+ "EventCode": "0xC6",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles during which interrupts are pending and disabled."
+ },
+ {
+ "EventCode": "0xC8",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "HW_INT_RCV",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Hardware interrupts received."
+ }
+] \ No newline at end of file
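
The entries above follow the common pmu-events shape: an event select (EventCode), a unit mask (UMask), the counters the event may run on, and a default sample period. On Intel core PMUs, perf's raw-event syntax packs the event select into config bits 0-7 and the unit mask into bits 8-15, so a single-code entry can be expressed as an rNNNN token even before the symbolic name is wired up. A minimal sketch under that encoding assumption (the helper name and sample invocation are illustrative, not part of this patch):

```python
def raw_event(event):
    """Build the raw token perf accepts, e.g. 'perf stat -e re070'.

    Assumes the usual Intel core-PMU encoding: event select in config
    bits 0-7, unit mask in bits 8-15. Multi-valued EventCode entries
    (such as OFFCORE_RESPONSE later in this patch) are out of scope.
    """
    config = (int(event["UMask"], 16) << 8) | int(event["EventCode"], 16)
    return "r%x" % config

# BUS_TRANS_ANY.ALL_AGENTS above: EventCode 0x70, UMask 0xe0 -> re070
print(raw_event({"EventCode": "0x70", "UMask": "0xe0"}))
```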
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
new file mode 100644
index 000000000..b2e681c78
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/pipeline.json
@@ -0,0 +1,364 @@
+[
+ {
+ "EventCode": "0x2",
+ "Counter": "0,1",
+ "UMask": "0x83",
+ "EventName": "STORE_FORWARDS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All store forwards"
+ },
+ {
+ "EventCode": "0x2",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "STORE_FORWARDS.GOOD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Good store forwards"
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1",
+ "UMask": "0x7f",
+ "EventName": "REISSUE.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Micro-op reissues for any cause"
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1",
+ "UMask": "0xff",
+ "EventName": "REISSUE.ANY.AR",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Micro-op reissues for any cause (At Retirement)"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MUL.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations executed."
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "MUL.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations retired"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "DIV.S",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide operations executed."
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "DIV.AR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide operations retired"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CYCLES_DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles the divider is busy."
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Core cycles when core is not halted"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.BUS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Bus cycles when core is not halted"
+ },
+ {
+ "EventCode": "0xA",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Core cycles when core is not halted"
+ },
+ {
+ "EventCode": "0xA",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reference cycles when core is not halted."
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BR_INST_TYPE_RETIRED.COND",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All macro conditional branch instructions."
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "BR_INST_TYPE_RETIRED.UNCOND",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All macro unconditional branch instructions, excluding calls and indirects"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "BR_INST_TYPE_RETIRED.IND",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All indirect branches that are not calls."
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "BR_INST_TYPE_RETIRED.RET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All indirect branches that have a return mnemonic"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "BR_INST_TYPE_RETIRED.DIR_CALL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All non-indirect calls"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "BR_INST_TYPE_RETIRED.IND_CALL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All indirect calls, including both register and memory indirect."
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "BR_INST_TYPE_RETIRED.COND_TAKEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Only taken macro conditional branch instructions"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BR_MISSP_TYPE_RETIRED.COND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Mispredicted cond branch instructions retired"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "BR_MISSP_TYPE_RETIRED.IND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Mispredicted ind branches that are not calls"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "BR_MISSP_TYPE_RETIRED.RETURN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Mispredicted return branches"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "BR_MISSP_TYPE_RETIRED.IND_CALL",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "Mispredicted indirect calls, including both register and memory indirect."
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1",
+ "UMask": "0x11",
+ "EventName": "BR_MISSP_TYPE_RETIRED.COND_TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Mispredicted and taken cond branch instructions retired"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (precise event)."
+ },
+ {
+ "EventCode": "0xA",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired."
+ },
+ {
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Micro-ops retired."
+ },
+ {
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "UOPS_RETIRED.STALLED_CYCLES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles with no micro-ops retired."
+ },
+ {
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "UOPS_RETIRED.STALLS",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Periods with no micro-ops retired."
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Self-Modifying Code detected."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired branch instructions."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.PRED_NOT_TAKEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired branch instructions that were predicted not-taken."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.MISPRED_NOT_TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions that were mispredicted not-taken."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.PRED_TAKEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired branch instructions that were predicted taken."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.MISPRED_TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions that were mispredicted taken."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xc",
+ "EventName": "BR_INST_RETIRED.TAKEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired taken branch instructions."
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xf",
+ "EventName": "BR_INST_RETIRED.ANY1",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired branch instructions."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.MISPRED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired mispredicted branch instructions (precise event)."
+ },
+ {
+ "EventCode": "0xDC",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles issue is stalled due to div busy."
+ },
+ {
+ "EventCode": "0xE0",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BR_INST_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch instructions decoded"
+ },
+ {
+ "EventCode": "0xE4",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BOGUS_BR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Bogus branches"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEARS asserted."
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "REISSUE.OVERLAP_STORE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Micro-op reissues on a store-load collision"
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1",
+ "UMask": "0x81",
+ "EventName": "REISSUE.OVERLAP_STORE.AR",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Micro-op reissues on a store-load collision (At Retirement)"
+ }
+] \ No newline at end of file
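
One wrinkle in pipeline.json above: it mixes programmable events with fixed-counter events (EventCode 0xA with Counter set to "Fixed counter N", e.g. INST_RETIRED.ANY), and the latter are enabled by a dedicated counter bit rather than an event-select encoding. A sketch that lists only the programmable events with their raw encodings, under the same encoding assumption as before:

```python
import json

# Path as added by this patch; run from the kernel source root.
with open("tools/perf/pmu-events/arch/x86/bonnell/pipeline.json") as f:
    events = json.load(f)

for ev in events:
    if ev["Counter"].startswith("Fixed"):
        continue  # fixed counters have no event-select encoding
    config = (int(ev["UMask"], 16) << 8) | int(ev["EventCode"], 16)
    print("%-40s r%x" % (ev["EventName"], config))
```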
diff --git a/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json
new file mode 100644
index 000000000..7bb817588
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/bonnell/virtual-memory.json
@@ -0,0 +1,124 @@
+[
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1",
+ "UMask": "0x7",
+ "EventName": "DATA_TLB_MISSES.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory accesses that missed the DTLB."
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1",
+ "UMask": "0x5",
+ "EventName": "DATA_TLB_MISSES.DTLB_MISS_LD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses due to load operations."
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1",
+ "UMask": "0x9",
+ "EventName": "DATA_TLB_MISSES.L0_DTLB_MISS_LD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L0 DTLB misses due to load operations."
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1",
+ "UMask": "0x6",
+ "EventName": "DATA_TLB_MISSES.DTLB_MISS_ST",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses due to store operations."
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1",
+ "UMask": "0xa",
+ "EventName": "DATA_TLB_MISSES.L0_DTLB_MISS_ST",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L0 DTLB misses due to store operations"
+ },
+ {
+ "EventCode": "0xC",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.WALKS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of page-walks executed."
+ },
+ {
+ "EventCode": "0xC",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Duration of page-walks in core cycles"
+ },
+ {
+ "EventCode": "0xC",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_WALKS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of D-side only page walks"
+ },
+ {
+ "EventCode": "0xC",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Duration of D-side only page walks"
+ },
+ {
+ "EventCode": "0xC",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_WALKS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of I-Side page walks"
+ },
+ {
+ "EventCode": "0xC",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Duration of I-Side page walks"
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "ITLB.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB hits."
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "ITLB.FLUSH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB flushes."
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0x82",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "ITLB.MISSES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB misses."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss the DTLB (precise event)."
+ }
+] \ No newline at end of file
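
The TLB events above are usually consumed as ratios rather than raw counts; a common one is DTLB misses per thousand retired instructions (MPKI), combining DATA_TLB_MISSES.DTLB_MISS from this file with INST_RETIRED.ANY from pipeline.json. A small sketch with purely illustrative sample numbers:

```python
def dtlb_mpki(counts):
    """DTLB misses per kilo-instruction; `counts` maps event names to
    counter readings collected over the same interval."""
    return 1000.0 * counts["DATA_TLB_MISSES.DTLB_MISS"] / counts["INST_RETIRED.ANY"]

# Sample (made-up) readings: 42k DTLB misses over 12M instructions.
print(dtlb_mpki({"DATA_TLB_MISSES.DTLB_MISS": 42_000,
                 "INST_RETIRED.ANY": 12_000_000}))  # -> 3.5
```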
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
new file mode 100644
index 000000000..00bfdb5c5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+        "BriefDescription": "Rough estimate of the fraction of fetched cache-line bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+        "MetricGroup": "DSB;Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+        "BriefDescription": "Average Frequency Utilization relative to nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
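
Unlike the per-event files, bdw-metrics.json above carries MetricExpr strings that perf evaluates over the measured event counts at report time. Perf's real parser also understands #SMT_on, cpu@...@ event qualifiers, min(), and so on; the following is only a toy illustration of the substitute-and-evaluate idea for the simplest arithmetic expressions (names and values below are illustrative):

```python
def eval_metric(expr, counts):
    """Toy evaluation of a simple MetricExpr over measured counts.

    Substitutes longer event names first so that e.g. INST_RETIRED.ANY
    does not clobber a longer name sharing that prefix.
    """
    for name in sorted(counts, key=len, reverse=True):
        expr = expr.replace(name, repr(float(counts[name])))
    return eval(expr)  # acceptable for a local, trusted JSON file

ipc = eval_metric("INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
                  {"INST_RETIRED.ANY": 12_000_000,
                   "CPU_CLK_UNHALTED.THREAD": 10_000_000})
print("IPC = %.2f" % ipc)  # -> IPC = 1.20
```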
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
new file mode 100644
index 000000000..bba3152ec
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json
@@ -0,0 +1,3399 @@
+[
+ {
+        "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only non-rejected loads are counted.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x22",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x27",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand requests that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.L2_PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 prefetch requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All requests that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only non-rejected loads are counted.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x44",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x50",
+ "EventName": "L2_RQSTS.L2_PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 prefetch requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non-rejected loads are counted.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe1",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe2",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the total number of L2 code requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe4",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe7",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand requests to L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf8",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All L2 requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of WB requests that hit L2 cache.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x50",
+ "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Non-rejected writebacks that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the duration of L1D miss outstanding, that is, each cycle, the number of Fill Buffers (FB) outstanding that are required by Demand Reads. An FB is either held by demand loads, or held by non-demand loads and hit at least once by demand. The valid outstanding interval runs until FB deallocation and starts in one of the following ways: from FB allocation, if the FB is allocated by demand; from the demand hit on the FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read covers cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulting from any request type.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstanding duration in cycles",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffer unavailability.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the number of offcore outstanding Code Read transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding code read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the number of offcore outstanding demand RFO read transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "Offcore outstanding demand RFO read transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM76",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1D is locked",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts both cacheable and noncacheable code read requests.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and noncacheable code read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulting from any request type.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+        "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when the L1D writeback pending FIFO is full.\nNote: The writeback pending FIFO has six entries.",
+ "EventCode": "0xb2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "Errata": "BDM35",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split load uops retired to the architected path. A line split is an access that crosses a 64B cache-line boundary, which includes page splits (4K).",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-split store uops retired to the architected path. A line split is an access that crosses a 64B cache-line boundary, which includes page splits (4K).",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load uops. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "All retired store uops. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "BDM35",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM100",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops with L1 cache misses as data sources. Uses PEBS.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "Errata": "BDM100, BDE70",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM100",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "BDM100",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM100",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM100",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDE70, BDM100",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PublicDescription": "This event counts Demand Data Read requests that access L2 cache, including rejects.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts Read for Ownership (RFO) requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANS.RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache accesses when fetching instructions.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANS.CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts L2 or L3 HW prefetches that access L2 cache including rejects.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts L1D writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANS.L1D_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts L2 fill requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANS.L2_FILL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts L2 writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_IN.I",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by demand.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of split locks in the super queue.",
+ "EventCode": "0xf4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Split locks in SQ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0040 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000018000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080028000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100028000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200028000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400028000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000028000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80028000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c8000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c8000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c8000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c8000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c8000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c8000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0090 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f80020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00803c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01003c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02003c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
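All of the OFFCORE_RESPONSE entries above share one mechanism: the event select (0xB7 or 0xBB) and umask pick one of the two offcore response facilities, and the MSRValue field is written to the paired dedicated MSR (0x1a6 or 0x1a7) to select the request, supplier and snoop attributes. As a minimal sketch (not part of this patch), one of these events can also be requested by hand through perf_event_open(2); this assumes perf's raw-event encoding for Intel core PMUs, where attr.config carries umask<<8|event and attr.config1 carries the offcore response MSR value, and the event chosen (OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP) is picked purely for illustration:

	#include <linux/perf_event.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x01b7;           /* event 0xB7, umask 0x1 */
		attr.config1 = 0x3f803c0122ULL; /* MSRValue for ALL_RFO.L3_HIT.ANY_SNOOP */
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... workload under measurement goes here ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("offcore responses: %lld\n", count);
		close(fd);
		return 0;
	}

Once this patch is applied and perf is rebuilt with the pmu-events tables, the same count should also be reachable symbolically by event name via perf stat, without hand-encoding the MSR value.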
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
new file mode 100644
index 000000000..689d478da
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json
@@ -0,0 +1,165 @@
+[
+ {
+ "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.",
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM30",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.",
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "BDM30",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x15",
+ "EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
+ "SampleAfterValue": "2000006",
+ "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2a",
+ "EventName": "FP_ARITH_INST_RETIRED.SINGLE",
+ "SampleAfterValue": "2000005",
+ "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "FP_ARITH_INST_RETIRED.PACKED",
+ "SampleAfterValue": "2000004",
+ "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
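Note that the FP_ARITH_INST_RETIRED umasks above count retired instructions, not FLOPs: per the BriefDescription fields, each scalar count represents 1 computation, each 128-bit packed count 2 (double) or 4 (single), and each 256-bit packed count 4 (double) or 8 (single), with FM(N)ADD/SUB already counted twice at the source. A sketch of folding the six raw counts into a retired-FLOP estimate (the function and parameter names are illustrative, not from this patch):

	#include <stdint.h>

	/* Weight each FP_ARITH_INST_RETIRED.* count by the computations per
	 * instruction stated in the event descriptions above. FM(N)ADD/SUB
	 * already counts twice in hardware, so no extra FMA factor here. */
	static uint64_t retired_flops(uint64_t scalar_single, uint64_t scalar_double,
				      uint64_t pk128_single, uint64_t pk128_double,
				      uint64_t pk256_single, uint64_t pk256_double)
	{
		return 1 * (scalar_single + scalar_double) +
		       4 * pk128_single + 2 * pk128_double +
		       8 * pk256_single + 4 * pk256_double;
	}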
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json
new file mode 100644
index 000000000..7142c76d7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json
@@ -0,0 +1,286 @@
+[
+ {
+ "PublicDescription": "This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end. It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "IDQ.EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of both cacheable and noncacheable Instruction Cache, Streaming Buffer and Victim Cache Reads including UC fetches.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes UC accesses.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which the demand fetch waits for data (wfdM104H) from L2 or iSB (opportunistic hit).",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE.IFDATA_STALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
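The IDQ_UOPS_NOT_DELIVERED events above are the raw material for the usual level-1 top-down "frontend bound" ratio: the front end can hand the Resource Allocation Table up to 4 uops per cycle (matching the CounterMask of 4 on CYCLES_0_UOPS_DELIV.CORE), so dividing the undelivered-slot count by 4 times the unhalted thread cycles gives the fraction of issue slots the front end failed to fill. A sketch, assuming CPU_CLK_UNHALTED.THREAD (not defined in this file) is counted alongside:

	#include <stdint.h>

	/* Level-1 top-down frontend-bound fraction; the width of 4 is the
	 * per-cycle uop delivery limit implied by the events above. */
	static double frontend_bound(uint64_t idq_uops_not_delivered_core,
				     uint64_t cpu_clk_unhalted_thread)
	{
		return (double)idq_uops_not_delivered_core /
		       (4.0 * (double)cpu_clk_unhalted_thread);
	}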
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/memory.json b/tools/perf/pmu-events/arch/x86/broadwell/memory.json
new file mode 100644
index 000000000..c9154cebb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/memory.json
@@ -0,0 +1,3045 @@
+[
+ {
+ "PublicDescription": "This event counts speculative cache-line split load uops dispatched to the L1 cache.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a TSX line had a cache conflict",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times we could not allocate Lock Buffer",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "TX_EXEC.MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "TX_EXEC.MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RTM region detected inside HLE.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "TX_EXEC.MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "TX_EXEC.MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times we entered an HLE region\n does not count nested transactions.",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "HLE_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times we entered an HLE region; does not count nested transactions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times HLE commit succeeded",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Number of times HLE abort was triggered (PEBS).",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times HLE abort was triggered (PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "HLE_RETIRED.ABORTED_MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times the TSX watchdog signaled an HLE abort.",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "HLE_RETIRED.ABORTED_MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a disallowed operation caused an HLE abort.",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "HLE_RETIRED.ABORTED_MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times HLE caused a fault.",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "HLE_RETIRED.ABORTED_MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times HLE aborted and was not due to the abort conditions in subevents 3-6.",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "HLE_RETIRED.ABORTED_MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times we entered an RTM region\n does not count nested transactions.",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RTM_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times we entered an RTM region; does not count nested transactions",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times RTM commit succeeded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Number of times RTM abort was triggered (PEBS).",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times RTM abort was triggered (PEBS)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RTM_RETIRED.ABORTED_MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times the TSX watchdog signaled an RTM abort.",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RTM_RETIRED.ABORTED_MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times a disallowed operation caused an RTM abort.",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RTM_RETIRED.ABORTED_MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times a RTM caused a fault.",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RTM_RETIRED.ABORTED_MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times RTM aborted and was not due to the abort conditions in subevents 3-6.",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RTM_RETIRED.ABORTED_MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above four.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 4",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above eight.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Loads with latency value being above 8",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above 16.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Loads with latency value being above 16",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above 32.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Loads with latency value being above 32",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above 64.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Loads with latency value being above 64",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above 128.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Loads with latency value being above 128",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above 256.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Loads with latency value being above 256",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts loads with latency value being above 512.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "BDM100, BDM35",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Loads with latency value being above 512",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L2_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "PF_L3_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000028000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c8000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts any other requests that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts any other requests that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "OTHER & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the target was a non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the target was a non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "TBD. Offcore response can be programmed only with a specific pair of event select and counter MSRs, and with specific event codes and predefined mask bit values in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000120 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000240 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_PF_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000091 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2000020122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20003c0122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f84000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000122 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "ALL_RFO & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
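The OFFCORE_RESPONSE entries above all share event code 0xB7/0xBB; the request, supplier, and snoop attributes are selected by the value the kernel writes into MSR 0x1A6/0x1A7 (the "MSRValue" field), which perf exposes as config1 (the offcore_rsp format attribute). A minimal sketch of counting one of these events through perf_event_open(2), assuming a Broadwell CPU matching these definitions and taking OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS (MSRValue 0x023c000122) as the example:

    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            /* raw x86 encoding: event select | (umask << 8) */
            attr.config = 0xb7 | (0x1 << 8);
            /* offcore response bits; the kernel writes this to MSR 0x1A6 */
            attr.config1 = 0x023c000122ULL;

            fd = syscall(__NR_perf_event_open, &attr, 0 /* this thread */,
                         -1 /* any CPU */, -1 /* no group */, 0);
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            /* ... run the workload to be measured ... */
            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("offcore responses: %lld\n", count);
            close(fd);
            return 0;
    }

The same event can be requested from the command line as perf stat -e cpu/event=0xb7,umask=0x1,offcore_rsp=0x023c000122/ -- <workload>, and once this file is built into perf the symbolic alias offcore_response.all_rfo.l3_miss.snoop_miss resolves to the same encoding.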
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/other.json b/tools/perf/pmu-events/arch/x86/broadwell/other.json
new file mode 100644
index 000000000..4f829c5fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/other.json
@@ -0,0 +1,44 @@
+[
+ {
+ "PublicDescription": "This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPL_CYCLES.RING0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPL_CYCLES.RING123",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
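The CPL_CYCLES pair above splits unhalted core cycles by privilege level (ring 0 vs. rings 1-3), and RING0_TRANS shows how the "EdgeDetect" and "CounterMask" fields compose: an edge-detected, cmask=1 variant of the ring-0 cycle event that counts once per transition into ring 0. In the raw x86 encoding used by perf (and by the perf_event_open sketch above), EdgeDetect maps to bit 18 and CounterMask to bits 24-31 of the event-select value, so a sketch of the corresponding attr setup, under the same assumptions, would be:

    /* CPL_CYCLES.RING0: unhalted cycles while in ring 0 */
    attr.config = 0x5c | (0x1 << 8);

    /* CPL_CYCLES.RING0_TRANS: same event with EdgeDetect=1 (bit 18)
     * and CounterMask=1 (bits 24-31), counting entries into ring 0 */
    attr.config = 0x5c | (0x1 << 8) | (1UL << 18) | (1UL << 24);

Equivalently, perf stat -e cpu/event=0x5c,umask=0x1,edge=1,cmask=1/ requests the transition count from the command line.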
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
new file mode 100644
index 000000000..97c5d0784
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json
@@ -0,0 +1,1427 @@
+[
+ {
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.",
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "INT_MISC.RAT_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive\n added by GSR u-arch.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when divider is busy executing divide operations",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3c",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.",
+ "EventCode": "0x4c",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the hardware prefetch.",
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts not taken macro-conditional branch instructions.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions excluding calls and indirect branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired indirect branches excluding calls and return branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired indirect branches that have a return mnemonic.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired direct near calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired direct near calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired indirect calls including both register and memory indirect.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired macro-conditional branch instructions.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc2",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc8",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired direct near calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired direct near calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired branch instructions.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts not taken speculative and retired mispredicted macro conditional branch instructions.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired mispredicted macro conditional branch instructions.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches that have a return mnemonic.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted branch instructions.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.",
+ "EventCode": "0xA0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 6.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Total execution stalls.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of uops executed from any thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of uops executed on the core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "Errata": "BDM61",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "Errata": "BDM11, BDM55",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "CounterHTOff": "1"
+ },
+ {
+ "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Actually retired uops. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1"
+ },
+ {
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts all (macro) branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDW98",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Return instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts not taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts far branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "Errata": "BDW98",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts all mispredicted macro branch instructions retired.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.",
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count cases of saving new LBR",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xe6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
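Each entry above is compiled into perf's built-in event tables by the jevents build step, but the mapping onto a raw hardware event is mechanical: EventCode fills the event field, UMask the umask field, and CounterMask/Invert/EdgeDetect become the cmask/inv/edge modifiers. A minimal Python sketch of that mapping (the helper name and the inlined sample entry are illustrative, not part of these sources):

    import json

    def to_perf_event(entry):
        # Translate one pmu-events JSON entry into a perf event string.
        parts = ["event=" + entry["EventCode"].lower(),
                 "umask=" + entry["UMask"].lower()]
        if entry.get("CounterMask", "0") != "0":
            parts.append("cmask=" + entry["CounterMask"])
        if entry.get("Invert", "0") == "1":
            parts.append("inv")
        if entry.get("EdgeDetect", "0") == "1":
            parts.append("edge")
        return "cpu/" + ",".join(parts) + "/"

    entry = json.loads('{"EventCode": "0xA3", "UMask": "0x5", '
                       '"EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", '
                       '"CounterMask": "5"}')
    print(to_perf_event(entry))  # cpu/event=0xa3,umask=0x5,cmask=5/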
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/uncore.json b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json
new file mode 100644
index 000000000..28e1e159a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/uncore.json
@@ -0,0 +1,278 @@
+[
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x41",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x44",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x48",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x11",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in M-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x21",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in M-state",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in M-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x18",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in I-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x88",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in I-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x1f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x2f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x8f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x86",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x16",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x26",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0,",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x80",
+ "UMask": "0x02",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT",
+ "BriefDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
+ "PublicDescription": "Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case.",
+ "Counter": "0,",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x02",
+ "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT",
+ "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode",
+ "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x20",
+ "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+ "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x84",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.;",
+ "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "Counter": "0,",
+ "CounterMask": "1",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "NCU",
+ "EventCode": "0x0",
+ "UMask": "0x01",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
+ "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "FIXED",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+] \ No newline at end of file
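These uncore entries target separate PMUs rather than the core "cpu" PMU: Unit "CBO" corresponds to the per-slice cache-box PMUs, "iMPH-U" to the system-agent arbiter, and "NCU" to the fixed uncore clock counter. A sketch of how an entry could be turned into an event spec, assuming the uncore_cbox_0/uncore_arb style PMU names perf typically exposes on client parts (verify the actual names under /sys/bus/event_source/devices on the target machine):

    # Assumed Unit -> perf PMU name mapping; illustrative only.
    UNIT_TO_PMU = {
        "CBO": "uncore_cbox_0",
        "iMPH-U": "uncore_arb",
        "NCU": "uncore_clock",
    }

    def uncore_event(entry):
        pmu = UNIT_TO_PMU[entry["Unit"]]
        return "%s/event=%s,umask=%s/" % (
            pmu, entry["EventCode"].lower(), entry["UMask"].lower())

    e = {"Unit": "CBO", "EventCode": "0x34", "UMask": "0x11",
         "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M"}
    print(uncore_event(e))  # uncore_cbox_0/event=0x34,umask=0x11/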
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json
new file mode 100644
index 000000000..2a015e4c7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json
@@ -0,0 +1,388 @@
+[
+ {
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM69",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "BDM69",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM69",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM69",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "Errata": "BDM69",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "BDM69",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (4K).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x60",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM69",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "BDM69",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM69",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM69",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "Errata": "BDM69",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "BDM69",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (4K).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x60",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.",
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycle count for an Extended Page table walk.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "BDM69",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "BDM69",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "BDM69",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "BDM69",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "Errata": "BDM69",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "BDM69",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core misses that miss the DTLB and hit the STLB (4K).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "ITLB_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x60",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in the L1+FB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in the L2.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x14",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in Memory.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in the L1+FB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x22",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in the L2.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "Errata": "BDM69, BDM98",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "STLB flush attempts",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
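Editorial note: the virtual-memory events above pair "walk completed" counts with "walk duration" cycle counts, so dividing a WALK_DURATION count by unhalted thread cycles gives a rough page-walk overhead; this is exactly how the Page_Walks_Utilization metric in the file below combines them. A minimal sketch under hypothetical counter values (event names as defined above; real counts would come from e.g. `perf stat -e itlb_misses.walk_duration,cpu_clk_unhalted.thread`):

    # Hypothetical counter readings -- illustration only.
    counts = {
        "ITLB_MISSES.WALK_DURATION": 35_000_000,   # cycles the PMH was busy with ITLB walks
        "CPU_CLK_UNHALTED.THREAD": 2_000_000_000,  # unhalted core cycles (per thread)
    }

    walk_fraction = (counts["ITLB_MISSES.WALK_DURATION"]
                     / counts["CPU_CLK_UNHALTED.THREAD"])
    print(f"ITLB page walks occupy ~{walk_fraction:.1%} of core cycles")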
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
new file mode 100644
index 000000000..49c5f123d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / INST_RETIRED.ANY / cycles",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / ( cpu@uops_executed.core\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* ( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - ( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED ) ) / RS_EVENTS.EMPTY_END",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_LOAD_MISSES.WALK_DURATION\\,cmask\\=1@ + cpu@DTLB_STORE_MISSES.WALK_DURATION\\,cmask\\=1@ + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED)) / ( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE ) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
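Editorial note: each entry in bdwde-metrics.json binds a MetricName to a MetricExpr evaluated over raw event counts; perf computes these itself (e.g. `perf stat -M IPC`). A minimal sketch, with hypothetical counts, of how the IPC and CPI expressions above reduce:

    # Hypothetical counter readings -- illustration only.
    counts = {
        "INST_RETIRED.ANY": 1_200_000_000,
        "CPU_CLK_UNHALTED.THREAD": 1_000_000_000,
    }

    ipc = counts["INST_RETIRED.ANY"] / counts["CPU_CLK_UNHALTED.THREAD"]
    cpi = 1 / ipc  # CPI is the reciprocal of IPC
    print(f"IPC = {ipc:.2f}, CPI = {cpi:.2f}")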
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
new file mode 100644
index 000000000..bf243fe2a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json
@@ -0,0 +1,809 @@
+[
+ {
+ "EventCode": "0x24",
+ "UMask": "0x21",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x22",
+ "BriefDescription": "RFO requests that miss L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x24",
+ "BriefDescription": "L2 cache misses when fetching instructions.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x27",
+ "BriefDescription": "Demand requests that miss L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x30",
+ "BriefDescription": "L2 prefetch requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.L2_PF_MISS",
+ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x3f",
+ "BriefDescription": "All requests that miss L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x41",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x42",
+ "BriefDescription": "RFO requests that hit L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x44",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x50",
+ "BriefDescription": "L2 prefetch requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.L2_PF_HIT",
+ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe1",
+ "BriefDescription": "Demand Data Read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe2",
+ "BriefDescription": "RFO requests to L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe4",
+ "BriefDescription": "L2 code requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "This event counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe7",
+ "BriefDescription": "Demand requests to L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xf8",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xff",
+ "BriefDescription": "All L2 requests.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "UMask": "0x50",
+ "BriefDescription": "Not rejected writebacks that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+ "PublicDescription": "This event counts the number of WB requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x41",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x4f",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "L1D miss oustandings duration in cycles",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "UMask": "0x1",
+ "BriefDescription": "L1D data line replacements",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "CounterMask": "1",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "CounterMask": "6",
+ "Errata": "BDM76",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "CounterMask": "1",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "CounterMask": "1",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles when L1D is locked",
+ "Counter": "0,1,2,3",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x2",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "This event counts both cacheable and noncachaeble code read requests.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x4",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x8",
+ "BriefDescription": "Demand and prefetch data reads",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb2",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x11",
+ "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x12",
+ "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+ "SampleAfterValue": "100003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x21",
+ "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "Errata": "BDM35",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x41",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x42",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "SampleAfterValue": "100003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x81",
+ "BriefDescription": "All retired load uops. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x82",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.",
+ "SampleAfterValue": "2000003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "Errata": "BDM35",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x4",
+ "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x20",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+ "Errata": "BDM100, BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x40",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x1",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+ "Errata": "BDE70, BDM100",
+ "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM",
+ "Errata": "BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM",
+ "Errata": "BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x20",
+ "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD",
+ "Errata": "BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "PublicDescription": "This event counts Demand Data Read requests that access L2 cache, including rejects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x2",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.RFO",
+ "PublicDescription": "This event counts Read for Ownership (RFO) requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x4",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.CODE_RD",
+ "PublicDescription": "This event counts the number of L2 cache accesses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x8",
+ "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.ALL_PF",
+ "PublicDescription": "This event counts L2 or L3 HW prefetches that access L2 cache including rejects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x10",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L1D_WB",
+ "PublicDescription": "This event counts L1D writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x20",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_FILL",
+ "PublicDescription": "This event counts L2 fill requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x40",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "This event counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x80",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "PublicDescription": "This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x1",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.I",
+ "PublicDescription": "This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x2",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.S",
+ "PublicDescription": "This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x4",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.E",
+ "PublicDescription": "This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x7",
+ "BriefDescription": "L2 cache lines filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x5",
+ "BriefDescription": "Clean L2 cache lines evicted by demand.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf4",
+ "UMask": "0x10",
+ "BriefDescription": "Split locks in SQ",
+ "Counter": "0,1,2,3",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "PublicDescription": "This event counts the number of split locks in the super queue.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
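Editorial note: the L2_RQSTS events in cache.json come in hit/miss/all groupings, so cache hit rates fall out of simple ratios. A minimal sketch with hypothetical counts (real values via e.g. `perf stat -e l2_rqsts.demand_data_rd_hit,l2_rqsts.demand_data_rd_miss`):

    # Hypothetical counter readings -- illustration only.
    hits = 900_000    # L2_RQSTS.DEMAND_DATA_RD_HIT
    misses = 100_000  # L2_RQSTS.DEMAND_DATA_RD_MISS

    print(f"L2 demand-read hit rate: {hits / (hits + misses):.1%}")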
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
new file mode 100644
index 000000000..d7b9d9c9c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json
@@ -0,0 +1,165 @@
+[
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x8",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "Errata": "BDM30",
+ "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x10",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "Errata": "BDM30",
+ "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x1",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x2",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x3",
+ "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x4",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x10",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x15",
+ "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
+ "SampleAfterValue": "2000006",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc7",
+ "UMask": "0x20",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x2a",
+ "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SINGLE",
+ "SampleAfterValue": "2000005",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x3c",
+ "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.PACKED",
+ "SampleAfterValue": "2000004",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x2",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x4",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x10",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x1e",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.ANY",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
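Editorial note: the FP_ARITH_INST_RETIRED sub-events count instructions, not FLOPs; the GFLOPs metric in bdwde-metrics.json above weights each sub-event by the number of floating-point operations it represents (1 for scalar, 2/4/8 for packed). A minimal sketch with hypothetical counts and runtime, mirroring that expression:

    # Hypothetical counter readings and runtime -- illustration only.
    counts = {
        "FP_ARITH_INST_RETIRED.SCALAR_SINGLE":      1.0e8,
        "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE":      2.0e8,
        "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE": 5.0e7,
        "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE": 5.0e7,
        "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE": 2.0e7,
        "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE": 2.0e7,
    }
    duration_s = 1.0  # wall-clock seconds, hypothetical

    flops = (1 * (counts["FP_ARITH_INST_RETIRED.SCALAR_SINGLE"]
                  + counts["FP_ARITH_INST_RETIRED.SCALAR_DOUBLE"])
             + 2 * counts["FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE"]
             + 4 * (counts["FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE"]
                    + counts["FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE"])
             + 8 * counts["FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE"])
    print(f"GFLOPs = {flops / 1e9 / duration_s:.2f}")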
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json
new file mode 100644
index 000000000..72781e1e3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json
@@ -0,0 +1,286 @@
+[
+ {
+ "EventCode": "0x79",
+ "UMask": "0x2",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.EMPTY",
+ "PublicDescription": "This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end. It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x20",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_SWITCHES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x3c",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
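Taken together, IDQ.DSB_UOPS, IDQ.MITE_ALL_UOPS and IDQ.MS_UOPS partition IDQ delivery by source, which supports a rough "DSB coverage" ratio. A sketch, assuming all three counts come from the same measurement interval (the metric definition is a common convention, not something this file specifies):

# Rough fraction of IDQ uops served from the DSB (decoded uop cache).
def dsb_coverage(dsb_uops, mite_uops, ms_uops):
    total = dsb_uops + mite_uops + ms_uops
    return dsb_uops / total if total else 0.0

print(f"DSB coverage: {dsb_coverage(8.2e9, 1.1e9, 0.3e9):.1%}")  # ~85%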
+ {
+ "EventCode": "0x80",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.HIT",
+ "PublicDescription": "This event counts the number of both cacheable and noncacheable Instruction Cache, Streaming Buffer and Victim Cache Reads including UC fetches.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x2",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.MISSES",
+ "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes UC accesses.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.IFDATA_STALL",
+ "PublicDescription": "This event counts cycles during which the demand fetch waits for data (wfdM104H) from L2 or iSB (opportunistic hit).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "CounterMask": "3",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
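The IDQ_UOPS_NOT_DELIVERED group above is the raw material for a Top-Down style "frontend bound" estimate: the thread has 4 issue slots per cycle, so undelivered slots over total slots give the fraction lost to the front end. A sketch of that arithmetic (the 4-wide assumption matches the descriptions above; the inputs are placeholders):

# Top-Down style frontend-bound estimate for a 4-wide machine.
def frontend_bound(idq_uops_not_delivered_core, cpu_clk_unhalted_thread):
    slots = 4 * cpu_clk_unhalted_thread
    return idq_uops_not_delivered_core / slots if slots else 0.0

print(f"frontend bound: {frontend_bound(2.4e9, 3.0e9):.1%}")  # 20.0%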
+ {
+ "EventCode": "0xAB",
+ "UMask": "0x2",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "Counter": "0,1,2,3",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
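Each of these per-topic files is a plain JSON array, so the entries are easy to sanity-check outside the perf build. A minimal validation sketch (the required-key set is an assumption drawn from the entries in this patch, not a schema the tooling defines):

import json
import sys

REQUIRED = {"EventCode", "UMask", "EventName", "SampleAfterValue"}

def check(path):
    with open(path) as f:
        events = json.load(f)
    for ev in events:
        missing = REQUIRED - ev.keys()
        if missing:
            print(f"{path}: {ev.get('EventName', '?')} missing {missing}")
        int(ev["EventCode"], 16)  # numeric fields are hex strings
        int(ev["UMask"], 16)

check(sys.argv[1])  # e.g. frontend.json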
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json
new file mode 100644
index 000000000..e44f73c24
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json
@@ -0,0 +1,432 @@
+[
+ {
+ "EventCode": "0x05",
+ "UMask": "0x1",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "PublicDescription": "This event counts speculative cache-line split load uops dispatched to the L1 cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x05",
+ "UMask": "0x2",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "PublicDescription": "This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times a TSX line had a cache conflict",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times we could not allocate Lock Buffer",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC2",
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x4",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC3",
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x8",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC4",
+ "PublicDescription": "RTM region detected inside HLE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x10",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times we entered an HLE region; does not count nested transactions",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.START",
+ "PublicDescription": "Number of times we entered an HLE region\n does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times HLE commit succeeded",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times HLE abort was triggered (PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PublicDescription": "Number of times HLE abort was triggered (PEBS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC1",
+ "PublicDescription": "Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC2",
+ "PublicDescription": "Number of times the TSX watchdog signaled an HLE abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC3",
+ "PublicDescription": "Number of times a disallowed operation caused an HLE abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC4",
+ "PublicDescription": "Number of times HLE caused a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC5",
+ "PublicDescription": "Number of times HLE aborted and was not due to the abort conditions in subevents 3-6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
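The HLE_RETIRED subevents support a simple transactional scorecard: START splits into COMMIT plus ABORTED, and the ABORTED_MISC* umasks break the aborts down by cause. A sketch of that bookkeeping, with placeholder counts assumed to come from one measurement interval:

# Summarize HLE elision outcomes from the 0xc8 subevents.
def hle_summary(start, commit, aborted, abort_causes):
    if start == 0:
        return "no HLE regions"
    lines = [f"commit rate: {commit / start:.1%}",
             f"abort rate:  {aborted / start:.1%}"]
    for cause, count in abort_causes.items():
        lines.append(f"  {cause}: {count}")
    return "\n".join(lines)

print(hle_summary(1000, 940, 60,
                  {"MISC1 (memory)": 41, "MISC3 (unfriendly insn)": 19}))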
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times we entered an RTM region; does not count nested transactions",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.START",
+ "PublicDescription": "Number of times we entered an RTM region\n does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times RTM commit succeeded",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times RTM abort was triggered (PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PublicDescription": "Number of times RTM abort was triggered (PEBS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC1",
+ "PublicDescription": "Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC2",
+ "PublicDescription": "Number of times the TSX watchdog signaled an RTM abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC3",
+ "PublicDescription": "Number of times a disallowed operation caused an RTM abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC4",
+ "PublicDescription": "Number of times a RTM caused a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC5",
+ "PublicDescription": "Number of times RTM aborted and was not due to the abort conditions in subevents 3-6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 4",
+ "PEBS": "2",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above four.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 8",
+ "PEBS": "2",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above eight.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 16",
+ "PEBS": "2",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 16.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 32",
+ "PEBS": "2",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 32.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 64",
+ "PEBS": "2",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 64.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "2003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 128",
+ "PEBS": "2",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 128.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "1009",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 256",
+ "PEBS": "2",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 256.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "503",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 512",
+ "PEBS": "2",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 512.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "101",
+ "CounterHTOff": "3"
+ }
+] \ No newline at end of file
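All eight 0xCD latency events above differ only in MSRValue, which is written to the threshold MSR named by MSRIndex (0x3F6) so that PEBS samples only loads slower than that many cycles; note how SampleAfterValue shrinks as the threshold grows, since qualifying loads become rarer. With perf this is normally expressed through the ldlat modifier, roughly as sketched below (standard perf event syntax, but the workload path is a placeholder):

import subprocess

# Sample loads with latency above 128 cycles
# (MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128).
cmd = ["perf", "record",
       "-e", "cpu/event=0xcd,umask=0x1,ldlat=128/pp",
       "-c", "1009",        # matches the SampleAfterValue above
       "--", "./workload"]  # placeholder workload binary
subprocess.run(cmd, check=True)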
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/other.json b/tools/perf/pmu-events/arch/x86/broadwellde/other.json
new file mode 100644
index 000000000..4475249ea
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/other.json
@@ -0,0 +1,44 @@
+[
+ {
+ "EventCode": "0x5C",
+ "UMask": "0x1",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING0",
+ "PublicDescription": "This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x5C",
+ "UMask": "0x1",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "UMask": "0x2",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING123",
+ "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "Counter": "0,1,2,3",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
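CPL_CYCLES.RING0 and CPL_CYCLES.RING123 above partition unhalted thread cycles by privilege level, so their ratio approximates how much of a workload's time is spent in the kernel. A sketch of that split (placeholder inputs):

# Approximate kernel-time share from the CPL_CYCLES 0x5C subevents.
def ring0_share(ring0_cycles, ring123_cycles):
    total = ring0_cycles + ring123_cycles
    return ring0_cycles / total if total else 0.0

print(f"time in ring 0: {ring0_share(0.7e9, 2.1e9):.1%}")  # 25.0%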
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json
new file mode 100644
index 000000000..920c89da9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json
@@ -0,0 +1,1427 @@
+[
+ {
+ "EventCode": "0x00",
+ "UMask": "0x1",
+ "BriefDescription": "Instructions retired from execution.",
+ "Counter": "Fixed counter 0",
+ "EventName": "INST_RETIRED.ANY",
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x3",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "Counter": "Fixed counter 2",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 2"
+ },
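Because CPU_CLK_UNHALTED.REF_TSC ticks at the TSC rate regardless of frequency scaling, dividing it by the platform's TSC frequency approximates unhalted wall time. A sketch, with the frequency as an assumed input (on a real system it must be read from the platform, e.g. via CPUID):

# Approximate unhalted time from REF_TSC, assuming a known TSC frequency.
def unhalted_seconds(ref_tsc_count, tsc_hz):
    return ref_tsc_count / tsc_hz

print(f"{unhalted_seconds(4.8e9, 2.4e9):.2f} s unhalted")  # 2.00 s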
+ {
+ "EventCode": "0x03",
+ "UMask": "0x2",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x8",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x07",
+ "UMask": "0x1",
+ "BriefDescription": "False dependencies in MOB due to partial compare",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x3",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x3",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RAT_STALL_CYCLES",
+ "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
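UOPS_ISSUED.STALL_CYCLES shows the Invert flag at work: with cmask=1, inv=1 flips the comparison from "at least 1 uop issued this cycle" to "fewer than 1", i.e. issue-stall cycles. In the packing sketch used earlier this is just bit 23:

# Same illustrative helper as in the earlier sketch.
def raw_config(event, umask, cmask=0, inv=0, edge=0, any_thread=0):
    return (cmask << 24) | (inv << 23) | (any_thread << 21) | \
           (edge << 18) | (umask << 8) | event

print(hex(raw_config(0x0E, 0x01)))                  # UOPS_ISSUED.ANY          -> 0x10e
print(hex(raw_config(0x0E, 0x01, cmask=1, inv=1)))  # UOPS_ISSUED.STALL_CYCLES -> 0x180010e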
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x10",
+ "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive\n added by GSR u-arch.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x20",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x40",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when divider is busy executing divide operations",
+ "Counter": "0,1,2,3",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3c",
+ "UMask": "0x2",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x2",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4c",
+ "UMask": "0x1",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4C",
+ "UMask": "0x2",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the hardware prefetch.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x1",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x2",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x4",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "Invert": "1",
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "CounterMask": "1",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
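RS_EVENTS.EMPTY_END (EdgeDetect plus Invert) counts the ends of reservation-station-empty periods, so pairing it with RS_EVENTS.EMPTY_CYCLES yields the average length of a frontend starvation episode. A sketch of that derived metric (placeholder inputs):

# Average length of an RS-empty episode, in cycles.
def avg_rs_empty_episode(empty_cycles, empty_end):
    return empty_cycles / empty_end if empty_end else 0.0

print(f"{avg_rs_empty_episode(1.2e8, 4.0e5):.0f} cycles/episode")  # 300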
+ {
+ "EventCode": "0x87",
+ "UMask": "0x1",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "Counter": "0,1,2,3",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x41",
+ "BriefDescription": "Not taken macro-conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts not taken macro-conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x81",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x82",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions excluding calls and indirect branches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x84",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts taken speculative and retired indirect branches excluding calls and return branches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x88",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "PublicDescription": "This event counts taken speculative and retired indirect branches that have a return mnemonic.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x90",
+ "BriefDescription": "Taken speculative and retired direct near calls",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "PublicDescription": "This event counts taken speculative and retired direct near calls.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xa0",
+ "BriefDescription": "Taken speculative and retired indirect calls",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "PublicDescription": "This event counts taken speculative and retired indirect calls including both register and memory indirect.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc1",
+ "BriefDescription": "Speculative and retired macro-conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired macro-conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc2",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc4",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc8",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xd0",
+ "BriefDescription": "Speculative and retired direct near calls",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired direct near calls.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xff",
+ "BriefDescription": "Speculative and retired branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x41",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts not taken speculative and retired mispredicted macro conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x81",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts taken speculative and retired mispredicted macro conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x84",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x88",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches that have a return mnemonic.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xa0",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xc1",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xc4",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xff",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA0",
+ "UMask": "0x3",
+ "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports",
+ "Counter": "0,1,2,3",
+ "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF",
+ "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
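The per-port 0xA1 events count cycles in which at least one uop was dispatched to the given port, so dividing by unhalted thread cycles gives a per-port utilization estimate. A sketch over a few ports (placeholder counts; the other ports work the same way):

# Per-port utilization from UOPS_EXECUTED_PORT.PORT_n cycle counts.
def port_utilization(port_cycles, clk_cycles):
    return {port: cycles / clk_cycles for port, cycles in port_cycles.items()}

util = port_utilization({0: 1.8e9, 1: 1.6e9, 5: 0.9e9}, 3.0e9)
for port, share in util.items():
    print(f"port {port}: {share:.0%}")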
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 1.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 4.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 5.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 6.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x1",
+ "BriefDescription": "Resource-related stall cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.RS",
+ "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "CounterMask": "1",
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "CounterMask": "2",
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x4",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "CounterMask": "4",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x4",
+ "BriefDescription": "Total execution stalls.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x5",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "CounterMask": "5",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x5",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "CounterMask": "5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x6",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "CounterMask": "6",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x6",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "CounterMask": "6",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "CounterMask": "8",
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "CounterMask": "8",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0xc",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "CounterMask": "12",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0xc",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "CounterMask": "12",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "CounterMask": "3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Number of uops executed on the core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "CounterMask": "3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x0",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.ANY_P",
+ "Errata": "BDM61",
+ "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x1",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "PEBS": "2",
+ "Counter": "1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "Errata": "BDM11, BDM55",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x2",
+ "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.X87",
+ "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Actually retired uops. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.ALL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "CounterMask": "10",
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x2",
+ "BriefDescription": "Retirement slots used. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "CounterMask": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x4",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x20",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x0",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "This event counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x1",
+ "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x4",
+ "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "Errata": "BDW98",
+ "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x8",
+ "BriefDescription": "Return instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x10",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "PublicDescription": "This event counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x20",
+ "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x40",
+ "BriefDescription": "Far branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "Errata": "BDW98",
+ "PublicDescription": "This event counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x0",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "This event counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x1",
+ "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x4",
+ "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x8",
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x20",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCC",
+ "UMask": "0x20",
+ "BriefDescription": "Count cases of saving new LBR",
+ "Counter": "0,1,2,3",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xe6",
+ "UMask": "0x1f",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
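Each core-event entry in the pipeline file above pairs an EventCode and UMask that map onto the low bytes of the architectural IA32_PERFEVTSELx encoding (event select in bits 0-7, unit mask in bits 8-15), which is also what perf accepts through its cpu/event=...,umask=.../ syntax. A minimal Python sketch of that mapping, assuming entries shaped like the JSON above; the file path and the chosen event are illustrative only:

import json

def to_perf_event(entry):
    # Build a raw perf event string from one pmu-events JSON entry.
    # EventCode/UMask are hex strings such as "0xC4" and "0x20".
    event = int(entry["EventCode"].split(",")[0], 16)  # a few entries list two codes
    umask = int(entry["UMask"], 16)
    spec = "cpu/event=%#x,umask=%#x" % (event, umask)
    if entry.get("CounterMask", "0") != "0":
        spec += ",cmask=" + entry["CounterMask"]
    if entry.get("Invert", "0") == "1":
        spec += ",inv"
    if entry.get("EdgeDetect", "0") == "1":
        spec += ",edge"
    return spec + "/"

with open("pipeline.json") as f:  # illustrative path
    events = {e["EventName"]: e for e in json.load(f)}
print(to_perf_event(events["BR_INST_RETIRED.NEAR_TAKEN"]))
# prints: cpu/event=0xc4,umask=0x20/

The resulting string can be handed to perf stat -e directly.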
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json
new file mode 100644
index 000000000..58ed6d33d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-cache.json
@@ -0,0 +1,317 @@
+[
+ {
+ "BriefDescription": "Uncore cache clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_C_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.ANY",
+ "Filter": "filter_state=0x1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x11",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.UNCACHEABLE",
+ "Filter": "filter_opc=0x187",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_READ",
+ "Filter": "filter_opc=0x187,filter_nc=1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_WRITE",
+ "Filter": "filter_opc=0x18f,filter_nc=1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+ "Filter": "filter_opc=0x190",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x191",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+ "Filter": "filter_opc=0x192",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+ "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+ "Filter": "filter_opc=0x180,filter_tid=0x3e",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x181",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_FULL",
+ "Filter": "filter_opc=0x18c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+ "Filter": "filter_opc=0x18d",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_WRITE",
+ "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "read requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "read requests to local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "read requests to remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0xC",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT",
+ "PerPkg": "1",
+ "UMask": "0x40",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x20",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line response from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPS",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line forwarded from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x8",
+ "Unit": "HA"
+ }
+]
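Several CBO entries above carry "ScaleUnit": "64Bytes": each count corresponds to one 64-byte cache line, so a count over a known interval converts directly to bandwidth. A small sketch of that arithmetic (the event choice and numbers are made up):

CACHE_LINE_BYTES = 64  # per the "ScaleUnit": "64Bytes" annotation

def llc_miss_bandwidth_mib_s(miss_count, seconds):
    # e.g. an LLC_MISSES.DATA_READ count over a measurement interval
    return miss_count * CACHE_LINE_BYTES / seconds / (1024 * 1024)

print(llc_miss_bandwidth_mib_s(10_000_000, 1.0))  # ~610.4 MiB/s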
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json
new file mode 100644
index 000000000..f4b0745cd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-memory.json
@@ -0,0 +1,86 @@
+[
+ {
+ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_READ",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_WRITE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0xC",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory controller clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_M_DCLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "UNC_M_POWER_CHANNEL_PPD",
+ "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_DCLOCKTICKS) * 100.",
+ "MetricName": "power_channel_ppd %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x86",
+ "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+ "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_DCLOCKTICKS) * 100.",
+ "MetricName": "power_critical_throttle_cycles %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles Memory is in self refresh power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x43",
+ "EventName": "UNC_M_POWER_SELF_REFRESH",
+ "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_DCLOCKTICKS) * 100.",
+ "MetricName": "power_self_refresh %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charges due to page misses",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for writes",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.WR",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "iMC"
+ }
+]
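The MetricExpr fields above are plain ratios over raw counts; power_channel_ppd %, for instance, is (UNC_M_POWER_CHANNEL_PPD / UNC_M_DCLOCKTICKS) * 100. The same computation by hand, as a sketch with made-up counter values:

def pct_of_dclk(cycles_in_state, dclockticks):
    # e.g. (UNC_M_POWER_CHANNEL_PPD / UNC_M_DCLOCKTICKS) * 100
    return cycles_in_state / dclockticks * 100.0

print(pct_of_dclk(250_000, 1_000_000))  # 25.0 -> ranks in CKE power-down 25% of the time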
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json
new file mode 100644
index 000000000..dd1b95655
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/uncore-power.json
@@ -0,0 +1,92 @@
+[
+ {
+ "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+ "Filter": "occ_sel=1",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c0 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+ "Filter": "occ_sel=2",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c3 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+ "Filter": "occ_sel=3",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c6 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA",
+ "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+ "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "prochot_external_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_limit_thermal_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_os_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x74",
+ "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_trans_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ }
+]
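As the descriptions above note, CORES_C0/C3/C6 are occupancy events: dividing the accumulated count by UNC_P_CLOCKTICKS yields the average number of cores resident in that C-state over the interval. A sketch with made-up values:

def avg_cores_in_state(occupancy_count, pcu_clockticks):
    # the occupancy counter adds the number of cores in the state every PCU cycle
    return occupancy_count / pcu_clockticks

print(avg_cores_in_state(6_000_000, 1_000_000))  # 6.0 cores in C0 on average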
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json
new file mode 100644
index 000000000..7d79c707c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json
@@ -0,0 +1,388 @@
+[
+ {
+ "EventCode": "0x08",
+ "UMask": "0x1",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x2",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x4",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x8",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0xe",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "Errata": "BDM69",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x20",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x40",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x60",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x1",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x2",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x4",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x8",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0xe",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "Errata": "BDM69",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x20",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x40",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x60",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "UMask": "0x10",
+ "BriefDescription": "Cycle count for an Extended Page table walk.",
+ "Counter": "0,1,2,3",
+ "EventName": "EPT.WALK_CYCLES",
+ "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x1",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x2",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x4",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x8",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0xe",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "Errata": "BDM69",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x20",
+ "BriefDescription": "Core misses that miss the DTLB and hit the STLB (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x40",
+ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M).",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x60",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAE",
+ "UMask": "0x1",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x11",
+ "BriefDescription": "Number of DTLB page walker hits in the L1+FB.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x12",
+ "BriefDescription": "Number of DTLB page walker hits in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x14",
+ "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x18",
+ "BriefDescription": "Number of DTLB page walker hits in Memory.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x21",
+ "BriefDescription": "Number of ITLB page walker hits in the L1+FB.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x22",
+ "BriefDescription": "Number of ITLB page walker hits in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x24",
+ "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x1",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x20",
+ "BriefDescription": "STLB flush attempts",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
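The TLB events above can also be driven without the perf tool: with perf_event_open(2), a raw Intel event is encoded as (UMask << 8) | EventCode in attr.config. A minimal sketch, illustrative rather than part of the patch, assuming a Broadwell part and counting ITLB_MISSES.MISS_CAUSES_A_WALK (EventCode 0x85, UMask 0x1) for the calling thread:

/* Count ITLB_MISSES.MISS_CAUSES_A_WALK (event 0x85, umask 0x1) via
 * perf_event_open(2). Raw config layout on Intel: bits 0-7 event
 * select, bits 8-15 unit mask. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = (0x1ULL << 8) | 0x85;	/* UMask 0x1, EventCode 0x85 */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);	/* calling thread, any CPU */
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement goes here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("ITLB_MISSES.MISS_CAUSES_A_WALK: %llu\n",
		       (unsigned long long)count);
	close(fd);
	return 0;
}

This is the same encoding the perf tool programs for perf stat -e cpu/event=0x85,umask=0x1/; these JSON files are what give the encoding its symbolic name.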
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
new file mode 100644
index 000000000..5a7f1ec24
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + cpu@ITLB_MISSES.WALK_DURATION\\,cmask\\=1@ + 7* ITLB_MISSES.WALK_COMPLETED )) ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION + 7*(DTLB_STORE_MISSES.WALK_COMPLETED+DTLB_LOAD_MISSES.WALK_COMPLETED+ITLB_MISSES.WALK_COMPLETED) ) / (2*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles))",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
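Each MetricExpr above is ordinary arithmetic over the event names defined in the sibling JSON files; perf collects the referenced events for the run and then evaluates the expression. A small illustrative sketch (counter values are hypothetical) of how the IPC and CPI entries reduce to two divisions:

/* Illustrative only: how the IPC/CPI MetricExprs above reduce to
 * arithmetic over two counters once perf has read them. */
#include <stdint.h>
#include <stdio.h>

static double metric_ipc(uint64_t inst_retired_any, uint64_t clk_unhalted_thread)
{
	/* "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD" */
	return (double)inst_retired_any / (double)clk_unhalted_thread;
}

static double metric_cpi(uint64_t inst_retired_any, uint64_t clk_unhalted_thread)
{
	/* "1 / (INST_RETIRED.ANY / cycles)" */
	return 1.0 / metric_ipc(inst_retired_any, clk_unhalted_thread);
}

int main(void)
{
	/* Hypothetical counts from one measurement interval. */
	uint64_t instructions = 1200000000ULL, cycles = 800000000ULL;

	printf("IPC = %.2f, CPI = %.2f\n",
	       metric_ipc(instructions, cycles),
	       metric_cpi(instructions, cycles));
	return 0;	/* prints IPC = 1.50, CPI = 0.67 */
}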
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json
new file mode 100644
index 000000000..bf0c51272
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json
@@ -0,0 +1,965 @@
+[
+ {
+ "EventCode": "0x24",
+ "UMask": "0x21",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x22",
+ "BriefDescription": "RFO requests that miss L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x24",
+ "BriefDescription": "L2 cache misses when fetching instructions.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x27",
+ "BriefDescription": "Demand requests that miss L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x30",
+ "BriefDescription": "L2 prefetch requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.L2_PF_MISS",
+ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x3f",
+ "BriefDescription": "All requests that miss L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x41",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x42",
+ "BriefDescription": "RFO requests that hit L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x44",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x50",
+ "BriefDescription": "L2 prefetch requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.L2_PF_HIT",
+ "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe1",
+ "BriefDescription": "Demand Data Read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe2",
+ "BriefDescription": "RFO requests to L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe4",
+ "BriefDescription": "L2 code requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "This event counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe7",
+ "BriefDescription": "Demand requests to L2 cache.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xf8",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xff",
+ "BriefDescription": "All L2 requests.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "UMask": "0x50",
+ "BriefDescription": "Not rejected writebacks that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+ "PublicDescription": "This event counts the number of WB requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x41",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x4f",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "L1D miss oustandings duration in cycles",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand; from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "UMask": "0x1",
+ "BriefDescription": "L1D data line replacements",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "CounterMask": "1",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "CounterMask": "6",
+ "Errata": "BDM76",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "CounterMask": "1",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "CounterMask": "1",
+ "Errata": "BDM76",
+ "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles when L1D is locked",
+ "Counter": "0,1,2,3",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x2",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "This event counts both cacheable and noncachaeble code read requests.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x4",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x8",
+ "BriefDescription": "Demand and prefetch data reads",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb2",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x11",
+ "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x12",
+ "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.",
+ "SampleAfterValue": "100003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x21",
+ "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "Errata": "BDM35",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x41",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x42",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "SampleAfterValue": "100003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x81",
+ "BriefDescription": "All retired load uops. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x82",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.",
+ "SampleAfterValue": "2000003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "Errata": "BDM35",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x4",
+ "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x20",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+ "Errata": "BDM100, BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x40",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+ "Errata": "BDM100",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x1",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+ "Errata": "BDE70, BDM100",
+ "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM",
+ "Errata": "BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM",
+ "Errata": "BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x20",
+ "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD",
+ "Errata": "BDE70",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "PublicDescription": "This event counts Demand Data Read requests that access L2 cache, including rejects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x2",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.RFO",
+ "PublicDescription": "This event counts Read for Ownership (RFO) requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x4",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.CODE_RD",
+ "PublicDescription": "This event counts the number of L2 cache accesses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x8",
+ "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.ALL_PF",
+ "PublicDescription": "This event counts L2 or L3 HW prefetches that access L2 cache including rejects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x10",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L1D_WB",
+ "PublicDescription": "This event counts L1D writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x20",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_FILL",
+ "PublicDescription": "This event counts L2 fill requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x40",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "This event counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x80",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "PublicDescription": "This event counts transactions that access the L2 pipe including snoops, pagewalks, and so on.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x1",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.I",
+ "PublicDescription": "This event counts the number of L2 cache lines in the Invalidate state filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x2",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.S",
+ "PublicDescription": "This event counts the number of L2 cache lines in the Shared state filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x4",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.E",
+ "PublicDescription": "This event counts the number of L2 cache lines in the Exclusive state filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x7",
+ "BriefDescription": "L2 cache lines filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x5",
+ "BriefDescription": "Clean L2 cache lines evicted by demand.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf4",
+ "UMask": "0x10",
+ "BriefDescription": "Split locks in SQ",
+ "Counter": "0,1,2,3",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "PublicDescription": "This event counts the number of split locks in the super queue.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all requests that hit in the L3",
+ "MSRValue": "0x3f803c8fff",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c07f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c07f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0244",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3",
+ "MSRValue": "0x3f803c0200",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3",
+ "MSRValue": "0x3f803c0100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3",
+ "MSRValue": "0x3f803c0002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
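The OFFCORE_RESPONSE entries above pair the 0xB7/0xBB event encoding with an auxiliary filter MSR (MSRIndex 0x1a6,0x1a7) whose MSRValue selects the request, supplier, and snoop attributes; under perf_event_open(2) that filter value travels in attr.config1. Another sketch under the same assumptions as before, programming OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE from the last entry:

/* Program OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE (event 0xB7,
 * umask 0x1) with its MSR 0x1A6 filter value passed in config1. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t hits = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = (0x1ULL << 8) | 0xB7;	/* UMask 0x1, EventCode 0xB7 */
	attr.config1 = 0x3f803c0002ULL;		/* MSRValue from the JSON entry */
	attr.disabled = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_RESET, 0);
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... an RFO-heavy workload (stores to a large buffer) here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	if (read(fd, &hits, sizeof(hits)) == sizeof(hits))
		printf("DEMAND_RFO.LLC_HIT.ANY_RESPONSE: %llu\n",
		       (unsigned long long)hits);
	close(fd);
	return 0;
}

Only two such filter MSRs exist (0x1A6 and 0x1A7), so at most two differently filtered OFFCORE_RESPONSE events can be scheduled at once.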
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json
new file mode 100644
index 000000000..d7b9d9c9c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json
@@ -0,0 +1,165 @@
+[
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x8",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "Errata": "BDM30",
+ "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x10",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "Errata": "BDM30",
+ "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x1",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x2",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x3",
+ "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x4",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x10",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x15",
+ "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.DOUBLE",
+ "SampleAfterValue": "2000006",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc7",
+ "UMask": "0x20",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x2a",
+ "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SINGLE",
+ "SampleAfterValue": "2000005",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x3c",
+ "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.PACKED",
+ "SampleAfterValue": "2000004",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x2",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x4",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x10",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x1e",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.ANY",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json
new file mode 100644
index 000000000..72781e1e3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json
@@ -0,0 +1,286 @@
+[
+ {
+ "EventCode": "0x79",
+ "UMask": "0x2",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.EMPTY",
+ "PublicDescription": "This counts the number of cycles that the instruction decoder queue is empty and can indicate that the application may be bound in the front end. It does not determine whether there are uops being delivered to the Alloc stage since uops can be delivered by bypass skipping the Instruction Decode Queue (IDQ) when it is empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x20",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_SWITCHES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x3c",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.HIT",
+ "PublicDescription": "This event counts the number of both cacheable and noncacheable Instruction Cache, Streaming Buffer and Victim Cache Reads including UC fetches.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x2",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.MISSES",
+ "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes UC accesses.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.IFDATA_STALL",
+ "PublicDescription": "This event counts cycles during which the demand fetch waits for data (wfdM104H) from L2 or iSB (opportunistic hit).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "CounterMask": "3",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xAB",
+ "UMask": "0x2",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "Counter": "0,1,2,3",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json
new file mode 100644
index 000000000..d79a5cfea
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json
@@ -0,0 +1,679 @@
+[
+ {
+ "EventCode": "0x05",
+ "UMask": "0x1",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "PublicDescription": "This event counts speculative cache-line split load uops dispatched to the L1 cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x05",
+ "UMask": "0x2",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "PublicDescription": "This event counts speculative cache line split store-address (STA) uops dispatched to the L1 cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times a TSX line had a cache conflict",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to an evicted line caused by a transaction overflow.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times we could not allocate Lock Buffer",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC2",
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x4",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC3",
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x8",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC4",
+ "PublicDescription": "RTM region detected inside HLE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x10",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times we entered an HLE region; does not count nested transactions",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.START",
+ "PublicDescription": "Number of times we entered an HLE region\n does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times HLE commit succeeded",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times HLE abort was triggered (PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PublicDescription": "Number of times HLE abort was triggered (PEBS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC1",
+ "PublicDescription": "Number of times an HLE abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC2",
+ "PublicDescription": "Number of times the TSX watchdog signaled an HLE abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC3",
+ "PublicDescription": "Number of times a disallowed operation caused an HLE abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC4",
+ "PublicDescription": "Number of times HLE caused a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC5",
+ "PublicDescription": "Number of times HLE aborted and was not due to the abort conditions in subevents 3-6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times we entered an RTM region; does not count nested transactions",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.START",
+ "PublicDescription": "Number of times we entered an RTM region\n does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times RTM commit succeeded",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times RTM abort was triggered (PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PublicDescription": "Number of times RTM abort was triggered (PEBS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC1",
+ "PublicDescription": "Number of times an RTM abort was attributed to a Memory condition (See TSX_Memory event for additional details).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC2",
+ "PublicDescription": "Number of times the TSX watchdog signaled an RTM abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC3",
+ "PublicDescription": "Number of times a disallowed operation caused an RTM abort.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC4",
+ "PublicDescription": "Number of times a RTM caused a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC5",
+ "PublicDescription": "Number of times RTM aborted and was not due to the abort conditions in subevents 3-6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 4",
+ "PEBS": "2",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above four.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 8",
+ "PEBS": "2",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above eight.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 16",
+ "PEBS": "2",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 16.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 32",
+ "PEBS": "2",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 32.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 64",
+ "PEBS": "2",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 64.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "2003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 128",
+ "PEBS": "2",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 128.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "1009",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 256",
+ "PEBS": "2",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 256.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "503",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 512",
+ "PEBS": "2",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "Errata": "BDM100, BDM35",
+ "PublicDescription": "This event counts loads with latency value being above 512.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "101",
+ "CounterHTOff": "3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all requests that miss in the L3",
+ "MSRValue": "0x3fbfc08fff",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache",
+ "MSRValue": "0x087fc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache",
+ "MSRValue": "0x103fc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram",
+ "MSRValue": "0x063bc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x06040007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3",
+ "MSRValue": "0x3fbfc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0604000244",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3",
+ "MSRValue": "0x3fbfc00244",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0604000122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3",
+ "MSRValue": "0x3fbfc00122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache",
+ "MSRValue": "0x087fc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache",
+ "MSRValue": "0x103fc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram",
+ "MSRValue": "0x063bc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0604000091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3",
+ "MSRValue": "0x3fbfc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3",
+ "MSRValue": "0x3fbfc00200",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3",
+ "MSRValue": "0x3fbfc00100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache",
+ "MSRValue": "0x103fc00002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3",
+ "MSRValue": "0x3fbfc00002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/other.json b/tools/perf/pmu-events/arch/x86/broadwellx/other.json
new file mode 100644
index 000000000..4475249ea
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/other.json
@@ -0,0 +1,44 @@
+[
+ {
+ "EventCode": "0x5C",
+ "UMask": "0x1",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING0",
+ "PublicDescription": "This event counts the unhalted core cycles during which the thread is in the ring 0 privileged mode.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x5C",
+ "UMask": "0x1",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "UMask": "0x2",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING123",
+ "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "Counter": "0,1,2,3",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json
new file mode 100644
index 000000000..920c89da9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json
@@ -0,0 +1,1427 @@
+[
+ {
+ "EventCode": "0x00",
+ "UMask": "0x1",
+ "BriefDescription": "Instructions retired from execution.",
+ "Counter": "Fixed counter 0",
+ "EventName": "INST_RETIRED.ANY",
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x3",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "Counter": "Fixed counter 2",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x2",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x8",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x07",
+ "UMask": "0x1",
+ "BriefDescription": "False dependencies in MOB due to partial compare",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x3",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x3",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RAT_STALL_CYCLES",
+ "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x10",
+ "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive\n added by GSR u-arch.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x20",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x40",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when divider is busy executing divide operations",
+ "Counter": "0,1,2,3",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3c",
+ "UMask": "0x2",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x2",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4c",
+ "UMask": "0x1",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4C",
+ "UMask": "0x2",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the hardware prefetch.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x1",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x2",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x4",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "Invert": "1",
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "CounterMask": "1",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "UMask": "0x1",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "Counter": "0,1,2,3",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x41",
+ "BriefDescription": "Not taken macro-conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts not taken macro-conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x81",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x82",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "PublicDescription": "This event counts taken speculative and retired macro-conditional branch instructions excluding calls and indirect branches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x84",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts taken speculative and retired indirect branches excluding calls and return branches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x88",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "PublicDescription": "This event counts taken speculative and retired indirect branches that have a return mnemonic.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x90",
+ "BriefDescription": "Taken speculative and retired direct near calls",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "PublicDescription": "This event counts taken speculative and retired direct near calls.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xa0",
+ "BriefDescription": "Taken speculative and retired indirect calls",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "PublicDescription": "This event counts taken speculative and retired indirect calls including both register and memory indirect.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc1",
+ "BriefDescription": "Speculative and retired macro-conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired macro-conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc2",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired macro-unconditional branch instructions, excluding calls and indirects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc4",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches excluding calls and return branches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc8",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired indirect branches that have a return mnemonic.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xd0",
+ "BriefDescription": "Speculative and retired direct near calls",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired direct near calls.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xff",
+ "BriefDescription": "Speculative and retired branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x41",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts not taken speculative and retired mispredicted macro conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x81",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "PublicDescription": "This event counts taken speculative and retired mispredicted macro conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x84",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x88",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "PublicDescription": "This event counts taken speculative and retired mispredicted indirect branches that have a return mnemonic.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xa0",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xc1",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xc4",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "PublicDescription": "This event counts both taken and not taken mispredicted indirect branches excluding calls and returns.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xff",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted branch instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA0",
+ "UMask": "0x3",
+ "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports",
+ "Counter": "0,1,2,3",
+ "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF",
+ "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 1.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 4.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 5.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per core when uops are exectuted in port 6.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+ "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x1",
+ "BriefDescription": "Resource-related stall cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.RS",
+ "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "CounterMask": "1",
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "CounterMask": "2",
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x4",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "CounterMask": "4",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x4",
+ "BriefDescription": "Total execution stalls.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x5",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "CounterMask": "5",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x5",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "CounterMask": "5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x6",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "CounterMask": "6",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x6",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "CounterMask": "6",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "CounterMask": "8",
+ "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "CounterMask": "8",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0xc",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "CounterMask": "12",
+ "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0xc",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "CounterMask": "12",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "CounterMask": "3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Number of uops executed on the core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "CounterMask": "3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x0",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.ANY_P",
+ "Errata": "BDM61",
+ "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x1",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "PEBS": "2",
+ "Counter": "1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "Errata": "BDM11, BDM55",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x2",
+ "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.X87",
+ "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Actually retired uops. (Precise Event - PEBS)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.ALL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "CounterMask": "10",
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x2",
+ "BriefDescription": "Retirement slots used. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "CounterMask": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x4",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x20",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x0",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "This event counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x1",
+ "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x4",
+ "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "Errata": "BDW98",
+ "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x8",
+ "BriefDescription": "Return instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x10",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "PublicDescription": "This event counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x20",
+ "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x40",
+ "BriefDescription": "Far branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "Errata": "BDW98",
+ "PublicDescription": "This event counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x0",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "This event counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x1",
+ "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x4",
+ "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x8",
+ "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.RET",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x20",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCC",
+ "UMask": "0x20",
+ "BriefDescription": "Count cases of saving new LBR",
+ "Counter": "0,1,2,3",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xe6",
+ "UMask": "0x1f",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json
new file mode 100644
index 000000000..58ed6d33d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-cache.json
@@ -0,0 +1,317 @@
+[
+ {
+ "BriefDescription": "Uncore cache clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_C_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.ANY",
+ "Filter": "filter_state=0x1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x11",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.UNCACHEABLE",
+ "Filter": "filter_opc=0x187",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_READ",
+ "Filter": "filter_opc=0x187,filter_nc=1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_WRITE",
+ "Filter": "filter_opc=0x18f,filter_nc=1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+ "Filter": "filter_opc=0x190",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x191",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+ "Filter": "filter_opc=0x192",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+ "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+ "Filter": "filter_opc=0x180,filter_tid=0x3e",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x181",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_FULL",
+ "Filter": "filter_opc=0x18c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+ "Filter": "filter_opc=0x18d",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_WRITE",
+ "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "read requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "read requests to local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "read requests to remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0xC",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT",
+ "PerPkg": "1",
+ "UMask": "0x40",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x20",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line response from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPS",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line forwarded from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x8",
+ "Unit": "HA"
+ }
+]
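Events above carrying ScaleUnit "64Bytes" count one 64-byte cache line per increment, so a raw count becomes bandwidth by multiplying by 64 and dividing by the measurement interval; the QPI and iMC files below use the same convention with 8-byte flits and 64-byte CAS transfers. A sketch of the arithmetic, with placeholder numbers standing in for values a perf session would collect:

/* Convert an LLC_MISSES.* count (one 64-byte line per event) into MB/s.
 * Inputs are placeholders for counts a perf session would collect. */
#include <stdio.h>

static double llc_miss_bandwidth_mbps(unsigned long long events,
                                      double interval_sec)
{
        return (events * 64.0) / (interval_sec * 1e6);
}

int main(void)
{
        /* Hypothetical: 1.5e9 LLC_MISSES.DATA_READ events over 10 s. */
        printf("%.1f MB/s\n", llc_miss_bandwidth_mbps(1500000000ULL, 10.0));
        return 0;
}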
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json
new file mode 100644
index 000000000..824961318
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-interconnect.json
@@ -0,0 +1,28 @@
+[
+ {
+ "BriefDescription": "QPI clock ticks",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_Q_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data",
+ "Counter": "0,1,2,3",
+ "EventName": "QPI_DATA_BANDWIDTH_TX",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x2",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data",
+ "Counter": "0,1,2,3",
+ "EventName": "QPI_CTL_BANDWIDTH_TX",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x4",
+ "Unit": "QPI LL"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json
new file mode 100644
index 000000000..66eed3997
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-memory.json
@@ -0,0 +1,86 @@
+[
+ {
+ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_READ",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_WRITE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0xC",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory controller clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "UNC_M_POWER_CHANNEL_PPD",
+ "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_channel_ppd %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x86",
+ "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+ "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_critical_throttle_cycles %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles Memory is in self refresh power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x43",
+ "EventName": "UNC_M_POWER_SELF_REFRESH",
+ "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_self_refresh %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charges due to page misses",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for writes",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.WR",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "iMC"
+ }
+]
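The MetricExpr entries above are plain ratios against UNC_M_CLOCKTICKS scaled to a percentage, e.g. power_self_refresh % = (UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100. A sketch of that arithmetic with hypothetical counter values:

/* Evaluate a cycles-as-percentage MetricExpr such as
 * (UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100. */
#include <stdio.h>

static double pct_of_clockticks(unsigned long long event_cycles,
                                unsigned long long clockticks)
{
        return clockticks ? 100.0 * event_cycles / clockticks : 0.0;
}

int main(void)
{
        /* Hypothetical: 2.4e8 self-refresh cycles out of 1.2e9 iMC clocks. */
        printf("power_self_refresh = %.1f%%\n",
               pct_of_clockticks(240000000ULL, 1200000000ULL));
        return 0;
}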
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json
new file mode 100644
index 000000000..dd1b95655
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/uncore-power.json
@@ -0,0 +1,92 @@
+[
+ {
+ "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+ "Filter": "occ_sel=1",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c0 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+ "Filter": "occ_sel=2",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c3 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+ "Filter": "occ_sel=3",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c6 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA",
+ "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+ "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "prochot_external_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_limit_thermal_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_os_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x74",
+ "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_trans_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ }
+]
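PCU events are package-scoped (PerPkg), so consumers open them system-wide on one CPU per package, taking the PMU's dynamic type from sysfs rather than using PERF_TYPE_RAW. A sketch that assumes the PMU is registered as uncore_pcu and that UNC_P_CLOCKTICKS (which carries no EventCode above) is the PCU's event 0x0 - both assumptions to verify on the target machine, and opening uncore events generally requires elevated privileges:

/* Count PCU clockticks on package 0 for ~1 second. The PMU name
 * "uncore_pcu" and the config value 0x0 for UNC_P_CLOCKTICKS are
 * assumptions; check sysfs and the event table on the target. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        FILE *f = fopen("/sys/bus/event_source/devices/uncore_pcu/type", "r");
        uint64_t ticks;
        int type, fd;

        if (!f || fscanf(f, "%d", &type) != 1)
                return 1;
        fclose(f);

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;       /* dynamic PMU type from sysfs */
        attr.config = 0x0;      /* assumed UNC_P_CLOCKTICKS encoding */

        /* Uncore events are system-wide: pid = -1, cpu = a CPU in the
         * target package (CPU 0 here). */
        fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
        if (fd < 0) { perror("perf_event_open"); return 1; }

        sleep(1);
        if (read(fd, &ticks, sizeof(ticks)) == sizeof(ticks))
                printf("UNC_P_CLOCKTICKS over ~1s: %llu\n",
                       (unsigned long long)ticks);
        close(fd);
        return 0;
}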
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json
new file mode 100644
index 000000000..7d79c707c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json
@@ -0,0 +1,388 @@
+[
+ {
+ "EventCode": "0x08",
+ "UMask": "0x1",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x2",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x4",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x8",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0xe",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "Errata": "BDM69",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x20",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x40",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x60",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x1",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x2",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x4",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x8",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0xe",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "Errata": "BDM69",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x20",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x40",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x60",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "UMask": "0x10",
+ "BriefDescription": "Cycle count for an Extended Page table walk.",
+ "Counter": "0,1,2,3",
+ "EventName": "EPT.WALK_CYCLES",
+ "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x1",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x2",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x4",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x8",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0xe",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "Errata": "BDM69",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "Errata": "BDM69",
+ "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x20",
+ "BriefDescription": "Core misses that miss the DTLB and hit the STLB (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x40",
+ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M).",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x60",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAE",
+ "UMask": "0x1",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x11",
+ "BriefDescription": "Number of DTLB page walker hits in the L1+FB.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x12",
+ "BriefDescription": "Number of DTLB page walker hits in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x14",
+ "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x18",
+ "BriefDescription": "Number of DTLB page walker hits in Memory.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x21",
+ "BriefDescription": "Number of ITLB page walker hits in the L1+FB.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x22",
+ "BriefDescription": "Number of ITLB page walker hits in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x24",
+ "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+ "Errata": "BDM69, BDM98",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x1",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x20",
+ "BriefDescription": "STLB flush attempts",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
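WALK_DURATION and WALK_COMPLETED combine naturally into an average page-walk cost; this derived ratio is not defined in the file itself but is a common way to consume the counters above. A sketch with hypothetical counts:

/* Average cycles per completed DTLB load page walk:
 * DTLB_LOAD_MISSES.WALK_DURATION / DTLB_LOAD_MISSES.WALK_COMPLETED.
 * Inputs are placeholders for counts from a perf session. */
#include <stdio.h>

static double avg_walk_cycles(unsigned long long walk_duration_cycles,
                              unsigned long long walks_completed)
{
        return walks_completed ?
               (double)walk_duration_cycles / walks_completed : 0.0;
}

int main(void)
{
        /* Hypothetical: 5e8 busy PMH cycles across 2e7 completed walks. */
        printf("~%.1f cycles/walk\n",
               avg_walk_cycles(500000000ULL, 20000000ULL));
        return 0;
}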
diff --git a/tools/perf/pmu-events/arch/x86/goldmont/cache.json b/tools/perf/pmu-events/arch/x86/goldmont/cache.json
new file mode 100644
index 000000000..f8bbe087b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmont/cache.json
@@ -0,0 +1,2045 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache request misses"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.",
+ "EventCode": "0x30",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "L2_REJECT_XQ.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests rejected by the XQ"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to ensure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.",
+ "EventCode": "0x31",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CORE_REJECT_L2Q.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests rejected by the L2Q"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DL1.DIRTY_EVICTION",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1 Cache evictions for dirty data"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.",
+ "EventCode": "0x86",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "EventCode": "0xB7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts locked memory uops retired. This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. See the SDM for a complete description of which memory load accesses are locks.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Locked load uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x43",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of load uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of store uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x83",
+ "EventName": "MEM_UOPS_RETIRED.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that hit the L1 data cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that hit in the L2 cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that hit L2 (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that miss the L1 data cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that miss in the L2 cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that missed L2 (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired where the cache line containing the data was in the modified state of another core or modules cache (HITM). More specifically, this means that when the load address was checked by other caching agents (typically another processor) in the system, one of those caching agents indicated that they had a dirty copy of the data. Loads that obtain a HITM response incur greater latency than most is typical for a load. In addition, since HITM indicates that some other processor had this data in its cache, it implies that the data was shared between processors, or potentially was a lock or semaphore value. This event is useful for locating sharing, false sharing, and contended locks.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HITM",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts memory load uops retired where the data is retrieved from the WCB (or fill buffer), indicating that the load found its data while that data was in the process of being brought into the L1 cache. Typically a load will receive this indication when some other load or prefetch missed the L1 cache and was in the process of retrieving the cache line containing the data, but that process had not yet finished (and written the data back to the cache). For example, consider load X and Y, both referencing the same cache line that is not in the L1 cache. If load X misses cache first, it obtains and WCB (or fill buffer) and begins the process of requesting the data. When load Y requests the data, it will either hit the WCB, or the L1 cache, depending on exactly what time the request to Y occurs.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads retired that hit WCB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts memory load uops retired where the data is retrieved from DRAM. Event is counted at retirement, so the speculative loads are ignored. A memory load can hit (or miss) the L1 cache, hit (or miss) the L2 cache, hit DRAM, hit in the WCB or receive a HITM response.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads retired that came from DRAM (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x40000032b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x36000032b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x10000032b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x04000032b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x02000032b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x00000432b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x00000132b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000043091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000013091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000003010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600003010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000003010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400003010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200003010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000043010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000013010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000008000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600008000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000008000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400008000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200008000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000048000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000018000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000004800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600004800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000004800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400004800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200004800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000044800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000014800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000004000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600004000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000004000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400004000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200004000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000044000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000014000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000002000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600002000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000002000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400002000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200002000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000042000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000012000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000001000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600001000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000001000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400001000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200001000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000041000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000011000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.ANY",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x3600000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
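
The OFFCORE_RESPONSE events above all share EventCode 0xB7 and UMask 0x1; only the MSR_OFFCORE_RESP value distinguishes them. Below is a minimal user-space sketch, not part of the patch, of counting one such event via perf_event_open(2). It assumes the standard x86 raw encoding (attr.config = event | umask << 8, attr.config1 = the offcore response mask) and uses OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE (MSRValue 0x0000010001) as the example:

    /* Sketch: count one OFFCORE_RESPONSE flavour from user space. */
    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        struct perf_event_attr attr;
        uint64_t count = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0x01b7;           /* UMask 0x1, EventCode 0xB7 */
        attr.config1 = 0x0000010001;    /* DEMAND_DATA_RD.ANY_RESPONSE */
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
            perror("perf_event_open");
            return 1;
        }
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... memory-bound workload under test would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
            printf("demand data read responses: %llu\n",
                   (unsigned long long)count);
        close(fd);
        return 0;
    }
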
diff --git a/tools/perf/pmu-events/arch/x86/goldmont/frontend.json b/tools/perf/pmu-events/arch/x86/goldmont/frontend.json
new file mode 100644
index 000000000..9ba085186
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmont/frontend.json
@@ -0,0 +1,52 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit). The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss). The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISS. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line. The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESS. Specifically, the event counts when accesses from straight line code crosses the cache line boundary, or when a branch target is to a new line.\r\nThis event counts differently than Intel processors based on Silvermont microarchitecture.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "ICACHE.ACCESSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM. The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine. Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops. The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.",
+ "EventCode": "0xE7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MS_DECODED.MS_ENTRY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "MS decode starts"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.",
+ "EventCode": "0xE9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Decode restrictions due to predicting wrong instruction length"
+ }
+] \ No newline at end of file
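
The ICACHE umasks above nest: ICACHE.ACCESSES (UMask 0x3) is the line-based union of ICACHE.HIT (0x1) and ICACHE.MISSES (0x2), so a per-line miss ratio can be derived from the two raw counters. A sketch, not part of the patch, assuming the standard raw encoding (event | umask << 8) and a counter group on the calling thread:

    /* Sketch: ICache line miss ratio from ICACHE.HIT and ICACHE.MISSES. */
    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int open_raw(uint64_t config, int group_fd)
    {
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = config;               /* event | umask << 8 */
        attr.disabled = (group_fd == -1);   /* only the leader starts off */
        attr.exclude_kernel = 1;
        return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
    }

    int main(void)
    {
        uint64_t hit = 0, miss = 0;
        int leader = open_raw(0x0180, -1);      /* ICACHE.HIT    */
        int member = open_raw(0x0280, leader);  /* ICACHE.MISSES */

        if (leader < 0 || member < 0) {
            perror("perf_event_open");
            return 1;
        }
        ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
        /* ... instruction-fetch-heavy workload would run here ... */
        ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
        if (read(leader, &hit, sizeof(hit)) != sizeof(hit) ||
            read(member, &miss, sizeof(miss)) != sizeof(miss))
            return 1;
        if (hit + miss)
            printf("ICache line miss ratio: %.2f%%\n",
                   100.0 * (double)miss / (double)(hit + miss));
        return 0;
    }
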
diff --git a/tools/perf/pmu-events/arch/x86/goldmont/memory.json b/tools/perf/pmu-events/arch/x86/goldmont/memory.json
new file mode 100644
index 000000000..690cebd12
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmont/memory.json
@@ -0,0 +1,294 @@
+[
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts when a memory load of a uop spans a page boundary (a split) is retired.",
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops that split a page (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts when a memory store of a uop spans a page boundary (a split) is retired.",
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops that split a page (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved as another core is in the process of modifying the data.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Machine clears due to memory ordering issue"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x20000032b7 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000022 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000003010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000008000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000004800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000004000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000002000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000001000 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000800 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000400 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000200 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000100 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000080 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000020 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000010 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000008 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000004 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000002 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address.",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
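
The MSRValue fields in this file compose as OR-able bit masks: request-type bits sit in the low word and response/snoop attributes in the high bits (NON_DRAM is 0x2000000000, i.e. bit 37). The bit positions below are inferred from the values in this file and are illustrative only; for example, 0x2000000022 (OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM) decomposes as NON_DRAM plus the demand-RFO and L2-prefetch-RFO request bits:

    /* Sketch: decomposing MSR_OFFCORE_RESP masks from this file.
     * Composite masks are plain ORs; L2_MISS.ANY (0x36...) is the
     * union of the four snoop outcomes below. */
    #include <stdint.h>
    #include <stdio.h>

    #define RSP_DEMAND_DATA_RD   (1ULL << 0)
    #define RSP_DEMAND_RFO       (1ULL << 1)
    #define RSP_PF_L2_RFO        (1ULL << 5)
    #define RSP_ANY_RESPONSE     (1ULL << 16)
    #define RSP_L2_HIT           (1ULL << 18)
    #define RSP_SNOOP_MISS       (1ULL << 33)
    #define RSP_HIT_NO_FWD       (1ULL << 34)
    #define RSP_HITM_OTHER_CORE  (1ULL << 36)
    #define RSP_NON_DRAM         (1ULL << 37)
    #define RSP_L2_MISS_ANY      (RSP_SNOOP_MISS | RSP_HIT_NO_FWD | \
                                  RSP_HITM_OTHER_CORE | RSP_NON_DRAM)

    int main(void)
    {
        /* OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM == 0x2000000022 */
        uint64_t rsp = RSP_NON_DRAM | RSP_DEMAND_RFO | RSP_PF_L2_RFO;

        printf("offcore_rsp=0x%010llx\n", (unsigned long long)rsp);
        return 0;
    }
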
diff --git a/tools/perf/pmu-events/arch/x86/goldmont/other.json b/tools/perf/pmu-events/arch/x86/goldmont/other.json
new file mode 100644
index 000000000..959cadd7c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmont/other.json
@@ -0,0 +1,82 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.",
+ "EventCode": "0x86",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "FETCH_STALL.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles code-fetch stalled due to any reason."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.",
+ "EventCode": "0x86",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles code-fetch stalled due to an outstanding ITLB miss."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Unfilled issue slots per cycle"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Unfilled issue slots per cycle to recover"
+ },
+ {
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts hardware interrupts received by the processor.",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "SampleAfterValue": "203",
+ "BriefDescription": "Hardware interrupts received"
+ },
+ {
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles hardware interrupts are masked"
+ },
+ {
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles pending interrupts are masked"
+ }
+] \ No newline at end of file
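
Since HW_INTERRUPTS.MASKED above counts core cycles with EFLAGS.IF clear, the share of run time spent with interrupts disabled is a simple ratio against unhalted core cycles. A sketch with hypothetical counter readings, not part of the patch; note that masked time accrues in kernel mode, so real counters would need exclude_kernel left at 0:

    /* Sketch: fraction of unhalted core cycles spent with interrupts
     * masked, from HW_INTERRUPTS.MASKED (0xCB/umask 0x2) and
     * CPU_CLK_UNHALTED.CORE readings. */
    #include <stdint.h>
    #include <stdio.h>

    static double irq_masked_fraction(uint64_t masked, uint64_t cycles)
    {
        return cycles ? (double)masked / (double)cycles : 0.0;
    }

    int main(void)
    {
        /* Hypothetical counter readings for illustration. */
        printf("%.1f%% of cycles ran with interrupts masked\n",
               100.0 * irq_masked_fraction(1200000, 48000000));
        return 0;
    }
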
diff --git a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json
new file mode 100644
index 000000000..254788af8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json
@@ -0,0 +1,455 @@
+[
+ {
+ "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired (Fixed event)"
+ },
+ {
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when core is not halted (Fixed event)"
+ },
+ {
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when core is not halted (Fixed event)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "LD_BLOCKS.4K_ALIAS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.UTLB_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts anytime a load that retires is blocked for any reason.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "LD_BLOCKS.ALL_BLOCK",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Uops issued to the back end per cycle"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Core cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when core is not halted"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Reference cycles when core is not halted. This event uses a programmable general purpose performance counter.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when core is not halted"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "UOPS_NOT_DELIVERED.ANY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Uops requested but not-delivered to the back-end per cycle"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts uops which retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts uops retired that are from the complex flows issued by the micro-sequencer (MS). Counts both the uops from a micro-coded instruction, and the uops that might be generated from a micro-coded assist.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.MS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "MS uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of floating point divide uops retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_RETIRED.FPDIV",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Floating point divide uops retired. (Precise Event Capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of integer divide uops retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_RETIRED.IDIV",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Integer divide uops retired. (Precise Event Capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears for any reason.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "MACHINE_CLEARS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All machine clears"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears due to floating point (FP) operations needing assists. For instance, if the result was a floating point denormal, the hardware clears the pipeline and reissues uops to produce the correct IEEE compliant denormal result.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Machine clears due to FP assists"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears due to memory disambiguation. Memory disambiguation happens when a load which has been issued conflicts with a previous unretired store in the pipeline whose address was not known at issue time, but is later resolved to be the same as the load address.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Machine clears due to memory disambiguation"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7e",
+ "EventName": "BR_INST_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired conditional branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired taken branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xbf",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired far branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xeb",
+ "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near return branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf7",
+ "EventName": "BR_INST_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near return instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near CALL branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf9",
+ "EventName": "BR_INST_RETIRED.CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near indirect CALL branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfb",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near indirect call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near relative CALL branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfd",
+ "EventName": "BR_INST_RETIRED.REL_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near relative call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfe",
+ "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7e",
+ "EventName": "BR_MISP_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xeb",
+ "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call. (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf7",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfb",
+ "EventName": "BR_MISP_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfe",
+ "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts core cycles if either divide unit is busy.",
+ "EventCode": "0xCD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CYCLES_DIV_BUSY.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles a divider is busy"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts core cycles the integer divide unit is busy.",
+ "EventCode": "0xCD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLES_DIV_BUSY.IDIV",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles the integer divide unit is busy"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts core cycles the floating point divide unit is busy.",
+ "EventCode": "0xCD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLES_DIV_BUSY.FPDIV",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles the FP divide unit is busy"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEARS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "BACLEARs asserted for any branch type"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts BACLEARS on return instructions.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BACLEARS.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "BACLEARs asserted for return branch"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BACLEARS.COND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "BACLEARs asserted for conditional branch"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json
new file mode 100644
index 000000000..9805198d3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json
@@ -0,0 +1,75 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts every core cycle when a Data-side (walks due to a data operation) page walk is in progress.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Duration of D-side page-walks in cycles"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts every core cycle when a Instruction-side (walks due to an instruction fetch) page walk is in progress.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Duration of I-side pagewalks in cycles"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts every core cycle a page-walk is in progress due to either a data memory operation or an instruction fetch.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Duration of page-walks in cycles"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch. It counts when new translation are filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+ "EventCode": "0x81",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "ITLB misses"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that caused a DTLB miss.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts store uops retired that caused a DTLB miss.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts uops retired that had a DTLB miss on load, store or either. Note that when two distinct memory operations to the same page miss the DTLB, only one of them will be recorded as a DTLB miss.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x13",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json
new file mode 100644
index 000000000..b4791b443
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/cache.json
@@ -0,0 +1,1453 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache request misses"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache requests"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.",
+ "EventCode": "0x30",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "L2_REJECT_XQ.ALL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests rejected by the XQ"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to insure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.",
+ "EventCode": "0x31",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "CORE_REJECT_L2Q.ALL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests rejected by the L2Q"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DL1.REPLACEMENT",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1 Cache evictions for dirty data"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.",
+ "EventCode": "0x86",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "EventCode": "0xB7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts locked memory uops retired. This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. See the SDM for a complete description of which memory load accesses are locks.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Locked load uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x43",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of load uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of store uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x83",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that hit the L1 data cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that hit L1 data cache (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that hit in the L2 cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that hit L2 (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that miss the L1 data cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that miss in the L2 cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that missed L2 (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired where the cache line containing the data was in the modified state of another core or modules cache (HITM). More specifically, this means that when the load address was checked by other caching agents (typically another processor) in the system, one of those caching agents indicated that they had a dirty copy of the data. Loads that obtain a HITM response incur greater latency than most is typical for a load. In addition, since HITM indicates that some other processor had this data in its cache, it implies that the data was shared between processors, or potentially was a lock or semaphore value. This event is useful for locating sharing, false sharing, and contended locks.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HITM",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uop retired where cross core or cross module HITM occurred (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts memory load uops retired where the data is retrieved from the WCB (or fill buffer), indicating that the load found its data while that data was in the process of being brought into the L1 cache. Typically a load will receive this indication when some other load or prefetch missed the L1 cache and was in the process of retrieving the cache line containing the data, but that process had not yet finished (and written the data back to the cache). For example, consider load X and Y, both referencing the same cache line that is not in the L1 cache. If load X misses cache first, it obtains and WCB (or fill buffer) and begins the process of requesting the data. When load Y requests the data, it will either hit the WCB, or the L1 cache, depending on exactly what time the request to Y occurs.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.WCB_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads retired that hit WCB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts memory load uops retired where the data is retrieved from DRAM. Event is counted at retirement, so the speculative loads are ignored. A memory load can hit (or miss) the L1 cache, hit (or miss) the L2 cache, hit DRAM, hit in the WCB or receive a HITM response.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads retired that came from DRAM (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data reads of full cache lines outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts bus lock and split lock requests outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000011000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000041000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200001000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000001000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000001000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache lines requests by software prefetch instructions outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000012000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000042000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200002000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000002000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000002000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000014800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000044800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200004800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000004800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000004800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000018000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000048000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000008000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts requests to the uncore subsystem outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000013010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000043010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200003010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000003010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000003010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000013091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000043091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000003091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data reads (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000040022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x00000132b7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) have any transaction responses from the uncore subsystem.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x00000432b7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) hit the L2 cache.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x02000032b7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) true miss for the L2 cache with a snoop miss in the other processor module. ",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x10000032b7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.HITM_OTHER_CORE",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6, 0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.",
+ "Offcore": "1"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)",
+ "EventCode": "0xB7",
+ "MSRValue": "0x40000032b7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
+ "PDIR_COUNTER": "na",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
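The OFFCORE_RESPONSE entries above all share event code 0xB7 and umask 0x1; what distinguishes them is the "MSRValue" written into MSR_OFFCORE_RESP0/1 (MSRIndex 0x1a6/0x1a7). Once these JSON tables are built into perf, the events are usable by name (e.g. perf stat -e offcore_response.bus_locks.any_response). For reference, here is a minimal sketch, not part of this patch, of the equivalent raw programming via perf_event_open(2), assuming the usual x86 convention that the offcore response selection is passed through attr.config1:

/*
 * Hypothetical example: count OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE
 * for the calling thread. EventCode and UMask go in attr.config; the
 * offcore response selection (the JSON "MSRValue") goes in attr.config1.
 */
#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr = { 0 };
	uint64_t count;
	int fd;

	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0xB7 | (0x1 << 8);	/* EventCode | (UMask << 8) */
	attr.config1 = 0x0000010400ULL;		/* MSRValue from the JSON */
	attr.disabled = 1;

	fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload of interest here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("bus lock/split lock responses: %llu\n",
		       (unsigned long long)count);
	return 0;
}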
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json b/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json
new file mode 100644
index 000000000..a7878965c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/frontend.json
@@ -0,0 +1,62 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is in the ICache (hit). The event strives to count on a cache line basis, so that multiple accesses which hit in a single cache line count as one ICACHE.HIT. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ICACHE.HIT",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "References per ICache line that are available in the ICache (hit). This event counts differently than Intel processors based on Silvermont microarchitecture"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line and that cache line is not in the ICache (miss). The event strives to count on a cache line basis, so that multiple accesses which miss in a single cache line count as one ICACHE.MISS. Specifically, the event counts when straight line code crosses the cache line boundary, or when a branch target is to a new line, and that cache line is not in the ICache. This event counts differently than Intel processors based on Silvermont microarchitecture.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ICACHE.MISSES",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "References per ICache line that are not available in the ICache (miss). This event counts differently than Intel processors based on Silvermont microarchitecture"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts requests to the Instruction Cache (ICache) for one or more bytes in an ICache Line. The event strives to count on a cache line basis, so that multiple fetches to a single cache line count as one ICACHE.ACCESS. Specifically, the event counts when accesses from straight line code crosses the cache line boundary, or when a branch target is to a new line.\r\nThis event counts differently than Intel processors based on Silvermont microarchitecture.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ICACHE.ACCESSES",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "References per ICache line. This event counts differently than Intel processors based on Silvermont microarchitecture"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times the Microcode Sequencer (MS) starts a flow of uops from the MSROM. It does not count every time a uop is read from the MSROM. The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine. Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort that initiates a flow of uops. The event will count MS startups for uops that are speculative, and subsequently cleared by branch mispredict or a machine clear.",
+ "EventCode": "0xE7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MS_DECODED.MS_ENTRY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "MS decode starts"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times the prediction (from the predecode cache) for instruction length is incorrect.",
+ "EventCode": "0xE9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Decode restrictions due to predicting wrong instruction length"
+ }
+] \ No newline at end of file
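Since ICACHE.ACCESSES (umask 0x3) is simply the union of ICACHE.HIT (0x1) and ICACHE.MISSES (0x2) on event 0x80, an ICache hit rate can be derived from two of the counts. A hypothetical helper, not part of this patch:

#include <stdint.h>
#include <stdio.h>

/* Hit rate from the two event counts; ACCESSES = HIT + MISSES. */
static double icache_hit_rate(uint64_t hits, uint64_t misses)
{
	uint64_t accesses = hits + misses;

	return accesses ? (double)hits / (double)accesses : 0.0;
}

int main(void)
{
	/* Example numbers only. */
	printf("ICache hit rate: %.3f\n", icache_hit_rate(980000, 20000));
	return 0;
}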
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json
new file mode 100644
index 000000000..91e0815f3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/memory.json
@@ -0,0 +1,38 @@
+[
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts when a memory load of a uop spans a page boundary (a split) is retired.",
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.LOAD_PAGE_SPLIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops that split a page (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts when a memory store of a uop spans a page boundary (a split) is retired.",
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops that split a page (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "Machine clears due to memory ordering issue"
+ }
+] \ No newline at end of file
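MACHINE_CLEARS.MEMORY_ORDERING is usually consumed normalized to retired instructions, which makes cross-core sharing problems easier to spot than a raw count does. A hypothetical helper, not part of this patch:

#include <stdint.h>
#include <stdio.h>

/* Memory-ordering machine clears per 1000 retired instructions. */
static double ordering_clears_pki(uint64_t clears, uint64_t instructions)
{
	return instructions ? 1000.0 * (double)clears / (double)instructions
			    : 0.0;
}

int main(void)
{
	/* Example numbers only. */
	printf("clears/kinsn: %.4f\n", ordering_clears_pki(120, 50000000));
	return 0;
}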
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/other.json b/tools/perf/pmu-events/arch/x86/goldmontplus/other.json
new file mode 100644
index 000000000..b86037441
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/other.json
@@ -0,0 +1,98 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.",
+ "EventCode": "0x86",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "FETCH_STALL.ALL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles code-fetch stalled due to any reason."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.",
+ "EventCode": "0x86",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles the code-fetch stalls and an ITLB miss is outstanding."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend due to either a full resource in the backend (RESOURCE_FULL) or due to the processor recovering from some event (RECOVERY).",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ISSUE_SLOTS_NOT_CONSUMED.ANY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Unfilled issue slots per cycle"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Unfilled issue slots per cycle to recover"
+ },
+ {
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts hardware interrupts received by the processor.",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "203",
+ "BriefDescription": "Hardware interrupts received"
+ },
+ {
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles hardware interrupts are masked"
+ },
+ {
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts core cycles during which there are pending interrupts, but interrupts are masked (EFLAGS.IF = 0).",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles pending interrupts are masked"
+ }
+] \ No newline at end of file
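ISSUE_SLOTS_NOT_CONSUMED.ANY counts per slot, so turning it into a stall fraction means dividing by the total issue slots (cycles times machine width). A hypothetical helper, not part of this patch; the 3-slots-per-cycle width is an assumption based on the 3-uop examples in the UOPS_NOT_DELIVERED description later in this patch:

#include <stdint.h>
#include <stdio.h>

#define ISSUE_SLOTS_PER_CYCLE	3.0	/* assumed allocation width */

/* Fraction of issue slots that went unfilled. */
static double unfilled_slot_fraction(uint64_t slots_not_consumed,
				     uint64_t cycles)
{
	return cycles ? (double)slots_not_consumed /
			(ISSUE_SLOTS_PER_CYCLE * (double)cycles)
		      : 0.0;
}

int main(void)
{
	/* Example numbers only. */
	printf("unfilled: %.2f\n", unfilled_slot_fraction(30000000, 20000000));
	return 0;
}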
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json
new file mode 100644
index 000000000..ccf1aed69
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/pipeline.json
@@ -0,0 +1,544 @@
+[
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "PEBScounters": "32",
+ "EventName": "INST_RETIRED.ANY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired (Fixed event)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "PEBScounters": "33",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when core is not halted (Fixed event)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "PEBScounters": "34",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when core is not halted (Fixed event)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LD_BLOCKS.4K_ALIAS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LD_BLOCKS.UTLB_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts anytime a load that retires is blocked for any reason.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "LD_BLOCKS.ALL_BLOCK",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Uops issued to the back end per cycle"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Core cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when core is not halted"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Reference cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when core is not halted"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "UOPS_NOT_DELIVERED.ANY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Uops requested but not-delivered to the back-end per cycle"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts INST_RETIRED.ANY using the Reduced Skid PEBS feature that reduces the shadow in which events aren't counted allowing for a more unbiased distribution of samples across instructions retired.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired - using Reduced Skid PEBS feature"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts uops which retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.ANY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts uops retired that are from the complex flows issued by the micro-sequencer (MS). Counts both the uops from a micro-coded instruction, and the uops that might be generated from a micro-coded assist.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.MS",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "MS uops retired (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of floating point divide uops retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.FPDIV",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Floating point divide uops retired (Precise Event Capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of integer divide uops retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.IDIV",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Integer divide uops retired (Precise Event Capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears for any reason.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.ALL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "All machine clears"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears due to floating point (FP) operations needing assists. For instance, if the result was a floating point denormal, the hardware clears the pipeline and reissues uops to produce the correct IEEE compliant denormal result.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "Machine clears due to FP assists"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts machine clears due to memory disambiguation. Memory disambiguation happens when a load which has been issued conflicts with a previous unretired store in the pipeline whose address was not known at issue time, but is later resolved to be the same as the load address.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "Machine clears due to memory disambiguation"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times that the machines clears due to a page fault. Covers both I-side and D-side(Loads/Stores) page faults. A page fault occurs when either page is not present, or an access violation",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "Machines clear due to a page fault"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7e",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired conditional branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts the number of taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired taken branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xbf",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired far branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xeb",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near return branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf7",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near return instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near CALL branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf9",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near indirect CALL branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfb",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near indirect call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts near relative CALL branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfd",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.REL_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired near relative call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfe",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7e",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xeb",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf7",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfb",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0xfe",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts core cycles if either divide unit is busy.",
+ "EventCode": "0xCD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "CYCLES_DIV_BUSY.ALL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles a divider is busy"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts core cycles the integer divide unit is busy.",
+ "EventCode": "0xCD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "CYCLES_DIV_BUSY.IDIV",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles the integer divide unit is busy"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts core cycles the floating point divide unit is busy.",
+ "EventCode": "0xCD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "CYCLES_DIV_BUSY.FPDIV",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles the FP divide unit is busy"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BACLEARS.ALL",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "BACLEARs asserted for any branch type"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts BACLEARS on return instructions.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BACLEARS.RETURN",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "BACLEARs asserted for return branch"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "BACLEARS.COND",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "BACLEARs asserted for conditional branch"
+ }
+] \ No newline at end of file
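The EventCode/UMask pairs in these tables map directly onto perf's raw hardware event encoding: on x86, bits 7:0 of perf_event_attr.config carry the event code and bits 15:8 carry the umask. Below is a minimal sketch (not part of this patch) of counting one of the events above from C, assuming a Linux host with PMU access and using BR_INST_RETIRED.ALL_BRANCHES (EventCode 0xC4, UMask 0x0) as the example; the command-line equivalent would be "perf stat -e rc4".

/*
 * Sketch only: count a raw PMU event with perf_event_open(2).
 * config = (UMask << 8) | EventCode, here BR_INST_RETIRED.ALL_BRANCHES
 * (0xC4/0x00) from the table above.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = (0x00ULL << 8) | 0xC4;	/* UMask << 8 | EventCode */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0);	/* this thread, any CPU */
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under measurement ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("branches retired: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}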
diff --git a/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json
new file mode 100644
index 000000000..0b53a3b0d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/goldmontplus/virtual-memory.json
@@ -0,0 +1,218 @@
+[
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walk completed due to a demand load to a 4K page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walk completed due to a demand load to a 2M or 4M page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to demand data loads (including SW prefetches) whose address translations missed in all TLB levels and were mapped to 1GB pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1GB",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walk completed due to a demand load to a 1GB page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts once per cycle for each page walk occurring due to a load (demand data loads or SW prefetches). Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walks outstanding due to a demand load every cycle."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to a demand data store to a 4K page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to a demand data store to a 2M or 4M page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1GB pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1GB",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to a demand data store to a 1GB page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts once per cycle for each page walk occurring due to a demand data store. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walks outstanding due to a demand data store every cycle."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts once per cycle for each page walk only while traversing the Extended Page Table (EPT), and does not count during the rest of the translation. The EPT is used for translating Guest-Physical Addresses to Physical Addresses for Virtual Machine Monitors (VMMs). Average cycles per walk can be calculated by dividing the count by number of walks.",
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "EPT.WALK_PENDING",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walks outstanding due to walking the EPT every cycle"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch. It counts when new translation are filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+ "EventCode": "0x81",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ITLB.MISS",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "ITLB misses"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to an instruction fetch in a 4K page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 2M or 4M pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to an instruction fetch in a 2M or 4M page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts page walks completed due to instruction fetches whose address translations missed in the TLB and were mapped to 1GB pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1GB",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to an instruction fetch in a 1GB page"
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts once per cycle for each page walk occurring due to an instruction fetch. Includes cycles spent traversing the Extended Page Table (EPT). Average cycles per walk can be calculated by dividing by the number of walks.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Page walks outstanding due to an instruction fetch every cycle."
+ },
+ {
+ "CollectPEBSRecord": "1",
+ "PublicDescription": "Counts STLB flushes. The TLBs are flushed on instructions like INVLPG and MOV to CR3.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "TLB_FLUSHES.STLB_ANY",
+ "PDIR_COUNTER": "na",
+ "SampleAfterValue": "20003",
+ "BriefDescription": "STLB flushes"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts load uops retired that caused a DTLB miss.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops retired that missed the DTLB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts store uops retired that caused a DTLB miss.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops retired that missed the DTLB (Precise event capable)"
+ },
+ {
+ "PEBS": "2",
+ "CollectPEBSRecord": "2",
+ "PublicDescription": "Counts uops retired that had a DTLB miss on load, store or either. Note that when two distinct memory operations to the same page miss the DTLB, only one of them will be recorded as a DTLB miss.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x13",
+ "PEBScounters": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)"
+ }
+] \ No newline at end of file
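Several of the descriptions above point out that average cycles per page walk can be derived by dividing a WALK_PENDING count (one per outstanding walk per cycle) by the number of completed walks. Below is a minimal sketch (not part of this patch) of that calculation, under the same perf_event_open(2) assumptions as the earlier example; it pairs DTLB_LOAD_MISSES.WALK_PENDING (EventCode 0x08, UMask 0x10) with only the 4K completion umask (0x2), so a full measurement would also sum the 2M/4M (0x4) and 1GB (0x8) completion events.

/*
 * Sketch only: average page-walk duration for demand loads, computed
 * as WALK_PENDING / WALK_COMPLETED_4K. Both events are opened in one
 * group so they count over the same interval.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

static int perf_open_raw(uint64_t config, int group_fd)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = config;
	attr.read_format = PERF_FORMAT_GROUP;
	attr.disabled = (group_fd == -1);	/* only the leader starts disabled */
	return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
	uint64_t values[3];	/* values[0]=nr, [1]=leader, [2]=member */
	int lead, sub;

	lead = perf_open_raw(0x10ULL << 8 | 0x08, -1);	/* WALK_PENDING */
	sub = perf_open_raw(0x02ULL << 8 | 0x08, lead);	/* WALK_COMPLETED_4K */
	if (lead < 0 || sub < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(lead, PERF_EVENT_IOC_ENABLE, 0);
	/* ... TLB-heavy workload ... */
	ioctl(lead, PERF_EVENT_IOC_DISABLE, 0);
	read(lead, values, sizeof(values));
	if (values[2])
		printf("avg cycles per walk: %.1f\n",
		       (double)values[1] / (double)values[2]);
	return 0;
}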
diff --git a/tools/perf/pmu-events/arch/x86/haswell/cache.json b/tools/perf/pmu-events/arch/x86/haswell/cache.json
new file mode 100644
index 000000000..da4d6ddd4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/cache.json
@@ -0,0 +1,1064 @@
+[
+ {
+ "PublicDescription": "Demand data read requests that missed L2, no rejects.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x22",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand requests that miss L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x27",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.L2_PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 prefetch requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "All requests that missed L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand data read requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x44",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x50",
+ "EventName": "L2_RQSTS.L2_PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 prefetch requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe1",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 store RFO requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe2",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 code requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe4",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand requests to L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe7",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf8",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "All requests to L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "Errata": "HSD78",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All L2 requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks that hit L2 cache.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x50",
+ "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D miss oustandings duration in cycles",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.REQUEST_FB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch. HWP are e.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts when new data lines are brought into the L1 Data cache, which cause other lines to be evicted from the cache.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding demand data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD78, HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD78, HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD78, HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding Demand code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "HSD62, HSD61",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles in which the L1D is locked.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1D is locked",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand data read requests sent to uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD78",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand code read requests sent to uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that miss the STLB. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that miss the STLB. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "Errata": "HSD76, HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with locked access. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load uops. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts all store uops retired. This is a precise event.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired store uops. (precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD76, HSD29, HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops misses in L1 cache as data sources.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "HSD29, HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "Errata": "HSM30",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD29, HSD25, HSM26, HSM30",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD29, HSD25, HSM26, HSM30",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "HSD29, HSD25, HSM26, HSM30",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD74, HSD29, HSD25, HSM30",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PublicDescription": "Demand data read requests that access L2 cache.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFO requests that access L2 cache.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANS.RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache accesses when fetching instructions.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANS.CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Any MLC or L3 HW prefetch accessing L2, including rejects.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L1D writebacks that access L2 cache.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANS.L1D_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 fill requests that access L2 cache.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANS.L2_FILL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 writebacks that access L2 cache.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Transactions accessing L2 pipe.",
+ "EventCode": "0xf0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in I state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_IN.I",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in S state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in E state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache. Lines are filled into the L2 cache when there was an L2 miss.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Clean L2 cache lines evicted by demand.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by demand",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by demand",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "",
+ "EventCode": "0xf4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Split locks in SQ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c8fff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all requests that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c07f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c07f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
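
All of the OFFCORE_RESPONSE entries above share one mechanism: the event select (0xB7 or 0xBB) is valid only when paired with its dedicated response MSR (0x1A6 or 0x1A7, respectively), and MSRValue encodes the request/response attributes. A minimal sketch of how a tool might enumerate those pairings from a file in this patch; the helper and its invocation below are illustrative, not part of the kernel's jevents tooling:

    import json

    def offcore_pairings(path):
        """Print the event-select/MSR programming for each offcore event."""
        with open(path) as f:
            events = json.load(f)
        for ev in events:
            if ev.get("Offcore") != "1":
                continue
            # "0xB7, 0xBB" pairs positionally with "0x1a6,0x1a7": event
            # 0xB7 uses MSR_OFFCORE_RSP_0 (0x1A6), 0xBB uses 0x1A7.
            codes = [c.strip() for c in ev["EventCode"].split(",")]
            msrs = [m.strip() for m in ev["MSRIndex"].split(",")]
            for code, msr in zip(codes, msrs):
                print(f'{ev["EventName"]}: event {code} umask {ev["UMask"]}, '
                      f'MSR {msr} = {ev["MSRValue"]}')

    # e.g. offcore_pairings("tools/perf/pmu-events/arch/x86/haswell/cache.json")
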
diff --git a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json
new file mode 100644
index 000000000..f9843e5a9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json
@@ -0,0 +1,83 @@
+[
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "HSD56, HSM57",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "HSD56, HSM57",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.",
+ "EventCode": "0xC6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "AVX_INSTS.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of X87 FP assists due to output values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of X87 FP assists due to input values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD FP assists due to output values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD FP assists due to input values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
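
FP_ASSIST.ANY's umask (0x1e) is the bitwise OR of the four specific assist umasks defined above, and its CounterMask of 1 turns the uop count into cycles with at least one assist. A one-line sanity check of that composition (illustrative only, not part of the patch):

    # 0x2 | 0x4 | 0x8 | 0x10 == 0x1e: .ANY is the union of the four assists.
    assert 0x2 | 0x4 | 0x8 | 0x10 == 0x1e
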
diff --git a/tools/perf/pmu-events/arch/x86/haswell/frontend.json b/tools/perf/pmu-events/arch/x86/haswell/frontend.json
new file mode 100644
index 000000000..c0a5bedcc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/frontend.json
@@ -0,0 +1,294 @@
+[
+ {
+ "PublicDescription": "Counts cycles the IDQ is empty.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD135",
+ "EventName": "IDQ.EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles DSB is delivered at least one uops. Set Cmask = 1.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles MITE is delivered at least one uop. Set Cmask = 1.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops delivered to IDQ from any path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts Instruction Cache (ICACHE) misses.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE.IFETCH_STALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE.IFDATA_STALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event count the number of undelivered (unallocated) uops from the Front-end to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts the number cycles during which the Front-end allocated exactly zero uops to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. This event is counted on a per-core basis.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
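
The combined IDQ umasks in this file are bitwise ORs of the per-path umasks, which is why the ALL_DSB, ALL_MITE, and MS events reuse values like 0x18, 0x24, and 0x30; likewise, the IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_N_UOP_DELIV entries use CounterMask = 4 - N, since the event counts undelivered slots out of four per cycle. A short check of the umask compositions (illustrative only):

    # Per-path IDQ umasks from the entries above, and the combined masks.
    MITE, DSB, MS_DSB, MS_MITE = 0x04, 0x08, 0x10, 0x20
    assert DSB | MS_DSB == 0x18                    # IDQ.ALL_DSB_CYCLES_*
    assert MITE | MS_MITE == 0x24                  # IDQ.ALL_MITE_CYCLES_*
    assert MS_DSB | MS_MITE == 0x30                # IDQ.MS_UOPS / IDQ.MS_CYCLES
    assert MITE | DSB | MS_DSB | MS_MITE == 0x3c   # IDQ.MITE_ALL_UOPS
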
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
new file mode 100644
index 000000000..5ab5c78fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -0,0 +1,158 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
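
Each MetricExpr above is plain arithmetic over event names; a consumer resolves every name to its measured counter value and evaluates the expression. A minimal sketch for two of the simpler metrics; the counter values below are made up, only the formulas come from the file:

    # Hypothetical counter values for illustration.
    counters = {
        "INST_RETIRED.ANY": 1_200_000_000,
        "CPU_CLK_UNHALTED.THREAD": 800_000_000,
        "UOPS_RETIRED.RETIRE_SLOTS": 1_500_000_000,
    }
    ipc = counters["INST_RETIRED.ANY"] / counters["CPU_CLK_UNHALTED.THREAD"]
    upi = counters["UOPS_RETIRED.RETIRE_SLOTS"] / counters["INST_RETIRED.ANY"]
    print(f"IPC = {ipc:.2f}, UPI = {upi:.2f}")  # IPC = 1.50, UPI = 1.25
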
diff --git a/tools/perf/pmu-events/arch/x86/haswell/memory.json b/tools/perf/pmu-events/arch/x86/haswell/memory.json
new file mode 100644
index 000000000..e5f9fa665
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/memory.json
@@ -0,0 +1,676 @@
+[
+ {
+ "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative cache-line split store-address uops dispatched to L1D.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional writes.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "TX_EXEC.MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "TX_EXEC.MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "TX_EXEC.MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "TX_EXEC.MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of memory ordering machine clears detected. Memory ordering machine clears can result from memory address aliasing or snoops from another hardware thread or core to data inflight in the pipeline. Machine clears can have a significant performance impact if they are happening frequently.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "HLE_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution started.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution successfully committed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "HLE_RETIRED.ABORTED_MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "HLE_RETIRED.ABORTED_MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "HLE_RETIRED.ABORTED_MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "Errata": "HSD65",
+ "EventName": "HLE_RETIRED.ABORTED_MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts).",
+ "EventCode": "0xc8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "HLE_RETIRED.ABORTED_MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RTM_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution started.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution successfully committed.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RTM_RETIRED.ABORTED_MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RTM_RETIRED.ABORTED_MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RTM_RETIRED.ABORTED_MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "Errata": "HSD65",
+ "EventName": "RTM_RETIRED.ABORTED_MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "EventCode": "0xc9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RTM_RETIRED.ABORTED_MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 4.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Loads with latency value being above 8.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Loads with latency value being above 16.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 32.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Loads with latency value being above 64.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Loads with latency value being above 128.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Loads with latency value being above 256.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x1",
+ "Errata": "HSD76, HSD25, HSM26",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Loads with latency value being above 512.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc08fff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all requests that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01004007f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc007f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_CODE_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss in the L3",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
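
The PublicDescription text repeated in the entries above describes the offcore response programming model: the event select/counter pair (EventCode 0xB7 or 0xBB) is qualified by a transaction-attribute mask written to a dedicated MSR (MSRIndex 0x1a6/0x1a7, with the mask taken from MSRValue). Below is a minimal userspace sketch, not a definitive implementation, of how such an event could be requested through perf_event_open(2); it assumes the kernel's convention of passing the auxiliary MSR value in attr.config1, borrows the encoding from the OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_RESPONSE entry above, and keeps error handling minimal.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0x01b7;           /* EventCode 0xB7, UMask 0x1 */
        attr.config1 = 0x3fffc00001ULL; /* MSRValue for L3_MISS.ANY_RESPONSE */
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);  /* this thread, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under test ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        read(fd, &count, sizeof(count));
        printf("offcore responses: %llu\n", (unsigned long long)count);
        close(fd);
        return 0;
}
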
diff --git a/tools/perf/pmu-events/arch/x86/haswell/other.json b/tools/perf/pmu-events/arch/x86/haswell/other.json
new file mode 100644
index 000000000..8a4d898d7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/other.json
@@ -0,0 +1,43 @@
+[
+ {
+ "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPL_CYCLES.RING0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPL_CYCLES.RING123",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
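
Several entries in this file (and in pipeline.json below) combine EventCode and UMask with modifier fields such as EdgeDetect, CounterMask, Invert, and AnyThread. On x86 these pack into fixed bit ranges of the architectural IA32_PERFEVTSELx register, which is also what perf's raw cpu/.../ event syntax ultimately encodes. A small illustrative sketch of that packing follows, using CPL_CYCLES.RING0_TRANS above as the worked value; the bit positions are the architectural ones, and raw_config() is a hypothetical helper written for this example, not an existing API.

#include <stdint.h>
#include <stdio.h>

/* Pack PMU event fields into the IA32_PERFEVTSELx layout:
 * event 7:0, umask 15:8, edge bit 18, any bit 21, inv bit 23, cmask 31:24. */
static uint64_t raw_config(uint8_t event, uint8_t umask, int edge,
                           int any, int inv, uint8_t cmask)
{
        return (uint64_t)event |
               ((uint64_t)umask << 8) |
               ((uint64_t)(edge != 0) << 18) |
               ((uint64_t)(any != 0) << 21) |
               ((uint64_t)(inv != 0) << 23) |
               ((uint64_t)cmask << 24);
}

int main(void)
{
        /* CPL_CYCLES.RING0_TRANS above: EventCode 0x5C, UMask 0x1,
         * EdgeDetect 1, CounterMask 1. */
        printf("raw config: 0x%llx\n",
               (unsigned long long)raw_config(0x5C, 0x1, 1, 0, 0, 1));
        return 0;
}

This prints 0x104015c, which should correspond roughly to cpu/event=0x5c,umask=0x1,edge,cmask=1/ in perf's event-term syntax.
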
diff --git a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json
new file mode 100644
index 000000000..a4dcfce4a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json
@@ -0,0 +1,1338 @@
+[
+ {
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "Errata": "HSD140, HSD143",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "loads blocked by overlapping with store buffer that cannot be forwarded",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline which can have a performance impact.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles spent waiting for a recovery after an event such as a processor nuke, JEClear, assist, hle/rtm abort etc.",
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops issued by the Front-end of the pipeline to the Back-end. This event is counted at the allocation stage and will count both retired and non-retired uops.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (for example, 2 sources + immediate) regardless of whether it is a result of LEA instruction or not.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ARITH.DIVIDER_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3c",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+ "EventCode": "0x4c",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+ "EventCode": "0x4c",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of integer move elimination candidate uops that were eliminated.",
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD move elimination candidate uops that were eliminated.",
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of integer move elimination candidate uops that were not eliminated.",
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD move elimination candidate uops that were not eliminated.",
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the Reservation Station ( RS ) is empty for the thread. The RS is a structure that buffers allocated micro-ops from the Front-end. If there are many cycles when the RS is empty, it may represent an underflow of instructions delivered from the Front-end.",
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).",
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Stall cycles due to IQ is full.",
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stall cycles because IQ is full",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc2",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc8",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 0 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are exectuted in port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are executed in port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are exectuted in port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are executed in port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are exectuted in port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are executed in port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are exectuted in port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are executed in port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are exectuted in port 6.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are executed in port 6.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles allocation is stalled due to resource related reason.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD135",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD78",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending L2 cache miss loads.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with pending memory loads. Set Cmask=2 to count cycle.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending memory loads.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of loads missed L2.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to L2 cache misses.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline and there were memory instructions pending (waiting for data).",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to memory subsystem.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending L1 cache miss loads.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to L1 data cache misses",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Number of uops delivered by the LSD.",
+ "EventCode": "0xa8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of uops executed on the core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "HSD30, HSM31",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instructions at retirement.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "Errata": "HSD11, HSD140",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "Errata": "HSD140",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "CounterHTOff": "1"
+ },
+ {
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.",
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Actually retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Branch instructions at retirement.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Return instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of not taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of far branches retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Mispredicted branch instructions at retirement.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This event counts all mispredicted branch instructions retired. This is a precise event.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count cases of saving new LBR records by hardware.",
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count cases of saving new LBR",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+ "EventCode": "0xe6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
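
INST_RETIRED.ANY and CPU_CLK_UNHALTED.THREAD at the top of this file live on fixed counters, so a derived ratio such as IPC does not have to consume the programmable counters. Below is a hedged sketch that reads the two counts through the kernel's generic aliases (PERF_COUNT_HW_INSTRUCTIONS and PERF_COUNT_HW_CPU_CYCLES), which on Haswell typically map onto these events; the busy loop stands in for a real workload and error checking is omitted for brevity.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>

/* Open one generalized hardware counter for the calling thread;
 * group_fd == -1 creates a (disabled) group leader. */
static int open_counter(uint64_t config, int group_fd)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = config;
        attr.disabled = (group_fd == -1);
        return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
        uint64_t cycles = 0, instrs = 0;
        int leader, member;

        leader = open_counter(PERF_COUNT_HW_CPU_CYCLES, -1);
        member = open_counter(PERF_COUNT_HW_INSTRUCTIONS, leader);

        ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
        for (volatile int i = 0; i < 1000000; i++)
                ;       /* stand-in workload */
        ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

        read(leader, &cycles, sizeof(cycles));
        read(member, &instrs, sizeof(instrs));
        printf("IPC ~ %.2f\n", cycles ? (double)instrs / cycles : 0.0);
        return 0;
}
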
diff --git a/tools/perf/pmu-events/arch/x86/haswell/uncore.json b/tools/perf/pmu-events/arch/x86/haswell/uncore.json
new file mode 100644
index 000000000..3ef5c21fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/uncore.json
@@ -0,0 +1,374 @@
+[
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x21",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL",
+ "BriefDescription": "An external snoop misses in some processor core.",
+ "PublicDescription": "An external snoop misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x41",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x24",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL",
+ "BriefDescription": "An external snoop hits a non-modified line in some processor core.",
+ "PublicDescription": "An external snoop hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x44",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x84",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x28",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL",
+ "BriefDescription": "An external snoop hits a modified line in some processor core.",
+ "PublicDescription": "An external snoop hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x48",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x88",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x11",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_M",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in M-state.",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x21",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in M-state.",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x41",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_M",
+ "BriefDescription": "L3 Lookup external snoop request that access cache and found line in M-state.",
+ "PublicDescription": "L3 Lookup external snoop request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in M-state.",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x18",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in I-state.",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x28",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_I",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in I-state.",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x48",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_I",
+ "BriefDescription": "L3 Lookup external snoop request that access cache and found line in I-state.",
+ "PublicDescription": "L3 Lookup external snoop request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x88",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in I-state.",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x1f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x2f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x4f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI",
+ "BriefDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.",
+ "PublicDescription": "L3 Lookup external snoop request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x8f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x86",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x46",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_ES",
+ "BriefDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.",
+ "PublicDescription": "L3 Lookup external snoop request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x16",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x26",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x20",
+ "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+ "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x83",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.All",
+ "BriefDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory)",
+ "PublicDescription": "Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory).",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x84",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "NCU",
+ "EventCode": "0x0",
+ "UMask": "0x01",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "FIXED",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+] \ No newline at end of file
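The "Unit" field above selects the uncore PMU these events program: on client Haswell parts the kernel's uncore driver is expected to expose the Cbox units ("CBO") as uncore_cbox_<n> and the system agent arbiter ("iMPH-U") as uncore_arb, with the event select in config bits 0-7 and the unit mask in bits 8-15. A minimal Python sketch under those assumptions (the entry dict and PMU name below are illustrative, not part of the patch):

    def uncore_perf_event(entry, pmu="uncore_cbox_0"):
        """Build a perf event string from one JSON entry above."""
        event = int(entry["EventCode"], 16)  # event select, config bits 0-7
        umask = int(entry["UMask"], 16)      # unit mask, config bits 8-15
        return f"{pmu}/event={event:#x},umask={umask:#x}/"

    any_mesi = {"EventCode": "0x34", "UMask": "0x8f"}  # UNC_CBO_CACHE_LOOKUP.ANY_MESI
    print(uncore_perf_event(any_mesi))  # uncore_cbox_0/event=0x34,umask=0x8f/
    # e.g. perf stat -a -e 'uncore_cbox_0/event=0x34,umask=0x8f/' -- sleep 1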
diff --git a/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json
new file mode 100644
index 000000000..777b500a5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json
@@ -0,0 +1,484 @@
+[
+ {
+ "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to demand load misses that caused 4K page walks in any TLB levels.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to demand load misses that caused 2M/4M page walks in any TLB levels.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of cache load STLB hits. No page walk.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x60",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 4K page structure.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 2M/4M page structure.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks. (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB store misses.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts store operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x60",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "DTLB store misses with low part of linear-to-physical address translation missed.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DTLB store misses with low part of linear-to-physical address translation missed",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4f",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycle count for an Extended Page table walk.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in ITLB that causes a page walk of any page size.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to misses in ITLB 4K page entries.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks due to misses in ITLB 2M/4M page entries.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks in ITLB of any page size.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by ITLB misses.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "ITLB misses that hit STLB (4K).",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISSES.STLB_HIT_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core misses that miss the DTLB and hit the STLB (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "ITLB misses that hit STLB (2M).",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "ITLB_MISSES.STLB_HIT_2M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "ITLB misses that hit STLB. No page walk.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x60",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+ "EventCode": "0xae",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of DTLB page walker loads that hit in the L1+FB.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in the L1+FB",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of DTLB page walker loads that hit in the L2.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in the L2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of DTLB page walker loads that hit in the L3.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x14",
+ "Errata": "HSD25",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of DTLB page walker loads from memory.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "Errata": "HSD25",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of DTLB page walker hits in Memory",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in the L1+FB",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of ITLB page walker loads that hit in the L2.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x22",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in the L2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of ITLB page walker loads that hit in the L3.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "Errata": "HSD25",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of ITLB page walker loads from memory.",
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x28",
+ "Errata": "HSD25",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of ITLB page walker hits in Memory",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x44",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L3.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x48",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in memory.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count number of STLB flush attempts.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "STLB flush attempts",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
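The core-PMU entries in these files follow the x86 PERFEVTSEL layout that perf exposes through /sys/bus/event_source/devices/cpu/format: event select in config bits 0-7, unit mask in bits 8-15, edge detect at bit 18, any-thread at bit 21, invert at bit 23, and counter mask in bits 24-31. A minimal sketch of how a JSON entry maps to a raw config value (the dict below is illustrative, not part of the patch):

    def raw_config(entry):
        """Fold one pmu-events JSON entry into a raw perf config value."""
        cfg  = int(entry["EventCode"].split(",")[0], 16)   # event select, bits 0-7
        cfg |= int(entry.get("UMask", "0"), 16) << 8       # unit mask, bits 8-15
        cfg |= int(entry.get("EdgeDetect", "0")) << 18     # edge detect, bit 18
        cfg |= int(entry.get("AnyThread", "0")) << 21      # any-thread, bit 21
        cfg |= int(entry.get("Invert", "0")) << 23         # invert, bit 23
        cfg |= int(entry.get("CounterMask", "0")) << 24    # cmask, bits 24-31
        return cfg

    walk = {"EventCode": "0x08", "UMask": "0x10"}  # DTLB_LOAD_MISSES.WALK_DURATION
    print(hex(raw_config(walk)))  # 0x1008, i.e. cpu/event=0x08,umask=0x10/
    # Once this file is built into perf, the alias form also works:
    #   perf stat -e dtlb_load_misses.walk_duration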
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/cache.json b/tools/perf/pmu-events/arch/x86/haswellx/cache.json
new file mode 100644
index 000000000..b2fbd6173
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/cache.json
@@ -0,0 +1,1100 @@
+[
+ {
+ "EventCode": "0x24",
+ "UMask": "0x21",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "Errata": "HSD78",
+ "PublicDescription": "Demand data read requests that missed L2, no rejects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x22",
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x24",
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x27",
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "Errata": "HSD78",
+ "PublicDescription": "Demand requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x30",
+ "BriefDescription": "L2 prefetch requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.L2_PF_MISS",
+ "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x3f",
+ "BriefDescription": "All requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.MISS",
+ "Errata": "HSD78",
+ "PublicDescription": "All requests that missed L2.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x41",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "Errata": "HSD78",
+ "PublicDescription": "Demand data read requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x42",
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x44",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x50",
+ "BriefDescription": "L2 prefetch requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.L2_PF_HIT",
+ "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe1",
+ "BriefDescription": "Demand Data Read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "Errata": "HSD78",
+ "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe2",
+ "BriefDescription": "RFO requests to L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts all L2 store RFO requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe4",
+ "BriefDescription": "L2 code requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "Counts all L2 code requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe7",
+ "BriefDescription": "Demand requests to L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "Errata": "HSD78",
+ "PublicDescription": "Demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xf8",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "PublicDescription": "Counts all L2 HW prefetcher requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xff",
+ "BriefDescription": "All L2 requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "Errata": "HSD78",
+ "PublicDescription": "All requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "UMask": "0x50",
+ "BriefDescription": "Not rejected writebacks that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_DEMAND_RQSTS.WB_HIT",
+ "PublicDescription": "Not rejected writebacks that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x41",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x4f",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "L1D miss oustandings duration in cycles",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "Counter": "2",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch. HWP are e.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.REQUEST_FB_FULL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "UMask": "0x1",
+ "BriefDescription": "L1D data line replacements",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "This event counts when new data lines are brought into the L1 Data cache, which cause other lines to be evicted from the cache.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "Errata": "HSD78, HSD62, HSD61",
+ "PublicDescription": "Offcore outstanding demand data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "CounterMask": "1",
+ "Errata": "HSD78, HSD62, HSD61",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "CounterMask": "6",
+ "Errata": "HSD78, HSD62, HSD61",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "Errata": "HSD62, HSD61",
+ "PublicDescription": "Offcore outstanding Demand code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "Errata": "HSD62, HSD61",
+ "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "CounterMask": "1",
+ "Errata": "HSD62, HSD61",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "Errata": "HSD62, HSD61",
+ "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "CounterMask": "1",
+ "Errata": "HSD62, HSD61",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles when L1D is locked",
+ "Counter": "0,1,2,3",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "PublicDescription": "Cycles in which the L1D is locked.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "Errata": "HSD78",
+ "PublicDescription": "Demand data read requests sent to uncore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x2",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "Demand code read requests sent to uncore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x4",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x8",
+ "BriefDescription": "Demand and prefetch data reads",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb2",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x11",
+ "BriefDescription": "Retired load uops that miss the STLB. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "Errata": "HSD29, HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x12",
+ "BriefDescription": "Retired store uops that miss the STLB. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "Errata": "HSD29, HSM30",
+ "L1_Hit_Indication": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x21",
+ "BriefDescription": "Retired load uops with locked access. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "Errata": "HSD76, HSD29, HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x41",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "Errata": "HSD29, HSM30",
+ "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x42",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "Errata": "HSD29, HSM30",
+ "L1_Hit_Indication": "1",
+ "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x81",
+ "BriefDescription": "All retired load uops. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "Errata": "HSD29, HSM30",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x82",
+ "BriefDescription": "All retired store uops. (precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "Errata": "HSD29, HSM30",
+ "L1_Hit_Indication": "1",
+ "PublicDescription": "This event counts all store uops retired. This is a precise event.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "Errata": "HSD29, HSM30",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "Errata": "HSD76, HSD29, HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x4",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+ "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load uops misses in L1 cache as data sources.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "Errata": "HSM30",
+ "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "Errata": "HSD29, HSM30",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x20",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS",
+ "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x40",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "Errata": "HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS",
+ "Errata": "HSD29, HSD25, HSM26, HSM30",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT",
+ "Errata": "HSD29, HSD25, HSM26, HSM30",
+ "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM",
+ "Errata": "HSD29, HSD25, HSM26, HSM30",
+ "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE",
+ "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x1",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM",
+ "Errata": "HSD74, HSD29, HSD25, HSM30",
+ "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM",
+ "Errata": "HSD29, HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM",
+ "Errata": "HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x20",
+ "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD",
+ "Errata": "HSM30",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "PublicDescription": "Demand data read requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x2",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.RFO",
+ "PublicDescription": "RFO requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x4",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.CODE_RD",
+ "PublicDescription": "L2 cache accesses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x8",
+ "BriefDescription": "L2 or L3 HW prefetches that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.ALL_PF",
+ "PublicDescription": "Any MLC or L3 HW prefetch accessing L2, including rejects.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x10",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L1D_WB",
+ "PublicDescription": "L1D writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x20",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_FILL",
+ "PublicDescription": "L2 fill requests that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x40",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf0",
+ "UMask": "0x80",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "PublicDescription": "Transactions accessing L2 pipe.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x1",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.I",
+ "PublicDescription": "L2 cache lines in I state filling L2.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x2",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.S",
+ "PublicDescription": "L2 cache lines in S state filling L2.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x4",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.E",
+ "PublicDescription": "L2 cache lines in E state filling L2.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x7",
+ "BriefDescription": "L2 cache lines filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache. Lines are filled into the L2 cache when there was an L2 miss.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x5",
+ "BriefDescription": "Clean L2 cache lines evicted by demand",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "PublicDescription": "Clean L2 cache lines evicted by demand.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x6",
+ "BriefDescription": "Dirty L2 cache lines evicted by demand",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xf4",
+ "UMask": "0x10",
+ "BriefDescription": "Split locks in SQ",
+ "Counter": "0,1,2,3",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "PublicDescription": "",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0001",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0001",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0004",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0004",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3",
+ "MSRValue": "0x3f803c0010",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3",
+ "MSRValue": "0x3f803c0020",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3",
+ "MSRValue": "0x3f803c0040",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3",
+ "MSRValue": "0x3f803c0080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3",
+ "MSRValue": "0x3f803c0100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3",
+ "MSRValue": "0x3f803c0200",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c0122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c0244",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "MSRValue": "0x04003c07f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "MSRValue": "0x10003c07f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all requests that hit in the L3",
+ "MSRValue": "0x3f803c8fff",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
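Every OFFCORE_RESPONSE entry above pairs the event select (EventCode 0xB7 or 0xBB, UMask 0x1) with a 64-bit MSRValue that the kernel writes into the offcore response MSRs (MSRIndex 0x1a6/0x1a7). Under perf_event_open(2) that MSR value is carried in attr.config1, which is what the core PMU's sysfs format attribute offcore_rsp maps to. A minimal sketch, assuming a Haswell-class core PMU and the standard perf_event interface; the chosen event and the workload placeholder are illustrative only:

	/* Sketch: count OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE
	 * on the calling thread. EventCode/UMask are packed into attr.config,
	 * the MSRValue from the table above goes into attr.config1. */
	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x01B7;           /* UMask 0x1 << 8 | EventCode 0xB7 */
		attr.config1 = 0x10003c0001ULL; /* MSRValue for LLC_HIT.HITM_OTHER_CORE */
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... workload under measurement runs here ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("offcore responses: %llu\n", (unsigned long long)count);
		close(fd);
		return 0;
	}

Once these JSON files are built into perf, the same counter should also be reachable by name (perf stat -e offcore_response.demand_data_rd.llc_hit.hitm_other_core) or as a raw event via cpu/event=0xb7,umask=0x1,offcore_rsp=0x10003c0001/.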
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json b/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json
new file mode 100644
index 000000000..bc08cc1f2
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json
@@ -0,0 +1,83 @@
+[
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x8",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "Errata": "HSD56, HSM57",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x10",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "Errata": "HSD56, HSM57",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x7",
+ "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.",
+ "Counter": "0,1,2,3",
+ "EventName": "AVX_INSTS.ALL",
+ "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x2",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "PublicDescription": "Number of X87 FP assists due to output values.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x4",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "PublicDescription": "Number of X87 FP assists due to input values.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "PublicDescription": "Number of SIMD FP assists due to output values.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x10",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "PublicDescription": "Number of SIMD FP assists due to input values.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x1e",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.ANY",
+ "CounterMask": "1",
+ "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
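Several events in this file differ only in modifier fields: FP_ASSIST.ANY reuses EventCode 0xCA with UMask 0x1e plus CounterMask 1. For raw events these fields land in the documented IA32_PERFEVTSELx bit layout (event select in bits 0-7, unit mask in bits 8-15, edge detect in bit 18, invert in bit 23, cmask in bits 24-31). A small sketch of that mapping; raw_config is a hypothetical helper for illustration, not a kernel API:

	#include <stdint.h>
	#include <stdio.h>

	/* Pack the JSON fields into a PERF_TYPE_RAW config word following
	 * the Intel PerfEvtSel layout. */
	static uint64_t raw_config(uint8_t event_code, uint8_t umask,
				   uint8_t cmask, int edge, int inv)
	{
		return (uint64_t)event_code |
		       ((uint64_t)umask << 8) |
		       ((uint64_t)(edge != 0) << 18) |
		       ((uint64_t)(inv != 0) << 23) |
		       ((uint64_t)cmask << 24);
	}

	int main(void)
	{
		/* FP_ASSIST.ANY: EventCode 0xCA, UMask 0x1e, CounterMask 1. */
		printf("FP_ASSIST.ANY -> r%llx\n",
		       (unsigned long long)raw_config(0xCA, 0x1e, 1, 0, 0));
		return 0;
	}

This prints r1001eca, which perf accepts as a raw event specification (perf stat -e r1001eca).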
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/frontend.json b/tools/perf/pmu-events/arch/x86/haswellx/frontend.json
new file mode 100644
index 000000000..a4d9f1fcf
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/frontend.json
@@ -0,0 +1,294 @@
+[
+ {
+ "EventCode": "0x79",
+ "UMask": "0x2",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.EMPTY",
+ "Errata": "HSD135",
+ "PublicDescription": "Counts cycles the IDQ is empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_CYCLES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_CYCLES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles DSB is delivered at least one uops. Set Cmask = 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x20",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles MITE is delivered at least one uop. Set Cmask = 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_SWITCHES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x3c",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "PublicDescription": "Number of uops delivered to IDQ from any path.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x2",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes Uncacheable accesses.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.MISSES",
+ "PublicDescription": "This event counts Instruction Cache (ICACHE) misses.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.IFETCH_STALL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE.IFDATA_STALL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "Errata": "HSD135",
+ "PublicDescription": "This event count the number of undelivered (unallocated) uops from the Front-end to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "CounterMask": "4",
+ "Errata": "HSD135",
+ "PublicDescription": "This event counts the number cycles during which the Front-end allocated exactly zero uops to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. This event is counted on a per-core basis.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "CounterMask": "3",
+ "Errata": "HSD135",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "CounterMask": "2",
+ "Errata": "HSD135",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "CounterMask": "1",
+ "Errata": "HSD135",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "CounterMask": "1",
+ "Errata": "HSD135",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xAB",
+ "UMask": "0x2",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "Counter": "0,1,2,3",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
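A common use of the two 0x80 events above is deriving an instruction-cache miss ratio from ICACHE.HIT (UMask 0x1) and ICACHE.MISSES (UMask 0x2). A sketch that opens both as one perf group so the two counts cover exactly the same interval; error handling is trimmed, and the raw encodings (0x0180, 0x0280) follow the event/umask packing shown earlier:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int open_raw(uint64_t config, int group_fd)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = config;
		attr.read_format = PERF_FORMAT_GROUP;
		attr.disabled = (group_fd == -1);  /* only the leader starts disabled */
		return syscall(SYS_perf_event_open, &attr, 0, -1, group_fd, 0);
	}

	int main(void)
	{
		struct { uint64_t nr, hit, miss; } data;
		int leader = open_raw(0x0180, -1);      /* ICACHE.HIT    */
		int member = open_raw(0x0280, leader);  /* ICACHE.MISSES */

		if (leader < 0 || member < 0) {
			perror("perf_event_open");
			return 1;
		}
		ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
		/* ... instruction-fetch-heavy workload runs here ... */
		ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
		if (read(leader, &data, sizeof(data)) == sizeof(data) && data.hit + data.miss)
			printf("icache miss ratio: %.4f\n",
			       (double)data.miss / (double)(data.hit + data.miss));
		return 0;
	}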
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
new file mode 100644
index 000000000..5ab5c78fe
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -0,0 +1,158 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , IDQ.MITE_UOPS / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 16 * ( ICACHE.HIT + ICACHE.MISSES ) / 4.0 ) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "( UOPS_EXECUTED.CORE / 2 / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@) ) if #SMT_on else UOPS_EXECUTED.CORE / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFDATA_STALL - (( 14 * ITLB_MISSES.STLB_HIT + ITLB_MISSES.WALK_DURATION )) ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
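Most of the metrics above are plain ratios over events defined elsewhere in this directory; once this file is installed, perf stat -M IPC (or -M CPI) evaluates the MetricExpr directly. As a standalone illustration, a sketch that computes IPC and CPI from the generic hardware events the kernel maps onto INST_RETIRED.ANY and CPU_CLK_UNHALTED.THREAD; error handling is omitted for brevity:

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	static int open_hw(uint64_t config)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = config;
		/* .disabled defaults to 0, so counting starts at open. */
		return syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	}

	int main(void)
	{
		uint64_t insns = 0, cycles = 0;
		int fd_i = open_hw(PERF_COUNT_HW_INSTRUCTIONS);
		int fd_c = open_hw(PERF_COUNT_HW_CPU_CYCLES);

		/* ... workload under measurement runs here ... */
		read(fd_i, &insns, sizeof(insns));
		read(fd_c, &cycles, sizeof(cycles));
		if (insns && cycles)
			printf("IPC = %.3f, CPI = %.3f\n",
			       (double)insns / cycles, (double)cycles / insns);
		return 0;
	}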
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/memory.json b/tools/perf/pmu-events/arch/x86/haswellx/memory.json
new file mode 100644
index 000000000..56b0f24b8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/memory.json
@@ -0,0 +1,767 @@
+[
+ {
+ "EventCode": "0x05",
+ "UMask": "0x1",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x05",
+ "UMask": "0x2",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "PublicDescription": "Speculative cache-line split store-address uops dispatched to L1D.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional writes.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x4",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x8",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x10",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "PublicDescription": "This event counts the number of memory ordering machine clears detected. Memory ordering machine clears can result from memory address aliasing or snoops from another hardware thread or core to data inflight in the pipeline. Machine clears can have a significant performance impact if they are happening frequently.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times an HLE execution started.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times an HLE execution successfully committed.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one).",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an HLE execution aborted due to uncommon conditions.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC4",
+ "Errata": "HSD65",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc8",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts)",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MISC5",
+ "PublicDescription": "Number of times an HLE execution aborted due to none of the previous 4 categories (e.g. interrupts).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times an RTM execution started.",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times an RTM execution successfully committed.",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one).",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC1",
+ "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC4",
+ "Errata": "HSD65",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xc9",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MISC5",
+ "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 4.",
+ "PEBS": "2",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 8.",
+ "PEBS": "2",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 16.",
+ "PEBS": "2",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 32.",
+ "PEBS": "2",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 64.",
+ "PEBS": "2",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "2003",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 128.",
+ "PEBS": "2",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "1009",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 256.",
+ "PEBS": "2",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "503",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Loads with latency value being above 512.",
+ "PEBS": "2",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "Errata": "HSD76, HSD25, HSM26",
+ "TakenAlone": "1",
+ "SampleAfterValue": "101",
+ "CounterHTOff": "3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss in the L3",
+ "MSRValue": "0x3fbfc00001",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0600400001",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3",
+ "MSRValue": "0x3fbfc00002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0600400002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache",
+ "MSRValue": "0x103fc00002",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss in the L3",
+ "MSRValue": "0x3fbfc00004",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0600400004",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3",
+ "MSRValue": "0x3fbfc00010",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3",
+ "MSRValue": "0x3fbfc00020",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3",
+ "MSRValue": "0x3fbfc00040",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3",
+ "MSRValue": "0x3fbfc00080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3",
+ "MSRValue": "0x3fbfc00100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3",
+ "MSRValue": "0x3fbfc00200",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3",
+ "MSRValue": "0x3fbfc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0600400091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram",
+ "MSRValue": "0x063f800091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache",
+ "MSRValue": "0x103fc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache",
+ "MSRValue": "0x083fc00091",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3",
+ "MSRValue": "0x3fbfc00122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0600400122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss in the L3",
+ "MSRValue": "0x3fbfc00244",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x0600400244",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3",
+ "MSRValue": "0x3fbfc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram",
+ "MSRValue": "0x06004007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram",
+ "MSRValue": "0x063f8007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache",
+ "MSRValue": "0x103fc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache",
+ "MSRValue": "0x083fc007f7",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all requests that miss in the L3",
+ "MSRValue": "0x3fbfc08fff",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
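Note on the offcore events above: the MSRValue in each entry is programmed into the offcore-response MSR named by MSRIndex (0x1a6 or 0x1a7); the low 16 bits select request types and the upper bits select response and snoop attributes. A minimal Python sketch of the request-type decoding, assuming the Haswell bit assignments from the Intel SDM (the bit-to-name table is an assumption here, for illustration only):

    # Hypothetical decoder for the low 16 request-type bits of an offcore
    # MSRValue; bit assignments assumed from the Haswell SDM tables.
    REQUEST_BITS = {
        0: "DMND_DATA_RD", 1: "DMND_RFO", 2: "DMND_IFETCH", 3: "WB",
        4: "PF_DATA_RD", 5: "PF_RFO", 6: "PF_IFETCH",
        7: "PF_LLC_DATA_RD", 8: "PF_LLC_RFO", 9: "PF_LLC_IFETCH",
    }

    def decode_requests(msr_value: int) -> list:
        """Return the request-type names selected by an offcore MSRValue."""
        low = msr_value & 0xFFFF
        return [name for bit, name in REQUEST_BITS.items() if low & (1 << bit)]

    # 0x3fbfc00091 is OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE above:
    print(decode_requests(0x3fbfc00091))
    # ['DMND_DATA_RD', 'PF_DATA_RD', 'PF_LLC_DATA_RD']

Under that assumed layout, the pattern matches the MSRValues above: 0x91 selects the demand and prefetch data-read request types, 0x122 the RFOs, and 0x244 the code reads.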
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/other.json b/tools/perf/pmu-events/arch/x86/haswellx/other.json
new file mode 100644
index 000000000..800e65df3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/other.json
@@ -0,0 +1,43 @@
+[
+ {
+ "EventCode": "0x5C",
+ "UMask": "0x1",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING0",
+ "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x5C",
+ "UMask": "0x1",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "CounterMask": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "UMask": "0x2",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "Counter": "0,1,2,3",
+ "EventName": "CPL_CYCLES.RING123",
+ "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "Counter": "0,1,2,3",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
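The CPL_CYCLES events above partition unhalted cycles by privilege level (ring 0 versus rings 1-3), so dividing CPL_CYCLES.RING0 by total unhalted cycles estimates the share of time spent in kernel mode. A small illustrative sketch (the function and sample counts are assumptions, not part of the patch):

    def ring0_share(ring0_cycles: int, unhalted_cycles: int) -> float:
        """Fraction of unhalted core cycles spent in ring 0 (kernel mode)."""
        if unhalted_cycles == 0:
            return 0.0
        return ring0_cycles / unhalted_cycles

    # e.g. raw counts read from CPL_CYCLES.RING0 and CPU_CLK_UNHALTED.THREAD_P
    print(f"{ring0_share(1_200_000, 5_000_000):.1%}")  # prints 24.0%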
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json
new file mode 100644
index 000000000..8a18bfe9e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json
@@ -0,0 +1,1338 @@
+[
+ {
+ "EventCode": "0x00",
+ "UMask": "0x1",
+ "BriefDescription": "Instructions retired from execution.",
+ "Counter": "Fixed counter 0",
+ "EventName": "INST_RETIRED.ANY",
+ "Errata": "HSD140, HSD143",
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when the thread is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x3",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "Counter": "Fixed counter 2",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x2",
+ "BriefDescription": "loads blocked by overlapping with store buffer that cannot be forwarded",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x8",
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x07",
+ "UMask": "0x1",
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline which can have a performance impact.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x3",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This event counts the number of cycles spent waiting for a recovery after an event such as a processor nuke, JEClear, assist, hle/rtm abort etc.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x3",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "This event counts the number of uops issued by the Front-end of the pipeline to the Back-end. This event is counted at the allocation stage and will count both retired and non-retired uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x10",
+ "BriefDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive; added by GSR u-arch.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x20",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (for example, 2 sources + immediate) regardless of whether it is a result of LEA instruction or not.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x40",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "UMask": "0x2",
+ "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)",
+ "Counter": "0,1,2,3",
+ "EventName": "ARITH.DIVIDER_UOPS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3c",
+ "UMask": "0x2",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x2",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4c",
+ "UMask": "0x1",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4c",
+ "UMask": "0x2",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x1",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "PublicDescription": "Number of integer move elimination candidate uops that were eliminated.",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x2",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "PublicDescription": "Number of SIMD move elimination candidate uops that were eliminated.",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x4",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "PublicDescription": "Number of integer move elimination candidate uops that were not eliminated.",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "Counter": "0,1,2,3",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "PublicDescription": "Number of SIMD move elimination candidate uops that were not eliminated.",
+ "SampleAfterValue": "1000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "This event counts cycles when the Reservation Station ( RS ) is empty for the thread. The RS is a structure that buffers allocated micro-ops from the Front-end. If there are many cycles when the RS is empty, it may represent an underflow of instructions delivered from the Front-end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "Invert": "1",
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "CounterMask": "1",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "UMask": "0x1",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "Counter": "0,1,2,3",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "UMask": "0x4",
+ "BriefDescription": "Stall cycles because IQ is full",
+ "Counter": "0,1,2,3",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "PublicDescription": "Stall cycles due to IQ is full.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x41",
+ "BriefDescription": "Not taken macro-conditional branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x81",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x82",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x84",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x88",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0x90",
+ "BriefDescription": "Taken speculative and retired direct near calls.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xa0",
+ "BriefDescription": "Taken speculative and retired indirect calls.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc1",
+ "BriefDescription": "Speculative and retired macro-conditional branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc2",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc4",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xc8",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xd0",
+ "BriefDescription": "Speculative and retired direct near calls.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "UMask": "0xff",
+ "BriefDescription": "Speculative and retired branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x41",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x81",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x84",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0x88",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xa0",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xc1",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xc4",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "UMask": "0xff",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0",
+ "PublicDescription": "Cycles which a uop is dispatched on port 0 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per core when uops are executed in port 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE",
+ "AnyThread": "1",
+ "PublicDescription": "Cycles per core when uops are exectuted in port 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1",
+ "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per core when uops are executed in port 1.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE",
+ "AnyThread": "1",
+ "PublicDescription": "Cycles per core when uops are exectuted in port 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2",
+ "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 2.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3",
+ "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 3.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4",
+ "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per core when uops are executed in port 4.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE",
+ "AnyThread": "1",
+ "PublicDescription": "Cycles per core when uops are exectuted in port 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5",
+ "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per core when uops are executed in port 5.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE",
+ "AnyThread": "1",
+ "PublicDescription": "Cycles per core when uops are exectuted in port 5.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6",
+ "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per core when uops are executed in port 6.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE",
+ "AnyThread": "1",
+ "PublicDescription": "Cycles per core when uops are exectuted in port 6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7",
+ "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 7.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x1",
+ "BriefDescription": "Resource-related stall cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "Errata": "HSD135",
+ "PublicDescription": "Cycles allocation is stalled due to resource related reason.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with pending L2 cache miss loads.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "CounterMask": "1",
+ "Errata": "HSD78",
+ "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with pending memory loads.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "CounterMask": "2",
+ "PublicDescription": "Cycles with pending memory loads. Set Cmask=2 to count cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x4",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "CounterMask": "4",
+ "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x5",
+ "BriefDescription": "Execution stalls due to L2 cache misses.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "CounterMask": "5",
+ "PublicDescription": "Number of loads missed L2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x6",
+ "BriefDescription": "Execution stalls due to memory subsystem.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "CounterMask": "6",
+ "PublicDescription": "This event counts cycles during which no instructions were executed in the execution stage of the pipeline and there were memory instructions pending (waiting for data).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles with pending L1 cache miss loads.",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "CounterMask": "8",
+ "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0xc",
+ "BriefDescription": "Execution stalls due to L1 data cache misses",
+ "Counter": "2",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "CounterMask": "12",
+ "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xa8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.UOPS",
+ "PublicDescription": "Number of uops delivered by the LSD.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "CounterMask": "1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "CounterMask": "1",
+ "Errata": "HSD144, HSD30, HSM31",
+ "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "CounterMask": "2",
+ "Errata": "HSD144, HSD30, HSM31",
+ "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "CounterMask": "3",
+ "Errata": "HSD144, HSD30, HSM31",
+ "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "CounterMask": "4",
+ "Errata": "HSD144, HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Number of uops executed on the core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "Errata": "HSD30, HSM31",
+ "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "CounterMask": "1",
+ "Errata": "HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "CounterMask": "2",
+ "Errata": "HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "CounterMask": "3",
+ "Errata": "HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "CounterMask": "4",
+ "Errata": "HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xb1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "Errata": "HSD30, HSM31",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x0",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.ANY_P",
+ "Errata": "HSD11, HSD140",
+ "PublicDescription": "Number of instructions at retirement.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x1",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "PEBS": "2",
+ "Counter": "1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "Errata": "HSD140",
+ "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x2",
+ "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.X87",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Actually retired uops.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "CounterMask": "10",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x2",
+ "BriefDescription": "Retirement slots used.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "CounterMask": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x4",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x20",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x0",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Branch instructions at retirement.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x1",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x4",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x8",
+ "BriefDescription": "Return instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x10",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "PublicDescription": "Counts the number of not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x20",
+ "BriefDescription": "Taken branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x40",
+ "BriefDescription": "Far branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "PublicDescription": "Number of far branches retired.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x0",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Mispredicted branch instructions at retirement.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x1",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x4",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "PublicDescription": "This event counts all mispredicted branch instructions retired. This is a precise event.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x20",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCC",
+ "UMask": "0x20",
+ "BriefDescription": "Count cases of saving new LBR",
+ "Counter": "0,1,2,3",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "PublicDescription": "Count cases of saving new LBR records by hardware.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xe6",
+ "UMask": "0x1f",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
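
A note on the MACHINE_CLEARS.COUNT entry above: it pairs "EdgeDetect": "1" with "CounterMask": "1" so that the same event (code 0xC3, umask 0x1) that otherwise counts cycles (MACHINE_CLEARS.CYCLES) counts distinct nuke episodes instead. A minimal Python sketch of the assumed counting model follows; the per-cycle trace is invented for illustration.

    # Assumed model of EdgeDetect + CounterMask, not code from perf itself.
    # CounterMask=1 turns the per-cycle increment into "active this cycle";
    # EdgeDetect=1 then counts only 0->1 transitions, i.e. distinct episodes.
    def edge_count(per_cycle_counts, counter_mask=1):
        prev_active = False
        edges = 0
        for c in per_cycle_counts:
            active = c >= counter_mask       # CounterMask thresholding
            if active and not prev_active:   # EdgeDetect: rising edge only
                edges += 1
            prev_active = active
        return edges

    # Two machine-clear episodes lasting 3 and 2 cycles: CYCLES would read 5,
    # COUNT (with EdgeDetect) reads 2.
    trace = [0, 1, 1, 1, 0, 0, 1, 1, 0]
    assert edge_count(trace) == 2
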
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json
new file mode 100644
index 000000000..58ed6d33d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-cache.json
@@ -0,0 +1,317 @@
+[
+ {
+ "BriefDescription": "Uncore cache clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_C_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.ANY",
+ "Filter": "filter_state=0x1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x11",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.UNCACHEABLE",
+ "Filter": "filter_opc=0x187",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "MMIO reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_READ",
+ "Filter": "filter_opc=0x187,filter_nc=1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "MMIO writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_WRITE",
+ "Filter": "filter_opc=0x18f,filter_nc=1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+ "Filter": "filter_opc=0x190",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x191",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+ "Filter": "filter_opc=0x192",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "ItoM write misses (as part of fast string memcpy stores) + PCIe full line writes. Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe write misses (full cache line). Derived from unc_c_tor_inserts.miss_opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+ "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe writes (partial cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+ "Filter": "filter_opc=0x180,filter_tid=0x3e",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "L2 demand and L2 prefetch code references to LLC. Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x181",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_FULL",
+ "Filter": "filter_opc=0x18c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+ "Filter": "filter_opc=0x18d",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe write references (full cache line). Derived from unc_c_tor_inserts.opcode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_WRITE",
+ "Filter": "filter_opc=0x1c8,filter_tid=0x3e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "read requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "read requests to local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "read requests to remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0xC",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Conflict requests (requests for same address from multiple agents simultaneously)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPCNFLCT",
+ "PerPkg": "1",
+ "UMask": "0x40",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x20",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line response from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPS",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line forwarded from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x8",
+ "Unit": "HA"
+ }
+]
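
These uncore files introduce fields the core-event file above does not use (Unit, Filter, PerPkg, ScaleUnit), so a quick sanity check is to load a file and group entries by unit. A small hedged helper, not part of the perf build, with the file path assumed for the sketch:

    # Hypothetical inspection helper, not part of perf itself.
    import json
    from collections import Counter

    with open("uncore-cache.json") as f:     # path assumed for the sketch
        events = json.load(f)

    # Group entries by their uncore unit, e.g. CBO (caching agent) vs HA.
    print(Counter(e.get("Unit", "core") for e in events))

    for e in events:
        assert "EventName" in e, "every entry needs an EventName"
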
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json
new file mode 100644
index 000000000..824961318
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-interconnect.json
@@ -0,0 +1,28 @@
+[
+ {
+ "BriefDescription": "QPI clock ticks",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_Q_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of data flits transmitted . Derived from unc_q_txl_flits_g0.data",
+ "Counter": "0,1,2,3",
+ "EventName": "QPI_DATA_BANDWIDTH_TX",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x2",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of non data (control) flits transmitted . Derived from unc_q_txl_flits_g0.non_data",
+ "Counter": "0,1,2,3",
+ "EventName": "QPI_CTL_BANDWIDTH_TX",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x4",
+ "Unit": "QPI LL"
+ }
+]
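
The "ScaleUnit": "8Bytes" on the flit events reflects that each QPI data flit carries 8 bytes of payload, so a raw flit count converts directly to link bandwidth. An illustrative calculation with invented numbers:

    # Illustrative only: QPI TX data bandwidth from QPI_DATA_BANDWIDTH_TX.
    data_flits = 4_000_000_000    # invented flit count over the window
    interval_s = 1.0              # invented measurement window
    gb_per_s = data_flits * 8 / 1e9 / interval_s   # 8 bytes per data flit
    print(f"QPI TX data bandwidth: {gb_per_s:.2f} GB/s")   # 32.00 GB/s
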
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json
new file mode 100644
index 000000000..66eed3997
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-memory.json
@@ -0,0 +1,86 @@
+[
+ {
+ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_READ",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_WRITE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0xC",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory controller clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "UNC_M_POWER_CHANNEL_PPD",
+ "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_channel_ppd %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x86",
+ "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+ "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_critical_throttle_cycles %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles Memory is in self refresh power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x43",
+ "EventName": "UNC_M_POWER_SELF_REFRESH",
+ "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_self_refresh %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charges due to page misses",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for writes",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.WR",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "iMC"
+ }
+]
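
The iMC power events above carry a MetricExpr that divides the event by the unit clock, e.g. "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100." (the trailing "100." is simply a float literal). A hedged sketch of evaluating such an expression against invented counter readings; it only works while the event names in the expression are valid identifiers:

    # Hypothetical MetricExpr evaluation against invented counter readings.
    expr = "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100."
    counters = {"UNC_M_POWER_SELF_REFRESH": 120_000,
                "UNC_M_CLOCKTICKS": 800_000}
    pct = eval(expr, {"__builtins__": {}}, counters)   # -> 15.0
    print(f"power_self_refresh: {pct:.1f} %")
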
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json
new file mode 100644
index 000000000..dd1b95655
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/uncore-power.json
@@ -0,0 +1,92 @@
+[
+ {
+ "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+ "Filter": "occ_sel=1",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c0 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+ "Filter": "occ_sel=2",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c3 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+ "Filter": "occ_sel=3",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c6 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xA",
+ "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+ "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "prochot_external_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_limit_thermal_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_os_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x74",
+ "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_trans_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ }
+]
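
The CORES_Cx occupancy events accumulate the number of cores resident in the given C-state every PCU cycle, which is why the MetricExpr divides by UNC_P_CLOCKTICKS: the ratio is an average core count (scaled to a percentage there). The same arithmetic as a plain average, with invented values:

    # Invented values: average number of cores resident in C0.
    occupancy_c0 = 9_600_000_000    # UNC_P_POWER_STATE_OCCUPANCY.CORES_C0
    pcu_clockticks = 800_000_000    # UNC_P_CLOCKTICKS over the same window
    print(f"average cores in C0: {occupancy_c0 / pcu_clockticks:.1f}")  # 12.0
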
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json
new file mode 100644
index 000000000..168df552b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json
@@ -0,0 +1,484 @@
+[
+ {
+ "EventCode": "0x08",
+ "UMask": "0x1",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x2",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Completed page walks due to demand load misses that caused 4K page walks in any TLB levels.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x4",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Completed page walks due to demand load misses that caused 2M/4M page walks in any TLB levels.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x8",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0xe",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x20",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_4K",
+ "PublicDescription": "This event counts load operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x40",
+ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT_2M",
+ "PublicDescription": "This event counts load operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x60",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "PublicDescription": "Number of cache load STLB hits. No page walk.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x80",
+ "BriefDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+ "PublicDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x1",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x2",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 4K page structure.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x4",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Completed page walks due to store misses in one or more TLB levels of 2M/4M page structure.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x8",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0xe",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB store misses.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x20",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_4K",
+ "PublicDescription": "This event counts store operations from a 4K page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x40",
+ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT_2M",
+ "PublicDescription": "This event counts store operations from a 2M page that miss the first DTLB level but hit the second and do not cause page walks.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x60",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x80",
+ "BriefDescription": "DTLB store misses with low part of linear-to-physical address translation missed",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+ "PublicDescription": "DTLB store misses with low part of linear-to-physical address translation missed.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4f",
+ "UMask": "0x10",
+ "BriefDescription": "Cycle count for an Extended Page table walk.",
+ "Counter": "0,1,2,3",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x1",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Misses in ITLB that causes a page walk of any page size.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x2",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Completed page walks due to misses in ITLB 4K page entries.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x4",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Completed page walks due to misses in ITLB 2M/4M page entries.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x8",
+ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0xe",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Completed page walks in ITLB of any page size.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by ITLB misses.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x20",
+ "BriefDescription": "Core misses that miss the DTLB and hit the STLB (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT_4K",
+ "PublicDescription": "ITLB misses that hit STLB (4K).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x40",
+ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT_2M",
+ "PublicDescription": "ITLB misses that hit STLB (2M).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x60",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "PublicDescription": "ITLB misses that hit STLB. No page walk.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xae",
+ "UMask": "0x1",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x11",
+ "BriefDescription": "Number of DTLB page walker hits in the L1+FB",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1",
+ "PublicDescription": "Number of DTLB page walker loads that hit in the L1+FB.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x12",
+ "BriefDescription": "Number of DTLB page walker hits in the L2",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2",
+ "PublicDescription": "Number of DTLB page walker loads that hit in the L2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x14",
+ "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L3",
+ "Errata": "HSD25",
+ "PublicDescription": "Number of DTLB page walker loads that hit in the L3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x18",
+ "BriefDescription": "Number of DTLB page walker hits in Memory",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY",
+ "Errata": "HSD25",
+ "PublicDescription": "Number of DTLB page walker loads from memory.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x21",
+ "BriefDescription": "Number of ITLB page walker hits in the L1+FB",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L1",
+ "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x22",
+ "BriefDescription": "Number of ITLB page walker hits in the L2",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L2",
+ "PublicDescription": "Number of ITLB page walker loads that hit in the L2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x24",
+ "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_L3",
+ "Errata": "HSD25",
+ "PublicDescription": "Number of ITLB page walker loads that hit in the L3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x28",
+ "BriefDescription": "Number of ITLB page walker hits in Memory",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY",
+ "Errata": "HSD25",
+ "PublicDescription": "Number of ITLB page walker loads from memory.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x41",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x42",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x44",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L3.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x48",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x81",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x82",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x84",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBC",
+ "UMask": "0x88",
+ "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in memory.",
+ "Counter": "0,1,2,3",
+ "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_MEMORY",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x1",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x20",
+ "BriefDescription": "STLB flush attempts",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "PublicDescription": "Count number of STLB flush attempts.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
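
The DTLB load-miss events above compose into a useful derived metric: a first-level DTLB miss either hits the STLB (STLB_HIT) or walks the page tables (MISS_CAUSES_A_WALK), so their sum approximates all first-level misses. A hedged sketch with invented counts:

    # Invented counts: approximate STLB hit rate for demand loads.
    stlb_hit = 90_000    # DTLB_LOAD_MISSES.STLB_HIT
    walks = 10_000       # DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK
    hit_rate = stlb_hit / (stlb_hit + walks)
    print(f"STLB hit rate for loads: {hit_rate:.0%}")   # 90%
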
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json
new file mode 100644
index 000000000..999a01bc6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json
@@ -0,0 +1,1102 @@
+[
+ {
+ "PublicDescription": "Demand Data Read requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFO requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 store RFO requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 code requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFOs that miss cache lines.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that miss cache lines",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFOs that hit cache lines in M state.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that hit cache lines in M state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFOs that access cache lines in any state.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that access cache lines in any state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks that missed LLC.",
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_L1D_WB_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_L1D_WB_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed LLC",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to LLC",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D miss oustandings duration in cycles",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of lines brought into the L1 data cache.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding Demand Data Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles in which the L1D is locked.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1D is locked",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand data read requests sent to uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand code read requests sent to uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops with locked access. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load uops. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired store uops. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_MISS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Retired load uops whose data source was local memory (cross-socket snoop not needed or missed).",
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops which data sources missed LLC but serviced from local dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Demand Data Read requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFO requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANS.RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache accesses when fetching instructions.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANS.CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Any MLC or LLC HW prefetch accessing L2, including rejects.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 or LLC HW prefetches that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L1D writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANS.L1D_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 fill requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANS.L2_FILL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Transactions accessing L2 pipe.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in I state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_IN.I",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in S state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in E state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Clean L2 cache lines evicted by demand.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by demand",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by demand",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Clean L2 cache lines evicted by the MLC prefetcher.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PF_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines evicted by the MLC prefetcher.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PF_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines filling the L2.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "L2_LINES_OUT.DIRTY_ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines filling the L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Split locks in SQ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all writebacks from the core to the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x18000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts miscellaneous accesses that include port I/O, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on an uncacheable address",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts non-temporal stores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand RFOs",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x000105B3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x000107F7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo references (demand & prefetch)",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
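The OFFCORE_RESPONSE.* entries above all share event codes 0xB7/0xBB and differ only in MSRValue, the bit mask written into MSR 0x1A6/0x1A7 that selects which request and response types the counter matches. As a rough illustration, here is a minimal Python sketch that decodes the request-type portion (the low 16 bits) of such a mask; the bit-to-name mapping is an assumption based on the commonly documented Sandy Bridge/Ivy Bridge offcore-response encoding, not something these JSON files define.

    # Hypothetical decoder for the request-type bits of an offcore MSRValue.
    # Bit positions/names are assumed from the SNB/IVB offcore-response encoding.
    REQUEST_BITS = {
        0: "DMND_DATA_RD", 1: "DMND_RFO", 2: "DMND_IFETCH", 3: "COREWB",
        4: "PF_DATA_RD", 5: "PF_RFO", 6: "PF_IFETCH", 7: "PF_LLC_DATA_RD",
        8: "PF_LLC_RFO", 9: "PF_LLC_IFETCH", 10: "BUS_LOCKS", 11: "STRM_ST",
        15: "OTHER",
    }

    def decode_requests(msr_value):
        # Keep only the names whose request bit is set in the mask.
        return [name for bit, name in sorted(REQUEST_BITS.items())
                if msr_value & (1 << bit)]

    # 0x10008 (OFFCORE_RESPONSE.COREWB.ANY_RESPONSE above) sets bit 3, the
    # core-writeback request, plus bit 16 for "any response".
    print(decode_requests(0x10008))  # ['COREWB']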
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/floating-point.json b/tools/perf/pmu-events/arch/x86/ivybridge/floating-point.json
new file mode 100644
index 000000000..950b62c09
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/floating-point.json
@@ -0,0 +1,151 @@
+[
+ {
+ "PublicDescription": "Counts number of X87 uops executed.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a s",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of SSE* or AVX-128 double precision FP scalar uops executed.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 256-bit packed single-precision floating-point instructions.",
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_FP_256.PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "number of GSSE-256 Computational FP single precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 256-bit packed double-precision floating-point instructions.",
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "number of AVX-256 Computational FP double precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of assists associated with 256-bit AVX store operations.",
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OTHER_ASSISTS.AVX_STORE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of X87 FP assists due to output values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of X87 FP assists due to input values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD FP assists due to output values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD FP assists due to input values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
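The FP_COMP_OPS_EXE.* and SIMD_FP_256.* events above count uops, not floating-point operations; a packed uop performs several operations at once, so FLOP estimates weight each counter by its vector width. The GFLOPs metric in ivb-metrics.json later in this patch uses exactly the weights shown in this sketch; the counter values are invented inputs.

    # Sketch: weight the uop counters above by vector width to estimate FLOPs,
    # mirroring the GFLOPs MetricExpr in ivb-metrics.json (scalar = 1,
    # 128-bit double = 2, 128-bit single = 4, 256-bit double = 4,
    # 256-bit single = 8). The sample counts are made up.
    def estimate_flops(c):
        return (1 * (c["FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE"]
                     + c["FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE"])
                + 2 * c["FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE"]
                + 4 * (c["FP_COMP_OPS_EXE.SSE_PACKED_SINGLE"]
                       + c["SIMD_FP_256.PACKED_DOUBLE"])
                + 8 * c["SIMD_FP_256.PACKED_SINGLE"])

    sample = {
        "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE": 1000,
        "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE": 500,
        "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE": 200,
        "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE": 100,
        "SIMD_FP_256.PACKED_DOUBLE": 50,
        "SIMD_FP_256.PACKED_SINGLE": 25,
    }
    print(estimate_flops(sample))  # 1500 + 400 + 600 + 200 = 2700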
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json b/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json
new file mode 100644
index 000000000..efaa949ea
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json
@@ -0,0 +1,305 @@
+[
+ {
+ "PublicDescription": "Counts cycles the IDQ is empty.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "IDQ.EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequencer (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles DSB is delivered at least one uops. Set Cmask = 1.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles MITE is delivered at least one uops. Set Cmask = 1.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequencer (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops delivered to IDQ from any path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache reads, both cacheable and noncacheable, including UC fetches.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache reads, both cacheable and noncacheable, including UC fetches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes UC accesses.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction cache, streaming buffer and victim cache misses",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE.IFETCH_STALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count issue pipeline slots where no uop was delivered from the front end to the back end when there is no back-end stall.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of DSB to MITE switches.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles DSB to MITE switches caused delay.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "DSB Fill encountered > 3 DSB lines.",
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
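Many IDQ.* entries above reuse one EventCode/UMask pair and differ only in CounterMask (and sometimes Invert or EdgeDetect): with CounterMask 0 the PMU accumulates the per-cycle uop count, while CounterMask N counts cycles whose increment is at least N. A small simulation of that semantics, on an invented per-cycle uop trace:

    # Sketch of cmask semantics: the same event source yields a uop count
    # (cmask = 0) or a cycle count (cmask >= 1); Invert flips the comparison.
    def pmu_count(per_cycle, cmask=0, invert=False):
        if cmask == 0:
            return sum(per_cycle)                        # e.g. IDQ.DSB_UOPS
        if invert:
            return sum(1 for n in per_cycle if n < cmask)
        return sum(1 for n in per_cycle if n >= cmask)   # e.g. IDQ.DSB_CYCLES

    dsb_uops = [4, 4, 0, 2, 4, 0, 1]                     # invented trace
    print(pmu_count(dsb_uops))           # uops delivered           -> 15
    print(pmu_count(dsb_uops, cmask=1))  # cycles delivering any    -> 5
    print(pmu_count(dsb_uops, cmask=4))  # cycles delivering 4 uops -> 3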
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
new file mode 100644
index 000000000..7c2679514
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough estimation of the fraction of fetched cache-line bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB;Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative to the nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
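Each MetricExpr above is an arithmetic expression over event names that perf evaluates generically (e.g. "perf stat -M IPC" once these files are built into perf). The simplest ones can be reproduced by hand; a sketch with made-up counter values:

    # Sketch: evaluate the IPC, UPI, CPI and Turbo_Utilization formulas above
    # from raw counter readings. The counter values are invented.
    counts = {
        "INST_RETIRED.ANY": 12_000_000,
        "CPU_CLK_UNHALTED.THREAD": 10_000_000,
        "UOPS_RETIRED.RETIRE_SLOTS": 13_800_000,
        "CPU_CLK_UNHALTED.REF_TSC": 8_000_000,
    }
    ipc = counts["INST_RETIRED.ANY"] / counts["CPU_CLK_UNHALTED.THREAD"]
    upi = counts["UOPS_RETIRED.RETIRE_SLOTS"] / counts["INST_RETIRED.ANY"]
    cpi = 1 / ipc
    turbo = counts["CPU_CLK_UNHALTED.THREAD"] / counts["CPU_CLK_UNHALTED.REF_TSC"]
    print(f"IPC={ipc:.2f} UPI={upi:.2f} CPI={cpi:.2f} Turbo={turbo:.2f}")
    # IPC=1.20 UPI=1.15 CPI=0.83 Turbo=1.25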
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json
new file mode 100644
index 000000000..a74d54f56
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json
@@ -0,0 +1,236 @@
+[
+ {
+ "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative cache-line split Store-address uops dispatched to L1D.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.LLC_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of any page walk that had a miss in LLC.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 4.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 4",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 8.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Loads with latency value being above 8",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 16.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Loads with latency value being above 16",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 32.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Loads with latency value being above 32",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 64.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Loads with latency value being above 64",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 128.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Loads with latency value being above 128",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 256.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Loads with latency value being above 256",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 512.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Loads with latency value being above 512",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "Counter": "3",
+ "UMask": "0x2",
+ "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.",
+ "PRECISE_STORE": "1",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3004003f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6004001b3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts LLC replacements",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
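The MEM_TRANS_RETIRED.LOAD_LATENCY_GT_* entries above share EventCode 0xCD and UMask 0x1; only the latency threshold written to MSR 0x3F6 changes, and SampleAfterValue shrinks as the threshold grows because qualifying loads become rarer. Since a sample is taken once per SampleAfterValue occurrences, raw sample counts scale back to event estimates as in this sketch (the sample count is invented):

    # Sketch: estimate total event occurrences from PEBS samples, using the
    # SampleAfterValue periods of the 0xCD entries above.
    PERIODS = {  # MSR 0x3F6 latency threshold -> SampleAfterValue
        4: 100003, 8: 50021, 16: 20011, 32: 100007,
        64: 2003, 128: 1009, 256: 503, 512: 101,
    }

    def estimated_loads(threshold, samples):
        # One sample is recorded per SampleAfterValue qualifying loads.
        return samples * PERIODS[threshold]

    # 7 samples of LOAD_LATENCY_GT_64 suggest roughly 7 * 2003 loads that
    # took longer than 64 cycles.
    print(estimated_loads(64, 7))  # 14021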
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/other.json b/tools/perf/pmu-events/arch/x86/ivybridge/other.json
new file mode 100644
index 000000000..4eb83ee40
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/other.json
@@ -0,0 +1,44 @@
+[
+ {
+ "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPL_CYCLES.RING0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPL_CYCLES.RING123",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json
new file mode 100644
index 000000000..0afbfd95e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json
@@ -0,0 +1,1309 @@
+[
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "False dependencies in MOB due to partial compare on address.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare on address",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EdgeDetect": "1",
+ "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of occurrences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any = 1 to count stalled cycles of this core.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of flags-merge uops allocated. Such uops adds delay.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of flags-merge uops being allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles that the divider is active, includes INT and FP. Set 'edge =1, cmask=1' to count the number of divides.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when divider is busy executing divide operations",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Divide operations executed.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EdgeDetect": "1",
+ "EventName": "ARITH.FPU_DIV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Divide operations executed",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles the RS is empty for the thread.",
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Stall cycles due to IQ is full.",
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stall cycles because IQ is full",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not taken macro-conditional branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired macro-conditional branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired direct near calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired direct near calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired indirect calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired macro-conditional branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc2",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired indirect branches excluding calls and returns.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc8",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired direct near calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired direct near calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Mispredicted indirect branches excluding calls and returns.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 2.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles Allocation is stalled due to Resource Related reason.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles stalled due to no store buffers available (not including draining form sync).",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with pending L2 miss loads. Set AnyThread to count per core.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending L2 cache miss loads.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L2 cache miss load* is outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with pending memory loads. Set AnyThread to count per core.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending memory loads.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Total execution stalls.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Total execution stalls.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of loads missed L2.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to L2 cache misses.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to memory subsystem.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending L1 cache miss loads.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to L1 data cache misses",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. Set Cmask = 1, INV =1 to count stall cycles.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of uops executed on the core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instructions at retirement.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of self-modifying-code machine clears detected.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of executed AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Branch instructions at retirement.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Return instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of not taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of far branches retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Mispredicted branch instructions at retirement.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count cases of saving new LBR records by hardware.",
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count cases of saving new LBR",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
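The pipeline.json entries above are plain data: each object becomes a named event alias when perf is built, and the JSON keys map directly onto the format terms the core PMU exposes under /sys/bus/event_source/devices/cpu/format (EventCode -> event, UMask -> umask, CounterMask -> cmask, Invert -> inv, EdgeDetect -> edge, AnyThread -> any). The following minimal Python sketch, which is not the in-tree jevents converter, illustrates that mapping by printing a raw perf event string for every entry in a core-PMU file such as pipeline.json; the "cpu/" PMU name and the term names are the standard ones on Intel core PMUs, everything else here is illustrative.

#!/usr/bin/env python3
# Minimal sketch (not the in-tree jevents converter): print a raw perf
# event string for every entry in a core-PMU pmu-events JSON file, e.g.
#   INT_MISC.RECOVERY_CYCLES  ->  cpu/event=0x0D,umask=0x3,cmask=1/
# JSON keys on the left are the ones used throughout these files; the
# perf format terms on the right (event, umask, cmask, inv, edge, any)
# are the standard /sys/bus/event_source/devices/cpu/format terms on
# Intel core PMUs.
import json
import sys

TERMS = [
    ("EventCode", "event"),
    ("UMask", "umask"),
    ("CounterMask", "cmask"),
    ("Invert", "inv"),
    ("EdgeDetect", "edge"),
    ("AnyThread", "any"),
]

def event_string(entry):
    parts = []
    for key, term in TERMS:
        val = entry.get(key)
        if val in (None, "0", "0x0", "0x00"):
            continue  # absent, or an all-zero default that adds nothing
        parts.append("%s=%s" % (term, val))
    return "cpu/" + ",".join(parts) + "/"

if __name__ == "__main__":
    for entry in json.load(open(sys.argv[1])):  # e.g. pipeline.json
        print("%-45s %s" % (entry["EventName"], event_string(entry)))

For example, INT_MISC.RECOVERY_CYCLES above (EventCode 0x0D, UMask 0x3, CounterMask 1) comes out as cpu/event=0x0D,umask=0x3,cmask=1/, which can be passed as-is to perf stat -e.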
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json b/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json
new file mode 100644
index 000000000..42c70eed0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/uncore.json
@@ -0,0 +1,314 @@
+[
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x01",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS",
+ "BriefDescription": "A snoop misses in some processor core.",
+ "PublicDescription": "A snoop misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x02",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL",
+ "BriefDescription": "A snoop invalidates a non-modified line in some processor core.",
+ "PublicDescription": "A snoop invalidates a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x04",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT",
+ "BriefDescription": "A snoop hits a non-modified line in some processor core.",
+ "PublicDescription": "A snoop hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x08",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM",
+ "BriefDescription": "A snoop hits a modified line in some processor core.",
+ "PublicDescription": "A snoop hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x10",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M",
+ "BriefDescription": "A snoop invalidates a modified line in some processor core.",
+ "PublicDescription": "A snoop invalidates a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x20",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER",
+ "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+ "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x40",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER",
+ "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+ "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x80",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER",
+ "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+ "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x01",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.M",
+ "BriefDescription": "LLC lookup request that access cache and found line in M-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x02",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.E",
+ "BriefDescription": "LLC lookup request that access cache and found line in E-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in E-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x04",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.S",
+ "BriefDescription": "LLC lookup request that access cache and found line in S-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x08",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.I",
+ "BriefDescription": "LLC lookup request that access cache and found line in I-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x10",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER",
+ "BriefDescription": "Filter on processor core initiated cacheable read requests.",
+ "PublicDescription": "Filter on processor core initiated cacheable read requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x20",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER",
+ "BriefDescription": "Filter on processor core initiated cacheable write requests.",
+ "PublicDescription": "Filter on processor core initiated cacheable write requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x40",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER",
+ "BriefDescription": "Filter on external snoop requests.",
+ "PublicDescription": "Filter on external snoop requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x80",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER",
+ "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+ "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x81",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "PublicDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x81",
+ "UMask": "0x20",
+ "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+ "BriefDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.",
+ "PublicDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x81",
+ "UMask": "0x80",
+ "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS",
+ "BriefDescription": "Counts the number of LLC evictions allocated.",
+ "PublicDescription": "Counts the number of LLC evictions allocated.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x83",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+ "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x84",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+ "PublicDescription": "Number of requests allocated in Coherency Tracker.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "Counter": "0,1",
+ "CounterMask": "1",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL",
+ "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "Counter": "0,1",
+ "CounterMask": "10",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x0",
+ "UMask": "0x01",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "Fixed",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x06",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ES",
+ "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+] \ No newline at end of file
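The ARB occupancy/request pairs above support a useful derived metric: UNC_ARB_TRK_OCCUPANCY.ALL counts cycles weighted by the number of waiting requests, so by Little's law dividing it by UNC_ARB_TRK_REQUESTS.ALL gives the average number of uncore clocks a tracked request spends waiting on the memory controller. A hedged sketch, assuming both counts come from the same perf stat run; the function name and sample counts are illustrative, not a perf API:

# Hedged sketch: average memory-request latency, in uncore clocks, from
# the two ARB events defined above. UNC_ARB_TRK_OCCUPANCY.ALL counts
# cycles weighted by the number of waiting requests, so by Little's law
# occupancy / requests = average wait per request. Function name and
# the sample counts are illustrative, not a perf API.
def arb_avg_latency(trk_occupancy: int, trk_requests: int) -> float:
    """UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL."""
    if trk_requests == 0:
        return 0.0
    return trk_occupancy / trk_requests

# 1.2e9 occupancy-cycles over 6e6 tracked requests -> an average of
# 200 uncore clocks of wait per request.
print(arb_avg_latency(1_200_000_000, 6_000_000))  # 200.0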
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json
new file mode 100644
index 000000000..f243551b4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json
@@ -0,0 +1,180 @@
+[
+ {
+ "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "DTLB_LOAD_MISSES.LARGE_PAGE_WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Page walk for a large page completed for Demand load.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles PMH is busy with this walk.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycle count for an Extended Page table walk. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts load operations that missed 1st level DTLB but hit the 2nd level.",
+ "EventCode": "0x5F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in all ITLB levels that cause page walks.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycle PMH is busy with a walk.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of cache load STLB hits. No page walk.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks in ITLB due to STLB load misses for large pages.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "ITLB_MISSES.LARGE_PAGE_WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Completed page walks in ITLB due to STLB load misses for large pages",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count number of STLB flush attempts.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "STLB flush attempts",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
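A common use of the *.WALK_DURATION events in this file is to estimate how much execution time goes to hardware page walks: sum the load-side, store-side, and instruction-side walk-duration counts and divide by unhalted thread cycles. A minimal sketch, assuming all four counts come from one perf stat run; the function name and sample values are illustrative:

# Minimal sketch: fraction of unhalted cycles the page miss handler
# spends on hardware page walks, from the WALK_DURATION events in this
# file plus CPU_CLK_UNHALTED.THREAD_P. All four counts are assumed to
# come from one perf stat run; the function name is illustrative.
def page_walk_cycle_fraction(dtlb_load_walk: int, dtlb_store_walk: int,
                             itlb_walk: int, unhalted_cycles: int) -> float:
    """(DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION
        + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P"""
    if unhalted_cycles == 0:
        return 0.0
    return (dtlb_load_walk + dtlb_store_walk + itlb_walk) / unhalted_cycles

# 300M walk cycles out of 10G unhalted cycles -> 3% of time page-walking.
print(page_walk_cycle_fraction(200_000_000, 50_000_000,
                               50_000_000, 10_000_000_000))  # 0.03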
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/cache.json b/tools/perf/pmu-events/arch/x86/ivytown/cache.json
new file mode 100644
index 000000000..6dad3ad6b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/cache.json
@@ -0,0 +1,1260 @@
+[
+ {
+ "PublicDescription": "Demand Data Read requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFO requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 store RFO requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instruction fetches that hit the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instruction fetches that missed the L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 code requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests that missed L2.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all L2 HW prefetcher requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from L2 hardware prefetchers",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFOs that miss cache lines.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that miss cache lines",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFOs that hit cache lines in M state.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that hit cache lines in M state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFOs that access cache lines in any state.",
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that access cache lines in any state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks that missed LLC.",
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_L1D_WB_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_L1D_WB_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts each cache miss condition for references to the last level cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed LLC",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts requests originating from the core that reference a cache line in the last level cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to LLC",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments the number of outstanding L1D misses every cycle. Set Cmask = 1 and Edge =1 to count occurrences.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D miss oustandings duration in cycles",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of lines brought into the L1 data cache.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding Demand Data Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles in which the L1D is locked.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1D is locked",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand data read requests sent to uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand code read requests sent to uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand RFO read requests sent to uncore, including regular RFOs, locks, ItoM.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Data read requests sent to uncore (demand and prefetch).",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops with locked access. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load uops. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired store uops. (Precise Event)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops with L2 cache misses as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_MISS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops whose data source was local DRAM (Snoop not needed, Snoop Miss, or Snoop Hit data not forwarded).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops whose data source was remote DRAM (Snoop not needed, Snoop Miss, or Snoop Hit data not forwarded).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Remote cache HITM.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Data forwarded from remote cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Demand Data Read requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RFO requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANS.RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache accesses when fetching instructions.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANS.CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache accesses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Any MLC or LLC HW prefetch accessing L2, including rejects.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 or LLC HW prefetches that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L1D writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANS.L1D_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1D writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 fill requests that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANS.L2_FILL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 fill requests that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Transactions accessing L2 pipe.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Transactions accessing L2 pipe",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in I state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_IN.I",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in I state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in S state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in S state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines in E state filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in E state filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "L2 cache lines filling L2.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Clean L2 cache lines evicted by demand.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by demand",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines evicted by demand.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by demand",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Clean L2 cache lines evicted by the MLC prefetcher.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PF_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines evicted by the MLC prefetcher.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PF_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Dirty L2 cache lines filling the L2.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "L2_LINES_OUT.DIRTY_ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines filling the L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Split locks in SQ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all writebacks from the core to the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x803c8000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x23ffc08000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts non-temporal stores",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
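The OFFCORE_RESPONSE entries above use a two-part encoding: EventCode 0xB7/0xBB with UMask 0x1 selects one of the two offcore-response facilities, and MSRValue is programmed into the matching auxiliary MSR named by MSRIndex (0x1a6 or 0x1a7). A minimal sketch, assuming perf's usual mapping of that MSR onto perf_event_attr.config1; the function name is hypothetical and the snippet is not part of this patch.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Sketch only: open OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE
 * (EventCode 0xB7, UMask 0x1, MSRValue 0x3f803c0001 in the table above).
 * Assumes perf exposes the OFFCORE_RSP MSR pair 0x1a6/0x1a7 as
 * attr.config1; returns a counter fd for the calling thread. */
static int open_offcore_llc_hit(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size    = sizeof(attr);
	attr.type    = PERF_TYPE_RAW;
	attr.config  = (0x1ULL << 8) | 0xB7; /* UMask 0x1, EventCode 0xB7 */
	attr.config1 = 0x3f803c0001ULL;      /* request/supplier/snoop bits */

	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}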
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/floating-point.json b/tools/perf/pmu-events/arch/x86/ivytown/floating-point.json
new file mode 100644
index 000000000..950b62c09
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/floating-point.json
@@ -0,0 +1,151 @@
+[
+ {
+ "PublicDescription": "Counts number of X87 uops executed.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a s",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts number of SSE* or AVX-128 double precision FP scalar uops executed.",
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 256-bit packed single-precision floating-point instructions.",
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_FP_256.PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "number of GSSE-256 Computational FP single precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 256-bit packed double-precision floating-point instructions.",
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "number of AVX-256 Computational FP double precision uops issued this cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of assists associated with 256-bit AVX store operations.",
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OTHER_ASSISTS.AVX_STORE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of X87 FP assists due to output values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of X87 FP assists due to input values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD FP assists due to output values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to Output values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of SIMD FP assists due to input values.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to input values",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with any input/output SSE* or FP assists.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
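The FP_COMP_OPS_EXE and SIMD_FP_256 events above count executed uops per width class, so a floating-point-operation estimate weights each class by its element count. A sketch of that arithmetic follows; the helper name and the weighting are a conventional derivation from the vector widths, not part of this patch, and uops-executed events can overcount under speculation and FP assists.

/* Sketch: weight each FP uop class from the tables above by its element
 * count (128-bit packed double = 2 elements, 256-bit packed single = 8,
 * and so on) to estimate total floating-point operations. */
static unsigned long long flop_estimate(unsigned long long x87,
					unsigned long long sse_scalar_single,
					unsigned long long sse_scalar_double,
					unsigned long long sse_packed_single,
					unsigned long long sse_packed_double,
					unsigned long long avx256_single,
					unsigned long long avx256_double)
{
	return x87
	     + sse_scalar_single + sse_scalar_double
	     + 4 * sse_packed_single + 2 * sse_packed_double
	     + 8 * avx256_single + 4 * avx256_double;
}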
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/frontend.json b/tools/perf/pmu-events/arch/x86/ivytown/frontend.json
new file mode 100644
index 000000000..efaa949ea
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/frontend.json
@@ -0,0 +1,305 @@
+[
+ {
+ "PublicDescription": "Counts cycles the IDQ is empty.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "IDQ.EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MITE path. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles DSB is delivered four uops. Set Cmask = 4.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles DSB is delivered at least one uops. Set Cmask = 1.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles MITE is delivered at least one uops. Set Cmask = 1.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops delivered to IDQ from any path.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Misses. Includes UC accesses.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction cache, streaming buffer and victim cache misses",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE.IFETCH_STALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where a code-fetch stalled due to L1 instruction-cache miss or an iTLB miss",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count issue pipeline slots where no uop was delivered from the front end to the back end when there is no back-end stall.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of DSB to MITE switches.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles DSB to MITE switches caused delay.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "DSB Fill encountered > 3 DSB lines.",
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
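
A note on the Cmask/CounterMask fields used by the frontend events above: with CounterMask = N the counter increments once per cycle in which the event's per-cycle increment is at least N, which is how a uop-delivery event (e.g. IDQ.DSB_UOPS) becomes a cycle-count event (e.g. IDQ.ALL_DSB_CYCLES_ANY_UOPS). A minimal Python sketch of that semantics, over an invented per-cycle uop trace:

    # Sketch of CounterMask (Cmask) semantics: with Cmask = N the counter
    # increments once per cycle whose per-cycle event increment is >= N.
    # The uop trace below is invented, not real hardware data.
    uops_per_cycle = [4, 0, 3, 4, 0, 1, 4]
    total_uops = sum(uops_per_cycle)                        # Cmask = 0: raw uop count
    cycles_any = sum(1 for u in uops_per_cycle if u >= 1)   # Cmask = 1: cycles delivering any uop
    cycles_4   = sum(1 for u in uops_per_cycle if u >= 4)   # Cmask = 4: cycles delivering 4 uops
    print(total_uops, cycles_any, cycles_4)                 # -> 16 5 3
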
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
new file mode 100644
index 000000000..7c2679514
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2) if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE.IFETCH_STALL ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_UOPS_RETIRED.L1_MISS + mem_load_uops_retired.hit_lfb )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( cpu@l1d_pend_miss.pending_cycles\\,any\\=1@ / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_DURATION + DTLB_LOAD_MISSES.WALK_DURATION + DTLB_STORE_MISSES.WALK_DURATION ) / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
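
For orientation: each MetricExpr above is an arithmetic expression over event names that perf evaluates after collecting the raw counts. A minimal sketch of how a metric such as IPC falls out of two counts (the count values below are made-up placeholders, not measurements):

    # Sketch of evaluating the IPC MetricExpr from already-collected counts.
    # The counts are invented placeholder values, not real measurements.
    counts = {
        "INST_RETIRED.ANY": 1_200_000_000,
        "CPU_CLK_UNHALTED.THREAD": 1_000_000_000,
    }
    ipc = counts["INST_RETIRED.ANY"] / counts["CPU_CLK_UNHALTED.THREAD"]
    print(f"IPC = {ipc:.2f}")  # -> IPC = 1.20
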
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/memory.json b/tools/perf/pmu-events/arch/x86/ivytown/memory.json
new file mode 100644
index 000000000..3a7b86af8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/memory.json
@@ -0,0 +1,503 @@
+[
+ {
+ "PublicDescription": "Speculative cache-line split load uops dispatched to L1D.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative cache-line split Store-address uops dispatched to L1D.",
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 4.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 4",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 8.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Loads with latency value being above 8",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 16.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Loads with latency value being above 16",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 32.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Loads with latency value being above 32",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 64.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Loads with latency value being above 64",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 128.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Loads with latency value being above 128",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 256.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Loads with latency value being above 256",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Loads with latency value being above 512.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Loads with latency value being above 512",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "Counter": "3",
+ "UMask": "0x2",
+ "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.",
+ "PRECISE_STORE": "1",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc00244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f800244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hits the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc203f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6004003f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f8203f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc003f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f820004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc00004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67fc00001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f820001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc00001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc20002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the LLC and the data is found in M state in remote cache and forwarded from there.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from remote & local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67fc00010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f820010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc00010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
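
The MSRValue fields in the offcore-response events above are bit masks written to the OFFCORE_RSP MSRs named by MSRIndex (0x1a6/0x1a7). Broadly, the low bits select the request type and the higher bits select response, supplier and snoop attributes; the exact bit assignments are model-specific, so the 16-bit split in this Python sketch is an illustrative assumption, not a documented layout:

    # Rough sketch of splitting an offcore-response MSRValue into a
    # request-type half and a response-attribute half. The 16-bit split and
    # the labels are illustrative assumptions; consult the SDM for the
    # model-specific bit definitions.
    msr_value = 0x3FFFC20001  # OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE
    request_bits = msr_value & 0xFFFF
    response_bits = msr_value >> 16
    print(f"request mask:  {request_bits:#x}")   # 0x1 (demand data read)
    print(f"response mask: {response_bits:#x}")
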
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/other.json b/tools/perf/pmu-events/arch/x86/ivytown/other.json
new file mode 100644
index 000000000..4eb83ee40
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/other.json
@@ -0,0 +1,44 @@
+[
+ {
+ "PublicDescription": "Unhalted core cycles when the thread is in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPL_CYCLES.RING0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.",
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPL_CYCLES.RING123",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.",
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
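
CPL_CYCLES.RING0_TRANS above pairs EdgeDetect = 1 with CounterMask = 1, so the counter counts 0-to-1 transitions of the ring-0 condition rather than the cycles it holds. A small Python sketch of that edge-detect behaviour over an invented per-cycle trace:

    # Sketch of EdgeDetect + CounterMask semantics: count rising edges of a
    # per-cycle condition instead of the cycles themselves. The 0/1 trace
    # is an invented example, not real hardware data.
    in_ring0 = [1, 1, 0, 0, 1, 0, 1, 1, 1, 0]
    ring0_cycles = sum(in_ring0)                      # CPL_CYCLES.RING0 analogue
    ring0_trans = sum(1 for prev, cur in zip([0] + in_ring0, in_ring0)
                      if cur and not prev)            # CPL_CYCLES.RING0_TRANS analogue
    print(ring0_cycles, ring0_trans)                  # -> 6 3
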
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json
new file mode 100644
index 000000000..0afbfd95e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json
@@ -0,0 +1,1309 @@
+[
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "False dependencies in MOB due to partial compare on address.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare on address",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EdgeDetect": "1",
+ "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc.)",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any= 1to count stalled cycles of this core.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of flags-merge uops allocated. Such uops adds delay.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_ISSUED.FLAGS_MERGE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of flags-merge uops being allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of slow LEA or similar uops allocated. Such uop has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of multiply packed/scalar single precision uops allocated.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_ISSUED.SINGLE_MUL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles that the divider is active, includes INT and FP. Set 'edge =1, cmask=1' to count the number of divides.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when divider is busy executing divide operations",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Divide operations executed.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EdgeDetect": "1",
+ "EventName": "ARITH.FPU_DIV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Divide operations executed",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.",
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for H/W prefetch.",
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MOVE_ELIMINATION.INT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x58",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED",
+ "SampleAfterValue": "1000003",
+ "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles the RS is empty for the thread.",
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Stall cycles due to IQ is full.",
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stall cycles because IQ is full",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not taken macro-conditional branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired macro-conditional branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired direct near calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired direct near calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired indirect calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired macro-conditional branches.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc2",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired indirect branches excluding calls and returns.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc8",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired direct near calls.",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired direct near calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Mispredicted indirect branches excluding calls and returns.",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all near executed branches (not necessarily retired).",
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 2.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles which a Uop is dispatched on port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles per core when uops are dispatched to port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles Allocation is stalled due to Resource Related reason.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles stalled due to no store buffers available (not including draining form sync).",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with pending L2 miss loads. Set AnyThread to count per core.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending L2 cache miss loads.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L2 cache miss load* is outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with pending memory loads. Set AnyThread to count per core.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending memory loads.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Total execution stalls.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Total execution stalls.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Number of loads missed L2.",
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to L2 cache misses.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to memory subsystem.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with pending L1 cache miss loads.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "CounterHTOff": "2"
+ },
+ {
+ "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.",
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls due to L1 data cache misses",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. Set Cmask = 1, INV =1 to count stall cycles.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts total number of uops to be executed per-core each cycle.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of uops executed on the core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of instructions at retirement.",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of self-modifying-code machine clears detected.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of executed AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Branch instructions at retirement.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL_R3",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Return instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of not taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of far branches retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Mispredicted branch instructions at retirement.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count cases of saving new LBR records by hardware.",
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count cases of saving new LBR",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of front end re-steers due to BPU misprediction.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
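The core-event fields in the pipeline table above (EventCode, UMask, CounterMask, Invert, EdgeDetect) map directly onto the bit layout of the x86 PERFEVTSEL registers that perf ultimately programs. A minimal Python sketch of that mapping, assuming the standard Intel SDM bit positions (event select [7:0], umask [15:8], edge bit 18, inv bit 23, cmask [31:24]); the helper name and the example dict are illustrative, not part of the perf tree.

def raw_event(event):
    """Encode a pmu-events JSON entry as a raw PERFEVTSEL value (sketch)."""
    val = int(event["EventCode"], 16) & 0xFF               # event select [7:0]
    val |= (int(event["UMask"], 16) & 0xFF) << 8           # unit mask [15:8]
    if event.get("EdgeDetect") == "1":
        val |= 1 << 18                                     # edge detect
    if event.get("Invert") == "1":
        val |= 1 << 23                                     # invert counter mask
    val |= (int(event.get("CounterMask", "0")) & 0xFF) << 24  # cmask [31:24]
    return val

# UOPS_RETIRED.STALL_CYCLES from the table above: event 0xC2, umask 0x1,
# Invert = 1, CounterMask = 1.
stall = {"EventCode": "0xC2", "UMask": "0x1", "Invert": "1", "CounterMask": "1"}
print(hex(raw_event(stall)))  # 0x18001c2, i.e. cpu/event=0xc2,umask=0x1,inv,cmask=1/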
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json
new file mode 100644
index 000000000..267410594
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-cache.json
@@ -0,0 +1,322 @@
+[
+ {
+ "BriefDescription": "Uncore cache clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_C_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.ANY",
+ "Filter": "filter_state=0x1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x11",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+ "Counter": "0,1",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode.demand",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - Uncacheable reads. Derived from unc_c_tor_inserts.miss_opcode.uncacheable",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.UNCACHEABLE",
+ "Filter": "filter_opc=0x187",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for RFO. Derived from unc_c_tor_inserts.miss_opcode.rfo_prefetch",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.RFO_LLC_PREFETCH",
+ "Filter": "filter_opc=0x190",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for code reads. Derived from unc_c_tor_inserts.miss_opcode.code",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.CODE_LLC_PREFETCH",
+ "Filter": "filter_opc=0x191",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC prefetch misses for data reads. Derived from unc_c_tor_inserts.miss_opcode.data_read",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_LLC_PREFETCH",
+ "Filter": "filter_opc=0x192",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe allocating writes that miss LLC - DDIO misses. Derived from unc_c_tor_inserts.miss_opcode.ddio_miss",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
+ "Filter": "filter_opc=0x19c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for PCIe read current. Derived from unc_c_tor_inserts.miss_opcode.pcie_read",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for ItoM writes (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.miss_opcode.itom_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.ITOM_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for PCIe non-snoop reads. Derived from unc_c_tor_inserts.miss_opcode.pcie_read",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_NON_SNOOP_READ",
+ "Filter": "filter_opc=0x1e4",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for PCIe non-snoop writes (full line). Derived from unc_c_tor_inserts.miss_opcode.pcie_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_NON_SNOOP_WRITE",
+ "Filter": "filter_opc=0x1e6",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode.streaming_full",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_FULL",
+ "Filter": "filter_opc=0x18c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode.streaming_partial",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+ "Filter": "filter_opc=0x18d",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Partial PCIe reads. Derived from unc_c_tor_inserts.opcode.pcie_partial",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_PARTIAL_READ",
+ "Filter": "filter_opc=0x195",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe allocating writes that hit in LLC (DDIO hits). Derived from unc_c_tor_inserts.opcode.ddio_hit",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_WRITE",
+ "Filter": "filter_opc=0x19c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode.pcie_read_current",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "ItoM write hits (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.opcode.itom_write_hit",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.ITOM_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe non-snoop reads. Derived from unc_c_tor_inserts.opcode.pcie_read",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_READ",
+ "Filter": "filter_opc=0x1e4",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe non-snoop writes (partial). Derived from unc_c_tor_inserts.opcode.pcie_partial_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+ "Filter": "filter_opc=0x1e5",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe non-snoop writes (full line). Derived from unc_c_tor_inserts.opcode.pcie_full_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_WRITE",
+ "Filter": "filter_opc=0x1e6",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy for all LLC misses that are addressed to local memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.MISS_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x2A",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode.llc_data_read",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy for all LLC misses that are addressed to remote memory",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.MISS_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x8A",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Read requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Write requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0xC",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache along with writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSP_FWD_WB",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x20",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "M line forwarded from remote cache with no writeback to memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPIFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x4",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line response from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPS",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x2",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "Shared line forwarded from remote cache",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_H_SNOOP_RESP.RSPSFWD",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x8",
+ "Unit": "HA"
+ }
+]
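Many of the CBO entries above carry a ScaleUnit of "64Bytes", meaning each count is one cache line of traffic. A minimal sketch, assuming that convention, of turning a raw count into bytes; the parsing here is an illustration, not perf's actual ScaleUnit handling.

import re

def apply_scale_unit(count, scale_unit):
    # Split "64Bytes" into factor 64.0 and unit "Bytes" (illustrative parse).
    m = re.match(r"([0-9.]+)\s*(.*)", scale_unit)
    return count * float(m.group(1)), m.group(2)

# e.g. 1,000,000 LLC_MISSES.DATA_READ counts -> 64 MB of read traffic
val, unit = apply_scale_unit(1_000_000, "64Bytes")
print(f"{val:.0f} {unit}")  # 64000000 Bytes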
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json
new file mode 100644
index 000000000..b798a860b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-interconnect.json
@@ -0,0 +1,48 @@
+[
+ {
+ "BriefDescription": "QPI clock ticks. Use to get percentages for QPI cycles events",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_Q_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Cycles where receiving QPI link is in half-width mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x10",
+ "EventName": "UNC_Q_RxL0P_POWER_CYCLES",
+ "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+ "MetricName": "rxl0p_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Cycles where transmitting QPI link is in half-width mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_Q_TxL0P_POWER_CYCLES",
+ "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+ "MetricName": "txl0p_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of data flits transmitted ",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_Q_TxL_FLITS_G0.DATA",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x2",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of non data (control) flits transmitted ",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x4",
+ "Unit": "QPI LL"
+ }
+]
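The MetricExpr fields above all follow one pattern: the event count expressed as a percentage of the box's clock ticks. A small sketch of that computation; the input counts are made-up illustration values, not measurements.

def pct_of_clockticks(event_count, clockticks):
    # Mirrors e.g. (UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.
    return event_count / clockticks * 100.0

print(pct_of_clockticks(1.2e9, 4.8e9))  # 25.0 -> link in half-width mode 25% of the time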
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json
new file mode 100644
index 000000000..df4b43294
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-memory.json
@@ -0,0 +1,78 @@
+[
+ {
+ "BriefDescription": "Memory page activates for reads and writes",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_M_ACT_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Umask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read requests to memory controller. Derived from unc_m_cas_count.rd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_READ",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_WRITE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0xC",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory controller clock ticks. Use to generate percentages for memory controller CYCLES events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "UNC_M_POWER_CHANNEL_PPD",
+ "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_channel_ppd %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x86",
+ "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+ "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_critical_throttle_cycles %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles Memory is in self refresh power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x43",
+ "EventName": "UNC_M_POWER_SELF_REFRESH",
+ "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_self_refresh %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory page conflicts",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ }
+]
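The iMC CAS-count events above (LLC_MISSES.MEM_READ / LLC_MISSES.MEM_WRITE) each count one 64-byte transfer, so DRAM bandwidth follows from the counts and the elapsed time. A minimal sketch with illustrative numbers, not measured data.

def dram_bandwidth_gbs(reads, writes, seconds):
    # Each CAS count is a 64-byte line transfer (ScaleUnit "64Bytes").
    return (reads + writes) * 64 / seconds / 1e9

print(f"{dram_bandwidth_gbs(50_000_000, 20_000_000, 1.0):.2f} GB/s")  # 4.48 GB/s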
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
new file mode 100644
index 000000000..635c09fda
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/uncore-power.json
@@ -0,0 +1,274 @@
+[
+ {
+ "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_BAND0_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band0_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_BAND1_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band1_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_BAND2_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band2_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_BAND3_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band3_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band0_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band1_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band2_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band3_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+ "Filter": "occ_sel=1",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c0 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+ "Filter": "occ_sel=2",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c3 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+ "Filter": "occ_sel=3",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c6 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xa",
+ "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+ "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "prochot_external_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_limit_thermal_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_os_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x7",
+ "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_current_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Cycles spent changing Frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x60",
+ "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_trans_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
+ "Filter": "filter_band0=12",
+ "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_1200mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
+ "Filter": "filter_band1=20",
+ "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_2000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
+ "Filter": "filter_band2=30",
+ "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_3000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
+ "Filter": "filter_band3=40",
+ "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_4000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band0=12",
+ "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_1200mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band1=20",
+ "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_2000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band2=30",
+ "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_3000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band3=40",
+ "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_4000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ }
+]
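The PCU frequency-band filters above are programmed in 100 MHz units, so filter_band0=12 selects cycles at or above 1.2 GHz, and edge=1 turns a cycles event into a transitions count. A tiny sketch of the unit conversion; the helper name is illustrative.

def band_filter(ghz):
    # filter_bandN value in 100 MHz units, e.g. 1.2 GHz -> 12, 2.0 GHz -> 20
    return int(round(ghz * 10))

print(band_filter(1.2), band_filter(2.0), band_filter(4.0))  # 12 20 40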
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json
new file mode 100644
index 000000000..4645e9d3f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json
@@ -0,0 +1,198 @@
+[
+ {
+ "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "DTLB_LOAD_MISSES.DEMAND_LD_WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "DTLB_LOAD_MISSES.DEMAND_LD_WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "DTLB_LOAD_MISSES.LARGE_PAGE_WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Page walk for a large page completed for Demand load.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Miss in all TLB levels causes a page walk of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Miss in all TLB levels causes a page walk that completes of any page size (4K/2M/4M/1G).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles PMH is busy with this walk.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycle count for an Extended Page table walk. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts load operations that missed 1st level DTLB but hit the 2nd level.",
+ "EventCode": "0x5F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in all ITLB levels that cause page walks.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycle PMH is busy with a walk.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of cache load STLB hits. No page walk.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Completed page walks in ITLB due to STLB load misses for large pages.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "ITLB_MISSES.LARGE_PAGE_WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Completed page walks in ITLB due to STLB load misses for large pages",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.",
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "DTLB flush attempts of the thread-specific entries.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Count number of STLB flush attempts.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "STLB flush attempts",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
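The TLB events above pair naturally: WALK_DURATION counts cycles the page miss handler is busy, and WALK_COMPLETED counts finished walks, so their ratio approximates the average page-walk latency. A minimal sketch with illustrative counts, not measured data.

def avg_walk_cycles(walk_duration, walks_completed):
    # e.g. DTLB_LOAD_MISSES.WALK_DURATION / DTLB_LOAD_MISSES.WALK_COMPLETED
    return walk_duration / walks_completed if walks_completed else 0.0

print(avg_walk_cycles(3_000_000, 100_000))  # 30.0 cycles per completed walk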
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/cache.json b/tools/perf/pmu-events/arch/x86/jaketown/cache.json
new file mode 100644
index 000000000..f723e8f7b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/cache.json
@@ -0,0 +1,1290 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that miss the STLB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that miss the STLB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops with locked access.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of load uops retired",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load uops.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of store uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired store uops.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_MISS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a non-modified state.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Data from remote DRAM either Snoop not needed or Snoop Miss (RspI)",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D.ALLOCATED_IN_M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Allocated L1D data cache lines in M state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D.EVICTION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data cache lines in M state evicted due to replacement.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D.ALL_M_REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D miss oustandings duration in cycles.",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1D is locked.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cacheable and noncachaeble code read requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases when offcore requests buffer cannot take more entries for core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that miss cache lines.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_STORE_LOCK_RQSTS.HIT_E",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that hit cache lines in E state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that hit cache lines in M state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that access cache lines in any state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_L1D_WB_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Count the number of modified Lines evicted from L1 and missed L2. (Non-rejected WBs from the DCU.).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_S",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_L1D_WB_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANS.RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANS.CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache accesses when fetching instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 or LLC HW prefetches that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANS.L1D_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1D writebacks that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANS.L2_FILL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 fill requests that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Transactions accessing L2 pipe.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_IN.I",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in I state filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in S state filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in E state filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache. Lines are filled into the L2 cache when there was an L2 miss.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by demand.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by demand.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PF_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PF_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "L2_LINES_OUT.DIRTY_ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines filling the L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed LLC.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Split locks in SQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from L2 hardware prefetchers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all writebacks from the core to the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x803c8000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x23ffc08000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts non-temporal stores",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand rfo's",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x000105B3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch prefetch RFOs",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x000107F7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo references (demand & prefetch)",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
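
The OFFCORE_RESPONSE.* entries above all share EventCode 0xB7/0xBB and UMask 0x1; what distinguishes them is the 64-bit match value programmed into the MSR named by MSRIndex (0x1a6 or 0x1a7). As a rough aid to reading those MSRValue constants, here is a minimal Python sketch of how such a value decomposes. The field boundaries follow the Sandy Bridge offcore-response layout documented in the Intel SDM (request type in bits 0-15, supplier info in bits 16-30, snoop info from bit 31 up); the individual bit names below are assumptions for illustration, not definitions taken from this file.

# Sketch only: decode a Jaketown OFFCORE_RESPONSE match value.
# Bit names are assumed from the Sandy Bridge SDM layout.
REQUEST_BITS = {
    0: "DMND_DATA_RD", 1: "DMND_RFO", 2: "DMND_CODE_RD", 3: "COREWB",
    4: "PF_L2_DATA_RD", 5: "PF_L2_RFO", 6: "PF_L2_CODE_RD",
    7: "PF_LLC_DATA_RD", 8: "PF_LLC_RFO", 9: "PF_LLC_CODE_RD",
    10: "SPLIT_LOCK_UC_LOCK", 11: "STRM_ST", 15: "OTHER",
}
SUPPLIER_BITS = {
    16: "ANY_RESPONSE", 17: "NO_SUPP", 18: "LLC_HITM", 19: "LLC_HITE",
    20: "LLC_HITS", 21: "LLC_HITF",
}
SNOOP_BITS = {
    31: "SNP_NONE", 32: "SNP_NOT_NEEDED", 33: "SNP_MISS",
    34: "SNP_HIT_NO_FWD", 35: "SNP_HIT_WITH_FWD", 36: "SNP_HITM",
    37: "NON_DRAM",
}

def decode_offcore_rsp(msr_value):
    """Return the names of all bits set in an offcore_rsp match value."""
    names = {**REQUEST_BITS, **SUPPLIER_BITS, **SNOOP_BITS}
    return [name for bit, name in sorted(names.items()) if msr_value & (1 << bit)]

# Example: the value used above by OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE.
print(decode_offcore_rsp(0x10003c0091))
# -> ['DMND_DATA_RD', 'PF_L2_DATA_RD', 'PF_LLC_DATA_RD',
#     'LLC_HITM', 'LLC_HITE', 'LLC_HITS', 'LLC_HITF', 'SNP_HITM']
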
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/floating-point.json b/tools/perf/pmu-events/arch/x86/jaketown/floating-point.json
new file mode 100644
index 000000000..982eda487
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/floating-point.json
@@ -0,0 +1,138 @@
+[
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OTHER_ASSISTS.AVX_STORE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to Output values.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to input values.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a s.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_FP_256.PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of GSSE-256 Computational FP single precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of AVX-256 Computational FP double precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
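
The FP_COMP_OPS_EXE.* and SIMD_FP_256.* events above count uops, not floating-point operations; each uop width contributes a different number of FLOPs. The weights below mirror the GFLOPs MetricExpr that appears in jkt-metrics.json further down (1 for scalar, 2 for SSE packed double, 4 for SSE packed single and AVX-256 packed double, 8 for AVX-256 packed single); the counter readings are hypothetical, for illustration only.

# Sketch: combine raw uop counts from the events above into a FLOP total,
# using the same weights as the GFLOPs metric. Counts are made up.
counts = {
    "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE": 1_000_000,
    "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE": 2_000_000,
    "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE": 500_000,
    "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE": 250_000,
    "SIMD_FP_256.PACKED_DOUBLE": 100_000,
    "SIMD_FP_256.PACKED_SINGLE": 100_000,
}
flops = (1 * (counts["FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE"]
              + counts["FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE"])
         + 2 * counts["FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE"]
         + 4 * (counts["FP_COMP_OPS_EXE.SSE_PACKED_SINGLE"]
                + counts["SIMD_FP_256.PACKED_DOUBLE"])
         + 8 * counts["SIMD_FP_256.PACKED_SINGLE"])
duration_seconds = 1.0  # hypothetical measurement interval
print(flops / 1e9 / duration_seconds, "GFLOP/s")
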
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/frontend.json b/tools/perf/pmu-events/arch/x86/jaketown/frontend.json
new file mode 100644
index 000000000..1b7b1dd36
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/frontend.json
@@ -0,0 +1,305 @@
+[
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes unchacheable accesses.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "IDQ.EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more information.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled. In the ideal case 4 uops can be delivered each cycle. The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them. This event is used in determining the front-end bound category of the top-down pipeline slots characterization.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled .",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline. It excludes cycles when the back-end cannot accept new micro-ops. The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB_FILL.OTHER_CANCEL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_GE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when 1 or more uops were delivered to the by the front end.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "DSB_FILL.ALL_CANCEL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
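
The IDQ_UOPS_NOT_DELIVERED.* family above is one base count plus CounterMask derivations: each un-stalled cycle the hardware adds the number of unfilled issue slots (out of 4), and a CounterMask of N turns that into "cycles with at least N unfilled slots". Below is a small Python sketch of that counting model, run over a hypothetical per-cycle trace (the trace values are made up for illustration).

# Sketch of the IDQ_UOPS_NOT_DELIVERED counting model described above.
# `trace` is a hypothetical list of (uops_delivered, backend_stalled) tuples.
def idq_uops_not_delivered(trace):
    core = 0              # IDQ_UOPS_NOT_DELIVERED.CORE
    cycles_0_deliv = 0    # CYCLES_0_UOPS_DELIV.CORE   (CounterMask 4)
    cycles_le1_deliv = 0  # CYCLES_LE_1_UOP_DELIV.CORE (CounterMask 3)
    for delivered, stalled in trace:
        if stalled:
            continue  # undelivered slots are not charged while the back-end stalls
        undelivered = 4 - delivered  # 4 issue slots per cycle
        core += undelivered
        if undelivered >= 4:
            cycles_0_deliv += 1
        if undelivered >= 3:
            cycles_le1_deliv += 1
    return core, cycles_0_deliv, cycles_le1_deliv

# Example: 3, 4, then 0 uops delivered; the final stalled cycle is ignored.
print(idq_uops_not_delivered([(3, False), (4, False), (0, False), (0, True)]))
# -> (5, 1, 1)
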
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
new file mode 100644
index 000000000..fd7d7c438
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -0,0 +1,140 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
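
The MetricExpr strings above are plain arithmetic over the named event counts; perf's metric engine evaluates them at the end of a `perf stat -M <MetricName>` run. As a minimal sketch of what that evaluation amounts to, assuming the counts were already collected somehow (the `counts` dict and its numbers below are purely illustrative, not part of perf):

# Minimal sketch: recompute a few of the metrics above from raw counts.
# The values in `counts` are made up; in practice they would come from
# parsing `perf stat` output or reading the counters directly.
counts = {
    "INST_RETIRED.ANY": 12_000_000,
    "CPU_CLK_UNHALTED.THREAD": 10_000_000,
    "UOPS_RETIRED.RETIRE_SLOTS": 14_000_000,
}

ipc = counts["INST_RETIRED.ANY"] / counts["CPU_CLK_UNHALTED.THREAD"]
cpi = 1 / ipc                        # mirrors the CPI MetricExpr above
upi = counts["UOPS_RETIRED.RETIRE_SLOTS"] / counts["INST_RETIRED.ANY"]
print(f"IPC={ipc:.2f} CPI={cpi:.2f} UPI={upi:.2f}")
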
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/memory.json b/tools/perf/pmu-events/arch/x86/jaketown/memory.json
new file mode 100644
index 000000000..27e636428
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/memory.json
@@ -0,0 +1,422 @@
+[
+ {
+ "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from memory disambiguation, external snoops, or cross SMT-HW-thread snoop (stores) hitting load buffers. Machine clears can have a significant performance impact if they are happening frequently.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 4 .",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Loads with latency value being above 8.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Loads with latency value being above 16.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Loads with latency value being above 32.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Loads with latency value being above 64.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Loads with latency value being above 128.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Loads with latency value being above 256.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Loads with latency value being above 512.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "Counter": "3",
+ "UMask": "0x2",
+ "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only. (Precise Event - PEBS).",
+ "PRECISE_STORE": "1",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f820004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc00004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67fc00001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f820001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc00001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from remote & local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67fc00010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x67f800010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x87f820010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data forwarded from remote cache",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x107fc00010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC the data is found in M state in remote cache and forwarded from there",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fffc20080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x600400077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DEMAND_MLC_PREF_READS.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all local dram accesses for all demand and L2 prefetches. LLC prefetches are excluded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3FFFC20077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DEMAND_MLC_PREF_READS.LLC_MISS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts all LLC misses for all demand and L2 prefetches. LLC prefetches are excluded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x187FC20077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DEMAND_MLC_PREF_READS.LLC_MISS.REMOTE_HITM_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts all remote cache-to-cache transfers (includes HITM and HIT-Forward) for all demand and L2 prefetches. LLC prefetches are excluded.",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
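
Each entry in this file maps onto one raw hardware event: EventCode and UMask fill the architectural event-select encoding, and for OFFCORE_RESPONSE events the MSRValue is additionally programmed into MSR 0x1a6/0x1a7, which perf exposes on Intel CPUs as the offcore_rsp format term. A hedged sketch of deriving the corresponding perf event string from one of these JSON objects (the to_perf_event helper is hypothetical; only the cpu/event=...,umask=...,offcore_rsp=.../ syntax is perf's own):

# Sketch: turn a pmu-events JSON entry into a raw perf event string.
def to_perf_event(entry):
    # "EventCode" may list alternates, e.g. "0xB7, 0xBB"; take the first.
    event = int(entry["EventCode"].split(",")[0], 16)
    terms = [f"event={event:#x}", f"umask={int(entry['UMask'], 16):#x}"]
    if "MSRValue" in entry:  # offcore response filter bits
        terms.append(f"offcore_rsp={int(entry['MSRValue'], 16):#x}")
    return "cpu/" + ",".join(terms) + "/"

entry = {"EventCode": "0xB7, 0xBB", "UMask": "0x1",
         "MSRValue": "0x3fffc20001"}  # DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE
print(to_perf_event(entry))
# -> cpu/event=0xb7,umask=0x1,offcore_rsp=0x3fffc20001/  (usable with perf stat -e)
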
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/other.json b/tools/perf/pmu-events/arch/x86/jaketown/other.json
new file mode 100644
index 000000000..64b195b82
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/other.json
@@ -0,0 +1,58 @@
+[
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INSTS_WRITTEN_TO_IQ.INSTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Valid instructions written to IQ per cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPL_CYCLES.RING0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPL_CYCLES.RING123",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "HW_PRE_REQ.DL1_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is being counted each time it access the cache & miss it, including if a block is applicable or if hit the Fill Buffer for .",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
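
The CPL_CYCLES pair above splits unhalted core cycles by privilege level, which gives a direct way to estimate the kernel-mode share of execution time without :u/:k event modifiers. A small illustrative sketch, assuming both counts were read over the same measurement window (the numbers are made up):

# Sketch: kernel-mode share of unhalted cycles from the CPL_CYCLES events.
ring0 = 3_000_000     # CPL_CYCLES.RING0   (illustrative value)
ring123 = 7_000_000   # CPL_CYCLES.RING123 (illustrative value)
kernel_share = ring0 / (ring0 + ring123)
print(f"kernel-mode share of unhalted cycles: {kernel_share:.1%}")
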
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json
new file mode 100644
index 000000000..8a597e45e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/pipeline.json
@@ -0,0 +1,1220 @@
+[
+ {
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 3"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc2",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc8",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stall cycles because IQ is full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "INT_MISC.RAT_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are or uops with base, index, and offset source operands using base and index reqisters, where base is EBR/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.",
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least one slow LEA uop being allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "PARTIAL_RAT_STALLS.MUL_SINGLE_UOP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Multiply packed/scalar single precision uops allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the cycles of stall due to lack of load buffers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS2.BOB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Allocator is stalled if BOB is full and new branch needs it.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeilne to the back-end.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count cases of saving new LBR.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of micro-ops retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Actually retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle. This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Return instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect mispredicted near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired instructions experiencing ITLB misses.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when divider is busy executing divide operations.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of the divide operations executed.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "ARITH.FPU_DIV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Divide operations executed.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops dispatched per thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops dispatched from any thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connect to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+ "CounterMask": "2",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+ "CounterMask": "6",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "LD_BLOCKS.ALL_BLOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline. The enhanced address check typically has a performance penalty of 5 cycles.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "AGU_BYPASS_CANCEL.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. the address specified in the base register is in one page and the address [base+offset] is in an.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired. (Precise Event - PEBS).",
+ "TakenAlone": "1",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls2 control structures full for physical registers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with either free list is empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "RESOURCE_STALLS.MEM_RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "RESOURCE_STALLS.OOO_RSRC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "RESOURCE_STALLS2.OOO_RSRC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls out of order resources full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "RESOURCE_STALLS.LB_SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls due to load or store buffers all being in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, See the Intel? 64 and IA-32 Architectures Optimization Reference Manual.",
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EdgeDetect": "1",
+ "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
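
[Editor's note] The core events above pair an EventCode/UMask with modifier fields (CounterMask, Invert, EdgeDetect, AnyThread) that a profiler ultimately folds into a single IA32_PERFEVTSELx programming value. A minimal Python sketch of how those JSON fields compose, using the standard architectural bit layout from the Intel SDM (the helper name and printed example are illustrative, not part of this file):

    # Sketch: map the JSON event fields onto the IA32_PERFEVTSELx layout.
    def perfevtsel(event, umask, cmask=0, inv=False, edge=False,
                   any_thread=False, user=True, kernel=True):
        val = (event & 0xFF) | ((umask & 0xFF) << 8)
        val |= (int(user) << 16) | (int(kernel) << 17)    # USR / OS bits
        val |= (int(edge) << 18) | (int(any_thread) << 21)
        val |= 1 << 22                                    # EN: enable counter
        val |= (int(inv) << 23) | ((cmask & 0xFF) << 24)
        return val

    # UOPS_EXECUTED.CORE_CYCLES_GE_2 above: EventCode 0xB1, UMask 0x2,
    # CounterMask 2 ("at least 2 uops executed this cycle").
    print(hex(perfevtsel(0xB1, 0x2, cmask=2)))
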
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json
new file mode 100644
index 000000000..3fa61d962
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-cache.json
@@ -0,0 +1,210 @@
+[
+ {
+ "BriefDescription": "Uncore cache clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_C_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "All LLC Misses (code+ data rd + data wr - including demand and prefetch)",
+ "Counter": "0,1",
+ "EventCode": "0x34",
+ "EventName": "UNC_C_LLC_LOOKUP.ANY",
+ "Filter": "filter_state=0x1",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x11",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "M line evictions from LLC (writebacks to memory)",
+ "Counter": "0,1",
+ "EventCode": "0x37",
+ "EventName": "UNC_C_LLC_VICTIMS.M_STATE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - demand and prefetch data reads - excludes LLC prefetches. Derived from unc_c_tor_inserts.miss_opcode.demand",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.DATA_READ",
+ "Filter": "filter_opc=0x182",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses - Uncacheable reads. Derived from unc_c_tor_inserts.miss_opcode.uncacheable",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.UNCACHEABLE",
+ "Filter": "filter_opc=0x187",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe allocating writes that miss LLC - DDIO misses. Derived from unc_c_tor_inserts.miss_opcode.ddio_miss",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
+ "Filter": "filter_opc=0x19c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "LLC misses for ItoM writes (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.miss_opcode.itom_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.ITOM_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (full cache line). Derived from unc_c_tor_inserts.opcode.streaming_full",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_FULL",
+ "Filter": "filter_opc=0x18c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Streaming stores (partial cache line). Derived from unc_c_tor_inserts.opcode.streaming_partial",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+ "Filter": "filter_opc=0x18d",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Partial PCIe reads. Derived from unc_c_tor_inserts.opcode.pcie_partial",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_PARTIAL_READ",
+ "Filter": "filter_opc=0x195",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe allocating writes that hit in LLC (DDIO hits). Derived from unc_c_tor_inserts.opcode.ddio_hit",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_WRITE",
+ "Filter": "filter_opc=0x19c",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe read current. Derived from unc_c_tor_inserts.opcode.pcie_read_current",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_READ",
+ "Filter": "filter_opc=0x19e",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "ItoM write hits (as part of fast string memcpy stores). Derived from unc_c_tor_inserts.opcode.itom_write_hit",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.ITOM_WRITE",
+ "Filter": "filter_opc=0x1c8",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe non-snoop reads. Derived from unc_c_tor_inserts.opcode.pcie_read",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_READ",
+ "Filter": "filter_opc=0x1e4",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe non-snoop writes (partial). Derived from unc_c_tor_inserts.opcode.pcie_partial_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_PARTIAL_WRITE",
+ "Filter": "filter_opc=0x1e5",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "PCIe non-snoop writes (full line). Derived from unc_c_tor_inserts.opcode.pcie_full_write",
+ "Counter": "0,1",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.PCIE_NS_WRITE",
+ "Filter": "filter_opc=0x1e6",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x1",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy counter for all LLC misses; we divide this by UNC_C_CLOCKTICKS to get average Q depth",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.MISS_ALL",
+ "Filter": "filter_opc=0x182",
+ "MetricExpr": "(UNC_C_TOR_OCCUPANCY.MISS_ALL / UNC_C_CLOCKTICKS) * 100.",
+ "MetricName": "tor_occupancy.miss_all %",
+ "PerPkg": "1",
+ "UMask": "0xa",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "Occupancy counter for LLC data reads (demand and L2 prefetch). Derived from unc_c_tor_occupancy.miss_opcode.llc_data_read",
+ "EventCode": "0x36",
+ "EventName": "UNC_C_TOR_OCCUPANCY.LLC_DATA_READ",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "CBO"
+ },
+ {
+ "BriefDescription": "read requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "HA"
+ },
+ {
+ "BriefDescription": "write requests to home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_H_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0xc",
+ "Unit": "HA"
+ }
+]
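
[Editor's note] A common use of the CBo entries above is pairing LLC_REFERENCES.PCIE_WRITE with LLC_MISSES.PCIE_WRITE (both use filter_opc=0x19c) to gauge DDIO effectiveness, with the 64Bytes ScaleUnit converting event counts into traffic volume. A minimal sketch; all counts below are hypothetical:

    # Hypothetical counts read back over a 1 s interval.
    ddio_hits = 1_200_000      # LLC_REFERENCES.PCIE_WRITE (DDIO hits)
    ddio_misses = 300_000      # LLC_MISSES.PCIE_WRITE (DDIO misses)
    interval_s = 1.0

    hit_rate = 100.0 * ddio_hits / (ddio_hits + ddio_misses)
    miss_bytes_per_s = ddio_misses * 64 / interval_s   # ScaleUnit: 64Bytes
    print(f"DDIO hit rate: {hit_rate:.1f}%")
    print(f"DDIO miss traffic: {miss_bytes_per_s / 1e6:.1f} MB/s")
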
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json
new file mode 100644
index 000000000..1b53c0e60
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-interconnect.json
@@ -0,0 +1,48 @@
+[
+ {
+ "BriefDescription": "QPI clock ticks. Used to get percentages of QPI cycles events",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x14",
+ "EventName": "UNC_Q_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Cycles where receiving QPI link is in half-width mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x10",
+ "EventName": "UNC_Q_RxL0P_POWER_CYCLES",
+ "MetricExpr": "(UNC_Q_RxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+ "MetricName": "rxl0p_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Cycles where transmitting QPI link is in half-width mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_Q_TxL0P_POWER_CYCLES",
+ "MetricExpr": "(UNC_Q_TxL0P_POWER_CYCLES / UNC_Q_CLOCKTICKS) * 100.",
+ "MetricName": "txl0p_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of data flits transmitted ",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_Q_TxL_FLITS_G0.DATA",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x2",
+ "Unit": "QPI LL"
+ },
+ {
+ "BriefDescription": "Number of non data (control) flits transmitted ",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_Q_TxL_FLITS_G0.NON_DATA",
+ "PerPkg": "1",
+ "ScaleUnit": "8Bytes",
+ "UMask": "0x4",
+ "Unit": "QPI LL"
+ }
+]
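
[Editor's note] The flit events above carry an 8Bytes ScaleUnit, so transmitted QPI data bandwidth is the data-flit count times 8 over the sampling interval; the L0p events divide by UNC_Q_CLOCKTICKS exactly as their MetricExpr states. A minimal sketch with hypothetical counts:

    data_flits = 50_000_000         # UNC_Q_TxL_FLITS_G0.DATA
    txl0p_cycles = 400_000_000      # UNC_Q_TxL0P_POWER_CYCLES
    qpi_clockticks = 4_000_000_000  # UNC_Q_CLOCKTICKS
    interval_s = 1.0

    tx_bw = data_flits * 8 / interval_s               # ScaleUnit: 8Bytes
    half_width_pct = 100.0 * txl0p_cycles / qpi_clockticks
    print(f"QPI TX data bandwidth: {tx_bw / 1e9:.2f} GB/s")
    print(f"TX link in half-width (L0p): {half_width_pct:.1f}% of cycles")
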
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json
new file mode 100644
index 000000000..8551cebeb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-memory.json
@@ -0,0 +1,82 @@
+[
+ {
+ "BriefDescription": "Memory page activates",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_M_ACT_COUNT",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_READ",
+ "PerPkg": "1",
+ "UMask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_WRITE",
+ "PerPkg": "1",
+ "UMask": "0xc",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory controller clock ticks. Used to get percentages of memory controller cycles events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "UNC_M_POWER_CHANNEL_PPD",
+ "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_channel_ppd %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles all ranks are in critical thermal throttle",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x86",
+ "EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES",
+ "MetricExpr": "(UNC_M_POWER_CRITICAL_THROTTLE_CYCLES / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_critical_throttle_cycles %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles Memory is in self refresh power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x43",
+ "EventName": "UNC_M_POWER_SELF_REFRESH",
+ "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_self_refresh %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory page conflicts",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Occupancy counter for memory read queue",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_M_RPQ_OCCUPANCY",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ }
+]
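
[Editor's note] The iMC CAS-count aliases above (LLC_MISSES.MEM_READ / LLC_MISSES.MEM_WRITE) each represent one 64-byte transfer, which is how DRAM bandwidth is conventionally derived from them. A minimal sketch with hypothetical counts:

    cas_reads = 80_000_000     # LLC_MISSES.MEM_READ  (unc_m_cas_count.rd)
    cas_writes = 30_000_000    # LLC_MISSES.MEM_WRITE (unc_m_cas_count.wr)
    interval_s = 1.0

    read_bw = cas_reads * 64 / interval_s    # 64 bytes per CAS command
    write_bw = cas_writes * 64 / interval_s
    print(f"DRAM read:  {read_bw / 1e9:.2f} GB/s")
    print(f"DRAM write: {write_bw / 1e9:.2f} GB/s")
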
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
new file mode 100644
index 000000000..8755693d8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/uncore-power.json
@@ -0,0 +1,273 @@
+[
+ {
+ "BriefDescription": "PCU clock ticks. Use to get percentages of PCU cycles events",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_P_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_BAND0_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band0_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_BAND1_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band1_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_BAND2_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band2_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_BAND3_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band3_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned a frequency greater than or equal to the frequency that is configured in the filter. (filter_band0=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band0_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_BAND0_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND0_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band0_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transistioned to a frequency greater than or equal to the frequency that is configured in the filter. (filter_band1=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band1_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_BAND1_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND1_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band1_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter. (filter_band2=XXX with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band2_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_BAND2_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND2_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band2_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to the frequency that is configured in the filter. (filter_band3=XXX, with XXX in 100Mhz units). One can also use inversion (filter_inv=1) to track cycles when we were less than the configured frequency. Derived from unc_p_freq_band3_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_BAND3_TRANSITIONS",
+ "Filter": "edge=1",
+ "MetricExpr": "(UNC_P_FREQ_BAND3_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_band3_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C0. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C0",
+ "Filter": "occ_sel=1",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C0 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c0 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C3. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events and occupancy triggering to capture other details",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C3",
+ "Filter": "occ_sel=2",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C3 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c3 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "This is an occupancy event that tracks the number of cores that are in C6. It can be used by itself to get the average number of cores in C0, with threshholding to generate histograms, or with other PCU events ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_P_POWER_STATE_OCCUPANCY.CORES_C6",
+ "Filter": "occ_sel=3",
+ "MetricExpr": "(UNC_P_POWER_STATE_OCCUPANCY.CORES_C6 / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "power_state_occupancy.cores_c6 %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that we are in external PROCHOT mode. This mode is triggered when a sensor off the die determines that something off-die (like DRAM) is too hot and must throttle to avoid damaging the chip",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xa",
+ "EventName": "UNC_P_PROCHOT_EXTERNAL_CYCLES",
+ "MetricExpr": "(UNC_P_PROCHOT_EXTERNAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "prochot_external_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when temperature is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_LIMIT_THERMAL_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_limit_thermal_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when the OS is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x6",
+ "EventName": "UNC_P_FREQ_MAX_OS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_OS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_os_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when power is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5",
+ "EventName": "UNC_P_FREQ_MAX_POWER_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_POWER_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_power_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles when current is the upper limit on frequency",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x7",
+ "EventName": "UNC_P_FREQ_MAX_CURRENT_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_MAX_CURRENT_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_max_current_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Cycles spent changing Frequency",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_P_FREQ_TRANS_CYCLES",
+ "MetricExpr": "(UNC_P_FREQ_TRANS_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_trans_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_GE_1200MHZ_CYCLES",
+ "Filter": "filter_band0=12",
+ "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_1200mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_GE_2000MHZ_CYCLES",
+ "Filter": "filter_band1=20",
+ "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_2000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_GE_3000MHZ_CYCLES",
+ "Filter": "filter_band2=30",
+ "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_3000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore was running at a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_GE_4000MHZ_CYCLES",
+ "Filter": "filter_band3=40",
+ "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_4000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 1.2Ghz. Derived from unc_p_freq_band0_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xb",
+ "EventName": "UNC_P_FREQ_GE_1200MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band0=12",
+ "MetricExpr": "(UNC_P_FREQ_GE_1200MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_1200mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of times that the uncore transitioned to a frequency greater than or equal to 2Ghz. Derived from unc_p_freq_band1_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xc",
+ "EventName": "UNC_P_FREQ_GE_2000MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band1=20",
+ "MetricExpr": "(UNC_P_FREQ_GE_2000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_2000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to 3Ghz. Derived from unc_p_freq_band2_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd",
+ "EventName": "UNC_P_FREQ_GE_3000MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band2=30",
+ "MetricExpr": "(UNC_P_FREQ_GE_3000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_3000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the uncore transitioned to a frequency greater than or equal to 4Ghz. Derived from unc_p_freq_band3_cycles",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xe",
+ "EventName": "UNC_P_FREQ_GE_4000MHZ_TRANSITIONS",
+ "Filter": "edge=1,filter_band3=40",
+ "MetricExpr": "(UNC_P_FREQ_GE_4000MHZ_CYCLES / UNC_P_CLOCKTICKS) * 100.",
+ "MetricName": "freq_ge_4000mhz_cycles %",
+ "PerPkg": "1",
+ "Unit": "PCU"
+ }
+]
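
[Editor's note] Every MetricExpr in the PCU file above has the same shape, (EVENT / UNC_P_CLOCKTICKS) * 100, i.e. residency as a percentage of PCU cycles. A minimal sketch of that derivation; the counts are hypothetical:

    def pcu_residency_pct(event_cycles, pcu_clockticks):
        # (EVENT / UNC_P_CLOCKTICKS) * 100, as in the MetricExpr fields above.
        return 100.0 * event_cycles / pcu_clockticks

    pcu_clockticks = 2_000_000_000
    # e.g. UNC_P_FREQ_GE_1200MHZ_CYCLES and UNC_P_FREQ_TRANS_CYCLES:
    print(f"{pcu_residency_pct(1_900_000_000, pcu_clockticks):.1f}%")
    print(f"{pcu_residency_pct(15_000_000, pcu_clockticks):.1f}%")
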
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/virtual-memory.json b/tools/perf/pmu-events/arch/x86/jaketown/virtual-memory.json
new file mode 100644
index 000000000..a654ab771
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/jaketown/virtual-memory.json
@@ -0,0 +1,149 @@
+[
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycle count for an Extended Page table walk. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event count cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. The penalty in this case is approximately 7 cycles.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "STLB flush attempts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
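
[Editor's note] Since the *_WALK_DURATION events above count cycles the page miss handler is busy, summing them against total thread cycles gives a rough estimate of time lost to page walks. A minimal sketch with hypothetical counts (CPU_CLK_UNHALTED.THREAD comes from the core event list, not this file):

    walk_cycles = (
        40_000_000      # ITLB_MISSES.WALK_DURATION
        + 120_000_000   # DTLB_LOAD_MISSES.WALK_DURATION
        + 25_000_000    # DTLB_STORE_MISSES.WALK_DURATION
    )
    thread_cycles = 4_000_000_000   # CPU_CLK_UNHALTED.THREAD
    print(f"~{100.0 * walk_cycles / thread_cycles:.1f}% of cycles in page walks")
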
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/cache.json b/tools/perf/pmu-events/arch/x86/knightslanding/cache.json
new file mode 100644
index 000000000..88ba5994b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/cache.json
@@ -0,0 +1,2305 @@
+[
+ {
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "L2_REQUESTS_REJECT.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of MEC requests from the L2Q that reference a cache line (cacheable requests) exlcuding SW prefetches filling only to L2 cache and L1 evictions (automatically exlcudes L2HWP, UC, WC) that were rejected - Multiple repeated rejects should be counted multiple times"
+ },
+ {
+ "EventCode": "0x31",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "CORE_REJECT_L2Q.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of MEC requests that were not accepted into the L2Q because of any L2 queue reject condition. There is no concept of at-ret here. It might include requests due to instructions in the speculative path."
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "L2_REQUESTS.REFERENCE",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the total number of L2 cache references."
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "L2_REQUESTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of L2 cache misses"
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles the fetch stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses. ",
+ "EventCode": "0x86",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of core cycles the fetch stalls because of an icache miss. This is a cummulative count of core cycles the fetch stalled for all icache misses. "
+ },
+ {
+ "PublicDescription": "This event counts the number of load micro-ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted. ",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MEM_UOPS_RETIRED.L1_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of load micro-ops retired that miss in L1 D cache"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "MEM_UOPS_RETIRED.L2_HIT_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of load micro-ops retired that hit in the L2",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "MEM_UOPS_RETIRED.L2_MISS_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of load micro-ops retired that miss in the L2",
+ "Data_LA": "1"
+ },
+ {
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "MEM_UOPS_RETIRED.UTLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of load micro-ops retired that caused micro TLB miss"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "MEM_UOPS_RETIRED.HITM",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the loads retired that get the data from the other core in the same tile in M state",
+ "Data_LA": "1"
+ },
+ {
+ "PublicDescription": "This event counts the number of load micro-ops retired.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all the load micro-ops retired"
+ },
+ {
+ "PublicDescription": "This event counts the number of store micro-ops retired.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all the store micro-ops retired"
+ },
+ {
+ "EventCode": "0xB7",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the matrix events specified by MSR_OFFCORE_RESPx"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x40000032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x10004032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x08004032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x10000832f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x08000832f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00000132f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000003091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000403091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800403091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000083091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800083091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000013091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000008000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000408000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800408000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000088000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800088000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000018000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000014800 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts all streaming stores (WC and should be programmed on PMC1) that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000014000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial streaming stores (WC and should be programmed on PMC1) that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000002000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000402000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800402000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000082000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800082000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000012000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000001000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000401000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800401000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000081000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800081000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000011000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010800 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Full streaming stores (WC and should be programmed on PMC1) that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000020020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.SUPPLIER_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that provides no supplier details",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that are outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000400001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800400001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_FAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000080001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0800080001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_NEAR_TILE_E_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for any response",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002001000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002002000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002008000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002003091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00020032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0002000070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_M",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in M state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004001000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004002000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004008000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004003091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00040032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0004000070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_E",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in E state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008001000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008002000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008008000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008003091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0008000044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00080032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_S",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in S state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010001000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010002000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010008000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010003091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00100032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0010000070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_THIS_TILE_F",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses which hit its own tile's L2 with data in F state ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800181000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800182000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800188000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800183091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x18001832f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800180070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_NEAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800401000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800402000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800408000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800403091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x18004032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1800400070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.L2_HIT_FAR_TILE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode.",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
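
The OFFCORE_RESPONSE.* entries above all share EventCode 0xB7 and UMask 0x1; only the MSRValue differs, and that value is what perf writes to the offcore response MSRs (0x1a6/0x1a7) named in MSRIndex. The C sketch below shows how one such event can be programmed directly through perf_event_open(2), assuming the usual raw core-PMU encoding (UMask << 8 | EventCode) in config and the MSRValue in config1, which is where perf's offcore_rsp format term lands on Intel core PMUs. The event name and MSRValue come from the table above; the workload placeholder and error handling are illustrative, not part of the patch.

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                                int cpu, int group_fd, unsigned long flags)
    {
            return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
    }

    int main(void)
    {
            struct perf_event_attr attr;
            uint64_t count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = 0x01b7;           /* UMask 0x1 << 8 | EventCode 0xB7 */
            attr.config1 = 0x1800408000ULL; /* MSRValue of OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT_FAR_TILE */
            attr.disabled = 1;
            attr.exclude_kernel = 1;

            fd = perf_event_open(&attr, 0, -1, -1, 0); /* this thread, any CPU */
            if (fd < 0) {
                    perror("perf_event_open");
                    return 1;
            }
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under measurement ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            if (read(fd, &count, sizeof(count)) == sizeof(count))
                    printf("offcore responses: %llu\n", (unsigned long long)count);
            close(fd);
            return 0;
    }

Once these JSON files are compiled in by jevents, the same counter should also be reachable by its lowercased name, e.g. perf stat -e offcore_response.any_request.l2_hit_far_tile.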
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/frontend.json b/tools/perf/pmu-events/arch/x86/knightslanding/frontend.json
new file mode 100644
index 000000000..6d3863668
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/frontend.json
@@ -0,0 +1,34 @@
+[
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "ICACHE.ACCESSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all instruction fetches, including uncacheable fetches."
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all instruction fetches that hit the instruction cache."
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all instruction fetches that miss the instruction cache or produce memory requests. An instruction fetch miss is counted only once and not once for every cycle it is outstanding."
+ },
+ {
+ "EventCode": "0xE7",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MS_DECODED.MS_ENTRY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times the MSROM starts a flow of uops."
+ }
+] \ No newline at end of file
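
ICACHE.ACCESSES (UMask 0x3) is the union of ICACHE.HIT (0x1) and ICACHE.MISSES (0x2), so a front-end miss ratio only needs the two counters read over the same interval. A minimal sketch under the same raw-encoding assumption as above (UMask << 8 | EventCode, counters enabled from creation); error handling is trimmed for brevity:

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* Open a raw core-PMU event (UMask << 8 | EventCode) counting this thread. */
    static int open_raw(uint64_t config)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = config;
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    int main(void)
    {
            int acc = open_raw(0x0380);     /* ICACHE.ACCESSES */
            int mis = open_raw(0x0280);     /* ICACHE.MISSES   */
            uint64_t a = 0, m = 0;

            if (acc < 0 || mis < 0)
                    return 1;
            /* ... workload under measurement ... */
            read(acc, &a, sizeof(a));
            read(mis, &m, sizeof(m));
            if (a)
                    printf("icache miss ratio: %.2f%%\n", 100.0 * m / a);
            return 0;
    }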
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/memory.json
new file mode 100644
index 000000000..700652566
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/memory.json
@@ -0,0 +1,1110 @@
+[
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times the machine clears due to memory ordering hazards"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x01004032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00802032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x01010032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x00808032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100403091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080203091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101003091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080803091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100408000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080208000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101008000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080808000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100402000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080202000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101002000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080802000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100401000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080201000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101001000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080801000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_FAR",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM_NEAR",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_FAR",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.DDR_NEAR",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000020080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from any NON_DRAM system address. This includes MMIO transactions",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2000020020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.NON_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from any NON_DRAM system address. This includes MMIO transactions",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0100400001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Far or Other tile L2 hit far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080200001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from MCDRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0101000001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_FAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Far. ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080800001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR_NEAR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for data responses from DRAM Local.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600100 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.MCDRAM",
+ "MSRIndex": "0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial writes (UC or WT or WP and should be programmed on PMC1) that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180601000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180608000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180603091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x01806032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0180600070 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_PF_L2.MCDRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Prefetch requests that accounts for responses from MCDRAM (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800001 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand cacheable data and L1 prefetch data reads that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800002 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data writes that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800004 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand code reads and prefetch code reads that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800020 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 data RFO prefetches (includes PREFETCHW instruction) that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800040 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L2 code HW prefetches that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800080 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Partial reads (UC or WC and is valid only for Outstanding response type). that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800200 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.UC_CODE_READS.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts UC code reads (valid only for Outstanding response type) that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800400 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Bus locks and split lock requests that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181801000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_SOFTWARE.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Software Prefetches that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181802000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts L1 data HW prefetches that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181808000 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181803091 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data and L1 prefetch data read requests that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800022 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand cacheable data write requests that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0181800044 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts Demand code reads and prefetch code read requests that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x01818032f7 ",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_READ.DDR",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any Read request that accounts for responses from DDR (local and far)",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
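
The matched .MCDRAM and .DDR pairs at the end of this file make it possible to see where a Knights Landing part is actually serving its reads in flat or hybrid memory mode, e.g. OFFCORE_RESPONSE.ANY_READ.MCDRAM (MSRValue 0x01806032f7) against OFFCORE_RESPONSE.ANY_READ.DDR (0x01818032f7). A short sketch along the lines of the earlier one follows; the hardware has only the two OFFCORE_RSP MSRs (0x1a6/0x1a7), so at most two offcore response events can be scheduled simultaneously, which is exactly what this pair needs. The helper and the workload comment are illustrative assumptions:

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    /* EventCode 0xB7 / UMask 0x1 with the event's MSRValue in config1, as above. */
    static int open_offcore(uint64_t msrval)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = 0x01b7;
            attr.config1 = msrval;
            return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
    }

    int main(void)
    {
            int mc = open_offcore(0x01806032f7ULL); /* ANY_READ.MCDRAM */
            int dd = open_offcore(0x01818032f7ULL); /* ANY_READ.DDR    */
            uint64_t m = 0, d = 0;

            if (mc < 0 || dd < 0)
                    return 1;
            /* ... memory-bound workload ... */
            read(mc, &m, sizeof(m));
            read(dd, &d, sizeof(d));
            printf("reads served: MCDRAM %llu, DDR %llu\n",
                   (unsigned long long)m, (unsigned long long)d);
            return 0;
    }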
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json b/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json
new file mode 100644
index 000000000..bb5494cfb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/pipeline.json
@@ -0,0 +1,435 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x7e",
+ "EventName": "BR_INST_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of branch instructions retired that were conditional jumps."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xfe",
+ "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of branch instructions retired that were conditional jumps and predicted taken."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xf9",
+ "EventName": "BR_INST_RETIRED.CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near CALL branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xfd",
+ "EventName": "BR_INST_RETIRED.REL_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near relative CALL branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xfb",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near indirect CALL branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xf7",
+ "EventName": "BR_INST_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near RET branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xeb",
+ "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of branch instructions retired that were near indirect CALL or near indirect JMP."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xbf",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of far branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0x7e",
+ "EventName": "BR_MISP_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted branch instructions retired that were conditional jumps."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xfe",
+ "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted branch instructions retired that were conditional jumps and predicted taken."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xfb",
+ "EventName": "BR_MISP_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xf7",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xeb",
+ "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted branch instructions retired that were near indirect CALL or near indirect JMP."
+ },
+ {
+ "PublicDescription": "This event counts the number of micro-ops retired that were supplied from MSROM.",
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.MS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of micro-ops retired that are from the complex flows issued by the micro-sequencer (MS)."
+ },
+ {
+ "PublicDescription": "This event counts the number of micro-ops (uops) retired. The processor decodes complex macro instructions into a sequence of simpler uops. Most instructions are composed of one or two uops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. ",
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of micro-ops retired"
+ },
+ {
+ "PublicDescription": "This event counts the number of scalar SSE, AVX, AVX2, AVX-512 micro-ops retired (floating point, integer and store) except for loads (memory-to-register mov-type micro ops), division, sqrt.",
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "UOPS_RETIRED.SCALAR_SIMD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of scalar SSE, AVX, AVX2, AVX-512 micro-ops retired. More specifically, it counts scalar SSE, AVX, AVX2, AVX-512 micro-ops except for loads (memory-to-register mov-type micro ops), division, sqrt."
+ },
+ {
+ "PublicDescription": "This event counts the number of packed vector SSE, AVX, AVX2, and AVX-512 micro-ops retired (floating point, integer and store) except for loads (memory-to-register mov-type micro-ops), packed byte and word multiplies.",
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "UOPS_RETIRED.PACKED_SIMD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of vector SSE, AVX, AVX2, AVX-512 micro-ops retired. More specifically, it counts packed SSE, AVX, AVX2, AVX-512 micro-ops (both floating point and integer) except for loads (memory-to-register mov-type micro-ops), packed byte and word multiplies."
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times that the machine clears due to program modifying data within 1K of a recently fetched code page"
+ },
+ {
+ "PublicDescription": "This event counts the number of times that the pipeline stalled due to FP operations needing assists.",
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of floating operations retired that required microcode assists"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "MACHINE_CLEARS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all nukes"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "NO_ALLOC_CYCLES.ROB_FULL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and the ROB is full"
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted branch to retire.",
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "NO_ALLOC_CYCLES.MISPREDICTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and the alloc pipe is stalled waiting for a mispredicted branch to retire."
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "NO_ALLOC_CYCLES.RAT_STALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated and a RATstall (caused by reservation station full) is asserted. "
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles when no uops are allocated, the instruction queue is empty and the alloc pipe is stalled waiting for instructions to be fetched.",
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x90",
+ "EventName": "NO_ALLOC_CYCLES.NOT_DELIVERED",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of core cycles when no micro-ops are allocated, the IQ is empty, and no other condition is blocking allocation."
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x7f",
+ "EventName": "NO_ALLOC_CYCLES.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the total number of core cycles when no micro-ops are allocated for any reason."
+ },
+ {
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "RS_FULL_STALL.MEC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of core cycles when allocation pipeline is stalled and is waiting for a free MEC reservation station entry."
+ },
+ {
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x1f",
+ "EventName": "RS_FULL_STALL.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the total number of core cycles the Alloc pipeline is stalled when any one of the reservation stations is full. "
+ },
+ {
+ "EventCode": "0xC0",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the total number of instructions retired"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the divider is busy. More specifically cycles when the divide unit is unable to accept a new divide uop because it is busy processing a previously dispatched uop. The cycles will be counted irrespective of whether or not another divide uop is waiting to enter the divide unit (from the RS). This event counts integer divides, x87 divides, divss, divsd, sqrtss, sqrtsd event and does not count vector divides.",
+ "EventCode": "0xCD",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CYCLES_DIV_BUSY.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles the number of core cycles when divider is busy. Does not imply a stall waiting for the divider. "
+ },
+ {
+ "PublicDescription": "This event counts the number of instructions that retire. For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires. The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Fixed Counter: Counts the number of instructions retired"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of unhalted core clock cycles"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of unhalted reference clock cycles"
+ },
+ {
+        "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason, this event may have a changing ratio with regard to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BACLEARS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times the front end resteers for any branch as a result of another branch handling mechanism in the front end."
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "BACLEARS.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times the front end resteers for RET branches as a result of another branch handling mechanism in the front end."
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "BACLEARS.COND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times the front end resteers for conditional branches as a result of another branch handling mechanism in the front end."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "RECYCLEQ.LD_BLOCK_ST_FORWARD",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences when a retired load gets blocked because its address partially overlaps with a store",
+ "Data_LA": "1"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "RECYCLEQ.LD_BLOCK_STD_NOTREADY",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences when a retired load gets blocked because its address overlaps with a store whose data is not ready"
+ },
+ {
+        "PublicDescription": "This event counts the number of retired stores that experienced a cache line boundary split (Precise Event). Note that each split should be counted only once.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "RECYCLEQ.ST_SPLITS",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences when a retired store is a cache line split. Each split should be counted only once."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "RECYCLEQ.LD_SPLITS",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the number of occurrences when a retired load is a cache line split. Each split should be counted only once.",
+ "Data_LA": "1"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "RECYCLEQ.LOCK",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts all retired locked loads. Stores are not included, as counting them here would double count them"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "RECYCLEQ.STA_FULL",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the store micro-ops retired that were pushed into the recycle queue because the store address buffer is full"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "RECYCLEQ.ANY_LD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts any retired load that was pushed into the recycle queue for any reason."
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "RECYCLEQ.ANY_ST",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts any retired store that was pushed into the recycle queue for any reason."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xf9",
+ "EventName": "BR_MISP_RETIRED.CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near CALL branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xfd",
+ "EventName": "BR_MISP_RETIRED.REL_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near relative CALL branch instructions retired."
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xbf",
+ "EventName": "BR_MISP_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted far branch instructions retired."
+ }
+] \ No newline at end of file
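
Aside: each entry's "EventCode" and "UMask" compose into the raw event encoding that perf programs into an IA32_PERFEVTSELx register, with the event select in bits 0-7 and the unit mask in bits 8-15. A minimal Python sketch of that composition, using NO_ALLOC_CYCLES.ALL from the file above (the helper name is illustrative, not part of perf):

    # Compose the raw config that perf's cpu/event=0xCA,umask=0x7f/
    # syntax encodes: event select in bits 0-7, unit mask in bits 8-15.
    def raw_config(event_code: int, umask: int) -> int:
        return (umask << 8) | event_code

    assert raw_config(0xCA, 0x7F) == 0x7FCA
    # Once these JSON files are compiled into perf, the symbolic name
    # works directly, e.g.: perf stat -e NO_ALLOC_CYCLES.ALL -a -- sleep 1
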
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/uncore-memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/uncore-memory.json
new file mode 100644
index 000000000..e3bcd86c4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/uncore-memory.json
@@ -0,0 +1,42 @@
+[
+ {
+        "BriefDescription": "DDR bandwidth read (CPU traffic only) (MB/sec).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x03",
+ "EventName": "UNC_M_CAS_COUNT.RD",
+ "PerPkg": "1",
+ "ScaleUnit": "6.4e-05MiB",
+ "UMask": "0x01",
+ "Unit": "imc"
+ },
+ {
+        "BriefDescription": "DDR bandwidth write (CPU traffic only) (MB/sec).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x03",
+ "EventName": "UNC_M_CAS_COUNT.WR",
+ "PerPkg": "1",
+ "ScaleUnit": "6.4e-05MiB",
+ "UMask": "0x02",
+ "Unit": "imc"
+ },
+ {
+        "BriefDescription": "MCDRAM bandwidth read (CPU traffic only) (MB/sec).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x01",
+ "EventName": "UNC_E_RPQ_INSERTS",
+ "PerPkg": "1",
+ "ScaleUnit": "6.4e-05MiB",
+ "UMask": "0x01",
+ "Unit": "edc_eclk"
+ },
+ {
+        "BriefDescription": "MCDRAM bandwidth write (CPU traffic only) (MB/sec).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x02",
+ "EventName": "UNC_E_WPQ_INSERTS",
+ "PerPkg": "1",
+ "ScaleUnit": "6.4e-05MiB",
+ "UMask": "0x01",
+ "Unit": "edc_eclk"
+ }
+]
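
Aside: perf multiplies a raw count by the "ScaleUnit" factor before printing, so these events read out directly in the stated unit. The 6.4e-05 above equals 64 / 10^6, i.e. one 64-byte cache line per CAS (or queue insert) expressed in decimal megabytes, the "MiB" spelling notwithstanding. A minimal sketch of the same arithmetic (function name illustrative):

    # One UNC_M_CAS_COUNT.RD event corresponds to one 64-byte line read
    # from DDR; 64 / 10**6 == 6.4e-05, the ScaleUnit factor above.
    def ddr_read_bw_mb_s(cas_rd_count: int, elapsed_s: float) -> float:
        return cas_rd_count * 6.4e-05 / elapsed_s

    # e.g. 1.5e9 read CASes over 1 s, summed across the imc channels:
    print(ddr_read_bw_mb_s(1_500_000_000, 1.0))  # ~96000 MB/s
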
diff --git a/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json b/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json
new file mode 100644
index 000000000..f31594507
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/knightslanding/virtual-memory.json
@@ -0,0 +1,65 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of load micro-ops retired that cause a DTLB miss",
+ "Data_LA": "1"
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_WALKS",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number of D-side page walks that are completed or started. Page walks started in the speculative path are also counted",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles for all the D-side page walks. The cycles for page walks started in the speculative path will also be included."
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_WALKS",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number of I-side page walks that are completed.",
+ "EdgeDetect": "1"
+ },
+ {
+        "PublicDescription": "This event counts every cycle when an I-side page walk (a walk due to an instruction fetch) is in progress.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles for all the I-side page walks. The cycles for page walks started in the speculative path will also be included."
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.WALKS",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts the total number of page walks that are completed (I-side and D-side)",
+ "EdgeDetect": "1"
+ },
+ {
+ "PublicDescription": "This event counts every cycle when a data (D) page walk or instruction (I) page walk is in progress.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.CYCLES",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Counts the total number of core cycles for all the page walks. The cycles for page walks started in the speculative path will also be included."
+ }
+] \ No newline at end of file
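
Aside: PAGE_WALKS.D_SIDE_WALKS and PAGE_WALKS.D_SIDE_CYCLES program the same event (EventCode 0x05, UMask 0x1); the WALKS variant sets "EdgeDetect" so only 0-to-1 transitions of the walk-in-progress condition are counted, turning a cycles event into a walk-start counter. Dividing the two therefore estimates the average walk duration; a minimal sketch (helper name illustrative):

    # D_SIDE_CYCLES counts every cycle a D-side walk is in progress;
    # the EdgeDetect variant (D_SIDE_WALKS) counts only walk starts.
    def avg_dside_walk_cycles(walk_cycles: int, walk_starts: int) -> float:
        return walk_cycles / walk_starts if walk_starts else 0.0

    # Hypothetical measurement once the events are available in perf:
    #   perf stat -e PAGE_WALKS.D_SIDE_CYCLES,PAGE_WALKS.D_SIDE_WALKS ./a.out
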
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
new file mode 100644
index 000000000..7e3cce3bc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -0,0 +1,34 @@
+Family-model,Version,Filename,EventType
+GenuineIntel-6-56,v5,broadwellde,core
+GenuineIntel-6-3D,v17,broadwell,core
+GenuineIntel-6-47,v17,broadwell,core
+GenuineIntel-6-4F,v10,broadwellx,core
+GenuineIntel-6-1C,v4,bonnell,core
+GenuineIntel-6-26,v4,bonnell,core
+GenuineIntel-6-27,v4,bonnell,core
+GenuineIntel-6-36,v4,bonnell,core
+GenuineIntel-6-35,v4,bonnell,core
+GenuineIntel-6-5C,v8,goldmont,core
+GenuineIntel-6-7A,v1,goldmontplus,core
+GenuineIntel-6-3C,v24,haswell,core
+GenuineIntel-6-45,v24,haswell,core
+GenuineIntel-6-46,v24,haswell,core
+GenuineIntel-6-3F,v17,haswellx,core
+GenuineIntel-6-3A,v18,ivybridge,core
+GenuineIntel-6-3E,v19,ivytown,core
+GenuineIntel-6-2D,v20,jaketown,core
+GenuineIntel-6-57,v9,knightslanding,core
+GenuineIntel-6-85,v9,knightslanding,core
+GenuineIntel-6-1E,v2,nehalemep,core
+GenuineIntel-6-1F,v2,nehalemep,core
+GenuineIntel-6-1A,v2,nehalemep,core
+GenuineIntel-6-2E,v2,nehalemex,core
+GenuineIntel-6-[4589]E,v24,skylake,core
+GenuineIntel-6-37,v13,silvermont,core
+GenuineIntel-6-4D,v13,silvermont,core
+GenuineIntel-6-4C,v13,silvermont,core
+GenuineIntel-6-2A,v15,sandybridge,core
+GenuineIntel-6-2C,v2,westmereep-dp,core
+GenuineIntel-6-25,v2,westmereep-sp,core
+GenuineIntel-6-2F,v2,westmereex,core
+GenuineIntel-6-55,v1,skylakex,core
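
Aside: the Family-model column is matched as a regular expression against the CPU's identity string (vendor, family, then the model number in hex), which is how the single row GenuineIntel-6-[4589]E covers the 0x4E/0x5E/0x8E/0x9E Skylake client models. A minimal sketch of the lookup, with the table truncated to two rows for illustration:

    import re

    # Subset of mapfile.csv: (Family-model regex, event directory).
    MAPFILE = [
        ("GenuineIntel-6-57", "knightslanding"),
        ("GenuineIntel-6-[4589]E", "skylake"),
    ]

    def events_dir(cpuid: str):
        for pattern, dirname in MAPFILE:
            if re.fullmatch(pattern, cpuid):
                return dirname
        return None

    assert events_dir("GenuineIntel-6-5E") == "skylake"
    assert events_dir("GenuineIntel-6-57") == "knightslanding"
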
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/cache.json b/tools/perf/pmu-events/arch/x86/nehalemep/cache.json
new file mode 100644
index 000000000..a11029efd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/cache.json
@@ -0,0 +1,3229 @@
+[
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "CACHE_LOCK_CYCLES.L1D",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D locked"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D and L2 locked"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D.M_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines replaced in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D.M_REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines allocated in the M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D.M_SNOOP_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D snoop eviction of cache lines in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D.REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache lines allocated"
+ },
+ {
+ "EventCode": "0x43",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_ALL_REF.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All references to the L1 data cache"
+ },
+ {
+ "EventCode": "0x43",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_ALL_REF.CACHEABLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cacheable reads and writes"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_CACHE_LD.E_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in E state"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_LD.I_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in I state (misses)"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE_LD.M_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in M state"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0xf",
+ "EventName": "L1D_CACHE_LD.MESI",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache reads"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_CACHE_LD.S_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in S state"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_CACHE_LOCK.E_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load locks in E state"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_LOCK.HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load lock hits"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE_LOCK.M_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load locks in M state"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_CACHE_LOCK.S_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load locks in S state"
+ },
+ {
+ "EventCode": "0x53",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x52",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x41",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_CACHE_ST.E_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache stores in E state"
+ },
+ {
+ "EventCode": "0x41",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE_ST.M_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache stores in M state"
+ },
+ {
+ "EventCode": "0x41",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_CACHE_ST.S_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache stores in S state"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_PREFETCH.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch misses"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_PREFETCH.REQUESTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_PREFETCH.TRIGGERS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests triggered"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D_WB_L2.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in E state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_WB_L2.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D_WB_L2.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in M state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L1D_WB_L2.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L1 writebacks to L2"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_WB_L2.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_DATA_RQSTS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data prefetches"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the S state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ANY",
+ "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the E state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the S state"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_LINES_OUT.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.IFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.IFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.IFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.LD_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_RQSTS.LD_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.LOADS",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xaa",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PREFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PREFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.PREFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 prefetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.RFOS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO requests"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANSACTIONS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANSACTIONS.FILL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 fill transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANSACTIONS.IFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANSACTIONS.L1D_WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D writeback to L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANSACTIONS.LOAD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 Load transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANSACTIONS.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANSACTIONS.RFO",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANSACTIONS.WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 writeback to LLC transactions"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_WRITE.LOCK.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in E state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe0",
+ "EventName": "L2_WRITE.LOCK.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_WRITE.LOCK.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_WRITE.LOCK.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_WRITE.LOCK.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_WRITE.LOCK.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in S state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "L2_WRITE.RFO.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_WRITE.RFO.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_WRITE.RFO.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_WRITE.RFO.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_WRITE.RFO.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in S state"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Longest latency cache miss"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Longest latency cache reference"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_INST_RETIRED.LOADS",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a load (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_INST_RETIRED.STORES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions retired which contain a store (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "Retired loads that miss L1D and hit a previously allocated LFB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+ "SampleAfterValue": "10000",
+        "BriefDescription": "Retired loads that miss the LLC (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+ "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit valid versions in the LLC (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+ "SampleAfterValue": "40000",
+        "BriefDescription": "Retired loads that hit a sibling core's L2 in modified or unmodified states (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_UNCORE_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired with a data source of local DRAM or locally homed remote HITM (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM",
+ "SampleAfterValue": "40000",
+        "BriefDescription": "Load instructions retired that HIT modified data in a sibling core (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
+ "SampleAfterValue": "20000",
+        "BriefDescription": "Load instructions retired with a remote cache HIT data source (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+ "SampleAfterValue": "10000",
+        "BriefDescription": "Load instructions retired with a data source of remote DRAM or remote home-remote cache HITM (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_UNCORE_RETIRED.UNCACHEABLE",
+ "SampleAfterValue": "4000",
+        "BriefDescription": "Load instructions retired with an IO data source (Precise Event)"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore L1 data cache writebacks"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests blocked due to Super Queue full"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue lock splits across a cache line"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "STORE_BLOCKS.AT_RET",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads delayed with at-Retirement block code"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "STORE_BLOCKS.L1D_BLOCK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cacheable loads delayed with L1D block code"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x0",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x400",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100",
+ "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1000",
+ "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5",
+ "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x800",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50",
+ "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "500",
+ "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5000",
+ "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "3",
+ "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50000",
+ "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x1000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20",
+ "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "200",
+ "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x2000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10",
+ "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F11",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF11",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x111",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x211",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x411",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x711",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4711",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F44",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF44",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x144",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x244",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x444",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x744",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4744",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7FFF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFFFF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x80FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x47FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x18FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x38FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x10FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F22",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF22",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x122",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x222",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x422",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x722",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4722",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F08",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF08",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore writebacks",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x108",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x408",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x708",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4708",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F77",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF77",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code or data read requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x177",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x277",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x477",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x777",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4777",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F33",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any cache_dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF33",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any location",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x133",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x233",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x433",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x733",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4733",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F03",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF03",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x103",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x203",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x403",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x703",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4703",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F01",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF01",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x101",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x201",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x401",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x701",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4701",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F04",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF04",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x104",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x204",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x404",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x704",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4704",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F02",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF02",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x102",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x202",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x402",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x702",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4702",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F80",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF80",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore other requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x180",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x280",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x480",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x780",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4780",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F30",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF30",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x130",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x230",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x430",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x730",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4730",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F10",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF10",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x110",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x210",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x410",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x710",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4710",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F40",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF40",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x140",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x240",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x440",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x740",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4740",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F20",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF20",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x120",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x220",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x420",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x720",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4720",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F70",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF70",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x170",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x270",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x470",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x770",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4770",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+ "Offcore": "1"
+ }
+]
\ No newline at end of file
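Each OFFCORE_RESPONSE.* entry above combines a fixed event selection (EventCode 0xB7, UMask 0x1, pinned to counter 2) with a request/response filter: the MSRValue bit pattern is written to the MSR named by MSRIndex (0x1A6). A minimal sketch of turning such an entry into perf's raw-event syntax, assuming the file path below and that the kernel exposes the filter MSR as an offcore_rsp format attribute:

#!/usr/bin/env python3
# Minimal sketch: map a pmu-events entry onto perf's raw-event syntax.
# The file path and the offcore_rsp format attribute are assumptions.
import json

with open("tools/perf/pmu-events/arch/x86/nehalemep/cache.json") as f:
    events = json.load(f)

for ev in events:
    if ev["EventName"] == "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION":
        # EventCode/UMask select the offcore response event; MSRValue is
        # the request/response filter written to MSR 0x1A6 (MSRIndex).
        print("cpu/event=%s,umask=%s,offcore_rsp=%s/"
              % (ev["EventCode"], ev["UMask"], ev["MSRValue"]))

The printed string (cpu/event=0xB7,umask=0x1,offcore_rsp=0xFF01/) is the form a perf stat -e invocation would consume.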
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/floating-point.json b/tools/perf/pmu-events/arch/x86/nehalemep/floating-point.json
new file mode 100644
index 000000000..7d2f71a9d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/floating-point.json
@@ -0,0 +1,229 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ASSIST.ALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.INPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating poiint assists for invalid input value (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.OUTPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_COMP_OPS_EXE.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MMX Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP double precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE and SSE2 FP Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP packed Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP scalar Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP single precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE2 integer Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Computational floating-point operations executed"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "FP_MMX_TRANS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All Floating Point to and from MMX transitions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_MMX_TRANS.TO_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from MMX to Floating Point instructions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_MMX_TRANS.TO_MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from Floating Point to MMX instructions"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_128.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer pack operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_128.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer arithmetic operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer logical operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_128.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer multiply operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_128.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shift operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_128.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer unpack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_64.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit pack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_64.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit logical operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_64.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_64.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shift operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_64.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit unpack operations"
+ }
+]
\ No newline at end of file
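The FP_ASSIST.* entries above carry "PEBS": "1", marking them as precise-sampling capable (usable with perf's :p/:pp event modifiers). A small sketch, under the same layout and path assumptions, that lists the PEBS-capable events in a file:

#!/usr/bin/env python3
# Sketch: list the PEBS-capable (precise-sampling) events in a pmu-events file.
import json

with open("tools/perf/pmu-events/arch/x86/nehalemep/floating-point.json") as f:
    events = json.load(f)

for ev in events:
    if ev.get("PEBS") == "1":
        # A PEBS event could be sampled precisely, e.g. perf record -e FP_ASSIST.ALL:pp
        print(ev["EventName"], "-", ev["BriefDescription"])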
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/frontend.json b/tools/perf/pmu-events/arch/x86/nehalemep/frontend.json
new file mode 100644
index 000000000..e5e21e034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/frontend.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions decoded"
+ },
+ {
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused instructions decoded"
+ },
+ {
+ "EventCode": "0x19",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TWO_UOP_INSTS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Two Uop instructions decoded"
+ }
+]
\ No newline at end of file
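The Counter field records which general-purpose counters an event may be scheduled on: the decode events above accept any of 0-3, while the OFFCORE_RESPONSE.* events elsewhere in this patch are pinned to counter 2. A sketch, under the same assumptions, that parses the constraint:

#!/usr/bin/env python3
# Sketch: parse the "Counter" field into the set of counters an event may use.
import json

def allowed_counters(ev):
    # "0,1,2,3" -> {0, 1, 2, 3}; "2" -> {2}
    return {int(c) for c in ev["Counter"].split(",")}

with open("tools/perf/pmu-events/arch/x86/nehalemep/frontend.json") as f:
    for ev in json.load(f):
        print(ev["EventName"], sorted(allowed_counters(ev)))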
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/memory.json b/tools/perf/pmu-events/arch/x86/nehalemep/memory.json
new file mode 100644
index 000000000..f914a4525
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/memory.json
@@ -0,0 +1,739 @@
+[
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x60FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF8FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x40FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x20FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any LLC miss",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFOs and prefetches satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+        "BriefDescription": "Offcore data reads, RFOs and prefetches satisfied by the remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
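
The OFFCORE_RESPONSE entries that close this file (presumably memory.json, going by the ordering of the diffs that follow) all share event 0xB7 / umask 0x1; what distinguishes them is MSRValue, the request/response bit pattern that must be programmed into the off-core response MSR named by MSRIndex (0x1A6). Below is a minimal sketch of turning these entries into raw perf event specs; the file path and the "offcore_rsp" format attribute are assumptions (the attribute exists on many Intel PMUs, check /sys/bus/event_source/devices/cpu/format/ before relying on it), not something this patch establishes:

    #!/usr/bin/env python3
    # Hedged sketch, not perf's own logic: map each Offcore entry to a
    # raw event spec of the form cpu/event=...,umask=...,offcore_rsp=.../
    import json

    PATH = "tools/perf/pmu-events/arch/x86/nehalemep/memory.json"  # assumed

    with open(PATH) as f:
        events = json.load(f)

    for ev in events:
        if ev.get("Offcore") != "1":
            continue
        # event=0xB7, umask=0x1; MSRValue goes into the offcore_rsp field.
        spec = "cpu/event={},umask={},offcore_rsp={}/".format(
            ev["EventCode"].lower(), ev["UMask"], ev["MSRValue"].lower())
        print("{:55} {}".format(ev["EventName"], spec))
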
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/other.json b/tools/perf/pmu-events/arch/x86/nehalemep/other.json
new file mode 100644
index 000000000..af0860622
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/other.json
@@ -0,0 +1,210 @@
+[
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_CLEARS.EARLY",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Early Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BPU_CLEARS.LATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Late Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_MISSED_CALL_RET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch prediction unit missed call or return"
+ },
+ {
+ "EventCode": "0xD5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ES_REG_RENAMES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ES segment renames"
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IO_TRANSACTIONS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "I/O transactions"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1I.CYCLES_STALLED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch stall cycles"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1I.HITS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch hits"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1I.MISSES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch misses"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L1I.READS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I Instruction fetches"
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LARGE_ITLB.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Large ITLB hit"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "LOAD_DISPATCH.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All loads dispatched"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "LOAD_DISPATCH.MOB",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_DISPATCH.RS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched that bypass the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_DISPATCH.RS_DELAYED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from stage 305"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PARTIAL_ADDRESS_ALIAS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "False dependencies due to partial address aliasing"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RAT_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All RAT stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RAT_STALLS.FLAGS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Flag stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RAT_STALLS.REGISTERS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Partial register stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RAT_STALLS.ROB_READ_PORT",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "ROB read port stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RAT_STALLS.SCOREBOARD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Scoreboard stall cycles"
+ },
+ {
+ "EventCode": "0x4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "SB_DRAIN.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All Store buffer stall cycles"
+ },
+ {
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SEG_RENAME_STALLS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Segment rename stall cycles"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOP_RESPONSE.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HIT to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOP_RESPONSE.HITE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITE to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOP_RESPONSE.HITM",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITM to snoop"
+ },
+ {
+ "EventCode": "0xF6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SQ_FULL_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue full stall cycles"
+ }
+] \ No newline at end of file
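
other.json above collects the miscellaneous events (L1I fetches, large-page ITLB hits, RAT stalls, snoop responses). Once a perf build has run these tables through jevents, the JSON EventName strings resolve as ordinary event names on a matching family/model. A small illustrative driver follows, assuming perf is on PATH and the host is actually a Nehalem-EP part:

    # Hedged sketch: count L1I fetch hits/misses over a one-second sleep.
    # Event names come from the L1I.* entries above; perf matches them
    # case-insensitively on a CPU whose mapfile entry selects nehalemep.
    import subprocess

    cmd = ["perf", "stat", "-e", "l1i.hits,l1i.misses", "--", "sleep", "1"]
    subprocess.run(cmd, check=False)
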
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/pipeline.json b/tools/perf/pmu-events/arch/x86/nehalemep/pipeline.json
new file mode 100644
index 000000000..41006ddcd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/pipeline.json
@@ -0,0 +1,881 @@
+[
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.CYCLES_DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles the divider is busy"
+ },
+ {
+ "EventCode": "0x14",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.DIV",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide Operations executed",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ARITH.MUL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations executed"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BACLEAR.BAD_TARGET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted with bad target address"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR.CLEAR",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "BACLEAR asserted, regardless of cause"
+ },
+ {
+ "EventCode": "0xA7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR_FORCE_IQ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction queue forced BACLEAR"
+ },
+ {
+ "EventCode": "0xE0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch instructions decoded"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_INST_EXEC.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_EXEC.COND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Conditional branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_EXEC.DIRECT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Unconditional branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "20000",
+        "BriefDescription": "Indirect non-call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_INST_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_INST_EXEC.NON_CALLS",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "All non-call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect return branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_EXEC.TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Retired near call instructions (Precise Event)"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_MISP_EXEC.ANY",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_EXEC.COND",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_EXEC.DIRECT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted unconditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted indirect non-call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_MISP_EXEC.NON_CALLS",
+ "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non-call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted return branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_MISP_EXEC.TAKEN",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted retired near call instructions (Precise Event)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_P",
+ "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total CPU cycles",
+ "CounterMask": "2"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "ILD_STALL.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Any Instruction Length Decoder stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction Queue full stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Length Change Prefix stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ILD_STALL.MRU",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stall cycles due to BPU MRU bypass"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ILD_STALL.REGEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Regen stall cycles"
+ },
+ {
+ "EventCode": "0x18",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_DECODED.DEC0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions that must be decoded by decoder 0"
+ },
+ {
+ "EventCode": "0x1E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles instructions are written to the instruction queue"
+ },
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Instructions written to the instruction queue"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (fixed counter)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "INST_RETIRED.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired MMX instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired floating-point operations (Precise Event)"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load operations conflicting with software prefetches"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xA8",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.INACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD_OVERFLOW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loops that can't stream from the instruction queue"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Cycles machine clear asserted"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEM_ORDER",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RESOURCE_STALLS.FPCW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FPU control word write stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LOAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Load buffer stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS.MXCSR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MXCSR rename stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RESOURCE_STALLS.OTHER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Other Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reservation Station full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.STORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Store buffer stall cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+ },
+ {
+ "EventCode": "0xDB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOP_UNFUSION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uop unfusions due to FP exceptions"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DECODED.ESP_FOLDING",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer instructions decoded"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DECODED.ESP_SYNC",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer sync operations"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles Uops are decoded by the Microcode Sequencer",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xD1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DECODED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops are decoded",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall periods with no Uops executed on any port (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Stall periods with no Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.PORT0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 0"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 0, 1 or 5"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops executed on ports 0, 1 or 5",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.PORT1",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT2_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 2 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT234_CORE",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 2, 3 or 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT3_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 3 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT4_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED.PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 5"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued on any thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops were issued on either thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_ISSUED.FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Fused Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are being retired",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_RETIRED.MACRO_FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retirement slots used (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ }
+] \ No newline at end of file
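
pipeline.json is where the modifier fields first do real work: Invert, CounterMask, EdgeDetect and AnyThread correspond to the INV (bit 23), CMASK (bits 24-31), E (bit 18) and ANY (bit 21) fields of the architectural IA32_PERFEVTSEL register, which is how UOPS_ISSUED.STALL_CYCLES (event 0xE, umask 0x1, Invert=1, CounterMask=1) counts cycles in which fewer than one uop was issued. A sketch of that encoding follows; it mirrors the architectural layout as an illustration and is not a copy of perf's jevents code:

    # Encode an entry from this file into an x86 raw perf config value
    # (event select in bits 0-7, unit mask in 8-15, edge detect bit 18,
    # any-thread bit 21, invert bit 23, counter mask in bits 24-31).
    def raw_config(entry):
        cfg = int(entry["EventCode"], 16) & 0xFF
        cfg |= (int(entry["UMask"], 16) & 0xFF) << 8
        if entry.get("EdgeDetect") == "1":
            cfg |= 1 << 18
        if entry.get("AnyThread") == "1":
            cfg |= 1 << 21
        if entry.get("Invert") == "1":
            cfg |= 1 << 23
        cfg |= (int(entry.get("CounterMask", "0")) & 0xFF) << 24
        return cfg

    # UOPS_ISSUED.STALL_CYCLES from the table above:
    stall = {"EventCode": "0xE", "UMask": "0x1",
             "Invert": "1", "CounterMask": "1"}
    print(hex(raw_config(stall)))  # 0x180010e, usable as perf's r180010e
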
diff --git a/tools/perf/pmu-events/arch/x86/nehalemep/virtual-memory.json b/tools/perf/pmu-events/arch/x86/nehalemep/virtual-memory.json
new file mode 100644
index 000000000..0596094e0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemep/virtual-memory.json
@@ -0,0 +1,109 @@
+[
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load misses"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss caused by low part of address"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB second level hit"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB load miss page walks completed"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses that hit the second level TLB"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss page walks"
+ },
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_FLUSH",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB flushes"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISS_RETIRED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "ITLB misses"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss page walks"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+ }
+] \ No newline at end of file
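
virtual-memory.json completes the nehalemep directory. Every file in it uses the same flat schema: six fields appear in every entry, and the optional ones (PEBS, Offcore, MSRIndex/MSRValue, Invert, CounterMask, EdgeDetect, AnyThread) are layered on top as needed. A quick consistency check is sketched below; the required-field set is inferred from the entries in this patch, not from a formal jevents specification:

    # Hedged sketch: flag any nehalemep entry missing a field that every
    # entry in this patch otherwise carries.
    import glob
    import json

    REQUIRED = {"EventCode", "Counter", "UMask", "EventName",
                "SampleAfterValue", "BriefDescription"}

    for path in sorted(glob.glob(
            "tools/perf/pmu-events/arch/x86/nehalemep/*.json")):
        with open(path) as f:
            for ev in json.load(f):
                missing = REQUIRED - ev.keys()
                if missing:
                    print(path, ev.get("EventName", "?"), "lacks", missing)
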
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/cache.json b/tools/perf/pmu-events/arch/x86/nehalemex/cache.json
new file mode 100644
index 000000000..21a0f8fd0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/cache.json
@@ -0,0 +1,3184 @@
+[
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "CACHE_LOCK_CYCLES.L1D",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D locked"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D and L2 locked"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D.M_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines replaced in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D.M_REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines allocated in the M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D.M_SNOOP_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D snoop eviction of cache lines in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D.REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache lines allocated"
+ },
+ {
+ "EventCode": "0x43",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_ALL_REF.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All references to the L1 data cache"
+ },
+ {
+ "EventCode": "0x43",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_ALL_REF.CACHEABLE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cacheable reads and writes"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_CACHE_LD.E_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in E state"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_LD.I_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in I state (misses)"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE_LD.M_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in M state"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0xf",
+ "EventName": "L1D_CACHE_LD.MESI",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache reads"
+ },
+ {
+ "EventCode": "0x40",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_CACHE_LD.S_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache read in S state"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_CACHE_LOCK.E_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load locks in E state"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_LOCK.HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load lock hits"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE_LOCK.M_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load locks in M state"
+ },
+ {
+ "EventCode": "0x42",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_CACHE_LOCK.S_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache load locks in S state"
+ },
+ {
+ "EventCode": "0x53",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x52",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x41",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_CACHE_ST.E_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache stores in E state"
+ },
+ {
+ "EventCode": "0x41",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D_CACHE_ST.M_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache stores in M state"
+ },
+ {
+ "EventCode": "0x41",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_CACHE_ST.S_STATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache stores in S state"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_PREFETCH.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch misses"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_PREFETCH.REQUESTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_PREFETCH.TRIGGERS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests triggered"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D_WB_L2.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in E state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_WB_L2.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D_WB_L2.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in M state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L1D_WB_L2.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L1 writebacks to L2"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_WB_L2.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_DATA_RQSTS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data prefetches"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the S state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ANY",
+ "SampleAfterValue": "100000",
+        "BriefDescription": "L2 lines allocated"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the E state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the S state"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_LINES_OUT.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.IFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.IFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.IFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.LD_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_RQSTS.LD_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.LOADS",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "L2 load requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xaa",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PREFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PREFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.PREFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 prefetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.RFOS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO requests"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANSACTIONS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANSACTIONS.FILL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 fill transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANSACTIONS.IFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANSACTIONS.L1D_WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D writeback to L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANSACTIONS.LOAD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 Load transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANSACTIONS.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANSACTIONS.RFO",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANSACTIONS.WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 writeback to LLC transactions"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_WRITE.LOCK.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in E state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe0",
+ "EventName": "L2_WRITE.LOCK.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_WRITE.LOCK.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_WRITE.LOCK.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_WRITE.LOCK.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_WRITE.LOCK.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in S state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "L2_WRITE.RFO.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_WRITE.RFO.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_WRITE.RFO.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_WRITE.RFO.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_WRITE.RFO.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in S state"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Longest latency cache miss"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Longest latency cache reference"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_INST_RETIRED.LOADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a load (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_INST_RETIRED.STORES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a store (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore L1 data cache writebacks"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests blocked due to Super Queue full"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue lock splits across a cache line"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "STORE_BLOCKS.AT_RET",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads delayed with at-Retirement block code"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "STORE_BLOCKS.L1D_BLOCK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cacheable loads delayed with L1D block code"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x0",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x400",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100",
+ "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1000",
+ "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5",
+ "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x800",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50",
+ "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "500",
+ "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5000",
+ "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "3",
+ "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50000",
+ "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x1000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20",
+ "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "200",
+ "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x2000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10",
+ "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F11",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF11",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x111",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x211",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x411",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x711",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4711",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F44",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF44",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x144",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x244",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x444",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x744",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4744",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7FFF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFFFF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x80FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x47FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x18FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x38FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x10FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F22",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF22",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x122",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x222",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x422",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x722",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4722",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F08",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF08",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore writebacks",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x108",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x408",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x708",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4708",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F77",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF77",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code or data read requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x177",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x277",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x477",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x777",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4777",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F33",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any cache_dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF33",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any location",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x133",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x233",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x433",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x733",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4733",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F03",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF03",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x103",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x203",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x403",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x703",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4703",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F01",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF01",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x101",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x201",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x401",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x701",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4701",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F04",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF04",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x104",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x204",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x404",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x704",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4704",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F02",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF02",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x102",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x202",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x402",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x702",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4702",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F80",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF80",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore other requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x180",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x280",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x480",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x780",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4780",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F30",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF30",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x130",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x230",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x430",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x730",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4730",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F10",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF10",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x110",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x210",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x410",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x710",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4710",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F40",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF40",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x140",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x240",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x440",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x740",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4740",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F20",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF20",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x120",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x220",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x420",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x720",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4720",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F70",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF70",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x170",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x270",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x470",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x770",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4770",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
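In the OFFCORE_RESPONSE entries above, the MSRValue is written to the MSR at MSRIndex (0x1A6) and is the bitwise OR of a request byte (bits 0-7) and a response byte (bits 8-15); 0x1030, for instance, combines the PF_DATA request mask 0x30 with the REMOTE_CACHE_HIT response bit. A minimal Python sketch of that decoding, with bit names inferred from the event names above rather than taken from kernel code:

# Decode an OFFCORE_RESPONSE MSRValue into request and response parts.
# Bit names are inferred from the event names in this file (illustrative).
REQUEST_BITS = {
    0x01: "DEMAND_DATA_RD", 0x02: "DEMAND_RFO", 0x04: "DEMAND_IFETCH",
    0x08: "COREWB", 0x10: "PF_DATA_RD", 0x20: "PF_RFO",
    0x40: "PF_IFETCH", 0x80: "OTHER",
}
RESPONSE_BITS = {
    0x01: "LLC_HIT_NO_OTHER_CORE", 0x02: "LLC_HIT_OTHER_CORE_HIT",
    0x04: "LLC_HIT_OTHER_CORE_HITM", 0x08: "REMOTE_CACHE_HITM",
    0x10: "REMOTE_CACHE_HIT", 0x20: "REMOTE_DRAM",
    0x40: "LOCAL_DRAM", 0x80: "IO_CSR_MMIO",
}

def decode(msr_value):
    requests = [name for bit, name in REQUEST_BITS.items() if msr_value & bit]
    responses = [name for bit, name in RESPONSE_BITS.items()
                 if (msr_value >> 8) & bit]
    return requests, responses

# 0x1030 is OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT above:
print(decode(0x1030))  # (['PF_DATA_RD', 'PF_RFO'], ['REMOTE_CACHE_HIT'])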
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/floating-point.json b/tools/perf/pmu-events/arch/x86/nehalemex/floating-point.json
new file mode 100644
index 000000000..7d2f71a9d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/floating-point.json
@@ -0,0 +1,229 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ASSIST.ALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.INPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating poiint assists for invalid input value (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.OUTPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_COMP_OPS_EXE.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MMX Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP double precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE and SSE2 FP Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP packed Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP scalar Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP single precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE2 integer Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Computational floating-point operations executed"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "FP_MMX_TRANS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All Floating Point to and from MMX transitions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_MMX_TRANS.TO_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from MMX to Floating Point instructions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_MMX_TRANS.TO_MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from Floating Point to MMX instructions"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_128.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer pack operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_128.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer arithmetic operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer logical operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_128.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer multiply operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_128.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shift operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_128.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer unpack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_64.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit pack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_64.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit logical operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_64.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_64.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shift operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_64.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit unpack operations"
+ }
+] \ No newline at end of file
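The UMask values within one EventCode compose by bitwise OR: FP_MMX_TRANS.ANY (0x3) is TO_FP (0x1) | TO_MMX (0x2). A small sketch that groups a local copy of this file by EventCode so such composite masks stand out (illustrative; not part of the patch tooling):

import json
from collections import defaultdict

# Group events by EventCode and print their umasks, to show how composite
# masks such as FP_MMX_TRANS.ANY (0x3 = TO_FP 0x1 | TO_MMX 0x2) are built
# from their sub-events.  Assumes a local copy of the file.
with open("floating-point.json") as f:
    events = json.load(f)

by_code = defaultdict(list)
for ev in events:
    by_code[ev["EventCode"]].append(ev)

for code, evs in sorted(by_code.items()):
    for ev in sorted(evs, key=lambda e: int(e["UMask"], 16)):
        print("{} umask={:<4} {}".format(code, ev["UMask"], ev["EventName"]))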
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/frontend.json b/tools/perf/pmu-events/arch/x86/nehalemex/frontend.json
new file mode 100644
index 000000000..e5e21e034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/frontend.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions decoded"
+ },
+ {
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused instructions decoded"
+ },
+ {
+ "EventCode": "0x19",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TWO_UOP_INSTS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Two Uop instructions decoded"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/memory.json b/tools/perf/pmu-events/arch/x86/nehalemex/memory.json
new file mode 100644
index 000000000..f914a4525
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/memory.json
@@ -0,0 +1,739 @@
+[
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x60FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF8FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x40FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x20FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any LLC miss",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the local DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
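Once these tables are built into perf, each entry above is selectable by its symbolic name, and the kernel programs the offcore response MSR (MSRIndex 0x1A6) with the matching MSRValue on the user's behalf. A hedged sketch that prints illustrative perf invocations for the local-DRAM events in this file; the file path, workload path, and exact command form are assumptions:

import json

# Print illustrative `perf stat` command lines for the local-DRAM offcore
# events in this file.  perf exposes pmu-events names in lower case; the
# workload path is a placeholder.
with open("memory.json") as f:
    events = json.load(f)

for ev in events:
    if ev["EventName"].endswith(".LOCAL_DRAM"):
        print("perf stat -e {} -- ./workload".format(ev["EventName"].lower()))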
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/other.json b/tools/perf/pmu-events/arch/x86/nehalemex/other.json
new file mode 100644
index 000000000..af0860622
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/other.json
@@ -0,0 +1,210 @@
+[
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_CLEARS.EARLY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Early Branch Prediciton Unit clears"
+ },
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BPU_CLEARS.LATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Late Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_MISSED_CALL_RET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch prediction unit missed call or return"
+ },
+ {
+ "EventCode": "0xD5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ES_REG_RENAMES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ES segment renames"
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IO_TRANSACTIONS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "I/O transactions"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1I.CYCLES_STALLED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch stall cycles"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1I.HITS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch hits"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1I.MISSES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch misses"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L1I.READS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I Instruction fetches"
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LARGE_ITLB.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Large ITLB hit"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "LOAD_DISPATCH.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All loads dispatched"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "LOAD_DISPATCH.MOB",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_DISPATCH.RS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched that bypass the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_DISPATCH.RS_DELAYED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from stage 305"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PARTIAL_ADDRESS_ALIAS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "False dependencies due to partial address aliasing"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RAT_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All RAT stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RAT_STALLS.FLAGS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Flag stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RAT_STALLS.REGISTERS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Partial register stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RAT_STALLS.ROB_READ_PORT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB read port stalls cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RAT_STALLS.SCOREBOARD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Scoreboard stall cycles"
+ },
+ {
+ "EventCode": "0x4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "SB_DRAIN.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All Store buffer stall cycles"
+ },
+ {
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SEG_RENAME_STALLS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Segment rename stall cycles"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOP_RESPONSE.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HIT to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOP_RESPONSE.HITE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITE to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOP_RESPONSE.HITM",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITM to snoop"
+ },
+ {
+ "EventCode": "0xF6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SQ_FULL_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue full stall cycles"
+ }
+] \ No newline at end of file
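The L1I entries above compose the same way as the other umask families: READS (0x3) counts both HITS (0x1) and MISSES (0x2), so any one of the three counts follows from the other two. A short sketch with placeholder numbers standing in for perf output:

# The L1I umasks above compose: READS (0x3) covers both HITS (0x1) and
# MISSES (0x2).  The counts below are hypothetical placeholders.
l1i_reads = 250_000     # L1I.READS, hypothetical sample
l1i_misses = 1_200      # L1I.MISSES, hypothetical sample

l1i_hits = l1i_reads - l1i_misses
print("L1I hits: {}  miss ratio: {:.2%}".format(l1i_hits,
                                                l1i_misses / l1i_reads))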
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/pipeline.json b/tools/perf/pmu-events/arch/x86/nehalemex/pipeline.json
new file mode 100644
index 000000000..41006ddcd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/pipeline.json
@@ -0,0 +1,881 @@
+[
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.CYCLES_DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles the divider is busy"
+ },
+ {
+ "EventCode": "0x14",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.DIV",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide Operations executed",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ARITH.MUL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations executed"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BACLEAR.BAD_TARGET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted with bad target address"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR.CLEAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted, regardless of cause "
+ },
+ {
+ "EventCode": "0xA7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR_FORCE_IQ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction queue forced BACLEAR"
+ },
+ {
+ "EventCode": "0xE0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch instructions decoded"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_INST_EXEC.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_EXEC.COND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Conditional branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_EXEC.DIRECT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Unconditional branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect non call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_INST_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_INST_EXEC.NON_CALLS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All non call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect return branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_EXEC.TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Retired near call instructions (Precise Event)"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_MISP_EXEC.ANY",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_EXEC.COND",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_EXEC.DIRECT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted unconditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_MISP_EXEC.NON_CALLS",
+ "SampleAfterValue": "20000",
+        "BriefDescription": "Mispredicted non-call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted return branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_MISP_EXEC.TAKEN",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "2000",
+        "BriefDescription": "Mispredicted retired near calls (Precise Event)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_P",
+ "SampleAfterValue": "100000",
+        "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total CPU cycles",
+ "CounterMask": "2"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "ILD_STALL.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Any Instruction Length Decoder stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction Queue full stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Length Change Prefix stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ILD_STALL.MRU",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stall cycles due to BPU MRU bypass"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ILD_STALL.REGEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Regen stall cycles"
+ },
+ {
+ "EventCode": "0x18",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_DECODED.DEC0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions that must be decoded by decoder 0"
+ },
+ {
+ "EventCode": "0x1E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles instructions are written to the instruction queue"
+ },
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions written to instruction queue."
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (fixed counter)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "INST_RETIRED.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired MMX instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired floating-point operations (Precise Event)"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load operations conflicting with software prefetches"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xA8",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.INACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD_OVERFLOW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loops that can't stream from the instruction queue"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Cycles machine clear asserted"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEM_ORDER",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RESOURCE_STALLS.FPCW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FPU control word write stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LOAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Load buffer stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS.MXCSR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MXCSR rename stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RESOURCE_STALLS.OTHER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Other Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reservation Station full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.STORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Store buffer stall cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+ },
+ {
+ "EventCode": "0xDB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOP_UNFUSION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uop unfusions due to FP exceptions"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DECODED.ESP_FOLDING",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer instructions decoded"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DECODED.ESP_SYNC",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer sync operations"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops decoded by Microcode Sequencer",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xD1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DECODED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops are decoded",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Number of stalls (no Uops executed on any port, core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Number of stalls (no Uops executed on ports 0-4, core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.PORT0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 0"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 0, 1 or 5"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Cycles no Uops executed on ports 0, 1 or 5",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.PORT1",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT2_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 2 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT234_CORE",
+ "SampleAfterValue": "2000000",
+        "BriefDescription": "Uops executed on ports 2, 3 or 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT3_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 3 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT4_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED.PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 5"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued on any thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops were issued on either thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_ISSUED.FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Fused Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are being retired",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_RETIRED.MACRO_FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retirement slots used (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ }
+] \ No newline at end of file
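The JSON entries above map directly onto the Intel PERFEVTSEL programming model: "EventCode" and "UMask" select the event, while "CounterMask", "Invert", "EdgeDetect" and "AnyThread" correspond to the cmask, inv, edge and any modifier bits that perf exposes through its cpu// raw-event syntax. At build time the in-tree jevents tool converts these files into perf's event tables; purely as an illustrative sketch (not part of this patch, and assuming a local copy of the file saved as pipeline.json), the same mapping looks like this in Python:

    import json

    def to_perf_raw(ev):
        # Build perf's raw-event string for one programmable-counter entry,
        # e.g. BR_MISP_EXEC.ANY becomes cpu/event=0x89,umask=0x7f/.
        terms = ["event=%#x" % int(ev["EventCode"].split(",")[0], 16),
                 "umask=%#x" % int(ev["UMask"], 16)]
        if "CounterMask" in ev:
            terms.append("cmask=%s" % ev["CounterMask"])
        if ev.get("Invert") == "1":
            terms.append("inv=1")
        if ev.get("EdgeDetect") == "1":
            terms.append("edge=1")
        if ev.get("AnyThread") == "1":
            terms.append("any=1")
        return "cpu/%s/" % ",".join(terms)

    with open("pipeline.json") as f:          # assumed local copy of the file above
        for ev in json.load(f):
            if ev["Counter"].startswith("Fixed"):
                continue                      # fixed counters are not programmed via PERFEVTSEL
            print("%-45s %s" % (ev["EventName"], to_perf_raw(ev)))

For example, BR_MISP_EXEC.ANY resolves to cpu/event=0x89,umask=0x7f/, which can be counted on a matching CPU with perf stat -e cpu/event=0x89,umask=0x7f/.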
diff --git a/tools/perf/pmu-events/arch/x86/nehalemex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/nehalemex/virtual-memory.json
new file mode 100644
index 000000000..0596094e0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/nehalemex/virtual-memory.json
@@ -0,0 +1,109 @@
+[
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load misses"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss caused by low part of address"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB second level hit"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walks complete"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "200000",
+        "BriefDescription": "DTLB first level misses that hit the second level"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss page walks"
+ },
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_FLUSH",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB flushes"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISS_RETIRED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss page walks"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+ }
+] \ No newline at end of file
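Within one of these files, entries that share an "EventCode" are told apart by "UMask" plus the modifier fields, so every (event code, umask, modifiers) tuple should name exactly one event. A small consistency check over that invariant, again only an illustrative Python sketch with an assumed local file path rather than anything this patch adds:

    import json
    from collections import defaultdict

    MODIFIERS = ("CounterMask", "Invert", "AnyThread", "EdgeDetect")

    def report_duplicate_encodings(path):
        # Group events by their full hardware encoding and flag collisions,
        # i.e. two names that would program an identical counter.
        seen = defaultdict(list)
        with open(path) as f:
            for ev in json.load(f):
                key = (ev["EventCode"], ev["UMask"],
                       tuple(ev.get(m) for m in MODIFIERS))
                seen[key].append(ev["EventName"])
        for (code, umask, mods), names in seen.items():
            if len(names) > 1:
                print("%s: %s/%s shared by %s" % (path, code, umask, ", ".join(names)))

    report_duplicate_encodings("virtual-memory.json")   # assumed local copy

On the thirteen entries above this prints nothing, since every event uses a distinct (event code, umask) pair.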
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/cache.json b/tools/perf/pmu-events/arch/x86/sandybridge/cache.json
new file mode 100644
index 000000000..bef73c499
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/cache.json
@@ -0,0 +1,1879 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that miss the STLB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that miss the STLB.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load uops with locked access.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This event counts line-split load uops retired to the architected path. A line split crosses a 64B cache-line boundary, which includes a 4K page split.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops that split across a cacheline boundary.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This event counts line-split store uops retired to the architected path. A line split crosses a 64B cache-line boundary, which includes a 4K page split.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store uops that split across a cacheline boundary.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+        "PublicDescription": "This event counts the number of load uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load uops.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of store uops retired.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired store uops.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load uops with L1 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load uops with L2 cache hits as data sources.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops that hit in the last-level (L3) cache without snoops required.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT",
+ "SampleAfterValue": "50021",
+        "BriefDescription": "Retired load uops whose data sources were data hits in LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops that missed the L1 but hit the fill buffer (FB) due to a preceding miss to the same cache line with data not ready.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops whose data sources were LLC hits with cross-core snoop misses in the on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a non-modified state.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops whose data sources were LLC hits with cross-core snoop hits in the on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired load uops that hit in the last-level cache (L3) and were found in a non-modified state in a neighboring core's private cache (same package). Since the last level cache is inclusive, hits to the L3 may require snooping the private L2 caches of any cores on the same socket that have the line. In this case, a snoop was required, and another L2 had the line in a modified state, so the line had to be invalidated in that L2 cache and transferred to the requesting L2.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load uops whose data sources were HitM responses from the shared LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Retired load uops whose data sources were hits in the LLC without snoops required.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts retired demand loads that missed the last-level (L3) cache. This means that the load is usually satisfied from memory in a client system or possibly from the remote socket in a server. Demand loads are non speculative load uops.",
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS",
+ "SampleAfterValue": "100007",
+        "BriefDescription": "Retired demand load uops that missed the last-level (L3) cache.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts L1D data line replacements. Replacements occur when a new line is brought into the cache, causing eviction of a line loaded earlier. ",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D.ALLOCATED_IN_M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Allocated L1D data cache lines in M state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D.EVICTION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data cache lines in M state evicted due to replacement.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D.ALL_M_REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cache lines in M state evicted out of L1D due to Snoop HitM or dirty line replacement.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "L1D miss outstanding duration in cycles.",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1D is locked.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Cacheable and non-cacheable code read requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "Cases when the offcore requests buffer cannot take more entries for the core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that hit L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_STORE_LOCK_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that miss cache lines.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_STORE_LOCK_RQSTS.HIT_E",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that hit cache lines in E state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_STORE_LOCK_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that hit cache lines in M state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_STORE_LOCK_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFOs that access cache lines in any state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_L1D_WB_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+        "BriefDescription": "Modified lines evicted from L1 that missed L2 (non-rejected writebacks from the DCU).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_S",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in S state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_E",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in E state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_L1D_WB_RQSTS.HIT_M",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in M state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_L1D_WB_RQSTS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not rejected writebacks from L1D to L2 cache lines in any state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANS.DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANS.RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANS.CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache accesses when fetching instructions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 or LLC HW prefetches that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANS.L1D_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L1D writebacks that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANS.L2_FILL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 fill requests that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANS.ALL_REQUESTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Transactions accessing L2 pipe.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_IN.I",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in I state filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in S state filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines in E state filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of L2 cache lines brought into the L2 cache. Lines are filled into the L2 cache when there was an L2 miss.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by demand.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by demand.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PF_CLEAN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Clean L2 cache lines evicted by L2 prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PF_DIRTY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Dirty L2 cache lines evicted by L2 prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "L2_LINES_OUT.DIRTY_ALL",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Dirty L2 cache lines evicted by demand or L2 prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed LLC.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to LLC.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Split locks in SQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from L2 hardware prefetchers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "L1D_BLOCKS.BANK_CONFLICT_CYCLES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when dispatched loads are cancelled due to L1D bank conflicts with other load ports.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles when offcore outstanding demand RFO transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_C6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "2",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+        "BriefDescription": "Cycles a demand request was blocked due to Fill Buffer unavailability.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and the snoops sent to sibling cores return a clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops sent to sibling cores return a clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch code reads that hit in the LLC and the snoops sent to sibling cores return a clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops sent to sibling cores return a clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch RFOs that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts prefetch RFOs that hit in the LLC and the snoops sent to sibling cores return a clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c03f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+        "BriefDescription": "Counts data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops sent to sibling cores return a clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "COREWB & ANY_RESPONSE",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x18000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x803c8000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2380408000",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) data reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3f803c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the LLC.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10003c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003c0100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to LLC only) RFOs that hit in the LLC and the snoops sent to sibling cores return clean response.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10400",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10800",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts non-temporal stores.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand data reads .",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand rfo's .",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand code reads.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x000105B3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00010122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch prefetch RFOs .",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x000107F7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) .",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10433",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = DATA_INTO_CORE and RESPONSE = ANY_RESPONSE",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000040002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_M.HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_M and SNOOP = HITM",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_RFO and RESPONSE = ANY_RESPONSE",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = ANY_RESPONSE",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = ANY_RESPONSE",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
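The entries above with EventCode 0xB7, 0xBB are offcore-response events: the event code selects one of the two OFFCORE_RESPONSE counters, and the MSRValue is written to the paired MSR_OFFCORE_RSP register (0x1a6 or 0x1a7) to select a request/response/snoop combination. perf normally resolves the symbolic EventName directly from these tables; purely as an illustration, the Python sketch below shows how one entry maps onto a raw event string (the sysfs-style "offcore_rsp" syntax is an assumption here, not something this file defines):

    # Sketch: map one offcore-response entry from the table above onto a raw
    # perf event string, assuming the usual x86 core-PMU sysfs format
    # "cpu/event=...,umask=...,offcore_rsp=.../".
    entry = {
        "EventCode": "0xB7, 0xBB",   # two codes, one per OFFCORE_RSP MSR
        "UMask": "0x1",
        "MSRValue": "0x3f803c03f7",  # request/response/snoop bit mask
    }

    def perf_event_string(e):
        # Either event code works; each is paired with its own MSR (0x1a6/0x1a7).
        event = e["EventCode"].split(",")[0].strip()
        return "cpu/event=%s,umask=%s,offcore_rsp=%s/" % (
            event, e["UMask"], e["MSRValue"])

    print(perf_event_string(entry))
    # -> cpu/event=0xB7,umask=0x1,offcore_rsp=0x3f803c03f7/
    # usable as: perf stat -e 'cpu/event=0xB7,umask=0x1,offcore_rsp=0x3f803c03f7/'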
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json b/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json
new file mode 100644
index 000000000..982eda487
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/floating-point.json
@@ -0,0 +1,138 @@
+[
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OTHER_ASSISTS.AVX_STORE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of GSSE memory assist for stores. GSSE microcode assist is being invoked whenever the hardware is unable to properly handle GSSE-256b operations.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OTHER_ASSISTS.AVX_TO_SSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from AVX-256 to legacy SSE when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "OTHER_ASSISTS.SSE_TO_AVX",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.X87_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to output value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.X87_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of X87 assists due to input value.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ASSIST.SIMD_OUTPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to Output values.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ASSIST.SIMD_INPUT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of SIMD FP assists due to input values.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of FP Computational Uops Executed this cycle. The number of FADD, FSUB, FCOM, FMULs, integer MULsand IMULs, FDIVs, FPREMs, FSQRTS, integer DIVs, and IDIVs. This event does not distinguish an FADD used in the middle of a transcendental flow from a s.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed double-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar single-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational packed single-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE* or AVX-128 FP Computational scalar double-precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_FP_256.PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of GSSE-256 Computational FP single precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_FP_256.PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of AVX-256 Computational FP double precision uops issued this cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
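The FP_COMP_OPS_EXE and SIMD_FP_256 events above count uops, not FP operations, so deriving a FLOP count means weighting each event by its vector width. A minimal sketch with made-up counter values (the weights follow directly from the lane counts named in the descriptions):

    # Hypothetical counter readings; keys mirror the EventNames defined above.
    counts = {
        "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE": 1_000_000,
        "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE": 500_000,
        "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE": 250_000,
        "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE": 250_000,
        "SIMD_FP_256.PACKED_SINGLE": 100_000,
        "SIMD_FP_256.PACKED_DOUBLE": 100_000,
    }

    # Each uop performs as many FP operations as its vector has lanes.
    flops_per_uop = {
        "FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE": 1,
        "FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE": 1,
        "FP_COMP_OPS_EXE.SSE_PACKED_SINGLE": 4,  # 4 floats in 128 bits
        "FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE": 2,  # 2 doubles in 128 bits
        "SIMD_FP_256.PACKED_SINGLE": 8,          # 8 floats in 256 bits
        "SIMD_FP_256.PACKED_DOUBLE": 4,          # 4 doubles in 256 bits
    }

    total = sum(counts[e] * flops_per_uop[e] for e in counts)
    print("approx. FP operations:", total)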
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json b/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json
new file mode 100644
index 000000000..1b7b1dd36
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/frontend.json
@@ -0,0 +1,305 @@
+[
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Instruction Cache, Streaming Buffer and Victim Cache Reads. both cacheable and noncacheable, including UC fetches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of instruction cache, streaming buffer and victim cache misses. Counting includes unchacheable accesses.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction cache, streaming buffer and victim cache misses.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "IDQ.EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instruction Decode Queue (IDQ) empty cycles.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more information.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of uops not delivered to the back-end per cycle, per thread, when the back-end was not stalled. In the ideal case 4 uops can be delivered each cycle. The event counts the undelivered uops - so if 3 were delivered in one cycle, the counter would be incremented by 1 for that cycle (4 - 3). If the back-end is stalled, the count for this event is not incremented even when uops were not delivered, because the back-end would not have been able to accept them. This event is used in determining the front-end bound category of the top-down pipeline slots characterization.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled .",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DSB2MITE_SWITCHES.COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the cycles attributed to a switch from the Decoded Stream Buffer (DSB), which holds decoded instructions, to the legacy decode pipeline. It excludes cycles when the back-end cannot accept new micro-ops. The penalty for these switches is potentially several cycles of instruction starvation, where no micro-ops are delivered to the back-end.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB_FILL.OTHER_CANCEL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases of cancelling valid DSB fill not because of exceeding way limit.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DSB_FILL.EXCEED_DSB_LINES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_DSB_OCCUR",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while Microcode Sequenser (MS) is busy.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_GE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when 1 or more uops were delivered to the by the front end.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAC",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "DSB_FILL.ALL_CANCEL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cases of cancelling valid Decode Stream Buffer (DSB) fill not because of exceeding way limit.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3c",
+ "EventName": "IDQ.MITE_ALL_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
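The PublicDescription of IDQ_UOPS_NOT_DELIVERED.CORE above is the basis of the top-down "front-end bound" metric: Sandy Bridge can issue up to 4 uops per cycle, so the undelivered-slot count divided by total issue slots gives the fraction of slots the front end starved. A minimal sketch with made-up counter readings:

    # Hypothetical values, e.g. from
    # "perf stat -e idq_uops_not_delivered.core,cycles -- <workload>".
    undelivered_slots = 120_000_000  # IDQ_UOPS_NOT_DELIVERED.CORE
    cycles = 400_000_000             # unhalted core cycles
    SLOTS_PER_CYCLE = 4              # issue width on Sandy Bridge

    frontend_bound = undelivered_slots / (SLOTS_PER_CYCLE * cycles)
    print("front-end bound fraction: %.3f" % frontend_bound)  # 0.075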
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/memory.json b/tools/perf/pmu-events/arch/x86/sandybridge/memory.json
new file mode 100644
index 000000000..e6dfa89d0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/memory.json
@@ -0,0 +1,445 @@
+[
+ {
+ "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from memory disambiguation, external snoops, or cross SMT-HW-thread snoop (stores) hitting load buffers. Machine clears can have a significant performance impact if they are happening frequently.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads with latency value being above 4 .",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Loads with latency value being above 8.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Loads with latency value being above 16.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Loads with latency value being above 32.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Loads with latency value being above 64.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Loads with latency value being above 128.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Loads with latency value being above 256.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Loads with latency value being above 512.",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xCD",
+ "Counter": "3",
+ "UMask": "0x2",
+ "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only. (Precise Event - PEBS).",
+ "PRECISE_STORE": "1",
+ "TakenAlone": "1",
+ "CounterHTOff": "3"
+ },
+ {
+ "EventCode": "0xBE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.LLC_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of any page walk that had a miss in LLC. Does not necessary cause a SUSPEND.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MISALIGN_MEM_REF.LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split load uops dispatched to L1 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x05",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Speculative cache line split STA uops dispatched to L1 cache.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400091",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch code reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400090",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch data reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch RFOs that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3004003f7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data writes (RFOs) that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x300400100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the LLC and the data returned from dram.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts all data requests (demand/prefetch data reads and demand data writes (RFOs) that miss the LLC where the data is returned from local DRAM",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6004001b3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts LLC replacements.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "This event counts any requests that miss the LLC where the data was returned from local DRAM",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1f80408fff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_MISS_LOCAL.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = ANY_REQUEST and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x17004001b3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS_LOCAL.ANY_LLC_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = DATA_IN_SOCKET and RESPONSE = LLC_MISS_LOCAL and SNOOP = ANY_LLC_HIT",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1f80400004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_MISS_LOCAL.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = DEMAND_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1f80400010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_MISS_LOCAL.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1f80400040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_MISS_LOCAL.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_RFO and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1f80400080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L_DATA_RD.LLC_MISS_LOCAL.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_LLC_DATA_RD and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1f80400200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Offcore": "1",
+ "EventName": "OFFCORE_RESPONSE.PF_L_IFETCH.LLC_MISS_LOCAL.DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": " REQUEST = PF_LLC_IFETCH and RESPONSE = LLC_MISS_LOCAL and SNOOP = DRAM",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
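
The OFFCORE_RESPONSE.* entries above are two-part events: the EventCode/UMask pair selects one of the two offcore-response facilities (0xB7 with umask 0x1 programs MSR 0x1A6, 0xBB programs MSR 0x1A7), and MSRValue is the request/response bit pattern the kernel writes into that MSR. With perf_event_open(2) the MSR value travels in attr.config1. A minimal sketch in C, assuming a Sandy Bridge part and reusing the PF_LLC_CODE_RD.LLC_MISS.DRAM encoding from above (the workload placeholder is hypothetical):

/* offcore.c - count one offcore-response event for this thread */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x1b7;		/* event 0xB7, umask 0x1 */
	attr.config1 = 0x300400200ULL;	/* MSRValue from the JSON above */
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = perf_event_open(&attr, 0, -1, -1, 0); /* this thread, any CPU */
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload under measurement here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
	read(fd, &count, sizeof(count));
	printf("offcore responses: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}

Once these JSON files are built into perf, the equivalent command line would be something like 'perf stat -e cpu/event=0xb7,umask=0x1,offcore_rsp=0x300400200/'.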
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/other.json b/tools/perf/pmu-events/arch/x86/sandybridge/other.json
new file mode 100644
index 000000000..64b195b82
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/other.json
@@ -0,0 +1,58 @@
+[
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INSTS_WRITTEN_TO_IQ.INSTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Valid instructions written to IQ per cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPL_CYCLES.RING0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when the thread is in ring 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "CPL_CYCLES.RING0_TRANS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Number of intervals between processor halts while thread is in ring 0.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPL_CYCLES.RING123",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "HW_PRE_REQ.DL1_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Hardware Prefetch requests that miss the L1D cache. This accounts for both L1 streamer and IP-based (IPP) HW prefetchers. A request is being counted each time it access the cache & miss it, including if a block is applicable or if hit the Fill Buffer for .",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when L1 and L2 are locked due to UC or split lock.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
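
CPL_CYCLES.RING0 and CPL_CYCLES.RING123 partition unhalted core cycles by privilege level, so their ratio gives the share of cycles spent in the kernel. A minimal sketch of that arithmetic (the sample counts are made-up placeholders):

/* ring0_share.c - kernel-mode share from the two CPL_CYCLES counts */
#include <stdio.h>

static double ring0_fraction(unsigned long long ring0,
			     unsigned long long ring123)
{
	unsigned long long total = ring0 + ring123;

	return total ? (double)ring0 / (double)total : 0.0;
}

int main(void)
{
	/* In practice these come from CPL_CYCLES.RING0 and .RING123. */
	printf("kernel-mode share: %.2f%%\n",
	       100.0 * ring0_fraction(1200000ULL, 3400000ULL));
	return 0;
}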
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json
new file mode 100644
index 000000000..8a597e45e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/pipeline.json
@@ -0,0 +1,1220 @@
+[
+ {
+ "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. ",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. ",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 3"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_INST_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_INST_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_JUMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired macro-conditional branch instructions excluding calls and indirects.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect branches with return mnemonic.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_INST_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_INST_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_INST_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc2",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired macro-unconditional branches excluding calls and indirects.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc8",
+ "EventName": "BR_INST_EXEC.ALL_INDIRECT_NEAR_RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired indirect return branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_INST_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "BR_MISP_EXEC.NONTAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Not taken speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "BR_MISP_EXEC.TAKEN_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x84",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x88",
+ "EventName": "BR_MISP_EXEC.TAKEN_RETURN_NEAR",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x90",
+ "EventName": "BR_MISP_EXEC.TAKEN_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa0",
+ "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Taken speculative and retired mispredicted indirect calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc1",
+ "EventName": "BR_MISP_EXEC.ALL_CONDITIONAL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc4",
+ "EventName": "BR_MISP_EXEC.ALL_INDIRECT_JUMP_NON_CALL_RET",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Mispredicted indirect branches excluding calls and returns.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd0",
+ "EventName": "BR_MISP_EXEC.ALL_DIRECT_NEAR_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted direct near calls.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stall cycles because IQ is full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "INT_MISC.RAT_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Increments the number of flags-merge uops in flight each cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles with at least one slow LEA uop being allocated. A uop is generally considered as slow LEA if it has three sources (for example, two sources and immediate) regardless of whether it is a result of LEA instruction or not. Examples of the slow LEA uop are or uops with base, index, and offset source operands using base and index reqisters, where base is EBR/RBP/R13, using RIP relative or 16-bit addressing modes. See the Intel? 64 and IA-32 Architectures Optimization Reference Manual for more details about slow LEA instructions.",
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least one slow LEA uop being allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "PARTIAL_RAT_STALLS.MUL_SINGLE_UOP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Multiply packed/scalar single precision uops allocated.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the cycles of stall due to lack of load buffers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no eligible RS entry available.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to re-order buffer full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS2.BOB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Allocator is stalled if BOB is full and new branch needs it.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of Uops issued by the front-end of the pipeilne to the back-end.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count cases of saving new LBR.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MACHINE_CLEARS.MASKMOV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of micro-ops retired.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Actually retired uops.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 micro-ops or 4 instructions could retire each cycle. This event is used in determining the 'Retiring' category of the Top-Down pipeline slots characterization.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Return instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect mispredicted near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS).",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OTHER_ASSISTS.ITLB_MISS_RETIRED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired instructions experiencing ITLB misses.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.FPU_DIV_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when divider is busy executing divide operations.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of the divide operations executed.",
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "ARITH.FPU_DIV",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Divide operations executed.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops dispatched per thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops dispatched from any thread.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are dispatched to port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_NO_DISPATCH",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was no dispatch for this thread, increment by 1. Note this is connect to Umask 2. No dispatch can be deduced from the UOPS_EXECUTED event.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a miss-pending demand load this thread, increment by 1. Note this is in DCU and connected to Umask 1. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+ "CounterMask": "2",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a MLC-miss pending demand load this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "2",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a miss-pending demand load this thread and no uops dispatched, increment by 1. Note this is in DCU and connected to Umask 1 and 2. Miss Pending demand load should be deduced by OR-ing increment bits of DCACHE_MISS_PEND.PENDING.",
+ "CounterMask": "6",
+ "CounterHTOff": "2"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Each cycle there was a MLC-miss pending demand load and no uops dispatched on this thread (i.e. Non-completed valid SQ entry allocated for demand load and waiting for Uncore), increment by 1. Note this is in MLC and connected to Umask 0 and 2.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for software prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_HIT_PRE.HW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Not software-prefetch load dispatches that hit FB allocated for hardware prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads delayed due to SB blocks, preceding store operations with known addresses but unknown data.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceeding smaller uncompleted store. See the table of not supported store forwards in the Intel? 64 and IA-32 Architectures Optimization Reference Manual. The penalty for blocked store forwarding is that the load must wait for the store to complete before it can be issued.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cases when loads get true Block-on-Store blocking code preventing store forwarding.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "LD_BLOCKS.ALL_BLOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of cases where any load ends up with a valid block-code written to the load buffer (including blocks due to Memory Order Buffer (MOB), Data Cache Unit (DCU), TLB, but load has no DCU miss).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Aliasing occurs when a load is issued after a store and their memory addresses are offset by 4K. This event counts the number of loads that aliased with a preceding store, resulting in an extended address check in the pipeline. The enhanced address check typically has a performance penalty of 5 cycles.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS_PARTIAL.ALL_STA_BLOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts the number of times that load operations are temporarily blocked because of older stores, with addresses that are not yet known. A load operation may incur more than one block of this type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "AGU_BYPASS_CANCEL.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "This event counts executed load operations with all the following traits: 1. addressing of the format [base + offset], 2. the offset is between 1 and 2047, 3. the address specified in the base register is in one page and the address [base+offset] is in an.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 0.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 1.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 4.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when uops are dispatched to port 5.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 2.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "AnyThread": "1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired. (Precise Event - PEBS).",
+ "TakenAlone": "1",
+ "CounterHTOff": "1"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RESOURCE_STALLS2.ALL_PRF_CONTROL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls2 control structures full for physical registers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "RESOURCE_STALLS2.ALL_FL_EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with either free list is empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "RESOURCE_STALLS.MEM_RS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls due to memory buffers or Reservation Station (RS) being fully utilized.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "RESOURCE_STALLS.OOO_RSRC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls due to Rob being full, FCSW, MXCSR and OTHER.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5B",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "RESOURCE_STALLS2.OOO_RSRC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls out of order resources full.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xa",
+ "EventName": "RESOURCE_STALLS.LB_SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource stalls due to load or store buffers all being in use.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts the number of cycles spent executing performance-sensitive flags-merging uops. For example, shift CL (merge_arith_flags). For more details, See the Intel? 64 and IA-32 Architectures Optimization Reference Manual.",
+ "EventCode": "0x59",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Performance sensitive flags-merging uops added by Sandy Bridge u-arch.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EdgeDetect": "1",
+ "EventName": "INT_MISC.RECOVERY_STALLS_COUNT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of occurences waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_INST_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "BR_MISP_EXEC.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Speculative and retired mispredicted macro conditional branches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xc3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
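
Many pipeline entries above (UOPS_EXECUTED.CORE_CYCLES_GE_*, UOPS_RETIRED.STALL_CYCLES, MACHINE_CLEARS.COUNT) are ordinary counters qualified with CounterMask, Invert, and EdgeDetect. On x86 those fields occupy fixed bit positions in the architectural PERFEVTSEL register, which is also the layout PERF_TYPE_RAW expects in attr.config. A sketch of that encoding (the helper name is made up):

/* raw_config.c - pack JSON event fields into an x86 raw perf config */
#include <stdint.h>
#include <stdio.h>

static uint64_t x86_raw_config(uint8_t event, uint8_t umask, uint8_t cmask,
			       int inv, int edge, int any)
{
	return (uint64_t)event |
	       ((uint64_t)umask << 8) |
	       ((uint64_t)(edge != 0) << 18) |	/* EdgeDetect */
	       ((uint64_t)(any != 0) << 21) |	/* AnyThread */
	       ((uint64_t)(inv != 0) << 23) |	/* Invert */
	       ((uint64_t)cmask << 24);		/* CounterMask */
}

int main(void)
{
	/* UOPS_EXECUTED.CORE_CYCLES_GE_2: event 0xB1, umask 0x2, cmask 2 */
	printf("config=%#llx\n",
	       (unsigned long long)x86_raw_config(0xb1, 0x02, 2, 0, 0, 0));
	/* UOPS_RETIRED.STALL_CYCLES: event 0xC2, umask 0x1, cmask 1, inv */
	printf("config=%#llx\n",
	       (unsigned long long)x86_raw_config(0xc2, 0x01, 1, 1, 0, 0));
	return 0;
}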
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
new file mode 100644
index 000000000..fd7d7c438
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -0,0 +1,140 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ( (UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 32 * ( ICACHE.HIT + ICACHE.MISSES ) / 4) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_DISPATCHED.THREAD / (( cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@ / 2) if #SMT_on else cpu@UOPS_DISPATCHED.CORE\\,cmask\\=1@)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_COMP_OPS_EXE.SSE_SCALAR_SINGLE + FP_COMP_OPS_EXE.SSE_SCALAR_DOUBLE ) + 2* FP_COMP_OPS_EXE.SSE_PACKED_DOUBLE + 4*( FP_COMP_OPS_EXE.SSE_PACKED_SINGLE + SIMD_FP_256.PACKED_DOUBLE ) + 8* SIMD_FP_256.PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
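
The MetricExpr strings above are evaluated by perf over the listed events, and the core ones reduce to simple arithmetic once the counts are in hand. A sketch of IPC, CPI, SLOTS, and CoreIPC under the same #SMT_on convention (the struct and sample values are illustrative):

/* snb_metrics.c - a few of the metric formulas above, by hand */
#include <stdio.h>

struct snb_counts {
	double inst_retired_any;	/* INST_RETIRED.ANY */
	double clk_unhalted_thread;	/* CPU_CLK_UNHALTED.THREAD */
	double clk_unhalted_thread_any;	/* CPU_CLK_UNHALTED.THREAD_ANY */
	int smt_on;			/* mirrors #SMT_on */
};

/* CORE_CLKS: halve the any-thread count when both SMT siblings run */
static double core_clks(const struct snb_counts *c)
{
	return c->smt_on ? c->clk_unhalted_thread_any / 2.0
			 : c->clk_unhalted_thread;
}

int main(void)
{
	struct snb_counts c = { 8.0e9, 4.0e9, 7.0e9, 1 }; /* sample values */
	double ipc = c.inst_retired_any / c.clk_unhalted_thread;

	printf("IPC=%.2f CPI=%.2f SLOTS=%.3g CoreIPC=%.2f\n",
	       ipc, 1.0 / ipc, 4.0 * core_clks(&c),
	       c.inst_retired_any / core_clks(&c));
	return 0;
}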
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json b/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json
new file mode 100644
index 000000000..42c70eed0
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/uncore.json
@@ -0,0 +1,314 @@
+[
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x01",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS",
+ "BriefDescription": "A snoop misses in some processor core.",
+ "PublicDescription": "A snoop misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x02",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL",
+ "BriefDescription": "A snoop invalidates a non-modified line in some processor core.",
+ "PublicDescription": "A snoop invalidates a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x04",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT",
+ "BriefDescription": "A snoop hits a non-modified line in some processor core.",
+ "PublicDescription": "A snoop hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x08",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM",
+ "BriefDescription": "A snoop hits a modified line in some processor core.",
+ "PublicDescription": "A snoop hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x10",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.INVAL_M",
+ "BriefDescription": "A snoop invalidates a modified line in some processor core.",
+ "PublicDescription": "A snoop invalidates a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x20",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.EXTERNAL_FILTER",
+ "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+ "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to external snoop request.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x40",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.XCORE_FILTER",
+ "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+ "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to processor core memory request.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x80",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.EVICTION_FILTER",
+ "BriefDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+ "PublicDescription": "Filter on cross-core snoops initiated by this Cbox due to LLC eviction.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x01",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.M",
+ "BriefDescription": "LLC lookup request that access cache and found line in M-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x02",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.E",
+ "BriefDescription": "LLC lookup request that access cache and found line in E-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in E-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x04",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.S",
+ "BriefDescription": "LLC lookup request that access cache and found line in S-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x08",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.I",
+ "BriefDescription": "LLC lookup request that access cache and found line in I-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x10",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_FILTER",
+ "BriefDescription": "Filter on processor core initiated cacheable read requests.",
+ "PublicDescription": "Filter on processor core initiated cacheable read requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x20",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_FILTER",
+ "BriefDescription": "Filter on processor core initiated cacheable write requests.",
+ "PublicDescription": "Filter on processor core initiated cacheable write requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x40",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.EXTSNP_FILTER",
+ "BriefDescription": "Filter on external snoop requests.",
+ "PublicDescription": "Filter on external snoop requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x80",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_REQUEST_FILTER",
+ "BriefDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+ "PublicDescription": "Filter on any IRQ or IPQ initiated requests including uncacheable, non-coherent requests.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "PublicDescription": "Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x81",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "PublicDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x81",
+ "UMask": "0x20",
+ "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+ "BriefDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.",
+ "PublicDescription": "Counts the number of allocated write entries, include full, partial, and LLC evictions.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x81",
+ "UMask": "0x80",
+ "EventName": "UNC_ARB_TRK_REQUESTS.EVICTIONS",
+ "BriefDescription": "Counts the number of LLC evictions allocated.",
+ "PublicDescription": "Counts the number of LLC evictions allocated.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x83",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+ "PublicDescription": "Cycles weighted by number of requests pending in Coherency Tracker.",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x84",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+ "PublicDescription": "Number of requests allocated in Coherency Tracker.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "Counter": "0,1",
+ "CounterMask": "1",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL",
+ "BriefDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "PublicDescription": "Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "Counter": "0,1",
+ "CounterMask": "10",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "ARB",
+ "EventCode": "0x0",
+ "UMask": "0x01",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "Fixed",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x06",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ES",
+ "BriefDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+ "PublicDescription": "LLC lookup request that access cache and found line in E-state or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+] \ No newline at end of file
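For readers cross-checking these uncore entries: the EventCode/UMask/CounterMask/Invert/EdgeDetect fields map onto the bit layout the client uncore CBO/ARB PMUs advertise under /sys/bus/event_source/devices/*/format (event=config:0-7, umask=config:8-15, edge=config:18, inv=config:23, cmask=config:24-28 on this generation). The C sketch below is a minimal illustration, assuming that layout holds on the target system; verify it against the sysfs format directory before relying on it. It composes the raw config values for the two CounterMask-based occupancy events above.

/*
 * Sketch: build raw uncore config values from the JSON fields.
 * Bit positions are assumed from the PMU's sysfs format directory;
 * confirm them on the target system.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t uncore_config(uint64_t event, uint64_t umask,
                              uint64_t cmask, int edge, int inv)
{
        return event | (umask << 8) | ((uint64_t)edge << 18) |
               ((uint64_t)inv << 23) | (cmask << 24);
}

int main(void)
{
        /* UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST: 0x80/0x01, cmask=1 */
        printf("any request:    %#llx\n",
               (unsigned long long)uncore_config(0x80, 0x01, 1, 0, 0));
        /* UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL: 0x80/0x01, cmask=10 */
        printf("over half full: %#llx\n",
               (unsigned long long)uncore_config(0x80, 0x01, 10, 0, 0));
        return 0;
}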
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json b/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json
new file mode 100644
index 000000000..a654ab771
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/virtual-memory.json
@@ -0,0 +1,149 @@
+[
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycle count for an Extended Page table walk. The Extended Page Directory cache is used by Virtual Machine operating systems while the guest operating systems use the standard TLB caches.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses in all ITLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event count cycles when Page Miss Handler (PMH) is servicing page walks caused by ITLB misses.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses at all DTLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This event counts load operations that miss the first DTLB level but hit the second and do not cause any page walks. The penalty in this case is approximately 7 cycles.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_STORE_MISSES.WALK_DURATION",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when PMH is busy with page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "STLB flush attempts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
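The virtual-memory entries above are plain core events, so they can be counted directly with perf_event_open(2) using PERF_TYPE_RAW, where the raw config is conventionally (UMask << 8) | EventCode. The following is a minimal sketch under that assumption, using DTLB_LOAD_MISSES.WALK_DURATION (0x08/0x4, i.e. raw 0x0408); the workload section is a placeholder.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* glibc has no wrapper for this syscall */
static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0x0408;        /* UMask 0x4 << 8 | EventCode 0x08 */
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);  /* this thread, any CPU */
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under measurement goes here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("PMH busy cycles (DTLB load walks): %llu\n",
                       (unsigned long long)count);
        close(fd);
        return 0;
}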
diff --git a/tools/perf/pmu-events/arch/x86/silvermont/cache.json b/tools/perf/pmu-events/arch/x86/silvermont/cache.json
new file mode 100644
index 000000000..82be7d1b8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/silvermont/cache.json
@@ -0,0 +1,812 @@
+[
+ {
+ "PublicDescription": "This event counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the IDI link. The XQ may reject transactions from the L2Q (non-cacheable requests), BBS (L2 misses) and WOB (L2 write-back victims).",
+ "EventCode": "0x30",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "L2_REJECT_XQ.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of request from the L2 that were not accepted into the XQ"
+ },
+ {
+ "PublicDescription": "Counts the number of (demand and L1 prefetchers) core requests rejected by the L2Q due to a full or nearly full w condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to insure fairness between cores, or to delay a core?s dirty eviction when the address conflicts incoming external snoops. (Note that L2 prefetcher requests that are dropped are not counted by this event.)",
+ "EventCode": "0x31",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "CORE_REJECT_L2Q.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of request that were not accepted into the L2Q because the L2Q is FULL."
+ },
+ {
+ "PublicDescription": "This event counts requests originating from the core that references a cache line in the L2 cache.",
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache requests from this core"
+ },
+ {
+ "PublicDescription": "This event counts the total number of L2 cache references and the number of L2 cache misses respectively.",
+ "EventCode": "0x2E",
+ "Counter": "0,1",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache request misses"
+ },
+ {
+ "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events. \r\n",
+ "EventCode": "0x86",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss."
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of retired loads that were prohibited from receiving forwarded data from the store because of address mismatch.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "REHABQ.LD_BLOCK_ST_FORWARD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked due to store forward restriction"
+ },
+ {
+ "PublicDescription": "This event counts the cases where a forward was technically possible, but did not occur because the store data was not available at the right time.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "REHABQ.LD_BLOCK_STD_NOTREADY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads blocked due to store data not ready"
+ },
+ {
+ "PublicDescription": "This event counts the number of retire stores that experienced cache line boundary splits.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "REHABQ.ST_SPLITS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store uops that split cache line boundary"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of retire loads that experienced cache line boundary splits.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "REHABQ.LD_SPLITS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Load uops that split cache line boundary"
+ },
+ {
+ "PublicDescription": "This event counts the number of retired memory operations with lock semantics. These are either implicit locked instructions such as the XCHG instruction or instructions with an explicit LOCK prefix (0xF0).",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "REHABQ.LOCK",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Uops with lock semantics"
+ },
+ {
+ "PublicDescription": "This event counts the number of retired stores that are delayed because there is not a store address buffer available.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "REHABQ.STA_FULL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Store address buffer full"
+ },
+ {
+ "PublicDescription": "This event counts the number of load uops reissued from Rehabq.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "REHABQ.ANY_LD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Any reissued load uops"
+ },
+ {
+ "PublicDescription": "This event counts the number of store uops reissued from Rehabq.",
+ "EventCode": "0x03",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "REHABQ.ANY_ST",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Any reissued store uops"
+ },
+ {
+ "PublicDescription": "This event counts the number of load ops retired that miss in L1 Data cache. Note that prefetch misses will not be counted.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MEM_UOPS_RETIRED.L1_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads missed L1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of load ops retired that hit in the L2.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "MEM_UOPS_RETIRED.L2_HIT_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads hit L2"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of load ops retired that miss in the L2.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "MEM_UOPS_RETIRED.L2_MISS_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Loads missed L2"
+ },
+ {
+ "PublicDescription": "This event counts the number of load ops retired that had UTLB miss.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "MEM_UOPS_RETIRED.UTLB_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads missed UTLB"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of load ops retired that got data from the other core or from the other module.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "MEM_UOPS_RETIRED.HITM",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cross core or cross module hitm"
+ },
+ {
+ "PublicDescription": "This event counts the number of load ops retired.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x40",
+ "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All Loads"
+ },
+ {
+ "PublicDescription": "This event counts the number of store ops retired.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All Stores"
+ },
+ {
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000044",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any code reads (demand & prefetch) that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000044",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any code reads (demand & prefetch) that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000044",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any code reads (demand & prefetch) that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000044",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any code reads (demand & prefetch) that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010044",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any code reads (demand & prefetch) that have any response type.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000022",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any rfo reads (demand & prefetch) that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000022",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any rfo reads (demand & prefetch) that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000022",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any rfo reads (demand & prefetch) that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000022",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any rfo reads (demand & prefetch) that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010022",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any rfo reads (demand & prefetch) that have any response type.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680003091",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data read (demand & prefetch) that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000003091",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data read (demand & prefetch) that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400003091",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data read (demand & prefetch) that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200003091",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data read (demand & prefetch) that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000013091",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any data read (demand & prefetch) that have any response type.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680004800",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts streaming store that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000008008",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400008008",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200008008",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000018008",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts any request that have any response type.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680002000",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts DCU hardware prefetcher data read that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000002000",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts DCU hardware prefetcher data read that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400002000",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts DCU hardware prefetcher data read that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200002000",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts DCU hardware prefetcher data read that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000012000",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts DCU hardware prefetcher data read that have any response type.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000100",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Countsof demand RFO requests to write to partial cache lines that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000080",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand reads of partial cache lines (including UC and WC) that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000040",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads generated by L2 prefetchers that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000040",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads generated by L2 prefetchers that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000040",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts code reads generated by L2 prefetchers that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000020",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts RFO requests generated by L2 prefetchers that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000020",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts RFO requests generated by L2 prefetchers that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000020",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts RFO requests generated by L2 prefetchers that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000020",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts RFO requests generated by L2 prefetchers that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000010",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000010",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000010",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000010",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts data cacheline reads generated by L2 prefetchers that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000008",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts writeback (modified to exclusive) that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0080000008",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts writeback (modified to exclusive) that miss L2 with no details on snoop-related information.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000004",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that are are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000004",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000004",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000004",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010004",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch instruction cacheline that have any response type.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000002",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch RFOs that are are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000002",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch RFOs that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000002",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch RFOs that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000002",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch RFOs that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000002",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch RFOs that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4000000001",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OUTSTANDING",
+ "MSRIndex": "0x1a6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch data read that are are outstanding, per cycle, from the time of the L2 miss to when any response is received.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1680000001",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.ANY",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch data read that miss L2.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1000000001",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch data read that hit in the other module where modified copies were found in other core's L1 cache.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0400000001",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch data read that miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0200000001",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch data read that miss L2 with a snoop miss response.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x0000010001",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts demand and DCU prefetch data read that have any response type.",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
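The OFFCORE_RESPONSE entries above pair event 0xB7/umask 0x1 with an auxiliary MSRValue programmed into MSR 0x1a6/0x1a7. perf exposes that MSR value through attr.config1 (the PMU's offcore_rsp format field), so the sketch below, which assumes that mapping rather than documenting a verified recipe, counts OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE (MSRValue 0x0000010001). The workload marker is a placeholder.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count = 0;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = 0x01b7;            /* EventCode 0xB7, UMask 0x1 */
        attr.config1 = 0x0000010001ULL;  /* MSRValue for DEMAND_DATA_RD.ANY_RESPONSE */
        attr.disabled = 1;

        fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        ioctl(fd, PERF_EVENT_IOC_RESET, 0);
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        read(fd, &count, sizeof(count));
        printf("offcore demand data reads: %llu\n", (unsigned long long)count);
        close(fd);
        return 0;
}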
diff --git a/tools/perf/pmu-events/arch/x86/silvermont/frontend.json b/tools/perf/pmu-events/arch/x86/silvermont/frontend.json
new file mode 100644
index 000000000..204473bad
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/silvermont/frontend.json
@@ -0,0 +1,47 @@
+[
+ {
+ "PublicDescription": "This event counts all instruction fetches, not including most uncacheable\r\nfetches.",
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "ICACHE.ACCESSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction fetches"
+ },
+ {
+ "PublicDescription": "This event counts all instruction fetches from the instruction cache.",
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "ICACHE.HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction fetches from Icache"
+ },
+ {
+ "PublicDescription": "This event counts all instruction fetches that miss the Instruction cache or produce memory requests. This includes uncacheable fetches. An instruction fetch miss is counted only once and not once for every cycle it is outstanding.",
+ "EventCode": "0x80",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "ICACHE.MISSES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Icache miss"
+ },
+ {
+ "PublicDescription": "Counts the number of times the MSROM starts a flow of UOPS. It does not count every time a UOP is read from the microcode ROM. The most common case that this counts is when a micro-coded instruction is encountered by the front end of the machine. Other cases include when an instruction encounters a fault, trap, or microcode assist of any sort. The event will count MSROM startups for UOPS that are speculative, and subsequently cleared by branch mispredict or machine clear. Background: UOPS are produced by two mechanisms. Either they are generated by hardware that decodes instructions into UOPS, or they are delivered by a ROM (called the MSROM) that holds UOPS associated with a specific instruction. MSROM UOPS might also be delivered in response to some condition such as a fault or other exceptional condition. This event is an excellent mechanism for detecting instructions that require the use of MSROM instructions.",
+ "EventCode": "0xE7",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MS_DECODED.MS_ENTRY",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times entered into a ucode flow in the FEC. Includes inserted flows due to front-end detected faults or assists. Speculative count."
+ },
+ {
+ "PublicDescription": "Counts the number of times a decode restriction reduced the decode throughput due to wrong instruction length prediction.",
+ "EventCode": "0xE9",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of times a decode restriction reduced the decode throughput due to wrong instruction length prediction"
+ }
+] \ No newline at end of file
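MS_DECODED.MS_ENTRY is most informative relative to retired instructions, since it flags how often execution falls into a microcode flow. The sketch below assumes the usual raw encoding ((UMask << 8) | EventCode = 0x01e7) and uses a perf event group so both counters cover exactly the same interval; the workload marker is again a placeholder.

#include <linux/perf_event.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
                            int cpu, int group_fd, unsigned long flags)
{
        return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static int open_event(uint32_t type, uint64_t config, int group_fd)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = type;
        attr.config = config;
        attr.disabled = (group_fd == -1);   /* only the leader starts disabled */
        attr.read_format = PERF_FORMAT_GROUP;
        return perf_event_open(&attr, 0, -1, group_fd, 0);
}

int main(void)
{
        struct { uint64_t nr, vals[2]; } res;
        int lead, ms;

        lead = open_event(PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, -1);
        ms   = open_event(PERF_TYPE_RAW, 0x01e7, lead); /* MS_DECODED.MS_ENTRY */
        if (lead < 0 || ms < 0) {
                perror("perf_event_open");
                return 1;
        }
        ioctl(lead, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
        /* ... workload ... */
        ioctl(lead, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

        if (read(lead, &res, sizeof(res)) > 0 && res.vals[0])
                printf("MSROM entries per 1k instructions: %.2f\n",
                       1000.0 * res.vals[1] / res.vals[0]);
        return 0;
}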
diff --git a/tools/perf/pmu-events/arch/x86/silvermont/memory.json b/tools/perf/pmu-events/arch/x86/silvermont/memory.json
new file mode 100644
index 000000000..d72e09a5f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/silvermont/memory.json
@@ -0,0 +1,11 @@
+[
+ {
+ "PublicDescription": "This event counts the number of times that pipeline was cleared due to memory ordering issues.",
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Stalls due to Memory ordering"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json b/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json
new file mode 100644
index 000000000..7468af991
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/silvermont/pipeline.json
@@ -0,0 +1,359 @@
+[
+ {
+ "PEBS": "1",
+ "PublicDescription": "ALL_BRANCHES counts the number of any branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of branch instructions retired..."
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "JCC counts the number of conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x7e",
+ "EventName": "BR_INST_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of JCC branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "TAKEN_JCC counts the number of taken conditional branch (JCC) instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xfe",
+ "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of taken JCC branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "CALL counts the number of near CALL branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xf9",
+ "EventName": "BR_INST_RETIRED.CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near CALL branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "REL_CALL counts the number of near relative CALL branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xfd",
+ "EventName": "BR_INST_RETIRED.REL_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near relative CALL branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "IND_CALL counts the number of near indirect CALL branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xfb",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near indirect CALL branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "RETURN counts the number of near RET branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xf7",
+ "EventName": "BR_INST_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near RET branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "NON_RETURN_IND counts the number of near indirect JMP and near indirect CALL branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xeb",
+ "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "FAR counts the number of far branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0xbf",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of far branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "ALL_BRANCHES counts the number of any mispredicted branch instructions retired. This umask is an architecturally defined event. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "JCC counts the number of mispredicted conditional branches (JCC) instructions retired. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0x7e",
+ "EventName": "BR_MISP_RETIRED.JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted JCC branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "TAKEN_JCC counts the number of mispredicted taken conditional branch (JCC) instructions retired. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xfe",
+ "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted taken JCC branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "IND_CALL counts the number of mispredicted near indirect CALL branch instructions retired. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xfb",
+ "EventName": "BR_MISP_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "RETURN counts the number of mispredicted near RET branch instructions retired. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xf7",
+ "EventName": "BR_MISP_RETIRED.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "NON_RETURN_IND counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired. This event counts the number of retired branch instructions that were mispredicted by the processor, categorized by type. A branch misprediction occurs when the processor predicts that the branch would be taken, but it is not, or vice-versa. When the misprediction is discovered, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1",
+ "UMask": "0xeb",
+ "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired"
+ },
+ {
+ "PublicDescription": "This event counts the number of micro-ops retired that were supplied from MSROM.",
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.MS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "MSROM micro-ops retired"
+ },
+ {
+ "PublicDescription": "This event counts the number of micro-ops retired. The processor decodes complex macro instructions into a sequence of simpler micro-ops. Most instructions are composed of one or two micro-ops. Some instructions are decoded into longer sequences such as repeat instructions, floating point transcendental instructions, and assists. In some cases micro-op sequences are fused or whole instructions are fused into one micro-op. See other UOPS_RETIRED events for differentiating retired fused and non-fused micro-ops.",
+ "EventCode": "0xC2",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Micro-ops retired"
+ },
+ {
+ "PublicDescription": "This event counts the number of times that a program writes to a code section. Self-modifying code causes a severe penalty in all Intel? architecture processors.",
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "PublicDescription": "This event counts the number of times that pipeline stalled due to FP operations needing assists.",
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.FP_ASSIST",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Stalls due to FP assists"
+ },
+ {
+ "PublicDescription": "Machine clears happen when something happens in the machine that causes the hardware to need to take special care to get the right answer. When such a condition is signaled on an instruction, the front end of the machine is notified that it must restart, so no more instructions will be decoded from the current path. All instructions \"older\" than this one will be allowed to finish. This instruction and all \"younger\" instructions must be cleared, since they must not be allowed to complete. Essentially, the hardware waits until the problematic instruction is the oldest instruction in the machine. This means all older instructions are retired, and all pending stores (from older instructions) are completed. Then the new path of instructions from the front end are allowed to start into the machine. There are many conditions that might cause a machine clear (including the receipt of an interrupt, or a trap or a fault). All those conditions (including but not limited to MACHINE_CLEARS.MEMORY_ORDERING, MACHINE_CLEARS.SMC, and MACHINE_CLEARS.FP_ASSIST) are captured in the ANY event. In addition, some conditions can be specifically counted (i.e. SMC, MEMORY_ORDERING, FP_ASSIST). However, the sum of SMC, MEMORY_ORDERING, and FP_ASSIST machine clears will not necessarily equal the number of ANY.",
+ "EventCode": "0xC3",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "MACHINE_CLEARS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts all machine clears"
+ },
+ {
+ "PublicDescription": "Counts the number of cycles when no uops are allocated and the ROB is full (less than 2 entries available).",
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "NO_ALLOC_CYCLES.ROB_FULL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles when no uops are allocated and the ROB is full (less than 2 entries available)"
+ },
+ {
+ "PublicDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted.",
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "NO_ALLOC_CYCLES.MISPREDICTS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles when no uops are allocated and the alloc pipe is stalled waiting for a mispredicted jump to retire. After the misprediction is detected, the front end will start immediately but the allocate pipe stalls until the mispredicted "
+ },
+ {
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "NO_ALLOC_CYCLES.RAT_STALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles when no uops are allocated and a RATstall is asserted."
+ },
+ {
+ "PublicDescription": "The NO_ALLOC_CYCLES.NOT_DELIVERED event is used to measure front-end inefficiencies, i.e. when front-end of the machine is not delivering micro-ops to the back-end and the back-end is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into micro-ops (uops) in machine understandable format and putting them into a micro-op queue to be consumed by back end. The back-end then takes these micro-ops, allocates the required resources. When all resources are ready, micro-ops are executed. If the back-end is not ready to accept micro-ops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more UOPS. This event counts the cycles only when back-end is requesting more uops and front-end is not able to provide them. Some examples of conditions that cause front-end efficiencies are: Icache misses, ITLB misses, and decoder restrictions that limit the the front-end bandwidth.",
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x50",
+ "EventName": "NO_ALLOC_CYCLES.NOT_DELIVERED",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles when no uops are allocated, the IQ is empty, and no other condition is blocking allocation."
+ },
+ {
+ "PublicDescription": "The NO_ALLOC_CYCLES.ALL event counts the number of cycles when the front-end does not provide any instructions to be allocated for any reason. This event indicates the cycles where an allocation stalls occurs, and no UOPS are allocated in that cycle.",
+ "EventCode": "0xCA",
+ "Counter": "0,1",
+ "UMask": "0x3f",
+ "EventName": "NO_ALLOC_CYCLES.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles when no uops are allocated for any reason."
+ },
+ {
+ "PublicDescription": "Counts the number of cycles and allocation pipeline is stalled and is waiting for a free MEC reservation station entry. The cycles should be appropriately counted in case of the cracked ops e.g. In case of a cracked load-op, the load portion is sent to M.",
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "RS_FULL_STALL.MEC",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles and allocation pipeline is stalled and is waiting for a free MEC reservation station entry. The cycles should be appropriately counted in case of the cracked ops e.g. In case of a cracked load-op, the load portion is sent to M"
+ },
+ {
+ "EventCode": "0xCB",
+ "Counter": "0,1",
+ "UMask": "0x1f",
+ "EventName": "RS_FULL_STALL.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of cycles the Alloc pipeline is stalled when any one of the RSs (IEC, FPC and MEC) is full. This event is a superset of all the individual RS stall event counts."
+ },
+ {
+ "PublicDescription": "This event counts the number of instructions that retire execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers.",
+ "EventCode": "0xC0",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired"
+ },
+ {
+ "PublicDescription": "Cycles the divider is busy.This event counts the cycles when the divide unit is unable to accept a new divide UOP because it is busy processing a previously dispatched UOP. The cycles will be counted irrespective of whether or not another divide UOP is waiting to enter the divide unit (from the RS). This event might count cycles while a divide is in progress even if the RS is empty. The divide instruction is one of the longest latency instructions in the machine. Hence, it has a special event associated with it to help determine if divides are delaying the retirement of instructions.",
+ "EventCode": "0xCD",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CYCLES_DIV_BUSY.ALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles the divider is busy. Does not imply a stall waiting for the divider."
+ },
+ {
+ "PublicDescription": "This event counts the number of instructions that retire. For instructions that consist of multiple micro-ops, this event counts exactly once, as the last micro-op of the instruction retires. The event continues counting while instructions retire, including during interrupt service routines caused by hardware interrupts, faults or traps. Background: Modern microprocessors employ extensive pipelining and speculative techniques. Since sometimes an instruction is started but never completed, the notion of \"retirement\" is introduced. A retired instruction is one that commits its states. Or stated differently, an instruction might be abandoned at some point. No instruction is truly finished until it retires. This counter measures the number of completed instructions. The fixed event is INST_RETIRED.ANY and the programmable event is INST_RETIRED.ANY_P.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Fixed Counter: Counts the number of instructions retired"
+ },
+ {
+ "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. In systems with a constant core frequency, this event can give you a measurement of the elapsed time while the core was not in halt state by dividing the event count by the core frequency. This event is architecturally defined and is a designated fixed counter. CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time. CPU_CLK_UNHALTE.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Fixed Counter: Counts the number of unhalted core clock cycles"
+ },
+ {
+ "PublicDescription": "Counts the number of reference cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. Divide this event count by core frequency to determine the elapsed time while the core was not in halt state. Divide this event count by core frequency to determine the elapsed time while the core was not in halt state. This event is architecturally defined and is a designated fixed counter. CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.CORE_P use the core frequency which may change from time to time. CPU_CLK_UNHALTE.REF_TSC and CPU_CLK_UNHALTED.REF are not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. The fixed events are CPU_CLK_UNHALTED.CORE and CPU_CLK_UNHALTED.REF_TSC and the programmable events are CPU_CLK_UNHALTED.CORE_P and CPU_CLK_UNHALTED.REF.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Fixed Counter: Counts the number of unhalted reference clock cycles"
+ },
+ {
+ "PublicDescription": "This event counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time.",
+ "EventCode": "0x3C",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.CORE_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when core is not halted"
+ },
+ {
+ "PublicDescription": "This event counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time.",
+ "EventCode": "0x3C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when core is not halted"
+ },
+ {
+ "PublicDescription": "The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. The BACLEARS.ANY event counts the number of baclears for any type of branch.",
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "BACLEARS.ALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of baclears"
+ },
+ {
+ "PublicDescription": "The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. The BACLEARS.RETURN event counts the number of RETURN baclears.",
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "BACLEARS.RETURN",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of RETURN baclears"
+ },
+ {
+ "PublicDescription": "The BACLEARS event counts the number of times the front end is resteered, mainly when the Branch Prediction Unit cannot provide a correct prediction and this is corrected by the Branch Address Calculator at the front end. The BACLEARS.COND event counts the number of JCC (Jump on Condtional Code) baclears.",
+ "EventCode": "0xE6",
+ "Counter": "0,1",
+ "UMask": "0x10",
+ "EventName": "BACLEARS.COND",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of JCC baclears"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "ALL_TAKEN_BRANCHES counts the number of all taken branch instructions retired. Branch prediction predicts the branch target and enables the processor to begin executing instructions long before the branch true execution path is known. All branches utilize the branch prediction unit (BPU) for prediction. This unit predicts the target address not only based on the EIP of the branch but also based on the execution path through which execution reached this EIP. The BPU can efficiently predict the following branch types: conditional branches, direct calls and jumps, indirect calls and jumps, returns.",
+ "EventCode": "0xC4",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "PEBScounters": "0,1",
+ "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of taken branch instructions retired"
+ }
+] \ No newline at end of file
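
The EventCode and UMask fields in the entries above map directly onto the x86 raw event encoding that perf accepts: on Intel processors, bits [7:0] of a PERF_TYPE_RAW config carry the event select and bits [15:8] carry the unit mask, so BR_MISP_RETIRED.JCC (EventCode 0xC5, UMask 0x7e) corresponds to the raw event r7ec5. Below is a minimal sketch, not part of this patch, of reading that counter through perf_event_open(2); the event choice is illustrative only, and error handling plus the measured workload are elided.

    /* Hypothetical standalone example, not part of this patch. */
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    int main(void)
    {
            struct perf_event_attr attr;
            long long count;
            int fd;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = 0x7ec5;           /* UMask 0x7e << 8 | EventCode 0xC5 */
            attr.disabled = 1;
            attr.exclude_kernel = 1;

            /* Count on the calling thread, any CPU; glibc has no wrapper. */
            fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
            ioctl(fd, PERF_EVENT_IOC_RESET, 0);
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
            /* ... workload under measurement ... */
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            read(fd, &count, sizeof(count));
            printf("mispredicted JCC retired: %lld\n", count);
            close(fd);
            return 0;
    }

Once this JSON file is built into perf, the same event should also be reachable symbolically, e.g. "perf stat -e br_misp_retired.jcc".
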
diff --git a/tools/perf/pmu-events/arch/x86/silvermont/virtual-memory.json b/tools/perf/pmu-events/arch/x86/silvermont/virtual-memory.json
new file mode 100644
index 000000000..ad31479f8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/silvermont/virtual-memory.json
@@ -0,0 +1,69 @@
+[
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts the number of load ops retired that had DTLB miss.",
+ "EventCode": "0x04",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Loads missed DTLB"
+ },
+ {
+ "PublicDescription": "This event counts when a data (D) page walk is completed or started. Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_WALKS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "D-side page-walks",
+ "EdgeDetect": "1"
+ },
+ {
+ "PublicDescription": "This event counts every cycle when a D-side (walks due to a load) page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "PAGE_WALKS.D_SIDE_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Duration of D-side page-walks in core cycles"
+ },
+ {
+ "PublicDescription": "This event counts when an instruction (I) page walk is completed or started. Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_WALKS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "I-side page-walks",
+ "EdgeDetect": "1"
+ },
+ {
+ "PublicDescription": "This event counts every cycle when a I-side (walks due to an instruction fetch) page walk is in progress. Page walk duration divided by number of page walks is the average duration of page-walks.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "PAGE_WALKS.I_SIDE_CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Duration of I-side page-walks in core cycles"
+ },
+ {
+ "PublicDescription": "This event counts when a data (D) page walk or an instruction (I) page walk is completed or started. Since a page walk implies a TLB miss, the number of TLB misses can be counted by counting the number of pagewalks.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.WALKS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Total page walks that are completed (I-side and D-side)",
+ "EdgeDetect": "1"
+ },
+ {
+ "PublicDescription": "This event counts every cycle when a data (D) page walk or instruction (I) page walk is in progress. Since a pagewalk implies a TLB miss, the approximate cost of a TLB miss can be determined from this event.",
+ "EventCode": "0x05",
+ "Counter": "0,1",
+ "UMask": "0x3",
+ "EventName": "PAGE_WALKS.CYCLES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Total cycles for all the page walks. (I-side and D-side)"
+ }
+] \ No newline at end of file
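
As the descriptions above state, the average page-walk cost is the CYCLES count divided by the edge-detected WALKS count. In perf's x86 raw encoding the EdgeDetect field corresponds to bit 18 of the config, so the D-side pair from this file would become 0x40105 (D_SIDE_WALKS, edge-detected) and 0x0105 (D_SIDE_CYCLES). A minimal sketch under those assumed encodings follows, not part of this patch, scheduling both events as one group so they count over the same interval; error handling is elided.

    /* Hypothetical standalone example, not part of this patch. */
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/syscall.h>
    #include <linux/perf_event.h>

    static int open_raw(unsigned long long config, int group_fd)
    {
            struct perf_event_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.size = sizeof(attr);
            attr.type = PERF_TYPE_RAW;
            attr.config = config;
            attr.disabled = (group_fd == -1);  /* only the group leader starts disabled */
            return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
    }

    int main(void)
    {
            long long walks, cycles;
            int lead, sub;

            lead = open_raw(0x40105, -1);   /* PAGE_WALKS.D_SIDE_WALKS, edge bit 18 set */
            sub  = open_raw(0x00105, lead); /* PAGE_WALKS.D_SIDE_CYCLES */

            ioctl(lead, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
            /* ... workload under measurement ... */
            ioctl(lead, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);

            read(lead, &walks, sizeof(walks));
            read(sub, &cycles, sizeof(cycles));
            if (walks)
                    printf("avg D-side page walk: %lld cycles\n", cycles / walks);
            close(sub);
            close(lead);
            return 0;
    }

With the symbolic names from this file, the same ratio should be obtainable as "perf stat -e page_walks.d_side_walks,page_walks.d_side_cycles".
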
diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json
new file mode 100644
index 000000000..54bfe9e40
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json
@@ -0,0 +1,939 @@
+[
+ {
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x22",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand requests that miss L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x27",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x38",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "All requests that miss L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All requests that miss L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x44",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xd8",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe1",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand Data Read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe2",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "RFO requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the total number of L2 code requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe4",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 code requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand requests to L2 cache.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe7",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Demand requests to L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf8",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "All L2 requests.",
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "All L2 requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "Errata": "SKL057",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.",
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "Errata": "SKL057",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D miss outstandings duration in cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+ "EventCode": "0x48",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "EventCode": "0x51",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D.REPLACEMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "L1D data line replacements",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand and prefetch data reads",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Any memory transaction that reached the SQ.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions that miss the STLB.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x11",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired store instructions that miss the STLB.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x12",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x21",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x42",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x81",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired load instructions. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "All retired store instructions.",
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x82",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "All retired store instructions. (Precise Event)",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1",
+ "L1_Hit_Indication": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n",
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+ "CounterHTOff": "0,1,2,3",
+ "Data_LA": "1"
+ },
+ {
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANS.L2_WB",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "L2_LINES_IN.ALL",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "L2 cache lines filling L2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.SILENT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.",
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.USELESS_PREF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of cache line split locks sent to uncore.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fc0400001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000400001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400400001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200400001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100400001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080400001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fc01c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10001c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x04001c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x02001c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x01001c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00801c0001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fc0020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1000020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0400020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0200020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0100020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0080020001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0000010001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts demand data reads that have any response type.",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
new file mode 100644
index 000000000..213dd6230
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json
@@ -0,0 +1,67 @@
+[
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "EventCode": "0xCA",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1e",
+ "EventName": "FP_ASSIST.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
new file mode 100644
index 000000000..578dff5bd
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json
@@ -0,0 +1,482 @@
+[
+ {
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "IDQ.MITE_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "IDQ.DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x18",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x24",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EdgeDetect": "1",
+ "EventName": "IDQ.MS_SWITCHES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "EventCode": "0x79",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "IDQ.MS_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x83",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x83",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x83",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "SampleAfterValue": "200003",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+ "EventCode": "0x9C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+ "EventCode": "0xAB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n",
+ "EventCode": "0xC6",
+ "MSRValue": "0x11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "EventCode": "0xC6",
+ "MSRValue": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "EventCode": "0xC6",
+ "MSRValue": "0x15",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x400206",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x200206",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x400406",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n",
+ "EventCode": "0xC6",
+ "MSRValue": "0x400806",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n",
+ "EventCode": "0xC6",
+ "MSRValue": "0x401006",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n",
+ "EventCode": "0xC6",
+ "MSRValue": "0x402006",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x404006",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x408006",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x410006",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x420006",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n",
+ "EventCode": "0xC6",
+ "MSRValue": "0x100206",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC6",
+ "MSRValue": "0x300206",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+ "MSRIndex": "0x3F7",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json
new file mode 100644
index 000000000..3bd8b712c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json
@@ -0,0 +1,615 @@
+[
+ {
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "TX_MEM.ABORT_CAPACITY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "EventCode": "0x54",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "TX_EXEC.MISC2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "TX_EXEC.MISC3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "RTM region detected inside HLE.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "TX_EXEC.MISC4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "EventCode": "0x5d",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "TX_EXEC.MISC5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x6",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "CounterMask": "6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand Data Read requests who miss L3 cache",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "SKL089",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "HLE_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution started.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution successfully committed",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "HLE_RETIRED.ABORTED_MEM",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "HLE_RETIRED.ABORTED_TIMER",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RTM_RETIRED.START",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution started.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution successfully committed",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RTM_RETIRED.ABORTED_TIMER",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "EventCode": "0xC9",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50021",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20011",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x40",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2003",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1009",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x100",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "503",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "EventCode": "0xCD",
+ "MSRValue": "0x200",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "101",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.",
+ "TakenAlone": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3ffc000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x103c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x043c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x023c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x013c000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x00bc000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3fc4000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0404000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0204000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0104000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x0084000001 ",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE",
+ "Offcore": "1",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/skylake/other.json b/tools/perf/pmu-events/arch/x86/skylake/other.json
new file mode 100644
index 000000000..84a316d38
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/other.json
@@ -0,0 +1,48 @@
+[
+ {
+ "EventCode": "0x32",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "SampleAfterValue": "203",
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
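
Once these tables are compiled into perf, each entry is addressable by its lower-cased EventName (for example sw_prefetch_access.nta), and SampleAfterValue becomes the default sampling period. As an illustrative sketch, assuming the usual raw encoding of event | (umask << 8), an entry's fields map onto a perf_event_attr roughly as follows:

    #include <linux/perf_event.h>
    #include <stdint.h>
    #include <string.h>

    /* Sketch: fill a perf_event_attr from one JSON entry's fields,
     * e.g. HW_INTERRUPTS.RECEIVED: EventCode 0xCB, UMask 0x1,
     * SampleAfterValue 203. */
    static void skl_event_attr(struct perf_event_attr *attr,
                               uint64_t event, uint64_t umask,
                               uint64_t sample_after_value)
    {
            memset(attr, 0, sizeof(*attr));
            attr->size = sizeof(*attr);
            attr->type = PERF_TYPE_RAW;
            attr->config = event | (umask << 8);
            attr->sample_period = sample_after_value;
    }
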
diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
new file mode 100644
index 000000000..bc6d2afbc
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json
@@ -0,0 +1,950 @@
+[
+ {
+ "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 0",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Instructions retired from execution.",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x2",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
+ "EventCode": "0x00",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x3",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "EventCode": "0x03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+ "EventCode": "0x07",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "EventCode": "0x0E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate) regardless if as a result of LEA instruction or not.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EdgeDetect": "1",
+ "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2503",
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2503",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2503",
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "SampleAfterValue": "2503",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2503",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "EventCode": "0x4C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "EventCode": "0x5E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+ "EventCode": "0x5E",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
+ "EventCode": "0xA1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Resource-related stall cycles",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.SB",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Total execution stalls.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x5",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "CounterMask": "5",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "CounterMask": "8",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "CounterMask": "12",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "CounterMask": "16",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x14",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "CounterMask": "20",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of uops executed from any thread.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of uops executed on the core.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "CounterMask": "2",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "CounterMask": "3",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "CounterMask": "4",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of x87 uops executed.",
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "UOPS_EXECUTED.X87",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "Errata": "SKL091, SKL044",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+ "EventCode": "0xC0",
+ "Counter": "1",
+ "UMask": "0x1",
+ "Errata": "SKL091, SKL044",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "CounterHTOff": "1"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,2,3",
+ "UMask": "0x1",
+ "Errata": "SKL091, SKL044",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,2,3"
+ },
+ {
+ "EventCode": "0xC1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "EventName": "OTHER_ASSISTS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the retirement slots used.",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Retirement slots used.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "CounterMask": "10",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EdgeDetect": "1",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all (macro) branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Return instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Taken branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "Errata": "SKL091",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Counts the number of far branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PEBS": "2",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "PEBS": "1",
+ "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "400009",
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Increments whenever there is an update to the LBR array.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEARS.ANY",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
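
The CounterMask, Invert, and EdgeDetect fields used throughout this file correspond to the cmask, inv, and edge bit-fields of the architectural IA32_PERFEVTSELx layout (event in bits 7:0, umask in 15:8, edge at bit 18, inv at bit 23, cmask in 31:24). A sketch of the folding, with CYCLE_ACTIVITY.STALLS_TOTAL worked as an example:

    #include <stdint.h>

    /* Fold an entry's fields into a raw config word, assuming the
     * architectural IA32_PERFEVTSELx bit layout. */
    static uint64_t skl_raw_config(uint64_t event, uint64_t umask,
                                   uint64_t edge, uint64_t inv, uint64_t cmask)
    {
            return event | (umask << 8) | (edge << 18) |
                   (inv << 23) | (cmask << 24);
    }

    /* CYCLE_ACTIVITY.STALLS_TOTAL (EventCode 0xA3, UMask 0x4,
     * CounterMask 4) folds to 0x40004a3, i.e. the raw event
     * cpu/event=0xa3,umask=0x4,cmask=4/. */
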
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
new file mode 100644
index 000000000..71e9737f4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
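
The MetricExpr strings above are plain arithmetic over events defined in the sibling files, and perf binaries with metric support evaluate them directly (perf stat -M IPC, for instance). As a worked illustration of the IPC and SLOTS expressions:

    #include <stdint.h>

    /* IPC: INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD */
    static double skl_ipc(uint64_t inst_retired_any,
                          uint64_t clk_unhalted_thread)
    {
            return (double)inst_retired_any / (double)clk_unhalted_thread;
    }

    /* SLOTS: 4 * core clocks; with SMT on, per-core clocks are
     * approximated as CPU_CLK_UNHALTED.THREAD_ANY / 2, exactly as
     * in the SLOTS MetricExpr above. */
    static double skl_slots(uint64_t clk_unhalted_thread_any,
                            uint64_t cycles, int smt_on)
    {
            return 4.0 * (smt_on ? clk_unhalted_thread_any / 2.0
                                 : (double)cycles);
    }
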
diff --git a/tools/perf/pmu-events/arch/x86/skylake/uncore.json b/tools/perf/pmu-events/arch/x86/skylake/uncore.json
new file mode 100644
index 000000000..dbc193252
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/uncore.json
@@ -0,0 +1,254 @@
+[
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x41",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x44",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HIT_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x48",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.HITM_XCORE",
+ "BriefDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "PublicDescription": "A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x21",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_M",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in M-state",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_M",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in M-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in M-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x18",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_I",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in I-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x88",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_I",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in I-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in I-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x1f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_MESI",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in any MESI-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in any MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x2f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_MESI",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in MESI-state",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x8f",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_MESI",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in MESI-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in MESI-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x86",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.ANY_ES",
+ "BriefDescription": "L3 Lookup any request that access cache and found line in E or S-state",
+ "PublicDescription": "L3 Lookup any request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x16",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.READ_ES",
+ "BriefDescription": "L3 Lookup read request that access cache and found line in E or S-state",
+ "PublicDescription": "L3 Lookup read request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x34",
+ "UMask": "0x26",
+ "EventName": "UNC_CBO_CACHE_LOOKUP.WRITE_ES",
+ "BriefDescription": "L3 Lookup write request that access cache and found line in E or S-state",
+ "PublicDescription": "L3 Lookup write request that access cache and found line in E or S-state.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+ "BriefDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "PublicDescription": "Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from its allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+ "BriefDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "PublicDescription": "Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x02",
+ "EventName": "UNC_ARB_TRK_REQUESTS.DRD_DIRECT",
+ "BriefDescription": "Number of Core coherent Data Read entries allocated in DirectData mode",
+ "PublicDescription": "Number of Core coherent Data Read entries allocated in DirectData mode.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x81",
+ "UMask": "0x20",
+ "EventName": "UNC_ARB_TRK_REQUESTS.WRITES",
+ "BriefDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "PublicDescription": "Number of Writes allocated - any write transactions: full/partials writes and evictions.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x84",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+ "BriefDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "PublicDescription": "Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "iMPH-U",
+ "EventCode": "0x80",
+ "UMask": "0x01",
+ "EventName": "UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST",
+ "BriefDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.;",
+ "PublicDescription": "Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.",
+ "Counter": "0",
+ "CounterMask": "1",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ },
+ {
+ "Unit": "NCU",
+ "EventCode": "0x0",
+ "UMask": "0x01",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
+ "PublicDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "FIXED",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+] \ No newline at end of file
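
Note on the uncore table above: these entries carry the raw programming fields (EventCode, UMask, CounterMask, Invert, EdgeDetect) rather than a precomputed encoding, and perf assembles the event at run time against whichever uncore PMU exposes matching format attributes. A minimal Python sketch of that assembly, assuming a local copy of the file above saved as uncore.json and assuming the client CBo PMU shows up as uncore_cbox_0 (the real instance names are whatever is listed under /sys/bus/event_source/devices on the target machine):

import json

def uncore_event_string(entry, pmu="uncore_cbox_0"):
    # Build a perf event descriptor such as
    # uncore_cbox_0/event=0x34,umask=0x16/ from one JSON entry.
    terms = ["event=" + entry["EventCode"], "umask=" + entry["UMask"]]
    if entry.get("CounterMask", "0") != "0":
        terms.append("cmask=" + entry["CounterMask"])
    if entry.get("EdgeDetect", "0") == "1":
        terms.append("edge=1")
    if entry.get("Invert", "0") == "1":
        terms.append("inv=1")
    return "%s/%s/" % (pmu, ",".join(terms))

with open("uncore.json") as f:
    for entry in json.load(f):
        if entry.get("Unit") == "CBO":
            print(entry["EventName"], uncore_event_string(entry))

Entries with Counter "FIXED", such as UNC_CLOCK.SOCKET, are the exception: a fixed counter is not programmed through the event/umask fields at all.
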
diff --git a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
new file mode 100644
index 000000000..2bcba7dac
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json
@@ -0,0 +1,284 @@
+[
+ {
+ "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "EventCode": "0x08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_PENDING",
+ "SampleAfterValue": "2000003",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+ "CounterMask": "1",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+ "EventCode": "0xBD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "SampleAfterValue": "100007",
+ "BriefDescription": "STLB flush attempts",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
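
Note on the core-event tables: EventCode and UMask map directly onto the IA32_PERFEVTSELx register layout from the Intel SDM (event select in bits 7:0, unit mask in bits 15:8, edge detect in bit 18, invert in bit 23, counter mask in bits 31:24), while SampleAfterValue only sets the default sampling period. A minimal sketch of that packing, assuming a local copy of the file above saved as virtual-memory.json:

import json

def raw_config(entry):
    # Pack one JSON entry into a raw config value following the
    # IA32_PERFEVTSELx bit layout documented in the Intel SDM.
    config = int(entry["EventCode"].split(",")[0], 16)    # event select, bits 7:0
    config |= int(entry.get("UMask", "0x0"), 16) << 8     # unit mask, bits 15:8
    config |= int(entry.get("EdgeDetect", "0")) << 18     # edge detect, bit 18
    config |= int(entry.get("Invert", "0")) << 23         # invert, bit 23
    config |= int(entry.get("CounterMask", "0")) << 24    # counter mask, bits 31:24
    return config

with open("virtual-memory.json") as f:
    for entry in json.load(f):
        # e.g. ITLB_MISSES.WALK_ACTIVE -> event 0x85, umask 0x10, cmask 1
        print("%-40s r%x" % (entry["EventName"], raw_config(entry)))

The resulting value can be passed to perf as a raw event (perf stat -e rXXXX); in normal use, though, perf resolves these entries by EventName through the generated pmu-events tables, and the OFFCORE_RESPONSE.* entries further down additionally need their MSRValue programmed into MSR 0x1A6/0x1A7 (perf's offcore_rsp term).
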
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
new file mode 100644
index 000000000..5c9940866
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json
@@ -0,0 +1,1749 @@
+[
+ {
+ "EventCode": "0x24",
+ "UMask": "0x21",
+ "BriefDescription": "Demand Data Read miss L2, no rejects",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
+ "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x22",
+ "BriefDescription": "RFO requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x24",
+ "BriefDescription": "L2 cache misses when fetching instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_MISS",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x27",
+ "BriefDescription": "Demand requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
+ "PublicDescription": "Demand requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x38",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.PF_MISS",
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x3f",
+ "BriefDescription": "All requests that miss L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.MISS",
+ "PublicDescription": "All requests that miss L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x41",
+ "BriefDescription": "Demand Data Read requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
+ "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x42",
+ "BriefDescription": "RFO requests that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0x44",
+ "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.CODE_RD_HIT",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xd8",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.PF_HIT",
+ "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe1",
+ "BriefDescription": "Demand Data Read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe2",
+ "BriefDescription": "RFO requests to L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_RFO",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe4",
+ "BriefDescription": "L2 code requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_CODE_RD",
+ "PublicDescription": "Counts the total number of L2 code requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xe7",
+ "BriefDescription": "Demand requests to L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
+ "PublicDescription": "Demand requests to L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xf8",
+ "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.ALL_PF",
+ "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x24",
+ "UMask": "0xff",
+ "BriefDescription": "All L2 requests",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "PublicDescription": "All L2 requests.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x41",
+ "BriefDescription": "Core-originated cacheable demand requests missed L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x2E",
+ "UMask": "0x4f",
+ "BriefDescription": "Core-originated cacheable demand requests that refer to L3",
+ "Counter": "0,1,2,3",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "L1D miss outstandings duration in cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.PENDING",
+ "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY",
+ "AnyThread": "1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x48",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D_PEND_MISS.FB_FULL",
+ "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x51",
+ "UMask": "0x1",
+ "BriefDescription": "L1D data line replacements",
+ "Counter": "0,1,2,3",
+ "EventName": "L1D.REPLACEMENT",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6",
+ "CounterMask": "6",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
+ "CounterMask": "1",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
+ "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
+ "CounterMask": "1",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
+ "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x1",
+ "BriefDescription": "Demand Data Read requests sent to uncore",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x2",
+ "BriefDescription": "Cacheable and noncachaeble code read requests",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x4",
+ "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x8",
+ "BriefDescription": "Demand and prefetch data reads",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x80",
+ "BriefDescription": "Any memory transaction that reached the SQ.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB2",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL",
+ "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE",
+ "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x11",
+ "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
+ "PublicDescription": "Retired load instructions that miss the STLB.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x12",
+ "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
+ "PublicDescription": "Retired store instructions that miss the STLB.",
+ "SampleAfterValue": "100003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x21",
+ "BriefDescription": "Retired load instructions with locked access. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x41",
+ "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x42",
+ "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
+ "SampleAfterValue": "100003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x81",
+ "BriefDescription": "All retired load instructions. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.ALL_LOADS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD0",
+ "UMask": "0x82",
+ "BriefDescription": "All retired store instructions. (Precise Event)",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_INST_RETIRED.ALL_STORES",
+ "PublicDescription": "All retired store instructions.",
+ "SampleAfterValue": "2000003",
+ "L1_Hit_Indication": "1",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "PublicDescription": "Retired load instructions with L2 cache hits as data sources.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+ "PublicDescription": "Retired load instructions with L3 cache hits as data sources.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x10",
+ "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+ "PublicDescription": "Retired load instructions missed L2 cache as data sources.",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x20",
+ "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+ "PublicDescription": "Retired load instructions missed L3 cache as data sources.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD1",
+ "UMask": "0x40",
+ "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
+ "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
+ "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD2",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+ "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x1",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x2",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from remote dram",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x4",
+ "BriefDescription": "Retired load instructions whose data sources was remote HITM",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD3",
+ "UMask": "0x8",
+ "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xD4",
+ "UMask": "0x4",
+ "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+ "Data_LA": "1",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xF0",
+ "UMask": "0x40",
+ "BriefDescription": "L2 writebacks that access L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_TRANS.L2_WB",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF1",
+ "UMask": "0x1f",
+ "BriefDescription": "L2 cache lines filling L2",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_IN.ALL",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared state. A non-threaded event.",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.SILENT",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.NON_SILENT",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x4",
+ "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.USELESS_PREF",
+ "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF2",
+ "UMask": "0x4",
+ "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xF4",
+ "UMask": "0x10",
+ "BriefDescription": "Number of cache line split locks sent to uncore.",
+ "Counter": "0,1,2,3",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "PublicDescription": "Counts the number of cache line split locks sent to the uncore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that have any response type.",
+ "MSRValue": "0x0000010001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that hit in the L3.",
+ "MSRValue": "0x3f803c0001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.",
+ "MSRValue": "0x0000010002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3.",
+ "MSRValue": "0x3f803c0002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that have any response type.",
+ "MSRValue": "0x0000010004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that hit in the L3.",
+ "MSRValue": "0x3f803c0004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.",
+ "MSRValue": "0x0000010010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "PF_L2_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.",
+ "MSRValue": "0x3f803c0010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.",
+ "MSRValue": "0x0000010020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "PF_L2_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.",
+ "MSRValue": "0x3f803c0020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.",
+ "MSRValue": "0x0000010080 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0080 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0080 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0080 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0080 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.",
+ "MSRValue": "0x3f803c0080 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.",
+ "MSRValue": "0x0000010100 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0100 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0100 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0100 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0100 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.",
+ "MSRValue": "0x3f803c0100 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.",
+ "MSRValue": "0x0000010400 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0400 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0400 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "PF_L1D_AND_SW & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0400 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0400 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.",
+ "MSRValue": "0x3f803c0400 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that have any response type.",
+ "MSRValue": "0x0000018000 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c8000 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c8000 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "OTHER & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c8000 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c8000 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that hit in the L3.",
+ "MSRValue": "0x3f803c8000 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that have any response type.",
+ "MSRValue": "0x0000010490 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0490 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0490 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "ALL_PF_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0490 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0490 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that hit in the L3.",
+ "MSRValue": "0x3f803c0490 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that have any response type.",
+ "MSRValue": "0x0000010120 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0120 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0120 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "ALL_PF_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0120 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0120 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that hit in the L3.",
+ "MSRValue": "0x3f803c0120 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that have any response type.",
+ "MSRValue": "0x0000010491 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0491 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0491 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "ALL_DATA_RD & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0491 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0491 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that hit in the L3.",
+ "MSRValue": "0x3f803c0491 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that have any response type.",
+ "MSRValue": "0x0000010122 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.",
+ "MSRValue": "0x01003c0122 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x04003c0122 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "ALL_RFO & L3_HIT & SNOOP_HIT_WITH_FWD",
+ "MSRValue": "0x08003c0122 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.",
+ "MSRValue": "0x10003c0122 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that hit in the L3.",
+ "MSRValue": "0x3f803c0122 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+] \ No newline at end of file
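
All of the OFFCORE_RESPONSE entries above share an event select/umask pair (0xB7 or 0xBB, umask 0x1); what distinguishes them is the MSRValue, which is written into the dedicated response MSR named by MSRIndex (0x1A6 pairs with 0xB7, 0x1A7 with 0xBB). With the raw perf_event_open(2) interface the select/umask pair goes in attr.config and the MSRValue in attr.config1. A minimal sketch, not part of this patch, using the ALL_RFO.L3_HIT.ANY_SNOOP mask value from the table above:

	/* Count OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP for this thread.
	 * Sketch only: event 0xB7/umask 0x1 -> config 0x01b7, and the
	 * JSON MSRValue 0x3f803c0122 -> config1 (the offcore response MSR).
	 */
	#include <linux/perf_event.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		unsigned long long count;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_RAW;
		attr.config = 0x01b7;		/* event select 0xB7, umask 0x1 */
		attr.config1 = 0x3f803c0122;	/* MSRValue from the JSON entry */
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		/* ... run the workload being measured ... */
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("offcore responses: %llu\n", count);
		close(fd);
		return 0;
	}

Once jevents builds these tables into perf, the same event should also be reachable by its symbolic name, along the lines of 'perf stat -e offcore_response.all_rfo.l3_hit.any_snoop'.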
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
new file mode 100644
index 000000000..286ed1a37
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json
@@ -0,0 +1,87 @@
+[
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x1",
+ "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x2",
+ "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x4",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x8",
+ "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x10",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x20",
+ "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x40",
+ "BriefDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8)",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
+ "PublicDescription": "Number of Packed Double-Precision FP arithmetic instructions (Use operation multiplier of 8).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC7",
+ "UMask": "0x80",
+ "BriefDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16)",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
+ "PublicDescription": "Number of Packed Single-Precision FP arithmetic instructions (Use operation multiplier of 16).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCA",
+ "UMask": "0x1e",
+ "BriefDescription": "Cycles with any input/output SSE or FP assist",
+ "Counter": "0,1,2,3",
+ "EventName": "FP_ASSIST.ANY",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
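
Each umask above maps to a fixed number of computations per retired instruction: 1 for the scalar events, 2 and 4 for 128-bit packed double/single, 4 and 8 for 256-bit, and 8 and 16 for the 512-bit events (their "operation multiplier"). Total retired FLOPs therefore fall out of a weighted sum of the eight counts; a sketch, with placeholder readings standing in for real counter values:

	/* Weighted FLOP total from the FP_ARITH_INST_RETIRED.* counts above.
	 * The multipliers are the computations-per-instruction stated in
	 * each event's description; the readings here are placeholders.
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned long long scalar_double = 0, scalar_single = 0;
		unsigned long long pk128_double = 0, pk128_single = 0;
		unsigned long long pk256_double = 0, pk256_single = 0;
		unsigned long long pk512_double = 0, pk512_single = 0;
		double seconds = 1.0;	/* measurement interval */

		double flops = 1.0 * (scalar_double + scalar_single)
			     + 2.0 * pk128_double + 4.0 * pk128_single
			     + 4.0 * pk256_double + 8.0 * pk256_single
			     + 8.0 * pk512_double + 16.0 * pk512_single;

		printf("GFLOPS: %.3f\n", flops / seconds / 1e9);
		return 0;
	}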
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
new file mode 100644
index 000000000..403a4f89e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json
@@ -0,0 +1,482 @@
+[
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MITE_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_UOPS",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.DSB_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_DSB_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x18",
+ "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x20",
+ "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_MITE_UOPS",
+ "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering 4 Uops",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x24",
+ "BriefDescription": "Cycles MITE is delivering any Uop",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS",
+ "CounterMask": "1",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_SWITCHES",
+ "CounterMask": "1",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x79",
+ "UMask": "0x30",
+ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ.MS_UOPS",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x80",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE_16B.IFDATA_STALL",
+ "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x83",
+ "UMask": "0x1",
+ "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE_64B.IFTAG_HIT",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x83",
+ "UMask": "0x2",
+ "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE_64B.IFTAG_MISS",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x83",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
+ "Counter": "0,1,2,3",
+ "EventName": "ICACHE_64B.IFTAG_STALL",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
+ "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 4 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
+ "CounterMask": "4",
+ "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when 3 or more uops are not delivered to Resource Allocation Table (RAT) when backend of the machine is not stalled",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_1_UOP_DELIV.CORE",
+ "CounterMask": "3",
+ "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 2 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_2_UOP_DELIV.CORE",
+ "CounterMask": "2",
+ "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles with less than 3 uops delivered by the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_LE_3_UOP_DELIV.CORE",
+ "CounterMask": "1",
+ "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x9C",
+ "UMask": "0x1",
+ "BriefDescription": "Counts cycles FE delivered 4 uops or Resource Allocation Table (RAT) was stalling FE.",
+ "Counter": "0,1,2,3",
+ "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAB",
+ "UMask": "0x2",
+ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.",
+ "Counter": "0,1,2,3",
+ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
+ "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x11",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.DSB_MISS",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x12",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.L1I_MISS",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x13",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.L2_MISS",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x14",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.ITLB_MISS",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x15",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.STLB_MISS",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x400206",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x200206",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x400406",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
+ "PEBS": "1",
+ "MSRValue": "0x400806",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x401006",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x402006",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x404006",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x408006",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x410006",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x420006",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x100206",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
+ "MSRIndex": "0x3F7",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC6",
+ "UMask": "0x1",
+ "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.",
+ "PEBS": "1",
+ "MSRValue": "0x300206",
+ "Counter": "0,1,2,3",
+ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3",
+ "MSRIndex": "0x3F7",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
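For context on how these JSON fields reach the hardware: perf's pmu-events build compiles each entry into a symbolic alias, so the IDQ.MS_SWITCHES entry above is what lets "perf stat -e idq.ms_switches" resolve to event 0x79, umask 0x30, edge detect, counter mask 1. The C sketch below is illustrative context, not part of this patch; it opens the same event directly through perf_event_open(2) and assumes the standard x86 core-PMU raw-config layout advertised in sysfs (event=config:0-7, umask=config:8-15, edge=config:18, cmask=config:24-31).

/* Hedged sketch: count IDQ.MS_SWITCHES for the calling thread using a raw
 * encoding derived from the JSON fields above (EventCode 0x79, UMask 0x30,
 * EdgeDetect 1, CounterMask 1).  The bit positions follow the x86 core-PMU
 * format files in sysfs and are an assumption, not defined by this patch. */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	uint64_t count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	/* event 0x79 | umask 0x30 << 8 | edge (bit 18) | cmask 1 (bits 24-31) */
	attr.config = 0x79ULL | (0x30ULL << 8) | (1ULL << 18) | (1ULL << 24);
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the workload of interest here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == (ssize_t)sizeof(count))
		printf("IDQ.MS_SWITCHES: %llu\n", (unsigned long long)count);
	close(fd);
	return 0;
}

The MSRIndex/MSRValue pairs in the entries above (0x3F7 for the FRONTEND_RETIRED events, 0x3F6 for the load-latency events in the next file) travel the same route but land in attr.config1, which perf fills in from the matching sysfs format entry; programming those is omitted to keep the sketch minimal.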
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
new file mode 100644
index 000000000..e7f1aa312
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json
@@ -0,0 +1,1473 @@
+[
+ {
+ "EventCode": "0x54",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CONFLICT",
+ "PublicDescription": "Number of times a TSX line had a cache conflict.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_CAPACITY",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT",
+ "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x54",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL",
+ "PublicDescription": "Number of times we could not allocate Lock Buffer.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of times a class of instructions (e.g., vzeroupper) that may cause a transactional abort was executed inside a transactional region",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC2",
+ "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x4",
+ "BriefDescription": "Counts the number of times an instruction execution caused the transactional nest count supported to be exceeded",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC3",
+ "PublicDescription": "Unfriendly TSX abort triggered by a nest count that is too deep.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x8",
+ "BriefDescription": "Counts the number of times a XBEGIN instruction was executed inside an HLE transactional region.",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC4",
+ "PublicDescription": "RTM region detected inside HLE.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5d",
+ "UMask": "0x10",
+ "BriefDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region",
+ "Counter": "0,1,2,3",
+ "EventName": "TX_EXEC.MISC5",
+ "PublicDescription": "Counts the number of times an HLE XACQUIRE instruction was executed inside an RTM transactional region.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x10",
+ "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6",
+ "CounterMask": "6",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x6",
+ "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+ "CounterMask": "6",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB0",
+ "UMask": "0x10",
+ "BriefDescription": "Demand Data Read requests who miss L3 cache",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+ "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x2",
+ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+ "Errata": "SKL089",
+ "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times an HLE execution started.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.START",
+ "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times an HLE execution successfully committed",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.COMMIT",
+ "PublicDescription": "Number of times HLE commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times an HLE execution aborted due to any reasons (multiple categories may count as one). ",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED",
+ "PublicDescription": "Number of times HLE abort was triggered. (PEBS)",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an HLE execution aborted due to various memory events (e.g., read/write capacity and conflicts).",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MEM",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an HLE execution aborted due to hardware timer expiration.",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_TIMER",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an HLE execution aborted due to incompatible memory type",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Number of times an HLE execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC8",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an HLE execution aborted due to unfriendly events (such as interrupts).",
+ "Counter": "0,1,2,3",
+ "EventName": "HLE_RETIRED.ABORTED_EVENTS",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x1",
+ "BriefDescription": "Number of times an RTM execution started.",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.START",
+ "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x2",
+ "BriefDescription": "Number of times an RTM execution successfully committed",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.COMMIT",
+ "PublicDescription": "Number of times RTM commit succeeded.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x4",
+ "BriefDescription": "Number of times an RTM execution aborted due to any reasons (multiple categories may count as one). ",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED",
+ "PublicDescription": "Number of times RTM abort was triggered. (PEBS)",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x8",
+ "BriefDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MEM",
+ "PublicDescription": "Number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x10",
+ "BriefDescription": "Number of times an RTM execution aborted due to uncommon conditions.",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_TIMER",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x20",
+ "BriefDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
+ "PublicDescription": "Number of times an RTM execution aborted due to HLE-unfriendly instructions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x40",
+ "BriefDescription": "Number of times an RTM execution aborted due to incompatible memory type",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
+ "PublicDescription": "Number of times an RTM execution aborted due to incompatible memory type.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC9",
+ "UMask": "0x80",
+ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)",
+ "Counter": "0,1,2,3",
+ "EventName": "RTM_RETIRED.ABORTED_EVENTS",
+ "PublicDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x4",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x8",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "50021",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x10",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "20011",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x20",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x40",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "2003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x80",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "1009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x100",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "503",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xCD",
+ "UMask": "0x1",
+ "BriefDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles.",
+ "PEBS": "2",
+ "MSRValue": "0x200",
+ "Counter": "0,1,2,3",
+ "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
+ "MSRIndex": "0x3F6",
+ "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
+ "TakenAlone": "1",
+ "SampleAfterValue": "101",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss in the L3.",
+ "MSRValue": "0x3fbc000001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000001 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3.",
+ "MSRValue": "0x3fbc000002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000002 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss in the L3.",
+ "MSRValue": "0x3fbc000004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000004 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3.",
+ "MSRValue": "0x3fbc000010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000010 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3.",
+ "MSRValue": "0x3fbc000020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00020 ",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800020",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000020",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3.",
+ "MSRValue": "0x3fbc000080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000080",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3.",
+ "MSRValue": "0x3fbc000100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000100",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3.",
+ "MSRValue": "0x3fbc000400",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00400",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00400",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00400",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800400",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000400",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that miss in the L3.",
+ "MSRValue": "0x3fbc008000",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc08000",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc08000",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc08000",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b808000",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604008000",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that miss in the L3.",
+ "MSRValue": "0x3fbc000490",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00490",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00490",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00490",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800490",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000490",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that miss in the L3.",
+ "MSRValue": "0x3fbc000120",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00120",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00120",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00120",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800120",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000120",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss in the L3.",
+ "MSRValue": "0x3fbc000491",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00491",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00491",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00491",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800491",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000491",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss in the L3.",
+ "MSRValue": "0x3fbc000122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache.",
+ "MSRValue": "0x083fc00122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache.",
+ "MSRValue": "0x103fc00122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram.",
+ "MSRValue": "0x063fc00122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram.",
+ "MSRValue": "0x063b800122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "Offcore": "1",
+ "EventCode": "0xB7, 0xBB",
+ "UMask": "0x1",
+ "BriefDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.",
+ "MSRValue": "0x0604000122",
+ "Counter": "0,1,2,3",
+ "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and a predefined mask bit value in a dedicated MSR to specify attributes of the offcore transaction.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3"
+ }
+]
\ No newline at end of file
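Each entry above programs one of the two OFFCORE_RESPONSE facilities: the event select (0xB7 or 0xBB) with umask 0x1 picks the facility, and the MSRValue is written into the matching dedicated MSR (0x1a6 or 0x1a7) to select the request-type, response-type, and snoop-result bits. On Linux the x86 perf driver exposes that auxiliary MSR as attr.config1 (the cpu PMU's "offcore_rsp" format attribute), so such an event can be counted without the JSON tables at all. Below is a minimal sketch, assuming a Skylake Server CPU and borrowing the ALL_RFO.L3_MISS.ANY_SNOOP encoding (0x3fbc000122) from the entries above; error handling is kept to a bare minimum:

/*
 * Minimal sketch: count one OFFCORE_RESPONSE event via perf_event_open(2).
 * Assumes a Skylake Server part and that the perf x86 driver maps the
 * offcore response MSR value onto attr.config1 (see
 * /sys/bus/event_source/devices/cpu/format/offcore_rsp).
 */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        /* Event select 0xB7, umask 0x1: the first OFFCORE_RESPONSE register. */
        attr.config = 0xB7 | (0x1 << 8);
        /* MSR 0x1a6 value from OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP. */
        attr.config1 = 0x3fbc000122ULL;
        attr.disabled = 1;
        attr.exclude_kernel = 1;

        fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
        /* ... workload under measurement would run here ... */
        ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
        if (read(fd, &count, sizeof(count)) == sizeof(count))
                printf("offcore L3-miss responses: %llu\n",
                       (unsigned long long)count);
        close(fd);
        return 0;
}

The perf tool equivalent is 'perf stat -e cpu/event=0xb7,umask=0x1,offcore_rsp=0x3fbc000122/ -- <cmd>'; once jevents builds these JSON files into perf, the symbolic event name can be used instead.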
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json
new file mode 100644
index 000000000..778a54146
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json
@@ -0,0 +1,164 @@
+[
+ {
+ "EventCode": "0x28",
+ "UMask": "0x7",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the Non-AVX turbo schedule.",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_POWER.LVL0_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for baseline license level 0. This includes non-AVX codes, SSE, AVX 128-bit, and low-current AVX 256-bit codes.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "UMask": "0x18",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX2 turbo schedule.",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_POWER.LVL1_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 1. This includes high current AVX 256-bit instructions as well as low current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "UMask": "0x20",
+ "BriefDescription": "Core cycles where the core was running in a manner where Turbo may be clipped to the AVX512 turbo schedule.",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_POWER.LVL2_TURBO_LICENSE",
+ "PublicDescription": "Core cycles where the core was running with power-delivery for license level 2 (introduced in Skylake Server microarchitecture). This includes high current AVX 512-bit instructions.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x28",
+ "UMask": "0x40",
+ "BriefDescription": "Core cycles the core was throttled due to a pending power level request.",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_POWER.THROTTLE",
+ "PublicDescription": "Core cycles the out-of-order engine was throttled due to a pending power level request.",
+ "SampleAfterValue": "200003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "UMask": "0x1",
+ "BriefDescription": "Number of PREFETCHNTA instructions executed.",
+ "Counter": "0,1,2,3",
+ "EventName": "SW_PREFETCH_ACCESS.NTA",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "UMask": "0x2",
+ "BriefDescription": "Number of PREFETCHT0 instructions executed.",
+ "Counter": "0,1,2,3",
+ "EventName": "SW_PREFETCH_ACCESS.T0",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "UMask": "0x4",
+ "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
+ "Counter": "0,1,2,3",
+ "EventName": "SW_PREFETCH_ACCESS.T1_T2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x32",
+ "UMask": "0x8",
+ "BriefDescription": "Number of PREFETCHW instructions executed.",
+ "Counter": "0,1,2,3",
+ "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCB",
+ "UMask": "0x1",
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "Counter": "0,1,2,3",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PublicDescription": "Counts the number of hardware interrupts received by the processor.",
+ "SampleAfterValue": "203",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x1",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x2",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x4",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x8",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x10",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x20",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xEF",
+ "UMask": "0x40",
+ "Counter": "0,1,2,3",
+ "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xFE",
+ "UMask": "0x2",
+ "BriefDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly",
+ "Counter": "0,1,2,3",
+ "EventName": "IDI_MISC.WB_UPGRADE",
+ "PublicDescription": "Counts number of cache lines that are allocated and written back to L3 with the intention that they are more likely to be reused shortly.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xFE",
+ "UMask": "0x4",
+ "BriefDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly",
+ "Counter": "0,1,2,3",
+ "EventName": "IDI_MISC.WB_DOWNGRADE",
+ "PublicDescription": "Counts number of cache lines that are dropped and not written back to L3 as they are deemed to be less likely to be reused shortly.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+]
\ No newline at end of file
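The CORE_POWER.LVL*_TURBO_LICENSE descriptions above partition unhalted cycles by power-delivery license level (0: scalar/SSE/light AVX 256-bit; 1: heavy AVX 256-bit and light AVX 512-bit; 2: heavy AVX 512-bit). A practical use is estimating how much of a workload runs under the level-2 license, which is what clips the turbo schedule. A hedged sketch follows, using only raw encodings taken from these entries (0x28/umask 0x20) plus the architectural unhalted-cycles event (0x3C/umask 0x0, defined in pipeline.json below), with the license event grouped under the cycles leader:

/*
 * Minimal sketch (assumes Skylake Server): estimate the fraction of
 * unhalted core cycles spent at AVX-512 license level 2 by grouping
 * CORE_POWER.LVL2_TURBO_LICENSE (0x28/0x20) under
 * CPU_CLK_UNHALTED.THREAD_P (0x3C/0x0).
 */
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_raw(uint64_t config, int group_fd)
{
        struct perf_event_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_RAW;
        attr.config = config;
        attr.disabled = (group_fd == -1);       /* only the leader starts disabled */
        attr.read_format = PERF_FORMAT_GROUP;
        return syscall(__NR_perf_event_open, &attr, 0, -1, group_fd, 0);
}

int main(void)
{
        struct { uint64_t nr, cycles, lvl2; } vals;
        int leader, member;

        leader = open_raw(0x3C, -1);                   /* CPU_CLK_UNHALTED.THREAD_P */
        member = open_raw(0x28 | (0x20 << 8), leader); /* CORE_POWER.LVL2_TURBO_LICENSE */
        if (leader < 0 || member < 0) {
                perror("perf_event_open");
                return 1;
        }
        ioctl(leader, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
        /* ... an AVX-512 heavy workload would run here ... */
        ioctl(leader, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
        if (read(leader, &vals, sizeof(vals)) > 0 && vals.cycles)
                printf("license level 2 residency: %.1f%%\n",
                       100.0 * vals.lvl2 / vals.cycles);
        return 0;
}

Grouping matters here: opened independently, the two counters could be multiplexed over different windows and the residency ratio would be skewed.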
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
new file mode 100644
index 000000000..f99f7ae27
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json
@@ -0,0 +1,950 @@
+[
+ {
+ "EventCode": "0x00",
+ "UMask": "0x1",
+ "BriefDescription": "Instructions retired from execution.",
+ "Counter": "Fixed counter 0",
+ "EventName": "INST_RETIRED.ANY",
+ "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 0"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when the thread is not in halt state",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x2",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "Fixed counter 1",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 1"
+ },
+ {
+ "EventCode": "0x00",
+ "UMask": "0x3",
+ "BriefDescription": "Reference cycles when the core is not in halt state.",
+ "Counter": "Fixed counter 2",
+ "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during the duty-off periods of 'throttling (TM)' states, when the processor is 'halted'. The counter update is done at a lower clock rate than the core clock, so the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed, software clears the overflow status bit and resets the counter to less than MAX. The reset value is not clocked into the counter immediately, so the overflow status bit will flip 'high' (1) and generate another PMI (if enabled), after which the reset value gets clocked into the counter. Therefore, software will get the interrupt and read the overflow status bit as '1' (bit 34) while the counter value is less than MAX. Software should ignore this case.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "Fixed counter 2"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x2",
+ "BriefDescription": "Loads blocked by overlapping with a store buffer entry that cannot be forwarded.",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.STORE_FORWARD",
+ "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when: (a) a preceding store conflicts with the load (incomplete overlap); (b) store forwarding is impossible due to u-arch limitations; (c) preceding lock RMW operations are not forwarded; (d) the store has the no-forward bit set (uncacheable/page-split/masked stores); (e) all-blocking stores are used (mostly fences and port I/O); and others. The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x03",
+ "UMask": "0x8",
+ "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS.NO_SR",
+ "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x07",
+ "UMask": "0x1",
+ "BriefDescription": "False dependencies in MOB due to partial compare on address.",
+ "Counter": "0,1,2,3",
+ "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS",
+ "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x1",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES",
+ "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x1",
+ "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.RECOVERY_CYCLES_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0D",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles the issue-stage is waiting for front-end to fetch from resteered path following branch misprediction or machine clear events.",
+ "Counter": "0,1,2,3",
+ "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.ANY",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0x0E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x2",
+ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH",
+ "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x0E",
+ "UMask": "0x20",
+ "BriefDescription": "Number of slow LEA uops being allocated. A uop is generally considered SlowLea if it has 3 sources (e.g. 2 sources + immediate), regardless of whether it results from an LEA instruction or not.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_ISSUED.SLOW_LEA",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x14",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
+ "Counter": "0,1,2,3",
+ "EventName": "ARITH.DIVIDER_ACTIVE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Thread cycles when thread is not in halt state",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0x3C",
+ "UMask": "0x0",
+ "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.RING0_TRANS",
+ "CounterMask": "1",
+ "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2503",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2503",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Core crystal clock cycles when the thread is unhalted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK",
+ "SampleAfterValue": "2503",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x1",
+ "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY",
+ "AnyThread": "1",
+ "SampleAfterValue": "2503",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x2",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x3C",
+ "UMask": "0x2",
+ "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
+ "Counter": "0,1,2,3",
+ "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
+ "SampleAfterValue": "2503",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4C",
+ "UMask": "0x1",
+ "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
+ "Counter": "0,1,2,3",
+ "EventName": "LOAD_HIT_PRE.SW_PF",
+ "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_CYCLES",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "Invert": "1",
+ "EventCode": "0x5E",
+ "UMask": "0x1",
+ "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.",
+ "Counter": "0,1,2,3",
+ "EventName": "RS_EVENTS.EMPTY_END",
+ "CounterMask": "1",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x87",
+ "UMask": "0x1",
+ "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
+ "Counter": "0,1,2,3",
+ "EventName": "ILD_STALL.LCP",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles per thread when uops are executed in port 0",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_0",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles per thread when uops are executed in port 1",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_1",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles per thread when uops are executed in port 2",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_2",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles per thread when uops are executed in port 3",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_3",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles per thread when uops are executed in port 4",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_4",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x20",
+ "BriefDescription": "Cycles per thread when uops are executed in port 5",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_5",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles per thread when uops are executed in port 6",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_6",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA1",
+ "UMask": "0x80",
+ "BriefDescription": "Cycles per thread when uops are executed in port 7",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_DISPATCHED_PORT.PORT_7",
+ "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x1",
+ "BriefDescription": "Resource-related stall cycles",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA2",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
+ "Counter": "0,1,2,3",
+ "EventName": "RESOURCE_STALLS.SB",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x4",
+ "BriefDescription": "Total execution stalls.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x5",
+ "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
+ "CounterMask": "5",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
+ "CounterMask": "8",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0xc",
+ "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
+ "CounterMask": "12",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
+ "CounterMask": "16",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA3",
+ "UMask": "0x14",
+ "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY",
+ "CounterMask": "20",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xA6",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where no uops were executed, the Reservation Station was not empty, the Store Buffer was full and there was no outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
+ "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA6",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3",
+ "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA6",
+ "UMask": "0x4",
+ "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3",
+ "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA6",
+ "UMask": "0x8",
+ "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3",
+ "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA6",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
+ "Counter": "0,1,2,3",
+ "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA6",
+ "UMask": "0x40",
+ "BriefDescription": "Cycles where the Store Buffer was full and no outstanding load.",
+ "Counter": "0,1,2,3",
+ "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Number of Uops delivered by the LSD.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.UOPS",
+ "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_ACTIVE",
+ "CounterMask": "1",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xA8",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.",
+ "Counter": "0,1,2,3",
+ "EventName": "LSD.CYCLES_4_UOPS",
+ "CounterMask": "4",
+ "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.THREAD",
+ "PublicDescription": "Number of uops to be executed per-thread each cycle.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC",
+ "CounterMask": "1",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC",
+ "CounterMask": "2",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC",
+ "CounterMask": "3",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x1",
+ "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC",
+ "CounterMask": "4",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Number of uops executed on the core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE",
+ "PublicDescription": "Number of uops executed from any thread.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
+ "CounterMask": "2",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
+ "CounterMask": "3",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
+ "CounterMask": "4",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xB1",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE",
+ "CounterMask": "1",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xB1",
+ "UMask": "0x10",
+ "BriefDescription": "Counts the number of x87 uops dispatched.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_EXECUTED.X87",
+ "PublicDescription": "Counts the number of x87 uops executed.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x0",
+ "BriefDescription": "Number of instructions retired. General Counter - architectural event",
+ "Counter": "0,1,2,3",
+ "EventName": "INST_RETIRED.ANY_P",
+ "Errata": "SKL091, SKL044",
+ "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC0",
+ "UMask": "0x1",
+ "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution",
+ "PEBS": "2",
+ "Counter": "1",
+ "EventName": "INST_RETIRED.PREC_DIST",
+ "Errata": "SKL091, SKL044",
+ "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "1"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC0",
+ "UMask": "0x1",
+ "BriefDescription": "Number of cycles using always true condition applied to PEBS instructions retired event.",
+ "PEBS": "2",
+ "Counter": "0,2,3",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "CounterMask": "10",
+ "Errata": "SKL091, SKL044",
+ "PublicDescription": "Number of cycles using an always true condition applied to PEBS instructions retired event. (inst_ret< 16)",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,2,3"
+ },
+ {
+ "EventCode": "0xC1",
+ "UMask": "0x3f",
+ "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.",
+ "Counter": "0,1,2,3",
+ "EventName": "OTHER_ASSISTS.ANY",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC2",
+ "UMask": "0x2",
+ "BriefDescription": "Retirement slots used.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "PublicDescription": "Counts the retirement slots used.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles without actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "CounterMask": "1",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "Invert": "1",
+ "EventCode": "0xC2",
+ "UMask": "0x2",
+ "BriefDescription": "Cycles with less than 10 actually retired uops.",
+ "Counter": "0,1,2,3",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "CounterMask": "10",
+ "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EdgeDetect": "1",
+ "EventCode": "0xC3",
+ "UMask": "0x1",
+ "BriefDescription": "Number of machine clears (nukes) of any type.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.COUNT",
+ "CounterMask": "1",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC3",
+ "UMask": "0x4",
+ "BriefDescription": "Self-modifying code (SMC) detected.",
+ "Counter": "0,1,2,3",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x0",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "Errata": "SKL091",
+ "PublicDescription": "Counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x1",
+ "BriefDescription": "Conditional branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x2",
+ "BriefDescription": "Direct and indirect near call instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x4",
+ "BriefDescription": "All (macro) branch instructions retired.",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x8",
+ "BriefDescription": "Return instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x10",
+ "BriefDescription": "Not taken branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NOT_TAKEN",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x20",
+ "BriefDescription": "Taken branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC4",
+ "UMask": "0x40",
+ "BriefDescription": "Counts the number of far branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+ "Errata": "SKL091",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x0",
+ "BriefDescription": "All mispredicted macro branch instructions retired.",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x1",
+ "BriefDescription": "Mispredicted conditional branch instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x2",
+ "BriefDescription": "Mispredicted direct and indirect near call instructions retired.",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x4",
+ "BriefDescription": "Mispredicted macro branch instructions retired.",
+ "PEBS": "2",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS",
+ "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3"
+ },
+ {
+ "EventCode": "0xC5",
+ "UMask": "0x20",
+ "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ",
+ "PEBS": "1",
+ "Counter": "0,1,2,3",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.",
+ "SampleAfterValue": "400009",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xCC",
+ "UMask": "0x20",
+ "BriefDescription": "Increments whenever there is an update to the LBR array.",
+ "Counter": "0,1,2,3",
+ "EventName": "ROB_MISC_EVENTS.LBR_INSERTS",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xE6",
+ "UMask": "0x1",
+ "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.",
+ "Counter": "0,1,2,3",
+ "EventName": "BACLEARS.ANY",
+ "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
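Once this table is built into perf's pmu-events support, each EventName above becomes a symbolic event that perf resolves to its EventCode/UMask encoding; the jevents generator stores the names lowercased. A minimal usage sketch, assuming a Skylake-X host and a hypothetical ./workload binary:

    perf stat -e uops_dispatched_port.port_0,cpu_clk_unhalted.thread_p -- ./workload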
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
new file mode 100644
index 000000000..71e9737f4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -0,0 +1,164 @@
+[
+ {
+ "BriefDescription": "Instructions Per Cycle (per logical thread)",
+ "MetricExpr": "INST_RETIRED.ANY / CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "IPC"
+ },
+ {
+ "BriefDescription": "Uops Per Instruction",
+ "MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY",
+ "MetricGroup": "Pipeline",
+ "MetricName": "UPI"
+ },
+ {
+ "BriefDescription": "Rough Estimation of fraction of fetched lines bytes that were likely consumed by program instructions",
+ "MetricExpr": "min( 1 , UOPS_ISSUED.ANY / ((UOPS_RETIRED.RETIRE_SLOTS / INST_RETIRED.ANY) * 64 * ( ICACHE_64B.IFTAG_HIT + ICACHE_64B.IFTAG_MISS ) / 4.1) )",
+ "MetricGroup": "Frontend",
+ "MetricName": "IFetch_Line_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)",
+ "MetricExpr": "IDQ.DSB_UOPS / ( IDQ.DSB_UOPS + LSD.UOPS + IDQ.MITE_UOPS + IDQ.MS_UOPS )",
+ "MetricGroup": "DSB; Frontend_Bandwidth",
+ "MetricName": "DSB_Coverage"
+ },
+ {
+ "BriefDescription": "Cycles Per Instruction (threaded)",
+ "MetricExpr": "1 / (INST_RETIRED.ANY / cycles)",
+ "MetricGroup": "Pipeline;Summary",
+ "MetricName": "CPI"
+ },
+ {
+ "BriefDescription": "Per-thread actual clocks when the logical processor is active. This is called 'Clockticks' in VTune.",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "Summary",
+ "MetricName": "CLKS"
+ },
+ {
+ "BriefDescription": "Total issue-pipeline slots",
+ "MetricExpr": "4*(( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "TopDownL1",
+ "MetricName": "SLOTS"
+ },
+ {
+ "BriefDescription": "Total number of retired Instructions",
+ "MetricExpr": "INST_RETIRED.ANY",
+ "MetricGroup": "Summary",
+ "MetricName": "Instructions"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle (per physical core)",
+ "MetricExpr": "INST_RETIRED.ANY / (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles)",
+ "MetricGroup": "SMT",
+ "MetricName": "CoreIPC"
+ },
+ {
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is at least 1 uop executed)",
+ "MetricExpr": "UOPS_EXECUTED.THREAD / (( UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2) if #SMT_on else UOPS_EXECUTED.CORE_CYCLES_GE_1)",
+ "MetricGroup": "Pipeline;Ports_Utilization",
+ "MetricName": "ILP"
+ },
+ {
+ "BriefDescription": "Average Branch Address Clear Cost (fraction of cycles)",
+ "MetricExpr": "2* (( RS_EVENTS.EMPTY_CYCLES - ICACHE_16B.IFDATA_STALL - ICACHE_64B.IFTAG_STALL ) / RS_EVENTS.EMPTY_END)",
+ "MetricGroup": "Unknown_Branches",
+ "MetricName": "BAClear_Cost"
+ },
+ {
+ "BriefDescription": "Core actual clocks when any thread is active on the physical core",
+ "MetricExpr": "( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else CPU_CLK_UNHALTED.THREAD",
+ "MetricGroup": "SMT",
+ "MetricName": "CORE_CLKS"
+ },
+ {
+ "BriefDescription": "Actual Average Latency for L1 data-cache miss demand loads",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / ( MEM_LOAD_RETIRED.L1_MISS + MEM_LOAD_RETIRED.FB_HIT )",
+ "MetricGroup": "Memory_Bound;Memory_Lat",
+ "MetricName": "Load_Miss_Real_Latency"
+ },
+ {
+ "BriefDescription": "Memory-Level-Parallelism (average number of L1 miss demand load when there is at least 1 such miss)",
+ "MetricExpr": "L1D_PEND_MISS.PENDING / (( L1D_PEND_MISS.PENDING_CYCLES_ANY / 2) if #SMT_on else L1D_PEND_MISS.PENDING_CYCLES)",
+ "MetricGroup": "Memory_Bound;Memory_BW",
+ "MetricName": "MLP"
+ },
+ {
+ "BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
+ "MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * (( CPU_CLK_UNHALTED.THREAD_ANY / 2 ) if #SMT_on else cycles) )",
+ "MetricGroup": "TLB",
+ "MetricName": "Page_Walks_Utilization"
+ },
+ {
+ "BriefDescription": "Average CPU Utilization",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+ "MetricGroup": "Summary",
+ "MetricName": "CPU_Utilization"
+ },
+ {
+ "BriefDescription": "Giga Floating Point Operations Per Second",
+ "MetricExpr": "(( 1*( FP_ARITH_INST_RETIRED.SCALAR_SINGLE + FP_ARITH_INST_RETIRED.SCALAR_DOUBLE ) + 2* FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE + 4*( FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE + FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE ) + 8* FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE )) / 1000000000 / duration_time",
+ "MetricGroup": "FLOPS;Summary",
+ "MetricName": "GFLOPs"
+ },
+ {
+ "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Power",
+ "MetricName": "Turbo_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles where both hardware threads were active",
+ "MetricExpr": "1 - CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE / ( CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY / 2 ) if #SMT_on else 0",
+ "MetricGroup": "SMT;Summary",
+ "MetricName": "SMT_2T_Utilization"
+ },
+ {
+ "BriefDescription": "Fraction of cycles spent in Kernel mode",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:u / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricGroup": "Summary",
+ "MetricName": "Kernel_Utilization"
+ },
+ {
+ "BriefDescription": "C3 residency percent per core",
+ "MetricExpr": "(cstate_core@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Core_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per core",
+ "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Core_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per core",
+ "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Core_Residency"
+ },
+ {
+ "BriefDescription": "C2 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C2_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C3 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C3_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C6 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C6_Pkg_Residency"
+ },
+ {
+ "BriefDescription": "C7 residency percent per package",
+ "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+ "MetricGroup": "Power",
+ "MetricName": "C7_Pkg_Residency"
+ }
+]
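Each MetricName above can be requested with perf stat's -M option, which evaluates the corresponding MetricExpr from the underlying event counts (including the #SMT_on conditionals). A minimal sketch, again assuming a Skylake-X host and a hypothetical ./workload binary:

    perf stat -M IPC,CPI -- ./workload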
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
new file mode 100644
index 000000000..9c7e5f8be
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-memory.json
@@ -0,0 +1,172 @@
+[
+ {
+ "BriefDescription": "read requests to memory controller. Derived from unc_m_cas_count.rd",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_READ",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x3",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "write requests to memory controller. Derived from unc_m_cas_count.wr",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "LLC_MISSES.MEM_WRITE",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0xC",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Memory controller clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_M_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles where DRAM ranks are in power down (CKE) mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x85",
+ "EventName": "UNC_M_POWER_CHANNEL_PPD",
+ "MetricExpr": "(UNC_M_POWER_CHANNEL_PPD / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_channel_ppd %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Cycles Memory is in self refresh power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x43",
+ "EventName": "UNC_M_POWER_SELF_REFRESH",
+ "MetricExpr": "(UNC_M_POWER_SELF_REFRESH / UNC_M_CLOCKTICKS) * 100.",
+ "MetricName": "power_self_refresh %",
+ "PerPkg": "1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charges due to page misses",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.PAGE_MISS",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.RD",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Pre-charge for writes",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_M_PRE_COUNT.WR",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Page Activate commands sent due to a write request",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_M_ACT_COUNT.WR",
+ "PerPkg": "1",
+ "PublicDescription": "Counts DRAM Page Activate commands sent on this channel due to a write request to the iMC (Memory Controller). Activate commands are issued to open up a page on the DRAM devices so that it can be read or written to with a CAS (Column Access Select) command.",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "All DRAM CAS Commands issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_M_CAS_COUNT.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts all CAS (Column Address Select) commands issued to DRAM per memory channel. CAS commands are issued to specify the address to read or write on DRAM, so this event increments for every read and write. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.",
+ "UMask": "0xF",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "All DRAM Read CAS Commands issued (does not include underfills) ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_M_CAS_COUNT.RD_REG",
+ "PerPkg": "1",
+ "PublicDescription": "Counts CAS (Column Access Select) regular read commands issued to DRAM on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this event increments for every regular read. This event only counts regular reads and does not includes underfill reads due to partial write requests. This event counts whether AutoPrecharge (which closes the DRAM Page automatically after a read/write) is enabled or not.",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "DRAM Underfill Read CAS Commands issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x4",
+ "EventName": "UNC_M_CAS_COUNT.RD_UNDERFILL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts CAS (Column Access Select) underfill read commands issued to DRAM due to a partial write, on a per channel basis. CAS commands are issued to specify the address to read or write on DRAM, and this command counts underfill reads. Partial writes must be completed by first reading in the underfill from DRAM and then merging in the partial write data before writing the full line back to DRAM. This event will generally count about the same as the number of partial writes, but may be slightly less because of partials hitting in the WPQ (due to a previous write request). ",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read Pending Queue Allocations",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x10",
+ "EventName": "UNC_M_RPQ_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of read requests allocated into the Read Pending Queue (RPQ). This queue is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. The requests deallocate after the read CAS command has been issued to DRAM. This event counts both Isochronous and non-Isochronous requests which were issued to the RPQ. ",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Read Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x80",
+ "EventName": "UNC_M_RPQ_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries in the Read Pending Queue (RPQ) at each cycle. This can then be used to calculate both the average occupancy of the queue (in conjunction with the number of cycles not empty) and the average latency in the queue (in conjunction with the number of allocations). The RPQ is used to schedule reads out to the memory controller and to track the requests. Requests allocate into the RPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC. They deallocate from the RPQ after the CAS command has been issued to memory.",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write Pending Queue Allocations",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x20",
+ "EventName": "UNC_M_WPQ_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of writes requests allocated into the Write Pending Queue (WPQ). The WPQ is used to schedule writes out to the memory controller and to track the requests. Requests allocate into the WPQ soon after they enter the memory controller, and need credits for an entry in this buffer before being sent from the CHA to the iMC (Memory Controller). The write requests deallocate after being issued to DRAM. Write requests themselves are able to complete (from the perspective of the rest of the system) as soon they have 'posted' to the iMC.",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Write Pending Queue Occupancy",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x81",
+ "EventName": "UNC_M_WPQ_OCCUPANCY",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of entries in the Write Pending Queue (WPQ) at each cycle. This can then be used to calculate both the average queue occupancy (in conjunction with the number of cycles not empty) and the average latency (in conjunction with the number of allocations). The WPQ is used to schedule writes out to the memory controller and to track the requests.",
+ "Unit": "iMC"
+ }
+]
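The iMC events above are uncore events (PerPkg), so they are counted per package rather than per task, and the 64Bytes ScaleUnit lets perf report the CAS-derived LLC_MISSES.MEM_READ/MEM_WRITE counts directly as DRAM traffic in bytes. A minimal bandwidth sketch, assuming a Skylake-X host; the one-second sleep merely provides a measurement window:

    perf stat -a -e llc_misses.mem_read,llc_misses.mem_write -- sleep 1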
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
new file mode 100644
index 000000000..de6e70e55
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/uncore-other.json
@@ -0,0 +1,1156 @@
+[
+ {
+ "BriefDescription": "Uncore cache clock ticks",
+ "Counter": "0,1,2,3",
+ "EventName": "UNC_CHA_CLOCKTICKS",
+ "PerPkg": "1",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.UNCACHEABLE",
+ "Filter": "config1=0x40e33",
+ "PerPkg": "1",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "MMIO reads. Derived from unc_cha_tor_inserts.ia_miss",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_READ",
+ "Filter": "config1=0x40040e33",
+ "PerPkg": "1",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "MMIO writes. Derived from unc_cha_tor_inserts.ia_miss",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_MISSES.MMIO_WRITE",
+ "Filter": "config1=0x40041e33",
+ "PerPkg": "1",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Streaming stores (full cache line). Derived from unc_cha_tor_inserts.ia_miss",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_FULL",
+ "Filter": "config1=0x41833",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Streaming stores (partial cache line). Derived from unc_cha_tor_inserts.ia_miss",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "LLC_REFERENCES.STREAMING_PARTIAL",
+ "Filter": "config1=0x41a33",
+ "PerPkg": "1",
+ "ScaleUnit": "64Bytes",
+ "UMask": "0x21",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "read requests from home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.READS",
+ "PerPkg": "1",
+ "UMask": "0x03",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "read requests from local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.READS_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "read requests from remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.READS_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "write requests from home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.WRITES",
+ "PerPkg": "1",
+ "UMask": "0x0C",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "write requests from local home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.WRITES_LOCAL",
+ "PerPkg": "1",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "write requests from remote home agent",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.WRITES_REMOTE",
+ "PerPkg": "1",
+ "UMask": "0x08",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UPI_DATA_BANDWIDTH_TX",
+ "PerPkg": "1",
+ "ScaleUnit": "7.11E-06Bytes",
+ "UMask": "0x0F",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth reading at IIO. Derived from unc_iio_data_req_of_cpu.mem_read.part0",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "LLC_MISSES.PCIE_READ",
+ "FCMask": "0x07",
+ "Filter": "ch_mask=0x1f",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "MetricName": "LLC_MISSES.PCIE_READ",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth writing at IIO. Derived from unc_iio_data_req_of_cpu.mem_write.part0",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "LLC_MISSES.PCIE_WRITE",
+ "FCMask": "0x07",
+ "Filter": "ch_mask=0x1f",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "MetricName": "LLC_MISSES.PCIE_WRITE",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 0",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2 +UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "MetricName": "LLC_MISSES.PCIE_WRITE",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 1",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 2",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth writing at IIO, part 3",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 0",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "MetricExpr": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2 + UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "MetricName": "LLC_MISSES.PCIE_READ",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 1",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 2",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "PCI Express bandwidth reading at IIO, part 3",
+ "Counter": "0,1",
+ "EventCode": "0x83",
+ "EventName": "UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "ScaleUnit": "4Bytes",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Core Cross Snoops Issued; Multiple Core Requests",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x33",
+ "EventName": "UNC_CHA_CORE_SNP.CORE_GTONE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of transactions that trigger a configurable number of cross snoops. Cores are snooped if the transaction looks up the cache and determines that it is necessary based on the operation type and what CoreValid bits are set. For example, if 2 CV bits are set on a data read, the cores must have the data in S state so it is not necessary to snoop them. However, if only 1 CV bit is set the core my have modified the data. If the transaction was an RFO, it would need to invalidate the lines. This event can be filtered based on who triggered the initial snoop(s).",
+ "UMask": "0x42",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Core Cross Snoops Issued; Multiple Eviction",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x33",
+ "EventName": "UNC_CHA_CORE_SNP.EVICT_GTONE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the number of transactions that trigger a configurable number of cross snoops. Cores are snooped if the transaction looks up the cache and determines that it is necessary based on the operation type and what CoreValid bits are set. For example, if 2 CV bits are set on a data read, the cores must have the data in S state so it is not necessary to snoop them. However, if only 1 CV bit is set the core my have modified the data. If the transaction was an RFO, it would need to invalidate the lines. This event can be filtered based on who triggered the initial snoop(s).",
+ "UMask": "0x82",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory state lookups; Snoop Not Needed",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x53",
+ "EventName": "UNC_CHA_DIR_LOOKUP.NO_SNP",
+ "PerPkg": "1",
+ "PublicDescription": "Counts transactions that looked into the multi-socket cacheline Directory state, and therefore did not send a snoop because the Directory indicated it was not needed",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory state lookups; Snoop Needed",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x53",
+ "EventName": "UNC_CHA_DIR_LOOKUP.SNP",
+ "PerPkg": "1",
+ "PublicDescription": "Counts transactions that looked into the multi-socket cacheline Directory state, and sent one or more snoops, because the Directory indicated it was needed",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory state updates; Directory Updated memory write from the HA pipe",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "UNC_CHA_DIR_UPDATE.HA",
+ "PerPkg": "1",
+ "PublicDescription": "Counts only multi-socket cacheline Directory state updates memory writes issued from the HA pipe. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory state updates; Directory Updated memory write from TOR pipe",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x54",
+ "EventName": "UNC_CHA_DIR_UPDATE.TOR",
+ "PerPkg": "1",
+ "PublicDescription": "Counts only multi-socket cacheline Directory state updates due to memory writes issued from the TOR pipe which are the result of remote transaction hitting the SF/LLC and returning data Core2Core. This does not include memory write requests which are for I (Invalid) or E (Exclusive) cachelines.",
+ "UMask": "0x02",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Read request from a remote socket which hit in the HitMe Cache to a line In the E state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5F",
+ "EventName": "UNC_CHA_HITME_HIT.EX_RDS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts read requests from a remote socket which hit in the HitME cache (used to cache the multi-socket Directory state) to a line in the E(Exclusive) state. This includes the following read opcodes (RdCode, RdData, RdDataMigratory, RdCur, RdInv*, Inv*)",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Normal priority reads issued to the memory controller from the CHA",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x59",
+ "EventName": "UNC_CHA_IMC_READS_COUNT.NORMAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a normal (Non-Isochronous) read is issued to any of the memory controller channels from the CHA.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "CHA to iMC Full Line Writes Issued; Full Line Non-ISOCH",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5B",
+ "EventName": "UNC_CHA_IMC_WRITES_COUNT.FULL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a normal (Non-Isochronous) full line write is issued from the CHA to the any of the memory controller channels.",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Number of times that an RFO hit in S state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x39",
+ "EventName": "UNC_CHA_MISC.RFO_HIT_S",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a RFO (the Read for Ownership issued before a write) request hit a cacheline in the S (Shared) state.",
+ "UMask": "0x08",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Local requests for exclusive ownership of a cache line without receiving data",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.INVITOE_LOCAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the total number of requests coming from a unit on this socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+ "UMask": "0x10",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Local requests for exclusive ownership of a cache line without receiving data",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x50",
+ "EventName": "UNC_CHA_REQUESTS.INVITOE_REMOTE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts the total number of requests coming from a remote socket for exclusive ownership of a cache line without receiving data (INVITOE) to the CHA.",
+ "UMask": "0x20",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "RspCnflct* Snoop Responses Received",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5C",
+ "EventName": "UNC_CHA_SNOOP_RESP.RSPCNFLCTS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a a transaction with the opcode type RspCnflct* Snoop Response was received. This is returned when a snoop finds an existing outstanding transaction in a remote caching agent. This triggers conflict resolution hardware. This covers both the opcode RspCnflct and RspCnflctWbI.",
+ "UMask": "0x40",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "RspI Snoop Responses Received",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5C",
+ "EventName": "UNC_CHA_SNOOP_RESP.RSPI",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a transaction with the opcode type RspI Snoop Response was received which indicates the remote cache does not have the data, or when the remote cache silently evicts data (such as when an RFO: the Read for Ownership issued before a write hits non-modified data).",
+ "UMask": "0x01",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "RspIFwd Snoop Responses Received",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5C",
+ "EventName": "UNC_CHA_SNOOP_RESP.RSPIFWD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states. This is commonly returned with RFO (the Read for Ownership issued before a write) transactions. The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.",
+ "UMask": "0x04",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "RspSFwd Snoop Responses Received",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5C",
+ "EventName": "UNC_CHA_SNOOP_RESP.RSPSFWD",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy. This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.",
+ "UMask": "0x08",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Rsp*Fwd*WB Snoop Responses Received",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5C",
+ "EventName": "UNC_CHA_SNOOP_RESP.RSP_FWD_WB",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a transaction with the opcode type Rsp*Fwd*WB Snoop Response was received which indicates the data was written back to it's home socket, and the cacheline was forwarded to the requestor socket. This snoop response is only used in >= 4 socket systems. It is used when a snoop HITM's in a remote caching agent and it directly forwards data to a requestor, and simultaneously returns data to it's home socket to be written back to memory.",
+ "UMask": "0x20",
+ "Unit": "CHA"
+ },
+ {
+ "BriefDescription": "Rsp*WB Snoop Responses Received",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x5C",
+ "EventName": "UNC_CHA_SNOOP_RESP.RSP_WBWB",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a transaction with the opcode type Rsp*WB Snoop Response was received which indicates which indicates the data was written back to it's home. This is returned when a non-RFO request hits a cacheline in the Modified state. The Cache can either downgrade the cacheline to a S (Shared) or I (Invalid) state depending on how the system has been configured. This reponse will also be sent when a cache requests E (Exclusive) ownership of a cache line without receiving data, because the cache must acquire ownership.",
+ "UMask": "0x10",
+ "Unit": "CHA"
+ },
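The UNC_CHA_SNOOP_RESP.* entries above all share EventCode 0x5C and are distinguished only by their UMask bit, so a single counter can in principle accumulate several response types at once by combining bits. A minimal Python sketch of that decoding, using only the UMask values listed in the entries above (the helper name is illustrative, not part of any tool):

    # Map each UMask bit to the UNC_CHA_SNOOP_RESP sub-event defined above.
    CHA_SNOOP_RESP = {
        0x01: "RSPI",
        0x04: "RSPIFWD",
        0x08: "RSPSFWD",
        0x10: "RSP_WBWB",
        0x20: "RSP_FWD_WB",
        0x40: "RSPCNFLCTS",
    }

    def decode_snoop_resp(umask):
        # Return the sub-event names whose bits are set in the given umask.
        return [name for bit, name in sorted(CHA_SNOOP_RESP.items()) if umask & bit]

    print(decode_snoop_resp(0x0C))  # ['RSPIFWD', 'RSPSFWD']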
+ {
+ "BriefDescription": "Clockticks of the IIO Traffic Controller",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_IIO_CLOCKTICKS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts clockticks of the 1GHz trafiic controller clock in the IIO unit.",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part0",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part1",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part2",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for 4 bytes made by the CPU to IIO Part3",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every read request for 4 bytes of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
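A pattern worth noting in the UNC_IIO_* entries: each IIO "Part" is selected by a single PortMask bit (Part0 = 0x01 through Part3 = 0x08), and per the descriptions each part's device starts at lane 4*part of the 16-lane bus. A small illustrative Python helper capturing that relationship, inferred from the four entries above rather than from any documented API:

    def iio_part_info(part):
        # Part0..Part3 -> PortMask bit and first PCIe lane, per the
        # descriptions above (Part0 starts at lane 0, Part3 at lane 12).
        assert 0 <= part <= 3
        return {"PortMask": 1 << part, "first_lane": 4 * part}

    for p in range(4):
        print("Part%d" % p, iio_part_info(p))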
+ {
+ "BriefDescription": "Write request of 4 bytes made to IIO Part0 by the CPU",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of 4 bytes made to IIO Part1 by the CPU",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of 4 bytes made to IIO Part2 by the CPU ",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of 4 bytes made to IIO Part3 by the CPU ",
+ "Counter": "2,3",
+ "EventCode": "0xC0",
+ "EventName": "UNC_IIO_DATA_REQ_BY_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every write request of 4 bytes of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part0. In the general case, part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part1",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part1. In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part2",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part2. In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by the CPU to IIO Part3",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by a unit on the main die (generally a core) to the MMIO space of a card on IIO Part3. In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part0 by the CPU",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part0 by a unit on the main die (generally a core). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part1 by the CPU",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part1 by a unit on the main die (generally a core). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part2 by the CPU ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part2 by a unit on the main die (generally a core). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made to IIO Part3 by the CPU ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xC1",
+ "EventName": "UNC_IIO_TXN_REQ_BY_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made to the MMIO space of a card on IIO Part3 by a unit on the main die (generally a core). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part0 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part0 to a unit on the main die (generally memory). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part1 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part1 to a unit on the main die (generally memory). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part2 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part2 to a unit on the main die (generally memory). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Read request for up to a 64 byte transaction is made by IIO Part3 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_READ.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every read request for up to a 64 byte transaction of data made by IIO Part3 to a unit on the main die (generally memory). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x04",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part0 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART0",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x01",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part0 to a unit on the main die (generally memory). In the general case, Part0 refers to a standard PCIe card of any size (x16,x8,x4) that is plugged directly into one of the PCIe slots. Part0 could also refer to any device plugged into the first slot of a PCIe riser card or to a device attached to the IIO unit which starts its use of the bus using lane 0 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part1 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART1",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x02",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part1 to a unit on the main die (generally memory). In the general case, Part1 refers to a x4 PCIe card plugged into the second slot of a PCIe riser card, but it could refer to any x4 device attached to the IIO unit using lanes starting at lane 4 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part2 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART2",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x04",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part2 to a unit on the main die (generally memory). In the general case, Part2 refers to a x4 or x8 PCIe card plugged into the third slot of a PCIe riser card, but it could refer to any x4 or x8 device attached to the IIO unit and using lanes starting at lane 8 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Write request of up to a 64 byte transaction is made by IIO Part3 to Memory",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x84",
+ "EventName": "UNC_IIO_TXN_REQ_OF_CPU.MEM_WRITE.PART3",
+ "FCMask": "0x07",
+ "PerPkg": "1",
+ "PortMask": "0x08",
+ "PublicDescription": "Counts every write request of up to a 64 byte transaction of data made by IIO Part3 to a unit on the main die (generally memory). In the general case, Part3 refers to a x4 PCIe card plugged into the fourth slot of a PCIe riser card, but it could brefer to any device attached to the IIO unit using the lanes starting at lane 12 of the 16 lanes supported by the bus.",
+ "UMask": "0x01",
+ "Unit": "IIO"
+ },
+ {
+ "BriefDescription": "Traffic in which the M2M to iMC Bypass was not taken",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x22",
+ "EventName": "UNC_M2M_BYPASS_M2M_Egress.NOT_TAKEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts traffic in which the M2M (Mesh to Memory) to iMC (Memory Controller) bypass was not taken",
+ "UMask": "0x2",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Cycles when direct to core mode (which bypasses the CHA) was disabled",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x24",
+ "EventName": "UNC_M2M_DIRECT2CORE_NOT_TAKEN_DIRSTATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles when direct to core mode (which bypasses the CHA) was disabled",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Messages sent direct to core (bypassing the CHA)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x23",
+ "EventName": "UNC_M2M_DIRECT2CORE_TAKEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when messages were sent direct to core (bypassing the CHA)",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Number of reads in which direct to core transaction were overridden",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x25",
+ "EventName": "UNC_M2M_DIRECT2CORE_TXN_OVERRIDE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts reads in which direct to core transactions (which would have bypassed the CHA) were overridden",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Number of reads in which direct to Intel UPI transactions were overridden",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x28",
+ "EventName": "UNC_M2M_DIRECT2UPI_NOT_TAKEN_CREDITS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts reads in which direct to Intel Ultra Path Interconnect (UPI) transactions (which would have bypassed the CHA) were overridden",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Cycles when direct to Intel UPI was disabled",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "UNC_M2M_DIRECT2UPI_NOT_TAKEN_DIRSTATE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles when the ability to send messages direct to the Intel Ultra Path Interconnect (bypassing the CHA) was disabled",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Messages sent direct to the Intel UPI",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x26",
+ "EventName": "UNC_M2M_DIRECT2UPI_TAKEN",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when messages were sent direct to the Intel Ultra Path Interconnect (bypassing the CHA)",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Number of reads that a message sent direct2 Intel UPI was overridden",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x29",
+ "EventName": "UNC_M2M_DIRECT2UPI_TXN_OVERRIDE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when a read message that was sent direct to the Intel Ultra Path Interconnect (bypassing the CHA) was overridden",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory lookups (any state found)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.ANY",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in Any State (A, I, S or unused)",
+ "UMask": "0x1",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory lookups (cacheline found in A state) ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_A",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state, and found the cacheline marked in the A (SnoopAll) state, indicating the cacheline is stored in another socket in any state, and we must snoop the other sockets to make sure we get the latest data. The data may be stored in any state in the local socket.",
+ "UMask": "0x8",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in I state) ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_I",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state , and found the cacheline marked in the I (Invalid) state indicating the cacheline is not stored in another socket, and so there is no need to snoop the other sockets for the latest data. The data may be stored in any state in the local socket.",
+ "UMask": "0x2",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory lookup (cacheline found in S state) ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2D",
+ "EventName": "UNC_M2M_DIRECTORY_LOOKUP.STATE_S",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) looks into the multi-socket cacheline Directory state , and found the cacheline marked in the S (Shared) state indicating the cacheline is either stored in another socket in the S(hared) state , and so there is no need to snoop the other sockets for the latest data. The data may be stored in any state in the local socket.",
+ "UMask": "0x4",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from A to I",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.A2I",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from A (SnoopAll) to I (Invalid)",
+ "UMask": "0x20",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from A to S",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.A2S",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from A (SnoopAll) to S (Shared)",
+ "UMask": "0x40",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from/to Any state ",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.ANY",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory to a new state",
+ "UMask": "0x1",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from I to A",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.I2A",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from I (Invalid) to A (SnoopAll)",
+ "UMask": "0x4",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from I to S",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.I2S",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from I (Invalid) to S (Shared)",
+ "UMask": "0x2",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from S to A",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.S2A",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from S (Shared) to A (SnoopAll)",
+ "UMask": "0x10",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Multi-socket cacheline Directory update from S to I",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2E",
+ "EventName": "UNC_M2M_DIRECTORY_UPDATE.S2I",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) updates the multi-socket cacheline Directory state from from S (Shared) to I (Invalid)",
+ "UMask": "0x8",
+ "Unit": "M2M"
+ },
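The UNC_M2M_DIRECTORY_UPDATE.* entries above encode each directory state transition in a single UMask bit. A short Python table of those bits, taken directly from the entries (the dict and helper are illustrative only):

    # UMask bit -> directory state transition, per the entries above.
    M2M_DIR_UPDATE = {
        0x01: "ANY",
        0x02: "I2S",
        0x04: "I2A",
        0x08: "S2I",
        0x10: "S2A",
        0x20: "A2I",
        0x40: "A2S",
    }

    def transitions(umask):
        # Which transitions a programmed umask on EventCode 0x2E selects.
        return [t for bit, t in sorted(M2M_DIR_UPDATE.items()) if umask & bit]

    print(transitions(0x30))  # ['S2A', 'A2I']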
+ {
+ "BriefDescription": "Reads to iMC issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_M2M_IMC_READS.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). ",
+ "UMask": "0x4",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Reads to iMC issued at Normal Priority (Non-Isochronous)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x37",
+ "EventName": "UNC_M2M_IMC_READS.NORMAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) issues reads to the iMC (Memory Controller). It only counts normal priority non-isochronous reads.",
+ "UMask": "0x1",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Writes to iMC issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x38",
+ "EventName": "UNC_M2M_IMC_WRITES.ALL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) issues writes to the iMC (Memory Controller).",
+ "UMask": "0x10",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Partial Non-Isochronous writes to the iMC",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x38",
+ "EventName": "UNC_M2M_IMC_WRITES.PARTIAL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) issues partial writes to the iMC (Memory Controller). It only counts normal priority non-isochronous writes.",
+ "UMask": "0x2",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Prefecth requests that got turn into a demand request",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x56",
+ "EventName": "UNC_M2M_PREFCAM_DEMAND_PROMOTIONS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) promotes a outstanding request in the prefetch queue due to a subsequent demand read request that entered the M2M with the same address. Explanatory Side Note: The Prefecth queue is made of CAM (Content Addressable Memory)",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Inserts into the Memory Controller Prefetch Queue",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x57",
+ "EventName": "UNC_M2M_PREFCAM_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the M2M (Mesh to Memory) recieves a prefetch request and inserts it into its outstanding prefetch queue. Explanatory Side Note: the prefect queue is made from CAM: Content Addressable Memory",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "AD Ingress (from CMS) Queue Inserts",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_M2M_RxC_AD_INSERTS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the a new entry is Received(RxC) and then added to the AD (Address Ring) Ingress Queue from the CMS (Common Mesh Stop). This is generally used for reads, and ",
+ "Unit": "M2M"
+ },
+ {
+ "BriefDescription": "Prefetches generated by the flow control queue of the M3UPI unit.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x29",
+ "EventName": "UNC_M3UPI_UPI_PREFETCH_SPAWN",
+ "PerPkg": "1",
+ "PublicDescription": "Count cases where flow control queue that sits between the Intel Ultra Path Interconnect (UPI) and the mesh spawns a prefetch to the iMC (Memory Controller)",
+ "Unit": "M3UPI"
+ },
+ {
+ "BriefDescription": "Clocks of the Intel Ultra Path Interconnect (UPI)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1",
+ "EventName": "UNC_UPI_CLOCKTICKS",
+ "PerPkg": "1",
+ "PublicDescription": "Counts clockticks of the fixed frequency clock controlling the Intel Ultra Path Interconnect (UPI). This clock runs at1/8th the 'GT/s' speed of the UPI link. For example, a 9.6GT/s link will have a fixed Frequency of 1.2 Ghz.",
+ "Unit": "UPI LL"
+ },
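The description above gives the conversion rule for this fixed counter: the UPI clock runs at 1/8th of the link's GT/s rate, so raw clocktick counts can be turned into wall time. A quick sketch of that arithmetic (function names are illustrative):

    def upi_clock_hz(link_gts):
        # Fixed UPI clock = link speed / 8, e.g. 9.6 GT/s -> 1.2 GHz,
        # matching the example in the description above.
        return link_gts * 1e9 / 8.0

    def upi_seconds(clockticks, link_gts):
        # Elapsed time represented by a UNC_UPI_CLOCKTICKS count.
        return clockticks / upi_clock_hz(link_gts)

    print(upi_clock_hz(9.6) / 1e9)           # 1.2 (GHz)
    print(upi_seconds(1_200_000_000, 9.6))   # 1.0 (second)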
+ {
+ "BriefDescription": "Data Response packets that go direct to core",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x12",
+ "EventName": "UNC_UPI_DIRECT_ATTEMPTS.D2C",
+ "PerPkg": "1",
+ "PublicDescription": "Counts Data Response (DRS) packets that attempted to go direct to core bypassing the CHA.",
+ "UMask": "0x1",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Data Response packets that go direct to Intel UPI",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x12",
+ "EventName": "UNC_UPI_DIRECT_ATTEMPTS.D2U",
+ "PerPkg": "1",
+ "PublicDescription": "Counts Data Response (DRS) packets that attempted to go direct to Intel Ultra Path Interconnect (UPI) bypassing the CHA .",
+ "UMask": "0x2",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Cycles Intel UPI is in L1 power mode (shutdown)",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x21",
+ "EventName": "UNC_UPI_L1_POWER_CYCLES",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles when the Intel Ultra Path Interconnect (UPI) is in L1 power mode. L1 is a mode that totally shuts down the UPI link. Link power states are per link and per direction, so for example the Tx direction could be in one state while Rx was in another, this event only coutns when both links are shutdown.",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Cycles the Rx of the Intel UPI is in L0p power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x25",
+ "EventName": "UNC_UPI_RxL0P_POWER_CYCLES",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles when the the receive side (Rx) of the Intel Ultra Path Interconnect(UPI) is in L0p power mode. L0p is a mode where we disable 60% of the UPI lanes, decreasing our bandwidth in order to save power.",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x31",
+ "EventName": "UNC_UPI_RxL_BYPASSED.SLOT0",
+ "PerPkg": "1",
+ "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot0 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+ "UMask": "0x1",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "FLITs received which bypassed the Slot0 Receive Buffer",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x31",
+ "EventName": "UNC_UPI_RxL_BYPASSED.SLOT1",
+ "PerPkg": "1",
+ "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the slot1 RxQ buffer (Receive Queue) and passed directly across the BGF and into the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+ "UMask": "0x2",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "FLITs received which bypassed the Slot0 Recieve Buffer",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x31",
+ "EventName": "UNC_UPI_RxL_BYPASSED.SLOT2",
+ "PerPkg": "1",
+ "PublicDescription": "Counts incoming FLITs (FLow control unITs) whcih bypassed the slot2 RxQ buffer (Receive Queue) and passed directly to the Egress. This is a latency optimization, and should generally be the common case. If this value is less than the number of FLITs transfered, it implies that there was queueing getting onto the ring, and thus the transactions saw higher latency.",
+ "UMask": "0x4",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Valid data FLITs received from any slot",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3",
+ "EventName": "UNC_UPI_RxL_FLITS.ALL_DATA",
+ "PerPkg": "1",
+ "PublicDescription": "Counts valid data FLITs (80 bit FLow control unITs: 64bits of data) received from any of the 3 Intel Ultra Path Interconnect (UPI) Receive Queue slots on this UPI unit.",
+ "UMask": "0x0F",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Null FLITs received from any slot",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3",
+ "EventName": "UNC_UPI_RxL_FLITS.ALL_NULL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) received from any of the 3 Intel Ultra Path Interconnect (UPI) Receive Queue slots on this UPI unit.",
+ "UMask": "0x27",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Protocol header and credit FLITs received from any slot",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x3",
+ "EventName": "UNC_UPI_RxL_FLITS.NON_DATA",
+ "PerPkg": "1",
+ "PublicDescription": "Counts protocol header and credit FLITs (80 bit FLow control unITs) received from any of the 3 UPI slots on this UPI unit.",
+ "UMask": "0x97",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Cycles in which the Tx of the Intel Ultra Path Interconnect (UPI) is in L0p power mode",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "UNC_UPI_TxL0P_POWER_CYCLES",
+ "PerPkg": "1",
+ "PublicDescription": "Counts cycles when the transmit side (Tx) of the Intel Ultra Path Interconnect(UPI) is in L0p power mode. L0p is a mode where we disable 60% of the UPI lanes, decreasing our bandwidth in order to save power.",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "FLITs that bypassed the TxL Buffer",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x41",
+ "EventName": "UNC_UPI_TxL_BYPASSED",
+ "PerPkg": "1",
+ "PublicDescription": "Counts incoming FLITs (FLow control unITs) which bypassed the TxL(transmit) FLIT buffer and pass directly out the UPI Link. Generally, when data is transmitted across the Intel Ultra Path Interconnect (UPI), it will bypass the TxQ and pass directly to the link. However, the TxQ will be used in L0p (Low Power) mode and (Link Layer Retry) LLR mode, increasing latency to transfer out to the link.",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "UPI interconnect send bandwidth for payload. Derived from unc_upi_txl_flits.all_data",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UPI_DATA_BANDWIDTH_TX",
+ "PerPkg": "1",
+ "ScaleUnit": "7.11E-06Bytes",
+ "UMask": "0x0F",
+ "Unit": "UPI LL"
+ },
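UPI_DATA_BANDWIDTH_TX above carries a ScaleUnit of 7.11E-06 bytes per count. That value is consistent with each increment representing roughly 64/9 bytes of payload (a 64-byte cache line spread over nine flits) folded with a 10^-6 scale, though that derivation is an inference from the number, not something the file states. Converting a raw count into payload bandwidth is then a single multiply, as in this Python sketch:

    def upi_tx_payload(count):
        # Apply the ScaleUnit above: raw count -> scaled payload volume.
        return count * 7.11e-6

    def upi_tx_bandwidth(count, seconds):
        # Scaled payload per second over a measurement interval.
        return upi_tx_payload(count) / seconds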
+ {
+ "BriefDescription": "Null FLITs transmitted from any slot",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_UPI_TxL_FLITS.ALL_NULL",
+ "PerPkg": "1",
+ "PublicDescription": "Counts null FLITs (80 bit FLow control unITs) transmitted via any of the 3 Intel Ulra Path Interconnect (UPI) slots on this UPI unit.",
+ "UMask": "0x27",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Idle FLITs transmitted",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_UPI_TxL_FLITS.IDLE",
+ "PerPkg": "1",
+ "PublicDescription": "Counts when the Intel Ultra Path Interconnect(UPI) transmits an idle FLIT(80 bit FLow control unITs). Every UPI cycle must be sending either data FLITs, protocol/credit FLITs or idle FLITs.",
+ "UMask": "0x47",
+ "Unit": "UPI LL"
+ },
+ {
+ "BriefDescription": "Protocol header and credit FLITs transmitted across any slot",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2",
+ "EventName": "UNC_UPI_TxL_FLITS.NON_DATA",
+ "PerPkg": "1",
+ "PublicDescription": "Counts protocol header and credit FLITs (80 bit FLow control unITs) transmitted across any of the 3 UPI (Ultra Path Interconnect) slots on this UPI unit.",
+ "UMask": "0x97",
+ "Unit": "UPI LL"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
new file mode 100644
index 000000000..7f466c97e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json
@@ -0,0 +1,284 @@
+[
+ {
+ "EventCode": "0x08",
+ "UMask": "0x1",
+ "BriefDescription": "Load misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x2",
+ "BriefDescription": "Page walk completed due to a demand data load to a 4K page",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x4",
+ "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x8",
+ "BriefDescription": "Page walk completed due to a demand data load to a 1G page",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0xe",
+ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x10",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x08",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
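WALK_PENDING above accumulates one count per cycle for each busy Page Miss Handler, while WALK_COMPLETED counts finished walks, so dividing the two gives an estimate of the average walk duration in cycles; WALK_ACTIVE applies CounterMask 1 to the same occupancy count to report cycles with at least one walk in flight. A minimal sketch of that derived metric, assuming only what the descriptions above state (the example values are made up):

    def avg_walk_cycles(walk_pending, walk_completed):
        # Average page-walk latency in cycles, assuming WALK_PENDING adds
        # 1 per busy PMH per cycle as described above.
        return walk_pending / walk_completed if walk_completed else 0.0

    print(avg_walk_cycles(42_000, 1_500))  # ~28 cycles per walk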
+ {
+ "EventCode": "0x08",
+ "UMask": "0x20",
+ "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x1",
+ "BriefDescription": "Store misses in all DTLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x2",
+ "BriefDescription": "Page walk completed due to a demand data store to a 4K page",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x4",
+ "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x8",
+ "BriefDescription": "Page walk completed due to a demand data store to a 1G page",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0xe",
+ "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x10",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
+ "CounterMask": "1",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x49",
+ "UMask": "0x20",
+ "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventName": "DTLB_STORE_MISSES.STLB_HIT",
+ "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x4F",
+ "UMask": "0x10",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.",
+ "Counter": "0,1,2,3",
+ "EventName": "EPT.WALK_PENDING",
+ "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.",
+ "SampleAfterValue": "2000003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x1",
+ "BriefDescription": "Misses at all ITLB levels that cause page walks",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK",
+ "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x2",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
+ "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x4",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x8",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED_1G",
+ "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0xe",
+ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x10",
+ "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_PENDING",
+ "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x10",
+ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.WALK_ACTIVE",
+ "CounterMask": "1",
+ "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0x85",
+ "UMask": "0x20",
+ "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "100003",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xAE",
+ "UMask": "0x1",
+ "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.",
+ "Counter": "0,1,2,3",
+ "EventName": "ITLB.ITLB_FLUSH",
+ "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x1",
+ "BriefDescription": "DTLB flush attempts of the thread-specific entries",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.DTLB_THREAD",
+ "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ },
+ {
+ "EventCode": "0xBD",
+ "UMask": "0x20",
+ "BriefDescription": "STLB flush attempts",
+ "Counter": "0,1,2,3",
+ "EventName": "TLB_FLUSH.STLB_ANY",
+ "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).",
+ "SampleAfterValue": "100007",
+ "CounterHTOff": "0,1,2,3,4,5,6,7"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/cache.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/cache.json
new file mode 100644
index 000000000..6e61ae20d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/cache.json
@@ -0,0 +1,2817 @@
+[
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "CACHE_LOCK_CYCLES.L1D",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D locked"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D and L2 locked"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D.M_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines replaced in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D.M_REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines allocated in the M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D.M_SNOOP_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D snoop eviction of cache lines in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D.REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache lines allocated"
+ },
+ {
+ "EventCode": "0x52",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_PREFETCH.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch misses"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_PREFETCH.REQUESTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_PREFETCH.TRIGGERS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests triggered"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D_WB_L2.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in E state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_WB_L2.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D_WB_L2.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in M state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L1D_WB_L2.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L1 writebacks to L2"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_WB_L2.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_DATA_RQSTS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data prefetches"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the S state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines alloacated"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the E state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the S state"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_LINES_OUT.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.IFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.IFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.IFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.LD_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_RQSTS.LD_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.LOADS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xaa",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PREFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PREFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.PREFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 prefetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.RFOS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO requests"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANSACTIONS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANSACTIONS.FILL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 fill transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANSACTIONS.IFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANSACTIONS.L1D_WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D writeback to L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANSACTIONS.LOAD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 Load transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANSACTIONS.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANSACTIONS.RFO",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANSACTIONS.WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 writeback to LLC transactions"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_WRITE.LOCK.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in E state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe0",
+ "EventName": "L2_WRITE.LOCK.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_WRITE.LOCK.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_WRITE.LOCK.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_WRITE.LOCK.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_WRITE.LOCK.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in S state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "L2_WRITE.RFO.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_WRITE.RFO.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_WRITE.RFO.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_WRITE.RFO.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_WRITE.RFO.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in S state"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Longest latency cache miss"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Longest latency cache reference"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_INST_RETIRED.LOADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a load (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_INST_RETIRED.STORES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a store (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "OFFCORE_REQUESTS.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ANY.READ",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS.ANY.RFO",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.READ_CODE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.READ_DATA",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.RFO",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore L1 data cache writebacks"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore reads busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand code reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand code read busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand data reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand data read busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand RFOs"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand RFOs busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests blocked due to Super Queue full"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SQ_MISC.LRU_HINTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue LRU hints sent to LLC"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue lock splits across a cache line"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "STORE_BLOCKS.AT_RET",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads delayed with at-Retirement block code"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "STORE_BLOCKS.L1D_BLOCK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cacheable loads delayed with L1D block code"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x0",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x400",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100",
+ "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1000",
+ "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5",
+ "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x800",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50",
+ "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "500",
+ "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5000",
+ "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "3",
+ "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50000",
+ "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x1000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20",
+ "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "200",
+ "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x2000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10",
+ "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x111",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x211",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x411",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x711",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x811",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f44",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff44",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x144",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x444",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x744",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x844",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x50ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7fff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xffff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x80ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f22",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff22",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x222",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x422",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x722",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x822",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x108",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x208",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x408",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x708",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x808",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f77",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff77",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x177",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x277",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x477",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x777",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x877",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f33",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff33",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x133",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x233",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x433",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x733",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x833",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x103",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x203",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x403",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x703",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x803",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f01",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff01",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x101",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x201",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x401",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x701",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x801",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f04",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff04",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x104",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x204",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x404",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x704",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x804",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f02",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff02",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x102",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x202",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x402",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x702",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x802",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x180",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x280",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x480",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x780",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x880",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f50",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff50",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x150",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x250",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x450",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x750",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x850",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x110",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x210",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x410",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x710",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x810",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f40",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff40",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x140",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x440",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x740",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x840",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x220",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x420",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x720",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x820",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ALL_LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ALL_LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7f70",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_CACHE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xff70",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_LOCATION",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = IO_CSR_MMIO",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x170",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LLC_HIT_NO_OTHER_CORE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x270",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x470",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LLC_HIT_OTHER_CORE_HITM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x770",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LOCAL_CACHE",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = LOCAL_DRAM AND REMOTE_CACHE_HIT",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x870",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = REMOTE_CACHE_HITM",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/floating-point.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/floating-point.json
new file mode 100644
index 000000000..7d2f71a9d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/floating-point.json
@@ -0,0 +1,229 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ASSIST.ALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.INPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating poiint assists for invalid input value (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.OUTPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_COMP_OPS_EXE.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MMX Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP double precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE and SSE2 FP Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP packed Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP scalar Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP single precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE2 integer Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Computational floating-point operations executed"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "FP_MMX_TRANS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All Floating Point to and from MMX transitions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_MMX_TRANS.TO_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from MMX to Floating Point instructions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_MMX_TRANS.TO_MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from Floating Point to MMX instructions"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_128.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer pack operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_128.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer arithmetic operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer logical operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_128.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer multiply operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_128.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shift operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_128.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer unpack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_64.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit pack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_64.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit logical operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_64.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_64.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shift operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_64.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit unpack operations"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/frontend.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/frontend.json
new file mode 100644
index 000000000..e5e21e034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/frontend.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions decoded"
+ },
+ {
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused instructions decoded"
+ },
+ {
+ "EventCode": "0x19",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TWO_UOP_INSTS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Two Uop instructions decoded"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/memory.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/memory.json
new file mode 100644
index 000000000..6e0829b76
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/memory.json
@@ -0,0 +1,758 @@
+[
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Misaligned store references"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf811",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_DATA read and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf844",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY IFETCH and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x30ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf8ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x40ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20ff",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY_REQUEST and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf822",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = ANY RFO and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf808",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = CORE_WB and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf877",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IFETCH and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf833",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DATA_IN and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf803",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf801",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_DATA_RD and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf804",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_IFETCH and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf802",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = DEMAND_RFO and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf880",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = OTHER and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf850",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf810",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_DATA_RD and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf840",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_RFO and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf820",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PF_IFETCH and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x3070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM_AND_REMOTE_FWD",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_DRAM AND REMOTE_FWD",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xf870",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = ANY_LLC_MISS",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.OTHER_LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = OTHER_LOCAL_DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "REQUEST = PREFETCH and RESPONSE = REMOTE_DRAM",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/other.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/other.json
new file mode 100644
index 000000000..85133d6a5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/other.json
@@ -0,0 +1,287 @@
+[
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_CLEARS.EARLY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Early Branch Prediciton Unit clears"
+ },
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BPU_CLEARS.LATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Late Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_MISSED_CALL_RET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch prediction unit missed call or return"
+ },
+ {
+ "EventCode": "0xD5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ES_REG_RENAMES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ES segment renames"
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IO_TRANSACTIONS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "I/O transactions"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1I.CYCLES_STALLED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch stall cycles"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1I.HITS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch hits"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1I.MISSES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch misses"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L1I.READS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I Instruction fetches"
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LARGE_ITLB.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Large ITLB hit"
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_BLOCK.OVERLAP_STORE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads that partially overlap an earlier store"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "LOAD_DISPATCH.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All loads dispatched"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "LOAD_DISPATCH.MOB",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_DISPATCH.RS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched that bypass the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_DISPATCH.RS_DELAYED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from stage 305"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PARTIAL_ADDRESS_ALIAS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "False dependencies due to partial address aliasing"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RAT_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All RAT stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RAT_STALLS.FLAGS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Flag stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RAT_STALLS.REGISTERS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Partial register stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RAT_STALLS.ROB_READ_PORT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB read port stalls cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RAT_STALLS.SCOREBOARD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Scoreboard stall cycles"
+ },
+ {
+ "EventCode": "0x4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "SB_DRAIN.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All Store buffer stall cycles"
+ },
+ {
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SEG_RENAME_STALLS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Segment rename stall cycles"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOP_RESPONSE.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HIT to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOP_RESPONSE.HITE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITE to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOP_RESPONSE.HITM",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITM to snoop"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS.CODE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop code requests"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS.DATA",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop data requests"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS.INVALIDATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop invalidate requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop code requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop code requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop data requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop data requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop invalidate requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop invalidate requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xF6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SQ_FULL_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue full stall cycles"
+ }
+] \ No newline at end of file
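Note: the *_OUTSTANDING events in this file follow a common occupancy pattern: the base event accumulates the queue depth on every cycle, while the *_NOT_EMPTY variant adds "CounterMask": "1" so it counts only cycles with at least one request queued. Dividing the two gives average occupancy while busy. A minimal Python sketch, using hypothetical counter readings (the numbers are illustrative, not measured):

    def avg_occupancy(outstanding_sum, not_empty_cycles):
        # outstanding_sum: occupancy event, e.g. SNOOPQ_REQUESTS_OUTSTANDING.DATA,
        # which adds the snoop queue depth to the counter on every cycle.
        # not_empty_cycles: the same event programmed with CounterMask=1 (the
        # *_NOT_EMPTY variant), counting cycles where the depth is >= 1.
        if not_empty_cycles == 0:
            return 0.0
        return outstanding_sum / not_empty_cycles

    print(avg_occupancy(1_200_000, 400_000))  # -> 3.0 requests queued on average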
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/pipeline.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/pipeline.json
new file mode 100644
index 000000000..f130510f7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/pipeline.json
@@ -0,0 +1,899 @@
+[
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.CYCLES_DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles the divider is busy"
+ },
+ {
+ "EventCode": "0x14",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.DIV",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide Operations executed",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ARITH.MUL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations executed"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BACLEAR.BAD_TARGET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted with bad target address"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR.CLEAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted, regardless of cause "
+ },
+ {
+ "EventCode": "0xA7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR_FORCE_IQ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction queue forced BACLEAR"
+ },
+ {
+ "EventCode": "0xE0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch instructions decoded"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_INST_EXEC.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_EXEC.COND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Conditional branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_EXEC.DIRECT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Unconditional branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect non call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_INST_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_INST_EXEC.NON_CALLS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All non call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect return branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_EXEC.TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Retired near call instructions (Precise Event)"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_MISP_EXEC.ANY",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_EXEC.COND",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_EXEC.DIRECT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted unconditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_MISP_EXEC.NON_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted return branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_MISP_EXEC.TAKEN",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional retired branches (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_P",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total CPU cycles",
+ "CounterMask": "2"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "ILD_STALL.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Any Instruction Length Decoder stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction Queue full stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Length Change Prefix stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ILD_STALL.MRU",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stall cycles due to BPU MRU bypass"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ILD_STALL.REGEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Regen stall cycles"
+ },
+ {
+ "EventCode": "0x18",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_DECODED.DEC0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions that must be decoded by decoder 0"
+ },
+ {
+ "EventCode": "0x1E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles instructions are written to the instruction queue"
+ },
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions written to instruction queue."
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (fixed counter)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "INST_RETIRED.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired MMX instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired floating-point operations (Precise Event)"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load operations conflicting with software prefetches"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xA8",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.INACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD_OVERFLOW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loops that can't stream from the instruction queue"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Cycles machine clear asserted"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEM_ORDER",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RESOURCE_STALLS.FPCW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FPU control word write stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LOAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Load buffer stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS.MXCSR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MXCSR rename stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RESOURCE_STALLS.OTHER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Other Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reservation Station full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.STORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Store buffer stall cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+ },
+ {
+ "EventCode": "0xDB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOP_UNFUSION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uop unfusions due to FP exceptions"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DECODED.ESP_FOLDING",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer instructions decoded"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DECODED.ESP_SYNC",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer sync operations"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops decoded by Microcode Sequencer",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xD1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DECODED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops are decoded",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on any port (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.PORT0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 0"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued on ports 0, 1 or 5"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.PORT1",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT2_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 2 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT234_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued on ports 2, 3 or 4"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT3_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 3 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT4_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED.PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 5"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued on any thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops were issued on either thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_ISSUED.FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Fused Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are being retired",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_RETIRED.MACRO_FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retirement slots used (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ }
+] \ No newline at end of file
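Note: each entry above maps a symbolic event name to the raw fields perf programs into the PMU: EventCode (event select, bits 0-7 of the raw config), UMask (bits 8-15), plus optional modifiers such as CounterMask, Invert and EdgeDetect. A minimal sketch of turning one of these JSON tables into a raw perf event string; the file path is illustrative, and the extended modifiers are deliberately ignored:

    import json

    def load_events(path):
        # Index a pmu-events JSON file by EventName.
        with open(path) as f:
            return {e["EventName"]: e for e in json.load(f)}

    def raw_encoding(event):
        # Raw x86 config: event select in bits 0-7, umask in bits 8-15.
        # Some entries list two codes ("0xB7, 0xBB"); take the first.
        code = int(event["EventCode"].split(",")[0], 16)
        umask = int(event["UMask"], 16)
        return (umask << 8) | code

    events = load_events("pipeline.json")  # illustrative local path
    ev = events["UOPS_RETIRED.ANY"]
    print(f"perf stat -e r{raw_encoding(ev):x}")  # -> perf stat -e r1c2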
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-dp/virtual-memory.json b/tools/perf/pmu-events/arch/x86/westmereep-dp/virtual-memory.json
new file mode 100644
index 000000000..57b53562e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-dp/virtual-memory.json
@@ -0,0 +1,173 @@
+[
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load misses"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss large page walks"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss caused by low part of address"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB second level hit"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walks complete"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walk cycles"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss large page walks"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses casued by low part of address"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB first level misses but second level hit"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss page walks"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB miss page walk cycles"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Extended Page Table walk cycles"
+ },
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_FLUSH",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB flushes"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISS_RETIRED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "ITLB_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss large page walks"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss page walks"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB miss page walk cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+ }
+] \ No newline at end of file
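Note: the walk events above pair naturally: WALK_CYCLES accumulates cycles spent in the page walker, while WALK_COMPLETED counts finished walks, so their ratio estimates the average page-walk cost. A minimal sketch with hypothetical counter readings (illustrative numbers only):

    def walk_cost(walk_cycles, walks_completed):
        # Average cycles per completed DTLB page walk, from
        # DTLB_MISSES.WALK_CYCLES / DTLB_MISSES.WALK_COMPLETED.
        return walk_cycles / walks_completed if walks_completed else 0.0

    print(f"{walk_cost(5_000_000, 100_000):.1f} cycles/walk")  # -> 50.0 cycles/walk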
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/cache.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/cache.json
new file mode 100644
index 000000000..dad20f0e3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/cache.json
@@ -0,0 +1,3233 @@
+[
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "CACHE_LOCK_CYCLES.L1D",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D locked"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D and L2 locked"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D.M_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines replaced in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D.M_REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines allocated in the M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D.M_SNOOP_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D snoop eviction of cache lines in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D.REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache lines allocated"
+ },
+ {
+ "EventCode": "0x52",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_PREFETCH.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch misses"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_PREFETCH.REQUESTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_PREFETCH.TRIGGERS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests triggered"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D_WB_L2.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in E state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_WB_L2.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D_WB_L2.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in M state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L1D_WB_L2.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L1 writebacks to L2"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_WB_L2.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_DATA_RQSTS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data prefetches"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the S state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines alloacated"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the E state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the S state"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_LINES_OUT.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.IFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.IFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.IFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.LD_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_RQSTS.LD_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.LOADS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xaa",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PREFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PREFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.PREFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 prefetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.RFOS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO requests"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANSACTIONS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANSACTIONS.FILL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 fill transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANSACTIONS.IFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANSACTIONS.L1D_WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D writeback to L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANSACTIONS.LOAD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 Load transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANSACTIONS.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANSACTIONS.RFO",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANSACTIONS.WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 writeback to LLC transactions"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_WRITE.LOCK.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in E state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe0",
+ "EventName": "L2_WRITE.LOCK.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_WRITE.LOCK.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_WRITE.LOCK.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_WRITE.LOCK.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_WRITE.LOCK.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in S state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "L2_WRITE.RFO.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_WRITE.RFO.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_WRITE.RFO.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_WRITE.RFO.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_WRITE.RFO.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in S state"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Longest latency cache miss"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Longest latency cache reference"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_INST_RETIRED.LOADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a load (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_INST_RETIRED.STORES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a store (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_UNCORE_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Load instructions retired with a data source of local DRAM or locally homed remote hitm (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Load instructions retired that HIT modified data in sibling core (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Load instructions retired remote cache HIT data source (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_UNCORE_RETIRED.UNCACHEABLE",
+ "SampleAfterValue": "4000",
+ "BriefDescription": "Load instructions retired IO (Precise Event)"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "OFFCORE_REQUESTS.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ANY.READ",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS.ANY.RFO",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.READ_CODE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.READ_DATA",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.RFO",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore L1 data cache writebacks"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "OFFCORE_REQUESTS.UNCACHED_MEM",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore uncached memory accesses"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore reads busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand code reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand code read busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand data reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand data read busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand RFOs"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand RFOs busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests blocked due to Super Queue full"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SQ_MISC.LRU_HINTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue LRU hints sent to LLC"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue lock splits across a cache line"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "STORE_BLOCKS.AT_RET",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads delayed with at-Retirement block code"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "STORE_BLOCKS.L1D_BLOCK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cacheable loads delayed with L1D block code"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x0",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x400",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100",
+ "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1000",
+ "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5",
+ "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x800",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50",
+ "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "500",
+ "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5000",
+ "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "3",
+ "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50000",
+ "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x1000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20",
+ "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "200",
+ "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x2000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10",
+ "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF11",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x111",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x211",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x411",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x711",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2711",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1811",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5811",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x811",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F44",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF44",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x144",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x244",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x444",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x744",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2744",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1844",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5844",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x844",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7FFF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFFFF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x80FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x27FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x18FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x58FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x10FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F22",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF22",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x122",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x222",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x422",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x722",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2722",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1822",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5822",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x822",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF08",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore writebacks",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x108",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x408",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x708",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2708",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1808",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5808",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x808",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F77",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF77",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code or data read requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x177",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x277",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x477",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x777",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2777",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1877",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5877",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x877",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F33",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any cache_dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF33",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any location",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x133",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x233",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x433",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x733",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2733",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1833",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5833",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x833",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF03",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x103",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x203",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x403",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x703",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2703",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1803",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5803",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x803",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F01",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF01",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x101",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x201",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x401",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x701",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2701",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1801",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5801",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x801",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F04",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF04",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x104",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x204",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x404",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x704",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2704",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1804",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5804",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x804",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F02",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF02",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x102",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x202",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x402",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x702",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2702",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1802",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5802",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x802",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore other requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x180",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x280",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x480",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x780",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2780",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1880",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5880",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x880",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F50",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF50",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x150",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x250",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x450",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x750",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2750",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1850",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5850",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x850",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x110",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x210",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x410",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x710",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2710",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1810",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5810",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x810",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F40",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF40",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x140",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x240",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x440",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x740",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2740",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1840",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5840",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x840",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x120",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x220",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x420",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x720",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2720",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1820",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5820",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x820",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x7F70",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xFF70",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x8070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x170",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x270",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x470",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x770",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2770",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1870",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x5870",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x1070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x870",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/floating-point.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/floating-point.json
new file mode 100644
index 000000000..7d2f71a9d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/floating-point.json
@@ -0,0 +1,229 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ASSIST.ALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.INPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating poiint assists for invalid input value (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.OUTPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_COMP_OPS_EXE.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MMX Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP double precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE and SSE2 FP Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP packed Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP scalar Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP single precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE2 integer Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Computational floating-point operations executed"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "FP_MMX_TRANS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All Floating Point to and from MMX transitions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_MMX_TRANS.TO_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from MMX to Floating Point instructions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_MMX_TRANS.TO_MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from Floating Point to MMX instructions"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_128.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer pack operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_128.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer arithmetic operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer logical operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_128.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer multiply operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_128.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shift operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_128.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer unpack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_64.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit pack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_64.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit logical operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_64.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_64.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shift operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_64.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit unpack operations"
+ }
+] \ No newline at end of file
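
The event files in this patch all share one flat schema: a JSON array of objects carrying EventCode, UMask, EventName, SampleAfterValue and BriefDescription, plus optional fields such as Counter, PEBS, CounterMask, Invert, Offcore and the MSRValue/MSRIndex pair used by the offcore events. A minimal Python sketch of a schema check over the floating-point.json file added above; the required-key set is inferred from the entries in this diff, not taken from perf's jevents build, so treat it as an assumption:

import json

# Keys present on every entry in this patch; Counter is common but not
# universal (a few SNOOPQ_REQUESTS_OUTSTANDING entries omit it), so it is
# deliberately left out of the required set.
REQUIRED = {"EventCode", "UMask", "EventName",
            "SampleAfterValue", "BriefDescription"}

def check(path):
    with open(path) as f:
        events = json.load(f)
    for ev in events:
        missing = REQUIRED - ev.keys()
        if missing:
            print(f"{ev.get('EventName', '?')}: missing {sorted(missing)}")
        if ev.get("Offcore") == "1":
            # Offcore events also program the MSR pair (0x1a6/0x1a7 above).
            assert {"MSRValue", "MSRIndex"} <= ev.keys()

check("tools/perf/pmu-events/arch/x86/westmereep-sp/floating-point.json")
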
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/frontend.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/frontend.json
new file mode 100644
index 000000000..e5e21e034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/frontend.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions decoded"
+ },
+ {
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused instructions decoded"
+ },
+ {
+ "EventCode": "0x19",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TWO_UOP_INSTS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Two Uop instructions decoded"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/memory.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/memory.json
new file mode 100644
index 000000000..90eb6aac3
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/memory.json
@@ -0,0 +1,739 @@
+[
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF811",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4011",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF844",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4044",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x60FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF8FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x20FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x40FF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF822",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4022",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF808",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4008",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF877",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4077",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF833",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any LLC miss",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the local DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4033",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF803",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4003",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF801",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4001",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF804",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4004",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF802",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4002",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF880",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4080",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF850",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4050",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF810",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4010",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF840",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4040",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF820",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4020",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x6070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0xF870",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x2070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7, 0xBB",
+ "MSRValue": "0x4070",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ }
+] \ No newline at end of file
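
A pattern worth calling out in the OFFCORE_RESPONSE entries above: every MSRValue factors into a request-type low byte and a response-type high byte, e.g. PF_IFETCH.LOCAL_DRAM = 0x2040 = (LOCAL_DRAM 0x20 << 8) | PF_IFETCH 0x40. The bit names in this Python sketch are inferred from the EventName/MSRValue pairs in this file, not from an Intel reference, so treat them as assumptions:

# Request-type bits (low byte) and response-type bits (high byte), read off
# the MSRValue/EventName pairs in memory.json above.
REQUEST = {
    "DEMAND_DATA_RD": 0x01, "DEMAND_RFO": 0x02, "DEMAND_IFETCH": 0x04,
    "COREWB": 0x08, "PF_DATA_RD": 0x10, "PF_RFO": 0x20,
    "PF_IFETCH": 0x40, "OTHER": 0x80,
}
RESPONSE = {"LOCAL_DRAM": 0x20, "REMOTE_DRAM": 0x40}

def msr_value(requests, responses):
    req = 0
    for name in requests:
        req |= REQUEST[name]
    resp = 0
    for name in responses:
        resp |= RESPONSE[name]
    return (resp << 8) | req

# ANY_DATA.ANY_DRAM above is 0x6011: demand + prefetch data reads satisfied
# by local or remote DRAM.
assert msr_value(["DEMAND_DATA_RD", "PF_DATA_RD"],
                 ["LOCAL_DRAM", "REMOTE_DRAM"]) == 0x6011
assert msr_value(["PF_IFETCH"], ["REMOTE_DRAM"]) == 0x4040
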
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/other.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/other.json
new file mode 100644
index 000000000..85133d6a5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/other.json
@@ -0,0 +1,287 @@
+[
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_CLEARS.EARLY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Early Branch Prediciton Unit clears"
+ },
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BPU_CLEARS.LATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Late Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_MISSED_CALL_RET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch prediction unit missed call or return"
+ },
+ {
+ "EventCode": "0xD5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ES_REG_RENAMES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ES segment renames"
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IO_TRANSACTIONS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "I/O transactions"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1I.CYCLES_STALLED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch stall cycles"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1I.HITS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch hits"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1I.MISSES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch misses"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L1I.READS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I Instruction fetches"
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LARGE_ITLB.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Large ITLB hit"
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_BLOCK.OVERLAP_STORE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads that partially overlap an earlier store"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "LOAD_DISPATCH.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All loads dispatched"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "LOAD_DISPATCH.MOB",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_DISPATCH.RS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched that bypass the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_DISPATCH.RS_DELAYED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from stage 305"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PARTIAL_ADDRESS_ALIAS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "False dependencies due to partial address aliasing"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RAT_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All RAT stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RAT_STALLS.FLAGS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Flag stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RAT_STALLS.REGISTERS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Partial register stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RAT_STALLS.ROB_READ_PORT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB read port stalls cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RAT_STALLS.SCOREBOARD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Scoreboard stall cycles"
+ },
+ {
+ "EventCode": "0x4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "SB_DRAIN.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All Store buffer stall cycles"
+ },
+ {
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SEG_RENAME_STALLS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Segment rename stall cycles"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOP_RESPONSE.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HIT to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOP_RESPONSE.HITE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITE to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOP_RESPONSE.HITM",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITM to snoop"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS.CODE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop code requests"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS.DATA",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop data requests"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS.INVALIDATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop invalidate requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop code requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop code requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop data requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop data requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop invalidate requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop invalidate requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xF6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SQ_FULL_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue full stall cycles"
+ }
+] \ No newline at end of file
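
Note how other.json pairs each SNOOPQ_REQUESTS_OUTSTANDING occupancy event with a *_NOT_EMPTY variant whose only difference is "CounterMask": "1". A conceptual Python sketch of that threshold semantics; this models the usual PMU cmask behaviour for illustration and is not perf code:

# With cmask 0 the counter accumulates the per-cycle occupancy; with
# cmask >= 1 it instead adds 1 for every cycle whose occupancy meets the
# threshold, turning an "outstanding requests" count into a cycle count.
def pmu_count(occupancy_per_cycle, cmask=0):
    if cmask == 0:
        return sum(occupancy_per_cycle)
    return sum(1 for n in occupancy_per_cycle if n >= cmask)

samples = [0, 2, 3, 0, 1]
print(pmu_count(samples))      # 6 -> SNOOPQ_REQUESTS_OUTSTANDING.DATA
print(pmu_count(samples, 1))   # 3 -> ...DATA_NOT_EMPTY (cycles queued)
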
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/pipeline.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/pipeline.json
new file mode 100644
index 000000000..f130510f7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/pipeline.json
@@ -0,0 +1,899 @@
+[
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.CYCLES_DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles the divider is busy"
+ },
+ {
+ "EventCode": "0x14",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.DIV",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide Operations executed",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ARITH.MUL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations executed"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BACLEAR.BAD_TARGET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted with bad target address"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR.CLEAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted, regardless of cause "
+ },
+ {
+ "EventCode": "0xA7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR_FORCE_IQ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction queue forced BACLEAR"
+ },
+ {
+ "EventCode": "0xE0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch instructions decoded"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_INST_EXEC.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_EXEC.COND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Conditional branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_EXEC.DIRECT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Unconditional branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect non call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_INST_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_INST_EXEC.NON_CALLS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All non call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect return branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_EXEC.TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Retired near call instructions (Precise Event)"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_MISP_EXEC.ANY",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_EXEC.COND",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_EXEC.DIRECT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted unconditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_MISP_EXEC.NON_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted non call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted return branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_MISP_EXEC.TAKEN",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional retired branches (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_P",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Reference base clock (133 Mhz) cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total CPU cycles",
+ "CounterMask": "2"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "ILD_STALL.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Any Instruction Length Decoder stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction Queue full stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Length Change Prefix stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ILD_STALL.MRU",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stall cycles due to BPU MRU bypass"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ILD_STALL.REGEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Regen stall cycles"
+ },
+ {
+ "EventCode": "0x18",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_DECODED.DEC0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions that must be decoded by decoder 0"
+ },
+ {
+ "EventCode": "0x1E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles instructions are written to the instruction queue"
+ },
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions written to instruction queue."
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (fixed counter)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "INST_RETIRED.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired MMX instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired floating-point operations (Precise Event)"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load operations conflicting with software prefetches"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xA8",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.INACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD_OVERFLOW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loops that can't stream from the instruction queue"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Cycles machine clear asserted"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEM_ORDER",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RESOURCE_STALLS.FPCW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FPU control word write stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LOAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Load buffer stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS.MXCSR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MXCSR rename stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RESOURCE_STALLS.OTHER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Other Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reservation Station full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.STORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Store buffer stall cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+ },
+ {
+ "EventCode": "0xDB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOP_UNFUSION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uop unfusions due to FP exceptions"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DECODED.ESP_FOLDING",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer instructions decoded"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DECODED.ESP_SYNC",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer sync operations"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops decoded by Microcode Sequencer",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xD1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DECODED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops are decoded",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on any port (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.PORT0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 0"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued on ports 0, 1 or 5"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.PORT1",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT2_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 2 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT234_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued on ports 2, 3 or 4"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT3_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 3 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT4_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED.PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 5"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued on any thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops were issued on either thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_ISSUED.FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Fused Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are being retired",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_RETIRED.MACRO_FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retirement slots used (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereep-sp/virtual-memory.json b/tools/perf/pmu-events/arch/x86/westmereep-sp/virtual-memory.json
new file mode 100644
index 000000000..2153b3f5d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereep-sp/virtual-memory.json
@@ -0,0 +1,149 @@
+[
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load misses"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss caused by low part of address"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB second level hit"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walks complete"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walk cycles"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss large page walks"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB first level misses but second level hit"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss page walks"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB miss page walk cycles"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Extended Page Table walk cycles"
+ },
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_FLUSH",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB flushes"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISS_RETIRED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss page walks"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB miss page walk cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/cache.json b/tools/perf/pmu-events/arch/x86/westmereex/cache.json
new file mode 100644
index 000000000..f9bc7fdd4
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/cache.json
@@ -0,0 +1,3225 @@
+[
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "CACHE_LOCK_CYCLES.L1D",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D locked"
+ },
+ {
+ "EventCode": "0x63",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "CACHE_LOCK_CYCLES.L1D_L2",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles L1D and L2 locked"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D.M_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines replaced in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D.M_REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D cache lines allocated in the M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x8",
+ "EventName": "L1D.M_SNOOP_EVICT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D snoop eviction of cache lines in M state"
+ },
+ {
+ "EventCode": "0x51",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D.REPL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1 data cache lines allocated"
+ },
+ {
+ "EventCode": "0x52",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_CACHE_PREFETCH_LOCK_FB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1D prefetch load lock accepted in fill buffer"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x2",
+ "EventName": "L1D_PREFETCH.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch misses"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "L1D_PREFETCH.REQUESTS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests"
+ },
+ {
+ "EventCode": "0x4E",
+ "Counter": "0,1",
+ "UMask": "0x4",
+ "EventName": "L1D_PREFETCH.TRIGGERS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D hardware prefetch requests triggered"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1D_WB_L2.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in E state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1D_WB_L2.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in I state (misses)"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L1D_WB_L2.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in M state"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L1D_WB_L2.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L1 writebacks to L2"
+ },
+ {
+ "EventCode": "0x28",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1D_WB_L2.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L1 writebacks to L2 in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_DATA_RQSTS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_DATA_RQSTS.DEMAND.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_DATA_RQSTS.DEMAND.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_DATA_RQSTS.DEMAND.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_DATA_RQSTS.DEMAND.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand requests"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_DATA_RQSTS.DEMAND.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data demand loads in S state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.E_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in E state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.I_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the I state (misses)"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.M_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in M state"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.MESI",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 data prefetches"
+ },
+ {
+ "EventCode": "0x26",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_DATA_RQSTS.PREFETCH.S_STATE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 data prefetches in the S state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "L2_LINES_IN.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines alloacated"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_IN.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the E state"
+ },
+ {
+ "EventCode": "0xF1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_IN.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines allocated in the S state"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_LINES_OUT.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_LINES_OUT.DEMAND_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_LINES_OUT.DEMAND_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a demand request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_LINES_OUT.PREFETCH_CLEAN",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0xF2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_LINES_OUT.PREFETCH_DIRTY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 modified lines evicted by a prefetch request"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_RQSTS.IFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_RQSTS.IFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "L2_RQSTS.IFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_RQSTS.LD_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_RQSTS.LD_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 load misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L2_RQSTS.LOADS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xaa",
+ "EventName": "L2_RQSTS.MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_RQSTS.PREFETCH_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_RQSTS.PREFETCH_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc0",
+ "EventName": "L2_RQSTS.PREFETCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 prefetches"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xff",
+ "EventName": "L2_RQSTS.REFERENCES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 requests"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_RQSTS.RFO_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO hits"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_RQSTS.RFO_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO misses"
+ },
+ {
+ "EventCode": "0x24",
+ "Counter": "0,1,2,3",
+ "UMask": "0xc",
+ "EventName": "L2_RQSTS.RFOS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO requests"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_TRANSACTIONS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_TRANSACTIONS.FILL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 fill transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L2_TRANSACTIONS.IFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 instruction fetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_TRANSACTIONS.L1D_WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L1D writeback to L2 transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_TRANSACTIONS.LOAD",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 Load transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_TRANSACTIONS.PREFETCH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 prefetch transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_TRANSACTIONS.RFO",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 RFO transactions"
+ },
+ {
+ "EventCode": "0xF0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_TRANSACTIONS.WB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "L2 writeback to LLC transactions"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "L2_WRITE.LOCK.E_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in E state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe0",
+ "EventName": "L2_WRITE.LOCK.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "L2_WRITE.LOCK.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "L2_WRITE.LOCK.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf0",
+ "EventName": "L2_WRITE.LOCK.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All demand L2 lock RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "L2_WRITE.LOCK.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand lock RFOs in S state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xe",
+ "EventName": "L2_WRITE.RFO.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs that hit the cache"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L2_WRITE.RFO.I_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in I state (misses)"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "L2_WRITE.RFO.M_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in M state"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "L2_WRITE.RFO.MESI",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All L2 demand store RFOs"
+ },
+ {
+ "EventCode": "0x27",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L2_WRITE.RFO.S_STATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "L2 demand store RFOs in S state"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x41",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Longest latency cache miss"
+ },
+ {
+ "EventCode": "0x2E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4f",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Longest latency cache reference"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_INST_RETIRED.LOADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a load (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_INST_RETIRED.STORES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired which contains a store (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "MEM_LOAD_RETIRED.HIT_LFB",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss L1D and hit an previously allocated LFB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_LOAD_RETIRED.L1D_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired loads that hit the L1 data cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that hit the L2 cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "MEM_LOAD_RETIRED.LLC_MISS",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Retired loads that miss the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_LOAD_RETIRED.LLC_UNSHARED_HIT",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit valid versions in the LLC cache (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit sibling core's L2 in modified or unmodified states (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MEM_UNCORE_RETIRED.LOCAL_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Load instructions retired that HIT modified data in sibling core (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Load instructions retired local dram and remote cache HIT data sources (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "MEM_UNCORE_RETIRED.REMOTE_DRAM",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Load instructions retired remote DRAM and remote home-remote cache HITM (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_UNCORE_RETIRED.UNCACHEABLE",
+ "SampleAfterValue": "4000",
+ "BriefDescription": "Load instructions retired IO (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MEM_UNCORE_RETIRED.REMOTE_HITM",
+ "SampleAfterValue": "40000",
+ "BriefDescription": "Retired loads that hit remote socket in modified state (Precise Event)"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "OFFCORE_REQUESTS.ANY",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS.ANY.READ",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "OFFCORE_REQUESTS.ANY.RFO",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.READ_CODE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.READ_DATA",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data read requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS.DEMAND.RFO",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests"
+ },
+ {
+ "EventCode": "0xB0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "OFFCORE_REQUESTS.L1D_WRITEBACK",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore L1 data cache writebacks"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x8",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ANY.READ_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore reads busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand code reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x2",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand code read busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand data reads"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand data read busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding offcore demand RFOs"
+ },
+ {
+ "EventCode": "0x60",
+ "UMask": "0x4",
+ "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles offcore demand RFOs busy",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_REQUESTS_SQ_FULL",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests blocked due to Super Queue full"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SQ_MISC.LRU_HINTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue LRU hints sent to LLC"
+ },
+ {
+ "EventCode": "0xF4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SQ_MISC.SPLIT_LOCK",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue lock splits across a cache line"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "STORE_BLOCKS.AT_RET",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads delayed with at-Retirement block code"
+ },
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "STORE_BLOCKS.L1D_BLOCK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Cacheable loads delayed with L1D block code"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x0",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_0",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Memory instructions retired above 0 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x400",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_1024",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "100",
+ "BriefDescription": "Memory instructions retired above 1024 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x80",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_128",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "1000",
+ "BriefDescription": "Memory instructions retired above 128 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x10",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10000",
+ "BriefDescription": "Memory instructions retired above 16 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_16384",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5",
+ "BriefDescription": "Memory instructions retired above 16384 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x800",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_2048",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50",
+ "BriefDescription": "Memory instructions retired above 2048 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x100",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_256",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "500",
+ "BriefDescription": "Memory instructions retired above 256 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x20",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "5000",
+ "BriefDescription": "Memory instructions retired above 32 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_32768",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "3",
+ "BriefDescription": "Memory instructions retired above 32768 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x4",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "50000",
+ "BriefDescription": "Memory instructions retired above 4 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x1000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_4096",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20",
+ "BriefDescription": "Memory instructions retired above 4096 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x200",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_512",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "200",
+ "BriefDescription": "Memory instructions retired above 512 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x40",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_64",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Memory instructions retired above 64 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x8",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Memory instructions retired above 8 clocks (Precise Event)"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xB",
+ "MSRValue": "0x2000",
+ "Counter": "3",
+ "UMask": "0x10",
+ "EventName": "MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD_8192",
+ "MSRIndex": "0x3F6",
+ "SampleAfterValue": "10",
+ "BriefDescription": "Memory instructions retired above 8192 clocks (Precise Event)"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F11",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF11",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x111",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x211",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x411",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x711",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4711",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F44",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF44",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x144",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x244",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x444",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x744",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4744",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7FFF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFFFF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x80FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x47FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x18FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x38FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x10FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F22",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF22",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x122",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x222",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x422",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x722",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4722",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F08",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF08",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore writebacks",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x108",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x408",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x708",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4708",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F77",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF77",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore code or data read requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x177",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x277",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x477",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x777",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4777",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F33",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any cache_dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF33",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = any location",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x133",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches statisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x233",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x433",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x733",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4733",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = local cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore request = all data, response = remote cache or dram",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HIT in a remote cache ",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads, RFO's and prefetches that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F03",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF03",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x103",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x203",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x403",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x703",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4703",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F01",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF01",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x101",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x201",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x401",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x701",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4701",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F04",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF04",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x104",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x204",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x404",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x704",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4704",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F02",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF02",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore demand RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x102",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x202",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x402",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x702",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4702",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F80",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF80",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore other requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x180",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x280",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x480",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x780",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4780",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F30",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any cache or DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF30",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the IO, CSR, MMIO unit.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x130",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x230",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x430",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x730",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4730",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F10",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF10",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch data reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x110",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x210",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x410",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x710",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4710",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F40",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF40",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch code reads",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x140",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x240",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x440",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x740",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4740",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F20",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF20",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch RFO requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x120",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x220",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x420",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x720",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4720",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that HITM in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x7F70",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any cache or DRAM.",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xFF70",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LOCATION",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "All offcore prefetch requests",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x8070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.IO_CSR_MMIO",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the IO, CSR, MMIO unit",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x170",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_NO_OTHER_CORE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and not found in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x270",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HIT in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x470",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LLC_HIT_OTHER_CORE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC and HITM in a sibling core",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x770",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4770",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the LLC or local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x3870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote cache or remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x1070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HIT",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HIT in a remote cache",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_CACHE_HITM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that HITM in a remote cache",
+ "Offcore": "1"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/floating-point.json b/tools/perf/pmu-events/arch/x86/westmereex/floating-point.json
new file mode 100644
index 000000000..7d2f71a9d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/floating-point.json
@@ -0,0 +1,229 @@
+[
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_ASSIST.ALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_ASSIST.INPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating poiint assists for invalid input value (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xF7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_ASSIST.OUTPUT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "X87 Floating point assists for invalid output value (Precise Event)"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_COMP_OPS_EXE.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MMX Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP double precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE and SSE2 FP Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_PACKED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP packed Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "FP_COMP_OPS_EXE.SSE_FP_SCALAR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE FP scalar Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE* FP single precision Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "FP_COMP_OPS_EXE.SSE2_INTEGER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "SSE2 integer Uops"
+ },
+ {
+ "EventCode": "0x10",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_COMP_OPS_EXE.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Computational floating-point operations executed"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "FP_MMX_TRANS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All Floating Point to and from MMX transitions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "FP_MMX_TRANS.TO_FP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from MMX to Floating Point instructions"
+ },
+ {
+ "EventCode": "0xCC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "FP_MMX_TRANS.TO_MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Transitions from Floating Point to MMX instructions"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_128.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer pack operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_128.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer arithmetic operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_128.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer logical operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_128.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer multiply operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_128.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shift operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_128.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer shuffle/move operations"
+ },
+ {
+ "EventCode": "0x12",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_128.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "128 bit SIMD integer unpack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SIMD_INT_64.PACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit pack operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "SIMD_INT_64.PACKED_ARITH",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit arithmetic operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SIMD_INT_64.PACKED_LOGICAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit logical operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SIMD_INT_64.PACKED_MPY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit packed multiply operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SIMD_INT_64.PACKED_SHIFT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shift operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "SIMD_INT_64.SHUFFLE_MOVE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit shuffle/move operations"
+ },
+ {
+ "EventCode": "0xFD",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SIMD_INT_64.UNPACK",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD integer 64 bit unpack operations"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/frontend.json b/tools/perf/pmu-events/arch/x86/westmereex/frontend.json
new file mode 100644
index 000000000..e5e21e034
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/frontend.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0xD0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions decoded"
+ },
+ {
+ "EventCode": "0xA6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACRO_INSTS.FUSIONS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused instructions decoded"
+ },
+ {
+ "EventCode": "0x19",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "TWO_UOP_INSTS_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Two Uop instructions decoded"
+ }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/memory.json b/tools/perf/pmu-events/arch/x86/westmereex/memory.json
new file mode 100644
index 000000000..3ba555e73
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/memory.json
@@ -0,0 +1,747 @@
+[
+ {
+ "EventCode": "0x5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MISALIGN_MEM_REF.STORE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Misaligned store references"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF811",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2011",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF844",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2044",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x60FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF8FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x40FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x20FF",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF822",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2022",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.ANY_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF808",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2008",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.COREWB.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore writebacks to a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF877",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2077",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore code or data read requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+    "BriefDescription": "Offcore data reads, RFOs and prefetches satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF833",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+    "BriefDescription": "Offcore data reads, RFOs and prefetches that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+    "BriefDescription": "Offcore data reads, RFOs and prefetches satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2033",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DATA_IN.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+    "BriefDescription": "Offcore data reads, RFOs and prefetches satisfied by the remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF803",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2003",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF801",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2001",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF804",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2004",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF802",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2002",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore demand RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF880",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2080",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.OTHER.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore other requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF830",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2030",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF810",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2010",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_DATA_RD.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch data reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF840",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2040",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_IFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch code reads satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF820",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2020",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PF_RFO.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch RFO requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x6070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by any DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0xF870",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests that missed the LLC",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x4070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.LOCAL_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by the local DRAM",
+ "Offcore": "1"
+ },
+ {
+ "EventCode": "0xB7",
+ "MSRValue": "0x2070",
+ "Counter": "2",
+ "UMask": "0x1",
+ "EventName": "OFFCORE_RESPONSE.PREFETCH.REMOTE_DRAM",
+ "MSRIndex": "0x1A6",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Offcore prefetch requests satisfied by a remote DRAM",
+ "Offcore": "1"
+ }
+]
\ No newline at end of file
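
The OFFCORE_RESPONSE.* entries above all share EventCode 0xB7 and differ only in "MSRValue", the request/response bit pattern programmed into the auxiliary MSR named by "MSRIndex" (0x1A6). jevents.c (further below) maps MSRIndex 0x1A6 to the perf format term offcore_rsp=, so each entry reduces to an ordinary raw event. A minimal hand-written sketch, assuming the standard Intel core PMU format terms (event=, umask=, offcore_rsp=) and taking the mask from the JSON above:

    perf stat -e cpu/event=0xb7,umask=0x1,offcore_rsp=0x4011/ -a -- sleep 1

This should count OFFCORE_RESPONSE.ANY_DATA.LOCAL_DRAM system-wide for one second.
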
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/other.json b/tools/perf/pmu-events/arch/x86/westmereex/other.json
new file mode 100644
index 000000000..85133d6a5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/other.json
@@ -0,0 +1,287 @@
+[
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_CLEARS.EARLY",
+ "SampleAfterValue": "2000000",
+    "BriefDescription": "Early Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BPU_CLEARS.LATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Late Branch Prediction Unit clears"
+ },
+ {
+ "EventCode": "0xE5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BPU_MISSED_CALL_RET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch prediction unit missed call or return"
+ },
+ {
+ "EventCode": "0xD5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ES_REG_RENAMES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ES segment renames"
+ },
+ {
+ "EventCode": "0x6C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "IO_TRANSACTIONS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "I/O transactions"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "L1I.CYCLES_STALLED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch stall cycles"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "L1I.HITS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch hits"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "L1I.MISSES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I instruction fetch misses"
+ },
+ {
+ "EventCode": "0x80",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3",
+ "EventName": "L1I.READS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "L1I Instruction fetches"
+ },
+ {
+ "EventCode": "0x82",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LARGE_ITLB.HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Large ITLB hit"
+ },
+ {
+ "EventCode": "0x3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_BLOCK.OVERLAP_STORE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Loads that partially overlap an earlier store"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "LOAD_DISPATCH.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All loads dispatched"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "LOAD_DISPATCH.MOB",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LOAD_DISPATCH.RS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched that bypass the MOB"
+ },
+ {
+ "EventCode": "0x13",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "LOAD_DISPATCH.RS_DELAYED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loads dispatched from stage 305"
+ },
+ {
+ "EventCode": "0x7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "PARTIAL_ADDRESS_ALIAS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "False dependencies due to partial address aliasing"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "RAT_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "All RAT stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RAT_STALLS.FLAGS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Flag stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RAT_STALLS.REGISTERS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Partial register stall cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RAT_STALLS.ROB_READ_PORT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB read port stalls cycles"
+ },
+ {
+ "EventCode": "0xD2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RAT_STALLS.SCOREBOARD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Scoreboard stall cycles"
+ },
+ {
+ "EventCode": "0x4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "SB_DRAIN.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "All Store buffer stall cycles"
+ },
+ {
+ "EventCode": "0xD4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SEG_RENAME_STALLS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Segment rename stall cycles"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOP_RESPONSE.HIT",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HIT to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOP_RESPONSE.HITE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITE to snoop"
+ },
+ {
+ "EventCode": "0xB8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOP_RESPONSE.HITM",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Thread responded HITM to snoop"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS.CODE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop code requests"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS.DATA",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop data requests"
+ },
+ {
+ "EventCode": "0xB4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS.INVALIDATE",
+ "SampleAfterValue": "100000",
+ "BriefDescription": "Snoop invalidate requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop code requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x4",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.CODE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop code requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop data requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x1",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.DATA_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop data requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Outstanding snoop invalidate requests"
+ },
+ {
+ "EventCode": "0xB3",
+ "UMask": "0x2",
+ "EventName": "SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE_NOT_EMPTY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles snoop invalidate requests queued",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xF6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SQ_FULL_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Super Queue full stall cycles"
+ }
+]
\ No newline at end of file
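
The SNOOPQ_REQUESTS_OUTSTANDING.*_NOT_EMPTY entries above pair an occupancy event with "CounterMask": "1", which jevents.c (below) emits as cmask=1: the counter then increments once per cycle in which the occupancy is at least one, turning a summed-occupancy event into a cycle count. A hand-written sketch of the DATA_NOT_EMPTY variant, assuming the standard cmask= format term (./workload is a placeholder for the command under test):

    perf stat -e cpu/event=0xb3,umask=0x1,cmask=1/ -- ./workload
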
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/pipeline.json b/tools/perf/pmu-events/arch/x86/westmereex/pipeline.json
new file mode 100644
index 000000000..799c57d94
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/pipeline.json
@@ -0,0 +1,905 @@
+[
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.CYCLES_DIV_BUSY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles the divider is busy"
+ },
+ {
+ "EventCode": "0x14",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ARITH.DIV",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Divide Operations executed",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0x14",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ARITH.MUL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Multiply operations executed"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BACLEAR.BAD_TARGET",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "BACLEAR asserted with bad target address"
+ },
+ {
+ "EventCode": "0xE6",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR.CLEAR",
+ "SampleAfterValue": "2000000",
+    "BriefDescription": "BACLEAR asserted, regardless of cause"
+ },
+ {
+ "EventCode": "0xA7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BACLEAR_FORCE_IQ",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction queue forced BACLEAR"
+ },
+ {
+ "EventCode": "0xE0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_DECODED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Branch instructions decoded"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_INST_EXEC.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_EXEC.COND",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Conditional branch instructions executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_EXEC.DIRECT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Unconditional branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_INST_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_INST_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "20000",
+    "BriefDescription": "Indirect non-call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_INST_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_INST_EXEC.NON_CALLS",
+ "SampleAfterValue": "200000",
+    "BriefDescription": "All non-call branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_INST_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Indirect return branches executed"
+ },
+ {
+ "EventCode": "0x88",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_INST_EXEC.TAKEN",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_INST_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired conditional branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC4",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_INST_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Retired near call instructions (Precise Event)"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7f",
+ "EventName": "BR_MISP_EXEC.ANY",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_EXEC.COND",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_EXEC.DIRECT",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted unconditional branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "BR_MISP_EXEC.DIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+    "BriefDescription": "Mispredicted unconditional call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted indirect call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_EXEC.INDIRECT_NON_CALL",
+ "SampleAfterValue": "2000",
+    "BriefDescription": "Mispredicted indirect non-call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x30",
+ "EventName": "BR_MISP_EXEC.NEAR_CALLS",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x7",
+ "EventName": "BR_MISP_EXEC.NON_CALLS",
+ "SampleAfterValue": "20000",
+    "BriefDescription": "Mispredicted non-call branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "BR_MISP_EXEC.RETURN_NEAR",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted return branches executed"
+ },
+ {
+ "EventCode": "0x89",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "BR_MISP_EXEC.TAKEN",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted taken branches executed"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted retired branch instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "BR_MISP_RETIRED.CONDITIONAL",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Mispredicted conditional retired branches (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC5",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "BR_MISP_RETIRED.NEAR_CALL",
+ "SampleAfterValue": "2000",
+ "BriefDescription": "Mispredicted near retired calls (Precise Event)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.REF",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reference cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "CPU_CLK_UNHALTED.REF_P",
+ "SampleAfterValue": "100000",
+    "BriefDescription": "Reference base clock (133 MHz) cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 2",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (fixed counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when thread is not halted (programmable counter)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total CPU cycles",
+ "CounterMask": "2"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0xf",
+ "EventName": "ILD_STALL.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Any Instruction Length Decoder stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ILD_STALL.IQ_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instruction Queue full stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ILD_STALL.LCP",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Length Change Prefix stall cycles"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ILD_STALL.MRU",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stall cycles due to BPU MRU bypass"
+ },
+ {
+ "EventCode": "0x87",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "ILD_STALL.REGEN",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Regen stall cycles"
+ },
+ {
+ "EventCode": "0x18",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_DECODED.DEC0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions that must be decoded by decoder 0"
+ },
+ {
+ "EventCode": "0x1E",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles instructions are written to the instruction queue"
+ },
+ {
+ "EventCode": "0x17",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_QUEUE_WRITES",
+ "SampleAfterValue": "2000000",
+    "BriefDescription": "Instructions written to the instruction queue"
+ },
+ {
+ "EventCode": "0x0",
+ "Counter": "Fixed counter 1",
+ "UMask": "0x0",
+ "EventName": "INST_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (fixed counter)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.ANY_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Instructions retired (Programmable counter and Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "INST_RETIRED.MMX",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired MMX instructions (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC0",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "INST_RETIRED.X87",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retired floating-point operations (Precise Event)"
+ },
+ {
+ "EventCode": "0x4C",
+ "Counter": "0,1",
+ "UMask": "0x1",
+ "EventName": "LOAD_HIT_PRE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Load operations conflicting with software prefetches"
+ },
+ {
+ "EventCode": "0xA8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles when uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xA8",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD.INACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no uops were delivered by the LSD",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0x20",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "LSD_OVERFLOW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Loops that can't stream from the instruction queue"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MACHINE_CLEARS.CYCLES",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Cycles machine clear asserted"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "MACHINE_CLEARS.MEM_ORDER",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Execution pipeline restart due to Memory ordering conflicts"
+ },
+ {
+ "EventCode": "0xC3",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "MACHINE_CLEARS.SMC",
+ "SampleAfterValue": "20000",
+ "BriefDescription": "Self-Modifying Code detected"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "RESOURCE_STALLS.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "RESOURCE_STALLS.FPCW",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "FPU control word write stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "RESOURCE_STALLS.LOAD",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Load buffer stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "RESOURCE_STALLS.MXCSR",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "MXCSR rename stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "RESOURCE_STALLS.OTHER",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Other Resource related stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "RESOURCE_STALLS.ROB_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ROB full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "RESOURCE_STALLS.RS_FULL",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Reservation Station full stall cycles"
+ },
+ {
+ "EventCode": "0xA2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "RESOURCE_STALLS.STORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Store buffer stall cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "SSEX_UOPS_RETIRED.PACKED_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Packed-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_DOUBLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Double Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "SSEX_UOPS_RETIRED.SCALAR_SINGLE",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Scalar-Single Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC7",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "SSEX_UOPS_RETIRED.VECTOR_INTEGER",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "SIMD Vector Integer Uops retired (Precise Event)"
+ },
+ {
+ "EventCode": "0x3C",
+ "Counter": "0,1,2,3",
+ "UMask": "0x0",
+ "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles thread is active"
+ },
+ {
+ "EventCode": "0xDB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOP_UNFUSION",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uop unfusions due to FP exceptions"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_DECODED.ESP_FOLDING",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer instructions decoded"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "EventName": "UOPS_DECODED.ESP_SYNC",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Stack pointer sync operations"
+ },
+ {
+ "EventCode": "0xD1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_DECODED.MS_CYCLES_ACTIVE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops decoded by Microcode Sequencer",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xD1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_DECODED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops are decoded",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on any port (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_COUNT_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on ports 0-4 (core count)",
+ "CounterMask": "1",
+ "EdgeDetect": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x3f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on any port (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1f",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.CORE_STALL_CYCLES_NO_PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on ports 0-4 (core count)",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_EXECUTED.PORT0",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 0"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015",
+ "SampleAfterValue": "2000000",
+    "BriefDescription": "Uops executed on ports 0, 1 or 5"
+ },
+ {
+ "EventCode": "0xB1",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x40",
+ "EventName": "UOPS_EXECUTED.PORT015_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops issued on ports 0, 1 or 5",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_EXECUTED.PORT1",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 1"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT2_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 2 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT234_CORE",
+ "SampleAfterValue": "2000000",
+    "BriefDescription": "Uops executed on ports 2, 3 or 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x8",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT3_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 3 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "AnyThread": "1",
+ "EventName": "UOPS_EXECUTED.PORT4_CORE",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 4 (core count)"
+ },
+ {
+ "EventCode": "0xB1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "UOPS_EXECUTED.PORT5",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops executed on port 5"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued on any thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "AnyThread": "1",
+ "EventName": "UOPS_ISSUED.CYCLES_ALL_THREADS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops were issued on either thread",
+ "CounterMask": "1"
+ },
+ {
+ "EventCode": "0xE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_ISSUED.FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Fused Uops issued"
+ },
+ {
+ "EventCode": "0xE",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_ISSUED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles no Uops were issued",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ACTIVE_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are being retired",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.ANY",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "UOPS_RETIRED.MACRO_FUSED",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Macro-fused Uops retired (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "UOPS_RETIRED.RETIRE_SLOTS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Retirement slots used (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.STALL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Cycles Uops are not retiring (Precise Event)",
+ "CounterMask": "1"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC2",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "UOPS_RETIRED.TOTAL_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles using precise uop retired event (Precise Event)",
+ "CounterMask": "16"
+ },
+ {
+ "PEBS": "2",
+ "EventCode": "0xC0",
+ "Invert": "1",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "INST_RETIRED.TOTAL_CYCLES_PS",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Total cycles (Precise Event)",
+ "CounterMask": "16"
+ }
+]
\ No newline at end of file
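
A recurring idiom above: the *_STALL_CYCLES events (e.g. UOPS_ISSUED.STALL_CYCLES, UOPS_DECODED.STALL_CYCLES) combine "Invert": "1" with "CounterMask": "1", i.e. inv=1,cmask=1 in perf terms, so the counter increments on cycles where the underlying event count is below one, which inverts an activity event into a stall-cycle count. A hand-written sketch of UOPS_ISSUED.STALL_CYCLES, assuming the standard inv=/cmask= format terms (./workload is a placeholder):

    perf stat -e cpu/event=0xe,umask=0x1,inv=1,cmask=1/ -- ./workload
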
diff --git a/tools/perf/pmu-events/arch/x86/westmereex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/westmereex/virtual-memory.json
new file mode 100644
index 000000000..ad989207e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/westmereex/virtual-memory.json
@@ -0,0 +1,173 @@
+[
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_LOAD_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load misses"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss large page walks"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_LOAD_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss caused by low part of address"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB second level hit"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walks complete"
+ },
+ {
+ "EventCode": "0x8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_LOAD_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB load miss page walk cycles"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "DTLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "DTLB_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss large page walks"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "DTLB_MISSES.PDE_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB misses caused by low part of address. Count also includes 2M page references because 2M pages do not use the PDE."
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "DTLB_MISSES.STLB_HIT",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB first level misses but second level hit"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "DTLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "DTLB miss page walks"
+ },
+ {
+ "EventCode": "0x49",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "DTLB_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "DTLB miss page walk cycles"
+ },
+ {
+ "EventCode": "0x4F",
+ "Counter": "0,1,2,3",
+ "UMask": "0x10",
+ "EventName": "EPT.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "Extended Page Table walk cycles"
+ },
+ {
+ "EventCode": "0xAE",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_FLUSH",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB flushes"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC8",
+ "Counter": "0,1,2,3",
+ "UMask": "0x20",
+ "EventName": "ITLB_MISS_RETIRED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired instructions that missed the ITLB (Precise Event)"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "ITLB_MISSES.ANY",
+ "SampleAfterValue": "200000",
+    "BriefDescription": "ITLB misses"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "ITLB_MISSES.LARGE_WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss large page walks"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x2",
+ "EventName": "ITLB_MISSES.WALK_COMPLETED",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "ITLB miss page walks"
+ },
+ {
+ "EventCode": "0x85",
+ "Counter": "0,1,2,3",
+ "UMask": "0x4",
+ "EventName": "ITLB_MISSES.WALK_CYCLES",
+ "SampleAfterValue": "2000000",
+ "BriefDescription": "ITLB miss page walk cycles"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xCB",
+ "Counter": "0,1,2,3",
+ "UMask": "0x80",
+ "EventName": "MEM_LOAD_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired loads that miss the DTLB (Precise Event)"
+ },
+ {
+ "PEBS": "1",
+ "EventCode": "0xC",
+ "Counter": "0,1,2,3",
+ "UMask": "0x1",
+ "EventName": "MEM_STORE_RETIRED.DTLB_MISS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Retired stores that miss the DTLB (Precise Event)"
+ }
+]
\ No newline at end of file
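
Entries marked "PEBS": "1" above, such as MEM_LOAD_RETIRED.DTLB_MISS, support precise sampling. When sampling rather than counting, a precise-level modifier can be appended after the event descriptor; as a sketch (the :pp modifier is generic perf syntax, not taken from these files, and ./workload is a placeholder):

    perf record -e cpu/event=0xcb,umask=0x80/pp -- ./workload

This should sample retired loads that missed the DTLB with reduced skid.
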
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
new file mode 100644
index 000000000..0f5a63026
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.c
@@ -0,0 +1,1172 @@
+#define _XOPEN_SOURCE 500 /* needed for nftw() */
+#define _GNU_SOURCE /* needed for asprintf() */
+
+/* Parse event JSON files */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <libgen.h>
+#include <limits.h>
+#include <dirent.h>
+#include <sys/time.h> /* getrlimit */
+#include <sys/resource.h> /* getrlimit */
+#include <ftw.h>
+#include <sys/stat.h>
+#include <linux/list.h>
+#include "jsmn.h"
+#include "json.h"
+#include "jevents.h"
+
+int verbose;
+char *prog;
+
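+/*
+ * Print to stderr when the requested verbosity 'var' is at least 'level';
+ * returns the number of characters written, or 0 when filtered out.
+ */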
+int eprintf(int level, int var, const char *fmt, ...)
+{
+ int ret;
+ va_list args;
+
+ if (var < level)
+ return 0;
+
+ va_start(args, fmt);
+
+ ret = vfprintf(stderr, fmt, args);
+
+ va_end(args);
+
+ return ret;
+}
+
+__attribute__((weak)) char *get_cpu_str(void)
+{
+ return NULL;
+}
+
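+/*
+ * Append string 'a' (and the text of JSON token 'bt', if any) to the
+ * dynamically grown string at *dst, separated from existing content by
+ * 'sep'. The buffer is reallocated as needed; on allocation failure the
+ * field is silently dropped.
+ */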
+static void addfield(char *map, char **dst, const char *sep,
+ const char *a, jsmntok_t *bt)
+{
+ unsigned int len = strlen(a) + 1 + strlen(sep);
+ int olen = *dst ? strlen(*dst) : 0;
+ int blen = bt ? json_len(bt) : 0;
+ char *out;
+
+ out = realloc(*dst, len + olen + blen);
+ if (!out) {
+ /* Don't add field in this case */
+ return;
+ }
+ *dst = out;
+
+ if (!olen)
+ *(*dst) = 0;
+ else
+ strcat(*dst, sep);
+ strcat(*dst, a);
+ if (bt)
+ strncat(*dst, map + bt->start, blen);
+}
+
+static void fixname(char *s)
+{
+ for (; *s; s++)
+ *s = tolower(*s);
+}
+
+static void fixdesc(char *s)
+{
+ char *e = s + strlen(s);
+
+ /* Remove trailing dots that look ugly in perf list */
+ --e;
+ while (e >= s && isspace(*e))
+ --e;
+ if (*e == '.')
+ *e = 0;
+}
+
+/* Add escapes for '\' so they are proper C strings. */
+static char *fixregex(char *s)
+{
+ int len = 0;
+ int esc_count = 0;
+ char *fixed = NULL;
+ char *p, *q;
+
+ /* Count the number of '\' in string */
+ for (p = s; *p; p++) {
+ ++len;
+ if (*p == '\\')
+ ++esc_count;
+ }
+
+ if (esc_count == 0)
+ return s;
+
+ /* allocate space for a new string */
+ fixed = (char *) malloc(len + esc_count + 1);
+ if (!fixed)
+ return NULL;
+
+ /* copy over the characters */
+ q = fixed;
+ for (p = s; *p; p++) {
+ if (*p == '\\') {
+ *q = '\\';
+ ++q;
+ }
+ *q = *p;
+ ++q;
+ }
+ *q = '\0';
+ return fixed;
+}
+
+static struct msrmap {
+ const char *num;
+ const char *pname;
+} msrmap[] = {
+ { "0x3F6", "ldlat=" },
+ { "0x1A6", "offcore_rsp=" },
+ { "0x1A7", "offcore_rsp=" },
+ { "0x3F7", "frontend=" },
+ { NULL, NULL }
+};
+
+static struct field {
+ const char *field;
+ const char *kernel;
+} fields[] = {
+ { "UMask", "umask=" },
+ { "CounterMask", "cmask=" },
+ { "Invert", "inv=" },
+ { "AnyThread", "any=" },
+ { "EdgeDetect", "edge=" },
+ { "SampleAfterValue", "period=" },
+ { "FCMask", "fc_mask=" },
+ { "PortMask", "ch_mask=" },
+ { NULL, NULL }
+};
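+
+/*
+ * For example, with the table above a JSON field "UMask": "0x1" is
+ * emitted into the synthesized perf event string as "umask=0x1", and
+ * "CounterMask": "2" as "cmask=2"; see match_field() below.
+ */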
+
+static void cut_comma(char *map, jsmntok_t *newval)
+{
+ int i;
+
+ /* Cut off everything after comma */
+ for (i = newval->start; i < newval->end; i++) {
+ if (map[i] == ',')
+ newval->end = i;
+ }
+}
+
+static int match_field(char *map, jsmntok_t *field, int nz,
+ char **event, jsmntok_t *val)
+{
+ struct field *f;
+ jsmntok_t newval = *val;
+
+ for (f = fields; f->field; f++)
+ if (json_streq(map, field, f->field) && nz) {
+ cut_comma(map, &newval);
+ addfield(map, event, ",", f->kernel, &newval);
+ return 1;
+ }
+ return 0;
+}
+
+static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
+{
+ jsmntok_t newval = *val;
+ static bool warned;
+ int i;
+
+ cut_comma(map, &newval);
+ for (i = 0; msrmap[i].num; i++)
+ if (json_streq(map, &newval, msrmap[i].num))
+ return &msrmap[i];
+ if (!warned) {
+ warned = true;
+ pr_err("%s: Unknown MSR in event file %.*s\n", prog,
+ json_len(val), map + val->start);
+ }
+ return NULL;
+}
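+
+/*
+ * For example, an event carrying "MSRIndex": "0x3F6" and
+ * "MSRValue": "0x3" ends up with "ldlat=0x3" appended to its event
+ * string: json_events() appends the MSRValue token after the pname
+ * returned here.
+ */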
+
+static struct map {
+ const char *json;
+ const char *perf;
+} unit_to_pmu[] = {
+ { "CBO", "uncore_cbox" },
+ { "QPI LL", "uncore_qpi" },
+ { "SBO", "uncore_sbox" },
+ { "iMPH-U", "uncore_arb" },
+ { "CPU-M-CF", "cpum_cf" },
+ { "CPU-M-SF", "cpum_sf" },
+ {}
+};
+
+static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val)
+{
+ int i;
+
+ for (i = 0; table[i].json; i++) {
+ if (json_streq(map, val, table[i].json))
+ return table[i].perf;
+ }
+ return NULL;
+}
+
+#define EXPECT(e, t, m) do { if (!(e)) { \
+ jsmntok_t *loc = (t); \
+ if (!(t)->start && (t) > tokens) \
+ loc = (t) - 1; \
+ pr_err("%s:%d: " m ", got %s\n", fn, \
+ json_line(map, loc), \
+ json_name(t)); \
+ err = -EIO; \
+ goto out_free; \
+} } while (0)
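+
+/*
+ * EXPECT() reports the JSON line and the type name of the offending
+ * token, then bails out of json_events() with -EIO via the out_free
+ * label.
+ */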
+
+static char *topic;
+
+static char *get_topic(void)
+{
+ char *tp;
+ int i;
+
+ /* tp is freed in process_one_file() */
+ i = asprintf(&tp, "%s", topic);
+ if (i < 0) {
+ pr_info("%s: asprintf() error %s\n", prog);
+ return NULL;
+ }
+
+ for (i = 0; i < (int) strlen(tp); i++) {
+ char c = tp[i];
+
+ if (c == '-')
+ tp[i] = ' ';
+ else if (c == '.') {
+ tp[i] = '\0';
+ break;
+ }
+ }
+
+ return tp;
+}
+
+static int add_topic(char *bname)
+{
+ free(topic);
+ topic = strdup(bname);
+ if (!topic) {
+ pr_info("%s: strdup() error %s for file %s\n", prog,
+ strerror(errno), bname);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+struct perf_entry_data {
+ FILE *outfp;
+ char *topic;
+};
+
+static int close_table;
+
+static void print_events_table_prefix(FILE *fp, const char *tblname)
+{
+ fprintf(fp, "struct pmu_event %s[] = {\n", tblname);
+ close_table = 1;
+}
+
+static int print_events_table_entry(void *data, char *name, char *event,
+ char *desc, char *long_desc,
+ char *pmu, char *unit, char *perpkg,
+ char *metric_expr,
+ char *metric_name, char *metric_group)
+{
+ struct perf_entry_data *pd = data;
+ FILE *outfp = pd->outfp;
+ char *topic = pd->topic;
+
+ /*
+ * TODO: Remove formatting chars after debugging to reduce
+ * string lengths.
+ */
+ fprintf(outfp, "{\n");
+
+ if (name)
+ fprintf(outfp, "\t.name = \"%s\",\n", name);
+ if (event)
+ fprintf(outfp, "\t.event = \"%s\",\n", event);
+ fprintf(outfp, "\t.desc = \"%s\",\n", desc);
+ fprintf(outfp, "\t.topic = \"%s\",\n", topic);
+ if (long_desc && long_desc[0])
+ fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
+ if (pmu)
+ fprintf(outfp, "\t.pmu = \"%s\",\n", pmu);
+ if (unit)
+ fprintf(outfp, "\t.unit = \"%s\",\n", unit);
+ if (perpkg)
+ fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg);
+ if (metric_expr)
+ fprintf(outfp, "\t.metric_expr = \"%s\",\n", metric_expr);
+ if (metric_name)
+ fprintf(outfp, "\t.metric_name = \"%s\",\n", metric_name);
+ if (metric_group)
+ fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
+ fprintf(outfp, "},\n");
+
+ return 0;
+}
+
+struct event_struct {
+ struct list_head list;
+ char *name;
+ char *event;
+ char *desc;
+ char *long_desc;
+ char *pmu;
+ char *unit;
+ char *perpkg;
+ char *metric_expr;
+ char *metric_name;
+ char *metric_group;
+};
+
+#define ADD_EVENT_FIELD(field) do { if (field) { \
+ es->field = strdup(field); \
+ if (!es->field) \
+ goto out_free; \
+} } while (0)
+
+#define FREE_EVENT_FIELD(field) free(es->field)
+
+#define TRY_FIXUP_FIELD(field) do { if (es->field && !*field) {\
+ *field = strdup(es->field); \
+ if (!*field) \
+ return -ENOMEM; \
+} } while (0)
+
+#define FOR_ALL_EVENT_STRUCT_FIELDS(op) do { \
+ op(name); \
+ op(event); \
+ op(desc); \
+ op(long_desc); \
+ op(pmu); \
+ op(unit); \
+ op(perpkg); \
+ op(metric_expr); \
+ op(metric_name); \
+ op(metric_group); \
+} while (0)
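+
+/*
+ * FOR_ALL_EVENT_STRUCT_FIELDS() is an X-macro: it applies one of the
+ * ADD/FREE/TRY_FIXUP helpers above to every string field of struct
+ * event_struct, so the field list is maintained in one place.
+ */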
+
+static LIST_HEAD(arch_std_events);
+
+static void free_arch_std_events(void)
+{
+ struct event_struct *es, *next;
+
+ list_for_each_entry_safe(es, next, &arch_std_events, list) {
+ FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD);
+ list_del(&es->list);
+ free(es);
+ }
+}
+
+static int save_arch_std_events(void *data, char *name, char *event,
+ char *desc, char *long_desc, char *pmu,
+ char *unit, char *perpkg, char *metric_expr,
+ char *metric_name, char *metric_group)
+{
+ struct event_struct *es;
+ struct stat *sb = data;
+
+ es = malloc(sizeof(*es));
+ if (!es)
+ return -ENOMEM;
+ memset(es, 0, sizeof(*es));
+ FOR_ALL_EVENT_STRUCT_FIELDS(ADD_EVENT_FIELD);
+ list_add_tail(&es->list, &arch_std_events);
+ return 0;
+out_free:
+ FOR_ALL_EVENT_STRUCT_FIELDS(FREE_EVENT_FIELD);
+ free(es);
+ return -ENOMEM;
+}
+
+static void print_events_table_suffix(FILE *outfp)
+{
+ fprintf(outfp, "{\n");
+
+ fprintf(outfp, "\t.name = 0,\n");
+ fprintf(outfp, "\t.event = 0,\n");
+ fprintf(outfp, "\t.desc = 0,\n");
+
+ fprintf(outfp, "},\n");
+ fprintf(outfp, "};\n");
+ close_table = 0;
+}
+
+static struct fixed {
+ const char *name;
+ const char *event;
+} fixed[] = {
+ { "inst_retired.any", "event=0xc0,period=2000003" },
+ { "inst_retired.any_p", "event=0xc0,period=2000003" },
+ { "cpu_clk_unhalted.ref", "event=0x0,umask=0x03,period=2000003" },
+ { "cpu_clk_unhalted.thread", "event=0x3c,period=2000003" },
+ { "cpu_clk_unhalted.core", "event=0x3c,period=2000003" },
+ { "cpu_clk_unhalted.thread_any", "event=0x3c,any=1,period=2000003" },
+ { NULL, NULL},
+};
+
+/*
+ * Handle different fixed counter encodings between JSON and perf.
+ */
+static char *real_event(const char *name, char *event)
+{
+ int i;
+
+ if (!name)
+ return NULL;
+
+ for (i = 0; fixed[i].name; i++)
+ if (!strcasecmp(name, fixed[i].name))
+ return (char *)fixed[i].event;
+ return event;
+}
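+
+/*
+ * For example, "inst_retired.any" is always emitted as
+ * "event=0xc0,period=2000003" from the fixed[] table above, no matter
+ * what encoding the JSON file gave for it.
+ */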
+
+static int
+try_fixup(const char *fn, char *arch_std, char **event, char **desc,
+ char **name, char **long_desc, char **pmu, char **filter,
+ char **perpkg, char **unit, char **metric_expr, char **metric_name,
+ char **metric_group, unsigned long long eventcode)
+{
+ /* try to find matching event from arch standard values */
+ struct event_struct *es;
+
+ list_for_each_entry(es, &arch_std_events, list) {
+ if (!strcmp(arch_std, es->name)) {
+ if (!eventcode && es->event) {
+ /* allow EventCode to be overridden */
+ free(*event);
+ *event = NULL;
+ }
+ FOR_ALL_EVENT_STRUCT_FIELDS(TRY_FIXUP_FIELD);
+ return 0;
+ }
+ }
+
+ pr_err("%s: could not find matching %s for %s\n",
+ prog, arch_std, fn);
+ return -1;
+}
+
+/* Call func with each event in the json file */
+int json_events(const char *fn,
+ int (*func)(void *data, char *name, char *event, char *desc,
+ char *long_desc,
+ char *pmu, char *unit, char *perpkg,
+ char *metric_expr,
+ char *metric_name, char *metric_group),
+ void *data)
+{
+ int err;
+ size_t size;
+ jsmntok_t *tokens, *tok;
+ int i, j, len;
+ char *map;
+ char buf[128];
+
+ if (!fn)
+ return -ENOENT;
+
+ tokens = parse_json(fn, &map, &size, &len);
+ if (!tokens)
+ return -EIO;
+ EXPECT(tokens->type == JSMN_ARRAY, tokens, "expected top level array");
+ tok = tokens + 1;
+ for (i = 0; i < tokens->size; i++) {
+ char *event = NULL, *desc = NULL, *name = NULL;
+ char *long_desc = NULL;
+ char *extra_desc = NULL;
+ char *pmu = NULL;
+ char *filter = NULL;
+ char *perpkg = NULL;
+ char *unit = NULL;
+ char *metric_expr = NULL;
+ char *metric_name = NULL;
+ char *metric_group = NULL;
+ char *arch_std = NULL;
+ unsigned long long eventcode = 0;
+ struct msrmap *msr = NULL;
+ jsmntok_t *msrval = NULL;
+ jsmntok_t *precise = NULL;
+ jsmntok_t *obj = tok++;
+
+ EXPECT(obj->type == JSMN_OBJECT, obj, "expected object");
+ for (j = 0; j < obj->size; j += 2) {
+ jsmntok_t *field, *val;
+ int nz;
+ char *s;
+
+ field = tok + j;
+ EXPECT(field->type == JSMN_STRING, tok + j,
+ "Expected field name");
+ val = tok + j + 1;
+ EXPECT(val->type == JSMN_STRING, tok + j + 1,
+ "Expected string value");
+
+ nz = !json_streq(map, val, "0");
+ if (match_field(map, field, nz, &event, val)) {
+ /* ok */
+ } else if (json_streq(map, field, "EventCode")) {
+ char *code = NULL;
+ addfield(map, &code, "", "", val);
+ eventcode |= strtoul(code, NULL, 0);
+ free(code);
+ } else if (json_streq(map, field, "ExtSel")) {
+ char *code = NULL;
+ addfield(map, &code, "", "", val);
+ eventcode |= strtoul(code, NULL, 0) << 8;
+ free(code);
+ } else if (json_streq(map, field, "EventName")) {
+ addfield(map, &name, "", "", val);
+ } else if (json_streq(map, field, "BriefDescription")) {
+ addfield(map, &desc, "", "", val);
+ fixdesc(desc);
+ } else if (json_streq(map, field,
+ "PublicDescription")) {
+ addfield(map, &long_desc, "", "", val);
+ fixdesc(long_desc);
+ } else if (json_streq(map, field, "PEBS") && nz) {
+ precise = val;
+ } else if (json_streq(map, field, "MSRIndex") && nz) {
+ msr = lookup_msr(map, val);
+ } else if (json_streq(map, field, "MSRValue")) {
+ msrval = val;
+ } else if (json_streq(map, field, "Errata") &&
+ !json_streq(map, val, "null")) {
+ addfield(map, &extra_desc, ". ",
+ " Spec update: ", val);
+ } else if (json_streq(map, field, "Data_LA") && nz) {
+ addfield(map, &extra_desc, ". ",
+ " Supports address when precise",
+ NULL);
+ } else if (json_streq(map, field, "Unit")) {
+ const char *ppmu;
+
+ ppmu = field_to_perf(unit_to_pmu, map, val);
+ if (ppmu) {
+ pmu = strdup(ppmu);
+ } else {
+ if (!pmu)
+ pmu = strdup("uncore_");
+ addfield(map, &pmu, "", "", val);
+ for (s = pmu; *s; s++)
+ *s = tolower(*s);
+ }
+ addfield(map, &desc, ". ", "Unit: ", NULL);
+ addfield(map, &desc, "", pmu, NULL);
+ addfield(map, &desc, "", " ", NULL);
+ } else if (json_streq(map, field, "Filter")) {
+ addfield(map, &filter, "", "", val);
+ } else if (json_streq(map, field, "ScaleUnit")) {
+ addfield(map, &unit, "", "", val);
+ } else if (json_streq(map, field, "PerPkg")) {
+ addfield(map, &perpkg, "", "", val);
+ } else if (json_streq(map, field, "MetricName")) {
+ addfield(map, &metric_name, "", "", val);
+ } else if (json_streq(map, field, "MetricGroup")) {
+ addfield(map, &metric_group, "", "", val);
+ } else if (json_streq(map, field, "MetricExpr")) {
+ addfield(map, &metric_expr, "", "", val);
+ for (s = metric_expr; *s; s++)
+ *s = tolower(*s);
+ } else if (json_streq(map, field, "ArchStdEvent")) {
+ addfield(map, &arch_std, "", "", val);
+ for (s = arch_std; *s; s++)
+ *s = tolower(*s);
+ }
+ /* ignore unknown fields */
+ }
+ if (precise && desc && !strstr(desc, "(Precise Event)")) {
+ if (json_streq(map, precise, "2"))
+ addfield(map, &extra_desc, " ",
+ "(Must be precise)", NULL);
+ else
+ addfield(map, &extra_desc, " ",
+ "(Precise event)", NULL);
+ }
+ snprintf(buf, sizeof buf, "event=%#llx", eventcode);
+ addfield(map, &event, ",", buf, NULL);
+ if (desc && extra_desc)
+ addfield(map, &desc, " ", extra_desc, NULL);
+ if (long_desc && extra_desc)
+ addfield(map, &long_desc, " ", extra_desc, NULL);
+ if (filter)
+ addfield(map, &event, ",", filter, NULL);
+ if (msr != NULL)
+ addfield(map, &event, ",", msr->pname, msrval);
+ if (name)
+ fixname(name);
+
+ if (arch_std) {
+ /*
+ * An arch standard event is referenced, so try to
+ * fixup any unassigned values.
+ */
+ err = try_fixup(fn, arch_std, &event, &desc, &name,
+ &long_desc, &pmu, &filter, &perpkg,
+ &unit, &metric_expr, &metric_name,
+ &metric_group, eventcode);
+ if (err)
+ goto free_strings;
+ }
+ err = func(data, name, real_event(name, event), desc, long_desc,
+ pmu, unit, perpkg, metric_expr, metric_name, metric_group);
+free_strings:
+ free(event);
+ free(desc);
+ free(name);
+ free(long_desc);
+ free(extra_desc);
+ free(pmu);
+ free(filter);
+ free(perpkg);
+ free(unit);
+ free(metric_expr);
+ free(metric_name);
+ free(metric_group);
+ free(arch_std);
+
+ if (err)
+ break;
+ tok += j;
+ }
+ EXPECT(tok - tokens == len, tok, "unexpected objects at end");
+ err = 0;
+out_free:
+ free_json(map, size, tokens);
+ return err;
+}
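+
+/*
+ * Illustrative sketch (made-up values): a JSON entry such as
+ *
+ * { "EventCode": "0xC0", "UMask": "0x1", "EventName": "FOO.BAR",
+ * "BriefDescription": "Foo bar events." }
+ *
+ * reaches func() roughly as name "foo.bar" (lowercased by fixname()),
+ * event "umask=0x1,event=0xc0" and desc "Foo bar events" (trailing
+ * dot stripped by fixdesc()).
+ */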
+
+static char *file_name_to_table_name(char *fname)
+{
+ unsigned int i;
+ int n;
+ int c;
+ char *tblname;
+
+ /*
+ * Ensure tablename starts with alphabetic character.
+ * Derive rest of table name from basename of the JSON file,
+ * replacing hyphens and stripping out .json suffix.
+ */
+ n = asprintf(&tblname, "pme_%s", fname);
+ if (n < 0) {
+ pr_info("%s: asprintf() error %s for file %s\n", prog,
+ strerror(errno), fname);
+ return NULL;
+ }
+
+ for (i = 0; i < strlen(tblname); i++) {
+ c = tblname[i];
+
+ if (c == '-' || c == '/')
+ tblname[i] = '_';
+ else if (c == '.') {
+ tblname[i] = '\0';
+ break;
+ } else if (!isalnum(c) && c != '_') {
+ pr_err("%s: Invalid character '%c' in file name %s\n",
+ prog, c, basename(fname));
+ free(tblname);
+ tblname = NULL;
+ break;
+ }
+ }
+
+ return tblname;
+}
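+
+/*
+ * For example, "power7_core.json" maps to "pme_power7_core", and a
+ * two-level name such as "arm/cortex-a53" maps to
+ * "pme_arm_cortex_a53".
+ */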
+
+static void print_mapping_table_prefix(FILE *outfp)
+{
+ fprintf(outfp, "struct pmu_events_map pmu_events_map[] = {\n");
+}
+
+static void print_mapping_table_suffix(FILE *outfp)
+{
+ /*
+ * Print the terminating, NULL entry.
+ */
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = 0,\n");
+ fprintf(outfp, "\t.version = 0,\n");
+ fprintf(outfp, "\t.type = 0,\n");
+ fprintf(outfp, "\t.table = 0,\n");
+ fprintf(outfp, "},\n");
+
+ /* and finally, the closing curly bracket for the struct */
+ fprintf(outfp, "};\n");
+}
+
+static int process_mapfile(FILE *outfp, char *fpath)
+{
+ int n = 16384;
+ FILE *mapfp;
+ char *save = NULL;
+ char *line, *p;
+ int line_num;
+ char *tblname;
+ int ret = 0;
+
+ pr_info("%s: Processing mapfile %s\n", prog, fpath);
+
+ line = malloc(n);
+ if (!line)
+ return -1;
+
+ mapfp = fopen(fpath, "r");
+ if (!mapfp) {
+ pr_info("%s: Error %s opening %s\n", prog, strerror(errno),
+ fpath);
+ free(line);
+ return -1;
+ }
+
+ print_mapping_table_prefix(outfp);
+
+ /* Skip first line (header) */
+ p = fgets(line, n, mapfp);
+ if (!p)
+ goto out;
+
+ line_num = 1;
+ while (1) {
+ char *cpuid, *version, *type, *fname;
+
+ line_num++;
+ p = fgets(line, n, mapfp);
+ if (!p)
+ break;
+
+ if (line[0] == '#' || line[0] == '\n')
+ continue;
+
+ if (line[strlen(line)-1] != '\n') {
+ /* TODO Deal with lines longer than 16K */
+ pr_info("%s: Mapfile %s: line %d too long, aborting\n",
+ prog, fpath, line_num);
+ ret = -1;
+ goto out;
+ }
+ line[strlen(line)-1] = '\0';
+
+ cpuid = fixregex(strtok_r(p, ",", &save));
+ version = strtok_r(NULL, ",", &save);
+ fname = strtok_r(NULL, ",", &save);
+ type = strtok_r(NULL, ",", &save);
+
+ tblname = file_name_to_table_name(fname);
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = \"%s\",\n", cpuid);
+ fprintf(outfp, "\t.version = \"%s\",\n", version);
+ fprintf(outfp, "\t.type = \"%s\",\n", type);
+
+ /*
+ * CHECK: We can't use the type (e.g. "core") field in the
+ * table name. For us to do that, we need to somehow tweak
+ * the other caller of file_name_to_table_name(),
+ * process_one_file(), to determine the type. process_one_file()
+ * has no way of knowing these are "core" events unless the file
+ * name has "core" in it. If the file name has "core" in it, we
+ * can safely ignore the type field here also.
+ */
+ fprintf(outfp, "\t.table = %s\n", tblname);
+ fprintf(outfp, "},\n");
+ }
+
+out:
+ print_mapping_table_suffix(outfp);
+ fclose(mapfp);
+ free(line);
+ return ret;
+}
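+
+/*
+ * Per the strtok_r() calls above, mapfile lines have the form
+ * "cpuid,version,fname,type"; an illustrative (made-up) entry:
+ *
+ * GenuineIntel-6-3D,v13,broadwell,core
+ */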
+
+/*
+ * If we fail to locate/process JSON and map files, create a NULL mapping
+ * table. This would at least allow perf to build even if we can't find/use
+ * the aliases.
+ */
+static void create_empty_mapping(const char *output_file)
+{
+ FILE *outfp;
+
+ pr_info("%s: Creating empty pmu_events_map[] table\n", prog);
+
+ /* Truncate file to clear any partial writes to it */
+ outfp = fopen(output_file, "w");
+ if (!outfp) {
+ perror("fopen()");
+ _Exit(1);
+ }
+
+ fprintf(outfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+ print_mapping_table_prefix(outfp);
+ print_mapping_table_suffix(outfp);
+ fclose(outfp);
+}
+
+static int get_maxfds(void)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim) == 0)
+ return min(rlim.rlim_max / 2, (rlim_t)512);
+
+ return 512;
+}
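+
+/*
+ * nftw() may keep up to this many directory streams open at once, so
+ * cap it at half the hard fd limit, and at 512.
+ */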
+
+/*
+ * nftw() doesn't let us pass an argument to the processing function,
+ * so use global variables.
+ */
+static FILE *eventsfp;
+static char *mapfile;
+
+static int is_leaf_dir(const char *fpath)
+{
+ DIR *d;
+ struct dirent *dir;
+ int res = 1;
+
+ d = opendir(fpath);
+ if (!d)
+ return 0;
+
+ while ((dir = readdir(d)) != NULL) {
+ if (!strcmp(dir->d_name, ".") || !strcmp(dir->d_name, ".."))
+ continue;
+
+ if (dir->d_type == DT_DIR) {
+ res = 0;
+ break;
+ } else if (dir->d_type == DT_UNKNOWN) {
+ char path[PATH_MAX];
+ struct stat st;
+
+ sprintf(path, "%s/%s", fpath, dir->d_name);
+ if (stat(path, &st))
+ break;
+
+ if (S_ISDIR(st.st_mode)) {
+ res = 0;
+ break;
+ }
+ }
+ }
+
+ closedir(d);
+
+ return res;
+}
+
+static int is_json_file(const char *name)
+{
+ const char *suffix;
+
+ if (strlen(name) < 5)
+ return 0;
+
+ suffix = name + strlen(name) - 5;
+
+ if (strncmp(suffix, ".json", 5) == 0)
+ return 1;
+ return 0;
+}
+
+static int preprocess_arch_std_files(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ int level = ftwbuf->level;
+ int is_file = typeflag == FTW_F;
+
+ if (level == 1 && is_file && is_json_file(fpath))
+ return json_events(fpath, save_arch_std_events, (void *)sb);
+
+ return 0;
+}
+
+static int process_one_file(const char *fpath, const struct stat *sb,
+ int typeflag, struct FTW *ftwbuf)
+{
+ char *tblname, *bname;
+ int is_dir = typeflag == FTW_D;
+ int is_file = typeflag == FTW_F;
+ int level = ftwbuf->level;
+ int err = 0;
+
+ if (level == 2 && is_dir) {
+ /*
+ * For level 2 directory, bname will include parent name,
+ * like vendor/platform. So search back from platform dir
+ * to find this.
+ */
+ bname = (char *) fpath + ftwbuf->base - 2;
+ for (;;) {
+ if (*bname == '/')
+ break;
+ bname--;
+ }
+ bname++;
+ } else
+ bname = (char *) fpath + ftwbuf->base;
+
+ pr_debug("%s %d %7jd %-20s %s\n",
+ is_file ? "f" : is_dir ? "d" : "x",
+ level, sb->st_size, bname, fpath);
+
+ /* base dir or too deep */
+ if (level == 0 || level > 3)
+ return 0;
+
+
+ /* model directory, reset topic */
+ if ((level == 1 && is_dir && is_leaf_dir(fpath)) ||
+ (level == 2 && is_dir)) {
+ if (close_table)
+ print_events_table_suffix(eventsfp);
+
+ /*
+ * Drop file name suffix. Replace hyphens with underscores.
+ * Fail if the file name contains any non-alphanumeric characters
+ * besides underscores.
+ */
+ tblname = file_name_to_table_name(bname);
+ if (!tblname) {
+ pr_info("%s: Error determining table name for %s\n", prog,
+ bname);
+ return -1;
+ }
+
+ print_events_table_prefix(eventsfp, tblname);
+ return 0;
+ }
+
+ /*
+ * Save the mapfile name for now. We will process mapfile
+ * after processing all JSON files (so we can write out the
+ * mapping table after all PMU events tables).
+ *
+ */
+ if (level == 1 && is_file) {
+ if (!strcmp(bname, "mapfile.csv")) {
+ mapfile = strdup(fpath);
+ return 0;
+ }
+
+ pr_info("%s: Ignoring file %s\n", prog, fpath);
+ return 0;
+ }
+
+ /*
+ * If the file name does not have a .json extension,
+ * ignore it. It could be a readme.txt for instance.
+ */
+ if (is_file) {
+ if (!is_json_file(bname)) {
+ pr_info("%s: Ignoring file without .json suffix %s\n", prog,
+ fpath);
+ return 0;
+ }
+ }
+
+ if (level > 1 && add_topic(bname))
+ return -ENOMEM;
+
+ /*
+ * Assume all other files are JSON files.
+ *
+ * If the mapfile refers to 'power7_core.json', we create a table
+ * named 'pme_power7_core'. Any inconsistencies between the mapfile
+ * and the directory tree could result in build failure due to table
+ * names not being found.
+ *
+ * At least for now, be strict with processing JSON file names,
+ * i.e. if a JSON file name cannot be mapped to a C-style table
+ * name, fail.
+ */
+ if (is_file) {
+ struct perf_entry_data data = {
+ .topic = get_topic(),
+ .outfp = eventsfp,
+ };
+
+ err = json_events(fpath, print_events_table_entry, &data);
+
+ free(data.topic);
+ }
+
+ return err;
+}
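+
+/*
+ * Illustrative layout under <start_dir>/<arch> (level 0 is the arch
+ * directory itself):
+ *
+ * mapfile.csv level 1 file
+ * broadwell/cache.json level 1 dir, level 2 file
+ * arm/cortex-a53/branch.json level 1-2 dirs, level 3 file
+ */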
+
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+/*
+ * Starting in directory 'start_dirname', find the "mapfile.csv" and
+ * the set of JSON files for the architecture 'arch'.
+ *
+ * From each JSON file, create a C-style "PMU events table" from the
+ *
+ * From the mapfile, create a mapping between the CPU revisions and
+ * PMU event tables (see struct pmu_events_map).
+ *
+ * Write out the PMU events tables and the mapping table to pmu-event.c.
+ */
+int main(int argc, char *argv[])
+{
+ int rc, ret = 0;
+ int maxfds;
+ char ldirname[PATH_MAX];
+ const char *arch;
+ const char *output_file;
+ const char *start_dirname;
+ struct stat stbuf;
+
+ prog = basename(argv[0]);
+ if (argc < 4) {
+ pr_err("Usage: %s <arch> <starting_dir> <output_file>\n", prog);
+ return 1;
+ }
+
+ arch = argv[1];
+ start_dirname = argv[2];
+ output_file = argv[3];
+
+ if (argc > 4)
+ verbose = atoi(argv[4]);
+
+ eventsfp = fopen(output_file, "w");
+ if (!eventsfp) {
+ pr_err("%s Unable to create required file %s (%s)\n",
+ prog, output_file, strerror(errno));
+ return 2;
+ }
+
+ sprintf(ldirname, "%s/%s", start_dirname, arch);
+
+ /* If architecture does not have any event lists, bail out */
+ if (stat(ldirname, &stbuf) < 0) {
+ pr_info("%s: Arch %s has no PMU event lists\n", prog, arch);
+ goto empty_map;
+ }
+
+ /* Include pmu-events.h first */
+ fprintf(eventsfp, "#include \"../../pmu-events/pmu-events.h\"\n");
+
+ /*
+ * The mapfile allows multiple CPUids to point to the same JSON file,
+ * so it is not clear that symlinks are needed within the pmu-events
+ * directory.
+ *
+ * For now, treat symlinks of JSON files as regular files and create
+ * separate tables for each symlink (presumably, each symlink refers
+ * to specific version of the CPU).
+ */
+
+ maxfds = get_maxfds();
+ mapfile = NULL;
+ rc = nftw(ldirname, preprocess_arch_std_files, maxfds, 0);
+ if (rc && verbose) {
+ pr_info("%s: Error preprocessing arch standard files %s\n",
+ prog, ldirname);
+ goto empty_map;
+ } else if (rc < 0) {
+ /* Make build fail */
+ fclose(eventsfp);
+ free_arch_std_events();
+ return 1;
+ } else if (rc) {
+ goto empty_map;
+ }
+
+ rc = nftw(ldirname, process_one_file, maxfds, 0);
+ if (rc && verbose) {
+ pr_info("%s: Error walking file tree %s\n", prog, ldirname);
+ goto empty_map;
+ } else if (rc < 0) {
+ /* Make build fail */
+ fclose(eventsfp);
+ free_arch_std_events();
+ ret = 1;
+ goto out_free_mapfile;
+ } else if (rc) {
+ goto empty_map;
+ }
+
+ if (close_table)
+ print_events_table_suffix(eventsfp);
+
+ if (!mapfile) {
+ pr_info("%s: No CPU->JSON mapping?\n", prog);
+ goto empty_map;
+ }
+
+ if (process_mapfile(eventsfp, mapfile)) {
+ pr_info("%s: Error processing mapfile %s\n", prog, mapfile);
+ /* Make build fail */
+ fclose(eventsfp);
+ free_arch_std_events();
+ ret = 1;
+ }
+
+
+ goto out_free_mapfile;
+
+empty_map:
+ fclose(eventsfp);
+ create_empty_mapping(output_file);
+ free_arch_std_events();
+out_free_mapfile:
+ free(mapfile);
+ return ret;
+}
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
new file mode 100644
index 000000000..4684c673c
--- /dev/null
+++ b/tools/perf/pmu-events/jevents.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef JEVENTS_H
+#define JEVENTS_H 1
+
+int json_events(const char *fn,
+ int (*func)(void *data, char *name, char *event, char *desc,
+ char *long_desc,
+ char *pmu,
+ char *unit, char *perpkg, char *metric_expr,
+ char *metric_name, char *metric_group),
+ void *data);
+char *get_cpu_str(void);
+
+#ifndef min
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+#endif
+
+#endif
diff --git a/tools/perf/pmu-events/jsmn.c b/tools/perf/pmu-events/jsmn.c
new file mode 100644
index 000000000..11d1fa18b
--- /dev/null
+++ b/tools/perf/pmu-events/jsmn.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 2010 Serge A. Zaitsev
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * Slightly modified by AK to not assume 0 terminated input.
+ */
+
+#include <stdlib.h>
+#include "jsmn.h"
+
+/*
+ * Allocates a fresh unused token from the token pool.
+ */
+static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser,
+ jsmntok_t *tokens, size_t num_tokens)
+{
+ jsmntok_t *tok;
+
+ if ((unsigned)parser->toknext >= num_tokens)
+ return NULL;
+ tok = &tokens[parser->toknext++];
+ tok->start = tok->end = -1;
+ tok->size = 0;
+ return tok;
+}
+
+/*
+ * Fills token type and boundaries.
+ */
+static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type,
+ int start, int end)
+{
+ token->type = type;
+ token->start = start;
+ token->end = end;
+ token->size = 0;
+}
+
+/*
+ * Fills next available token with JSON primitive.
+ */
+static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js,
+ size_t len,
+ jsmntok_t *tokens, size_t num_tokens)
+{
+ jsmntok_t *token;
+ int start;
+
+ start = parser->pos;
+
+ for (; parser->pos < len; parser->pos++) {
+ switch (js[parser->pos]) {
+#ifndef JSMN_STRICT
+ /*
+ * In strict mode primitive must be followed by ","
+ * or "}" or "]"
+ */
+ case ':':
+#endif
+ case '\t':
+ case '\r':
+ case '\n':
+ case ' ':
+ case ',':
+ case ']':
+ case '}':
+ goto found;
+ default:
+ break;
+ }
+ if (js[parser->pos] < 32 || js[parser->pos] >= 127) {
+ parser->pos = start;
+ return JSMN_ERROR_INVAL;
+ }
+ }
+#ifdef JSMN_STRICT
+ /*
+ * In strict mode primitive must be followed by a
+ * comma/object/array.
+ */
+ parser->pos = start;
+ return JSMN_ERROR_PART;
+#endif
+
+found:
+ token = jsmn_alloc_token(parser, tokens, num_tokens);
+ if (token == NULL) {
+ parser->pos = start;
+ return JSMN_ERROR_NOMEM;
+ }
+ jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos);
+ parser->pos--; /* parent sees closing brackets */
+ return JSMN_SUCCESS;
+}
+
+/*
+ * Fills next token with JSON string.
+ */
+static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js,
+ size_t len,
+ jsmntok_t *tokens, size_t num_tokens)
+{
+ jsmntok_t *token;
+ int start = parser->pos;
+
+ /* Skip starting quote */
+ parser->pos++;
+
+ for (; parser->pos < len; parser->pos++) {
+ char c = js[parser->pos];
+
+ /* Quote: end of string */
+ if (c == '\"') {
+ token = jsmn_alloc_token(parser, tokens, num_tokens);
+ if (token == NULL) {
+ parser->pos = start;
+ return JSMN_ERROR_NOMEM;
+ }
+ jsmn_fill_token(token, JSMN_STRING, start+1,
+ parser->pos);
+ return JSMN_SUCCESS;
+ }
+
+ /* Backslash: Quoted symbol expected */
+ if (c == '\\') {
+ parser->pos++;
+ switch (js[parser->pos]) {
+ /* Allowed escaped symbols */
+ case '\"':
+ case '/':
+ case '\\':
+ case 'b':
+ case 'f':
+ case 'r':
+ case 'n':
+ case 't':
+ break;
+ /* Allows escaped symbol \uXXXX */
+ case 'u':
+ /* TODO */
+ break;
+ /* Unexpected symbol */
+ default:
+ parser->pos = start;
+ return JSMN_ERROR_INVAL;
+ }
+ }
+ }
+ parser->pos = start;
+ return JSMN_ERROR_PART;
+}
+
+/*
+ * Parse JSON string and fill tokens.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len,
+ jsmntok_t *tokens, unsigned int num_tokens)
+{
+ jsmnerr_t r;
+ int i;
+ jsmntok_t *token;
+
+ for (; parser->pos < len; parser->pos++) {
+ char c;
+ jsmntype_t type;
+
+ c = js[parser->pos];
+ switch (c) {
+ case '{':
+ case '[':
+ token = jsmn_alloc_token(parser, tokens, num_tokens);
+ if (token == NULL)
+ return JSMN_ERROR_NOMEM;
+ if (parser->toksuper != -1)
+ tokens[parser->toksuper].size++;
+ token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY);
+ token->start = parser->pos;
+ parser->toksuper = parser->toknext - 1;
+ break;
+ case '}':
+ case ']':
+ type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY);
+ for (i = parser->toknext - 1; i >= 0; i--) {
+ token = &tokens[i];
+ if (token->start != -1 && token->end == -1) {
+ if (token->type != type)
+ return JSMN_ERROR_INVAL;
+ parser->toksuper = -1;
+ token->end = parser->pos + 1;
+ break;
+ }
+ }
+ /* Error if unmatched closing bracket */
+ if (i == -1)
+ return JSMN_ERROR_INVAL;
+ for (; i >= 0; i--) {
+ token = &tokens[i];
+ if (token->start != -1 && token->end == -1) {
+ parser->toksuper = i;
+ break;
+ }
+ }
+ break;
+ case '\"':
+ r = jsmn_parse_string(parser, js, len, tokens,
+ num_tokens);
+ if (r < 0)
+ return r;
+ if (parser->toksuper != -1)
+ tokens[parser->toksuper].size++;
+ break;
+ case '\t':
+ case '\r':
+ case '\n':
+ case ':':
+ case ',':
+ case ' ':
+ break;
+#ifdef JSMN_STRICT
+ /*
+ * In strict mode primitives are:
+ * numbers and booleans.
+ */
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case 't':
+ case 'f':
+ case 'n':
+#else
+ /*
+ * In non-strict mode every unquoted value
+ * is a primitive.
+ */
+ /* FALL THROUGH */
+ default:
+#endif
+ r = jsmn_parse_primitive(parser, js, len, tokens,
+ num_tokens);
+ if (r < 0)
+ return r;
+ if (parser->toksuper != -1)
+ tokens[parser->toksuper].size++;
+ break;
+
+#ifdef JSMN_STRICT
+ /* Unexpected char in strict mode */
+ default:
+ return JSMN_ERROR_INVAL;
+#endif
+ }
+ }
+
+ for (i = parser->toknext - 1; i >= 0; i--) {
+ /* Unmatched opened object or array */
+ if (tokens[i].start != -1 && tokens[i].end == -1)
+ return JSMN_ERROR_PART;
+ }
+
+ return JSMN_SUCCESS;
+}
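+
+/*
+ * Worked example: parsing the 8-byte input {"a": 1} yields three
+ * tokens: OBJECT [0,8) with size 2 (key plus value), STRING "a"
+ * [2,3), and PRIMITIVE "1" [6,7), where [start,end) are byte offsets
+ * into the input.
+ */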
+
+/*
+ * Creates a new parser based over a given buffer with an array of tokens
+ * available.
+ */
+void jsmn_init(jsmn_parser *parser)
+{
+ parser->pos = 0;
+ parser->toknext = 0;
+ parser->toksuper = -1;
+}
+
+const char *jsmn_strerror(jsmnerr_t err)
+{
+ switch (err) {
+ case JSMN_ERROR_NOMEM:
+ return "No enough tokens";
+ case JSMN_ERROR_INVAL:
+ return "Invalid character inside JSON string";
+ case JSMN_ERROR_PART:
+ return "The string is not a full JSON packet, more bytes expected";
+ case JSMN_SUCCESS:
+ return "Success";
+ default:
+ return "Unknown json error";
+ }
+}
diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h
new file mode 100644
index 000000000..c7b0f6ea2
--- /dev/null
+++ b/tools/perf/pmu-events/jsmn.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JSMN_H_
+#define __JSMN_H_
+
+/*
+ * JSON type identifier. Basic types are:
+ * o Object
+ * o Array
+ * o String
+ * o Other primitive: number, boolean (true/false) or null
+ */
+typedef enum {
+ JSMN_PRIMITIVE = 0,
+ JSMN_OBJECT = 1,
+ JSMN_ARRAY = 2,
+ JSMN_STRING = 3
+} jsmntype_t;
+
+typedef enum {
+ /* Not enough tokens were provided */
+ JSMN_ERROR_NOMEM = -1,
+ /* Invalid character inside JSON string */
+ JSMN_ERROR_INVAL = -2,
+ /* The string is not a full JSON packet, more bytes expected */
+ JSMN_ERROR_PART = -3,
+ /* Everything was fine */
+ JSMN_SUCCESS = 0
+} jsmnerr_t;
+
+/*
+ * JSON token description.
+ * @param type type (object, array, string etc.)
+ * @param start start position in JSON data string
+ * @param end end position in JSON data string
+ */
+typedef struct {
+ jsmntype_t type;
+ int start;
+ int end;
+ int size;
+} jsmntok_t;
+
+/*
+ * JSON parser. Contains an array of token blocks available. Also stores
+ * the string being parsed now and current position in that string
+ */
+typedef struct {
+ unsigned int pos; /* offset in the JSON string */
+ int toknext; /* next token to allocate */
+ int toksuper; /* superior token node, e.g. parent object or array */
+} jsmn_parser;
+
+/*
+ * Create JSON parser over an array of tokens
+ */
+void jsmn_init(jsmn_parser *parser);
+
+/*
+ * Run JSON parser. It parses a JSON data string into an array of tokens,
+ * each describing a single JSON object.
+ */
+jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js,
+ size_t len,
+ jsmntok_t *tokens, unsigned int num_tokens);
+
+const char *jsmn_strerror(jsmnerr_t err);
+
+#endif /* __JSMN_H_ */
diff --git a/tools/perf/pmu-events/json.c b/tools/perf/pmu-events/json.c
new file mode 100644
index 000000000..0544398d6
--- /dev/null
+++ b/tools/perf/pmu-events/json.c
@@ -0,0 +1,162 @@
+/* Parse JSON files using the JSMN parser. */
+
+/*
+ * Copyright (c) 2014, Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include "jsmn.h"
+#include "json.h"
+#include <linux/kernel.h>
+
+
+static char *mapfile(const char *fn, size_t *size)
+{
+ unsigned ps = sysconf(_SC_PAGESIZE);
+ struct stat st;
+ char *map = NULL;
+ int err;
+ int fd = open(fn, O_RDONLY);
+
+ if (fd < 0 && verbose > 0 && fn) {
+ pr_err("Error opening events file '%s': %s\n", fn,
+ strerror(errno));
+ }
+
+ if (fd < 0)
+ return NULL;
+ err = fstat(fd, &st);
+ if (err < 0)
+ goto out;
+ *size = st.st_size;
+ map = mmap(NULL,
+ (st.st_size + ps - 1) & ~(ps - 1),
+ PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (map == MAP_FAILED)
+ map = NULL;
+out:
+ close(fd);
+ return map;
+}
+
+static void unmapfile(char *map, size_t size)
+{
+ unsigned ps = sysconf(_SC_PAGESIZE);
+ munmap(map, roundup(size, ps));
+}
+
+/*
+ * Parse json file using jsmn. Return array of tokens,
+ * and mapped file. Caller needs to free array.
+ */
+jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len)
+{
+ jsmn_parser parser;
+ jsmntok_t *tokens;
+ jsmnerr_t res;
+ unsigned sz;
+
+ *map = mapfile(fn, size);
+ if (!*map)
+ return NULL;
+ /* Heuristic: ~one 16-byte jsmntok_t per input byte is ample for valid JSON */
+ sz = *size * 16;
+ tokens = malloc(sz);
+ if (!tokens)
+ goto error;
+ jsmn_init(&parser);
+ res = jsmn_parse(&parser, *map, *size, tokens,
+ sz / sizeof(jsmntok_t));
+ if (res != JSMN_SUCCESS) {
+ pr_err("%s: json error %s\n", fn, jsmn_strerror(res));
+ goto error_free;
+ }
+ if (len)
+ *len = parser.toknext;
+ return tokens;
+error_free:
+ free(tokens);
+error:
+ unmapfile(*map, *size);
+ return NULL;
+}
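+
+/*
+ * Typical usage, a minimal sketch (see json_events() in jevents.c for
+ * the real consumer):
+ *
+ * tokens = parse_json(fn, &map, &size, &len);
+ * if (tokens) {
+ * ... walk tokens[0..len) ...
+ * free_json(map, size, tokens);
+ * }
+ */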
+
+void free_json(char *map, size_t size, jsmntok_t *tokens)
+{
+ free(tokens);
+ unmapfile(map, size);
+}
+
+static int countchar(char *map, char c, int end)
+{
+ int i;
+ int count = 0;
+ for (i = 0; i < end; i++)
+ if (map[i] == c)
+ count++;
+ return count;
+}
+
+/* Return line number of a jsmn token */
+int json_line(char *map, jsmntok_t *t)
+{
+ return countchar(map, '\n', t->start) + 1;
+}
+
+static const char * const jsmn_types[] = {
+ [JSMN_PRIMITIVE] = "primitive",
+ [JSMN_ARRAY] = "array",
+ [JSMN_OBJECT] = "object",
+ [JSMN_STRING] = "string"
+};
+
+#define LOOKUP(a, i) ((i) < (sizeof(a)/sizeof(*(a))) ? ((a)[i]) : "?")
+
+/* Return type name of a jsmn token */
+const char *json_name(jsmntok_t *t)
+{
+ return LOOKUP(jsmn_types, t->type);
+}
+
+int json_len(jsmntok_t *t)
+{
+ return t->end - t->start;
+}
+
+/* Is string t equal to s? */
+int json_streq(char *map, jsmntok_t *t, const char *s)
+{
+ unsigned len = json_len(t);
+ return len == strlen(s) && !strncasecmp(map + t->start, s, len);
+}
diff --git a/tools/perf/pmu-events/json.h b/tools/perf/pmu-events/json.h
new file mode 100644
index 000000000..fbcd5a059
--- /dev/null
+++ b/tools/perf/pmu-events/json.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef JSON_H
+#define JSON_H 1
+
+#include "jsmn.h"
+
+jsmntok_t *parse_json(const char *fn, char **map, size_t *size, int *len);
+void free_json(char *map, size_t size, jsmntok_t *tokens);
+int json_line(char *map, jsmntok_t *t);
+const char *json_name(jsmntok_t *t);
+int json_streq(char *map, jsmntok_t *t, const char *s);
+int json_len(jsmntok_t *t);
+
+extern int verbose;
+
+#include <stdbool.h>
+
+extern int eprintf(int level, int var, const char *fmt, ...);
+#define pr_fmt(fmt) fmt
+
+#define pr_err(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_info(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_debug(fmt, ...) \
+ eprintf(2, verbose, pr_fmt(fmt), ##__VA_ARGS__)
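+
+/*
+ * eprintf() prints only when 'verbose' is at least the given level:
+ * pr_err() always prints, pr_info() needs verbose >= 1, and
+ * pr_debug() needs verbose >= 2.
+ */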
+
+#ifndef roundup
+#define roundup(x, y) ( \
+{ \
+ const typeof(y) __y = y; \
+ (((x) + (__y - 1)) / __y) * __y; \
+} \
+)
+#endif
+
+#endif
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
new file mode 100644
index 000000000..92a4d15ee
--- /dev/null
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PMU_EVENTS_H
+#define PMU_EVENTS_H
+
+/*
+ * Describe each PMU event. Each CPU has a table of PMU events.
+ */
+struct pmu_event {
+ const char *name;
+ const char *event;
+ const char *desc;
+ const char *topic;
+ const char *long_desc;
+ const char *pmu;
+ const char *unit;
+ const char *perpkg;
+ const char *metric_expr;
+ const char *metric_name;
+ const char *metric_group;
+};
+
+/*
+ *
+ * Map a CPU to its table of PMU events. The CPU is identified by the
+ * cpuid field, which is an arch-specific identifier for the CPU.
+ * The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
+ * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c.
+ *
+ * The cpuid can contain any character other than the comma.
+ */
+struct pmu_events_map {
+ const char *cpuid;
+ const char *version;
+ const char *type; /* core, uncore etc */
+ struct pmu_event *table;
+};
+
+/*
+ * Global table mapping each known CPU for the architecture to its
+ * table of PMU events.
+ */
+extern struct pmu_events_map pmu_events_map[];
+
+#endif
diff --git a/tools/perf/python/tracepoint.py b/tools/perf/python/tracepoint.py
new file mode 100755
index 000000000..461848c7f
--- /dev/null
+++ b/tools/perf/python/tracepoint.py
@@ -0,0 +1,48 @@
+#! /usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+# -*- python -*-
+# -*- coding: utf-8 -*-
+
+import perf
+
+class tracepoint(perf.evsel):
+ def __init__(self, sys, name):
+ config = perf.tracepoint(sys, name)
+ perf.evsel.__init__(self,
+ type = perf.TYPE_TRACEPOINT,
+ config = config,
+ freq = 0, sample_period = 1, wakeup_events = 1,
+ sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU | perf.SAMPLE_RAW | perf.SAMPLE_TIME)
+
+def main():
+ tp = tracepoint("sched", "sched_switch")
+ cpus = perf.cpu_map()
+ threads = perf.thread_map(-1)
+
+ evlist = perf.evlist(cpus, threads)
+ evlist.add(tp)
+ evlist.open()
+ evlist.mmap()
+
+ while True:
+ evlist.poll(timeout = -1)
+ for cpu in cpus:
+ event = evlist.read_on_cpu(cpu)
+ if not event:
+ continue
+
+ if not isinstance(event, perf.sample_event):
+ continue
+
+ print "time %u prev_comm=%s prev_pid=%d prev_prio=%d prev_state=0x%x ==> next_comm=%s next_pid=%d next_prio=%d" % (
+ event.sample_time,
+ event.prev_comm,
+ event.prev_pid,
+ event.prev_prio,
+ event.prev_state,
+ event.next_comm,
+ event.next_pid,
+ event.next_prio)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/perf/python/twatch.py b/tools/perf/python/twatch.py
new file mode 100755
index 000000000..0a29c5c30
--- /dev/null
+++ b/tools/perf/python/twatch.py
@@ -0,0 +1,68 @@
+#! /usr/bin/python
+# -*- python -*-
+# -*- coding: utf-8 -*-
+# twatch - Experimental use of the perf python interface
+# Copyright (C) 2011 Arnaldo Carvalho de Melo <acme@redhat.com>
+#
+# This application is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2.
+#
+# This application is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+
+import perf
+
+def main(context_switch = 0, thread = -1):
+ cpus = perf.cpu_map()
+ threads = perf.thread_map(thread)
+ evsel = perf.evsel(type = perf.TYPE_SOFTWARE,
+ config = perf.COUNT_SW_DUMMY,
+ task = 1, comm = 1, mmap = 0, freq = 0,
+ wakeup_events = 1, watermark = 1,
+ sample_id_all = 1, context_switch = context_switch,
+ sample_type = perf.SAMPLE_PERIOD | perf.SAMPLE_TID | perf.SAMPLE_CPU)
+
+ """What we want are just the PERF_RECORD_ lifetime events for threads,
+ using the default, PERF_TYPE_HARDWARE + PERF_COUNT_HW_CYCLES & freq=1
+ (the default), makes perf reenable irq_vectors:local_timer_entry, when
+ disabling nohz, not good for some use cases where all we want is to get
+ threads comes and goes... So use (perf.TYPE_SOFTWARE, perf_COUNT_SW_DUMMY,
+ freq=0) instead."""
+
+ evsel.open(cpus = cpus, threads = threads);
+ evlist = perf.evlist(cpus, threads)
+ evlist.add(evsel)
+ evlist.mmap()
+ while True:
+ evlist.poll(timeout = -1)
+ for cpu in cpus:
+ event = evlist.read_on_cpu(cpu)
+ if not event:
+ continue
+ print("cpu: {0}, pid: {1}, tid: {2} {3}".format(event.sample_cpu,
+ event.sample_pid,
+ event.sample_tid,
+ event))
+
+if __name__ == '__main__':
+ """
+ To test the PERF_RECORD_SWITCH record, pick a pid and replace
+ in the following line.
+
+ Example output:
+
+cpu: 3, pid: 31463, tid: 31593 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31593, switch_out: 1 }
+cpu: 1, pid: 31463, tid: 31489 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31489, switch_out: 1 }
+cpu: 2, pid: 31463, tid: 31496 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31496, switch_out: 1 }
+cpu: 3, pid: 31463, tid: 31491 { type: context_switch, next_prev_pid: 31463, next_prev_tid: 31491, switch_out: 0 }
+
+ It is possible as well to use event.misc & perf.PERF_RECORD_MISC_SWITCH_OUT
+ to figure out if this is a context switch in or out of the monitored threads.
+
+ If bored, please add command line option parsing support for these options :-)
+ """
+ # main(context_switch = 1, thread = 31463)
+ main()
diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build
new file mode 100644
index 000000000..41efd7e36
--- /dev/null
+++ b/tools/perf/scripts/Build
@@ -0,0 +1,2 @@
+libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/
+libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build
new file mode 100644
index 000000000..34faecf77
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build
@@ -0,0 +1,5 @@
+libperf-y += Context.o
+
+CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes
+CFLAGS_Context.o += -Wno-unused-parameter -Wno-nested-externs -Wno-undef
+CFLAGS_Context.o += -Wno-switch-default -Wno-shadow
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.c b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
new file mode 100644
index 000000000..28431d1bb
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.c
@@ -0,0 +1,138 @@
+/*
+ * This file was generated automatically by ExtUtils::ParseXS version 2.18_02 from the
+ * contents of Context.xs. Do not edit this file, edit Context.xs instead.
+ *
+ * ANY CHANGES MADE HERE WILL BE LOST!
+ *
+ */
+#include <stdbool.h>
+#ifndef HAS_BOOL
+# define HAS_BOOL 1
+#endif
+#line 1 "Context.xs"
+/*
+ * Context.xs. XS interfaces for perf script.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+#ifndef PERL_UNUSED_VAR
+# define PERL_UNUSED_VAR(var) if (0) var = var
+#endif
+
+#line 42 "Context.c"
+
+XS(XS_Perf__Trace__Context_common_pc); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_pc)
+{
+#ifdef dVAR
+ dVAR; dXSARGS;
+#else
+ dXSARGS;
+#endif
+ if (items != 1)
+ Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_pc", "context");
+ PERL_UNUSED_VAR(cv); /* -W */
+ {
+ struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+ int RETVAL;
+ dXSTARG;
+
+ RETVAL = common_pc(context);
+ XSprePUSH; PUSHi((IV)RETVAL);
+ }
+ XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_flags); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_flags)
+{
+#ifdef dVAR
+ dVAR; dXSARGS;
+#else
+ dXSARGS;
+#endif
+ if (items != 1)
+ Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_flags", "context");
+ PERL_UNUSED_VAR(cv); /* -W */
+ {
+ struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+ int RETVAL;
+ dXSTARG;
+
+ RETVAL = common_flags(context);
+ XSprePUSH; PUSHi((IV)RETVAL);
+ }
+ XSRETURN(1);
+}
+
+
+XS(XS_Perf__Trace__Context_common_lock_depth); /* prototype to pass -Wmissing-prototypes */
+XS(XS_Perf__Trace__Context_common_lock_depth)
+{
+#ifdef dVAR
+ dVAR; dXSARGS;
+#else
+ dXSARGS;
+#endif
+ if (items != 1)
+ Perl_croak(aTHX_ "Usage: %s(%s)", "Perf::Trace::Context::common_lock_depth", "context");
+ PERL_UNUSED_VAR(cv); /* -W */
+ {
+ struct scripting_context * context = INT2PTR(struct scripting_context *,SvIV(ST(0)));
+ int RETVAL;
+ dXSTARG;
+
+ RETVAL = common_lock_depth(context);
+ XSprePUSH; PUSHi((IV)RETVAL);
+ }
+ XSRETURN(1);
+}
+
+#ifdef __cplusplus
+extern "C"
+#endif
+XS(boot_Perf__Trace__Context); /* prototype to pass -Wmissing-prototypes */
+XS(boot_Perf__Trace__Context)
+{
+#ifdef dVAR
+ dVAR; dXSARGS;
+#else
+ dXSARGS;
+#endif
+ const char* file = __FILE__;
+
+ PERL_UNUSED_VAR(cv); /* -W */
+ PERL_UNUSED_VAR(items); /* -W */
+ XS_VERSION_BOOTCHECK ;
+
+ newXSproto("Perf::Trace::Context::common_pc", XS_Perf__Trace__Context_common_pc, file, "$");
+ newXSproto("Perf::Trace::Context::common_flags", XS_Perf__Trace__Context_common_flags, file, "$");
+ newXSproto("Perf::Trace::Context::common_lock_depth", XS_Perf__Trace__Context_common_lock_depth, file, "$");
+ if (PL_unitcheckav)
+ call_list(PL_scopestack_ix, PL_unitcheckav);
+ XSRETURN_YES;
+}
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
new file mode 100644
index 000000000..8c7ea4244
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs
@@ -0,0 +1,42 @@
+/*
+ * Context.xs. XS interfaces for perf script.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include "EXTERN.h"
+#include "perl.h"
+#include "XSUB.h"
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context
+PROTOTYPES: ENABLE
+
+int
+common_pc(context)
+ struct scripting_context * context
+
+int
+common_flags(context)
+ struct scripting_context * context
+
+int
+common_lock_depth(context)
+ struct scripting_context * context
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
new file mode 100644
index 000000000..e8994332d
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+use 5.010000;
+use ExtUtils::MakeMaker;
+# See lib/ExtUtils/MakeMaker.pm for details of how to influence
+# the contents of the Makefile that is written.
+WriteMakefile(
+ NAME => 'Perf::Trace::Context',
+ VERSION_FROM => 'lib/Perf/Trace/Context.pm', # finds $VERSION
+ PREREQ_PM => {}, # e.g., Module::Name => 1.1
+ ($] >= 5.005 ? ## Add these new keywords supported since 5.005
+ (ABSTRACT_FROM => 'lib/Perf/Trace/Context.pm', # retrieve abstract from module
+ AUTHOR => 'Tom Zanussi <tzanussi@gmail.com>') : ()),
+ LIBS => [''], # e.g., '-lm'
+ DEFINE => '-I ../..', # e.g., '-DHAVE_SOMETHING'
+ INC => '-I.', # e.g., '-I. -I/usr/include/other'
+ # Un-comment this if you add C files to link with later:
+ OBJECT => 'Context.o', # link all the C files too
+);
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/README b/tools/perf/scripts/perl/Perf-Trace-Util/README
new file mode 100644
index 000000000..2f0c7f304
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/README
@@ -0,0 +1,59 @@
+Perf-Trace-Util version 0.01
+============================
+
+This module contains utility functions for use with perf script.
+
+Core.pm and Util.pm are pure Perl modules; Core.pm contains routines
+that the core perf support for Perl calls on and should always be
+'used', while Util.pm contains useful but optional utility functions
+that scripts may want to use. Context.pm contains the Perl->C
+interface that allows scripts to access data in the embedding perf
+executable; scripts wishing to do that should 'use Context.pm'.
+
+The Perl->C perf interface is completely driven by Context.xs. If you
+want to add new Perl functions that end up accessing C data in the
+ perf executable, you add descriptions of the new functions here.
+scripting_context is a pointer to the perf data in the perf executable
+that you want to access - it's passed as the second parameter,
+$context, to all handler functions.
+
+After you do that:
+
+ perl Makefile.PL # to create a Makefile for the next step
+ make # to create Context.c
+
+ edit Context.c to add const to the char* file = __FILE__ line in
+ XS(boot_Perf__Trace__Context) to silence a warning/error.
+
+ You can delete the Makefile, object files and anything else that was
+ generated (e.g. blib, the shared library, etc.), except, of course,
+ for Context.c.
+
+ You should then be able to run the normal perf make as usual.
+
+INSTALLATION
+
+Building perf with perf script Perl scripting should install this
+module in the right place.
+
+You should make sure libperl and ExtUtils/Embed.pm are installed first
+e.g. apt-get install libperl-dev or yum install perl-ExtUtils-Embed.
+
+DEPENDENCIES
+
+This module requires these other modules and libraries:
+
+ None
+
+COPYRIGHT AND LICENCE
+
+Copyright (C) 2009 by Tom Zanussi <tzanussi@gmail.com>
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
new file mode 100644
index 000000000..4e2f6039a
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm
@@ -0,0 +1,55 @@
+package Perf::Trace::Context;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+ common_pc common_flags common_lock_depth
+);
+
+our $VERSION = '0.01';
+
+require XSLoader;
+XSLoader::load('Perf::Trace::Context', $VERSION);
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Context - Perl extension for accessing functions in perf.
+
+=head1 SYNOPSIS
+
+ use Perf::Trace::Context;
+
+=head1 SEE ALSO
+
+Perf (script) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.com<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
new file mode 100644
index 000000000..9158458d3
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm
@@ -0,0 +1,192 @@
+package Perf::Trace::Core;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+define_flag_field define_flag_value flag_str dump_flag_fields
+define_symbolic_field define_symbolic_value symbol_str dump_symbolic_fields
+trace_flag_str
+);
+
+our $VERSION = '0.01';
+
+my %trace_flags = (0x00 => "NONE",
+ 0x01 => "IRQS_OFF",
+ 0x02 => "IRQS_NOSUPPORT",
+ 0x04 => "NEED_RESCHED",
+ 0x08 => "HARDIRQ",
+ 0x10 => "SOFTIRQ");
+
+sub trace_flag_str
+{
+ my ($value) = @_;
+
+ my $string;
+
+ my $print_delim = 0;
+
+ foreach my $idx (sort {$a <=> $b} keys %trace_flags) {
+ if (!$value && !$idx) {
+ $string .= "NONE";
+ last;
+ }
+
+ if ($idx && ($value & $idx) == $idx) {
+ if ($print_delim) {
+ $string .= " | ";
+ }
+ $string .= "$trace_flags{$idx}";
+ $print_delim = 1;
+ $value &= ~$idx;
+ }
+ }
+
+ return $string;
+}
+
+my %flag_fields;
+my %symbolic_fields;
+
+sub flag_str
+{
+ my ($event_name, $field_name, $value) = @_;
+
+ my $string;
+
+ if ($flag_fields{$event_name}{$field_name}) {
+ my $print_delim = 0;
+ foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event_name}{$field_name}{"values"}}) {
+ if (!$value && !$idx) {
+ $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+ last;
+ }
+ if ($idx && ($value & $idx) == $idx) {
+ if ($print_delim && $flag_fields{$event_name}{$field_name}{'delim'}) {
+ $string .= " $flag_fields{$event_name}{$field_name}{'delim'} ";
+ }
+ $string .= "$flag_fields{$event_name}{$field_name}{'values'}{$idx}";
+ $print_delim = 1;
+ $value &= ~$idx;
+ }
+ }
+ }
+
+ return $string;
+}
+
+sub define_flag_field
+{
+ my ($event_name, $field_name, $delim) = @_;
+
+ $flag_fields{$event_name}{$field_name}{"delim"} = $delim;
+}
+
+sub define_flag_value
+{
+ my ($event_name, $field_name, $value, $field_str) = @_;
+
+ $flag_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_flag_fields
+{
+ for my $event (keys %flag_fields) {
+ print "event $event:\n";
+ for my $field (keys %{$flag_fields{$event}}) {
+ print " field: $field:\n";
+ print " delim: $flag_fields{$event}{$field}{'delim'}\n";
+ foreach my $idx (sort {$a <=> $b} keys %{$flag_fields{$event}{$field}{"values"}}) {
+ print " value $idx: $flag_fields{$event}{$field}{'values'}{$idx}\n";
+ }
+ }
+ }
+}
+
+sub symbol_str
+{
+ my ($event_name, $field_name, $value) = @_;
+
+ if ($symbolic_fields{$event_name}{$field_name}) {
+ foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event_name}{$field_name}{"values"}}) {
+ if (!$value && !$idx) {
+ return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
+ last;
+ }
+ if ($value == $idx) {
+ return "$symbolic_fields{$event_name}{$field_name}{'values'}{$idx}";
+ }
+ }
+ }
+
+ return undef;
+}
+
+sub define_symbolic_field
+{
+ my ($event_name, $field_name) = @_;
+
+ # nothing to do, really
+}
+
+sub define_symbolic_value
+{
+ my ($event_name, $field_name, $value, $field_str) = @_;
+
+ $symbolic_fields{$event_name}{$field_name}{"values"}{$value} = $field_str;
+}
+
+sub dump_symbolic_fields
+{
+ for my $event (keys %symbolic_fields) {
+ print "event $event:\n";
+ for my $field (keys %{$symbolic_fields{$event}}) {
+ print " field: $field:\n";
+ foreach my $idx (sort {$a <=> $b} keys %{$symbolic_fields{$event}{$field}{"values"}}) {
+ print " value $idx: $symbolic_fields{$event}{$field}{'values'}{$idx}\n";
+ }
+ }
+ }
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Core - Perl extension for perf script
+
+=head1 SYNOPSIS
+
+  use Perf::Trace::Core;
+
+=head1 SEE ALSO
+
+Perf (script) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
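
As an aside on the decoding logic above: trace_flag_str() and flag_str()
both walk the known flag values in ascending order, emit a name for each
bit that is set, clear the bit, and join the names with a delimiter. A
standalone Python sketch of the same idiom (the flag table is copied from
the module; the test values are made up):

    TRACE_FLAGS = {0x00: "NONE", 0x01: "IRQS_OFF", 0x02: "IRQS_NOSUPPORT",
                   0x04: "NEED_RESCHED", 0x08: "HARDIRQ", 0x10: "SOFTIRQ"}

    def trace_flag_str(value):
        names = []
        for bit in sorted(TRACE_FLAGS):
            if not value and not bit:          # a zero value maps to "NONE"
                return TRACE_FLAGS[bit]
            if bit and (value & bit) == bit:   # this flag bit is set
                names.append(TRACE_FLAGS[bit])
                value &= ~bit                  # clear it so leftovers stand out
        return " | ".join(names)

    assert trace_flag_str(0x09) == "IRQS_OFF | HARDIRQ"
    assert trace_flag_str(0x00) == "NONE"
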
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
new file mode 100644
index 000000000..053500114
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm
@@ -0,0 +1,94 @@
+package Perf::Trace::Util;
+
+use 5.010000;
+use strict;
+use warnings;
+
+require Exporter;
+
+our @ISA = qw(Exporter);
+
+our %EXPORT_TAGS = ( 'all' => [ qw(
+) ] );
+
+our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
+
+our @EXPORT = qw(
+avg nsecs nsecs_secs nsecs_nsecs nsecs_str
+clear_term
+);
+
+our $VERSION = '0.01';
+
+sub avg
+{
+ my ($total, $n) = @_;
+
+ return $total / $n;
+}
+
+my $NSECS_PER_SEC = 1000000000;
+
+sub nsecs
+{
+ my ($secs, $nsecs) = @_;
+
+ return $secs * $NSECS_PER_SEC + $nsecs;
+}
+
+sub nsecs_secs {
+ my ($nsecs) = @_;
+
+ return $nsecs / $NSECS_PER_SEC;
+}
+
+sub nsecs_nsecs {
+ my ($nsecs) = @_;
+
+ return $nsecs % $NSECS_PER_SEC;
+}
+
+sub nsecs_str {
+ my ($nsecs) = @_;
+
+ my $str = sprintf("%5u.%09u", nsecs_secs($nsecs), nsecs_nsecs($nsecs));
+
+ return $str;
+}
+
+sub clear_term
+{
+ print "\x1b[H\x1b[2J";
+}
+
+1;
+__END__
+=head1 NAME
+
+Perf::Trace::Util - Perl extension for perf script
+
+=head1 SYNOPSIS
+
+ use Perf::Trace::Util;
+
+=head1 SEE ALSO
+
+Perf (script) documentation
+
+=head1 AUTHOR
+
+Tom Zanussi, E<lt>tzanussi@gmail.comE<gt>
+
+=head1 COPYRIGHT AND LICENSE
+
+Copyright (C) 2009 by Tom Zanussi
+
+This library is free software; you can redistribute it and/or modify
+it under the same terms as Perl itself, either Perl version 5.10.0 or,
+at your option, any later version of Perl 5 you may have available.
+
+Alternatively, this software may be distributed under the terms of the
+GNU General Public License ("GPL") version 2 as published by the Free
+Software Foundation.
+
+=cut
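
The timestamp helpers above exist because perf hands handlers a
(secs, nsecs) pair; folding them into a single nanosecond count makes
intervals easy to subtract and format. A runnable Python equivalent
(names mirror the Perl subs; the sample values are arbitrary):

    NSECS_PER_SEC = 1000000000

    def nsecs(secs, ns):
        return secs * NSECS_PER_SEC + ns

    def nsecs_str(ns):
        # seconds field padded to 5, nanoseconds zero-padded to 9 digits
        return "%5u.%09u" % (ns // NSECS_PER_SEC, ns % NSECS_PER_SEC)

    assert nsecs_str(nsecs(12, 345)) == "   12.000000345"
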
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/typemap b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
new file mode 100644
index 000000000..840836804
--- /dev/null
+++ b/tools/perf/scripts/perl/Perf-Trace-Util/typemap
@@ -0,0 +1 @@
+struct scripting_context * T_PTR
diff --git a/tools/perf/scripts/perl/bin/check-perf-trace-record b/tools/perf/scripts/perl/bin/check-perf-trace-record
new file mode 100644
index 000000000..423ad6aed
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-record b/tools/perf/scripts/perl/bin/failed-syscalls-record
new file mode 100644
index 000000000..74685f318
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_exit $@ || \
+ perf record -e syscalls:sys_exit $@) 2> /dev/null
diff --git a/tools/perf/scripts/perl/bin/failed-syscalls-report b/tools/perf/scripts/perl/bin/failed-syscalls-report
new file mode 100644
index 000000000..9f83cc1ad
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/failed-syscalls-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide failed syscalls
+# args: [comm]
+if [ $# -gt 0 ] ; then
+ if ! expr match "$1" "-" > /dev/null ; then
+ comm=$1
+ shift
+ fi
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/failed-syscalls.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-record b/tools/perf/scripts/perl/bin/rw-by-file-record
new file mode 100644
index 000000000..33efc8673
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-file-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
+
diff --git a/tools/perf/scripts/perl/bin/rw-by-file-report b/tools/perf/scripts/perl/bin/rw-by-file-report
new file mode 100644
index 000000000..77200b3f3
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-file-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: r/w activity for a program, by file
+# args: <comm>
+if [ $# -lt 1 ] ; then
+ echo "usage: rw-by-file <comm>"
+ exit
+fi
+comm=$1
+shift
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-file.pl $comm
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-record b/tools/perf/scripts/perl/bin/rw-by-pid-record
new file mode 100644
index 000000000..7cb9db230
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/tools/perf/scripts/perl/bin/rw-by-pid-report b/tools/perf/scripts/perl/bin/rw-by-pid-report
new file mode 100644
index 000000000..a27b9f311
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rw-by-pid-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: system-wide r/w activity
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rw-by-pid.pl
diff --git a/tools/perf/scripts/perl/bin/rwtop-record b/tools/perf/scripts/perl/bin/rwtop-record
new file mode 100644
index 000000000..7cb9db230
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rwtop-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/tools/perf/scripts/perl/bin/rwtop-report b/tools/perf/scripts/perl/bin/rwtop-report
new file mode 100644
index 000000000..83e11ec2e
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/rwtop-report
@@ -0,0 +1,20 @@
+#!/bin/bash
+# description: system-wide r/w top
+# args: [interval]
+n_args=0
+for i in "$@"
+do
+ if expr match "$i" "-" > /dev/null ; then
+ break
+ fi
+ n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 1 ] ; then
+ echo "usage: rwtop-report [interval]"
+ exit
+fi
+if [ "$n_args" -gt 0 ] ; then
+ interval=$1
+ shift
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/rwtop.pl $interval
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-record b/tools/perf/scripts/perl/bin/wakeup-latency-record
new file mode 100644
index 000000000..464251a1b
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-record
@@ -0,0 +1,6 @@
+#!/bin/bash
+perf record -e sched:sched_switch -e sched:sched_wakeup $@
+
+
+
+
diff --git a/tools/perf/scripts/perl/bin/wakeup-latency-report b/tools/perf/scripts/perl/bin/wakeup-latency-report
new file mode 100644
index 000000000..889e8130c
--- /dev/null
+++ b/tools/perf/scripts/perl/bin/wakeup-latency-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: system-wide min/max/avg wakeup latency
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/wakeup-latency.pl
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
new file mode 100644
index 000000000..4e7076c20
--- /dev/null
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -0,0 +1,106 @@
+# perf script event handlers, generated by perf script -g perl
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc. Basically, if this script runs successfully and
+# displays expected results, perl scripting support should be ok.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+sub trace_begin
+{
+ print "trace_begin\n";
+}
+
+sub trace_end
+{
+ print "trace_end\n";
+
+ print_unhandled();
+}
+
+sub irq::softirq_entry
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $vec) = @_;
+
+ print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm);
+
+ print_uncommon($context);
+
+ printf("vec=%s\n",
+ symbol_str("irq::softirq_entry", "vec", $vec));
+}
+
+sub kmem::kmalloc
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $call_site, $ptr, $bytes_req, $bytes_alloc,
+ $gfp_flags) = @_;
+
+ print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm);
+
+ print_uncommon($context);
+
+    printf("call_site=%p, ptr=%p, bytes_req=%u, bytes_alloc=%u, ".
+	   "gfp_flags=%s\n",
+	   $call_site, $ptr, $bytes_req, $bytes_alloc,
+	   flag_str("kmem::kmalloc", "gfp_flags", $gfp_flags));
+}
+
+# print trace fields not included in handler args
+sub print_uncommon
+{
+ my ($context) = @_;
+
+ printf("common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, ",
+ common_pc($context), trace_flag_str(common_flags($context)),
+ common_lock_depth($context));
+
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+ if ((scalar keys %unhandled) == 0) {
+ return;
+ }
+
+ print "\nunhandled events:\n\n";
+
+ printf("%-40s %10s\n", "event", "count");
+ printf("%-40s %10s\n", "----------------------------------------",
+ "-----------");
+
+ foreach my $event_name (keys %unhandled) {
+ printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
+ }
+}
+
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm) = @_;
+
+ $unhandled{$event_name}++;
+}
+
+sub print_header
+{
+ my ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;
+
+ printf("%-20s %5u %05u.%09u %8u %-20s ",
+ $event_name, $cpu, $secs, $nsecs, $pid, $comm);
+}
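
The trace_unhandled()/print_unhandled() pair above is the standard
catch-all in these scripts: events without a named handler just bump a
per-event counter, dumped once at trace_end. A runnable Python rendering
of the same bookkeeping (the event names here are invented):

    from collections import defaultdict

    unhandled = defaultdict(int)
    for name in ("sched:sched_stat_runtime", "kmem:kfree",
                 "sched:sched_stat_runtime"):
        unhandled[name] += 1                 # what trace_unhandled() does

    print("%-40s %10s" % ("event", "count"))
    for name, count in sorted(unhandled.items()):
        print("%-40s %10d" % (name, count))
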
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
new file mode 100644
index 000000000..55e7ae4c5
--- /dev/null
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -0,0 +1,47 @@
+# failed system call counts
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide failed system call totals
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Context;
+use Perf::Trace::Util;
+
+my $for_comm = shift;
+
+my %failed_syscalls;
+
+sub raw_syscalls::sys_exit
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $id, $ret) = @_;
+
+ if ($ret < 0) {
+ $failed_syscalls{$common_comm}++;
+ }
+}
+
+sub syscalls::sys_exit
+{
+ raw_syscalls::sys_exit(@_)
+}
+
+sub trace_end
+{
+ printf("\nfailed syscalls by comm:\n\n");
+
+ printf("%-20s %10s\n", "comm", "# errors");
+    printf("%-20s %10s\n", "--------------------", "----------");
+
+ foreach my $comm (sort {$failed_syscalls{$b} <=> $failed_syscalls{$a}}
+ keys %failed_syscalls) {
+ next if ($for_comm && $comm ne $for_comm);
+
+ printf("%-20s %10s\n", $comm, $failed_syscalls{$comm});
+ }
+}
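
The aggregation in the sys_exit handler above reduces to one counter
keyed by comm, incremented whenever ret is negative. A pure-Python
sketch, with made-up (comm, ret) samples standing in for the tracepoint
arguments:

    from collections import Counter

    samples = [("bash", -2), ("bash", 0), ("sshd", -13), ("bash", -2)]
    failed = Counter(comm for comm, ret in samples if ret < 0)

    for comm, count in failed.most_common():   # sorted by count, descending
        print("%-20s %10d" % (comm, count))
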
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
new file mode 100644
index 000000000..74844ee2b
--- /dev/null
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -0,0 +1,106 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for the files read from or written to by a given program
+
+# The common_* event handler fields are the most useful fields common to
+# all events. They don't necessarily correspond to the 'common_*' fields
+# in the status files. Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my $usage = "perf script -s rw-by-file.pl <comm>\n";
+
+my $for_comm = shift or die $usage;
+
+my %reads;
+my %writes;
+
+sub syscalls::sys_enter_read
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+ if ($common_comm eq $for_comm) {
+ $reads{$fd}{bytes_requested} += $count;
+ $reads{$fd}{total_reads}++;
+ }
+}
+
+sub syscalls::sys_enter_write
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+
+ if ($common_comm eq $for_comm) {
+ $writes{$fd}{bytes_written} += $count;
+ $writes{$fd}{total_writes}++;
+ }
+}
+
+sub trace_end
+{
+ printf("file read counts for $for_comm:\n\n");
+
+ printf("%6s %10s %10s\n", "fd", "# reads", "bytes_requested");
+ printf("%6s %10s %10s\n", "------", "----------", "-----------");
+
+ foreach my $fd (sort {$reads{$b}{bytes_requested} <=>
+ $reads{$a}{bytes_requested}} keys %reads) {
+ my $total_reads = $reads{$fd}{total_reads};
+ my $bytes_requested = $reads{$fd}{bytes_requested};
+ printf("%6u %10u %10u\n", $fd, $total_reads, $bytes_requested);
+ }
+
+ printf("\nfile write counts for $for_comm:\n\n");
+
+ printf("%6s %10s %10s\n", "fd", "# writes", "bytes_written");
+ printf("%6s %10s %10s\n", "------", "----------", "-----------");
+
+ foreach my $fd (sort {$writes{$b}{bytes_written} <=>
+ $writes{$a}{bytes_written}} keys %writes) {
+ my $total_writes = $writes{$fd}{total_writes};
+ my $bytes_written = $writes{$fd}{bytes_written};
+ printf("%6u %10u %10u\n", $fd, $total_writes, $bytes_written);
+ }
+
+ print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+ if ((scalar keys %unhandled) == 0) {
+ return;
+ }
+
+ print "\nunhandled events:\n\n";
+
+ printf("%-40s %10s\n", "event", "count");
+ printf("%-40s %10s\n", "----------------------------------------",
+ "-----------");
+
+ foreach my $event_name (keys %unhandled) {
+ printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
+ }
+}
+
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm) = @_;
+
+ $unhandled{$event_name}++;
+}
+
+
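
trace_end() above sorts file descriptors by a nested hash field before
printing each table. The same sort in runnable Python, with invented
per-fd counters:

    reads = {3: {"total_reads": 5, "bytes_requested": 4096},
             7: {"total_reads": 2, "bytes_requested": 65536}}

    for fd in sorted(reads, key=lambda f: reads[f]["bytes_requested"],
                     reverse=True):
        r = reads[fd]
        print("%6u %10u %10u" % (fd, r["total_reads"], r["bytes_requested"]))
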
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
new file mode 100644
index 000000000..9db23c9da
--- /dev/null
+++ b/tools/perf/scripts/perl/rw-by-pid.pl
@@ -0,0 +1,184 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display r/w activity for all processes
+
+# The common_* event handler fields are the most useful fields common to
+# all events. They don't necessarily correspond to the 'common_*' fields
+# in the status files. Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %reads;
+my %writes;
+
+sub syscalls::sys_exit_read
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $ret) = @_;
+
+ if ($ret > 0) {
+ $reads{$common_pid}{bytes_read} += $ret;
+ } else {
+ if (!defined ($reads{$common_pid}{bytes_read})) {
+ $reads{$common_pid}{bytes_read} = 0;
+ }
+ $reads{$common_pid}{errors}{$ret}++;
+ }
+}
+
+sub syscalls::sys_enter_read
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $fd, $buf, $count) = @_;
+
+ $reads{$common_pid}{bytes_requested} += $count;
+ $reads{$common_pid}{total_reads}++;
+ $reads{$common_pid}{comm} = $common_comm;
+}
+
+sub syscalls::sys_exit_write
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $ret) = @_;
+
+ if ($ret <= 0) {
+ $writes{$common_pid}{errors}{$ret}++;
+ }
+}
+
+sub syscalls::sys_enter_write
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $fd, $buf, $count) = @_;
+
+ $writes{$common_pid}{bytes_written} += $count;
+ $writes{$common_pid}{total_writes}++;
+ $writes{$common_pid}{comm} = $common_comm;
+}
+
+sub trace_end
+{
+ printf("read counts by pid:\n\n");
+
+ printf("%6s %20s %10s %10s %10s\n", "pid", "comm",
+ "# reads", "bytes_requested", "bytes_read");
+ printf("%6s %-20s %10s %10s %10s\n", "------", "--------------------",
+ "-----------", "----------", "----------");
+
+ foreach my $pid (sort { ($reads{$b}{bytes_read} || 0) <=>
+ ($reads{$a}{bytes_read} || 0) } keys %reads) {
+ my $comm = $reads{$pid}{comm} || "";
+ my $total_reads = $reads{$pid}{total_reads} || 0;
+ my $bytes_requested = $reads{$pid}{bytes_requested} || 0;
+ my $bytes_read = $reads{$pid}{bytes_read} || 0;
+
+ printf("%6s %-20s %10s %10s %10s\n", $pid, $comm,
+ $total_reads, $bytes_requested, $bytes_read);
+ }
+
+ printf("\nfailed reads by pid:\n\n");
+
+ printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors");
+ printf("%6s %20s %6s %10s\n", "------", "--------------------",
+ "------", "----------");
+
+ my @errcounts = ();
+
+ foreach my $pid (keys %reads) {
+ foreach my $error (keys %{$reads{$pid}{errors}}) {
+ my $comm = $reads{$pid}{comm} || "";
+ my $errcount = $reads{$pid}{errors}{$error} || 0;
+ push @errcounts, [$pid, $comm, $error, $errcount];
+ }
+ }
+
+ @errcounts = sort { $b->[3] <=> $a->[3] } @errcounts;
+
+ for my $i (0 .. $#errcounts) {
+ printf("%6d %-20s %6d %10s\n", $errcounts[$i][0],
+ $errcounts[$i][1], $errcounts[$i][2], $errcounts[$i][3]);
+ }
+
+ printf("\nwrite counts by pid:\n\n");
+
+ printf("%6s %20s %10s %10s\n", "pid", "comm",
+ "# writes", "bytes_written");
+ printf("%6s %-20s %10s %10s\n", "------", "--------------------",
+ "-----------", "----------");
+
+ foreach my $pid (sort { ($writes{$b}{bytes_written} || 0) <=>
+ ($writes{$a}{bytes_written} || 0)} keys %writes) {
+ my $comm = $writes{$pid}{comm} || "";
+ my $total_writes = $writes{$pid}{total_writes} || 0;
+ my $bytes_written = $writes{$pid}{bytes_written} || 0;
+
+ printf("%6s %-20s %10s %10s\n", $pid, $comm,
+ $total_writes, $bytes_written);
+ }
+
+ printf("\nfailed writes by pid:\n\n");
+
+ printf("%6s %20s %6s %10s\n", "pid", "comm", "error #", "# errors");
+ printf("%6s %20s %6s %10s\n", "------", "--------------------",
+ "------", "----------");
+
+ @errcounts = ();
+
+ foreach my $pid (keys %writes) {
+ foreach my $error (keys %{$writes{$pid}{errors}}) {
+ my $comm = $writes{$pid}{comm} || "";
+ my $errcount = $writes{$pid}{errors}{$error} || 0;
+ push @errcounts, [$pid, $comm, $error, $errcount];
+ }
+ }
+
+ @errcounts = sort { $b->[3] <=> $a->[3] } @errcounts;
+
+ for my $i (0 .. $#errcounts) {
+ printf("%6d %-20s %6d %10s\n", $errcounts[$i][0],
+ $errcounts[$i][1], $errcounts[$i][2], $errcounts[$i][3]);
+ }
+
+ print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+ if ((scalar keys %unhandled) == 0) {
+ return;
+ }
+
+ print "\nunhandled events:\n\n";
+
+ printf("%-40s %10s\n", "event", "count");
+ printf("%-40s %10s\n", "----------------------------------------",
+ "-----------");
+
+ foreach my $event_name (keys %unhandled) {
+ printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
+ }
+}
+
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm) = @_;
+
+ $unhandled{$event_name}++;
+}
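
The @errcounts passages above flatten the nested {pid}{errors}{errno}
counters into rows so a single sort by count can rank them. A runnable
Python version of the same flatten-then-sort (the data is synthetic):

    reads = {1234: {"comm": "bash", "errors": {-11: 3, -4: 1}},
             5678: {"comm": "sshd", "errors": {-11: 7}}}

    rows = [(pid, d["comm"], err, n)
            for pid, d in reads.items()
            for err, n in d.get("errors", {}).items()]
    rows.sort(key=lambda r: r[3], reverse=True)   # most frequent errno first

    for pid, comm, err, n in rows:
        print("%6d %-20s %6d %10d" % (pid, comm, err, n))
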
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
new file mode 100644
index 000000000..8b2078702
--- /dev/null
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -0,0 +1,203 @@
+#!/usr/bin/perl -w
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# read/write top
+#
+# Periodically displays system-wide r/w call activity, broken down by
+# pid. If an [interval] arg is specified, the display will be
+# refreshed every [interval] seconds. The default interval is 3
+# seconds.
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+use POSIX qw/SIGALRM SA_RESTART/;
+
+my $default_interval = 3;
+my $nlines = 20;
+my $print_thread;
+my $print_pending = 0;
+
+my %reads;
+my %writes;
+
+my $interval = shift;
+if (!$interval) {
+ $interval = $default_interval;
+}
+
+sub syscalls::sys_exit_read
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $ret) = @_;
+
+ print_check();
+
+ if ($ret > 0) {
+ $reads{$common_pid}{bytes_read} += $ret;
+ } else {
+ if (!defined ($reads{$common_pid}{bytes_read})) {
+ $reads{$common_pid}{bytes_read} = 0;
+ }
+ $reads{$common_pid}{errors}{$ret}++;
+ }
+}
+
+sub syscalls::sys_enter_read
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $fd, $buf, $count) = @_;
+
+ print_check();
+
+ $reads{$common_pid}{bytes_requested} += $count;
+ $reads{$common_pid}{total_reads}++;
+ $reads{$common_pid}{comm} = $common_comm;
+}
+
+sub syscalls::sys_exit_write
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $ret) = @_;
+
+ print_check();
+
+ if ($ret <= 0) {
+ $writes{$common_pid}{errors}{$ret}++;
+ }
+}
+
+sub syscalls::sys_enter_write
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $nr, $fd, $buf, $count) = @_;
+
+ print_check();
+
+ $writes{$common_pid}{bytes_written} += $count;
+ $writes{$common_pid}{total_writes}++;
+ $writes{$common_pid}{comm} = $common_comm;
+}
+
+sub trace_begin
+{
+ my $sa = POSIX::SigAction->new(\&set_print_pending);
+ $sa->flags(SA_RESTART);
+ $sa->safe(1);
+ POSIX::sigaction(SIGALRM, $sa) or die "Can't set SIGALRM handler: $!\n";
+ alarm 1;
+}
+
+sub trace_end
+{
+ print_unhandled();
+ print_totals();
+}
+
+sub print_check()
+{
+ if ($print_pending == 1) {
+ $print_pending = 0;
+ print_totals();
+ }
+}
+
+sub set_print_pending()
+{
+ $print_pending = 1;
+ alarm $interval;
+}
+
+sub print_totals
+{
+ my $count;
+
+ $count = 0;
+
+ clear_term();
+
+ printf("\nread counts by pid:\n\n");
+
+ printf("%6s %20s %10s %10s %10s\n", "pid", "comm",
+ "# reads", "bytes_req", "bytes_read");
+ printf("%6s %-20s %10s %10s %10s\n", "------", "--------------------",
+ "----------", "----------", "----------");
+
+ foreach my $pid (sort { ($reads{$b}{bytes_read} || 0) <=>
+ ($reads{$a}{bytes_read} || 0) } keys %reads) {
+ my $comm = $reads{$pid}{comm} || "";
+ my $total_reads = $reads{$pid}{total_reads} || 0;
+ my $bytes_requested = $reads{$pid}{bytes_requested} || 0;
+ my $bytes_read = $reads{$pid}{bytes_read} || 0;
+
+ printf("%6s %-20s %10s %10s %10s\n", $pid, $comm,
+ $total_reads, $bytes_requested, $bytes_read);
+
+ if (++$count == $nlines) {
+ last;
+ }
+ }
+
+ $count = 0;
+
+ printf("\nwrite counts by pid:\n\n");
+
+ printf("%6s %20s %10s %13s\n", "pid", "comm",
+ "# writes", "bytes_written");
+ printf("%6s %-20s %10s %13s\n", "------", "--------------------",
+ "----------", "-------------");
+
+ foreach my $pid (sort { ($writes{$b}{bytes_written} || 0) <=>
+ ($writes{$a}{bytes_written} || 0)} keys %writes) {
+ my $comm = $writes{$pid}{comm} || "";
+ my $total_writes = $writes{$pid}{total_writes} || 0;
+ my $bytes_written = $writes{$pid}{bytes_written} || 0;
+
+ printf("%6s %-20s %10s %13s\n", $pid, $comm,
+ $total_writes, $bytes_written);
+
+ if (++$count == $nlines) {
+ last;
+ }
+ }
+
+ %reads = ();
+ %writes = ();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+ if ((scalar keys %unhandled) == 0) {
+ return;
+ }
+
+ print "\nunhandled events:\n\n";
+
+ printf("%-40s %10s\n", "event", "count");
+ printf("%-40s %10s\n", "----------------------------------------",
+ "-----------");
+
+ foreach my $event_name (keys %unhandled) {
+ printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
+ }
+}
+
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm) = @_;
+
+ $unhandled{$event_name}++;
+}
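
The signal handling in rwtop.pl is worth noting: the SIGALRM handler only
sets a flag and re-arms the alarm, and the event handlers poll that flag
via print_check(), so the expensive printing never runs in signal
context. A minimal runnable Python sketch of the same pattern (Unix-only;
the sleep loop stands in for the stream of events):

    import signal, time

    print_pending = False

    def set_print_pending(signum, frame):
        global print_pending
        print_pending = True
        signal.alarm(1)              # re-arm, like "alarm $interval" above

    signal.signal(signal.SIGALRM, set_print_pending)
    signal.alarm(1)

    for _ in range(3):               # stand-in for the event-handler loop
        time.sleep(1.2)
        if print_pending:            # what print_check() does
            print_pending = False
            print("totals would be printed here")
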
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
new file mode 100644
index 000000000..d9143dcec
--- /dev/null
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -0,0 +1,107 @@
+#!/usr/bin/perl -w
+# (c) 2009, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+
+# Display avg/min/max wakeup latency
+
+# The common_* event handler fields are the most useful fields common to
+# all events. They don't necessarily correspond to the 'common_*' fields
+# in the status files. Those fields not available as handler params can
+# be retrieved via script functions of the form get_common_*().
+
+use 5.010000;
+use strict;
+use warnings;
+
+use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+use lib "./Perf-Trace-Util/lib";
+use Perf::Trace::Core;
+use Perf::Trace::Util;
+
+my %last_wakeup;
+
+my $max_wakeup_latency;
+my $min_wakeup_latency;
+my $total_wakeup_latency = 0;
+my $total_wakeups = 0;
+
+sub sched::sched_switch
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
+ $next_prio) = @_;
+
+ my $wakeup_ts = $last_wakeup{$common_cpu}{ts};
+ if ($wakeup_ts) {
+ my $switch_ts = nsecs($common_secs, $common_nsecs);
+ my $wakeup_latency = $switch_ts - $wakeup_ts;
+ if ($wakeup_latency > $max_wakeup_latency) {
+ $max_wakeup_latency = $wakeup_latency;
+ }
+ if ($wakeup_latency < $min_wakeup_latency) {
+ $min_wakeup_latency = $wakeup_latency;
+ }
+ $total_wakeup_latency += $wakeup_latency;
+ $total_wakeups++;
+ }
+ $last_wakeup{$common_cpu}{ts} = 0;
+}
+
+sub sched::sched_wakeup
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm,
+ $comm, $pid, $prio, $success, $target_cpu) = @_;
+
+ $last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
+}
+
+sub trace_begin
+{
+ $min_wakeup_latency = 1000000000;
+ $max_wakeup_latency = 0;
+}
+
+sub trace_end
+{
+ printf("wakeup_latency stats:\n\n");
+ print "total_wakeups: $total_wakeups\n";
+ if ($total_wakeups) {
+ printf("avg_wakeup_latency (ns): %u\n",
+ avg($total_wakeup_latency, $total_wakeups));
+ } else {
+ printf("avg_wakeup_latency (ns): N/A\n");
+ }
+ printf("min_wakeup_latency (ns): %u\n", $min_wakeup_latency);
+ printf("max_wakeup_latency (ns): %u\n", $max_wakeup_latency);
+
+ print_unhandled();
+}
+
+my %unhandled;
+
+sub print_unhandled
+{
+ if ((scalar keys %unhandled) == 0) {
+ return;
+ }
+
+ print "\nunhandled events:\n\n";
+
+ printf("%-40s %10s\n", "event", "count");
+ printf("%-40s %10s\n", "----------------------------------------",
+ "-----------");
+
+ foreach my $event_name (keys %unhandled) {
+ printf("%-40s %10d\n", $event_name, $unhandled{$event_name});
+ }
+}
+
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
+ $common_pid, $common_comm) = @_;
+
+ $unhandled{$event_name}++;
+}
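
The core of wakeup-latency.pl is the per-CPU pairing above: remember the
last sched_wakeup timestamp for each target CPU, and on the next
sched_switch on that CPU take the difference as one latency sample. A
runnable Python sketch with a fabricated event stream:

    last_wakeup = {}
    latencies = []

    trace = [("wakeup", 0, 100), ("switch", 0, 160),   # (event, cpu, ts_ns)
             ("wakeup", 1, 200), ("switch", 1, 230)]

    for ev, cpu, ts in trace:
        if ev == "wakeup":
            last_wakeup[cpu] = ts
        elif ev == "switch" and last_wakeup.get(cpu):
            latencies.append(ts - last_wakeup[cpu])
            last_wakeup[cpu] = 0          # consume the pending wakeup

    print("min/avg/max (ns):", min(latencies),
          sum(latencies) // len(latencies), max(latencies))
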
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
new file mode 100644
index 000000000..aefc15c94
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -0,0 +1,3 @@
+libperf-y += Context.o
+
+CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
new file mode 100644
index 000000000..1a0d27757
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c
@@ -0,0 +1,116 @@
+/*
+ * Context.c. Python interfaces for perf script.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <Python.h>
+#include "../../../perf.h"
+#include "../../../util/trace-event.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyCapsule_GetPointer(arg1, arg2) \
+ PyCObject_AsVoidPtr(arg1)
+
+PyMODINIT_FUNC initperf_trace_context(void);
+#else
+#define _PyCapsule_GetPointer(arg1, arg2) \
+ PyCapsule_GetPointer((arg1), (arg2))
+
+PyMODINIT_FUNC PyInit_perf_trace_context(void);
+#endif
+
+static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args)
+{
+ static struct scripting_context *scripting_context;
+ PyObject *context;
+ int retval;
+
+ if (!PyArg_ParseTuple(args, "O", &context))
+ return NULL;
+
+ scripting_context = _PyCapsule_GetPointer(context, NULL);
+ retval = common_pc(scripting_context);
+
+ return Py_BuildValue("i", retval);
+}
+
+static PyObject *perf_trace_context_common_flags(PyObject *obj,
+ PyObject *args)
+{
+ static struct scripting_context *scripting_context;
+ PyObject *context;
+ int retval;
+
+ if (!PyArg_ParseTuple(args, "O", &context))
+ return NULL;
+
+ scripting_context = _PyCapsule_GetPointer(context, NULL);
+ retval = common_flags(scripting_context);
+
+ return Py_BuildValue("i", retval);
+}
+
+static PyObject *perf_trace_context_common_lock_depth(PyObject *obj,
+ PyObject *args)
+{
+ static struct scripting_context *scripting_context;
+ PyObject *context;
+ int retval;
+
+ if (!PyArg_ParseTuple(args, "O", &context))
+ return NULL;
+
+ scripting_context = _PyCapsule_GetPointer(context, NULL);
+ retval = common_lock_depth(scripting_context);
+
+ return Py_BuildValue("i", retval);
+}
+
+static PyMethodDef ContextMethods[] = {
+ { "common_pc", perf_trace_context_common_pc, METH_VARARGS,
+ "Get the common preempt count event field value."},
+ { "common_flags", perf_trace_context_common_flags, METH_VARARGS,
+ "Get the common flags event field value."},
+ { "common_lock_depth", perf_trace_context_common_lock_depth,
+ METH_VARARGS, "Get the common lock depth event field value."},
+ { NULL, NULL, 0, NULL}
+};
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf_trace_context(void)
+{
+ (void) Py_InitModule("perf_trace_context", ContextMethods);
+}
+#else
+PyMODINIT_FUNC PyInit_perf_trace_context(void)
+{
+ static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "perf_trace_context", /* m_name */
+ "", /* m_doc */
+ -1, /* m_size */
+ ContextMethods, /* m_methods */
+ NULL, /* m_reload */
+ NULL, /* m_traverse */
+ NULL, /* m_clear */
+ NULL, /* m_free */
+ };
+ return PyModule_Create(&moduledef);
+}
+#endif
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
new file mode 100644
index 000000000..54ace2f6b
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py
@@ -0,0 +1,116 @@
+# Core.py - Python extension for perf script, core functions
+#
+# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
+#
+# This software may be distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+from collections import defaultdict
+
+def autodict():
+ return defaultdict(autodict)
+
+flag_fields = autodict()
+symbolic_fields = autodict()
+
+def define_flag_field(event_name, field_name, delim):
+ flag_fields[event_name][field_name]['delim'] = delim
+
+def define_flag_value(event_name, field_name, value, field_str):
+ flag_fields[event_name][field_name]['values'][value] = field_str
+
+def define_symbolic_field(event_name, field_name):
+ # nothing to do, really
+ pass
+
+def define_symbolic_value(event_name, field_name, value, field_str):
+ symbolic_fields[event_name][field_name]['values'][value] = field_str
+
+def flag_str(event_name, field_name, value):
+ string = ""
+
+ if flag_fields[event_name][field_name]:
+ print_delim = 0
+ for idx in sorted(flag_fields[event_name][field_name]['values']):
+ if not value and not idx:
+ string += flag_fields[event_name][field_name]['values'][idx]
+ break
+ if idx and (value & idx) == idx:
+ if print_delim and flag_fields[event_name][field_name]['delim']:
+ string += " " + flag_fields[event_name][field_name]['delim'] + " "
+ string += flag_fields[event_name][field_name]['values'][idx]
+ print_delim = 1
+ value &= ~idx
+
+ return string
+
+def symbol_str(event_name, field_name, value):
+ string = ""
+
+ if symbolic_fields[event_name][field_name]:
+ for idx in sorted(symbolic_fields[event_name][field_name]['values']):
+ if not value and not idx:
+ string = symbolic_fields[event_name][field_name]['values'][idx]
+ break
+ if (value == idx):
+ string = symbolic_fields[event_name][field_name]['values'][idx]
+ break
+
+ return string
+
+trace_flags = { 0x00: "NONE", \
+ 0x01: "IRQS_OFF", \
+ 0x02: "IRQS_NOSUPPORT", \
+ 0x04: "NEED_RESCHED", \
+ 0x08: "HARDIRQ", \
+ 0x10: "SOFTIRQ" }
+
+def trace_flag_str(value):
+ string = ""
+ print_delim = 0
+
+ for idx in trace_flags:
+ if not value and not idx:
+ string += "NONE"
+ break
+
+ if idx and (value & idx) == idx:
+ if print_delim:
+ string += " | ";
+ string += trace_flags[idx]
+ print_delim = 1
+ value &= ~idx
+
+ return string
+
+
+def taskState(state):
+ states = {
+ 0 : "R",
+ 1 : "S",
+ 2 : "D",
+ 64: "DEAD"
+ }
+
+ if state not in states:
+ return "Unknown"
+
+ return states[state]
+
+
+class EventHeaders:
+ def __init__(self, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm, common_callchain):
+ self.cpu = common_cpu
+ self.secs = common_secs
+ self.nsecs = common_nsecs
+ self.pid = common_pid
+ self.comm = common_comm
+ self.callchain = common_callchain
+
+ def ts(self):
+ return (self.secs * (10 ** 9)) + self.nsecs
+
+ def ts_format(self):
+ return "%d.%d" % (self.secs, int(self.nsecs / 1000))
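
The autodict() helper above is the trick that lets the define_* functions
assign into arbitrarily deep paths without any setup: it is a defaultdict
whose default factory is itself, so intermediate keys spring into
existence on first access. Runnable on its own (the event and field names
are examples):

    from collections import defaultdict

    def autodict():
        return defaultdict(autodict)

    flag_fields = autodict()
    flag_fields["irq:softirq_entry"]["vec"]["values"][1] = "TIMER"
    print(flag_fields["irq:softirq_entry"]["vec"]["values"][1])   # TIMER
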
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
new file mode 100755
index 000000000..21a7a1298
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py
@@ -0,0 +1,97 @@
+# EventClass.py
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is a library defining some event type classes, which can
+# be used by other scripts to analyze perf samples.
+#
+# Currently there are just a few classes defined as examples:
+# PerfEvent is the base class for all perf event samples, PebsEvent
+# is a hardware-based Intel x86 PEBS event, and users can add more
+# SW/HW event classes based on their requirements.
+from __future__ import print_function
+
+import struct
+
+# Event types, user could add more here
+EVTYPE_GENERIC = 0
+EVTYPE_PEBS = 1 # Basic PEBS event
+EVTYPE_PEBS_LL = 2 # PEBS event with load latency info
+EVTYPE_IBS = 3
+
+#
+# Currently we don't have a good way to tell the event type directly,
+# so we infer it from the size of the raw buffer: a raw PEBS event with
+# load latency data is 176 bytes, while a plain PEBS event is 144 bytes.
+#
+def create_event(name, comm, dso, symbol, raw_buf):
+ if (len(raw_buf) == 144):
+ event = PebsEvent(name, comm, dso, symbol, raw_buf)
+ elif (len(raw_buf) == 176):
+ event = PebsNHM(name, comm, dso, symbol, raw_buf)
+ else:
+ event = PerfEvent(name, comm, dso, symbol, raw_buf)
+
+ return event
+
+class PerfEvent(object):
+ event_num = 0
+ def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_GENERIC):
+ self.name = name
+ self.comm = comm
+ self.dso = dso
+ self.symbol = symbol
+ self.raw_buf = raw_buf
+ self.ev_type = ev_type
+ PerfEvent.event_num += 1
+
+ def show(self):
+ print("PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" %
+ (self.name, self.symbol, self.comm, self.dso))
+
+#
+# Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer
+# contains the context info when that event happened: the EFLAGS and
+# linear IP info, as well as all the registers.
+#
+class PebsEvent(PerfEvent):
+ pebs_num = 0
+ def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS):
+ tmp_buf=raw_buf[0:80]
+ flags, ip, ax, bx, cx, dx, si, di, bp, sp = struct.unpack('QQQQQQQQQQ', tmp_buf)
+ self.flags = flags
+ self.ip = ip
+ self.ax = ax
+ self.bx = bx
+ self.cx = cx
+ self.dx = dx
+ self.si = si
+ self.di = di
+ self.bp = bp
+ self.sp = sp
+
+ PerfEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type)
+ PebsEvent.pebs_num += 1
+ del tmp_buf
+
+#
+# Intel Nehalem and Westmere support PEBS plus Load Latency info, which
+# lies in the four 64-bit words written after the PEBS data:
+# Status: records the IA32_PERF_GLOBAL_STATUS register value
+# DLA: Data Linear Address (EIP)
+# DSE: Data Source Encoding, where the latency happens, hit or miss
+# in L1/L2/L3 or IO operations
+# LAT: the actual latency in cycles
+#
+class PebsNHM(PebsEvent):
+ pebs_nhm_num = 0
+ def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS_LL):
+ tmp_buf=raw_buf[144:176]
+ status, dla, dse, lat = struct.unpack('QQQQ', tmp_buf)
+ self.status = status
+ self.dla = dla
+ self.dse = dse
+ self.lat = lat
+
+ PebsEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type)
+ PebsNHM.pebs_nhm_num += 1
+ del tmp_buf
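
PebsEvent's constructor is a fixed-layout decode: ten native-order u64s
(EFLAGS, linear IP, then eight general-purpose registers) at the start of
the raw buffer. A self-contained sketch of that unpack, with a synthetic
buffer in place of a real perf raw sample:

    import struct

    # ten u64s: flags, ip, then ax/bx/cx/dx/si/di/bp/sp
    raw_buf = struct.pack('QQQQQQQQQQ', 0x246, 0xffffffff81000000,
                          1, 2, 3, 4, 5, 6, 7, 8)
    flags, ip, ax, bx, cx, dx, si, di, bp, sp = \
        struct.unpack('QQQQQQQQQQ', raw_buf[0:80])
    print(hex(ip), hex(flags))
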
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
new file mode 100644
index 000000000..cac7b2542
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py
@@ -0,0 +1,184 @@
+# SchedGui.py - Python extension for perf script, basic GUI code for
+# traces drawing and overview.
+#
+# Copyright (C) 2010 by Frederic Weisbecker <fweisbec@gmail.com>
+#
+# This software is distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+
+
+try:
+ import wx
+except ImportError:
+ raise ImportError("You need to install the wxpython lib for this script")
+
+
+class RootFrame(wx.Frame):
+ Y_OFFSET = 100
+ RECT_HEIGHT = 100
+ RECT_SPACE = 50
+ EVENT_MARKING_WIDTH = 5
+
+ def __init__(self, sched_tracer, title, parent = None, id = -1):
+ wx.Frame.__init__(self, parent, id, title)
+
+ (self.screen_width, self.screen_height) = wx.GetDisplaySize()
+ self.screen_width -= 10
+ self.screen_height -= 10
+ self.zoom = 0.5
+ self.scroll_scale = 20
+ self.sched_tracer = sched_tracer
+ self.sched_tracer.set_root_win(self)
+ (self.ts_start, self.ts_end) = sched_tracer.interval()
+ self.update_width_virtual()
+ self.nr_rects = sched_tracer.nr_rectangles() + 1
+ self.height_virtual = RootFrame.Y_OFFSET + (self.nr_rects * (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE))
+
+ # whole window panel
+ self.panel = wx.Panel(self, size=(self.screen_width, self.screen_height))
+
+ # scrollable container
+ self.scroll = wx.ScrolledWindow(self.panel)
+ self.scroll.SetScrollbars(self.scroll_scale, self.scroll_scale, self.width_virtual / self.scroll_scale, self.height_virtual / self.scroll_scale)
+ self.scroll.EnableScrolling(True, True)
+ self.scroll.SetFocus()
+
+ # scrollable drawing area
+ self.scroll_panel = wx.Panel(self.scroll, size=(self.screen_width - 15, self.screen_height / 2))
+ self.scroll_panel.Bind(wx.EVT_PAINT, self.on_paint)
+ self.scroll_panel.Bind(wx.EVT_KEY_DOWN, self.on_key_press)
+ self.scroll_panel.Bind(wx.EVT_LEFT_DOWN, self.on_mouse_down)
+ self.scroll.Bind(wx.EVT_PAINT, self.on_paint)
+ self.scroll.Bind(wx.EVT_KEY_DOWN, self.on_key_press)
+ self.scroll.Bind(wx.EVT_LEFT_DOWN, self.on_mouse_down)
+
+ self.scroll.Fit()
+ self.Fit()
+
+ self.scroll_panel.SetDimensions(-1, -1, self.width_virtual, self.height_virtual, wx.SIZE_USE_EXISTING)
+
+ self.txt = None
+
+ self.Show(True)
+
+ def us_to_px(self, val):
+ return val / (10 ** 3) * self.zoom
+
+ def px_to_us(self, val):
+ return (val / self.zoom) * (10 ** 3)
+
+ def scroll_start(self):
+ (x, y) = self.scroll.GetViewStart()
+ return (x * self.scroll_scale, y * self.scroll_scale)
+
+ def scroll_start_us(self):
+ (x, y) = self.scroll_start()
+ return self.px_to_us(x)
+
+ def paint_rectangle_zone(self, nr, color, top_color, start, end):
+ offset_px = self.us_to_px(start - self.ts_start)
+ width_px = self.us_to_px(end - self.ts_start)
+
+ offset_py = RootFrame.Y_OFFSET + (nr * (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE))
+ width_py = RootFrame.RECT_HEIGHT
+
+ dc = self.dc
+
+ if top_color is not None:
+ (r, g, b) = top_color
+ top_color = wx.Colour(r, g, b)
+ brush = wx.Brush(top_color, wx.SOLID)
+ dc.SetBrush(brush)
+ dc.DrawRectangle(offset_px, offset_py, width_px, RootFrame.EVENT_MARKING_WIDTH)
+ width_py -= RootFrame.EVENT_MARKING_WIDTH
+ offset_py += RootFrame.EVENT_MARKING_WIDTH
+
+ (r ,g, b) = color
+ color = wx.Colour(r, g, b)
+ brush = wx.Brush(color, wx.SOLID)
+ dc.SetBrush(brush)
+ dc.DrawRectangle(offset_px, offset_py, width_px, width_py)
+
+ def update_rectangles(self, dc, start, end):
+ start += self.ts_start
+ end += self.ts_start
+ self.sched_tracer.fill_zone(start, end)
+
+ def on_paint(self, event):
+ dc = wx.PaintDC(self.scroll_panel)
+ self.dc = dc
+
+ width = min(self.width_virtual, self.screen_width)
+ (x, y) = self.scroll_start()
+ start = self.px_to_us(x)
+ end = self.px_to_us(x + width)
+ self.update_rectangles(dc, start, end)
+
+ def rect_from_ypixel(self, y):
+ y -= RootFrame.Y_OFFSET
+ rect = y / (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE)
+ height = y % (RootFrame.RECT_HEIGHT + RootFrame.RECT_SPACE)
+
+ if rect < 0 or rect > self.nr_rects - 1 or height > RootFrame.RECT_HEIGHT:
+ return -1
+
+ return rect
+
+ def update_summary(self, txt):
+ if self.txt:
+ self.txt.Destroy()
+ self.txt = wx.StaticText(self.panel, -1, txt, (0, (self.screen_height / 2) + 50))
+
+
+ def on_mouse_down(self, event):
+ (x, y) = event.GetPositionTuple()
+ rect = self.rect_from_ypixel(y)
+ if rect == -1:
+ return
+
+ t = self.px_to_us(x) + self.ts_start
+
+ self.sched_tracer.mouse_down(rect, t)
+
+
+ def update_width_virtual(self):
+ self.width_virtual = self.us_to_px(self.ts_end - self.ts_start)
+
+ def __zoom(self, x):
+ self.update_width_virtual()
+ (xpos, ypos) = self.scroll.GetViewStart()
+ xpos = self.us_to_px(x) / self.scroll_scale
+ self.scroll.SetScrollbars(self.scroll_scale, self.scroll_scale, self.width_virtual / self.scroll_scale, self.height_virtual / self.scroll_scale, xpos, ypos)
+ self.Refresh()
+
+ def zoom_in(self):
+ x = self.scroll_start_us()
+ self.zoom *= 2
+ self.__zoom(x)
+
+ def zoom_out(self):
+ x = self.scroll_start_us()
+ self.zoom /= 2
+ self.__zoom(x)
+
+
+ def on_key_press(self, event):
+ key = event.GetRawKeyCode()
+ if key == ord("+"):
+ self.zoom_in()
+ return
+ if key == ord("-"):
+ self.zoom_out()
+ return
+
+ key = event.GetKeyCode()
+ (x, y) = self.scroll.GetViewStart()
+ if key == wx.WXK_RIGHT:
+ self.scroll.Scroll(x + 1, y)
+ elif key == wx.WXK_LEFT:
+ self.scroll.Scroll(x - 1, y)
+ elif key == wx.WXK_DOWN:
+ self.scroll.Scroll(x, y + 1)
+ elif key == wx.WXK_UP:
+ self.scroll.Scroll(x, y - 1)
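
All of SchedGui's geometry hangs off one scale: us_to_px() and px_to_us()
convert through 1000 microseconds per pixel times the zoom factor, so
they must stay exact inverses for scrolling and zooming to agree. The
same math in isolation (zoom chosen to match the default above):

    zoom = 0.5

    def us_to_px(us):
        return us / (10 ** 3) * zoom     # 1000 us per pixel at zoom 1.0

    def px_to_us(px):
        return (px / zoom) * (10 ** 3)

    assert px_to_us(us_to_px(250000)) == 250000
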
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
new file mode 100644
index 000000000..7384dcb62
--- /dev/null
+++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py
@@ -0,0 +1,91 @@
+# Util.py - Python extension for perf script, miscellaneous utility code
+#
+# Copyright (C) 2010 by Tom Zanussi <tzanussi@gmail.com>
+#
+# This software may be distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+from __future__ import print_function
+
+import errno, os
+
+FUTEX_WAIT = 0
+FUTEX_WAKE = 1
+FUTEX_PRIVATE_FLAG = 128
+FUTEX_CLOCK_REALTIME = 256
+FUTEX_CMD_MASK = ~(FUTEX_PRIVATE_FLAG | FUTEX_CLOCK_REALTIME)
+
+NSECS_PER_SEC = 1000000000
+
+def avg(total, n):
+ return total / n
+
+def nsecs(secs, nsecs):
+ return secs * NSECS_PER_SEC + nsecs
+
+def nsecs_secs(nsecs):
+ return nsecs / NSECS_PER_SEC
+
+def nsecs_nsecs(nsecs):
+ return nsecs % NSECS_PER_SEC
+
+def nsecs_str(nsecs):
+	s = "%5u.%09u" % (nsecs_secs(nsecs), nsecs_nsecs(nsecs))
+	return s
+
+def add_stats(dict, key, value):
+ if key not in dict:
+ dict[key] = (value, value, value, 1)
+ else:
+ min, max, avg, count = dict[key]
+ if value < min:
+ min = value
+ if value > max:
+ max = value
+ avg = (avg + value) / 2
+ dict[key] = (min, max, avg, count + 1)
+
+def clear_term():
+ print("\x1b[H\x1b[2J")
+
+audit_package_warned = False
+
+try:
+ import audit
+ machine_to_id = {
+ 'x86_64': audit.MACH_86_64,
+ 'alpha' : audit.MACH_ALPHA,
+ 'ia64' : audit.MACH_IA64,
+ 'ppc' : audit.MACH_PPC,
+ 'ppc64' : audit.MACH_PPC64,
+ 'ppc64le' : audit.MACH_PPC64LE,
+ 's390' : audit.MACH_S390,
+ 's390x' : audit.MACH_S390X,
+ 'i386' : audit.MACH_X86,
+ 'i586' : audit.MACH_X86,
+ 'i686' : audit.MACH_X86,
+ }
+ try:
+ machine_to_id['armeb'] = audit.MACH_ARMEB
+ except:
+ pass
+ machine_id = machine_to_id[os.uname()[4]]
+except:
+ if not audit_package_warned:
+ audit_package_warned = True
+ print("Install the audit-libs-python package to get syscall names.\n"
+ "For example:\n # apt-get install python-audit (Ubuntu)"
+ "\n # yum install audit-libs-python (Fedora)"
+ "\n etc.\n")
+
+def syscall_name(id):
+ try:
+ return audit.audit_syscall_to_name(id, machine_id)
+ except:
+ return str(id)
+
+def strerror(nr):
+ try:
+ return errno.errorcode[abs(nr)]
+ except:
+ return "Unknown %d errno" % nr
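
One subtlety in add_stats() above: the "avg" slot is a pairwise running
mean (each new value is averaged with the previous avg), which weights
recent values more heavily than a true mean. A quick runnable check:

    def add_stats(d, key, value):
        if key not in d:
            d[key] = (value, value, value, 1)
        else:
            lo, hi, avg, count = d[key]
            d[key] = (min(lo, value), max(hi, value),
                      (avg + value) / 2, count + 1)

    stats = {}
    for v in (10, 20, 30):
        add_stats(stats, "lat", v)
    print(stats["lat"])    # (10, 30, 22.5, 3); a true mean would be 20
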
diff --git a/tools/perf/scripts/python/bin/compaction-times-record b/tools/perf/scripts/python/bin/compaction-times-record
new file mode 100644
index 000000000..6edcd40e1
--- /dev/null
+++ b/tools/perf/scripts/python/bin/compaction-times-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e compaction:mm_compaction_begin -e compaction:mm_compaction_end -e compaction:mm_compaction_migratepages -e compaction:mm_compaction_isolate_migratepages -e compaction:mm_compaction_isolate_freepages $@
diff --git a/tools/perf/scripts/python/bin/compaction-times-report b/tools/perf/scripts/python/bin/compaction-times-report
new file mode 100644
index 000000000..3dc13897c
--- /dev/null
+++ b/tools/perf/scripts/python/bin/compaction-times-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: display time taken by mm compaction
+# args: [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]
+perf script -s "$PERF_EXEC_PATH"/scripts/python/compaction-times.py $@
diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-record b/tools/perf/scripts/python/bin/event_analyzing_sample-record
new file mode 100644
index 000000000..5ce652dab
--- /dev/null
+++ b/tools/perf/scripts/python/bin/event_analyzing_sample-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# event_analyzing_sample.py can cover all type of perf samples including
+# the tracepoints, so no special record requirements, just record what
+# you want to analyze.
+#
+perf record $@
diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-report b/tools/perf/scripts/python/bin/event_analyzing_sample-report
new file mode 100644
index 000000000..0941fc94e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/event_analyzing_sample-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: analyze all perf samples
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/event_analyzing_sample.py
diff --git a/tools/perf/scripts/python/bin/export-to-postgresql-record b/tools/perf/scripts/python/bin/export-to-postgresql-record
new file mode 100644
index 000000000..221d66e05
--- /dev/null
+++ b/tools/perf/scripts/python/bin/export-to-postgresql-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# export perf data to a postgresql database. Can cover
+# perf ip samples (excluding the tracepoints). No special
+# record requirements, just record what you want to export.
+#
+perf record $@
diff --git a/tools/perf/scripts/python/bin/export-to-postgresql-report b/tools/perf/scripts/python/bin/export-to-postgresql-report
new file mode 100644
index 000000000..cd335b6e2
--- /dev/null
+++ b/tools/perf/scripts/python/bin/export-to-postgresql-report
@@ -0,0 +1,29 @@
+#!/bin/bash
+# description: export perf data to a postgresql database
+# args: [database name] [columns] [calls]
+n_args=0
+for i in "$@"
+do
+ if expr match "$i" "-" > /dev/null ; then
+ break
+ fi
+ n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 3 ] ; then
+ echo "usage: export-to-postgresql-report [database name] [columns] [calls]"
+ exit
+fi
+if [ "$n_args" -gt 2 ] ; then
+ dbname=$1
+ columns=$2
+ calls=$3
+ shift 3
+elif [ "$n_args" -gt 1 ] ; then
+ dbname=$1
+ columns=$2
+ shift 2
+elif [ "$n_args" -gt 0 ] ; then
+ dbname=$1
+ shift
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/export-to-postgresql.py $dbname $columns $calls
diff --git a/tools/perf/scripts/python/bin/export-to-sqlite-record b/tools/perf/scripts/python/bin/export-to-sqlite-record
new file mode 100644
index 000000000..070204fd6
--- /dev/null
+++ b/tools/perf/scripts/python/bin/export-to-sqlite-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+#
+# export perf data to a sqlite3 database. Can cover
+# perf ip samples (excluding the tracepoints). No special
+# record requirements, just record what you want to export.
+#
+perf record $@
diff --git a/tools/perf/scripts/python/bin/export-to-sqlite-report b/tools/perf/scripts/python/bin/export-to-sqlite-report
new file mode 100644
index 000000000..5ff6033e7
--- /dev/null
+++ b/tools/perf/scripts/python/bin/export-to-sqlite-report
@@ -0,0 +1,29 @@
+#!/bin/bash
+# description: export perf data to a sqlite3 database
+# args: [database name] [columns] [calls]
+n_args=0
+for i in "$@"
+do
+ if expr match "$i" "-" > /dev/null ; then
+ break
+ fi
+ n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 3 ] ; then
+ echo "usage: export-to-sqlite-report [database name] [columns] [calls]"
+ exit
+fi
+if [ "$n_args" -gt 2 ] ; then
+ dbname=$1
+ columns=$2
+ calls=$3
+ shift 3
+elif [ "$n_args" -gt 1 ] ; then
+ dbname=$1
+ columns=$2
+ shift 2
+elif [ "$n_args" -gt 0 ] ; then
+ dbname=$1
+ shift
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/export-to-sqlite.py $dbname $columns $calls
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
new file mode 100644
index 000000000..74685f318
--- /dev/null
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_exit $@ || \
+ perf record -e syscalls:sys_exit $@) 2> /dev/null
diff --git a/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
new file mode 100644
index 000000000..fda5096d0
--- /dev/null
+++ b/tools/perf/scripts/python/bin/failed-syscalls-by-pid-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide failed syscalls, by pid
+# args: [comm]
+if [ $# -gt 0 ] ; then
+ if ! expr match "$1" "-" > /dev/null ; then
+ comm=$1
+ shift
+ fi
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/failed-syscalls-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/futex-contention-record b/tools/perf/scripts/python/bin/futex-contention-record
new file mode 100644
index 000000000..b1495c9a9
--- /dev/null
+++ b/tools/perf/scripts/python/bin/futex-contention-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e syscalls:sys_enter_futex -e syscalls:sys_exit_futex $@
diff --git a/tools/perf/scripts/python/bin/futex-contention-report b/tools/perf/scripts/python/bin/futex-contention-report
new file mode 100644
index 000000000..6c4427109
--- /dev/null
+++ b/tools/perf/scripts/python/bin/futex-contention-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: futex contention measurement
+
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/futex-contention.py
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-record b/tools/perf/scripts/python/bin/intel-pt-events-record
new file mode 100644
index 000000000..10fe2b697
--- /dev/null
+++ b/tools/perf/scripts/python/bin/intel-pt-events-record
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+#
+# print Intel PT Power Events and PTWRITE. The intel_pt PMU event needs
+# to be specified with appropriate config terms.
+#
+if ! echo "$@" | grep -q intel_pt ; then
+	echo "Options must include the Intel PT event, e.g. -e intel_pt/pwr_evt,ptw/"
+	echo "and for power events it probably needs to be system wide, i.e. the -a option"
+ echo "For example: -a -e intel_pt/pwr_evt,branch=0/ sleep 1"
+ exit 1
+fi
+perf record $@
diff --git a/tools/perf/scripts/python/bin/intel-pt-events-report b/tools/perf/scripts/python/bin/intel-pt-events-report
new file mode 100644
index 000000000..9a9c92fcd
--- /dev/null
+++ b/tools/perf/scripts/python/bin/intel-pt-events-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: print Intel PT Power Events and PTWRITE
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/intel-pt-events.py
\ No newline at end of file
diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-record b/tools/perf/scripts/python/bin/mem-phys-addr-record
new file mode 100644
index 000000000..5a875122a
--- /dev/null
+++ b/tools/perf/scripts/python/bin/mem-phys-addr-record
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+#
+# Profile physical memory accesses, using the retired load
+# instructions/uops event MEM_INST_RETIRED.ALL_LOADS or
+# MEM_UOPS_RETIRED.ALL_LOADS
+#
+
+load=`perf list | grep mem_inst_retired.all_loads`
+if [ -z "$load" ]; then
+ load=`perf list | grep mem_uops_retired.all_loads`
+fi
+if [ -z "$load" ]; then
+ echo "There is no event to count all retired load instructions/uops."
+ exit 1
+fi
+
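+# Strip the whitespace around the event name printed by 'perf list' and request maximum-precision samples via the :P modifier.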
+arg=$(echo $load | tr -d ' ')
+arg="$arg:P"
+perf record --phys-data -e $arg $@
diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-report b/tools/perf/scripts/python/bin/mem-phys-addr-report
new file mode 100644
index 000000000..3f2b847e2
--- /dev/null
+++ b/tools/perf/scripts/python/bin/mem-phys-addr-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: resolve physical address samples
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py
diff --git a/tools/perf/scripts/python/bin/net_dropmonitor-record b/tools/perf/scripts/python/bin/net_dropmonitor-record
new file mode 100755
index 000000000..423fb81da
--- /dev/null
+++ b/tools/perf/scripts/python/bin/net_dropmonitor-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e skb:kfree_skb $@
diff --git a/tools/perf/scripts/python/bin/net_dropmonitor-report b/tools/perf/scripts/python/bin/net_dropmonitor-report
new file mode 100755
index 000000000..8d698f5a0
--- /dev/null
+++ b/tools/perf/scripts/python/bin/net_dropmonitor-report
@@ -0,0 +1,4 @@
+#!/bin/bash
+# description: display a table of dropped frames
+
+perf script -s "$PERF_EXEC_PATH"/scripts/python/net_dropmonitor.py $@
diff --git a/tools/perf/scripts/python/bin/netdev-times-record b/tools/perf/scripts/python/bin/netdev-times-record
new file mode 100644
index 000000000..558754b84
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-record
@@ -0,0 +1,8 @@
+#!/bin/bash
+perf record -e net:net_dev_xmit -e net:net_dev_queue \
+ -e net:netif_receive_skb -e net:netif_rx \
+ -e skb:consume_skb -e skb:kfree_skb \
+ -e skb:skb_copy_datagram_iovec -e napi:napi_poll \
+ -e irq:irq_handler_entry -e irq:irq_handler_exit \
+ -e irq:softirq_entry -e irq:softirq_exit \
+ -e irq:softirq_raise $@
diff --git a/tools/perf/scripts/python/bin/netdev-times-report b/tools/perf/scripts/python/bin/netdev-times-report
new file mode 100644
index 000000000..8f759291d
--- /dev/null
+++ b/tools/perf/scripts/python/bin/netdev-times-report
@@ -0,0 +1,5 @@
+#!/bin/bash
+# description: display packet flow and processing time
+# args: [tx] [rx] [dev=] [debug]
+
+perf script -s "$PERF_EXEC_PATH"/scripts/python/netdev-times.py $@
diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-record b/tools/perf/scripts/python/bin/powerpc-hcalls-record
new file mode 100644
index 000000000..b7402aa91
--- /dev/null
+++ b/tools/perf/scripts/python/bin/powerpc-hcalls-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e "{powerpc:hcall_entry,powerpc:hcall_exit}" $@
diff --git a/tools/perf/scripts/python/bin/powerpc-hcalls-report b/tools/perf/scripts/python/bin/powerpc-hcalls-report
new file mode 100644
index 000000000..dd32ad746
--- /dev/null
+++ b/tools/perf/scripts/python/bin/powerpc-hcalls-report
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/powerpc-hcalls.py
diff --git a/tools/perf/scripts/python/bin/sched-migration-record b/tools/perf/scripts/python/bin/sched-migration-record
new file mode 100644
index 000000000..7493fddbe
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sched-migration-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -m 16384 -e sched:sched_wakeup -e sched:sched_wakeup_new -e sched:sched_switch -e sched:sched_migrate_task $@
diff --git a/tools/perf/scripts/python/bin/sched-migration-report b/tools/perf/scripts/python/bin/sched-migration-report
new file mode 100644
index 000000000..68b037a18
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sched-migration-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: sched migration overview
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sched-migration.py
diff --git a/tools/perf/scripts/python/bin/sctop-record b/tools/perf/scripts/python/bin/sctop-record
new file mode 100644
index 000000000..d6940841e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sctop-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_enter $@ || \
+ perf record -e syscalls:sys_enter $@) 2> /dev/null
diff --git a/tools/perf/scripts/python/bin/sctop-report b/tools/perf/scripts/python/bin/sctop-report
new file mode 100644
index 000000000..c32db2941
--- /dev/null
+++ b/tools/perf/scripts/python/bin/sctop-report
@@ -0,0 +1,25 @@
+#!/bin/bash
+# description: syscall top
+# args: [comm] [interval]
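+# Count the leading non-option arguments (those not starting with "-").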
+n_args=0
+for i in "$@"
+do
+ if expr match "$i" "-" > /dev/null ; then
+ break
+ fi
+ n_args=$(( $n_args + 1 ))
+done
+if [ "$n_args" -gt 2 ] ; then
+ echo "usage: sctop-report [comm] [interval]"
+ exit
+fi
+if [ "$n_args" -gt 1 ] ; then
+ comm=$1
+ interval=$2
+ shift 2
+elif [ "$n_args" -gt 0 ] ; then
+ interval=$1
+ shift
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/sctop.py $comm $interval
diff --git a/tools/perf/scripts/python/bin/stackcollapse-record b/tools/perf/scripts/python/bin/stackcollapse-record
new file mode 100755
index 000000000..9d8f9f0f3
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-record
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+#
+# stackcollapse.py can cover all types of perf samples including
+# tracepoints, so there are no special record requirements; just record
+# what you want to analyze.
+#
+perf record "$@"
diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report
new file mode 100755
index 000000000..356b96563
--- /dev/null
+++ b/tools/perf/scripts/python/bin/stackcollapse-report
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: produce callgraphs in short form for scripting use
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
new file mode 100644
index 000000000..d6940841e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_enter $@ || \
+ perf record -e syscalls:sys_enter $@) 2> /dev/null
diff --git a/tools/perf/scripts/python/bin/syscall-counts-by-pid-report b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
new file mode 100644
index 000000000..16eb8d65c
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-by-pid-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide syscall counts, by pid
+# args: [comm]
+if [ $# -gt 0 ] ; then
+ if ! expr match "$1" "-" > /dev/null ; then
+ comm=$1
+ shift
+ fi
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts-by-pid.py $comm
diff --git a/tools/perf/scripts/python/bin/syscall-counts-record b/tools/perf/scripts/python/bin/syscall-counts-record
new file mode 100644
index 000000000..d6940841e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-record
@@ -0,0 +1,3 @@
+#!/bin/bash
+(perf record -e raw_syscalls:sys_enter $@ || \
+ perf record -e syscalls:sys_enter $@) 2> /dev/null
diff --git a/tools/perf/scripts/python/bin/syscall-counts-report b/tools/perf/scripts/python/bin/syscall-counts-report
new file mode 100644
index 000000000..0f0e9d453
--- /dev/null
+++ b/tools/perf/scripts/python/bin/syscall-counts-report
@@ -0,0 +1,10 @@
+#!/bin/bash
+# description: system-wide syscall counts
+# args: [comm]
+if [ $# -gt 0 ] ; then
+ if ! expr match "$1" "-" > /dev/null ; then
+ comm=$1
+ shift
+ fi
+fi
+perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/syscall-counts.py $comm
diff --git a/tools/perf/scripts/python/call-graph-from-sql.py b/tools/perf/scripts/python/call-graph-from-sql.py
new file mode 100644
index 000000000..b494a67a1
--- /dev/null
+++ b/tools/perf/scripts/python/call-graph-from-sql.py
@@ -0,0 +1,341 @@
+#!/usr/bin/python2
+# call-graph-from-sql.py: create call-graph from sql database
+# Copyright (c) 2014-2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+# To use this script you will need to have exported data using either the
+# export-to-sqlite.py or the export-to-postgresql.py script. Refer to those
+# scripts for details.
+#
+# Following on from the example in the export scripts, a
+# call-graph can be displayed for the pt_example database like this:
+#
+# python tools/perf/scripts/python/call-graph-from-sql.py pt_example
+#
+# Note that for PostgreSQL, this script supports connecting to remote databases
+# by setting hostname, port, username, password, and dbname e.g.
+#
+# python tools/perf/scripts/python/call-graph-from-sql.py "hostname=myhost username=myuser password=mypassword dbname=pt_example"
+#
+# The result is a GUI window with a tree representing a context-sensitive
+# call-graph. Expanding a couple of levels of the tree and adjusting column
+# widths to suit will display something like:
+#
+#                                         Call Graph: pt_example
+# Call Path                          Object      Count   Time(ns)  Time(%)  Branch Count  Branch Count(%)
+# v- ls
+#     v- 2638:2638
+#         v- _start                  ld-2.19.so      1    10074071   100.0         211135            100.0
+#           |- unknown               unknown         1       13198     0.1              1              0.0
+#           >- _dl_start             ld-2.19.so      1     1400980    13.9          19637              9.3
+#           >- _dl_init_internal     ld-2.19.so      1      448152     4.4          11094              5.3
+#           v- __libc_start_main@plt ls              1     8211741    81.5         180397             85.4
+#              >- _dl_fixup          ld-2.19.so      1        7607     0.1            108              0.1
+#              >- __cxa_atexit       libc-2.19.so    1       11737     0.1             10              0.0
+#              >- __libc_csu_init    ls              1       10354     0.1             10              0.0
+#              |- _setjmp            libc-2.19.so    1           0     0.0              4              0.0
+#              v- main               ls              1     8182043    99.6         180254             99.9
+#
+# Points to note:
+# The top level is a command name (comm)
+# The next level is a thread (pid:tid)
+# Subsequent levels are functions
+# 'Count' is the number of calls
+# 'Time' is the elapsed time until the function returns
+# Percentages are relative to the level above
+# 'Branch Count' is the total number of branches for that function and all
+# functions that it calls
+
+import sys
+from PySide.QtCore import *
+from PySide.QtGui import *
+from PySide.QtSql import *
+from decimal import *
+
+class TreeItem():
+
+ def __init__(self, db, row, parent_item):
+ self.db = db
+ self.row = row
+ self.parent_item = parent_item
+ self.query_done = False
+ self.child_count = 0
+ self.child_items = []
+ self.data = ["", "", "", "", "", "", ""]
+ self.comm_id = 0
+ self.thread_id = 0
+ self.call_path_id = 1
+ self.branch_count = 0
+ self.time = 0
+ if not parent_item:
+ self.setUpRoot()
+
+ def setUpRoot(self):
+ self.query_done = True
+ query = QSqlQuery(self.db)
+ ret = query.exec_('SELECT id, comm FROM comms')
+ if not ret:
+ raise Exception("Query failed: " + query.lastError().text())
+ while query.next():
+ if not query.value(0):
+ continue
+ child_item = TreeItem(self.db, self.child_count, self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+ child_item.setUpLevel1(query.value(0), query.value(1))
+
+ def setUpLevel1(self, comm_id, comm):
+ self.query_done = True
+ self.comm_id = comm_id
+ self.data[0] = comm
+ self.child_items = []
+ self.child_count = 0
+ query = QSqlQuery(self.db)
+ ret = query.exec_('SELECT thread_id, ( SELECT pid FROM threads WHERE id = thread_id ), ( SELECT tid FROM threads WHERE id = thread_id ) FROM comm_threads WHERE comm_id = ' + str(comm_id))
+ if not ret:
+ raise Exception("Query failed: " + query.lastError().text())
+ while query.next():
+ child_item = TreeItem(self.db, self.child_count, self)
+ self.child_items.append(child_item)
+ self.child_count += 1
+ child_item.setUpLevel2(comm_id, query.value(0), query.value(1), query.value(2))
+
+ def setUpLevel2(self, comm_id, thread_id, pid, tid):
+ self.comm_id = comm_id
+ self.thread_id = thread_id
+ self.data[0] = str(pid) + ":" + str(tid)
+
+ def getChildItem(self, row):
+ return self.child_items[row]
+
+ def getParentItem(self):
+ return self.parent_item
+
+ def getRow(self):
+ return self.row
+
+ def timePercent(self, b):
+ if not self.time:
+ return "0.0"
+ x = (b * Decimal(100)) / self.time
+ return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
+
+ def branchPercent(self, b):
+ if not self.branch_count:
+ return "0.0"
+ x = (b * Decimal(100)) / self.branch_count
+ return str(x.quantize(Decimal('.1'), rounding=ROUND_HALF_UP))
+
+ def addChild(self, call_path_id, name, dso, count, time, branch_count):
+ child_item = TreeItem(self.db, self.child_count, self)
+ child_item.comm_id = self.comm_id
+ child_item.thread_id = self.thread_id
+ child_item.call_path_id = call_path_id
+ child_item.branch_count = branch_count
+ child_item.time = time
+ child_item.data[0] = name
+ if dso == "[kernel.kallsyms]":
+ dso = "[kernel]"
+ child_item.data[1] = dso
+ child_item.data[2] = str(count)
+ child_item.data[3] = str(time)
+ child_item.data[4] = self.timePercent(time)
+ child_item.data[5] = str(branch_count)
+ child_item.data[6] = self.branchPercent(branch_count)
+ self.child_items.append(child_item)
+ self.child_count += 1
+
+ def selectCalls(self):
+ self.query_done = True
+ query = QSqlQuery(self.db)
+ ret = query.exec_('SELECT id, call_path_id, branch_count, call_time, return_time, '
+ '( SELECT name FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ), '
+ '( SELECT short_name FROM dsos WHERE id = ( SELECT dso_id FROM symbols WHERE id = ( SELECT symbol_id FROM call_paths WHERE id = call_path_id ) ) ), '
+ '( SELECT ip FROM call_paths where id = call_path_id ) '
+ 'FROM calls WHERE parent_call_path_id = ' + str(self.call_path_id) + ' AND comm_id = ' + str(self.comm_id) + ' AND thread_id = ' + str(self.thread_id) +
+ ' ORDER BY call_path_id')
+ if not ret:
+ raise Exception("Query failed: " + query.lastError().text())
+ last_call_path_id = 0
+ name = ""
+ dso = ""
+ count = 0
+ branch_count = 0
+ total_branch_count = 0
+ time = 0
+ total_time = 0
+ while query.next():
+ if query.value(1) == last_call_path_id:
+ count += 1
+ branch_count += query.value(2)
+ time += query.value(4) - query.value(3)
+ else:
+ if count:
+ self.addChild(last_call_path_id, name, dso, count, time, branch_count)
+ last_call_path_id = query.value(1)
+ name = query.value(5)
+ dso = query.value(6)
+ count = 1
+ total_branch_count += branch_count
+ total_time += time
+ branch_count = query.value(2)
+ time = query.value(4) - query.value(3)
+ if count:
+ self.addChild(last_call_path_id, name, dso, count, time, branch_count)
+ total_branch_count += branch_count
+ total_time += time
+ # Top level does not have time or branch count, so fix that here
+ if total_branch_count > self.branch_count:
+ self.branch_count = total_branch_count
+ if self.branch_count:
+ for child_item in self.child_items:
+ child_item.data[6] = self.branchPercent(child_item.branch_count)
+ if total_time > self.time:
+ self.time = total_time
+ if self.time:
+ for child_item in self.child_items:
+ child_item.data[4] = self.timePercent(child_item.time)
+
+ def childCount(self):
+ if not self.query_done:
+ self.selectCalls()
+ return self.child_count
+
+ def columnCount(self):
+ return 7
+
+ def columnHeader(self, column):
+ headers = ["Call Path", "Object", "Count ", "Time (ns) ", "Time (%) ", "Branch Count ", "Branch Count (%) "]
+ return headers[column]
+
+ def getData(self, column):
+ return self.data[column]
+
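+# Qt item model over the TreeItem tree; call children are queried from
+# the database on demand, the first time childCount() is called.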
+class TreeModel(QAbstractItemModel):
+
+ def __init__(self, db, parent=None):
+ super(TreeModel, self).__init__(parent)
+ self.db = db
+ self.root = TreeItem(db, 0, None)
+
+ def columnCount(self, parent):
+ return self.root.columnCount()
+
+ def rowCount(self, parent):
+ if parent.isValid():
+ parent_item = parent.internalPointer()
+ else:
+ parent_item = self.root
+ return parent_item.childCount()
+
+ def headerData(self, section, orientation, role):
+ if role == Qt.TextAlignmentRole:
+ if section > 1:
+ return Qt.AlignRight
+ if role != Qt.DisplayRole:
+ return None
+ if orientation != Qt.Horizontal:
+ return None
+ return self.root.columnHeader(section)
+
+ def parent(self, child):
+ child_item = child.internalPointer()
+ if child_item is self.root:
+ return QModelIndex()
+ parent_item = child_item.getParentItem()
+ return self.createIndex(parent_item.getRow(), 0, parent_item)
+
+ def index(self, row, column, parent):
+ if parent.isValid():
+ parent_item = parent.internalPointer()
+ else:
+ parent_item = self.root
+ child_item = parent_item.getChildItem(row)
+ return self.createIndex(row, column, child_item)
+
+ def data(self, index, role):
+ if role == Qt.TextAlignmentRole:
+ if index.column() > 1:
+ return Qt.AlignRight
+ if role != Qt.DisplayRole:
+ return None
+ index_item = index.internalPointer()
+ return index_item.getData(index.column())
+
+class MainWindow(QMainWindow):
+
+ def __init__(self, db, dbname, parent=None):
+ super(MainWindow, self).__init__(parent)
+
+ self.setObjectName("MainWindow")
+ self.setWindowTitle("Call Graph: " + dbname)
+ self.move(100, 100)
+ self.resize(800, 600)
+ style = self.style()
+ icon = style.standardIcon(QStyle.SP_MessageBoxInformation)
+ self.setWindowIcon(icon)
+
+ self.model = TreeModel(db)
+
+ self.view = QTreeView()
+ self.view.setModel(self.model)
+
+ self.setCentralWidget(self.view)
+
+if __name__ == '__main__':
+ if (len(sys.argv) < 2):
+ print >> sys.stderr, "Usage is: call-graph-from-sql.py <database name>"
+ raise Exception("Too few arguments")
+
+ dbname = sys.argv[1]
+
+ is_sqlite3 = False
+ try:
+ f = open(dbname)
+ if f.read(15) == "SQLite format 3":
+ is_sqlite3 = True
+ f.close()
+ except:
+ pass
+
+ if is_sqlite3:
+ db = QSqlDatabase.addDatabase('QSQLITE')
+ else:
+ db = QSqlDatabase.addDatabase('QPSQL')
+ opts = dbname.split()
+ for opt in opts:
+ if '=' in opt:
+ opt = opt.split('=')
+ if opt[0] == 'hostname':
+ db.setHostName(opt[1])
+ elif opt[0] == 'port':
+ db.setPort(int(opt[1]))
+ elif opt[0] == 'username':
+ db.setUserName(opt[1])
+ elif opt[0] == 'password':
+ db.setPassword(opt[1])
+ elif opt[0] == 'dbname':
+ dbname = opt[1]
+ else:
+ dbname = opt
+
+ db.setDatabaseName(dbname)
+ if not db.open():
+ raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+
+ app = QApplication(sys.argv)
+ window = MainWindow(db, dbname)
+ window.show()
+ err = app.exec_()
+ db.close()
+ sys.exit(err)
diff --git a/tools/perf/scripts/python/check-perf-trace.py b/tools/perf/scripts/python/check-perf-trace.py
new file mode 100644
index 000000000..334599c60
--- /dev/null
+++ b/tools/perf/scripts/python/check-perf-trace.py
@@ -0,0 +1,82 @@
+# perf script event handlers, generated by perf script -g python
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# This script tests basic functionality such as flag and symbol
+# strings, common_xxx() calls back into perf, begin, end, unhandled
+# events, etc. Basically, if this script runs successfully and
+# displays expected results, Python scripting support should be ok.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from Core import *
+from perf_trace_context import *
+
+unhandled = autodict()
+
+def trace_begin():
+ print "trace_begin"
+ pass
+
+def trace_end():
+ print_unhandled()
+
+def irq__softirq_entry(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, vec):
+ print_header(event_name, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm)
+
+ print_uncommon(context)
+
+ print "vec=%s\n" % \
+ (symbol_str("irq__softirq_entry", "vec", vec)),
+
+def kmem__kmalloc(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, call_site, ptr, bytes_req, bytes_alloc,
+ gfp_flags):
+ print_header(event_name, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm)
+
+ print_uncommon(context)
+
+ print "call_site=%u, ptr=%u, bytes_req=%u, " \
+ "bytes_alloc=%u, gfp_flags=%s\n" % \
+ (call_site, ptr, bytes_req, bytes_alloc,
+
+ flag_str("kmem__kmalloc", "gfp_flags", gfp_flags)),
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ try:
+ unhandled[event_name] += 1
+ except TypeError:
+ unhandled[event_name] = 1
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+ print "%-20s %5u %05u.%09u %8u %-20s " % \
+ (event_name, cpu, secs, nsecs, pid, comm),
+
+# print trace fields not included in handler args
+def print_uncommon(context):
+ print "common_preempt_count=%d, common_flags=%s, common_lock_depth=%d, " \
+ % (common_pc(context), trace_flag_str(common_flags(context)), \
+ common_lock_depth(context))
+
+def print_unhandled():
+ keys = unhandled.keys()
+ if not keys:
+ return
+
+ print "\nunhandled events:\n\n",
+
+ print "%-40s %10s\n" % ("event", "count"),
+ print "%-40s %10s\n" % ("----------------------------------------", \
+ "-----------"),
+
+ for event_name in keys:
+ print "%-40s %10d\n" % (event_name, unhandled[event_name])
diff --git a/tools/perf/scripts/python/compaction-times.py b/tools/perf/scripts/python/compaction-times.py
new file mode 100644
index 000000000..239cb0568
--- /dev/null
+++ b/tools/perf/scripts/python/compaction-times.py
@@ -0,0 +1,313 @@
+# report time spent in compaction
+# Licensed under the terms of the GNU GPL License version 2
+
+# testing:
+# 'echo 1 > /proc/sys/vm/compact_memory' to force compaction of all zones
+
+import os
+import sys
+import re
+
+import signal
+signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+
+usage = "usage: perf script report compaction-times.py -- [-h] [-u] [-p|-pv] [-t | [-m] [-fs] [-ms]] [pid|pid-range|comm-regex]\n"
+
+class popt:
+ DISP_DFL = 0
+ DISP_PROC = 1
+ DISP_PROC_VERBOSE = 2
+
+class topt:
+ DISP_TIME = 0
+ DISP_MIG = 1
+ DISP_ISOLFREE = 2
+ DISP_ISOLMIG = 4
+ DISP_ALL = 7
+
+class comm_filter:
+ def __init__(self, re):
+ self.re = re
+
+ def filter(self, pid, comm):
+ m = self.re.search(comm)
+ return m == None or m.group() == ""
+
+class pid_filter:
+ def __init__(self, low, high):
+ self.low = (0 if low == "" else int(low))
+ self.high = (0 if high == "" else int(high))
+
+ def filter(self, pid, comm):
+ return not (pid >= self.low and (self.high == 0 or pid <= self.high))
+
+def set_type(t):
+ global opt_disp
+ opt_disp = (t if opt_disp == topt.DISP_ALL else opt_disp|t)
+
+def ns(sec, nsec):
+ return (sec * 1000000000) + nsec
+
+def time(ns):
+ return "%dns" % ns if opt_ns else "%dus" % (round(ns, -3) / 1000)
+
+class pair:
+ def __init__(self, aval, bval, alabel = None, blabel = None):
+ self.alabel = alabel
+ self.blabel = blabel
+ self.aval = aval
+ self.bval = bval
+
+ def __add__(self, rhs):
+ self.aval += rhs.aval
+ self.bval += rhs.bval
+ return self
+
+ def __str__(self):
+ return "%s=%d %s=%d" % (self.alabel, self.aval, self.blabel, self.bval)
+
+class cnode:
+ def __init__(self, ns):
+ self.ns = ns
+ self.migrated = pair(0, 0, "moved", "failed")
+ self.fscan = pair(0,0, "scanned", "isolated")
+ self.mscan = pair(0,0, "scanned", "isolated")
+
+ def __add__(self, rhs):
+ self.ns += rhs.ns
+ self.migrated += rhs.migrated
+ self.fscan += rhs.fscan
+ self.mscan += rhs.mscan
+ return self
+
+ def __str__(self):
+ prev = 0
+ s = "%s " % time(self.ns)
+ if (opt_disp & topt.DISP_MIG):
+ s += "migration: %s" % self.migrated
+ prev = 1
+ if (opt_disp & topt.DISP_ISOLFREE):
+ s += "%sfree_scanner: %s" % (" " if prev else "", self.fscan)
+ prev = 1
+ if (opt_disp & topt.DISP_ISOLMIG):
+ s += "%smigration_scanner: %s" % (" " if prev else "", self.mscan)
+ return s
+
+ def complete(self, secs, nsecs):
+ self.ns = ns(secs, nsecs) - self.ns
+
+ def increment(self, migrated, fscan, mscan):
+ if (migrated != None):
+ self.migrated += migrated
+ if (fscan != None):
+ self.fscan += fscan
+ if (mscan != None):
+ self.mscan += mscan
+
+
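+# Per-pid accumulator: a compaction begin event creates a pending cnode,
+# increment/end events update it, and completed nodes are summed into totals.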
+class chead:
+ heads = {}
+ val = cnode(0)
+ fobj = None
+
+ @classmethod
+ def add_filter(cls, filter):
+ cls.fobj = filter
+
+ @classmethod
+ def create_pending(cls, pid, comm, start_secs, start_nsecs):
+ filtered = 0
+ try:
+ head = cls.heads[pid]
+ filtered = head.is_filtered()
+ except KeyError:
+ if cls.fobj != None:
+ filtered = cls.fobj.filter(pid, comm)
+ head = cls.heads[pid] = chead(comm, pid, filtered)
+
+ if not filtered:
+ head.mark_pending(start_secs, start_nsecs)
+
+ @classmethod
+ def increment_pending(cls, pid, migrated, fscan, mscan):
+ head = cls.heads[pid]
+ if not head.is_filtered():
+ if head.is_pending():
+ head.do_increment(migrated, fscan, mscan)
+ else:
+ sys.stderr.write("missing start compaction event for pid %d\n" % pid)
+
+ @classmethod
+ def complete_pending(cls, pid, secs, nsecs):
+ head = cls.heads[pid]
+ if not head.is_filtered():
+ if head.is_pending():
+ head.make_complete(secs, nsecs)
+ else:
+ sys.stderr.write("missing start compaction event for pid %d\n" % pid)
+
+ @classmethod
+ def gen(cls):
+ if opt_proc != popt.DISP_DFL:
+ for i in cls.heads:
+ yield cls.heads[i]
+
+ @classmethod
+ def str(cls):
+ return cls.val
+
+ def __init__(self, comm, pid, filtered):
+ self.comm = comm
+ self.pid = pid
+ self.val = cnode(0)
+ self.pending = None
+ self.filtered = filtered
+ self.list = []
+
+ def __add__(self, rhs):
+ self.ns += rhs.ns
+ self.val += rhs.val
+ return self
+
+ def mark_pending(self, secs, nsecs):
+ self.pending = cnode(ns(secs, nsecs))
+
+ def do_increment(self, migrated, fscan, mscan):
+ self.pending.increment(migrated, fscan, mscan)
+
+ def make_complete(self, secs, nsecs):
+ self.pending.complete(secs, nsecs)
+ chead.val += self.pending
+
+ if opt_proc != popt.DISP_DFL:
+ self.val += self.pending
+
+ if opt_proc == popt.DISP_PROC_VERBOSE:
+ self.list.append(self.pending)
+ self.pending = None
+
+ def enumerate(self):
+ if opt_proc == popt.DISP_PROC_VERBOSE and not self.is_filtered():
+ for i, pelem in enumerate(self.list):
+ sys.stdout.write("%d[%s].%d: %s\n" % (self.pid, self.comm, i+1, pelem))
+
+ def is_pending(self):
+ return self.pending != None
+
+ def is_filtered(self):
+ return self.filtered
+
+ def display(self):
+ if not self.is_filtered():
+ sys.stdout.write("%d[%s]: %s\n" % (self.pid, self.comm, self.val))
+
+
+def trace_end():
+ sys.stdout.write("total: %s\n" % chead.str())
+ for i in chead.gen():
+ i.display()
+ i.enumerate()
+
+def compaction__mm_compaction_migratepages(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, nr_migrated, nr_failed):
+
+ chead.increment_pending(common_pid,
+ pair(nr_migrated, nr_failed), None, None)
+
+def compaction__mm_compaction_isolate_freepages(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
+
+ chead.increment_pending(common_pid,
+ None, pair(nr_scanned, nr_taken), None)
+
+def compaction__mm_compaction_isolate_migratepages(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, start_pfn, end_pfn, nr_scanned, nr_taken):
+
+ chead.increment_pending(common_pid,
+ None, None, pair(nr_scanned, nr_taken))
+
+def compaction__mm_compaction_end(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, zone_start, migrate_start, free_start, zone_end,
+ sync, status):
+
+ chead.complete_pending(common_pid, common_secs, common_nsecs)
+
+def compaction__mm_compaction_begin(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, zone_start, migrate_start, free_start, zone_end,
+ sync):
+
+ chead.create_pending(common_pid, common_comm, common_secs, common_nsecs)
+
+def pr_help():
+ global usage
+
+ sys.stdout.write(usage)
+ sys.stdout.write("\n")
+ sys.stdout.write("-h display this help\n")
+ sys.stdout.write("-p display by process\n")
+ sys.stdout.write("-pv display by process (verbose)\n")
+ sys.stdout.write("-t display stall times only\n")
+ sys.stdout.write("-m display stats for migration\n")
+ sys.stdout.write("-fs display stats for free scanner\n")
+ sys.stdout.write("-ms display stats for migration scanner\n")
+ sys.stdout.write("-u display results in microseconds (default nanoseconds)\n")
+
+
+comm_re = None
+pid_re = None
+pid_regex = "^(\d*)-(\d*)$|^(\d*)$"
+
+opt_proc = popt.DISP_DFL
+opt_disp = topt.DISP_ALL
+
+opt_ns = True
+
+argc = len(sys.argv) - 1
+if argc >= 1:
+ pid_re = re.compile(pid_regex)
+
+ for i, opt in enumerate(sys.argv[1:]):
+ if opt[0] == "-":
+ if opt == "-h":
+ pr_help()
+ exit(0)
+ elif opt == "-p":
+ opt_proc = popt.DISP_PROC
+ elif opt == "-pv":
+ opt_proc = popt.DISP_PROC_VERBOSE
+ elif opt == '-u':
+ opt_ns = False
+ elif opt == "-t":
+ set_type(topt.DISP_TIME)
+ elif opt == "-m":
+ set_type(topt.DISP_MIG)
+ elif opt == "-fs":
+ set_type(topt.DISP_ISOLFREE)
+ elif opt == "-ms":
+ set_type(topt.DISP_ISOLMIG)
+ else:
+ sys.exit(usage)
+
+ elif i == argc - 1:
+ m = pid_re.search(opt)
+ if m != None and m.group() != "":
+ if m.group(3) != None:
+ f = pid_filter(m.group(3), m.group(3))
+ else:
+ f = pid_filter(m.group(1), m.group(2))
+ else:
+ try:
+ comm_re = re.compile(opt)
+ except:
+ sys.stderr.write("invalid regex '%s'\n" % opt)
+ sys.exit(usage)
+ f = comm_filter(comm_re)
+
+ chead.add_filter(f)
diff --git a/tools/perf/scripts/python/event_analyzing_sample.py b/tools/perf/scripts/python/event_analyzing_sample.py
new file mode 100644
index 000000000..4e843b986
--- /dev/null
+++ b/tools/perf/scripts/python/event_analyzing_sample.py
@@ -0,0 +1,190 @@
+# event_analyzing_sample.py: general event handler in python
+# SPDX-License-Identifier: GPL-2.0
+#
+# Current perf report is already very powerful with the annotation integrated,
+# and this script is not trying to be as powerful as perf report; instead it
+# provides end users/developers a flexible way to analyze events other
+# than tracepoints.
+#
+# The 2 database-related functions in this script just show how to gather
+# the basic information, and users can modify and write their own functions
+# according to their specific requirements.
+#
+# The first function "show_general_events" just does a basic grouping for all
+# generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is
+# for an x86 HW PMU event: PEBS with load latency data.
+#
+
+import os
+import sys
+import math
+import struct
+import sqlite3
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from EventClass import *
+
+#
+# If the perf.data has a large number of samples, the insert operation
+# will be very time consuming (about 10+ minutes for 10000 samples) if the
+# .db database is on disk. Move the .db file to a RAM-based FS to speed up
+# the handling, which will cut the time down to several seconds.
+#
+con = sqlite3.connect("/dev/shm/perf.db")
+con.isolation_level = None
+
+def trace_begin():
+ print "In trace_begin:\n"
+
+ #
+ # Will create several tables at the start, pebs_ll is for PEBS data with
+ # load latency info, while gen_events is for general event.
+ #
+ con.execute("""
+ create table if not exists gen_events (
+ name text,
+ symbol text,
+ comm text,
+ dso text
+ );""")
+ con.execute("""
+ create table if not exists pebs_ll (
+ name text,
+ symbol text,
+ comm text,
+ dso text,
+ flags integer,
+ ip integer,
+ status integer,
+ dse integer,
+ dla integer,
+ lat integer
+ );""")
+
+#
+# Create and insert event object to a database so that user could
+# do more analysis with simple database commands.
+#
+def process_event(param_dict):
+ event_attr = param_dict["attr"]
+ sample = param_dict["sample"]
+ raw_buf = param_dict["raw_buf"]
+ comm = param_dict["comm"]
+ name = param_dict["ev_name"]
+
+ # Symbol and dso info are not always resolved
+ if (param_dict.has_key("dso")):
+ dso = param_dict["dso"]
+ else:
+ dso = "Unknown_dso"
+
+ if (param_dict.has_key("symbol")):
+ symbol = param_dict["symbol"]
+ else:
+ symbol = "Unknown_symbol"
+
+ # Create the event object and insert it to the right table in database
+ event = create_event(name, comm, dso, symbol, raw_buf)
+ insert_db(event)
+
+def insert_db(event):
+ if event.ev_type == EVTYPE_GENERIC:
+ con.execute("insert into gen_events values(?, ?, ?, ?)",
+ (event.name, event.symbol, event.comm, event.dso))
+ elif event.ev_type == EVTYPE_PEBS_LL:
+ event.ip &= 0x7fffffffffffffff
+ event.dla &= 0x7fffffffffffffff
+ con.execute("insert into pebs_ll values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
+ (event.name, event.symbol, event.comm, event.dso, event.flags,
+ event.ip, event.status, event.dse, event.dla, event.lat))
+
+def trace_end():
+ print "In trace_end:\n"
+ # We show the basic info for the 2 type of event classes
+ show_general_events()
+ show_pebs_ll()
+ con.close()
+
+#
+# As the number of events may be very large, we can't show the histogram
+# on a linear scale, so a log2 scale is used instead.
+#
+
+def num2sym(num):
+ # Each number will have at least one '#'
+ snum = '#' * int(math.log(num, 2) + 1)
+ return snum
+
+def show_general_events():
+
+ # Check the total record number in the table
+ count = con.execute("select count(*) from gen_events")
+ for t in count:
+ print "There is %d records in gen_events table" % t[0]
+ if t[0] == 0:
+ return
+
+ print "Statistics about the general events grouped by thread/symbol/dso: \n"
+
+ # Group by thread
+ commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)")
+ print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42)
+ for row in commq:
+ print "%16s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+ # Group by symbol
+ print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58)
+ symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)")
+ for row in symbolq:
+ print "%32s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+ # Group by dso
+ print "\n%40s %8s %16s\n%s" % ("dso", "number", "histogram", "="*74)
+ dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)")
+ for row in dsoq:
+ print "%40s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+#
+# This function just shows the basic info, and we could do more with the
+# data in the tables, like checking the function parameters when some
+# big latency events happen.
+#
+def show_pebs_ll():
+
+ count = con.execute("select count(*) from pebs_ll")
+ for t in count:
+ print "There is %d records in pebs_ll table" % t[0]
+ if t[0] == 0:
+ return
+
+ print "Statistics about the PEBS Load Latency events grouped by thread/symbol/dse/latency: \n"
+
+ # Group by thread
+ commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)")
+ print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42)
+ for row in commq:
+ print "%16s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+ # Group by symbol
+ print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58)
+ symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)")
+ for row in symbolq:
+ print "%32s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+ # Group by dse
+ dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)")
+ print "\n%32s %8s %16s\n%s" % ("dse", "number", "histogram", "="*58)
+ for row in dseq:
+ print "%32s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+ # Group by latency
+ latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat")
+ print "\n%32s %8s %16s\n%s" % ("latency", "number", "histogram", "="*58)
+ for row in latq:
+ print "%32s %8d %s" % (row[0], row[1], num2sym(row[1]))
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])
diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py
new file mode 100644
index 000000000..e46f51b17
--- /dev/null
+++ b/tools/perf/scripts/python/export-to-postgresql.py
@@ -0,0 +1,740 @@
+# export-to-postgresql.py: export perf data to a postgresql database
+# Copyright (c) 2014, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+import datetime
+
+# To use this script you will need to have installed package python-pyside which
+# provides LGPL-licensed Python bindings for Qt. You will also need the package
+# libqt4-sql-psql for Qt postgresql support.
+#
+# The script assumes postgresql is running on the local machine and that the
+# user has postgresql permissions to create databases. Examples of installing
+# postgresql and adding such a user are:
+#
+# fedora:
+#
+# $ sudo yum install postgresql postgresql-server python-pyside qt-postgresql
+# $ sudo su - postgres -c initdb
+# $ sudo service postgresql start
+# $ sudo su - postgres
+# $ createuser <your user id here>
+# Shall the new role be a superuser? (y/n) y
+#
+# ubuntu:
+#
+# $ sudo apt-get install postgresql python-pyside.qtsql libqt4-sql-psql
+# $ sudo su - postgres
+# $ createuser -s <your user id here>
+#
+# An example of using this script with Intel PT:
+#
+# $ perf record -e intel_pt//u ls
+# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-postgresql.py pt_example branches calls
+# 2015-05-29 12:49:23.464364 Creating database...
+# 2015-05-29 12:49:26.281717 Writing to intermediate files...
+# 2015-05-29 12:49:27.190383 Copying to database...
+# 2015-05-29 12:49:28.140451 Removing intermediate files...
+# 2015-05-29 12:49:28.147451 Adding primary keys
+# 2015-05-29 12:49:28.655683 Adding foreign keys
+# 2015-05-29 12:49:29.365350 Done
+#
+# To browse the database, psql can be used e.g.
+#
+# $ psql pt_example
+# pt_example=# select * from samples_view where id < 100;
+# pt_example=# \d+
+# pt_example=# \d+ samples_view
+# pt_example=# \q
+#
+# An example of using the database is provided by the script
+# call-graph-from-sql.py. Refer to that script for details.
+#
+# Tables:
+#
+# The tables largely correspond to perf tools' data structures and are mostly self-explanatory.
+#
+# samples
+#
+# 'samples' is the main table. It represents what instruction was executing at a point in time
+# when something (a selected event) happened. The memory address is the instruction pointer or 'ip'.
+#
+# calls
+#
+# 'calls' represents function calls and is related to 'samples' by 'call_id' and 'return_id'.
+# 'calls' is only created when the 'calls' option to this script is specified.
+#
+# call_paths
+#
+# 'call_paths' represents all the call stacks. Each 'call' has an associated record in 'call_paths'.
+# 'call_paths' is only created when the 'calls' option to this script is specified.
+#
+# branch_types
+#
+# 'branch_types' provides descriptions for each type of branch.
+#
+# comm_threads
+#
+# 'comm_threads' shows how 'comms' relates to 'threads'.
+#
+# comms
+#
+# 'comms' contains a record for each 'comm' - the name given to the executable that is running.
+#
+# dsos
+#
+# 'dsos' contains a record for each executable file or library.
+#
+# machines
+#
+# 'machines' can be used to distinguish virtual machines if virtualization is supported.
+#
+# selected_events
+#
+# 'selected_events' contains a record for each kind of event that has been sampled.
+#
+# symbols
+#
+# 'symbols' contains a record for each symbol. Only symbols that have samples are present.
+#
+# threads
+#
+# 'threads' contains a record for each thread.
+#
+# Views:
+#
+# Most of the tables have views for more friendly display. The views are:
+#
+# calls_view
+# call_paths_view
+# comm_threads_view
+# dsos_view
+# machines_view
+# samples_view
+# symbols_view
+# threads_view
+#
+# More examples of browsing the database with psql:
+# Note that some of the examples are not the most optimal SQL queries.
+# Note that call information is only available if the script's 'calls' option has been used.
+#
+# Top 10 function calls (not aggregated by symbol):
+#
+# SELECT * FROM calls_view ORDER BY elapsed_time DESC LIMIT 10;
+#
+# Top 10 function calls (aggregated by symbol):
+#
+# SELECT symbol_id,(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,
+# SUM(elapsed_time) AS tot_elapsed_time,SUM(branch_count) AS tot_branch_count
+# FROM calls_view GROUP BY symbol_id ORDER BY tot_elapsed_time DESC LIMIT 10;
+#
+# Note that the branch count gives a rough estimation of cpu usage, so functions
+# that took a long time but have a relatively low branch count must have spent time
+# waiting.
+#
+# Find symbols by pattern matching on part of the name (e.g. names containing 'alloc'):
+#
+# SELECT * FROM symbols_view WHERE name LIKE '%alloc%';
+#
+# Top 10 function calls for a specific symbol (e.g. whose symbol_id is 187):
+#
+# SELECT * FROM calls_view WHERE symbol_id = 187 ORDER BY elapsed_time DESC LIMIT 10;
+#
+# Show function calls made by function in the same context (i.e. same call path) (e.g. one with call_path_id 254):
+#
+# SELECT * FROM calls_view WHERE parent_call_path_id = 254;
+#
+# Show branches made during a function call (e.g. where call_id is 29357 and return_id is 29370 and tid is 29670)
+#
+# SELECT * FROM samples_view WHERE id >= 29357 AND id <= 29370 AND tid = 29670 AND event LIKE 'branches%';
+#
+# Show transactions:
+#
+# SELECT * FROM samples_view WHERE event = 'transactions';
+#
+# Note transaction start has 'in_tx' true whereas transaction end has 'in_tx' false.
+# Transaction aborts have branch_type_name 'transaction abort'
+#
+# Show transaction aborts:
+#
+# SELECT * FROM samples_view WHERE event = 'transactions' AND branch_type_name = 'transaction abort';
+#
+# To print a call stack requires walking the call_paths table. For example this python script:
+# #!/usr/bin/python2
+#
+# import sys
+# from PySide.QtSql import *
+#
+# if __name__ == '__main__':
+# if (len(sys.argv) < 3):
+# print >> sys.stderr, "Usage is: printcallstack.py <database name> <call_path_id>"
+# raise Exception("Too few arguments")
+# dbname = sys.argv[1]
+# call_path_id = sys.argv[2]
+# db = QSqlDatabase.addDatabase('QPSQL')
+# db.setDatabaseName(dbname)
+# if not db.open():
+# raise Exception("Failed to open database " + dbname + " error: " + db.lastError().text())
+# query = QSqlQuery(db)
+# print " id ip symbol_id symbol dso_id dso_short_name"
+# while call_path_id != 0 and call_path_id != 1:
+# ret = query.exec_('SELECT * FROM call_paths_view WHERE id = ' + str(call_path_id))
+# if not ret:
+# raise Exception("Query failed: " + query.lastError().text())
+# if not query.next():
+# raise Exception("Query failed")
+# print "{0:>6} {1:>10} {2:>9} {3:<30} {4:>6} {5:<30}".format(query.value(0), query.value(1), query.value(2), query.value(3), query.value(4), query.value(5))
+# call_path_id = query.value(6)
+
+from PySide.QtSql import *
+
+# Need to access PostgreSQL C library directly to use COPY FROM STDIN
+from ctypes import *
+libpq = CDLL("libpq.so.5")
+PQconnectdb = libpq.PQconnectdb
+PQconnectdb.restype = c_void_p
+PQconnectdb.argtypes = [ c_char_p ]
+PQfinish = libpq.PQfinish
+PQfinish.argtypes = [ c_void_p ]
+PQstatus = libpq.PQstatus
+PQstatus.restype = c_int
+PQstatus.argtypes = [ c_void_p ]
+PQexec = libpq.PQexec
+PQexec.restype = c_void_p
+PQexec.argtypes = [ c_void_p, c_char_p ]
+PQresultStatus = libpq.PQresultStatus
+PQresultStatus.restype = c_int
+PQresultStatus.argtypes = [ c_void_p ]
+PQputCopyData = libpq.PQputCopyData
+PQputCopyData.restype = c_int
+PQputCopyData.argtypes = [ c_void_p, c_void_p, c_int ]
+PQputCopyEnd = libpq.PQputCopyEnd
+PQputCopyEnd.restype = c_int
+PQputCopyEnd.argtypes = [ c_void_p, c_void_p ]
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+perf_db_export_mode = True
+perf_db_export_calls = False
+perf_db_export_callchains = False
+
+
+def usage():
+ print >> sys.stderr, "Usage is: export-to-postgresql.py <database name> [<columns>] [<calls>] [<callchains>]"
+ print >> sys.stderr, "where: columns 'all' or 'branches'"
+ print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
+ print >> sys.stderr, " callchains 'callchains' => create call_paths table"
+ raise Exception("Too few arguments")
+
+if (len(sys.argv) < 2):
+ usage()
+
+dbname = sys.argv[1]
+
+if (len(sys.argv) >= 3):
+ columns = sys.argv[2]
+else:
+ columns = "all"
+
+if columns not in ("all", "branches"):
+ usage()
+
+branches = (columns == "branches")
+
+for i in range(3,len(sys.argv)):
+ if (sys.argv[i] == "calls"):
+ perf_db_export_calls = True
+ elif (sys.argv[i] == "callchains"):
+ perf_db_export_callchains = True
+ else:
+ usage()
+
+output_dir_name = os.getcwd() + "/" + dbname + "-perf-data"
+os.mkdir(output_dir_name)
+
+def do_query(q, s):
+ if (q.exec_(s)):
+ return
+ raise Exception("Query failed: " + q.lastError().text())
+
+print datetime.datetime.today(), "Creating database..."
+
+db = QSqlDatabase.addDatabase('QPSQL')
+query = QSqlQuery(db)
+db.setDatabaseName('postgres')
+db.open()
+try:
+ do_query(query, 'CREATE DATABASE ' + dbname)
+except:
+ os.rmdir(output_dir_name)
+ raise
+query.finish()
+query.clear()
+db.close()
+
+db.setDatabaseName(dbname)
+db.open()
+
+query = QSqlQuery(db)
+do_query(query, 'SET client_min_messages TO WARNING')
+
+do_query(query, 'CREATE TABLE selected_events ('
+ 'id bigint NOT NULL,'
+ 'name varchar(80))')
+do_query(query, 'CREATE TABLE machines ('
+ 'id bigint NOT NULL,'
+ 'pid integer,'
+ 'root_dir varchar(4096))')
+do_query(query, 'CREATE TABLE threads ('
+ 'id bigint NOT NULL,'
+ 'machine_id bigint,'
+ 'process_id bigint,'
+ 'pid integer,'
+ 'tid integer)')
+do_query(query, 'CREATE TABLE comms ('
+ 'id bigint NOT NULL,'
+ 'comm varchar(16))')
+do_query(query, 'CREATE TABLE comm_threads ('
+ 'id bigint NOT NULL,'
+ 'comm_id bigint,'
+ 'thread_id bigint)')
+do_query(query, 'CREATE TABLE dsos ('
+ 'id bigint NOT NULL,'
+ 'machine_id bigint,'
+ 'short_name varchar(256),'
+ 'long_name varchar(4096),'
+ 'build_id varchar(64))')
+do_query(query, 'CREATE TABLE symbols ('
+ 'id bigint NOT NULL,'
+ 'dso_id bigint,'
+ 'sym_start bigint,'
+ 'sym_end bigint,'
+ 'binding integer,'
+ 'name varchar(2048))')
+do_query(query, 'CREATE TABLE branch_types ('
+ 'id integer NOT NULL,'
+ 'name varchar(80))')
+
+if branches:
+ do_query(query, 'CREATE TABLE samples ('
+ 'id bigint NOT NULL,'
+ 'evsel_id bigint,'
+ 'machine_id bigint,'
+ 'thread_id bigint,'
+ 'comm_id bigint,'
+ 'dso_id bigint,'
+ 'symbol_id bigint,'
+ 'sym_offset bigint,'
+ 'ip bigint,'
+ 'time bigint,'
+ 'cpu integer,'
+ 'to_dso_id bigint,'
+ 'to_symbol_id bigint,'
+ 'to_sym_offset bigint,'
+ 'to_ip bigint,'
+ 'branch_type integer,'
+ 'in_tx boolean,'
+ 'call_path_id bigint)')
+else:
+ do_query(query, 'CREATE TABLE samples ('
+ 'id bigint NOT NULL,'
+ 'evsel_id bigint,'
+ 'machine_id bigint,'
+ 'thread_id bigint,'
+ 'comm_id bigint,'
+ 'dso_id bigint,'
+ 'symbol_id bigint,'
+ 'sym_offset bigint,'
+ 'ip bigint,'
+ 'time bigint,'
+ 'cpu integer,'
+ 'to_dso_id bigint,'
+ 'to_symbol_id bigint,'
+ 'to_sym_offset bigint,'
+ 'to_ip bigint,'
+ 'period bigint,'
+ 'weight bigint,'
+ 'transaction bigint,'
+ 'data_src bigint,'
+ 'branch_type integer,'
+ 'in_tx boolean,'
+ 'call_path_id bigint)')
+
+if perf_db_export_calls or perf_db_export_callchains:
+ do_query(query, 'CREATE TABLE call_paths ('
+ 'id bigint NOT NULL,'
+ 'parent_id bigint,'
+ 'symbol_id bigint,'
+ 'ip bigint)')
+if perf_db_export_calls:
+ do_query(query, 'CREATE TABLE calls ('
+ 'id bigint NOT NULL,'
+ 'thread_id bigint,'
+ 'comm_id bigint,'
+ 'call_path_id bigint,'
+ 'call_time bigint,'
+ 'return_time bigint,'
+ 'branch_count bigint,'
+ 'call_id bigint,'
+ 'return_id bigint,'
+ 'parent_call_path_id bigint,'
+ 'flags integer)')
+
+do_query(query, 'CREATE VIEW machines_view AS '
+ 'SELECT '
+ 'id,'
+ 'pid,'
+ 'root_dir,'
+ 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest'
+ ' FROM machines')
+
+do_query(query, 'CREATE VIEW dsos_view AS '
+ 'SELECT '
+ 'id,'
+ 'machine_id,'
+ '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+ 'short_name,'
+ 'long_name,'
+ 'build_id'
+ ' FROM dsos')
+
+do_query(query, 'CREATE VIEW symbols_view AS '
+ 'SELECT '
+ 'id,'
+ 'name,'
+ '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,'
+ 'dso_id,'
+ 'sym_start,'
+ 'sym_end,'
+ 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding'
+ ' FROM symbols')
+
+do_query(query, 'CREATE VIEW threads_view AS '
+ 'SELECT '
+ 'id,'
+ 'machine_id,'
+ '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+ 'process_id,'
+ 'pid,'
+ 'tid'
+ ' FROM threads')
+
+do_query(query, 'CREATE VIEW comm_threads_view AS '
+ 'SELECT '
+ 'comm_id,'
+ '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+ 'thread_id,'
+ '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+ '(SELECT tid FROM threads WHERE id = thread_id) AS tid'
+ ' FROM comm_threads')
+
+if perf_db_export_calls or perf_db_export_callchains:
+ do_query(query, 'CREATE VIEW call_paths_view AS '
+ 'SELECT '
+ 'c.id,'
+ 'to_hex(c.ip) AS ip,'
+ 'c.symbol_id,'
+ '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,'
+ '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,'
+ '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,'
+ 'c.parent_id,'
+ 'to_hex(p.ip) AS parent_ip,'
+ 'p.symbol_id AS parent_symbol_id,'
+ '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,'
+ '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
+ '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
+ ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
+ do_query(query, 'CREATE VIEW calls_view AS '
+ 'SELECT '
+ 'calls.id,'
+ 'thread_id,'
+ '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+ '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+ '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+ 'call_path_id,'
+ 'to_hex(ip) AS ip,'
+ 'symbol_id,'
+ '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+ 'call_time,'
+ 'return_time,'
+ 'return_time - call_time AS elapsed_time,'
+ 'branch_count,'
+ 'call_id,'
+ 'return_id,'
+ 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,'
+ 'parent_call_path_id'
+ ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
+
+do_query(query, 'CREATE VIEW samples_view AS '
+ 'SELECT '
+ 'id,'
+ 'time,'
+ 'cpu,'
+ '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+ '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+ '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+ '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
+ 'to_hex(ip) AS ip_hex,'
+ '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+ 'sym_offset,'
+ '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
+ 'to_hex(to_ip) AS to_ip_hex,'
+ '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
+ 'to_sym_offset,'
+ '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
+ '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
+ 'in_tx'
+ ' FROM samples')
+
+
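+# PGCOPY binary format: each file starts with the 11-byte signature plus
+# 32-bit flags and header-extension-length fields (both zero), and ends
+# with the 16-bit 0xffff trailer (see the PostgreSQL COPY BINARY docs).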
+file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0)
+file_trailer = "\377\377"
+
+def open_output_file(file_name):
+ path_name = output_dir_name + "/" + file_name
+ file = open(path_name, "w+")
+ file.write(file_header)
+ return file
+
+def close_output_file(file):
+ file.write(file_trailer)
+ file.close()
+
+def copy_output_file_direct(file, table_name):
+ close_output_file(file)
+ sql = "COPY " + table_name + " FROM '" + file.name + "' (FORMAT 'binary')"
+ do_query(query, sql)
+
+# Use COPY FROM STDIN because security may prevent postgres from accessing the files directly
+def copy_output_file(file, table_name):
+ conn = PQconnectdb("dbname = " + dbname)
+ if (PQstatus(conn)):
+ raise Exception("COPY FROM STDIN PQconnectdb failed")
+ file.write(file_trailer)
+ file.seek(0)
+ sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')"
+ res = PQexec(conn, sql)
+ if (PQresultStatus(res) != 4): # PGRES_COPY_IN
+ raise Exception("COPY FROM STDIN PQexec failed")
+ data = file.read(65536)
+ while (len(data)):
+ ret = PQputCopyData(conn, data, len(data))
+ if (ret != 1):
+ raise Exception("COPY FROM STDIN PQputCopyData failed, error " + str(ret))
+ data = file.read(65536)
+ ret = PQputCopyEnd(conn, None)
+ if (ret != 1):
+ raise Exception("COPY FROM STDIN PQputCopyEnd failed, error " + str(ret))
+ PQfinish(conn)
+
+def remove_output_file(file):
+ name = file.name
+ file.close()
+ os.unlink(name)
+
+evsel_file = open_output_file("evsel_table.bin")
+machine_file = open_output_file("machine_table.bin")
+thread_file = open_output_file("thread_table.bin")
+comm_file = open_output_file("comm_table.bin")
+comm_thread_file = open_output_file("comm_thread_table.bin")
+dso_file = open_output_file("dso_table.bin")
+symbol_file = open_output_file("symbol_table.bin")
+branch_type_file = open_output_file("branch_type_table.bin")
+sample_file = open_output_file("sample_table.bin")
+if perf_db_export_calls or perf_db_export_callchains:
+ call_path_file = open_output_file("call_path_table.bin")
+if perf_db_export_calls:
+ call_file = open_output_file("call_table.bin")
+
+def trace_begin():
+ print datetime.datetime.today(), "Writing to intermediate files..."
+ # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs
+ evsel_table(0, "unknown")
+ machine_table(0, 0, "unknown")
+ thread_table(0, 0, 0, -1, -1)
+ comm_table(0, "unknown")
+ dso_table(0, 0, "unknown", "unknown", "")
+ symbol_table(0, 0, 0, 0, 0, "unknown")
+ sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ if perf_db_export_calls or perf_db_export_callchains:
+ call_path_table(0, 0, 0, 0)
+
+unhandled_count = 0
+
+def trace_end():
+ print datetime.datetime.today(), "Copying to database..."
+ copy_output_file(evsel_file, "selected_events")
+ copy_output_file(machine_file, "machines")
+ copy_output_file(thread_file, "threads")
+ copy_output_file(comm_file, "comms")
+ copy_output_file(comm_thread_file, "comm_threads")
+ copy_output_file(dso_file, "dsos")
+ copy_output_file(symbol_file, "symbols")
+ copy_output_file(branch_type_file, "branch_types")
+ copy_output_file(sample_file, "samples")
+ if perf_db_export_calls or perf_db_export_callchains:
+ copy_output_file(call_path_file, "call_paths")
+ if perf_db_export_calls:
+ copy_output_file(call_file, "calls")
+
+ print datetime.datetime.today(), "Removing intermediate files..."
+ remove_output_file(evsel_file)
+ remove_output_file(machine_file)
+ remove_output_file(thread_file)
+ remove_output_file(comm_file)
+ remove_output_file(comm_thread_file)
+ remove_output_file(dso_file)
+ remove_output_file(symbol_file)
+ remove_output_file(branch_type_file)
+ remove_output_file(sample_file)
+ if perf_db_export_calls or perf_db_export_callchains:
+ remove_output_file(call_path_file)
+ if perf_db_export_calls:
+ remove_output_file(call_file)
+ os.rmdir(output_dir_name)
+ print datetime.datetime.today(), "Adding primary keys"
+ do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE comms ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE comm_threads ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE dsos ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE symbols ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE branch_types ADD PRIMARY KEY (id)')
+ do_query(query, 'ALTER TABLE samples ADD PRIMARY KEY (id)')
+ if perf_db_export_calls or perf_db_export_callchains:
+ do_query(query, 'ALTER TABLE call_paths ADD PRIMARY KEY (id)')
+ if perf_db_export_calls:
+ do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)')
+
+ print datetime.datetime.today(), "Adding foreign keys"
+ do_query(query, 'ALTER TABLE threads '
+ 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
+ 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)')
+ do_query(query, 'ALTER TABLE comm_threads '
+ 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
+ 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id)')
+ do_query(query, 'ALTER TABLE dsos '
+ 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id)')
+ do_query(query, 'ALTER TABLE symbols '
+ 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id)')
+ do_query(query, 'ALTER TABLE samples '
+ 'ADD CONSTRAINT evselfk FOREIGN KEY (evsel_id) REFERENCES selected_events (id),'
+ 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),'
+ 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
+ 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
+ 'ADD CONSTRAINT dsofk FOREIGN KEY (dso_id) REFERENCES dsos (id),'
+ 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id),'
+ 'ADD CONSTRAINT todsofk FOREIGN KEY (to_dso_id) REFERENCES dsos (id),'
+ 'ADD CONSTRAINT tosymbolfk FOREIGN KEY (to_symbol_id) REFERENCES symbols (id)')
+ if perf_db_export_calls or perf_db_export_callchains:
+ do_query(query, 'ALTER TABLE call_paths '
+ 'ADD CONSTRAINT parentfk FOREIGN KEY (parent_id) REFERENCES call_paths (id),'
+ 'ADD CONSTRAINT symbolfk FOREIGN KEY (symbol_id) REFERENCES symbols (id)')
+ if perf_db_export_calls:
+ do_query(query, 'ALTER TABLE calls '
+ 'ADD CONSTRAINT threadfk FOREIGN KEY (thread_id) REFERENCES threads (id),'
+ 'ADD CONSTRAINT commfk FOREIGN KEY (comm_id) REFERENCES comms (id),'
+ 'ADD CONSTRAINT call_pathfk FOREIGN KEY (call_path_id) REFERENCES call_paths (id),'
+ 'ADD CONSTRAINT callfk FOREIGN KEY (call_id) REFERENCES samples (id),'
+ 'ADD CONSTRAINT returnfk FOREIGN KEY (return_id) REFERENCES samples (id),'
+ 'ADD CONSTRAINT parent_call_pathfk FOREIGN KEY (parent_call_path_id) REFERENCES call_paths (id)')
+ do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
+
+ if (unhandled_count):
+ print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
+ print datetime.datetime.today(), "Done"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ global unhandled_count
+ unhandled_count += 1
+
+def sched__sched_switch(*x):
+ pass
+
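+# Each table function below emits one tuple in PostgreSQL binary COPY format:
+# a big-endian ("!") int16 field count, then, for each field, an int32 byte
+# length followed by the value itself (the per-file COPY header and trailer
+# are handled by the open/copy helpers defined earlier in this script).
+# For example (illustrative), evsel_table(1, "cycles") packs:
+#   struct.pack("!hiqi6s", 2, 8, 1, 6, "cycles")
+# i.e. 2 fields | length 8 + int64 id | length 6 + 6-byte name.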
+def evsel_table(evsel_id, evsel_name, *x):
+ n = len(evsel_name)
+ fmt = "!hiqi" + str(n) + "s"
+ value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name)
+ evsel_file.write(value)
+
+def machine_table(machine_id, pid, root_dir, *x):
+ n = len(root_dir)
+ fmt = "!hiqiii" + str(n) + "s"
+ value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir)
+ machine_file.write(value)
+
+def thread_table(thread_id, machine_id, process_id, pid, tid, *x):
+ value = struct.pack("!hiqiqiqiiii", 5, 8, thread_id, 8, machine_id, 8, process_id, 4, pid, 4, tid)
+ thread_file.write(value)
+
+def comm_table(comm_id, comm_str, *x):
+ n = len(comm_str)
+ fmt = "!hiqi" + str(n) + "s"
+ value = struct.pack(fmt, 2, 8, comm_id, n, comm_str)
+ comm_file.write(value)
+
+def comm_thread_table(comm_thread_id, comm_id, thread_id, *x):
+ fmt = "!hiqiqiq"
+ value = struct.pack(fmt, 3, 8, comm_thread_id, 8, comm_id, 8, thread_id)
+ comm_thread_file.write(value)
+
+def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x):
+ n1 = len(short_name)
+ n2 = len(long_name)
+ n3 = len(build_id)
+ fmt = "!hiqiqi" + str(n1) + "si" + str(n2) + "si" + str(n3) + "s"
+ value = struct.pack(fmt, 5, 8, dso_id, 8, machine_id, n1, short_name, n2, long_name, n3, build_id)
+ dso_file.write(value)
+
+def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x):
+ n = len(symbol_name)
+ fmt = "!hiqiqiqiqiii" + str(n) + "s"
+ value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name)
+ symbol_file.write(value)
+
+def branch_type_table(branch_type, name, *x):
+ n = len(name)
+ fmt = "!hiii" + str(n) + "s"
+ value = struct.pack(fmt, 2, 4, branch_type, n, name)
+ branch_type_file.write(value)
+
+def sample_table(sample_id, evsel_id, machine_id, thread_id, comm_id, dso_id, symbol_id, sym_offset, ip, time, cpu, to_dso_id, to_symbol_id, to_sym_offset, to_ip, period, weight, transaction, data_src, branch_type, in_tx, call_path_id, *x):
+ if branches:
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiiiBiq", 18, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 4, branch_type, 1, in_tx, 8, call_path_id)
+ else:
+ value = struct.pack("!hiqiqiqiqiqiqiqiqiqiqiiiqiqiqiqiqiqiqiqiiiBiq", 22, 8, sample_id, 8, evsel_id, 8, machine_id, 8, thread_id, 8, comm_id, 8, dso_id, 8, symbol_id, 8, sym_offset, 8, ip, 8, time, 4, cpu, 8, to_dso_id, 8, to_symbol_id, 8, to_sym_offset, 8, to_ip, 8, period, 8, weight, 8, transaction, 8, data_src, 4, branch_type, 1, in_tx, 8, call_path_id)
+ sample_file.write(value)
+
+def call_path_table(cp_id, parent_id, symbol_id, ip, *x):
+ fmt = "!hiqiqiqiq"
+ value = struct.pack(fmt, 4, 8, cp_id, 8, parent_id, 8, symbol_id, 8, ip)
+ call_path_file.write(value)
+
+def call_return_table(cr_id, thread_id, comm_id, call_path_id, call_time, return_time, branch_count, call_id, return_id, parent_call_path_id, flags, *x):
+ fmt = "!hiqiqiqiqiqiqiqiqiqiqii"
+ value = struct.pack(fmt, 11, 8, cr_id, 8, thread_id, 8, comm_id, 8, call_path_id, 8, call_time, 8, return_time, 8, branch_count, 8, call_id, 8, return_id, 8, parent_call_path_id, 4, flags)
+ call_file.write(value)
diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py
new file mode 100644
index 000000000..e4bb82c8a
--- /dev/null
+++ b/tools/perf/scripts/python/export-to-sqlite.py
@@ -0,0 +1,455 @@
+# export-to-sqlite.py: export perf data to a sqlite3 database
+# Copyright (c) 2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+import datetime
+
+# To use this script you will need to have installed the python-pyside package,
+# which provides LGPL-licensed Python bindings for Qt. You will also need the
+# libqt4-sql-sqlite package for Qt sqlite3 support.
+#
+# An example of using this script with Intel PT:
+#
+# $ perf record -e intel_pt//u ls
+# $ perf script -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py pt_example branches calls
+# 2017-07-31 14:26:07.326913 Creating database...
+# 2017-07-31 14:26:07.538097 Writing records...
+# 2017-07-31 14:26:09.889292 Adding indexes
+# 2017-07-31 14:26:09.958746 Done
+#
+# To browse the database, sqlite3 can be used e.g.
+#
+# $ sqlite3 pt_example
+# sqlite> .header on
+# sqlite> select * from samples_view where id < 10;
+# sqlite> .mode column
+# sqlite> select * from samples_view where id < 10;
+# sqlite> .tables
+# sqlite> .schema samples_view
+# sqlite> .quit
+#
+# An example of using the database is provided by the script
+# call-graph-from-sql.py. Refer to that script for details.
+#
+# The database structure is practically the same as that created by the
+# script export-to-postgresql.py. Refer to that script for details. A notable
+# difference is the 'transaction' column of the 'samples' table which is
+# renamed 'transaction_' in sqlite because 'transaction' is a reserved word.
+
+from PySide.QtSql import *
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+perf_db_export_mode = True
+perf_db_export_calls = False
+perf_db_export_callchains = False
+
+def usage():
+ print >> sys.stderr, "Usage is: export-to-sqlite.py <database name> [<columns>] [<calls>] [<callchains>]"
+ print >> sys.stderr, "where: columns 'all' or 'branches'"
+ print >> sys.stderr, " calls 'calls' => create calls and call_paths table"
+ print >> sys.stderr, " callchains 'callchains' => create call_paths table"
+ raise Exception("Too few arguments")
+
+if (len(sys.argv) < 2):
+ usage()
+
+dbname = sys.argv[1]
+
+if (len(sys.argv) >= 3):
+ columns = sys.argv[2]
+else:
+ columns = "all"
+
+if columns not in ("all", "branches"):
+ usage()
+
+branches = (columns == "branches")
+
+for i in range(3,len(sys.argv)):
+ if (sys.argv[i] == "calls"):
+ perf_db_export_calls = True
+ elif (sys.argv[i] == "callchains"):
+ perf_db_export_callchains = True
+ else:
+ usage()
+
+def do_query(q, s):
+ if (q.exec_(s)):
+ return
+ raise Exception("Query failed: " + q.lastError().text())
+
+def do_query_(q):
+ if (q.exec_()):
+ return
+ raise Exception("Query failed: " + q.lastError().text())
+
+print datetime.datetime.today(), "Creating database..."
+
+db_exists = False
+try:
+ f = open(dbname)
+ f.close()
+ db_exists = True
+except:
+ pass
+
+if db_exists:
+ raise Exception(dbname + " already exists")
+
+db = QSqlDatabase.addDatabase('QSQLITE')
+db.setDatabaseName(dbname)
+db.open()
+
+query = QSqlQuery(db)
+
+do_query(query, 'PRAGMA journal_mode = OFF')
+do_query(query, 'BEGIN TRANSACTION')
+
+do_query(query, 'CREATE TABLE selected_events ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'name varchar(80))')
+do_query(query, 'CREATE TABLE machines ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'pid integer,'
+ 'root_dir varchar(4096))')
+do_query(query, 'CREATE TABLE threads ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'machine_id bigint,'
+ 'process_id bigint,'
+ 'pid integer,'
+ 'tid integer)')
+do_query(query, 'CREATE TABLE comms ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'comm varchar(16))')
+do_query(query, 'CREATE TABLE comm_threads ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'comm_id bigint,'
+ 'thread_id bigint)')
+do_query(query, 'CREATE TABLE dsos ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'machine_id bigint,'
+ 'short_name varchar(256),'
+ 'long_name varchar(4096),'
+ 'build_id varchar(64))')
+do_query(query, 'CREATE TABLE symbols ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'dso_id bigint,'
+ 'sym_start bigint,'
+ 'sym_end bigint,'
+ 'binding integer,'
+ 'name varchar(2048))')
+do_query(query, 'CREATE TABLE branch_types ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'name varchar(80))')
+
+if branches:
+ do_query(query, 'CREATE TABLE samples ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'evsel_id bigint,'
+ 'machine_id bigint,'
+ 'thread_id bigint,'
+ 'comm_id bigint,'
+ 'dso_id bigint,'
+ 'symbol_id bigint,'
+ 'sym_offset bigint,'
+ 'ip bigint,'
+ 'time bigint,'
+ 'cpu integer,'
+ 'to_dso_id bigint,'
+ 'to_symbol_id bigint,'
+ 'to_sym_offset bigint,'
+ 'to_ip bigint,'
+ 'branch_type integer,'
+ 'in_tx boolean,'
+ 'call_path_id bigint)')
+else:
+ do_query(query, 'CREATE TABLE samples ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'evsel_id bigint,'
+ 'machine_id bigint,'
+ 'thread_id bigint,'
+ 'comm_id bigint,'
+ 'dso_id bigint,'
+ 'symbol_id bigint,'
+ 'sym_offset bigint,'
+ 'ip bigint,'
+ 'time bigint,'
+ 'cpu integer,'
+ 'to_dso_id bigint,'
+ 'to_symbol_id bigint,'
+ 'to_sym_offset bigint,'
+ 'to_ip bigint,'
+ 'period bigint,'
+ 'weight bigint,'
+ 'transaction_ bigint,'
+ 'data_src bigint,'
+ 'branch_type integer,'
+ 'in_tx boolean,'
+ 'call_path_id bigint)')
+
+if perf_db_export_calls or perf_db_export_callchains:
+ do_query(query, 'CREATE TABLE call_paths ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'parent_id bigint,'
+ 'symbol_id bigint,'
+ 'ip bigint)')
+if perf_db_export_calls:
+ do_query(query, 'CREATE TABLE calls ('
+ 'id integer NOT NULL PRIMARY KEY,'
+ 'thread_id bigint,'
+ 'comm_id bigint,'
+ 'call_path_id bigint,'
+ 'call_time bigint,'
+ 'return_time bigint,'
+ 'branch_count bigint,'
+ 'call_id bigint,'
+ 'return_id bigint,'
+ 'parent_call_path_id bigint,'
+ 'flags integer)')
+
+# printf was added to sqlite in version 3.8.3
+sqlite_has_printf = False
+try:
+ do_query(query, 'SELECT printf("") FROM machines')
+ sqlite_has_printf = True
+except:
+ pass
+
+def emit_to_hex(x):
+ if sqlite_has_printf:
+ return 'printf("%x", ' + x + ')'
+ else:
+ return x
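+# e.g. emit_to_hex('ip') expands to 'printf("%x", ip)' when sqlite supports
+# printf(), so the views below can show addresses in hex; otherwise the raw
+# integer column is selected unchanged.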
+
+do_query(query, 'CREATE VIEW machines_view AS '
+ 'SELECT '
+ 'id,'
+ 'pid,'
+ 'root_dir,'
+ 'CASE WHEN id=0 THEN \'unknown\' WHEN pid=-1 THEN \'host\' ELSE \'guest\' END AS host_or_guest'
+ ' FROM machines')
+
+do_query(query, 'CREATE VIEW dsos_view AS '
+ 'SELECT '
+ 'id,'
+ 'machine_id,'
+ '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+ 'short_name,'
+ 'long_name,'
+ 'build_id'
+ ' FROM dsos')
+
+do_query(query, 'CREATE VIEW symbols_view AS '
+ 'SELECT '
+ 'id,'
+ 'name,'
+ '(SELECT short_name FROM dsos WHERE id=dso_id) AS dso,'
+ 'dso_id,'
+ 'sym_start,'
+ 'sym_end,'
+ 'CASE WHEN binding=0 THEN \'local\' WHEN binding=1 THEN \'global\' ELSE \'weak\' END AS binding'
+ ' FROM symbols')
+
+do_query(query, 'CREATE VIEW threads_view AS '
+ 'SELECT '
+ 'id,'
+ 'machine_id,'
+ '(SELECT host_or_guest FROM machines_view WHERE id = machine_id) AS host_or_guest,'
+ 'process_id,'
+ 'pid,'
+ 'tid'
+ ' FROM threads')
+
+do_query(query, 'CREATE VIEW comm_threads_view AS '
+ 'SELECT '
+ 'comm_id,'
+ '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+ 'thread_id,'
+ '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+ '(SELECT tid FROM threads WHERE id = thread_id) AS tid'
+ ' FROM comm_threads')
+
+if perf_db_export_calls or perf_db_export_callchains:
+ do_query(query, 'CREATE VIEW call_paths_view AS '
+ 'SELECT '
+ 'c.id,'
+ + emit_to_hex('c.ip') + ' AS ip,'
+ 'c.symbol_id,'
+ '(SELECT name FROM symbols WHERE id = c.symbol_id) AS symbol,'
+ '(SELECT dso_id FROM symbols WHERE id = c.symbol_id) AS dso_id,'
+ '(SELECT dso FROM symbols_view WHERE id = c.symbol_id) AS dso_short_name,'
+ 'c.parent_id,'
+ + emit_to_hex('p.ip') + ' AS parent_ip,'
+ 'p.symbol_id AS parent_symbol_id,'
+ '(SELECT name FROM symbols WHERE id = p.symbol_id) AS parent_symbol,'
+ '(SELECT dso_id FROM symbols WHERE id = p.symbol_id) AS parent_dso_id,'
+ '(SELECT dso FROM symbols_view WHERE id = p.symbol_id) AS parent_dso_short_name'
+ ' FROM call_paths c INNER JOIN call_paths p ON p.id = c.parent_id')
+if perf_db_export_calls:
+ do_query(query, 'CREATE VIEW calls_view AS '
+ 'SELECT '
+ 'calls.id,'
+ 'thread_id,'
+ '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+ '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+ '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+ 'call_path_id,'
+ + emit_to_hex('ip') + ' AS ip,'
+ 'symbol_id,'
+ '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+ 'call_time,'
+ 'return_time,'
+ 'return_time - call_time AS elapsed_time,'
+ 'branch_count,'
+ 'call_id,'
+ 'return_id,'
+ 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,'
+ 'parent_call_path_id'
+ ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id')
+
+do_query(query, 'CREATE VIEW samples_view AS '
+ 'SELECT '
+ 'id,'
+ 'time,'
+ 'cpu,'
+ '(SELECT pid FROM threads WHERE id = thread_id) AS pid,'
+ '(SELECT tid FROM threads WHERE id = thread_id) AS tid,'
+ '(SELECT comm FROM comms WHERE id = comm_id) AS command,'
+ '(SELECT name FROM selected_events WHERE id = evsel_id) AS event,'
+ + emit_to_hex('ip') + ' AS ip_hex,'
+ '(SELECT name FROM symbols WHERE id = symbol_id) AS symbol,'
+ 'sym_offset,'
+ '(SELECT short_name FROM dsos WHERE id = dso_id) AS dso_short_name,'
+ + emit_to_hex('to_ip') + ' AS to_ip_hex,'
+ '(SELECT name FROM symbols WHERE id = to_symbol_id) AS to_symbol,'
+ 'to_sym_offset,'
+ '(SELECT short_name FROM dsos WHERE id = to_dso_id) AS to_dso_short_name,'
+ '(SELECT name FROM branch_types WHERE id = branch_type) AS branch_type_name,'
+ 'in_tx'
+ ' FROM samples')
+
+do_query(query, 'END TRANSACTION')
+
+evsel_query = QSqlQuery(db)
+evsel_query.prepare("INSERT INTO selected_events VALUES (?, ?)")
+machine_query = QSqlQuery(db)
+machine_query.prepare("INSERT INTO machines VALUES (?, ?, ?)")
+thread_query = QSqlQuery(db)
+thread_query.prepare("INSERT INTO threads VALUES (?, ?, ?, ?, ?)")
+comm_query = QSqlQuery(db)
+comm_query.prepare("INSERT INTO comms VALUES (?, ?)")
+comm_thread_query = QSqlQuery(db)
+comm_thread_query.prepare("INSERT INTO comm_threads VALUES (?, ?, ?)")
+dso_query = QSqlQuery(db)
+dso_query.prepare("INSERT INTO dsos VALUES (?, ?, ?, ?, ?)")
+symbol_query = QSqlQuery(db)
+symbol_query.prepare("INSERT INTO symbols VALUES (?, ?, ?, ?, ?, ?)")
+branch_type_query = QSqlQuery(db)
+branch_type_query.prepare("INSERT INTO branch_types VALUES (?, ?)")
+sample_query = QSqlQuery(db)
+if branches:
+ sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+else:
+ sample_query.prepare("INSERT INTO samples VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+if perf_db_export_calls or perf_db_export_callchains:
+ call_path_query = QSqlQuery(db)
+ call_path_query.prepare("INSERT INTO call_paths VALUES (?, ?, ?, ?)")
+if perf_db_export_calls:
+ call_query = QSqlQuery(db)
+ call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")
+
+def trace_begin():
+ print datetime.datetime.today(), "Writing records..."
+ do_query(query, 'BEGIN TRANSACTION')
+	# id == 0 means unknown. It is easier to create records for them than to replace the zeroes with NULLs
+ evsel_table(0, "unknown")
+ machine_table(0, 0, "unknown")
+ thread_table(0, 0, 0, -1, -1)
+ comm_table(0, "unknown")
+ dso_table(0, 0, "unknown", "unknown", "")
+ symbol_table(0, 0, 0, 0, 0, "unknown")
+ sample_table(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+ if perf_db_export_calls or perf_db_export_callchains:
+ call_path_table(0, 0, 0, 0)
+
+unhandled_count = 0
+
+def trace_end():
+ do_query(query, 'END TRANSACTION')
+
+ print datetime.datetime.today(), "Adding indexes"
+ if perf_db_export_calls:
+ do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)')
+
+ if (unhandled_count):
+ print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events"
+ print datetime.datetime.today(), "Done"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ global unhandled_count
+ unhandled_count += 1
+
+def sched__sched_switch(*x):
+ pass
+
+def bind_exec(q, n, x):
+ for xx in x[0:n]:
+ q.addBindValue(str(xx))
+ do_query_(q)
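+# e.g. comm_table(1, "bash") calls bind_exec(comm_query, 2, x), binding "1"
+# and "bash" to the two "?" placeholders of the prepared INSERT and executing it.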
+
+def evsel_table(*x):
+ bind_exec(evsel_query, 2, x)
+
+def machine_table(*x):
+ bind_exec(machine_query, 3, x)
+
+def thread_table(*x):
+ bind_exec(thread_query, 5, x)
+
+def comm_table(*x):
+ bind_exec(comm_query, 2, x)
+
+def comm_thread_table(*x):
+ bind_exec(comm_thread_query, 3, x)
+
+def dso_table(*x):
+ bind_exec(dso_query, 5, x)
+
+def symbol_table(*x):
+ bind_exec(symbol_query, 6, x)
+
+def branch_type_table(*x):
+ bind_exec(branch_type_query, 2, x)
+
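+# With the "branches" schema the samples table omits the period, weight,
+# transaction and data_src columns, so sample_table() below skips x[15:19]
+# when binding.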
+def sample_table(*x):
+ if branches:
+ for xx in x[0:15]:
+ sample_query.addBindValue(str(xx))
+ for xx in x[19:22]:
+ sample_query.addBindValue(str(xx))
+ do_query_(sample_query)
+ else:
+ bind_exec(sample_query, 22, x)
+
+def call_path_table(*x):
+ bind_exec(call_path_query, 4, x)
+
+def call_return_table(*x):
+ bind_exec(call_query, 11, x)
diff --git a/tools/perf/scripts/python/failed-syscalls-by-pid.py b/tools/perf/scripts/python/failed-syscalls-by-pid.py
new file mode 100644
index 000000000..cafeff3d7
--- /dev/null
+++ b/tools/perf/scripts/python/failed-syscalls-by-pid.py
@@ -0,0 +1,78 @@
+# failed system call counts, by pid
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide failed system call totals, broken down by pid.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+usage = "perf script -s syscall-counts-by-pid.py [comm|pid]\n";
+
+for_comm = None
+for_pid = None
+
+if len(sys.argv) > 2:
+ sys.exit(usage)
+
+if len(sys.argv) > 1:
+ try:
+ for_pid = int(sys.argv[1])
+ except:
+ for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+ print "Press control+C to stop and show the summary"
+
+def trace_end():
+ print_error_totals()
+
+def raw_syscalls__sys_exit(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, id, ret):
+ if (for_comm and common_comm != for_comm) or \
+ (for_pid and common_pid != for_pid ):
+ return
+
+ if ret < 0:
+ try:
+ syscalls[common_comm][common_pid][id][ret] += 1
+ except TypeError:
+ syscalls[common_comm][common_pid][id][ret] = 1
+
+def syscalls__sys_exit(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, ret):
+ raw_syscalls__sys_exit(**locals())
+
+def print_error_totals():
+ if for_comm is not None:
+ print "\nsyscall errors for %s:\n\n" % (for_comm),
+ else:
+ print "\nsyscall errors:\n\n",
+
+ print "%-30s %10s\n" % ("comm [pid]", "count"),
+ print "%-30s %10s\n" % ("------------------------------", \
+ "----------"),
+
+ comm_keys = syscalls.keys()
+ for comm in comm_keys:
+ pid_keys = syscalls[comm].keys()
+ for pid in pid_keys:
+ print "\n%s [%d]\n" % (comm, pid),
+ id_keys = syscalls[comm][pid].keys()
+ for id in id_keys:
+ print " syscall: %-16s\n" % syscall_name(id),
+ ret_keys = syscalls[comm][pid][id].keys()
+ for ret, val in sorted(syscalls[comm][pid][id].iteritems(), key = lambda(k, v): (v, k), reverse = True):
+ print " err = %-20s %10d\n" % (strerror(ret), val),
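+
+# Illustrative (hypothetical) output:
+#
+# comm [pid]                          count
+# ------------------------------ ----------
+#
+# bash [1234]
+#   syscall: open
+#     err = ENOENT                        3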
diff --git a/tools/perf/scripts/python/futex-contention.py b/tools/perf/scripts/python/futex-contention.py
new file mode 100644
index 000000000..0f5cf437b
--- /dev/null
+++ b/tools/perf/scripts/python/futex-contention.py
@@ -0,0 +1,50 @@
+# futex contention
+# (c) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Translation of:
+#
+# http://sourceware.org/systemtap/wiki/WSFutexContention
+#
+# to perf python scripting.
+#
+# Measures futex contention
+
+import os, sys
+sys.path.append(os.environ['PERF_EXEC_PATH'] + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+from Util import *
+
+process_names = {}
+thread_thislock = {}
+thread_blocktime = {}
+
+lock_waits = {} # long-lived stats on (tid,lock) blockage elapsed time
+process_names = {} # long-lived pid-to-execname mapping
+
+def syscalls__sys_enter_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
+ nr, uaddr, op, val, utime, uaddr2, val3):
+ cmd = op & FUTEX_CMD_MASK
+ if cmd != FUTEX_WAIT:
+ return # we don't care about originators of WAKE events
+
+ process_names[tid] = comm
+ thread_thislock[tid] = uaddr
+ thread_blocktime[tid] = nsecs(s, ns)
+
+def syscalls__sys_exit_futex(event, ctxt, cpu, s, ns, tid, comm, callchain,
+ nr, ret):
+ if thread_blocktime.has_key(tid):
+ elapsed = nsecs(s, ns) - thread_blocktime[tid]
+ add_stats(lock_waits, (tid, thread_thislock[tid]), elapsed)
+ del thread_blocktime[tid]
+ del thread_thislock[tid]
+
+def trace_begin():
+ print "Press control+C to stop and show the summary"
+
+def trace_end():
+ for (tid, lock) in lock_waits:
+ min, max, avg, count = lock_waits[tid, lock]
+ print "%s[%d] lock %x contended %d times, %d avg ns" % \
+ (process_names[tid], tid, lock, count, avg)
+
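+# Illustrative (hypothetical) summary line printed by trace_end:
+#
+#   bash[1234] lock 7f1a2b3c contended 5 times, 2313 avg ns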
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
new file mode 100644
index 000000000..b19172d67
--- /dev/null
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -0,0 +1,128 @@
+# intel-pt-events.py: Print Intel PT Power Events and PTWRITE
+# Copyright (c) 2017, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+
+import os
+import sys
+import struct
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# These perf imports are not used at present
+#from perf_trace_context import *
+#from Core import *
+
+def trace_begin():
+ print "Intel PT Power Events and PTWRITE"
+
+def trace_end():
+ print "End"
+
+def trace_unhandled(event_name, context, event_fields_dict):
+	print ' '.join(['%s=%s' % (k, str(v)) for k, v in sorted(event_fields_dict.items())])
+
+def print_ptwrite(raw_buf):
+ data = struct.unpack_from("<IQ", raw_buf)
+ flags = data[0]
+ payload = data[1]
+ exact_ip = flags & 1
+ print "IP: %u payload: %#x" % (exact_ip, payload),
+
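+# The cbr payload (unpacked as "<BBBBII") appears to carry the core-to-bus
+# ratio in byte 0, the maximum non-turbo ratio in byte 2 and the frequency in
+# kHz in data[4]; the "+ 500" and "+ 5" terms round to the nearest MHz and
+# the nearest percent respectively.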
+def print_cbr(raw_buf):
+ data = struct.unpack_from("<BBBBII", raw_buf)
+ cbr = data[0]
+ f = (data[4] + 500) / 1000
+ p = ((cbr * 1000 / data[2]) + 5) / 10
+ print "%3u freq: %4u MHz (%3u%%)" % (cbr, f, p),
+
+def print_mwait(raw_buf):
+ data = struct.unpack_from("<IQ", raw_buf)
+ payload = data[1]
+ hints = payload & 0xff
+ extensions = (payload >> 32) & 0x3
+ print "hints: %#x extensions: %#x" % (hints, extensions),
+
+def print_pwre(raw_buf):
+ data = struct.unpack_from("<IQ", raw_buf)
+ payload = data[1]
+ hw = (payload >> 7) & 1
+ cstate = (payload >> 12) & 0xf
+ subcstate = (payload >> 8) & 0xf
+ print "hw: %u cstate: %u sub-cstate: %u" % (hw, cstate, subcstate),
+
+def print_exstop(raw_buf):
+ data = struct.unpack_from("<I", raw_buf)
+ flags = data[0]
+ exact_ip = flags & 1
+ print "IP: %u" % (exact_ip),
+
+def print_pwrx(raw_buf):
+ data = struct.unpack_from("<IQ", raw_buf)
+ payload = data[1]
+ deepest_cstate = payload & 0xf
+ last_cstate = (payload >> 4) & 0xf
+ wake_reason = (payload >> 8) & 0xf
+ print "deepest cstate: %u last cstate: %u wake reason: %#x" % (deepest_cstate, last_cstate, wake_reason),
+
+def print_common_start(comm, sample, name):
+ ts = sample["time"]
+ cpu = sample["cpu"]
+ pid = sample["pid"]
+ tid = sample["tid"]
+	print "%16s %5u/%-5u [%03u] %9u.%09u %7s:" % (comm, pid, tid, cpu, ts / 1000000000, ts % 1000000000, name),
+
+def print_common_ip(sample, symbol, dso):
+ ip = sample["ip"]
+ print "%16x %s (%s)" % (ip, symbol, dso)
+
+def process_event(param_dict):
+ event_attr = param_dict["attr"]
+ sample = param_dict["sample"]
+ raw_buf = param_dict["raw_buf"]
+ comm = param_dict["comm"]
+ name = param_dict["ev_name"]
+
+ # Symbol and dso info are not always resolved
+ if (param_dict.has_key("dso")):
+ dso = param_dict["dso"]
+ else:
+ dso = "[unknown]"
+
+ if (param_dict.has_key("symbol")):
+ symbol = param_dict["symbol"]
+ else:
+ symbol = "[unknown]"
+
+ if name == "ptwrite":
+ print_common_start(comm, sample, name)
+ print_ptwrite(raw_buf)
+ print_common_ip(sample, symbol, dso)
+ elif name == "cbr":
+ print_common_start(comm, sample, name)
+ print_cbr(raw_buf)
+ print_common_ip(sample, symbol, dso)
+ elif name == "mwait":
+ print_common_start(comm, sample, name)
+ print_mwait(raw_buf)
+ print_common_ip(sample, symbol, dso)
+ elif name == "pwre":
+ print_common_start(comm, sample, name)
+ print_pwre(raw_buf)
+ print_common_ip(sample, symbol, dso)
+ elif name == "exstop":
+ print_common_start(comm, sample, name)
+ print_exstop(raw_buf)
+ print_common_ip(sample, symbol, dso)
+ elif name == "pwrx":
+ print_common_start(comm, sample, name)
+ print_pwrx(raw_buf)
+ print_common_ip(sample, symbol, dso)
diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py
new file mode 100644
index 000000000..ebee2c5ae
--- /dev/null
+++ b/tools/perf/scripts/python/mem-phys-addr.py
@@ -0,0 +1,95 @@
+# mem-phys-addr.py: Resolve physical address samples
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2018, Intel Corporation.
+
+from __future__ import division
+import os
+import sys
+import struct
+import re
+import bisect
+import collections
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+# Physical address ranges for System RAM
+system_ram = []
+# Physical address ranges for Persistent Memory
+pmem = []
+# File object for /proc/iomem
+f = None
+# Count for each type of memory
+load_mem_type_cnt = collections.Counter()
+# perf event name
+event_name = None
+
+def parse_iomem():
+ global f
+ f = open('/proc/iomem', 'r')
+ for i, j in enumerate(f):
+ m = re.split('-|:',j,2)
+ if m[2].strip() == 'System RAM':
+ system_ram.append(long(m[0], 16))
+ system_ram.append(long(m[1], 16))
+ if m[2].strip() == 'Persistent Memory':
+ pmem.append(long(m[0], 16))
+ pmem.append(long(m[1], 16))
+
+def print_memory_type():
+ print "Event: %s" % (event_name)
+ print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"),
+ print "%-40s %10s %10s\n" % ("----------------------------------------", \
+ "-----------", "-----------"),
+ total = sum(load_mem_type_cnt.values())
+ for mem_type, count in sorted(load_mem_type_cnt.most_common(), \
+ key = lambda(k, v): (v, k), reverse = True):
+ print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total),
+
+def trace_begin():
+ parse_iomem()
+
+def trace_end():
+ print_memory_type()
+ f.close()
+
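+# The System RAM and Persistent Memory ranges are kept as flat, sorted lists
+# of [start0, end0, start1, end1, ...]; bisect then returns an odd insertion
+# point exactly when the address falls inside one of the ranges.
+# e.g. bisect.bisect([0x1000, 0x1fff, 0x100000, 0x1fffff], 0x1800) == 1,
+# which is odd, so 0x1800 lies within System RAM.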
+def is_system_ram(phys_addr):
+	# /proc/iomem is sorted
+ position = bisect.bisect(system_ram, phys_addr)
+ if position % 2 == 0:
+ return False
+ return True
+
+def is_persistent_mem(phys_addr):
+ position = bisect.bisect(pmem, phys_addr)
+ if position % 2 == 0:
+ return False
+ return True
+
+def find_memory_type(phys_addr):
+ if phys_addr == 0:
+ return "N/A"
+ if is_system_ram(phys_addr):
+ return "System RAM"
+
+ if is_persistent_mem(phys_addr):
+ return "Persistent Memory"
+
+	# Slow path: search all ranges
+ f.seek(0, 0)
+ for j in f:
+ m = re.split('-|:',j,2)
+ if long(m[0], 16) <= phys_addr <= long(m[1], 16):
+ return m[2]
+ return "N/A"
+
+def process_event(param_dict):
+ name = param_dict["ev_name"]
+ sample = param_dict["sample"]
+ phys_addr = sample["phys_addr"]
+
+ global event_name
+	if event_name is None:
+ event_name = name
+ load_mem_type_cnt[find_memory_type(phys_addr)] += 1
diff --git a/tools/perf/scripts/python/net_dropmonitor.py b/tools/perf/scripts/python/net_dropmonitor.py
new file mode 100755
index 000000000..a150164b4
--- /dev/null
+++ b/tools/perf/scripts/python/net_dropmonitor.py
@@ -0,0 +1,76 @@
+# Monitor the system for dropped packets and produce a report of drop locations and counts
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+drop_log = {}
+kallsyms = []
+
+def get_kallsyms_table():
+ global kallsyms
+
+ try:
+ f = open("/proc/kallsyms", "r")
+ except:
+ return
+
+ for line in f:
+ loc = int(line.split()[0], 16)
+ name = line.split()[2]
+ kallsyms.append((loc, name))
+ kallsyms.sort()
+
+def get_sym(sloc):
+ loc = int(sloc)
+
+ # Invariant: kallsyms[i][0] <= loc for all 0 <= i <= start
+ # kallsyms[i][0] > loc for all end <= i < len(kallsyms)
+ start, end = -1, len(kallsyms)
+ while end != start + 1:
+ pivot = (start + end) // 2
+ if loc < kallsyms[pivot][0]:
+ end = pivot
+ else:
+ start = pivot
+
+ # Now (start == -1 or kallsyms[start][0] <= loc)
+ # and (start == len(kallsyms) - 1 or loc < kallsyms[start + 1][0])
+ if start >= 0:
+ symloc, name = kallsyms[start]
+ return (name, loc - symloc)
+ else:
+ return (None, 0)
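+
+# e.g. with kallsyms == [(0xc0000000, "_stext"), (0xc0001000, "foo")],
+# get_sym(0xc0001010) returns ("foo", 0x10).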
+
+def print_drop_table():
+ print "%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT")
+ for i in drop_log.keys():
+ (sym, off) = get_sym(i)
+		if sym is None:
+ sym = i
+ print "%25s %25s %25s" % (sym, off, drop_log[i])
+
+
+def trace_begin():
+ print "Starting trace (Ctrl-C to dump results)"
+
+def trace_end():
+ print "Gathering kallsyms data"
+ get_kallsyms_table()
+ print_drop_table()
+
+# called from perf, when it finds a corresponding event
+def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
+ skbaddr, location, protocol):
+ slocation = str(location)
+ try:
+ drop_log[slocation] = drop_log[slocation] + 1
+ except:
+ drop_log[slocation] = 1
diff --git a/tools/perf/scripts/python/netdev-times.py b/tools/perf/scripts/python/netdev-times.py
new file mode 100644
index 000000000..9b2050f77
--- /dev/null
+++ b/tools/perf/scripts/python/netdev-times.py
@@ -0,0 +1,468 @@
+# Display the per-packet processing flow and the time spent in each stage.
+# SPDX-License-Identifier: GPL-2.0
+# It helps to investigate networking and network device behaviour.
+#
+# options
+# tx: show only tx chart
+# rx: show only rx chart
+# dev=: show only thing related to specified device
+# debug: work with debug mode. It shows buffer status.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+all_event_list = []; # all tracepoint events related to this script
+irq_dic = {}; # key is cpu and value is a list which stacks irqs
+              # which raise the NET_RX softirq
+net_rx_dic = {}; # key is cpu and value includes the time of NET_RX softirq-entry
+                 # and a list which stacks receive events
+receive_hunk_list = []; # a list which includes sequences of receive events
+rx_skb_list = []; # received packet list for matching
+                  # skb_copy_datagram_iovec
+
+buffer_budget = 65536; # the budget of rx_skb_list, tx_queue_list and
+ # tx_xmit_list
+of_count_rx_skb_list = 0; # overflow count
+
+tx_queue_list = []; # list of packets which pass through dev_queue_xmit
+of_count_tx_queue_list = 0; # overflow count
+
+tx_xmit_list = []; # list of packets which pass through dev_hard_start_xmit
+of_count_tx_xmit_list = 0; # overflow count
+
+tx_free_list = []; # list of packets which are freed
+
+# options
+show_tx = 0;
+show_rx = 0;
+dev = 0; # store a name of device specified by option "dev="
+debug = 0;
+
+# indices of event_info tuple
+EINFO_IDX_NAME= 0
+EINFO_IDX_CONTEXT=1
+EINFO_IDX_CPU= 2
+EINFO_IDX_TIME= 3
+EINFO_IDX_PID= 4
+EINFO_IDX_COMM= 5
+
+# Calculate a time interval (msec) from src(nsec) to dst(nsec)
+def diff_msec(src, dst):
+ return (dst - src) / 1000000.0
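+# e.g. diff_msec(1000000, 4500000) == 3.5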
+
+# Display the transmit processing of a packet
+def print_transmit(hunk):
+ if dev != 0 and hunk['dev'].find(dev) < 0:
+ return
+ print "%7s %5d %6d.%06dsec %12.3fmsec %12.3fmsec" % \
+ (hunk['dev'], hunk['len'],
+ nsecs_secs(hunk['queue_t']),
+ nsecs_nsecs(hunk['queue_t'])/1000,
+ diff_msec(hunk['queue_t'], hunk['xmit_t']),
+ diff_msec(hunk['xmit_t'], hunk['free_t']))
+
+# Format for displaying rx packet processing
+PF_IRQ_ENTRY= " irq_entry(+%.3fmsec irq=%d:%s)"
+PF_SOFT_ENTRY=" softirq_entry(+%.3fmsec)"
+PF_NAPI_POLL= " napi_poll_exit(+%.3fmsec %s)"
+PF_JOINT= " |"
+PF_WJOINT= " | |"
+PF_NET_RECV= " |---netif_receive_skb(+%.3fmsec skb=%x len=%d)"
+PF_NET_RX= " |---netif_rx(+%.3fmsec skb=%x)"
+PF_CPY_DGRAM= " | skb_copy_datagram_iovec(+%.3fmsec %d:%s)"
+PF_KFREE_SKB= " | kfree_skb(+%.3fmsec location=%x)"
+PF_CONS_SKB= " | consume_skb(+%.3fmsec)"
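+
+# Together these format strings render an rx hunk roughly like (illustrative):
+#
+#  0.123456sec cpu=0
+#   irq_entry(+0.000msec irq=42:eth0)
+#   |
+#   softirq_entry(+0.012msec)
+#   |
+#   |---netif_receive_skb(+0.050msec skb=f00 len=60)
+#   |    skb_copy_datagram_iovec(+0.895msec 1234:bash)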
+
+# Display the processing of received packets and the interrupts associated with
+# a NET_RX softirq
+def print_receive(hunk):
+ show_hunk = 0
+ irq_list = hunk['irq_list']
+ cpu = irq_list[0]['cpu']
+ base_t = irq_list[0]['irq_ent_t']
+ # check if this hunk should be showed
+ if dev != 0:
+ for i in range(len(irq_list)):
+ if irq_list[i]['name'].find(dev) >= 0:
+ show_hunk = 1
+ break
+ else:
+ show_hunk = 1
+ if show_hunk == 0:
+ return
+
+ print "%d.%06dsec cpu=%d" % \
+ (nsecs_secs(base_t), nsecs_nsecs(base_t)/1000, cpu)
+ for i in range(len(irq_list)):
+ print PF_IRQ_ENTRY % \
+ (diff_msec(base_t, irq_list[i]['irq_ent_t']),
+ irq_list[i]['irq'], irq_list[i]['name'])
+ print PF_JOINT
+ irq_event_list = irq_list[i]['event_list']
+ for j in range(len(irq_event_list)):
+ irq_event = irq_event_list[j]
+ if irq_event['event'] == 'netif_rx':
+ print PF_NET_RX % \
+ (diff_msec(base_t, irq_event['time']),
+ irq_event['skbaddr'])
+ print PF_JOINT
+ print PF_SOFT_ENTRY % \
+ diff_msec(base_t, hunk['sirq_ent_t'])
+ print PF_JOINT
+ event_list = hunk['event_list']
+ for i in range(len(event_list)):
+ event = event_list[i]
+ if event['event_name'] == 'napi_poll':
+ print PF_NAPI_POLL % \
+ (diff_msec(base_t, event['event_t']), event['dev'])
+ if i == len(event_list) - 1:
+ print ""
+ else:
+ print PF_JOINT
+ else:
+ print PF_NET_RECV % \
+ (diff_msec(base_t, event['event_t']), event['skbaddr'],
+ event['len'])
+ if 'comm' in event.keys():
+ print PF_WJOINT
+ print PF_CPY_DGRAM % \
+ (diff_msec(base_t, event['comm_t']),
+ event['pid'], event['comm'])
+ elif 'handle' in event.keys():
+ print PF_WJOINT
+ if event['handle'] == "kfree_skb":
+ print PF_KFREE_SKB % \
+ (diff_msec(base_t,
+ event['comm_t']),
+ event['location'])
+ elif event['handle'] == "consume_skb":
+ print PF_CONS_SKB % \
+ diff_msec(base_t,
+ event['comm_t'])
+ print PF_JOINT
+
+def trace_begin():
+ global show_tx
+ global show_rx
+ global dev
+ global debug
+
+ for i in range(len(sys.argv)):
+ if i == 0:
+ continue
+ arg = sys.argv[i]
+ if arg == 'tx':
+ show_tx = 1
+ elif arg =='rx':
+ show_rx = 1
+ elif arg.find('dev=',0, 4) >= 0:
+ dev = arg[4:]
+ elif arg == 'debug':
+ debug = 1
+ if show_tx == 0 and show_rx == 0:
+ show_tx = 1
+ show_rx = 1
+
+def trace_end():
+ # order all events in time
+ all_event_list.sort(lambda a,b :cmp(a[EINFO_IDX_TIME],
+ b[EINFO_IDX_TIME]))
+ # process all events
+ for i in range(len(all_event_list)):
+ event_info = all_event_list[i]
+ name = event_info[EINFO_IDX_NAME]
+ if name == 'irq__softirq_exit':
+ handle_irq_softirq_exit(event_info)
+ elif name == 'irq__softirq_entry':
+ handle_irq_softirq_entry(event_info)
+ elif name == 'irq__softirq_raise':
+ handle_irq_softirq_raise(event_info)
+ elif name == 'irq__irq_handler_entry':
+ handle_irq_handler_entry(event_info)
+ elif name == 'irq__irq_handler_exit':
+ handle_irq_handler_exit(event_info)
+ elif name == 'napi__napi_poll':
+ handle_napi_poll(event_info)
+ elif name == 'net__netif_receive_skb':
+ handle_netif_receive_skb(event_info)
+ elif name == 'net__netif_rx':
+ handle_netif_rx(event_info)
+ elif name == 'skb__skb_copy_datagram_iovec':
+ handle_skb_copy_datagram_iovec(event_info)
+ elif name == 'net__net_dev_queue':
+ handle_net_dev_queue(event_info)
+ elif name == 'net__net_dev_xmit':
+ handle_net_dev_xmit(event_info)
+ elif name == 'skb__kfree_skb':
+ handle_kfree_skb(event_info)
+ elif name == 'skb__consume_skb':
+ handle_consume_skb(event_info)
+ # display receive hunks
+ if show_rx:
+ for i in range(len(receive_hunk_list)):
+ print_receive(receive_hunk_list[i])
+ # display transmit hunks
+ if show_tx:
+ print " dev len Qdisc " \
+ " netdevice free"
+ for i in range(len(tx_free_list)):
+ print_transmit(tx_free_list[i])
+ if debug:
+ print "debug buffer status"
+ print "----------------------------"
+ print "xmit Qdisc:remain:%d overflow:%d" % \
+ (len(tx_queue_list), of_count_tx_queue_list)
+ print "xmit netdevice:remain:%d overflow:%d" % \
+ (len(tx_xmit_list), of_count_tx_xmit_list)
+ print "receive:remain:%d overflow:%d" % \
+ (len(rx_skb_list), of_count_rx_skb_list)
+
+# called from perf, when it finds a corresponding event
+def irq__softirq_entry(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
+ if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
+ return
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
+ all_event_list.append(event_info)
+
+def irq__softirq_exit(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
+ if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
+ return
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
+ all_event_list.append(event_info)
+
+def irq__softirq_raise(name, context, cpu, sec, nsec, pid, comm, callchain, vec):
+ if symbol_str("irq__softirq_entry", "vec", vec) != "NET_RX":
+ return
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, vec)
+ all_event_list.append(event_info)
+
+def irq__irq_handler_entry(name, context, cpu, sec, nsec, pid, comm,
+ callchain, irq, irq_name):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ irq, irq_name)
+ all_event_list.append(event_info)
+
+def irq__irq_handler_exit(name, context, cpu, sec, nsec, pid, comm, callchain, irq, ret):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm, irq, ret)
+ all_event_list.append(event_info)
+
+def napi__napi_poll(name, context, cpu, sec, nsec, pid, comm, callchain, napi,
+ dev_name, work=None, budget=None):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ napi, dev_name, work, budget)
+ all_event_list.append(event_info)
+
+def net__netif_receive_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
+ skblen, dev_name):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr, skblen, dev_name)
+ all_event_list.append(event_info)
+
+def net__netif_rx(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr,
+ skblen, dev_name):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr, skblen, dev_name)
+ all_event_list.append(event_info)
+
+def net__net_dev_queue(name, context, cpu, sec, nsec, pid, comm, callchain,
+ skbaddr, skblen, dev_name):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr, skblen, dev_name)
+ all_event_list.append(event_info)
+
+def net__net_dev_xmit(name, context, cpu, sec, nsec, pid, comm, callchain,
+ skbaddr, skblen, rc, dev_name):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr, skblen, rc ,dev_name)
+ all_event_list.append(event_info)
+
+def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, callchain,
+ skbaddr, protocol, location):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr, protocol, location)
+ all_event_list.append(event_info)
+
+def skb__consume_skb(name, context, cpu, sec, nsec, pid, comm, callchain, skbaddr):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr)
+ all_event_list.append(event_info)
+
+def skb__skb_copy_datagram_iovec(name, context, cpu, sec, nsec, pid, comm, callchain,
+ skbaddr, skblen):
+ event_info = (name, context, cpu, nsecs(sec, nsec), pid, comm,
+ skbaddr, skblen)
+ all_event_list.append(event_info)
+
+def handle_irq_handler_entry(event_info):
+ (name, context, cpu, time, pid, comm, irq, irq_name) = event_info
+ if cpu not in irq_dic.keys():
+ irq_dic[cpu] = []
+ irq_record = {'irq':irq, 'name':irq_name, 'cpu':cpu, 'irq_ent_t':time}
+ irq_dic[cpu].append(irq_record)
+
+def handle_irq_handler_exit(event_info):
+ (name, context, cpu, time, pid, comm, irq, ret) = event_info
+ if cpu not in irq_dic.keys():
+ return
+ irq_record = irq_dic[cpu].pop()
+ if irq != irq_record['irq']:
+ return
+ irq_record.update({'irq_ext_t':time})
+ # if an irq doesn't include NET_RX softirq, drop.
+ if 'event_list' in irq_record.keys():
+ irq_dic[cpu].append(irq_record)
+
+def handle_irq_softirq_raise(event_info):
+ (name, context, cpu, time, pid, comm, vec) = event_info
+ if cpu not in irq_dic.keys() \
+ or len(irq_dic[cpu]) == 0:
+ return
+ irq_record = irq_dic[cpu].pop()
+ if 'event_list' in irq_record.keys():
+ irq_event_list = irq_record['event_list']
+ else:
+ irq_event_list = []
+ irq_event_list.append({'time':time, 'event':'sirq_raise'})
+ irq_record.update({'event_list':irq_event_list})
+ irq_dic[cpu].append(irq_record)
+
+def handle_irq_softirq_entry(event_info):
+ (name, context, cpu, time, pid, comm, vec) = event_info
+ net_rx_dic[cpu] = {'sirq_ent_t':time, 'event_list':[]}
+
+def handle_irq_softirq_exit(event_info):
+ (name, context, cpu, time, pid, comm, vec) = event_info
+ irq_list = []
+ event_list = 0
+ if cpu in irq_dic.keys():
+ irq_list = irq_dic[cpu]
+ del irq_dic[cpu]
+ if cpu in net_rx_dic.keys():
+ sirq_ent_t = net_rx_dic[cpu]['sirq_ent_t']
+ event_list = net_rx_dic[cpu]['event_list']
+ del net_rx_dic[cpu]
+ if irq_list == [] or event_list == 0:
+ return
+ rec_data = {'sirq_ent_t':sirq_ent_t, 'sirq_ext_t':time,
+ 'irq_list':irq_list, 'event_list':event_list}
+	# merge information related to a NET_RX softirq
+ receive_hunk_list.append(rec_data)
+
+def handle_napi_poll(event_info):
+ (name, context, cpu, time, pid, comm, napi, dev_name,
+ work, budget) = event_info
+ if cpu in net_rx_dic.keys():
+ event_list = net_rx_dic[cpu]['event_list']
+ rec_data = {'event_name':'napi_poll',
+ 'dev':dev_name, 'event_t':time,
+ 'work':work, 'budget':budget}
+ event_list.append(rec_data)
+
+def handle_netif_rx(event_info):
+ (name, context, cpu, time, pid, comm,
+ skbaddr, skblen, dev_name) = event_info
+ if cpu not in irq_dic.keys() \
+ or len(irq_dic[cpu]) == 0:
+ return
+ irq_record = irq_dic[cpu].pop()
+ if 'event_list' in irq_record.keys():
+ irq_event_list = irq_record['event_list']
+ else:
+ irq_event_list = []
+ irq_event_list.append({'time':time, 'event':'netif_rx',
+ 'skbaddr':skbaddr, 'skblen':skblen, 'dev_name':dev_name})
+ irq_record.update({'event_list':irq_event_list})
+ irq_dic[cpu].append(irq_record)
+
+def handle_netif_receive_skb(event_info):
+ global of_count_rx_skb_list
+
+ (name, context, cpu, time, pid, comm,
+ skbaddr, skblen, dev_name) = event_info
+ if cpu in net_rx_dic.keys():
+ rec_data = {'event_name':'netif_receive_skb',
+ 'event_t':time, 'skbaddr':skbaddr, 'len':skblen}
+ event_list = net_rx_dic[cpu]['event_list']
+ event_list.append(rec_data)
+ rx_skb_list.insert(0, rec_data)
+ if len(rx_skb_list) > buffer_budget:
+ rx_skb_list.pop()
+ of_count_rx_skb_list += 1
+
+def handle_net_dev_queue(event_info):
+ global of_count_tx_queue_list
+
+ (name, context, cpu, time, pid, comm,
+ skbaddr, skblen, dev_name) = event_info
+ skb = {'dev':dev_name, 'skbaddr':skbaddr, 'len':skblen, 'queue_t':time}
+ tx_queue_list.insert(0, skb)
+ if len(tx_queue_list) > buffer_budget:
+ tx_queue_list.pop()
+ of_count_tx_queue_list += 1
+
+def handle_net_dev_xmit(event_info):
+ global of_count_tx_xmit_list
+
+ (name, context, cpu, time, pid, comm,
+ skbaddr, skblen, rc, dev_name) = event_info
+ if rc == 0: # NETDEV_TX_OK
+ for i in range(len(tx_queue_list)):
+ skb = tx_queue_list[i]
+ if skb['skbaddr'] == skbaddr:
+ skb['xmit_t'] = time
+ tx_xmit_list.insert(0, skb)
+ del tx_queue_list[i]
+ if len(tx_xmit_list) > buffer_budget:
+ tx_xmit_list.pop()
+ of_count_tx_xmit_list += 1
+ return
+
+def handle_kfree_skb(event_info):
+ (name, context, cpu, time, pid, comm,
+ skbaddr, protocol, location) = event_info
+ for i in range(len(tx_queue_list)):
+ skb = tx_queue_list[i]
+ if skb['skbaddr'] == skbaddr:
+ del tx_queue_list[i]
+ return
+ for i in range(len(tx_xmit_list)):
+ skb = tx_xmit_list[i]
+ if skb['skbaddr'] == skbaddr:
+ skb['free_t'] = time
+ tx_free_list.append(skb)
+ del tx_xmit_list[i]
+ return
+ for i in range(len(rx_skb_list)):
+ rec_data = rx_skb_list[i]
+ if rec_data['skbaddr'] == skbaddr:
+ rec_data.update({'handle':"kfree_skb",
+ 'comm':comm, 'pid':pid, 'comm_t':time})
+ del rx_skb_list[i]
+ return
+
+def handle_consume_skb(event_info):
+ (name, context, cpu, time, pid, comm, skbaddr) = event_info
+ for i in range(len(tx_xmit_list)):
+ skb = tx_xmit_list[i]
+ if skb['skbaddr'] == skbaddr:
+ skb['free_t'] = time
+ tx_free_list.append(skb)
+ del tx_xmit_list[i]
+ return
+
+def handle_skb_copy_datagram_iovec(event_info):
+ (name, context, cpu, time, pid, comm, skbaddr, skblen) = event_info
+ for i in range(len(rx_skb_list)):
+ rec_data = rx_skb_list[i]
+ if skbaddr == rec_data['skbaddr']:
+ rec_data.update({'handle':"skb_copy_datagram_iovec",
+ 'comm':comm, 'pid':pid, 'comm_t':time})
+ del rx_skb_list[i]
+ return
diff --git a/tools/perf/scripts/python/powerpc-hcalls.py b/tools/perf/scripts/python/powerpc-hcalls.py
new file mode 100644
index 000000000..00e0e7476
--- /dev/null
+++ b/tools/perf/scripts/python/powerpc-hcalls.py
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Copyright (C) 2018 Ravi Bangoria, IBM Corporation
+#
+# Hypervisor call statistics
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+# output: {
+# opcode: {
+# 'min': minimum time nsec
+# 'max': maximum time nsec
+#		'time': total time nsec (printed as the time/cnt average)
+# 'cnt': counter
+# } ...
+# }
+output = {}
+
+# d_enter: {
+# cpu: {
+# opcode: nsec
+# } ...
+# }
+d_enter = {}
+
+hcall_table = {
+ 4: 'H_REMOVE',
+ 8: 'H_ENTER',
+ 12: 'H_READ',
+ 16: 'H_CLEAR_MOD',
+ 20: 'H_CLEAR_REF',
+ 24: 'H_PROTECT',
+ 28: 'H_GET_TCE',
+ 32: 'H_PUT_TCE',
+ 36: 'H_SET_SPRG0',
+ 40: 'H_SET_DABR',
+ 44: 'H_PAGE_INIT',
+ 48: 'H_SET_ASR',
+ 52: 'H_ASR_ON',
+ 56: 'H_ASR_OFF',
+ 60: 'H_LOGICAL_CI_LOAD',
+ 64: 'H_LOGICAL_CI_STORE',
+ 68: 'H_LOGICAL_CACHE_LOAD',
+ 72: 'H_LOGICAL_CACHE_STORE',
+ 76: 'H_LOGICAL_ICBI',
+ 80: 'H_LOGICAL_DCBF',
+ 84: 'H_GET_TERM_CHAR',
+ 88: 'H_PUT_TERM_CHAR',
+ 92: 'H_REAL_TO_LOGICAL',
+ 96: 'H_HYPERVISOR_DATA',
+ 100: 'H_EOI',
+ 104: 'H_CPPR',
+ 108: 'H_IPI',
+ 112: 'H_IPOLL',
+ 116: 'H_XIRR',
+ 120: 'H_MIGRATE_DMA',
+ 124: 'H_PERFMON',
+ 220: 'H_REGISTER_VPA',
+ 224: 'H_CEDE',
+ 228: 'H_CONFER',
+ 232: 'H_PROD',
+ 236: 'H_GET_PPP',
+ 240: 'H_SET_PPP',
+ 244: 'H_PURR',
+ 248: 'H_PIC',
+ 252: 'H_REG_CRQ',
+ 256: 'H_FREE_CRQ',
+ 260: 'H_VIO_SIGNAL',
+ 264: 'H_SEND_CRQ',
+ 272: 'H_COPY_RDMA',
+ 276: 'H_REGISTER_LOGICAL_LAN',
+ 280: 'H_FREE_LOGICAL_LAN',
+ 284: 'H_ADD_LOGICAL_LAN_BUFFER',
+ 288: 'H_SEND_LOGICAL_LAN',
+ 292: 'H_BULK_REMOVE',
+ 304: 'H_MULTICAST_CTRL',
+ 308: 'H_SET_XDABR',
+ 312: 'H_STUFF_TCE',
+ 316: 'H_PUT_TCE_INDIRECT',
+ 332: 'H_CHANGE_LOGICAL_LAN_MAC',
+ 336: 'H_VTERM_PARTNER_INFO',
+ 340: 'H_REGISTER_VTERM',
+ 344: 'H_FREE_VTERM',
+ 348: 'H_RESET_EVENTS',
+ 352: 'H_ALLOC_RESOURCE',
+ 356: 'H_FREE_RESOURCE',
+ 360: 'H_MODIFY_QP',
+ 364: 'H_QUERY_QP',
+ 368: 'H_REREGISTER_PMR',
+ 372: 'H_REGISTER_SMR',
+ 376: 'H_QUERY_MR',
+ 380: 'H_QUERY_MW',
+ 384: 'H_QUERY_HCA',
+ 388: 'H_QUERY_PORT',
+ 392: 'H_MODIFY_PORT',
+ 396: 'H_DEFINE_AQP1',
+ 400: 'H_GET_TRACE_BUFFER',
+ 404: 'H_DEFINE_AQP0',
+ 408: 'H_RESIZE_MR',
+ 412: 'H_ATTACH_MCQP',
+ 416: 'H_DETACH_MCQP',
+ 420: 'H_CREATE_RPT',
+ 424: 'H_REMOVE_RPT',
+ 428: 'H_REGISTER_RPAGES',
+ 432: 'H_DISABLE_AND_GETC',
+ 436: 'H_ERROR_DATA',
+ 440: 'H_GET_HCA_INFO',
+ 444: 'H_GET_PERF_COUNT',
+ 448: 'H_MANAGE_TRACE',
+ 468: 'H_FREE_LOGICAL_LAN_BUFFER',
+ 472: 'H_POLL_PENDING',
+ 484: 'H_QUERY_INT_STATE',
+ 580: 'H_ILLAN_ATTRIBUTES',
+ 592: 'H_MODIFY_HEA_QP',
+ 596: 'H_QUERY_HEA_QP',
+ 600: 'H_QUERY_HEA',
+ 604: 'H_QUERY_HEA_PORT',
+ 608: 'H_MODIFY_HEA_PORT',
+ 612: 'H_REG_BCMC',
+ 616: 'H_DEREG_BCMC',
+ 620: 'H_REGISTER_HEA_RPAGES',
+ 624: 'H_DISABLE_AND_GET_HEA',
+ 628: 'H_GET_HEA_INFO',
+ 632: 'H_ALLOC_HEA_RESOURCE',
+ 644: 'H_ADD_CONN',
+ 648: 'H_DEL_CONN',
+ 664: 'H_JOIN',
+ 676: 'H_VASI_STATE',
+ 688: 'H_ENABLE_CRQ',
+ 696: 'H_GET_EM_PARMS',
+ 720: 'H_SET_MPP',
+ 724: 'H_GET_MPP',
+ 748: 'H_HOME_NODE_ASSOCIATIVITY',
+ 756: 'H_BEST_ENERGY',
+ 764: 'H_XIRR_X',
+ 768: 'H_RANDOM',
+ 772: 'H_COP',
+ 788: 'H_GET_MPP_X',
+ 796: 'H_SET_MODE',
+ 61440: 'H_RTAS',
+}
+
+def hcall_table_lookup(opcode):
+ if (hcall_table.has_key(opcode)):
+ return hcall_table[opcode]
+ else:
+ return opcode
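+# e.g. hcall_table_lookup(4) returns 'H_REMOVE'; an opcode missing from the
+# table is returned unchanged so it is still printed as a number.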
+
+print_ptrn = '%-28s%10s%10s%10s%10s'
+
+def trace_end():
+ print print_ptrn % ('hcall', 'count', 'min(ns)', 'max(ns)', 'avg(ns)')
+ print '-' * 68
+ for opcode in output:
+ h_name = hcall_table_lookup(opcode)
+ time = output[opcode]['time']
+ cnt = output[opcode]['cnt']
+ min_t = output[opcode]['min']
+ max_t = output[opcode]['max']
+
+ print print_ptrn % (h_name, cnt, min_t, max_t, time/cnt)
+
+def powerpc__hcall_exit(name, context, cpu, sec, nsec, pid, comm, callchain,
+ opcode, retval):
+ if (d_enter.has_key(cpu) and d_enter[cpu].has_key(opcode)):
+ diff = nsecs(sec, nsec) - d_enter[cpu][opcode]
+
+ if (output.has_key(opcode)):
+ output[opcode]['time'] += diff
+ output[opcode]['cnt'] += 1
+ if (output[opcode]['min'] > diff):
+ output[opcode]['min'] = diff
+ if (output[opcode]['max'] < diff):
+ output[opcode]['max'] = diff
+ else:
+ output[opcode] = {
+ 'time': diff,
+ 'cnt': 1,
+ 'min': diff,
+ 'max': diff,
+ }
+
+ del d_enter[cpu][opcode]
+# else:
+# print "Can't find matching hcall_enter event. Ignoring sample"
+
+def powerpc__hcall_entry(event_name, context, cpu, sec, nsec, pid, comm,
+ callchain, opcode):
+ if (d_enter.has_key(cpu)):
+ d_enter[cpu][opcode] = nsecs(sec, nsec)
+ else:
+ d_enter[cpu] = {opcode: nsecs(sec, nsec)}
diff --git a/tools/perf/scripts/python/sched-migration.py b/tools/perf/scripts/python/sched-migration.py
new file mode 100644
index 000000000..3473e7f66
--- /dev/null
+++ b/tools/perf/scripts/python/sched-migration.py
@@ -0,0 +1,464 @@
+#!/usr/bin/python
+#
+# Cpu task migration overview toy
+#
+# Copyright (C) 2010 Frederic Weisbecker <fweisbec@gmail.com>
+#
+# perf script event handlers have been generated by perf script -g python
+#
+# This software is distributed under the terms of the GNU General
+# Public License ("GPL") version 2 as published by the Free Software
+# Foundation.
+from __future__ import print_function
+
+import os
+import sys
+
+from collections import defaultdict
+try:
+ from UserList import UserList
+except ImportError:
+ # Python 3: UserList moved to the collections package
+ from collections import UserList
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+sys.path.append('scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from SchedGui import *
+
+
+threads = { 0 : "idle"}
+
+def thread_name(pid):
+ return "%s:%d" % (threads[pid], pid)
+
+class RunqueueEventUnknown:
+ @staticmethod
+ def color():
+ return None
+
+ def __repr__(self):
+ return "unknown"
+
+class RunqueueEventSleep:
+ @staticmethod
+ def color():
+ return (0, 0, 0xff)
+
+ def __init__(self, sleeper):
+ self.sleeper = sleeper
+
+ def __repr__(self):
+ return "%s gone to sleep" % thread_name(self.sleeper)
+
+class RunqueueEventWakeup:
+ @staticmethod
+ def color():
+ return (0xff, 0xff, 0)
+
+ def __init__(self, wakee):
+ self.wakee = wakee
+
+ def __repr__(self):
+ return "%s woke up" % thread_name(self.wakee)
+
+class RunqueueEventFork:
+ @staticmethod
+ def color():
+ return (0, 0xff, 0)
+
+ def __init__(self, child):
+ self.child = child
+
+ def __repr__(self):
+ return "new forked task %s" % thread_name(self.child)
+
+class RunqueueMigrateIn:
+ @staticmethod
+ def color():
+ return (0, 0xf0, 0xff)
+
+ def __init__(self, new):
+ self.new = new
+
+ def __repr__(self):
+ return "task migrated in %s" % thread_name(self.new)
+
+class RunqueueMigrateOut:
+ @staticmethod
+ def color():
+ return (0xff, 0, 0xff)
+
+ def __init__(self, old):
+ self.old = old
+
+ def __repr__(self):
+ return "task migrated out %s" % thread_name(self.old)
+
+class RunqueueSnapshot:
+ def __init__(self, tasks = [0], event = RunqueueEventUnknown()):
+ self.tasks = tuple(tasks)
+ self.event = event
+
+ def sched_switch(self, prev, prev_state, next):
+ event = RunqueueEventUnknown()
+
+ if taskState(prev_state) == "R" and next in self.tasks \
+ and prev in self.tasks:
+ return self
+
+ if taskState(prev_state) != "R":
+ event = RunqueueEventSleep(prev)
+
+ next_tasks = list(self.tasks[:])
+ if prev in self.tasks:
+ if taskState(prev_state) != "R":
+ next_tasks.remove(prev)
+ elif taskState(prev_state) == "R":
+ next_tasks.append(prev)
+
+ if next not in next_tasks:
+ next_tasks.append(next)
+
+ return RunqueueSnapshot(next_tasks, event)
+
+ def migrate_out(self, old):
+ if old not in self.tasks:
+ return self
+ next_tasks = [task for task in self.tasks if task != old]
+
+ return RunqueueSnapshot(next_tasks, RunqueueMigrateOut(old))
+
+ def __migrate_in(self, new, event):
+ if new in self.tasks:
+ self.event = event
+ return self
+ next_tasks = self.tasks[:] + tuple([new])
+
+ return RunqueueSnapshot(next_tasks, event)
+
+ def migrate_in(self, new):
+ return self.__migrate_in(new, RunqueueMigrateIn(new))
+
+ def wake_up(self, new):
+ return self.__migrate_in(new, RunqueueEventWakeup(new))
+
+ def wake_up_new(self, new):
+ return self.__migrate_in(new, RunqueueEventFork(new))
+
+ def load(self):
+ """ Provide the number of tasks on the runqueue.
+ Don't count idle"""
+ return len(self.tasks) - 1
+
+ def __repr__(self):
+ ret = self.tasks.__repr__()
+ ret += self.event.__repr__()
+
+ return ret
+
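+# A TimeSlice spans the interval between two consecutive scheduler events.
+# It keeps one RunqueueSnapshot per cpu in .rqs, plus the aggregated load,
+# so the GUI can paint one rectangle per (cpu, slice) pair.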
+class TimeSlice:
+ def __init__(self, start, prev):
+ self.start = start
+ self.prev = prev
+ self.end = start
+ # cpus that triggered the event
+ self.event_cpus = []
+ if prev is not None:
+ self.total_load = prev.total_load
+ self.rqs = prev.rqs.copy()
+ else:
+ self.rqs = defaultdict(RunqueueSnapshot)
+ self.total_load = 0
+
+ def __update_total_load(self, old_rq, new_rq):
+ diff = new_rq.load() - old_rq.load()
+ self.total_load += diff
+
+ def sched_switch(self, ts_list, prev, prev_state, next, cpu):
+ old_rq = self.prev.rqs[cpu]
+ new_rq = old_rq.sched_switch(prev, prev_state, next)
+
+ if old_rq is new_rq:
+ return
+
+ self.rqs[cpu] = new_rq
+ self.__update_total_load(old_rq, new_rq)
+ ts_list.append(self)
+ self.event_cpus = [cpu]
+
+ def migrate(self, ts_list, new, old_cpu, new_cpu):
+ if old_cpu == new_cpu:
+ return
+ old_rq = self.prev.rqs[old_cpu]
+ out_rq = old_rq.migrate_out(new)
+ self.rqs[old_cpu] = out_rq
+ self.__update_total_load(old_rq, out_rq)
+
+ new_rq = self.prev.rqs[new_cpu]
+ in_rq = new_rq.migrate_in(new)
+ self.rqs[new_cpu] = in_rq
+ self.__update_total_load(new_rq, in_rq)
+
+ ts_list.append(self)
+
+ if old_rq is not out_rq:
+ self.event_cpus.append(old_cpu)
+ self.event_cpus.append(new_cpu)
+
+ def wake_up(self, ts_list, pid, cpu, fork):
+ old_rq = self.prev.rqs[cpu]
+ if fork:
+ new_rq = old_rq.wake_up_new(pid)
+ else:
+ new_rq = old_rq.wake_up(pid)
+
+ if new_rq is old_rq:
+ return
+ self.rqs[cpu] = new_rq
+ self.__update_total_load(old_rq, new_rq)
+ ts_list.append(self)
+ self.event_cpus = [cpu]
+
+ def next(self, t):
+ self.end = t
+ return TimeSlice(t, self)
+
+class TimeSliceList(UserList):
+ def __init__(self, arg = []):
+ self.data = arg
+
+ def get_time_slice(self, ts):
+ if len(self.data) == 0:
+ slice = TimeSlice(ts, TimeSlice(-1, None))
+ else:
+ slice = self.data[-1].next(ts)
+ return slice
+
+ def find_time_slice(self, ts):
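+ # Binary search over the time-ordered slices for the one whose
+ # [start, end] interval contains ts; returns -1 when none does.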
+ start = 0
+ end = len(self.data)
+ found = -1
+ searching = True
+ while searching:
+ if start == end or start == end - 1:
+ searching = False
+
+ i = (end + start) // 2
+ if self.data[i].start <= ts and self.data[i].end >= ts:
+ found = i
+ end = i
+ continue
+
+ if self.data[i].end < ts:
+ start = i
+
+ elif self.data[i].start > ts:
+ end = i
+
+ return found
+
+ def set_root_win(self, win):
+ self.root_win = win
+
+ def mouse_down(self, cpu, t):
+ idx = self.find_time_slice(t)
+ if idx == -1:
+ return
+
+ ts = self[idx]
+ rq = ts.rqs[cpu]
+ raw = "CPU: %d\n" % cpu
+ raw += "Last event : %s\n" % rq.event.__repr__()
+ raw += "Timestamp : %d.%06d\n" % (ts.start / (10 ** 9), (ts.start % (10 ** 9)) / 1000)
+ raw += "Duration : %6d us\n" % ((ts.end - ts.start) / (10 ** 6))
+ raw += "Load = %d\n" % rq.load()
+ for t in rq.tasks:
+ raw += "%s \n" % thread_name(t)
+
+ self.root_win.update_summary(raw)
+
+ def update_rectangle_cpu(self, slice, cpu):
+ rq = slice.rqs[cpu]
+
+ if slice.total_load != 0:
+ load_rate = rq.load() / float(slice.total_load)
+ else:
+ load_rate = 0
+
+ red_power = int(0xff - (0xff * load_rate))
+ color = (0xff, red_power, red_power)
+
+ top_color = None
+
+ if cpu in slice.event_cpus:
+ top_color = rq.event.color()
+
+ self.root_win.paint_rectangle_zone(cpu, color, top_color, slice.start, slice.end)
+
+ def fill_zone(self, start, end):
+ i = self.find_time_slice(start)
+ if i == -1:
+ return
+
+ for i in range(i, len(self.data)):
+ timeslice = self.data[i]
+ if timeslice.start > end:
+ return
+
+ for cpu in timeslice.rqs:
+ self.update_rectangle_cpu(timeslice, cpu)
+
+ def interval(self):
+ if len(self.data) == 0:
+ return (0, 0)
+
+ return (self.data[0].start, self.data[-1].end)
+
+ def nr_rectangles(self):
+ last_ts = self.data[-1]
+ max_cpu = 0
+ for cpu in last_ts.rqs:
+ if cpu > max_cpu:
+ max_cpu = cpu
+ return max_cpu
+
+
+class SchedEventProxy:
+ def __init__(self):
+ self.current_tsk = defaultdict(lambda : -1)
+ self.timeslices = TimeSliceList()
+
+ def sched_switch(self, headers, prev_comm, prev_pid, prev_prio, prev_state,
+ next_comm, next_pid, next_prio):
+ """ Ensure the task we sched out this cpu is really the one
+ we logged. Otherwise we may have missed traces """
+
+ on_cpu_task = self.current_tsk[headers.cpu]
+
+ if on_cpu_task != -1 and on_cpu_task != prev_pid:
+ print("Sched switch event rejected ts: %s cpu: %d prev: %s(%d) next: %s(%d)" % \
+ headers.ts_format(), headers.cpu, prev_comm, prev_pid, next_comm, next_pid)
+
+ threads[prev_pid] = prev_comm
+ threads[next_pid] = next_comm
+ self.current_tsk[headers.cpu] = next_pid
+
+ ts = self.timeslices.get_time_slice(headers.ts())
+ ts.sched_switch(self.timeslices, prev_pid, prev_state, next_pid, headers.cpu)
+
+ def migrate(self, headers, pid, prio, orig_cpu, dest_cpu):
+ ts = self.timeslices.get_time_slice(headers.ts())
+ ts.migrate(self.timeslices, pid, orig_cpu, dest_cpu)
+
+ def wake_up(self, headers, comm, pid, success, target_cpu, fork):
+ if success == 0:
+ return
+ ts = self.timeslices.get_time_slice(headers.ts())
+ ts.wake_up(self.timeslices, pid, target_cpu, fork)
+
+
+def trace_begin():
+ global parser
+ parser = SchedEventProxy()
+
+def trace_end():
+ app = wx.App(False)
+ timeslices = parser.timeslices
+ frame = RootFrame(timeslices, "Migration")
+ app.MainLoop()
+
+def sched__sched_stat_runtime(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, runtime, vruntime):
+ pass
+
+def sched__sched_stat_iowait(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, delay):
+ pass
+
+def sched__sched_stat_sleep(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, delay):
+ pass
+
+def sched__sched_stat_wait(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, delay):
+ pass
+
+def sched__sched_process_fork(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, parent_comm, parent_pid, child_comm, child_pid):
+ pass
+
+def sched__sched_process_wait(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio):
+ pass
+
+def sched__sched_process_exit(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio):
+ pass
+
+def sched__sched_process_free(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio):
+ pass
+
+def sched__sched_migrate_task(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio, orig_cpu,
+ dest_cpu):
+ headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm, common_callchain)
+ parser.migrate(headers, pid, prio, orig_cpu, dest_cpu)
+
+def sched__sched_switch(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm, common_callchain,
+ prev_comm, prev_pid, prev_prio, prev_state,
+ next_comm, next_pid, next_prio):
+
+ headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm, common_callchain)
+ parser.sched_switch(headers, prev_comm, prev_pid, prev_prio, prev_state,
+ next_comm, next_pid, next_prio)
+
+def sched__sched_wakeup_new(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio, success,
+ target_cpu):
+ headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm, common_callchain)
+ parser.wake_up(headers, comm, pid, success, target_cpu, 1)
+
+def sched__sched_wakeup(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio, success,
+ target_cpu):
+ headers = EventHeaders(common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm, common_callchain)
+ parser.wake_up(headers, comm, pid, success, target_cpu, 0)
+
+def sched__sched_wait_task(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid, prio):
+ pass
+
+def sched__sched_kthread_stop_ret(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, ret):
+ pass
+
+def sched__sched_kthread_stop(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, comm, pid):
+ pass
+
+def trace_unhandled(event_name, context, event_fields_dict):
+ pass
diff --git a/tools/perf/scripts/python/sctop.py b/tools/perf/scripts/python/sctop.py
new file mode 100644
index 000000000..61621b93a
--- /dev/null
+++ b/tools/perf/scripts/python/sctop.py
@@ -0,0 +1,80 @@
+# system call top
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Periodically displays system-wide system call totals, broken down by
+# syscall. If a [comm] arg is specified, only syscalls called by
+# [comm] are displayed. If an [interval] arg is specified, the display
+# will be refreshed every [interval] seconds. The default interval is
+# 3 seconds.
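+#
+# Example invocation (the comm name and interval are illustrative):
+#
+#   perf script -s sctop.py firefox 5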
+
+import os, sys, thread, time
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+usage = "perf script -s sctop.py [comm] [interval]\n";
+
+for_comm = None
+default_interval = 3
+interval = default_interval
+
+if len(sys.argv) > 3:
+ sys.exit(usage)
+
+if len(sys.argv) > 2:
+ for_comm = sys.argv[1]
+ interval = int(sys.argv[2])
+elif len(sys.argv) > 1:
+ try:
+ interval = int(sys.argv[1])
+ except ValueError:
+ for_comm = sys.argv[1]
+ interval = default_interval
+
+syscalls = autodict()
+
+def trace_begin():
+ thread.start_new_thread(print_syscall_totals, (interval,))
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, id, args):
+ if for_comm is not None:
+ if common_comm != for_comm:
+ return
+ try:
+ syscalls[id] += 1
+ except TypeError:
+ syscalls[id] = 1
+
+def syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ raw_syscalls__sys_enter(**locals())
+
+def print_syscall_totals(interval):
+ while 1:
+ clear_term()
+ if for_comm is not None:
+ print "\nsyscall events for %s:\n\n" % (for_comm),
+ else:
+ print "\nsyscall events:\n\n",
+
+ print "%-40s %10s\n" % ("event", "count"),
+ print "%-40s %10s\n" % ("----------------------------------------", \
+ "----------"),
+
+ for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+ reverse = True):
+ try:
+ print "%-40s %10d\n" % (syscall_name(id), val),
+ except TypeError:
+ pass
+ syscalls.clear()
+ time.sleep(interval)
diff --git a/tools/perf/scripts/python/stackcollapse.py b/tools/perf/scripts/python/stackcollapse.py
new file mode 100755
index 000000000..1697b5e18
--- /dev/null
+++ b/tools/perf/scripts/python/stackcollapse.py
@@ -0,0 +1,126 @@
+# stackcollapse.py - format perf samples with one line per distinct call stack
+# SPDX-License-Identifier: GPL-2.0
+#
+# This script's output has two space-separated fields. The first is a semicolon
+# separated stack including the program name (from the "comm" field) and the
+# function names from the call stack. The second is a count:
+#
+# swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
+#
+# The file is sorted according to the first field.
+#
+# Input may be created and processed using:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report stackcollapse > out.stacks-folded
+#
+# (perf script record stackcollapse works too).
+#
+# Written by Paolo Bonzini <pbonzini@redhat.com>
+# Based on Brendan Gregg's stackcollapse-perf.pl script.
+
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, make_option
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from EventClass import *
+
+# command line parsing
+
+option_list = [
+ # formatting options for the bottom entry of the stack
+ make_option("--include-tid", dest="include_tid",
+ action="store_true", default=False,
+ help="include thread id in stack"),
+ make_option("--include-pid", dest="include_pid",
+ action="store_true", default=False,
+ help="include process id in stack"),
+ make_option("--no-comm", dest="include_comm",
+ action="store_false", default=True,
+ help="do not separate stacks according to comm"),
+ make_option("--tidy-java", dest="tidy_java",
+ action="store_true", default=False,
+ help="beautify Java signatures"),
+ make_option("--kernel", dest="annotate_kernel",
+ action="store_true", default=False,
+ help="annotate kernel functions with _[k]")
+]
+
+parser = OptionParser(option_list=option_list)
+(opts, args) = parser.parse_args()
+
+if len(args) != 0:
+ parser.error("unexpected command line argument")
+if opts.include_tid and not opts.include_comm:
+ parser.error("requesting tid but not comm is invalid")
+if opts.include_pid and not opts.include_comm:
+ parser.error("requesting pid but not comm is invalid")
+
+# event handlers
+
+lines = defaultdict(lambda: 0)
+
+def process_event(param_dict):
+ def tidy_function_name(sym, dso):
+ if sym is None:
+ sym = '[unknown]'
+
+ sym = sym.replace(';', ':')
+ if opts.tidy_java:
+ # the original stackcollapse-perf.pl script gives the
+ # example of converting this:
+ # Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
+ # to this:
+ # org/mozilla/javascript/MemberBox:.init
+ sym = sym.replace('<', '')
+ sym = sym.replace('>', '')
+ if sym[0] == 'L' and sym.find('/') != -1:
+ sym = sym[1:]
+ try:
+ sym = sym[:sym.index('(')]
+ except ValueError:
+ pass
+
+ if opts.annotate_kernel and dso == '[kernel.kallsyms]':
+ return sym + '_[k]'
+ else:
+ return sym
+
+ stack = list()
+ if 'callchain' in param_dict:
+ for entry in param_dict['callchain']:
+ entry.setdefault('sym', dict())
+ entry['sym'].setdefault('name', None)
+ entry.setdefault('dso', None)
+ stack.append(tidy_function_name(entry['sym']['name'],
+ entry['dso']))
+ else:
+ param_dict.setdefault('symbol', None)
+ param_dict.setdefault('dso', None)
+ stack.append(tidy_function_name(param_dict['symbol'],
+ param_dict['dso']))
+
+ if opts.include_comm:
+ comm = param_dict["comm"].replace(' ', '_')
+ sep = "-"
+ if opts.include_pid:
+ comm = comm + sep + str(param_dict['sample']['pid'])
+ sep = "/"
+ if opts.include_tid:
+ comm = comm + sep + str(param_dict['sample']['tid'])
+ stack.append(comm)
+
+ stack_string = ';'.join(reversed(stack))
+ lines[stack_string] = lines[stack_string] + 1
+
+def trace_end():
+ for stack in sorted(lines.keys()):
+ print "%s %d" % (stack, lines[stack])
diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
new file mode 100644
index 000000000..8410672ef
--- /dev/null
+++ b/tools/perf/scripts/python/stat-cpi.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# SPDX-License-Identifier: GPL-2.0
+
+data = {}
+times = []
+threads = []
+cpus = []
+
+def get_key(time, event, cpu, thread):
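+ # e.g. get_key(1000000000, "cycles", 0, 0) -> "1000000000-cycles-0-0"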
+ return "%d-%s-%d-%d" % (time, event, cpu, thread)
+
+def store_key(time, cpu, thread):
+ if (time not in times):
+ times.append(time)
+
+ if (cpu not in cpus):
+ cpus.append(cpu)
+
+ if (thread not in threads):
+ threads.append(thread)
+
+def store(time, event, cpu, thread, val, ena, run):
+ #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
+ # (event, cpu, thread, time, val, ena, run)
+
+ store_key(time, cpu, thread)
+ key = get_key(time, event, cpu, thread)
+ data[key] = [ val, ena, run]
+
+def get(time, event, cpu, thread):
+ key = get_key(time, event, cpu, thread)
+ return data[key][0]
+
+def stat__cycles_k(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_k(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles_u(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_u(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__interval(time):
+ for cpu in cpus:
+ for thread in threads:
+ cyc = get(time, "cycles", cpu, thread)
+ ins = get(time, "instructions", cpu, thread)
+ cpi = 0
+
+ if ins != 0:
+ cpi = cyc/float(ins)
+
+ print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
+
+def trace_end():
+ pass
+# XXX trace_end callback could be used as an alternative place
+# to compute same values as in the script above:
+#
+# for time in times:
+# for cpu in cpus:
+# for thread in threads:
+# cyc = get(time, "cycles", cpu, thread)
+# ins = get(time, "instructions", cpu, thread)
+#
+# if ins != 0:
+# cpi = cyc/float(ins)
+#
+# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)
diff --git a/tools/perf/scripts/python/syscall-counts-by-pid.py b/tools/perf/scripts/python/syscall-counts-by-pid.py
new file mode 100644
index 000000000..daf314cc5
--- /dev/null
+++ b/tools/perf/scripts/python/syscall-counts-by-pid.py
@@ -0,0 +1,74 @@
+# system call counts, by pid
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide system call totals, broken down by syscall.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os, sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import syscall_name
+
+usage = "perf script -s syscall-counts-by-pid.py [comm]\n";
+
+for_comm = None
+for_pid = None
+
+if len(sys.argv) > 2:
+ sys.exit(usage)
+
+if len(sys.argv) > 1:
+ try:
+ for_pid = int(sys.argv[1])
+ except ValueError:
+ for_comm = sys.argv[1]
+
+syscalls = autodict()
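+# autodict (defined in Core) autovivifies nested keys, so counters can be
+# indexed as syscalls[comm][pid][id]; the first increment of a fresh leaf
+# raises TypeError, which the handler below turns into an assignment of 1.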
+
+def trace_begin():
+ print "Press control+C to stop and show the summary"
+
+def trace_end():
+ print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, id, args):
+
+ if (for_comm and common_comm != for_comm) or \
+ (for_pid and common_pid != for_pid):
+ return
+ try:
+ syscalls[common_comm][common_pid][id] += 1
+ except TypeError:
+ syscalls[common_comm][common_pid][id] = 1
+
+def syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ raw_syscalls__sys_enter(**locals())
+
+def print_syscall_totals():
+ if for_comm is not None:
+ print "\nsyscall events for %s:\n\n" % (for_comm),
+ else:
+ print "\nsyscall events by comm/pid:\n\n",
+
+ print "%-40s %10s\n" % ("comm [pid]/syscalls", "count"),
+ print "%-40s %10s\n" % ("----------------------------------------", \
+ "----------"),
+
+ comm_keys = syscalls.keys()
+ for comm in comm_keys:
+ pid_keys = syscalls[comm].keys()
+ for pid in pid_keys:
+ print "\n%s [%d]\n" % (comm, pid),
+ id_keys = syscalls[comm][pid].keys()
+ for id, val in sorted(syscalls[comm][pid].iteritems(), \
+ key = lambda(k, v): (v, k), reverse = True):
+ print " %-38s %10d\n" % (syscall_name(id), val),
diff --git a/tools/perf/scripts/python/syscall-counts.py b/tools/perf/scripts/python/syscall-counts.py
new file mode 100644
index 000000000..e66a7730a
--- /dev/null
+++ b/tools/perf/scripts/python/syscall-counts.py
@@ -0,0 +1,64 @@
+# system call counts
+# (c) 2010, Tom Zanussi <tzanussi@gmail.com>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Displays system-wide system call totals, broken down by syscall.
+# If a [comm] arg is specified, only syscalls called by [comm] are displayed.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import syscall_name
+
+usage = "perf script -s syscall-counts.py [comm]\n";
+
+for_comm = None
+
+if len(sys.argv) > 2:
+ sys.exit(usage)
+
+if len(sys.argv) > 1:
+ for_comm = sys.argv[1]
+
+syscalls = autodict()
+
+def trace_begin():
+ print "Press control+C to stop and show the summary"
+
+def trace_end():
+ print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ common_callchain, id, args):
+ if for_comm is not None:
+ if common_comm != for_comm:
+ return
+ try:
+ syscalls[id] += 1
+ except TypeError:
+ syscalls[id] = 1
+
+def syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ raw_syscalls__sys_enter(**locals())
+
+def print_syscall_totals():
+ if for_comm is not None:
+ print "\nsyscall events for %s:\n\n" % (for_comm),
+ else:
+ print "\nsyscall events:\n\n",
+
+ print "%-40s %10s\n" % ("event", "count"),
+ print "%-40s %10s\n" % ("----------------------------------------", \
+ "-----------"),
+
+ for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+ reverse = True):
+ print "%-40s %10d\n" % (syscall_name(id), val),
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
new file mode 100644
index 000000000..8cc30e731
--- /dev/null
+++ b/tools/perf/tests/.gitignore
@@ -0,0 +1,4 @@
+llvm-src-base.c
+llvm-src-kbuild.c
+llvm-src-prologue.c
+llvm-src-relocation.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
new file mode 100644
index 000000000..6c108fa79
--- /dev/null
+++ b/tools/perf/tests/Build
@@ -0,0 +1,87 @@
+perf-y += builtin-test.o
+perf-y += parse-events.o
+perf-y += dso-data.o
+perf-y += attr.o
+perf-y += vmlinux-kallsyms.o
+perf-y += openat-syscall.o
+perf-y += openat-syscall-all-cpus.o
+perf-y += openat-syscall-tp-fields.o
+perf-y += mmap-basic.o
+perf-y += perf-record.o
+perf-y += evsel-roundtrip-name.o
+perf-y += evsel-tp-sched.o
+perf-y += fdarray.o
+perf-y += pmu.o
+perf-y += hists_common.o
+perf-y += hists_link.o
+perf-y += hists_filter.o
+perf-y += hists_output.o
+perf-y += hists_cumulate.o
+perf-y += python-use.o
+perf-y += bp_signal.o
+perf-y += bp_signal_overflow.o
+perf-y += bp_account.o
+perf-y += task-exit.o
+perf-y += sw-clock.o
+perf-y += mmap-thread-lookup.o
+perf-y += thread-mg-share.o
+perf-y += switch-tracking.o
+perf-y += keep-tracking.o
+perf-y += code-reading.o
+perf-y += sample-parsing.o
+perf-y += parse-no-sample-id-all.o
+perf-y += kmod-path.o
+perf-y += thread-map.o
+perf-y += llvm.o llvm-src-base.o llvm-src-kbuild.o llvm-src-prologue.o llvm-src-relocation.o
+perf-y += bpf.o
+perf-y += topology.o
+perf-y += mem.o
+perf-y += cpumap.o
+perf-y += stat.o
+perf-y += event_update.o
+perf-y += event-times.o
+perf-y += expr.o
+perf-y += backward-ring-buffer.o
+perf-y += sdt.o
+perf-y += is_printable_array.o
+perf-y += bitmap.o
+perf-y += perf-hooks.o
+perf-y += clang.o
+perf-y += unit_number__scnprintf.o
+perf-y += mem2node.o
+
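+# The rules below embed each BPF test program in a C string: the sed
+# expression escapes double quotes and wraps every source line in "...\n",
+# so the snippet compiles as a single const char array.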
+$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
+ $(call rule_mkdir)
+ $(Q)echo '#include <tests/llvm.h>' > $@
+ $(Q)echo 'const char test_llvm__bpf_base_prog[] =' >> $@
+ $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+ $(Q)echo ';' >> $@
+
+$(OUTPUT)tests/llvm-src-kbuild.c: tests/bpf-script-test-kbuild.c tests/Build
+ $(call rule_mkdir)
+ $(Q)echo '#include <tests/llvm.h>' > $@
+ $(Q)echo 'const char test_llvm__bpf_test_kbuild_prog[] =' >> $@
+ $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+ $(Q)echo ';' >> $@
+
+$(OUTPUT)tests/llvm-src-prologue.c: tests/bpf-script-test-prologue.c tests/Build
+ $(call rule_mkdir)
+ $(Q)echo '#include <tests/llvm.h>' > $@
+ $(Q)echo 'const char test_llvm__bpf_test_prologue_prog[] =' >> $@
+ $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+ $(Q)echo ';' >> $@
+
+$(OUTPUT)tests/llvm-src-relocation.c: tests/bpf-script-test-relocation.c tests/Build
+ $(call rule_mkdir)
+ $(Q)echo '#include <tests/llvm.h>' > $@
+ $(Q)echo 'const char test_llvm__bpf_test_relocation[] =' >> $@
+ $(Q)sed -e 's/"/\\"/g' -e 's/\(.*\)/"\1\\n"/g' $< >> $@
+ $(Q)echo ';' >> $@
+
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc))
+perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+endif
+
+CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
+CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
+CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
new file mode 100644
index 000000000..05dfe11c2
--- /dev/null
+++ b/tools/perf/tests/attr.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The struct perf_event_attr test support.
+ *
+ * This test support is embedded directly into perf and is governed
+ * by the PERF_TEST_ATTR environment variable and a hook inside the
+ * sys_perf_event_open function.
+ *
+ * The general idea is to store 'struct perf_event_attr' details for
+ * each event created within a single perf command. Each event's
+ * details are stored in a separate text file. Once the perf command
+ * is finished, these files can be checked for the values we expect
+ * for the command.
+ *
+ * Besides the 'struct perf_event_attr' values we also store the 'fd'
+ * and 'group_fd' values to allow checking for the groups created.
+ *
+ * All of this is triggered by setting the PERF_TEST_ATTR environment
+ * variable. It must contain the name of an existing directory with
+ * access and write permissions. All the event text files are stored
+ * there.
+ */
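+
+/*
+ * An illustrative run (the directory name is an example only):
+ *
+ *   $ mkdir /tmp/attr
+ *   $ PERF_TEST_ATTR=/tmp/attr perf record kill >/dev/null 2>&1
+ *   $ ls /tmp/attr   # one event-<type>-<config>-<fd> file per event
+ */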
+
+#include <debug.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "../perf.h"
+#include <subcmd/exec-cmd.h>
+#include "tests.h"
+
+#define ENV "PERF_TEST_ATTR"
+
+static char *dir;
+static bool ready;
+
+void test_attr__init(void)
+{
+ dir = getenv(ENV);
+ test_attr__enabled = (dir != NULL);
+}
+
+#define BUFSIZE 1024
+
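+/*
+ * Write one "name=value" line into the event file; on a failed write,
+ * close the file and return -1 from the calling function.
+ */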
+#define __WRITE_ASS(str, fmt, data) \
+do { \
+ char buf[BUFSIZE]; \
+ size_t size; \
+ \
+ size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data); \
+ if (1 != fwrite(buf, size, 1, file)) { \
+ perror("test attr - failed to write event file"); \
+ fclose(file); \
+ return -1; \
+ } \
+ \
+} while (0)
+
+#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field)
+
+static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags)
+{
+ FILE *file;
+ char path[PATH_MAX];
+
+ if (!ready)
+ return 0;
+
+ snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir,
+ attr->type, attr->config, fd);
+
+ file = fopen(path, "w+");
+ if (!file) {
+ perror("test attr - failed to open event file");
+ return -1;
+ }
+
+ if (fprintf(file, "[event-%d-%llu-%d]\n",
+ attr->type, attr->config, fd) < 0) {
+ perror("test attr - failed to write event file");
+ fclose(file);
+ return -1;
+ }
+
+ /* syscall arguments */
+ __WRITE_ASS(fd, "d", fd);
+ __WRITE_ASS(group_fd, "d", group_fd);
+ __WRITE_ASS(cpu, "d", cpu);
+ __WRITE_ASS(pid, "d", pid);
+ __WRITE_ASS(flags, "lu", flags);
+
+ /* struct perf_event_attr */
+ WRITE_ASS(type, PRIu32);
+ WRITE_ASS(size, PRIu32);
+ WRITE_ASS(config, "llu");
+ WRITE_ASS(sample_period, "llu");
+ WRITE_ASS(sample_type, "llu");
+ WRITE_ASS(read_format, "llu");
+ WRITE_ASS(disabled, "d");
+ WRITE_ASS(inherit, "d");
+ WRITE_ASS(pinned, "d");
+ WRITE_ASS(exclusive, "d");
+ WRITE_ASS(exclude_user, "d");
+ WRITE_ASS(exclude_kernel, "d");
+ WRITE_ASS(exclude_hv, "d");
+ WRITE_ASS(exclude_idle, "d");
+ WRITE_ASS(mmap, "d");
+ WRITE_ASS(comm, "d");
+ WRITE_ASS(freq, "d");
+ WRITE_ASS(inherit_stat, "d");
+ WRITE_ASS(enable_on_exec, "d");
+ WRITE_ASS(task, "d");
+ WRITE_ASS(watermark, "d");
+ WRITE_ASS(precise_ip, "d");
+ WRITE_ASS(mmap_data, "d");
+ WRITE_ASS(sample_id_all, "d");
+ WRITE_ASS(exclude_host, "d");
+ WRITE_ASS(exclude_guest, "d");
+ WRITE_ASS(exclude_callchain_kernel, "d");
+ WRITE_ASS(exclude_callchain_user, "d");
+ WRITE_ASS(mmap2, "d");
+ WRITE_ASS(comm_exec, "d");
+ WRITE_ASS(context_switch, "d");
+ WRITE_ASS(write_backward, "d");
+ WRITE_ASS(namespaces, "d");
+ WRITE_ASS(use_clockid, "d");
+ WRITE_ASS(wakeup_events, PRIu32);
+ WRITE_ASS(bp_type, PRIu32);
+ WRITE_ASS(config1, "llu");
+ WRITE_ASS(config2, "llu");
+ WRITE_ASS(branch_sample_type, "llu");
+ WRITE_ASS(sample_regs_user, "llu");
+ WRITE_ASS(sample_stack_user, PRIu32);
+
+ fclose(file);
+ return 0;
+}
+
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags)
+{
+ int errno_saved = errno;
+
+ if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) {
+ pr_err("test attr FAILED");
+ exit(128);
+ }
+
+ errno = errno_saved;
+}
+
+void test_attr__ready(void)
+{
+ if (unlikely(test_attr__enabled) && !ready)
+ ready = true;
+}
+
+static int run_dir(const char *d, const char *perf)
+{
+ char v[] = "-vvvvv";
+ int vcnt = min(verbose, (int) sizeof(v) - 1);
+ char cmd[3*PATH_MAX];
+
+ if (verbose > 0)
+ vcnt++;
+
+ scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s",
+ d, d, perf, vcnt, v);
+
+ return system(cmd) ? TEST_FAIL : TEST_OK;
+}
+
+int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct stat st;
+ char path_perf[PATH_MAX];
+ char path_dir[PATH_MAX];
+
+ /* First try development tree tests. */
+ if (!lstat("./tests", &st))
+ return run_dir("./tests", "./perf");
+
+ /* Then installed path. */
+ snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
+ snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+
+ if (!lstat(path_dir, &st) &&
+ !lstat(path_perf, &st))
+ return run_dir(path_dir, path_perf);
+
+ return TEST_SKIP;
+}
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
new file mode 100644
index 000000000..44090a9a1
--- /dev/null
+++ b/tools/perf/tests/attr.py
@@ -0,0 +1,392 @@
+#! /usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import sys
+import glob
+import optparse
+import tempfile
+import logging
+import shutil
+import ConfigParser
+
+def data_equal(a, b):
+ # Allow multiple values in assignment separated by '|'
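+ # e.g. data_equal('0|8', '8') and data_equal('112', '*') are both True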
+ a_list = a.split('|')
+ b_list = b.split('|')
+
+ for a_item in a_list:
+ for b_item in b_list:
+ if (a_item == b_item):
+ return True
+ elif (a_item == '*') or (b_item == '*'):
+ return True
+
+ return False
+
+class Fail(Exception):
+ def __init__(self, test, msg):
+ self.msg = msg
+ self.test = test
+ def getMsg(self):
+ return '\'%s\' - %s' % (self.test.path, self.msg)
+
+class Notest(Exception):
+ def __init__(self, test, arch):
+ self.arch = arch
+ self.test = test
+ def getMsg(self):
+ return '[%s] \'%s\'' % (self.arch, self.test.path)
+
+class Unsup(Exception):
+ def __init__(self, test):
+ self.test = test
+ def getMsg(self):
+ return '\'%s\'' % self.test.path
+
+class Event(dict):
+ terms = [
+ 'cpu',
+ 'flags',
+ 'type',
+ 'size',
+ 'config',
+ 'sample_period',
+ 'sample_type',
+ 'read_format',
+ 'disabled',
+ 'inherit',
+ 'pinned',
+ 'exclusive',
+ 'exclude_user',
+ 'exclude_kernel',
+ 'exclude_hv',
+ 'exclude_idle',
+ 'mmap',
+ 'comm',
+ 'freq',
+ 'inherit_stat',
+ 'enable_on_exec',
+ 'task',
+ 'watermark',
+ 'precise_ip',
+ 'mmap_data',
+ 'sample_id_all',
+ 'exclude_host',
+ 'exclude_guest',
+ 'exclude_callchain_kernel',
+ 'exclude_callchain_user',
+ 'wakeup_events',
+ 'bp_type',
+ 'config1',
+ 'config2',
+ 'branch_sample_type',
+ 'sample_regs_user',
+ 'sample_stack_user',
+ ]
+
+ def add(self, data):
+ for key, val in data:
+ log.debug(" %s = %s" % (key, val))
+ self[key] = val
+
+ def __init__(self, name, data, base):
+ log.debug(" Event %s" % name);
+ self.name = name;
+ self.group = ''
+ self.add(base)
+ self.add(data)
+
+ def equal(self, other):
+ for t in Event.terms:
+ log.debug(" [%s] %s %s" % (t, self[t], other[t]));
+ if not self.has_key(t) or not other.has_key(t):
+ return False
+ if not data_equal(self[t], other[t]):
+ return False
+ return True
+
+ def optional(self):
+ if self.has_key('optional') and self['optional'] == '1':
+ return True
+ return False
+
+ def diff(self, other):
+ for t in Event.terms:
+ if not self.has_key(t) or not other.has_key(t):
+ continue
+ if not data_equal(self[t], other[t]):
+ log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
+
+# Test file description needs to have following sections:
+# [config]
+# - just a single instance per file
+# - needs to specify:
+# 'command' - perf command name
+# 'args' - special command arguments
+# 'ret' - expected command return value (0 by default)
+# 'arch' - architecture-specific test (optional);
+# comma-separated list, a leading '!'
+# negates it
+#
+# [eventX:base]
+# - one or more instances per file
+# - expected value assignments
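+#
+# A minimal definition in this format (mirrors tests/attr/test-record-basic):
+#
+#   [config]
+#   command = record
+#   args    = kill >/dev/null 2>&1
+#   ret     = 1
+#
+#   [event:base-record]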
+class Test(object):
+ def __init__(self, path, options):
+ parser = ConfigParser.SafeConfigParser()
+ parser.read(path)
+
+ log.warning("running '%s'" % path)
+
+ self.path = path
+ self.test_dir = options.test_dir
+ self.perf = options.perf
+ self.command = parser.get('config', 'command')
+ self.args = parser.get('config', 'args')
+
+ try:
+ self.ret = parser.get('config', 'ret')
+ except:
+ self.ret = 0
+
+ try:
+ self.arch = parser.get('config', 'arch')
+ log.warning("test limitation '%s'" % self.arch)
+ except:
+ self.arch = ''
+
+ self.expect = {}
+ self.result = {}
+ log.debug(" loading expected events");
+ self.load_events(path, self.expect)
+
+ def is_event(self, name):
+ if name.find("event") == -1:
+ return False
+ else:
+ return True
+
+ def skip_test(self, myarch):
+ # If the architecture is not set, always run the test
+ if self.arch == '':
+ # log.warning("test for arch %s is ok" % myarch)
+ return False
+
+ # Allow multiple values in assignment separated by ','
+ arch_list = self.arch.split(',')
+
+ # Handle negated list such as !s390x,ppc
+ if arch_list[0][0] == '!':
+ arch_list[0] = arch_list[0][1:]
+ log.warning("excluded architecture list %s" % arch_list)
+ for arch_item in arch_list:
+ # log.warning("test for %s arch is %s" % (arch_item, myarch))
+ if arch_item == myarch:
+ return True
+ return False
+
+ for arch_item in arch_list:
+ # log.warning("test for architecture '%s' current '%s'" % (arch_item, myarch))
+ if arch_item == myarch:
+ return False
+ return True
+
+ def load_events(self, path, events):
+ parser_event = ConfigParser.SafeConfigParser()
+ parser_event.read(path)
+
+ # The event record section header contains the word 'event',
+ # optionally followed by ':', allowing the 'parent
+ # event' to be loaded first as a base
+ for section in filter(self.is_event, parser_event.sections()):
+
+ parser_items = parser_event.items(section);
+ base_items = {}
+
+ # Read parent event if there's any
+ if (':' in section):
+ base = section[section.index(':') + 1:]
+ parser_base = ConfigParser.SafeConfigParser()
+ parser_base.read(self.test_dir + '/' + base)
+ base_items = parser_base.items('event')
+
+ e = Event(section, parser_items, base_items)
+ events[section] = e
+
+ def run_cmd(self, tempdir):
+ junk1, junk2, junk3, junk4, myarch = (os.uname())
+
+ if self.skip_test(myarch):
+ raise Notest(self, myarch)
+
+ cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
+ self.perf, self.command, tempdir, self.args)
+ ret = os.WEXITSTATUS(os.system(cmd))
+
+ log.info(" '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret)))
+
+ if not data_equal(str(ret), str(self.ret)):
+ raise Unsup(self)
+
+ def compare(self, expect, result):
+ match = {}
+
+ log.debug(" compare");
+
+ # For each expected event find all matching
+ # events in result. Fail if there's not any.
+ for exp_name, exp_event in expect.items():
+ exp_list = []
+ res_event = {}
+ log.debug(" matching [%s]" % exp_name)
+ for res_name, res_event in result.items():
+ log.debug(" to [%s]" % res_name)
+ if (exp_event.equal(res_event)):
+ exp_list.append(res_name)
+ log.debug(" ->OK")
+ else:
+ log.debug(" ->FAIL");
+
+ log.debug(" match: [%s] matches %s" % (exp_name, str(exp_list)))
+
+ # we did not find any matching event - fail
+ if not exp_list:
+ if exp_event.optional():
+ log.debug(" %s does not match, but is optional" % exp_name)
+ else:
+ if not res_event:
+ log.debug(" res_event is empty");
+ else:
+ exp_event.diff(res_event)
+ raise Fail(self, 'match failure');
+
+ match[exp_name] = exp_list
+
+ # For each defined group in the expected events
+ # check we match the same group in the result.
+ for exp_name, exp_event in expect.items():
+ group = exp_event.group
+
+ if (group == ''):
+ continue
+
+ for res_name in match[exp_name]:
+ res_group = result[res_name].group
+ if res_group not in match[group]:
+ raise Fail(self, 'group failure')
+
+ log.debug(" group: [%s] matches group leader %s" %
+ (exp_name, str(match[group])))
+
+ log.debug(" matched")
+
+ def resolve_groups(self, events):
+ for name, event in events.items():
+ group_fd = event['group_fd'];
+ if group_fd == '-1':
+ continue;
+
+ for iname, ievent in events.items():
+ if (ievent['fd'] == group_fd):
+ event.group = iname
+ log.debug('[%s] has group leader [%s]' % (name, iname))
+ break;
+
+ def run(self):
+ tempdir = tempfile.mkdtemp();
+
+ try:
+ # run the test script
+ self.run_cmd(tempdir);
+
+ # load events expectation for the test
+ log.debug(" loading result events");
+ for f in glob.glob(tempdir + '/event*'):
+ self.load_events(f, self.result);
+
+ # resolve group_fd to event names
+ self.resolve_groups(self.expect);
+ self.resolve_groups(self.result);
+
+ # do the expectation - results matching - both ways
+ self.compare(self.expect, self.result)
+ self.compare(self.result, self.expect)
+
+ finally:
+ # cleanup
+ shutil.rmtree(tempdir)
+
+
+def run_tests(options):
+ for f in glob.glob(options.test_dir + '/' + options.test):
+ try:
+ Test(f, options).run()
+ except Unsup, obj:
+ log.warning("unsupp %s" % obj.getMsg())
+ except Notest, obj:
+ log.warning("skipped %s" % obj.getMsg())
+
+def setup_log(verbose):
+ global log
+ level = logging.CRITICAL
+
+ if verbose == 1:
+ level = logging.WARNING
+ if verbose == 2:
+ level = logging.INFO
+ if verbose >= 3:
+ level = logging.DEBUG
+
+ log = logging.getLogger('test')
+ log.setLevel(level)
+ ch = logging.StreamHandler()
+ ch.setLevel(level)
+ formatter = logging.Formatter('%(message)s')
+ ch.setFormatter(formatter)
+ log.addHandler(ch)
+
+USAGE = '''%s [OPTIONS]
+ -d dir # tests dir
+ -p path # perf binary
+ -t test # single test
+ -v # verbose level
+''' % sys.argv[0]
+
+def main():
+ parser = optparse.OptionParser(usage=USAGE)
+
+ parser.add_option("-t", "--test",
+ action="store", type="string", dest="test")
+ parser.add_option("-d", "--test-dir",
+ action="store", type="string", dest="test_dir")
+ parser.add_option("-p", "--perf",
+ action="store", type="string", dest="perf")
+ parser.add_option("-v", "--verbose",
+ action="count", dest="verbose")
+
+ options, args = parser.parse_args()
+ if args:
+ parser.error('FAILED wrong arguments %s' % ' '.join(args))
+ return -1
+
+ setup_log(options.verbose)
+
+ if not options.test_dir:
+ print 'FAILED no -d option specified'
+ sys.exit(-1)
+
+ if not options.test:
+ options.test = 'test*'
+
+ try:
+ run_tests(options)
+
+ except Fail, obj:
+ print "FAILED %s" % obj.getMsg();
+ sys.exit(-1)
+
+ sys.exit(0)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
new file mode 100644
index 000000000..430024f61
--- /dev/null
+++ b/tools/perf/tests/attr/README
@@ -0,0 +1,64 @@
+The struct perf_event_attr test (attr tests) support
+====================================================
+This testing support is embedded directly into perf and is governed
+by the PERF_TEST_ATTR environment variable and a hook inside the
+sys_perf_event_open function.
+
+The general idea is to store 'struct perf_event_attr' details for
+each event created within a single perf command. Each event's details
+are stored in a separate text file. Once the perf command is finished,
+these files are checked for the values we expect for the command.
+
+The attr tests consist of the following parts:
+
+tests/attr.c
+------------
+This is the sys_perf_event_open hook implementation. The hook
+is triggered when the PERF_TEST_ATTR environment variable is
+defined. It must contain the name of an existing directory with
+access and write permissions.
+
+For each sys_perf_event_open call, event details are stored in a
+separate file. Besides the 'struct perf_event_attr' values we also
+store the 'fd' and 'group_fd' values to allow checking for groups.
+
+tests/attr.py
+-------------
+This is the python script that does all the hard work. It reads
+the test definition, executes it and checks the results.
+
+tests/attr/
+-----------
+Directory containing all attr test definitions.
+The following tests are defined (with their perf commands):
+
+ perf record kill (test-record-basic)
+ perf record -b kill (test-record-branch-any)
+ perf record -j any kill (test-record-branch-filter-any)
+ perf record -j any_call kill (test-record-branch-filter-any_call)
+ perf record -j any_ret kill (test-record-branch-filter-any_ret)
+ perf record -j hv kill (test-record-branch-filter-hv)
+ perf record -j ind_call kill (test-record-branch-filter-ind_call)
+ perf record -j k kill (test-record-branch-filter-k)
+ perf record -j u kill (test-record-branch-filter-u)
+ perf record -c 123 kill (test-record-count)
+ perf record -d kill (test-record-data)
+ perf record -F 100 kill (test-record-freq)
+ perf record -g kill (test-record-graph-default)
+ perf record --call-graph dwarf kill (test-record-graph-dwarf)
+ perf record --call-graph fp kill (test-record-graph-fp)
+ perf record --group -e cycles,instructions kill (test-record-group)
+ perf record -e '{cycles,instructions}' kill (test-record-group1)
+ perf record -D kill (test-record-no-delay)
+ perf record -i kill (test-record-no-inherit)
+ perf record -n kill (test-record-no-samples)
+ perf record -c 100 -P kill (test-record-period)
+ perf record -R kill (test-record-raw)
+ perf stat -e cycles kill (test-stat-basic)
+ perf stat kill (test-stat-default)
+ perf stat -d kill (test-stat-detailed-1)
+ perf stat -dd kill (test-stat-detailed-2)
+ perf stat -ddd kill (test-stat-detailed-3)
+ perf stat --group -e cycles,instructions kill (test-stat-group)
+ perf stat -e '{cycles,instructions}' kill (test-stat-group1)
+ perf stat -i -e cycles kill (test-stat-no-inherit)
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
new file mode 100644
index 000000000..efd0157b9
--- /dev/null
+++ b/tools/perf/tests/attr/base-record
@@ -0,0 +1,41 @@
+[event]
+fd=1
+group_fd=-1
+# 0 or PERF_FLAG_FD_CLOEXEC flag
+flags=0|8
+cpu=*
+type=0|1
+size=112
+config=0
+sample_period=*
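+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD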
+sample_type=263
+read_format=0|4
+disabled=1
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0|1
+exclude_hv=0
+exclude_idle=0
+mmap=1
+comm=1
+freq=1
+inherit_stat=0
+enable_on_exec=1
+task=1
+watermark=0
+precise_ip=0|1|2|3
+mmap_data=0
+sample_id_all=1
+exclude_host=0|1
+exclude_guest=0|1
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
new file mode 100644
index 000000000..4d0c2e42b
--- /dev/null
+++ b/tools/perf/tests/attr/base-stat
@@ -0,0 +1,41 @@
+[event]
+fd=1
+group_fd=-1
+# 0 or PERF_FLAG_FD_CLOEXEC flag
+flags=0|8
+cpu=*
+type=0
+size=112
+config=0
+sample_period=0
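+# PERF_SAMPLE_IDENTIFIER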
+sample_type=65536
+read_format=3
+disabled=1
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0|1
+exclude_hv=0
+exclude_idle=0
+mmap=0
+comm=0
+freq=0
+inherit_stat=0
+enable_on_exec=1
+task=0
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=0
+exclude_host=0|1
+exclude_guest=0|1
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0
new file mode 100644
index 000000000..cb0a3138f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-C0
@@ -0,0 +1,14 @@
+[config]
+command = record
+args = -C 0 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+cpu=0
+
+# no enable on exec for CPU attached
+enable_on_exec=0
+
+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_PERIOD
+# + PERF_SAMPLE_CPU added by -C 0
+sample_type=391
diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/attr/test-record-basic
new file mode 100644
index 000000000..85a23cf35
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-basic
@@ -0,0 +1,6 @@
+[config]
+command = record
+args = kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/attr/test-record-branch-any
new file mode 100644
index 000000000..81f839e2f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-any
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -b kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
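+# base sample_type (263) plus PERF_SAMPLE_BRANCH_STACK (2048), added by -b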
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/attr/test-record-branch-filter-any
new file mode 100644
index 000000000..357421f4d
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-any
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j any kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/attr/test-record-branch-filter-any_call
new file mode 100644
index 000000000..dbc55f2ab
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-any_call
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j any_call kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=16
diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/attr/test-record-branch-filter-any_ret
new file mode 100644
index 000000000..a0824ff8e
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-any_ret
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j any_ret kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=32
diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/attr/test-record-branch-filter-hv
new file mode 100644
index 000000000..f34d6f120
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-hv
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j hv kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/attr/test-record-branch-filter-ind_call
new file mode 100644
index 000000000..b86a35232
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-ind_call
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j ind_call kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=64
diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/attr/test-record-branch-filter-k
new file mode 100644
index 000000000..d3fbc5e18
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-k
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j k kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/attr/test-record-branch-filter-u
new file mode 100644
index 000000000..a318f0dda
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-branch-filter-u
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -j u kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=2311
+branch_sample_type=8
diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/attr/test-record-count
new file mode 100644
index 000000000..34f6cc577
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-count
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = -c 123 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=123
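+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME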
+sample_type=7
+freq=0
diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data
new file mode 100644
index 000000000..a9cf2233b
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-data
@@ -0,0 +1,10 @@
+[config]
+command = record
+args = -d kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+# sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
+# PERF_SAMPLE_ADDR | PERF_SAMPLE_PERIOD | PERF_SAMPLE_DATA_SRC
+sample_type=33039
+mmap_data=1
diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/attr/test-record-freq
new file mode 100644
index 000000000..bf4cb459f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-freq
@@ -0,0 +1,7 @@
+[config]
+command = record
+args = -F 100 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=100
diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default
new file mode 100644
index 000000000..0b216e697
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-default
@@ -0,0 +1,7 @@
+[config]
+command = record
+args = -g kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
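+# base sample_type (263) plus PERF_SAMPLE_CALLCHAIN (32), added by -g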
+sample_type=295
diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf
new file mode 100644
index 000000000..da2fa73bd
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-dwarf
@@ -0,0 +1,12 @@
+[config]
+command = record
+args = --call-graph dwarf -- kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=45359
+exclude_callchain_user=1
+sample_stack_user=8192
+# TODO different for each arch, no support for that now
+sample_regs_user=*
+mmap_data=1
diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp
new file mode 100644
index 000000000..625d190bb
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-graph-fp
@@ -0,0 +1,7 @@
+[config]
+command = record
+args = --call-graph fp kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=295
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
new file mode 100644
index 000000000..618ba1c17
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group
@@ -0,0 +1,22 @@
+[config]
+command = record
+args = --group -e cycles,instructions kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=327
+read_format=4
+
+[event-2:base-record]
+fd=2
+group_fd=1
+config=1
+sample_type=327
+read_format=4
+mmap=0
+comm=0
+task=0
+enable_on_exec=0
+disabled=0
diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling
new file mode 100644
index 000000000..f0729c454
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group-sampling
@@ -0,0 +1,39 @@
+[config]
+command = record
+args = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=343
+read_format=12
+inherit=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+
+# cache-misses
+type=0
+config=3
+
+# default | PERF_SAMPLE_READ
+sample_type=343
+
+# PERF_FORMAT_ID | PERF_FORMAT_GROUP
+read_format=12
+task=0
+mmap=0
+comm=0
+enable_on_exec=0
+disabled=0
+
+# inherit is disabled for group sampling
+inherit=0
+
+# sampling disabled
+sample_freq=0
+sample_period=0
+freq=0
+write_backward=0
diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1
new file mode 100644
index 000000000..48e8bd12f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group1
@@ -0,0 +1,23 @@
+[config]
+command = record
+args = -e '{cycles,instructions}' kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+sample_type=327
+read_format=4
+
+[event-2:base-record]
+fd=2
+group_fd=1
+type=0
+config=1
+sample_type=327
+read_format=4
+mmap=0
+comm=0
+task=0
+enable_on_exec=0
+disabled=0
diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/attr/test-record-no-buffering
new file mode 100644
index 000000000..aa3956d8f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-no-buffering
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = --no-buffering kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=263
+watermark=0
+wakeup_events=1
diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit
new file mode 100644
index 000000000..560943dec
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-no-inherit
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -i kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_type=263
+inherit=0
diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/attr/test-record-no-samples
new file mode 100644
index 000000000..8eb73ab63
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-no-samples
@@ -0,0 +1,7 @@
+[config]
+command = record
+args = -n kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=0
diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/attr/test-record-period
new file mode 100644
index 000000000..69bc748f0
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-period
@@ -0,0 +1,8 @@
+[config]
+command = record
+args = -c 100 -P kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=100
+freq=0
diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/attr/test-record-raw
new file mode 100644
index 000000000..a188a614a
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-raw
@@ -0,0 +1,7 @@
+[config]
+command = record
+args = -R kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
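+# base sample_type (263) plus PERF_SAMPLE_RAW (1024) and PERF_SAMPLE_CPU (128)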
+sample_type=1415
diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/attr/test-stat-C0
new file mode 100644
index 000000000..a2c76d10b
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-C0
@@ -0,0 +1,10 @@
+[config]
+command = stat
+args = -e cycles -C 0 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-stat]
+# events are disabled by default when attached to cpu
+disabled=1
+enable_on_exec=0
+optional=1
diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/attr/test-stat-basic
new file mode 100644
index 000000000..69867d049
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-basic
@@ -0,0 +1,7 @@
+[config]
+command = stat
+args = -e cycles kill >/dev/null 2>&1
+ret = 1
+
+[event:base-stat]
+optional=1
diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default
new file mode 100644
index 000000000..d9e99b3f7
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-default
@@ -0,0 +1,70 @@
+[config]
+command = stat
+args = kill >/dev/null 2>&1
+ret = 1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1
new file mode 100644
index 000000000..8b04a055d
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-detailed-1
@@ -0,0 +1,111 @@
+[config]
+command = stat
+args = -d kill >/dev/null 2>&1
+ret = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_LL << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_LL << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+optional=1
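
The config values for the PERF_TYPE_HW_CACHE events above follow the (id | op << 8 | result << 16) encoding spelled out in the comments. A minimal sketch, assuming the standard <linux/perf_event.h> enums, that reproduces two of the constants:

	#include <linux/perf_event.h>
	#include <assert.h>

	/* Build a PERF_TYPE_HW_CACHE config value from its three fields. */
	static unsigned long long hw_cache_config(int id, int op, int result)
	{
		return (unsigned long long)(id | (op << 8) | (result << 16));
	}

	int main(void)
	{
		/* event12: L1D read miss -> 0x10000 */
		assert(hw_cache_config(PERF_COUNT_HW_CACHE_L1D,
				       PERF_COUNT_HW_CACHE_OP_READ,
				       PERF_COUNT_HW_CACHE_RESULT_MISS) == 65536);
		/* event14: LL read miss -> 0x10002 */
		assert(hw_cache_config(PERF_COUNT_HW_CACHE_LL,
				       PERF_COUNT_HW_CACHE_OP_READ,
				       PERF_COUNT_HW_CACHE_RESULT_MISS) == 65538);
		return 0;
	}
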
diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/attr/test-stat-detailed-2
new file mode 100644
index 000000000..4fca9f1bf
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-detailed-2
@@ -0,0 +1,171 @@
+[config]
+command = stat
+args = -dd kill >/dev/null 2>&1
+ret = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_LL << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_LL << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_L1I << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event15:base-stat]
+fd=15
+type=3
+config=1
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_L1I << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event16:base-stat]
+fd=16
+type=3
+config=65537
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_DTLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event17:base-stat]
+fd=17
+type=3
+config=3
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_DTLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event18:base-stat]
+fd=18
+type=3
+config=65539
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_ITLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event19:base-stat]
+fd=19
+type=3
+config=4
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_ITLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event20:base-stat]
+fd=20
+type=3
+config=65540
+optional=1
diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3
new file mode 100644
index 000000000..4bb58e1c8
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-detailed-3
@@ -0,0 +1,191 @@
+[config]
+command = stat
+args = -ddd kill >/dev/null 2>&1
+ret = 1
+
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_TASK_CLOCK
+[event1:base-stat]
+fd=1
+type=1
+config=1
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CONTEXT_SWITCHES
+[event2:base-stat]
+fd=2
+type=1
+config=3
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_CPU_MIGRATIONS
+[event3:base-stat]
+fd=3
+type=1
+config=4
+
+# PERF_TYPE_SOFTWARE / PERF_COUNT_SW_PAGE_FAULTS
+[event4:base-stat]
+fd=4
+type=1
+config=2
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_CPU_CYCLES
+[event5:base-stat]
+fd=5
+type=0
+config=0
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+[event6:base-stat]
+fd=6
+type=0
+config=7
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+[event7:base-stat]
+fd=7
+type=0
+config=8
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_INSTRUCTIONS
+[event8:base-stat]
+fd=8
+type=0
+config=1
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_INSTRUCTIONS
+[event9:base-stat]
+fd=9
+type=0
+config=4
+optional=1
+
+# PERF_TYPE_HARDWARE / PERF_COUNT_HW_BRANCH_MISSES
+[event10:base-stat]
+fd=10
+type=0
+config=5
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event11:base-stat]
+fd=11
+type=3
+config=0
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event12:base-stat]
+fd=12
+type=3
+config=65536
+optional=1
+
+# PERF_TYPE_HW_CACHE /
+# PERF_COUNT_HW_CACHE_LL << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event13:base-stat]
+fd=13
+type=3
+config=2
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_LL << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event14:base-stat]
+fd=14
+type=3
+config=65538
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_L1I << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event15:base-stat]
+fd=15
+type=3
+config=1
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_L1I << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event16:base-stat]
+fd=16
+type=3
+config=65537
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_DTLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event17:base-stat]
+fd=17
+type=3
+config=3
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_DTLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event18:base-stat]
+fd=18
+type=3
+config=65539
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_ITLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event19:base-stat]
+fd=19
+type=3
+config=4
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_ITLB << 0 |
+# (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event20:base-stat]
+fd=20
+type=3
+config=65540
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16)
+[event21:base-stat]
+fd=21
+type=3
+config=512
+optional=1
+
+# PERF_TYPE_HW_CACHE,
+# PERF_COUNT_HW_CACHE_L1D << 0 |
+# (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) |
+# (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)
+[event22:base-stat]
+fd=22
+type=3
+config=66048
+optional=1
diff --git a/tools/perf/tests/attr/test-stat-group b/tools/perf/tests/attr/test-stat-group
new file mode 100644
index 000000000..e15d6946e
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-group
@@ -0,0 +1,17 @@
+[config]
+command = stat
+args = --group -e cycles,instructions kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-stat]
+fd=1
+group_fd=-1
+read_format=3|15
+
+[event-2:base-stat]
+fd=2
+group_fd=1
+config=1
+disabled=0
+enable_on_exec=0
+read_format=3|15
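
The read_format=3|15 lines appear to use the attr-test convention that '|' separates alternative accepted values: 3 is PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING, while 15 additionally sets PERF_FORMAT_ID and PERF_FORMAT_GROUP for group reads. A minimal sketch, assuming <linux/perf_event.h>, that reproduces both:

	#include <linux/perf_event.h>
	#include <assert.h>

	int main(void)
	{
		unsigned long long times = PERF_FORMAT_TOTAL_TIME_ENABLED |
					   PERF_FORMAT_TOTAL_TIME_RUNNING;
		unsigned long long grouped = times | PERF_FORMAT_ID |
					     PERF_FORMAT_GROUP;

		assert(times == 3);	/* scaling information only */
		assert(grouped == 15);	/* plus per-event ids and group read */
		return 0;
	}
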
diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/attr/test-stat-group1
new file mode 100644
index 000000000..174675112
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-group1
@@ -0,0 +1,17 @@
+[config]
+command = stat
+args = -e '{cycles,instructions}' kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-stat]
+fd=1
+group_fd=-1
+read_format=3|15
+
+[event-2:base-stat]
+fd=2
+group_fd=1
+config=1
+disabled=0
+enable_on_exec=0
+read_format=3|15
diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/attr/test-stat-no-inherit
new file mode 100644
index 000000000..924fbb930
--- /dev/null
+++ b/tools/perf/tests/attr/test-stat-no-inherit
@@ -0,0 +1,8 @@
+[config]
+command = stat
+args = -i -e cycles kill >/dev/null 2>&1
+ret = 1
+
+[event:base-stat]
+inherit=0
+optional=1
diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c
new file mode 100644
index 000000000..6d598cc07
--- /dev/null
+++ b/tools/perf/tests/backward-ring-buffer.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test the backward bit in the event attribute: read the ring buffer
+ * from end to beginning
+ */
+
+#include <perf.h>
+#include <evlist.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "debug.h"
+#include <errno.h>
+
+#define NR_ITERS 111
+
+static void testcase(void)
+{
+ int i;
+
+ for (i = 0; i < NR_ITERS; i++) {
+ char proc_name[10];
+
+ snprintf(proc_name, sizeof(proc_name), "p:%d\n", i);
+ prctl(PR_SET_NAME, proc_name);
+ }
+}
+
+static int count_samples(struct perf_evlist *evlist, int *sample_count,
+ int *comm_count)
+{
+ int i;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct perf_mmap *map = &evlist->overwrite_mmap[i];
+ union perf_event *event;
+
+ perf_mmap__read_init(map);
+ while ((event = perf_mmap__read_event(map)) != NULL) {
+ const u32 type = event->header.type;
+
+ switch (type) {
+ case PERF_RECORD_SAMPLE:
+ (*sample_count)++;
+ break;
+ case PERF_RECORD_COMM:
+ (*comm_count)++;
+ break;
+ default:
+ pr_err("Unexpected record of type %d\n", type);
+ return TEST_FAIL;
+ }
+ }
+ perf_mmap__read_done(map);
+ }
+ return TEST_OK;
+}
+
+static int do_test(struct perf_evlist *evlist, int mmap_pages,
+ int *sample_count, int *comm_count)
+{
+ int err;
+ char sbuf[STRERR_BUFSIZE];
+
+ err = perf_evlist__mmap(evlist, mmap_pages);
+ if (err < 0) {
+ pr_debug("perf_evlist__mmap: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return TEST_FAIL;
+ }
+
+ perf_evlist__enable(evlist);
+ testcase();
+ perf_evlist__disable(evlist);
+
+ err = count_samples(evlist, sample_count, comm_count);
+ perf_evlist__munmap(evlist);
+ return err;
+}
+
+
+int test__backward_ring_buffer(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret = TEST_SKIP, err, sample_count = 0, comm_count = 0;
+ char pid[16], sbuf[STRERR_BUFSIZE];
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel __maybe_unused;
+ struct parse_events_error parse_error;
+ struct record_opts opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .freq = 0,
+ .mmap_pages = 256,
+ .default_interval = 1,
+ };
+
+ snprintf(pid, sizeof(pid), "%d", getpid());
+ pid[sizeof(pid) - 1] = '\0';
+ opts.target.tid = opts.target.pid = pid;
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("Not enough memory to create evlist\n");
+ return TEST_FAIL;
+ }
+
+ err = perf_evlist__create_maps(evlist, &opts.target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_delete_evlist;
+ }
+
+ bzero(&parse_error, sizeof(parse_error));
+ /*
+	 * Set the backward bit; the ring buffer should be written from the
+	 * end. Record it in the aux evlist
+ */
+ err = parse_events(evlist, "syscalls:sys_enter_prctl/overwrite/", &parse_error);
+ if (err) {
+		pr_debug("Failed to parse tracepoint event, try using root\n");
+ ret = TEST_SKIP;
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ ret = TEST_FAIL;
+ err = do_test(evlist, opts.mmap_pages, &sample_count,
+ &comm_count);
+ if (err != TEST_OK)
+ goto out_delete_evlist;
+
+ if ((sample_count != NR_ITERS) || (comm_count != NR_ITERS)) {
+ pr_err("Unexpected counter: sample_count=%d, comm_count=%d\n",
+ sample_count, comm_count);
+ goto out_delete_evlist;
+ }
+
+ err = do_test(evlist, 1, &sample_count, &comm_count);
+ if (err != TEST_OK)
+ goto out_delete_evlist;
+
+ ret = TEST_OK;
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return ret;
+}
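
For context, the /overwrite/ modifier parsed above sets the backward/overwrite mode on the event, and the evlist maps such ring buffers read-only so the kernel writes records from the end. A minimal standalone sketch of the same setup, assuming a kernel whose perf_event_attr exposes the write_backward bit; map_backward_rb is an illustrative helper, not the evlist API used by this test:

	#include <linux/perf_event.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <string.h>
	#include <unistd.h>

	/* Open a dummy event with the backward bit set and map its ring
	 * buffer without PROT_WRITE; the kernel then fills it from the
	 * end. 'pages' must be a power of two; error handling elided. */
	static void *map_backward_rb(int pages, int *fdp)
	{
		struct perf_event_attr attr;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_SOFTWARE;
		attr.config = PERF_COUNT_SW_DUMMY;
		attr.sample_period = 1;
		attr.write_backward = 1;	/* assumption: kernel >= 4.7 */

		*fdp = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		return mmap(NULL, (pages + 1) * getpagesize(),
			    PROT_READ, MAP_SHARED, *fdp, 0);
	}
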
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
new file mode 100644
index 000000000..96e7fc1ad
--- /dev/null
+++ b/tools/perf/tests/bitmap.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/bitmap.h>
+#include "tests.h"
+#include "cpumap.h"
+#include "debug.h"
+
+#define NBITS 100
+
+static unsigned long *get_bitmap(const char *str, int nbits)
+{
+ struct cpu_map *map = cpu_map__new(str);
+ unsigned long *bm = NULL;
+ int i;
+
+ bm = bitmap_alloc(nbits);
+
+ if (map && bm) {
+ for (i = 0; i < map->nr; i++)
+ set_bit(map->map[i], bm);
+ }
+
+ if (map)
+ cpu_map__put(map);
+ return bm;
+}
+
+static int test_bitmap(const char *str)
+{
+ unsigned long *bm = get_bitmap(str, NBITS);
+ char buf[100];
+ int ret;
+
+ bitmap_scnprintf(bm, NBITS, buf, sizeof(buf));
+ pr_debug("bitmap: %s\n", buf);
+
+ ret = !strcmp(buf, str);
+ free(bm);
+ return ret;
+}
+
+int test__bitmap_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("1"));
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,5"));
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3,5,7,9,11,13,15,17,19,21-40"));
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("2-5"));
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37"));
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("1,3-6,8-10,24,35-37"));
+ TEST_ASSERT_VAL("failed to convert map", test_bitmap("1-10,12-20,22-30,32-40"));
+ return 0;
+}
diff --git a/tools/perf/tests/bp_account.c b/tools/perf/tests/bp_account.c
new file mode 100644
index 000000000..624e4ef73
--- /dev/null
+++ b/tools/perf/tests/bp_account.c
@@ -0,0 +1,193 @@
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+#include <sys/ioctl.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "perf.h"
+#include "cloexec.h"
+
+static volatile long the_var;
+
+static noinline int test_function(void)
+{
+ return 0;
+}
+
+static int __event(bool is_x, void *addr, struct perf_event_attr *attr)
+{
+ int fd;
+
+ memset(attr, 0, sizeof(struct perf_event_attr));
+ attr->type = PERF_TYPE_BREAKPOINT;
+ attr->size = sizeof(struct perf_event_attr);
+
+ attr->config = 0;
+ attr->bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
+ attr->bp_addr = (unsigned long) addr;
+ attr->bp_len = sizeof(long);
+
+ attr->sample_period = 1;
+ attr->sample_type = PERF_SAMPLE_IP;
+
+ attr->exclude_kernel = 1;
+ attr->exclude_hv = 1;
+
+ fd = sys_perf_event_open(attr, -1, 0, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("failed opening event %llx\n", attr->config);
+ return TEST_FAIL;
+ }
+
+ return fd;
+}
+
+static int wp_event(void *addr, struct perf_event_attr *attr)
+{
+ return __event(false, addr, attr);
+}
+
+static int bp_event(void *addr, struct perf_event_attr *attr)
+{
+ return __event(true, addr, attr);
+}
+
+static int bp_accounting(int wp_cnt, int share)
+{
+ struct perf_event_attr attr, attr_mod, attr_new;
+ int i, fd[wp_cnt], fd_wp, ret;
+
+ for (i = 0; i < wp_cnt; i++) {
+ fd[i] = wp_event((void *)&the_var, &attr);
+ TEST_ASSERT_VAL("failed to create wp\n", fd[i] != -1);
+ pr_debug("wp %d created\n", i);
+ }
+
+ attr_mod = attr;
+ attr_mod.bp_type = HW_BREAKPOINT_X;
+ attr_mod.bp_addr = (unsigned long) test_function;
+
+ ret = ioctl(fd[0], PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr_mod);
+ TEST_ASSERT_VAL("failed to modify wp\n", ret == 0);
+
+ pr_debug("wp 0 modified to bp\n");
+
+ if (!share) {
+ fd_wp = wp_event((void *)&the_var, &attr_new);
+ TEST_ASSERT_VAL("failed to create max wp\n", fd_wp != -1);
+ pr_debug("wp max created\n");
+ }
+
+ for (i = 0; i < wp_cnt; i++)
+ close(fd[i]);
+
+ return 0;
+}
+
+static int detect_cnt(bool is_x)
+{
+ struct perf_event_attr attr;
+ void *addr = is_x ? (void *)test_function : (void *)&the_var;
+ int fd[100], cnt = 0, i;
+
+ while (1) {
+ if (cnt == 100) {
+ pr_debug("way too many debug registers, fix the test\n");
+ return 0;
+ }
+ fd[cnt] = __event(is_x, addr, &attr);
+
+ if (fd[cnt] < 0)
+ break;
+ cnt++;
+ }
+
+ for (i = 0; i < cnt; i++)
+ close(fd[i]);
+
+ return cnt;
+}
+
+static int detect_ioctl(void)
+{
+ struct perf_event_attr attr;
+ int fd, ret = 1;
+
+ fd = wp_event((void *) &the_var, &attr);
+ if (fd > 0) {
+ ret = ioctl(fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &attr);
+ close(fd);
+ }
+
+ return ret ? 0 : 1;
+}
+
+static int detect_share(int wp_cnt, int bp_cnt)
+{
+ struct perf_event_attr attr;
+ int i, fd[wp_cnt + bp_cnt], ret;
+
+ for (i = 0; i < wp_cnt; i++) {
+ fd[i] = wp_event((void *)&the_var, &attr);
+ TEST_ASSERT_VAL("failed to create wp\n", fd[i] != -1);
+ }
+
+ for (; i < (bp_cnt + wp_cnt); i++) {
+ fd[i] = bp_event((void *)test_function, &attr);
+ if (fd[i] == -1)
+ break;
+ }
+
+ ret = i != (bp_cnt + wp_cnt);
+
+ while (i--)
+ close(fd[i]);
+
+ return ret;
+}
+
+/*
+ * This test does the following:
+ *   - detects the number of watchpoints/breakpoints,
+ *     skips the test if either is missing
+ *   - detects the PERF_EVENT_IOC_MODIFY_ATTRIBUTES ioctl,
+ *     skips the test if it's missing
+ *   - detects whether watchpoints and breakpoints share
+ *     the same slots
+ *   - creates all possible watchpoints on cpu 0
+ *   - changes one of them to a breakpoint
+ *   - in case wp and bp do not share slots,
+ *     creates another watchpoint to ensure
+ *     the slot accounting is correct
+ */
+int test__bp_accounting(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int has_ioctl = detect_ioctl();
+ int wp_cnt = detect_cnt(false);
+ int bp_cnt = detect_cnt(true);
+ int share = detect_share(wp_cnt, bp_cnt);
+
+ pr_debug("watchpoints count %d, breakpoints count %d, has_ioctl %d, share %d\n",
+ wp_cnt, bp_cnt, has_ioctl, share);
+
+ if (!wp_cnt || !bp_cnt || !has_ioctl)
+ return TEST_SKIP;
+
+ return bp_accounting(wp_cnt, share);
+}
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
new file mode 100644
index 000000000..697423ce3
--- /dev/null
+++ b/tools/perf/tests/bp_signal.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Inspired by breakpoint overflow test done by
+ * Vince Weaver <vincent.weaver@maine.edu> for perf_event_tests
+ * (git://github.com/deater/perf_event_tests)
+ */
+
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "perf.h"
+#include "cloexec.h"
+
+static int fd1;
+static int fd2;
+static int fd3;
+static int overflows;
+static int overflows_2;
+
+volatile long the_var;
+
+
+/*
+ * Use ASM to ensure the watchpoint and breakpoint can be triggered
+ * by the same instruction.
+ */
+#if defined (__x86_64__)
+extern void __test_function(volatile long *ptr);
+asm (
+ ".pushsection .text;"
+ ".globl __test_function\n"
+ ".type __test_function, @function;"
+ "__test_function:\n"
+ "incq (%rdi)\n"
+ "ret\n"
+ ".popsection\n");
+#else
+static void __test_function(volatile long *ptr)
+{
+ *ptr = 0x1234;
+}
+#endif
+
+static noinline int test_function(void)
+{
+ __test_function(&the_var);
+ the_var++;
+ return time(NULL);
+}
+
+static void sig_handler_2(int signum __maybe_unused,
+ siginfo_t *oh __maybe_unused,
+ void *uc __maybe_unused)
+{
+ overflows_2++;
+ if (overflows_2 > 10) {
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+ }
+}
+
+static void sig_handler(int signum __maybe_unused,
+ siginfo_t *oh __maybe_unused,
+ void *uc __maybe_unused)
+{
+ overflows++;
+
+ if (overflows > 10) {
+ /*
+		 * This should be executed only once during
+		 * this test; if we get here for the 10th
+		 * time, consider it the recursion issue.
+		 *
+		 * We can get out of here by disabling the events,
+		 * so no new SIGIO is delivered.
+ */
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+ }
+}
+
+static int __event(bool is_x, void *addr, int sig)
+{
+ struct perf_event_attr pe;
+ int fd;
+
+ memset(&pe, 0, sizeof(struct perf_event_attr));
+ pe.type = PERF_TYPE_BREAKPOINT;
+ pe.size = sizeof(struct perf_event_attr);
+
+ pe.config = 0;
+ pe.bp_type = is_x ? HW_BREAKPOINT_X : HW_BREAKPOINT_W;
+ pe.bp_addr = (unsigned long) addr;
+ pe.bp_len = sizeof(long);
+
+ pe.sample_period = 1;
+ pe.sample_type = PERF_SAMPLE_IP;
+ pe.wakeup_events = 1;
+
+ pe.disabled = 1;
+ pe.exclude_kernel = 1;
+ pe.exclude_hv = 1;
+
+ fd = sys_perf_event_open(&pe, 0, -1, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("failed opening event %llx\n", pe.config);
+ return TEST_FAIL;
+ }
+
+ fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+ fcntl(fd, F_SETSIG, sig);
+ fcntl(fd, F_SETOWN, getpid());
+
+ ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+
+ return fd;
+}
+
+static int bp_event(void *addr, int sig)
+{
+ return __event(true, addr, sig);
+}
+
+static int wp_event(void *addr, int sig)
+{
+ return __event(false, addr, sig);
+}
+
+static long long bp_count(int fd)
+{
+ long long count;
+ int ret;
+
+ ret = read(fd, &count, sizeof(long long));
+ if (ret != sizeof(long long)) {
+ pr_debug("failed to read: %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return count;
+}
+
+int test__bp_signal(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct sigaction sa;
+ long long count1, count2, count3;
+
+ /* setup SIGIO signal handler */
+ memset(&sa, 0, sizeof(struct sigaction));
+ sa.sa_sigaction = (void *) sig_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ if (sigaction(SIGIO, &sa, NULL) < 0) {
+ pr_debug("failed setting up signal handler\n");
+ return TEST_FAIL;
+ }
+
+ sa.sa_sigaction = (void *) sig_handler_2;
+ if (sigaction(SIGUSR1, &sa, NULL) < 0) {
+ pr_debug("failed setting up signal handler 2\n");
+ return TEST_FAIL;
+ }
+
+ /*
+	 * We create the following events:
+ *
+ * fd1 - breakpoint event on __test_function with SIGIO
+ * signal configured. We should get signal
+ * notification each time the breakpoint is hit
+ *
+ * fd2 - breakpoint event on sig_handler with SIGUSR1
+	 *       configured. We should get SIGUSR1 each time the
+	 *       breakpoint is hit
+ *
+ * fd3 - watchpoint event on __test_function with SIGIO
+ * configured.
+ *
+ * Following processing should happen:
+ * Exec: Action: Result:
+ * incq (%rdi) - fd1 event breakpoint hit -> count1 == 1
+ * - SIGIO is delivered
+ * sig_handler - fd2 event breakpoint hit -> count2 == 1
+ * - SIGUSR1 is delivered
+ * sig_handler_2 -> overflows_2 == 1 (nested signal)
+ * sys_rt_sigreturn - return from sig_handler_2
+ * overflows++ -> overflows = 1
+ * sys_rt_sigreturn - return from sig_handler
+ * incq (%rdi) - fd3 event watchpoint hit -> count3 == 1 (wp and bp in one insn)
+ * - SIGIO is delivered
+ * sig_handler - fd2 event breakpoint hit -> count2 == 2
+ * - SIGUSR1 is delivered
+ * sig_handler_2 -> overflows_2 == 2 (nested signal)
+ * sys_rt_sigreturn - return from sig_handler_2
+ * overflows++ -> overflows = 2
+ * sys_rt_sigreturn - return from sig_handler
+ * the_var++ - fd3 event watchpoint hit -> count3 == 2 (standalone watchpoint)
+ * - SIGIO is delivered
+ * sig_handler - fd2 event breakpoint hit -> count2 == 3
+ * - SIGUSR1 is delivered
+ * sig_handler_2 -> overflows_2 == 3 (nested signal)
+ * sys_rt_sigreturn - return from sig_handler_2
+ * overflows++ -> overflows == 3
+ * sys_rt_sigreturn - return from sig_handler
+ *
+	 * The test case checks the following error conditions:
+	 * - we get stuck in the signal handler because the debug
+	 *   exception is triggered recursively due to
+	 *   the wrong RF EFLAG management
+	 *
+	 * - we never trigger the sig_handler breakpoint due
+	 *   to the wrong RF EFLAG management
+ *
+ */
+
+ fd1 = bp_event(__test_function, SIGIO);
+ fd2 = bp_event(sig_handler, SIGUSR1);
+ fd3 = wp_event((void *)&the_var, SIGIO);
+
+ ioctl(fd1, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(fd2, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(fd3, PERF_EVENT_IOC_ENABLE, 0);
+
+ /*
+	 * Kick off the test by triggering 'fd1'
+ * breakpoint.
+ */
+ test_function();
+
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(fd3, PERF_EVENT_IOC_DISABLE, 0);
+
+ count1 = bp_count(fd1);
+ count2 = bp_count(fd2);
+ count3 = bp_count(fd3);
+
+ close(fd1);
+ close(fd2);
+ close(fd3);
+
+ pr_debug("count1 %lld, count2 %lld, count3 %lld, overflow %d, overflows_2 %d\n",
+ count1, count2, count3, overflows, overflows_2);
+
+ if (count1 != 1) {
+ if (count1 == 11)
+ pr_debug("failed: RF EFLAG recursion issue detected\n");
+ else
+			pr_debug("failed: wrong count for bp1: %lld\n", count1);
+ }
+
+ if (overflows != 3)
+ pr_debug("failed: wrong overflow hit\n");
+
+ if (overflows_2 != 3)
+ pr_debug("failed: wrong overflow_2 hit\n");
+
+ if (count2 != 3)
+ pr_debug("failed: wrong count for bp2\n");
+
+ if (count3 != 2)
+ pr_debug("failed: wrong count for bp3\n");
+
+ return count1 == 1 && overflows == 3 && count2 == 3 && overflows_2 == 3 && count3 == 2 ?
+ TEST_OK : TEST_FAIL;
+}
+
+bool test__bp_signal_is_supported(void)
+{
+ /*
+ * PowerPC and S390 do not support creation of instruction
+ * breakpoints using the perf_event interface.
+ *
+ * ARM requires explicit rounding down of the instruction
+ * pointer in Thumb mode, and then requires the single-step
+ * to be handled explicitly in the overflow handler to avoid
+ * stepping into the SIGIO handler and getting stuck on the
+ * breakpointed instruction.
+ *
+	 * Since arm64 has the same issue as arm with the single-step
+	 * handling, this case also gets stuck on the breakpointed
+ * instruction.
+ *
+ * Just disable the test for these architectures until these
+ * issues are resolved.
+ */
+#if defined(__powerpc__) || defined(__s390x__) || defined(__arm__) || \
+ defined(__aarch64__)
+ return false;
+#else
+ return true;
+#endif
+}
diff --git a/tools/perf/tests/bp_signal_overflow.c b/tools/perf/tests/bp_signal_overflow.c
new file mode 100644
index 000000000..ca962559e
--- /dev/null
+++ b/tools/perf/tests/bp_signal_overflow.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Originally done by Vince Weaver <vincent.weaver@maine.edu> for
+ * perf_event_tests (git://github.com/deater/perf_event_tests)
+ */
+
+/*
+ * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
+ * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
+ */
+#define __SANE_USERSPACE_TYPES__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <time.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "perf.h"
+#include "cloexec.h"
+
+static int overflows;
+
+static noinline int test_function(void)
+{
+ return time(NULL);
+}
+
+static void sig_handler(int signum __maybe_unused,
+ siginfo_t *oh __maybe_unused,
+ void *uc __maybe_unused)
+{
+ overflows++;
+}
+
+static long long bp_count(int fd)
+{
+ long long count;
+ int ret;
+
+ ret = read(fd, &count, sizeof(long long));
+ if (ret != sizeof(long long)) {
+ pr_debug("failed to read: %d\n", ret);
+ return TEST_FAIL;
+ }
+
+ return count;
+}
+
+#define EXECUTIONS 10000
+#define THRESHOLD 100
+
+int test__bp_signal_overflow(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_event_attr pe;
+ struct sigaction sa;
+ long long count;
+ int fd, i, fails = 0;
+
+ /* setup SIGIO signal handler */
+ memset(&sa, 0, sizeof(struct sigaction));
+ sa.sa_sigaction = (void *) sig_handler;
+ sa.sa_flags = SA_SIGINFO;
+
+ if (sigaction(SIGIO, &sa, NULL) < 0) {
+ pr_debug("failed setting up signal handler\n");
+ return TEST_FAIL;
+ }
+
+ memset(&pe, 0, sizeof(struct perf_event_attr));
+ pe.type = PERF_TYPE_BREAKPOINT;
+ pe.size = sizeof(struct perf_event_attr);
+
+ pe.config = 0;
+ pe.bp_type = HW_BREAKPOINT_X;
+ pe.bp_addr = (unsigned long) test_function;
+ pe.bp_len = sizeof(long);
+
+ pe.sample_period = THRESHOLD;
+ pe.sample_type = PERF_SAMPLE_IP;
+ pe.wakeup_events = 1;
+
+ pe.disabled = 1;
+ pe.exclude_kernel = 1;
+ pe.exclude_hv = 1;
+
+ fd = sys_perf_event_open(&pe, 0, -1, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ pr_debug("failed opening event %llx\n", pe.config);
+ return TEST_FAIL;
+ }
+
+ fcntl(fd, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
+ fcntl(fd, F_SETSIG, SIGIO);
+ fcntl(fd, F_SETOWN, getpid());
+
+ ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+ ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+
+ for (i = 0; i < EXECUTIONS; i++)
+ test_function();
+
+ ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+
+ count = bp_count(fd);
+
+ close(fd);
+
+ pr_debug("count %lld, overflow %d\n",
+ count, overflows);
+
+ if (count != EXECUTIONS) {
+ pr_debug("\tWrong number of executions %lld != %d\n",
+ count, EXECUTIONS);
+ fails++;
+ }
+
+ if (overflows != EXECUTIONS / THRESHOLD) {
+ pr_debug("\tWrong number of overflows %d != %d\n",
+ overflows, EXECUTIONS / THRESHOLD);
+ fails++;
+ }
+
+ return fails ? TEST_FAIL : TEST_OK;
+}
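
Since sample_period is THRESHOLD, the counter advances on every execution but an overflow (and therefore a SIGIO) fires only once per elapsed period, which is what the two checks at the end expect. A trivial simulation of that accounting:

	#include <assert.h>

	int main(void)
	{
		const long long executions = 10000;	/* EXECUTIONS above */
		const long long period = 100;		/* THRESHOLD above */
		long long count = 0, overflows = 0, left = period;

		for (long long i = 0; i < executions; i++) {
			count++;			/* one count per hit */
			if (--left == 0) {		/* period elapsed */
				overflows++;
				left = period;
			}
		}
		assert(count == executions);
		assert(overflows == executions / period);
		return 0;
	}
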
diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c
new file mode 100644
index 000000000..1ca5106df
--- /dev/null
+++ b/tools/perf/tests/bpf-script-example.c
@@ -0,0 +1,48 @@
+/*
+ * bpf-script-example.c
+ * Test basic LLVM building
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200"' into the llvm section of ~/.perfconfig
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+ (void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") flip_table = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
+SEC("func=do_epoll_wait")
+int bpf_func__SyS_epoll_pwait(void *ctx)
+{
+	int ind = 0;
+ int *flag = bpf_map_lookup_elem(&flip_table, &ind);
+ int new_flag;
+ if (!flag)
+ return 0;
+ /* flip flag and store back */
+ new_flag = !*flag;
+ bpf_map_update_elem(&flip_table, &ind, &new_flag, BPF_ANY);
+ return new_flag;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
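
Because the map slot starts zero-initialized and is flipped on every invocation, the program returns 1 on odd-numbered calls only; the BPF test later in this series relies on exactly that, expecting (NR_ITERS + 1) / 2 samples from NR_ITERS probe hits. A minimal sketch of the arithmetic:

	#include <assert.h>

	int main(void)
	{
		int n = 111;			/* NR_ITERS in tests/bpf.c */
		int flag = 0, samples = 0;

		for (int i = 0; i < n; i++) {
			flag = !flag;		/* what the program above does */
			samples += flag;	/* sampled when filter returns 1 */
		}
		assert(samples == (n + 1) / 2);
		return 0;
	}
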
diff --git a/tools/perf/tests/bpf-script-test-kbuild.c b/tools/perf/tests/bpf-script-test-kbuild.c
new file mode 100644
index 000000000..ff3ec8337
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-kbuild.c
@@ -0,0 +1,20 @@
+/*
+ * bpf-script-test-kbuild.c
+ * Test include from kernel header
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200"' into the llvm section of ~/.perfconfig
+#endif
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#include <uapi/linux/fs.h>
+
+SEC("func=vfs_llseek")
+int bpf_func__vfs_llseek(void *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf-script-test-prologue.c b/tools/perf/tests/bpf-script-test-prologue.c
new file mode 100644
index 000000000..43f1e1648
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-prologue.c
@@ -0,0 +1,46 @@
+/*
+ * bpf-script-test-prologue.c
+ * Test BPF prologue
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200"' into the llvm section of ~/.perfconfig
+#endif
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+#include <uapi/linux/fs.h>
+
+/*
+ * If CONFIG_PROFILE_ALL_BRANCHES is selected,
+ * 'if' is redefined after include kernel header.
+ * Recover 'if' for BPF object code.
+ */
+#ifdef if
+# undef if
+#endif
+
+#define FMODE_READ 0x1
+#define FMODE_WRITE 0x2
+
+static void (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) 6;
+
+SEC("func=null_lseek file->f_mode offset orig")
+int bpf_func__null_lseek(void *ctx, int err, unsigned long _f_mode,
+ unsigned long offset, unsigned long orig)
+{
+ fmode_t f_mode = (fmode_t)_f_mode;
+
+ if (err)
+ return 0;
+ if (f_mode & FMODE_WRITE)
+ return 0;
+ if (offset & 1)
+ return 0;
+ if (orig == SEEK_CUR)
+ return 0;
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf-script-test-relocation.c b/tools/perf/tests/bpf-script-test-relocation.c
new file mode 100644
index 000000000..93af77421
--- /dev/null
+++ b/tools/perf/tests/bpf-script-test-relocation.c
@@ -0,0 +1,50 @@
+/*
+ * bpf-script-test-relocation.c
+ * Test BPF loader checking relocation
+ */
+#ifndef LINUX_VERSION_CODE
+# error Need LINUX_VERSION_CODE
+# error Example: for 4.2 kernel, put 'clang-opt="-DLINUX_VERSION_CODE=0x40200"' into the llvm section of ~/.perfconfig
+#endif
+#define BPF_ANY 0
+#define BPF_MAP_TYPE_ARRAY 2
+#define BPF_FUNC_map_lookup_elem 1
+#define BPF_FUNC_map_update_elem 2
+
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static void *(*bpf_map_update_elem)(void *map, void *key, void *value, int flags) =
+ (void *) BPF_FUNC_map_update_elem;
+
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+};
+
+#define SEC(NAME) __attribute__((section(NAME), used))
+struct bpf_map_def SEC("maps") my_table = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
+int this_is_a_global_val;
+
+SEC("func=sys_write")
+int bpf_func__sys_write(void *ctx)
+{
+ int key = 0;
+ int value = 0;
+
+ /*
+	 * Incorrect relocation. Should not allow this program to be
+	 * loaded into the kernel.
+ */
+ bpf_map_update_elem(&this_is_a_global_val, &key, &value, 0);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
new file mode 100644
index 000000000..df478f67b
--- /dev/null
+++ b/tools/perf/tests/bpf.c
@@ -0,0 +1,363 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <util/util.h>
+#include <util/bpf-loader.h>
+#include <util/evlist.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+#include <api/fs/fs.h>
+#include <bpf/bpf.h>
+#include "tests.h"
+#include "llvm.h"
+#include "debug.h"
+#define NR_ITERS 111
+#define PERF_TEST_BPF_PATH "/sys/fs/bpf/perf_test"
+
+#ifdef HAVE_LIBBPF_SUPPORT
+
+static int epoll_pwait_loop(void)
+{
+ int i;
+
+ /* Should fail NR_ITERS times */
+ for (i = 0; i < NR_ITERS; i++)
+ epoll_pwait(-(i + 1), NULL, 0, 0, NULL);
+ return 0;
+}
+
+#ifdef HAVE_BPF_PROLOGUE
+
+static int llseek_loop(void)
+{
+ int fds[2], i;
+
+ fds[0] = open("/dev/null", O_RDONLY);
+ fds[1] = open("/dev/null", O_RDWR);
+
+ if (fds[0] < 0 || fds[1] < 0)
+ return -1;
+
+ for (i = 0; i < NR_ITERS; i++) {
+ lseek(fds[i % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+ lseek(fds[(i + 1) % 2], i, (i / 2) % 2 ? SEEK_CUR : SEEK_SET);
+ }
+ close(fds[0]);
+ close(fds[1]);
+ return 0;
+}
+
+#endif
+
+static struct {
+ enum test_llvm__testcase prog_id;
+ const char *desc;
+ const char *name;
+ const char *msg_compile_fail;
+ const char *msg_load_fail;
+ int (*target_func)(void);
+ int expect_result;
+ bool pin;
+} bpf_testcase_table[] = {
+ {
+ .prog_id = LLVM_TESTCASE_BASE,
+ .desc = "Basic BPF filtering",
+ .name = "[basic_bpf_test]",
+ .msg_compile_fail = "fix 'perf test LLVM' first",
+ .msg_load_fail = "load bpf object failed",
+ .target_func = &epoll_pwait_loop,
+ .expect_result = (NR_ITERS + 1) / 2,
+ },
+ {
+ .prog_id = LLVM_TESTCASE_BASE,
+ .desc = "BPF pinning",
+ .name = "[bpf_pinning]",
+ .msg_compile_fail = "fix kbuild first",
+ .msg_load_fail = "check your vmlinux setting?",
+ .target_func = &epoll_pwait_loop,
+ .expect_result = (NR_ITERS + 1) / 2,
+ .pin = true,
+ },
+#ifdef HAVE_BPF_PROLOGUE
+ {
+ .prog_id = LLVM_TESTCASE_BPF_PROLOGUE,
+ .desc = "BPF prologue generation",
+ .name = "[bpf_prologue_test]",
+ .msg_compile_fail = "fix kbuild first",
+ .msg_load_fail = "check your vmlinux setting?",
+ .target_func = &llseek_loop,
+ .expect_result = (NR_ITERS + 1) / 4,
+ },
+#endif
+ {
+ .prog_id = LLVM_TESTCASE_BPF_RELOCATION,
+ .desc = "BPF relocation checker",
+ .name = "[bpf_relocation_test]",
+ .msg_compile_fail = "fix 'perf test LLVM' first",
+ .msg_load_fail = "libbpf error when dealing with relocation",
+ },
+};
+
+static int do_test(struct bpf_object *obj, int (*func)(void),
+ int expect)
+{
+ struct record_opts opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .freq = 0,
+ .mmap_pages = 256,
+ .default_interval = 1,
+ };
+
+ char pid[16];
+ char sbuf[STRERR_BUFSIZE];
+ struct perf_evlist *evlist;
+ int i, ret = TEST_FAIL, err = 0, count = 0;
+
+ struct parse_events_state parse_state;
+ struct parse_events_error parse_error;
+
+ bzero(&parse_error, sizeof(parse_error));
+ bzero(&parse_state, sizeof(parse_state));
+ parse_state.error = &parse_error;
+ INIT_LIST_HEAD(&parse_state.list);
+
+ err = parse_events_load_bpf_obj(&parse_state, &parse_state.list, obj, NULL);
+ if (err || list_empty(&parse_state.list)) {
+ pr_debug("Failed to add events selected by BPF\n");
+ return TEST_FAIL;
+ }
+
+ snprintf(pid, sizeof(pid), "%d", getpid());
+ pid[sizeof(pid) - 1] = '\0';
+ opts.target.tid = opts.target.pid = pid;
+
+	/* Unlike perf_evlist__new_default(), don't add any default events */
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("Not enough memory to create evlist\n");
+ return TEST_FAIL;
+ }
+
+ err = perf_evlist__create_maps(evlist, &opts.target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__splice_list_tail(evlist, &parse_state.list);
+ evlist->nr_groups = parse_state.nr_groups;
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ err = perf_evlist__mmap(evlist, opts.mmap_pages);
+ if (err < 0) {
+ pr_debug("perf_evlist__mmap: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__enable(evlist);
+ (*func)();
+ perf_evlist__disable(evlist);
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ union perf_event *event;
+ struct perf_mmap *md;
+
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ const u32 type = event->header.type;
+
+ if (type == PERF_RECORD_SAMPLE)
+				count++;
+ }
+ perf_mmap__read_done(md);
+ }
+
+ if (count != expect) {
+ pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
+ goto out_delete_evlist;
+ }
+
+ ret = TEST_OK;
+
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return ret;
+}
+
+static struct bpf_object *
+prepare_bpf(void *obj_buf, size_t obj_buf_sz, const char *name)
+{
+ struct bpf_object *obj;
+
+ obj = bpf__prepare_load_buffer(obj_buf, obj_buf_sz, name);
+ if (IS_ERR(obj)) {
+		pr_debug("Failed to compile BPF program.\n");
+ return NULL;
+ }
+ return obj;
+}
+
+static int __test__bpf(int idx)
+{
+ int ret;
+ void *obj_buf;
+ size_t obj_buf_sz;
+ struct bpf_object *obj;
+
+ ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
+ bpf_testcase_table[idx].prog_id,
+ true, NULL);
+ if (ret != TEST_OK || !obj_buf || !obj_buf_sz) {
+ pr_debug("Unable to get BPF object, %s\n",
+ bpf_testcase_table[idx].msg_compile_fail);
+ if (idx == 0)
+ return TEST_SKIP;
+ else
+ return TEST_FAIL;
+ }
+
+ obj = prepare_bpf(obj_buf, obj_buf_sz,
+ bpf_testcase_table[idx].name);
+ if ((!!bpf_testcase_table[idx].target_func) != (!!obj)) {
+ if (!obj)
+			pr_debug("Failed to load BPF object: %s\n",
+ bpf_testcase_table[idx].msg_load_fail);
+ else
+ pr_debug("Success unexpectedly: %s\n",
+ bpf_testcase_table[idx].msg_load_fail);
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ if (obj) {
+ ret = do_test(obj,
+ bpf_testcase_table[idx].target_func,
+ bpf_testcase_table[idx].expect_result);
+ if (ret != TEST_OK)
+ goto out;
+ if (bpf_testcase_table[idx].pin) {
+ int err;
+
+ if (!bpf_fs__mount()) {
+ pr_debug("BPF filesystem not mounted\n");
+ ret = TEST_FAIL;
+ goto out;
+ }
+ err = mkdir(PERF_TEST_BPF_PATH, 0777);
+ if (err && errno != EEXIST) {
+ pr_debug("Failed to make perf_test dir: %s\n",
+ strerror(errno));
+ ret = TEST_FAIL;
+ goto out;
+ }
+ if (bpf_object__pin(obj, PERF_TEST_BPF_PATH))
+ ret = TEST_FAIL;
+ if (rm_rf(PERF_TEST_BPF_PATH))
+ ret = TEST_FAIL;
+ }
+ }
+
+out:
+ free(obj_buf);
+ bpf__clear();
+ return ret;
+}
+
+int test__bpf_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(bpf_testcase_table);
+}
+
+const char *test__bpf_subtest_get_desc(int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
+ return NULL;
+ return bpf_testcase_table[i].desc;
+}
+
+static int check_env(void)
+{
+ int err;
+ unsigned int kver_int;
+ char license[] = "GPL";
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ err = fetch_kernel_version(&kver_int, NULL, 0);
+ if (err) {
+ pr_debug("Unable to get kernel version\n");
+ return err;
+ }
+
+ err = bpf_load_program(BPF_PROG_TYPE_KPROBE, insns,
+ sizeof(insns) / sizeof(insns[0]),
+ license, kver_int, NULL, 0);
+ if (err < 0) {
+ pr_err("Missing basic BPF support, skip this test: %s\n",
+ strerror(errno));
+ return err;
+ }
+ close(err);
+
+ return 0;
+}
+
+int test__bpf(struct test *test __maybe_unused, int i)
+{
+ int err;
+
+ if (i < 0 || i >= (int)ARRAY_SIZE(bpf_testcase_table))
+ return TEST_FAIL;
+
+ if (geteuid() != 0) {
+ pr_debug("Only root can run BPF test\n");
+ return TEST_SKIP;
+ }
+
+ if (check_env())
+ return TEST_SKIP;
+
+ err = __test__bpf(i);
+ return err;
+}
+
+#else
+int test__bpf_subtest_get_nr(void)
+{
+ return 0;
+}
+
+const char *test__bpf_subtest_get_desc(int i __maybe_unused)
+{
+ return NULL;
+}
+
+int test__bpf(struct test *test __maybe_unused, int i __maybe_unused)
+{
+	pr_debug("Skip BPF test because BPF support is not compiled in\n");
+ return TEST_SKIP;
+}
+#endif
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
new file mode 100644
index 000000000..d7a5e1b9a
--- /dev/null
+++ b/tools/perf/tests/builtin-test.c
@@ -0,0 +1,710 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * builtin-test.c
+ *
+ * Builtin regression testing command: ever-growing number of sanity tests
+ */
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include "builtin.h"
+#include "hist.h"
+#include "intlist.h"
+#include "tests.h"
+#include "debug.h"
+#include "color.h"
+#include <subcmd/parse-options.h>
+#include "string2.h"
+#include "symbol.h"
+#include <linux/kernel.h>
+#include <subcmd/exec-cmd.h>
+
+static bool dont_fork;
+
+struct test __weak arch_tests[] = {
+ {
+ .func = NULL,
+ },
+};
+
+static struct test generic_tests[] = {
+ {
+ .desc = "vmlinux symtab matches kallsyms",
+ .func = test__vmlinux_matches_kallsyms,
+ },
+ {
+ .desc = "Detect openat syscall event",
+ .func = test__openat_syscall_event,
+ },
+ {
+ .desc = "Detect openat syscall event on all cpus",
+ .func = test__openat_syscall_event_on_all_cpus,
+ },
+ {
+ .desc = "Read samples using the mmap interface",
+ .func = test__basic_mmap,
+ },
+ {
+ .desc = "Test data source output",
+ .func = test__mem,
+ },
+ {
+ .desc = "Parse event definition strings",
+ .func = test__parse_events,
+ },
+ {
+ .desc = "Simple expression parser",
+ .func = test__expr,
+ },
+ {
+ .desc = "PERF_RECORD_* events & perf_sample fields",
+ .func = test__PERF_RECORD,
+ },
+ {
+ .desc = "Parse perf pmu format",
+ .func = test__pmu,
+ },
+ {
+ .desc = "DSO data read",
+ .func = test__dso_data,
+ },
+ {
+ .desc = "DSO data cache",
+ .func = test__dso_data_cache,
+ },
+ {
+ .desc = "DSO data reopen",
+ .func = test__dso_data_reopen,
+ },
+ {
+ .desc = "Roundtrip evsel->name",
+ .func = test__perf_evsel__roundtrip_name_test,
+ },
+ {
+ .desc = "Parse sched tracepoints fields",
+ .func = test__perf_evsel__tp_sched_test,
+ },
+ {
+ .desc = "syscalls:sys_enter_openat event fields",
+ .func = test__syscall_openat_tp_fields,
+ },
+ {
+ .desc = "Setup struct perf_event_attr",
+ .func = test__attr,
+ },
+ {
+ .desc = "Match and link multiple hists",
+ .func = test__hists_link,
+ },
+ {
+ .desc = "'import perf' in python",
+ .func = test__python_use,
+ },
+ {
+ .desc = "Breakpoint overflow signal handler",
+ .func = test__bp_signal,
+ .is_supported = test__bp_signal_is_supported,
+ },
+ {
+ .desc = "Breakpoint overflow sampling",
+ .func = test__bp_signal_overflow,
+ .is_supported = test__bp_signal_is_supported,
+ },
+ {
+ .desc = "Breakpoint accounting",
+ .func = test__bp_accounting,
+ .is_supported = test__bp_signal_is_supported,
+ },
+ {
+ .desc = "Number of exit events of a simple workload",
+ .func = test__task_exit,
+ },
+ {
+ .desc = "Software clock events period values",
+ .func = test__sw_clock_freq,
+ },
+ {
+ .desc = "Object code reading",
+ .func = test__code_reading,
+ },
+ {
+ .desc = "Sample parsing",
+ .func = test__sample_parsing,
+ },
+ {
+ .desc = "Use a dummy software event to keep tracking",
+ .func = test__keep_tracking,
+ },
+ {
+ .desc = "Parse with no sample_id_all bit set",
+ .func = test__parse_no_sample_id_all,
+ },
+ {
+ .desc = "Filter hist entries",
+ .func = test__hists_filter,
+ },
+ {
+ .desc = "Lookup mmap thread",
+ .func = test__mmap_thread_lookup,
+ },
+ {
+ .desc = "Share thread mg",
+ .func = test__thread_mg_share,
+ },
+ {
+ .desc = "Sort output of hist entries",
+ .func = test__hists_output,
+ },
+ {
+ .desc = "Cumulate child hist entries",
+ .func = test__hists_cumulate,
+ },
+ {
+ .desc = "Track with sched_switch",
+ .func = test__switch_tracking,
+ },
+ {
+ .desc = "Filter fds with revents mask in a fdarray",
+ .func = test__fdarray__filter,
+ },
+ {
+ .desc = "Add fd to a fdarray, making it autogrow",
+ .func = test__fdarray__add,
+ },
+ {
+ .desc = "kmod_path__parse",
+ .func = test__kmod_path__parse,
+ },
+ {
+ .desc = "Thread map",
+ .func = test__thread_map,
+ },
+ {
+ .desc = "LLVM search and compile",
+ .func = test__llvm,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__llvm_subtest_get_nr,
+ .get_desc = test__llvm_subtest_get_desc,
+ },
+ },
+ {
+ .desc = "Session topology",
+ .func = test__session_topology,
+ },
+ {
+ .desc = "BPF filter",
+ .func = test__bpf,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__bpf_subtest_get_nr,
+ .get_desc = test__bpf_subtest_get_desc,
+ },
+ },
+ {
+ .desc = "Synthesize thread map",
+ .func = test__thread_map_synthesize,
+ },
+ {
+ .desc = "Remove thread map",
+ .func = test__thread_map_remove,
+ },
+ {
+ .desc = "Synthesize cpu map",
+ .func = test__cpu_map_synthesize,
+ },
+ {
+ .desc = "Synthesize stat config",
+ .func = test__synthesize_stat_config,
+ },
+ {
+ .desc = "Synthesize stat",
+ .func = test__synthesize_stat,
+ },
+ {
+ .desc = "Synthesize stat round",
+ .func = test__synthesize_stat_round,
+ },
+ {
+ .desc = "Synthesize attr update",
+ .func = test__event_update,
+ },
+ {
+ .desc = "Event times",
+ .func = test__event_times,
+ },
+ {
+ .desc = "Read backward ring buffer",
+ .func = test__backward_ring_buffer,
+ },
+ {
+ .desc = "Print cpu map",
+ .func = test__cpu_map_print,
+ },
+ {
+ .desc = "Probe SDT events",
+ .func = test__sdt_event,
+ },
+ {
+ .desc = "is_printable_array",
+ .func = test__is_printable_array,
+ },
+ {
+ .desc = "Print bitmap",
+ .func = test__bitmap_print,
+ },
+ {
+ .desc = "perf hooks",
+ .func = test__perf_hooks,
+ },
+ {
+ .desc = "builtin clang support",
+ .func = test__clang,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__clang_subtest_get_nr,
+ .get_desc = test__clang_subtest_get_desc,
+ }
+ },
+ {
+ .desc = "unit_number__scnprintf",
+ .func = test__unit_number__scnprint,
+ },
+ {
+ .desc = "mem2node",
+ .func = test__mem2node,
+ },
+ {
+ .func = NULL,
+ },
+};
+
+static struct test *tests[] = {
+ generic_tests,
+ arch_tests,
+};
+
+static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[])
+{
+ int i;
+
+ if (argc == 0)
+ return true;
+
+ for (i = 0; i < argc; ++i) {
+ char *end;
+ long nr = strtoul(argv[i], &end, 10);
+
+ if (*end == '\0') {
+ if (nr == curr + 1)
+ return true;
+ continue;
+ }
+
+ if (strcasestr(test->desc, argv[i]))
+ return true;
+ }
+
+ return false;
+}
+
+static int run_test(struct test *test, int subtest)
+{
+ int status, err = -1, child = dont_fork ? 0 : fork();
+ char sbuf[STRERR_BUFSIZE];
+
+ if (child < 0) {
+ pr_err("failed to fork test: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -1;
+ }
+
+ if (!child) {
+ if (!dont_fork) {
+ pr_debug("test child forked, pid %d\n", getpid());
+
+ if (verbose <= 0) {
+ int nullfd = open("/dev/null", O_WRONLY);
+
+ if (nullfd >= 0) {
+ close(STDERR_FILENO);
+ close(STDOUT_FILENO);
+
+ dup2(nullfd, STDOUT_FILENO);
+ dup2(STDOUT_FILENO, STDERR_FILENO);
+ close(nullfd);
+ }
+ } else {
+ signal(SIGSEGV, sighandler_dump_stack);
+ signal(SIGFPE, sighandler_dump_stack);
+ }
+ }
+
+ err = test->func(test, subtest);
+ if (!dont_fork)
+ exit(err);
+ }
+
+ if (!dont_fork) {
+ wait(&status);
+
+ if (WIFEXITED(status)) {
+ err = (signed char)WEXITSTATUS(status);
+ pr_debug("test child finished with %d\n", err);
+ } else if (WIFSIGNALED(status)) {
+ err = -1;
+ pr_debug("test child interrupted\n");
+ }
+ }
+
+ return err;
+}
+
+#define for_each_test(j, t) \
+ for (j = 0; j < ARRAY_SIZE(tests); j++) \
+ for (t = &tests[j][0]; t->func; t++)
+
+static int test_and_print(struct test *t, bool force_skip, int subtest)
+{
+ int err;
+
+ if (!force_skip) {
+ pr_debug("\n--- start ---\n");
+ err = run_test(t, subtest);
+ pr_debug("---- end ----\n");
+ } else {
+ pr_debug("\n--- force skipped ---\n");
+ err = TEST_SKIP;
+ }
+
+ if (!t->subtest.get_nr)
+ pr_debug("%s:", t->desc);
+ else
+ pr_debug("%s subtest %d:", t->desc, subtest + 1);
+
+ switch (err) {
+ case TEST_OK:
+ pr_info(" Ok\n");
+ break;
+ case TEST_SKIP:
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ break;
+ case TEST_FAIL:
+ default:
+ color_fprintf(stderr, PERF_COLOR_RED, " FAILED!\n");
+ break;
+ }
+
+ return err;
+}
+
+static const char *shell_test__description(char *description, size_t size,
+ const char *path, const char *name)
+{
+ FILE *fp;
+ char filename[PATH_MAX];
+
+ path__join(filename, sizeof(filename), path, name);
+ fp = fopen(filename, "r");
+ if (!fp)
+ return NULL;
+
+ description = fgets(description, size, fp);
+ fclose(fp);
+
+ return description ? trim(description + 1) : NULL;
+}
+
+#define for_each_shell_test(dir, base, ent) \
+ while ((ent = readdir(dir)) != NULL) \
+ if (!is_directory(base, ent) && ent->d_name[0] != '.')
+
+static const char *shell_tests__dir(char *path, size_t size)
+{
+ const char *devel_dirs[] = { "./tools/perf/tests", "./tests", };
+ char *exec_path;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(devel_dirs); ++i) {
+ struct stat st;
+ if (!lstat(devel_dirs[i], &st)) {
+ scnprintf(path, size, "%s/shell", devel_dirs[i]);
+ if (!lstat(devel_dirs[i], &st))
+ return path;
+ }
+ }
+
+ /* Then installed path. */
+ exec_path = get_argv_exec_path();
+ scnprintf(path, size, "%s/tests/shell", exec_path);
+ free(exec_path);
+ return path;
+}
+
+static int shell_tests__max_desc_width(void)
+{
+ DIR *dir;
+ struct dirent *ent;
+ char path_dir[PATH_MAX];
+ const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
+ int width = 0;
+
+ if (path == NULL)
+ return -1;
+
+ dir = opendir(path);
+ if (!dir)
+ return -1;
+
+ for_each_shell_test(dir, path, ent) {
+ char bf[256];
+ const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name);
+
+ if (desc) {
+ int len = strlen(desc);
+
+ if (width < len)
+ width = len;
+ }
+ }
+
+ closedir(dir);
+ return width;
+}
+
+struct shell_test {
+ const char *dir;
+ const char *file;
+};
+
+static int shell_test__run(struct test *test, int subdir __maybe_unused)
+{
+ int err;
+ char script[PATH_MAX];
+ struct shell_test *st = test->priv;
+
+ path__join(script, sizeof(script), st->dir, st->file);
+
+ err = system(script);
+ if (!err)
+ return TEST_OK;
+
+ return WEXITSTATUS(err) == 2 ? TEST_SKIP : TEST_FAIL;
+}
+
+static int run_shell_tests(int argc, const char *argv[], int i, int width)
+{
+ DIR *dir;
+ struct dirent *ent;
+ char path_dir[PATH_MAX];
+ struct shell_test st = {
+ .dir = shell_tests__dir(path_dir, sizeof(path_dir)),
+ };
+
+ if (st.dir == NULL)
+ return -1;
+
+ dir = opendir(st.dir);
+ if (!dir)
+ return -1;
+
+ for_each_shell_test(dir, st.dir, ent) {
+ int curr = i++;
+ char desc[256];
+ struct test test = {
+ .desc = shell_test__description(desc, sizeof(desc), st.dir, ent->d_name),
+ .func = shell_test__run,
+ .priv = &st,
+ };
+
+ if (!perf_test__matches(&test, curr, argc, argv))
+ continue;
+
+ st.file = ent->d_name;
+ pr_info("%2d: %-*s:", i, width, test.desc);
+ test_and_print(&test, false, -1);
+ }
+
+ closedir(dir);
+ return 0;
+}
+
+static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
+{
+ struct test *t;
+ unsigned int j;
+ int i = 0;
+ int width = shell_tests__max_desc_width();
+
+ for_each_test(j, t) {
+ int len = strlen(t->desc);
+
+ if (width < len)
+ width = len;
+ }
+
+ for_each_test(j, t) {
+ int curr = i++, err;
+
+ if (!perf_test__matches(t, curr, argc, argv))
+ continue;
+
+ if (t->is_supported && !t->is_supported()) {
+ pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
+ continue;
+ }
+
+ pr_info("%2d: %-*s:", i, width, t->desc);
+
+ if (intlist__find(skiplist, i)) {
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n");
+ continue;
+ }
+
+ if (!t->subtest.get_nr) {
+ test_and_print(t, false, -1);
+ } else {
+ int subn = t->subtest.get_nr();
+ /*
+			 * Minus 2 to align with normal test cases:
+			 * for subtests we print an additional '.x' in
+			 * the number, for example:
+ *
+ * 35: Test LLVM searching and compiling :
+ * 35.1: Basic BPF llvm compiling test : Ok
+ */
+ int subw = width > 2 ? width - 2 : width;
+ bool skip = false;
+ int subi;
+
+ if (subn <= 0) {
+ color_fprintf(stderr, PERF_COLOR_YELLOW,
+ " Skip (not compiled in)\n");
+ continue;
+ }
+ pr_info("\n");
+
+ for (subi = 0; subi < subn; subi++) {
+ int len = strlen(t->subtest.get_desc(subi));
+
+ if (subw < len)
+ subw = len;
+ }
+
+ for (subi = 0; subi < subn; subi++) {
+ pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
+ t->subtest.get_desc(subi));
+ err = test_and_print(t, skip, subi);
+ if (err != TEST_OK && t->subtest.skip_if_fail)
+ skip = true;
+ }
+ }
+ }
+
+ return run_shell_tests(argc, argv, i, width);
+}
+
+static int perf_test__list_shell(int argc, const char **argv, int i)
+{
+ DIR *dir;
+ struct dirent *ent;
+ char path_dir[PATH_MAX];
+ const char *path = shell_tests__dir(path_dir, sizeof(path_dir));
+
+ if (path == NULL)
+ return -1;
+
+ dir = opendir(path);
+ if (!dir)
+ return -1;
+
+ for_each_shell_test(dir, path, ent) {
+ int curr = i++;
+ char bf[256];
+ struct test t = {
+ .desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
+ };
+
+ if (!perf_test__matches(&t, curr, argc, argv))
+ continue;
+
+ pr_info("%2d: %s\n", i, t.desc);
+ }
+
+ closedir(dir);
+ return 0;
+}
+
+static int perf_test__list(int argc, const char **argv)
+{
+ unsigned int j;
+ struct test *t;
+ int i = 0;
+
+ for_each_test(j, t) {
+ int curr = i++;
+
+ if (!perf_test__matches(t, curr, argc, argv) ||
+ (t->is_supported && !t->is_supported()))
+ continue;
+
+ pr_info("%2d: %s\n", i, t->desc);
+
+ if (t->subtest.get_nr) {
+ int subn = t->subtest.get_nr();
+ int subi;
+
+ for (subi = 0; subi < subn; subi++)
+ pr_info("%2d:%1d: %s\n", i, subi + 1,
+ t->subtest.get_desc(subi));
+ }
+ }
+
+ perf_test__list_shell(argc, argv, i);
+
+ return 0;
+}
+
+int cmd_test(int argc, const char **argv)
+{
+ const char *test_usage[] = {
+ "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]",
+ NULL,
+ };
+ const char *skip = NULL;
+ const struct option test_options[] = {
+ OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show symbol address, etc)"),
+ OPT_BOOLEAN('F', "dont-fork", &dont_fork,
+ "Do not fork for testcase"),
+ OPT_END()
+ };
+ const char * const test_subcommands[] = { "list", NULL };
+ struct intlist *skiplist = NULL;
+ int ret = hists__init();
+
+ if (ret < 0)
+ return ret;
+
+ argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0);
+ if (argc >= 1 && !strcmp(argv[0], "list"))
+ return perf_test__list(argc - 1, argv + 1);
+
+ symbol_conf.priv_size = sizeof(int);
+ symbol_conf.sort_by_name = true;
+ symbol_conf.try_vmlinux_path = true;
+
+ if (symbol__init(NULL) < 0)
+ return -1;
+
+ if (skip != NULL)
+ skiplist = intlist__new(skip);
+
+ return __cmd_test(argc, argv, skiplist);
+}
diff --git a/tools/perf/tests/clang.c b/tools/perf/tests/clang.c
new file mode 100644
index 000000000..f45fe11dc
--- /dev/null
+++ b/tools/perf/tests/clang.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include "c++/clang-c.h"
+#include <linux/kernel.h>
+
+static struct {
+ int (*func)(void);
+ const char *desc;
+} clang_testcase_table[] = {
+#ifdef HAVE_LIBCLANGLLVM_SUPPORT
+ {
+ .func = test__clang_to_IR,
+ .desc = "builtin clang compile C source to IR",
+ },
+ {
+ .func = test__clang_to_obj,
+ .desc = "builtin clang compile C source to ELF object",
+ },
+#endif
+};
+
+int test__clang_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(clang_testcase_table);
+}
+
+const char *test__clang_subtest_get_desc(int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table))
+ return NULL;
+ return clang_testcase_table[i].desc;
+}
+
+#ifndef HAVE_LIBCLANGLLVM_SUPPORT
+int test__clang(struct test *test __maybe_unused, int i __maybe_unused)
+{
+ return TEST_SKIP;
+}
+#else
+int test__clang(struct test *test __maybe_unused, int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(clang_testcase_table))
+ return TEST_FAIL;
+ return clang_testcase_table[i].func();
+}
+#endif
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
new file mode 100644
index 000000000..6b049f3f5
--- /dev/null
+++ b/tools/perf/tests/code-reading.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/param.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "machine.h"
+#include "event.h"
+#include "thread.h"
+
+#include "tests.h"
+
+#include "sane_ctype.h"
+
+#define BUFSZ 1024
+#define READLEN 128
+
+struct state {
+ u64 done[1024];
+ size_t done_cnt;
+};
+
+static unsigned int hex(char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 10;
+ return c - 'A' + 10;
+}
+
+static size_t read_objdump_chunk(const char **line, unsigned char **buf,
+ size_t *buf_len)
+{
+ size_t bytes_read = 0;
+ unsigned char *chunk_start = *buf;
+
+ /* Read bytes */
+ while (*buf_len > 0) {
+ char c1, c2;
+
+ /* Get 2 hex digits */
+ c1 = *(*line)++;
+ if (!isxdigit(c1))
+ break;
+ c2 = *(*line)++;
+ if (!isxdigit(c2))
+ break;
+
+ /* Store byte and advance buf */
+ **buf = (hex(c1) << 4) | hex(c2);
+ (*buf)++;
+ (*buf_len)--;
+ bytes_read++;
+
+ /* End of chunk? */
+ if (isspace(**line))
+ break;
+ }
+
+ /*
+	 * objdump displays raw instructions as little-endian if the code
+	 * endianness is LE and bytes_per_chunk > 1. In that case, reverse
+	 * the chunk we just read.
+	 *
+	 * See disassemble_bytes() in binutils/objdump.c for details on
+	 * how objdump chooses the display endianness.
+ */
+ if (bytes_read > 1 && !bigendian()) {
+ unsigned char *chunk_end = chunk_start + bytes_read - 1;
+ unsigned char tmp;
+
+ while (chunk_start < chunk_end) {
+ tmp = *chunk_start;
+ *chunk_start = *chunk_end;
+ *chunk_end = tmp;
+ chunk_start++;
+ chunk_end--;
+ }
+ }
+
+ return bytes_read;
+}
+
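+/*
+ * Parse the raw instruction bytes from one line of 'objdump -d' output,
+ * which typically looks like:
+ *
+ *   4004f6:	55 48 89 e5	push   %rbp ...
+ *
+ * Everything before the colon is the address; the hex chunks after it
+ * are the bytes we want.
+ */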
+static size_t read_objdump_line(const char *line, unsigned char *buf,
+ size_t buf_len)
+{
+ const char *p;
+ size_t ret, bytes_read = 0;
+
+ /* Skip to a colon */
+ p = strchr(line, ':');
+ if (!p)
+ return 0;
+ p++;
+
+ /* Skip initial spaces */
+ while (*p) {
+ if (!isspace(*p))
+ break;
+ p++;
+ }
+
+ do {
+ ret = read_objdump_chunk(&p, &buf, &buf_len);
+ bytes_read += ret;
+ p++;
+ } while (ret > 0);
+
+ /* return number of successfully read bytes */
+ return bytes_read;
+}
+
+static int read_objdump_output(FILE *f, void *buf, size_t *len, u64 start_addr)
+{
+ char *line = NULL;
+ size_t line_len, off_last = 0;
+ ssize_t ret;
+ int err = 0;
+ u64 addr, last_addr = start_addr;
+
+ while (off_last < *len) {
+ size_t off, read_bytes, written_bytes;
+ unsigned char tmp[BUFSZ];
+
+ ret = getline(&line, &line_len, f);
+ if (feof(f))
+ break;
+ if (ret < 0) {
+ pr_debug("getline failed\n");
+ err = -1;
+ break;
+ }
+
+ /* read objdump data into temporary buffer */
+ read_bytes = read_objdump_line(line, tmp, sizeof(tmp));
+ if (!read_bytes)
+ continue;
+
+ if (sscanf(line, "%"PRIx64, &addr) != 1)
+ continue;
+ if (addr < last_addr) {
+ pr_debug("addr going backwards, read beyond section?\n");
+ break;
+ }
+ last_addr = addr;
+
+		/*
+		 * Copy from the temporary buffer to 'buf' according to
+		 * the address on the current objdump line.
+		 */
+ off = addr - start_addr;
+ if (off >= *len)
+ break;
+ written_bytes = MIN(read_bytes, *len - off);
+ memcpy(buf + off, tmp, written_bytes);
+ off_last = off + written_bytes;
+ }
+
+	/* On return, *len is the number of bytes that could not be read. */
+ *len -= off_last;
+
+ free(line);
+
+ return err;
+}
+
+static int read_via_objdump(const char *filename, u64 addr, void *buf,
+ size_t len)
+{
+ char cmd[PATH_MAX * 2];
+ const char *fmt;
+ FILE *f;
+ int ret;
+
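+	/*
+	 * -z disassembles blocks of zeroes too (objdump skips them by
+	 * default) and --start/--stop-address bound the dump to exactly
+	 * the range we want to compare.
+	 */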
+ fmt = "%s -z -d --start-address=0x%"PRIx64" --stop-address=0x%"PRIx64" %s";
+ ret = snprintf(cmd, sizeof(cmd), fmt, "objdump", addr, addr + len,
+ filename);
+ if (ret <= 0 || (size_t)ret >= sizeof(cmd))
+ return -1;
+
+ pr_debug("Objdump command is: %s\n", cmd);
+
+ /* Ignore objdump errors */
+ strcat(cmd, " 2>/dev/null");
+
+ f = popen(cmd, "r");
+ if (!f) {
+ pr_debug("popen failed\n");
+ return -1;
+ }
+
+ ret = read_objdump_output(f, buf, &len, addr);
+ if (len) {
+ pr_debug("objdump read too few bytes: %zd\n", len);
+ if (!ret)
+ ret = len;
+ }
+
+ pclose(f);
+
+ return ret;
+}
+
+static void dump_buf(unsigned char *buf, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ pr_debug("0x%02x ", buf[i]);
+ if (i % 16 == 15)
+ pr_debug("\n");
+ }
+ pr_debug("\n");
+}
+
+static int read_object_code(u64 addr, size_t len, u8 cpumode,
+ struct thread *thread, struct state *state)
+{
+ struct addr_location al;
+ unsigned char buf1[BUFSZ];
+ unsigned char buf2[BUFSZ];
+ size_t ret_len;
+ u64 objdump_addr;
+ const char *objdump_name;
+ char decomp_name[KMOD_DECOMP_LEN];
+ bool decomp = false;
+ int ret;
+
+ pr_debug("Reading object code for memory address: %#"PRIx64"\n", addr);
+
+ if (!thread__find_map(thread, cpumode, addr, &al) || !al.map->dso) {
+ if (cpumode == PERF_RECORD_MISC_HYPERVISOR) {
+ pr_debug("Hypervisor address can not be resolved - skipping\n");
+ return 0;
+ }
+
+ pr_debug("thread__find_map failed\n");
+ return -1;
+ }
+
+ pr_debug("File is: %s\n", al.map->dso->long_name);
+
+ if (al.map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+ !dso__is_kcore(al.map->dso)) {
+ pr_debug("Unexpected kernel address - skipping\n");
+ return 0;
+ }
+
+ pr_debug("On file address is: %#"PRIx64"\n", al.addr);
+
+ if (len > BUFSZ)
+ len = BUFSZ;
+
+ /* Do not go off the map */
+ if (addr + len > al.map->end)
+ len = al.map->end - addr;
+
+ /* Read the object code using perf */
+ ret_len = dso__data_read_offset(al.map->dso, thread->mg->machine,
+ al.addr, buf1, len);
+ if (ret_len != len) {
+ pr_debug("dso__data_read_offset failed\n");
+ return -1;
+ }
+
+ /*
+ * Converting addresses for use by objdump requires more information.
+ * map__load() does that. See map__rip_2objdump() for details.
+ */
+ if (map__load(al.map))
+ return -1;
+
+ /* objdump struggles with kcore - try each map only once */
+ if (dso__is_kcore(al.map->dso)) {
+ size_t d;
+
+ for (d = 0; d < state->done_cnt; d++) {
+ if (state->done[d] == al.map->start) {
+ pr_debug("kcore map tested already");
+ pr_debug(" - skipping\n");
+ return 0;
+ }
+ }
+ if (state->done_cnt >= ARRAY_SIZE(state->done)) {
+ pr_debug("Too many kcore maps - skipping\n");
+ return 0;
+ }
+ state->done[state->done_cnt++] = al.map->start;
+ }
+
+ objdump_name = al.map->dso->long_name;
+ if (dso__needs_decompress(al.map->dso)) {
+ if (dso__decompress_kmodule_path(al.map->dso, objdump_name,
+ decomp_name,
+ sizeof(decomp_name)) < 0) {
+ pr_debug("decompression failed\n");
+ return -1;
+ }
+
+ decomp = true;
+ objdump_name = decomp_name;
+ }
+
+ /* Read the object code using objdump */
+ objdump_addr = map__rip_2objdump(al.map, al.addr);
+ ret = read_via_objdump(objdump_name, objdump_addr, buf2, len);
+
+ if (decomp)
+ unlink(objdump_name);
+
+ if (ret > 0) {
+ /*
+ * The kernel maps are inaccurate - assume objdump is right in
+ * that case.
+ */
+ if (cpumode == PERF_RECORD_MISC_KERNEL ||
+ cpumode == PERF_RECORD_MISC_GUEST_KERNEL) {
+ len -= ret;
+ if (len) {
+ pr_debug("Reducing len to %zu\n", len);
+ } else if (dso__is_kcore(al.map->dso)) {
+ /*
+ * objdump cannot handle very large segments
+ * that may be found in kcore.
+ */
+ pr_debug("objdump failed for kcore");
+ pr_debug(" - skipping\n");
+ return 0;
+ } else {
+ return -1;
+ }
+ }
+ }
+ if (ret < 0) {
+ pr_debug("read_via_objdump failed\n");
+ return -1;
+ }
+
+ /* The results should be identical */
+ if (memcmp(buf1, buf2, len)) {
+ pr_debug("Bytes read differ from those read by objdump\n");
+ pr_debug("buf1 (dso):\n");
+ dump_buf(buf1, len);
+ pr_debug("buf2 (objdump):\n");
+ dump_buf(buf2, len);
+ return -1;
+ }
+ pr_debug("Bytes read match those read by objdump\n");
+
+ return 0;
+}
+
+static int process_sample_event(struct machine *machine,
+ struct perf_evlist *evlist,
+ union perf_event *event, struct state *state)
+{
+ struct perf_sample sample;
+ struct thread *thread;
+ int ret;
+
+ if (perf_evlist__parse_sample(evlist, event, &sample)) {
+ pr_debug("perf_evlist__parse_sample failed\n");
+ return -1;
+ }
+
+ thread = machine__findnew_thread(machine, sample.pid, sample.tid);
+ if (!thread) {
+ pr_debug("machine__findnew_thread failed\n");
+ return -1;
+ }
+
+ ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
+ thread__put(thread);
+ return ret;
+}
+
+static int process_event(struct machine *machine, struct perf_evlist *evlist,
+ union perf_event *event, struct state *state)
+{
+ if (event->header.type == PERF_RECORD_SAMPLE)
+ return process_sample_event(machine, evlist, event, state);
+
+ if (event->header.type == PERF_RECORD_THROTTLE ||
+ event->header.type == PERF_RECORD_UNTHROTTLE)
+ return 0;
+
+ if (event->header.type < PERF_RECORD_MAX) {
+ int ret;
+
+ ret = machine__process_event(machine, event, NULL);
+ if (ret < 0)
+ pr_debug("machine__process_event failed, event type %u\n",
+ event->header.type);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int process_events(struct machine *machine, struct perf_evlist *evlist,
+ struct state *state)
+{
+ union perf_event *event;
+ struct perf_mmap *md;
+ int i, ret;
+
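+	/* Drain every mmap ring buffer, handling each event in turn. */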
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ ret = process_event(machine, evlist, event, state);
+ perf_mmap__consume(md);
+ if (ret < 0)
+ return ret;
+ }
+ perf_mmap__read_done(md);
+ }
+ return 0;
+}
+
+static int comp(const void *a, const void *b)
+{
+ return *(int *)a - *(int *)b;
+}
+
+static void do_sort_something(void)
+{
+ int buf[40960], i;
+
+ for (i = 0; i < (int)ARRAY_SIZE(buf); i++)
+ buf[i] = ARRAY_SIZE(buf) - i - 1;
+
+ qsort(buf, ARRAY_SIZE(buf), sizeof(int), comp);
+
+ for (i = 0; i < (int)ARRAY_SIZE(buf); i++) {
+ if (buf[i] != i) {
+ pr_debug("qsort failed\n");
+ break;
+ }
+ }
+}
+
+static void sort_something(void)
+{
+ int i;
+
+ for (i = 0; i < 10; i++)
+ do_sort_something();
+}
+
+static void syscall_something(void)
+{
+ int pipefd[2];
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ if (pipe(pipefd) < 0) {
+ pr_debug("pipe failed\n");
+ break;
+ }
+ close(pipefd[1]);
+ close(pipefd[0]);
+ }
+}
+
+static void fs_something(void)
+{
+ const char *test_file_name = "temp-perf-code-reading-test-file--";
+ FILE *f;
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ f = fopen(test_file_name, "w+");
+ if (f) {
+ fclose(f);
+ unlink(test_file_name);
+ }
+ }
+}
+
+static const char *do_determine_event(bool excl_kernel)
+{
+ const char *event = excl_kernel ? "cycles:u" : "cycles";
+
+#ifdef __s390x__
+ char cpuid[128], model[16], model_c[16], cpum_cf_v[16];
+ unsigned int family;
+ int ret, cpum_cf_a;
+
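+	/*
+	 * On s390x, use hardware cycles only if the CPU-measurement
+	 * counter facility is authorized; otherwise fall back to
+	 * cpu-clock below.
+	 */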
+ if (get_cpuid(cpuid, sizeof(cpuid)))
+ goto out_clocks;
+ ret = sscanf(cpuid, "%*[^,],%u,%[^,],%[^,],%[^,],%x", &family, model_c,
+ model, cpum_cf_v, &cpum_cf_a);
+ if (ret != 5) /* Not available */
+ goto out_clocks;
+ if (excl_kernel && (cpum_cf_a & 4))
+ return event;
+ if (!excl_kernel && (cpum_cf_a & 2))
+ return event;
+
+ /* Fall through: missing authorization */
+out_clocks:
+ event = excl_kernel ? "cpu-clock:u" : "cpu-clock";
+
+#endif
+ return event;
+}
+
+static void do_something(void)
+{
+ fs_something();
+
+ sort_something();
+
+ syscall_something();
+}
+
+enum {
+ TEST_CODE_READING_OK,
+ TEST_CODE_READING_NO_VMLINUX,
+ TEST_CODE_READING_NO_KCORE,
+ TEST_CODE_READING_NO_ACCESS,
+ TEST_CODE_READING_NO_KERNEL_OBJ,
+};
+
+static int do_test_code_reading(bool try_kcore)
+{
+ struct machine *machine;
+ struct thread *thread;
+ struct record_opts opts = {
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 500,
+ .target = {
+ .uses_mmap = true,
+ },
+ };
+ struct state state = {
+ .done_cnt = 0,
+ };
+ struct thread_map *threads = NULL;
+ struct cpu_map *cpus = NULL;
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel = NULL;
+ int err = -1, ret;
+ pid_t pid;
+ struct map *map;
+ bool have_vmlinux, have_kcore, excl_kernel = false;
+
+ pid = getpid();
+
+ machine = machine__new_host();
+ machine->env = &perf_env;
+
+ ret = machine__create_kernel_maps(machine);
+ if (ret < 0) {
+ pr_debug("machine__create_kernel_maps failed\n");
+ goto out_err;
+ }
+
+ /* Force the use of kallsyms instead of vmlinux to try kcore */
+ if (try_kcore)
+ symbol_conf.kallsyms_name = "/proc/kallsyms";
+
+ /* Load kernel map */
+ map = machine__kernel_map(machine);
+ ret = map__load(map);
+ if (ret < 0) {
+ pr_debug("map__load failed\n");
+ goto out_err;
+ }
+ have_vmlinux = dso__is_vmlinux(map->dso);
+ have_kcore = dso__is_kcore(map->dso);
+
+ /* 2nd time through we just try kcore */
+ if (try_kcore && !have_kcore)
+ return TEST_CODE_READING_NO_KCORE;
+
+ /* No point getting kernel events if there is no kernel object */
+ if (!have_vmlinux && !have_kcore)
+ excl_kernel = true;
+
+ threads = thread_map__new_by_tid(pid);
+ if (!threads) {
+ pr_debug("thread_map__new_by_tid failed\n");
+ goto out_err;
+ }
+
+ ret = perf_event__synthesize_thread_map(NULL, threads,
+ perf_event__process, machine, false, 500);
+ if (ret < 0) {
+ pr_debug("perf_event__synthesize_thread_map failed\n");
+ goto out_err;
+ }
+
+ thread = machine__findnew_thread(machine, pid, pid);
+ if (!thread) {
+ pr_debug("machine__findnew_thread failed\n");
+ goto out_put;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (!cpus) {
+ pr_debug("cpu_map__new failed\n");
+ goto out_put;
+ }
+
+ while (1) {
+ const char *str;
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("perf_evlist__new failed\n");
+ goto out_put;
+ }
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ str = do_determine_event(excl_kernel);
+ pr_debug("Parsing event '%s'\n", str);
+ ret = parse_events(evlist, str, NULL);
+ if (ret < 0) {
+ pr_debug("parse_events failed\n");
+ goto out_put;
+ }
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ evsel = perf_evlist__first(evlist);
+
+ evsel->attr.comm = 1;
+ evsel->attr.disabled = 1;
+ evsel->attr.enable_on_exec = 0;
+
+ ret = perf_evlist__open(evlist);
+ if (ret < 0) {
+ if (!excl_kernel) {
+ excl_kernel = true;
+ /*
+ * Both cpus and threads are now owned by evlist
+				 * and will be freed by the following
+				 * perf_evlist__set_maps call. Grab a
+				 * reference to keep them alive.
+ */
+ cpu_map__get(cpus);
+ thread_map__get(threads);
+ perf_evlist__set_maps(evlist, NULL, NULL);
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ continue;
+ }
+
+ if (verbose > 0) {
+ char errbuf[512];
+ perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
+ }
+
+ goto out_put;
+ }
+ break;
+ }
+
+ ret = perf_evlist__mmap(evlist, UINT_MAX);
+ if (ret < 0) {
+ pr_debug("perf_evlist__mmap failed\n");
+ goto out_put;
+ }
+
+ perf_evlist__enable(evlist);
+
+ do_something();
+
+ perf_evlist__disable(evlist);
+
+ ret = process_events(machine, evlist, &state);
+ if (ret < 0)
+ goto out_put;
+
+ if (!have_vmlinux && !have_kcore && !try_kcore)
+ err = TEST_CODE_READING_NO_KERNEL_OBJ;
+ else if (!have_vmlinux && !try_kcore)
+ err = TEST_CODE_READING_NO_VMLINUX;
+ else if (excl_kernel)
+ err = TEST_CODE_READING_NO_ACCESS;
+ else
+ err = TEST_CODE_READING_OK;
+out_put:
+ thread__put(thread);
+out_err:
+
+ if (evlist) {
+ perf_evlist__delete(evlist);
+ } else {
+ cpu_map__put(cpus);
+ thread_map__put(threads);
+ }
+ machine__delete_threads(machine);
+ machine__delete(machine);
+
+ return err;
+}
+
+int test__code_reading(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = do_test_code_reading(false);
+ if (!ret)
+ ret = do_test_code_reading(true);
+
+ switch (ret) {
+ case TEST_CODE_READING_OK:
+ return 0;
+ case TEST_CODE_READING_NO_VMLINUX:
+ pr_debug("no vmlinux\n");
+ return 0;
+ case TEST_CODE_READING_NO_KCORE:
+ pr_debug("no kcore\n");
+ return 0;
+ case TEST_CODE_READING_NO_ACCESS:
+ pr_debug("no access\n");
+ return 0;
+ case TEST_CODE_READING_NO_KERNEL_OBJ:
+ pr_debug("no kernel obj\n");
+ return 0;
+ default:
+ return -1;
+ };
+}
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
new file mode 100644
index 000000000..e78b89767
--- /dev/null
+++ b/tools/perf/tests/cpumap.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include <stdio.h>
+#include "cpumap.h"
+#include "event.h"
+#include <string.h>
+#include <linux/bitops.h>
+#include "debug.h"
+
+struct machine;
+
+static int process_event_mask(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cpu_map_event *map_event = &event->cpu_map;
+ struct cpu_map_mask *mask;
+ struct cpu_map_data *data;
+ struct cpu_map *map;
+ int i;
+
+ data = &map_event->data;
+
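+	/* The 20 consecutive CPUs synthesized below should arrive encoded as a mask. */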
+ TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__MASK);
+
+ mask = (struct cpu_map_mask *)data->data;
+
+ TEST_ASSERT_VAL("wrong nr", mask->nr == 1);
+
+ for (i = 0; i < 20; i++) {
+ TEST_ASSERT_VAL("wrong cpu", test_bit(i, mask->mask));
+ }
+
+ map = cpu_map__new_data(data);
+ TEST_ASSERT_VAL("wrong nr", map->nr == 20);
+
+ for (i = 0; i < 20; i++) {
+ TEST_ASSERT_VAL("wrong cpu", map->map[i] == i);
+ }
+
+ cpu_map__put(map);
+ return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cpu_map_event *map_event = &event->cpu_map;
+ struct cpu_map_entries *cpus;
+ struct cpu_map_data *data;
+ struct cpu_map *map;
+
+ data = &map_event->data;
+
+ TEST_ASSERT_VAL("wrong type", data->type == PERF_CPU_MAP__CPUS);
+
+ cpus = (struct cpu_map_entries *)data->data;
+
+ TEST_ASSERT_VAL("wrong nr", cpus->nr == 2);
+ TEST_ASSERT_VAL("wrong cpu", cpus->cpu[0] == 1);
+ TEST_ASSERT_VAL("wrong cpu", cpus->cpu[1] == 256);
+
+ map = cpu_map__new_data(data);
+ TEST_ASSERT_VAL("wrong nr", map->nr == 2);
+ TEST_ASSERT_VAL("wrong cpu", map->map[0] == 1);
+ TEST_ASSERT_VAL("wrong cpu", map->map[1] == 256);
+ TEST_ASSERT_VAL("wrong refcnt", refcount_read(&map->refcnt) == 1);
+ cpu_map__put(map);
+ return 0;
+}
+
+
+int test__cpu_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct cpu_map *cpus;
+
+	/* This one is better stored as a mask. */
+ cpus = cpu_map__new("0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19");
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_cpu_map(NULL, cpus, process_event_mask, NULL));
+
+ cpu_map__put(cpus);
+
+	/* This one is better stored as individual CPU values. */
+ cpus = cpu_map__new("1,256");
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_cpu_map(NULL, cpus, process_event_cpus, NULL));
+
+ cpu_map__put(cpus);
+ return 0;
+}
+
+static int cpu_map_print(const char *str)
+{
+ struct cpu_map *map = cpu_map__new(str);
+ char buf[100];
+
+ if (!map)
+ return -1;
+
+ cpu_map__snprint(map, buf, sizeof(buf));
+ return !strcmp(buf, str);
+}
+
+int test__cpu_map_print(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1"));
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,5"));
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,3,5,7,9,11,13,15,17,19,21-40"));
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("2-5"));
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,3-6,8-10,24,35-37"));
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1,3-6,8-10,24,35-37"));
+ TEST_ASSERT_VAL("failed to convert map", cpu_map_print("1-10,12-20,22-30,32-40"));
+ return 0;
+}
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
new file mode 100644
index 000000000..7f6c52021
--- /dev/null
+++ b/tools/perf/tests/dso-data.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <api/fs/fs.h>
+#include "util.h"
+#include "machine.h"
+#include "symbol.h"
+#include "tests.h"
+#include "debug.h"
+
+static char *test_file(int size)
+{
+#define TEMPL "/tmp/perf-test-XXXXXX"
+ static char buf_templ[sizeof(TEMPL)];
+ char *templ = buf_templ;
+ int fd, i;
+ unsigned char *buf;
+
+ strcpy(buf_templ, TEMPL);
+#undef TEMPL
+
+ fd = mkstemp(templ);
+ if (fd < 0) {
+ perror("mkstemp failed");
+ return NULL;
+ }
+
+ buf = malloc(size);
+ if (!buf) {
+ close(fd);
+ return NULL;
+ }
+
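+	/* Fill the file with the repeating byte pattern 0,1,...,9. */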
+ for (i = 0; i < size; i++)
+ buf[i] = (unsigned char) ((int) i % 10);
+
+ if (size != write(fd, buf, size))
+ templ = NULL;
+
+ free(buf);
+ close(fd);
+ return templ;
+}
+
+#define TEST_FILE_SIZE (DSO__DATA_CACHE_SIZE * 20)
+
+struct test_data_offset {
+ off_t offset;
+ u8 data[10];
+ int size;
+};
+
+struct test_data_offset offsets[] = {
+ /* Fill first cache page. */
+ {
+ .offset = 10,
+ .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+ .size = 10,
+ },
+ /* Read first cache page. */
+ {
+ .offset = 10,
+ .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+ .size = 10,
+ },
+ /* Fill cache boundary pages. */
+ {
+ .offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10,
+ .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+ .size = 10,
+ },
+ /* Read cache boundary pages. */
+ {
+ .offset = DSO__DATA_CACHE_SIZE - DSO__DATA_CACHE_SIZE % 10,
+ .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+ .size = 10,
+ },
+ /* Fill final cache page. */
+ {
+ .offset = TEST_FILE_SIZE - 10,
+ .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+ .size = 10,
+ },
+ /* Read final cache page. */
+ {
+ .offset = TEST_FILE_SIZE - 10,
+ .data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
+ .size = 10,
+ },
+ /* Read final cache page. */
+ {
+ .offset = TEST_FILE_SIZE - 3,
+ .data = { 7, 8, 9, 0, 0, 0, 0, 0, 0, 0 },
+ .size = 3,
+ },
+};
+
+/* Moved here from util/dso.c for compatibility. */
+static int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+ int fd = dso__data_get_fd(dso, machine);
+
+ if (fd >= 0)
+ dso__data_put_fd(dso);
+
+ return fd;
+}
+
+int test__dso_data(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct machine machine;
+ struct dso *dso;
+ char *file = test_file(TEST_FILE_SIZE);
+ size_t i;
+
+ TEST_ASSERT_VAL("No test file", file);
+
+ memset(&machine, 0, sizeof(machine));
+
+ dso = dso__new((const char *)file);
+
+	TEST_ASSERT_VAL("Failed to access dso",
+ dso__data_fd(dso, &machine) >= 0);
+
+ /* Basic 10 bytes tests. */
+ for (i = 0; i < ARRAY_SIZE(offsets); i++) {
+ struct test_data_offset *data = &offsets[i];
+ ssize_t size;
+ u8 buf[10];
+
+ memset(buf, 0, 10);
+ size = dso__data_read_offset(dso, &machine, data->offset,
+ buf, 10);
+
+ TEST_ASSERT_VAL("Wrong size", size == data->size);
+ TEST_ASSERT_VAL("Wrong data", !memcmp(buf, data->data, 10));
+ }
+
+ /* Read cross multiple cache pages. */
+ {
+ ssize_t size;
+ int c;
+ u8 *buf;
+
+ buf = malloc(TEST_FILE_SIZE);
+ TEST_ASSERT_VAL("ENOMEM\n", buf);
+
+ /* First iteration to fill caches, second one to read them. */
+ for (c = 0; c < 2; c++) {
+ memset(buf, 0, TEST_FILE_SIZE);
+ size = dso__data_read_offset(dso, &machine, 10,
+ buf, TEST_FILE_SIZE);
+
+ TEST_ASSERT_VAL("Wrong size",
+ size == (TEST_FILE_SIZE - 10));
+
+ for (i = 0; i < (size_t)size; i++)
+ TEST_ASSERT_VAL("Wrong data",
+ buf[i] == (i % 10));
+ }
+
+ free(buf);
+ }
+
+ dso__put(dso);
+ unlink(file);
+ return 0;
+}
+
+static long open_files_cnt(void)
+{
+ char path[PATH_MAX];
+ struct dirent *dent;
+ DIR *dir;
+ long nr = 0;
+
+ scnprintf(path, PATH_MAX, "%s/self/fd", procfs__mountpoint());
+ pr_debug("fd path: %s\n", path);
+
+ dir = opendir(path);
+ TEST_ASSERT_VAL("failed to open fd directory", dir);
+
+ while ((dent = readdir(dir)) != NULL) {
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ nr++;
+ }
+
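+	/* Do not count the fd used by opendir() to read the directory itself. */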
+ closedir(dir);
+ return nr - 1;
+}
+
+static struct dso **dsos;
+
+static int dsos__create(int cnt, int size)
+{
+ int i;
+
+ dsos = malloc(sizeof(*dsos) * cnt);
+ TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
+
+ for (i = 0; i < cnt; i++) {
+ char *file;
+
+ file = test_file(size);
+ TEST_ASSERT_VAL("failed to get dso file", file);
+
+ dsos[i] = dso__new(file);
+ TEST_ASSERT_VAL("failed to get dso", dsos[i]);
+ }
+
+ return 0;
+}
+
+static void dsos__delete(int cnt)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ struct dso *dso = dsos[i];
+
+ unlink(dso->name);
+ dso__put(dso);
+ }
+
+ free(dsos);
+}
+
+static int set_fd_limit(int n)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim))
+ return -1;
+
+ pr_debug("file limit %ld, new %d\n", (long) rlim.rlim_cur, n);
+
+ rlim.rlim_cur = n;
+ return setrlimit(RLIMIT_NOFILE, &rlim);
+}
+
+int test__dso_data_cache(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct machine machine;
+ long nr_end, nr = open_files_cnt();
+ int dso_cnt, limit, i, fd;
+
+	/* Reset the internal dso open counter limit. */
+ reset_fd_limit();
+
+ memset(&machine, 0, sizeof(machine));
+
+ /* set as system limit */
+ limit = nr * 4;
+ TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit));
+
+ /* and this is now our dso open FDs limit */
+ dso_cnt = limit / 2;
+ TEST_ASSERT_VAL("failed to create dsos\n",
+ !dsos__create(dso_cnt, TEST_FILE_SIZE));
+
+ for (i = 0; i < (dso_cnt - 1); i++) {
+ struct dso *dso = dsos[i];
+
+ /*
+		 * Open dsos via dso__data_fd(); it opens the data
+		 * file and keeps it open (until the open file limit
+		 * is reached).
+ */
+ fd = dso__data_fd(dso, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ if (i % 2) {
+ #define BUFSIZE 10
+ u8 buf[BUFSIZE];
+ ssize_t n;
+
+ n = dso__data_read_offset(dso, &machine, 0, buf, BUFSIZE);
+ TEST_ASSERT_VAL("failed to read dso", n == BUFSIZE);
+ }
+ }
+
+ /* verify the first one is already open */
+ TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1);
+
+ /* open +1 dso to reach the allowed limit */
+ fd = dso__data_fd(dsos[i], &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /* should force the first one to be closed */
+ TEST_ASSERT_VAL("failed to close dsos[0]", dsos[0]->data.fd == -1);
+
+ /* cleanup everything */
+ dsos__delete(dso_cnt);
+
+ /* Make sure we did not leak any file descriptor. */
+ nr_end = open_files_cnt();
+ pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
+	TEST_ASSERT_VAL("leaked files", nr == nr_end);
+ return 0;
+}
+
+int test__dso_data_reopen(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct machine machine;
+ long nr_end, nr = open_files_cnt();
+ int fd, fd_extra;
+
+#define dso_0 (dsos[0])
+#define dso_1 (dsos[1])
+#define dso_2 (dsos[2])
+
+	/* Reset the internal dso open counter limit. */
+ reset_fd_limit();
+
+ memset(&machine, 0, sizeof(machine));
+
+ /*
+ * Test scenario:
+ * - create 3 dso objects
+ * - set process file descriptor limit to current
+ * files count + 3
+ * - test that the first dso gets closed when we
+ * reach the files count limit
+ */
+
+ /* Make sure we are able to open 3 fds anyway */
+ TEST_ASSERT_VAL("failed to set file limit",
+ !set_fd_limit((nr + 3)));
+
+ TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE));
+
+ /* open dso_0 */
+ fd = dso__data_fd(dso_0, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /* open dso_1 */
+ fd = dso__data_fd(dso_1, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /*
+ * open extra file descriptor and we just
+ * reached the files count limit
+ */
+ fd_extra = open("/dev/null", O_RDONLY);
+ TEST_ASSERT_VAL("failed to open extra fd", fd_extra > 0);
+
+ /* open dso_2 */
+ fd = dso__data_fd(dso_2, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /*
+ * dso_0 should get closed, because we reached
+ * the file descriptor limit
+ */
+ TEST_ASSERT_VAL("failed to close dso_0", dso_0->data.fd == -1);
+
+ /* open dso_0 */
+ fd = dso__data_fd(dso_0, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /*
+ * dso_1 should get closed, because we reached
+ * the file descriptor limit
+ */
+ TEST_ASSERT_VAL("failed to close dso_1", dso_1->data.fd == -1);
+
+ /* cleanup everything */
+ close(fd_extra);
+ dsos__delete(3);
+
+ /* Make sure we did not leak any file descriptor. */
+ nr_end = open_files_cnt();
+ pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
+	TEST_ASSERT_VAL("leaked files", nr == nr_end);
+ return 0;
+}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
new file mode 100644
index 000000000..2f008067d
--- /dev/null
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include "tests.h"
+#include "debug.h"
+#include "machine.h"
+#include "event.h"
+#include "../util/unwind.h"
+#include "perf_regs.h"
+#include "map.h"
+#include "thread.h"
+#include "callchain.h"
+
+#if defined (__x86_64__) || defined (__i386__) || defined (__powerpc__)
+#include "arch-tests.h"
+#endif
+
+/* For bsearch(). We try to unwind functions in a shared object. */
+#include <stdlib.h>
+
+static int mmap_handler(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_mmap2_event(machine, event, sample);
+}
+
+static int init_live_machine(struct machine *machine)
+{
+ union perf_event event;
+ pid_t pid = getpid();
+
+ return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
+ mmap_handler, machine, true, 500);
+}
+
+/*
+ * We need to keep these functions global, despite the
+ * fact that they are used only locally in this object,
+ * in order to keep them around even if the binary is
+ * stripped. If they are gone, the unwind check for
+ * the symbols fails.
+ */
+int test_dwarf_unwind__thread(struct thread *thread);
+int test_dwarf_unwind__compare(void *p1, void *p2);
+int test_dwarf_unwind__krava_3(struct thread *thread);
+int test_dwarf_unwind__krava_2(struct thread *thread);
+int test_dwarf_unwind__krava_1(struct thread *thread);
+
+#define MAX_STACK 8
+
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+ unsigned long *cnt = (unsigned long *) arg;
+ char *symbol = entry->sym ? entry->sym->name : NULL;
+ static const char *funcs[MAX_STACK] = {
+ "test__arch_unwind_sample",
+ "test_dwarf_unwind__thread",
+ "test_dwarf_unwind__compare",
+ "bsearch",
+ "test_dwarf_unwind__krava_3",
+ "test_dwarf_unwind__krava_2",
+ "test_dwarf_unwind__krava_1",
+ "test__dwarf_unwind"
+ };
+ /*
+ * The funcs[MAX_STACK] array index, based on the
+ * callchain order setup.
+ */
+ int idx = callchain_param.order == ORDER_CALLER ?
+ MAX_STACK - *cnt - 1 : *cnt;
+
+ if (*cnt >= MAX_STACK) {
+ pr_debug("failed: crossed the max stack value %d\n", MAX_STACK);
+ return -1;
+ }
+
+ if (!symbol) {
+ pr_debug("failed: got unresolved address 0x%" PRIx64 "\n",
+ entry->ip);
+ return -1;
+ }
+
+ (*cnt)++;
+ pr_debug("got: %s 0x%" PRIx64 ", expecting %s\n",
+ symbol, entry->ip, funcs[idx]);
+ return strcmp((const char *) symbol, funcs[idx]);
+}
+
+noinline int test_dwarf_unwind__thread(struct thread *thread)
+{
+ struct perf_sample sample;
+ unsigned long cnt = 0;
+ int err = -1;
+
+ memset(&sample, 0, sizeof(sample));
+
+ if (test__arch_unwind_sample(&sample, thread)) {
+ pr_debug("failed to get unwind sample\n");
+ goto out;
+ }
+
+ err = unwind__get_entries(unwind_entry, &cnt, thread,
+ &sample, MAX_STACK);
+ if (err)
+ pr_debug("unwind failed\n");
+ else if (cnt != MAX_STACK) {
+ pr_debug("got wrong number of stack entries %lu != %d\n",
+ cnt, MAX_STACK);
+ err = -1;
+ }
+
+ out:
+ free(sample.user_stack.data);
+ free(sample.user_regs.regs);
+ return err;
+}
+
+static int global_unwind_retval = -INT_MAX;
+
+noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+{
+	/* Whichever element bsearch picks, its value is 'thread'. */
+ struct thread *thread = *(struct thread **)p1;
+
+ if (global_unwind_retval == -INT_MAX) {
+ /* Call unwinder twice for both callchain orders. */
+ callchain_param.order = ORDER_CALLER;
+
+ global_unwind_retval = test_dwarf_unwind__thread(thread);
+ if (!global_unwind_retval) {
+ callchain_param.order = ORDER_CALLEE;
+ global_unwind_retval = test_dwarf_unwind__thread(thread);
+ }
+ }
+
+ return p1 - p2;
+}
+
+noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+{
+ struct thread *array[2] = {thread, thread};
+ void *fp = &bsearch;
+ /*
+	 * Make _bsearch a volatile function pointer to prevent a
+	 * potential optimization that would inline bsearch and call
+	 * the compare function directly from here, instead of via
+	 * the libc shared object.
+ */
+ void *(*volatile _bsearch)(void *, void *, size_t,
+ size_t, int (*)(void *, void *));
+
+ _bsearch = fp;
+ _bsearch(array, &thread, 2, sizeof(struct thread **),
+ test_dwarf_unwind__compare);
+ return global_unwind_retval;
+}
+
+noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+{
+ return test_dwarf_unwind__krava_3(thread);
+}
+
+noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+{
+ return test_dwarf_unwind__krava_2(thread);
+}
+
+int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct machine *machine;
+ struct thread *thread;
+ int err = -1;
+
+ machine = machine__new_host();
+ if (!machine) {
+ pr_err("Could not get machine\n");
+ return -1;
+ }
+
+ if (machine__create_kernel_maps(machine)) {
+ pr_err("Failed to create kernel maps\n");
+ return -1;
+ }
+
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+
+ if (init_live_machine(machine)) {
+ pr_err("Could not init machine\n");
+ goto out;
+ }
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ thread = machine__find_thread(machine, getpid(), getpid());
+ if (!thread) {
+ pr_err("Could not get thread\n");
+ goto out;
+ }
+
+ err = test_dwarf_unwind__krava_1(thread);
+ thread__put(thread);
+
+ out:
+ machine__delete_threads(machine);
+ machine__delete(machine);
+ return err;
+}
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
new file mode 100644
index 000000000..1a2686f1f
--- /dev/null
+++ b/tools/perf/tests/event-times.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "tests.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "util.h"
+#include "debug.h"
+#include "thread_map.h"
+#include "target.h"
+
+static int attach__enable_on_exec(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct target target = {
+ .uid = UINT_MAX,
+ };
+ const char *argv[] = { "true", NULL, };
+ char sbuf[STRERR_BUFSIZE];
+ int err;
+
+ pr_debug("attaching to spawned child, enable on exec\n");
+
+ err = perf_evlist__create_maps(evlist, &target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ return err;
+ }
+
+ err = perf_evlist__prepare_workload(evlist, &target, argv, false, NULL);
+ if (err < 0) {
+ pr_debug("Couldn't run the workload!\n");
+ return err;
+ }
+
+ evsel->attr.enable_on_exec = 1;
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return err;
+ }
+
+ return perf_evlist__start_workload(evlist) == 1 ? TEST_OK : TEST_FAIL;
+}
+
+static int detach__enable_on_exec(struct perf_evlist *evlist)
+{
+ waitpid(evlist->workload.pid, NULL, 0);
+ return 0;
+}
+
+static int attach__current_disabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct thread_map *threads;
+ int err;
+
+ pr_debug("attaching to current thread as disabled\n");
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ evsel->attr.disabled = 1;
+
+ err = perf_evsel__open_per_thread(evsel, threads);
+	if (err) {
+		thread_map__put(threads);
+		pr_debug("Failed to open event cpu-clock:u\n");
+		return err;
+	}
+
+ thread_map__put(threads);
+ return perf_evsel__enable(evsel) == 0 ? TEST_OK : TEST_FAIL;
+}
+
+static int attach__current_enabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct thread_map *threads;
+ int err;
+
+ pr_debug("attaching to current thread as enabled\n");
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ if (threads == NULL) {
+ pr_debug("failed to call thread_map__new\n");
+ return -1;
+ }
+
+ err = perf_evsel__open_per_thread(evsel, threads);
+
+ thread_map__put(threads);
+ return err == 0 ? TEST_OK : TEST_FAIL;
+}
+
+static int detach__disable(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+
+ return perf_evsel__enable(evsel);
+}
+
+static int attach__cpu_disabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct cpu_map *cpus;
+ int err;
+
+	pr_debug("attaching to CPU 0 as disabled\n");
+
+ cpus = cpu_map__new("0");
+ if (cpus == NULL) {
+ pr_debug("failed to call cpu_map__new\n");
+ return -1;
+ }
+
+ evsel->attr.disabled = 1;
+
+ err = perf_evsel__open_per_cpu(evsel, cpus);
+	if (err) {
+		cpu_map__put(cpus);
+
+		if (err == -EACCES)
+			return TEST_SKIP;
+
+		pr_debug("Failed to open event cpu-clock:u\n");
+		return err;
+	}
+
+ cpu_map__put(cpus);
+ return perf_evsel__enable(evsel);
+}
+
+static int attach__cpu_enabled(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__last(evlist);
+ struct cpu_map *cpus;
+ int err;
+
+ pr_debug("attaching to CPU 0 as enabled\n");
+
+ cpus = cpu_map__new("0");
+ if (cpus == NULL) {
+ pr_debug("failed to call cpu_map__new\n");
+ return -1;
+ }
+
+ err = perf_evsel__open_per_cpu(evsel, cpus);
+	if (err == -EACCES) {
+		cpu_map__put(cpus);
+		return TEST_SKIP;
+	}
+
+ cpu_map__put(cpus);
+ return err ? TEST_FAIL : TEST_OK;
+}
+
+static int test_times(int (attach)(struct perf_evlist *),
+ int (detach)(struct perf_evlist *))
+{
+ struct perf_counts_values count;
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel;
+ int err = -1, i;
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("failed to create event list\n");
+ goto out_err;
+ }
+
+ err = parse_events(evlist, "cpu-clock:u", NULL);
+ if (err) {
+ pr_debug("failed to parse event cpu-clock:u\n");
+ goto out_err;
+ }
+
+ evsel = perf_evlist__last(evlist);
+ evsel->attr.read_format |=
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+
+ err = attach(evlist);
+ if (err == TEST_SKIP) {
+		pr_debug(" SKIP : insufficient permissions\n");
+ return err;
+ }
+
+ TEST_ASSERT_VAL("failed to attach", !err);
+
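+	/* Burn cycles so the counter accumulates enabled and running time. */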
+ for (i = 0; i < 100000000; i++) { }
+
+ TEST_ASSERT_VAL("failed to detach", !detach(evlist));
+
+ perf_evsel__read(evsel, 0, 0, &count);
+
+ err = !(count.ena == count.run);
+
+ pr_debug(" %s: ena %" PRIu64", run %" PRIu64"\n",
+ !err ? "OK " : "FAILED",
+ count.ena, count.run);
+
+out_err:
+ perf_evlist__delete(evlist);
+ return !err ? TEST_OK : TEST_FAIL;
+}
+
+/*
+ * This test creates the software event 'cpu-clock',
+ * attaches it in several ways (explained below),
+ * and checks that the enabled and running times
+ * match.
+ */
+int test__event_times(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err, ret = 0;
+
+#define _T(attach, detach) \
+ err = test_times(attach, detach); \
+ if (err && (ret == TEST_OK || ret == TEST_SKIP)) \
+ ret = err;
+
+ /* attach on newly spawned process after exec */
+ _T(attach__enable_on_exec, detach__enable_on_exec)
+ /* attach on current process as enabled */
+ _T(attach__current_enabled, detach__disable)
+ /* attach on current process as disabled */
+ _T(attach__current_disabled, detach__disable)
+ /* attach on cpu as disabled */
+ _T(attach__cpu_disabled, detach__disable)
+ /* attach on cpu as enabled */
+ _T(attach__cpu_enabled, detach__disable)
+
+#undef _T
+ return ret;
+}
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
new file mode 100644
index 000000000..f14dcd613
--- /dev/null
+++ b/tools/perf/tests/event_update.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "tests.h"
+#include "debug.h"
+
+static int process_event_unit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event *) event;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__UNIT);
+ TEST_ASSERT_VAL("wrong unit", !strcmp(ev->data, "KRAVA"));
+ return 0;
+}
+
+static int process_event_scale(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event *) event;
+ struct event_update_event_scale *ev_data;
+
+ ev_data = (struct event_update_event_scale *) ev->data;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__SCALE);
+ TEST_ASSERT_VAL("wrong scale", ev_data->scale == 0.123);
+ return 0;
+}
+
+struct event_name {
+ struct perf_tool tool;
+ const char *name;
+};
+
+static int process_event_name(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_name *tmp = container_of(tool, struct event_name, tool);
+ struct event_update_event *ev = (struct event_update_event*) event;
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong id", ev->type == PERF_EVENT_UPDATE__NAME);
+ TEST_ASSERT_VAL("wrong name", !strcmp(ev->data, tmp->name));
+ return 0;
+}
+
+static int process_event_cpus(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct event_update_event *ev = (struct event_update_event*) event;
+ struct event_update_event_cpus *ev_data;
+ struct cpu_map *map;
+
+ ev_data = (struct event_update_event_cpus*) ev->data;
+
+ map = cpu_map__new_data(&ev_data->cpus);
+
+ TEST_ASSERT_VAL("wrong id", ev->id == 123);
+ TEST_ASSERT_VAL("wrong type", ev->type == PERF_EVENT_UPDATE__CPUS);
+ TEST_ASSERT_VAL("wrong cpus", map->nr == 3);
+ TEST_ASSERT_VAL("wrong cpus", map->map[0] == 1);
+ TEST_ASSERT_VAL("wrong cpus", map->map[1] == 2);
+ TEST_ASSERT_VAL("wrong cpus", map->map[2] == 3);
+ cpu_map__put(map);
+ return 0;
+}
+
+int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct event_name tmp;
+
+ evlist = perf_evlist__new_default();
+ TEST_ASSERT_VAL("failed to get evlist", evlist);
+
+ evsel = perf_evlist__first(evlist);
+
+	TEST_ASSERT_VAL("failed to alloc ids",
+ !perf_evsel__alloc_id(evsel, 1, 1));
+
+ perf_evlist__id_add(evlist, evsel, 0, 0, 123);
+
+ evsel->unit = strdup("KRAVA");
+
+ TEST_ASSERT_VAL("failed to synthesize attr update unit",
+ !perf_event__synthesize_event_update_unit(NULL, evsel, process_event_unit));
+
+ evsel->scale = 0.123;
+
+ TEST_ASSERT_VAL("failed to synthesize attr update scale",
+ !perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
+
+ tmp.name = perf_evsel__name(evsel);
+
+ TEST_ASSERT_VAL("failed to synthesize attr update name",
+ !perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
+
+ evsel->own_cpus = cpu_map__new("1,2,3");
+
+ TEST_ASSERT_VAL("failed to synthesize attr update cpus",
+ !perf_event__synthesize_event_update_cpus(&tmp.tool, evsel, process_event_cpus));
+
+ cpu_map__put(evsel->own_cpus);
+ return 0;
+}
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
new file mode 100644
index 000000000..a104728eb
--- /dev/null
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "tests.h"
+#include "debug.h"
+#include <errno.h>
+#include <linux/kernel.h>
+
+static int perf_evsel__roundtrip_cache_name_test(void)
+{
+ char name[128];
+ int type, op, err = 0, ret = 0, i, idx;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ /* skip invalid cache type */
+ if (!perf_evsel__is_cache_op_valid(type, op))
+ continue;
+
+ for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+ __perf_evsel__hw_cache_type_op_res_name(type, op, i,
+ name, sizeof(name));
+ err = parse_events(evlist, name, NULL);
+ if (err)
+ ret = err;
+ }
+ }
+ }
+
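+	/* Second pass: each parsed evsel must map back to the generated name. */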
+ idx = 0;
+ evsel = perf_evlist__first(evlist);
+
+ for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ /* skip invalid cache type */
+ if (!perf_evsel__is_cache_op_valid(type, op))
+ continue;
+
+ for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+ __perf_evsel__hw_cache_type_op_res_name(type, op, i,
+ name, sizeof(name));
+ if (evsel->idx != idx)
+ continue;
+
+ ++idx;
+
+ if (strcmp(perf_evsel__name(evsel), name)) {
+ pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+ ret = -1;
+ }
+
+ evsel = perf_evsel__next(evsel);
+ }
+ }
+ }
+
+ perf_evlist__delete(evlist);
+ return ret;
+}
+
+static int __perf_evsel__name_array_test(const char *names[], int nr_names)
+{
+ int i, err;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ for (i = 0; i < nr_names; ++i) {
+ err = parse_events(evlist, names[i], NULL);
+ if (err) {
+ pr_debug("failed to parse event '%s', err %d\n",
+ names[i], err);
+ goto out_delete_evlist;
+ }
+ }
+
+ err = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
+ --err;
+ pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
+ }
+ }
+
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return err;
+}
+
+#define perf_evsel__name_array_test(names) \
+ __perf_evsel__name_array_test(names, ARRAY_SIZE(names))
+
+int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = 0, ret = 0;
+
+ err = perf_evsel__name_array_test(perf_evsel__hw_names);
+ if (err)
+ ret = err;
+
+ err = __perf_evsel__name_array_test(perf_evsel__sw_names,
+ PERF_COUNT_SW_DUMMY + 1);
+ if (err)
+ ret = err;
+
+ err = perf_evsel__roundtrip_cache_name_test();
+ if (err)
+ ret = err;
+
+ return ret;
+}
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
new file mode 100644
index 000000000..926a8e1b5
--- /dev/null
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/err.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "tests.h"
+#include "debug.h"
+
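+/* Check a tracepoint field's size and signedness against expectations. */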
+static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
+ int size, bool should_be_signed)
+{
+ struct format_field *field = perf_evsel__field(evsel, name);
+ int is_signed;
+ int ret = 0;
+
+ if (field == NULL) {
+ pr_debug("%s: \"%s\" field not found!\n", evsel->name, name);
+ return -1;
+ }
+
+ is_signed = !!(field->flags & FIELD_IS_SIGNED);
+ if (should_be_signed && !is_signed) {
+ pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
+ evsel->name, name, is_signed, should_be_signed);
+ ret = -1;
+ }
+
+ if (field->size != size) {
+ pr_debug("%s: \"%s\" size (%d) should be %d!\n",
+ evsel->name, name, field->size, size);
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
+ int ret = 0;
+
+ if (IS_ERR(evsel)) {
+ pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+ return -1;
+ }
+
+ if (perf_evsel__test_field(evsel, "prev_comm", 16, false))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "prev_pid", 4, true))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "prev_prio", 4, true))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "prev_state", sizeof(long), true))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "next_comm", 16, false))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "next_pid", 4, true))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "next_prio", 4, true))
+ ret = -1;
+
+ perf_evsel__delete(evsel);
+
+ evsel = perf_evsel__newtp("sched", "sched_wakeup");
+
+ if (IS_ERR(evsel)) {
+ pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+ return -1;
+ }
+
+ if (perf_evsel__test_field(evsel, "comm", 16, false))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "pid", 4, true))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "prio", 4, true))
+ ret = -1;
+
+ if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
+ ret = -1;
+
+ perf_evsel__delete(evsel);
+ return ret;
+}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
new file mode 100644
index 000000000..9acc1e80b
--- /dev/null
+++ b/tools/perf/tests/expr.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/debug.h"
+#include "util/expr.h"
+#include "tests.h"
+#include <stdlib.h>
+
+static int test(struct parse_ctx *ctx, const char *e, double val2)
+{
+ double val;
+
+ if (expr__parse(&val, ctx, &e))
+ TEST_ASSERT_VAL("parse test failed", 0);
+ TEST_ASSERT_VAL("unexpected value", val == val2);
+ return 0;
+}
+
+int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+ const char *p;
+ const char **other;
+ double val;
+ int i, ret;
+ struct parse_ctx ctx;
+ int num_other;
+
+ expr__ctx_init(&ctx);
+ expr__add_id(&ctx, "FOO", 1);
+ expr__add_id(&ctx, "BAR", 2);
+
+ ret = test(&ctx, "1+1", 2);
+ ret |= test(&ctx, "FOO+BAR", 3);
+ ret |= test(&ctx, "(BAR/2)%2", 1);
+ ret |= test(&ctx, "1 - -4", 5);
+ ret |= test(&ctx, "(FOO-1)*2 + (BAR/2)%2 - -4", 5);
+ ret |= test(&ctx, "1-1 | 1", 1);
+ ret |= test(&ctx, "1-1 & 1", 0);
+ ret |= test(&ctx, "min(1,2) + 1", 2);
+ ret |= test(&ctx, "max(1,2) + 1", 3);
+ ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
+
+ if (ret)
+ return ret;
+
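+	/* Invalid expressions (division by zero, missing operand) must fail to parse. */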
+ p = "FOO/0";
+ ret = expr__parse(&val, &ctx, &p);
+ TEST_ASSERT_VAL("division by zero", ret == 1);
+
+ p = "BAR/";
+ ret = expr__parse(&val, &ctx, &p);
+ TEST_ASSERT_VAL("missing operand", ret == 1);
+
+ TEST_ASSERT_VAL("find other",
+ expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
+ TEST_ASSERT_VAL("find other", num_other == 3);
+ TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
+ TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
+ TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
+ TEST_ASSERT_VAL("find other", other[3] == NULL);
+
+ for (i = 0; i < num_other; i++)
+ free((void *)other[i]);
+ free((void *)other);
+
+ return 0;
+}
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
new file mode 100644
index 000000000..c7c81c4a5
--- /dev/null
+++ b/tools/perf/tests/fdarray.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <api/fd/array.h>
+#include <poll.h>
+#include "util/debug.h"
+#include "tests/tests.h"
+
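+/* Mark every allocated slot in use and give each fd the same revents. */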
+static void fdarray__init_revents(struct fdarray *fda, short revents)
+{
+ int fd;
+
+ fda->nr = fda->nr_alloc;
+
+ for (fd = 0; fd < fda->nr; ++fd) {
+ fda->entries[fd].fd = fda->nr - fd;
+ fda->entries[fd].revents = revents;
+ }
+}
+
+static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE *fp)
+{
+ int printed = 0;
+
+ if (verbose <= 0)
+ return 0;
+
+ printed += fprintf(fp, "\n%s: ", prefix);
+ return printed + fdarray__fprintf(fda, fp);
+}
+
+int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
+ struct fdarray *fda = fdarray__new(5, 5);
+
+ if (fda == NULL) {
+ pr_debug("\nfdarray__new() failed!");
+ goto out;
+ }
+
+ fdarray__init_revents(fda, POLLIN);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+ if (nr_fds != fda->nr_alloc) {
+ pr_debug("\nfdarray__filter()=%d != %d shouldn't have filtered anything",
+ nr_fds, fda->nr_alloc);
+ goto out_delete;
+ }
+
+ fdarray__init_revents(fda, POLLHUP);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+ if (nr_fds != 0) {
+ pr_debug("\nfdarray__filter()=%d != %d, should have filtered all fds",
+ nr_fds, fda->nr_alloc);
+ goto out_delete;
+ }
+
+ fdarray__init_revents(fda, POLLHUP);
+ fda->entries[2].revents = POLLIN;
+ expected_fd[0] = fda->entries[2].fd;
+
+ pr_debug("\nfiltering all but fda->entries[2]:");
+ fdarray__fprintf_prefix(fda, "before", stderr);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+ fdarray__fprintf_prefix(fda, " after", stderr);
+ if (nr_fds != 1) {
+ pr_debug("\nfdarray__filter()=%d != 1, should have left just one event", nr_fds);
+ goto out_delete;
+ }
+
+ if (fda->entries[0].fd != expected_fd[0]) {
+ pr_debug("\nfda->entries[0].fd=%d != %d\n",
+ fda->entries[0].fd, expected_fd[0]);
+ goto out_delete;
+ }
+
+ fdarray__init_revents(fda, POLLHUP);
+ fda->entries[0].revents = POLLIN;
+ expected_fd[0] = fda->entries[0].fd;
+ fda->entries[3].revents = POLLIN;
+ expected_fd[1] = fda->entries[3].fd;
+
+ pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
+ fdarray__fprintf_prefix(fda, "before", stderr);
+ nr_fds = fdarray__filter(fda, POLLHUP, NULL, NULL);
+ fdarray__fprintf_prefix(fda, " after", stderr);
+ if (nr_fds != 2) {
+ pr_debug("\nfdarray__filter()=%d != 2, should have left just two events",
+ nr_fds);
+ goto out_delete;
+ }
+
+ for (fd = 0; fd < 2; ++fd) {
+ if (fda->entries[fd].fd != expected_fd[fd]) {
+ pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
+ fda->entries[fd].fd, expected_fd[fd]);
+ goto out_delete;
+ }
+ }
+
+ pr_debug("\n");
+
+ err = 0;
+out_delete:
+ fdarray__delete(fda);
+out:
+ return err;
+}
+
+int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = TEST_FAIL;
+ struct fdarray *fda = fdarray__new(2, 2);
+
+ if (fda == NULL) {
+ pr_debug("\nfdarray__new() failed!");
+ goto out;
+ }
+
+#define FDA_CHECK(_idx, _fd, _revents) \
+ if (fda->entries[_idx].fd != _fd) { \
+ pr_debug("\n%d: fda->entries[%d].fd(%d) != %d!", \
+ __LINE__, _idx, fda->entries[_idx].fd, _fd); \
+ goto out_delete; \
+ } \
+ if (fda->entries[_idx].events != (_revents)) { \
+ pr_debug("\n%d: fda->entries[%d].events(%d) != %d!", \
+ __LINE__, _idx, fda->entries[_idx].events, _revents); \
+ goto out_delete; \
+ }
+
+#define FDA_ADD(_idx, _fd, _revents, _nr) \
+ if (fdarray__add(fda, _fd, _revents) < 0) { \
+ pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
+ __LINE__, _fd, _revents); \
+ goto out_delete; \
+ } \
+ if (fda->nr != _nr) { \
+ pr_debug("\n%d: fdarray__add(fda, %d, %d)=%d != %d", \
+ __LINE__, _fd, _revents, fda->nr, _nr); \
+ goto out_delete; \
+ } \
+ FDA_CHECK(_idx, _fd, _revents)
+
+ FDA_ADD(0, 1, POLLIN, 1);
+ FDA_ADD(1, 2, POLLERR, 2);
+
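+ /* fda was created with room for only 2 entries, so the next add must grow it */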
+ fdarray__fprintf_prefix(fda, "before growing array", stderr);
+
+ FDA_ADD(2, 35, POLLHUP, 3);
+
+ if (fda->entries == NULL) {
+ pr_debug("\nfdarray__add(fda, 35, POLLHUP) should have allocated fda->pollfd!");
+ goto out_delete;
+ }
+
+ fdarray__fprintf_prefix(fda, "after 3rd add", stderr);
+
+ FDA_ADD(3, 88, POLLIN | POLLOUT, 4);
+
+ fdarray__fprintf_prefix(fda, "after 4th add", stderr);
+
+ FDA_CHECK(0, 1, POLLIN);
+ FDA_CHECK(1, 2, POLLERR);
+ FDA_CHECK(2, 35, POLLHUP);
+ FDA_CHECK(3, 88, POLLIN | POLLOUT);
+
+#undef FDA_ADD
+#undef FDA_CHECK
+
+ pr_debug("\n");
+
+ err = 0;
+out_delete:
+ fdarray__delete(fda);
+out:
+ return err;
+}
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
new file mode 100644
index 000000000..b889a28fd
--- /dev/null
+++ b/tools/perf/tests/hists_common.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+static struct {
+ u32 pid;
+ const char *comm;
+} fake_threads[] = {
+ { FAKE_PID_PERF1, "perf" },
+ { FAKE_PID_PERF2, "perf" },
+ { FAKE_PID_BASH, "bash" },
+};
+
+static struct {
+ u32 pid;
+ u64 start;
+ const char *filename;
+} fake_mmap_info[] = {
+ { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" },
+ { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" },
+ { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
+ { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" },
+ { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" },
+ { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
+ { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" },
+ { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" },
+ { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" },
+};
+
+struct fake_sym {
+ u64 start;
+ u64 length;
+ const char *name;
+};
+
+static struct fake_sym perf_syms[] = {
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
+};
+
+static struct fake_sym bash_syms[] = {
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
+};
+
+static struct fake_sym libc_syms[] = {
+ { 700, 100, "malloc" },
+ { 800, 100, "free" },
+ { 900, 100, "realloc" },
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
+};
+
+static struct fake_sym kernel_syms[] = {
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
+};
+
+static struct {
+ const char *dso_name;
+ struct fake_sym *syms;
+ size_t nr_syms;
+} fake_symbols[] = {
+ { "perf", perf_syms, ARRAY_SIZE(perf_syms) },
+ { "bash", bash_syms, ARRAY_SIZE(bash_syms) },
+ { "libc", libc_syms, ARRAY_SIZE(libc_syms) },
+ { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
+};
+
+struct machine *setup_fake_machine(struct machines *machines)
+{
+ struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
+ size_t i;
+
+ if (machine == NULL) {
+ pr_debug("Not enough memory for machine setup\n");
+ return NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
+ struct thread *thread;
+
+ thread = machine__findnew_thread(machine, fake_threads[i].pid,
+ fake_threads[i].pid);
+ if (thread == NULL)
+ goto out;
+
+ thread__set_comm(thread, fake_threads[i].comm, 0);
+ thread__put(thread);
+ }
+
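+ /*
+ * Synthesize MMAP events so that the fake sample IPs used by the
+ * tests can be resolved to the maps and symbols set up below.
+ */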
+ for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+ struct perf_sample sample = {
+ .cpumode = PERF_RECORD_MISC_USER,
+ };
+ union perf_event fake_mmap_event = {
+ .mmap = {
+ .pid = fake_mmap_info[i].pid,
+ .tid = fake_mmap_info[i].pid,
+ .start = fake_mmap_info[i].start,
+ .len = FAKE_MAP_LENGTH,
+ .pgoff = 0ULL,
+ },
+ };
+
+ strcpy(fake_mmap_event.mmap.filename,
+ fake_mmap_info[i].filename);
+
+ machine__process_mmap_event(machine, &fake_mmap_event, &sample);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
+ size_t k;
+ struct dso *dso;
+
+ dso = machine__findnew_dso(machine, fake_symbols[i].dso_name);
+ if (dso == NULL)
+ goto out;
+
+ /* emulate dso__load() */
+ dso__set_loaded(dso);
+
+ for (k = 0; k < fake_symbols[i].nr_syms; k++) {
+ struct symbol *sym;
+ struct fake_sym *fsym = &fake_symbols[i].syms[k];
+
+ sym = symbol__new(fsym->start, fsym->length,
+ STB_GLOBAL, STT_FUNC, fsym->name);
+ if (sym == NULL) {
+ dso__put(dso);
+ goto out;
+ }
+
+ symbols__insert(&dso->symbols, sym);
+ }
+
+ dso__put(dso);
+ }
+
+ return machine;
+
+out:
+ pr_debug("Not enough memory for machine setup\n");
+ machine__delete_threads(machine);
+ return NULL;
+}
+
+void print_hists_in(struct hists *hists)
+{
+ int i = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ pr_info("----- %s --------\n", __func__);
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ if (!he->filtered) {
+ pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
+ i, thread__comm_str(he->thread),
+ he->ms.map->dso->short_name,
+ he->ms.sym->name, he->stat.period);
+ }
+
+ i++;
+ node = rb_next(node);
+ }
+}
+
+void print_hists_out(struct hists *hists)
+{
+ int i = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ root = &hists->entries;
+
+ pr_info("----- %s --------\n", __func__);
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+
+ if (!he->filtered) {
+ pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
+ i, thread__comm_str(he->thread), he->thread->tid,
+ he->ms.map->dso->short_name,
+ he->ms.sym->name, he->stat.period,
+ he->stat_acc ? he->stat_acc->period : 0);
+ }
+
+ i++;
+ node = rb_next(node);
+ }
+}
diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h
new file mode 100644
index 000000000..a2de0ff0c
--- /dev/null
+++ b/tools/perf/tests/hists_common.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TESTS__HISTS_COMMON_H__
+#define __PERF_TESTS__HISTS_COMMON_H__
+
+struct machine;
+struct machines;
+
+#define FAKE_PID_PERF1 100
+#define FAKE_PID_PERF2 200
+#define FAKE_PID_BASH 300
+
+#define FAKE_MAP_PERF 0x400000
+#define FAKE_MAP_BASH 0x400000
+#define FAKE_MAP_LIBC 0x500000
+#define FAKE_MAP_KERNEL 0xf00000
+#define FAKE_MAP_LENGTH 0x100000
+
+#define FAKE_SYM_OFFSET1 700
+#define FAKE_SYM_OFFSET2 800
+#define FAKE_SYM_OFFSET3 900
+#define FAKE_SYM_LENGTH 100
+
+#define FAKE_IP_PERF_MAIN (FAKE_MAP_PERF + FAKE_SYM_OFFSET1)
+#define FAKE_IP_PERF_RUN_COMMAND (FAKE_MAP_PERF + FAKE_SYM_OFFSET2)
+#define FAKE_IP_PERF_CMD_RECORD (FAKE_MAP_PERF + FAKE_SYM_OFFSET3)
+#define FAKE_IP_BASH_MAIN (FAKE_MAP_BASH + FAKE_SYM_OFFSET1)
+#define FAKE_IP_BASH_XMALLOC (FAKE_MAP_BASH + FAKE_SYM_OFFSET2)
+#define FAKE_IP_BASH_XFREE (FAKE_MAP_BASH + FAKE_SYM_OFFSET3)
+#define FAKE_IP_LIBC_MALLOC (FAKE_MAP_LIBC + FAKE_SYM_OFFSET1)
+#define FAKE_IP_LIBC_FREE (FAKE_MAP_LIBC + FAKE_SYM_OFFSET2)
+#define FAKE_IP_LIBC_REALLOC (FAKE_MAP_LIBC + FAKE_SYM_OFFSET3)
+#define FAKE_IP_KERNEL_SCHEDULE (FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1)
+#define FAKE_IP_KERNEL_PAGE_FAULT (FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2)
+#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN (FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3)
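+/* e.g. FAKE_IP_PERF_MAIN = 0x400000 + 700: a map start plus a symbol offset */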
+
+/*
+ * setup_fake_machine() provides a test environment consisting of 3
+ * processes, each with 3 mappings, where each mapping in turn has 3
+ * symbols. See the table below:
+ *
+ * Command: Pid Shared Object Symbol
+ * ............. ............. ...................
+ * perf: 100 perf main
+ * perf: 100 perf run_command
+ * perf: 100 perf cmd_record
+ * perf: 100 libc malloc
+ * perf: 100 libc free
+ * perf: 100 libc realloc
+ * perf: 100 [kernel] schedule
+ * perf: 100 [kernel] page_fault
+ * perf: 100 [kernel] sys_perf_event_open
+ * perf: 200 perf main
+ * perf: 200 perf run_command
+ * perf: 200 perf cmd_record
+ * perf: 200 libc malloc
+ * perf: 200 libc free
+ * perf: 200 libc realloc
+ * perf: 200 [kernel] schedule
+ * perf: 200 [kernel] page_fault
+ * perf: 200 [kernel] sys_perf_event_open
+ * bash: 300 bash main
+ * bash: 300 bash xmalloc
+ * bash: 300 bash xfree
+ * bash: 300 libc malloc
+ * bash: 300 libc free
+ * bash: 300 libc realloc
+ * bash: 300 [kernel] schedule
+ * bash: 300 [kernel] page_fault
+ * bash: 300 [kernel] sys_perf_event_open
+ */
+struct machine *setup_fake_machine(struct machines *machines);
+
+void print_hists_in(struct hists *hists);
+void print_hists_out(struct hists *hists);
+
+#endif /* __PERF_TESTS__HISTS_COMMON_H__ */
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
new file mode 100644
index 000000000..65fe02beb
--- /dev/null
+++ b/tools/perf/tests/hists_cumulate.c
@@ -0,0 +1,738 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+struct sample {
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+ /* perf [kernel] schedule() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [perf] cmd_record() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+ /* perf [libc] malloc() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* perf [libc] free() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [kernel] page_fault() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ /* bash [bash] main() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
+ /* bash [bash] xmalloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
+ /* bash [kernel] page_fault() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+/*
+ * Will be cast to struct ip_callchain, which has all-64-bit entries:
+ * nr followed by ips[].
+ */
+static u64 fake_callchains[][10] = {
+ /* schedule => run_command => main */
+ { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+ /* main */
+ { 1, FAKE_IP_PERF_MAIN, },
+ /* cmd_record => run_command => main */
+ { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+ /* malloc => cmd_record => run_command => main */
+ { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+ FAKE_IP_PERF_MAIN, },
+ /* free => cmd_record => run_command => main */
+ { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+ FAKE_IP_PERF_MAIN, },
+ /* main */
+ { 1, FAKE_IP_PERF_MAIN, },
+ /* page_fault => sys_perf_event_open => run_command => main */
+ { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
+ FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+ /* main */
+ { 1, FAKE_IP_BASH_MAIN, },
+ /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */
+ { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
+ FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
+ /* page_fault => malloc => main */
+ { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+ struct addr_location al;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct perf_sample sample = { .period = 1000, };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = &sample,
+ .hide_unresolved = false,
+ };
+
+ if (symbol_conf.cumulate_callchain)
+ iter.ops = &hist_iter_cumulative;
+ else
+ iter.ops = &hist_iter_normal;
+
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = fake_samples[i].pid;
+ sample.tid = fake_samples[i].pid;
+ sample.ip = fake_samples[i].ip;
+ sample.callchain = (struct ip_callchain *)fake_callchains[i];
+
+ if (machine__resolve(machine, &al, &sample) < 0)
+ goto out;
+
+ if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
+ NULL) < 0) {
+ addr_location__put(&al);
+ goto out;
+ }
+
+ fake_samples[i].thread = al.thread;
+ fake_samples[i].map = al.map;
+ fake_samples[i].sym = al.sym;
+ }
+
+ return TEST_OK;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+ struct hist_entry *he;
+ struct rb_root *root_in;
+ struct rb_root *root_out;
+ struct rb_node *node;
+
+ if (hists__has(hists, need_collapse))
+ root_in = &hists->entries_collapsed;
+ else
+ root_in = hists->entries_in;
+
+ root_out = &hists->entries;
+
+ while (!RB_EMPTY_ROOT(root_out)) {
+ node = rb_first(root_out);
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+ rb_erase(node, root_out);
+ rb_erase(&he->rb_node_in, root_in);
+ hist_entry__delete(he);
+ }
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
+#define COMM(he) (thread__comm_str(he->thread))
+#define DSO(he) (he->ms.map->dso->short_name)
+#define SYM(he) (he->ms.sym->name)
+#define CPU(he) (he->cpu)
+#define PID(he) (he->thread->tid)
+#define DEPTH(he) (he->callchain->max_depth)
+#define CDSO(cl) (cl->ms.map->dso->short_name)
+#define CSYM(cl) (cl->ms.sym->name)
+
+struct result {
+ u64 children;
+ u64 self;
+ const char *comm;
+ const char *dso;
+ const char *sym;
+};
+
+struct callchain_result {
+ u64 nr;
+ struct {
+ const char *dso;
+ const char *sym;
+ } node[10];
+};
+
+static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
+ struct callchain_result *expected_callchain, size_t nr_callchain)
+{
+ char buf[32];
+ size_t i, c;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+ struct callchain_node *cnode;
+ struct callchain_list *clist;
+
+ /*
+ * adding and deleting hist entries must be done outside of this
+ * function since TEST_ASSERT_VAL() returns in case of failure.
+ */
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(hists_to_evsel(hists), NULL);
+
+ if (verbose > 2) {
+ pr_info("use callchain: %d, cumulate callchain: %d\n",
+ symbol_conf.use_callchain,
+ symbol_conf.cumulate_callchain);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ for (node = rb_first(root), i = 0;
+ node && (he = rb_entry(node, struct hist_entry, rb_node));
+ node = rb_next(node), i++) {
+ scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
+
+ TEST_ASSERT_VAL("Incorrect number of hist entry",
+ i < nr_expected);
+ TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
+ !strcmp(COMM(he), expected[i].comm) &&
+ !strcmp(DSO(he), expected[i].dso) &&
+ !strcmp(SYM(he), expected[i].sym));
+
+ if (symbol_conf.cumulate_callchain)
+ TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
+
+ if (!symbol_conf.use_callchain)
+ continue;
+
+ /* check callchain entries */
+ root = &he->callchain->node.rb_root;
+
+ TEST_ASSERT_VAL("callchains expected", !RB_EMPTY_ROOT(root));
+ cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);
+
+ c = 0;
+ list_for_each_entry(clist, &cnode->val, list) {
+ scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
+
+ TEST_ASSERT_VAL("Incorrect number of callchain entry",
+ c < expected_callchain[i].nr);
+ TEST_ASSERT_VAL(buf,
+ !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
+ !strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
+ c++;
+ }
+ /* TODO: handle multiple child nodes properly */
+ TEST_ASSERT_VAL("Incorrect number of callchain entry",
+ c <= expected_callchain[i].nr);
+ }
+ TEST_ASSERT_VAL("Incorrect number of hist entry",
+ i == nr_expected);
+ TEST_ASSERT_VAL("Incorrect number of callchain entry",
+ !symbol_conf.use_callchain || nr_expected == nr_callchain);
+ return 0;
+}
+
+/* NO callchain + NO children */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ /*
+ * expected output:
+ *
+ * Overhead Command Shared Object Symbol
+ * ======== ======= ============= ==============
+ * 20.00% perf perf [.] main
+ * 10.00% bash [kernel] [k] page_fault
+ * 10.00% bash bash [.] main
+ * 10.00% bash bash [.] xmalloc
+ * 10.00% perf [kernel] [k] page_fault
+ * 10.00% perf [kernel] [k] schedule
+ * 10.00% perf libc [.] free
+ * 10.00% perf libc [.] malloc
+ * 10.00% perf perf [.] cmd_record
+ */
+ struct result expected[] = {
+ { 0, 2000, "perf", "perf", "main" },
+ { 0, 1000, "bash", "[kernel]", "page_fault" },
+ { 0, 1000, "bash", "bash", "main" },
+ { 0, 1000, "bash", "bash", "xmalloc" },
+ { 0, 1000, "perf", "[kernel]", "page_fault" },
+ { 0, 1000, "perf", "[kernel]", "schedule" },
+ { 0, 1000, "perf", "libc", "free" },
+ { 0, 1000, "perf", "libc", "malloc" },
+ { 0, 1000, "perf", "perf", "cmd_record" },
+ };
+
+ symbol_conf.use_callchain = false;
+ symbol_conf.cumulate_callchain = false;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+
+ setup_sorting(NULL);
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* callchain + NO children */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ /*
+ * expected output:
+ *
+ * Overhead Command Shared Object Symbol
+ * ======== ======= ============= ==============
+ * 20.00% perf perf [.] main
+ * |
+ * --- main
+ *
+ * 10.00% bash [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * malloc
+ * main
+ *
+ * 10.00% bash bash [.] main
+ * |
+ * --- main
+ *
+ * 10.00% bash bash [.] xmalloc
+ * |
+ * --- xmalloc
+ * malloc
+ * xmalloc <--- NOTE: there's a cycle
+ * malloc
+ * xmalloc
+ * main
+ *
+ * 10.00% perf [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * sys_perf_event_open
+ * run_command
+ * main
+ *
+ * 10.00% perf [kernel] [k] schedule
+ * |
+ * --- schedule
+ * run_command
+ * main
+ *
+ * 10.00% perf libc [.] free
+ * |
+ * --- free
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% perf libc [.] malloc
+ * |
+ * --- malloc
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% perf perf [.] cmd_record
+ * |
+ * --- cmd_record
+ * run_command
+ * main
+ *
+ */
+ struct result expected[] = {
+ { 0, 2000, "perf", "perf", "main" },
+ { 0, 1000, "bash", "[kernel]", "page_fault" },
+ { 0, 1000, "bash", "bash", "main" },
+ { 0, 1000, "bash", "bash", "xmalloc" },
+ { 0, 1000, "perf", "[kernel]", "page_fault" },
+ { 0, 1000, "perf", "[kernel]", "schedule" },
+ { 0, 1000, "perf", "libc", "free" },
+ { 0, 1000, "perf", "libc", "malloc" },
+ { 0, 1000, "perf", "perf", "cmd_record" },
+ };
+ struct callchain_result expected_callchain[] = {
+ {
+ 1, { { "perf", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "page_fault" },
+ { "libc", "malloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 1, { { "bash", "main" }, },
+ },
+ {
+ 6, { { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 4, { { "[kernel]", "page_fault" },
+ { "[kernel]", "sys_perf_event_open" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "schedule" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "free" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "malloc" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 3, { { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ };
+
+ symbol_conf.use_callchain = true;
+ symbol_conf.cumulate_callchain = false;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+
+ setup_sorting(NULL);
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected),
+ expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* NO callchain + children */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ /*
+ * expected output:
+ *
+ * Children Self Command Shared Object Symbol
+ * ======== ======== ======= ============= =======================
+ * 70.00% 20.00% perf perf [.] main
+ * 50.00% 0.00% perf perf [.] run_command
+ * 30.00% 10.00% bash bash [.] main
+ * 30.00% 10.00% perf perf [.] cmd_record
+ * 20.00% 0.00% bash libc [.] malloc
+ * 10.00% 10.00% bash [kernel] [k] page_fault
+ * 10.00% 10.00% bash bash [.] xmalloc
+ * 10.00% 10.00% perf [kernel] [k] page_fault
+ * 10.00% 10.00% perf libc [.] malloc
+ * 10.00% 10.00% perf [kernel] [k] schedule
+ * 10.00% 10.00% perf libc [.] free
+ * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
+ */
+ struct result expected[] = {
+ { 7000, 2000, "perf", "perf", "main" },
+ { 5000, 0, "perf", "perf", "run_command" },
+ { 3000, 1000, "bash", "bash", "main" },
+ { 3000, 1000, "perf", "perf", "cmd_record" },
+ { 2000, 0, "bash", "libc", "malloc" },
+ { 1000, 1000, "bash", "[kernel]", "page_fault" },
+ { 1000, 1000, "bash", "bash", "xmalloc" },
+ { 1000, 1000, "perf", "[kernel]", "page_fault" },
+ { 1000, 1000, "perf", "[kernel]", "schedule" },
+ { 1000, 1000, "perf", "libc", "free" },
+ { 1000, 1000, "perf", "libc", "malloc" },
+ { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
+ };
+
+ symbol_conf.use_callchain = false;
+ symbol_conf.cumulate_callchain = true;
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+
+ setup_sorting(NULL);
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* callchain + children */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ /*
+ * expected output:
+ *
+ * Children Self Command Shared Object Symbol
+ * ======== ======== ======= ============= =======================
+ * 70.00% 20.00% perf perf [.] main
+ * |
+ * --- main
+ *
+ * 50.00% 0.00% perf perf [.] run_command
+ * |
+ * --- run_command
+ * main
+ *
+ * 30.00% 10.00% bash bash [.] main
+ * |
+ * --- main
+ *
+ * 30.00% 10.00% perf perf [.] cmd_record
+ * |
+ * --- cmd_record
+ * run_command
+ * main
+ *
+ * 20.00% 0.00% bash libc [.] malloc
+ * |
+ * --- malloc
+ * |
+ * |--50.00%-- xmalloc
+ * | main
+ * --50.00%-- main
+ *
+ * 10.00% 10.00% bash [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * malloc
+ * main
+ *
+ * 10.00% 10.00% bash bash [.] xmalloc
+ * |
+ * --- xmalloc
+ * malloc
+ * xmalloc <--- NOTE: there's a cycle
+ * malloc
+ * xmalloc
+ * main
+ *
+ * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
+ * |
+ * --- sys_perf_event_open
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * sys_perf_event_open
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf [kernel] [k] schedule
+ * |
+ * --- schedule
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf libc [.] free
+ * |
+ * --- free
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf libc [.] malloc
+ * |
+ * --- malloc
+ * cmd_record
+ * run_command
+ * main
+ *
+ */
+ struct result expected[] = {
+ { 7000, 2000, "perf", "perf", "main" },
+ { 5000, 0, "perf", "perf", "run_command" },
+ { 3000, 1000, "bash", "bash", "main" },
+ { 3000, 1000, "perf", "perf", "cmd_record" },
+ { 2000, 0, "bash", "libc", "malloc" },
+ { 1000, 1000, "bash", "[kernel]", "page_fault" },
+ { 1000, 1000, "bash", "bash", "xmalloc" },
+ { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
+ { 1000, 1000, "perf", "[kernel]", "page_fault" },
+ { 1000, 1000, "perf", "[kernel]", "schedule" },
+ { 1000, 1000, "perf", "libc", "free" },
+ { 1000, 1000, "perf", "libc", "malloc" },
+ };
+ struct callchain_result expected_callchain[] = {
+ {
+ 1, { { "perf", "main" }, },
+ },
+ {
+ 2, { { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 1, { { "bash", "main" }, },
+ },
+ {
+ 3, { { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "bash", "main" },
+ { "bash", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "page_fault" },
+ { "libc", "malloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 6, { { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "sys_perf_event_open" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "[kernel]", "page_fault" },
+ { "[kernel]", "sys_perf_event_open" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "schedule" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "free" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "malloc" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ };
+
+ symbol_conf.use_callchain = true;
+ symbol_conf.cumulate_callchain = true;
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+
+ setup_sorting(NULL);
+
+ callchain_param = callchain_param_default;
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected),
+ expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+int test__hists_cumulate(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = TEST_FAIL;
+ struct machines machines;
+ struct machine *machine;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+ size_t i;
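+ /* the four cases cover {callchain off/on} x {children off/on} */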
+ test_fn_t testcases[] = {
+ test1,
+ test2,
+ test3,
+ test4,
+ };
+
+ TEST_ASSERT_VAL("No memory", evlist);
+
+ err = parse_events(evlist, "cpu-clock", NULL);
+ if (err)
+ goto out;
+ err = TEST_FAIL;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ evsel = perf_evlist__first(evlist);
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ err = testcases[i](evsel, machine);
+ if (err < 0)
+ break;
+ }
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
new file mode 100644
index 000000000..1c5bedab3
--- /dev/null
+++ b/tools/perf/tests/hists_filter.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+struct sample {
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+ int socket;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+ /* perf [kernel] schedule() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, .socket = 0 },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, .socket = 0 },
+ /* perf [libc] malloc() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, .socket = 0 },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, .socket = 0 }, /* will be merged */
+ /* perf [perf] cmd_record() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, .socket = 1 },
+ /* perf [kernel] page_fault() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 1 },
+ /* bash [bash] main() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, .socket = 2 },
+ /* bash [bash] xmalloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, .socket = 2 },
+ /* bash [libc] malloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, .socket = 3 },
+ /* bash [kernel] page_fault() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, .socket = 3 },
+};
+
+static int add_hist_entries(struct perf_evlist *evlist,
+ struct machine *machine)
+{
+ struct perf_evsel *evsel;
+ struct addr_location al;
+ struct perf_sample sample = { .period = 100, };
+ size_t i;
+
+ /*
+ * Each evsel will have 10 samples, but the 4th sample
+ * (perf [perf] main) will be collapsed into an existing entry,
+ * so 9 entries in total will be in the tree.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = &sample,
+ .ops = &hist_iter_normal,
+ .hide_unresolved = false,
+ };
+ struct hists *hists = evsel__hists(evsel);
+
+ /* make sure it has no filter at first */
+ hists->thread_filter = NULL;
+ hists->dso_filter = NULL;
+ hists->symbol_filter_str = NULL;
+
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = fake_samples[i].pid;
+ sample.tid = fake_samples[i].pid;
+ sample.ip = fake_samples[i].ip;
+
+ if (machine__resolve(machine, &al, &sample) < 0)
+ goto out;
+
+ al.socket = fake_samples[i].socket;
+ if (hist_entry_iter__add(&iter, &al,
+ sysctl_perf_event_max_stack, NULL) < 0) {
+ addr_location__put(&al);
+ goto out;
+ }
+
+ fake_samples[i].thread = al.thread;
+ fake_samples[i].map = al.map;
+ fake_samples[i].sym = al.sym;
+ }
+ }
+
+ return 0;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return TEST_FAIL;
+}
+
+int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = TEST_FAIL;
+ struct machines machines;
+ struct machine *machine;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ TEST_ASSERT_VAL("No memory", evlist);
+
+ err = parse_events(evlist, "cpu-clock", NULL);
+ if (err)
+ goto out;
+ err = parse_events(evlist, "task-clock", NULL);
+ if (err)
+ goto out;
+ err = TEST_FAIL;
+
+ /* default sort order (comm,dso,sym) will be used */
+ if (setup_sorting(NULL) < 0)
+ goto out;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ /* process sample events */
+ err = add_hist_entries(evlist, machine);
+ if (err < 0)
+ goto out;
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct hists *hists = evsel__hists(evsel);
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ if (verbose > 2) {
+ pr_info("Normal histogram\n");
+ print_hists_out(hists);
+ }
+
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+ TEST_ASSERT_VAL("Unmatched nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] ==
+ hists->stats.nr_non_filtered_samples);
+ TEST_ASSERT_VAL("Unmatched nr hist entries",
+ hists->nr_entries == hists->nr_non_filtered_entries);
+ TEST_ASSERT_VAL("Unmatched total period",
+ hists->stats.total_period ==
+ hists->stats.total_non_filtered_period);
+
+ /* now applying thread filter for 'bash' */
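+ /* fake_samples[9] is one of the 4 bash (pid 300) samples */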
+ hists->thread_filter = fake_samples[9].thread;
+ hists__filter_by_thread(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for thread filter\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for thread filter",
+ hists->stats.nr_non_filtered_samples == 4);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for thread filter",
+ hists->nr_non_filtered_entries == 4);
+ TEST_ASSERT_VAL("Unmatched total period for thread filter",
+ hists->stats.total_non_filtered_period == 400);
+
+ /* remove thread filter first */
+ hists->thread_filter = NULL;
+ hists__filter_by_thread(hists);
+
+ /* now applying dso filter for 'kernel' */
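+ /* fake_samples[0] resolved to [kernel], which 3 of the 10 samples hit */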
+ hists->dso_filter = fake_samples[0].map->dso;
+ hists__filter_by_dso(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for dso filter\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for dso filter",
+ hists->stats.nr_non_filtered_samples == 3);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for dso filter",
+ hists->nr_non_filtered_entries == 3);
+ TEST_ASSERT_VAL("Unmatched total period for dso filter",
+ hists->stats.total_non_filtered_period == 300);
+
+ /* remove dso filter first */
+ hists->dso_filter = NULL;
+ hists__filter_by_dso(hists);
+
+ /*
+ * now applying symbol filter for 'main'. Note that there
+ * are 3 samples with the 'main' symbol, but the 4th entry
+ * of fake_samples was already collapsed, so it won't be
+ * counted as a separate entry; its sample count and
+ * period are still included in the totals.
+ */
+ hists->symbol_filter_str = "main";
+ hists__filter_by_symbol(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for symbol filter\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for symbol filter",
+ hists->stats.nr_non_filtered_samples == 3);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for symbol filter",
+ hists->nr_non_filtered_entries == 2);
+ TEST_ASSERT_VAL("Unmatched total period for symbol filter",
+ hists->stats.total_non_filtered_period == 300);
+
+ /* remove symbol filter first */
+ hists->symbol_filter_str = NULL;
+ hists__filter_by_symbol(hists);
+
+ /* now applying socket filters */
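+ /* socket 2 covers exactly two samples: bash main and bash xmalloc */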
+ hists->socket_filter = 2;
+ hists__filter_by_socket(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for socket filters\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for socket filter",
+ hists->stats.nr_non_filtered_samples == 2);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for socket filter",
+ hists->nr_non_filtered_entries == 2);
+ TEST_ASSERT_VAL("Unmatched total period for socket filter",
+ hists->stats.total_non_filtered_period == 200);
+
+ /* remove socket filter first */
+ hists->socket_filter = -1;
+ hists__filter_by_socket(hists);
+
+ /* now applying all filters at once. */
+ hists->thread_filter = fake_samples[1].thread;
+ hists->dso_filter = fake_samples[1].map->dso;
+ hists__filter_by_thread(hists);
+ hists__filter_by_dso(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for all filters\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for all filter",
+ hists->stats.nr_non_filtered_samples == 2);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for all filter",
+ hists->nr_non_filtered_entries == 1);
+ TEST_ASSERT_VAL("Unmatched total period for all filter",
+ hists->stats.total_non_filtered_period == 200);
+ }
+
+ err = TEST_OK;
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ reset_output_field();
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
new file mode 100644
index 000000000..9a9d06cb0
--- /dev/null
+++ b/tools/perf/tests/hists_link.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "tests.h"
+#include "debug.h"
+#include "symbol.h"
+#include "sort.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "thread.h"
+#include "parse-events.h"
+#include "hists_common.h"
+#include <errno.h>
+#include <linux/kernel.h>
+
+struct sample {
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_common_samples[] = {
+ /* perf [kernel] schedule() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [perf] cmd_record() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
+ /* bash [bash] xmalloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
+ /* bash [libc] malloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
+};
+
+static struct sample fake_samples[][5] = {
+ {
+ /* perf [perf] run_command() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
+ /* perf [libc] malloc() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* perf [kernel] page_fault() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ /* perf [kernel] sys_perf_event_open() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
+ /* bash [libc] free() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, },
+ },
+ {
+ /* perf [libc] free() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
+ /* bash [libc] malloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
+ /* bash [bash] xfree() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, },
+ /* bash [libc] realloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, },
+ /* bash [kernel] page_fault() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ },
+};
+
+static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
+{
+ struct perf_evsel *evsel;
+ struct addr_location al;
+ struct hist_entry *he;
+ struct perf_sample sample = { .period = 1, .weight = 1, };
+ size_t i = 0, k;
+
+ /*
+ * Each evsel will have 10 samples - 5 common and 5 distinct.
+ * However, the second evsel also has a collapsed entry for
+ * "bash [libc] malloc", so 9 entries in total will be in the tree.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ struct hists *hists = evsel__hists(evsel);
+
+ for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = fake_common_samples[k].pid;
+ sample.tid = fake_common_samples[k].pid;
+ sample.ip = fake_common_samples[k].ip;
+
+ if (machine__resolve(machine, &al, &sample) < 0)
+ goto out;
+
+ he = hists__add_entry(hists, &al, NULL,
+ NULL, NULL, &sample, true);
+ if (he == NULL) {
+ addr_location__put(&al);
+ goto out;
+ }
+
+ fake_common_samples[k].thread = al.thread;
+ fake_common_samples[k].map = al.map;
+ fake_common_samples[k].sym = al.sym;
+ }
+
+ for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
+ sample.pid = fake_samples[i][k].pid;
+ sample.tid = fake_samples[i][k].pid;
+ sample.ip = fake_samples[i][k].ip;
+ if (machine__resolve(machine, &al, &sample) < 0)
+ goto out;
+
+ he = hists__add_entry(hists, &al, NULL,
+ NULL, NULL, &sample, true);
+ if (he == NULL) {
+ addr_location__put(&al);
+ goto out;
+ }
+
+ fake_samples[i][k].thread = al.thread;
+ fake_samples[i][k].map = al.map;
+ fake_samples[i][k].sym = al.sym;
+ }
+ i++;
+ }
+
+ return 0;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return -1;
+}
+
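+/* return 1 if (t, m, s) matches one of the given fake samples */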
+static int find_sample(struct sample *samples, size_t nr_samples,
+ struct thread *t, struct map *m, struct symbol *s)
+{
+ while (nr_samples--) {
+ if (samples->thread == t && samples->map == m &&
+ samples->sym == s)
+ return 1;
+ samples++;
+ }
+ return 0;
+}
+
+static int __validate_match(struct hists *hists)
+{
+ size_t count = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ /*
+ * Only entries from fake_common_samples should have a pair.
+ */
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(he)) {
+ if (find_sample(fake_common_samples,
+ ARRAY_SIZE(fake_common_samples),
+ he->thread, he->ms.map, he->ms.sym)) {
+ count++;
+ } else {
+ pr_debug("Can't find the matched entry\n");
+ return -1;
+ }
+ }
+
+ node = rb_next(node);
+ }
+
+ if (count != ARRAY_SIZE(fake_common_samples)) {
+ pr_debug("Invalid count for matched entries: %zd of %zd\n",
+ count, ARRAY_SIZE(fake_common_samples));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int validate_match(struct hists *leader, struct hists *other)
+{
+ return __validate_match(leader) || __validate_match(other);
+}
+
+static int __validate_link(struct hists *hists, int idx)
+{
+ size_t count = 0;
+ size_t count_pair = 0;
+ size_t count_dummy = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ /*
+ * Leader hists (idx = 0) will have dummy entries from the other,
+ * and some entries will have no pair. However, every entry in
+ * the other hists should have a (dummy) pair.
+ */
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(he)) {
+ if (!find_sample(fake_common_samples,
+ ARRAY_SIZE(fake_common_samples),
+ he->thread, he->ms.map, he->ms.sym) &&
+ !find_sample(fake_samples[idx],
+ ARRAY_SIZE(fake_samples[idx]),
+ he->thread, he->ms.map, he->ms.sym)) {
+ count_dummy++;
+ }
+ count_pair++;
+ } else if (idx) {
+ pr_debug("A entry from the other hists should have pair\n");
+ return -1;
+ }
+
+ count++;
+ node = rb_next(node);
+ }
+
+ /*
+ * Note that we have an entry collapsed in the other (idx = 1) hists.
+ */
+ if (idx == 0) {
+ if (count_dummy != ARRAY_SIZE(fake_samples[1]) - 1) {
+ pr_debug("Invalid count of dummy entries: %zd of %zd\n",
+ count_dummy, ARRAY_SIZE(fake_samples[1]) - 1);
+ return -1;
+ }
+ if (count != count_pair + ARRAY_SIZE(fake_samples[0])) {
+ pr_debug("Invalid count of total leader entries: %zd of %zd\n",
+ count, count_pair + ARRAY_SIZE(fake_samples[0]));
+ return -1;
+ }
+ } else {
+ if (count != count_pair) {
+ pr_debug("Invalid count of total other entries: %zd of %zd\n",
+ count, count_pair);
+ return -1;
+ }
+ if (count_dummy > 0) {
+ pr_debug("Other hists should not have dummy entries: %zd\n",
+ count_dummy);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int validate_link(struct hists *leader, struct hists *other)
+{
+ return __validate_link(leader, 0) || __validate_link(other, 1);
+}
+
+int test__hists_link(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = -1;
+ struct hists *hists, *first_hists;
+ struct machines machines;
+ struct machine *machine = NULL;
+ struct perf_evsel *evsel, *first;
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ err = parse_events(evlist, "cpu-clock", NULL);
+ if (err)
+ goto out;
+ err = parse_events(evlist, "task-clock", NULL);
+ if (err)
+ goto out;
+
+ err = TEST_FAIL;
+ /* default sort order (comm,dso,sym) will be used */
+ if (setup_sorting(NULL) < 0)
+ goto out;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ /* process sample events */
+ err = add_hist_entries(evlist, machine);
+ if (err < 0)
+ goto out;
+
+ evlist__for_each_entry(evlist, evsel) {
+ hists = evsel__hists(evsel);
+ hists__collapse_resort(hists, NULL);
+
+ if (verbose > 2)
+ print_hists_in(hists);
+ }
+
+ first = perf_evlist__first(evlist);
+ evsel = perf_evlist__last(evlist);
+
+ first_hists = evsel__hists(first);
+ hists = evsel__hists(evsel);
+
+ /* match common entries */
+ hists__match(first_hists, hists);
+ err = validate_match(first_hists, hists);
+ if (err)
+ goto out;
+
+ /* link common and/or dummy entries */
+ hists__link(first_hists, hists);
+ err = validate_link(first_hists, hists);
+ if (err)
+ goto out;
+
+ err = 0;
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ reset_output_field();
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
new file mode 100644
index 000000000..faacb4f41
--- /dev/null
+++ b/tools/perf/tests/hists_output.c
@@ -0,0 +1,625 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+#include <linux/kernel.h>
+
+struct sample {
+ u32 cpu;
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+ /* perf [kernel] schedule() */
+ { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+ /* perf [perf] main() */
+ { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [perf] cmd_record() */
+ { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+ /* perf [libc] malloc() */
+ { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* perf [libc] free() */
+ { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+ /* perf [perf] main() */
+ { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [kernel] page_fault() */
+ { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ /* bash [bash] main() */
+ { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
+ /* bash [bash] xmalloc() */
+ { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
+ /* bash [kernel] page_fault() */
+ { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+ struct addr_location al;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct perf_sample sample = { .period = 100, };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ struct hist_entry_iter iter = {
+ .evsel = evsel,
+ .sample = &sample,
+ .ops = &hist_iter_normal,
+ .hide_unresolved = false,
+ };
+
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.cpu = fake_samples[i].cpu;
+ sample.pid = fake_samples[i].pid;
+ sample.tid = fake_samples[i].pid;
+ sample.ip = fake_samples[i].ip;
+
+ if (machine__resolve(machine, &al, &sample) < 0)
+ goto out;
+
+ if (hist_entry_iter__add(&iter, &al, sysctl_perf_event_max_stack,
+ NULL) < 0) {
+ addr_location__put(&al);
+ goto out;
+ }
+
+ fake_samples[i].thread = al.thread;
+ fake_samples[i].map = al.map;
+ fake_samples[i].sym = al.sym;
+ }
+
+ return TEST_OK;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+ struct hist_entry *he;
+ struct rb_root *root_in;
+ struct rb_root *root_out;
+ struct rb_node *node;
+
+ if (hists__has(hists, need_collapse))
+ root_in = &hists->entries_collapsed;
+ else
+ root_in = hists->entries_in;
+
+ root_out = &hists->entries;
+
+ while (!RB_EMPTY_ROOT(root_out)) {
+ node = rb_first(root_out);
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+ rb_erase(node, root_out);
+ rb_erase(&he->rb_node_in, root_in);
+ hist_entry__delete(he);
+ }
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
+#define COMM(he) (thread__comm_str(he->thread))
+#define DSO(he) (he->ms.map->dso->short_name)
+#define SYM(he) (he->ms.sym->name)
+#define CPU(he) (he->cpu)
+#define PID(he) (he->thread->tid)
+
+/* default sort keys (no field) */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = NULL;
+ sort_order = NULL; /* equivalent to sort_order = "comm,dso,sym" */
+
+ setup_sorting(NULL);
+
+ /*
+ * expected output:
+ *
+ * Overhead Command Shared Object Symbol
+ * ======== ======= ============= ==============
+ * 20.00% perf perf [.] main
+ * 10.00% bash [kernel] [k] page_fault
+ * 10.00% bash bash [.] main
+ * 10.00% bash bash [.] xmalloc
+ * 10.00% perf [kernel] [k] page_fault
+ * 10.00% perf [kernel] [k] schedule
+ * 10.00% perf libc [.] free
+ * 10.00% perf libc [.] malloc
+ * 10.00% perf perf [.] cmd_record
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "xmalloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "schedule") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "free") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "malloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "cmd_record") && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* mixed fields and sort keys */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "overhead,cpu";
+ sort_order = "pid";
+
+ setup_sorting(NULL);
+
+ /*
+ * expected output:
+ *
+ * Overhead CPU Command: Pid
+ * ======== === =============
+ * 30.00% 1 perf : 100
+ * 10.00% 0 perf : 100
+ * 10.00% 2 perf : 100
+ * 20.00% 2 perf : 200
+ * 10.00% 0 bash : 300
+ * 10.00% 1 bash : 300
+ * 10.00% 3 bash : 300
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 0 && PID(he) == 100 && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* fields only (no sort key) */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "comm,overhead,dso";
+ sort_order = NULL;
+
+ setup_sorting(NULL);
+
+ /*
+ * expected output:
+ *
+ * Command Overhead Shared Object
+ * ======= ======== =============
+ * bash 20.00% bash
+ * bash 10.00% [kernel]
+ * perf 30.00% perf
+ * perf 20.00% [kernel]
+ * perf 20.00% libc
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+ he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ he->stat.period == 300);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ he->stat.period == 200);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* handle duplicate 'dso' field */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "dso,sym,comm,overhead,dso";
+ sort_order = "sym";
+
+ setup_sorting(NULL);
+
+ /*
+ * expected output:
+ *
+ * Shared Object Symbol Command Overhead
+ * ============= ============== ======= ========
+ * perf [.] cmd_record perf 10.00%
+ * libc [.] free perf 10.00%
+ * bash [.] main bash 10.00%
+ * perf [.] main perf 20.00%
+ * libc [.] malloc perf 10.00%
+ * [kernel] [k] page_fault bash 10.00%
+ * [kernel] [k] page_fault perf 10.00%
+ * [kernel] [k] schedule perf 10.00%
+ * bash [.] xmalloc bash 10.00%
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "libc") && !strcmp(SYM(he), "free") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "bash") && !strcmp(SYM(he), "main") &&
+ !strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "main") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "libc") && !strcmp(SYM(he), "malloc") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "page_fault") &&
+ !strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "page_fault") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "schedule") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "bash") && !strcmp(SYM(he), "xmalloc") &&
+ !strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* full sort keys w/o overhead field */
+static int test5(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "cpu,pid,comm,dso,sym";
+ sort_order = "dso,pid";
+
+ setup_sorting(NULL);
+
+ /*
+ * expected output:
+ *
+ * CPU Command: Pid Command Shared Object Symbol
+ * === ============= ======= ============= ==============
+ * 0 perf: 100 perf [kernel] [k] schedule
+ * 2 perf: 200 perf [kernel] [k] page_fault
+ * 1 bash: 300 bash [kernel] [k] page_fault
+ * 0 bash: 300 bash bash [.] xmalloc
+ * 3 bash: 300 bash bash [.] main
+ * 1 perf: 100 perf libc [.] malloc
+ * 2 perf: 100 perf libc [.] free
+ * 1 perf: 100 perf perf [.] cmd_record
+ * 1 perf: 100 perf perf [.] main
+ * 2 perf: 200 perf perf [.] main
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ perf_evsel__output_resort(evsel, NULL);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 0 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "schedule") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 2 && PID(he) == 200 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 300 &&
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 0 && PID(he) == 300 &&
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "xmalloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 3 && PID(he) == 300 &&
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "malloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 2 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "free") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "cmd_record") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 2 && PID(he) == 200 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+int test__hists_output(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = TEST_FAIL;
+ struct machines machines;
+ struct machine *machine;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+ size_t i;
+ test_fn_t testcases[] = {
+ test1,
+ test2,
+ test3,
+ test4,
+ test5,
+ };
+
+ TEST_ASSERT_VAL("No memory", evlist);
+
+ err = parse_events(evlist, "cpu-clock", NULL);
+ if (err)
+ goto out;
+ err = TEST_FAIL;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ evsel = perf_evlist__first(evlist);
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ err = testcases[i](evsel, machine);
+ if (err < 0)
+ break;
+ }
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/is_printable_array.c b/tools/perf/tests/is_printable_array.c
new file mode 100644
index 000000000..9c7b3baca
--- /dev/null
+++ b/tools/perf/tests/is_printable_array.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include "tests.h"
+#include "debug.h"
+#include "print_binary.h"
+
+int test__is_printable_array(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ char buf1[] = { 'k', 'r', 4, 'v', 'a', 0 };
+ char buf2[] = { 'k', 'r', 'a', 'v', 4, 0 };
+ struct {
+ char *buf;
+ unsigned int len;
+ int ret;
+ } t[] = {
+ { (char *) "krava", sizeof("krava"), 1 },
+ { (char *) "krava", sizeof("krava") - 1, 0 },
+ { (char *) "", sizeof(""), 1 },
+ { (char *) "", 0, 0 },
+ { NULL, 0, 0 },
+ { buf1, sizeof(buf1), 0 },
+ { buf2, sizeof(buf2), 0 },
+ };
+ unsigned int i;
+
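+	/*
+	 * The failing rows above are deliberate: "krava" without its
+	 * trailing NUL, the buffers embedding the non-printable byte 4,
+	 * a zero length and a NULL pointer must all be rejected by
+	 * is_printable_array().
+	 */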
+ for (i = 0; i < ARRAY_SIZE(t); i++) {
+ int ret;
+
+ ret = is_printable_array((char *) t[i].buf, t[i].len);
+ if (ret != t[i].ret) {
+ pr_err("failed: test %u\n", i);
+ return TEST_FAIL;
+ }
+ }
+
+ return TEST_OK;
+}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
new file mode 100644
index 000000000..17c46f3e6
--- /dev/null
+++ b/tools/perf/tests/keep-tracking.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+#define CHECK__(x) { \
+ while ((x) < 0) { \
+ pr_debug(#x " failed!\n"); \
+ goto out_err; \
+ } \
+}
+
+#define CHECK_NOT_NULL__(x) { \
+ while ((x) == NULL) { \
+ pr_debug(#x " failed!\n"); \
+ goto out_err; \
+ } \
+}
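+
+/*
+ * Both helpers evaluate their argument once inside a while loop that is
+ * left via goto on failure; e.g. CHECK__(pipe(fd)) (a hypothetical use)
+ * logs "pipe(fd) failed!" and jumps to out_err when pipe() returns a
+ * negative value, and falls through otherwise.
+ */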
+
+static int find_comm(struct perf_evlist *evlist, const char *comm)
+{
+ union perf_event *event;
+ struct perf_mmap *md;
+ int i, found;
+
+ found = 0;
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ if (event->header.type == PERF_RECORD_COMM &&
+ (pid_t)event->comm.pid == getpid() &&
+ (pid_t)event->comm.tid == getpid() &&
+ strcmp(event->comm.comm, comm) == 0)
+ found += 1;
+ perf_mmap__consume(md);
+ }
+ perf_mmap__read_done(md);
+ }
+ return found;
+}
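+
+/*
+ * Note: find_comm() follows the canonical perf ring-buffer protocol:
+ * perf_mmap__read_init() snapshots the buffer state, perf_mmap__read_event()
+ * returns records until NULL, perf_mmap__consume() releases each record,
+ * and perf_mmap__read_done() publishes the updated tail.
+ */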
+
+/**
+ * test__keep_tracking - test using a dummy software event to keep tracking.
+ *
+ * This function implements a test that checks that tracking events continue
+ * to be delivered when an event is disabled but a dummy software event is not
+ * disabled. If the test passes %0 is returned, otherwise %-1 is returned
+ * (or %TEST_SKIP if the events cannot be opened).
+ */
+int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct record_opts opts = {
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .target = {
+ .uses_mmap = true,
+ },
+ };
+ struct thread_map *threads = NULL;
+ struct cpu_map *cpus = NULL;
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel = NULL;
+ int found, err = -1;
+ const char *comm;
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ CHECK_NOT_NULL__(threads);
+
+ cpus = cpu_map__new(NULL);
+ CHECK_NOT_NULL__(cpus);
+
+ evlist = perf_evlist__new();
+ CHECK_NOT_NULL__(evlist);
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ CHECK__(parse_events(evlist, "dummy:u", NULL));
+ CHECK__(parse_events(evlist, "cycles:u", NULL));
+
+ perf_evlist__config(evlist, &opts, NULL);
+
+ evsel = perf_evlist__first(evlist);
+
+ evsel->attr.comm = 1;
+ evsel->attr.disabled = 1;
+ evsel->attr.enable_on_exec = 0;
+
+ if (perf_evlist__open(evlist) < 0) {
+ pr_debug("Unable to open dummy and cycles event\n");
+ err = TEST_SKIP;
+ goto out_err;
+ }
+
+ CHECK__(perf_evlist__mmap(evlist, UINT_MAX));
+
+ /*
+ * First, test that a 'comm' event can be found when the event is
+ * enabled.
+ */
+
+ perf_evlist__enable(evlist);
+
+ comm = "Test COMM 1";
+ CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
+
+ perf_evlist__disable(evlist);
+
+ found = find_comm(evlist, comm);
+ if (found != 1) {
+ pr_debug("First time, failed to find tracking event.\n");
+ goto out_err;
+ }
+
+ /*
+ * Secondly, test that a 'comm' event can be found when the event is
+ * disabled with the dummy event still enabled.
+ */
+
+ perf_evlist__enable(evlist);
+
+ evsel = perf_evlist__last(evlist);
+
+ CHECK__(perf_evsel__disable(evsel));
+
+ comm = "Test COMM 2";
+ CHECK__(prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0));
+
+ perf_evlist__disable(evlist);
+
+ found = find_comm(evlist, comm);
+ if (found != 1) {
+		pr_debug("Second time, failed to find tracking event.\n");
+ goto out_err;
+ }
+
+ err = 0;
+
+out_err:
+ if (evlist) {
+ perf_evlist__disable(evlist);
+ perf_evlist__delete(evlist);
+ } else {
+ cpu_map__put(cpus);
+ thread_map__put(threads);
+ }
+
+ return err;
+}
diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
new file mode 100644
index 000000000..0579a70bb
--- /dev/null
+++ b/tools/perf/tests/kmod-path.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <stdlib.h>
+#include "tests.h"
+#include "dso.h"
+#include "debug.h"
+
+static int test(const char *path, bool alloc_name, bool kmod,
+ int comp, const char *name)
+{
+ struct kmod_path m;
+
+ memset(&m, 0x0, sizeof(m));
+
+ TEST_ASSERT_VAL("kmod_path__parse",
+ !__kmod_path__parse(&m, path, alloc_name));
+
+ pr_debug("%s - alloc name %d, kmod %d, comp %d, name '%s'\n",
+ path, alloc_name, m.kmod, m.comp, m.name);
+
+ TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod);
+ TEST_ASSERT_VAL("wrong comp", m.comp == comp);
+
+ if (name)
+ TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name));
+ else
+ TEST_ASSERT_VAL("wrong name", !m.name);
+
+ free(m.name);
+ return 0;
+}
+
+static int test_is_kernel_module(const char *path, int cpumode, bool expect)
+{
+ TEST_ASSERT_VAL("is_kernel_module",
+ (!!is_kernel_module(path, cpumode)) == (!!expect));
+ pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n",
+ path, cpumode, expect ? "true" : "false");
+ return 0;
+}
+
+#define T(path, an, k, c, n) \
+ TEST_ASSERT_VAL("failed", !test(path, an, k, c, n))
+
+#define M(path, c, e) \
+ TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
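+
+/*
+ * For example, T("/xxxx/xxxx/x-x.ko", true, true, 0, "[x_x]") below
+ * asserts that the path parses as an uncompressed kernel module whose
+ * dso name is "[x_x]", and M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL,
+ * true) asserts that is_kernel_module() agrees for kernel-mode samples.
+ */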
+
+int test__kmod_path__parse(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+ /* path alloc_name kmod comp name */
+ T("/xxxx/xxxx/x-x.ko", true , true, 0 , "[x_x]");
+ T("/xxxx/xxxx/x-x.ko", false , true, 0 , NULL );
+ T("/xxxx/xxxx/x-x.ko", true , true, 0 , "[x_x]");
+ T("/xxxx/xxxx/x-x.ko", false , true, 0 , NULL );
+ M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
+ M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
+
+#ifdef HAVE_ZLIB_SUPPORT
+ /* path alloc_name kmod comp name */
+ T("/xxxx/xxxx/x.ko.gz", true , true, 1 , "[x]");
+ T("/xxxx/xxxx/x.ko.gz", false , true, 1 , NULL );
+ T("/xxxx/xxxx/x.ko.gz", true , true, 1 , "[x]");
+ T("/xxxx/xxxx/x.ko.gz", false , true, 1 , NULL );
+ M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+ M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("/xxxx/xxxx/x.gz", true , false, 1 , "x.gz");
+ T("/xxxx/xxxx/x.gz", false , false, 1 , NULL );
+ T("/xxxx/xxxx/x.gz", true , false, 1 , "x.gz");
+ T("/xxxx/xxxx/x.gz", false , false, 1 , NULL );
+ M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
+ M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("x.gz", true , false, 1 , "x.gz");
+ T("x.gz", false , false, 1 , NULL );
+ T("x.gz", true , false, 1 , "x.gz");
+ T("x.gz", false , false, 1 , NULL );
+ M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("x.gz", PERF_RECORD_MISC_KERNEL, false);
+ M("x.gz", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("x.ko.gz", true , true, 1 , "[x]");
+ T("x.ko.gz", false , true, 1 , NULL );
+ T("x.ko.gz", true , true, 1 , "[x]");
+ T("x.ko.gz", false , true, 1 , NULL );
+ M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+ M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+#endif
+
+ /* path alloc_name kmod comp name */
+ T("[test_module]", true , true, false, "[test_module]");
+ T("[test_module]", false , true, false, NULL );
+ T("[test_module]", true , true, false, "[test_module]");
+ T("[test_module]", false , true, false, NULL );
+ M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
+ M("[test_module]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("[test.module]", true , true, false, "[test.module]");
+ T("[test.module]", false , true, false, NULL );
+ T("[test.module]", true , true, false, "[test.module]");
+ T("[test.module]", false , true, false, NULL );
+ M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+ M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
+ M("[test.module]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("[vdso]", true , false, false, "[vdso]");
+ T("[vdso]", false , false, false, NULL );
+ T("[vdso]", true , false, false, "[vdso]");
+ T("[vdso]", false , false, false, NULL );
+ M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
+ M("[vdso]", PERF_RECORD_MISC_USER, false);
+
+ T("[vdso32]", true , false, false, "[vdso32]");
+ T("[vdso32]", false , false, false, NULL );
+ T("[vdso32]", true , false, false, "[vdso32]");
+ T("[vdso32]", false , false, false, NULL );
+ M("[vdso32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[vdso32]", PERF_RECORD_MISC_KERNEL, false);
+ M("[vdso32]", PERF_RECORD_MISC_USER, false);
+
+ T("[vdsox32]", true , false, false, "[vdsox32]");
+ T("[vdsox32]", false , false, false, NULL );
+ T("[vdsox32]", true , false, false, "[vdsox32]");
+ T("[vdsox32]", false , false, false, NULL );
+ M("[vdsox32]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[vdsox32]", PERF_RECORD_MISC_KERNEL, false);
+ M("[vdsox32]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("[vsyscall]", true , false, false, "[vsyscall]");
+ T("[vsyscall]", false , false, false, NULL );
+ T("[vsyscall]", true , false, false, "[vsyscall]");
+ T("[vsyscall]", false , false, false, NULL );
+ M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
+ M("[vsyscall]", PERF_RECORD_MISC_USER, false);
+
+ /* path alloc_name kmod comp name */
+ T("[kernel.kallsyms]", true , false, false, "[kernel.kallsyms]");
+ T("[kernel.kallsyms]", false , false, false, NULL );
+ T("[kernel.kallsyms]", true , false, false, "[kernel.kallsyms]");
+ T("[kernel.kallsyms]", false , false, false, NULL );
+ M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+ M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
+ M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
+
+ return 0;
+}
diff --git a/tools/perf/tests/llvm.c b/tools/perf/tests/llvm.c
new file mode 100644
index 000000000..a039f9319
--- /dev/null
+++ b/tools/perf/tests/llvm.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <bpf/libbpf.h>
+#include <util/llvm-utils.h>
+#include <util/cache.h>
+#include "llvm.h"
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+
+#ifdef HAVE_LIBBPF_SUPPORT
+static int test__bpf_parsing(void *obj_buf, size_t obj_buf_sz)
+{
+ struct bpf_object *obj;
+
+ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, NULL);
+ if (libbpf_get_error(obj))
+ return TEST_FAIL;
+ bpf_object__close(obj);
+ return TEST_OK;
+}
+#else
+static int test__bpf_parsing(void *obj_buf __maybe_unused,
+ size_t obj_buf_sz __maybe_unused)
+{
+ pr_debug("Skip bpf parsing\n");
+ return TEST_OK;
+}
+#endif
+
+static struct {
+ const char *source;
+ const char *desc;
+ bool should_load_fail;
+} bpf_source_table[__LLVM_TESTCASE_MAX] = {
+ [LLVM_TESTCASE_BASE] = {
+ .source = test_llvm__bpf_base_prog,
+ .desc = "Basic BPF llvm compile",
+ },
+ [LLVM_TESTCASE_KBUILD] = {
+ .source = test_llvm__bpf_test_kbuild_prog,
+ .desc = "kbuild searching",
+ },
+ [LLVM_TESTCASE_BPF_PROLOGUE] = {
+ .source = test_llvm__bpf_test_prologue_prog,
+ .desc = "Compile source for BPF prologue generation",
+ },
+ [LLVM_TESTCASE_BPF_RELOCATION] = {
+ .source = test_llvm__bpf_test_relocation,
+ .desc = "Compile source for BPF relocation",
+ .should_load_fail = true,
+ },
+};
+
+int
+test_llvm__fetch_bpf_obj(void **p_obj_buf,
+ size_t *p_obj_buf_sz,
+ enum test_llvm__testcase idx,
+ bool force,
+ bool *should_load_fail)
+{
+ const char *source;
+ const char *desc;
+ const char *tmpl_old, *clang_opt_old;
+ char *tmpl_new = NULL, *clang_opt_new = NULL;
+ int err, old_verbose, ret = TEST_FAIL;
+
+ if (idx >= __LLVM_TESTCASE_MAX)
+ return TEST_FAIL;
+
+ source = bpf_source_table[idx].source;
+ desc = bpf_source_table[idx].desc;
+ if (should_load_fail)
+ *should_load_fail = bpf_source_table[idx].should_load_fail;
+
+ /*
+	 * Skip this test if the user's .perfconfig doesn't set the [llvm]
+	 * section, clang is not found in $PATH, and this is not 'perf test -v'.
+ */
+ if (!force && (verbose <= 0 &&
+ !llvm_param.user_set_param &&
+ llvm__search_clang())) {
+		pr_debug("No clang found and not in verbose mode, skipping this test\n");
+ return TEST_SKIP;
+ }
+
+ /*
+	 * llvm is verbose on error. Suppress all error output if
+	 * not running 'perf test -v'.
+ */
+ old_verbose = verbose;
+ if (verbose == 0)
+ verbose = -1;
+
+ *p_obj_buf = NULL;
+ *p_obj_buf_sz = 0;
+
+ if (!llvm_param.clang_bpf_cmd_template)
+ goto out;
+
+ if (!llvm_param.clang_opt)
+ llvm_param.clang_opt = strdup("");
+
+ err = asprintf(&tmpl_new, "echo '%s' | %s%s", source,
+ llvm_param.clang_bpf_cmd_template,
+ old_verbose ? "" : " 2>/dev/null");
+ if (err < 0)
+ goto out;
+ err = asprintf(&clang_opt_new, "-xc %s", llvm_param.clang_opt);
+ if (err < 0)
+ goto out;
+
+ tmpl_old = llvm_param.clang_bpf_cmd_template;
+ llvm_param.clang_bpf_cmd_template = tmpl_new;
+ clang_opt_old = llvm_param.clang_opt;
+ llvm_param.clang_opt = clang_opt_new;
+
+ err = llvm__compile_bpf("-", p_obj_buf, p_obj_buf_sz);
+
+ llvm_param.clang_bpf_cmd_template = tmpl_old;
+ llvm_param.clang_opt = clang_opt_old;
+
+ verbose = old_verbose;
+ if (err)
+ goto out;
+
+ ret = TEST_OK;
+out:
+ free(tmpl_new);
+ free(clang_opt_new);
+ if (ret != TEST_OK)
+ pr_debug("Failed to compile test case: '%s'\n", desc);
+ return ret;
+}
+
+int test__llvm(struct test *test __maybe_unused, int subtest)
+{
+ int ret;
+ void *obj_buf = NULL;
+ size_t obj_buf_sz = 0;
+ bool should_load_fail = false;
+
+ if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
+ return TEST_FAIL;
+
+ ret = test_llvm__fetch_bpf_obj(&obj_buf, &obj_buf_sz,
+ subtest, false, &should_load_fail);
+
+ if (ret == TEST_OK && !should_load_fail) {
+ ret = test__bpf_parsing(obj_buf, obj_buf_sz);
+ if (ret != TEST_OK) {
+ pr_debug("Failed to parse test case '%s'\n",
+ bpf_source_table[subtest].desc);
+ }
+ }
+ free(obj_buf);
+
+ return ret;
+}
+
+int test__llvm_subtest_get_nr(void)
+{
+ return __LLVM_TESTCASE_MAX;
+}
+
+const char *test__llvm_subtest_get_desc(int subtest)
+{
+ if ((subtest < 0) || (subtest >= __LLVM_TESTCASE_MAX))
+ return NULL;
+
+ return bpf_source_table[subtest].desc;
+}
diff --git a/tools/perf/tests/llvm.h b/tools/perf/tests/llvm.h
new file mode 100644
index 000000000..f68b0d9b8
--- /dev/null
+++ b/tools/perf/tests/llvm.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_TEST_LLVM_H
+#define PERF_TEST_LLVM_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h> /* for size_t */
+#include <stdbool.h> /* for bool */
+
+extern const char test_llvm__bpf_base_prog[];
+extern const char test_llvm__bpf_test_kbuild_prog[];
+extern const char test_llvm__bpf_test_prologue_prog[];
+extern const char test_llvm__bpf_test_relocation[];
+
+enum test_llvm__testcase {
+ LLVM_TESTCASE_BASE,
+ LLVM_TESTCASE_KBUILD,
+ LLVM_TESTCASE_BPF_PROLOGUE,
+ LLVM_TESTCASE_BPF_RELOCATION,
+ __LLVM_TESTCASE_MAX,
+};
+
+int test_llvm__fetch_bpf_obj(void **p_obj_buf, size_t *p_obj_buf_sz,
+ enum test_llvm__testcase index, bool force,
+ bool *should_load_fail);
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
new file mode 100644
index 000000000..e46723568
--- /dev/null
+++ b/tools/perf/tests/make
@@ -0,0 +1,368 @@
+include ../scripts/Makefile.include
+
+ifndef MK
+ifeq ($(MAKECMDGOALS),)
+# no target specified, trigger the whole suite
+all:
+ @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile
+ @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf SET_PARALLEL=1 SET_O=1
+else
+# run only specific test over 'Makefile'
+%:
+ @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile $@
+endif
+else
+PERF := .
+PERF_O := $(PERF)
+O_OPT :=
+FULL_O := $(shell readlink -f $(PERF_O) || echo $(PERF_O))
+
+ifneq ($(O),)
+ FULL_O := $(shell readlink -f $(O) || echo $(O))
+ PERF_O := $(FULL_O)
+ ifeq ($(SET_O),1)
+ O_OPT := 'O=$(FULL_O)'
+ endif
+ K_O_OPT := 'O=$(FULL_O)'
+endif
+
+PARALLEL_OPT=
+ifeq ($(SET_PARALLEL),1)
+ cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+ ifeq ($(cores),0)
+ cores := 1
+ endif
+ PARALLEL_OPT="-j$(cores)"
+endif
+
+# As per kernel Makefile, avoid funny character set dependencies
+unexport LC_ALL
+LC_COLLATE=C
+LC_NUMERIC=C
+export LC_COLLATE LC_NUMERIC
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+include $(srctree)/tools/scripts/Makefile.arch
+
+# FIXME looks like x86 is the only arch running tests ;-)
+# we need some IS_(32/64) flag to make this generic
+ifeq ($(ARCH)$(IS_64_BIT), x861)
+lib = lib64
+else
+lib = lib
+endif
+
+has = $(shell which $1 2>/dev/null)
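+# e.g. $(call has,ctags) expands to the tool's path when it is found in
+# $PATH and to the empty string otherwise; it gates the optional tags
+# and cscope tests below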
+
+# standard single make variable specified
+make_clean_all := clean all
+make_python_perf_so := python/perf.so
+make_debug := DEBUG=1
+make_no_libperl := NO_LIBPERL=1
+make_no_libpython := NO_LIBPYTHON=1
+make_no_scripts := NO_LIBPYTHON=1 NO_LIBPERL=1
+make_no_newt := NO_NEWT=1
+make_no_slang := NO_SLANG=1
+make_no_gtk2 := NO_GTK2=1
+make_no_ui := NO_NEWT=1 NO_SLANG=1 NO_GTK2=1
+make_no_demangle := NO_DEMANGLE=1
+make_no_libelf := NO_LIBELF=1
+make_no_libunwind := NO_LIBUNWIND=1
+make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1
+make_no_backtrace := NO_BACKTRACE=1
+make_no_libnuma := NO_LIBNUMA=1
+make_no_libaudit := NO_LIBAUDIT=1
+make_no_libbionic := NO_LIBBIONIC=1
+make_no_auxtrace := NO_AUXTRACE=1
+make_no_libbpf := NO_LIBBPF=1
+make_no_libcrypto := NO_LIBCRYPTO=1
+make_with_babeltrace:= LIBBABELTRACE=1
+make_no_sdt := NO_SDT=1
+make_with_clangllvm := LIBCLANGLLVM=1
+make_tags := tags
+make_cscope := cscope
+make_help := help
+make_doc := doc
+make_perf_o := perf.o
+make_util_map_o := util/map.o
+make_util_pmu_bison_o := util/pmu-bison.o
+make_install := install
+make_install_bin := install-bin
+make_install_doc := install-doc
+make_install_man := install-man
+make_install_html := install-html
+make_install_info := install-info
+make_install_pdf := install-pdf
+make_install_prefix := install prefix=/tmp/krava
+make_install_prefix_slash := install prefix=/tmp/krava/
+make_static := LDFLAGS=-static
+
+# all the NO_* variable combined
+make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
+make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
+make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
+make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
+make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1
+
+# $(run) contains all available tests
+run := make_pure
+# Targets 'clean all' can be run together only through top level
+# Makefile, because we detect the clean target in Makefile.perf and
+# disable feature detection
+ifeq ($(MK),Makefile)
+run += make_clean_all
+MAKE_F := $(MAKE)
+else
+MAKE_F := $(MAKE) -f $(MK)
+endif
+run += make_python_perf_so
+run += make_debug
+run += make_no_libperl
+run += make_no_libpython
+run += make_no_scripts
+run += make_no_newt
+run += make_no_slang
+run += make_no_gtk2
+run += make_no_ui
+run += make_no_demangle
+run += make_no_libelf
+run += make_no_libunwind
+run += make_no_libdw_dwarf_unwind
+run += make_no_backtrace
+run += make_no_libnuma
+run += make_no_libaudit
+run += make_no_libbionic
+run += make_no_auxtrace
+run += make_no_libbpf
+run += make_with_babeltrace
+run += make_with_clangllvm
+run += make_help
+run += make_doc
+run += make_perf_o
+run += make_util_map_o
+run += make_util_pmu_bison_o
+run += make_install
+run += make_install_bin
+run += make_install_prefix
+run += make_install_prefix_slash
+# FIXME 'install-*' commented out till they're fixed
+# run += make_install_doc
+# run += make_install_man
+# run += make_install_html
+# run += make_install_info
+# run += make_install_pdf
+run += make_minimal
+run += make_static
+
+ifneq ($(call has,ctags),)
+run += make_tags
+endif
+ifneq ($(call has,cscope),)
+run += make_cscope
+endif
+
+# $(run_O) contains the same set of $(run) tests with '_O' appended
+# to distinguish the O=... tests
+run_O := $(addsuffix _O,$(run))
+
+# disable some tests for O=...
+run_O := $(filter-out make_python_perf_so_O,$(run_O))
+
+# define test for each compile as 'test_NAME' variable
+# with the test itself as a value
+test_make_tags = test -f tags
+test_make_cscope = test -f cscope.out
+
+test_make_tags_O := $(test_make_tags)
+test_make_cscope_O := $(test_make_cscope)
+
+test_ok := true
+test_make_help := $(test_ok)
+test_make_doc := $(test_ok)
+test_make_help_O := $(test_ok)
+test_make_doc_O := $(test_ok)
+
+test_make_python_perf_so := test -f $(PERF_O)/python/perf.so
+
+test_make_perf_o := test -f $(PERF_O)/perf.o
+test_make_util_map_o := test -f $(PERF_O)/util/map.o
+test_make_util_pmu_bison_o := test -f $(PERF_O)/util/pmu-bison.o
+
+define test_dest_files
+ for file in $(1); do \
+ if [ ! -x $$TMP_DEST/$$file ]; then \
+ echo " failed to find: $$file"; \
+ fi \
+ done
+endef
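+
+# e.g. $(call test_dest_files,$(installed_files_bin)) expands to a shell
+# loop reporting every listed file that is missing from the $$TMP_DEST
+# install root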
+
+installed_files_bin := bin/perf
+installed_files_bin += etc/bash_completion.d/perf
+installed_files_bin += libexec/perf-core/perf-archive
+
+installed_files_plugins := $(lib)/traceevent/plugins/plugin_cfg80211.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_scsi.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_xen.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_function.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_sched_switch.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_mac80211.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_kvm.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_kmem.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_hrtimer.so
+installed_files_plugins += $(lib)/traceevent/plugins/plugin_jbd2.so
+
+installed_files_all := $(installed_files_bin)
+installed_files_all += $(installed_files_plugins)
+
+test_make_install := $(call test_dest_files,$(installed_files_all))
+test_make_install_O := $(call test_dest_files,$(installed_files_all))
+test_make_install_bin := $(call test_dest_files,$(installed_files_bin))
+test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin))
+
+# We prefix all installed files for make_install_prefix(_slash)
+# with '/tmp/krava' to match installed/prefix-ed files.
+installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all))
+test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix))
+test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix))
+
+test_make_install_prefix_slash := $(test_make_install_prefix)
+test_make_install_prefix_slash_O := $(test_make_install_prefix_O)
+
+# FIXME nothing gets installed
+test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1
+test_make_install_man_O := $(test_make_install_man)
+
+# FIXME nothing gets installed
+test_make_install_doc := $(test_ok)
+test_make_install_doc_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_html := $(test_ok)
+test_make_install_html_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_info := $(test_ok)
+test_make_install_info_O := $(test_ok)
+
+# FIXME nothing gets installed
+test_make_install_pdf := $(test_ok)
+test_make_install_pdf_O := $(test_ok)
+
+test_make_python_perf_so_O := test -f $$TMP_O/python/perf.so
+test_make_perf_o_O := test -f $$TMP_O/perf.o
+test_make_util_map_o_O := test -f $$TMP_O/util/map.o
+test_make_util_pmu_bison_o_O := test -f $$TMP_O/util/pmu-bison.o
+
+test_default = test -x $(PERF_O)/perf
+test = $(if $(test_$1),$(test_$1),$(test_default))
+
+test_default_O = test -x $$TMP_O/perf
+test_O = $(if $(test_$1),$(test_$1),$(test_default_O))
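+
+# e.g. $(call test,make_perf_o) resolves to test_make_perf_o when that
+# variable is defined above, and falls back to checking that a perf
+# binary was built otherwise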
+
+all:
+
+ifdef SHUF
+run := $(shell shuf -e $(run))
+run_O := $(shell shuf -e $(run_O))
+endif
+
+max_width := $(shell echo $(run_O) | sed 's/ /\n/g' | wc -L)
+
+ifdef DEBUG
+d := $(info run $(run))
+d := $(info run_O $(run_O))
+endif
+
+MAKEFLAGS := --no-print-directory
+
+clean := @(cd $(PERF); $(MAKE_F) -s $(O_OPT) clean >/dev/null && $(MAKE) -s $(O_OPT) -C ../build clean >/dev/null)
+
+$(run):
+ $(call clean)
+ @TMP_DEST=$$(mktemp -d); \
+ cmd="cd $(PERF) && $(MAKE_F) $($@) $(PARALLEL_OPT) $(O_OPT) DESTDIR=$$TMP_DEST"; \
+ printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
+ ( eval $$cmd ) >> $@ 2>&1; \
+ echo " test: $(call test,$@)" >> $@ 2>&1; \
+ $(call test,$@) && \
+ rm -rf $@ $$TMP_DEST || (cat $@ ; false)
+
+$(run_O):
+ $(call clean)
+ @TMP_O=$$(mktemp -d); \
+ TMP_DEST=$$(mktemp -d); \
+ cmd="cd $(PERF) && $(MAKE_F) $($(patsubst %_O,%,$@)) $(PARALLEL_OPT) O=$$TMP_O DESTDIR=$$TMP_DEST"; \
+ printf "%*.*s: %s\n" $(max_width) $(max_width) "$@" "$$cmd" && echo $$cmd > $@ && \
+ ( eval $$cmd ) >> $@ 2>&1 && \
+ echo " test: $(call test_O,$@)" >> $@ 2>&1; \
+ $(call test_O,$@) && \
+ rm -rf $@ $$TMP_O $$TMP_DEST || (cat $@ ; false)
+
+tarpkg:
+ @cmd="$(PERF)/tests/perf-targz-src-pkg $(PERF)"; \
+ echo "- $@: $$cmd" && echo $$cmd > $@ && \
+ ( eval $$cmd ) >> $@ 2>&1 && \
+ rm -f $@
+
+KERNEL_O := ../..
+ifneq ($(O),)
+ KERNEL_O := $(O)
+endif
+
+make_kernelsrc:
+ @echo "- make -C <kernelsrc> $(PARALLEL_OPT) $(K_O_OPT) tools/perf"
+ $(call clean); \
+ (make -C ../.. $(PARALLEL_OPT) $(K_O_OPT) tools/perf) > $@ 2>&1 && \
+ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
+
+make_kernelsrc_tools:
+ @echo "- make -C <kernelsrc>/tools $(PARALLEL_OPT) $(K_O_OPT) perf"
+ $(call clean); \
+ (make -C ../../tools $(PARALLEL_OPT) $(K_O_OPT) perf) > $@ 2>&1 && \
+ test -x $(KERNEL_O)/tools/perf/perf && rm -f $@ || (cat $@ ; false)
+
+FEATURES_DUMP_FILE := $(FULL_O)/BUILD_TEST_FEATURE_DUMP
+FEATURES_DUMP_FILE_STATIC := $(FULL_O)/BUILD_TEST_FEATURE_DUMP_STATIC
+
+all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
+ @echo OK
+ @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+out: $(run_O)
+ @echo OK
+ @rm -f $(FEATURES_DUMP_FILE) $(FEATURES_DUMP_FILE_STATIC)
+
+ifeq ($(REUSE_FEATURES_DUMP),1)
+$(FEATURES_DUMP_FILE):
+ $(call clean)
+ @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) feature-dump"; \
+ echo "- $@: $$cmd" && echo $$cmd && \
+ ( eval $$cmd ) > /dev/null 2>&1
+
+$(FEATURES_DUMP_FILE_STATIC):
+ $(call clean)
+ @cmd="cd $(PERF) && make FEATURE_DUMP_COPY=$@ $(O_OPT) LDFLAGS='-static' feature-dump"; \
+ echo "- $@: $$cmd" && echo $$cmd && \
+ ( eval $$cmd ) > /dev/null 2>&1
+
+# Add feature dump dependency for run/run_O targets
+$(foreach t,$(run) $(run_O),$(eval \
+ $(t): $(if $(findstring make_static,$(t)),\
+ $(FEATURES_DUMP_FILE_STATIC),\
+ $(FEATURES_DUMP_FILE))))
+
+# Append 'FEATURES_DUMP=' option to all test cases. For example:
+# make_no_libbpf: NO_LIBBPF=1 --> NO_LIBBPF=1 FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP
+# make_static: LDFLAGS=-static --> LDFLAGS=-static FEATURES_DUMP=/a/b/BUILD_TEST_FEATURE_DUMP_STATIC
+$(foreach t,$(run),$(if $(findstring make_static,$(t)),\
+ $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE_STATIC)),\
+ $(eval $(t) := $($(t)) FEATURES_DUMP=$(FEATURES_DUMP_FILE))))
+endif
+
+.PHONY: all $(run) $(run_O) tarpkg clean make_kernelsrc make_kernelsrc_tools
+endif # ifndef MK
diff --git a/tools/perf/tests/mem.c b/tools/perf/tests/mem.c
new file mode 100644
index 000000000..0f82ee9fd
--- /dev/null
+++ b/tools/perf/tests/mem.c
@@ -0,0 +1,56 @@
+#include "util/mem-events.h"
+#include "util/symbol.h"
+#include "linux/perf_event.h"
+#include "util/debug.h"
+#include "tests.h"
+#include <string.h>
+
+static int check(union perf_mem_data_src data_src,
+ const char *string)
+{
+ char out[100];
+ char failure[100];
+ struct mem_info mi = { .data_src = data_src };
+
+ int n;
+
+ n = perf_mem__snp_scnprintf(out, sizeof out, &mi);
+ n += perf_mem__lvl_scnprintf(out + n, sizeof out - n, &mi);
+ scnprintf(failure, sizeof failure, "unexpected %s", out);
+ TEST_ASSERT_VAL(failure, !strcmp(string, out));
+ return 0;
+}
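+
+/*
+ * check() renders the snoop string followed by the memory-level string
+ * for the given data_src; e.g. a plain level-4 hit below is expected to
+ * render as "N/AL4 hit" -- "N/A" for the unset snoop info plus "L4 hit"
+ * for the level.
+ */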
+
+int test__mem(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret = 0;
+ union perf_mem_data_src src;
+
+ memset(&src, 0, sizeof(src));
+
+ src.mem_lvl = PERF_MEM_LVL_HIT;
+ src.mem_lvl_num = 4;
+
+ ret |= check(src, "N/AL4 hit");
+
+ src.mem_remote = 1;
+
+ ret |= check(src, "N/ARemote L4 hit");
+
+ src.mem_lvl = PERF_MEM_LVL_MISS;
+ src.mem_lvl_num = PERF_MEM_LVLNUM_PMEM;
+ src.mem_remote = 0;
+
+ ret |= check(src, "N/APMEM miss");
+
+ src.mem_remote = 1;
+
+ ret |= check(src, "N/ARemote PMEM miss");
+
+ src.mem_snoopx = PERF_MEM_SNOOPX_FWD;
+ src.mem_lvl_num = PERF_MEM_LVLNUM_RAM;
+
+ ret |= check(src , "FwdRemote RAM miss");
+
+ return ret;
+}
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
new file mode 100644
index 000000000..9e9e4d37c
--- /dev/null
+++ b/tools/perf/tests/mem2node.c
@@ -0,0 +1,73 @@
+#include <linux/compiler.h>
+#include <linux/bitmap.h>
+#include "cpumap.h"
+#include "mem2node.h"
+#include "tests.h"
+
+static struct node {
+ int node;
+ const char *map;
+} test_nodes[] = {
+ { .node = 0, .map = "0" },
+ { .node = 1, .map = "1-2" },
+ { .node = 3, .map = "5-7,9" },
+};
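+
+/*
+ * With env.memory_bsize = 0x100 below, each bit in a node's map covers
+ * one 0x100-byte block: node 0 owns 0x000-0xff, node 1 ("1-2") owns
+ * 0x100-0x2ff, and node 3 ("5-7,9") owns 0x500-0x7ff plus 0x900-0x9ff,
+ * which is exactly what the assertions in the test exercise.
+ */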
+
+#define T TEST_ASSERT_VAL
+
+static unsigned long *get_bitmap(const char *str, int nbits)
+{
+ struct cpu_map *map = cpu_map__new(str);
+ unsigned long *bm = NULL;
+ int i;
+
+ bm = bitmap_alloc(nbits);
+
+ if (map && bm) {
+ for (i = 0; i < map->nr; i++) {
+ set_bit(map->map[i], bm);
+ }
+ }
+
+ if (map)
+ cpu_map__put(map);
+ else
+ free(bm);
+
+ return bm && map ? bm : NULL;
+}
+
+int test__mem2node(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+ struct mem2node map;
+ struct memory_node nodes[3];
+ struct perf_env env = {
+ .memory_nodes = (struct memory_node *) &nodes[0],
+ .nr_memory_nodes = ARRAY_SIZE(nodes),
+ .memory_bsize = 0x100,
+ };
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(nodes); i++) {
+ nodes[i].node = test_nodes[i].node;
+ nodes[i].size = 10;
+
+ T("failed: alloc bitmap",
+ (nodes[i].set = get_bitmap(test_nodes[i].map, 10)));
+ }
+
+ T("failed: mem2node__init", !mem2node__init(&map, &env));
+ T("failed: mem2node__node", 0 == mem2node__node(&map, 0x50));
+ T("failed: mem2node__node", 1 == mem2node__node(&map, 0x100));
+ T("failed: mem2node__node", 1 == mem2node__node(&map, 0x250));
+ T("failed: mem2node__node", 3 == mem2node__node(&map, 0x500));
+ T("failed: mem2node__node", 3 == mem2node__node(&map, 0x650));
+ T("failed: mem2node__node", -1 == mem2node__node(&map, 0x450));
+ T("failed: mem2node__node", -1 == mem2node__node(&map, 0x1050));
+
+ for (i = 0; i < ARRAY_SIZE(nodes); i++)
+ free(nodes[i].set);
+
+ mem2node__exit(&map);
+ return 0;
+}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
new file mode 100644
index 000000000..0919b0793
--- /dev/null
+++ b/tools/perf/tests/mmap-basic.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+/* For the CLR_() macros */
+#include <pthread.h>
+
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+#include <linux/err.h>
+#include <linux/kernel.h>
+
+/*
+ * This test generates a random number of calls to a few getpid-style
+ * syscalls (getsid, getppid, getpgid), then establishes an mmap for a
+ * group of events created to monitor those syscalls.
+ *
+ * It receives the events via the mmap and uses the PERF_SAMPLE_ID
+ * generated sample.id field to map each sample back to its respective
+ * perf_evsel instance.
+ *
+ * Then it checks that the number of syscalls reported as perf events by
+ * the kernel corresponds to the number of syscalls actually made.
+ */
+int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = -1;
+ union perf_event *event;
+ struct thread_map *threads;
+ struct cpu_map *cpus;
+ struct perf_evlist *evlist;
+ cpu_set_t cpu_set;
+ const char *syscall_names[] = { "getsid", "getppid", "getpgid", };
+ pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid };
+#define nsyscalls ARRAY_SIZE(syscall_names)
+ unsigned int nr_events[nsyscalls],
+ expected_nr_events[nsyscalls], i, j;
+ struct perf_evsel *evsels[nsyscalls], *evsel;
+ char sbuf[STRERR_BUFSIZE];
+ struct perf_mmap *md;
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (cpus == NULL) {
+ pr_debug("cpu_map__new\n");
+ goto out_free_threads;
+ }
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpus->map[0], &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+ pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+ cpus->map[0], str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_free_cpus;
+ }
+
+ evlist = perf_evlist__new();
+ if (evlist == NULL) {
+ pr_debug("perf_evlist__new\n");
+ goto out_free_cpus;
+ }
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ for (i = 0; i < nsyscalls; ++i) {
+ char name[64];
+
+ snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
+ evsels[i] = perf_evsel__newtp("syscalls", name);
+ if (IS_ERR(evsels[i])) {
+ pr_debug("perf_evsel__new(%s)\n", name);
+ goto out_delete_evlist;
+ }
+
+ evsels[i]->attr.wakeup_events = 1;
+ perf_evsel__set_sample_id(evsels[i], false);
+
+ perf_evlist__add(evlist, evsels[i]);
+
+ if (perf_evsel__open(evsels[i], cpus, threads) < 0) {
+ pr_debug("failed to open counter: %s, "
+ "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ nr_events[i] = 0;
+ expected_nr_events[i] = 1 + rand() % 127;
+ }
+
+ if (perf_evlist__mmap(evlist, 128) < 0) {
+ pr_debug("failed to mmap events: %d (%s)\n", errno,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ for (i = 0; i < nsyscalls; ++i)
+ for (j = 0; j < expected_nr_events[i]; ++j) {
+ int foo = syscalls[i]();
+ ++foo;
+ }
+
+ md = &evlist->mmap[0];
+ if (perf_mmap__read_init(md) < 0)
+ goto out_init;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ struct perf_sample sample;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ pr_debug("unexpected %s event\n",
+ perf_event__name(event->header.type));
+ goto out_delete_evlist;
+ }
+
+ err = perf_evlist__parse_sample(evlist, event, &sample);
+ if (err) {
+ pr_err("Can't parse sample, err = %d\n", err);
+ goto out_delete_evlist;
+ }
+
+ err = -1;
+ evsel = perf_evlist__id2evsel(evlist, sample.id);
+ if (evsel == NULL) {
+ pr_debug("event with id %" PRIu64
+ " doesn't map to an evsel\n", sample.id);
+ goto out_delete_evlist;
+ }
+ nr_events[evsel->idx]++;
+ perf_mmap__consume(md);
+ }
+ perf_mmap__read_done(md);
+
+out_init:
+ err = 0;
+ evlist__for_each_entry(evlist, evsel) {
+ if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
+ pr_debug("expected %d %s events, got %d\n",
+ expected_nr_events[evsel->idx],
+ perf_evsel__name(evsel), nr_events[evsel->idx]);
+ err = -1;
+ goto out_delete_evlist;
+ }
+ }
+
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ cpus = NULL;
+ threads = NULL;
+out_free_cpus:
+ cpu_map__put(cpus);
+out_free_threads:
+ thread_map__put(threads);
+ return err;
+}
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
new file mode 100644
index 000000000..7a9b123c7
--- /dev/null
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "debug.h"
+#include "tests.h"
+#include "machine.h"
+#include "thread_map.h"
+#include "symbol.h"
+#include "thread.h"
+#include "util.h"
+
+#define THREADS 4
+
+static int go_away;
+
+struct thread_data {
+ pthread_t pt;
+ pid_t tid;
+ void *map;
+ int ready[2];
+};
+
+static struct thread_data threads[THREADS];
+
+static int thread_init(struct thread_data *td)
+{
+ void *map;
+
+ map = mmap(NULL, page_size,
+ PROT_READ|PROT_WRITE|PROT_EXEC,
+ MAP_SHARED|MAP_ANONYMOUS, -1, 0);
+
+ if (map == MAP_FAILED) {
+ perror("mmap failed");
+ return -1;
+ }
+
+ td->map = map;
+ td->tid = syscall(SYS_gettid);
+
+ pr_debug("tid = %d, map = %p\n", td->tid, map);
+ return 0;
+}
+
+static void *thread_fn(void *arg)
+{
+ struct thread_data *td = arg;
+ ssize_t ret;
+ int go = 0;
+
+ if (thread_init(td))
+ return NULL;
+
+	/* Signal thread_create() that this thread is initialized. */
+ ret = write(td->ready[1], &go, sizeof(int));
+ if (ret != sizeof(int)) {
+ pr_err("failed to notify\n");
+ return NULL;
+ }
+
+ while (!go_away) {
+		/* Wait for the main thread to kill us. */
+ usleep(100);
+ }
+
+ munmap(td->map, page_size);
+ return NULL;
+}
+
+static int thread_create(int i)
+{
+ struct thread_data *td = &threads[i];
+ int err, go;
+
+ if (pipe(td->ready))
+ return -1;
+
+ err = pthread_create(&td->pt, NULL, thread_fn, td);
+ if (!err) {
+ /* Wait for thread initialization. */
+ ssize_t ret = read(td->ready[0], &go, sizeof(int));
+ err = ret != sizeof(int);
+ }
+
+ close(td->ready[0]);
+ close(td->ready[1]);
+ return err;
+}
+
+static int threads_create(void)
+{
+ struct thread_data *td0 = &threads[0];
+ int i, err = 0;
+
+ go_away = 0;
+
+ /* 0 is main thread */
+ if (thread_init(td0))
+ return -1;
+
+ for (i = 1; !err && i < THREADS; i++)
+ err = thread_create(i);
+
+ return err;
+}
+
+static int threads_destroy(void)
+{
+ struct thread_data *td0 = &threads[0];
+ int i, err = 0;
+
+ /* cleanup the main thread */
+ munmap(td0->map, page_size);
+
+ go_away = 1;
+
+ for (i = 1; !err && i < THREADS; i++)
+ err = pthread_join(threads[i].pt, NULL);
+
+ return err;
+}
+
+typedef int (*synth_cb)(struct machine *machine);
+
+static int synth_all(struct machine *machine)
+{
+ return perf_event__synthesize_threads(NULL,
+ perf_event__process,
+ machine, 0, 500, 1);
+}
+
+static int synth_process(struct machine *machine)
+{
+ struct thread_map *map;
+ int err;
+
+ map = thread_map__new_by_pid(getpid());
+
+ err = perf_event__synthesize_thread_map(NULL, map,
+ perf_event__process,
+ machine, 0, 500);
+
+ thread_map__put(map);
+ return err;
+}
+
+static int mmap_events(synth_cb synth)
+{
+ struct machine *machine;
+ int err, i;
+
+ /*
+	 * threads_create() will not return before all threads are
+	 * spawned and each has created its memory map.
+	 *
+	 * They will loop until threads_destroy() is called, so we
+	 * can safely run the synthesizing function.
+ */
+ TEST_ASSERT_VAL("failed to create threads", !threads_create());
+
+ machine = machine__new_host();
+
+ dump_trace = verbose > 1 ? 1 : 0;
+
+ err = synth(machine);
+
+ dump_trace = 0;
+
+ TEST_ASSERT_VAL("failed to destroy threads", !threads_destroy());
+ TEST_ASSERT_VAL("failed to synthesize maps", !err);
+
+ /*
+	 * All data is synthesized; try to find the map for each
+	 * thread object.
+ */
+ for (i = 0; i < THREADS; i++) {
+ struct thread_data *td = &threads[i];
+ struct addr_location al;
+ struct thread *thread;
+
+ thread = machine__findnew_thread(machine, getpid(), td->tid);
+
+ pr_debug("looking for map %p\n", td->map);
+
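+		/*
+		 * Resolve an address just past the start of the mapping
+		 * (td->map + 1) so the lookup lands inside the map rather
+		 * than on its boundary.
+		 */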
+ thread__find_map(thread, PERF_RECORD_MISC_USER,
+ (unsigned long) (td->map + 1), &al);
+
+ thread__put(thread);
+
+ if (!al.map) {
+ pr_debug("failed, couldn't find map\n");
+ err = -1;
+ break;
+ }
+
+ pr_debug("map %p, addr %" PRIx64 "\n", al.map, al.map->start);
+ }
+
+ machine__delete_threads(machine);
+ machine__delete(machine);
+ return err;
+}
+
+/*
+ * This test creates THREADS threads (including the main thread),
+ * and each thread creates a memory map.
+ *
+ * Once the threads are created, we synthesize them with both
+ * (as separate tests):
+ *   perf_event__synthesize_thread_map (process based)
+ *   perf_event__synthesize_threads (global)
+ *
+ * and verify that we can find all the memory maps via
+ *   thread__find_map
+ *
+ * using all the thread objects.
+ */
+int test__mmap_thread_lookup(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ /* perf_event__synthesize_threads synthesize */
+	TEST_ASSERT_VAL("failed with synthesizing all",
+ !mmap_events(synth_all));
+
+ /* perf_event__synthesize_thread_map synthesize */
+	TEST_ASSERT_VAL("failed with synthesizing process",
+ !mmap_events(synth_process));
+
+ return 0;
+}
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
new file mode 100644
index 000000000..493ecb611
--- /dev/null
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+/* For the CPU_* macros */
+#include <pthread.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <api/fs/fs.h>
+#include <linux/err.h>
+#include <api/fs/tracing_path.h>
+#include "evsel.h"
+#include "tests.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "debug.h"
+#include "stat.h"
+
+int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = -1, fd, cpu;
+ struct cpu_map *cpus;
+ struct perf_evsel *evsel;
+ unsigned int nr_openat_calls = 111, i;
+ cpu_set_t cpu_set;
+ struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
+ char sbuf[STRERR_BUFSIZE];
+ char errbuf[BUFSIZ];
+
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (cpus == NULL) {
+ pr_debug("cpu_map__new\n");
+ goto out_thread_map_delete;
+ }
+
+ CPU_ZERO(&cpu_set);
+
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ if (IS_ERR(evsel)) {
+ tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
+ pr_debug("%s\n", errbuf);
+ goto out_cpu_map_delete;
+ }
+
+ if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ pr_debug("failed to open counter: %s, "
+ "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_evsel_delete;
+ }
+
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ unsigned int ncalls = nr_openat_calls + cpu;
+ /*
+ * XXX eventually lift this restriction in a way that
+ * keeps perf building on older glibc installations
+		 * without CPU_ALLOC. 1024 CPUs in 2010 still seems
+		 * a reasonable upper limit, though :-)
+ */
+ if (cpus->map[cpu] >= CPU_SETSIZE) {
+ pr_debug("Ignoring CPU %d\n", cpus->map[cpu]);
+ continue;
+ }
+
+ CPU_SET(cpus->map[cpu], &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0) {
+ pr_debug("sched_setaffinity() failed on CPU %d: %s ",
+ cpus->map[cpu],
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_close_fd;
+ }
+ for (i = 0; i < ncalls; ++i) {
+ fd = openat(0, "/etc/passwd", O_RDONLY);
+ close(fd);
+ }
+ CPU_CLR(cpus->map[cpu], &cpu_set);
+ }
+
+ /*
+	 * Here we need to explicitly preallocate the counts, because if
+	 * we used the auto allocation it would allocate just for 1 cpu,
+	 * as we start by cpu 0.
+ */
+ if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
+ pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+ goto out_close_fd;
+ }
+
+ err = 0;
+
+ for (cpu = 0; cpu < cpus->nr; ++cpu) {
+ unsigned int expected;
+
+ if (cpus->map[cpu] >= CPU_SETSIZE)
+ continue;
+
+ if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ pr_debug("perf_evsel__read_on_cpu\n");
+ err = -1;
+ break;
+ }
+
+ expected = nr_openat_calls + cpu;
+ if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
+ pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
+ expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
+ err = -1;
+ }
+ }
+
+ perf_evsel__free_counts(evsel);
+out_close_fd:
+ perf_evsel__close_fd(evsel);
+out_evsel_delete:
+ perf_evsel__delete(evsel);
+out_cpu_map_delete:
+ cpu_map__put(cpus);
+out_thread_map_delete:
+ thread_map__put(threads);
+ return err;
+}
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
new file mode 100644
index 000000000..344dc3ac2
--- /dev/null
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "perf.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "tests.h"
+#include "debug.h"
+#include <errno.h>
+
+#ifndef O_DIRECTORY
+#define O_DIRECTORY 00200000
+#endif
+#ifndef AT_FDCWD
+#define AT_FDCWD -100
+#endif
+
+int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct record_opts opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .no_buffering = true,
+ .freq = 1,
+ .mmap_pages = 256,
+ .raw_samples = true,
+ };
+ const char *filename = "/etc/passwd";
+ int flags = O_RDONLY | O_DIRECTORY;
+ struct perf_evlist *evlist = perf_evlist__new();
+ struct perf_evsel *evsel;
+ int err = -1, i, nr_events = 0, nr_polls = 0;
+ char sbuf[STRERR_BUFSIZE];
+
+ if (evlist == NULL) {
+ pr_debug("%s: perf_evlist__new\n", __func__);
+ goto out;
+ }
+
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ if (IS_ERR(evsel)) {
+ pr_debug("%s: perf_evsel__newtp\n", __func__);
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__add(evlist, evsel);
+
+ err = perf_evlist__create_maps(evlist, &opts.target);
+ if (err < 0) {
+ pr_debug("%s: perf_evlist__create_maps\n", __func__);
+ goto out_delete_evlist;
+ }
+
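+	/* Apply the record options (raw samples, no buffering, etc.) to the evsel. */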
+ perf_evsel__config(evsel, &opts, NULL);
+
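+	/* Restrict monitoring to the current process. */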
+ thread_map__set_pid(evlist->threads, 0, getpid());
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ err = perf_evlist__mmap(evlist, UINT_MAX);
+ if (err < 0) {
+ pr_debug("perf_evlist__mmap: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__enable(evlist);
+
+ /*
+ * Generate the event:
+ */
+ openat(AT_FDCWD, filename, flags);
+
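+	/*
+	 * Consume events from all mmap buffers until our
+	 * PERF_RECORD_SAMPLE shows up, then check that the "flags"
+	 * tracepoint field matches what was passed to openat().
+	 * Give up after a handful of iterations without a sample.
+	 */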
+ while (1) {
+ int before = nr_events;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ union perf_event *event;
+ struct perf_mmap *md;
+
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ const u32 type = event->header.type;
+ int tp_flags;
+ struct perf_sample sample;
+
+ ++nr_events;
+
+ if (type != PERF_RECORD_SAMPLE) {
+ perf_mmap__consume(md);
+ continue;
+ }
+
+ err = perf_evsel__parse_sample(evsel, event, &sample);
+ if (err) {
+ pr_debug("Can't parse sample, err = %d\n", err);
+ goto out_delete_evlist;
+ }
+
+ tp_flags = perf_evsel__intval(evsel, &sample, "flags");
+
+ if (flags != tp_flags) {
+ pr_debug("%s: Expected flags=%#x, got %#x\n",
+ __func__, flags, tp_flags);
+ goto out_delete_evlist;
+ }
+
+ goto out_ok;
+ }
+ perf_mmap__read_done(md);
+ }
+
+ if (nr_events == before)
+ perf_evlist__poll(evlist, 10);
+
+ if (++nr_polls > 5) {
+ pr_debug("%s: no events!\n", __func__);
+ goto out_delete_evlist;
+ }
+ }
+out_ok:
+ err = 0;
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+out:
+ return err;
+}
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
new file mode 100644
index 000000000..00cd63f90
--- /dev/null
+++ b/tools/perf/tests/openat-syscall.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <api/fs/tracing_path.h>
+#include <linux/err.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "thread_map.h"
+#include "evsel.h"
+#include "debug.h"
+#include "tests.h"
+
+int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = -1, fd;
+ struct perf_evsel *evsel;
+ unsigned int nr_openat_calls = 111, i;
+ struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
+ char sbuf[STRERR_BUFSIZE];
+ char errbuf[BUFSIZ];
+
+ if (threads == NULL) {
+ pr_debug("thread_map__new\n");
+ return -1;
+ }
+
+ evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ if (IS_ERR(evsel)) {
+ tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
+ pr_debug("%s\n", errbuf);
+ goto out_thread_map_delete;
+ }
+
+ if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ pr_debug("failed to open counter: %s, "
+ "tweak /proc/sys/kernel/perf_event_paranoid?\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_evsel_delete;
+ }
+
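+	/*
+	 * Generate a known number of openat() calls, then verify the
+	 * tracepoint counter saw exactly that many.
+	 */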
+ for (i = 0; i < nr_openat_calls; ++i) {
+ fd = openat(0, "/etc/passwd", O_RDONLY);
+ close(fd);
+ }
+
+ if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
+ pr_debug("perf_evsel__read_on_cpu\n");
+ goto out_close_fd;
+ }
+
+ if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
+ pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
+ nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
+ goto out_close_fd;
+ }
+
+ err = 0;
+out_close_fd:
+ perf_evsel__close_fd(evsel);
+out_evsel_delete:
+ perf_evsel__delete(evsel);
+out_thread_map_delete:
+ thread_map__put(threads);
+ return err;
+}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
new file mode 100644
index 000000000..532c95e8f
--- /dev/null
+++ b/tools/perf/tests/parse-events.c
@@ -0,0 +1,1914 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "parse-events.h"
+#include "evsel.h"
+#include "evlist.h"
+#include <api/fs/fs.h>
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include <dirent.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/kernel.h>
+#include <linux/hw_breakpoint.h>
+#include <api/fs/tracing_path.h>
+
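+/*
+ * The sample_type bits that every parsed tracepoint event is
+ * expected to carry, checked by the tests below.
+ */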
+#define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
+
+#if defined(__s390x__)
+/* Return true if the kvm module is available and loaded. Test
+ * this and return success when the trace point
+ * kvm_s390_create_vm exists. Otherwise this test always fails.
+ */
+static bool kvm_s390_create_vm_valid(void)
+{
+ char *eventfile;
+ bool rc = false;
+
+ eventfile = get_events_file("kvm-s390");
+
+ if (eventfile) {
+ DIR *mydir = opendir(eventfile);
+
+ if (mydir) {
+ rc = true;
+ closedir(mydir);
+ }
+ put_events_file(eventfile);
+ }
+
+ return rc;
+}
+#endif
+
+static int test__checkevent_tracepoint(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong sample_type",
+ PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+ TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+ return 0;
+}
+
+static int test__checkevent_tracepoint_multi(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+ TEST_ASSERT_VAL("wrong number of groups", 0 == evlist->nr_groups);
+
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_VAL("wrong type",
+ PERF_TYPE_TRACEPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong sample_type",
+ PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+ TEST_ASSERT_VAL("wrong sample_period",
+ 1 == evsel->attr.sample_period);
+ }
+ return 0;
+}
+
+static int test__checkevent_raw(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x1a == evsel->attr.config);
+ return 0;
+}
+
+static int test__checkevent_numeric(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ return 0;
+}
+
+static int test__checkevent_symbolic_name(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ return 0;
+}
+
+static int test__checkevent_symbolic_name_config(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test only exercises the parse-events method.
+ */
+ TEST_ASSERT_VAL("wrong period",
+ 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong config1",
+ 0 == evsel->attr.config1);
+ TEST_ASSERT_VAL("wrong config2",
+ 1 == evsel->attr.config2);
+ return 0;
+}
+
+static int test__checkevent_symbolic_alias(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config);
+ return 0;
+}
+
+static int test__checkevent_genhw(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HW_CACHE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", (1 << 16) == evsel->attr.config);
+ return 0;
+}
+
+static int test__checkevent_breakpoint(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
+ evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_4 ==
+ evsel->attr.bp_len);
+ return 0;
+}
+
+static int test__checkevent_breakpoint_x(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type",
+ HW_BREAKPOINT_X == evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len", sizeof(long) == evsel->attr.bp_len);
+ return 0;
+}
+
+static int test__checkevent_breakpoint_r(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type",
+ PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type",
+ HW_BREAKPOINT_R == evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len",
+ HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+ return 0;
+}
+
+static int test__checkevent_breakpoint_w(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type",
+ PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type",
+ HW_BREAKPOINT_W == evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len",
+ HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+ return 0;
+}
+
+static int test__checkevent_breakpoint_rw(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type",
+ PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type",
+ (HW_BREAKPOINT_R|HW_BREAKPOINT_W) == evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len",
+ HW_BREAKPOINT_LEN_4 == evsel->attr.bp_len);
+ return 0;
+}
+
+static int test__checkevent_tracepoint_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_tracepoint(evlist);
+}
+
+static int
+test__checkevent_tracepoint_multi_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ TEST_ASSERT_VAL("wrong number of entries", evlist->nr_entries > 1);
+
+ evlist__for_each_entry(evlist, evsel) {
+ TEST_ASSERT_VAL("wrong exclude_user",
+ !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ }
+
+ return test__checkevent_tracepoint_multi(evlist);
+}
+
+static int test__checkevent_raw_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return test__checkevent_raw(evlist);
+}
+
+static int test__checkevent_numeric_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return test__checkevent_numeric(evlist);
+}
+
+static int test__checkevent_symbolic_name_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_exclude_host_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_exclude_guest_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_symbolic_alias_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_symbolic_alias(evlist);
+}
+
+static int test__checkevent_genhw_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return test__checkevent_genhw(evlist);
+}
+
+static int test__checkevent_exclude_idle_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_exclude_idle_modifier_1(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "mem:0:u"));
+
+ return test__checkevent_breakpoint(evlist);
+}
+
+static int test__checkevent_breakpoint_x_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "mem:0:x:k"));
+
+ return test__checkevent_breakpoint_x(evlist);
+}
+
+static int test__checkevent_breakpoint_r_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "mem:0:r:hp"));
+
+ return test__checkevent_breakpoint_r(evlist);
+}
+
+static int test__checkevent_breakpoint_w_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "mem:0:w:up"));
+
+ return test__checkevent_breakpoint_w(evlist);
+}
+
+static int test__checkevent_breakpoint_rw_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp"));
+
+ return test__checkevent_breakpoint_rw(evlist);
+}
+
+static int test__checkevent_pmu(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 10 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong config1", 1 == evsel->attr.config1);
+ TEST_ASSERT_VAL("wrong config2", 3 == evsel->attr.config2);
+ /*
+ * The period value gets configured within perf_evlist__config,
+ * while this test only exercises the parse-events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+
+ return 0;
+}
+
+static int test__checkevent_list(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+ /* r1 */
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong config1", 0 == evsel->attr.config1);
+ TEST_ASSERT_VAL("wrong config2", 0 == evsel->attr.config2);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ /* syscalls:sys_enter_openat:k */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong sample_type",
+ PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+ TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ /* 1:1:hp */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+
+ return 0;
+}
+
+static int test__checkevent_pmu_name(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,name=krava/u */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
+
+ /* cpu/config=2/u" */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong name",
+ !strcmp(perf_evsel__name(evsel), "cpu/config=2/u"));
+
+ return 0;
+}
+
+static int test__checkevent_pmu_partial_time_callgraph(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* cpu/config=1,call-graph=fp,time,period=100000/ */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 1 == evsel->attr.config);
+ /*
+ * The period, time and callgraph values get configured
+ * within perf_evlist__config, while this test only
+ * exercises the parse-events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ /* cpu/config=2,call-graph=no,time=0,period=2000/ */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 2 == evsel->attr.config);
+ /*
+ * The period, time and callgraph values get configured
+ * within perf_evlist__config, while this test only
+ * exercises the parse-events method.
+ */
+ TEST_ASSERT_VAL("wrong period", 0 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong callgraph", !evsel__has_callchain(evsel));
+ TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->attr.sample_type));
+
+ return 0;
+}
+
+static int test__checkevent_pmu_events(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong exclude_user",
+ !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+ return 0;
+}
+
+static int test__checkevent_pmu_events_mix(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ /* pmu-event:u */
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong exclude_user",
+ !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+	/* cpu/pmu-event/u */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong exclude_user",
+ !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel",
+ evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+ return 0;
+}
+
+static int test__checkterms_simple(struct list_head *terms)
+{
+ struct parse_events_term *term;
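+	/*
+	 * Walk each term in order, checking its type and value;
+	 * presumably parsed from a term list like
+	 * "config=10,config1,config2=3,umask=1".
+	 */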
+
+ /* config=10 */
+ term = list_entry(terms->next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 10);
+ TEST_ASSERT_VAL("wrong config", !term->config);
+
+ /* config1 */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG1);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+ TEST_ASSERT_VAL("wrong config", !term->config);
+
+ /* config2=3 */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG2);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 3);
+ TEST_ASSERT_VAL("wrong config", !term->config);
+
+	/* umask=1 */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
+
+ return 0;
+}
+
+static int test__group1(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
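+	/*
+	 * Two events in one group, presumably from a spec like
+	 * {instructions:k,cycles:upp}: check each member's modifiers
+	 * and its group linkage.
+	 */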
+ /* instructions:k */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* cycles:upp */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ return 0;
+}
+
+static int test__group2(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* faults + :ku modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_SW_PAGE_FAULTS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* cache-references + :u modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_REFERENCES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* cycles:k */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ return 0;
+}
+
+static int test__group3(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
+
+ /* group1 syscalls:sys_enter_openat:H */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong sample_type",
+ PERF_TP_SAMPLE_TYPE == evsel->attr.sample_type);
+ TEST_ASSERT_VAL("wrong sample_period", 1 == evsel->attr.sample_period);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong group name",
+ !strcmp(leader->group_name, "group1"));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* group1 cycles:kppp */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 3);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* group2 cycles + G modifier */
+ evsel = leader = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong group name",
+ !strcmp(leader->group_name, "group2"));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* group2 1:3 + G modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", 1 == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 3 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions:u */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ return 0;
+}
+
+static int test__group4(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles:u + p */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 1);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions:kp + p */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ /* use of precise requires exclude_guest */
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip == 2);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ return 0;
+}
+
+static int test__group5(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
+
+ /* cycles + G */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions + G */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* cycles:G */
+ evsel = leader = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
+
+ /* instructions:G */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ /* cycles */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+
+ return 0;
+}
+
+static int test__group_gh1(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles + :H group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:G + :H group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__group_gh2(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles + :G group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :G group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__group_gh3(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles:G + :u group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :u group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__group_gh4(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong number of groups", 1 == evlist->nr_groups);
+
+ /* cycles:G + :uG group modifier */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong nr_members", evsel->nr_members == 2);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+
+ /* cache-misses:H + :uG group modifier */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+
+ return 0;
+}
+
+static int test__leader_sample1(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+ /* cycles - sampling group leader */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ /* cache-misses - not sampling */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ /* branch-misses - not sampling */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ return 0;
+}
+
+static int test__leader_sample2(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+
+ /* instructions - sampling group leader */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_INSTRUCTIONS == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ /* branch-misses - not sampling */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
+
+ return 0;
+}
+
+static int test__checkevent_pinned_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", evsel->attr.precise_ip);
+ TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned);
+
+ return test__checkevent_symbolic_name(evlist);
+}
+
+static int test__pinned_group(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel, *leader;
+
+ TEST_ASSERT_VAL("wrong number of entries", 3 == evlist->nr_entries);
+
+ /* cycles - group leader */
+ evsel = leader = perf_evlist__first(evlist);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CPU_CYCLES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
+ TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
+ TEST_ASSERT_VAL("wrong pinned", evsel->attr.pinned);
+
+ /* cache-misses - cannot be pinned, but will go on with the leader */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_CACHE_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+ /* branch-misses - ditto */
+ evsel = perf_evsel__next(evsel);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_HW_BRANCH_MISSES == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong pinned", !evsel->attr.pinned);
+
+ return 0;
+}
+
+static int test__checkevent_breakpoint_len(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type", (HW_BREAKPOINT_R | HW_BREAKPOINT_W) ==
+ evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_1 ==
+ evsel->attr.bp_len);
+
+ return 0;
+}
+
+static int test__checkevent_breakpoint_len_w(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_BREAKPOINT == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config", 0 == evsel->attr.config);
+ TEST_ASSERT_VAL("wrong bp_type", HW_BREAKPOINT_W ==
+ evsel->attr.bp_type);
+ TEST_ASSERT_VAL("wrong bp_len", HW_BREAKPOINT_LEN_2 ==
+ evsel->attr.bp_len);
+
+ return 0;
+}
+
+static int
+test__checkevent_breakpoint_len_rw_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user);
+ TEST_ASSERT_VAL("wrong exclude_kernel", evsel->attr.exclude_kernel);
+ TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv);
+ TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
+
+ return test__checkevent_breakpoint_rw(evlist);
+}
+
+static int test__checkevent_precise_max_modifier(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->attr.type);
+ TEST_ASSERT_VAL("wrong config",
+ PERF_COUNT_SW_TASK_CLOCK == evsel->attr.config);
+ return 0;
+}
+
+static int test__checkevent_config_symbol(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "insn") == 0);
+ return 0;
+}
+
+static int test__checkevent_config_raw(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "rawpmu") == 0);
+ return 0;
+}
+
+static int test__checkevent_config_num(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "numpmu") == 0);
+ return 0;
+}
+
+static int test__checkevent_config_cache(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "cachepmu") == 0);
+ return 0;
+}
+
+static bool test__intel_pt_valid(void)
+{
+ return !!perf_pmu__find("intel_pt");
+}
+
+static int test__intel_pt(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong name setting", strcmp(evsel->name, "intel_pt//u") == 0);
+ return 0;
+}
+
+static int test__checkevent_complex_name(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = perf_evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong complex name parsing", strcmp(evsel->name, "COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks") == 0);
+ return 0;
+}
+
+static int count_tracepoints(void)
+{
+ struct dirent *events_ent;
+ DIR *events_dir;
+ int cnt = 0;
+
+ events_dir = tracing_events__opendir();
+
+ TEST_ASSERT_VAL("Can't open events dir", events_dir);
+
+ while ((events_ent = readdir(events_dir))) {
+ char *sys_path;
+ struct dirent *sys_ent;
+ DIR *sys_dir;
+
+ if (!strcmp(events_ent->d_name, ".")
+ || !strcmp(events_ent->d_name, "..")
+ || !strcmp(events_ent->d_name, "enable")
+ || !strcmp(events_ent->d_name, "header_event")
+ || !strcmp(events_ent->d_name, "header_page"))
+ continue;
+
+ sys_path = get_events_file(events_ent->d_name);
+ TEST_ASSERT_VAL("Can't get sys path", sys_path);
+
+ sys_dir = opendir(sys_path);
+ TEST_ASSERT_VAL("Can't open sys dir", sys_dir);
+
+ while ((sys_ent = readdir(sys_dir))) {
+ if (!strcmp(sys_ent->d_name, ".")
+ || !strcmp(sys_ent->d_name, "..")
+ || !strcmp(sys_ent->d_name, "enable")
+ || !strcmp(sys_ent->d_name, "filter"))
+ continue;
+
+ cnt++;
+ }
+
+ closedir(sys_dir);
+ put_events_file(sys_path);
+ }
+
+ closedir(events_dir);
+ return cnt;
+}
+
+static int test__all_tracepoints(struct perf_evlist *evlist)
+{
+ TEST_ASSERT_VAL("wrong events count",
+ count_tracepoints() == evlist->nr_entries);
+
+ return test__checkevent_tracepoint_multi(evlist);
+}
+
+struct evlist_test {
+ const char *name;
+ __u32 type;
+ const int id;
+ bool (*valid)(void);
+ int (*check)(struct perf_evlist *evlist);
+};
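+
+/*
+ * Table entry layout, as test_event() below consumes it: .name is the
+ * string handed to parse_events(), .check validates the resulting
+ * evlist, .valid (optional) gates the test on runtime support, and
+ * .id only labels the debug output.
+ */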
+
+static struct evlist_test test__events[] = {
+ {
+ .name = "syscalls:sys_enter_openat",
+ .check = test__checkevent_tracepoint,
+ .id = 0,
+ },
+ {
+ .name = "syscalls:*",
+ .check = test__checkevent_tracepoint_multi,
+ .id = 1,
+ },
+ {
+ .name = "r1a",
+ .check = test__checkevent_raw,
+ .id = 2,
+ },
+ {
+ .name = "1:1",
+ .check = test__checkevent_numeric,
+ .id = 3,
+ },
+ {
+ .name = "instructions",
+ .check = test__checkevent_symbolic_name,
+ .id = 4,
+ },
+ {
+ .name = "cycles/period=100000,config2/",
+ .check = test__checkevent_symbolic_name_config,
+ .id = 5,
+ },
+ {
+ .name = "faults",
+ .check = test__checkevent_symbolic_alias,
+ .id = 6,
+ },
+ {
+ .name = "L1-dcache-load-miss",
+ .check = test__checkevent_genhw,
+ .id = 7,
+ },
+ {
+ .name = "mem:0",
+ .check = test__checkevent_breakpoint,
+ .id = 8,
+ },
+ {
+ .name = "mem:0:x",
+ .check = test__checkevent_breakpoint_x,
+ .id = 9,
+ },
+ {
+ .name = "mem:0:r",
+ .check = test__checkevent_breakpoint_r,
+ .id = 10,
+ },
+ {
+ .name = "mem:0:w",
+ .check = test__checkevent_breakpoint_w,
+ .id = 11,
+ },
+ {
+ .name = "syscalls:sys_enter_openat:k",
+ .check = test__checkevent_tracepoint_modifier,
+ .id = 12,
+ },
+ {
+ .name = "syscalls:*:u",
+ .check = test__checkevent_tracepoint_multi_modifier,
+ .id = 13,
+ },
+ {
+ .name = "r1a:kp",
+ .check = test__checkevent_raw_modifier,
+ .id = 14,
+ },
+ {
+ .name = "1:1:hp",
+ .check = test__checkevent_numeric_modifier,
+ .id = 15,
+ },
+ {
+ .name = "instructions:h",
+ .check = test__checkevent_symbolic_name_modifier,
+ .id = 16,
+ },
+ {
+ .name = "faults:u",
+ .check = test__checkevent_symbolic_alias_modifier,
+ .id = 17,
+ },
+ {
+ .name = "L1-dcache-load-miss:kp",
+ .check = test__checkevent_genhw_modifier,
+ .id = 18,
+ },
+ {
+ .name = "mem:0:u",
+ .check = test__checkevent_breakpoint_modifier,
+ .id = 19,
+ },
+ {
+ .name = "mem:0:x:k",
+ .check = test__checkevent_breakpoint_x_modifier,
+ .id = 20,
+ },
+ {
+ .name = "mem:0:r:hp",
+ .check = test__checkevent_breakpoint_r_modifier,
+ .id = 21,
+ },
+ {
+ .name = "mem:0:w:up",
+ .check = test__checkevent_breakpoint_w_modifier,
+ .id = 22,
+ },
+ {
+ .name = "r1,syscalls:sys_enter_openat:k,1:1:hp",
+ .check = test__checkevent_list,
+ .id = 23,
+ },
+ {
+ .name = "instructions:G",
+ .check = test__checkevent_exclude_host_modifier,
+ .id = 24,
+ },
+ {
+ .name = "instructions:H",
+ .check = test__checkevent_exclude_guest_modifier,
+ .id = 25,
+ },
+ {
+ .name = "mem:0:rw",
+ .check = test__checkevent_breakpoint_rw,
+ .id = 26,
+ },
+ {
+ .name = "mem:0:rw:kp",
+ .check = test__checkevent_breakpoint_rw_modifier,
+ .id = 27,
+ },
+ {
+ .name = "{instructions:k,cycles:upp}",
+ .check = test__group1,
+ .id = 28,
+ },
+ {
+ .name = "{faults:k,cache-references}:u,cycles:k",
+ .check = test__group2,
+ .id = 29,
+ },
+ {
+ .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
+ .check = test__group3,
+ .id = 30,
+ },
+ {
+ .name = "{cycles:u,instructions:kp}:p",
+ .check = test__group4,
+ .id = 31,
+ },
+ {
+ .name = "{cycles,instructions}:G,{cycles:G,instructions:G},cycles",
+ .check = test__group5,
+ .id = 32,
+ },
+ {
+ .name = "*:*",
+ .check = test__all_tracepoints,
+ .id = 33,
+ },
+ {
+ .name = "{cycles,cache-misses:G}:H",
+ .check = test__group_gh1,
+ .id = 34,
+ },
+ {
+ .name = "{cycles,cache-misses:H}:G",
+ .check = test__group_gh2,
+ .id = 35,
+ },
+ {
+ .name = "{cycles:G,cache-misses:H}:u",
+ .check = test__group_gh3,
+ .id = 36,
+ },
+ {
+ .name = "{cycles:G,cache-misses:H}:uG",
+ .check = test__group_gh4,
+ .id = 37,
+ },
+ {
+ .name = "{cycles,cache-misses,branch-misses}:S",
+ .check = test__leader_sample1,
+ .id = 38,
+ },
+ {
+ .name = "{instructions,branch-misses}:Su",
+ .check = test__leader_sample2,
+ .id = 39,
+ },
+ {
+ .name = "instructions:uDp",
+ .check = test__checkevent_pinned_modifier,
+ .id = 40,
+ },
+ {
+ .name = "{cycles,cache-misses,branch-misses}:D",
+ .check = test__pinned_group,
+ .id = 41,
+ },
+ {
+ .name = "mem:0/1",
+ .check = test__checkevent_breakpoint_len,
+ .id = 42,
+ },
+ {
+ .name = "mem:0/2:w",
+ .check = test__checkevent_breakpoint_len_w,
+ .id = 43,
+ },
+ {
+ .name = "mem:0/4:rw:u",
+ .check = test__checkevent_breakpoint_len_rw_modifier,
+ .id = 44
+ },
+#if defined(__s390x__)
+ {
+ .name = "kvm-s390:kvm_s390_create_vm",
+ .check = test__checkevent_tracepoint,
+ .valid = kvm_s390_create_vm_valid,
+ .id = 100,
+ },
+#endif
+ {
+ .name = "instructions:I",
+ .check = test__checkevent_exclude_idle_modifier,
+ .id = 45,
+ },
+ {
+ .name = "instructions:kIG",
+ .check = test__checkevent_exclude_idle_modifier_1,
+ .id = 46,
+ },
+ {
+ .name = "task-clock:P,cycles",
+ .check = test__checkevent_precise_max_modifier,
+ .id = 47,
+ },
+ {
+ .name = "instructions/name=insn/",
+ .check = test__checkevent_config_symbol,
+ .id = 48,
+ },
+ {
+ .name = "r1234/name=rawpmu/",
+ .check = test__checkevent_config_raw,
+ .id = 49,
+ },
+ {
+ .name = "4:0x6530160/name=numpmu/",
+ .check = test__checkevent_config_num,
+ .id = 50,
+ },
+ {
+ .name = "L1-dcache-misses/name=cachepmu/",
+ .check = test__checkevent_config_cache,
+ .id = 51,
+ },
+ {
+ .name = "intel_pt//u",
+ .valid = test__intel_pt_valid,
+ .check = test__intel_pt,
+ .id = 52,
+ },
+ {
+ .name = "cycles/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks'/Duk",
+ .check = test__checkevent_complex_name,
+ .id = 53
+ }
+};
+
+static struct evlist_test test__events_pmu[] = {
+ {
+ .name = "cpu/config=10,config1,config2=3,period=1000/u",
+ .check = test__checkevent_pmu,
+ .id = 0,
+ },
+ {
+ .name = "cpu/config=1,name=krava/u,cpu/config=2/u",
+ .check = test__checkevent_pmu_name,
+ .id = 1,
+ },
+ {
+ .name = "cpu/config=1,call-graph=fp,time,period=100000/,cpu/config=2,call-graph=no,time=0,period=2000/",
+ .check = test__checkevent_pmu_partial_time_callgraph,
+ .id = 2,
+ },
+ {
+ .name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
+ .check = test__checkevent_complex_name,
+ .id = 3,
+ }
+};
+
+struct terms_test {
+ const char *str;
+ __u32 type;
+ int (*check)(struct list_head *terms);
+};
+
+static struct terms_test test__terms[] = {
+ [0] = {
+ .str = "config=10,config1,config2=3,umask=1",
+ .check = test__checkterms_simple,
+ },
+};
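+
+/*
+ * A rough reading of the string above, per the usual term grammar: the
+ * explicit "config=10" and "config2=3" parse as numeric terms, a bare
+ * "config1" defaults to the value 1, and "umask=1" is an unknown (user)
+ * term left for test__checkterms_simple to verify.
+ */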
+
+static int test_event(struct evlist_test *e)
+{
+ struct parse_events_error err = { .idx = 0, };
+ struct perf_evlist *evlist;
+ int ret;
+
+ if (e->valid && !e->valid()) {
+ pr_debug("... SKIP");
+ return 0;
+ }
+
+ evlist = perf_evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ ret = parse_events(evlist, e->name, &err);
+ if (ret) {
+ pr_debug("failed to parse event '%s', err %d, str '%s'\n",
+ e->name, ret, err.str);
+ parse_events_print_error(&err, e->name);
+ } else {
+ ret = e->check(evlist);
+ }
+
+ perf_evlist__delete(evlist);
+
+ return ret;
+}
+
+static int test_events(struct evlist_test *events, unsigned cnt)
+{
+ int ret1, ret2 = 0;
+ unsigned i;
+
+ for (i = 0; i < cnt; i++) {
+ struct evlist_test *e = &events[i];
+
+ pr_debug("running test %d '%s'", e->id, e->name);
+ ret1 = test_event(e);
+ if (ret1)
+ ret2 = ret1;
+ pr_debug("\n");
+ }
+
+ return ret2;
+}
+
+static int test_term(struct terms_test *t)
+{
+ struct list_head terms;
+ int ret;
+
+ INIT_LIST_HEAD(&terms);
+
+ ret = parse_events_terms(&terms, t->str);
+ if (ret) {
+ pr_debug("failed to parse terms '%s', err %d\n",
+ t->str, ret);
+ return ret;
+ }
+
+ ret = t->check(&terms);
+ parse_events_terms__purge(&terms);
+
+ return ret;
+}
+
+static int test_terms(struct terms_test *terms, unsigned cnt)
+{
+ int ret = 0;
+ unsigned i;
+
+ for (i = 0; i < cnt; i++) {
+ struct terms_test *t = &terms[i];
+
+ pr_debug("running test %d '%s'\n", i, t->str);
+ ret = test_term(t);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
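+/*
+ * Returns non-zero when the "cpu" PMU exposes a format directory in
+ * sysfs, i.e. when the PMU event tests below have something to parse.
+ */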
+static int test_pmu(void)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ int ret;
+
+ snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/format/",
+ sysfs__mountpoint());
+
+ ret = stat(path, &st);
+ if (ret)
+ pr_debug("omitting PMU cpu tests\n");
+ return !ret;
+}
+
+static int test_pmu_events(void)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+ int ret;
+
+ snprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu/events/",
+ sysfs__mountpoint());
+
+ ret = stat(path, &st);
+ if (ret) {
+ pr_debug("omitting PMU cpu events tests\n");
+ return 0;
+ }
+
+ dir = opendir(path);
+ if (!dir) {
+ pr_debug("can't open pmu event dir");
+ return -1;
+ }
+
+ while (!ret && (ent = readdir(dir))) {
+ struct evlist_test e = { .id = 0, };
+ char name[2 * NAME_MAX + 1 + 12 + 3];
+
+ /* Names containing . are special and cannot be used directly */
+ if (strchr(ent->d_name, '.'))
+ continue;
+
+ snprintf(name, sizeof(name), "cpu/event=%s/u", ent->d_name);
+
+ e.name = name;
+ e.check = test__checkevent_pmu_events;
+
+ ret = test_event(&e);
+ if (ret)
+ break;
+ snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name);
+ e.name = name;
+ e.check = test__checkevent_pmu_events_mix;
+ ret = test_event(&e);
+ }
+
+ closedir(dir);
+ return ret;
+}
+
+int test__parse_events(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret1, ret2 = 0;
+
+#define TEST_EVENTS(tests) \
+do { \
+ ret1 = test_events(tests, ARRAY_SIZE(tests)); \
+ if (!ret2) \
+ ret2 = ret1; \
+} while (0)
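+/* ret2 latches the first failing table's result; later passes can't clear it. */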
+
+ TEST_EVENTS(test__events);
+
+ if (test_pmu())
+ TEST_EVENTS(test__events_pmu);
+
+ if (test_pmu()) {
+ int ret = test_pmu_events();
+ if (ret)
+ return ret;
+ }
+
+ ret1 = test_terms(test__terms, ARRAY_SIZE(test__terms));
+ if (!ret2)
+ ret2 = ret1;
+
+ return ret2;
+}
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
new file mode 100644
index 000000000..2196d1497
--- /dev/null
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <stddef.h>
+
+#include "tests.h"
+
+#include "event.h"
+#include "evlist.h"
+#include "header.h"
+#include "util.h"
+#include "debug.h"
+
+static int process_event(struct perf_evlist **pevlist, union perf_event *event)
+{
+ struct perf_sample sample;
+
+ if (event->header.type == PERF_RECORD_HEADER_ATTR) {
+ if (perf_event__process_attr(NULL, event, pevlist)) {
+ pr_debug("perf_event__process_attr failed\n");
+ return -1;
+ }
+ return 0;
+ }
+
+ if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+ return -1;
+
+ if (!*pevlist)
+ return -1;
+
+ if (perf_evlist__parse_sample(*pevlist, event, &sample)) {
+ pr_debug("perf_evlist__parse_sample failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int process_events(union perf_event **events, size_t count)
+{
+ struct perf_evlist *evlist = NULL;
+ int err = 0;
+ size_t i;
+
+ for (i = 0; i < count && !err; i++)
+ err = process_event(&evlist, events[i]);
+
+ perf_evlist__delete(evlist);
+
+ return err;
+}
+
+struct test_attr_event {
+ struct perf_event_header header;
+ struct perf_event_attr attr;
+ u64 id;
+};
+
+/**
+ * test__parse_no_sample_id_all - test parsing with no sample_id_all bit set.
+ *
+ * This function tests parsing data produced on kernels that do not support the
+ * sample_id_all bit. Without the sample_id_all bit, non-sample events (such as
+ * mmap events) do not have an id sample appended, and consequently logic
+ * designed to determine the id will not work. That case happens when there is
+ * more than one selected event, so this test processes three events: two
+ * attributes representing the selected events and one mmap event.
+ *
+ * Return: %0 on success, %-1 if the test fails.
+ */
+int test__parse_no_sample_id_all(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err;
+
+ struct test_attr_event event1 = {
+ .header = {
+ .type = PERF_RECORD_HEADER_ATTR,
+ .size = sizeof(struct test_attr_event),
+ },
+ .id = 1,
+ };
+ struct test_attr_event event2 = {
+ .header = {
+ .type = PERF_RECORD_HEADER_ATTR,
+ .size = sizeof(struct test_attr_event),
+ },
+ .id = 2,
+ };
+ struct mmap_event event3 = {
+ .header = {
+ .type = PERF_RECORD_MMAP,
+ .size = sizeof(struct mmap_event),
+ },
+ };
+ union perf_event *events[] = {
+ (union perf_event *)&event1,
+ (union perf_event *)&event2,
+ (union perf_event *)&event3,
+ };
+
+ err = process_events(events, ARRAY_SIZE(events));
+ if (err)
+ return -1;
+
+ return 0;
+}
diff --git a/tools/perf/tests/perf-hooks.c b/tools/perf/tests/perf-hooks.c
new file mode 100644
index 000000000..44c16fd11
--- /dev/null
+++ b/tools/perf/tests/perf-hooks.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+#include <stdlib.h>
+
+#include "tests.h"
+#include "debug.h"
+#include "util.h"
+#include "perf-hooks.h"
+
+static void sigsegv_handler(int sig __maybe_unused)
+{
+ pr_debug("SIGSEGV is observed as expected, try to recover.\n");
+ perf_hooks__recover();
+ signal(SIGSEGV, SIG_DFL);
+ raise(SIGSEGV);
+ exit(-1);
+}
+
+
+static void the_hook(void *_hook_flags)
+{
+ int *hook_flags = _hook_flags;
+
+ *hook_flags = 1234;
+
+ /* Generate a segfault, test perf_hooks__recover */
+ raise(SIGSEGV);
+}
+
+int test__perf_hooks(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int hook_flags = 0;
+
+ signal(SIGSEGV, sigsegv_handler);
+ perf_hooks__set_hook("test", the_hook, &hook_flags);
+ perf_hooks__invoke_test();
+
+ /* Was the hook triggered? */
+ if (hook_flags != 1234) {
+ pr_debug("Setting failed: %d (%p)\n", hook_flags, &hook_flags);
+ return TEST_FAIL;
+ }
+
+ /* Was the buggy hook removed? */
+ if (perf_hooks__get_hook("test"))
+ return TEST_FAIL;
+ return TEST_OK;
+}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
new file mode 100644
index 000000000..34394cc05
--- /dev/null
+++ b/tools/perf/tests/perf-record.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+/* For the CLR_() macros */
+#include <pthread.h>
+
+#include <sched.h>
+#include "evlist.h"
+#include "evsel.h"
+#include "perf.h"
+#include "debug.h"
+#include "tests.h"
+
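+/*
+ * Pick the first CPU set in @pid's affinity mask, clearing all the
+ * others from *maskp, and return it; the caller then binds the workload
+ * to that CPU so every perf_sample.cpu can be checked against it.
+ */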
+static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
+{
+ int i, cpu = -1, nrcpus = 1024;
+realloc:
+ CPU_ZERO(maskp);
+
+ if (sched_getaffinity(pid, sizeof(*maskp), maskp) == -1) {
+ if (errno == EINVAL && nrcpus < (1024 << 8)) {
+ nrcpus = nrcpus << 2;
+ goto realloc;
+ }
+ perror("sched_getaffinity");
+ return -1;
+ }
+
+ for (i = 0; i < nrcpus; i++) {
+ if (CPU_ISSET(i, maskp)) {
+ if (cpu == -1)
+ cpu = i;
+ else
+ CPU_CLR(i, maskp);
+ }
+ }
+
+ return cpu;
+}
+
+int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct record_opts opts = {
+ .target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ },
+ .no_buffering = true,
+ .mmap_pages = 256,
+ };
+ cpu_set_t cpu_mask;
+ size_t cpu_mask_size = sizeof(cpu_mask);
+ struct perf_evlist *evlist = perf_evlist__new_dummy();
+ struct perf_evsel *evsel;
+ struct perf_sample sample;
+ const char *cmd = "sleep";
+ const char *argv[] = { cmd, "1", NULL, };
+ char *bname, *mmap_filename;
+ u64 prev_time = 0;
+ bool found_cmd_mmap = false,
+ found_libc_mmap = false,
+ found_vdso_mmap = false,
+ found_ld_mmap = false;
+ int err = -1, errs = 0, i, wakeups = 0;
+ u32 cpu;
+ int total_events = 0, nr_events[PERF_RECORD_MAX] = { 0, };
+ char sbuf[STRERR_BUFSIZE];
+
+ if (evlist == NULL) /* Fallback for kernels lacking PERF_COUNT_SW_DUMMY */
+ evlist = perf_evlist__new_default();
+
+ if (evlist == NULL) {
+ pr_debug("Not enough memory to create evlist\n");
+ goto out;
+ }
+
+ /*
+ * Create maps of threads and cpus to monitor. In this case
+ * we start with all threads and cpus (-1, -1) but then in
+ * perf_evlist__prepare_workload we'll fill in the only thread
+ * we're monitoring, the one forked there.
+ */
+ err = perf_evlist__create_maps(evlist, &opts.target);
+ if (err < 0) {
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_delete_evlist;
+ }
+
+ /*
+ * Prepare the workload in argv[] to run, it'll fork it, and then wait
+ * for perf_evlist__start_workload() to exec it. This is done this way
+ * so that we have time to open the evlist (calling sys_perf_event_open
+ * on all the fds) and then mmap them.
+ */
+ err = perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
+ if (err < 0) {
+ pr_debug("Couldn't run the workload!\n");
+ goto out_delete_evlist;
+ }
+
+ /*
+ * Config the evsels, setting attr->comm on the first one, etc.
+ */
+ evsel = perf_evlist__first(evlist);
+ perf_evsel__set_sample_bit(evsel, CPU);
+ perf_evsel__set_sample_bit(evsel, TID);
+ perf_evsel__set_sample_bit(evsel, TIME);
+ perf_evlist__config(evlist, &opts, NULL);
+
+ err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
+ if (err < 0) {
+ pr_debug("sched__get_first_possible_cpu: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ cpu = err;
+
+ /*
+ * So that we can check perf_sample.cpu on all the samples.
+ */
+ if (sched_setaffinity(evlist->workload.pid, cpu_mask_size, &cpu_mask) < 0) {
+ pr_debug("sched_setaffinity: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ /*
+ * Call sys_perf_event_open on all the fds on all the evsels,
+ * grouping them if asked to.
+ */
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("perf_evlist__open: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ /*
+ * mmap the first fd on a given CPU and ask for events for the other
+ * fds in the same CPU to be injected in the same mmap ring buffer
+ * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)).
+ */
+ err = perf_evlist__mmap(evlist, opts.mmap_pages);
+ if (err < 0) {
+ pr_debug("perf_evlist__mmap: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ /*
+ * Now that all is properly set up, enable the events, they will
+ * count just on workload.pid, which will start...
+ */
+ perf_evlist__enable(evlist);
+
+ /*
+ * Now!
+ */
+ perf_evlist__start_workload(evlist);
+
+ while (1) {
+ int before = total_events;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ union perf_event *event;
+ struct perf_mmap *md;
+
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ const u32 type = event->header.type;
+ const char *name = perf_event__name(type);
+
+ ++total_events;
+ if (type < PERF_RECORD_MAX)
+ nr_events[type]++;
+
+ err = perf_evlist__parse_sample(evlist, event, &sample);
+ if (err < 0) {
+ if (verbose > 0)
+ perf_event__fprintf(event, stderr);
+ pr_debug("Couldn't parse sample\n");
+ goto out_delete_evlist;
+ }
+
+ if (verbose > 0) {
+ pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
+ perf_event__fprintf(event, stderr);
+ }
+
+ if (prev_time > sample.time) {
+ pr_debug("%s going backwards in time, prev=%" PRIu64 ", curr=%" PRIu64 "\n",
+ name, prev_time, sample.time);
+ ++errs;
+ }
+
+ prev_time = sample.time;
+
+ if (sample.cpu != cpu) {
+ pr_debug("%s with unexpected cpu, expected %d, got %d\n",
+ name, cpu, sample.cpu);
+ ++errs;
+ }
+
+ if ((pid_t)sample.pid != evlist->workload.pid) {
+ pr_debug("%s with unexpected pid, expected %d, got %d\n",
+ name, evlist->workload.pid, sample.pid);
+ ++errs;
+ }
+
+ if ((pid_t)sample.tid != evlist->workload.pid) {
+ pr_debug("%s with unexpected tid, expected %d, got %d\n",
+ name, evlist->workload.pid, sample.tid);
+ ++errs;
+ }
+
+ if ((type == PERF_RECORD_COMM ||
+ type == PERF_RECORD_MMAP ||
+ type == PERF_RECORD_MMAP2 ||
+ type == PERF_RECORD_FORK ||
+ type == PERF_RECORD_EXIT) &&
+ (pid_t)event->comm.pid != evlist->workload.pid) {
+ pr_debug("%s with unexpected pid/tid\n", name);
+ ++errs;
+ }
+
+ if ((type == PERF_RECORD_COMM ||
+ type == PERF_RECORD_MMAP ||
+ type == PERF_RECORD_MMAP2) &&
+ event->comm.pid != event->comm.tid) {
+ pr_debug("%s with different pid/tid!\n", name);
+ ++errs;
+ }
+
+ switch (type) {
+ case PERF_RECORD_COMM:
+ if (strcmp(event->comm.comm, cmd)) {
+ pr_debug("%s with unexpected comm!\n", name);
+ ++errs;
+ }
+ break;
+ case PERF_RECORD_EXIT:
+ goto found_exit;
+ case PERF_RECORD_MMAP:
+ mmap_filename = event->mmap.filename;
+ goto check_bname;
+ case PERF_RECORD_MMAP2:
+ mmap_filename = event->mmap2.filename;
+ check_bname:
+ bname = strrchr(mmap_filename, '/');
+ if (bname != NULL) {
+ if (!found_cmd_mmap)
+ found_cmd_mmap = !strcmp(bname + 1, cmd);
+ if (!found_libc_mmap)
+ found_libc_mmap = !strncmp(bname + 1, "libc", 4);
+ if (!found_ld_mmap)
+ found_ld_mmap = !strncmp(bname + 1, "ld", 2);
+ } else if (!found_vdso_mmap)
+ found_vdso_mmap = !strcmp(mmap_filename, "[vdso]");
+ break;
+
+ case PERF_RECORD_SAMPLE:
+ /* Just ignore samples for now */
+ break;
+ default:
+ pr_debug("Unexpected perf_event->header.type %d!\n",
+ type);
+ ++errs;
+ }
+
+ perf_mmap__consume(md);
+ }
+ perf_mmap__read_done(md);
+ }
+
+ /*
+ * We don't use poll here because, at least as of kernel 3.1, the
+ * PERF_RECORD_{!SAMPLE} events don't honour
+ * perf_event_attr.wakeup_events, just PERF_RECORD_SAMPLE does.
+ */
+ if (total_events == before && false)
+ perf_evlist__poll(evlist, -1);
+
+ sleep(1);
+ if (++wakeups > 5) {
+ pr_debug("No PERF_RECORD_EXIT event!\n");
+ break;
+ }
+ }
+
+found_exit:
+ if (nr_events[PERF_RECORD_COMM] > 1) {
+ pr_debug("Excessive number of PERF_RECORD_COMM events!\n");
+ ++errs;
+ }
+
+ if (nr_events[PERF_RECORD_COMM] == 0) {
+ pr_debug("Missing PERF_RECORD_COMM for %s!\n", cmd);
+ ++errs;
+ }
+
+ if (!found_cmd_mmap) {
+ pr_debug("PERF_RECORD_MMAP for %s missing!\n", cmd);
+ ++errs;
+ }
+
+ if (!found_libc_mmap) {
+ pr_debug("PERF_RECORD_MMAP for %s missing!\n", "libc");
+ ++errs;
+ }
+
+ if (!found_ld_mmap) {
+ pr_debug("PERF_RECORD_MMAP for %s missing!\n", "ld");
+ ++errs;
+ }
+
+ if (!found_vdso_mmap) {
+ pr_debug("PERF_RECORD_MMAP for %s missing!\n", "[vdso]");
+ ++errs;
+ }
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+out:
+ return (err < 0 || errs > 0) ? -1 : 0;
+}
diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg
new file mode 100755
index 000000000..fae26b1cf
--- /dev/null
+++ b/tools/perf/tests/perf-targz-src-pkg
@@ -0,0 +1,22 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Test one of the main kernel Makefile targets to generate a perf sources tarball
+# suitable for building outside the full kernel sources.
+#
+# This is to test that the tools/perf/MANIFEST file lists all the files needed
+# to be in such a tarball, which sometimes gets broken when we move files
+# around, like when we made some files that were in tools/perf/ available to
+# other tools/ codebases by moving them to tools/include/, etc.
+
+PERF=$1
+cd ${PERF}/../..
+make perf-targz-src-pkg > /dev/null
+TARBALL=$(ls -rt perf-*.tar.gz)
+TMP_DEST=$(mktemp -d)
+tar xf ${TARBALL} -C $TMP_DEST
+rm -f ${TARBALL}
+cd - > /dev/null
+make -C $TMP_DEST/perf*/tools/perf > /dev/null
+RC=$?
+rm -rf ${TMP_DEST}
+exit $RC
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
new file mode 100644
index 000000000..3e183eef6
--- /dev/null
+++ b/tools/perf/tests/pmu.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "parse-events.h"
+#include "pmu.h"
+#include "util.h"
+#include "tests.h"
+#include <errno.h>
+#include <linux/kernel.h>
+
+/* Simulated format definitions. */
+static struct test_format {
+ const char *name;
+ const char *value;
+} test_formats[] = {
+ { "krava01", "config:0-1,62-63\n", },
+ { "krava02", "config:10-17\n", },
+ { "krava03", "config:5\n", },
+ { "krava11", "config1:0,2,4,6,8,20-28\n", },
+ { "krava12", "config1:63\n", },
+ { "krava13", "config1:45-47\n", },
+ { "krava21", "config2:0-3,10-13,20-23,30-33,40-43,50-53,60-63\n", },
+ { "krava22", "config2:8,18,48,58\n", },
+ { "krava23", "config2:28-29,38\n", },
+};
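+
+/*
+ * Each value uses the sysfs PMU format syntax: e.g. "config:0-1,62-63"
+ * scatters the field's value bits into bits 0-1 and 62-63 of
+ * perf_event_attr.config.
+ */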
+
+/* Simulated user input. */
+static struct parse_events_term test_terms[] = {
+ {
+ .config = (char *) "krava01",
+ .val.num = 15,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava02",
+ .val.num = 170,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava03",
+ .val.num = 1,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava11",
+ .val.num = 27,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava12",
+ .val.num = 1,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava13",
+ .val.num = 2,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava21",
+ .val.num = 119,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava22",
+ .val.num = 11,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+ {
+ .config = (char *) "krava23",
+ .val.num = 2,
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ },
+};
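+
+/*
+ * Worked example of the packing test__pmu() checks below, per the
+ * formats above: krava01=15 with "config:0-1,62-63" scatters into
+ * 0x3 | 0x3ULL << 62, krava02=170 with "config:10-17" shifts to
+ * 0xaa << 10 = 0x2a800, and krava03=1 with "config:5" sets bit 5
+ * (0x20), summing to 0xc00000000002a823.
+ */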
+
+/*
+ * Prepare format directory data, exported by kernel
+ * at /sys/bus/event_source/devices/<dev>/format.
+ */
+static char *test_format_dir_get(void)
+{
+ static char dir[PATH_MAX];
+ unsigned int i;
+
+ snprintf(dir, PATH_MAX, "/tmp/perf-pmu-test-format-XXXXXX");
+ if (!mkdtemp(dir))
+ return NULL;
+
+ for (i = 0; i < ARRAY_SIZE(test_formats); i++) {
+ static char name[PATH_MAX];
+ struct test_format *format = &test_formats[i];
+ FILE *file;
+
+ scnprintf(name, PATH_MAX, "%s/%s", dir, format->name);
+
+ file = fopen(name, "w");
+ if (!file)
+ return NULL;
+
+ if (1 != fwrite(format->value, strlen(format->value), 1, file)) {
+ fclose(file);
+ break;
+ }
+
+ fclose(file);
+ }
+
+ return dir;
+}
+
+/* Cleanup format directory. */
+static int test_format_dir_put(char *dir)
+{
+ char buf[PATH_MAX];
+ snprintf(buf, PATH_MAX, "rm -f %s/*\n", dir);
+ if (system(buf))
+ return -1;
+
+ snprintf(buf, PATH_MAX, "rmdir %s\n", dir);
+ return system(buf);
+}
+
+static struct list_head *test_terms_list(void)
+{
+ static LIST_HEAD(terms);
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_terms); i++)
+ list_add_tail(&test_terms[i].list, &terms);
+
+ return &terms;
+}
+
+int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ char *format = test_format_dir_get();
+ LIST_HEAD(formats);
+ struct list_head *terms = test_terms_list();
+ int ret;
+
+ if (!format)
+ return -EINVAL;
+
+ do {
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+
+ ret = perf_pmu__format_parse(format, &formats);
+ if (ret)
+ break;
+
+ ret = perf_pmu__config_terms(&formats, &attr, terms,
+ false, NULL);
+ if (ret)
+ break;
+
+ ret = -EINVAL;
+
+ if (attr.config != 0xc00000000002a823)
+ break;
+ if (attr.config1 != 0x8000400000000145)
+ break;
+ if (attr.config2 != 0x0400000020041d07)
+ break;
+
+ ret = 0;
+ } while (0);
+
+ perf_pmu__del_formats(&formats);
+ test_format_dir_put(format);
+ return ret;
+}
diff --git a/tools/perf/tests/python-use.c b/tools/perf/tests/python-use.c
new file mode 100644
index 000000000..40ab72149
--- /dev/null
+++ b/tools/perf/tests/python-use.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Just test if we can load the python binding.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include "tests.h"
+#include "util/debug.h"
+
+int test__python_use(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ char *cmd;
+ int ret;
+
+ if (asprintf(&cmd, "echo \"import sys ; sys.path.append('%s'); import perf\" | %s %s",
+ PYTHONPATH, PYTHON, verbose > 0 ? "" : "2> /dev/null") < 0)
+ return -1;
+
+ ret = system(cmd) ? -1 : 0;
+ free(cmd);
+ return ret;
+}
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
new file mode 100644
index 000000000..66e46bc8d
--- /dev/null
+++ b/tools/perf/tests/sample-parsing.c
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "util.h"
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+
+#include "tests.h"
+
+#define COMP(m) do { \
+ if (s1->m != s2->m) { \
+ pr_debug("Samples differ at '"#m"'\n"); \
+ return false; \
+ } \
+} while (0)
+
+#define MCOMP(m) do { \
+ if (memcmp(&s1->m, &s2->m, sizeof(s1->m))) { \
+ pr_debug("Samples differ at '"#m"'\n"); \
+ return false; \
+ } \
+} while (0)
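+/*
+ * COMP() compares one scalar member of the two samples, MCOMP()
+ * memcmp()s an embedded struct; both name the member on mismatch.
+ */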
+
+static bool samples_same(const struct perf_sample *s1,
+ const struct perf_sample *s2,
+ u64 type, u64 read_format)
+{
+ size_t i;
+
+ if (type & PERF_SAMPLE_IDENTIFIER)
+ COMP(id);
+
+ if (type & PERF_SAMPLE_IP)
+ COMP(ip);
+
+ if (type & PERF_SAMPLE_TID) {
+ COMP(pid);
+ COMP(tid);
+ }
+
+ if (type & PERF_SAMPLE_TIME)
+ COMP(time);
+
+ if (type & PERF_SAMPLE_ADDR)
+ COMP(addr);
+
+ if (type & PERF_SAMPLE_ID)
+ COMP(id);
+
+ if (type & PERF_SAMPLE_STREAM_ID)
+ COMP(stream_id);
+
+ if (type & PERF_SAMPLE_CPU)
+ COMP(cpu);
+
+ if (type & PERF_SAMPLE_PERIOD)
+ COMP(period);
+
+ if (type & PERF_SAMPLE_READ) {
+ if (read_format & PERF_FORMAT_GROUP)
+ COMP(read.group.nr);
+ else
+ COMP(read.one.value);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ COMP(read.time_enabled);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ COMP(read.time_running);
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ for (i = 0; i < s1->read.group.nr; i++)
+ MCOMP(read.group.values[i]);
+ } else {
+ COMP(read.one.id);
+ }
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ COMP(callchain->nr);
+ for (i = 0; i < s1->callchain->nr; i++)
+ COMP(callchain->ips[i]);
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ COMP(raw_size);
+ if (memcmp(s1->raw_data, s2->raw_data, s1->raw_size)) {
+ pr_debug("Samples differ at 'raw_data'\n");
+ return false;
+ }
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ COMP(branch_stack->nr);
+ for (i = 0; i < s1->branch_stack->nr; i++)
+ MCOMP(branch_stack->entries[i]);
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ size_t sz = hweight_long(s1->user_regs.mask) * sizeof(u64);
+
+ COMP(user_regs.mask);
+ COMP(user_regs.abi);
+ if (s1->user_regs.abi &&
+ (!s1->user_regs.regs || !s2->user_regs.regs ||
+ memcmp(s1->user_regs.regs, s2->user_regs.regs, sz))) {
+ pr_debug("Samples differ at 'user_regs'\n");
+ return false;
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ COMP(user_stack.size);
+ if (memcmp(s1->user_stack.data, s2->user_stack.data,
+ s1->user_stack.size)) {
+ pr_debug("Samples differ at 'user_stack'\n");
+ return false;
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT)
+ COMP(weight);
+
+ if (type & PERF_SAMPLE_DATA_SRC)
+ COMP(data_src);
+
+ if (type & PERF_SAMPLE_TRANSACTION)
+ COMP(transaction);
+
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ size_t sz = hweight_long(s1->intr_regs.mask) * sizeof(u64);
+
+ COMP(intr_regs.mask);
+ COMP(intr_regs.abi);
+ if (s1->intr_regs.abi &&
+ (!s1->intr_regs.regs || !s2->intr_regs.regs ||
+ memcmp(s1->intr_regs.regs, s2->intr_regs.regs, sz))) {
+ pr_debug("Samples differ at 'intr_regs'\n");
+ return false;
+ }
+ }
+
+ if (type & PERF_SAMPLE_PHYS_ADDR)
+ COMP(phys_addr);
+
+ return true;
+}
+
+static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
+{
+ struct perf_evsel evsel = {
+ .needs_swap = false,
+ .attr = {
+ .sample_type = sample_type,
+ .read_format = read_format,
+ },
+ };
+ union perf_event *event;
+ union {
+ struct ip_callchain callchain;
+ u64 data[64];
+ } callchain = {
+ /* 3 ips */
+ .data = {3, 201, 202, 203},
+ };
+ union {
+ struct branch_stack branch_stack;
+ u64 data[64];
+ } branch_stack = {
+ /* 1 branch_entry */
+ .data = {1, 211, 212, 213},
+ };
+ u64 regs[64];
+ const u32 raw_data[] = {0x12345678, 0x0a0b0c0d, 0x11020304, 0x05060708, 0 };
+ const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL};
+ struct perf_sample sample = {
+ .ip = 101,
+ .pid = 102,
+ .tid = 103,
+ .time = 104,
+ .addr = 105,
+ .id = 106,
+ .stream_id = 107,
+ .period = 108,
+ .weight = 109,
+ .cpu = 110,
+ .raw_size = sizeof(raw_data),
+ .data_src = 111,
+ .transaction = 112,
+ .raw_data = (void *)raw_data,
+ .callchain = &callchain.callchain,
+ .branch_stack = &branch_stack.branch_stack,
+ .user_regs = {
+ .abi = PERF_SAMPLE_REGS_ABI_64,
+ .mask = sample_regs,
+ .regs = regs,
+ },
+ .user_stack = {
+ .size = sizeof(data),
+ .data = (void *)data,
+ },
+ .read = {
+ .time_enabled = 0x030a59d664fca7deULL,
+ .time_running = 0x011b6ae553eb98edULL,
+ },
+ .intr_regs = {
+ .abi = PERF_SAMPLE_REGS_ABI_64,
+ .mask = sample_regs,
+ .regs = regs,
+ },
+ .phys_addr = 113,
+ };
+ struct sample_read_value values[] = {{1, 5}, {9, 3}, {2, 7}, {6, 4},};
+ struct perf_sample sample_out;
+ size_t i, sz, bufsz;
+ int err, ret = -1;
+
+ if (sample_type & PERF_SAMPLE_REGS_USER)
+ evsel.attr.sample_regs_user = sample_regs;
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ evsel.attr.sample_regs_intr = sample_regs;
+
+ for (i = 0; i < sizeof(regs); i++)
+ *(i + (u8 *)regs) = i & 0xfe;
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ sample.read.group.nr = 4;
+ sample.read.group.values = values;
+ } else {
+ sample.read.one.value = 0x08789faeb786aa87ULL;
+ sample.read.one.id = 99;
+ }
+
+ sz = perf_event__sample_event_size(&sample, sample_type, read_format);
+ bufsz = sz + 4096; /* Add a bit for overrun checking */
+ event = malloc(bufsz);
+ if (!event) {
+ pr_debug("malloc failed\n");
+ return -1;
+ }
+
+ memset(event, 0xff, bufsz);
+ event->header.type = PERF_RECORD_SAMPLE;
+ event->header.misc = 0;
+ event->header.size = sz;
+
+ err = perf_event__synthesize_sample(event, sample_type, read_format,
+ &sample);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "perf_event__synthesize_sample", sample_type, err);
+ goto out_free;
+ }
+
+ /* The data does not contain 0xff so we use that to check the size */
+ for (i = bufsz; i > 0; i--) {
+ if (*(i - 1 + (u8 *)event) != 0xff)
+ break;
+ }
+ if (i != sz) {
+ pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+ i, sz);
+ goto out_free;
+ }
+
+ evsel.sample_size = __perf_evsel__sample_size(sample_type);
+
+ err = perf_evsel__parse_sample(&evsel, event, &sample_out);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "perf_evsel__parse_sample", sample_type, err);
+ goto out_free;
+ }
+
+ if (!samples_same(&sample, &sample_out, sample_type, read_format)) {
+ pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+ sample_type);
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ free(event);
+ if (ret && read_format)
+ pr_debug("read_format %#"PRIx64"\n", read_format);
+ return ret;
+}
+
+/**
+ * test__sample_parsing - test sample parsing.
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample. The test
+ * checks sample format bits separately and together. If the test passes %0 is
+ * returned, otherwise %-1 is returned.
+ */
+int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ const u64 rf[] = {4, 5, 6, 7, 12, 13, 14, 15};
+ u64 sample_type;
+ u64 sample_regs;
+ size_t i;
+ int err;
+
+ /*
+ * Fail the test if it has not been updated when new sample format bits
+ * were added. Please actually update the test rather than just change
+ * the condition below.
+ */
+ if (PERF_SAMPLE_MAX > PERF_SAMPLE_PHYS_ADDR << 1) {
+ pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
+ return -1;
+ }
+
+ /* Test each sample format bit separately */
+ for (sample_type = 1; sample_type != PERF_SAMPLE_MAX;
+ sample_type <<= 1) {
+ /* Test read_format variations */
+ if (sample_type == PERF_SAMPLE_READ) {
+ for (i = 0; i < ARRAY_SIZE(rf); i++) {
+ err = do_test(sample_type, 0, rf[i]);
+ if (err)
+ return err;
+ }
+ continue;
+ }
+ sample_regs = 0;
+
+ if (sample_type == PERF_SAMPLE_REGS_USER)
+ sample_regs = 0x3fff;
+
+ if (sample_type == PERF_SAMPLE_REGS_INTR)
+ sample_regs = 0xff0fff;
+
+ err = do_test(sample_type, sample_regs, 0);
+ if (err)
+ return err;
+ }
+
+ /* Test all sample format bits together */
+ sample_type = PERF_SAMPLE_MAX - 1;
+ sample_regs = 0x3fff; /* shared by intr and user regs */
+ for (i = 0; i < ARRAY_SIZE(rf); i++) {
+ err = do_test(sample_type, sample_regs, rf[i]);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/tests/sdt.c b/tools/perf/tests/sdt.c
new file mode 100644
index 000000000..5059452d2
--- /dev/null
+++ b/tools/perf/tests/sdt.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <sys/epoll.h>
+#include <util/evlist.h>
+#include <linux/filter.h>
+#include "tests.h"
+#include "debug.h"
+#include "probe-file.h"
+#include "build-id.h"
+
+/* To test SDT events, we need libelf support to scan the ELF binary */
+#if defined(HAVE_SDT_EVENT) && defined(HAVE_LIBELF_SUPPORT)
+
+#include <sys/sdt.h>
+
+static int target_function(void)
+{
+ DTRACE_PROBE(perf, test_target);
+ return TEST_OK;
+}
+
+/* Copied from builtin-buildid-cache.c */
+static int build_id_cache__add_file(const char *filename)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+ u8 build_id[BUILD_ID_SIZE];
+ int err;
+
+ err = filename__read_build_id(filename, &build_id, sizeof(build_id));
+ if (err < 0) {
+ pr_debug("Failed to read build id of %s\n", filename);
+ return err;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__add_s(sbuild_id, filename, NULL, false, false);
+ if (err < 0)
+ pr_debug("Failed to add build id cache of %s\n", filename);
+ return err;
+}
+
+static char *get_self_path(void)
+{
+ char *buf = calloc(PATH_MAX, sizeof(char));
+
+ if (buf && readlink("/proc/self/exe", buf, PATH_MAX - 1) < 0) {
+ pr_debug("Failed to get correct path of perf\n");
+ free(buf);
+ return NULL;
+ }
+ return buf;
+}
+
+static int search_cached_probe(const char *target,
+ const char *group, const char *event)
+{
+ struct probe_cache *cache = probe_cache__new(target, NULL);
+ int ret = 0;
+
+ if (!cache) {
+ pr_debug("Failed to open probe cache of %s\n", target);
+ return -EINVAL;
+ }
+
+ if (!probe_cache__find_by_name(cache, group, event)) {
+ pr_debug("Failed to find %s:%s in the cache\n", group, event);
+ ret = -ENOENT;
+ }
+ probe_cache__delete(cache);
+
+ return ret;
+}
+
+int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+{
+ int ret = TEST_FAIL;
+ char __tempdir[] = "./test-buildid-XXXXXX";
+ char *tempdir = NULL, *myself = get_self_path();
+
+ if (myself == NULL || mkdtemp(__tempdir) == NULL) {
+ pr_debug("Failed to make a tempdir for build-id cache\n");
+ goto error;
+ }
+ /* Note that buildid_dir must be an absolute path */
+ tempdir = realpath(__tempdir, NULL);
+ if (tempdir == NULL)
+ goto error_rmdir;
+
+ /* At first, scan itself */
+ set_buildid_dir(tempdir);
+ if (build_id_cache__add_file(myself) < 0)
+ goto error_rmdir;
+
+ /* Open a cache and make sure the SDT is stored */
+ if (search_cached_probe(myself, "sdt_perf", "test_target") < 0)
+ goto error_rmdir;
+
+ /* TBD: probe the SDT event and collect the logs */
+
+ /* Call the target and get an event */
+ ret = target_function();
+
+error_rmdir:
+ /* Cleanup temporary buildid dir */
+ rm_rf(__tempdir);
+error:
+ free(tempdir);
+ free(myself);
+ return ret;
+}
+#else
+int test__sdt_event(struct test *test __maybe_unused, int subtests __maybe_unused)
+{
+ pr_debug("Skip SDT event test because SDT support is not compiled\n");
+ return TEST_SKIP;
+}
+#endif
diff --git a/tools/perf/tests/shell/lib/probe.sh b/tools/perf/tests/shell/lib/probe.sh
new file mode 100644
index 000000000..6293cc660
--- /dev/null
+++ b/tools/perf/tests/shell/lib/probe.sh
@@ -0,0 +1,6 @@
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+skip_if_no_perf_probe() {
+ perf probe 2>&1 | grep -q 'is not a perf-command' && return 2
+ return 0
+}
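+
+# Typical use from a test script, as the tests further below do:
+#   . $(dirname $0)/lib/probe.sh
+#   skip_if_no_perf_probe || exit 2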
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
new file mode 100644
index 000000000..c2cc42daf
--- /dev/null
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -0,0 +1,24 @@
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+perf probe -l 2>&1 | grep -q probe:vfs_getname
+had_vfs_getname=$?
+
+cleanup_probe_vfs_getname() {
+ if [ $had_vfs_getname -eq 1 ] ; then
+ perf probe -q -d probe:vfs_getname*
+ fi
+}
+
+add_probe_vfs_getname() {
+ local verbose=$1
+ if [ $had_vfs_getname -eq 1 ] ; then
+ line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
+ perf probe -q "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
+ perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
+ fi
+}
+
+skip_if_no_debuginfo() {
+ add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for kernel|Debuginfo-analysis is not supported)" && return 2
+ return 1
+}
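+
+# Callers chain these helpers, as probe_vfs_getname.sh below does:
+#   add_probe_vfs_getname || skip_if_no_debuginfo
+#   err=$?
+#   cleanup_probe_vfs_getname
+#   exit $err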
diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh
new file mode 100755
index 000000000..9b7635184
--- /dev/null
+++ b/tools/perf/tests/shell/probe_vfs_getname.sh
@@ -0,0 +1,14 @@
+# Add vfs_getname probe to get syscall args filenames
+#
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+skip_if_no_perf_probe || exit 2
+
+. $(dirname $0)/lib/probe_vfs_getname.sh
+
+add_probe_vfs_getname || skip_if_no_debuginfo
+err=$?
+cleanup_probe_vfs_getname
+exit $err
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
new file mode 100755
index 000000000..f5837f28f
--- /dev/null
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -0,0 +1,94 @@
+# probe libc's inet_pton & backtrace it with ping
+
+# Installs a probe on libc's inet_pton function, which will use uprobes,
+# then uses 'perf trace' on a ping to localhost asking for just one packet
+# with a backtrace 3 levels deep, and checks that it is what we expect.
+# This needs no debuginfo package; all is done using the libc ELF symtab
+# and the CFI info in the binaries.
+
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g')
+nm -Dg $libc 2>/dev/null | fgrep -q inet_pton || exit 254
+
+event_pattern='probe_libc:inet_pton(\_[[:digit:]]+)?'
+
+add_libc_inet_pton_event() {
+
+ event_name=$(perf probe -f -x $libc -a inet_pton 2>&1 | tail -n +2 | head -n -5 | \
+ grep -P -o "$event_pattern(?=[[:space:]]\(on inet_pton in $libc\))")
+
+ if [ $? -ne 0 -o -z "$event_name" ] ; then
+ printf "FAIL: could not add event\n"
+ return 1
+ fi
+}
+
+trace_libc_inet_pton_backtrace() {
+
+ expected=`mktemp -u /tmp/expected.XXX`
+
+ echo "ping[][0-9 \.:]+$event_name: \([[:xdigit:]]+\)" > $expected
+ echo ".*inet_pton\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+ case "$(uname -m)" in
+ s390x)
+ eventattr='call-graph=dwarf,max-stack=4'
+ echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+ echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected
+ echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected
+ ;;
+ ppc64|ppc64le)
+ eventattr='max-stack=4'
+ echo "gaih_inet.*\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+ echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+ echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
+ ;;
+ *)
+ eventattr='max-stack=3'
+ echo "getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc\)$" >> $expected
+ echo ".*(\+0x[[:xdigit:]]+|\[unknown\])[[:space:]]\(.*/bin/ping.*\)$" >> $expected
+ ;;
+ esac
+
+ perf_data=`mktemp -u /tmp/perf.data.XXX`
+ perf_script=`mktemp -u /tmp/perf.script.XXX`
+ perf record -e $event_name/$eventattr/ -o $perf_data ping -6 -c 1 ::1 > /dev/null 2>&1
+ perf script -i $perf_data > $perf_script
+
+ exec 3<$perf_script
+ exec 4<$expected
+ while read line <&3 && read -r pattern <&4; do
+ [ -z "$pattern" ] && break
+ echo $line
+ echo "$line" | egrep -q "$pattern"
+ if [ $? -ne 0 ] ; then
+ printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line"
+ return 1
+ fi
+ done
+
+ # If any statements are executed from this point onwards,
+ # the exit code of the last among these will be reflected
+ # in err below. If the exit code is 0, the test will pass
+ # even if the perf script output does not match.
+}
+
+delete_libc_inet_pton_event() {
+
+ if [ -n "$event_name" ] ; then
+ perf probe -q -d $event_name
+ fi
+}
+
+# Check for IPv6 interface existence
+ip a sh lo | fgrep -q inet6 || exit 2
+
+skip_if_no_perf_probe && \
+add_libc_inet_pton_event && \
+trace_libc_inet_pton_backtrace
+err=$?
+rm -f ${perf_data} ${perf_script} ${expected}
+delete_libc_inet_pton_event
+exit $err
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
new file mode 100755
index 000000000..ba29535b8
--- /dev/null
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -0,0 +1,41 @@
+# Use vfs_getname probe to get syscall args filenames
+
+# Uses the 'perf test shell' library to add probe:vfs_getname to the system
+# then uses it with 'perf record' while 'touch' writes to a temp file, and
+# checks that the write was captured by the vfs_getname probe in the generated
+# perf.data file, with the temp file name as the pathname argument.
+
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+skip_if_no_perf_probe || exit 2
+
+. $(dirname $0)/lib/probe_vfs_getname.sh
+
+perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+file=$(mktemp /tmp/temporary_file.XXXXX)
+
+record_open_file() {
+ echo "Recording open file:"
+ perf record -o ${perfdata} -e probe:vfs_getname touch $file
+}
+
+perf_script_filenames() {
+ echo "Looking at perf.data file for vfs_getname records for the file we touched:"
+ perf script -i ${perfdata} | \
+ egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\""
+}
+
+add_probe_vfs_getname || skip_if_no_debuginfo
+err=$?
+if [ $err -ne 0 ] ; then
+ exit $err
+fi
+
+record_open_file && perf_script_filenames
+err=$?
+rm -f ${perfdata}
+rm -f ${file}
+cleanup_probe_vfs_getname
+exit $err
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
new file mode 100755
index 000000000..fe223fc5c
--- /dev/null
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -0,0 +1,40 @@
+# Check open filename arg using perf trace + vfs_getname
+
+# Uses the 'perf test shell' library to add probe:vfs_getname to the system
+# then uses it with 'perf trace' while 'touch' writes to a temp file, and
+# checks that the pathname captured by vfs_getname was used by 'perf trace',
+# which already handles "probe:vfs_getname" if present, in the "open"
+# syscall "filename" argument beautifier.
+
+# Arnaldo Carvalho de Melo <acme@kernel.org>, 2017
+
+. $(dirname $0)/lib/probe.sh
+
+skip_if_no_perf_probe || exit 2
+
+. $(dirname $0)/lib/probe_vfs_getname.sh
+
+file=$(mktemp /tmp/temporary_file.XXXXX)
+
+trace_open_vfs_getname() {
+ evts=$(echo $(perf list syscalls:sys_enter_open* 2>&1 | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+ perf trace -e $evts touch $file 2>&1 | \
+ egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
+}
+
+
+add_probe_vfs_getname || skip_if_no_debuginfo
+err=$?
+if [ $err -ne 0 ] ; then
+ exit $err
+fi
+
+# Do not use the user's ~/.perfconfig file, as it may change the output
+# via trace.{show_timestamp,show_prefix,etc}
+export PERF_CONFIG=/dev/null
+
+trace_open_vfs_getname
+err=$?
+rm -f ${file}
+cleanup_probe_vfs_getname
+exit $err
diff --git a/tools/perf/tests/stat.c b/tools/perf/tests/stat.c
new file mode 100644
index 000000000..942500246
--- /dev/null
+++ b/tools/perf/tests/stat.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "event.h"
+#include "tests.h"
+#include "stat.h"
+#include "counts.h"
+#include "debug.h"
+
+static bool has_term(struct stat_config_event *config,
+ u64 tag, u64 val)
+{
+ unsigned i;
+
+ for (i = 0; i < config->nr; i++) {
+ if ((config->data[i].tag == tag) &&
+ (config->data[i].val == val))
+ return true;
+ }
+
+ return false;
+}
+
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_config_event *config = &event->stat_config;
+ struct perf_stat_config stat_config;
+
+#define HAS(term, val) \
+ has_term(config, PERF_STAT_CONFIG_TERM__##term, val)
+
+ TEST_ASSERT_VAL("wrong nr", config->nr == PERF_STAT_CONFIG_TERM__MAX);
+ TEST_ASSERT_VAL("wrong aggr_mode", HAS(AGGR_MODE, AGGR_CORE));
+ TEST_ASSERT_VAL("wrong scale", HAS(SCALE, 1));
+ TEST_ASSERT_VAL("wrong interval", HAS(INTERVAL, 1));
+
+#undef HAS
+
+ perf_event__read_stat_config(&stat_config, config);
+
+ TEST_ASSERT_VAL("wrong aggr_mode", stat_config.aggr_mode == AGGR_CORE);
+ TEST_ASSERT_VAL("wrong scale", stat_config.scale == 1);
+ TEST_ASSERT_VAL("wrong interval", stat_config.interval == 1);
+ return 0;
+}
+
+int test__synthesize_stat_config(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_stat_config stat_config = {
+ .aggr_mode = AGGR_CORE,
+ .scale = 1,
+ .interval = 1,
+ };
+
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat_config(NULL, &stat_config, process_stat_config_event, NULL));
+
+ return 0;
+}
+
+static int process_stat_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_event *st = &event->stat;
+
+ TEST_ASSERT_VAL("wrong cpu", st->cpu == 1);
+ TEST_ASSERT_VAL("wrong thread", st->thread == 2);
+ TEST_ASSERT_VAL("wrong id", st->id == 3);
+ TEST_ASSERT_VAL("wrong val", st->val == 100);
+ TEST_ASSERT_VAL("wrong run", st->ena == 200);
+ TEST_ASSERT_VAL("wrong ena", st->run == 300);
+ return 0;
+}
+
+int test__synthesize_stat(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct perf_counts_values count;
+
+ count.val = 100;
+ count.ena = 200;
+ count.run = 300;
+
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat(NULL, 1, 2, 3, &count, process_stat_event, NULL));
+
+ return 0;
+}
+
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct stat_round_event *stat_round = &event->stat_round;
+
+ TEST_ASSERT_VAL("wrong time", stat_round->time == 0xdeadbeef);
+ TEST_ASSERT_VAL("wrong type", stat_round->type == PERF_STAT_ROUND_TYPE__INTERVAL);
+ return 0;
+}
+
+int test__synthesize_stat_round(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("failed to synthesize stat_config",
+ !perf_event__synthesize_stat_round(NULL, 0xdeadbeef, PERF_STAT_ROUND_TYPE__INTERVAL,
+ process_stat_round_event, NULL));
+
+ return 0;
+}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
new file mode 100644
index 000000000..f9490b237
--- /dev/null
+++ b/tools/perf/tests/sw-clock.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+
+#include "tests.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+
+#define NR_LOOPS 10000000
+
+/*
+ * This test opens the software clock events (cpu-clock, task-clock),
+ * then checks that their frequency -> period conversion shows no
+ * artifact of the period being forced to 1.
+ */
+static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
+{
+ int i, err = -1;
+ volatile int tmp = 0;
+ u64 total_periods = 0;
+ int nr_samples = 0;
+ char sbuf[STRERR_BUFSIZE];
+ union perf_event *event;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = clock_id,
+ .sample_type = PERF_SAMPLE_PERIOD,
+ .exclude_kernel = 1,
+ .disabled = 1,
+ .freq = 1,
+ };
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ struct perf_mmap *md;
+
+ attr.sample_freq = 500;
+
+ evlist = perf_evlist__new();
+ if (evlist == NULL) {
+ pr_debug("perf_evlist__new\n");
+ return -1;
+ }
+
+ evsel = perf_evsel__new(&attr);
+ if (evsel == NULL) {
+ pr_debug("perf_evsel__new\n");
+ goto out_delete_evlist;
+ }
+ perf_evlist__add(evlist, evsel);
+
+ cpus = cpu_map__dummy_new();
+ threads = thread_map__new_by_tid(getpid());
+ if (!cpus || !threads) {
+ err = -ENOMEM;
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_free_maps;
+ }
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ cpus = NULL;
+ threads = NULL;
+
+ if (perf_evlist__open(evlist)) {
+ const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
+
+ err = -errno;
+ pr_debug("Couldn't open evlist: %s\nHint: check %s, using %" PRIu64 " in this test.\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)),
+ knob, (u64)attr.sample_freq);
+ goto out_delete_evlist;
+ }
+
+ err = perf_evlist__mmap(evlist, 128);
+ if (err < 0) {
+ pr_debug("failed to mmap event: %d (%s)\n", errno,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__enable(evlist);
+
+ /* collect samples */
+ for (i = 0; i < NR_LOOPS; i++)
+ tmp++;
+
+ perf_evlist__disable(evlist);
+
+ md = &evlist->mmap[0];
+ if (perf_mmap__read_init(md) < 0)
+ goto out_init;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ struct perf_sample sample;
+
+ if (event->header.type != PERF_RECORD_SAMPLE)
+ goto next_event;
+
+ err = perf_evlist__parse_sample(evlist, event, &sample);
+ if (err < 0) {
+ pr_debug("Error during parse sample\n");
+ goto out_delete_evlist;
+ }
+
+ total_periods += sample.period;
+ nr_samples++;
+next_event:
+ perf_mmap__consume(md);
+ }
+ perf_mmap__read_done(md);
+
+out_init:
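+	/*
+	 * If every sample had a period of 1, the sum of the periods equals
+	 * the number of samples, i.e. the freq -> period conversion failed.
+	 */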
+ if ((u64) nr_samples == total_periods) {
+ pr_debug("All (%d) samples have period value of 1!\n",
+ nr_samples);
+ err = -1;
+ }
+
+out_free_maps:
+ cpu_map__put(cpus);
+ thread_map__put(threads);
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return err;
+}
+
+int test__sw_clock_freq(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret;
+
+ ret = __test__sw_clock_freq(PERF_COUNT_SW_CPU_CLOCK);
+ if (!ret)
+ ret = __test__sw_clock_freq(PERF_COUNT_SW_TASK_CLOCK);
+
+ return ret;
+}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
new file mode 100644
index 000000000..9b5be51e5
--- /dev/null
+++ b/tools/perf/tests/switch-tracking.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/time.h>
+#include <sys/prctl.h>
+#include <errno.h>
+#include <time.h>
+#include <stdlib.h>
+
+#include "parse-events.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+static int spin_sleep(void)
+{
+ struct timeval start, now, diff, maxtime;
+ struct timespec ts;
+ int err, i;
+
+ maxtime.tv_sec = 0;
+ maxtime.tv_usec = 50000;
+
+ err = gettimeofday(&start, NULL);
+ if (err)
+ return err;
+
+ /* Spin for 50ms */
+ while (1) {
+ for (i = 0; i < 1000; i++)
+ barrier();
+
+ err = gettimeofday(&now, NULL);
+ if (err)
+ return err;
+
+ timersub(&now, &start, &diff);
+ if (timercmp(&diff, &maxtime, > /* For checkpatch */))
+ break;
+ }
+
+ ts.tv_nsec = 50 * 1000 * 1000;
+ ts.tv_sec = 0;
+
+ /* Sleep for 50ms */
+ err = nanosleep(&ts, NULL);
+	/* nanosleep() returns -1 and sets errno when interrupted */
+	if (err == -1 && errno == EINTR)
+ err = 0;
+
+ return err;
+}
+
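+/* State accumulated while scanning the events recorded by the test */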
+struct switch_tracking {
+ struct perf_evsel *switch_evsel;
+ struct perf_evsel *cycles_evsel;
+ pid_t *tids;
+ int nr_tids;
+ int comm_seen[4];
+ int cycles_before_comm_1;
+ int cycles_between_comm_2_and_comm_3;
+ int cycles_after_comm_4;
+};
+
+static int check_comm(struct switch_tracking *switch_tracking,
+ union perf_event *event, const char *comm, int nr)
+{
+ if (event->header.type == PERF_RECORD_COMM &&
+ (pid_t)event->comm.pid == getpid() &&
+ (pid_t)event->comm.tid == getpid() &&
+ strcmp(event->comm.comm, comm) == 0) {
+ if (switch_tracking->comm_seen[nr]) {
+ pr_debug("Duplicate comm event\n");
+ return -1;
+ }
+ switch_tracking->comm_seen[nr] = 1;
+ pr_debug3("comm event: %s nr: %d\n", event->comm.comm, nr);
+ return 1;
+ }
+ return 0;
+}
+
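+/*
+ * Remember the last tid switched in on each cpu, growing the tids array
+ * lazily as samples for new cpus appear.
+ */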
+static int check_cpu(struct switch_tracking *switch_tracking, int cpu)
+{
+ int i, nr = cpu + 1;
+
+ if (cpu < 0)
+ return -1;
+
+ if (!switch_tracking->tids) {
+ switch_tracking->tids = calloc(nr, sizeof(pid_t));
+ if (!switch_tracking->tids)
+ return -1;
+ for (i = 0; i < nr; i++)
+ switch_tracking->tids[i] = -1;
+ switch_tracking->nr_tids = nr;
+ return 0;
+ }
+
+ if (cpu >= switch_tracking->nr_tids) {
+ void *addr;
+
+ addr = realloc(switch_tracking->tids, nr * sizeof(pid_t));
+ if (!addr)
+ return -1;
+ switch_tracking->tids = addr;
+ for (i = switch_tracking->nr_tids; i < nr; i++)
+ switch_tracking->tids[i] = -1;
+ switch_tracking->nr_tids = nr;
+ return 0;
+ }
+
+ return 0;
+}
+
+static int process_sample_event(struct perf_evlist *evlist,
+ union perf_event *event,
+ struct switch_tracking *switch_tracking)
+{
+ struct perf_sample sample;
+ struct perf_evsel *evsel;
+ pid_t next_tid, prev_tid;
+ int cpu, err;
+
+ if (perf_evlist__parse_sample(evlist, event, &sample)) {
+ pr_debug("perf_evlist__parse_sample failed\n");
+ return -1;
+ }
+
+ evsel = perf_evlist__id2evsel(evlist, sample.id);
+ if (evsel == switch_tracking->switch_evsel) {
+ next_tid = perf_evsel__intval(evsel, &sample, "next_pid");
+ prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid");
+ cpu = sample.cpu;
+ pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n",
+ cpu, prev_tid, next_tid);
+ err = check_cpu(switch_tracking, cpu);
+ if (err)
+ return err;
+ /*
+ * Check for no missing sched_switch events i.e. that the
+ * evsel->system_wide flag has worked.
+ */
+ if (switch_tracking->tids[cpu] != -1 &&
+ switch_tracking->tids[cpu] != prev_tid) {
+ pr_debug("Missing sched_switch events\n");
+ return -1;
+ }
+ switch_tracking->tids[cpu] = next_tid;
+ }
+
+ if (evsel == switch_tracking->cycles_evsel) {
+ pr_debug3("cycles event\n");
+ if (!switch_tracking->comm_seen[0])
+ switch_tracking->cycles_before_comm_1 = 1;
+ if (switch_tracking->comm_seen[1] &&
+ !switch_tracking->comm_seen[2])
+ switch_tracking->cycles_between_comm_2_and_comm_3 = 1;
+ if (switch_tracking->comm_seen[3])
+ switch_tracking->cycles_after_comm_4 = 1;
+ }
+
+ return 0;
+}
+
+static int process_event(struct perf_evlist *evlist, union perf_event *event,
+ struct switch_tracking *switch_tracking)
+{
+ if (event->header.type == PERF_RECORD_SAMPLE)
+ return process_sample_event(evlist, event, switch_tracking);
+
+ if (event->header.type == PERF_RECORD_COMM) {
+ int err, done = 0;
+
+ err = check_comm(switch_tracking, event, "Test COMM 1", 0);
+ if (err < 0)
+ return -1;
+ done += err;
+ err = check_comm(switch_tracking, event, "Test COMM 2", 1);
+ if (err < 0)
+ return -1;
+ done += err;
+ err = check_comm(switch_tracking, event, "Test COMM 3", 2);
+ if (err < 0)
+ return -1;
+ done += err;
+ err = check_comm(switch_tracking, event, "Test COMM 4", 3);
+ if (err < 0)
+ return -1;
+ done += err;
+ if (done != 1) {
+ pr_debug("Unexpected comm event\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+struct event_node {
+ struct list_head list;
+ union perf_event *event;
+ u64 event_time;
+};
+
+static int add_event(struct perf_evlist *evlist, struct list_head *events,
+ union perf_event *event)
+{
+ struct perf_sample sample;
+ struct event_node *node;
+
+ node = malloc(sizeof(struct event_node));
+ if (!node) {
+ pr_debug("malloc failed\n");
+ return -1;
+ }
+ node->event = event;
+ list_add(&node->list, events);
+
+ if (perf_evlist__parse_sample(evlist, event, &sample)) {
+ pr_debug("perf_evlist__parse_sample failed\n");
+ return -1;
+ }
+
+ if (!sample.time) {
+ pr_debug("event with no time\n");
+ return -1;
+ }
+
+ node->event_time = sample.time;
+
+ return 0;
+}
+
+static void free_event_nodes(struct list_head *events)
+{
+ struct event_node *node;
+
+ while (!list_empty(events)) {
+ node = list_entry(events->next, struct event_node, list);
+ list_del(&node->list);
+ free(node);
+ }
+}
+
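+/* qsort comparator: order the gathered events by timestamp */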
+static int compar(const void *a, const void *b)
+{
+ const struct event_node *nodea = a;
+ const struct event_node *nodeb = b;
+	s64 cmp = nodea->event_time - nodeb->event_time;
+
+	/* Clamp to -1/0/1 so truncating the s64 to int cannot flip the sign */
+	return cmp < 0 ? -1 : cmp > 0;
+}
+
+static int process_events(struct perf_evlist *evlist,
+ struct switch_tracking *switch_tracking)
+{
+ union perf_event *event;
+ unsigned pos, cnt = 0;
+ LIST_HEAD(events);
+ struct event_node *events_array, *node;
+ struct perf_mmap *md;
+ int i, ret;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ md = &evlist->mmap[i];
+ if (perf_mmap__read_init(md) < 0)
+ continue;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ cnt += 1;
+ ret = add_event(evlist, &events, event);
+ perf_mmap__consume(md);
+ if (ret < 0)
+ goto out_free_nodes;
+ }
+ perf_mmap__read_done(md);
+ }
+
+ events_array = calloc(cnt, sizeof(struct event_node));
+ if (!events_array) {
+ pr_debug("calloc failed\n");
+ ret = -1;
+ goto out_free_nodes;
+ }
+
+ pos = 0;
+ list_for_each_entry(node, &events, list)
+ events_array[pos++] = *node;
+
+ qsort(events_array, cnt, sizeof(struct event_node), compar);
+
+ for (pos = 0; pos < cnt; pos++) {
+ ret = process_event(evlist, events_array[pos].event,
+ switch_tracking);
+ if (ret < 0)
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ pr_debug("%u events recorded\n", cnt);
+ free(events_array);
+out_free_nodes:
+ free_event_nodes(&events);
+ return ret;
+}
+
+/**
+ * test__switch_tracking - test using sched_switch and tracking events.
+ *
+ * This function implements a test that checks that sched_switch events and
+ * tracking events can be recorded for a workload (current process) using the
+ * evsel->system_wide and evsel->tracking flags (respectively) with other events
+ * sometimes enabled or disabled.
+ */
+int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ const char *sched_switch = "sched:sched_switch";
+ struct switch_tracking switch_tracking = { .tids = NULL, };
+ struct record_opts opts = {
+ .mmap_pages = UINT_MAX,
+ .user_freq = UINT_MAX,
+ .user_interval = ULLONG_MAX,
+ .freq = 4000,
+ .target = {
+ .uses_mmap = true,
+ },
+ };
+ struct thread_map *threads = NULL;
+ struct cpu_map *cpus = NULL;
+ struct perf_evlist *evlist = NULL;
+ struct perf_evsel *evsel, *cpu_clocks_evsel, *cycles_evsel;
+ struct perf_evsel *switch_evsel, *tracking_evsel;
+ const char *comm;
+ int err = -1;
+
+ threads = thread_map__new(-1, getpid(), UINT_MAX);
+ if (!threads) {
+ pr_debug("thread_map__new failed!\n");
+ goto out_err;
+ }
+
+ cpus = cpu_map__new(NULL);
+ if (!cpus) {
+ pr_debug("cpu_map__new failed!\n");
+ goto out_err;
+ }
+
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ pr_debug("perf_evlist__new failed!\n");
+ goto out_err;
+ }
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ /* First event */
+ err = parse_events(evlist, "cpu-clock:u", NULL);
+ if (err) {
+ pr_debug("Failed to parse event dummy:u\n");
+ goto out_err;
+ }
+
+ cpu_clocks_evsel = perf_evlist__last(evlist);
+
+ /* Second event */
+ err = parse_events(evlist, "cycles:u", NULL);
+ if (err) {
+ pr_debug("Failed to parse event cycles:u\n");
+ goto out_err;
+ }
+
+ cycles_evsel = perf_evlist__last(evlist);
+
+ /* Third event */
+ if (!perf_evlist__can_select_event(evlist, sched_switch)) {
+ pr_debug("No sched_switch\n");
+ err = 0;
+ goto out;
+ }
+
+ err = parse_events(evlist, sched_switch, NULL);
+ if (err) {
+ pr_debug("Failed to parse event %s\n", sched_switch);
+ goto out_err;
+ }
+
+ switch_evsel = perf_evlist__last(evlist);
+
+ perf_evsel__set_sample_bit(switch_evsel, CPU);
+ perf_evsel__set_sample_bit(switch_evsel, TIME);
+
+ switch_evsel->system_wide = true;
+ switch_evsel->no_aux_samples = true;
+ switch_evsel->immediate = true;
+
+ /* Test moving an event to the front */
+ if (cycles_evsel == perf_evlist__first(evlist)) {
+ pr_debug("cycles event already at front");
+ goto out_err;
+ }
+ perf_evlist__to_front(evlist, cycles_evsel);
+ if (cycles_evsel != perf_evlist__first(evlist)) {
+ pr_debug("Failed to move cycles event to front");
+ goto out_err;
+ }
+
+ perf_evsel__set_sample_bit(cycles_evsel, CPU);
+ perf_evsel__set_sample_bit(cycles_evsel, TIME);
+
+ /* Fourth event */
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err) {
+ pr_debug("Failed to parse event dummy:u\n");
+ goto out_err;
+ }
+
+ tracking_evsel = perf_evlist__last(evlist);
+
+ perf_evlist__set_tracking_event(evlist, tracking_evsel);
+
+ tracking_evsel->attr.freq = 0;
+ tracking_evsel->attr.sample_period = 1;
+
+ perf_evsel__set_sample_bit(tracking_evsel, TIME);
+
+ /* Config events */
+ perf_evlist__config(evlist, &opts, NULL);
+
+ /* Check moved event is still at the front */
+ if (cycles_evsel != perf_evlist__first(evlist)) {
+ pr_debug("Front event no longer at front");
+ goto out_err;
+ }
+
+ /* Check tracking event is tracking */
+ if (!tracking_evsel->attr.mmap || !tracking_evsel->attr.comm) {
+ pr_debug("Tracking event not tracking\n");
+ goto out_err;
+ }
+
+ /* Check non-tracking events are not tracking */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel != tracking_evsel) {
+ if (evsel->attr.mmap || evsel->attr.comm) {
+ pr_debug("Non-tracking event is tracking\n");
+ goto out_err;
+ }
+ }
+ }
+
+ if (perf_evlist__open(evlist) < 0) {
+ pr_debug("Not supported\n");
+ err = 0;
+ goto out;
+ }
+
+ err = perf_evlist__mmap(evlist, UINT_MAX);
+ if (err) {
+ pr_debug("perf_evlist__mmap failed!\n");
+ goto out_err;
+ }
+
+ perf_evlist__enable(evlist);
+
+ err = perf_evsel__disable(cpu_clocks_evsel);
+ if (err) {
+ pr_debug("perf_evlist__disable_event failed!\n");
+ goto out_err;
+ }
+
+ err = spin_sleep();
+ if (err) {
+ pr_debug("spin_sleep failed!\n");
+ goto out_err;
+ }
+
+ comm = "Test COMM 1";
+ err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+ if (err) {
+ pr_debug("PR_SET_NAME failed!\n");
+ goto out_err;
+ }
+
+ err = perf_evsel__disable(cycles_evsel);
+ if (err) {
+ pr_debug("perf_evlist__disable_event failed!\n");
+ goto out_err;
+ }
+
+ comm = "Test COMM 2";
+ err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+ if (err) {
+ pr_debug("PR_SET_NAME failed!\n");
+ goto out_err;
+ }
+
+ err = spin_sleep();
+ if (err) {
+ pr_debug("spin_sleep failed!\n");
+ goto out_err;
+ }
+
+ comm = "Test COMM 3";
+ err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+ if (err) {
+ pr_debug("PR_SET_NAME failed!\n");
+ goto out_err;
+ }
+
+ err = perf_evsel__enable(cycles_evsel);
+ if (err) {
+ pr_debug("perf_evlist__disable_event failed!\n");
+ goto out_err;
+ }
+
+ comm = "Test COMM 4";
+ err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
+ if (err) {
+ pr_debug("PR_SET_NAME failed!\n");
+ goto out_err;
+ }
+
+ err = spin_sleep();
+ if (err) {
+ pr_debug("spin_sleep failed!\n");
+ goto out_err;
+ }
+
+ perf_evlist__disable(evlist);
+
+ switch_tracking.switch_evsel = switch_evsel;
+ switch_tracking.cycles_evsel = cycles_evsel;
+
+ err = process_events(evlist, &switch_tracking);
+
+ zfree(&switch_tracking.tids);
+
+ if (err)
+ goto out_err;
+
+ /* Check all 4 comm events were seen i.e. that evsel->tracking works */
+ if (!switch_tracking.comm_seen[0] || !switch_tracking.comm_seen[1] ||
+ !switch_tracking.comm_seen[2] || !switch_tracking.comm_seen[3]) {
+ pr_debug("Missing comm events\n");
+ goto out_err;
+ }
+
+ /* Check cycles event got enabled */
+ if (!switch_tracking.cycles_before_comm_1) {
+ pr_debug("Missing cycles events\n");
+ goto out_err;
+ }
+
+ /* Check cycles event got disabled */
+ if (switch_tracking.cycles_between_comm_2_and_comm_3) {
+ pr_debug("cycles events even though event was disabled\n");
+ goto out_err;
+ }
+
+ /* Check cycles event got enabled again */
+ if (!switch_tracking.cycles_after_comm_4) {
+ pr_debug("Missing cycles events\n");
+ goto out_err;
+ }
+out:
+ if (evlist) {
+ perf_evlist__disable(evlist);
+ perf_evlist__delete(evlist);
+ } else {
+ cpu_map__put(cpus);
+ thread_map__put(threads);
+ }
+
+ return err;
+
+out_err:
+ err = -1;
+ goto out;
+}
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
new file mode 100644
index 000000000..788b0805d
--- /dev/null
+++ b/tools/perf/tests/task-exit.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "tests.h"
+
+#include <errno.h>
+#include <signal.h>
+
+static int exited;
+static int nr_exit;
+
+static void sig_handler(int sig __maybe_unused)
+{
+ exited = 1;
+}
+
+/*
+ * perf_evlist__prepare_workload will send a SIGUSR1 if the fork fails,
+ * because we asked for it by setting its exec_error to this handler.
+ */
+static void workload_exec_failed_signal(int signo __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *ucontext __maybe_unused)
+{
+ exited = 1;
+ nr_exit = -1;
+}
+
+/*
+ * This test starts a workload that does nothing, then checks that the
+ * kernel reports exactly one exit event for it, i.e. that the kernel
+ * returns the correct number of events.
+ */
+int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = -1;
+ union perf_event *event;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist;
+ struct target target = {
+ .uid = UINT_MAX,
+ .uses_mmap = true,
+ };
+ const char *argv[] = { "true", NULL };
+ char sbuf[STRERR_BUFSIZE];
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ struct perf_mmap *md;
+
+ signal(SIGCHLD, sig_handler);
+
+ evlist = perf_evlist__new_default();
+ if (evlist == NULL) {
+ pr_debug("perf_evlist__new_default\n");
+ return -1;
+ }
+
+ /*
+ * Create maps of threads and cpus to monitor. In this case
+ * we start with all threads and cpus (-1, -1) but then in
+ * perf_evlist__prepare_workload we'll fill in the only thread
+ * we're monitoring, the one forked there.
+ */
+ cpus = cpu_map__dummy_new();
+ threads = thread_map__new_by_tid(-1);
+ if (!cpus || !threads) {
+ err = -ENOMEM;
+ pr_debug("Not enough memory to create thread/cpu maps\n");
+ goto out_free_maps;
+ }
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ cpus = NULL;
+ threads = NULL;
+
+ err = perf_evlist__prepare_workload(evlist, &target, argv, false,
+ workload_exec_failed_signal);
+ if (err < 0) {
+ pr_debug("Couldn't run the workload!\n");
+ goto out_delete_evlist;
+ }
+
+ evsel = perf_evlist__first(evlist);
+ evsel->attr.task = 1;
+#ifdef __s390x__
+ evsel->attr.sample_freq = 1000000;
+#else
+ evsel->attr.sample_freq = 1;
+#endif
+ evsel->attr.inherit = 0;
+ evsel->attr.watermark = 0;
+ evsel->attr.wakeup_events = 1;
+ evsel->attr.exclude_kernel = 1;
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ pr_debug("Couldn't open the evlist: %s\n",
+ str_error_r(-err, sbuf, sizeof(sbuf)));
+ goto out_delete_evlist;
+ }
+
+ if (perf_evlist__mmap(evlist, 128) < 0) {
+ pr_debug("failed to mmap events: %d (%s)\n", errno,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ err = -1;
+ goto out_delete_evlist;
+ }
+
+ perf_evlist__start_workload(evlist);
+
+retry:
+ md = &evlist->mmap[0];
+ if (perf_mmap__read_init(md) < 0)
+ goto out_init;
+
+ while ((event = perf_mmap__read_event(md)) != NULL) {
+ if (event->header.type == PERF_RECORD_EXIT)
+ nr_exit++;
+
+ perf_mmap__consume(md);
+ }
+ perf_mmap__read_done(md);
+
+out_init:
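+	/* Keep polling until the child has exited and its exit event was read */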
+ if (!exited || !nr_exit) {
+ perf_evlist__poll(evlist, -1);
+ goto retry;
+ }
+
+ if (nr_exit != 1) {
+ pr_debug("received %d EXIT records\n", nr_exit);
+ err = -1;
+ }
+
+out_free_maps:
+ cpu_map__put(cpus);
+ thread_map__put(threads);
+out_delete_evlist:
+ perf_evlist__delete(evlist);
+ return err;
+}
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
new file mode 100644
index 000000000..a9760e790
--- /dev/null
+++ b/tools/perf/tests/tests.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TESTS_H
+#define TESTS_H
+
+#include <stdbool.h>
+
+#define TEST_ASSERT_VAL(text, cond) \
+do { \
+ if (!(cond)) { \
+ pr_debug("FAILED %s:%d %s\n", __FILE__, __LINE__, text); \
+ return -1; \
+ } \
+} while (0)
+
+#define TEST_ASSERT_EQUAL(text, val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("FAILED %s:%d %s (%d != %d)\n", \
+ __FILE__, __LINE__, text, val, expected); \
+ return -1; \
+ } \
+} while (0)
+
+enum {
+ TEST_OK = 0,
+ TEST_FAIL = -1,
+ TEST_SKIP = -2,
+};
+
+struct test {
+ const char *desc;
+ int (*func)(struct test *test, int subtest);
+ struct {
+ bool skip_if_fail;
+ int (*get_nr)(void);
+ const char *(*get_desc)(int subtest);
+ } subtest;
+ bool (*is_supported)(void);
+ void *priv;
+};
+
+/* Tests */
+int test__vmlinux_matches_kallsyms(struct test *test, int subtest);
+int test__openat_syscall_event(struct test *test, int subtest);
+int test__openat_syscall_event_on_all_cpus(struct test *test, int subtest);
+int test__basic_mmap(struct test *test, int subtest);
+int test__PERF_RECORD(struct test *test, int subtest);
+int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
+int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
+int test__syscall_openat_tp_fields(struct test *test, int subtest);
+int test__pmu(struct test *test, int subtest);
+int test__attr(struct test *test, int subtest);
+int test__dso_data(struct test *test, int subtest);
+int test__dso_data_cache(struct test *test, int subtest);
+int test__dso_data_reopen(struct test *test, int subtest);
+int test__parse_events(struct test *test, int subtest);
+int test__hists_link(struct test *test, int subtest);
+int test__python_use(struct test *test, int subtest);
+int test__bp_signal(struct test *test, int subtest);
+int test__bp_signal_overflow(struct test *test, int subtest);
+int test__bp_accounting(struct test *test, int subtest);
+int test__task_exit(struct test *test, int subtest);
+int test__mem(struct test *test, int subtest);
+int test__sw_clock_freq(struct test *test, int subtest);
+int test__code_reading(struct test *test, int subtest);
+int test__sample_parsing(struct test *test, int subtest);
+int test__keep_tracking(struct test *test, int subtest);
+int test__parse_no_sample_id_all(struct test *test, int subtest);
+int test__dwarf_unwind(struct test *test, int subtest);
+int test__expr(struct test *test, int subtest);
+int test__hists_filter(struct test *test, int subtest);
+int test__mmap_thread_lookup(struct test *test, int subtest);
+int test__thread_mg_share(struct test *test, int subtest);
+int test__hists_output(struct test *test, int subtest);
+int test__hists_cumulate(struct test *test, int subtest);
+int test__switch_tracking(struct test *test, int subtest);
+int test__fdarray__filter(struct test *test, int subtest);
+int test__fdarray__add(struct test *test, int subtest);
+int test__kmod_path__parse(struct test *test, int subtest);
+int test__thread_map(struct test *test, int subtest);
+int test__llvm(struct test *test, int subtest);
+const char *test__llvm_subtest_get_desc(int subtest);
+int test__llvm_subtest_get_nr(void);
+int test__bpf(struct test *test, int subtest);
+const char *test__bpf_subtest_get_desc(int subtest);
+int test__bpf_subtest_get_nr(void);
+int test__session_topology(struct test *test, int subtest);
+int test__thread_map_synthesize(struct test *test, int subtest);
+int test__thread_map_remove(struct test *test, int subtest);
+int test__cpu_map_synthesize(struct test *test, int subtest);
+int test__synthesize_stat_config(struct test *test, int subtest);
+int test__synthesize_stat(struct test *test, int subtest);
+int test__synthesize_stat_round(struct test *test, int subtest);
+int test__event_update(struct test *test, int subtest);
+int test__event_times(struct test *test, int subtest);
+int test__backward_ring_buffer(struct test *test, int subtest);
+int test__cpu_map_print(struct test *test, int subtest);
+int test__sdt_event(struct test *test, int subtest);
+int test__is_printable_array(struct test *test, int subtest);
+int test__bitmap_print(struct test *test, int subtest);
+int test__perf_hooks(struct test *test, int subtest);
+int test__clang(struct test *test, int subtest);
+const char *test__clang_subtest_get_desc(int subtest);
+int test__clang_subtest_get_nr(void);
+int test__unit_number__scnprint(struct test *test, int subtest);
+int test__mem2node(struct test *t, int subtest);
+
+bool test__bp_signal_is_supported(void);
+
+#if defined(__arm__) || defined(__aarch64__)
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread);
+#endif
+#endif
+#endif /* TESTS_H */
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
new file mode 100644
index 000000000..4de1939b5
--- /dev/null
+++ b/tools/perf/tests/thread-map.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/prctl.h>
+#include "tests.h"
+#include "thread_map.h"
+#include "debug.h"
+
+#define NAME (const char *) "perf"
+#define NAMEUL (unsigned long) NAME
+
+int test__thread_map(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct thread_map *map;
+
+ TEST_ASSERT_VAL("failed to set process name",
+ !prctl(PR_SET_NAME, NAMEUL, 0, 0, 0));
+
+ /* test map on current pid */
+ map = thread_map__new_by_pid(getpid());
+ TEST_ASSERT_VAL("failed to alloc map", map);
+
+ thread_map__read_comms(map);
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid",
+ thread_map__pid(map, 0) == getpid());
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(map, 0) &&
+ !strcmp(thread_map__comm(map, 0), NAME));
+ TEST_ASSERT_VAL("wrong refcnt",
+ refcount_read(&map->refcnt) == 1);
+ thread_map__put(map);
+
+ /* test dummy pid */
+ map = thread_map__new_dummy();
+ TEST_ASSERT_VAL("failed to alloc map", map);
+
+ thread_map__read_comms(map);
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1);
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(map, 0) &&
+ !strcmp(thread_map__comm(map, 0), "dummy"));
+ TEST_ASSERT_VAL("wrong refcnt",
+ refcount_read(&map->refcnt) == 1);
+ thread_map__put(map);
+ return 0;
+}
+
+static int process_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct thread_map_event *map = &event->thread_map;
+ struct thread_map *threads;
+
+ TEST_ASSERT_VAL("wrong nr", map->nr == 1);
+ TEST_ASSERT_VAL("wrong pid", map->entries[0].pid == (u64) getpid());
+ TEST_ASSERT_VAL("wrong comm", !strcmp(map->entries[0].comm, NAME));
+
+ threads = thread_map__new_event(&event->thread_map);
+ TEST_ASSERT_VAL("failed to alloc map", threads);
+
+ TEST_ASSERT_VAL("wrong nr", threads->nr == 1);
+ TEST_ASSERT_VAL("wrong pid",
+ thread_map__pid(threads, 0) == getpid());
+ TEST_ASSERT_VAL("wrong comm",
+ thread_map__comm(threads, 0) &&
+ !strcmp(thread_map__comm(threads, 0), NAME));
+ TEST_ASSERT_VAL("wrong refcnt",
+ refcount_read(&threads->refcnt) == 1);
+ thread_map__put(threads);
+ return 0;
+}
+
+int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct thread_map *threads;
+
+ TEST_ASSERT_VAL("failed to set process name",
+ !prctl(PR_SET_NAME, NAMEUL, 0, 0, 0));
+
+ /* test map on current pid */
+ threads = thread_map__new_by_pid(getpid());
+ TEST_ASSERT_VAL("failed to alloc map", threads);
+
+ thread_map__read_comms(threads);
+
+ TEST_ASSERT_VAL("failed to synthesize map",
+ !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+
+ return 0;
+}
+
+int test__thread_map_remove(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct thread_map *threads;
+ char *str;
+ int i;
+
+ TEST_ASSERT_VAL("failed to allocate map string",
+ asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
+
+ threads = thread_map__new_str(str, NULL, 0, false);
+
+ TEST_ASSERT_VAL("failed to allocate thread_map",
+ threads);
+
+ if (verbose > 0)
+ thread_map__fprintf(threads, stderr);
+
+ TEST_ASSERT_VAL("failed to remove thread",
+ !thread_map__remove(threads, 0));
+
+ TEST_ASSERT_VAL("thread_map count != 1", threads->nr == 1);
+
+ if (verbose > 0)
+ thread_map__fprintf(threads, stderr);
+
+ TEST_ASSERT_VAL("failed to remove thread",
+ !thread_map__remove(threads, 0));
+
+ TEST_ASSERT_VAL("thread_map count != 0", threads->nr == 0);
+
+ if (verbose > 0)
+ thread_map__fprintf(threads, stderr);
+
+ TEST_ASSERT_VAL("failed to not remove thread",
+ thread_map__remove(threads, 0));
+
+ for (i = 0; i < threads->nr; i++)
+ free(threads->map[i].comm);
+
+ free(threads);
+ return 0;
+}
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
new file mode 100644
index 000000000..b1d1bbafe
--- /dev/null
+++ b/tools/perf/tests/thread-mg-share.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "tests.h"
+#include "machine.h"
+#include "thread.h"
+#include "map.h"
+#include "debug.h"
+
+int test__thread_mg_share(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ struct machines machines;
+ struct machine *machine;
+
+ /* thread group */
+ struct thread *leader;
+ struct thread *t1, *t2, *t3;
+ struct map_groups *mg;
+
+ /* other process */
+ struct thread *other, *other_leader;
+ struct map_groups *other_mg;
+
+ /*
+	 * This test creates 2 process abstractions (struct thread)
+	 * with several threads and checks that they properly share and
+	 * maintain map groups info (struct map_groups).
+ *
+ * thread group (pid: 0, tids: 0, 1, 2, 3)
+ * other group (pid: 4, tids: 4, 5)
+ */
+
+ machines__init(&machines);
+ machine = &machines.host;
+
+ /* create process with 4 threads */
+ leader = machine__findnew_thread(machine, 0, 0);
+ t1 = machine__findnew_thread(machine, 0, 1);
+ t2 = machine__findnew_thread(machine, 0, 2);
+ t3 = machine__findnew_thread(machine, 0, 3);
+
+	/* and create 1 separate process, without a thread leader */
+ other = machine__findnew_thread(machine, 4, 5);
+
+ TEST_ASSERT_VAL("failed to create threads",
+ leader && t1 && t2 && t3 && other);
+
+ mg = leader->mg;
+ TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 4);
+
+ /* test the map groups pointer is shared */
+ TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
+ TEST_ASSERT_VAL("map groups don't match", mg == t2->mg);
+ TEST_ASSERT_VAL("map groups don't match", mg == t3->mg);
+
+ /*
+ * Verify the other leader was created by previous call.
+ * It should have shared map groups with no change in
+ * refcnt.
+ */
+ other_leader = machine__find_thread(machine, 4, 4);
+ TEST_ASSERT_VAL("failed to find other leader", other_leader);
+
+ /*
+ * Ok, now that all the rbtree related operations were done,
+	 * let's remove all of them from there so that we can do the
+ * refcounting tests.
+ */
+ machine__remove_thread(machine, leader);
+ machine__remove_thread(machine, t1);
+ machine__remove_thread(machine, t2);
+ machine__remove_thread(machine, t3);
+ machine__remove_thread(machine, other);
+ machine__remove_thread(machine, other_leader);
+
+ other_mg = other->mg;
+ TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 2);
+
+ TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
+
+ /* release thread group */
+ thread__put(leader);
+ TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 3);
+
+ thread__put(t1);
+ TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 2);
+
+ thread__put(t2);
+ TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&mg->refcnt), 1);
+
+ thread__put(t3);
+
+ /* release other group */
+ thread__put(other_leader);
+ TEST_ASSERT_EQUAL("wrong refcnt", refcount_read(&other_mg->refcnt), 1);
+
+ thread__put(other);
+
+ machines__exit(&machines);
+ return 0;
+}
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
new file mode 100644
index 000000000..9497d02f6
--- /dev/null
+++ b/tools/perf/tests/topology.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "tests.h"
+#include "util.h"
+#include "session.h"
+#include "evlist.h"
+#include "debug.h"
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+#define DATA_SIZE 10
+
+static int get_temp(char *path)
+{
+ int fd;
+
+ strcpy(path, TEMPL);
+
+ fd = mkstemp(path);
+ if (fd < 0) {
+ perror("mkstemp failed");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+static int session_write_header(char *path)
+{
+ struct perf_session *session;
+ struct perf_data data = {
+ .file = {
+ .path = path,
+ },
+ .mode = PERF_DATA_MODE_WRITE,
+ };
+
+ session = perf_session__new(&data, false, NULL);
+ TEST_ASSERT_VAL("can't get session", session);
+
+ session->evlist = perf_evlist__new_default();
+ TEST_ASSERT_VAL("can't get evlist", session->evlist);
+
+ perf_header__set_feat(&session->header, HEADER_CPU_TOPOLOGY);
+ perf_header__set_feat(&session->header, HEADER_NRCPUS);
+ perf_header__set_feat(&session->header, HEADER_ARCH);
+
+ session->header.data_size += DATA_SIZE;
+
+ TEST_ASSERT_VAL("failed to write header",
+ !perf_session__write_header(session, session->evlist, data.file.fd, true));
+
+ perf_session__delete(session);
+
+ return 0;
+}
+
+static int check_cpu_topology(char *path, struct cpu_map *map)
+{
+ struct perf_session *session;
+ struct perf_data data = {
+ .file = {
+ .path = path,
+ },
+ .mode = PERF_DATA_MODE_READ,
+ };
+ int i;
+
+ session = perf_session__new(&data, false, NULL);
+ TEST_ASSERT_VAL("can't get session", session);
+
+ /* On platforms with large numbers of CPUs process_cpu_topology()
+ * might issue an error while reading the perf.data file section
+ * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
+ * cpu is a NULL pointer.
+ * Example: On s390
+ * CPU 0 is on core_id 0 and physical_package_id 6
+ * CPU 1 is on core_id 1 and physical_package_id 3
+ *
+ * Core_id and physical_package_id are platform and architecture
+	 * dependent and might have higher numbers than the CPU id.
+ * This actually depends on the configuration.
+ *
+ * In this case process_cpu_topology() prints error message:
+ * "socket_id number is too big. You may need to upgrade the
+ * perf tool."
+ *
+ * This is the reason why this test might be skipped.
+ */
+ if (!session->header.env.cpu)
+ return TEST_SKIP;
+
+ for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
+ if (!cpu_map__has(map, i))
+ continue;
+ pr_debug("CPU %d, core %d, socket %d\n", i,
+ session->header.env.cpu[i].core_id,
+ session->header.env.cpu[i].socket_id);
+ }
+
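+	/*
+	 * Compare the core and socket ids recorded in the header against
+	 * what cpu_map__get_core()/cpu_map__get_socket() compute now.
+	 */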
+ for (i = 0; i < map->nr; i++) {
+ TEST_ASSERT_VAL("Core ID doesn't match",
+ (session->header.env.cpu[map->map[i]].core_id == (cpu_map__get_core(map, i, NULL) & 0xffff)));
+
+ TEST_ASSERT_VAL("Socket ID doesn't match",
+ (session->header.env.cpu[map->map[i]].socket_id == cpu_map__get_socket(map, i, NULL)));
+ }
+
+ perf_session__delete(session);
+
+ return 0;
+}
+
+int test__session_topology(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ char path[PATH_MAX];
+ struct cpu_map *map;
+ int ret = TEST_FAIL;
+
+ TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
+
+ pr_debug("templ file: %s\n", path);
+
+ if (session_write_header(path))
+ goto free_path;
+
+ map = cpu_map__new(NULL);
+ if (map == NULL) {
+ pr_debug("failed to get system cpumap\n");
+ goto free_path;
+ }
+
+ ret = check_cpu_topology(path, map);
+ cpu_map__put(map);
+
+free_path:
+ unlink(path);
+ return ret;
+}
diff --git a/tools/perf/tests/unit_number__scnprintf.c b/tools/perf/tests/unit_number__scnprintf.c
new file mode 100644
index 000000000..2bb8cb003
--- /dev/null
+++ b/tools/perf/tests/unit_number__scnprintf.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "tests.h"
+#include "units.h"
+#include "debug.h"
+
+int test__unit_number__scnprint(struct test *t __maybe_unused, int subtest __maybe_unused)
+{
+ struct {
+ u64 n;
+ const char *str;
+ } test[] = {
+ { 1, "1B" },
+ { 10*1024, "10K" },
+ { 20*1024*1024, "20M" },
+ { 30*1024*1024*1024ULL, "30G" },
+ { 0, "0B" },
+ { 0, NULL },
+ };
+ unsigned i = 0;
+
+ while (test[i].str) {
+ char buf[100];
+
+ unit_number__scnprintf(buf, sizeof(buf), test[i].n);
+
+ pr_debug("n %" PRIu64 ", str '%s', buf '%s'\n",
+ test[i].n, test[i].str, buf);
+
+ if (strcmp(test[i].str, buf))
+ return TEST_FAIL;
+
+ i++;
+ }
+
+ return TEST_OK;
+}
diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
new file mode 100644
index 000000000..7691980b7
--- /dev/null
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/rbtree.h>
+#include <inttypes.h>
+#include <string.h>
+#include "map.h"
+#include "symbol.h"
+#include "util.h"
+#include "tests.h"
+#include "debug.h"
+#include "machine.h"
+
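+/* Convert an address in the kallsyms map back to a memory address */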
+#define UM(x) kallsyms_map->unmap_ip(kallsyms_map, (x))
+
+int test__vmlinux_matches_kallsyms(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int err = -1;
+ struct rb_node *nd;
+ struct symbol *sym;
+ struct map *kallsyms_map, *vmlinux_map, *map;
+ struct machine kallsyms, vmlinux;
+ struct maps *maps = machine__kernel_maps(&vmlinux);
+ u64 mem_start, mem_end;
+ bool header_printed;
+
+ /*
+ * Step 1:
+ *
+ * Init the machines that will hold kernel, modules obtained from
+ * both vmlinux + .ko files and from /proc/kallsyms split by modules.
+ */
+ machine__init(&kallsyms, "", HOST_KERNEL_ID);
+ machine__init(&vmlinux, "", HOST_KERNEL_ID);
+
+ /*
+ * Step 2:
+ *
+ * Create the kernel maps for kallsyms and the DSO where we will then
+ * load /proc/kallsyms. Also create the modules maps from /proc/modules
+ * and find the .ko files that match them in /lib/modules/`uname -r`/.
+ */
+ if (machine__create_kernel_maps(&kallsyms) < 0) {
+ pr_debug("machine__create_kernel_maps ");
+ goto out;
+ }
+
+ /*
+ * Step 3:
+ *
+ * Load and split /proc/kallsyms into multiple maps, one per module.
+ * Do not use kcore, as this test was designed before kcore support
+ * and has parts that only make sense if using the non-kcore code.
+	 * XXX: extend it to stress the kcore code as well, hint: the list
+	 * of modules extracted from /proc/kcore, in its current form, can't
+	 * be compared against the list of modules found in the "vmlinux"
+	 * code and with the one obtained from /proc/modules from the "kallsyms" code.
+ */
+ if (machine__load_kallsyms(&kallsyms, "/proc/kallsyms") <= 0) {
+ pr_debug("dso__load_kallsyms ");
+ goto out;
+ }
+
+ /*
+ * Step 4:
+ *
+	 * kallsyms will be internally sorted on demand by name so that we can
+	 * find the reference relocation symbol, i.e. the symbol we will use
+ * to see if the running kernel was relocated by checking if it has the
+ * same value in the vmlinux file we load.
+ */
+ kallsyms_map = machine__kernel_map(&kallsyms);
+
+ /*
+ * Step 5:
+ *
+ * Now repeat step 2, this time for the vmlinux file we'll auto-locate.
+ */
+ if (machine__create_kernel_maps(&vmlinux) < 0) {
+ pr_debug("machine__create_kernel_maps ");
+ goto out;
+ }
+
+ vmlinux_map = machine__kernel_map(&vmlinux);
+
+ /*
+ * Step 6:
+ *
+ * Locate a vmlinux file in the vmlinux path that has a buildid that
+ * matches the one of the running kernel.
+ *
+ * While doing that look if we find the ref reloc symbol, if we find it
+ * we'll have its ref_reloc_symbol.unrelocated_addr and then
+ * maps__reloc_vmlinux will notice and set proper ->[un]map_ip routines
+ * to fixup the symbols.
+ */
+ if (machine__load_vmlinux_path(&vmlinux) <= 0) {
+ pr_debug("Couldn't find a vmlinux that matches the kernel running on this machine, skipping test\n");
+ err = TEST_SKIP;
+ goto out;
+ }
+
+ err = 0;
+ /*
+ * Step 7:
+ *
+ * Now look at the symbols in the vmlinux DSO and check if we find all of them
+	 * in the kallsyms dso. For the ones that are in both, check their names and
+ * end addresses too.
+ */
+ map__for_each_symbol(vmlinux_map, sym, nd) {
+ struct symbol *pair, *first_pair;
+
+ sym = rb_entry(nd, struct symbol, rb_node);
+
+ if (sym->start == sym->end)
+ continue;
+
+ mem_start = vmlinux_map->unmap_ip(vmlinux_map, sym->start);
+ mem_end = vmlinux_map->unmap_ip(vmlinux_map, sym->end);
+
+ first_pair = machine__find_kernel_symbol(&kallsyms, mem_start, NULL);
+ pair = first_pair;
+
+ if (pair && UM(pair->start) == mem_start) {
+next_pair:
+ if (arch__compare_symbol_names(sym->name, pair->name) == 0) {
+ /*
+				 * kallsyms doesn't have the symbol end, so we
+				 * set it by using the next symbol start - 1.
+				 * In some cases we get this up to a page
+				 * wrong; trace_kmalloc, when I was developing
+				 * this code, was one such example, 2106 bytes
+				 * off the real size. More than that and we
+				 * _really_ have a problem.
+ */
+ s64 skew = mem_end - UM(pair->end);
+ if (llabs(skew) >= page_size)
+ pr_debug("WARN: %#" PRIx64 ": diff end addr for %s v: %#" PRIx64 " k: %#" PRIx64 "\n",
+ mem_start, sym->name, mem_end,
+ UM(pair->end));
+
+ /*
+ * Do not count this as a failure, because we
+ * could really find a case where it's not
+ * possible to get proper function end from
+ * kallsyms.
+ */
+ continue;
+ } else {
+ pair = machine__find_kernel_symbol_by_name(&kallsyms, sym->name, NULL);
+ if (pair) {
+ if (UM(pair->start) == mem_start)
+ goto next_pair;
+
+ pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
+ mem_start, sym->name, pair->name);
+ } else {
+ pr_debug("WARN: %#" PRIx64 ": diff name v: %s k: %s\n",
+ mem_start, sym->name, first_pair->name);
+ }
+
+ continue;
+ }
+ } else
+ pr_debug("ERR : %#" PRIx64 ": %s not on kallsyms\n",
+ mem_start, sym->name);
+
+ err = -1;
+ }
+
+ if (verbose <= 0)
+ goto out;
+
+ header_printed = false;
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
+		struct map *pair;
+
+		/*
+		 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
+		 * the kernel will have the path for the vmlinux file being used,
+		 * so use the short name, less descriptive but the same ("[kernel]")
+		 * in both cases.
+		 */
+		pair = map_groups__find_by_name(&kallsyms.kmaps,
+						(map->dso->kernel ?
+						 map->dso->short_name :
+						 map->dso->name));
+ if (pair) {
+ pair->priv = 1;
+ } else {
+ if (!header_printed) {
+ pr_info("WARN: Maps only in vmlinux:\n");
+ header_printed = true;
+ }
+ map__fprintf(map, stderr);
+ }
+ }
+
+ header_printed = false;
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ struct map *pair;
+
+ mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
+ mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
+
+ pair = map_groups__find(&kallsyms.kmaps, mem_start);
+ if (pair == NULL || pair->priv)
+ continue;
+
+ if (pair->start == mem_start) {
+ if (!header_printed) {
+ pr_info("WARN: Maps in vmlinux with a different name in kallsyms:\n");
+ header_printed = true;
+ }
+
+ pr_info("WARN: %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
+ map->start, map->end, map->pgoff, map->dso->name);
+ if (mem_end != pair->end)
+ pr_info(":\nWARN: *%" PRIx64 "-%" PRIx64 " %" PRIx64,
+ pair->start, pair->end, pair->pgoff);
+ pr_info(" %s\n", pair->dso->name);
+ pair->priv = 1;
+ }
+ }
+
+ header_printed = false;
+
+ maps = machine__kernel_maps(&kallsyms);
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ if (!map->priv) {
+ if (!header_printed) {
+ pr_info("WARN: Maps only in kallsyms:\n");
+ header_printed = true;
+ }
+ map__fprintf(map, stderr);
+ }
+ }
+out:
+ machine__exit(&kallsyms);
+ machine__exit(&vmlinux);
+ return err;
+}
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
new file mode 100644
index 000000000..f528ba35e
--- /dev/null
+++ b/tools/perf/trace/beauty/Build
@@ -0,0 +1,11 @@
+libperf-y += clone.o
+libperf-y += fcntl.o
+libperf-y += flock.o
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86))
+libperf-y += ioctl.o
+endif
+libperf-y += kcmp.o
+libperf-y += pkey_alloc.o
+libperf-y += prctl.o
+libperf-y += socket.o
+libperf-y += statx.o
diff --git a/tools/perf/trace/beauty/arch_errno_names.c b/tools/perf/trace/beauty/arch_errno_names.c
new file mode 100644
index 000000000..ede031c3a
--- /dev/null
+++ b/tools/perf/trace/beauty/arch_errno_names.c
@@ -0,0 +1 @@
+#include "trace/beauty/generated/arch_errno_name_array.c"
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
new file mode 100755
index 000000000..f8c44a856
--- /dev/null
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -0,0 +1,100 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generate C file mapping errno codes to errno names.
+#
+# Copyright IBM Corp. 2018
+# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+
+gcc="$1"
+toolsdir="$2"
+include_path="-I$toolsdir/include/uapi"
+
+arch_string()
+{
+ echo "$1" |sed -e 'y/- /__/' |tr '[[:upper:]]' '[[:lower:]]'
+}
+
+asm_errno_file()
+{
+ local arch="$1"
+ local header
+
+ header="$toolsdir/arch/$arch/include/uapi/asm/errno.h"
+ if test -r "$header"; then
+ echo "$header"
+ else
+ echo "$toolsdir/include/uapi/asm-generic/errno.h"
+ fi
+}
+
+create_errno_lookup_func()
+{
+ local arch=$(arch_string "$1")
+ local nr name
+
+ cat <<EoFuncBegin
+static const char *errno_to_name__$arch(int err)
+{
+ switch (err) {
+EoFuncBegin
+
+ while read name nr; do
+ printf '\tcase %d: return "%s";\n' $nr $name
+ done
+
+ cat <<EoFuncEnd
+ default:
+ return "(unknown)";
+ }
+}
+
+EoFuncEnd
+}
+
+process_arch()
+{
+ local arch="$1"
+ local asm_errno=$(asm_errno_file "$arch")
+
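+	# Preprocess the errno header (-E -dM dumps all macros), keep the E*
+	# defines as "NAME,NR" pairs sorted by number, and feed them to
+	# create_errno_lookup_func, whose read loop splits them on IFS=,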
+ $gcc $include_path -E -dM -x c $asm_errno \
+ |grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
+ |awk '{ print $2","$3; }' \
+ |sort -t, -k2 -nu \
+ |IFS=, create_errno_lookup_func "$arch"
+}
+
+create_arch_errno_table_func()
+{
+ local archlist="$1"
+ local default="$2"
+ local arch
+
+ printf 'const char *arch_syscalls__strerrno(const char *arch, int err)\n'
+ printf '{\n'
+ for arch in $archlist; do
+ printf '\tif (!strcmp(arch, "%s"))\n' $(arch_string "$arch")
+ printf '\t\treturn errno_to_name__%s(err);\n' $(arch_string "$arch")
+ done
+ printf '\treturn errno_to_name__%s(err);\n' $(arch_string "$default")
+ printf '}\n'
+}
+
+cat <<EoHEADER
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <string.h>
+
+EoHEADER
+
+# Create list of architectures and ignore those that do not appear
+# in tools/perf/arch
+archlist=""
+for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do
+ test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch"
+done
+
+for arch in x86 $archlist generic; do
+ process_arch "$arch"
+done
+create_arch_errno_table_func "x86 $archlist" "generic"
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
new file mode 100644
index 000000000..9615af5d4
--- /dev/null
+++ b/tools/perf/trace/beauty/beauty.h
@@ -0,0 +1,125 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TRACE_BEAUTY_H
+#define _PERF_TRACE_BEAUTY_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/types.h>
+
+struct strarray {
+ int offset;
+ int nr_entries;
+ const char **entries;
+};
+
+#define DEFINE_STRARRAY(array) struct strarray strarray__##array = { \
+ .nr_entries = ARRAY_SIZE(array), \
+ .entries = array, \
+}
+
+#define DEFINE_STRARRAY_OFFSET(array, off) struct strarray strarray__##array = { \
+ .offset = off, \
+ .nr_entries = ARRAY_SIZE(array), \
+ .entries = array, \
+}
+
+size_t strarray__scnprintf(struct strarray *sa, char *bf, size_t size, const char *intfmt, int val);
+
+struct trace;
+struct thread;
+
+size_t pid__scnprintf_fd(struct trace *trace, pid_t pid, int fd, char *bf, size_t size);
+
+/**
+ * @val: value of syscall argument being formatted
+ * @args: All the args, use syscall_args__val(arg, nth) to access one
+ * @thread: tid state (maps, pid, tid, etc)
+ * @trace: 'perf trace' internals: all threads, etc
+ * @parm: private area, may be an strarray, for instance
+ * @idx: syscall arg idx (is this the first?)
+ * @mask: a syscall arg may mask another arg, see syscall_arg__scnprintf_futex_op
+ */
+
+struct syscall_arg {
+ unsigned long val;
+ unsigned char *args;
+ struct thread *thread;
+ struct trace *trace;
+ void *parm;
+ u8 idx;
+ u8 mask;
+};
+
+unsigned long syscall_arg__val(struct syscall_arg *arg, u8 idx);
+
+size_t syscall_arg__scnprintf_strarrays(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STRARRAYS syscall_arg__scnprintf_strarrays
+
+size_t syscall_arg__scnprintf_fd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FD syscall_arg__scnprintf_fd
+
+size_t syscall_arg__scnprintf_hex(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_HEX syscall_arg__scnprintf_hex
+
+size_t syscall_arg__scnprintf_int(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_INT syscall_arg__scnprintf_int
+
+size_t syscall_arg__scnprintf_long(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_LONG syscall_arg__scnprintf_long
+
+size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PID syscall_arg__scnprintf_pid
+
+size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_CLONE_FLAGS syscall_arg__scnprintf_clone_flags
+
+size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FCNTL_CMD syscall_arg__scnprintf_fcntl_cmd
+
+size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg
+
+size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_FLOCK syscall_arg__scnprintf_flock
+
+size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd
+
+size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_KCMP_TYPE syscall_arg__scnprintf_kcmp_type
+
+size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_KCMP_IDX syscall_arg__scnprintf_kcmp_idx
+
+size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PKEY_ALLOC_ACCESS_RIGHTS syscall_arg__scnprintf_pkey_alloc_access_rights
+
+size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
+
+size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PRCTL_OPTION syscall_arg__scnprintf_prctl_option
+
+size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PRCTL_ARG2 syscall_arg__scnprintf_prctl_arg2
+
+size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_PRCTL_ARG3 syscall_arg__scnprintf_prctl_arg3
+
+size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_SK_PROTO syscall_arg__scnprintf_socket_protocol
+
+size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STATX_FLAGS syscall_arg__scnprintf_statx_flags
+
+size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_STATX_MASK syscall_arg__scnprintf_statx_mask
+
+size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size);
+
+void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
+ size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg));
+
+const char *arch_syscalls__strerrno(const char *arch, int err);
+
+#endif /* _PERF_TRACE_BEAUTY_H */
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
new file mode 100644
index 000000000..d64d049ab
--- /dev/null
+++ b/tools/perf/trace/beauty/clone.c
@@ -0,0 +1,75 @@
+/*
+ * trace/beauty/clone.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <uapi/linux/sched.h>
+
+static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+ int printed = 0;
+
+#define P_FLAG(n) \
+ if (flags & CLONE_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~CLONE_##n; \
+ }
+
+ P_FLAG(VM);
+ P_FLAG(FS);
+ P_FLAG(FILES);
+ P_FLAG(SIGHAND);
+ P_FLAG(PTRACE);
+ P_FLAG(VFORK);
+ P_FLAG(PARENT);
+ P_FLAG(THREAD);
+ P_FLAG(NEWNS);
+ P_FLAG(SYSVSEM);
+ P_FLAG(SETTLS);
+ P_FLAG(PARENT_SETTID);
+ P_FLAG(CHILD_CLEARTID);
+ P_FLAG(DETACHED);
+ P_FLAG(UNTRACED);
+ P_FLAG(CHILD_SETTID);
+ P_FLAG(NEWCGROUP);
+ P_FLAG(NEWUTS);
+ P_FLAG(NEWIPC);
+ P_FLAG(NEWUSER);
+ P_FLAG(NEWPID);
+ P_FLAG(NEWNET);
+ P_FLAG(IO);
+#undef P_FLAG
+
+ if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#lx", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+size_t syscall_arg__scnprintf_clone_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long flags = arg->val;
+ enum syscall_clone_args {
+ SCC_FLAGS = (1 << 0),
+ SCC_CHILD_STACK = (1 << 1),
+ SCC_PARENT_TIDPTR = (1 << 2),
+ SCC_CHILD_TIDPTR = (1 << 3),
+ SCC_TLS = (1 << 4),
+ };
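+	/* Mask out the pointer arguments that these clone flags mark as unused */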
+ if (!(flags & CLONE_PARENT_SETTID))
+ arg->mask |= SCC_PARENT_TIDPTR;
+
+ if (!(flags & (CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)))
+ arg->mask |= SCC_CHILD_TIDPTR;
+
+ if (!(flags & CLONE_SETTLS))
+ arg->mask |= SCC_TLS;
+
+ return clone__scnprintf_flags(flags, bf, size);
+}
diff --git a/tools/perf/trace/beauty/drm_ioctl.sh b/tools/perf/trace/beauty/drm_ioctl.sh
new file mode 100755
index 000000000..9d3816815
--- /dev/null
+++ b/tools/perf/trace/beauty/drm_ioctl.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/drm/
+
+printf "#ifndef DRM_COMMAND_BASE\n"
+grep "#define DRM_COMMAND_BASE" $header_dir/drm.h
+printf "#endif\n"
+
+printf "static const char *drm_ioctl_cmds[] = {\n"
+grep "^#define DRM_IOCTL.*DRM_IO" $header_dir/drm.h | \
+ sed -r 's/^#define +DRM_IOCTL_([A-Z0-9_]+)[ ]+DRM_IO[A-Z]* *\( *(0x[[:xdigit:]]+),*.*/ [\2] = "\1",/g'
+grep "^#define DRM_I915_[A-Z_0-9]\+[ ]\+0x" $header_dir/i915_drm.h | \
+ sed -r 's/^#define +DRM_I915_([A-Z0-9_]+)[ ]+(0x[[:xdigit:]]+)/\t[DRM_COMMAND_BASE + \2] = "I915_\1",/g'
+printf "};\n"
diff --git a/tools/perf/trace/beauty/eventfd.c b/tools/perf/trace/beauty/eventfd.c
new file mode 100644
index 000000000..5d6a477a6
--- /dev/null
+++ b/tools/perf/trace/beauty/eventfd.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef EFD_SEMAPHORE
+#define EFD_SEMAPHORE 1
+#endif
+
+#ifndef EFD_NONBLOCK
+#define EFD_NONBLOCK 00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+#define EFD_CLOEXEC 02000000
+#endif
+
+static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return scnprintf(bf, size, "NONE");
+#define P_FLAG(n) \
+ if (flags & EFD_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~EFD_##n; \
+ }
+
+ P_FLAG(SEMAPHORE);
+ P_FLAG(CLOEXEC);
+ P_FLAG(NONBLOCK);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_EFD_FLAGS syscall_arg__scnprintf_eventfd_flags
diff --git a/tools/perf/trace/beauty/fcntl.c b/tools/perf/trace/beauty/fcntl.c
new file mode 100644
index 000000000..9e8900c13
--- /dev/null
+++ b/tools/perf/trace/beauty/fcntl.c
@@ -0,0 +1,100 @@
+/*
+ * trace/beauty/fcntl.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <uapi/linux/fcntl.h>
+
+static size_t fcntl__scnprintf_getfd(unsigned long val, char *bf, size_t size)
+{
+ return scnprintf(bf, size, "%s", val ? "CLOEXEC" : "0");
+}
+
+static size_t syscall_arg__scnprintf_fcntl_getfd(char *bf, size_t size, struct syscall_arg *arg)
+{
+ return fcntl__scnprintf_getfd(arg->val, bf, size);
+}
+
+static size_t fcntl__scnprintf_getlease(unsigned long val, char *bf, size_t size)
+{
+ static const char *fcntl_setlease[] = { "RDLCK", "WRLCK", "UNLCK", };
+ static DEFINE_STRARRAY(fcntl_setlease);
+
+ return strarray__scnprintf(&strarray__fcntl_setlease, bf, size, "%x", val);
+}
+
+static size_t syscall_arg__scnprintf_fcntl_getlease(char *bf, size_t size, struct syscall_arg *arg)
+{
+ return fcntl__scnprintf_getlease(arg->val, bf, size);
+}
+
+size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_arg *arg)
+{
+ if (arg->val == F_GETFL) {
+ syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_open_flags);
+ goto mask_arg;
+ }
+ if (arg->val == F_GETFD) {
+ syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getfd);
+ goto mask_arg;
+ }
+ if (arg->val == F_DUPFD_CLOEXEC || arg->val == F_DUPFD) {
+ syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fd);
+ goto out;
+ }
+ if (arg->val == F_GETOWN) {
+ syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_pid);
+ goto mask_arg;
+ }
+ if (arg->val == F_GETLEASE) {
+ syscall_arg__set_ret_scnprintf(arg, syscall_arg__scnprintf_fcntl_getlease);
+ goto mask_arg;
+ }
+ /*
+ * Some commands ignore the third fcntl argument, "arg", so mask it
+ */
+ if (arg->val == F_GET_SEALS ||
+ arg->val == F_GETSIG) {
+mask_arg:
+ arg->mask |= (1 << 2);
+ }
+out:
+ return syscall_arg__scnprintf_strarrays(bf, size, arg);
+}
+
+size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int cmd = syscall_arg__val(arg, 1);
+
+ if (cmd == F_DUPFD)
+ return syscall_arg__scnprintf_fd(bf, size, arg);
+
+ if (cmd == F_SETFD)
+ return fcntl__scnprintf_getfd(arg->val, bf, size);
+
+ if (cmd == F_SETFL)
+ return open__scnprintf_flags(arg->val, bf, size);
+
+ if (cmd == F_SETOWN)
+ return syscall_arg__scnprintf_pid(bf, size, arg);
+
+ if (cmd == F_SETLEASE)
+ return fcntl__scnprintf_getlease(arg->val, bf, size);
+ /*
+ * We still don't grab the contents of pointers on entry or exit,
+ * so just print them as hex numbers
+ */
+ if (cmd == F_SETLK || cmd == F_SETLKW || cmd == F_GETLK ||
+ cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW || cmd == F_OFD_GETLK ||
+ cmd == F_GETOWN_EX || cmd == F_SETOWN_EX ||
+ cmd == F_GET_RW_HINT || cmd == F_SET_RW_HINT ||
+ cmd == F_GET_FILE_RW_HINT || cmd == F_SET_FILE_RW_HINT)
+ return syscall_arg__scnprintf_hex(bf, size, arg);
+
+ return syscall_arg__scnprintf_long(bf, size, arg);
+}
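
fcntl.c leans on arg->mask, where setting bit i tells the trace printer to skip syscall argument i; the `arg->mask |= (1 << 2)` above hides fcntl's unused third argument. A minimal sketch of that convention, with hard-coded stand-in argument strings (the values are hypothetical):

    #include <stdio.h>

    int main(void)
    {
        const char *args[] = { "3", "F_GETFD", "0x7f0dc0de" }; /* a pretend fcntl call */
        unsigned int mask = 1 << 2;    /* bit 2: hide the ignored third argument */
        int i, printed = 0;

        for (i = 0; i < 3; ++i) {
            if (mask & (1U << i))
                continue;              /* masked arguments are not printed */
            printed += printf("%s%s", printed ? ", " : "", args[i]);
        }
        printf("\n");                  /* prints: 3, F_GETFD */
        return 0;
    }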
diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c
new file mode 100644
index 000000000..c4ff6ad30
--- /dev/null
+++ b/tools/perf/trace/beauty/flock.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <uapi/linux/fcntl.h>
+
+#ifndef LOCK_MAND
+#define LOCK_MAND 32
+#endif
+
+#ifndef LOCK_READ
+#define LOCK_READ 64
+#endif
+
+#ifndef LOCK_WRITE
+#define LOCK_WRITE 128
+#endif
+
+#ifndef LOCK_RW
+#define LOCK_RW 192
+#endif
+
+size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, op = arg->val;
+
+ if (op == 0)
+ return scnprintf(bf, size, "NONE");
+#define P_CMD(cmd) \
+ if ((op & LOCK_##cmd) == LOCK_##cmd) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #cmd); \
+ op &= ~LOCK_##cmd; \
+ }
+
+ P_CMD(SH);
+ P_CMD(EX);
+ P_CMD(NB);
+ P_CMD(UN);
+ P_CMD(MAND);
+ P_CMD(RW);
+ P_CMD(READ);
+ P_CMD(WRITE);
+#undef P_CMD
+
+ if (op)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", op);
+
+ return printed;
+}
diff --git a/tools/perf/trace/beauty/futex_op.c b/tools/perf/trace/beauty/futex_op.c
new file mode 100644
index 000000000..61850fbc8
--- /dev/null
+++ b/tools/perf/trace/beauty/futex_op.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/futex.h>
+
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET 9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET 10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI 11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI 12
+#endif
+#ifndef FUTEX_CLOCK_REALTIME
+#define FUTEX_CLOCK_REALTIME 256
+#endif
+
+static size_t syscall_arg__scnprintf_futex_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+ enum syscall_futex_args {
+ SCF_UADDR = (1 << 0),
+ SCF_OP = (1 << 1),
+ SCF_VAL = (1 << 2),
+ SCF_TIMEOUT = (1 << 3),
+ SCF_UADDR2 = (1 << 4),
+ SCF_VAL3 = (1 << 5),
+ };
+ int op = arg->val;
+ int cmd = op & FUTEX_CMD_MASK;
+ size_t printed = 0;
+
+ switch (cmd) {
+#define P_FUTEX_OP(n) case FUTEX_##n: printed = scnprintf(bf, size, #n);
+ P_FUTEX_OP(WAIT); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
+ P_FUTEX_OP(WAKE); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(FD); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(REQUEUE); arg->mask |= SCF_VAL3|SCF_TIMEOUT; break;
+ P_FUTEX_OP(CMP_REQUEUE); arg->mask |= SCF_TIMEOUT; break;
+ P_FUTEX_OP(CMP_REQUEUE_PI); arg->mask |= SCF_TIMEOUT; break;
+ P_FUTEX_OP(WAKE_OP); break;
+ P_FUTEX_OP(LOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(UNLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2|SCF_TIMEOUT; break;
+ P_FUTEX_OP(TRYLOCK_PI); arg->mask |= SCF_VAL3|SCF_UADDR2; break;
+ P_FUTEX_OP(WAIT_BITSET); arg->mask |= SCF_UADDR2; break;
+ P_FUTEX_OP(WAKE_BITSET); arg->mask |= SCF_UADDR2; break;
+ P_FUTEX_OP(WAIT_REQUEUE_PI); break;
+ default: printed = scnprintf(bf, size, "%#x", cmd); break;
+ }
+
+ if (op & FUTEX_PRIVATE_FLAG)
+ printed += scnprintf(bf + printed, size - printed, "|PRIV");
+
+ if (op & FUTEX_CLOCK_REALTIME)
+ printed += scnprintf(bf + printed, size - printed, "|CLKRT");
+
+ return printed;
+}
+
+#define SCA_FUTEX_OP syscall_arg__scnprintf_futex_op
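
The futex op word mixes a command in its low bits with two modifier flags, which is why the function masks with FUTEX_CMD_MASK before the switch and appends |PRIV and |CLKRT afterwards. A small Linux-only sketch of the decomposition (the fallback define mirrors the one above, for old headers):

    #include <stdio.h>
    #include <linux/futex.h>    /* FUTEX_WAIT, FUTEX_CMD_MASK, FUTEX_PRIVATE_FLAG */

    #ifndef FUTEX_CLOCK_REALTIME
    #define FUTEX_CLOCK_REALTIME 256
    #endif

    int main(void)
    {
        int op = FUTEX_WAIT | FUTEX_PRIVATE_FLAG;   /* what glibc usually issues */

        printf("cmd=%d priv=%d clkrt=%d\n", op & FUTEX_CMD_MASK,
               !!(op & FUTEX_PRIVATE_FLAG), !!(op & FUTEX_CLOCK_REALTIME));
        return 0;               /* prints: cmd=0 priv=1 clkrt=0 */
    }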
diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c
new file mode 100644
index 000000000..26f6b3253
--- /dev/null
+++ b/tools/perf/trace/beauty/futex_val3.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/futex.h>
+
+#ifndef FUTEX_BITSET_MATCH_ANY
+#define FUTEX_BITSET_MATCH_ANY 0xffffffff
+#endif
+
+static size_t syscall_arg__scnprintf_futex_val3(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned int bitset = arg->val;
+
+ if (bitset == FUTEX_BITSET_MATCH_ANY)
+ return scnprintf(bf, size, "MATCH_ANY");
+
+ return scnprintf(bf, size, "%#xd", bitset);
+}
+
+#define SCA_FUTEX_VAL3 syscall_arg__scnprintf_futex_val3
diff --git a/tools/perf/trace/beauty/ioctl.c b/tools/perf/trace/beauty/ioctl.c
new file mode 100644
index 000000000..82346ca06
--- /dev/null
+++ b/tools/perf/trace/beauty/ioctl.c
@@ -0,0 +1,162 @@
+/*
+ * trace/beauty/ioctl.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+
+/*
+ * FIXME: to support all arches we have to improve this, for
+ * now, to build on older systems without things like TIOCGEXCL,
+ * get it directly from our copy.
+ *
+ * Right now only x86 is being supported for beautifying ioctl args
+ * in 'perf trace', see tools/perf/trace/beauty/Build and builtin-trace.c
+ */
+#include <uapi/asm-generic/ioctls.h>
+
+static size_t ioctl__scnprintf_tty_cmd(int nr, int dir, char *bf, size_t size)
+{
+ static const char *ioctl_tty_cmd[] = {
+ [_IOC_NR(TCGETS)] = "TCGETS", "TCSETS", "TCSETSW", "TCSETSF", "TCGETA", "TCSETA", "TCSETAW",
+ "TCSETAF", "TCSBRK", "TCXONC", "TCFLSH", "TIOCEXCL", "TIOCNXCL", "TIOCSCTTY",
+ "TIOCGPGRP", "TIOCSPGRP", "TIOCOUTQ", "TIOCSTI", "TIOCGWINSZ", "TIOCSWINSZ",
+ "TIOCMGET", "TIOCMBIS", "TIOCMBIC", "TIOCMSET", "TIOCGSOFTCAR", "TIOCSSOFTCAR",
+ "FIONREAD", "TIOCLINUX", "TIOCCONS", "TIOCGSERIAL", "TIOCSSERIAL", "TIOCPKT",
+ "FIONBIO", "TIOCNOTTY", "TIOCSETD", "TIOCGETD", "TCSBRKP",
+ [_IOC_NR(TIOCSBRK)] = "TIOCSBRK", "TIOCCBRK", "TIOCGSID", "TCGETS2", "TCSETS2",
+ "TCSETSW2", "TCSETSF2", "TIOCGRS48", "TIOCSRS485", "TIOCGPTN", "TIOCSPTLCK",
+ "TIOCGDEV", "TCSETX", "TCSETXF", "TCSETXW", "TIOCSIG", "TIOCVHANGUP", "TIOCGPKT",
+ "TIOCGPTLCK", [_IOC_NR(TIOCGEXCL)] = "TIOCGEXCL", "TIOCGPTPEER",
+ [_IOC_NR(FIONCLEX)] = "FIONCLEX", "FIOCLEX", "FIOASYNC", "TIOCSERCONFIG",
+ "TIOCSERGWILD", "TIOCSERSWILD", "TIOCGLCKTRMIOS", "TIOCSLCKTRMIOS",
+ "TIOCSERGSTRUCT", "TIOCSERGETLSR", "TIOCSERGETMULTI", "TIOCSERSETMULTI",
+ "TIOCMIWAIT", "TIOCGICOUNT", };
+ static DEFINE_STRARRAY(ioctl_tty_cmd);
+
+ if (nr < strarray__ioctl_tty_cmd.nr_entries && strarray__ioctl_tty_cmd.entries[nr] != NULL)
+ return scnprintf(bf, size, "%s", strarray__ioctl_tty_cmd.entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 'T', nr, dir);
+}
+
+static size_t ioctl__scnprintf_drm_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/drm_ioctl_array.c"
+ static DEFINE_STRARRAY(drm_ioctl_cmds);
+
+ if (nr < strarray__drm_ioctl_cmds.nr_entries && strarray__drm_ioctl_cmds.entries[nr] != NULL)
+ return scnprintf(bf, size, "DRM_%s", strarray__drm_ioctl_cmds.entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 'd', nr, dir);
+}
+
+static size_t ioctl__scnprintf_sndrv_pcm_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/sndrv_pcm_ioctl_array.c"
+ static DEFINE_STRARRAY(sndrv_pcm_ioctl_cmds);
+
+ if (nr < strarray__sndrv_pcm_ioctl_cmds.nr_entries && strarray__sndrv_pcm_ioctl_cmds.entries[nr] != NULL)
+ return scnprintf(bf, size, "SNDRV_PCM_%s", strarray__sndrv_pcm_ioctl_cmds.entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 'A', nr, dir);
+}
+
+static size_t ioctl__scnprintf_sndrv_ctl_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/sndrv_ctl_ioctl_array.c"
+ static DEFINE_STRARRAY(sndrv_ctl_ioctl_cmds);
+
+ if (nr < strarray__sndrv_ctl_ioctl_cmds.nr_entries && strarray__sndrv_ctl_ioctl_cmds.entries[nr] != NULL)
+ return scnprintf(bf, size, "SNDRV_CTL_%s", strarray__sndrv_ctl_ioctl_cmds.entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 'U', nr, dir);
+}
+
+static size_t ioctl__scnprintf_kvm_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/kvm_ioctl_array.c"
+ static DEFINE_STRARRAY(kvm_ioctl_cmds);
+
+ if (nr < strarray__kvm_ioctl_cmds.nr_entries && strarray__kvm_ioctl_cmds.entries[nr] != NULL)
+ return scnprintf(bf, size, "KVM_%s", strarray__kvm_ioctl_cmds.entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir);
+}
+
+static size_t ioctl__scnprintf_vhost_virtio_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/vhost_virtio_ioctl_array.c"
+ static DEFINE_STRARRAY(vhost_virtio_ioctl_cmds);
+ static DEFINE_STRARRAY(vhost_virtio_ioctl_read_cmds);
+ struct strarray *s = (dir & _IOC_READ) ? &strarray__vhost_virtio_ioctl_read_cmds : &strarray__vhost_virtio_ioctl_cmds;
+
+ if (nr < s->nr_entries && s->entries[nr] != NULL)
+ return scnprintf(bf, size, "VHOST_%s", s->entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAF, nr, dir);
+}
+
+static size_t ioctl__scnprintf_perf_cmd(int nr, int dir, char *bf, size_t size)
+{
+#include "trace/beauty/generated/ioctl/perf_ioctl_array.c"
+ static DEFINE_STRARRAY(perf_ioctl_cmds);
+
+ if (nr < strarray__perf_ioctl_cmds.nr_entries && strarray__perf_ioctl_cmds.entries[nr] != NULL)
+ return scnprintf(bf, size, "PERF_%s", strarray__perf_ioctl_cmds.entries[nr]);
+
+ return scnprintf(bf, size, "(%#x, %#x, %#x)", 0xAE, nr, dir);
+}
+
+static size_t ioctl__scnprintf_cmd(unsigned long cmd, char *bf, size_t size)
+{
+ int dir = _IOC_DIR(cmd),
+ type = _IOC_TYPE(cmd),
+ nr = _IOC_NR(cmd),
+ sz = _IOC_SIZE(cmd);
+ int printed = 0;
+ static const struct ioctl_type {
+ int type;
+ size_t (*scnprintf)(int nr, int dir, char *bf, size_t size);
+ } ioctl_types[] = { /* Must be ordered by type */
+ { .type = '$', .scnprintf = ioctl__scnprintf_perf_cmd, },
+ ['A' - '$'] = { .type = 'A', .scnprintf = ioctl__scnprintf_sndrv_pcm_cmd, },
+ ['T' - '$'] = { .type = 'T', .scnprintf = ioctl__scnprintf_tty_cmd, },
+ ['U' - '$'] = { .type = 'U', .scnprintf = ioctl__scnprintf_sndrv_ctl_cmd, },
+ ['d' - '$'] = { .type = 'd', .scnprintf = ioctl__scnprintf_drm_cmd, },
+ [0xAE - '$'] = { .type = 0xAE, .scnprintf = ioctl__scnprintf_kvm_cmd, },
+ [0xAF - '$'] = { .type = 0xAF, .scnprintf = ioctl__scnprintf_vhost_virtio_cmd, },
+ };
+ const int nr_types = ARRAY_SIZE(ioctl_types);
+
+ if (type >= ioctl_types[0].type && type <= ioctl_types[nr_types - 1].type) {
+ const int index = type - ioctl_types[0].type;
+
+ if (ioctl_types[index].scnprintf != NULL)
+ return ioctl_types[index].scnprintf(nr, dir, bf, size);
+ }
+
+ printed += scnprintf(bf + printed, size - printed, "%c", '(');
+
+ if (dir == _IOC_NONE) {
+ printed += scnprintf(bf + printed, size - printed, "%s", "NONE");
+ } else {
+ if (dir & _IOC_READ)
+ printed += scnprintf(bf + printed, size - printed, "%s", "READ");
+ if (dir & _IOC_WRITE)
+ printed += scnprintf(bf + printed, size - printed, "%s%s", dir & _IOC_READ ? "|" : "", "WRITE");
+ }
+
+ return printed + scnprintf(bf + printed, size - printed, ", %#x, %#x, %#x)", type, nr, sz);
+}
+
+size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long cmd = arg->val;
+
+ return ioctl__scnprintf_cmd(cmd, bf, size);
+}
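
An ioctl request number packs direction, size, type and sequence number into one word; the _IOC_* accessors recover the fields, and each per-type table above is indexed by the nr field. A quick Linux-only sketch decoding TIOCGWINSZ, whose 'T' type byte is what routes it to the tty table:

    #include <stdio.h>
    #include <sys/ioctl.h>      /* TIOCGWINSZ */
    #include <linux/ioctl.h>    /* _IOC_DIR, _IOC_TYPE, _IOC_NR, _IOC_SIZE */

    int main(void)
    {
        unsigned int cmd = TIOCGWINSZ;  /* old-style 'T' ioctl, 0x5413 on x86 */

        printf("dir=%#x type='%c' nr=%#x size=%u\n",
               _IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd));
        return 0;               /* prints: dir=0 type='T' nr=0x13 size=0 */
    }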
diff --git a/tools/perf/trace/beauty/kcmp.c b/tools/perf/trace/beauty/kcmp.c
new file mode 100644
index 000000000..f62040eb9
--- /dev/null
+++ b/tools/perf/trace/beauty/kcmp.c
@@ -0,0 +1,44 @@
+/*
+ * trace/beauty/kcmp.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <machine.h>
+#include <uapi/linux/kcmp.h>
+
+#include "trace/beauty/generated/kcmp_type_array.c"
+
+size_t syscall_arg__scnprintf_kcmp_idx(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long fd = arg->val;
+ int type = syscall_arg__val(arg, 2);
+ pid_t pid;
+
+ if (type != KCMP_FILE)
+ return syscall_arg__scnprintf_long(bf, size, arg);
+
+ pid = syscall_arg__val(arg, arg->idx == 3 ? 0 : 1); /* idx1 -> pid1, idx2 -> pid2 */
+ return pid__scnprintf_fd(arg->trace, pid, fd, bf, size);
+}
+
+static size_t kcmp__scnprintf_type(int type, char *bf, size_t size)
+{
+ static DEFINE_STRARRAY(kcmp_types);
+ return strarray__scnprintf(&strarray__kcmp_types, bf, size, "%d", type);
+}
+
+size_t syscall_arg__scnprintf_kcmp_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long type = arg->val;
+
+ if (type != KCMP_FILE)
+ arg->mask |= (1 << 3) | (1 << 4); /* Ignore idx1 and idx2 */
+
+ return kcmp__scnprintf_type(type, bf, size);
+}
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
new file mode 100755
index 000000000..a3c304caa
--- /dev/null
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *kcmp_types[] = {\n"
+regex='^[[:space:]]+(KCMP_(\w+)),'
+egrep $regex ${header_dir}/kcmp.h | grep -v KCMP_TYPES, | \
+ sed -r "s/$regex/\1 \2/g" | \
+ xargs printf "\t[%s]\t= \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh
new file mode 100755
index 000000000..c4699fd46
--- /dev/null
+++ b/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *kvm_ioctl_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+KVM_(\w+)[[:space:]]+_IO[RW]*\([[:space:]]*KVMIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
+egrep $regex ${header_dir}/kvm.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ egrep -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
new file mode 100755
index 000000000..431639eb4
--- /dev/null
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
+
+printf "static const char *madvise_advices[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
+egrep $regex ${header_dir}/mman-common.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
new file mode 100644
index 000000000..9f68077b2
--- /dev/null
+++ b/tools/perf/trace/beauty/mmap.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/mman.h>
+
+static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, prot = arg->val;
+
+ if (prot == PROT_NONE)
+ return scnprintf(bf, size, "NONE");
+#define P_MMAP_PROT(n) \
+ if (prot & PROT_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ prot &= ~PROT_##n; \
+ }
+
+ P_MMAP_PROT(EXEC);
+ P_MMAP_PROT(READ);
+ P_MMAP_PROT(WRITE);
+ P_MMAP_PROT(SEM);
+ P_MMAP_PROT(GROWSDOWN);
+ P_MMAP_PROT(GROWSUP);
+#undef P_MMAP_PROT
+
+ if (prot)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", prot);
+
+ return printed;
+}
+
+#define SCA_MMAP_PROT syscall_arg__scnprintf_mmap_prot
+
+static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags & MAP_ANONYMOUS)
+ arg->mask |= (1 << 4) | (1 << 5); /* Mask 4th ('fd') and 5th ('offset') args, ignored */
+
+#define P_MMAP_FLAG(n) \
+ if (flags & MAP_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~MAP_##n; \
+ }
+
+ P_MMAP_FLAG(SHARED);
+ P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
+ P_MMAP_FLAG(32BIT);
+#endif
+ P_MMAP_FLAG(ANONYMOUS);
+ P_MMAP_FLAG(DENYWRITE);
+ P_MMAP_FLAG(EXECUTABLE);
+ P_MMAP_FLAG(FILE);
+ P_MMAP_FLAG(FIXED);
+#ifdef MAP_FIXED_NOREPLACE
+ P_MMAP_FLAG(FIXED_NOREPLACE);
+#endif
+ P_MMAP_FLAG(GROWSDOWN);
+ P_MMAP_FLAG(HUGETLB);
+ P_MMAP_FLAG(LOCKED);
+ P_MMAP_FLAG(NONBLOCK);
+ P_MMAP_FLAG(NORESERVE);
+ P_MMAP_FLAG(POPULATE);
+ P_MMAP_FLAG(STACK);
+ P_MMAP_FLAG(UNINITIALIZED);
+#ifdef MAP_SYNC
+ P_MMAP_FLAG(SYNC);
+#endif
+#undef P_MMAP_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_MMAP_FLAGS syscall_arg__scnprintf_mmap_flags
+
+static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_MREMAP_FLAG(n) \
+ if (flags & MREMAP_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~MREMAP_##n; \
+ }
+
+ P_MREMAP_FLAG(MAYMOVE);
+ P_MREMAP_FLAG(FIXED);
+#undef P_MREMAP_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_MREMAP_FLAGS syscall_arg__scnprintf_mremap_flags
+
+static size_t madvise__scnprintf_behavior(int behavior, char *bf, size_t size)
+{
+#include "trace/beauty/generated/madvise_behavior_array.c"
+ static DEFINE_STRARRAY(madvise_advices);
+
+ if (behavior < strarray__madvise_advices.nr_entries && strarray__madvise_advices.entries[behavior] != NULL)
+ return scnprintf(bf, size, "MADV_%s", strarray__madvise_advices.entries[behavior]);
+
+ return scnprintf(bf, size, "%#", behavior);
+}
+
+static size_t syscall_arg__scnprintf_madvise_behavior(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ return madvise__scnprintf_behavior(arg->val, bf, size);
+}
+
+#define SCA_MADV_BHV syscall_arg__scnprintf_madvise_behavior
diff --git a/tools/perf/trace/beauty/mode_t.c b/tools/perf/trace/beauty/mode_t.c
new file mode 100644
index 000000000..d929ad7dd
--- /dev/null
+++ b/tools/perf/trace/beauty/mode_t.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+/* From include/linux/stat.h */
+#ifndef S_IRWXUGO
+#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO)
+#endif
+#ifndef S_IALLUGO
+#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO)
+#endif
+#ifndef S_IRUGO
+#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH)
+#endif
+#ifndef S_IWUGO
+#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH)
+#endif
+#ifndef S_IXUGO
+#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH)
+#endif
+
+static size_t syscall_arg__scnprintf_mode_t(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, mode = arg->val;
+
+#define P_MODE(n) \
+ if ((mode & S_##n) == S_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ mode &= ~S_##n; \
+ }
+
+ P_MODE(IALLUGO);
+ P_MODE(IRWXUGO);
+ P_MODE(IRUGO);
+ P_MODE(IWUGO);
+ P_MODE(IXUGO);
+ P_MODE(IFMT);
+ P_MODE(IFSOCK);
+ P_MODE(IFLNK);
+ P_MODE(IFREG);
+ P_MODE(IFBLK);
+ P_MODE(IFDIR);
+ P_MODE(IFCHR);
+ P_MODE(IFIFO);
+ P_MODE(ISUID);
+ P_MODE(ISGID);
+ P_MODE(ISVTX);
+ P_MODE(IRWXU);
+ P_MODE(IRUSR);
+ P_MODE(IWUSR);
+ P_MODE(IXUSR);
+ P_MODE(IRWXG);
+ P_MODE(IRGRP);
+ P_MODE(IWGRP);
+ P_MODE(IXGRP);
+ P_MODE(IRWXO);
+ P_MODE(IROTH);
+ P_MODE(IWOTH);
+ P_MODE(IXOTH);
+#undef P_MODE
+
+ if (mode)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", mode);
+
+ return printed;
+}
+
+#define SCA_MODE_T syscall_arg__scnprintf_mode_t
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
new file mode 100644
index 000000000..c064d6aae
--- /dev/null
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef MSG_PROBE
+#define MSG_PROBE 0x10
+#endif
+#ifndef MSG_WAITFORONE
+#define MSG_WAITFORONE 0x10000
+#endif
+#ifndef MSG_SENDPAGE_NOTLAST
+#define MSG_SENDPAGE_NOTLAST 0x20000
+#endif
+#ifndef MSG_FASTOPEN
+#define MSG_FASTOPEN 0x20000000
+#endif
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC 0x40000000
+#endif
+
+static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return scnprintf(bf, size, "NONE");
+#define P_MSG_FLAG(n) \
+ if (flags & MSG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~MSG_##n; \
+ }
+
+ P_MSG_FLAG(OOB);
+ P_MSG_FLAG(PEEK);
+ P_MSG_FLAG(DONTROUTE);
+ P_MSG_FLAG(CTRUNC);
+ P_MSG_FLAG(PROBE);
+ P_MSG_FLAG(TRUNC);
+ P_MSG_FLAG(DONTWAIT);
+ P_MSG_FLAG(EOR);
+ P_MSG_FLAG(WAITALL);
+ P_MSG_FLAG(FIN);
+ P_MSG_FLAG(SYN);
+ P_MSG_FLAG(CONFIRM);
+ P_MSG_FLAG(RST);
+ P_MSG_FLAG(ERRQUEUE);
+ P_MSG_FLAG(NOSIGNAL);
+ P_MSG_FLAG(MORE);
+ P_MSG_FLAG(WAITFORONE);
+ P_MSG_FLAG(SENDPAGE_NOTLAST);
+ P_MSG_FLAG(FASTOPEN);
+ P_MSG_FLAG(CMSG_CLOEXEC);
+#undef P_MSG_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_MSG_FLAGS syscall_arg__scnprintf_msg_flags
diff --git a/tools/perf/trace/beauty/open_flags.c b/tools/perf/trace/beauty/open_flags.c
new file mode 100644
index 000000000..6aec6178a
--- /dev/null
+++ b/tools/perf/trace/beauty/open_flags.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#ifndef O_DIRECT
+#define O_DIRECT 00040000
+#endif
+
+#ifndef O_DIRECTORY
+#define O_DIRECTORY 00200000
+#endif
+
+#ifndef O_NOATIME
+#define O_NOATIME 01000000
+#endif
+
+#ifndef O_TMPFILE
+#define O_TMPFILE 020000000
+#endif
+
+#undef O_LARGEFILE
+#define O_LARGEFILE 00100000
+
+size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size)
+{
+ int printed = 0;
+
+ if (flags == 0)
+ return scnprintf(bf, size, "RDONLY");
+#define P_FLAG(n) \
+ if (flags & O_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~O_##n; \
+ }
+
+ P_FLAG(RDWR);
+ P_FLAG(APPEND);
+ P_FLAG(ASYNC);
+ P_FLAG(CLOEXEC);
+ P_FLAG(CREAT);
+ P_FLAG(DIRECT);
+ P_FLAG(DIRECTORY);
+ P_FLAG(EXCL);
+ P_FLAG(LARGEFILE);
+ P_FLAG(NOFOLLOW);
+ P_FLAG(TMPFILE);
+ P_FLAG(NOATIME);
+ P_FLAG(NOCTTY);
+#ifdef O_NONBLOCK
+ P_FLAG(NONBLOCK);
+#elif O_NDELAY
+ P_FLAG(NDELAY);
+#endif
+#ifdef O_PATH
+ P_FLAG(PATH);
+#endif
+#ifdef O_DSYNC
+ if ((flags & O_SYNC) == O_SYNC)
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", "SYNC");
+ else {
+ P_FLAG(DSYNC);
+ }
+#else
+ P_FLAG(SYNC);
+#endif
+ P_FLAG(TRUNC);
+ P_FLAG(WRONLY);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int flags = arg->val;
+
+ if (!(flags & O_CREAT))
+ arg->mask |= 1 << (arg->idx + 1); /* Mask the mode parm */
+
+ return open__scnprintf_flags(flags, bf, size);
+}
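
The `(flags & O_SYNC) == O_SYNC` test above exists because on Linux/glibc O_SYNC is defined as two bits (__O_SYNC | O_DSYNC), so a plain `flags & O_SYNC` would also fire for O_DSYNC alone. A sketch that makes the relationship visible:

    #include <stdio.h>
    #include <fcntl.h>

    int main(void)
    {
        /* glibc: O_SYNC == __O_SYNC|O_DSYNC, so O_DSYNC alone must not match */
        printf("O_DSYNC=%#o O_SYNC=%#o naive=%d exact=%d\n",
               O_DSYNC, O_SYNC,
               !!(O_DSYNC & O_SYNC), (O_DSYNC & O_SYNC) == O_SYNC);
        return 0;   /* prints: O_DSYNC=010000 O_SYNC=04010000 naive=1 exact=0 */
    }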
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
new file mode 100644
index 000000000..2bafd7c99
--- /dev/null
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+#endif
+
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return 0;
+
+#define P_FLAG(n) \
+ if (flags & PERF_FLAG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~PERF_FLAG_##n; \
+ }
+
+ P_FLAG(FD_NO_GROUP);
+ P_FLAG(FD_OUTPUT);
+ P_FLAG(PID_CGROUP);
+ P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh
new file mode 100755
index 000000000..6492c74df
--- /dev/null
+++ b/tools/perf/trace/beauty/perf_ioctl.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *perf_ioctl_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+PERF_EVENT_IOC_(\w+)[[:space:]]+_IO[RW]*[[:space:]]*\([[:space:]]*.\$.[[:space:]]*,[[:space:]]*([[:digit:]]+).*'
+egrep $regex ${header_dir}/perf_event.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/pid.c b/tools/perf/trace/beauty/pid.c
new file mode 100644
index 000000000..0313df342
--- /dev/null
+++ b/tools/perf/trace/beauty/pid.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+size_t syscall_arg__scnprintf_pid(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int pid = arg->val;
+ struct trace *trace = arg->trace;
+ size_t printed = scnprintf(bf, size, "%d", pid);
+ struct thread *thread = machine__findnew_thread(trace->host, pid, pid);
+
+ if (thread != NULL) {
+ if (!thread->comm_set)
+ thread__set_comm_from_proc(thread);
+
+ if (thread->comm_set)
+ printed += scnprintf(bf + printed, size - printed,
+ " (%s)", thread__comm_str(thread));
+ thread__put(thread);
+ }
+
+ return printed;
+}
diff --git a/tools/perf/trace/beauty/pkey_alloc.c b/tools/perf/trace/beauty/pkey_alloc.c
new file mode 100644
index 000000000..2ba784a37
--- /dev/null
+++ b/tools/perf/trace/beauty/pkey_alloc.c
@@ -0,0 +1,50 @@
+/*
+ * trace/beauty/pkey_alloc.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <linux/log2.h>
+
+static size_t pkey_alloc__scnprintf_access_rights(int access_rights, char *bf, size_t size)
+{
+ int i, printed = 0;
+
+#include "trace/beauty/generated/pkey_alloc_access_rights_array.c"
+ static DEFINE_STRARRAY(pkey_alloc_access_rights);
+
+ if (access_rights == 0) {
+ const char *s = strarray__pkey_alloc_access_rights.entries[0];
+ if (s)
+ return scnprintf(bf, size, "%s", s);
+ return scnprintf(bf, size, "%d", 0);
+ }
+
+ for (i = 1; i < strarray__pkey_alloc_access_rights.nr_entries; ++i) {
+ int bit = 1 << (i - 1);
+
+ if (!(access_rights & bit))
+ continue;
+
+ if (printed != 0)
+ printed += scnprintf(bf + printed, size - printed, "|");
+
+ if (strarray__pkey_alloc_access_rights.entries[i] != NULL)
+ printed += scnprintf(bf + printed, size - printed, "%s", strarray__pkey_alloc_access_rights.entries[i]);
+ else
+ printed += scnprintf(bf + printed, size - printed, "0x%#", bit);
+ }
+
+ return printed;
+}
+
+size_t syscall_arg__scnprintf_pkey_alloc_access_rights(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long cmd = arg->val;
+
+ return pkey_alloc__scnprintf_access_rights(cmd, bf, size);
+}
diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
new file mode 100755
index 000000000..e0a51aeb2
--- /dev/null
+++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/asm-generic/
+
+printf "static const char *pkey_alloc_access_rights[] = {\n"
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*'
+egrep $regex ${header_dir}/mman-common.h | \
+ sed -r "s/$regex/\2 \2 \1/g" | \
+ sort | xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/prctl.c b/tools/perf/trace/beauty/prctl.c
new file mode 100644
index 000000000..246130dad
--- /dev/null
+++ b/tools/perf/trace/beauty/prctl.c
@@ -0,0 +1,82 @@
+/*
+ * trace/beauty/prctl.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <uapi/linux/prctl.h>
+
+#include "trace/beauty/generated/prctl_option_array.c"
+
+static size_t prctl__scnprintf_option(int option, char *bf, size_t size)
+{
+ static DEFINE_STRARRAY(prctl_options);
+ return strarray__scnprintf(&strarray__prctl_options, bf, size, "%d", option);
+}
+
+static size_t prctl__scnprintf_set_mm(int option, char *bf, size_t size)
+{
+ static DEFINE_STRARRAY(prctl_set_mm_options);
+ return strarray__scnprintf(&strarray__prctl_set_mm_options, bf, size, "%d", option);
+}
+
+size_t syscall_arg__scnprintf_prctl_arg2(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int option = syscall_arg__val(arg, 0);
+
+ if (option == PR_SET_MM)
+ return prctl__scnprintf_set_mm(arg->val, bf, size);
+ /*
+ * We still don't grab the contents of pointers on entry or exit,
+ * so just print them as hex numbers
+ */
+ if (option == PR_SET_NAME)
+ return syscall_arg__scnprintf_hex(bf, size, arg);
+
+ return syscall_arg__scnprintf_long(bf, size, arg);
+}
+
+size_t syscall_arg__scnprintf_prctl_arg3(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int option = syscall_arg__val(arg, 0);
+
+ if (option == PR_SET_MM)
+ return syscall_arg__scnprintf_hex(bf, size, arg);
+
+ return syscall_arg__scnprintf_long(bf, size, arg);
+}
+
+size_t syscall_arg__scnprintf_prctl_option(char *bf, size_t size, struct syscall_arg *arg)
+{
+ unsigned long option = arg->val;
+ enum {
+ SPO_ARG2 = (1 << 1),
+ SPO_ARG3 = (1 << 2),
+ SPO_ARG4 = (1 << 3),
+ SPO_ARG5 = (1 << 4),
+ SPO_ARG6 = (1 << 5),
+ };
+ const u8 all_but2 = SPO_ARG3 | SPO_ARG4 | SPO_ARG5 | SPO_ARG6;
+ const u8 all = SPO_ARG2 | all_but2;
+ const u8 masks[] = {
+ [PR_GET_DUMPABLE] = all,
+ [PR_SET_DUMPABLE] = all_but2,
+ [PR_SET_NAME] = all_but2,
+ [PR_GET_CHILD_SUBREAPER] = all_but2,
+ [PR_SET_CHILD_SUBREAPER] = all_but2,
+ [PR_GET_SECUREBITS] = all,
+ [PR_SET_SECUREBITS] = all_but2,
+ [PR_SET_MM] = SPO_ARG4 | SPO_ARG5 | SPO_ARG6,
+ [PR_GET_PDEATHSIG] = all,
+ [PR_SET_PDEATHSIG] = all_but2,
+ };
+
+ if (option < ARRAY_SIZE(masks))
+ arg->mask |= masks[option];
+
+ return prctl__scnprintf_option(option, bf, size);
+}
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
new file mode 100755
index 000000000..f24722146
--- /dev/null
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *prctl_options[] = {\n"
+regex='^#define[[:space:]]+PR_([GS]ET\w+)[[:space:]]*([[:xdigit:]]+).*'
+egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
+
+printf "static const char *prctl_set_mm_options[] = {\n"
+regex='^#[[:space:]]+define[[:space:]]+PR_SET_MM_(\w+)[[:space:]]*([[:digit:]]+).*'
+egrep $regex ${header_dir}/prctl.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort -n | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/sched_policy.c b/tools/perf/trace/beauty/sched_policy.c
new file mode 100644
index 000000000..ba5096ae7
--- /dev/null
+++ b/tools/perf/trace/beauty/sched_policy.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sched.h>
+
+/*
+ * Not defined anywhere else, probably, just to make sure we
+ * catch future flags
+ */
+#define SCHED_POLICY_MASK 0xff
+
+#ifndef SCHED_DEADLINE
+#define SCHED_DEADLINE 6
+#endif
+#ifndef SCHED_RESET_ON_FORK
+#define SCHED_RESET_ON_FORK 0x40000000
+#endif
+
+static size_t syscall_arg__scnprintf_sched_policy(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ const char *policies[] = {
+ "NORMAL", "FIFO", "RR", "BATCH", "ISO", "IDLE", "DEADLINE",
+ };
+ size_t printed;
+ int policy = arg->val,
+ flags = policy & ~SCHED_POLICY_MASK;
+
+ policy &= SCHED_POLICY_MASK;
+ if (policy <= SCHED_DEADLINE)
+ printed = scnprintf(bf, size, "%s", policies[policy]);
+ else
+ printed = scnprintf(bf, size, "%#x", policy);
+
+#define P_POLICY_FLAG(n) \
+ if (flags & SCHED_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+ flags &= ~SCHED_##n; \
+ }
+
+ P_POLICY_FLAG(RESET_ON_FORK);
+#undef P_POLICY_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+ return printed;
+}
+
+#define SCA_SCHED_POLICY syscall_arg__scnprintf_sched_policy
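
sched_policy.c splits the value the same way the kernel does: the low byte selects the policy, and anything above it (in practice SCHED_RESET_ON_FORK) is treated as a flag. A standalone sketch of the split — note SCHED_POLICY_MASK is this file's own catch-all constant, not a kernel define:

    #include <stdio.h>
    #include <sched.h>

    #ifndef SCHED_RESET_ON_FORK
    #define SCHED_RESET_ON_FORK 0x40000000
    #endif
    #define SCHED_POLICY_MASK 0xff     /* same catch-all as above */

    int main(void)
    {
        int val = SCHED_FIFO | SCHED_RESET_ON_FORK; /* as passed to sched_setscheduler() */

        printf("policy=%d flags=%#x\n",
               val & SCHED_POLICY_MASK, val & ~SCHED_POLICY_MASK);
        return 0;   /* prints: policy=1 flags=0x40000000 */
    }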
diff --git a/tools/perf/trace/beauty/seccomp.c b/tools/perf/trace/beauty/seccomp.c
new file mode 100644
index 000000000..b7097fd5f
--- /dev/null
+++ b/tools/perf/trace/beauty/seccomp.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_op(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int op = arg->val;
+ size_t printed = 0;
+
+ switch (op) {
+#define P_SECCOMP_SET_MODE_OP(n) case SECCOMP_SET_MODE_##n: printed = scnprintf(bf, size, #n); break
+ P_SECCOMP_SET_MODE_OP(STRICT);
+ P_SECCOMP_SET_MODE_OP(FILTER);
+#undef P_SECCOMP_SET_MODE_OP
+ default: printed = scnprintf(bf, size, "%#x", op); break;
+ }
+
+ return printed;
+}
+
+#define SCA_SECCOMP_OP syscall_arg__scnprintf_seccomp_op
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
+#endif
+
+static size_t syscall_arg__scnprintf_seccomp_flags(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_FLAG(n) \
+ if (flags & SECCOMP_FILTER_FLAG_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~SECCOMP_FILTER_FLAG_##n; \
+ }
+
+ P_FLAG(TSYNC);
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+#define SCA_SECCOMP_FLAGS syscall_arg__scnprintf_seccomp_flags
diff --git a/tools/perf/trace/beauty/signum.c b/tools/perf/trace/beauty/signum.c
new file mode 100644
index 000000000..bde18a53f
--- /dev/null
+++ b/tools/perf/trace/beauty/signum.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <signal.h>
+
+static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int sig = arg->val;
+
+ switch (sig) {
+#define P_SIGNUM(n) case SIG##n: return scnprintf(bf, size, #n)
+ P_SIGNUM(HUP);
+ P_SIGNUM(INT);
+ P_SIGNUM(QUIT);
+ P_SIGNUM(ILL);
+ P_SIGNUM(TRAP);
+ P_SIGNUM(ABRT);
+ P_SIGNUM(BUS);
+ P_SIGNUM(FPE);
+ P_SIGNUM(KILL);
+ P_SIGNUM(USR1);
+ P_SIGNUM(SEGV);
+ P_SIGNUM(USR2);
+ P_SIGNUM(PIPE);
+ P_SIGNUM(ALRM);
+ P_SIGNUM(TERM);
+ P_SIGNUM(CHLD);
+ P_SIGNUM(CONT);
+ P_SIGNUM(STOP);
+ P_SIGNUM(TSTP);
+ P_SIGNUM(TTIN);
+ P_SIGNUM(TTOU);
+ P_SIGNUM(URG);
+ P_SIGNUM(XCPU);
+ P_SIGNUM(XFSZ);
+ P_SIGNUM(VTALRM);
+ P_SIGNUM(PROF);
+ P_SIGNUM(WINCH);
+ P_SIGNUM(IO);
+ P_SIGNUM(PWR);
+ P_SIGNUM(SYS);
+#ifdef SIGEMT
+ P_SIGNUM(EMT);
+#endif
+#ifdef SIGSTKFLT
+ P_SIGNUM(STKFLT);
+#endif
+#ifdef SIGSWI
+ P_SIGNUM(SWI);
+#endif
+ default: break;
+ }
+
+ return scnprintf(bf, size, "%#x", sig);
+}
+
+#define SCA_SIGNUM syscall_arg__scnprintf_signum
diff --git a/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
new file mode 100755
index 000000000..eb511bb5f
--- /dev/null
+++ b/tools/perf/trace/beauty/sndrv_ctl_ioctl.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
+
+printf "static const char *sndrv_ctl_ioctl_cmds[] = {\n"
+grep "^#define[\t ]\+SNDRV_CTL_IOCTL_" $header_dir/asound.h | \
+ sed -r 's/^#define +SNDRV_CTL_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.U., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
+printf "};\n"
diff --git a/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
new file mode 100755
index 000000000..681839296
--- /dev/null
+++ b/tools/perf/trace/beauty/sndrv_pcm_ioctl.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/sound/
+
+printf "static const char *sndrv_pcm_ioctl_cmds[] = {\n"
+grep "^#define[\t ]\+SNDRV_PCM_IOCTL_" $header_dir/asound.h | \
+ sed -r 's/^#define +SNDRV_PCM_IOCTL_([A-Z0-9_]+)[\t ]+_IO[RW]*\( *.A., *(0x[[:xdigit:]]+),?.*/\t[\2] = \"\1\",/g'
+printf "};\n"
diff --git a/tools/perf/trace/beauty/socket.c b/tools/perf/trace/beauty/socket.c
new file mode 100644
index 000000000..652272693
--- /dev/null
+++ b/tools/perf/trace/beauty/socket.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * trace/beauty/socket.c
+ *
+ * Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "trace/beauty/beauty.h"
+#include <sys/types.h>
+#include <sys/socket.h>
+
+static size_t socket__scnprintf_ipproto(int protocol, char *bf, size_t size)
+{
+#include "trace/beauty/generated/socket_ipproto_array.c"
+ static DEFINE_STRARRAY(socket_ipproto);
+
+ return strarray__scnprintf(&strarray__socket_ipproto, bf, size, "%d", protocol);
+}
+
+size_t syscall_arg__scnprintf_socket_protocol(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int domain = syscall_arg__val(arg, 0);
+
+ if (domain == AF_INET || domain == AF_INET6)
+ return socket__scnprintf_ipproto(arg->val, bf, size);
+
+ return syscall_arg__scnprintf_int(bf, size, arg);
+}
diff --git a/tools/perf/trace/beauty/socket_ipproto.sh b/tools/perf/trace/beauty/socket_ipproto.sh
new file mode 100755
index 000000000..a3cc24633
--- /dev/null
+++ b/tools/perf/trace/beauty/socket_ipproto.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *socket_ipproto[] = {\n"
+regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
+
+egrep $regex ${header_dir}/in.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/socket_type.c b/tools/perf/trace/beauty/socket_type.c
new file mode 100644
index 000000000..bca26aef4
--- /dev/null
+++ b/tools/perf/trace/beauty/socket_type.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP 6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC 02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK 00004000
+#endif
+
+#ifndef SOCK_TYPE_MASK
+#define SOCK_TYPE_MASK 0xf
+#endif
+
+static size_t syscall_arg__scnprintf_socket_type(char *bf, size_t size, struct syscall_arg *arg)
+{
+ size_t printed;
+ int type = arg->val,
+ flags = type & ~SOCK_TYPE_MASK;
+
+ type &= SOCK_TYPE_MASK;
+ /*
+ * Can't use a strarray, MIPS may override for ABI reasons.
+ */
+ switch (type) {
+#define P_SK_TYPE(n) case SOCK_##n: printed = scnprintf(bf, size, #n); break;
+ P_SK_TYPE(STREAM);
+ P_SK_TYPE(DGRAM);
+ P_SK_TYPE(RAW);
+ P_SK_TYPE(RDM);
+ P_SK_TYPE(SEQPACKET);
+ P_SK_TYPE(DCCP);
+ P_SK_TYPE(PACKET);
+#undef P_SK_TYPE
+ default:
+ printed = scnprintf(bf, size, "%#x", type);
+ }
+
+#define P_SK_FLAG(n) \
+ if (flags & SOCK_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "|%s", #n); \
+ flags &= ~SOCK_##n; \
+ }
+
+ P_SK_FLAG(CLOEXEC);
+ P_SK_FLAG(NONBLOCK);
+#undef P_SK_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "|%#x", flags);
+
+ return printed;
+}
+
+#define SCA_SK_TYPE syscall_arg__scnprintf_socket_type
diff --git a/tools/perf/trace/beauty/statx.c b/tools/perf/trace/beauty/statx.c
new file mode 100644
index 000000000..5643b692a
--- /dev/null
+++ b/tools/perf/trace/beauty/statx.c
@@ -0,0 +1,72 @@
+/*
+ * trace/beauty/statx.c
+ *
+ * Copyright (C) 2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "trace/beauty/beauty.h"
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <uapi/linux/fcntl.h>
+#include <uapi/linux/stat.h>
+
+size_t syscall_arg__scnprintf_statx_flags(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+ if (flags == 0)
+ return scnprintf(bf, size, "SYNC_AS_STAT");
+#define P_FLAG(n) \
+ if (flags & AT_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~AT_##n; \
+ }
+
+ P_FLAG(SYMLINK_NOFOLLOW);
+ P_FLAG(REMOVEDIR);
+ P_FLAG(SYMLINK_FOLLOW);
+ P_FLAG(NO_AUTOMOUNT);
+ P_FLAG(EMPTY_PATH);
+ P_FLAG(STATX_FORCE_SYNC);
+ P_FLAG(STATX_DONT_SYNC);
+
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
+
+size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_arg *arg)
+{
+ int printed = 0, flags = arg->val;
+
+#define P_FLAG(n) \
+ if (flags & STATX_##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ flags &= ~STATX_##n; \
+ }
+
+ P_FLAG(TYPE);
+ P_FLAG(MODE);
+ P_FLAG(NLINK);
+ P_FLAG(UID);
+ P_FLAG(GID);
+ P_FLAG(ATIME);
+ P_FLAG(MTIME);
+ P_FLAG(CTIME);
+ P_FLAG(INO);
+ P_FLAG(SIZE);
+ P_FLAG(BLOCKS);
+ P_FLAG(BTIME);
+
+#undef P_FLAG
+
+ if (flags)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+ return printed;
+}
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
new file mode 100755
index 000000000..0f6a5197d
--- /dev/null
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/include/uapi/linux/
+
+printf "static const char *vhost_virtio_ioctl_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
+egrep $regex ${header_dir}/vhost.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
+
+printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n"
+regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
+egrep $regex ${header_dir}/vhost.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ sort | xargs printf "\t[%s] = \"%s\",\n"
+printf "};\n"
diff --git a/tools/perf/trace/beauty/waitid_options.c b/tools/perf/trace/beauty/waitid_options.c
new file mode 100644
index 000000000..8465281a0
--- /dev/null
+++ b/tools/perf/trace/beauty/waitid_options.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/wait.h>
+
+static size_t syscall_arg__scnprintf_waitid_options(char *bf, size_t size,
+ struct syscall_arg *arg)
+{
+ int printed = 0, options = arg->val;
+
+#define P_OPTION(n) \
+ if (options & W##n) { \
+ printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+ options &= ~W##n; \
+ }
+
+ P_OPTION(NOHANG);
+ P_OPTION(UNTRACED);
+ P_OPTION(CONTINUED);
+#undef P_OPTION
+
+ if (options)
+ printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", options);
+
+ return printed;
+}
+
+#define SCA_WAITID_OPTIONS syscall_arg__scnprintf_waitid_options
diff --git a/tools/perf/trace/strace/groups/file b/tools/perf/trace/strace/groups/file
new file mode 100644
index 000000000..722e25d20
--- /dev/null
+++ b/tools/perf/trace/strace/groups/file
@@ -0,0 +1,20 @@
+access
+chmod
+creat
+execve
+faccessat
+getcwd
+lstat
+mkdir
+open
+openat
+quotactl
+read
+readlink
+rename
+rmdir
+stat
+statfs
+symlink
+unlink
+write
diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build
new file mode 100644
index 000000000..0a73538c0
--- /dev/null
+++ b/tools/perf/ui/Build
@@ -0,0 +1,14 @@
+libperf-y += setup.o
+libperf-y += helpline.o
+libperf-y += progress.o
+libperf-y += util.o
+libperf-y += hist.o
+libperf-y += stdio/hist.o
+
+CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))"
+
+libperf-$(CONFIG_SLANG) += browser.o
+libperf-$(CONFIG_SLANG) += browsers/
+libperf-$(CONFIG_SLANG) += tui/
+
+CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST
diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c
new file mode 100644
index 000000000..4f7556142
--- /dev/null
+++ b/tools/perf/ui/browser.c
@@ -0,0 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../util.h"
+#include "../string2.h"
+#include "../config.h"
+#include "../../perf.h"
+#include "libslang.h"
+#include "ui.h"
+#include "util.h"
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/string.h>
+#include <stdlib.h>
+#include <sys/ttydefaults.h>
+#include "browser.h"
+#include "helpline.h"
+#include "keysyms.h"
+#include "../color.h"
+#include "sane_ctype.h"
+
+static int ui_browser__percent_color(struct ui_browser *browser,
+ double percent, bool current)
+{
+ if (current && (!browser->use_navkeypressed || browser->navkeypressed))
+ return HE_COLORSET_SELECTED;
+ if (percent >= MIN_RED)
+ return HE_COLORSET_TOP;
+ if (percent >= MIN_GREEN)
+ return HE_COLORSET_MEDIUM;
+ return HE_COLORSET_NORMAL;
+}
+
+int ui_browser__set_color(struct ui_browser *browser, int color)
+{
+ int ret = browser->current_color;
+ browser->current_color = color;
+ SLsmg_set_color(color);
+ return ret;
+}
+
+void ui_browser__set_percent_color(struct ui_browser *browser,
+ double percent, bool current)
+{
+ int color = ui_browser__percent_color(browser, percent, current);
+ ui_browser__set_color(browser, color);
+}
+
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x)
+{
+ SLsmg_gotorc(browser->y + y, browser->x + x);
+}
+
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x)
+{
+ SLsmg_gotorc(browser->y + y + browser->extra_title_lines, browser->x + x);
+}
+
+void ui_browser__write_nstring(struct ui_browser *browser __maybe_unused, const char *msg,
+ unsigned int width)
+{
+ slsmg_write_nstring(msg, width);
+}
+
+void ui_browser__vprintf(struct ui_browser *browser __maybe_unused, const char *fmt, va_list args)
+{
+ slsmg_vprintf(fmt, args);
+}
+
+void ui_browser__printf(struct ui_browser *browser __maybe_unused, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ ui_browser__vprintf(browser, fmt, args);
+ va_end(args);
+}
+
+static struct list_head *
+ui_browser__list_head_filter_entries(struct ui_browser *browser,
+ struct list_head *pos)
+{
+ do {
+ if (!browser->filter || !browser->filter(browser, pos))
+ return pos;
+ pos = pos->next;
+ } while (pos != browser->entries);
+
+ return NULL;
+}
+
+static struct list_head *
+ui_browser__list_head_filter_prev_entries(struct ui_browser *browser,
+ struct list_head *pos)
+{
+ do {
+ if (!browser->filter || !browser->filter(browser, pos))
+ return pos;
+ pos = pos->prev;
+ } while (pos != browser->entries);
+
+ return NULL;
+}
+
+void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence)
+{
+ struct list_head *head = browser->entries;
+ struct list_head *pos;
+
+ if (browser->nr_entries == 0)
+ return;
+
+ switch (whence) {
+ case SEEK_SET:
+ pos = ui_browser__list_head_filter_entries(browser, head->next);
+ break;
+ case SEEK_CUR:
+ pos = browser->top;
+ break;
+ case SEEK_END:
+ pos = ui_browser__list_head_filter_prev_entries(browser, head->prev);
+ break;
+ default:
+ return;
+ }
+
+ assert(pos != NULL);
+
+ if (offset > 0) {
+ while (offset-- != 0)
+ pos = ui_browser__list_head_filter_entries(browser, pos->next);
+ } else {
+ while (offset++ != 0)
+ pos = ui_browser__list_head_filter_prev_entries(browser, pos->prev);
+ }
+
+ browser->top = pos;
+}
+
+void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence)
+{
+ struct rb_root *root = browser->entries;
+ struct rb_node *nd;
+
+ switch (whence) {
+ case SEEK_SET:
+ nd = rb_first(root);
+ break;
+ case SEEK_CUR:
+ nd = browser->top;
+ break;
+ case SEEK_END:
+ nd = rb_last(root);
+ break;
+ default:
+ return;
+ }
+
+ if (offset > 0) {
+ while (offset-- != 0)
+ nd = rb_next(nd);
+ } else {
+ while (offset++ != 0)
+ nd = rb_prev(nd);
+ }
+
+ browser->top = nd;
+}
+
+unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser)
+{
+ struct rb_node *nd;
+ int row = 0;
+
+ if (browser->top == NULL)
+ browser->top = rb_first(browser->entries);
+
+ nd = browser->top;
+
+ while (nd != NULL) {
+ ui_browser__gotorc(browser, row, 0);
+ browser->write(browser, nd, row);
+ if (++row == browser->rows)
+ break;
+ nd = rb_next(nd);
+ }
+
+ return row;
+}
+
+bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row)
+{
+ return browser->top_idx + row == browser->index;
+}
+
+void ui_browser__refresh_dimensions(struct ui_browser *browser)
+{
+ browser->width = SLtt_Screen_Cols - 1;
+ browser->height = browser->rows = SLtt_Screen_Rows - 2;
+ browser->rows -= browser->extra_title_lines;
+ browser->y = 1;
+ browser->x = 0;
+}
+
+void ui_browser__handle_resize(struct ui_browser *browser)
+{
+ ui__refresh_dimensions(false);
+ ui_browser__show(browser, browser->title, ui_helpline__current);
+ ui_browser__refresh(browser);
+}
+
+int ui_browser__warning(struct ui_browser *browser, int timeout,
+ const char *format, ...)
+{
+ va_list args;
+ char *text;
+ int key = 0, err;
+
+ va_start(args, format);
+ err = vasprintf(&text, format, args);
+ va_end(args);
+
+ if (err < 0) {
+ va_start(args, format);
+ ui_helpline__vpush(format, args);
+ va_end(args);
+ } else {
+ while ((key = ui__question_window("Warning!", text,
+ "Press any key...",
+ timeout)) == K_RESIZE)
+ ui_browser__handle_resize(browser);
+ free(text);
+ }
+
+ return key;
+}
+
+int ui_browser__help_window(struct ui_browser *browser, const char *text)
+{
+ int key;
+
+ while ((key = ui__help_window(text)) == K_RESIZE)
+ ui_browser__handle_resize(browser);
+
+ return key;
+}
+
+bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text)
+{
+ int key;
+
+ while ((key = ui__dialog_yesno(text)) == K_RESIZE)
+ ui_browser__handle_resize(browser);
+
+ return key == K_ENTER || toupper(key) == 'Y';
+}
+
+void ui_browser__reset_index(struct ui_browser *browser)
+{
+ browser->index = browser->top_idx = 0;
+ browser->seek(browser, 0, SEEK_SET);
+}
+
+void __ui_browser__show_title(struct ui_browser *browser, const char *title)
+{
+ SLsmg_gotorc(0, 0);
+ ui_browser__set_color(browser, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(browser, title, browser->width + 1);
+}
+
+void ui_browser__show_title(struct ui_browser *browser, const char *title)
+{
+ pthread_mutex_lock(&ui__lock);
+ __ui_browser__show_title(browser, title);
+ pthread_mutex_unlock(&ui__lock);
+}
+
+int ui_browser__show(struct ui_browser *browser, const char *title,
+ const char *helpline, ...)
+{
+ int err;
+ va_list ap;
+
+ if (browser->refresh_dimensions == NULL)
+ browser->refresh_dimensions = ui_browser__refresh_dimensions;
+
+ browser->refresh_dimensions(browser);
+
+ pthread_mutex_lock(&ui__lock);
+ __ui_browser__show_title(browser, title);
+
+ browser->title = title;
+ zfree(&browser->helpline);
+
+ va_start(ap, helpline);
+ err = vasprintf(&browser->helpline, helpline, ap);
+ va_end(ap);
+ if (err > 0)
+ ui_helpline__push(browser->helpline);
+ pthread_mutex_unlock(&ui__lock);
+ return err ? 0 : -1;
+}
+
+void ui_browser__hide(struct ui_browser *browser)
+{
+ pthread_mutex_lock(&ui__lock);
+ ui_helpline__pop();
+ zfree(&browser->helpline);
+ pthread_mutex_unlock(&ui__lock);
+}
+
+static void ui_browser__scrollbar_set(struct ui_browser *browser)
+{
+ int height = browser->height, h = 0, pct = 0,
+ col = browser->width,
+ row = 0;
+
+ if (browser->nr_entries > 1) {
+ pct = ((browser->index * (browser->height - 1)) /
+ (browser->nr_entries - 1));
+ }
+
+ SLsmg_set_char_set(1);
+
+ while (h < height) {
+ ui_browser__gotorc(browser, row++, col);
+ SLsmg_write_char(h == pct ? SLSMG_DIAMOND_CHAR : SLSMG_CKBRD_CHAR);
+ ++h;
+ }
+
+ SLsmg_set_char_set(0);
+}
+
+static int __ui_browser__refresh(struct ui_browser *browser)
+{
+ int row;
+ int width = browser->width;
+
+ row = browser->refresh(browser);
+ ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+
+ if (!browser->use_navkeypressed || browser->navkeypressed)
+ ui_browser__scrollbar_set(browser);
+ else
+ width += 1;
+
+ SLsmg_fill_region(browser->y + row + browser->extra_title_lines, browser->x,
+ browser->rows - row, width, ' ');
+
+ return 0;
+}
+
+int ui_browser__refresh(struct ui_browser *browser)
+{
+ pthread_mutex_lock(&ui__lock);
+ __ui_browser__refresh(browser);
+ pthread_mutex_unlock(&ui__lock);
+
+ return 0;
+}
+
+/*
+ * Here we're updating nr_entries _after_ we started browsing, i.e. we have to
+ * forget about any reference to any entry in the underlying data structure,
+ * that is why we do a SEEK_SET. Think about 'perf top' in the hists browser
+ * after an output_resort and hist decay.
+ */
+void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries)
+{
+ off_t offset = nr_entries - browser->nr_entries;
+
+ browser->nr_entries = nr_entries;
+
+ if (offset < 0) {
+ if (browser->top_idx < (u64)-offset)
+ offset = -browser->top_idx;
+
+ browser->index += offset;
+ browser->top_idx += offset;
+ }
+
+ browser->top = NULL;
+ browser->seek(browser, browser->top_idx, SEEK_SET);
+}
+
+int ui_browser__run(struct ui_browser *browser, int delay_secs)
+{
+ int err, key;
+
+ while (1) {
+ off_t offset;
+
+ pthread_mutex_lock(&ui__lock);
+ err = __ui_browser__refresh(browser);
+ SLsmg_refresh();
+ pthread_mutex_unlock(&ui__lock);
+ if (err < 0)
+ break;
+
+ key = ui__getch(delay_secs);
+
+ if (key == K_RESIZE) {
+ ui__refresh_dimensions(false);
+ browser->refresh_dimensions(browser);
+ __ui_browser__show_title(browser, browser->title);
+ ui_helpline__puts(browser->helpline);
+ continue;
+ }
+
+ if (browser->use_navkeypressed && !browser->navkeypressed) {
+ if (key == K_DOWN || key == K_UP ||
+ (browser->columns && (key == K_LEFT || key == K_RIGHT)) ||
+ key == K_PGDN || key == K_PGUP ||
+ key == K_HOME || key == K_END ||
+ key == ' ') {
+ browser->navkeypressed = true;
+ continue;
+ } else
+ return key;
+ }
+
+ switch (key) {
+ case K_DOWN:
+ if (browser->index == browser->nr_entries - 1)
+ break;
+ ++browser->index;
+ if (browser->index == browser->top_idx + browser->rows) {
+ ++browser->top_idx;
+ browser->seek(browser, +1, SEEK_CUR);
+ }
+ break;
+ case K_UP:
+ if (browser->index == 0)
+ break;
+ --browser->index;
+ if (browser->index < browser->top_idx) {
+ --browser->top_idx;
+ browser->seek(browser, -1, SEEK_CUR);
+ }
+ break;
+ case K_RIGHT:
+ if (!browser->columns)
+ goto out;
+ if (browser->horiz_scroll < browser->columns - 1)
+ ++browser->horiz_scroll;
+ break;
+ case K_LEFT:
+ if (!browser->columns)
+ goto out;
+ if (browser->horiz_scroll != 0)
+ --browser->horiz_scroll;
+ break;
+ case K_PGDN:
+ case ' ':
+ if (browser->top_idx + browser->rows > browser->nr_entries - 1)
+ break;
+
+ offset = browser->rows;
+ if (browser->index + offset > browser->nr_entries - 1)
+ offset = browser->nr_entries - 1 - browser->index;
+ browser->index += offset;
+ browser->top_idx += offset;
+ browser->seek(browser, +offset, SEEK_CUR);
+ break;
+ case K_PGUP:
+ if (browser->top_idx == 0)
+ break;
+
+ if (browser->top_idx < browser->rows)
+ offset = browser->top_idx;
+ else
+ offset = browser->rows;
+
+ browser->index -= offset;
+ browser->top_idx -= offset;
+ browser->seek(browser, -offset, SEEK_CUR);
+ break;
+ case K_HOME:
+ ui_browser__reset_index(browser);
+ break;
+ case K_END:
+ offset = browser->rows - 1;
+ if (offset >= browser->nr_entries)
+ offset = browser->nr_entries - 1;
+
+ browser->index = browser->nr_entries - 1;
+ browser->top_idx = browser->index - offset;
+ browser->seek(browser, -offset, SEEK_END);
+ break;
+ default:
+ out:
+ return key;
+ }
+ }
+ return -1;
+}
+
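+/*
+ * Refresh callback for browsers backed by a list_head: start at ->top
+ * (re-deriving it from the first unfiltered entry when unset), write each
+ * entry that passes the filter and return the number of rows written, so
+ * __ui_browser__refresh() can blank whatever is left.
+ */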
+unsigned int ui_browser__list_head_refresh(struct ui_browser *browser)
+{
+ struct list_head *pos;
+ struct list_head *head = browser->entries;
+ int row = 0;
+
+ if (browser->top == NULL || browser->top == browser->entries)
+ browser->top = ui_browser__list_head_filter_entries(browser, head->next);
+
+ pos = browser->top;
+
+ list_for_each_from(pos, head) {
+ if (!browser->filter || !browser->filter(browser, pos)) {
+ ui_browser__gotorc(browser, row, 0);
+ browser->write(browser, pos, row);
+ if (++row == browser->rows)
+ break;
+ }
+ }
+
+ return row;
+}
+
+static struct ui_browser_colorset {
+ const char *name, *fg, *bg;
+ int colorset;
+} ui_browser__colorsets[] = {
+ {
+ .colorset = HE_COLORSET_TOP,
+ .name = "top",
+ .fg = "red",
+ .bg = "default",
+ },
+ {
+ .colorset = HE_COLORSET_MEDIUM,
+ .name = "medium",
+ .fg = "green",
+ .bg = "default",
+ },
+ {
+ .colorset = HE_COLORSET_NORMAL,
+ .name = "normal",
+ .fg = "default",
+ .bg = "default",
+ },
+ {
+ .colorset = HE_COLORSET_SELECTED,
+ .name = "selected",
+ .fg = "black",
+ .bg = "yellow",
+ },
+ {
+ .colorset = HE_COLORSET_JUMP_ARROWS,
+ .name = "jump_arrows",
+ .fg = "blue",
+ .bg = "default",
+ },
+ {
+ .colorset = HE_COLORSET_ADDR,
+ .name = "addr",
+ .fg = "magenta",
+ .bg = "default",
+ },
+ {
+ .colorset = HE_COLORSET_ROOT,
+ .name = "root",
+ .fg = "white",
+ .bg = "blue",
+ },
+ {
+ .name = NULL,
+ }
+};
+
+static int ui_browser__color_config(const char *var, const char *value,
+ void *data __maybe_unused)
+{
+ char *fg = NULL, *bg;
+ int i;
+
+ /* same prefix for all color config entries */
+ if (!strstarts(var, "colors."))
+ return 0;
+
+ for (i = 0; ui_browser__colorsets[i].name != NULL; ++i) {
+ const char *name = var + 7;
+
+ if (strcmp(ui_browser__colorsets[i].name, name) != 0)
+ continue;
+
+ fg = strdup(value);
+ if (fg == NULL)
+ break;
+
+ bg = strchr(fg, ',');
+ if (bg == NULL)
+ break;
+
+ *bg = '\0';
+ bg = ltrim(++bg);
+ ui_browser__colorsets[i].bg = bg;
+ ui_browser__colorsets[i].fg = fg;
+ return 0;
+ }
+
+ free(fg);
+ return -1;
+}
+
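+/*
+ * Seek callback for browsers over a NULL-terminated array of strings:
+ * ->top points into the char *[] array, so all movement is done in
+ * (char *) element units.
+ */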
+void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence)
+{
+ switch (whence) {
+ case SEEK_SET:
+ browser->top = browser->entries;
+ break;
+ case SEEK_CUR:
+ browser->top = (char **)browser->top + offset;
+ break;
+ case SEEK_END:
+ browser->top = (char **)browser->entries + browser->nr_entries - 1 + offset;
+ break;
+ default:
+ return;
+ }
+}
+
+unsigned int ui_browser__argv_refresh(struct ui_browser *browser)
+{
+ unsigned int row = 0, idx = browser->top_idx;
+ char **pos;
+
+ if (browser->top == NULL)
+ browser->top = browser->entries;
+
+ pos = (char **)browser->top;
+ while (idx < browser->nr_entries) {
+ if (!browser->filter || !browser->filter(browser, *pos)) {
+ ui_browser__gotorc(browser, row, 0);
+ browser->write(browser, pos, row);
+ if (++row == browser->rows)
+ break;
+ }
+
+ ++idx;
+ ++pos;
+ }
+
+ return row;
+}
+
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+ u16 start, u16 end)
+{
+ SLsmg_set_char_set(1);
+ ui_browser__gotorc(browser, start, column);
+ SLsmg_draw_vline(end - start + 1);
+ SLsmg_set_char_set(0);
+}
+
+void ui_browser__write_graph(struct ui_browser *browser __maybe_unused,
+ int graph)
+{
+ SLsmg_set_char_set(1);
+ SLsmg_write_char(graph);
+ SLsmg_set_char_set(0);
+}
+
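+/*
+ * Draw an arrow for a jump whose target is above its source: an elbow and
+ * a short horizontal segment at the source row, a vertical line going up,
+ * and a corner plus arrowhead at the target row, all clipped to the
+ * currently visible rows.
+ */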
+static void __ui_browser__line_arrow_up(struct ui_browser *browser,
+ unsigned int column,
+ u64 start, u64 end)
+{
+ unsigned int row, end_row;
+
+ SLsmg_set_char_set(1);
+
+ if (start < browser->top_idx + browser->rows) {
+ row = start - browser->top_idx;
+ ui_browser__gotorc(browser, row, column);
+ SLsmg_write_char(SLSMG_LLCORN_CHAR);
+ ui_browser__gotorc(browser, row, column + 1);
+ SLsmg_draw_hline(2);
+
+ if (row-- == 0)
+ goto out;
+ } else
+ row = browser->rows - 1;
+
+ if (end > browser->top_idx)
+ end_row = end - browser->top_idx;
+ else
+ end_row = 0;
+
+ ui_browser__gotorc(browser, end_row, column);
+ SLsmg_draw_vline(row - end_row + 1);
+
+ ui_browser__gotorc(browser, end_row, column);
+ if (end >= browser->top_idx) {
+ SLsmg_write_char(SLSMG_ULCORN_CHAR);
+ ui_browser__gotorc(browser, end_row, column + 1);
+ SLsmg_write_char(SLSMG_HLINE_CHAR);
+ ui_browser__gotorc(browser, end_row, column + 2);
+ SLsmg_write_char(SLSMG_RARROW_CHAR);
+ }
+out:
+ SLsmg_set_char_set(0);
+}
+
+static void __ui_browser__line_arrow_down(struct ui_browser *browser,
+ unsigned int column,
+ u64 start, u64 end)
+{
+ unsigned int row, end_row;
+
+ SLsmg_set_char_set(1);
+
+ if (start >= browser->top_idx) {
+ row = start - browser->top_idx;
+ ui_browser__gotorc(browser, row, column);
+ SLsmg_write_char(SLSMG_ULCORN_CHAR);
+ ui_browser__gotorc(browser, row, column + 1);
+ SLsmg_draw_hline(2);
+
+ if (++row == 0)
+ goto out;
+ } else
+ row = 0;
+
+ if (end >= browser->top_idx + browser->rows)
+ end_row = browser->rows - 1;
+ else
+ end_row = end - browser->top_idx;
+
+ ui_browser__gotorc(browser, row, column);
+ SLsmg_draw_vline(end_row - row + 1);
+
+ ui_browser__gotorc(browser, end_row, column);
+ if (end < browser->top_idx + browser->rows) {
+ SLsmg_write_char(SLSMG_LLCORN_CHAR);
+ ui_browser__gotorc(browser, end_row, column + 1);
+ SLsmg_write_char(SLSMG_HLINE_CHAR);
+ ui_browser__gotorc(browser, end_row, column + 2);
+ SLsmg_write_char(SLSMG_RARROW_CHAR);
+ }
+out:
+ SLsmg_set_char_set(0);
+}
+
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+ u64 start, u64 end)
+{
+ if (start > end)
+ __ui_browser__line_arrow_up(browser, column, start, end);
+ else
+ __ui_browser__line_arrow_down(browser, column, start, end);
+}
+
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+ unsigned int row, bool arrow_down)
+{
+ unsigned int end_row;
+
+ if (row >= browser->top_idx)
+ end_row = row - browser->top_idx;
+ else
+ return;
+
+ SLsmg_set_char_set(1);
+
+ if (arrow_down) {
+ ui_browser__gotorc(browser, end_row, column - 1);
+ SLsmg_write_char(SLSMG_ULCORN_CHAR);
+ ui_browser__gotorc(browser, end_row, column);
+ SLsmg_draw_hline(2);
+ ui_browser__gotorc(browser, end_row + 1, column - 1);
+ SLsmg_write_char(SLSMG_LTEE_CHAR);
+ } else {
+ ui_browser__gotorc(browser, end_row, column - 1);
+ SLsmg_write_char(SLSMG_LTEE_CHAR);
+ ui_browser__gotorc(browser, end_row, column);
+ SLsmg_draw_hline(2);
+ }
+
+ SLsmg_set_char_set(0);
+}
+
+void ui_browser__init(void)
+{
+ int i = 0;
+
+ perf_config(ui_browser__color_config, NULL);
+
+ while (ui_browser__colorsets[i].name) {
+ struct ui_browser_colorset *c = &ui_browser__colorsets[i++];
+ sltt_set_color(c->colorset, c->name, c->fg, c->bg);
+ }
+}
diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h
new file mode 100644
index 000000000..aa5932e1d
--- /dev/null
+++ b/tools/perf/ui/browser.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_BROWSER_H_
+#define _PERF_UI_BROWSER_H_ 1
+
+#include <linux/types.h>
+#include <stdarg.h>
+
+#define HE_COLORSET_TOP 50
+#define HE_COLORSET_MEDIUM 51
+#define HE_COLORSET_NORMAL 52
+#define HE_COLORSET_SELECTED 53
+#define HE_COLORSET_JUMP_ARROWS 54
+#define HE_COLORSET_ADDR 55
+#define HE_COLORSET_ROOT 56
+
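+/*
+ * Generic scrollable browser: concrete browsers provide the seek, refresh,
+ * write and (optionally) filter callbacks and point 'entries' at the
+ * backing store; 'top' tracks the entry rendered on the first visible row,
+ * 'index' and 'top_idx' the cursor and scroll positions.
+ */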
+struct ui_browser {
+ u64 index, top_idx;
+ void *top, *entries;
+ u16 y, x, width, height, rows, columns, horiz_scroll;
+ u8 extra_title_lines;
+ int current_color;
+ void *priv;
+ const char *title;
+ char *helpline;
+ void (*refresh_dimensions)(struct ui_browser *browser);
+ unsigned int (*refresh)(struct ui_browser *browser);
+ void (*write)(struct ui_browser *browser, void *entry, int row);
+ void (*seek)(struct ui_browser *browser, off_t offset, int whence);
+ bool (*filter)(struct ui_browser *browser, void *entry);
+ u32 nr_entries;
+ bool navkeypressed;
+ bool use_navkeypressed;
+};
+
+int ui_browser__set_color(struct ui_browser *browser, int color);
+void ui_browser__set_percent_color(struct ui_browser *browser,
+ double percent, bool current);
+bool ui_browser__is_current_entry(struct ui_browser *browser, unsigned row);
+void ui_browser__refresh_dimensions(struct ui_browser *browser);
+void ui_browser__reset_index(struct ui_browser *browser);
+
+void ui_browser__gotorc_title(struct ui_browser *browser, int y, int x);
+void ui_browser__gotorc(struct ui_browser *browser, int y, int x);
+void ui_browser__write_nstring(struct ui_browser *browser, const char *msg,
+ unsigned int width);
+void ui_browser__vprintf(struct ui_browser *browser, const char *fmt, va_list args);
+void ui_browser__printf(struct ui_browser *browser, const char *fmt, ...);
+void ui_browser__write_graph(struct ui_browser *browser, int graph);
+void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column,
+ u64 start, u64 end);
+void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column,
+ unsigned int row, bool arrow_down);
+void __ui_browser__show_title(struct ui_browser *browser, const char *title);
+void ui_browser__show_title(struct ui_browser *browser, const char *title);
+int ui_browser__show(struct ui_browser *browser, const char *title,
+ const char *helpline, ...);
+void ui_browser__hide(struct ui_browser *browser);
+int ui_browser__refresh(struct ui_browser *browser);
+int ui_browser__run(struct ui_browser *browser, int delay_secs);
+void ui_browser__update_nr_entries(struct ui_browser *browser, u32 nr_entries);
+void ui_browser__handle_resize(struct ui_browser *browser);
+void __ui_browser__vline(struct ui_browser *browser, unsigned int column,
+ u16 start, u16 end);
+
+int ui_browser__warning(struct ui_browser *browser, int timeout,
+ const char *format, ...);
+int ui_browser__help_window(struct ui_browser *browser, const char *text);
+bool ui_browser__dialog_yesno(struct ui_browser *browser, const char *text);
+int ui_browser__input_window(const char *title, const char *text, char *input,
+ const char *exit_msg, int delay_sec);
+struct perf_env;
+int tui__header_window(struct perf_env *env);
+
+void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__argv_refresh(struct ui_browser *browser);
+
+void ui_browser__rb_tree_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__rb_tree_refresh(struct ui_browser *browser);
+
+void ui_browser__list_head_seek(struct ui_browser *browser, off_t offset, int whence);
+unsigned int ui_browser__list_head_refresh(struct ui_browser *browser);
+
+void ui_browser__init(void);
+#endif /* _PERF_UI_BROWSER_H_ */
diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build
new file mode 100644
index 000000000..de223f5be
--- /dev/null
+++ b/tools/perf/ui/browsers/Build
@@ -0,0 +1,10 @@
+libperf-y += annotate.o
+libperf-y += hists.o
+libperf-y += map.o
+libperf-y += scripts.o
+libperf-y += header.o
+
+CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST
+CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST
+CFLAGS_map.o += -DENABLE_SLFUTURE_CONST
+CFLAGS_scripts.o += -DENABLE_SLFUTURE_CONST
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
new file mode 100644
index 000000000..a3c255228
--- /dev/null
+++ b/tools/perf/ui/browsers/annotate.c
@@ -0,0 +1,940 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/util.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../../util/annotate.h"
+#include "../../util/hist.h"
+#include "../../util/sort.h"
+#include "../../util/symbol.h"
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include <inttypes.h>
+#include <pthread.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <sys/ttydefaults.h>
+#include <asm/bug.h>
+
+struct disasm_line_samples {
+ double percent;
+ struct sym_hist_entry he;
+};
+
+struct arch;
+
+struct annotate_browser {
+ struct ui_browser b;
+ struct rb_root entries;
+ struct rb_node *curr_hot;
+ struct annotation_line *selection;
+ struct arch *arch;
+ struct annotation_options *opts;
+ bool searching_backwards;
+ char search_bf[128];
+};
+
+static inline struct annotation *browser__annotation(struct ui_browser *browser)
+{
+ struct map_symbol *ms = browser->priv;
+ return symbol__annotation(ms->sym);
+}
+
+static bool disasm_line__filter(struct ui_browser *browser, void *entry)
+{
+ struct annotation *notes = browser__annotation(browser);
+ struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+ return annotation_line__filter(al, notes);
+}
+
+static int ui_browser__jumps_percent_color(struct ui_browser *browser, int nr, bool current)
+{
+ struct annotation *notes = browser__annotation(browser);
+
+ if (current && (!browser->use_navkeypressed || browser->navkeypressed))
+ return HE_COLORSET_SELECTED;
+ if (nr == notes->max_jump_sources)
+ return HE_COLORSET_TOP;
+ if (nr > 1)
+ return HE_COLORSET_MEDIUM;
+ return HE_COLORSET_NORMAL;
+}
+
+static int ui_browser__set_jumps_percent_color(void *browser, int nr, bool current)
+{
+ int color = ui_browser__jumps_percent_color(browser, nr, current);
+ return ui_browser__set_color(browser, color);
+}
+
+static int annotate_browser__set_color(void *browser, int color)
+{
+ return ui_browser__set_color(browser, color);
+}
+
+static void annotate_browser__write_graph(void *browser, int graph)
+{
+ ui_browser__write_graph(browser, graph);
+}
+
+static void annotate_browser__set_percent_color(void *browser, double percent, bool current)
+{
+ ui_browser__set_percent_color(browser, percent, current);
+}
+
+static void annotate_browser__printf(void *browser, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ ui_browser__vprintf(browser, fmt, args);
+ va_end(args);
+}
+
+static void annotate_browser__write(struct ui_browser *browser, void *entry, int row)
+{
+ struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+ struct annotation *notes = browser__annotation(browser);
+ struct annotation_line *al = list_entry(entry, struct annotation_line, node);
+ const bool is_current_entry = ui_browser__is_current_entry(browser, row);
+ struct annotation_write_ops ops = {
+ .first_line = row == 0,
+ .current_entry = is_current_entry,
+ .change_color = (!notes->options->hide_src_code &&
+ (!is_current_entry ||
+ (browser->use_navkeypressed &&
+ !browser->navkeypressed))),
+ .width = browser->width,
+ .obj = browser,
+ .set_color = annotate_browser__set_color,
+ .set_percent_color = annotate_browser__set_percent_color,
+ .set_jumps_percent_color = ui_browser__set_jumps_percent_color,
+ .printf = annotate_browser__printf,
+ .write_graph = annotate_browser__write_graph,
+ };
+
+ /* The scroll bar isn't being used */
+ if (!browser->navkeypressed)
+ ops.width += 1;
+
+ annotation_line__write(al, notes, &ops, ab->opts);
+
+ if (ops.current_entry)
+ ab->selection = al;
+}
+
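+/*
+ * Instruction fusion: some architectures execute an adjacent pair, e.g.
+ * cmp/test followed by a conditional jump on x86, as a single macro-op.
+ * When the previous line fuses with the jump under the cursor,
+ * ui_browser__mark_fused() extends the jump arrow to cover both lines.
+ */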
+static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor)
+{
+ struct disasm_line *pos = list_prev_entry(cursor, al.node);
+ const char *name;
+
+ if (!pos)
+ return false;
+
+ if (ins__is_lock(&pos->ins))
+ name = pos->ops.locked.ins.name;
+ else
+ name = pos->ins.name;
+
+ if (!name || !cursor->ins.name)
+ return false;
+
+ return ins__is_fused(ab->arch, name, cursor->ins.name);
+}
+
+static void annotate_browser__draw_current_jump(struct ui_browser *browser)
+{
+ struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+ struct disasm_line *cursor = disasm_line(ab->selection);
+ struct annotation_line *target;
+ unsigned int from, to;
+ struct map_symbol *ms = ab->b.priv;
+ struct symbol *sym = ms->sym;
+ struct annotation *notes = symbol__annotation(sym);
+ u8 pcnt_width = annotation__pcnt_width(notes);
+ int width;
+
+ /* PLT symbols contain external offsets */
+ if (strstr(sym->name, "@plt"))
+ return;
+
+ if (!disasm_line__is_valid_local_jump(cursor, sym))
+ return;
+
+ /*
+ * This was first seen with a gcc function, _cpp_lex_token, that has
+ * the usual jumps:
+ *
+ * │1159e6c: ↓ jne 115aa32 <_cpp_lex_token@@Base+0xf92>
+ *
+ * I.e. jumps to a label inside that function (_cpp_lex_token), and
+ * those work, but there is also this kind:
+ *
+ * │1159e8b: ↓ jne c469be <cpp_named_operator2name@@Base+0xa72>
+ *
+ * I.e. jumps to another function, outside _cpp_lex_token, which are
+ * not handled correctly and, as a side effect, generate references to
+ * notes->offsets[] entries that are NULL, so to make this code more
+ * robust, check for that here.
+ *
+ * A proper fix will be put in place later, looking at the function
+ * name right after the '<' token and probably treating this like a
+ * 'call' instruction.
+ */
+ target = notes->offsets[cursor->ops.target.offset];
+ if (target == NULL) {
+ ui_helpline__printf("WARN: jump target inconsistency, press 'o', notes->offsets[%#x] = NULL\n",
+ cursor->ops.target.offset);
+ return;
+ }
+
+ if (notes->options->hide_src_code) {
+ from = cursor->al.idx_asm;
+ to = target->idx_asm;
+ } else {
+ from = (u64)cursor->al.idx;
+ to = (u64)target->idx;
+ }
+
+ width = annotation__cycles_width(notes);
+
+ ui_browser__set_color(browser, HE_COLORSET_JUMP_ARROWS);
+ __ui_browser__line_arrow(browser,
+ pcnt_width + 2 + notes->widths.addr + width,
+ from, to);
+
+ if (is_fused(ab, cursor)) {
+ ui_browser__mark_fused(browser,
+ pcnt_width + 3 + notes->widths.addr + width,
+ from - 1,
+ to > from);
+ }
+}
+
+static unsigned int annotate_browser__refresh(struct ui_browser *browser)
+{
+ struct annotation *notes = browser__annotation(browser);
+ int ret = ui_browser__list_head_refresh(browser);
+ int pcnt_width = annotation__pcnt_width(notes);
+
+ if (notes->options->jump_arrows)
+ annotate_browser__draw_current_jump(browser);
+
+ ui_browser__set_color(browser, HE_COLORSET_NORMAL);
+ __ui_browser__vline(browser, pcnt_width, 0, browser->rows - 1);
+ return ret;
+}
+
+static int disasm__cmp(struct annotation_line *a, struct annotation_line *b)
+{
+ int i;
+
+ for (i = 0; i < a->data_nr; i++) {
+ if (a->data[i].percent == b->data[i].percent)
+ continue;
+ return a->data[i].percent < b->data[i].percent;
+ }
+ return 0;
+}
+
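+/*
+ * Annotation lines are kept in an rb_tree ordered by percent, so that
+ * rb_last() is the hottest line: 'H' jumps to it and TAB/UNTAB cycle
+ * through the tree in decreasing order of samples.
+ */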
+static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct annotation_line *l;
+
+ while (*p != NULL) {
+ parent = *p;
+ l = rb_entry(parent, struct annotation_line, rb_node);
+
+ if (disasm__cmp(al, l))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
+}
+
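+/*
+ * Make the line at idx the current entry and walk the top of the window
+ * back by up to half a screen of unfiltered lines, so the target ends up
+ * roughly centered.
+ */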
+static void annotate_browser__set_top(struct annotate_browser *browser,
+ struct annotation_line *pos, u32 idx)
+{
+ struct annotation *notes = browser__annotation(&browser->b);
+ unsigned back;
+
+ ui_browser__refresh_dimensions(&browser->b);
+ back = browser->b.height / 2;
+ browser->b.top_idx = browser->b.index = idx;
+
+ while (browser->b.top_idx != 0 && back != 0) {
+ pos = list_entry(pos->node.prev, struct annotation_line, node);
+
+ if (annotation_line__filter(pos, notes))
+ continue;
+
+ --browser->b.top_idx;
+ --back;
+ }
+
+ browser->b.top = pos;
+ browser->b.navkeypressed = true;
+}
+
+static void annotate_browser__set_rb_top(struct annotate_browser *browser,
+ struct rb_node *nd)
+{
+ struct annotation *notes = browser__annotation(&browser->b);
+ struct annotation_line * pos = rb_entry(nd, struct annotation_line, rb_node);
+ u32 idx = pos->idx;
+
+ if (notes->options->hide_src_code)
+ idx = pos->idx_asm;
+ annotate_browser__set_top(browser, pos, idx);
+ browser->curr_hot = nd;
+}
+
+static void annotate_browser__calc_percent(struct annotate_browser *browser,
+ struct perf_evsel *evsel)
+{
+ struct map_symbol *ms = browser->b.priv;
+ struct symbol *sym = ms->sym;
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *pos;
+
+ browser->entries = RB_ROOT;
+
+ pthread_mutex_lock(&notes->lock);
+
+ symbol__calc_percent(sym, evsel);
+
+ list_for_each_entry(pos, &notes->src->source, al.node) {
+ double max_percent = 0.0;
+ int i;
+
+ if (pos->al.offset == -1) {
+ RB_CLEAR_NODE(&pos->al.rb_node);
+ continue;
+ }
+
+ for (i = 0; i < pos->al.data_nr; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&pos->al.data[i],
+ browser->opts->percent_type);
+
+ if (max_percent < percent)
+ max_percent = percent;
+ }
+
+ if (max_percent < 0.01 && pos->al.ipc == 0) {
+ RB_CLEAR_NODE(&pos->al.rb_node);
+ continue;
+ }
+ disasm_rb_tree__insert(&browser->entries, &pos->al);
+ }
+ pthread_mutex_unlock(&notes->lock);
+
+ browser->curr_hot = rb_last(&browser->entries);
+}
+
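+/*
+ * Toggle between the source+asm and asm-only views while keeping the
+ * cursor on the same line: remap the selection between its idx (position
+ * among all lines) and idx_asm (position among asm lines only), then
+ * re-seek so the on-screen offset is preserved.
+ */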
+static bool annotate_browser__toggle_source(struct annotate_browser *browser)
+{
+ struct annotation *notes = browser__annotation(&browser->b);
+ struct annotation_line *al;
+ off_t offset = browser->b.index - browser->b.top_idx;
+
+ browser->b.seek(&browser->b, offset, SEEK_CUR);
+ al = list_entry(browser->b.top, struct annotation_line, node);
+
+ if (notes->options->hide_src_code) {
+ if (al->idx_asm < offset)
+ offset = al->idx;
+
+ browser->b.nr_entries = notes->nr_entries;
+ notes->options->hide_src_code = false;
+ browser->b.seek(&browser->b, -offset, SEEK_CUR);
+ browser->b.top_idx = al->idx - offset;
+ browser->b.index = al->idx;
+ } else {
+ if (al->idx_asm < 0) {
+ ui_helpline__puts("Only available for assembly lines.");
+ browser->b.seek(&browser->b, -offset, SEEK_CUR);
+ return false;
+ }
+
+ if (al->idx_asm < offset)
+ offset = al->idx_asm;
+
+ browser->b.nr_entries = notes->nr_asm_entries;
+ notes->options->hide_src_code = true;
+ browser->b.seek(&browser->b, -offset, SEEK_CUR);
+ browser->b.top_idx = al->idx_asm - offset;
+ browser->b.index = al->idx_asm;
+ }
+
+ return true;
+}
+
+static void ui_browser__init_asm_mode(struct ui_browser *browser)
+{
+ struct annotation *notes = browser__annotation(browser);
+ ui_browser__reset_index(browser);
+ browser->nr_entries = notes->nr_asm_entries;
+}
+
+#define SYM_TITLE_MAX_SIZE (PATH_MAX + 64)
+
+static int sym_title(struct symbol *sym, struct map *map, char *title,
+ size_t sz, int percent_type)
+{
+ return snprintf(title, sz, "%s %s [Percent: %s]", sym->name, map->dso->long_name,
+ percent_type_str(percent_type));
+}
+
+/*
+ * This can be called from external jumps, i.e. jumps from one function
+ * to another, like from the kernel's entry_SYSCALL_64 function to the
+ * swapgs_restore_regs_and_return_to_usermode() function.
+ *
+ * So all we check here is that dl->ops.target.sym is set, if it is, just
+ * go to that function and when exiting from its disassembly, come back
+ * to the calling function.
+ */
+static bool annotate_browser__callq(struct annotate_browser *browser,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt)
+{
+ struct map_symbol *ms = browser->b.priv;
+ struct disasm_line *dl = disasm_line(browser->selection);
+ struct annotation *notes;
+ char title[SYM_TITLE_MAX_SIZE];
+
+ if (!dl->ops.target.sym) {
+ ui_helpline__puts("The called function was not found.");
+ return true;
+ }
+
+ notes = symbol__annotation(dl->ops.target.sym);
+ pthread_mutex_lock(&notes->lock);
+
+ if (!symbol__hists(dl->ops.target.sym, evsel->evlist->nr_entries)) {
+ pthread_mutex_unlock(&notes->lock);
+ ui__warning("Not enough memory for annotating '%s' symbol!\n",
+ dl->ops.target.sym->name);
+ return true;
+ }
+
+ pthread_mutex_unlock(&notes->lock);
+ symbol__tui_annotate(dl->ops.target.sym, ms->map, evsel, hbt, browser->opts);
+ sym_title(ms->sym, ms->map, title, sizeof(title), browser->opts->percent_type);
+ ui_browser__show_title(&browser->b, title);
+ return true;
+}
+
+static
+struct disasm_line *annotate_browser__find_offset(struct annotate_browser *browser,
+ s64 offset, s64 *idx)
+{
+ struct annotation *notes = browser__annotation(&browser->b);
+ struct disasm_line *pos;
+
+ *idx = 0;
+ list_for_each_entry(pos, &notes->src->source, al.node) {
+ if (pos->al.offset == offset)
+ return pos;
+ if (!annotation_line__filter(&pos->al, notes))
+ ++*idx;
+ }
+
+ return NULL;
+}
+
+static bool annotate_browser__jump(struct annotate_browser *browser,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt)
+{
+ struct disasm_line *dl = disasm_line(browser->selection);
+ u64 offset;
+ s64 idx;
+
+ if (!ins__is_jump(&dl->ins))
+ return false;
+
+ if (dl->ops.target.outside) {
+ annotate_browser__callq(browser, evsel, hbt);
+ return true;
+ }
+
+ offset = dl->ops.target.offset;
+ dl = annotate_browser__find_offset(browser, offset, &idx);
+ if (dl == NULL) {
+ ui_helpline__printf("Invalid jump offset: %" PRIx64, offset);
+ return true;
+ }
+
+ annotate_browser__set_top(browser, &dl->al, idx);
+
+ return true;
+}
+
+static
+struct annotation_line *annotate_browser__find_string(struct annotate_browser *browser,
+ char *s, s64 *idx)
+{
+ struct annotation *notes = browser__annotation(&browser->b);
+ struct annotation_line *al = browser->selection;
+
+ *idx = browser->b.index;
+ list_for_each_entry_continue(al, &notes->src->source, node) {
+ if (annotation_line__filter(al, notes))
+ continue;
+
+ ++*idx;
+
+ if (al->line && strstr(al->line, s) != NULL)
+ return al;
+ }
+
+ return NULL;
+}
+
+static bool __annotate_browser__search(struct annotate_browser *browser)
+{
+ struct annotation_line *al;
+ s64 idx;
+
+ al = annotate_browser__find_string(browser, browser->search_bf, &idx);
+ if (al == NULL) {
+ ui_helpline__puts("String not found!");
+ return false;
+ }
+
+ annotate_browser__set_top(browser, al, idx);
+ browser->searching_backwards = false;
+ return true;
+}
+
+static
+struct annotation_line *annotate_browser__find_string_reverse(struct annotate_browser *browser,
+ char *s, s64 *idx)
+{
+ struct annotation *notes = browser__annotation(&browser->b);
+ struct annotation_line *al = browser->selection;
+
+ *idx = browser->b.index;
+ list_for_each_entry_continue_reverse(al, &notes->src->source, node) {
+ if (annotation_line__filter(al, notes))
+ continue;
+
+ --*idx;
+
+ if (al->line && strstr(al->line, s) != NULL)
+ return al;
+ }
+
+ return NULL;
+}
+
+static bool __annotate_browser__search_reverse(struct annotate_browser *browser)
+{
+ struct annotation_line *al;
+ s64 idx;
+
+ al = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx);
+ if (al == NULL) {
+ ui_helpline__puts("String not found!");
+ return false;
+ }
+
+ annotate_browser__set_top(browser, al, idx);
+ browser->searching_backwards = true;
+ return true;
+}
+
+static bool annotate_browser__search_window(struct annotate_browser *browser,
+ int delay_secs)
+{
+ if (ui_browser__input_window("Search", "String: ", browser->search_bf,
+ "ENTER: OK, ESC: Cancel",
+ delay_secs * 2) != K_ENTER ||
+ !*browser->search_bf)
+ return false;
+
+ return true;
+}
+
+static bool annotate_browser__search(struct annotate_browser *browser, int delay_secs)
+{
+ if (annotate_browser__search_window(browser, delay_secs))
+ return __annotate_browser__search(browser);
+
+ return false;
+}
+
+static bool annotate_browser__continue_search(struct annotate_browser *browser,
+ int delay_secs)
+{
+ if (!*browser->search_bf)
+ return annotate_browser__search(browser, delay_secs);
+
+ return __annotate_browser__search(browser);
+}
+
+static bool annotate_browser__search_reverse(struct annotate_browser *browser,
+ int delay_secs)
+{
+ if (annotate_browser__search_window(browser, delay_secs))
+ return __annotate_browser__search_reverse(browser);
+
+ return false;
+}
+
+static
+bool annotate_browser__continue_search_reverse(struct annotate_browser *browser,
+ int delay_secs)
+{
+ if (!*browser->search_bf)
+ return annotate_browser__search_reverse(browser, delay_secs);
+
+ return __annotate_browser__search_reverse(browser);
+}
+
+static int annotate_browser__show(struct ui_browser *browser, char *title, const char *help)
+{
+ struct annotate_browser *ab = container_of(browser, struct annotate_browser, b);
+ struct map_symbol *ms = browser->priv;
+ struct symbol *sym = ms->sym;
+ char symbol_dso[SYM_TITLE_MAX_SIZE];
+
+ if (ui_browser__show(browser, title, help) < 0)
+ return -1;
+
+ sym_title(sym, ms->map, symbol_dso, sizeof(symbol_dso), ab->opts->percent_type);
+
+ ui_browser__gotorc_title(browser, 0, 0);
+ ui_browser__set_color(browser, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(browser, symbol_dso, browser->width + 1);
+ return 0;
+}
+
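+/*
+ * The percent type is a pair of independent choices: {hits, period} x
+ * {local, global}. 'b' (base == true) flips the hits/period axis, while
+ * 'p' flips the local/global axis.
+ */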
+static void
+switch_percent_type(struct annotation_options *opts, bool base)
+{
+ switch (opts->percent_type) {
+ case PERCENT_HITS_LOCAL:
+ if (base)
+ opts->percent_type = PERCENT_PERIOD_LOCAL;
+ else
+ opts->percent_type = PERCENT_HITS_GLOBAL;
+ break;
+ case PERCENT_HITS_GLOBAL:
+ if (base)
+ opts->percent_type = PERCENT_PERIOD_GLOBAL;
+ else
+ opts->percent_type = PERCENT_HITS_LOCAL;
+ break;
+ case PERCENT_PERIOD_LOCAL:
+ if (base)
+ opts->percent_type = PERCENT_HITS_LOCAL;
+ else
+ opts->percent_type = PERCENT_PERIOD_GLOBAL;
+ break;
+ case PERCENT_PERIOD_GLOBAL:
+ if (base)
+ opts->percent_type = PERCENT_HITS_GLOBAL;
+ else
+ opts->percent_type = PERCENT_PERIOD_LOCAL;
+ break;
+ default:
+ WARN_ON(1);
+ }
+}
+
+static int annotate_browser__run(struct annotate_browser *browser,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt)
+{
+ struct rb_node *nd = NULL;
+ struct hists *hists = evsel__hists(evsel);
+ struct map_symbol *ms = browser->b.priv;
+ struct symbol *sym = ms->sym;
+ struct annotation *notes = symbol__annotation(ms->sym);
+ const char *help = "Press 'h' for help on key bindings";
+ int delay_secs = hbt ? hbt->refresh : 0;
+ char title[256];
+ int key;
+
+ hists__scnprintf_title(hists, title, sizeof(title));
+ if (annotate_browser__show(&browser->b, title, help) < 0)
+ return -1;
+
+ annotate_browser__calc_percent(browser, evsel);
+
+ if (browser->curr_hot) {
+ annotate_browser__set_rb_top(browser, browser->curr_hot);
+ browser->b.navkeypressed = false;
+ }
+
+ nd = browser->curr_hot;
+
+ while (1) {
+ key = ui_browser__run(&browser->b, delay_secs);
+
+ if (delay_secs != 0) {
+ annotate_browser__calc_percent(browser, evsel);
+ /*
+ * Current line focus got out of the list of most active
+ * lines, NULL it so that if TAB|UNTAB is pressed, we
+ * move to curr_hot (current hottest line).
+ */
+ if (nd != NULL && RB_EMPTY_NODE(nd))
+ nd = NULL;
+ }
+
+ switch (key) {
+ case K_TIMER:
+ if (hbt)
+ hbt->timer(hbt->arg);
+
+ if (delay_secs != 0) {
+ symbol__annotate_decay_histogram(sym, evsel->idx);
+ hists__scnprintf_title(hists, title, sizeof(title));
+ annotate_browser__show(&browser->b, title, help);
+ }
+ continue;
+ case K_TAB:
+ if (nd != NULL) {
+ nd = rb_prev(nd);
+ if (nd == NULL)
+ nd = rb_last(&browser->entries);
+ } else
+ nd = browser->curr_hot;
+ break;
+ case K_UNTAB:
+ if (nd != NULL) {
+ nd = rb_next(nd);
+ if (nd == NULL)
+ nd = rb_first(&browser->entries);
+ } else
+ nd = browser->curr_hot;
+ break;
+ case K_F1:
+ case 'h':
+ ui_browser__help_window(&browser->b,
+ "UP/DOWN/PGUP\n"
+ "PGDN/SPACE Navigate\n"
+ "q/ESC/CTRL+C Exit\n\n"
+ "ENTER Go to target\n"
+ "ESC Exit\n"
+ "H Go to hottest instruction\n"
+ "TAB/shift+TAB Cycle thru hottest instructions\n"
+ "j Toggle showing jump to target arrows\n"
+ "J Toggle showing number of jump sources on targets\n"
+ "n Search next string\n"
+ "o Toggle disassembler output/simplified view\n"
+ "O Bump offset level (jump targets -> +call -> all -> cycle thru)\n"
+ "s Toggle source code view\n"
+ "t Circulate percent, total period, samples view\n"
+ "c Show min/max cycle\n"
+ "/ Search string\n"
+ "k Toggle line numbers\n"
+ "P Print to [symbol_name].annotation file.\n"
+ "r Run available scripts\n"
+ "p Toggle percent type [local/global]\n"
+ "b Toggle percent base [period/hits]\n"
+ "? Search string backwards\n");
+ continue;
+ case 'r':
+ {
+ script_browse(NULL);
+ continue;
+ }
+ case 'k':
+ notes->options->show_linenr = !notes->options->show_linenr;
+ break;
+ case 'H':
+ nd = browser->curr_hot;
+ break;
+ case 's':
+ if (annotate_browser__toggle_source(browser))
+ ui_helpline__puts(help);
+ continue;
+ case 'o':
+ notes->options->use_offset = !notes->options->use_offset;
+ annotation__update_column_widths(notes);
+ continue;
+ case 'O':
+ if (++notes->options->offset_level > ANNOTATION__MAX_OFFSET_LEVEL)
+ notes->options->offset_level = ANNOTATION__MIN_OFFSET_LEVEL;
+ continue;
+ case 'j':
+ notes->options->jump_arrows = !notes->options->jump_arrows;
+ continue;
+ case 'J':
+ notes->options->show_nr_jumps = !notes->options->show_nr_jumps;
+ annotation__update_column_widths(notes);
+ continue;
+ case '/':
+ if (annotate_browser__search(browser, delay_secs)) {
+show_help:
+ ui_helpline__puts(help);
+ }
+ continue;
+ case 'n':
+ if (browser->searching_backwards ?
+ annotate_browser__continue_search_reverse(browser, delay_secs) :
+ annotate_browser__continue_search(browser, delay_secs))
+ goto show_help;
+ continue;
+ case '?':
+ if (annotate_browser__search_reverse(browser, delay_secs))
+ goto show_help;
+ continue;
+ case 'D': {
+ static int seq;
+ ui_helpline__pop();
+ ui_helpline__fpush("%d: nr_ent=%d, height=%d, idx=%d, top_idx=%d, nr_asm_entries=%d",
+ seq++, browser->b.nr_entries,
+ browser->b.height,
+ browser->b.index,
+ browser->b.top_idx,
+ notes->nr_asm_entries);
+ }
+ continue;
+ case K_ENTER:
+ case K_RIGHT:
+ {
+ struct disasm_line *dl = disasm_line(browser->selection);
+
+ if (browser->selection == NULL)
+ ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org");
+ else if (browser->selection->offset == -1)
+ ui_helpline__puts("Actions are only available for assembly lines.");
+ else if (!dl->ins.ops)
+ goto show_sup_ins;
+ else if (ins__is_ret(&dl->ins))
+ goto out;
+ else if (!(annotate_browser__jump(browser, evsel, hbt) ||
+ annotate_browser__callq(browser, evsel, hbt))) {
+show_sup_ins:
+ ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions.");
+ }
+ continue;
+ }
+ case 'P':
+ map_symbol__annotation_dump(ms, evsel, browser->opts);
+ continue;
+ case 't':
+ if (notes->options->show_total_period) {
+ notes->options->show_total_period = false;
+ notes->options->show_nr_samples = true;
+ } else if (notes->options->show_nr_samples)
+ notes->options->show_nr_samples = false;
+ else
+ notes->options->show_total_period = true;
+ annotation__update_column_widths(notes);
+ continue;
+ case 'c':
+ notes->options->show_minmax_cycle = !notes->options->show_minmax_cycle;
+ annotation__update_column_widths(notes);
+ continue;
+ case 'p':
+ case 'b':
+ switch_percent_type(browser->opts, key == 'b');
+ hists__scnprintf_title(hists, title, sizeof(title));
+ annotate_browser__show(&browser->b, title, help);
+ continue;
+ case K_LEFT:
+ case K_ESC:
+ case 'q':
+ case CTRL('c'):
+ goto out;
+ default:
+ continue;
+ }
+
+ if (nd != NULL)
+ annotate_browser__set_rb_top(browser, nd);
+ }
+out:
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
+int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *opts)
+{
+ return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt, opts);
+}
+
+int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *opts)
+{
+ /* reset abort key so that it can get Ctrl-C as a key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ return map_symbol__tui_annotate(&he->ms, evsel, hbt, opts);
+}
+
+int symbol__tui_annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *opts)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct map_symbol ms = {
+ .map = map,
+ .sym = sym,
+ };
+ struct annotate_browser browser = {
+ .b = {
+ .refresh = annotate_browser__refresh,
+ .seek = ui_browser__list_head_seek,
+ .write = annotate_browser__write,
+ .filter = disasm_line__filter,
+ .extra_title_lines = 1, /* for hists__scnprintf_title() */
+ .priv = &ms,
+ .use_navkeypressed = true,
+ },
+ .opts = opts,
+ };
+ int ret = -1, err;
+
+ if (sym == NULL)
+ return -1;
+
+ if (map->dso->annotate_warned)
+ return -1;
+
+ err = symbol__annotate2(sym, map, evsel, opts, &browser.arch);
+ if (err) {
+ char msg[BUFSIZ];
+ symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s:\n%s", sym->name, msg);
+ goto out_free_offsets;
+ }
+
+ ui_helpline__push("Press ESC to exit");
+
+ browser.b.width = notes->max_line_len;
+ browser.b.nr_entries = notes->nr_entries;
+ browser.b.entries = &notes->src->source;
+ browser.b.width += 18; /* Percentage */
+
+ if (notes->options->hide_src_code)
+ ui_browser__init_asm_mode(&browser.b);
+
+ ret = annotate_browser__run(&browser, evsel, hbt);
+
+ annotated_source__purge(notes->src);
+
+out_free_offsets:
+ zfree(&notes->offsets);
+ return ret;
+}
diff --git a/tools/perf/ui/browsers/header.c b/tools/perf/ui/browsers/header.c
new file mode 100644
index 000000000..d75492189
--- /dev/null
+++ b/tools/perf/ui/browsers/header.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util/cache.h"
+#include "util/debug.h"
+#include "ui/browser.h"
+#include "ui/keysyms.h"
+#include "ui/ui.h"
+#include "ui/util.h"
+#include "ui/libslang.h"
+#include "util/header.h"
+#include "util/session.h"
+
+#include <sys/ttydefaults.h>
+
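+/*
+ * browser->priv carries the horizontal scroll offset, cast to and from an
+ * unsigned long: LEFT/RIGHT in list_menu__run() move it in steps of 10
+ * columns, and lines shorter than the offset are rendered as blank.
+ */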
+static void ui_browser__argv_write(struct ui_browser *browser,
+ void *entry, int row)
+{
+ char **arg = entry;
+ char *str = *arg;
+ char empty[] = " ";
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+ unsigned long offset = (unsigned long)browser->priv;
+
+ if (offset >= strlen(str))
+ str = empty;
+ else
+ str = str + offset;
+
+ ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+ HE_COLORSET_NORMAL);
+
+ ui_browser__write_nstring(browser, str, browser->width);
+}
+
+static int list_menu__run(struct ui_browser *menu)
+{
+ int key;
+ unsigned long offset;
+ const char help[] =
+ "h/?/F1 Show this window\n"
+ "UP/DOWN/PGUP\n"
+ "PGDN/SPACE\n"
+ "LEFT/RIGHT Navigate\n"
+ "q/ESC/CTRL+C Exit browser";
+
+ if (ui_browser__show(menu, "Header information", "Press 'q' to exit") < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(menu, 0);
+
+ switch (key) {
+ case K_RIGHT:
+ offset = (unsigned long)menu->priv;
+ offset += 10;
+ menu->priv = (void *)offset;
+ continue;
+ case K_LEFT:
+ offset = (unsigned long)menu->priv;
+ if (offset >= 10)
+ offset -= 10;
+ menu->priv = (void *)offset;
+ continue;
+ case K_F1:
+ case 'h':
+ case '?':
+ ui_browser__help_window(menu, help);
+ continue;
+ case K_ESC:
+ case 'q':
+ case CTRL('c'):
+ key = -1;
+ break;
+ default:
+ continue;
+ }
+
+ break;
+ }
+
+ ui_browser__hide(menu);
+ return key;
+}
+
+static int ui__list_menu(int argc, char * const argv[])
+{
+ struct ui_browser menu = {
+ .entries = (void *)argv,
+ .refresh = ui_browser__argv_refresh,
+ .seek = ui_browser__argv_seek,
+ .write = ui_browser__argv_write,
+ .nr_entries = argc,
+ };
+
+ return list_menu__run(&menu);
+}
+
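+/*
+ * Print the perf.data header information into an in-memory stream, split
+ * the result into a NULL-terminated array of lines and page through it
+ * with the generic argv browser.
+ */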
+int tui__header_window(struct perf_env *env)
+{
+ int i, argc = 0;
+ char **argv;
+ struct perf_session *session;
+ char *ptr, *pos;
+ size_t size;
+ FILE *fp = open_memstream(&ptr, &size);
+
+ session = container_of(env, struct perf_session, header.env);
+ perf_header__fprintf_info(session, fp, true);
+ fclose(fp);
+
+ for (pos = ptr, argc = 0; (pos = strchr(pos, '\n')) != NULL; pos++)
+ argc++;
+
+ argv = calloc(argc + 1, sizeof(*argv));
+ if (argv == NULL)
+ goto out;
+
+ argv[0] = pos = ptr;
+ for (i = 1; (pos = strchr(pos, '\n')) != NULL; i++) {
+ *pos++ = '\0';
+ argv[i] = pos;
+ }
+
+ BUG_ON(i != argc + 1);
+
+ ui__list_menu(argc, argv);
+
+out:
+ free(argv);
+ free(ptr);
+ return 0;
+}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
new file mode 100644
index 000000000..ed3490202
--- /dev/null
+++ b/tools/perf/ui/browsers/hists.c
@@ -0,0 +1,3310 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/rbtree.h>
+#include <sys/ttydefaults.h>
+
+#include "../../util/evsel.h"
+#include "../../util/evlist.h"
+#include "../../util/hist.h"
+#include "../../util/pstack.h"
+#include "../../util/sort.h"
+#include "../../util/util.h"
+#include "../../util/top.h"
+#include "../../util/thread.h"
+#include "../../arch/common.h"
+
+#include "../browsers/hists.h"
+#include "../helpline.h"
+#include "../util.h"
+#include "../ui.h"
+#include "map.h"
+#include "annotate.h"
+#include "srcline.h"
+#include "string2.h"
+#include "units.h"
+
+#include "sane_ctype.h"
+
+extern void hist_browser__init_hpp(void);
+
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size);
+static void hist_browser__update_nr_entries(struct hist_browser *hb);
+
+static struct rb_node *hists__filter_entries(struct rb_node *nd,
+ float min_pcnt);
+
+static bool hist_browser__has_filter(struct hist_browser *hb)
+{
+ return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter || hb->c2c_filter;
+}
+
+static int hist_browser__get_folding(struct hist_browser *browser)
+{
+ struct rb_node *nd;
+ struct hists *hists = browser->hists;
+ int unfolded_rows = 0;
+
+ for (nd = rb_first(&hists->entries);
+ (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL;
+ nd = rb_hierarchy_next(nd)) {
+ struct hist_entry *he =
+ rb_entry(nd, struct hist_entry, rb_node);
+
+ if (he->leaf && he->unfolded)
+ unfolded_rows += he->nr_rows;
+ }
+ return unfolded_rows;
+}
+
+static void hist_browser__set_title_space(struct hist_browser *hb)
+{
+ struct ui_browser *browser = &hb->b;
+ struct hists *hists = hb->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+
+ browser->extra_title_lines = hb->show_headers ? hpp_list->nr_header_lines : 0;
+}
+
+static u32 hist_browser__nr_entries(struct hist_browser *hb)
+{
+ u32 nr_entries;
+
+ if (symbol_conf.report_hierarchy)
+ nr_entries = hb->nr_hierarchy_entries;
+ else if (hist_browser__has_filter(hb))
+ nr_entries = hb->nr_non_filtered_entries;
+ else
+ nr_entries = hb->hists->nr_entries;
+
+ hb->nr_callchain_rows = hist_browser__get_folding(hb);
+ return nr_entries + hb->nr_callchain_rows;
+}
+
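+/*
+ * Showing the column headers steals rows from the browser area: move
+ * nr_header_lines between extra_title_lines and rows, and pull the cursor
+ * back if it fell off the shrunken screen.
+ */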
+static void hist_browser__update_rows(struct hist_browser *hb)
+{
+ struct ui_browser *browser = &hb->b;
+ struct hists *hists = hb->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+ u16 index_row;
+
+ if (!hb->show_headers) {
+ browser->rows += browser->extra_title_lines;
+ browser->extra_title_lines = 0;
+ return;
+ }
+
+ browser->extra_title_lines = hpp_list->nr_header_lines;
+ browser->rows -= browser->extra_title_lines;
+ /*
+ * Check whether we were at the last line and that line is no longer
+ * visible because we now show the header line(s).
+ */
+ index_row = browser->index - browser->top_idx;
+ if (index_row >= browser->rows)
+ browser->index -= index_row - browser->rows + 1;
+}
+
+static void hist_browser__refresh_dimensions(struct ui_browser *browser)
+{
+ struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+
+ /* 3 == +/- toggle symbol before actual hist_entry rendering */
+ browser->width = 3 + (hists__sort_list_width(hb->hists) + sizeof("[k]"));
+ /*
+ * FIXME: Just keeping existing behaviour, but this really should be
+ * before updating browser->width, as it will invalidate the
+ * calculation above. Fix this and the fallout in another
+ * changeset.
+ */
+ ui_browser__refresh_dimensions(browser);
+}
+
+static void hist_browser__reset(struct hist_browser *browser)
+{
+ /*
+ * The hists__remove_entry_filter() already folds non-filtered
+ * entries so we can assume it has 0 callchain rows.
+ */
+ browser->nr_callchain_rows = 0;
+
+ hist_browser__update_nr_entries(browser);
+ browser->b.nr_entries = hist_browser__nr_entries(browser);
+ hist_browser__refresh_dimensions(&browser->b);
+ ui_browser__reset_index(&browser->b);
+}
+
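+/*
+ * Folding signs shown in the first column: '+' means the entry has hidden
+ * children, '-' means they are currently expanded, ' ' means it has none.
+ */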
+static char tree__folded_sign(bool unfolded)
+{
+ return unfolded ? '-' : '+';
+}
+
+static char hist_entry__folded(const struct hist_entry *he)
+{
+ return he->has_children ? tree__folded_sign(he->unfolded) : ' ';
+}
+
+static char callchain_list__folded(const struct callchain_list *cl)
+{
+ return cl->has_children ? tree__folded_sign(cl->unfolded) : ' ';
+}
+
+static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
+{
+ cl->unfolded = unfold ? cl->has_children : false;
+}
+
+static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
+{
+ int n = 0;
+ struct rb_node *nd;
+
+ for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) {
+ struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
+ struct callchain_list *chain;
+ char folded_sign = ' '; /* No children */
+
+ list_for_each_entry(chain, &child->val, list) {
+ ++n;
+
+ /* We need this because we may not have children */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ break;
+ }
+
+ if (folded_sign == '-') /* Have children and they're unfolded */
+ n += callchain_node__count_rows_rb_tree(child);
+ }
+
+ return n;
+}
+
+static int callchain_node__count_flat_rows(struct callchain_node *node)
+{
+ struct callchain_list *chain;
+ char folded_sign = 0;
+ int n = 0;
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ if (!folded_sign) {
+ /* only check first chain list entry */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ return 1;
+ }
+ n++;
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ if (!folded_sign) {
+ /* node->parent_val list might be empty */
+ folded_sign = callchain_list__folded(chain);
+ if (folded_sign == '+')
+ return 1;
+ }
+ n++;
+ }
+
+ return n;
+}
+
+static int callchain_node__count_folded_rows(struct callchain_node *node __maybe_unused)
+{
+ return 1;
+}
+
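+/*
+ * How many browser rows a callchain node takes depends on the display
+ * mode: folded mode always uses a single row, flat mode counts the
+ * flattened parent_val + val lists, and graph mode counts this node's
+ * entries plus, when the last one is unfolded, all of its children.
+ */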
+static int callchain_node__count_rows(struct callchain_node *node)
+{
+ struct callchain_list *chain;
+ bool unfolded = false;
+ int n = 0;
+
+ if (callchain_param.mode == CHAIN_FLAT)
+ return callchain_node__count_flat_rows(node);
+ else if (callchain_param.mode == CHAIN_FOLDED)
+ return callchain_node__count_folded_rows(node);
+
+ list_for_each_entry(chain, &node->val, list) {
+ ++n;
+
+ unfolded = chain->unfolded;
+ }
+
+ if (unfolded)
+ n += callchain_node__count_rows_rb_tree(node);
+
+ return n;
+}
+
+static int callchain__count_rows(struct rb_root *chain)
+{
+ struct rb_node *nd;
+ int n = 0;
+
+ for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
+ struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
+ n += callchain_node__count_rows(node);
+ }
+
+ return n;
+}
+
+static int hierarchy_count_rows(struct hist_browser *hb, struct hist_entry *he,
+ bool include_children)
+{
+ int count = 0;
+ struct rb_node *node;
+ struct hist_entry *child;
+
+ if (he->leaf)
+ return callchain__count_rows(&he->sorted_chain);
+
+ if (he->has_no_entry)
+ return 1;
+
+ node = rb_first(&he->hroot_out);
+ while (node) {
+ float percent;
+
+ child = rb_entry(node, struct hist_entry, rb_node);
+ percent = hist_entry__get_percent_limit(child);
+
+ if (!child->filtered && percent >= hb->min_pcnt) {
+ count++;
+
+ if (include_children && child->unfolded)
+ count += hierarchy_count_rows(hb, child, true);
+ }
+
+ node = rb_next(node);
+ }
+ return count;
+}
+
+static bool hist_entry__toggle_fold(struct hist_entry *he)
+{
+ if (!he)
+ return false;
+
+ if (!he->has_children)
+ return false;
+
+ he->unfolded = !he->unfolded;
+ return true;
+}
+
+static bool callchain_list__toggle_fold(struct callchain_list *cl)
+{
+ if (!cl)
+ return false;
+
+ if (!cl->has_children)
+ return false;
+
+ cl->unfolded = !cl->unfolded;
+ return true;
+}
+
+static void callchain_node__init_have_children_rb_tree(struct callchain_node *node)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) {
+ struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
+ struct callchain_list *chain;
+ bool first = true;
+
+ list_for_each_entry(chain, &child->val, list) {
+ if (first) {
+ first = false;
+ chain->has_children = chain->list.next != &child->val ||
+ !RB_EMPTY_ROOT(&child->rb_root);
+ } else
+ chain->has_children = chain->list.next == &child->val &&
+ !RB_EMPTY_ROOT(&child->rb_root);
+ }
+
+ callchain_node__init_have_children_rb_tree(child);
+ }
+}
+
+static void callchain_node__init_have_children(struct callchain_node *node,
+ bool has_sibling)
+{
+ struct callchain_list *chain;
+
+ chain = list_entry(node->val.next, struct callchain_list, list);
+ chain->has_children = has_sibling;
+
+ if (!list_empty(&node->val)) {
+ chain = list_entry(node->val.prev, struct callchain_list, list);
+ chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
+ }
+
+ callchain_node__init_have_children_rb_tree(node);
+}
+
+static void callchain__init_have_children(struct rb_root *root)
+{
+ struct rb_node *nd = rb_first(root);
+ bool has_sibling = nd && rb_next(nd);
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
+ callchain_node__init_have_children(node, has_sibling);
+ if (callchain_param.mode == CHAIN_FLAT ||
+ callchain_param.mode == CHAIN_FOLDED)
+ callchain_node__make_parent_list(node);
+ }
+}
+
+static void hist_entry__init_have_children(struct hist_entry *he)
+{
+ if (he->init_have_children)
+ return;
+
+ if (he->leaf) {
+ he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
+ callchain__init_have_children(&he->sorted_chain);
+ } else {
+ he->has_children = !RB_EMPTY_ROOT(&he->hroot_out);
+ }
+
+ he->init_have_children = true;
+}
+
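+/*
+ * Toggling a fold has to keep three counters consistent: b.nr_entries
+ * (all visible rows), nr_callchain_rows (rows from expanded callchains)
+ * and nr_hierarchy_entries (rows from expanded hierarchy children);
+ * hierarchy_count_rows() is used to (un)account grandchildren.
+ */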
+static bool hist_browser__toggle_fold(struct hist_browser *browser)
+{
+ struct hist_entry *he = browser->he_selection;
+ struct map_symbol *ms = browser->selection;
+ struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
+ bool has_children;
+
+ if (!he || !ms)
+ return false;
+
+ if (ms == &he->ms)
+ has_children = hist_entry__toggle_fold(he);
+ else
+ has_children = callchain_list__toggle_fold(cl);
+
+ if (has_children) {
+ int child_rows = 0;
+
+ hist_entry__init_have_children(he);
+ browser->b.nr_entries -= he->nr_rows;
+
+ if (he->leaf)
+ browser->nr_callchain_rows -= he->nr_rows;
+ else
+ browser->nr_hierarchy_entries -= he->nr_rows;
+
+ if (symbol_conf.report_hierarchy)
+ child_rows = hierarchy_count_rows(browser, he, true);
+
+ if (he->unfolded) {
+ if (he->leaf)
+ he->nr_rows = callchain__count_rows(
+ &he->sorted_chain);
+ else
+ he->nr_rows = hierarchy_count_rows(browser, he, false);
+
+ /* account for grandchildren */
+ if (symbol_conf.report_hierarchy)
+ browser->b.nr_entries += child_rows - he->nr_rows;
+
+ if (!he->leaf && he->nr_rows == 0) {
+ he->has_no_entry = true;
+ he->nr_rows = 1;
+ }
+ } else {
+ if (symbol_conf.report_hierarchy)
+ browser->b.nr_entries -= child_rows - he->nr_rows;
+
+ if (he->has_no_entry)
+ he->has_no_entry = false;
+
+ he->nr_rows = 0;
+ }
+
+ browser->b.nr_entries += he->nr_rows;
+
+ if (he->leaf)
+ browser->nr_callchain_rows += he->nr_rows;
+ else
+ browser->nr_hierarchy_entries += he->nr_rows;
+
+ return true;
+ }
+
+ /* If it doesn't have children, no toggling performed */
+ return false;
+}
+
+static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool unfold)
+{
+ int n = 0;
+ struct rb_node *nd;
+
+ for (nd = rb_first(&node->rb_root); nd; nd = rb_next(nd)) {
+ struct callchain_node *child = rb_entry(nd, struct callchain_node, rb_node);
+ struct callchain_list *chain;
+ bool has_children = false;
+
+ list_for_each_entry(chain, &child->val, list) {
+ ++n;
+ callchain_list__set_folding(chain, unfold);
+ has_children = chain->has_children;
+ }
+
+ if (has_children)
+ n += callchain_node__set_folding_rb_tree(child, unfold);
+ }
+
+ return n;
+}
+
+static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
+{
+ struct callchain_list *chain;
+ bool has_children = false;
+ int n = 0;
+
+ list_for_each_entry(chain, &node->val, list) {
+ ++n;
+ callchain_list__set_folding(chain, unfold);
+ has_children = chain->has_children;
+ }
+
+ if (has_children)
+ n += callchain_node__set_folding_rb_tree(node, unfold);
+
+ return n;
+}
+
+static int callchain__set_folding(struct rb_root *chain, bool unfold)
+{
+ struct rb_node *nd;
+ int n = 0;
+
+ for (nd = rb_first(chain); nd; nd = rb_next(nd)) {
+ struct callchain_node *node = rb_entry(nd, struct callchain_node, rb_node);
+ n += callchain_node__set_folding(node, unfold);
+ }
+
+ return n;
+}
+
+static int hierarchy_set_folding(struct hist_browser *hb, struct hist_entry *he,
+ bool unfold __maybe_unused)
+{
+ float percent;
+ struct rb_node *nd;
+ struct hist_entry *child;
+ int n = 0;
+
+ for (nd = rb_first(&he->hroot_out); nd; nd = rb_next(nd)) {
+ child = rb_entry(nd, struct hist_entry, rb_node);
+ percent = hist_entry__get_percent_limit(child);
+ if (!child->filtered && percent >= hb->min_pcnt)
+ n++;
+ }
+
+ return n;
+}
+
+static void __hist_entry__set_folding(struct hist_entry *he,
+ struct hist_browser *hb, bool unfold)
+{
+ hist_entry__init_have_children(he);
+ he->unfolded = unfold ? he->has_children : false;
+
+ if (he->has_children) {
+ int n;
+
+ if (he->leaf)
+ n = callchain__set_folding(&he->sorted_chain, unfold);
+ else
+ n = hierarchy_set_folding(hb, he, unfold);
+
+ he->nr_rows = unfold ? n : 0;
+ } else
+ he->nr_rows = 0;
+}
+
+static void hist_entry__set_folding(struct hist_entry *he,
+ struct hist_browser *browser, bool unfold)
+{
+ double percent;
+
+ percent = hist_entry__get_percent_limit(he);
+ if (he->filtered || percent < browser->min_pcnt)
+ return;
+
+ __hist_entry__set_folding(he, browser, unfold);
+
+ if (!he->depth || unfold)
+ browser->nr_hierarchy_entries++;
+ if (he->leaf)
+ browser->nr_callchain_rows += he->nr_rows;
+ else if (unfold && !hist_entry__has_hierarchy_children(he, browser->min_pcnt)) {
+ browser->nr_hierarchy_entries++;
+ he->has_no_entry = true;
+ he->nr_rows = 1;
+ } else
+ he->has_no_entry = false;
+}
+
+static void
+__hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+{
+ struct rb_node *nd;
+ struct hist_entry *he;
+
+ nd = rb_first(&browser->hists->entries);
+ while (nd) {
+ he = rb_entry(nd, struct hist_entry, rb_node);
+
+ /* set folding state even if it's currently folded */
+ nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+ hist_entry__set_folding(he, browser, unfold);
+ }
+}
+
+static void hist_browser__set_folding(struct hist_browser *browser, bool unfold)
+{
+ browser->nr_hierarchy_entries = 0;
+ browser->nr_callchain_rows = 0;
+ __hist_browser__set_folding(browser, unfold);
+
+ browser->b.nr_entries = hist_browser__nr_entries(browser);
+ /* Go to the start; we may be well past the valid entries after a collapse */
+ ui_browser__reset_index(&browser->b);
+}
+
+static void hist_browser__set_folding_selected(struct hist_browser *browser, bool unfold)
+{
+ if (!browser->he_selection)
+ return;
+
+ hist_entry__set_folding(browser->he_selection, browser, unfold);
+ browser->b.nr_entries = hist_browser__nr_entries(browser);
+}
+
+static void ui_browser__warn_lost_events(struct ui_browser *browser)
+{
+ ui_browser__warning(browser, 4,
+ "Events are being lost, check IO/CPU overload!\n\n"
+ "You may want to run 'perf' using a RT scheduler policy:\n\n"
+ " perf top -r 80\n\n"
+ "Or reduce the sampling frequency.");
+}
+
+static int hist_browser__title(struct hist_browser *browser, char *bf, size_t size)
+{
+ return browser->title ? browser->title(browser, bf, size) : 0;
+}
+
+int hist_browser__run(struct hist_browser *browser, const char *help,
+ bool warn_lost_event)
+{
+ int key;
+ char title[160];
+ struct hist_browser_timer *hbt = browser->hbt;
+ int delay_secs = hbt ? hbt->refresh : 0;
+
+ browser->b.entries = &browser->hists->entries;
+ browser->b.nr_entries = hist_browser__nr_entries(browser);
+
+ hist_browser__title(browser, title, sizeof(title));
+
+ if (ui_browser__show(&browser->b, title, "%s", help) < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(&browser->b, delay_secs);
+
+ switch (key) {
+ case K_TIMER: {
+ u64 nr_entries;
+
+ WARN_ON_ONCE(!hbt);
+
+ if (hbt)
+ hbt->timer(hbt->arg);
+
+ if (hist_browser__has_filter(browser) ||
+ symbol_conf.report_hierarchy)
+ hist_browser__update_nr_entries(browser);
+
+ nr_entries = hist_browser__nr_entries(browser);
+ ui_browser__update_nr_entries(&browser->b, nr_entries);
+
+ if (warn_lost_event &&
+ (browser->hists->stats.nr_lost_warned !=
+ browser->hists->stats.nr_events[PERF_RECORD_LOST])) {
+ browser->hists->stats.nr_lost_warned =
+ browser->hists->stats.nr_events[PERF_RECORD_LOST];
+ ui_browser__warn_lost_events(&browser->b);
+ }
+
+ hist_browser__title(browser, title, sizeof(title));
+ ui_browser__show_title(&browser->b, title);
+ continue;
+ }
+ case 'D': { /* Debug */
+ static int seq;
+ struct hist_entry *h = rb_entry(browser->b.top,
+ struct hist_entry, rb_node);
+ ui_helpline__pop();
+ ui_helpline__fpush("%d: nr_ent=(%d,%d), etl: %d, rows=%d, idx=%d, fve: idx=%d, row_off=%d, nrows=%d",
+ seq++, browser->b.nr_entries,
+ browser->hists->nr_entries,
+ browser->b.extra_title_lines,
+ browser->b.rows,
+ browser->b.index,
+ browser->b.top_idx,
+ h->row_offset, h->nr_rows);
+ }
+ break;
+ case 'C':
+ /* Collapse the whole world. */
+ hist_browser__set_folding(browser, false);
+ break;
+ case 'c':
+ /* Collapse the selected entry. */
+ hist_browser__set_folding_selected(browser, false);
+ break;
+ case 'E':
+ /* Expand the whole world. */
+ hist_browser__set_folding(browser, true);
+ break;
+ case 'e':
+ /* Expand the selected entry. */
+ hist_browser__set_folding_selected(browser, true);
+ break;
+ case 'H':
+ browser->show_headers = !browser->show_headers;
+ hist_browser__update_rows(browser);
+ break;
+ case K_ENTER:
+ if (hist_browser__toggle_fold(browser))
+ break;
+ /* fall thru */
+ default:
+ goto out;
+ }
+ }
+out:
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
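+/*
+ * One argument block serves both output paths: the TUI browser uses
+ * row_offset/is_current_entry, while the dump-to-file path ('P' hotkey)
+ * only uses fp/printed.  The print_callchain_entry_fn callbacks below
+ * pick the fields they need.
+ */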
+struct callchain_print_arg {
+ /* for hists browser */
+ off_t row_offset;
+ bool is_current_entry;
+
+ /* for file dump */
+ FILE *fp;
+ int printed;
+};
+
+typedef void (*print_callchain_entry_fn)(struct hist_browser *browser,
+ struct callchain_list *chain,
+ const char *str, int offset,
+ unsigned short row,
+ struct callchain_print_arg *arg);
+
+static void hist_browser__show_callchain_entry(struct hist_browser *browser,
+ struct callchain_list *chain,
+ const char *str, int offset,
+ unsigned short row,
+ struct callchain_print_arg *arg)
+{
+ int color, width;
+ char folded_sign = callchain_list__folded(chain);
+ bool show_annotated = browser->show_dso && chain->ms.sym && symbol__annotation(chain->ms.sym)->src;
+
+ color = HE_COLORSET_NORMAL;
+ width = browser->b.width - (offset + 2);
+ if (ui_browser__is_current_entry(&browser->b, row)) {
+ browser->selection = &chain->ms;
+ color = HE_COLORSET_SELECTED;
+ arg->is_current_entry = true;
+ }
+
+ ui_browser__set_color(&browser->b, color);
+ ui_browser__gotorc(&browser->b, row, 0);
+ ui_browser__write_nstring(&browser->b, " ", offset);
+ ui_browser__printf(&browser->b, "%c", folded_sign);
+ ui_browser__write_graph(&browser->b, show_annotated ? SLSMG_RARROW_CHAR : ' ');
+ ui_browser__write_nstring(&browser->b, str, width);
+}
+
+static void hist_browser__fprintf_callchain_entry(struct hist_browser *b __maybe_unused,
+ struct callchain_list *chain,
+ const char *str, int offset,
+ unsigned short row __maybe_unused,
+ struct callchain_print_arg *arg)
+{
+ char folded_sign = callchain_list__folded(chain);
+
+ arg->printed += fprintf(arg->fp, "%*s%c %s\n", offset, " ",
+ folded_sign, str);
+}
+
+typedef bool (*check_output_full_fn)(struct hist_browser *browser,
+ unsigned short row);
+
+static bool hist_browser__check_output_full(struct hist_browser *browser,
+ unsigned short row)
+{
+ return browser->b.rows == row;
+}
+
+static bool hist_browser__check_dump_full(struct hist_browser *browser __maybe_unused,
+ unsigned short row __maybe_unused)
+{
+ return false;
+}
+
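+/* Each callchain nesting level indents by this many columns. */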
+#define LEVEL_OFFSET_STEP 3
+
+static int hist_browser__show_callchain_list(struct hist_browser *browser,
+ struct callchain_node *node,
+ struct callchain_list *chain,
+ unsigned short row, u64 total,
+ bool need_percent, int offset,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg)
+{
+ char bf[1024], *alloc_str;
+ char buf[64], *alloc_str2;
+ const char *str;
+ int ret = 1;
+
+ if (arg->row_offset != 0) {
+ arg->row_offset--;
+ return 0;
+ }
+
+ alloc_str = NULL;
+ alloc_str2 = NULL;
+
+ str = callchain_list__sym_name(chain, bf, sizeof(bf),
+ browser->show_dso);
+
+ if (symbol_conf.show_branchflag_count) {
+ callchain_list_counts__printf_value(chain, NULL,
+ buf, sizeof(buf));
+
+ if (asprintf(&alloc_str2, "%s%s", str, buf) < 0)
+ str = "Not enough memory!";
+ else
+ str = alloc_str2;
+ }
+
+ if (need_percent) {
+ callchain_node__scnprintf_value(node, buf, sizeof(buf),
+ total);
+
+ if (asprintf(&alloc_str, "%s %s", buf, str) < 0)
+ str = "Not enough memory!";
+ else
+ str = alloc_str;
+ }
+
+ print(browser, chain, str, offset, row, arg);
+ free(alloc_str);
+ free(alloc_str2);
+
+ return ret;
+}
+
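+/*
+ * Percentages are only printed when they carry information: a lone
+ * child whose cumulated hits equal the parent's total would always
+ * read 100%, so that case is skipped.
+ */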
+static bool check_percent_display(struct rb_node *node, u64 parent_total)
+{
+ struct callchain_node *child;
+
+ if (node == NULL)
+ return false;
+
+ if (rb_next(node))
+ return true;
+
+ child = rb_entry(node, struct callchain_node, rb_node);
+ return callchain_cumul_hits(child) != parent_total;
+}
+
+static int hist_browser__show_callchain_flat(struct hist_browser *browser,
+ struct rb_root *root,
+ unsigned short row, u64 total,
+ u64 parent_total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = LEVEL_OFFSET_STEP;
+ bool need_percent;
+
+ node = rb_first(root);
+ need_percent = check_percent_display(node, parent_total);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain;
+ char folded_sign = ' ';
+ int first = true;
+ int extra_offset = 0;
+
+ list_for_each_entry(chain, &child->parent_val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ goto next;
+ }
+
+ list_for_each_entry(chain, &child->val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ break;
+ }
+
+next:
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+out:
+ return row - first_row;
+}
+
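+/*
+ * Build the single-line form of a folded callchain by appending each
+ * symbol name to the accumulated string, joined with the configured
+ * field separator (';' by default).
+ */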
+static char *hist_browser__folded_callchain_str(struct hist_browser *browser,
+ struct callchain_list *chain,
+ char *value_str, char *old_str)
+{
+ char bf[1024];
+ const char *str;
+ char *new;
+
+ str = callchain_list__sym_name(chain, bf, sizeof(bf),
+ browser->show_dso);
+ if (old_str) {
+ if (asprintf(&new, "%s%s%s", old_str,
+ symbol_conf.field_sep ?: ";", str) < 0)
+ new = NULL;
+ } else {
+ if (value_str) {
+ if (asprintf(&new, "%s %s", value_str, str) < 0)
+ new = NULL;
+ } else {
+ if (asprintf(&new, "%s", str) < 0)
+ new = NULL;
+ }
+ }
+ return new;
+}
+
+static int hist_browser__show_callchain_folded(struct hist_browser *browser,
+ struct rb_root *root,
+ unsigned short row, u64 total,
+ u64 parent_total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = LEVEL_OFFSET_STEP;
+ bool need_percent;
+
+ node = rb_first(root);
+ need_percent = check_percent_display(node, parent_total);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain, *first_chain = NULL;
+ int first = true;
+ char *value_str = NULL, *value_str_alloc = NULL;
+ char *chain_str = NULL, *chain_str_alloc = NULL;
+
+ if (arg->row_offset != 0) {
+ arg->row_offset--;
+ goto next;
+ }
+
+ if (need_percent) {
+ char buf[64];
+
+ callchain_node__scnprintf_value(child, buf, sizeof(buf), total);
+ if (asprintf(&value_str, "%s", buf) < 0) {
+ value_str = (char *)"<...>";
+ goto do_print;
+ }
+ value_str_alloc = value_str;
+ }
+
+ list_for_each_entry(chain, &child->parent_val, list) {
+ chain_str = hist_browser__folded_callchain_str(browser,
+ chain, value_str, chain_str);
+ if (first) {
+ first = false;
+ first_chain = chain;
+ }
+
+ if (chain_str == NULL) {
+ chain_str = (char *)"Not enough memory!";
+ goto do_print;
+ }
+
+ chain_str_alloc = chain_str;
+ }
+
+ list_for_each_entry(chain, &child->val, list) {
+ chain_str = hist_browser__folded_callchain_str(browser,
+ chain, value_str, chain_str);
+ if (first) {
+ first = false;
+ first_chain = chain;
+ }
+
+ if (chain_str == NULL) {
+ chain_str = (char *)"Not enough memory!";
+ goto do_print;
+ }
+
+ chain_str_alloc = chain_str;
+ }
+
+do_print:
+ print(browser, first_chain, chain_str, offset, row++, arg);
+ free(value_str_alloc);
+ free(chain_str_alloc);
+
+next:
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+
+ return row - first_row;
+}
+
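+/*
+ * Graph mode: each nesting level indents by another LEVEL_OFFSET_STEP,
+ * and with CHAIN_GRAPH_REL the percentages are computed against the
+ * parent's total instead of the global one.
+ */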
+static int hist_browser__show_callchain_graph(struct hist_browser *browser,
+ struct rb_root *root, int level,
+ unsigned short row, u64 total,
+ u64 parent_total,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ struct rb_node *node;
+ int first_row = row, offset = level * LEVEL_OFFSET_STEP;
+ bool need_percent;
+ u64 percent_total = total;
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL)
+ percent_total = parent_total;
+
+ node = rb_first(root);
+ need_percent = check_percent_display(node, parent_total);
+
+ while (node) {
+ struct callchain_node *child = rb_entry(node, struct callchain_node, rb_node);
+ struct rb_node *next = rb_next(node);
+ struct callchain_list *chain;
+ char folded_sign = ' ';
+ int first = true;
+ int extra_offset = 0;
+
+ list_for_each_entry(chain, &child->val, list) {
+ bool was_first = first;
+
+ if (first)
+ first = false;
+ else if (need_percent)
+ extra_offset = LEVEL_OFFSET_STEP;
+
+ folded_sign = callchain_list__folded(chain);
+
+ row += hist_browser__show_callchain_list(browser, child,
+ chain, row, percent_total,
+ was_first && need_percent,
+ offset + extra_offset,
+ print, arg);
+
+ if (is_output_full(browser, row))
+ goto out;
+
+ if (folded_sign == '+')
+ break;
+ }
+
+ if (folded_sign == '-') {
+ const int new_level = level + (extra_offset ? 2 : 1);
+
+ row += hist_browser__show_callchain_graph(browser, &child->rb_root,
+ new_level, row, total,
+ child->children_hit,
+ print, arg, is_output_full);
+ }
+ if (is_output_full(browser, row))
+ break;
+ node = next;
+ }
+out:
+ return row - first_row;
+}
+
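+/*
+ * Dispatch on callchain_param.mode: flat and folded layouts have their
+ * own renderers, everything else goes through the graph renderer.
+ */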
+static int hist_browser__show_callchain(struct hist_browser *browser,
+ struct hist_entry *entry, int level,
+ unsigned short row,
+ print_callchain_entry_fn print,
+ struct callchain_print_arg *arg,
+ check_output_full_fn is_output_full)
+{
+ u64 total = hists__total_period(entry->hists);
+ u64 parent_total;
+ int printed;
+
+ if (symbol_conf.cumulate_callchain)
+ parent_total = entry->stat_acc->period;
+ else
+ parent_total = entry->stat.period;
+
+ if (callchain_param.mode == CHAIN_FLAT) {
+ printed = hist_browser__show_callchain_flat(browser,
+ &entry->sorted_chain, row,
+ total, parent_total, print, arg,
+ is_output_full);
+ } else if (callchain_param.mode == CHAIN_FOLDED) {
+ printed = hist_browser__show_callchain_folded(browser,
+ &entry->sorted_chain, row,
+ total, parent_total, print, arg,
+ is_output_full);
+ } else {
+ printed = hist_browser__show_callchain_graph(browser,
+ &entry->sorted_chain, level, row,
+ total, parent_total, print, arg,
+ is_output_full);
+ }
+
+ if (arg->is_current_entry)
+ browser->he_selection = entry;
+
+ return printed;
+}
+
+struct hpp_arg {
+ struct ui_browser *b;
+ char folded_sign;
+ bool current_entry;
+};
+
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+ struct hpp_arg *arg = hpp->ptr;
+ int ret, len;
+ va_list args;
+ double percent;
+
+ va_start(args, fmt);
+ len = va_arg(args, int);
+ percent = va_arg(args, double);
+ va_end(args);
+
+ ui_browser__set_percent_color(arg->b, percent, arg->current_entry);
+
+ ret = scnprintf(hpp->buf, hpp->size, fmt, len, percent);
+ ui_browser__printf(arg->b, "%s", hpp->buf);
+
+ return ret;
+}
+
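+/*
+ * These macros stamp out one color callback per overhead column; the
+ * ACC variant additionally prints "N/A" when callchain accumulation is
+ * disabled.
+ */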
+#define __HPP_COLOR_PERCENT_FN(_type, _field) \
+static u64 __hpp_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int \
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, __hpp_get_##_field, " %*.2f%%", \
+ __hpp__slsmg_color_printf, true); \
+}
+
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+static u64 __hpp_get_acc_##_field(struct hist_entry *he) \
+{ \
+ return he->stat_acc->_field; \
+} \
+ \
+static int \
+hist_browser__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ if (!symbol_conf.cumulate_callchain) { \
+ struct hpp_arg *arg = hpp->ptr; \
+ int len = fmt->user_len ?: fmt->len; \
+ int ret = scnprintf(hpp->buf, hpp->size, \
+ "%*s", len, "N/A"); \
+ ui_browser__printf(arg->b, "%s", hpp->buf); \
+ \
+ return ret; \
+ } \
+ return hpp__fmt(fmt, hpp, he, __hpp_get_acc_##_field, \
+ " %*.2f%%", __hpp__slsmg_color_printf, true); \
+}
+
+__HPP_COLOR_PERCENT_FN(overhead, period)
+__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
+__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
+__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
+__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
+
+#undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+
+void hist_browser__init_hpp(void)
+{
+ perf_hpp__format[PERF_HPP__OVERHEAD].color =
+ hist_browser__hpp_color_overhead;
+ perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+ hist_browser__hpp_color_overhead_sys;
+ perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+ hist_browser__hpp_color_overhead_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+ hist_browser__hpp_color_overhead_guest_sys;
+ perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+ hist_browser__hpp_color_overhead_guest_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+ hist_browser__hpp_color_overhead_acc;
+}
+
+static int hist_browser__show_entry(struct hist_browser *browser,
+ struct hist_entry *entry,
+ unsigned short row)
+{
+ int printed = 0;
+ int width = browser->b.width;
+ char folded_sign = ' ';
+ bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+ bool use_callchain = hist_entry__has_callchains(entry) && symbol_conf.use_callchain;
+ off_t row_offset = entry->row_offset;
+ bool first = true;
+ struct perf_hpp_fmt *fmt;
+
+ if (current_entry) {
+ browser->he_selection = entry;
+ browser->selection = &entry->ms;
+ }
+
+ if (use_callchain) {
+ hist_entry__init_have_children(entry);
+ folded_sign = hist_entry__folded(entry);
+ }
+
+ if (row_offset == 0) {
+ struct hpp_arg arg = {
+ .b = &browser->b,
+ .folded_sign = folded_sign,
+ .current_entry = current_entry,
+ };
+ int column = 0;
+
+ ui_browser__gotorc(&browser->b, row, 0);
+
+ hists__for_each_format(browser->hists, fmt) {
+ char s[2048];
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ .ptr = &arg,
+ };
+
+ if (perf_hpp__should_skip(fmt, entry->hists) ||
+ column++ < browser->b.horiz_scroll)
+ continue;
+
+ if (current_entry && browser->b.navkeypressed) {
+ ui_browser__set_color(&browser->b,
+ HE_COLORSET_SELECTED);
+ } else {
+ ui_browser__set_color(&browser->b,
+ HE_COLORSET_NORMAL);
+ }
+
+ if (first) {
+ if (use_callchain) {
+ ui_browser__printf(&browser->b, "%c ", folded_sign);
+ width -= 2;
+ }
+ first = false;
+ } else {
+ ui_browser__printf(&browser->b, " ");
+ width -= 2;
+ }
+
+ if (fmt->color) {
+ int ret = fmt->color(fmt, &hpp, entry);
+ hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+ /*
+ * fmt->color() already used ui_browser to
+ * print the non alignment bits, skip it (+ret):
+ */
+ ui_browser__printf(&browser->b, "%s", s + ret);
+ } else {
+ hist_entry__snprintf_alignment(entry, &hpp, fmt, fmt->entry(fmt, &hpp, entry));
+ ui_browser__printf(&browser->b, "%s", s);
+ }
+ width -= hpp.buf - s;
+ }
+
+ /* The scroll bar isn't being used */
+ if (!browser->b.navkeypressed)
+ width += 1;
+
+ ui_browser__write_nstring(&browser->b, "", width);
+
+ ++row;
+ ++printed;
+ } else
+ --row_offset;
+
+ if (folded_sign == '-' && row != browser->b.rows) {
+ struct callchain_print_arg arg = {
+ .row_offset = row_offset,
+ .is_current_entry = current_entry,
+ };
+
+ printed += hist_browser__show_callchain(browser,
+ entry, 1, row,
+ hist_browser__show_callchain_entry,
+ &arg,
+ hist_browser__check_output_full);
+ }
+
+ return printed;
+}
+
+static int hist_browser__show_hierarchy_entry(struct hist_browser *browser,
+ struct hist_entry *entry,
+ unsigned short row,
+ int level)
+{
+ int printed = 0;
+ int width = browser->b.width;
+ char folded_sign = ' ';
+ bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+ off_t row_offset = entry->row_offset;
+ bool first = true;
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ struct hpp_arg arg = {
+ .b = &browser->b,
+ .current_entry = current_entry,
+ };
+ int column = 0;
+ int hierarchy_indent = (entry->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+ if (current_entry) {
+ browser->he_selection = entry;
+ browser->selection = &entry->ms;
+ }
+
+ hist_entry__init_have_children(entry);
+ folded_sign = hist_entry__folded(entry);
+ arg.folded_sign = folded_sign;
+
+ if (entry->leaf && row_offset) {
+ row_offset--;
+ goto show_callchain;
+ }
+
+ ui_browser__gotorc(&browser->b, row, 0);
+
+ if (current_entry && browser->b.navkeypressed)
+ ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+ else
+ ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+ ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+ width -= level * HIERARCHY_INDENT;
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&entry->hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ char s[2048];
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ .ptr = &arg,
+ };
+
+ if (perf_hpp__should_skip(fmt, entry->hists) ||
+ column++ < browser->b.horiz_scroll)
+ continue;
+
+ if (current_entry && browser->b.navkeypressed) {
+ ui_browser__set_color(&browser->b,
+ HE_COLORSET_SELECTED);
+ } else {
+ ui_browser__set_color(&browser->b,
+ HE_COLORSET_NORMAL);
+ }
+
+ if (first) {
+ ui_browser__printf(&browser->b, "%c ", folded_sign);
+ width -= 2;
+ first = false;
+ } else {
+ ui_browser__printf(&browser->b, " ");
+ width -= 2;
+ }
+
+ if (fmt->color) {
+ int ret = fmt->color(fmt, &hpp, entry);
+ hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+ /*
+ * fmt->color() already used ui_browser to
+ * print the non alignment bits, skip it (+ret):
+ */
+ ui_browser__printf(&browser->b, "%s", s + ret);
+ } else {
+ int ret = fmt->entry(fmt, &hpp, entry);
+ hist_entry__snprintf_alignment(entry, &hpp, fmt, ret);
+ ui_browser__printf(&browser->b, "%s", s);
+ }
+ width -= hpp.buf - s;
+ }
+
+ if (!first) {
+ ui_browser__write_nstring(&browser->b, "", hierarchy_indent);
+ width -= hierarchy_indent;
+ }
+
+ if (column >= browser->b.horiz_scroll) {
+ char s[2048];
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ .ptr = &arg,
+ };
+
+ if (current_entry && browser->b.navkeypressed) {
+ ui_browser__set_color(&browser->b,
+ HE_COLORSET_SELECTED);
+ } else {
+ ui_browser__set_color(&browser->b,
+ HE_COLORSET_NORMAL);
+ }
+
+ perf_hpp_list__for_each_format(entry->hpp_list, fmt) {
+ if (first) {
+ ui_browser__printf(&browser->b, "%c ", folded_sign);
+ first = false;
+ } else {
+ ui_browser__write_nstring(&browser->b, "", 2);
+ }
+
+ width -= 2;
+
+ /*
+ * No need to call hist_entry__snprintf_alignment()
+ * since this fmt is always the last column in the
+ * hierarchy mode.
+ */
+ if (fmt->color) {
+ width -= fmt->color(fmt, &hpp, entry);
+ } else {
+ int i = 0;
+
+ width -= fmt->entry(fmt, &hpp, entry);
+ ui_browser__printf(&browser->b, "%s", ltrim(s));
+
+ while (isspace(s[i++]))
+ width++;
+ }
+ }
+ }
+
+ /* The scroll bar isn't being used */
+ if (!browser->b.navkeypressed)
+ width += 1;
+
+ ui_browser__write_nstring(&browser->b, "", width);
+
+ ++row;
+ ++printed;
+
+show_callchain:
+ if (entry->leaf && folded_sign == '-' && row != browser->b.rows) {
+ struct callchain_print_arg carg = {
+ .row_offset = row_offset,
+ };
+
+ printed += hist_browser__show_callchain(browser, entry,
+ level + 1, row,
+ hist_browser__show_callchain_entry, &carg,
+ hist_browser__check_output_full);
+ }
+
+ return printed;
+}
+
+static int hist_browser__show_no_entry(struct hist_browser *browser,
+ unsigned short row, int level)
+{
+ int width = browser->b.width;
+ bool current_entry = ui_browser__is_current_entry(&browser->b, row);
+ bool first = true;
+ int column = 0;
+ int ret;
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ int indent = browser->hists->nr_hpp_node - 2;
+
+ if (current_entry) {
+ browser->he_selection = NULL;
+ browser->selection = NULL;
+ }
+
+ ui_browser__gotorc(&browser->b, row, 0);
+
+ if (current_entry && browser->b.navkeypressed)
+ ui_browser__set_color(&browser->b, HE_COLORSET_SELECTED);
+ else
+ ui_browser__set_color(&browser->b, HE_COLORSET_NORMAL);
+
+ ui_browser__write_nstring(&browser->b, "", level * HIERARCHY_INDENT);
+ width -= level * HIERARCHY_INDENT;
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&browser->hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (perf_hpp__should_skip(fmt, browser->hists) ||
+ column++ < browser->b.horiz_scroll)
+ continue;
+
+ ret = fmt->width(fmt, NULL, browser->hists);
+
+ if (first) {
+ /* for folded sign */
+ first = false;
+ ret++;
+ } else {
+ /* space between columns */
+ ret += 2;
+ }
+
+ ui_browser__write_nstring(&browser->b, "", ret);
+ width -= ret;
+ }
+
+ ui_browser__write_nstring(&browser->b, "", indent * HIERARCHY_INDENT);
+ width -= indent * HIERARCHY_INDENT;
+
+ if (column >= browser->b.horiz_scroll) {
+ char buf[32];
+
+ ret = snprintf(buf, sizeof(buf), "no entry >= %.2f%%", browser->min_pcnt);
+ ui_browser__printf(&browser->b, " %s", buf);
+ width -= ret + 2;
+ }
+
+ /* The scroll bar isn't being used */
+ if (!browser->b.navkeypressed)
+ width += 1;
+
+ ui_browser__write_nstring(&browser->b, "", width);
+ return 1;
+}
+
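+/* Advance the hpp buffer and report whether it has been exhausted. */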
+static int advance_hpp_check(struct perf_hpp *hpp, int inc)
+{
+ advance_hpp(hpp, inc);
+ return hpp->size <= 0;
+}
+
+static int
+hists_browser__scnprintf_headers(struct hist_browser *browser, char *buf,
+ size_t size, int line)
+{
+ struct hists *hists = browser->hists;
+ struct perf_hpp dummy_hpp = {
+ .buf = buf,
+ .size = size,
+ };
+ struct perf_hpp_fmt *fmt;
+ size_t ret = 0;
+ int column = 0;
+ int span = 0;
+
+ if (hists__has_callchains(hists) && symbol_conf.use_callchain) {
+ ret = scnprintf(buf, size, " ");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ return ret;
+ }
+
+ hists__for_each_format(browser->hists, fmt) {
+ if (perf_hpp__should_skip(fmt, hists) || column++ < browser->b.horiz_scroll)
+ continue;
+
+ ret = fmt->header(fmt, &dummy_hpp, hists, line, &span);
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+
+ if (span)
+ continue;
+
+ ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " ");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+ }
+
+ return ret;
+}
+
+static int hists_browser__scnprintf_hierarchy_headers(struct hist_browser *browser, char *buf, size_t size)
+{
+ struct hists *hists = browser->hists;
+ struct perf_hpp dummy_hpp = {
+ .buf = buf,
+ .size = size,
+ };
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ size_t ret = 0;
+ int column = 0;
+ int indent = hists->nr_hpp_node - 2;
+ bool first_node, first_col;
+
+ ret = scnprintf(buf, size, " ");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ return ret;
+
+ first_node = true;
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (column++ < browser->b.horiz_scroll)
+ continue;
+
+ ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+
+ ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " ");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+
+ first_node = false;
+ }
+
+ if (!first_node) {
+ ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "%*s",
+ indent * HIERARCHY_INDENT, "");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ return ret;
+ }
+
+ first_node = true;
+ list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+ if (!first_node) {
+ ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, " / ");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+ }
+ first_node = false;
+
+ first_col = true;
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ char *start;
+
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first_col) {
+ ret = scnprintf(dummy_hpp.buf, dummy_hpp.size, "+");
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+ }
+ first_col = false;
+
+ ret = fmt->header(fmt, &dummy_hpp, hists, 0, NULL);
+ dummy_hpp.buf[ret] = '\0';
+
+ start = trim(dummy_hpp.buf);
+ ret = strlen(start);
+
+ if (start != dummy_hpp.buf)
+ memmove(dummy_hpp.buf, start, ret + 1);
+
+ if (advance_hpp_check(&dummy_hpp, ret))
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static void hists_browser__hierarchy_headers(struct hist_browser *browser)
+{
+ char headers[1024];
+
+ hists_browser__scnprintf_hierarchy_headers(browser, headers,
+ sizeof(headers));
+
+ ui_browser__gotorc(&browser->b, 0, 0);
+ ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+}
+
+static void hists_browser__headers(struct hist_browser *browser)
+{
+ struct hists *hists = browser->hists;
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+
+ int line;
+
+ for (line = 0; line < hpp_list->nr_header_lines; line++) {
+ char headers[1024];
+
+ hists_browser__scnprintf_headers(browser, headers,
+ sizeof(headers), line);
+
+ ui_browser__gotorc_title(&browser->b, line, 0);
+ ui_browser__set_color(&browser->b, HE_COLORSET_ROOT);
+ ui_browser__write_nstring(&browser->b, headers, browser->b.width + 1);
+ }
+}
+
+static void hist_browser__show_headers(struct hist_browser *browser)
+{
+ if (symbol_conf.report_hierarchy)
+ hists_browser__hierarchy_headers(browser);
+ else
+ hists_browser__headers(browser);
+}
+
+static void ui_browser__hists_init_top(struct ui_browser *browser)
+{
+ if (browser->top == NULL) {
+ struct hist_browser *hb;
+
+ hb = container_of(browser, struct hist_browser, b);
+ browser->top = rb_first(&hb->hists->entries);
+ }
+}
+
+static unsigned int hist_browser__refresh(struct ui_browser *browser)
+{
+ unsigned row = 0;
+ struct rb_node *nd;
+ struct hist_browser *hb = container_of(browser, struct hist_browser, b);
+
+ if (hb->show_headers)
+ hist_browser__show_headers(hb);
+
+ ui_browser__hists_init_top(browser);
+ hb->he_selection = NULL;
+ hb->selection = NULL;
+
+ for (nd = browser->top; nd; nd = rb_hierarchy_next(nd)) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ float percent;
+
+ if (h->filtered) {
+ /* let it move to sibling */
+ h->unfolded = false;
+ continue;
+ }
+
+ percent = hist_entry__get_percent_limit(h);
+ if (percent < hb->min_pcnt)
+ continue;
+
+ if (symbol_conf.report_hierarchy) {
+ row += hist_browser__show_hierarchy_entry(hb, h, row,
+ h->depth);
+ if (row == browser->rows)
+ break;
+
+ if (h->has_no_entry) {
+ hist_browser__show_no_entry(hb, row, h->depth + 1);
+ row++;
+ }
+ } else {
+ row += hist_browser__show_entry(hb, h, row);
+ }
+
+ if (row == browser->rows)
+ break;
+ }
+
+ return row;
+}
+
+static struct rb_node *hists__filter_entries(struct rb_node *nd,
+ float min_pcnt)
+{
+ while (nd != NULL) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ float percent = hist_entry__get_percent_limit(h);
+
+ if (!h->filtered && percent >= min_pcnt)
+ return nd;
+
+ /*
+ * If it's filtered, all of its children were filtered too,
+ * so move to the sibling node.
+ */
+ if (rb_next(nd))
+ nd = rb_next(nd);
+ else
+ nd = rb_hierarchy_next(nd);
+ }
+
+ return NULL;
+}
+
+static struct rb_node *hists__filter_prev_entries(struct rb_node *nd,
+ float min_pcnt)
+{
+ while (nd != NULL) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ float percent = hist_entry__get_percent_limit(h);
+
+ if (!h->filtered && percent >= min_pcnt)
+ return nd;
+
+ nd = rb_hierarchy_prev(nd);
+ }
+
+ return NULL;
+}
+
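+/*
+ * Seeking must account for unfolded callchains: an entry with an open
+ * callchain spans nr_rows extra lines, so moving by 'offset' rows may
+ * only adjust row_offset within the current entry instead of advancing
+ * to another rb_node.
+ */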
+static void ui_browser__hists_seek(struct ui_browser *browser,
+ off_t offset, int whence)
+{
+ struct hist_entry *h;
+ struct rb_node *nd;
+ bool first = true;
+ struct hist_browser *hb;
+
+ hb = container_of(browser, struct hist_browser, b);
+
+ if (browser->nr_entries == 0)
+ return;
+
+ ui_browser__hists_init_top(browser);
+
+ switch (whence) {
+ case SEEK_SET:
+ nd = hists__filter_entries(rb_first(browser->entries),
+ hb->min_pcnt);
+ break;
+ case SEEK_CUR:
+ nd = browser->top;
+ goto do_offset;
+ case SEEK_END:
+ nd = rb_hierarchy_last(rb_last(browser->entries));
+ nd = hists__filter_prev_entries(nd, hb->min_pcnt);
+ first = false;
+ break;
+ default:
+ return;
+ }
+
+ /*
+ * Moves that are not relative to the first visible entry invalidate its
+ * row_offset:
+ */
+ h = rb_entry(browser->top, struct hist_entry, rb_node);
+ h->row_offset = 0;
+
+ /*
+ * Here we have to check if nd is expanded (+); if it is, we can't go to
+ * the next top level hist_entry, instead we must compute an offset of
+ * what _not_ to show and not change the first visible entry.
+ *
+ * This offset increases when we are going from top to bottom and
+ * decreases when we're going from bottom to top.
+ *
+ * As we don't have backpointers to the top level in the callchains
+ * structure, we need to always print the whole hist_entry callchain,
+ * skipping the first ones that are before the first visible entry
+ * and stop when we printed enough lines to fill the screen.
+ */
+do_offset:
+ if (!nd)
+ return;
+
+ if (offset > 0) {
+ do {
+ h = rb_entry(nd, struct hist_entry, rb_node);
+ if (h->unfolded && h->leaf) {
+ u16 remaining = h->nr_rows - h->row_offset;
+ if (offset > remaining) {
+ offset -= remaining;
+ h->row_offset = 0;
+ } else {
+ h->row_offset += offset;
+ offset = 0;
+ browser->top = nd;
+ break;
+ }
+ }
+ nd = hists__filter_entries(rb_hierarchy_next(nd),
+ hb->min_pcnt);
+ if (nd == NULL)
+ break;
+ --offset;
+ browser->top = nd;
+ } while (offset != 0);
+ } else if (offset < 0) {
+ while (1) {
+ h = rb_entry(nd, struct hist_entry, rb_node);
+ if (h->unfolded && h->leaf) {
+ if (first) {
+ if (-offset > h->row_offset) {
+ offset += h->row_offset;
+ h->row_offset = 0;
+ } else {
+ h->row_offset += offset;
+ offset = 0;
+ browser->top = nd;
+ break;
+ }
+ } else {
+ if (-offset > h->nr_rows) {
+ offset += h->nr_rows;
+ h->row_offset = 0;
+ } else {
+ h->row_offset = h->nr_rows + offset;
+ offset = 0;
+ browser->top = nd;
+ break;
+ }
+ }
+ }
+
+ nd = hists__filter_prev_entries(rb_hierarchy_prev(nd),
+ hb->min_pcnt);
+ if (nd == NULL)
+ break;
+ ++offset;
+ browser->top = nd;
+ if (offset == 0) {
+ /*
+ * Last unfiltered hist_entry; if it is
+ * unfolded, row_offset must point at its
+ * last callchain row.
+ */
+ h = rb_entry(nd, struct hist_entry, rb_node);
+ if (h->unfolded && h->leaf)
+ h->row_offset = h->nr_rows;
+ break;
+ }
+ first = false;
+ }
+ } else {
+ browser->top = nd;
+ h = rb_entry(nd, struct hist_entry, rb_node);
+ h->row_offset = 0;
+ }
+}
+
+static int hist_browser__fprintf_callchain(struct hist_browser *browser,
+ struct hist_entry *he, FILE *fp,
+ int level)
+{
+ struct callchain_print_arg arg = {
+ .fp = fp,
+ };
+
+ hist_browser__show_callchain(browser, he, level, 0,
+ hist_browser__fprintf_callchain_entry, &arg,
+ hist_browser__check_dump_full);
+ return arg.printed;
+}
+
+static int hist_browser__fprintf_entry(struct hist_browser *browser,
+ struct hist_entry *he, FILE *fp)
+{
+ char s[8192];
+ int printed = 0;
+ char folded_sign = ' ';
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ };
+ struct perf_hpp_fmt *fmt;
+ bool first = true;
+ int ret;
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
+ folded_sign = hist_entry__folded(he);
+ printed += fprintf(fp, "%c ", folded_sign);
+ }
+
+ hists__for_each_format(browser->hists, fmt) {
+ if (perf_hpp__should_skip(fmt, he->hists))
+ continue;
+
+ if (!first) {
+ ret = scnprintf(hpp.buf, hpp.size, " ");
+ advance_hpp(&hpp, ret);
+ } else
+ first = false;
+
+ ret = fmt->entry(fmt, &hpp, he);
+ ret = hist_entry__snprintf_alignment(he, &hpp, fmt, ret);
+ advance_hpp(&hpp, ret);
+ }
+ printed += fprintf(fp, "%s\n", s);
+
+ if (folded_sign == '-')
+ printed += hist_browser__fprintf_callchain(browser, he, fp, 1);
+
+ return printed;
+}
+
+
+static int hist_browser__fprintf_hierarchy_entry(struct hist_browser *browser,
+ struct hist_entry *he,
+ FILE *fp, int level)
+{
+ char s[8192];
+ int printed = 0;
+ char folded_sign = ' ';
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ };
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ bool first = true;
+ int ret;
+ int hierarchy_indent = (he->hists->nr_hpp_node - 2) * HIERARCHY_INDENT;
+
+ printed = fprintf(fp, "%*s", level * HIERARCHY_INDENT, "");
+
+ folded_sign = hist_entry__folded(he);
+ printed += fprintf(fp, "%c", folded_sign);
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&he->hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (!first) {
+ ret = scnprintf(hpp.buf, hpp.size, " ");
+ advance_hpp(&hpp, ret);
+ } else
+ first = false;
+
+ ret = fmt->entry(fmt, &hpp, he);
+ advance_hpp(&hpp, ret);
+ }
+
+ ret = scnprintf(hpp.buf, hpp.size, "%*s", hierarchy_indent, "");
+ advance_hpp(&hpp, ret);
+
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ ret = scnprintf(hpp.buf, hpp.size, " ");
+ advance_hpp(&hpp, ret);
+
+ ret = fmt->entry(fmt, &hpp, he);
+ advance_hpp(&hpp, ret);
+ }
+
+ printed += fprintf(fp, "%s\n", rtrim(s));
+
+ if (he->leaf && folded_sign == '-') {
+ printed += hist_browser__fprintf_callchain(browser, he, fp,
+ he->depth + 1);
+ }
+
+ return printed;
+}
+
+static int hist_browser__fprintf(struct hist_browser *browser, FILE *fp)
+{
+ struct rb_node *nd = hists__filter_entries(rb_first(browser->b.entries),
+ browser->min_pcnt);
+ int printed = 0;
+
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ if (symbol_conf.report_hierarchy) {
+ printed += hist_browser__fprintf_hierarchy_entry(browser,
+ h, fp,
+ h->depth);
+ } else {
+ printed += hist_browser__fprintf_entry(browser, h, fp);
+ }
+
+ nd = hists__filter_entries(rb_hierarchy_next(nd),
+ browser->min_pcnt);
+ }
+
+ return printed;
+}
+
+static int hist_browser__dump(struct hist_browser *browser)
+{
+ char filename[64];
+ FILE *fp;
+
+ while (1) {
+ scnprintf(filename, sizeof(filename), "perf.hist.%d", browser->print_seq);
+ if (access(filename, F_OK))
+ break;
+ /*
+ * XXX: Just an arbitrary lazy upper limit
+ */
+ if (++browser->print_seq == 8192) {
+ ui_helpline__fpush("Too many perf.hist.N files, nothing written!");
+ return -1;
+ }
+ }
+
+ fp = fopen(filename, "w");
+ if (fp == NULL) {
+ char bf[64];
+ const char *err = str_error_r(errno, bf, sizeof(bf));
+ ui_helpline__fpush("Couldn't write to %s: %s", filename, err);
+ return -1;
+ }
+
+ ++browser->print_seq;
+ hist_browser__fprintf(browser, fp);
+ fclose(fp);
+ ui_helpline__fpush("%s written!", filename);
+
+ return 0;
+}
+
+void hist_browser__init(struct hist_browser *browser,
+ struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+
+ browser->hists = hists;
+ browser->b.refresh = hist_browser__refresh;
+ browser->b.refresh_dimensions = hist_browser__refresh_dimensions;
+ browser->b.seek = ui_browser__hists_seek;
+ browser->b.use_navkeypressed = true;
+ browser->show_headers = symbol_conf.show_hist_headers;
+ hist_browser__set_title_space(browser);
+
+ if (symbol_conf.report_hierarchy) {
+ struct perf_hpp_list_node *fmt_node;
+
+ /* count overhead columns (in the first node) */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt)
+ ++browser->b.columns;
+
+ /* add a single column for whole hierarchy sort keys */
+ ++browser->b.columns;
+ } else {
+ hists__for_each_format(hists, fmt)
+ ++browser->b.columns;
+ }
+
+ hists__reset_column_width(hists);
+}
+
+struct hist_browser *hist_browser__new(struct hists *hists)
+{
+ struct hist_browser *browser = zalloc(sizeof(*browser));
+
+ if (browser)
+ hist_browser__init(browser, hists);
+
+ return browser;
+}
+
+static struct hist_browser *
+perf_evsel_browser__new(struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct perf_env *env,
+ struct annotation_options *annotation_opts)
+{
+ struct hist_browser *browser = hist_browser__new(evsel__hists(evsel));
+
+ if (browser) {
+ browser->hbt = hbt;
+ browser->env = env;
+ browser->title = hists_browser__scnprintf_title;
+ browser->annotation_opts = annotation_opts;
+ }
+ return browser;
+}
+
+void hist_browser__delete(struct hist_browser *browser)
+{
+ free(browser);
+}
+
+static struct hist_entry *hist_browser__selected_entry(struct hist_browser *browser)
+{
+ return browser->he_selection;
+}
+
+static struct thread *hist_browser__selected_thread(struct hist_browser *browser)
+{
+ return browser->he_selection->thread;
+}
+
+/* Check whether the browser is for 'top' or 'report' */
+static inline bool is_report_browser(void *timer)
+{
+ return timer == NULL;
+}
+
+static int hists_browser__scnprintf_title(struct hist_browser *browser, char *bf, size_t size)
+{
+ struct hist_browser_timer *hbt = browser->hbt;
+ int printed = __hists__scnprintf_title(browser->hists, bf, size, !is_report_browser(hbt));
+
+ if (!is_report_browser(hbt)) {
+ struct perf_top *top = hbt->arg;
+
+ if (top->zero)
+ printed += scnprintf(bf + printed, size - printed, " [z]");
+ }
+
+ return printed;
+}
+
+static inline void free_popup_options(char **options, int n)
+{
+ int i;
+
+ for (i = 0; i < n; ++i)
+ zfree(&options[i]);
+}
+
+/*
+ * Only runtime switching of the perf data file makes "input_name" point
+ * to a malloc'd buffer, so the "is_input_name_malloced" flag records
+ * whether free() must be called on the current "input_name" during a switch.
+ */
+static bool is_input_name_malloced = false;
+
+static int switch_data_file(void)
+{
+ char *pwd, *options[32], *abs_path[32], *tmp;
+ DIR *pwd_dir;
+ int nr_options = 0, choice = -1, ret = -1;
+ struct dirent *dent;
+
+ pwd = getenv("PWD");
+ if (!pwd)
+ return ret;
+
+ pwd_dir = opendir(pwd);
+ if (!pwd_dir)
+ return ret;
+
+ memset(options, 0, sizeof(options));
+ memset(abs_path, 0, sizeof(abs_path));
+
+ while ((dent = readdir(pwd_dir))) {
+ char path[PATH_MAX];
+ u64 magic;
+ char *name = dent->d_name;
+ FILE *file;
+
+ if (dent->d_type != DT_REG)
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", pwd, name);
+
+ file = fopen(path, "r");
+ if (!file)
+ continue;
+
+ if (fread(&magic, 1, 8, file) < 8)
+ goto close_file_and_continue;
+
+ if (is_perf_magic(magic)) {
+ options[nr_options] = strdup(name);
+ if (!options[nr_options])
+ goto close_file_and_continue;
+
+ abs_path[nr_options] = strdup(path);
+ if (!abs_path[nr_options]) {
+ zfree(&options[nr_options]);
+ ui__warning("Can't search all data files due to memory shortage.\n");
+ fclose(file);
+ break;
+ }
+
+ nr_options++;
+ }
+
+close_file_and_continue:
+ fclose(file);
+ if (nr_options >= 32) {
+ ui__warning("Too many perf data files in PWD!\n"
+ "Only the first 32 files will be listed.\n");
+ break;
+ }
+ }
+ closedir(pwd_dir);
+
+ if (nr_options) {
+ choice = ui__popup_menu(nr_options, options);
+ if (choice < nr_options && choice >= 0) {
+ tmp = strdup(abs_path[choice]);
+ if (tmp) {
+ if (is_input_name_malloced)
+ free((void *)input_name);
+ input_name = tmp;
+ is_input_name_malloced = true;
+ ret = 0;
+ } else
+ ui__warning("Data switch failed due to memory shortage!\n");
+ }
+ }
+
+ free_popup_options(options, nr_options);
+ free_popup_options(abs_path, nr_options);
+ return ret;
+}
+
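+/*
+ * Context menu plumbing: each add_*_opt() helper appends an option
+ * string and fills in a popup_action whose ->fn is invoked when the
+ * user picks that entry from the menu.
+ */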
+struct popup_action {
+ struct thread *thread;
+ struct map_symbol ms;
+ int socket;
+
+ int (*fn)(struct hist_browser *browser, struct popup_action *act);
+};
+
+static int
+do_annotate(struct hist_browser *browser, struct popup_action *act)
+{
+ struct perf_evsel *evsel;
+ struct annotation *notes;
+ struct hist_entry *he;
+ int err;
+
+ if (!browser->annotation_opts->objdump_path &&
+ perf_env__lookup_objdump(browser->env, &browser->annotation_opts->objdump_path))
+ return 0;
+
+ notes = symbol__annotation(act->ms.sym);
+ if (!notes->src)
+ return 0;
+
+ evsel = hists_to_evsel(browser->hists);
+ err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt,
+ browser->annotation_opts);
+ he = hist_browser__selected_entry(browser);
+ /*
+ * offer the option to annotate the other branch source or target
+ * (if they exist) when returning from annotate
+ */
+ if ((err == 'q' || err == CTRL('c')) && he->branch_info)
+ return 1;
+
+ ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+ if (err)
+ ui_browser__handle_resize(&browser->b);
+ return 0;
+}
+
+static int
+add_annotate_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr,
+ struct map *map, struct symbol *sym)
+{
+ if (sym == NULL || map->dso->annotate_warned)
+ return 0;
+
+ if (asprintf(optstr, "Annotate %s", sym->name) < 0)
+ return 0;
+
+ act->ms.map = map;
+ act->ms.sym = sym;
+ act->fn = do_annotate;
+ return 1;
+}
+
+static int
+do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
+{
+ struct thread *thread = act->thread;
+
+ if ((!hists__has(browser->hists, thread) &&
+ !hists__has(browser->hists, comm)) || thread == NULL)
+ return 0;
+
+ if (browser->hists->thread_filter) {
+ pstack__remove(browser->pstack, &browser->hists->thread_filter);
+ perf_hpp__set_elide(HISTC_THREAD, false);
+ thread__zput(browser->hists->thread_filter);
+ ui_helpline__pop();
+ } else {
+ if (hists__has(browser->hists, thread)) {
+ ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s(%d) thread\"",
+ thread->comm_set ? thread__comm_str(thread) : "",
+ thread->tid);
+ } else {
+ ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s thread\"",
+ thread->comm_set ? thread__comm_str(thread) : "");
+ }
+
+ browser->hists->thread_filter = thread__get(thread);
+ perf_hpp__set_elide(HISTC_THREAD, false);
+ pstack__push(browser->pstack, &browser->hists->thread_filter);
+ }
+
+ hists__filter_by_thread(browser->hists);
+ hist_browser__reset(browser);
+ return 0;
+}
+
+static int
+add_thread_opt(struct hist_browser *browser, struct popup_action *act,
+ char **optstr, struct thread *thread)
+{
+ int ret;
+
+ if ((!hists__has(browser->hists, thread) &&
+ !hists__has(browser->hists, comm)) || thread == NULL)
+ return 0;
+
+ if (hists__has(browser->hists, thread)) {
+ ret = asprintf(optstr, "Zoom %s %s(%d) thread",
+ browser->hists->thread_filter ? "out of" : "into",
+ thread->comm_set ? thread__comm_str(thread) : "",
+ thread->tid);
+ } else {
+ ret = asprintf(optstr, "Zoom %s %s thread",
+ browser->hists->thread_filter ? "out of" : "into",
+ thread->comm_set ? thread__comm_str(thread) : "");
+ }
+ if (ret < 0)
+ return 0;
+
+ act->thread = thread;
+ act->fn = do_zoom_thread;
+ return 1;
+}
+
+static int
+do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
+{
+ struct map *map = act->ms.map;
+
+ if (!hists__has(browser->hists, dso) || map == NULL)
+ return 0;
+
+ if (browser->hists->dso_filter) {
+ pstack__remove(browser->pstack, &browser->hists->dso_filter);
+ perf_hpp__set_elide(HISTC_DSO, false);
+ browser->hists->dso_filter = NULL;
+ ui_helpline__pop();
+ } else {
+ ui_helpline__fpush("To zoom out press ESC or ENTER + \"Zoom out of %s DSO\"",
+ __map__is_kernel(map) ? "the Kernel" : map->dso->short_name);
+ browser->hists->dso_filter = map->dso;
+ perf_hpp__set_elide(HISTC_DSO, true);
+ pstack__push(browser->pstack, &browser->hists->dso_filter);
+ }
+
+ hists__filter_by_dso(browser->hists);
+ hist_browser__reset(browser);
+ return 0;
+}
+
+static int
+add_dso_opt(struct hist_browser *browser, struct popup_action *act,
+ char **optstr, struct map *map)
+{
+ if (!hists__has(browser->hists, dso) || map == NULL)
+ return 0;
+
+ if (asprintf(optstr, "Zoom %s %s DSO",
+ browser->hists->dso_filter ? "out of" : "into",
+ __map__is_kernel(map) ? "the Kernel" : map->dso->short_name) < 0)
+ return 0;
+
+ act->ms.map = map;
+ act->fn = do_zoom_dso;
+ return 1;
+}
+
+static int
+do_browse_map(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act)
+{
+ map__browse(act->ms.map);
+ return 0;
+}
+
+static int
+add_map_opt(struct hist_browser *browser,
+ struct popup_action *act, char **optstr, struct map *map)
+{
+ if (!hists__has(browser->hists, dso) || map == NULL)
+ return 0;
+
+ if (asprintf(optstr, "Browse map details") < 0)
+ return 0;
+
+ act->ms.map = map;
+ act->fn = do_browse_map;
+ return 1;
+}
+
+static int
+do_run_script(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act)
+{
+ char script_opt[64];
+ memset(script_opt, 0, sizeof(script_opt));
+
+ if (act->thread) {
+ scnprintf(script_opt, sizeof(script_opt), " -c %s ",
+ thread__comm_str(act->thread));
+ } else if (act->ms.sym) {
+ scnprintf(script_opt, sizeof(script_opt), " -S %s ",
+ act->ms.sym->name);
+ }
+
+ script_browse(script_opt);
+ return 0;
+}
+
+static int
+add_script_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr,
+ struct thread *thread, struct symbol *sym)
+{
+ if (thread) {
+ if (asprintf(optstr, "Run scripts for samples of thread [%s]",
+ thread__comm_str(thread)) < 0)
+ return 0;
+ } else if (sym) {
+ if (asprintf(optstr, "Run scripts for samples of symbol [%s]",
+ sym->name) < 0)
+ return 0;
+ } else {
+ if (asprintf(optstr, "Run scripts for all samples") < 0)
+ return 0;
+ }
+
+ act->thread = thread;
+ act->ms.sym = sym;
+ act->fn = do_run_script;
+ return 1;
+}
+
+static int
+do_switch_data(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act __maybe_unused)
+{
+ if (switch_data_file()) {
+ ui__warning("Won't switch the data files due to\n"
+ "no valid data file get selected!\n");
+ return 0;
+ }
+
+ return K_SWITCH_INPUT_DATA;
+}
+
+static int
+add_switch_opt(struct hist_browser *browser,
+ struct popup_action *act, char **optstr)
+{
+ if (!is_report_browser(browser->hbt))
+ return 0;
+
+ if (asprintf(optstr, "Switch to another data file in PWD") < 0)
+ return 0;
+
+ act->fn = do_switch_data;
+ return 1;
+}
+
+static int
+do_exit_browser(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act __maybe_unused)
+{
+ return 0;
+}
+
+static int
+add_exit_opt(struct hist_browser *browser __maybe_unused,
+ struct popup_action *act, char **optstr)
+{
+ if (asprintf(optstr, "Exit") < 0)
+ return 0;
+
+ act->fn = do_exit_browser;
+ return 1;
+}
+
+static int
+do_zoom_socket(struct hist_browser *browser, struct popup_action *act)
+{
+ if (!hists__has(browser->hists, socket) || act->socket < 0)
+ return 0;
+
+ if (browser->hists->socket_filter > -1) {
+ pstack__remove(browser->pstack, &browser->hists->socket_filter);
+ browser->hists->socket_filter = -1;
+ perf_hpp__set_elide(HISTC_SOCKET, false);
+ } else {
+ browser->hists->socket_filter = act->socket;
+ perf_hpp__set_elide(HISTC_SOCKET, true);
+ pstack__push(browser->pstack, &browser->hists->socket_filter);
+ }
+
+ hists__filter_by_socket(browser->hists);
+ hist_browser__reset(browser);
+ return 0;
+}
+
+static int
+add_socket_opt(struct hist_browser *browser, struct popup_action *act,
+ char **optstr, int socket_id)
+{
+ if (!hists__has(browser->hists, socket) || socket_id < 0)
+ return 0;
+
+ if (asprintf(optstr, "Zoom %s Processor Socket %d",
+ (browser->hists->socket_filter > -1) ? "out of" : "into",
+ socket_id) < 0)
+ return 0;
+
+ act->socket = socket_id;
+ act->fn = do_zoom_socket;
+ return 1;
+}
+
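+/*
+ * Recount the visible entries; the cheap path reuses the precomputed
+ * counter when no percent limit is set and hierarchy mode is off.
+ */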
+static void hist_browser__update_nr_entries(struct hist_browser *hb)
+{
+ u64 nr_entries = 0;
+ struct rb_node *nd = rb_first(&hb->hists->entries);
+
+ if (hb->min_pcnt == 0 && !symbol_conf.report_hierarchy) {
+ hb->nr_non_filtered_entries = hb->hists->nr_non_filtered_entries;
+ return;
+ }
+
+ while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
+ nr_entries++;
+ nd = rb_hierarchy_next(nd);
+ }
+
+ hb->nr_non_filtered_entries = nr_entries;
+ hb->nr_hierarchy_entries = nr_entries;
+}
+
+static void hist_browser__update_percent_limit(struct hist_browser *hb,
+ double percent)
+{
+ struct hist_entry *he;
+ struct rb_node *nd = rb_first(&hb->hists->entries);
+ u64 total = hists__total_period(hb->hists);
+ u64 min_callchain_hits = total * (percent / 100);
+
+ hb->min_pcnt = callchain_param.min_percent = percent;
+
+ while ((nd = hists__filter_entries(nd, hb->min_pcnt)) != NULL) {
+ he = rb_entry(nd, struct hist_entry, rb_node);
+
+ if (he->has_no_entry) {
+ he->has_no_entry = false;
+ he->nr_rows = 0;
+ }
+
+ if (!he->leaf || !hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
+ goto next;
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL) {
+ total = he->stat.period;
+
+ if (symbol_conf.cumulate_callchain)
+ total = he->stat_acc->period;
+
+ min_callchain_hits = total * (percent / 100);
+ }
+
+ callchain_param.sort(&he->sorted_chain, he->callchain,
+ min_callchain_hits, &callchain_param);
+
+next:
+ nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD);
+
+ /* force re-evaluation of the callchain folding state */
+ he->init_have_children = false;
+ hist_entry__set_folding(he, hb, false);
+ }
+}
+
+static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
+ const char *helpline,
+ bool left_exits,
+ struct hist_browser_timer *hbt,
+ float min_pcnt,
+ struct perf_env *env,
+ bool warn_lost_event,
+ struct annotation_options *annotation_opts)
+{
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_browser *browser = perf_evsel_browser__new(evsel, hbt, env, annotation_opts);
+ struct branch_info *bi = NULL;
+#define MAX_OPTIONS 16
+ char *options[MAX_OPTIONS];
+ struct popup_action actions[MAX_OPTIONS];
+ int nr_options = 0;
+ int key = -1;
+ char buf[64];
+ int delay_secs = hbt ? hbt->refresh : 0;
+
+#define HIST_BROWSER_HELP_COMMON \
+ "h/?/F1 Show this window\n" \
+ "UP/DOWN/PGUP\n" \
+ "PGDN/SPACE Navigate\n" \
+ "q/ESC/CTRL+C Exit browser or go back to previous screen\n\n" \
+ "For multiple event sessions:\n\n" \
+ "TAB/UNTAB Switch events\n\n" \
+ "For symbolic views (--sort has sym):\n\n" \
+ "ENTER Zoom into DSO/Threads & Annotate current symbol\n" \
+ "ESC Zoom out\n" \
+ "a Annotate current symbol\n" \
+ "C Collapse all callchains\n" \
+ "d Zoom into current DSO\n" \
+ "E Expand all callchains\n" \
+ "F Toggle percentage of filtered entries\n" \
+ "H Display column headers\n" \
+ "L Change percent limit\n" \
+ "m Display context menu\n" \
+ "S Zoom into current Processor Socket\n" \
+
+ /* help messages are sorted by lexical order of the hotkey */
+ const char report_help[] = HIST_BROWSER_HELP_COMMON
+ "i Show header information\n"
+ "P Print histograms to perf.hist.N\n"
+ "r Run available scripts\n"
+ "s Switch to another data file in PWD\n"
+ "t Zoom into current Thread\n"
+ "V Verbose (DSO names in callchains, etc)\n"
+ "/ Filter symbol by name";
+ const char top_help[] = HIST_BROWSER_HELP_COMMON
+ "P Print histograms to perf.hist.N\n"
+ "t Zoom into current Thread\n"
+ "V Verbose (DSO names in callchains, etc)\n"
+ "z Toggle zeroing of samples\n"
+ "f Enable/Disable events\n"
+ "/ Filter symbol by name";
+
+ if (browser == NULL)
+ return -1;
+
+ /* reset the abort key so that Ctrl-C is delivered as a regular key */
+ SLang_reset_tty();
+ SLang_init_tty(0, 0, 0);
+
+ if (min_pcnt)
+ browser->min_pcnt = min_pcnt;
+ hist_browser__update_nr_entries(browser);
+
+ browser->pstack = pstack__new(3);
+ if (browser->pstack == NULL)
+ goto out;
+
+ ui_helpline__push(helpline);
+
+ memset(options, 0, sizeof(options));
+ memset(actions, 0, sizeof(actions));
+
+ if (symbol_conf.col_width_list_str)
+ perf_hpp__set_user_width(symbol_conf.col_width_list_str);
+
+ while (1) {
+ struct thread *thread = NULL;
+ struct map *map = NULL;
+ int choice = 0;
+ int socket_id = -1;
+
+ nr_options = 0;
+
+ key = hist_browser__run(browser, helpline,
+ warn_lost_event);
+
+ if (browser->he_selection != NULL) {
+ thread = hist_browser__selected_thread(browser);
+ map = browser->selection->map;
+ socket_id = browser->he_selection->socket;
+ }
+ switch (key) {
+ case K_TAB:
+ case K_UNTAB:
+ if (nr_events == 1)
+ continue;
+ /*
+ * Exit the browser and let hists__browser_tree
+ * go to the next or previous event
+ */
+ goto out_free_stack;
+ case 'a':
+ if (!hists__has(hists, sym)) {
+ ui_browser__warning(&browser->b, delay_secs * 2,
+ "Annotation is only available for symbolic views, "
+ "include \"sym*\" in --sort to use it.");
+ continue;
+ }
+
+ if (browser->selection == NULL ||
+ browser->selection->sym == NULL ||
+ browser->selection->map->dso->annotate_warned)
+ continue;
+
+ actions->ms.map = browser->selection->map;
+ actions->ms.sym = browser->selection->sym;
+ do_annotate(browser, actions);
+ continue;
+ case 'P':
+ hist_browser__dump(browser);
+ continue;
+ case 'd':
+ actions->ms.map = map;
+ do_zoom_dso(browser, actions);
+ continue;
+ case 'V':
+ verbose = (verbose + 1) % 4;
+ browser->show_dso = verbose > 0;
+ ui_helpline__fpush("Verbosity level set to %d\n",
+ verbose);
+ continue;
+ case 't':
+ actions->thread = thread;
+ do_zoom_thread(browser, actions);
+ continue;
+ case 'S':
+ actions->socket = socket_id;
+ do_zoom_socket(browser, actions);
+ continue;
+ case '/':
+ if (ui_browser__input_window("Symbol to show",
+ "Please enter the name of symbol you want to see.\n"
+ "To remove the filter later, press / + ENTER.",
+ buf, "ENTER: OK, ESC: Cancel",
+ delay_secs * 2) == K_ENTER) {
+ hists->symbol_filter_str = *buf ? buf : NULL;
+ hists__filter_by_symbol(hists);
+ hist_browser__reset(browser);
+ }
+ continue;
+ case 'r':
+ if (is_report_browser(hbt)) {
+ actions->thread = NULL;
+ actions->ms.sym = NULL;
+ do_run_script(browser, actions);
+ }
+ continue;
+ case 's':
+ if (is_report_browser(hbt)) {
+ key = do_switch_data(browser, actions);
+ if (key == K_SWITCH_INPUT_DATA)
+ goto out_free_stack;
+ }
+ continue;
+ case 'i':
+ /* env->arch is NULL for live-mode (i.e. perf top) */
+ if (env->arch)
+ tui__header_window(env);
+ continue;
+ case 'F':
+ symbol_conf.filter_relative ^= 1;
+ continue;
+ case 'z':
+ if (!is_report_browser(hbt)) {
+ struct perf_top *top = hbt->arg;
+
+ top->zero = !top->zero;
+ }
+ continue;
+ case 'L':
+ if (ui_browser__input_window("Percent Limit",
+ "Please enter the value you want to hide entries under that percent.",
+ buf, "ENTER: OK, ESC: Cancel",
+ delay_secs * 2) == K_ENTER) {
+ char *end;
+ double new_percent = strtod(buf, &end);
+
+ if (new_percent < 0 || new_percent > 100) {
+ ui_browser__warning(&browser->b, delay_secs * 2,
+ "Invalid percent: %.2f", new_percent);
+ continue;
+ }
+
+ hist_browser__update_percent_limit(browser, new_percent);
+ hist_browser__reset(browser);
+ }
+ continue;
+ case K_F1:
+ case 'h':
+ case '?':
+ ui_browser__help_window(&browser->b,
+ is_report_browser(hbt) ? report_help : top_help);
+ continue;
+ case K_ENTER:
+ case K_RIGHT:
+ case 'm':
+ /* menu */
+ break;
+ case K_ESC:
+ case K_LEFT: {
+ const void *top;
+
+ if (pstack__empty(browser->pstack)) {
+ /*
+ * Go back to perf_evsel_menu__run or another caller.
+ */
+ if (left_exits)
+ goto out_free_stack;
+
+ if (key == K_ESC &&
+ ui_browser__dialog_yesno(&browser->b,
+ "Do you really want to exit?"))
+ goto out_free_stack;
+
+ continue;
+ }
+ actions->ms.map = map;
+ top = pstack__peek(browser->pstack);
+ if (top == &browser->hists->dso_filter) {
+ /*
+ * No need to set actions->dso here since
+ * it's just to remove the current filter.
+ * Ditto for thread below.
+ */
+ do_zoom_dso(browser, actions);
+ } else if (top == &browser->hists->thread_filter) {
+ do_zoom_thread(browser, actions);
+ } else if (top == &browser->hists->socket_filter) {
+ do_zoom_socket(browser, actions);
+ }
+ continue;
+ }
+ case 'q':
+ case CTRL('c'):
+ goto out_free_stack;
+ case 'f':
+ if (!is_report_browser(hbt)) {
+ struct perf_top *top = hbt->arg;
+
+ perf_evlist__toggle_enable(top->evlist);
+ /*
+ * No need to refresh or resort/decay histogram
+ * entries while we are not collecting samples.
+ */
+ if (top->evlist->enabled) {
+ helpline = "Press 'f' to disable the events or 'h' to see other hotkeys";
+ hbt->refresh = delay_secs;
+ } else {
+ helpline = "Press 'f' again to re-enable the events";
+ hbt->refresh = 0;
+ }
+ continue;
+ }
+ /* Fall thru */
+ default:
+ helpline = "Press '?' for help on key bindings";
+ continue;
+ }
+
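+ /*
+ * Only ENTER/RIGHT/'m' fall through to here: build and show the
+ * per-entry popup menu.
+ */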
+ if (!hists__has(hists, sym) || browser->selection == NULL)
+ goto skip_annotation;
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+
+ if (browser->he_selection)
+ bi = browser->he_selection->branch_info;
+
+ if (bi == NULL)
+ goto skip_annotation;
+
+ nr_options += add_annotate_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ bi->from.map,
+ bi->from.sym);
+ if (bi->to.sym != bi->from.sym)
+ nr_options += add_annotate_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ bi->to.map,
+ bi->to.sym);
+ } else {
+ nr_options += add_annotate_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ browser->selection->map,
+ browser->selection->sym);
+ }
+skip_annotation:
+ nr_options += add_thread_opt(browser, &actions[nr_options],
+ &options[nr_options], thread);
+ nr_options += add_dso_opt(browser, &actions[nr_options],
+ &options[nr_options], map);
+ nr_options += add_map_opt(browser, &actions[nr_options],
+ &options[nr_options],
+ browser->selection ?
+ browser->selection->map : NULL);
+ nr_options += add_socket_opt(browser, &actions[nr_options],
+ &options[nr_options],
+ socket_id);
+ /* perf script support */
+ if (!is_report_browser(hbt))
+ goto skip_scripting;
+
+ if (browser->he_selection) {
+ if (hists__has(hists, thread) && thread) {
+ nr_options += add_script_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ thread, NULL);
+ }
+ /*
+ * Note that browser->selection != NULL
+ * when browser->he_selection is not NULL,
+ * so we don't need to check browser->selection
+ * before fetching browser->selection->sym like what
+ * we do before fetching browser->selection->map.
+ *
+ * See hist_browser__show_entry.
+ */
+ if (hists__has(hists, sym) && browser->selection->sym) {
+ nr_options += add_script_opt(browser,
+ &actions[nr_options],
+ &options[nr_options],
+ NULL, browser->selection->sym);
+ }
+ }
+ nr_options += add_script_opt(browser, &actions[nr_options],
+ &options[nr_options], NULL, NULL);
+ nr_options += add_switch_opt(browser, &actions[nr_options],
+ &options[nr_options]);
+skip_scripting:
+ nr_options += add_exit_opt(browser, &actions[nr_options],
+ &options[nr_options]);
+
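+ /*
+ * Run the popup menu; an action returning 1 keeps the menu open,
+ * so loop until it is dismissed or an action yields a key.
+ */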
+ do {
+ struct popup_action *act;
+
+ choice = ui__popup_menu(nr_options, options);
+ if (choice == -1 || choice >= nr_options)
+ break;
+
+ act = &actions[choice];
+ key = act->fn(browser, act);
+ } while (key == 1);
+
+ if (key == K_SWITCH_INPUT_DATA)
+ break;
+ }
+out_free_stack:
+ pstack__delete(browser->pstack);
+out:
+ hist_browser__delete(browser);
+ free_popup_options(options, MAX_OPTIONS);
+ return key;
+}
+
+struct perf_evsel_menu {
+ struct ui_browser b;
+ struct perf_evsel *selection;
+ struct annotation_options *annotation_opts;
+ bool lost_events, lost_events_warned;
+ float min_pcnt;
+ struct perf_env *env;
+};
+
+static void perf_evsel_menu__write(struct ui_browser *browser,
+ void *entry, int row)
+{
+ struct perf_evsel_menu *menu = container_of(browser,
+ struct perf_evsel_menu, b);
+ struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
+ struct hists *hists = evsel__hists(evsel);
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+ unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ const char *ev_name = perf_evsel__name(evsel);
+ char bf[256], unit;
+ const char *warn = " ";
+ size_t printed;
+
+ ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+ HE_COLORSET_NORMAL);
+
+ if (perf_evsel__is_group_event(evsel)) {
+ struct perf_evsel *pos;
+
+ ev_name = perf_evsel__group_name(evsel);
+
+ for_each_group_member(pos, evsel) {
+ struct hists *pos_hists = evsel__hists(pos);
+ nr_events += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ }
+ }
+
+ nr_events = convert_unit(nr_events, &unit);
+ printed = scnprintf(bf, sizeof(bf), "%lu%c%s%s", nr_events,
+ unit, unit == ' ' ? "" : " ", ev_name);
+ ui_browser__printf(browser, "%s", bf);
+
+ nr_events = hists->stats.nr_events[PERF_RECORD_LOST];
+ if (nr_events != 0) {
+ menu->lost_events = true;
+ if (!current_entry)
+ ui_browser__set_color(browser, HE_COLORSET_TOP);
+ nr_events = convert_unit(nr_events, &unit);
+ printed += scnprintf(bf, sizeof(bf), ": %lu%c%schunks LOST!",
+ nr_events, unit, unit == ' ' ? "" : " ");
+ warn = bf;
+ }
+
+ ui_browser__write_nstring(browser, warn, browser->width - printed);
+
+ if (current_entry)
+ menu->selection = evsel;
+}
+
+static int perf_evsel_menu__run(struct perf_evsel_menu *menu,
+ int nr_events, const char *help,
+ struct hist_browser_timer *hbt,
+ bool warn_lost_event)
+{
+ struct perf_evlist *evlist = menu->b.priv;
+ struct perf_evsel *pos;
+ const char *title = "Available samples";
+ int delay_secs = hbt ? hbt->refresh : 0;
+ int key;
+
+ if (ui_browser__show(&menu->b, title,
+ "ESC: exit, ENTER|->: Browse histograms") < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(&menu->b, delay_secs);
+
+ switch (key) {
+ case K_TIMER:
+ if (hbt)
+ hbt->timer(hbt->arg);
+
+ if (!menu->lost_events_warned &&
+ menu->lost_events &&
+ warn_lost_event) {
+ ui_browser__warn_lost_events(&menu->b);
+ menu->lost_events_warned = true;
+ }
+ continue;
+ case K_RIGHT:
+ case K_ENTER:
+ if (!menu->selection)
+ continue;
+ pos = menu->selection;
+browse_hists:
+ perf_evlist__set_selected(evlist, pos);
+ /*
+ * Give the calling tool a chance to populate the
+ * non-default evsel's resorted hists tree.
+ */
+ if (hbt)
+ hbt->timer(hbt->arg);
+ key = perf_evsel__hists_browse(pos, nr_events, help,
+ true, hbt,
+ menu->min_pcnt,
+ menu->env,
+ warn_lost_event,
+ menu->annotation_opts);
+ ui_browser__show_title(&menu->b, title);
+ switch (key) {
+ case K_TAB:
+ if (pos->node.next == &evlist->entries)
+ pos = perf_evlist__first(evlist);
+ else
+ pos = perf_evsel__next(pos);
+ goto browse_hists;
+ case K_UNTAB:
+ if (pos->node.prev == &evlist->entries)
+ pos = perf_evlist__last(evlist);
+ else
+ pos = perf_evsel__prev(pos);
+ goto browse_hists;
+ case K_SWITCH_INPUT_DATA:
+ case 'q':
+ case CTRL('c'):
+ goto out;
+ case K_ESC:
+ default:
+ continue;
+ }
+ case K_LEFT:
+ continue;
+ case K_ESC:
+ if (!ui_browser__dialog_yesno(&menu->b,
+ "Do you really want to exit?"))
+ continue;
+ /* Fall thru */
+ case 'q':
+ case CTRL('c'):
+ goto out;
+ default:
+ continue;
+ }
+ }
+
+out:
+ ui_browser__hide(&menu->b);
+ return key;
+}
+
+static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
+ void *entry)
+{
+ struct perf_evsel *evsel = list_entry(entry, struct perf_evsel, node);
+
+ if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
+ return true;
+
+ return false;
+}
+
+static int __perf_evlist__tui_browse_hists(struct perf_evlist *evlist,
+ int nr_entries, const char *help,
+ struct hist_browser_timer *hbt,
+ float min_pcnt,
+ struct perf_env *env,
+ bool warn_lost_event,
+ struct annotation_options *annotation_opts)
+{
+ struct perf_evsel *pos;
+ struct perf_evsel_menu menu = {
+ .b = {
+ .entries = &evlist->entries,
+ .refresh = ui_browser__list_head_refresh,
+ .seek = ui_browser__list_head_seek,
+ .write = perf_evsel_menu__write,
+ .filter = filter_group_entries,
+ .nr_entries = nr_entries,
+ .priv = evlist,
+ },
+ .min_pcnt = min_pcnt,
+ .env = env,
+ .annotation_opts = annotation_opts,
+ };
+
+ ui_helpline__push("Press ESC to exit");
+
+ evlist__for_each_entry(evlist, pos) {
+ const char *ev_name = perf_evsel__name(pos);
+ size_t line_len = strlen(ev_name) + 7;
+
+ if (menu.b.width < line_len)
+ menu.b.width = line_len;
+ }
+
+ return perf_evsel_menu__run(&menu, nr_entries, help,
+ hbt, warn_lost_event);
+}
+
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
+ struct hist_browser_timer *hbt,
+ float min_pcnt,
+ struct perf_env *env,
+ bool warn_lost_event,
+ struct annotation_options *annotation_opts)
+{
+ int nr_entries = evlist->nr_entries;
+
+single_entry:
+ if (nr_entries == 1) {
+ struct perf_evsel *first = perf_evlist__first(evlist);
+
+ return perf_evsel__hists_browse(first, nr_entries, help,
+ false, hbt, min_pcnt,
+ env, warn_lost_event,
+ annotation_opts);
+ }
+
+ if (symbol_conf.event_group) {
+ struct perf_evsel *pos;
+
+ nr_entries = 0;
+ evlist__for_each_entry(evlist, pos) {
+ if (perf_evsel__is_group_leader(pos))
+ nr_entries++;
+ }
+
+ if (nr_entries == 1)
+ goto single_entry;
+ }
+
+ return __perf_evlist__tui_browse_hists(evlist, nr_entries, help,
+ hbt, min_pcnt, env,
+ warn_lost_event,
+ annotation_opts);
+}
diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h
new file mode 100644
index 000000000..91d3e18b5
--- /dev/null
+++ b/tools/perf/ui/browsers/hists.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_BROWSER_HISTS_H_
+#define _PERF_UI_BROWSER_HISTS_H_ 1
+
+#include "ui/browser.h"
+
+struct annotation_options;
+
+struct hist_browser {
+ struct ui_browser b;
+ struct hists *hists;
+ struct hist_entry *he_selection;
+ struct map_symbol *selection;
+ struct hist_browser_timer *hbt;
+ struct pstack *pstack;
+ struct perf_env *env;
+ struct annotation_options *annotation_opts;
+ int print_seq;
+ bool show_dso;
+ bool show_headers;
+ float min_pcnt;
+ u64 nr_non_filtered_entries;
+ u64 nr_hierarchy_entries;
+ u64 nr_callchain_rows;
+ bool c2c_filter;
+
+ /* Get title string. */
+ int (*title)(struct hist_browser *browser,
+ char *bf, size_t size);
+};
+
+struct hist_browser *hist_browser__new(struct hists *hists);
+void hist_browser__delete(struct hist_browser *browser);
+int hist_browser__run(struct hist_browser *browser, const char *help,
+ bool warn_lost_event);
+void hist_browser__init(struct hist_browser *browser,
+ struct hists *hists);
+#endif /* _PERF_UI_BROWSER_HISTS_H_ */
diff --git a/tools/perf/ui/browsers/map.c b/tools/perf/ui/browsers/map.c
new file mode 100644
index 000000000..5b8b8c637
--- /dev/null
+++ b/tools/perf/ui/browsers/map.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <sys/ttydefaults.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include "../../util/util.h"
+#include "../../util/debug.h"
+#include "../../util/symbol.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../keysyms.h"
+#include "map.h"
+
+#include "sane_ctype.h"
+
+struct map_browser {
+ struct ui_browser b;
+ struct map *map;
+ u8 addrlen;
+};
+
+static void map_browser__write(struct ui_browser *browser, void *nd, int row)
+{
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+ struct map_browser *mb = container_of(browser, struct map_browser, b);
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+ int width;
+
+ ui_browser__set_percent_color(browser, 0, current_entry);
+ ui_browser__printf(browser, "%*" PRIx64 " %*" PRIx64 " %c ",
+ mb->addrlen, sym->start, mb->addrlen, sym->end,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w');
+ width = browser->width - ((mb->addrlen * 2) + 4);
+ if (width > 0)
+ ui_browser__write_nstring(browser, sym->name, width);
+}
+
+/* FIXME uber-kludgy, see comment on cmd_report... */
+static u32 *symbol__browser_index(struct symbol *browser)
+{
+ return ((void *)browser) - sizeof(struct rb_node) - sizeof(u32);
+}
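+/*
+ * The index slot is written only in map__browse() when verbose > 0;
+ * map_browser__run() gates the '/' search on verbose accordingly.
+ */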
+
+static int map_browser__search(struct map_browser *browser)
+{
+ char target[512];
+ struct symbol *sym;
+ int err = ui_browser__input_window("Search by name/addr",
+ "Prefix with 0x to search by address",
+ target, "ENTER: OK, ESC: Cancel", 0);
+ if (err != K_ENTER)
+ return -1;
+
+ if (target[0] == '0' && tolower(target[1]) == 'x') {
+ u64 addr = strtoull(target, NULL, 16);
+ sym = map__find_symbol(browser->map, addr);
+ } else
+ sym = map__find_symbol_by_name(browser->map, target);
+
+ if (sym != NULL) {
+ u32 *idx = symbol__browser_index(sym);
+
+ browser->b.top = &sym->rb_node;
+ browser->b.index = browser->b.top_idx = *idx;
+ } else
+ ui_helpline__fpush("%s not found!", target);
+
+ return 0;
+}
+
+static int map_browser__run(struct map_browser *browser)
+{
+ int key;
+
+ if (ui_browser__show(&browser->b, browser->map->dso->long_name,
+ "Press ESC to exit, %s / to search",
+ verbose > 0 ? "" : "restart with -v to use") < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(&browser->b, 0);
+
+ switch (key) {
+ case '/':
+ if (verbose > 0)
+ map_browser__search(browser);
+ default:
+ break;
+ case K_LEFT:
+ case K_ESC:
+ case 'q':
+ case CTRL('c'):
+ goto out;
+ }
+ }
+out:
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
+int map__browse(struct map *map)
+{
+ struct map_browser mb = {
+ .b = {
+ .entries = &map->dso->symbols,
+ .refresh = ui_browser__rb_tree_refresh,
+ .seek = ui_browser__rb_tree_seek,
+ .write = map_browser__write,
+ },
+ .map = map,
+ };
+ struct rb_node *nd;
+ char tmp[BITS_PER_LONG / 4];
+ u64 maxaddr = 0;
+
+ for (nd = rb_first(mb.b.entries); nd; nd = rb_next(nd)) {
+ struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+
+ if (maxaddr < pos->end)
+ maxaddr = pos->end;
+ if (verbose > 0) {
+ u32 *idx = symbol__browser_index(pos);
+ *idx = mb.b.nr_entries;
+ }
+ ++mb.b.nr_entries;
+ }
+
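+ /* Address column width: hex digits needed for the highest address. */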
+ mb.addrlen = snprintf(tmp, sizeof(tmp), "%" PRIx64, maxaddr);
+ return map_browser__run(&mb);
+}
diff --git a/tools/perf/ui/browsers/map.h b/tools/perf/ui/browsers/map.h
new file mode 100644
index 000000000..0ed7dbb3a
--- /dev/null
+++ b/tools/perf/ui/browsers/map.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_MAP_BROWSER_H_
+#define _PERF_UI_MAP_BROWSER_H_ 1
+struct map;
+
+int map__browse(struct map *map);
+#endif /* _PERF_UI_MAP_BROWSER_H_ */
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
new file mode 100644
index 000000000..90a32ac69
--- /dev/null
+++ b/tools/perf/ui/browsers/scripts.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <sys/ttydefaults.h>
+#include <string.h>
+#include "../../util/sort.h"
+#include "../../util/util.h"
+#include "../../util/hist.h"
+#include "../../util/debug.h"
+#include "../../util/symbol.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../libslang.h"
+
+/* 2048 lines should be enough for script output */
+#define MAX_LINES 2048
+
+/* 160 bytes for one output line */
+#define AVERAGE_LINE_LEN 160
+
+struct script_line {
+ struct list_head node;
+ char line[AVERAGE_LINE_LEN];
+};
+
+struct perf_script_browser {
+ struct ui_browser b;
+ struct list_head entries;
+ const char *script_name;
+ int nr_lines;
+};
+
+#define SCRIPT_NAMELEN 128
+#define SCRIPT_MAX_NO 64
+/*
+ * Usually the full path for a script is:
+ * /home/username/libexec/perf-core/scripts/python/xxx.py
+ * /home/username/libexec/perf-core/scripts/perl/xxx.pl
+ * So 256 should be long enough to contain the full path.
+ */
+#define SCRIPT_FULLPATH_LEN 256
+
+/*
+ * On success, copies the full path of the selected script into the
+ * buffer pointed to by script_name and returns 0.
+ * Returns -1 on failure.
+ */
+static int list_scripts(char *script_name)
+{
+ char *buf, *names[SCRIPT_MAX_NO], *paths[SCRIPT_MAX_NO];
+ int i, num, choice, ret = -1;
+
+ /* Carve one buffer into SCRIPT_MAX_NO (name, path) slot pairs. */
+ buf = malloc(SCRIPT_MAX_NO * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN));
+ if (!buf)
+ return ret;
+
+ for (i = 0; i < SCRIPT_MAX_NO; i++) {
+ names[i] = buf + i * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN);
+ paths[i] = names[i] + SCRIPT_NAMELEN;
+ }
+
+ num = find_scripts(names, paths);
+ if (num > 0) {
+ choice = ui__popup_menu(num, names);
+ if (choice < num && choice >= 0) {
+ strcpy(script_name, paths[choice]);
+ ret = 0;
+ }
+ }
+
+ free(buf);
+ return ret;
+}
+
+static void script_browser__write(struct ui_browser *browser,
+ void *entry, int row)
+{
+ struct script_line *sline = list_entry(entry, struct script_line, node);
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+
+ ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+ HE_COLORSET_NORMAL);
+
+ ui_browser__write_nstring(browser, sline->line, browser->width);
+}
+
+static int script_browser__run(struct perf_script_browser *browser)
+{
+ int key;
+
+ if (ui_browser__show(&browser->b, browser->script_name,
+ "Press ESC to exit") < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(&browser->b, 0);
+
+ /* We can add some special key handling here if needed */
+ break;
+ }
+
+ ui_browser__hide(&browser->b);
+ return key;
+}
+
+
+int script_browse(const char *script_opt)
+{
+ char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN];
+ char *line = NULL;
+ size_t len = 0;
+ ssize_t retlen;
+ int ret = -1, nr_entries = 0;
+ FILE *fp;
+ void *buf;
+ struct script_line *sline;
+
+ struct perf_script_browser script = {
+ .b = {
+ .refresh = ui_browser__list_head_refresh,
+ .seek = ui_browser__list_head_seek,
+ .write = script_browser__write,
+ },
+ .script_name = script_name,
+ };
+
+ INIT_LIST_HEAD(&script.entries);
+
+ /* Save each line of the output in one struct script_line object. */
+ buf = zalloc((sizeof(*sline)) * MAX_LINES);
+ if (!buf)
+ return -1;
+ sline = buf;
+
+ memset(script_name, 0, SCRIPT_FULLPATH_LEN);
+ if (list_scripts(script_name))
+ goto exit;
+
+ sprintf(cmd, "perf script -s %s ", script_name);
+
+ if (script_opt)
+ strcat(cmd, script_opt);
+
+ if (input_name) {
+ strcat(cmd, " -i ");
+ strcat(cmd, input_name);
+ }
+
+ strcat(cmd, " 2>&1");
+
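+ /*
+ * cmd now looks like (path illustrative):
+ * perf script -s /path/to/xxx.py -i perf.data 2>&1
+ */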
+ fp = popen(cmd, "r");
+ if (!fp)
+ goto exit;
+
+ while ((retlen = getline(&line, &len, fp)) != -1) {
+ strncpy(sline->line, line, AVERAGE_LINE_LEN);
+
+ /* If one output line is very large, just cut it short */
+ if (retlen >= AVERAGE_LINE_LEN) {
+ sline->line[AVERAGE_LINE_LEN - 1] = '\0';
+ sline->line[AVERAGE_LINE_LEN - 2] = '\n';
+ }
+ list_add_tail(&sline->node, &script.entries);
+
+ if (script.b.width < retlen)
+ script.b.width = retlen;
+
+ if (nr_entries++ >= MAX_LINES - 1)
+ break;
+ sline++;
+ }
+
+ if (script.b.width > AVERAGE_LINE_LEN)
+ script.b.width = AVERAGE_LINE_LEN;
+
+ free(line);
+ pclose(fp);
+
+ script.nr_lines = nr_entries;
+ script.b.nr_entries = nr_entries;
+ script.b.entries = &script.entries;
+
+ ret = script_browser__run(&script);
+exit:
+ free(buf);
+ return ret;
+}
diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build
new file mode 100644
index 000000000..ec22e899a
--- /dev/null
+++ b/tools/perf/ui/gtk/Build
@@ -0,0 +1,9 @@
+CFLAGS_gtk += -fPIC $(GTK_CFLAGS)
+
+gtk-y += browser.o
+gtk-y += hists.o
+gtk-y += setup.o
+gtk-y += util.o
+gtk-y += helpline.o
+gtk-y += progress.o
+gtk-y += annotate.o
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
new file mode 100644
index 000000000..48428c9ac
--- /dev/null
+++ b/tools/perf/ui/gtk/annotate.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "gtk.h"
+#include "util/debug.h"
+#include "util/annotate.h"
+#include "util/evsel.h"
+#include "ui/helpline.h"
+#include <inttypes.h>
+#include <signal.h>
+
+enum {
+ ANN_COL__PERCENT,
+ ANN_COL__OFFSET,
+ ANN_COL__LINE,
+
+ MAX_ANN_COLS
+};
+
+static const char *const col_names[] = {
+ "Overhead",
+ "Offset",
+ "Line"
+};
+
+static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym,
+ struct disasm_line *dl, int evidx)
+{
+ struct sym_hist *symhist;
+ double percent = 0.0;
+ const char *markup;
+ int ret = 0;
+
+ strcpy(buf, "");
+
+ if (dl->al.offset == (s64) -1)
+ return 0;
+
+ symhist = annotation__histogram(symbol__annotation(sym), evidx);
+ if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples)
+ return 0;
+
+ percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples;
+
+ markup = perf_gtk__get_percent_color(percent);
+ if (markup)
+ ret += scnprintf(buf, size, "%s", markup);
+ ret += scnprintf(buf + ret, size - ret, "%6.2f%%", percent);
+ if (markup)
+ ret += scnprintf(buf + ret, size - ret, "</span>");
+
+ return ret;
+}
+
+static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym,
+ struct map *map, struct disasm_line *dl)
+{
+ u64 start = map__rip_2objdump(map, sym->start);
+
+ strcpy(buf, "");
+
+ if (dl->al.offset == (s64) -1)
+ return 0;
+
+ return scnprintf(buf, size, "%"PRIx64, start + dl->al.offset);
+}
+
+static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl)
+{
+ int ret = 0;
+ char *line = g_markup_escape_text(dl->al.line, -1);
+ const char *markup = "<span fgcolor='gray'>";
+
+ strcpy(buf, "");
+
+ if (!line)
+ return 0;
+
+ if (dl->al.offset != (s64) -1)
+ markup = NULL;
+
+ if (markup)
+ ret += scnprintf(buf, size, "%s", markup);
+ ret += scnprintf(buf + ret, size - ret, "%s", line);
+ if (markup)
+ ret += scnprintf(buf + ret, size - ret, "</span>");
+
+ g_free(line);
+ return ret;
+}
+
+static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym,
+ struct map *map, struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt __maybe_unused)
+{
+ struct disasm_line *pos, *n;
+ struct annotation *notes;
+ GType col_types[MAX_ANN_COLS];
+ GtkCellRenderer *renderer;
+ GtkListStore *store;
+ GtkWidget *view;
+ int i;
+ char s[512];
+
+ notes = symbol__annotation(sym);
+
+ for (i = 0; i < MAX_ANN_COLS; i++) {
+ col_types[i] = G_TYPE_STRING;
+ }
+ store = gtk_list_store_newv(MAX_ANN_COLS, col_types);
+
+ view = gtk_tree_view_new();
+ renderer = gtk_cell_renderer_text_new();
+
+ for (i = 0; i < MAX_ANN_COLS; i++) {
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, col_names[i], renderer, "markup",
+ i, NULL);
+ }
+
+ gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+ g_object_unref(GTK_TREE_MODEL(store));
+
+ list_for_each_entry(pos, &notes->src->source, al.node) {
+ GtkTreeIter iter;
+ int ret = 0;
+
+ gtk_list_store_append(store, &iter);
+
+ if (perf_evsel__is_group_event(evsel)) {
+ for (i = 0; i < evsel->nr_members; i++) {
+ ret += perf_gtk__get_percent(s + ret,
+ sizeof(s) - ret,
+ sym, pos,
+ evsel->idx + i);
+ ret += scnprintf(s + ret, sizeof(s) - ret, " ");
+ }
+ } else {
+ ret = perf_gtk__get_percent(s, sizeof(s), sym, pos,
+ evsel->idx);
+ }
+
+ if (ret)
+ gtk_list_store_set(store, &iter, ANN_COL__PERCENT, s, -1);
+ if (perf_gtk__get_offset(s, sizeof(s), sym, map, pos))
+ gtk_list_store_set(store, &iter, ANN_COL__OFFSET, s, -1);
+ if (perf_gtk__get_line(s, sizeof(s), pos))
+ gtk_list_store_set(store, &iter, ANN_COL__LINE, s, -1);
+ }
+
+ gtk_container_add(GTK_CONTAINER(window), view);
+
+ list_for_each_entry_safe(pos, n, &notes->src->source, al.node) {
+ list_del(&pos->al.node);
+ disasm_line__free(pos);
+ }
+
+ return 0;
+}
+
+static int symbol__gtk_annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt)
+{
+ GtkWidget *window;
+ GtkWidget *notebook;
+ GtkWidget *scrolled_window;
+ GtkWidget *tab_label;
+ int err;
+
+ if (map->dso->annotate_warned)
+ return -1;
+
+ err = symbol__annotate(sym, map, evsel, 0, &annotation__default_options, NULL);
+ if (err) {
+ char msg[BUFSIZ];
+ symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
+ ui__error("Couldn't annotate %s: %s\n", sym->name, msg);
+ return -1;
+ }
+
+ symbol__calc_percent(sym, evsel);
+
+ if (perf_gtk__is_active_context(pgctx)) {
+ window = pgctx->main_window;
+ notebook = pgctx->notebook;
+ } else {
+ GtkWidget *vbox;
+ GtkWidget *infobar;
+ GtkWidget *statbar;
+
+ signal(SIGSEGV, perf_gtk__signal);
+ signal(SIGFPE, perf_gtk__signal);
+ signal(SIGINT, perf_gtk__signal);
+ signal(SIGQUIT, perf_gtk__signal);
+ signal(SIGTERM, perf_gtk__signal);
+
+ window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+ gtk_window_set_title(GTK_WINDOW(window), "perf annotate");
+
+ g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+
+ pgctx = perf_gtk__activate_context(window);
+ if (!pgctx)
+ return -1;
+
+ vbox = gtk_vbox_new(FALSE, 0);
+ notebook = gtk_notebook_new();
+ pgctx->notebook = notebook;
+
+ gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+ infobar = perf_gtk__setup_info_bar();
+ if (infobar) {
+ gtk_box_pack_start(GTK_BOX(vbox), infobar,
+ FALSE, FALSE, 0);
+ }
+
+ statbar = perf_gtk__setup_statusbar();
+ gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+ gtk_container_add(GTK_CONTAINER(window), vbox);
+ }
+
+ scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+ tab_label = gtk_label_new(sym->name);
+
+ gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+ GTK_POLICY_AUTOMATIC,
+ GTK_POLICY_AUTOMATIC);
+
+ gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window,
+ tab_label);
+
+ perf_gtk__annotate_symbol(scrolled_window, sym, map, evsel, hbt);
+ return 0;
+}
+
+int hist_entry__gtk_annotate(struct hist_entry *he,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt)
+{
+ return symbol__gtk_annotate(he->ms.sym, he->ms.map, evsel, hbt);
+}
+
+void perf_gtk__show_annotations(void)
+{
+ GtkWidget *window;
+
+ if (!perf_gtk__is_active_context(pgctx))
+ return;
+
+ window = pgctx->main_window;
+ gtk_widget_show_all(window);
+
+ perf_gtk__resize_window(window);
+ gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
+
+ gtk_main();
+
+ perf_gtk__deactivate_context(&pgctx);
+}
diff --git a/tools/perf/ui/gtk/browser.c b/tools/perf/ui/gtk/browser.c
new file mode 100644
index 000000000..4820e25ac
--- /dev/null
+++ b/tools/perf/ui/gtk/browser.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../evlist.h"
+#include "../cache.h"
+#include "../evsel.h"
+#include "../sort.h"
+#include "../hist.h"
+#include "../helpline.h"
+#include "gtk.h"
+
+#include <signal.h>
+
+void perf_gtk__signal(int sig)
+{
+ perf_gtk__exit(false);
+ psignal(sig, "perf");
+}
+
+void perf_gtk__resize_window(GtkWidget *window)
+{
+ GdkRectangle rect;
+ GdkScreen *screen;
+ int monitor;
+ int height;
+ int width;
+
+ screen = gtk_widget_get_screen(window);
+
+ monitor = gdk_screen_get_monitor_at_window(screen, window->window);
+
+ gdk_screen_get_monitor_geometry(screen, monitor, &rect);
+
+ width = rect.width * 3 / 4;
+ height = rect.height * 3 / 4;
+
+ gtk_window_resize(GTK_WINDOW(window), width, height);
+}
+
+const char *perf_gtk__get_percent_color(double percent)
+{
+ if (percent >= MIN_RED)
+ return "<span fgcolor='red'>";
+ if (percent >= MIN_GREEN)
+ return "<span fgcolor='dark green'>";
+ return NULL;
+}
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+GtkWidget *perf_gtk__setup_info_bar(void)
+{
+ GtkWidget *info_bar;
+ GtkWidget *label;
+ GtkWidget *content_area;
+
+ info_bar = gtk_info_bar_new();
+ gtk_widget_set_no_show_all(info_bar, TRUE);
+
+ label = gtk_label_new("");
+ gtk_widget_show(label);
+
+ content_area = gtk_info_bar_get_content_area(GTK_INFO_BAR(info_bar));
+ gtk_container_add(GTK_CONTAINER(content_area), label);
+
+ gtk_info_bar_add_button(GTK_INFO_BAR(info_bar), GTK_STOCK_OK,
+ GTK_RESPONSE_OK);
+ g_signal_connect(info_bar, "response",
+ G_CALLBACK(gtk_widget_hide), NULL);
+
+ pgctx->info_bar = info_bar;
+ pgctx->message_label = label;
+
+ return info_bar;
+}
+#endif
+
+GtkWidget *perf_gtk__setup_statusbar(void)
+{
+ GtkWidget *stbar;
+ unsigned ctxid;
+
+ stbar = gtk_statusbar_new();
+
+ ctxid = gtk_statusbar_get_context_id(GTK_STATUSBAR(stbar),
+ "perf report");
+ pgctx->statbar = stbar;
+ pgctx->statbar_ctx_id = ctxid;
+
+ return stbar;
+}
diff --git a/tools/perf/ui/gtk/gtk.h b/tools/perf/ui/gtk/gtk.h
new file mode 100644
index 000000000..9846ea5c8
--- /dev/null
+++ b/tools/perf/ui/gtk/gtk.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_GTK_H_
+#define _PERF_GTK_H_ 1
+
+#include <stdbool.h>
+
+#pragma GCC diagnostic ignored "-Wstrict-prototypes"
+#include <gtk/gtk.h>
+#pragma GCC diagnostic error "-Wstrict-prototypes"
+
+
+struct perf_gtk_context {
+ GtkWidget *main_window;
+ GtkWidget *notebook;
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+ GtkWidget *info_bar;
+ GtkWidget *message_label;
+#endif
+ GtkWidget *statbar;
+ guint statbar_ctx_id;
+};
+
+int perf_gtk__init(void);
+void perf_gtk__exit(bool wait_for_ok);
+
+extern struct perf_gtk_context *pgctx;
+
+static inline bool perf_gtk__is_active_context(struct perf_gtk_context *ctx)
+{
+ return ctx && ctx->main_window;
+}
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window);
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx);
+
+void perf_gtk__init_helpline(void);
+void gtk_ui_progress__init(void);
+void perf_gtk__init_hpp(void);
+
+void perf_gtk__signal(int sig);
+void perf_gtk__resize_window(GtkWidget *window);
+const char *perf_gtk__get_percent_color(double percent);
+GtkWidget *perf_gtk__setup_statusbar(void);
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+GtkWidget *perf_gtk__setup_info_bar(void);
+#else
+static inline GtkWidget *perf_gtk__setup_info_bar(void)
+{
+ return NULL;
+}
+#endif
+
+struct perf_evsel;
+struct perf_evlist;
+struct hist_entry;
+struct hist_browser_timer;
+
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist, const char *help,
+ struct hist_browser_timer *hbt,
+ float min_pcnt);
+int hist_entry__gtk_annotate(struct hist_entry *he,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt);
+void perf_gtk__show_annotations(void);
+
+#endif /* _PERF_GTK_H_ */
diff --git a/tools/perf/ui/gtk/helpline.c b/tools/perf/ui/gtk/helpline.c
new file mode 100644
index 000000000..fbf1ea9ce
--- /dev/null
+++ b/tools/perf/ui/gtk/helpline.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include "gtk.h"
+#include "../ui.h"
+#include "../helpline.h"
+#include "../../util/debug.h"
+
+static void gtk_helpline_pop(void)
+{
+ if (!perf_gtk__is_active_context(pgctx))
+ return;
+
+ gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar),
+ pgctx->statbar_ctx_id);
+}
+
+static void gtk_helpline_push(const char *msg)
+{
+ if (!perf_gtk__is_active_context(pgctx))
+ return;
+
+ gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar),
+ pgctx->statbar_ctx_id, msg);
+}
+
+static int gtk_helpline_show(const char *fmt, va_list ap)
+{
+ int ret;
+ char *ptr;
+ static int backlog;
+
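+ /*
+ * Accumulate partial output across calls; flush to the statusbar
+ * once a full line (terminated by '\n') is available.
+ */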
+ ret = vscnprintf(ui_helpline__current + backlog,
+ sizeof(ui_helpline__current) - backlog, fmt, ap);
+ backlog += ret;
+
+ /* only first line can be displayed */
+ ptr = strchr(ui_helpline__current, '\n');
+ if (ptr && (ptr - ui_helpline__current) <= backlog) {
+ *ptr = '\0';
+ ui_helpline__puts(ui_helpline__current);
+ backlog = 0;
+ }
+
+ return ret;
+}
+
+static struct ui_helpline gtk_helpline_fns = {
+ .pop = gtk_helpline_pop,
+ .push = gtk_helpline_push,
+ .show = gtk_helpline_show,
+};
+
+void perf_gtk__init_helpline(void)
+{
+ helpline_fns = &gtk_helpline_fns;
+}
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
new file mode 100644
index 000000000..4ab663ec3
--- /dev/null
+++ b/tools/perf/ui/gtk/hists.c
@@ -0,0 +1,681 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../evlist.h"
+#include "../cache.h"
+#include "../evsel.h"
+#include "../sort.h"
+#include "../hist.h"
+#include "../helpline.h"
+#include "../string2.h"
+#include "gtk.h"
+#include <signal.h>
+
+#define MAX_COLUMNS 32
+
+static int __percent_color_snprintf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+ int ret = 0;
+ int len;
+ va_list args;
+ double percent;
+ const char *markup;
+ char *buf = hpp->buf;
+ size_t size = hpp->size;
+
+ va_start(args, fmt);
+ len = va_arg(args, int);
+ percent = va_arg(args, double);
+ va_end(args);
+
+ markup = perf_gtk__get_percent_color(percent);
+ if (markup)
+ ret += scnprintf(buf, size, "%s", markup);
+
+ ret += scnprintf(buf + ret, size - ret, fmt, len, percent);
+
+ if (markup)
+ ret += scnprintf(buf + ret, size - ret, "</span>");
+
+ return ret;
+}
+
+#define __HPP_COLOR_PERCENT_FN(_type, _field) \
+static u64 he_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
+ __percent_color_snprintf, true); \
+}
+
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+static u64 he_get_acc_##_field(struct hist_entry *he) \
+{ \
+ return he->stat_acc->_field; \
+} \
+ \
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, \
+ struct hist_entry *he) \
+{ \
+ return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
+ __percent_color_snprintf, true); \
+}
+
+__HPP_COLOR_PERCENT_FN(overhead, period)
+__HPP_COLOR_PERCENT_FN(overhead_sys, period_sys)
+__HPP_COLOR_PERCENT_FN(overhead_us, period_us)
+__HPP_COLOR_PERCENT_FN(overhead_guest_sys, period_guest_sys)
+__HPP_COLOR_PERCENT_FN(overhead_guest_us, period_guest_us)
+__HPP_COLOR_ACC_PERCENT_FN(overhead_acc, period)
+
+#undef __HPP_COLOR_PERCENT_FN
+
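+/*
+ * For reference, __HPP_COLOR_PERCENT_FN(overhead, period) above expands to
+ * he_get_period() plus perf_gtk__hpp_color_overhead(), which renders
+ * he->stat.period as a colored percentage via __percent_color_snprintf().
+ */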
+
+void perf_gtk__init_hpp(void)
+{
+ perf_hpp__format[PERF_HPP__OVERHEAD].color =
+ perf_gtk__hpp_color_overhead;
+ perf_hpp__format[PERF_HPP__OVERHEAD_SYS].color =
+ perf_gtk__hpp_color_overhead_sys;
+ perf_hpp__format[PERF_HPP__OVERHEAD_US].color =
+ perf_gtk__hpp_color_overhead_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_SYS].color =
+ perf_gtk__hpp_color_overhead_guest_sys;
+ perf_hpp__format[PERF_HPP__OVERHEAD_GUEST_US].color =
+ perf_gtk__hpp_color_overhead_guest_us;
+ perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color =
+ perf_gtk__hpp_color_overhead_acc;
+}
+
+static void perf_gtk__add_callchain_flat(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+ bool has_single_node = (rb_first(root) == rb_last(root));
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter, new_parent;
+ bool need_new_parent;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
+
+ new_parent = *parent;
+ need_new_parent = !has_single_node;
+
+ callchain_node__make_parent_list(node);
+
+ list_for_each_entry(chain, &node->parent_val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+
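+ /* ... then the node's own entries, under the same parent. */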
+ list_for_each_entry(chain, &node->val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+ }
+}
+
+static void perf_gtk__add_callchain_folded(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter;
+ char buf[64];
+ char *str, *str_alloc = NULL;
+ bool first = true;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
+
+ callchain_node__make_parent_list(node);
+
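+ /*
+ * Fold the whole chain into a single "sym1; sym2; ..." string;
+ * symbol_conf.field_sep, when set, overrides the separator.
+ */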
+ list_for_each_entry(chain, &node->parent_val, list) {
+ char name[1024];
+
+ callchain_list__sym_name(chain, name, sizeof(name), false);
+
+ if (asprintf(&str, "%s%s%s",
+ first ? "" : str_alloc,
+ first ? "" : symbol_conf.field_sep ?: "; ",
+ name) < 0)
+ return;
+
+ first = false;
+ free(str_alloc);
+ str_alloc = str;
+ }
+
+ list_for_each_entry(chain, &node->val, list) {
+ char name[1024];
+
+ callchain_list__sym_name(chain, name, sizeof(name), false);
+
+ if (asprintf(&str, "%s%s%s",
+ first ? "" : str_alloc,
+ first ? "" : symbol_conf.field_sep ?: "; ",
+ name) < 0)
+ return;
+
+ first = false;
+ free(str_alloc);
+ str_alloc = str;
+ }
+
+ gtk_tree_store_append(store, &iter, parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ gtk_tree_store_set(store, &iter, col, str, -1);
+
+ free(str_alloc);
+ }
+}
+
+static void perf_gtk__add_callchain_graph(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ struct rb_node *nd;
+ bool has_single_node = (rb_first(root) == rb_last(root));
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ struct callchain_node *node;
+ struct callchain_list *chain;
+ GtkTreeIter iter, new_parent;
+ bool need_new_parent;
+ u64 child_total;
+
+ node = rb_entry(nd, struct callchain_node, rb_node);
+
+ new_parent = *parent;
+ need_new_parent = !has_single_node && (node->val_nr > 1);
+
+ list_for_each_entry(chain, &node->val, list) {
+ char buf[128];
+
+ gtk_tree_store_append(store, &iter, &new_parent);
+
+ callchain_node__scnprintf_value(node, buf, sizeof(buf), total);
+ gtk_tree_store_set(store, &iter, 0, buf, -1);
+
+ callchain_list__sym_name(chain, buf, sizeof(buf), false);
+ gtk_tree_store_set(store, &iter, col, buf, -1);
+
+ if (need_new_parent) {
+ /*
+ * Only show the top-most symbol in a callchain
+ * if it's not the only callchain.
+ */
+ new_parent = iter;
+ need_new_parent = false;
+ }
+ }
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL)
+ child_total = node->children_hit;
+ else
+ child_total = total;
+
+ /* Now 'iter' refers to the last callchain_list */
+ perf_gtk__add_callchain_graph(&node->rb_root, store, &iter, col,
+ child_total);
+ }
+}
+
+static void perf_gtk__add_callchain(struct rb_root *root, GtkTreeStore *store,
+ GtkTreeIter *parent, int col, u64 total)
+{
+ if (callchain_param.mode == CHAIN_FLAT)
+ perf_gtk__add_callchain_flat(root, store, parent, col, total);
+ else if (callchain_param.mode == CHAIN_FOLDED)
+ perf_gtk__add_callchain_folded(root, store, parent, col, total);
+ else
+ perf_gtk__add_callchain_graph(root, store, parent, col, total);
+}
+
+static void on_row_activated(GtkTreeView *view, GtkTreePath *path,
+ GtkTreeViewColumn *col __maybe_unused,
+ gpointer user_data __maybe_unused)
+{
+ bool expanded = gtk_tree_view_row_expanded(view, path);
+
+ if (expanded)
+ gtk_tree_view_collapse_row(view, path);
+ else
+ gtk_tree_view_expand_row(view, path, FALSE);
+}
+
+static void perf_gtk__show_hists(GtkWidget *window, struct hists *hists,
+ float min_pcnt)
+{
+ struct perf_hpp_fmt *fmt;
+ GType col_types[MAX_COLUMNS];
+ GtkCellRenderer *renderer;
+ GtkTreeStore *store;
+ struct rb_node *nd;
+ GtkWidget *view;
+ int col_idx;
+ int sym_col = -1;
+ int nr_cols;
+ char s[512];
+
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ };
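+ /* Each fmt callback renders into the shared on-stack buffer 's'. */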
+
+ nr_cols = 0;
+
+ hists__for_each_format(hists, fmt)
+ col_types[nr_cols++] = G_TYPE_STRING;
+
+ store = gtk_tree_store_newv(nr_cols, col_types);
+
+ view = gtk_tree_view_new();
+
+ renderer = gtk_cell_renderer_text_new();
+
+ col_idx = 0;
+
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ /*
+ * XXX no way to determine where the symbol column is;
+ * just use the last column for now.
+ */
+ if (perf_hpp__is_sort_entry(fmt))
+ sym_col = col_idx;
+
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, fmt->name,
+ renderer, "markup",
+ col_idx++, NULL);
+ }
+
+ for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+ GtkTreeViewColumn *column;
+
+ column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+ gtk_tree_view_column_set_resizable(column, TRUE);
+
+ if (col_idx == sym_col) {
+ gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+ column);
+ }
+ }
+
+ gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+
+ g_object_unref(GTK_TREE_MODEL(store));
+
+ for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ GtkTreeIter iter;
+ u64 total = hists__total_period(h->hists);
+ float percent;
+
+ if (h->filtered)
+ continue;
+
+ percent = hist_entry__get_percent_limit(h);
+ if (percent < min_pcnt)
+ continue;
+
+ gtk_tree_store_append(store, &iter, NULL);
+
+ col_idx = 0;
+
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, h->hists))
+ continue;
+
+ if (fmt->color)
+ fmt->color(fmt, &hpp, h);
+ else
+ fmt->entry(fmt, &hpp, h);
+
+ gtk_tree_store_set(store, &iter, col_idx++, s, -1);
+ }
+
+ if (hist_entry__has_callchains(h) &&
+ symbol_conf.use_callchain && hists__has(hists, sym)) {
+ if (callchain_param.mode == CHAIN_GRAPH_REL)
+ total = symbol_conf.cumulate_callchain ?
+ h->stat_acc->period : h->stat.period;
+
+ perf_gtk__add_callchain(&h->sorted_chain, store, &iter,
+ sym_col, total);
+ }
+ }
+
+ gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+ g_signal_connect(view, "row-activated",
+ G_CALLBACK(on_row_activated), NULL);
+ gtk_container_add(GTK_CONTAINER(window), view);
+}
+
+static void perf_gtk__add_hierarchy_entries(struct hists *hists,
+ struct rb_root *root,
+ GtkTreeStore *store,
+ GtkTreeIter *parent,
+ struct perf_hpp *hpp,
+ float min_pcnt)
+{
+ int col_idx = 0;
+ struct rb_node *node;
+ struct hist_entry *he;
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ u64 total = hists__total_period(hists);
+ int size;
+
+ for (node = rb_first(root); node; node = rb_next(node)) {
+ GtkTreeIter iter;
+ float percent;
+ char *bf;
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+ if (he->filtered)
+ continue;
+
+ percent = hist_entry__get_percent_limit(he);
+ if (percent < min_pcnt)
+ continue;
+
+ gtk_tree_store_append(store, &iter, parent);
+
+ col_idx = 0;
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (fmt->color)
+ fmt->color(fmt, hpp, he);
+ else
+ fmt->entry(fmt, hpp, he);
+
+ gtk_tree_store_set(store, &iter, col_idx++, hpp->buf, -1);
+ }
+
+ bf = hpp->buf;
+ size = hpp->size;
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ int ret;
+
+ if (fmt->color)
+ ret = fmt->color(fmt, hpp, he);
+ else
+ ret = fmt->entry(fmt, hpp, he);
+
+ snprintf(hpp->buf + ret, hpp->size - ret, "  ");
+ advance_hpp(hpp, ret + 2);
+ }
+
+ gtk_tree_store_set(store, &iter, col_idx, ltrim(rtrim(bf)), -1);
+
+ if (!he->leaf) {
+ hpp->buf = bf;
+ hpp->size = size;
+
+ perf_gtk__add_hierarchy_entries(hists, &he->hroot_out,
+ store, &iter, hpp,
+ min_pcnt);
+
+ if (!hist_entry__has_hierarchy_children(he, min_pcnt)) {
+ char buf[32];
+ GtkTreeIter child;
+
+ snprintf(buf, sizeof(buf), "no entry >= %.2f%%",
+ min_pcnt);
+
+ gtk_tree_store_append(store, &child, &iter);
+ gtk_tree_store_set(store, &child, col_idx, buf, -1);
+ }
+ }
+
+ if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
+ if (callchain_param.mode == CHAIN_GRAPH_REL)
+ total = symbol_conf.cumulate_callchain ?
+ he->stat_acc->period : he->stat.period;
+
+ perf_gtk__add_callchain(&he->sorted_chain, store, &iter,
+ col_idx, total);
+ }
+ }
+
+}
+
+static void perf_gtk__show_hierarchy(GtkWidget *window, struct hists *hists,
+ float min_pcnt)
+{
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ GType col_types[MAX_COLUMNS];
+ GtkCellRenderer *renderer;
+ GtkTreeStore *store;
+ GtkWidget *view;
+ int col_idx;
+ int nr_cols = 0;
+ char s[512];
+ char buf[512];
+ bool first_node, first_col;
+ struct perf_hpp hpp = {
+ .buf = s,
+ .size = sizeof(s),
+ };
+
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__is_sort_entry(fmt) ||
+ perf_hpp__is_dynamic_entry(fmt))
+ break;
+
+ col_types[nr_cols++] = G_TYPE_STRING;
+ }
+ col_types[nr_cols++] = G_TYPE_STRING;
+
+ store = gtk_tree_store_newv(nr_cols, col_types);
+ view = gtk_tree_view_new();
+ renderer = gtk_cell_renderer_text_new();
+
+ col_idx = 0;
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, fmt->name,
+ renderer, "markup",
+ col_idx++, NULL);
+ }
+
+ /* construct a merged column header since sort keys share a single column */
+ buf[0] = '\0';
+ first_node = true;
+ list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+ if (!first_node)
+ strcat(buf, " / ");
+ first_node = false;
+
+ first_col = true;
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first_col)
+ strcat(buf, "+");
+ first_col = false;
+
+ fmt->header(fmt, &hpp, hists, 0, NULL);
+ strcat(buf, ltrim(rtrim(hpp.buf)));
+ }
+ }
+
+ gtk_tree_view_insert_column_with_attributes(GTK_TREE_VIEW(view),
+ -1, buf,
+ renderer, "markup",
+ col_idx++, NULL);
+
+ for (col_idx = 0; col_idx < nr_cols; col_idx++) {
+ GtkTreeViewColumn *column;
+
+ column = gtk_tree_view_get_column(GTK_TREE_VIEW(view), col_idx);
+ gtk_tree_view_column_set_resizable(column, TRUE);
+
+ if (col_idx == 0) {
+ gtk_tree_view_set_expander_column(GTK_TREE_VIEW(view),
+ column);
+ }
+ }
+
+ gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store));
+ g_object_unref(GTK_TREE_MODEL(store));
+
+ perf_gtk__add_hierarchy_entries(hists, &hists->entries, store,
+ NULL, &hpp, min_pcnt);
+
+ gtk_tree_view_set_rules_hint(GTK_TREE_VIEW(view), TRUE);
+
+ g_signal_connect(view, "row-activated",
+ G_CALLBACK(on_row_activated), NULL);
+ gtk_container_add(GTK_CONTAINER(window), view);
+}
+
+int perf_evlist__gtk_browse_hists(struct perf_evlist *evlist,
+ const char *help,
+ struct hist_browser_timer *hbt __maybe_unused,
+ float min_pcnt)
+{
+ struct perf_evsel *pos;
+ GtkWidget *vbox;
+ GtkWidget *notebook;
+ GtkWidget *info_bar;
+ GtkWidget *statbar;
+ GtkWidget *window;
+
+ signal(SIGSEGV, perf_gtk__signal);
+ signal(SIGFPE, perf_gtk__signal);
+ signal(SIGINT, perf_gtk__signal);
+ signal(SIGQUIT, perf_gtk__signal);
+ signal(SIGTERM, perf_gtk__signal);
+
+ window = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+
+ gtk_window_set_title(GTK_WINDOW(window), "perf report");
+
+ g_signal_connect(window, "delete_event", gtk_main_quit, NULL);
+
+ pgctx = perf_gtk__activate_context(window);
+ if (!pgctx)
+ return -1;
+
+ vbox = gtk_vbox_new(FALSE, 0);
+
+ notebook = gtk_notebook_new();
+
+ gtk_box_pack_start(GTK_BOX(vbox), notebook, TRUE, TRUE, 0);
+
+ info_bar = perf_gtk__setup_info_bar();
+ if (info_bar)
+ gtk_box_pack_start(GTK_BOX(vbox), info_bar, FALSE, FALSE, 0);
+
+ statbar = perf_gtk__setup_statusbar();
+ gtk_box_pack_start(GTK_BOX(vbox), statbar, FALSE, FALSE, 0);
+
+ gtk_container_add(GTK_CONTAINER(window), vbox);
+
+ evlist__for_each_entry(evlist, pos) {
+ struct hists *hists = evsel__hists(pos);
+ const char *evname = perf_evsel__name(pos);
+ GtkWidget *scrolled_window;
+ GtkWidget *tab_label;
+ char buf[512];
+ size_t size = sizeof(buf);
+
+ if (symbol_conf.event_group) {
+ if (!perf_evsel__is_group_leader(pos))
+ continue;
+
+ if (pos->nr_members > 1) {
+ perf_evsel__group_desc(pos, buf, size);
+ evname = buf;
+ }
+ }
+
+ scrolled_window = gtk_scrolled_window_new(NULL, NULL);
+
+ gtk_scrolled_window_set_policy(GTK_SCROLLED_WINDOW(scrolled_window),
+ GTK_POLICY_AUTOMATIC,
+ GTK_POLICY_AUTOMATIC);
+
+ if (symbol_conf.report_hierarchy)
+ perf_gtk__show_hierarchy(scrolled_window, hists, min_pcnt);
+ else
+ perf_gtk__show_hists(scrolled_window, hists, min_pcnt);
+
+ tab_label = gtk_label_new(evname);
+
+ gtk_notebook_append_page(GTK_NOTEBOOK(notebook), scrolled_window, tab_label);
+ }
+
+ gtk_widget_show_all(window);
+
+ perf_gtk__resize_window(window);
+
+ gtk_window_set_position(GTK_WINDOW(window), GTK_WIN_POS_CENTER);
+
+ ui_helpline__push(help);
+
+ gtk_main();
+
+ perf_gtk__deactivate_context(&pgctx);
+
+ return 0;
+}
diff --git a/tools/perf/ui/gtk/progress.c b/tools/perf/ui/gtk/progress.c
new file mode 100644
index 000000000..b6ad8857d
--- /dev/null
+++ b/tools/perf/ui/gtk/progress.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+
+#include "gtk.h"
+#include "../progress.h"
+#include "util.h"
+
+static GtkWidget *dialog;
+static GtkWidget *progress;
+
+static void gtk_ui_progress__update(struct ui_progress *p)
+{
+ double fraction = p->total ? 1.0 * p->curr / p->total : 0.0;
+ char buf[1024];
+
+ if (dialog == NULL) {
+ GtkWidget *vbox = gtk_vbox_new(TRUE, 5);
+ GtkWidget *label = gtk_label_new(p->title);
+
+ dialog = gtk_window_new(GTK_WINDOW_TOPLEVEL);
+ progress = gtk_progress_bar_new();
+
+ gtk_box_pack_start(GTK_BOX(vbox), label, TRUE, FALSE, 3);
+ gtk_box_pack_start(GTK_BOX(vbox), progress, TRUE, TRUE, 3);
+
+ gtk_container_add(GTK_CONTAINER(dialog), vbox);
+
+ gtk_window_set_title(GTK_WINDOW(dialog), "perf");
+ gtk_window_resize(GTK_WINDOW(dialog), 300, 80);
+ gtk_window_set_position(GTK_WINDOW(dialog), GTK_WIN_POS_CENTER);
+
+ gtk_widget_show_all(dialog);
+ }
+
+ gtk_progress_bar_set_fraction(GTK_PROGRESS_BAR(progress), fraction);
+ snprintf(buf, sizeof(buf), "%"PRIu64" / %"PRIu64, p->curr, p->total);
+ gtk_progress_bar_set_text(GTK_PROGRESS_BAR(progress), buf);
+
+ /* gtk_main() isn't running yet, so pump pending events manually */
+ while (gtk_events_pending())
+ gtk_main_iteration();
+}
+
+static void gtk_ui_progress__finish(void)
+{
+ /* this will also destroy all of its children */
+ gtk_widget_destroy(dialog);
+
+ dialog = NULL;
+}
+
+static struct ui_progress_ops gtk_ui_progress__ops = {
+ .update = gtk_ui_progress__update,
+ .finish = gtk_ui_progress__finish,
+};
+
+void gtk_ui_progress__init(void)
+{
+ ui_progress__ops = &gtk_ui_progress__ops;
+}
diff --git a/tools/perf/ui/gtk/setup.c b/tools/perf/ui/gtk/setup.c
new file mode 100644
index 000000000..506e73b38
--- /dev/null
+++ b/tools/perf/ui/gtk/setup.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "gtk.h"
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+
+extern struct perf_error_ops perf_gtk_eops;
+
+int perf_gtk__init(void)
+{
+ perf_error__register(&perf_gtk_eops);
+ perf_gtk__init_helpline();
+ gtk_ui_progress__init();
+ perf_gtk__init_hpp();
+
+ return gtk_init_check(NULL, NULL) ? 0 : -1;
+}
+
+void perf_gtk__exit(bool wait_for_ok __maybe_unused)
+{
+ if (!perf_gtk__is_active_context(pgctx))
+ return;
+ perf_error__unregister(&perf_gtk_eops);
+ gtk_main_quit();
+}
diff --git a/tools/perf/ui/gtk/util.c b/tools/perf/ui/gtk/util.c
new file mode 100644
index 000000000..7250d8101
--- /dev/null
+++ b/tools/perf/ui/gtk/util.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../util.h"
+#include "../../util/util.h"
+#include "../../util/debug.h"
+#include "gtk.h"
+
+#include <string.h>
+
+
+struct perf_gtk_context *pgctx;
+
+struct perf_gtk_context *perf_gtk__activate_context(GtkWidget *window)
+{
+ struct perf_gtk_context *ctx;
+
+ ctx = malloc(sizeof(*ctx));
+ if (ctx)
+ ctx->main_window = window;
+
+ return ctx;
+}
+
+int perf_gtk__deactivate_context(struct perf_gtk_context **ctx)
+{
+ if (!perf_gtk__is_active_context(*ctx))
+ return -1;
+
+ zfree(ctx);
+ return 0;
+}
+
+static int perf_gtk__error(const char *format, va_list args)
+{
+ char *msg;
+ GtkWidget *dialog;
+
+ if (!perf_gtk__is_active_context(pgctx) ||
+ vasprintf(&msg, format, args) < 0) {
+ fprintf(stderr, "Error:\n");
+ vfprintf(stderr, format, args);
+ fprintf(stderr, "\n");
+ return -1;
+ }
+
+ dialog = gtk_message_dialog_new_with_markup(GTK_WINDOW(pgctx->main_window),
+ GTK_DIALOG_DESTROY_WITH_PARENT,
+ GTK_MESSAGE_ERROR,
+ GTK_BUTTONS_CLOSE,
+ "<b>Error</b>\n\n%s", msg);
+ gtk_dialog_run(GTK_DIALOG(dialog));
+
+ gtk_widget_destroy(dialog);
+ free(msg);
+ return 0;
+}
+
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+static int perf_gtk__warning_info_bar(const char *format, va_list args)
+{
+ char *msg;
+
+ if (!perf_gtk__is_active_context(pgctx) ||
+ vasprintf(&msg, format, args) < 0) {
+ fprintf(stderr, "Warning:\n");
+ vfprintf(stderr, format, args);
+ fprintf(stderr, "\n");
+ return -1;
+ }
+
+ gtk_label_set_text(GTK_LABEL(pgctx->message_label), msg);
+ gtk_info_bar_set_message_type(GTK_INFO_BAR(pgctx->info_bar),
+ GTK_MESSAGE_WARNING);
+ gtk_widget_show(pgctx->info_bar);
+
+ free(msg);
+ return 0;
+}
+#else
+static int perf_gtk__warning_statusbar(const char *format, va_list args)
+{
+ char *msg, *p;
+
+ if (!perf_gtk__is_active_context(pgctx) ||
+ vasprintf(&msg, format, args) < 0) {
+ fprintf(stderr, "Warning:\n");
+ vfprintf(stderr, format, args);
+ fprintf(stderr, "\n");
+ return -1;
+ }
+
+ gtk_statusbar_pop(GTK_STATUSBAR(pgctx->statbar),
+ pgctx->statbar_ctx_id);
+
+ /* Only first line can be displayed */
+ p = strchr(msg, '\n');
+ if (p)
+ *p = '\0';
+
+ gtk_statusbar_push(GTK_STATUSBAR(pgctx->statbar),
+ pgctx->statbar_ctx_id, msg);
+
+ free(msg);
+ return 0;
+}
+#endif
+
+struct perf_error_ops perf_gtk_eops = {
+ .error = perf_gtk__error,
+#ifdef HAVE_GTK_INFO_BAR_SUPPORT
+ .warning = perf_gtk__warning_info_bar,
+#else
+ .warning = perf_gtk__warning_statusbar,
+#endif
+};
diff --git a/tools/perf/ui/helpline.c b/tools/perf/ui/helpline.c
new file mode 100644
index 000000000..b3c421429
--- /dev/null
+++ b/tools/perf/ui/helpline.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../debug.h"
+#include "helpline.h"
+#include "ui.h"
+#include "../util.h"
+
+char ui_helpline__current[512];
+
+static void nop_helpline__pop(void)
+{
+}
+
+static void nop_helpline__push(const char *msg __maybe_unused)
+{
+}
+
+static int nop_helpline__show(const char *fmt __maybe_unused,
+ va_list ap __maybe_unused)
+{
+ return 0;
+}
+
+static struct ui_helpline default_helpline_fns = {
+ .pop = nop_helpline__pop,
+ .push = nop_helpline__push,
+ .show = nop_helpline__show,
+};
+
+struct ui_helpline *helpline_fns = &default_helpline_fns;
+
+void ui_helpline__pop(void)
+{
+ helpline_fns->pop();
+}
+
+void ui_helpline__push(const char *msg)
+{
+ helpline_fns->push(msg);
+}
+
+void ui_helpline__vpush(const char *fmt, va_list ap)
+{
+ char *s;
+
+ if (vasprintf(&s, fmt, ap) < 0)
+ vfprintf(stderr, fmt, ap);
+ else {
+ ui_helpline__push(s);
+ free(s);
+ }
+}
+
+void ui_helpline__fpush(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ ui_helpline__vpush(fmt, ap);
+ va_end(ap);
+}
+
+void ui_helpline__puts(const char *msg)
+{
+ ui_helpline__pop();
+ ui_helpline__push(msg);
+}
+
+int ui_helpline__vshow(const char *fmt, va_list ap)
+{
+ return helpline_fns->show(fmt, ap);
+}
+
+void ui_helpline__printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ ui_helpline__pop();
+ va_start(ap, fmt);
+ ui_helpline__vpush(fmt, ap);
+ va_end(ap);
+}
diff --git a/tools/perf/ui/helpline.h b/tools/perf/ui/helpline.h
new file mode 100644
index 000000000..8f775a053
--- /dev/null
+++ b/tools/perf/ui/helpline.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_HELPLINE_H_
+#define _PERF_UI_HELPLINE_H_ 1
+
+#include <stdio.h>
+#include <stdarg.h>
+
+#include "../util/cache.h"
+
+struct ui_helpline {
+ void (*pop)(void);
+ void (*push)(const char *msg);
+ int (*show)(const char *fmt, va_list ap);
+};
+
+extern struct ui_helpline *helpline_fns;
+
+void ui_helpline__init(void);
+
+void ui_helpline__pop(void);
+void ui_helpline__push(const char *msg);
+void ui_helpline__vpush(const char *fmt, va_list ap);
+void ui_helpline__fpush(const char *fmt, ...);
+void ui_helpline__puts(const char *msg);
+void ui_helpline__printf(const char *fmt, ...);
+int ui_helpline__vshow(const char *fmt, va_list ap);
+
+extern char ui_helpline__current[512];
+extern char ui_helpline__last_msg[];
+
+#endif /* _PERF_UI_HELPLINE_H_ */
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
new file mode 100644
index 000000000..4afb63cea
--- /dev/null
+++ b/tools/perf/ui/hist.c
@@ -0,0 +1,822 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <math.h>
+#include <linux/compiler.h>
+
+#include "../util/hist.h"
+#include "../util/util.h"
+#include "../util/sort.h"
+#include "../util/evsel.h"
+#include "../util/evlist.h"
+
+/* hist period print (hpp) functions */
+
+#define hpp__call_print_fn(hpp, fn, fmt, ...) \
+({ \
+ int __ret = fn(hpp, fmt, ##__VA_ARGS__); \
+ advance_hpp(hpp, __ret); \
+ __ret; \
+})
+
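+/*
+ * Note that hpp__call_print_fn() advances hpp->buf and shrinks hpp->size
+ * after every field, so consecutive calls append to the same buffer;
+ * __hpp__fmt() below restores the original pointer and size on return.
+ */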
+static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
+ hpp_field_fn get_field, const char *fmt, int len,
+ hpp_snprint_fn print_fn, bool fmt_percent)
+{
+ int ret;
+ struct hists *hists = he->hists;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ char *buf = hpp->buf;
+ size_t size = hpp->size;
+
+ if (fmt_percent) {
+ double percent = 0.0;
+ u64 total = hists__total_period(hists);
+
+ if (total)
+ percent = 100.0 * get_field(he) / total;
+
+ ret = hpp__call_print_fn(hpp, print_fn, fmt, len, percent);
+ } else
+ ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
+
+ if (perf_evsel__is_group_event(evsel)) {
+ int prev_idx, idx_delta;
+ struct hist_entry *pair;
+ int nr_members = evsel->nr_members;
+
+ prev_idx = perf_evsel__group_idx(evsel);
+
+ list_for_each_entry(pair, &he->pairs.head, pairs.node) {
+ u64 period = get_field(pair);
+ u64 total = hists__total_period(pair->hists);
+
+ if (!total)
+ continue;
+
+ evsel = hists_to_evsel(pair->hists);
+ idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+
+ while (idx_delta--) {
+				/*
+				 * zero-fill group members in the middle which
+				 * have no samples
+				 */
+ if (fmt_percent) {
+ ret += hpp__call_print_fn(hpp, print_fn,
+ fmt, len, 0.0);
+ } else {
+ ret += hpp__call_print_fn(hpp, print_fn,
+ fmt, len, 0ULL);
+ }
+ }
+
+ if (fmt_percent) {
+ ret += hpp__call_print_fn(hpp, print_fn, fmt, len,
+ 100.0 * period / total);
+ } else {
+ ret += hpp__call_print_fn(hpp, print_fn, fmt,
+ len, period);
+ }
+
+ prev_idx = perf_evsel__group_idx(evsel);
+ }
+
+ idx_delta = nr_members - prev_idx - 1;
+
+ while (idx_delta--) {
+			/*
+			 * zero-fill trailing group members which have no samples
+			 */
+ if (fmt_percent) {
+ ret += hpp__call_print_fn(hpp, print_fn,
+ fmt, len, 0.0);
+ } else {
+ ret += hpp__call_print_fn(hpp, print_fn,
+ fmt, len, 0ULL);
+ }
+ }
+ }
+
+	/*
+	 * Restore the original buf and size, as that is where the caller
+	 * expects the result to be saved.
+	 */
+ hpp->buf = buf;
+ hpp->size = size;
+
+ return ret;
+}
+
+int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, hpp_field_fn get_field,
+ const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+{
+ int len = fmt->user_len ?: fmt->len;
+
+ if (symbol_conf.field_sep) {
+ return __hpp__fmt(hpp, he, get_field, fmtstr, 1,
+ print_fn, fmt_percent);
+ }
+
+ if (fmt_percent)
+ len -= 2; /* 2 for a space and a % sign */
+ else
+ len -= 1;
+
+ return __hpp__fmt(hpp, he, get_field, fmtstr, len, print_fn, fmt_percent);
+}
+
+int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, hpp_field_fn get_field,
+ const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent)
+{
+ if (!symbol_conf.cumulate_callchain) {
+ int len = fmt->user_len ?: fmt->len;
+ return snprintf(hpp->buf, hpp->size, " %*s", len - 1, "N/A");
+ }
+
+ return hpp__fmt(fmt, hpp, he, get_field, fmtstr, print_fn, fmt_percent);
+}
+
+static int field_cmp(u64 field_a, u64 field_b)
+{
+ if (field_a > field_b)
+ return 1;
+ if (field_a < field_b)
+ return -1;
+ return 0;
+}
+
+static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
+ hpp_field_fn get_field)
+{
+ s64 ret;
+ int i, nr_members;
+ struct perf_evsel *evsel;
+ struct hist_entry *pair;
+ u64 *fields_a, *fields_b;
+
+ ret = field_cmp(get_field(a), get_field(b));
+ if (ret || !symbol_conf.event_group)
+ return ret;
+
+ evsel = hists_to_evsel(a->hists);
+ if (!perf_evsel__is_group_event(evsel))
+ return ret;
+
+ nr_members = evsel->nr_members;
+ fields_a = calloc(nr_members, sizeof(*fields_a));
+ fields_b = calloc(nr_members, sizeof(*fields_b));
+
+ if (!fields_a || !fields_b)
+ goto out;
+
+ list_for_each_entry(pair, &a->pairs.head, pairs.node) {
+ evsel = hists_to_evsel(pair->hists);
+ fields_a[perf_evsel__group_idx(evsel)] = get_field(pair);
+ }
+
+ list_for_each_entry(pair, &b->pairs.head, pairs.node) {
+ evsel = hists_to_evsel(pair->hists);
+ fields_b[perf_evsel__group_idx(evsel)] = get_field(pair);
+ }
+
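+	/* slot 0 belongs to the group leader, which was already compared above */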
+ for (i = 1; i < nr_members; i++) {
+ ret = field_cmp(fields_a[i], fields_b[i]);
+ if (ret)
+ break;
+ }
+
+out:
+ free(fields_a);
+ free(fields_b);
+
+ return ret;
+}
+
+static int __hpp__sort_acc(struct hist_entry *a, struct hist_entry *b,
+ hpp_field_fn get_field)
+{
+ s64 ret = 0;
+
+ if (symbol_conf.cumulate_callchain) {
+ /*
+ * Put caller above callee when they have equal period.
+ */
+ ret = field_cmp(get_field(a), get_field(b));
+ if (ret)
+ return ret;
+
+ if (a->thread != b->thread || !hist_entry__has_callchains(a) || !symbol_conf.use_callchain)
+ return 0;
+
+ ret = b->callchain->max_depth - a->callchain->max_depth;
+ if (callchain_param.order == ORDER_CALLER)
+ ret = -ret;
+ }
+ return ret;
+}
+
+static int hpp__width_fn(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists)
+{
+ int len = fmt->user_len ?: fmt->len;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+
+ if (symbol_conf.event_group)
+ len = max(len, evsel->nr_members * fmt->len);
+
+ if (len < (int)strlen(fmt->name))
+ len = strlen(fmt->name);
+
+ return len;
+}
+
+static int hpp__header_fn(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists, int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ int len = hpp__width_fn(fmt, hpp, hists);
+ return scnprintf(hpp->buf, hpp->size, "%*s", len, fmt->name);
+}
+
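+/*
+ * The percent printers share " %*.2f%%"-style format strings: the field
+ * width and the value travel through the va_list, so pull them out here
+ * before delegating.
+ */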
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+ va_list args;
+ ssize_t ssize = hpp->size;
+ double percent;
+ int ret, len;
+
+ va_start(args, fmt);
+ len = va_arg(args, int);
+ percent = va_arg(args, double);
+ ret = percent_color_len_snprintf(hpp->buf, hpp->size, fmt, len, percent);
+ va_end(args);
+
+ return (ret >= ssize) ? (ssize - 1) : ret;
+}
+
+static int hpp_entry_scnprintf(struct perf_hpp *hpp, const char *fmt, ...)
+{
+ va_list args;
+ ssize_t ssize = hpp->size;
+ int ret;
+
+ va_start(args, fmt);
+ ret = vsnprintf(hpp->buf, hpp->size, fmt, args);
+ va_end(args);
+
+ return (ret >= ssize) ? (ssize - 1) : ret;
+}
+
+#define __HPP_COLOR_PERCENT_FN(_type, _field) \
+static u64 he_get_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
+ hpp_color_scnprintf, true); \
+}
+
+#define __HPP_ENTRY_PERCENT_FN(_type, _field) \
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, he_get_##_field, " %*.2f%%", \
+ hpp_entry_scnprintf, true); \
+}
+
+#define __HPP_SORT_FN(_type, _field) \
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *a, struct hist_entry *b) \
+{ \
+ return __hpp__sort(a, b, he_get_##_field); \
+}
+
+#define __HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+static u64 he_get_acc_##_field(struct hist_entry *he) \
+{ \
+ return he->stat_acc->_field; \
+} \
+ \
+static int hpp__color_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
+ hpp_color_scnprintf, true); \
+}
+
+#define __HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt_acc(fmt, hpp, he, he_get_acc_##_field, " %*.2f%%", \
+ hpp_entry_scnprintf, true); \
+}
+
+#define __HPP_SORT_ACC_FN(_type, _field) \
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *a, struct hist_entry *b) \
+{ \
+ return __hpp__sort_acc(a, b, he_get_acc_##_field); \
+}
+
+#define __HPP_ENTRY_RAW_FN(_type, _field) \
+static u64 he_get_raw_##_field(struct hist_entry *he) \
+{ \
+ return he->stat._field; \
+} \
+ \
+static int hpp__entry_##_type(struct perf_hpp_fmt *fmt, \
+ struct perf_hpp *hpp, struct hist_entry *he) \
+{ \
+ return hpp__fmt(fmt, hpp, he, he_get_raw_##_field, " %*"PRIu64, \
+ hpp_entry_scnprintf, false); \
+}
+
+#define __HPP_SORT_RAW_FN(_type, _field) \
+static int64_t hpp__sort_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+ struct hist_entry *a, struct hist_entry *b) \
+{ \
+ return __hpp__sort(a, b, he_get_raw_##_field); \
+}
+
+
+#define HPP_PERCENT_FNS(_type, _field) \
+__HPP_COLOR_PERCENT_FN(_type, _field) \
+__HPP_ENTRY_PERCENT_FN(_type, _field) \
+__HPP_SORT_FN(_type, _field)
+
+#define HPP_PERCENT_ACC_FNS(_type, _field) \
+__HPP_COLOR_ACC_PERCENT_FN(_type, _field) \
+__HPP_ENTRY_ACC_PERCENT_FN(_type, _field) \
+__HPP_SORT_ACC_FN(_type, _field)
+
+#define HPP_RAW_FNS(_type, _field) \
+__HPP_ENTRY_RAW_FN(_type, _field) \
+__HPP_SORT_RAW_FN(_type, _field)
+
+HPP_PERCENT_FNS(overhead, period)
+HPP_PERCENT_FNS(overhead_sys, period_sys)
+HPP_PERCENT_FNS(overhead_us, period_us)
+HPP_PERCENT_FNS(overhead_guest_sys, period_guest_sys)
+HPP_PERCENT_FNS(overhead_guest_us, period_guest_us)
+HPP_PERCENT_ACC_FNS(overhead_acc, period)
+
+HPP_RAW_FNS(samples, nr_events)
+HPP_RAW_FNS(period, period)
+
+static int64_t hpp__nop_cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *a __maybe_unused,
+ struct hist_entry *b __maybe_unused)
+{
+ return 0;
+}
+
+static bool perf_hpp__is_hpp_entry(struct perf_hpp_fmt *a)
+{
+ return a->header == hpp__header_fn;
+}
+
+static bool hpp__equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ if (!perf_hpp__is_hpp_entry(a) || !perf_hpp__is_hpp_entry(b))
+ return false;
+
+ return a->idx == b->idx;
+}
+
+#define HPP__COLOR_PRINT_FNS(_name, _fn, _idx) \
+ { \
+ .name = _name, \
+ .header = hpp__header_fn, \
+ .width = hpp__width_fn, \
+ .color = hpp__color_ ## _fn, \
+ .entry = hpp__entry_ ## _fn, \
+ .cmp = hpp__nop_cmp, \
+ .collapse = hpp__nop_cmp, \
+ .sort = hpp__sort_ ## _fn, \
+ .idx = PERF_HPP__ ## _idx, \
+ .equal = hpp__equal, \
+ }
+
+#define HPP__COLOR_ACC_PRINT_FNS(_name, _fn, _idx) \
+ { \
+ .name = _name, \
+ .header = hpp__header_fn, \
+ .width = hpp__width_fn, \
+ .color = hpp__color_ ## _fn, \
+ .entry = hpp__entry_ ## _fn, \
+ .cmp = hpp__nop_cmp, \
+ .collapse = hpp__nop_cmp, \
+ .sort = hpp__sort_ ## _fn, \
+ .idx = PERF_HPP__ ## _idx, \
+ .equal = hpp__equal, \
+ }
+
+#define HPP__PRINT_FNS(_name, _fn, _idx) \
+ { \
+ .name = _name, \
+ .header = hpp__header_fn, \
+ .width = hpp__width_fn, \
+ .entry = hpp__entry_ ## _fn, \
+ .cmp = hpp__nop_cmp, \
+ .collapse = hpp__nop_cmp, \
+ .sort = hpp__sort_ ## _fn, \
+ .idx = PERF_HPP__ ## _idx, \
+ .equal = hpp__equal, \
+ }
+
+struct perf_hpp_fmt perf_hpp__format[] = {
+ HPP__COLOR_PRINT_FNS("Overhead", overhead, OVERHEAD),
+ HPP__COLOR_PRINT_FNS("sys", overhead_sys, OVERHEAD_SYS),
+ HPP__COLOR_PRINT_FNS("usr", overhead_us, OVERHEAD_US),
+ HPP__COLOR_PRINT_FNS("guest sys", overhead_guest_sys, OVERHEAD_GUEST_SYS),
+ HPP__COLOR_PRINT_FNS("guest usr", overhead_guest_us, OVERHEAD_GUEST_US),
+ HPP__COLOR_ACC_PRINT_FNS("Children", overhead_acc, OVERHEAD_ACC),
+ HPP__PRINT_FNS("Samples", samples, SAMPLES),
+ HPP__PRINT_FNS("Period", period, PERIOD)
+};
+
+struct perf_hpp_list perf_hpp_list = {
+ .fields = LIST_HEAD_INIT(perf_hpp_list.fields),
+ .sorts = LIST_HEAD_INIT(perf_hpp_list.sorts),
+ .nr_header_lines = 1,
+};
+
+#undef HPP__COLOR_PRINT_FNS
+#undef HPP__COLOR_ACC_PRINT_FNS
+#undef HPP__PRINT_FNS
+
+#undef HPP_PERCENT_FNS
+#undef HPP_PERCENT_ACC_FNS
+#undef HPP_RAW_FNS
+
+#undef __HPP_HEADER_FN
+#undef __HPP_WIDTH_FN
+#undef __HPP_COLOR_PERCENT_FN
+#undef __HPP_ENTRY_PERCENT_FN
+#undef __HPP_COLOR_ACC_PERCENT_FN
+#undef __HPP_ENTRY_ACC_PERCENT_FN
+#undef __HPP_ENTRY_RAW_FN
+#undef __HPP_SORT_FN
+#undef __HPP_SORT_ACC_FN
+#undef __HPP_SORT_RAW_FN
+
+static void fmt_free(struct perf_hpp_fmt *fmt)
+{
+	/*
+	 * At this point fmt should be completely
+	 * unhooked; if not, it's a bug.
+	 */
+ BUG_ON(!list_empty(&fmt->list));
+ BUG_ON(!list_empty(&fmt->sort_list));
+
+ if (fmt->free)
+ fmt->free(fmt);
+}
+
+void perf_hpp__init(void)
+{
+ int i;
+
+ for (i = 0; i < PERF_HPP__MAX_INDEX; i++) {
+ struct perf_hpp_fmt *fmt = &perf_hpp__format[i];
+
+ INIT_LIST_HEAD(&fmt->list);
+
+ /* sort_list may be linked by setup_sorting() */
+ if (fmt->sort_list.next == NULL)
+ INIT_LIST_HEAD(&fmt->sort_list);
+ }
+
+	/*
+	 * If the user specified a field order, there is no need to set up
+	 * the default fields.
+	 */
+ if (is_strict_order(field_order))
+ return;
+
+ if (symbol_conf.cumulate_callchain) {
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_ACC);
+ perf_hpp__format[PERF_HPP__OVERHEAD].name = "Self";
+ }
+
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD);
+
+ if (symbol_conf.show_cpu_utilization) {
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_SYS);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_US);
+
+ if (perf_guest) {
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_SYS);
+ hpp_dimension__add_output(PERF_HPP__OVERHEAD_GUEST_US);
+ }
+ }
+
+ if (symbol_conf.show_nr_samples)
+ hpp_dimension__add_output(PERF_HPP__SAMPLES);
+
+ if (symbol_conf.show_total_period)
+ hpp_dimension__add_output(PERF_HPP__PERIOD);
+}
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format)
+{
+ list_add_tail(&format->list, &list->fields);
+}
+
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format)
+{
+ list_add_tail(&format->sort_list, &list->sorts);
+}
+
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format)
+{
+ list_add(&format->sort_list, &list->sorts);
+}
+
+static void perf_hpp__column_unregister(struct perf_hpp_fmt *format)
+{
+ list_del_init(&format->list);
+ fmt_free(format);
+}
+
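+/*
+ * Undo the cumulate-callchain setup: drop the accumulated overhead
+ * ("Children") column and rename the plain overhead column from "Self"
+ * back to "Overhead".
+ */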
+void perf_hpp__cancel_cumulate(void)
+{
+ struct perf_hpp_fmt *fmt, *acc, *ovh, *tmp;
+
+ if (is_strict_order(field_order))
+ return;
+
+ ovh = &perf_hpp__format[PERF_HPP__OVERHEAD];
+ acc = &perf_hpp__format[PERF_HPP__OVERHEAD_ACC];
+
+ perf_hpp_list__for_each_format_safe(&perf_hpp_list, fmt, tmp) {
+ if (acc->equal(acc, fmt)) {
+ perf_hpp__column_unregister(fmt);
+ continue;
+ }
+
+ if (ovh->equal(ovh, fmt))
+ fmt->name = "Overhead";
+ }
+}
+
+static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ return a->equal && a->equal(a, b);
+}
+
+void perf_hpp__setup_output_field(struct perf_hpp_list *list)
+{
+ struct perf_hpp_fmt *fmt;
+
+ /* append sort keys to output field */
+ perf_hpp_list__for_each_sort_list(list, fmt) {
+ struct perf_hpp_fmt *pos;
+
+ /* skip sort-only fields ("sort_compute" in perf diff) */
+ if (!fmt->entry && !fmt->color)
+ continue;
+
+ perf_hpp_list__for_each_format(list, pos) {
+ if (fmt_equal(fmt, pos))
+ goto next;
+ }
+
+ perf_hpp__column_register(fmt);
+next:
+ continue;
+ }
+}
+
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list)
+{
+ struct perf_hpp_fmt *fmt;
+
+ /* append output fields to sort keys */
+ perf_hpp_list__for_each_format(list, fmt) {
+ struct perf_hpp_fmt *pos;
+
+ perf_hpp_list__for_each_sort_list(list, pos) {
+ if (fmt_equal(fmt, pos))
+ goto next;
+ }
+
+ perf_hpp__register_sort_field(fmt);
+next:
+ continue;
+ }
+}
+
+
+void perf_hpp__reset_output_field(struct perf_hpp_list *list)
+{
+ struct perf_hpp_fmt *fmt, *tmp;
+
+ /* reset output fields */
+ perf_hpp_list__for_each_format_safe(list, fmt, tmp) {
+ list_del_init(&fmt->list);
+ list_del_init(&fmt->sort_list);
+ fmt_free(fmt);
+ }
+
+ /* reset sort keys */
+ perf_hpp_list__for_each_sort_list_safe(list, fmt, tmp) {
+ list_del_init(&fmt->list);
+ list_del_init(&fmt->sort_list);
+ fmt_free(fmt);
+ }
+}
+
+/*
+ * See hists__fprintf to match the column widths
+ */
+unsigned int hists__sort_list_width(struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+ int ret = 0;
+ bool first = true;
+ struct perf_hpp dummy_hpp;
+
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (first)
+ first = false;
+ else
+ ret += 2;
+
+ ret += fmt->width(fmt, &dummy_hpp, hists);
+ }
+
+ if (verbose > 0 && hists__has(hists, sym)) /* Addr + origin */
+ ret += 3 + BITS_PER_LONG / 4;
+
+ return ret;
+}
+
+unsigned int hists__overhead_width(struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+ int ret = 0;
+ bool first = true;
+ struct perf_hpp dummy_hpp;
+
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__is_sort_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+ break;
+
+ if (first)
+ first = false;
+ else
+ ret += 2;
+
+ ret += fmt->width(fmt, &dummy_hpp, hists);
+ }
+
+ return ret;
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ if (perf_hpp__is_sort_entry(fmt))
+ return perf_hpp__reset_sort_width(fmt, hists);
+
+ if (perf_hpp__is_dynamic_entry(fmt))
+ return;
+
+ BUG_ON(fmt->idx >= PERF_HPP__MAX_INDEX);
+
+ switch (fmt->idx) {
+ case PERF_HPP__OVERHEAD:
+ case PERF_HPP__OVERHEAD_SYS:
+ case PERF_HPP__OVERHEAD_US:
+ case PERF_HPP__OVERHEAD_ACC:
+ fmt->len = 8;
+ break;
+
+ case PERF_HPP__OVERHEAD_GUEST_SYS:
+ case PERF_HPP__OVERHEAD_GUEST_US:
+ fmt->len = 9;
+ break;
+
+ case PERF_HPP__SAMPLES:
+ case PERF_HPP__PERIOD:
+ fmt->len = 12;
+ break;
+
+ default:
+ break;
+ }
+}
+
+void hists__reset_column_width(struct hists *hists)
+{
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *node;
+
+ hists__for_each_format(hists, fmt)
+ perf_hpp__reset_width(fmt, hists);
+
+ /* hierarchy entries have their own hpp list */
+ list_for_each_entry(node, &hists->hpp_formats, list) {
+ perf_hpp_list__for_each_format(&node->hpp, fmt)
+ perf_hpp__reset_width(fmt, hists);
+ }
+}
+
+void perf_hpp__set_user_width(const char *width_list_str)
+{
+ struct perf_hpp_fmt *fmt;
+ const char *ptr = width_list_str;
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ char *p;
+
+ int len = strtol(ptr, &p, 10);
+ fmt->user_len = len;
+
+ if (*p == ',')
+ ptr = p + 1;
+ else
+ break;
+ }
+}
+
+static int add_hierarchy_fmt(struct hists *hists, struct perf_hpp_fmt *fmt)
+{
+ struct perf_hpp_list_node *node = NULL;
+ struct perf_hpp_fmt *fmt_copy;
+ bool found = false;
+ bool skip = perf_hpp__should_skip(fmt, hists);
+
+ list_for_each_entry(node, &hists->hpp_formats, list) {
+ if (node->level == fmt->level) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ node = malloc(sizeof(*node));
+ if (node == NULL)
+ return -1;
+
+ node->skip = skip;
+ node->level = fmt->level;
+ perf_hpp_list__init(&node->hpp);
+
+ hists->nr_hpp_node++;
+ list_add_tail(&node->list, &hists->hpp_formats);
+ }
+
+ fmt_copy = perf_hpp_fmt__dup(fmt);
+ if (fmt_copy == NULL)
+ return -1;
+
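+	/* the level is only skipped if every format at that level is skipped */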
+ if (!skip)
+ node->skip = false;
+
+ list_add_tail(&fmt_copy->list, &node->hpp.fields);
+ list_add_tail(&fmt_copy->sort_list, &node->hpp.sorts);
+
+ return 0;
+}
+
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ struct perf_hpp_fmt *fmt;
+ struct hists *hists;
+ int ret;
+
+ if (!symbol_conf.report_hierarchy)
+ return 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ hists = evsel__hists(evsel);
+
+ perf_hpp_list__for_each_sort_list(list, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt) &&
+ !perf_hpp__defined_dynamic_entry(fmt, hists))
+ continue;
+
+ ret = add_hierarchy_fmt(hists, fmt);
+ if (ret < 0)
+ return ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h
new file mode 100644
index 000000000..fbfac2907
--- /dev/null
+++ b/tools/perf/ui/keysyms.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_KEYSYMS_H_
+#define _PERF_KEYSYMS_H_ 1
+
+#include "libslang.h"
+
+#define K_DOWN SL_KEY_DOWN
+#define K_END SL_KEY_END
+#define K_ENTER '\r'
+#define K_ESC 033
+#define K_F1 SL_KEY_F(1)
+#define K_HOME SL_KEY_HOME
+#define K_LEFT SL_KEY_LEFT
+#define K_PGDN SL_KEY_NPAGE
+#define K_PGUP SL_KEY_PPAGE
+#define K_RIGHT SL_KEY_RIGHT
+#define K_TAB '\t'
+#define K_UNTAB SL_KEY_UNTAB
+#define K_UP SL_KEY_UP
+#define K_BKSPC 0x7f
+#define K_DEL SL_KEY_DELETE
+
+/* Not really keys */
+#define K_TIMER -1
+#define K_ERROR -2
+#define K_RESIZE -3
+#define K_SWITCH_INPUT_DATA -4
+
+#endif /* _PERF_KEYSYMS_H_ */
diff --git a/tools/perf/ui/libslang.h b/tools/perf/ui/libslang.h
new file mode 100644
index 000000000..c0686cda3
--- /dev/null
+++ b/tools/perf/ui/libslang.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_SLANG_H_
+#define _PERF_UI_SLANG_H_ 1
+/*
+ * slang versions <= 2.0.6 have a "#if HAVE_LONG_LONG" that breaks
+ * the build if it isn't defined. Use the equivalent one that glibc
+ * has in features.h.
+ */
+#include <features.h>
+#ifndef HAVE_LONG_LONG
+#define HAVE_LONG_LONG __GLIBC_HAVE_LONG_LONG
+#endif
+#include <slang.h>
+
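+/*
+ * Old slang prototypes take 'char *' rather than 'const char *', so the
+ * wrappers below cast the constness away to keep callers warning-free.
+ */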
+#if SLANG_VERSION < 20104
+#define slsmg_printf(msg, args...) \
+ SLsmg_printf((char *)(msg), ##args)
+#define slsmg_vprintf(msg, vargs) \
+ SLsmg_vprintf((char *)(msg), vargs)
+#define slsmg_write_nstring(msg, len) \
+ SLsmg_write_nstring((char *)(msg), len)
+#define sltt_set_color(obj, name, fg, bg) \
+ SLtt_set_color(obj,(char *)(name), (char *)(fg), (char *)(bg))
+#else
+#define slsmg_printf SLsmg_printf
+#define slsmg_vprintf SLsmg_vprintf
+#define slsmg_write_nstring SLsmg_write_nstring
+#define sltt_set_color SLtt_set_color
+#endif
+
+#define SL_KEY_UNTAB 0x1000
+
+#endif /* _PERF_UI_SLANG_H_ */
diff --git a/tools/perf/ui/progress.c b/tools/perf/ui/progress.c
new file mode 100644
index 000000000..bbfbc91a0
--- /dev/null
+++ b/tools/perf/ui/progress.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "../cache.h"
+#include "progress.h"
+
+static void null_progress__update(struct ui_progress *p __maybe_unused)
+{
+}
+
+static struct ui_progress_ops null_progress__ops =
+{
+ .update = null_progress__update,
+};
+
+struct ui_progress_ops *ui_progress__ops = &null_progress__ops;
+
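+/*
+ * Throttle redraws: the backend ->update() hook only runs when ->curr
+ * crosses ->next, and ->next then advances by a whole number of steps,
+ * so roughly total/step updates are drawn no matter how often callers
+ * report progress.
+ */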
+void ui_progress__update(struct ui_progress *p, u64 adv)
+{
+ u64 last = p->curr;
+
+ p->curr += adv;
+
+ if (p->curr >= p->next) {
+ u64 nr = DIV_ROUND_UP(p->curr - last, p->step);
+
+ p->next += nr * p->step;
+ ui_progress__ops->update(p);
+ }
+}
+
+void __ui_progress__init(struct ui_progress *p, u64 total,
+ const char *title, bool size)
+{
+ p->curr = 0;
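+	/* the GNU ?: fallback keeps step at least 1 for totals below 16 */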
+ p->next = p->step = total / 16 ?: 1;
+ p->total = total;
+ p->title = title;
+ p->size = size;
+
+ if (ui_progress__ops->init)
+ ui_progress__ops->init(p);
+}
+
+void ui_progress__finish(void)
+{
+ if (ui_progress__ops->finish)
+ ui_progress__ops->finish();
+}
diff --git a/tools/perf/ui/progress.h b/tools/perf/ui/progress.h
new file mode 100644
index 000000000..4f52c37b2
--- /dev/null
+++ b/tools/perf/ui/progress.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_PROGRESS_H_
+#define _PERF_UI_PROGRESS_H_ 1
+
+#include <linux/types.h>
+
+void ui_progress__finish(void);
+
+struct ui_progress {
+ const char *title;
+ u64 curr, next, step, total;
+ bool size;
+};
+
+void __ui_progress__init(struct ui_progress *p, u64 total,
+ const char *title, bool size);
+
+#define ui_progress__init(p, total, title) \
+ __ui_progress__init(p, total, title, false)
+
+#define ui_progress__init_size(p, total, title) \
+ __ui_progress__init(p, total, title, true)
+
+void ui_progress__update(struct ui_progress *p, u64 adv);
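+
+/*
+ * Typical usage, as a sketch (the work loop below is hypothetical):
+ *
+ *	struct ui_progress prog;
+ *
+ *	ui_progress__init(&prog, nr_items, "Processing events");
+ *	for (i = 0; i < nr_items; i++) {
+ *		process_item(i);		// hypothetical work
+ *		ui_progress__update(&prog, 1);
+ *	}
+ *	ui_progress__finish();
+ */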
+
+struct ui_progress_ops {
+ void (*init)(struct ui_progress *p);
+ void (*update)(struct ui_progress *p);
+ void (*finish)(void);
+};
+
+extern struct ui_progress_ops *ui_progress__ops;
+
+#endif
diff --git a/tools/perf/ui/setup.c b/tools/perf/ui/setup.c
new file mode 100644
index 000000000..44fe824e9
--- /dev/null
+++ b/tools/perf/ui/setup.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <dlfcn.h>
+
+#include "../util/cache.h"
+#include "../util/debug.h"
+#include "../util/hist.h"
+#include "../util/util.h"
+
+pthread_mutex_t ui__lock = PTHREAD_MUTEX_INITIALIZER;
+void *perf_gtk_handle;
+int use_browser = -1;
+
+#define PERF_GTK_DSO "libperf-gtk.so"
+
+#ifdef HAVE_GTK2_SUPPORT
+
+static int setup_gtk_browser(void)
+{
+ int (*perf_ui_init)(void);
+
+ if (perf_gtk_handle)
+ return 0;
+
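+	/* try the regular dynamic linker search path first, then LIBDIR */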
+ perf_gtk_handle = dlopen(PERF_GTK_DSO, RTLD_LAZY);
+ if (perf_gtk_handle == NULL) {
+ char buf[PATH_MAX];
+ scnprintf(buf, sizeof(buf), "%s/%s", LIBDIR, PERF_GTK_DSO);
+ perf_gtk_handle = dlopen(buf, RTLD_LAZY);
+ }
+ if (perf_gtk_handle == NULL)
+ return -1;
+
+ perf_ui_init = dlsym(perf_gtk_handle, "perf_gtk__init");
+ if (perf_ui_init == NULL)
+ goto out_close;
+
+ if (perf_ui_init() == 0)
+ return 0;
+
+out_close:
+ dlclose(perf_gtk_handle);
+ return -1;
+}
+
+static void exit_gtk_browser(bool wait_for_ok)
+{
+ void (*perf_ui_exit)(bool);
+
+ if (perf_gtk_handle == NULL)
+ return;
+
+ perf_ui_exit = dlsym(perf_gtk_handle, "perf_gtk__exit");
+ if (perf_ui_exit == NULL)
+ goto out_close;
+
+ perf_ui_exit(wait_for_ok);
+
+out_close:
+ dlclose(perf_gtk_handle);
+
+ perf_gtk_handle = NULL;
+}
+#else
+static inline int setup_gtk_browser(void) { return -1; }
+static inline void exit_gtk_browser(bool wait_for_ok __maybe_unused) {}
+#endif
+
+int stdio__config_color(const struct option *opt __maybe_unused,
+ const char *mode, int unset __maybe_unused)
+{
+ perf_use_color_default = perf_config_colorbool("color.ui", mode, -1);
+ return 0;
+}
+
+void setup_browser(bool fallback_to_pager)
+{
+ if (use_browser < 2 && (!isatty(1) || dump_trace))
+ use_browser = 0;
+
+ /* default to TUI */
+ if (use_browser < 0)
+ use_browser = 1;
+
+ switch (use_browser) {
+ case 2:
+ if (setup_gtk_browser() == 0)
+ break;
+ printf("GTK browser requested but could not find %s\n",
+ PERF_GTK_DSO);
+ sleep(1);
+ /* fall through */
+ case 1:
+ use_browser = 1;
+ if (ui__init() == 0)
+ break;
+ /* fall through */
+ default:
+ use_browser = 0;
+ if (fallback_to_pager)
+ setup_pager();
+ break;
+ }
+}
+
+void exit_browser(bool wait_for_ok)
+{
+ switch (use_browser) {
+ case 2:
+ exit_gtk_browser(wait_for_ok);
+ break;
+
+ case 1:
+ ui__exit(wait_for_ok);
+ break;
+
+ default:
+ break;
+ }
+}
diff --git a/tools/perf/ui/stdio/hist.c b/tools/perf/ui/stdio/hist.c
new file mode 100644
index 000000000..74c4ae1f0
--- /dev/null
+++ b/tools/perf/ui/stdio/hist.c
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <linux/string.h>
+
+#include "../../util/util.h"
+#include "../../util/hist.h"
+#include "../../util/sort.h"
+#include "../../util/evsel.h"
+#include "../../util/srcline.h"
+#include "../../util/string2.h"
+#include "../../util/thread.h"
+#include "../../util/sane_ctype.h"
+
+static size_t callchain__fprintf_left_margin(FILE *fp, int left_margin)
+{
+ int i;
+ int ret = fprintf(fp, " ");
+
+ for (i = 0; i < left_margin; i++)
+ ret += fprintf(fp, " ");
+
+ return ret;
+}
+
+static size_t ipchain__fprintf_graph_line(FILE *fp, int depth, int depth_mask,
+ int left_margin)
+{
+ int i;
+ size_t ret = callchain__fprintf_left_margin(fp, left_margin);
+
+ for (i = 0; i < depth; i++)
+ if (depth_mask & (1 << i))
+ ret += fprintf(fp, "| ");
+ else
+ ret += fprintf(fp, " ");
+
+ ret += fprintf(fp, "\n");
+
+ return ret;
+}
+
+static size_t ipchain__fprintf_graph(FILE *fp, struct callchain_node *node,
+ struct callchain_list *chain,
+ int depth, int depth_mask, int period,
+ u64 total_samples, int left_margin)
+{
+ int i;
+ size_t ret = 0;
+ char bf[1024], *alloc_str = NULL;
+ char buf[64];
+ const char *str;
+
+ ret += callchain__fprintf_left_margin(fp, left_margin);
+ for (i = 0; i < depth; i++) {
+ if (depth_mask & (1 << i))
+ ret += fprintf(fp, "|");
+ else
+ ret += fprintf(fp, " ");
+ if (!period && i == depth - 1) {
+ ret += fprintf(fp, "--");
+ ret += callchain_node__fprintf_value(node, fp, total_samples);
+ ret += fprintf(fp, "--");
+ } else
+ ret += fprintf(fp, "%s", " ");
+ }
+
+ str = callchain_list__sym_name(chain, bf, sizeof(bf), false);
+
+ if (symbol_conf.show_branchflag_count) {
+ callchain_list_counts__printf_value(chain, NULL,
+ buf, sizeof(buf));
+
+ if (asprintf(&alloc_str, "%s%s", str, buf) < 0)
+ str = "Not enough memory!";
+ else
+ str = alloc_str;
+ }
+
+ fputs(str, fp);
+ fputc('\n', fp);
+ free(alloc_str);
+
+ return ret;
+}
+
+static struct symbol *rem_sq_bracket;
+static struct callchain_list rem_hits;
+
+static void init_rem_hits(void)
+{
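+	/* +6: room for the "[...]" name (5 chars plus NUL) copied in below */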
+ rem_sq_bracket = malloc(sizeof(*rem_sq_bracket) + 6);
+ if (!rem_sq_bracket) {
+ fprintf(stderr, "Not enough memory to display remaining hits\n");
+ return;
+ }
+
+ strcpy(rem_sq_bracket->name, "[...]");
+ rem_hits.ms.sym = rem_sq_bracket;
+}
+
+static size_t __callchain__fprintf_graph(FILE *fp, struct rb_root *root,
+ u64 total_samples, int depth,
+ int depth_mask, int left_margin)
+{
+ struct rb_node *node, *next;
+ struct callchain_node *child = NULL;
+ struct callchain_list *chain;
+ int new_depth_mask = depth_mask;
+ u64 remaining;
+ size_t ret = 0;
+ int i;
+ uint entries_printed = 0;
+ int cumul_count = 0;
+
+ remaining = total_samples;
+
+ node = rb_first(root);
+ while (node) {
+ u64 new_total;
+ u64 cumul;
+
+ child = rb_entry(node, struct callchain_node, rb_node);
+ cumul = callchain_cumul_hits(child);
+ remaining -= cumul;
+ cumul_count += callchain_cumul_counts(child);
+
+		/*
+		 * The depth mask manages the output of pipes that show
+		 * the depth. We don't want to keep the pipes of the current
+		 * level for the last child of this depth, except if we have
+		 * remaining filtered hits: they will supersede the last child.
+		 */
+ next = rb_next(node);
+ if (!next && (callchain_param.mode != CHAIN_GRAPH_REL || !remaining))
+ new_depth_mask &= ~(1 << (depth - 1));
+
+ /*
+ * But we keep the older depth mask for the line separator
+ * to keep the level link until we reach the last child
+ */
+ ret += ipchain__fprintf_graph_line(fp, depth, depth_mask,
+ left_margin);
+ i = 0;
+ list_for_each_entry(chain, &child->val, list) {
+ ret += ipchain__fprintf_graph(fp, child, chain, depth,
+ new_depth_mask, i++,
+ total_samples,
+ left_margin);
+ }
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL)
+ new_total = child->children_hit;
+ else
+ new_total = total_samples;
+
+ ret += __callchain__fprintf_graph(fp, &child->rb_root, new_total,
+ depth + 1,
+ new_depth_mask | (1 << depth),
+ left_margin);
+ node = next;
+ if (++entries_printed == callchain_param.print_limit)
+ break;
+ }
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL &&
+ remaining && remaining != total_samples) {
+ struct callchain_node rem_node = {
+ .hit = remaining,
+ };
+
+ if (!rem_sq_bracket)
+ return ret;
+
+ if (callchain_param.value == CCVAL_COUNT && child && child->parent) {
+ rem_node.count = child->parent->children_count - cumul_count;
+ if (rem_node.count <= 0)
+ return ret;
+ }
+
+ new_depth_mask &= ~(1 << (depth - 1));
+ ret += ipchain__fprintf_graph(fp, &rem_node, &rem_hits, depth,
+ new_depth_mask, 0, total_samples,
+ left_margin);
+ }
+
+ return ret;
+}
+
+/*
+ * If there is a single callchain root, don't bother printing
+ * its percentage (100% in fractal mode, and the same percentage
+ * as the hist entry in graph mode). This also avoids one level of
+ * columns.
+ *
+ * However, when a percent limit is applied, it's possible that the
+ * single callchain node has a different (non-100% in fractal mode)
+ * percentage.
+ */
+static bool need_percent_display(struct rb_node *node, u64 parent_samples)
+{
+ struct callchain_node *cnode;
+
+ if (rb_next(node))
+ return true;
+
+ cnode = rb_entry(node, struct callchain_node, rb_node);
+ return callchain_cumul_hits(cnode) != parent_samples;
+}
+
+static size_t callchain__fprintf_graph(FILE *fp, struct rb_root *root,
+ u64 total_samples, u64 parent_samples,
+ int left_margin)
+{
+ struct callchain_node *cnode;
+ struct callchain_list *chain;
+ u32 entries_printed = 0;
+ bool printed = false;
+ struct rb_node *node;
+ int i = 0;
+ int ret = 0;
+ char bf[1024];
+
+ node = rb_first(root);
+ if (node && !need_percent_display(node, parent_samples)) {
+ cnode = rb_entry(node, struct callchain_node, rb_node);
+ list_for_each_entry(chain, &cnode->val, list) {
+			/*
+			 * If we sort by symbol, the first entry is the same as
+			 * the symbol. No need to print it, otherwise it would
+			 * be displayed twice.
+			 */
+ if (!i++ && field_order == NULL &&
+ sort_order && strstarts(sort_order, "sym"))
+ continue;
+
+ if (!printed) {
+ ret += callchain__fprintf_left_margin(fp, left_margin);
+ ret += fprintf(fp, "|\n");
+ ret += callchain__fprintf_left_margin(fp, left_margin);
+ ret += fprintf(fp, "---");
+ left_margin += 3;
+ printed = true;
+ } else
+ ret += callchain__fprintf_left_margin(fp, left_margin);
+
+ ret += fprintf(fp, "%s",
+ callchain_list__sym_name(chain, bf,
+ sizeof(bf),
+ false));
+
+ if (symbol_conf.show_branchflag_count)
+ ret += callchain_list_counts__printf_value(
+ chain, fp, NULL, 0);
+ ret += fprintf(fp, "\n");
+
+ if (++entries_printed == callchain_param.print_limit)
+ break;
+ }
+ root = &cnode->rb_root;
+ }
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL)
+ total_samples = parent_samples;
+
+ ret += __callchain__fprintf_graph(fp, root, total_samples,
+ 1, 1, left_margin);
+ if (ret) {
+ /* do not add a blank line if it printed nothing */
+ ret += fprintf(fp, "\n");
+ }
+
+ return ret;
+}
+
+static size_t __callchain__fprintf_flat(FILE *fp, struct callchain_node *node,
+ u64 total_samples)
+{
+ struct callchain_list *chain;
+ size_t ret = 0;
+ char bf[1024];
+
+ if (!node)
+ return 0;
+
+ ret += __callchain__fprintf_flat(fp, node->parent, total_samples);
+
+ list_for_each_entry(chain, &node->val, list) {
+ if (chain->ip >= PERF_CONTEXT_MAX)
+ continue;
+ ret += fprintf(fp, " %s\n", callchain_list__sym_name(chain,
+ bf, sizeof(bf), false));
+ }
+
+ return ret;
+}
+
+static size_t callchain__fprintf_flat(FILE *fp, struct rb_root *tree,
+ u64 total_samples)
+{
+ size_t ret = 0;
+ u32 entries_printed = 0;
+ struct callchain_node *chain;
+ struct rb_node *rb_node = rb_first(tree);
+
+ while (rb_node) {
+ chain = rb_entry(rb_node, struct callchain_node, rb_node);
+
+ ret += fprintf(fp, " ");
+ ret += callchain_node__fprintf_value(chain, fp, total_samples);
+ ret += fprintf(fp, "\n");
+ ret += __callchain__fprintf_flat(fp, chain, total_samples);
+ ret += fprintf(fp, "\n");
+ if (++entries_printed == callchain_param.print_limit)
+ break;
+
+ rb_node = rb_next(rb_node);
+ }
+
+ return ret;
+}
+
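+/*
+ * Folded mode prints each callchain as a single line of frames joined by
+ * field_sep (';' by default), a format suitable for flame graph scripts.
+ */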
+static size_t __callchain__fprintf_folded(FILE *fp, struct callchain_node *node)
+{
+ const char *sep = symbol_conf.field_sep ?: ";";
+ struct callchain_list *chain;
+ size_t ret = 0;
+ char bf[1024];
+ bool first;
+
+ if (!node)
+ return 0;
+
+ ret += __callchain__fprintf_folded(fp, node->parent);
+
+ first = (ret == 0);
+ list_for_each_entry(chain, &node->val, list) {
+ if (chain->ip >= PERF_CONTEXT_MAX)
+ continue;
+ ret += fprintf(fp, "%s%s", first ? "" : sep,
+ callchain_list__sym_name(chain,
+ bf, sizeof(bf), false));
+ first = false;
+ }
+
+ return ret;
+}
+
+static size_t callchain__fprintf_folded(FILE *fp, struct rb_root *tree,
+ u64 total_samples)
+{
+ size_t ret = 0;
+ u32 entries_printed = 0;
+ struct callchain_node *chain;
+ struct rb_node *rb_node = rb_first(tree);
+
+ while (rb_node) {
+
+ chain = rb_entry(rb_node, struct callchain_node, rb_node);
+
+ ret += callchain_node__fprintf_value(chain, fp, total_samples);
+ ret += fprintf(fp, " ");
+ ret += __callchain__fprintf_folded(fp, chain);
+ ret += fprintf(fp, "\n");
+ if (++entries_printed == callchain_param.print_limit)
+ break;
+
+ rb_node = rb_next(rb_node);
+ }
+
+ return ret;
+}
+
+static size_t hist_entry_callchain__fprintf(struct hist_entry *he,
+ u64 total_samples, int left_margin,
+ FILE *fp)
+{
+ u64 parent_samples = he->stat.period;
+
+ if (symbol_conf.cumulate_callchain)
+ parent_samples = he->stat_acc->period;
+
+ switch (callchain_param.mode) {
+ case CHAIN_GRAPH_REL:
+ return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+ parent_samples, left_margin);
+ break;
+ case CHAIN_GRAPH_ABS:
+ return callchain__fprintf_graph(fp, &he->sorted_chain, total_samples,
+ parent_samples, left_margin);
+ break;
+ case CHAIN_FLAT:
+ return callchain__fprintf_flat(fp, &he->sorted_chain, total_samples);
+ break;
+ case CHAIN_FOLDED:
+ return callchain__fprintf_folded(fp, &he->sorted_chain, total_samples);
+ break;
+ case CHAIN_NONE:
+ break;
+ default:
+ pr_err("Bad callchain mode\n");
+ }
+
+ return 0;
+}
+
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list)
+{
+ const char *sep = symbol_conf.field_sep;
+ struct perf_hpp_fmt *fmt;
+ char *start = hpp->buf;
+ int ret;
+ bool first = true;
+
+ if (symbol_conf.exclude_other && !he->parent)
+ return 0;
+
+ perf_hpp_list__for_each_format(hpp_list, fmt) {
+ if (perf_hpp__should_skip(fmt, he->hists))
+ continue;
+
+ /*
+ * If there's no field_sep, we still need
+ * to display initial ' '.
+ */
+ if (!sep || !first) {
+ ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " ");
+ advance_hpp(hpp, ret);
+ } else
+ first = false;
+
+ if (perf_hpp__use_color() && fmt->color)
+ ret = fmt->color(fmt, hpp, he);
+ else
+ ret = fmt->entry(fmt, hpp, he);
+
+ ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+ advance_hpp(hpp, ret);
+ }
+
+ return hpp->buf - start;
+}
+
+static int hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp)
+{
+ return __hist_entry__snprintf(he, hpp, he->hists->hpp_list);
+}
+
+static int hist_entry__hierarchy_fprintf(struct hist_entry *he,
+ struct perf_hpp *hpp,
+ struct hists *hists,
+ FILE *fp)
+{
+ const char *sep = symbol_conf.field_sep;
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ char *buf = hpp->buf;
+ size_t size = hpp->size;
+ int ret, printed = 0;
+ bool first = true;
+
+ if (symbol_conf.exclude_other && !he->parent)
+ return 0;
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*s", he->depth * HIERARCHY_INDENT, "");
+ advance_hpp(hpp, ret);
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ /*
+ * If there's no field_sep, we still need
+ * to display initial ' '.
+ */
+ if (!sep || !first) {
+ ret = scnprintf(hpp->buf, hpp->size, "%s", sep ?: " ");
+ advance_hpp(hpp, ret);
+ } else
+ first = false;
+
+ if (perf_hpp__use_color() && fmt->color)
+ ret = fmt->color(fmt, hpp, he);
+ else
+ ret = fmt->entry(fmt, hpp, he);
+
+ ret = hist_entry__snprintf_alignment(he, hpp, fmt, ret);
+ advance_hpp(hpp, ret);
+ }
+
+ if (!sep)
+ ret = scnprintf(hpp->buf, hpp->size, "%*s",
+ (hists->nr_hpp_node - 2) * HIERARCHY_INDENT, "");
+ advance_hpp(hpp, ret);
+
+ printed += fprintf(fp, "%s", buf);
+
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ hpp->buf = buf;
+ hpp->size = size;
+
+ /*
+ * No need to call hist_entry__snprintf_alignment() since this
+ * fmt is always the last column in the hierarchy mode.
+ */
+ if (perf_hpp__use_color() && fmt->color)
+ fmt->color(fmt, hpp, he);
+ else
+ fmt->entry(fmt, hpp, he);
+
+ /*
+ * dynamic entries are right-aligned but we want left-aligned
+ * in the hierarchy mode
+ */
+ printed += fprintf(fp, "%s%s", sep ?: " ", ltrim(buf));
+ }
+ printed += putc('\n', fp);
+
+ if (he->leaf && hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
+ u64 total = hists__total_period(hists);
+
+ printed += hist_entry_callchain__fprintf(he, total, 0, fp);
+	}
+
+	return printed;
+}
+
+static int hist_entry__fprintf(struct hist_entry *he, size_t size,
+ char *bf, size_t bfsz, FILE *fp,
+ bool ignore_callchains)
+{
+ int ret;
+ int callchain_ret = 0;
+ struct perf_hpp hpp = {
+ .buf = bf,
+ .size = size,
+ };
+ struct hists *hists = he->hists;
+ u64 total_period = hists->stats.total_period;
+
+ if (size == 0 || size > bfsz)
+ size = hpp.size = bfsz;
+
+ if (symbol_conf.report_hierarchy)
+ return hist_entry__hierarchy_fprintf(he, &hpp, hists, fp);
+
+ hist_entry__snprintf(he, &hpp);
+
+ ret = fprintf(fp, "%s\n", bf);
+
+ if (hist_entry__has_callchains(he) && !ignore_callchains)
+ callchain_ret = hist_entry_callchain__fprintf(he, total_period,
+ 0, fp);
+
+ ret += callchain_ret;
+
+ return ret;
+}
+
+static int print_hierarchy_indent(const char *sep, int indent,
+ const char *line, FILE *fp)
+{
+ if (sep != NULL || indent < 2)
+ return 0;
+
+ return fprintf(fp, "%-.*s", (indent - 2) * HIERARCHY_INDENT, line);
+}
+
+static int hists__fprintf_hierarchy_headers(struct hists *hists,
+ struct perf_hpp *hpp, FILE *fp)
+{
+ bool first_node, first_col;
+ int indent;
+ int depth;
+ unsigned width = 0;
+ unsigned header_width = 0;
+ struct perf_hpp_fmt *fmt;
+ struct perf_hpp_list_node *fmt_node;
+ const char *sep = symbol_conf.field_sep;
+
+ indent = hists->nr_hpp_node;
+
+ /* preserve max indent depth for column headers */
+ print_hierarchy_indent(sep, indent, spaces, fp);
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ fmt->header(fmt, hpp, hists, 0, NULL);
+ fprintf(fp, "%s%s", hpp->buf, sep ?: " ");
+ }
+
+ /* combine sort headers with ' / ' */
+ first_node = true;
+ list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+ if (!first_node)
+ header_width += fprintf(fp, " / ");
+ first_node = false;
+
+ first_col = true;
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first_col)
+ header_width += fprintf(fp, "+");
+ first_col = false;
+
+ fmt->header(fmt, hpp, hists, 0, NULL);
+
+ header_width += fprintf(fp, "%s", trim(hpp->buf));
+ }
+ }
+
+ fprintf(fp, "\n# ");
+
+ /* preserve max indent depth for initial dots */
+ print_hierarchy_indent(sep, indent, dots, fp);
+
+ /* the first hpp_list_node is for overhead columns */
+ fmt_node = list_first_entry(&hists->hpp_formats,
+ struct perf_hpp_list_node, list);
+
+ first_col = true;
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (!first_col)
+ fprintf(fp, "%s", sep ?: "..");
+ first_col = false;
+
+ width = fmt->width(fmt, hpp, hists);
+ fprintf(fp, "%.*s", width, dots);
+ }
+
+ depth = 0;
+ list_for_each_entry_continue(fmt_node, &hists->hpp_formats, list) {
+ first_col = true;
+ width = depth * HIERARCHY_INDENT;
+
+ perf_hpp_list__for_each_format(&fmt_node->hpp, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first_col)
+				width++; /* for the '+' sign between column headers */
+ first_col = false;
+
+ width += fmt->width(fmt, hpp, hists);
+ }
+
+ if (width > header_width)
+ header_width = width;
+
+ depth++;
+ }
+
+ fprintf(fp, "%s%-.*s", sep ?: " ", header_width, dots);
+
+ fprintf(fp, "\n#\n");
+
+ return 2;
+}
+
+static void fprintf_line(struct hists *hists, struct perf_hpp *hpp,
+ int line, FILE *fp)
+{
+ struct perf_hpp_fmt *fmt;
+ const char *sep = symbol_conf.field_sep;
+ bool first = true;
+ int span = 0;
+
+ hists__for_each_format(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first && !span)
+ fprintf(fp, "%s", sep ?: " ");
+ else
+ first = false;
+
+ fmt->header(fmt, hpp, hists, line, &span);
+
+ if (!span)
+ fprintf(fp, "%s", hpp->buf);
+ }
+}
+
+static int
+hists__fprintf_standard_headers(struct hists *hists,
+ struct perf_hpp *hpp,
+ FILE *fp)
+{
+ struct perf_hpp_list *hpp_list = hists->hpp_list;
+ struct perf_hpp_fmt *fmt;
+ unsigned int width;
+ const char *sep = symbol_conf.field_sep;
+ bool first = true;
+ int line;
+
+ for (line = 0; line < hpp_list->nr_header_lines; line++) {
+ /* first # is displayed one level up */
+ if (line)
+ fprintf(fp, "# ");
+ fprintf_line(hists, hpp, line, fp);
+ fprintf(fp, "\n");
+ }
+
+ if (sep)
+ return hpp_list->nr_header_lines;
+
+ first = true;
+
+ fprintf(fp, "# ");
+
+ hists__for_each_format(hists, fmt) {
+ unsigned int i;
+
+ if (perf_hpp__should_skip(fmt, hists))
+ continue;
+
+ if (!first)
+ fprintf(fp, "%s", sep ?: " ");
+ else
+ first = false;
+
+ width = fmt->width(fmt, hpp, hists);
+ for (i = 0; i < width; i++)
+ fprintf(fp, ".");
+ }
+
+ fprintf(fp, "\n");
+ fprintf(fp, "#\n");
+ return hpp_list->nr_header_lines + 2;
+}
+
+int hists__fprintf_headers(struct hists *hists, FILE *fp)
+{
+ char bf[1024];
+ struct perf_hpp dummy_hpp = {
+ .buf = bf,
+ .size = sizeof(bf),
+ };
+
+ fprintf(fp, "# ");
+
+ if (symbol_conf.report_hierarchy)
+ return hists__fprintf_hierarchy_headers(hists, &dummy_hpp, fp);
+ else
+ return hists__fprintf_standard_headers(hists, &dummy_hpp, fp);
+}
+
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+ int max_cols, float min_pcnt, FILE *fp,
+ bool ignore_callchains)
+{
+ struct rb_node *nd;
+ size_t ret = 0;
+ const char *sep = symbol_conf.field_sep;
+ int nr_rows = 0;
+ size_t linesz;
+ char *line = NULL;
+ unsigned indent;
+
+ init_rem_hits();
+
+ hists__reset_column_width(hists);
+
+ if (symbol_conf.col_width_list_str)
+ perf_hpp__set_user_width(symbol_conf.col_width_list_str);
+
+ if (show_header)
+ nr_rows += hists__fprintf_headers(hists, fp);
+
+ if (max_rows && nr_rows >= max_rows)
+ goto out;
+
+ linesz = hists__sort_list_width(hists) + 3 + 1;
+ linesz += perf_hpp__color_overhead();
+ line = malloc(linesz);
+ if (line == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ indent = hists__overhead_width(hists) + 4;
+
+ for (nd = rb_first(&hists->entries); nd; nd = __rb_hierarchy_next(nd, HMD_FORCE_CHILD)) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ float percent;
+
+ if (h->filtered)
+ continue;
+
+ percent = hist_entry__get_percent_limit(h);
+ if (percent < min_pcnt)
+ continue;
+
+ ret += hist_entry__fprintf(h, max_cols, line, linesz, fp, ignore_callchains);
+
+ if (max_rows && ++nr_rows >= max_rows)
+ break;
+
+ /*
+ * If all children are filtered out or percent-limited,
+ * display "no entry >= x.xx%" message.
+ */
+ if (!h->leaf && !hist_entry__has_hierarchy_children(h, min_pcnt)) {
+ int depth = hists->nr_hpp_node + h->depth + 1;
+
+ print_hierarchy_indent(sep, depth, spaces, fp);
+ fprintf(fp, "%*sno entry >= %.2f%%\n", indent, "", min_pcnt);
+
+ if (max_rows && ++nr_rows >= max_rows)
+ break;
+ }
+
+ if (h->ms.map == NULL && verbose > 1) {
+ map_groups__fprintf(h->thread->mg, fp);
+ fprintf(fp, "%.10s end\n", graph_dotted_line);
+ }
+ }
+
+ free(line);
+out:
+ zfree(&rem_sq_bracket);
+
+ return ret;
+}
+
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp)
+{
+ int i;
+ size_t ret = 0;
+
+ for (i = 0; i < PERF_RECORD_HEADER_MAX; ++i) {
+ const char *name;
+
+ name = perf_event__name(i);
+ if (!strcmp(name, "UNKNOWN"))
+ continue;
+
+ ret += fprintf(fp, "%16s events: %10d\n", name, stats->nr_events[i]);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build
new file mode 100644
index 000000000..9e4c6ca41
--- /dev/null
+++ b/tools/perf/ui/tui/Build
@@ -0,0 +1,4 @@
+libperf-y += setup.o
+libperf-y += util.o
+libperf-y += helpline.o
+libperf-y += progress.o
diff --git a/tools/perf/ui/tui/helpline.c b/tools/perf/ui/tui/helpline.c
new file mode 100644
index 000000000..93d6b7240
--- /dev/null
+++ b/tools/perf/ui/tui/helpline.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "../../util/debug.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../libslang.h"
+
+char ui_helpline__last_msg[1024];
+bool tui_helpline__set;
+
+static void tui_helpline__pop(void)
+{
+}
+
+static void tui_helpline__push(const char *msg)
+{
+ const size_t sz = sizeof(ui_helpline__current);
+
+ SLsmg_gotorc(SLtt_Screen_Rows - 1, 0);
+ SLsmg_set_color(0);
+ SLsmg_write_nstring((char *)msg, SLtt_Screen_Cols);
+ SLsmg_refresh();
+ strlcpy(ui_helpline__current, msg, sz);
+}
+
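+/*
+ * Messages may arrive in pieces: buffer them in ui_helpline__last_msg
+ * (tracked by 'backlog') and only flush to the help line once a
+ * terminating newline shows up.
+ */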
+static int tui_helpline__show(const char *format, va_list ap)
+{
+ int ret;
+ static int backlog;
+
+ pthread_mutex_lock(&ui__lock);
+ ret = vscnprintf(ui_helpline__last_msg + backlog,
+ sizeof(ui_helpline__last_msg) - backlog, format, ap);
+ backlog += ret;
+
+ tui_helpline__set = true;
+
+ if (ui_helpline__last_msg[backlog - 1] == '\n') {
+ ui_helpline__puts(ui_helpline__last_msg);
+ SLsmg_refresh();
+ backlog = 0;
+ }
+ pthread_mutex_unlock(&ui__lock);
+
+ return ret;
+}
+
+struct ui_helpline tui_helpline_fns = {
+ .pop = tui_helpline__pop,
+ .push = tui_helpline__push,
+ .show = tui_helpline__show,
+};
+
+void ui_helpline__init(void)
+{
+ helpline_fns = &tui_helpline_fns;
+ ui_helpline__puts(" ");
+}
diff --git a/tools/perf/ui/tui/progress.c b/tools/perf/ui/tui/progress.c
new file mode 100644
index 000000000..bc134b828
--- /dev/null
+++ b/tools/perf/ui/tui/progress.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "../cache.h"
+#include "../progress.h"
+#include "../libslang.h"
+#include "../ui.h"
+#include "tui.h"
+#include "units.h"
+#include "../browser.h"
+
+static void __tui_progress__init(struct ui_progress *p)
+{
+ p->next = p->step = p->total / (SLtt_Screen_Cols - 2) ?: 1;
+}
+
+static int get_title(struct ui_progress *p, char *buf, size_t size)
+{
+ char buf_cur[20];
+ char buf_tot[20];
+ int ret;
+
+ ret = unit_number__scnprintf(buf_cur, sizeof(buf_cur), p->curr);
+ ret += unit_number__scnprintf(buf_tot, sizeof(buf_tot), p->total);
+
+ return ret + scnprintf(buf, size, "%s [%s/%s]",
+ p->title, buf_cur, buf_tot);
+}
+
+static void tui_progress__update(struct ui_progress *p)
+{
+ char buf[100], *title = (char *) p->title;
+ int bar, y;
+	/*
+	 * FIXME: We should have a per-UI-backend way of showing progress;
+	 * stdio would just show a percentage as NN%, etc.
+	 */
+ if (use_browser <= 0)
+ return;
+
+ if (p->total == 0)
+ return;
+
+ if (p->size) {
+ get_title(p, buf, sizeof(buf));
+ title = buf;
+ }
+
+ ui__refresh_dimensions(false);
+ pthread_mutex_lock(&ui__lock);
+ y = SLtt_Screen_Rows / 2 - 2;
+ SLsmg_set_color(0);
+ SLsmg_draw_box(y, 0, 3, SLtt_Screen_Cols);
+ SLsmg_gotorc(y++, 1);
+ SLsmg_write_string(title);
+ SLsmg_fill_region(y, 1, 1, SLtt_Screen_Cols - 2, ' ');
+ SLsmg_set_color(HE_COLORSET_SELECTED);
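+	/* scale curr/total to the box interior, i.e. screen width minus border */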
+ bar = ((SLtt_Screen_Cols - 2) * p->curr) / p->total;
+ SLsmg_fill_region(y, 1, 1, bar, ' ');
+ SLsmg_refresh();
+ pthread_mutex_unlock(&ui__lock);
+}
+
+static void tui_progress__finish(void)
+{
+ int y;
+
+ if (use_browser <= 0)
+ return;
+
+ ui__refresh_dimensions(false);
+ pthread_mutex_lock(&ui__lock);
+ y = SLtt_Screen_Rows / 2 - 2;
+ SLsmg_set_color(0);
+ SLsmg_fill_region(y, 0, 3, SLtt_Screen_Cols, ' ');
+ SLsmg_refresh();
+ pthread_mutex_unlock(&ui__lock);
+}
+
+static struct ui_progress_ops tui_progress__ops = {
+ .init = __tui_progress__init,
+ .update = tui_progress__update,
+ .finish = tui_progress__finish,
+};
+
+void tui_progress__init(void)
+{
+ ui_progress__ops = &tui_progress__ops;
+}
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
new file mode 100644
index 000000000..d4ac41679
--- /dev/null
+++ b/tools/perf/ui/tui/setup.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif
+
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../../util/util.h"
+#include "../browser.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+#include "../keysyms.h"
+#include "tui.h"
+
+static volatile int ui__need_resize;
+
+extern struct perf_error_ops perf_tui_eops;
+extern bool tui_helpline__set;
+
+extern void hist_browser__init_hpp(void);
+
+void ui__refresh_dimensions(bool force)
+{
+ if (force || ui__need_resize) {
+ ui__need_resize = 0;
+ pthread_mutex_lock(&ui__lock);
+ SLtt_get_screen_size();
+ SLsmg_reinit_smg();
+ pthread_mutex_unlock(&ui__lock);
+ }
+}
+
+static void ui__sigwinch(int sig __maybe_unused)
+{
+ ui__need_resize = 1;
+}
+
+static void ui__setup_sigwinch(void)
+{
+ static bool done;
+
+ if (done)
+ return;
+
+ done = true;
+ pthread__unblock_sigwinch();
+ signal(SIGWINCH, ui__sigwinch);
+}
+
+int ui__getch(int delay_secs)
+{
+ struct timeval timeout, *ptimeout = delay_secs ? &timeout : NULL;
+ fd_set read_set;
+ int err, key;
+
+ ui__setup_sigwinch();
+
+ FD_ZERO(&read_set);
+ FD_SET(0, &read_set);
+
+ if (delay_secs) {
+ timeout.tv_sec = delay_secs;
+ timeout.tv_usec = 0;
+ }
+
+ err = select(1, &read_set, NULL, NULL, ptimeout);
+
+ if (err == 0)
+ return K_TIMER;
+
+ if (err == -1) {
+ if (errno == EINTR)
+ return K_RESIZE;
+ return K_ERROR;
+ }
+
+ key = SLang_getkey();
+ if (key != K_ESC)
+ return key;
+
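+
+	/*
+	 * ESC may be a lone keypress or the start of an escape sequence
+	 * (arrow keys etc.): peek for ~20us of further input; if none
+	 * arrives report a bare ESC, otherwise push the key back and let
+	 * SLkp_getkey() decode the sequence.
+	 */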
+ FD_ZERO(&read_set);
+ FD_SET(0, &read_set);
+ timeout.tv_sec = 0;
+ timeout.tv_usec = 20;
+ err = select(1, &read_set, NULL, NULL, &timeout);
+ if (err == 0)
+ return K_ESC;
+
+ SLang_ungetkey(key);
+ return SLkp_getkey();
+}
+
+#ifdef HAVE_BACKTRACE_SUPPORT
+static void ui__signal_backtrace(int sig)
+{
+ void *stackdump[32];
+ size_t size;
+
+ ui__exit(false);
+ psignal(sig, "perf");
+
+ printf("-------- backtrace --------\n");
+ size = backtrace(stackdump, ARRAY_SIZE(stackdump));
+ backtrace_symbols_fd(stackdump, size, STDOUT_FILENO);
+
+ exit(0);
+}
+#else
+# define ui__signal_backtrace ui__signal
+#endif
+
+static void ui__signal(int sig)
+{
+ ui__exit(false);
+ psignal(sig, "perf");
+ exit(0);
+}
+
+int ui__init(void)
+{
+ int err;
+
+ SLutf8_enable(-1);
+ SLtt_get_terminfo();
+ SLtt_get_screen_size();
+
+ err = SLsmg_init_smg();
+ if (err < 0)
+ goto out;
+ err = SLang_init_tty(-1, 0, 0);
+ if (err < 0)
+ goto out;
+
+ err = SLkp_init();
+ if (err < 0) {
+ pr_err("TUI initialization failed.\n");
+ goto out;
+ }
+
+ SLkp_define_keysym((char *)"^(kB)", SL_KEY_UNTAB);
+
+ signal(SIGSEGV, ui__signal_backtrace);
+ signal(SIGFPE, ui__signal_backtrace);
+ signal(SIGINT, ui__signal);
+ signal(SIGQUIT, ui__signal);
+ signal(SIGTERM, ui__signal);
+
+ perf_error__register(&perf_tui_eops);
+
+ ui_helpline__init();
+ ui_browser__init();
+ tui_progress__init();
+
+ hist_browser__init_hpp();
+out:
+ return err;
+}
+
+void ui__exit(bool wait_for_ok)
+{
+ if (wait_for_ok && tui_helpline__set)
+ ui__question_window("Fatal Error",
+ ui_helpline__last_msg,
+ "Press any key...", 0);
+
+ SLtt_set_cursor_visibility(1);
+ SLsmg_refresh();
+ SLsmg_reset_smg();
+ SLang_reset_tty();
+
+ perf_error__unregister(&perf_tui_eops);
+}
diff --git a/tools/perf/ui/tui/tui.h b/tools/perf/ui/tui/tui.h
new file mode 100644
index 000000000..8de06f634
--- /dev/null
+++ b/tools/perf/ui/tui/tui.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TUI_H_
+#define _PERF_TUI_H_ 1
+
+void tui_progress__init(void);
+
+#endif /* _PERF_TUI_H_ */
diff --git a/tools/perf/ui/tui/util.c b/tools/perf/ui/tui/util.c
new file mode 100644
index 000000000..b9794d618
--- /dev/null
+++ b/tools/perf/ui/tui/util.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../../util/util.h"
+#include <signal.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/ttydefaults.h>
+
+#include "../../util/cache.h"
+#include "../../util/debug.h"
+#include "../browser.h"
+#include "../keysyms.h"
+#include "../helpline.h"
+#include "../ui.h"
+#include "../util.h"
+#include "../libslang.h"
+
+static void ui_browser__argv_write(struct ui_browser *browser,
+ void *entry, int row)
+{
+ char **arg = entry;
+ bool current_entry = ui_browser__is_current_entry(browser, row);
+
+ ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
+ HE_COLORSET_NORMAL);
+ ui_browser__write_nstring(browser, *arg, browser->width);
+}
+
+static int popup_menu__run(struct ui_browser *menu)
+{
+ int key;
+
+ if (ui_browser__show(menu, " ", "ESC: exit, ENTER|->: Select option") < 0)
+ return -1;
+
+ while (1) {
+ key = ui_browser__run(menu, 0);
+
+ switch (key) {
+ case K_RIGHT:
+ case K_ENTER:
+ key = menu->index;
+ break;
+ case K_LEFT:
+ case K_ESC:
+ case 'q':
+ case CTRL('c'):
+ key = -1;
+ break;
+ default:
+ continue;
+ }
+
+ break;
+ }
+
+ ui_browser__hide(menu);
+ return key;
+}
+
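+/*
+ * Run a simple vertical menu over the argv strings and return the index
+ * of the selected entry, or -1 if the user backs out (ESC, 'q', left
+ * arrow or Ctrl-C). A minimal, hypothetical use:
+ *
+ *	char *opts[] = { "Annotate", "Zoom", "Exit" };
+ *	int choice = ui__popup_menu(ARRAY_SIZE(opts), opts);
+ */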
+int ui__popup_menu(int argc, char * const argv[])
+{
+ struct ui_browser menu = {
+ .entries = (void *)argv,
+ .refresh = ui_browser__argv_refresh,
+ .seek = ui_browser__argv_seek,
+ .write = ui_browser__argv_write,
+ .nr_entries = argc,
+ };
+
+ return popup_menu__run(&menu);
+}
+
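+/*
+ * Draw a boxed dialog with a single-line text input below the wrapped
+ * 'text'. Editing ends on Enter, ESC or timeout; the typed string
+ * (at most sizeof(buf) - 1 characters) is copied into 'input' and the
+ * terminating key is returned.
+ */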
+int ui_browser__input_window(const char *title, const char *text, char *input,
+ const char *exit_msg, int delay_secs)
+{
+ int x, y, len, key;
+ int max_len = 60, nr_lines = 0;
+ static char buf[50];
+ const char *t;
+
+ t = text;
+ while (1) {
+ const char *sep = strchr(t, '\n');
+
+ if (sep == NULL)
+ sep = strchr(t, '\0');
+ len = sep - t;
+ if (max_len < len)
+ max_len = len;
+ ++nr_lines;
+ if (*sep == '\0')
+ break;
+ t = sep + 1;
+ }
+
+ pthread_mutex_lock(&ui__lock);
+
+ max_len += 2;
+ nr_lines += 8;
+ y = SLtt_Screen_Rows / 2 - nr_lines / 2;
+ x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+ SLsmg_set_color(0);
+ SLsmg_draw_box(y, x++, nr_lines, max_len);
+ if (title) {
+ SLsmg_gotorc(y, x + 1);
+ SLsmg_write_string((char *)title);
+ }
+ SLsmg_gotorc(++y, x);
+ nr_lines -= 7;
+ max_len -= 2;
+ SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+ nr_lines, max_len, 1);
+ y += nr_lines;
+ len = 5;
+ while (len--) {
+ SLsmg_gotorc(y + len - 1, x);
+ SLsmg_write_nstring((char *)" ", max_len);
+ }
+ SLsmg_draw_box(y++, x + 1, 3, max_len - 2);
+
+ SLsmg_gotorc(y + 3, x);
+ SLsmg_write_nstring((char *)exit_msg, max_len);
+ SLsmg_refresh();
+
+ pthread_mutex_unlock(&ui__lock);
+
+ x += 2;
+ len = 0;
+ key = ui__getch(delay_secs);
+ while (key != K_TIMER && key != K_ENTER && key != K_ESC) {
+ pthread_mutex_lock(&ui__lock);
+
+ if (key == K_BKSPC) {
+ if (len == 0) {
+ pthread_mutex_unlock(&ui__lock);
+ goto next_key;
+ }
+ SLsmg_gotorc(y, x + --len);
+ SLsmg_write_char(' ');
+ } else {
+ buf[len] = key;
+ SLsmg_gotorc(y, x + len++);
+ SLsmg_write_char(key);
+ }
+ SLsmg_refresh();
+
+ pthread_mutex_unlock(&ui__lock);
+
+ /* XXX more graceful overflow handling needed */
+ if (len == sizeof(buf) - 1) {
+ ui_helpline__push("maximum size of symbol name reached!");
+ key = K_ENTER;
+ break;
+ }
+next_key:
+ key = ui__getch(delay_secs);
+ }
+
+ buf[len] = '\0';
+ strncpy(input, buf, len + 1);
+ return key;
+}
+
+int ui__question_window(const char *title, const char *text,
+ const char *exit_msg, int delay_secs)
+{
+ int x, y;
+ int max_len = 0, nr_lines = 0;
+ const char *t;
+
+ t = text;
+ while (1) {
+ const char *sep = strchr(t, '\n');
+ int len;
+
+ if (sep == NULL)
+ sep = strchr(t, '\0');
+ len = sep - t;
+ if (max_len < len)
+ max_len = len;
+ ++nr_lines;
+ if (*sep == '\0')
+ break;
+ t = sep + 1;
+ }
+
+ pthread_mutex_lock(&ui__lock);
+
+ max_len += 2;
+ nr_lines += 4;
+ y = SLtt_Screen_Rows / 2 - nr_lines / 2;
+ x = SLtt_Screen_Cols / 2 - max_len / 2;
+
+ SLsmg_set_color(0);
+ SLsmg_draw_box(y, x++, nr_lines, max_len);
+ if (title) {
+ SLsmg_gotorc(y, x + 1);
+ SLsmg_write_string((char *)title);
+ }
+ SLsmg_gotorc(++y, x);
+ nr_lines -= 2;
+ max_len -= 2;
+ SLsmg_write_wrapped_string((unsigned char *)text, y, x,
+ nr_lines, max_len, 1);
+ SLsmg_gotorc(y + nr_lines - 2, x);
+ SLsmg_write_nstring((char *)" ", max_len);
+ SLsmg_gotorc(y + nr_lines - 1, x);
+ SLsmg_write_nstring((char *)exit_msg, max_len);
+ SLsmg_refresh();
+
+ pthread_mutex_unlock(&ui__lock);
+
+ return ui__getch(delay_secs);
+}
+
+int ui__help_window(const char *text)
+{
+ return ui__question_window("Help", text, "Press any key...", 0);
+}
+
+int ui__dialog_yesno(const char *msg)
+{
+ return ui__question_window(NULL, msg, "Enter: Yes, ESC: No", 0);
+}
+
+static int __ui__warning(const char *title, const char *format, va_list args)
+{
+ char *s;
+
+ if (vasprintf(&s, format, args) > 0) {
+ int key;
+
+ key = ui__question_window(title, s, "Press any key...", 0);
+ free(s);
+ return key;
+ }
+
+ fprintf(stderr, "%s\n", title);
+ vfprintf(stderr, format, args);
+ return K_ESC;
+}
+
+static int perf_tui__error(const char *format, va_list args)
+{
+ return __ui__warning("Error:", format, args);
+}
+
+static int perf_tui__warning(const char *format, va_list args)
+{
+ return __ui__warning("Warning:", format, args);
+}
+
+struct perf_error_ops perf_tui_eops = {
+ .error = perf_tui__error,
+ .warning = perf_tui__warning,
+};
diff --git a/tools/perf/ui/ui.h b/tools/perf/ui/ui.h
new file mode 100644
index 000000000..9b6fdf06e
--- /dev/null
+++ b/tools/perf/ui/ui.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_H_
+#define _PERF_UI_H_ 1
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <linux/compiler.h>
+
+extern pthread_mutex_t ui__lock;
+extern void *perf_gtk_handle;
+
+extern int use_browser;
+
+void setup_browser(bool fallback_to_pager);
+void exit_browser(bool wait_for_ok);
+
+#ifdef HAVE_SLANG_SUPPORT
+int ui__init(void);
+void ui__exit(bool wait_for_ok);
+#else
+static inline int ui__init(void)
+{
+ return -1;
+}
+static inline void ui__exit(bool wait_for_ok __maybe_unused) {}
+#endif
+
+void ui__refresh_dimensions(bool force);
+
+struct option;
+
+int stdio__config_color(const struct option *opt, const char *mode, int unset);
+
+#endif /* _PERF_UI_H_ */
diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
new file mode 100644
index 000000000..63bf06e80
--- /dev/null
+++ b/tools/perf/ui/util.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "../debug.h"
+
+
+/*
+ * Default error logging functions
+ */
+static int perf_stdio__error(const char *format, va_list args)
+{
+ fprintf(stderr, "Error:\n");
+ vfprintf(stderr, format, args);
+ return 0;
+}
+
+static int perf_stdio__warning(const char *format, va_list args)
+{
+ fprintf(stderr, "Warning:\n");
+ vfprintf(stderr, format, args);
+ return 0;
+}
+
+static struct perf_error_ops default_eops =
+{
+ .error = perf_stdio__error,
+ .warning = perf_stdio__warning,
+};
+
+static struct perf_error_ops *perf_eops = &default_eops;
+
+
+int ui__error(const char *format, ...)
+{
+ int ret;
+ va_list args;
+
+ va_start(args, format);
+ ret = perf_eops->error(format, args);
+ va_end(args);
+
+ return ret;
+}
+
+int ui__warning(const char *format, ...)
+{
+ int ret;
+ va_list args;
+
+ va_start(args, format);
+ ret = perf_eops->warning(format, args);
+ va_end(args);
+
+ return ret;
+}
+
+/**
+ * perf_error__register - Register error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Register UI-specific error logging functions. Registration fails
+ * unless any previously registered functions were unregistered first.
+ */
+int perf_error__register(struct perf_error_ops *eops)
+{
+ if (perf_eops != &default_eops)
+ return -1;
+
+ perf_eops = eops;
+ return 0;
+}
+
+/**
+ * perf_error__unregister - Unregister error logging functions
+ * @eops: The pointer to error logging function struct
+ *
+ * Unregister already registered error logging functions.
+ */
+int perf_error__unregister(struct perf_error_ops *eops)
+{
+ if (perf_eops != eops)
+ return -1;
+
+ perf_eops = &default_eops;
+ return 0;
+}
diff --git a/tools/perf/ui/util.h b/tools/perf/ui/util.h
new file mode 100644
index 000000000..5e44223b5
--- /dev/null
+++ b/tools/perf/ui/util.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UI_UTIL_H_
+#define _PERF_UI_UTIL_H_ 1
+
+#include <stdarg.h>
+
+int ui__getch(int delay_secs);
+int ui__popup_menu(int argc, char * const argv[]);
+int ui__help_window(const char *text);
+int ui__dialog_yesno(const char *msg);
+int ui__question_window(const char *title, const char *text,
+ const char *exit_msg, int delay_secs);
+
+struct perf_error_ops {
+ int (*error)(const char *format, va_list args);
+ int (*warning)(const char *format, va_list args);
+};
+
+int perf_error__register(struct perf_error_ops *eops);
+int perf_error__unregister(struct perf_error_ops *eops);
+
+#endif /* _PERF_UI_UTIL_H_ */
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
new file mode 100644
index 000000000..4eaac6aaa
--- /dev/null
+++ b/tools/perf/util/Build
@@ -0,0 +1,224 @@
+libperf-y += annotate.o
+libperf-y += block-range.o
+libperf-y += build-id.o
+libperf-y += config.o
+libperf-y += ctype.o
+libperf-y += db-export.o
+libperf-y += env.o
+libperf-y += event.o
+libperf-y += evlist.o
+libperf-y += evsel.o
+libperf-y += evsel_fprintf.o
+libperf-y += find_bit.o
+libperf-y += get_current_dir_name.o
+libperf-y += kallsyms.o
+libperf-y += levenshtein.o
+libperf-y += llvm-utils.o
+libperf-y += mmap.o
+libperf-y += memswap.o
+libperf-y += parse-events.o
+libperf-y += perf_regs.o
+libperf-y += path.o
+libperf-y += print_binary.o
+libperf-y += rbtree.o
+libperf-y += libstring.o
+libperf-y += bitmap.o
+libperf-y += hweight.o
+libperf-y += smt.o
+libperf-y += strbuf.o
+libperf-y += string.o
+libperf-y += strlist.o
+libperf-y += strfilter.o
+libperf-y += top.o
+libperf-y += usage.o
+libperf-y += dso.o
+libperf-y += symbol.o
+libperf-y += symbol_fprintf.o
+libperf-y += color.o
+libperf-y += metricgroup.o
+libperf-y += header.o
+libperf-y += callchain.o
+libperf-y += values.o
+libperf-y += debug.o
+libperf-y += machine.o
+libperf-y += map.o
+libperf-y += pstack.o
+libperf-y += session.o
+libperf-$(CONFIG_TRACE) += syscalltbl.o
+libperf-y += ordered-events.o
+libperf-y += namespaces.o
+libperf-y += comm.o
+libperf-y += thread.o
+libperf-y += thread_map.o
+libperf-y += trace-event-parse.o
+libperf-y += parse-events-flex.o
+libperf-y += parse-events-bison.o
+libperf-y += pmu.o
+libperf-y += pmu-flex.o
+libperf-y += pmu-bison.o
+libperf-y += trace-event-read.o
+libperf-y += trace-event-info.o
+libperf-y += trace-event-scripting.o
+libperf-y += trace-event.o
+libperf-y += svghelper.o
+libperf-y += sort.o
+libperf-y += hist.o
+libperf-y += util.o
+libperf-y += xyarray.o
+libperf-y += cpumap.o
+libperf-y += cgroup.o
+libperf-y += target.o
+libperf-y += rblist.o
+libperf-y += intlist.o
+libperf-y += vdso.o
+libperf-y += counts.o
+libperf-y += stat.o
+libperf-y += stat-shadow.o
+libperf-y += record.o
+libperf-y += srcline.o
+libperf-y += data.o
+libperf-y += tsc.o
+libperf-y += cloexec.o
+libperf-y += call-path.o
+libperf-y += rwsem.o
+libperf-y += thread-stack.o
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
+libperf-$(CONFIG_AUXTRACE) += intel-pt.o
+libperf-$(CONFIG_AUXTRACE) += intel-bts.o
+libperf-$(CONFIG_AUXTRACE) += arm-spe.o
+libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+libperf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
+
+ifdef CONFIG_LIBOPENCSD
+libperf-$(CONFIG_AUXTRACE) += cs-etm.o
+libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
+endif
+
+libperf-y += parse-branch-options.o
+libperf-y += dump-insn.o
+libperf-y += parse-regs-options.o
+libperf-y += term.o
+libperf-y += help-unknown-cmd.o
+libperf-y += mem-events.o
+libperf-y += vsprintf.o
+libperf-y += drv_configs.o
+libperf-y += units.o
+libperf-y += time-utils.o
+libperf-y += expr-bison.o
+libperf-y += branch.o
+libperf-y += mem2node.o
+
+libperf-$(CONFIG_LIBBPF) += bpf-loader.o
+libperf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
+libperf-$(CONFIG_LIBELF) += symbol-elf.o
+libperf-$(CONFIG_LIBELF) += probe-file.o
+libperf-$(CONFIG_LIBELF) += probe-event.o
+
+ifndef CONFIG_LIBELF
+libperf-y += symbol-minimal.o
+endif
+
+ifndef CONFIG_SETNS
+libperf-y += setns.o
+endif
+
+libperf-$(CONFIG_DWARF) += probe-finder.o
+libperf-$(CONFIG_DWARF) += dwarf-aux.o
+libperf-$(CONFIG_DWARF) += dwarf-regs.o
+
+libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o
+libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
+libperf-$(CONFIG_LIBUNWIND_X86) += libunwind/x86_32.o
+libperf-$(CONFIG_LIBUNWIND_AARCH64) += libunwind/arm64.o
+
+libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+
+libperf-y += scripting-engines/
+
+libperf-$(CONFIG_ZLIB) += zlib.o
+libperf-$(CONFIG_LZMA) += lzma.o
+libperf-y += demangle-java.o
+libperf-y += demangle-rust.o
+
+ifdef CONFIG_JITDUMP
+libperf-$(CONFIG_LIBELF) += jitdump.o
+libperf-$(CONFIG_LIBELF) += genelf.o
+libperf-$(CONFIG_DWARF) += genelf_debug.o
+endif
+
+libperf-y += perf-hooks.o
+
+libperf-$(CONFIG_CXX) += c++/
+
+CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+
+# avoid compiler warnings in 32-bit mode
+CFLAGS_genelf_debug.o += -Wno-packed
+
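+# Generate the event, PMU and expression parsers from their flex/bison
+# sources into $(OUTPUT)util/.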
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+
+$(OUTPUT)util/expr-bison.c: util/expr.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
+
+CFLAGS_parse-events-flex.o += -w
+CFLAGS_pmu-flex.o += -w
+CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
+CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
+CFLAGS_parse-events.o += -Wno-redundant-decls
+CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/bitmap.o: ../lib/bitmap.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/find_bit.o: ../lib/find_bit.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/libstring.o: ../lib/string.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)util/vsprintf.o: ../lib/vsprintf.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/perf/util/PERF-VERSION-GEN b/tools/perf/util/PERF-VERSION-GEN
new file mode 100755
index 000000000..3802cee5e
--- /dev/null
+++ b/tools/perf/util/PERF-VERSION-GEN
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $# -eq 1 ] ; then
+ OUTPUT=$1
+fi
+
+GVF=${OUTPUT}PERF-VERSION-FILE
+
+LF='
+'
+
+#
+# First check if there is a .git to get the version from git describe,
+# otherwise try to get the version from the kernel Makefile.
+#
+CID=
+TAG=
+if test -d ../../.git -o -f ../../.git
+then
+ TAG=$(git describe --abbrev=0 --match "v[0-9].[0-9]*" 2>/dev/null)
+ CID=$(git log -1 --abbrev=4 --pretty=format:"%h" 2>/dev/null) && CID="-g$CID"
+elif test -f ../../PERF-VERSION-FILE
+then
+ TAG=$(cut -d' ' -f3 ../../PERF-VERSION-FILE | sed -e 's/\"//g')
+fi
+if test -z "$TAG"
+then
+ TAG=$(MAKEFLAGS= make -sC ../.. kernelversion)
+fi
+VN="$TAG$CID"
+if test -n "$CID"
+then
+ # format version string, strip trailing zero of sublevel:
+ VN=$(echo "$VN" | sed -e 's/-/./g;s/\([0-9]*[.][0-9]*\)[.]0/\1/')
+fi
+
+VN=$(expr "$VN" : v*'\(.*\)')
+
+if test -r $GVF
+then
+ VC=$(sed -e 's/^#define PERF_VERSION "\(.*\)"/\1/' <$GVF)
+else
+ VC=unset
+fi
+test "$VN" = "$VC" || {
+ echo >&2 " PERF_VERSION = $VN"
+ echo "#define PERF_VERSION \"$VN\"" >$GVF
+}
+
+
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
new file mode 100644
index 000000000..6958d7eed
--- /dev/null
+++ b/tools/perf/util/annotate.c
@@ -0,0 +1,2878 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-annotate.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include "util.h"
+#include "ui/ui.h"
+#include "sort.h"
+#include "build-id.h"
+#include "color.h"
+#include "config.h"
+#include "cache.h"
+#include "symbol.h"
+#include "units.h"
+#include "debug.h"
+#include "annotate.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "block-range.h"
+#include "string2.h"
+#include "arch/common.h"
+#include <regex.h>
+#include <pthread.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+
+/* FIXME: For the HE_COLORSET */
+#include "ui/browser.h"
+
+/*
+ * FIXME: Using the same values as slang.h,
+ * but that header may not be available everywhere
+ */
+#define LARROW_CHAR ((unsigned char)',')
+#define RARROW_CHAR ((unsigned char)'+')
+#define DARROW_CHAR ((unsigned char)'.')
+#define UARROW_CHAR ((unsigned char)'-')
+
+#include "sane_ctype.h"
+
+struct annotation_options annotation__default_options = {
+ .use_offset = true,
+ .jump_arrows = true,
+ .annotate_src = true,
+ .offset_level = ANNOTATION__OFFSET_JUMP_TARGETS,
+ .percent_type = PERCENT_PERIOD_LOCAL,
+};
+
+static regex_t file_lineno;
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name);
+static void ins__sort(struct arch *arch);
+static int disasm_line__parse(char *line, const char **namep, char **rawp);
+
+struct arch {
+ const char *name;
+ struct ins *instructions;
+ size_t nr_instructions;
+ size_t nr_instructions_allocated;
+ struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name);
+ bool sorted_instructions;
+ bool initialized;
+ void *priv;
+ unsigned int model;
+ unsigned int family;
+ int (*init)(struct arch *arch, char *cpuid);
+ bool (*ins_is_fused)(struct arch *arch, const char *ins1,
+ const char *ins2);
+ struct {
+ char comment_char;
+ char skip_functions_char;
+ } objdump;
+};
+
+static struct ins_ops call_ops;
+static struct ins_ops dec_ops;
+static struct ins_ops jump_ops;
+static struct ins_ops mov_ops;
+static struct ins_ops nop_ops;
+static struct ins_ops lock_ops;
+static struct ins_ops ret_ops;
+
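+/*
+ * Grow the per-arch instructions table in chunks of 128 entries. If the
+ * table still points at a static array (nr_instructions_allocated == 0),
+ * it is first copied into heap memory so it can be reallocated later.
+ */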
+static int arch__grow_instructions(struct arch *arch)
+{
+ struct ins *new_instructions;
+ size_t new_nr_allocated;
+
+ if (arch->nr_instructions_allocated == 0 && arch->instructions)
+ goto grow_from_non_allocated_table;
+
+ new_nr_allocated = arch->nr_instructions_allocated + 128;
+ new_instructions = realloc(arch->instructions, new_nr_allocated * sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+out_update_instructions:
+ arch->instructions = new_instructions;
+ arch->nr_instructions_allocated = new_nr_allocated;
+ return 0;
+
+grow_from_non_allocated_table:
+ new_nr_allocated = arch->nr_instructions + 128;
+ new_instructions = calloc(new_nr_allocated, sizeof(struct ins));
+ if (new_instructions == NULL)
+ return -1;
+
+ memcpy(new_instructions, arch->instructions, arch->nr_instructions * sizeof(struct ins));
+ goto out_update_instructions;
+}
+
+static int arch__associate_ins_ops(struct arch* arch, const char *name, struct ins_ops *ops)
+{
+ struct ins *ins;
+
+ if (arch->nr_instructions == arch->nr_instructions_allocated &&
+ arch__grow_instructions(arch))
+ return -1;
+
+ ins = &arch->instructions[arch->nr_instructions];
+ ins->name = strdup(name);
+ if (!ins->name)
+ return -1;
+
+ ins->ops = ops;
+ arch->nr_instructions++;
+
+ ins__sort(arch);
+ return 0;
+}
+
+#include "arch/arm/annotate/instructions.c"
+#include "arch/arm64/annotate/instructions.c"
+#include "arch/x86/annotate/instructions.c"
+#include "arch/powerpc/annotate/instructions.c"
+#include "arch/s390/annotate/instructions.c"
+
+static struct arch architectures[] = {
+ {
+ .name = "arm",
+ .init = arm__annotate_init,
+ },
+ {
+ .name = "arm64",
+ .init = arm64__annotate_init,
+ },
+ {
+ .name = "x86",
+ .init = x86__annotate_init,
+ .instructions = x86__instructions,
+ .nr_instructions = ARRAY_SIZE(x86__instructions),
+ .ins_is_fused = x86__ins_is_fused,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+ {
+ .name = "powerpc",
+ .init = powerpc__annotate_init,
+ },
+ {
+ .name = "s390",
+ .init = s390__annotate_init,
+ .objdump = {
+ .comment_char = '#',
+ },
+ },
+};
+
+static void ins__delete(struct ins_operands *ops)
+{
+ if (ops == NULL)
+ return;
+ zfree(&ops->source.raw);
+ zfree(&ops->source.name);
+ zfree(&ops->target.raw);
+ zfree(&ops->target.name);
+}
+
+static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw);
+}
+
+int ins__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ if (ins->ops->scnprintf)
+ return ins->ops->scnprintf(ins, bf, size, ops);
+
+ return ins__raw_scnprintf(ins, bf, size, ops);
+}
+
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2)
+{
+ if (!arch || !arch->ins_is_fused)
+ return false;
+
+ return arch->ins_is_fused(arch, ins1, ins2);
+}
+
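+/*
+ * Parse a call target from objdump output of the form
+ * "callq <addr> <name+offset>"; when no "<name>" is present, fall back
+ * to indirect-call handling and try to read the address after '*'.
+ */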
+static int call__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ char *endptr, *tok, *name;
+ struct map *map = ms->map;
+ struct addr_map_symbol target = {
+ .map = map,
+ };
+
+ ops->target.addr = strtoull(ops->raw, &endptr, 16);
+
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ goto indirect_call;
+
+ name++;
+
+ if (arch->objdump.skip_functions_char &&
+ strchr(name, arch->objdump.skip_functions_char))
+ return -1;
+
+ tok = strchr(name, '>');
+ if (tok == NULL)
+ return -1;
+
+ *tok = '\0';
+ ops->target.name = strdup(name);
+ *tok = '>';
+
+ if (ops->target.name == NULL)
+ return -1;
+find_target:
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+
+ if (map_groups__find_ams(&target) == 0 &&
+ map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.sym;
+
+ return 0;
+
+indirect_call:
+ tok = strchr(endptr, '*');
+ if (tok != NULL) {
+ endptr++;
+
+ /* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
+ * Do not parse such instructions. */
+ if (strstr(endptr, "(%r") == NULL)
+ ops->target.addr = strtoull(endptr, NULL, 16);
+ }
+ goto find_target;
+}
+
+static int call__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ if (ops->target.sym)
+ return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
+
+ if (ops->target.addr == 0)
+ return ins__raw_scnprintf(ins, bf, size, ops);
+
+ if (ops->target.name)
+ return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name);
+
+ return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr);
+}
+
+static struct ins_ops call_ops = {
+ .parse = call__parse,
+ .scnprintf = call__scnprintf,
+};
+
+bool ins__is_call(const struct ins *ins)
+{
+ return ins->ops == &call_ops || ins->ops == &s390_call_ops;
+}
+
+/*
+ * Prevents matching commas in the comment section, e.g.:
+ * ffff200008446e70: b.cs ffff2000084470f4 <generic_exec_single+0x314> // b.hs, b.nlast
+ */
+static inline const char *validate_comma(const char *c, struct ins_operands *ops)
+{
+ if (ops->raw_comment && c > ops->raw_comment)
+ return NULL;
+
+ return c;
+}
+
+static int jump__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ struct map *map = ms->map;
+ struct symbol *sym = ms->sym;
+ struct addr_map_symbol target = {
+ .map = map,
+ };
+ const char *c = strchr(ops->raw, ',');
+ u64 start, end;
+
+ ops->raw_comment = strchr(ops->raw, arch->objdump.comment_char);
+ c = validate_comma(c, ops);
+
+ /*
+ * Examples of lines to parse for the _cpp_lex_token@@Base
+ * function:
+ *
+ * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92>
+ * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72>
+ *
+ * The first is a jump to an offset inside the same function,
+ * the second is to another function, i.e. that 0xa72 is an
+ * offset in the cpp_named_operator2name@@base function.
+ */
+ /*
+ * skip over up to 2 possible operands to get to the address, e.g.:
+ * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
+ */
+ if (c++ != NULL) {
+ ops->target.addr = strtoull(c, NULL, 16);
+ if (!ops->target.addr) {
+ c = strchr(c, ',');
+ c = validate_comma(c, ops);
+ if (c++ != NULL)
+ ops->target.addr = strtoull(c, NULL, 16);
+ }
+ } else {
+ ops->target.addr = strtoull(ops->raw, NULL, 16);
+ }
+
+ target.addr = map__objdump_2mem(map, ops->target.addr);
+ start = map->unmap_ip(map, sym->start);
+ end = map->unmap_ip(map, sym->end);
+
+ ops->target.outside = target.addr < start || target.addr > end;
+
+ /*
+ * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
+
+ cpp_named_operator2name@@Base+0xa72
+
+ * Point to a place that is after the cpp_named_operator2name
+ * boundaries, i.e. in the ELF symbol table for cc1
+ * cpp_named_operator2name is marked as being 32-bytes long, but it in
+ * fact is much larger than that, so we seem to need a symbols__find()
+ * routine that looks for >= current->start and < next_symbol->start,
+ * possibly just for C++ objects?
+ *
+ * For now let's just make some progress by marking jumps to outside the
+ * current function as call-like.
+ *
+ * Actual navigation will come next, with further understanding of how
+ * the symbol searching and disassembly should be done.
+ */
+ if (map_groups__find_ams(&target) == 0 &&
+ map__rip_2objdump(target.map, map->map_ip(target.map, target.addr)) == ops->target.addr)
+ ops->target.sym = target.sym;
+
+ if (!ops->target.outside) {
+ ops->target.offset = target.addr - start;
+ ops->target.offset_avail = true;
+ } else {
+ ops->target.offset_avail = false;
+ }
+
+ return 0;
+}
+
+static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ const char *c;
+
+ if (!ops->target.addr || ops->target.offset < 0)
+ return ins__raw_scnprintf(ins, bf, size, ops);
+
+ if (ops->target.outside && ops->target.sym != NULL)
+ return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.sym->name);
+
+ c = strchr(ops->raw, ',');
+ c = validate_comma(c, ops);
+
+ if (c != NULL) {
+ const char *c2 = strchr(c + 1, ',');
+
+ c2 = validate_comma(c2, ops);
+ /* check for 3-op insn */
+ if (c2 != NULL)
+ c = c2;
+ c++;
+
+ /* mirror arch objdump's space-after-comma style */
+ if (*c == ' ')
+ c++;
+ }
+
+ return scnprintf(bf, size, "%-6s %.*s%" PRIx64,
+ ins->name, c ? c - ops->raw : 0, ops->raw,
+ ops->target.offset);
+}
+
+static struct ins_ops jump_ops = {
+ .parse = jump__parse,
+ .scnprintf = jump__scnprintf,
+};
+
+bool ins__is_jump(const struct ins *ins)
+{
+ return ins->ops == &jump_ops;
+}
+
+static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep)
+{
+ char *endptr, *name, *t;
+
+ if (strstr(raw, "(%rip)") == NULL)
+ return 0;
+
+ *addrp = strtoull(comment, &endptr, 16);
+ if (endptr == comment)
+ return 0;
+ name = strchr(endptr, '<');
+ if (name == NULL)
+ return -1;
+
+ name++;
+
+ t = strchr(name, '>');
+ if (t == NULL)
+ return 0;
+
+ *t = '\0';
+ *namep = strdup(name);
+ *t = '>';
+
+ return 0;
+}
+
+static int lock__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms)
+{
+ ops->locked.ops = zalloc(sizeof(*ops->locked.ops));
+ if (ops->locked.ops == NULL)
+ return 0;
+
+ if (disasm_line__parse(ops->raw, &ops->locked.ins.name, &ops->locked.ops->raw) < 0)
+ goto out_free_ops;
+
+ ops->locked.ins.ops = ins__find(arch, ops->locked.ins.name);
+
+ if (ops->locked.ins.ops == NULL)
+ goto out_free_ops;
+
+ if (ops->locked.ins.ops->parse &&
+ ops->locked.ins.ops->parse(arch, ops->locked.ops, ms) < 0)
+ goto out_free_ops;
+
+ return 0;
+
+out_free_ops:
+ zfree(&ops->locked.ops);
+ return 0;
+}
+
+static int lock__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ int printed;
+
+ if (ops->locked.ins.ops == NULL)
+ return ins__raw_scnprintf(ins, bf, size, ops);
+
+ printed = scnprintf(bf, size, "%-6s ", ins->name);
+ return printed + ins__scnprintf(&ops->locked.ins, bf + printed,
+ size - printed, ops->locked.ops);
+}
+
+static void lock__delete(struct ins_operands *ops)
+{
+ struct ins *ins = &ops->locked.ins;
+
+ if (ins->ops && ins->ops->free)
+ ins->ops->free(ops->locked.ops);
+ else
+ ins__delete(ops->locked.ops);
+
+ zfree(&ops->locked.ops);
+ zfree(&ops->target.raw);
+ zfree(&ops->target.name);
+}
+
+static struct ins_ops lock_ops = {
+ .free = lock__delete,
+ .parse = lock__parse,
+ .scnprintf = lock__scnprintf,
+};
+
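+/*
+ * Split the raw operands at the first comma into source and target and
+ * try to resolve symbolic names from a trailing objdump comment such as
+ * "# 70afe0 <_DYNAMIC+0x2f8>".
+ */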
+static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+ char *s = strchr(ops->raw, ','), *target, *comment, prev;
+
+ if (s == NULL)
+ return -1;
+
+ *s = '\0';
+ ops->source.raw = strdup(ops->raw);
+ *s = ',';
+
+ if (ops->source.raw == NULL)
+ return -1;
+
+ target = ++s;
+ comment = strchr(s, arch->objdump.comment_char);
+
+ if (comment != NULL)
+ s = comment - 1;
+ else
+ s = strchr(s, '\0') - 1;
+
+ while (s > target && isspace(s[0]))
+ --s;
+ s++;
+ prev = *s;
+ *s = '\0';
+
+ ops->target.raw = strdup(target);
+ *s = prev;
+
+ if (ops->target.raw == NULL)
+ goto out_free_source;
+
+ if (comment == NULL)
+ return 0;
+
+ comment = ltrim(comment);
+ comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+ return 0;
+
+out_free_source:
+ zfree(&ops->source.raw);
+ return -1;
+}
+
+static int mov__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ return scnprintf(bf, size, "%-6s %s,%s", ins->name,
+ ops->source.name ?: ops->source.raw,
+ ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops mov_ops = {
+ .parse = mov__parse,
+ .scnprintf = mov__scnprintf,
+};
+
+static int dec__parse(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms __maybe_unused)
+{
+ char *target, *comment, *s, prev;
+
+ target = s = ops->raw;
+
+ while (s[0] != '\0' && !isspace(s[0]))
+ ++s;
+ prev = *s;
+ *s = '\0';
+
+ ops->target.raw = strdup(target);
+ *s = prev;
+
+ if (ops->target.raw == NULL)
+ return -1;
+
+ comment = strchr(s, arch->objdump.comment_char);
+ if (comment == NULL)
+ return 0;
+
+ comment = ltrim(comment);
+ comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name);
+
+ return 0;
+}
+
+static int dec__scnprintf(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops)
+{
+ return scnprintf(bf, size, "%-6s %s", ins->name,
+ ops->target.name ?: ops->target.raw);
+}
+
+static struct ins_ops dec_ops = {
+ .parse = dec__parse,
+ .scnprintf = dec__scnprintf,
+};
+
+static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size,
+ struct ins_operands *ops __maybe_unused)
+{
+ return scnprintf(bf, size, "%-6s", "nop");
+}
+
+static struct ins_ops nop_ops = {
+ .scnprintf = nop__scnprintf,
+};
+
+static struct ins_ops ret_ops = {
+ .scnprintf = ins__raw_scnprintf,
+};
+
+bool ins__is_ret(const struct ins *ins)
+{
+ return ins->ops == &ret_ops;
+}
+
+bool ins__is_lock(const struct ins *ins)
+{
+ return ins->ops == &lock_ops;
+}
+
+static int ins__key_cmp(const void *name, const void *insp)
+{
+ const struct ins *ins = insp;
+
+ return strcmp(name, ins->name);
+}
+
+static int ins__cmp(const void *a, const void *b)
+{
+ const struct ins *ia = a;
+ const struct ins *ib = b;
+
+ return strcmp(ia->name, ib->name);
+}
+
+static void ins__sort(struct arch *arch)
+{
+ const int nmemb = arch->nr_instructions;
+
+ qsort(arch->instructions, nmemb, sizeof(struct ins), ins__cmp);
+}
+
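+/*
+ * The instructions table is sorted lazily on first lookup and then
+ * binary searched by name.
+ */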
+static struct ins_ops *__ins__find(struct arch *arch, const char *name)
+{
+ struct ins *ins;
+ const int nmemb = arch->nr_instructions;
+
+ if (!arch->sorted_instructions) {
+ ins__sort(arch);
+ arch->sorted_instructions = true;
+ }
+
+ ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp);
+ return ins ? ins->ops : NULL;
+}
+
+static struct ins_ops *ins__find(struct arch *arch, const char *name)
+{
+ struct ins_ops *ops = __ins__find(arch, name);
+
+ if (!ops && arch->associate_instruction_ops)
+ ops = arch->associate_instruction_ops(arch, name);
+
+ return ops;
+}
+
+static int arch__key_cmp(const void *name, const void *archp)
+{
+ const struct arch *arch = archp;
+
+ return strcmp(name, arch->name);
+}
+
+static int arch__cmp(const void *a, const void *b)
+{
+ const struct arch *aa = a;
+ const struct arch *ab = b;
+
+ return strcmp(aa->name, ab->name);
+}
+
+static void arch__sort(void)
+{
+ const int nmemb = ARRAY_SIZE(architectures);
+
+ qsort(architectures, nmemb, sizeof(struct arch), arch__cmp);
+}
+
+static struct arch *arch__find(const char *name)
+{
+ const int nmemb = ARRAY_SIZE(architectures);
+ static bool sorted;
+
+ if (!sorted) {
+ arch__sort();
+ sorted = true;
+ }
+
+ return bsearch(name, architectures, nmemb, sizeof(struct arch), arch__key_cmp);
+}
+
+static struct annotated_source *annotated_source__new(void)
+{
+ struct annotated_source *src = zalloc(sizeof(*src));
+
+ if (src != NULL)
+ INIT_LIST_HEAD(&src->source);
+
+ return src;
+}
+
+static __maybe_unused void annotated_source__delete(struct annotated_source *src)
+{
+ if (src == NULL)
+ return;
+ zfree(&src->histograms);
+ zfree(&src->cycles_hist);
+ free(src);
+}
+
+static int annotated_source__alloc_histograms(struct annotated_source *src,
+ size_t size, int nr_hists)
+{
+ size_t sizeof_sym_hist;
+
+ /*
+ * Add a buffer of one element for zero-length symbols.
+ * When a sample is taken from the first instruction of a
+ * zero-length symbol, perf still resolves it, shows the
+ * symbol name in perf report and allows the user to
+ * annotate it.
+ */
+ if (size == 0)
+ size = 1;
+
+ /* Check for overflow when calculating sizeof_sym_hist */
+ if (size > (SIZE_MAX - sizeof(struct sym_hist)) / sizeof(struct sym_hist_entry))
+ return -1;
+
+ sizeof_sym_hist = (sizeof(struct sym_hist) + size * sizeof(struct sym_hist_entry));
+
+ /* Check for overflow in zalloc argument */
+ if (sizeof_sym_hist > SIZE_MAX / nr_hists)
+ return -1;
+
+ src->sizeof_sym_hist = sizeof_sym_hist;
+ src->nr_histograms = nr_hists;
+ src->histograms = calloc(nr_hists, sizeof_sym_hist);
+ return src->histograms ? 0 : -1;
+}
+
+/* The cycles histogram is lazily allocated. */
+static int symbol__alloc_hist_cycles(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ const size_t size = symbol__size(sym);
+
+ notes->src->cycles_hist = calloc(size, sizeof(struct cyc_hist));
+ if (notes->src->cycles_hist == NULL)
+ return -1;
+ return 0;
+}
+
+void symbol__annotate_zero_histograms(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ pthread_mutex_lock(&notes->lock);
+ if (notes->src != NULL) {
+ memset(notes->src->histograms, 0,
+ notes->src->nr_histograms * notes->src->sizeof_sym_hist);
+ if (notes->src->cycles_hist)
+ memset(notes->src->cycles_hist, 0,
+ symbol__size(sym) * sizeof(struct cyc_hist));
+ }
+ pthread_mutex_unlock(&notes->lock);
+}
+
+static int __symbol__account_cycles(struct cyc_hist *ch,
+ u64 start,
+ unsigned offset, unsigned cycles,
+ unsigned have_start)
+{
+ /*
+ * For now we can only account one basic block per
+ * final jump. But multiple could be overlapping.
+ * Always account the longest one. So when
+ * a shorter one has already been seen, throw it away.
+ *
+ * We separately always account the full cycles.
+ */
+ ch[offset].num_aggr++;
+ ch[offset].cycles_aggr += cycles;
+
+ if (cycles > ch[offset].cycles_max)
+ ch[offset].cycles_max = cycles;
+
+ if (ch[offset].cycles_min) {
+ if (cycles && cycles < ch[offset].cycles_min)
+ ch[offset].cycles_min = cycles;
+ } else
+ ch[offset].cycles_min = cycles;
+
+ if (!have_start && ch[offset].have_start)
+ return 0;
+ if (ch[offset].num) {
+ if (have_start && (!ch[offset].have_start ||
+ ch[offset].start > start)) {
+ ch[offset].have_start = 0;
+ ch[offset].cycles = 0;
+ ch[offset].num = 0;
+ if (ch[offset].reset < 0xffff)
+ ch[offset].reset++;
+ } else if (have_start &&
+ ch[offset].start < start)
+ return 0;
+ }
+ ch[offset].have_start = have_start;
+ ch[offset].start = start;
+ ch[offset].cycles += cycles;
+ ch[offset].num++;
+ return 0;
+}
+
+static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+ struct annotated_source *src, int evidx, u64 addr,
+ struct perf_sample *sample)
+{
+ unsigned offset;
+ struct sym_hist *h;
+
+ pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, addr));
+
+ if ((addr < sym->start || addr >= sym->end) &&
+ (addr != sym->end || sym->start != sym->end)) {
+ pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
+ __func__, __LINE__, sym->name, sym->start, addr, sym->end);
+ return -ERANGE;
+ }
+
+ offset = addr - sym->start;
+ h = annotated_source__histogram(src, evidx);
+ if (h == NULL) {
+ pr_debug("%s(%d): ENOMEM! sym->name=%s, start=%#" PRIx64 ", addr=%#" PRIx64 ", end=%#" PRIx64 ", func: %d\n",
+ __func__, __LINE__, sym->name, sym->start, addr, sym->end, sym->type == STT_FUNC);
+ return -ENOMEM;
+ }
+ h->nr_samples++;
+ h->addr[offset].nr_samples++;
+ h->period += sample->period;
+ h->addr[offset].period += sample->period;
+
+ pr_debug3("%#" PRIx64 " %s: period++ [addr: %#" PRIx64 ", %#" PRIx64
+ ", evidx=%d] => nr_samples: %" PRIu64 ", period: %" PRIu64 "\n",
+ sym->start, sym->name, addr, addr - sym->start, evidx,
+ h->addr[offset].nr_samples, h->addr[offset].period);
+ return 0;
+}
+
+static struct cyc_hist *symbol__cycles_hist(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ if (notes->src == NULL) {
+ notes->src = annotated_source__new();
+ if (notes->src == NULL)
+ return NULL;
+ goto alloc_cycles_hist;
+ }
+
+ if (!notes->src->cycles_hist) {
+alloc_cycles_hist:
+ symbol__alloc_hist_cycles(sym);
+ }
+
+ return notes->src->cycles_hist;
+}
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ if (notes->src == NULL) {
+ notes->src = annotated_source__new();
+ if (notes->src == NULL)
+ return NULL;
+ goto alloc_histograms;
+ }
+
+ if (notes->src->histograms == NULL) {
+alloc_histograms:
+ annotated_source__alloc_histograms(notes->src, symbol__size(sym),
+ nr_hists);
+ }
+
+ return notes->src;
+}
+
+static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, u64 addr,
+ struct perf_sample *sample)
+{
+ struct annotated_source *src;
+
+ if (sym == NULL)
+ return 0;
+ src = symbol__hists(sym, evsel->evlist->nr_entries);
+ return (src) ? __symbol__inc_addr_samples(sym, map, src, evsel->idx,
+ addr, sample) : 0;
+}
+
+static int symbol__account_cycles(u64 addr, u64 start,
+ struct symbol *sym, unsigned cycles)
+{
+ struct cyc_hist *cycles_hist;
+ unsigned offset;
+
+ if (sym == NULL)
+ return 0;
+ cycles_hist = symbol__cycles_hist(sym);
+ if (cycles_hist == NULL)
+ return -ENOMEM;
+ if (addr < sym->start || addr >= sym->end)
+ return -ERANGE;
+
+ if (start) {
+ if (start < sym->start || start >= sym->end)
+ return -ERANGE;
+ if (start >= addr)
+ start = 0;
+ }
+ offset = addr - sym->start;
+ return __symbol__account_cycles(cycles_hist,
+ start ? start - sym->start : 0,
+ offset, cycles,
+ !!start);
+}
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles)
+{
+ u64 saddr = 0;
+ int err;
+
+ if (!cycles)
+ return 0;
+
+ /*
+ * Only set start when IPC can be computed. We can only
+ * compute it when the basic block is completely in a single
+ * function.
+ * Special-case the situation where the jump came from
+ * elsewhere but the block starts at the function start.
+ */
+ if (start &&
+ (start->sym == ams->sym ||
+ (ams->sym &&
+ start->addr == ams->sym->start + ams->map->start)))
+ saddr = start->al_addr;
+ if (saddr == 0)
+ pr_debug2("BB with bad start: addr %"PRIx64" start %"PRIx64" sym %"PRIx64" saddr %"PRIx64"\n",
+ ams->addr,
+ start ? start->addr : 0,
+ ams->sym ? ams->sym->start + ams->map->start : 0,
+ saddr);
+ err = symbol__account_cycles(ams->al_addr, saddr, ams->sym, cycles);
+ if (err)
+ pr_debug2("account_cycles failed %d\n", err);
+ return err;
+}
+
+static unsigned annotation__count_insn(struct annotation *notes, u64 start, u64 end)
+{
+ unsigned n_insn = 0;
+ u64 offset;
+
+ for (offset = start; offset <= end; offset++) {
+ if (notes->offsets[offset])
+ n_insn++;
+ }
+ return n_insn;
+}
+
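+/*
+ * Compute the IPC for the basic block [start, end] as the number of
+ * instructions divided by the average cycles per occurrence, and store
+ * it on every annotated line in the block.
+ */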
+static void annotation__count_and_fill(struct annotation *notes, u64 start, u64 end, struct cyc_hist *ch)
+{
+ unsigned n_insn;
+ u64 offset;
+
+ n_insn = annotation__count_insn(notes, start, end);
+ if (n_insn && ch->num && ch->cycles) {
+ float ipc = n_insn / ((double)ch->cycles / (double)ch->num);
+
+ /* Hide data when there are too many overlaps. */
+ if (ch->reset >= 0x7fff || ch->reset >= ch->num / 2)
+ return;
+
+ for (offset = start; offset <= end; offset++) {
+ struct annotation_line *al = notes->offsets[offset];
+
+ if (al)
+ al->ipc = ipc;
+ }
+ }
+}
+
+void annotation__compute_ipc(struct annotation *notes, size_t size)
+{
+ u64 offset;
+
+ if (!notes->src || !notes->src->cycles_hist)
+ return;
+
+ pthread_mutex_lock(&notes->lock);
+ for (offset = 0; offset < size; ++offset) {
+ struct cyc_hist *ch;
+
+ ch = &notes->src->cycles_hist[offset];
+ if (ch && ch->cycles) {
+ struct annotation_line *al;
+
+ if (ch->have_start)
+ annotation__count_and_fill(notes, ch->start, offset, ch);
+ al = notes->offsets[offset];
+ if (al && ch->num_aggr) {
+ al->cycles = ch->cycles_aggr / ch->num_aggr;
+ al->cycles_max = ch->cycles_max;
+ al->cycles_min = ch->cycles_min;
+ }
+ notes->have_cycles = true;
+ }
+ }
+ pthread_mutex_unlock(&notes->lock);
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ return symbol__inc_addr_samples(ams->sym, ams->map, evsel, ams->al_addr, sample);
+}
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+ struct perf_evsel *evsel, u64 ip)
+{
+ return symbol__inc_addr_samples(he->ms.sym, he->ms.map, evsel, ip, sample);
+}
+
+static void disasm_line__init_ins(struct disasm_line *dl, struct arch *arch, struct map_symbol *ms)
+{
+ dl->ins.ops = ins__find(arch, dl->ins.name);
+
+ if (!dl->ins.ops)
+ return;
+
+ if (dl->ins.ops->parse && dl->ins.ops->parse(arch, &dl->ops, ms) < 0)
+ dl->ins.ops = NULL;
+}
+
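+/*
+ * Split an objdump line into mnemonic and raw operands: *namep gets a
+ * copy of the mnemonic, while *rawp points into 'line' just past it.
+ */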
+static int disasm_line__parse(char *line, const char **namep, char **rawp)
+{
+ char tmp, *name = ltrim(line);
+
+ if (name[0] == '\0')
+ return -1;
+
+ *rawp = name + 1;
+
+ while ((*rawp)[0] != '\0' && !isspace((*rawp)[0]))
+ ++*rawp;
+
+ tmp = (*rawp)[0];
+ (*rawp)[0] = '\0';
+ *namep = strdup(name);
+
+ if (*namep == NULL)
+ goto out;
+
+ (*rawp)[0] = tmp;
+ *rawp = ltrim(*rawp);
+
+ return 0;
+
+out:
+ return -1;
+}
+
+struct annotate_args {
+ size_t privsize;
+ struct arch *arch;
+ struct map_symbol ms;
+ struct perf_evsel *evsel;
+ struct annotation_options *options;
+ s64 offset;
+ char *line;
+ int line_nr;
+};
+
+static void annotation_line__delete(struct annotation_line *al)
+{
+ void *ptr = (void *) al - al->privsize;
+
+ free_srcline(al->path);
+ zfree(&al->line);
+ free(ptr);
+}
+
+/*
+ * Allocating the annotation line data with the following
+ * structure:
+ *
+ * --------------------------------------
+ * private space | struct annotation_line
+ * --------------------------------------
+ *
+ * Size of the private space is stored in 'struct annotation_line'.
+ *
+ */
+static struct annotation_line *
+annotation_line__new(struct annotate_args *args, size_t privsize)
+{
+ struct annotation_line *al;
+ struct perf_evsel *evsel = args->evsel;
+ size_t size = privsize + sizeof(*al);
+ int nr = 1;
+
+ if (perf_evsel__is_group_event(evsel))
+ nr = evsel->nr_members;
+
+ size += sizeof(al->data[0]) * nr;
+
+ al = zalloc(size);
+ if (al) {
+ al = (void *) al + privsize;
+ al->privsize = privsize;
+ al->offset = args->offset;
+ al->line = strdup(args->line);
+ al->line_nr = args->line_nr;
+ al->data_nr = nr;
+ }
+
+ return al;
+}
+
+/*
+ * Allocating the disasm annotation line data with
+ * the following structure:
+ *
+ * ------------------------------------------------------------
+ * privsize space | struct disasm_line | struct annotation_line
+ * ------------------------------------------------------------
+ *
+ * The 'struct annotation_line' member is the last member
+ * of 'struct disasm_line' to allow easy access to it.
+ *
+ */
+static struct disasm_line *disasm_line__new(struct annotate_args *args)
+{
+ struct disasm_line *dl = NULL;
+ struct annotation_line *al;
+ size_t privsize = args->privsize + offsetof(struct disasm_line, al);
+
+ al = annotation_line__new(args, privsize);
+ if (al != NULL) {
+ dl = disasm_line(al);
+
+ if (dl->al.line == NULL)
+ goto out_delete;
+
+ if (args->offset != -1) {
+ if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0)
+ goto out_free_line;
+
+ disasm_line__init_ins(dl, args->arch, &args->ms);
+ }
+ }
+
+ return dl;
+
+out_free_line:
+ zfree(&dl->al.line);
+out_delete:
+ free(dl);
+ return NULL;
+}
+
+void disasm_line__free(struct disasm_line *dl)
+{
+ if (dl->ins.ops && dl->ins.ops->free)
+ dl->ins.ops->free(&dl->ops);
+ else
+ ins__delete(&dl->ops);
+ free((void *)dl->ins.name);
+ dl->ins.name = NULL;
+ annotation_line__delete(&dl->al);
+}
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw)
+{
+ if (raw || !dl->ins.ops)
+ return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw);
+
+ return ins__scnprintf(&dl->ins, bf, size, &dl->ops);
+}
+
+static void annotation_line__add(struct annotation_line *al, struct list_head *head)
+{
+ list_add_tail(&al->node, head);
+}
+
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head)
+{
+ list_for_each_entry_continue(pos, head, node)
+ if (pos->offset >= 0)
+ return pos;
+
+ return NULL;
+}
+
+static const char *annotate__address_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark red for >75% coverage */
+ if (cov > 0.75)
+ return PERF_COLOR_RED;
+
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_MAGENTA;
+}
+
+static const char *annotate__asm_color(struct block_range *br)
+{
+ double cov = block_range__coverage(br);
+
+ if (cov >= 0) {
+ /* mark dull for <1% coverage */
+ if (cov < 0.01)
+ return PERF_COLOR_NORMAL;
+ }
+
+ return PERF_COLOR_BLUE;
+}
+
+static void annotate__branch_printf(struct block_range *br, u64 addr)
+{
+ bool emit_comment = true;
+
+ if (!br)
+ return;
+
+#if 1
+ if (br->is_target && br->start == addr) {
+ struct block_range *branch = br;
+ double p;
+
+ /*
+ * Find matching branch to our target.
+ */
+ while (!branch->is_branch)
+ branch = block_range__next(branch);
+
+ p = 100 * (double)br->entry / branch->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage joined at this target in relation
+ * to the next branch.
+ */
+ printf(" +%.2f%%", p);
+ }
+ }
+#endif
+ if (br->is_branch && br->end == addr) {
+ double p = 100 * (double)br->taken / br->coverage;
+
+ if (p > 0.1) {
+ if (emit_comment) {
+ emit_comment = false;
+ printf("\t#");
+ }
+
+ /*
+ * The percentage of coverage leaving at this branch, and
+ * its prediction ratio.
+ */
+ printf(" -%.2f%% (p:%.2f%%)", p, 100*(double)br->pred / br->taken);
+ }
+ }
+}
+
+static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width)
+{
+ s64 offset = dl->al.offset;
+ const u64 addr = start + offset;
+ struct block_range *br;
+
+ br = block_range__find(addr);
+ color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr);
+ color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line);
+ annotate__branch_printf(br, addr);
+ return 0;
+}
+
+static int
+annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start,
+ struct perf_evsel *evsel, u64 len, int min_pcnt, int printed,
+ int max_lines, struct annotation_line *queue, int addr_fmt_width,
+ int percent_type)
+{
+ struct disasm_line *dl = container_of(al, struct disasm_line, al);
+ static const char *prev_line;
+ static const char *prev_color;
+
+ if (al->offset != -1) {
+ double max_percent = 0.0;
+ int i, nr_percent = 1;
+ const char *color;
+ struct annotation *notes = symbol__annotation(sym);
+
+ for (i = 0; i < al->data_nr; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i],
+ percent_type);
+
+ if (percent > max_percent)
+ max_percent = percent;
+ }
+
+ if (al->data_nr > nr_percent)
+ nr_percent = al->data_nr;
+
+ if (max_percent < min_pcnt)
+ return -1;
+
+ if (max_lines && printed >= max_lines)
+ return 1;
+
+ if (queue != NULL) {
+ list_for_each_entry_from(queue, &notes->src->source, node) {
+ if (queue == al)
+ break;
+ annotation_line__print(queue, sym, start, evsel, len,
+ 0, 0, 1, NULL, addr_fmt_width,
+ percent_type);
+ }
+ }
+
+ color = get_percent_color(max_percent);
+
+ /*
+ * Also color the filename and line if needed, with
+ * the same color as the percentage. Don't print it
+ * twice for consecutive addresses with the same filename:line.
+ */
+ if (al->path) {
+ if (!prev_line || strcmp(prev_line, al->path)
+ || color != prev_color) {
+ color_fprintf(stdout, color, " %s", al->path);
+ prev_line = al->path;
+ prev_color = color;
+ }
+ }
+
+ for (i = 0; i < nr_percent; i++) {
+ struct annotation_data *data = &al->data[i];
+ double percent;
+
+ percent = annotation_data__percent(data, percent_type);
+ color = get_percent_color(percent);
+
+ if (symbol_conf.show_total_period)
+ color_fprintf(stdout, color, " %11" PRIu64,
+ data->he.period);
+ else if (symbol_conf.show_nr_samples)
+ color_fprintf(stdout, color, " %7" PRIu64,
+ data->he.nr_samples);
+ else
+ color_fprintf(stdout, color, " %7.2f", percent);
+ }
+
+ printf(" : ");
+
+ disasm_line__print(dl, start, addr_fmt_width);
+ printf("\n");
+ } else if (max_lines && printed >= max_lines)
+ return 1;
+ else {
+ int width = symbol_conf.show_total_period ? 12 : 8;
+
+ if (queue)
+ return -1;
+
+ if (perf_evsel__is_group_event(evsel))
+ width *= evsel->nr_members;
+
+ if (!*al->line)
+ printf(" %*s:\n", width, " ");
+ else
+ printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line);
+ }
+
+ return 0;
+}
+
+/*
+ * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw)
+ * which looks like the following:
+ *
+ * 0000000000415500 <_init>:
+ * 415500: sub $0x8,%rsp
+ * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8>
+ * 41550b: test %rax,%rax
+ * 41550e: je 415515 <_init+0x15>
+ * 415510: callq 416e70 <__gmon_start__@plt>
+ * 415515: add $0x8,%rsp
+ * 415519: retq
+ *
+ * it will be parsed and saved into struct disasm_line as
+ * <offset> <name> <ops.raw>
+ *
+ * The offset will be a relative offset from the start of the symbol, and -1
+ * means that it's not a disassembly line, so it should be treated differently.
+ * The ops.raw part will be parsed further according to the type of the instruction.
+ */
+static int symbol__parse_objdump_line(struct symbol *sym, FILE *file,
+ struct annotate_args *args,
+ int *line_nr)
+{
+ struct map *map = args->ms.map;
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+ char *line = NULL, *parsed_line, *tmp, *tmp2;
+ size_t line_len;
+ s64 line_ip, offset = -1;
+ regmatch_t match[2];
+
+ if (getline(&line, &line_len, file) < 0)
+ return -1;
+
+ if (!line)
+ return -1;
+
+ line_ip = -1;
+ parsed_line = rtrim(line);
+
+ /* /filename:linenr ? Save line number and ignore. */
+ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
+ *line_nr = atoi(parsed_line + match[1].rm_so);
+ return 0;
+ }
+
+ tmp = ltrim(parsed_line);
+ if (*tmp) {
+ /*
+ * Parse hex addresses followed by ':'
+ */
+ line_ip = strtoull(tmp, &tmp2, 16);
+ if (*tmp2 != ':' || tmp == tmp2 || tmp2[1] == '\0')
+ line_ip = -1;
+ }
+
+ if (line_ip != -1) {
+ u64 start = map__rip_2objdump(map, sym->start),
+ end = map__rip_2objdump(map, sym->end);
+
+ offset = line_ip - start;
+ if ((u64)line_ip < start || (u64)line_ip >= end)
+ offset = -1;
+ else
+ parsed_line = tmp2 + 1;
+ }
+
+ args->offset = offset;
+ args->line = parsed_line;
+ args->line_nr = *line_nr;
+ args->ms.sym = sym;
+
+ dl = disasm_line__new(args);
+ free(line);
+ (*line_nr)++;
+
+ if (dl == NULL)
+ return -1;
+
+ if (!disasm_line__has_local_offset(dl)) {
+ dl->ops.target.offset = dl->ops.target.addr -
+ map__rip_2objdump(map, sym->start);
+ dl->ops.target.offset_avail = true;
+ }
+
+ /* kcore has no symbols, so add the call target symbol */
+ if (dl->ins.ops && ins__is_call(&dl->ins) && !dl->ops.target.sym) {
+ struct addr_map_symbol target = {
+ .map = map,
+ .addr = dl->ops.target.addr,
+ };
+
+ if (!map_groups__find_ams(&target) &&
+ target.sym->start == target.al_addr)
+ dl->ops.target.sym = target.sym;
+ }
+
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ return 0;
+}
+
+static __attribute__((constructor)) void symbol__init_regexpr(void)
+{
+ regcomp(&file_lineno, "^/[^:]+:([0-9]+)", REG_EXTENDED);
+}
+
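+/*
+ * objdump may emit trailing nop padding after a function; strip such
+ * lines from the tail of the disassembly so they aren't annotated.
+ */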
+static void delete_last_nop(struct symbol *sym)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct list_head *list = &notes->src->source;
+ struct disasm_line *dl;
+
+ while (!list_empty(list)) {
+ dl = list_entry(list->prev, struct disasm_line, al.node);
+
+ if (dl->ins.ops) {
+ if (dl->ins.ops != &nop_ops)
+ return;
+ } else {
+ if (!strstr(dl->al.line, " nop ") &&
+ !strstr(dl->al.line, " nopl ") &&
+ !strstr(dl->al.line, " nopw "))
+ return;
+ }
+
+ list_del(&dl->al.node);
+ disasm_line__free(dl);
+ }
+}
+
+int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map *map,
+ int errnum, char *buf, size_t buflen)
+{
+ struct dso *dso = map->dso;
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ switch (errnum) {
+ case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: {
+ char bf[SBUILD_ID_SIZE + 15] = " with build id ";
+ char *build_id_msg = NULL;
+
+ if (dso->has_build_id) {
+ build_id__sprintf(dso->build_id,
+ sizeof(dso->build_id), bf + 15);
+ build_id_msg = bf;
+ }
+ scnprintf(buf, buflen,
+ "No vmlinux file%s\nwas found in the path.\n\n"
+ "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n"
+ "Please use:\n\n"
+ " perf buildid-cache -vu vmlinux\n\n"
+ "or:\n\n"
+ " --vmlinux vmlinux\n", build_id_msg ?: "");
+ }
+ break;
+ default:
+ scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum);
+ break;
+ }
+
+ return 0;
+}
+
+static int dso__disassemble_filename(struct dso *dso, char *filename, size_t filename_size)
+{
+ char linkname[PATH_MAX];
+ char *build_id_filename;
+ char *build_id_path = NULL;
+ char *pos;
+
+ if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
+ !dso__is_kcore(dso))
+ return SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX;
+
+ build_id_filename = dso__build_id_filename(dso, NULL, 0, false);
+ if (build_id_filename) {
+ __symbol__join_symfs(filename, filename_size, build_id_filename);
+ free(build_id_filename);
+ } else {
+ if (dso->has_build_id)
+ return ENOMEM;
+ goto fallback;
+ }
+
+ build_id_path = strdup(filename);
+ if (!build_id_path)
+ return ENOMEM;
+
+ /*
+ * The old style build-id cache names files XX/XXXXXXX.., while the
+ * new style uses XX/XXXXXXX../{elf,kallsyms,vdso}.
+ * Extract the build-id part of the dirname in the new style only.
+ */
+ pos = strrchr(build_id_path, '/');
+ if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
+ dirname(build_id_path);
+
+ if (dso__is_kcore(dso) ||
+ readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
+ strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
+fallback:
+		/*
+		 * If we don't have build-ids or the build-id file isn't in the
+		 * cache, or is just a kallsyms file, well, let's hope that this
+		 * DSO is the same as when 'perf record' ran.
+		 */
+ __symbol__join_symfs(filename, filename_size, dso->long_name);
+ }
+
+ free(build_id_path);
+ return 0;
+}
+
+static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
+{
+ struct annotation_options *opts = args->options;
+ struct map *map = args->ms.map;
+ struct dso *dso = map->dso;
+ char *command;
+ FILE *file;
+ char symfs_filename[PATH_MAX];
+ struct kcore_extract kce;
+ bool delete_extract = false;
+ bool decomp = false;
+ int stdout_fd[2];
+ int lineno = 0;
+ int nline;
+ pid_t pid;
+ int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename));
+
+ if (err)
+ return err;
+
+ pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__,
+ symfs_filename, sym->name, map->unmap_ip(map, sym->start),
+ map->unmap_ip(map, sym->end));
+
+ pr_debug("annotating [%p] %30s : [%p] %30s\n",
+ dso, dso->long_name, sym, sym->name);
+
+ if (dso__is_kcore(dso)) {
+ kce.kcore_filename = symfs_filename;
+ kce.addr = map__rip_2objdump(map, sym->start);
+ kce.offs = sym->start;
+ kce.len = sym->end - sym->start;
+ if (!kcore_extract__create(&kce)) {
+ delete_extract = true;
+ strlcpy(symfs_filename, kce.extract_filename,
+ sizeof(symfs_filename));
+ }
+ } else if (dso__needs_decompress(dso)) {
+ char tmp[KMOD_DECOMP_LEN];
+
+ if (dso__decompress_kmodule_path(dso, symfs_filename,
+ tmp, sizeof(tmp)) < 0)
+ goto out;
+
+ decomp = true;
+ strcpy(symfs_filename, tmp);
+ }
+
+ err = asprintf(&command,
+ "%s %s%s --start-address=0x%016" PRIx64
+ " --stop-address=0x%016" PRIx64
+ " -l -d %s %s -C \"%s\" 2>/dev/null|grep -v \"%s:\"|expand",
+ opts->objdump_path ?: "objdump",
+ opts->disassembler_style ? "-M " : "",
+ opts->disassembler_style ?: "",
+ map__rip_2objdump(map, sym->start),
+ map__rip_2objdump(map, sym->end),
+ opts->show_asm_raw ? "" : "--no-show-raw",
+ opts->annotate_src ? "-S" : "",
+ symfs_filename, symfs_filename);
+
+ if (err < 0) {
+ pr_err("Failure allocating memory for the command to run\n");
+ goto out_remove_tmp;
+ }
+
+ pr_debug("Executing: %s\n", command);
+
+ err = -1;
+ if (pipe(stdout_fd) < 0) {
+ pr_err("Failure creating the pipe to run %s\n", command);
+ goto out_free_command;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ pr_err("Failure forking to run %s\n", command);
+ goto out_close_stdout;
+ }
+
+ if (pid == 0) {
+ close(stdout_fd[0]);
+ dup2(stdout_fd[1], 1);
+ close(stdout_fd[1]);
+ execl("/bin/sh", "sh", "-c", command, NULL);
+ perror(command);
+ exit(-1);
+ }
+
+ close(stdout_fd[1]);
+
+ file = fdopen(stdout_fd[0], "r");
+ if (!file) {
+ pr_err("Failure creating FILE stream for %s\n", command);
+		/*
+		 * If we were using debug info, we should retry with the
+		 * original binary.
+		 */
+ goto out_free_command;
+ }
+
+ nline = 0;
+ while (!feof(file)) {
+		/*
+		 * The source code line number (lineno) needs to be kept
+		 * across calls to symbol__parse_objdump_line(), so that it
+		 * can be associated with the instructions until the next one.
+		 * See disasm_line__new() and struct disasm_line::line_nr.
+		 */
+ if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0)
+ break;
+ nline++;
+ }
+
+ if (nline == 0)
+ pr_err("No output from %s\n", command);
+
+	/*
+	 * kallsyms does not have symbol sizes, so there may be a nop at the
+	 * end. Remove it.
+	 */
+ if (dso__is_kcore(dso))
+ delete_last_nop(sym);
+
+ fclose(file);
+ err = 0;
+out_free_command:
+ free(command);
+out_remove_tmp:
+ close(stdout_fd[0]);
+
+ if (decomp)
+ unlink(symfs_filename);
+
+ if (delete_extract)
+ kcore_extract__delete(&kce);
+out:
+ return err;
+
+out_close_stdout:
+ close(stdout_fd[1]);
+ goto out_free_command;
+}
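+
+/*
+ * Illustrative example of the shell command symbol__disassemble() composes
+ * above, assuming default options (the path and addresses are made up):
+ *
+ *   objdump  --start-address=0x0000000000401130 \
+ *     --stop-address=0x00000000004011f0 -l -d --no-show-raw -S \
+ *     -C "/usr/bin/ls" 2>/dev/null|grep -v "/usr/bin/ls:"|expand
+ */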
+
+static void calc_percent(struct sym_hist *sym_hist,
+ struct hists *hists,
+ struct annotation_data *data,
+ s64 offset, s64 end)
+{
+ unsigned int hits = 0;
+ u64 period = 0;
+
+ while (offset < end) {
+ hits += sym_hist->addr[offset].nr_samples;
+ period += sym_hist->addr[offset].period;
+ ++offset;
+ }
+
+ if (sym_hist->nr_samples) {
+ data->he.period = period;
+ data->he.nr_samples = hits;
+ data->percent[PERCENT_HITS_LOCAL] = 100.0 * hits / sym_hist->nr_samples;
+ }
+
+ if (hists->stats.nr_non_filtered_samples)
+ data->percent[PERCENT_HITS_GLOBAL] = 100.0 * hits / hists->stats.nr_non_filtered_samples;
+
+ if (sym_hist->period)
+ data->percent[PERCENT_PERIOD_LOCAL] = 100.0 * period / sym_hist->period;
+
+ if (hists->stats.total_period)
+ data->percent[PERCENT_PERIOD_GLOBAL] = 100.0 * period / hists->stats.total_period;
+}
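+
+/*
+ * Worked example (illustrative numbers): if the [offset, end) range above
+ * accumulates hits = 25 while sym_hist->nr_samples = 100, then
+ * PERCENT_HITS_LOCAL = 100.0 * 25 / 100 = 25.0; the _GLOBAL variants
+ * divide by the evsel-wide totals taken from 'hists' instead.
+ */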
+
+static void annotation__calc_percent(struct annotation *notes,
+ struct perf_evsel *leader, s64 len)
+{
+ struct annotation_line *al, *next;
+ struct perf_evsel *evsel;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ s64 end;
+ int i = 0;
+
+ if (al->offset == -1)
+ continue;
+
+ next = annotation_line__next(al, &notes->src->source);
+ end = next ? next->offset : len;
+
+ for_each_group_evsel(evsel, leader) {
+ struct hists *hists = evsel__hists(evsel);
+ struct annotation_data *data;
+ struct sym_hist *sym_hist;
+
+ BUG_ON(i >= al->data_nr);
+
+ sym_hist = annotation__histogram(notes, evsel->idx);
+ data = &al->data[i++];
+
+ calc_percent(sym_hist, hists, data, al->offset, end);
+ }
+ }
+}
+
+void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ annotation__calc_percent(notes, evsel, symbol__size(sym));
+}
+
+int symbol__annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, size_t privsize,
+ struct annotation_options *options,
+ struct arch **parch)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotate_args args = {
+ .privsize = privsize,
+ .evsel = evsel,
+ .options = options,
+ };
+ struct perf_env *env = perf_evsel__env(evsel);
+ const char *arch_name = perf_env__arch(env);
+ struct arch *arch;
+ int err;
+
+ if (!arch_name)
+ return errno;
+
+ args.arch = arch = arch__find(arch_name);
+ if (arch == NULL)
+ return ENOTSUP;
+
+ if (parch)
+ *parch = arch;
+
+ if (arch->init) {
+ err = arch->init(arch, env ? env->cpuid : NULL);
+ if (err) {
+ pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name);
+ return err;
+ }
+ }
+
+ args.ms.map = map;
+ args.ms.sym = sym;
+ notes->start = map__rip_2objdump(map, sym->start);
+
+ return symbol__disassemble(sym, &args);
+}
+
+static void insert_source_line(struct rb_root *root, struct annotation_line *al,
+ struct annotation_options *opts)
+{
+ struct annotation_line *iter;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ int i, ret;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct annotation_line, rb_node);
+
+ ret = strcmp(iter->path, al->path);
+ if (ret == 0) {
+ for (i = 0; i < al->data_nr; i++) {
+ iter->data[i].percent_sum += annotation_data__percent(&al->data[i],
+ opts->percent_type);
+ }
+ return;
+ }
+
+ if (ret < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ for (i = 0; i < al->data_nr; i++) {
+ al->data[i].percent_sum = annotation_data__percent(&al->data[i],
+ opts->percent_type);
+ }
+
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
+}
+
+static int cmp_source_line(struct annotation_line *a, struct annotation_line *b)
+{
+ int i;
+
+ for (i = 0; i < a->data_nr; i++) {
+ if (a->data[i].percent_sum == b->data[i].percent_sum)
+ continue;
+ return a->data[i].percent_sum > b->data[i].percent_sum;
+ }
+
+ return 0;
+}
+
+static void __resort_source_line(struct rb_root *root, struct annotation_line *al)
+{
+ struct annotation_line *iter;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct annotation_line, rb_node);
+
+ if (cmp_source_line(al, iter))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&al->rb_node, parent, p);
+ rb_insert_color(&al->rb_node, root);
+}
+
+static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root)
+{
+ struct annotation_line *al;
+ struct rb_node *node;
+
+ node = rb_first(src_root);
+ while (node) {
+ struct rb_node *next;
+
+ al = rb_entry(node, struct annotation_line, rb_node);
+ next = rb_next(node);
+ rb_erase(node, src_root);
+
+ __resort_source_line(dest_root, al);
+ node = next;
+ }
+}
+
+static void print_summary(struct rb_root *root, const char *filename)
+{
+ struct annotation_line *al;
+ struct rb_node *node;
+
+ printf("\nSorted summary for file %s\n", filename);
+ printf("----------------------------------------------\n\n");
+
+ if (RB_EMPTY_ROOT(root)) {
+ printf(" Nothing higher than %1.1f%%\n", MIN_GREEN);
+ return;
+ }
+
+ node = rb_first(root);
+ while (node) {
+ double percent, percent_max = 0.0;
+ const char *color;
+ char *path;
+ int i;
+
+ al = rb_entry(node, struct annotation_line, rb_node);
+ for (i = 0; i < al->data_nr; i++) {
+ percent = al->data[i].percent_sum;
+ color = get_percent_color(percent);
+ color_fprintf(stdout, color, " %7.2f", percent);
+
+ if (percent > percent_max)
+ percent_max = percent;
+ }
+
+ path = al->path;
+ color = get_percent_color(percent_max);
+ color_fprintf(stdout, color, " %s\n", path);
+
+ node = rb_next(node);
+ }
+}
+
+static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evsel->idx);
+ u64 len = symbol__size(sym), offset;
+
+ for (offset = 0; offset < len; ++offset)
+ if (h->addr[offset].nr_samples != 0)
+ printf("%*" PRIx64 ": %" PRIu64 "\n", BITS_PER_LONG / 2,
+ sym->start + offset, h->addr[offset].nr_samples);
+ printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples);
+}
+
+static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start)
+{
+ char bf[32];
+ struct annotation_line *line;
+
+ list_for_each_entry_reverse(line, lines, node) {
+ if (line->offset != -1)
+ return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset);
+ }
+
+ return 0;
+}
+
+int symbol__annotate_printf(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct annotation_options *opts)
+{
+ struct dso *dso = map->dso;
+ char *filename;
+ const char *d_filename;
+ const char *evsel_name = perf_evsel__name(evsel);
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evsel->idx);
+ struct annotation_line *pos, *queue = NULL;
+ u64 start = map__rip_2objdump(map, sym->start);
+ int printed = 2, queue_len = 0, addr_fmt_width;
+ int more = 0;
+ bool context = opts->context;
+ u64 len;
+ int width = symbol_conf.show_total_period ? 12 : 8;
+ int graph_dotted_len;
+ char buf[512];
+
+ filename = strdup(dso->long_name);
+ if (!filename)
+ return -ENOMEM;
+
+ if (opts->full_path)
+ d_filename = filename;
+ else
+ d_filename = basename(filename);
+
+ len = symbol__size(sym);
+
+ if (perf_evsel__is_group_event(evsel)) {
+ width *= evsel->nr_members;
+ perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ evsel_name = buf;
+ }
+
+ graph_dotted_len = printf(" %-*.*s| Source code & Disassembly of %s for %s (%" PRIu64 " samples, "
+ "percent: %s)\n",
+ width, width, symbol_conf.show_total_period ? "Period" :
+ symbol_conf.show_nr_samples ? "Samples" : "Percent",
+ d_filename, evsel_name, h->nr_samples,
+ percent_type_str(opts->percent_type));
+
+ printf("%-*.*s----\n",
+ graph_dotted_len, graph_dotted_len, graph_dotted_line);
+
+ if (verbose > 0)
+ symbol__annotate_hits(sym, evsel);
+
+ addr_fmt_width = annotated_source__addr_fmt_width(&notes->src->source, start);
+
+ list_for_each_entry(pos, &notes->src->source, node) {
+ int err;
+
+ if (context && queue == NULL) {
+ queue = pos;
+ queue_len = 0;
+ }
+
+ err = annotation_line__print(pos, sym, start, evsel, len,
+ opts->min_pcnt, printed, opts->max_lines,
+ queue, addr_fmt_width, opts->percent_type);
+
+ switch (err) {
+ case 0:
+ ++printed;
+ if (context) {
+ printed += queue_len;
+ queue = NULL;
+ queue_len = 0;
+ }
+ break;
+ case 1:
+ /* filtered by max_lines */
+ ++more;
+ break;
+ case -1:
+ default:
+			/*
+			 * Filtered by min_pcnt, or by non-IP lines when
+			 * context != 0
+			 */
+ if (!context)
+ break;
+ if (queue_len == context)
+ queue = list_entry(queue->node.next, typeof(*queue), node);
+ else
+ ++queue_len;
+ break;
+ }
+ }
+
+ free(filename);
+
+ return more;
+}
+
+static void FILE__set_percent_color(void *fp __maybe_unused,
+ double percent __maybe_unused,
+ bool current __maybe_unused)
+{
+}
+
+static int FILE__set_jumps_percent_color(void *fp __maybe_unused,
+ int nr __maybe_unused, bool current __maybe_unused)
+{
+ return 0;
+}
+
+static int FILE__set_color(void *fp __maybe_unused, int color __maybe_unused)
+{
+ return 0;
+}
+
+static void FILE__printf(void *fp, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vfprintf(fp, fmt, args);
+ va_end(args);
+}
+
+static void FILE__write_graph(void *fp, int graph)
+{
+ const char *s;
+ switch (graph) {
+
+ case DARROW_CHAR: s = "↓"; break;
+ case UARROW_CHAR: s = "↑"; break;
+ case LARROW_CHAR: s = "←"; break;
+ case RARROW_CHAR: s = "→"; break;
+ default: s = "?"; break;
+ }
+
+ fputs(s, fp);
+}
+
+static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
+ struct annotation_options *opts)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct annotation_write_ops wops = {
+ .first_line = true,
+ .obj = fp,
+ .set_color = FILE__set_color,
+ .set_percent_color = FILE__set_percent_color,
+ .set_jumps_percent_color = FILE__set_jumps_percent_color,
+ .printf = FILE__printf,
+ .write_graph = FILE__write_graph,
+ };
+ struct annotation_line *al;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ if (annotation_line__filter(al, notes))
+ continue;
+ annotation_line__write(al, notes, &wops, opts);
+ fputc('\n', fp);
+ wops.first_line = false;
+ }
+
+ return 0;
+}
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel,
+ struct annotation_options *opts)
+{
+ const char *ev_name = perf_evsel__name(evsel);
+ char buf[1024];
+ char *filename;
+ int err = -1;
+ FILE *fp;
+
+ if (asprintf(&filename, "%s.annotation", ms->sym->name) < 0)
+ return -1;
+
+ fp = fopen(filename, "w");
+ if (fp == NULL)
+ goto out_free_filename;
+
+ if (perf_evsel__is_group_event(evsel)) {
+ perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ ev_name = buf;
+ }
+
+ fprintf(fp, "%s() %s\nEvent: %s\n\n",
+ ms->sym->name, ms->map->dso->long_name, ev_name);
+ symbol__annotate_fprintf2(ms->sym, fp, opts);
+
+ fclose(fp);
+ err = 0;
+out_free_filename:
+ free(filename);
+ return err;
+}
+
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+
+ memset(h, 0, notes->src->sizeof_sym_hist);
+}
+
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct sym_hist *h = annotation__histogram(notes, evidx);
+ int len = symbol__size(sym), offset;
+
+ h->nr_samples = 0;
+ for (offset = 0; offset < len; ++offset) {
+ h->addr[offset].nr_samples = h->addr[offset].nr_samples * 7 / 8;
+ h->nr_samples += h->addr[offset].nr_samples;
+ }
+}
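+
+/*
+ * Illustrative decay: each call scales every address's sample count by
+ * 7/8, so 64 samples become 56; after five decays roughly half remain,
+ * since (7/8)^5 ~= 0.51.
+ */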
+
+void annotated_source__purge(struct annotated_source *as)
+{
+ struct annotation_line *al, *n;
+
+ list_for_each_entry_safe(al, n, &as->source, node) {
+ list_del(&al->node);
+ disasm_line__free(disasm_line(al));
+ }
+}
+
+static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp)
+{
+ size_t printed;
+
+ if (dl->al.offset == -1)
+ return fprintf(fp, "%s\n", dl->al.line);
+
+ printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name);
+
+ if (dl->ops.raw[0] != '\0') {
+ printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ",
+ dl->ops.raw);
+ }
+
+ return printed + fprintf(fp, "\n");
+}
+
+size_t disasm__fprintf(struct list_head *head, FILE *fp)
+{
+ struct disasm_line *pos;
+ size_t printed = 0;
+
+ list_for_each_entry(pos, head, al.node)
+ printed += disasm_line__fprintf(pos, fp);
+
+ return printed;
+}
+
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym)
+{
+ if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins) ||
+ !disasm_line__has_local_offset(dl) || dl->ops.target.offset < 0 ||
+ dl->ops.target.offset >= (s64)symbol__size(sym))
+ return false;
+
+ return true;
+}
+
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
+{
+ u64 offset, size = symbol__size(sym);
+
+ /* PLT symbols contain external offsets */
+ if (strstr(sym->name, "@plt"))
+ return;
+
+ for (offset = 0; offset < size; ++offset) {
+ struct annotation_line *al = notes->offsets[offset];
+ struct disasm_line *dl;
+
+ dl = disasm_line(al);
+
+ if (!disasm_line__is_valid_local_jump(dl, sym))
+ continue;
+
+ al = notes->offsets[dl->ops.target.offset];
+
+ /*
+ * FIXME: Oops, no jump target? Buggy disassembler? Or do we
+ * have to adjust to the previous offset?
+ */
+ if (al == NULL)
+ continue;
+
+ if (++al->jump_sources > notes->max_jump_sources)
+ notes->max_jump_sources = al->jump_sources;
+
+ ++notes->nr_jumps;
+ }
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size)
+{
+ struct annotation_line *al;
+
+ notes->max_line_len = 0;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ size_t line_len = strlen(al->line);
+
+ if (notes->max_line_len < line_len)
+ notes->max_line_len = line_len;
+ al->idx = notes->nr_entries++;
+ if (al->offset != -1) {
+ al->idx_asm = notes->nr_asm_entries++;
+			/*
+			 * FIXME: short term bandaid to cope with assembly
+			 * routines that come with labels in the same column
+			 * as the address in objdump, sigh.
+			 *
+			 * E.g. copy_user_generic_unrolled
+			 */
+ if (al->offset < size)
+ notes->offsets[al->offset] = al;
+ } else
+ al->idx_asm = -1;
+ }
+}
+
+static inline int width_jumps(int n)
+{
+ if (n >= 100)
+ return 5;
+ if (n / 10)
+ return 2;
+ return 1;
+}
+
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym)
+{
+ notes->widths.addr = notes->widths.target =
+ notes->widths.min_addr = hex_width(symbol__size(sym));
+ notes->widths.max_addr = hex_width(sym->end);
+ notes->widths.jumps = width_jumps(notes->max_jump_sources);
+}
+
+void annotation__update_column_widths(struct annotation *notes)
+{
+ if (notes->options->use_offset)
+ notes->widths.target = notes->widths.min_addr;
+ else
+ notes->widths.target = notes->widths.max_addr;
+
+ notes->widths.addr = notes->widths.target;
+
+ if (notes->options->show_nr_jumps)
+ notes->widths.addr += notes->widths.jumps + 1;
+}
+
+static void annotation__calc_lines(struct annotation *notes, struct map *map,
+ struct rb_root *root,
+ struct annotation_options *opts)
+{
+ struct annotation_line *al;
+ struct rb_root tmp_root = RB_ROOT;
+
+ list_for_each_entry(al, &notes->src->source, node) {
+ double percent_max = 0.0;
+ int i;
+
+ for (i = 0; i < al->data_nr; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i],
+ opts->percent_type);
+
+ if (percent > percent_max)
+ percent_max = percent;
+ }
+
+ if (percent_max <= 0.5)
+ continue;
+
+ al->path = get_srcline(map->dso, notes->start + al->offset, NULL,
+ false, true, notes->start + al->offset);
+ insert_source_line(&tmp_root, al, opts);
+ }
+
+ resort_source_line(root, &tmp_root);
+}
+
+static void symbol__calc_lines(struct symbol *sym, struct map *map,
+ struct rb_root *root,
+ struct annotation_options *opts)
+{
+ struct annotation *notes = symbol__annotation(sym);
+
+ annotation__calc_lines(notes, map, root, opts);
+}
+
+int symbol__tty_annotate2(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct annotation_options *opts)
+{
+ struct dso *dso = map->dso;
+ struct rb_root source_line = RB_ROOT;
+ struct hists *hists = evsel__hists(evsel);
+ char buf[1024];
+
+ if (symbol__annotate2(sym, map, evsel, opts, NULL) < 0)
+ return -1;
+
+ if (opts->print_lines) {
+ srcline_full_filename = opts->full_path;
+ symbol__calc_lines(sym, map, &source_line, opts);
+ print_summary(&source_line, dso->long_name);
+ }
+
+ hists__scnprintf_title(hists, buf, sizeof(buf));
+ fprintf(stdout, "%s, [percent: %s]\n%s() %s\n",
+ buf, percent_type_str(opts->percent_type), sym->name, dso->long_name);
+ symbol__annotate_fprintf2(sym, stdout, opts);
+
+ annotated_source__purge(symbol__annotation(sym)->src);
+
+ return 0;
+}
+
+int symbol__tty_annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct annotation_options *opts)
+{
+ struct dso *dso = map->dso;
+ struct rb_root source_line = RB_ROOT;
+
+ if (symbol__annotate(sym, map, evsel, 0, opts, NULL) < 0)
+ return -1;
+
+ symbol__calc_percent(sym, evsel);
+
+ if (opts->print_lines) {
+ srcline_full_filename = opts->full_path;
+ symbol__calc_lines(sym, map, &source_line, opts);
+ print_summary(&source_line, dso->long_name);
+ }
+
+ symbol__annotate_printf(sym, map, evsel, opts);
+
+ annotated_source__purge(symbol__annotation(sym)->src);
+
+ return 0;
+}
+
+bool ui__has_annotation(void)
+{
+ return use_browser == 1 && perf_hpp_list.sym;
+}
+
+static double annotation_line__max_percent(struct annotation_line *al,
+ struct annotation *notes,
+ unsigned int percent_type)
+{
+ double percent_max = 0.0;
+ int i;
+
+ for (i = 0; i < notes->nr_events; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i],
+ percent_type);
+
+ if (percent > percent_max)
+ percent_max = percent;
+ }
+
+ return percent_max;
+}
+
+static void disasm_line__write(struct disasm_line *dl, struct annotation *notes,
+ void *obj, char *bf, size_t size,
+ void (*obj__printf)(void *obj, const char *fmt, ...),
+ void (*obj__write_graph)(void *obj, int graph))
+{
+ if (dl->ins.ops && dl->ins.ops->scnprintf) {
+ if (ins__is_jump(&dl->ins)) {
+ bool fwd;
+
+ if (dl->ops.target.outside)
+ goto call_like;
+ fwd = dl->ops.target.offset > dl->al.offset;
+ obj__write_graph(obj, fwd ? DARROW_CHAR : UARROW_CHAR);
+ obj__printf(obj, " ");
+ } else if (ins__is_call(&dl->ins)) {
+call_like:
+ obj__write_graph(obj, RARROW_CHAR);
+ obj__printf(obj, " ");
+ } else if (ins__is_ret(&dl->ins)) {
+ obj__write_graph(obj, LARROW_CHAR);
+ obj__printf(obj, " ");
+ } else {
+ obj__printf(obj, " ");
+ }
+ } else {
+ obj__printf(obj, " ");
+ }
+
+ disasm_line__scnprintf(dl, bf, size, !notes->options->use_offset);
+}
+
+static void __annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ bool first_line, bool current_entry, bool change_color, int width,
+ void *obj, unsigned int percent_type,
+ int (*obj__set_color)(void *obj, int color),
+ void (*obj__set_percent_color)(void *obj, double percent, bool current),
+ int (*obj__set_jumps_percent_color)(void *obj, int nr, bool current),
+ void (*obj__printf)(void *obj, const char *fmt, ...),
+ void (*obj__write_graph)(void *obj, int graph))
+
+{
+ double percent_max = annotation_line__max_percent(al, notes, percent_type);
+ int pcnt_width = annotation__pcnt_width(notes),
+ cycles_width = annotation__cycles_width(notes);
+ bool show_title = false;
+ char bf[256];
+ int printed;
+
+ if (first_line && (al->offset == -1 || percent_max == 0.0)) {
+ if (notes->have_cycles) {
+ if (al->ipc == 0.0 && al->cycles == 0)
+ show_title = true;
+ } else
+ show_title = true;
+ }
+
+ if (al->offset != -1 && percent_max != 0.0) {
+ int i;
+
+ for (i = 0; i < notes->nr_events; i++) {
+ double percent;
+
+ percent = annotation_data__percent(&al->data[i], percent_type);
+
+ obj__set_percent_color(obj, percent, current_entry);
+ if (notes->options->show_total_period) {
+ obj__printf(obj, "%11" PRIu64 " ", al->data[i].he.period);
+ } else if (notes->options->show_nr_samples) {
+ obj__printf(obj, "%6" PRIu64 " ",
+ al->data[i].he.nr_samples);
+ } else {
+ obj__printf(obj, "%6.2f ", percent);
+ }
+ }
+ } else {
+ obj__set_percent_color(obj, 0, current_entry);
+
+ if (!show_title)
+ obj__printf(obj, "%-*s", pcnt_width, " ");
+ else {
+ obj__printf(obj, "%-*s", pcnt_width,
+ notes->options->show_total_period ? "Period" :
+ notes->options->show_nr_samples ? "Samples" : "Percent");
+ }
+ }
+
+ if (notes->have_cycles) {
+ if (al->ipc)
+ obj__printf(obj, "%*.2f ", ANNOTATION__IPC_WIDTH - 1, al->ipc);
+ else if (!show_title)
+ obj__printf(obj, "%*s", ANNOTATION__IPC_WIDTH, " ");
+ else
+ obj__printf(obj, "%*s ", ANNOTATION__IPC_WIDTH - 1, "IPC");
+
+ if (!notes->options->show_minmax_cycle) {
+ if (al->cycles)
+ obj__printf(obj, "%*" PRIu64 " ",
+ ANNOTATION__CYCLES_WIDTH - 1, al->cycles);
+ else if (!show_title)
+ obj__printf(obj, "%*s",
+ ANNOTATION__CYCLES_WIDTH, " ");
+ else
+ obj__printf(obj, "%*s ",
+ ANNOTATION__CYCLES_WIDTH - 1,
+ "Cycle");
+ } else {
+ if (al->cycles) {
+ char str[32];
+
+ scnprintf(str, sizeof(str),
+ "%" PRIu64 "(%" PRIu64 "/%" PRIu64 ")",
+ al->cycles, al->cycles_min,
+ al->cycles_max);
+
+ obj__printf(obj, "%*s ",
+ ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+ str);
+ } else if (!show_title)
+ obj__printf(obj, "%*s",
+ ANNOTATION__MINMAX_CYCLES_WIDTH,
+ " ");
+ else
+ obj__printf(obj, "%*s ",
+ ANNOTATION__MINMAX_CYCLES_WIDTH - 1,
+ "Cycle(min/max)");
+ }
+ }
+
+ obj__printf(obj, " ");
+
+ if (!*al->line)
+ obj__printf(obj, "%-*s", width - pcnt_width - cycles_width, " ");
+ else if (al->offset == -1) {
+ if (al->line_nr && notes->options->show_linenr)
+ printed = scnprintf(bf, sizeof(bf), "%-*d ", notes->widths.addr + 1, al->line_nr);
+ else
+ printed = scnprintf(bf, sizeof(bf), "%-*s ", notes->widths.addr, " ");
+ obj__printf(obj, bf);
+ obj__printf(obj, "%-*s", width - printed - pcnt_width - cycles_width + 1, al->line);
+ } else {
+ u64 addr = al->offset;
+ int color = -1;
+
+ if (!notes->options->use_offset)
+ addr += notes->start;
+
+ if (!notes->options->use_offset) {
+ printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr);
+ } else {
+ if (al->jump_sources &&
+ notes->options->offset_level >= ANNOTATION__OFFSET_JUMP_TARGETS) {
+ if (notes->options->show_nr_jumps) {
+ int prev;
+ printed = scnprintf(bf, sizeof(bf), "%*d ",
+ notes->widths.jumps,
+ al->jump_sources);
+ prev = obj__set_jumps_percent_color(obj, al->jump_sources,
+ current_entry);
+ obj__printf(obj, bf);
+ obj__set_color(obj, prev);
+ }
+print_addr:
+ printed = scnprintf(bf, sizeof(bf), "%*" PRIx64 ": ",
+ notes->widths.target, addr);
+ } else if (ins__is_call(&disasm_line(al)->ins) &&
+ notes->options->offset_level >= ANNOTATION__OFFSET_CALL) {
+ goto print_addr;
+ } else if (notes->options->offset_level == ANNOTATION__MAX_OFFSET_LEVEL) {
+ goto print_addr;
+ } else {
+ printed = scnprintf(bf, sizeof(bf), "%-*s ",
+ notes->widths.addr, " ");
+ }
+ }
+
+ if (change_color)
+ color = obj__set_color(obj, HE_COLORSET_ADDR);
+ obj__printf(obj, bf);
+ if (change_color)
+ obj__set_color(obj, color);
+
+ disasm_line__write(disasm_line(al), notes, obj, bf, sizeof(bf), obj__printf, obj__write_graph);
+
+ obj__printf(obj, "%-*s", width - pcnt_width - cycles_width - 3 - printed, bf);
+ }
+
+}
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ struct annotation_write_ops *wops,
+ struct annotation_options *opts)
+{
+ __annotation_line__write(al, notes, wops->first_line, wops->current_entry,
+ wops->change_color, wops->width, wops->obj,
+ opts->percent_type,
+ wops->set_color, wops->set_percent_color,
+ wops->set_jumps_percent_color, wops->printf,
+ wops->write_graph);
+}
+
+int symbol__annotate2(struct symbol *sym, struct map *map, struct perf_evsel *evsel,
+ struct annotation_options *options, struct arch **parch)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ size_t size = symbol__size(sym);
+ int nr_pcnt = 1, err;
+
+ notes->offsets = zalloc(size * sizeof(struct annotation_line *));
+ if (notes->offsets == NULL)
+ return ENOMEM;
+
+ if (perf_evsel__is_group_event(evsel))
+ nr_pcnt = evsel->nr_members;
+
+ err = symbol__annotate(sym, map, evsel, 0, options, parch);
+ if (err)
+ goto out_free_offsets;
+
+ notes->options = options;
+
+ symbol__calc_percent(sym, evsel);
+
+ annotation__set_offsets(notes, size);
+ annotation__mark_jump_targets(notes, sym);
+ annotation__compute_ipc(notes, size);
+ annotation__init_column_widths(notes, sym);
+ notes->nr_events = nr_pcnt;
+
+ annotation__update_column_widths(notes);
+
+ return 0;
+
+out_free_offsets:
+ zfree(&notes->offsets);
+ return err;
+}
+
+#define ANNOTATION__CFG(n) \
+ { .name = #n, .value = &annotation__default_options.n, }
+
+/*
+ * Keep the entries sorted, they are bsearch'ed
+ */
+static struct annotation_config {
+ const char *name;
+ void *value;
+} annotation__configs[] = {
+ ANNOTATION__CFG(hide_src_code),
+ ANNOTATION__CFG(jump_arrows),
+ ANNOTATION__CFG(offset_level),
+ ANNOTATION__CFG(show_linenr),
+ ANNOTATION__CFG(show_nr_jumps),
+ ANNOTATION__CFG(show_nr_samples),
+ ANNOTATION__CFG(show_total_period),
+ ANNOTATION__CFG(use_offset),
+};
+
+#undef ANNOTATION__CFG
+
+static int annotation_config__cmp(const void *name, const void *cfgp)
+{
+ const struct annotation_config *cfg = cfgp;
+
+ return strcmp(name, cfg->name);
+}
+
+static int annotation__config(const char *var, const char *value,
+ void *data __maybe_unused)
+{
+ struct annotation_config *cfg;
+ const char *name;
+
+ if (!strstarts(var, "annotate."))
+ return 0;
+
+ name = var + 9;
+ cfg = bsearch(name, annotation__configs, ARRAY_SIZE(annotation__configs),
+ sizeof(struct annotation_config), annotation_config__cmp);
+
+ if (cfg == NULL)
+ pr_debug("%s variable unknown, ignoring...", var);
+ else if (strcmp(var, "annotate.offset_level") == 0) {
+ perf_config_int(cfg->value, name, value);
+
+ if (*(int *)cfg->value > ANNOTATION__MAX_OFFSET_LEVEL)
+ *(int *)cfg->value = ANNOTATION__MAX_OFFSET_LEVEL;
+ else if (*(int *)cfg->value < ANNOTATION__MIN_OFFSET_LEVEL)
+ *(int *)cfg->value = ANNOTATION__MIN_OFFSET_LEVEL;
+ } else {
+ *(bool *)cfg->value = perf_config_bool(name, value);
+ }
+ return 0;
+}
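+
+/*
+ * Illustrative ~/.perfconfig snippet handled by annotation__config()
+ * above (the var name arrives as, e.g., "annotate.show_linenr"):
+ *
+ *	[annotate]
+ *		show_linenr = true
+ *		offset_level = 2
+ */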
+
+void annotation_config__init(void)
+{
+ perf_config(annotation__config, NULL);
+
+ annotation__default_options.show_total_period = symbol_conf.show_total_period;
+ annotation__default_options.show_nr_samples = symbol_conf.show_nr_samples;
+}
+
+static unsigned int parse_percent_type(char *str1, char *str2)
+{
+ unsigned int type = (unsigned int) -1;
+
+ if (!strcmp("period", str1)) {
+ if (!strcmp("local", str2))
+ type = PERCENT_PERIOD_LOCAL;
+ else if (!strcmp("global", str2))
+ type = PERCENT_PERIOD_GLOBAL;
+ }
+
+ if (!strcmp("hits", str1)) {
+ if (!strcmp("local", str2))
+ type = PERCENT_HITS_LOCAL;
+ else if (!strcmp("global", str2))
+ type = PERCENT_HITS_GLOBAL;
+ }
+
+ return type;
+}
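+
+/*
+ * Illustrative inputs: "local-period", "global-hits", and the swapped
+ * forms "period-local" and "hits-global" all parse, because
+ * annotate_parse_percent_type() below retries parse_percent_type() with
+ * the two halves exchanged.
+ */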
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+ int unset __maybe_unused)
+{
+ struct annotation_options *opts = opt->value;
+ unsigned int type;
+ char *str1, *str2;
+ int err = -1;
+
+ str1 = strdup(_str);
+ if (!str1)
+ return -ENOMEM;
+
+ str2 = strchr(str1, '-');
+ if (!str2)
+ goto out;
+
+ *str2++ = 0;
+
+ type = parse_percent_type(str1, str2);
+ if (type == (unsigned int) -1)
+ type = parse_percent_type(str2, str1);
+ if (type != (unsigned int) -1) {
+ opts->percent_type = type;
+ err = 0;
+ }
+
+out:
+ free(str1);
+ return err;
+}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
new file mode 100644
index 000000000..5399ba232
--- /dev/null
+++ b/tools/perf/util/annotate.h
@@ -0,0 +1,403 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ANNOTATE_H
+#define __PERF_ANNOTATE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include "symbol.h"
+#include "hist.h"
+#include "sort.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <pthread.h>
+#include <asm/bug.h>
+
+struct ins_ops;
+
+struct ins {
+ const char *name;
+ struct ins_ops *ops;
+};
+
+struct ins_operands {
+ char *raw;
+ char *raw_comment;
+ struct {
+ char *raw;
+ char *name;
+ struct symbol *sym;
+ u64 addr;
+ s64 offset;
+ bool offset_avail;
+ bool outside;
+ } target;
+ union {
+ struct {
+ char *raw;
+ char *name;
+ u64 addr;
+ } source;
+ struct {
+ struct ins ins;
+ struct ins_operands *ops;
+ } locked;
+ };
+};
+
+struct arch;
+
+struct ins_ops {
+ void (*free)(struct ins_operands *ops);
+ int (*parse)(struct arch *arch, struct ins_operands *ops, struct map_symbol *ms);
+ int (*scnprintf)(struct ins *ins, char *bf, size_t size,
+ struct ins_operands *ops);
+};
+
+bool ins__is_jump(const struct ins *ins);
+bool ins__is_call(const struct ins *ins);
+bool ins__is_ret(const struct ins *ins);
+bool ins__is_lock(const struct ins *ins);
+int ins__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops);
+bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
+
+#define ANNOTATION__IPC_WIDTH 6
+#define ANNOTATION__CYCLES_WIDTH 6
+#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
+
+struct annotation_options {
+ bool hide_src_code,
+ use_offset,
+ jump_arrows,
+ print_lines,
+ full_path,
+ show_linenr,
+ show_nr_jumps,
+ show_nr_samples,
+ show_total_period,
+ show_minmax_cycle,
+ show_asm_raw,
+ annotate_src;
+ u8 offset_level;
+ int min_pcnt;
+ int max_lines;
+ int context;
+ const char *objdump_path;
+ const char *disassembler_style;
+ unsigned int percent_type;
+};
+
+enum {
+ ANNOTATION__OFFSET_JUMP_TARGETS = 1,
+ ANNOTATION__OFFSET_CALL,
+ ANNOTATION__MAX_OFFSET_LEVEL,
+};
+
+#define ANNOTATION__MIN_OFFSET_LEVEL ANNOTATION__OFFSET_JUMP_TARGETS
+
+extern struct annotation_options annotation__default_options;
+
+struct annotation;
+
+struct sym_hist_entry {
+ u64 nr_samples;
+ u64 period;
+};
+
+enum {
+ PERCENT_HITS_LOCAL,
+ PERCENT_HITS_GLOBAL,
+ PERCENT_PERIOD_LOCAL,
+ PERCENT_PERIOD_GLOBAL,
+ PERCENT_MAX,
+};
+
+struct annotation_data {
+ double percent[PERCENT_MAX];
+ double percent_sum;
+ struct sym_hist_entry he;
+};
+
+struct annotation_line {
+ struct list_head node;
+ struct rb_node rb_node;
+ s64 offset;
+ char *line;
+ int line_nr;
+ int jump_sources;
+ float ipc;
+ u64 cycles;
+ u64 cycles_max;
+ u64 cycles_min;
+ size_t privsize;
+ char *path;
+ u32 idx;
+ int idx_asm;
+ int data_nr;
+ struct annotation_data data[0];
+};
+
+struct disasm_line {
+ struct ins ins;
+ struct ins_operands ops;
+
+ /* This needs to be at the end. */
+ struct annotation_line al;
+};
+
+static inline double annotation_data__percent(struct annotation_data *data,
+ unsigned int which)
+{
+ return which < PERCENT_MAX ? data->percent[which] : -1;
+}
+
+static inline const char *percent_type_str(unsigned int type)
+{
+ static const char *str[PERCENT_MAX] = {
+ "local hits",
+ "global hits",
+ "local period",
+ "global period",
+ };
+
+ if (WARN_ON(type >= PERCENT_MAX))
+ return "N/A";
+
+ return str[type];
+}
+
+static inline struct disasm_line *disasm_line(struct annotation_line *al)
+{
+ return al ? container_of(al, struct disasm_line, al) : NULL;
+}
+
+/*
+ * Is this offset in the same function as the line where it is used?
+ * asm functions jump to other functions, for instance.
+ */
+static inline bool disasm_line__has_local_offset(const struct disasm_line *dl)
+{
+ return dl->ops.target.offset_avail && !dl->ops.target.outside;
+}
+
+/*
+ * Can we draw an arrow from the jump to its target, for instance? I.e.
+ * are the jump and its target in the same function?
+ */
+bool disasm_line__is_valid_local_jump(struct disasm_line *dl, struct symbol *sym);
+
+void disasm_line__free(struct disasm_line *dl);
+struct annotation_line *
+annotation_line__next(struct annotation_line *pos, struct list_head *head);
+
+struct annotation_write_ops {
+ bool first_line, current_entry, change_color;
+ int width;
+ void *obj;
+ int (*set_color)(void *obj, int color);
+ void (*set_percent_color)(void *obj, double percent, bool current);
+ int (*set_jumps_percent_color)(void *obj, int nr, bool current);
+ void (*printf)(void *obj, const char *fmt, ...);
+ void (*write_graph)(void *obj, int graph);
+};
+
+void annotation_line__write(struct annotation_line *al, struct annotation *notes,
+ struct annotation_write_ops *ops,
+ struct annotation_options *opts);
+
+int __annotation__scnprintf_samples_period(struct annotation *notes,
+ char *bf, size_t size,
+ struct perf_evsel *evsel,
+ bool show_freq);
+
+int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
+size_t disasm__fprintf(struct list_head *head, FILE *fp);
+void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel);
+
+struct sym_hist {
+ u64 nr_samples;
+ u64 period;
+ struct sym_hist_entry addr[0];
+};
+
+struct cyc_hist {
+ u64 start;
+ u64 cycles;
+ u64 cycles_aggr;
+ u64 cycles_max;
+ u64 cycles_min;
+ u32 num;
+ u32 num_aggr;
+ u8 have_start;
+ /* 1 byte padding */
+ u16 reset;
+};
+
+/** struct annotated_source - symbols with hits have this attached as in annotation
+ *
+ * @histograms: Array of addr hit histograms per event being monitored
+ * @nr_histograms: This may not be the same as evsel->evlist->nr_entries if
+ * 		  we have more than one group in an evlist, where we will want
+ * 		  to see each group separately, that is why symbol__annotate2()
+ * 		  sets src->nr_histograms to evsel->nr_members.
+ * @lines: If 'print_lines' is specified, per source code line percentages
+ * @source: source parsed from a disassembler like objdump -dS
+ * @cyc_hist: Average cycles per basic block
+ *
+ * lines is allocated, percentages calculated and all sorted by percentage
+ * when the annotation is about to be presented, so the percentages are for
+ * one of the entries in the histogram array, i.e. for the event/counter being
+ * presented. It is deallocated right after symbol__{tui,tty,etc}_annotate
+ * returns.
+ */
+struct annotated_source {
+ struct list_head source;
+ int nr_histograms;
+ size_t sizeof_sym_hist;
+ struct cyc_hist *cycles_hist;
+ struct sym_hist *histograms;
+};
+
+struct annotation {
+ pthread_mutex_t lock;
+ u64 max_coverage;
+ u64 start;
+ struct annotation_options *options;
+ struct annotation_line **offsets;
+ int nr_events;
+ int nr_jumps;
+ int max_jump_sources;
+ int nr_entries;
+ int nr_asm_entries;
+ u16 max_line_len;
+ struct {
+ u8 addr;
+ u8 jumps;
+ u8 target;
+ u8 min_addr;
+ u8 max_addr;
+ } widths;
+ bool have_cycles;
+ struct annotated_source *src;
+};
+
+static inline int annotation__cycles_width(struct annotation *notes)
+{
+ if (notes->have_cycles && notes->options->show_minmax_cycle)
+ return ANNOTATION__IPC_WIDTH + ANNOTATION__MINMAX_CYCLES_WIDTH;
+
+ return notes->have_cycles ? ANNOTATION__IPC_WIDTH + ANNOTATION__CYCLES_WIDTH : 0;
+}
+
+static inline int annotation__pcnt_width(struct annotation *notes)
+{
+ return (notes->options->show_total_period ? 12 : 7) * notes->nr_events;
+}
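+
+/*
+ * Example (illustrative): with two events in a group and
+ * show_total_period disabled, annotation__pcnt_width() reserves
+ * 7 * 2 = 14 columns for the percentage block.
+ */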
+
+static inline bool annotation_line__filter(struct annotation_line *al, struct annotation *notes)
+{
+ return notes->options->hide_src_code && al->offset == -1;
+}
+
+void annotation__set_offsets(struct annotation *notes, s64 size);
+void annotation__compute_ipc(struct annotation *notes, size_t size);
+void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym);
+void annotation__update_column_widths(struct annotation *notes);
+void annotation__init_column_widths(struct annotation *notes, struct symbol *sym);
+
+static inline struct sym_hist *annotated_source__histogram(struct annotated_source *src, int idx)
+{
+ return ((void *)src->histograms) + (src->sizeof_sym_hist * idx);
+}
+
+static inline struct sym_hist *annotation__histogram(struct annotation *notes, int idx)
+{
+ return annotated_source__histogram(notes->src, idx);
+}
+
+static inline struct annotation *symbol__annotation(struct symbol *sym)
+{
+ return (void *)sym - symbol_conf.priv_size;
+}
+
+int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample,
+ struct perf_evsel *evsel);
+
+int addr_map_symbol__account_cycles(struct addr_map_symbol *ams,
+ struct addr_map_symbol *start,
+ unsigned cycles);
+
+int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *sample,
+ struct perf_evsel *evsel, u64 addr);
+
+struct annotated_source *symbol__hists(struct symbol *sym, int nr_hists);
+void symbol__annotate_zero_histograms(struct symbol *sym);
+
+int symbol__annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, size_t privsize,
+ struct annotation_options *options,
+ struct arch **parch);
+int symbol__annotate2(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct annotation_options *options,
+ struct arch **parch);
+
+enum symbol_disassemble_errno {
+ SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0,
+
+	/*
+	 * Choose an arbitrarily large negative number so as not to clash with
+	 * standard errno values, since SUS requires errno to have distinct
+	 * positive values. See 'Issue 6' in the link below.
+	 *
+	 * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+	 */
+ __SYMBOL_ANNOTATE_ERRNO__START = -10000,
+
+ SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START,
+
+ __SYMBOL_ANNOTATE_ERRNO__END,
+};
+
+int symbol__strerror_disassemble(struct symbol *sym, struct map *map,
+ int errnum, char *buf, size_t buflen);
+
+int symbol__annotate_printf(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct annotation_options *options);
+void symbol__annotate_zero_histogram(struct symbol *sym, int evidx);
+void symbol__annotate_decay_histogram(struct symbol *sym, int evidx);
+void annotated_source__purge(struct annotated_source *as);
+
+int map_symbol__annotation_dump(struct map_symbol *ms, struct perf_evsel *evsel,
+ struct annotation_options *opts);
+
+bool ui__has_annotation(void);
+
+int symbol__tty_annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, struct annotation_options *opts);
+
+int symbol__tty_annotate2(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel, struct annotation_options *opts);
+
+#ifdef HAVE_SLANG_SUPPORT
+int symbol__tui_annotate(struct symbol *sym, struct map *map,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *opts);
+#else
+static inline int symbol__tui_annotate(struct symbol *sym __maybe_unused,
+ struct map *map __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ struct annotation_options *opts __maybe_unused)
+{
+ return 0;
+}
+#endif
+
+void annotation_config__init(void);
+
+int annotate_parse_percent_type(const struct option *opt, const char *_str,
+ int unset);
+#endif /* __PERF_ANNOTATE_H */
diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-pkt-decoder.c
new file mode 100644
index 000000000..b94001b75
--- /dev/null
+++ b/tools/perf/util/arm-spe-pkt-decoder.c
@@ -0,0 +1,462 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+#define BIT(n) (1ULL << (n))
+
+#define NS_FLAG BIT(63)
+#define EL_FLAG (BIT(62) | BIT(61))
+
+#define SPE_HEADER0_PAD 0x0
+#define SPE_HEADER0_END 0x1
+#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */
+#define SPE_HEADER0_ADDRESS_MASK 0x38
+#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */
+#define SPE_HEADER0_COUNTER_MASK 0x38
+#define SPE_HEADER0_TIMESTAMP		0x71
+#define SPE_HEADER0_EVENTS 0x2
+#define SPE_HEADER0_EVENTS_MASK 0xf
+#define SPE_HEADER0_SOURCE 0x3
+#define SPE_HEADER0_SOURCE_MASK 0xf
+#define SPE_HEADER0_CONTEXT 0x24
+#define SPE_HEADER0_CONTEXT_MASK 0x3c
+#define SPE_HEADER0_OP_TYPE 0x8
+#define SPE_HEADER0_OP_TYPE_MASK 0x3c
+#define SPE_HEADER1_ALIGNMENT 0x0
+#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */
+#define SPE_HEADER1_ADDRESS_MASK 0xf8
+#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */
+#define SPE_HEADER1_COUNTER_MASK 0xf8
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+ memcpy((d), (s), (n)); \
+ *(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const arm_spe_packet_name[] = {
+ [ARM_SPE_PAD] = "PAD",
+ [ARM_SPE_END] = "END",
+ [ARM_SPE_TIMESTAMP] = "TS",
+ [ARM_SPE_ADDRESS] = "ADDR",
+ [ARM_SPE_COUNTER] = "LAT",
+ [ARM_SPE_CONTEXT] = "CONTEXT",
+ [ARM_SPE_OP_TYPE] = "OP-TYPE",
+ [ARM_SPE_EVENTS] = "EVENTS",
+ [ARM_SPE_DATA_SOURCE] = "DATA-SOURCE",
+};
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type type)
+{
+ return arm_spe_packet_name[type];
+}
+
+/*
+ * Return the ARM SPE payload size from its encoding in bits 5:4 of the
+ * header byte:
+ * 00 : byte (1)
+ * 01 : halfword (2)
+ * 10 : word (4)
+ * 11 : doubleword (8)
+ */
+static int payloadlen(unsigned char byte)
+{
+ return 1 << ((byte & 0x30) >> 4);
+}
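+
+/*
+ * Example (illustrative): a header byte of 0x25 has bits 5:4 == 0b10, so
+ * payloadlen() returns 1 << 2 == 4, i.e. a word-sized payload.
+ */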
+
+static int arm_spe_get_payload(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ size_t payload_len = payloadlen(buf[0]);
+
+ if (len < 1 + payload_len)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ buf++;
+
+ switch (payload_len) {
+ case 1: packet->payload = *(uint8_t *)buf; break;
+ case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break;
+ case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break;
+ case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break;
+ default: return ARM_SPE_BAD_PACKET;
+ }
+
+ return 1 + payload_len;
+}
+
+static int arm_spe_get_pad(struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_PAD;
+ return 1;
+}
+
+static int arm_spe_get_alignment(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ unsigned int alignment = 1 << ((buf[0] & 0xf) + 1);
+
+ if (len < alignment)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ packet->type = ARM_SPE_PAD;
+ return alignment - (((uintptr_t)buf) & (alignment - 1));
+}
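+
+/*
+ * Example (illustrative): an alignment header with low nibble 3 gives
+ * alignment = 1 << 4 = 16; if the buffer currently sits 6 bytes past a
+ * 16-byte boundary, 16 - 6 = 10 bytes are consumed as padding.
+ */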
+
+static int arm_spe_get_end(struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_END;
+ return 1;
+}
+
+static int arm_spe_get_timestamp(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_TIMESTAMP;
+ return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_events(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ int ret = arm_spe_get_payload(buf, len, packet);
+
+ packet->type = ARM_SPE_EVENTS;
+
+	/*
+	 * We use the index to identify Events with fewer comparisons in
+	 * arm_spe_pkt_desc(): e.g., the LLC-ACCESS, LLC-REFILL, and
+	 * REMOTE-ACCESS events are identified iff index > 1.
+	 */
+ packet->index = ret - 1;
+
+ return ret;
+}
+
+static int arm_spe_get_data_source(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_DATA_SOURCE;
+ return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_context(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_CONTEXT;
+ packet->index = buf[0] & 0x3;
+
+ return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_op_type(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ packet->type = ARM_SPE_OP_TYPE;
+ packet->index = buf[0] & 0x3;
+ return arm_spe_get_payload(buf, len, packet);
+}
+
+static int arm_spe_get_counter(const unsigned char *buf, size_t len,
+ const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+ if (len < 2)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ packet->type = ARM_SPE_COUNTER;
+ if (ext_hdr)
+ packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
+ else
+ packet->index = buf[0] & 0x7;
+
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+
+ return 1 + ext_hdr + 2;
+}
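+
+/*
+ * Example (illustrative): a short counter packet with header byte 0x19
+ * decodes to index = 0x19 & 0x7 = 1 (the ISSUE latency counter, see
+ * arm_spe_pkt_desc()), followed by a 16-bit little-endian payload, for a
+ * total of 3 bytes consumed.
+ */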
+
+static int arm_spe_get_addr(const unsigned char *buf, size_t len,
+ const unsigned char ext_hdr, struct arm_spe_pkt *packet)
+{
+ if (len < 8)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ packet->type = ARM_SPE_ADDRESS;
+ if (ext_hdr)
+ packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7);
+ else
+ packet->index = buf[0] & 0x7;
+
+ memcpy_le64(&packet->payload, buf + 1, 8);
+
+ return 1 + ext_hdr + 8;
+}
+
+static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ unsigned int byte;
+
+ memset(packet, 0, sizeof(struct arm_spe_pkt));
+
+ if (!len)
+ return ARM_SPE_NEED_MORE_BYTES;
+
+ byte = buf[0];
+ if (byte == SPE_HEADER0_PAD)
+ return arm_spe_get_pad(packet);
+ else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */
+ return arm_spe_get_end(packet);
+ else if (byte & 0xc0 /* 0y11xxxxxx */) {
+ if (byte & 0x80) {
+ if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS)
+ return arm_spe_get_addr(buf, len, 0, packet);
+ if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER)
+ return arm_spe_get_counter(buf, len, 0, packet);
+ } else
+ if (byte == SPE_HEADER0_TIMESTAMP)
+ return arm_spe_get_timestamp(buf, len, packet);
+ else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS)
+ return arm_spe_get_events(buf, len, packet);
+ else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE)
+ return arm_spe_get_data_source(buf, len, packet);
+ else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT)
+ return arm_spe_get_context(buf, len, packet);
+ else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE)
+ return arm_spe_get_op_type(buf, len, packet);
+ } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) {
+ /* 16-bit header */
+ byte = buf[1];
+ if (byte == SPE_HEADER1_ALIGNMENT)
+ return arm_spe_get_alignment(buf, len, packet);
+ else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS)
+ return arm_spe_get_addr(buf, len, 1, packet);
+ else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER)
+ return arm_spe_get_counter(buf, len, 1, packet);
+ }
+
+ return ARM_SPE_BAD_PACKET;
+}
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet)
+{
+ int ret;
+
+ ret = arm_spe_do_get_packet(buf, len, packet);
+	/*
+	 * Put multiple consecutive PADs on the same line, up to the
+	 * fixed-width output format of 16 bytes per line.
+	 */
+ if (ret > 0 && packet->type == ARM_SPE_PAD) {
+ while (ret < 16 && len > (size_t)ret && !buf[ret])
+ ret += 1;
+ }
+ return ret;
+}
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf,
+ size_t buf_len)
+{
+ int ret, ns, el, idx = packet->index;
+ unsigned long long payload = packet->payload;
+ const char *name = arm_spe_pkt_name(packet->type);
+
+ switch (packet->type) {
+ case ARM_SPE_BAD:
+ case ARM_SPE_PAD:
+ case ARM_SPE_END:
+ return snprintf(buf, buf_len, "%s", name);
+ case ARM_SPE_EVENTS: {
+ size_t blen = buf_len;
+
+ ret = 0;
+ ret = snprintf(buf, buf_len, "EV");
+ buf += ret;
+ blen -= ret;
+ if (payload & 0x1) {
+ ret = snprintf(buf, buf_len, " EXCEPTION-GEN");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x2) {
+ ret = snprintf(buf, buf_len, " RETIRED");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x4) {
+ ret = snprintf(buf, buf_len, " L1D-ACCESS");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x8) {
+ ret = snprintf(buf, buf_len, " L1D-REFILL");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x10) {
+ ret = snprintf(buf, buf_len, " TLB-ACCESS");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x20) {
+ ret = snprintf(buf, buf_len, " TLB-REFILL");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x40) {
+ ret = snprintf(buf, buf_len, " NOT-TAKEN");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x80) {
+ ret = snprintf(buf, buf_len, " MISPRED");
+ buf += ret;
+ blen -= ret;
+ }
+ if (idx > 1) {
+ if (payload & 0x100) {
+ ret = snprintf(buf, buf_len, " LLC-ACCESS");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x200) {
+ ret = snprintf(buf, buf_len, " LLC-REFILL");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x400) {
+ ret = snprintf(buf, buf_len, " REMOTE-ACCESS");
+ buf += ret;
+ blen -= ret;
+ }
+ }
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ case ARM_SPE_OP_TYPE:
+ switch (idx) {
+ case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ?
+ "COND-SELECT" : "INSN-OTHER");
+ case 1: {
+ size_t blen = buf_len;
+
+ if (payload & 0x1)
+ ret = snprintf(buf, buf_len, "ST");
+ else
+ ret = snprintf(buf, buf_len, "LD");
+ buf += ret;
+ blen -= ret;
+ if (payload & 0x2) {
+ if (payload & 0x4) {
+ ret = snprintf(buf, buf_len, " AT");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x8) {
+ ret = snprintf(buf, buf_len, " EXCL");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x10) {
+ ret = snprintf(buf, buf_len, " AR");
+ buf += ret;
+ blen -= ret;
+ }
+ } else if (payload & 0x4) {
+ ret = snprintf(buf, buf_len, " SIMD-FP");
+ buf += ret;
+ blen -= ret;
+ }
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ case 2: {
+ size_t blen = buf_len;
+
+ ret = snprintf(buf, buf_len, "B");
+ buf += ret;
+ blen -= ret;
+ if (payload & 0x1) {
+ ret = snprintf(buf, buf_len, " COND");
+ buf += ret;
+ blen -= ret;
+ }
+ if (payload & 0x2) {
+ ret = snprintf(buf, buf_len, " IND");
+ buf += ret;
+ blen -= ret;
+ }
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ default: return 0;
+ }
+ case ARM_SPE_DATA_SOURCE:
+ case ARM_SPE_TIMESTAMP:
+ return snprintf(buf, buf_len, "%s %lld", name, payload);
+ case ARM_SPE_ADDRESS:
+ switch (idx) {
+ case 0:
+ case 1: ns = !!(packet->payload & NS_FLAG);
+ el = (packet->payload & EL_FLAG) >> 61;
+ payload &= ~(0xffULL << 56);
+ return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d",
+ (idx == 1) ? "TGT" : "PC", payload, el, ns);
+ case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload);
+ case 3: ns = !!(packet->payload & NS_FLAG);
+ payload &= ~(0xffULL << 56);
+ return snprintf(buf, buf_len, "PA 0x%llx ns=%d",
+ payload, ns);
+ default: return 0;
+ }
+ case ARM_SPE_CONTEXT:
+ return snprintf(buf, buf_len, "%s 0x%lx el%d", name,
+ (unsigned long)payload, idx + 1);
+ case ARM_SPE_COUNTER: {
+ size_t blen = buf_len;
+
+ ret = snprintf(buf, buf_len, "%s %d ", name,
+ (unsigned short)payload);
+ buf += ret;
+ blen -= ret;
+ switch (idx) {
+ case 0: ret = snprintf(buf, buf_len, "TOT"); break;
+ case 1: ret = snprintf(buf, buf_len, "ISSUE"); break;
+ case 2: ret = snprintf(buf, buf_len, "XLAT"); break;
+ default: ret = 0;
+ }
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ default:
+ break;
+ }
+
+ return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+ name, payload, packet->index);
+}
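+
+/*
+ * Example (illustrative): an EVENTS packet whose payload is 0x06 has the
+ * RETIRED (0x2) and L1D-ACCESS (0x4) bits set, so arm_spe_pkt_desc()
+ * renders it as "EV RETIRED L1D-ACCESS".
+ */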
diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-pkt-decoder.h
new file mode 100644
index 000000000..d786ef651
--- /dev/null
+++ b/tools/perf/util/arm-spe-pkt-decoder.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__
+#define INCLUDE__ARM_SPE_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define ARM_SPE_PKT_DESC_MAX 256
+
+#define ARM_SPE_NEED_MORE_BYTES -1
+#define ARM_SPE_BAD_PACKET -2
+
+enum arm_spe_pkt_type {
+ ARM_SPE_BAD,
+ ARM_SPE_PAD,
+ ARM_SPE_END,
+ ARM_SPE_TIMESTAMP,
+ ARM_SPE_ADDRESS,
+ ARM_SPE_COUNTER,
+ ARM_SPE_CONTEXT,
+ ARM_SPE_OP_TYPE,
+ ARM_SPE_EVENTS,
+ ARM_SPE_DATA_SOURCE,
+};
+
+struct arm_spe_pkt {
+ enum arm_spe_pkt_type type;
+ unsigned char index;
+ uint64_t payload;
+};
+
+const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
+
+int arm_spe_get_packet(const unsigned char *buf, size_t len,
+ struct arm_spe_pkt *packet);
+
+int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len);
+#endif
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
new file mode 100644
index 000000000..6067267cc
--- /dev/null
+++ b/tools/perf/util/arm-spe.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "arm-spe.h"
+#include "arm-spe-pkt-decoder.h"
+
+struct arm_spe {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 pmu_type;
+};
+
+struct arm_spe_queue {
+ struct arm_spe *spe;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+};
+
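+/* Hex-dump the raw SPE buffer, decoding one packet per line where possible. */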
+static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct arm_spe_pkt packet;
+ size_t pos = 0;
+ int ret, pkt_len, i;
+ char desc[ARM_SPE_PKT_DESC_MAX];
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... ARM SPE data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ ret = arm_spe_get_packet(buf, len, &packet);
+ if (ret > 0)
+ pkt_len = ret;
+ else
+ pkt_len = 1;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < pkt_len; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < 16; i++)
+ color_fprintf(stdout, color, " ");
+ if (ret > 0) {
+ ret = arm_spe_pkt_desc(&packet, desc,
+ ARM_SPE_PKT_DESC_MAX);
+ if (ret > 0)
+ color_fprintf(stdout, color, " %s\n", desc);
+ } else {
+ color_fprintf(stdout, color, " Bad packet!\n");
+ }
+ pos += pkt_len;
+ buf += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ arm_spe_dump(spe, buf, len);
+}
+
+static int arm_spe_process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static int arm_spe_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&spe->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now that we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ arm_spe_dump_event(spe, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void arm_spe_free_queue(void *priv)
+{
+ struct arm_spe_queue *speq = priv;
+
+ if (!speq)
+ return;
+ free(speq);
+}
+
+static void arm_spe_free_events(struct perf_session *session)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+ struct auxtrace_queues *queues = &spe->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ arm_spe_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ auxtrace_queues__free(queues);
+}
+
+static void arm_spe_free(struct perf_session *session)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+
+ auxtrace_heap__free(&spe->heap);
+ arm_spe_free_events(session);
+ session->auxtrace = NULL;
+ free(spe);
+}
+
+static const char * const arm_spe_info_fmts[] = {
+ [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
+};
+
+static void arm_spe_print_info(u64 *arr)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
+}
+
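+/*
+ * Called for PERF_RECORD_AUXTRACE_INFO: set up the SPE decoder state and
+ * hook it into the session's auxtrace callbacks.
+ */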
+int arm_spe_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
+ struct arm_spe *spe;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+ min_sz)
+ return -EINVAL;
+
+ spe = zalloc(sizeof(struct arm_spe));
+ if (!spe)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&spe->queues);
+ if (err)
+ goto err_free;
+
+ spe->session = session;
+ spe->machine = &session->machines.host; /* No kvm support */
+ spe->auxtrace_type = auxtrace_info->type;
+ spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+
+ spe->auxtrace.process_event = arm_spe_process_event;
+ spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
+ spe->auxtrace.flush_events = arm_spe_flush;
+ spe->auxtrace.free_events = arm_spe_free_events;
+ spe->auxtrace.free = arm_spe_free;
+ session->auxtrace = &spe->auxtrace;
+
+ arm_spe_print_info(&auxtrace_info->priv[0]);
+
+ return 0;
+
+err_free:
+ free(spe);
+ return err;
+}
diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h
new file mode 100644
index 000000000..98d323578
--- /dev/null
+++ b/tools/perf/util/arm-spe.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Arm Statistical Profiling Extensions (SPE) support
+ * Copyright (c) 2017-2018, Arm Ltd.
+ */
+
+#ifndef INCLUDE__PERF_ARM_SPE_H__
+#define INCLUDE__PERF_ARM_SPE_H__
+
+#define ARM_SPE_PMU_NAME "arm_spe_"
+
+enum {
+ ARM_SPE_PMU_TYPE,
+ ARM_SPE_PER_CPU_MMAPS,
+ ARM_SPE_AUXTRACE_PRIV_MAX,
+};
+
+#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *arm_spe_recording_init(int *err,
+ struct perf_pmu *arm_spe_pmu);
+
+int arm_spe_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+#endif
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644
index 000000000..f7c4dcb9d
--- /dev/null
+++ b/tools/perf/util/auxtrace.c
@@ -0,0 +1,2166 @@
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include <linux/string.h>
+
+#include <sys/param.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <linux/list.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "evlist.h"
+#include "dso.h"
+#include "map.h"
+#include "pmu.h"
+#include "evsel.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+#include <linux/hash.h>
+
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+#include <subcmd/parse-options.h>
+
+#include "cs-etm.h"
+#include "intel-pt.h"
+#include "intel-bts.h"
+#include "arm-spe.h"
+#include "s390-cpumsf.h"
+
+#include "sane_ctype.h"
+#include "symbol/kallsyms.h"
+
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+ return !session->itrace_synth_opts ||
+ session->itrace_synth_opts->dont_decode;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd)
+{
+ struct perf_event_mmap_page *pc = userpg;
+
+ WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+ mm->userpg = userpg;
+ mm->mask = mp->mask;
+ mm->len = mp->len;
+ mm->prev = 0;
+ mm->idx = mp->idx;
+ mm->tid = mp->tid;
+ mm->cpu = mp->cpu;
+
+ if (!mp->len) {
+ mm->base = NULL;
+ return 0;
+ }
+
+#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ pr_err("Cannot use AUX area tracing mmaps\n");
+ return -1;
+#endif
+
+ pc->aux_offset = mp->offset;
+ pc->aux_size = mp->len;
+
+ mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+ if (mm->base == MAP_FAILED) {
+ pr_debug2("failed to mmap AUX area\n");
+ mm->base = NULL;
+ return -1;
+ }
+
+ return 0;
+}
+
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+ if (mm->base) {
+ munmap(mm->base, mm->len);
+ mm->base = NULL;
+ }
+}
+
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite)
+{
+ if (auxtrace_pages) {
+ mp->offset = auxtrace_offset;
+ mp->len = auxtrace_pages * (size_t)page_size;
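+ /* A zero mask makes the read side fall back to modulo arithmetic. */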
+ mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
+ mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
+ pr_debug2("AUX area mmap length %zu\n", mp->len);
+ } else {
+ mp->len = 0;
+ }
+}
+
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct perf_evlist *evlist, int idx,
+ bool per_cpu)
+{
+ mp->idx = idx;
+
+ if (per_cpu) {
+ mp->cpu = evlist->cpus->map[idx];
+ if (evlist->threads)
+ mp->tid = thread_map__pid(evlist->threads, 0);
+ else
+ mp->tid = -1;
+ } else {
+ mp->cpu = -1;
+ mp->tid = thread_map__pid(evlist->threads, idx);
+ }
+}
+
+#define AUXTRACE_INIT_NR_QUEUES 32
+
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+ struct auxtrace_queue *queue_array;
+ unsigned int max_nr_queues, i;
+
+ max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
+ if (nr_queues > max_nr_queues)
+ return NULL;
+
+ queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
+ if (!queue_array)
+ return NULL;
+
+ for (i = 0; i < nr_queues; i++) {
+ INIT_LIST_HEAD(&queue_array[i].head);
+ queue_array[i].priv = NULL;
+ }
+
+ return queue_array;
+}
+
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+ queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+ queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
+ if (!queues->queue_array)
+ return -ENOMEM;
+ return 0;
+}
+
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+ unsigned int new_nr_queues)
+{
+ unsigned int nr_queues = queues->nr_queues;
+ struct auxtrace_queue *queue_array;
+ unsigned int i;
+
+ if (!nr_queues)
+ nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
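+ /* Double until new_nr_queues is covered; wrapping to a smaller value means overflow. */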
+ while (nr_queues && nr_queues < new_nr_queues)
+ nr_queues <<= 1;
+
+ if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+ return -EINVAL;
+
+ queue_array = auxtrace_alloc_queue_array(nr_queues);
+ if (!queue_array)
+ return -ENOMEM;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ list_splice_tail(&queues->queue_array[i].head,
+ &queue_array[i].head);
+ queue_array[i].tid = queues->queue_array[i].tid;
+ queue_array[i].cpu = queues->queue_array[i].cpu;
+ queue_array[i].set = queues->queue_array[i].set;
+ queue_array[i].priv = queues->queue_array[i].priv;
+ }
+
+ queues->nr_queues = nr_queues;
+ queues->queue_array = queue_array;
+
+ return 0;
+}
+
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+ int fd = perf_data__fd(session->data);
+ void *p;
+ ssize_t ret;
+
+ if (size > SSIZE_MAX)
+ return NULL;
+
+ p = malloc(size);
+ if (!p)
+ return NULL;
+
+ ret = readn(fd, p, size);
+ if (ret != (ssize_t)size) {
+ free(p);
+ return NULL;
+ }
+
+ return p;
+}
+
+static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ struct auxtrace_queue *queue;
+ int err;
+
+ if (idx >= queues->nr_queues) {
+ err = auxtrace_queues__grow(queues, idx + 1);
+ if (err)
+ return err;
+ }
+
+ queue = &queues->queue_array[idx];
+
+ if (!queue->set) {
+ queue->set = true;
+ queue->tid = buffer->tid;
+ queue->cpu = buffer->cpu;
+ }
+
+ buffer->buffer_nr = queues->next_buffer_nr++;
+
+ list_add_tail(&buffer->list, &queue->head);
+
+ queues->new_data = true;
+ queues->populated = true;
+
+ return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer)
+{
+ u64 sz = buffer->size;
+ bool consecutive = false;
+ struct auxtrace_buffer *b;
+ int err;
+
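+ /*
+ * Carve off BUFFER_LIMIT_FOR_32_BIT sized pieces; each piece after the
+ * first is marked consecutive to the piece before it.
+ */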
+ while (sz > BUFFER_LIMIT_FOR_32_BIT) {
+ b = memdup(buffer, sizeof(struct auxtrace_buffer));
+ if (!b)
+ return -ENOMEM;
+ b->size = BUFFER_LIMIT_FOR_32_BIT;
+ b->consecutive = consecutive;
+ err = auxtrace_queues__queue_buffer(queues, idx, b);
+ if (err) {
+ auxtrace_buffer__free(b);
+ return err;
+ }
+ buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+ sz -= BUFFER_LIMIT_FOR_32_BIT;
+ consecutive = true;
+ }
+
+ buffer->size = sz;
+ buffer->consecutive = consecutive;
+
+ return 0;
+}
+
+static bool filter_cpu(struct perf_session *session, int cpu)
+{
+ unsigned long *cpu_bitmap = session->itrace_synth_opts->cpu_bitmap;
+
+ return cpu_bitmap && cpu != -1 && !test_bit(cpu, cpu_bitmap);
+}
+
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ unsigned int idx,
+ struct auxtrace_buffer *buffer,
+ struct auxtrace_buffer **buffer_ptr)
+{
+ int err = -ENOMEM;
+
+ if (filter_cpu(session, buffer->cpu))
+ return 0;
+
+ buffer = memdup(buffer, sizeof(*buffer));
+ if (!buffer)
+ return -ENOMEM;
+
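+ /*
+ * If the whole perf.data file is mmapped (one_mmap), point into it
+ * directly; pipe data must be copied out; on 32-bit, large buffers are
+ * split so they can be mapped piecemeal later.
+ */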
+ if (session->one_mmap) {
+ buffer->data = buffer->data_offset - session->one_mmap_offset +
+ session->one_mmap_addr;
+ } else if (perf_data__is_pipe(session->data)) {
+ buffer->data = auxtrace_copy_data(buffer->size, session);
+ if (!buffer->data)
+ goto out_free;
+ buffer->data_needs_freeing = true;
+ } else if (BITS_PER_LONG == 32 &&
+ buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
+ err = auxtrace_queues__split_buffer(queues, idx, buffer);
+ if (err)
+ goto out_free;
+ }
+
+ err = auxtrace_queues__queue_buffer(queues, idx, buffer);
+ if (err)
+ goto out_free;
+
+ /* FIXME: Doesn't work for split buffer */
+ if (buffer_ptr)
+ *buffer_ptr = buffer;
+
+ return 0;
+
+out_free:
+ auxtrace_buffer__free(buffer);
+ return err;
+}
+
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ union perf_event *event, off_t data_offset,
+ struct auxtrace_buffer **buffer_ptr)
+{
+ struct auxtrace_buffer buffer = {
+ .pid = -1,
+ .tid = event->auxtrace.tid,
+ .cpu = event->auxtrace.cpu,
+ .data_offset = data_offset,
+ .offset = event->auxtrace.offset,
+ .reference = event->auxtrace.reference,
+ .size = event->auxtrace.size,
+ };
+ unsigned int idx = event->auxtrace.idx;
+
+ return auxtrace_queues__add_buffer(queues, session, idx, &buffer,
+ buffer_ptr);
+}
+
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ off_t file_offset, size_t sz)
+{
+ union perf_event *event;
+ int err;
+ char buf[PERF_SAMPLE_MAX_SIZE];
+
+ err = perf_session__peek_event(session, file_offset, buf,
+ PERF_SAMPLE_MAX_SIZE, &event, NULL);
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_AUXTRACE) {
+ if (event->header.size < sizeof(struct auxtrace_event) ||
+ event->header.size != sz) {
+ err = -EINVAL;
+ goto out;
+ }
+ file_offset += event->header.size;
+ err = auxtrace_queues__add_event(queues, session, event,
+ file_offset, NULL);
+ }
+out:
+ return err;
+}
+
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ while (!list_empty(&queues->queue_array[i].head)) {
+ struct auxtrace_buffer *buffer;
+
+ buffer = list_entry(queues->queue_array[i].head.next,
+ struct auxtrace_buffer, list);
+ list_del(&buffer->list);
+ auxtrace_buffer__free(buffer);
+ }
+ }
+
+ zfree(&queues->queue_array);
+ queues->nr_queues = 0;
+}
+
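+/*
+ * Sift a new entry up a min-heap ordered by 'ordinal' (typically a
+ * timestamp), starting at position 'pos'.
+ */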
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+ unsigned int pos, unsigned int queue_nr,
+ u64 ordinal)
+{
+ unsigned int parent;
+
+ while (pos) {
+ parent = (pos - 1) >> 1;
+ if (heap_array[parent].ordinal <= ordinal)
+ break;
+ heap_array[pos] = heap_array[parent];
+ pos = parent;
+ }
+ heap_array[pos].queue_nr = queue_nr;
+ heap_array[pos].ordinal = ordinal;
+}
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+ u64 ordinal)
+{
+ struct auxtrace_heap_item *heap_array;
+
+ if (queue_nr >= heap->heap_sz) {
+ unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
+
+ while (heap_sz <= queue_nr)
+ heap_sz <<= 1;
+ heap_array = realloc(heap->heap_array,
+ heap_sz * sizeof(struct auxtrace_heap_item));
+ if (!heap_array)
+ return -ENOMEM;
+ heap->heap_array = heap_array;
+ heap->heap_sz = heap_sz;
+ }
+
+ auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
+
+ return 0;
+}
+
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+ zfree(&heap->heap_array);
+ heap->heap_cnt = 0;
+ heap->heap_sz = 0;
+}
+
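+/* Remove the minimum: sift the hole at the root down, then re-insert the last entry. */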
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+ unsigned int pos, last, heap_cnt = heap->heap_cnt;
+ struct auxtrace_heap_item *heap_array;
+
+ if (!heap_cnt)
+ return;
+
+ heap->heap_cnt -= 1;
+
+ heap_array = heap->heap_array;
+
+ pos = 0;
+ while (1) {
+ unsigned int left, right;
+
+ left = (pos << 1) + 1;
+ if (left >= heap_cnt)
+ break;
+ right = left + 1;
+ if (right >= heap_cnt) {
+ heap_array[pos] = heap_array[left];
+ return;
+ }
+ if (heap_array[left].ordinal < heap_array[right].ordinal) {
+ heap_array[pos] = heap_array[left];
+ pos = left;
+ } else {
+ heap_array[pos] = heap_array[right];
+ pos = right;
+ }
+ }
+
+ last = heap_cnt - 1;
+ auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+ heap_array[last].ordinal);
+}
+
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+ struct perf_evlist *evlist)
+{
+ if (itr)
+ return itr->info_priv_size(itr, evlist);
+ return 0;
+}
+
+static int auxtrace_not_supported(void)
+{
+ pr_err("AUX area tracing is not supported on this architecture\n");
+ return -EINVAL;
+}
+
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size)
+{
+ if (itr)
+ return itr->info_fill(itr, session, auxtrace_info, priv_size);
+ return auxtrace_not_supported();
+}
+
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+ if (itr)
+ itr->free(itr);
+}
+
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+ if (itr && itr->snapshot_start)
+ return itr->snapshot_start(itr);
+ return 0;
+}
+
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr)
+{
+ if (itr && itr->snapshot_finish)
+ return itr->snapshot_finish(itr);
+ return 0;
+}
+
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm,
+ unsigned char *data, u64 *head, u64 *old)
+{
+ if (itr && itr->find_snapshot)
+ return itr->find_snapshot(itr, idx, mm, data, head, old);
+ return 0;
+}
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts)
+{
+ if (itr)
+ return itr->recording_options(itr, evlist, opts);
+ return 0;
+}
+
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+ if (itr)
+ return itr->reference(itr);
+ return 0;
+}
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts, const char *str)
+{
+ if (!str)
+ return 0;
+
+ if (itr)
+ return itr->parse_snapshot_options(itr, opts, str);
+
+ pr_err("No AUX area tracing to snapshot\n");
+ return -EINVAL;
+}
+
+struct auxtrace_record *__weak
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
+{
+ *err = 0;
+ return NULL;
+}
+
+static int auxtrace_index__alloc(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+
+ auxtrace_index = malloc(sizeof(struct auxtrace_index));
+ if (!auxtrace_index)
+ return -ENOMEM;
+
+ auxtrace_index->nr = 0;
+ INIT_LIST_HEAD(&auxtrace_index->list);
+
+ list_add_tail(&auxtrace_index->list, head);
+
+ return 0;
+}
+
+void auxtrace_index__free(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index, *n;
+
+ list_for_each_entry_safe(auxtrace_index, n, head, list) {
+ list_del(&auxtrace_index->list);
+ free(auxtrace_index);
+ }
+}
+
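+/* Return the last entry array of the index, allocating a new one when empty or full. */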
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+ int err;
+
+ if (list_empty(head)) {
+ err = auxtrace_index__alloc(head);
+ if (err)
+ return NULL;
+ }
+
+ auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
+
+ if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
+ err = auxtrace_index__alloc(head);
+ if (err)
+ return NULL;
+ auxtrace_index = list_entry(head->prev, struct auxtrace_index,
+ list);
+ }
+
+ return auxtrace_index;
+}
+
+int auxtrace_index__auxtrace_event(struct list_head *head,
+ union perf_event *event, off_t file_offset)
+{
+ struct auxtrace_index *auxtrace_index;
+ size_t nr;
+
+ auxtrace_index = auxtrace_index__last(head);
+ if (!auxtrace_index)
+ return -ENOMEM;
+
+ nr = auxtrace_index->nr;
+ auxtrace_index->entries[nr].file_offset = file_offset;
+ auxtrace_index->entries[nr].sz = event->header.size;
+ auxtrace_index->nr += 1;
+
+ return 0;
+}
+
+static int auxtrace_index__do_write(int fd,
+ struct auxtrace_index *auxtrace_index)
+{
+ struct auxtrace_index_entry ent;
+ size_t i;
+
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent.file_offset = auxtrace_index->entries[i].file_offset;
+ ent.sz = auxtrace_index->entries[i].sz;
+ if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+ return -errno;
+ }
+ return 0;
+}
+
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+ struct auxtrace_index *auxtrace_index;
+ u64 total = 0;
+ int err;
+
+ list_for_each_entry(auxtrace_index, head, list)
+ total += auxtrace_index->nr;
+
+ if (writen(fd, &total, sizeof(total)) != sizeof(total))
+ return -errno;
+
+ list_for_each_entry(auxtrace_index, head, list) {
+ err = auxtrace_index__do_write(fd, auxtrace_index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+ bool needs_swap)
+{
+ struct auxtrace_index *auxtrace_index;
+ struct auxtrace_index_entry ent;
+ size_t nr;
+
+ if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
+ return -1;
+
+ auxtrace_index = auxtrace_index__last(head);
+ if (!auxtrace_index)
+ return -1;
+
+ nr = auxtrace_index->nr;
+ if (needs_swap) {
+ auxtrace_index->entries[nr].file_offset =
+ bswap_64(ent.file_offset);
+ auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
+ } else {
+ auxtrace_index->entries[nr].file_offset = ent.file_offset;
+ auxtrace_index->entries[nr].sz = ent.sz;
+ }
+
+ auxtrace_index->nr = nr + 1;
+
+ return 0;
+}
+
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+ bool needs_swap)
+{
+ struct list_head *head = &session->auxtrace_index;
+ u64 nr;
+
+ if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+ return -1;
+
+ if (needs_swap)
+ nr = bswap_64(nr);
+
+ if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
+ return -1;
+
+ while (nr--) {
+ int err;
+
+ err = auxtrace_index__process_entry(fd, head, needs_swap);
+ if (err)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ struct auxtrace_index_entry *ent)
+{
+ return auxtrace_queues__add_indexed_event(queues, session,
+ ent->file_offset, ent->sz);
+}
+
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+ struct perf_session *session)
+{
+ struct auxtrace_index *auxtrace_index;
+ struct auxtrace_index_entry *ent;
+ size_t i;
+ int err;
+
+ if (auxtrace__dont_decode(session))
+ return 0;
+
+ list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+ for (i = 0; i < auxtrace_index->nr; i++) {
+ ent = &auxtrace_index->entries[i];
+ err = auxtrace_queues__process_index_entry(queues,
+ session,
+ ent);
+ if (err)
+ return err;
+ }
+ }
+ return 0;
+}
+
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer)
+{
+ if (buffer) {
+ if (list_is_last(&buffer->list, &queue->head))
+ return NULL;
+ return list_entry(buffer->list.next, struct auxtrace_buffer,
+ list);
+ } else {
+ if (list_empty(&queue->head))
+ return NULL;
+ return list_entry(queue->head.next, struct auxtrace_buffer,
+ list);
+ }
+}
+
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+ size_t adj = buffer->data_offset & (page_size - 1);
+ size_t size = buffer->size + adj;
+ off_t file_offset = buffer->data_offset - adj;
+ void *addr;
+
+ if (buffer->data)
+ return buffer->data;
+
+ addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
+ if (addr == MAP_FAILED)
+ return NULL;
+
+ buffer->mmap_addr = addr;
+ buffer->mmap_size = size;
+
+ buffer->data = addr + adj;
+
+ return buffer->data;
+}
+
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+ if (!buffer->data || !buffer->mmap_addr)
+ return;
+ munmap(buffer->mmap_addr, buffer->mmap_size);
+ buffer->mmap_addr = NULL;
+ buffer->mmap_size = 0;
+ buffer->data = NULL;
+ buffer->use_data = NULL;
+}
+
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+ auxtrace_buffer__put_data(buffer);
+ if (buffer->data_needs_freeing) {
+ buffer->data_needs_freeing = false;
+ zfree(&buffer->data);
+ buffer->use_data = NULL;
+ buffer->size = 0;
+ }
+}
+
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+ auxtrace_buffer__drop_data(buffer);
+ free(buffer);
+}
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg)
+{
+ size_t size;
+
+ memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event));
+
+ auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+ auxtrace_error->type = type;
+ auxtrace_error->code = code;
+ auxtrace_error->cpu = cpu;
+ auxtrace_error->pid = pid;
+ auxtrace_error->tid = tid;
+ auxtrace_error->ip = ip;
+ strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+
+ size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
+ strlen(auxtrace_error->msg) + 1;
+ auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+ struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process)
+{
+ union perf_event *ev;
+ size_t priv_size;
+ int err;
+
+ pr_debug2("Synthesizing auxtrace information\n");
+ priv_size = auxtrace_record__info_priv_size(itr, session->evlist);
+ ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+ ev->auxtrace_info.header.size = sizeof(struct auxtrace_info_event) +
+ priv_size;
+ err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+ priv_size);
+ if (err)
+ goto out_free;
+
+ err = process(tool, ev, NULL, NULL);
+out_free:
+ free(ev);
+ return err;
+}
+
+int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ enum auxtrace_type type = event->auxtrace_info.type;
+
+ if (dump_trace)
+ fprintf(stdout, " type: %u\n", type);
+
+ switch (type) {
+ case PERF_AUXTRACE_INTEL_PT:
+ return intel_pt_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_INTEL_BTS:
+ return intel_bts_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_ARM_SPE:
+ return arm_spe_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_CS_ETM:
+ return cs_etm__process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_S390_CPUMSF:
+ return s390_cpumsf_process_auxtrace_info(event, session);
+ case PERF_AUXTRACE_UNKNOWN:
+ default:
+ return -EINVAL;
+ }
+}
+
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ s64 err;
+
+ if (dump_trace)
+ fprintf(stdout, " size: %#"PRIx64" offset: %#"PRIx64" ref: %#"PRIx64" idx: %u tid: %d cpu: %d\n",
+ event->auxtrace.size, event->auxtrace.offset,
+ event->auxtrace.reference, event->auxtrace.idx,
+ event->auxtrace.tid, event->auxtrace.cpu);
+
+ if (auxtrace__dont_decode(session))
+ return event->auxtrace.size;
+
+ if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
+ return -EINVAL;
+
+ err = session->auxtrace->process_auxtrace_event(session, event, tool);
+ if (err < 0)
+ return err;
+
+ return event->auxtrace.size;
+}
+
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD 100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024
+#define PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ 64
+#define PERF_ITRACE_MAX_LAST_BRANCH_SZ 1024
+
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+{
+ synth_opts->instructions = true;
+ synth_opts->branches = true;
+ synth_opts->transactions = true;
+ synth_opts->ptwrites = true;
+ synth_opts->pwr_events = true;
+ synth_opts->errors = true;
+ synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+ synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+ synth_opts->initial_skip = 0;
+}
+
+/*
+ * See tools/perf/Documentation/perf-script.txt for information about the
+ * options parsed here. That documentation is introduced after this cset,
+ * when 'perf script' gains support for these options.
+ */
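+/*
+ * For example, "i100us" synthesizes an 'instructions' sample every
+ * 100 microseconds: the 'u' and 'm' suffixes scale the nanosecond
+ * period by 10^3 and 10^6 respectively.
+ */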
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+ int unset)
+{
+ struct itrace_synth_opts *synth_opts = opt->value;
+ const char *p;
+ char *endptr;
+ bool period_type_set = false;
+ bool period_set = false;
+
+ synth_opts->set = true;
+
+ if (unset) {
+ synth_opts->dont_decode = true;
+ return 0;
+ }
+
+ if (!str) {
+ itrace_synth_opts__set_default(synth_opts);
+ return 0;
+ }
+
+ for (p = str; *p;) {
+ switch (*p++) {
+ case 'i':
+ synth_opts->instructions = true;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ synth_opts->period = strtoull(p, &endptr, 10);
+ period_set = true;
+ p = endptr;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ switch (*p++) {
+ case 'i':
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ period_type_set = true;
+ break;
+ case 't':
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_TICKS;
+ period_type_set = true;
+ break;
+ case 'm':
+ synth_opts->period *= 1000;
+ /* Fall through */
+ case 'u':
+ synth_opts->period *= 1000;
+ /* Fall through */
+ case 'n':
+ if (*p++ != 's')
+ goto out_err;
+ synth_opts->period_type =
+ PERF_ITRACE_PERIOD_NANOSECS;
+ period_type_set = true;
+ break;
+ case '\0':
+ goto out;
+ default:
+ goto out_err;
+ }
+ }
+ break;
+ case 'b':
+ synth_opts->branches = true;
+ break;
+ case 'x':
+ synth_opts->transactions = true;
+ break;
+ case 'w':
+ synth_opts->ptwrites = true;
+ break;
+ case 'p':
+ synth_opts->pwr_events = true;
+ break;
+ case 'e':
+ synth_opts->errors = true;
+ break;
+ case 'd':
+ synth_opts->log = true;
+ break;
+ case 'c':
+ synth_opts->branches = true;
+ synth_opts->calls = true;
+ break;
+ case 'r':
+ synth_opts->branches = true;
+ synth_opts->returns = true;
+ break;
+ case 'g':
+ synth_opts->callchain = true;
+ synth_opts->callchain_sz =
+ PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ unsigned int val;
+
+ val = strtoul(p, &endptr, 10);
+ p = endptr;
+ if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+ goto out_err;
+ synth_opts->callchain_sz = val;
+ }
+ break;
+ case 'l':
+ synth_opts->last_branch = true;
+ synth_opts->last_branch_sz =
+ PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+ while (*p == ' ' || *p == ',')
+ p += 1;
+ if (isdigit(*p)) {
+ unsigned int val;
+
+ val = strtoul(p, &endptr, 10);
+ p = endptr;
+ if (!val ||
+ val > PERF_ITRACE_MAX_LAST_BRANCH_SZ)
+ goto out_err;
+ synth_opts->last_branch_sz = val;
+ }
+ break;
+ case 's':
+ synth_opts->initial_skip = strtoul(p, &endptr, 10);
+ if (p == endptr)
+ goto out_err;
+ p = endptr;
+ break;
+ case ' ':
+ case ',':
+ break;
+ default:
+ goto out_err;
+ }
+ }
+out:
+ if (synth_opts->instructions) {
+ if (!period_type_set)
+ synth_opts->period_type =
+ PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+ if (!period_set)
+ synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+ }
+
+ return 0;
+
+out_err:
+ pr_err("Bad Instruction Tracing options '%s'\n", str);
+ return -EINVAL;
+}
+
+static const char * const auxtrace_error_type_name[] = {
+ [PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+static const char *auxtrace_error_name(int type)
+{
+ const char *error_type_name = NULL;
+
+ if (type < PERF_AUXTRACE_ERROR_MAX)
+ error_type_name = auxtrace_error_type_name[type];
+ if (!error_type_name)
+ error_type_name = "unknown AUX";
+ return error_type_name;
+}
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+ struct auxtrace_error_event *e = &event->auxtrace_error;
+ int ret;
+
+ ret = fprintf(fp, " %s error type %u",
+ auxtrace_error_name(e->type), e->type);
+ ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
+ e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
+ return ret;
+}
+
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+ union perf_event *event)
+{
+ struct auxtrace_error_event *e = &event->auxtrace_error;
+
+ if (e->type < PERF_AUXTRACE_ERROR_MAX)
+ session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+ int i;
+
+ for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
+ if (!stats->nr_auxtrace_errors[i])
+ continue;
+ ui__warning("%u %s errors\n",
+ stats->nr_auxtrace_errors[i],
+ auxtrace_error_name(i));
+ }
+}
+
+int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ if (auxtrace__dont_decode(session))
+ return 0;
+
+ perf_event__fprintf_auxtrace_error(event, stdout);
+ return 0;
+}
+
+static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ bool snapshot, size_t snapshot_size)
+{
+ u64 head, old = mm->prev, offset, ref;
+ unsigned char *data = mm->base;
+ size_t size, head_off, old_off, len1, len2, padding;
+ union perf_event ev;
+ void *data1, *data2;
+
+ if (snapshot) {
+ head = auxtrace_mmap__read_snapshot_head(mm);
+ if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
+ &head, &old))
+ return -1;
+ } else {
+ head = auxtrace_mmap__read_head(mm);
+ }
+
+ if (old == head)
+ return 0;
+
+ pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+ mm->idx, old, head, head - old);
+
+ if (mm->mask) {
+ head_off = head & mm->mask;
+ old_off = old & mm->mask;
+ } else {
+ head_off = head % mm->len;
+ old_off = old % mm->len;
+ }
+
+ if (head_off > old_off)
+ size = head_off - old_off;
+ else
+ size = mm->len - (old_off - head_off);
+
+ if (snapshot && size > snapshot_size)
+ size = snapshot_size;
+
+ ref = auxtrace_record__reference(itr);
+
+ if (head > old || size <= head || mm->mask) {
+ offset = head - size;
+ } else {
+ /*
+ * When the buffer size is not a power of 2, 'head' wraps at the
+ * highest multiple of the buffer size, so we have to subtract
+ * the remainder here.
+ */
+ u64 rem = (0ULL - mm->len) % mm->len;
+
+ offset = head - size - rem;
+ }
+
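+ /*
+ * The data may wrap around the end of the ring buffer, in which case it
+ * is returned in two parts: data1/len1 up to the end of the buffer and
+ * data2/len2 from its start.
+ */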
+ if (size > head_off) {
+ len1 = size - head_off;
+ data1 = &data[mm->len - len1];
+ len2 = head_off;
+ data2 = &data[0];
+ } else {
+ len1 = size;
+ data1 = &data[head_off - len1];
+ len2 = 0;
+ data2 = NULL;
+ }
+
+ if (itr->alignment) {
+ unsigned int unwanted = len1 % itr->alignment;
+
+ len1 -= unwanted;
+ size -= unwanted;
+ }
+
+ /* padding must be written by fn() e.g. record__process_auxtrace() */
+ padding = size & (PERF_AUXTRACE_RECORD_ALIGNMENT - 1);
+ if (padding)
+ padding = PERF_AUXTRACE_RECORD_ALIGNMENT - padding;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+ ev.auxtrace.header.size = sizeof(ev.auxtrace);
+ ev.auxtrace.size = size + padding;
+ ev.auxtrace.offset = offset;
+ ev.auxtrace.reference = ref;
+ ev.auxtrace.idx = mm->idx;
+ ev.auxtrace.tid = mm->tid;
+ ev.auxtrace.cpu = mm->cpu;
+
+ if (fn(tool, &ev, data1, len1, data2, len2))
+ return -1;
+
+ mm->prev = head;
+
+ if (!snapshot) {
+ auxtrace_mmap__write_tail(mm, head);
+ if (itr->read_finish) {
+ int err;
+
+ err = itr->read_finish(itr, mm->idx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 1;
+}
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn)
+{
+ return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
+}
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ size_t snapshot_size)
+{
+ return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+}
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ * is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+ struct hlist_head *hashtable;
+ size_t sz;
+ size_t entry_size;
+ size_t limit;
+ size_t cnt;
+ unsigned int bits;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+ unsigned int limit_percent)
+{
+ struct auxtrace_cache *c;
+ struct hlist_head *ht;
+ size_t sz, i;
+
+ c = zalloc(sizeof(struct auxtrace_cache));
+ if (!c)
+ return NULL;
+
+ sz = 1UL << bits;
+
+ ht = calloc(sz, sizeof(struct hlist_head));
+ if (!ht)
+ goto out_free;
+
+ for (i = 0; i < sz; i++)
+ INIT_HLIST_HEAD(&ht[i]);
+
+ c->hashtable = ht;
+ c->sz = sz;
+ c->entry_size = entry_size;
+ c->limit = (c->sz * limit_percent) / 100;
+ c->bits = bits;
+
+ return c;
+
+out_free:
+ free(c);
+ return NULL;
+}
+
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_node *tmp;
+ size_t i;
+
+ if (!c)
+ return;
+
+ for (i = 0; i < c->sz; i++) {
+ hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+ hlist_del(&entry->hash);
+ auxtrace_cache__free_entry(c, entry);
+ }
+ }
+
+ c->cnt = 0;
+}
+
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+ if (!c)
+ return;
+
+ auxtrace_cache__drop(c);
+ free(c->hashtable);
+ free(c);
+}
+
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+ return malloc(c->entry_size);
+}
+
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+ void *entry)
+{
+ free(entry);
+}
+
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+ struct auxtrace_cache_entry *entry)
+{
+ if (c->limit && ++c->cnt > c->limit)
+ auxtrace_cache__drop(c);
+
+ entry->key = key;
+ hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+ return 0;
+}
+
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+ struct auxtrace_cache_entry *entry;
+ struct hlist_head *hlist;
+
+ if (!c)
+ return NULL;
+
+ hlist = &c->hashtable[hash_32(key, c->bits)];
+ hlist_for_each_entry(entry, hlist, hash) {
+ if (entry->key == key)
+ return entry;
+ }
+
+ return NULL;
+}
+
+static void addr_filter__free_str(struct addr_filter *filt)
+{
+ free(filt->str);
+ filt->action = NULL;
+ filt->sym_from = NULL;
+ filt->sym_to = NULL;
+ filt->filename = NULL;
+ filt->str = NULL;
+}
+
+static struct addr_filter *addr_filter__new(void)
+{
+ struct addr_filter *filt = zalloc(sizeof(*filt));
+
+ if (filt)
+ INIT_LIST_HEAD(&filt->list);
+
+ return filt;
+}
+
+static void addr_filter__free(struct addr_filter *filt)
+{
+ if (filt)
+ addr_filter__free_str(filt);
+ free(filt);
+}
+
+static void addr_filters__add(struct addr_filters *filts,
+ struct addr_filter *filt)
+{
+ list_add_tail(&filt->list, &filts->head);
+ filts->cnt += 1;
+}
+
+static void addr_filters__del(struct addr_filters *filts,
+ struct addr_filter *filt)
+{
+ list_del_init(&filt->list);
+ filts->cnt -= 1;
+}
+
+void addr_filters__init(struct addr_filters *filts)
+{
+ INIT_LIST_HEAD(&filts->head);
+ filts->cnt = 0;
+}
+
+void addr_filters__exit(struct addr_filters *filts)
+{
+ struct addr_filter *filt, *n;
+
+ list_for_each_entry_safe(filt, n, &filts->head, list) {
+ addr_filters__del(filts, filt);
+ addr_filter__free(filt);
+ }
+}
+
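+/*
+ * Parse either an unsigned number or a token ended by one of the
+ * 'str_delim' characters. Tokens are NUL-terminated in place and *inp is
+ * advanced past whatever was consumed.
+ */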
+static int parse_num_or_str(char **inp, u64 *num, const char **str,
+ const char *str_delim)
+{
+ *inp += strspn(*inp, " ");
+
+ if (isdigit(**inp)) {
+ char *endptr;
+
+ if (!num)
+ return -EINVAL;
+ errno = 0;
+ *num = strtoull(*inp, &endptr, 0);
+ if (errno)
+ return -errno;
+ if (endptr == *inp)
+ return -EINVAL;
+ *inp = endptr;
+ } else {
+ size_t n;
+
+ if (!str)
+ return -EINVAL;
+ *inp += strspn(*inp, " ");
+ *str = *inp;
+ n = strcspn(*inp, str_delim);
+ if (!n)
+ return -EINVAL;
+ *inp += n;
+ if (**inp) {
+ **inp = '\0';
+ *inp += 1;
+ }
+ }
+ return 0;
+}
+
+static int parse_action(struct addr_filter *filt)
+{
+ if (!strcmp(filt->action, "filter")) {
+ filt->start = true;
+ filt->range = true;
+ } else if (!strcmp(filt->action, "start")) {
+ filt->start = true;
+ } else if (!strcmp(filt->action, "stop")) {
+ filt->start = false;
+ } else if (!strcmp(filt->action, "tracestop")) {
+ filt->start = false;
+ filt->range = true;
+ filt->action += 5; /* Change 'tracestop' to 'stop' */
+ } else {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int parse_sym_idx(char **inp, int *idx)
+{
+ *idx = -1;
+
+ *inp += strspn(*inp, " ");
+
+ if (**inp != '#')
+ return 0;
+
+ *inp += 1;
+
+ if (**inp == 'g' || **inp == 'G') {
+ *inp += 1;
+ *idx = 0;
+ } else {
+ unsigned long num;
+ char *endptr;
+
+ errno = 0;
+ num = strtoul(*inp, &endptr, 0);
+ if (errno)
+ return -errno;
+ if (endptr == *inp || num > INT_MAX)
+ return -EINVAL;
+ *inp = endptr;
+ *idx = num;
+ }
+
+ return 0;
+}
+
+static int parse_addr_size(char **inp, u64 *num, const char **str, int *idx)
+{
+ int err = parse_num_or_str(inp, num, str, " ");
+
+ if (!err && *str)
+ err = parse_sym_idx(inp, idx);
+
+ return err;
+}
+
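+/* One filter: <action> <addr|sym[#n]> [/ <size|sym[#n]>] [@<filename>], space or comma separated */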
+static int parse_one_filter(struct addr_filter *filt, const char **filter_inp)
+{
+ char *fstr;
+ int err;
+
+ filt->str = fstr = strdup(*filter_inp);
+ if (!fstr)
+ return -ENOMEM;
+
+ err = parse_num_or_str(&fstr, NULL, &filt->action, " ");
+ if (err)
+ goto out_err;
+
+ err = parse_action(filt);
+ if (err)
+ goto out_err;
+
+ err = parse_addr_size(&fstr, &filt->addr, &filt->sym_from,
+ &filt->sym_from_idx);
+ if (err)
+ goto out_err;
+
+ fstr += strspn(fstr, " ");
+
+ if (*fstr == '/') {
+ fstr += 1;
+ err = parse_addr_size(&fstr, &filt->size, &filt->sym_to,
+ &filt->sym_to_idx);
+ if (err)
+ goto out_err;
+ filt->range = true;
+ }
+
+ fstr += strspn(fstr, " ");
+
+ if (*fstr == '@') {
+ fstr += 1;
+ err = parse_num_or_str(&fstr, NULL, &filt->filename, " ,");
+ if (err)
+ goto out_err;
+ }
+
+ fstr += strspn(fstr, " ,");
+
+ *filter_inp += fstr - filt->str;
+
+ return 0;
+
+out_err:
+ addr_filter__free_str(filt);
+
+ return err;
+}
+
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter)
+{
+ struct addr_filter *filt;
+ const char *fstr = filter;
+ int err;
+
+ while (*fstr) {
+ filt = addr_filter__new();
+ if (!filt) {
+ addr_filters__exit(filts);
+ return -ENOMEM;
+ }
+ err = parse_one_filter(filt, &fstr);
+ if (err) {
+ addr_filter__free(filt);
+ addr_filters__exit(filts);
+ return err;
+ }
+ addr_filters__add(filts, filt);
+ }
+
+ return 0;
+}
+
+struct sym_args {
+ const char *name;
+ u64 start;
+ u64 size;
+ int idx;
+ int cnt;
+ bool started;
+ bool global;
+ bool selected;
+ bool duplicate;
+ bool near;
+};
+
+static bool kern_sym_match(struct sym_args *args, const char *name, char type)
+{
+ /* A function with the same name, and global or the n'th found or any */
+ return kallsyms__is_function(type) &&
+ !strcmp(name, args->name) &&
+ ((args->global && isupper(type)) ||
+ (args->selected && ++(args->cnt) == args->idx) ||
+ (!args->global && !args->selected));
+}
+
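+/*
+ * kallsyms does not record symbol sizes, so a symbol's size is inferred
+ * from the start of the next symbol in /proc/kallsyms.
+ */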
+static int find_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (args->started) {
+ if (!args->size)
+ args->size = start - args->start;
+ if (args->selected) {
+ if (args->size)
+ return 1;
+ } else if (kern_sym_match(args, name, type)) {
+ args->duplicate = true;
+ return 1;
+ }
+ } else if (kern_sym_match(args, name, type)) {
+ args->started = true;
+ args->start = start;
+ }
+
+ return 0;
+}
+
+static int print_kern_sym_cb(void *arg, const char *name, char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (kern_sym_match(args, name, type)) {
+ pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+ ++args->cnt, start, type, name);
+ args->near = true;
+ } else if (args->near) {
+ args->near = false;
+ pr_err("\t\twhich is near\t\t%s\n", name);
+ }
+
+ return 0;
+}
+
+static int sym_not_found_error(const char *sym_name, int idx)
+{
+ if (idx > 0) {
+ pr_err("N'th occurrence (N=%d) of symbol '%s' not found.\n",
+ idx, sym_name);
+ } else if (!idx) {
+ pr_err("Global symbol '%s' not found.\n", sym_name);
+ } else {
+ pr_err("Symbol '%s' not found.\n", sym_name);
+ }
+ pr_err("Note that symbols must be functions.\n");
+
+ return -EINVAL;
+}
+
+static int find_kern_sym(const char *sym_name, u64 *start, u64 *size, int idx)
+{
+ struct sym_args args = {
+ .name = sym_name,
+ .idx = idx,
+ .global = !idx,
+ .selected = idx > 0,
+ };
+ int err;
+
+ *start = 0;
+ *size = 0;
+
+ err = kallsyms__parse("/proc/kallsyms", &args, find_kern_sym_cb);
+ if (err < 0) {
+ pr_err("Failed to parse /proc/kallsyms\n");
+ return err;
+ }
+
+ if (args.duplicate) {
+ pr_err("Multiple kernel symbols with name '%s'\n", sym_name);
+ args.cnt = 0;
+ kallsyms__parse("/proc/kallsyms", &args, print_kern_sym_cb);
+ pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+ sym_name);
+ pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+ return -EINVAL;
+ }
+
+ if (!args.started) {
+ pr_err("Kernel symbol lookup: ");
+ return sym_not_found_error(sym_name, idx);
+ }
+
+ *start = args.start;
+ *size = args.size;
+
+ return 0;
+}
+
+static int find_entire_kern_cb(void *arg, const char *name __maybe_unused,
+ char type, u64 start)
+{
+ struct sym_args *args = arg;
+
+ if (!kallsyms__is_function(type))
+ return 0;
+
+ if (!args->started) {
+ args->started = true;
+ args->start = start;
+ }
+ /* Don't know exactly where the kernel ends, so we add a page */
+ args->size = round_up(start, page_size) + page_size - args->start;
+
+ return 0;
+}
+
+static int addr_filter__entire_kernel(struct addr_filter *filt)
+{
+ struct sym_args args = { .started = false };
+ int err;
+
+ err = kallsyms__parse("/proc/kallsyms", &args, find_entire_kern_cb);
+ if (err < 0 || !args.started) {
+ pr_err("Failed to parse /proc/kallsyms\n");
+ return err < 0 ? err : -EINVAL;
+ }
+
+ filt->addr = args.start;
+ filt->size = args.size;
+
+ return 0;
+}
+
+static int check_end_after_start(struct addr_filter *filt, u64 start, u64 size)
+{
+ if (start + size >= filt->addr)
+ return 0;
+
+ if (filt->sym_from) {
+ pr_err("Symbol '%s' (0x%"PRIx64") comes before '%s' (0x%"PRIx64")\n",
+ filt->sym_to, start, filt->sym_from, filt->addr);
+ } else {
+ pr_err("Symbol '%s' (0x%"PRIx64") comes before address 0x%"PRIx64")\n",
+ filt->sym_to, start, filt->addr);
+ }
+
+ return -EINVAL;
+}
+
+static int addr_filter__resolve_kernel_syms(struct addr_filter *filt)
+{
+ bool no_size = false;
+ u64 start, size;
+ int err;
+
+ if (symbol_conf.kptr_restrict) {
+ pr_err("Kernel addresses are restricted. Unable to resolve kernel symbols.\n");
+ return -EINVAL;
+ }
+
+ if (filt->sym_from && !strcmp(filt->sym_from, "*"))
+ return addr_filter__entire_kernel(filt);
+
+ if (filt->sym_from) {
+ err = find_kern_sym(filt->sym_from, &start, &size,
+ filt->sym_from_idx);
+ if (err)
+ return err;
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to) {
+ filt->size = size;
+ no_size = !size;
+ }
+ }
+
+ if (filt->sym_to) {
+ err = find_kern_sym(filt->sym_to, &start, &size,
+ filt->sym_to_idx);
+ if (err)
+ return err;
+
+ err = check_end_after_start(filt, start, size);
+ if (err)
+ return err;
+ filt->size = start + size - filt->addr;
+ no_size = !size;
+ }
+
+ /* The very last symbol in kallsyms does not imply a particular size */
+ if (no_size) {
+ pr_err("Cannot determine size of symbol '%s'\n",
+ filt->sym_to ? filt->sym_to : filt->sym_from);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct dso *load_dso(const char *name)
+{
+ struct map *map;
+ struct dso *dso;
+
+ map = dso__new_map(name);
+ if (!map)
+ return NULL;
+
+ map__load(map);
+
+ dso = dso__get(map->dso);
+
+ map__put(map);
+
+ return dso;
+}
+
+static bool dso_sym_match(struct symbol *sym, const char *name, int *cnt,
+ int idx)
+{
+ /* Same name, and global or the n'th found or any */
+ return !arch__compare_symbol_names(name, sym->name) &&
+ ((!idx && sym->binding == STB_GLOBAL) ||
+ (idx > 0 && ++*cnt == idx) ||
+ idx < 0);
+}
+
+static void print_duplicate_syms(struct dso *dso, const char *sym_name)
+{
+ struct symbol *sym;
+ bool near = false;
+ int cnt = 0;
+
+ pr_err("Multiple symbols with name '%s'\n", sym_name);
+
+ sym = dso__first_symbol(dso);
+ while (sym) {
+ if (dso_sym_match(sym, sym_name, &cnt, -1)) {
+ pr_err("#%d\t0x%"PRIx64"\t%c\t%s\n",
+ ++cnt, sym->start,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w',
+ sym->name);
+ near = true;
+ } else if (near) {
+ near = false;
+ pr_err("\t\twhich is near\t\t%s\n", sym->name);
+ }
+ sym = dso__next_symbol(sym);
+ }
+
+ pr_err("Disambiguate symbol name by inserting #n after the name e.g. %s #2\n",
+ sym_name);
+ pr_err("Or select a global symbol by inserting #0 or #g or #G\n");
+}
+
+static int find_dso_sym(struct dso *dso, const char *sym_name, u64 *start,
+ u64 *size, int idx)
+{
+ struct symbol *sym;
+ int cnt = 0;
+
+ *start = 0;
+ *size = 0;
+
+ sym = dso__first_symbol(dso);
+ while (sym) {
+ if (*start) {
+ if (!*size)
+ *size = sym->start - *start;
+ if (idx > 0) {
+ if (*size)
+ return 1;
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ print_duplicate_syms(dso, sym_name);
+ return -EINVAL;
+ }
+ } else if (dso_sym_match(sym, sym_name, &cnt, idx)) {
+ *start = sym->start;
+ *size = sym->end - sym->start;
+ }
+ sym = dso__next_symbol(sym);
+ }
+
+ if (!*start)
+ return sym_not_found_error(sym_name, idx);
+
+ return 0;
+}
+
+static int addr_filter__entire_dso(struct addr_filter *filt, struct dso *dso)
+{
+ struct symbol *first_sym = dso__first_symbol(dso);
+ struct symbol *last_sym = dso__last_symbol(dso);
+
+ if (!first_sym || !last_sym) {
+ pr_err("Failed to determine filter for %s\nNo symbols found.\n",
+ filt->filename);
+ return -EINVAL;
+ }
+
+ filt->addr = first_sym->start;
+ filt->size = last_sym->end - first_sym->start;
+
+ return 0;
+}
+
+static int addr_filter__resolve_syms(struct addr_filter *filt)
+{
+ u64 start, size;
+ struct dso *dso;
+ int err = 0;
+
+ if (!filt->sym_from && !filt->sym_to)
+ return 0;
+
+ if (!filt->filename)
+ return addr_filter__resolve_kernel_syms(filt);
+
+ dso = load_dso(filt->filename);
+ if (!dso) {
+ pr_err("Failed to load symbols from: %s\n", filt->filename);
+ return -EINVAL;
+ }
+
+ if (filt->sym_from && !strcmp(filt->sym_from, "*")) {
+ err = addr_filter__entire_dso(filt, dso);
+ goto put_dso;
+ }
+
+ if (filt->sym_from) {
+ err = find_dso_sym(dso, filt->sym_from, &start, &size,
+ filt->sym_from_idx);
+ if (err)
+ goto put_dso;
+ filt->addr = start;
+ if (filt->range && !filt->size && !filt->sym_to)
+ filt->size = size;
+ }
+
+ if (filt->sym_to) {
+ err = find_dso_sym(dso, filt->sym_to, &start, &size,
+ filt->sym_to_idx);
+ if (err)
+ goto put_dso;
+
+ err = check_end_after_start(filt, start, size);
+ if (err)
+ return err;
+
+ filt->size = start + size - filt->addr;
+ }
+
+put_dso:
+ dso__put(dso);
+
+ return err;
+}
+
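+/* Render the resolved filter back into the address-based string form the kernel accepts. */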
+static char *addr_filter__to_str(struct addr_filter *filt)
+{
+ char filename_buf[PATH_MAX];
+ const char *at = "";
+ const char *fn = "";
+ char *filter;
+ int err;
+
+ if (filt->filename) {
+ at = "@";
+ fn = realpath(filt->filename, filename_buf);
+ if (!fn)
+ return NULL;
+ }
+
+ if (filt->range) {
+ err = asprintf(&filter, "%s 0x%"PRIx64"/0x%"PRIx64"%s%s",
+ filt->action, filt->addr, filt->size, at, fn);
+ } else {
+ err = asprintf(&filter, "%s 0x%"PRIx64"%s%s",
+ filt->action, filt->addr, at, fn);
+ }
+
+ return err < 0 ? NULL : filter;
+}
+
+static int parse_addr_filter(struct perf_evsel *evsel, const char *filter,
+ int max_nr)
+{
+ struct addr_filters filts;
+ struct addr_filter *filt;
+ int err;
+
+ addr_filters__init(&filts);
+
+ err = addr_filters__parse_bare_filter(&filts, filter);
+ if (err)
+ goto out_exit;
+
+ if (filts.cnt > max_nr) {
+ pr_err("Error: number of address filters (%d) exceeds maximum (%d)\n",
+ filts.cnt, max_nr);
+ err = -EINVAL;
+ goto out_exit;
+ }
+
+ list_for_each_entry(filt, &filts.head, list) {
+ char *new_filter;
+
+ err = addr_filter__resolve_syms(filt);
+ if (err)
+ goto out_exit;
+
+ new_filter = addr_filter__to_str(filt);
+ if (!new_filter) {
+ err = -ENOMEM;
+ goto out_exit;
+ }
+
+ if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+ err = -ENOMEM;
+ goto out_exit;
+ }
+ }
+
+out_exit:
+ addr_filters__exit(&filts);
+
+ if (err) {
+ pr_err("Failed to parse address filter: '%s'\n", filter);
+ pr_err("Filter format is: filter|start|stop|tracestop <start symbol or address> [/ <end symbol or size>] [@<file name>]\n");
+ pr_err("Where multiple filters are separated by space or comma.\n");
+ }
+
+ return err;
+}
+
+static struct perf_pmu *perf_evsel__find_pmu(struct perf_evsel *evsel)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->attr.type)
+ break;
+ }
+
+ return pmu;
+}
+
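+/* Number of address filters the evsel's PMU supports, read from sysfs 'nr_addr_filters'. */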
+static int perf_evsel__nr_addr_filter(struct perf_evsel *evsel)
+{
+ struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+ int nr_addr_filters = 0;
+
+ if (!pmu)
+ return 0;
+
+ perf_pmu__scan_file(pmu, "nr_addr_filters", "%d", &nr_addr_filters);
+
+ return nr_addr_filters;
+}
+
+int auxtrace_parse_filters(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ char *filter;
+ int err, max_nr;
+
+ evlist__for_each_entry(evlist, evsel) {
+ filter = evsel->filter;
+ max_nr = perf_evsel__nr_addr_filter(evsel);
+ if (!filter || !max_nr)
+ continue;
+ evsel->filter = NULL;
+ err = parse_addr_filter(evsel, filter, max_nr);
+ free(filter);
+ if (err)
+ return err;
+ pr_debug("Address filter: %s\n", evsel->filter);
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644
index 000000000..136c5f95f
--- /dev/null
+++ b/tools/perf/util/auxtrace.h
@@ -0,0 +1,725 @@
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/list.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "event.h"
+#include "session.h"
+#include "debug.h"
+
+union perf_event;
+struct perf_session;
+struct perf_evlist;
+struct perf_tool;
+struct option;
+struct record_opts;
+struct auxtrace_info_event;
+struct events_stats;
+
+/* Auxtrace records must have the same alignment as perf event records */
+#define PERF_AUXTRACE_RECORD_ALIGNMENT 8
+
+enum auxtrace_type {
+ PERF_AUXTRACE_UNKNOWN,
+ PERF_AUXTRACE_INTEL_PT,
+ PERF_AUXTRACE_INTEL_BTS,
+ PERF_AUXTRACE_CS_ETM,
+ PERF_AUXTRACE_ARM_SPE,
+ PERF_AUXTRACE_S390_CPUMSF,
+};
+
+enum itrace_period_type {
+ PERF_ITRACE_PERIOD_INSTRUCTIONS,
+ PERF_ITRACE_PERIOD_TICKS,
+ PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ * because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @branches: whether to synthesize 'branches' events
+ * @transactions: whether to synthesize events for transactions
+ * @ptwrites: whether to synthesize events for ptwrites
+ * @pwr_events: whether to synthesize power events
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @thread_stack: feed branches to the thread_stack
+ * @last_branch: add branch context to 'instructions' events
+ * @callchain_sz: maximum callchain size
+ * @last_branch_sz: branch context size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
+ * @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
+ */
+struct itrace_synth_opts {
+ bool set;
+ bool inject;
+ bool instructions;
+ bool branches;
+ bool transactions;
+ bool ptwrites;
+ bool pwr_events;
+ bool errors;
+ bool dont_decode;
+ bool log;
+ bool calls;
+ bool returns;
+ bool callchain;
+ bool thread_stack;
+ bool last_branch;
+ unsigned int callchain_sz;
+ unsigned int last_branch_sz;
+ unsigned long long period;
+ enum itrace_period_type period_type;
+ unsigned long initial_skip;
+ unsigned long *cpu_bitmap;
+};
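
These fields are filled from perf's --itrace option string (parsed by itrace_parse_synth_opts(), declared below); per perf's documented syntax, --itrace=i100ns, for example, sets @instructions with @period = 100 and @period_type = PERF_ITRACE_PERIOD_NANOSECS, while letters such as 'b' and 'e' toggle @branches and @errors.
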
+
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ * perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+ u64 file_offset;
+ u64 sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ * file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+ struct list_head list;
+ size_t nr;
+ struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @process_auxtrace_event: process a PERF_RECORD_AUXTRACE event
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+ int (*process_event)(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool);
+ int (*process_auxtrace_event)(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool);
+ int (*flush_events)(struct perf_session *session,
+ struct perf_tool *tool);
+ void (*free_events)(struct perf_session *session);
+ void (*free)(struct perf_session *session);
+};
+
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ * needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ * to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ * perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ * recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+ struct list_head list;
+ size_t size;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ void *data;
+ off_t data_offset;
+ void *mmap_addr;
+ size_t mmap_size;
+ bool data_needs_freeing;
+ bool consecutive;
+ u64 offset;
+ u64 reference;
+ u64 buffer_nr;
+ size_t use_size;
+ void *use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+ struct list_head head;
+ pid_t tid;
+ int cpu;
+ bool set;
+ void *priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+ struct auxtrace_queue *queue_array;
+ unsigned int nr_queues;
+ bool new_data;
+ bool populated;
+ u64 next_buffer_nr;
+};
+
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap),
+ * expected to be a timestamp
+ */
+struct auxtrace_heap_item {
+ unsigned int queue_nr;
+ u64 ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+ struct auxtrace_heap_item *heap_array;
+ unsigned int heap_cnt;
+ unsigned int heap_sz;
+};
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ * mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+ void *base;
+ void *userpg;
+ size_t mask;
+ size_t len;
+ u64 prev;
+ int idx;
+ pid_t tid;
+ int cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ * mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+ size_t mask;
+ off_t offset;
+ size_t len;
+ int prot;
+ int idx;
+ pid_t tid;
+ int cpu;
+};
+
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ * @alignment: alignment (if any) for AUX area data
+ */
+struct auxtrace_record {
+ int (*recording_options)(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts);
+ size_t (*info_priv_size)(struct auxtrace_record *itr,
+ struct perf_evlist *evlist);
+ int (*info_fill)(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size);
+ void (*free)(struct auxtrace_record *itr);
+ int (*snapshot_start)(struct auxtrace_record *itr);
+ int (*snapshot_finish)(struct auxtrace_record *itr);
+ int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm, unsigned char *data,
+ u64 *head, u64 *old);
+ int (*parse_snapshot_options)(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str);
+ u64 (*reference)(struct auxtrace_record *itr);
+ int (*read_finish)(struct auxtrace_record *itr, int idx);
+ unsigned int alignment;
+};
+
+/**
+ * struct addr_filter - address filter.
+ * @list: list node
+ * @range: true if it is a range filter
+ * @start: true if action is 'filter' or 'start'
+ * @action: 'filter', 'start' or 'stop' ('tracestop' is accepted but converted
+ * to 'stop')
+ * @sym_from: symbol name for the filter address
+ * @sym_to: symbol name that determines the filter size
+ * @sym_from_idx: selects the n'th among symbols with the same name (0 means
+ * global and less than 0 means the symbol must be unique)
+ * @sym_to_idx: same as @sym_from_idx but for @sym_to
+ * @addr: filter address
+ * @size: filter region size (for range filters)
+ * @filename: DSO file name or NULL for the kernel
+ * @str: allocated string that contains the other string members
+ */
+struct addr_filter {
+ struct list_head list;
+ bool range;
+ bool start;
+ const char *action;
+ const char *sym_from;
+ const char *sym_to;
+ int sym_from_idx;
+ int sym_to_idx;
+ u64 addr;
+ u64 size;
+ const char *filename;
+ char *str;
+};
+
+/**
+ * struct addr_filters - list of address filters.
+ * @head: list of address filters
+ * @cnt: number of address filters
+ */
+struct addr_filters {
+ struct list_head head;
+ int cnt;
+};
+
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+/*
+ * In snapshot mode the mmapped page is read-only, which makes using
+ * __sync_val_compare_and_swap() problematic. However, snapshot mode expects
+ * that the buffer is not updated while the snapshot is being made (e.g. Intel
+ * PT disables the event), so there is no race anyway.
+ */
+static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+ u64 head = READ_ONCE(pc->aux_head);
+
+ /* Ensure all reads are done after we read the head */
+ rmb();
+ return head;
+}
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ u64 head = READ_ONCE(pc->aux_head);
+#else
+ u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+#endif
+
+ /* Ensure all reads are done after we read the head */
+ rmb();
+ return head;
+}
+
+static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+ struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ u64 old_tail;
+#endif
+
+ /* Ensure all reads are done before we write the tail out */
+ mb();
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+ pc->aux_tail = tail;
+#else
+ do {
+ old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
+ } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
+#endif
+}
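
Together, the two helpers above implement the usual single-producer/single-consumer protocol on the AUX ring buffer: an acquire-style read of aux_head, data processing, then a release-style publish of aux_tail. A minimal consumer sketch, assuming an already-mmapped struct auxtrace_mmap *mm whose @len is a power of two and a hypothetical process_bytes() sink (the real logic, including snapshot handling, lives in auxtrace_mmap__read()):

    u64 head = auxtrace_mmap__read_head(mm);
    u64 tail = mm->prev;                   /* where the previous read stopped */

    while (tail != head) {
            size_t off = tail & mm->mask;  /* @mask == @len - 1 here */
            u64 len = head - tail;

            if (len > mm->len - off)
                    len = mm->len - off;   /* clamp at the wrap point */

            process_bytes((unsigned char *)mm->base + off, len); /* hypothetical */
            tail += len;
    }
    mm->prev = tail;
    auxtrace_mmap__write_tail(mm, tail);   /* mb(), then let the kernel reuse the space */
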
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct perf_evlist *evlist, int idx,
+ bool per_cpu);
+
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+ union perf_event *event, void *data1,
+ size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn);
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+ struct auxtrace_record *itr,
+ struct perf_tool *tool, process_auxtrace_t fn,
+ size_t snapshot_size);
+
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+ struct perf_session *session,
+ union perf_event *event, off_t data_offset,
+ struct auxtrace_buffer **buffer_ptr);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+ struct perf_session *session);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+ u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
+struct auxtrace_cache_entry {
+ struct hlist_node hash;
+ u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+ unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+ struct auxtrace_cache_entry *entry);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+ int *err);
+
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+ struct record_opts *opts,
+ const char *str);
+int auxtrace_record__options(struct auxtrace_record *itr,
+ struct perf_evlist *evlist,
+ struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr,
+ struct perf_evlist *evlist);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+ struct perf_session *session,
+ struct auxtrace_info_event *auxtrace_info,
+ size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+ struct auxtrace_mmap *mm,
+ unsigned char *data, u64 *head, u64 *old);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+ off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+ bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+ int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+ const char *msg);
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+ struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process);
+int perf_event__process_auxtrace_info(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+int perf_event__process_auxtrace_error(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+ int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+ union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
+void addr_filters__init(struct addr_filters *filts);
+void addr_filters__exit(struct addr_filters *filts);
+int addr_filters__parse_bare_filter(struct addr_filters *filts,
+ const char *filter);
+int auxtrace_parse_filters(struct perf_evlist *evlist);
+
+static inline int auxtrace__process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ if (!session->auxtrace)
+ return 0;
+
+ return session->auxtrace->process_event(session, event, sample, tool);
+}
+
+static inline int auxtrace__flush_events(struct perf_session *session,
+ struct perf_tool *tool)
+{
+ if (!session->auxtrace)
+ return 0;
+
+ return session->auxtrace->flush_events(session, tool);
+}
+
+static inline void auxtrace__free_events(struct perf_session *session)
+{
+ if (!session->auxtrace)
+ return;
+
+ return session->auxtrace->free_events(session);
+}
+
+static inline void auxtrace__free(struct perf_session *session)
+{
+ if (!session->auxtrace)
+ return;
+
+ return session->auxtrace->free(session);
+}
+
+#else
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
+ int *err)
+{
+ *err = 0;
+ return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+ struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ perf_event__handler_t process __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ struct record_opts *opts __maybe_unused)
+{
+ return 0;
+}
+
+#define perf_event__process_auxtrace_info 0
+#define perf_event__process_auxtrace 0
+#define perf_event__process_auxtrace_error 0
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+ __maybe_unused,
+ union perf_event *event
+ __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+ __maybe_unused)
+{
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+ struct record_opts *opts __maybe_unused,
+ const char *str)
+{
+ if (!str)
+ return 0;
+ pr_err("AUX area tracing not supported\n");
+ return -EINVAL;
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+ struct list_head *head __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+ u64 size __maybe_unused,
+ struct perf_session *session __maybe_unused,
+ bool needs_swap __maybe_unused)
+{
+ return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_parse_filters(struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+ struct auxtrace_mmap_params *mp,
+ void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+ off_t auxtrace_offset,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+ struct perf_evlist *evlist, int idx,
+ bool per_cpu);
+
+#endif
+
+#endif
diff --git a/tools/perf/util/block-range.c b/tools/perf/util/block-range.c
new file mode 100644
index 000000000..f1451c987
--- /dev/null
+++ b/tools/perf/util/block-range.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "block-range.h"
+#include "annotate.h"
+
+struct {
+ struct rb_root root;
+ u64 blocks;
+} block_ranges;
+
+static void block_range__debug(void)
+{
+ /*
+ * XXX still paranoid for now; see if we can make this depend on
+ * DEBUG=1 builds.
+ */
+#if 1
+ struct rb_node *rb;
+ u64 old = 0; /* NULL isn't executable */
+
+ for (rb = rb_first(&block_ranges.root); rb; rb = rb_next(rb)) {
+ struct block_range *entry = rb_entry(rb, struct block_range, node);
+
+ assert(old < entry->start);
+ assert(entry->start <= entry->end); /* single instruction block; jump to a jump */
+
+ old = entry->end;
+ }
+#endif
+}
+
+struct block_range *block_range__find(u64 addr)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *parent = NULL;
+ struct block_range *entry;
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (addr < entry->start)
+ p = &parent->rb_left;
+ else if (addr > entry->end)
+ p = &parent->rb_right;
+ else
+ return entry;
+ }
+
+ return NULL;
+}
+
+static inline void rb_link_left_of_node(struct rb_node *left, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_left;
+ while (*p) {
+ node = *p;
+ p = &node->rb_right;
+ }
+ rb_link_node(left, node, p);
+}
+
+static inline void rb_link_right_of_node(struct rb_node *right, struct rb_node *node)
+{
+ struct rb_node **p = &node->rb_right;
+ while (*p) {
+ node = *p;
+ p = &node->rb_left;
+ }
+ rb_link_node(right, node, p);
+}
+
+/**
+ * block_range__create
+ * @start: branch target starting this basic block
+ * @end: branch ending this basic block
+ *
+ * Create all the required block ranges to precisely span the given range.
+ */
+struct block_range_iter block_range__create(u64 start, u64 end)
+{
+ struct rb_node **p = &block_ranges.root.rb_node;
+ struct rb_node *n, *parent = NULL;
+ struct block_range *next, *entry = NULL;
+ struct block_range_iter iter = { NULL, NULL };
+
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct block_range, node);
+
+ if (start < entry->start)
+ p = &parent->rb_left;
+ else if (start > entry->end)
+ p = &parent->rb_right;
+ else
+ break;
+ }
+
+ /*
+ * Didn't find anything... there's a hole at @start; however, @end might
+ * be inside/behind the next range.
+ */
+ if (!*p) {
+ if (!entry) /* tree empty */
+ goto do_whole;
+
+ /*
+ * If the last node is before, advance one to find the next.
+ */
+ n = parent;
+ if (entry->end < start) {
+ n = rb_next(n);
+ if (!n)
+ goto do_whole;
+ }
+ next = rb_entry(n, struct block_range, node);
+
+ if (next->start <= end) { /* add head: [start...][n->start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = start,
+ .end = next->start - 1,
+ .is_target = 1,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&head->node, &next->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = head;
+ goto do_tail;
+ }
+
+do_whole:
+ /*
+ * The whole [start..end] range is non-overlapping.
+ */
+ entry = malloc(sizeof(struct block_range));
+ if (!entry)
+ return iter;
+
+ *entry = (struct block_range){
+ .start = start,
+ .end = end,
+ .is_target = 1,
+ .is_branch = 1,
+ };
+
+ rb_link_node(&entry->node, parent, p);
+ rb_insert_color(&entry->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.start = entry;
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * We found a range that overlapped with ours, split if needed.
+ */
+ if (entry->start < start) { /* split: [e->start...][start...] */
+ struct block_range *head = malloc(sizeof(struct block_range));
+ if (!head)
+ return iter;
+
+ *head = (struct block_range){
+ .start = entry->start,
+ .end = start - 1,
+ .is_target = entry->is_target,
+ .is_branch = 0,
+
+ .coverage = entry->coverage,
+ .entry = entry->entry,
+ };
+
+ entry->start = start;
+ entry->is_target = 1;
+ entry->entry = 0;
+
+ rb_link_left_of_node(&head->node, &entry->node);
+ rb_insert_color(&head->node, &block_ranges.root);
+ block_range__debug();
+
+ } else if (entry->start == start)
+ entry->is_target = 1;
+
+ iter.start = entry;
+
+do_tail:
+ /*
+ * At this point we've got: @iter.start = [@start...] but @end can still be
+ * inside or beyond it.
+ */
+ entry = iter.start;
+ for (;;) {
+ /*
+ * If @end is inside @entry, split.
+ */
+ if (end < entry->end) { /* split: [...end][...e->end] */
+ struct block_range *tail = malloc(sizeof(struct block_range));
+ if (!tail)
+ return iter;
+
+ *tail = (struct block_range){
+ .start = end + 1,
+ .end = entry->end,
+ .is_target = 0,
+ .is_branch = entry->is_branch,
+
+ .coverage = entry->coverage,
+ .taken = entry->taken,
+ .pred = entry->pred,
+ };
+
+ entry->end = end;
+ entry->is_branch = 1;
+ entry->taken = 0;
+ entry->pred = 0;
+
+ rb_link_right_of_node(&tail->node, &entry->node);
+ rb_insert_color(&tail->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.end = entry;
+ goto done;
+ }
+
+ /*
+ * If @end matches @entry, done
+ */
+ if (end == entry->end) {
+ entry->is_branch = 1;
+ iter.end = entry;
+ goto done;
+ }
+
+ next = block_range__next(entry);
+ if (!next)
+ goto add_tail;
+
+ /*
+ * If @end is beyond @entry but not inside @next, add tail.
+ */
+ if (end < next->start) { /* add tail: [...e->end][...end] */
+ struct block_range *tail;
+add_tail:
+ tail = malloc(sizeof(struct block_range));
+ if (!tail)
+ return iter;
+
+ *tail = (struct block_range){
+ .start = entry->end + 1,
+ .end = end,
+ .is_target = 0,
+ .is_branch = 1,
+ };
+
+ rb_link_right_of_node(&tail->node, &entry->node);
+ rb_insert_color(&tail->node, &block_ranges.root);
+ block_range__debug();
+
+ iter.end = tail;
+ goto done;
+ }
+
+ /*
+ * If there is a hole between @entry and @next, fill it.
+ */
+ if (entry->end + 1 != next->start) {
+ struct block_range *hole = malloc(sizeof(struct block_range));
+ if (!hole)
+ return iter;
+
+ *hole = (struct block_range){
+ .start = entry->end + 1,
+ .end = next->start - 1,
+ .is_target = 0,
+ .is_branch = 0,
+ };
+
+ rb_link_left_of_node(&hole->node, &next->node);
+ rb_insert_color(&hole->node, &block_ranges.root);
+ block_range__debug();
+ }
+
+ entry = next;
+ }
+
+done:
+ assert(iter.start->start == start && iter.start->is_target);
+ assert(iter.end->end == end && iter.end->is_branch);
+
+ block_ranges.blocks++;
+
+ return iter;
+}
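
As a worked example of the splitting above: with a single existing range [0x10, 0x20] in the tree, block_range__create(0x18, 0x28) first splits off a head [0x10, 0x17] (keeping the old @coverage, with @is_branch cleared), turns the remainder [0x18, 0x20] into the iterator start with @is_target set, and then, finding no following node, appends a tail [0x21, 0x28] with @is_branch set; the returned iterator spans exactly those last two ranges.
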
+
+
+/*
+ * Compute coverage as:
+ *
+ * br->coverage / br->sym->max_coverage
+ *
+ * This ensures each symbol has a 100% spot, reflecting that every symbol has
+ * a most-covered section.
+ *
+ * Returns [0-1] for coverage and -1 if we had no data whatsoever or the
+ * symbol does not exist.
+ */
+double block_range__coverage(struct block_range *br)
+{
+ struct symbol *sym;
+
+ if (!br) {
+ if (block_ranges.blocks)
+ return 0;
+
+ return -1;
+ }
+
+ sym = br->sym;
+ if (!sym)
+ return -1;
+
+ return (double)br->coverage / symbol__annotation(sym)->max_coverage;
+}
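
For instance, a range covered 50 times inside a symbol whose most-covered range was hit 200 times reports 0.25, and that hottest range itself reports 1.0. A NULL @br reports 0 when other blocks were recorded but -1 when nothing was recorded at all; an unknown symbol likewise reports -1.
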
diff --git a/tools/perf/util/block-range.h b/tools/perf/util/block-range.h
new file mode 100644
index 000000000..a5ba719d6
--- /dev/null
+++ b/tools/perf/util/block-range.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_BLOCK_RANGE_H
+#define __PERF_BLOCK_RANGE_H
+
+#include "symbol.h"
+
+/*
+ * struct block_range - non-overlapping parts of basic blocks
+ * @node: treenode
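+ * @sym: symbol that contains this range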
+ * @start: inclusive start of range
+ * @end: inclusive end of range
+ * @is_target: @start is a jump target
+ * @is_branch: @end is a branch instruction
+ * @coverage: number of blocks that cover this range
+ * @taken: number of times the branch is taken (requires @is_branch)
+ * @pred: number of times the taken branch was predicted
+ */
+struct block_range {
+ struct rb_node node;
+
+ struct symbol *sym;
+
+ u64 start;
+ u64 end;
+
+ int is_target, is_branch;
+
+ u64 coverage;
+ u64 entry;
+ u64 taken;
+ u64 pred;
+};
+
+static inline struct block_range *block_range__next(struct block_range *br)
+{
+ struct rb_node *n = rb_next(&br->node);
+ if (!n)
+ return NULL;
+ return rb_entry(n, struct block_range, node);
+}
+
+struct block_range_iter {
+ struct block_range *start;
+ struct block_range *end;
+};
+
+static inline struct block_range *block_range_iter(struct block_range_iter *iter)
+{
+ return iter->start;
+}
+
+static inline bool block_range_iter__next(struct block_range_iter *iter)
+{
+ if (iter->start == iter->end)
+ return false;
+
+ iter->start = block_range__next(iter->start);
+ return true;
+}
+
+static inline bool block_range_iter__valid(struct block_range_iter *iter)
+{
+ if (!iter->start || !iter->end)
+ return false;
+ return true;
+}
+
+extern struct block_range *block_range__find(u64 addr);
+extern struct block_range_iter block_range__create(u64 start, u64 end);
+extern double block_range__coverage(struct block_range *br);
+
+#endif /* __PERF_BLOCK_RANGE_H */
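
A typical walk over the ranges produced by block_range__create() uses the iterator above; a sketch (the per-range increment is illustrative, standing in for whatever accounting the caller does):

    struct block_range_iter iter = block_range__create(start, end);
    struct block_range *entry;

    if (!block_range_iter__valid(&iter))
            return;

    entry = block_range_iter(&iter);
    for (;;) {
            entry->coverage++;             /* illustrative accounting */
            if (!block_range_iter__next(&iter))
                    break;                 /* just processed iter.end */
            entry = block_range_iter(&iter);
    }
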
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
new file mode 100644
index 000000000..47aac4134
--- /dev/null
+++ b/tools/perf/util/bpf-loader.c
@@ -0,0 +1,1801 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf-loader.c
+ *
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <errno.h>
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-event.h"
+#include "probe-finder.h" // for MAX_PROBES
+#include "parse-events.h"
+#include "strfilter.h"
+#include "llvm-utils.h"
+#include "c++/clang-c.h"
+
+#define DEFINE_PRINT_FN(name, level) \
+static int libbpf_##name(const char *fmt, ...) \
+{ \
+ va_list args; \
+ int ret; \
+ \
+ va_start(args, fmt); \
+ ret = veprintf(level, verbose, pr_fmt(fmt), args);\
+ va_end(args); \
+ return ret; \
+}
+
+DEFINE_PRINT_FN(warning, 1)
+DEFINE_PRINT_FN(info, 1)
+DEFINE_PRINT_FN(debug, 1)
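
For instance, DEFINE_PRINT_FN(warning, 1) expands to:

    static int libbpf_warning(const char *fmt, ...)
    {
            va_list args;
            int ret;

            va_start(args, fmt);
            ret = veprintf(1, verbose, pr_fmt(fmt), args);
            va_end(args);
            return ret;
    }

and the three expansions are what get handed to libbpf_set_print() below, routing libbpf's messages through perf's verbosity-aware veprintf().
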
+
+struct bpf_prog_priv {
+ bool is_tp;
+ char *sys_name;
+ char *evt_name;
+ struct perf_probe_event pev;
+ bool need_prologue;
+ struct bpf_insn *insns_buf;
+ int nr_types;
+ int *type_mapping;
+};
+
+static bool libbpf_initialized;
+
+struct bpf_object *
+bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
+{
+ struct bpf_object *obj;
+
+ if (!libbpf_initialized) {
+ libbpf_set_print(libbpf_warning,
+ libbpf_info,
+ libbpf_debug);
+ libbpf_initialized = true;
+ }
+
+ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
+ if (IS_ERR_OR_NULL(obj)) {
+ pr_debug("bpf: failed to load buffer\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ return obj;
+}
+
+struct bpf_object *bpf__prepare_load(const char *filename, bool source)
+{
+ struct bpf_object *obj;
+
+ if (!libbpf_initialized) {
+ libbpf_set_print(libbpf_warning,
+ libbpf_info,
+ libbpf_debug);
+ libbpf_initialized = true;
+ }
+
+ if (source) {
+ int err;
+ void *obj_buf;
+ size_t obj_buf_sz;
+
+ perf_clang__init();
+ err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz);
+ perf_clang__cleanup();
+ if (err) {
+ pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err);
+ err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz);
+ if (err)
+ return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE);
+ } else
+ pr_debug("bpf: successfull builtin compilation\n");
+ obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
+
+ if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
+ llvm__dump_obj(filename, obj_buf, obj_buf_sz);
+
+ free(obj_buf);
+ } else
+ obj = bpf_object__open(filename);
+
+ if (IS_ERR_OR_NULL(obj)) {
+ pr_debug("bpf: failed to load %s\n", filename);
+ return obj;
+ }
+
+ return obj;
+}
+
+void bpf__clear(void)
+{
+ struct bpf_object *obj, *tmp;
+
+ bpf_object__for_each_safe(obj, tmp) {
+ bpf__unprobe(obj);
+ bpf_object__close(obj);
+ }
+}
+
+static void
+clear_prog_priv(struct bpf_program *prog __maybe_unused,
+ void *_priv)
+{
+ struct bpf_prog_priv *priv = _priv;
+
+ cleanup_perf_probe_events(&priv->pev, 1);
+ zfree(&priv->insns_buf);
+ zfree(&priv->type_mapping);
+ zfree(&priv->sys_name);
+ zfree(&priv->evt_name);
+ free(priv);
+}
+
+static int
+prog_config__exec(const char *value, struct perf_probe_event *pev)
+{
+ pev->uprobes = true;
+ pev->target = strdup(value);
+ if (!pev->target)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+prog_config__module(const char *value, struct perf_probe_event *pev)
+{
+ pev->uprobes = false;
+ pev->target = strdup(value);
+ if (!pev->target)
+ return -ENOMEM;
+ return 0;
+}
+
+static int
+prog_config__bool(const char *value, bool *pbool, bool invert)
+{
+ int err;
+ bool bool_value;
+
+ if (!pbool)
+ return -EINVAL;
+
+ err = strtobool(value, &bool_value);
+ if (err)
+ return err;
+
+ *pbool = invert ? !bool_value : bool_value;
+ return 0;
+}
+
+static int
+prog_config__inlines(const char *value,
+ struct perf_probe_event *pev __maybe_unused)
+{
+ return prog_config__bool(value, &probe_conf.no_inlines, true);
+}
+
+static int
+prog_config__force(const char *value,
+ struct perf_probe_event *pev __maybe_unused)
+{
+ return prog_config__bool(value, &probe_conf.force_add, false);
+}
+
+static struct {
+ const char *key;
+ const char *usage;
+ const char *desc;
+ int (*func)(const char *, struct perf_probe_event *);
+} bpf_prog_config_terms[] = {
+ {
+ .key = "exec",
+ .usage = "exec=<full path of file>",
+ .desc = "Set uprobe target",
+ .func = prog_config__exec,
+ },
+ {
+ .key = "module",
+ .usage = "module=<module name> ",
+ .desc = "Set kprobe module",
+ .func = prog_config__module,
+ },
+ {
+ .key = "inlines",
+ .usage = "inlines=[yes|no] ",
+ .desc = "Probe at inline symbol",
+ .func = prog_config__inlines,
+ },
+ {
+ .key = "force",
+ .usage = "force=[yes|no] ",
+ .desc = "Forcibly add events with existing name",
+ .func = prog_config__force,
+ },
+};
+
+static int
+do_prog_config(const char *key, const char *value,
+ struct perf_probe_event *pev)
+{
+ unsigned int i;
+
+ pr_debug("config bpf program: %s=%s\n", key, value);
+ for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+ if (strcmp(key, bpf_prog_config_terms[i].key) == 0)
+ return bpf_prog_config_terms[i].func(value, pev);
+
+ pr_debug("BPF: ERROR: invalid program config option: %s=%s\n",
+ key, value);
+
+ pr_debug("\nHint: Valid options are:\n");
+ for (i = 0; i < ARRAY_SIZE(bpf_prog_config_terms); i++)
+ pr_debug("\t%s:\t%s\n", bpf_prog_config_terms[i].usage,
+ bpf_prog_config_terms[i].desc);
+ pr_debug("\n");
+
+ return -BPF_LOADER_ERRNO__PROGCONF_TERM;
+}
+
+static const char *
+parse_prog_config_kvpair(const char *config_str, struct perf_probe_event *pev)
+{
+ char *text = strdup(config_str);
+ char *sep, *line;
+ const char *main_str = NULL;
+ int err = 0;
+
+ if (!text) {
+ pr_debug("Not enough memory: dup config_str failed\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ line = text;
+ while ((sep = strchr(line, ';'))) {
+ char *equ;
+
+ *sep = '\0';
+ equ = strchr(line, '=');
+ if (!equ) {
+ pr_warning("WARNING: invalid config in BPF object: %s\n",
+ line);
+ pr_warning("\tShould be 'key=value'.\n");
+ goto nextline;
+ }
+ *equ = '\0';
+
+ err = do_prog_config(line, equ + 1, pev);
+ if (err)
+ break;
+nextline:
+ line = sep + 1;
+ }
+
+ if (!err)
+ main_str = config_str + (line - text);
+ free(text);
+
+ return err ? ERR_PTR(err) : main_str;
+}
+
+static int
+parse_prog_config(const char *config_str, const char **p_main_str,
+ bool *is_tp, struct perf_probe_event *pev)
+{
+ int err;
+ const char *main_str = parse_prog_config_kvpair(config_str, pev);
+
+ if (IS_ERR(main_str))
+ return PTR_ERR(main_str);
+
+ *p_main_str = main_str;
+ if (!strchr(main_str, '=')) {
+ /* Is a tracepoint event? */
+ const char *s = strchr(main_str, ':');
+
+ if (!s) {
+ pr_debug("bpf: '%s' is not a valid tracepoint\n",
+ config_str);
+ return -BPF_LOADER_ERRNO__CONFIG;
+ }
+
+ *is_tp = true;
+ return 0;
+ }
+
+ *is_tp = false;
+ err = parse_perf_probe_command(main_str, pev);
+ if (err < 0) {
+ pr_debug("bpf: '%s' is not a valid config string\n",
+ config_str);
+ /* parse failed, no need to clear pev. */
+ return -BPF_LOADER_ERRNO__CONFIG;
+ }
+ return 0;
+}
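
The config string here is the ELF section name of the BPF program (obtained via bpf_program__title() in config_bpf_program() below). Illustrative section names this parser accepts (probe points and paths are made up):

    SEC("sched:sched_switch")                /* no '=': a "sys:event" tracepoint */
    SEC("func=do_sys_open filename:string")  /* probe command for parse_perf_probe_command() */
    SEC("exec=/bin/bash;myprobe=readline")   /* 'key=value;' pairs are stripped first */

The kvpair loop consumes leading 'key=value;' pairs such as exec=, module=, inlines= and force=, and the remainder is then classified as either a tracepoint or a probe command.
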
+
+static int
+config_bpf_program(struct bpf_program *prog)
+{
+ struct perf_probe_event *pev = NULL;
+ struct bpf_prog_priv *priv = NULL;
+ const char *config_str, *main_str;
+ bool is_tp = false;
+ int err;
+
+ /* Initialize per-program probing setting */
+ probe_conf.no_inlines = false;
+ probe_conf.force_add = false;
+
+ config_str = bpf_program__title(prog, false);
+ if (IS_ERR(config_str)) {
+ pr_debug("bpf: unable to get title for program\n");
+ return PTR_ERR(config_str);
+ }
+
+ priv = calloc(1, sizeof(*priv));
+ if (!priv) {
+ pr_debug("bpf: failed to alloc priv\n");
+ return -ENOMEM;
+ }
+ pev = &priv->pev;
+
+ pr_debug("bpf: config program '%s'\n", config_str);
+ err = parse_prog_config(config_str, &main_str, &is_tp, pev);
+ if (err)
+ goto errout;
+
+ if (is_tp) {
+ char *s = strchr(main_str, ':');
+
+ priv->is_tp = true;
+ priv->sys_name = strndup(main_str, s - main_str);
+ priv->evt_name = strdup(s + 1);
+ goto set_priv;
+ }
+
+ if (pev->group && strcmp(pev->group, PERF_BPF_PROBE_GROUP)) {
+ pr_debug("bpf: '%s': group for event is set and not '%s'.\n",
+ config_str, PERF_BPF_PROBE_GROUP);
+ err = -BPF_LOADER_ERRNO__GROUP;
+ goto errout;
+ } else if (!pev->group)
+ pev->group = strdup(PERF_BPF_PROBE_GROUP);
+
+ if (!pev->group) {
+ pr_debug("bpf: strdup failed\n");
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ if (!pev->event) {
+ pr_debug("bpf: '%s': event name is missing. Section name should be 'key=value'\n",
+ config_str);
+ err = -BPF_LOADER_ERRNO__EVENTNAME;
+ goto errout;
+ }
+ pr_debug("bpf: config '%s' is ok\n", config_str);
+
+set_priv:
+ err = bpf_program__set_priv(prog, priv, clear_prog_priv);
+ if (err) {
+ pr_debug("Failed to set priv for program '%s'\n", config_str);
+ goto errout;
+ }
+
+ return 0;
+
+errout:
+ if (pev)
+ clear_perf_probe_event(pev);
+ free(priv);
+ return err;
+}
+
+static int bpf__prepare_probe(void)
+{
+ static int err = 0;
+ static bool initialized = false;
+
+ /*
+ * Make err static, so that if init failed the first time,
+ * bpf__prepare_probe() fails each time without calling
+ * init_probe_symbol_maps multiple times.
+ */
+ if (initialized)
+ return err;
+
+ initialized = true;
+ err = init_probe_symbol_maps(false);
+ if (err < 0)
+ pr_debug("Failed to init_probe_symbol_maps\n");
+ probe_conf.max_probes = MAX_PROBES;
+ return err;
+}
+
+static int
+preproc_gen_prologue(struct bpf_program *prog, int n,
+ struct bpf_insn *orig_insns, int orig_insns_cnt,
+ struct bpf_prog_prep_result *res)
+{
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ struct bpf_insn *buf;
+ size_t prologue_cnt = 0;
+ int i, err;
+
+ if (IS_ERR(priv) || !priv || priv->is_tp)
+ goto errout;
+
+ pev = &priv->pev;
+
+ if (n < 0 || n >= priv->nr_types)
+ goto errout;
+
+ /* Find a tev that belongs to that type */
+ for (i = 0; i < pev->ntevs; i++) {
+ if (priv->type_mapping[i] == n)
+ break;
+ }
+
+ if (i >= pev->ntevs) {
+ pr_debug("Internal error: prologue type %d not found\n", n);
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+ }
+
+ tev = &pev->tevs[i];
+
+ buf = priv->insns_buf;
+ err = bpf__gen_prologue(tev->args, tev->nargs,
+ buf, &prologue_cnt,
+ BPF_MAXINSNS - orig_insns_cnt);
+ if (err) {
+ const char *title;
+
+ title = bpf_program__title(prog, false);
+ if (!title)
+ title = "[unknown]";
+
+ pr_debug("Failed to generate prologue for program %s\n",
+ title);
+ return err;
+ }
+
+ memcpy(&buf[prologue_cnt], orig_insns,
+ sizeof(struct bpf_insn) * orig_insns_cnt);
+
+ res->new_insn_ptr = buf;
+ res->new_insn_cnt = prologue_cnt + orig_insns_cnt;
+ res->pfd = NULL;
+ return 0;
+
+errout:
+ pr_debug("Internal error in preproc_gen_prologue\n");
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+}
+
+/*
+ * compare_tev_args is reflexive, transitive and antisymmetric.
+ * I can prove it, but this margin is too narrow to contain the proof.
+ */
+static int compare_tev_args(const void *ptev1, const void *ptev2)
+{
+ int i, ret;
+ const struct probe_trace_event *tev1 =
+ *(const struct probe_trace_event **)ptev1;
+ const struct probe_trace_event *tev2 =
+ *(const struct probe_trace_event **)ptev2;
+
+ ret = tev2->nargs - tev1->nargs;
+ if (ret)
+ return ret;
+
+ for (i = 0; i < tev1->nargs; i++) {
+ struct probe_trace_arg *arg1, *arg2;
+ struct probe_trace_arg_ref *ref1, *ref2;
+
+ arg1 = &tev1->args[i];
+ arg2 = &tev2->args[i];
+
+ ret = strcmp(arg1->value, arg2->value);
+ if (ret)
+ return ret;
+
+ ref1 = arg1->ref;
+ ref2 = arg2->ref;
+
+ while (ref1 && ref2) {
+ ret = ref2->offset - ref1->offset;
+ if (ret)
+ return ret;
+
+ ref1 = ref1->next;
+ ref2 = ref2->next;
+ }
+
+ if (ref1 || ref2)
+ return ref2 ? 1 : -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Assign a type number to each tev in a pev.
+ * mapping is an array with the same number of slots as there are tevs in
+ * that pev. nr_types will be set to the number of types.
+ */
+static int map_prologue(struct perf_probe_event *pev, int *mapping,
+ int *nr_types)
+{
+ int i, type = 0;
+ struct probe_trace_event **ptevs;
+
+ size_t array_sz = sizeof(*ptevs) * pev->ntevs;
+
+ ptevs = malloc(array_sz);
+ if (!ptevs) {
+ pr_debug("Not enough memory: alloc ptevs failed\n");
+ return -ENOMEM;
+ }
+
+ pr_debug("In map_prologue, ntevs=%d\n", pev->ntevs);
+ for (i = 0; i < pev->ntevs; i++)
+ ptevs[i] = &pev->tevs[i];
+
+ qsort(ptevs, pev->ntevs, sizeof(*ptevs),
+ compare_tev_args);
+
+ for (i = 0; i < pev->ntevs; i++) {
+ int n;
+
+ n = ptevs[i] - pev->tevs;
+ if (i == 0) {
+ mapping[n] = type;
+ pr_debug("mapping[%d]=%d\n", n, type);
+ continue;
+ }
+
+ if (compare_tev_args(ptevs + i, ptevs + i - 1) == 0)
+ mapping[n] = type;
+ else
+ mapping[n] = ++type;
+
+ pr_debug("mapping[%d]=%d\n", n, mapping[n]);
+ }
+ free(ptevs);
+ *nr_types = type + 1;
+
+ return 0;
+}
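
For example, with four tevs where tev[0] and tev[2] have identical argument lists while tev[1] and tev[3] each differ: after sorting, equal neighbours share a number, so one possible outcome is mapping = {1, 0, 1, 2} and *nr_types = 3 (the exact numbers depend on the sort order); preproc_gen_prologue() then generates one prologue per type instead of one per tev.
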
+
+static int hook_load_preprocessor(struct bpf_program *prog)
+{
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct perf_probe_event *pev;
+ bool need_prologue = false;
+ int err, i;
+
+ if (IS_ERR(priv) || !priv) {
+ pr_debug("Internal error when hook preprocessor\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (priv->is_tp) {
+ priv->need_prologue = false;
+ return 0;
+ }
+
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ struct probe_trace_event *tev = &pev->tevs[i];
+
+ if (tev->nargs > 0) {
+ need_prologue = true;
+ break;
+ }
+ }
+
+ /*
+ * Since none of the tevs have arguments, we don't need to
+ * generate a prologue.
+ */
+ if (!need_prologue) {
+ priv->need_prologue = false;
+ return 0;
+ }
+
+ priv->need_prologue = true;
+ priv->insns_buf = malloc(sizeof(struct bpf_insn) * BPF_MAXINSNS);
+ if (!priv->insns_buf) {
+ pr_debug("Not enough memory: alloc insns_buf failed\n");
+ return -ENOMEM;
+ }
+
+ priv->type_mapping = malloc(sizeof(int) * pev->ntevs);
+ if (!priv->type_mapping) {
+ pr_debug("Not enough memory: alloc type_mapping failed\n");
+ return -ENOMEM;
+ }
+ memset(priv->type_mapping, -1,
+ sizeof(int) * pev->ntevs);
+
+ err = map_prologue(pev, priv->type_mapping, &priv->nr_types);
+ if (err)
+ return err;
+
+ err = bpf_program__set_prep(prog, priv->nr_types,
+ preproc_gen_prologue);
+ return err;
+}
+
+int bpf__probe(struct bpf_object *obj)
+{
+ int err = 0;
+ struct bpf_program *prog;
+ struct bpf_prog_priv *priv;
+ struct perf_probe_event *pev;
+
+ err = bpf__prepare_probe();
+ if (err) {
+ pr_debug("bpf__prepare_probe failed\n");
+ return err;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ err = config_bpf_program(prog);
+ if (err)
+ goto out;
+
+ priv = bpf_program__priv(prog);
+ if (IS_ERR(priv) || !priv) {
+ err = PTR_ERR(priv);
+ goto out;
+ }
+
+ if (priv->is_tp) {
+ bpf_program__set_tracepoint(prog);
+ continue;
+ }
+
+ bpf_program__set_kprobe(prog);
+ pev = &priv->pev;
+
+ err = convert_perf_probe_events(pev, 1);
+ if (err < 0) {
+ pr_debug("bpf_probe: failed to convert perf probe events\n");
+ goto out;
+ }
+
+ err = apply_perf_probe_events(pev, 1);
+ if (err < 0) {
+ pr_debug("bpf_probe: failed to apply perf probe events\n");
+ goto out;
+ }
+
+ /*
+ * After probing, consider the prologue, which adds
+ * argument fetchers to BPF programs.
+ *
+ * hook_load_preprocessor() hooks the pre-processor
+ * to the bpf_program, letting it generate the
+ * prologue dynamically during loading.
+ */
+ err = hook_load_preprocessor(prog);
+ if (err)
+ goto out;
+ }
+out:
+ return err < 0 ? err : 0;
+}
+
+#define EVENTS_WRITE_BUFSIZE 4096
+int bpf__unprobe(struct bpf_object *obj)
+{
+ int err, ret = 0;
+ struct bpf_program *prog;
+
+ bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ int i;
+
+ if (IS_ERR(priv) || !priv || priv->is_tp)
+ continue;
+
+ for (i = 0; i < priv->pev.ntevs; i++) {
+ struct probe_trace_event *tev = &priv->pev.tevs[i];
+ char name_buf[EVENTS_WRITE_BUFSIZE];
+ struct strfilter *delfilter;
+
+ snprintf(name_buf, EVENTS_WRITE_BUFSIZE,
+ "%s:%s", tev->group, tev->event);
+ name_buf[EVENTS_WRITE_BUFSIZE - 1] = '\0';
+
+ delfilter = strfilter__new(name_buf, NULL);
+ if (!delfilter) {
+ pr_debug("Failed to create filter for unprobing\n");
+ ret = -ENOMEM;
+ continue;
+ }
+
+ err = del_perf_probe_events(delfilter);
+ strfilter__delete(delfilter);
+ if (err) {
+ pr_debug("Failed to delete %s\n", name_buf);
+ ret = err;
+ continue;
+ }
+ }
+ }
+ return ret;
+}
+
+int bpf__load(struct bpf_object *obj)
+{
+ int err;
+
+ err = bpf_object__load(obj);
+ if (err) {
+ char bf[128];
+ libbpf_strerror(err, bf, sizeof(bf));
+ pr_debug("bpf: load objects failed: err=%d: (%s)\n", err, bf);
+ return err;
+ }
+ return 0;
+}
+
+int bpf__foreach_event(struct bpf_object *obj,
+ bpf_prog_iter_callback_t func,
+ void *arg)
+{
+ struct bpf_program *prog;
+ int err;
+
+ bpf_object__for_each_program(prog, obj) {
+ struct bpf_prog_priv *priv = bpf_program__priv(prog);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ int i, fd;
+
+ if (IS_ERR(priv) || !priv) {
+ pr_debug("bpf: failed to get private field\n");
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (priv->is_tp) {
+ fd = bpf_program__fd(prog);
+ err = (*func)(priv->sys_name, priv->evt_name, fd, arg);
+ if (err) {
+ pr_debug("bpf: tracepoint call back failed, stop iterate\n");
+ return err;
+ }
+ continue;
+ }
+
+ pev = &priv->pev;
+ for (i = 0; i < pev->ntevs; i++) {
+ tev = &pev->tevs[i];
+
+ if (priv->need_prologue) {
+ int type = priv->type_mapping[i];
+
+ fd = bpf_program__nth_fd(prog, type);
+ } else {
+ fd = bpf_program__fd(prog);
+ }
+
+ if (fd < 0) {
+ pr_debug("bpf: failed to get file descriptor\n");
+ return fd;
+ }
+
+ err = (*func)(tev->group, tev->event, fd, arg);
+ if (err) {
+ pr_debug("bpf: call back failed, stop iterate\n");
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
+enum bpf_map_op_type {
+ BPF_MAP_OP_SET_VALUE,
+ BPF_MAP_OP_SET_EVSEL,
+};
+
+enum bpf_map_key_type {
+ BPF_MAP_KEY_ALL,
+ BPF_MAP_KEY_RANGES,
+};
+
+struct bpf_map_op {
+ struct list_head list;
+ enum bpf_map_op_type op_type;
+ enum bpf_map_key_type key_type;
+ union {
+ struct parse_events_array array;
+ } k;
+ union {
+ u64 value;
+ struct perf_evsel *evsel;
+ } v;
+};
+
+struct bpf_map_priv {
+ struct list_head ops_list;
+};
+
+static void
+bpf_map_op__delete(struct bpf_map_op *op)
+{
+ if (!list_empty(&op->list))
+ list_del(&op->list);
+ if (op->key_type == BPF_MAP_KEY_RANGES)
+ parse_events__clear_array(&op->k.array);
+ free(op);
+}
+
+static void
+bpf_map_priv__purge(struct bpf_map_priv *priv)
+{
+ struct bpf_map_op *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &priv->ops_list, list) {
+ list_del_init(&pos->list);
+ bpf_map_op__delete(pos);
+ }
+}
+
+static void
+bpf_map_priv__clear(struct bpf_map *map __maybe_unused,
+ void *_priv)
+{
+ struct bpf_map_priv *priv = _priv;
+
+ bpf_map_priv__purge(priv);
+ free(priv);
+}
+
+static int
+bpf_map_op_setkey(struct bpf_map_op *op, struct parse_events_term *term)
+{
+ op->key_type = BPF_MAP_KEY_ALL;
+ if (!term)
+ return 0;
+
+ if (term->array.nr_ranges) {
+ size_t memsz = term->array.nr_ranges *
+ sizeof(op->k.array.ranges[0]);
+
+ op->k.array.ranges = memdup(term->array.ranges, memsz);
+ if (!op->k.array.ranges) {
+ pr_debug("Not enough memory to alloc indices for map\n");
+ return -ENOMEM;
+ }
+ op->key_type = BPF_MAP_KEY_RANGES;
+ op->k.array.nr_ranges = term->array.nr_ranges;
+ }
+ return 0;
+}
+
+static struct bpf_map_op *
+bpf_map_op__new(struct parse_events_term *term)
+{
+ struct bpf_map_op *op;
+ int err;
+
+ op = zalloc(sizeof(*op));
+ if (!op) {
+ pr_debug("Failed to alloc bpf_map_op\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ INIT_LIST_HEAD(&op->list);
+
+ err = bpf_map_op_setkey(op, term);
+ if (err) {
+ free(op);
+ return ERR_PTR(err);
+ }
+ return op;
+}
+
+static struct bpf_map_op *
+bpf_map_op__clone(struct bpf_map_op *op)
+{
+ struct bpf_map_op *newop;
+
+ newop = memdup(op, sizeof(*op));
+ if (!newop) {
+ pr_debug("Failed to alloc bpf_map_op\n");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&newop->list);
+ if (op->key_type == BPF_MAP_KEY_RANGES) {
+ size_t memsz = op->k.array.nr_ranges *
+ sizeof(op->k.array.ranges[0]);
+
+ newop->k.array.ranges = memdup(op->k.array.ranges, memsz);
+ if (!newop->k.array.ranges) {
+ pr_debug("Failed to alloc indices for map\n");
+ free(newop);
+ return NULL;
+ }
+ }
+
+ return newop;
+}
+
+static struct bpf_map_priv *
+bpf_map_priv__clone(struct bpf_map_priv *priv)
+{
+ struct bpf_map_priv *newpriv;
+ struct bpf_map_op *pos, *newop;
+
+ newpriv = zalloc(sizeof(*newpriv));
+ if (!newpriv) {
+ pr_debug("Not enough memory to alloc map private\n");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&newpriv->ops_list);
+
+ list_for_each_entry(pos, &priv->ops_list, list) {
+ newop = bpf_map_op__clone(pos);
+ if (!newop) {
+ bpf_map_priv__purge(newpriv);
+ return NULL;
+ }
+ list_add_tail(&newop->list, &newpriv->ops_list);
+ }
+
+ return newpriv;
+}
+
+static int
+bpf_map__add_op(struct bpf_map *map, struct bpf_map_op *op)
+{
+ const char *map_name = bpf_map__name(map);
+ struct bpf_map_priv *priv = bpf_map__priv(map);
+
+ if (IS_ERR(priv)) {
+ pr_debug("Failed to get private from map %s\n", map_name);
+ return PTR_ERR(priv);
+ }
+
+ if (!priv) {
+ priv = zalloc(sizeof(*priv));
+ if (!priv) {
+ pr_debug("Not enough memory to alloc map private\n");
+ return -ENOMEM;
+ }
+ INIT_LIST_HEAD(&priv->ops_list);
+
+ if (bpf_map__set_priv(map, priv, bpf_map_priv__clear)) {
+ free(priv);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ }
+
+ list_add_tail(&op->list, &priv->ops_list);
+ return 0;
+}
+
+static struct bpf_map_op *
+bpf_map__add_newop(struct bpf_map *map, struct parse_events_term *term)
+{
+ struct bpf_map_op *op;
+ int err;
+
+ op = bpf_map_op__new(term);
+ if (IS_ERR(op))
+ return op;
+
+ err = bpf_map__add_op(map, op);
+ if (err) {
+ bpf_map_op__delete(op);
+ return ERR_PTR(err);
+ }
+ return op;
+}
+
+static int
+__bpf_map__config_value(struct bpf_map *map,
+ struct parse_events_term *term)
+{
+ struct bpf_map_op *op;
+ const char *map_name = bpf_map__name(map);
+ const struct bpf_map_def *def = bpf_map__def(map);
+
+ if (IS_ERR(def)) {
+ pr_debug("Unable to get map definition from '%s'\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (def->type != BPF_MAP_TYPE_ARRAY) {
+ pr_debug("Map %s type is not BPF_MAP_TYPE_ARRAY\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+ if (def->key_size < sizeof(unsigned int)) {
+ pr_debug("Map %s has incorrect key size\n", map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE;
+ }
+ switch (def->value_size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ break;
+ default:
+ pr_debug("Map %s has incorrect value size\n", map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+ }
+
+ op = bpf_map__add_newop(map, term);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+ op->op_type = BPF_MAP_OP_SET_VALUE;
+ op->v.value = term->val.num;
+ return 0;
+}
+
+static int
+bpf_map__config_value(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ if (!term->err_val) {
+ pr_debug("Config value not set\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+ }
+
+ if (term->type_val != PARSE_EVENTS__TERM_TYPE_NUM) {
+ pr_debug("ERROR: wrong value type for 'value'\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+ }
+
+ return __bpf_map__config_value(map, term);
+}
+
+static int
+__bpf_map__config_event(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ const struct bpf_map_def *def;
+ struct bpf_map_op *op;
+ const char *map_name = bpf_map__name(map);
+
+ evsel = perf_evlist__find_evsel_by_str(evlist, term->val.str);
+ if (!evsel) {
+ pr_debug("Event (for '%s') '%s' doesn't exist\n",
+ map_name, term->val.str);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT;
+ }
+
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
+ pr_debug("Unable to get map definition from '%s'\n",
+ map_name);
+ return PTR_ERR(def);
+ }
+
+ /*
+ * No need to check key_size and value_size:
+ * the kernel has already checked them.
+ */
+ if (def->type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
+ pr_debug("Map %s type is not BPF_MAP_TYPE_PERF_EVENT_ARRAY\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+
+ op = bpf_map__add_newop(map, term);
+ if (IS_ERR(op))
+ return PTR_ERR(op);
+ op->op_type = BPF_MAP_OP_SET_EVSEL;
+ op->v.evsel = evsel;
+ return 0;
+}
+
+static int
+bpf_map__config_event(struct bpf_map *map,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist)
+{
+ if (!term->err_val) {
+ pr_debug("Config value not set\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_CONF;
+ }
+
+ if (term->type_val != PARSE_EVENTS__TERM_TYPE_STR) {
+ pr_debug("ERROR: wrong value type for 'event'\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE;
+ }
+
+ return __bpf_map__config_event(map, term, evlist);
+}
+
+struct bpf_obj_config__map_func {
+ const char *config_opt;
+ int (*config_func)(struct bpf_map *, struct parse_events_term *,
+ struct perf_evlist *);
+};
+
+struct bpf_obj_config__map_func bpf_obj_config__map_funcs[] = {
+ {"value", bpf_map__config_value},
+ {"event", bpf_map__config_event},
+};
+
+static int
+config_map_indices_range_check(struct parse_events_term *term,
+ struct bpf_map *map,
+ const char *map_name)
+{
+ struct parse_events_array *array = &term->array;
+ const struct bpf_map_def *def;
+ unsigned int i;
+
+ if (!array->nr_ranges)
+ return 0;
+ if (!array->ranges) {
+ pr_debug("ERROR: map %s: array->nr_ranges is %d but range array is NULL\n",
+ map_name, (int)array->nr_ranges);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
+ pr_debug("ERROR: Unable to get map definition from '%s'\n",
+ map_name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ for (i = 0; i < array->nr_ranges; i++) {
+ unsigned int start = array->ranges[i].start;
+ size_t length = array->ranges[i].length;
+ unsigned int idx = start + length - 1;
+
+ if (idx >= def->max_entries) {
+ pr_debug("ERROR: index %d too large\n", idx);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG;
+ }
+ }
+ return 0;
+}
+
+static int
+bpf__obj_config_map(struct bpf_object *obj,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist,
+ int *key_scan_pos)
+{
+ /* key is "map:<mapname>.<config opt>" */
+ char *map_name = strdup(term->config + sizeof("map:") - 1);
+ struct bpf_map *map;
+ int err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+ char *map_opt;
+ size_t i;
+
+ if (!map_name)
+ return -ENOMEM;
+
+ map_opt = strchr(map_name, '.');
+ if (!map_opt) {
+ pr_debug("ERROR: Invalid map config: %s\n", map_name);
+ goto out;
+ }
+
+ *map_opt++ = '\0';
+ if (*map_opt == '\0') {
+ pr_debug("ERROR: Invalid map option: %s\n", term->config);
+ goto out;
+ }
+
+ map = bpf_object__find_map_by_name(obj, map_name);
+ if (!map) {
+ pr_debug("ERROR: Map %s doesn't exist\n", map_name);
+ err = -BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST;
+ goto out;
+ }
+
+ *key_scan_pos += strlen(map_opt);
+ err = config_map_indices_range_check(term, map, map_name);
+ if (err)
+ goto out;
+ *key_scan_pos -= strlen(map_opt);
+
+ for (i = 0; i < ARRAY_SIZE(bpf_obj_config__map_funcs); i++) {
+ struct bpf_obj_config__map_func *func =
+ &bpf_obj_config__map_funcs[i];
+
+ if (strcmp(map_opt, func->config_opt) == 0) {
+ err = func->config_func(map, term, evlist);
+ goto out;
+ }
+ }
+
+ pr_debug("ERROR: Invalid map config option '%s'\n", map_opt);
+ err = -BPF_LOADER_ERRNO__OBJCONF_MAP_OPT;
+out:
+	if (!err)
+		*key_scan_pos += strlen(map_opt);
+	/* map_opt points into map_name, so free it only after the last use */
+	free(map_name);
+	return err;
+}
+
+int bpf__config_obj(struct bpf_object *obj,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist,
+ int *error_pos)
+{
+ int key_scan_pos = 0;
+ int err;
+
+ if (!obj || !term || !term->config)
+ return -EINVAL;
+
+ if (strstarts(term->config, "map:")) {
+ key_scan_pos = sizeof("map:") - 1;
+ err = bpf__obj_config_map(obj, term, evlist, &key_scan_pos);
+ goto out;
+ }
+ err = -BPF_LOADER_ERRNO__OBJCONF_OPT;
+out:
+ if (error_pos)
+ *error_pos = key_scan_pos;
+ return err;
+}
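+
+/*
+ * A sketch of the accepted config-term syntax (file and map names here
+ * are illustrative, not from this patch). With a BPF object declaring
+ * a map 'channel', one could write:
+ *
+ *   perf record -e './prog.c/map:channel.value=10/' -- usleep 10
+ *   perf record -e evt -e './prog.c/map:channel.event=evt/' ...
+ *
+ * bpf__config_obj() receives each term; keys starting with "map:" are
+ * routed to bpf__obj_config_map(), which splits "<mapname>.<option>"
+ * and dispatches to bpf_map__config_value()/bpf_map__config_event().
+ */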
+
+typedef int (*map_config_func_t)(const char *name, int map_fd,
+ const struct bpf_map_def *pdef,
+ struct bpf_map_op *op,
+ void *pkey, void *arg);
+
+static int
+foreach_key_array_all(map_config_func_t func,
+ void *arg, const char *name,
+ int map_fd, const struct bpf_map_def *pdef,
+ struct bpf_map_op *op)
+{
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < pdef->max_entries; i++) {
+ err = func(name, map_fd, pdef, op, &i, arg);
+ if (err) {
+ pr_debug("ERROR: failed to insert value to %s[%u]\n",
+ name, i);
+ return err;
+ }
+ }
+ return 0;
+}
+
+static int
+foreach_key_array_ranges(map_config_func_t func, void *arg,
+ const char *name, int map_fd,
+ const struct bpf_map_def *pdef,
+ struct bpf_map_op *op)
+{
+ unsigned int i, j;
+ int err;
+
+ for (i = 0; i < op->k.array.nr_ranges; i++) {
+ unsigned int start = op->k.array.ranges[i].start;
+ size_t length = op->k.array.ranges[i].length;
+
+ for (j = 0; j < length; j++) {
+ unsigned int idx = start + j;
+
+ err = func(name, map_fd, pdef, op, &idx, arg);
+ if (err) {
+ pr_debug("ERROR: failed to insert value to %s[%u]\n",
+ name, idx);
+ return err;
+ }
+ }
+ }
+ return 0;
+}
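+
+/*
+ * Illustrative example (hypothetical ranges): for op->k.array holding
+ * the ranges {start = 0, length = 3} and {start = 5, length = 1}, the
+ * loops above invoke func() for keys 0, 1, 2 and 5.
+ */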
+
+static int
+bpf_map_config_foreach_key(struct bpf_map *map,
+ map_config_func_t func,
+ void *arg)
+{
+ int err, map_fd;
+ struct bpf_map_op *op;
+ const struct bpf_map_def *def;
+ const char *name = bpf_map__name(map);
+ struct bpf_map_priv *priv = bpf_map__priv(map);
+
+ if (IS_ERR(priv)) {
+ pr_debug("ERROR: failed to get private from map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ if (!priv || list_empty(&priv->ops_list)) {
+ pr_debug("INFO: nothing to config for map %s\n", name);
+ return 0;
+ }
+
+ def = bpf_map__def(map);
+ if (IS_ERR(def)) {
+ pr_debug("ERROR: failed to get definition from map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ map_fd = bpf_map__fd(map);
+ if (map_fd < 0) {
+ pr_debug("ERROR: failed to get fd from map %s\n", name);
+ return map_fd;
+ }
+
+ list_for_each_entry(op, &priv->ops_list, list) {
+ switch (def->type) {
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ switch (op->key_type) {
+ case BPF_MAP_KEY_ALL:
+ err = foreach_key_array_all(func, arg, name,
+ map_fd, def, op);
+ break;
+ case BPF_MAP_KEY_RANGES:
+ err = foreach_key_array_ranges(func, arg, name,
+ map_fd, def,
+ op);
+ break;
+ default:
+ pr_debug("ERROR: keytype for map '%s' invalid\n",
+ name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ if (err)
+ return err;
+ break;
+ default:
+ pr_debug("ERROR: type of '%s' incorrect\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE;
+ }
+ }
+
+ return 0;
+}
+
+static int
+apply_config_value_for_key(int map_fd, void *pkey,
+ size_t val_size, u64 val)
+{
+ int err = 0;
+
+ switch (val_size) {
+ case 1: {
+ u8 _val = (u8)(val);
+ err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+ break;
+ }
+ case 2: {
+ u16 _val = (u16)(val);
+ err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+ break;
+ }
+ case 4: {
+ u32 _val = (u32)(val);
+ err = bpf_map_update_elem(map_fd, pkey, &_val, BPF_ANY);
+ break;
+ }
+ case 8: {
+ err = bpf_map_update_elem(map_fd, pkey, &val, BPF_ANY);
+ break;
+ }
+ default:
+ pr_debug("ERROR: invalid value size\n");
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE;
+ }
+ if (err && errno)
+ err = -errno;
+ return err;
+}
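+
+/*
+ * Example (hypothetical values): for a map whose value_size is 4, a
+ * term like "map:channel.value=10" arrives here with val_size == 4, so
+ * the u64 10 is narrowed to a u32 before bpf_map_update_elem(); a
+ * value_size of, say, 3 hits the default case and fails with
+ * OBJCONF_MAP_VALUESIZE.
+ */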
+
+static int
+apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
+ struct perf_evsel *evsel)
+{
+ struct xyarray *xy = evsel->fd;
+ struct perf_event_attr *attr;
+ unsigned int key, events;
+ bool check_pass = false;
+ int *evt_fd;
+ int err;
+
+ if (!xy) {
+ pr_debug("ERROR: evsel not ready for map %s\n", name);
+ return -BPF_LOADER_ERRNO__INTERNAL;
+ }
+
+ if (xy->row_size / xy->entry_size != 1) {
+ pr_debug("ERROR: Dimension of target event is incorrect for map %s\n",
+ name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM;
+ }
+
+ attr = &evsel->attr;
+ if (attr->inherit) {
+ pr_debug("ERROR: Can't put inherit event into map %s\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
+ }
+
+ if (perf_evsel__is_bpf_output(evsel))
+ check_pass = true;
+ if (attr->type == PERF_TYPE_RAW)
+ check_pass = true;
+ if (attr->type == PERF_TYPE_HARDWARE)
+ check_pass = true;
+ if (!check_pass) {
+ pr_debug("ERROR: Event type is wrong for map %s\n", name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE;
+ }
+
+ events = xy->entries / (xy->row_size / xy->entry_size);
+ key = *((unsigned int *)pkey);
+ if (key >= events) {
+ pr_debug("ERROR: there is no event %d for map %s\n",
+ key, name);
+ return -BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE;
+ }
+ evt_fd = xyarray__entry(xy, key, 0);
+ err = bpf_map_update_elem(map_fd, pkey, evt_fd, BPF_ANY);
+ if (err && errno)
+ err = -errno;
+ return err;
+}
+
+static int
+apply_obj_config_map_for_key(const char *name, int map_fd,
+ const struct bpf_map_def *pdef,
+ struct bpf_map_op *op,
+ void *pkey, void *arg __maybe_unused)
+{
+ int err;
+
+ switch (op->op_type) {
+ case BPF_MAP_OP_SET_VALUE:
+ err = apply_config_value_for_key(map_fd, pkey,
+ pdef->value_size,
+ op->v.value);
+ break;
+ case BPF_MAP_OP_SET_EVSEL:
+ err = apply_config_evsel_for_key(name, map_fd, pkey,
+ op->v.evsel);
+ break;
+ default:
+ pr_debug("ERROR: unknown value type for '%s'\n", name);
+ err = -BPF_LOADER_ERRNO__INTERNAL;
+ }
+ return err;
+}
+
+static int
+apply_obj_config_map(struct bpf_map *map)
+{
+ return bpf_map_config_foreach_key(map,
+ apply_obj_config_map_for_key,
+ NULL);
+}
+
+static int
+apply_obj_config_object(struct bpf_object *obj)
+{
+ struct bpf_map *map;
+ int err;
+
+ bpf_map__for_each(map, obj) {
+ err = apply_obj_config_map(map);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int bpf__apply_obj_config(void)
+{
+ struct bpf_object *obj, *tmp;
+ int err;
+
+ bpf_object__for_each_safe(obj, tmp) {
+ err = apply_obj_config_object(obj);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+#define bpf__for_each_map(pos, obj, objtmp) \
+ bpf_object__for_each_safe(obj, objtmp) \
+ bpf_map__for_each(pos, obj)
+
+#define bpf__for_each_map_named(pos, obj, objtmp, name) \
+ bpf__for_each_map(pos, obj, objtmp) \
+ if (bpf_map__name(pos) && \
+ (strcmp(name, \
+ bpf_map__name(pos)) == 0))
+
+struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const char *name)
+{
+ struct bpf_map_priv *tmpl_priv = NULL;
+ struct bpf_object *obj, *tmp;
+ struct perf_evsel *evsel = NULL;
+ struct bpf_map *map;
+ int err;
+ bool need_init = false;
+
+ bpf__for_each_map_named(map, obj, tmp, name) {
+ struct bpf_map_priv *priv = bpf_map__priv(map);
+
+ if (IS_ERR(priv))
+ return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
+
+ /*
+ * No need to check map type: type should have been
+ * verified by kernel.
+ */
+		if (!need_init && !priv)
+			need_init = true;
+ if (!tmpl_priv && priv)
+ tmpl_priv = priv;
+ }
+
+ if (!need_init)
+ return NULL;
+
+ if (!tmpl_priv) {
+ char *event_definition = NULL;
+
+ if (asprintf(&event_definition, "bpf-output/no-inherit=1,name=%s/", name) < 0)
+ return ERR_PTR(-ENOMEM);
+
+ err = parse_events(evlist, event_definition, NULL);
+ free(event_definition);
+
+ if (err) {
+ pr_debug("ERROR: failed to create the \"%s\" bpf-output event\n", name);
+ return ERR_PTR(-err);
+ }
+
+ evsel = perf_evlist__last(evlist);
+ }
+
+ bpf__for_each_map_named(map, obj, tmp, name) {
+ struct bpf_map_priv *priv = bpf_map__priv(map);
+
+ if (IS_ERR(priv))
+ return ERR_PTR(-BPF_LOADER_ERRNO__INTERNAL);
+ if (priv)
+ continue;
+
+ if (tmpl_priv) {
+ priv = bpf_map_priv__clone(tmpl_priv);
+ if (!priv)
+ return ERR_PTR(-ENOMEM);
+
+ err = bpf_map__set_priv(map, priv, bpf_map_priv__clear);
+ if (err) {
+ bpf_map_priv__clear(map, priv);
+ return ERR_PTR(err);
+ }
+ } else if (evsel) {
+ struct bpf_map_op *op;
+
+ op = bpf_map__add_newop(map, NULL);
+ if (IS_ERR(op))
+ return ERR_PTR(PTR_ERR(op));
+ op->op_type = BPF_MAP_OP_SET_EVSEL;
+ op->v.evsel = evsel;
+ }
+ }
+
+ return evsel;
+}
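+
+/*
+ * Usage sketch: bpf__setup_stdout() below calls this with the name
+ * "__bpf_stdout__". If any loaded object declares a map with that name
+ * and none of them has been configured yet, a
+ * "bpf-output/no-inherit=1,name=<name>/" event is parsed into the
+ * evlist and attached to every unconfigured map through a
+ * BPF_MAP_OP_SET_EVSEL op.
+ */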
+
+int bpf__setup_stdout(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel = bpf__setup_output_event(evlist, "__bpf_stdout__");
+ return IS_ERR(evsel) ? PTR_ERR(evsel) : 0;
+}
+
+#define ERRNO_OFFSET(e) ((e) - __BPF_LOADER_ERRNO__START)
+#define ERRCODE_OFFSET(c) ERRNO_OFFSET(BPF_LOADER_ERRNO__##c)
+#define NR_ERRNO (__BPF_LOADER_ERRNO__END - __BPF_LOADER_ERRNO__START)
+
+static const char *bpf_loader_strerror_table[NR_ERRNO] = {
+ [ERRCODE_OFFSET(CONFIG)] = "Invalid config string",
+ [ERRCODE_OFFSET(GROUP)] = "Invalid group name",
+ [ERRCODE_OFFSET(EVENTNAME)] = "No event name found in config string",
+ [ERRCODE_OFFSET(INTERNAL)] = "BPF loader internal error",
+ [ERRCODE_OFFSET(COMPILE)] = "Error when compiling BPF scriptlet",
+ [ERRCODE_OFFSET(PROGCONF_TERM)] = "Invalid program config term in config string",
+ [ERRCODE_OFFSET(PROLOGUE)] = "Failed to generate prologue",
+ [ERRCODE_OFFSET(PROLOGUE2BIG)] = "Prologue too big for program",
+	[ERRCODE_OFFSET(PROLOGUEOOB)]	= "Offset out of bounds for prologue",
+ [ERRCODE_OFFSET(OBJCONF_OPT)] = "Invalid object config option",
+ [ERRCODE_OFFSET(OBJCONF_CONF)] = "Config value not set (missing '=')",
+ [ERRCODE_OFFSET(OBJCONF_MAP_OPT)] = "Invalid object map config option",
+ [ERRCODE_OFFSET(OBJCONF_MAP_NOTEXIST)] = "Target map doesn't exist",
+ [ERRCODE_OFFSET(OBJCONF_MAP_VALUE)] = "Incorrect value type for map",
+ [ERRCODE_OFFSET(OBJCONF_MAP_TYPE)] = "Incorrect map type",
+ [ERRCODE_OFFSET(OBJCONF_MAP_KEYSIZE)] = "Incorrect map key size",
+ [ERRCODE_OFFSET(OBJCONF_MAP_VALUESIZE)] = "Incorrect map value size",
+ [ERRCODE_OFFSET(OBJCONF_MAP_NOEVT)] = "Event not found for map setting",
+ [ERRCODE_OFFSET(OBJCONF_MAP_MAPSIZE)] = "Invalid map size for event setting",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTDIM)] = "Event dimension too large",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTINH)] = "Doesn't support inherit event",
+ [ERRCODE_OFFSET(OBJCONF_MAP_EVTTYPE)] = "Wrong event type for map",
+ [ERRCODE_OFFSET(OBJCONF_MAP_IDX2BIG)] = "Index too large",
+};
+
+static int
+bpf_loader_strerror(int err, char *buf, size_t size)
+{
+ char sbuf[STRERR_BUFSIZE];
+ const char *msg;
+
+ if (!buf || !size)
+ return -1;
+
+ err = err > 0 ? err : -err;
+
+ if (err >= __LIBBPF_ERRNO__START)
+ return libbpf_strerror(err, buf, size);
+
+ if (err >= __BPF_LOADER_ERRNO__START && err < __BPF_LOADER_ERRNO__END) {
+ msg = bpf_loader_strerror_table[ERRNO_OFFSET(err)];
+ snprintf(buf, size, "%s", msg);
+ buf[size - 1] = '\0';
+ return 0;
+ }
+
+ if (err >= __BPF_LOADER_ERRNO__END)
+ snprintf(buf, size, "Unknown bpf loader error %d", err);
+ else
+ snprintf(buf, size, "%s",
+ str_error_r(err, sbuf, sizeof(sbuf)));
+
+ buf[size - 1] = '\0';
+ return -1;
+}
+
+#define bpf__strerror_head(err, buf, size) \
+ char sbuf[STRERR_BUFSIZE], *emsg;\
+ if (!size)\
+ return 0;\
+ if (err < 0)\
+ err = -err;\
+ bpf_loader_strerror(err, sbuf, sizeof(sbuf));\
+ emsg = sbuf;\
+ switch (err) {\
+ default:\
+ scnprintf(buf, size, "%s", emsg);\
+ break;
+
+#define bpf__strerror_entry(val, fmt...)\
+ case val: {\
+ scnprintf(buf, size, fmt);\
+ break;\
+ }
+
+#define bpf__strerror_end(buf, size)\
+ }\
+ buf[size - 1] = '\0';
+
+int bpf__strerror_prepare_load(const char *filename, bool source,
+ int err, char *buf, size_t size)
+{
+ size_t n;
+ int ret;
+
+ n = snprintf(buf, size, "Failed to load %s%s: ",
+ filename, source ? " from source" : "");
+ if (n >= size) {
+ buf[size - 1] = '\0';
+ return 0;
+ }
+ buf += n;
+ size -= n;
+
+ ret = bpf_loader_strerror(err, buf, size);
+ buf[size - 1] = '\0';
+ return ret;
+}
+
+int bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
+ int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ case BPF_LOADER_ERRNO__PROGCONF_TERM: {
+ scnprintf(buf, size, "%s (add -v to see detail)", emsg);
+ break;
+ }
+	bpf__strerror_entry(EEXIST, "Probe point exists. Try 'perf probe -d \"*\"' and set 'force=yes'");
+ bpf__strerror_entry(EACCES, "You need to be root");
+ bpf__strerror_entry(EPERM, "You need to be root, and /proc/sys/kernel/kptr_restrict should be 0");
+ bpf__strerror_entry(ENOENT, "You need to check probing points in BPF file");
+ bpf__strerror_end(buf, size);
+ return 0;
+}
+
+int bpf__strerror_load(struct bpf_object *obj,
+ int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ case LIBBPF_ERRNO__KVER: {
+ unsigned int obj_kver = bpf_object__kversion(obj);
+ unsigned int real_kver;
+
+ if (fetch_kernel_version(&real_kver, NULL, 0)) {
+ scnprintf(buf, size, "Unable to fetch kernel version");
+ break;
+ }
+
+ if (obj_kver != real_kver) {
+ scnprintf(buf, size,
+ "'version' ("KVER_FMT") doesn't match running kernel ("KVER_FMT")",
+ KVER_PARAM(obj_kver),
+ KVER_PARAM(real_kver));
+ break;
+ }
+
+ scnprintf(buf, size, "Failed to load program for unknown reason");
+ break;
+ }
+ bpf__strerror_end(buf, size);
+ return 0;
+}
+
+int bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+ struct parse_events_term *term __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int *error_pos __maybe_unused, int err,
+ char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE,
+ "Can't use this config term with this map type");
+ bpf__strerror_end(buf, size);
+ return 0;
+}
+
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM,
+ "Cannot set event to BPF map in multi-thread tracing");
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH,
+ "%s (Hint: use -i to turn off inherit)", emsg);
+ bpf__strerror_entry(BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE,
+ "Can only put raw, hardware and BPF output event into a BPF map");
+ bpf__strerror_end(buf, size);
+ return 0;
+}
+
+int bpf__strerror_setup_output_event(struct perf_evlist *evlist __maybe_unused,
+ int err, char *buf, size_t size)
+{
+ bpf__strerror_head(err, buf, size);
+ bpf__strerror_end(buf, size);
+ return 0;
+}
diff --git a/tools/perf/util/bpf-loader.h b/tools/perf/util/bpf-loader.h
new file mode 100644
index 000000000..62d245a90
--- /dev/null
+++ b/tools/perf/util/bpf-loader.h
@@ -0,0 +1,214 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_LOADER_H
+#define __BPF_LOADER_H
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <string.h>
+#include <bpf/libbpf.h>
+#include "probe-event.h"
+#include "evlist.h"
+#include "debug.h"
+
+enum bpf_loader_errno {
+ __BPF_LOADER_ERRNO__START = __LIBBPF_ERRNO__START - 100,
+ /* Invalid config string */
+ BPF_LOADER_ERRNO__CONFIG = __BPF_LOADER_ERRNO__START,
+ BPF_LOADER_ERRNO__GROUP, /* Invalid group name */
+ BPF_LOADER_ERRNO__EVENTNAME, /* Event name is missing */
+ BPF_LOADER_ERRNO__INTERNAL, /* BPF loader internal error */
+ BPF_LOADER_ERRNO__COMPILE, /* Error when compiling BPF scriptlet */
+ BPF_LOADER_ERRNO__PROGCONF_TERM,/* Invalid program config term in config string */
+ BPF_LOADER_ERRNO__PROLOGUE, /* Failed to generate prologue */
+ BPF_LOADER_ERRNO__PROLOGUE2BIG, /* Prologue too big for program */
+	BPF_LOADER_ERRNO__PROLOGUEOOB,	/* Offset out of bounds for prologue */
+ BPF_LOADER_ERRNO__OBJCONF_OPT, /* Invalid object config option */
+	BPF_LOADER_ERRNO__OBJCONF_CONF,	/* Config value not set (missing '=') */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_OPT, /* Invalid object map config option */
+	BPF_LOADER_ERRNO__OBJCONF_MAP_NOTEXIST, /* Target map doesn't exist */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE, /* Incorrect value type for map */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_TYPE, /* Incorrect map type */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_KEYSIZE, /* Incorrect map key size */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_VALUESIZE,/* Incorrect map value size */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_NOEVT, /* Event not found for map setting */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_MAPSIZE, /* Invalid map size for event setting */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTDIM, /* Event dimension too large */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH, /* Doesn't support inherit event */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_EVTTYPE, /* Wrong event type for map */
+ BPF_LOADER_ERRNO__OBJCONF_MAP_IDX2BIG, /* Index too large */
+ __BPF_LOADER_ERRNO__END,
+};
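+
+/*
+ * Note: libbpf places __LIBBPF_ERRNO__START at 4000, so (assuming that
+ * value) the loader error numbers above occupy the 3900+ range, which
+ * lets bpf_loader_strerror() distinguish them from both ordinary
+ * errnos and libbpf's own codes.
+ */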
+
+struct perf_evsel;
+struct bpf_object;
+struct parse_events_term;
+#define PERF_BPF_PROBE_GROUP "perf_bpf_probe"
+
+typedef int (*bpf_prog_iter_callback_t)(const char *group, const char *event,
+ int fd, void *arg);
+
+#ifdef HAVE_LIBBPF_SUPPORT
+struct bpf_object *bpf__prepare_load(const char *filename, bool source);
+int bpf__strerror_prepare_load(const char *filename, bool source,
+ int err, char *buf, size_t size);
+
+struct bpf_object *bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz,
+ const char *name);
+
+void bpf__clear(void);
+
+int bpf__probe(struct bpf_object *obj);
+int bpf__unprobe(struct bpf_object *obj);
+int bpf__strerror_probe(struct bpf_object *obj, int err,
+ char *buf, size_t size);
+
+int bpf__load(struct bpf_object *obj);
+int bpf__strerror_load(struct bpf_object *obj, int err,
+ char *buf, size_t size);
+int bpf__foreach_event(struct bpf_object *obj,
+ bpf_prog_iter_callback_t func, void *arg);
+
+int bpf__config_obj(struct bpf_object *obj, struct parse_events_term *term,
+ struct perf_evlist *evlist, int *error_pos);
+int bpf__strerror_config_obj(struct bpf_object *obj,
+ struct parse_events_term *term,
+ struct perf_evlist *evlist,
+ int *error_pos, int err, char *buf,
+ size_t size);
+int bpf__apply_obj_config(void);
+int bpf__strerror_apply_obj_config(int err, char *buf, size_t size);
+
+int bpf__setup_stdout(struct perf_evlist *evlist);
+struct perf_evsel *bpf__setup_output_event(struct perf_evlist *evlist, const char *name);
+int bpf__strerror_setup_output_event(struct perf_evlist *evlist, int err, char *buf, size_t size);
+#else
+#include <errno.h>
+
+static inline struct bpf_object *
+bpf__prepare_load(const char *filename __maybe_unused,
+ bool source __maybe_unused)
+{
+ pr_debug("ERROR: eBPF object loading is disabled during compiling.\n");
+ return ERR_PTR(-ENOTSUP);
+}
+
+static inline struct bpf_object *
+bpf__prepare_load_buffer(void *obj_buf __maybe_unused,
+			 size_t obj_buf_sz __maybe_unused,
+			 const char *name __maybe_unused)
+{
+ return ERR_PTR(-ENOTSUP);
+}
+
+static inline void bpf__clear(void) { }
+
+static inline int bpf__probe(struct bpf_object *obj __maybe_unused) { return 0;}
+static inline int bpf__unprobe(struct bpf_object *obj __maybe_unused) { return 0;}
+static inline int bpf__load(struct bpf_object *obj __maybe_unused) { return 0; }
+
+static inline int
+bpf__foreach_event(struct bpf_object *obj __maybe_unused,
+ bpf_prog_iter_callback_t func __maybe_unused,
+ void *arg __maybe_unused)
+{
+ return 0;
+}
+
+static inline int
+bpf__config_obj(struct bpf_object *obj __maybe_unused,
+ struct parse_events_term *term __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int *error_pos __maybe_unused)
+{
+ return 0;
+}
+
+static inline int
+bpf__apply_obj_config(void)
+{
+ return 0;
+}
+
+static inline int
+bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static inline struct perf_evsel *
+bpf__setup_output_event(struct perf_evlist *evlist __maybe_unused, const char *name __maybe_unused)
+{
+ return NULL;
+}
+
+static inline int
+__bpf_strerror(char *buf, size_t size)
+{
+ if (!size)
+ return 0;
+ strncpy(buf,
+ "ERROR: eBPF object loading is disabled during compiling.\n",
+ size);
+ buf[size - 1] = '\0';
+ return 0;
+}
+
+static inline
+int bpf__strerror_prepare_load(const char *filename __maybe_unused,
+ bool source __maybe_unused,
+ int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_probe(struct bpf_object *obj __maybe_unused,
+ int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
+
+static inline int bpf__strerror_load(struct bpf_object *obj __maybe_unused,
+ int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_config_obj(struct bpf_object *obj __maybe_unused,
+ struct parse_events_term *term __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int *error_pos __maybe_unused,
+ int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_apply_obj_config(int err __maybe_unused,
+ char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
+
+static inline int
+bpf__strerror_setup_output_event(struct perf_evlist *evlist __maybe_unused,
+ int err __maybe_unused, char *buf, size_t size)
+{
+ return __bpf_strerror(buf, size);
+}
+
+#endif
+
+static inline int bpf__strerror_setup_stdout(struct perf_evlist *evlist, int err, char *buf, size_t size)
+{
+ return bpf__strerror_setup_output_event(evlist, err, buf, size);
+}
+#endif
diff --git a/tools/perf/util/bpf-prologue.c b/tools/perf/util/bpf-prologue.c
new file mode 100644
index 000000000..77e4891e1
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.c
@@ -0,0 +1,502 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf-prologue.c
+ *
+ * Copyright (C) 2015 He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ */
+
+#include <bpf/libbpf.h>
+#include "perf.h"
+#include "debug.h"
+#include "bpf-loader.h"
+#include "bpf-prologue.h"
+#include "probe-finder.h"
+#include <errno.h>
+#include <dwarf-regs.h>
+#include <linux/filter.h>
+
+#define BPF_REG_SIZE 8
+
+#define JMP_TO_ERROR_CODE -1
+#define JMP_TO_SUCCESS_CODE -2
+#define JMP_TO_USER_CODE -3
+
+struct bpf_insn_pos {
+ struct bpf_insn *begin;
+ struct bpf_insn *end;
+ struct bpf_insn *pos;
+};
+
+static inline int
+pos_get_cnt(struct bpf_insn_pos *pos)
+{
+ return pos->pos - pos->begin;
+}
+
+static int
+append_insn(struct bpf_insn new_insn, struct bpf_insn_pos *pos)
+{
+ if (!pos->pos)
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+ if (pos->pos + 1 >= pos->end) {
+ pr_err("bpf prologue: prologue too long\n");
+ pos->pos = NULL;
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+ }
+
+ *(pos->pos)++ = new_insn;
+ return 0;
+}
+
+static int
+check_pos(struct bpf_insn_pos *pos)
+{
+ if (!pos->pos || pos->pos >= pos->end)
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+ return 0;
+}
+
+/*
+ * Convert type string (u8/u16/u32/u64/s8/s16/s32/s64 ..., see
+ * Documentation/trace/kprobetrace.rst) to size field of BPF_LDX_MEM
+ * instruction (BPF_{B,H,W,DW}).
+ */
+static int
+argtype_to_ldx_size(const char *type)
+{
+ int arg_size = type ? atoi(&type[1]) : 64;
+
+ switch (arg_size) {
+ case 8:
+ return BPF_B;
+ case 16:
+ return BPF_H;
+ case 32:
+ return BPF_W;
+ case 64:
+ default:
+ return BPF_DW;
+ }
+}
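+
+/*
+ * For example, a probe argument typed "u32" or "s32" yields BPF_W
+ * (atoi("32")), "u8" yields BPF_B, and a missing type string defaults
+ * to BPF_DW (64-bit).
+ */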
+
+static const char *
+insn_sz_to_str(int insn_sz)
+{
+ switch (insn_sz) {
+ case BPF_B:
+ return "BPF_B";
+ case BPF_H:
+ return "BPF_H";
+ case BPF_W:
+ return "BPF_W";
+ case BPF_DW:
+ return "BPF_DW";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+/* Give it a shorter name */
+#define ins(i, p) append_insn((i), (p))
+
+/*
+ * Given a register name (in 'reg'), generate an instruction to
+ * load that register into the eBPF register 'target_reg':
+ * 'ldd target_reg, offset(ctx_reg)', where
+ * ctx_reg is pre-initialized to a pointer to 'struct pt_regs'.
+ */
+static int
+gen_ldx_reg_from_ctx(struct bpf_insn_pos *pos, int ctx_reg,
+ const char *reg, int target_reg)
+{
+ int offset = regs_query_register_offset(reg);
+
+ if (offset < 0) {
+ pr_err("bpf: prologue: failed to get register %s\n",
+ reg);
+ return offset;
+ }
+ ins(BPF_LDX_MEM(BPF_DW, target_reg, ctx_reg, offset), pos);
+
+ return check_pos(pos);
+}
+
+/*
+ * Generate a BPF_FUNC_probe_read function call.
+ *
+ * src_base_addr_reg is a register holding the base address,
+ * dst_addr_reg is a register holding the dest address (on the stack),
+ * and the result is:
+ *
+ * *[dst_addr_reg] = *([src_base_addr_reg] + offset)
+ *
+ * Arguments of BPF_FUNC_probe_read:
+ * ARG1: ptr to stack (dest)
+ * ARG2: size (8)
+ * ARG3: unsafe ptr (src)
+ */
+static int
+gen_read_mem(struct bpf_insn_pos *pos,
+ int src_base_addr_reg,
+ int dst_addr_reg,
+ long offset)
+{
+ /* mov arg3, src_base_addr_reg */
+ if (src_base_addr_reg != BPF_REG_ARG3)
+ ins(BPF_MOV64_REG(BPF_REG_ARG3, src_base_addr_reg), pos);
+ /* add arg3, #offset */
+ if (offset)
+ ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG3, offset), pos);
+
+ /* mov arg2, #reg_size */
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_REG_ARG2, BPF_REG_SIZE), pos);
+
+ /* mov arg1, dst_addr_reg */
+ if (dst_addr_reg != BPF_REG_ARG1)
+ ins(BPF_MOV64_REG(BPF_REG_ARG1, dst_addr_reg), pos);
+
+ /* Call probe_read */
+ ins(BPF_EMIT_CALL(BPF_FUNC_probe_read), pos);
+ /*
+	 * Error processing: if the read fails, jump to the error
+	 * code; the jump will be relocated so that its target is
+	 * the start of the error processing code.
+ */
+ ins(BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, JMP_TO_ERROR_CODE),
+ pos);
+
+ return check_pos(pos);
+}
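+
+/*
+ * A sketch of the sequence emitted above, assuming the source base
+ * address is already in r3 and a hypothetical offset of 16:
+ *
+ *   r3 += 16          // BPF_ALU64_IMM(BPF_ADD, ...)
+ *   r2 = 8            // BPF_REG_SIZE
+ *   r1 = dst_addr_reg // address of the stack slot
+ *   call probe_read
+ *   if r0 != 0 goto <error_code>  // relocated later
+ */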
+
+/*
+ * Each arg should be a bare register. Fetch and save them into the
+ * argument registers (r3 - r5).
+ *
+ * BPF_REG_1 should have been initialized with a pointer to
+ * 'struct pt_regs'.
+ */
+static int
+gen_prologue_fastpath(struct bpf_insn_pos *pos,
+ struct probe_trace_arg *args, int nargs)
+{
+ int i, err = 0;
+
+ for (i = 0; i < nargs; i++) {
+ err = gen_ldx_reg_from_ctx(pos, BPF_REG_1, args[i].value,
+ BPF_PROLOGUE_START_ARG_REG + i);
+ if (err)
+ goto errout;
+ }
+
+ return check_pos(pos);
+errout:
+ return err;
+}
+
+/*
+ * Slow path:
+ * At least one argument has the form of 'offset($rx)'.
+ *
+ * The following code first stores them onto the stack, then loads them
+ * all into r2 - r5.
+ * Before the final loading, the stack layout should be:
+ *
+ * low address
+ * BPF_REG_FP - 24 ARG3
+ * BPF_REG_FP - 16 ARG2
+ * BPF_REG_FP - 8 ARG1
+ * BPF_REG_FP
+ * high address
+ *
+ * For each argument (described as: offn(...off2(off1(reg)))),
+ * the following code is generated:
+ *
+ * r7 <- fp
+ * r7 <- r7 - stack_offset // Ideally r7 would be initialized from
+ *                         // fp once, before generating args.
+ *                         // However, the verifier won't regard r7
+ *                         // as a stack pointer if it is derived by
+ *                         // subtracting an offset from any register
+ *                         // other than fp, so r7 must be reset to
+ *                         // fp for each variable.
+ * r3 <- value of 'reg'-> generated using gen_ldx_reg_from_ctx()
+ * (r7) <- r3 // skip following instructions for bare reg
+ * r3 <- r3 + off1 . // skip if off1 == 0
+ * r2 <- 8 \
+ * r1 <- r7 |-> generated by gen_read_mem()
+ * call probe_read /
+ * jnei r0, 0, err ./
+ * r3 <- (r7)
+ * r3 <- r3 + off2 . // skip if off2 == 0
+ * r2 <- 8 \ // r2 may be broken by probe_read, so set again
+ * r1 <- r7 |-> generated by gen_read_mem()
+ * call probe_read /
+ * jnei r0, 0, err ./
+ * ...
+ */
+static int
+gen_prologue_slowpath(struct bpf_insn_pos *pos,
+ struct probe_trace_arg *args, int nargs)
+{
+ int err, i;
+
+ for (i = 0; i < nargs; i++) {
+ struct probe_trace_arg *arg = &args[i];
+ const char *reg = arg->value;
+ struct probe_trace_arg_ref *ref = NULL;
+ int stack_offset = (i + 1) * -8;
+
+ pr_debug("prologue: fetch arg %d, base reg is %s\n",
+ i, reg);
+
+ /* value of base register is stored into ARG3 */
+ err = gen_ldx_reg_from_ctx(pos, BPF_REG_CTX, reg,
+ BPF_REG_ARG3);
+ if (err) {
+ pr_err("prologue: failed to get offset of register %s\n",
+ reg);
+ goto errout;
+ }
+
+ /* Make r7 the stack pointer. */
+ ins(BPF_MOV64_REG(BPF_REG_7, BPF_REG_FP), pos);
+ /* r7 += -8 */
+ ins(BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, stack_offset), pos);
+ /*
+ * Store r3 (base register) onto stack
+ * Ensure fp[offset] is set.
+ * fp is the only valid base register when storing
+ * into stack. We are not allowed to use r7 as base
+ * register here.
+ */
+ ins(BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_ARG3,
+ stack_offset), pos);
+
+ ref = arg->ref;
+ while (ref) {
+ pr_debug("prologue: arg %d: offset %ld\n",
+ i, ref->offset);
+ err = gen_read_mem(pos, BPF_REG_3, BPF_REG_7,
+ ref->offset);
+ if (err) {
+ pr_err("prologue: failed to generate probe_read function call\n");
+ goto errout;
+ }
+
+ ref = ref->next;
+ /*
+ * Load previous result into ARG3. Use
+ * BPF_REG_FP instead of r7 because verifier
+ * allows FP based addressing only.
+ */
+ if (ref)
+ ins(BPF_LDX_MEM(BPF_DW, BPF_REG_ARG3,
+ BPF_REG_FP, stack_offset), pos);
+ }
+ }
+
+ /* Final pass: read to registers */
+ for (i = 0; i < nargs; i++) {
+ int insn_sz = (args[i].ref) ? argtype_to_ldx_size(args[i].type) : BPF_DW;
+
+ pr_debug("prologue: load arg %d, insn_sz is %s\n",
+ i, insn_sz_to_str(insn_sz));
+ ins(BPF_LDX_MEM(insn_sz, BPF_PROLOGUE_START_ARG_REG + i,
+ BPF_REG_FP, -BPF_REG_SIZE * (i + 1)), pos);
+ }
+
+ ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_SUCCESS_CODE), pos);
+
+ return check_pos(pos);
+errout:
+ return err;
+}
+
+static int
+prologue_relocate(struct bpf_insn_pos *pos, struct bpf_insn *error_code,
+ struct bpf_insn *success_code, struct bpf_insn *user_code)
+{
+ struct bpf_insn *insn;
+
+ if (check_pos(pos))
+ return -BPF_LOADER_ERRNO__PROLOGUE2BIG;
+
+ for (insn = pos->begin; insn < pos->pos; insn++) {
+ struct bpf_insn *target;
+ u8 class = BPF_CLASS(insn->code);
+ u8 opcode;
+
+ if (class != BPF_JMP)
+ continue;
+ opcode = BPF_OP(insn->code);
+ if (opcode == BPF_CALL)
+ continue;
+
+ switch (insn->off) {
+ case JMP_TO_ERROR_CODE:
+ target = error_code;
+ break;
+ case JMP_TO_SUCCESS_CODE:
+ target = success_code;
+ break;
+ case JMP_TO_USER_CODE:
+ target = user_code;
+ break;
+ default:
+ pr_err("bpf prologue: internal error: relocation failed\n");
+ return -BPF_LOADER_ERRNO__PROLOGUE;
+ }
+
+ insn->off = target - (insn + 1);
+ }
+ return 0;
+}
+
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space)
+{
+ struct bpf_insn *success_code = NULL;
+ struct bpf_insn *error_code = NULL;
+ struct bpf_insn *user_code = NULL;
+ struct bpf_insn_pos pos;
+ bool fastpath = true;
+ int err = 0, i;
+
+ if (!new_prog || !new_cnt)
+ return -EINVAL;
+
+ if (cnt_space > BPF_MAXINSNS)
+ cnt_space = BPF_MAXINSNS;
+
+ pos.begin = new_prog;
+ pos.end = new_prog + cnt_space;
+ pos.pos = new_prog;
+
+ if (!nargs) {
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0),
+ &pos);
+
+ if (check_pos(&pos))
+ goto errout;
+
+ *new_cnt = pos_get_cnt(&pos);
+ return 0;
+ }
+
+ if (nargs > BPF_PROLOGUE_MAX_ARGS) {
+ pr_warning("bpf: prologue: %d arguments are dropped\n",
+ nargs - BPF_PROLOGUE_MAX_ARGS);
+ nargs = BPF_PROLOGUE_MAX_ARGS;
+ }
+
+ /* First pass: validation */
+ for (i = 0; i < nargs; i++) {
+ struct probe_trace_arg_ref *ref = args[i].ref;
+
+ if (args[i].value[0] == '@') {
+ /* TODO: fetch global variable */
+ pr_err("bpf: prologue: global %s%+ld not support\n",
+ args[i].value, ref ? ref->offset : 0);
+ return -ENOTSUP;
+ }
+
+ while (ref) {
+			/* fastpath is true if all args have ref == NULL */
+ fastpath = false;
+
+ /*
+			 * The instruction encodes the immediate value as
+			 * an s32, while ref->offset is a long. On systems
+			 * where a long can't fit in an s32, refuse to
+			 * process if ref->offset is too large (or too small).
+ */
+#ifdef __LP64__
+#define OFFSET_MAX ((1LL << 31) - 1)
+#define OFFSET_MIN ((1LL << 31) * -1)
+ if (ref->offset > OFFSET_MAX ||
+ ref->offset < OFFSET_MIN) {
+ pr_err("bpf: prologue: offset out of bound: %ld\n",
+ ref->offset);
+ return -BPF_LOADER_ERRNO__PROLOGUEOOB;
+ }
+#endif
+ ref = ref->next;
+ }
+ }
+ pr_debug("prologue: pass validation\n");
+
+ if (fastpath) {
+ /* If all variables are registers... */
+ pr_debug("prologue: fast path\n");
+ err = gen_prologue_fastpath(&pos, args, nargs);
+ if (err)
+ goto errout;
+ } else {
+ pr_debug("prologue: slow path\n");
+
+ /* Initialization: move ctx to a callee saved register. */
+ ins(BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1), &pos);
+
+ err = gen_prologue_slowpath(&pos, args, nargs);
+ if (err)
+ goto errout;
+ /*
+		 * start of ERROR_CODE (only the slow path needs error code)
+		 *   mov r2 <- 1  // r2 is the error number
+		 *   mov r3 <- 0  // r3, r4... must be touched or the
+		 *                // verifier would complain
+ * mov r4 <- 0
+ * ...
+ * goto usercode
+ */
+ error_code = pos.pos;
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 1),
+ &pos);
+
+ for (i = 0; i < nargs; i++)
+ ins(BPF_ALU64_IMM(BPF_MOV,
+ BPF_PROLOGUE_START_ARG_REG + i,
+ 0),
+ &pos);
+ ins(BPF_JMP_IMM(BPF_JA, BPF_REG_0, 0, JMP_TO_USER_CODE),
+ &pos);
+ }
+
+ /*
+ * start of SUCCESS_CODE:
+ * mov r2 <- 0
+ * goto usercode // skip
+ */
+ success_code = pos.pos;
+ ins(BPF_ALU64_IMM(BPF_MOV, BPF_PROLOGUE_FETCH_RESULT_REG, 0), &pos);
+
+ /*
+ * start of USER_CODE:
+ * Restore ctx to r1
+ */
+ user_code = pos.pos;
+ if (!fastpath) {
+ /*
+		 * Only the slow path needs to restore ctx. In the fast
+		 * path, registers are loaded directly from r1.
+ */
+ ins(BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX), &pos);
+ err = prologue_relocate(&pos, error_code, success_code,
+ user_code);
+ if (err)
+ goto errout;
+ }
+
+ err = check_pos(&pos);
+ if (err)
+ goto errout;
+
+ *new_cnt = pos_get_cnt(&pos);
+ return 0;
+errout:
+ return err;
+}
diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
new file mode 100644
index 000000000..c50c73580
--- /dev/null
+++ b/tools/perf/util/bpf-prologue.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, He Kuang <hekuang@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __BPF_PROLOGUE_H
+#define __BPF_PROLOGUE_H
+
+#include <linux/compiler.h>
+#include <linux/filter.h>
+#include "probe-event.h"
+
+#define BPF_PROLOGUE_MAX_ARGS 3
+#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
+#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2
+
+#ifdef HAVE_BPF_PROLOGUE
+int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
+ struct bpf_insn *new_prog, size_t *new_cnt,
+ size_t cnt_space);
+#else
+#include <errno.h>
+
+static inline int
+bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused,
+ int nargs __maybe_unused,
+ struct bpf_insn *new_prog __maybe_unused,
+ size_t *new_cnt,
+ size_t cnt_space __maybe_unused)
+{
+ if (!new_cnt)
+ return -EINVAL;
+ *new_cnt = 0;
+ return -ENOTSUP;
+}
+#endif
+#endif /* __BPF_PROLOGUE_H */
diff --git a/tools/perf/util/branch.c b/tools/perf/util/branch.c
new file mode 100644
index 000000000..a4fce2729
--- /dev/null
+++ b/tools/perf/util/branch.c
@@ -0,0 +1,147 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/branch.h"
+
+static bool cross_area(u64 addr1, u64 addr2, int size)
+{
+ u64 align1, align2;
+
+ align1 = addr1 & ~(size - 1);
+ align2 = addr2 & ~(size - 1);
+
+	return align1 != align2;
+}
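+
+/*
+ * Example: with size = 4096, addresses 0x1fff and 0x2000 fall into
+ * different 4K areas (0x1000 vs 0x2000 after masking), so a branch
+ * between them counts as crossing a 4K boundary.
+ */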
+
+#define AREA_4K 4096
+#define AREA_2M (2 * 1024 * 1024)
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+ u64 from, u64 to)
+{
+ if (flags->type == PERF_BR_UNKNOWN || from == 0)
+ return;
+
+ st->counts[flags->type]++;
+
+ if (flags->type == PERF_BR_COND) {
+ if (to > from)
+ st->cond_fwd++;
+ else
+ st->cond_bwd++;
+ }
+
+ if (cross_area(from, to, AREA_2M))
+ st->cross_2m++;
+ else if (cross_area(from, to, AREA_4K))
+ st->cross_4k++;
+}
+
+const char *branch_type_name(int type)
+{
+ const char *branch_names[PERF_BR_MAX] = {
+ "N/A",
+ "COND",
+ "UNCOND",
+ "IND",
+ "CALL",
+ "IND_CALL",
+ "RET",
+ "SYSCALL",
+ "SYSRET",
+ "COND_CALL",
+ "COND_RET"
+ };
+
+ if (type >= 0 && type < PERF_BR_MAX)
+ return branch_names[type];
+
+ return NULL;
+}
+
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st)
+{
+ u64 total = 0;
+ int i;
+
+ for (i = 0; i < PERF_BR_MAX; i++)
+ total += st->counts[i];
+
+ if (total == 0)
+ return;
+
+ fprintf(fp, "\n#");
+ fprintf(fp, "\n# Branch Statistics:");
+ fprintf(fp, "\n#");
+
+ if (st->cond_fwd > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "COND_FWD",
+ 100.0 * (double)st->cond_fwd / (double)total);
+ }
+
+ if (st->cond_bwd > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "COND_BWD",
+ 100.0 * (double)st->cond_bwd / (double)total);
+ }
+
+ if (st->cross_4k > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "CROSS_4K",
+ 100.0 * (double)st->cross_4k / (double)total);
+ }
+
+ if (st->cross_2m > 0) {
+ fprintf(fp, "\n%8s: %5.1f%%",
+ "CROSS_2M",
+ 100.0 * (double)st->cross_2m / (double)total);
+ }
+
+ for (i = 0; i < PERF_BR_MAX; i++) {
+ if (st->counts[i] > 0)
+ fprintf(fp, "\n%8s: %5.1f%%",
+ branch_type_name(i),
+ 100.0 *
+ (double)st->counts[i] / (double)total);
+ }
+}
+
+static int count_str_scnprintf(int idx, const char *str, char *bf, int size)
+{
+ return scnprintf(bf, size, "%s%s", (idx) ? " " : " (", str);
+}
+
+int branch_type_str(struct branch_type_stat *st, char *bf, int size)
+{
+ int i, j = 0, printed = 0;
+ u64 total = 0;
+
+ for (i = 0; i < PERF_BR_MAX; i++)
+ total += st->counts[i];
+
+ if (total == 0)
+ return 0;
+
+ if (st->cond_fwd > 0)
+ printed += count_str_scnprintf(j++, "COND_FWD", bf + printed, size - printed);
+
+ if (st->cond_bwd > 0)
+ printed += count_str_scnprintf(j++, "COND_BWD", bf + printed, size - printed);
+
+ for (i = 0; i < PERF_BR_MAX; i++) {
+ if (i == PERF_BR_COND)
+ continue;
+
+ if (st->counts[i] > 0)
+ printed += count_str_scnprintf(j++, branch_type_name(i), bf + printed, size - printed);
+ }
+
+ if (st->cross_4k > 0)
+ printed += count_str_scnprintf(j++, "CROSS_4K", bf + printed, size - printed);
+
+ if (st->cross_2m > 0)
+ printed += count_str_scnprintf(j++, "CROSS_2M", bf + printed, size - printed);
+
+ return printed;
+}
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
new file mode 100644
index 000000000..1e3c7c5cd
--- /dev/null
+++ b/tools/perf/util/branch.h
@@ -0,0 +1,25 @@
+#ifndef _PERF_BRANCH_H
+#define _PERF_BRANCH_H 1
+
+#include <stdint.h>
+#include "../perf.h"
+
+struct branch_type_stat {
+ bool branch_to;
+ u64 counts[PERF_BR_MAX];
+ u64 cond_fwd;
+ u64 cond_bwd;
+ u64 cross_4k;
+ u64 cross_2m;
+};
+
+struct branch_flags;
+
+void branch_type_count(struct branch_type_stat *st, struct branch_flags *flags,
+ u64 from, u64 to);
+
+const char *branch_type_name(int type);
+void branch_type_stat_display(FILE *fp, struct branch_type_stat *st);
+int branch_type_str(struct branch_type_stat *st, char *bf, int bfsize);
+
+#endif /* _PERF_BRANCH_H */
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
new file mode 100644
index 000000000..1d352621b
--- /dev/null
+++ b/tools/perf/util/build-id.c
@@ -0,0 +1,876 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * build-id.c
+ *
+ * build-id support
+ *
+ * Copyright (C) 2009, 2010 Red Hat Inc.
+ * Copyright (C) 2009, 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+#include "util.h"
+#include <dirent.h>
+#include <errno.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include "build-id.h"
+#include "event.h"
+#include "symbol.h"
+#include "thread.h"
+#include <linux/kernel.h>
+#include "debug.h"
+#include "session.h"
+#include "tool.h"
+#include "header.h"
+#include "vdso.h"
+#include "path.h"
+#include "probe-file.h"
+#include "strlist.h"
+
+#include "sane_ctype.h"
+
+static bool no_buildid_cache;
+
+int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ struct addr_location al;
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+
+ if (thread == NULL) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (thread__find_map(thread, sample->cpumode, sample->ip, &al))
+ al.map->dso->hit = 1;
+
+ thread__put(thread);
+ return 0;
+}
+
+static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample
+ __maybe_unused,
+ struct machine *machine)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+
+ dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
+
+ if (thread) {
+ machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
+
+ return 0;
+}
+
+struct perf_tool build_id__mark_dso_hit_ops = {
+ .sample = build_id__mark_dso_hit,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .fork = perf_event__process_fork,
+ .exit = perf_event__exit_del_thread,
+ .attr = perf_event__process_attr,
+ .build_id = perf_event__process_build_id,
+ .ordered_events = true,
+};
+
+int build_id__sprintf(const u8 *build_id, int len, char *bf)
+{
+ char *bid = bf;
+ const u8 *raw = build_id;
+ int i;
+
+ for (i = 0; i < len; ++i) {
+ sprintf(bid, "%02x", *raw);
+ ++raw;
+ bid += 2;
+ }
+
+ return (bid - bf) + 1;
+}
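+
+/*
+ * For a 20-byte (BUILD_ID_SIZE) build id this writes 40 hex characters
+ * plus a terminating NUL and returns 41, i.e. SBUILD_ID_SIZE.
+ */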
+
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id)
+{
+ char notes[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+ int ret;
+
+ if (!root_dir)
+ root_dir = "";
+
+ scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+ ret = sysfs__read_build_id(notes, build_id, sizeof(build_id));
+ if (ret < 0)
+ return ret;
+
+ return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ int ret;
+
+ ret = filename__read_build_id(pathname, build_id, sizeof(build_id));
+ if (ret < 0)
+ return ret;
+ else if (ret != sizeof(build_id))
+ return -EINVAL;
+
+ return build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+}
+
+/* asnprintf consolidates asprintf and snprintf */
+static int asnprintf(char **strp, size_t size, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ if (!strp)
+ return -EINVAL;
+
+ va_start(ap, fmt);
+ if (*strp)
+ ret = vsnprintf(*strp, size, fmt, ap);
+ else
+ ret = vasprintf(strp, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size)
+{
+ bool retry_old = true;
+
+ snprintf(bf, size, "%s/%s/%s/kallsyms",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+retry:
+ if (!access(bf, F_OK))
+ return bf;
+ if (retry_old) {
+ /* Try old style kallsyms cache */
+ snprintf(bf, size, "%s/%s/%s",
+ buildid_dir, DSO__NAME_KALLSYMS, sbuild_id);
+ retry_old = false;
+ goto retry;
+ }
+
+ return NULL;
+}
+
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size)
+{
+ char *tmp = bf;
+ int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir,
+ sbuild_id, sbuild_id + 2);
+ if (ret < 0 || (tmp && size < (unsigned int)ret))
+ return NULL;
+ return bf;
+}
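+
+/*
+ * Layout sketch (assuming the default buildid_dir of ~/.debug): a
+ * build id "abcdef..." maps to the link ~/.debug/.build-id/ab/cdef...,
+ * which in turn points into the cache directory for the original file.
+ */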
+
+/* The caller is responsible for freeing the returned buffer. */
+char *build_id_cache__origname(const char *sbuild_id)
+{
+ char *linkname;
+ char buf[PATH_MAX];
+ char *ret = NULL, *p;
+ size_t offs = 5; /* == strlen("../..") */
+ ssize_t len;
+
+ linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+ if (!linkname)
+ return NULL;
+
+ len = readlink(linkname, buf, sizeof(buf) - 1);
+ if (len <= 0)
+ goto out;
+ buf[len] = '\0';
+
+ /* The link should be "../..<origpath>/<sbuild_id>" */
+ p = strrchr(buf, '/'); /* Cut off the "/<sbuild_id>" */
+ if (p && (p > buf + offs)) {
+ *p = '\0';
+ if (buf[offs + 1] == '[')
+ offs++; /*
+ * This is a DSO name, like [kernel.kallsyms].
+ * Skip the first '/', since this is not the
+ * cache of a regular file.
+ */
+ ret = strdup(buf + offs); /* Skip "../..[/]" */
+ }
+out:
+ free(linkname);
+ return ret;
+}
+
+/* Check if the given build_id cache is valid on the currently running system */
+static bool build_id_cache__valid_id(char *sbuild_id)
+{
+ char real_sbuild_id[SBUILD_ID_SIZE] = "";
+ char *pathname;
+ int ret = 0;
+ bool result = false;
+
+ pathname = build_id_cache__origname(sbuild_id);
+ if (!pathname)
+ return false;
+
+ if (!strcmp(pathname, DSO__NAME_KALLSYMS))
+ ret = sysfs__sprintf_build_id("/", real_sbuild_id);
+ else if (pathname[0] == '/')
+ ret = filename__sprintf_build_id(pathname, real_sbuild_id);
+ else
+ ret = -EINVAL; /* Should we support other special DSO cache? */
+ if (ret >= 0)
+ result = (strcmp(sbuild_id, real_sbuild_id) == 0);
+ free(pathname);
+
+ return result;
+}
+
+static const char *build_id_cache__basename(bool is_kallsyms, bool is_vdso,
+ bool is_debug)
+{
+ return is_kallsyms ? "kallsyms" : (is_vdso ? "vdso" : (is_debug ?
+ "debug" : "elf"));
+}
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+ bool is_debug)
+{
+ bool is_kallsyms = dso__is_kallsyms((struct dso *)dso);
+ bool is_vdso = dso__is_vdso((struct dso *)dso);
+ char sbuild_id[SBUILD_ID_SIZE];
+ char *linkname;
+ bool alloc = (bf == NULL);
+ int ret;
+
+ if (!dso->has_build_id)
+ return NULL;
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ linkname = build_id_cache__linkname(sbuild_id, NULL, 0);
+ if (!linkname)
+ return NULL;
+
+ /* Check if old style build_id cache */
+ if (is_regular_file(linkname))
+ ret = asnprintf(&bf, size, "%s", linkname);
+ else
+ ret = asnprintf(&bf, size, "%s/%s", linkname,
+ build_id_cache__basename(is_kallsyms, is_vdso,
+ is_debug));
+ if (ret < 0 || (!alloc && size < (unsigned int)ret))
+ bf = NULL;
+ free(linkname);
+
+ return bf;
+}
+
+#define dsos__for_each_with_build_id(pos, head) \
+ list_for_each_entry(pos, head, node) \
+ if (!pos->has_build_id) \
+ continue; \
+ else
+
+static int write_buildid(const char *name, size_t name_len, u8 *build_id,
+ pid_t pid, u16 misc, struct feat_fd *fd)
+{
+ int err;
+ struct build_id_event b;
+ size_t len;
+
+ len = name_len + 1;
+ len = PERF_ALIGN(len, NAME_ALIGN);
+
+ memset(&b, 0, sizeof(b));
+ memcpy(&b.build_id, build_id, BUILD_ID_SIZE);
+ b.pid = pid;
+ b.header.misc = misc;
+ b.header.size = sizeof(b) + len;
+
+ err = do_write(fd, &b, sizeof(b));
+ if (err < 0)
+ return err;
+
+ return write_padded(fd, name, name_len + 1, len);
+}
+
+static int machine__write_buildid_table(struct machine *machine,
+ struct feat_fd *fd)
+{
+ int err = 0;
+ struct dso *pos;
+ u16 kmisc = PERF_RECORD_MISC_KERNEL,
+ umisc = PERF_RECORD_MISC_USER;
+
+ if (!machine__is_host(machine)) {
+ kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+ umisc = PERF_RECORD_MISC_GUEST_USER;
+ }
+
+ dsos__for_each_with_build_id(pos, &machine->dsos.head) {
+ const char *name;
+ size_t name_len;
+ bool in_kernel = false;
+
+ if (!pos->hit && !dso__is_vdso(pos))
+ continue;
+
+ if (dso__is_vdso(pos)) {
+ name = pos->short_name;
+ name_len = pos->short_name_len;
+ } else if (dso__is_kcore(pos)) {
+ name = machine->mmap_name;
+ name_len = strlen(name);
+ } else {
+ name = pos->long_name;
+ name_len = pos->long_name_len;
+ }
+
+ in_kernel = pos->kernel ||
+ is_kernel_module(name,
+ PERF_RECORD_MISC_CPUMODE_UNKNOWN);
+ err = write_buildid(name, name_len, pos->build_id, machine->pid,
+ in_kernel ? kmisc : umisc, fd);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int perf_session__write_buildid_table(struct perf_session *session,
+ struct feat_fd *fd)
+{
+ struct rb_node *nd;
+ int err = machine__write_buildid_table(&session->machines.host, fd);
+
+ if (err)
+ return err;
+
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ err = machine__write_buildid_table(pos, fd);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static int __dsos__hit_all(struct list_head *head)
+{
+ struct dso *pos;
+
+ list_for_each_entry(pos, head, node)
+ pos->hit = true;
+
+ return 0;
+}
+
+static int machine__hit_all_dsos(struct machine *machine)
+{
+ return __dsos__hit_all(&machine->dsos.head);
+}
+
+int dsos__hit_all(struct perf_session *session)
+{
+ struct rb_node *nd;
+ int err;
+
+ err = machine__hit_all_dsos(&session->machines.host);
+ if (err)
+ return err;
+
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+
+ err = machine__hit_all_dsos(pos);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+void disable_buildid_cache(void)
+{
+ no_buildid_cache = true;
+}
+
+static bool lsdir_bid_head_filter(const char *name __maybe_unused,
+ struct dirent *d)
+{
+ return (strlen(d->d_name) == 2) &&
+ isxdigit(d->d_name[0]) && isxdigit(d->d_name[1]);
+}
+
+static bool lsdir_bid_tail_filter(const char *name __maybe_unused,
+ struct dirent *d)
+{
+ int i = 0;
+ while (isxdigit(d->d_name[i]) && i < SBUILD_ID_SIZE - 3)
+ i++;
+ return (i == SBUILD_ID_SIZE - 3) && (d->d_name[i] == '\0');
+}
+
+struct strlist *build_id_cache__list_all(bool validonly)
+{
+ struct strlist *toplist, *linklist = NULL, *bidlist;
+ struct str_node *nd, *nd2;
+ char *topdir, *linkdir = NULL;
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ /* for filename__ functions */
+ if (validonly)
+ symbol__init(NULL);
+
+ /* Open the top-level directory */
+ if (asprintf(&topdir, "%s/.build-id/", buildid_dir) < 0)
+ return NULL;
+
+ bidlist = strlist__new(NULL, NULL);
+ if (!bidlist)
+ goto out;
+
+ toplist = lsdir(topdir, lsdir_bid_head_filter);
+ if (!toplist) {
+ pr_debug("Error in lsdir(%s): %d\n", topdir, errno);
+ /* If there is no buildid cache, return an empty list */
+ if (errno == ENOENT)
+ goto out;
+ goto err_out;
+ }
+
+ strlist__for_each_entry(nd, toplist) {
+ if (asprintf(&linkdir, "%s/%s", topdir, nd->s) < 0)
+ goto err_out;
+ /* Open the lower-level directory */
+ linklist = lsdir(linkdir, lsdir_bid_tail_filter);
+ if (!linklist) {
+ pr_debug("Error in lsdir(%s): %d\n", linkdir, errno);
+ goto err_out;
+ }
+ strlist__for_each_entry(nd2, linklist) {
+ if (snprintf(sbuild_id, SBUILD_ID_SIZE, "%s%s",
+ nd->s, nd2->s) != SBUILD_ID_SIZE - 1)
+ goto err_out;
+ if (validonly && !build_id_cache__valid_id(sbuild_id))
+ continue;
+ if (strlist__add(bidlist, sbuild_id) < 0)
+ goto err_out;
+ }
+ strlist__delete(linklist);
+ zfree(&linkdir);
+ }
+
+out_free:
+ strlist__delete(toplist);
+out:
+ free(topdir);
+
+ return bidlist;
+
+err_out:
+ strlist__delete(linklist);
+ zfree(&linkdir);
+ strlist__delete(bidlist);
+ bidlist = NULL;
+ goto out_free;
+}
+
+static bool str_is_build_id(const char *maybe_sbuild_id, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (!isxdigit(maybe_sbuild_id[i]))
+ return false;
+ }
+ return true;
+}
+
+/* Return the valid complete build-id */
+char *build_id_cache__complement(const char *incomplete_sbuild_id)
+{
+ struct strlist *bidlist;
+ struct str_node *nd, *cand = NULL;
+ char *sbuild_id = NULL;
+ size_t len = strlen(incomplete_sbuild_id);
+
+ if (len >= SBUILD_ID_SIZE ||
+ !str_is_build_id(incomplete_sbuild_id, len))
+ return NULL;
+
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist)
+ return NULL;
+
+ strlist__for_each_entry(nd, bidlist) {
+ if (strncmp(nd->s, incomplete_sbuild_id, len) != 0)
+ continue;
+		if (cand) { /* Error: there are two or more candidates. */
+ cand = NULL;
+ break;
+ }
+ cand = nd;
+ }
+ if (cand)
+ sbuild_id = strdup(cand->s);
+ strlist__delete(bidlist);
+
+ return sbuild_id;
+}
+
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+ struct nsinfo *nsi, bool is_kallsyms,
+ bool is_vdso)
+{
+ char *realname = (char *)name, *filename;
+ bool slash = is_kallsyms || is_vdso;
+
+ if (!slash) {
+ realname = nsinfo__realpath(name, nsi);
+ if (!realname)
+ return NULL;
+ }
+
+ if (asprintf(&filename, "%s%s%s%s%s", buildid_dir, slash ? "/" : "",
+ is_vdso ? DSO__NAME_VDSO : realname,
+ sbuild_id ? "/" : "", sbuild_id ?: "") < 0)
+ filename = NULL;
+
+ if (!slash)
+ free(realname);
+
+ return filename;
+}
+
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+ struct strlist **result)
+{
+ char *dir_name;
+ int ret = 0;
+
+ dir_name = build_id_cache__cachedir(NULL, pathname, nsi, false, false);
+ if (!dir_name)
+ return -ENOMEM;
+
+ *result = lsdir(dir_name, lsdir_no_dot_filter);
+ if (!*result)
+ ret = -errno;
+ free(dir_name);
+
+ return ret;
+}
+
+#if defined(HAVE_LIBELF_SUPPORT) && defined(HAVE_GELF_GETNOTE_SUPPORT)
+static int build_id_cache__add_sdt_cache(const char *sbuild_id,
+ const char *realname,
+ struct nsinfo *nsi)
+{
+ struct probe_cache *cache;
+ int ret;
+ struct nscookie nsc;
+
+ cache = probe_cache__new(sbuild_id, nsi);
+ if (!cache)
+ return -1;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = probe_cache__scan_sdt(cache, realname);
+ nsinfo__mountns_exit(&nsc);
+ if (ret >= 0) {
+ pr_debug4("Found %d SDTs in %s\n", ret, realname);
+ if (probe_cache__commit(cache) < 0)
+ ret = -1;
+ }
+ probe_cache__delete(cache);
+ return ret;
+}
+#else
+#define build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) (0)
+#endif
+
+static char *build_id_cache__find_debug(const char *sbuild_id,
+ struct nsinfo *nsi)
+{
+ char *realname = NULL;
+ char *debugfile;
+ struct nscookie nsc;
+ size_t len = 0;
+
+ debugfile = calloc(1, PATH_MAX);
+ if (!debugfile)
+ goto out;
+
+ len = __symbol__join_symfs(debugfile, PATH_MAX,
+ "/usr/lib/debug/.build-id/");
+ snprintf(debugfile + len, PATH_MAX - len, "%.2s/%s.debug", sbuild_id,
+ sbuild_id + 2);
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ realname = realpath(debugfile, NULL);
+ if (realname && access(realname, R_OK))
+ zfree(&realname);
+ nsinfo__mountns_exit(&nsc);
+out:
+ free(debugfile);
+ return realname;
+}
+
+int build_id_cache__add_s(const char *sbuild_id, const char *name,
+ struct nsinfo *nsi, bool is_kallsyms, bool is_vdso)
+{
+ const size_t size = PATH_MAX;
+ char *realname = NULL, *filename = NULL, *dir_name = NULL,
+ *linkname = zalloc(size), *tmp;
+ char *debugfile = NULL;
+ int err = -1;
+
+ if (!is_kallsyms) {
+ if (!is_vdso)
+ realname = nsinfo__realpath(name, nsi);
+ else
+ realname = realpath(name, NULL);
+ if (!realname)
+ goto out_free;
+ }
+
+ dir_name = build_id_cache__cachedir(sbuild_id, name, nsi, is_kallsyms,
+ is_vdso);
+ if (!dir_name)
+ goto out_free;
+
+ /* Remove old style build-id cache */
+ if (is_regular_file(dir_name))
+ if (unlink(dir_name))
+ goto out_free;
+
+ if (mkdir_p(dir_name, 0755))
+ goto out_free;
+
+ /* Save the allocated buildid dirname */
+ if (asprintf(&filename, "%s/%s", dir_name,
+ build_id_cache__basename(is_kallsyms, is_vdso,
+ false)) < 0) {
+ filename = NULL;
+ goto out_free;
+ }
+
+ if (access(filename, F_OK)) {
+ if (is_kallsyms) {
+ if (copyfile("/proc/kallsyms", filename))
+ goto out_free;
+ } else if (nsi && nsi->need_setns) {
+ if (copyfile_ns(name, filename, nsi))
+ goto out_free;
+ } else if (link(realname, filename) && errno != EEXIST &&
+ copyfile(name, filename))
+ goto out_free;
+ }
+
+ /* Some binaries are stripped, but have .debug files with their symbol
+ * table. Check to see if we can locate one of those, since the elf
+ * file itself may not be very useful to users of our tools without a
+ * symtab.
+ */
+ if (!is_kallsyms && !is_vdso &&
+ strncmp(".ko", name + strlen(name) - 3, 3)) {
+ debugfile = build_id_cache__find_debug(sbuild_id, nsi);
+ if (debugfile) {
+ zfree(&filename);
+ if (asprintf(&filename, "%s/%s", dir_name,
+ build_id_cache__basename(false, false, true)) < 0) {
+ filename = NULL;
+ goto out_free;
+ }
+ if (access(filename, F_OK)) {
+ if (nsi && nsi->need_setns) {
+ if (copyfile_ns(debugfile, filename,
+ nsi))
+ goto out_free;
+ } else if (link(debugfile, filename) &&
+ errno != EEXIST &&
+ copyfile(debugfile, filename))
+ goto out_free;
+ }
+ }
+ }
+
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
+ goto out_free;
+ tmp = strrchr(linkname, '/');
+ *tmp = '\0';
+
+ if (access(linkname, X_OK) && mkdir_p(linkname, 0755))
+ goto out_free;
+
+ *tmp = '/';
+ tmp = dir_name + strlen(buildid_dir) - 5;
+ memcpy(tmp, "../..", 5);
+
+ if (symlink(tmp, linkname) == 0)
+ err = 0;
+
+	/* Update the SDT cache: errors are only warned about */
+ if (realname &&
+ build_id_cache__add_sdt_cache(sbuild_id, realname, nsi) < 0)
+ pr_debug4("Failed to update/scan SDT cache for %s\n", realname);
+
+out_free:
+ if (!is_kallsyms)
+ free(realname);
+ free(filename);
+ free(debugfile);
+ free(dir_name);
+ free(linkname);
+ return err;
+}
+
+static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size,
+ const char *name, struct nsinfo *nsi,
+ bool is_kallsyms, bool is_vdso)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ build_id__sprintf(build_id, build_id_size, sbuild_id);
+
+ return build_id_cache__add_s(sbuild_id, name, nsi, is_kallsyms,
+ is_vdso);
+}
+
+bool build_id_cache__cached(const char *sbuild_id)
+{
+ bool ret = false;
+ char *filename = build_id_cache__linkname(sbuild_id, NULL, 0);
+
+ if (filename && !access(filename, F_OK))
+ ret = true;
+ free(filename);
+
+ return ret;
+}
+
+int build_id_cache__remove_s(const char *sbuild_id)
+{
+ const size_t size = PATH_MAX;
+ char *filename = zalloc(size),
+ *linkname = zalloc(size), *tmp;
+ int err = -1;
+
+ if (filename == NULL || linkname == NULL)
+ goto out_free;
+
+ if (!build_id_cache__linkname(sbuild_id, linkname, size))
+ goto out_free;
+
+ if (access(linkname, F_OK))
+ goto out_free;
+
+ if (readlink(linkname, filename, size - 1) < 0)
+ goto out_free;
+
+ if (unlink(linkname))
+ goto out_free;
+
+ /*
+ * Since the link is relative, we must make it absolute:
+ */
+ tmp = strrchr(linkname, '/') + 1;
+ snprintf(tmp, size - (tmp - linkname), "%s", filename);
+
+ if (rm_rf(linkname))
+ goto out_free;
+
+ err = 0;
+out_free:
+ free(filename);
+ free(linkname);
+ return err;
+}
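+
+/*
+ * Sketch of the removal above (hypothetical paths): the .build-id
+ * symlink is read first, e.g.
+ *
+ *    ~/.debug/.build-id/8e/1df... -> ../../usr/bin/ls/8e1df...
+ *
+ * then the link is unlinked and its target, made absolute again, is
+ * removed recursively.
+ */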
+
+static int dso__cache_build_id(struct dso *dso, struct machine *machine)
+{
+ bool is_kallsyms = dso__is_kallsyms(dso);
+ bool is_vdso = dso__is_vdso(dso);
+ const char *name = dso->long_name;
+
+ if (dso__is_kcore(dso)) {
+ is_kallsyms = true;
+ name = machine->mmap_name;
+ }
+ return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name,
+ dso->nsinfo, is_kallsyms, is_vdso);
+}
+
+static int __dsos__cache_build_ids(struct list_head *head,
+ struct machine *machine)
+{
+ struct dso *pos;
+ int err = 0;
+
+ dsos__for_each_with_build_id(pos, head)
+ if (dso__cache_build_id(pos, machine))
+ err = -1;
+
+ return err;
+}
+
+static int machine__cache_build_ids(struct machine *machine)
+{
+ return __dsos__cache_build_ids(&machine->dsos.head, machine);
+}
+
+int perf_session__cache_build_ids(struct perf_session *session)
+{
+ struct rb_node *nd;
+ int ret;
+
+ if (no_buildid_cache)
+ return 0;
+
+ if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
+ return -1;
+
+ ret = machine__cache_build_ids(&session->machines.host);
+
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret |= machine__cache_build_ids(pos);
+ }
+ return ret ? -1 : 0;
+}
+
+static bool machine__read_build_ids(struct machine *machine, bool with_hits)
+{
+ return __dsos__read_build_ids(&machine->dsos.head, with_hits);
+}
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
+{
+ struct rb_node *nd;
+ bool ret = machine__read_build_ids(&session->machines.host, with_hits);
+
+ for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret |= machine__read_build_ids(pos, with_hits);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h
new file mode 100644
index 000000000..f0c565164
--- /dev/null
+++ b/tools/perf/util/build-id.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_BUILD_ID_H_
+#define PERF_BUILD_ID_H_ 1
+
+#define BUILD_ID_SIZE 20
+#define SBUILD_ID_SIZE (BUILD_ID_SIZE * 2 + 1)
+
+#include "tool.h"
+#include "namespaces.h"
+#include <linux/types.h>
+
+extern struct perf_tool build_id__mark_dso_hit_ops;
+struct dso;
+struct feat_fd;
+
+int build_id__sprintf(const u8 *build_id, int len, char *bf);
+int sysfs__sprintf_build_id(const char *root_dir, char *sbuild_id);
+int filename__sprintf_build_id(const char *pathname, char *sbuild_id);
+char *build_id_cache__kallsyms_path(const char *sbuild_id, char *bf,
+ size_t size);
+
+char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size,
+ bool is_debug);
+
+int build_id__mark_dso_hit(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct perf_evsel *evsel,
+ struct machine *machine);
+
+int dsos__hit_all(struct perf_session *session);
+
+bool perf_session__read_build_ids(struct perf_session *session, bool with_hits);
+int perf_session__write_buildid_table(struct perf_session *session,
+ struct feat_fd *fd);
+int perf_session__cache_build_ids(struct perf_session *session);
+
+char *build_id_cache__origname(const char *sbuild_id);
+char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size);
+char *build_id_cache__cachedir(const char *sbuild_id, const char *name,
+ struct nsinfo *nsi, bool is_kallsyms,
+ bool is_vdso);
+
+struct strlist;
+
+struct strlist *build_id_cache__list_all(bool validonly);
+char *build_id_cache__complement(const char *incomplete_sbuild_id);
+int build_id_cache__list_build_ids(const char *pathname, struct nsinfo *nsi,
+ struct strlist **result);
+bool build_id_cache__cached(const char *sbuild_id);
+int build_id_cache__add_s(const char *sbuild_id,
+ const char *name, struct nsinfo *nsi,
+ bool is_kallsyms, bool is_vdso);
+int build_id_cache__remove_s(const char *sbuild_id);
+
+extern char buildid_dir[];
+
+void set_buildid_dir(const char *dir);
+void disable_buildid_cache(void);
+
+#endif
diff --git a/tools/perf/util/c++/Build b/tools/perf/util/c++/Build
new file mode 100644
index 000000000..988fef1b1
--- /dev/null
+++ b/tools/perf/util/c++/Build
@@ -0,0 +1,2 @@
+libperf-$(CONFIG_CLANGLLVM) += clang.o
+libperf-$(CONFIG_CLANGLLVM) += clang-test.o
diff --git a/tools/perf/util/c++/clang-c.h b/tools/perf/util/c++/clang-c.h
new file mode 100644
index 000000000..e513366f2
--- /dev/null
+++ b/tools/perf/util/c++/clang-c.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_CLANG_C_H
+#define PERF_UTIL_CLANG_C_H
+
+#include <stddef.h> /* for size_t */
+#include <util-cxx.h> /* for __maybe_unused */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_LIBCLANGLLVM_SUPPORT
+extern void perf_clang__init(void);
+extern void perf_clang__cleanup(void);
+
+extern int test__clang_to_IR(void);
+extern int test__clang_to_obj(void);
+
+extern int perf_clang__compile_bpf(const char *filename,
+ void **p_obj_buf,
+ size_t *p_obj_buf_sz);
+#else
+
+#include <errno.h>
+
+static inline void perf_clang__init(void) { }
+static inline void perf_clang__cleanup(void) { }
+
+static inline int test__clang_to_IR(void) { return -1; }
+static inline int test__clang_to_obj(void) { return -1; }
+
+static inline int
+perf_clang__compile_bpf(const char *filename __maybe_unused,
+ void **p_obj_buf __maybe_unused,
+ size_t *p_obj_buf_sz __maybe_unused)
+{
+ return -ENOTSUP;
+}
+
+#endif
+
+#ifdef __cplusplus
+}
+#endif
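+
+/*
+ * Illustrative call sequence (hypothetical source file); the object
+ * buffer is malloc()ed by the callee and owned by the caller:
+ *
+ *    void *obj_buf;
+ *    size_t obj_buf_sz;
+ *
+ *    perf_clang__init();
+ *    err = perf_clang__compile_bpf("prog.c", &obj_buf, &obj_buf_sz);
+ *    ...
+ *    free(obj_buf);
+ *    perf_clang__cleanup();
+ */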
+#endif
diff --git a/tools/perf/util/c++/clang-test.cpp b/tools/perf/util/c++/clang-test.cpp
new file mode 100644
index 000000000..7b042a5eb
--- /dev/null
+++ b/tools/perf/util/c++/clang-test.cpp
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "clang.h"
+#include "clang-c.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+#include <util-cxx.h>
+#include <tests/llvm.h>
+#include <string>
+
+class perf_clang_scope {
+public:
+ explicit perf_clang_scope() {perf_clang__init();}
+ ~perf_clang_scope() {perf_clang__cleanup();}
+};
+
+static std::unique_ptr<llvm::Module>
+__test__clang_to_IR(void)
+{
+ unsigned int kernel_version;
+
+ if (fetch_kernel_version(&kernel_version, NULL, 0))
+ return std::unique_ptr<llvm::Module>(nullptr);
+
+ std::string cflag_kver("-DLINUX_VERSION_CODE=" +
+ std::to_string(kernel_version));
+
+ std::unique_ptr<llvm::Module> M =
+ perf::getModuleFromSource({cflag_kver.c_str()},
+ "perf-test.c",
+ test_llvm__bpf_base_prog);
+ return M;
+}
+
+extern "C" {
+int test__clang_to_IR(void)
+{
+ perf_clang_scope _scope;
+
+ auto M = __test__clang_to_IR();
+ if (!M)
+ return -1;
+ for (llvm::Function& F : *M)
+ if (F.getName() == "bpf_func__SyS_epoll_pwait")
+ return 0;
+ return -1;
+}
+
+int test__clang_to_obj(void)
+{
+ perf_clang_scope _scope;
+
+ auto M = __test__clang_to_IR();
+ if (!M)
+ return -1;
+
+ auto Buffer = perf::getBPFObjectFromModule(&*M);
+ if (!Buffer)
+ return -1;
+ return 0;
+}
+
+}
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
new file mode 100644
index 000000000..895125045
--- /dev/null
+++ b/tools/perf/util/c++/clang.cpp
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * LLVM C frontend for perf. Supports dynamically compiling C files.
+ *
+ * Inspired by clang example code:
+ * http://llvm.org/svn/llvm-project/cfe/trunk/examples/clang-interpreter/main.cpp
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include "clang/Basic/Version.h"
+#include "clang/CodeGen/CodeGenAction.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/TextDiagnosticPrinter.h"
+#include "clang/Tooling/Tooling.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Option/Option.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <memory>
+
+#include "clang.h"
+#include "clang-c.h"
+
+namespace perf {
+
+static std::unique_ptr<llvm::LLVMContext> LLVMCtx;
+
+using namespace clang;
+
+static CompilerInvocation *
+createCompilerInvocation(llvm::opt::ArgStringList CFlags, StringRef& Path,
+ DiagnosticsEngine& Diags)
+{
+ llvm::opt::ArgStringList CCArgs {
+ "-cc1",
+ "-triple", "bpf-pc-linux",
+ "-fsyntax-only",
+ "-ferror-limit", "19",
+ "-fmessage-length", "127",
+ "-O2",
+ "-nostdsysteminc",
+ "-nobuiltininc",
+ "-vectorize-loops",
+ "-vectorize-slp",
+ "-Wno-unused-value",
+ "-Wno-pointer-sign",
+ "-x", "c"};
+
+ CCArgs.append(CFlags.begin(), CFlags.end());
+ CompilerInvocation *CI = tooling::newInvocation(&Diags, CCArgs);
+
+ FrontendOptions& Opts = CI->getFrontendOpts();
+ Opts.Inputs.clear();
+ Opts.Inputs.emplace_back(Path,
+ FrontendOptions::getInputKindForExtension("c"));
+ return CI;
+}
+
+static std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags,
+ StringRef Path, IntrusiveRefCntPtr<vfs::FileSystem> VFS)
+{
+ CompilerInstance Clang;
+ Clang.createDiagnostics();
+
+ Clang.setVirtualFileSystem(&*VFS);
+
+#if CLANG_VERSION_MAJOR < 4
+ IntrusiveRefCntPtr<CompilerInvocation> CI =
+ createCompilerInvocation(std::move(CFlags), Path,
+ Clang.getDiagnostics());
+ Clang.setInvocation(&*CI);
+#else
+ std::shared_ptr<CompilerInvocation> CI(
+ createCompilerInvocation(std::move(CFlags), Path,
+ Clang.getDiagnostics()));
+ Clang.setInvocation(CI);
+#endif
+
+ std::unique_ptr<CodeGenAction> Act(new EmitLLVMOnlyAction(&*LLVMCtx));
+ if (!Clang.ExecuteAction(*Act))
+ return std::unique_ptr<llvm::Module>(nullptr);
+
+ return Act->takeModule();
+}
+
+std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags,
+ StringRef Name, StringRef Content)
+{
+ using namespace vfs;
+
+ llvm::IntrusiveRefCntPtr<OverlayFileSystem> OverlayFS(
+ new OverlayFileSystem(getRealFileSystem()));
+ llvm::IntrusiveRefCntPtr<InMemoryFileSystem> MemFS(
+ new InMemoryFileSystem(true));
+
+ /*
+ * pushOverlay helps set the working dir for MemFS. It must be
+ * called before addFile.
+ */
+ OverlayFS->pushOverlay(MemFS);
+ MemFS->addFile(Twine(Name), 0, llvm::MemoryBuffer::getMemBuffer(Content));
+
+ return getModuleFromSource(std::move(CFlags), Name, OverlayFS);
+}
+
+std::unique_ptr<llvm::Module>
+getModuleFromSource(llvm::opt::ArgStringList CFlags, StringRef Path)
+{
+ IntrusiveRefCntPtr<vfs::FileSystem> VFS(vfs::getRealFileSystem());
+ return getModuleFromSource(std::move(CFlags), Path, VFS);
+}
+
+std::unique_ptr<llvm::SmallVectorImpl<char>>
+getBPFObjectFromModule(llvm::Module *Module)
+{
+ using namespace llvm;
+
+ std::string TargetTriple("bpf-pc-linux");
+ std::string Error;
+ const Target* Target = TargetRegistry::lookupTarget(TargetTriple, Error);
+ if (!Target) {
+ llvm::errs() << Error;
+ return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+ }
+
+ llvm::TargetOptions Opt;
+ TargetMachine *TargetMachine =
+ Target->createTargetMachine(TargetTriple,
+ "generic", "",
+ Opt, Reloc::Static);
+
+ Module->setDataLayout(TargetMachine->createDataLayout());
+ Module->setTargetTriple(TargetTriple);
+
+ std::unique_ptr<SmallVectorImpl<char>> Buffer(new SmallVector<char, 0>());
+ raw_svector_ostream ostream(*Buffer);
+
+ legacy::PassManager PM;
+ bool NotAdded;
+#if CLANG_VERSION_MAJOR < 7
+ NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream,
+ TargetMachine::CGFT_ObjectFile);
+#else
+ NotAdded = TargetMachine->addPassesToEmitFile(PM, ostream, nullptr,
+ TargetMachine::CGFT_ObjectFile);
+#endif
+ if (NotAdded) {
+ llvm::errs() << "TargetMachine can't emit a file of this type\n";
+ return std::unique_ptr<llvm::SmallVectorImpl<char>>(nullptr);
+ }
+ PM.run(*Module);
+
+ return std::move(Buffer);
+}
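+
+/*
+ * The two steps above form the whole pipeline: getModuleFromSource()
+ * turns C source into an LLVM IR module, and getBPFObjectFromModule()
+ * lowers that module to an in-memory BPF object file.
+ */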
+
+}
+
+extern "C" {
+void perf_clang__init(void)
+{
+ perf::LLVMCtx.reset(new llvm::LLVMContext());
+ LLVMInitializeBPFTargetInfo();
+ LLVMInitializeBPFTarget();
+ LLVMInitializeBPFTargetMC();
+ LLVMInitializeBPFAsmPrinter();
+}
+
+void perf_clang__cleanup(void)
+{
+ perf::LLVMCtx.reset(nullptr);
+ llvm::llvm_shutdown();
+}
+
+int perf_clang__compile_bpf(const char *filename,
+ void **p_obj_buf,
+ size_t *p_obj_buf_sz)
+{
+ using namespace perf;
+
+ if (!p_obj_buf || !p_obj_buf_sz)
+ return -EINVAL;
+
+ llvm::opt::ArgStringList CFlags;
+ auto M = getModuleFromSource(std::move(CFlags), filename);
+ if (!M)
+ return -EINVAL;
+ auto O = getBPFObjectFromModule(&*M);
+ if (!O)
+ return -EINVAL;
+
+ size_t size = O->size_in_bytes();
+ void *buffer;
+
+ buffer = malloc(size);
+ if (!buffer)
+ return -ENOMEM;
+ memcpy(buffer, O->data(), size);
+ *p_obj_buf = buffer;
+ *p_obj_buf_sz = size;
+ return 0;
+}
+}
diff --git a/tools/perf/util/c++/clang.h b/tools/perf/util/c++/clang.h
new file mode 100644
index 000000000..6ce33e22f
--- /dev/null
+++ b/tools/perf/util/c++/clang.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_CLANG_H
+#define PERF_UTIL_CLANG_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Option/Option.h"
+#include <memory>
+
+namespace perf {
+
+using namespace llvm;
+
+std::unique_ptr<Module>
+getModuleFromSource(opt::ArgStringList CFlags,
+ StringRef Name, StringRef Content);
+
+std::unique_ptr<Module>
+getModuleFromSource(opt::ArgStringList CFlags,
+ StringRef Path);
+
+std::unique_ptr<llvm::SmallVectorImpl<char>>
+getBPFObjectFromModule(llvm::Module *Module);
+
+}
+#endif
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
new file mode 100644
index 000000000..9f2e36ef5
--- /dev/null
+++ b/tools/perf/util/cache.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CACHE_H
+#define __PERF_CACHE_H
+
+#include "strbuf.h"
+#include <subcmd/pager.h>
+#include "../ui/ui.h"
+
+#include <linux/compiler.h>
+#include <linux/string.h>
+
+#define CMD_EXEC_PATH "--exec-path"
+#define CMD_DEBUGFS_DIR "--debugfs-dir="
+
+#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
+#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
+#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
+#define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
+
+int split_cmdline(char *cmdline, const char ***argv);
+
+#define alloc_nr(x) (((x)+16)*3/2)
+
+static inline int is_absolute_path(const char *path)
+{
+ return path[0] == '/';
+}
+
+char *mkpath(const char *fmt, ...) __printf(1, 2);
+
+#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/call-path.c b/tools/perf/util/call-path.c
new file mode 100644
index 000000000..904a17052
--- /dev/null
+++ b/tools/perf/util/call-path.c
@@ -0,0 +1,122 @@
+/*
+ * call-path.c: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+#include "util.h"
+#include "call-path.h"
+
+static void call_path__init(struct call_path *cp, struct call_path *parent,
+ struct symbol *sym, u64 ip, bool in_kernel)
+{
+ cp->parent = parent;
+ cp->sym = sym;
+ cp->ip = sym ? 0 : ip;
+ cp->db_id = 0;
+ cp->in_kernel = in_kernel;
+ RB_CLEAR_NODE(&cp->rb_node);
+ cp->children = RB_ROOT;
+}
+
+struct call_path_root *call_path_root__new(void)
+{
+ struct call_path_root *cpr;
+
+ cpr = zalloc(sizeof(struct call_path_root));
+ if (!cpr)
+ return NULL;
+ call_path__init(&cpr->call_path, NULL, NULL, 0, false);
+ INIT_LIST_HEAD(&cpr->blocks);
+ return cpr;
+}
+
+void call_path_root__free(struct call_path_root *cpr)
+{
+ struct call_path_block *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &cpr->blocks, node) {
+ list_del(&pos->node);
+ free(pos);
+ }
+ free(cpr);
+}
+
+static struct call_path *call_path__new(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip,
+ bool in_kernel)
+{
+ struct call_path_block *cpb;
+ struct call_path *cp;
+ size_t n;
+
+ if (cpr->next < cpr->sz) {
+ cpb = list_last_entry(&cpr->blocks, struct call_path_block,
+ node);
+ } else {
+ cpb = zalloc(sizeof(struct call_path_block));
+ if (!cpb)
+ return NULL;
+ list_add_tail(&cpb->node, &cpr->blocks);
+ cpr->sz += CALL_PATH_BLOCK_SIZE;
+ }
+
+ n = cpr->next++ & CALL_PATH_BLOCK_MASK;
+ cp = &cpb->cp[n];
+
+ call_path__init(cp, parent, sym, ip, in_kernel);
+
+ return cp;
+}
+
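+/*
+ * Illustrative use (hypothetical symbols): repeated lookups build the
+ * tree one level at a time, e.g.
+ *
+ *    cp = call_path__findnew(cpr, &cpr->call_path, sym_main, 0, kernel_start);
+ *    cp = call_path__findnew(cpr, cp, sym_foo, 0, kernel_start);
+ *
+ * returning the existing node when the (sym, ip) pair was already seen
+ * under the same parent, and allocating a new one otherwise.
+ */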
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip, u64 ks)
+{
+ struct rb_node **p;
+ struct rb_node *node_parent = NULL;
+ struct call_path *cp;
+ bool in_kernel = ip >= ks;
+
+ if (sym)
+ ip = 0;
+
+ if (!parent)
+ return call_path__new(cpr, parent, sym, ip, in_kernel);
+
+ p = &parent->children.rb_node;
+ while (*p != NULL) {
+ node_parent = *p;
+ cp = rb_entry(node_parent, struct call_path, rb_node);
+
+ if (cp->sym == sym && cp->ip == ip)
+ return cp;
+
+ if (sym < cp->sym || (sym == cp->sym && ip < cp->ip))
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ cp = call_path__new(cpr, parent, sym, ip, in_kernel);
+ if (!cp)
+ return NULL;
+
+ rb_link_node(&cp->rb_node, node_parent, p);
+ rb_insert_color(&cp->rb_node, &parent->children);
+
+ return cp;
+}
diff --git a/tools/perf/util/call-path.h b/tools/perf/util/call-path.h
new file mode 100644
index 000000000..477f6d03b
--- /dev/null
+++ b/tools/perf/util/call-path.h
@@ -0,0 +1,77 @@
+/*
+ * call-path.h: Manipulate a tree data structure containing function call paths
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_CALL_PATH_H
+#define __PERF_CALL_PATH_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+#include <linux/rbtree.h>
+
+/**
+ * struct call_path - node in list of calls leading to a function call.
+ * @parent: call path to the parent function call
+ * @sym: symbol of function called
+ * @ip: only if sym is null, the ip of the function
+ * @db_id: id used for db-export
+ * @in_kernel: whether the function is in the kernel
+ * @rb_node: node in parent's tree of called functions
+ * @children: tree of call paths of functions called
+ *
+ * In combination with the call_return structure, the call_path structure
+ * defines a context-sensitive call-graph.
+ */
+struct call_path {
+ struct call_path *parent;
+ struct symbol *sym;
+ u64 ip;
+ u64 db_id;
+ bool in_kernel;
+ struct rb_node rb_node;
+ struct rb_root children;
+};
+
+#define CALL_PATH_BLOCK_SHIFT 8
+#define CALL_PATH_BLOCK_SIZE (1 << CALL_PATH_BLOCK_SHIFT)
+#define CALL_PATH_BLOCK_MASK (CALL_PATH_BLOCK_SIZE - 1)
+
+struct call_path_block {
+ struct call_path cp[CALL_PATH_BLOCK_SIZE];
+ struct list_head node;
+};
+
+/**
+ * struct call_path_root - root of all call paths.
+ * @call_path: root call path
+ * @blocks: list of blocks to store call paths
+ * @next: next free space
+ * @sz: number of spaces
+ */
+struct call_path_root {
+ struct call_path call_path;
+ struct list_head blocks;
+ size_t next;
+ size_t sz;
+};
+
+struct call_path_root *call_path_root__new(void);
+void call_path_root__free(struct call_path_root *cpr);
+
+struct call_path *call_path__findnew(struct call_path_root *cpr,
+ struct call_path *parent,
+ struct symbol *sym, u64 ip, u64 ks);
+
+#endif
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
new file mode 100644
index 000000000..dc2212e12
--- /dev/null
+++ b/tools/perf/util/callchain.c
@@ -0,0 +1,1579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2009-2011, Frederic Weisbecker <fweisbec@gmail.com>
+ *
+ * Handle the callchains from the stream in an ad-hoc radix tree and then
+ * sort them in an rbtree.
+ *
+ * Using a radix tree for the code paths provides fast retrieval and
+ * factorizes memory use. It also lets us use the paths in a hierarchical
+ * graph view.
+ *
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <math.h>
+
+#include "asm/bug.h"
+
+#include "hist.h"
+#include "util.h"
+#include "sort.h"
+#include "machine.h"
+#include "callchain.h"
+#include "branch.h"
+
+#define CALLCHAIN_PARAM_DEFAULT \
+ .mode = CHAIN_GRAPH_ABS, \
+ .min_percent = 0.5, \
+ .order = ORDER_CALLEE, \
+ .key = CCKEY_FUNCTION, \
+ .value = CCVAL_PERCENT, \
+
+struct callchain_param callchain_param = {
+ CALLCHAIN_PARAM_DEFAULT
+};
+
+/*
+ * Are there any events using DWARF callchains?
+ *
+ * I.e.
+ *
+ * -e cycles/call-graph=dwarf/
+ */
+bool dwarf_callchain_users;
+
+struct callchain_param callchain_param_default = {
+ CALLCHAIN_PARAM_DEFAULT
+};
+
+__thread struct callchain_cursor callchain_cursor;
+
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param)
+{
+ return parse_callchain_record(arg, param);
+}
+
+static int parse_callchain_mode(const char *value)
+{
+ if (!strncmp(value, "graph", strlen(value))) {
+ callchain_param.mode = CHAIN_GRAPH_ABS;
+ return 0;
+ }
+ if (!strncmp(value, "flat", strlen(value))) {
+ callchain_param.mode = CHAIN_FLAT;
+ return 0;
+ }
+ if (!strncmp(value, "fractal", strlen(value))) {
+ callchain_param.mode = CHAIN_GRAPH_REL;
+ return 0;
+ }
+ if (!strncmp(value, "folded", strlen(value))) {
+ callchain_param.mode = CHAIN_FOLDED;
+ return 0;
+ }
+ return -1;
+}
+
+static int parse_callchain_order(const char *value)
+{
+ if (!strncmp(value, "caller", strlen(value))) {
+ callchain_param.order = ORDER_CALLER;
+ callchain_param.order_set = true;
+ return 0;
+ }
+ if (!strncmp(value, "callee", strlen(value))) {
+ callchain_param.order = ORDER_CALLEE;
+ callchain_param.order_set = true;
+ return 0;
+ }
+ return -1;
+}
+
+static int parse_callchain_sort_key(const char *value)
+{
+ if (!strncmp(value, "function", strlen(value))) {
+ callchain_param.key = CCKEY_FUNCTION;
+ return 0;
+ }
+ if (!strncmp(value, "address", strlen(value))) {
+ callchain_param.key = CCKEY_ADDRESS;
+ return 0;
+ }
+ if (!strncmp(value, "srcline", strlen(value))) {
+ callchain_param.key = CCKEY_SRCLINE;
+ return 0;
+ }
+ if (!strncmp(value, "branch", strlen(value))) {
+ callchain_param.branch_callstack = 1;
+ return 0;
+ }
+ return -1;
+}
+
+static int parse_callchain_value(const char *value)
+{
+ if (!strncmp(value, "percent", strlen(value))) {
+ callchain_param.value = CCVAL_PERCENT;
+ return 0;
+ }
+ if (!strncmp(value, "period", strlen(value))) {
+ callchain_param.value = CCVAL_PERIOD;
+ return 0;
+ }
+ if (!strncmp(value, "count", strlen(value))) {
+ callchain_param.value = CCVAL_COUNT;
+ return 0;
+ }
+ return -1;
+}
+
+static int get_stack_size(const char *str, unsigned long *_size)
+{
+ char *endptr;
+ unsigned long size;
+ unsigned long max_size = round_down(USHRT_MAX, sizeof(u64));
+
+ size = strtoul(str, &endptr, 0);
+
+ do {
+ if (*endptr)
+ break;
+
+ size = round_up(size, sizeof(u64));
+ if (!size || size > max_size)
+ break;
+
+ *_size = size;
+ return 0;
+
+ } while (0);
+
+ pr_err("callchain: Incorrect stack dump size (max %ld): %s\n",
+ max_size, str);
+ return -1;
+}
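+
+/*
+ * Illustrative inputs (hypothetical): "4100" is rounded up to 4104, a
+ * multiple of sizeof(u64), and accepted; "0", strings with trailing
+ * garbage and values above round_down(USHRT_MAX, sizeof(u64)) are
+ * rejected.
+ */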
+
+static int
+__parse_callchain_report_opt(const char *arg, bool allow_record_opt)
+{
+ char *tok;
+ char *endptr, *saveptr = NULL;
+ bool minpcnt_set = false;
+ bool record_opt_set = false;
+ bool try_stack_size = false;
+
+ callchain_param.enabled = true;
+ symbol_conf.use_callchain = true;
+
+ if (!arg)
+ return 0;
+
+ while ((tok = strtok_r((char *)arg, ",", &saveptr)) != NULL) {
+ if (!strncmp(tok, "none", strlen(tok))) {
+ callchain_param.mode = CHAIN_NONE;
+ callchain_param.enabled = false;
+ symbol_conf.use_callchain = false;
+ return 0;
+ }
+
+ if (!parse_callchain_mode(tok) ||
+ !parse_callchain_order(tok) ||
+ !parse_callchain_sort_key(tok) ||
+ !parse_callchain_value(tok)) {
+ /* parsing ok - move on to the next */
+ try_stack_size = false;
+ goto next;
+ } else if (allow_record_opt && !record_opt_set) {
+ if (parse_callchain_record(tok, &callchain_param))
+ goto try_numbers;
+
+ /* assume that a number following 'dwarf' is the stack size */
+ if (callchain_param.record_mode == CALLCHAIN_DWARF)
+ try_stack_size = true;
+
+ record_opt_set = true;
+ goto next;
+ }
+
+try_numbers:
+ if (try_stack_size) {
+ unsigned long size = 0;
+
+ if (get_stack_size(tok, &size) < 0)
+ return -1;
+ callchain_param.dump_size = size;
+ try_stack_size = false;
+ } else if (!minpcnt_set) {
+ /* try to get the min percent */
+ callchain_param.min_percent = strtod(tok, &endptr);
+ if (tok == endptr)
+ return -1;
+ minpcnt_set = true;
+ } else {
+ /* try print limit at last */
+ callchain_param.print_limit = strtoul(tok, &endptr, 0);
+ if (tok == endptr)
+ return -1;
+ }
+next:
+ arg = NULL;
+ }
+
+ if (callchain_register_param(&callchain_param) < 0) {
+ pr_err("Can't register callchain params\n");
+ return -1;
+ }
+ return 0;
+}
+
+int parse_callchain_report_opt(const char *arg)
+{
+ return __parse_callchain_report_opt(arg, false);
+}
+
+int parse_callchain_top_opt(const char *arg)
+{
+ return __parse_callchain_report_opt(arg, true);
+}
+
+int parse_callchain_record(const char *arg, struct callchain_param *param)
+{
+ char *tok, *name, *saveptr = NULL;
+ char *buf;
+ int ret = -1;
+
+ /* We need a buffer that we know we can write to. */
+ buf = malloc(strlen(arg) + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ strcpy(buf, arg);
+
+ tok = strtok_r((char *)buf, ",", &saveptr);
+ name = tok ? : (char *)buf;
+
+ do {
+ /* Framepointer style */
+ if (!strncmp(name, "fp", sizeof("fp"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_FP;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph fp\n");
+ break;
+
+ /* Dwarf style */
+ } else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
+ const unsigned long default_stack_dump_size = 8192;
+
+ ret = 0;
+ param->record_mode = CALLCHAIN_DWARF;
+ param->dump_size = default_stack_dump_size;
+ dwarf_callchain_users = true;
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ if (tok) {
+ unsigned long size = 0;
+
+ ret = get_stack_size(tok, &size);
+ param->dump_size = size;
+ }
+ } else if (!strncmp(name, "lbr", sizeof("lbr"))) {
+ if (!strtok_r(NULL, ",", &saveptr)) {
+ param->record_mode = CALLCHAIN_LBR;
+ ret = 0;
+ } else
+ pr_err("callchain: No more arguments "
+ "needed for --call-graph lbr\n");
+ break;
+ } else {
+ pr_err("callchain: Unknown --call-graph option "
+ "value: %s\n", arg);
+ break;
+ }
+
+ } while (0);
+
+ free(buf);
+ return ret;
+}
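+
+/*
+ * Examples of record options accepted above (illustrative):
+ *
+ *    "fp"          frame-pointer based unwinding
+ *    "dwarf"       DWARF unwinding with the default 8192 byte stack dump
+ *    "dwarf,4096"  DWARF unwinding with a 4096 byte stack dump
+ *    "lbr"         call stacks from last branch records
+ */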
+
+int perf_callchain_config(const char *var, const char *value)
+{
+ char *endptr;
+
+ if (!strstarts(var, "call-graph."))
+ return 0;
+ var += sizeof("call-graph.") - 1;
+
+ if (!strcmp(var, "record-mode"))
+ return parse_callchain_record_opt(value, &callchain_param);
+ if (!strcmp(var, "dump-size")) {
+ unsigned long size = 0;
+ int ret;
+
+ ret = get_stack_size(value, &size);
+ callchain_param.dump_size = size;
+
+ return ret;
+ }
+ if (!strcmp(var, "print-type")){
+ int ret;
+ ret = parse_callchain_mode(value);
+ if (ret == -1)
+ pr_err("Invalid callchain mode: %s\n", value);
+ return ret;
+ }
+ if (!strcmp(var, "order")){
+ int ret;
+ ret = parse_callchain_order(value);
+ if (ret == -1)
+ pr_err("Invalid callchain order: %s\n", value);
+ return ret;
+ }
+ if (!strcmp(var, "sort-key")){
+ int ret;
+ ret = parse_callchain_sort_key(value);
+ if (ret == -1)
+ pr_err("Invalid callchain sort key: %s\n", value);
+ return ret;
+ }
+ if (!strcmp(var, "threshold")) {
+ callchain_param.min_percent = strtod(value, &endptr);
+ if (value == endptr) {
+ pr_err("Invalid callchain threshold: %s\n", value);
+ return -1;
+ }
+ }
+ if (!strcmp(var, "print-limit")) {
+ callchain_param.print_limit = strtod(value, &endptr);
+ if (value == endptr) {
+ pr_err("Invalid callchain print limit: %s\n", value);
+ return -1;
+ }
+ }
+
+ return 0;
+}
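+
+/*
+ * Illustrative ~/.perfconfig fragment handled by the function above
+ * (hypothetical values):
+ *
+ *    [call-graph]
+ *        record-mode = dwarf
+ *        dump-size = 8192
+ *        print-type = graph
+ *        order = callee
+ *        sort-key = function
+ *        threshold = 0.5
+ *        print-limit = 128
+ */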
+
+static void
+rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
+ enum chain_mode mode)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct callchain_node *rnode;
+ u64 chain_cumul = callchain_cumul_hits(chain);
+
+ while (*p) {
+ u64 rnode_cumul;
+
+ parent = *p;
+ rnode = rb_entry(parent, struct callchain_node, rb_node);
+ rnode_cumul = callchain_cumul_hits(rnode);
+
+ switch (mode) {
+ case CHAIN_FLAT:
+ case CHAIN_FOLDED:
+ if (rnode->hit < chain->hit)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ case CHAIN_GRAPH_ABS: /* Fall through */
+ case CHAIN_GRAPH_REL:
+ if (rnode_cumul < chain_cumul)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ break;
+ case CHAIN_NONE:
+ default:
+ break;
+ }
+ }
+
+ rb_link_node(&chain->rb_node, parent, p);
+ rb_insert_color(&chain->rb_node, root);
+}
+
+static void
+__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
+ u64 min_hit)
+{
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ __sort_chain_flat(rb_root, child, min_hit);
+ }
+
+ if (node->hit && node->hit >= min_hit)
+ rb_insert_callchain(rb_root, node, CHAIN_FLAT);
+}
+
+/*
+ * Once we have every callchain from the stream, we can
+ * sort them by hit.
+ */
+static void
+sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
+ u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+ *rb_root = RB_ROOT;
+ __sort_chain_flat(rb_root, &root->node, min_hit);
+}
+
+static void __sort_chain_graph_abs(struct callchain_node *node,
+ u64 min_hit)
+{
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ node->rb_root = RB_ROOT;
+ n = rb_first(&node->rb_root_in);
+
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ __sort_chain_graph_abs(child, min_hit);
+ if (callchain_cumul_hits(child) >= min_hit)
+ rb_insert_callchain(&node->rb_root, child,
+ CHAIN_GRAPH_ABS);
+ }
+}
+
+static void
+sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
+ u64 min_hit, struct callchain_param *param __maybe_unused)
+{
+ __sort_chain_graph_abs(&chain_root->node, min_hit);
+ rb_root->rb_node = chain_root->node.rb_root.rb_node;
+}
+
+static void __sort_chain_graph_rel(struct callchain_node *node,
+ double min_percent)
+{
+ struct rb_node *n;
+ struct callchain_node *child;
+ u64 min_hit;
+
+ node->rb_root = RB_ROOT;
+ min_hit = ceil(node->children_hit * min_percent);
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ __sort_chain_graph_rel(child, min_percent);
+ if (callchain_cumul_hits(child) >= min_hit)
+ rb_insert_callchain(&node->rb_root, child,
+ CHAIN_GRAPH_REL);
+ }
+}
+
+static void
+sort_chain_graph_rel(struct rb_root *rb_root, struct callchain_root *chain_root,
+ u64 min_hit __maybe_unused, struct callchain_param *param)
+{
+ __sort_chain_graph_rel(&chain_root->node, param->min_percent / 100.0);
+ rb_root->rb_node = chain_root->node.rb_root.rb_node;
+}
+
+int callchain_register_param(struct callchain_param *param)
+{
+ switch (param->mode) {
+ case CHAIN_GRAPH_ABS:
+ param->sort = sort_chain_graph_abs;
+ break;
+ case CHAIN_GRAPH_REL:
+ param->sort = sort_chain_graph_rel;
+ break;
+ case CHAIN_FLAT:
+ case CHAIN_FOLDED:
+ param->sort = sort_chain_flat;
+ break;
+ case CHAIN_NONE:
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Create a child for a parent. If inherit_children, then the new child
+ * will become the new parent of its parent's children.
+ */
+static struct callchain_node *
+create_child(struct callchain_node *parent, bool inherit_children)
+{
+ struct callchain_node *new;
+
+ new = zalloc(sizeof(*new));
+ if (!new) {
+ perror("not enough memory to create child for code path tree");
+ return NULL;
+ }
+ new->parent = parent;
+ INIT_LIST_HEAD(&new->val);
+ INIT_LIST_HEAD(&new->parent_val);
+
+ if (inherit_children) {
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ new->rb_root_in = parent->rb_root_in;
+ parent->rb_root_in = RB_ROOT;
+
+ n = rb_first(&new->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ child->parent = new;
+ n = rb_next(n);
+ }
+
+ /* make it the first child */
+ rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+ rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+ }
+
+ return new;
+}
+
+
+/*
+ * Fill the node with callchain values
+ */
+static int
+fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
+{
+ struct callchain_cursor_node *cursor_node;
+
+ node->val_nr = cursor->nr - cursor->pos;
+ if (!node->val_nr)
+ pr_warning("Warning: empty node in callchain tree\n");
+
+ cursor_node = callchain_cursor_current(cursor);
+
+ while (cursor_node) {
+ struct callchain_list *call;
+
+ call = zalloc(sizeof(*call));
+ if (!call) {
+ perror("not enough memory for the code path tree");
+ return -1;
+ }
+ call->ip = cursor_node->ip;
+ call->ms.sym = cursor_node->sym;
+ call->ms.map = map__get(cursor_node->map);
+ call->srcline = cursor_node->srcline;
+
+ if (cursor_node->branch) {
+ call->branch_count = 1;
+
+ if (cursor_node->branch_from) {
+ /*
+ * branch_from is set with a value elsewhere,
+ * implying this is the "to" of a branch.
+ */
+ call->brtype_stat.branch_to = true;
+
+ if (cursor_node->branch_flags.predicted)
+ call->predicted_count = 1;
+
+ if (cursor_node->branch_flags.abort)
+ call->abort_count = 1;
+
+ branch_type_count(&call->brtype_stat,
+ &cursor_node->branch_flags,
+ cursor_node->branch_from,
+ cursor_node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ call->brtype_stat.branch_to = false;
+ call->cycles_count =
+ cursor_node->branch_flags.cycles;
+ call->iter_count = cursor_node->nr_loop_iter;
+ call->iter_cycles = cursor_node->iter_cycles;
+ }
+ }
+
+ list_add_tail(&call->list, &node->val);
+
+ callchain_cursor_advance(cursor);
+ cursor_node = callchain_cursor_current(cursor);
+ }
+ return 0;
+}
+
+static struct callchain_node *
+add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ struct callchain_node *new;
+
+ new = create_child(parent, false);
+ if (new == NULL)
+ return NULL;
+
+ if (fill_node(new, cursor) < 0) {
+ struct callchain_list *call, *tmp;
+
+ list_for_each_entry_safe(call, tmp, &new->val, list) {
+ list_del(&call->list);
+ map__zput(call->ms.map);
+ free(call);
+ }
+ free(new);
+ return NULL;
+ }
+
+ new->children_hit = 0;
+ new->hit = period;
+ new->children_count = 0;
+ new->count = 1;
+ return new;
+}
+
+enum match_result {
+ MATCH_ERROR = -1,
+ MATCH_EQ,
+ MATCH_LT,
+ MATCH_GT,
+};
+
+static enum match_result match_chain_strings(const char *left,
+ const char *right)
+{
+ enum match_result ret = MATCH_EQ;
+ int cmp;
+
+ if (left && right)
+ cmp = strcmp(left, right);
+ else if (!left && right)
+ cmp = 1;
+ else if (left && !right)
+ cmp = -1;
+ else
+ return MATCH_ERROR;
+
+ if (cmp != 0)
+ ret = cmp < 0 ? MATCH_LT : MATCH_GT;
+
+ return ret;
+}
+
+/*
+ * We need to always use relative addresses because we're aggregating
+ * callchains from multiple threads, i.e. different address spaces, so
+ * comparing absolute addresses makes no sense: a symbol in a DSO may end up
+ * at a different address when used in a different binary or even the same
+ * binary but with some sort of address randomization technique, thus we need
+ * to compare just relative addresses. -acme
+ */
+static enum match_result match_chain_dso_addresses(struct map *left_map, u64 left_ip,
+ struct map *right_map, u64 right_ip)
+{
+ struct dso *left_dso = left_map ? left_map->dso : NULL;
+ struct dso *right_dso = right_map ? right_map->dso : NULL;
+
+ if (left_dso != right_dso)
+ return left_dso < right_dso ? MATCH_LT : MATCH_GT;
+
+ if (left_ip != right_ip)
+ return left_ip < right_ip ? MATCH_LT : MATCH_GT;
+
+ return MATCH_EQ;
+}
+
+static enum match_result match_chain(struct callchain_cursor_node *node,
+ struct callchain_list *cnode)
+{
+ enum match_result match = MATCH_ERROR;
+
+ switch (callchain_param.key) {
+ case CCKEY_SRCLINE:
+ match = match_chain_strings(cnode->srcline, node->srcline);
+ if (match != MATCH_ERROR)
+ break;
+ /* otherwise fall-back to symbol-based comparison below */
+ __fallthrough;
+ case CCKEY_FUNCTION:
+ if (node->sym && cnode->ms.sym) {
+ /*
+ * Compare inlined frames based on their symbol name
+ * because different inlined frames will have the same
+ * symbol start. Otherwise do a faster comparison based
+ * on the symbol start address.
+ */
+ if (cnode->ms.sym->inlined || node->sym->inlined) {
+ match = match_chain_strings(cnode->ms.sym->name,
+ node->sym->name);
+ if (match != MATCH_ERROR)
+ break;
+ } else {
+ match = match_chain_dso_addresses(cnode->ms.map, cnode->ms.sym->start,
+ node->map, node->sym->start);
+ break;
+ }
+ }
+ /* otherwise fall-back to IP-based comparison below */
+ __fallthrough;
+ case CCKEY_ADDRESS:
+ default:
+ match = match_chain_dso_addresses(cnode->ms.map, cnode->ip, node->map, node->ip);
+ break;
+ }
+
+ if (match == MATCH_EQ && node->branch) {
+ cnode->branch_count++;
+
+ if (node->branch_from) {
+ /*
+ * It's "to" of a branch
+ */
+ cnode->brtype_stat.branch_to = true;
+
+ if (node->branch_flags.predicted)
+ cnode->predicted_count++;
+
+ if (node->branch_flags.abort)
+ cnode->abort_count++;
+
+ branch_type_count(&cnode->brtype_stat,
+ &node->branch_flags,
+ node->branch_from,
+ node->ip);
+ } else {
+ /*
+ * It's "from" of a branch
+ */
+ cnode->brtype_stat.branch_to = false;
+ cnode->cycles_count += node->branch_flags.cycles;
+ cnode->iter_count += node->nr_loop_iter;
+ cnode->iter_cycles += node->iter_cycles;
+ cnode->from_count++;
+ }
+ }
+
+ return match;
+}
+
+/*
+ * Split the parent in two parts (a new child is created) and
+ * give a part of its callchain to the created child.
+ * Then create another child to host the given callchain of new branch
+ */
+static int
+split_add_child(struct callchain_node *parent,
+ struct callchain_cursor *cursor,
+ struct callchain_list *to_split,
+ u64 idx_parents, u64 idx_local, u64 period)
+{
+ struct callchain_node *new;
+ struct list_head *old_tail;
+ unsigned int idx_total = idx_parents + idx_local;
+
+ /* split */
+ new = create_child(parent, true);
+ if (new == NULL)
+ return -1;
+
+ /* split the callchain and move a part to the new child */
+ old_tail = parent->val.prev;
+ list_del_range(&to_split->list, old_tail);
+ new->val.next = &to_split->list;
+ new->val.prev = old_tail;
+ to_split->list.prev = &new->val;
+ old_tail->next = &new->val;
+
+ /* split the hits */
+ new->hit = parent->hit;
+ new->children_hit = parent->children_hit;
+ parent->children_hit = callchain_cumul_hits(new);
+ new->val_nr = parent->val_nr - idx_local;
+ parent->val_nr = idx_local;
+ new->count = parent->count;
+ new->children_count = parent->children_count;
+ parent->children_count = callchain_cumul_counts(new);
+
+ /* create a new child for the new branch if any */
+ if (idx_total < cursor->nr) {
+ struct callchain_node *first;
+ struct callchain_list *cnode;
+ struct callchain_cursor_node *node;
+ struct rb_node *p, **pp;
+
+ parent->hit = 0;
+ parent->children_hit += period;
+ parent->count = 0;
+ parent->children_count += 1;
+
+ node = callchain_cursor_current(cursor);
+ new = add_child(parent, cursor, period);
+ if (new == NULL)
+ return -1;
+
+ /*
+ * This is the second child, since we moved the parent's
+ * children to the new (first) child above.
+ */
+ p = parent->rb_root_in.rb_node;
+ first = rb_entry(p, struct callchain_node, rb_node_in);
+ cnode = list_first_entry(&first->val, struct callchain_list,
+ list);
+
+ if (match_chain(node, cnode) == MATCH_LT)
+ pp = &p->rb_left;
+ else
+ pp = &p->rb_right;
+
+ rb_link_node(&new->rb_node_in, p, pp);
+ rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
+ } else {
+ parent->hit = period;
+ parent->count = 1;
+ }
+ return 0;
+}
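+
+/*
+ * Sketch of the split above (hypothetical frames): appending A->B->D to
+ * a node holding A->B->C turns
+ *
+ *    [A,B,C]    into    [A,B]
+ *                       /    \
+ *                    [C]      [D]
+ *
+ * where [A,B] keeps the cumulated hits and the two children host the
+ * diverging tails.
+ */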
+
+static enum match_result
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+static int
+append_chain_children(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ struct callchain_node *rnode;
+ struct callchain_cursor_node *node;
+ struct rb_node **p = &root->rb_root_in.rb_node;
+ struct rb_node *parent = NULL;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ return -1;
+
+ /* lookup in the children */
+ while (*p) {
+ enum match_result ret;
+
+ parent = *p;
+ rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+
+ /* If at least the first entry matches, defer to the children */
+ ret = append_chain(rnode, cursor, period);
+ if (ret == MATCH_EQ)
+ goto inc_children_hit;
+ if (ret == MATCH_ERROR)
+ return -1;
+
+ if (ret == MATCH_LT)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ /* nothing in children, add to the current node */
+ rnode = add_child(root, cursor, period);
+ if (rnode == NULL)
+ return -1;
+
+ rb_link_node(&rnode->rb_node_in, parent, p);
+ rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
+
+inc_children_hit:
+ root->children_hit += period;
+ root->children_count++;
+ return 0;
+}
+
+static enum match_result
+append_chain(struct callchain_node *root,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ struct callchain_list *cnode;
+ u64 start = cursor->pos;
+ bool found = false;
+ u64 matches;
+ enum match_result cmp = MATCH_ERROR;
+
+ /*
+ * Look up in the current node.
+ * If we have a symbol, compare against the symbol start so we
+ * match anywhere inside a function, unless function mode is
+ * disabled.
+ */
+ list_for_each_entry(cnode, &root->val, list) {
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ cmp = match_chain(node, cnode);
+ if (cmp != MATCH_EQ)
+ break;
+
+ found = true;
+
+ callchain_cursor_advance(cursor);
+ }
+
+ /* no match: relay to the parent */
+ if (!found) {
+ WARN_ONCE(cmp == MATCH_ERROR, "Chain comparison error\n");
+ return cmp;
+ }
+
+ matches = cursor->pos - start;
+
+ /* we match only a part of the node. Split it and add the new chain */
+ if (matches < root->val_nr) {
+ if (split_add_child(root, cursor, cnode, start, matches,
+ period) < 0)
+ return MATCH_ERROR;
+
+ return MATCH_EQ;
+ }
+
+ /* we match 100% of the path, increment the hit */
+ if (matches == root->val_nr && cursor->pos == cursor->nr) {
+ root->hit += period;
+ root->count++;
+ return MATCH_EQ;
+ }
+
+ /* We match the node and still have a part remaining */
+ if (append_chain_children(root, cursor, period) < 0)
+ return MATCH_ERROR;
+
+ return MATCH_EQ;
+}
+
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period)
+{
+ if (!cursor->nr)
+ return 0;
+
+ callchain_cursor_commit(cursor);
+
+ if (append_chain_children(&root->node, cursor, period) < 0)
+ return -1;
+
+ if (cursor->nr > root->max_depth)
+ root->max_depth = cursor->nr;
+
+ return 0;
+}
+
+static int
+merge_chain_branch(struct callchain_cursor *cursor,
+ struct callchain_node *dst, struct callchain_node *src)
+{
+ struct callchain_cursor_node **old_last = cursor->last;
+ struct callchain_node *child;
+ struct callchain_list *list, *next_list;
+ struct rb_node *n;
+ int old_pos = cursor->nr;
+ int err = 0;
+
+ list_for_each_entry_safe(list, next_list, &src->val, list) {
+ callchain_cursor_append(cursor, list->ip,
+ list->ms.map, list->ms.sym,
+ false, NULL, 0, 0, 0, list->srcline);
+ list_del(&list->list);
+ map__zput(list->ms.map);
+ free(list);
+ }
+
+ if (src->hit) {
+ callchain_cursor_commit(cursor);
+ if (append_chain_children(dst, cursor, src->hit) < 0)
+ return -1;
+ }
+
+ n = rb_first(&src->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+ rb_erase(&child->rb_node_in, &src->rb_root_in);
+
+ err = merge_chain_branch(cursor, dst, child);
+ if (err)
+ break;
+
+ free(child);
+ }
+
+ cursor->nr = old_pos;
+ cursor->last = old_last;
+
+ return err;
+}
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src)
+{
+ return merge_chain_branch(cursor, &dst->node, &src->node);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor,
+ u64 ip, struct map *map, struct symbol *sym,
+ bool branch, struct branch_flags *flags,
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+ const char *srcline)
+{
+ struct callchain_cursor_node *node = *cursor->last;
+
+ if (!node) {
+ node = calloc(1, sizeof(*node));
+ if (!node)
+ return -ENOMEM;
+
+ *cursor->last = node;
+ }
+
+ node->ip = ip;
+ map__zput(node->map);
+ node->map = map__get(map);
+ node->sym = sym;
+ node->branch = branch;
+ node->nr_loop_iter = nr_loop_iter;
+ node->iter_cycles = iter_cycles;
+ node->srcline = srcline;
+
+ if (flags)
+ memcpy(&node->branch_flags, flags,
+ sizeof(struct branch_flags));
+
+ node->branch_from = branch_from;
+ cursor->nr++;
+
+ cursor->last = &node->next;
+
+ return 0;
+}
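+
+/*
+ * Typical cursor usage (illustrative): a resolver fills the cursor
+ * frame by frame and the result is folded into a root, e.g.
+ *
+ *    callchain_cursor_reset(cursor);
+ *    for each resolved frame:
+ *        callchain_cursor_append(cursor, ip, map, sym, false, NULL,
+ *                                0, 0, 0, srcline);
+ *    callchain_append(root, cursor, sample->period);
+ */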
+
+int sample__resolve_callchain(struct perf_sample *sample,
+ struct callchain_cursor *cursor, struct symbol **parent,
+ struct perf_evsel *evsel, struct addr_location *al,
+ int max_stack)
+{
+ if (sample->callchain == NULL && !symbol_conf.show_branchflag_count)
+ return 0;
+
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain ||
+ perf_hpp_list.parent || symbol_conf.show_branchflag_count) {
+ return thread__resolve_callchain(al->thread, cursor, evsel, sample,
+ parent, al, max_stack);
+ }
+ return 0;
+}
+
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample)
+{
+ if ((!symbol_conf.use_callchain || sample->callchain == NULL) &&
+ !symbol_conf.show_branchflag_count)
+ return 0;
+ return callchain_append(he->callchain, &callchain_cursor, sample->period);
+}
+
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+ bool hide_unresolved)
+{
+ al->map = node->map;
+ al->sym = node->sym;
+ al->srcline = node->srcline;
+ al->addr = node->ip;
+
+ if (al->sym == NULL) {
+ if (hide_unresolved)
+ return 0;
+ if (al->map == NULL)
+ goto out;
+ }
+
+ if (al->map->groups == &al->machine->kmaps) {
+ if (machine__is_host(al->machine)) {
+ al->cpumode = PERF_RECORD_MISC_KERNEL;
+ al->level = 'k';
+ } else {
+ al->cpumode = PERF_RECORD_MISC_GUEST_KERNEL;
+ al->level = 'g';
+ }
+ } else {
+ if (machine__is_host(al->machine)) {
+ al->cpumode = PERF_RECORD_MISC_USER;
+ al->level = '.';
+ } else if (perf_guest) {
+ al->cpumode = PERF_RECORD_MISC_GUEST_USER;
+ al->level = 'u';
+ } else {
+ al->cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ al->level = 'H';
+ }
+ }
+
+out:
+ return 1;
+}
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso)
+{
+ bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+ bool show_srcline = show_addr || callchain_param.key == CCKEY_SRCLINE;
+ int printed;
+
+ if (cl->ms.sym) {
+ const char *inlined = cl->ms.sym->inlined ? " (inlined)" : "";
+
+ if (show_srcline && cl->srcline)
+ printed = scnprintf(bf, bfsize, "%s %s%s",
+ cl->ms.sym->name, cl->srcline,
+ inlined);
+ else
+ printed = scnprintf(bf, bfsize, "%s%s",
+ cl->ms.sym->name, inlined);
+ } else
+ printed = scnprintf(bf, bfsize, "%#" PRIx64, cl->ip);
+
+ if (show_dso)
+ scnprintf(bf + printed, bfsize - printed, " %s",
+ cl->ms.map ?
+ cl->ms.map->dso->short_name :
+ "unknown");
+
+ return bf;
+}
+
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ scnprintf(bf, bfsize, "%"PRIu64, period);
+ break;
+ case CCVAL_COUNT:
+ scnprintf(bf, bfsize, "%u", count);
+ break;
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ scnprintf(bf, bfsize, "%.2f%%", percent);
+ break;
+ }
+ return bf;
+}
+
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total)
+{
+ double percent = 0.0;
+ u64 period = callchain_cumul_hits(node);
+ unsigned count = callchain_cumul_counts(node);
+
+ if (callchain_param.mode == CHAIN_FOLDED) {
+ period = node->hit;
+ count = node->count;
+ }
+
+ switch (callchain_param.value) {
+ case CCVAL_PERIOD:
+ return fprintf(fp, "%"PRIu64, period);
+ case CCVAL_COUNT:
+ return fprintf(fp, "%u", count);
+ case CCVAL_PERCENT:
+ default:
+ if (total)
+ percent = period * 100.0 / total;
+ return percent_color_fprintf(fp, "%.2f%%", percent);
+ }
+ return 0;
+}
+
+static void callchain_counts_value(struct callchain_node *node,
+ u64 *branch_count, u64 *predicted_count,
+ u64 *abort_count, u64 *cycles_count)
+{
+ struct callchain_list *clist;
+
+ list_for_each_entry(clist, &node->val, list) {
+ if (branch_count)
+ *branch_count += clist->branch_count;
+
+ if (predicted_count)
+ *predicted_count += clist->predicted_count;
+
+ if (abort_count)
+ *abort_count += clist->abort_count;
+
+ if (cycles_count)
+ *cycles_count += clist->cycles_count;
+ }
+}
+
+static int callchain_node_branch_counts_cumul(struct callchain_node *node,
+ u64 *branch_count,
+ u64 *predicted_count,
+ u64 *abort_count,
+ u64 *cycles_count)
+{
+ struct callchain_node *child;
+ struct rb_node *n;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
+ callchain_node_branch_counts_cumul(child, branch_count,
+ predicted_count,
+ abort_count,
+ cycles_count);
+
+ callchain_counts_value(child, branch_count,
+ predicted_count, abort_count,
+ cycles_count);
+ }
+
+ return 0;
+}
+
+int callchain_branch_counts(struct callchain_root *root,
+ u64 *branch_count, u64 *predicted_count,
+ u64 *abort_count, u64 *cycles_count)
+{
+ if (branch_count)
+ *branch_count = 0;
+
+ if (predicted_count)
+ *predicted_count = 0;
+
+ if (abort_count)
+ *abort_count = 0;
+
+ if (cycles_count)
+ *cycles_count = 0;
+
+ return callchain_node_branch_counts_cumul(&root->node,
+ branch_count,
+ predicted_count,
+ abort_count,
+ cycles_count);
+}
+
+static int count_pri64_printf(int idx, const char *str, u64 value, char *bf, int bfsize)
+{
+ int printed;
+
+ printed = scnprintf(bf, bfsize, "%s%s:%" PRId64 "", (idx) ? " " : " (", str, value);
+
+ return printed;
+}
+
+static int count_float_printf(int idx, const char *str, float value,
+ char *bf, int bfsize, float threshold)
+{
+ int printed;
+
+ if (threshold != 0.0 && value < threshold)
+ return 0;
+
+ printed = scnprintf(bf, bfsize, "%s%s:%.1f%%", (idx) ? " " : " (", str, value);
+
+ return printed;
+}
+
+static int branch_to_str(char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count,
+ struct branch_type_stat *brtype_stat)
+{
+ int printed, i = 0;
+
+ printed = branch_type_str(brtype_stat, bf, bfsize);
+ if (printed)
+ i++;
+
+ if (predicted_count < branch_count) {
+ printed += count_float_printf(i++, "predicted",
+ predicted_count * 100.0 / branch_count,
+ bf + printed, bfsize - printed, 0.0);
+ }
+
+ if (abort_count) {
+ printed += count_float_printf(i++, "abort",
+ abort_count * 100.0 / branch_count,
+ bf + printed, bfsize - printed, 0.1);
+ }
+
+ if (i)
+ printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+ return printed;
+}
+
+static int branch_from_str(char *bf, int bfsize,
+ u64 branch_count,
+ u64 cycles_count, u64 iter_count,
+ u64 iter_cycles, u64 from_count)
+{
+ int printed = 0, i = 0;
+ u64 cycles, v = 0;
+
+ cycles = cycles_count / branch_count;
+ if (cycles) {
+ printed += count_pri64_printf(i++, "cycles",
+ cycles,
+ bf + printed, bfsize - printed);
+ }
+
+ if (iter_count && from_count) {
+ v = iter_count / from_count;
+ if (v) {
+ printed += count_pri64_printf(i++, "iter",
+ v, bf + printed, bfsize - printed);
+
+ printed += count_pri64_printf(i++, "avg_cycles",
+ iter_cycles / iter_count,
+ bf + printed, bfsize - printed);
+ }
+ }
+
+ if (i)
+ printed += scnprintf(bf + printed, bfsize - printed, ")");
+
+ return printed;
+}
+
+static int counts_str_build(char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count, u64 cycles_count,
+ u64 iter_count, u64 iter_cycles,
+ u64 from_count,
+ struct branch_type_stat *brtype_stat)
+{
+ int printed;
+
+ if (branch_count == 0)
+ return scnprintf(bf, bfsize, " (calltrace)");
+
+ if (brtype_stat->branch_to) {
+ printed = branch_to_str(bf, bfsize, branch_count,
+ predicted_count, abort_count, brtype_stat);
+ } else {
+ printed = branch_from_str(bf, bfsize, branch_count,
+ cycles_count, iter_count, iter_cycles,
+ from_count);
+ }
+
+ if (!printed)
+ bf[0] = 0;
+
+ return printed;
+}
+
+static int callchain_counts_printf(FILE *fp, char *bf, int bfsize,
+ u64 branch_count, u64 predicted_count,
+ u64 abort_count, u64 cycles_count,
+ u64 iter_count, u64 iter_cycles,
+ u64 from_count,
+ struct branch_type_stat *brtype_stat)
+{
+ char str[256];
+
+ counts_str_build(str, sizeof(str), branch_count,
+ predicted_count, abort_count, cycles_count,
+ iter_count, iter_cycles, from_count, brtype_stat);
+
+ if (fp)
+ return fprintf(fp, "%s", str);
+
+ return scnprintf(bf, bfsize, "%s", str);
+}
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+ FILE *fp, char *bf, int bfsize)
+{
+ u64 branch_count, predicted_count;
+ u64 abort_count, cycles_count;
+ u64 iter_count, iter_cycles;
+ u64 from_count;
+
+ branch_count = clist->branch_count;
+ predicted_count = clist->predicted_count;
+ abort_count = clist->abort_count;
+ cycles_count = clist->cycles_count;
+ iter_count = clist->iter_count;
+ iter_cycles = clist->iter_cycles;
+ from_count = clist->from_count;
+
+ return callchain_counts_printf(fp, bf, bfsize, branch_count,
+ predicted_count, abort_count,
+ cycles_count, iter_count, iter_cycles,
+ from_count, &clist->brtype_stat);
+}
+
+static void free_callchain_node(struct callchain_node *node)
+{
+ struct callchain_list *list, *tmp;
+ struct callchain_node *child;
+ struct rb_node *n;
+
+ list_for_each_entry_safe(list, tmp, &node->parent_val, list) {
+ list_del(&list->list);
+ map__zput(list->ms.map);
+ free(list);
+ }
+
+ list_for_each_entry_safe(list, tmp, &node->val, list) {
+ list_del(&list->list);
+ map__zput(list->ms.map);
+ free(list);
+ }
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+ rb_erase(&child->rb_node_in, &node->rb_root_in);
+
+ free_callchain_node(child);
+ free(child);
+ }
+}
+
+void free_callchain(struct callchain_root *root)
+{
+ if (!symbol_conf.use_callchain)
+ return;
+
+ free_callchain_node(&root->node);
+}
+
+static u64 decay_callchain_node(struct callchain_node *node)
+{
+ struct callchain_node *child;
+ struct rb_node *n;
+ u64 child_hits = 0;
+
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+
+ child_hits += decay_callchain_node(child);
+ n = rb_next(n);
+ }
+
+ node->hit = (node->hit * 7) / 8;
+ node->children_hit = child_hits;
+
+ return node->hit;
+}
+
+void decay_callchain(struct callchain_root *root)
+{
+ if (!symbol_conf.use_callchain)
+ return;
+
+ decay_callchain_node(&root->node);
+}
+
+int callchain_node__make_parent_list(struct callchain_node *node)
+{
+ struct callchain_node *parent = node->parent;
+ struct callchain_list *chain, *new;
+ LIST_HEAD(head);
+
+ while (parent) {
+ list_for_each_entry_reverse(chain, &parent->val, list) {
+ new = malloc(sizeof(*new));
+ if (new == NULL)
+ goto out;
+ *new = *chain;
+ new->has_children = false;
+ map__get(new->ms.map);
+ list_add_tail(&new->list, &head);
+ }
+ parent = parent->parent;
+ }
+
+ list_for_each_entry_safe_reverse(chain, new, &head, list)
+ list_move_tail(&chain->list, &node->parent_val);
+
+ if (!list_empty(&node->parent_val)) {
+ chain = list_first_entry(&node->parent_val, struct callchain_list, list);
+ chain->has_children = rb_prev(&node->rb_node) || rb_next(&node->rb_node);
+
+ chain = list_first_entry(&node->val, struct callchain_list, list);
+ chain->has_children = false;
+ }
+ return 0;
+
+out:
+ list_for_each_entry_safe(chain, new, &head, list) {
+ list_del(&chain->list);
+ map__zput(chain->ms.map);
+ free(chain);
+ }
+ return -ENOMEM;
+}
+
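+/*
+ * Commit @src and append each of its nodes to @dst in order. Returns
+ * 0 on success or the first error from callchain_cursor_append().
+ */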
+int callchain_cursor__copy(struct callchain_cursor *dst,
+ struct callchain_cursor *src)
+{
+ int rc = 0;
+
+ callchain_cursor_reset(dst);
+ callchain_cursor_commit(src);
+
+ while (true) {
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(src);
+ if (node == NULL)
+ break;
+
+ rc = callchain_cursor_append(dst, node->ip, node->map, node->sym,
+ node->branch, &node->branch_flags,
+ node->nr_loop_iter,
+ node->iter_cycles,
+ node->branch_from, node->srcline);
+ if (rc)
+ break;
+
+ callchain_cursor_advance(src);
+ }
+
+ return rc;
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
new file mode 100644
index 000000000..99d38ac01
--- /dev/null
+++ b/tools/perf/util/callchain.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CALLCHAIN_H
+#define __PERF_CALLCHAIN_H
+
+#include "../perf.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include "event.h"
+#include "map.h"
+#include "symbol.h"
+#include "branch.h"
+
+#define HELP_PAD "\t\t\t\t"
+
+#define CALLCHAIN_HELP "set up and enable call-graph (stack chain/backtrace):\n\n"
+
+# define RECORD_MODE_HELP HELP_PAD "record_mode:\tcall graph recording mode (fp|dwarf|lbr)\n"
+
+#define RECORD_SIZE_HELP \
+ HELP_PAD "record_size:\tif record_mode is 'dwarf', max size of stack recording (<bytes>)\n" \
+ HELP_PAD "\t\tdefault: 8192 (bytes)\n"
+
+#define CALLCHAIN_RECORD_HELP CALLCHAIN_HELP RECORD_MODE_HELP RECORD_SIZE_HELP
+
+#define CALLCHAIN_REPORT_HELP \
+ HELP_PAD "print_type:\tcall graph printing style (graph|flat|fractal|folded|none)\n" \
+ HELP_PAD "threshold:\tminimum call graph inclusion threshold (<percent>)\n" \
+ HELP_PAD "print_limit:\tmaximum number of call graph entry (<number>)\n" \
+ HELP_PAD "order:\t\tcall graph order (caller|callee)\n" \
+ HELP_PAD "sort_key:\tcall graph sort key (function|address)\n" \
+ HELP_PAD "branch:\t\tinclude last branch info to call graph (branch)\n" \
+ HELP_PAD "value:\t\tcall graph value (percent|period|count)\n"
+
+enum perf_call_graph_mode {
+ CALLCHAIN_NONE,
+ CALLCHAIN_FP,
+ CALLCHAIN_DWARF,
+ CALLCHAIN_LBR,
+ CALLCHAIN_MAX
+};
+
+enum chain_mode {
+ CHAIN_NONE,
+ CHAIN_FLAT,
+ CHAIN_GRAPH_ABS,
+ CHAIN_GRAPH_REL,
+ CHAIN_FOLDED,
+};
+
+enum chain_order {
+ ORDER_CALLER,
+ ORDER_CALLEE
+};
+
+struct callchain_node {
+ struct callchain_node *parent;
+ struct list_head val;
+ struct list_head parent_val;
+ struct rb_node rb_node_in; /* to insert nodes in an rbtree */
+ struct rb_node rb_node; /* to sort nodes in an output tree */
+ struct rb_root rb_root_in; /* input tree of children */
+ struct rb_root rb_root; /* sorted output tree of children */
+ unsigned int val_nr;
+ unsigned int count;
+ unsigned int children_count;
+ u64 hit;
+ u64 children_hit;
+};
+
+struct callchain_root {
+ u64 max_depth;
+ struct callchain_node node;
+};
+
+struct callchain_param;
+
+typedef void (*sort_chain_func_t)(struct rb_root *, struct callchain_root *,
+ u64, struct callchain_param *);
+
+enum chain_key {
+ CCKEY_FUNCTION,
+ CCKEY_ADDRESS,
+ CCKEY_SRCLINE
+};
+
+enum chain_value {
+ CCVAL_PERCENT,
+ CCVAL_PERIOD,
+ CCVAL_COUNT,
+};
+
+extern bool dwarf_callchain_users;
+
+struct callchain_param {
+ bool enabled;
+ enum perf_call_graph_mode record_mode;
+ u32 dump_size;
+ enum chain_mode mode;
+ u16 max_stack;
+ u32 print_limit;
+ double min_percent;
+ sort_chain_func_t sort;
+ enum chain_order order;
+ bool order_set;
+ enum chain_key key;
+ bool branch_callstack;
+ enum chain_value value;
+};
+
+extern struct callchain_param callchain_param;
+extern struct callchain_param callchain_param_default;
+
+struct callchain_list {
+ u64 ip;
+ struct map_symbol ms;
+ struct /* for TUI */ {
+ bool unfolded;
+ bool has_children;
+ };
+ u64 branch_count;
+ u64 from_count;
+ u64 predicted_count;
+ u64 abort_count;
+ u64 cycles_count;
+ u64 iter_count;
+ u64 iter_cycles;
+ struct branch_type_stat brtype_stat;
+ const char *srcline;
+ struct list_head list;
+};
+
+/*
+ * A callchain cursor is a singly linked list that
+ * lets one feed a callchain progressively.
+ * It keeps persistently allocated entries to minimize
+ * allocations.
+ */
+struct callchain_cursor_node {
+ u64 ip;
+ struct map *map;
+ struct symbol *sym;
+ const char *srcline;
+ bool branch;
+ struct branch_flags branch_flags;
+ u64 branch_from;
+ int nr_loop_iter;
+ u64 iter_cycles;
+ struct callchain_cursor_node *next;
+};
+
+struct callchain_cursor {
+ u64 nr;
+ struct callchain_cursor_node *first;
+ struct callchain_cursor_node **last;
+ u64 pos;
+ struct callchain_cursor_node *curr;
+};
+
+extern __thread struct callchain_cursor callchain_cursor;
+
+static inline void callchain_init(struct callchain_root *root)
+{
+ INIT_LIST_HEAD(&root->node.val);
+ INIT_LIST_HEAD(&root->node.parent_val);
+
+ root->node.parent = NULL;
+ root->node.hit = 0;
+ root->node.children_hit = 0;
+ root->node.rb_root_in = RB_ROOT;
+ root->max_depth = 0;
+}
+
+static inline u64 callchain_cumul_hits(struct callchain_node *node)
+{
+ return node->hit + node->children_hit;
+}
+
+static inline unsigned callchain_cumul_counts(struct callchain_node *node)
+{
+ return node->count + node->children_count;
+}
+
+int callchain_register_param(struct callchain_param *param);
+int callchain_append(struct callchain_root *root,
+ struct callchain_cursor *cursor,
+ u64 period);
+
+int callchain_merge(struct callchain_cursor *cursor,
+ struct callchain_root *dst, struct callchain_root *src);
+
+/*
+ * Initialize a cursor before adding entries inside, but keep
+ * the previously allocated entries as a cache.
+ */
+static inline void callchain_cursor_reset(struct callchain_cursor *cursor)
+{
+ struct callchain_cursor_node *node;
+
+ cursor->nr = 0;
+ cursor->last = &cursor->first;
+
+ for (node = cursor->first; node != NULL; node = node->next)
+ map__zput(node->map);
+}
+
+int callchain_cursor_append(struct callchain_cursor *cursor, u64 ip,
+ struct map *map, struct symbol *sym,
+ bool branch, struct branch_flags *flags,
+ int nr_loop_iter, u64 iter_cycles, u64 branch_from,
+ const char *srcline);
+
+/* Close a cursor writing session. Initialize for the reader */
+static inline void callchain_cursor_commit(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->first;
+ cursor->pos = 0;
+}
+
+/* Cursor reading iteration helpers */
+static inline struct callchain_cursor_node *
+callchain_cursor_current(struct callchain_cursor *cursor)
+{
+ if (cursor->pos == cursor->nr)
+ return NULL;
+
+ return cursor->curr;
+}
+
+static inline void callchain_cursor_advance(struct callchain_cursor *cursor)
+{
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+}
+
+int callchain_cursor__copy(struct callchain_cursor *dst,
+ struct callchain_cursor *src);
+
+struct option;
+struct hist_entry;
+
+int record_parse_callchain_opt(const struct option *opt, const char *arg, int unset);
+int record_callchain_opt(const struct option *opt, const char *arg, int unset);
+
+struct record_opts;
+
+int record_opts__parse_callchain(struct record_opts *record,
+ struct callchain_param *callchain,
+ const char *arg, bool unset);
+
+int sample__resolve_callchain(struct perf_sample *sample,
+ struct callchain_cursor *cursor, struct symbol **parent,
+ struct perf_evsel *evsel, struct addr_location *al,
+ int max_stack);
+int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *sample);
+int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
+ bool hide_unresolved);
+
+extern const char record_callchain_help[];
+int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
+int parse_callchain_report_opt(const char *arg);
+int parse_callchain_top_opt(const char *arg);
+int perf_callchain_config(const char *var, const char *value);
+
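+/*
+ * Point @dest at the not-yet-consumed tail of @src. Nodes are shared
+ * with @src rather than copied, so @dest must not outlive @src.
+ */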
+static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
+ struct callchain_cursor *src)
+{
+ *dest = *src;
+
+ dest->first = src->curr;
+ dest->nr -= src->pos;
+}
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+#else
+static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
+ struct ip_callchain *chain __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+char *callchain_list__sym_name(struct callchain_list *cl,
+ char *bf, size_t bfsize, bool show_dso);
+char *callchain_node__scnprintf_value(struct callchain_node *node,
+ char *bf, size_t bfsize, u64 total);
+int callchain_node__fprintf_value(struct callchain_node *node,
+ FILE *fp, u64 total);
+
+int callchain_list_counts__printf_value(struct callchain_list *clist,
+ FILE *fp, char *bf, int bfsize);
+
+void free_callchain(struct callchain_root *root);
+void decay_callchain(struct callchain_root *root);
+int callchain_node__make_parent_list(struct callchain_node *node);
+
+int callchain_branch_counts(struct callchain_root *root,
+ u64 *branch_count, u64 *predicted_count,
+ u64 *abort_count, u64 *cycles_count);
+
+#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
new file mode 100644
index 000000000..ccd02634a
--- /dev/null
+++ b/tools/perf/util/cgroup.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "../perf.h"
+#include <subcmd/parse-options.h>
+#include "evsel.h"
+#include "cgroup.h"
+#include "evlist.h"
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int nr_cgroups;
+
+static int
+cgroupfs_find_mountpoint(char *buf, size_t maxlen)
+{
+ FILE *fp;
+ char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+ char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
+ char *token, *saved_ptr = NULL;
+
+ fp = fopen("/proc/mounts", "r");
+ if (!fp)
+ return -1;
+
+ /*
+ * In order to handle a split hierarchy, we need to scan /proc/mounts
+ * and inspect every cgroupfs mount point to find one that has the
+ * perf_event subsystem.
+ */
+ path_v1[0] = '\0';
+ path_v2[0] = '\0';
+
+ while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
+ __stringify(PATH_MAX)"s %*d %*d\n",
+ mountpoint, type, tokens) == 3) {
+
+ if (!path_v1[0] && !strcmp(type, "cgroup")) {
+
+ token = strtok_r(tokens, ",", &saved_ptr);
+
+ while (token != NULL) {
+ if (!strcmp(token, "perf_event")) {
+ strcpy(path_v1, mountpoint);
+ break;
+ }
+ token = strtok_r(NULL, ",", &saved_ptr);
+ }
+ }
+
+ if (!path_v2[0] && !strcmp(type, "cgroup2"))
+ strcpy(path_v2, mountpoint);
+
+ if (path_v1[0] && path_v2[0])
+ break;
+ }
+ fclose(fp);
+
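+ /* prefer a cgroup v1 mount with the perf_event controller over v2 */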
+ if (path_v1[0])
+ path = path_v1;
+ else if (path_v2[0])
+ path = path_v2;
+ else
+ return -1;
+
+ if (strlen(path) < maxlen) {
+ strcpy(buf, path);
+ return 0;
+ }
+ return -1;
+}
+
+static int open_cgroup(const char *name)
+{
+ char path[PATH_MAX + 1];
+ char mnt[PATH_MAX + 1];
+ int fd;
+
+
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ return -1;
+
+ scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ fprintf(stderr, "no access to cgroup %s\n", path);
+
+ return fd;
+}
+
+static struct cgroup *evlist__find_cgroup(struct perf_evlist *evlist, const char *str)
+{
+ struct perf_evsel *counter;
+ /*
+ * Check whether the cgroup is already defined; if so, reuse it.
+ */
+ evlist__for_each_entry(evlist, counter) {
+ if (!counter->cgrp)
+ continue;
+ if (!strcmp(counter->cgrp->name, str))
+ return cgroup__get(counter->cgrp);
+ }
+
+ return NULL;
+}
+
+static struct cgroup *cgroup__new(const char *name)
+{
+ struct cgroup *cgroup = zalloc(sizeof(*cgroup));
+
+ if (cgroup != NULL) {
+ refcount_set(&cgroup->refcnt, 1);
+
+ cgroup->name = strdup(name);
+ if (!cgroup->name)
+ goto out_err;
+ cgroup->fd = open_cgroup(name);
+ if (cgroup->fd == -1)
+ goto out_free_name;
+ }
+
+ return cgroup;
+
+out_free_name:
+ free(cgroup->name);
+out_err:
+ free(cgroup);
+ return NULL;
+}
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name)
+{
+ struct cgroup *cgroup = evlist__find_cgroup(evlist, name);
+
+ return cgroup ?: cgroup__new(name);
+}
+
+static int add_cgroup(struct perf_evlist *evlist, const char *str)
+{
+ struct perf_evsel *counter;
+ struct cgroup *cgrp = evlist__findnew_cgroup(evlist, str);
+ int n;
+
+ if (!cgrp)
+ return -1;
+ /*
+ * Find the corresponding event: when adding cgroup N,
+ * we need to bind it to event N.
+ */
+ n = 0;
+ evlist__for_each_entry(evlist, counter) {
+ if (n == nr_cgroups)
+ goto found;
+ n++;
+ }
+
+ cgroup__put(cgrp);
+ return -1;
+found:
+ counter->cgrp = cgrp;
+ return 0;
+}
+
+static void cgroup__delete(struct cgroup *cgroup)
+{
+ close(cgroup->fd);
+ zfree(&cgroup->name);
+ free(cgroup);
+}
+
+void cgroup__put(struct cgroup *cgrp)
+{
+ if (cgrp && refcount_dec_and_test(&cgrp->refcnt)) {
+ cgroup__delete(cgrp);
+ }
+}
+
+struct cgroup *cgroup__get(struct cgroup *cgroup)
+{
+ if (cgroup)
+ refcount_inc(&cgroup->refcnt);
+ return cgroup;
+}
+
+static void evsel__set_default_cgroup(struct perf_evsel *evsel, struct cgroup *cgroup)
+{
+ if (evsel->cgrp == NULL)
+ evsel->cgrp = cgroup__get(cgroup);
+}
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ evsel__set_default_cgroup(evsel, cgroup);
+}
+
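+/*
+ * Parse the cgroup list from the command line (as used by perf's
+ * --cgroup/-G option). Cgroups pair positionally with the events
+ * defined so far, e.g. "-e c1,c2 -G cgA,cgB" binds c1 to cgA and c2
+ * to cgB; an empty slot skips the corresponding event, and a single
+ * cgroup is applied to all events.
+ */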
+int parse_cgroups(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+ struct perf_evsel *counter;
+ struct cgroup *cgrp = NULL;
+ const char *p, *e, *eos = str + strlen(str);
+ char *s;
+ int ret, i;
+
+ if (list_empty(&evlist->entries)) {
+ fprintf(stderr, "must define events before cgroups\n");
+ return -1;
+ }
+
+ for (;;) {
+ p = strchr(str, ',');
+ e = p ? p : eos;
+
+ /* allow empty cgroups, i.e., skip */
+ if (e - str) {
+ /* strndup() NUL-terminates the copy */
+ s = strndup(str, e - str);
+ if (!s)
+ return -1;
+ ret = add_cgroup(evlist, s);
+ free(s);
+ if (ret)
+ return -1;
+ }
+ /* nr_cgroups is increased even for empty cgroups */
+ nr_cgroups++;
+ if (!p)
+ break;
+ str = p+1;
+ }
+ /* handle the case where one cgroup applies to multiple events */
+ i = 0;
+ if (nr_cgroups == 1) {
+ evlist__for_each_entry(evlist, counter) {
+ if (i == 0)
+ cgrp = counter->cgrp;
+ else {
+ counter->cgrp = cgrp;
+ refcount_inc(&cgrp->refcnt);
+ }
+ i++;
+ }
+ }
+ return 0;
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
new file mode 100644
index 000000000..f033a80c1
--- /dev/null
+++ b/tools/perf/util/cgroup.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_H__
+#define __CGROUP_H__
+
+#include <linux/refcount.h>
+
+struct option;
+
+struct cgroup {
+ char *name;
+ int fd;
+ refcount_t refcnt;
+};
+
+
+extern int nr_cgroups; /* number of explicit cgroups defined */
+
+struct cgroup *cgroup__get(struct cgroup *cgroup);
+void cgroup__put(struct cgroup *cgroup);
+
+struct perf_evlist;
+
+struct cgroup *evlist__findnew_cgroup(struct perf_evlist *evlist, const char *name);
+
+void evlist__set_default_cgroup(struct perf_evlist *evlist, struct cgroup *cgroup);
+
+int parse_cgroups(const struct option *opt, const char *str, int unset);
+
+#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
new file mode 100644
index 000000000..ca0fff627
--- /dev/null
+++ b/tools/perf/util/cloexec.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <sched.h>
+#include "util.h"
+#include "../perf.h"
+#include "cloexec.h"
+#include "asm/bug.h"
+#include "debug.h"
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <sys/syscall.h>
+
+static unsigned long flag = PERF_FLAG_FD_CLOEXEC;
+
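+/*
+ * Weak fallback for C libraries that lack sched_getcpu(): invoke the
+ * getcpu syscall directly when the kernel provides it.
+ */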
+int __weak sched_getcpu(void)
+{
+#ifdef __NR_getcpu
+ unsigned cpu;
+ int err = syscall(__NR_getcpu, &cpu, NULL, NULL);
+ if (!err)
+ return cpu;
+#else
+ errno = ENOSYS;
+#endif
+ return -1;
+}
+
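+/*
+ * Probe whether perf_event_open() accepts PERF_FLAG_FD_CLOEXEC:
+ * returns 1 if supported, 0 if not, and -1 on an unexpected error.
+ */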
+static int perf_flag_probe(void)
+{
+ /* use 'safest' configuration as used in perf_evsel__fallback() */
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ int fd;
+ int err;
+ int cpu;
+ pid_t pid = -1;
+ char sbuf[STRERR_BUFSIZE];
+
+ cpu = sched_getcpu();
+ if (cpu < 0)
+ cpu = 0;
+
+ /*
+ * Using -1 for the pid is a workaround to avoid gratuitous jump label
+ * changes.
+ */
+ while (1) {
+ /* check cloexec flag */
+ fd = sys_perf_event_open(&attr, pid, cpu, -1,
+ PERF_FLAG_FD_CLOEXEC);
+ if (fd < 0 && pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ break;
+ }
+ err = errno;
+
+ if (fd >= 0) {
+ close(fd);
+ return 1;
+ }
+
+ WARN_ONCE(err != EINVAL && err != EBUSY,
+ "perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
+ err, str_error_r(err, sbuf, sizeof(sbuf)));
+
+ /* not supported, confirm the error is related to PERF_FLAG_FD_CLOEXEC */
+ while (1) {
+ fd = sys_perf_event_open(&attr, pid, cpu, -1, 0);
+ if (fd < 0 && pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ break;
+ }
+ err = errno;
+
+ if (fd >= 0)
+ close(fd);
+
+ if (WARN_ONCE(fd < 0 && err != EBUSY,
+ "perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
+ err, str_error_r(err, sbuf, sizeof(sbuf))))
+ return -1;
+
+ return 0;
+}
+
+unsigned long perf_event_open_cloexec_flag(void)
+{
+ static bool probed;
+
+ if (!probed) {
+ if (perf_flag_probe() <= 0)
+ flag = 0;
+ probed = true;
+ }
+
+ return flag;
+}
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
new file mode 100644
index 000000000..78216b101
--- /dev/null
+++ b/tools/perf/util/cloexec.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CLOEXEC_H
+#define __PERF_CLOEXEC_H
+
+unsigned long perf_event_open_cloexec_flag(void);
+
+#endif /* __PERF_CLOEXEC_H */
diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c
new file mode 100644
index 000000000..39e628b89
--- /dev/null
+++ b/tools/perf/util/color.c
@@ -0,0 +1,222 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/kernel.h>
+#include "cache.h"
+#include "config.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include "color.h"
+#include <math.h>
+#include <unistd.h>
+
+int perf_use_color_default = -1;
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty)
+{
+ if (value) {
+ if (!strcasecmp(value, "never"))
+ return 0;
+ if (!strcasecmp(value, "always"))
+ return 1;
+ if (!strcasecmp(value, "auto"))
+ goto auto_color;
+ }
+
+ /* Missing or explicit false to turn off colorization */
+ if (!perf_config_bool(var, value))
+ return 0;
+
+ /* any normal truth value defaults to 'auto' */
+ auto_color:
+ if (stdout_is_tty < 0)
+ stdout_is_tty = isatty(1);
+ if (stdout_is_tty || pager_in_use()) {
+ char *term = getenv("TERM");
+ if (term && strcmp(term, "dumb"))
+ return 1;
+ }
+ return 0;
+}
+
+int perf_color_default_config(const char *var, const char *value,
+ void *cb __maybe_unused)
+{
+ if (!strcmp(var, "color.ui")) {
+ perf_use_color_default = perf_config_colorbool(var, value, -1);
+ return 0;
+ }
+
+ return 0;
+}
+
+static int __color_vsnprintf(char *bf, size_t size, const char *color,
+ const char *fmt, va_list args, const char *trail)
+{
+ int r = 0;
+
+ /*
+ * Auto-detect:
+ */
+ if (perf_use_color_default < 0) {
+ if (isatty(1) || pager_in_use())
+ perf_use_color_default = 1;
+ else
+ perf_use_color_default = 0;
+ }
+
+ if (perf_use_color_default && *color)
+ r += scnprintf(bf, size, "%s", color);
+ r += vscnprintf(bf + r, size - r, fmt, args);
+ if (perf_use_color_default && *color)
+ r += scnprintf(bf + r, size - r, "%s", PERF_COLOR_RESET);
+ if (trail)
+ r += scnprintf(bf + r, size - r, "%s", trail);
+ return r;
+}
+
+/* Colors are not included in return value */
+static int __color_vfprintf(FILE *fp, const char *color, const char *fmt,
+ va_list args)
+{
+ int r = 0;
+
+ /*
+ * Auto-detect:
+ */
+ if (perf_use_color_default < 0) {
+ if (isatty(fileno(fp)) || pager_in_use())
+ perf_use_color_default = 1;
+ else
+ perf_use_color_default = 0;
+ }
+
+ if (perf_use_color_default && *color)
+ fprintf(fp, "%s", color);
+ r += vfprintf(fp, fmt, args);
+ if (perf_use_color_default && *color)
+ fprintf(fp, "%s", PERF_COLOR_RESET);
+ return r;
+}
+
+int color_vsnprintf(char *bf, size_t size, const char *color,
+ const char *fmt, va_list args)
+{
+ return __color_vsnprintf(bf, size, color, fmt, args, NULL);
+}
+
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args)
+{
+ return __color_vfprintf(fp, color, fmt, args);
+}
+
+int color_snprintf(char *bf, size_t size, const char *color,
+ const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ va_start(args, fmt);
+ r = color_vsnprintf(bf, size, color, fmt, args);
+ va_end(args);
+ return r;
+}
+
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ va_start(args, fmt);
+ r = color_vfprintf(fp, color, fmt, args);
+ va_end(args);
+ return r;
+}
+
+/*
+ * This function splits the buffer by newlines and colors the lines individually.
+ *
+ * Returns 0 on success.
+ */
+int color_fwrite_lines(FILE *fp, const char *color,
+ size_t count, const char *buf)
+{
+ if (!*color)
+ return fwrite(buf, count, 1, fp) != 1;
+
+ while (count) {
+ char *p = memchr(buf, '\n', count);
+
+ if (p != buf && (fputs(color, fp) < 0 ||
+ fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 ||
+ fputs(PERF_COLOR_RESET, fp) < 0))
+ return -1;
+ if (!p)
+ return 0;
+ if (fputc('\n', fp) < 0)
+ return -1;
+ count -= p + 1 - buf;
+ buf = p + 1;
+ }
+ return 0;
+}
+
+const char *get_percent_color(double percent)
+{
+ const char *color = PERF_COLOR_NORMAL;
+
+ /*
+ * We color high-overhead entries in red, mid-overhead
+ * entries in green - and keep the low overhead places
+ * normal:
+ */
+ if (fabs(percent) >= MIN_RED)
+ color = PERF_COLOR_RED;
+ else {
+ if (fabs(percent) > MIN_GREEN)
+ color = PERF_COLOR_GREEN;
+ }
+ return color;
+}
+
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent)
+{
+ int r;
+ const char *color;
+
+ color = get_percent_color(percent);
+ r = color_fprintf(fp, color, fmt, percent);
+
+ return r;
+}
+
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value)
+{
+ const char *color = get_percent_color(value);
+ return color_snprintf(bf, size, color, fmt, value);
+}
+
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ double percent;
+
+ va_start(args, fmt);
+ percent = va_arg(args, double);
+ va_end(args);
+ return value_color_snprintf(bf, size, fmt, percent);
+}
+
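+/* Like percent_color_snprintf(), but @fmt consumes a field-width argument before the percent value. */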
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+ va_list args;
+ int len;
+ double percent;
+ const char *color;
+
+ va_start(args, fmt);
+ len = va_arg(args, int);
+ percent = va_arg(args, double);
+ va_end(args);
+
+ color = get_percent_color(percent);
+ return color_snprintf(bf, size, color, fmt, len, percent);
+}
diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h
new file mode 100644
index 000000000..22777b181
--- /dev/null
+++ b/tools/perf/util/color.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COLOR_H
+#define __PERF_COLOR_H
+
+#include <stdio.h>
+
+/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */
+#define COLOR_MAXLEN 24
+
+#define PERF_COLOR_NORMAL ""
+#define PERF_COLOR_RESET "\033[m"
+#define PERF_COLOR_BOLD "\033[1m"
+#define PERF_COLOR_RED "\033[31m"
+#define PERF_COLOR_GREEN "\033[32m"
+#define PERF_COLOR_YELLOW "\033[33m"
+#define PERF_COLOR_BLUE "\033[34m"
+#define PERF_COLOR_MAGENTA "\033[35m"
+#define PERF_COLOR_CYAN "\033[36m"
+#define PERF_COLOR_BG_RED "\033[41m"
+
+#define MIN_GREEN 0.5
+#define MIN_RED 5.0
+
+/*
+ * This variable stores the value of color.ui
+ */
+extern int perf_use_color_default;
+
+
+/*
+ * Use this instead of perf_default_config if you need the value of color.ui.
+ */
+int perf_color_default_config(const char *var, const char *value, void *cb);
+
+int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty);
+int color_vsnprintf(char *bf, size_t size, const char *color,
+ const char *fmt, va_list args);
+int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args);
+int color_fprintf(FILE *fp, const char *color, const char *fmt, ...);
+int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...);
+int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf);
+int value_color_snprintf(char *bf, size_t size, const char *fmt, double value);
+int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...);
+int percent_color_fprintf(FILE *fp, const char *fmt, double percent);
+const char *get_percent_color(double percent);
+
+#endif /* __PERF_COLOR_H */
diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
new file mode 100644
index 000000000..31279a7bd
--- /dev/null
+++ b/tools/perf/util/comm.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "comm.h"
+#include "util.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/refcount.h>
+#include "rwsem.h"
+
+struct comm_str {
+ char *str;
+ struct rb_node rb_node;
+ refcount_t refcnt;
+};
+
+/* Should perhaps be moved to struct machine */
+static struct rb_root comm_str_root;
+static struct rw_semaphore comm_str_lock = {.lock = PTHREAD_RWLOCK_INITIALIZER,};
+
+static struct comm_str *comm_str__get(struct comm_str *cs)
+{
+ if (cs && refcount_inc_not_zero(&cs->refcnt))
+ return cs;
+
+ return NULL;
+}
+
+static void comm_str__put(struct comm_str *cs)
+{
+ if (cs && refcount_dec_and_test(&cs->refcnt)) {
+ down_write(&comm_str_lock);
+ rb_erase(&cs->rb_node, &comm_str_root);
+ up_write(&comm_str_lock);
+ zfree(&cs->str);
+ free(cs);
+ }
+}
+
+static struct comm_str *comm_str__alloc(const char *str)
+{
+ struct comm_str *cs;
+
+ cs = zalloc(sizeof(*cs));
+ if (!cs)
+ return NULL;
+
+ cs->str = strdup(str);
+ if (!cs->str) {
+ free(cs);
+ return NULL;
+ }
+
+ refcount_set(&cs->refcnt, 1);
+
+ return cs;
+}
+
+static
+struct comm_str *__comm_str__findnew(const char *str, struct rb_root *root)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct comm_str *iter, *new;
+ int cmp;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct comm_str, rb_node);
+
+ /*
+ * If we race with comm_str__put, iter->refcnt is 0
+ * and it will be removed within the comm_str__put call
+ * shortly; ignore it in this search.
+ */
+ cmp = strcmp(str, iter->str);
+ if (!cmp && comm_str__get(iter))
+ return iter;
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ new = comm_str__alloc(str);
+ if (!new)
+ return NULL;
+
+ rb_link_node(&new->rb_node, parent, p);
+ rb_insert_color(&new->rb_node, root);
+
+ return new;
+}
+
+static struct comm_str *comm_str__findnew(const char *str, struct rb_root *root)
+{
+ struct comm_str *cs;
+
+ down_write(&comm_str_lock);
+ cs = __comm_str__findnew(str, root);
+ up_write(&comm_str_lock);
+
+ return cs;
+}
+
+struct comm *comm__new(const char *str, u64 timestamp, bool exec)
+{
+ struct comm *comm = zalloc(sizeof(*comm));
+
+ if (!comm)
+ return NULL;
+
+ comm->start = timestamp;
+ comm->exec = exec;
+
+ comm->comm_str = comm_str__findnew(str, &comm_str_root);
+ if (!comm->comm_str) {
+ free(comm);
+ return NULL;
+ }
+
+ return comm;
+}
+
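+/*
+ * Replace the comm string, dropping the reference to the old one. The
+ * @exec flag can only be set here, never cleared.
+ */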
+int comm__override(struct comm *comm, const char *str, u64 timestamp, bool exec)
+{
+ struct comm_str *new, *old = comm->comm_str;
+
+ new = comm_str__findnew(str, &comm_str_root);
+ if (!new)
+ return -ENOMEM;
+
+ comm_str__put(old);
+ comm->comm_str = new;
+ comm->start = timestamp;
+ if (exec)
+ comm->exec = true;
+
+ return 0;
+}
+
+void comm__free(struct comm *comm)
+{
+ comm_str__put(comm->comm_str);
+ free(comm);
+}
+
+const char *comm__str(const struct comm *comm)
+{
+ return comm->comm_str->str;
+}
diff --git a/tools/perf/util/comm.h b/tools/perf/util/comm.h
new file mode 100644
index 000000000..3e5c438fe
--- /dev/null
+++ b/tools/perf/util/comm.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COMM_H
+#define __PERF_COMM_H
+
+#include "../perf.h"
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+struct comm_str;
+
+struct comm {
+ struct comm_str *comm_str;
+ u64 start;
+ struct list_head list;
+ bool exec;
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+};
+
+void comm__free(struct comm *comm);
+struct comm *comm__new(const char *str, u64 timestamp, bool exec);
+const char *comm__str(const struct comm *comm);
+int comm__override(struct comm *comm, const char *str, u64 timestamp,
+ bool exec);
+
+#endif /* __PERF_COMM_H */
diff --git a/tools/perf/util/compress.h b/tools/perf/util/compress.h
new file mode 100644
index 000000000..892e92e7e
--- /dev/null
+++ b/tools/perf/util/compress.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_COMPRESS_H
+#define PERF_COMPRESS_H
+
+#ifdef HAVE_ZLIB_SUPPORT
+int gzip_decompress_to_file(const char *input, int output_fd);
+bool gzip_is_compressed(const char *input);
+#endif
+
+#ifdef HAVE_LZMA_SUPPORT
+int lzma_decompress_to_file(const char *input, int output_fd);
+bool lzma_is_compressed(const char *input);
+#endif
+
+#endif /* PERF_COMPRESS_H */
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
new file mode 100644
index 000000000..9bb742acc
--- /dev/null
+++ b/tools/perf/util/config.c
@@ -0,0 +1,829 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * config.c
+ *
+ * Helper functions for parsing config items.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ * Copyright (C) Johannes Schindelin, 2005
+ *
+ */
+#include <errno.h>
+#include <sys/param.h>
+#include "util.h"
+#include "cache.h"
+#include <subcmd/exec-cmd.h>
+#include "util/hist.h" /* perf_hist_config */
+#include "util/llvm-utils.h" /* perf_llvm_config */
+#include "config.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/string.h>
+
+#include "sane_ctype.h"
+
+#define MAXNAME (256)
+
+#define DEBUG_CACHE_DIR ".debug"
+
+
+char buildid_dir[MAXPATHLEN]; /* root dir for buildid, binary cache */
+
+static FILE *config_file;
+static const char *config_file_name;
+static int config_linenr;
+static int config_file_eof;
+static struct perf_config_set *config_set;
+
+const char *config_exclusive_filename;
+
+static int get_next_char(void)
+{
+ int c;
+ FILE *f;
+
+ c = '\n';
+ if ((f = config_file) != NULL) {
+ c = fgetc(f);
+ if (c == '\r') {
+ /* DOS like systems */
+ c = fgetc(f);
+ if (c != '\n') {
+ ungetc(c, f);
+ c = '\r';
+ }
+ }
+ if (c == '\n')
+ config_linenr++;
+ if (c == EOF) {
+ config_file_eof = 1;
+ c = '\n';
+ }
+ }
+ return c;
+}
+
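+/*
+ * Read the value part of a "key = value" line into a static buffer,
+ * handling quoting, trailing comments and backslash escapes. Returns
+ * NULL on overlong values, unbalanced quotes or unknown escapes.
+ */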
+static char *parse_value(void)
+{
+ static char value[1024];
+ int quote = 0, comment = 0, space = 0;
+ size_t len = 0;
+
+ for (;;) {
+ int c = get_next_char();
+
+ if (len >= sizeof(value) - 1)
+ return NULL;
+ if (c == '\n') {
+ if (quote)
+ return NULL;
+ value[len] = 0;
+ return value;
+ }
+ if (comment)
+ continue;
+ if (isspace(c) && !quote) {
+ space = 1;
+ continue;
+ }
+ if (!quote) {
+ if (c == ';' || c == '#') {
+ comment = 1;
+ continue;
+ }
+ }
+ if (space) {
+ if (len)
+ value[len++] = ' ';
+ space = 0;
+ }
+ if (c == '\\') {
+ c = get_next_char();
+ switch (c) {
+ case '\n':
+ continue;
+ case 't':
+ c = '\t';
+ break;
+ case 'b':
+ c = '\b';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ /* Some characters escape as themselves */
+ case '\\': case '"':
+ break;
+ /* Reject unknown escape sequences */
+ default:
+ return NULL;
+ }
+ value[len++] = c;
+ continue;
+ }
+ if (c == '"') {
+ quote = 1-quote;
+ continue;
+ }
+ value[len++] = c;
+ }
+}
+
+static inline int iskeychar(int c)
+{
+ return isalnum(c) || c == '-' || c == '_';
+}
+
+static int get_value(config_fn_t fn, void *data, char *name, unsigned int len)
+{
+ int c;
+ char *value;
+
+ /* Get the full name */
+ for (;;) {
+ c = get_next_char();
+ if (config_file_eof)
+ break;
+ if (!iskeychar(c))
+ break;
+ name[len++] = c;
+ if (len >= MAXNAME)
+ return -1;
+ }
+ name[len] = 0;
+ while (c == ' ' || c == '\t')
+ c = get_next_char();
+
+ value = NULL;
+ if (c != '\n') {
+ if (c != '=')
+ return -1;
+ value = parse_value();
+ if (!value)
+ return -1;
+ }
+ return fn(name, value, data);
+}
+
+static int get_extended_base_var(char *name, int baselen, int c)
+{
+ do {
+ if (c == '\n')
+ return -1;
+ c = get_next_char();
+ } while (isspace(c));
+
+ /* We require the format to be '[base "extension"]' */
+ if (c != '"')
+ return -1;
+ name[baselen++] = '.';
+
+ for (;;) {
+ int ch = get_next_char();
+
+ if (ch == '\n')
+ return -1;
+ if (ch == '"')
+ break;
+ if (ch == '\\') {
+ ch = get_next_char();
+ if (ch == '\n')
+ return -1;
+ }
+ name[baselen++] = ch;
+ if (baselen > MAXNAME / 2)
+ return -1;
+ }
+
+ /* Final ']' */
+ if (get_next_char() != ']')
+ return -1;
+ return baselen;
+}
+
+static int get_base_var(char *name)
+{
+ int baselen = 0;
+
+ for (;;) {
+ int c = get_next_char();
+ if (config_file_eof)
+ return -1;
+ if (c == ']')
+ return baselen;
+ if (isspace(c))
+ return get_extended_base_var(name, baselen, c);
+ if (!iskeychar(c) && c != '.')
+ return -1;
+ if (baselen > MAXNAME / 2)
+ return -1;
+ name[baselen++] = tolower(c);
+ }
+}
+
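+/*
+ * Parse an INI-style config file: "[section]" headers followed by
+ * "key = value" lines, with ';' or '#' introducing comments.
+ */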
+static int perf_parse_file(config_fn_t fn, void *data)
+{
+ int comment = 0;
+ int baselen = 0;
+ static char var[MAXNAME];
+
+ /* U+FEFF Byte Order Mark in UTF8 */
+ static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf";
+ const unsigned char *bomptr = utf8_bom;
+
+ for (;;) {
+ int line, c = get_next_char();
+
+ if (bomptr && *bomptr) {
+ /* We are at the file beginning; skip UTF8-encoded BOM
+ * if present. Sane editors won't put this in on their
+ * own, but e.g. Windows Notepad will do it happily. */
+ if ((unsigned char) c == *bomptr) {
+ bomptr++;
+ continue;
+ } else {
+ /* Do not tolerate partial BOM. */
+ if (bomptr != utf8_bom)
+ break;
+ /* No BOM at file beginning. Cool. */
+ bomptr = NULL;
+ }
+ }
+ if (c == '\n') {
+ if (config_file_eof)
+ return 0;
+ comment = 0;
+ continue;
+ }
+ if (comment || isspace(c))
+ continue;
+ if (c == '#' || c == ';') {
+ comment = 1;
+ continue;
+ }
+ if (c == '[') {
+ baselen = get_base_var(var);
+ if (baselen <= 0)
+ break;
+ var[baselen++] = '.';
+ var[baselen] = 0;
+ continue;
+ }
+ if (!isalpha(c))
+ break;
+ var[baselen] = tolower(c);
+
+ /*
+ * The get_value function might or might not reach the '\n',
+ * so save the current line number for error reporting.
+ */
+ line = config_linenr;
+ if (get_value(fn, data, var, baselen+1) < 0) {
+ config_linenr = line;
+ break;
+ }
+ }
+ pr_err("bad config file line %d in %s\n", config_linenr, config_file_name);
+ return -1;
+}
+
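+/* Apply an optional k/m/g suffix as a 1024-based multiplier; returns 0 on an unknown suffix. */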
+static int parse_unit_factor(const char *end, unsigned long *val)
+{
+ if (!*end)
+ return 1;
+ else if (!strcasecmp(end, "k")) {
+ *val *= 1024;
+ return 1;
+ }
+ else if (!strcasecmp(end, "m")) {
+ *val *= 1024 * 1024;
+ return 1;
+ }
+ else if (!strcasecmp(end, "g")) {
+ *val *= 1024 * 1024 * 1024;
+ return 1;
+ }
+ return 0;
+}
+
+static int perf_parse_llong(const char *value, long long *ret)
+{
+ if (value && *value) {
+ char *end;
+ long long val = strtoll(value, &end, 0);
+ unsigned long factor = 1;
+
+ if (!parse_unit_factor(end, &factor))
+ return 0;
+ *ret = val * factor;
+ return 1;
+ }
+ return 0;
+}
+
+static int perf_parse_long(const char *value, long *ret)
+{
+ if (value && *value) {
+ char *end;
+ long val = strtol(value, &end, 0);
+ unsigned long factor = 1;
+ if (!parse_unit_factor(end, &factor))
+ return 0;
+ *ret = val * factor;
+ return 1;
+ }
+ return 0;
+}
+
+static void bad_config(const char *name)
+{
+ if (config_file_name)
+ pr_warning("bad config value for '%s' in %s, ignoring...\n", name, config_file_name);
+ else
+ pr_warning("bad config value for '%s', ignoring...\n", name);
+}
+
+int perf_config_u64(u64 *dest, const char *name, const char *value)
+{
+ long long ret = 0;
+
+ if (!perf_parse_llong(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+
+ *dest = ret;
+ return 0;
+}
+
+int perf_config_int(int *dest, const char *name, const char *value)
+{
+ long ret = 0;
+ if (!perf_parse_long(value, &ret)) {
+ bad_config(name);
+ return -1;
+ }
+ *dest = ret;
+ return 0;
+}
+
+static int perf_config_bool_or_int(const char *name, const char *value, int *is_bool)
+{
+ int ret;
+
+ *is_bool = 1;
+ if (!value)
+ return 1;
+ if (!*value)
+ return 0;
+ if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on"))
+ return 1;
+ if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off"))
+ return 0;
+ *is_bool = 0;
+ return perf_config_int(&ret, name, value) < 0 ? -1 : ret;
+}
+
+int perf_config_bool(const char *name, const char *value)
+{
+ int discard;
+ return !!perf_config_bool_or_int(name, value, &discard);
+}
+
+static const char *perf_config_dirname(const char *name, const char *value)
+{
+ if (!name)
+ return NULL;
+ return value;
+}
+
+static int perf_buildid_config(const char *var, const char *value)
+{
+ /* same dir for all commands */
+ if (!strcmp(var, "buildid.dir")) {
+ const char *dir = perf_config_dirname(var, value);
+
+ if (!dir) {
+ pr_err("Invalid buildid directory!\n");
+ return -1;
+ }
+ strncpy(buildid_dir, dir, MAXPATHLEN-1);
+ buildid_dir[MAXPATHLEN-1] = '\0';
+ }
+
+ return 0;
+}
+
+static int perf_default_core_config(const char *var __maybe_unused,
+ const char *value __maybe_unused)
+{
+ /* Add other config variables here. */
+ return 0;
+}
+
+static int perf_ui_config(const char *var, const char *value)
+{
+ /* Add other config variables here. */
+ if (!strcmp(var, "ui.show-headers"))
+ symbol_conf.show_hist_headers = perf_config_bool(var, value);
+
+ return 0;
+}
+
+int perf_default_config(const char *var, const char *value,
+ void *dummy __maybe_unused)
+{
+ if (strstarts(var, "core."))
+ return perf_default_core_config(var, value);
+
+ if (strstarts(var, "hist."))
+ return perf_hist_config(var, value);
+
+ if (strstarts(var, "ui."))
+ return perf_ui_config(var, value);
+
+ if (strstarts(var, "call-graph."))
+ return perf_callchain_config(var, value);
+
+ if (strstarts(var, "llvm."))
+ return perf_llvm_config(var, value);
+
+ if (strstarts(var, "buildid."))
+ return perf_buildid_config(var, value);
+
+ /* Add other config variables here. */
+ return 0;
+}
+
+static int perf_config_from_file(config_fn_t fn, const char *filename, void *data)
+{
+ int ret;
+ FILE *f = fopen(filename, "r");
+
+ ret = -1;
+ if (f) {
+ config_file = f;
+ config_file_name = filename;
+ config_linenr = 1;
+ config_file_eof = 0;
+ ret = perf_parse_file(fn, data);
+ fclose(f);
+ config_file_name = NULL;
+ }
+ return ret;
+}
+
+const char *perf_etc_perfconfig(void)
+{
+ static const char *system_wide;
+ if (!system_wide)
+ system_wide = system_path(ETC_PERFCONFIG);
+ return system_wide;
+}
+
+static int perf_env_bool(const char *k, int def)
+{
+ const char *v = getenv(k);
+ return v ? perf_config_bool(k, v) : def;
+}
+
+static int perf_config_system(void)
+{
+ return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0);
+}
+
+static int perf_config_global(void)
+{
+ return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0);
+}
+
+static struct perf_config_section *find_section(struct list_head *sections,
+ const char *section_name)
+{
+ struct perf_config_section *section;
+
+ list_for_each_entry(section, sections, node)
+ if (!strcmp(section->name, section_name))
+ return section;
+
+ return NULL;
+}
+
+static struct perf_config_item *find_config_item(const char *name,
+ struct perf_config_section *section)
+{
+ struct perf_config_item *item;
+
+ list_for_each_entry(item, &section->items, node)
+ if (!strcmp(item->name, name))
+ return item;
+
+ return NULL;
+}
+
+static struct perf_config_section *add_section(struct list_head *sections,
+ const char *section_name)
+{
+ struct perf_config_section *section = zalloc(sizeof(*section));
+
+ if (!section)
+ return NULL;
+
+ INIT_LIST_HEAD(&section->items);
+ section->name = strdup(section_name);
+ if (!section->name) {
+ pr_debug("%s: strdup failed\n", __func__);
+ free(section);
+ return NULL;
+ }
+
+ list_add_tail(&section->node, sections);
+ return section;
+}
+
+static struct perf_config_item *add_config_item(struct perf_config_section *section,
+ const char *name)
+{
+ struct perf_config_item *item = zalloc(sizeof(*item));
+
+ if (!item)
+ return NULL;
+
+ item->name = strdup(name);
+ if (!item->name) {
+ pr_debug("%s: strdup failed\n", __func__);
+ free(item);
+ return NULL;
+ }
+
+ list_add_tail(&item->node, &section->items);
+ return item;
+}
+
+static int set_value(struct perf_config_item *item, const char *value)
+{
+ char *val = strdup(value);
+
+ if (!val)
+ return -1;
+
+ zfree(&item->value);
+ item->value = val;
+ return 0;
+}
+
+static int collect_config(const char *var, const char *value,
+ void *perf_config_set)
+{
+ int ret = -1;
+ char *ptr, *key;
+ char *section_name, *name;
+ struct perf_config_section *section = NULL;
+ struct perf_config_item *item = NULL;
+ struct perf_config_set *set = perf_config_set;
+ struct list_head *sections;
+
+ if (set == NULL)
+ return -1;
+
+ sections = &set->sections;
+ key = ptr = strdup(var);
+ if (!key) {
+ pr_debug("%s: strdup failed\n", __func__);
+ return -1;
+ }
+
+ section_name = strsep(&ptr, ".");
+ name = ptr;
+ if (name == NULL || value == NULL)
+ goto out_free;
+
+ section = find_section(sections, section_name);
+ if (!section) {
+ section = add_section(sections, section_name);
+ if (!section)
+ goto out_free;
+ }
+
+ item = find_config_item(name, section);
+ if (!item) {
+ item = add_config_item(section, name);
+ if (!item)
+ goto out_free;
+ }
+
+ /*
+ * perf_config_set can contain both user and system config items,
+ * so we need to know where each value came from. The classification
+ * is needed when a particular config file is overwritten by the
+ * setting feature, i.e. set_config().
+ */
+ if (strcmp(config_file_name, perf_etc_perfconfig()) == 0) {
+ section->from_system_config = true;
+ item->from_system_config = true;
+ } else {
+ section->from_system_config = false;
+ item->from_system_config = false;
+ }
+
+ ret = set_value(item, value);
+
+out_free:
+ free(key);
+ return ret;
+}
+
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+ const char *var, const char *value)
+{
+ config_file_name = file_name;
+ return collect_config(var, value, set);
+}
+
+static int perf_config_set__init(struct perf_config_set *set)
+{
+ int ret = -1;
+ const char *home = NULL;
+ char *user_config;
+ struct stat st;
+
+ /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */
+ if (config_exclusive_filename)
+ return perf_config_from_file(collect_config, config_exclusive_filename, set);
+ if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) {
+ if (perf_config_from_file(collect_config, perf_etc_perfconfig(), set) < 0)
+ goto out;
+ }
+
+ home = getenv("HOME");
+
+ /*
+ * Skip reading user config if:
+ * - there is no place to read it from (HOME)
+ * - we are asked not to (PERF_CONFIG_NOGLOBAL=1)
+ */
+ if (!home || !*home || !perf_config_global())
+ return 0;
+
+ user_config = strdup(mkpath("%s/.perfconfig", home));
+ if (user_config == NULL) {
+ pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
+ goto out;
+ }
+
+ if (stat(user_config, &st) < 0) {
+ if (errno == ENOENT)
+ ret = 0;
+ goto out_free;
+ }
+
+ ret = 0;
+
+ if (st.st_uid && (st.st_uid != geteuid())) {
+ pr_warning("File %s not owned by current user or root, ignoring it.", user_config);
+ goto out_free;
+ }
+
+ if (st.st_size)
+ ret = perf_config_from_file(collect_config, user_config, set);
+
+out_free:
+ free(user_config);
+out:
+ return ret;
+}
+
+struct perf_config_set *perf_config_set__new(void)
+{
+ struct perf_config_set *set = zalloc(sizeof(*set));
+
+ if (set) {
+ INIT_LIST_HEAD(&set->sections);
+ perf_config_set__init(set);
+ }
+
+ return set;
+}
+
+static int perf_config__init(void)
+{
+ if (config_set == NULL)
+ config_set = perf_config_set__new();
+
+ return config_set == NULL;
+}
+
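+/*
+ * Walk every "section.name" = value pair in the cached config set,
+ * invoking @fn on each; a negative return from @fn stops the walk.
+ */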
+int perf_config(config_fn_t fn, void *data)
+{
+ int ret = 0;
+ char key[BUFSIZ];
+ struct perf_config_section *section;
+ struct perf_config_item *item;
+
+ if (config_set == NULL && perf_config__init())
+ return -1;
+
+ perf_config_set__for_each_entry(config_set, section, item) {
+ char *value = item->value;
+
+ if (value) {
+ scnprintf(key, sizeof(key), "%s.%s",
+ section->name, item->name);
+ ret = fn(key, value, data);
+ if (ret < 0) {
+ pr_err("Error: wrong config key-value pair %s=%s\n",
+ key, value);
+ break;
+ }
+ }
+ }
+
+ return ret;
+}
+
+void perf_config__exit(void)
+{
+ perf_config_set__delete(config_set);
+ config_set = NULL;
+}
+
+void perf_config__refresh(void)
+{
+ perf_config__exit();
+ perf_config__init();
+}
+
+static void perf_config_item__delete(struct perf_config_item *item)
+{
+ zfree(&item->name);
+ zfree(&item->value);
+ free(item);
+}
+
+static void perf_config_section__purge(struct perf_config_section *section)
+{
+ struct perf_config_item *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &section->items, node) {
+ list_del_init(&item->node);
+ perf_config_item__delete(item);
+ }
+}
+
+static void perf_config_section__delete(struct perf_config_section *section)
+{
+ perf_config_section__purge(section);
+ zfree(&section->name);
+ free(section);
+}
+
+static void perf_config_set__purge(struct perf_config_set *set)
+{
+ struct perf_config_section *section, *tmp;
+
+ list_for_each_entry_safe(section, tmp, &set->sections, node) {
+ list_del_init(&section->node);
+ perf_config_section__delete(section);
+ }
+}
+
+void perf_config_set__delete(struct perf_config_set *set)
+{
+ if (set == NULL)
+ return;
+
+ perf_config_set__purge(set);
+ free(set);
+}
+
+/*
+ * Call this to report an error for your variable that should not
+ * get a boolean value (i.e. "[my] var" means "true").
+ */
+int config_error_nonbool(const char *var)
+{
+ pr_err("Missing value for '%s'", var);
+ return -1;
+}
+
+void set_buildid_dir(const char *dir)
+{
+ if (dir)
+ scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
+
+ /* default to $HOME/.debug */
+ if (buildid_dir[0] == '\0') {
+ char *home = getenv("HOME");
+
+ if (home) {
+ snprintf(buildid_dir, MAXPATHLEN-1, "%s/%s",
+ home, DEBUG_CACHE_DIR);
+ } else {
+ strncpy(buildid_dir, DEBUG_CACHE_DIR, MAXPATHLEN-1);
+ }
+ buildid_dir[MAXPATHLEN-1] = '\0';
+ }
+ /* for communicating with external commands */
+ setenv("PERF_BUILDID_DIR", buildid_dir, 1);
+}
diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h
new file mode 100644
index 000000000..bd0a5897c
--- /dev/null
+++ b/tools/perf/util/config.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CONFIG_H
+#define __PERF_CONFIG_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+
+struct perf_config_item {
+ char *name;
+ char *value;
+ bool from_system_config;
+ struct list_head node;
+};
+
+struct perf_config_section {
+ char *name;
+ struct list_head items;
+ bool from_system_config;
+ struct list_head node;
+};
+
+struct perf_config_set {
+ struct list_head sections;
+};
+
+extern const char *config_exclusive_filename;
+
+typedef int (*config_fn_t)(const char *, const char *, void *);
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_int(int *dest, const char *, const char *);
+int perf_config_u64(u64 *dest, const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_etc_perfconfig(void);
+
+struct perf_config_set *perf_config_set__new(void);
+void perf_config_set__delete(struct perf_config_set *set);
+int perf_config_set__collect(struct perf_config_set *set, const char *file_name,
+ const char *var, const char *value);
+void perf_config__exit(void);
+void perf_config__refresh(void);
+
+/**
+ * perf_config_sections__for_each_entry - iterate through all the sections
+ * @list: list_head instance to iterate
+ * @section: struct perf_config_section iterator
+ */
+#define perf_config_sections__for_each_entry(list, section) \
+ list_for_each_entry(section, list, node)
+
+/**
+ * perf_config_items__for_each_entry - iterate through all the items
+ * @list: list_head instance to iterate
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_items__for_each_entry(list, item) \
+ list_for_each_entry(item, list, node)
+
+/**
+ * perf_config_set__for_each_entry - iterate through all the config section-item pairs
+ * @set: perf_config_set instance to iterate
+ * @section: struct perf_config_section iterator
+ * @item: struct perf_config_item iterator
+ */
+#define perf_config_set__for_each_entry(set, section, item) \
+ perf_config_sections__for_each_entry(&set->sections, section) \
+ perf_config_items__for_each_entry(&section->items, item)
+
+#endif /* __PERF_CONFIG_H */
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
new file mode 100644
index 000000000..03032b410
--- /dev/null
+++ b/tools/perf/util/counts.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdlib.h>
+#include "evsel.h"
+#include "counts.h"
+#include "util.h"
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads)
+{
+ struct perf_counts *counts = zalloc(sizeof(*counts));
+
+ if (counts) {
+ struct xyarray *values;
+
+ values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values));
+ if (!values) {
+ free(counts);
+ return NULL;
+ }
+
+ counts->values = values;
+ }
+
+ return counts;
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+ if (counts) {
+ xyarray__delete(counts->values);
+ free(counts);
+ }
+}
+
+static void perf_counts__reset(struct perf_counts *counts)
+{
+ xyarray__reset(counts->values);
+}
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel)
+{
+ perf_counts__reset(evsel->counts);
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ evsel->counts = perf_counts__new(ncpus, nthreads);
+ return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+ perf_counts__delete(evsel->counts);
+ evsel->counts = NULL;
+}
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
new file mode 100644
index 000000000..0d1050ccc
--- /dev/null
+++ b/tools/perf/util/counts.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_COUNTS_H
+#define __PERF_COUNTS_H
+
+#include "xyarray.h"
+
+struct perf_counts_values {
+ union {
+ struct {
+ u64 val;
+ u64 ena;
+ u64 run;
+ };
+ u64 values[3];
+ };
+ bool loaded;
+};
+
+struct perf_counts {
+ s8 scaled;
+ struct perf_counts_values aggr;
+ struct xyarray *values;
+};
+
+
+static inline struct perf_counts_values*
+perf_counts(struct perf_counts *counts, int cpu, int thread)
+{
+ return xyarray__entry(counts->values, cpu, thread);
+}
+
+struct perf_counts *perf_counts__new(int ncpus, int nthreads);
+void perf_counts__delete(struct perf_counts *counts);
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
+
+#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
new file mode 100644
index 000000000..827d844f4
--- /dev/null
+++ b/tools/perf/util/cpumap.c
@@ -0,0 +1,735 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include <api/fs/fs.h>
+#include "../perf.h"
+#include "cpumap.h"
+#include <assert.h>
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include "asm/bug.h"
+
+#include "sane_ctype.h"
+
+static int max_cpu_num;
+static int max_present_cpu_num;
+static int max_node_num;
+static int *cpunode_map;
+
+static struct cpu_map *cpu_map__default_new(void)
+{
+ struct cpu_map *cpus;
+ int nr_cpus;
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (nr_cpus < 0)
+ return NULL;
+
+ cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int));
+ if (cpus != NULL) {
+ int i;
+ for (i = 0; i < nr_cpus; ++i)
+ cpus->map[i] = i;
+
+ cpus->nr = nr_cpus;
+ refcount_set(&cpus->refcnt, 1);
+ }
+
+ return cpus;
+}
+
+static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus)
+{
+ size_t payload_size = nr_cpus * sizeof(int);
+ struct cpu_map *cpus = malloc(sizeof(*cpus) + payload_size);
+
+ if (cpus != NULL) {
+ cpus->nr = nr_cpus;
+ memcpy(cpus->map, tmp_cpus, payload_size);
+ refcount_set(&cpus->refcnt, 1);
+ }
+
+ return cpus;
+}
+
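+/*
+ * Parse a kernel-style cpu list such as "0-3,7" from @file, expanding
+ * ranges into individual entries. Falls back to the default map of
+ * all online CPUs if nothing could be parsed.
+ */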
+struct cpu_map *cpu_map__read(FILE *file)
+{
+ struct cpu_map *cpus = NULL;
+ int nr_cpus = 0;
+ int *tmp_cpus = NULL, *tmp;
+ int max_entries = 0;
+ int n, cpu, prev;
+ char sep;
+
+ sep = 0;
+ prev = -1;
+ for (;;) {
+ n = fscanf(file, "%u%c", &cpu, &sep);
+ if (n <= 0)
+ break;
+ if (prev >= 0) {
+ int new_max = nr_cpus + cpu - prev - 1;
+
+ if (new_max >= max_entries) {
+ max_entries = new_max + MAX_NR_CPUS / 2;
+ tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ if (tmp == NULL)
+ goto out_free_tmp;
+ tmp_cpus = tmp;
+ }
+
+ while (++prev < cpu)
+ tmp_cpus[nr_cpus++] = prev;
+ }
+ if (nr_cpus == max_entries) {
+ max_entries += MAX_NR_CPUS;
+ tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ if (tmp == NULL)
+ goto out_free_tmp;
+ tmp_cpus = tmp;
+ }
+
+ tmp_cpus[nr_cpus++] = cpu;
+ if (n == 2 && sep == '-')
+ prev = cpu;
+ else
+ prev = -1;
+ if (n == 1 || sep == '\n')
+ break;
+ }
+
+ if (nr_cpus > 0)
+ cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+ else
+ cpus = cpu_map__default_new();
+out_free_tmp:
+ free(tmp_cpus);
+ return cpus;
+}
+
+static struct cpu_map *cpu_map__read_all_cpu_map(void)
+{
+ struct cpu_map *cpus = NULL;
+ FILE *onlnf;
+
+ onlnf = fopen("/sys/devices/system/cpu/online", "r");
+ if (!onlnf)
+ return cpu_map__default_new();
+
+ cpus = cpu_map__read(onlnf);
+ fclose(onlnf);
+ return cpus;
+}
+
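+/*
+ * Create a cpu_map from a user-supplied list such as "0,2-4". A NULL
+ * @cpu_list means all online CPUs; an empty string yields the dummy
+ * map.
+ */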
+struct cpu_map *cpu_map__new(const char *cpu_list)
+{
+ struct cpu_map *cpus = NULL;
+ unsigned long start_cpu, end_cpu = 0;
+ char *p = NULL;
+ int i, nr_cpus = 0;
+ int *tmp_cpus = NULL, *tmp;
+ int max_entries = 0;
+
+ if (!cpu_list)
+ return cpu_map__read_all_cpu_map();
+
+ /*
+ * Must handle the case of an empty cpumap to cover the
+ * TOPOLOGY header for NUMA nodes with no CPU
+ * (e.g., because of CPU hotplug).
+ */
+ if (!isdigit(*cpu_list) && *cpu_list != '\0')
+ goto out;
+
+ while (isdigit(*cpu_list)) {
+ p = NULL;
+ start_cpu = strtoul(cpu_list, &p, 0);
+ if (start_cpu >= INT_MAX
+ || (*p != '\0' && *p != ',' && *p != '-'))
+ goto invalid;
+
+ if (*p == '-') {
+ cpu_list = ++p;
+ p = NULL;
+ end_cpu = strtoul(cpu_list, &p, 0);
+
+ if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
+ goto invalid;
+
+ if (end_cpu < start_cpu)
+ goto invalid;
+ } else {
+ end_cpu = start_cpu;
+ }
+
+ for (; start_cpu <= end_cpu; start_cpu++) {
+ /* check for duplicates */
+ for (i = 0; i < nr_cpus; i++)
+ if (tmp_cpus[i] == (int)start_cpu)
+ goto invalid;
+
+ if (nr_cpus == max_entries) {
+ max_entries += MAX_NR_CPUS;
+ tmp = realloc(tmp_cpus, max_entries * sizeof(int));
+ if (tmp == NULL)
+ goto invalid;
+ tmp_cpus = tmp;
+ }
+ tmp_cpus[nr_cpus++] = (int)start_cpu;
+ }
+ if (*p)
+ ++p;
+
+ cpu_list = p;
+ }
+
+ if (nr_cpus > 0)
+ cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
+ else if (*cpu_list != '\0')
+ cpus = cpu_map__default_new();
+ else
+ cpus = cpu_map__dummy_new();
+invalid:
+ free(tmp_cpus);
+out:
+ return cpus;
+}
+
+static struct cpu_map *cpu_map__from_entries(struct cpu_map_entries *cpus)
+{
+ struct cpu_map *map;
+
+ map = cpu_map__empty_new(cpus->nr);
+ if (map) {
+ unsigned i;
+
+ for (i = 0; i < cpus->nr; i++) {
+ /*
+ * Special treatment for -1, which is not a real cpu number,
+ * and we need to use (int) -1 to initialize map[i],
+ * otherwise it would become 65535.
+ */
+ if (cpus->cpu[i] == (u16) -1)
+ map->map[i] = -1;
+ else
+ map->map[i] = (int) cpus->cpu[i];
+ }
+ }
+
+ return map;
+}
+
+static struct cpu_map *cpu_map__from_mask(struct cpu_map_mask *mask)
+{
+ struct cpu_map *map;
+ int nr, nbits = mask->nr * mask->long_size * BITS_PER_BYTE;
+
+ nr = bitmap_weight(mask->mask, nbits);
+
+ map = cpu_map__empty_new(nr);
+ if (map) {
+ int cpu, i = 0;
+
+ for_each_set_bit(cpu, mask->mask, nbits)
+ map->map[i++] = cpu;
+ }
+ return map;
+
+}
+
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data)
+{
+ if (data->type == PERF_CPU_MAP__CPUS)
+ return cpu_map__from_entries((struct cpu_map_entries *)data->data);
+ else
+ return cpu_map__from_mask((struct cpu_map_mask *)data->data);
+}
+
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp)
+{
+#define BUFSIZE 1024
+ char buf[BUFSIZE];
+
+ cpu_map__snprint(map, buf, sizeof(buf));
+ return fprintf(fp, "%s\n", buf);
+#undef BUFSIZE
+}
+
+struct cpu_map *cpu_map__dummy_new(void)
+{
+ struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int));
+
+ if (cpus != NULL) {
+ cpus->nr = 1;
+ cpus->map[0] = -1;
+ refcount_set(&cpus->refcnt, 1);
+ }
+
+ return cpus;
+}
+
+struct cpu_map *cpu_map__empty_new(int nr)
+{
+ struct cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int) * nr);
+
+ if (cpus != NULL) {
+ int i;
+
+ cpus->nr = nr;
+ for (i = 0; i < nr; i++)
+ cpus->map[i] = -1;
+
+ refcount_set(&cpus->refcnt, 1);
+ }
+
+ return cpus;
+}
+
+static void cpu_map__delete(struct cpu_map *map)
+{
+ if (map) {
+ WARN_ONCE(refcount_read(&map->refcnt) != 0,
+ "cpu_map refcnt unbalanced\n");
+ free(map);
+ }
+}
+
+struct cpu_map *cpu_map__get(struct cpu_map *map)
+{
+ if (map)
+ refcount_inc(&map->refcnt);
+ return map;
+}
+
+void cpu_map__put(struct cpu_map *map)
+{
+ if (map && refcount_dec_and_test(&map->refcnt))
+ cpu_map__delete(map);
+}
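
The get/put pair implements the usual reference-count lifecycle: the creator holds one reference, every additional user takes one with get and releases it with put, and the final put frees the map. A standalone stand-in with a plain int in place of refcount_t, purely to illustrate the convention:

    #include <stdio.h>
    #include <stdlib.h>

    struct obj { int refcnt; };

    static struct obj *obj_get(struct obj *o)
    {
        if (o)
            o->refcnt++;
        return o;
    }

    static void obj_put(struct obj *o)
    {
        if (o && --o->refcnt == 0) {    /* last reference frees */
            printf("freed\n");
            free(o);
        }
    }

    int main(void)
    {
        struct obj *o = malloc(sizeof(*o));

        if (!o)
            return 1;
        o->refcnt = 1;                  /* creator's reference */
        obj_get(o);                     /* a second user takes one... */
        obj_put(o);                     /* ...and drops it */
        obj_put(o);                     /* creator's put frees it */
        return 0;
    }
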
+
+static int cpu__get_topology_int(int cpu, const char *name, int *value)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, PATH_MAX,
+ "devices/system/cpu/cpu%d/topology/%s", cpu, name);
+
+ return sysfs__read_int(path, value);
+}
+
+int cpu_map__get_socket_id(int cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu, "physical_package_id", &value);
+ return ret ?: value;
+}
+
+int cpu_map__get_socket(struct cpu_map *map, int idx, void *data __maybe_unused)
+{
+ int cpu;
+
+ if (idx >= map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ return cpu_map__get_socket_id(cpu);
+}
+
+static int cmp_ids(const void *a, const void *b)
+{
+ return *(int *)a - *(int *)b;
+}
+
+int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+ int (*f)(struct cpu_map *map, int cpu, void *data),
+ void *data)
+{
+ struct cpu_map *c;
+ int nr = cpus->nr;
+ int cpu, s1, s2;
+
+ /* allocate for the worst case: one distinct id per cpu */
+ c = calloc(1, sizeof(*c) + nr * sizeof(int));
+ if (!c)
+ return -1;
+
+ for (cpu = 0; cpu < nr; cpu++) {
+ s1 = f(cpus, cpu, data);
+ for (s2 = 0; s2 < c->nr; s2++) {
+ if (s1 == c->map[s2])
+ break;
+ }
+ if (s2 == c->nr) {
+ c->map[c->nr] = s1;
+ c->nr++;
+ }
+ }
+ /* ensure we process ids in increasing order */
+ qsort(c->map, c->nr, sizeof(int), cmp_ids);
+
+ refcount_set(&c->refcnt, 1);
+ *res = c;
+ return 0;
+}
+
+int cpu_map__get_core_id(int cpu)
+{
+ int value, ret = cpu__get_topology_int(cpu, "core_id", &value);
+ return ret ?: value;
+}
+
+int cpu_map__get_core(struct cpu_map *map, int idx, void *data)
+{
+ int cpu, s;
+
+ if (idx >= map->nr)
+ return -1;
+
+ cpu = map->map[idx];
+
+ cpu = cpu_map__get_core_id(cpu);
+
+ s = cpu_map__get_socket(map, idx, data);
+ if (s == -1)
+ return -1;
+
+ /*
+ * Encode the socket in the upper 16 bits: core_id is
+ * relative to its socket and we need a global id, so we
+ * combine socket + core id.
+ */
+ return (s << 16) | (cpu & 0xffff);
+}
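
The packed id round-trips through the shift-and-mask helpers declared later in cpumap.h (cpu_map__id_to_socket() and cpu_map__id_to_cpu()). A small worked check of the encoding:

    #include <assert.h>

    int main(void)
    {
        int socket = 2, core = 5;
        int id = (socket << 16) | (core & 0xffff);      /* encode */

        assert((id >> 16) == socket);   /* recover the socket */
        assert((id & 0xffff) == core);  /* recover the core */
        return 0;
    }
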
+
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp)
+{
+ return cpu_map__build_map(cpus, sockp, cpu_map__get_socket, NULL);
+}
+
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep)
+{
+ return cpu_map__build_map(cpus, corep, cpu_map__get_core, NULL);
+}
+
+/* set up simple routines to easily access node numbers given a cpu number */
+static int get_max_num(char *path, int *max)
+{
+ size_t num;
+ char *buf;
+ int err = 0;
+
+ if (filename__read_str(path, &buf, &num))
+ return -1;
+
+ buf[num] = '\0';
+
+ /* start on the right, to find the highest number */
+ while (--num) {
+ if ((buf[num] == ',') || (buf[num] == '-')) {
+ num++;
+ break;
+ }
+ }
+ if (sscanf(&buf[num], "%d", max) < 1) {
+ err = -1;
+ goto out;
+ }
+
+ /* convert the highest 0-based id into a count */
+ (*max)++;
+
+out:
+ free(buf);
+ return err;
+}
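
get_max_num() scans the sysfs mask right to left until it hits the last ',' or '-', so only the final (highest) number is parsed. A miniature version over a hypothetical "possible" string:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        char buf[] = "0,2-7\n";         /* hypothetical sysfs content */
        size_t num = strlen(buf);
        int max;

        buf[--num] = '\0';              /* drop the newline */
        while (--num) {
            if (buf[num] == ',' || buf[num] == '-') {
                num++;
                break;
            }
        }
        if (sscanf(&buf[num], "%d", &max) == 1)
            printf("count = %d\n", max + 1);    /* count = 8 */
        return 0;
    }
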
+
+/* Determine highest possible cpu in the system for sparse allocation */
+static void set_max_cpu_num(void)
+{
+ const char *mnt;
+ char path[PATH_MAX];
+ int ret = -1;
+
+ /* set up default */
+ max_cpu_num = 4096;
+ max_present_cpu_num = 4096;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ goto out;
+
+ /* get the highest possible cpu number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
+ if (ret >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_cpu_num);
+ if (ret)
+ goto out;
+
+ /* get the highest present cpu number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
+ if (ret >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_present_cpu_num);
+
+out:
+ if (ret)
+ pr_err("Failed to read max cpus, using default of %d\n", max_cpu_num);
+}
+
+/* Determine highest possible node in the system for sparse allocation */
+static void set_max_node_num(void)
+{
+ const char *mnt;
+ char path[PATH_MAX];
+ int ret = -1;
+
+ /* set up default */
+ max_node_num = 8;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ goto out;
+
+ /* get the highest possible node number for a sparse allocation */
+ ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
+ if (ret >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ goto out;
+ }
+
+ ret = get_max_num(path, &max_node_num);
+
+out:
+ if (ret)
+ pr_err("Failed to read max nodes, using default of %d\n", max_node_num);
+}
+
+int cpu__max_node(void)
+{
+ if (unlikely(!max_node_num))
+ set_max_node_num();
+
+ return max_node_num;
+}
+
+int cpu__max_cpu(void)
+{
+ if (unlikely(!max_cpu_num))
+ set_max_cpu_num();
+
+ return max_cpu_num;
+}
+
+int cpu__max_present_cpu(void)
+{
+ if (unlikely(!max_present_cpu_num))
+ set_max_cpu_num();
+
+ return max_present_cpu_num;
+}
+
+int cpu__get_node(int cpu)
+{
+ if (unlikely(cpunode_map == NULL)) {
+ pr_debug("cpu_map not initialized\n");
+ return -1;
+ }
+
+ return cpunode_map[cpu];
+}
+
+static int init_cpunode_map(void)
+{
+ int i;
+
+ set_max_cpu_num();
+ set_max_node_num();
+
+ cpunode_map = calloc(max_cpu_num, sizeof(int));
+ if (!cpunode_map) {
+ pr_err("%s: calloc failed\n", __func__);
+ return -1;
+ }
+
+ for (i = 0; i < max_cpu_num; i++)
+ cpunode_map[i] = -1;
+
+ return 0;
+}
+
+int cpu__setup_cpunode_map(void)
+{
+ struct dirent *dent1, *dent2;
+ DIR *dir1, *dir2;
+ unsigned int cpu, mem;
+ char buf[PATH_MAX];
+ char path[PATH_MAX];
+ const char *mnt;
+ int n;
+
+ /* initialize globals */
+ if (init_cpunode_map())
+ return -1;
+
+ mnt = sysfs__mountpoint();
+ if (!mnt)
+ return 0;
+
+ n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
+ if (n >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ return -1;
+ }
+
+ dir1 = opendir(path);
+ if (!dir1)
+ return 0;
+
+ /* walk tree and setup map */
+ while ((dent1 = readdir(dir1)) != NULL) {
+ if (dent1->d_type != DT_DIR || sscanf(dent1->d_name, "node%u", &mem) < 1)
+ continue;
+
+ n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
+ if (n >= PATH_MAX) {
+ pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
+ continue;
+ }
+
+ dir2 = opendir(buf);
+ if (!dir2)
+ continue;
+ while ((dent2 = readdir(dir2)) != NULL) {
+ if (dent2->d_type != DT_LNK || sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+ continue;
+ cpunode_map[cpu] = mem;
+ }
+ closedir(dir2);
+ }
+ closedir(dir1);
+ return 0;
+}
+
+bool cpu_map__has(struct cpu_map *cpus, int cpu)
+{
+ return cpu_map__idx(cpus, cpu) != -1;
+}
+
+int cpu_map__idx(struct cpu_map *cpus, int cpu)
+{
+ int i;
+
+ for (i = 0; i < cpus->nr; ++i) {
+ if (cpus->map[i] == cpu)
+ return i;
+ }
+
+ return -1;
+}
+
+int cpu_map__cpu(struct cpu_map *cpus, int idx)
+{
+ return cpus->map[idx];
+}
+
+size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size)
+{
+ int i, cpu, start = -1;
+ bool first = true;
+ size_t ret = 0;
+
+#define COMMA first ? "" : ","
+
+ for (i = 0; i < map->nr + 1; i++) {
+ bool last = i == map->nr;
+
+ cpu = last ? INT_MAX : map->map[i];
+
+ if (start == -1) {
+ start = i;
+ if (last) {
+ ret += snprintf(buf + ret, size - ret,
+ "%s%d", COMMA,
+ map->map[i]);
+ }
+ } else if (((i - start) != (cpu - map->map[start])) || last) {
+ int end = i - 1;
+
+ if (start == end) {
+ ret += snprintf(buf + ret, size - ret,
+ "%s%d", COMMA,
+ map->map[start]);
+ } else {
+ ret += snprintf(buf + ret, size - ret,
+ "%s%d-%d", COMMA,
+ map->map[start], map->map[end]);
+ }
+ first = false;
+ start = i;
+ }
+ }
+
+#undef COMMA
+
+ pr_debug("cpumask list: %s\n", buf);
+ return ret;
+}
+
+static char hex_char(unsigned char val)
+{
+ if (val < 10)
+ return val + '0';
+ if (val < 16)
+ return val - 10 + 'a';
+ return '?';
+}
+
+size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size)
+{
+ int i, cpu;
+ char *ptr = buf;
+ unsigned char *bitmap;
+ int last_cpu = cpu_map__cpu(map, map->nr - 1);
+
+ if (buf == NULL)
+ return 0;
+
+ bitmap = zalloc(last_cpu / 8 + 1);
+ if (bitmap == NULL) {
+ buf[0] = '\0';
+ return 0;
+ }
+
+ for (i = 0; i < map->nr; i++) {
+ cpu = cpu_map__cpu(map, i);
+ bitmap[cpu / 8] |= 1 << (cpu % 8);
+ }
+
+ for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
+ unsigned char bits = bitmap[cpu / 8];
+
+ if (cpu % 8)
+ bits >>= 4;
+ else
+ bits &= 0xf;
+
+ *ptr++ = hex_char(bits);
+ if ((cpu % 32) == 0 && cpu > 0)
+ *ptr++ = ',';
+ }
+ *ptr = '\0';
+ free(bitmap);
+
+ buf[size - 1] = '\0';
+ return ptr - buf;
+}
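
cpu_map__snprint_mask() sets one bit per CPU and then prints the bitmap as hex nibbles from the highest CPU down, inserting a ',' every 32 bits. A worked miniature for the hypothetical CPU set {0, 1, 2, 8}:

    #include <stdio.h>

    int main(void)
    {
        int cpus[] = { 0, 1, 2, 8 }, nr = 4, last_cpu = 8;
        unsigned char bitmap[2] = { 0 };
        int i, cpu;

        for (i = 0; i < nr; i++)
            bitmap[cpus[i] / 8] |= 1 << (cpus[i] % 8);

        /* walk nibble by nibble from the top, as the real code does */
        for (cpu = last_cpu / 4 * 4; cpu >= 0; cpu -= 4) {
            unsigned char bits = bitmap[cpu / 8];

            bits = (cpu % 8) ? bits >> 4 : bits & 0xf;
            printf("%x", bits);
        }
        printf("\n");                   /* prints 107: bit 8 plus bits 0-2 */
        return 0;
    }
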
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
new file mode 100644
index 000000000..ed8999d1a
--- /dev/null
+++ b/tools/perf/util/cpumap.h
@@ -0,0 +1,77 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_CPUMAP_H
+#define __PERF_CPUMAP_H
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/refcount.h>
+
+#include "perf.h"
+#include "util/debug.h"
+
+struct cpu_map {
+ refcount_t refcnt;
+ int nr;
+ int map[];
+};
+
+struct cpu_map *cpu_map__new(const char *cpu_list);
+struct cpu_map *cpu_map__empty_new(int nr);
+struct cpu_map *cpu_map__dummy_new(void);
+struct cpu_map *cpu_map__new_data(struct cpu_map_data *data);
+struct cpu_map *cpu_map__read(FILE *file);
+size_t cpu_map__snprint(struct cpu_map *map, char *buf, size_t size);
+size_t cpu_map__snprint_mask(struct cpu_map *map, char *buf, size_t size);
+size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp);
+int cpu_map__get_socket_id(int cpu);
+int cpu_map__get_socket(struct cpu_map *map, int idx, void *data);
+int cpu_map__get_core_id(int cpu);
+int cpu_map__get_core(struct cpu_map *map, int idx, void *data);
+int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp);
+int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep);
+
+struct cpu_map *cpu_map__get(struct cpu_map *map);
+void cpu_map__put(struct cpu_map *map);
+
+static inline int cpu_map__socket(struct cpu_map *sock, int s)
+{
+ if (!sock || s >= sock->nr || s < 0)
+ return 0;
+ return sock->map[s];
+}
+
+static inline int cpu_map__id_to_socket(int id)
+{
+ return id >> 16;
+}
+
+static inline int cpu_map__id_to_cpu(int id)
+{
+ return id & 0xffff;
+}
+
+static inline int cpu_map__nr(const struct cpu_map *map)
+{
+ return map ? map->nr : 1;
+}
+
+static inline bool cpu_map__empty(const struct cpu_map *map)
+{
+ return map ? map->map[0] == -1 : true;
+}
+
+int cpu__setup_cpunode_map(void);
+
+int cpu__max_node(void);
+int cpu__max_cpu(void);
+int cpu__max_present_cpu(void);
+int cpu__get_node(int cpu);
+
+int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res,
+ int (*f)(struct cpu_map *map, int cpu, void *data),
+ void *data);
+
+int cpu_map__cpu(struct cpu_map *cpus, int idx);
+bool cpu_map__has(struct cpu_map *cpus, int cpu);
+int cpu_map__idx(struct cpu_map *cpus, int cpu);
+#endif /* __PERF_CPUMAP_H */
diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build
new file mode 100644
index 000000000..bc22c39c7
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/Build
@@ -0,0 +1 @@
+libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
new file mode 100644
index 000000000..f540037eb
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -0,0 +1,562 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/err.h>
+#include <linux/list.h>
+#include <stdlib.h>
+#include <opencsd/c_api/opencsd_c_api.h>
+#include <opencsd/etmv4/trc_pkt_types_etmv4.h>
+#include <opencsd/ocsd_if_types.h>
+
+#include "cs-etm.h"
+#include "cs-etm-decoder.h"
+#include "intlist.h"
+#include "util.h"
+
+#define MAX_BUFFER 1024
+
+/* use raw logging */
+#ifdef CS_DEBUG_RAW
+#define CS_LOG_RAW_FRAMES
+#ifdef CS_RAW_PACKED
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \
+ OCSD_DFRMTR_PACKED_RAW_OUT)
+#else
+#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT)
+#endif
+#endif
+
+#define CS_ETM_INVAL_ADDR 0xdeadbeefdeadbeefUL
+
+struct cs_etm_decoder {
+ void *data;
+ void (*packet_printer)(const char *msg);
+ bool trace_on;
+ dcd_tree_handle_t dcd_tree;
+ cs_etm_mem_cb_type mem_access;
+ ocsd_datapath_resp_t prev_return;
+ u32 packet_count;
+ u32 head;
+ u32 tail;
+ struct cs_etm_packet packet_buffer[MAX_BUFFER];
+};
+
+static u32
+cs_etm_decoder__mem_access(const void *context,
+ const ocsd_vaddr_t address,
+ const ocsd_mem_space_acc_t mem_space __maybe_unused,
+ const u32 req_size,
+ u8 *buffer)
+{
+ struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+
+ return decoder->mem_access(decoder->data,
+ address,
+ req_size,
+ buffer);
+}
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+ u64 start, u64 end,
+ cs_etm_mem_cb_type cb_func)
+{
+ decoder->mem_access = cb_func;
+
+ if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end,
+ OCSD_MEM_SPACE_ANY,
+ cs_etm_decoder__mem_access, decoder))
+ return -1;
+
+ return 0;
+}
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder)
+{
+ ocsd_datapath_resp_t dp_ret;
+
+ decoder->prev_return = OCSD_RESP_CONT;
+
+ dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET,
+ 0, 0, NULL, NULL);
+ if (OCSD_DATA_RESP_IS_FATAL(dp_ret))
+ return -1;
+
+ return 0;
+}
+
+int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+ struct cs_etm_packet *packet)
+{
+ if (!decoder || !packet)
+ return -EINVAL;
+
+ /* Nothing to do, might as well just return */
+ if (decoder->packet_count == 0)
+ return 0;
+ /*
+ * The queueing process in function cs_etm_decoder__buffer_packet()
+ * increments the tail *before* using it. This is somewhat
+ * counterintuitive but it has the advantage of centralizing tail
+ * management at a single location. Because of that we need to follow
+ * the same heuristic with the head, i.e. we increment it before using
+ * its value. Otherwise the first element of the packet queue is not
+ * used.
+ */
+ decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1);
+
+ *packet = decoder->packet_buffer[decoder->head];
+
+ decoder->packet_count--;
+
+ return 1;
+}
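
Both head and tail therefore advance before each use, which keeps the wrap logic identical on the producer and consumer sides. A 4-slot standalone ring that follows the same convention (the power-of-two size is what makes the & (size - 1) wrap valid):

    #include <stdio.h>

    #define RING_SIZE 4                 /* stand-in for MAX_BUFFER */

    int main(void)
    {
        int buf[RING_SIZE], head = 0, tail = 0, count = 0, v;

        for (v = 10; v < 13; v++) {     /* queue 10, 11, 12 */
            tail = (tail + 1) & (RING_SIZE - 1);
            buf[tail] = v;
            count++;
        }
        while (count--) {               /* dequeue in the same order */
            head = (head + 1) & (RING_SIZE - 1);
            printf("%d\n", buf[head]);  /* 10, 11, 12 */
        }
        return 0;
    }
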
+
+static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params,
+ ocsd_etmv4_cfg *config)
+{
+ config->reg_configr = params->etmv4.reg_configr;
+ config->reg_traceidr = params->etmv4.reg_traceidr;
+ config->reg_idr0 = params->etmv4.reg_idr0;
+ config->reg_idr1 = params->etmv4.reg_idr1;
+ config->reg_idr2 = params->etmv4.reg_idr2;
+ config->reg_idr8 = params->etmv4.reg_idr8;
+ config->reg_idr9 = 0;
+ config->reg_idr10 = 0;
+ config->reg_idr11 = 0;
+ config->reg_idr12 = 0;
+ config->reg_idr13 = 0;
+ config->arch_ver = ARCH_V8;
+ config->core_prof = profile_CortexA;
+}
+
+static void cs_etm_decoder__print_str_cb(const void *p_context,
+ const char *msg,
+ const int str_len)
+{
+ if (p_context && str_len)
+ ((struct cs_etm_decoder *)p_context)->packet_printer(msg);
+}
+
+static int
+cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_decoder *decoder)
+{
+ int ret = 0;
+
+ if (d_params->packet_printer == NULL)
+ return -1;
+
+ decoder->packet_printer = d_params->packet_printer;
+
+ /*
+ * Set up a library default logger to process any printers
+ * (packet/raw frame) we add later.
+ */
+ ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+ if (ret != 0)
+ return -1;
+
+ /* no stdout / err / file output */
+ ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+ if (ret != 0)
+ return -1;
+
+ /*
+ * Set the string CB for the default logger; it passes strings to
+ * the perf print logger.
+ */
+ ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+ (void *)decoder,
+ cs_etm_decoder__print_str_cb);
+ if (ret != 0)
+ return -1;
+
+ return 0;
+}
+
+#ifdef CS_LOG_RAW_FRAMES
+static void
+cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_decoder *decoder)
+{
+ /* Only log these during a --dump operation */
+ if (d_params->operation == CS_ETM_OPERATION_PRINT) {
+ /*
+ * Set up a library default logger to process the
+ * raw frame printer we add later.
+ */
+ ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1);
+
+ /* no stdout / err / file output */
+ ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL);
+
+ /*
+ * Set the string CB for the default logger; it passes
+ * strings to the perf print logger.
+ */
+ ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree,
+ (void *)decoder,
+ cs_etm_decoder__print_str_cb);
+
+ /* use the built in library printer for the raw frames */
+ ocsd_dt_set_raw_frame_printer(decoder->dcd_tree,
+ CS_RAW_DEBUG_FLAGS);
+ }
+}
+#else
+static void
+cs_etm_decoder__init_raw_frame_logging(
+ struct cs_etm_decoder_params *d_params __maybe_unused,
+ struct cs_etm_decoder *decoder __maybe_unused)
+{
+}
+#endif
+
+static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder,
+ const char *decoder_name,
+ void *trace_config)
+{
+ u8 csid;
+
+ if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name,
+ OCSD_CREATE_FLG_PACKET_PROC,
+ trace_config, &csid))
+ return -1;
+
+ if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0))
+ return -1;
+
+ return 0;
+}
+
+static int
+cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params,
+ struct cs_etm_decoder *decoder)
+{
+ const char *decoder_name;
+ ocsd_etmv4_cfg trace_config_etmv4;
+ void *trace_config;
+
+ switch (t_params->protocol) {
+ case CS_ETM_PROTO_ETMV4i:
+ cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
+ decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
+ trace_config = &trace_config_etmv4;
+ break;
+ default:
+ return -1;
+ }
+
+ return cs_etm_decoder__create_packet_printer(decoder,
+ decoder_name,
+ trace_config);
+}
+
+static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
+{
+ int i;
+
+ decoder->head = 0;
+ decoder->tail = 0;
+ decoder->packet_count = 0;
+ for (i = 0; i < MAX_BUFFER; i++) {
+ decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[i].last_instr_taken_branch = false;
+ decoder->packet_buffer[i].exc = false;
+ decoder->packet_buffer[i].exc_ret = false;
+ decoder->packet_buffer[i].cpu = INT_MIN;
+ }
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
+ const u8 trace_chan_id,
+ enum cs_etm_sample_type sample_type)
+{
+ u32 et = 0;
+ int cpu;
+
+ if (decoder->packet_count >= MAX_BUFFER - 1)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ return OCSD_RESP_FATAL_SYS_ERR;
+
+ et = decoder->tail;
+ et = (et + 1) & (MAX_BUFFER - 1);
+ decoder->tail = et;
+ decoder->packet_count++;
+
+ decoder->packet_buffer[et].sample_type = sample_type;
+ decoder->packet_buffer[et].exc = false;
+ decoder->packet_buffer[et].exc_ret = false;
+ decoder->packet_buffer[et].cpu = cpu;
+ decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
+ decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+
+ if (decoder->packet_count == MAX_BUFFER - 1)
+ return OCSD_RESP_WAIT;
+
+ return OCSD_RESP_CONT;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
+ const ocsd_generic_trace_elem *elem,
+ const uint8_t trace_chan_id)
+{
+ int ret = 0;
+ struct cs_etm_packet *packet;
+
+ ret = cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ CS_ETM_RANGE);
+ if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
+ return ret;
+
+ packet = &decoder->packet_buffer[decoder->tail];
+
+ packet->start_addr = elem->st_addr;
+ packet->end_addr = elem->en_addr;
+ switch (elem->last_i_type) {
+ case OCSD_INSTR_BR:
+ case OCSD_INSTR_BR_INDIRECT:
+ packet->last_instr_taken_branch = elem->last_instr_exec;
+ break;
+ case OCSD_INSTR_ISB:
+ case OCSD_INSTR_DSB_DMB:
+ case OCSD_INSTR_OTHER:
+ default:
+ packet->last_instr_taken_branch = false;
+ break;
+ }
+
+ return ret;
+}
+
+static ocsd_datapath_resp_t
+cs_etm_decoder__buffer_trace_on(struct cs_etm_decoder *decoder,
+ const uint8_t trace_chan_id)
+{
+ return cs_etm_decoder__buffer_packet(decoder, trace_chan_id,
+ CS_ETM_TRACE_ON);
+}
+
+static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
+ const void *context,
+ const ocsd_trc_index_t indx __maybe_unused,
+ const u8 trace_chan_id __maybe_unused,
+ const ocsd_generic_trace_elem *elem)
+{
+ ocsd_datapath_resp_t resp = OCSD_RESP_CONT;
+ struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context;
+
+ switch (elem->elem_type) {
+ case OCSD_GEN_TRC_ELEM_UNKNOWN:
+ break;
+ case OCSD_GEN_TRC_ELEM_NO_SYNC:
+ decoder->trace_on = false;
+ break;
+ case OCSD_GEN_TRC_ELEM_TRACE_ON:
+ resp = cs_etm_decoder__buffer_trace_on(decoder,
+ trace_chan_id);
+ decoder->trace_on = true;
+ break;
+ case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
+ resp = cs_etm_decoder__buffer_range(decoder, elem,
+ trace_chan_id);
+ break;
+ case OCSD_GEN_TRC_ELEM_EXCEPTION:
+ decoder->packet_buffer[decoder->tail].exc = true;
+ break;
+ case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
+ decoder->packet_buffer[decoder->tail].exc_ret = true;
+ break;
+ case OCSD_GEN_TRC_ELEM_PE_CONTEXT:
+ case OCSD_GEN_TRC_ELEM_EO_TRACE:
+ case OCSD_GEN_TRC_ELEM_ADDR_NACC:
+ case OCSD_GEN_TRC_ELEM_TIMESTAMP:
+ case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
+ case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
+ case OCSD_GEN_TRC_ELEM_EVENT:
+ case OCSD_GEN_TRC_ELEM_SWTRACE:
+ case OCSD_GEN_TRC_ELEM_CUSTOM:
+ default:
+ break;
+ }
+
+ return resp;
+}
+
+static int cs_etm_decoder__create_etm_packet_decoder(
+ struct cs_etm_trace_params *t_params,
+ struct cs_etm_decoder *decoder)
+{
+ const char *decoder_name;
+ ocsd_etmv4_cfg trace_config_etmv4;
+ void *trace_config;
+ u8 csid;
+
+ switch (t_params->protocol) {
+ case CS_ETM_PROTO_ETMV4i:
+ cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4);
+ decoder_name = OCSD_BUILTIN_DCD_ETMV4I;
+ trace_config = &trace_config_etmv4;
+ break;
+ default:
+ return -1;
+ }
+
+ if (ocsd_dt_create_decoder(decoder->dcd_tree,
+ decoder_name,
+ OCSD_CREATE_FLG_FULL_DECODER,
+ trace_config, &csid))
+ return -1;
+
+ if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree,
+ cs_etm_decoder__gen_trace_elem_printer,
+ decoder))
+ return -1;
+
+ return 0;
+}
+
+static int
+cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params *t_params,
+ struct cs_etm_decoder *decoder)
+{
+ if (d_params->operation == CS_ETM_OPERATION_PRINT)
+ return cs_etm_decoder__create_etm_packet_printer(t_params,
+ decoder);
+ else if (d_params->operation == CS_ETM_OPERATION_DECODE)
+ return cs_etm_decoder__create_etm_packet_decoder(t_params,
+ decoder);
+
+ return -1;
+}
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params t_params[])
+{
+ struct cs_etm_decoder *decoder;
+ ocsd_dcd_tree_src_t format;
+ u32 flags;
+ int i, ret;
+
+ if ((!t_params) || (!d_params))
+ return NULL;
+
+ decoder = zalloc(sizeof(*decoder));
+
+ if (!decoder)
+ return NULL;
+
+ decoder->data = d_params->data;
+ decoder->prev_return = OCSD_RESP_CONT;
+ cs_etm_decoder__clear_buffer(decoder);
+ format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED :
+ OCSD_TRC_SRC_SINGLE);
+ flags = 0;
+ flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0);
+ flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0);
+ flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0);
+
+ /*
+ * Drivers may add barrier frames when used with perf; set up to
+ * handle this. Barriers consist of an FSYNC packet repeated 4 times.
+ */
+ flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC;
+
+ /* Create decode tree for the data source */
+ decoder->dcd_tree = ocsd_create_dcd_tree(format, flags);
+
+ if (decoder->dcd_tree == 0)
+ goto err_free_decoder;
+
+ /* init library print logging support */
+ ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder);
+ if (ret != 0)
+ goto err_free_decoder_tree;
+
+ /* init raw frame logging if required */
+ cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
+
+ for (i = 0; i < num_cpu; i++) {
+ ret = cs_etm_decoder__create_etm_decoder(d_params,
+ &t_params[i],
+ decoder);
+ if (ret != 0)
+ goto err_free_decoder_tree;
+ }
+
+ return decoder;
+
+err_free_decoder_tree:
+ ocsd_destroy_dcd_tree(decoder->dcd_tree);
+err_free_decoder:
+ free(decoder);
+ return NULL;
+}
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+ u64 indx, const u8 *buf,
+ size_t len, size_t *consumed)
+{
+ int ret = 0;
+ ocsd_datapath_resp_t cur = OCSD_RESP_CONT;
+ ocsd_datapath_resp_t prev_return = decoder->prev_return;
+ size_t processed = 0;
+ u32 count;
+
+ while (processed < len) {
+ if (OCSD_DATA_RESP_IS_WAIT(prev_return)) {
+ cur = ocsd_dt_process_data(decoder->dcd_tree,
+ OCSD_OP_FLUSH,
+ 0,
+ 0,
+ NULL,
+ NULL);
+ } else if (OCSD_DATA_RESP_IS_CONT(prev_return)) {
+ cur = ocsd_dt_process_data(decoder->dcd_tree,
+ OCSD_OP_DATA,
+ indx + processed,
+ len - processed,
+ &buf[processed],
+ &count);
+ processed += count;
+ } else {
+ ret = -EINVAL;
+ break;
+ }
+
+ /*
+ * Return to the input code if the packet buffer is full.
+ * Flushing will get done once the packet buffer has been
+ * processed.
+ */
+ if (OCSD_DATA_RESP_IS_WAIT(cur))
+ break;
+
+ prev_return = cur;
+ }
+
+ decoder->prev_return = cur;
+ *consumed = processed;
+
+ return ret;
+}
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder)
+{
+ if (!decoder)
+ return;
+
+ ocsd_destroy_dcd_tree(decoder->dcd_tree);
+ decoder->dcd_tree = NULL;
+ free(decoder);
+}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
new file mode 100644
index 000000000..612b5755f
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -0,0 +1,108 @@
+/*
+ * SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__CS_ETM_DECODER_H__
+#define INCLUDE__CS_ETM_DECODER_H__
+
+#include <linux/types.h>
+#include <stdio.h>
+
+struct cs_etm_decoder;
+
+struct cs_etm_buffer {
+ const unsigned char *buf;
+ size_t len;
+ u64 offset;
+ u64 ref_timestamp;
+};
+
+enum cs_etm_sample_type {
+ CS_ETM_EMPTY = 0,
+ CS_ETM_RANGE = 1 << 0,
+ CS_ETM_TRACE_ON = 1 << 1,
+};
+
+struct cs_etm_packet {
+ enum cs_etm_sample_type sample_type;
+ u64 start_addr;
+ u64 end_addr;
+ u8 last_instr_taken_branch;
+ u8 exc;
+ u8 exc_ret;
+ int cpu;
+};
+
+struct cs_etm_queue;
+
+typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64,
+ size_t, u8 *);
+
+struct cs_etmv4_trace_params {
+ u32 reg_idr0;
+ u32 reg_idr1;
+ u32 reg_idr2;
+ u32 reg_idr8;
+ u32 reg_configr;
+ u32 reg_traceidr;
+};
+
+struct cs_etm_trace_params {
+ int protocol;
+ union {
+ struct cs_etmv4_trace_params etmv4;
+ };
+};
+
+struct cs_etm_decoder_params {
+ int operation;
+ void (*packet_printer)(const char *msg);
+ cs_etm_mem_cb_type mem_acc_cb;
+ u8 formatted;
+ u8 fsyncs;
+ u8 hsyncs;
+ u8 frame_aligned;
+ void *data;
+};
+
+/*
+ * The following enums are indexed starting with 1 to align with the
+ * open source coresight trace decoder library.
+ */
+enum {
+ CS_ETM_PROTO_ETMV3 = 1,
+ CS_ETM_PROTO_ETMV4i,
+ CS_ETM_PROTO_ETMV4d,
+};
+
+enum {
+ CS_ETM_OPERATION_PRINT = 1,
+ CS_ETM_OPERATION_DECODE,
+};
+
+int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
+ u64 indx, const u8 *buf,
+ size_t len, size_t *consumed);
+
+struct cs_etm_decoder *
+cs_etm_decoder__new(int num_cpu,
+ struct cs_etm_decoder_params *d_params,
+ struct cs_etm_trace_params t_params[]);
+
+void cs_etm_decoder__free(struct cs_etm_decoder *decoder);
+
+int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder,
+ u64 start, u64 end,
+ cs_etm_mem_cb_type cb_func);
+
+int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder,
+ struct cs_etm_packet *packet);
+
+int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
+
+#endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
new file mode 100644
index 000000000..3275b8dc9
--- /dev/null
+++ b/tools/perf/util/cs-etm.c
@@ -0,0 +1,1507 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright(C) 2015-2018 Linaro Limited.
+ *
+ * Author: Tor Jeremiassen <tor@ti.com>
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#include <linux/bitops.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/types.h>
+
+#include <stdlib.h>
+
+#include "auxtrace.h"
+#include "color.h"
+#include "cs-etm.h"
+#include "cs-etm-decoder/cs-etm-decoder.h"
+#include "debug.h"
+#include "evlist.h"
+#include "intlist.h"
+#include "machine.h"
+#include "map.h"
+#include "perf.h"
+#include "thread.h"
+#include "thread_map.h"
+#include "thread-stack.h"
+#include "util.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+/*
+ * A64 instructions are always 4 bytes.
+ *
+ * Only A64 is supported, so this constant can be used for converting
+ * between addresses and instruction counts, calculating offsets, etc.
+ */
+#define A64_INSTR_SIZE 4
+
+struct cs_etm_auxtrace {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ struct itrace_synth_opts synth_opts;
+ struct perf_session *session;
+ struct machine *machine;
+ struct thread *unknown_thread;
+
+ u8 timeless_decoding;
+ u8 snapshot_mode;
+ u8 data_queued;
+ u8 sample_branches;
+ u8 sample_instructions;
+
+ int num_cpu;
+ u32 auxtrace_type;
+ u64 branches_sample_type;
+ u64 branches_id;
+ u64 instructions_sample_type;
+ u64 instructions_sample_period;
+ u64 instructions_id;
+ u64 **metadata;
+ u64 kernel_start;
+ unsigned int pmu_type;
+};
+
+struct cs_etm_queue {
+ struct cs_etm_auxtrace *etm;
+ struct thread *thread;
+ struct cs_etm_decoder *decoder;
+ struct auxtrace_buffer *buffer;
+ const struct cs_etm_state *state;
+ union perf_event *event_buf;
+ unsigned int queue_nr;
+ pid_t pid, tid;
+ int cpu;
+ u64 time;
+ u64 timestamp;
+ u64 offset;
+ u64 period_instructions;
+ struct branch_stack *last_branch;
+ struct branch_stack *last_branch_rb;
+ size_t last_branch_pos;
+ struct cs_etm_packet *prev_packet;
+ struct cs_etm_packet *packet;
+};
+
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
+static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+ pid_t tid, u64 time_);
+
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+{
+ struct int_node *inode;
+ u64 *metadata;
+
+ inode = intlist__find(traceid_list, trace_chan_id);
+ if (!inode)
+ return -EINVAL;
+
+ metadata = inode->priv;
+ *cpu = (int)metadata[CS_ETM_CPU];
+ return 0;
+}
+
+static void cs_etm__packet_dump(const char *pkt_string)
+{
+ const char *color = PERF_COLOR_BLUE;
+ int len = strlen(pkt_string);
+
+ if (len && (pkt_string[len-1] == '\n'))
+ color_fprintf(stdout, color, " %s", pkt_string);
+ else
+ color_fprintf(stdout, color, " %s\n", pkt_string);
+
+ fflush(stdout);
+}
+
+static void cs_etm__dump_event(struct cs_etm_auxtrace *etm,
+ struct auxtrace_buffer *buffer)
+{
+ int i, ret;
+ const char *color = PERF_COLOR_BLUE;
+ struct cs_etm_decoder_params d_params;
+ struct cs_etm_trace_params *t_params;
+ struct cs_etm_decoder *decoder;
+ size_t buffer_used = 0;
+
+ fprintf(stdout, "\n");
+ color_fprintf(stdout, color,
+ ". ... CoreSight ETM Trace data: size %zu bytes\n",
+ buffer->size);
+
+ /* Use metadata to fill in trace parameters for trace decoder */
+ t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+ if (!t_params)
+ return;
+
+ for (i = 0; i < etm->num_cpu; i++) {
+ t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
+ t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
+ t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
+ t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
+ t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
+ t_params[i].etmv4.reg_configr =
+ etm->metadata[i][CS_ETMV4_TRCCONFIGR];
+ t_params[i].etmv4.reg_traceidr =
+ etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
+ }
+
+ /* Set decoder parameters to simply print the trace packets */
+ d_params.packet_printer = cs_etm__packet_dump;
+ d_params.operation = CS_ETM_OPERATION_PRINT;
+ d_params.formatted = true;
+ d_params.fsyncs = false;
+ d_params.hsyncs = false;
+ d_params.frame_aligned = true;
+
+ decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+
+ zfree(&t_params);
+
+ if (!decoder)
+ return;
+ do {
+ size_t consumed;
+
+ ret = cs_etm_decoder__process_data_block(
+ decoder, buffer->offset,
+ &((u8 *)buffer->data)[buffer_used],
+ buffer->size - buffer_used, &consumed);
+ if (ret)
+ break;
+
+ buffer_used += consumed;
+ } while (buffer_used < buffer->size);
+
+ cs_etm_decoder__free(decoder);
+}
+
+static int cs_etm__flush_events(struct perf_session *session,
+ struct perf_tool *tool)
+{
+ int ret;
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ if (!etm->timeless_decoding)
+ return -EINVAL;
+
+ ret = cs_etm__update_queues(etm);
+
+ if (ret < 0)
+ return ret;
+
+ return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1);
+}
+
+static void cs_etm__free_queue(void *priv)
+{
+ struct cs_etm_queue *etmq = priv;
+
+ if (!etmq)
+ return;
+
+ thread__zput(etmq->thread);
+ cs_etm_decoder__free(etmq->decoder);
+ zfree(&etmq->event_buf);
+ zfree(&etmq->last_branch);
+ zfree(&etmq->last_branch_rb);
+ zfree(&etmq->prev_packet);
+ zfree(&etmq->packet);
+ free(etmq);
+}
+
+static void cs_etm__free_events(struct perf_session *session)
+{
+ unsigned int i;
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ struct auxtrace_queues *queues = &aux->queues;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ cs_etm__free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+
+ auxtrace_queues__free(queues);
+}
+
+static void cs_etm__free(struct perf_session *session)
+{
+ int i;
+ struct int_node *inode, *tmp;
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ cs_etm__free_events(session);
+ session->auxtrace = NULL;
+
+ /* First remove all traceID/metadata nodes for the RB tree */
+ intlist__for_each_entry_safe(inode, tmp, traceid_list)
+ intlist__remove(traceid_list, inode);
+ /* Then the RB tree itself */
+ intlist__delete(traceid_list);
+
+ for (i = 0; i < aux->num_cpu; i++)
+ zfree(&aux->metadata[i]);
+
+ thread__zput(aux->unknown_thread);
+ zfree(&aux->metadata);
+ zfree(&aux);
+}
+
+static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
+{
+ struct machine *machine;
+
+ machine = etmq->etm->machine;
+
+ if (address >= etmq->etm->kernel_start) {
+ if (machine__is_host(machine))
+ return PERF_RECORD_MISC_KERNEL;
+ else
+ return PERF_RECORD_MISC_GUEST_KERNEL;
+ } else {
+ if (machine__is_host(machine))
+ return PERF_RECORD_MISC_USER;
+ else if (perf_guest)
+ return PERF_RECORD_MISC_GUEST_USER;
+ else
+ return PERF_RECORD_MISC_HYPERVISOR;
+ }
+}
+
+static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address,
+ size_t size, u8 *buffer)
+{
+ u8 cpumode;
+ u64 offset;
+ int len;
+ struct thread *thread;
+ struct machine *machine;
+ struct addr_location al;
+
+ if (!etmq)
+ return -1;
+
+ machine = etmq->etm->machine;
+ cpumode = cs_etm__cpu_mode(etmq, address);
+
+ thread = etmq->thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_KERNEL)
+ return -EINVAL;
+ thread = etmq->etm->unknown_thread;
+ }
+
+ if (!thread__find_map(thread, cpumode, address, &al) || !al.map->dso)
+ return 0;
+
+ if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+ dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE))
+ return 0;
+
+ offset = al.map->map_ip(al.map, address);
+
+ map__load(al.map);
+
+ len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size);
+
+ if (len <= 0)
+ return 0;
+
+ return len;
+}
+
+static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
+ unsigned int queue_nr)
+{
+ int i;
+ struct cs_etm_decoder_params d_params;
+ struct cs_etm_trace_params *t_params;
+ struct cs_etm_queue *etmq;
+ size_t szp = sizeof(struct cs_etm_packet);
+
+ etmq = zalloc(sizeof(*etmq));
+ if (!etmq)
+ return NULL;
+
+ etmq->packet = zalloc(szp);
+ if (!etmq->packet)
+ goto out_free;
+
+ if (etm->synth_opts.last_branch || etm->sample_branches) {
+ etmq->prev_packet = zalloc(szp);
+ if (!etmq->prev_packet)
+ goto out_free;
+ }
+
+ if (etm->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += etm->synth_opts.last_branch_sz *
+ sizeof(struct branch_entry);
+ etmq->last_branch = zalloc(sz);
+ if (!etmq->last_branch)
+ goto out_free;
+ etmq->last_branch_rb = zalloc(sz);
+ if (!etmq->last_branch_rb)
+ goto out_free;
+ }
+
+ etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!etmq->event_buf)
+ goto out_free;
+
+ etmq->etm = etm;
+ etmq->queue_nr = queue_nr;
+ etmq->pid = -1;
+ etmq->tid = -1;
+ etmq->cpu = -1;
+
+ /* Use metadata to fill in trace parameters for trace decoder */
+ t_params = zalloc(sizeof(*t_params) * etm->num_cpu);
+
+ if (!t_params)
+ goto out_free;
+
+ for (i = 0; i < etm->num_cpu; i++) {
+ t_params[i].protocol = CS_ETM_PROTO_ETMV4i;
+ t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0];
+ t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1];
+ t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2];
+ t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8];
+ t_params[i].etmv4.reg_configr =
+ etm->metadata[i][CS_ETMV4_TRCCONFIGR];
+ t_params[i].etmv4.reg_traceidr =
+ etm->metadata[i][CS_ETMV4_TRCTRACEIDR];
+ }
+
+ /* Set decoder parameters to decode trace packets */
+ d_params.packet_printer = cs_etm__packet_dump;
+ d_params.operation = CS_ETM_OPERATION_DECODE;
+ d_params.formatted = true;
+ d_params.fsyncs = false;
+ d_params.hsyncs = false;
+ d_params.frame_aligned = true;
+ d_params.data = etmq;
+
+ etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params);
+
+ zfree(&t_params);
+
+ if (!etmq->decoder)
+ goto out_free;
+
+ /*
+ * Register a function to handle all memory accesses required by
+ * the trace decoder library.
+ */
+ if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+ 0x0L, ((u64) -1L),
+ cs_etm__mem_access))
+ goto out_free_decoder;
+
+ etmq->offset = 0;
+ etmq->period_instructions = 0;
+
+ return etmq;
+
+out_free_decoder:
+ cs_etm_decoder__free(etmq->decoder);
+out_free:
+ zfree(&etmq->event_buf);
+ zfree(&etmq->last_branch);
+ zfree(&etmq->last_branch_rb);
+ zfree(&etmq->prev_packet);
+ zfree(&etmq->packet);
+ free(etmq);
+
+ return NULL;
+}
+
+static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct cs_etm_queue *etmq = queue->priv;
+
+ if (list_empty(&queue->head) || etmq)
+ return 0;
+
+ etmq = cs_etm__alloc_queue(etm, queue_nr);
+
+ if (!etmq)
+ return -ENOMEM;
+
+ queue->priv = etmq;
+
+ if (queue->cpu != -1)
+ etmq->cpu = queue->cpu;
+
+ etmq->tid = queue->tid;
+
+ return 0;
+}
+
+static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < etm->queues.nr_queues; i++) {
+ ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int cs_etm__update_queues(struct cs_etm_auxtrace *etm)
+{
+ if (etm->queues.new_data) {
+ etm->queues.new_data = false;
+ return cs_etm__setup_queues(etm);
+ }
+
+ return 0;
+}
+
+static inline void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq)
+{
+ struct branch_stack *bs_src = etmq->last_branch_rb;
+ struct branch_stack *bs_dst = etmq->last_branch;
+ size_t nr = 0;
+
+ /*
+ * Set the number of records before early exit: ->nr is used to
+ * determine how many branches to copy from ->entries.
+ */
+ bs_dst->nr = bs_src->nr;
+
+ /*
+ * Early exit when there is nothing to copy.
+ */
+ if (!bs_src->nr)
+ return;
+
+ /*
+ * As bs_src->entries is a circular buffer, we need to copy from it in
+ * two steps. First, copy the branches from the most recently inserted
+ * branch ->last_branch_pos until the end of bs_src->entries buffer.
+ */
+ nr = etmq->etm->synth_opts.last_branch_sz - etmq->last_branch_pos;
+ memcpy(&bs_dst->entries[0],
+ &bs_src->entries[etmq->last_branch_pos],
+ sizeof(struct branch_entry) * nr);
+
+ /*
+ * If we wrapped around at least once, the branches from the beginning
+ * of the bs_src->entries buffer and until the ->last_branch_pos element
+ * are older valid branches: copy them over. The total number of
+ * branches copied over will be equal to the number of branches asked by
+ * the user in last_branch_sz.
+ */
+ if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
+ memcpy(&bs_dst->entries[nr],
+ &bs_src->entries[0],
+ sizeof(struct branch_entry) * etmq->last_branch_pos);
+ }
+}
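
The two memcpy() calls linearize the ring: entries run newest-first starting at the insert position, so the copy takes [pos, end) first and then wraps around to [0, pos). In miniature, with plain ints instead of branch entries:

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        int src[4] = { 30, 40, 10, 20 };        /* ring, newest at pos */
        int dst[4], pos = 2, sz = 4, i;

        memcpy(dst, &src[pos], (sz - pos) * sizeof(int));   /* 10, 20 */
        memcpy(&dst[sz - pos], src, pos * sizeof(int));     /* 30, 40 */

        for (i = 0; i < sz; i++)
            printf("%d\n", dst[i]);     /* 10 20 30 40, newest first */
        return 0;
    }
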
+
+static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
+{
+ etmq->last_branch_pos = 0;
+ etmq->last_branch_rb->nr = 0;
+}
+
+static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
+{
+ /* Returns 0 for the CS_ETM_TRACE_ON packet */
+ if (packet->sample_type == CS_ETM_TRACE_ON)
+ return 0;
+
+ /*
+ * The packet records the execution range with an exclusive end address
+ *
+ * A64 instructions are constant size, so the last executed
+ * instruction is A64_INSTR_SIZE before the end address
+ * Will need to do instruction level decode for T32 instructions as
+ * they can be variable size (not yet supported).
+ */
+ return packet->end_addr - A64_INSTR_SIZE;
+}
+
+static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
+{
+ /* Returns 0 for the CS_ETM_TRACE_ON packet */
+ if (packet->sample_type == CS_ETM_TRACE_ON)
+ return 0;
+
+ return packet->start_addr;
+}
+
+static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+{
+ /*
+ * Only A64 instructions are currently supported, so the instruction
+ * count can be obtained by dividing by the instruction size.
+ * Instruction-level decode will be needed for T32 instructions, as
+ * they can be variable size (not yet supported).
+ */
+ return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+}
+
+static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+ u64 offset)
+{
+ /*
+ * Only A64 instructions are currently supported, so the instruction
+ * address can be obtained by multiplying the offset by the
+ * instruction size.
+ * Instruction-level decode will be needed for T32 instructions, as
+ * they can be variable size (not yet supported).
+ */
+ return packet->start_addr + offset * A64_INSTR_SIZE;
+}
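
Because every A64 instruction is 4 bytes, all three helpers reduce to fixed-size arithmetic on the exclusive-end range. The numbers in one place, as compile-and-run assertions (the addresses are hypothetical):

    #include <assert.h>
    #include <stdint.h>

    #define A64_INSTR_SIZE 4

    int main(void)
    {
        uint64_t start = 0x400000, end = 0x400040;      /* exclusive end */

        assert((end - start) / A64_INSTR_SIZE == 16);   /* instr count */
        assert(start + 3 * A64_INSTR_SIZE == 0x40000c); /* instr addr  */
        assert(end - A64_INSTR_SIZE == 0x40003c);       /* last instr  */
        return 0;
    }
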
+
+static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
+{
+ struct branch_stack *bs = etmq->last_branch_rb;
+ struct branch_entry *be;
+
+ /*
+ * The branches are recorded in a circular buffer in reverse
+ * chronological order: we start recording from the last element of the
+ * buffer down. After writing the first element of the stack, move the
+ * insert position back to the end of the buffer.
+ */
+ if (!etmq->last_branch_pos)
+ etmq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
+
+ etmq->last_branch_pos -= 1;
+
+ be = &bs->entries[etmq->last_branch_pos];
+ be->from = cs_etm__last_executed_instr(etmq->prev_packet);
+ be->to = cs_etm__first_executed_instr(etmq->packet);
+ /* No support for mispredict */
+ be->flags.mispred = 0;
+ be->flags.predicted = 1;
+
+ /*
+ * Increment bs->nr until reaching the number of last branches asked by
+ * the user on the command line.
+ */
+ if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
+ bs->nr += 1;
+}
+
+static int cs_etm__inject_event(union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+static int
+cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq)
+{
+ struct auxtrace_buffer *aux_buffer = etmq->buffer;
+ struct auxtrace_buffer *old_buffer = aux_buffer;
+ struct auxtrace_queue *queue;
+
+ queue = &etmq->etm->queues.queue_array[etmq->queue_nr];
+
+ aux_buffer = auxtrace_buffer__next(queue, aux_buffer);
+
+ /* If no more data, drop the previous auxtrace_buffer and return */
+ if (!aux_buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ buff->len = 0;
+ return 0;
+ }
+
+ etmq->buffer = aux_buffer;
+
+ /* If the aux_buffer doesn't have data associated, try to load it */
+ if (!aux_buffer->data) {
+ /* get the file desc associated with the perf data file */
+ int fd = perf_data__fd(etmq->etm->session->data);
+
+ aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd);
+ if (!aux_buffer->data)
+ return -ENOMEM;
+ }
+
+ /* If valid, drop the previous buffer */
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+
+ buff->offset = aux_buffer->offset;
+ buff->len = aux_buffer->size;
+ buff->buf = aux_buffer->data;
+
+ buff->ref_timestamp = aux_buffer->reference;
+
+ return buff->len;
+}
+
+static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm,
+ struct auxtrace_queue *queue)
+{
+ struct cs_etm_queue *etmq = queue->priv;
+
+ /* CPU-wide tracing isn't supported yet */
+ if (queue->tid == -1)
+ return;
+
+ if ((!etmq->thread) && (etmq->tid != -1))
+ etmq->thread = machine__find_thread(etm->machine, -1,
+ etmq->tid);
+
+ if (etmq->thread) {
+ etmq->pid = etmq->thread->pid_;
+ if (queue->cpu == -1)
+ etmq->cpu = etmq->thread->cpu;
+ }
+}
+
+static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
+ u64 addr, u64 period)
+{
+ int ret = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ union perf_event *event = etmq->event_buf;
+ struct perf_sample sample = {.ip = 0,};
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, addr);
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample.ip = addr;
+ sample.pid = etmq->pid;
+ sample.tid = etmq->tid;
+ sample.id = etmq->etm->instructions_id;
+ sample.stream_id = etmq->etm->instructions_id;
+ sample.period = period;
+ sample.cpu = etmq->packet->cpu;
+ sample.flags = 0;
+ sample.insn_len = 1;
+ sample.cpumode = event->sample.header.misc;
+
+ if (etm->synth_opts.last_branch) {
+ cs_etm__copy_last_branch_rb(etmq);
+ sample.branch_stack = etmq->last_branch;
+ }
+
+ if (etm->synth_opts.inject) {
+ ret = cs_etm__inject_event(event, &sample,
+ etm->instructions_sample_type);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+ if (ret)
+ pr_err(
+ "CS ETM Trace: failed to deliver instruction event, error %d\n",
+ ret);
+
+ if (etm->synth_opts.last_branch)
+ cs_etm__reset_last_branch_rb(etmq);
+
+ return ret;
+}
+
+/*
+ * The CS ETM packet encodes an instruction range between a branch target
+ * and the next taken branch. Generate a sample accordingly.
+ */
+static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq)
+{
+ int ret = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct perf_sample sample = {.ip = 0,};
+ union perf_event *event = etmq->event_buf;
+ struct dummy_branch_stack {
+ u64 nr;
+ struct branch_entry entries;
+ } dummy_bs;
+ u64 ip;
+
+ ip = cs_etm__last_executed_instr(etmq->prev_packet);
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = cs_etm__cpu_mode(etmq, ip);
+ event->sample.header.size = sizeof(struct perf_event_header);
+
+ sample.ip = ip;
+ sample.pid = etmq->pid;
+ sample.tid = etmq->tid;
+ sample.addr = cs_etm__first_executed_instr(etmq->packet);
+ sample.id = etmq->etm->branches_id;
+ sample.stream_id = etmq->etm->branches_id;
+ sample.period = 1;
+ sample.cpu = etmq->packet->cpu;
+ sample.flags = 0;
+ sample.cpumode = event->sample.header.misc;
+
+ /*
+ * perf report cannot handle events without a branch stack
+ */
+ if (etm->synth_opts.last_branch) {
+ dummy_bs = (struct dummy_branch_stack){
+ .nr = 1,
+ .entries = {
+ .from = sample.ip,
+ .to = sample.addr,
+ },
+ };
+ sample.branch_stack = (struct branch_stack *)&dummy_bs;
+ }
+
+ if (etm->synth_opts.inject) {
+ ret = cs_etm__inject_event(event, &sample,
+ etm->branches_sample_type);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(etm->session, event, &sample);
+
+ if (ret)
+ pr_err(
+ "CS ETM Trace: failed to deliver instruction event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+struct cs_etm_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int cs_etm__event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct cs_etm_synth *cs_etm_synth =
+ container_of(tool, struct cs_etm_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(cs_etm_synth->session,
+ event, NULL);
+}
+
+static int cs_etm__synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct cs_etm_synth cs_etm_synth;
+
+ memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth));
+ cs_etm_synth.session = session;
+
+ return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1,
+ &id, cs_etm__event_synth);
+}
+
+static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == etm->pmu_type) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("No selected events with CoreSight Trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (etm->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+
+ attr.exclude_user = evsel->attr.exclude_user;
+ attr.exclude_kernel = evsel->attr.exclude_kernel;
+ attr.exclude_hv = evsel->attr.exclude_hv;
+ attr.exclude_host = evsel->attr.exclude_host;
+ attr.exclude_guest = evsel->attr.exclude_guest;
+ attr.sample_id_all = evsel->attr.sample_id_all;
+ attr.read_format = evsel->attr.read_format;
+
+ /* create new id val to be a fixed offset from evsel id */
+ id = evsel->id[0] + 1000000000;
+
+ if (!id)
+ id = 1;
+
+ if (etm->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ err = cs_etm__synth_event(session, &attr, id);
+ if (err)
+ return err;
+ etm->sample_branches = true;
+ etm->branches_sample_type = attr.sample_type;
+ etm->branches_id = id;
+ id += 1;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+ }
+
+ if (etm->synth_opts.last_branch)
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+ if (etm->synth_opts.instructions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ attr.sample_period = etm->synth_opts.period;
+ etm->instructions_sample_period = attr.sample_period;
+ err = cs_etm__synth_event(session, &attr, id);
+ if (err)
+ return err;
+ etm->sample_instructions = true;
+ etm->instructions_sample_type = attr.sample_type;
+ etm->instructions_id = id;
+ id += 1;
+ }
+
+ return 0;
+}
+
+static int cs_etm__sample(struct cs_etm_queue *etmq)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_packet *tmp;
+ int ret;
+ u64 instrs_executed;
+
+ instrs_executed = cs_etm__instr_count(etmq->packet);
+ etmq->period_instructions += instrs_executed;
+
+ /*
+ * Record a branch when the last instruction in
+ * PREV_PACKET is a branch.
+ */
+ if (etm->synth_opts.last_branch &&
+ etmq->prev_packet &&
+ etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+ etmq->prev_packet->last_instr_taken_branch)
+ cs_etm__update_last_branch_rb(etmq);
+
+ if (etm->sample_instructions &&
+ etmq->period_instructions >= etm->instructions_sample_period) {
+ /*
+ * Emit instruction sample periodically
+ * TODO: allow period to be defined in cycles and clock time
+ */
+
+ /* Get number of instructions executed after the sample point */
+ u64 instrs_over = etmq->period_instructions -
+ etm->instructions_sample_period;
+
+ /*
+ * Calculate the address of the sampled instruction (-1 as
+ * sample is reported as though instruction has just been
+ * executed, but PC has not advanced to next instruction)
+ */
+ u64 offset = (instrs_executed - instrs_over - 1);
+ u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+
+ ret = cs_etm__synth_instruction_sample(
+ etmq, addr, etm->instructions_sample_period);
+ if (ret)
+ return ret;
+
+ /* Carry remaining instructions into next sample period */
+ etmq->period_instructions = instrs_over;
+ }
+
+ if (etm->sample_branches && etmq->prev_packet) {
+ bool generate_sample = false;
+
+ /* Generate sample for tracing on packet */
+ if (etmq->prev_packet->sample_type == CS_ETM_TRACE_ON)
+ generate_sample = true;
+
+ /* Generate sample for branch taken packet */
+ if (etmq->prev_packet->sample_type == CS_ETM_RANGE &&
+ etmq->prev_packet->last_instr_taken_branch)
+ generate_sample = true;
+
+ if (generate_sample) {
+ ret = cs_etm__synth_branch_sample(etmq);
+ if (ret)
+ return ret;
+ }
+ }
+
+ if (etm->sample_branches || etm->synth_opts.last_branch) {
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ */
+ tmp = etmq->packet;
+ etmq->packet = etmq->prev_packet;
+ etmq->prev_packet = tmp;
+ }
+
+ return 0;
+}
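
The sample-point arithmetic is easiest to see with concrete numbers: with a period of 1000 and 250 instructions already pending, a 900-instruction packet overshoots by 150, the sampled instruction sits at offset 749 of the packet (the -1 accounts for the PC not yet having advanced), and the 150 overshoot carries into the next period. A quick standalone check of that arithmetic; the figures are hypothetical:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t period = 1000, pending = 250, executed = 900;
        uint64_t total = pending + executed;            /* 1150 */
        uint64_t over = total - period;                 /* 150  */
        uint64_t offset = executed - over - 1;          /* 749  */

        assert(over == 150 && offset == 749);
        /* the next period starts with 150 instructions pending */
        return 0;
    }
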
+
+static int cs_etm__flush(struct cs_etm_queue *etmq)
+{
+ int err = 0;
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_packet *tmp;
+
+ if (!etmq->prev_packet)
+ return 0;
+
+ /* Handle start tracing packet */
+ if (etmq->prev_packet->sample_type == CS_ETM_EMPTY)
+ goto swap_packet;
+
+ if (etmq->etm->synth_opts.last_branch &&
+ etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ /*
+ * Generate a last branch event for the branches left in the
+ * circular buffer at the end of the trace.
+ *
+ * Use the address of the end of the last reported execution
+ * range
+ */
+ u64 addr = cs_etm__last_executed_instr(etmq->prev_packet);
+
+ err = cs_etm__synth_instruction_sample(
+ etmq, addr,
+ etmq->period_instructions);
+ if (err)
+ return err;
+
+ etmq->period_instructions = 0;
+
+ }
+
+ if (etm->sample_branches &&
+ etmq->prev_packet->sample_type == CS_ETM_RANGE) {
+ err = cs_etm__synth_branch_sample(etmq);
+ if (err)
+ return err;
+ }
+
+swap_packet:
+ if (etm->sample_branches || etm->synth_opts.last_branch) {
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ */
+ tmp = etmq->packet;
+ etmq->packet = etmq->prev_packet;
+ etmq->prev_packet = tmp;
+ }
+
+ return err;
+}
+
+static int cs_etm__run_decoder(struct cs_etm_queue *etmq)
+{
+ struct cs_etm_auxtrace *etm = etmq->etm;
+ struct cs_etm_buffer buffer;
+ size_t buffer_used, processed;
+ int err = 0;
+
+ if (!etm->kernel_start)
+ etm->kernel_start = machine__kernel_start(etm->machine);
+
+ /* Go through each buffer in the queue and decode them one by one */
+ while (1) {
+ buffer_used = 0;
+ memset(&buffer, 0, sizeof(buffer));
+ err = cs_etm__get_trace(&buffer, etmq);
+ if (err <= 0)
+ return err;
+ /*
+		 * We cannot assume consecutive blocks in the data file are
+		 * contiguous; reset the decoder to force a re-sync.
+ */
+ err = cs_etm_decoder__reset(etmq->decoder);
+ if (err != 0)
+ return err;
+
+ /* Run trace decoder until buffer consumed or end of trace */
+ do {
+ processed = 0;
+ err = cs_etm_decoder__process_data_block(
+ etmq->decoder,
+ etmq->offset,
+ &buffer.buf[buffer_used],
+ buffer.len - buffer_used,
+ &processed);
+ if (err)
+ return err;
+
+ etmq->offset += processed;
+ buffer_used += processed;
+
+ /* Process each packet in this chunk */
+ while (1) {
+ err = cs_etm_decoder__get_packet(etmq->decoder,
+ etmq->packet);
+ if (err <= 0)
+ /*
+ * Stop processing this chunk on
+ * end of data or error
+ */
+ break;
+
+ switch (etmq->packet->sample_type) {
+ case CS_ETM_RANGE:
+ /*
+ * If the packet contains an instruction
+ * range, generate instruction sequence
+ * events.
+ */
+ cs_etm__sample(etmq);
+ break;
+ case CS_ETM_TRACE_ON:
+ /*
+ * Discontinuity in trace, flush
+ * previous branch stack
+ */
+ cs_etm__flush(etmq);
+ break;
+ case CS_ETM_EMPTY:
+ /*
+					 * Should not receive an empty packet;
+					 * report an error.
+ */
+ pr_err("CS ETM Trace: empty packet\n");
+ return -EINVAL;
+ default:
+ break;
+ }
+ }
+ } while (buffer.len > buffer_used);
+
+ if (err == 0)
+ /* Flush any remaining branch stack entries */
+ err = cs_etm__flush(etmq);
+ }
+
+ return err;
+}
+
+static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
+ pid_t tid, u64 time_)
+{
+ unsigned int i;
+ struct auxtrace_queues *queues = &etm->queues;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &etm->queues.queue_array[i];
+ struct cs_etm_queue *etmq = queue->priv;
+
+ if (etmq && ((tid == -1) || (etmq->tid == tid))) {
+ etmq->time = time_;
+ cs_etm__set_pid_tid_cpu(etm, queue);
+ cs_etm__run_decoder(etmq);
+ }
+ }
+
+ return 0;
+}
+
+static int cs_etm__process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ int err = 0;
+ u64 timestamp;
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("CoreSight ETM Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (!etm->timeless_decoding)
+ return -EINVAL;
+
+ if (sample->time && (sample->time != (u64) -1))
+ timestamp = sample->time;
+ else
+ timestamp = 0;
+
+ if (timestamp || etm->timeless_decoding) {
+ err = cs_etm__update_queues(etm);
+ if (err)
+ return err;
+ }
+
+ if (event->header.type == PERF_RECORD_EXIT)
+ return cs_etm__process_timeless_queues(etm,
+ event->fork.tid,
+ sample->time);
+
+ return 0;
+}
+
+static int cs_etm__process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+ if (!etm->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ bool is_pipe = perf_data__is_pipe(session->data);
+ int err;
+
+ if (is_pipe)
+ data_offset = 0;
+ else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&etm->queues, session,
+ event, data_offset, &buffer);
+ if (err)
+ return err;
+
+ if (dump_trace)
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ cs_etm__dump_event(etm, buffer);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+
+ return 0;
+}
+
+static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
+{
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = etm->session->evlist;
+ bool timeless_decoding = true;
+
+ /*
+	 * Cycle through the list of events; if any has the time bit set,
+	 * timed (rather than timeless) decoding is required.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->attr.sample_type & PERF_SAMPLE_TIME))
+ timeless_decoding = false;
+ }
+
+ return timeless_decoding;
+}
+
+static const char * const cs_etm_global_header_fmts[] = {
+ [CS_HEADER_VERSION_0] = " Header version %llx\n",
+ [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n",
+ [CS_ETM_SNAPSHOT] = " Snapshot %llx\n",
+};
+
+static const char * const cs_etm_priv_fmts[] = {
+ [CS_ETM_MAGIC] = " Magic number %llx\n",
+ [CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETM_ETMCR] = " ETMCR %llx\n",
+ [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n",
+ [CS_ETM_ETMCCER] = " ETMCCER %llx\n",
+ [CS_ETM_ETMIDR] = " ETMIDR %llx\n",
+};
+
+static const char * const cs_etmv4_priv_fmts[] = {
+ [CS_ETM_MAGIC] = " Magic number %llx\n",
+ [CS_ETM_CPU] = " CPU %lld\n",
+ [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n",
+ [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n",
+ [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n",
+ [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n",
+ [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n",
+ [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n",
+ [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n",
+};
+
+static void cs_etm__print_auxtrace_info(u64 *val, int num)
+{
+ int i, j, cpu = 0;
+
+ for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
+ fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
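+	/*
+	 * Per-CPU blocks follow the global header; the magic number at the
+	 * start of each block selects the ETMv3 or ETMv4 field table and
+	 * hence how many words to consume.
+	 */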
+ for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) {
+ if (val[i] == __perf_cs_etmv3_magic)
+ for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++)
+ fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+ else if (val[i] == __perf_cs_etmv4_magic)
+ for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++)
+ fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+ else
+			/* unknown magic number, bail out */
+ return;
+ }
+}
+
+int cs_etm__process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ struct cs_etm_auxtrace *etm = NULL;
+ struct int_node *inode;
+ unsigned int pmu_type;
+ int event_header_size = sizeof(struct perf_event_header);
+ int info_header_size;
+ int total_size = auxtrace_info->header.size;
+ int priv_size = 0;
+ int num_cpu;
+ int err = 0, idx = -1;
+ int i, j, k;
+ u64 *ptr, *hdr = NULL;
+ u64 **metadata = NULL;
+
+ /*
+ * sizeof(auxtrace_info_event::type) +
+ * sizeof(auxtrace_info_event::reserved) == 8
+ */
+ info_header_size = 8;
+
+ if (total_size < (event_header_size + info_header_size))
+ return -EINVAL;
+
+ priv_size = total_size - event_header_size - info_header_size;
+
+ /* First the global part */
+ ptr = (u64 *) auxtrace_info->priv;
+
+ /* Look for version '0' of the header */
+ if (ptr[0] != 0)
+ return -EINVAL;
+
+ hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX);
+ if (!hdr)
+ return -ENOMEM;
+
+ /* Extract header information - see cs-etm.h for format */
+ for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++)
+ hdr[i] = ptr[i];
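+
+	/*
+	 * CS_PMU_TYPE_CPUS packs the PMU type in its upper 32 bits and the
+	 * number of CPUs in its lower 32 bits (see cs-etm.h).
+	 */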
+ num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+ pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
+ 0xffffffff);
+
+ /*
+	 * Create an RB tree of traceID-metadata tuples. Since the lookup
+	 * has to be made for each packet that gets decoded, optimizing
+	 * access beyond a sequential array scan is worth doing.
+ */
+ traceid_list = intlist__new(NULL);
+ if (!traceid_list) {
+ err = -ENOMEM;
+ goto err_free_hdr;
+ }
+
+ metadata = zalloc(sizeof(*metadata) * num_cpu);
+ if (!metadata) {
+ err = -ENOMEM;
+ goto err_free_traceid_list;
+ }
+
+ /*
+ * The metadata is stored in the auxtrace_info section and encodes
+	 * the configuration of the ARM Embedded Trace Macrocell, which the
+	 * trace decoder needs in order to properly decode the highly
+	 * compressed trace.
+ */
+ for (j = 0; j < num_cpu; j++) {
+ if (ptr[i] == __perf_cs_etmv3_magic) {
+ metadata[j] = zalloc(sizeof(*metadata[j]) *
+ CS_ETM_PRIV_MAX);
+ if (!metadata[j]) {
+ err = -ENOMEM;
+ goto err_free_metadata;
+ }
+ for (k = 0; k < CS_ETM_PRIV_MAX; k++)
+ metadata[j][k] = ptr[i + k];
+
+ /* The traceID is our handle */
+ idx = metadata[j][CS_ETM_ETMTRACEIDR];
+ i += CS_ETM_PRIV_MAX;
+ } else if (ptr[i] == __perf_cs_etmv4_magic) {
+ metadata[j] = zalloc(sizeof(*metadata[j]) *
+ CS_ETMV4_PRIV_MAX);
+ if (!metadata[j]) {
+ err = -ENOMEM;
+ goto err_free_metadata;
+ }
+ for (k = 0; k < CS_ETMV4_PRIV_MAX; k++)
+ metadata[j][k] = ptr[i + k];
+
+ /* The traceID is our handle */
+ idx = metadata[j][CS_ETMV4_TRCTRACEIDR];
+ i += CS_ETMV4_PRIV_MAX;
+ }
+
+ /* Get an RB node for this CPU */
+ inode = intlist__findnew(traceid_list, idx);
+
+ /* Something went wrong, no need to continue */
+ if (!inode) {
+ err = PTR_ERR(inode);
+ goto err_free_metadata;
+ }
+
+ /*
+		 * The node for that CPU should not already be taken;
+		 * back out if it is.
+ */
+ if (inode->priv) {
+ err = -EINVAL;
+ goto err_free_metadata;
+ }
+ /* All good, associate the traceID with the metadata pointer */
+ inode->priv = metadata[j];
+ }
+
+ /*
+ * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and
+ * CS_ETMV4_PRIV_MAX mark how many double words are in the
+ * global metadata, and each cpu's metadata respectively.
+ * The following tests if the correct number of double words was
+ * present in the auxtrace info section.
+ */
+ if (i * 8 != priv_size) {
+ err = -EINVAL;
+ goto err_free_metadata;
+ }
+
+ etm = zalloc(sizeof(*etm));
+
+ if (!etm) {
+ err = -ENOMEM;
+ goto err_free_metadata;
+ }
+
+ err = auxtrace_queues__init(&etm->queues);
+ if (err)
+ goto err_free_etm;
+
+ etm->session = session;
+ etm->machine = &session->machines.host;
+
+ etm->num_cpu = num_cpu;
+ etm->pmu_type = pmu_type;
+ etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
+ etm->metadata = metadata;
+ etm->auxtrace_type = auxtrace_info->type;
+ etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
+
+ etm->auxtrace.process_event = cs_etm__process_event;
+ etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event;
+ etm->auxtrace.flush_events = cs_etm__flush_events;
+ etm->auxtrace.free_events = cs_etm__free_events;
+ etm->auxtrace.free = cs_etm__free;
+ session->auxtrace = &etm->auxtrace;
+
+ etm->unknown_thread = thread__new(999999999, 999999999);
+ if (!etm->unknown_thread)
+ goto err_free_queues;
+
+ /*
+	 * Initialize the list node so that thread__zput() does not hit a
+	 * segmentation fault in list_del_init().
+ */
+ INIT_LIST_HEAD(&etm->unknown_thread->node);
+
+ err = thread__set_comm(etm->unknown_thread, "unknown", 0);
+ if (err)
+ goto err_delete_thread;
+
+ if (thread__init_map_groups(etm->unknown_thread, etm->machine))
+ goto err_delete_thread;
+
+ if (dump_trace) {
+ cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
+ return 0;
+ }
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+ etm->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&etm->synth_opts);
+ etm->synth_opts.callchain = false;
+ }
+
+ err = cs_etm__synth_events(etm, session);
+ if (err)
+ goto err_delete_thread;
+
+ err = auxtrace_queues__process_index(&etm->queues, session);
+ if (err)
+ goto err_delete_thread;
+
+ etm->data_queued = etm->queues.populated;
+
+ return 0;
+
+err_delete_thread:
+ thread__zput(etm->unknown_thread);
+err_free_queues:
+ auxtrace_queues__free(&etm->queues);
+ session->auxtrace = NULL;
+err_free_etm:
+ zfree(&etm);
+err_free_metadata:
+ /* No need to check @metadata[j], free(NULL) is supported */
+ for (j = 0; j < num_cpu; j++)
+ free(metadata[j]);
+ zfree(&metadata);
+err_free_traceid_list:
+ intlist__delete(traceid_list);
+err_free_hdr:
+ zfree(&hdr);
+
+ return -EINVAL;
+}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
new file mode 100644
index 000000000..97c3152f5
--- /dev/null
+++ b/tools/perf/util/cs-etm.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(C) 2015 Linaro Limited. All rights reserved.
+ * Author: Mathieu Poirier <mathieu.poirier@linaro.org>
+ */
+
+#ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
+#define INCLUDE__UTIL_PERF_CS_ETM_H__
+
+#include "util/event.h"
+#include "util/session.h"
+
+/*
+ * Versioning header in case things need to change in the future. That way
+ * decoding of old snapshots is still possible.
+ */
+enum {
+ /* Starting with 0x0 */
+ CS_HEADER_VERSION_0,
+ /* PMU->type (32 bit), total # of CPUs (32 bit) */
+ CS_PMU_TYPE_CPUS,
+ CS_ETM_SNAPSHOT,
+ CS_HEADER_VERSION_0_MAX,
+};
+
+/* Beginning of header common to both ETMv3 and V4 */
+enum {
+ CS_ETM_MAGIC,
+ CS_ETM_CPU,
+};
+
+/* ETMv3/PTM metadata */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETM_ETMCR = CS_ETM_CPU + 1,
+ CS_ETM_ETMTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETM_ETMCCER,
+ CS_ETM_ETMIDR,
+ CS_ETM_PRIV_MAX,
+};
+
+/* ETMv4 metadata */
+enum {
+ /* Dynamic, configurable parameters */
+ CS_ETMV4_TRCCONFIGR = CS_ETM_CPU + 1,
+ CS_ETMV4_TRCTRACEIDR,
+ /* RO, taken from sysFS */
+ CS_ETMV4_TRCIDR0,
+ CS_ETMV4_TRCIDR1,
+ CS_ETMV4_TRCIDR2,
+ CS_ETMV4_TRCIDR8,
+ CS_ETMV4_TRCAUTHSTATUS,
+ CS_ETMV4_PRIV_MAX,
+};
+
+#define KiB(x) ((x) * 1024)
+#define MiB(x) ((x) * 1024 * 1024)
+
+#define CS_ETM_HEADER_SIZE (CS_HEADER_VERSION_0_MAX * sizeof(u64))
+
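+/*
+ * Per-CPU metadata blocks in the auxtrace_info section start with one of
+ * these magic values, letting the reader tell ETMv3 and ETMv4 entries apart.
+ */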
+static const u64 __perf_cs_etmv3_magic = 0x3030303030303030ULL;
+static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL;
+#define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64))
+#define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
+
+#ifdef HAVE_CSTRACE_SUPPORT
+int cs_etm__process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+#else
+static inline int
+cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ return -1;
+}
+
+static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
+ int *cpu __maybe_unused)
+{
+ return -1;
+}
+#endif
+
+#endif
diff --git a/tools/perf/util/ctype.c b/tools/perf/util/ctype.c
new file mode 100644
index 000000000..ee4c1e8ed
--- /dev/null
+++ b/tools/perf/util/ctype.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Sane locale-independent, ASCII ctype.
+ *
+ * No surprises, and works with signed and unsigned chars.
+ */
+#include "sane_ctype.h"
+
+enum {
+ S = GIT_SPACE,
+ A = GIT_ALPHA,
+ D = GIT_DIGIT,
+ G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
+	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
+ P = GIT_PRINT_EXTRA, /* printable - alpha - digit - glob - regex */
+
+ PS = GIT_SPACE | GIT_PRINT_EXTRA,
+};
+
+unsigned char sane_ctype[256] = {
+/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */
+ PS,P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
+ D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
+ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, P, R, P, /* 80.. 95 */
+ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
+ A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */
+ /* Nothing in the 128.. range */
+};
+
+const char *graph_line =
+ "_____________________________________________________________________"
+ "_____________________________________________________________________"
+ "_____________________________________________________________________";
+const char *graph_dotted_line =
+ "---------------------------------------------------------------------"
+ "---------------------------------------------------------------------"
+ "---------------------------------------------------------------------";
+const char *spaces =
+ " "
+ " "
+ " ";
+const char *dots =
+ "....................................................................."
+ "....................................................................."
+ ".....................................................................";
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
new file mode 100644
index 000000000..24f2a87cf
--- /dev/null
+++ b/tools/perf/util/data-convert-bt.c
@@ -0,0 +1,1677 @@
+/*
+ * CTF writing support via babeltrace.
+ *
+ * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com>
+ * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <babeltrace/ctf-writer/writer.h>
+#include <babeltrace/ctf-writer/clock.h>
+#include <babeltrace/ctf-writer/stream.h>
+#include <babeltrace/ctf-writer/event.h>
+#include <babeltrace/ctf-writer/event-types.h>
+#include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
+#include <babeltrace/ctf/events.h>
+#include <traceevent/event-parse.h>
+#include "asm/bug.h"
+#include "data-convert-bt.h"
+#include "session.h"
+#include "util.h"
+#include "debug.h"
+#include "tool.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "machine.h"
+#include "config.h"
+#include "sane_ctype.h"
+
+#define pr_N(n, fmt, ...) \
+ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+struct evsel_priv {
+ struct bt_ctf_event_class *event_class;
+};
+
+#define MAX_CPUS 4096
+
+struct ctf_stream {
+ struct bt_ctf_stream *stream;
+ int cpu;
+ u32 count;
+};
+
+struct ctf_writer {
+ /* writer primitives */
+ struct bt_ctf_writer *writer;
+ struct ctf_stream **stream;
+ int stream_cnt;
+ struct bt_ctf_stream_class *stream_class;
+ struct bt_ctf_clock *clock;
+
+ /* data types */
+ union {
+ struct {
+ struct bt_ctf_field_type *s64;
+ struct bt_ctf_field_type *u64;
+ struct bt_ctf_field_type *s32;
+ struct bt_ctf_field_type *u32;
+ struct bt_ctf_field_type *string;
+ struct bt_ctf_field_type *u32_hex;
+ struct bt_ctf_field_type *u64_hex;
+ };
+ struct bt_ctf_field_type *array[6];
+ } data;
+ struct bt_ctf_event_class *comm_class;
+ struct bt_ctf_event_class *exit_class;
+ struct bt_ctf_event_class *fork_class;
+ struct bt_ctf_event_class *mmap_class;
+ struct bt_ctf_event_class *mmap2_class;
+};
+
+struct convert {
+ struct perf_tool tool;
+ struct ctf_writer writer;
+
+ u64 events_size;
+ u64 events_count;
+ u64 non_sample_count;
+
+ /* Ordered events configured queue size. */
+ u64 queue_size;
+};
+
+static int value_set(struct bt_ctf_field_type *type,
+ struct bt_ctf_event *event,
+ const char *name, u64 val)
+{
+ struct bt_ctf_field *field;
+ bool sign = bt_ctf_field_type_integer_get_signed(type);
+ int ret;
+
+ field = bt_ctf_field_create(type);
+ if (!field) {
+ pr_err("failed to create a field %s\n", name);
+ return -1;
+ }
+
+ if (sign) {
+ ret = bt_ctf_field_signed_integer_set_value(field, val);
+ if (ret) {
+ pr_err("failed to set field value %s\n", name);
+ goto err;
+ }
+ } else {
+ ret = bt_ctf_field_unsigned_integer_set_value(field, val);
+ if (ret) {
+ pr_err("failed to set field value %s\n", name);
+ goto err;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, name, field);
+ if (ret) {
+ pr_err("failed to set payload %s\n", name);
+ goto err;
+ }
+
+ pr2(" SET [%s = %" PRIu64 "]\n", name, val);
+
+err:
+ bt_ctf_field_put(field);
+ return ret;
+}
+
+#define __FUNC_VALUE_SET(_name, _val_type) \
+static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \
+ struct bt_ctf_event *event, \
+ const char *name, \
+ _val_type val) \
+{ \
+ struct bt_ctf_field_type *type = cw->data._name; \
+ return value_set(type, event, name, (u64) val); \
+}
+
+#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name)
+
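+/* Instantiate typed value_set_s32/u32/s64/u64/u64_hex() wrappers */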
+FUNC_VALUE_SET(s32)
+FUNC_VALUE_SET(u32)
+FUNC_VALUE_SET(s64)
+FUNC_VALUE_SET(u64)
+__FUNC_VALUE_SET(u64_hex, u64)
+
+static int string_set_value(struct bt_ctf_field *field, const char *string);
+static __maybe_unused int
+value_set_string(struct ctf_writer *cw, struct bt_ctf_event *event,
+ const char *name, const char *string)
+{
+ struct bt_ctf_field_type *type = cw->data.string;
+ struct bt_ctf_field *field;
+ int ret = 0;
+
+ field = bt_ctf_field_create(type);
+ if (!field) {
+ pr_err("failed to create a field %s\n", name);
+ return -1;
+ }
+
+ ret = string_set_value(field, string);
+ if (ret) {
+ pr_err("failed to set value %s\n", name);
+ goto err_put_field;
+ }
+
+ ret = bt_ctf_event_set_payload(event, name, field);
+ if (ret)
+ pr_err("failed to set payload %s\n", name);
+
+err_put_field:
+ bt_ctf_field_put(field);
+ return ret;
+}
+
+static struct bt_ctf_field_type*
+get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
+{
+ unsigned long flags = field->flags;
+
+ if (flags & FIELD_IS_STRING)
+ return cw->data.string;
+
+ if (!(flags & FIELD_IS_SIGNED)) {
+		/* unsigned longs are mostly pointers */
+ if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER)
+ return cw->data.u64_hex;
+ }
+
+ if (flags & FIELD_IS_SIGNED) {
+ if (field->size == 8)
+ return cw->data.s64;
+ else
+ return cw->data.s32;
+ }
+
+ if (field->size == 8)
+ return cw->data.u64;
+ else
+ return cw->data.u32;
+}
+
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+ unsigned long long value_mask;
+
+ /*
+ * value_mask = (1 << (size * 8 - 1)) - 1.
+ * Directly set value_mask for code readers.
+ */
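+	/*
+	 * Worked example: size == 1, value_int == 0x80 (-128 as s8) gives
+	 * value_mask == 0x7f; the sign bit is set, so the result below is
+	 * (0x80 & 0x7f) | ~0x7f == 0xffffffffffffff80.
+	 */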
+ switch (size) {
+ case 1:
+ value_mask = 0x7fULL;
+ break;
+ case 2:
+ value_mask = 0x7fffULL;
+ break;
+ case 4:
+ value_mask = 0x7fffffffULL;
+ break;
+ case 8:
+ /*
+		 * For a 64-bit value, return it as is. There are no
+		 * high bits to fill.
+ */
+ /* Fall through */
+ default:
+ /* BUG! */
+ return value_int;
+ }
+
+ /* If it is a positive value, don't adjust. */
+ if ((value_int & (~0ULL - value_mask)) == 0)
+ return value_int;
+
+	/* Fill the upper bits of value_int with 1s to make it a negative long long. */
+ return (value_int & value_mask) | ~value_mask;
+}
+
+static int string_set_value(struct bt_ctf_field *field, const char *string)
+{
+ char *buffer = NULL;
+ size_t len = strlen(string), i, p;
+ int err;
+
+ for (i = p = 0; i < len; i++, p++) {
+ if (isprint(string[i])) {
+ if (!buffer)
+ continue;
+ buffer[p] = string[i];
+ } else {
+ char numstr[5];
+
+ snprintf(numstr, sizeof(numstr), "\\x%02x",
+ (unsigned int)(string[i]) & 0xff);
+
+ if (!buffer) {
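+				/*
+				 * Worst case: each of the remaining (len - i)
+				 * bytes expands to a 4-char "\xNN" escape;
+				 * +2 covers the terminating NUL, with one
+				 * byte of slack.
+				 */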
+ buffer = zalloc(i + (len - i) * 4 + 2);
+ if (!buffer) {
+ pr_err("failed to set unprintable string '%s'\n", string);
+ return bt_ctf_field_string_set_value(field, "UNPRINTABLE-STRING");
+ }
+ if (i > 0)
+ strncpy(buffer, string, i);
+ }
+ memcpy(buffer + p, numstr, 4);
+ p += 3;
+ }
+ }
+
+ if (!buffer)
+ return bt_ctf_field_string_set_value(field, string);
+ err = bt_ctf_field_string_set_value(field, buffer);
+ free(buffer);
+ return err;
+}
+
+static int add_tracepoint_field_value(struct ctf_writer *cw,
+ struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct perf_sample *sample,
+ struct format_field *fmtf)
+{
+ struct bt_ctf_field_type *type;
+ struct bt_ctf_field *array_field;
+ struct bt_ctf_field *field;
+ const char *name = fmtf->name;
+ void *data = sample->raw_data;
+ unsigned long flags = fmtf->flags;
+ unsigned int n_items;
+ unsigned int i;
+ unsigned int offset;
+ unsigned int len;
+ int ret;
+
+ name = fmtf->alias;
+ offset = fmtf->offset;
+ len = fmtf->size;
+ if (flags & FIELD_IS_STRING)
+ flags &= ~FIELD_IS_ARRAY;
+
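+	/*
+	 * Dynamic (__data_loc) fields store a 32-bit descriptor in place of
+	 * the data itself: the length sits in the upper 16 bits and the
+	 * offset of the payload within raw_data in the lower 16 bits.
+	 */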
+ if (flags & FIELD_IS_DYNAMIC) {
+ unsigned long long tmp_val;
+
+ tmp_val = tep_read_number(fmtf->event->pevent,
+ data + offset, len);
+ offset = tmp_val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+
+ if (flags & FIELD_IS_ARRAY) {
+
+ type = bt_ctf_event_class_get_field_by_name(
+ event_class, name);
+ array_field = bt_ctf_field_create(type);
+ bt_ctf_field_type_put(type);
+ if (!array_field) {
+ pr_err("Failed to create array type %s\n", name);
+ return -1;
+ }
+
+ len = fmtf->size / fmtf->arraylen;
+ n_items = fmtf->arraylen;
+ } else {
+ n_items = 1;
+ array_field = NULL;
+ }
+
+ type = get_tracepoint_field_type(cw, fmtf);
+
+ for (i = 0; i < n_items; i++) {
+ if (flags & FIELD_IS_ARRAY)
+ field = bt_ctf_field_array_get_field(array_field, i);
+ else
+ field = bt_ctf_field_create(type);
+
+ if (!field) {
+ pr_err("failed to create a field %s\n", name);
+ return -1;
+ }
+
+ if (flags & FIELD_IS_STRING)
+ ret = string_set_value(field, data + offset + i * len);
+ else {
+ unsigned long long value_int;
+
+ value_int = tep_read_number(
+ fmtf->event->pevent,
+ data + offset + i * len, len);
+
+ if (!(flags & FIELD_IS_SIGNED))
+ ret = bt_ctf_field_unsigned_integer_set_value(
+ field, value_int);
+ else
+ ret = bt_ctf_field_signed_integer_set_value(
+ field, adjust_signedness(value_int, len));
+ }
+
+ if (ret) {
+ pr_err("failed to set file value %s\n", name);
+ goto err_put_field;
+ }
+ if (!(flags & FIELD_IS_ARRAY)) {
+ ret = bt_ctf_event_set_payload(event, name, field);
+ if (ret) {
+ pr_err("failed to set payload %s\n", name);
+ goto err_put_field;
+ }
+ }
+ bt_ctf_field_put(field);
+ }
+ if (flags & FIELD_IS_ARRAY) {
+ ret = bt_ctf_event_set_payload(event, name, array_field);
+ if (ret) {
+ pr_err("Failed add payload array %s\n", name);
+ return -1;
+ }
+ bt_ctf_field_put(array_field);
+ }
+ return 0;
+
+err_put_field:
+ bt_ctf_field_put(field);
+ return -1;
+}
+
+static int add_tracepoint_fields_values(struct ctf_writer *cw,
+ struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct format_field *fields,
+ struct perf_sample *sample)
+{
+ struct format_field *field;
+ int ret;
+
+ for (field = fields; field; field = field->next) {
+ ret = add_tracepoint_field_value(cw, event_class, event, sample,
+ field);
+ if (ret)
+ return -1;
+ }
+ return 0;
+}
+
+static int add_tracepoint_values(struct ctf_writer *cw,
+ struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ struct format_field *common_fields = evsel->tp_format->format.common_fields;
+ struct format_field *fields = evsel->tp_format->format.fields;
+ int ret;
+
+ ret = add_tracepoint_fields_values(cw, event_class, event,
+ common_fields, sample);
+ if (!ret)
+ ret = add_tracepoint_fields_values(cw, event_class, event,
+ fields, sample);
+
+ return ret;
+}
+
+static int
+add_bpf_output_values(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct perf_sample *sample)
+{
+ struct bt_ctf_field_type *len_type, *seq_type;
+ struct bt_ctf_field *len_field, *seq_field;
+ unsigned int raw_size = sample->raw_size;
+ unsigned int nr_elements = raw_size / sizeof(u32);
+ unsigned int i;
+ int ret;
+
+ if (nr_elements * sizeof(u32) != raw_size)
+ pr_warning("Incorrect raw_size (%u) in bpf output event, skip %zu bytes\n",
+			   raw_size, raw_size - nr_elements * sizeof(u32));
+
+ len_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_len");
+ len_field = bt_ctf_field_create(len_type);
+ if (!len_field) {
+ pr_err("failed to create 'raw_len' for bpf output event\n");
+ ret = -1;
+ goto put_len_type;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+ if (ret) {
+ pr_err("failed to set field value for raw_len\n");
+ goto put_len_field;
+ }
+ ret = bt_ctf_event_set_payload(event, "raw_len", len_field);
+ if (ret) {
+ pr_err("failed to set payload to raw_len\n");
+ goto put_len_field;
+ }
+
+ seq_type = bt_ctf_event_class_get_field_by_name(event_class, "raw_data");
+ seq_field = bt_ctf_field_create(seq_type);
+ if (!seq_field) {
+ pr_err("failed to create 'raw_data' for bpf output event\n");
+ ret = -1;
+ goto put_seq_type;
+ }
+
+ ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+ if (ret) {
+ pr_err("failed to set length of 'raw_data'\n");
+ goto put_seq_field;
+ }
+
+ for (i = 0; i < nr_elements; i++) {
+ struct bt_ctf_field *elem_field =
+ bt_ctf_field_sequence_get_field(seq_field, i);
+
+ ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+ ((u32 *)(sample->raw_data))[i]);
+
+ bt_ctf_field_put(elem_field);
+ if (ret) {
+ pr_err("failed to set raw_data[%d]\n", i);
+ goto put_seq_field;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, "raw_data", seq_field);
+ if (ret)
+ pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+ bt_ctf_field_put(seq_field);
+put_seq_type:
+ bt_ctf_field_type_put(seq_type);
+put_len_field:
+ bt_ctf_field_put(len_field);
+put_len_type:
+ bt_ctf_field_type_put(len_type);
+ return ret;
+}
+
+static int
+add_callchain_output_values(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_event *event,
+ struct ip_callchain *callchain)
+{
+ struct bt_ctf_field_type *len_type, *seq_type;
+ struct bt_ctf_field *len_field, *seq_field;
+ unsigned int nr_elements = callchain->nr;
+ unsigned int i;
+ int ret;
+
+ len_type = bt_ctf_event_class_get_field_by_name(
+ event_class, "perf_callchain_size");
+ len_field = bt_ctf_field_create(len_type);
+ if (!len_field) {
+ pr_err("failed to create 'perf_callchain_size' for callchain output event\n");
+ ret = -1;
+ goto put_len_type;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(len_field, nr_elements);
+ if (ret) {
+ pr_err("failed to set field value for perf_callchain_size\n");
+ goto put_len_field;
+ }
+ ret = bt_ctf_event_set_payload(event, "perf_callchain_size", len_field);
+ if (ret) {
+ pr_err("failed to set payload to perf_callchain_size\n");
+ goto put_len_field;
+ }
+
+ seq_type = bt_ctf_event_class_get_field_by_name(
+ event_class, "perf_callchain");
+ seq_field = bt_ctf_field_create(seq_type);
+ if (!seq_field) {
+ pr_err("failed to create 'perf_callchain' for callchain output event\n");
+ ret = -1;
+ goto put_seq_type;
+ }
+
+ ret = bt_ctf_field_sequence_set_length(seq_field, len_field);
+ if (ret) {
+ pr_err("failed to set length of 'perf_callchain'\n");
+ goto put_seq_field;
+ }
+
+ for (i = 0; i < nr_elements; i++) {
+ struct bt_ctf_field *elem_field =
+ bt_ctf_field_sequence_get_field(seq_field, i);
+
+ ret = bt_ctf_field_unsigned_integer_set_value(elem_field,
+ ((u64 *)(callchain->ips))[i]);
+
+ bt_ctf_field_put(elem_field);
+ if (ret) {
+ pr_err("failed to set callchain[%d]\n", i);
+ goto put_seq_field;
+ }
+ }
+
+ ret = bt_ctf_event_set_payload(event, "perf_callchain", seq_field);
+ if (ret)
+ pr_err("failed to set payload for raw_data\n");
+
+put_seq_field:
+ bt_ctf_field_put(seq_field);
+put_seq_type:
+ bt_ctf_field_type_put(seq_type);
+put_len_field:
+ bt_ctf_field_put(len_field);
+put_len_type:
+ bt_ctf_field_type_put(len_type);
+ return ret;
+}
+
+static int add_generic_values(struct ctf_writer *cw,
+ struct bt_ctf_event *event,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ u64 type = evsel->attr.sample_type;
+ int ret;
+
+ /*
+ * missing:
+ * PERF_SAMPLE_TIME - not needed as we have it in
+ * ctf event header
+ * PERF_SAMPLE_READ - TODO
+ * PERF_SAMPLE_RAW - tracepoint fields are handled separately
+ * PERF_SAMPLE_BRANCH_STACK - TODO
+ * PERF_SAMPLE_REGS_USER - TODO
+ * PERF_SAMPLE_STACK_USER - TODO
+ */
+
+ if (type & PERF_SAMPLE_IP) {
+ ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ ret = value_set_s32(cw, event, "perf_tid", sample->tid);
+ if (ret)
+ return -1;
+
+ ret = value_set_s32(cw, event, "perf_pid", sample->pid);
+ if (ret)
+ return -1;
+ }
+
+ if ((type & PERF_SAMPLE_ID) ||
+ (type & PERF_SAMPLE_IDENTIFIER)) {
+ ret = value_set_u64(cw, event, "perf_id", sample->id);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ ret = value_set_u64(cw, event, "perf_period", sample->period);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT) {
+ ret = value_set_u64(cw, event, "perf_weight", sample->weight);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_DATA_SRC) {
+ ret = value_set_u64(cw, event, "perf_data_src",
+ sample->data_src);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_TRANSACTION) {
+ ret = value_set_u64(cw, event, "perf_transaction",
+ sample->transaction);
+ if (ret)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+ int err = 0;
+
+ if (cs) {
+ err = bt_ctf_stream_flush(cs->stream);
+ if (err)
+ pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+ pr("Flush stream for cpu %d (%u samples)\n",
+ cs->cpu, cs->count);
+
+ cs->count = 0;
+ }
+
+ return err;
+}
+
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+ struct ctf_stream *cs;
+ struct bt_ctf_field *pkt_ctx = NULL;
+ struct bt_ctf_field *cpu_field = NULL;
+ struct bt_ctf_stream *stream = NULL;
+ int ret;
+
+ cs = zalloc(sizeof(*cs));
+ if (!cs) {
+ pr_err("Failed to allocate ctf stream\n");
+ return NULL;
+ }
+
+ stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+ if (!stream) {
+ pr_err("Failed to create CTF stream\n");
+ goto out;
+ }
+
+ pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+ if (!pkt_ctx) {
+ pr_err("Failed to obtain packet context\n");
+ goto out;
+ }
+
+ cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+ bt_ctf_field_put(pkt_ctx);
+ if (!cpu_field) {
+ pr_err("Failed to obtain cpu field\n");
+ goto out;
+ }
+
+ ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
+ if (ret) {
+ pr_err("Failed to update CPU number\n");
+ goto out;
+ }
+
+ bt_ctf_field_put(cpu_field);
+
+ cs->cpu = cpu;
+ cs->stream = stream;
+ return cs;
+
+out:
+ if (cpu_field)
+ bt_ctf_field_put(cpu_field);
+ if (stream)
+ bt_ctf_stream_put(stream);
+
+ free(cs);
+ return NULL;
+}
+
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+ if (cs) {
+ bt_ctf_stream_put(cs->stream);
+ free(cs);
+ }
+}
+
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+ struct ctf_stream *cs = cw->stream[cpu];
+
+ if (!cs) {
+ cs = ctf_stream__create(cw, cpu);
+ cw->stream[cpu] = cs;
+ }
+
+ return cs;
+}
+
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ int cpu = 0;
+
+ if (evsel->attr.sample_type & PERF_SAMPLE_CPU)
+ cpu = sample->cpu;
+
+ if (cpu > cw->stream_cnt) {
+ pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+ cpu, cw->stream_cnt);
+ cpu = 0;
+ }
+
+ return cpu;
+}
+
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * Currently we have no way to determine when to
+ * flush a stream other than to keep track of the
+ * number of events and check it against a
+ * threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+ return cs->count >= STREAM_FLUSH_COUNT;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *_event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine __maybe_unused)
+{
+ struct convert *c = container_of(tool, struct convert, tool);
+ struct evsel_priv *priv = evsel->priv;
+ struct ctf_writer *cw = &c->writer;
+ struct ctf_stream *cs;
+ struct bt_ctf_event_class *event_class;
+ struct bt_ctf_event *event;
+ int ret;
+ unsigned long type = evsel->attr.sample_type;
+
+ if (WARN_ONCE(!priv, "Failed to setup all events.\n"))
+ return 0;
+
+ event_class = priv->event_class;
+
+ /* update stats */
+ c->events_count++;
+ c->events_size += _event->header.size;
+
+ pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count);
+
+ event = bt_ctf_event_create(event_class);
+ if (!event) {
+ pr_err("Failed to create an CTF event\n");
+ return -1;
+ }
+
+ bt_ctf_clock_set_time(cw->clock, sample->time);
+
+ ret = add_generic_values(cw, event, evsel, sample);
+ if (ret)
+ return -1;
+
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+ ret = add_tracepoint_values(cw, event_class, event,
+ evsel, sample);
+ if (ret)
+ return -1;
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ ret = add_callchain_output_values(event_class,
+ event, sample->callchain);
+ if (ret)
+ return -1;
+ }
+
+ if (perf_evsel__is_bpf_output(evsel)) {
+ ret = add_bpf_output_values(event_class, event, sample);
+ if (ret)
+ return -1;
+ }
+
+ cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+ if (cs) {
+ if (is_flush_needed(cs))
+ ctf_stream__flush(cs);
+
+ cs->count++;
+ bt_ctf_stream_append_event(cs->stream, event);
+ }
+
+ bt_ctf_event_put(event);
+ return cs ? 0 : -1;
+}
+
+#define __NON_SAMPLE_SET_FIELD(_name, _type, _field) \
+do { \
+ ret = value_set_##_type(cw, event, #_field, _event->_name._field);\
+ if (ret) \
+ return -1; \
+} while(0)
+
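+/*
+ * Generate a process_<name>_event() callback that mirrors the non-sample
+ * event into CTF stream 0 and then hands it to the stock
+ * perf_event__process_<name>() handler.
+ */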
+#define __FUNC_PROCESS_NON_SAMPLE(_name, body) \
+static int process_##_name##_event(struct perf_tool *tool, \
+ union perf_event *_event, \
+ struct perf_sample *sample, \
+ struct machine *machine) \
+{ \
+ struct convert *c = container_of(tool, struct convert, tool);\
+ struct ctf_writer *cw = &c->writer; \
+ struct bt_ctf_event_class *event_class = cw->_name##_class;\
+ struct bt_ctf_event *event; \
+ struct ctf_stream *cs; \
+ int ret; \
+ \
+ c->non_sample_count++; \
+ c->events_size += _event->header.size; \
+ event = bt_ctf_event_create(event_class); \
+ if (!event) { \
+ pr_err("Failed to create an CTF event\n"); \
+ return -1; \
+ } \
+ \
+ bt_ctf_clock_set_time(cw->clock, sample->time); \
+ body \
+ cs = ctf_stream(cw, 0); \
+ if (cs) { \
+ if (is_flush_needed(cs)) \
+ ctf_stream__flush(cs); \
+ \
+ cs->count++; \
+ bt_ctf_stream_append_event(cs->stream, event); \
+ } \
+ bt_ctf_event_put(event); \
+ \
+ return perf_event__process_##_name(tool, _event, sample, machine);\
+}
+
+__FUNC_PROCESS_NON_SAMPLE(comm,
+ __NON_SAMPLE_SET_FIELD(comm, u32, pid);
+ __NON_SAMPLE_SET_FIELD(comm, u32, tid);
+ __NON_SAMPLE_SET_FIELD(comm, string, comm);
+)
+__FUNC_PROCESS_NON_SAMPLE(fork,
+ __NON_SAMPLE_SET_FIELD(fork, u32, pid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, tid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+ __NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+
+__FUNC_PROCESS_NON_SAMPLE(exit,
+ __NON_SAMPLE_SET_FIELD(fork, u32, pid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ppid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, tid);
+ __NON_SAMPLE_SET_FIELD(fork, u32, ptid);
+ __NON_SAMPLE_SET_FIELD(fork, u64, time);
+)
+__FUNC_PROCESS_NON_SAMPLE(mmap,
+ __NON_SAMPLE_SET_FIELD(mmap, u32, pid);
+ __NON_SAMPLE_SET_FIELD(mmap, u32, tid);
+ __NON_SAMPLE_SET_FIELD(mmap, u64_hex, start);
+ __NON_SAMPLE_SET_FIELD(mmap, string, filename);
+)
+__FUNC_PROCESS_NON_SAMPLE(mmap2,
+ __NON_SAMPLE_SET_FIELD(mmap2, u32, pid);
+ __NON_SAMPLE_SET_FIELD(mmap2, u32, tid);
+ __NON_SAMPLE_SET_FIELD(mmap2, u64_hex, start);
+ __NON_SAMPLE_SET_FIELD(mmap2, string, filename);
+)
+#undef __NON_SAMPLE_SET_FIELD
+#undef __FUNC_PROCESS_NON_SAMPLE
+
+/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+ char *new_name = NULL;
+ size_t len;
+
+ if (!name)
+ name = orig_name;
+
+ if (dup >= 10)
+ goto out;
+ /*
+	 * Add a '_' prefix to a potential keyword. According to
+	 * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652),
+	 * further CTF spec updates may require us to use '$'.
+ */
+ if (dup < 0)
+ len = strlen(name) + sizeof("_");
+ else
+ len = strlen(orig_name) + sizeof("_dupl_X");
+
+ new_name = malloc(len);
+ if (!new_name)
+ goto out;
+
+ if (dup < 0)
+ snprintf(new_name, len, "_%s", name);
+ else
+ snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
+
+out:
+ if (name != orig_name)
+ free(name);
+ return new_name;
+}
+
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+ struct bt_ctf_field_type *type,
+ struct format_field *field)
+{
+ struct bt_ctf_field_type *t = NULL;
+ char *name;
+ int dup = 1;
+ int ret;
+
+ /* alias was already assigned */
+ if (field->alias != field->name)
+ return bt_ctf_event_class_add_field(event_class, type,
+ (char *)field->alias);
+
+ name = field->name;
+
+	/* If 'name' is a keyword, add a prefix. */
+ if (bt_ctf_validate_identifier(name))
+ name = change_name(name, field->name, -1);
+
+ if (!name) {
+ pr_err("Failed to fix invalid identifier.");
+ return -1;
+ }
+ while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+ bt_ctf_field_type_put(t);
+ name = change_name(name, field->name, dup++);
+ if (!name) {
+ pr_err("Failed to create dup name for '%s'\n", field->name);
+ return -1;
+ }
+ }
+
+ ret = bt_ctf_event_class_add_field(event_class, type, name);
+ if (!ret)
+ field->alias = name;
+
+ return ret;
+}
+
+static int add_tracepoint_fields_types(struct ctf_writer *cw,
+ struct format_field *fields,
+ struct bt_ctf_event_class *event_class)
+{
+ struct format_field *field;
+ int ret;
+
+ for (field = fields; field; field = field->next) {
+ struct bt_ctf_field_type *type;
+ unsigned long flags = field->flags;
+
+ pr2(" field '%s'\n", field->name);
+
+ type = get_tracepoint_field_type(cw, field);
+ if (!type)
+ return -1;
+
+ /*
+ * A string is an array of chars. For this we use the string
+ * type and don't care that it is an array. What we don't
+ * support is an array of strings.
+ */
+ if (flags & FIELD_IS_STRING)
+ flags &= ~FIELD_IS_ARRAY;
+
+ if (flags & FIELD_IS_ARRAY)
+ type = bt_ctf_field_type_array_create(type, field->arraylen);
+
+ ret = event_class_add_field(event_class, type, field);
+
+ if (flags & FIELD_IS_ARRAY)
+ bt_ctf_field_type_put(type);
+
+ if (ret) {
+ pr_err("Failed to add field '%s': %d\n",
+ field->name, ret);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int add_tracepoint_types(struct ctf_writer *cw,
+ struct perf_evsel *evsel,
+ struct bt_ctf_event_class *class)
+{
+ struct format_field *common_fields = evsel->tp_format->format.common_fields;
+ struct format_field *fields = evsel->tp_format->format.fields;
+ int ret;
+
+ ret = add_tracepoint_fields_types(cw, common_fields, class);
+ if (!ret)
+ ret = add_tracepoint_fields_types(cw, fields, class);
+
+ return ret;
+}
+
+static int add_bpf_output_types(struct ctf_writer *cw,
+ struct bt_ctf_event_class *class)
+{
+ struct bt_ctf_field_type *len_type = cw->data.u32;
+ struct bt_ctf_field_type *seq_base_type = cw->data.u32_hex;
+ struct bt_ctf_field_type *seq_type;
+ int ret;
+
+ ret = bt_ctf_event_class_add_field(class, len_type, "raw_len");
+ if (ret)
+ return ret;
+
+ seq_type = bt_ctf_field_type_sequence_create(seq_base_type, "raw_len");
+ if (!seq_type)
+ return -1;
+
+ return bt_ctf_event_class_add_field(class, seq_type, "raw_data");
+}
+
+static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
+ struct bt_ctf_event_class *event_class)
+{
+ u64 type = evsel->attr.sample_type;
+
+ /*
+ * missing:
+ * PERF_SAMPLE_TIME - not needed as we have it in
+ * ctf event header
+ * PERF_SAMPLE_READ - TODO
+ * PERF_SAMPLE_CALLCHAIN - TODO
+ * PERF_SAMPLE_RAW - tracepoint fields and BPF output
+ * are handled separately
+ * PERF_SAMPLE_BRANCH_STACK - TODO
+ * PERF_SAMPLE_REGS_USER - TODO
+ * PERF_SAMPLE_STACK_USER - TODO
+ */
+
+#define ADD_FIELD(cl, t, n) \
+ do { \
+ pr2(" field '%s'\n", n); \
+ if (bt_ctf_event_class_add_field(cl, t, n)) { \
+ pr_err("Failed to add field '%s';\n", n); \
+ return -1; \
+ } \
+ } while (0)
+
+ if (type & PERF_SAMPLE_IP)
+ ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip");
+
+ if (type & PERF_SAMPLE_TID) {
+ ADD_FIELD(event_class, cw->data.s32, "perf_tid");
+ ADD_FIELD(event_class, cw->data.s32, "perf_pid");
+ }
+
+ if ((type & PERF_SAMPLE_ID) ||
+ (type & PERF_SAMPLE_IDENTIFIER))
+ ADD_FIELD(event_class, cw->data.u64, "perf_id");
+
+ if (type & PERF_SAMPLE_STREAM_ID)
+ ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
+
+ if (type & PERF_SAMPLE_PERIOD)
+ ADD_FIELD(event_class, cw->data.u64, "perf_period");
+
+ if (type & PERF_SAMPLE_WEIGHT)
+ ADD_FIELD(event_class, cw->data.u64, "perf_weight");
+
+ if (type & PERF_SAMPLE_DATA_SRC)
+ ADD_FIELD(event_class, cw->data.u64, "perf_data_src");
+
+ if (type & PERF_SAMPLE_TRANSACTION)
+ ADD_FIELD(event_class, cw->data.u64, "perf_transaction");
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ ADD_FIELD(event_class, cw->data.u32, "perf_callchain_size");
+ ADD_FIELD(event_class,
+ bt_ctf_field_type_sequence_create(
+ cw->data.u64_hex, "perf_callchain_size"),
+ "perf_callchain");
+ }
+
+#undef ADD_FIELD
+ return 0;
+}
+
+static int add_event(struct ctf_writer *cw, struct perf_evsel *evsel)
+{
+ struct bt_ctf_event_class *event_class;
+ struct evsel_priv *priv;
+ const char *name = perf_evsel__name(evsel);
+ int ret;
+
+ pr("Adding event '%s' (type %d)\n", name, evsel->attr.type);
+
+ event_class = bt_ctf_event_class_create(name);
+ if (!event_class)
+ return -1;
+
+ ret = add_generic_types(cw, evsel, event_class);
+ if (ret)
+ goto err;
+
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+ ret = add_tracepoint_types(cw, evsel, event_class);
+ if (ret)
+ goto err;
+ }
+
+ if (perf_evsel__is_bpf_output(evsel)) {
+ ret = add_bpf_output_types(cw, event_class);
+ if (ret)
+ goto err;
+ }
+
+ ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);
+ if (ret) {
+ pr("Failed to add event class into stream.\n");
+ goto err;
+ }
+
+ priv = malloc(sizeof(*priv));
+ if (!priv)
+ goto err;
+
+ priv->event_class = event_class;
+ evsel->priv = priv;
+ return 0;
+
+err:
+ bt_ctf_event_class_put(event_class);
+ pr_err("Failed to add event '%s'.\n", name);
+ return -1;
+}
+
+static int setup_events(struct ctf_writer *cw, struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ int ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ ret = add_event(cw, evsel);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+#define __NON_SAMPLE_ADD_FIELD(t, n) \
+ do { \
+ pr2(" field '%s'\n", #n); \
+ if (bt_ctf_event_class_add_field(event_class, cw->data.t, #n)) {\
+ pr_err("Failed to add field '%s';\n", #n);\
+ return -1; \
+ } \
+ } while(0)
+
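+/*
+ * Generate an add_<name>_event() helper that registers a "perf_<name>"
+ * event class carrying the fields listed in its body.
+ */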
+#define __FUNC_ADD_NON_SAMPLE_EVENT_CLASS(_name, body) \
+static int add_##_name##_event(struct ctf_writer *cw) \
+{ \
+ struct bt_ctf_event_class *event_class; \
+ int ret; \
+ \
+ pr("Adding "#_name" event\n"); \
+ event_class = bt_ctf_event_class_create("perf_" #_name);\
+ if (!event_class) \
+ return -1; \
+ body \
+ \
+ ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class);\
+ if (ret) { \
+ pr("Failed to add event class '"#_name"' into stream.\n");\
+ return ret; \
+ } \
+ \
+ cw->_name##_class = event_class; \
+ bt_ctf_event_class_put(event_class); \
+ return 0; \
+}
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(comm,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(string, comm);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(fork,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, ppid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u32, ptid);
+ __NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(exit,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, ppid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u32, ptid);
+ __NON_SAMPLE_ADD_FIELD(u64, time);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u64_hex, start);
+ __NON_SAMPLE_ADD_FIELD(string, filename);
+)
+
+__FUNC_ADD_NON_SAMPLE_EVENT_CLASS(mmap2,
+ __NON_SAMPLE_ADD_FIELD(u32, pid);
+ __NON_SAMPLE_ADD_FIELD(u32, tid);
+ __NON_SAMPLE_ADD_FIELD(u64_hex, start);
+ __NON_SAMPLE_ADD_FIELD(string, filename);
+)
+#undef __NON_SAMPLE_ADD_FIELD
+#undef __FUNC_ADD_NON_SAMPLE_EVENT_CLASS
+
+static int setup_non_sample_events(struct ctf_writer *cw,
+ struct perf_session *session __maybe_unused)
+{
+ int ret;
+
+ ret = add_comm_event(cw);
+ if (ret)
+ return ret;
+ ret = add_exit_event(cw);
+ if (ret)
+ return ret;
+ ret = add_fork_event(cw);
+ if (ret)
+ return ret;
+ ret = add_mmap_event(cw);
+ if (ret)
+ return ret;
+ ret = add_mmap2_event(cw);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+static void cleanup_events(struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct evsel_priv *priv;
+
+ priv = evsel->priv;
+ bt_ctf_event_class_put(priv->event_class);
+ zfree(&evsel->priv);
+ }
+
+ perf_evlist__delete(evlist);
+ session->evlist = NULL;
+}
+
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+ struct ctf_stream **stream;
+ struct perf_header *ph = &session->header;
+ int ncpus;
+
+ /*
+	 * Try to get the number of CPUs used in the data file;
+	 * if not present, fall back to MAX_CPUS.
+ */
+ ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+
+ stream = zalloc(sizeof(*stream) * ncpus);
+ if (!stream) {
+ pr_err("Failed to allocate streams.\n");
+ return -ENOMEM;
+ }
+
+ cw->stream = stream;
+ cw->stream_cnt = ncpus;
+ return 0;
+}
+
+static void free_streams(struct ctf_writer *cw)
+{
+ int cpu;
+
+ for (cpu = 0; cpu < cw->stream_cnt; cpu++)
+ ctf_stream__delete(cw->stream[cpu]);
+
+ free(cw->stream);
+}
+
+static int ctf_writer__setup_env(struct ctf_writer *cw,
+ struct perf_session *session)
+{
+ struct perf_header *header = &session->header;
+ struct bt_ctf_writer *writer = cw->writer;
+
+#define ADD(__n, __v) \
+do { \
+ if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \
+ return -1; \
+} while (0)
+
+ ADD("host", header->env.hostname);
+ ADD("sysname", "Linux");
+ ADD("release", header->env.os_release);
+ ADD("version", header->env.version);
+ ADD("machine", header->env.arch);
+ ADD("domain", "kernel");
+ ADD("tracer_name", "perf");
+
+#undef ADD
+ return 0;
+}
+
+static int ctf_writer__setup_clock(struct ctf_writer *cw)
+{
+ struct bt_ctf_clock *clock = cw->clock;
+
+ bt_ctf_clock_set_description(clock, "perf clock");
+
+#define SET(__n, __v) \
+do { \
+ if (bt_ctf_clock_set_##__n(clock, __v)) \
+ return -1; \
+} while (0)
+
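+	/* perf timestamps are in nanoseconds, hence the 1 GHz clock */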
+ SET(frequency, 1000000000);
+ SET(offset_s, 0);
+ SET(offset, 0);
+ SET(precision, 10);
+ SET(is_absolute, 0);
+
+#undef SET
+ return 0;
+}
+
+static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex)
+{
+ struct bt_ctf_field_type *type;
+
+ type = bt_ctf_field_type_integer_create(size);
+ if (!type)
+ return NULL;
+
+ if (sign &&
+ bt_ctf_field_type_integer_set_signed(type, 1))
+ goto err;
+
+ if (hex &&
+ bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL))
+ goto err;
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_BIG_ENDIAN);
+#else
+ bt_ctf_field_type_set_byte_order(type, BT_CTF_BYTE_ORDER_LITTLE_ENDIAN);
+#endif
+
+ pr2("Created type: INTEGER %d-bit %ssigned %s\n",
+	    size, sign ? "" : "un", hex ? "hex" : "");
+ return type;
+
+err:
+ bt_ctf_field_type_put(type);
+ return NULL;
+}
+
+static void ctf_writer__cleanup_data(struct ctf_writer *cw)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(cw->data.array); i++)
+ bt_ctf_field_type_put(cw->data.array[i]);
+}
+
+static int ctf_writer__init_data(struct ctf_writer *cw)
+{
+#define CREATE_INT_TYPE(type, size, sign, hex) \
+do { \
+ (type) = create_int_type(size, sign, hex); \
+ if (!(type)) \
+ goto err; \
+} while (0)
+
+ CREATE_INT_TYPE(cw->data.s64, 64, true, false);
+ CREATE_INT_TYPE(cw->data.u64, 64, false, false);
+ CREATE_INT_TYPE(cw->data.s32, 32, true, false);
+ CREATE_INT_TYPE(cw->data.u32, 32, false, false);
+ CREATE_INT_TYPE(cw->data.u32_hex, 32, false, true);
+ CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true);
+
+ cw->data.string = bt_ctf_field_type_string_create();
+ if (cw->data.string)
+ return 0;
+
+err:
+ ctf_writer__cleanup_data(cw);
+ pr_err("Failed to create data types.\n");
+ return -1;
+}
+
+static void ctf_writer__cleanup(struct ctf_writer *cw)
+{
+ ctf_writer__cleanup_data(cw);
+
+ bt_ctf_clock_put(cw->clock);
+ free_streams(cw);
+ bt_ctf_stream_class_put(cw->stream_class);
+ bt_ctf_writer_put(cw->writer);
+
+ /* and NULL all the pointers */
+ memset(cw, 0, sizeof(*cw));
+}
+
+static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+{
+ struct bt_ctf_writer *writer;
+ struct bt_ctf_stream_class *stream_class;
+ struct bt_ctf_clock *clock;
+ struct bt_ctf_field_type *pkt_ctx_type;
+ int ret;
+
+ /* CTF writer */
+ writer = bt_ctf_writer_create(path);
+ if (!writer)
+ goto err;
+
+ cw->writer = writer;
+
+ /* CTF clock */
+ clock = bt_ctf_clock_create("perf_clock");
+ if (!clock) {
+ pr("Failed to create CTF clock.\n");
+ goto err_cleanup;
+ }
+
+ cw->clock = clock;
+
+ if (ctf_writer__setup_clock(cw)) {
+ pr("Failed to setup CTF clock.\n");
+ goto err_cleanup;
+ }
+
+ /* CTF stream class */
+ stream_class = bt_ctf_stream_class_create("perf_stream");
+ if (!stream_class) {
+ pr("Failed to create CTF stream class.\n");
+ goto err_cleanup;
+ }
+
+ cw->stream_class = stream_class;
+
+ /* CTF clock stream setup */
+ if (bt_ctf_stream_class_set_clock(stream_class, clock)) {
+ pr("Failed to assign CTF clock to stream class.\n");
+ goto err_cleanup;
+ }
+
+ if (ctf_writer__init_data(cw))
+ goto err_cleanup;
+
+ /* Add cpu_id for packet context */
+ pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+ if (!pkt_ctx_type)
+ goto err_cleanup;
+
+ ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+ bt_ctf_field_type_put(pkt_ctx_type);
+ if (ret)
+ goto err_cleanup;
+
+ /* CTF clock writer setup */
+ if (bt_ctf_writer_add_clock(writer, clock)) {
+ pr("Failed to assign CTF clock to writer.\n");
+ goto err_cleanup;
+ }
+
+ return 0;
+
+err_cleanup:
+ ctf_writer__cleanup(cw);
+err:
+ pr_err("Failed to setup CTF writer.\n");
+ return -1;
+}
+
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+ int cpu, ret = 0;
+
+ for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
+ ret = ctf_stream__flush(cw->stream[cpu]);
+
+ return ret;
+}
+
+static int convert__config(const char *var, const char *value, void *cb)
+{
+ struct convert *c = cb;
+
+ if (!strcmp(var, "convert.queue-size"))
+ return perf_config_u64(&c->queue_size, var, value);
+
+ return 0;
+}
+
+int bt_convert__perf2ctf(const char *input, const char *path,
+ struct perf_data_convert_opts *opts)
+{
+ struct perf_session *session;
+ struct perf_data data = {
+ .file.path = input,
+ .mode = PERF_DATA_MODE_READ,
+ .force = opts->force,
+ };
+ struct convert c = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
+ .lost = perf_event__process_lost,
+ .tracing_data = perf_event__process_tracing_data,
+ .build_id = perf_event__process_build_id,
+ .namespaces = perf_event__process_namespaces,
+ .ordered_events = true,
+ .ordering_requires_timestamps = true,
+ },
+ };
+ struct ctf_writer *cw = &c.writer;
+ int err;
+
+ if (opts->all) {
+ c.tool.comm = process_comm_event;
+ c.tool.exit = process_exit_event;
+ c.tool.fork = process_fork_event;
+ c.tool.mmap = process_mmap_event;
+ c.tool.mmap2 = process_mmap2_event;
+ }
+
+ err = perf_config(convert__config, &c);
+ if (err)
+ return err;
+
+ /* CTF writer */
+ if (ctf_writer__init(cw, path))
+ return -1;
+
+ err = -1;
+ /* perf.data session */
+ session = perf_session__new(&data, 0, &c.tool);
+ if (!session)
+ goto free_writer;
+
+ if (c.queue_size) {
+ ordered_events__set_alloc_size(&session->ordered_events,
+ c.queue_size);
+ }
+
+ /* CTF writer env/clock setup */
+ if (ctf_writer__setup_env(cw, session))
+ goto free_session;
+
+ /* CTF events setup */
+ if (setup_events(cw, session))
+ goto free_session;
+
+ if (opts->all && setup_non_sample_events(cw, session))
+ goto free_session;
+
+ if (setup_streams(cw, session))
+ goto free_session;
+
+ err = perf_session__process_events(session);
+ if (!err)
+ err = ctf_writer__flush_streams(cw);
+ else
+ pr_err("Error during conversion.\n");
+
+ fprintf(stderr,
+ "[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
+ data.file.path, path);
+
+ fprintf(stderr,
+ "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples",
+ (double) c.events_size / 1024.0 / 1024.0,
+ c.events_count);
+
+ if (!c.non_sample_count)
+ fprintf(stderr, ") ]\n");
+ else
+ fprintf(stderr, ", %" PRIu64 " non-samples) ]\n", c.non_sample_count);
+
+ cleanup_events(session);
+ perf_session__delete(session);
+ ctf_writer__cleanup(cw);
+
+ return err;
+
+free_session:
+ perf_session__delete(session);
+free_writer:
+ ctf_writer__cleanup(cw);
+ pr_err("Error during conversion setup.\n");
+ return err;
+}
diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h
new file mode 100644
index 000000000..821674d63
--- /dev/null
+++ b/tools/perf/util/data-convert-bt.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DATA_CONVERT_BT_H
+#define __DATA_CONVERT_BT_H
+#include "data-convert.h"
+#ifdef HAVE_LIBBABELTRACE_SUPPORT
+
+int bt_convert__perf2ctf(const char *input_name, const char *to_ctf,
+ struct perf_data_convert_opts *opts);
+
+#endif /* HAVE_LIBBABELTRACE_SUPPORT */
+#endif /* __DATA_CONVERT_BT_H */
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
new file mode 100644
index 000000000..af90b6076
--- /dev/null
+++ b/tools/perf/util/data-convert.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __DATA_CONVERT_H
+#define __DATA_CONVERT_H
+
+struct perf_data_convert_opts {
+ bool force;
+ bool all;
+};
+
+#endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
new file mode 100644
index 000000000..d8cfc19dd
--- /dev/null
+++ b/tools/perf/util/data.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "data.h"
+#include "util.h"
+#include "debug.h"
+
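+/*
+ * Pipe mode is used when no path is set and the matching stdio fd is
+ * a FIFO, or when the path is an explicit "-".
+ */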
+static bool check_pipe(struct perf_data *data)
+{
+ struct stat st;
+ bool is_pipe = false;
+ int fd = perf_data__is_read(data) ?
+ STDIN_FILENO : STDOUT_FILENO;
+
+ if (!data->file.path) {
+ if (!fstat(fd, &st) && S_ISFIFO(st.st_mode))
+ is_pipe = true;
+ } else {
+ if (!strcmp(data->file.path, "-"))
+ is_pipe = true;
+ }
+
+ if (is_pipe)
+ data->file.fd = fd;
+
+ return data->is_pipe = is_pipe;
+}
+
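+/* Back up an existing non-empty output file as <path>.old before overwriting it. */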
+static int check_backup(struct perf_data *data)
+{
+ struct stat st;
+
+ if (!stat(data->file.path, &st) && st.st_size) {
+ /* TODO check errors properly */
+ char oldname[PATH_MAX];
+ snprintf(oldname, sizeof(oldname), "%s.old",
+ data->file.path);
+ unlink(oldname);
+ rename(data->file.path, oldname);
+ }
+
+ return 0;
+}
+
+static int open_file_read(struct perf_data *data)
+{
+ struct stat st;
+ int fd;
+ char sbuf[STRERR_BUFSIZE];
+
+ fd = open(data->file.path, O_RDONLY);
+ if (fd < 0) {
+ int err = errno;
+
+ pr_err("failed to open %s: %s", data->file.path,
+ str_error_r(err, sbuf, sizeof(sbuf)));
+ if (err == ENOENT && !strcmp(data->file.path, "perf.data"))
+ pr_err(" (try 'perf record' first)");
+ pr_err("\n");
+ return -err;
+ }
+
+ if (fstat(fd, &st) < 0)
+ goto out_close;
+
+ if (!data->force && st.st_uid && (st.st_uid != geteuid())) {
+ pr_err("File %s not owned by current user or root (use -f to override)\n",
+ data->file.path);
+ goto out_close;
+ }
+
+ if (!st.st_size) {
+ pr_info("zero-sized data (%s), nothing to do!\n",
+ data->file.path);
+ goto out_close;
+ }
+
+ data->size = st.st_size;
+ return fd;
+
+ out_close:
+ close(fd);
+ return -1;
+}
+
+static int open_file_write(struct perf_data *data)
+{
+ int fd;
+ char sbuf[STRERR_BUFSIZE];
+
+ if (check_backup(data))
+ return -1;
+
+ fd = open(data->file.path, O_CREAT|O_RDWR|O_TRUNC|O_CLOEXEC,
+ S_IRUSR|S_IWUSR);
+
+ if (fd < 0)
+ pr_err("failed to open %s : %s\n", data->file.path,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+
+ return fd;
+}
+
+static int open_file(struct perf_data *data)
+{
+ int fd;
+
+ fd = perf_data__is_read(data) ?
+ open_file_read(data) : open_file_write(data);
+
+ data->file.fd = fd;
+ return fd < 0 ? -1 : 0;
+}
+
+int perf_data__open(struct perf_data *data)
+{
+ if (check_pipe(data))
+ return 0;
+
+ if (!data->file.path)
+ data->file.path = "perf.data";
+
+ return open_file(data);
+}
+
+void perf_data__close(struct perf_data *data)
+{
+ close(data->file.fd);
+}
+
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size)
+{
+ return writen(file->fd, buf, size);
+}
+
+ssize_t perf_data__write(struct perf_data *data,
+ void *buf, size_t size)
+{
+ return perf_data_file__write(&data->file, buf, size);
+}
+
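+/*
+ * Rename the current output to <path>.<postfix>; unless at_exit is set,
+ * reopen the original path and seek back to 'pos' so writing can continue.
+ */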
+int perf_data__switch(struct perf_data *data,
+ const char *postfix,
+ size_t pos, bool at_exit)
+{
+ char *new_filepath;
+ int ret;
+
+ if (check_pipe(data))
+ return -EINVAL;
+ if (perf_data__is_read(data))
+ return -EINVAL;
+
+ if (asprintf(&new_filepath, "%s.%s", data->file.path, postfix) < 0)
+ return -ENOMEM;
+
+ /*
+	 * Only fire a warning, don't return an error; continue filling
+	 * the original file.
+ */
+ if (rename(data->file.path, new_filepath))
+ pr_warning("Failed to rename %s to %s\n", data->file.path, new_filepath);
+
+ if (!at_exit) {
+ close(data->file.fd);
+ ret = perf_data__open(data);
+ if (ret < 0)
+ goto out;
+
+ if (lseek(data->file.fd, pos, SEEK_SET) == (off_t)-1) {
+ ret = -errno;
+ pr_debug("Failed to lseek to %zu: %s",
+ pos, strerror(errno));
+ goto out;
+ }
+ }
+ ret = data->file.fd;
+out:
+ free(new_filepath);
+ return ret;
+}
diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h
new file mode 100644
index 000000000..4828f7fee
--- /dev/null
+++ b/tools/perf/util/data.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DATA_H
+#define __PERF_DATA_H
+
+#include <stdbool.h>
+
+enum perf_data_mode {
+ PERF_DATA_MODE_WRITE,
+ PERF_DATA_MODE_READ,
+};
+
+struct perf_data_file {
+ const char *path;
+ int fd;
+};
+
+struct perf_data {
+ struct perf_data_file file;
+ bool is_pipe;
+ bool force;
+ unsigned long size;
+ enum perf_data_mode mode;
+};
+
+static inline bool perf_data__is_read(struct perf_data *data)
+{
+ return data->mode == PERF_DATA_MODE_READ;
+}
+
+static inline bool perf_data__is_write(struct perf_data *data)
+{
+ return data->mode == PERF_DATA_MODE_WRITE;
+}
+
+static inline int perf_data__is_pipe(struct perf_data *data)
+{
+ return data->is_pipe;
+}
+
+static inline int perf_data__fd(struct perf_data *data)
+{
+ return data->file.fd;
+}
+
+static inline unsigned long perf_data__size(struct perf_data *data)
+{
+ return data->size;
+}
+
+int perf_data__open(struct perf_data *data);
+void perf_data__close(struct perf_data *data);
+ssize_t perf_data__write(struct perf_data *data,
+ void *buf, size_t size);
+ssize_t perf_data_file__write(struct perf_data_file *file,
+ void *buf, size_t size);
+/*
+ * If at_exit is set, only rename the current perf.data to
+ * perf.data.<postfix> and continue writing to the original file.
+ * Set at_exit when flushing the last output.
+ *
+ * The return value is the fd of the new output.
+ */
+int perf_data__switch(struct perf_data *data,
+ const char *postfix,
+ size_t pos, bool at_exit);
+#endif /* __PERF_DATA_H */
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
new file mode 100644
index 000000000..7123746ed
--- /dev/null
+++ b/tools/perf/util/db-export.c
@@ -0,0 +1,507 @@
+/*
+ * db-export.c: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <errno.h>
+
+#include "evsel.h"
+#include "machine.h"
+#include "thread.h"
+#include "comm.h"
+#include "symbol.h"
+#include "event.h"
+#include "util.h"
+#include "thread-stack.h"
+#include "callchain.h"
+#include "call-path.h"
+#include "db-export.h"
+
+struct deferred_export {
+ struct list_head node;
+ struct comm *comm;
+};
+
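+/*
+ * Flush comm exports that were deferred because the main thread's
+ * comm was not yet set at db_export__comm() time.
+ */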
+static int db_export__deferred(struct db_export *dbe)
+{
+ struct deferred_export *de;
+ int err;
+
+ while (!list_empty(&dbe->deferred)) {
+ de = list_entry(dbe->deferred.next, struct deferred_export,
+ node);
+ err = dbe->export_comm(dbe, de->comm);
+ list_del(&de->node);
+ free(de);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void db_export__free_deferred(struct db_export *dbe)
+{
+ struct deferred_export *de;
+
+ while (!list_empty(&dbe->deferred)) {
+ de = list_entry(dbe->deferred.next, struct deferred_export,
+ node);
+ list_del(&de->node);
+ free(de);
+ }
+}
+
+static int db_export__defer_comm(struct db_export *dbe, struct comm *comm)
+{
+ struct deferred_export *de;
+
+ de = zalloc(sizeof(struct deferred_export));
+ if (!de)
+ return -ENOMEM;
+
+ de->comm = comm;
+ list_add_tail(&de->node, &dbe->deferred);
+
+ return 0;
+}
+
+int db_export__init(struct db_export *dbe)
+{
+ memset(dbe, 0, sizeof(struct db_export));
+ INIT_LIST_HEAD(&dbe->deferred);
+ return 0;
+}
+
+int db_export__flush(struct db_export *dbe)
+{
+ return db_export__deferred(dbe);
+}
+
+void db_export__exit(struct db_export *dbe)
+{
+ db_export__free_deferred(dbe);
+ call_return_processor__free(dbe->crp);
+ dbe->crp = NULL;
+}
+
+int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel)
+{
+ if (evsel->db_id)
+ return 0;
+
+ evsel->db_id = ++dbe->evsel_last_db_id;
+
+ if (dbe->export_evsel)
+ return dbe->export_evsel(dbe, evsel);
+
+ return 0;
+}
+
+int db_export__machine(struct db_export *dbe, struct machine *machine)
+{
+ if (machine->db_id)
+ return 0;
+
+ machine->db_id = ++dbe->machine_last_db_id;
+
+ if (dbe->export_machine)
+ return dbe->export_machine(dbe, machine);
+
+ return 0;
+}
+
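+/*
+ * Export a thread, first ensuring its main (group leader) thread has
+ * been exported so that main_thread_db_id can reference it.
+ */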
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+ struct machine *machine, struct comm *comm)
+{
+ struct thread *main_thread;
+ u64 main_thread_db_id = 0;
+ int err;
+
+ if (thread->db_id)
+ return 0;
+
+ thread->db_id = ++dbe->thread_last_db_id;
+
+ if (thread->pid_ != -1) {
+ if (thread->pid_ == thread->tid) {
+ main_thread = thread;
+ } else {
+ main_thread = machine__findnew_thread(machine,
+ thread->pid_,
+ thread->pid_);
+ if (!main_thread)
+ return -ENOMEM;
+ err = db_export__thread(dbe, main_thread, machine,
+ comm);
+ if (err)
+ goto out_put;
+ if (comm) {
+ err = db_export__comm_thread(dbe, comm, thread);
+ if (err)
+ goto out_put;
+ }
+ }
+ main_thread_db_id = main_thread->db_id;
+ if (main_thread != thread)
+ thread__put(main_thread);
+ }
+
+ if (dbe->export_thread)
+ return dbe->export_thread(dbe, thread, main_thread_db_id,
+ machine);
+
+ return 0;
+
+out_put:
+ thread__put(main_thread);
+ return err;
+}
+
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+ struct thread *main_thread)
+{
+ int err;
+
+ if (comm->db_id)
+ return 0;
+
+ comm->db_id = ++dbe->comm_last_db_id;
+
+ if (dbe->export_comm) {
+ if (main_thread->comm_set)
+ err = dbe->export_comm(dbe, comm);
+ else
+ err = db_export__defer_comm(dbe, comm);
+ if (err)
+ return err;
+ }
+
+ return db_export__comm_thread(dbe, comm, main_thread);
+}
+
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+ struct thread *thread)
+{
+ u64 db_id;
+
+ db_id = ++dbe->comm_thread_last_db_id;
+
+ if (dbe->export_comm_thread)
+ return dbe->export_comm_thread(dbe, db_id, comm, thread);
+
+ return 0;
+}
+
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+ struct machine *machine)
+{
+ if (dso->db_id)
+ return 0;
+
+ dso->db_id = ++dbe->dso_last_db_id;
+
+ if (dbe->export_dso)
+ return dbe->export_dso(dbe, dso, machine);
+
+ return 0;
+}
+
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso)
+{
+ u64 *sym_db_id = symbol__priv(sym);
+
+ if (*sym_db_id)
+ return 0;
+
+ *sym_db_id = ++dbe->symbol_last_db_id;
+
+ if (dbe->export_symbol)
+ return dbe->export_symbol(dbe, sym, dso);
+
+ return 0;
+}
+
+static int db_ids_from_al(struct db_export *dbe, struct addr_location *al,
+ u64 *dso_db_id, u64 *sym_db_id, u64 *offset)
+{
+ int err;
+
+ if (al->map) {
+ struct dso *dso = al->map->dso;
+
+ err = db_export__dso(dbe, dso, al->machine);
+ if (err)
+ return err;
+ *dso_db_id = dso->db_id;
+
+ if (!al->sym) {
+ al->sym = symbol__new(al->addr, 0, 0, 0, "unknown");
+ if (al->sym)
+ dso__insert_symbol(dso, al->sym);
+ }
+
+ if (al->sym) {
+ u64 *db_id = symbol__priv(al->sym);
+
+ err = db_export__symbol(dbe, al->sym, dso);
+ if (err)
+ return err;
+ *sym_db_id = *db_id;
+ *offset = al->addr - al->sym->start;
+ }
+ }
+
+ return 0;
+}
+
+static struct call_path *call_path_from_sample(struct db_export *dbe,
+ struct machine *machine,
+ struct thread *thread,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ u64 kernel_start = machine__kernel_start(machine);
+ struct call_path *current = &dbe->cpr->call_path;
+ enum chain_order saved_order = callchain_param.order;
+ int err;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ return NULL;
+
+ /*
+	 * Since the call path tree must be built starting with the root, we
+	 * must use ORDER_CALLER for call chain resolution, in order to
+	 * process the callchain starting with the root node and ending with
+	 * the leaf.
+ */
+ callchain_param.order = ORDER_CALLER;
+ err = thread__resolve_callchain(thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, PERF_MAX_STACK_DEPTH);
+ if (err) {
+ callchain_param.order = saved_order;
+ return NULL;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+ while (1) {
+ struct callchain_cursor_node *node;
+ struct addr_location al;
+ u64 dso_db_id = 0, sym_db_id = 0, offset = 0;
+
+ memset(&al, 0, sizeof(al));
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+ /*
+ * Handle export of symbol and dso for this node by
+ * constructing an addr_location struct and then passing it to
+ * db_ids_from_al() to perform the export.
+ */
+ al.sym = node->sym;
+ al.map = node->map;
+ al.machine = machine;
+ al.addr = node->ip;
+
+ if (al.map && !al.sym)
+ al.sym = dso__find_symbol(al.map->dso, al.addr);
+
+ db_ids_from_al(dbe, &al, &dso_db_id, &sym_db_id, &offset);
+
+ /* add node to the call path tree if it doesn't exist */
+ current = call_path__findnew(dbe->cpr, current,
+ al.sym, node->ip,
+ kernel_start);
+
+ callchain_cursor_advance(&callchain_cursor);
+ }
+
+ /* Reset the callchain order to its prior value. */
+ callchain_param.order = saved_order;
+
+ if (current == &dbe->cpr->call_path) {
+ /* Bail because the callchain was empty. */
+ return NULL;
+ }
+
+ return current;
+}
+
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+ const char *name)
+{
+ if (dbe->export_branch_type)
+ return dbe->export_branch_type(dbe, branch_type, name);
+
+ return 0;
+}
+
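+/*
+ * Export a sample together with everything it references: evsel, machine,
+ * thread, comm, dso, symbol and (when available) call path, assigning
+ * database ids along the way.
+ */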
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+	struct thread *thread = al->thread;
+ struct export_sample es = {
+ .event = event,
+ .sample = sample,
+ .evsel = evsel,
+ .al = al,
+ };
+ struct thread *main_thread;
+ struct comm *comm = NULL;
+ int err;
+
+ err = db_export__evsel(dbe, evsel);
+ if (err)
+ return err;
+
+ err = db_export__machine(dbe, al->machine);
+ if (err)
+ return err;
+
+ main_thread = thread__main_thread(al->machine, thread);
+ if (main_thread)
+ comm = machine__thread_exec_comm(al->machine, main_thread);
+
+ err = db_export__thread(dbe, thread, al->machine, comm);
+ if (err)
+ goto out_put;
+
+ if (comm) {
+ err = db_export__comm(dbe, comm, main_thread);
+ if (err)
+ goto out_put;
+ es.comm_db_id = comm->db_id;
+ }
+
+ es.db_id = ++dbe->sample_last_db_id;
+
+ err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
+ if (err)
+ goto out_put;
+
+ if (dbe->cpr) {
+ struct call_path *cp = call_path_from_sample(dbe, al->machine,
+ thread, sample,
+ evsel);
+ if (cp) {
+ db_export__call_path(dbe, cp);
+ es.call_path_id = cp->db_id;
+ }
+ }
+
+ if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
+ sample_addr_correlates_sym(&evsel->attr)) {
+ struct addr_location addr_al;
+
+ thread__resolve(thread, &addr_al, sample);
+ err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
+ &es.addr_sym_db_id, &es.addr_offset);
+ if (err)
+ goto out_put;
+ if (dbe->crp) {
+ err = thread_stack__process(thread, comm, sample, al,
+ &addr_al, es.db_id,
+ dbe->crp);
+ if (err)
+ goto out_put;
+ }
+ }
+
+ if (dbe->export_sample)
+ err = dbe->export_sample(dbe, &es);
+
+out_put:
+ thread__put(main_thread);
+ return err;
+}
+
+static struct {
+ u32 branch_type;
+ const char *name;
+} branch_types[] = {
+ {0, "no branch"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL, "call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN, "return"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL, "conditional jump"},
+ {PERF_IP_FLAG_BRANCH, "unconditional jump"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_INTERRUPT,
+ "software interrupt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT,
+ "return from interrupt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_SYSCALLRET,
+ "system call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN | PERF_IP_FLAG_SYSCALLRET,
+ "return from system call"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_ASYNC, "asynchronous branch"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT, "hardware interrupt"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT, "transaction abort"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_BEGIN, "trace begin"},
+ {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "trace end"},
+ {0, NULL}
+};
+
+int db_export__branch_types(struct db_export *dbe)
+{
+ int i, err = 0;
+
+ for (i = 0; branch_types[i].name ; i++) {
+ err = db_export__branch_type(dbe, branch_types[i].branch_type,
+ branch_types[i].name);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp)
+{
+ int err;
+
+ if (cp->db_id)
+ return 0;
+
+ if (cp->parent) {
+ err = db_export__call_path(dbe, cp->parent);
+ if (err)
+ return err;
+ }
+
+ cp->db_id = ++dbe->call_path_last_db_id;
+
+ if (dbe->export_call_path)
+ return dbe->export_call_path(dbe, cp);
+
+ return 0;
+}
+
+int db_export__call_return(struct db_export *dbe, struct call_return *cr)
+{
+ int err;
+
+ if (cr->db_id)
+ return 0;
+
+ err = db_export__call_path(dbe, cr->cp);
+ if (err)
+ return err;
+
+ cr->db_id = ++dbe->call_return_last_db_id;
+
+ if (dbe->export_call_return)
+ return dbe->export_call_return(dbe, cr);
+
+ return 0;
+}
diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h
new file mode 100644
index 000000000..67bc6b8ad
--- /dev/null
+++ b/tools/perf/util/db-export.h
@@ -0,0 +1,109 @@
+/*
+ * db-export.h: Support for exporting data suitable for import to a database
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DB_EXPORT_H
+#define __PERF_DB_EXPORT_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+struct perf_evsel;
+struct machine;
+struct thread;
+struct comm;
+struct dso;
+struct perf_sample;
+struct addr_location;
+struct call_return_processor;
+struct call_path_root;
+struct call_path;
+struct call_return;
+
+struct export_sample {
+ union perf_event *event;
+ struct perf_sample *sample;
+ struct perf_evsel *evsel;
+ struct addr_location *al;
+ u64 db_id;
+ u64 comm_db_id;
+ u64 dso_db_id;
+ u64 sym_db_id;
+ u64 offset; /* ip offset from symbol start */
+ u64 addr_dso_db_id;
+ u64 addr_sym_db_id;
+ u64 addr_offset; /* addr offset from symbol start */
+ u64 call_path_id;
+};
+
+struct db_export {
+ int (*export_evsel)(struct db_export *dbe, struct perf_evsel *evsel);
+ int (*export_machine)(struct db_export *dbe, struct machine *machine);
+ int (*export_thread)(struct db_export *dbe, struct thread *thread,
+ u64 main_thread_db_id, struct machine *machine);
+ int (*export_comm)(struct db_export *dbe, struct comm *comm);
+ int (*export_comm_thread)(struct db_export *dbe, u64 db_id,
+ struct comm *comm, struct thread *thread);
+ int (*export_dso)(struct db_export *dbe, struct dso *dso,
+ struct machine *machine);
+ int (*export_symbol)(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso);
+ int (*export_branch_type)(struct db_export *dbe, u32 branch_type,
+ const char *name);
+ int (*export_sample)(struct db_export *dbe, struct export_sample *es);
+ int (*export_call_path)(struct db_export *dbe, struct call_path *cp);
+ int (*export_call_return)(struct db_export *dbe,
+ struct call_return *cr);
+ struct call_return_processor *crp;
+ struct call_path_root *cpr;
+ u64 evsel_last_db_id;
+ u64 machine_last_db_id;
+ u64 thread_last_db_id;
+ u64 comm_last_db_id;
+ u64 comm_thread_last_db_id;
+ u64 dso_last_db_id;
+ u64 symbol_last_db_id;
+ u64 sample_last_db_id;
+ u64 call_path_last_db_id;
+ u64 call_return_last_db_id;
+ struct list_head deferred;
+};
+
+int db_export__init(struct db_export *dbe);
+int db_export__flush(struct db_export *dbe);
+void db_export__exit(struct db_export *dbe);
+int db_export__evsel(struct db_export *dbe, struct perf_evsel *evsel);
+int db_export__machine(struct db_export *dbe, struct machine *machine);
+int db_export__thread(struct db_export *dbe, struct thread *thread,
+ struct machine *machine, struct comm *comm);
+int db_export__comm(struct db_export *dbe, struct comm *comm,
+ struct thread *main_thread);
+int db_export__comm_thread(struct db_export *dbe, struct comm *comm,
+ struct thread *thread);
+int db_export__dso(struct db_export *dbe, struct dso *dso,
+ struct machine *machine);
+int db_export__symbol(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso);
+int db_export__branch_type(struct db_export *dbe, u32 branch_type,
+ const char *name);
+int db_export__sample(struct db_export *dbe, union perf_event *event,
+ struct perf_sample *sample, struct perf_evsel *evsel,
+ struct addr_location *al);
+
+int db_export__branch_types(struct db_export *dbe);
+
+int db_export__call_path(struct db_export *dbe, struct call_path *cp);
+int db_export__call_return(struct db_export *dbe, struct call_return *cr);
+
+#endif /* __PERF_DB_EXPORT_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
new file mode 100644
index 000000000..3d6459626
--- /dev/null
+++ b/tools/perf/util/debug.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/* For general debugging purposes */
+
+#include "../perf.h"
+
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/wait.h>
+#include <api/debug.h>
+#include <linux/time64.h>
+#ifdef HAVE_BACKTRACE_SUPPORT
+#include <execinfo.h>
+#endif
+#include "cache.h"
+#include "color.h"
+#include "event.h"
+#include "debug.h"
+#include "print_binary.h"
+#include "util.h"
+#include "target.h"
+
+#include "sane_ctype.h"
+
+int verbose;
+bool dump_trace = false, quiet = false;
+int debug_ordered_events;
+static int redirect_to_stderr;
+int debug_data_convert;
+
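+/*
+ * Route messages to the TUI helpline when a browser UI is active (unless
+ * redirected to stderr via the 'stderr' debug variable), otherwise to stderr.
+ */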
+int veprintf(int level, int var, const char *fmt, va_list args)
+{
+ int ret = 0;
+
+ if (var >= level) {
+ if (use_browser >= 1 && !redirect_to_stderr)
+ ui_helpline__vshow(fmt, args);
+ else
+ ret = vfprintf(stderr, fmt, args);
+ }
+
+ return ret;
+}
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, fmt);
+ ret = veprintf(level, var, fmt, args);
+ va_end(args);
+
+ return ret;
+}
+
+static int veprintf_time(u64 t, const char *fmt, va_list args)
+{
+ int ret = 0;
+ u64 secs, usecs, nsecs = t;
+
+ secs = nsecs / NSEC_PER_SEC;
+ nsecs -= secs * NSEC_PER_SEC;
+ usecs = nsecs / NSEC_PER_USEC;
+
+ ret = fprintf(stderr, "[%13" PRIu64 ".%06" PRIu64 "] ",
+ secs, usecs);
+ ret += vfprintf(stderr, fmt, args);
+ return ret;
+}
+
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...)
+{
+ int ret = 0;
+ va_list args;
+
+ if (var >= level) {
+ va_start(args, fmt);
+ ret = veprintf_time(t, fmt, args);
+ va_end(args);
+ }
+
+ return ret;
+}
+
+/*
+ * Overrides libtraceevent's standard info print
+ * function; displayed with -v in perf.
+ */
+void pr_stat(const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ veprintf(1, verbose, fmt, args);
+ va_end(args);
+ eprintf(1, verbose, "\n");
+}
+
+int dump_printf(const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (dump_trace) {
+ va_start(args, fmt);
+ ret = vprintf(fmt, args);
+ va_end(args);
+ }
+
+ return ret;
+}
+
+static int trace_event_printer(enum binary_printer_ops op,
+ unsigned int val, void *extra, FILE *fp)
+{
+ const char *color = PERF_COLOR_BLUE;
+ union perf_event *event = (union perf_event *)extra;
+ unsigned char ch = (unsigned char)val;
+ int printed = 0;
+
+ switch (op) {
+ case BINARY_PRINT_DATA_BEGIN:
+ printed += fprintf(fp, ".");
+ printed += color_fprintf(fp, color, "\n. ... raw event: size %d bytes\n",
+ event->header.size);
+ break;
+ case BINARY_PRINT_LINE_BEGIN:
+ printed += fprintf(fp, ".");
+ break;
+ case BINARY_PRINT_ADDR:
+ printed += color_fprintf(fp, color, " %04x: ", val);
+ break;
+ case BINARY_PRINT_NUM_DATA:
+ printed += color_fprintf(fp, color, " %02x", val);
+ break;
+ case BINARY_PRINT_NUM_PAD:
+ printed += color_fprintf(fp, color, " ");
+ break;
+ case BINARY_PRINT_SEP:
+ printed += color_fprintf(fp, color, " ");
+ break;
+ case BINARY_PRINT_CHAR_DATA:
+ printed += color_fprintf(fp, color, "%c",
+ isprint(ch) ? ch : '.');
+ break;
+ case BINARY_PRINT_CHAR_PAD:
+ printed += color_fprintf(fp, color, " ");
+ break;
+ case BINARY_PRINT_LINE_END:
+ printed += color_fprintf(fp, color, "\n");
+ break;
+ case BINARY_PRINT_DATA_END:
+ printed += fprintf(fp, "\n");
+ break;
+ default:
+ break;
+ }
+
+ return printed;
+}
+
+void trace_event(union perf_event *event)
+{
+ unsigned char *raw_event = (void *)event;
+
+ if (!dump_trace)
+ return;
+
+ print_binary(raw_event, event->header.size, 16,
+ trace_event_printer, event);
+}
+
+static struct debug_variable {
+ const char *name;
+ int *ptr;
+} debug_variables[] = {
+ { .name = "verbose", .ptr = &verbose },
+ { .name = "ordered-events", .ptr = &debug_ordered_events},
+ { .name = "stderr", .ptr = &redirect_to_stderr},
+ { .name = "data-convert", .ptr = &debug_data_convert },
+ { .name = NULL, }
+};
+
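+/*
+ * Parse a --debug option such as "verbose=2" or a bare variable name,
+ * which implies a value of 1.
+ */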
+int perf_debug_option(const char *str)
+{
+ struct debug_variable *var = &debug_variables[0];
+ char *vstr, *s = strdup(str);
+ int v = 1;
+
+ vstr = strchr(s, '=');
+ if (vstr)
+ *vstr++ = 0;
+
+ while (var->name) {
+ if (!strcmp(s, var->name))
+ break;
+ var++;
+ }
+
+ if (!var->name) {
+ pr_err("Unknown debug variable name '%s'\n", s);
+ free(s);
+ return -1;
+ }
+
+ if (vstr) {
+ v = atoi(vstr);
+ /*
+		 * Allow only values in the range [0, 10];
+		 * anything outside it is set to 0.
+ */
+ v = (v < 0) || (v > 10) ? 0 : v;
+ }
+
+ if (quiet)
+ v = -1;
+
+ *var->ptr = v;
+ free(s);
+ return 0;
+}
+
+int perf_quiet_option(void)
+{
+ struct debug_variable *var = &debug_variables[0];
+
+ /* disable all debug messages */
+ while (var->name) {
+ *var->ptr = -1;
+ var++;
+ }
+
+ return 0;
+}
+
+#define DEBUG_WRAPPER(__n, __l) \
+static int pr_ ## __n ## _wrapper(const char *fmt, ...) \
+{ \
+ va_list args; \
+ int ret; \
+ \
+ va_start(args, fmt); \
+ ret = veprintf(__l, verbose, fmt, args); \
+ va_end(args); \
+ return ret; \
+}
+
+DEBUG_WRAPPER(warning, 0);
+DEBUG_WRAPPER(debug, 1);
+
+void perf_debug_setup(void)
+{
+ libapi_set_print(pr_warning_wrapper, pr_warning_wrapper, pr_debug_wrapper);
+}
+
+/* Obtain a backtrace and print it to stdout. */
+#ifdef HAVE_BACKTRACE_SUPPORT
+void dump_stack(void)
+{
+ void *array[16];
+ size_t size = backtrace(array, ARRAY_SIZE(array));
+ char **strings = backtrace_symbols(array, size);
+ size_t i;
+
+ printf("Obtained %zd stack frames.\n", size);
+
+ for (i = 0; i < size; i++)
+ printf("%s\n", strings[i]);
+
+ free(strings);
+}
+#else
+void dump_stack(void) {}
+#endif
+
+void sighandler_dump_stack(int sig)
+{
+ psignal(sig, "perf");
+ dump_stack();
+ signal(sig, SIG_DFL);
+ raise(sig);
+}
diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h
new file mode 100644
index 000000000..77445dfc5
--- /dev/null
+++ b/tools/perf/util/debug.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* For general debugging purposes */
+#ifndef __PERF_DEBUG_H
+#define __PERF_DEBUG_H
+
+#include <stdbool.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include "event.h"
+#include "../ui/helpline.h"
+#include "../ui/progress.h"
+#include "../ui/util.h"
+
+extern int verbose;
+extern bool quiet, dump_trace;
+extern int debug_ordered_events;
+extern int debug_data_convert;
+
+#ifndef pr_fmt
+#define pr_fmt(fmt) fmt
+#endif
+
+#define pr_err(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_warning(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_info(fmt, ...) \
+ eprintf(0, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug(fmt, ...) \
+ eprintf(1, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debugN(n, fmt, ...) \
+ eprintf(n, verbose, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug2(fmt, ...) pr_debugN(2, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug3(fmt, ...) pr_debugN(3, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define pr_time_N(n, var, t, fmt, ...) \
+ eprintf_time(n, var, t, fmt, ##__VA_ARGS__)
+
+#define pr_oe_time(t, fmt, ...) pr_time_N(1, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+#define pr_oe_time2(t, fmt, ...) pr_time_N(2, debug_ordered_events, t, pr_fmt(fmt), ##__VA_ARGS__)
+
+#define STRERR_BUFSIZE 128 /* For the buffer size of str_error_r */
+
+int dump_printf(const char *fmt, ...) __printf(1, 2);
+void trace_event(union perf_event *event);
+
+int ui__error(const char *format, ...) __printf(1, 2);
+int ui__warning(const char *format, ...) __printf(1, 2);
+
+void pr_stat(const char *fmt, ...);
+
+int eprintf(int level, int var, const char *fmt, ...) __printf(3, 4);
+int eprintf_time(int level, int var, u64 t, const char *fmt, ...) __printf(4, 5);
+int veprintf(int level, int var, const char *fmt, va_list args);
+
+int perf_debug_option(const char *str);
+void perf_debug_setup(void);
+int perf_quiet_option(void);
+
+void dump_stack(void);
+void sighandler_dump_stack(int sig);
+
+#endif /* __PERF_DEBUG_H */
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
new file mode 100644
index 000000000..e4c486756
--- /dev/null
+++ b/tools/perf/util/demangle-java.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+
+#include "demangle-java.h"
+
+#include "sane_ctype.h"
+
+enum {
+ MODE_PREFIX = 0,
+ MODE_CLASS = 1,
+ MODE_FUNC = 2,
+ MODE_TYPE = 3,
+	MODE_CTYPE = 3, /* class arg, same value as MODE_TYPE */
+};
+
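+/* JVM type descriptor characters mapped to their primitive type names. */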
+#define BASE_ENT(c, n) [c - 'A']=n
+static const char *base_types['Z' - 'A' + 1] = {
+ BASE_ENT('B', "byte" ),
+ BASE_ENT('C', "char" ),
+ BASE_ENT('D', "double" ),
+ BASE_ENT('F', "float" ),
+ BASE_ENT('I', "int" ),
+ BASE_ENT('J', "long" ),
+ BASE_ENT('S', "short" ),
+ BASE_ENT('Z', "bool" ),
+};
+
+/*
+ * Demangle the Java symbol between the str and end positions and store
+ * up to maxlen characters into buf. The parser starts in the given mode.
+ *
+ * Use MODE_PREFIX to process the entire prototype up to the end position.
+ * Use MODE_TYPE to process the return type if str starts on a return type char.
+ *
+ * Return:
+ * success: buf
+ * error : NULL
+ */
+static char *
+__demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int mode)
+{
+ int rlen = 0;
+ int array = 0;
+ int narg = 0;
+ const char *q;
+
+ if (!end)
+ end = str + strlen(str);
+
+ for (q = str; q != end; q++) {
+
+ if (rlen == (maxlen - 1))
+ break;
+
+ switch (*q) {
+ case 'L':
+ if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
+ if (mode == MODE_CTYPE) {
+ if (narg)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+ narg++;
+ }
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
+ if (mode == MODE_PREFIX)
+ mode = MODE_CLASS;
+ } else
+ buf[rlen++] = *q;
+ break;
+ case 'B':
+ case 'C':
+ case 'D':
+ case 'F':
+ case 'I':
+ case 'J':
+ case 'S':
+ case 'Z':
+ if (mode == MODE_TYPE) {
+ if (narg)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "%s", base_types[*q - 'A']);
+ while (array--)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+ array = 0;
+ narg++;
+ } else
+ buf[rlen++] = *q;
+ break;
+ case 'V':
+ if (mode == MODE_TYPE) {
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "void");
+ while (array--)
+ rlen += scnprintf(buf + rlen, maxlen - rlen, "[]");
+ array = 0;
+ } else
+ buf[rlen++] = *q;
+ break;
+ case '[':
+ if (mode != MODE_TYPE)
+ goto error;
+ array++;
+ break;
+ case '(':
+ if (mode != MODE_FUNC)
+ goto error;
+ buf[rlen++] = *q;
+ mode = MODE_TYPE;
+ break;
+ case ')':
+ if (mode != MODE_TYPE)
+ goto error;
+ buf[rlen++] = *q;
+ narg = 0;
+ break;
+ case ';':
+ if (mode != MODE_CLASS && mode != MODE_CTYPE)
+ goto error;
+ /* safe because at least one other char to process */
+ if (isalpha(*(q + 1)))
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+ if (mode == MODE_CLASS)
+ mode = MODE_FUNC;
+ else if (mode == MODE_CTYPE)
+ mode = MODE_TYPE;
+ break;
+ case '/':
+ if (mode != MODE_CLASS && mode != MODE_CTYPE)
+ goto error;
+ rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
+ break;
+		default:
+ buf[rlen++] = *q;
+ }
+ }
+ buf[rlen] = '\0';
+ return buf;
+error:
+ return NULL;
+}
+
+/*
+ * Demangle Java function signature (openJDK, not GCJ)
+ * input:
+ * str: string to parse. String is not modified
+ *	flags: combination of JAVA_DEMANGLE_* flags to modify demangling
+ * return:
+ * if input can be demangled, then a newly allocated string is returned.
+ * if input cannot be demangled, then NULL is returned
+ *
+ * Note: caller is responsible for freeing demangled string
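+ *
+ * Example (hand-traced against the parser above, for illustration only):
+ *   "Ljava/lang/String;equals(Ljava/lang/Object;)Z"
+ *   demangles to
+ *   "bool class java.lang.String.equals(class java.lang.Object)"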
+ */
+char *
+java_demangle_sym(const char *str, int flags)
+{
+ char *buf, *ptr;
+ char *p;
+ size_t len, l1 = 0;
+
+ if (!str)
+ return NULL;
+
+	/* find start of return type */
+ p = strrchr(str, ')');
+ if (!p)
+ return NULL;
+
+ /*
+	 * expansion factor estimated at 3x
+ */
+ len = strlen(str) * 3 + 1;
+ buf = malloc(len);
+ if (!buf)
+ return NULL;
+
+ buf[0] = '\0';
+ if (!(flags & JAVA_DEMANGLE_NORET)) {
+ /*
+ * get return type first
+ */
+ ptr = __demangle_java_sym(p + 1, NULL, buf, len, MODE_TYPE);
+ if (!ptr)
+ goto error;
+
+ /* add space between return type and function prototype */
+ l1 = strlen(buf);
+ buf[l1++] = ' ';
+ }
+
+ /* process function up to return type */
+ ptr = __demangle_java_sym(str, p + 1, buf + l1, len - l1, MODE_PREFIX);
+ if (!ptr)
+ goto error;
+
+ return buf;
+error:
+ free(buf);
+ return NULL;
+}
diff --git a/tools/perf/util/demangle-java.h b/tools/perf/util/demangle-java.h
new file mode 100644
index 000000000..f936c8eab
--- /dev/null
+++ b/tools/perf/util/demangle-java.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_JAVA
+#define __PERF_DEMANGLE_JAVA 1
+/*
+ * demangle function flags
+ */
+#define JAVA_DEMANGLE_NORET 0x1 /* do not process return type */
+
+char *java_demangle_sym(const char *str, int flags);
+
+#endif /* __PERF_DEMANGLE_JAVA */
diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c
new file mode 100644
index 000000000..423afbbd3
--- /dev/null
+++ b/tools/perf/util/demangle-rust.c
@@ -0,0 +1,270 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include "util.h"
+#include "debug.h"
+
+#include "demangle-rust.h"
+
+/*
+ * Mangled Rust symbols look like this:
+ *
+ * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
+ *
+ * The original symbol is:
+ *
+ * <std::sys::fd::FileDesc as core::ops::Drop>::drop
+ *
+ * The last component of the path is a 64-bit hash in lowercase hex, prefixed
+ * with "h". Rust does not have a global namespace between crates, an illusion
+ * which Rust maintains by using the hash to distinguish things that would
+ * otherwise have the same symbol.
+ *
+ * Any path component not starting with a XID_Start character is prefixed with
+ * "_".
+ *
+ * The following escape sequences are used:
+ *
+ * "," => $C$
+ * "@" => $SP$
+ * "*" => $BP$
+ * "&" => $RF$
+ * "<" => $LT$
+ * ">" => $GT$
+ * "(" => $LP$
+ * ")" => $RP$
+ * " " => $u20$
+ * "'" => $u27$
+ * "[" => $u5b$
+ * "]" => $u5d$
+ * "~" => $u7e$
+ *
+ * A double ".." means "::" and a single "." means "-".
+ *
+ * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
+ */
+
+static const char *hash_prefix = "::h";
+static const size_t hash_prefix_len = 3;
+static const size_t hash_len = 16;
+
+static bool is_prefixed_hash(const char *start);
+static bool looks_like_rust(const char *sym, size_t len);
+static bool unescape(const char **in, char **out, const char *seq, char value);
+
+/*
+ * INPUT:
+ * sym: symbol that has been through BFD-demangling
+ *
+ * This function looks for the following indicators:
+ *
+ * 1. The hash must consist of "h" followed by 16 lowercase hex digits.
+ *
+ * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
+ * hex digits. This is true of 99.9998% of hashes so once in your life you
+ * may see a false negative. The point is to notice path components that
+ * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
+ * this case a false positive (non-Rust symbol has an important path
+ * component removed because it looks like a Rust hash) is worse than a
+ * false negative (the rare Rust symbol is not demangled) so this sets the
+ * balance in favor of false negatives.
+ *
+ * 3. There must be no characters other than a-zA-Z0-9 and _.:$
+ *
+ * 4. There must be no unrecognized $-sign sequences.
+ *
+ * 5. There must be no sequence of three or more dots in a row ("...").
+ */
+bool
+rust_is_mangled(const char *sym)
+{
+ size_t len, len_without_hash;
+
+ if (!sym)
+ return false;
+
+ len = strlen(sym);
+ if (len <= hash_prefix_len + hash_len)
+ /* Not long enough to contain "::h" + hash + something else */
+ return false;
+
+ len_without_hash = len - (hash_prefix_len + hash_len);
+ if (!is_prefixed_hash(sym + len_without_hash))
+ return false;
+
+ return looks_like_rust(sym, len_without_hash);
+}
+
+/*
+ * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
+ * digits must comprise between 5 and 15 (inclusive) distinct digits.
+ */
+static bool is_prefixed_hash(const char *str)
+{
+ const char *end;
+ bool seen[16];
+ size_t i;
+ int count;
+
+ if (strncmp(str, hash_prefix, hash_prefix_len))
+ return false;
+ str += hash_prefix_len;
+
+ memset(seen, false, sizeof(seen));
+ for (end = str + hash_len; str < end; str++)
+ if (*str >= '0' && *str <= '9')
+ seen[*str - '0'] = true;
+ else if (*str >= 'a' && *str <= 'f')
+ seen[*str - 'a' + 10] = true;
+ else
+ return false;
+
+ /* Count how many distinct digits seen */
+ count = 0;
+ for (i = 0; i < 16; i++)
+ if (seen[i])
+ count++;
+
+ return count >= 5 && count <= 15;
+}
+
+static bool looks_like_rust(const char *str, size_t len)
+{
+ const char *end = str + len;
+
+ while (str < end)
+ switch (*str) {
+ case '$':
+ if (!strncmp(str, "$C$", 3))
+ str += 3;
+ else if (!strncmp(str, "$SP$", 4)
+ || !strncmp(str, "$BP$", 4)
+ || !strncmp(str, "$RF$", 4)
+ || !strncmp(str, "$LT$", 4)
+ || !strncmp(str, "$GT$", 4)
+ || !strncmp(str, "$LP$", 4)
+ || !strncmp(str, "$RP$", 4))
+ str += 4;
+ else if (!strncmp(str, "$u20$", 5)
+ || !strncmp(str, "$u27$", 5)
+ || !strncmp(str, "$u5b$", 5)
+ || !strncmp(str, "$u5d$", 5)
+ || !strncmp(str, "$u7e$", 5))
+ str += 5;
+ else
+ return false;
+ break;
+ case '.':
+ /* Do not allow three or more consecutive dots */
+ if (!strncmp(str, "...", 3))
+ return false;
+ /* Fall through */
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case '_':
+ case ':':
+ str++;
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * INPUT:
+ * sym: symbol for which rust_is_mangled(sym) returns true
+ *
+ * The input is demangled in-place because the mangled name is always longer
+ * than the demangled one.
+ */
+void
+rust_demangle_sym(char *sym)
+{
+ const char *in;
+ char *out;
+ const char *end;
+
+ if (!sym)
+ return;
+
+ in = sym;
+ out = sym;
+ end = sym + strlen(sym) - (hash_prefix_len + hash_len);
+
+ while (in < end)
+ switch (*in) {
+ case '$':
+ if (!(unescape(&in, &out, "$C$", ',')
+ || unescape(&in, &out, "$SP$", '@')
+ || unescape(&in, &out, "$BP$", '*')
+ || unescape(&in, &out, "$RF$", '&')
+ || unescape(&in, &out, "$LT$", '<')
+ || unescape(&in, &out, "$GT$", '>')
+ || unescape(&in, &out, "$LP$", '(')
+ || unescape(&in, &out, "$RP$", ')')
+ || unescape(&in, &out, "$u20$", ' ')
+ || unescape(&in, &out, "$u27$", '\'')
+ || unescape(&in, &out, "$u5b$", '[')
+ || unescape(&in, &out, "$u5d$", ']')
+ || unescape(&in, &out, "$u7e$", '~'))) {
+				pr_err("demangle-rust: unexpected escape sequence\n");
+ goto done;
+ }
+ break;
+ case '_':
+ /*
+ * If this is the start of a path component and the next
+ * character is an escape sequence, ignore the
+ * underscore. The mangler inserts an underscore to make
+ * sure the path component begins with a XID_Start
+ * character.
+ */
+ if ((in == sym || in[-1] == ':') && in[1] == '$')
+ in++;
+ else
+ *out++ = *in++;
+ break;
+ case '.':
+ if (in[1] == '.') {
+ /* ".." becomes "::" */
+ *out++ = ':';
+ *out++ = ':';
+ in += 2;
+ } else {
+ /* "." becomes "-" */
+ *out++ = '-';
+ in++;
+ }
+ break;
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ case '0' ... '9':
+ case ':':
+ *out++ = *in++;
+ break;
+ default:
+ pr_err("demangle-rust: unexpected character '%c' in symbol\n",
+ *in);
+ goto done;
+ }
+
+done:
+ *out = '\0';
+}
+
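+/*
+ * If *in begins with the escape sequence 'seq', write 'value' to *out and
+ * advance both cursors; otherwise leave them untouched and return false.
+ */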
+static bool unescape(const char **in, char **out, const char *seq, char value)
+{
+ size_t len = strlen(seq);
+
+ if (strncmp(*in, seq, len))
+ return false;
+
+ **out = value;
+
+ *in += len;
+ *out += 1;
+
+ return true;
+}
diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h
new file mode 100644
index 000000000..2fca618b1
--- /dev/null
+++ b/tools/perf/util/demangle-rust.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DEMANGLE_RUST
+#define __PERF_DEMANGLE_RUST 1
+
+bool rust_is_mangled(const char *str);
+void rust_demangle_sym(char *str);
+
+#endif /* __PERF_DEMANGLE_RUST */
diff --git a/tools/perf/util/drv_configs.c b/tools/perf/util/drv_configs.c
new file mode 100644
index 000000000..eec754243
--- /dev/null
+++ b/tools/perf/util/drv_configs.c
@@ -0,0 +1,78 @@
+/*
+ * drv_configs.c: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include "drv_configs.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "pmu.h"
+#include <errno.h>
+
+static int
+perf_evsel__apply_drv_configs(struct perf_evsel *evsel,
+ struct perf_evsel_config_term **err_term)
+{
+ bool found = false;
+ int err = 0;
+ struct perf_evsel_config_term *term;
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ if (pmu->type == evsel->attr.type) {
+ found = true;
+ break;
+ }
+
+ list_for_each_entry(term, &evsel->config_terms, list) {
+ if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+ continue;
+
+ /*
+ * We have a configuration term, report an error if we
+ * can't find the PMU or if the PMU driver doesn't support
+ * cmd line driver configuration.
+ */
+ if (!found || !pmu->set_drv_config) {
+ err = -EINVAL;
+ *err_term = term;
+ break;
+ }
+
+ err = pmu->set_drv_config(term);
+ if (err) {
+ *err_term = term;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+ struct perf_evsel **err_evsel,
+ struct perf_evsel_config_term **err_term)
+{
+ struct perf_evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ err = perf_evsel__apply_drv_configs(evsel, err_term);
+ if (err) {
+ *err_evsel = evsel;
+ break;
+ }
+ }
+
+ return err;
+}
diff --git a/tools/perf/util/drv_configs.h b/tools/perf/util/drv_configs.h
new file mode 100644
index 000000000..32bc9babc
--- /dev/null
+++ b/tools/perf/util/drv_configs.h
@@ -0,0 +1,26 @@
+/*
+ * drv_configs.h: Interface to apply PMU specific configuration
+ * Copyright (c) 2016-2018, Linaro Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_DRV_CONFIGS_H
+#define __PERF_DRV_CONFIGS_H
+
+#include "evlist.h"
+#include "evsel.h"
+
+int perf_evlist__apply_drv_configs(struct perf_evlist *evlist,
+ struct perf_evsel **err_evsel,
+ struct perf_evsel_config_term **term);
+#endif /* __PERF_DRV_CONFIGS_H */
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
new file mode 100644
index 000000000..1231f3181
--- /dev/null
+++ b/tools/perf/util/dso.c
@@ -0,0 +1,1541 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/bug.h>
+#include <linux/kernel.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include "compress.h"
+#include "path.h"
+#include "symbol.h"
+#include "srcline.h"
+#include "dso.h"
+#include "machine.h"
+#include "auxtrace.h"
+#include "util.h"
+#include "debug.h"
+#include "string2.h"
+#include "vdso.h"
+
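+/*
+ * Candidate locations for a .gnu_debuglink file, tried in order; the
+ * "%.0s%s" entry swallows the directory and tries the bare debuglink
+ * name first.
+ */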
+static const char * const debuglink_paths[] = {
+ "%.0s%s",
+ "%s/%s",
+ "%s/.debug/%s",
+ "/usr/lib/debug%s/%s"
+};
+
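+/* One-letter tag describing where the DSO's symbol table came from. */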
+char dso__symtab_origin(const struct dso *dso)
+{
+ static const char origin[] = {
+ [DSO_BINARY_TYPE__KALLSYMS] = 'k',
+ [DSO_BINARY_TYPE__VMLINUX] = 'v',
+ [DSO_BINARY_TYPE__JAVA_JIT] = 'j',
+ [DSO_BINARY_TYPE__DEBUGLINK] = 'l',
+ [DSO_BINARY_TYPE__BUILD_ID_CACHE] = 'B',
+ [DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D',
+ [DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f',
+ [DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u',
+ [DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO] = 'x',
+ [DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o',
+ [DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b',
+ [DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd',
+ [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE] = 'K',
+ [DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP] = 'm',
+ [DSO_BINARY_TYPE__GUEST_KALLSYMS] = 'g',
+ [DSO_BINARY_TYPE__GUEST_KMODULE] = 'G',
+ [DSO_BINARY_TYPE__GUEST_KMODULE_COMP] = 'M',
+ [DSO_BINARY_TYPE__GUEST_VMLINUX] = 'V',
+ };
+
+ if (dso == NULL || dso->symtab_type == DSO_BINARY_TYPE__NOT_FOUND)
+ return '!';
+ return origin[dso->symtab_type];
+}
+
+int dso__read_binary_type_filename(const struct dso *dso,
+ enum dso_binary_type type,
+ char *root_dir, char *filename, size_t size)
+{
+ char build_id_hex[SBUILD_ID_SIZE];
+ int ret = 0;
+ size_t len;
+
+ switch (type) {
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ {
+ const char *last_slash;
+ char dso_dir[PATH_MAX];
+ char symfile[PATH_MAX];
+ unsigned int i;
+
+ len = __symbol__join_symfs(filename, size, dso->long_name);
+ last_slash = filename + len;
+ while (last_slash != filename && *last_slash != '/')
+ last_slash--;
+
+ strncpy(dso_dir, filename, last_slash - filename);
+ dso_dir[last_slash-filename] = '\0';
+
+ if (!is_regular_file(filename)) {
+ ret = -1;
+ break;
+ }
+
+ ret = filename__read_debuglink(filename, symfile, PATH_MAX);
+ if (ret)
+ break;
+
+ /* Check predefined locations where debug file might reside */
+ ret = -1;
+ for (i = 0; i < ARRAY_SIZE(debuglink_paths); i++) {
+ snprintf(filename, size,
+ debuglink_paths[i], dso_dir, symfile);
+ if (is_regular_file(filename)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ break;
+ }
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ if (dso__build_id_filename(dso, filename, size, false) == NULL)
+ ret = -1;
+ break;
+
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ if (dso__build_id_filename(dso, filename, size, true) == NULL)
+ ret = -1;
+ break;
+
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s.debug", dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s", dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ /*
+ * Ubuntu can mixup /usr/lib with /lib, putting debuginfo in
+ * /usr/lib/debug/lib when it is expected to be in
+ * /usr/lib/debug/usr/lib
+ */
+ if (strlen(dso->long_name) < 9 ||
+ strncmp(dso->long_name, "/usr/lib/", 9)) {
+ ret = -1;
+ break;
+ }
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s", dso->long_name + 4);
+ break;
+
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ {
+ const char *last_slash;
+ size_t dir_size;
+
+ last_slash = dso->long_name + dso->long_name_len;
+ while (last_slash != dso->long_name && *last_slash != '/')
+ last_slash--;
+
+ len = __symbol__join_symfs(filename, size, "");
+ dir_size = last_slash - dso->long_name + 2;
+ if (dir_size > (size - len)) {
+ ret = -1;
+ break;
+ }
+ len += scnprintf(filename + len, dir_size, "%s", dso->long_name);
+ len += scnprintf(filename + len , size - len, ".debug%s",
+ last_slash);
+ break;
+ }
+
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ if (!dso->has_build_id) {
+ ret = -1;
+ break;
+ }
+
+ build_id__sprintf(dso->build_id,
+ sizeof(dso->build_id),
+ build_id_hex);
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug/.build-id/");
+ snprintf(filename + len, size - len, "%.2s/%s.debug",
+ build_id_hex, build_id_hex + 2);
+ break;
+
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ __symbol__join_symfs(filename, size, dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ path__join3(filename, size, symbol_conf.symfs,
+ root_dir, dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ __symbol__join_symfs(filename, size, dso->long_name);
+ break;
+
+ case DSO_BINARY_TYPE__KCORE:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ snprintf(filename, size, "%s", dso->long_name);
+ break;
+
+ default:
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+enum {
+ COMP_ID__NONE = 0,
+};
+
+static const struct {
+ const char *fmt;
+ int (*decompress)(const char *input, int output);
+ bool (*is_compressed)(const char *input);
+} compressions[] = {
+ [COMP_ID__NONE] = { .fmt = NULL, },
+#ifdef HAVE_ZLIB_SUPPORT
+ { "gz", gzip_decompress_to_file, gzip_is_compressed },
+#endif
+#ifdef HAVE_LZMA_SUPPORT
+ { "xz", lzma_decompress_to_file, lzma_is_compressed },
+#endif
+ { NULL, NULL, NULL },
+};
+
+static int is_supported_compression(const char *ext)
+{
+ unsigned i;
+
+ for (i = 1; compressions[i].fmt; i++) {
+ if (!strcmp(ext, compressions[i].fmt))
+ return i;
+ }
+ return COMP_ID__NONE;
+}
+
+bool is_kernel_module(const char *pathname, int cpumode)
+{
+ struct kmod_path m;
+ int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ WARN_ONCE(mode != cpumode,
+ "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+ cpumode);
+
+ switch (mode) {
+ case PERF_RECORD_MISC_USER:
+ case PERF_RECORD_MISC_HYPERVISOR:
+ case PERF_RECORD_MISC_GUEST_USER:
+ return false;
+ /* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+ default:
+ if (kmod_path__parse(&m, pathname)) {
+ pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+ pathname);
+ return true;
+ }
+ }
+
+ return m.kmod;
+}
+
+bool dso__needs_decompress(struct dso *dso)
+{
+ return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+}
+
+static int decompress_kmodule(struct dso *dso, const char *name,
+ char *pathname, size_t len)
+{
+ char tmpbuf[] = KMOD_DECOMP_NAME;
+ int fd = -1;
+
+ if (!dso__needs_decompress(dso))
+ return -1;
+
+ if (dso->comp == COMP_ID__NONE)
+ return -1;
+
+ /*
+	 * We have a proper compression id for the DSO, and yet the file
+	 * behind the 'name' can still be a plain uncompressed object.
+	 *
+	 * The reason lies in the logic by which we open DSO object files:
+	 * we try all possible 'debug' objects until we find the data.
+	 * So even if the DSO is represented by the 'krava.xz' module,
+	 * we can end up here opening the '~/.debug/....23432432/debug' file,
+	 * which is not compressed.
+ *
+ * To keep this transparent, we detect this and return the file
+ * descriptor to the uncompressed file.
+ */
+ if (!compressions[dso->comp].is_compressed(name))
+ return open(name, O_RDONLY);
+
+ fd = mkstemp(tmpbuf);
+ if (fd < 0) {
+ dso->load_errno = errno;
+ return -1;
+ }
+
+ if (compressions[dso->comp].decompress(name, fd)) {
+ dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE;
+ close(fd);
+ fd = -1;
+ }
+
+ if (!pathname || (fd < 0))
+ unlink(tmpbuf);
+
+ if (pathname && (fd >= 0))
+ strlcpy(pathname, tmpbuf, len);
+
+ return fd;
+}
+
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name)
+{
+ return decompress_kmodule(dso, name, NULL, 0);
+}
+
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+ char *pathname, size_t len)
+{
+ int fd = decompress_kmodule(dso, name, pathname, len);
+
+ close(fd);
+ return fd >= 0 ? 0 : -1;
+}
+
+/*
+ * Parses the kernel module specified in @path and updates the
+ * @m argument as follows:
+ *
+ * @comp - supported compression id if @path carries a supported
+ *         compression suffix, COMP_ID__NONE otherwise
+ * @kmod - true if @path contains a '.ko' suffix in the right position,
+ *         false otherwise
+ * @name - if (@alloc_name && @kmod) is true, it contains the strdup-ed base
+ *         name of the kernel module without suffixes, otherwise the
+ *         strdup-ed base name of @path
+ *
+ * Returns 0 if there's no strdup error, -ENOMEM otherwise.
+ */
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+ bool alloc_name)
+{
+ const char *name = strrchr(path, '/');
+ const char *ext = strrchr(path, '.');
+ bool is_simple_name = false;
+
+ memset(m, 0x0, sizeof(*m));
+ name = name ? name + 1 : path;
+
+ /*
+ * '.' is also a valid character for module name. For example:
+ * [aaa.bbb] is a valid module name. '[' should have higher
+ * priority than '.ko' suffix.
+ *
+ * The kernel names are from machine__mmap_name. Such
+ * name should belong to kernel itself, not kernel module.
+ */
+ if (name[0] == '[') {
+ is_simple_name = true;
+ if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+ (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+ (strncmp(name, "[vdso]", 6) == 0) ||
+ (strncmp(name, "[vdso32]", 8) == 0) ||
+ (strncmp(name, "[vdsox32]", 9) == 0) ||
+ (strncmp(name, "[vsyscall]", 10) == 0)) {
+ m->kmod = false;
+
+ } else
+ m->kmod = true;
+ }
+
+ /* No extension, just return name. */
+ if ((ext == NULL) || is_simple_name) {
+ if (alloc_name) {
+ m->name = strdup(name);
+ return m->name ? 0 : -ENOMEM;
+ }
+ return 0;
+ }
+
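+	/*
+	 * If a supported compression suffix was found, step back over it
+	 * (".gz" and ".xz" are both three characters) so the '.ko' check
+	 * below looks at the right position.
+	 */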
+ m->comp = is_supported_compression(ext + 1);
+ if (m->comp > COMP_ID__NONE)
+ ext -= 3;
+
+ /* Check .ko extension only if there's enough name left. */
+ if (ext > name)
+ m->kmod = !strncmp(ext, ".ko", 3);
+
+ if (alloc_name) {
+ if (m->kmod) {
+ if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1)
+ return -ENOMEM;
+ } else {
+ if (asprintf(&m->name, "%s", name) == -1)
+ return -ENOMEM;
+ }
+
+ strxfrchar(m->name, '-', '_');
+ }
+
+ return 0;
+}
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+ struct machine *machine)
+{
+ if (machine__is_host(machine))
+ dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
+ else
+ dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE;
+
+ /* _KMODULE_COMP should be next to _KMODULE */
+ if (m->kmod && m->comp) {
+ dso->symtab_type++;
+ dso->comp = m->comp;
+ }
+
+ dso__set_short_name(dso, strdup(m->name), true);
+}
+
+/*
+ * Global list of open DSOs and the counter.
+ */
+static LIST_HEAD(dso__data_open);
+static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void dso__list_add(struct dso *dso)
+{
+ list_add_tail(&dso->data.open_entry, &dso__data_open);
+ dso__data_open_cnt++;
+}
+
+static void dso__list_del(struct dso *dso)
+{
+ list_del(&dso->data.open_entry);
+ WARN_ONCE(dso__data_open_cnt <= 0,
+ "DSO data fd counter out of bounds.");
+ dso__data_open_cnt--;
+}
+
+static void close_first_dso(void);
+
+static int do_open(char *name)
+{
+ int fd;
+ char sbuf[STRERR_BUFSIZE];
+
+ do {
+ fd = open(name, O_RDONLY|O_CLOEXEC);
+ if (fd >= 0)
+ return fd;
+
+ pr_debug("dso open failed: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ if (!dso__data_open_cnt || errno != EMFILE)
+ break;
+
+ close_first_dso();
+ } while (1);
+
+ return -1;
+}
+
+static int __open_dso(struct dso *dso, struct machine *machine)
+{
+ int fd = -EINVAL;
+ char *root_dir = (char *)"";
+ char *name = malloc(PATH_MAX);
+ bool decomp = false;
+
+ if (!name)
+ return -ENOMEM;
+
+ if (machine)
+ root_dir = machine->root_dir;
+
+ if (dso__read_binary_type_filename(dso, dso->binary_type,
+ root_dir, name, PATH_MAX))
+ goto out;
+
+ if (!is_regular_file(name))
+ goto out;
+
+ if (dso__needs_decompress(dso)) {
+ char newpath[KMOD_DECOMP_LEN];
+ size_t len = sizeof(newpath);
+
+ if (dso__decompress_kmodule_path(dso, name, newpath, len) < 0) {
+ fd = -dso->load_errno;
+ goto out;
+ }
+
+ decomp = true;
+ strcpy(name, newpath);
+ }
+
+ fd = do_open(name);
+
+ if (decomp)
+ unlink(name);
+
+out:
+ free(name);
+ return fd;
+}
+
+static void check_data_close(void);
+
+/**
+ * open_dso - Open DSO data file
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Opens @dso's data file descriptor and updates the
+ * list/count of open DSO objects.
+ */
+static int open_dso(struct dso *dso, struct machine *machine)
+{
+ int fd;
+ struct nscookie nsc;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ fd = __open_dso(dso, machine);
+ if (dso->binary_type != DSO_BINARY_TYPE__BUILD_ID_CACHE)
+ nsinfo__mountns_exit(&nsc);
+
+ if (fd >= 0) {
+ dso__list_add(dso);
+ /*
+ * Check if we crossed the allowed number
+ * of opened DSOs and close one if needed.
+ */
+ check_data_close();
+ }
+
+ return fd;
+}
+
+static void close_data_fd(struct dso *dso)
+{
+ if (dso->data.fd >= 0) {
+ close(dso->data.fd);
+ dso->data.fd = -1;
+ dso->data.file_size = 0;
+ dso__list_del(dso);
+ }
+}
+
+/**
+ * close_dso - Close DSO data file
+ * @dso: dso object
+ *
+ * Closes @dso's data file descriptor and updates the
+ * list/count of open DSO objects.
+ */
+static void close_dso(struct dso *dso)
+{
+ close_data_fd(dso);
+}
+
+static void close_first_dso(void)
+{
+ struct dso *dso;
+
+ dso = list_first_entry(&dso__data_open, struct dso, data.open_entry);
+ close_dso(dso);
+}
+
+static rlim_t get_fd_limit(void)
+{
+ struct rlimit l;
+ rlim_t limit = 0;
+
+ /* Allow half of the current open fd limit. */
+ if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+ if (l.rlim_cur == RLIM_INFINITY)
+ limit = l.rlim_cur;
+ else
+ limit = l.rlim_cur / 2;
+ } else {
+ pr_err("failed to get fd limit\n");
+ limit = 1;
+ }
+
+ return limit;
+}
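+
+/*
+ * For example (illustrative, not part of the original source): with a
+ * typical soft limit of 1024 open files this caches up to half (512)
+ * dso file descriptors, while an unlimited RLIMIT_NOFILE disables the
+ * cap entirely.
+ */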
+
+static rlim_t fd_limit;
+
+/*
+ * Used only by tests/dso-data.c to reset the environment
+ * for tests. I don't expect we should change this during
+ * standard runtime.
+ */
+void reset_fd_limit(void)
+{
+ fd_limit = 0;
+}
+
+static bool may_cache_fd(void)
+{
+ if (!fd_limit)
+ fd_limit = get_fd_limit();
+
+ if (fd_limit == RLIM_INFINITY)
+ return true;
+
+ return fd_limit > (rlim_t) dso__data_open_cnt;
+}
+
+/*
+ * Check and close the LRU dso if we crossed the allowed limit
+ * of open dso file descriptors. The limit is half of the
+ * RLIMIT_NOFILE soft limit.
+ */
+static void check_data_close(void)
+{
+ bool cache_fd = may_cache_fd();
+
+ if (!cache_fd)
+ close_first_dso();
+}
+
+/**
+ * dso__data_close - Close DSO data file
+ * @dso: dso object
+ *
+ * External interface to close @dso's data file descriptor.
+ */
+void dso__data_close(struct dso *dso)
+{
+ pthread_mutex_lock(&dso__data_open_lock);
+ close_dso(dso);
+ pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
+{
+ enum dso_binary_type binary_type_data[] = {
+ DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+ };
+ int i = 0;
+
+ if (dso->data.fd >= 0)
+ return;
+
+ if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
+ dso->data.fd = open_dso(dso, machine);
+ goto out;
+ }
+
+ do {
+ dso->binary_type = binary_type_data[i++];
+
+ dso->data.fd = open_dso(dso, machine);
+ if (dso->data.fd >= 0)
+ goto out;
+
+ } while (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND);
+out:
+ if (dso->data.fd >= 0)
+ dso->data.status = DSO_DATA_STATUS_OK;
+ else
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+}
+
+/**
+ * dso__data_get_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find the dso's file, open it and
+ * return the file descriptor. It must be paired with
+ * dso__data_put_fd() if it returns a non-negative value.
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
+{
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+ return -1;
+
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0)
+ pthread_mutex_unlock(&dso__data_open_lock);
+
+ return dso->data.fd;
+}
+
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+ pthread_mutex_unlock(&dso__data_open_lock);
+}
+
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
+{
+ u32 flag = 1 << by;
+
+ if (dso->data.status_seen & flag)
+ return true;
+
+ dso->data.status_seen |= flag;
+
+ return false;
+}
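+
+/*
+ * Illustrative behaviour (not part of the original source): the first
+ * call for a given 'by' flag returns false and records it; subsequent
+ * calls return true, so callers can emit one-time warnings per status
+ * consumer.
+ */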
+
+static void
+dso_cache__free(struct dso *dso)
+{
+ struct rb_root *root = &dso->data.cache;
+ struct rb_node *next = rb_first(root);
+
+ pthread_mutex_lock(&dso->lock);
+ while (next) {
+ struct dso_cache *cache;
+
+ cache = rb_entry(next, struct dso_cache, rb_node);
+ next = rb_next(&cache->rb_node);
+ rb_erase(&cache->rb_node, root);
+ free(cache);
+ }
+ pthread_mutex_unlock(&dso->lock);
+}
+
+static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
+{
+ const struct rb_root *root = &dso->data.cache;
+ struct rb_node * const *p = &root->rb_node;
+ const struct rb_node *parent = NULL;
+ struct dso_cache *cache;
+
+ while (*p != NULL) {
+ u64 end;
+
+ parent = *p;
+ cache = rb_entry(parent, struct dso_cache, rb_node);
+ end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+ if (offset < cache->offset)
+ p = &(*p)->rb_left;
+ else if (offset >= end)
+ p = &(*p)->rb_right;
+ else
+ return cache;
+ }
+
+ return NULL;
+}
+
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
+{
+ struct rb_root *root = &dso->data.cache;
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct dso_cache *cache;
+ u64 offset = new->offset;
+
+ pthread_mutex_lock(&dso->lock);
+ while (*p != NULL) {
+ u64 end;
+
+ parent = *p;
+ cache = rb_entry(parent, struct dso_cache, rb_node);
+ end = cache->offset + DSO__DATA_CACHE_SIZE;
+
+ if (offset < cache->offset)
+ p = &(*p)->rb_left;
+ else if (offset >= end)
+ p = &(*p)->rb_right;
+ else
+ goto out;
+ }
+
+ rb_link_node(&new->rb_node, parent, p);
+ rb_insert_color(&new->rb_node, root);
+
+ cache = NULL;
+out:
+ pthread_mutex_unlock(&dso->lock);
+ return cache;
+}
+
+static ssize_t
+dso_cache__memcpy(struct dso_cache *cache, u64 offset,
+ u8 *data, u64 size)
+{
+ u64 cache_offset = offset - cache->offset;
+ u64 cache_size = min(cache->size - cache_offset, size);
+
+ memcpy(data, cache->data + cache_offset, cache_size);
+ return cache_size;
+}
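+
+/*
+ * Illustrative arithmetic (not from the original source): with a cache
+ * entry at offset 0x1000 holding 4096 bytes, a request for 100 bytes at
+ * offset 0x1ff0 copies min(4096 - 0xff0, 100) = 16 bytes; the caller's
+ * read loop then continues with the next cache chunk.
+ */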
+
+static ssize_t
+dso_cache__read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ struct dso_cache *cache;
+ struct dso_cache *old;
+ ssize_t ret;
+
+ do {
+ u64 cache_offset;
+
+ cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
+ if (!cache)
+ return -ENOMEM;
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ ret = -errno;
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ break;
+ }
+
+ cache_offset = offset & DSO__DATA_CACHE_MASK;
+
+ ret = pread(dso->data.fd, cache->data, DSO__DATA_CACHE_SIZE, cache_offset);
+ if (ret <= 0)
+ break;
+
+ cache->offset = cache_offset;
+ cache->size = ret;
+ } while (0);
+
+ pthread_mutex_unlock(&dso__data_open_lock);
+
+ if (ret > 0) {
+ old = dso_cache__insert(dso, cache);
+ if (old) {
+ /* We lost the race; use the existing entry. */
+ free(cache);
+ cache = old;
+ }
+
+ ret = dso_cache__memcpy(cache, offset, data, size);
+ }
+
+ if (ret <= 0)
+ free(cache);
+
+ return ret;
+}
+
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ struct dso_cache *cache;
+
+ cache = dso_cache__find(dso, offset);
+ if (cache)
+ return dso_cache__memcpy(cache, offset, data, size);
+ else
+ return dso_cache__read(dso, machine, offset, data, size);
+}
+
+/*
+ * Reads dso data in DSO__DATA_CACHE_SIZE chunks and caches them
+ * in the rb_tree. Any read of already cached data is served
+ * from the cache.
+ */
+static ssize_t cached_read(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ ssize_t r = 0;
+ u8 *p = data;
+
+ do {
+ ssize_t ret;
+
+ ret = dso_cache_read(dso, machine, offset, p, size);
+ if (ret < 0)
+ return ret;
+
+ /* Reached EOF, return what we have. */
+ if (!ret)
+ break;
+
+ BUG_ON(ret > size);
+
+ r += ret;
+ p += ret;
+ offset += ret;
+ size -= ret;
+
+ } while (size);
+
+ return r;
+}
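+
+/*
+ * For example (illustrative only): a 6000-byte read at offset 0 is
+ * served as one full 4096-byte chunk plus the first 1904 bytes of the
+ * next chunk; both chunks end up cached in the rb_tree for later reads.
+ */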
+
+static int data_file_size(struct dso *dso, struct machine *machine)
+{
+ int ret = 0;
+ struct stat st;
+ char sbuf[STRERR_BUFSIZE];
+
+ if (dso->data.file_size)
+ return 0;
+
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ pthread_mutex_lock(&dso__data_open_lock);
+
+ /*
+ * dso->data.fd might be closed if other thread opened another
+ * file (dso) due to open file limit (RLIMIT_NOFILE).
+ */
+ try_to_open_dso(dso, machine);
+
+ if (dso->data.fd < 0) {
+ ret = -errno;
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ goto out;
+ }
+
+ if (fstat(dso->data.fd, &st) < 0) {
+ ret = -errno;
+ pr_err("dso cache fstat failed: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ dso->data.status = DSO_DATA_STATUS_ERROR;
+ goto out;
+ }
+ dso->data.file_size = st.st_size;
+
+out:
+ pthread_mutex_unlock(&dso__data_open_lock);
+ return ret;
+}
+
+/**
+ * dso__data_size - Return dso data size
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * Return: dso data size
+ */
+off_t dso__data_size(struct dso *dso, struct machine *machine)
+{
+ if (data_file_size(dso, machine))
+ return -1;
+
+ /* For now, just estimate that the dso data size is close to the file size. */
+ return dso->data.file_size;
+}
+
+static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ if (data_file_size(dso, machine))
+ return -1;
+
+ /* Check the offset sanity. */
+ if (offset > dso->data.file_size)
+ return -1;
+
+ if (offset + size < offset)
+ return -1;
+
+ return cached_read(dso, machine, offset, data, size);
+}
+
+/**
+ * dso__data_read_offset - Read data from dso file offset
+ * @dso: dso object
+ * @machine: machine object
+ * @offset: file offset
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from dso file offset. Open
+ * dso data file and use cached_read to get the data.
+ */
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size)
+{
+ if (dso->data.status == DSO_DATA_STATUS_ERROR)
+ return -1;
+
+ return data_read_offset(dso, machine, offset, data, size);
+}
+
+/**
+ * dso__data_read_addr - Read data from dso address
+ * @dso: dso object
+ * @map: map object used to map @addr to a file offset
+ * @machine: machine object
+ * @addr: virtual memory address
+ * @data: buffer to store data
+ * @size: size of the @data buffer
+ *
+ * External interface to read data from a dso virtual address.
+ */
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+ struct machine *machine, u64 addr,
+ u8 *data, ssize_t size)
+{
+ u64 offset = map->map_ip(map, addr);
+ return dso__data_read_offset(dso, machine, offset, data, size);
+}
+
+struct map *dso__new_map(const char *name)
+{
+ struct map *map = NULL;
+ struct dso *dso = dso__new(name);
+
+ if (dso) {
+ map = map__new2(0, dso);
+ dso__put(dso);
+ }
+
+ return map;
+}
+
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+ const char *short_name, int dso_type)
+{
+ /*
+ * The kernel dso could be created by build_id processing.
+ */
+ struct dso *dso = machine__findnew_dso(machine, name);
+
+ /*
+ * We need to run this in all cases, since during the build_id
+ * processing we had no idea this was the kernel dso.
+ */
+ if (dso != NULL) {
+ dso__set_short_name(dso, short_name, false);
+ dso->kernel = dso_type;
+ }
+
+ return dso;
+}
+
+/*
+ * Find a matching entry and/or link the current entry into the RB tree.
+ * At least one of the dso or name parameters must be non-NULL or the
+ * function will not work.
+ */
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+ struct dso *dso, const char *name)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+
+ if (!name)
+ name = dso->long_name;
+ /*
+ * Find node with the matching name
+ */
+ while (*p) {
+ struct dso *this = rb_entry(*p, struct dso, rb_node);
+ int rc = strcmp(name, this->long_name);
+
+ parent = *p;
+ if (rc == 0) {
+ /*
+ * In case the new DSO is a duplicate of an existing
+ * one, print a one-time warning & put the new entry
+ * at the end of the list of duplicates.
+ */
+ if (!dso || (dso == this))
+ return this; /* Find matching dso */
+ /*
+ * Core kernel DSOs may have duplicated long names. In that
+ * case the short names should differ, so compare them to
+ * differentiate the DSOs.
+ */
+ rc = strcmp(dso->short_name, this->short_name);
+ if (rc == 0) {
+ pr_err("Duplicated dso name: %s\n", name);
+ return NULL;
+ }
+ }
+ if (rc < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+ if (dso) {
+ /* Add new node and rebalance tree */
+ rb_link_node(&dso->rb_node, parent, p);
+ rb_insert_color(&dso->rb_node, root);
+ dso->root = root;
+ }
+ return NULL;
+}
+
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+ const char *name)
+{
+ return __dso__findlink_by_longname(root, NULL, name);
+}
+
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
+{
+ struct rb_root *root = dso->root;
+
+ if (name == NULL)
+ return;
+
+ if (dso->long_name_allocated)
+ free((char *)dso->long_name);
+
+ if (root) {
+ rb_erase(&dso->rb_node, root);
+ /*
+ * __dso__findlink_by_longname() isn't guaranteed to add it
+ * back, so a clean removal is required here.
+ */
+ RB_CLEAR_NODE(&dso->rb_node);
+ dso->root = NULL;
+ }
+
+ dso->long_name = name;
+ dso->long_name_len = strlen(name);
+ dso->long_name_allocated = name_allocated;
+
+ if (root)
+ __dso__findlink_by_longname(root, dso, NULL);
+}
+
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated)
+{
+ if (name == NULL)
+ return;
+
+ if (dso->short_name_allocated)
+ free((char *)dso->short_name);
+
+ dso->short_name = name;
+ dso->short_name_len = strlen(name);
+ dso->short_name_allocated = name_allocated;
+}
+
+static void dso__set_basename(struct dso *dso)
+{
+ /*
+ * basename() may modify path buffer, so we must pass
+ * a copy.
+ */
+ char *base, *lname = strdup(dso->long_name);
+
+ if (!lname)
+ return;
+
+ /*
+ * basename() may return a pointer to internal
+ * storage which is reused in subsequent calls
+ * so copy the result.
+ */
+ base = strdup(basename(lname));
+
+ free(lname);
+
+ if (!base)
+ return;
+
+ dso__set_short_name(dso, base, true);
+}
+
+int dso__name_len(const struct dso *dso)
+{
+ if (!dso)
+ return strlen("[unknown]");
+ if (verbose > 0)
+ return dso->long_name_len;
+
+ return dso->short_name_len;
+}
+
+bool dso__loaded(const struct dso *dso)
+{
+ return dso->loaded;
+}
+
+bool dso__sorted_by_name(const struct dso *dso)
+{
+ return dso->sorted_by_name;
+}
+
+void dso__set_sorted_by_name(struct dso *dso)
+{
+ dso->sorted_by_name = true;
+}
+
+struct dso *dso__new(const char *name)
+{
+ struct dso *dso = calloc(1, sizeof(*dso) + strlen(name) + 1);
+
+ if (dso != NULL) {
+ strcpy(dso->name, name);
+ dso__set_long_name(dso, dso->name, false);
+ dso__set_short_name(dso, dso->name, false);
+ dso->symbols = dso->symbol_names = RB_ROOT;
+ dso->data.cache = RB_ROOT;
+ dso->inlined_nodes = RB_ROOT;
+ dso->srclines = RB_ROOT;
+ dso->data.fd = -1;
+ dso->data.status = DSO_DATA_STATUS_UNKNOWN;
+ dso->symtab_type = DSO_BINARY_TYPE__NOT_FOUND;
+ dso->binary_type = DSO_BINARY_TYPE__NOT_FOUND;
+ dso->is_64_bit = (sizeof(void *) == 8);
+ dso->loaded = 0;
+ dso->rel = 0;
+ dso->sorted_by_name = 0;
+ dso->has_build_id = 0;
+ dso->has_srcline = 1;
+ dso->a2l_fails = 1;
+ dso->kernel = DSO_TYPE_USER;
+ dso->needs_swap = DSO_SWAP__UNSET;
+ dso->comp = COMP_ID__NONE;
+ RB_CLEAR_NODE(&dso->rb_node);
+ dso->root = NULL;
+ INIT_LIST_HEAD(&dso->node);
+ INIT_LIST_HEAD(&dso->data.open_entry);
+ pthread_mutex_init(&dso->lock, NULL);
+ refcount_set(&dso->refcnt, 1);
+ }
+
+ return dso;
+}
+
+void dso__delete(struct dso *dso)
+{
+ if (!RB_EMPTY_NODE(&dso->rb_node))
+ pr_err("DSO %s is still in rbtree when being deleted!\n",
+ dso->long_name);
+
+ /* free inlines first, as they reference symbols */
+ inlines__tree_delete(&dso->inlined_nodes);
+ srcline__tree_delete(&dso->srclines);
+ symbols__delete(&dso->symbols);
+
+ if (dso->short_name_allocated) {
+ zfree((char **)&dso->short_name);
+ dso->short_name_allocated = false;
+ }
+
+ if (dso->long_name_allocated) {
+ zfree((char **)&dso->long_name);
+ dso->long_name_allocated = false;
+ }
+
+ dso__data_close(dso);
+ auxtrace_cache__free(dso->auxtrace_cache);
+ dso_cache__free(dso);
+ dso__free_a2l(dso);
+ zfree(&dso->symsrc_filename);
+ nsinfo__zput(dso->nsinfo);
+ pthread_mutex_destroy(&dso->lock);
+ free(dso);
+}
+
+struct dso *dso__get(struct dso *dso)
+{
+ if (dso)
+ refcount_inc(&dso->refcnt);
+ return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+ if (dso && refcount_dec_and_test(&dso->refcnt))
+ dso__delete(dso);
+}
+
+void dso__set_build_id(struct dso *dso, void *build_id)
+{
+ memcpy(dso->build_id, build_id, sizeof(dso->build_id));
+ dso->has_build_id = 1;
+}
+
+bool dso__build_id_equal(const struct dso *dso, u8 *build_id)
+{
+ return memcmp(dso->build_id, build_id, sizeof(dso->build_id)) == 0;
+}
+
+void dso__read_running_kernel_build_id(struct dso *dso, struct machine *machine)
+{
+ char path[PATH_MAX];
+
+ if (machine__is_default_guest(machine))
+ return;
+ sprintf(path, "%s/sys/kernel/notes", machine->root_dir);
+ if (sysfs__read_build_id(path, dso->build_id,
+ sizeof(dso->build_id)) == 0)
+ dso->has_build_id = true;
+}
+
+int dso__kernel_module_get_build_id(struct dso *dso,
+ const char *root_dir)
+{
+ char filename[PATH_MAX];
+ /*
+ * kernel module short names are of the form "[module]" and
+ * we need just "module" here.
+ */
+ const char *name = dso->short_name + 1;
+
+ snprintf(filename, sizeof(filename),
+ "%s/sys/module/%.*s/notes/.note.gnu.build-id",
+ root_dir, (int)strlen(name) - 1, name);
+
+ if (sysfs__read_build_id(filename, dso->build_id,
+ sizeof(dso->build_id)) == 0)
+ dso->has_build_id = true;
+
+ return 0;
+}
+
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
+{
+ bool have_build_id = false;
+ struct dso *pos;
+ struct nscookie nsc;
+
+ list_for_each_entry(pos, head, node) {
+ if (with_hits && !pos->hit && !dso__is_vdso(pos))
+ continue;
+ if (pos->has_build_id) {
+ have_build_id = true;
+ continue;
+ }
+ nsinfo__mountns_enter(pos->nsinfo, &nsc);
+ if (filename__read_build_id(pos->long_name, pos->build_id,
+ sizeof(pos->build_id)) > 0) {
+ have_build_id = true;
+ pos->has_build_id = true;
+ }
+ nsinfo__mountns_exit(&nsc);
+ }
+
+ return have_build_id;
+}
+
+void __dsos__add(struct dsos *dsos, struct dso *dso)
+{
+ list_add_tail(&dso->node, &dsos->head);
+ __dso__findlink_by_longname(&dsos->root, dso, NULL);
+ /*
+ * It is now in the linked list, so grab a reference. This entry can be
+ * garbage collected when memory is needed, by looking at LRU dso
+ * instances in the list with refcount_read(&dso->refcnt) == 1, i.e. no
+ * references anywhere besides the one for the list. Then, under a lock
+ * for the list: remove it from the list and do a dso__put(), which will
+ * probably be the last one and will then call dso__delete(), end of
+ * life.
+ *
+ * That, or at the end of the 'struct machine' lifetime, when all
+ * 'struct dso' instances will be removed from the list, in
+ * dsos__exit(), if they have no other reference from some other data
+ * structure.
+ *
+ * E.g.: after processing a 'perf.data' file and storing references
+ * to objects instantiated while processing events, we will have
+ * references to the 'thread', 'map', 'dso' structs all from 'struct
+ * hist_entry' instances, but we may not need anything not referenced,
+ * so we might as well call machines__exit()/machines__delete() and
+ * garbage collect it.
+ */
+ dso__get(dso);
+}
+
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+ down_write(&dsos->lock);
+ __dsos__add(dsos, dso);
+ up_write(&dsos->lock);
+}
+
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+ struct dso *pos;
+
+ if (cmp_short) {
+ list_for_each_entry(pos, &dsos->head, node)
+ if (strcmp(pos->short_name, name) == 0)
+ return pos;
+ return NULL;
+ }
+ return __dso__find_by_longname(&dsos->root, name);
+}
+
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+ struct dso *dso;
+ down_read(&dsos->lock);
+ dso = __dsos__find(dsos, name, cmp_short);
+ up_read(&dsos->lock);
+ return dso;
+}
+
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
+{
+ struct dso *dso = dso__new(name);
+
+ if (dso != NULL) {
+ __dsos__add(dsos, dso);
+ dso__set_basename(dso);
+ /* Put the dso here because __dsos__add() already grabbed a reference. */
+ dso__put(dso);
+ }
+ return dso;
+}
+
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
+{
+ struct dso *dso = __dsos__find(dsos, name, false);
+
+ return dso ? dso : __dsos__addnew(dsos, name);
+}
+
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+ struct dso *dso;
+ down_write(&dsos->lock);
+ dso = dso__get(__dsos__findnew(dsos, name));
+ up_write(&dsos->lock);
+ return dso;
+}
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ struct dso *pos;
+ size_t ret = 0;
+
+ list_for_each_entry(pos, head, node) {
+ if (skip && skip(pos, parm))
+ continue;
+ ret += dso__fprintf_buildid(pos, fp);
+ ret += fprintf(fp, " %s\n", pos->long_name);
+ }
+ return ret;
+}
+
+size_t __dsos__fprintf(struct list_head *head, FILE *fp)
+{
+ struct dso *pos;
+ size_t ret = 0;
+
+ list_for_each_entry(pos, head, node) {
+ ret += dso__fprintf(pos, fp);
+ }
+
+ return ret;
+}
+
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp)
+{
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+ return fprintf(fp, "%s", sbuild_id);
+}
+
+size_t dso__fprintf(struct dso *dso, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret = fprintf(fp, "dso: %s (", dso->short_name);
+
+ if (dso->short_name != dso->long_name)
+ ret += fprintf(fp, "%s, ", dso->long_name);
+ ret += fprintf(fp, "%sloaded, ", dso__loaded(dso) ? "" : "NOT ");
+ ret += dso__fprintf_buildid(dso, fp);
+ ret += fprintf(fp, ")\n");
+ for (nd = rb_first(&dso->symbols); nd; nd = rb_next(nd)) {
+ struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+ ret += symbol__fprintf(pos, fp);
+ }
+
+ return ret;
+}
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine)
+{
+ int fd;
+ enum dso_type type = DSO__TYPE_UNKNOWN;
+
+ fd = dso__data_get_fd(dso, machine);
+ if (fd >= 0) {
+ type = dso__type_fd(fd);
+ dso__data_put_fd(dso);
+ }
+
+ return type;
+}
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
+{
+ int idx, errnum = dso->load_errno;
+ /*
+ * This must have the same ordering as enum dso_load_errno.
+ */
+ static const char *dso_load__error_str[] = {
+ "Internal tools/perf/ library error",
+ "Invalid ELF file",
+ "Can not read build id",
+ "Mismatching build id",
+ "Decompression failure",
+ };
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ const char *err = str_error_r(errnum, buf, buflen);
+
+ if (err != buf)
+ scnprintf(buf, buflen, "%s", err);
+
+ return 0;
+ }
+
+ if (errnum < __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END)
+ return -1;
+
+ idx = errnum - __DSO_LOAD_ERRNO__START;
+ scnprintf(buf, buflen, "%s", dso_load__error_str[idx]);
+ return 0;
+}
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
new file mode 100644
index 000000000..0b474c7ef
--- /dev/null
+++ b/tools/perf/util/dso.h
@@ -0,0 +1,382 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DSO
+#define __PERF_DSO
+
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <linux/rbtree.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include "rwsem.h"
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include "map.h"
+#include "namespaces.h"
+#include "build-id.h"
+
+enum dso_binary_type {
+ DSO_BINARY_TYPE__KALLSYMS = 0,
+ DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ DSO_BINARY_TYPE__VMLINUX,
+ DSO_BINARY_TYPE__GUEST_VMLINUX,
+ DSO_BINARY_TYPE__JAVA_JIT,
+ DSO_BINARY_TYPE__DEBUGLINK,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ DSO_BINARY_TYPE__GUEST_KMODULE,
+ DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ DSO_BINARY_TYPE__KCORE,
+ DSO_BINARY_TYPE__GUEST_KCORE,
+ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+enum dso_kernel_type {
+ DSO_TYPE_USER = 0,
+ DSO_TYPE_KERNEL,
+ DSO_TYPE_GUEST_KERNEL
+};
+
+enum dso_swap_type {
+ DSO_SWAP__UNSET,
+ DSO_SWAP__NO,
+ DSO_SWAP__YES,
+};
+
+enum dso_data_status {
+ DSO_DATA_STATUS_ERROR = -1,
+ DSO_DATA_STATUS_UNKNOWN = 0,
+ DSO_DATA_STATUS_OK = 1,
+};
+
+enum dso_data_status_seen {
+ DSO_DATA_STATUS_SEEN_ITRACE,
+};
+
+enum dso_type {
+ DSO__TYPE_UNKNOWN,
+ DSO__TYPE_64BIT,
+ DSO__TYPE_32BIT,
+ DSO__TYPE_X32BIT,
+};
+
+enum dso_load_errno {
+ DSO_LOAD_ERRNO__SUCCESS = 0,
+
+ /*
+ * Choose an arbitrary negative big number not to clash with standard
+ * errno since SUS requires the errno has distinct positive values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __DSO_LOAD_ERRNO__START = -10000,
+
+ DSO_LOAD_ERRNO__INTERNAL_ERROR = __DSO_LOAD_ERRNO__START,
+
+ /* for symsrc__init() */
+ DSO_LOAD_ERRNO__INVALID_ELF,
+ DSO_LOAD_ERRNO__CANNOT_READ_BUILDID,
+ DSO_LOAD_ERRNO__MISMATCHING_BUILDID,
+
+ /* for decompress_kmodule */
+ DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE,
+
+ __DSO_LOAD_ERRNO__END,
+};
+
+#define DSO__SWAP(dso, type, val) \
+({ \
+ type ____r = val; \
+ BUG_ON(dso->needs_swap == DSO_SWAP__UNSET); \
+ if (dso->needs_swap == DSO_SWAP__YES) { \
+ switch (sizeof(____r)) { \
+ case 2: \
+ ____r = bswap_16(val); \
+ break; \
+ case 4: \
+ ____r = bswap_32(val); \
+ break; \
+ case 8: \
+ ____r = bswap_64(val); \
+ break; \
+ default: \
+ BUG_ON(1); \
+ } \
+ } \
+ ____r; \
+})
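+
+/*
+ * Usage sketch (illustrative, not from the original source):
+ *
+ * u32 sz = DSO__SWAP(dso, u32, raw_size);
+ *
+ * byte-swaps the hypothetical 'raw_size' value only when
+ * dso->needs_swap == DSO_SWAP__YES.
+ */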
+
+#define DSO__DATA_CACHE_SIZE 4096
+#define DSO__DATA_CACHE_MASK ~(DSO__DATA_CACHE_SIZE - 1)
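+
+/*
+ * Example (illustrative): with 4096-byte chunks, offset 0x12345 masks
+ * down to the chunk-aligned 0x12000, so offsets 0x12000 through 0x12fff
+ * all hit the same struct dso_cache entry.
+ */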
+
+struct dso_cache {
+ struct rb_node rb_node;
+ u64 offset;
+ u64 size;
+ char data[0];
+};
+
+/*
+ * DSOs are put into both a list for fast iteration and an rbtree for
+ * fast long name lookup.
+ */
+struct dsos {
+ struct list_head head;
+ struct rb_root root; /* rbtree root sorted by long name */
+ struct rw_semaphore lock;
+};
+
+struct auxtrace_cache;
+
+struct dso {
+ pthread_mutex_t lock;
+ struct list_head node;
+ struct rb_node rb_node; /* rbtree node sorted by long name */
+ struct rb_root *root; /* root of rbtree that rb_node is in */
+ struct rb_root symbols;
+ struct rb_root symbol_names;
+ struct rb_root inlined_nodes;
+ struct rb_root srclines;
+ struct {
+ u64 addr;
+ struct symbol *symbol;
+ } last_find_result;
+ void *a2l;
+ char *symsrc_filename;
+ unsigned int a2l_fails;
+ enum dso_kernel_type kernel;
+ enum dso_swap_type needs_swap;
+ enum dso_binary_type symtab_type;
+ enum dso_binary_type binary_type;
+ enum dso_load_errno load_errno;
+ u8 adjust_symbols:1;
+ u8 has_build_id:1;
+ u8 has_srcline:1;
+ u8 hit:1;
+ u8 annotate_warned:1;
+ u8 short_name_allocated:1;
+ u8 long_name_allocated:1;
+ u8 is_64_bit:1;
+ bool sorted_by_name;
+ bool loaded;
+ u8 rel;
+ u8 build_id[BUILD_ID_SIZE];
+ u64 text_offset;
+ const char *short_name;
+ const char *long_name;
+ u16 long_name_len;
+ u16 short_name_len;
+ void *dwfl; /* DWARF debug info */
+ struct auxtrace_cache *auxtrace_cache;
+ int comp;
+
+ /* dso data file */
+ struct {
+ struct rb_root cache;
+ int fd;
+ int status;
+ u32 status_seen;
+ size_t file_size;
+ struct list_head open_entry;
+ u64 debug_frame_offset;
+ u64 eh_frame_hdr_offset;
+ } data;
+
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+ struct nsinfo *nsinfo;
+ refcount_t refcnt;
+ char name[0];
+};
+
+/* dso__for_each_symbol - iterate over the symbols of a dso
+ *
+ * @dso: the 'struct dso *' whose symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as temporary storage
+ */
+#define dso__for_each_symbol(dso, pos, n) \
+ symbols__for_each_entry(&(dso)->symbols, pos, n)
+
+static inline void dso__set_loaded(struct dso *dso)
+{
+ dso->loaded = true;
+}
+
+struct dso *dso__new(const char *name);
+void dso__delete(struct dso *dso);
+
+void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated);
+void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
+
+int dso__name_len(const struct dso *dso);
+
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+ dso__put(*dso);
+ *dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
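+
+/*
+ * Usage sketch (illustrative, not from the original source):
+ *
+ * struct dso *dso = dso__new("foo.so");
+ * ...
+ * dso__zput(dso); (drops the reference and NULLs the pointer)
+ */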
+
+bool dso__loaded(const struct dso *dso);
+
+static inline bool dso__has_symbols(const struct dso *dso)
+{
+ return !RB_EMPTY_ROOT(&dso->symbols);
+}
+
+bool dso__sorted_by_name(const struct dso *dso);
+void dso__set_sorted_by_name(struct dso *dso);
+void dso__sort_by_name(struct dso *dso);
+
+void dso__set_build_id(struct dso *dso, void *build_id);
+bool dso__build_id_equal(const struct dso *dso, u8 *build_id);
+void dso__read_running_kernel_build_id(struct dso *dso,
+ struct machine *machine);
+int dso__kernel_module_get_build_id(struct dso *dso, const char *root_dir);
+
+char dso__symtab_origin(const struct dso *dso);
+int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
+ char *root_dir, char *filename, size_t size);
+bool is_kernel_module(const char *pathname, int cpumode);
+bool dso__needs_decompress(struct dso *dso);
+int dso__decompress_kmodule_fd(struct dso *dso, const char *name);
+int dso__decompress_kmodule_path(struct dso *dso, const char *name,
+ char *pathname, size_t len);
+
+#define KMOD_DECOMP_NAME "/tmp/perf-kmod-XXXXXX"
+#define KMOD_DECOMP_LEN sizeof(KMOD_DECOMP_NAME)
+
+struct kmod_path {
+ char *name;
+ int comp;
+ bool kmod;
+};
+
+int __kmod_path__parse(struct kmod_path *m, const char *path,
+ bool alloc_name);
+
+#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false)
+#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true)
+
+void dso__set_module_info(struct dso *dso, struct kmod_path *m,
+ struct machine *machine);
+
+/*
+ * The dso__data_* external interface provides the following functions:
+ * dso__data_get_fd
+ * dso__data_put_fd
+ * dso__data_close
+ * dso__data_size
+ * dso__data_read_offset
+ * dso__data_read_addr
+ *
+ * Please refer to the dso.c source code for documentation of each
+ * function and its arguments. The following text explains the dso
+ * file descriptor caching.
+ *
+ * The dso__data* interface allows caching of opened file descriptors
+ * to speed up dso data access. The idea is to leave the file
+ * descriptor open, ideally for the whole life of the dso object.
+ *
+ * The current usage of the dso__data_* interface is as follows:
+ *
+ * Get DSO's fd:
+ * int fd = dso__data_get_fd(dso, machine);
+ * if (fd >= 0) {
+ * USE 'fd' SOMEHOW
+ * dso__data_put_fd(dso);
+ * }
+ *
+ * Read DSO's data:
+ * n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
+ * n = dso__data_read_addr(dso_0, &machine, 0, buf, BUFSIZE);
+ *
+ * Eventually close DSO's fd:
+ * dso__data_close(dso);
+ *
+ * It is not necessary to close the DSO object data file. Each time a
+ * new DSO data file is opened, the limit (RLIMIT_NOFILE/2) is checked.
+ * Once it is crossed, the oldest open DSO file is closed.
+ *
+ * The dso__delete function calls close_dso to ensure the data file
+ * descriptor gets closed/unmapped before the dso object is freed.
+ *
+ * TODO
+ */
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso);
+void dso__data_close(struct dso *dso);
+
+off_t dso__data_size(struct dso *dso, struct machine *machine);
+ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
+ u64 offset, u8 *data, ssize_t size);
+ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
+ struct machine *machine, u64 addr,
+ u8 *data, ssize_t size);
+bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
+
+struct map *dso__new_map(const char *name);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+ const char *short_name, int dso_type);
+
+void __dsos__add(struct dsos *dsos, struct dso *dso);
+void dsos__add(struct dsos *dsos, struct dso *dso);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
+bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
+
+void dso__reset_find_symbol_cache(struct dso *dso);
+
+size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm);
+size_t __dsos__fprintf(struct list_head *head, FILE *fp);
+
+size_t dso__fprintf_buildid(struct dso *dso, FILE *fp);
+size_t dso__fprintf_symbols_by_name(struct dso *dso, FILE *fp);
+size_t dso__fprintf(struct dso *dso, FILE *fp);
+
+static inline bool dso__is_vmlinux(struct dso *dso)
+{
+ return dso->binary_type == DSO_BINARY_TYPE__VMLINUX ||
+ dso->binary_type == DSO_BINARY_TYPE__GUEST_VMLINUX;
+}
+
+static inline bool dso__is_kcore(struct dso *dso)
+{
+ return dso->binary_type == DSO_BINARY_TYPE__KCORE ||
+ dso->binary_type == DSO_BINARY_TYPE__GUEST_KCORE;
+}
+
+static inline bool dso__is_kallsyms(struct dso *dso)
+{
+ return dso->kernel && dso->long_name[0] != '/';
+}
+
+void dso__free_a2l(struct dso *dso);
+
+enum dso_type dso__type(struct dso *dso, struct machine *machine);
+
+int dso__strerror_load(struct dso *dso, char *buf, size_t buflen);
+
+void reset_fd_limit(void);
+
+#endif /* __PERF_DSO */
diff --git a/tools/perf/util/dump-insn.c b/tools/perf/util/dump-insn.c
new file mode 100644
index 000000000..10988d3de
--- /dev/null
+++ b/tools/perf/util/dump-insn.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include "dump-insn.h"
+
+/* Fallback code */
+
+__weak
+const char *dump_insn(struct perf_insn *x __maybe_unused,
+ u64 ip __maybe_unused, u8 *inbuf __maybe_unused,
+ int inlen __maybe_unused, int *lenp)
+{
+ if (lenp)
+ *lenp = 0;
+ return "?";
+}
diff --git a/tools/perf/util/dump-insn.h b/tools/perf/util/dump-insn.h
new file mode 100644
index 000000000..0e06280a8
--- /dev/null
+++ b/tools/perf/util/dump-insn.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_DUMP_INSN_H
+#define __PERF_DUMP_INSN_H 1
+
+#define MAXINSN 15
+
+#include <linux/types.h>
+
+struct thread;
+
+struct perf_insn {
+ /* Initialized by callers: */
+ struct thread *thread;
+ u8 cpumode;
+ bool is64bit;
+ int cpu;
+ /* Temporary */
+ char out[256];
+};
+
+const char *dump_insn(struct perf_insn *x, u64 ip,
+ u8 *inbuf, int inlen, int *lenp);
+#endif
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
new file mode 100644
index 000000000..230e94bf7
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.c
@@ -0,0 +1,1353 @@
+/*
+ * dwarf-aux.c : libdw auxiliary interfaces
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include "util.h"
+#include "debug.h"
+#include "dwarf-aux.h"
+#include "string2.h"
+
+/**
+ * cu_find_realpath - Find the realpath of the target file
+ * @cu_die: A DIE (DWARF information entry) of a CU (compilation unit)
+ * @fname: The tail filename of the target file
+ *
+ * Find the real (long) path of @fname in @cu_die.
+ */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname)
+{
+ Dwarf_Files *files;
+ size_t nfiles, i;
+ const char *src = NULL;
+ int ret;
+
+ if (!fname)
+ return NULL;
+
+ ret = dwarf_getsrcfiles(cu_die, &files, &nfiles);
+ if (ret != 0)
+ return NULL;
+
+ for (i = 0; i < nfiles; i++) {
+ src = dwarf_filesrc(files, i, NULL, NULL);
+ if (strtailcmp(src, fname) == 0)
+ break;
+ }
+ if (i == nfiles)
+ return NULL;
+ return src;
+}
+
+/**
+ * cu_get_comp_dir - Get the path of compilation directory
+ * @cu_die: a CU DIE
+ *
+ * Get the compilation directory path of the given @cu_die.
+ * Since this depends on DW_AT_comp_dir, older gcc may not have
+ * embedded it. In that case, this returns NULL.
+ */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die)
+{
+ Dwarf_Attribute attr;
+ if (dwarf_attr(cu_die, DW_AT_comp_dir, &attr) == NULL)
+ return NULL;
+ return dwarf_formstring(&attr);
+}
+
+/**
+ * cu_find_lineinfo - Get a line number and file name for given address
+ * @cu_die: a CU DIE
+ * @addr: An address
+ * @fname: a pointer which returns the file name string
+ * @lineno: a pointer which returns the line number
+ *
+ * Find a line number and file name for @addr in @cu_die.
+ */
+int cu_find_lineinfo(Dwarf_Die *cu_die, unsigned long addr,
+ const char **fname, int *lineno)
+{
+ Dwarf_Line *line;
+ Dwarf_Addr laddr;
+
+ line = dwarf_getsrc_die(cu_die, (Dwarf_Addr)addr);
+ if (line && dwarf_lineaddr(line, &laddr) == 0 &&
+ addr == (unsigned long)laddr && dwarf_lineno(line, lineno) == 0) {
+ *fname = dwarf_linesrc(line, NULL, NULL);
+ if (!*fname)
+ /* line number is useless without filename */
+ *lineno = 0;
+ }
+
+ return *lineno ?: -ENOENT;
+}
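+
+/*
+ * Usage sketch (illustrative only; 'cu_die' and 'addr' are assumed to
+ * come from the caller):
+ *
+ * const char *fname;
+ * int lineno = 0;
+ *
+ * if (cu_find_lineinfo(&cu_die, addr, &fname, &lineno) > 0)
+ * pr_debug("%s:%d\n", fname, lineno);
+ */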
+
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
+
+/**
+ * cu_walk_functions_at - Walk on function DIEs at given address
+ * @cu_die: A CU DIE
+ * @addr: An address
+ * @callback: A callback called with each found DIE
+ * @data: user data
+ *
+ * Walk on function DIEs at the given @addr in @cu_die. The DIEs passed
+ * to @callback are subprograms or inlined subroutines.
+ */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ int (*callback)(Dwarf_Die *, void *), void *data)
+{
+ Dwarf_Die die_mem;
+ Dwarf_Die *sc_die;
+ int ret = -ENOENT;
+
+ /* Inlined functions can be recursive. Trace down until the lookup fails. */
+ for (sc_die = die_find_realfunc(cu_die, addr, &die_mem);
+ sc_die != NULL;
+ sc_die = die_find_child(sc_die, __die_find_inline_cb, &addr,
+ &die_mem)) {
+ ret = callback(sc_die, data);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * die_get_linkage_name - Get the linkage name of the object
+ * @dw_die: A DIE of the object
+ *
+ * Get the linkage name attribute of the given @dw_die.
+ * For C++ binary, the linkage name will be the mangled symbol.
+ */
+const char *die_get_linkage_name(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(dw_die, DW_AT_linkage_name, &attr) == NULL)
+ return NULL;
+ return dwarf_formstring(&attr);
+}
+
+/**
+ * die_compare_name - Compare diename and tname
+ * @dw_die: a DIE
+ * @tname: a string of target name
+ *
+ * Compare the name of @dw_die and @tname. Return false if @dw_die has no name.
+ */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
+{
+ const char *name;
+
+ name = dwarf_diename(dw_die);
+ return name ? (strcmp(tname, name) == 0) : false;
+}
+
+/**
+ * die_match_name - Match diename/linkage name and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob-match the name of @dw_die against @glob. Return false if matching
+ * fails. This also matches the linkage name.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+ const char *name;
+
+ name = dwarf_diename(dw_die);
+ if (name && strglobmatch(name, glob))
+ return true;
+ /* fall back to check linkage name */
+ name = die_get_linkage_name(dw_die);
+ if (name && strglobmatch(name, glob))
+ return true;
+
+ return false;
+}
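+
+/*
+ * For example (illustrative), die_match_name(die, "vfs_*") matches a
+ * DIE named "vfs_read" via DW_AT_name, and falls back to the mangled
+ * DW_AT_linkage_name for C++ symbols.
+ */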
+
+/**
+ * die_get_call_lineno - Get callsite line number of inline-function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site line number of @in_die. This means from where the inline
+ * function is called.
+ */
+int die_get_call_lineno(Dwarf_Die *in_die)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Word ret;
+
+ if (!dwarf_attr(in_die, DW_AT_call_line, &attr))
+ return -ENOENT;
+
+ dwarf_formudata(&attr, &ret);
+ return (int)ret;
+}
+
+/**
+ * die_get_type - Get type DIE
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get the DIE of the type of the given variable (@vr_die), and store
+ * it in @die_mem. Returns NULL if it fails to get a type DIE.
+ */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr_integrate(vr_die, DW_AT_type, &attr) &&
+ dwarf_formref_die(&attr, die_mem))
+ return die_mem;
+ else
+ return NULL;
+}
+
+/* Get a type die, but skip qualifiers */
+static Dwarf_Die *__die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ int tag;
+
+ do {
+ vr_die = die_get_type(vr_die, die_mem);
+ if (!vr_die)
+ break;
+ tag = dwarf_tag(vr_die);
+ } while (tag == DW_TAG_const_type ||
+ tag == DW_TAG_restrict_type ||
+ tag == DW_TAG_volatile_type ||
+ tag == DW_TAG_shared_type);
+
+ return vr_die;
+}
+
+/**
+ * die_get_real_type - Get a type die, but skip qualifiers and typedef
+ * @vr_die: a DIE of a variable
+ * @die_mem: where to store a type DIE
+ *
+ * Get the DIE of the type of the given variable (@vr_die), and store
+ * it in @die_mem. Returns NULL if it fails to get a type DIE.
+ * If the type is a qualifier (e.g. const) or a typedef, this skips it
+ * and tries to find the real type (a structure or basic type, e.g. int).
+ */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem)
+{
+ do {
+ vr_die = __die_get_real_type(vr_die, die_mem);
+ } while (vr_die && dwarf_tag(vr_die) == DW_TAG_typedef);
+
+ return vr_die;
+}
+
+/* Get attribute and translate it as a udata */
+static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ Dwarf_Word *result)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formudata(&attr, result) != 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+/* Get attribute and translate it as a sdata */
+static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+ Dwarf_Sword *result)
+{
+ Dwarf_Attribute attr;
+
+ if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+ dwarf_formsdata(&attr, result) != 0)
+ return -ENOENT;
+
+ return 0;
+}
+
+/**
+ * die_is_signed_type - Check whether a type DIE is signed or not
+ * @tp_die: a DIE of a type
+ *
+ * Get the encoding of @tp_die and return true if the encoding
+ * is signed.
+ */
+bool die_is_signed_type(Dwarf_Die *tp_die)
+{
+ Dwarf_Word ret;
+
+ if (die_get_attr_udata(tp_die, DW_AT_encoding, &ret))
+ return false;
+
+ return (ret == DW_ATE_signed_char || ret == DW_ATE_signed ||
+ ret == DW_ATE_signed_fixed);
+}
+
+/**
+ * die_is_func_def - Ensure that this DIE is a subprogram and definition
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is a subprogram and NOT a declaration. This
+ * returns true if @dw_die is a function definition.
+ **/
+bool die_is_func_def(Dwarf_Die *dw_die)
+{
+ Dwarf_Attribute attr;
+
+ return (dwarf_tag(dw_die) == DW_TAG_subprogram &&
+ dwarf_attr(dw_die, DW_AT_declaration, &attr) == NULL);
+}
+
+/**
+ * die_entrypc - Returns entry PC (the lowest address) of a DIE
+ * @dw_die: a DIE
+ * @addr: where to store entry PC
+ *
+ * Since dwarf_entrypc() does not return the entry PC if the DIE has only
+ * an address range, we have to use this to retrieve the lowest address
+ * from the address range attribute.
+ */
+int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr)
+{
+ Dwarf_Addr base, end;
+ Dwarf_Attribute attr;
+
+ if (!addr)
+ return -EINVAL;
+
+ if (dwarf_entrypc(dw_die, addr) == 0)
+ return 0;
+
+ /*
+ * Since the dwarf_ranges() will return 0 if there is no
+ * DW_AT_ranges attribute, we should check it first.
+ */
+ if (!dwarf_attr(dw_die, DW_AT_ranges, &attr))
+ return -ENOENT;
+
+ return dwarf_ranges(dw_die, 0, &base, addr, &end) < 0 ? -ENOENT : 0;
+}
+
+/**
+ * die_is_func_instance - Ensure that this DIE is an instance of a subprogram
+ * @dw_die: a DIE
+ *
+ * Ensure that this DIE is an instance (which has an entry address).
+ * This returns true if @dw_die is a function instance. If not, the @dw_die
+ * must be a prototype. You can use die_walk_instances() to find actual
+ * instances.
+ **/
+bool die_is_func_instance(Dwarf_Die *dw_die)
+{
+ Dwarf_Addr tmp;
+ Dwarf_Attribute attr_mem;
+ int tag = dwarf_tag(dw_die);
+
+ if (tag != DW_TAG_subprogram &&
+ tag != DW_TAG_inlined_subroutine)
+ return false;
+
+ return dwarf_entrypc(dw_die, &tmp) == 0 ||
+ dwarf_attr(dw_die, DW_AT_ranges, &attr_mem) != NULL;
+}
+
+/**
+ * die_get_data_member_location - Get the data-member offset
+ * @mb_die: a DIE of a member of a data structure
+ * @offs: The offset of the member in the data structure
+ *
+ * Get the offset of @mb_die within the data structure containing it, and
+ * store the resulting offset in @offs. If any error occurs this returns
+ * a negative errno.
+ */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Op *expr;
+ size_t nexpr;
+ int ret;
+
+ if (dwarf_attr(mb_die, DW_AT_data_member_location, &attr) == NULL)
+ return -ENOENT;
+
+ if (dwarf_formudata(&attr, offs) != 0) {
+ /* DW_AT_data_member_location should be DW_OP_plus_uconst */
+ ret = dwarf_getlocation(&attr, &expr, &nexpr);
+ if (ret < 0 || nexpr == 0)
+ return -ENOENT;
+
+ if (expr[0].atom != DW_OP_plus_uconst || nexpr != 1) {
+ pr_debug("Unable to get offset:Unexpected OP %x (%zd)\n",
+ expr[0].atom, nexpr);
+ return -ENOTSUP;
+ }
+ *offs = (Dwarf_Word)expr[0].number;
+ }
+ return 0;
+}
+
+/* Get the call file index number in CU DIE */
+static int die_get_call_fileno(Dwarf_Die *in_die)
+{
+ Dwarf_Sword idx;
+
+ if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+}
+
+/* Get the declared file index number in CU DIE */
+static int die_get_decl_fileno(Dwarf_Die *pdie)
+{
+ Dwarf_Sword idx;
+
+ if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
+ return (int)idx;
+ else
+ return -ENOENT;
+}
+
+/**
+ * die_get_call_file - Get callsite file name of inlined function instance
+ * @in_die: a DIE of an inlined function instance
+ *
+ * Get call-site file name of @in_die. This means from which file the inline
+ * function is called.
+ */
+const char *die_get_call_file(Dwarf_Die *in_die)
+{
+ Dwarf_Die cu_die;
+ Dwarf_Files *files;
+ int idx;
+
+ idx = die_get_call_fileno(in_die);
+ if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) ||
+ dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
+ return NULL;
+
+ return dwarf_filesrc(files, idx, NULL, NULL);
+}
+
+
+/**
+ * die_find_child - Generic DIE search function in DIE tree
+ * @rt_die: a root DIE
+ * @callback: a callback function
+ * @data: user data passed to the callback function
+ * @die_mem: a buffer for result DIE
+ *
+ * Trace DIE tree from @rt_die and call @callback for each child DIE.
+ * If @callback returns DIE_FIND_CB_END, this stores the DIE into
+ * @die_mem and returns it. If @callback returns DIE_FIND_CB_CONTINUE,
+ * this continues to trace the tree. Optionally, @callback can return
+ * DIE_FIND_CB_CHILD or DIE_FIND_CB_SIBLING, which mean trace only
+ * the children or only the siblings, respectively.
+ * Returns NULL if @callback can't find any appropriate DIE.
+ */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+ int (*callback)(Dwarf_Die *, void *),
+ void *data, Dwarf_Die *die_mem)
+{
+ Dwarf_Die child_die;
+ int ret;
+
+ ret = dwarf_child(rt_die, die_mem);
+ if (ret != 0)
+ return NULL;
+
+ do {
+ ret = callback(die_mem, data);
+ if (ret == DIE_FIND_CB_END)
+ return die_mem;
+
+ if ((ret & DIE_FIND_CB_CHILD) &&
+ die_find_child(die_mem, callback, data, &child_die)) {
+ memcpy(die_mem, &child_die, sizeof(Dwarf_Die));
+ return die_mem;
+ }
+ } while ((ret & DIE_FIND_CB_SIBLING) &&
+ dwarf_siblingof(die_mem, die_mem) == 0);
+
+ return NULL;
+}
+
+struct __addr_die_search_param {
+ Dwarf_Addr addr;
+ Dwarf_Die *die_mem;
+};
+
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct __addr_die_search_param *ad = data;
+ Dwarf_Addr addr = 0;
+
+ if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+ !dwarf_highpc(fn_die, &addr) &&
+ addr == ad->addr) {
+ memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with a tail call at @addr. Stores
+ * the DIE in @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ struct __addr_die_search_param ad;
+ ad.addr = addr;
+ ad.die_mem = die_mem;
+ /* dwarf_getscopes can't find subprogram. */
+ if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+ return NULL;
+ else
+ return die_mem;
+}
+
+/* die_find callback for non-inlined function search */
+static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct __addr_die_search_param *ad = data;
+
+ /*
+ * Since a declaration entry doesn't have a pc, this always returns
+ * a function definition entry.
+ */
+ if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+ dwarf_haspc(fn_die, ad->addr)) {
+ memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_find_realfunc - Search a non-inlined function at given address
+ * @cu_die: a CU DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE which includes @addr. Stores the
+ * DIE in @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ struct __addr_die_search_param ad;
+ ad.addr = addr;
+ ad.die_mem = die_mem;
+ /* dwarf_getscopes can't find subprogram. */
+ if (!dwarf_getfuncs(cu_die, __die_search_func_cb, &ad, 0))
+ return NULL;
+ else
+ return die_mem;
+}
+
+/* die_find callback for inline function search */
+static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data)
+{
+ Dwarf_Addr *addr = data;
+
+ if (dwarf_tag(die_mem) == DW_TAG_inlined_subroutine &&
+ dwarf_haspc(die_mem, *addr))
+ return DIE_FIND_CB_END;
+
+ return DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_find_top_inlinefunc - Search the top inlined function at given address
+ * @sp_die: a subprogram DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for an inlined function DIE which includes @addr. Stores the
+ * DIE in @die_mem and returns it if found. Returns NULL if failed.
+ * Even if several inlined functions are expanded recursively, this
+ * doesn't trace down and returns only the topmost one.
+ */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ return die_find_child(sp_die, __die_find_inline_cb, &addr, die_mem);
+}
+
+/**
+ * die_find_inlinefunc - Search an inlined function at given address
+ * @sp_die: a subprogram DIE which includes @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for an inlined function DIE which includes @addr. Stores the
+ * DIE in @die_mem and returns it if found. Returns NULL if failed.
+ * If several inlined functions are expanded recursively, this traces
+ * down and returns the deepest one.
+ */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem)
+{
+ Dwarf_Die tmp_die;
+
+ sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr, &tmp_die);
+ if (!sp_die)
+ return NULL;
+
+ /* Inlined functions can be recursive. Trace down until the lookup fails. */
+ while (sp_die) {
+ memcpy(die_mem, sp_die, sizeof(Dwarf_Die));
+ sp_die = die_find_child(sp_die, __die_find_inline_cb, &addr,
+ &tmp_die);
+ }
+
+ return die_mem;
+}
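+
+/*
+ * Illustrative contrast (not from the original source): for a chain
+ * a() -> inlined b() -> inlined c(), with @addr inside c(),
+ * die_find_top_inlinefunc() returns the DIE of b(), while
+ * die_find_inlinefunc() walks down and returns the DIE of c().
+ */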
+
+struct __instance_walk_param {
+ void *addr;
+ int (*callback)(Dwarf_Die *, void *);
+ void *data;
+ int retval;
+};
+
+static int __die_walk_instances_cb(Dwarf_Die *inst, void *data)
+{
+ struct __instance_walk_param *iwp = data;
+ Dwarf_Attribute attr_mem;
+ Dwarf_Die origin_mem;
+ Dwarf_Attribute *attr;
+ Dwarf_Die *origin;
+ int tmp;
+
+ if (!die_is_func_instance(inst))
+ return DIE_FIND_CB_CONTINUE;
+
+ attr = dwarf_attr(inst, DW_AT_abstract_origin, &attr_mem);
+ if (attr == NULL)
+ return DIE_FIND_CB_CONTINUE;
+
+ origin = dwarf_formref_die(attr, &origin_mem);
+ if (origin == NULL || origin->addr != iwp->addr)
+ return DIE_FIND_CB_CONTINUE;
+
+ /* Ignore redundant instances */
+ if (dwarf_tag(inst) == DW_TAG_inlined_subroutine) {
+ dwarf_decl_line(origin, &tmp);
+ if (die_get_call_lineno(inst) == tmp) {
+ tmp = die_get_decl_fileno(origin);
+ if (die_get_call_fileno(inst) == tmp)
+ return DIE_FIND_CB_CONTINUE;
+ }
+ }
+
+ iwp->retval = iwp->callback(inst, iwp->data);
+
+ return (iwp->retval) ? DIE_FIND_CB_END : DIE_FIND_CB_CONTINUE;
+}
+
+/**
+ * die_walk_instances - Walk on instances of given DIE
+ * @or_die: an abstract original DIE
+ * @callback: a callback function which is called with each instance DIE
+ * @data: user data
+ *
+ * Walk on the instances of the given @or_die. @or_die must be an inlined
+ * function declaration (abstract origin). This returns the return value
+ * of @callback if it returns a non-zero value, or -ENOENT if there is no
+ * instance.
+ */
+int die_walk_instances(Dwarf_Die *or_die, int (*callback)(Dwarf_Die *, void *),
+ void *data)
+{
+ Dwarf_Die cu_die;
+ Dwarf_Die die_mem;
+ struct __instance_walk_param iwp = {
+ .addr = or_die->addr,
+ .callback = callback,
+ .data = data,
+ .retval = -ENOENT,
+ };
+
+ if (dwarf_diecu(or_die, &cu_die, NULL, NULL) == NULL)
+ return -ENOENT;
+
+ die_find_child(&cu_die, __die_walk_instances_cb, &iwp, &die_mem);
+
+ return iwp.retval;
+}
+
+/* Line walker internal parameters */
+struct __line_walk_param {
+ bool recursive;
+ line_walk_callback_t callback;
+ void *data;
+ int retval;
+};
+
+static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+ Dwarf_Addr addr = 0;
+ const char *fname;
+ int lineno;
+
+ if (dwarf_tag(in_die) == DW_TAG_inlined_subroutine) {
+ fname = die_get_call_file(in_die);
+ lineno = die_get_call_lineno(in_die);
+ if (fname && lineno > 0 && die_entrypc(in_die, &addr) == 0) {
+ lw->retval = lw->callback(fname, lineno, addr, lw->data);
+ if (lw->retval != 0)
+ return DIE_FIND_CB_END;
+ }
+ if (!lw->recursive)
+ return DIE_FIND_CB_SIBLING;
+ }
+
+ if (addr) {
+ fname = dwarf_decl_file(in_die);
+ if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
+ lw->retval = lw->callback(fname, lineno, addr, lw->data);
+ if (lw->retval != 0)
+ return DIE_FIND_CB_END;
+ }
+ }
+
+ /* Continue to search nested inlined function call-sites */
+ return DIE_FIND_CB_CONTINUE;
+}
+
+/* Walk on lines of blocks included in given DIE */
+static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
+ line_walk_callback_t callback, void *data)
+{
+ struct __line_walk_param lw = {
+ .recursive = recursive,
+ .callback = callback,
+ .data = data,
+ .retval = 0,
+ };
+ Dwarf_Die die_mem;
+ Dwarf_Addr addr;
+ const char *fname;
+ int lineno;
+
+ /* Handle function declaration line */
+ fname = dwarf_decl_file(sp_die);
+ if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
+ die_entrypc(sp_die, &addr) == 0) {
+ lw.retval = callback(fname, lineno, addr, data);
+ if (lw.retval != 0)
+ goto done;
+ }
+ die_find_child(sp_die, __die_walk_funclines_cb, &lw, &die_mem);
+done:
+ return lw.retval;
+}
+
+static int __die_walk_culines_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct __line_walk_param *lw = data;
+
+ /*
+ * Since an inlined function can include another inlined function in
+ * the same file, we need to walk it recursively.
+ */
+ lw->retval = __die_walk_funclines(sp_die, true, lw->callback, lw->data);
+ if (lw->retval != 0)
+ return DWARF_CB_ABORT;
+
+ return DWARF_CB_OK;
+}
+
+/**
+ * die_walk_lines - Walk on lines inside given DIE
+ * @rt_die: a root DIE (CU, subprogram or inlined_subroutine)
+ * @callback: callback routine
+ * @data: user data
+ *
+ * Walk on all lines inside given @rt_die and call @callback on each line.
+ * If the @rt_die is a function, walk only on the lines inside the function,
+ * otherwise @rt_die must be a CU DIE.
+ * Note that this walks not only the DWARF line list, but also function
+ * entries and inline call-sites.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
+{
+ Dwarf_Lines *lines;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ const char *fname, *decf = NULL, *inf = NULL;
+ int lineno, ret = 0;
+ int decl = 0, inl;
+ Dwarf_Die die_mem, *cu_die;
+ size_t nlines, i;
+ bool flag;
+
+ /* Get the CU die */
+ if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
+ cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
+ dwarf_decl_line(rt_die, &decl);
+ decf = dwarf_decl_file(rt_die);
+ } else
+ cu_die = rt_die;
+ if (!cu_die) {
+ pr_debug2("Failed to get CU from given DIE.\n");
+ return -EINVAL;
+ }
+
+ /* Get lines list in the CU */
+ if (dwarf_getsrclines(cu_die, &lines, &nlines) != 0) {
+ pr_debug2("Failed to get source lines on this CU.\n");
+ return -ENOENT;
+ }
+ pr_debug2("Get %zd lines from this CU\n", nlines);
+
+ /* Walk on the lines in the lines list */
+ for (i = 0; i < nlines; i++) {
+ line = dwarf_onesrcline(lines, i);
+ if (line == NULL ||
+ dwarf_lineno(line, &lineno) != 0 ||
+ dwarf_lineaddr(line, &addr) != 0) {
+ pr_debug2("Failed to get line info. "
+ "Possible error in debuginfo.\n");
+ continue;
+ }
+ /* Skip end-of-sequence */
+ if (dwarf_lineendsequence(line, &flag) != 0 || flag)
+ continue;
+ /* Skip non-statement line-info */
+ if (dwarf_linebeginstatement(line, &flag) != 0 || !flag)
+ continue;
+ /* Filter lines based on address */
+ if (rt_die != cu_die) {
+ /*
+ * Address filtering
+ * The line is included in the given function, and
+ * no inline block includes it.
+ */
+ if (!dwarf_haspc(rt_die, addr))
+ continue;
+
+ if (die_find_inlinefunc(rt_die, addr, &die_mem)) {
+ /* Call-site check */
+ inf = die_get_call_file(&die_mem);
+ if ((inf && !strcmp(inf, decf)) &&
+ die_get_call_lineno(&die_mem) == lineno)
+ goto found;
+
+ dwarf_decl_line(&die_mem, &inl);
+ if (inl != decl ||
+ decf != dwarf_decl_file(&die_mem))
+ continue;
+ }
+ }
+found:
+ /* Get source line */
+ fname = dwarf_linesrc(line, NULL, NULL);
+
+ ret = callback(fname, lineno, addr, data);
+ if (ret != 0)
+ return ret;
+ }
+
+ /*
+ * DWARF lines don't include function declarations and inlined
+ * subroutines. We have to check the functions list or the given function.
+ */
+ if (rt_die != cu_die)
+ /*
+ * No need to walk inlined functions recursively, because
+ * inner inlined functions don't have the lines of the
+ * specified function.
+ */
+ ret = __die_walk_funclines(rt_die, false, callback, data);
+ else {
+ struct __line_walk_param param = {
+ .callback = callback,
+ .data = data,
+ .retval = 0,
+ };
+ dwarf_getfuncs(cu_die, __die_walk_culines_cb, &param, 0);
+ ret = param.retval;
+ }
+
+ return ret;
+}
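+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): a line_walk_callback_t that dumps every line record visited by
+ * die_walk_lines(). print_line_cb() is hypothetical; note that the line
+ * numbers arrive unsorted.
+ */
+static int __maybe_unused print_line_cb(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data __maybe_unused)
+{
+ pr_debug("%s:%d\t%#" PRIx64 "\n", fname ?: "<unknown>", lineno, addr);
+ return 0; /* a non-zero return aborts the walk and is passed to the caller */
+}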
+
+struct __find_variable_param {
+ const char *name;
+ Dwarf_Addr addr;
+};
+
+static int __die_find_variable_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct __find_variable_param *fvp = data;
+ Dwarf_Attribute attr;
+ int tag;
+
+ tag = dwarf_tag(die_mem);
+ if ((tag == DW_TAG_formal_parameter ||
+ tag == DW_TAG_variable) &&
+ die_compare_name(die_mem, fvp->name) &&
+ /* Does the DIE have location information or external instance? */
+ (dwarf_attr(die_mem, DW_AT_external, &attr) ||
+ dwarf_attr(die_mem, DW_AT_location, &attr)))
+ return DIE_FIND_CB_END;
+ if (dwarf_haspc(die_mem, fvp->addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_variable_at - Find a given name variable at given address
+ * @sp_die: a function DIE
+ * @name: variable name
+ * @addr: address
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a variable DIE called @name at @addr in @sp_die.
+ */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr, Dwarf_Die *die_mem)
+{
+ struct __find_variable_param fvp = { .name = name, .addr = addr};
+
+ return die_find_child(sp_die, __die_find_variable_cb, (void *)&fvp,
+ die_mem);
+}
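+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): look up a local variable by name at a probe address and print its
+ * type name. dump_var_type() is hypothetical and assumes strbuf.h is in
+ * scope, as die_get_typename() below requires anyway.
+ */
+static int __maybe_unused dump_var_type(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr)
+{
+ Dwarf_Die vr_die;
+ struct strbuf buf = STRBUF_INIT;
+ int ret;
+
+ if (!die_find_variable_at(sp_die, name, addr, &vr_die))
+ return -ENOENT;
+
+ ret = die_get_typename(&vr_die, &buf);
+ if (ret == 0)
+ pr_debug("%s has type %s\n", name, buf.buf);
+ strbuf_release(&buf);
+ return ret;
+}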
+
+static int __die_find_member_cb(Dwarf_Die *die_mem, void *data)
+{
+ const char *name = data;
+
+ if (dwarf_tag(die_mem) == DW_TAG_member) {
+ if (die_compare_name(die_mem, name))
+ return DIE_FIND_CB_END;
+ else if (!dwarf_diename(die_mem)) { /* Unnamed structure */
+ Dwarf_Die type_die, tmp_die;
+ if (die_get_type(die_mem, &type_die) &&
+ die_find_member(&type_die, name, &tmp_die))
+ return DIE_FIND_CB_END;
+ }
+ }
+ return DIE_FIND_CB_SIBLING;
+}
+
+/**
+ * die_find_member - Find a given name member in a data structure
+ * @st_die: a data structure type DIE
+ * @name: member name
+ * @die_mem: a buffer for result DIE
+ *
+ * Find a member DIE called @name in @st_die.
+ */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+ Dwarf_Die *die_mem)
+{
+ return die_find_child(st_die, __die_find_member_cb, (void *)name,
+ die_mem);
+}
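+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): resolve one level of a "var.member" access by finding the member
+ * DIE and then its qualifier- and typedef-stripped type DIE.
+ * find_member_type() is hypothetical.
+ */
+static Dwarf_Die *__maybe_unused find_member_type(Dwarf_Die *st_die,
+ const char *member,
+ Dwarf_Die *die_mem)
+{
+ Dwarf_Die mb_die;
+
+ if (!die_find_member(st_die, member, &mb_die))
+ return NULL;
+ return die_get_real_type(&mb_die, die_mem);
+}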
+
+/**
+ * die_get_typename - Get the type name of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for result type name
+ *
+ * Get the type name of @vr_die and store it in @buf. Returns 0 on success,
+ * or -ENOENT if the type name could not be found.
+ * Note that the result stores the typedef name if possible, and stores
+ * "*(function_type)" if the type is a function pointer.
+ */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ Dwarf_Die type;
+ int tag, ret;
+ const char *tmp = "";
+
+ if (__die_get_real_type(vr_die, &type) == NULL)
+ return -ENOENT;
+
+ tag = dwarf_tag(&type);
+ if (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)
+ tmp = "*";
+ else if (tag == DW_TAG_subroutine_type) {
+ /* Function pointer */
+ return strbuf_add(buf, "(function_type)", 15);
+ } else {
+ if (!dwarf_diename(&type))
+ return -ENOENT;
+ if (tag == DW_TAG_union_type)
+ tmp = "union ";
+ else if (tag == DW_TAG_structure_type)
+ tmp = "struct ";
+ else if (tag == DW_TAG_enumeration_type)
+ tmp = "enum ";
+ /* Write a base name */
+ return strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+ }
+ ret = die_get_typename(&type, buf);
+ return ret ? ret : strbuf_addstr(buf, tmp);
+}
+
+/**
+ * die_get_varname - Get the name and type of given variable DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name
+ *
+ * Get the name and type of @vr_die and store them in @buf as "type\tname".
+ */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ int ret;
+
+ ret = die_get_typename(vr_die, buf);
+ if (ret < 0) {
+ pr_debug("Failed to get type, make it unknown.\n");
+ ret = strbuf_add(buf, " (unknown_type)", 14);
+ }
+
+ return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+}
+
+#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and store it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+ struct strbuf *buf)
+{
+ Dwarf_Die *scopes;
+ int count;
+ size_t offset = 0;
+ Dwarf_Addr base;
+ Dwarf_Addr start, end;
+ Dwarf_Addr entry;
+ int ret;
+ bool first = true;
+ const char *name;
+
+ ret = die_entrypc(sp_die, &entry);
+ if (ret)
+ return ret;
+
+ name = dwarf_diename(sp_die);
+ if (!name)
+ return -ENOENT;
+
+ count = dwarf_getscopes_die(vr_die, &scopes);
+
+ /* scopes[1] is the innermost scope DIE containing @vr_die */
+ if (count <= 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+ &start, &end)) > 0) {
+ start -= entry;
+ end -= entry;
+
+ if (first) {
+ ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
+ first = false;
+ } else {
+ ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
+ }
+ if (ret < 0)
+ goto out;
+ }
+
+ if (!first)
+ ret = strbuf_add(buf, "]>", 2);
+
+out:
+ free(scopes);
+ return ret;
+}
+
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and store it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+ int ret = 0;
+ Dwarf_Addr base;
+ Dwarf_Addr start, end;
+ Dwarf_Addr entry;
+ Dwarf_Op *op;
+ size_t nops;
+ size_t offset = 0;
+ Dwarf_Attribute attr;
+ bool first = true;
+ const char *name;
+
+ ret = die_entrypc(sp_die, &entry);
+ if (ret)
+ return ret;
+
+ name = dwarf_diename(sp_die);
+ if (!name)
+ return -ENOENT;
+
+ if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+ return -EINVAL;
+
+ while ((offset = dwarf_getlocations(&attr, offset, &base,
+ &start, &end, &op, &nops)) > 0) {
+ if (start == 0) {
+ /* Single Location Descriptions */
+ ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+ goto out;
+ }
+
+ /* Location Lists */
+ start -= entry;
+ end -= entry;
+ if (first) {
+ ret = strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
+ name, start, end);
+ first = false;
+ } else {
+ ret = strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
+ start, end);
+ }
+ if (ret < 0)
+ goto out;
+ }
+
+ if (!first)
+ ret = strbuf_add(buf, "]>", 2);
+out:
+ return ret;
+}
+#else
+int die_get_var_range(Dwarf_Die *sp_die __maybe_unused,
+ Dwarf_Die *vr_die __maybe_unused,
+ struct strbuf *buf __maybe_unused)
+{
+ return -ENOTSUP;
+}
+#endif
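+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): build "type\tname@<func+[ranges]>" in a single strbuf, treating
+ * -ENOTSUP (libdw without dwarf_getlocations) as "no range info" rather
+ * than a hard error. describe_var() is hypothetical.
+ */
+static int __maybe_unused describe_var(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+ struct strbuf *buf)
+{
+ int ret = die_get_varname(vr_die, buf);
+
+ if (ret < 0)
+ return ret;
+ ret = die_get_var_range(sp_die, vr_die, buf);
+ return ret == -ENOTSUP ? 0 : ret;
+}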
+
+/*
+ * die_has_loclist - Check if DW_AT_location of @vr_die is a location list
+ * @vr_die: a variable DIE
+ */
+static bool die_has_loclist(Dwarf_Die *vr_die)
+{
+ Dwarf_Attribute loc;
+ int tag = dwarf_tag(vr_die);
+
+ if (tag != DW_TAG_formal_parameter &&
+ tag != DW_TAG_variable)
+ return false;
+
+ return (dwarf_attr_integrate(vr_die, DW_AT_location, &loc) &&
+ dwarf_whatform(&loc) == DW_FORM_sec_offset);
+}
+
+/*
+ * die_is_optimized_target - Check if target program is compiled with
+ * optimization
+ * @cu_die: a CU DIE
+ *
+ * If any object in the given CU has a DW_AT_location that is a location
+ * list, the target program was compiled with optimization. This applies
+ * to clang as well.
+ */
+bool die_is_optimized_target(Dwarf_Die *cu_die)
+{
+ Dwarf_Die tmp_die;
+
+ if (die_has_loclist(cu_die))
+ return true;
+
+ if (!dwarf_child(cu_die, &tmp_die) &&
+ die_is_optimized_target(&tmp_die))
+ return true;
+
+ if (!dwarf_siblingof(cu_die, &tmp_die) &&
+ die_is_optimized_target(&tmp_die))
+ return true;
+
+ return false;
+}
+
+/*
+ * die_search_idx - Search index of given line address
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @addr: address we are looking for
+ * @idx: index to be set by this function (return value)
+ *
+ * Search for @addr by looping over every line of the CU. If the address
+ * matches, set the index of that line in @idx. Note that a single source
+ * line can have multiple line records, i.e. a single source line can
+ * have multiple indexes.
+ */
+static bool die_search_idx(Dwarf_Lines *lines, unsigned long nr_lines,
+ Dwarf_Addr addr, unsigned long *idx)
+{
+ unsigned long i;
+ Dwarf_Addr tmp;
+
+ for (i = 0; i < nr_lines; i++) {
+ if (dwarf_lineaddr(dwarf_onesrcline(lines, i), &tmp))
+ return false;
+
+ if (tmp == addr) {
+ *idx = i;
+ return true;
+ }
+ }
+ return false;
+}
+
+/*
+ * die_get_postprologue_addr - Search next address after function prologue
+ * @entrypc_idx: entrypc index
+ * @lines: Line records of single CU
+ * @nr_lines: Number of @lines
+ * @highpc: high PC address of the function
+ * @postprologue_addr: Next address after function prologue (return value)
+ *
+ * Look for the prologue-end marker. If there is no explicit marker, return
+ * the address of the next line record or the next source line.
+ */
+static bool die_get_postprologue_addr(unsigned long entrypc_idx,
+ Dwarf_Lines *lines,
+ unsigned long nr_lines,
+ Dwarf_Addr highpc,
+ Dwarf_Addr *postprologue_addr)
+{
+ unsigned long i;
+ int entrypc_lno, lno;
+ Dwarf_Line *line;
+ Dwarf_Addr addr;
+ bool p_end;
+
+ /* entrypc_lno is actual source line number */
+ line = dwarf_onesrcline(lines, entrypc_idx);
+ if (dwarf_lineno(line, &entrypc_lno))
+ return false;
+
+ for (i = entrypc_idx; i < nr_lines; i++) {
+ line = dwarf_onesrcline(lines, i);
+
+ if (dwarf_lineaddr(line, &addr) ||
+ dwarf_lineno(line, &lno) ||
+ dwarf_lineprologueend(line, &p_end))
+ return false;
+
+ /* highpc is exclusive. [entrypc,highpc) */
+ if (addr >= highpc)
+ break;
+
+ /* clang supports prologue-end marker */
+ if (p_end)
+ break;
+
+ /* Actual next line in source */
+ if (lno != entrypc_lno)
+ break;
+
+ /*
+ * A single source line can have multiple line records.
+ * For example,
+ * void foo() { printf("hello\n"); }
+ * contains two line records: one points to the declaration and
+ * the other points to the printf() line. Variable 'lno' won't get
+ * incremented in this case, but 'i' will.
+ */
+ if (i != entrypc_idx)
+ break;
+ }
+
+ dwarf_lineaddr(line, postprologue_addr);
+ if (*postprologue_addr >= highpc)
+ dwarf_lineaddr(dwarf_onesrcline(lines, i - 1),
+ postprologue_addr);
+
+ return true;
+}
+
+/*
+ * die_skip_prologue - Use next address after prologue as probe location
+ * @sp_die: a subprogram DIE
+ * @cu_die: a CU DIE
+ * @entrypc: entrypc of the function
+ *
+ * A function prologue prepares the stack and registers before executing the
+ * function logic. When the target program is compiled without optimization,
+ * function parameter information is only valid after the prologue. If we
+ * probe the entrypc of the function and try to record a function parameter,
+ * we get a garbage value.
+ */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc)
+{
+ size_t nr_lines = 0;
+ unsigned long entrypc_idx = 0;
+ Dwarf_Lines *lines = NULL;
+ Dwarf_Addr postprologue_addr;
+ Dwarf_Addr highpc;
+
+ if (dwarf_highpc(sp_die, &highpc))
+ return;
+
+ if (dwarf_getsrclines(cu_die, &lines, &nr_lines))
+ return;
+
+ if (!die_search_idx(lines, nr_lines, *entrypc, &entrypc_idx))
+ return;
+
+ if (!die_get_postprologue_addr(entrypc_idx, lines, nr_lines,
+ highpc, &postprologue_addr))
+ return;
+
+ *entrypc = postprologue_addr;
+}
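+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): one plausible calling pattern -- only move a probe address past
+ * the prologue when the CU was built without optimization, since optimized
+ * code may lack a usable prologue boundary. adjust_probe_addr() is
+ * hypothetical and not necessarily how perf's probe finder wires this up.
+ */
+static void __maybe_unused adjust_probe_addr(Dwarf_Die *sp_die,
+ Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc)
+{
+ if (!die_is_optimized_target(cu_die))
+ die_skip_prologue(sp_die, cu_die, entrypc);
+}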
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
new file mode 100644
index 000000000..ee15fac4e
--- /dev/null
+++ b/tools/perf/util/dwarf-aux.h
@@ -0,0 +1,142 @@
+#ifndef _DWARF_AUX_H
+#define _DWARF_AUX_H
+/*
+ * dwarf-aux.h : libdw auxiliary interfaces
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <dwarf.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <elfutils/version.h>
+
+/* Find the realpath of the target file */
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+
+/* Get DW_AT_comp_dir (should be NULL with older gcc) */
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+
+/* Get a line number and file name for given address */
+int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
+ const char **fname, int *lineno);
+
+/* Walk on functions at given address */
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Get DW_AT_linkage_name (should be NULL for C binary) */
+const char *die_get_linkage_name(Dwarf_Die *dw_die);
+
+/* Get the lowest PC in DIE (including range list) */
+int die_entrypc(Dwarf_Die *dw_die, Dwarf_Addr *addr);
+
+/* Ensure that this DIE is a subprogram and definition (not declaration) */
+bool die_is_func_def(Dwarf_Die *dw_die);
+
+/* Ensure that this DIE is an instance of a subprogram */
+bool die_is_func_instance(Dwarf_Die *dw_die);
+
+/* Compare diename and tname */
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+
+/* Matching diename with glob pattern */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
+/* Get callsite line number of inline-function instance */
+int die_get_call_lineno(Dwarf_Die *in_die);
+
+/* Get callsite file name of inlined function instance */
+const char *die_get_call_file(Dwarf_Die *in_die);
+
+/* Get type die */
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Get a type die, but skip qualifiers and typedef */
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+
+/* Check whether the DIE is signed or not */
+bool die_is_signed_type(Dwarf_Die *tp_die);
+
+/* Get data_member_location offset */
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+
+/* Return values for die_find_child() callbacks */
+enum {
+ DIE_FIND_CB_END = 0, /* End of Search */
+ DIE_FIND_CB_CHILD = 1, /* Search only children */
+ DIE_FIND_CB_SIBLING = 2, /* Search only siblings */
+ DIE_FIND_CB_CONTINUE = 3, /* Search children and siblings */
+};
+
+/* Search child DIEs */
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+ int (*callback)(Dwarf_Die *, void *),
+ void *data, Dwarf_Die *die_mem);
+
+/* Search a non-inlined function including given address */
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Search the top inlined function including given address */
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Search the deepest inlined function including given address */
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
+
+/* Walk on the instances of given DIE */
+int die_walk_instances(Dwarf_Die *in_die,
+ int (*callback)(Dwarf_Die *, void *), void *data);
+
+/* Walker on lines (Note: line numbers will not be sorted) */
+typedef int (* line_walk_callback_t) (const char *fname, int lineno,
+ Dwarf_Addr addr, void *data);
+
+/*
+ * Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
+ * the lines inside the subprogram, otherwise the DIE must be a CU DIE.
+ */
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
+
+/* Find a variable called 'name' at given address */
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr, Dwarf_Die *die_mem);
+
+/* Find a member called 'name' */
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+ Dwarf_Die *die_mem);
+
+/* Get the type name of given variable DIE */
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Get the name and type of given variable DIE, stored as "type\tname" */
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
+
+/* Check if target program is compiled with optimization */
+bool die_is_optimized_target(Dwarf_Die *cu_die);
+
+/* Use next address after prologue as probe location */
+void die_skip_prologue(Dwarf_Die *sp_die, Dwarf_Die *cu_die,
+ Dwarf_Addr *entrypc);
+
+#endif
diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c
new file mode 100644
index 000000000..db55eddce
--- /dev/null
+++ b/tools/perf/util/dwarf-regs.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ *
+ * Written by: Masami Hiramatsu <mhiramat@kernel.org>
+ */
+
+#include <util.h>
+#include <debug.h>
+#include <dwarf-regs.h>
+#include <elf.h>
+#include <linux/kernel.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64 183 /* ARM 64 bit */
+#endif
+
+/* Define const char * {arch}_register_tbl[] */
+#define DEFINE_DWARF_REGSTR_TABLE
+#include "../arch/x86/include/dwarf-regs-table.h"
+#include "../arch/arm/include/dwarf-regs-table.h"
+#include "../arch/arm64/include/dwarf-regs-table.h"
+#include "../arch/sh/include/dwarf-regs-table.h"
+#include "../arch/powerpc/include/dwarf-regs-table.h"
+#include "../arch/s390/include/dwarf-regs-table.h"
+#include "../arch/sparc/include/dwarf-regs-table.h"
+#include "../arch/xtensa/include/dwarf-regs-table.h"
+
+#define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL)
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine)
+{
+ switch (machine) {
+ case EM_NONE: /* Generic arch - use host arch */
+ return get_arch_regstr(n);
+ case EM_386:
+ return __get_dwarf_regstr(x86_32_regstr_tbl, n);
+ case EM_X86_64:
+ return __get_dwarf_regstr(x86_64_regstr_tbl, n);
+ case EM_ARM:
+ return __get_dwarf_regstr(arm_regstr_tbl, n);
+ case EM_AARCH64:
+ return __get_dwarf_regstr(aarch64_regstr_tbl, n);
+ case EM_SH:
+ return __get_dwarf_regstr(sh_regstr_tbl, n);
+ case EM_S390:
+ return __get_dwarf_regstr(s390_regstr_tbl, n);
+ case EM_PPC:
+ case EM_PPC64:
+ return __get_dwarf_regstr(powerpc_regstr_tbl, n);
+ case EM_SPARC:
+ case EM_SPARCV9:
+ return __get_dwarf_regstr(sparc_regstr_tbl, n);
+ case EM_XTENSA:
+ return __get_dwarf_regstr(xtensa_regstr_tbl, n);
+ default:
+ pr_err("ELF MACHINE %x is not supported.\n", machine);
+ }
+ return NULL;
+}
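+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): map a DWARF register number to a kprobe-tracer register string.
+ * On x86-64, DWARF register 6 is the frame pointer, so this should print
+ * its x86_64_regstr_tbl entry. show_fp_regstr() is hypothetical.
+ */
+static void __maybe_unused show_fp_regstr(void)
+{
+ const char *reg = get_dwarf_regstr(6, EM_X86_64);
+
+ if (reg)
+ pr_debug("DWARF reg 6 on x86-64 -> %s\n", reg);
+}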
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
new file mode 100644
index 000000000..4c23779e2
--- /dev/null
+++ b/tools/perf/util/env.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cpumap.h"
+#include "env.h"
+#include "sane_ctype.h"
+#include "util.h"
+#include <errno.h>
+#include <sys/utsname.h>
+
+struct perf_env perf_env;
+
+void perf_env__exit(struct perf_env *env)
+{
+ int i;
+
+ zfree(&env->hostname);
+ zfree(&env->os_release);
+ zfree(&env->version);
+ zfree(&env->arch);
+ zfree(&env->cpu_desc);
+ zfree(&env->cpuid);
+ zfree(&env->cmdline);
+ zfree(&env->cmdline_argv);
+ zfree(&env->sibling_cores);
+ zfree(&env->sibling_threads);
+ zfree(&env->pmu_mappings);
+ zfree(&env->cpu);
+
+ for (i = 0; i < env->nr_numa_nodes; i++)
+ cpu_map__put(env->numa_nodes[i].map);
+ zfree(&env->numa_nodes);
+
+ for (i = 0; i < env->caches_cnt; i++)
+ cpu_cache_level__free(&env->caches[i]);
+ zfree(&env->caches);
+
+ for (i = 0; i < env->nr_memory_nodes; i++)
+ free(env->memory_nodes[i].set);
+ zfree(&env->memory_nodes);
+}
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[])
+{
+ int i;
+
+ /* do not include NULL termination */
+ env->cmdline_argv = calloc(argc, sizeof(char *));
+ if (env->cmdline_argv == NULL)
+ goto out_enomem;
+
+ /*
+ * Must copy the argv pointer array because it gets reordered during
+ * option parsing:
+ */
+ for (i = 0; i < argc ; i++) {
+ env->cmdline_argv[i] = argv[i];
+ if (env->cmdline_argv[i] == NULL)
+ goto out_free;
+ }
+
+ env->nr_cmdline = argc;
+
+ return 0;
+out_free:
+ zfree(&env->cmdline_argv);
+out_enomem:
+ return -ENOMEM;
+}
+
+int perf_env__read_cpu_topology_map(struct perf_env *env)
+{
+ int cpu, nr_cpus;
+
+ if (env->cpu != NULL)
+ return 0;
+
+ if (env->nr_cpus_avail == 0)
+ env->nr_cpus_avail = cpu__max_present_cpu();
+
+ nr_cpus = env->nr_cpus_avail;
+ if (nr_cpus == -1)
+ return -EINVAL;
+
+ env->cpu = calloc(nr_cpus, sizeof(env->cpu[0]));
+ if (env->cpu == NULL)
+ return -ENOMEM;
+
+ for (cpu = 0; cpu < nr_cpus; ++cpu) {
+ env->cpu[cpu].core_id = cpu_map__get_core_id(cpu);
+ env->cpu[cpu].socket_id = cpu_map__get_socket_id(cpu);
+ }
+
+ env->nr_cpus_avail = nr_cpus;
+ return 0;
+}
+
+static int perf_env__read_arch(struct perf_env *env)
+{
+ struct utsname uts;
+
+ if (env->arch)
+ return 0;
+
+ if (!uname(&uts))
+ env->arch = strdup(uts.machine);
+
+ return env->arch ? 0 : -ENOMEM;
+}
+
+static int perf_env__read_nr_cpus_avail(struct perf_env *env)
+{
+ if (env->nr_cpus_avail == 0)
+ env->nr_cpus_avail = cpu__max_present_cpu();
+
+ return env->nr_cpus_avail ? 0 : -ENOENT;
+}
+
+const char *perf_env__raw_arch(struct perf_env *env)
+{
+ return env && !perf_env__read_arch(env) ? env->arch : "unknown";
+}
+
+int perf_env__nr_cpus_avail(struct perf_env *env)
+{
+ return env && !perf_env__read_nr_cpus_avail(env) ? env->nr_cpus_avail : 0;
+}
+
+void cpu_cache_level__free(struct cpu_cache_level *cache)
+{
+ free(cache->type);
+ free(cache->map);
+ free(cache->size);
+}
+
+/*
+ * Return architecture name in a normalized form.
+ * The conversion logic comes from the Makefile.
+ */
+static const char *normalize_arch(char *arch)
+{
+ if (!strcmp(arch, "x86_64"))
+ return "x86";
+ if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
+ return "x86";
+ if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+ return "sparc";
+ if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
+ return "arm64";
+ if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+ return "arm";
+ if (!strncmp(arch, "s390", 4))
+ return "s390";
+ if (!strncmp(arch, "parisc", 6))
+ return "parisc";
+ if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+ return "powerpc";
+ if (!strncmp(arch, "mips", 4))
+ return "mips";
+ if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+ return "sh";
+
+ return arch;
+}
+
+const char *perf_env__arch(struct perf_env *env)
+{
+ struct utsname uts;
+ char *arch_name;
+
+ if (!env || !env->arch) { /* Assume local operation */
+ if (uname(&uts) < 0)
+ return NULL;
+ arch_name = uts.machine;
+ } else
+ arch_name = env->arch;
+
+ return normalize_arch(arch_name);
+}
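+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): branch on the normalized architecture name; perf_env__arch(NULL)
+ * falls back to the local uname. is_arm_arch() is hypothetical.
+ */
+static bool __maybe_unused is_arm_arch(struct perf_env *env)
+{
+ const char *arch = perf_env__arch(env);
+
+ return arch && (!strcmp(arch, "arm") || !strcmp(arch, "arm64"));
+}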
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
new file mode 100644
index 000000000..1f3ccc368
--- /dev/null
+++ b/tools/perf/util/env.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ENV_H
+#define __PERF_ENV_H
+
+#include <linux/types.h>
+#include "cpumap.h"
+
+struct cpu_topology_map {
+ int socket_id;
+ int core_id;
+};
+
+struct cpu_cache_level {
+ u32 level;
+ u32 line_size;
+ u32 sets;
+ u32 ways;
+ char *type;
+ char *size;
+ char *map;
+};
+
+struct numa_node {
+ u32 node;
+ u64 mem_total;
+ u64 mem_free;
+ struct cpu_map *map;
+};
+
+struct memory_node {
+ u64 node;
+ u64 size;
+ unsigned long *set;
+};
+
+struct perf_env {
+ char *hostname;
+ char *os_release;
+ char *version;
+ char *arch;
+ int nr_cpus_online;
+ int nr_cpus_avail;
+ char *cpu_desc;
+ char *cpuid;
+ unsigned long long total_mem;
+ unsigned int msr_pmu_type;
+
+ int nr_cmdline;
+ int nr_sibling_cores;
+ int nr_sibling_threads;
+ int nr_numa_nodes;
+ int nr_memory_nodes;
+ int nr_pmu_mappings;
+ int nr_groups;
+ char *cmdline;
+ const char **cmdline_argv;
+ char *sibling_cores;
+ char *sibling_threads;
+ char *pmu_mappings;
+ struct cpu_topology_map *cpu;
+ struct cpu_cache_level *caches;
+ int caches_cnt;
+ struct numa_node *numa_nodes;
+ struct memory_node *memory_nodes;
+ unsigned long long memory_bsize;
+};
+
+extern struct perf_env perf_env;
+
+void perf_env__exit(struct perf_env *env);
+
+int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]);
+
+int perf_env__read_cpu_topology_map(struct perf_env *env);
+
+void cpu_cache_level__free(struct cpu_cache_level *cache);
+
+const char *perf_env__arch(struct perf_env *env);
+const char *perf_env__raw_arch(struct perf_env *env);
+int perf_env__nr_cpus_avail(struct perf_env *env);
+
+#endif /* __PERF_ENV_H */
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
new file mode 100644
index 000000000..538c935b4
--- /dev/null
+++ b/tools/perf/util/event.c
@@ -0,0 +1,1718 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include <api/fs/fs.h>
+#include <linux/perf_event.h>
+#include "event.h"
+#include "debug.h"
+#include "hist.h"
+#include "machine.h"
+#include "sort.h"
+#include "string2.h"
+#include "strlist.h"
+#include "thread.h"
+#include "thread_map.h"
+#include "sane_ctype.h"
+#include "symbol/kallsyms.h"
+#include "asm/bug.h"
+#include "stat.h"
+
+static const char *perf_event__names[] = {
+ [0] = "TOTAL",
+ [PERF_RECORD_MMAP] = "MMAP",
+ [PERF_RECORD_MMAP2] = "MMAP2",
+ [PERF_RECORD_LOST] = "LOST",
+ [PERF_RECORD_COMM] = "COMM",
+ [PERF_RECORD_EXIT] = "EXIT",
+ [PERF_RECORD_THROTTLE] = "THROTTLE",
+ [PERF_RECORD_UNTHROTTLE] = "UNTHROTTLE",
+ [PERF_RECORD_FORK] = "FORK",
+ [PERF_RECORD_READ] = "READ",
+ [PERF_RECORD_SAMPLE] = "SAMPLE",
+ [PERF_RECORD_AUX] = "AUX",
+ [PERF_RECORD_ITRACE_START] = "ITRACE_START",
+ [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES",
+ [PERF_RECORD_SWITCH] = "SWITCH",
+ [PERF_RECORD_SWITCH_CPU_WIDE] = "SWITCH_CPU_WIDE",
+ [PERF_RECORD_NAMESPACES] = "NAMESPACES",
+ [PERF_RECORD_HEADER_ATTR] = "ATTR",
+ [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
+ [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
+ [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID",
+ [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND",
+ [PERF_RECORD_ID_INDEX] = "ID_INDEX",
+ [PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO",
+ [PERF_RECORD_AUXTRACE] = "AUXTRACE",
+ [PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR",
+ [PERF_RECORD_THREAD_MAP] = "THREAD_MAP",
+ [PERF_RECORD_CPU_MAP] = "CPU_MAP",
+ [PERF_RECORD_STAT_CONFIG] = "STAT_CONFIG",
+ [PERF_RECORD_STAT] = "STAT",
+ [PERF_RECORD_STAT_ROUND] = "STAT_ROUND",
+ [PERF_RECORD_EVENT_UPDATE] = "EVENT_UPDATE",
+ [PERF_RECORD_TIME_CONV] = "TIME_CONV",
+ [PERF_RECORD_HEADER_FEATURE] = "FEATURE",
+};
+
+static const char *perf_ns__names[] = {
+ [NET_NS_INDEX] = "net",
+ [UTS_NS_INDEX] = "uts",
+ [IPC_NS_INDEX] = "ipc",
+ [PID_NS_INDEX] = "pid",
+ [USER_NS_INDEX] = "user",
+ [MNT_NS_INDEX] = "mnt",
+ [CGROUP_NS_INDEX] = "cgroup",
+};
+
+const char *perf_event__name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(perf_event__names))
+ return "INVALID";
+ if (!perf_event__names[id])
+ return "UNKNOWN";
+ return perf_event__names[id];
+}
+
+static const char *perf_ns__name(unsigned int id)
+{
+ if (id >= ARRAY_SIZE(perf_ns__names))
+ return "UNKNOWN";
+ return perf_ns__names[id];
+}
+
+int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process)
+{
+ struct perf_sample synth_sample = {
+ .pid = -1,
+ .tid = -1,
+ .time = -1,
+ .stream_id = -1,
+ .cpu = -1,
+ .period = 1,
+ .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+ };
+
+ return process(tool, event, &synth_sample, machine);
+};
+
+/*
+ * Assumes that the first 4095 bytes of /proc/pid/stat contains
+ * the comm, tgid and ppid.
+ */
+static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
+ pid_t *tgid, pid_t *ppid)
+{
+ char filename[PATH_MAX];
+ char bf[4096];
+ int fd;
+ size_t size = 0;
+ ssize_t n;
+ char *name, *tgids, *ppids;
+
+ *tgid = -1;
+ *ppid = -1;
+
+ snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("couldn't open %s\n", filename);
+ return -1;
+ }
+
+ n = read(fd, bf, sizeof(bf) - 1);
+ close(fd);
+ if (n <= 0) {
+ pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n",
+ pid);
+ return -1;
+ }
+ bf[n] = '\0';
+
+ name = strstr(bf, "Name:");
+ tgids = strstr(bf, "Tgid:");
+ ppids = strstr(bf, "PPid:");
+
+ if (name) {
+ char *nl;
+
+ name += 5; /* strlen("Name:") */
+ name = ltrim(name);
+
+ nl = strchr(name, '\n');
+ if (nl)
+ *nl = '\0';
+
+ size = strlen(name);
+ if (size >= len)
+ size = len - 1;
+ memcpy(comm, name, size);
+ comm[size] = '\0';
+ } else {
+ pr_debug("Name: string not found for pid %d\n", pid);
+ }
+
+ if (tgids) {
+ tgids += 5; /* strlen("Tgid:") */
+ *tgid = atoi(tgids);
+ } else {
+ pr_debug("Tgid: string not found for pid %d\n", pid);
+ }
+
+ if (ppids) {
+ ppids += 5; /* strlen("PPid:") */
+ *ppid = atoi(ppids);
+ } else {
+ pr_debug("PPid: string not found for pid %d\n", pid);
+ }
+
+ return 0;
+}
+
+static int perf_event__prepare_comm(union perf_event *event, pid_t pid,
+ struct machine *machine,
+ pid_t *tgid, pid_t *ppid)
+{
+ size_t size;
+
+ *ppid = -1;
+
+ memset(&event->comm, 0, sizeof(event->comm));
+
+ if (machine__is_host(machine)) {
+ if (perf_event__get_comm_ids(pid, event->comm.comm,
+ sizeof(event->comm.comm),
+ tgid, ppid) != 0) {
+ return -1;
+ }
+ } else {
+ *tgid = machine->pid;
+ }
+
+ if (*tgid < 0)
+ return -1;
+
+ event->comm.pid = *tgid;
+ event->comm.header.type = PERF_RECORD_COMM;
+
+ size = strlen(event->comm.comm) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ memset(event->comm.comm + size, 0, machine->id_hdr_size);
+ event->comm.header.size = (sizeof(event->comm) -
+ (sizeof(event->comm.comm) - size) +
+ machine->id_hdr_size);
+ event->comm.tid = pid;
+
+ return 0;
+}
+
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+ union perf_event *event, pid_t pid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ pid_t tgid, ppid;
+
+ if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
+ return -1;
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return tgid;
+}
+
+static void perf_event__get_ns_link_info(pid_t pid, const char *ns,
+ struct perf_ns_link_info *ns_link_info)
+{
+ struct stat64 st;
+ char proc_ns[128];
+
+ sprintf(proc_ns, "/proc/%u/ns/%s", pid, ns);
+ if (stat64(proc_ns, &st) == 0) {
+ ns_link_info->dev = st.st_dev;
+ ns_link_info->ino = st.st_ino;
+ }
+}
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ u32 idx;
+ struct perf_ns_link_info *ns_link_info;
+
+ if (!tool || !tool->namespace_events)
+ return 0;
+
+ memset(&event->namespaces, 0, (sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size));
+
+ event->namespaces.pid = tgid;
+ event->namespaces.tid = pid;
+
+ event->namespaces.nr_namespaces = NR_NAMESPACES;
+
+ ns_link_info = event->namespaces.link_info;
+
+ for (idx = 0; idx < event->namespaces.nr_namespaces; idx++)
+ perf_event__get_ns_link_info(pid, perf_ns__name(idx),
+ &ns_link_info[idx]);
+
+ event->namespaces.header.type = PERF_RECORD_NAMESPACES;
+
+ event->namespaces.header.size = (sizeof(event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int perf_event__synthesize_fork(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid, pid_t ppid,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size);
+
+ /*
+ * For the main thread, set the parent to the ppid from the status
+ * file. For other threads, set the parent pid to the main thread,
+ * i.e. assume the main thread spawns all threads in a process.
+ */
+ if (tgid == pid) {
+ event->fork.ppid = ppid;
+ event->fork.ptid = ppid;
+ } else {
+ event->fork.ppid = tgid;
+ event->fork.ptid = tgid;
+ }
+ event->fork.pid = tgid;
+ event->fork.tid = pid;
+ event->fork.header.type = PERF_RECORD_FORK;
+
+ event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
+ return -1;
+
+ return 0;
+}
+
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout)
+{
+ char filename[PATH_MAX];
+ FILE *fp;
+ unsigned long long t;
+ bool truncation = false;
+ unsigned long long timeout = proc_map_timeout * 1000000ULL;
+ int rc = 0;
+ const char *hugetlbfs_mnt = hugetlbfs__mountpoint();
+ int hugetlbfs_mnt_len = hugetlbfs_mnt ? strlen(hugetlbfs_mnt) : 0;
+
+ if (machine__is_default_guest(machine))
+ return 0;
+
+ snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
+ machine->root_dir, pid, pid);
+
+ fp = fopen(filename, "r");
+ if (fp == NULL) {
+ /*
+ * We raced with a task exiting - just return:
+ */
+ pr_debug("couldn't open %s\n", filename);
+ return -1;
+ }
+
+ event->header.type = PERF_RECORD_MMAP2;
+ t = rdclock();
+
+ while (1) {
+ char bf[BUFSIZ];
+ char prot[5];
+ char execname[PATH_MAX];
+ char anonstr[] = "//anon";
+ unsigned int ino;
+ size_t size;
+ ssize_t n;
+
+ if (fgets(bf, sizeof(bf), fp) == NULL)
+ break;
+
+ if ((rdclock() - t) > timeout) {
+ pr_warning("Reading %s time out. "
+ "You may want to increase "
+ "the time limit by --proc-map-timeout\n",
+ filename);
+ truncation = true;
+ goto out;
+ }
+
+ /* ensure null termination since stack will be reused. */
+ strcpy(execname, "");
+
+ /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
+ n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %[^\n]\n",
+ &event->mmap2.start, &event->mmap2.len, prot,
+ &event->mmap2.pgoff, &event->mmap2.maj,
+ &event->mmap2.min,
+ &ino, execname);
+
+ /*
+ * Anon maps don't have the execname.
+ */
+ if (n < 7)
+ continue;
+
+ event->mmap2.ino = (u64)ino;
+
+ /*
+ * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
+ */
+ if (machine__is_host(machine))
+ event->header.misc = PERF_RECORD_MISC_USER;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_USER;
+
+ /* map protection and flags bits */
+ event->mmap2.prot = 0;
+ event->mmap2.flags = 0;
+ if (prot[0] == 'r')
+ event->mmap2.prot |= PROT_READ;
+ if (prot[1] == 'w')
+ event->mmap2.prot |= PROT_WRITE;
+ if (prot[2] == 'x')
+ event->mmap2.prot |= PROT_EXEC;
+
+ if (prot[3] == 's')
+ event->mmap2.flags |= MAP_SHARED;
+ else
+ event->mmap2.flags |= MAP_PRIVATE;
+
+ if (prot[2] != 'x') {
+ if (!mmap_data || prot[0] != 'r')
+ continue;
+
+ event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
+ }
+
+out:
+ if (truncation)
+ event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
+ if (!strcmp(execname, ""))
+ strcpy(execname, anonstr);
+
+ if (hugetlbfs_mnt_len &&
+ !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
+ strcpy(execname, anonstr);
+ event->mmap2.flags |= MAP_HUGETLB;
+ }
+
+ size = strlen(execname) + 1;
+ memcpy(event->mmap2.filename, execname, size);
+ size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap2.len -= event->mmap.start;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size));
+ memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+ event->mmap2.header.size += machine->id_hdr_size;
+ event->mmap2.pid = tgid;
+ event->mmap2.tid = pid;
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+ rc = -1;
+ break;
+ }
+
+ if (truncation)
+ break;
+ }
+
+ fclose(fp);
+ return rc;
+}
+
+int perf_event__synthesize_modules(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int rc = 0;
+ struct map *pos;
+ struct maps *maps = machine__kernel_maps(machine);
+ union perf_event *event = zalloc((sizeof(event->mmap) +
+ machine->id_hdr_size));
+ if (event == NULL) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for kernel modules\n");
+ return -1;
+ }
+
+ event->header.type = PERF_RECORD_MMAP;
+
+ /*
+ * kernel uses 0 for user space maps, see kernel/perf_event.c
+ * __perf_event_mmap
+ */
+ if (machine__is_host(machine))
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ else
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+
+ for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+ size_t size;
+
+ if (!__map__is_kmodule(pos))
+ continue;
+
+ size = PERF_ALIGN(pos->dso->long_name_len + 1, sizeof(u64));
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size));
+ memset(event->mmap.filename + size, 0, machine->id_hdr_size);
+ event->mmap.header.size += machine->id_hdr_size;
+ event->mmap.start = pos->start;
+ event->mmap.len = pos->end - pos->start;
+ event->mmap.pid = machine->pid;
+
+ memcpy(event->mmap.filename, pos->dso->long_name,
+ pos->dso->long_name_len + 1);
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
+ rc = -1;
+ break;
+ }
+ }
+
+ free(event);
+ return rc;
+}
+
+static int __event__synthesize_thread(union perf_event *comm_event,
+ union perf_event *mmap_event,
+ union perf_event *fork_event,
+ union perf_event *namespaces_event,
+ pid_t pid, int full,
+ perf_event__handler_t process,
+ struct perf_tool *tool,
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout)
+{
+ char filename[PATH_MAX];
+ DIR *tasks;
+ struct dirent *dirent;
+ pid_t tgid, ppid;
+ int rc = 0;
+
+ /* special case: only send one comm event using passed in pid */
+ if (!full) {
+ tgid = perf_event__synthesize_comm(tool, comm_event, pid,
+ process, machine);
+
+ if (tgid == -1)
+ return -1;
+
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, pid,
+ tgid, process, machine) < 0)
+ return -1;
+
+ /*
+ * send mmap only for thread group leader
+ * see thread__init_map_groups
+ */
+ if (pid == tgid &&
+ perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+ process, machine, mmap_data,
+ proc_map_timeout))
+ return -1;
+
+ return 0;
+ }
+
+ if (machine__is_default_guest(machine))
+ return 0;
+
+ snprintf(filename, sizeof(filename), "%s/proc/%d/task",
+ machine->root_dir, pid);
+
+ tasks = opendir(filename);
+ if (tasks == NULL) {
+ pr_debug("couldn't open %s\n", filename);
+ return 0;
+ }
+
+ while ((dirent = readdir(tasks)) != NULL) {
+ char *end;
+ pid_t _pid;
+
+ _pid = strtol(dirent->d_name, &end, 10);
+ if (*end)
+ continue;
+
+ rc = -1;
+ if (perf_event__prepare_comm(comm_event, _pid, machine,
+ &tgid, &ppid) != 0)
+ break;
+
+ if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
+ ppid, process, machine) < 0)
+ break;
+
+ if (perf_event__synthesize_namespaces(tool, namespaces_event, _pid,
+ tgid, process, machine) < 0)
+ break;
+
+ /*
+ * Send the prepared comm event
+ */
+ if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
+ break;
+
+ rc = 0;
+ if (_pid == pid) {
+ /* process the parent's maps too */
+ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
+ process, machine, mmap_data, proc_map_timeout);
+ if (rc)
+ break;
+ }
+ }
+
+ closedir(tasks);
+ return rc;
+}
+
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout)
+{
+ union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
+ int err = -1, thread, j;
+
+ comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+ if (comm_event == NULL)
+ goto out;
+
+ mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+ if (mmap_event == NULL)
+ goto out_free_comm;
+
+ fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+ if (fork_event == NULL)
+ goto out_free_mmap;
+
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
+ err = 0;
+ for (thread = 0; thread < threads->nr; ++thread) {
+ if (__event__synthesize_thread(comm_event, mmap_event,
+ fork_event, namespaces_event,
+ thread_map__pid(threads, thread), 0,
+ process, tool, machine,
+ mmap_data, proc_map_timeout)) {
+ err = -1;
+ break;
+ }
+
+ /*
+ * comm.pid is set to thread group id by
+ * perf_event__synthesize_comm
+ */
+ if ((int) comm_event->comm.pid != thread_map__pid(threads, thread)) {
+ bool need_leader = true;
+
+ /* is thread group leader in thread_map? */
+ for (j = 0; j < threads->nr; ++j) {
+ if ((int) comm_event->comm.pid == thread_map__pid(threads, j)) {
+ need_leader = false;
+ break;
+ }
+ }
+
+ /* if not, generate events for it */
+ if (need_leader &&
+ __event__synthesize_thread(comm_event, mmap_event,
+ fork_event, namespaces_event,
+ comm_event->comm.pid, 0,
+ process, tool, machine,
+ mmap_data, proc_map_timeout)) {
+ err = -1;
+ break;
+ }
+ }
+ }
+ free(namespaces_event);
+out_free_fork:
+ free(fork_event);
+out_free_mmap:
+ free(mmap_event);
+out_free_comm:
+ free(comm_event);
+out:
+ return err;
+}
+
+static int __perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout,
+ struct dirent **dirent,
+ int start,
+ int num)
+{
+ union perf_event *comm_event, *mmap_event, *fork_event;
+ union perf_event *namespaces_event;
+ int err = -1;
+ char *end;
+ pid_t pid;
+ int i;
+
+ comm_event = malloc(sizeof(comm_event->comm) + machine->id_hdr_size);
+ if (comm_event == NULL)
+ goto out;
+
+ mmap_event = malloc(sizeof(mmap_event->mmap2) + machine->id_hdr_size);
+ if (mmap_event == NULL)
+ goto out_free_comm;
+
+ fork_event = malloc(sizeof(fork_event->fork) + machine->id_hdr_size);
+ if (fork_event == NULL)
+ goto out_free_mmap;
+
+ namespaces_event = malloc(sizeof(namespaces_event->namespaces) +
+ (NR_NAMESPACES * sizeof(struct perf_ns_link_info)) +
+ machine->id_hdr_size);
+ if (namespaces_event == NULL)
+ goto out_free_fork;
+
+ for (i = start; i < start + num; i++) {
+ if (!isdigit(dirent[i]->d_name[0]))
+ continue;
+
+ pid = (pid_t)strtol(dirent[i]->d_name, &end, 10);
+ /* only interested in proper numerical dirents */
+ if (*end)
+ continue;
+ /*
+ * We may race with exiting thread, so don't stop just because
+ * one thread couldn't be synthesized.
+ */
+ __event__synthesize_thread(comm_event, mmap_event, fork_event,
+ namespaces_event, pid, 1, process,
+ tool, machine, mmap_data,
+ proc_map_timeout);
+ }
+ err = 0;
+
+ free(namespaces_event);
+out_free_fork:
+ free(fork_event);
+out_free_mmap:
+ free(mmap_event);
+out_free_comm:
+ free(comm_event);
+out:
+ return err;
+}
+
+struct synthesize_threads_arg {
+ struct perf_tool *tool;
+ perf_event__handler_t process;
+ struct machine *machine;
+ bool mmap_data;
+ unsigned int proc_map_timeout;
+ struct dirent **dirent;
+ int num;
+ int start;
+};
+
+static void *synthesize_threads_worker(void *arg)
+{
+ struct synthesize_threads_arg *args = arg;
+
+ __perf_event__synthesize_threads(args->tool, args->process,
+ args->machine, args->mmap_data,
+ args->proc_map_timeout, args->dirent,
+ args->start, args->num);
+ return NULL;
+}
+
+int perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize)
+{
+ struct synthesize_threads_arg *args = NULL;
+ pthread_t *synthesize_threads = NULL;
+ char proc_path[PATH_MAX];
+ struct dirent **dirent;
+ int num_per_thread;
+ int m, n, i, j;
+ int thread_nr;
+ int base = 0;
+ int err = -1;
+
+
+ if (machine__is_default_guest(machine))
+ return 0;
+
+ snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
+ n = scandir(proc_path, &dirent, 0, alphasort);
+ if (n < 0)
+ return err;
+
+ if (nr_threads_synthesize == UINT_MAX)
+ thread_nr = sysconf(_SC_NPROCESSORS_ONLN);
+ else
+ thread_nr = nr_threads_synthesize;
+
+ if (thread_nr <= 1) {
+ err = __perf_event__synthesize_threads(tool, process,
+ machine, mmap_data,
+ proc_map_timeout,
+ dirent, base, n);
+ goto free_dirent;
+ }
+ if (thread_nr > n)
+ thread_nr = n;
+
+ synthesize_threads = calloc(thread_nr, sizeof(pthread_t));
+ if (synthesize_threads == NULL)
+ goto free_dirent;
+
+ args = calloc(thread_nr, sizeof(*args));
+ if (args == NULL)
+ goto free_threads;
+
+ num_per_thread = n / thread_nr;
+ m = n % thread_nr;
+ for (i = 0; i < thread_nr; i++) {
+ args[i].tool = tool;
+ args[i].process = process;
+ args[i].machine = machine;
+ args[i].mmap_data = mmap_data;
+ args[i].proc_map_timeout = proc_map_timeout;
+ args[i].dirent = dirent;
+ }
+ for (i = 0; i < m; i++) {
+ args[i].num = num_per_thread + 1;
+ args[i].start = i * args[i].num;
+ }
+ if (i != 0)
+ base = args[i-1].start + args[i-1].num;
+ for (j = i; j < thread_nr; j++) {
+ args[j].num = num_per_thread;
+ args[j].start = base + (j - i) * args[i].num;
+ }
+
+ for (i = 0; i < thread_nr; i++) {
+ if (pthread_create(&synthesize_threads[i], NULL,
+ synthesize_threads_worker, &args[i]))
+ goto out_join;
+ }
+ err = 0;
+out_join:
+ for (i = 0; i < thread_nr; i++)
+ pthread_join(synthesize_threads[i], NULL);
+ free(args);
+free_threads:
+ free(synthesize_threads);
+free_dirent:
+ for (i = 0; i < n; i++)
+ free(dirent[i]);
+ free(dirent);
+
+ return err;
+}
+
+struct process_symbol_args {
+ const char *name;
+ u64 start;
+};
+
+static int find_symbol_cb(void *arg, const char *name, char type,
+ u64 start)
+{
+ struct process_symbol_args *args = arg;
+
+ /*
+ * Must be a function or at least an alias, as in PARISC64, where "_text" is
+ * an 'A'-type alias for the same address as "_stext".
+ */
+ if (!(kallsyms__is_function(type) ||
+ type == 'A') || strcmp(name, args->name))
+ return 0;
+
+ args->start = start;
+ return 1;
+}
+
+int kallsyms__get_function_start(const char *kallsyms_filename,
+ const char *symbol_name, u64 *addr)
+{
+ struct process_symbol_args args = { .name = symbol_name, };
+
+ if (kallsyms__parse(kallsyms_filename, &args, find_symbol_cb) <= 0)
+ return -1;
+
+ *addr = args.start;
+ return 0;
+}
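+
+/*
+ * Illustrative usage sketch (an editor's example, not part of the original
+ * patch): resolve the runtime start of _text from the default kallsyms
+ * path; the address may read back as 0 without sufficient privileges.
+ * show_text_start() is hypothetical.
+ */
+static int __maybe_unused show_text_start(void)
+{
+ u64 addr = 0;
+
+ if (kallsyms__get_function_start("/proc/kallsyms", "_text", &addr))
+ return -1;
+ pr_debug("_text starts at %#" PRIx64 "\n", addr);
+ return 0;
+}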
+
+int __weak perf_event__synthesize_extra_kmaps(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
+static int __perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ size_t size;
+ struct map *map = machine__kernel_map(machine);
+ struct kmap *kmap;
+ int err;
+ union perf_event *event;
+
+ if (map == NULL)
+ return -1;
+
+ kmap = map__kmap(map);
+ if (!kmap->ref_reloc_sym)
+ return -1;
+
+ /*
+ * We should get this from /sys/kernel/sections/.text, but until that is
+ * available we use this, and once it is, we keep this as a fallback for
+ * older kernels.
+ */
+ event = zalloc((sizeof(event->mmap) + machine->id_hdr_size));
+ if (event == NULL) {
+ pr_debug("Not enough memory synthesizing mmap event "
+ "for kernel modules\n");
+ return -1;
+ }
+
+ if (machine__is_host(machine)) {
+ /*
+ * kernel uses PERF_RECORD_MISC_USER for user space maps,
+ * see kernel/perf_event.c __perf_event_mmap
+ */
+ event->header.misc = PERF_RECORD_MISC_KERNEL;
+ } else {
+ event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
+ }
+
+ size = snprintf(event->mmap.filename, sizeof(event->mmap.filename),
+ "%s%s", machine->mmap_name, kmap->ref_reloc_sym->name) + 1;
+ size = PERF_ALIGN(size, sizeof(u64));
+ event->mmap.header.type = PERF_RECORD_MMAP;
+ event->mmap.header.size = (sizeof(event->mmap) -
+ (sizeof(event->mmap.filename) - size) + machine->id_hdr_size);
+ event->mmap.pgoff = kmap->ref_reloc_sym->addr;
+ event->mmap.start = map->start;
+ event->mmap.len = map->end - event->mmap.start;
+ event->mmap.pid = machine->pid;
+
+ err = perf_tool__process_synth_event(tool, event, machine, process);
+ free(event);
+
+ return err;
+}
+
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ int err;
+
+ err = __perf_event__synthesize_kernel_mmap(tool, process, machine);
+ if (err < 0)
+ return err;
+
+ return perf_event__synthesize_extra_kmaps(tool, process, machine);
+}
+
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event *event;
+ int i, err, size;
+
+ size = sizeof(event->thread_map);
+ size += threads->nr * sizeof(event->thread_map.entries[0]);
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_THREAD_MAP;
+ event->header.size = size;
+ event->thread_map.nr = threads->nr;
+
+ for (i = 0; i < threads->nr; i++) {
+ struct thread_map_event_entry *entry = &event->thread_map.entries[i];
+ char *comm = thread_map__comm(threads, i);
+
+ if (!comm)
+ comm = (char *) "";
+
+ entry->pid = thread_map__pid(threads, i);
+ strncpy((char *) &entry->comm, comm, sizeof(entry->comm));
+ }
+
+ err = process(tool, event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+static void synthesize_cpus(struct cpu_map_entries *cpus,
+ struct cpu_map *map)
+{
+ int i;
+
+ cpus->nr = map->nr;
+
+ for (i = 0; i < map->nr; i++)
+ cpus->cpu[i] = map->map[i];
+}
+
+static void synthesize_mask(struct cpu_map_mask *mask,
+ struct cpu_map *map, int max)
+{
+ int i;
+
+ mask->nr = BITS_TO_LONGS(max);
+ mask->long_size = sizeof(long);
+
+ for (i = 0; i < map->nr; i++)
+ set_bit(map->map[i], mask->mask);
+}
+
+static size_t cpus_size(struct cpu_map *map)
+{
+ return sizeof(struct cpu_map_entries) + map->nr * sizeof(u16);
+}
+
+static size_t mask_size(struct cpu_map *map, int *max)
+{
+ int i;
+
+ *max = 0;
+
+ for (i = 0; i < map->nr; i++) {
+ /* bit position of the cpu is its number + 1 */
+ int bit = map->map[i] + 1;
+
+ if (bit > *max)
+ *max = bit;
+ }
+
+ return sizeof(struct cpu_map_mask) + BITS_TO_LONGS(*max) * sizeof(long);
+}
+
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max)
+{
+ size_t size_cpus, size_mask;
+ bool is_dummy = cpu_map__empty(map);
+
+ /*
+ * Both array and mask data have variable size based
+ * on the number of cpus and their actual values.
+ * The size of the 'struct cpu_map_data' is:
+ *
+ * array = size of 'struct cpu_map_entries' +
+ * number of cpus * sizeof(u64)
+ *
+ * mask = size of 'struct cpu_map_mask' +
+ * maximum cpu bit converted to size of longs
+ *
+ * and finally + the size of 'struct cpu_map_data'.
+ */
+ size_cpus = cpus_size(map);
+ size_mask = mask_size(map, max);
+
+ if (is_dummy || (size_cpus < size_mask)) {
+ *size += size_cpus;
+ *type = PERF_CPU_MAP__CPUS;
+ } else {
+ *size += size_mask;
+ *type = PERF_CPU_MAP__MASK;
+ }
+
+ *size += sizeof(struct cpu_map_data);
+ *size = PERF_ALIGN(*size, sizeof(u64));
+ return zalloc(*size);
+}
+
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+ u16 type, int max)
+{
+ data->type = type;
+
+ switch (type) {
+ case PERF_CPU_MAP__CPUS:
+ synthesize_cpus((struct cpu_map_entries *) data->data, map);
+ break;
+ case PERF_CPU_MAP__MASK:
+ synthesize_mask((struct cpu_map_mask *) data->data, map, max);
+ break;
+ default:
+ break;
+ }
+}
+
+static struct cpu_map_event* cpu_map_event__new(struct cpu_map *map)
+{
+ size_t size = sizeof(struct cpu_map_event);
+ struct cpu_map_event *event;
+ int max;
+ u16 type;
+
+ event = cpu_map_data__alloc(map, &size, &type, &max);
+ if (!event)
+ return NULL;
+
+ event->header.type = PERF_RECORD_CPU_MAP;
+ event->header.size = size;
+ event->data.type = type;
+
+ cpu_map_data__synthesize(&event->data, map, type, max);
+ return event;
+}
+
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ struct cpu_map *map,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct cpu_map_event *event;
+ int err;
+
+ event = cpu_map_event__new(map);
+ if (!event)
+ return -ENOMEM;
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_config_event *event;
+ int size, i = 0, err;
+
+ size = sizeof(*event);
+ size += (PERF_STAT_CONFIG_TERM__MAX * sizeof(event->data[0]));
+
+ event = zalloc(size);
+ if (!event)
+ return -ENOMEM;
+
+ event->header.type = PERF_RECORD_STAT_CONFIG;
+ event->header.size = size;
+ event->nr = PERF_STAT_CONFIG_TERM__MAX;
+
+#define ADD(__term, __val) \
+ event->data[i].tag = PERF_STAT_CONFIG_TERM__##__term; \
+ event->data[i].val = __val; \
+ i++;
+
+ ADD(AGGR_MODE, config->aggr_mode)
+ ADD(INTERVAL, config->interval)
+ ADD(SCALE, config->scale)
+
+ WARN_ONCE(i != PERF_STAT_CONFIG_TERM__MAX,
+ "stat config terms unbalanced\n");
+#undef ADD
+
+ err = process(tool, (union perf_event *) event, NULL, machine);
+
+ free(event);
+ return err;
+}
+
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ u32 cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_event event;
+
+ event.header.type = PERF_RECORD_STAT;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.id = id;
+ event.cpu = cpu;
+ event.thread = thread;
+ event.val = count->val;
+ event.ena = count->ena;
+ event.run = count->run;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 evtime, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ struct stat_round_event event;
+
+ event.header.type = PERF_RECORD_STAT_ROUND;
+ event.header.size = sizeof(event);
+ event.header.misc = 0;
+
+ event.time = evtime;
+ event.type = type;
+
+ return process(tool, (union perf_event *) &event, NULL, machine);
+}
+
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct stat_config_event *event)
+{
+ unsigned i;
+
+ for (i = 0; i < event->nr; i++) {
+
+ switch (event->data[i].tag) {
+#define CASE(__term, __val) \
+ case PERF_STAT_CONFIG_TERM__##__term: \
+ config->__val = event->data[i].val; \
+ break;
+
+ CASE(AGGR_MODE, aggr_mode)
+ CASE(SCALE, scale)
+ CASE(INTERVAL, interval)
+#undef CASE
+ default:
+ pr_warning("unknown stat config term %" PRIu64 "\n",
+ event->data[i].tag);
+ }
+ }
+}
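+
+/*
+ * A minimal round-trip sketch (illustrative, error handling elided):
+ *
+ * struct perf_stat_config config = { .aggr_mode = AGGR_NONE };
+ *
+ * perf_event__synthesize_stat_config(tool, &config, process, NULL);
+ * ...
+ * // on the consuming side, for a PERF_RECORD_STAT_CONFIG event:
+ * perf_event__read_stat_config(&config, &event->stat_config);
+ */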
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp)
+{
+ const char *s;
+
+ if (event->header.misc & PERF_RECORD_MISC_COMM_EXEC)
+ s = " exec";
+ else
+ s = "";
+
+ return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid);
+}
+
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
+{
+ size_t ret = 0;
+ struct perf_ns_link_info *ns_link_info;
+ u32 nr_namespaces, idx;
+
+ ns_link_info = event->namespaces.link_info;
+ nr_namespaces = event->namespaces.nr_namespaces;
+
+ ret += fprintf(fp, " %d/%d - nr_namespaces: %u\n\t\t[",
+ event->namespaces.pid,
+ event->namespaces.tid,
+ nr_namespaces);
+
+ for (idx = 0; idx < nr_namespaces; idx++) {
+ if (idx && (idx % 4 == 0))
+ ret += fprintf(fp, "\n\t\t ");
+
+ ret += fprintf(fp, "%u/%s: %" PRIu64 "/%#" PRIx64 "%s", idx,
+ perf_ns__name(idx), (u64)ns_link_info[idx].dev,
+ (u64)ns_link_info[idx].ino,
+ ((idx + 1) != nr_namespaces) ? ", " : "]\n");
+ }
+
+ return ret;
+}
+
+int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_comm_event(machine, event, sample);
+}
+
+int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_namespaces_event(machine, event, sample);
+}
+
+int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_lost_event(machine, event, sample);
+}
+
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_aux_event(machine, event);
+}
+
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_itrace_start_event(machine, event);
+}
+
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_lost_samples_event(machine, event, sample);
+}
+
+int perf_event__process_switch(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_switch_event(machine, event);
+}
+
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
+ event->mmap.pid, event->mmap.tid, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff,
+ (event->header.misc & PERF_RECORD_MISC_MMAP_DATA) ? 'r' : 'x',
+ event->mmap.filename);
+}
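+
+/*
+ * For example, the format above renders a PERF_RECORD_MMAP roughly as:
+ *
+ * 1234/1234: [0x400000(0x1000) @ 0]: x /usr/bin/ls
+ *
+ * i.e. pid/tid, start, len, pgoff, 'r' for data or 'x' for code, filename.
+ */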
+
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64
+ " %02x:%02x %"PRIu64" %"PRIu64"]: %c%c%c%c %s\n",
+ event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+ event->mmap2.min, event->mmap2.ino,
+ event->mmap2.ino_generation,
+ (event->mmap2.prot & PROT_READ) ? 'r' : '-',
+ (event->mmap2.prot & PROT_WRITE) ? 'w' : '-',
+ (event->mmap2.prot & PROT_EXEC) ? 'x' : '-',
+ (event->mmap2.flags & MAP_SHARED) ? 's' : 'p',
+ event->mmap2.filename);
+}
+
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp)
+{
+ struct thread_map *threads = thread_map__new_event(&event->thread_map);
+ size_t ret;
+
+ ret = fprintf(fp, " nr: ");
+
+ if (threads)
+ ret += thread_map__fprintf(threads, fp);
+ else
+ ret += fprintf(fp, "failed to get threads from event\n");
+
+ thread_map__put(threads);
+ return ret;
+}
+
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp)
+{
+ struct cpu_map *cpus = cpu_map__new_data(&event->cpu_map.data);
+ size_t ret;
+
+ ret = fprintf(fp, ": ");
+
+ if (cpus)
+ ret += cpu_map__fprintf(cpus, fp);
+ else
+ ret += fprintf(fp, "failed to get cpumap from event\n");
+
+ cpu_map__put(cpus);
+ return ret;
+}
+
+int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_mmap_event(machine, event, sample);
+}
+
+int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_mmap2_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, "(%d:%d):(%d:%d)\n",
+ event->fork.pid, event->fork.tid,
+ event->fork.ppid, event->fork.ptid);
+}
+
+int perf_event__process_fork(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_fork_event(machine, event, sample);
+}
+
+int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_exit_event(machine, event, sample);
+}
+
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s%s]\n",
+ event->aux.aux_offset, event->aux.aux_size,
+ event->aux.flags,
+ event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+ event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "",
+ event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "");
+}
+
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " pid: %u tid: %u\n",
+ event->itrace_start.pid, event->itrace_start.tid);
+}
+
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ const char *in_out = !out ? "IN " :
+ !(event->header.misc & PERF_RECORD_MISC_SWITCH_OUT_PREEMPT) ?
+ "OUT " : "OUT preempt";
+
+ if (event->header.type == PERF_RECORD_SWITCH)
+ return fprintf(fp, " %s\n", in_out);
+
+ return fprintf(fp, " %s %s pid/tid: %5u/%-5u\n",
+ in_out, out ? "next" : "prev",
+ event->context_switch.next_prev_pid,
+ event->context_switch.next_prev_tid);
+}
+
+static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost);
+}
+
+size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+{
+ size_t ret = fprintf(fp, "PERF_RECORD_%s",
+ perf_event__name(event->header.type));
+
+ switch (event->header.type) {
+ case PERF_RECORD_COMM:
+ ret += perf_event__fprintf_comm(event, fp);
+ break;
+ case PERF_RECORD_FORK:
+ case PERF_RECORD_EXIT:
+ ret += perf_event__fprintf_task(event, fp);
+ break;
+ case PERF_RECORD_MMAP:
+ ret += perf_event__fprintf_mmap(event, fp);
+ break;
+ case PERF_RECORD_NAMESPACES:
+ ret += perf_event__fprintf_namespaces(event, fp);
+ break;
+ case PERF_RECORD_MMAP2:
+ ret += perf_event__fprintf_mmap2(event, fp);
+ break;
+ case PERF_RECORD_AUX:
+ ret += perf_event__fprintf_aux(event, fp);
+ break;
+ case PERF_RECORD_ITRACE_START:
+ ret += perf_event__fprintf_itrace_start(event, fp);
+ break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ ret += perf_event__fprintf_switch(event, fp);
+ break;
+ case PERF_RECORD_LOST:
+ ret += perf_event__fprintf_lost(event, fp);
+ break;
+ default:
+ ret += fprintf(fp, "\n");
+ }
+
+ return ret;
+}
+
+int perf_event__process(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_event(machine, event, sample);
+}
+
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ struct map_groups *mg = thread->mg;
+ struct machine *machine = mg->machine;
+ bool load_map = false;
+
+ al->machine = machine;
+ al->thread = thread;
+ al->addr = addr;
+ al->cpumode = cpumode;
+ al->filtered = 0;
+
+ if (machine == NULL) {
+ al->map = NULL;
+ return NULL;
+ }
+
+ if (cpumode == PERF_RECORD_MISC_KERNEL && perf_host) {
+ al->level = 'k';
+ mg = &machine->kmaps;
+ load_map = true;
+ } else if (cpumode == PERF_RECORD_MISC_USER && perf_host) {
+ al->level = '.';
+ } else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL && perf_guest) {
+ al->level = 'g';
+ mg = &machine->kmaps;
+ load_map = true;
+ } else if (cpumode == PERF_RECORD_MISC_GUEST_USER && perf_guest) {
+ al->level = 'u';
+ } else {
+ al->level = 'H';
+ al->map = NULL;
+
+ if ((cpumode == PERF_RECORD_MISC_GUEST_USER ||
+ cpumode == PERF_RECORD_MISC_GUEST_KERNEL) &&
+ !perf_guest)
+ al->filtered |= (1 << HIST_FILTER__GUEST);
+ if ((cpumode == PERF_RECORD_MISC_USER ||
+ cpumode == PERF_RECORD_MISC_KERNEL) &&
+ !perf_host)
+ al->filtered |= (1 << HIST_FILTER__HOST);
+
+ return NULL;
+ }
+
+ al->map = map_groups__find(mg, al->addr);
+ if (al->map != NULL) {
+ /*
+ * Kernel maps might be changed when loading symbols so loading
+ * must be done prior to using kernel maps.
+ */
+ if (load_map)
+ map__load(al->map);
+ al->addr = al->map->map_ip(al->map, al->addr);
+ }
+
+ return al->map;
+}
+
+/*
+ * For branch stacks or branch samples, the sample cpumode might not be correct
+ * because it applies only to the sample 'ip' and not necessarily to 'addr' or
+ * branch stack addresses. If possible, use a fallback to deal with those cases.
+ */
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ struct map *map = thread__find_map(thread, cpumode, addr, al);
+ struct machine *machine = thread->mg->machine;
+ u8 addr_cpumode = machine__addr_cpumode(machine, cpumode, addr);
+
+ if (map || addr_cpumode == cpumode)
+ return map;
+
+ return thread__find_map(thread, addr_cpumode, addr, al);
+}
+
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+ al->sym = NULL;
+ if (thread__find_map(thread, cpumode, addr, al))
+ al->sym = map__find_symbol(al->map, al->addr);
+ return al->sym;
+}
+
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al)
+{
+ al->sym = NULL;
+ if (thread__find_map_fb(thread, cpumode, addr, al))
+ al->sym = map__find_symbol(al->map, al->addr);
+ return al->sym;
+}
+
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
+
+ if (thread == NULL)
+ return -1;
+
+ dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
+ thread__find_map(thread, sample->cpumode, sample->ip, al);
+ dump_printf(" ...... dso: %s\n",
+ al->map ? al->map->dso->long_name :
+ al->level == 'H' ? "[hypervisor]" : "<not found>");
+
+ if (thread__is_filtered(thread))
+ al->filtered |= (1 << HIST_FILTER__THREAD);
+
+ al->sym = NULL;
+ al->cpu = sample->cpu;
+ al->socket = -1;
+ al->srcline = NULL;
+
+ if (al->cpu >= 0) {
+ struct perf_env *env = machine->env;
+
+ if (env && env->cpu)
+ al->socket = env->cpu[al->cpu].socket_id;
+ }
+
+ if (al->map) {
+ struct dso *dso = al->map->dso;
+
+ if (symbol_conf.dso_list &&
+ (!dso || !(strlist__has_entry(symbol_conf.dso_list,
+ dso->short_name) ||
+ (dso->short_name != dso->long_name &&
+ strlist__has_entry(symbol_conf.dso_list,
+ dso->long_name))))) {
+ al->filtered |= (1 << HIST_FILTER__DSO);
+ }
+
+ al->sym = map__find_symbol(al->map, al->addr);
+ } else if (symbol_conf.dso_list) {
+ al->filtered |= (1 << HIST_FILTER__DSO);
+ }
+
+ if (symbol_conf.sym_list &&
+ (!al->sym || !strlist__has_entry(symbol_conf.sym_list,
+ al->sym->name))) {
+ al->filtered |= (1 << HIST_FILTER__SYMBOL);
+ }
+
+ return 0;
+}
+
+/*
+ * machine__resolve() returns with reference counts held on the entries
+ * placed in the addr_location it fills in (e.g. al->thread); when done
+ * using it (and perhaps after taking extra ref counts to keep a pointer
+ * to one of those entries) it must be paired with addr_location__put(),
+ * so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+ thread__zput(al->thread);
+}
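+
+/*
+ * Typical pairing (sketch, error handling elided):
+ *
+ * struct addr_location al;
+ *
+ * if (machine__resolve(machine, &al, sample) < 0)
+ * return -1;
+ * // ... use al.map, al.sym, al.thread ...
+ * addr_location__put(&al);
+ */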
+
+bool is_bts_event(struct perf_event_attr *attr)
+{
+ return attr->type == PERF_TYPE_HARDWARE &&
+ (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
+ attr->sample_period == 1;
+}
+
+bool sample_addr_correlates_sym(struct perf_event_attr *attr)
+{
+ if (attr->type == PERF_TYPE_SOFTWARE &&
+ (attr->config == PERF_COUNT_SW_PAGE_FAULTS ||
+ attr->config == PERF_COUNT_SW_PAGE_FAULTS_MIN ||
+ attr->config == PERF_COUNT_SW_PAGE_FAULTS_MAJ))
+ return true;
+
+ if (is_bts_event(attr))
+ return true;
+
+ return false;
+}
+
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample)
+{
+ thread__find_map_fb(thread, sample->cpumode, sample->addr, al);
+
+ al->cpu = sample->cpu;
+ al->sym = NULL;
+
+ if (al->map)
+ al->sym = map__find_symbol(al->map, al->addr);
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
new file mode 100644
index 000000000..bfa60bcaf
--- /dev/null
+++ b/tools/perf/util/event.h
@@ -0,0 +1,833 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RECORD_H
+#define __PERF_RECORD_H
+
+#include <limits.h>
+#include <stdio.h>
+#include <linux/kernel.h>
+
+#include "../perf.h"
+#include "build-id.h"
+#include "perf_regs.h"
+
+struct mmap_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
+ char filename[PATH_MAX];
+};
+
+struct mmap2_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 start;
+ u64 len;
+ u64 pgoff;
+ u32 maj;
+ u32 min;
+ u64 ino;
+ u64 ino_generation;
+ u32 prot;
+ u32 flags;
+ char filename[PATH_MAX];
+};
+
+struct comm_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ char comm[16];
+};
+
+struct namespaces_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 nr_namespaces;
+ struct perf_ns_link_info link_info[];
+};
+
+struct fork_event {
+ struct perf_event_header header;
+ u32 pid, ppid;
+ u32 tid, ptid;
+ u64 time;
+};
+
+struct lost_event {
+ struct perf_event_header header;
+ u64 id;
+ u64 lost;
+};
+
+struct lost_samples_event {
+ struct perf_event_header header;
+ u64 lost;
+};
+
+/*
+ * PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING | PERF_FORMAT_ID
+ */
+struct read_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+ u64 value;
+ u64 time_enabled;
+ u64 time_running;
+ u64 id;
+};
+
+struct throttle_event {
+ struct perf_event_header header;
+ u64 time;
+ u64 id;
+ u64 stream_id;
+};
+
+#define PERF_SAMPLE_MASK \
+ (PERF_SAMPLE_IP | PERF_SAMPLE_TID | \
+ PERF_SAMPLE_TIME | PERF_SAMPLE_ADDR | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_STREAM_ID | \
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD | \
+ PERF_SAMPLE_IDENTIFIER)
+
+/* perf sample has a 16-bit size limit */
+#define PERF_SAMPLE_MAX_SIZE (1 << 16)
+
+struct sample_event {
+ struct perf_event_header header;
+ u64 array[];
+};
+
+struct regs_dump {
+ u64 abi;
+ u64 mask;
+ u64 *regs;
+
+ /* Cached values/mask filled by first register access. */
+ u64 cache_regs[PERF_REGS_MAX];
+ u64 cache_mask;
+};
+
+struct stack_dump {
+ u16 offset;
+ u64 size;
+ char *data;
+};
+
+struct sample_read_value {
+ u64 value;
+ u64 id;
+};
+
+struct sample_read {
+ u64 time_enabled;
+ u64 time_running;
+ union {
+ struct {
+ u64 nr;
+ struct sample_read_value *values;
+ } group;
+ struct sample_read_value one;
+ };
+};
+
+struct ip_callchain {
+ u64 nr;
+ u64 ips[0];
+};
+
+struct branch_flags {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+};
+
+struct branch_entry {
+ u64 from;
+ u64 to;
+ struct branch_flags flags;
+};
+
+struct branch_stack {
+ u64 nr;
+ struct branch_entry entries[0];
+};
+
+enum {
+ PERF_IP_FLAG_BRANCH = 1ULL << 0,
+ PERF_IP_FLAG_CALL = 1ULL << 1,
+ PERF_IP_FLAG_RETURN = 1ULL << 2,
+ PERF_IP_FLAG_CONDITIONAL = 1ULL << 3,
+ PERF_IP_FLAG_SYSCALLRET = 1ULL << 4,
+ PERF_IP_FLAG_ASYNC = 1ULL << 5,
+ PERF_IP_FLAG_INTERRUPT = 1ULL << 6,
+ PERF_IP_FLAG_TX_ABORT = 1ULL << 7,
+ PERF_IP_FLAG_TRACE_BEGIN = 1ULL << 8,
+ PERF_IP_FLAG_TRACE_END = 1ULL << 9,
+ PERF_IP_FLAG_IN_TX = 1ULL << 10,
+};
+
+#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+
+#define PERF_BRANCH_MASK (\
+ PERF_IP_FLAG_BRANCH |\
+ PERF_IP_FLAG_CALL |\
+ PERF_IP_FLAG_RETURN |\
+ PERF_IP_FLAG_CONDITIONAL |\
+ PERF_IP_FLAG_SYSCALLRET |\
+ PERF_IP_FLAG_ASYNC |\
+ PERF_IP_FLAG_INTERRUPT |\
+ PERF_IP_FLAG_TX_ABORT |\
+ PERF_IP_FLAG_TRACE_BEGIN |\
+ PERF_IP_FLAG_TRACE_END)
+
+#define MAX_INSN 16
+
+struct perf_sample {
+ u64 ip;
+ u32 pid, tid;
+ u64 time;
+ u64 addr;
+ u64 id;
+ u64 stream_id;
+ u64 period;
+ u64 weight;
+ u64 transaction;
+ u32 cpu;
+ u32 raw_size;
+ u64 data_src;
+ u64 phys_addr;
+ u32 flags;
+ u16 insn_len;
+ u8 cpumode;
+ u16 misc;
+ char insn[MAX_INSN];
+ void *raw_data;
+ struct ip_callchain *callchain;
+ struct branch_stack *branch_stack;
+ struct regs_dump user_regs;
+ struct regs_dump intr_regs;
+ struct stack_dump user_stack;
+ struct sample_read read;
+};
+
+#define PERF_MEM_DATA_SRC_NONE \
+ (PERF_MEM_S(OP, NA) |\
+ PERF_MEM_S(LVL, NA) |\
+ PERF_MEM_S(SNOOP, NA) |\
+ PERF_MEM_S(LOCK, NA) |\
+ PERF_MEM_S(TLB, NA))
+
+struct build_id_event {
+ struct perf_event_header header;
+ pid_t pid;
+ u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+ char filename[];
+};
+
+enum perf_user_event_type { /* above any possible kernel type */
+ PERF_RECORD_USER_TYPE_START = 64,
+ PERF_RECORD_HEADER_ATTR = 64,
+ PERF_RECORD_HEADER_EVENT_TYPE = 65, /* deprecated */
+ PERF_RECORD_HEADER_TRACING_DATA = 66,
+ PERF_RECORD_HEADER_BUILD_ID = 67,
+ PERF_RECORD_FINISHED_ROUND = 68,
+ PERF_RECORD_ID_INDEX = 69,
+ PERF_RECORD_AUXTRACE_INFO = 70,
+ PERF_RECORD_AUXTRACE = 71,
+ PERF_RECORD_AUXTRACE_ERROR = 72,
+ PERF_RECORD_THREAD_MAP = 73,
+ PERF_RECORD_CPU_MAP = 74,
+ PERF_RECORD_STAT_CONFIG = 75,
+ PERF_RECORD_STAT = 76,
+ PERF_RECORD_STAT_ROUND = 77,
+ PERF_RECORD_EVENT_UPDATE = 78,
+ PERF_RECORD_TIME_CONV = 79,
+ PERF_RECORD_HEADER_FEATURE = 80,
+ PERF_RECORD_HEADER_MAX
+};
+
+enum auxtrace_error_type {
+ PERF_AUXTRACE_ERROR_ITRACE = 1,
+ PERF_AUXTRACE_ERROR_MAX
+};
+
+/* Attribute type for custom synthesized events */
+#define PERF_TYPE_SYNTH (INT_MAX + 1U)
+
+/* Attribute config for custom synthesized events */
+enum perf_synth_id {
+ PERF_SYNTH_INTEL_PTWRITE,
+ PERF_SYNTH_INTEL_MWAIT,
+ PERF_SYNTH_INTEL_PWRE,
+ PERF_SYNTH_INTEL_EXSTOP,
+ PERF_SYNTH_INTEL_PWRX,
+ PERF_SYNTH_INTEL_CBR,
+};
+
+/*
+ * Raw data formats for synthesized events. Note that 4 bytes of padding are
+ * present to match the 'size' member of PERF_SAMPLE_RAW data which is always
+ * 8-byte aligned. That means we must dereference raw_data with an offset of 4.
+ * Refer to perf_sample__synth_ptr() and perf_synth__raw_data(). It also means
+ * the structure sizes are 4 bytes bigger than the raw_size; refer to
+ * perf_synth__raw_size().
+ */
+
+struct perf_synth_intel_ptwrite {
+ u32 padding;
+ union {
+ struct {
+ u32 ip : 1,
+ reserved : 31;
+ };
+ u32 flags;
+ };
+ u64 payload;
+};
+
+struct perf_synth_intel_mwait {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 hints : 8,
+ reserved1 : 24,
+ extensions : 2,
+ reserved2 : 30;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_pwre {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 reserved1 : 7,
+ hw : 1,
+ subcstate : 4,
+ cstate : 4,
+ reserved2 : 48;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_exstop {
+ u32 padding;
+ union {
+ struct {
+ u32 ip : 1,
+ reserved : 31;
+ };
+ u32 flags;
+ };
+};
+
+struct perf_synth_intel_pwrx {
+ u32 padding;
+ u32 reserved;
+ union {
+ struct {
+ u64 deepest_cstate : 4,
+ last_cstate : 4,
+ wake_reason : 4,
+ reserved1 : 52;
+ };
+ u64 payload;
+ };
+};
+
+struct perf_synth_intel_cbr {
+ u32 padding;
+ union {
+ struct {
+ u32 cbr : 8,
+ reserved1 : 8,
+ max_nonturbo : 8,
+ reserved2 : 8;
+ };
+ u32 flags;
+ };
+ u32 freq;
+ u32 reserved3;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+ return sample->raw_data - 4;
+}
+
+static inline void *perf_synth__raw_data(void *p)
+{
+ return p + 4;
+}
+
+#define perf_synth__raw_size(d) (sizeof(d) - 4)
+
+#define perf_sample__bad_synth_size(s, d) ((s)->raw_size < sizeof(d) - 4)
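+
+/*
+ * A consumer-side sketch (illustrative) of the helpers above:
+ *
+ * struct perf_synth_intel_ptwrite *data = perf_sample__synth_ptr(sample);
+ *
+ * if (perf_sample__bad_synth_size(sample, *data))
+ * return; // raw_size too small for this record
+ * // data->ip and data->payload are now addressable
+ */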
+
+/*
+ * The kernel collects the number of events it couldn't send in a stretch and
+ * when possible sends this number in a PERF_RECORD_LOST event. The number of
+ * such "chunks" of lost events is stored in .nr_events[PERF_EVENT_LOST] while
+ * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
+ * the sum of all struct lost_event.lost fields reported.
+ *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
+ * The total_period is needed because by default auto-freq is used, so
+ * multiplying nr_events[PERF_RECORD_SAMPLE] by a frequency cannot yield
+ * the total number of low level events; it is necessary to sum all struct
+ * sample_event.period and stash the result in total_period.
+ */
+struct events_stats {
+ u64 total_period;
+ u64 total_non_filtered_period;
+ u64 total_lost;
+ u64 total_lost_samples;
+ u64 total_aux_lost;
+ u64 total_aux_partial;
+ u64 total_invalid_chains;
+ u32 nr_events[PERF_RECORD_HEADER_MAX];
+ u32 nr_non_filtered_samples;
+ u32 nr_lost_warned;
+ u32 nr_unknown_events;
+ u32 nr_invalid_chains;
+ u32 nr_unknown_id;
+ u32 nr_unprocessable_samples;
+ u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+ u32 nr_proc_map_timeout;
+};
+
+enum {
+ PERF_CPU_MAP__CPUS = 0,
+ PERF_CPU_MAP__MASK = 1,
+};
+
+struct cpu_map_entries {
+ u16 nr;
+ u16 cpu[];
+};
+
+struct cpu_map_mask {
+ u16 nr;
+ u16 long_size;
+ unsigned long mask[];
+};
+
+struct cpu_map_data {
+ u16 type;
+ char data[];
+};
+
+struct cpu_map_event {
+ struct perf_event_header header;
+ struct cpu_map_data data;
+};
+
+struct attr_event {
+ struct perf_event_header header;
+ struct perf_event_attr attr;
+ u64 id[];
+};
+
+enum {
+ PERF_EVENT_UPDATE__UNIT = 0,
+ PERF_EVENT_UPDATE__SCALE = 1,
+ PERF_EVENT_UPDATE__NAME = 2,
+ PERF_EVENT_UPDATE__CPUS = 3,
+};
+
+struct event_update_event_cpus {
+ struct cpu_map_data cpus;
+};
+
+struct event_update_event_scale {
+ double scale;
+};
+
+struct event_update_event {
+ struct perf_event_header header;
+ u64 type;
+ u64 id;
+
+ char data[];
+};
+
+#define MAX_EVENT_NAME 64
+
+struct perf_trace_event_type {
+ u64 event_id;
+ char name[MAX_EVENT_NAME];
+};
+
+struct event_type_event {
+ struct perf_event_header header;
+ struct perf_trace_event_type event_type;
+};
+
+struct tracing_data_event {
+ struct perf_event_header header;
+ u32 size;
+};
+
+struct id_index_entry {
+ u64 id;
+ u64 idx;
+ u64 cpu;
+ u64 tid;
+};
+
+struct id_index_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct id_index_entry entries[0];
+};
+
+struct auxtrace_info_event {
+ struct perf_event_header header;
+ u32 type;
+ u32 reserved__; /* For alignment */
+ u64 priv[];
+};
+
+struct auxtrace_event {
+ struct perf_event_header header;
+ u64 size;
+ u64 offset;
+ u64 reference;
+ u32 idx;
+ u32 tid;
+ u32 cpu;
+ u32 reserved__; /* For alignment */
+};
+
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+ struct perf_event_header header;
+ u32 type;
+ u32 code;
+ u32 cpu;
+ u32 pid;
+ u32 tid;
+ u32 reserved__; /* For alignment */
+ u64 ip;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
+struct aux_event {
+ struct perf_event_header header;
+ u64 aux_offset;
+ u64 aux_size;
+ u64 flags;
+};
+
+struct itrace_start_event {
+ struct perf_event_header header;
+ u32 pid, tid;
+};
+
+struct context_switch_event {
+ struct perf_event_header header;
+ u32 next_prev_pid;
+ u32 next_prev_tid;
+};
+
+struct thread_map_event_entry {
+ u64 pid;
+ char comm[16];
+};
+
+struct thread_map_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct thread_map_event_entry entries[];
+};
+
+enum {
+ PERF_STAT_CONFIG_TERM__AGGR_MODE = 0,
+ PERF_STAT_CONFIG_TERM__INTERVAL = 1,
+ PERF_STAT_CONFIG_TERM__SCALE = 2,
+ PERF_STAT_CONFIG_TERM__MAX = 3,
+};
+
+struct stat_config_event_entry {
+ u64 tag;
+ u64 val;
+};
+
+struct stat_config_event {
+ struct perf_event_header header;
+ u64 nr;
+ struct stat_config_event_entry data[];
+};
+
+struct stat_event {
+ struct perf_event_header header;
+
+ u64 id;
+ u32 cpu;
+ u32 thread;
+
+ union {
+ struct {
+ u64 val;
+ u64 ena;
+ u64 run;
+ };
+ u64 values[3];
+ };
+};
+
+enum {
+ PERF_STAT_ROUND_TYPE__INTERVAL = 0,
+ PERF_STAT_ROUND_TYPE__FINAL = 1,
+};
+
+struct stat_round_event {
+ struct perf_event_header header;
+ u64 type;
+ u64 time;
+};
+
+struct time_conv_event {
+ struct perf_event_header header;
+ u64 time_shift;
+ u64 time_mult;
+ u64 time_zero;
+};
+
+struct feature_event {
+ struct perf_event_header header;
+ u64 feat_id;
+ char data[];
+};
+
+union perf_event {
+ struct perf_event_header header;
+ struct mmap_event mmap;
+ struct mmap2_event mmap2;
+ struct comm_event comm;
+ struct namespaces_event namespaces;
+ struct fork_event fork;
+ struct lost_event lost;
+ struct lost_samples_event lost_samples;
+ struct read_event read;
+ struct throttle_event throttle;
+ struct sample_event sample;
+ struct attr_event attr;
+ struct event_update_event event_update;
+ struct event_type_event event_type;
+ struct tracing_data_event tracing_data;
+ struct build_id_event build_id;
+ struct id_index_event id_index;
+ struct auxtrace_info_event auxtrace_info;
+ struct auxtrace_event auxtrace;
+ struct auxtrace_error_event auxtrace_error;
+ struct aux_event aux;
+ struct itrace_start_event itrace_start;
+ struct context_switch_event context_switch;
+ struct thread_map_event thread_map;
+ struct cpu_map_event cpu_map;
+ struct stat_config_event stat_config;
+ struct stat_event stat;
+ struct stat_round_event stat_round;
+ struct time_conv_event time_conv;
+ struct feature_event feat;
+};
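+
+/*
+ * Consumers typically switch on event->header.type and then access the
+ * matching union member, e.g. (sketch):
+ *
+ * if (event->header.type == PERF_RECORD_MMAP)
+ * printf("%s\n", event->mmap.filename);
+ */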
+
+void perf_event__print_totals(void);
+
+struct perf_tool;
+struct thread_map;
+struct cpu_map;
+struct perf_stat_config;
+struct perf_counts_values;
+
+typedef int (*perf_event__handler_t)(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+
+int perf_event__synthesize_thread_map(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine, bool mmap_data,
+ unsigned int proc_map_timeout);
+int perf_event__synthesize_thread_map2(struct perf_tool *tool,
+ struct thread_map *threads,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_cpu_map(struct perf_tool *tool,
+ struct cpu_map *cpus,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_threads(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine, bool mmap_data,
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize);
+int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_stat_config(struct perf_tool *tool,
+ struct perf_stat_config *config,
+ perf_event__handler_t process,
+ struct machine *machine);
+void perf_event__read_stat_config(struct perf_stat_config *config,
+ struct stat_config_event *event);
+int perf_event__synthesize_stat(struct perf_tool *tool,
+ u32 cpu, u32 thread, u64 id,
+ struct perf_counts_values *count,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_stat_round(struct perf_tool *tool,
+ u64 time, u64 type,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__synthesize_modules(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+
+int perf_event__process_comm(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_lost(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_mmap(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_fork(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_event__process_exit(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process);
+int perf_event__process(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
+
+struct addr_location;
+
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample);
+
+void addr_location__put(struct addr_location *al);
+
+struct thread;
+
+bool is_bts_event(struct perf_event_attr *attr);
+bool sample_addr_correlates_sym(struct perf_event_attr *attr);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample);
+
+const char *perf_event__name(unsigned int id);
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+ u64 read_format);
+int perf_event__synthesize_sample(union perf_event *event, u64 type,
+ u64 read_format,
+ const struct perf_sample *sample);
+
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+ union perf_event *event, pid_t pid,
+ perf_event__handler_t process,
+ struct machine *machine);
+
+int perf_event__synthesize_namespaces(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine);
+
+int perf_event__synthesize_mmap_events(struct perf_tool *tool,
+ union perf_event *event,
+ pid_t pid, pid_t tgid,
+ perf_event__handler_t process,
+ struct machine *machine,
+ bool mmap_data,
+ unsigned int proc_map_timeout);
+
+int perf_event__synthesize_extra_kmaps(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+
+size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+
+int kallsyms__get_function_start(const char *kallsyms_filename,
+ const char *symbol_name, u64 *addr);
+
+void *cpu_map_data__alloc(struct cpu_map *map, size_t *size, u16 *type, int *max);
+void cpu_map_data__synthesize(struct cpu_map_data *data, struct cpu_map *map,
+ u16 type, int max);
+
+void event_attr_init(struct perf_event_attr *attr);
+
+int perf_event_paranoid(void);
+
+extern int sysctl_perf_event_max_stack;
+extern int sysctl_perf_event_max_contexts_per_stack;
+
+#endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
new file mode 100644
index 000000000..819aa4491
--- /dev/null
+++ b/tools/perf/util/evlist.c
@@ -0,0 +1,1816 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+#include "util.h"
+#include <api/fs/fs.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include "cpumap.h"
+#include "thread_map.h"
+#include "target.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "debug.h"
+#include "units.h"
+#include "asm/bug.h"
+#include <signal.h>
+#include <unistd.h>
+
+#include "parse-events.h"
+#include <subcmd/parse-options.h>
+
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/log2.h>
+#include <linux/err.h>
+
+#ifdef LACKS_SIGQUEUE_PROTOTYPE
+int sigqueue(pid_t pid, int sig, const union sigval value);
+#endif
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+#define SID(e, x, y) xyarray__entry(e->sample_id, x, y)
+
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+ INIT_LIST_HEAD(&evlist->entries);
+ perf_evlist__set_maps(evlist, cpus, threads);
+ fdarray__init(&evlist->pollfd, 64);
+ evlist->workload.pid = -1;
+ evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+}
+
+struct perf_evlist *perf_evlist__new(void)
+{
+ struct perf_evlist *evlist = zalloc(sizeof(*evlist));
+
+ if (evlist != NULL)
+ perf_evlist__init(evlist, NULL, NULL);
+
+ return evlist;
+}
+
+struct perf_evlist *perf_evlist__new_default(void)
+{
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist && perf_evlist__add_default(evlist)) {
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
+struct perf_evlist *perf_evlist__new_dummy(void)
+{
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ if (evlist && perf_evlist__add_dummy(evlist)) {
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
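+
+/*
+ * Lifetime sketch (illustrative): an evlist built with one of the
+ * constructors above is released with perf_evlist__delete():
+ *
+ * struct perf_evlist *evlist = perf_evlist__new_default();
+ *
+ * if (evlist == NULL)
+ * return -ENOMEM;
+ * // ... open, mmap and read events ...
+ * perf_evlist__delete(evlist);
+ */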
+
+/**
+ * perf_evlist__set_id_pos - set the positions of event ids.
+ * @evlist: selected event list
+ *
+ * Events with compatible sample types all have the same id_pos
+ * and is_pos. For convenience, put a copy on evlist.
+ */
+void perf_evlist__set_id_pos(struct perf_evlist *evlist)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist);
+
+ evlist->id_pos = first->id_pos;
+ evlist->is_pos = first->is_pos;
+}
+
+static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ perf_evsel__calc_id_pos(evsel);
+
+ perf_evlist__set_id_pos(evlist);
+}
+
+static void perf_evlist__purge(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos, *n;
+
+ evlist__for_each_entry_safe(evlist, n, pos) {
+ list_del_init(&pos->node);
+ pos->evlist = NULL;
+ perf_evsel__delete(pos);
+ }
+
+ evlist->nr_entries = 0;
+}
+
+void perf_evlist__exit(struct perf_evlist *evlist)
+{
+ zfree(&evlist->mmap);
+ zfree(&evlist->overwrite_mmap);
+ fdarray__exit(&evlist->pollfd);
+}
+
+void perf_evlist__delete(struct perf_evlist *evlist)
+{
+ if (evlist == NULL)
+ return;
+
+ perf_evlist__munmap(evlist);
+ perf_evlist__close(evlist);
+ cpu_map__put(evlist->cpus);
+ thread_map__put(evlist->threads);
+ evlist->cpus = NULL;
+ evlist->threads = NULL;
+ perf_evlist__purge(evlist);
+ perf_evlist__exit(evlist);
+ free(evlist);
+}
+
+static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
+ struct perf_evsel *evsel)
+{
+ /*
+ * We may already have cpus for evsel (via PMU sysfs), so keep
+ * those, unless the user defined a target cpu list.
+ */
+ if (!evsel->own_cpus || evlist->has_user_cpus) {
+ cpu_map__put(evsel->cpus);
+ evsel->cpus = cpu_map__get(evlist->cpus);
+ } else if (evsel->cpus != evsel->own_cpus) {
+ cpu_map__put(evsel->cpus);
+ evsel->cpus = cpu_map__get(evsel->own_cpus);
+ }
+
+ thread_map__put(evsel->threads);
+ evsel->threads = thread_map__get(evlist->threads);
+}
+
+static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ __perf_evlist__propagate_maps(evlist, evsel);
+}
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry)
+{
+ entry->evlist = evlist;
+ list_add_tail(&entry->node, &evlist->entries);
+ entry->idx = evlist->nr_entries;
+ entry->tracking = !entry->idx;
+
+ if (!evlist->nr_entries++)
+ perf_evlist__set_id_pos(evlist);
+
+ __perf_evlist__propagate_maps(evlist, entry);
+}
+
+void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel)
+{
+ evsel->evlist = NULL;
+ list_del_init(&evsel->node);
+ evlist->nr_entries -= 1;
+}
+
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+ struct list_head *list)
+{
+ struct perf_evsel *evsel, *temp;
+
+ __evlist__for_each_entry_safe(list, temp, evsel) {
+ list_del_init(&evsel->node);
+ perf_evlist__add(evlist, evsel);
+ }
+}
+
+void __perf_evlist__set_leader(struct list_head *list)
+{
+ struct perf_evsel *evsel, *leader;
+
+ leader = list_entry(list->next, struct perf_evsel, node);
+ evsel = list_entry(list->prev, struct perf_evsel, node);
+
+ leader->nr_members = evsel->idx - leader->idx + 1;
+
+ __evlist__for_each_entry(list, evsel) {
+ evsel->leader = leader;
+ }
+}
+
+void perf_evlist__set_leader(struct perf_evlist *evlist)
+{
+ if (evlist->nr_entries) {
+ evlist->nr_groups = evlist->nr_entries > 1 ? 1 : 0;
+ __perf_evlist__set_leader(&evlist->entries);
+ }
+}
+
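+/*
+ * Probe for the most precise sample IP the kernel and PMU support: start
+ * at the maximum precise_ip level and back off until sys_perf_event_open()
+ * accepts the attribute.
+ */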
+void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr)
+{
+ attr->precise_ip = 3;
+
+ while (attr->precise_ip != 0) {
+ int fd = sys_perf_event_open(attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ break;
+ }
+ --attr->precise_ip;
+ }
+}
+
+int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise)
+{
+ struct perf_evsel *evsel = perf_evsel__new_cycles(precise);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
+int perf_evlist__add_dummy(struct perf_evlist *evlist)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_DUMMY,
+ .size = sizeof(attr), /* to capture ABI version */
+ };
+ struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries);
+
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
+static int perf_evlist__add_attrs(struct perf_evlist *evlist,
+ struct perf_event_attr *attrs, size_t nr_attrs)
+{
+ struct perf_evsel *evsel, *n;
+ LIST_HEAD(head);
+ size_t i;
+
+ for (i = 0; i < nr_attrs; i++) {
+ evsel = perf_evsel__new_idx(attrs + i, evlist->nr_entries + i);
+ if (evsel == NULL)
+ goto out_delete_partial_list;
+ list_add_tail(&evsel->node, &head);
+ }
+
+ perf_evlist__splice_list_tail(evlist, &head);
+
+ return 0;
+
+out_delete_partial_list:
+ __evlist__for_each_entry_safe(&head, n, evsel)
+ perf_evsel__delete(evsel);
+ return -1;
+}
+
+int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
+ struct perf_event_attr *attrs, size_t nr_attrs)
+{
+ size_t i;
+
+ for (i = 0; i < nr_attrs; i++)
+ event_attr_init(attrs + i);
+
+ return perf_evlist__add_attrs(evlist, attrs, nr_attrs);
+}
+
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
+ (int)evsel->attr.config == id)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
+ const char *name)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->attr.type == PERF_TYPE_TRACEPOINT) &&
+ (strcmp(evsel->name, name) == 0))
+ return evsel;
+ }
+
+ return NULL;
+}
+
+int perf_evlist__add_newtp(struct perf_evlist *evlist,
+ const char *sys, const char *name, void *handler)
+{
+ struct perf_evsel *evsel = perf_evsel__newtp(sys, name);
+
+ if (IS_ERR(evsel))
+ return -1;
+
+ evsel->handler = handler;
+ perf_evlist__add(evlist, evsel);
+ return 0;
+}
+
+static int perf_evlist__nr_threads(struct perf_evlist *evlist,
+ struct perf_evsel *evsel)
+{
+ if (evsel->system_wide)
+ return 1;
+ else
+ return thread_map__nr(evlist->threads);
+}
+
+void perf_evlist__disable(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ continue;
+ perf_evsel__disable(pos);
+ }
+
+ evlist->enabled = false;
+}
+
+void perf_evlist__enable(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (!perf_evsel__is_group_leader(pos) || !pos->fd)
+ continue;
+ perf_evsel__enable(pos);
+ }
+
+ evlist->enabled = true;
+}
+
+void perf_evlist__toggle_enable(struct perf_evlist *evlist)
+{
+ (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
+}
+
+static int perf_evlist__enable_event_cpu(struct perf_evlist *evlist,
+ struct perf_evsel *evsel, int cpu)
+{
+ int thread;
+ int nr_threads = perf_evlist__nr_threads(evlist, evsel);
+
+ if (!evsel->fd)
+ return -EINVAL;
+
+ for (thread = 0; thread < nr_threads; thread++) {
+ int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int perf_evlist__enable_event_thread(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int thread)
+{
+ int cpu;
+ int nr_cpus = cpu_map__nr(evlist->cpus);
+
+ if (!evsel->fd)
+ return -EINVAL;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ int err = ioctl(FD(evsel, cpu, thread), PERF_EVENT_IOC_ENABLE, 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
+ struct perf_evsel *evsel, int idx)
+{
+ bool per_cpu_mmaps = !cpu_map__empty(evlist->cpus);
+
+ if (per_cpu_mmaps)
+ return perf_evlist__enable_event_cpu(evlist, evsel, idx);
+ else
+ return perf_evlist__enable_event_thread(evlist, evsel, idx);
+}
+
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
+{
+ int nr_cpus = cpu_map__nr(evlist->cpus);
+ int nr_threads = thread_map__nr(evlist->threads);
+ int nfds = 0;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->system_wide)
+ nfds += nr_cpus;
+ else
+ nfds += nr_cpus * nr_threads;
+ }
+
+ if (fdarray__available_entries(&evlist->pollfd) < nfds &&
+ fdarray__grow(&evlist->pollfd, nfds) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int __perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
+ struct perf_mmap *map, short revent)
+{
+ int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
+ /*
+ * Save the map so that when POLLHUP'ed fds are filtered out we can
+ * drop the reference on the associated evlist->mmap[] entry.
+ */
+ if (pos >= 0) {
+ evlist->pollfd.priv[pos].ptr = map;
+
+ fcntl(fd, F_SETFL, O_NONBLOCK);
+ }
+
+ return pos;
+}
+
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd)
+{
+ return __perf_evlist__add_pollfd(evlist, fd, NULL, POLLIN);
+}
+
+static void perf_evlist__munmap_filtered(struct fdarray *fda, int fd,
+ void *arg __maybe_unused)
+{
+ struct perf_mmap *map = fda->priv[fd].ptr;
+
+ if (map)
+ perf_mmap__put(map);
+}
+
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask)
+{
+ return fdarray__filter(&evlist->pollfd, revents_and_mask,
+ perf_evlist__munmap_filtered, NULL);
+}
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout)
+{
+ return fdarray__poll(&evlist->pollfd, timeout);
+}
+
+static void perf_evlist__id_hash(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, u64 id)
+{
+ int hash;
+ struct perf_sample_id *sid = SID(evsel, cpu, thread);
+
+ sid->id = id;
+ sid->evsel = evsel;
+ hash = hash_64(sid->id, PERF_EVLIST__HLIST_BITS);
+ hlist_add_head(&sid->node, &evlist->heads[hash]);
+}
+
+void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
+ int cpu, int thread, u64 id)
+{
+ perf_evlist__id_hash(evlist, evsel, cpu, thread, id);
+ evsel->id[evsel->ids++] = id;
+}
+
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd)
+{
+ u64 read_data[4] = { 0, };
+ int id_idx = 1; /* The first entry is the counter value */
+ u64 id;
+ int ret;
+
+ ret = ioctl(fd, PERF_EVENT_IOC_ID, &id);
+ if (!ret)
+ goto add;
+
+ if (errno != ENOTTY)
+ return -1;
+
+ /* Legacy way to get event id. All hail to old kernels! */
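+
+ /*
+ * With PERF_FORMAT_ID, a read() on the fd returns, in order: the
+ * counter value, then time_enabled and/or time_running when those
+ * format bits are set, and finally the id, hence the id_idx
+ * adjustment below.
+ */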
+
+ /*
+ * This way does not work with group format read, so bail
+ * out in that case.
+ */
+ if (perf_evlist__read_format(evlist) & PERF_FORMAT_GROUP)
+ return -1;
+
+ if (!(evsel->attr.read_format & PERF_FORMAT_ID) ||
+ read(fd, &read_data, sizeof(read_data)) == -1)
+ return -1;
+
+ if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ ++id_idx;
+ if (evsel->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ ++id_idx;
+
+ id = read_data[id_idx];
+
+ add:
+ perf_evlist__id_add(evlist, evsel, cpu, thread, id);
+ return 0;
+}
+
+static void perf_evlist__set_sid_idx(struct perf_evlist *evlist,
+ struct perf_evsel *evsel, int idx, int cpu,
+ int thread)
+{
+ struct perf_sample_id *sid = SID(evsel, cpu, thread);
+ sid->idx = idx;
+ if (evlist->cpus && cpu >= 0)
+ sid->cpu = evlist->cpus->map[cpu];
+ else
+ sid->cpu = -1;
+ if (!evsel->system_wide && evlist->threads && thread >= 0)
+ sid->tid = thread_map__pid(evlist->threads, thread);
+ else
+ sid->tid = -1;
+}
+
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id)
+{
+ struct hlist_head *head;
+ struct perf_sample_id *sid;
+ int hash;
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->heads[hash];
+
+ hlist_for_each_entry(sid, head, node)
+ if (sid->id == id)
+ return sid;
+
+ return NULL;
+}
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id)
+{
+ struct perf_sample_id *sid;
+
+ if (evlist->nr_entries == 1 || !id)
+ return perf_evlist__first(evlist);
+
+ sid = perf_evlist__id2sid(evlist, id);
+ if (sid)
+ return sid->evsel;
+
+ if (!perf_evlist__sample_id_all(evlist))
+ return perf_evlist__first(evlist);
+
+ return NULL;
+}
+
+struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
+ u64 id)
+{
+ struct perf_sample_id *sid;
+
+ if (!id)
+ return NULL;
+
+ sid = perf_evlist__id2sid(evlist, id);
+ if (sid)
+ return sid->evsel;
+
+ return NULL;
+}
+
+static int perf_evlist__event2id(struct perf_evlist *evlist,
+ union perf_event *event, u64 *id)
+{
+ const u64 *array = event->sample.array;
+ ssize_t n;
+
+ n = (event->header.size - sizeof(event->header)) >> 3;
+
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (evlist->id_pos >= n)
+ return -1;
+ *id = array[evlist->id_pos];
+ } else {
+ if (evlist->is_pos > n)
+ return -1;
+ n -= evlist->is_pos;
+ *id = array[n];
+ }
+ return 0;
+}
+
+struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
+ union perf_event *event)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist);
+ struct hlist_head *head;
+ struct perf_sample_id *sid;
+ int hash;
+ u64 id;
+
+ if (evlist->nr_entries == 1)
+ return first;
+
+ if (!first->attr.sample_id_all &&
+ event->header.type != PERF_RECORD_SAMPLE)
+ return first;
+
+ if (perf_evlist__event2id(evlist, event, &id))
+ return NULL;
+
+ /* Synthesized events have an id of zero */
+ if (!id)
+ return first;
+
+ hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
+ head = &evlist->heads[hash];
+
+ hlist_for_each_entry(sid, head, node) {
+ if (sid->id == id)
+ return sid->evsel;
+ }
+ return NULL;
+}
+
+static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value)
+{
+ int i;
+
+ if (!evlist->overwrite_mmap)
+ return 0;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ int fd = evlist->overwrite_mmap[i].fd;
+ int err;
+
+ if (fd < 0)
+ continue;
+ err = ioctl(fd, PERF_EVENT_IOC_PAUSE_OUTPUT, value ? 1 : 0);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int perf_evlist__pause(struct perf_evlist *evlist)
+{
+ return perf_evlist__set_paused(evlist, true);
+}
+
+static int perf_evlist__resume(struct perf_evlist *evlist)
+{
+ return perf_evlist__set_paused(evlist, false);
+}
+
+static void perf_evlist__munmap_nofree(struct perf_evlist *evlist)
+{
+ int i;
+
+ if (evlist->mmap)
+ for (i = 0; i < evlist->nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->mmap[i]);
+
+ if (evlist->overwrite_mmap)
+ for (i = 0; i < evlist->nr_mmaps; i++)
+ perf_mmap__munmap(&evlist->overwrite_mmap[i]);
+}
+
+void perf_evlist__munmap(struct perf_evlist *evlist)
+{
+ perf_evlist__munmap_nofree(evlist);
+ zfree(&evlist->mmap);
+ zfree(&evlist->overwrite_mmap);
+}
+
+static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist,
+ bool overwrite)
+{
+ int i;
+ struct perf_mmap *map;
+
+ evlist->nr_mmaps = cpu_map__nr(evlist->cpus);
+ if (cpu_map__empty(evlist->cpus))
+ evlist->nr_mmaps = thread_map__nr(evlist->threads);
+ map = zalloc(evlist->nr_mmaps * sizeof(struct perf_mmap));
+ if (!map)
+ return NULL;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ map[i].fd = -1;
+ map[i].overwrite = overwrite;
+ /*
+ * When the perf_mmap() call is made we grab one refcount, plus
+ * one extra to let perf_mmap__consume() get the last
+ * events after all real references (perf_mmap__get()) are
+ * dropped.
+ *
+ * Each PERF_EVENT_IOC_SET_OUTPUT points to this mmap and
+ * thus does perf_mmap__get() on it.
+ */
+ refcount_set(&map[i].refcnt, 0);
+ }
+ return map;
+}
+
+static bool
+perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused,
+ struct perf_evsel *evsel)
+{
+ if (evsel->attr.write_backward)
+ return false;
+ return true;
+}
+
+static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx,
+ struct mmap_params *mp, int cpu_idx,
+ int thread, int *_output, int *_output_overwrite)
+{
+ struct perf_evsel *evsel;
+ int revent;
+ int evlist_cpu = cpu_map__cpu(evlist->cpus, cpu_idx);
+
+ evlist__for_each_entry(evlist, evsel) {
+ struct perf_mmap *maps = evlist->mmap;
+ int *output = _output;
+ int fd;
+ int cpu;
+
+ mp->prot = PROT_READ | PROT_WRITE;
+ if (evsel->attr.write_backward) {
+ output = _output_overwrite;
+ maps = evlist->overwrite_mmap;
+
+ if (!maps) {
+ maps = perf_evlist__alloc_mmap(evlist, true);
+ if (!maps)
+ return -1;
+ evlist->overwrite_mmap = maps;
+ if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY)
+ perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING);
+ }
+ mp->prot &= ~PROT_WRITE;
+ }
+
+ if (evsel->system_wide && thread)
+ continue;
+
+ cpu = cpu_map__idx(evsel->cpus, evlist_cpu);
+ if (cpu == -1)
+ continue;
+
+ fd = FD(evsel, cpu, thread);
+
+ if (*output == -1) {
+ *output = fd;
+
+ if (perf_mmap__mmap(&maps[idx], mp, *output, evlist_cpu) < 0)
+ return -1;
+ } else {
+ if (ioctl(fd, PERF_EVENT_IOC_SET_OUTPUT, *output) != 0)
+ return -1;
+
+ perf_mmap__get(&maps[idx]);
+ }
+
+ revent = perf_evlist__should_poll(evlist, evsel) ? POLLIN : 0;
+
+ /*
+ * The system_wide flag causes a selected event to always be
+ * opened without a pid. Consequently it will never get a
+ * POLLHUP, but it is used for tracking in combination with
+ * other events, so it should not need to be polled anyway.
+ * Therefore don't add it for polling.
+ */
+ if (!evsel->system_wide &&
+ __perf_evlist__add_pollfd(evlist, fd, &maps[idx], revent) < 0) {
+ perf_mmap__put(&maps[idx]);
+ return -1;
+ }
+
+ if (evsel->attr.read_format & PERF_FORMAT_ID) {
+ if (perf_evlist__id_add_fd(evlist, evsel, cpu, thread,
+ fd) < 0)
+ return -1;
+ perf_evlist__set_sid_idx(evlist, evsel, idx, cpu,
+ thread);
+ }
+ }
+
+ return 0;
+}
+
+static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
+ struct mmap_params *mp)
+{
+ int cpu, thread;
+ int nr_cpus = cpu_map__nr(evlist->cpus);
+ int nr_threads = thread_map__nr(evlist->threads);
+
+ pr_debug2("perf event ring buffer mmapped per cpu\n");
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
+ true);
+
+ for (thread = 0; thread < nr_threads; thread++) {
+ if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
+ thread, &output, &output_overwrite))
+ goto out_unmap;
+ }
+ }
+
+ return 0;
+
+out_unmap:
+ perf_evlist__munmap_nofree(evlist);
+ return -1;
+}
+
+static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
+ struct mmap_params *mp)
+{
+ int thread;
+ int nr_threads = thread_map__nr(evlist->threads);
+
+ pr_debug2("perf event ring buffer mmapped per thread\n");
+ for (thread = 0; thread < nr_threads; thread++) {
+ int output = -1;
+ int output_overwrite = -1;
+
+ auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
+ false);
+
+ if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
+ &output, &output_overwrite))
+ goto out_unmap;
+ }
+
+ return 0;
+
+out_unmap:
+ perf_evlist__munmap_nofree(evlist);
+ return -1;
+}
+
+unsigned long perf_event_mlock_kb_in_pages(void)
+{
+ unsigned long pages;
+ int max;
+
+ if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+ /*
+		 * Fall back to a historically reasonable value: things look
+		 * strange since we can't read the sysctl, but let's not
+		 * die just yet...
+ */
+ max = 512;
+ } else {
+ max -= (page_size / 1024);
+ }
+
+ pages = (max * 1024) / page_size;
+ if (!is_power_of_2(pages))
+ pages = rounddown_pow_of_two(pages);
+
+ return pages;
+}
+
+size_t perf_evlist__mmap_size(unsigned long pages)
+{
+ if (pages == UINT_MAX)
+ pages = perf_event_mlock_kb_in_pages();
+ else if (!is_power_of_2(pages))
+ return 0;
+
+ return (pages + 1) * page_size;
+}
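
For illustration (not part of the patch), the arithmetic these two helpers perform under the common defaults of a 4 KiB page size and kernel/perf_event_mlock_kb = 516:

    max   = 516 - (4096 / 1024);    /* 512 kB usable */
    pages = (512 * 1024) / 4096;    /* 128, already a power of 2 */
    size  = (128 + 1) * 4096;       /* 128 data pages + 1 header page = 528384 bytes */

so perf_evlist__mmap_size(UINT_MAX) returns 528384: the extra page holds the ring-buffer control header.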
+
+static long parse_pages_arg(const char *str, unsigned long min,
+ unsigned long max)
+{
+ unsigned long pages, val;
+ static struct parse_tag tags[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+
+ if (str == NULL)
+ return -EINVAL;
+
+ val = parse_tag_value(str, tags);
+ if (val != (unsigned long) -1) {
+		/* we got a file size value */
+ pages = PERF_ALIGN(val, page_size) / page_size;
+ } else {
+		/* we got a page count value */
+ char *eptr;
+ pages = strtoul(str, &eptr, 10);
+ if (*eptr != '\0')
+ return -EINVAL;
+ }
+
+ if (pages == 0 && min == 0) {
+ /* leave number of pages at 0 */
+ } else if (!is_power_of_2(pages)) {
+ char buf[100];
+
+ /* round pages up to next power of 2 */
+ pages = roundup_pow_of_two(pages);
+ if (!pages)
+ return -EINVAL;
+
+ unit_number__scnprintf(buf, sizeof(buf), pages * page_size);
+ pr_info("rounding mmap pages size to %s (%lu pages)\n",
+ buf, pages);
+ }
+
+ if (pages > max)
+ return -EINVAL;
+
+ return pages;
+}
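
Two illustrative calls (not from the patch), again assuming a 4 KiB page size:

    parse_pages_arg("512K", 1, max); /* tagged size: 524288 / 4096 = 128 pages */
    parse_pages_arg("100", 1, max);  /* raw count: rounded up to 128 pages, printing
                                        "rounding mmap pages size to 512K (128 pages)" */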
+
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
+{
+ unsigned long max = UINT_MAX;
+ long pages;
+
+ if (max > SIZE_MAX / page_size)
+ max = SIZE_MAX / page_size;
+
+ pages = parse_pages_arg(str, 1, max);
+ if (pages < 0) {
+ pr_err("Invalid argument for --mmap_pages/-m\n");
+ return -1;
+ }
+
+ *mmap_pages = pages;
+ return 0;
+}
+
+int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ return __perf_evlist__parse_mmap_pages(opt->value, str);
+}
+
+/**
+ * perf_evlist__mmap_ex - Create mmaps to receive events.
+ * @evlist: list of events
+ * @pages: map length in pages
+ * @auxtrace_pages: auxtrace map length in pages
+ * @auxtrace_overwrite: overwrite older auxtrace data?
+ *
+ * Unless an evsel is configured to write backward (overwrite mode), the user
+ * needs to signal event consumption using perf_mmap__write_tail(). Using
+ * perf_evlist__mmap_read() does this automatically.
+ *
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite)
+{
+ struct perf_evsel *evsel;
+ const struct cpu_map *cpus = evlist->cpus;
+ const struct thread_map *threads = evlist->threads;
+	/*
+	 * Delay setting mp.prot: it is set right before each perf_mmap__mmap()
+	 * call, because its value depends on the evsel's write_backward flag,
+	 * so &mp must not be passed around as a const pointer.
+	 */
+ struct mmap_params mp;
+
+ if (!evlist->mmap)
+ evlist->mmap = perf_evlist__alloc_mmap(evlist, false);
+ if (!evlist->mmap)
+ return -ENOMEM;
+
+ if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0)
+ return -ENOMEM;
+
+ evlist->mmap_len = perf_evlist__mmap_size(pages);
+ pr_debug("mmap size %zuB\n", evlist->mmap_len);
+ mp.mask = evlist->mmap_len - page_size - 1;
+
+ auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+ auxtrace_pages, auxtrace_overwrite);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
+ evsel->sample_id == NULL &&
+ perf_evsel__alloc_id(evsel, cpu_map__nr(cpus), threads->nr) < 0)
+ return -ENOMEM;
+ }
+
+ if (cpu_map__empty(cpus))
+ return perf_evlist__mmap_per_thread(evlist, &mp);
+
+ return perf_evlist__mmap_per_cpu(evlist, &mp);
+}
+
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages)
+{
+ return perf_evlist__mmap_ex(evlist, pages, 0, false);
+}
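
A sketch of the usual caller sequence (modeled on builtin-record; the opts names are illustrative and error handling is elided):

    struct perf_evlist *evlist = perf_evlist__new_default();

    if (perf_evlist__create_maps(evlist, &opts.target) < 0 ||
        perf_evlist__open(evlist) < 0 ||
        perf_evlist__mmap(evlist, opts.mmap_pages) < 0)  /* UINT_MAX picks the mlock_kb default */
            goto out_delete;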
+
+int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
+{
+ bool all_threads = (target->per_thread && target->system_wide);
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+
+	/*
+	 * If both '-a' and '--per-thread' are given to perf record, '-a'
+	 * overrides '--per-thread': target->per_thread = false and
+	 * target->system_wide = true.
+	 *
+	 * If only '--per-thread' is given, target->per_thread = true and
+	 * target->system_wide = false.
+	 *
+	 * So for perf record target->per_thread && target->system_wide is
+	 * always false, and thread_map__new_str() doesn't call
+	 * thread_map__new_all_cpus(), which keeps perf record's current
+	 * behavior.
+	 *
+	 * perf stat, however, allows target->per_thread and
+	 * target->system_wide to both be true, meaning "collect system-wide
+	 * per-thread data"; thread_map__new_str() then calls
+	 * thread_map__new_all_cpus() to enumerate all threads.
+	 */
+ threads = thread_map__new_str(target->pid, target->tid, target->uid,
+ all_threads);
+
+ if (!threads)
+ return -1;
+
+ if (target__uses_dummy_map(target))
+ cpus = cpu_map__dummy_new();
+ else
+ cpus = cpu_map__new(target->cpu_list);
+
+ if (!cpus)
+ goto out_delete_threads;
+
+ evlist->has_user_cpus = !!target->cpu_list;
+
+ perf_evlist__set_maps(evlist, cpus, threads);
+
+ return 0;
+
+out_delete_threads:
+ thread_map__put(threads);
+ return -1;
+}
+
+void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ /*
+	 * Allow for the possibility that one or the other of the maps isn't
+	 * being changed, i.e. don't put it. Note we are assuming the maps that are
+ * being applied are brand new and evlist is taking ownership of the
+ * original reference count of 1. If that is not the case it is up to
+ * the caller to increase the reference count.
+ */
+ if (cpus != evlist->cpus) {
+ cpu_map__put(evlist->cpus);
+ evlist->cpus = cpu_map__get(cpus);
+ }
+
+ if (threads != evlist->threads) {
+ thread_map__put(evlist->threads);
+ evlist->threads = thread_map__get(threads);
+ }
+
+ perf_evlist__propagate_maps(evlist);
+}
+
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ __perf_evsel__set_sample_bit(evsel, bit);
+}
+
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ __perf_evsel__reset_sample_bit(evsel, bit);
+}
+
+int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel)
+{
+ struct perf_evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->filter == NULL)
+ continue;
+
+ /*
+		 * Filters only work for tracepoint events, which don't have a
+		 * CPU limit, so the evlist's and the evsel's maps should always be the same.
+ */
+ err = perf_evsel__apply_filter(evsel, evsel->filter);
+ if (err) {
+ *err_evsel = evsel;
+ break;
+ }
+ }
+
+ return err;
+}
+
+int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter)
+{
+ struct perf_evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ err = perf_evsel__set_filter(evsel, filter);
+ if (err)
+ break;
+ }
+
+ return err;
+}
+
+int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids)
+{
+	char *filter = NULL;
+ int ret = -1;
+ size_t i;
+
+ for (i = 0; i < npids; ++i) {
+ if (i == 0) {
+ if (asprintf(&filter, "common_pid != %d", pids[i]) < 0)
+ return -1;
+ } else {
+ char *tmp;
+
+ if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0)
+ goto out_free;
+
+ free(filter);
+ filter = tmp;
+ }
+ }
+
+ ret = perf_evlist__set_filter(evlist, filter);
+out_free:
+ free(filter);
+ return ret;
+}
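
Example (illustrative): for npids = 2 and pids = {1, 42}, the loop above builds the string

    "common_pid != 1 && common_pid != 42"

which perf_evlist__set_filter() then applies to every tracepoint evsel in the list.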
+
+int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid)
+{
+ return perf_evlist__set_filter_pids(evlist, 1, &pid);
+}
+
+bool perf_evlist__valid_sample_type(struct perf_evlist *evlist)
+{
+ struct perf_evsel *pos;
+
+ if (evlist->nr_entries == 1)
+ return true;
+
+ if (evlist->id_pos < 0 || evlist->is_pos < 0)
+ return false;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (pos->id_pos != evlist->id_pos ||
+ pos->is_pos != evlist->is_pos)
+ return false;
+ }
+
+ return true;
+}
+
+u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ if (evlist->combined_sample_type)
+ return evlist->combined_sample_type;
+
+ evlist__for_each_entry(evlist, evsel)
+ evlist->combined_sample_type |= evsel->attr.sample_type;
+
+ return evlist->combined_sample_type;
+}
+
+u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist)
+{
+ evlist->combined_sample_type = 0;
+ return __perf_evlist__combined_sample_type(evlist);
+}
+
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ u64 branch_type = 0;
+
+ evlist__for_each_entry(evlist, evsel)
+ branch_type |= evsel->attr.branch_sample_type;
+ return branch_type;
+}
+
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
+ u64 read_format = first->attr.read_format;
+ u64 sample_type = first->attr.sample_type;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (read_format != pos->attr.read_format)
+ return false;
+ }
+
+	/* PERF_SAMPLE_READ implies PERF_FORMAT_ID. */
+ if ((sample_type & PERF_SAMPLE_READ) &&
+ !(read_format & PERF_FORMAT_ID)) {
+ return false;
+ }
+
+ return true;
+}
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist);
+ return first->attr.read_format;
+}
+
+u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist);
+ struct perf_sample *data;
+ u64 sample_type;
+ u16 size = 0;
+
+ if (!first->attr.sample_id_all)
+ goto out;
+
+ sample_type = first->attr.sample_type;
+
+ if (sample_type & PERF_SAMPLE_TID)
+ size += sizeof(data->tid) * 2;
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ size += sizeof(data->time);
+
+ if (sample_type & PERF_SAMPLE_ID)
+ size += sizeof(data->id);
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ size += sizeof(data->stream_id);
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ size += sizeof(data->cpu) * 2;
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ size += sizeof(data->id);
+out:
+ return size;
+}
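
A worked example (not part of the patch): with sample_id_all set and sample_type = PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID, the id header appended to each non-sample event is

    2 * sizeof(data->tid)   =  8    /* pid + tid, one u32 each */
        sizeof(data->time)  =  8
        sizeof(data->id)    =  8
                               --
                               24 bytes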
+
+bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist), *pos = first;
+
+ evlist__for_each_entry_continue(evlist, pos) {
+ if (first->attr.sample_id_all != pos->attr.sample_id_all)
+ return false;
+ }
+
+ return true;
+}
+
+bool perf_evlist__sample_id_all(struct perf_evlist *evlist)
+{
+ struct perf_evsel *first = perf_evlist__first(evlist);
+ return first->attr.sample_id_all;
+}
+
+void perf_evlist__set_selected(struct perf_evlist *evlist,
+ struct perf_evsel *evsel)
+{
+ evlist->selected = evsel;
+}
+
+void perf_evlist__close(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry_reverse(evlist, evsel)
+ perf_evsel__close(evsel);
+}
+
+static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist)
+{
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+ int err = -ENOMEM;
+
+ /*
+ * Try reading /sys/devices/system/cpu/online to get
+ * an all cpus map.
+ *
+ * FIXME: -ENOMEM is the best we can do here, the cpu_map
+ * code needs an overhaul to properly forward the
+ * error, and we may not want to do that fallback to a
+ * default cpu identity map :-\
+ */
+ cpus = cpu_map__new(NULL);
+ if (!cpus)
+ goto out;
+
+ threads = thread_map__new_dummy();
+ if (!threads)
+ goto out_put;
+
+	perf_evlist__set_maps(evlist, cpus, threads);
+	err = 0;
+out:
+	return err;
+out_put:
+ cpu_map__put(cpus);
+ goto out;
+}
+
+int perf_evlist__open(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int err;
+
+ /*
+	 * Default: one fd per CPU, all threads, aka systemwide,
+	 * since sys_perf_event_open(cpu = -1, thread = -1) returns EINVAL.
+ */
+ if (evlist->threads == NULL && evlist->cpus == NULL) {
+ err = perf_evlist__create_syswide_maps(evlist);
+ if (err < 0)
+ goto out_err;
+ }
+
+ perf_evlist__update_id_pos(evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ err = perf_evsel__open(evsel, evsel->cpus, evsel->threads);
+ if (err < 0)
+ goto out_err;
+ }
+
+ return 0;
+out_err:
+ perf_evlist__close(evlist);
+ errno = -err;
+ return err;
+}
+
+int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *target,
+ const char *argv[], bool pipe_output,
+ void (*exec_error)(int signo, siginfo_t *info, void *ucontext))
+{
+ int child_ready_pipe[2], go_pipe[2];
+ char bf;
+
+ if (pipe(child_ready_pipe) < 0) {
+ perror("failed to create 'ready' pipe");
+ return -1;
+ }
+
+ if (pipe(go_pipe) < 0) {
+ perror("failed to create 'go' pipe");
+ goto out_close_ready_pipe;
+ }
+
+ evlist->workload.pid = fork();
+ if (evlist->workload.pid < 0) {
+ perror("failed to fork");
+ goto out_close_pipes;
+ }
+
+ if (!evlist->workload.pid) {
+ int ret;
+
+ if (pipe_output)
+ dup2(2, 1);
+
+ signal(SIGTERM, SIG_DFL);
+
+ close(child_ready_pipe[0]);
+ close(go_pipe[1]);
+ fcntl(go_pipe[0], F_SETFD, FD_CLOEXEC);
+
+ /*
+ * Tell the parent we're ready to go
+ */
+ close(child_ready_pipe[1]);
+
+ /*
+ * Wait until the parent tells us to go.
+ */
+ ret = read(go_pipe[0], &bf, 1);
+ /*
+ * The parent will ask for the execvp() to be performed by
+ * writing exactly one byte, in workload.cork_fd, usually via
+ * perf_evlist__start_workload().
+ *
+ * For cancelling the workload without actually running it,
+ * the parent will just close workload.cork_fd, without writing
+ * anything, i.e. read will return zero and we just exit()
+ * here.
+ */
+ if (ret != 1) {
+ if (ret == -1)
+ perror("unable to read pipe");
+ exit(ret);
+ }
+
+ execvp(argv[0], (char **)argv);
+
+ if (exec_error) {
+ union sigval val;
+
+ val.sival_int = errno;
+ if (sigqueue(getppid(), SIGUSR1, val))
+ perror(argv[0]);
+ } else
+ perror(argv[0]);
+ exit(-1);
+ }
+
+ if (exec_error) {
+ struct sigaction act = {
+ .sa_flags = SA_SIGINFO,
+ .sa_sigaction = exec_error,
+ };
+ sigaction(SIGUSR1, &act, NULL);
+ }
+
+ if (target__none(target)) {
+ if (evlist->threads == NULL) {
+ fprintf(stderr, "FATAL: evlist->threads need to be set at this point (%s:%d).\n",
+ __func__, __LINE__);
+ goto out_close_pipes;
+ }
+ thread_map__set_pid(evlist->threads, 0, evlist->workload.pid);
+ }
+
+ close(child_ready_pipe[1]);
+ close(go_pipe[0]);
+ /*
+ * wait for child to settle
+ */
+ if (read(child_ready_pipe[0], &bf, 1) == -1) {
+ perror("unable to read pipe");
+ goto out_close_pipes;
+ }
+
+ fcntl(go_pipe[1], F_SETFD, FD_CLOEXEC);
+ evlist->workload.cork_fd = go_pipe[1];
+ close(child_ready_pipe[0]);
+ return 0;
+
+out_close_pipes:
+ close(go_pipe[0]);
+ close(go_pipe[1]);
+out_close_ready_pipe:
+ close(child_ready_pipe[0]);
+ close(child_ready_pipe[1]);
+ return -1;
+}
+
+int perf_evlist__start_workload(struct perf_evlist *evlist)
+{
+ if (evlist->workload.cork_fd > 0) {
+ char bf = 0;
+ int ret;
+ /*
+ * Remove the cork, let it rip!
+ */
+ ret = write(evlist->workload.cork_fd, &bf, 1);
+ if (ret < 0)
+ perror("unable to write to pipe");
+
+ close(evlist->workload.cork_fd);
+ return ret;
+ }
+
+ return 0;
+}
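
A sketch of how the cork protocol above is typically driven (builtin-record-style; illustrative only). The forked child blocks in read(go_pipe[0]) until the parent writes the single cork byte, at which point it execvp()s the workload:

    perf_evlist__prepare_workload(evlist, &opts.target, argv, false, NULL);
    perf_evlist__open(evlist);
    perf_evlist__mmap(evlist, opts.mmap_pages);
    perf_evlist__enable(evlist);
    perf_evlist__start_workload(evlist);    /* uncork: the child execs the workload */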
+
+int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+ if (!evsel)
+ return -EFAULT;
+ return perf_evsel__parse_sample(evsel, event, sample);
+}
+
+int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
+ union perf_event *event,
+ u64 *timestamp)
+{
+ struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+ if (!evsel)
+ return -EFAULT;
+ return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
+}
+
+size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp)
+{
+ struct perf_evsel *evsel;
+ size_t printed = 0;
+
+ evlist__for_each_entry(evlist, evsel) {
+ printed += fprintf(fp, "%s%s", evsel->idx ? ", " : "",
+ perf_evsel__name(evsel));
+ }
+
+ return printed + fprintf(fp, "\n");
+}
+
+int perf_evlist__strerror_open(struct perf_evlist *evlist,
+ int err, char *buf, size_t size)
+{
+ int printed, value;
+ char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+
+ switch (err) {
+ case EACCES:
+ case EPERM:
+ printed = scnprintf(buf, size,
+ "Error:\t%s.\n"
+ "Hint:\tCheck /proc/sys/kernel/perf_event_paranoid setting.", emsg);
+
+ value = perf_event_paranoid();
+
+ printed += scnprintf(buf + printed, size - printed, "\nHint:\t");
+
+ if (value >= 2) {
+ printed += scnprintf(buf + printed, size - printed,
+ "For your workloads it needs to be <= 1\nHint:\t");
+ }
+ printed += scnprintf(buf + printed, size - printed,
+ "For system wide tracing it needs to be set to -1.\n");
+
+ printed += scnprintf(buf + printed, size - printed,
+ "Hint:\tTry: 'sudo sh -c \"echo -1 > /proc/sys/kernel/perf_event_paranoid\"'\n"
+ "Hint:\tThe current value is %d.", value);
+ break;
+ case EINVAL: {
+ struct perf_evsel *first = perf_evlist__first(evlist);
+ int max_freq;
+
+ if (sysctl__read_int("kernel/perf_event_max_sample_rate", &max_freq) < 0)
+ goto out_default;
+
+ if (first->attr.sample_freq < (u64)max_freq)
+ goto out_default;
+
+ printed = scnprintf(buf, size,
+ "Error:\t%s.\n"
+ "Hint:\tCheck /proc/sys/kernel/perf_event_max_sample_rate.\n"
+ "Hint:\tThe current value is %d and %" PRIu64 " is being requested.",
+ emsg, max_freq, first->attr.sample_freq);
+ break;
+ }
+ default:
+out_default:
+ scnprintf(buf, size, "%s", emsg);
+ break;
+ }
+
+ return 0;
+}
+
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
+{
+ char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
+	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user = 0, printed = 0;
+
+ switch (err) {
+ case EPERM:
+ sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
+ printed += scnprintf(buf + printed, size - printed,
+ "Error:\t%s.\n"
+ "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
+ "Hint:\tTried using %zd kB.\n",
+ emsg, pages_max_per_user, pages_attempted);
+
+ if (pages_attempted >= pages_max_per_user) {
+ printed += scnprintf(buf + printed, size - printed,
+ "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
+ pages_max_per_user + pages_attempted);
+ }
+
+ printed += scnprintf(buf + printed, size - printed,
+ "Hint:\tTry using a smaller -m/--mmap-pages value.");
+ break;
+ default:
+ scnprintf(buf, size, "%s", emsg);
+ break;
+ }
+
+ return 0;
+}
+
+void perf_evlist__to_front(struct perf_evlist *evlist,
+ struct perf_evsel *move_evsel)
+{
+ struct perf_evsel *evsel, *n;
+ LIST_HEAD(move);
+
+ if (move_evsel == perf_evlist__first(evlist))
+ return;
+
+ evlist__for_each_entry_safe(evlist, n, evsel) {
+ if (evsel->leader == move_evsel->leader)
+ list_move_tail(&evsel->node, &move);
+ }
+
+ list_splice(&move, &evlist->entries);
+}
+
+void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
+ struct perf_evsel *tracking_evsel)
+{
+ struct perf_evsel *evsel;
+
+ if (tracking_evsel->tracking)
+ return;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel != tracking_evsel)
+ evsel->tracking = false;
+ }
+
+ tracking_evsel->tracking = true;
+}
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist,
+ const char *str)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel->name)
+ continue;
+ if (strcmp(str, evsel->name) == 0)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist,
+ enum bkw_mmap_state state)
+{
+ enum bkw_mmap_state old_state = evlist->bkw_mmap_state;
+ enum action {
+ NONE,
+ PAUSE,
+ RESUME,
+ } action = NONE;
+
+ if (!evlist->overwrite_mmap)
+ return;
+
+ switch (old_state) {
+ case BKW_MMAP_NOTREADY: {
+ if (state != BKW_MMAP_RUNNING)
+ goto state_err;
+ break;
+ }
+ case BKW_MMAP_RUNNING: {
+ if (state != BKW_MMAP_DATA_PENDING)
+ goto state_err;
+ action = PAUSE;
+ break;
+ }
+ case BKW_MMAP_DATA_PENDING: {
+ if (state != BKW_MMAP_EMPTY)
+ goto state_err;
+ break;
+ }
+ case BKW_MMAP_EMPTY: {
+ if (state != BKW_MMAP_RUNNING)
+ goto state_err;
+ action = RESUME;
+ break;
+ }
+ default:
+		WARN_ONCE(1, "Shouldn't get here\n");
+ }
+
+ evlist->bkw_mmap_state = state;
+
+ switch (action) {
+ case PAUSE:
+ perf_evlist__pause(evlist);
+ break;
+ case RESUME:
+ perf_evlist__resume(evlist);
+ break;
+ case NONE:
+ default:
+ break;
+ }
+
+state_err:
+ return;
+}
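
For reference, the legal transitions the switch above implements (anything else falls through to state_err):

    BKW_MMAP_NOTREADY     -> BKW_MMAP_RUNNING                  (no ring-buffer action)
    BKW_MMAP_RUNNING      -> BKW_MMAP_DATA_PENDING    PAUSE    (kernel stops writing)
    BKW_MMAP_DATA_PENDING -> BKW_MMAP_EMPTY                    (reader drained the maps)
    BKW_MMAP_EMPTY        -> BKW_MMAP_RUNNING         RESUME   (overwrite maps re-armed)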
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (!evsel->attr.exclude_kernel)
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Events in the data file are not collected in groups, but we still want
+ * the group display. Set the artificial group and set the leader's
+ * forced_leader flag to notify the display code.
+ */
+void perf_evlist__force_leader(struct perf_evlist *evlist)
+{
+ if (!evlist->nr_groups) {
+ struct perf_evsel *leader = perf_evlist__first(evlist);
+
+ perf_evlist__set_leader(evlist);
+ leader->forced_leader = true;
+ }
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
new file mode 100644
index 000000000..dc66436ad
--- /dev/null
+++ b/tools/perf/util/evlist.h
@@ -0,0 +1,315 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVLIST_H
+#define __PERF_EVLIST_H 1
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <api/fd/array.h>
+#include <stdio.h>
+#include "../perf.h"
+#include "event.h"
+#include "evsel.h"
+#include "mmap.h"
+#include "util.h"
+#include <signal.h>
+#include <unistd.h>
+
+struct pollfd;
+struct thread_map;
+struct cpu_map;
+struct record_opts;
+
+#define PERF_EVLIST__HLIST_BITS 8
+#define PERF_EVLIST__HLIST_SIZE (1 << PERF_EVLIST__HLIST_BITS)
+
+struct perf_evlist {
+ struct list_head entries;
+ struct hlist_head heads[PERF_EVLIST__HLIST_SIZE];
+ int nr_entries;
+ int nr_groups;
+ int nr_mmaps;
+ bool enabled;
+ bool has_user_cpus;
+ size_t mmap_len;
+ int id_pos;
+ int is_pos;
+ u64 combined_sample_type;
+ enum bkw_mmap_state bkw_mmap_state;
+ struct {
+ int cork_fd;
+ pid_t pid;
+ } workload;
+ struct fdarray pollfd;
+ struct perf_mmap *mmap;
+ struct perf_mmap *overwrite_mmap;
+ struct thread_map *threads;
+ struct cpu_map *cpus;
+ struct perf_evsel *selected;
+ struct events_stats stats;
+ struct perf_env *env;
+ u64 first_sample_time;
+ u64 last_sample_time;
+};
+
+struct perf_evsel_str_handler {
+ const char *name;
+ void *handler;
+};
+
+struct perf_evlist *perf_evlist__new(void);
+struct perf_evlist *perf_evlist__new_default(void);
+struct perf_evlist *perf_evlist__new_dummy(void);
+void perf_evlist__init(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads);
+void perf_evlist__exit(struct perf_evlist *evlist);
+void perf_evlist__delete(struct perf_evlist *evlist);
+
+void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry);
+void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel);
+
+int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise);
+
+static inline int perf_evlist__add_default(struct perf_evlist *evlist)
+{
+ return __perf_evlist__add_default(evlist, true);
+}
+
+int __perf_evlist__add_default_attrs(struct perf_evlist *evlist,
+ struct perf_event_attr *attrs, size_t nr_attrs);
+
+#define perf_evlist__add_default_attrs(evlist, array) \
+ __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+
+int perf_evlist__add_dummy(struct perf_evlist *evlist);
+
+int perf_evlist__add_newtp(struct perf_evlist *evlist,
+ const char *sys, const char *name, void *handler);
+
+void __perf_evlist__set_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit);
+void __perf_evlist__reset_sample_bit(struct perf_evlist *evlist,
+ enum perf_event_sample_format bit);
+
+#define perf_evlist__set_sample_bit(evlist, bit) \
+ __perf_evlist__set_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+#define perf_evlist__reset_sample_bit(evlist, bit) \
+ __perf_evlist__reset_sample_bit(evlist, PERF_SAMPLE_##bit)
+
+int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter);
+int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid);
+int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids);
+
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id);
+
+struct perf_evsel *
+perf_evlist__find_tracepoint_by_name(struct perf_evlist *evlist,
+ const char *name);
+
+void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel,
+ int cpu, int thread, u64 id);
+int perf_evlist__id_add_fd(struct perf_evlist *evlist,
+ struct perf_evsel *evsel,
+ int cpu, int thread, int fd);
+
+int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd);
+int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
+int perf_evlist__filter_pollfd(struct perf_evlist *evlist, short revents_and_mask);
+
+int perf_evlist__poll(struct perf_evlist *evlist, int timeout);
+
+struct perf_evsel *perf_evlist__id2evsel(struct perf_evlist *evlist, u64 id);
+struct perf_evsel *perf_evlist__id2evsel_strict(struct perf_evlist *evlist,
+ u64 id);
+
+struct perf_sample_id *perf_evlist__id2sid(struct perf_evlist *evlist, u64 id);
+
+void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, enum bkw_mmap_state state);
+
+void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx);
+
+int perf_evlist__open(struct perf_evlist *evlist);
+void perf_evlist__close(struct perf_evlist *evlist);
+
+struct callchain_param;
+
+void perf_evlist__set_id_pos(struct perf_evlist *evlist);
+bool perf_can_sample_identifier(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_record_cpu_wide(void);
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+ struct callchain_param *callchain);
+int record_opts__config(struct record_opts *opts);
+
+int perf_evlist__prepare_workload(struct perf_evlist *evlist,
+ struct target *target,
+ const char *argv[], bool pipe_output,
+ void (*exec_error)(int signo, siginfo_t *info,
+ void *ucontext));
+int perf_evlist__start_workload(struct perf_evlist *evlist);
+
+struct option;
+
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
+int perf_evlist__parse_mmap_pages(const struct option *opt,
+ const char *str,
+ int unset);
+
+unsigned long perf_event_mlock_kb_in_pages(void);
+
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+ unsigned int auxtrace_pages,
+ bool auxtrace_overwrite);
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages);
+void perf_evlist__munmap(struct perf_evlist *evlist);
+
+size_t perf_evlist__mmap_size(unsigned long pages);
+
+void perf_evlist__disable(struct perf_evlist *evlist);
+void perf_evlist__enable(struct perf_evlist *evlist);
+void perf_evlist__toggle_enable(struct perf_evlist *evlist);
+
+int perf_evlist__enable_event_idx(struct perf_evlist *evlist,
+ struct perf_evsel *evsel, int idx);
+
+void perf_evlist__set_selected(struct perf_evlist *evlist,
+ struct perf_evsel *evsel);
+
+void perf_evlist__set_maps(struct perf_evlist *evlist, struct cpu_map *cpus,
+ struct thread_map *threads);
+int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target);
+int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel);
+
+void __perf_evlist__set_leader(struct list_head *list);
+void perf_evlist__set_leader(struct perf_evlist *evlist);
+
+u64 perf_evlist__read_format(struct perf_evlist *evlist);
+u64 __perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_sample_type(struct perf_evlist *evlist);
+u64 perf_evlist__combined_branch_type(struct perf_evlist *evlist);
+bool perf_evlist__sample_id_all(struct perf_evlist *evlist);
+u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist);
+
+int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event,
+ struct perf_sample *sample);
+
+int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist,
+ union perf_event *event,
+ u64 *timestamp);
+
+bool perf_evlist__valid_sample_type(struct perf_evlist *evlist);
+bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist);
+bool perf_evlist__valid_read_format(struct perf_evlist *evlist);
+
+void perf_evlist__splice_list_tail(struct perf_evlist *evlist,
+ struct list_head *list);
+
+static inline bool perf_evlist__empty(struct perf_evlist *evlist)
+{
+ return list_empty(&evlist->entries);
+}
+
+static inline struct perf_evsel *perf_evlist__first(struct perf_evlist *evlist)
+{
+ return list_entry(evlist->entries.next, struct perf_evsel, node);
+}
+
+static inline struct perf_evsel *perf_evlist__last(struct perf_evlist *evlist)
+{
+ return list_entry(evlist->entries.prev, struct perf_evsel, node);
+}
+
+size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp);
+
+int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
+
+bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str);
+void perf_evlist__to_front(struct perf_evlist *evlist,
+ struct perf_evsel *move_evsel);
+
+/**
+ * __evlist__for_each_entry - iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry(list, evsel) \
+ list_for_each_entry(evsel, list, node)
+
+/**
+ * evlist__for_each_entry - iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry(evlist, evsel) \
+ __evlist__for_each_entry(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_continue(list, evsel) \
+ list_for_each_entry_continue(evsel, list, node)
+
+/**
+ * evlist__for_each_entry_continue - continue iteration thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_continue(evlist, evsel) \
+ __evlist__for_each_entry_continue(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @list: list_head instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_reverse(list, evsel) \
+ list_for_each_entry_reverse(evsel, list, node)
+
+/**
+ * evlist__for_each_entry_reverse - iterate thru all the evsels in reverse order
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ */
+#define evlist__for_each_entry_reverse(evlist, evsel) \
+ __evlist__for_each_entry_reverse(&(evlist)->entries, evsel)
+
+/**
+ * __evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @list: list_head instance to iterate
+ * @tmp: struct evsel temp iterator
+ * @evsel: struct evsel iterator
+ */
+#define __evlist__for_each_entry_safe(list, tmp, evsel) \
+ list_for_each_entry_safe(evsel, tmp, list, node)
+
+/**
+ * evlist__for_each_entry_safe - safely iterate thru all the evsels
+ * @evlist: evlist instance to iterate
+ * @evsel: struct evsel iterator
+ * @tmp: struct evsel temp iterator
+ */
+#define evlist__for_each_entry_safe(evlist, tmp, evsel) \
+ __evlist__for_each_entry_safe(&(evlist)->entries, tmp, evsel)
+
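
Usage sketch (illustrative):

    struct perf_evsel *evsel;

    evlist__for_each_entry(evlist, evsel)
        fprintf(stderr, "%s\n", perf_evsel__name(evsel));

Use the _safe variant whenever the loop body may unlink evsel, as perf_evlist__to_front() does.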
+void perf_evlist__set_tracking_event(struct perf_evlist *evlist,
+ struct perf_evsel *tracking_evsel);
+
+void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr);
+
+struct perf_evsel *
+perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str);
+
+struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
+ union perf_event *event);
+
+bool perf_evlist__exclude_kernel(struct perf_evlist *evlist);
+
+void perf_evlist__force_leader(struct perf_evlist *evlist);
+
+#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
new file mode 100644
index 000000000..11a2aa808
--- /dev/null
+++ b/tools/perf/util/evsel.c
@@ -0,0 +1,2954 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <byteswap.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bitops.h>
+#include <api/fs/fs.h>
+#include <api/fs/tracing_path.h>
+#include <traceevent/event-parse.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <sys/ioctl.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include "asm/bug.h"
+#include "callchain.h"
+#include "cgroup.h"
+#include "event.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "util.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "target.h"
+#include "perf_regs.h"
+#include "debug.h"
+#include "trace-event.h"
+#include "stat.h"
+#include "memswap.h"
+#include "util/parse-branch-options.h"
+
+#include "sane_ctype.h"
+
+struct perf_missing_features perf_missing_features;
+
+static clockid_t clockid;
+
+static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused)
+{
+ return 0;
+}
+
+void __weak test_attr__ready(void) { }
+
+static void perf_evsel__no_extra_fini(struct perf_evsel *evsel __maybe_unused)
+{
+}
+
+static struct {
+ size_t size;
+ int (*init)(struct perf_evsel *evsel);
+ void (*fini)(struct perf_evsel *evsel);
+} perf_evsel__object = {
+ .size = sizeof(struct perf_evsel),
+ .init = perf_evsel__no_extra_init,
+ .fini = perf_evsel__no_extra_fini,
+};
+
+int perf_evsel__object_config(size_t object_size,
+ int (*init)(struct perf_evsel *evsel),
+ void (*fini)(struct perf_evsel *evsel))
+{
+
+ if (object_size == 0)
+ goto set_methods;
+
+ if (perf_evsel__object.size > object_size)
+ return -EINVAL;
+
+ perf_evsel__object.size = object_size;
+
+set_methods:
+ if (init != NULL)
+ perf_evsel__object.init = init;
+
+ if (fini != NULL)
+ perf_evsel__object.fini = fini;
+
+ return 0;
+}
+
+#define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
+
+int __perf_evsel__sample_size(u64 sample_type)
+{
+ u64 mask = sample_type & PERF_SAMPLE_MASK;
+ int size = 0;
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (mask & (1ULL << i))
+ size++;
+ }
+
+ size *= sizeof(u64);
+
+ return size;
+}
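
Example (illustrative): sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME sets three bits inside PERF_SAMPLE_MASK, so the fixed portion of each sample is 3 * sizeof(u64) = 24 bytes (pid and tid share a single u64 slot).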
+
+/**
+ * __perf_evsel__calc_id_pos - calculate id_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position of the event id (PERF_SAMPLE_ID or
+ * PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of struct
+ * sample_event.
+ */
+static int __perf_evsel__calc_id_pos(u64 sample_type)
+{
+ int idx = 0;
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ return 0;
+
+ if (!(sample_type & PERF_SAMPLE_ID))
+ return -1;
+
+ if (sample_type & PERF_SAMPLE_IP)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_TID)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_ADDR)
+ idx += 1;
+
+ return idx;
+}
+
+/**
+ * __perf_evsel__calc_is_pos - calculate is_pos.
+ * @sample_type: sample type
+ *
+ * This function returns the position (counting backwards) of the event id
+ * (PERF_SAMPLE_ID or PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if
+ * sample_id_all is used there is an id sample appended to non-sample events.
+ */
+static int __perf_evsel__calc_is_pos(u64 sample_type)
+{
+ int idx = 1;
+
+ if (sample_type & PERF_SAMPLE_IDENTIFIER)
+ return 1;
+
+ if (!(sample_type & PERF_SAMPLE_ID))
+ return -1;
+
+ if (sample_type & PERF_SAMPLE_CPU)
+ idx += 1;
+
+ if (sample_type & PERF_SAMPLE_STREAM_ID)
+ idx += 1;
+
+ return idx;
+}
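
A worked example covering both helpers (not part of the patch): for sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME | PERF_SAMPLE_ID, the id in a sample is preceded by the IP, TID and TIME u64 slots, so id_pos = 3; in a non-sample event (with sample_id_all) neither CPU nor STREAM_ID follows the id, so is_pos stays at 1, i.e. the id is the last u64 of the trailing id block.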
+
+void perf_evsel__calc_id_pos(struct perf_evsel *evsel)
+{
+ evsel->id_pos = __perf_evsel__calc_id_pos(evsel->attr.sample_type);
+ evsel->is_pos = __perf_evsel__calc_is_pos(evsel->attr.sample_type);
+}
+
+void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit)
+{
+ if (!(evsel->attr.sample_type & bit)) {
+ evsel->attr.sample_type |= bit;
+ evsel->sample_size += sizeof(u64);
+ perf_evsel__calc_id_pos(evsel);
+ }
+}
+
+void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit)
+{
+ if (evsel->attr.sample_type & bit) {
+ evsel->attr.sample_type &= ~bit;
+ evsel->sample_size -= sizeof(u64);
+ perf_evsel__calc_id_pos(evsel);
+ }
+}
+
+void perf_evsel__set_sample_id(struct perf_evsel *evsel,
+ bool can_sample_identifier)
+{
+ if (can_sample_identifier) {
+ perf_evsel__reset_sample_bit(evsel, ID);
+ perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+ } else {
+ perf_evsel__set_sample_bit(evsel, ID);
+ }
+ evsel->attr.read_format |= PERF_FORMAT_ID;
+}
+
+/**
+ * perf_evsel__is_function_event - Return whether given evsel is a function
+ * trace event
+ *
+ * @evsel: evsel selector to be tested
+ *
+ * Return: %true if the event is a function trace event
+ */
+bool perf_evsel__is_function_event(struct perf_evsel *evsel)
+{
+#define FUNCTION_EVENT "ftrace:function"
+
+ return evsel->name &&
+ !strncmp(FUNCTION_EVENT, evsel->name, sizeof(FUNCTION_EVENT));
+
+#undef FUNCTION_EVENT
+}
+
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx)
+{
+ evsel->idx = idx;
+ evsel->tracking = !idx;
+ evsel->attr = *attr;
+ evsel->leader = evsel;
+ evsel->unit = "";
+ evsel->scale = 1.0;
+ evsel->evlist = NULL;
+ evsel->bpf_fd = -1;
+ INIT_LIST_HEAD(&evsel->node);
+ INIT_LIST_HEAD(&evsel->config_terms);
+ perf_evsel__object.init(evsel);
+ evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
+ perf_evsel__calc_id_pos(evsel);
+ evsel->cmdline_group_boundary = false;
+ evsel->metric_expr = NULL;
+ evsel->metric_name = NULL;
+ evsel->metric_events = NULL;
+ evsel->collect_stat = false;
+ evsel->pmu_name = NULL;
+}
+
+struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
+{
+ struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
+
+ if (!evsel)
+ return NULL;
+ perf_evsel__init(evsel, attr, idx);
+
+ if (perf_evsel__is_bpf_output(evsel)) {
+ evsel->attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+ evsel->attr.sample_period = 1;
+ }
+
+ if (perf_evsel__is_clock(evsel)) {
+ /*
+		 * evsel->unit points to the static alias->unit,
+		 * so it's ok to use a static string here.
+ */
+ static const char *unit = "msec";
+
+ evsel->unit = unit;
+ evsel->scale = 1e-6;
+ }
+
+ return evsel;
+}
+
+static bool perf_event_can_profile_kernel(void)
+{
+ return geteuid() == 0 || perf_event_paranoid() == -1;
+}
+
+struct perf_evsel *perf_evsel__new_cycles(bool precise)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .exclude_kernel = !perf_event_can_profile_kernel(),
+ };
+ struct perf_evsel *evsel;
+
+ event_attr_init(&attr);
+
+ if (!precise)
+ goto new_event;
+ /*
+	 * sample_period sits in an unnamed union, which older compilers
+	 * such as gcc 4.4.7 don't support as a struct member named
+	 * initializer, so set it by assignment instead.
+	 *
+	 * It is set just for probing the precise_ip:
+ */
+ attr.sample_period = 1;
+
+ perf_event_attr__set_max_precise_ip(&attr);
+ /*
+ * Now let the usual logic to set up the perf_event_attr defaults
+ * to kick in when we return and before perf_evsel__open() is called.
+ */
+ attr.sample_period = 0;
+new_event:
+ evsel = perf_evsel__new(&attr);
+ if (evsel == NULL)
+ goto out;
+
+ /* use asprintf() because free(evsel) assumes name is allocated */
+ if (asprintf(&evsel->name, "cycles%s%s%.*s",
+ (attr.precise_ip || attr.exclude_kernel) ? ":" : "",
+ attr.exclude_kernel ? "u" : "",
+ attr.precise_ip ? attr.precise_ip + 1 : 0, "ppp") < 0)
+ goto error_free;
+out:
+ return evsel;
+error_free:
+ perf_evsel__delete(evsel);
+ evsel = NULL;
+ goto out;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
+{
+ struct perf_evsel *evsel = zalloc(perf_evsel__object.size);
+ int err = -ENOMEM;
+
+ if (evsel == NULL) {
+ goto out_err;
+ } else {
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_TRACEPOINT,
+ .sample_type = (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
+ PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
+ };
+
+ if (asprintf(&evsel->name, "%s:%s", sys, name) < 0)
+ goto out_free;
+
+ evsel->tp_format = trace_event__tp_format(sys, name);
+ if (IS_ERR(evsel->tp_format)) {
+ err = PTR_ERR(evsel->tp_format);
+ goto out_free;
+ }
+
+ event_attr_init(&attr);
+ attr.config = evsel->tp_format->id;
+ attr.sample_period = 1;
+ perf_evsel__init(evsel, &attr, idx);
+ }
+
+ return evsel;
+
+out_free:
+ zfree(&evsel->name);
+ free(evsel);
+out_err:
+ return ERR_PTR(err);
+}
+
+const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
+ "cycles",
+ "instructions",
+ "cache-references",
+ "cache-misses",
+ "branches",
+ "branch-misses",
+ "bus-cycles",
+ "stalled-cycles-frontend",
+ "stalled-cycles-backend",
+ "ref-cycles",
+};
+
+static const char *__perf_evsel__hw_name(u64 config)
+{
+ if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
+ return perf_evsel__hw_names[config];
+
+ return "unknown-hardware";
+}
+
+static int perf_evsel__add_modifiers(struct perf_evsel *evsel, char *bf, size_t size)
+{
+ int colon = 0, r = 0;
+ struct perf_event_attr *attr = &evsel->attr;
+ bool exclude_guest_default = false;
+
+#define MOD_PRINT(context, mod) do { \
+ if (!attr->exclude_##context) { \
+ if (!colon) colon = ++r; \
+ r += scnprintf(bf + r, size - r, "%c", mod); \
+ } } while(0)
+
+ if (attr->exclude_kernel || attr->exclude_user || attr->exclude_hv) {
+ MOD_PRINT(kernel, 'k');
+ MOD_PRINT(user, 'u');
+ MOD_PRINT(hv, 'h');
+ exclude_guest_default = true;
+ }
+
+ if (attr->precise_ip) {
+ if (!colon)
+ colon = ++r;
+ r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp");
+ exclude_guest_default = true;
+ }
+
+ if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) {
+ MOD_PRINT(host, 'H');
+ MOD_PRINT(guest, 'G');
+ }
+#undef MOD_PRINT
+ if (colon)
+ bf[colon - 1] = ':';
+ return r;
+}
+
+static int perf_evsel__hw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+ int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->attr.config));
+ return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+ "cpu-clock",
+ "task-clock",
+ "page-faults",
+ "context-switches",
+ "cpu-migrations",
+ "minor-faults",
+ "major-faults",
+ "alignment-faults",
+ "emulation-faults",
+ "dummy",
+};
+
+static const char *__perf_evsel__sw_name(u64 config)
+{
+ if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
+ return perf_evsel__sw_names[config];
+ return "unknown-software";
+}
+
+static int perf_evsel__sw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+ int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->attr.config));
+ return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+{
+ int r;
+
+ r = scnprintf(bf, size, "mem:0x%" PRIx64 ":", addr);
+
+ if (type & HW_BREAKPOINT_R)
+ r += scnprintf(bf + r, size - r, "r");
+
+ if (type & HW_BREAKPOINT_W)
+ r += scnprintf(bf + r, size - r, "w");
+
+ if (type & HW_BREAKPOINT_X)
+ r += scnprintf(bf + r, size - r, "x");
+
+ return r;
+}
+
+static int perf_evsel__bp_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+ return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
+}
+
+const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_EVSEL__MAX_ALIASES] = {
+ { "L1-dcache", "l1-d", "l1d", "L1-data", },
+ { "L1-icache", "l1-i", "l1i", "L1-instruction", },
+ { "LLC", "L2", },
+ { "dTLB", "d-tlb", "Data-TLB", },
+ { "iTLB", "i-tlb", "Instruction-TLB", },
+ { "branch", "branches", "bpu", "btb", "bpc", },
+ { "node", },
+};
+
+const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_EVSEL__MAX_ALIASES] = {
+ { "load", "loads", "read", },
+ { "store", "stores", "write", },
+ { "prefetch", "prefetches", "speculative-read", "speculative-load", },
+};
+
+const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+ [PERF_EVSEL__MAX_ALIASES] = {
+ { "refs", "Reference", "ops", "access", },
+ { "misses", "miss", },
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+#define CACHE_READ (1 << C(OP_READ))
+#define CACHE_WRITE (1 << C(OP_WRITE))
+#define CACHE_PREFETCH (1 << C(OP_PREFETCH))
+#define COP(x) (1 << x)
+
+/*
+ * cache operation stat
+ * L1I : Read and prefetch only
+ * ITLB and BPU : Read-only
+ */
+static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+ [C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
+ [C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(DTLB)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+ [C(ITLB)] = (CACHE_READ),
+ [C(BPU)] = (CACHE_READ),
+ [C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
+};
+
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+{
+ if (perf_evsel__hw_cache_stat[type] & COP(op))
+ return true; /* valid */
+ else
+ return false; /* invalid */
+}
+
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+ char *bf, size_t size)
+{
+ if (result) {
+ return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
+ perf_evsel__hw_cache_op[op][0],
+ perf_evsel__hw_cache_result[result][0]);
+ }
+
+ return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
+ perf_evsel__hw_cache_op[op][1]);
+}
+
+static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+{
+ u8 op, result, type = (config >> 0) & 0xff;
+ const char *err = "unknown-ext-hardware-cache-type";
+
+ if (type >= PERF_COUNT_HW_CACHE_MAX)
+ goto out_err;
+
+ op = (config >> 8) & 0xff;
+ err = "unknown-ext-hardware-cache-op";
+ if (op >= PERF_COUNT_HW_CACHE_OP_MAX)
+ goto out_err;
+
+ result = (config >> 16) & 0xff;
+ err = "unknown-ext-hardware-cache-result";
+ if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
+ goto out_err;
+
+ err = "invalid-cache";
+ if (!perf_evsel__is_cache_op_valid(type, op))
+ goto out_err;
+
+ return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+out_err:
+ return scnprintf(bf, size, "%s", err);
+}
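
A decoding example (illustrative): config = 0x10000 unpacks to type = 0x00 (L1D), op = 0x00 (READ), result = 0x01 (MISS), which __perf_evsel__hw_cache_type_op_res_name() renders as "L1-dcache-load-misses"; with result = 0x00 (ACCESS) the op's second alias is used instead, giving "L1-dcache-loads".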
+
+static int perf_evsel__hw_cache_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+ int ret = __perf_evsel__hw_cache_name(evsel->attr.config, bf, size);
+ return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+static int perf_evsel__raw_name(struct perf_evsel *evsel, char *bf, size_t size)
+{
+ int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->attr.config);
+ return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
+}
+
+const char *perf_evsel__name(struct perf_evsel *evsel)
+{
+ char bf[128];
+
+ if (!evsel)
+ goto out_unknown;
+
+ if (evsel->name)
+ return evsel->name;
+
+ switch (evsel->attr.type) {
+ case PERF_TYPE_RAW:
+ perf_evsel__raw_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_HARDWARE:
+ perf_evsel__hw_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_HW_CACHE:
+ perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_SOFTWARE:
+ perf_evsel__sw_name(evsel, bf, sizeof(bf));
+ break;
+
+ case PERF_TYPE_TRACEPOINT:
+ scnprintf(bf, sizeof(bf), "%s", "unknown tracepoint");
+ break;
+
+ case PERF_TYPE_BREAKPOINT:
+ perf_evsel__bp_name(evsel, bf, sizeof(bf));
+ break;
+
+ default:
+ scnprintf(bf, sizeof(bf), "unknown attr type: %d",
+ evsel->attr.type);
+ break;
+ }
+
+ evsel->name = strdup(bf);
+
+ if (evsel->name)
+ return evsel->name;
+out_unknown:
+ return "unknown";
+}
+
+const char *perf_evsel__group_name(struct perf_evsel *evsel)
+{
+ return evsel->group_name ?: "anon group";
+}
+
+/*
+ * Returns the group details for the specified leader,
+ * with following rules.
+ *
+ * For record -e '{cycles,instructions}'
+ * 'anon group { cycles:u, instructions:u }'
+ *
+ * For record -e 'cycles,instructions' and report --group
+ * 'cycles:u, instructions:u'
+ */
+int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size)
+{
+ int ret = 0;
+ struct perf_evsel *pos;
+ const char *group_name = perf_evsel__group_name(evsel);
+
+ if (!evsel->forced_leader)
+ ret = scnprintf(buf, size, "%s { ", group_name);
+
+ ret += scnprintf(buf + ret, size - ret, "%s",
+ perf_evsel__name(evsel));
+
+ for_each_group_member(pos, evsel)
+ ret += scnprintf(buf + ret, size - ret, ", %s",
+ perf_evsel__name(pos));
+
+ if (!evsel->forced_leader)
+ ret += scnprintf(buf + ret, size - ret, " }");
+
+ return ret;
+}
+
+static void __perf_evsel__config_callchain(struct perf_evsel *evsel,
+ struct record_opts *opts,
+ struct callchain_param *param)
+{
+ bool function = perf_evsel__is_function_event(evsel);
+ struct perf_event_attr *attr = &evsel->attr;
+
+ perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+
+ attr->sample_max_stack = param->max_stack;
+
+ if (param->record_mode == CALLCHAIN_LBR) {
+ if (!opts->branch_stack) {
+ if (attr->exclude_user) {
+ pr_warning("LBR callstack option is only available "
+ "to get user callchain information. "
+ "Falling back to framepointers.\n");
+ } else {
+ perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS;
+ }
+ } else
+ pr_warning("Cannot use LBR callstack with branch stack. "
+ "Falling back to framepointers.\n");
+ }
+
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ if (!function) {
+ perf_evsel__set_sample_bit(evsel, REGS_USER);
+ perf_evsel__set_sample_bit(evsel, STACK_USER);
+ attr->sample_regs_user |= PERF_REGS_MASK;
+ attr->sample_stack_user = param->dump_size;
+ attr->exclude_callchain_user = 1;
+ } else {
+ pr_info("Cannot use DWARF unwind for function trace event,"
+ " falling back to framepointers.\n");
+ }
+ }
+
+ if (function) {
+ pr_info("Disabling user space callchains for function trace event.\n");
+ attr->exclude_callchain_user = 1;
+ }
+}
+
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+ struct record_opts *opts,
+ struct callchain_param *param)
+{
+ if (param->enabled)
+ return __perf_evsel__config_callchain(evsel, opts, param);
+}
+
+static void
+perf_evsel__reset_callgraph(struct perf_evsel *evsel,
+ struct callchain_param *param)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+
+ perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ if (param->record_mode == CALLCHAIN_LBR) {
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_CALL_STACK);
+ }
+ if (param->record_mode == CALLCHAIN_DWARF) {
+ perf_evsel__reset_sample_bit(evsel, REGS_USER);
+ perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ }
+}
+
+static void apply_config_terms(struct perf_evsel *evsel,
+ struct record_opts *opts, bool track)
+{
+ struct perf_evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ struct perf_event_attr *attr = &evsel->attr;
+ /* callgraph default */
+ struct callchain_param param = {
+ .record_mode = callchain_param.record_mode,
+ };
+ u32 dump_size = 0;
+ int max_stack = 0;
+ const char *callgraph_buf = NULL;
+
+ list_for_each_entry(term, config_terms, list) {
+ switch (term->type) {
+ case PERF_EVSEL__CONFIG_TERM_PERIOD:
+ if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
+ attr->sample_period = term->val.period;
+ attr->freq = 0;
+ perf_evsel__reset_sample_bit(evsel, PERIOD);
+ }
+ break;
+ case PERF_EVSEL__CONFIG_TERM_FREQ:
+ if (!(term->weak && opts->user_freq != UINT_MAX)) {
+ attr->sample_freq = term->val.freq;
+ attr->freq = 1;
+ perf_evsel__set_sample_bit(evsel, PERIOD);
+ }
+ break;
+ case PERF_EVSEL__CONFIG_TERM_TIME:
+ if (term->val.time)
+ perf_evsel__set_sample_bit(evsel, TIME);
+ else
+ perf_evsel__reset_sample_bit(evsel, TIME);
+ break;
+ case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ callgraph_buf = term->val.callgraph;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_BRANCH:
+ if (term->val.branch && strcmp(term->val.branch, "no")) {
+ perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ parse_branch_str(term->val.branch,
+ &attr->branch_sample_type);
+ } else
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ break;
+ case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ dump_size = term->val.stack_user;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+ max_stack = term->val.max_stack;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_INHERIT:
+ /*
+			 * attr->inherit should have already been set by
+			 * perf_evsel__config. If the user explicitly set
+			 * inherit using config terms, override the global
+			 * opts->no_inherit setting.
+ */
+ attr->inherit = term->val.inherit ? 1 : 0;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
+ attr->write_backward = term->val.overwrite ? 1 : 0;
+ break;
+ case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* User explicitly set per-event callgraph, clear the old setting and reset. */
+ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) {
+ bool sample_address = false;
+
+ if (max_stack) {
+ param.max_stack = max_stack;
+ if (callgraph_buf == NULL)
+ callgraph_buf = "fp";
+ }
+
+ /* parse callgraph parameters */
+ if (callgraph_buf != NULL) {
+ if (!strcmp(callgraph_buf, "no")) {
+ param.enabled = false;
+ param.record_mode = CALLCHAIN_NONE;
+ } else {
+ param.enabled = true;
+ if (parse_callchain_record(callgraph_buf, &param)) {
+ pr_err("per-event callgraph setting for %s failed. "
+ "Apply callgraph global setting for it\n",
+ evsel->name);
+ return;
+ }
+ if (param.record_mode == CALLCHAIN_DWARF)
+ sample_address = true;
+ }
+ }
+ if (dump_size > 0) {
+ dump_size = round_up(dump_size, sizeof(u64));
+ param.dump_size = dump_size;
+ }
+
+ /* If global callgraph set, clear it */
+ if (callchain_param.enabled)
+ perf_evsel__reset_callgraph(evsel, &callchain_param);
+
+ /* set perf-event callgraph */
+ if (param.enabled) {
+ if (sample_address) {
+ perf_evsel__set_sample_bit(evsel, ADDR);
+ perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel->attr.mmap_data = track;
+ }
+ perf_evsel__config_callchain(evsel, opts, &param);
+ }
+ }
+}
+
+static bool is_dummy_event(struct perf_evsel *evsel)
+{
+ return (evsel->attr.type == PERF_TYPE_SOFTWARE) &&
+ (evsel->attr.config == PERF_COUNT_SW_DUMMY);
+}
+
+/*
+ * The enable_on_exec/disabled value strategy:
+ *
+ * 1) For any type of traced program:
+ * - all independent events and group leaders are disabled
+ * - all group members are enabled
+ *
+ * Group members are ruled by group leaders. They need to
+ * be enabled, because the group scheduling relies on that.
+ *
+ * 2) For traced programs executed by perf:
+ * - all independent events and group leaders have
+ * enable_on_exec set
+ * - we don't specifically enable or disable any event during
+ * the record command
+ *
+ * Independent events and group leaders are initially disabled
+ * and get enabled by exec. Group members are ruled by group
+ * leaders as stated in 1).
+ *
+ * 3) For traced programs attached by perf (pid/tid):
+ * - we specifically enable or disable all events during
+ * the record command
+ *
+ * When attaching events to an already running traced program we
+ * enable/disable events specifically, as there's no
+ * initial traced exec call.
+ */
+void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain)
+{
+ struct perf_evsel *leader = evsel->leader;
+ struct perf_event_attr *attr = &evsel->attr;
+ int track = evsel->tracking;
+ bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread;
+
+ attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1;
+ attr->inherit = !opts->no_inherit;
+ attr->write_backward = opts->overwrite ? 1 : 0;
+
+ perf_evsel__set_sample_bit(evsel, IP);
+ perf_evsel__set_sample_bit(evsel, TID);
+
+ if (evsel->sample_read) {
+ perf_evsel__set_sample_bit(evsel, READ);
+
+ /*
+		 * We need an ID even in the case of a single event,
+		 * because PERF_SAMPLE_READ processes ID-specific data.
+ */
+ perf_evsel__set_sample_id(evsel, false);
+
+ /*
+		 * Apply the group format only if we belong to a group
+		 * with more than one member.
+ */
+ if (leader->nr_members > 1) {
+ attr->read_format |= PERF_FORMAT_GROUP;
+ attr->inherit = 0;
+ }
+ }
+
+ /*
+	 * We give some events a default interval, but keep it a weak
+	 * assumption that the user can override.
+ */
+ if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
+ opts->user_interval != ULLONG_MAX)) {
+ if (opts->freq) {
+ perf_evsel__set_sample_bit(evsel, PERIOD);
+ attr->freq = 1;
+ attr->sample_freq = opts->freq;
+ } else {
+ attr->sample_period = opts->default_interval;
+ }
+ }
+
+ /*
+ * Disable sampling for all group members other
+ * than leader in case leader 'leads' the sampling.
+ */
+ if ((leader != evsel) && leader->sample_read) {
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ attr->write_backward = 0;
+ }
+
+ if (opts->no_samples)
+ attr->sample_freq = 0;
+
+ if (opts->inherit_stat) {
+ evsel->attr.read_format |=
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING |
+ PERF_FORMAT_ID;
+ attr->inherit_stat = 1;
+ }
+
+ if (opts->sample_address) {
+ perf_evsel__set_sample_bit(evsel, ADDR);
+ attr->mmap_data = track;
+ }
+
+ /*
+	 * We don't allow user space callchains for the function trace
+	 * event, due to issues with page faults while tracing the page
+	 * fault handler, and its overall trickiness.
+ */
+ if (perf_evsel__is_function_event(evsel))
+ evsel->attr.exclude_callchain_user = 1;
+
+ if (callchain && callchain->enabled && !evsel->no_aux_samples)
+ perf_evsel__config_callchain(evsel, opts, callchain);
+
+ if (opts->sample_intr_regs) {
+ attr->sample_regs_intr = opts->sample_intr_regs;
+ perf_evsel__set_sample_bit(evsel, REGS_INTR);
+ }
+
+ if (opts->sample_user_regs) {
+ attr->sample_regs_user |= opts->sample_user_regs;
+ perf_evsel__set_sample_bit(evsel, REGS_USER);
+ }
+
+ if (target__has_cpu(&opts->target) || opts->sample_cpu)
+ perf_evsel__set_sample_bit(evsel, CPU);
+
+ /*
+	 * When the user has explicitly disabled time, don't force it here.
+ */
+ if (opts->sample_time &&
+ (!perf_missing_features.sample_id_all &&
+ (!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
+ opts->sample_time_set)))
+ perf_evsel__set_sample_bit(evsel, TIME);
+
+ if (opts->raw_samples && !evsel->no_aux_samples) {
+ perf_evsel__set_sample_bit(evsel, TIME);
+ perf_evsel__set_sample_bit(evsel, RAW);
+ perf_evsel__set_sample_bit(evsel, CPU);
+ }
+
+ if (opts->sample_address)
+ perf_evsel__set_sample_bit(evsel, DATA_SRC);
+
+ if (opts->sample_phys_addr)
+ perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+
+ if (opts->no_buffering) {
+ attr->watermark = 0;
+ attr->wakeup_events = 1;
+ }
+ if (opts->branch_stack && !evsel->no_aux_samples) {
+ perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ attr->branch_sample_type = opts->branch_stack;
+ }
+
+ if (opts->sample_weight)
+ perf_evsel__set_sample_bit(evsel, WEIGHT);
+
+ attr->task = track;
+ attr->mmap = track;
+ attr->mmap2 = track && !perf_missing_features.mmap2;
+ attr->comm = track;
+
+ if (opts->record_namespaces)
+ attr->namespaces = track;
+
+ if (opts->record_switch_events)
+ attr->context_switch = track;
+
+ if (opts->sample_transaction)
+ perf_evsel__set_sample_bit(evsel, TRANSACTION);
+
+ if (opts->running_time) {
+ evsel->attr.read_format |=
+ PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+ }
+
+ /*
+ * XXX see the function comment above
+ *
+ * Disabling only independent events or group leaders,
+ * keeping group members enabled.
+ */
+ if (perf_evsel__is_group_leader(evsel))
+ attr->disabled = 1;
+
+ /*
+	 * Set enable_on_exec for independent events and group
+	 * leaders when the traced program is executed by perf.
+ */
+ if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
+ !opts->initial_delay)
+ attr->enable_on_exec = 1;
+
+ if (evsel->immediate) {
+ attr->disabled = 0;
+ attr->enable_on_exec = 0;
+ }
+
+ clockid = opts->clockid;
+ if (opts->use_clockid) {
+ attr->use_clockid = 1;
+ attr->clockid = opts->clockid;
+ }
+
+ if (evsel->precise_max)
+ perf_event_attr__set_max_precise_ip(attr);
+
+ if (opts->all_user) {
+ attr->exclude_kernel = 1;
+ attr->exclude_user = 0;
+ }
+
+ if (opts->all_kernel) {
+ attr->exclude_kernel = 0;
+ attr->exclude_user = 1;
+ }
+
+ if (evsel->own_cpus || evsel->unit)
+ evsel->attr.read_format |= PERF_FORMAT_ID;
+
+ /*
+	 * Apply event-specific term settings; they override
+	 * any global configuration.
+ */
+ apply_config_terms(evsel, opts, track);
+
+ evsel->ignore_missing_thread = opts->ignore_missing_thread;
+
+ /* The --period option takes the precedence. */
+ if (opts->period_set) {
+ if (opts->period)
+ perf_evsel__set_sample_bit(evsel, PERIOD);
+ else
+ perf_evsel__reset_sample_bit(evsel, PERIOD);
+ }
+
+ /*
+ * For initial_delay, a dummy event is added implicitly.
+	 * The software event will error out with -EOPNOTSUPP
+	 * if the BRANCH_STACK bit is set.
+ */
+ if (opts->initial_delay && is_dummy_event(evsel))
+ perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+}
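+
+/*
+ * A minimal sketch of how a tool drives the above (simplified; real
+ * callers such as 'perf record' do this for every evsel in an evlist):
+ *
+ *	struct perf_evsel *evsel = perf_evsel__new(&attr);
+ *
+ *	perf_evsel__config(evsel, &opts, &callchain_param);
+ *	if (perf_evsel__open(evsel, cpus, threads) < 0)
+ *		goto out_error;
+ */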
+
+static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ if (evsel->system_wide)
+ nthreads = 1;
+
+ evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));
+
+ if (evsel->fd) {
+ int cpu, thread;
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ for (thread = 0; thread < nthreads; thread++) {
+ FD(evsel, cpu, thread) = -1;
+ }
+ }
+ }
+
+ return evsel->fd != NULL ? 0 : -ENOMEM;
+}
+
+static int perf_evsel__run_ioctl(struct perf_evsel *evsel,
+ int ioc, void *arg)
+{
+ int cpu, thread;
+
+ for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) {
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {
+ int fd = FD(evsel, cpu, thread),
+ err = ioctl(fd, ioc, arg);
+
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__run_ioctl(evsel,
+ PERF_EVENT_IOC_SET_FILTER,
+ (void *)filter);
+}
+
+int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter)
+{
+ char *new_filter = strdup(filter);
+
+ if (new_filter != NULL) {
+ free(evsel->filter);
+ evsel->filter = new_filter;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int perf_evsel__append_filter(struct perf_evsel *evsel,
+ const char *fmt, const char *filter)
+{
+ char *new_filter;
+
+ if (evsel->filter == NULL)
+ return perf_evsel__set_filter(evsel, filter);
+
+ if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
+ free(evsel->filter);
+ evsel->filter = new_filter;
+ return 0;
+ }
+
+ return -1;
+}
+
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+}
+
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel, const char *filter)
+{
+ return perf_evsel__append_filter(evsel, "%s,%s", filter);
+}
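+
+/*
+ * For example, appending a tracepoint filter "b" to an existing filter
+ * "a" yields "(a) && (b)", while address filters are joined as "a,b".
+ */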
+
+int perf_evsel__enable(struct perf_evsel *evsel)
+{
+ return perf_evsel__run_ioctl(evsel,
+ PERF_EVENT_IOC_ENABLE,
+ 0);
+}
+
+int perf_evsel__disable(struct perf_evsel *evsel)
+{
+ return perf_evsel__run_ioctl(evsel,
+ PERF_EVENT_IOC_DISABLE,
+ 0);
+}
+
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
+{
+ if (ncpus == 0 || nthreads == 0)
+ return 0;
+
+ if (evsel->system_wide)
+ nthreads = 1;
+
+ evsel->sample_id = xyarray__new(ncpus, nthreads, sizeof(struct perf_sample_id));
+ if (evsel->sample_id == NULL)
+ return -ENOMEM;
+
+ evsel->id = zalloc(ncpus * nthreads * sizeof(u64));
+ if (evsel->id == NULL) {
+ xyarray__delete(evsel->sample_id);
+ evsel->sample_id = NULL;
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void perf_evsel__free_fd(struct perf_evsel *evsel)
+{
+ xyarray__delete(evsel->fd);
+ evsel->fd = NULL;
+}
+
+static void perf_evsel__free_id(struct perf_evsel *evsel)
+{
+ xyarray__delete(evsel->sample_id);
+ evsel->sample_id = NULL;
+ zfree(&evsel->id);
+}
+
+static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
+{
+ struct perf_evsel_config_term *term, *h;
+
+ list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
+ list_del(&term->list);
+ free(term);
+ }
+}
+
+void perf_evsel__close_fd(struct perf_evsel *evsel)
+{
+ int cpu, thread;
+
+ for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++)
+ for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) {
+ close(FD(evsel, cpu, thread));
+ FD(evsel, cpu, thread) = -1;
+ }
+}
+
+void perf_evsel__exit(struct perf_evsel *evsel)
+{
+ assert(list_empty(&evsel->node));
+ assert(evsel->evlist == NULL);
+ perf_evsel__free_counts(evsel);
+ perf_evsel__free_fd(evsel);
+ perf_evsel__free_id(evsel);
+ perf_evsel__free_config_terms(evsel);
+ cgroup__put(evsel->cgrp);
+ cpu_map__put(evsel->cpus);
+ cpu_map__put(evsel->own_cpus);
+ thread_map__put(evsel->threads);
+ zfree(&evsel->group_name);
+ zfree(&evsel->name);
+ zfree(&evsel->pmu_name);
+ zfree(&evsel->per_pkg_mask);
+ zfree(&evsel->metric_events);
+ perf_evsel__object.fini(evsel);
+}
+
+void perf_evsel__delete(struct perf_evsel *evsel)
+{
+ perf_evsel__exit(evsel);
+ free(evsel);
+}
+
+void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count)
+{
+ struct perf_counts_values tmp;
+
+ if (!evsel->prev_raw_counts)
+ return;
+
+ if (cpu == -1) {
+ tmp = evsel->prev_raw_counts->aggr;
+ evsel->prev_raw_counts->aggr = *count;
+ } else {
+ tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread);
+ *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count;
+ }
+
+ count->val = count->val - tmp.val;
+ count->ena = count->ena - tmp.ena;
+ count->run = count->run - tmp.run;
+}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, s8 *pscaled)
+{
+ s8 scaled = 0;
+
+ if (scale) {
+ if (count->run == 0) {
+ scaled = -1;
+ count->val = 0;
+ } else if (count->run < count->ena) {
+ scaled = 1;
+ count->val = (u64)((double) count->val * count->ena / count->run + 0.5);
+ }
+ } else
+ count->ena = count->run = 0;
+
+ if (pscaled)
+ *pscaled = scaled;
+}
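+
+/*
+ * Worked example: with scale == true, val == 1000, ena == 2000000 and
+ * run == 1000000, the counter ran for half of its enabled time, so val
+ * is scaled up to 2000 and reported as scaled (1). A counter that
+ * never ran (run == 0) reports val == 0 with scaled == -1.
+ */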
+
+static int perf_evsel__read_size(struct perf_evsel *evsel)
+{
+ u64 read_format = evsel->attr.read_format;
+ int entry = sizeof(u64); /* value */
+ int size = 0;
+ int nr = 1;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ size += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ size += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_ID)
+ entry += sizeof(u64);
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ nr = evsel->nr_members;
+ size += sizeof(u64);
+ }
+
+ size += entry * nr;
+ return size;
+}
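+
+/*
+ * Example: with read_format == TOTAL_TIME_ENABLED | TOTAL_TIME_RUNNING |
+ * ID | GROUP and two group members, this is 8 (nr) + 8 + 8 (the times) +
+ * 2 * 16 (value + id per member) = 56 bytes.
+ */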
+
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count)
+{
+ size_t size = perf_evsel__read_size(evsel);
+
+ memset(count, 0, sizeof(*count));
+
+ if (FD(evsel, cpu, thread) < 0)
+ return -EINVAL;
+
+ if (readn(FD(evsel, cpu, thread), count->values, size) <= 0)
+ return -errno;
+
+ return 0;
+}
+
+static int
+perf_evsel__read_one(struct perf_evsel *evsel, int cpu, int thread)
+{
+ struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
+
+ return perf_evsel__read(evsel, cpu, thread, count);
+}
+
+static void
+perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
+ u64 val, u64 ena, u64 run)
+{
+ struct perf_counts_values *count;
+
+ count = perf_counts(counter->counts, cpu, thread);
+
+ count->val = val;
+ count->ena = ena;
+ count->run = run;
+ count->loaded = true;
+}
+
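+/*
+ * With PERF_FORMAT_GROUP the kernel returns a single record per group
+ * (fields present only when the corresponding read_format bit is set):
+ *
+ *	{ u64 nr;
+ *	  { u64 time_enabled; }
+ *	  { u64 time_running; }
+ *	  { u64 value; u64 id; } cntr[nr];
+ *	}
+ *
+ * perf_evsel__process_group_data() below walks this layout and
+ * distributes each member's value via perf_evsel__set_count().
+ */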
+static int
+perf_evsel__process_group_data(struct perf_evsel *leader,
+ int cpu, int thread, u64 *data)
+{
+ u64 read_format = leader->attr.read_format;
+ struct sample_read_value *v;
+ u64 nr, ena = 0, run = 0, i;
+
+ nr = *data++;
+
+ if (nr != (u64) leader->nr_members)
+ return -EINVAL;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ ena = *data++;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ run = *data++;
+
+ v = (struct sample_read_value *) data;
+
+ perf_evsel__set_count(leader, cpu, thread,
+ v[0].value, ena, run);
+
+ for (i = 1; i < nr; i++) {
+ struct perf_evsel *counter;
+
+ counter = perf_evlist__id2evsel(leader->evlist, v[i].id);
+ if (!counter)
+ return -EINVAL;
+
+ perf_evsel__set_count(counter, cpu, thread,
+ v[i].value, ena, run);
+ }
+
+ return 0;
+}
+
+static int
+perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread)
+{
+ struct perf_stat_evsel *ps = leader->stats;
+ u64 read_format = leader->attr.read_format;
+ int size = perf_evsel__read_size(leader);
+ u64 *data = ps->group_data;
+
+ if (!(read_format & PERF_FORMAT_ID))
+ return -EINVAL;
+
+ if (!perf_evsel__is_group_leader(leader))
+ return -EINVAL;
+
+ if (!data) {
+ data = zalloc(size);
+ if (!data)
+ return -ENOMEM;
+
+ ps->group_data = data;
+ }
+
+ if (FD(leader, cpu, thread) < 0)
+ return -EINVAL;
+
+ if (readn(FD(leader, cpu, thread), data, size) <= 0)
+ return -errno;
+
+ return perf_evsel__process_group_data(leader, cpu, thread, data);
+}
+
+int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread)
+{
+ u64 read_format = evsel->attr.read_format;
+
+ if (read_format & PERF_FORMAT_GROUP)
+ return perf_evsel__read_group(evsel, cpu, thread);
+ else
+ return perf_evsel__read_one(evsel, cpu, thread);
+}
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+ int cpu, int thread, bool scale)
+{
+ struct perf_counts_values count;
+ size_t nv = scale ? 3 : 1;
+
+ if (FD(evsel, cpu, thread) < 0)
+ return -EINVAL;
+
+ if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+ return -ENOMEM;
+
+ if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
+ return -errno;
+
+ perf_evsel__compute_deltas(evsel, cpu, thread, &count);
+ perf_counts_values__scale(&count, scale, NULL);
+ *perf_counts(evsel->counts, cpu, thread) = count;
+ return 0;
+}
+
+static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread)
+{
+ struct perf_evsel *leader = evsel->leader;
+ int fd;
+
+ if (perf_evsel__is_group_leader(evsel))
+ return -1;
+
+ /*
+	 * The leader must already be processed/open;
+	 * if not, it's a bug.
+ */
+ BUG_ON(!leader->fd);
+
+ fd = FD(leader, cpu, thread);
+ BUG_ON(fd == -1);
+
+ return fd;
+}
+
+struct bit_names {
+ int bit;
+ const char *name;
+};
+
+static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits)
+{
+ bool first_bit = true;
+ int i = 0;
+
+ do {
+ if (value & bits[i].bit) {
+ buf += scnprintf(buf, size, "%s%s", first_bit ? "" : "|", bits[i].name);
+ first_bit = false;
+ }
+ } while (bits[++i].name != NULL);
+}
+
+static void __p_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR),
+ bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU),
+ bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW),
+ bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
+ bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
+ bit_name(WEIGHT), bit_name(PHYS_ADDR),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
+static void __p_branch_sample_type(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_SAMPLE_BRANCH_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(USER), bit_name(KERNEL), bit_name(HV), bit_name(ANY),
+ bit_name(ANY_CALL), bit_name(ANY_RETURN), bit_name(IND_CALL),
+ bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
+ bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
+ bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
+static void __p_read_format(char *buf, size_t size, u64 value)
+{
+#define bit_name(n) { PERF_FORMAT_##n, #n }
+ struct bit_names bits[] = {
+ bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING),
+ bit_name(ID), bit_name(GROUP),
+ { .name = NULL, }
+ };
+#undef bit_name
+ __p_bits(buf, size, value, bits);
+}
+
+#define BUF_SIZE 1024
+
+#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
+#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
+#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
+#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val)
+#define p_branch_sample_type(val) __p_branch_sample_type(buf, BUF_SIZE, val)
+#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val)
+
+#define PRINT_ATTRn(_n, _f, _p) \
+do { \
+ if (attr->_f) { \
+ _p(attr->_f); \
+ ret += attr__fprintf(fp, _n, buf, priv);\
+ } \
+} while (0)
+
+#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p)
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+ attr__fprintf_f attr__fprintf, void *priv)
+{
+ char buf[BUF_SIZE];
+ int ret = 0;
+
+ PRINT_ATTRf(type, p_unsigned);
+ PRINT_ATTRf(size, p_unsigned);
+ PRINT_ATTRf(config, p_hex);
+ PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned);
+ PRINT_ATTRf(sample_type, p_sample_type);
+ PRINT_ATTRf(read_format, p_read_format);
+
+ PRINT_ATTRf(disabled, p_unsigned);
+ PRINT_ATTRf(inherit, p_unsigned);
+ PRINT_ATTRf(pinned, p_unsigned);
+ PRINT_ATTRf(exclusive, p_unsigned);
+ PRINT_ATTRf(exclude_user, p_unsigned);
+ PRINT_ATTRf(exclude_kernel, p_unsigned);
+ PRINT_ATTRf(exclude_hv, p_unsigned);
+ PRINT_ATTRf(exclude_idle, p_unsigned);
+ PRINT_ATTRf(mmap, p_unsigned);
+ PRINT_ATTRf(comm, p_unsigned);
+ PRINT_ATTRf(freq, p_unsigned);
+ PRINT_ATTRf(inherit_stat, p_unsigned);
+ PRINT_ATTRf(enable_on_exec, p_unsigned);
+ PRINT_ATTRf(task, p_unsigned);
+ PRINT_ATTRf(watermark, p_unsigned);
+ PRINT_ATTRf(precise_ip, p_unsigned);
+ PRINT_ATTRf(mmap_data, p_unsigned);
+ PRINT_ATTRf(sample_id_all, p_unsigned);
+ PRINT_ATTRf(exclude_host, p_unsigned);
+ PRINT_ATTRf(exclude_guest, p_unsigned);
+ PRINT_ATTRf(exclude_callchain_kernel, p_unsigned);
+ PRINT_ATTRf(exclude_callchain_user, p_unsigned);
+ PRINT_ATTRf(mmap2, p_unsigned);
+ PRINT_ATTRf(comm_exec, p_unsigned);
+ PRINT_ATTRf(use_clockid, p_unsigned);
+ PRINT_ATTRf(context_switch, p_unsigned);
+ PRINT_ATTRf(write_backward, p_unsigned);
+ PRINT_ATTRf(namespaces, p_unsigned);
+
+ PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
+ PRINT_ATTRf(bp_type, p_unsigned);
+ PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex);
+ PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex);
+ PRINT_ATTRf(branch_sample_type, p_branch_sample_type);
+ PRINT_ATTRf(sample_regs_user, p_hex);
+ PRINT_ATTRf(sample_stack_user, p_unsigned);
+ PRINT_ATTRf(clockid, p_signed);
+ PRINT_ATTRf(sample_regs_intr, p_hex);
+ PRINT_ATTRf(aux_watermark, p_unsigned);
+ PRINT_ATTRf(sample_max_stack, p_unsigned);
+
+ return ret;
+}
+
+static int __open_attr__fprintf(FILE *fp, const char *name, const char *val,
+ void *priv __maybe_unused)
+{
+ return fprintf(fp, " %-32s %s\n", name, val);
+}
+
+static void perf_evsel__remove_fd(struct perf_evsel *pos,
+ int nr_cpus, int nr_threads,
+ int thread_idx)
+{
+ for (int cpu = 0; cpu < nr_cpus; cpu++)
+ for (int thread = thread_idx; thread < nr_threads - 1; thread++)
+ FD(pos, cpu, thread) = FD(pos, cpu, thread + 1);
+}
+
+static int update_fds(struct perf_evsel *evsel,
+ int nr_cpus, int cpu_idx,
+ int nr_threads, int thread_idx)
+{
+ struct perf_evsel *pos;
+
+ if (cpu_idx >= nr_cpus || thread_idx >= nr_threads)
+ return -EINVAL;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ nr_cpus = pos != evsel ? nr_cpus : cpu_idx;
+
+ perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx);
+
+ /*
+		 * Since the fds for the next evsel have not been created,
+		 * there is no need to iterate the whole event list.
+ */
+ if (pos == evsel)
+ break;
+ }
+ return 0;
+}
+
+static bool ignore_missing_thread(struct perf_evsel *evsel,
+ int nr_cpus, int cpu,
+ struct thread_map *threads,
+ int thread, int err)
+{
+ pid_t ignore_pid = thread_map__pid(threads, thread);
+
+ if (!evsel->ignore_missing_thread)
+ return false;
+
+ /* The system wide setup does not work with threads. */
+ if (evsel->system_wide)
+ return false;
+
+	/* -ESRCH is the perf event syscall errno for PIDs that were not found. */
+ if (err != -ESRCH)
+ return false;
+
+ /* If there's only one thread, let it fail. */
+ if (threads->nr == 1)
+ return false;
+
+ /*
+	 * We should remove the fd for the missing thread first,
+	 * because thread_map__remove() will decrease threads->nr.
+ */
+ if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread))
+ return false;
+
+ if (thread_map__remove(threads, thread))
+ return false;
+
+ pr_warning("WARNING: Ignored open failure for pid %d\n",
+ ignore_pid);
+ return true;
+}
+
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads)
+{
+ int cpu, thread, nthreads;
+ unsigned long flags = PERF_FLAG_FD_CLOEXEC;
+ int pid = -1, err;
+ enum { NO_CHANGE, SET_TO_MAX, INCREASED_MAX } set_rlimit = NO_CHANGE;
+
+ if (perf_missing_features.write_backward && evsel->attr.write_backward)
+ return -EINVAL;
+
+ if (cpus == NULL) {
+ static struct cpu_map *empty_cpu_map;
+
+ if (empty_cpu_map == NULL) {
+ empty_cpu_map = cpu_map__dummy_new();
+ if (empty_cpu_map == NULL)
+ return -ENOMEM;
+ }
+
+ cpus = empty_cpu_map;
+ }
+
+ if (threads == NULL) {
+ static struct thread_map *empty_thread_map;
+
+ if (empty_thread_map == NULL) {
+ empty_thread_map = thread_map__new_by_tid(-1);
+ if (empty_thread_map == NULL)
+ return -ENOMEM;
+ }
+
+ threads = empty_thread_map;
+ }
+
+ if (evsel->system_wide)
+ nthreads = 1;
+ else
+ nthreads = threads->nr;
+
+ if (evsel->fd == NULL &&
+ perf_evsel__alloc_fd(evsel, cpus->nr, nthreads) < 0)
+ return -ENOMEM;
+
+ if (evsel->cgrp) {
+ flags |= PERF_FLAG_PID_CGROUP;
+ pid = evsel->cgrp->fd;
+ }
+
+fallback_missing_features:
+ if (perf_missing_features.clockid_wrong)
+ evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */
+ if (perf_missing_features.clockid) {
+ evsel->attr.use_clockid = 0;
+ evsel->attr.clockid = 0;
+ }
+ if (perf_missing_features.cloexec)
+ flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC;
+ if (perf_missing_features.mmap2)
+ evsel->attr.mmap2 = 0;
+ if (perf_missing_features.exclude_guest)
+ evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
+ if (perf_missing_features.lbr_flags)
+ evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES);
+ if (perf_missing_features.group_read && evsel->attr.inherit)
+ evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
+retry_sample_id:
+ if (perf_missing_features.sample_id_all)
+ evsel->attr.sample_id_all = 0;
+
+ if (verbose >= 2) {
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ fprintf(stderr, "perf_event_attr:\n");
+ perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL);
+ fprintf(stderr, "%.60s\n", graph_dotted_line);
+ }
+
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+
+ for (thread = 0; thread < nthreads; thread++) {
+ int fd, group_fd;
+
+ if (!evsel->cgrp && !evsel->system_wide)
+ pid = thread_map__pid(threads, thread);
+
+ group_fd = get_group_fd(evsel, cpu, thread);
+retry_open:
+ pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx",
+ pid, cpus->map[cpu], group_fd, flags);
+
+ test_attr__ready();
+
+ fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu],
+ group_fd, flags);
+
+ FD(evsel, cpu, thread) = fd;
+
+ if (fd < 0) {
+ err = -errno;
+
+ if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) {
+ /*
+ * We just removed 1 thread, so take a step
+ * back on thread index and lower the upper
+ * nthreads limit.
+ */
+ nthreads--;
+ thread--;
+
+				/* ... and pretend like nothing has happened. */
+ err = 0;
+ continue;
+ }
+
+ pr_debug2("\nsys_perf_event_open failed, error %d\n",
+ err);
+ goto try_fallback;
+ }
+
+ pr_debug2(" = %d\n", fd);
+
+ if (evsel->bpf_fd >= 0) {
+ int evt_fd = fd;
+ int bpf_fd = evsel->bpf_fd;
+
+ err = ioctl(evt_fd,
+ PERF_EVENT_IOC_SET_BPF,
+ bpf_fd);
+ if (err && errno != EEXIST) {
+ pr_err("failed to attach bpf fd %d: %s\n",
+ bpf_fd, strerror(errno));
+ err = -EINVAL;
+ goto out_close;
+ }
+ }
+
+ set_rlimit = NO_CHANGE;
+
+ /*
+ * If we succeeded but had to kill clockid, fail and
+ * have perf_evsel__open_strerror() print us a nice
+ * error.
+ */
+ if (perf_missing_features.clockid ||
+ perf_missing_features.clockid_wrong) {
+ err = -EINVAL;
+ goto out_close;
+ }
+ }
+ }
+
+ return 0;
+
+try_fallback:
+ /*
+ * perf stat needs between 5 and 22 fds per CPU. When we run out
+ * of them try to increase the limits.
+ */
+ if (err == -EMFILE && set_rlimit < INCREASED_MAX) {
+ struct rlimit l;
+ int old_errno = errno;
+
+ if (getrlimit(RLIMIT_NOFILE, &l) == 0) {
+ if (set_rlimit == NO_CHANGE)
+ l.rlim_cur = l.rlim_max;
+ else {
+ l.rlim_cur = l.rlim_max + 1000;
+ l.rlim_max = l.rlim_cur;
+ }
+ if (setrlimit(RLIMIT_NOFILE, &l) == 0) {
+ set_rlimit++;
+ errno = old_errno;
+ goto retry_open;
+ }
+ }
+ errno = old_errno;
+ }
+
+ if (err != -EINVAL || cpu > 0 || thread > 0)
+ goto out_close;
+
+ /*
+ * Must probe features in the order they were added to the
+ * perf_event_attr interface.
+ */
+ if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
+ perf_missing_features.write_backward = true;
+ pr_debug2("switching off write_backward\n");
+ goto out_close;
+ } else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
+ perf_missing_features.clockid_wrong = true;
+ pr_debug2("switching off clockid\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
+ perf_missing_features.clockid = true;
+ pr_debug2("switching off use_clockid\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) {
+ perf_missing_features.cloexec = true;
+ pr_debug2("switching off cloexec flag\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
+ perf_missing_features.mmap2 = true;
+ pr_debug2("switching off mmap2\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.exclude_guest &&
+ (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
+ perf_missing_features.exclude_guest = true;
+ pr_debug2("switching off exclude_guest, exclude_host\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.sample_id_all) {
+ perf_missing_features.sample_id_all = true;
+ pr_debug2("switching off sample_id_all\n");
+ goto retry_sample_id;
+ } else if (!perf_missing_features.lbr_flags &&
+ (evsel->attr.branch_sample_type &
+ (PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_NO_FLAGS))) {
+ perf_missing_features.lbr_flags = true;
+ pr_debug2("switching off branch sample type no (cycles/flags)\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.group_read &&
+ evsel->attr.inherit &&
+ (evsel->attr.read_format & PERF_FORMAT_GROUP) &&
+ perf_evsel__is_group_leader(evsel)) {
+ perf_missing_features.group_read = true;
+ pr_debug2("switching off group read\n");
+ goto fallback_missing_features;
+ }
+out_close:
+ if (err)
+ threads->err_thread = thread;
+
+ do {
+ while (--thread >= 0) {
+ close(FD(evsel, cpu, thread));
+ FD(evsel, cpu, thread) = -1;
+ }
+ thread = nthreads;
+ } while (--cpu >= 0);
+ return err;
+}
+
+void perf_evsel__close(struct perf_evsel *evsel)
+{
+ if (evsel->fd == NULL)
+ return;
+
+ perf_evsel__close_fd(evsel);
+ perf_evsel__free_fd(evsel);
+}
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus)
+{
+ return perf_evsel__open(evsel, cpus, NULL);
+}
+
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads)
+{
+ return perf_evsel__open(evsel, NULL, threads);
+}
+
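+/*
+ * When sample_id_all is set, non-sample events carry the sample_id
+ * fields (TID, TIME, ID, STREAM_ID, CPU, IDENTIFIER) appended at the
+ * end of the record. Which fields are present depends on sample_type,
+ * so the parser below starts at the last u64 and walks backwards.
+ */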
+static int perf_evsel__parse_id_sample(const struct perf_evsel *evsel,
+ const union perf_event *event,
+ struct perf_sample *sample)
+{
+ u64 type = evsel->attr.sample_type;
+ const u64 *array = event->sample.array;
+ bool swapped = evsel->needs_swap;
+ union u64_swap u;
+
+ array += ((event->header.size -
+ sizeof(event->header)) / sizeof(u64)) - 1;
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ }
+
+ sample->cpu = u.val32[0];
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ sample->stream_id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ sample->id = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ sample->time = *array;
+ array--;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ }
+
+ sample->pid = u.val32[0];
+ sample->tid = u.val32[1];
+ array--;
+ }
+
+ return 0;
+}
+
+static inline bool overflow(const void *endp, u16 max_size, const void *offset,
+ u64 size)
+{
+ return size > max_size || offset + size > endp;
+}
+
+#define OVERFLOW_CHECK(offset, size, max_size) \
+ do { \
+ if (overflow(endp, (max_size), (offset), (size))) \
+ return -EFAULT; \
+ } while (0)
+
+#define OVERFLOW_CHECK_u64(offset) \
+ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64))
+
+static int
+perf_event__check_size(union perf_event *event, unsigned int sample_size)
+{
+ /*
+ * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes
+ * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to
+ * check the format does not go past the end of the event.
+ */
+ if (sample_size + sizeof(event->header) > event->header.size)
+ return -EFAULT;
+
+ return 0;
+}
+
+int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
+ struct perf_sample *data)
+{
+ u64 type = evsel->attr.sample_type;
+ bool swapped = evsel->needs_swap;
+ const u64 *array;
+ u16 max_size = event->header.size;
+ const void *endp = (void *)event + max_size;
+ u64 sz;
+
+ /*
+ * used for cross-endian analysis. See git commit 65014ab3
+ * for why this goofiness is needed.
+ */
+ union u64_swap u;
+
+ memset(data, 0, sizeof(*data));
+ data->cpu = data->pid = data->tid = -1;
+ data->stream_id = data->id = data->time = -1ULL;
+ data->period = evsel->attr.sample_period;
+ data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ data->misc = event->header.misc;
+ data->data_src = PERF_MEM_DATA_SRC_NONE;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ if (!evsel->attr.sample_id_all)
+ return 0;
+ return perf_evsel__parse_id_sample(evsel, event, data);
+ }
+
+ array = event->sample.array;
+
+ if (perf_event__check_size(event, evsel->sample_size))
+ return -EFAULT;
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_IP) {
+ data->ip = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ }
+
+ data->pid = u.val32[0];
+ data->tid = u.val32[1];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ data->time = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ data->addr = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ data->id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ data->stream_id = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+
+ u.val64 = *array;
+ if (swapped) {
+ /* undo swap of u64, then swap on individual u32s */
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ }
+
+ data->cpu = u.val32[0];
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ data->period = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ u64 read_format = evsel->attr.read_format;
+
+ OVERFLOW_CHECK_u64(array);
+ if (read_format & PERF_FORMAT_GROUP)
+ data->read.group.nr = *array;
+ else
+ data->read.one.value = *array;
+
+ array++;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ OVERFLOW_CHECK_u64(array);
+ data->read.time_enabled = *array;
+ array++;
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ OVERFLOW_CHECK_u64(array);
+ data->read.time_running = *array;
+ array++;
+ }
+
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ const u64 max_group_nr = UINT64_MAX /
+ sizeof(struct sample_read_value);
+
+ if (data->read.group.nr > max_group_nr)
+ return -EFAULT;
+ sz = data->read.group.nr *
+ sizeof(struct sample_read_value);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->read.group.values =
+ (struct sample_read_value *)array;
+ array = (void *)array + sz;
+ } else {
+ OVERFLOW_CHECK_u64(array);
+ data->read.one.id = *array;
+ array++;
+ }
+ }
+
+ if (evsel__has_callchain(evsel)) {
+ const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
+
+ OVERFLOW_CHECK_u64(array);
+ data->callchain = (struct ip_callchain *)array++;
+ if (data->callchain->nr > max_callchain_nr)
+ return -EFAULT;
+ sz = data->callchain->nr * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ OVERFLOW_CHECK_u64(array);
+ u.val64 = *array;
+
+ /*
+ * Undo swap of u64, then swap on individual u32s,
+ * get the size of the raw area and undo all of the
+		 * swap. The pevent interface handles endianness by
+		 * itself.
+ */
+ if (swapped) {
+ u.val64 = bswap_64(u.val64);
+ u.val32[0] = bswap_32(u.val32[0]);
+ u.val32[1] = bswap_32(u.val32[1]);
+ }
+ data->raw_size = u.val32[0];
+
+ /*
+ * The raw data is aligned on 64bits including the
+ * u32 size, so it's safe to use mem_bswap_64.
+ */
+ if (swapped)
+ mem_bswap_64((void *) array, data->raw_size);
+
+ array = (void *)array + sizeof(u32);
+
+ OVERFLOW_CHECK(array, data->raw_size, max_size);
+ data->raw_data = (void *)array;
+ array = (void *)array + data->raw_size;
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ const u64 max_branch_nr = UINT64_MAX /
+ sizeof(struct branch_entry);
+
+ OVERFLOW_CHECK_u64(array);
+ data->branch_stack = (struct branch_stack *)array++;
+
+ if (data->branch_stack->nr > max_branch_nr)
+ return -EFAULT;
+ sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ OVERFLOW_CHECK(array, sz, max_size);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ OVERFLOW_CHECK_u64(array);
+ data->user_regs.abi = *array;
+ array++;
+
+ if (data->user_regs.abi) {
+ u64 mask = evsel->attr.sample_regs_user;
+
+ sz = hweight_long(mask) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->user_regs.mask = mask;
+ data->user_regs.regs = (u64 *)array;
+ array = (void *)array + sz;
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ OVERFLOW_CHECK_u64(array);
+ sz = *array++;
+
+ data->user_stack.offset = ((char *)(array - 1)
+ - (char *) event);
+
+ if (!sz) {
+ data->user_stack.size = 0;
+ } else {
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->user_stack.data = (char *)array;
+ array = (void *)array + sz;
+ OVERFLOW_CHECK_u64(array);
+ data->user_stack.size = *array++;
+ if (WARN_ONCE(data->user_stack.size > sz,
+ "user stack dump failure\n"))
+ return -EFAULT;
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT) {
+ OVERFLOW_CHECK_u64(array);
+ data->weight = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_DATA_SRC) {
+ OVERFLOW_CHECK_u64(array);
+ data->data_src = *array;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TRANSACTION) {
+ OVERFLOW_CHECK_u64(array);
+ data->transaction = *array;
+ array++;
+ }
+
+ data->intr_regs.abi = PERF_SAMPLE_REGS_ABI_NONE;
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ OVERFLOW_CHECK_u64(array);
+ data->intr_regs.abi = *array;
+ array++;
+
+ if (data->intr_regs.abi != PERF_SAMPLE_REGS_ABI_NONE) {
+ u64 mask = evsel->attr.sample_regs_intr;
+
+ sz = hweight_long(mask) * sizeof(u64);
+ OVERFLOW_CHECK(array, sz, max_size);
+ data->intr_regs.mask = mask;
+ data->intr_regs.regs = (u64 *)array;
+ array = (void *)array + sz;
+ }
+ }
+
+ data->phys_addr = 0;
+ if (type & PERF_SAMPLE_PHYS_ADDR) {
+ data->phys_addr = *array;
+ array++;
+ }
+
+ return 0;
+}
+
+int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
+ union perf_event *event,
+ u64 *timestamp)
+{
+ u64 type = evsel->attr.sample_type;
+ const u64 *array;
+
+ if (!(type & PERF_SAMPLE_TIME))
+ return -1;
+
+ if (event->header.type != PERF_RECORD_SAMPLE) {
+ struct perf_sample data = {
+ .time = -1ULL,
+ };
+
+ if (!evsel->attr.sample_id_all)
+ return -1;
+ if (perf_evsel__parse_id_sample(evsel, event, &data))
+ return -1;
+
+ *timestamp = data.time;
+ return 0;
+ }
+
+ array = event->sample.array;
+
+ if (perf_event__check_size(event, evsel->sample_size))
+ return -EFAULT;
+
+ if (type & PERF_SAMPLE_IDENTIFIER)
+ array++;
+
+ if (type & PERF_SAMPLE_IP)
+ array++;
+
+ if (type & PERF_SAMPLE_TID)
+ array++;
+
+ if (type & PERF_SAMPLE_TIME)
+ *timestamp = *array;
+
+ return 0;
+}
+
+size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
+ u64 read_format)
+{
+ size_t sz, result = sizeof(struct sample_event);
+
+ if (type & PERF_SAMPLE_IDENTIFIER)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_IP)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_TID)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_TIME)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_ADDR)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_ID)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_STREAM_ID)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_CPU)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_PERIOD)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_READ) {
+ result += sizeof(u64);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ result += sizeof(u64);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ result += sizeof(u64);
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ sz = sample->read.group.nr *
+ sizeof(struct sample_read_value);
+ result += sz;
+ } else {
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ sz = (sample->callchain->nr + 1) * sizeof(u64);
+ result += sz;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ result += sizeof(u32);
+ result += sample->raw_size;
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+ sz += sizeof(u64);
+ result += sz;
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ if (sample->user_regs.abi) {
+ result += sizeof(u64);
+ sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ result += sz;
+ } else {
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ sz = sample->user_stack.size;
+ result += sizeof(u64);
+ if (sz) {
+ result += sz;
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_DATA_SRC)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_TRANSACTION)
+ result += sizeof(u64);
+
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ if (sample->intr_regs.abi) {
+ result += sizeof(u64);
+ sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ result += sz;
+ } else {
+ result += sizeof(u64);
+ }
+ }
+
+ if (type & PERF_SAMPLE_PHYS_ADDR)
+ result += sizeof(u64);
+
+ return result;
+}
+
+int perf_event__synthesize_sample(union perf_event *event, u64 type,
+ u64 read_format,
+ const struct perf_sample *sample)
+{
+ u64 *array;
+ size_t sz;
+ /*
+ * used for cross-endian analysis. See git commit 65014ab3
+ * for why this goofiness is needed.
+ */
+ union u64_swap u;
+
+ array = event->sample.array;
+
+ if (type & PERF_SAMPLE_IDENTIFIER) {
+ *array = sample->id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_IP) {
+ *array = sample->ip;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TID) {
+ u.val32[0] = sample->pid;
+ u.val32[1] = sample->tid;
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TIME) {
+ *array = sample->time;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ADDR) {
+ *array = sample->addr;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_ID) {
+ *array = sample->id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_STREAM_ID) {
+ *array = sample->stream_id;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_CPU) {
+ u.val32[0] = sample->cpu;
+ u.val32[1] = 0;
+ *array = u.val64;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_PERIOD) {
+ *array = sample->period;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_READ) {
+ if (read_format & PERF_FORMAT_GROUP)
+ *array = sample->read.group.nr;
+ else
+ *array = sample->read.one.value;
+ array++;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ *array = sample->read.time_enabled;
+ array++;
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ *array = sample->read.time_running;
+ array++;
+ }
+
+ /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+ if (read_format & PERF_FORMAT_GROUP) {
+ sz = sample->read.group.nr *
+ sizeof(struct sample_read_value);
+ memcpy(array, sample->read.group.values, sz);
+ array = (void *)array + sz;
+ } else {
+ *array = sample->read.one.id;
+ array++;
+ }
+ }
+
+ if (type & PERF_SAMPLE_CALLCHAIN) {
+ sz = (sample->callchain->nr + 1) * sizeof(u64);
+ memcpy(array, sample->callchain, sz);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_RAW) {
+ u.val32[0] = sample->raw_size;
+ *array = u.val64;
+ array = (void *)array + sizeof(u32);
+
+ memcpy(array, sample->raw_data, sample->raw_size);
+ array = (void *)array + sample->raw_size;
+ }
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ sz = sample->branch_stack->nr * sizeof(struct branch_entry);
+ sz += sizeof(u64);
+ memcpy(array, sample->branch_stack, sz);
+ array = (void *)array + sz;
+ }
+
+ if (type & PERF_SAMPLE_REGS_USER) {
+ if (sample->user_regs.abi) {
+ *array++ = sample->user_regs.abi;
+ sz = hweight_long(sample->user_regs.mask) * sizeof(u64);
+ memcpy(array, sample->user_regs.regs, sz);
+ array = (void *)array + sz;
+ } else {
+ *array++ = 0;
+ }
+ }
+
+ if (type & PERF_SAMPLE_STACK_USER) {
+ sz = sample->user_stack.size;
+ *array++ = sz;
+ if (sz) {
+ memcpy(array, sample->user_stack.data, sz);
+ array = (void *)array + sz;
+ *array++ = sz;
+ }
+ }
+
+ if (type & PERF_SAMPLE_WEIGHT) {
+ *array = sample->weight;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_DATA_SRC) {
+ *array = sample->data_src;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_TRANSACTION) {
+ *array = sample->transaction;
+ array++;
+ }
+
+ if (type & PERF_SAMPLE_REGS_INTR) {
+ if (sample->intr_regs.abi) {
+ *array++ = sample->intr_regs.abi;
+ sz = hweight_long(sample->intr_regs.mask) * sizeof(u64);
+ memcpy(array, sample->intr_regs.regs, sz);
+ array = (void *)array + sz;
+ } else {
+ *array++ = 0;
+ }
+ }
+
+ if (type & PERF_SAMPLE_PHYS_ADDR) {
+ *array = sample->phys_addr;
+ array++;
+ }
+
+ return 0;
+}
+
+struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name)
+{
+ return tep_find_field(evsel->tp_format, name);
+}
+
+void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name)
+{
+ struct format_field *field = perf_evsel__field(evsel, name);
+ int offset;
+
+ if (!field)
+ return NULL;
+
+ offset = field->offset;
+
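+	/*
+	 * Dynamic (__data_loc) tracepoint fields store a u32 descriptor
+	 * at field->offset: the payload offset in the low 16 bits and
+	 * its length in the high 16 bits, hence the mask below.
+	 */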
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ offset = *(int *)(sample->raw_data + field->offset);
+ offset &= 0xffff;
+ }
+
+ return sample->raw_data + offset;
+}
+
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample,
+ bool needs_swap)
+{
+ u64 value;
+ void *ptr = sample->raw_data + field->offset;
+
+ switch (field->size) {
+ case 1:
+ return *(u8 *)ptr;
+ case 2:
+ value = *(u16 *)ptr;
+ break;
+ case 4:
+ value = *(u32 *)ptr;
+ break;
+ case 8:
+ memcpy(&value, ptr, sizeof(u64));
+ break;
+ default:
+ return 0;
+ }
+
+ if (!needs_swap)
+ return value;
+
+ switch (field->size) {
+ case 2:
+ return bswap_16(value);
+ case 4:
+ return bswap_32(value);
+ case 8:
+ return bswap_64(value);
+ default:
+ return 0;
+ }
+}
+
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name)
+{
+ struct format_field *field = perf_evsel__field(evsel, name);
+
+ if (!field)
+ return 0;
+
+	return format_field__intval(field, sample, evsel->needs_swap);
+}
+
+bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
+ char *msg, size_t msgsize)
+{
+ int paranoid;
+
+ if ((err == ENOENT || err == ENXIO || err == ENODEV) &&
+ evsel->attr.type == PERF_TYPE_HARDWARE &&
+ evsel->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ /*
+ * If it's cycles then fall back to hrtimer based
+		 * cpu-clock-tick sw counter, which is always available
+		 * even without PMU support.
+ *
+ * PPC returns ENXIO until 2.6.37 (behavior changed with commit
+ * b0a873e).
+ */
+ scnprintf(msg, msgsize, "%s",
+"The cycles event is not supported, trying to fall back to cpu-clock-ticks");
+
+ evsel->attr.type = PERF_TYPE_SOFTWARE;
+ evsel->attr.config = PERF_COUNT_SW_CPU_CLOCK;
+
+ zfree(&evsel->name);
+ return true;
+ } else if (err == EACCES && !evsel->attr.exclude_kernel &&
+ (paranoid = perf_event_paranoid()) > 1) {
+ const char *name = perf_evsel__name(evsel);
+ char *new_name;
+ const char *sep = ":";
+
+		/* Is the separator already in the name? */
+ if (strchr(name, '/') ||
+ strchr(name, ':'))
+ sep = "";
+
+ if (asprintf(&new_name, "%s%su", name, sep) < 0)
+ return false;
+
+		free(evsel->name);
+ evsel->name = new_name;
+ scnprintf(msg, msgsize,
+"kernel.perf_event_paranoid=%d, trying to fall back to excluding kernel samples", paranoid);
+ evsel->attr.exclude_kernel = 1;
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool find_process(const char *name)
+{
+ size_t len = strlen(name);
+ DIR *dir;
+ struct dirent *d;
+ int ret = -1;
+
+ dir = opendir(procfs__mountpoint());
+ if (!dir)
+ return false;
+
+ /* Walk through the directory. */
+ while (ret && (d = readdir(dir)) != NULL) {
+ char path[PATH_MAX];
+ char *data;
+ size_t size;
+
+ if ((d->d_type != DT_DIR) ||
+ !strcmp(".", d->d_name) ||
+ !strcmp("..", d->d_name))
+ continue;
+
+ scnprintf(path, sizeof(path), "%s/%s/comm",
+ procfs__mountpoint(), d->d_name);
+
+ if (filename__read_str(path, &data, &size))
+ continue;
+
+ ret = strncmp(name, data, len);
+ free(data);
+ }
+
+ closedir(dir);
+ return ret ? false : true;
+}
+
+int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
+ int err, char *msg, size_t size)
+{
+ char sbuf[STRERR_BUFSIZE];
+ int printed = 0;
+
+ switch (err) {
+ case EPERM:
+ case EACCES:
+ if (err == EPERM)
+ printed = scnprintf(msg, size,
+ "No permission to enable %s event.\n\n",
+ perf_evsel__name(evsel));
+
+ return scnprintf(msg + printed, size - printed,
+ "You may not have permission to collect %sstats.\n\n"
+ "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
+ "which controls use of the performance events system by\n"
+ "unprivileged users (without CAP_SYS_ADMIN).\n\n"
+ "The current value is %d:\n\n"
+ " -1: Allow use of (almost) all events by all users\n"
+ " Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
+ ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
+ " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
+ ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
+ ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
+ "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
+ " kernel.perf_event_paranoid = -1\n" ,
+ target->system_wide ? "system-wide " : "",
+ perf_event_paranoid());
+ case ENOENT:
+ return scnprintf(msg, size, "The %s event is not supported.",
+ perf_evsel__name(evsel));
+ case EMFILE:
+ return scnprintf(msg, size, "%s",
+ "Too many events are opened.\n"
+ "Probably the maximum number of open file descriptors has been reached.\n"
+ "Hint: Try again after reducing the number of events.\n"
+ "Hint: Try increasing the limit with 'ulimit -n <limit>'");
+ case ENOMEM:
+ if (evsel__has_callchain(evsel) &&
+ access("/proc/sys/kernel/perf_event_max_stack", F_OK) == 0)
+ return scnprintf(msg, size,
+ "Not enough memory to setup event with callchain.\n"
+ "Hint: Try tweaking /proc/sys/kernel/perf_event_max_stack\n"
+ "Hint: Current value: %d", sysctl__max_stack());
+ break;
+ case ENODEV:
+ if (target->cpu_list)
+ return scnprintf(msg, size, "%s",
+ "No such device - did you specify an out-of-range profile CPU?");
+ break;
+ case EOPNOTSUPP:
+ if (evsel->attr.sample_period != 0)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
+ perf_evsel__name(evsel));
+ if (evsel->attr.precise_ip)
+ return scnprintf(msg, size, "%s",
+ "\'precise\' request may not be supported. Try removing 'p' modifier.");
+#if defined(__i386__) || defined(__x86_64__)
+ if (evsel->attr.type == PERF_TYPE_HARDWARE)
+ return scnprintf(msg, size, "%s",
+ "No hardware sampling interrupt available.\n");
+#endif
+ break;
+ case EBUSY:
+ if (find_process("oprofiled"))
+ return scnprintf(msg, size,
+ "The PMU counters are busy/taken by another profiler.\n"
+ "We found oprofile daemon running, please stop it and try again.");
+ break;
+ case EINVAL:
+ if (evsel->attr.write_backward && perf_missing_features.write_backward)
+ return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
+ if (perf_missing_features.clockid)
+ return scnprintf(msg, size, "clockid feature not supported.");
+ if (perf_missing_features.clockid_wrong)
+ return scnprintf(msg, size, "wrong clockid (%d).", clockid);
+ break;
+ default:
+ break;
+ }
+
+ return scnprintf(msg, size,
+ "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
+ "/bin/dmesg | grep -i perf may provide additional information.\n",
+ err, str_error_r(err, sbuf, sizeof(sbuf)),
+ perf_evsel__name(evsel));
+}
+
+struct perf_env *perf_evsel__env(struct perf_evsel *evsel)
+{
+ if (evsel && evsel->evlist)
+ return evsel->evlist->env;
+ return NULL;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
new file mode 100644
index 000000000..163c96061
--- /dev/null
+++ b/tools/perf/util/evsel.h
@@ -0,0 +1,484 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_EVSEL_H
+#define __PERF_EVSEL_H 1
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include "xyarray.h"
+#include "symbol.h"
+#include "cpumap.h"
+#include "counts.h"
+
+struct perf_evsel;
+
+/*
+ * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there is
+ * more than one entry in the evlist.
+ */
+struct perf_sample_id {
+ struct hlist_node node;
+ u64 id;
+ struct perf_evsel *evsel;
+ int idx;
+ int cpu;
+ pid_t tid;
+
+ /* Holds total ID period value for PERF_SAMPLE_READ processing. */
+ u64 period;
+};
+
+struct cgroup;
+
+/*
+ * The 'struct perf_evsel_config_term' is used to pass event-specific
+ * configuration data to the perf_evsel__config() routine. It is
+ * allocated within event parsing and attached to the
+ * perf_evsel::config_terms list head.
+ */
+enum term_type {
+ PERF_EVSEL__CONFIG_TERM_PERIOD,
+ PERF_EVSEL__CONFIG_TERM_FREQ,
+ PERF_EVSEL__CONFIG_TERM_TIME,
+ PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
+ PERF_EVSEL__CONFIG_TERM_STACK_USER,
+ PERF_EVSEL__CONFIG_TERM_INHERIT,
+ PERF_EVSEL__CONFIG_TERM_MAX_STACK,
+ PERF_EVSEL__CONFIG_TERM_OVERWRITE,
+ PERF_EVSEL__CONFIG_TERM_DRV_CFG,
+ PERF_EVSEL__CONFIG_TERM_BRANCH,
+};
+
+struct perf_evsel_config_term {
+ struct list_head list;
+ enum term_type type;
+ union {
+ u64 period;
+ u64 freq;
+ bool time;
+ char *callgraph;
+ char *drv_cfg;
+ u64 stack_user;
+ int max_stack;
+ bool inherit;
+ bool overwrite;
+ char *branch;
+ } val;
+ bool weak;
+};
+
+struct perf_stat_evsel;
+
+/** struct perf_evsel - event selector
+ *
+ * @evlist: evlist this evsel is in, if it is in one.
+ * @node: To insert it into evlist->entries or in other list_heads, say in
+ *        the event parsing routines.
+ * @name: Can be set to retain the original event name passed by the user,
+ *        so that when showing results in tools such as 'perf stat', we
+ *        show the name used, not some alias.
+ * @id_pos: the position of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a sample event i.e. in the array of
+ *          struct sample_event
+ * @is_pos: the position (counting backwards) of the event id (PERF_SAMPLE_ID or
+ *          PERF_SAMPLE_IDENTIFIER) in a non-sample event i.e. if sample_id_all
+ *          is used there is an id sample appended to non-sample events
+ * @priv: What is in its containing unnamed union is tool specific.
+ */
+struct perf_evsel {
+ struct list_head node;
+ struct perf_evlist *evlist;
+ struct perf_event_attr attr;
+ char *filter;
+ struct xyarray *fd;
+ struct xyarray *sample_id;
+ u64 *id;
+ struct perf_counts *counts;
+ struct perf_counts *prev_raw_counts;
+ int idx;
+ u32 ids;
+ char *name;
+ double scale;
+ const char *unit;
+ struct event_format *tp_format;
+ off_t id_offset;
+ struct perf_stat_evsel *stats;
+ void *priv;
+ u64 db_id;
+ struct cgroup *cgrp;
+ void *handler;
+ struct cpu_map *cpus;
+ struct cpu_map *own_cpus;
+ struct thread_map *threads;
+ unsigned int sample_size;
+ int id_pos;
+ int is_pos;
+ bool uniquified_name;
+ bool snapshot;
+ bool supported;
+ bool needs_swap;
+ bool no_aux_samples;
+ bool immediate;
+ bool system_wide;
+ bool tracking;
+ bool per_pkg;
+ bool precise_max;
+ bool ignore_missing_thread;
+ bool forced_leader;
+ bool use_uncore_alias;
+ /* parse modifier helper */
+ int exclude_GH;
+ int nr_members;
+ int sample_read;
+ unsigned long *per_pkg_mask;
+ struct perf_evsel *leader;
+ char *group_name;
+ bool cmdline_group_boundary;
+ struct list_head config_terms;
+ int bpf_fd;
+ bool auto_merge_stats;
+ bool merged_stat;
+ const char * metric_expr;
+ const char * metric_name;
+ struct perf_evsel **metric_events;
+ bool collect_stat;
+ bool weak_group;
+ const char *pmu_name;
+};
+
+union u64_swap {
+ u64 val64;
+ u32 val32[2];
+};
+
+struct perf_missing_features {
+ bool sample_id_all;
+ bool exclude_guest;
+ bool mmap2;
+ bool cloexec;
+ bool clockid;
+ bool clockid_wrong;
+ bool lbr_flags;
+ bool write_backward;
+ bool group_read;
+};
+
+extern struct perf_missing_features perf_missing_features;
+
+struct cpu_map;
+struct target;
+struct thread_map;
+struct record_opts;
+
+static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel)
+{
+ return evsel->cpus;
+}
+
+static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
+{
+ return perf_evsel__cpus(evsel)->nr;
+}
+
+void perf_counts_values__scale(struct perf_counts_values *count,
+ bool scale, s8 *pscaled);
+
+void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
+
+int perf_evsel__object_config(size_t object_size,
+ int (*init)(struct perf_evsel *evsel),
+ void (*fini)(struct perf_evsel *evsel));
+
+struct perf_evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
+
+static inline struct perf_evsel *perf_evsel__new(struct perf_event_attr *attr)
+{
+ return perf_evsel__new_idx(attr, 0);
+}
+
+struct perf_evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx);
+
+/*
+ * Returns a pointer with an encoded error via the <linux/err.h> interface.
+ */
+static inline struct perf_evsel *perf_evsel__newtp(const char *sys, const char *name)
+{
+ return perf_evsel__newtp_idx(sys, name, 0);
+}
+
+struct perf_evsel *perf_evsel__new_cycles(bool precise);
+
+struct event_format *event_format__new(const char *sys, const char *name);
+
+void perf_evsel__init(struct perf_evsel *evsel,
+ struct perf_event_attr *attr, int idx);
+void perf_evsel__exit(struct perf_evsel *evsel);
+void perf_evsel__delete(struct perf_evsel *evsel);
+
+struct callchain_param;
+
+void perf_evsel__config(struct perf_evsel *evsel,
+ struct record_opts *opts,
+ struct callchain_param *callchain);
+void perf_evsel__config_callchain(struct perf_evsel *evsel,
+ struct record_opts *opts,
+ struct callchain_param *callchain);
+
+int __perf_evsel__sample_size(u64 sample_type);
+void perf_evsel__calc_id_pos(struct perf_evsel *evsel);
+
+bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
+
+#define PERF_EVSEL__MAX_ALIASES 8
+
+extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
+ [PERF_EVSEL__MAX_ALIASES];
+extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
+int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
+ char *bf, size_t size);
+const char *perf_evsel__name(struct perf_evsel *evsel);
+
+const char *perf_evsel__group_name(struct perf_evsel *evsel);
+int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
+
+int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
+void perf_evsel__close_fd(struct perf_evsel *evsel);
+
+void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit);
+void __perf_evsel__reset_sample_bit(struct perf_evsel *evsel,
+ enum perf_event_sample_format bit);
+
+#define perf_evsel__set_sample_bit(evsel, bit) \
+ __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define perf_evsel__reset_sample_bit(evsel, bit) \
+ __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+void perf_evsel__set_sample_id(struct perf_evsel *evsel,
+ bool use_sample_identifier);
+
+int perf_evsel__set_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_tp_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__append_addr_filter(struct perf_evsel *evsel,
+ const char *filter);
+int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter);
+int perf_evsel__enable(struct perf_evsel *evsel);
+int perf_evsel__disable(struct perf_evsel *evsel);
+
+int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
+ struct cpu_map *cpus);
+int perf_evsel__open_per_thread(struct perf_evsel *evsel,
+ struct thread_map *threads);
+int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
+ struct thread_map *threads);
+void perf_evsel__close(struct perf_evsel *evsel);
+
+struct perf_sample;
+
+void *perf_evsel__rawptr(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name);
+u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample,
+ const char *name);
+
+static inline char *perf_evsel__strval(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ const char *name)
+{
+ return perf_evsel__rawptr(evsel, sample, name);
+}
+
+struct format_field;
+
+u64 format_field__intval(struct format_field *field, struct perf_sample *sample, bool needs_swap);
+
+struct format_field *perf_evsel__field(struct perf_evsel *evsel, const char *name);
+
+#define perf_evsel__match(evsel, t, c) \
+ (evsel->attr.type == PERF_TYPE_##t && \
+ evsel->attr.config == PERF_COUNT_##c)
+
+static inline bool perf_evsel__match2(struct perf_evsel *e1,
+ struct perf_evsel *e2)
+{
+ return (e1->attr.type == e2->attr.type) &&
+ (e1->attr.config == e2->attr.config);
+}
+
+#define perf_evsel__cmp(a, b) \
+ ((a) && \
+ (b) && \
+ (a)->attr.type == (b)->attr.type && \
+ (a)->attr.config == (b)->attr.config)
+
+int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
+
+int perf_evsel__read_counter(struct perf_evsel *evsel, int cpu, int thread);
+
+int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+ int cpu, int thread, bool scale);
+
+/**
+ * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ *
+ * @evsel - event selector to read the value from
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu(struct perf_evsel *evsel,
+ int cpu, int thread)
+{
+ return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+}
+
+/**
+ * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ *
+ * @evsel - event selector to read the value from
+ * @cpu - CPU of interest
+ * @thread - thread of interest
+ */
+static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel,
+ int cpu, int thread)
+{
+ return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+}
+
+int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
+ struct perf_sample *sample);
+
+int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel,
+ union perf_event *event,
+ u64 *timestamp);
+
+static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel)
+{
+ return list_entry(evsel->node.next, struct perf_evsel, node);
+}
+
+static inline struct perf_evsel *perf_evsel__prev(struct perf_evsel *evsel)
+{
+ return list_entry(evsel->node.prev, struct perf_evsel, node);
+}
+
+/**
+ * perf_evsel__is_group_leader - Return whether given evsel is a leader event
+ *
+ * @evsel - event selector to be tested
+ *
+ * Return %true if @evsel is a group leader or a stand-alone event
+ */
+static inline bool perf_evsel__is_group_leader(const struct perf_evsel *evsel)
+{
+ return evsel->leader == evsel;
+}
+
+/**
+ * perf_evsel__is_group_event - Return whether given evsel is a group event
+ *
+ * @evsel - event selector to be tested
+ *
+ * Return %true iff event group view is enabled and @evsel is an actual group
+ * leader that has other members in the group
+ */
+static inline bool perf_evsel__is_group_event(struct perf_evsel *evsel)
+{
+ if (!symbol_conf.event_group)
+ return false;
+
+ return perf_evsel__is_group_leader(evsel) && evsel->nr_members > 1;
+}
+
+bool perf_evsel__is_function_event(struct perf_evsel *evsel);
+
+static inline bool perf_evsel__is_bpf_output(struct perf_evsel *evsel)
+{
+ return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+}
+
+static inline bool perf_evsel__is_clock(struct perf_evsel *evsel)
+{
+ return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+ perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+}
+
+struct perf_attr_details {
+ bool freq;
+ bool verbose;
+ bool event_group;
+ bool force;
+ bool trace_fields;
+};
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+ struct perf_attr_details *details, FILE *fp);
+
+#define EVSEL__PRINT_IP (1<<0)
+#define EVSEL__PRINT_SYM (1<<1)
+#define EVSEL__PRINT_DSO (1<<2)
+#define EVSEL__PRINT_SYMOFFSET (1<<3)
+#define EVSEL__PRINT_ONELINE (1<<4)
+#define EVSEL__PRINT_SRCLINE (1<<5)
+#define EVSEL__PRINT_UNKNOWN_AS_ADDR (1<<6)
+#define EVSEL__PRINT_CALLCHAIN_ARROW (1<<7)
+#define EVSEL__PRINT_SKIP_IGNORED (1<<8)
+
+struct callchain_cursor;
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+ unsigned int print_opts,
+ struct callchain_cursor *cursor, FILE *fp);
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+ int left_alignment, unsigned int print_opts,
+ struct callchain_cursor *cursor, FILE *fp);
+
+bool perf_evsel__fallback(struct perf_evsel *evsel, int err,
+ char *msg, size_t msgsize);
+int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
+ int err, char *msg, size_t size);
+
+static inline int perf_evsel__group_idx(struct perf_evsel *evsel)
+{
+ return evsel->idx - evsel->leader->idx;
+}
+
+/* Iterates group WITHOUT the leader. */
+#define for_each_group_member(_evsel, _leader) \
+for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \
+ (_evsel) && (_evsel)->leader == (_leader); \
+ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
+
+/* Iterates group WITH the leader. */
+#define for_each_group_evsel(_evsel, _leader) \
+for ((_evsel) = _leader; \
+ (_evsel) && (_evsel)->leader == (_leader); \
+ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node))
+
+static inline bool perf_evsel__has_branch_callstack(const struct perf_evsel *evsel)
+{
+ return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
+}
+
+static inline bool evsel__has_callchain(const struct perf_evsel *evsel)
+{
+ return (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
+}
+
+typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *);
+
+int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
+ attr__fprintf_f attr__fprintf, void *priv);
+
+struct perf_env *perf_evsel__env(struct perf_evsel *evsel);
+
+#endif /* __PERF_EVSEL_H */
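
The declarations above trace out the evsel life cycle: allocate, open on cpu/thread maps, enable, read, close, delete. A minimal sketch of that flow (illustrative only, not part of this patch; `cpus` and `threads` stand in for maps built with the cpu_map/thread_map helpers elsewhere in tools/perf/util):

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_HARDWARE,
		.config	= PERF_COUNT_HW_CPU_CYCLES,
	};
	struct perf_evsel *evsel = perf_evsel__new(&attr);

	if (evsel) {
		if (perf_evsel__open(evsel, cpus, threads) == 0) {
			perf_evsel__enable(evsel);
			/* ... run the workload being measured ... */
			perf_evsel__disable(evsel);
			perf_evsel__read_on_cpu(evsel, 0, 0);	/* fills evsel->counts */
			perf_evsel__close(evsel);
		}
		perf_evsel__delete(evsel);
	}
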
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
new file mode 100644
index 000000000..06dfb0278
--- /dev/null
+++ b/tools/perf/util/evsel_fprintf.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <traceevent/event-parse.h>
+#include "evsel.h"
+#include "callchain.h"
+#include "map.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "srcline.h"
+
+static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (!*first) {
+ ret += fprintf(fp, ",");
+ } else {
+ ret += fprintf(fp, ":");
+ *first = false;
+ }
+
+ va_start(args, fmt);
+ ret += vfprintf(fp, fmt, args);
+ va_end(args);
+ return ret;
+}
+
+static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv)
+{
+ return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
+}
+
+int perf_evsel__fprintf(struct perf_evsel *evsel,
+ struct perf_attr_details *details, FILE *fp)
+{
+ bool first = true;
+ int printed = 0;
+
+ if (details->event_group) {
+ struct perf_evsel *pos;
+
+ if (!perf_evsel__is_group_leader(evsel))
+ return 0;
+
+ if (evsel->nr_members > 1)
+ printed += fprintf(fp, "%s{", evsel->group_name ?: "");
+
+ printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ for_each_group_member(pos, evsel)
+ printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+
+ if (evsel->nr_members > 1)
+ printed += fprintf(fp, "}");
+ goto out;
+ }
+
+ printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+
+ if (details->verbose) {
+ printed += perf_event_attr__fprintf(fp, &evsel->attr,
+ __print_attr__fprintf, &first);
+ } else if (details->freq) {
+ const char *term = "sample_freq";
+
+ if (!evsel->attr.freq)
+ term = "sample_period";
+
+ printed += comma_fprintf(fp, &first, " %s=%" PRIu64,
+ term, (u64)evsel->attr.sample_freq);
+ }
+
+ if (details->trace_fields) {
+ struct format_field *field;
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ printed += comma_fprintf(fp, &first, " (not a tracepoint)");
+ goto out;
+ }
+
+ field = evsel->tp_format->format.fields;
+ if (field == NULL) {
+ printed += comma_fprintf(fp, &first, " (no trace field)");
+ goto out;
+ }
+
+ printed += comma_fprintf(fp, &first, " trace_fields: %s", field->name);
+
+ field = field->next;
+ while (field) {
+ printed += comma_fprintf(fp, &first, "%s", field->name);
+ field = field->next;
+ }
+ }
+out:
+ fputc('\n', fp);
+ return ++printed;
+}
+
+int sample__fprintf_callchain(struct perf_sample *sample, int left_alignment,
+ unsigned int print_opts, struct callchain_cursor *cursor,
+ FILE *fp)
+{
+ int printed = 0;
+ struct callchain_cursor_node *node;
+ int print_ip = print_opts & EVSEL__PRINT_IP;
+ int print_sym = print_opts & EVSEL__PRINT_SYM;
+ int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+ int print_oneline = print_opts & EVSEL__PRINT_ONELINE;
+ int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+ int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+ int print_arrow = print_opts & EVSEL__PRINT_CALLCHAIN_ARROW;
+ int print_skip_ignored = print_opts & EVSEL__PRINT_SKIP_IGNORED;
+ char s = print_oneline ? ' ' : '\t';
+ bool first = true;
+
+ if (sample->callchain) {
+ struct addr_location node_al;
+
+ callchain_cursor_commit(cursor);
+
+ while (1) {
+ u64 addr = 0;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ break;
+
+ if (node->sym && node->sym->ignore && print_skip_ignored)
+ goto next;
+
+ printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+ if (print_arrow && !first)
+ printed += fprintf(fp, " <-");
+
+ if (print_ip)
+ printed += fprintf(fp, "%c%16" PRIx64, s, node->ip);
+
+ if (node->map)
+ addr = node->map->map_ip(node->map, node->ip);
+
+ if (print_sym) {
+ printed += fprintf(fp, " ");
+ node_al.addr = addr;
+ node_al.map = node->map;
+
+ if (print_symoffset) {
+ printed += __symbol__fprintf_symname_offs(node->sym, &node_al,
+ print_unknown_as_addr,
+ true, fp);
+ } else {
+ printed += __symbol__fprintf_symname(node->sym, &node_al,
+ print_unknown_as_addr, fp);
+ }
+ }
+
+ if (print_dso && (!node->sym || !node->sym->inlined)) {
+ printed += fprintf(fp, " (");
+ printed += map__fprintf_dsoname(node->map, fp);
+ printed += fprintf(fp, ")");
+ }
+
+ if (print_srcline)
+ printed += map__fprintf_srcline(node->map, addr, "\n ", fp);
+
+ if (node->sym && node->sym->inlined)
+ printed += fprintf(fp, " (inlined)");
+
+ if (!print_oneline)
+ printed += fprintf(fp, "\n");
+
+ if (symbol_conf.bt_stop_list &&
+ node->sym &&
+ strlist__has_entry(symbol_conf.bt_stop_list,
+ node->sym->name)) {
+ break;
+ }
+
+ first = false;
+next:
+ callchain_cursor_advance(cursor);
+ }
+ }
+
+ return printed;
+}
+
+int sample__fprintf_sym(struct perf_sample *sample, struct addr_location *al,
+ int left_alignment, unsigned int print_opts,
+ struct callchain_cursor *cursor, FILE *fp)
+{
+ int printed = 0;
+ int print_ip = print_opts & EVSEL__PRINT_IP;
+ int print_sym = print_opts & EVSEL__PRINT_SYM;
+ int print_dso = print_opts & EVSEL__PRINT_DSO;
+ int print_symoffset = print_opts & EVSEL__PRINT_SYMOFFSET;
+ int print_srcline = print_opts & EVSEL__PRINT_SRCLINE;
+ int print_unknown_as_addr = print_opts & EVSEL__PRINT_UNKNOWN_AS_ADDR;
+
+ if (cursor != NULL) {
+ printed += sample__fprintf_callchain(sample, left_alignment,
+ print_opts, cursor, fp);
+ } else {
+ printed += fprintf(fp, "%-*.*s", left_alignment, left_alignment, " ");
+
+ if (print_ip)
+ printed += fprintf(fp, "%16" PRIx64, sample->ip);
+
+ if (print_sym) {
+ printed += fprintf(fp, " ");
+ if (print_symoffset) {
+ printed += __symbol__fprintf_symname_offs(al->sym, al,
+ print_unknown_as_addr,
+ true, fp);
+ } else {
+ printed += __symbol__fprintf_symname(al->sym, al,
+ print_unknown_as_addr, fp);
+ }
+ }
+
+ if (print_dso) {
+ printed += fprintf(fp, " (");
+ printed += map__fprintf_dsoname(al->map, fp);
+ printed += fprintf(fp, ")");
+ }
+
+ if (print_srcline)
+ printed += map__fprintf_srcline(al->map, al->addr, "\n ", fp);
+ }
+
+ return printed;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
new file mode 100644
index 000000000..046160831
--- /dev/null
+++ b/tools/perf/util/expr.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PARSE_CTX_H
+#define PARSE_CTX_H 1
+
+#define EXPR_MAX_OTHER 15
+#define MAX_PARSE_ID EXPR_MAX_OTHER
+
+struct parse_id {
+ const char *name;
+ double val;
+};
+
+struct parse_ctx {
+ int num_ids;
+ struct parse_id ids[MAX_PARSE_ID];
+};
+
+void expr__ctx_init(struct parse_ctx *ctx);
+void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
+#ifndef IN_EXPR_Y
+int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
+#endif
+int expr__find_other(const char *p, const char *one, const char ***other,
+ int *num_other);
+
+#endif
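
This small header fronts the metric expression parser defined in expr.y below: callers seed a parse_ctx with named values, then hand the expression string to the generated parser. A hedged usage sketch (values illustrative; expr__parse() is the bison-generated entry point and returns 0 on success):

	struct parse_ctx ctx;
	const char *metric = "max(a, b) + 1";
	double result;

	expr__ctx_init(&ctx);
	expr__add_id(&ctx, "a", 2.0);
	expr__add_id(&ctx, "b", 3.0);
	if (expr__parse(&result, &ctx, &metric) == 0)
		printf("%.1f\n", result);	/* prints 4.0 */
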
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
new file mode 100644
index 000000000..e7bd19c38
--- /dev/null
+++ b/tools/perf/util/expr.y
@@ -0,0 +1,236 @@
+/* Simple expression parser */
+%{
+#include "util.h"
+#include "util/debug.h"
+#define IN_EXPR_Y 1
+#include "expr.h"
+#include "smt.h"
+#include <string.h>
+
+#define MAXIDLEN 256
+%}
+
+%define api.pure full
+
+%parse-param { double *final_val }
+%parse-param { struct parse_ctx *ctx }
+%parse-param { const char **pp }
+%lex-param { const char **pp }
+
+%union {
+ double num;
+ char id[MAXIDLEN+1];
+}
+
+%token <num> NUMBER
+%token <id> ID
+%token MIN MAX IF ELSE SMT_ON
+%left MIN MAX IF
+%left '|'
+%left '^'
+%left '&'
+%left '-' '+'
+%left '*' '/' '%'
+%left NEG NOT
+%type <num> expr if_expr
+
+%{
+static int expr__lex(YYSTYPE *res, const char **pp);
+
+static void expr__error(double *final_val __maybe_unused,
+ struct parse_ctx *ctx __maybe_unused,
+ const char **pp __maybe_unused,
+ const char *s)
+{
+ pr_debug("%s\n", s);
+}
+
+static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
+{
+ int i;
+
+ for (i = 0; i < ctx->num_ids; i++) {
+ if (!strcasecmp(ctx->ids[i].name, id)) {
+ *val = ctx->ids[i].val;
+ return 0;
+ }
+ }
+ return -1;
+}
+
+%}
+%%
+
+all_expr: if_expr { *final_val = $1; }
+ ;
+
+if_expr:
+ expr IF expr ELSE expr { $$ = $3 ? $1 : $5; }
+ | expr
+ ;
+
+expr: NUMBER
+ | ID { if (lookup_id(ctx, $1, &$$) < 0) {
+ pr_debug("%s not found\n", $1);
+ YYABORT;
+ }
+ }
+ | expr '|' expr { $$ = (long)$1 | (long)$3; }
+ | expr '&' expr { $$ = (long)$1 & (long)$3; }
+ | expr '^' expr { $$ = (long)$1 ^ (long)$3; }
+ | expr '+' expr { $$ = $1 + $3; }
+ | expr '-' expr { $$ = $1 - $3; }
+ | expr '*' expr { $$ = $1 * $3; }
+ | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; }
+ | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; }
+ | '-' expr %prec NEG { $$ = -$2; }
+ | '(' if_expr ')' { $$ = $2; }
+ | MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
+ | MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
+ | SMT_ON { $$ = smt_on() > 0; }
+ ;
+
+%%
+
+static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
+{
+ char *dst = res->id;
+ const char *s = p;
+
+ if (*p == '#')
+ *dst++ = *p++;
+
+ while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') {
+ if (p - s >= MAXIDLEN)
+ return -1;
+ /*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+ if (*p == '@')
+ *dst++ = '/';
+ else if (*p == '\\')
+ *dst++ = *++p;
+ else
+ *dst++ = *p;
+ p++;
+ }
+ *dst = 0;
+ *pp = p;
+ dst = res->id;
+ switch (dst[0]) {
+ case 'm':
+ if (!strcmp(dst, "min"))
+ return MIN;
+ if (!strcmp(dst, "max"))
+ return MAX;
+ break;
+ case 'i':
+ if (!strcmp(dst, "if"))
+ return IF;
+ break;
+ case 'e':
+ if (!strcmp(dst, "else"))
+ return ELSE;
+ break;
+ case '#':
+ if (!strcasecmp(dst, "#smt_on"))
+ return SMT_ON;
+ break;
+ }
+ return ID;
+}
+
+static int expr__lex(YYSTYPE *res, const char **pp)
+{
+ int tok;
+ const char *s;
+ const char *p = *pp;
+
+ while (isspace(*p))
+ p++;
+ s = p;
+ switch (*p++) {
+ case '#':
+ case 'a' ... 'z':
+ case 'A' ... 'Z':
+ return expr__symbol(res, p - 1, pp);
+ case '0' ... '9': case '.':
+ res->num = strtod(s, (char **)&p);
+ tok = NUMBER;
+ break;
+ default:
+ tok = *s;
+ break;
+ }
+ *pp = p;
+ return tok;
+}
+
+/* Caller must make sure id is allocated */
+void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
+{
+ int idx;
+ assert(ctx->num_ids < MAX_PARSE_ID);
+ idx = ctx->num_ids++;
+ ctx->ids[idx].name = name;
+ ctx->ids[idx].val = val;
+}
+
+void expr__ctx_init(struct parse_ctx *ctx)
+{
+ ctx->num_ids = 0;
+}
+
+static bool already_seen(const char *val, const char *one, const char **other,
+ int num_other)
+{
+ int i;
+
+ if (one && !strcasecmp(one, val))
+ return true;
+ for (i = 0; i < num_other; i++)
+ if (!strcasecmp(other[i], val))
+ return true;
+ return false;
+}
+
+int expr__find_other(const char *p, const char *one, const char ***other,
+ int *num_otherp)
+{
+ const char *orig = p;
+ int err = -1;
+ int num_other;
+
+ *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
+ if (!*other)
+ return -1;
+
+ num_other = 0;
+ for (;;) {
+ YYSTYPE val;
+ int tok = expr__lex(&val, &p);
+ if (tok == 0) {
+ err = 0;
+ break;
+ }
+ if (tok == ID && !already_seen(val.id, one, *other, num_other)) {
+ if (num_other >= EXPR_MAX_OTHER - 1) {
+ pr_debug("Too many extra events in %s\n", orig);
+ break;
+ }
+ (*other)[num_other] = strdup(val.id);
+ if (!(*other)[num_other])
+ return -1;
+ num_other++;
+ }
+ }
+ (*other)[num_other] = NULL;
+ *num_otherp = num_other;
+ if (err) {
+ *num_otherp = 0;
+ free(*other);
+ *other = NULL;
+ }
+ return err;
+}
diff --git a/tools/perf/util/find-vdso-map.c b/tools/perf/util/find-vdso-map.c
new file mode 100644
index 000000000..d7823e350
--- /dev/null
+++ b/tools/perf/util/find-vdso-map.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+static int find_vdso_map(void **start, void **end)
+{
+ FILE *maps;
+ char line[128];
+ int found = 0;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ fprintf(stderr, "vdso: cannot open maps\n");
+ return -1;
+ }
+
+ while (!found && fgets(line, sizeof(line), maps)) {
+ int m = -1;
+
+ /* We care only about private r-x mappings. */
+ if (2 != sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n",
+ start, end, &m))
+ continue;
+ if (m < 0)
+ continue;
+
+ if (!strncmp(&line[m], VDSO__MAP_NAME,
+ sizeof(VDSO__MAP_NAME) - 1))
+ found = 1;
+ }
+
+ fclose(maps);
+ return !found;
+}
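
For reference, a /proc/self/maps line this scanner is meant to match looks like (addresses illustrative):

	7ffff7fc9000-7ffff7fcb000 r-xp 00000000 00:00 0    [vdso]

The trailing %n conversion records where the pathname column starts, so the name can be compared against VDSO__MAP_NAME ("[vdso]").
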
diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c
new file mode 100644
index 000000000..aafbe54fd
--- /dev/null
+++ b/tools/perf/util/genelf.c
@@ -0,0 +1,554 @@
+/*
+ * genelf.c
+ * Copyright (C) 2014, Google, Inc
+ *
+ * Contributed by:
+ * Stephane Eranian <eranian@gmail.com>
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#ifdef HAVE_DWARF_SUPPORT
+#include <dwarf.h>
+#endif
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+#define JVMTI
+
+#define BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef HAVE_LIBCRYPTO
+
+#define BUILD_ID_MD5
+#undef BUILD_ID_SHA /* does not seem to work well when linked with Java */
+#undef BUILD_ID_URANDOM /* different uuid for each run */
+
+#ifdef BUILD_ID_SHA
+#include <openssl/sha.h>
+#endif
+
+#ifdef BUILD_ID_MD5
+#include <openssl/md5.h>
+#endif
+#endif
+
+
+typedef struct {
+ unsigned int namesz; /* Size of entry's owner string */
+ unsigned int descsz; /* Size of the note descriptor */
+ unsigned int type; /* Interpretation of the descriptor */
+ char name[0]; /* Start of the name+desc data */
+} Elf_Note;
+
+struct options {
+ char *output;
+ int fd;
+};
+
+static char shd_string_table[] = {
+ 0,
+ '.', 't', 'e', 'x', 't', 0, /* 1 */
+ '.', 's', 'h', 's', 't', 'r', 't', 'a', 'b', 0, /* 7 */
+ '.', 's', 'y', 'm', 't', 'a', 'b', 0, /* 17 */
+ '.', 's', 't', 'r', 't', 'a', 'b', 0, /* 25 */
+ '.', 'n', 'o', 't', 'e', '.', 'g', 'n', 'u', '.', 'b', 'u', 'i', 'l', 'd', '-', 'i', 'd', 0, /* 33 */
+ '.', 'd', 'e', 'b', 'u', 'g', '_', 'l', 'i', 'n', 'e', 0, /* 52 */
+ '.', 'd', 'e', 'b', 'u', 'g', '_', 'i', 'n', 'f', 'o', 0, /* 64 */
+ '.', 'd', 'e', 'b', 'u', 'g', '_', 'a', 'b', 'b', 'r', 'e', 'v', 0, /* 76 */
+ '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', '_', 'h', 'd', 'r', 0, /* 90 */
+ '.', 'e', 'h', '_', 'f', 'r', 'a', 'm', 'e', 0, /* 104 */
+};
+
+static struct buildid_note {
+ Elf_Note desc; /* descsz: size of build-id, must be multiple of 4 */
+ char name[4]; /* GNU\0 */
+ char build_id[20];
+} bnote;
+
+static Elf_Sym symtab[] = {
+ /* symbol 0 MUST be the undefined symbol */
+ { .st_name = 0, /* index in sym_string table */
+ .st_info = ELF_ST_TYPE(STT_NOTYPE),
+ .st_shndx = 0, /* for now */
+ .st_value = 0x0,
+ .st_other = ELF_ST_VIS(STV_DEFAULT),
+ .st_size = 0,
+ },
+ { .st_name = 1, /* index in sym_string table */
+ .st_info = ELF_ST_BIND(STB_LOCAL) | ELF_ST_TYPE(STT_FUNC),
+ .st_shndx = 1,
+ .st_value = 0, /* for now */
+ .st_other = ELF_ST_VIS(STV_DEFAULT),
+ .st_size = 0, /* for now */
+ }
+};
+
+#ifdef BUILD_ID_URANDOM
+static void
+gen_build_id(struct buildid_note *note,
+ unsigned long load_addr __maybe_unused,
+ const void *code __maybe_unused,
+ size_t csize __maybe_unused)
+{
+ int fd;
+ size_t sz = sizeof(note->build_id);
+ ssize_t sret;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd == -1)
+ err(1, "cannot access /dev/urandom for buildid");
+
+ sret = read(fd, note->build_id, sz);
+
+ close(fd);
+
+ if (sret != (ssize_t)sz)
+ memset(note->build_id, 0, sz);
+}
+#endif
+
+#ifdef BUILD_ID_SHA
+static void
+gen_build_id(struct buildid_note *note,
+ unsigned long load_addr __maybe_unused,
+ const void *code,
+ size_t csize)
+{
+ if (sizeof(note->build_id) < SHA_DIGEST_LENGTH)
+ errx(1, "build_id too small for SHA1");
+
+ SHA1(code, csize, (unsigned char *)note->build_id);
+}
+#endif
+
+#ifdef BUILD_ID_MD5
+static void
+gen_build_id(struct buildid_note *note, unsigned long load_addr, const void *code, size_t csize)
+{
+ MD5_CTX context;
+
+ if (sizeof(note->build_id) < 16)
+ errx(1, "build_id too small for MD5");
+
+ MD5_Init(&context);
+ MD5_Update(&context, &load_addr, sizeof(load_addr));
+ MD5_Update(&context, code, csize);
+ MD5_Final((unsigned char *)note->build_id, &context);
+}
+#endif
+
+static int
+jit_add_eh_frame_info(Elf *e, void* unwinding, uint64_t unwinding_header_size,
+ uint64_t unwinding_size, uint64_t base_offset)
+{
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Shdr *shdr;
+ uint64_t unwinding_table_size = unwinding_size - unwinding_header_size;
+
+ /*
+ * setup eh_frame section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 8;
+ d->d_off = 0LL;
+ d->d_buf = unwinding;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = unwinding_table_size;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 104;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = base_offset;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup eh_frame_hdr section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 4;
+ d->d_off = 0LL;
+ d->d_buf = unwinding + unwinding_table_size;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = unwinding_header_size;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 90;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = base_offset + unwinding_table_size;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ return 0;
+}
+
+/*
+ * fd: file descriptor open for writing for the output file
+ * load_addr: code load address (could be zero, just used for buildid)
+ * sym: function name (for native code - used as the symbol)
+ * code: the native code
+ * csize: the code size in bytes
+ */
+int
+jit_write_elf(int fd, uint64_t load_addr, const char *sym,
+ const void *code, int csize,
+ void *debug __maybe_unused, int nr_debug_entries __maybe_unused,
+ void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size)
+{
+ Elf *e;
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Ehdr *ehdr;
+ Elf_Shdr *shdr;
+ uint64_t eh_frame_base_offset;
+ char *strsym = NULL;
+ int symlen;
+ int retval = -1;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ warnx("ELF initialization failed");
+ return -1;
+ }
+
+ e = elf_begin(fd, ELF_C_WRITE, NULL);
+ if (!e) {
+ warnx("elf_begin failed");
+ goto error;
+ }
+
+ /*
+ * setup ELF header
+ */
+ ehdr = elf_newehdr(e);
+ if (!ehdr) {
+ warnx("cannot get ehdr");
+ goto error;
+ }
+
+ ehdr->e_ident[EI_DATA] = GEN_ELF_ENDIAN;
+ ehdr->e_ident[EI_CLASS] = GEN_ELF_CLASS;
+ ehdr->e_machine = GEN_ELF_ARCH;
+ ehdr->e_type = ET_DYN;
+ ehdr->e_entry = GEN_ELF_TEXT_OFFSET;
+ ehdr->e_version = EV_CURRENT;
+	ehdr->e_shstrndx = unwinding ? 4 : 2; /* section index of .shstrtab */
+
+ /*
+ * setup text section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 16;
+ d->d_off = 0LL;
+ d->d_buf = (void *)code;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = csize;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 1;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = GEN_ELF_TEXT_OFFSET;
+ shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ shdr->sh_entsize = 0;
+
+ /*
+ * Setup .eh_frame_hdr and .eh_frame
+ */
+ if (unwinding) {
+ eh_frame_base_offset = ALIGN_8(GEN_ELF_TEXT_OFFSET + csize);
+ retval = jit_add_eh_frame_info(e, unwinding,
+ unwinding_header_size, unwinding_size,
+ eh_frame_base_offset);
+ if (retval)
+ goto error;
+ }
+
+ /*
+ * setup section headers string table
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = shd_string_table;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = sizeof(shd_string_table);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 7; /* offset of '.shstrtab' in shd_string_table */
+ shdr->sh_type = SHT_STRTAB;
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup symtab section
+ */
+ symtab[1].st_size = csize;
+ symtab[1].st_value = GEN_ELF_TEXT_OFFSET;
+
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 8;
+ d->d_off = 0LL;
+ d->d_buf = symtab;
+ d->d_type = ELF_T_SYM;
+ d->d_size = sizeof(symtab);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 17; /* offset of '.symtab' in shd_string_table */
+ shdr->sh_type = SHT_SYMTAB;
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = sizeof(Elf_Sym);
+ shdr->sh_link = unwinding ? 6 : 4; /* index of .strtab section */
+
+ /*
+ * setup symbols string table
+	 * 2 = one byte for the empty string of entry 0, one for the NUL
+	 * terminating the symbol name of entry 1
+ */
+ symlen = 2 + strlen(sym);
+ strsym = calloc(1, symlen);
+ if (!strsym) {
+ warnx("cannot allocate strsym");
+ goto error;
+ }
+ strcpy(strsym + 1, sym);
+
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = strsym;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = symlen;
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 25; /* offset in shd_string_table */
+ shdr->sh_type = SHT_STRTAB;
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup build-id section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ goto error;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ goto error;
+ }
+
+ /*
+ * build-id generation
+ */
+ gen_build_id(&bnote, load_addr, code, csize);
+ bnote.desc.namesz = sizeof(bnote.name); /* must include 0 termination */
+ bnote.desc.descsz = sizeof(bnote.build_id);
+ bnote.desc.type = NT_GNU_BUILD_ID;
+ strcpy(bnote.name, "GNU");
+
+ d->d_align = 4;
+ d->d_off = 0LL;
+ d->d_buf = &bnote;
+ d->d_type = ELF_T_BYTE;
+ d->d_size = sizeof(bnote);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ goto error;
+ }
+
+ shdr->sh_name = 33; /* offset in shd_string_table */
+ shdr->sh_type = SHT_NOTE;
+ shdr->sh_addr = 0x0;
+ shdr->sh_flags = SHF_ALLOC;
+ shdr->sh_size = sizeof(bnote);
+ shdr->sh_entsize = 0;
+
+#ifdef HAVE_DWARF_SUPPORT
+ if (debug && nr_debug_entries) {
+ retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries);
+ if (retval)
+ goto error;
+ } else
+#endif
+ {
+ if (elf_update(e, ELF_C_WRITE) < 0) {
+ warnx("elf_update 4 failed");
+ goto error;
+ }
+ }
+
+ retval = 0;
+error:
+ (void)elf_end(e);
+
+ free(strsym);
+
+ return retval;
+}
+
+#ifndef JVMTI
+
+static unsigned char x86_code[] = {
+ 0xBB, 0x2A, 0x00, 0x00, 0x00, /* movl $42, %ebx */
+ 0xB8, 0x01, 0x00, 0x00, 0x00, /* movl $1, %eax */
+ 0xCD, 0x80 /* int $0x80 */
+};
+
+static struct options options;
+
+int main(int argc, char **argv)
+{
+ int c, fd, ret;
+
+ while ((c = getopt(argc, argv, "o:h")) != -1) {
+ switch (c) {
+ case 'o':
+ options.output = optarg;
+ break;
+ case 'h':
+ printf("Usage: genelf -o output_file [-h]\n");
+ return 0;
+ default:
+ errx(1, "unknown option");
+ }
+ }
+
+ fd = open(options.output, O_CREAT|O_TRUNC|O_RDWR, 0666);
+ if (fd == -1)
+ err(1, "cannot create file %s", options.output);
+
+	ret = jit_write_elf(fd, 0, "main", x86_code, sizeof(x86_code),
+			    NULL, 0, NULL, 0, 0);
+ close(fd);
+
+ if (ret != 0)
+ unlink(options.output);
+
+ return ret;
+}
+#endif
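
Within perf this writer is driven by the jitdump code; a minimal sketch of a call (the file name, load address, and code buffer are made-up placeholders):

	int fd = open("/tmp/jitted-1.so", O_CREAT | O_TRUNC | O_RDWR, 0644);

	if (fd >= 0) {
		jit_write_elf(fd, 0x40000000ULL, "jit_fn_0",
			      code, code_size,	/* native code blob */
			      NULL, 0,		/* no debug entries */
			      NULL, 0, 0);	/* no unwind info */
		close(fd);
	}
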
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
new file mode 100644
index 000000000..de322d51c
--- /dev/null
+++ b/tools/perf/util/genelf.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __GENELF_H__
+#define __GENELF_H__
+
+/* genelf.c */
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+ const void *code, int csize, void *debug, int nr_debug_entries,
+ void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size);
+#ifdef HAVE_DWARF_SUPPORT
+/* genelf_debug.c */
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
+#endif
+
+#if defined(__arm__)
+#define GEN_ELF_ARCH EM_ARM
+#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__aarch64__)
+#define GEN_ELF_ARCH EM_AARCH64
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__x86_64__)
+#define GEN_ELF_ARCH EM_X86_64
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__i386__)
+#define GEN_ELF_ARCH EM_386
+#define GEN_ELF_CLASS ELFCLASS32
+#elif defined(__powerpc64__)
+#define GEN_ELF_ARCH EM_PPC64
+#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH EM_PPC
+#define GEN_ELF_CLASS ELFCLASS32
+#else
+#error "unsupported architecture"
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#endif
+
+#if GEN_ELF_CLASS == ELFCLASS64
+#define elf_newehdr elf64_newehdr
+#define elf_getshdr elf64_getshdr
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define ELF_ST_TYPE(a) ELF64_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF64_ST_BIND(a)
+#define ELF_ST_VIS(a) ELF64_ST_VISIBILITY(a)
+#else
+#define elf_newehdr elf32_newehdr
+#define elf_getshdr elf32_getshdr
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define ELF_ST_TYPE(a) ELF32_ST_TYPE(a)
+#define ELF_ST_BIND(a) ELF32_ST_BIND(a)
+#define ELF_ST_VIS(a) ELF32_ST_VISIBILITY(a)
+#endif
+
+/* The .text section is directly after the ELF header */
+#define GEN_ELF_TEXT_OFFSET sizeof(Elf_Ehdr)
+
+#endif
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
new file mode 100644
index 000000000..40789d860
--- /dev/null
+++ b/tools/perf/util/genelf_debug.c
@@ -0,0 +1,611 @@
+/*
+ * genelf_debug.c
+ * Copyright (C) 2015, Google, Inc
+ *
+ * Contributed by:
+ * Stephane Eranian <eranian@google.com>
+ *
+ * Released under the GPL v2.
+ *
+ * based on GPLv2 source code from Oprofile
+ * @remark Copyright 2007 OProfile authors
+ * @author Philippe Elie
+ */
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <libelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <err.h>
+#include <dwarf.h>
+
+#include "perf.h"
+#include "genelf.h"
+#include "../util/jitdump.h"
+
+#define BUFFER_EXT_DFL_SIZE (4 * 1024)
+
+typedef uint32_t uword;
+typedef uint16_t uhalf;
+typedef int32_t sword;
+typedef int16_t shalf;
+typedef uint8_t ubyte;
+typedef int8_t sbyte;
+
+struct buffer_ext {
+ size_t cur_pos;
+ size_t max_sz;
+ void *data;
+};
+
+static void
+buffer_ext_dump(struct buffer_ext *be, const char *msg)
+{
+ size_t i;
+ warnx("DUMP for %s", msg);
+ for (i = 0 ; i < be->cur_pos; i++)
+ warnx("%4zu 0x%02x", i, (((char *)be->data)[i]) & 0xff);
+}
+
+static inline int
+buffer_ext_add(struct buffer_ext *be, void *addr, size_t sz)
+{
+ void *tmp;
+ size_t be_sz = be->max_sz;
+
+retry:
+ if ((be->cur_pos + sz) < be_sz) {
+ memcpy(be->data + be->cur_pos, addr, sz);
+ be->cur_pos += sz;
+ return 0;
+ }
+
+ if (!be_sz)
+ be_sz = BUFFER_EXT_DFL_SIZE;
+ else
+ be_sz <<= 1;
+
+ tmp = realloc(be->data, be_sz);
+ if (!tmp)
+ return -1;
+
+ be->data = tmp;
+ be->max_sz = be_sz;
+
+ goto retry;
+}
+
+static void
+buffer_ext_init(struct buffer_ext *be)
+{
+ be->data = NULL;
+ be->cur_pos = 0;
+ be->max_sz = 0;
+}
+
+static inline size_t
+buffer_ext_size(struct buffer_ext *be)
+{
+ return be->cur_pos;
+}
+
+static inline void *
+buffer_ext_addr(struct buffer_ext *be)
+{
+ return be->data;
+}
+
+struct debug_line_header {
+ // Not counting this field
+ uword total_length;
+ // version number (2 currently)
+ uhalf version;
+ // relative offset from next field to
+ // program statement
+ uword prolog_length;
+ ubyte minimum_instruction_length;
+ ubyte default_is_stmt;
+ // line_base - see DWARF 2 specs
+ sbyte line_base;
+ // line_range - see DWARF 2 specs
+ ubyte line_range;
+ // number of opcode + 1
+ ubyte opcode_base;
+	/* followed by the array of opcode argument counts: ubyte [opcode_base - 1] */
+	/* followed by the include directories: zero-terminated strings, the
+	 * list itself being terminated by an empty string.
+	 */
+	/* followed by an array of { filename, LEB128, LEB128, LEB128 } entries:
+	 * the first LEB128 is the directory index (0 means current directory),
+	 * then the mtime and the file size; the last entry is followed by an
+	 * empty string.
+	 */
+ /* follow the first program statement */
+} __packed;
+
+/* The DWARF 2 spec describes only one compilation unit header, while
+ * binutils handles two flavours of DWARF 2, 32 and 64 bit; this is not
+ * related to the target architecture, as a 32-bit ELF file can hold more
+ * than 4 GB of debug information. For now we handle only 32-bit DWARF 2
+ * compilation units; it only becomes a problem if we generate more than
+ * 4 GB of debug information.
+ */
+struct compilation_unit_header {
+ uword total_length;
+ uhalf version;
+ uword debug_abbrev_offset;
+ ubyte pointer_size;
+} __packed;
+
+#define DW_LNS_num_opcode (DW_LNS_set_isa + 1)
+
+/* field filled at run time are marked with -1 */
+static struct debug_line_header const default_debug_line_header = {
+ .total_length = -1,
+ .version = 2,
+ .prolog_length = -1,
+ .minimum_instruction_length = 1, /* could be better when min instruction size != 1 */
+ .default_is_stmt = 1, /* we don't take care about basic block */
+ .line_base = -5, /* sensible value for line base ... */
+ .line_range = -14, /* ... and line range are guessed statically */
+ .opcode_base = DW_LNS_num_opcode
+};
+
+static ubyte standard_opcode_length[] =
+{
+ 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1
+};
+#if 0
+{
+ [DW_LNS_advance_pc] = 1,
+ [DW_LNS_advance_line] = 1,
+ [DW_LNS_set_file] = 1,
+ [DW_LNS_set_column] = 1,
+ [DW_LNS_fixed_advance_pc] = 1,
+ [DW_LNS_set_isa] = 1,
+};
+#endif
+
+/* field filled at run time are marked with -1 */
+static struct compilation_unit_header default_comp_unit_header = {
+ .total_length = -1,
+ .version = 2,
+ .debug_abbrev_offset = 0, /* we reuse the same abbrev entries for all comp unit */
+ .pointer_size = sizeof(void *)
+};
+
+static void emit_uword(struct buffer_ext *be, uword data)
+{
+ buffer_ext_add(be, &data, sizeof(uword));
+}
+
+static void emit_string(struct buffer_ext *be, const char *s)
+{
+ buffer_ext_add(be, (void *)s, strlen(s) + 1);
+}
+
+static void emit_unsigned_LEB128(struct buffer_ext *be,
+ unsigned long data)
+{
+ do {
+ ubyte cur = data & 0x7F;
+ data >>= 7;
+ if (data)
+ cur |= 0x80;
+ buffer_ext_add(be, &cur, 1);
+ } while (data);
+}
+
+static void emit_signed_LEB128(struct buffer_ext *be, long data)
+{
+ int more = 1;
+ int negative = data < 0;
+ int size = sizeof(long) * CHAR_BIT;
+ while (more) {
+ ubyte cur = data & 0x7F;
+ data >>= 7;
+ if (negative)
+ data |= - (1 << (size - 7));
+ if ((data == 0 && !(cur & 0x40)) ||
+ (data == -1l && (cur & 0x40)))
+ more = 0;
+ else
+ cur |= 0x80;
+ buffer_ext_add(be, &cur, 1);
+ }
+}
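+
+/*
+ * Example (illustrative): the unsigned LEB128 encoding of 624485 is the
+ * byte sequence 0xe5 0x8e 0x26 -- seven payload bits per byte, least
+ * significant group first, with the high bit set on every byte except
+ * the last. This is the classic example from the DWARF specification.
+ */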
+
+static void emit_extended_opcode(struct buffer_ext *be, ubyte opcode,
+ void *data, size_t data_len)
+{
+ buffer_ext_add(be, (char *)"", 1);
+
+ emit_unsigned_LEB128(be, data_len + 1);
+
+ buffer_ext_add(be, &opcode, 1);
+ buffer_ext_add(be, data, data_len);
+}
+
+static void emit_opcode(struct buffer_ext *be, ubyte opcode)
+{
+ buffer_ext_add(be, &opcode, 1);
+}
+
+static void emit_opcode_signed(struct buffer_ext *be,
+ ubyte opcode, long data)
+{
+ buffer_ext_add(be, &opcode, 1);
+ emit_signed_LEB128(be, data);
+}
+
+static void emit_opcode_unsigned(struct buffer_ext *be, ubyte opcode,
+ unsigned long data)
+{
+ buffer_ext_add(be, &opcode, 1);
+ emit_unsigned_LEB128(be, data);
+}
+
+static void emit_advance_pc(struct buffer_ext *be, unsigned long delta_pc)
+{
+ emit_opcode_unsigned(be, DW_LNS_advance_pc, delta_pc);
+}
+
+static void emit_advance_lineno(struct buffer_ext *be, long delta_lineno)
+{
+ emit_opcode_signed(be, DW_LNS_advance_line, delta_lineno);
+}
+
+static void emit_lne_end_of_sequence(struct buffer_ext *be)
+{
+ emit_extended_opcode(be, DW_LNE_end_sequence, NULL, 0);
+}
+
+static void emit_set_file(struct buffer_ext *be, unsigned long idx)
+{
+ emit_opcode_unsigned(be, DW_LNS_set_file, idx);
+}
+
+static void emit_lne_define_filename(struct buffer_ext *be,
+ const char *filename)
+{
+ buffer_ext_add(be, (void *)"", 1);
+
+	/* LNE payload length: the opcode byte, the filename incl. its NUL,
+	 * and 3 one-byte LEB128s: the dir index, the mtime and the filesize */
+ emit_unsigned_LEB128(be, strlen(filename) + 5);
+ emit_opcode(be, DW_LNE_define_file);
+ emit_string(be, filename);
+ /* directory index 0=do not know */
+ emit_unsigned_LEB128(be, 0);
+ /* last modification date on file 0=do not know */
+ emit_unsigned_LEB128(be, 0);
+ /* filesize 0=do not know */
+ emit_unsigned_LEB128(be, 0);
+}
+
+static void emit_lne_set_address(struct buffer_ext *be,
+ void *address)
+{
+ emit_extended_opcode(be, DW_LNE_set_address, &address, sizeof(unsigned long));
+}
+
+static ubyte get_special_opcode(struct debug_entry *ent,
+ unsigned int last_line,
+ unsigned long last_vma)
+{
+ unsigned int temp;
+ unsigned long delta_addr;
+
+ /*
+ * delta from line_base
+ */
+ temp = (ent->lineno - last_line) - default_debug_line_header.line_base;
+
+ if (temp >= default_debug_line_header.line_range)
+ return 0;
+
+ /*
+ * delta of addresses
+ */
+ delta_addr = (ent->addr - last_vma) / default_debug_line_header.minimum_instruction_length;
+
+	/* This alone does not guarantee the opcode fits in [0-255], but it
+	 * does guarantee that adding the line delta cannot overflow the
+	 * unsigned long opcode; the final range check happens below. */
+
+ if (delta_addr <= 256 / default_debug_line_header.line_range) {
+ unsigned long opcode = temp +
+ (delta_addr * default_debug_line_header.line_range) +
+ default_debug_line_header.opcode_base;
+
+ return opcode <= 255 ? opcode : 0;
+ }
+ return 0;
+}
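+
+/*
+ * Note (editorial): this follows the standard DWARF line-program special
+ * opcode formula,
+ *
+ *	opcode = (line_delta - line_base) +
+ *		 (addr_delta * line_range) + opcode_base
+ *
+ * returning 0 when the delta pair cannot be encoded in a single special
+ * opcode, in which case the caller falls back to explicit
+ * DW_LNS_advance_line / DW_LNS_advance_pc opcodes.
+ */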
+
+static void emit_lineno_info(struct buffer_ext *be,
+ struct debug_entry *ent, size_t nr_entry,
+ unsigned long code_addr)
+{
+ size_t i;
+
+ /*
+ * Machine state at start of a statement program
+ * address = 0
+ * file = 1
+ * line = 1
+ * column = 0
+ * is_stmt = default_is_stmt as given in the debug_line_header
+ * basic block = 0
+ * end sequence = 0
+ */
+
+ /* start state of the state machine we take care of */
+ unsigned long last_vma = code_addr;
+ char const *cur_filename = NULL;
+ unsigned long cur_file_idx = 0;
+ int last_line = 1;
+
+ emit_lne_set_address(be, (void *)code_addr);
+
+ for (i = 0; i < nr_entry; i++, ent = debug_entry_next(ent)) {
+ int need_copy = 0;
+ ubyte special_opcode;
+
+ /*
+ * check if filename changed, if so add it
+ */
+ if (!cur_filename || strcmp(cur_filename, ent->name)) {
+ emit_lne_define_filename(be, ent->name);
+ cur_filename = ent->name;
+ emit_set_file(be, ++cur_file_idx);
+ need_copy = 1;
+ }
+
+ special_opcode = get_special_opcode(ent, last_line, last_vma);
+ if (special_opcode != 0) {
+ last_line = ent->lineno;
+ last_vma = ent->addr;
+ emit_opcode(be, special_opcode);
+ } else {
+ /*
+ * lines differ, emit line delta
+ */
+ if (last_line != ent->lineno) {
+ emit_advance_lineno(be, ent->lineno - last_line);
+ last_line = ent->lineno;
+ need_copy = 1;
+ }
+ /*
+ * addresses differ, emit address delta
+ */
+ if (last_vma != ent->addr) {
+ emit_advance_pc(be, ent->addr - last_vma);
+ last_vma = ent->addr;
+ need_copy = 1;
+ }
+ /*
+ * add new row to matrix
+ */
+ if (need_copy)
+ emit_opcode(be, DW_LNS_copy);
+ }
+ }
+}
+
+static void add_debug_line(struct buffer_ext *be,
+ struct debug_entry *ent, size_t nr_entry,
+ unsigned long code_addr)
+{
+ struct debug_line_header * dbg_header;
+ size_t old_size;
+
+ old_size = buffer_ext_size(be);
+
+ buffer_ext_add(be, (void *)&default_debug_line_header,
+ sizeof(default_debug_line_header));
+
+ buffer_ext_add(be, &standard_opcode_length, sizeof(standard_opcode_length));
+
+ // empty directory entry
+ buffer_ext_add(be, (void *)"", 1);
+
+ // empty filename directory
+ buffer_ext_add(be, (void *)"", 1);
+
+ dbg_header = buffer_ext_addr(be) + old_size;
+ dbg_header->prolog_length = (buffer_ext_size(be) - old_size) -
+ offsetof(struct debug_line_header, minimum_instruction_length);
+
+ emit_lineno_info(be, ent, nr_entry, code_addr);
+
+ emit_lne_end_of_sequence(be);
+
+ dbg_header = buffer_ext_addr(be) + old_size;
+ dbg_header->total_length = (buffer_ext_size(be) - old_size) -
+ offsetof(struct debug_line_header, version);
+}
+
+static void
+add_debug_abbrev(struct buffer_ext *be)
+{
+ emit_unsigned_LEB128(be, 1);
+ emit_unsigned_LEB128(be, DW_TAG_compile_unit);
+ emit_unsigned_LEB128(be, DW_CHILDREN_yes);
+ emit_unsigned_LEB128(be, DW_AT_stmt_list);
+ emit_unsigned_LEB128(be, DW_FORM_data4);
+ emit_unsigned_LEB128(be, 0);
+ emit_unsigned_LEB128(be, 0);
+ emit_unsigned_LEB128(be, 0);
+}
+
+static void
+add_compilation_unit(struct buffer_ext *be,
+ size_t offset_debug_line)
+{
+ struct compilation_unit_header *comp_unit_header;
+ size_t old_size = buffer_ext_size(be);
+
+ buffer_ext_add(be, &default_comp_unit_header,
+ sizeof(default_comp_unit_header));
+
+ emit_unsigned_LEB128(be, 1);
+ emit_uword(be, offset_debug_line);
+
+ comp_unit_header = buffer_ext_addr(be) + old_size;
+ comp_unit_header->total_length = (buffer_ext_size(be) - old_size) -
+ offsetof(struct compilation_unit_header, version);
+}
+
+static int
+jit_process_debug_info(uint64_t code_addr,
+ void *debug, int nr_debug_entries,
+ struct buffer_ext *dl,
+ struct buffer_ext *da,
+ struct buffer_ext *di)
+{
+ struct debug_entry *ent = debug;
+ int i;
+
+ for (i = 0; i < nr_debug_entries; i++) {
+ ent->addr = ent->addr - code_addr;
+ ent = debug_entry_next(ent);
+ }
+ add_compilation_unit(di, buffer_ext_size(dl));
+ add_debug_line(dl, debug, nr_debug_entries, 0);
+ add_debug_abbrev(da);
+ if (0) buffer_ext_dump(da, "abbrev");
+
+ return 0;
+}
+
+int
+jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries)
+{
+ Elf_Data *d;
+ Elf_Scn *scn;
+ Elf_Shdr *shdr;
+ struct buffer_ext dl, di, da;
+ int ret;
+
+ buffer_ext_init(&dl);
+ buffer_ext_init(&di);
+ buffer_ext_init(&da);
+
+ ret = jit_process_debug_info(code_addr, debug, nr_debug_entries, &dl, &da, &di);
+ if (ret)
+ return -1;
+ /*
+ * setup .debug_line section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = buffer_ext_addr(&dl);
+ d->d_type = ELF_T_BYTE;
+ d->d_size = buffer_ext_size(&dl);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 52; /* .debug_line */
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup .debug_info section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = buffer_ext_addr(&di);
+ d->d_type = ELF_T_BYTE;
+ d->d_size = buffer_ext_size(&di);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+ shdr->sh_name = 64; /* .debug_info */
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * setup .debug_abbrev section
+ */
+ scn = elf_newscn(e);
+ if (!scn) {
+ warnx("cannot create section");
+ return -1;
+ }
+
+ d = elf_newdata(scn);
+ if (!d) {
+ warnx("cannot get new data");
+ return -1;
+ }
+
+ d->d_align = 1;
+ d->d_off = 0LL;
+ d->d_buf = buffer_ext_addr(&da);
+ d->d_type = ELF_T_BYTE;
+ d->d_size = buffer_ext_size(&da);
+ d->d_version = EV_CURRENT;
+
+ shdr = elf_getshdr(scn);
+ if (!shdr) {
+ warnx("cannot get section header");
+ return -1;
+ }
+
+	shdr->sh_name = 76; /* .debug_abbrev */
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_addr = 0; /* must be zero or == sh_offset -> dynamic object */
+ shdr->sh_flags = 0;
+ shdr->sh_entsize = 0;
+
+ /*
+ * now we update the ELF image with all the sections
+ */
+ if (elf_update(e, ELF_C_WRITE) < 0) {
+ warnx("elf_update debug failed");
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh
new file mode 100755
index 000000000..c3cef36d4
--- /dev/null
+++ b/tools/perf/util/generate-cmdlist.sh
@@ -0,0 +1,55 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "/* Automatically generated by $0 */
+struct cmdname_help
+{
+ char name[16];
+ char help[80];
+};
+
+static struct cmdname_help common_cmds[] = {"
+
+sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+
+echo "#ifdef HAVE_LIBELF_SUPPORT"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* full.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+
+echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)"
+sed -n -e 's/^perf-\([^ ]*\)[ ].* audit.*/\1/p' command-list.txt |
+sort |
+while read cmd
+do
+ sed -n '
+ /^NAME/,/perf-'"$cmd"'/H
+ ${
+ x
+ s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/
+ p
+ }' "Documentation/perf-$cmd.txt"
+done
+echo "#endif /* HAVE_LIBELF_SUPPORT */"
+echo "};"
diff --git a/tools/perf/util/get_current_dir_name.c b/tools/perf/util/get_current_dir_name.c
new file mode 100644
index 000000000..267aa609a
--- /dev/null
+++ b/tools/perf/util/get_current_dir_name.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+//
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+#include "util.h"
+#include <unistd.h>
+#include <stdlib.h>
+
+/* Android's 'bionic' library, for one, doesn't have this */
+
+char *get_current_dir_name(void)
+{
+ char pwd[PATH_MAX];
+
+ return getcwd(pwd, sizeof(pwd)) == NULL ? NULL : strdup(pwd);
+}
+#endif // HAVE_GET_CURRENT_DIR_NAME
diff --git a/tools/perf/util/group.h b/tools/perf/util/group.h
new file mode 100644
index 000000000..f36c7e317
--- /dev/null
+++ b/tools/perf/util/group.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef GROUP_H
+#define GROUP_H 1
+
+bool arch_topdown_check_group(bool *warn);
+void arch_topdown_group_warn(void);
+
+#endif
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
new file mode 100644
index 000000000..3c0d74fc1
--- /dev/null
+++ b/tools/perf/util/header.c
@@ -0,0 +1,3946 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include "util.h"
+#include "string2.h"
+#include <sys/param.h>
+#include <sys/types.h>
+#include <byteswap.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/stringify.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <linux/time64.h>
+#include <dirent.h>
+
+#include "evlist.h"
+#include "evsel.h"
+#include "header.h"
+#include "memswap.h"
+#include "../perf.h"
+#include "trace-event.h"
+#include "session.h"
+#include "symbol.h"
+#include "debug.h"
+#include "cpumap.h"
+#include "pmu.h"
+#include "vdso.h"
+#include "strbuf.h"
+#include "build-id.h"
+#include "data.h"
+#include <api/fs/fs.h>
+#include "asm/bug.h"
+#include "tool.h"
+#include "time-utils.h"
+#include "units.h"
+
+#include "sane_ctype.h"
+
+/*
+ * magic2 = "PERFILE2"
+ * must be a numerical value so that the endianness
+ * determines the memory layout. That way we are able
+ * to detect the endianness when reading the perf.data
+ * file back.
+ *
+ * We also check for the legacy (PERFFILE) format.
+ */
+static const char *__perf_magic1 = "PERFFILE";
+static const u64 __perf_magic2 = 0x32454c4946524550ULL;
+static const u64 __perf_magic2_sw = 0x50455246494c4532ULL;
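+
+/*
+ * Example (illustrative): a reader on the opposite endianness sees
+ * __perf_magic2_sw instead of __perf_magic2, so magic detection boils
+ * down to:
+ *
+ *	if (magic == __perf_magic2)
+ *		ph->needs_swap = false;
+ *	else if (magic == __perf_magic2_sw)
+ *		ph->needs_swap = true;
+ */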
+
+#define PERF_MAGIC __perf_magic2
+
+const char perf_version_string[] = PERF_VERSION;
+
+struct perf_file_attr {
+ struct perf_event_attr attr;
+ struct perf_file_section ids;
+};
+
+struct feat_fd {
+ struct perf_header *ph;
+ int fd;
+ void *buf; /* Either buf != NULL or fd >= 0 */
+ ssize_t offset;
+ size_t size;
+ struct perf_evsel *events;
+};
+
+void perf_header__set_feat(struct perf_header *header, int feat)
+{
+ set_bit(feat, header->adds_features);
+}
+
+void perf_header__clear_feat(struct perf_header *header, int feat)
+{
+ clear_bit(feat, header->adds_features);
+}
+
+bool perf_header__has_feat(const struct perf_header *header, int feat)
+{
+ return test_bit(feat, header->adds_features);
+}
+
+static int __do_write_fd(struct feat_fd *ff, const void *buf, size_t size)
+{
+ ssize_t ret = writen(ff->fd, buf, size);
+
+ if (ret != (ssize_t)size)
+ return ret < 0 ? (int)ret : -1;
+ return 0;
+}
+
+static int __do_write_buf(struct feat_fd *ff, const void *buf, size_t size)
+{
+ /* struct perf_event_header::size is u16 */
+ const size_t max_size = 0xffff - sizeof(struct perf_event_header);
+ size_t new_size = ff->size;
+ void *addr;
+
+ if (size + ff->offset > max_size)
+ return -E2BIG;
+
+ while (size > (new_size - ff->offset))
+ new_size <<= 1;
+ new_size = min(max_size, new_size);
+
+ if (ff->size < new_size) {
+ addr = realloc(ff->buf, new_size);
+ if (!addr)
+ return -ENOMEM;
+ ff->buf = addr;
+ ff->size = new_size;
+ }
+
+ memcpy(ff->buf + ff->offset, buf, size);
+ ff->offset += size;
+
+ return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+int do_write(struct feat_fd *ff, const void *buf, size_t size)
+{
+ if (!ff->buf)
+ return __do_write_fd(ff, buf, size);
+ return __do_write_buf(ff, buf, size);
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_bitmap(struct feat_fd *ff, unsigned long *set, u64 size)
+{
+ u64 *p = (u64 *) set;
+ int i, ret;
+
+ ret = do_write(ff, &size, sizeof(size));
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+ ret = do_write(ff, p + i, sizeof(*p));
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+int write_padded(struct feat_fd *ff, const void *bf,
+ size_t count, size_t count_aligned)
+{
+ static const char zero_buf[NAME_ALIGN];
+ int err = do_write(ff, bf, count);
+
+ if (!err)
+ err = do_write(ff, zero_buf, count_aligned - count);
+
+ return err;
+}
+
+#define string_size(str) \
+ (PERF_ALIGN((strlen(str) + 1), NAME_ALIGN) + sizeof(u32))
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_write_string(struct feat_fd *ff, const char *str)
+{
+ u32 len, olen;
+ int ret;
+
+ olen = strlen(str) + 1;
+ len = PERF_ALIGN(olen, NAME_ALIGN);
+
+ /* write len, incl. \0 */
+ ret = do_write(ff, &len, sizeof(len));
+ if (ret < 0)
+ return ret;
+
+ return write_padded(ff, str, olen, len);
+}
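+
+/*
+ * Example (illustrative): with NAME_ALIGN being 64 in this tree, writing
+ * the string "abc" emits the u32 length 64 followed by "abc\0" plus 60
+ * zero-padding bytes; do_read_string() below relies on that padding when
+ * it treats the read-back buffer as NUL-terminated.
+ */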
+
+static int __do_read_fd(struct feat_fd *ff, void *addr, ssize_t size)
+{
+ ssize_t ret = readn(ff->fd, addr, size);
+
+ if (ret != size)
+ return ret < 0 ? (int)ret : -1;
+ return 0;
+}
+
+static int __do_read_buf(struct feat_fd *ff, void *addr, ssize_t size)
+{
+ if (size > (ssize_t)ff->size - ff->offset)
+ return -1;
+
+ memcpy(addr, ff->buf + ff->offset, size);
+ ff->offset += size;
+
+ return 0;
+
+}
+
+static int __do_read(struct feat_fd *ff, void *addr, ssize_t size)
+{
+ if (!ff->buf)
+ return __do_read_fd(ff, addr, size);
+ return __do_read_buf(ff, addr, size);
+}
+
+static int do_read_u32(struct feat_fd *ff, u32 *addr)
+{
+ int ret;
+
+ ret = __do_read(ff, addr, sizeof(*addr));
+ if (ret)
+ return ret;
+
+ if (ff->ph->needs_swap)
+ *addr = bswap_32(*addr);
+ return 0;
+}
+
+static int do_read_u64(struct feat_fd *ff, u64 *addr)
+{
+ int ret;
+
+ ret = __do_read(ff, addr, sizeof(*addr));
+ if (ret)
+ return ret;
+
+ if (ff->ph->needs_swap)
+ *addr = bswap_64(*addr);
+ return 0;
+}
+
+static char *do_read_string(struct feat_fd *ff)
+{
+ u32 len;
+ char *buf;
+
+ if (do_read_u32(ff, &len))
+ return NULL;
+
+ buf = malloc(len);
+ if (!buf)
+ return NULL;
+
+ if (!__do_read(ff, buf, len)) {
+ /*
+ * Strings are padded with zeroes,
+ * so the actual strlen of buf
+ * may be less than len.
+ */
+ return buf;
+ }
+
+ free(buf);
+ return NULL;
+}
+
+/* Return: 0 if succeeded, -ERR if failed. */
+static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
+{
+ unsigned long *set;
+ u64 size, *p;
+ int i, ret;
+
+ ret = do_read_u64(ff, &size);
+ if (ret)
+ return ret;
+
+ set = bitmap_alloc(size);
+ if (!set)
+ return -ENOMEM;
+
+ p = (u64 *) set;
+
+ for (i = 0; (u64) i < BITS_TO_U64(size); i++) {
+ ret = do_read_u64(ff, p + i);
+ if (ret < 0) {
+ free(set);
+ return ret;
+ }
+ }
+
+ *pset = set;
+ *psize = size;
+ return 0;
+}
+
+static int write_tracing_data(struct feat_fd *ff,
+ struct perf_evlist *evlist)
+{
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ return read_tracing_data(ff->fd, &evlist->entries);
+}
+
+static int write_build_id(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ if (!perf_session__read_build_ids(session, true))
+ return -1;
+
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ err = perf_session__write_buildid_table(session, ff);
+ if (err < 0) {
+ pr_debug("failed to write buildid table\n");
+ return err;
+ }
+ perf_session__cache_build_ids(session);
+
+ return 0;
+}
+
+static int write_hostname(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct utsname uts;
+ int ret;
+
+ ret = uname(&uts);
+ if (ret < 0)
+ return -1;
+
+ return do_write_string(ff, uts.nodename);
+}
+
+static int write_osrelease(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct utsname uts;
+ int ret;
+
+ ret = uname(&uts);
+ if (ret < 0)
+ return -1;
+
+ return do_write_string(ff, uts.release);
+}
+
+static int write_arch(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct utsname uts;
+ int ret;
+
+ ret = uname(&uts);
+ if (ret < 0)
+ return -1;
+
+ return do_write_string(ff, uts.machine);
+}
+
+static int write_version(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return do_write_string(ff, perf_version_string);
+}
+
+static int __write_cpudesc(struct feat_fd *ff, const char *cpuinfo_proc)
+{
+ FILE *file;
+ char *buf = NULL;
+ char *s, *p;
+ const char *search = cpuinfo_proc;
+ size_t len = 0;
+ int ret = -1;
+
+ if (!search)
+ return -1;
+
+ file = fopen("/proc/cpuinfo", "r");
+ if (!file)
+ return -1;
+
+ while (getline(&buf, &len, file) > 0) {
+ ret = strncmp(buf, search, strlen(search));
+ if (!ret)
+ break;
+ }
+
+ if (ret) {
+ ret = -1;
+ goto done;
+ }
+
+ s = buf;
+
+ p = strchr(buf, ':');
+ if (p && *(p+1) == ' ' && *(p+2))
+ s = p + 2;
+ p = strchr(s, '\n');
+ if (p)
+ *p = '\0';
+
+ /* squash extra space characters (branding string) */
+ p = s;
+ while (*p) {
+ if (isspace(*p)) {
+ char *r = p + 1;
+ char *q = r;
+ *p = ' ';
+ while (*q && isspace(*q))
+ q++;
+ if (q != (p+1))
+ while ((*r++ = *q++));
+ }
+ p++;
+ }
+ ret = do_write_string(ff, s);
+done:
+ free(buf);
+ fclose(file);
+ return ret;
+}
+
+static int write_cpudesc(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ const char *cpuinfo_procs[] = CPUINFO_PROC;
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(cpuinfo_procs); i++) {
+ int ret;
+ ret = __write_cpudesc(ff, cpuinfo_procs[i]);
+ if (ret >= 0)
+ return ret;
+ }
+ return -1;
+}
+
+
+static int write_nrcpus(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ long nr;
+ u32 nrc, nra;
+ int ret;
+
+ nrc = cpu__max_present_cpu();
+
+ nr = sysconf(_SC_NPROCESSORS_ONLN);
+ if (nr < 0)
+ return -1;
+
+ nra = (u32)(nr & UINT_MAX);
+
+ ret = do_write(ff, &nrc, sizeof(nrc));
+ if (ret < 0)
+ return ret;
+
+ return do_write(ff, &nra, sizeof(nra));
+}
+
+static int write_event_desc(struct feat_fd *ff,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ u32 nre, nri, sz;
+ int ret;
+
+ nre = evlist->nr_entries;
+
+ /*
+ * write number of events
+ */
+ ret = do_write(ff, &nre, sizeof(nre));
+ if (ret < 0)
+ return ret;
+
+ /*
+ * size of perf_event_attr struct
+ */
+ sz = (u32)sizeof(evsel->attr);
+ ret = do_write(ff, &sz, sizeof(sz));
+ if (ret < 0)
+ return ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ ret = do_write(ff, &evsel->attr, sz);
+ if (ret < 0)
+ return ret;
+ /*
+ * write the number of unique ids for this event;
+ * there is one id per instance of an event.
+ *
+ * Copy into nri so the written value is independent
+ * of the in-memory type of evsel->ids.
+ */
+ nri = evsel->ids;
+ ret = do_write(ff, &nri, sizeof(nri));
+ if (ret < 0)
+ return ret;
+
+ /*
+ * write event string as passed on cmdline
+ */
+ ret = do_write_string(ff, perf_evsel__name(evsel));
+ if (ret < 0)
+ return ret;
+ /*
+ * write unique ids for this event
+ */
+ ret = do_write(ff, evsel->id, evsel->ids * sizeof(u64));
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
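+
+/*
+ * Resulting EVENT_DESC on-file layout: u32 nre, u32 sz, then per event
+ * the perf_event_attr (sz bytes), u32 nri, the event name string, and
+ * nri u64 sample ids. read_event_desc() below parses this back.
+ */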
+
+static int write_cmdline(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ char buf[MAXPATHLEN];
+ u32 n;
+ int i, ret;
+
+ /* actual path to perf binary */
+ ret = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+ if (ret <= 0)
+ return -1;
+
+ /* readlink() does not add null termination */
+ buf[ret] = '\0';
+
+ /* account for binary path */
+ n = perf_env.nr_cmdline + 1;
+
+ ret = do_write(ff, &n, sizeof(n));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, buf);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0 ; i < perf_env.nr_cmdline; i++) {
+ ret = do_write_string(ff, perf_env.cmdline_argv[i]);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
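+
+/*
+ * Resulting CMDLINE layout: u32 n (nr_cmdline plus one for the binary
+ * path), followed by n strings: the resolved /proc/self/exe path and
+ * then each recorded command-line argument.
+ */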
+
+#define CORE_SIB_FMT \
+ "/sys/devices/system/cpu/cpu%d/topology/core_siblings_list"
+#define THRD_SIB_FMT \
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list"
+
+struct cpu_topo {
+ u32 cpu_nr;
+ u32 core_sib;
+ u32 thread_sib;
+ char **core_siblings;
+ char **thread_siblings;
+};
+
+static int build_cpu_topo(struct cpu_topo *tp, int cpu)
+{
+ FILE *fp;
+ char filename[MAXPATHLEN];
+ char *buf = NULL, *p;
+ size_t len = 0;
+ ssize_t sret;
+ u32 i = 0;
+ int ret = -1;
+
+ sprintf(filename, CORE_SIB_FMT, cpu);
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto try_threads;
+
+ sret = getline(&buf, &len, fp);
+ fclose(fp);
+ if (sret <= 0)
+ goto try_threads;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->core_sib; i++) {
+ if (!strcmp(buf, tp->core_siblings[i]))
+ break;
+ }
+ if (i == tp->core_sib) {
+ tp->core_siblings[i] = buf;
+ tp->core_sib++;
+ buf = NULL;
+ len = 0;
+ }
+ ret = 0;
+
+try_threads:
+ sprintf(filename, THRD_SIB_FMT, cpu);
+ fp = fopen(filename, "r");
+ if (!fp)
+ goto done;
+
+ if (getline(&buf, &len, fp) <= 0)
+ goto done;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ for (i = 0; i < tp->thread_sib; i++) {
+ if (!strcmp(buf, tp->thread_siblings[i]))
+ break;
+ }
+ if (i == tp->thread_sib) {
+ tp->thread_siblings[i] = buf;
+ tp->thread_sib++;
+ buf = NULL;
+ }
+ ret = 0;
+done:
+ if (fp)
+ fclose(fp);
+ free(buf);
+ return ret;
+}
+
+static void free_cpu_topo(struct cpu_topo *tp)
+{
+ u32 i;
+
+ if (!tp)
+ return;
+
+ for (i = 0 ; i < tp->core_sib; i++)
+ zfree(&tp->core_siblings[i]);
+
+ for (i = 0 ; i < tp->thread_sib; i++)
+ zfree(&tp->thread_siblings[i]);
+
+ free(tp);
+}
+
+static struct cpu_topo *build_cpu_topology(void)
+{
+ struct cpu_topo *tp = NULL;
+ void *addr;
+ u32 nr, i;
+ size_t sz;
+ long ncpus;
+ int ret = -1;
+ struct cpu_map *map;
+
+ ncpus = cpu__max_present_cpu();
+
+ /* build online CPU map */
+ map = cpu_map__new(NULL);
+ if (map == NULL) {
+ pr_debug("failed to get system cpumap\n");
+ return NULL;
+ }
+
+ nr = (u32)(ncpus & UINT_MAX);
+
+ sz = nr * sizeof(char *);
+ addr = calloc(1, sizeof(*tp) + 2 * sz);
+ if (!addr)
+ goto out_free;
+
+ tp = addr;
+ tp->cpu_nr = nr;
+ addr += sizeof(*tp);
+ tp->core_siblings = addr;
+ addr += sz;
+ tp->thread_siblings = addr;
+
+ for (i = 0; i < nr; i++) {
+ if (!cpu_map__has(map, i))
+ continue;
+
+ ret = build_cpu_topo(tp, i);
+ if (ret < 0)
+ break;
+ }
+
+out_free:
+ cpu_map__put(map);
+ if (ret) {
+ free_cpu_topo(tp);
+ tp = NULL;
+ }
+ return tp;
+}
+
+static int write_cpu_topology(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct cpu_topo *tp;
+ u32 i;
+ int ret, j;
+
+ tp = build_cpu_topology();
+ if (!tp)
+ return -1;
+
+ ret = do_write(ff, &tp->core_sib, sizeof(tp->core_sib));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->core_sib; i++) {
+ ret = do_write_string(ff, tp->core_siblings[i]);
+ if (ret < 0)
+ goto done;
+ }
+ ret = do_write(ff, &tp->thread_sib, sizeof(tp->thread_sib));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < tp->thread_sib; i++) {
+ ret = do_write_string(ff, tp->thread_siblings[i]);
+ if (ret < 0)
+ break;
+ }
+
+ ret = perf_env__read_cpu_topology_map(&perf_env);
+ if (ret < 0)
+ goto done;
+
+ for (j = 0; j < perf_env.nr_cpus_avail; j++) {
+ ret = do_write(ff, &perf_env.cpu[j].core_id,
+ sizeof(perf_env.cpu[j].core_id));
+ if (ret < 0)
+ return ret;
+ ret = do_write(ff, &perf_env.cpu[j].socket_id,
+ sizeof(perf_env.cpu[j].socket_id));
+ if (ret < 0)
+ return ret;
+ }
+done:
+ free_cpu_topo(tp);
+ return ret;
+}
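+
+/*
+ * Resulting CPU_TOPOLOGY layout: u32 count plus strings for the
+ * core-sibling lists, u32 count plus strings for the thread-sibling
+ * lists, then one (u32 core_id, u32 socket_id) pair per available CPU.
+ * Older files may end after the sibling lists; process_cpu_topology()
+ * handles both cases.
+ */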
+
+
+
+static int write_total_mem(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ char *buf = NULL;
+ FILE *fp;
+ size_t len = 0;
+ int ret = -1, n;
+ uint64_t mem;
+
+ fp = fopen("/proc/meminfo", "r");
+ if (!fp)
+ return -1;
+
+ while (getline(&buf, &len, fp) > 0) {
+ ret = strncmp(buf, "MemTotal:", 9);
+ if (!ret)
+ break;
+ }
+ if (!ret) {
+ n = sscanf(buf, "%*s %"PRIu64, &mem);
+ if (n == 1)
+ ret = do_write(ff, &mem, sizeof(mem));
+ } else {
+ ret = -1;
+ }
+ free(buf);
+ fclose(fp);
+ return ret;
+}
+
+static int write_topo_node(struct feat_fd *ff, int node)
+{
+ char str[MAXPATHLEN];
+ char field[32];
+ char *buf = NULL, *p;
+ size_t len = 0;
+ FILE *fp;
+ u64 mem_total = 0, mem_free = 0, mem;
+ int ret = -1;
+
+ sprintf(str, "/sys/devices/system/node/node%d/meminfo", node);
+ fp = fopen(str, "r");
+ if (!fp)
+ return -1;
+
+ while (getline(&buf, &len, fp) > 0) {
+ /* skip over invalid lines */
+ if (!strchr(buf, ':'))
+ continue;
+ if (sscanf(buf, "%*s %*d %31s %"PRIu64, field, &mem) != 2)
+ goto done;
+ if (!strcmp(field, "MemTotal:"))
+ mem_total = mem;
+ if (!strcmp(field, "MemFree:"))
+ mem_free = mem;
+ }
+
+ fclose(fp);
+ fp = NULL;
+
+ ret = do_write(ff, &mem_total, sizeof(u64));
+ if (ret)
+ goto done;
+
+ ret = do_write(ff, &mem_free, sizeof(u64));
+ if (ret)
+ goto done;
+
+ ret = -1;
+ sprintf(str, "/sys/devices/system/node/node%d/cpulist", node);
+
+ fp = fopen(str, "r");
+ if (!fp)
+ goto done;
+
+ if (getline(&buf, &len, fp) <= 0)
+ goto done;
+
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+
+ ret = do_write_string(ff, buf);
+done:
+ free(buf);
+ if (fp)
+ fclose(fp);
+ return ret;
+}
+
+static int write_numa_topology(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ char *buf = NULL;
+ size_t len = 0;
+ FILE *fp;
+ struct cpu_map *node_map = NULL;
+ char *c;
+ u32 nr, i, j;
+ int ret = -1;
+
+ fp = fopen("/sys/devices/system/node/online", "r");
+ if (!fp)
+ return -1;
+
+ if (getline(&buf, &len, fp) <= 0)
+ goto done;
+
+ c = strchr(buf, '\n');
+ if (c)
+ *c = '\0';
+
+ node_map = cpu_map__new(buf);
+ if (!node_map)
+ goto done;
+
+ nr = (u32)node_map->nr;
+
+ ret = do_write(ff, &nr, sizeof(nr));
+ if (ret < 0)
+ goto done;
+
+ for (i = 0; i < nr; i++) {
+ j = (u32)node_map->map[i];
+ ret = do_write(ff, &j, sizeof(j));
+ if (ret < 0)
+ break;
+
+ ret = write_topo_node(ff, i);
+ if (ret < 0)
+ break;
+ }
+done:
+ free(buf);
+ fclose(fp);
+ cpu_map__put(node_map);
+ return ret;
+}
+
+/*
+ * File format:
+ *
+ * struct pmu_mappings {
+ * u32 pmu_num;
+ * struct pmu_map {
+ * u32 type;
+ * char name[];
+ * }[pmu_num];
+ * };
+ */
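+
+/*
+ * For example (values are illustrative), a file with two named PMUs
+ * might carry pmu_num == 2 followed by the pairs (4, "cpu") and
+ * (1, "software"), where each u32 type matches the PMU's
+ * /sys/bus/event_source/devices/<name>/type value.
+ */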
+
+static int write_pmu_mappings(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *pmu = NULL;
+ u32 pmu_num = 0;
+ int ret;
+
+ /*
+ * Do a first pass to count the number of PMUs; this avoids an
+ * lseek(), so it also works in pipe mode.
+ */
+ while ((pmu = perf_pmu__scan(pmu))) {
+ if (!pmu->name)
+ continue;
+ pmu_num++;
+ }
+
+ ret = do_write(ff, &pmu_num, sizeof(pmu_num));
+ if (ret < 0)
+ return ret;
+
+ while ((pmu = perf_pmu__scan(pmu))) {
+ if (!pmu->name)
+ continue;
+
+ ret = do_write(ff, &pmu->type, sizeof(pmu->type));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, pmu->name);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * File format:
+ *
+ * struct group_descs {
+ * u32 nr_groups;
+ * struct group_desc {
+ * char name[];
+ * u32 leader_idx;
+ * u32 nr_members;
+ * }[nr_groups];
+ * };
+ */
+static int write_group_desc(struct feat_fd *ff,
+ struct perf_evlist *evlist)
+{
+ u32 nr_groups = evlist->nr_groups;
+ struct perf_evsel *evsel;
+ int ret;
+
+ ret = do_write(ff, &nr_groups, sizeof(nr_groups));
+ if (ret < 0)
+ return ret;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (perf_evsel__is_group_leader(evsel) &&
+ evsel->nr_members > 1) {
+ const char *name = evsel->group_name ?: "{anon_group}";
+ u32 leader_idx = evsel->idx;
+ u32 nr_members = evsel->nr_members;
+
+ ret = do_write_string(ff, name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write(ff, &leader_idx, sizeof(leader_idx));
+ if (ret < 0)
+ return ret;
+
+ ret = do_write(ff, &nr_members, sizeof(nr_members));
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+/*
+ * default get_cpuid(): nothing gets recorded
+ * actual implementation must be in arch/$(SRCARCH)/util/header.c
+ */
+int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused)
+{
+ return -1;
+}
+
+static int write_cpuid(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ char buffer[64];
+ int ret;
+
+ ret = get_cpuid(buffer, sizeof(buffer));
+ if (!ret)
+ goto write_it;
+
+ return -1;
+write_it:
+ return do_write_string(ff, buffer);
+}
+
+static int write_branch_stack(struct feat_fd *ff __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static int write_auxtrace(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ err = auxtrace_index__write(ff->fd, &session->auxtrace_index);
+ if (err < 0)
+ pr_err("Failed to write auxtrace index\n");
+ return err;
+}
+
+static int cpu_cache_level__sort(const void *a, const void *b)
+{
+ struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a;
+ struct cpu_cache_level *cache_b = (struct cpu_cache_level *)b;
+
+ return cache_a->level - cache_b->level;
+}
+
+static bool cpu_cache_level__cmp(struct cpu_cache_level *a, struct cpu_cache_level *b)
+{
+ if (a->level != b->level)
+ return false;
+
+ if (a->line_size != b->line_size)
+ return false;
+
+ if (a->sets != b->sets)
+ return false;
+
+ if (a->ways != b->ways)
+ return false;
+
+ if (strcmp(a->type, b->type))
+ return false;
+
+ if (strcmp(a->size, b->size))
+ return false;
+
+ if (strcmp(a->map, b->map))
+ return false;
+
+ return true;
+}
+
+static int cpu_cache_level__read(struct cpu_cache_level *cache, u32 cpu, u16 level)
+{
+ char path[PATH_MAX], file[PATH_MAX];
+ struct stat st;
+ size_t len;
+
+ scnprintf(path, PATH_MAX, "devices/system/cpu/cpu%d/cache/index%d/", cpu, level);
+ scnprintf(file, PATH_MAX, "%s/%s", sysfs__mountpoint(), path);
+
+ if (stat(file, &st))
+ return 1;
+
+ scnprintf(file, PATH_MAX, "%s/level", path);
+ if (sysfs__read_int(file, (int *) &cache->level))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/coherency_line_size", path);
+ if (sysfs__read_int(file, (int *) &cache->line_size))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/number_of_sets", path);
+ if (sysfs__read_int(file, (int *) &cache->sets))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/ways_of_associativity", path);
+ if (sysfs__read_int(file, (int *) &cache->ways))
+ return -1;
+
+ scnprintf(file, PATH_MAX, "%s/type", path);
+ if (sysfs__read_str(file, &cache->type, &len))
+ return -1;
+
+ cache->type[len] = 0;
+ cache->type = rtrim(cache->type);
+
+ scnprintf(file, PATH_MAX, "%s/size", path);
+ if (sysfs__read_str(file, &cache->size, &len)) {
+ free(cache->type);
+ return -1;
+ }
+
+ cache->size[len] = 0;
+ cache->size = rtrim(cache->size);
+
+ scnprintf(file, PATH_MAX, "%s/shared_cpu_list", path);
+ if (sysfs__read_str(file, &cache->map, &len)) {
+ free(cache->size);
+ free(cache->type);
+ return -1;
+ }
+
+ cache->map[len] = 0;
+ cache->map = rtrim(cache->map);
+ return 0;
+}
+
+static void cpu_cache_level__fprintf(FILE *out, struct cpu_cache_level *c)
+{
+ fprintf(out, "L%d %-15s %8s [%s]\n", c->level, c->type, c->size, c->map);
+}
+
+static int build_caches(struct cpu_cache_level caches[], u32 size, u32 *cntp)
+{
+ u32 i, cnt = 0;
+ long ncpus;
+ u32 nr, cpu;
+ u16 level;
+
+ ncpus = sysconf(_SC_NPROCESSORS_CONF);
+ if (ncpus < 0)
+ return -1;
+
+ nr = (u32)(ncpus & UINT_MAX);
+
+ for (cpu = 0; cpu < nr; cpu++) {
+ for (level = 0; level < 10; level++) {
+ struct cpu_cache_level c;
+ int err;
+
+ err = cpu_cache_level__read(&c, cpu, level);
+ if (err < 0)
+ return err;
+
+ if (err == 1)
+ break;
+
+ for (i = 0; i < cnt; i++) {
+ if (cpu_cache_level__cmp(&c, &caches[i]))
+ break;
+ }
+
+ if (i == cnt)
+ caches[cnt++] = c;
+ else
+ cpu_cache_level__free(&c);
+
+ if (WARN_ONCE(cnt == size, "way too many cpu caches.."))
+ goto out;
+ }
+ }
+ out:
+ *cntp = cnt;
+ return 0;
+}
+
+#define MAX_CACHES (MAX_NR_CPUS * 4)
+
+static int write_cache(struct feat_fd *ff,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ struct cpu_cache_level caches[MAX_CACHES];
+ u32 cnt = 0, i, version = 1;
+ int ret;
+
+ ret = build_caches(caches, MAX_CACHES, &cnt);
+ if (ret)
+ goto out;
+
+ qsort(&caches, cnt, sizeof(struct cpu_cache_level), cpu_cache_level__sort);
+
+ ret = do_write(ff, &version, sizeof(u32));
+ if (ret < 0)
+ goto out;
+
+ ret = do_write(ff, &cnt, sizeof(u32));
+ if (ret < 0)
+ goto out;
+
+ for (i = 0; i < cnt; i++) {
+ struct cpu_cache_level *c = &caches[i];
+
+ #define _W(v) \
+ ret = do_write(ff, &c->v, sizeof(u32)); \
+ if (ret < 0) \
+ goto out;
+
+ _W(level)
+ _W(line_size)
+ _W(sets)
+ _W(ways)
+ #undef _W
+
+ #define _W(v) \
+ ret = do_write_string(ff, (const char *) c->v); \
+ if (ret < 0) \
+ goto out;
+
+ _W(type)
+ _W(size)
+ _W(map)
+ #undef _W
+ }
+
+out:
+ for (i = 0; i < cnt; i++)
+ cpu_cache_level__free(&caches[i]);
+ return ret;
+}
+
+static int write_stat(struct feat_fd *ff __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ return 0;
+}
+
+static int write_sample_time(struct feat_fd *ff,
+ struct perf_evlist *evlist)
+{
+ int ret;
+
+ ret = do_write(ff, &evlist->first_sample_time,
+ sizeof(evlist->first_sample_time));
+ if (ret < 0)
+ return ret;
+
+ return do_write(ff, &evlist->last_sample_time,
+ sizeof(evlist->last_sample_time));
+}
+
+
+static int memory_node__read(struct memory_node *n, unsigned long idx)
+{
+ unsigned int phys, size = 0;
+ char path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+
+#define for_each_memory(mem, dir) \
+ while ((ent = readdir(dir))) \
+ if (strcmp(ent->d_name, ".") && \
+ strcmp(ent->d_name, "..") && \
+ sscanf(ent->d_name, "memory%u", &mem) == 1)
+
+ scnprintf(path, PATH_MAX,
+ "%s/devices/system/node/node%lu",
+ sysfs__mountpoint(), idx);
+
+ dir = opendir(path);
+ if (!dir) {
+ pr_warning("failed: cant' open memory sysfs data\n");
+ return -1;
+ }
+
+ for_each_memory(phys, dir) {
+ size = max(phys, size);
+ }
+
+ size++;
+
+ n->set = bitmap_alloc(size);
+ if (!n->set) {
+ closedir(dir);
+ return -ENOMEM;
+ }
+
+ n->node = idx;
+ n->size = size;
+
+ rewinddir(dir);
+
+ for_each_memory(phys, dir) {
+ set_bit(phys, n->set);
+ }
+
+ closedir(dir);
+ return 0;
+}
+
+static int memory_node__sort(const void *a, const void *b)
+{
+ const struct memory_node *na = a;
+ const struct memory_node *nb = b;
+
+ return na->node - nb->node;
+}
+
+static int build_mem_topology(struct memory_node *nodes, u64 size, u64 *cntp)
+{
+ char path[PATH_MAX];
+ struct dirent *ent;
+ DIR *dir;
+ u64 cnt = 0;
+ int ret = 0;
+
+ scnprintf(path, PATH_MAX, "%s/devices/system/node/",
+ sysfs__mountpoint());
+
+ dir = opendir(path);
+ if (!dir) {
+ pr_debug2("%s: could't read %s, does this arch have topology information?\n",
+ __func__, path);
+ return -1;
+ }
+
+ while (!ret && (ent = readdir(dir))) {
+ unsigned int idx;
+ int r;
+
+ if (!strcmp(ent->d_name, ".") ||
+ !strcmp(ent->d_name, ".."))
+ continue;
+
+ r = sscanf(ent->d_name, "node%u", &idx);
+ if (r != 1)
+ continue;
+
+ if (WARN_ONCE(cnt >= size,
+ "failed to write MEM_TOPOLOGY, way too many nodes\n"))
+ return -1;
+
+ ret = memory_node__read(&nodes[cnt++], idx);
+ }
+
+ *cntp = cnt;
+ closedir(dir);
+
+ if (!ret)
+ qsort(nodes, cnt, sizeof(nodes[0]), memory_node__sort);
+
+ return ret;
+}
+
+#define MAX_MEMORY_NODES 2000
+
+/*
+ * The MEM_TOPOLOGY feature holds the physical memory map for every
+ * node in the system. The format of the data is as follows:
+ *
+ *  0 - version          | for future changes
+ *  8 - block_size_bytes | /sys/devices/system/memory/block_size_bytes
+ * 16 - count            | number of nodes
+ *
+ * For each node we store a map of physical memory indexes:
+ *
+ * 32 - node id          | node index
+ * 40 - size             | size of bitmap
+ * 48 - bitmap           | bitmap of memory indexes that belong to the node
+ */
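+/*
+ * Worked example (illustrative): a node0 backed by sysfs entries
+ * memory0..memory31 is stored as node == 0, size == 32, and (via
+ * do_write_bitmap) the bit count 32 followed by one u64 word with
+ * bits 0-31 set.
+ */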
+static int write_mem_topology(struct feat_fd *ff __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused)
+{
+ static struct memory_node nodes[MAX_MEMORY_NODES];
+ u64 bsize, version = 1, i, nr;
+ int ret;
+
+ ret = sysfs__read_xll("devices/system/memory/block_size_bytes",
+ (unsigned long long *) &bsize);
+ if (ret)
+ return ret;
+
+ ret = build_mem_topology(&nodes[0], MAX_MEMORY_NODES, &nr);
+ if (ret)
+ return ret;
+
+ ret = do_write(ff, &version, sizeof(version));
+ if (ret < 0)
+ goto out;
+
+ ret = do_write(ff, &bsize, sizeof(bsize));
+ if (ret < 0)
+ goto out;
+
+ ret = do_write(ff, &nr, sizeof(nr));
+ if (ret < 0)
+ goto out;
+
+ for (i = 0; i < nr; i++) {
+ struct memory_node *n = &nodes[i];
+
+ #define _W(v) \
+ ret = do_write(ff, &n->v, sizeof(n->v)); \
+ if (ret < 0) \
+ goto out;
+
+ _W(node)
+ _W(size)
+
+ #undef _W
+
+ ret = do_write_bitmap(ff, n->set, n->size);
+ if (ret < 0)
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static void print_hostname(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
+}
+
+static void print_osrelease(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# os release : %s\n", ff->ph->env.os_release);
+}
+
+static void print_arch(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# arch : %s\n", ff->ph->env.arch);
+}
+
+static void print_cpudesc(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cpudesc : %s\n", ff->ph->env.cpu_desc);
+}
+
+static void print_nrcpus(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# nrcpus online : %u\n", ff->ph->env.nr_cpus_online);
+ fprintf(fp, "# nrcpus avail : %u\n", ff->ph->env.nr_cpus_avail);
+}
+
+static void print_version(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# perf version : %s\n", ff->ph->env.version);
+}
+
+static void print_cmdline(struct feat_fd *ff, FILE *fp)
+{
+ int nr, i;
+
+ nr = ff->ph->env.nr_cmdline;
+
+ fprintf(fp, "# cmdline : ");
+
+ for (i = 0; i < nr; i++) {
+ char *argv_i = strdup(ff->ph->env.cmdline_argv[i]);
+ if (!argv_i) {
+ fprintf(fp, "%s ", ff->ph->env.cmdline_argv[i]);
+ } else {
+ char *mem = argv_i;
+ do {
+ char *quote = strchr(argv_i, '\'');
+ if (!quote)
+ break;
+ *quote++ = '\0';
+ fprintf(fp, "%s\\\'", argv_i);
+ argv_i = quote;
+ } while (1);
+ fprintf(fp, "%s ", argv_i);
+ free(mem);
+ }
+ }
+ fputc('\n', fp);
+}
+
+static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_header *ph = ff->ph;
+ int cpu_nr = ph->env.nr_cpus_avail;
+ int nr, i;
+ char *str;
+
+ nr = ph->env.nr_sibling_cores;
+ str = ph->env.sibling_cores;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling cores : %s\n", str);
+ str += strlen(str) + 1;
+ }
+
+ nr = ph->env.nr_sibling_threads;
+ str = ph->env.sibling_threads;
+
+ for (i = 0; i < nr; i++) {
+ fprintf(fp, "# sibling threads : %s\n", str);
+ str += strlen(str) + 1;
+ }
+
+ if (ph->env.cpu != NULL) {
+ for (i = 0; i < cpu_nr; i++)
+ fprintf(fp, "# CPU %d: Core ID %d, Socket ID %d\n", i,
+ ph->env.cpu[i].core_id, ph->env.cpu[i].socket_id);
+ } else
+ fprintf(fp, "# Core ID and Socket ID information is not available\n");
+}
+
+static void free_event_desc(struct perf_evsel *events)
+{
+ struct perf_evsel *evsel;
+
+ if (!events)
+ return;
+
+ for (evsel = events; evsel->attr.size; evsel++) {
+ zfree(&evsel->name);
+ zfree(&evsel->id);
+ }
+
+ free(events);
+}
+
+static struct perf_evsel *read_event_desc(struct feat_fd *ff)
+{
+ struct perf_evsel *evsel, *events = NULL;
+ u64 *id;
+ void *buf = NULL;
+ u32 nre, sz, nr, i, j;
+ size_t msz;
+
+ /* number of events */
+ if (do_read_u32(ff, &nre))
+ goto error;
+
+ if (do_read_u32(ff, &sz))
+ goto error;
+
+ /* buffer to hold the on-file attr struct */
+ buf = malloc(sz);
+ if (!buf)
+ goto error;
+
+ /* the last event terminates with evsel->attr.size == 0: */
+ events = calloc(nre + 1, sizeof(*events));
+ if (!events)
+ goto error;
+
+ msz = sizeof(evsel->attr);
+ if (sz < msz)
+ msz = sz;
+
+ for (i = 0, evsel = events; i < nre; evsel++, i++) {
+ evsel->idx = i;
+
+ /*
+ * must read entire on-file attr struct to
+ * sync up with layout.
+ */
+ if (__do_read(ff, buf, sz))
+ goto error;
+
+ if (ff->ph->needs_swap)
+ perf_event__attr_swap(buf);
+
+ memcpy(&evsel->attr, buf, msz);
+
+ if (do_read_u32(ff, &nr))
+ goto error;
+
+ if (ff->ph->needs_swap)
+ evsel->needs_swap = true;
+
+ evsel->name = do_read_string(ff);
+ if (!evsel->name)
+ goto error;
+
+ if (!nr)
+ continue;
+
+ id = calloc(nr, sizeof(*id));
+ if (!id)
+ goto error;
+ evsel->ids = nr;
+ evsel->id = id;
+
+ for (j = 0 ; j < nr; j++) {
+ if (do_read_u64(ff, id))
+ goto error;
+ id++;
+ }
+ }
+out:
+ free(buf);
+ return events;
+error:
+ free_event_desc(events);
+ events = NULL;
+ goto out;
+}
+
+static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val,
+ void *priv __maybe_unused)
+{
+ return fprintf(fp, ", %s = %s", name, val);
+}
+
+static void print_event_desc(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_evsel *evsel, *events;
+ u32 j;
+ u64 *id;
+
+ if (ff->events)
+ events = ff->events;
+ else
+ events = read_event_desc(ff);
+
+ if (!events) {
+ fprintf(fp, "# event desc: not available or unable to read\n");
+ return;
+ }
+
+ for (evsel = events; evsel->attr.size; evsel++) {
+ fprintf(fp, "# event : name = %s, ", evsel->name);
+
+ if (evsel->ids) {
+ fprintf(fp, ", id = {");
+ for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
+ if (j)
+ fputc(',', fp);
+ fprintf(fp, " %"PRIu64, *id);
+ }
+ fprintf(fp, " }");
+ }
+
+ perf_event_attr__fprintf(fp, &evsel->attr, __desc_attr__fprintf, NULL);
+
+ fputc('\n', fp);
+ }
+
+ free_event_desc(events);
+ ff->events = NULL;
+}
+
+static void print_total_mem(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# total memory : %llu kB\n", ff->ph->env.total_mem);
+}
+
+static void print_numa_topology(struct feat_fd *ff, FILE *fp)
+{
+ int i;
+ struct numa_node *n;
+
+ for (i = 0; i < ff->ph->env.nr_numa_nodes; i++) {
+ n = &ff->ph->env.numa_nodes[i];
+
+ fprintf(fp, "# node%u meminfo : total = %"PRIu64" kB,"
+ " free = %"PRIu64" kB\n",
+ n->node, n->mem_total, n->mem_free);
+
+ fprintf(fp, "# node%u cpu list : ", n->node);
+ cpu_map__fprintf(n->map, fp);
+ }
+}
+
+static void print_cpuid(struct feat_fd *ff, FILE *fp)
+{
+ fprintf(fp, "# cpuid : %s\n", ff->ph->env.cpuid);
+}
+
+static void print_branch_stack(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains samples with branch stack\n");
+}
+
+static void print_auxtrace(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
+}
+
+static void print_stat(struct feat_fd *ff __maybe_unused, FILE *fp)
+{
+ fprintf(fp, "# contains stat data\n");
+}
+
+static void print_cache(struct feat_fd *ff, FILE *fp __maybe_unused)
+{
+ int i;
+
+ fprintf(fp, "# CPU cache info:\n");
+ for (i = 0; i < ff->ph->env.caches_cnt; i++) {
+ fprintf(fp, "# ");
+ cpu_cache_level__fprintf(fp, &ff->ph->env.caches[i]);
+ }
+}
+
+static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
+{
+ const char *delimiter = "# pmu mappings: ";
+ char *str, *tmp;
+ u32 pmu_num;
+ u32 type;
+
+ pmu_num = ff->ph->env.nr_pmu_mappings;
+ if (!pmu_num) {
+ fprintf(fp, "# pmu mappings: not available\n");
+ return;
+ }
+
+ str = ff->ph->env.pmu_mappings;
+
+ while (pmu_num) {
+ type = strtoul(str, &tmp, 0);
+ if (*tmp != ':')
+ goto error;
+
+ str = tmp + 1;
+ fprintf(fp, "%s%s = %" PRIu32, delimiter, str, type);
+
+ delimiter = ", ";
+ str += strlen(str) + 1;
+ pmu_num--;
+ }
+
+ fprintf(fp, "\n");
+
+ if (!pmu_num)
+ return;
+error:
+ fprintf(fp, "# pmu mappings: unable to read\n");
+}
+
+static void print_group_desc(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_session *session;
+ struct perf_evsel *evsel;
+ u32 nr = 0;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (perf_evsel__is_group_leader(evsel) &&
+ evsel->nr_members > 1) {
+ fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
+ perf_evsel__name(evsel));
+
+ nr = evsel->nr_members - 1;
+ } else if (nr) {
+ fprintf(fp, ",%s", perf_evsel__name(evsel));
+
+ if (--nr == 0)
+ fprintf(fp, "}\n");
+ }
+ }
+}
+
+static void print_sample_time(struct feat_fd *ff, FILE *fp)
+{
+ struct perf_session *session;
+ char time_buf[32];
+ double d;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ timestamp__scnprintf_usec(session->evlist->first_sample_time,
+ time_buf, sizeof(time_buf));
+ fprintf(fp, "# time of first sample : %s\n", time_buf);
+
+ timestamp__scnprintf_usec(session->evlist->last_sample_time,
+ time_buf, sizeof(time_buf));
+ fprintf(fp, "# time of last sample : %s\n", time_buf);
+
+ d = (double)(session->evlist->last_sample_time -
+ session->evlist->first_sample_time) / NSEC_PER_MSEC;
+
+ fprintf(fp, "# sample duration : %10.3f ms\n", d);
+}
+
+static void memory_node__fprintf(struct memory_node *n,
+ unsigned long long bsize, FILE *fp)
+{
+ char buf_map[100], buf_size[50];
+ unsigned long long size;
+
+ size = bsize * bitmap_weight(n->set, n->size);
+ unit_number__scnprintf(buf_size, 50, size);
+
+ bitmap_scnprintf(n->set, n->size, buf_map, 100);
+ fprintf(fp, "# %3" PRIu64 " [%s]: %s\n", n->node, buf_size, buf_map);
+}
+
+static void print_mem_topology(struct feat_fd *ff, FILE *fp)
+{
+ struct memory_node *nodes;
+ int i, nr;
+
+ nodes = ff->ph->env.memory_nodes;
+ nr = ff->ph->env.nr_memory_nodes;
+
+ fprintf(fp, "# memory nodes (nr %d, block size 0x%llx):\n",
+ nr, ff->ph->env.memory_bsize);
+
+ for (i = 0; i < nr; i++) {
+ memory_node__fprintf(&nodes[i], ff->ph->env.memory_bsize, fp);
+ }
+}
+
+static int __event_process_build_id(struct build_id_event *bev,
+ char *filename,
+ struct perf_session *session)
+{
+ int err = -1;
+ struct machine *machine;
+ u16 cpumode;
+ struct dso *dso;
+ enum dso_kernel_type dso_type;
+
+ machine = perf_session__findnew_machine(session, bev->pid);
+ if (!machine)
+ goto out;
+
+ cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ dso_type = DSO_TYPE_KERNEL;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ dso_type = DSO_TYPE_GUEST_KERNEL;
+ break;
+ case PERF_RECORD_MISC_USER:
+ case PERF_RECORD_MISC_GUEST_USER:
+ dso_type = DSO_TYPE_USER;
+ break;
+ default:
+ goto out;
+ }
+
+ dso = machine__findnew_dso(machine, filename);
+ if (dso != NULL) {
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ dso__set_build_id(dso, &bev->build_id);
+
+ if (dso_type != DSO_TYPE_USER) {
+ struct kmod_path m = { .name = NULL, };
+
+ if (!kmod_path__parse_name(&m, filename) && m.kmod)
+ dso__set_module_info(dso, &m, machine);
+ else
+ dso->kernel = dso_type;
+
+ free(m.name);
+ }
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id),
+ sbuild_id);
+ pr_debug("build id event received for %s: %s\n",
+ dso->long_name, sbuild_id);
+ dso__put(dso);
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+static int perf_header__read_build_ids_abi_quirk(struct perf_header *header,
+ int input, u64 offset, u64 size)
+{
+ struct perf_session *session = container_of(header, struct perf_session, header);
+ struct {
+ struct perf_event_header header;
+ u8 build_id[PERF_ALIGN(BUILD_ID_SIZE, sizeof(u64))];
+ char filename[0];
+ } old_bev;
+ struct build_id_event bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size;
+
+ while (offset < limit) {
+ ssize_t len;
+
+ if (readn(input, &old_bev, sizeof(old_bev)) != sizeof(old_bev))
+ return -1;
+
+ if (header->needs_swap)
+ perf_event_header__bswap(&old_bev.header);
+
+ len = old_bev.header.size - sizeof(old_bev);
+ if (readn(input, filename, len) != len)
+ return -1;
+
+ bev.header = old_bev.header;
+
+ /*
+ * As the pid is the missing value, we need to fill
+ * it in properly. The header.misc value gives us a nice hint.
+ */
+ bev.pid = HOST_KERNEL_ID;
+ if (bev.header.misc == PERF_RECORD_MISC_GUEST_USER ||
+ bev.header.misc == PERF_RECORD_MISC_GUEST_KERNEL)
+ bev.pid = DEFAULT_GUEST_KERNEL_ID;
+
+ memcpy(bev.build_id, old_bev.build_id, sizeof(bev.build_id));
+ __event_process_build_id(&bev, filename, session);
+
+ offset += bev.header.size;
+ }
+
+ return 0;
+}
+
+static int perf_header__read_build_ids(struct perf_header *header,
+ int input, u64 offset, u64 size)
+{
+ struct perf_session *session = container_of(header, struct perf_session, header);
+ struct build_id_event bev;
+ char filename[PATH_MAX];
+ u64 limit = offset + size, orig_offset = offset;
+ int err = -1;
+
+ while (offset < limit) {
+ ssize_t len;
+
+ if (readn(input, &bev, sizeof(bev)) != sizeof(bev))
+ goto out;
+
+ if (header->needs_swap)
+ perf_event_header__bswap(&bev.header);
+
+ len = bev.header.size - sizeof(bev);
+ if (readn(input, filename, len) != len)
+ goto out;
+ /*
+ * The a1645ce1 changeset:
+ *
+ * "perf: 'perf kvm' tool for monitoring guest performance from host"
+ *
+ * Added a field to struct build_id_event that broke the file
+ * format.
+ *
+ * Since the kernel build-id is the first entry, process the
+ * table using the old format if the well known
+ * '[kernel.kallsyms]' string for the kernel build-id has the
+ * first 4 characters chopped off (where the pid_t sits).
+ */
+ if (memcmp(filename, "nel.kallsyms]", 13) == 0) {
+ if (lseek(input, orig_offset, SEEK_SET) == (off_t)-1)
+ return -1;
+ return perf_header__read_build_ids_abi_quirk(header, input, offset, size);
+ }
+
+ __event_process_build_id(&bev, filename, session);
+
+ offset += bev.header.size;
+ }
+ err = 0;
+out:
+ return err;
+}
+
+/* Macro for features that simply need to read and store a string. */
+#define FEAT_PROCESS_STR_FUN(__feat, __feat_env) \
+static int process_##__feat(struct feat_fd *ff, void *data __maybe_unused) \
+{\
+ ff->ph->env.__feat_env = do_read_string(ff); \
+ return ff->ph->env.__feat_env ? 0 : -ENOMEM; \
+}
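+
+/*
+ * E.g. FEAT_PROCESS_STR_FUN(hostname, hostname) expands to a
+ * process_hostname() that reads one string into ff->ph->env.hostname
+ * and returns -ENOMEM when the read or the allocation fails.
+ */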
+
+FEAT_PROCESS_STR_FUN(hostname, hostname);
+FEAT_PROCESS_STR_FUN(osrelease, os_release);
+FEAT_PROCESS_STR_FUN(version, version);
+FEAT_PROCESS_STR_FUN(arch, arch);
+FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc);
+FEAT_PROCESS_STR_FUN(cpuid, cpuid);
+
+static int process_tracing_data(struct feat_fd *ff, void *data)
+{
+ ssize_t ret = trace_report(ff->fd, data, false);
+
+ return ret < 0 ? -1 : 0;
+}
+
+static int process_build_id(struct feat_fd *ff, void *data __maybe_unused)
+{
+ if (perf_header__read_build_ids(ff->ph, ff->fd, ff->offset, ff->size))
+ pr_debug("Failed to read buildids, continuing...\n");
+ return 0;
+}
+
+static int process_nrcpus(struct feat_fd *ff, void *data __maybe_unused)
+{
+ int ret;
+ u32 nr_cpus_avail, nr_cpus_online;
+
+ ret = do_read_u32(ff, &nr_cpus_avail);
+ if (ret)
+ return ret;
+
+ ret = do_read_u32(ff, &nr_cpus_online);
+ if (ret)
+ return ret;
+ ff->ph->env.nr_cpus_avail = (int)nr_cpus_avail;
+ ff->ph->env.nr_cpus_online = (int)nr_cpus_online;
+ return 0;
+}
+
+static int process_total_mem(struct feat_fd *ff, void *data __maybe_unused)
+{
+ u64 total_mem;
+ int ret;
+
+ ret = do_read_u64(ff, &total_mem);
+ if (ret)
+ return -1;
+ ff->ph->env.total_mem = (unsigned long long)total_mem;
+ return 0;
+}
+
+static struct perf_evsel *
+perf_evlist__find_by_index(struct perf_evlist *evlist, int idx)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->idx == idx)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static void
+perf_evlist__set_event_name(struct perf_evlist *evlist,
+ struct perf_evsel *event)
+{
+ struct perf_evsel *evsel;
+
+ if (!event->name)
+ return;
+
+ evsel = perf_evlist__find_by_index(evlist, event->idx);
+ if (!evsel)
+ return;
+
+ if (evsel->name)
+ return;
+
+ evsel->name = strdup(event->name);
+}
+
+static int
+process_event_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_session *session;
+ struct perf_evsel *evsel, *events = read_event_desc(ff);
+
+ if (!events)
+ return 0;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ if (session->data->is_pipe) {
+ /* Save events for reading later by print_event_desc,
+ * since they can't be read again in pipe mode. */
+ ff->events = events;
+ }
+
+ for (evsel = events; evsel->attr.size; evsel++)
+ perf_evlist__set_event_name(session->evlist, evsel);
+
+ if (!session->data->is_pipe)
+ free_event_desc(events);
+
+ return 0;
+}
+
+static int process_cmdline(struct feat_fd *ff, void *data __maybe_unused)
+{
+ char *str, *cmdline = NULL, **argv = NULL;
+ u32 nr, i, len = 0;
+
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ ff->ph->env.nr_cmdline = nr;
+
+ cmdline = zalloc(ff->size + nr + 1);
+ if (!cmdline)
+ return -1;
+
+ argv = zalloc(sizeof(char *) * (nr + 1));
+ if (!argv)
+ goto error;
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ argv[i] = cmdline + len;
+ memcpy(argv[i], str, strlen(str) + 1);
+ len += strlen(str) + 1;
+ free(str);
+ }
+ ff->ph->env.cmdline = cmdline;
+ ff->ph->env.cmdline_argv = (const char **) argv;
+ return 0;
+
+error:
+ free(argv);
+ free(cmdline);
+ return -1;
+}
+
+static int process_cpu_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+ u32 nr, i;
+ char *str;
+ struct strbuf sb;
+ int cpu_nr = ff->ph->env.nr_cpus_avail;
+ u64 size = 0;
+ struct perf_header *ph = ff->ph;
+ bool do_core_id_test = true;
+
+ ph->env.cpu = calloc(cpu_nr, sizeof(*ph->env.cpu));
+ if (!ph->env.cpu)
+ return -1;
+
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.nr_sibling_cores = nr;
+ size += sizeof(u32);
+ if (strbuf_init(&sb, 128) < 0)
+ goto free_cpu;
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include the NUL terminator at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_cores = strbuf_detach(&sb, NULL);
+
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.nr_sibling_threads = nr;
+ size += sizeof(u32);
+
+ for (i = 0; i < nr; i++) {
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ /* include the NUL terminator at the end */
+ if (strbuf_add(&sb, str, strlen(str) + 1) < 0)
+ goto error;
+ size += string_size(str);
+ free(str);
+ }
+ ph->env.sibling_threads = strbuf_detach(&sb, NULL);
+
+ /*
+ * The header may come from an old perf binary,
+ * which doesn't include core id and socket id information.
+ */
+ if (ff->size <= size) {
+ zfree(&ph->env.cpu);
+ return 0;
+ }
+
+ /* On s390 the socket_id number is not related to the number of cpus;
+ * it might be higher than the number of cpus, depending on the
+ * configuration. The same applies to AArch64.
+ */
+ if (ph->env.arch && (!strncmp(ph->env.arch, "s390", 4)
+ || !strncmp(ph->env.arch, "aarch64", 7)))
+ do_core_id_test = false;
+
+ for (i = 0; i < (u32)cpu_nr; i++) {
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ ph->env.cpu[i].core_id = nr;
+
+ if (do_read_u32(ff, &nr))
+ goto free_cpu;
+
+ if (do_core_id_test && nr != (u32)-1 && nr > (u32)cpu_nr) {
+ pr_debug("socket_id number is too big."
+ "You may need to upgrade the perf tool.\n");
+ goto free_cpu;
+ }
+
+ ph->env.cpu[i].socket_id = nr;
+ }
+
+ return 0;
+
+error:
+ strbuf_release(&sb);
+free_cpu:
+ zfree(&ph->env.cpu);
+ return -1;
+}
+
+static int process_numa_topology(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct numa_node *nodes, *n;
+ u32 nr, i;
+ char *str;
+
+ /* nr nodes */
+ if (do_read_u32(ff, &nr))
+ return -1;
+
+ nodes = zalloc(sizeof(*nodes) * nr);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nr; i++) {
+ n = &nodes[i];
+
+ /* node number */
+ if (do_read_u32(ff, &n->node))
+ goto error;
+
+ if (do_read_u64(ff, &n->mem_total))
+ goto error;
+
+ if (do_read_u64(ff, &n->mem_free))
+ goto error;
+
+ str = do_read_string(ff);
+ if (!str)
+ goto error;
+
+ n->map = cpu_map__new(str);
+ if (!n->map)
+ goto error;
+
+ free(str);
+ }
+ ff->ph->env.nr_numa_nodes = nr;
+ ff->ph->env.numa_nodes = nodes;
+ return 0;
+
+error:
+ free(nodes);
+ return -1;
+}
+
+static int process_pmu_mappings(struct feat_fd *ff, void *data __maybe_unused)
+{
+ char *name;
+ u32 pmu_num;
+ u32 type;
+ struct strbuf sb;
+
+ if (do_read_u32(ff, &pmu_num))
+ return -1;
+
+ if (!pmu_num) {
+ pr_debug("pmu mappings not available\n");
+ return 0;
+ }
+
+ ff->ph->env.nr_pmu_mappings = pmu_num;
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
+
+ while (pmu_num) {
+ if (do_read_u32(ff, &type))
+ goto error;
+
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ if (strbuf_addf(&sb, "%u:%s", type, name) < 0)
+ goto error;
+ /* include the NUL terminator at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto error;
+
+ if (!strcmp(name, "msr"))
+ ff->ph->env.msr_pmu_type = type;
+
+ free(name);
+ pmu_num--;
+ }
+ ff->ph->env.pmu_mappings = strbuf_detach(&sb, NULL);
+ return 0;
+
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
+static int process_group_desc(struct feat_fd *ff, void *data __maybe_unused)
+{
+ int ret = -1;
+ u32 i, nr, nr_groups;
+ struct perf_session *session;
+ struct perf_evsel *evsel, *leader = NULL;
+ struct group_desc {
+ char *name;
+ u32 leader_idx;
+ u32 nr_members;
+ } *desc;
+
+ if (do_read_u32(ff, &nr_groups))
+ return -1;
+
+ ff->ph->env.nr_groups = nr_groups;
+ if (!nr_groups) {
+ pr_debug("group desc not available\n");
+ return 0;
+ }
+
+ desc = calloc(nr_groups, sizeof(*desc));
+ if (!desc)
+ return -1;
+
+ for (i = 0; i < nr_groups; i++) {
+ desc[i].name = do_read_string(ff);
+ if (!desc[i].name)
+ goto out_free;
+
+ if (do_read_u32(ff, &desc[i].leader_idx))
+ goto out_free;
+
+ if (do_read_u32(ff, &desc[i].nr_members))
+ goto out_free;
+ }
+
+ /*
+ * Rebuild group relationship based on the group_desc
+ */
+ session = container_of(ff->ph, struct perf_session, header);
+ session->evlist->nr_groups = nr_groups;
+
+ i = nr = 0;
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (i < nr_groups && evsel->idx == (int) desc[i].leader_idx) {
+ evsel->leader = evsel;
+ /* {anon_group} is a dummy name */
+ if (strcmp(desc[i].name, "{anon_group}")) {
+ evsel->group_name = desc[i].name;
+ desc[i].name = NULL;
+ }
+ evsel->nr_members = desc[i].nr_members;
+
+ if (nr > 0) {
+ pr_debug("invalid group desc\n");
+ goto out_free;
+ }
+
+ leader = evsel;
+ nr = evsel->nr_members - 1;
+ i++;
+ } else if (nr) {
+ /* This is a group member */
+ evsel->leader = leader;
+
+ nr--;
+ }
+ }
+
+ if (i != nr_groups || nr != 0) {
+ pr_debug("invalid group desc\n");
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ for (i = 0; i < nr_groups; i++)
+ zfree(&desc[i].name);
+ free(desc);
+
+ return ret;
+}
+
+static int process_auxtrace(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_session *session;
+ int err;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ err = auxtrace_index__process(ff->fd, ff->size, session,
+ ff->ph->needs_swap);
+ if (err < 0)
+ pr_err("Failed to process auxtrace index\n");
+ return err;
+}
+
+static int process_cache(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct cpu_cache_level *caches;
+ u32 cnt, i, version;
+
+ if (do_read_u32(ff, &version))
+ return -1;
+
+ if (version != 1)
+ return -1;
+
+ if (do_read_u32(ff, &cnt))
+ return -1;
+
+ caches = zalloc(sizeof(*caches) * cnt);
+ if (!caches)
+ return -1;
+
+ for (i = 0; i < cnt; i++) {
+ struct cpu_cache_level c;
+
+ #define _R(v) \
+ if (do_read_u32(ff, &c.v))\
+ goto out_free_caches; \
+
+ _R(level)
+ _R(line_size)
+ _R(sets)
+ _R(ways)
+ #undef _R
+
+ #define _R(v) \
+ c.v = do_read_string(ff); \
+ if (!c.v) \
+ goto out_free_caches;
+
+ _R(type)
+ _R(size)
+ _R(map)
+ #undef _R
+
+ caches[i] = c;
+ }
+
+ ff->ph->env.caches = caches;
+ ff->ph->env.caches_cnt = cnt;
+ return 0;
+out_free_caches:
+ free(caches);
+ return -1;
+}
+
+static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused)
+{
+ struct perf_session *session;
+ u64 first_sample_time, last_sample_time;
+ int ret;
+
+ session = container_of(ff->ph, struct perf_session, header);
+
+ ret = do_read_u64(ff, &first_sample_time);
+ if (ret)
+ return -1;
+
+ ret = do_read_u64(ff, &last_sample_time);
+ if (ret)
+ return -1;
+
+ session->evlist->first_sample_time = first_sample_time;
+ session->evlist->last_sample_time = last_sample_time;
+ return 0;
+}
+
+static int process_mem_topology(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ struct memory_node *nodes;
+ u64 version, i, nr, bsize;
+ int ret = -1;
+
+ if (do_read_u64(ff, &version))
+ return -1;
+
+ if (version != 1)
+ return -1;
+
+ if (do_read_u64(ff, &bsize))
+ return -1;
+
+ if (do_read_u64(ff, &nr))
+ return -1;
+
+ nodes = zalloc(sizeof(*nodes) * nr);
+ if (!nodes)
+ return -1;
+
+ for (i = 0; i < nr; i++) {
+ struct memory_node n;
+
+ #define _R(v) \
+ if (do_read_u64(ff, &n.v)) \
+ goto out; \
+
+ _R(node)
+ _R(size)
+
+ #undef _R
+
+ if (do_read_bitmap(ff, &n.set, &n.size))
+ goto out;
+
+ nodes[i] = n;
+ }
+
+ ff->ph->env.memory_bsize = bsize;
+ ff->ph->env.memory_nodes = nodes;
+ ff->ph->env.nr_memory_nodes = nr;
+ ret = 0;
+
+out:
+ if (ret)
+ free(nodes);
+ return ret;
+}
+
+struct feature_ops {
+ int (*write)(struct feat_fd *ff, struct perf_evlist *evlist);
+ void (*print)(struct feat_fd *ff, FILE *fp);
+ int (*process)(struct feat_fd *ff, void *data);
+ const char *name;
+ bool full_only;
+ bool synthesize;
+};
+
+#define FEAT_OPR(n, func, __full_only) \
+ [HEADER_##n] = { \
+ .name = __stringify(n), \
+ .write = write_##func, \
+ .print = print_##func, \
+ .full_only = __full_only, \
+ .process = process_##func, \
+ .synthesize = true \
+ }
+
+#define FEAT_OPN(n, func, __full_only) \
+ [HEADER_##n] = { \
+ .name = __stringify(n), \
+ .write = write_##func, \
+ .print = print_##func, \
+ .full_only = __full_only, \
+ .process = process_##func \
+ }
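+
+/*
+ * FEAT_OPR differs from FEAT_OPN only in setting .synthesize, marking
+ * features that can also be emitted as synthesized records in pipe mode.
+ */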
+
+/* feature_ops not implemented: */
+#define print_tracing_data NULL
+#define print_build_id NULL
+
+#define process_branch_stack NULL
+#define process_stat NULL
+
+
+static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+ FEAT_OPN(TRACING_DATA, tracing_data, false),
+ FEAT_OPN(BUILD_ID, build_id, false),
+ FEAT_OPR(HOSTNAME, hostname, false),
+ FEAT_OPR(OSRELEASE, osrelease, false),
+ FEAT_OPR(VERSION, version, false),
+ FEAT_OPR(ARCH, arch, false),
+ FEAT_OPR(NRCPUS, nrcpus, false),
+ FEAT_OPR(CPUDESC, cpudesc, false),
+ FEAT_OPR(CPUID, cpuid, false),
+ FEAT_OPR(TOTAL_MEM, total_mem, false),
+ FEAT_OPR(EVENT_DESC, event_desc, false),
+ FEAT_OPR(CMDLINE, cmdline, false),
+ FEAT_OPR(CPU_TOPOLOGY, cpu_topology, true),
+ FEAT_OPR(NUMA_TOPOLOGY, numa_topology, true),
+ FEAT_OPN(BRANCH_STACK, branch_stack, false),
+ FEAT_OPR(PMU_MAPPINGS, pmu_mappings, false),
+ FEAT_OPR(GROUP_DESC, group_desc, false),
+ FEAT_OPN(AUXTRACE, auxtrace, false),
+ FEAT_OPN(STAT, stat, false),
+ FEAT_OPN(CACHE, cache, true),
+ FEAT_OPR(SAMPLE_TIME, sample_time, false),
+ FEAT_OPR(MEM_TOPOLOGY, mem_topology, true),
+};
+
+struct header_print_data {
+ FILE *fp;
+ bool full; /* extended list of headers */
+};
+
+static int perf_file_section__fprintf_info(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data)
+{
+ struct header_print_data *hd = data;
+ struct feat_fd ff;
+
+ if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+ pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+ "%d, continuing...\n", section->offset, feat);
+ return 0;
+ }
+ if (feat >= HEADER_LAST_FEATURE) {
+ pr_warning("unknown feature %d\n", feat);
+ return 0;
+ }
+ if (!feat_ops[feat].print)
+ return 0;
+
+ ff = (struct feat_fd) {
+ .fd = fd,
+ .ph = ph,
+ };
+
+ if (!feat_ops[feat].full_only || hd->full)
+ feat_ops[feat].print(&ff, hd->fp);
+ else
+ fprintf(hd->fp, "# %s info available, use -I to display\n",
+ feat_ops[feat].name);
+
+ return 0;
+}
+
+int perf_header__fprintf_info(struct perf_session *session, FILE *fp, bool full)
+{
+ struct header_print_data hd;
+ struct perf_header *header = &session->header;
+ int fd = perf_data__fd(session->data);
+ struct stat st;
+ time_t stctime;
+ int ret, bit;
+
+ hd.fp = fp;
+ hd.full = full;
+
+ ret = fstat(fd, &st);
+ if (ret == -1)
+ return -1;
+
+ stctime = st.st_ctime;
+ fprintf(fp, "# captured on : %s", ctime(&stctime));
+
+ fprintf(fp, "# header version : %u\n", header->version);
+ fprintf(fp, "# data offset : %" PRIu64 "\n", header->data_offset);
+ fprintf(fp, "# data size : %" PRIu64 "\n", header->data_size);
+ fprintf(fp, "# feat offset : %" PRIu64 "\n", header->feat_offset);
+
+ perf_header__process_sections(header, fd, &hd,
+ perf_file_section__fprintf_info);
+
+ if (session->data->is_pipe)
+ return 0;
+
+ fprintf(fp, "# missing features: ");
+ for_each_clear_bit(bit, header->adds_features, HEADER_LAST_FEATURE) {
+ if (bit)
+ fprintf(fp, "%s ", feat_ops[bit].name);
+ }
+
+ fprintf(fp, "\n");
+ return 0;
+}
+
+static int do_write_feat(struct feat_fd *ff, int type,
+ struct perf_file_section **p,
+ struct perf_evlist *evlist)
+{
+ int err;
+ int ret = 0;
+
+ if (perf_header__has_feat(ff->ph, type)) {
+ if (!feat_ops[type].write)
+ return -1;
+
+ if (WARN(ff->buf, "Error: calling %s in pipe-mode.\n", __func__))
+ return -1;
+
+ (*p)->offset = lseek(ff->fd, 0, SEEK_CUR);
+
+ err = feat_ops[type].write(ff, evlist);
+ if (err < 0) {
+ pr_debug("failed to write feature %s\n", feat_ops[type].name);
+
+ /* undo anything written */
+ lseek(ff->fd, (*p)->offset, SEEK_SET);
+
+ return -1;
+ }
+ (*p)->size = lseek(ff->fd, 0, SEEK_CUR) - (*p)->offset;
+ (*p)++;
+ }
+ return ret;
+}
+
+static int perf_header__adds_write(struct perf_header *header,
+ struct perf_evlist *evlist, int fd)
+{
+ int nr_sections;
+ struct feat_fd ff;
+ struct perf_file_section *feat_sec, *p;
+ int sec_size;
+ u64 sec_start;
+ int feat;
+ int err;
+
+ ff = (struct feat_fd){
+ .fd = fd,
+ .ph = header,
+ };
+
+ nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+ if (!nr_sections)
+ return 0;
+
+ feat_sec = p = calloc(nr_sections, sizeof(*feat_sec));
+ if (feat_sec == NULL)
+ return -ENOMEM;
+
+ sec_size = sizeof(*feat_sec) * nr_sections;
+
+ sec_start = header->feat_offset;
+ lseek(fd, sec_start + sec_size, SEEK_SET);
+
+ for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+ if (do_write_feat(&ff, feat, &p, evlist))
+ perf_header__clear_feat(header, feat);
+ }
+
+ lseek(fd, sec_start, SEEK_SET);
+ /*
+ * may write more than needed due to dropped features, but
+ * this is okay: the reader will skip the missing entries
+ */
+ err = do_write(&ff, feat_sec, sec_size);
+ if (err < 0)
+ pr_debug("failed to write feature section\n");
+ free(feat_sec);
+ return err;
+}
+
+int perf_header__write_pipe(int fd)
+{
+ struct perf_pipe_file_header f_header;
+ struct feat_fd ff;
+ int err;
+
+ ff = (struct feat_fd){ .fd = fd };
+
+ f_header = (struct perf_pipe_file_header){
+ .magic = PERF_MAGIC,
+ .size = sizeof(f_header),
+ };
+
+ err = do_write(&ff, &f_header, sizeof(f_header));
+ if (err < 0) {
+ pr_debug("failed to write perf pipe header\n");
+ return err;
+ }
+
+ return 0;
+}
+
+int perf_session__write_header(struct perf_session *session,
+ struct perf_evlist *evlist,
+ int fd, bool at_exit)
+{
+ struct perf_file_header f_header;
+ struct perf_file_attr f_attr;
+ struct perf_header *header = &session->header;
+ struct perf_evsel *evsel;
+ struct feat_fd ff;
+ u64 attr_offset;
+ int err;
+
+ ff = (struct feat_fd){ .fd = fd};
+ lseek(fd, sizeof(f_header), SEEK_SET);
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ evsel->id_offset = lseek(fd, 0, SEEK_CUR);
+ err = do_write(&ff, evsel->id, evsel->ids * sizeof(u64));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ return err;
+ }
+ }
+
+ attr_offset = lseek(ff.fd, 0, SEEK_CUR);
+
+ evlist__for_each_entry(evlist, evsel) {
+ f_attr = (struct perf_file_attr){
+ .attr = evsel->attr,
+ .ids = {
+ .offset = evsel->id_offset,
+ .size = evsel->ids * sizeof(u64),
+ }
+ };
+ err = do_write(&ff, &f_attr, sizeof(f_attr));
+ if (err < 0) {
+ pr_debug("failed to write perf header attribute\n");
+ return err;
+ }
+ }
+
+ if (!header->data_offset)
+ header->data_offset = lseek(fd, 0, SEEK_CUR);
+ header->feat_offset = header->data_offset + header->data_size;
+
+ if (at_exit) {
+ err = perf_header__adds_write(header, evlist, fd);
+ if (err < 0)
+ return err;
+ }
+
+ f_header = (struct perf_file_header){
+ .magic = PERF_MAGIC,
+ .size = sizeof(f_header),
+ .attr_size = sizeof(f_attr),
+ .attrs = {
+ .offset = attr_offset,
+ .size = evlist->nr_entries * sizeof(f_attr),
+ },
+ .data = {
+ .offset = header->data_offset,
+ .size = header->data_size,
+ },
+ /* event_types is ignored, store zeros */
+ };
+
+ memcpy(&f_header.adds_features, &header->adds_features, sizeof(header->adds_features));
+
+ lseek(fd, 0, SEEK_SET);
+ err = do_write(&ff, &f_header, sizeof(f_header));
+ if (err < 0) {
+ pr_debug("failed to write perf header\n");
+ return err;
+ }
+ lseek(fd, header->data_offset + header->data_size, SEEK_SET);
+
+ return 0;
+}
+
+static int perf_header__getbuffer64(struct perf_header *header,
+ int fd, void *buf, size_t size)
+{
+ if (readn(fd, buf, size) <= 0)
+ return -1;
+
+ if (header->needs_swap)
+ mem_bswap_64(buf, size);
+
+ return 0;
+}
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+ void *data,
+ int (*process)(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data))
+{
+ struct perf_file_section *feat_sec, *sec;
+ int nr_sections;
+ int sec_size;
+ int feat;
+ int err;
+
+ nr_sections = bitmap_weight(header->adds_features, HEADER_FEAT_BITS);
+ if (!nr_sections)
+ return 0;
+
+ feat_sec = sec = calloc(nr_sections, sizeof(*feat_sec));
+ if (!feat_sec)
+ return -1;
+
+ sec_size = sizeof(*feat_sec) * nr_sections;
+
+ lseek(fd, header->feat_offset, SEEK_SET);
+
+ err = perf_header__getbuffer64(header, fd, feat_sec, sec_size);
+ if (err < 0)
+ goto out_free;
+
+ for_each_set_bit(feat, header->adds_features, HEADER_LAST_FEATURE) {
+ err = process(sec++, header, feat, fd, data);
+ if (err < 0)
+ goto out_free;
+ }
+ err = 0;
+out_free:
+ free(feat_sec);
+ return err;
+}
+
+static const int attr_file_abi_sizes[] = {
+ [0] = PERF_ATTR_SIZE_VER0,
+ [1] = PERF_ATTR_SIZE_VER1,
+ [2] = PERF_ATTR_SIZE_VER2,
+ [3] = PERF_ATTR_SIZE_VER3,
+ [4] = PERF_ATTR_SIZE_VER4,
+ 0,
+};
+
+/*
+ * In the legacy file format the magic number does not encode endianness;
+ * hdr_sz was used for that instead. But given that hdr_sz can vary based
+ * on the ABI revision, we need to try every (ABI size, endianness)
+ * combination to detect the endianness.
+ */
+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+ uint64_t ref_size, attr_size;
+ int i;
+
+ for (i = 0 ; attr_file_abi_sizes[i]; i++) {
+ ref_size = attr_file_abi_sizes[i]
+ + sizeof(struct perf_file_section);
+ if (hdr_sz != ref_size) {
+ attr_size = bswap_64(hdr_sz);
+ if (attr_size != ref_size)
+ continue;
+
+ ph->needs_swap = true;
+ }
+ pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
+ i,
+ ph->needs_swap);
+ return 0;
+ }
+ /* could not determine endianness */
+ return -1;
+}
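+
+/*
+ * Illustrative sketch (not part of the upstream file): the test above,
+ * reduced to a single candidate size. bswap_64() is the same byteswap
+ * helper used throughout this file.
+ */
+#if 0
+static bool example_size_matches(uint64_t hdr_sz, uint64_t ref_size,
+ bool *needs_swap)
+{
+ if (hdr_sz == ref_size) {
+ *needs_swap = false; /* same byte order as this host */
+ return true;
+ }
+ if (bswap_64(hdr_sz) == ref_size) {
+ *needs_swap = true; /* written in the opposite byte order */
+ return true;
+ }
+ return false; /* not this ABI revision */
+}
+#endif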
+
+#define PERF_PIPE_HDR_VER0 16
+
+static const size_t attr_pipe_abi_sizes[] = {
+ [0] = PERF_PIPE_HDR_VER0,
+ 0,
+};
+
+/*
+ * In the legacy pipe format, there is an implicit assumption that the
+ * endianness of the host recording the samples and of the host parsing them
+ * is the same. This does not always hold: the pipe output can be redirected
+ * into a file and analyzed on a different machine, possibly with a different
+ * endianness and perf_event ABI revision in the perf tool itself.
+ */
+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+ u64 attr_size;
+ int i;
+
+ for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
+ if (hdr_sz != attr_pipe_abi_sizes[i]) {
+ attr_size = bswap_64(hdr_sz);
+ if (attr_size != hdr_sz)
+ continue;
+
+ ph->needs_swap = true;
+ }
+ pr_debug("Pipe ABI%d perf.data file detected\n", i);
+ return 0;
+ }
+ return -1;
+}
+
+bool is_perf_magic(u64 magic)
+{
+ if (!memcmp(&magic, __perf_magic1, sizeof(magic))
+ || magic == __perf_magic2
+ || magic == __perf_magic2_sw)
+ return true;
+
+ return false;
+}
+
+static int check_magic_endian(u64 magic, uint64_t hdr_sz,
+ bool is_pipe, struct perf_header *ph)
+{
+ int ret;
+
+ /* check for legacy format */
+ ret = memcmp(&magic, __perf_magic1, sizeof(magic));
+ if (ret == 0) {
+ ph->version = PERF_HEADER_VERSION_1;
+ pr_debug("legacy perf.data format\n");
+ if (is_pipe)
+ return try_all_pipe_abis(hdr_sz, ph);
+
+ return try_all_file_abis(hdr_sz, ph);
+ }
+ /*
+ * the new magic number serves two purposes:
+ * - unique number to identify actual perf.data files
+ * - encode endianness of file
+ */
+ ph->version = PERF_HEADER_VERSION_2;
+
+ /* check magic number with one endianness */
+ if (magic == __perf_magic2)
+ return 0;
+
+ /* check magic number with opposite endianness */
+ if (magic != __perf_magic2_sw)
+ return -1;
+
+ ph->needs_swap = true;
+
+ return 0;
+}
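+
+/*
+ * Illustrative sketch (not part of the upstream file): the version-2 magic
+ * is the ASCII string "PERFILE2"; read on an opposite-endian host it shows
+ * up byte-swapped, which is what the __perf_magic2_sw comparison catches.
+ * assert() would need <assert.h>, not included here.
+ */
+#if 0
+static void example_magic2(void)
+{
+ const u64 magic2 = 0x32454c4946524550ULL; /* "PERFILE2" */
+ const u64 magic2_sw = 0x50455246494c4532ULL; /* same, byte-swapped */
+
+ assert(bswap_64(magic2) == magic2_sw);
+}
+#endif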
+
+int perf_file_header__read(struct perf_file_header *header,
+ struct perf_header *ph, int fd)
+{
+ ssize_t ret;
+
+ lseek(fd, 0, SEEK_SET);
+
+ ret = readn(fd, header, sizeof(*header));
+ if (ret <= 0)
+ return -1;
+
+ if (check_magic_endian(header->magic,
+ header->attr_size, false, ph) < 0) {
+ pr_debug("magic/endian check failed\n");
+ return -1;
+ }
+
+ if (ph->needs_swap) {
+ mem_bswap_64(header, offsetof(struct perf_file_header,
+ adds_features));
+ }
+
+ if (header->size != sizeof(*header)) {
+ /* Support the previous format */
+ if (header->size == offsetof(typeof(*header), adds_features))
+ bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+ else
+ return -1;
+ } else if (ph->needs_swap) {
+ /*
+ * feature bitmap is declared as an array of unsigned longs --
+ * not good since its size can differ between the host that
+ * generated the data file and the host analyzing the file.
+ *
+ * We need to handle endianness, but we don't know the size of
+ * the unsigned long where the file was generated. Take a best
+ * guess at determining it: try a 64-bit swap first (i.e., file
+ * created on a 64-bit host), and check if the hostname feature
+ * bit is set (this feature bit is forced on as of fbe96f2).
+ * If the bit is not set, undo the 64-bit swap and try a 32-bit
+ * swap. If the hostname bit is still not set (e.g., older data
+ * file), punt and fall back to the original behavior --
+ * clearing all feature bits and setting buildid.
+ */
+ mem_bswap_64(&header->adds_features,
+ BITS_TO_U64(HEADER_FEAT_BITS));
+
+ if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+ /* unswap as u64 */
+ mem_bswap_64(&header->adds_features,
+ BITS_TO_U64(HEADER_FEAT_BITS));
+
+ /* unswap as u32 */
+ mem_bswap_32(&header->adds_features,
+ BITS_TO_U32(HEADER_FEAT_BITS));
+ }
+
+ if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
+ bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
+ set_bit(HEADER_BUILD_ID, header->adds_features);
+ }
+ }
+
+ memcpy(&ph->adds_features, &header->adds_features,
+ sizeof(ph->adds_features));
+
+ ph->data_offset = header->data.offset;
+ ph->data_size = header->data.size;
+ ph->feat_offset = header->data.offset + header->data.size;
+ return 0;
+}
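+
+/*
+ * Illustrative sketch (not part of the upstream file): the bitmap-swap
+ * heuristic above in isolation. A set bit lands in different bytes
+ * depending on the generating host's unsigned long size, hence the two
+ * attempts.
+ */
+#if 0
+static bool example_bitmap_swap(unsigned long *bits, int nbytes, int feat_bit)
+{
+ /* try a 64-bit swap first (file from a 64-bit host) */
+ mem_bswap_64(bits, nbytes);
+ if (test_bit(feat_bit, bits))
+ return true;
+
+ /* undo it, then try a 32-bit swap (file from a 32-bit host) */
+ mem_bswap_64(bits, nbytes);
+ mem_bswap_32(bits, nbytes);
+ return test_bit(feat_bit, bits);
+}
+#endif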
+
+static int perf_file_section__process(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data)
+{
+ struct feat_fd fdd = {
+ .fd = fd,
+ .ph = ph,
+ .size = section->size,
+ .offset = section->offset,
+ };
+
+ if (lseek(fd, section->offset, SEEK_SET) == (off_t)-1) {
+ pr_debug("Failed to lseek to %" PRIu64 " offset for feature "
+ "%d, continuing...\n", section->offset, feat);
+ return 0;
+ }
+
+ if (feat >= HEADER_LAST_FEATURE) {
+ pr_debug("unknown feature %d, continuing...\n", feat);
+ return 0;
+ }
+
+ if (!feat_ops[feat].process)
+ return 0;
+
+ return feat_ops[feat].process(&fdd, data);
+}
+
+static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
+ struct perf_header *ph, int fd,
+ bool repipe)
+{
+ struct feat_fd ff = {
+ .fd = STDOUT_FILENO,
+ .ph = ph,
+ };
+ ssize_t ret;
+
+ ret = readn(fd, header, sizeof(*header));
+ if (ret <= 0)
+ return -1;
+
+ if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
+ pr_debug("endian/magic failed\n");
+ return -1;
+ }
+
+ if (ph->needs_swap)
+ header->size = bswap_64(header->size);
+
+ if (repipe && do_write(&ff, header, sizeof(*header)) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int perf_header__read_pipe(struct perf_session *session)
+{
+ struct perf_header *header = &session->header;
+ struct perf_pipe_file_header f_header;
+
+ if (perf_file_header__read_pipe(&f_header, header,
+ perf_data__fd(session->data),
+ session->repipe) < 0) {
+ pr_debug("incompatible file format\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int read_attr(int fd, struct perf_header *ph,
+ struct perf_file_attr *f_attr)
+{
+ struct perf_event_attr *attr = &f_attr->attr;
+ size_t sz, left;
+ size_t our_sz = sizeof(f_attr->attr);
+ ssize_t ret;
+
+ memset(f_attr, 0, sizeof(*f_attr));
+
+ /* read minimal guaranteed structure */
+ ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
+ if (ret <= 0) {
+ pr_debug("cannot read %d bytes of header attr\n",
+ PERF_ATTR_SIZE_VER0);
+ return -1;
+ }
+
+ /* perf_event_attr size as recorded in the file */
+ sz = attr->size;
+
+ if (ph->needs_swap)
+ sz = bswap_32(sz);
+
+ if (sz == 0) {
+ /* assume ABI0 */
+ sz = PERF_ATTR_SIZE_VER0;
+ } else if (sz > our_sz) {
+ pr_debug("file uses a more recent and unsupported ABI"
+ " (%zu bytes extra)\n", sz - our_sz);
+ return -1;
+ }
+ /* the part we have not yet read but know about */
+ left = sz - PERF_ATTR_SIZE_VER0;
+ if (left) {
+ void *ptr = attr;
+ ptr += PERF_ATTR_SIZE_VER0;
+
+ ret = readn(fd, ptr, left);
+ if (ret <= 0)
+ return -1;
+ }
+ /* read perf_file_section, ids are read in caller */
+ ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
+
+ return ret <= 0 ? -1 : 0;
+}
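+
+/*
+ * Illustrative sketch (not part of the upstream file): the size negotiation
+ * performed by read_attr() above, in isolation.
+ */
+#if 0
+static ssize_t example_attr_payload_size(u32 on_file_sz, size_t our_sz)
+{
+ if (on_file_sz == 0)
+ return PERF_ATTR_SIZE_VER0; /* old tool, assume ABI0 */
+ if (on_file_sz > our_sz)
+ return -1; /* newer, unsupported ABI */
+ return on_file_sz; /* read exactly this much */
+}
+#endif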
+
+static int perf_evsel__prepare_tracepoint_event(struct perf_evsel *evsel,
+ struct tep_handle *pevent)
+{
+ struct event_format *event;
+ char bf[128];
+
+ /* already prepared */
+ if (evsel->tp_format)
+ return 0;
+
+ if (pevent == NULL) {
+ pr_debug("broken or missing trace data\n");
+ return -1;
+ }
+
+ event = tep_find_event(pevent, evsel->attr.config);
+ if (event == NULL) {
+ pr_debug("cannot find event format for %d\n", (int)evsel->attr.config);
+ return -1;
+ }
+
+ if (!evsel->name) {
+ snprintf(bf, sizeof(bf), "%s:%s", event->system, event->name);
+ evsel->name = strdup(bf);
+ if (evsel->name == NULL)
+ return -1;
+ }
+
+ evsel->tp_format = event;
+ return 0;
+}
+
+static int perf_evlist__prepare_tracepoint_events(struct perf_evlist *evlist,
+ struct tep_handle *pevent)
+{
+ struct perf_evsel *pos;
+
+ evlist__for_each_entry(evlist, pos) {
+ if (pos->attr.type == PERF_TYPE_TRACEPOINT &&
+ perf_evsel__prepare_tracepoint_event(pos, pevent))
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_session__read_header(struct perf_session *session)
+{
+ struct perf_data *data = session->data;
+ struct perf_header *header = &session->header;
+ struct perf_file_header f_header;
+ struct perf_file_attr f_attr;
+ u64 f_id;
+ int nr_attrs, nr_ids, i, j;
+ int fd = perf_data__fd(data);
+
+ session->evlist = perf_evlist__new();
+ if (session->evlist == NULL)
+ return -ENOMEM;
+
+ session->evlist->env = &header->env;
+ session->machines.host.env = &header->env;
+ if (perf_data__is_pipe(data))
+ return perf_header__read_pipe(session);
+
+ if (perf_file_header__read(&f_header, header, fd) < 0)
+ return -EINVAL;
+
+ /*
+ * Sanity check that perf.data was written cleanly; data size is
+ * initialized to 0 and updated only if the on_exit function is run.
+ * If data size is still 0 then the file contains only partial
+ * information. Just warn the user and process as much of it as we can.
+ */
+ if (f_header.data.size == 0) {
+ pr_warning("WARNING: The %s file's data size field is 0 which is unexpected.\n"
+ "Was the 'perf record' command properly terminated?\n",
+ data->file.path);
+ }
+
+ if (f_header.attr_size == 0) {
+ pr_err("ERROR: The %s file's attr size field is 0 which is unexpected.\n"
+ "Was the 'perf record' command properly terminated?\n",
+ data->file.path);
+ return -EINVAL;
+ }
+
+ nr_attrs = f_header.attrs.size / f_header.attr_size;
+ lseek(fd, f_header.attrs.offset, SEEK_SET);
+
+ for (i = 0; i < nr_attrs; i++) {
+ struct perf_evsel *evsel;
+ off_t tmp;
+
+ if (read_attr(fd, header, &f_attr) < 0)
+ goto out_errno;
+
+ if (header->needs_swap) {
+ f_attr.ids.size = bswap_64(f_attr.ids.size);
+ f_attr.ids.offset = bswap_64(f_attr.ids.offset);
+ perf_event__attr_swap(&f_attr.attr);
+ }
+
+ tmp = lseek(fd, 0, SEEK_CUR);
+ evsel = perf_evsel__new(&f_attr.attr);
+
+ if (evsel == NULL)
+ goto out_delete_evlist;
+
+ evsel->needs_swap = header->needs_swap;
+ /*
+ * Do it before so that if perf_evsel__alloc_id fails, this
+ * entry gets purged too at perf_evlist__delete().
+ */
+ perf_evlist__add(session->evlist, evsel);
+
+ nr_ids = f_attr.ids.size / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+ * nr_ids threads.
+ */
+ if (perf_evsel__alloc_id(evsel, 1, nr_ids))
+ goto out_delete_evlist;
+
+ lseek(fd, f_attr.ids.offset, SEEK_SET);
+
+ for (j = 0; j < nr_ids; j++) {
+ if (perf_header__getbuffer64(header, fd, &f_id, sizeof(f_id)))
+ goto out_errno;
+
+ perf_evlist__id_add(session->evlist, evsel, 0, j, f_id);
+ }
+
+ lseek(fd, tmp, SEEK_SET);
+ }
+
+ perf_header__process_sections(header, fd, &session->tevent,
+ perf_file_section__process);
+
+ if (perf_evlist__prepare_tracepoint_events(session->evlist,
+ session->tevent.pevent))
+ goto out_delete_evlist;
+
+ return 0;
+out_errno:
+ return -errno;
+
+out_delete_evlist:
+ perf_evlist__delete(session->evlist);
+ session->evlist = NULL;
+ return -ENOMEM;
+}
+
+int perf_event__synthesize_attr(struct perf_tool *tool,
+ struct perf_event_attr *attr, u32 ids, u64 *id,
+ perf_event__handler_t process)
+{
+ union perf_event *ev;
+ size_t size;
+ int err;
+
+ size = sizeof(struct perf_event_attr);
+ size = PERF_ALIGN(size, sizeof(u64));
+ size += sizeof(struct perf_event_header);
+ size += ids * sizeof(u64);
+
+ ev = zalloc(size);
+
+ if (ev == NULL)
+ return -ENOMEM;
+
+ ev->attr.attr = *attr;
+ memcpy(ev->attr.id, id, ids * sizeof(u64));
+
+ ev->attr.header.type = PERF_RECORD_HEADER_ATTR;
+ ev->attr.header.size = (u16)size;
+
+ /* if the u16 cast truncated size, the record cannot be encoded */
+ if (ev->attr.header.size == size)
+ err = process(tool, ev, NULL, NULL);
+ else
+ err = -E2BIG;
+
+ free(ev);
+
+ return err;
+}
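+
+/*
+ * Illustrative sketch (not part of the upstream file): the record built
+ * above is laid out as
+ *
+ * [perf_event_header][perf_event_attr, u64-aligned][id 0]..[id n-1]
+ *
+ * and its size must round-trip through the u16 header.size field.
+ */
+#if 0
+static size_t example_attr_record_size(u32 ids)
+{
+ size_t size = PERF_ALIGN(sizeof(struct perf_event_attr), sizeof(u64));
+
+ return sizeof(struct perf_event_header) + size + ids * sizeof(u64);
+}
+#endif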
+
+int perf_event__synthesize_features(struct perf_tool *tool,
+ struct perf_session *session,
+ struct perf_evlist *evlist,
+ perf_event__handler_t process)
+{
+ struct perf_header *header = &session->header;
+ struct feat_fd ff;
+ struct feature_event *fe;
+ size_t sz, sz_hdr;
+ int feat, ret;
+
+ sz_hdr = sizeof(fe->header);
+ sz = sizeof(union perf_event);
+ /* get a nice alignment */
+ sz = PERF_ALIGN(sz, page_size);
+
+ memset(&ff, 0, sizeof(ff));
+
+ ff.buf = malloc(sz);
+ if (!ff.buf)
+ return -ENOMEM;
+
+ ff.size = sz - sz_hdr;
+
+ for_each_set_bit(feat, header->adds_features, HEADER_FEAT_BITS) {
+ if (!feat_ops[feat].synthesize) {
+ pr_debug("No record header feature for header :%d\n", feat);
+ continue;
+ }
+
+ ff.offset = sizeof(*fe);
+
+ ret = feat_ops[feat].write(&ff, evlist);
+ if (ret || ff.offset <= (ssize_t)sizeof(*fe)) {
+ pr_debug("Error writing feature\n");
+ continue;
+ }
+ /* ff.buf may have changed due to realloc in do_write() */
+ fe = ff.buf;
+ memset(fe, 0, sizeof(*fe));
+
+ fe->feat_id = feat;
+ fe->header.type = PERF_RECORD_HEADER_FEATURE;
+ fe->header.size = ff.offset;
+
+ ret = process(tool, ff.buf, NULL, NULL);
+ if (ret) {
+ free(ff.buf);
+ return ret;
+ }
+ }
+
+ /* Send HEADER_LAST_FEATURE mark. */
+ fe = ff.buf;
+ fe->feat_id = HEADER_LAST_FEATURE;
+ fe->header.type = PERF_RECORD_HEADER_FEATURE;
+ fe->header.size = sizeof(*fe);
+
+ ret = process(tool, ff.buf, NULL, NULL);
+
+ free(ff.buf);
+ return ret;
+}
+
+int perf_event__process_feature(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct feat_fd ff = { .fd = 0 };
+ struct feature_event *fe = (struct feature_event *)event;
+ int type = fe->header.type;
+ u64 feat = fe->feat_id;
+
+ if (type < 0 || type >= PERF_RECORD_HEADER_MAX) {
+ pr_warning("invalid record type %d in pipe-mode\n", type);
+ return 0;
+ }
+ if (feat == HEADER_RESERVED || feat >= HEADER_LAST_FEATURE) {
+ pr_warning("invalid record type %d in pipe-mode\n", type);
+ return -1;
+ }
+
+ if (!feat_ops[feat].process)
+ return 0;
+
+ ff.buf = (void *)fe->data;
+ ff.size = event->header.size - sizeof(*fe);
+ ff.ph = &session->header;
+
+ if (feat_ops[feat].process(&ff, NULL))
+ return -1;
+
+ if (!feat_ops[feat].print || !tool->show_feat_hdr)
+ return 0;
+
+ if (!feat_ops[feat].full_only ||
+ tool->show_feat_hdr >= SHOW_FEAT_HEADER_FULL_INFO) {
+ feat_ops[feat].print(&ff, stdout);
+ } else {
+ fprintf(stdout, "# %s info available, use -I to display\n",
+ feat_ops[feat].name);
+ }
+
+ return 0;
+}
+
+static struct event_update_event *
+event_update_event__new(size_t size, u64 type, u64 id)
+{
+ struct event_update_event *ev;
+
+ size += sizeof(*ev);
+ size = PERF_ALIGN(size, sizeof(u64));
+
+ ev = zalloc(size);
+ if (ev) {
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = type;
+ ev->id = id;
+ }
+ return ev;
+}
+
+int
+perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ size_t size = strlen(evsel->unit);
+ int err;
+
+ ev = event_update_event__new(size + 1, PERF_EVENT_UPDATE__UNIT, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strlcpy(ev->data, evsel->unit, size + 1);
+ err = process(tool, (union perf_event *)ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ struct event_update_event_scale *ev_data;
+ int err;
+
+ ev = event_update_event__new(sizeof(*ev_data), PERF_EVENT_UPDATE__SCALE, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ ev_data = (struct event_update_event_scale *) ev->data;
+ ev_data->scale = evsel->scale;
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_name(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ struct event_update_event *ev;
+ size_t len = strlen(evsel->name);
+ int err;
+
+ ev = event_update_event__new(len + 1, PERF_EVENT_UPDATE__NAME, evsel->id[0]);
+ if (ev == NULL)
+ return -ENOMEM;
+
+ strlcpy(ev->data, evsel->name, len + 1);
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
+
+int
+perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ size_t size = sizeof(struct event_update_event);
+ struct event_update_event *ev;
+ int max, err;
+ u16 type;
+
+ if (!evsel->own_cpus)
+ return 0;
+
+ ev = cpu_map_data__alloc(evsel->own_cpus, &size, &type, &max);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->header.type = PERF_RECORD_EVENT_UPDATE;
+ ev->header.size = (u16)size;
+ ev->type = PERF_EVENT_UPDATE__CPUS;
+ ev->id = evsel->id[0];
+
+ cpu_map_data__synthesize((struct cpu_map_data *) ev->data,
+ evsel->own_cpus,
+ type, max);
+
+ err = process(tool, (union perf_event*) ev, NULL, NULL);
+ free(ev);
+ return err;
+}
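+
+/*
+ * Illustrative sketch (not part of the upstream file): emitting every
+ * event-update record for one evsel; perf_event__synthesize_extra_attr()
+ * below is the real, conditional version of this.
+ */
+#if 0
+static int example_update_all(struct perf_tool *tool, struct perf_evsel *evsel,
+ perf_event__handler_t process)
+{
+ if (perf_event__synthesize_event_update_unit(tool, evsel, process) ||
+ perf_event__synthesize_event_update_scale(tool, evsel, process) ||
+ perf_event__synthesize_event_update_name(tool, evsel, process) ||
+ perf_event__synthesize_event_update_cpus(tool, evsel, process))
+ return -1;
+ return 0;
+}
+#endif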
+
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp)
+{
+ struct event_update_event *ev = &event->event_update;
+ struct event_update_event_scale *ev_scale;
+ struct event_update_event_cpus *ev_cpus;
+ struct cpu_map *map;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id: %" PRIu64 "\n", ev->id);
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__SCALE:
+ ev_scale = (struct event_update_event_scale *) ev->data;
+ ret += fprintf(fp, "... scale: %f\n", ev_scale->scale);
+ break;
+ case PERF_EVENT_UPDATE__UNIT:
+ ret += fprintf(fp, "... unit: %s\n", ev->data);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ ret += fprintf(fp, "... name: %s\n", ev->data);
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ ev_cpus = (struct event_update_event_cpus *) ev->data;
+ ret += fprintf(fp, "... ");
+
+ map = cpu_map__new_data(&ev_cpus->cpus);
+ if (map)
+ ret += cpu_map__fprintf(map, fp);
+ else
+ ret += fprintf(fp, "failed to get cpus\n");
+ break;
+ default:
+ ret += fprintf(fp, "... unknown type\n");
+ break;
+ }
+
+ return ret;
+}
+
+int perf_event__synthesize_attrs(struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process)
+{
+ struct perf_evsel *evsel;
+ int err = 0;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ err = perf_event__synthesize_attr(tool, &evsel->attr, evsel->ids,
+ evsel->id, process);
+ if (err) {
+ pr_debug("failed to create perf header attribute\n");
+ return err;
+ }
+ }
+
+ return err;
+}
+
+static bool has_unit(struct perf_evsel *counter)
+{
+ return counter->unit && *counter->unit;
+}
+
+static bool has_scale(struct perf_evsel *counter)
+{
+ return counter->scale != 1;
+}
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+ struct perf_evlist *evsel_list,
+ perf_event__handler_t process,
+ bool is_pipe)
+{
+ struct perf_evsel *counter;
+ int err;
+
+ /*
+ * Synthesize the extra event attributes not carried within
+ * the attr event: unit, scale, name.
+ */
+ evlist__for_each_entry(evsel_list, counter) {
+ if (!counter->supported)
+ continue;
+
+ /*
+ * Synthesize unit and scale only if it's defined.
+ */
+ if (has_unit(counter)) {
+ err = perf_event__synthesize_event_update_unit(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel unit.\n");
+ return err;
+ }
+ }
+
+ if (has_scale(counter)) {
+ err = perf_event__synthesize_event_update_scale(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel counter.\n");
+ return err;
+ }
+ }
+
+ if (counter->own_cpus) {
+ err = perf_event__synthesize_event_update_cpus(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel cpus.\n");
+ return err;
+ }
+ }
+
+ /*
+ * Name is needed only for pipe output;
+ * perf.data already carries event names.
+ */
+ if (is_pipe) {
+ err = perf_event__synthesize_event_update_name(tool, counter, process);
+ if (err < 0) {
+ pr_err("Couldn't synthesize evsel name.\n");
+ return err;
+ }
+ }
+ }
+ return 0;
+}
+
+int perf_event__process_attr(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_evlist **pevlist)
+{
+ u32 i, ids, n_ids;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = *pevlist;
+
+ if (evlist == NULL) {
+ *pevlist = evlist = perf_evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+ }
+
+ evsel = perf_evsel__new(&event->attr.attr);
+ if (evsel == NULL)
+ return -ENOMEM;
+
+ perf_evlist__add(evlist, evsel);
+
+ ids = event->header.size;
+ ids -= (void *)&event->attr.id - (void *)event;
+ n_ids = ids / sizeof(u64);
+ /*
+ * We don't have the cpu and thread maps on the header, so
+ * for allocating the perf_sample_id table we fake 1 cpu and
+ * n_ids threads.
+ */
+ if (perf_evsel__alloc_id(evsel, 1, n_ids))
+ return -ENOMEM;
+
+ for (i = 0; i < n_ids; i++) {
+ perf_evlist__id_add(evlist, evsel, 0, i, event->attr.id[i]);
+ }
+
+ return 0;
+}
+
+int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_evlist **pevlist)
+{
+ struct event_update_event *ev = &event->event_update;
+ struct event_update_event_scale *ev_scale;
+ struct event_update_event_cpus *ev_cpus;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct cpu_map *map;
+
+ if (!pevlist || *pevlist == NULL)
+ return -EINVAL;
+
+ evlist = *pevlist;
+
+ evsel = perf_evlist__id2evsel(evlist, ev->id);
+ if (evsel == NULL)
+ return -EINVAL;
+
+ switch (ev->type) {
+ case PERF_EVENT_UPDATE__UNIT:
+ evsel->unit = strdup(ev->data);
+ break;
+ case PERF_EVENT_UPDATE__NAME:
+ evsel->name = strdup(ev->data);
+ break;
+ case PERF_EVENT_UPDATE__SCALE:
+ ev_scale = (struct event_update_event_scale *) ev->data;
+ evsel->scale = ev_scale->scale;
+ break;
+ case PERF_EVENT_UPDATE__CPUS:
+ ev_cpus = (struct event_update_event_cpus *) ev->data;
+
+ map = cpu_map__new_data(&ev_cpus->cpus);
+ if (map)
+ evsel->own_cpus = map;
+ else
+ pr_err("failed to get event_update cpus\n");
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd,
+ struct perf_evlist *evlist,
+ perf_event__handler_t process)
+{
+ union perf_event ev;
+ struct tracing_data *tdata;
+ ssize_t size = 0, aligned_size = 0, padding;
+ struct feat_fd ff;
+ int err __maybe_unused = 0;
+
+ /*
+ * We are going to store the size of the data followed
+ * by the data contents. Since the fd is a pipe, we cannot
+ * seek back to store the size of the data once we know it.
+ * Instead we:
+ *
+ * - write the tracing data to the temp file
+ * - get/write the data size to pipe
+ * - write the tracing data from the temp file
+ * to the pipe
+ */
+ tdata = tracing_data_get(&evlist->entries, fd, true);
+ if (!tdata)
+ return -1;
+
+ memset(&ev, 0, sizeof(ev));
+
+ ev.tracing_data.header.type = PERF_RECORD_HEADER_TRACING_DATA;
+ size = tdata->size;
+ aligned_size = PERF_ALIGN(size, sizeof(u64));
+ padding = aligned_size - size;
+ ev.tracing_data.header.size = sizeof(ev.tracing_data);
+ ev.tracing_data.size = aligned_size;
+
+ process(tool, &ev, NULL, NULL);
+
+ /*
+ * The put function will copy all the tracing data
+ * stored in temp file to the pipe.
+ */
+ tracing_data_put(tdata);
+
+ ff = (struct feat_fd){ .fd = fd };
+ if (write_padded(&ff, NULL, 0, padding))
+ return -1;
+
+ return aligned_size;
+}
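+
+/*
+ * Illustrative sketch (not part of the upstream file): the reader's side of
+ * the size-then-payload pipe protocol described above.
+ */
+#if 0
+static ssize_t example_read_tracing_data(int fd, void *buf, size_t bufsz)
+{
+ struct tracing_data_event ev;
+
+ if (readn(fd, &ev, sizeof(ev)) != (ssize_t)sizeof(ev))
+ return -1;
+ if (ev.size > bufsz)
+ return -1; /* aligned payload will not fit */
+
+ return readn(fd, buf, ev.size); /* payload incl. u64 padding */
+}
+#endif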
+
+int perf_event__process_tracing_data(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ ssize_t size_read, padding, size = event->tracing_data.size;
+ int fd = perf_data__fd(session->data);
+ off_t offset = lseek(fd, 0, SEEK_CUR);
+ char buf[BUFSIZ];
+
+ /* set up for reading amidst mmap */
+ lseek(fd, offset + sizeof(struct tracing_data_event),
+ SEEK_SET);
+
+ size_read = trace_report(fd, &session->tevent,
+ session->repipe);
+ padding = PERF_ALIGN(size_read, sizeof(u64)) - size_read;
+
+ if (readn(fd, buf, padding) < 0) {
+ pr_err("%s: reading input file", __func__);
+ return -1;
+ }
+ if (session->repipe) {
+ int retw = write(STDOUT_FILENO, buf, padding);
+ if (retw <= 0 || retw != padding) {
+ pr_err("%s: repiping tracing data padding", __func__);
+ return -1;
+ }
+ }
+
+ if (size_read + padding != size) {
+ pr_err("%s: tracing data size mismatch", __func__);
+ return -1;
+ }
+
+ perf_evlist__prepare_tracepoint_events(session->evlist,
+ session->tevent.pevent);
+
+ return size_read + padding;
+}
+
+int perf_event__synthesize_build_id(struct perf_tool *tool,
+ struct dso *pos, u16 misc,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event ev;
+ size_t len;
+ int err = 0;
+
+ if (!pos->hit)
+ return err;
+
+ memset(&ev, 0, sizeof(ev));
+
+ len = pos->long_name_len + 1;
+ len = PERF_ALIGN(len, NAME_ALIGN);
+ memcpy(&ev.build_id.build_id, pos->build_id, sizeof(pos->build_id));
+ ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
+ ev.build_id.header.misc = misc;
+ ev.build_id.pid = machine->pid;
+ ev.build_id.header.size = sizeof(ev.build_id) + len;
+ memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
+
+ err = process(tool, &ev, NULL, machine);
+
+ return err;
+}
+
+int perf_event__process_build_id(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ __event_process_build_id(&event->build_id,
+ event->build_id.filename,
+ session);
+ return 0;
+}
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
new file mode 100644
index 000000000..6d7fe44aa
--- /dev/null
+++ b/tools/perf/util/header.h
@@ -0,0 +1,180 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HEADER_H
+#define __PERF_HEADER_H
+
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <sys/types.h>
+#include <stdbool.h>
+#include <linux/bitmap.h>
+#include <linux/types.h>
+#include "event.h"
+#include "env.h"
+#include "pmu.h"
+
+enum {
+ HEADER_RESERVED = 0, /* always cleared */
+ HEADER_FIRST_FEATURE = 1,
+ HEADER_TRACING_DATA = 1,
+ HEADER_BUILD_ID,
+
+ HEADER_HOSTNAME,
+ HEADER_OSRELEASE,
+ HEADER_VERSION,
+ HEADER_ARCH,
+ HEADER_NRCPUS,
+ HEADER_CPUDESC,
+ HEADER_CPUID,
+ HEADER_TOTAL_MEM,
+ HEADER_CMDLINE,
+ HEADER_EVENT_DESC,
+ HEADER_CPU_TOPOLOGY,
+ HEADER_NUMA_TOPOLOGY,
+ HEADER_BRANCH_STACK,
+ HEADER_PMU_MAPPINGS,
+ HEADER_GROUP_DESC,
+ HEADER_AUXTRACE,
+ HEADER_STAT,
+ HEADER_CACHE,
+ HEADER_SAMPLE_TIME,
+ HEADER_MEM_TOPOLOGY,
+ HEADER_LAST_FEATURE,
+ HEADER_FEAT_BITS = 256,
+};
+
+enum perf_header_version {
+ PERF_HEADER_VERSION_1,
+ PERF_HEADER_VERSION_2,
+};
+
+struct perf_file_section {
+ u64 offset;
+ u64 size;
+};
+
+struct perf_file_header {
+ u64 magic;
+ u64 size;
+ u64 attr_size;
+ struct perf_file_section attrs;
+ struct perf_file_section data;
+ /* event_types is ignored */
+ struct perf_file_section event_types;
+ DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+};
+
+struct perf_pipe_file_header {
+ u64 magic;
+ u64 size;
+};
+
+struct perf_header;
+
+int perf_file_header__read(struct perf_file_header *header,
+ struct perf_header *ph, int fd);
+
+struct perf_header {
+ enum perf_header_version version;
+ bool needs_swap;
+ u64 data_offset;
+ u64 data_size;
+ u64 feat_offset;
+ DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS);
+ struct perf_env env;
+};
+
+struct perf_evlist;
+struct perf_session;
+
+int perf_session__read_header(struct perf_session *session);
+int perf_session__write_header(struct perf_session *session,
+ struct perf_evlist *evlist,
+ int fd, bool at_exit);
+int perf_header__write_pipe(int fd);
+
+void perf_header__set_feat(struct perf_header *header, int feat);
+void perf_header__clear_feat(struct perf_header *header, int feat);
+bool perf_header__has_feat(const struct perf_header *header, int feat);
+
+int perf_header__set_cmdline(int argc, const char **argv);
+
+int perf_header__process_sections(struct perf_header *header, int fd,
+ void *data,
+ int (*process)(struct perf_file_section *section,
+ struct perf_header *ph,
+ int feat, int fd, void *data));
+
+int perf_header__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+int perf_event__synthesize_features(struct perf_tool *tool,
+ struct perf_session *session,
+ struct perf_evlist *evlist,
+ perf_event__handler_t process);
+
+int perf_event__synthesize_extra_attr(struct perf_tool *tool,
+ struct perf_evlist *evsel_list,
+ perf_event__handler_t process,
+ bool is_pipe);
+
+int perf_event__process_feature(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+
+int perf_event__synthesize_attr(struct perf_tool *tool,
+ struct perf_event_attr *attr, u32 ids, u64 *id,
+ perf_event__handler_t process);
+int perf_event__synthesize_attrs(struct perf_tool *tool,
+ struct perf_session *session,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_unit(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_scale(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_name(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ perf_event__handler_t process);
+int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
+ struct perf_evlist **pevlist);
+int perf_event__process_event_update(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evlist **pevlist);
+size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
+
+int perf_event__synthesize_tracing_data(struct perf_tool *tool,
+ int fd, struct perf_evlist *evlist,
+ perf_event__handler_t process);
+int perf_event__process_tracing_data(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+
+int perf_event__synthesize_build_id(struct perf_tool *tool,
+ struct dso *pos, u16 misc,
+ perf_event__handler_t process,
+ struct machine *machine);
+int perf_event__process_build_id(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+bool is_perf_magic(u64 magic);
+
+#define NAME_ALIGN 64
+
+struct feat_fd;
+
+int do_write(struct feat_fd *fd, const void *buf, size_t size);
+
+int write_padded(struct feat_fd *fd, const void *bf,
+ size_t count, size_t count_aligned);
+
+/*
+ * arch specific callbacks
+ */
+int get_cpuid(char *buffer, size_t sz);
+
+char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused);
+int strcmp_cpuid_str(const char *s1, const char *s2);
+#endif /* __PERF_HEADER_H */
diff --git a/tools/perf/util/help-unknown-cmd.c b/tools/perf/util/help-unknown-cmd.c
new file mode 100644
index 000000000..4f07a5ba5
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cache.h"
+#include "config.h"
+#include <poll.h>
+#include <stdio.h>
+#include <subcmd/help.h>
+#include "../builtin.h"
+#include "levenshtein.h"
+
+static int autocorrect;
+
+static int perf_unknown_cmd_config(const char *var, const char *value,
+ void *cb __maybe_unused)
+{
+ if (!strcmp(var, "help.autocorrect"))
+ return perf_config_int(&autocorrect, var, value);
+
+ return 0;
+}
+
+static int levenshtein_compare(const void *p1, const void *p2)
+{
+ const struct cmdname *const *c1 = p1, *const *c2 = p2;
+ const char *s1 = (*c1)->name, *s2 = (*c2)->name;
+ int l1 = (*c1)->len;
+ int l2 = (*c2)->len;
+ return l1 != l2 ? l1 - l2 : strcmp(s1, s2);
+}
+
+static int add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
+{
+ unsigned int i, nr = cmds->cnt + old->cnt;
+ void *tmp;
+
+ if (nr > cmds->alloc) {
+ /* Choose bigger one to alloc */
+ if (alloc_nr(cmds->alloc) < nr)
+ cmds->alloc = nr;
+ else
+ cmds->alloc = alloc_nr(cmds->alloc);
+ tmp = realloc(cmds->names, cmds->alloc * sizeof(*cmds->names));
+ if (!tmp)
+ return -1;
+ cmds->names = tmp;
+ }
+ for (i = 0; i < old->cnt; i++)
+ cmds->names[cmds->cnt++] = old->names[i];
+ zfree(&old->names);
+ old->cnt = 0;
+ return 0;
+}
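+
+/*
+ * Illustrative sketch (not part of the upstream file): the growth policy
+ * used above. alloc_nr() in tools/lib/subcmd grows the array geometrically
+ * (roughly (x + 16) * 3 / 2), and the code takes whichever of that and the
+ * exact requirement is larger.
+ */
+#if 0
+static unsigned int example_new_alloc(unsigned int alloc, unsigned int need)
+{
+ unsigned int grown = alloc_nr(alloc);
+
+ return grown < need ? need : grown;
+}
+#endif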
+
+const char *help_unknown_cmd(const char *cmd)
+{
+ unsigned int i, n = 0, best_similarity = 0;
+ struct cmdnames main_cmds, other_cmds;
+
+ memset(&main_cmds, 0, sizeof(main_cmds));
+ memset(&other_cmds, 0, sizeof(other_cmds));
+
+ perf_config(perf_unknown_cmd_config, NULL);
+
+ load_command_list("perf-", &main_cmds, &other_cmds);
+
+ if (add_cmd_list(&main_cmds, &other_cmds) < 0) {
+ fprintf(stderr, "ERROR: Failed to allocate command list for unknown command.\n");
+ goto end;
+ }
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(*main_cmds.names), cmdname_compare);
+ uniq(&main_cmds);
+
+ if (main_cmds.cnt) {
+ /* This reuses cmdname->len for similarity index */
+ for (i = 0; i < main_cmds.cnt; ++i)
+ main_cmds.names[i]->len =
+ levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+
+ qsort(main_cmds.names, main_cmds.cnt,
+ sizeof(*main_cmds.names), levenshtein_compare);
+
+ best_similarity = main_cmds.names[0]->len;
+ n = 1;
+ while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
+ ++n;
+ }
+
+ if (autocorrect && n == 1) {
+ const char *assumed = main_cmds.names[0]->name;
+
+ main_cmds.names[0] = NULL;
+ clean_cmdnames(&main_cmds);
+ fprintf(stderr, "WARNING: You called a perf program named '%s', "
+ "which does not exist.\n"
+ "Continuing under the assumption that you meant '%s'\n",
+ cmd, assumed);
+ if (autocorrect > 0) {
+ fprintf(stderr, "in %0.1f seconds automatically...\n",
+ (float)autocorrect/10.0);
+ poll(NULL, 0, autocorrect * 100);
+ }
+ return assumed;
+ }
+
+ fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd);
+
+ if (main_cmds.cnt && best_similarity < 6) {
+ fprintf(stderr, "\nDid you mean %s?\n",
+ n < 2 ? "this": "one of these");
+
+ for (i = 0; i < n; i++)
+ fprintf(stderr, "\t%s\n", main_cmds.names[i]->name);
+ }
+end:
+ exit(1);
+}
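+
+/*
+ * Illustrative sketch (not part of the upstream file): ranking candidates
+ * with the weights used above (swap=0, substitution=2, addition=1,
+ * deletion=4), so transposed letters cost nothing and dropped letters are
+ * penalized most.
+ */
+#if 0
+static const char *example_closest(const char *typed)
+{
+ /* smaller distance == closer match */
+ int d1 = levenshtein(typed, "record", 0, 2, 1, 4);
+ int d2 = levenshtein(typed, "report", 0, 2, 1, 4);
+
+ return d1 <= d2 ? "record" : "report";
+}
+#endif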
diff --git a/tools/perf/util/help-unknown-cmd.h b/tools/perf/util/help-unknown-cmd.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/perf/util/help-unknown-cmd.h
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
new file mode 100644
index 000000000..918260b65
--- /dev/null
+++ b/tools/perf/util/hist.c
@@ -0,0 +1,2650 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "build-id.h"
+#include "hist.h"
+#include "map.h"
+#include "session.h"
+#include "namespaces.h"
+#include "sort.h"
+#include "units.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "annotate.h"
+#include "srcline.h"
+#include "thread.h"
+#include "ui/progress.h"
+#include <errno.h>
+#include <math.h>
+#include <inttypes.h>
+#include <sys/param.h>
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+ struct hist_entry *he);
+static bool hists__filter_entry_by_thread(struct hists *hists,
+ struct hist_entry *he);
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+ struct hist_entry *he);
+static bool hists__filter_entry_by_socket(struct hists *hists,
+ struct hist_entry *he);
+
+u16 hists__col_len(struct hists *hists, enum hist_column col)
+{
+ return hists->col_len[col];
+}
+
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+ hists->col_len[col] = len;
+}
+
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len)
+{
+ if (len > hists__col_len(hists, col)) {
+ hists__set_col_len(hists, col, len);
+ return true;
+ }
+ return false;
+}
+
+void hists__reset_col_len(struct hists *hists)
+{
+ enum hist_column col;
+
+ for (col = 0; col < HISTC_NR_COLS; ++col)
+ hists__set_col_len(hists, col, 0);
+}
+
+static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
+{
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+ if (hists__col_len(hists, dso) < unresolved_col_width &&
+ !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
+ !symbol_conf.dso_list)
+ hists__set_col_len(hists, dso, unresolved_col_width);
+}
+
+void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
+{
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+ int symlen;
+ u16 len;
+
+ /*
+ * +4 accounts for '[x] ' priv level info
+ * +2 accounts for 0x prefix on raw addresses
+ * +3 accounts for ' y ' symtab origin info
+ */
+ if (h->ms.sym) {
+ symlen = h->ms.sym->namelen + 4;
+ if (verbose > 0)
+ symlen += BITS_PER_LONG / 4 + 2 + 3;
+ hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO);
+ }
+
+ len = thread__comm_len(h->thread);
+ if (hists__new_col_len(hists, HISTC_COMM, len))
+ hists__set_col_len(hists, HISTC_THREAD, len + 8);
+
+ if (h->ms.map) {
+ len = dso__name_len(h->ms.map->dso);
+ hists__new_col_len(hists, HISTC_DSO, len);
+ }
+
+ if (h->parent)
+ hists__new_col_len(hists, HISTC_PARENT, h->parent->namelen);
+
+ if (h->branch_info) {
+ if (h->branch_info->from.sym) {
+ symlen = (int)h->branch_info->from.sym->namelen + 4;
+ if (verbose > 0)
+ symlen += BITS_PER_LONG / 4 + 2 + 3;
+ hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+
+ symlen = dso__name_len(h->branch_info->from.map->dso);
+ hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
+ }
+
+ if (h->branch_info->to.sym) {
+ symlen = (int)h->branch_info->to.sym->namelen + 4;
+ if (verbose > 0)
+ symlen += BITS_PER_LONG / 4 + 2 + 3;
+ hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+
+ symlen = dso__name_len(h->branch_info->to.map->dso);
+ hists__new_col_len(hists, HISTC_DSO_TO, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
+ }
+
+ if (h->branch_info->srcline_from)
+ hists__new_col_len(hists, HISTC_SRCLINE_FROM,
+ strlen(h->branch_info->srcline_from));
+ if (h->branch_info->srcline_to)
+ hists__new_col_len(hists, HISTC_SRCLINE_TO,
+ strlen(h->branch_info->srcline_to));
+ }
+
+ if (h->mem_info) {
+ if (h->mem_info->daddr.sym) {
+ symlen = (int)h->mem_info->daddr.sym->namelen + 4
+ + unresolved_col_width + 2;
+ hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+ symlen);
+ hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+ symlen + 1);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL,
+ symlen);
+ hists__new_col_len(hists, HISTC_MEM_DCACHELINE,
+ symlen);
+ }
+
+ if (h->mem_info->iaddr.sym) {
+ symlen = (int)h->mem_info->iaddr.sym->namelen + 4
+ + unresolved_col_width + 2;
+ hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+ symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL,
+ symlen);
+ }
+
+ if (h->mem_info->daddr.map) {
+ symlen = dso__name_len(h->mem_info->daddr.map->dso);
+ hists__new_col_len(hists, HISTC_MEM_DADDR_DSO,
+ symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+ }
+
+ hists__new_col_len(hists, HISTC_MEM_PHYS_DADDR,
+ unresolved_col_width + 4 + 2);
+
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_MEM_DADDR_SYMBOL, symlen);
+ hists__new_col_len(hists, HISTC_MEM_IADDR_SYMBOL, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
+ }
+
+ hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
+ hists__new_col_len(hists, HISTC_CPU, 3);
+ hists__new_col_len(hists, HISTC_SOCKET, 6);
+ hists__new_col_len(hists, HISTC_MEM_LOCKED, 6);
+ hists__new_col_len(hists, HISTC_MEM_TLB, 22);
+ hists__new_col_len(hists, HISTC_MEM_SNOOP, 12);
+ hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3);
+ hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12);
+ hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12);
+
+ if (h->srcline) {
+ len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header));
+ hists__new_col_len(hists, HISTC_SRCLINE, len);
+ }
+
+ if (h->srcfile)
+ hists__new_col_len(hists, HISTC_SRCFILE, strlen(h->srcfile));
+
+ if (h->transaction)
+ hists__new_col_len(hists, HISTC_TRANSACTION,
+ hist_entry__transaction_len());
+
+ if (h->trace_output)
+ hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+}
+
+void hists__output_recalc_col_len(struct hists *hists, int max_rows)
+{
+ struct rb_node *next = rb_first(&hists->entries);
+ struct hist_entry *n;
+ int row = 0;
+
+ hists__reset_col_len(hists);
+
+ while (next && row++ < max_rows) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ if (!n->filtered)
+ hists__calc_col_len(hists, n);
+ next = rb_next(&n->rb_node);
+ }
+}
+
+static void he_stat__add_cpumode_period(struct he_stat *he_stat,
+ unsigned int cpumode, u64 period)
+{
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ he_stat->period_sys += period;
+ break;
+ case PERF_RECORD_MISC_USER:
+ he_stat->period_us += period;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ he_stat->period_guest_sys += period;
+ break;
+ case PERF_RECORD_MISC_GUEST_USER:
+ he_stat->period_guest_us += period;
+ break;
+ default:
+ break;
+ }
+}
+
+static void he_stat__add_period(struct he_stat *he_stat, u64 period,
+ u64 weight)
+{
+
+ he_stat->period += period;
+ he_stat->weight += weight;
+ he_stat->nr_events += 1;
+}
+
+static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
+{
+ dest->period += src->period;
+ dest->period_sys += src->period_sys;
+ dest->period_us += src->period_us;
+ dest->period_guest_sys += src->period_guest_sys;
+ dest->period_guest_us += src->period_guest_us;
+ dest->nr_events += src->nr_events;
+ dest->weight += src->weight;
+}
+
+static void he_stat__decay(struct he_stat *he_stat)
+{
+ he_stat->period = (he_stat->period * 7) / 8;
+ he_stat->nr_events = (he_stat->nr_events * 7) / 8;
+ /* XXX need decay for weight too? */
+}
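+
+/*
+ * Illustrative sketch (not part of the upstream file): the 7/8 decay above
+ * forms an exponential moving window; after n rounds a period retains
+ * (7/8)^n of its value, halving roughly every 5-6 rounds.
+ */
+#if 0
+static u64 example_decayed_period(u64 period, int rounds)
+{
+ while (rounds-- > 0)
+ period = (period * 7) / 8;
+
+ return period;
+}
+#endif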
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he);
+
+static bool hists__decay_entry(struct hists *hists, struct hist_entry *he)
+{
+ u64 prev_period = he->stat.period;
+ u64 diff;
+
+ if (prev_period == 0)
+ return true;
+
+ he_stat__decay(&he->stat);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__decay(he->stat_acc);
+ decay_callchain(he->callchain);
+
+ diff = prev_period - he->stat.period;
+
+ if (!he->depth) {
+ hists->stats.total_period -= diff;
+ if (!he->filtered)
+ hists->stats.total_non_filtered_period -= diff;
+ }
+
+ if (!he->leaf) {
+ struct hist_entry *child;
+ struct rb_node *node = rb_first(&he->hroot_out);
+ while (node) {
+ child = rb_entry(node, struct hist_entry, rb_node);
+ node = rb_next(node);
+
+ if (hists__decay_entry(hists, child))
+ hists__delete_entry(hists, child);
+ }
+ }
+
+ return he->stat.period == 0;
+}
+
+static void hists__delete_entry(struct hists *hists, struct hist_entry *he)
+{
+ struct rb_root *root_in;
+ struct rb_root *root_out;
+
+ if (he->parent_he) {
+ root_in = &he->parent_he->hroot_in;
+ root_out = &he->parent_he->hroot_out;
+ } else {
+ if (hists__has(hists, need_collapse))
+ root_in = &hists->entries_collapsed;
+ else
+ root_in = hists->entries_in;
+ root_out = &hists->entries;
+ }
+
+ rb_erase(&he->rb_node_in, root_in);
+ rb_erase(&he->rb_node, root_out);
+
+ --hists->nr_entries;
+ if (!he->filtered)
+ --hists->nr_non_filtered_entries;
+
+ hist_entry__delete(he);
+}
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel)
+{
+ struct rb_node *next = rb_first(&hists->entries);
+ struct hist_entry *n;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ next = rb_next(&n->rb_node);
+ if (((zap_user && n->level == '.') ||
+ (zap_kernel && n->level != '.') ||
+ hists__decay_entry(hists, n))) {
+ hists__delete_entry(hists, n);
+ }
+ }
+}
+
+void hists__delete_entries(struct hists *hists)
+{
+ struct rb_node *next = rb_first(&hists->entries);
+ struct hist_entry *n;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node);
+ next = rb_next(&n->rb_node);
+
+ hists__delete_entry(hists, n);
+ }
+}
+
+/*
+ * histogram, sorted on item, collects periods
+ */
+
+static int hist_entry__init(struct hist_entry *he,
+ struct hist_entry *template,
+ bool sample_self,
+ size_t callchain_size)
+{
+ *he = *template;
+ he->callchain_size = callchain_size;
+
+ if (symbol_conf.cumulate_callchain) {
+ he->stat_acc = malloc(sizeof(he->stat));
+ if (he->stat_acc == NULL)
+ return -ENOMEM;
+ memcpy(he->stat_acc, &he->stat, sizeof(he->stat));
+ if (!sample_self)
+ memset(&he->stat, 0, sizeof(he->stat));
+ }
+
+ map__get(he->ms.map);
+
+ if (he->branch_info) {
+ /*
+ * This branch info is (part of) an allocation made by
+ * sample__resolve_bstack() and will be freed after
+ * adding new entries. So we need to save a copy.
+ */
+ he->branch_info = malloc(sizeof(*he->branch_info));
+ if (he->branch_info == NULL) {
+ map__zput(he->ms.map);
+ free(he->stat_acc);
+ return -ENOMEM;
+ }
+
+ memcpy(he->branch_info, template->branch_info,
+ sizeof(*he->branch_info));
+
+ map__get(he->branch_info->from.map);
+ map__get(he->branch_info->to.map);
+ }
+
+ if (he->mem_info) {
+ map__get(he->mem_info->iaddr.map);
+ map__get(he->mem_info->daddr.map);
+ }
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
+ callchain_init(he->callchain);
+
+ if (he->raw_data) {
+ he->raw_data = memdup(he->raw_data, he->raw_size);
+
+ if (he->raw_data == NULL) {
+ map__put(he->ms.map);
+ if (he->branch_info) {
+ map__put(he->branch_info->from.map);
+ map__put(he->branch_info->to.map);
+ free(he->branch_info);
+ }
+ if (he->mem_info) {
+ map__put(he->mem_info->iaddr.map);
+ map__put(he->mem_info->daddr.map);
+ }
+ free(he->stat_acc);
+ return -ENOMEM;
+ }
+ }
+ INIT_LIST_HEAD(&he->pairs.node);
+ thread__get(he->thread);
+ he->hroot_in = RB_ROOT;
+ he->hroot_out = RB_ROOT;
+
+ if (!symbol_conf.report_hierarchy)
+ he->leaf = true;
+
+ return 0;
+}
+
+static void *hist_entry__zalloc(size_t size)
+{
+ return zalloc(size + sizeof(struct hist_entry));
+}
+
+static void hist_entry__free(void *ptr)
+{
+ free(ptr);
+}
+
+static struct hist_entry_ops default_ops = {
+ .new = hist_entry__zalloc,
+ .free = hist_entry__free,
+};
+
+static struct hist_entry *hist_entry__new(struct hist_entry *template,
+ bool sample_self)
+{
+ struct hist_entry_ops *ops = template->ops;
+ size_t callchain_size = 0;
+ struct hist_entry *he;
+ int err = 0;
+
+ if (!ops)
+ ops = template->ops = &default_ops;
+
+ if (symbol_conf.use_callchain)
+ callchain_size = sizeof(struct callchain_root);
+
+ he = ops->new(callchain_size);
+ if (he) {
+ err = hist_entry__init(he, template, sample_self, callchain_size);
+ if (err) {
+ ops->free(he);
+ he = NULL;
+ }
+ }
+
+ return he;
+}
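+
+/*
+ * Illustrative sketch (not part of the upstream file): the single-allocation
+ * trick behind hist_entry__zalloc() above. The callchain root, when needed,
+ * lives in the trailing space right behind the entry:
+ *
+ * +-------------------+----------------------------------+
+ * | struct hist_entry | struct callchain_root (optional) |
+ * +-------------------+----------------------------------+
+ */
+#if 0
+static struct hist_entry *example_alloc(bool with_callchain)
+{
+ size_t extra = with_callchain ? sizeof(struct callchain_root) : 0;
+
+ return zalloc(sizeof(struct hist_entry) + extra);
+}
+#endif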
+
+static u8 symbol__parent_filter(const struct symbol *parent)
+{
+ if (symbol_conf.exclude_other && parent == NULL)
+ return 1 << HIST_FILTER__PARENT;
+ return 0;
+}
+
+static void hist_entry__add_callchain_period(struct hist_entry *he, u64 period)
+{
+ if (!hist_entry__has_callchains(he) || !symbol_conf.use_callchain)
+ return;
+
+ he->hists->callchain_period += period;
+ if (!he->filtered)
+ he->hists->callchain_non_filtered_period += period;
+}
+
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+ struct hist_entry *entry,
+ struct addr_location *al,
+ bool sample_self)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ int64_t cmp;
+ u64 period = entry->stat.period;
+ u64 weight = entry->stat.weight;
+
+ p = &hists->entries_in->rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ /*
+ * Make sure that it receives arguments in the same order as
+ * hist_entry__collapse() so that we can use an appropriate
+ * function when searching an entry regardless of which sort
+ * keys were used.
+ */
+ cmp = hist_entry__cmp(he, entry);
+
+ if (!cmp) {
+ if (sample_self) {
+ he_stat__add_period(&he->stat, period, weight);
+ hist_entry__add_callchain_period(he, period);
+ }
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_period(he->stat_acc, period, weight);
+
+ /*
+ * This mem info was allocated from sample__resolve_mem
+ * and will not be used anymore.
+ */
+ mem_info__zput(entry->mem_info);
+
+ /* If the map of an existing hist_entry has
+ * become out-of-date due to an exec() or
+ * similar, update it. Otherwise we will
+ * mis-adjust symbol addresses when computing
+ * the history counter to increment.
+ */
+ if (he->ms.map != entry->ms.map) {
+ map__put(he->ms.map);
+ he->ms.map = map__get(entry->ms.map);
+ }
+ goto out;
+ }
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ he = hist_entry__new(entry, sample_self);
+ if (!he)
+ return NULL;
+
+ if (sample_self)
+ hist_entry__add_callchain_period(he, period);
+ hists->nr_entries++;
+
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color(&he->rb_node_in, hists->entries_in);
+out:
+ if (sample_self)
+ he_stat__add_cpumode_period(&he->stat, al->cpumode, period);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period);
+ return he;
+}
+
+static struct hist_entry*
+__hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self,
+ struct hist_entry_ops *ops)
+{
+ struct namespaces *ns = thread__namespaces(al->thread);
+ struct hist_entry entry = {
+ .thread = al->thread,
+ .comm = thread__comm(al->thread),
+ .cgroup_id = {
+ .dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
+ .ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
+ },
+ .ms = {
+ .map = al->map,
+ .sym = al->sym,
+ },
+ .srcline = al->srcline ? strdup(al->srcline) : NULL,
+ .socket = al->socket,
+ .cpu = al->cpu,
+ .cpumode = al->cpumode,
+ .ip = al->addr,
+ .level = al->level,
+ .stat = {
+ .nr_events = 1,
+ .period = sample->period,
+ .weight = sample->weight,
+ },
+ .parent = sym_parent,
+ .filtered = symbol__parent_filter(sym_parent) | al->filtered,
+ .hists = hists,
+ .branch_info = bi,
+ .mem_info = mi,
+ .transaction = sample->transaction,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
+ .ops = ops,
+ }, *he = hists__findnew_entry(hists, &entry, al, sample_self);
+
+ if (!hists->has_callchains && he && he->callchain_size != 0)
+ hists->has_callchains = true;
+ return he;
+}
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self)
+{
+ return __hists__add_entry(hists, al, sym_parent, bi, mi,
+ sample, sample_self, NULL);
+}
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+ struct hist_entry_ops *ops,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self)
+{
+ return __hists__add_entry(hists, al, sym_parent, bi, mi,
+ sample, sample_self, ops);
+}
+
+static int
+iter_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_add_next_nop_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_prepare_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct perf_sample *sample = iter->sample;
+ struct mem_info *mi;
+
+ mi = sample__resolve_mem(sample, al);
+ if (mi == NULL)
+ return -ENOMEM;
+
+ iter->priv = mi;
+ return 0;
+}
+
+static int
+iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ u64 cost;
+ struct mem_info *mi = iter->priv;
+ struct hists *hists = evsel__hists(iter->evsel);
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+
+ if (mi == NULL)
+ return -EINVAL;
+
+ cost = sample->weight;
+ if (!cost)
+ cost = 1;
+
+ /*
+ * We must pass period=weight in order to get the correct
+ * sorting from hists__collapse_resort(), which is based solely
+ * on periods. We want sorting to be done on nr_events * weight,
+ * and this is achieved indirectly by passing period=weight here
+ * and via the he_stat__add_period() function.
+ */
+ sample->period = cost;
+
+ he = hists__add_entry(hists, al, iter->parent, NULL, mi,
+ sample, true);
+ if (!he)
+ return -ENOMEM;
+
+ iter->he = he;
+ return 0;
+}
+
+static int
+iter_finish_mem_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+ struct hist_entry *he = iter->he;
+ int err = -EINVAL;
+
+ if (he == NULL)
+ goto out;
+
+ hists__inc_nr_samples(hists, he->filtered);
+
+ err = hist_entry__append_callchain(he, iter->sample);
+
+out:
+ /*
+ * We don't need to free iter->priv (mem_info) here since the mem info
+ * was either already freed in hists__findnew_entry() or passed to a
+ * new hist entry by hist_entry__new().
+ */
+ iter->priv = NULL;
+
+ iter->he = NULL;
+ return err;
+}
+
+static int
+iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi;
+ struct perf_sample *sample = iter->sample;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (!bi)
+ return -ENOMEM;
+
+ iter->curr = 0;
+ iter->total = sample->branch_stack->nr;
+
+ iter->priv = bi;
+ return 0;
+}
+
+static int
+iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi = iter->priv;
+ int i = iter->curr;
+
+ if (bi == NULL)
+ return 0;
+
+ if (iter->curr >= iter->total)
+ return 0;
+
+ al->map = bi[i].to.map;
+ al->sym = bi[i].to.sym;
+ al->addr = bi[i].to.addr;
+ return 1;
+}
+
+static int
+iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct branch_info *bi;
+ struct perf_evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he = NULL;
+ int i = iter->curr;
+ int err = 0;
+
+ bi = iter->priv;
+
+ if (iter->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+ goto out;
+
+ /*
+ * The report shows the percentage of total branches captured
+ * and not events sampled. Thus we use a pseudo period of 1.
+ */
+ sample->period = 1;
+ sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;
+
+ he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
+ sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ hists__inc_nr_samples(hists, he->filtered);
+
+out:
+ iter->he = he;
+ iter->curr++;
+ return err;
+}
+
+static int
+iter_finish_branch_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ zfree(&iter->priv);
+ iter->he = NULL;
+
+ return iter->curr >= iter->total ? 0 : -1;
+}
+
+static int
+iter_prepare_normal_entry(struct hist_entry_iter *iter __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+ return 0;
+}
+
+static int
+iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry *he;
+
+ he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+ sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ iter->he = he;
+ return 0;
+}
+
+static int
+iter_finish_normal_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+
+ if (he == NULL)
+ return 0;
+
+ iter->he = NULL;
+
+ hists__inc_nr_samples(evsel__hists(evsel), he->filtered);
+
+ return hist_entry__append_callchain(he, sample);
+}
+
+static int
+iter_prepare_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ struct hist_entry **he_cache;
+
+ callchain_cursor_commit(&callchain_cursor);
+
+ /*
+ * This is for detecting cycles or recursion so that they're
+ * accumulated only once, to prevent entries from exceeding
+ * 100% overhead.
+ */
+ he_cache = malloc(sizeof(*he_cache) * (callchain_cursor.nr + 1));
+ if (he_cache == NULL)
+ return -ENOMEM;
+
+ iter->priv = he_cache;
+ iter->curr = 0;
+
+ return 0;
+}
+
+static int
+iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct hists *hists = evsel__hists(evsel);
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry **he_cache = iter->priv;
+ struct hist_entry *he;
+ int err = 0;
+
+ he = hists__add_entry(hists, al, iter->parent, NULL, NULL,
+ sample, true);
+ if (he == NULL)
+ return -ENOMEM;
+
+ iter->he = he;
+ he_cache[iter->curr++] = he;
+
+ hist_entry__append_callchain(he, sample);
+
+ /*
+ * We need to re-initialize the cursor since callchain_append()
+ * advanced the cursor to the end.
+ */
+ callchain_cursor_commit(&callchain_cursor);
+
+ hists__inc_nr_samples(hists, he->filtered);
+
+ return err;
+}
+
+static int
+iter_next_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct callchain_cursor_node *node;
+
+ node = callchain_cursor_current(&callchain_cursor);
+ if (node == NULL)
+ return 0;
+
+ return fill_callchain_info(al, node, iter->hide_unresolved);
+}
+
+static int
+iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al)
+{
+ struct perf_evsel *evsel = iter->evsel;
+ struct perf_sample *sample = iter->sample;
+ struct hist_entry **he_cache = iter->priv;
+ struct hist_entry *he;
+ struct hist_entry he_tmp = {
+ .hists = evsel__hists(evsel),
+ .cpu = al->cpu,
+ .thread = al->thread,
+ .comm = thread__comm(al->thread),
+ .ip = al->addr,
+ .ms = {
+ .map = al->map,
+ .sym = al->sym,
+ },
+ .srcline = al->srcline ? strdup(al->srcline) : NULL,
+ .parent = iter->parent,
+ .raw_data = sample->raw_data,
+ .raw_size = sample->raw_size,
+ };
+ int i;
+ struct callchain_cursor cursor;
+
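+	/*
+	 * Take a snapshot of the cursor before advancing it, so that
+	 * callchain_append() below sees the chain starting at the
+	 * current node.
+	 */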
+ callchain_cursor_snapshot(&cursor, &callchain_cursor);
+
+ callchain_cursor_advance(&callchain_cursor);
+
+	/*
+	 * Check if there are duplicate entries in the callchain.
+	 * It's possible that it has cycles or recursive calls.
+	 */
+ for (i = 0; i < iter->curr; i++) {
+ if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
+ /* to avoid calling callback function */
+ iter->he = NULL;
+ return 0;
+ }
+ }
+
+ he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+ sample, false);
+ if (he == NULL)
+ return -ENOMEM;
+
+ iter->he = he;
+ he_cache[iter->curr++] = he;
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain)
+ callchain_append(he->callchain, &cursor, sample->period);
+ return 0;
+}
+
+static int
+iter_finish_cumulative_entry(struct hist_entry_iter *iter,
+ struct addr_location *al __maybe_unused)
+{
+ zfree(&iter->priv);
+ iter->he = NULL;
+
+ return 0;
+}
+
+const struct hist_iter_ops hist_iter_mem = {
+ .prepare_entry = iter_prepare_mem_entry,
+ .add_single_entry = iter_add_single_mem_entry,
+ .next_entry = iter_next_nop_entry,
+ .add_next_entry = iter_add_next_nop_entry,
+ .finish_entry = iter_finish_mem_entry,
+};
+
+const struct hist_iter_ops hist_iter_branch = {
+ .prepare_entry = iter_prepare_branch_entry,
+ .add_single_entry = iter_add_single_branch_entry,
+ .next_entry = iter_next_branch_entry,
+ .add_next_entry = iter_add_next_branch_entry,
+ .finish_entry = iter_finish_branch_entry,
+};
+
+const struct hist_iter_ops hist_iter_normal = {
+ .prepare_entry = iter_prepare_normal_entry,
+ .add_single_entry = iter_add_single_normal_entry,
+ .next_entry = iter_next_nop_entry,
+ .add_next_entry = iter_add_next_nop_entry,
+ .finish_entry = iter_finish_normal_entry,
+};
+
+const struct hist_iter_ops hist_iter_cumulative = {
+ .prepare_entry = iter_prepare_cumulative_entry,
+ .add_single_entry = iter_add_single_cumulative_entry,
+ .next_entry = iter_next_cumulative_entry,
+ .add_next_entry = iter_add_next_cumulative_entry,
+ .finish_entry = iter_finish_cumulative_entry,
+};
+
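+/*
+ * Driver for the hist_iter_ops callbacks above: prepare the iterator, add
+ * the first entry, then keep adding entries while next_entry() says there
+ * are more, and let finish_entry() clean up.
+ *
+ * A minimal usage sketch (simplified from what callers such as
+ * 'perf report' do; the surrounding variables are illustrative):
+ *
+ *	struct hist_entry_iter iter = {
+ *		.evsel	= evsel,
+ *		.sample	= sample,
+ *		.ops	= &hist_iter_normal,
+ *	};
+ *
+ *	err = hist_entry_iter__add(&iter, &al, max_stack_depth, NULL);
+ */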
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+ int max_stack_depth, void *arg)
+{
+ int err, err2;
+ struct map *alm = NULL;
+
+ if (al)
+ alm = map__get(al->map);
+
+ err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent,
+ iter->evsel, al, max_stack_depth);
+ if (err) {
+ map__put(alm);
+ return err;
+ }
+
+ err = iter->ops->prepare_entry(iter, al);
+ if (err)
+ goto out;
+
+ err = iter->ops->add_single_entry(iter, al);
+ if (err)
+ goto out;
+
+ if (iter->he && iter->add_entry_cb) {
+ err = iter->add_entry_cb(iter, al, true, arg);
+ if (err)
+ goto out;
+ }
+
+ while (iter->ops->next_entry(iter, al)) {
+ err = iter->ops->add_next_entry(iter, al);
+ if (err)
+ break;
+
+ if (iter->he && iter->add_entry_cb) {
+ err = iter->add_entry_cb(iter, al, false, arg);
+ if (err)
+ goto out;
+ }
+ }
+
+out:
+ err2 = iter->ops->finish_entry(iter, al);
+ if (!err)
+ err = err2;
+
+ map__put(alm);
+
+ return err;
+}
+
+int64_t
+hist_entry__cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct hists *hists = left->hists;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ hists__for_each_sort_list(hists, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt) &&
+ !perf_hpp__defined_dynamic_entry(fmt, hists))
+ continue;
+
+ cmp = fmt->cmp(fmt, left, right);
+ if (cmp)
+ break;
+ }
+
+ return cmp;
+}
+
+int64_t
+hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ struct hists *hists = left->hists;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ hists__for_each_sort_list(hists, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt) &&
+ !perf_hpp__defined_dynamic_entry(fmt, hists))
+ continue;
+
+ cmp = fmt->collapse(fmt, left, right);
+ if (cmp)
+ break;
+ }
+
+ return cmp;
+}
+
+void hist_entry__delete(struct hist_entry *he)
+{
+ struct hist_entry_ops *ops = he->ops;
+
+ thread__zput(he->thread);
+ map__zput(he->ms.map);
+
+ if (he->branch_info) {
+ map__zput(he->branch_info->from.map);
+ map__zput(he->branch_info->to.map);
+ free_srcline(he->branch_info->srcline_from);
+ free_srcline(he->branch_info->srcline_to);
+ zfree(&he->branch_info);
+ }
+
+ if (he->mem_info) {
+ map__zput(he->mem_info->iaddr.map);
+ map__zput(he->mem_info->daddr.map);
+ mem_info__zput(he->mem_info);
+ }
+
+ zfree(&he->stat_acc);
+ free_srcline(he->srcline);
+ if (he->srcfile && he->srcfile[0])
+ free(he->srcfile);
+ free_callchain(he->callchain);
+ free(he->trace_output);
+ free(he->raw_data);
+ ops->free(he);
+}
+
+/*
+ * If this is not the last column, then we need to pad it according to the
+ * pre-calculated max length for this column, otherwise don't bother adding
+ * spaces because that would break viewing this with, for instance, 'less':
+ * it would show tons of trailing spaces when a long C++ demangled method
+ * name is sampled.
+ */
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_fmt *fmt, int printed)
+{
+ if (!list_is_last(&fmt->list, &he->hists->hpp_list->fields)) {
+ const int width = fmt->width(fmt, hpp, he->hists);
+ if (printed < width) {
+ advance_hpp(hpp, printed);
+ printed = scnprintf(hpp->buf, hpp->size, "%-*s", width - printed, " ");
+ }
+ }
+
+ return printed;
+}
+
+/*
+ * collapse the histogram
+ */
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he);
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *he,
+ enum hist_filter type);
+
+typedef bool (*fmt_chk_fn)(struct perf_hpp_fmt *fmt);
+
+static bool check_thread_entry(struct perf_hpp_fmt *fmt)
+{
+ return perf_hpp__is_thread_entry(fmt) || perf_hpp__is_comm_entry(fmt);
+}
+
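+/*
+ * In hierarchy mode each level only carries a subset of the sort keys, so a
+ * thread/dso/symbol filter can only be decided at the level that actually
+ * shows the matching column; other levels inherit or propagate the filter
+ * marker as described below.
+ */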
+static void hist_entry__check_and_remove_filter(struct hist_entry *he,
+ enum hist_filter type,
+ fmt_chk_fn check)
+{
+ struct perf_hpp_fmt *fmt;
+ bool type_match = false;
+ struct hist_entry *parent = he->parent_he;
+
+ switch (type) {
+ case HIST_FILTER__THREAD:
+ if (symbol_conf.comm_list == NULL &&
+ symbol_conf.pid_list == NULL &&
+ symbol_conf.tid_list == NULL)
+ return;
+ break;
+ case HIST_FILTER__DSO:
+ if (symbol_conf.dso_list == NULL)
+ return;
+ break;
+ case HIST_FILTER__SYMBOL:
+ if (symbol_conf.sym_list == NULL)
+ return;
+ break;
+ case HIST_FILTER__PARENT:
+ case HIST_FILTER__GUEST:
+ case HIST_FILTER__HOST:
+ case HIST_FILTER__SOCKET:
+ case HIST_FILTER__C2C:
+ default:
+ return;
+ }
+
+	/* if it's filtered by its own fmt, it has to have the filter bits */
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ if (check(fmt)) {
+ type_match = true;
+ break;
+ }
+ }
+
+ if (type_match) {
+		/*
+		 * If the filter is for the current level's entry,
+		 * propagate the filter marker to its parents.  The
+		 * marker bit was already set by default, so it only
+		 * needs to be cleared for non-filtered entries.
+		 */
+ if (!(he->filtered & (1 << type))) {
+ while (parent) {
+ parent->filtered &= ~(1 << type);
+ parent = parent->parent_he;
+ }
+ }
+ } else {
+		/*
+		 * If the current entry doesn't have a matching format,
+		 * set the filter marker for upper-level entries.  It
+		 * will be cleared if any of its lower-level entries is
+		 * not filtered.
+		 *
+		 * Lower-level entries inherit the parent's filter bit,
+		 * so that children of a non-filtered entry won't set
+		 * the filter marker.
+		 */
+ if (parent == NULL)
+ he->filtered |= (1 << type);
+ else
+ he->filtered |= (parent->filtered & (1 << type));
+ }
+}
+
+static void hist_entry__apply_hierarchy_filters(struct hist_entry *he)
+{
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__THREAD,
+ check_thread_entry);
+
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__DSO,
+ perf_hpp__is_dso_entry);
+
+ hist_entry__check_and_remove_filter(he, HIST_FILTER__SYMBOL,
+ perf_hpp__is_sym_entry);
+
+ hists__apply_filters(he->hists, he);
+}
+
+static struct hist_entry *hierarchy_insert_entry(struct hists *hists,
+ struct rb_root *root,
+ struct hist_entry *he,
+ struct hist_entry *parent_he,
+ struct perf_hpp_list *hpp_list)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter, *new;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ cmp = 0;
+ perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, iter, he);
+ if (cmp)
+ break;
+ }
+
+ if (!cmp) {
+ he_stat__add_stat(&iter->stat, &he->stat);
+ return iter;
+ }
+
+ if (cmp < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
+ new = hist_entry__new(he, true);
+ if (new == NULL)
+ return NULL;
+
+ hists->nr_entries++;
+
+ /* save related format list for output */
+ new->hpp_list = hpp_list;
+ new->parent_he = parent_he;
+
+ hist_entry__apply_hierarchy_filters(new);
+
+	/* ownership of duplicated fields is kept by exactly one of 'he' and 'new' */
+ perf_hpp_list__for_each_sort_list(hpp_list, fmt) {
+ if (perf_hpp__is_trace_entry(fmt) || perf_hpp__is_dynamic_entry(fmt))
+ he->trace_output = NULL;
+ else
+ new->trace_output = NULL;
+
+ if (perf_hpp__is_srcline_entry(fmt))
+ he->srcline = NULL;
+ else
+ new->srcline = NULL;
+
+ if (perf_hpp__is_srcfile_entry(fmt))
+ he->srcfile = NULL;
+ else
+ new->srcfile = NULL;
+ }
+
+ rb_link_node(&new->rb_node_in, parent, p);
+ rb_insert_color(&new->rb_node_in, root);
+ return new;
+}
+
+static int hists__hierarchy_insert_entry(struct hists *hists,
+ struct rb_root *root,
+ struct hist_entry *he)
+{
+ struct perf_hpp_list_node *node;
+ struct hist_entry *new_he = NULL;
+ struct hist_entry *parent = NULL;
+ int depth = 0;
+ int ret = 0;
+
+ list_for_each_entry(node, &hists->hpp_formats, list) {
+ /* skip period (overhead) and elided columns */
+ if (node->level == 0 || node->skip)
+ continue;
+
+ /* insert copy of 'he' for each fmt into the hierarchy */
+ new_he = hierarchy_insert_entry(hists, root, he, parent, &node->hpp);
+ if (new_he == NULL) {
+ ret = -1;
+ break;
+ }
+
+ root = &new_he->hroot_in;
+ new_he->depth = depth++;
+ parent = new_he;
+ }
+
+ if (new_he) {
+ new_he->leaf = true;
+
+ if (hist_entry__has_callchains(new_he) &&
+ symbol_conf.use_callchain) {
+ callchain_cursor_reset(&callchain_cursor);
+ if (callchain_merge(&callchain_cursor,
+ new_he->callchain,
+ he->callchain) < 0)
+ ret = -1;
+ }
+ }
+
+ /* 'he' is no longer used */
+ hist_entry__delete(he);
+
+ /* return 0 (or -1) since it already applied filters */
+ return ret;
+}
+
+static int hists__collapse_insert_entry(struct hists *hists,
+ struct rb_root *root,
+ struct hist_entry *he)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ int64_t cmp;
+
+ if (symbol_conf.report_hierarchy)
+ return hists__hierarchy_insert_entry(hists, root, he);
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ cmp = hist_entry__collapse(iter, he);
+
+ if (!cmp) {
+ int ret = 0;
+
+ he_stat__add_stat(&iter->stat, &he->stat);
+ if (symbol_conf.cumulate_callchain)
+ he_stat__add_stat(iter->stat_acc, he->stat_acc);
+
+ if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) {
+ callchain_cursor_reset(&callchain_cursor);
+ if (callchain_merge(&callchain_cursor,
+ iter->callchain,
+ he->callchain) < 0)
+ ret = -1;
+ }
+ hist_entry__delete(he);
+ return ret;
+ }
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ hists->nr_entries++;
+
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color(&he->rb_node_in, root);
+ return 1;
+}
+
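+/*
+ * Two input trees are kept in entries_in_array[] so that new entries can
+ * keep flowing into one tree while the other is being collapsed: rotate
+ * the entries_in pointer under the lock and hand the previous tree to the
+ * caller.
+ */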
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists)
+{
+ struct rb_root *root;
+
+ pthread_mutex_lock(&hists->lock);
+
+ root = hists->entries_in;
+ if (++hists->entries_in > &hists->entries_in_array[1])
+ hists->entries_in = &hists->entries_in_array[0];
+
+ pthread_mutex_unlock(&hists->lock);
+
+ return root;
+}
+
+static void hists__apply_filters(struct hists *hists, struct hist_entry *he)
+{
+ hists__filter_entry_by_dso(hists, he);
+ hists__filter_entry_by_thread(hists, he);
+ hists__filter_entry_by_symbol(hists, he);
+ hists__filter_entry_by_socket(hists, he);
+}
+
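+/*
+ * Merge entries that compare equal under the 'collapse' rules from the
+ * rotated input tree into entries_collapsed.  A typical consumer (sketch,
+ * mirroring what the report path does) runs this before resorting the
+ * output:
+ *
+ *	hists__collapse_resort(hists, &prog);
+ *	perf_evsel__output_resort(evsel, &prog);
+ */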
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+ struct hist_entry *n;
+ int ret;
+
+ if (!hists__has(hists, need_collapse))
+ return 0;
+
+ hists->nr_entries = 0;
+
+ root = hists__get_rotate_entries_in(hists);
+
+ next = rb_first(root);
+
+ while (next) {
+ if (session_done())
+ break;
+ n = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&n->rb_node_in);
+
+ rb_erase(&n->rb_node_in, root);
+ ret = hists__collapse_insert_entry(hists, &hists->entries_collapsed, n);
+ if (ret < 0)
+ return -1;
+
+ if (ret) {
+ /*
+ * If it wasn't combined with one of the entries already
+ * collapsed, we need to apply the filters that may have
+ * been set by, say, the hist_browser.
+ */
+ hists__apply_filters(hists, n);
+ }
+ if (prog)
+ ui_progress__update(prog, 1);
+ }
+ return 0;
+}
+
+static int64_t hist_entry__sort(struct hist_entry *a, struct hist_entry *b)
+{
+ struct hists *hists = a->hists;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ hists__for_each_sort_list(hists, fmt) {
+ if (perf_hpp__should_skip(fmt, a->hists))
+ continue;
+
+ cmp = fmt->sort(fmt, a, b);
+ if (cmp)
+ break;
+ }
+
+ return cmp;
+}
+
+static void hists__reset_filter_stats(struct hists *hists)
+{
+ hists->nr_non_filtered_entries = 0;
+ hists->stats.total_non_filtered_period = 0;
+}
+
+void hists__reset_stats(struct hists *hists)
+{
+ hists->nr_entries = 0;
+ hists->stats.total_period = 0;
+
+ hists__reset_filter_stats(hists);
+}
+
+static void hists__inc_filter_stats(struct hists *hists, struct hist_entry *h)
+{
+ hists->nr_non_filtered_entries++;
+ hists->stats.total_non_filtered_period += h->stat.period;
+}
+
+void hists__inc_stats(struct hists *hists, struct hist_entry *h)
+{
+ if (!h->filtered)
+ hists__inc_filter_stats(hists, h);
+
+ hists->nr_entries++;
+ hists->stats.total_period += h->stat.period;
+}
+
+static void hierarchy_recalc_total_periods(struct hists *hists)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ node = rb_first(&hists->entries);
+
+ hists->stats.total_period = 0;
+ hists->stats.total_non_filtered_period = 0;
+
+	/*
+	 * Recalculate the total period using top-level entries only,
+	 * since lower-level entries only see non-filtered entries
+	 * while upper-level entries hold the sum of both.
+	 */
+ while (node) {
+ he = rb_entry(node, struct hist_entry, rb_node);
+ node = rb_next(node);
+
+ hists->stats.total_period += he->stat.period;
+ if (!he->filtered)
+ hists->stats.total_non_filtered_period += he->stat.period;
+ }
+}
+
+static void hierarchy_insert_output_entry(struct rb_root *root,
+ struct hist_entry *he)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node);
+
+ if (hist_entry__sort(he, iter) > 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
+ rb_link_node(&he->rb_node, parent, p);
+ rb_insert_color(&he->rb_node, root);
+
+ /* update column width of dynamic entry */
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt))
+ fmt->sort(fmt, he, NULL);
+ }
+}
+
+static void hists__hierarchy_output_resort(struct hists *hists,
+ struct ui_progress *prog,
+ struct rb_root *root_in,
+ struct rb_root *root_out,
+ u64 min_callchain_hits,
+ bool use_callchain)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ *root_out = RB_ROOT;
+ node = rb_first(root_in);
+
+ while (node) {
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+ node = rb_next(node);
+
+ hierarchy_insert_output_entry(root_out, he);
+
+ if (prog)
+ ui_progress__update(prog, 1);
+
+ hists->nr_entries++;
+ if (!he->filtered) {
+ hists->nr_non_filtered_entries++;
+ hists__calc_col_len(hists, he);
+ }
+
+ if (!he->leaf) {
+ hists__hierarchy_output_resort(hists, prog,
+ &he->hroot_in,
+ &he->hroot_out,
+ min_callchain_hits,
+ use_callchain);
+ continue;
+ }
+
+ if (!use_callchain)
+ continue;
+
+ if (callchain_param.mode == CHAIN_GRAPH_REL) {
+ u64 total = he->stat.period;
+
+ if (symbol_conf.cumulate_callchain)
+ total = he->stat_acc->period;
+
+ min_callchain_hits = total * (callchain_param.min_percent / 100);
+ }
+
+ callchain_param.sort(&he->sorted_chain, he->callchain,
+ min_callchain_hits, &callchain_param);
+ }
+}
+
+static void __hists__insert_output_entry(struct rb_root *entries,
+ struct hist_entry *he,
+ u64 min_callchain_hits,
+ bool use_callchain)
+{
+ struct rb_node **p = &entries->rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+
+ if (use_callchain) {
+ if (callchain_param.mode == CHAIN_GRAPH_REL) {
+ u64 total = he->stat.period;
+
+ if (symbol_conf.cumulate_callchain)
+ total = he->stat_acc->period;
+
+ min_callchain_hits = total * (callchain_param.min_percent / 100);
+ }
+ callchain_param.sort(&he->sorted_chain, he->callchain,
+ min_callchain_hits, &callchain_param);
+ }
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node);
+
+ if (hist_entry__sort(he, iter) > 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&he->rb_node, parent, p);
+ rb_insert_color(&he->rb_node, entries);
+
+ perf_hpp_list__for_each_sort_list(&perf_hpp_list, fmt) {
+ if (perf_hpp__is_dynamic_entry(fmt) &&
+ perf_hpp__defined_dynamic_entry(fmt, he->hists))
+ fmt->sort(fmt, he, NULL); /* update column width */
+ }
+}
+
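+/*
+ * Rebuild hists->entries in display order: entries come from the collapsed
+ * tree (or the input tree when no collapsing is needed), callchains are
+ * pruned below the min_percent threshold, and the optional callback can
+ * reject individual entries.
+ */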
+static void output_resort(struct hists *hists, struct ui_progress *prog,
+ bool use_callchain, hists__resort_cb_t cb)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+ struct hist_entry *n;
+ u64 callchain_total;
+ u64 min_callchain_hits;
+
+ callchain_total = hists->callchain_period;
+ if (symbol_conf.filter_relative)
+ callchain_total = hists->callchain_non_filtered_period;
+
+ min_callchain_hits = callchain_total * (callchain_param.min_percent / 100);
+
+ hists__reset_stats(hists);
+ hists__reset_col_len(hists);
+
+ if (symbol_conf.report_hierarchy) {
+ hists__hierarchy_output_resort(hists, prog,
+ &hists->entries_collapsed,
+ &hists->entries,
+ min_callchain_hits,
+ use_callchain);
+ hierarchy_recalc_total_periods(hists);
+ return;
+ }
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first(root);
+ hists->entries = RB_ROOT;
+
+ while (next) {
+ n = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&n->rb_node_in);
+
+ if (cb && cb(n))
+ continue;
+
+ __hists__insert_output_entry(&hists->entries, n, min_callchain_hits, use_callchain);
+ hists__inc_stats(hists, n);
+
+ if (!n->filtered)
+ hists__calc_col_len(hists, n);
+
+ if (prog)
+ ui_progress__update(prog, 1);
+ }
+}
+
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog)
+{
+ bool use_callchain;
+
+ if (evsel && symbol_conf.use_callchain && !symbol_conf.show_ref_callgraph)
+ use_callchain = evsel__has_callchain(evsel);
+ else
+ use_callchain = symbol_conf.use_callchain;
+
+ use_callchain |= symbol_conf.show_branchflag_count;
+
+ output_resort(evsel__hists(evsel), prog, use_callchain, NULL);
+}
+
+void hists__output_resort(struct hists *hists, struct ui_progress *prog)
+{
+ output_resort(hists, prog, symbol_conf.use_callchain, NULL);
+}
+
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+ hists__resort_cb_t cb)
+{
+ output_resort(hists, prog, symbol_conf.use_callchain, cb);
+}
+
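+/*
+ * Helpers to walk the output hierarchy in display order: descend into a
+ * child tree only when the entry is unfolded (or when forced), otherwise
+ * move to the next sibling, climbing back up as subtrees are exhausted.
+ */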
+static bool can_goto_child(struct hist_entry *he, enum hierarchy_move_dir hmd)
+{
+ if (he->leaf || hmd == HMD_FORCE_SIBLING)
+ return false;
+
+ if (he->unfolded || hmd == HMD_FORCE_CHILD)
+ return true;
+
+ return false;
+}
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node)
+{
+ struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+ while (can_goto_child(he, HMD_NORMAL)) {
+ node = rb_last(&he->hroot_out);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ }
+ return node;
+}
+
+struct rb_node *__rb_hierarchy_next(struct rb_node *node, enum hierarchy_move_dir hmd)
+{
+ struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+ if (can_goto_child(he, hmd))
+ node = rb_first(&he->hroot_out);
+ else
+ node = rb_next(node);
+
+ while (node == NULL) {
+ he = he->parent_he;
+ if (he == NULL)
+ break;
+
+ node = rb_next(&he->rb_node);
+ }
+ return node;
+}
+
+struct rb_node *rb_hierarchy_prev(struct rb_node *node)
+{
+ struct hist_entry *he = rb_entry(node, struct hist_entry, rb_node);
+
+ node = rb_prev(node);
+ if (node)
+ return rb_hierarchy_last(node);
+
+ he = he->parent_he;
+ if (he == NULL)
+ return NULL;
+
+ return &he->rb_node;
+}
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit)
+{
+ struct rb_node *node;
+ struct hist_entry *child;
+ float percent;
+
+ if (he->leaf)
+ return false;
+
+ node = rb_first(&he->hroot_out);
+ child = rb_entry(node, struct hist_entry, rb_node);
+
+ while (node && child->filtered) {
+ node = rb_next(node);
+ child = rb_entry(node, struct hist_entry, rb_node);
+ }
+
+ if (node)
+ percent = hist_entry__get_percent_limit(child);
+ else
+ percent = 0;
+
+ return node && percent >= limit;
+}
+
+static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h,
+ enum hist_filter filter)
+{
+ h->filtered &= ~(1 << filter);
+
+ if (symbol_conf.report_hierarchy) {
+ struct hist_entry *parent = h->parent_he;
+
+ while (parent) {
+ he_stat__add_stat(&parent->stat, &h->stat);
+
+ parent->filtered &= ~(1 << filter);
+
+ if (parent->filtered)
+ goto next;
+
+ /* force fold unfiltered entry for simplicity */
+ parent->unfolded = false;
+ parent->has_no_entry = false;
+ parent->row_offset = 0;
+ parent->nr_rows = 0;
+next:
+ parent = parent->parent_he;
+ }
+ }
+
+ if (h->filtered)
+ return;
+
+ /* force fold unfiltered entry for simplicity */
+ h->unfolded = false;
+ h->has_no_entry = false;
+ h->row_offset = 0;
+ h->nr_rows = 0;
+
+ hists->stats.nr_non_filtered_samples += h->stat.nr_events;
+
+ hists__inc_filter_stats(hists, h);
+ hists__calc_col_len(hists, h);
+}
+
+static bool hists__filter_entry_by_dso(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (hists->dso_filter != NULL &&
+ (he->ms.map == NULL || he->ms.map->dso != hists->dso_filter)) {
+ he->filtered |= (1 << HIST_FILTER__DSO);
+ return true;
+ }
+
+ return false;
+}
+
+static bool hists__filter_entry_by_thread(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (hists->thread_filter != NULL &&
+ he->thread != hists->thread_filter) {
+ he->filtered |= (1 << HIST_FILTER__THREAD);
+ return true;
+ }
+
+ return false;
+}
+
+static bool hists__filter_entry_by_symbol(struct hists *hists,
+ struct hist_entry *he)
+{
+ if (hists->symbol_filter_str != NULL &&
+ (!he->ms.sym || strstr(he->ms.sym->name,
+ hists->symbol_filter_str) == NULL)) {
+ he->filtered |= (1 << HIST_FILTER__SYMBOL);
+ return true;
+ }
+
+ return false;
+}
+
+static bool hists__filter_entry_by_socket(struct hists *hists,
+ struct hist_entry *he)
+{
+ if ((hists->socket_filter > -1) &&
+ (he->socket != hists->socket_filter)) {
+ he->filtered |= (1 << HIST_FILTER__SOCKET);
+ return true;
+ }
+
+ return false;
+}
+
+typedef bool (*filter_fn_t)(struct hists *hists, struct hist_entry *he);
+
+static void hists__filter_by_type(struct hists *hists, int type, filter_fn_t filter)
+{
+ struct rb_node *nd;
+
+ hists->stats.nr_non_filtered_samples = 0;
+
+ hists__reset_filter_stats(hists);
+ hists__reset_col_len(hists);
+
+ for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ if (filter(hists, h))
+ continue;
+
+ hists__remove_entry_filter(hists, h, type);
+ }
+}
+
+static void resort_filtered_entry(struct rb_root *root, struct hist_entry *he)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct hist_entry *iter;
+ struct rb_root new_root = RB_ROOT;
+ struct rb_node *nd;
+
+ while (*p != NULL) {
+ parent = *p;
+ iter = rb_entry(parent, struct hist_entry, rb_node);
+
+ if (hist_entry__sort(he, iter) > 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&he->rb_node, parent, p);
+ rb_insert_color(&he->rb_node, root);
+
+ if (he->leaf || he->filtered)
+ return;
+
+ nd = rb_first(&he->hroot_out);
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ nd = rb_next(nd);
+ rb_erase(&h->rb_node, &he->hroot_out);
+
+ resort_filtered_entry(&new_root, h);
+ }
+
+ he->hroot_out = new_root;
+}
+
+static void hists__filter_hierarchy(struct hists *hists, int type, const void *arg)
+{
+ struct rb_node *nd;
+ struct rb_root new_root = RB_ROOT;
+
+ hists->stats.nr_non_filtered_samples = 0;
+
+ hists__reset_filter_stats(hists);
+ hists__reset_col_len(hists);
+
+ nd = rb_first(&hists->entries);
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+ int ret;
+
+ ret = hist_entry__filter(h, type, arg);
+
+ /*
+ * case 1. non-matching type
+ * zero out the period, set filter marker and move to child
+ */
+ if (ret < 0) {
+ memset(&h->stat, 0, sizeof(h->stat));
+ h->filtered |= (1 << type);
+
+ nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_CHILD);
+ }
+ /*
+ * case 2. matched type (filter out)
+ * set filter marker and move to next
+ */
+ else if (ret == 1) {
+ h->filtered |= (1 << type);
+
+ nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+ }
+ /*
+ * case 3. ok (not filtered)
+ * add period to hists and parents, erase the filter marker
+ * and move to next sibling
+ */
+ else {
+ hists__remove_entry_filter(hists, h, type);
+
+ nd = __rb_hierarchy_next(&h->rb_node, HMD_FORCE_SIBLING);
+ }
+ }
+
+ hierarchy_recalc_total_periods(hists);
+
+	/*
+	 * Resort the output after applying a new filter, since a filter
+	 * at a lower hierarchy level can change periods at an upper level.
+	 */
+ nd = rb_first(&hists->entries);
+ while (nd) {
+ struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
+
+ nd = rb_next(nd);
+ rb_erase(&h->rb_node, &hists->entries);
+
+ resort_filtered_entry(&new_root, h);
+ }
+
+ hists->entries = new_root;
+}
+
+void hists__filter_by_thread(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__THREAD,
+ hists->thread_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__THREAD,
+ hists__filter_entry_by_thread);
+}
+
+void hists__filter_by_dso(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__DSO,
+ hists->dso_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__DSO,
+ hists__filter_entry_by_dso);
+}
+
+void hists__filter_by_symbol(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__SYMBOL,
+ hists->symbol_filter_str);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__SYMBOL,
+ hists__filter_entry_by_symbol);
+}
+
+void hists__filter_by_socket(struct hists *hists)
+{
+ if (symbol_conf.report_hierarchy)
+ hists__filter_hierarchy(hists, HIST_FILTER__SOCKET,
+ &hists->socket_filter);
+ else
+ hists__filter_by_type(hists, HIST_FILTER__SOCKET,
+ hists__filter_entry_by_socket);
+}
+
+void events_stats__inc(struct events_stats *stats, u32 type)
+{
+ ++stats->nr_events[0];
+ ++stats->nr_events[type];
+}
+
+void hists__inc_nr_events(struct hists *hists, u32 type)
+{
+ events_stats__inc(&hists->stats, type);
+}
+
+void hists__inc_nr_samples(struct hists *hists, bool filtered)
+{
+ events_stats__inc(&hists->stats, PERF_RECORD_SAMPLE);
+ if (!filtered)
+ hists->stats.nr_non_filtered_samples++;
+}
+
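+/*
+ * Add an empty entry mirroring 'pair' (period and counts zeroed) so that
+ * hists__link() can give every entry in the other hists a counterpart in
+ * the leader.
+ */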
+static struct hist_entry *hists__add_dummy_entry(struct hists *hists,
+ struct hist_entry *pair)
+{
+ struct rb_root *root;
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ int64_t cmp;
+
+ if (hists__has(hists, need_collapse))
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ p = &root->rb_node;
+
+ while (*p != NULL) {
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ cmp = hist_entry__collapse(he, pair);
+
+ if (!cmp)
+ goto out;
+
+ if (cmp < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ he = hist_entry__new(pair, true);
+ if (he) {
+ memset(&he->stat, 0, sizeof(he->stat));
+ he->hists = hists;
+ if (symbol_conf.cumulate_callchain)
+ memset(he->stat_acc, 0, sizeof(he->stat));
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color(&he->rb_node_in, root);
+ hists__inc_stats(hists, he);
+ he->dummy = true;
+ }
+out:
+ return he;
+}
+
+static struct hist_entry *add_dummy_hierarchy_entry(struct hists *hists,
+ struct rb_root *root,
+ struct hist_entry *pair)
+{
+ struct rb_node **p;
+ struct rb_node *parent = NULL;
+ struct hist_entry *he;
+ struct perf_hpp_fmt *fmt;
+
+ p = &root->rb_node;
+ while (*p != NULL) {
+ int64_t cmp = 0;
+
+ parent = *p;
+ he = rb_entry(parent, struct hist_entry, rb_node_in);
+
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, he, pair);
+ if (cmp)
+ break;
+ }
+ if (!cmp)
+ goto out;
+
+ if (cmp < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
+ he = hist_entry__new(pair, true);
+ if (he) {
+ rb_link_node(&he->rb_node_in, parent, p);
+ rb_insert_color(&he->rb_node_in, root);
+
+ he->dummy = true;
+ he->hists = hists;
+ memset(&he->stat, 0, sizeof(he->stat));
+ hists__inc_stats(hists, he);
+ }
+out:
+ return he;
+}
+
+static struct hist_entry *hists__find_entry(struct hists *hists,
+ struct hist_entry *he)
+{
+ struct rb_node *n;
+
+ if (hists__has(hists, need_collapse))
+ n = hists->entries_collapsed.rb_node;
+ else
+ n = hists->entries_in->rb_node;
+
+ while (n) {
+ struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node_in);
+ int64_t cmp = hist_entry__collapse(iter, he);
+
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return iter;
+ }
+
+ return NULL;
+}
+
+static struct hist_entry *hists__find_hierarchy_entry(struct rb_root *root,
+ struct hist_entry *he)
+{
+ struct rb_node *n = root->rb_node;
+
+ while (n) {
+ struct hist_entry *iter;
+ struct perf_hpp_fmt *fmt;
+ int64_t cmp = 0;
+
+ iter = rb_entry(n, struct hist_entry, rb_node_in);
+ perf_hpp_list__for_each_sort_list(he->hpp_list, fmt) {
+ cmp = fmt->collapse(fmt, iter, he);
+ if (cmp)
+ break;
+ }
+
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return iter;
+ }
+
+ return NULL;
+}
+
+static void hists__match_hierarchy(struct rb_root *leader_root,
+ struct rb_root *other_root)
+{
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ for (nd = rb_first(leader_root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+ pair = hists__find_hierarchy_entry(other_root, pos);
+
+ if (pair) {
+ hist_entry__add_pair(pair, pos);
+ hists__match_hierarchy(&pos->hroot_in, &pair->hroot_in);
+ }
+ }
+}
+
+/*
+ * Look for pairs to link to the leader buckets (hist_entries):
+ */
+void hists__match(struct hists *leader, struct hists *other)
+{
+ struct rb_root *root;
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ if (symbol_conf.report_hierarchy) {
+ /* hierarchy report always collapses entries */
+ return hists__match_hierarchy(&leader->entries_collapsed,
+ &other->entries_collapsed);
+ }
+
+ if (hists__has(leader, need_collapse))
+ root = &leader->entries_collapsed;
+ else
+ root = leader->entries_in;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+ pair = hists__find_entry(other, pos);
+
+ if (pair)
+ hist_entry__add_pair(pair, pos);
+ }
+}
+
+static int hists__link_hierarchy(struct hists *leader_hists,
+ struct hist_entry *parent,
+ struct rb_root *leader_root,
+ struct rb_root *other_root)
+{
+ struct rb_node *nd;
+ struct hist_entry *pos, *leader;
+
+ for (nd = rb_first(other_root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+ if (hist_entry__has_pairs(pos)) {
+ bool found = false;
+
+ list_for_each_entry(leader, &pos->pairs.head, pairs.node) {
+ if (leader->hists == leader_hists) {
+ found = true;
+ break;
+ }
+ }
+ if (!found)
+ return -1;
+ } else {
+ leader = add_dummy_hierarchy_entry(leader_hists,
+ leader_root, pos);
+ if (leader == NULL)
+ return -1;
+
+			/* link the leader-side parent, not one derived from 'pos' */
+ leader->parent_he = parent;
+
+ hist_entry__add_pair(pos, leader);
+ }
+
+ if (!pos->leaf) {
+ if (hists__link_hierarchy(leader_hists, leader,
+ &leader->hroot_in,
+ &pos->hroot_in) < 0)
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Look for entries in the other hists that are not present in the leader.
+ * If we find them, add a dummy entry to the leader hists with period=0 and
+ * nr_events=0, to serve as the list header.
+ */
+int hists__link(struct hists *leader, struct hists *other)
+{
+ struct rb_root *root;
+ struct rb_node *nd;
+ struct hist_entry *pos, *pair;
+
+ if (symbol_conf.report_hierarchy) {
+ /* hierarchy report always collapses entries */
+ return hists__link_hierarchy(leader, NULL,
+ &leader->entries_collapsed,
+ &other->entries_collapsed);
+ }
+
+ if (hists__has(other, need_collapse))
+ root = &other->entries_collapsed;
+ else
+ root = other->entries_in;
+
+ for (nd = rb_first(root); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct hist_entry, rb_node_in);
+
+ if (!hist_entry__has_pairs(pos)) {
+ pair = hists__add_dummy_entry(leader, pos);
+ if (pair == NULL)
+ return -1;
+ hist_entry__add_pair(pos, pair);
+ }
+ }
+
+ return 0;
+}
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample, bool nonany_branch_mode)
+{
+ struct branch_info *bi;
+
+ /* If we have branch cycles always annotate them. */
+ if (bs && bs->nr && bs->entries[0].flags.cycles) {
+ int i;
+
+ bi = sample__resolve_bstack(sample, al);
+ if (bi) {
+ struct addr_map_symbol *prev = NULL;
+
+			/*
+			 * Ignore errors, we still want to process the
+			 * other entries.
+			 *
+			 * For non-standard branch modes always
+			 * force no IPC (prev == NULL).
+			 *
+			 * Note that perf stores branches reversed from
+			 * program order!
+			 */
+ for (i = bs->nr - 1; i >= 0; i--) {
+ addr_map_symbol__account_cycles(&bi[i].from,
+ nonany_branch_mode ? NULL : prev,
+ bi[i].flags.cycles);
+ prev = &bi[i].to;
+ }
+ free(bi);
+ }
+ }
+}
+
+size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp)
+{
+ struct perf_evsel *pos;
+ size_t ret = 0;
+
+ evlist__for_each_entry(evlist, pos) {
+ ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
+ ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp);
+ }
+
+ return ret;
+}
+
+u64 hists__total_period(struct hists *hists)
+{
+ return symbol_conf.filter_relative ? hists->stats.total_non_filtered_period :
+ hists->stats.total_period;
+}
+
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq)
+{
+ char unit;
+ int printed;
+ const struct dso *dso = hists->dso_filter;
+ const struct thread *thread = hists->thread_filter;
+ int socket_id = hists->socket_filter;
+ unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ u64 nr_events = hists->stats.total_period;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ const char *ev_name = perf_evsel__name(evsel);
+ char buf[512], sample_freq_str[64] = "";
+ size_t buflen = sizeof(buf);
+ char ref[30] = " show reference callgraph, ";
+ bool enable_ref = false;
+
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
+ if (perf_evsel__is_group_event(evsel)) {
+ struct perf_evsel *pos;
+
+ perf_evsel__group_desc(evsel, buf, buflen);
+ ev_name = buf;
+
+ for_each_group_member(pos, evsel) {
+ struct hists *pos_hists = evsel__hists(pos);
+
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos_hists->stats.nr_non_filtered_samples;
+ nr_events += pos_hists->stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos_hists->stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos_hists->stats.total_period;
+ }
+ }
+ }
+
+ if (symbol_conf.show_ref_callgraph &&
+ strstr(ev_name, "call-graph=no"))
+ enable_ref = true;
+
+ if (show_freq)
+ scnprintf(sample_freq_str, sizeof(sample_freq_str), " %d Hz,", evsel->attr.sample_freq);
+
+ nr_samples = convert_unit(nr_samples, &unit);
+ printed = scnprintf(bf, size,
+ "Samples: %lu%c of event%s '%s',%s%sEvent count (approx.): %" PRIu64,
+ nr_samples, unit, evsel->nr_members > 1 ? "s" : "",
+ ev_name, sample_freq_str, enable_ref ? ref : " ", nr_events);
+
+ if (hists->uid_filter_str)
+ printed += snprintf(bf + printed, size - printed,
+ ", UID: %s", hists->uid_filter_str);
+ if (thread) {
+ if (hists__has(hists, thread)) {
+ printed += scnprintf(bf + printed, size - printed,
+ ", Thread: %s(%d)",
+ (thread->comm_set ? thread__comm_str(thread) : ""),
+ thread->tid);
+ } else {
+ printed += scnprintf(bf + printed, size - printed,
+ ", Thread: %s",
+ (thread->comm_set ? thread__comm_str(thread) : ""));
+ }
+ }
+ if (dso)
+ printed += scnprintf(bf + printed, size - printed,
+ ", DSO: %s", dso->short_name);
+ if (socket_id > -1)
+ printed += scnprintf(bf + printed, size - printed,
+ ", Processor Socket: %d", socket_id);
+
+ return printed;
+}
+
+int parse_filter_percentage(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!strcmp(arg, "relative"))
+ symbol_conf.filter_relative = true;
+ else if (!strcmp(arg, "absolute"))
+ symbol_conf.filter_relative = false;
+ else {
+ pr_debug("Invalid percentage: %s\n", arg);
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_hist_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "hist.percentage"))
+ return parse_filter_percentage(NULL, value, 0);
+
+ return 0;
+}
+
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list)
+{
+ memset(hists, 0, sizeof(*hists));
+ hists->entries_in_array[0] = hists->entries_in_array[1] = RB_ROOT;
+ hists->entries_in = &hists->entries_in_array[0];
+ hists->entries_collapsed = RB_ROOT;
+ hists->entries = RB_ROOT;
+ pthread_mutex_init(&hists->lock, NULL);
+ hists->socket_filter = -1;
+ hists->hpp_list = hpp_list;
+ INIT_LIST_HEAD(&hists->hpp_formats);
+ return 0;
+}
+
+static void hists__delete_remaining_entries(struct rb_root *root)
+{
+ struct rb_node *node;
+ struct hist_entry *he;
+
+ while (!RB_EMPTY_ROOT(root)) {
+ node = rb_first(root);
+ rb_erase(node, root);
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+ hist_entry__delete(he);
+ }
+}
+
+static void hists__delete_all_entries(struct hists *hists)
+{
+ hists__delete_entries(hists);
+ hists__delete_remaining_entries(&hists->entries_in_array[0]);
+ hists__delete_remaining_entries(&hists->entries_in_array[1]);
+ hists__delete_remaining_entries(&hists->entries_collapsed);
+}
+
+static void hists_evsel__exit(struct perf_evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+ struct perf_hpp_fmt *fmt, *pos;
+ struct perf_hpp_list_node *node, *tmp;
+
+ hists__delete_all_entries(hists);
+
+ list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) {
+ perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) {
+ list_del(&fmt->list);
+ free(fmt);
+ }
+ list_del(&node->list);
+ free(node);
+ }
+}
+
+static int hists_evsel__init(struct perf_evsel *evsel)
+{
+ struct hists *hists = evsel__hists(evsel);
+
+ __hists__init(hists, &perf_hpp_list);
+ return 0;
+}
+
+int hists__init(void)
+{
+ int err = perf_evsel__object_config(sizeof(struct hists_evsel),
+ hists_evsel__init,
+ hists_evsel__exit);
+ if (err)
+ fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
+
+ return err;
+}
+
+void perf_hpp_list__init(struct perf_hpp_list *list)
+{
+ INIT_LIST_HEAD(&list->fields);
+ INIT_LIST_HEAD(&list->sorts);
+}
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
new file mode 100644
index 000000000..899c1ca5e
--- /dev/null
+++ b/tools/perf/util/hist.h
@@ -0,0 +1,527 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_HIST_H
+#define __PERF_HIST_H
+
+#include <linux/types.h>
+#include <pthread.h>
+#include "callchain.h"
+#include "evsel.h"
+#include "header.h"
+#include "color.h"
+#include "ui/progress.h"
+
+struct hist_entry;
+struct hist_entry_ops;
+struct addr_location;
+struct symbol;
+
+enum hist_filter {
+ HIST_FILTER__DSO,
+ HIST_FILTER__THREAD,
+ HIST_FILTER__PARENT,
+ HIST_FILTER__SYMBOL,
+ HIST_FILTER__GUEST,
+ HIST_FILTER__HOST,
+ HIST_FILTER__SOCKET,
+ HIST_FILTER__C2C,
+};
+
+enum hist_column {
+ HISTC_SYMBOL,
+ HISTC_DSO,
+ HISTC_THREAD,
+ HISTC_COMM,
+ HISTC_CGROUP_ID,
+ HISTC_PARENT,
+ HISTC_CPU,
+ HISTC_SOCKET,
+ HISTC_SRCLINE,
+ HISTC_SRCFILE,
+ HISTC_MISPREDICT,
+ HISTC_IN_TX,
+ HISTC_ABORT,
+ HISTC_SYMBOL_FROM,
+ HISTC_SYMBOL_TO,
+ HISTC_DSO_FROM,
+ HISTC_DSO_TO,
+ HISTC_LOCAL_WEIGHT,
+ HISTC_GLOBAL_WEIGHT,
+ HISTC_MEM_DADDR_SYMBOL,
+ HISTC_MEM_DADDR_DSO,
+ HISTC_MEM_PHYS_DADDR,
+ HISTC_MEM_LOCKED,
+ HISTC_MEM_TLB,
+ HISTC_MEM_LVL,
+ HISTC_MEM_SNOOP,
+ HISTC_MEM_DCACHELINE,
+ HISTC_MEM_IADDR_SYMBOL,
+ HISTC_TRANSACTION,
+ HISTC_CYCLES,
+ HISTC_SRCLINE_FROM,
+ HISTC_SRCLINE_TO,
+ HISTC_TRACE,
+ HISTC_SYM_SIZE,
+ HISTC_DSO_SIZE,
+ HISTC_NR_COLS, /* Last entry */
+};
+
+struct thread;
+struct dso;
+
+struct hists {
+ struct rb_root entries_in_array[2];
+ struct rb_root *entries_in;
+ struct rb_root entries;
+ struct rb_root entries_collapsed;
+ u64 nr_entries;
+ u64 nr_non_filtered_entries;
+ u64 callchain_period;
+ u64 callchain_non_filtered_period;
+ struct thread *thread_filter;
+ const struct dso *dso_filter;
+ const char *uid_filter_str;
+ const char *symbol_filter_str;
+ pthread_mutex_t lock;
+ struct events_stats stats;
+ u64 event_stream;
+ u16 col_len[HISTC_NR_COLS];
+ bool has_callchains;
+ int socket_filter;
+ struct perf_hpp_list *hpp_list;
+ struct list_head hpp_formats;
+ int nr_hpp_node;
+};
+
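+/* test whether the hpp list enabled a feature, e.g. hists__has(hists, sym) */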
+#define hists__has(__h, __f) (__h)->hpp_list->__f
+
+struct hist_entry_iter;
+
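+/*
+ * Callbacks driving hist_entry_iter__add(): prepare_entry() and
+ * finish_entry() bracket the iteration, add_single_entry() handles the
+ * sample itself, and next_entry()/add_next_entry() walk any additional
+ * entries (e.g. branch stack entries or cumulative callchain parents).
+ */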
+struct hist_iter_ops {
+ int (*prepare_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*add_single_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*next_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*add_next_entry)(struct hist_entry_iter *, struct addr_location *);
+ int (*finish_entry)(struct hist_entry_iter *, struct addr_location *);
+};
+
+struct hist_entry_iter {
+ int total;
+ int curr;
+
+ bool hide_unresolved;
+
+ struct perf_evsel *evsel;
+ struct perf_sample *sample;
+ struct hist_entry *he;
+ struct symbol *parent;
+ void *priv;
+
+ const struct hist_iter_ops *ops;
+ /* user-defined callback function (optional) */
+ int (*add_entry_cb)(struct hist_entry_iter *iter,
+ struct addr_location *al, bool single, void *arg);
+};
+
+extern const struct hist_iter_ops hist_iter_normal;
+extern const struct hist_iter_ops hist_iter_branch;
+extern const struct hist_iter_ops hist_iter_mem;
+extern const struct hist_iter_ops hist_iter_cumulative;
+
+struct hist_entry *hists__add_entry(struct hists *hists,
+ struct addr_location *al,
+ struct symbol *parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self);
+
+struct hist_entry *hists__add_entry_ops(struct hists *hists,
+ struct hist_entry_ops *ops,
+ struct addr_location *al,
+ struct symbol *sym_parent,
+ struct branch_info *bi,
+ struct mem_info *mi,
+ struct perf_sample *sample,
+ bool sample_self);
+
+int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
+ int max_stack_depth, void *arg);
+
+struct perf_hpp;
+struct perf_hpp_fmt;
+
+int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t hist_entry__collapse(struct hist_entry *left, struct hist_entry *right);
+int hist_entry__transaction_len(void);
+int hist_entry__sort_snprintf(struct hist_entry *he, char *bf, size_t size,
+ struct hists *hists);
+int hist_entry__snprintf_alignment(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_fmt *fmt, int printed);
+void hist_entry__delete(struct hist_entry *he);
+
+typedef int (*hists__resort_cb_t)(struct hist_entry *he);
+
+void perf_evsel__output_resort(struct perf_evsel *evsel, struct ui_progress *prog);
+void hists__output_resort(struct hists *hists, struct ui_progress *prog);
+void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
+ hists__resort_cb_t cb);
+int hists__collapse_resort(struct hists *hists, struct ui_progress *prog);
+
+void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel);
+void hists__delete_entries(struct hists *hists);
+void hists__output_recalc_col_len(struct hists *hists, int max_rows);
+
+u64 hists__total_period(struct hists *hists);
+void hists__reset_stats(struct hists *hists);
+void hists__inc_stats(struct hists *hists, struct hist_entry *h);
+void hists__inc_nr_events(struct hists *hists, u32 type);
+void hists__inc_nr_samples(struct hists *hists, bool filtered);
+void events_stats__inc(struct events_stats *stats, u32 type);
+size_t events_stats__fprintf(struct events_stats *stats, FILE *fp);
+
+size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
+ int max_cols, float min_pcnt, FILE *fp,
+ bool ignore_callchains);
+size_t perf_evlist__fprintf_nr_events(struct perf_evlist *evlist, FILE *fp);
+
+void hists__filter_by_dso(struct hists *hists);
+void hists__filter_by_thread(struct hists *hists);
+void hists__filter_by_symbol(struct hists *hists);
+void hists__filter_by_socket(struct hists *hists);
+
+static inline bool hists__has_filter(struct hists *hists)
+{
+ return hists->thread_filter || hists->dso_filter ||
+ hists->symbol_filter_str || (hists->socket_filter > -1);
+}
+
+u16 hists__col_len(struct hists *hists, enum hist_column col);
+void hists__set_col_len(struct hists *hists, enum hist_column col, u16 len);
+bool hists__new_col_len(struct hists *hists, enum hist_column col, u16 len);
+void hists__reset_col_len(struct hists *hists);
+void hists__calc_col_len(struct hists *hists, struct hist_entry *he);
+
+void hists__match(struct hists *leader, struct hists *other);
+int hists__link(struct hists *leader, struct hists *other);
+
+struct hists_evsel {
+ struct perf_evsel evsel;
+ struct hists hists;
+};
+
+static inline struct perf_evsel *hists_to_evsel(struct hists *hists)
+{
+ struct hists_evsel *hevsel = container_of(hists, struct hists_evsel, hists);
+ return &hevsel->evsel;
+}
+
+static inline struct hists *evsel__hists(struct perf_evsel *evsel)
+{
+ struct hists_evsel *hevsel = (struct hists_evsel *)evsel;
+ return &hevsel->hists;
+}
+
+static __pure inline bool hists__has_callchains(struct hists *hists)
+{
+ return hists->has_callchains;
+}
+
+int hists__init(void);
+int __hists__init(struct hists *hists, struct perf_hpp_list *hpp_list);
+
+struct rb_root *hists__get_rotate_entries_in(struct hists *hists);
+
+struct perf_hpp {
+ char *buf;
+ size_t size;
+ const char *sep;
+ void *ptr;
+};
+
+struct perf_hpp_fmt {
+ const char *name;
+ int (*header)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists, int line, int *span);
+ int (*width)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists);
+ int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he);
+ int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he);
+ int64_t (*cmp)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b);
+ int64_t (*collapse)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b);
+ int64_t (*sort)(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b);
+ bool (*equal)(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b);
+ void (*free)(struct perf_hpp_fmt *fmt);
+
+ struct list_head list;
+ struct list_head sort_list;
+ bool elide;
+ int len;
+ int user_len;
+ int idx;
+ int level;
+};
+
+struct perf_hpp_list {
+ struct list_head fields;
+ struct list_head sorts;
+
+ int nr_header_lines;
+ int need_collapse;
+ int parent;
+ int sym;
+ int dso;
+ int socket;
+ int thread;
+ int comm;
+};
+
+extern struct perf_hpp_list perf_hpp_list;
+
+struct perf_hpp_list_node {
+ struct list_head list;
+ struct perf_hpp_list hpp;
+ int level;
+ bool skip;
+};
+
+void perf_hpp_list__column_register(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format);
+void perf_hpp_list__register_sort_field(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format);
+void perf_hpp_list__prepend_sort_field(struct perf_hpp_list *list,
+ struct perf_hpp_fmt *format);
+
+static inline void perf_hpp__column_register(struct perf_hpp_fmt *format)
+{
+ perf_hpp_list__column_register(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__register_sort_field(struct perf_hpp_fmt *format)
+{
+ perf_hpp_list__register_sort_field(&perf_hpp_list, format);
+}
+
+static inline void perf_hpp__prepend_sort_field(struct perf_hpp_fmt *format)
+{
+ perf_hpp_list__prepend_sort_field(&perf_hpp_list, format);
+}
+
+#define perf_hpp_list__for_each_format(_list, format) \
+ list_for_each_entry(format, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_format_safe(_list, format, tmp) \
+ list_for_each_entry_safe(format, tmp, &(_list)->fields, list)
+
+#define perf_hpp_list__for_each_sort_list(_list, format) \
+ list_for_each_entry(format, &(_list)->sorts, sort_list)
+
+#define perf_hpp_list__for_each_sort_list_safe(_list, format, tmp) \
+ list_for_each_entry_safe(format, tmp, &(_list)->sorts, sort_list)
+
+#define hists__for_each_format(hists, format) \
+ perf_hpp_list__for_each_format((hists)->hpp_list, format)
+
+#define hists__for_each_sort_list(hists, format) \
+ perf_hpp_list__for_each_sort_list((hists)->hpp_list, format)
+
+extern struct perf_hpp_fmt perf_hpp__format[];
+
+enum {
+ /* Matches perf_hpp__format array. */
+ PERF_HPP__OVERHEAD,
+ PERF_HPP__OVERHEAD_SYS,
+ PERF_HPP__OVERHEAD_US,
+ PERF_HPP__OVERHEAD_GUEST_SYS,
+ PERF_HPP__OVERHEAD_GUEST_US,
+ PERF_HPP__OVERHEAD_ACC,
+ PERF_HPP__SAMPLES,
+ PERF_HPP__PERIOD,
+
+ PERF_HPP__MAX_INDEX
+};
+
+void perf_hpp__init(void);
+void perf_hpp__cancel_cumulate(void);
+void perf_hpp__setup_output_field(struct perf_hpp_list *list);
+void perf_hpp__reset_output_field(struct perf_hpp_list *list);
+void perf_hpp__append_sort_keys(struct perf_hpp_list *list);
+int perf_hpp__setup_hists_formats(struct perf_hpp_list *list,
+ struct perf_evlist *evlist);
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *format);
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists);
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcline_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_srcfile_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_thread_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_comm_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_dso_entry(struct perf_hpp_fmt *fmt);
+bool perf_hpp__is_sym_entry(struct perf_hpp_fmt *fmt);
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt);
+
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg);
+
+static inline bool perf_hpp__should_skip(struct perf_hpp_fmt *format,
+ struct hists *hists)
+{
+ if (format->elide)
+ return true;
+
+ if (perf_hpp__is_dynamic_entry(format) &&
+ !perf_hpp__defined_dynamic_entry(format, hists))
+ return true;
+
+ return false;
+}
+
+void perf_hpp__reset_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists);
+void perf_hpp__set_user_width(const char *width_list_str);
+void hists__reset_column_width(struct hists *hists);
+
+typedef u64 (*hpp_field_fn)(struct hist_entry *he);
+typedef int (*hpp_callback_fn)(struct perf_hpp *hpp, bool front);
+typedef int (*hpp_snprint_fn)(struct perf_hpp *hpp, const char *fmt, ...);
+
+int hpp__fmt(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, hpp_field_fn get_field,
+ const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he, hpp_field_fn get_field,
+ const char *fmtstr, hpp_snprint_fn print_fn, bool fmt_percent);
+
+static inline void advance_hpp(struct perf_hpp *hpp, int inc)
+{
+ hpp->buf += inc;
+ hpp->size -= inc;
+}
+
+static inline size_t perf_hpp__use_color(void)
+{
+ return !symbol_conf.field_sep;
+}
+
+static inline size_t perf_hpp__color_overhead(void)
+{
+ return perf_hpp__use_color() ?
+ (COLOR_MAXLEN + sizeof(PERF_COLOR_RESET)) * PERF_HPP__MAX_INDEX
+ : 0;
+}
+
+struct perf_evlist;
+
+struct hist_browser_timer {
+ void (*timer)(void *arg);
+ void *arg;
+ int refresh;
+};
+
+struct annotation_options;
+
+#ifdef HAVE_SLANG_SUPPORT
+#include "../ui/keysyms.h"
+int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *annotation_opts);
+
+int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt,
+ struct annotation_options *annotation_opts);
+
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help,
+ struct hist_browser_timer *hbt,
+ float min_pcnt,
+ struct perf_env *env,
+ bool warn_lost_event,
+ struct annotation_options *annotation_options);
+int script_browse(const char *script_opt);
+#else
+static inline
+int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused,
+ const char *help __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ float min_pcnt __maybe_unused,
+ struct perf_env *env __maybe_unused,
+ bool warn_lost_event __maybe_unused,
+ struct annotation_options *annotation_options __maybe_unused)
+{
+ return 0;
+}
+static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ struct annotation_options *annotation_options __maybe_unused)
+{
+ return 0;
+}
+
+static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused,
+ struct hist_browser_timer *hbt __maybe_unused,
+ struct annotation_options *annotation_opts __maybe_unused)
+{
+ return 0;
+}
+
+static inline int script_browse(const char *script_opt __maybe_unused)
+{
+ return 0;
+}
+
+#define K_LEFT -1000
+#define K_RIGHT -2000
+#define K_SWITCH_INPUT_DATA -3000
+#endif
+
+unsigned int hists__sort_list_width(struct hists *hists);
+unsigned int hists__overhead_width(struct hists *hists);
+
+void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
+ struct perf_sample *sample, bool nonany_branch_mode);
+
+struct option;
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
+int perf_hist_config(const char *var, const char *value);
+
+void perf_hpp_list__init(struct perf_hpp_list *list);
+
+enum hierarchy_move_dir {
+ HMD_NORMAL,
+ HMD_FORCE_SIBLING,
+ HMD_FORCE_CHILD,
+};
+
+struct rb_node *rb_hierarchy_last(struct rb_node *node);
+struct rb_node *__rb_hierarchy_next(struct rb_node *node,
+ enum hierarchy_move_dir hmd);
+struct rb_node *rb_hierarchy_prev(struct rb_node *node);
+
+static inline struct rb_node *rb_hierarchy_next(struct rb_node *node)
+{
+ return __rb_hierarchy_next(node, HMD_NORMAL);
+}
+
+#define HIERARCHY_INDENT 3
+
+bool hist_entry__has_hierarchy_children(struct hist_entry *he, float limit);
+int hpp_color_scnprintf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hpp__slsmg_color_printf(struct perf_hpp *hpp, const char *fmt, ...);
+int __hist_entry__snprintf(struct hist_entry *he, struct perf_hpp *hpp,
+ struct perf_hpp_list *hpp_list);
+int hists__fprintf_headers(struct hists *hists, FILE *fp);
+int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool show_freq);
+
+static inline int hists__scnprintf_title(struct hists *hists, char *bf, size_t size)
+{
+ return __hists__scnprintf_title(hists, bf, size, true);
+}
+
+#endif /* __PERF_HIST_H */
diff --git a/tools/perf/util/include/asm/asm-offsets.h b/tools/perf/util/include/asm/asm-offsets.h
new file mode 100644
index 000000000..3aff4cf44
--- /dev/null
+++ b/tools/perf/util/include/asm/asm-offsets.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* stub */
diff --git a/tools/perf/util/include/asm/cpufeature.h b/tools/perf/util/include/asm/cpufeature.h
new file mode 100644
index 000000000..2270481c7
--- /dev/null
+++ b/tools/perf/util/include/asm/cpufeature.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_CPUFEATURE_H
+#define PERF_CPUFEATURE_H
+
+/* cpufeature.h ... dummy header file for including arch/x86/lib/memcpy_64.S */
+
+#define X86_FEATURE_REP_GOOD 0
+
+#endif /* PERF_CPUFEATURE_H */
diff --git a/tools/perf/util/include/asm/dwarf2.h b/tools/perf/util/include/asm/dwarf2.h
new file mode 100644
index 000000000..e9876be63
--- /dev/null
+++ b/tools/perf/util/include/asm/dwarf2.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_DWARF2_H
+#define PERF_DWARF2_H
+
+/* dwarf2.h ... dummy header file for including arch/x86/lib/mem{cpy,set}_64.S */
+
+#define CFI_STARTPROC
+#define CFI_ENDPROC
+#define CFI_REMEMBER_STATE
+#define CFI_RESTORE_STATE
+
+#endif /* PERF_DWARF2_H */
+
diff --git a/tools/perf/util/include/asm/swab.h b/tools/perf/util/include/asm/swab.h
new file mode 100644
index 000000000..ed5389425
--- /dev/null
+++ b/tools/perf/util/include/asm/swab.h
@@ -0,0 +1 @@
+/* stub */
diff --git a/tools/perf/util/include/asm/system.h b/tools/perf/util/include/asm/system.h
new file mode 100644
index 000000000..710cecca9
--- /dev/null
+++ b/tools/perf/util/include/asm/system.h
@@ -0,0 +1 @@
+/* Empty */
diff --git a/tools/perf/util/include/asm/uaccess.h b/tools/perf/util/include/asm/uaccess.h
new file mode 100644
index 000000000..6a6f4b990
--- /dev/null
+++ b/tools/perf/util/include/asm/uaccess.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_ASM_UACCESS_H_
+#define _PERF_ASM_UACCESS_H_
+
+#define __get_user(val, ptr) \
+({ \
+ (val) = *(ptr); \
+ 0; \
+})
+
+#define get_user __get_user
+
+#define access_ok(type, addr, size) 1
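+
+/*
+ * Note: in this userspace build these are stubs: get_user() is a plain
+ * dereference that always returns 0 (success) and access_ok() always
+ * passes.
+ */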
+
+#endif
diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h
new file mode 100644
index 000000000..7d99a084e
--- /dev/null
+++ b/tools/perf/util/include/dwarf-regs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_DWARF_REGS_H_
+#define _PERF_DWARF_REGS_H_
+
+#ifdef HAVE_DWARF_SUPPORT
+const char *get_arch_regstr(unsigned int n);
+/*
+ * get_dwarf_regstr - Returns the arch register string for a DWARF regnum
+ * n: DWARF register number
+ * machine: ELF machine signature (EM_*)
+ */
+const char *get_dwarf_regstr(unsigned int n, unsigned int machine);
+#endif
+
+#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
+/*
+ * Arch should support fetching the offset of a register in pt_regs
+ * by its name. See kernel's regs_query_register_offset in
+ * arch/xxx/kernel/ptrace.c.
+ */
+int regs_query_register_offset(const char *name);
+#endif
+#endif
diff --git a/tools/perf/util/include/linux/ctype.h b/tools/perf/util/include/linux/ctype.h
new file mode 100644
index 000000000..a53d4ee1e
--- /dev/null
+++ b/tools/perf/util/include/linux/ctype.h
@@ -0,0 +1 @@
+#include "../util.h"
diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h
new file mode 100644
index 000000000..f01d48a8d
--- /dev/null
+++ b/tools/perf/util/include/linux/linkage.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PERF_LINUX_LINKAGE_H_
+#define PERF_LINUX_LINKAGE_H_
+
+/* linkage.h ... for including arch/x86/lib/memcpy_64.S */
+
+#define ENTRY(name) \
+ .globl name; \
+ name:
+
+#define ENDPROC(name)
+
+#endif /* PERF_LINUX_LINKAGE_H_ */
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
new file mode 100644
index 000000000..7127bc917
--- /dev/null
+++ b/tools/perf/util/intel-bts.c
@@ -0,0 +1,952 @@
+/*
+ * intel-bts.c: Intel Branch Trace Store (BTS) support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "debug.h"
+#include "tsc.h"
+#include "auxtrace.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-bts.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+#define INTEL_BTS_ERR_NOINSN 5
+#define INTEL_BTS_ERR_LOST 9
+
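+/*
+ * BTS records are stored in little-endian byte order, so on big-endian
+ * hosts each u64 field must be byte-swapped; on little-endian hosts
+ * le64_to_cpu expands to nothing.
+ */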
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le64_to_cpu bswap_64
+#else
+#define le64_to_cpu
+#endif
+
+struct intel_bts {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ bool sampling_mode;
+ bool snapshot_mode;
+ bool data_queued;
+ u32 pmu_type;
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+ struct itrace_synth_opts synth_opts;
+ bool sample_branches;
+ u32 branches_filter;
+ u64 branches_sample_type;
+ u64 branches_id;
+ size_t branches_event_size;
+ unsigned long num_events;
+};
+
+struct intel_bts_queue {
+ struct intel_bts *bts;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ u64 time;
+ struct intel_pt_insn intel_pt_insn;
+ u32 sample_flags;
+};
+
+struct branch {
+ u64 from;
+ u64 to;
+ u64 misc;
+};
+
+static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct branch *branch;
+ size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... Intel BTS data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ if (len >= br_sz)
+ sz = br_sz;
+ else
+ sz = len;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < sz; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < br_sz; i++)
+ color_fprintf(stdout, color, " ");
+ if (len >= br_sz) {
+ branch = (struct branch *)buf;
+ color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
+ le64_to_cpu(branch->from),
+ le64_to_cpu(branch->to),
+ le64_to_cpu(branch->misc) & 0x10 ?
+ "pred" : "miss");
+ } else {
+ color_fprintf(stdout, color, " Bad record!\n");
+ }
+ pos += sz;
+ buf += sz;
+ len -= sz;
+ }
+}
+
+static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ intel_bts_dump(bts, buf, len);
+}
+
+static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
+{
+ union perf_event event;
+ int err;
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
+ sample->tid, 0, "Lost trace data");
+
+ err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+ if (err)
+ pr_err("Intel BTS: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq;
+
+ btsq = zalloc(sizeof(struct intel_bts_queue));
+ if (!btsq)
+ return NULL;
+
+ btsq->bts = bts;
+ btsq->queue_nr = queue_nr;
+ btsq->pid = -1;
+ btsq->tid = -1;
+ btsq->cpu = -1;
+
+ return btsq;
+}
+
+static int intel_bts_setup_queue(struct intel_bts *bts,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!btsq) {
+ btsq = intel_bts_alloc_queue(bts, queue_nr);
+ if (!btsq)
+ return -ENOMEM;
+ queue->priv = btsq;
+
+ if (queue->cpu != -1)
+ btsq->cpu = queue->cpu;
+ btsq->tid = queue->tid;
+ }
+
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!btsq->on_heap && !btsq->buffer) {
+ int ret;
+
+ btsq->buffer = auxtrace_buffer__next(queue, NULL);
+ if (!btsq->buffer)
+ return 0;
+
+ ret = auxtrace_heap__add(&bts->heap, queue_nr,
+ btsq->buffer->reference);
+ if (ret)
+ return ret;
+ btsq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_bts_setup_queues(struct intel_bts *bts)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < bts->queues.nr_queues; i++) {
+ ret = intel_bts_setup_queue(bts, &bts->queues.queue_array[i],
+ i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static inline int intel_bts_update_queues(struct intel_bts *bts)
+{
+ if (bts->queues.new_data) {
+ bts->queues.new_data = false;
+ return intel_bts_setup_queues(bts);
+ }
+ return 0;
+}
+
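+/*
+ * In snapshot mode consecutive buffers can contain overlapping data: the
+ * head of buf_b may duplicate the tail of buf_a. Search record by record
+ * for the point where buf_b's start matches buf_a's tail and return a
+ * pointer just past the duplicated bytes, or buf_b unchanged if no
+ * overlap is found.
+ */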
+static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b)
+{
+ size_t offs, len;
+
+ if (len_a > len_b)
+ offs = len_a - len_b;
+ else
+ offs = 0;
+
+ for (; offs < len_a; offs += sizeof(struct branch)) {
+ len = len_a - offs;
+ if (!memcmp(buf_a + offs, buf_b, len))
+ return buf_b + len;
+ }
+
+ return buf_b;
+}
+
+static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
+ struct auxtrace_buffer *b)
+{
+ struct auxtrace_buffer *a;
+ void *start;
+
+ if (b->list.prev == &queue->head)
+ return 0;
+ a = list_entry(b->list.prev, struct auxtrace_buffer, list);
+ start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
+ if (!start)
+ return -EINVAL;
+ b->use_size = b->data + b->size - start;
+ b->use_data = start;
+ return 0;
+}
+
+static inline u8 intel_bts_cpumode(struct intel_bts *bts, uint64_t ip)
+{
+ return machine__kernel_ip(bts->machine, ip) ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int ret;
+ struct intel_bts *bts = btsq->bts;
+ union perf_event event;
+ struct perf_sample sample = { .ip = 0, };
+
+ if (bts->synth_opts.initial_skip &&
+ bts->num_events++ <= bts->synth_opts.initial_skip)
+ return 0;
+
+ sample.ip = le64_to_cpu(branch->from);
+ sample.cpumode = intel_bts_cpumode(bts, sample.ip);
+ sample.pid = btsq->pid;
+ sample.tid = btsq->tid;
+ sample.addr = le64_to_cpu(branch->to);
+ sample.id = btsq->bts->branches_id;
+ sample.stream_id = btsq->bts->branches_id;
+ sample.period = 1;
+ sample.cpu = btsq->cpu;
+ sample.flags = btsq->sample_flags;
+ sample.insn_len = btsq->intel_pt_insn.length;
+ memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
+
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ if (bts->synth_opts.inject) {
+ event.sample.header.size = bts->branches_event_size;
+ ret = perf_event__synthesize_sample(&event,
+ bts->branches_sample_type,
+ 0, &sample);
+ if (ret)
+ return ret;
+ }
+
+ ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
+ if (ret)
+ pr_err("Intel BTS: failed to deliver branch event, error %d\n",
+ ret);
+
+ return ret;
+}
+
+static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
+{
+ struct machine *machine = btsq->bts->machine;
+ struct thread *thread;
+ struct addr_location al;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+ ssize_t len;
+ int x86_64;
+ uint8_t cpumode;
+ int err = -1;
+
+ if (machine__kernel_ip(machine, ip))
+ cpumode = PERF_RECORD_MISC_KERNEL;
+ else
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = machine__find_thread(machine, -1, btsq->tid);
+ if (!thread)
+ return -1;
+
+ if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
+ goto out_put;
+
+ len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf,
+ INTEL_PT_INSN_BUF_SZ);
+ if (len <= 0)
+ goto out_put;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map);
+
+ x86_64 = al.map->dso->is_64_bit;
+
+ if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
+ goto out_put;
+
+ err = 0;
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
+ pid_t tid, u64 ip)
+{
+ union perf_event event;
+ int err;
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
+ "Failed to get instruction");
+
+ err = perf_session__deliver_synth_event(bts->session, &event, NULL);
+ if (err)
+ pr_err("Intel BTS: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
+ struct branch *branch)
+{
+ int err;
+
+ if (!branch->from) {
+ if (branch->to)
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ else
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ } else if (!branch->to) {
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ btsq->intel_pt_insn.length = 0;
+ } else {
+ err = intel_bts_get_next_insn(btsq, branch->from);
+ if (err) {
+ btsq->sample_flags = 0;
+ btsq->intel_pt_insn.length = 0;
+ if (!btsq->bts->synth_opts.errors)
+ return 0;
+ err = intel_bts_synth_error(btsq->bts, btsq->cpu,
+ btsq->pid, btsq->tid,
+ branch->from);
+ return err;
+ }
+ btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
+ /* Check for an async branch into the kernel */
+ if (!machine__kernel_ip(btsq->bts->machine, branch->from) &&
+ machine__kernel_ip(btsq->bts->machine, branch->to) &&
+ btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET))
+ btsq->sample_flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ }
+
+ return 0;
+}
+
+static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
+ struct auxtrace_buffer *buffer,
+ struct thread *thread)
+{
+ struct branch *branch;
+ size_t sz, bsz = sizeof(struct branch);
+ u32 filter = btsq->bts->branches_filter;
+ int err = 0;
+
+ if (buffer->use_data) {
+ sz = buffer->use_size;
+ branch = buffer->use_data;
+ } else {
+ sz = buffer->size;
+ branch = buffer->data;
+ }
+
+ if (!btsq->bts->sample_branches)
+ return 0;
+
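+ /*
+ * Note that the loop condition is 'sz > bsz', not 'sz >= bsz', so the
+ * final record in the buffer is never processed here.
+ */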
+ for (; sz > bsz; branch += 1, sz -= bsz) {
+ if (!branch->from && !branch->to)
+ continue;
+ intel_bts_get_branch_type(btsq, branch);
+ if (btsq->bts->synth_opts.thread_stack)
+ thread_stack__event(thread, btsq->sample_flags,
+ le64_to_cpu(branch->from),
+ le64_to_cpu(branch->to),
+ btsq->intel_pt_insn.length,
+ buffer->buffer_nr + 1);
+ if (filter && !(filter & btsq->sample_flags))
+ continue;
+ err = intel_bts_synth_branch_sample(btsq, branch);
+ if (err)
+ break;
+ }
+ return err;
+}
+
+static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
+{
+ struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
+ struct auxtrace_queue *queue;
+ struct thread *thread;
+ int err;
+
+ if (btsq->done)
+ return 1;
+
+ if (btsq->pid == -1) {
+ thread = machine__find_thread(btsq->bts->machine, -1,
+ btsq->tid);
+ if (thread)
+ btsq->pid = thread->pid_;
+ } else {
+ thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
+ btsq->tid);
+ }
+
+ queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
+
+ if (!buffer)
+ buffer = auxtrace_buffer__next(queue, NULL);
+
+ if (!buffer) {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ err = 1;
+ goto out_put;
+ }
+
+ /* Currently there is no support for split buffers */
+ if (buffer->consecutive) {
+ err = -EINVAL;
+ goto out_put;
+ }
+
+ if (!buffer->data) {
+ int fd = perf_data__fd(btsq->bts->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+ }
+
+ if (btsq->bts->snapshot_mode && !buffer->consecutive &&
+ intel_bts_do_fix_overlap(queue, buffer)) {
+ err = -ENOMEM;
+ goto out_put;
+ }
+
+ if (!btsq->bts->synth_opts.callchain &&
+ !btsq->bts->synth_opts.thread_stack && thread &&
+ (!old_buffer || btsq->bts->sampling_mode ||
+ (btsq->bts->snapshot_mode && !buffer->consecutive)))
+ thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
+
+ err = intel_bts_process_buffer(btsq, buffer, thread);
+
+ auxtrace_buffer__drop_data(buffer);
+
+ btsq->buffer = auxtrace_buffer__next(queue, buffer);
+ if (btsq->buffer) {
+ if (timestamp)
+ *timestamp = btsq->buffer->reference;
+ } else {
+ if (!btsq->bts->sampling_mode)
+ btsq->done = 1;
+ }
+out_put:
+ thread__put(thread);
+ return err;
+}
+
+static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
+{
+ u64 ts = 0;
+ int ret;
+
+ while (1) {
+ ret = intel_bts_process_queue(btsq, &ts);
+ if (ret < 0)
+ return ret;
+ if (ret)
+ break;
+ }
+ return 0;
+}
+
+static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
+{
+ struct auxtrace_queues *queues = &bts->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &bts->queues.queue_array[i];
+ struct intel_bts_queue *btsq = queue->priv;
+
+ if (btsq && btsq->tid == tid)
+ return intel_bts_flush_queue(btsq);
+ }
+ return 0;
+}
+
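+/*
+ * Buffers from all queues are processed in timestamp order using a
+ * min-heap keyed on each queue's next buffer reference: pop the smallest
+ * ordinal, advance that queue by one buffer, then push it back with its
+ * new timestamp, until the heap is empty or the smallest ordinal is
+ * beyond the requested timestamp.
+ */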
+static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
+{
+ while (1) {
+ unsigned int queue_nr;
+ struct auxtrace_queue *queue;
+ struct intel_bts_queue *btsq;
+ u64 ts = 0;
+ int ret;
+
+ if (!bts->heap.heap_cnt)
+ return 0;
+
+ if (bts->heap.heap_array[0].ordinal > timestamp)
+ return 0;
+
+ queue_nr = bts->heap.heap_array[0].queue_nr;
+ queue = &bts->queues.queue_array[queue_nr];
+ btsq = queue->priv;
+
+ auxtrace_heap__pop(&bts->heap);
+
+ ret = intel_bts_process_queue(btsq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&bts->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ btsq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_bts_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ u64 timestamp;
+ int err;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("Intel BTS requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &bts->tc);
+ else
+ timestamp = 0;
+
+ err = intel_bts_update_queues(bts);
+ if (err)
+ return err;
+
+ err = intel_bts_process_queues(bts, timestamp);
+ if (err)
+ return err;
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = intel_bts_process_tid_exit(bts, event->fork.tid);
+ if (err)
+ return err;
+ }
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+ bts->synth_opts.errors)
+ err = intel_bts_lost(bts, sample);
+
+ return err;
+}
+
+static int intel_bts_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ if (bts->sampling_mode)
+ return 0;
+
+ if (!bts->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&bts->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here, now that we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ intel_bts_dump_event(bts, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int intel_bts_flush(struct perf_session *session,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ int ret;
+
+ if (dump_trace || bts->sampling_mode)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = intel_bts_update_queues(bts);
+ if (ret < 0)
+ return ret;
+
+ return intel_bts_process_queues(bts, MAX_TIMESTAMP);
+}
+
+static void intel_bts_free_queue(void *priv)
+{
+ struct intel_bts_queue *btsq = priv;
+
+ if (!btsq)
+ return;
+ free(btsq);
+}
+
+static void intel_bts_free_events(struct perf_session *session)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+ struct auxtrace_queues *queues = &bts->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ intel_bts_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ auxtrace_queues__free(queues);
+}
+
+static void intel_bts_free(struct perf_session *session)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ auxtrace_heap__free(&bts->heap);
+ intel_bts_free_events(session);
+ session->auxtrace = NULL;
+ free(bts);
+}
+
+struct intel_bts_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int intel_bts_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct intel_bts_synth *intel_bts_synth =
+ container_of(tool, struct intel_bts_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(intel_bts_synth->session,
+ event, NULL);
+}
+
+static int intel_bts_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct intel_bts_synth intel_bts_synth;
+
+ memset(&intel_bts_synth, 0, sizeof(struct intel_bts_synth));
+ intel_bts_synth.session = session;
+
+ return perf_event__synthesize_attr(&intel_bts_synth.dummy_tool, attr, 1,
+ &id, intel_bts_event_synth);
+}
+
+static int intel_bts_synth_events(struct intel_bts *bts,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == bts->pmu_type && evsel->ids) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("There are no selected events with Intel BTS data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->attr.exclude_user;
+ attr.exclude_kernel = evsel->attr.exclude_kernel;
+ attr.exclude_hv = evsel->attr.exclude_hv;
+ attr.exclude_host = evsel->attr.exclude_host;
+ attr.exclude_guest = evsel->attr.exclude_guest;
+ attr.sample_id_all = evsel->attr.sample_id_all;
+ attr.read_format = evsel->attr.read_format;
+
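+ /* Pick a synthetic sample id well clear of the existing ids; never 0 */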
+ id = evsel->id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (bts->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ id, (u64)attr.sample_type);
+ err = intel_bts_synth_event(session, &attr, id);
+ if (err) {
+ pr_err("%s: failed to synthesize 'branches' event type\n",
+ __func__);
+ return err;
+ }
+ bts->sample_branches = true;
+ bts->branches_sample_type = attr.sample_type;
+ bts->branches_id = id;
+ /*
+ * We only use sample types from PERF_SAMPLE_MASK so we can use
+ * __perf_evsel__sample_size() here.
+ */
+ bts->branches_event_size = sizeof(struct sample_event) +
+ __perf_evsel__sample_size(attr.sample_type);
+ }
+
+ return 0;
+}
+
+static const char * const intel_bts_info_fmts[] = {
+ [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+ [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
+ [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+};
+
+static void intel_bts_print_info(u64 *arr, int start, int finish)
+{
+ int i;
+
+ if (!dump_trace)
+ return;
+
+ for (i = start; i <= finish; i++)
+ fprintf(stdout, intel_bts_info_fmts[i], arr[i]);
+}
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * INTEL_BTS_SNAPSHOT_MODE;
+ struct intel_bts *bts;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+ min_sz)
+ return -EINVAL;
+
+ bts = zalloc(sizeof(struct intel_bts));
+ if (!bts)
+ return -ENOMEM;
+
+ err = auxtrace_queues__init(&bts->queues);
+ if (err)
+ goto err_free;
+
+ bts->session = session;
+ bts->machine = &session->machines.host; /* No kvm support */
+ bts->auxtrace_type = auxtrace_info->type;
+ bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
+ bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
+ bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
+ bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
+ bts->cap_user_time_zero =
+ auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
+ bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
+
+ bts->sampling_mode = false;
+
+ bts->auxtrace.process_event = intel_bts_process_event;
+ bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
+ bts->auxtrace.flush_events = intel_bts_flush;
+ bts->auxtrace.free_events = intel_bts_free_events;
+ bts->auxtrace.free = intel_bts_free;
+ session->auxtrace = &bts->auxtrace;
+
+ intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_SNAPSHOT_MODE);
+
+ if (dump_trace)
+ return 0;
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+ bts->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&bts->synth_opts);
+ if (session->itrace_synth_opts)
+ bts->synth_opts.thread_stack =
+ session->itrace_synth_opts->thread_stack;
+ }
+
+ if (bts->synth_opts.calls)
+ bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ if (bts->synth_opts.returns)
+ bts->branches_filter |= PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ err = intel_bts_synth_events(bts, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&bts->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (bts->queues.populated)
+ bts->data_queued = true;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&bts->queues);
+ session->auxtrace = NULL;
+err_free:
+ free(bts);
+ return err;
+}
diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
new file mode 100644
index 000000000..ca65e21b3
--- /dev/null
+++ b/tools/perf/util/intel-bts.h
@@ -0,0 +1,43 @@
+/*
+ * intel-bts.h: Intel Branch Trace Store (BTS) support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_BTS_H__
+#define INCLUDE__PERF_INTEL_BTS_H__
+
+#define INTEL_BTS_PMU_NAME "intel_bts"
+
+enum {
+ INTEL_BTS_PMU_TYPE,
+ INTEL_BTS_TIME_SHIFT,
+ INTEL_BTS_TIME_MULT,
+ INTEL_BTS_TIME_ZERO,
+ INTEL_BTS_CAP_USER_TIME_ZERO,
+ INTEL_BTS_SNAPSHOT_MODE,
+ INTEL_BTS_AUXTRACE_PRIV_MAX,
+};
+
+#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+
+struct auxtrace_record *intel_bts_recording_init(int *err);
+
+int intel_bts_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build
new file mode 100644
index 000000000..1b704fbea
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/Build
@@ -0,0 +1,36 @@
+libperf-$(CONFIG_AUXTRACE) += intel-pt-pkt-decoder.o intel-pt-insn-decoder.o intel-pt-log.o intel-pt-decoder.o
+
+inat_tables_script = util/intel-pt-decoder/gen-insn-attr-x86.awk
+inat_tables_maps = util/intel-pt-decoder/x86-opcode-map.txt
+
+$(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call rule_mkdir)
+ @$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@ || rm -f $@
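+
+# The generation step can also be run by hand (illustrative invocation,
+# mirroring the Usage line in the awk script):
+# awk -f util/intel-pt-decoder/gen-insn-attr-x86.awk \
+# util/intel-pt-decoder/x86-opcode-map.txt > inat-tables.c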
+
+# Busybox's diff doesn't have -I, avoid the warning in that case
+
+$(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c
+ @(diff -I 2>&1 | grep -q 'option requires an argument' && \
+ test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \
+ ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \
+ ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \
+ (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
+
+CFLAGS_intel-pt-insn-decoder.o += -I$(OUTPUT)util/intel-pt-decoder
+
+ifeq ($(CC_NO_CLANG), 1)
+ CFLAGS_intel-pt-insn-decoder.o += -Wno-override-init
+endif
diff --git a/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
new file mode 100644
index 000000000..ddd5c4c21
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk
@@ -0,0 +1,392 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+ if (sprintf("%x", 0) != "0")
+ return "Your awk has a printf-format problem."
+ return ""
+}
+
+# Clear working vars
+function clear_vars() {
+ delete table
+ delete lptable2
+ delete lptable1
+ delete lptable3
+ eid = -1 # escape id
+ gid = -1 # group id
+ aid = -1 # AVX id
+ tname = ""
+}
+
+BEGIN {
+ # Implementation error checking
+ awkchecked = check_awk_implement()
+ if (awkchecked != "") {
+ print "Error: " awkchecked > "/dev/stderr"
+ print "Please try to use gawk." > "/dev/stderr"
+ exit 1
+ }
+
+ # Set up the generated tables
+ print "/* x86 opcode map generated from x86-opcode-map.txt */"
+ print "/* Do not change this code. */\n"
+ ggid = 1
+ geid = 1
+ gaid = 0
+ delete etable
+ delete gtable
+ delete atable
+
+ opnd_expr = "^[A-Za-z/]"
+ ext_expr = "^\\("
+ sep_expr = "^\\|$"
+ group_expr = "^Grp[0-9A-Za-z]+"
+
+ imm_expr = "^[IJAOL][a-z]"
+ imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+ imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+ imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+ imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+ imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+ imm_flag["Ob"] = "INAT_MOFFSET"
+ imm_flag["Ov"] = "INAT_MOFFSET"
+ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+
+ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
+ force64_expr = "\\([df]64\\)"
+ rex_expr = "^REX(\\.[XRWB]+)*"
+ fpu_expr = "^ESC" # TODO
+
+ lprefix1_expr = "\\((66|!F3)\\)"
+ lprefix2_expr = "\\(F3\\)"
+ lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
+ lprefix_expr = "\\((66|F2|F3)\\)"
+ max_lprefix = 4
+
+ # All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
+ # accept the VEX prefix
+ vexok_opcode_expr = "^[vk].*"
+ vexok_expr = "\\(v1\\)"
+ # All opcodes with (v) superscript support *only* the VEX prefix
+ vexonly_expr = "\\(v\\)"
+ # All opcodes with (ev) superscript support *only* the EVEX prefix
+ evexonly_expr = "\\(ev\\)"
+
+ prefix_expr = "\\(Prefix\\)"
+ prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+ prefix_num["REPNE"] = "INAT_PFX_REPNE"
+ prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+ prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+ prefix_num["XRELEASE"] = "INAT_PFX_REPE"
+ prefix_num["LOCK"] = "INAT_PFX_LOCK"
+ prefix_num["SEG=CS"] = "INAT_PFX_CS"
+ prefix_num["SEG=DS"] = "INAT_PFX_DS"
+ prefix_num["SEG=ES"] = "INAT_PFX_ES"
+ prefix_num["SEG=FS"] = "INAT_PFX_FS"
+ prefix_num["SEG=GS"] = "INAT_PFX_GS"
+ prefix_num["SEG=SS"] = "INAT_PFX_SS"
+ prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+ prefix_num["EVEX"] = "INAT_PFX_EVEX"
+
+ clear_vars()
+}
+
+function semantic_error(msg) {
+ print "Semantic error at " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+function debug(msg) {
+ print "DEBUG: " msg
+}
+
+function array_size(arr, i,c) {
+ c = 0
+ for (i in arr)
+ c++
+ return c
+}
+
+/^Table:/ {
+ print "/* " $0 " */"
+ if (tname != "")
+ semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+ if (NF != 1) {
+ # escape opcode table
+ ref = ""
+ for (i = 2; i <= NF; i++)
+ ref = ref $i
+ eid = escape[ref]
+ tname = sprintf("inat_escape_table_%d", eid)
+ }
+}
+
+/^AVXcode:/ {
+ if (NF != 1) {
+ # AVX/escape opcode table
+ aid = $2
+ if (gaid <= aid)
+ gaid = aid + 1
+ if (tname == "") # AVX only opcode table
+ tname = sprintf("inat_avx_table_%d", $2)
+ }
+ if (aid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+ print "/* " $0 " */"
+ if (!($2 in group))
+ semantic_error("No group: " $2 )
+ gid = group[$2]
+ tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+ print "const insn_attr_t " name " = {"
+ for (i = 0; i < n; i++) {
+ id = sprintf(fmt, i)
+ if (tbl[id])
+ print " [" id "] = " tbl[id] ","
+ }
+ print "};"
+}
+
+/^EndTable/ {
+ if (gid != -1) {
+ # print group tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,3] = tname "_3"
+ }
+ } else {
+ # print primary/escaped tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,0] = tname
+ if (aid >= 0)
+ atable[aid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,1] = tname "_1"
+ if (aid >= 0)
+ atable[aid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,2] = tname "_2"
+ if (aid >= 0)
+ atable[aid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,3] = tname "_3"
+ if (aid >= 0)
+ atable[aid,3] = tname "_3"
+ }
+ }
+ print ""
+ clear_vars()
+}
+
+function add_flags(old,new) {
+ if (old && new)
+ return old " | " new
+ else if (old)
+ return old
+ else
+ return new
+}
+
+# convert operands to flags.
+function convert_operands(count,opnd, i,j,imm,mod)
+{
+ imm = null
+ mod = null
+ for (j = 1; j <= count; j++) {
+ i = opnd[j]
+ if (match(i, imm_expr) == 1) {
+ if (!imm_flag[i])
+ semantic_error("Unknown imm opnd: " i)
+ if (imm) {
+ if (i != "Ib")
+ semantic_error("Second IMM error")
+ imm = add_flags(imm, "INAT_SCNDIMM")
+ } else
+ imm = imm_flag[i]
+ } else if (match(i, modrm_expr))
+ mod = "INAT_MODRM"
+ }
+ return add_flags(imm, mod)
+}
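+
+# Worked example (illustrative): for the operand list "Eb,Ib", "Eb" matches
+# modrm_expr (-> INAT_MODRM) and "Ib" matches imm_expr
+# (-> INAT_MAKE_IMM(INAT_IMM_BYTE)), so the returned flags are
+# "INAT_MAKE_IMM(INAT_IMM_BYTE) | INAT_MODRM".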
+
+/^[0-9a-f]+\:/ {
+ if (NR == 1)
+ next
+ # get index
+ idx = "0x" substr($1, 1, index($1,":") - 1)
+ if (idx in table)
+ semantic_error("Redefine " idx " in " tname)
+
+ # check if escaped opcode
+ if ("escape" == $2) {
+ if ($3 != "#")
+ semantic_error("No escaped name")
+ ref = ""
+ for (i = 4; i <= NF; i++)
+ ref = ref $i
+ if (ref in escape)
+ semantic_error("Redefine escape (" ref ")")
+ escape[ref] = geid
+ geid++
+ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+ next
+ }
+
+ variant = null
+ # convert each opcode variant on this line
+ i = 2
+ while (i <= NF) {
+ opcode = $(i++)
+ delete opnds
+ ext = null
+ flags = null
+ opnd = null
+ # parse one opcode
+ if (match($i, opnd_expr)) {
+ opnd = $i
+ count = split($(i++), opnds, ",")
+ flags = convert_operands(count, opnds)
+ }
+ if (match($i, ext_expr))
+ ext = $(i++)
+ if (match($i, sep_expr))
+ i++
+ else if (i < NF)
+ semantic_error($i " is not a separator")
+
+ # check if group opcode
+ if (match(opcode, group_expr)) {
+ if (!(opcode in group)) {
+ group[opcode] = ggid
+ ggid++
+ }
+ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+ }
+ # check forced (or default) 64-bit
+ if (match(ext, force64_expr))
+ flags = add_flags(flags, "INAT_FORCE64")
+
+ # check REX prefix
+ if (match(opcode, rex_expr))
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+ # check coprocessor escape : TODO
+ if (match(opcode, fpu_expr))
+ flags = add_flags(flags, "INAT_MODRM")
+
+ # check VEX codes
+ if (match(ext, evexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
+ else if (match(ext, vexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+ flags = add_flags(flags, "INAT_VEXOK")
+
+ # check prefixes
+ if (match(ext, prefix_expr)) {
+ if (!prefix_num[opcode])
+ semantic_error("Unknown prefix: " opcode)
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+ }
+ if (length(flags) == 0)
+ continue
+ # check if last prefix
+ if (match(ext, lprefix1_expr)) {
+ lptable1[idx] = add_flags(lptable1[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix2_expr)) {
+ lptable2[idx] = add_flags(lptable2[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix3_expr)) {
+ lptable3[idx] = add_flags(lptable3[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (!match(ext, lprefix_expr)){
+ table[idx] = add_flags(table[idx],flags)
+ }
+ }
+ if (variant)
+ table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+ if (awkchecked != "")
+ exit 1
+ # print escape opcode map's array
+ print "/* Escape opcode map array */"
+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print " ["i"]["j"] = "etable[i,j]","
+ print "};\n"
+ # print group opcode map's array
+ print "/* Group opcode map array */"
+ print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print " ["i"]["j"] = "gtable[i,j]","
+ print "};\n"
+ # print AVX opcode map's array
+ print "/* AVX opcode map array */"
+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print " ["i"]["j"] = "atable[i,j]","
+ print "};"
+}
diff --git a/tools/perf/util/intel-pt-decoder/inat.c b/tools/perf/util/intel-pt-decoder/inat.c
new file mode 100644
index 000000000..906d94aa0
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.c
@@ -0,0 +1,96 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "insn.h"
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+ return inat_primary_table[opcode];
+}
+
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
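+/*
+ * Escape tables are indexed [escape id][last-prefix id]. Column 0 holds
+ * the default attribute; if that entry is marked INAT_VARIANT and a last
+ * prefix (0x66/0xF3/0xF2) was seen, the prefix-specific column is used
+ * instead.
+ */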
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
+ insn_attr_t esc_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_escape_id(esc_attr);
+
+ table = inat_escape_tables[n][0];
+ if (!table)
+ return 0;
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
+ insn_attr_t grp_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_group_id(grp_attr);
+
+ table = inat_group_tables[n][0];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ }
+ return table[X86_MODRM_REG(modrm)] |
+ inat_group_common_attribute(grp_attr);
+}
+
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+ insn_byte_t vex_p)
+{
+ const insn_attr_t *table;
+ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
+ return 0;
+ /* First, check the master table */
+ table = inat_avx_tables[vex_m][0];
+ if (!table)
+ return 0;
+ if (!inat_is_group(table[opcode]) && vex_p) {
+ /* If this is not a group, get attribute directly */
+ table = inat_avx_tables[vex_m][vex_p];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
diff --git a/tools/perf/util/intel-pt-decoder/inat.h b/tools/perf/util/intel-pt-decoder/inat.h
new file mode 100644
index 000000000..52dc8d911
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "inat_types.h"
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK 4 /* 0xF0 */
+#define INAT_PFX_CS 5 /* 0x2E */
+#define INAT_PFX_DS 6 /* 0x3E */
+#define INAT_PFX_ES 7 /* 0x26 */
+#define INAT_PFX_FS 8 /* 0x64 */
+#define INAT_PFX_GS 9 /* 0x65 */
+#define INAT_PFX_SS 10 /* 0x36 */
+#define INAT_PFX_ADDRSZ 11 /* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX 12 /* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2 13 /* 2-bytes VEX prefix */
+#define INAT_PFX_VEX3 14 /* 3-bytes VEX prefix */
+#define INAT_PFX_EVEX 15 /* EVEX prefix */
+
+#define INAT_LSTPFX_MAX 3
+#define INAT_LGCPFX_MAX 11
+
+/* Immediate size */
+#define INAT_IMM_BYTE 1
+#define INAT_IMM_WORD 2
+#define INAT_IMM_DWORD 3
+#define INAT_IMM_QWORD 4
+#define INAT_IMM_PTR 5
+#define INAT_IMM_VWORD32 6
+#define INAT_IMM_VWORD 7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS 0
+#define INAT_PFX_BITS 4
+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS 2
+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS 5
+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS 3
+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
+#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+
+/* Identifiers for segment registers */
+#define INAT_SEG_REG_IGNORE 0
+#define INAT_SEG_REG_DEFAULT 1
+#define INAT_SEG_REG_CS 2
+#define INAT_SEG_REG_SS 3
+#define INAT_SEG_REG_DS 4
+#define INAT_SEG_REG_ES 5
+#define INAT_SEG_REG_FS 6
+#define INAT_SEG_REG_GS 7
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+ insn_byte_t vex_m,
+ insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+ return 0;
+ else
+ return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3 ||
+ attr == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_evex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_EVEX;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+ return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+ return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+ return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+ return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+ return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+ return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+ return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+ return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+ return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+ return attr & (INAT_VEXONLY | INAT_EVEXONLY);
+}
+
+static inline int inat_must_evex(insn_attr_t attr)
+{
+ return attr & INAT_EVEXONLY;
+}
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/inat_types.h b/tools/perf/util/intel-pt-decoder/inat_types.h
new file mode 100644
index 000000000..cb3c20ce3
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
new file mode 100644
index 000000000..ca983e2be
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -0,0 +1,606 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include "inat.h"
+#include "insn.h"
+
+/* Verify that the next sizeof(t) + n bytes lie within the instruction buffer */
+#define validate_next(t, insn, n) \
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+#define __get_next(t, insn) \
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define __peek_nbyte_next(t, insn, n) \
+ ({ t r = *(t*)((insn)->next_byte + n); r; })
+
+#define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+
+#define peek_nbyte_next(t, insn, n) \
+ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
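+
+/*
+ * These helpers bounds-check against insn->end_kaddr and jump to the
+ * caller's err_out label on overrun, which is why every function using
+ * get_next()/peek_next() ends with an err_out: label.
+ */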
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @x86_64: !0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
+{
+ /*
+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+ * even if the input buffer is long enough to hold them.
+ */
+ if (buf_len > MAX_INSN_SIZE)
+ buf_len = MAX_INSN_SIZE;
+
+ memset(insn, 0, sizeof(*insn));
+ insn->kaddr = kaddr;
+ insn->end_kaddr = kaddr + buf_len;
+ insn->next_byte = kaddr;
+ insn->x86_64 = x86_64 ? 1 : 0;
+ insn->opnd_bytes = 4;
+ if (x86_64)
+ insn->addr_bytes = 8;
+ else
+ insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn: &struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+ insn_byte_t b, lb;
+ int i, nb;
+
+ if (prefixes->got)
+ return;
+
+ nb = 0;
+ lb = 0;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ while (inat_is_legacy_prefix(attr)) {
+ /* Skip if same prefix */
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == b)
+ goto found;
+ if (nb == 4)
+ /* Invalid instruction */
+ break;
+ prefixes->bytes[nb++] = b;
+ if (inat_is_address_size_prefix(attr)) {
+ /* address size switches 2/4 or 4/8 */
+ if (insn->x86_64)
+ insn->addr_bytes ^= 12;
+ else
+ insn->addr_bytes ^= 6;
+ } else if (inat_is_operand_size_prefix(attr)) {
+			/* operand size switches 2/4 */
+ insn->opnd_bytes ^= 6;
+ }
+found:
+ prefixes->nbytes++;
+ insn->next_byte++;
+ lb = b;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ }
+ /* Set the last prefix */
+ if (lb && lb != insn->prefixes.bytes[3]) {
+ if (unlikely(insn->prefixes.bytes[3])) {
+ /* Swap the last prefix */
+ b = insn->prefixes.bytes[3];
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == lb)
+ prefixes->bytes[i] = b;
+ }
+ insn->prefixes.bytes[3] = lb;
+ }
+
+ /* Decode REX prefix */
+ if (insn->x86_64) {
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_rex_prefix(attr)) {
+ insn->rex_prefix.value = b;
+ insn->rex_prefix.nbytes = 1;
+ insn->next_byte++;
+ if (X86_REX_W(b))
+ /* REX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ }
+ }
+ insn->rex_prefix.got = 1;
+
+ /* Decode VEX prefix */
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_vex_prefix(attr)) {
+ insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+ if (!insn->x86_64) {
+ /*
+			 * In 32-bit mode, if the [7:6] bits (mod bits of
+ * ModRM) on the second byte are not 11b, it is
+ * LDS or LES or BOUND.
+ */
+ if (X86_MODRM_MOD(b2) != 3)
+ goto vex_end;
+ }
+ insn->vex_prefix.bytes[0] = b;
+ insn->vex_prefix.bytes[1] = b2;
+ if (inat_is_evex_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ b2 = peek_nbyte_next(insn_byte_t, insn, 3);
+ insn->vex_prefix.bytes[3] = b2;
+ insn->vex_prefix.nbytes = 4;
+ insn->next_byte += 4;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else if (inat_is_vex3_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ insn->vex_prefix.nbytes = 3;
+ insn->next_byte += 3;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else {
+ /*
+ * For VEX2, fake VEX3-like byte#2.
+ * Makes it easier to decode vex.W, vex.vvvv,
+ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+ */
+ insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn->vex_prefix.nbytes = 2;
+ insn->next_byte += 2;
+ }
+ }
+vex_end:
+ insn->vex_prefix.got = 1;
+
+ prefixes->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and sets @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+ struct insn_field *opcode = &insn->opcode;
+ insn_byte_t op;
+ int pfx_id;
+ if (opcode->got)
+ return;
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[0] = op;
+ opcode->nbytes = 1;
+
+ /* Check if there is VEX prefix or not */
+ if (insn_is_avx(insn)) {
+ insn_byte_t m, p;
+ m = insn_vex_m_bits(insn);
+ p = insn_vex_p_bits(insn);
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
+ (!inat_accept_vex(insn->attr) &&
+ !inat_is_group(insn->attr)))
+ insn->attr = 0; /* This instruction is bad */
+ goto end; /* VEX has only 1 byte for opcode */
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+ while (inat_is_escape(insn->attr)) {
+ /* Get escaped opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[opcode->nbytes++] = op;
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+ if (inat_must_vex(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+end:
+ opcode->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
+ if (modrm->got)
+ return;
+ if (!insn->opcode.got)
+ insn_get_opcode(insn);
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+ modrm->value = mod;
+ modrm->nbytes = 1;
+ if (inat_is_group(insn->attr)) {
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+ insn->attr = 0; /* This is bad */
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
+ modrm->got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte. No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+
+ if (!insn->x86_64)
+ return 0;
+ if (!modrm->got)
+ insn_get_modrm(insn);
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+ */
+ return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+ insn_byte_t modrm;
+
+ if (insn->sib.got)
+ return;
+ if (!insn->modrm.got)
+ insn_get_modrm(insn);
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+ X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+ insn->sib.value = get_next(insn_byte_t, insn);
+ insn->sib.nbytes = 1;
+ }
+ }
+ insn->sib.got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * The displacement value is sign-extended.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+ insn_byte_t mod, rm, base;
+
+ if (insn->displacement.got)
+ return;
+ if (!insn->sib.got)
+ insn_get_sib(insn);
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+ * mod = 00 - no displacement fields (exceptions below)
+ * mod = 01 - 1-byte displacement field
+ * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+ * address size = 2 (0x67 prefix in 32-bit mode)
+ * mod = 11 - no memory operand
+ *
+ * If address size = 2...
+ * mod = 00, r/m = 110 - displacement field is 2 bytes
+ *
+ * If address size != 2...
+ * mod != 11, r/m = 100 - SIB byte exists
+ * mod = 00, SIB base = 101 - displacement field is 4 bytes
+ * mod = 00, r/m = 101 - rip-relative addressing, displacement
+ * field is 4 bytes
+ */
+ mod = X86_MODRM_MOD(insn->modrm.value);
+ rm = X86_MODRM_RM(insn->modrm.value);
+ base = X86_SIB_BASE(insn->sib.value);
+ if (mod == 3)
+ goto out;
+ if (mod == 1) {
+ insn->displacement.value = get_next(signed char, insn);
+ insn->displacement.nbytes = 1;
+ } else if (insn->addr_bytes == 2) {
+ if ((mod == 0 && rm == 6) || mod == 2) {
+ insn->displacement.value =
+ get_next(short, insn);
+ insn->displacement.nbytes = 2;
+ }
+ } else {
+ if ((mod == 0 && rm == 5) || mod == 2 ||
+ (mod == 0 && base == 5)) {
+ insn->displacement.value = get_next(int, insn);
+ insn->displacement.nbytes = 4;
+ }
+ }
+ }
+out:
+ insn->displacement.got = 1;
+
+err_out:
+ return;
+}
+
+/* Decode moffset16/32/64. Return 0 if failed */
+static int __get_moffset(struct insn *insn)
+{
+ switch (insn->addr_bytes) {
+ case 2:
+ insn->moffset1.value = get_next(short, insn);
+ insn->moffset1.nbytes = 2;
+ break;
+ case 4:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ break;
+ case 8:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ insn->moffset2.value = get_next(int, insn);
+ insn->moffset2.nbytes = 4;
+ break;
+	default:	/* addr_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->moffset1.got = insn->moffset2.got = 1;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v32(Iz). Return 0 if failed */
+static int __get_immv32(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case 4:
+ case 8:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v64(Iv/Ov). Return 0 if failed */
+static int __get_immv(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static int __get_immptr(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+		/* ptr16:64 does not exist (there is no segment part) */
+ return 0;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate2.value = get_next(unsigned short, insn);
+ insn->immediate2.nbytes = 2;
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Most immediates are sign-extended. The unsigned value can be recovered
+ * by masking with ((1 << (nbytes * 8)) - 1).
+ */
+void insn_get_immediate(struct insn *insn)
+{
+ if (insn->immediate.got)
+ return;
+ if (!insn->displacement.got)
+ insn_get_displacement(insn);
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+ goto err_out;
+ goto done;
+ }
+
+ if (!inat_has_immediate(insn->attr))
+ /* no immediates */
+ goto done;
+
+ switch (inat_immediate_size(insn->attr)) {
+ case INAT_IMM_BYTE:
+ insn->immediate.value = get_next(signed char, insn);
+ insn->immediate.nbytes = 1;
+ break;
+ case INAT_IMM_WORD:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case INAT_IMM_DWORD:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ case INAT_IMM_QWORD:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ case INAT_IMM_PTR:
+ if (!__get_immptr(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD32:
+ if (!__get_immv32(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD:
+ if (!__get_immv(insn))
+ goto err_out;
+ break;
+ default:
+		/* The insn must have an immediate here, but decoding it failed */
+ goto err_out;
+ }
+ if (inat_has_second_immediate(insn->attr)) {
+ insn->immediate2.value = get_next(signed char, insn);
+ insn->immediate2.nbytes = 1;
+ }
+done:
+ insn->immediate.got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+ if (insn->length)
+ return;
+ if (!insn->immediate.got)
+ insn_get_immediate(insn);
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
+}
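
As a quick illustration of the API this file implements, here is a minimal, hypothetical user-space harness (not part of the patch; it assumes insn.c and the inat tables are built alongside it) that decodes a single 64-bit instruction from a buffer:

	#include <stdio.h>
	#include "insn.h"

	int main(void)
	{
		/* mov rax, 0x2a in 64-bit code: REX.W + C7 /0 imm32 */
		unsigned char buf[] = { 0x48, 0xc7, 0xc0, 0x2a, 0x00, 0x00, 0x00 };
		struct insn insn;

		insn_init(&insn, buf, sizeof(buf), 1);
		insn_get_length(&insn);	/* decodes prefixes, opcode, ModRM, imm */

		/* Expect: length=7 opcode=0xc7 imm=42 */
		printf("length=%d opcode=%#x imm=%d\n",
		       insn.length, insn.opcode.bytes[0], insn.immediate.value);
		return 0;
	}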
diff --git a/tools/perf/util/intel-pt-decoder/insn.h b/tools/perf/util/intel-pt-decoder/insn.h
new file mode 100644
index 000000000..1a01d0a41
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/insn.h
@@ -0,0 +1,244 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include "inat.h"
+
+struct insn_field {
+ union {
+ insn_value_t value;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+struct insn {
+ struct insn_field prefixes; /*
+ * Prefixes
+ * prefixes.bytes[3]: last prefix
+ */
+ struct insn_field rex_prefix; /* REX prefix */
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field opcode; /*
+ * opcode.bytes[0]: opcode1
+ * opcode.bytes[1]: opcode2
+ * opcode.bytes[2]: opcode3
+ */
+ struct insn_field modrm;
+ struct insn_field sib;
+ struct insn_field displacement;
+ union {
+ struct insn_field immediate;
+ struct insn_field moffset1; /* for 64bit MOV */
+ struct insn_field immediate1; /* for 64bit imm or off16/32 */
+ };
+ union {
+ struct insn_field moffset2; /* for 64bit MOV */
+ struct insn_field immediate2; /* for 64bit imm or seg16 */
+ };
+
+ insn_attr_t attr;
+ unsigned char opnd_bytes;
+ unsigned char addr_bytes;
+ unsigned char length;
+ unsigned char x86_64;
+
+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
+ const insn_byte_t *next_byte;
+};
+
+#define MAX_INSN_SIZE 15
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */
+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
+#define X86_VEX2_M 1 /* VEX2.M always 1 */
+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+ insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn,
+ const void *kaddr, int buf_len)
+{
+#ifdef CONFIG_X86_64
+ insn_init(insn, kaddr, buf_len, 1);
+#else /* CONFIG_X86_32 */
+ insn_init(insn, kaddr, buf_len, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.value != 0);
+}
+
+static inline int insn_is_evex(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.nbytes == 4);
+}
+
+/* Check whether the instruction has been decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX2_M;
+ else if (insn->vex_prefix.nbytes == 3) /* 3 bytes VEX */
+ return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+ else /* EVEX */
+ return X86_EVEX_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX_P(insn->vex_prefix.bytes[1]);
+ else
+ return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+ return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+ return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+ return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+ return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+ return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+/**
+ * for_each_insn_prefix() - Iterate over the prefixes of the instruction
+ * @insn: Pointer to struct insn.
+ * @idx: Index storage.
+ * @prefix: Prefix byte.
+ *
+ * Iterate over the prefix bytes of the given @insn. Each prefix byte is
+ * stored in @prefix and its index in @idx (@idx is just a cursor; do not
+ * change it). Note that prefixes.nbytes can exceed 4 when prefixes are
+ * repeated, so it cannot be used to bound this loop.
+ */
+#define for_each_insn_prefix(insn, idx, prefix) \
+ for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+
+#define POP_SS_OPCODE 0x1f
+#define MOV_SREG_OPCODE 0x8e
+
+/*
+ * Intel SDM Vol.3A 6.8.3 states:
+ * "Any single-step trap that would be delivered following the MOV to SS
+ * instruction or POP to SS instruction (because EFLAGS.TF is 1) is
+ * suppressed."
+ * This function returns true if @insn is MOV SS or POP SS. On these
+ * instructions, single stepping is suppressed.
+ */
+static inline int insn_masking_exception(struct insn *insn)
+{
+ return insn->opcode.bytes[0] == POP_SS_OPCODE ||
+ (insn->opcode.bytes[0] == MOV_SREG_OPCODE &&
+ X86_MODRM_REG(insn->modrm.bytes[0]) == 2);
+}
+
+#endif /* _ASM_X86_INSN_H */
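
Building on the header above, a small hypothetical helper that walks the legacy prefixes with for_each_insn_prefix() (ARRAY_SIZE is assumed to come from the tools copy of <linux/kernel.h>):

	#include <stdio.h>
	#include <linux/kernel.h>	/* ARRAY_SIZE */
	#include "insn.h"

	static void dump_prefixes(struct insn *insn)
	{
		insn_byte_t prefix;
		int idx;

		insn_get_prefixes(insn);	/* no-op if already decoded */
		for_each_insn_prefix(insn, idx, prefix)
			printf("legacy prefix[%d] = %#x\n", idx, prefix);
	}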
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
new file mode 100644
index 000000000..e2a6c2295
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -0,0 +1,2727 @@
+/*
+ * intel_pt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/compiler.h>
+
+#include "../cache.h"
+#include "../util.h"
+#include "../auxtrace.h"
+
+#include "intel-pt-insn-decoder.h"
+#include "intel-pt-pkt-decoder.h"
+#include "intel-pt-decoder.h"
+#include "intel-pt-log.h"
+
+#define INTEL_PT_BLK_SIZE 1024
+
+#define BIT63 (((uint64_t)1 << 63))
+
+#define INTEL_PT_RETURN 1
+
+/* Maximum number of loops with no packets consumed, i.e. stuck in a loop */
+#define INTEL_PT_MAX_LOOPS 10000
+
+struct intel_pt_blk {
+ struct intel_pt_blk *prev;
+ uint64_t ip[INTEL_PT_BLK_SIZE];
+};
+
+struct intel_pt_stack {
+ struct intel_pt_blk *blk;
+ struct intel_pt_blk *spare;
+ int pos;
+};
+
+enum intel_pt_pkt_state {
+ INTEL_PT_STATE_NO_PSB,
+ INTEL_PT_STATE_NO_IP,
+ INTEL_PT_STATE_ERR_RESYNC,
+ INTEL_PT_STATE_IN_SYNC,
+ INTEL_PT_STATE_TNT_CONT,
+ INTEL_PT_STATE_TNT,
+ INTEL_PT_STATE_TIP,
+ INTEL_PT_STATE_TIP_PGD,
+ INTEL_PT_STATE_FUP,
+ INTEL_PT_STATE_FUP_NO_TIP,
+};
+
+static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
+{
+ switch (pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ case INTEL_PT_STATE_NO_IP:
+ case INTEL_PT_STATE_ERR_RESYNC:
+ case INTEL_PT_STATE_IN_SYNC:
+ case INTEL_PT_STATE_TNT_CONT:
+ return true;
+ case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ case INTEL_PT_STATE_FUP:
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ return false;
+ default:
+ return true;
+	}
+}
+
+#ifdef INTEL_PT_STRICT
+#define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_NO_PSB
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_NO_PSB
+#else
+#define INTEL_PT_STATE_ERR1 (decoder->pkt_state)
+#define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_IP
+#define INTEL_PT_STATE_ERR3 INTEL_PT_STATE_ERR_RESYNC
+#define INTEL_PT_STATE_ERR4 INTEL_PT_STATE_IN_SYNC
+#endif
+
+struct intel_pt_decoder {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
+ void *data;
+ struct intel_pt_state state;
+ const unsigned char *buf;
+ size_t len;
+ bool return_compression;
+ bool branch_enable;
+ bool mtc_insn;
+ bool pge;
+ bool have_tma;
+ bool have_cyc;
+ bool fixup_last_mtc;
+ bool have_last_ip;
+ enum intel_pt_param_flags flags;
+ uint64_t pos;
+ uint64_t last_ip;
+ uint64_t ip;
+ uint64_t cr3;
+ uint64_t timestamp;
+ uint64_t tsc_timestamp;
+ uint64_t ref_timestamp;
+ uint64_t sample_timestamp;
+ uint64_t ret_addr;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t cycle_cnt;
+ uint64_t cyc_ref_timestamp;
+ uint32_t last_mtc;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ uint32_t tsc_ctc_mult;
+ uint32_t tsc_slip;
+ uint32_t ctc_rem_mask;
+ int mtc_shift;
+ struct intel_pt_stack stack;
+ enum intel_pt_pkt_state pkt_state;
+ struct intel_pt_pkt packet;
+ struct intel_pt_pkt tnt;
+ int pkt_step;
+ int pkt_len;
+ int last_packet_type;
+ unsigned int cbr;
+ unsigned int cbr_seen;
+ unsigned int max_non_turbo_ratio;
+ double max_non_turbo_ratio_fp;
+ double cbr_cyc_to_tsc;
+ double calc_cyc_to_tsc;
+ bool have_calc_cyc_to_tsc;
+ int exec_mode;
+ unsigned int insn_bytes;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ uint64_t tot_insn_cnt;
+ uint64_t period_insn_cnt;
+ uint64_t period_mask;
+ uint64_t period_ticks;
+ uint64_t last_masked_timestamp;
+ bool continuous_period;
+ bool overflow;
+ bool set_fup_tx_flags;
+ bool set_fup_ptw;
+ bool set_fup_mwait;
+ bool set_fup_pwre;
+ bool set_fup_exstop;
+ unsigned int fup_tx_flags;
+ unsigned int tx_flags;
+ uint64_t fup_ptw_payload;
+ uint64_t fup_mwait_payload;
+ uint64_t fup_pwre_payload;
+ uint64_t cbr_payload;
+ uint64_t timestamp_insn_cnt;
+ uint64_t sample_insn_cnt;
+ uint64_t stuck_ip;
+ int no_progress;
+ int stuck_ip_prd;
+ int stuck_ip_cnt;
+ const unsigned char *next_buf;
+ size_t next_len;
+ unsigned char temp_buf[INTEL_PT_PKT_MAX_SZ];
+};
+
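+/* Round x down to the nearest power of 2; x must be non-zero */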
+static uint64_t intel_pt_lower_power_of_2(uint64_t x)
+{
+ int i;
+
+ for (i = 0; x != 1; i++)
+ x >>= 1;
+
+ return x << i;
+}
+
+static void intel_pt_setup_period(struct intel_pt_decoder *decoder)
+{
+ if (decoder->period_type == INTEL_PT_PERIOD_TICKS) {
+ uint64_t period;
+
+ period = intel_pt_lower_power_of_2(decoder->period);
+ decoder->period_mask = ~(period - 1);
+ decoder->period_ticks = period;
+ }
+}
+
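+/*
+ * Compute t * n / d without requiring t * n to fit in 64 bits:
+ * t * n / d == (t / d) * n + ((t % d) * n) / d holds exactly in
+ * integer arithmetic.
+ */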
+static uint64_t multdiv(uint64_t t, uint32_t n, uint32_t d)
+{
+ if (!d)
+ return 0;
+ return (t / d) * n + ((t % d) * n) / d;
+}
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
+{
+ struct intel_pt_decoder *decoder;
+
+ if (!params->get_trace || !params->walk_insn)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct intel_pt_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->walk_insn = params->walk_insn;
+ decoder->pgd_ip = params->pgd_ip;
+ decoder->data = params->data;
+ decoder->return_compression = params->return_compression;
+ decoder->branch_enable = params->branch_enable;
+
+ decoder->flags = params->flags;
+
+ decoder->period = params->period;
+ decoder->period_type = params->period_type;
+
+ decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+ decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
+
+ intel_pt_setup_period(decoder);
+
+ decoder->mtc_shift = params->mtc_period;
+ decoder->ctc_rem_mask = (1 << decoder->mtc_shift) - 1;
+
+ decoder->tsc_ctc_ratio_n = params->tsc_ctc_ratio_n;
+ decoder->tsc_ctc_ratio_d = params->tsc_ctc_ratio_d;
+
+ if (!decoder->tsc_ctc_ratio_n)
+ decoder->tsc_ctc_ratio_d = 0;
+
+ if (decoder->tsc_ctc_ratio_d) {
+ if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d))
+ decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n /
+ decoder->tsc_ctc_ratio_d;
+ }
+
+ /*
+ * A TSC packet can slip past MTC packets so that the timestamp appears
+	 * to go backwards. One estimate is that the slip can be up to about 40 CPU
+ * cycles, which is certainly less than 0x1000 TSC ticks, but accept
+ * slippage an order of magnitude more to be on the safe side.
+ */
+ decoder->tsc_slip = 0x10000;
+
+ intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift);
+ intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n);
+ intel_pt_log("timestamp: tsc_ctc_ratio_d %u\n", decoder->tsc_ctc_ratio_d);
+ intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
+ intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+
+ return decoder;
+}
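
A hypothetical caller's view of this constructor (struct intel_pt_params comes from intel-pt-decoder.h; only the fields the function reads above are assumed):

	#include <stdint.h>
	#include "intel-pt-decoder.h"

	static int my_get_trace(struct intel_pt_buffer *buffer, void *data)
	{
		(void)buffer;
		(void)data;
		return -1;	/* stub: no trace data to hand out */
	}

	static int my_walk_insn(struct intel_pt_insn *intel_pt_insn,
				uint64_t *insn_cnt_ptr, uint64_t *ip,
				uint64_t to_ip, uint64_t max_insn_cnt,
				void *data)
	{
		(void)intel_pt_insn; (void)insn_cnt_ptr; (void)ip;
		(void)to_ip; (void)max_insn_cnt; (void)data;
		return -1;	/* stub: cannot walk object code */
	}

	static struct intel_pt_decoder *make_decoder(void)
	{
		struct intel_pt_params params = {
			.get_trace = my_get_trace,
			.walk_insn = my_walk_insn,
			.return_compression = true,
		};

		/* Returns NULL if either mandatory callback is missing */
		return intel_pt_decoder_new(&params);
	}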
+
+static void intel_pt_pop_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk = stack->blk;
+
+ stack->blk = blk->prev;
+ if (!stack->spare)
+ stack->spare = blk;
+ else
+ free(blk);
+}
+
+static uint64_t intel_pt_pop(struct intel_pt_stack *stack)
+{
+ if (!stack->pos) {
+ if (!stack->blk)
+ return 0;
+ intel_pt_pop_blk(stack);
+ if (!stack->blk)
+ return 0;
+ stack->pos = INTEL_PT_BLK_SIZE;
+ }
+ return stack->blk->ip[--stack->pos];
+}
+
+static int intel_pt_alloc_blk(struct intel_pt_stack *stack)
+{
+ struct intel_pt_blk *blk;
+
+ if (stack->spare) {
+ blk = stack->spare;
+ stack->spare = NULL;
+ } else {
+ blk = malloc(sizeof(struct intel_pt_blk));
+ if (!blk)
+ return -ENOMEM;
+ }
+
+ blk->prev = stack->blk;
+ stack->blk = blk;
+ stack->pos = 0;
+ return 0;
+}
+
+static int intel_pt_push(struct intel_pt_stack *stack, uint64_t ip)
+{
+ int err;
+
+ if (!stack->blk || stack->pos == INTEL_PT_BLK_SIZE) {
+ err = intel_pt_alloc_blk(stack);
+ if (err)
+ return err;
+ }
+
+ stack->blk->ip[stack->pos++] = ip;
+ return 0;
+}
+
+static void intel_pt_clear_stack(struct intel_pt_stack *stack)
+{
+ while (stack->blk)
+ intel_pt_pop_blk(stack);
+ stack->pos = 0;
+}
+
+static void intel_pt_free_stack(struct intel_pt_stack *stack)
+{
+ intel_pt_clear_stack(stack);
+ zfree(&stack->blk);
+ zfree(&stack->spare);
+}
+
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder)
+{
+ intel_pt_free_stack(&decoder->stack);
+ free(decoder);
+}
+
+static int intel_pt_ext_err(int code)
+{
+ switch (code) {
+ case -ENOMEM:
+ return INTEL_PT_ERR_NOMEM;
+ case -ENOSYS:
+ return INTEL_PT_ERR_INTERN;
+ case -EBADMSG:
+ return INTEL_PT_ERR_BADPKT;
+ case -ENODATA:
+ return INTEL_PT_ERR_NODATA;
+ case -EILSEQ:
+ return INTEL_PT_ERR_NOINSN;
+ case -ENOENT:
+ return INTEL_PT_ERR_MISMAT;
+ case -EOVERFLOW:
+ return INTEL_PT_ERR_OVR;
+ case -ENOSPC:
+ return INTEL_PT_ERR_LOST;
+ case -ELOOP:
+ return INTEL_PT_ERR_NELOOP;
+ default:
+ return INTEL_PT_ERR_UNK;
+ }
+}
+
+static const char *intel_pt_err_msgs[] = {
+ [INTEL_PT_ERR_NOMEM] = "Memory allocation failed",
+ [INTEL_PT_ERR_INTERN] = "Internal error",
+ [INTEL_PT_ERR_BADPKT] = "Bad packet",
+ [INTEL_PT_ERR_NODATA] = "No more data",
+ [INTEL_PT_ERR_NOINSN] = "Failed to get instruction",
+ [INTEL_PT_ERR_MISMAT] = "Trace doesn't match instruction",
+ [INTEL_PT_ERR_OVR] = "Overflow packet",
+ [INTEL_PT_ERR_LOST] = "Lost trace data",
+ [INTEL_PT_ERR_UNK] = "Unknown error!",
+ [INTEL_PT_ERR_NELOOP] = "Never-ending loop",
+};
+
+int intel_pt__strerror(int code, char *buf, size_t buflen)
+{
+ if (code < 1 || code >= INTEL_PT_ERR_MAX)
+ code = INTEL_PT_ERR_UNK;
+ strlcpy(buf, intel_pt_err_msgs[code], buflen);
+ return 0;
+}
+
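+/*
+ * Reconstruct the full IP from a compressed IP packet: the packet count
+ * selects how many payload bytes replace the low bytes of the last IP.
+ * Worked example: last_ip == 0xffff800000021000 with a count == 1 payload
+ * of 0x3456 yields 0xffff800000023456 (low 16 bits replaced, rest kept).
+ */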
+static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet,
+ uint64_t last_ip)
+{
+ uint64_t ip;
+
+ switch (packet->count) {
+ case 1:
+ ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) |
+ packet->payload;
+ break;
+ case 2:
+ ip = (last_ip & (uint64_t)0xffffffff00000000ULL) |
+ packet->payload;
+ break;
+ case 3:
+ ip = packet->payload;
+ /* Sign-extend 6-byte ip */
+ if (ip & (uint64_t)0x800000000000ULL)
+ ip |= (uint64_t)0xffff000000000000ULL;
+ break;
+ case 4:
+ ip = (last_ip & (uint64_t)0xffff000000000000ULL) |
+ packet->payload;
+ break;
+ case 6:
+ ip = packet->payload;
+ break;
+ default:
+ return 0;
+ }
+
+ return ip;
+}
+
+static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder)
+{
+ decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip);
+ decoder->have_last_ip = true;
+}
+
+static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder)
+{
+ intel_pt_set_last_ip(decoder);
+ decoder->ip = decoder->last_ip;
+}
+
+static void intel_pt_decoder_log_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log_packet(&decoder->packet, decoder->pkt_len, decoder->pos,
+ decoder->buf);
+}
+
+static int intel_pt_bug(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Internal error\n");
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ return -ENOSYS;
+}
+
+static inline void intel_pt_clear_tx_flags(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = 0;
+}
+
+static inline void intel_pt_update_in_tx(struct intel_pt_decoder *decoder)
+{
+ decoder->tx_flags = decoder->packet.payload & INTEL_PT_IN_TX;
+}
+
+static int intel_pt_bad_packet(struct intel_pt_decoder *decoder)
+{
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ decoder->pkt_len = 1;
+ decoder->pkt_step = 1;
+ intel_pt_decoder_log_packet(decoder);
+ if (decoder->pkt_state != INTEL_PT_STATE_NO_PSB) {
+ intel_pt_log("ERROR: Bad packet\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR1;
+ }
+ return -EBADMSG;
+}
+
+static int intel_pt_get_data(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_buffer buffer = { .buf = 0, };
+ int ret;
+
+ decoder->pkt_step = 0;
+
+ intel_pt_log("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret)
+ return ret;
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+ if (!decoder->len) {
+ intel_pt_log("No more data\n");
+ return -ENODATA;
+ }
+ if (!buffer.consecutive) {
+ decoder->ip = 0;
+ decoder->pkt_state = INTEL_PT_STATE_NO_PSB;
+ decoder->ref_timestamp = buffer.ref_timestamp;
+ decoder->timestamp = 0;
+ decoder->have_tma = false;
+ decoder->state.trace_nr = buffer.trace_nr;
+ intel_pt_log("Reference timestamp 0x%" PRIx64 "\n",
+ decoder->ref_timestamp);
+ return -ENOLINK;
+ }
+
+ return 0;
+}
+
+static int intel_pt_get_next_data(struct intel_pt_decoder *decoder)
+{
+ if (!decoder->next_buf)
+ return intel_pt_get_data(decoder);
+
+ decoder->buf = decoder->next_buf;
+ decoder->len = decoder->next_len;
+ decoder->next_buf = 0;
+ decoder->next_len = 0;
+ return 0;
+}
+
+static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
+{
+ unsigned char *buf = decoder->temp_buf;
+ size_t old_len, len, n;
+ int ret;
+
+ old_len = decoder->len;
+ len = decoder->len;
+ memcpy(buf, decoder->buf, len);
+
+ ret = intel_pt_get_data(decoder);
+ if (ret) {
+ decoder->pos += old_len;
+ return ret < 0 ? ret : -EINVAL;
+ }
+
+ n = INTEL_PT_PKT_MAX_SZ - len;
+ if (n > decoder->len)
+ n = decoder->len;
+ memcpy(buf + len, decoder->buf, n);
+ len += n;
+
+ ret = intel_pt_get_packet(buf, len, &decoder->packet);
+ if (ret < (int)old_len) {
+ decoder->next_buf = decoder->buf;
+ decoder->next_len = decoder->len;
+ decoder->buf = buf;
+ decoder->len = old_len;
+ return intel_pt_bad_packet(decoder);
+ }
+
+ decoder->next_buf = decoder->buf + (ret - old_len);
+ decoder->next_len = decoder->len - (ret - old_len);
+
+ decoder->buf = buf;
+ decoder->len = ret;
+
+ return ret;
+}
+
+struct intel_pt_pkt_info {
+ struct intel_pt_decoder *decoder;
+ struct intel_pt_pkt packet;
+ uint64_t pos;
+ int pkt_len;
+ int last_packet_type;
+ void *data;
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info);
+
+/* Look ahead at packets in the current buffer */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+ intel_pt_pkt_cb_t cb, void *data)
+{
+ struct intel_pt_pkt_info pkt_info;
+ const unsigned char *buf = decoder->buf;
+ size_t len = decoder->len;
+ int ret;
+
+ pkt_info.decoder = decoder;
+ pkt_info.pos = decoder->pos;
+ pkt_info.pkt_len = decoder->pkt_step;
+ pkt_info.last_packet_type = decoder->last_packet_type;
+ pkt_info.data = data;
+
+ while (1) {
+ do {
+ pkt_info.pos += pkt_info.pkt_len;
+ buf += pkt_info.pkt_len;
+ len -= pkt_info.pkt_len;
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+ if (!ret)
+ return INTEL_PT_NEED_MORE_BYTES;
+ if (ret < 0)
+ return ret;
+
+ pkt_info.pkt_len = ret;
+ } while (pkt_info.packet.type == INTEL_PT_PAD);
+
+ ret = cb(&pkt_info);
+ if (ret)
+ return 0;
+
+ pkt_info.last_packet_type = pkt_info.packet.type;
+ }
+}
+
+struct intel_pt_calc_cyc_to_tsc_info {
+ uint64_t cycle_cnt;
+ unsigned int cbr;
+ uint32_t last_mtc;
+ uint64_t ctc_timestamp;
+ uint64_t ctc_delta;
+ uint64_t tsc_timestamp;
+ uint64_t timestamp;
+ bool have_tma;
+ bool fixup_last_mtc;
+ bool from_mtc;
+ double cbr_cyc_to_tsc;
+};
+
+/*
+ * MTC provides an 8-bit slice of CTC but the TMA packet only provides the lower
+ * 16 bits of CTC. If mtc_shift > 8 then some of the MTC bits are not in the CTC
+ * provided by the TMA packet. Fix-up the last_mtc calculated from the TMA
+ * packet by copying the missing bits from the current MTC assuming the least
+ * difference between the two, and that the current MTC comes after last_mtc.
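+ *
+ * Worked example (hypothetical numbers): with mtc_shift == 9, the TMA
+ * packet supplies only bits 0-6 of last_mtc, so first_missing_bit is
+ * 0x80. If the current mtc is 0x85 and last_mtc is 0x03, copying bit 7
+ * gives 0x83, which is still below mtc and is therefore kept.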
+ */
+static void intel_pt_fixup_last_mtc(uint32_t mtc, int mtc_shift,
+ uint32_t *last_mtc)
+{
+ uint32_t first_missing_bit = 1U << (16 - mtc_shift);
+ uint32_t mask = ~(first_missing_bit - 1);
+
+ *last_mtc |= mtc & mask;
+ if (*last_mtc >= mtc) {
+ *last_mtc -= first_missing_bit;
+ *last_mtc &= 0xff;
+ }
+}
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{
+ struct intel_pt_decoder *decoder = pkt_info->decoder;
+ struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+ uint64_t timestamp;
+ double cyc_to_tsc;
+ unsigned int cbr;
+ uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+ switch (pkt_info->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PIP:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PAD:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ return 0;
+
+ case INTEL_PT_MTC:
+ if (!data->have_tma)
+ return 0;
+
+ mtc = pkt_info->packet.payload;
+ if (decoder->mtc_shift > 8 && data->fixup_last_mtc) {
+ data->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &data->last_mtc);
+ }
+ if (mtc > data->last_mtc)
+ mtc_delta = mtc - data->last_mtc;
+ else
+ mtc_delta = mtc + 256 - data->last_mtc;
+ data->ctc_delta += mtc_delta << decoder->mtc_shift;
+ data->last_mtc = mtc;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = data->ctc_timestamp +
+ data->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = data->ctc_timestamp +
+ multdiv(data->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < data->timestamp)
+ return 1;
+
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ data->timestamp = timestamp;
+ return 0;
+ }
+
+ break;
+
+ case INTEL_PT_TSC:
+ /*
+		 * For now, do not support using TSC packets - refer to
+ * intel_pt_calc_cyc_to_tsc().
+ */
+ if (data->from_mtc)
+ return 1;
+ timestamp = pkt_info->packet.payload |
+ (data->timestamp & (0xffULL << 56));
+ if (data->from_mtc && timestamp < data->timestamp &&
+ data->timestamp - timestamp < decoder->tsc_slip)
+ return 1;
+ if (timestamp < data->timestamp)
+ timestamp += (1ULL << 56);
+ if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+ if (data->from_mtc)
+ return 1;
+ data->tsc_timestamp = timestamp;
+ data->timestamp = timestamp;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TMA:
+ if (data->from_mtc)
+ return 1;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return 0;
+
+ ctc = pkt_info->packet.payload;
+ fc = pkt_info->packet.count;
+ ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+ data->ctc_timestamp = data->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ data->ctc_timestamp -=
+ multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ data->ctc_delta = 0;
+ data->have_tma = true;
+ data->fixup_last_mtc = true;
+
+ return 0;
+
+ case INTEL_PT_CYC:
+ data->cycle_cnt += pkt_info->packet.payload;
+ return 0;
+
+ case INTEL_PT_CBR:
+ cbr = pkt_info->packet.payload;
+ if (data->cbr && data->cbr != cbr)
+ return 1;
+ data->cbr = cbr;
+ data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_OVF:
+ case INTEL_PT_BAD: /* Does not happen */
+ default:
+ return 1;
+ }
+
+ if (!data->cbr && decoder->cbr) {
+ data->cbr = decoder->cbr;
+ data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+ }
+
+ if (!data->cycle_cnt)
+ return 1;
+
+ cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt;
+
+ if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+ cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ return 1;
+ }
+
+ decoder->calc_cyc_to_tsc = cyc_to_tsc;
+ decoder->have_calc_cyc_to_tsc = true;
+
+ if (data->cbr) {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+ cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+ } else {
+ intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+ cyc_to_tsc, pkt_info->pos);
+ }
+
+ return 1;
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+ bool from_mtc)
+{
+ struct intel_pt_calc_cyc_to_tsc_info data = {
+ .cycle_cnt = 0,
+ .cbr = 0,
+ .last_mtc = decoder->last_mtc,
+ .ctc_timestamp = decoder->ctc_timestamp,
+ .ctc_delta = decoder->ctc_delta,
+ .tsc_timestamp = decoder->tsc_timestamp,
+ .timestamp = decoder->timestamp,
+ .have_tma = decoder->have_tma,
+ .fixup_last_mtc = decoder->fixup_last_mtc,
+ .from_mtc = from_mtc,
+ .cbr_cyc_to_tsc = 0,
+ };
+
+ /*
+	 * For now, do not support using TSC packets, for at least these reasons:
+ * 1) timing might have stopped
+ * 2) TSC packets within PSB+ can slip against CYC packets
+ */
+ if (!from_mtc)
+ return;
+
+ intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data);
+}
+
+static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
+{
+ int ret;
+
+ decoder->last_packet_type = decoder->packet.type;
+
+ do {
+ decoder->pos += decoder->pkt_step;
+ decoder->buf += decoder->pkt_step;
+ decoder->len -= decoder->pkt_step;
+
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+ }
+
+ ret = intel_pt_get_packet(decoder->buf, decoder->len,
+ &decoder->packet);
+ if (ret == INTEL_PT_NEED_MORE_BYTES &&
+ decoder->len < INTEL_PT_PKT_MAX_SZ && !decoder->next_buf) {
+ ret = intel_pt_get_split_packet(decoder);
+ if (ret < 0)
+ return ret;
+ }
+ if (ret <= 0)
+ return intel_pt_bad_packet(decoder);
+
+ decoder->pkt_len = ret;
+ decoder->pkt_step = ret;
+ intel_pt_decoder_log_packet(decoder);
+ } while (decoder->packet.type == INTEL_PT_PAD);
+
+ return 0;
+}
+
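+/*
+ * Return how many timestamp ticks remain before the next sample is due;
+ * the caller uses the result as a maximum instruction count.
+ */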
+static uint64_t intel_pt_next_period(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (decoder->continuous_period) {
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ return 1;
+ } else {
+ timestamp += 1;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp > decoder->last_masked_timestamp) {
+ decoder->last_masked_timestamp = masked_timestamp;
+ decoder->continuous_period = true;
+ }
+ }
+
+ if (masked_timestamp < decoder->last_masked_timestamp)
+ return decoder->period_ticks;
+
+ return decoder->period_ticks - (timestamp - masked_timestamp);
+}
+
+static uint64_t intel_pt_next_sample(struct intel_pt_decoder *decoder)
+{
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ return decoder->period - decoder->period_insn_cnt;
+ case INTEL_PT_PERIOD_TICKS:
+ return intel_pt_next_period(decoder);
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ return 0;
+ }
+}
+
+static void intel_pt_sample_insn(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp, masked_timestamp;
+
+ switch (decoder->period_type) {
+ case INTEL_PT_PERIOD_INSTRUCTIONS:
+ decoder->period_insn_cnt = 0;
+ break;
+ case INTEL_PT_PERIOD_TICKS:
+ timestamp = decoder->timestamp + decoder->timestamp_insn_cnt;
+ masked_timestamp = timestamp & decoder->period_mask;
+ if (masked_timestamp > decoder->last_masked_timestamp)
+ decoder->last_masked_timestamp = masked_timestamp;
+ else
+ decoder->last_masked_timestamp += decoder->period_ticks;
+ break;
+ case INTEL_PT_PERIOD_NONE:
+ case INTEL_PT_PERIOD_MTC:
+ default:
+ break;
+ }
+
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+}
+
+static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ uint64_t max_insn_cnt, insn_cnt = 0;
+ int err;
+
+ if (!decoder->mtc_insn)
+ decoder->mtc_insn = true;
+
+ max_insn_cnt = intel_pt_next_sample(decoder);
+
+ err = decoder->walk_insn(intel_pt_insn, &insn_cnt, &decoder->ip, ip,
+ max_insn_cnt, decoder->data);
+
+ decoder->tot_insn_cnt += insn_cnt;
+ decoder->timestamp_insn_cnt += insn_cnt;
+ decoder->sample_insn_cnt += insn_cnt;
+ decoder->period_insn_cnt += insn_cnt;
+
+ if (err) {
+ decoder->no_progress = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR2;
+ intel_pt_log_at("ERROR: Failed to get instruction",
+ decoder->ip);
+ if (err == -ENOENT)
+ return -ENOLINK;
+ return -EILSEQ;
+ }
+
+ if (ip && decoder->ip == ip) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ intel_pt_sample_insn(decoder);
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_NO_BRANCH) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn->length;
+ err = INTEL_PT_RETURN;
+ goto out;
+ }
+
+ if (intel_pt_insn->op == INTEL_PT_OP_CALL) {
+ /* Zero-length calls are excluded */
+ if (intel_pt_insn->branch != INTEL_PT_BR_UNCONDITIONAL ||
+ intel_pt_insn->rel) {
+ err = intel_pt_push(&decoder->stack, decoder->ip +
+ intel_pt_insn->length);
+ if (err)
+ goto out;
+ }
+ } else if (intel_pt_insn->op == INTEL_PT_OP_RET) {
+ decoder->ret_addr = intel_pt_pop(&decoder->stack);
+ }
+
+ if (intel_pt_insn->branch == INTEL_PT_BR_UNCONDITIONAL) {
+ int cnt = decoder->no_progress++;
+
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn->length +
+ intel_pt_insn->rel;
+ decoder->state.to_ip = decoder->ip;
+ err = INTEL_PT_RETURN;
+
+ /*
+ * Check for being stuck in a loop. This can happen if a
+ * decoder error results in the decoder erroneously setting the
+ * ip to an address that is itself in an infinite loop that
+ * consumes no packets. When that happens, there must be an
+ * unconditional branch.
+ */
+ if (cnt) {
+ if (cnt == 1) {
+ decoder->stuck_ip = decoder->state.to_ip;
+ decoder->stuck_ip_prd = 1;
+ decoder->stuck_ip_cnt = 1;
+ } else if (cnt > INTEL_PT_MAX_LOOPS ||
+ decoder->state.to_ip == decoder->stuck_ip) {
+ intel_pt_log_at("ERROR: Never-ending loop",
+ decoder->state.to_ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ err = -ELOOP;
+ goto out;
+ } else if (!--decoder->stuck_ip_cnt) {
+ decoder->stuck_ip_prd += 1;
+ decoder->stuck_ip_cnt = decoder->stuck_ip_prd;
+ decoder->stuck_ip = decoder->state.to_ip;
+ }
+ }
+ goto out_no_progress;
+ }
+out:
+ decoder->no_progress = 0;
+out_no_progress:
+ decoder->state.insn_op = intel_pt_insn->op;
+ decoder->state.insn_len = intel_pt_insn->length;
+ memcpy(decoder->state.insn, intel_pt_insn->buf,
+ INTEL_PT_INSN_BUF_SZ);
+
+ if (decoder->tx_flags & INTEL_PT_IN_TX)
+ decoder->state.flags |= INTEL_PT_IN_TX;
+
+ return err;
+}
+
+static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
+{
+ bool ret = false;
+
+ if (decoder->set_fup_tx_flags) {
+ decoder->set_fup_tx_flags = false;
+ decoder->tx_flags = decoder->fup_tx_flags;
+ decoder->state.type = INTEL_PT_TRANSACTION;
+ if (decoder->fup_tx_flags & INTEL_PT_ABORT_TX)
+ decoder->state.type |= INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.flags = decoder->fup_tx_flags;
+ return true;
+ }
+ if (decoder->set_fup_ptw) {
+ decoder->set_fup_ptw = false;
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->fup_ptw_payload;
+ return true;
+ }
+ if (decoder->set_fup_mwait) {
+ decoder->set_fup_mwait = false;
+ decoder->state.type = INTEL_PT_MWAIT_OP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.mwait_payload = decoder->fup_mwait_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_pwre) {
+ decoder->set_fup_pwre = false;
+ decoder->state.type |= INTEL_PT_PWR_ENTRY;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwre_payload = decoder->fup_pwre_payload;
+ ret = true;
+ }
+ if (decoder->set_fup_exstop) {
+ decoder->set_fup_exstop = false;
+ decoder->state.type |= INTEL_PT_EX_STOP;
+ decoder->state.type &= ~INTEL_PT_BRANCH;
+ decoder->state.flags |= INTEL_PT_FUP_IP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ ret = true;
+ }
+ return ret;
+}
+
+static inline bool intel_pt_fup_with_nlip(struct intel_pt_decoder *decoder,
+ struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip, int err)
+{
+ return decoder->flags & INTEL_PT_FUP_WITH_NLIP && !err &&
+ intel_pt_insn->branch == INTEL_PT_BR_INDIRECT &&
+ ip == decoder->ip + intel_pt_insn->length;
+}
+
+static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ uint64_t ip;
+ int err;
+
+ ip = decoder->last_ip;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, ip);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err == -EAGAIN ||
+ intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ bool no_tip = decoder->pkt_state != INTEL_PT_STATE_FUP;
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (intel_pt_fup_event(decoder) && no_tip)
+ return 0;
+ return -EAGAIN;
+ }
+ decoder->set_fup_tx_flags = false;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ intel_pt_log_at("ERROR: Unexpected indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ intel_pt_log_at("ERROR: Unexpected conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_walk_tip(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN &&
+ decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ (decoder->state.type & INTEL_PT_BRANCH) &&
+ decoder->pgd_ip(decoder->state.to_ip, decoder->data)) {
+ /* Unconditional branch leaving filter region */
+ decoder->no_progress = 0;
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ if (decoder->pkt_state == INTEL_PT_STATE_TIP_PGD) {
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (decoder->packet.count != 0)
+ decoder->ip = decoder->last_ip;
+ } else {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ }
+ }
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ uint64_t to_ip = decoder->ip + intel_pt_insn.length +
+ intel_pt_insn.rel;
+
+ if (decoder->pgd_ip &&
+ decoder->pkt_state == INTEL_PT_STATE_TIP_PGD &&
+ decoder->pgd_ip(to_ip, decoder->data)) {
+ /* Conditional branch leaving filter region */
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->ip = to_ip;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+ }
+ intel_pt_log_at("ERROR: Conditional branch when expecting indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ return -ENOENT;
+ }
+
+ return intel_pt_bug(decoder);
+}
+
+static int intel_pt_walk_tnt(struct intel_pt_decoder *decoder)
+{
+ struct intel_pt_insn intel_pt_insn;
+ int err;
+
+ while (1) {
+ err = intel_pt_walk_insn(decoder, &intel_pt_insn, 0);
+ if (err == INTEL_PT_RETURN)
+ return 0;
+ if (err)
+ return err;
+
+ if (intel_pt_insn.op == INTEL_PT_OP_RET) {
+ if (!decoder->return_compression) {
+ intel_pt_log_at("ERROR: RET when expecting conditional branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!decoder->ret_addr) {
+ intel_pt_log_at("ERROR: Bad RET compression (stack empty)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ if (!(decoder->tnt.payload & BIT63)) {
+ intel_pt_log_at("ERROR: Bad RET compression (TNT=N)",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+ }
+ decoder->tnt.count -= 1;
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip = decoder->ret_addr;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_INDIRECT) {
+ /* Handle deferred TIPs */
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type != INTEL_PT_TIP ||
+ decoder->packet.count == 0) {
+ intel_pt_log_at("ERROR: Missing deferred TIP for indirect branch",
+ decoder->ip);
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+ }
+ intel_pt_set_last_ip(decoder);
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = decoder->last_ip;
+ decoder->ip = decoder->last_ip;
+ return 0;
+ }
+
+ if (intel_pt_insn.branch == INTEL_PT_BR_CONDITIONAL) {
+ decoder->tnt.count -= 1;
+ if (decoder->tnt.count)
+ decoder->pkt_state = INTEL_PT_STATE_TNT_CONT;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ if (decoder->tnt.payload & BIT63) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.from_ip = decoder->ip;
+ decoder->ip += intel_pt_insn.length +
+ intel_pt_insn.rel;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+ /* Instruction sample for a non-taken branch */
+ if (decoder->state.type & INTEL_PT_INSTRUCTION) {
+ decoder->tnt.payload <<= 1;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->ip += intel_pt_insn.length;
+ return 0;
+ }
+ decoder->ip += intel_pt_insn.length;
+ if (!decoder->tnt.count) {
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ return -EAGAIN;
+ }
+ decoder->tnt.payload <<= 1;
+ continue;
+ }
+
+ return intel_pt_bug(decoder);
+ }
+}
+
+static int intel_pt_mode_tsx(struct intel_pt_decoder *decoder, bool *no_tip)
+{
+ unsigned int fup_tx_flags;
+ int err;
+
+ fup_tx_flags = decoder->packet.payload &
+ (INTEL_PT_IN_TX | INTEL_PT_ABORT_TX);
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->fup_tx_flags = fup_tx_flags;
+ decoder->set_fup_tx_flags = true;
+ if (!(decoder->fup_tx_flags & INTEL_PT_ABORT_TX))
+ *no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after MODE.TSX",
+ decoder->pos);
+ intel_pt_update_in_tx(decoder);
+ }
+ return 0;
+}
+
+static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+
+ decoder->have_tma = false;
+
+ if (decoder->ref_timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->ref_timestamp & (0xffULL << 56));
+ if (timestamp < decoder->ref_timestamp) {
+ if (decoder->ref_timestamp - timestamp > (1ULL << 55))
+ timestamp += (1ULL << 56);
+ } else {
+ if (timestamp - decoder->ref_timestamp > (1ULL << 55))
+ timestamp -= (1ULL << 56);
+ }
+ decoder->tsc_timestamp = timestamp;
+ decoder->timestamp = timestamp;
+ decoder->ref_timestamp = 0;
+ decoder->timestamp_insn_cnt = 0;
+ } else if (decoder->timestamp) {
+ timestamp = decoder->packet.payload |
+ (decoder->timestamp & (0xffULL << 56));
+ decoder->tsc_timestamp = timestamp;
+ if (timestamp < decoder->timestamp &&
+ decoder->timestamp - timestamp < decoder->tsc_slip) {
+ intel_pt_log_to("Suppressing backwards timestamp",
+ timestamp);
+ timestamp = decoder->timestamp;
+ }
+ if (timestamp < decoder->timestamp) {
+ intel_pt_log_to("Wraparound timestamp", timestamp);
+ timestamp += (1ULL << 56);
+ decoder->tsc_timestamp = timestamp;
+ }
+ decoder->timestamp = timestamp;
+ decoder->timestamp_insn_cnt = 0;
+ }
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, false);
+ }
+
+ intel_pt_log_to("Setting timestamp", decoder->timestamp);
+}
+
+static int intel_pt_overflow(struct intel_pt_decoder *decoder)
+{
+ intel_pt_log("ERROR: Buffer overflow\n");
+ intel_pt_clear_tx_flags(decoder);
+ decoder->timestamp_insn_cnt = 0;
+ decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
+ decoder->overflow = true;
+ return -EOVERFLOW;
+}
+
+static void intel_pt_calc_tma(struct intel_pt_decoder *decoder)
+{
+ uint32_t ctc = decoder->packet.payload;
+ uint32_t fc = decoder->packet.count;
+ uint32_t ctc_rem = ctc & decoder->ctc_rem_mask;
+
+ if (!decoder->tsc_ctc_ratio_d)
+ return;
+
+ decoder->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+ decoder->ctc_timestamp = decoder->tsc_timestamp - fc;
+ if (decoder->tsc_ctc_mult) {
+ decoder->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+ } else {
+ decoder->ctc_timestamp -= multdiv(ctc_rem,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+ decoder->ctc_delta = 0;
+ decoder->have_tma = true;
+ decoder->fixup_last_mtc = true;
+ intel_pt_log("CTC timestamp " x64_fmt " last MTC %#x CTC rem %#x\n",
+ decoder->ctc_timestamp, decoder->last_mtc, ctc_rem);
+}
+
+static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp;
+ uint32_t mtc, mtc_delta;
+
+ if (!decoder->have_tma)
+ return;
+
+ mtc = decoder->packet.payload;
+
+ if (decoder->mtc_shift > 8 && decoder->fixup_last_mtc) {
+ decoder->fixup_last_mtc = false;
+ intel_pt_fixup_last_mtc(mtc, decoder->mtc_shift,
+ &decoder->last_mtc);
+ }
+
+ if (mtc > decoder->last_mtc)
+ mtc_delta = mtc - decoder->last_mtc;
+ else
+ mtc_delta = mtc + 256 - decoder->last_mtc;
+
+ decoder->ctc_delta += mtc_delta << decoder->mtc_shift;
+
+ if (decoder->tsc_ctc_mult) {
+ timestamp = decoder->ctc_timestamp +
+ decoder->ctc_delta * decoder->tsc_ctc_mult;
+ } else {
+ timestamp = decoder->ctc_timestamp +
+ multdiv(decoder->ctc_delta,
+ decoder->tsc_ctc_ratio_n,
+ decoder->tsc_ctc_ratio_d);
+ }
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing MTC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+ decoder->last_mtc = mtc;
+
+ if (decoder->last_packet_type == INTEL_PT_CYC) {
+ decoder->cyc_ref_timestamp = decoder->timestamp;
+ decoder->cycle_cnt = 0;
+ decoder->have_calc_cyc_to_tsc = false;
+ intel_pt_calc_cyc_to_tsc(decoder, true);
+ }
+}
+
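+/*
+ * Worked example: MTC carries 8 bits of the CTC, so with last_mtc == 0xfe
+ * and a new mtc == 0x01 the delta is 0x01 + 256 - 0xfe = 3 MTC periods.
+ * With, say, mtc_shift == 3 that adds 3 << 3 = 24 CTC ticks to ctc_delta,
+ * which is then scaled to TSC ticks using tsc_ctc_ratio_n / tsc_ctc_ratio_d
+ * (or the precomputed tsc_ctc_mult when the ratio is integral).
+ */
+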
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{
+ unsigned int cbr = decoder->packet.payload & 0xff;
+
+ decoder->cbr_payload = decoder->packet.payload;
+
+ if (decoder->cbr == cbr)
+ return;
+
+ decoder->cbr = cbr;
+ decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr;
+}
+
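+/*
+ * Example: the TSC runs at max_non_turbo_ratio times the bus clock while
+ * the core runs at cbr times the bus clock, so with max_non_turbo_ratio_fp
+ * == 24 and cbr == 12 every core cycle spans 24 / 12 = 2 TSC ticks.
+ * cbr_cyc_to_tsc is that conversion factor, used to scale CYC packets.
+ */
+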
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t timestamp = decoder->cyc_ref_timestamp;
+
+ decoder->have_cyc = true;
+
+ decoder->cycle_cnt += decoder->packet.payload;
+
+ if (!decoder->cyc_ref_timestamp)
+ return;
+
+ if (decoder->have_calc_cyc_to_tsc)
+ timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+ else if (decoder->cbr)
+ timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+ else
+ return;
+
+ if (timestamp < decoder->timestamp)
+ intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+ timestamp, decoder->timestamp);
+ else
+ decoder->timestamp = timestamp;
+
+ decoder->timestamp_insn_cnt = 0;
+}
+
+/* Walk PSB+ packets when already in sync. */
+static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_PSBEND:
+ return 0;
+
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_TNT:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ return -EAGAIN;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (decoder->packet.count)
+ intel_pt_set_last_ip(decoder);
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ if (decoder->tx_flags & INTEL_PT_ABORT_TX) {
+ decoder->tx_flags = 0;
+ decoder->state.flags &= ~INTEL_PT_IN_TX;
+ decoder->state.flags |= INTEL_PT_ABORT_TX;
+ } else {
+ decoder->state.flags |= INTEL_PT_ASYNC;
+ }
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ case INTEL_PT_FUP:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PSB:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_BAD:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ intel_pt_log("ERROR: Missing TIP after FUP\n");
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ decoder->pkt_step = 0;
+ return -ENOENT;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP_PGD:
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ if (decoder->packet.count != 0) {
+ intel_pt_set_ip(decoder);
+ intel_pt_log("Omitting PGD ip " x64_fmt "\n",
+ decoder->ip);
+ }
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ return 0;
+
+ case INTEL_PT_TIP_PGE:
+ decoder->pge = true;
+ intel_pt_log("Omitting PGE ip " x64_fmt "\n",
+ decoder->ip);
+ decoder->state.from_ip = 0;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ return 0;
+
+ case INTEL_PT_TIP:
+ decoder->state.from_ip = decoder->ip;
+ if (decoder->packet.count == 0) {
+ decoder->state.to_ip = 0;
+ } else {
+ intel_pt_set_ip(decoder);
+ decoder->state.to_ip = decoder->ip;
+ }
+ return 0;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type == INTEL_PT_PERIOD_MTC)
+ decoder->state.type |= INTEL_PT_INSTRUCTION;
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
+{
+ bool no_tip = false;
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+next:
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ if (!decoder->packet.count)
+ break;
+ decoder->tnt = decoder->packet;
+ decoder->pkt_state = INTEL_PT_STATE_TNT;
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ break;
+ return err;
+
+ case INTEL_PT_TIP_PGD:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP_PGD;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_TIP_PGE: {
+ decoder->pge = true;
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero TIP.PGE",
+ decoder->pos);
+ break;
+ }
+ intel_pt_set_ip(decoder);
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return 0;
+ }
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_TIP:
+ if (decoder->packet.count != 0)
+ intel_pt_set_last_ip(decoder);
+ decoder->pkt_state = INTEL_PT_STATE_TIP;
+ return intel_pt_walk_tip(decoder);
+
+ case INTEL_PT_FUP:
+ if (decoder->packet.count == 0) {
+ intel_pt_log_at("Skipping zero FUP",
+ decoder->pos);
+ no_tip = false;
+ break;
+ }
+ intel_pt_set_last_ip(decoder);
+ if (!decoder->branch_enable) {
+ decoder->ip = decoder->last_ip;
+ if (intel_pt_fup_event(decoder))
+ return 0;
+ no_tip = false;
+ break;
+ }
+ if (decoder->set_fup_mwait)
+ no_tip = true;
+ if (no_tip)
+ decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
+ err = intel_pt_walk_fup(decoder);
+ if (err != -EAGAIN)
+ return err;
+ if (no_tip) {
+ no_tip = false;
+ break;
+ }
+ return intel_pt_walk_fup_tip(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psbend(decoder);
+ if (err == -EAGAIN)
+ goto next;
+ if (err)
+ return err;
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ if (decoder->period_type != INTEL_PT_PERIOD_MTC)
+ break;
+ /*
+ * Ensure that there has been an instruction since the
+ * last MTC.
+ */
+ if (!decoder->mtc_insn)
+ break;
+ decoder->mtc_insn = false;
+ /* Ensure that there is a timestamp */
+ if (!decoder->timestamp)
+ break;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->mtc_insn = false;
+ return 0;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ if (!decoder->branch_enable &&
+ decoder->cbr != decoder->cbr_seen) {
+ decoder->cbr_seen = decoder->cbr;
+ decoder->state.type = INTEL_PT_CBR_CHG;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.cbr_payload =
+ decoder->packet.payload;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ /* MODE_TSX need not be followed by FUP */
+ if (!decoder->pge) {
+ intel_pt_update_in_tx(decoder);
+ break;
+ }
+ err = intel_pt_mode_tsx(decoder, &no_tip);
+ if (err)
+ return err;
+ goto next;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ break;
+
+ case INTEL_PT_PTWRITE_IP:
+ decoder->fup_ptw_payload = decoder->packet.payload;
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_ptw = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after PTWRITE",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_PTWRITE:
+ decoder->state.type = INTEL_PT_PTW;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.ptw_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_MWAIT:
+ decoder->fup_mwait_payload = decoder->packet.payload;
+ decoder->set_fup_mwait = true;
+ break;
+
+ case INTEL_PT_PWRE:
+ if (decoder->set_fup_mwait) {
+ decoder->fup_pwre_payload =
+ decoder->packet.payload;
+ decoder->set_fup_pwre = true;
+ break;
+ }
+			decoder->state.type = INTEL_PT_PWR_ENTRY;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			decoder->state.pwre_payload = decoder->packet.payload;
+ return 0;
+
+ case INTEL_PT_EXSTOP_IP:
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+ if (decoder->packet.type == INTEL_PT_FUP) {
+ decoder->set_fup_exstop = true;
+ no_tip = true;
+ } else {
+ intel_pt_log_at("ERROR: Missing FUP after EXSTOP",
+ decoder->pos);
+ }
+ goto next;
+
+ case INTEL_PT_EXSTOP:
+ decoder->state.type = INTEL_PT_EX_STOP;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+
+ case INTEL_PT_PWRX:
+ decoder->state.type = INTEL_PT_PWR_EXIT;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ decoder->state.pwrx_payload = decoder->packet.payload;
+ return 0;
+
+ default:
+ return intel_pt_bug(decoder);
+ }
+ }
+}
+
+static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder)
+{
+ return decoder->packet.count &&
+ (decoder->have_last_ip || decoder->packet.count == 3 ||
+ decoder->packet.count == 6);
+}
+
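+/*
+ * Note: packet.count here is the IPBytes field of a TIP/FUP packet.
+ * Values 3 (48-bit, sign-extended) and 6 (full 64-bit) carry a complete
+ * IP on their own; the other non-zero values only replace the low bytes
+ * of the previous IP and are therefore usable only with have_last_ip.
+ */
+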
+/* Walk PSB+ packets to get in sync. */
+static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ __fallthrough;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ intel_pt_log("ERROR: Unexpected packet\n");
+ return -ENOENT;
+
+ case INTEL_PT_FUP:
+ decoder->pge = true;
+ if (intel_pt_have_ip(decoder)) {
+ uint64_t current_ip = decoder->ip;
+
+ intel_pt_set_ip(decoder);
+ if (current_ip)
+ intel_pt_log_to("Setting IP",
+ decoder->ip);
+ }
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ __fallthrough;
+
+ case INTEL_PT_TNT:
+ decoder->have_tma = false;
+ intel_pt_log("ERROR: Unexpected packet\n");
+ if (decoder->ip)
+ decoder->pkt_state = INTEL_PT_STATE_ERR4;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_ERR3;
+ return -ENOENT;
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_PSBEND:
+ return 0;
+
+ case INTEL_PT_PSB:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ while (1) {
+ err = intel_pt_get_next_packet(decoder);
+ if (err)
+ return err;
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TIP_PGD:
+ decoder->continuous_period = false;
+ __fallthrough;
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD;
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ break;
+
+ case INTEL_PT_FUP:
+ if (intel_pt_have_ip(decoder))
+ intel_pt_set_ip(decoder);
+ if (decoder->ip)
+ return 0;
+ break;
+
+ case INTEL_PT_MTC:
+ intel_pt_calc_mtc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TSC:
+ intel_pt_calc_tsc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_TMA:
+ intel_pt_calc_tma(decoder);
+ break;
+
+ case INTEL_PT_CYC:
+ intel_pt_calc_cyc_timestamp(decoder);
+ break;
+
+ case INTEL_PT_CBR:
+ intel_pt_calc_cbr(decoder);
+ break;
+
+ case INTEL_PT_PIP:
+ decoder->cr3 = decoder->packet.payload & (BIT63 - 1);
+ break;
+
+ case INTEL_PT_MODE_EXEC:
+ decoder->exec_mode = decoder->packet.payload;
+ break;
+
+ case INTEL_PT_MODE_TSX:
+ intel_pt_update_in_tx(decoder);
+ break;
+
+ case INTEL_PT_OVF:
+ return intel_pt_overflow(decoder);
+
+ case INTEL_PT_BAD: /* Does not happen */
+ return intel_pt_bug(decoder);
+
+ case INTEL_PT_TRACESTOP:
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ intel_pt_clear_tx_flags(decoder);
+ decoder->have_tma = false;
+ break;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ intel_pt_clear_stack(&decoder->stack);
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+ if (decoder->ip) {
+ /* Do not have a sample */
+ decoder->state.type = 0;
+ return 0;
+ }
+ break;
+
+ case INTEL_PT_TNT:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ default:
+ break;
+ }
+ }
+}
+
+static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->set_fup_tx_flags = false;
+ decoder->set_fup_ptw = false;
+ decoder->set_fup_mwait = false;
+ decoder->set_fup_pwre = false;
+ decoder->set_fup_exstop = false;
+
+ if (!decoder->branch_enable) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->overflow = false;
+ decoder->state.type = 0; /* Do not have a sample */
+ return 0;
+ }
+
+ intel_pt_log("Scanning for full IP\n");
+ err = intel_pt_walk_to_ip(decoder);
+ if (err)
+ return err;
+
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->overflow = false;
+
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ intel_pt_log_to("Setting IP", decoder->ip);
+
+ return 0;
+}
+
+static int intel_pt_part_psb(struct intel_pt_decoder *decoder)
+{
+ const unsigned char *end = decoder->buf + decoder->len;
+ size_t i;
+
+ for (i = INTEL_PT_PSB_LEN - 1; i; i--) {
+ if (i > decoder->len)
+ continue;
+ if (!memcmp(end - i, INTEL_PT_PSB_STR, i))
+ return i;
+ }
+ return 0;
+}
+
+static int intel_pt_rest_psb(struct intel_pt_decoder *decoder, int part_psb)
+{
+ size_t rest_psb = INTEL_PT_PSB_LEN - part_psb;
+ const char *psb = INTEL_PT_PSB_STR;
+
+ if (rest_psb > decoder->len ||
+ memcmp(decoder->buf, psb + part_psb, rest_psb))
+ return 0;
+
+ return rest_psb;
+}
+
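+/*
+ * Example: PSB is the 16-byte pattern 02 82 02 82 ... 02 82.  If a data
+ * buffer ends with only the first 5 of those bytes, intel_pt_part_psb()
+ * returns 5, intel_pt_rest_psb() checks that the next buffer begins with
+ * the remaining 11 bytes, and intel_pt_get_split_psb() below stitches the
+ * two halves back together in temp_buf.
+ */
+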
+static int intel_pt_get_split_psb(struct intel_pt_decoder *decoder,
+ int part_psb)
+{
+ int rest_psb, ret;
+
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+
+ rest_psb = intel_pt_rest_psb(decoder, part_psb);
+ if (!rest_psb)
+ return 0;
+
+ decoder->pos -= part_psb;
+ decoder->next_buf = decoder->buf + rest_psb;
+ decoder->next_len = decoder->len - rest_psb;
+ memcpy(decoder->temp_buf, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ decoder->buf = decoder->temp_buf;
+ decoder->len = INTEL_PT_PSB_LEN;
+
+ return 0;
+}
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder)
+{
+ unsigned char *next;
+ int ret;
+
+ intel_pt_log("Scanning for PSB\n");
+ while (1) {
+ if (!decoder->len) {
+ ret = intel_pt_get_next_data(decoder);
+ if (ret)
+ return ret;
+ }
+
+ next = memmem(decoder->buf, decoder->len, INTEL_PT_PSB_STR,
+ INTEL_PT_PSB_LEN);
+ if (!next) {
+ int part_psb;
+
+ part_psb = intel_pt_part_psb(decoder);
+ if (part_psb) {
+ ret = intel_pt_get_split_psb(decoder, part_psb);
+ if (ret)
+ return ret;
+ } else {
+ decoder->pos += decoder->len;
+ decoder->len = 0;
+ }
+ continue;
+ }
+
+ decoder->pkt_step = next - decoder->buf;
+ return intel_pt_get_next_packet(decoder);
+ }
+}
+
+static int intel_pt_sync(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ decoder->pge = false;
+ decoder->continuous_period = false;
+ decoder->have_last_ip = false;
+ decoder->last_ip = 0;
+ decoder->ip = 0;
+ intel_pt_clear_stack(&decoder->stack);
+
+ err = intel_pt_scan_for_psb(decoder);
+ if (err)
+ return err;
+
+ decoder->have_last_ip = true;
+ decoder->pkt_state = INTEL_PT_STATE_NO_IP;
+
+ err = intel_pt_walk_psb(decoder);
+ if (err)
+ return err;
+
+ if (decoder->ip) {
+ decoder->state.type = 0; /* Do not have a sample */
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ } else {
+ return intel_pt_sync_ip(decoder);
+ }
+
+ return 0;
+}
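+/*
+ * Worked example of TNT consumption in intel_pt_walk_tnt(): with
+ * tnt.count == 3 and payload bits T,N,T left-aligned at BIT63, the first
+ * conditional branch tests BIT63 (taken), then the payload is shifted
+ * left by one and tnt.count drops to 2, and so on.  A compressed RET
+ * consumes a bit the same way but additionally requires it to be 1
+ * (TNT=T), since "not taken" is meaningless for a RET, hence the
+ * "Bad RET compression (TNT=N)" error above.
+ */
+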
+
+static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder)
+{
+ uint64_t est = decoder->sample_insn_cnt << 1;
+
+ if (!decoder->cbr || !decoder->max_non_turbo_ratio)
+ goto out;
+
+ est *= decoder->max_non_turbo_ratio;
+ est /= decoder->cbr;
+out:
+ return decoder->sample_timestamp + est;
+}
+
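+/*
+ * Estimation example: 1000 instructions since the last real timestamp are
+ * doubled to 2000 as a rough cycles-per-instruction guess, then scaled by
+ * max_non_turbo_ratio / cbr (e.g. 24 / 12 = 2) to estimate 4000 TSC ticks
+ * beyond sample_timestamp.
+ */
+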
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
+{
+ int err;
+
+ do {
+ decoder->state.type = INTEL_PT_BRANCH;
+ decoder->state.flags = 0;
+
+ switch (decoder->pkt_state) {
+ case INTEL_PT_STATE_NO_PSB:
+ err = intel_pt_sync(decoder);
+ break;
+ case INTEL_PT_STATE_NO_IP:
+ decoder->have_last_ip = false;
+ decoder->last_ip = 0;
+ decoder->ip = 0;
+ __fallthrough;
+ case INTEL_PT_STATE_ERR_RESYNC:
+ err = intel_pt_sync_ip(decoder);
+ break;
+ case INTEL_PT_STATE_IN_SYNC:
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TNT:
+ case INTEL_PT_STATE_TNT_CONT:
+ err = intel_pt_walk_tnt(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ case INTEL_PT_STATE_TIP:
+ case INTEL_PT_STATE_TIP_PGD:
+ err = intel_pt_walk_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP:
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_fup_tip(decoder);
+ break;
+ case INTEL_PT_STATE_FUP_NO_TIP:
+ err = intel_pt_walk_fup(decoder);
+ if (err == -EAGAIN)
+ err = intel_pt_walk_trace(decoder);
+ break;
+ default:
+ err = intel_pt_bug(decoder);
+ break;
+ }
+ } while (err == -ENOLINK);
+
+ if (err) {
+ decoder->state.err = intel_pt_ext_err(err);
+ decoder->state.from_ip = decoder->ip;
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ } else {
+ decoder->state.err = 0;
+ if (decoder->cbr != decoder->cbr_seen && decoder->state.type) {
+ decoder->cbr_seen = decoder->cbr;
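+/*
+ * Wraparound example: TSC packets carry only the low 56 bits, so with a
+ * reference timestamp of 0x01ffffffffffff00 a payload of 0x100 first
+ * yields 0x0100000000000100.  That is more than 2^55 behind the
+ * reference, so 2^56 is added, giving 0x0200000000000100: the 56-bit
+ * counter wrapped and the top byte advances.
+ */
+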
+ decoder->state.type |= INTEL_PT_CBR_CHG;
+ decoder->state.cbr_payload = decoder->cbr_payload;
+ }
+ if (intel_pt_sample_time(decoder->pkt_state)) {
+ decoder->sample_timestamp = decoder->timestamp;
+ decoder->sample_insn_cnt = decoder->timestamp_insn_cnt;
+ }
+ }
+
+ decoder->state.timestamp = decoder->sample_timestamp;
+ decoder->state.est_timestamp = intel_pt_est_timestamp(decoder);
+ decoder->state.cr3 = decoder->cr3;
+ decoder->state.tot_insn_cnt = decoder->tot_insn_cnt;
+
+ return &decoder->state;
+}
+
+/**
+ * intel_pt_next_psb - move buffer pointer to the start of the next PSB packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the next PSB packet if
+ * there is one, otherwise the buffer pointer is unchanged. If @buf is updated,
+ * @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_next_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ next = memmem(*buf, *len, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_step_psb - move buffer pointer to the start of the following PSB
+ * packet.
+ * @buf: pointer to buffer pointer
+ * @len: size of buffer
+ *
+ * Updates the buffer pointer to point to the start of the following PSB packet
+ * (skipping the PSB at @buf itself) if there is one, otherwise the buffer
+ * pointer is unchanged. If @buf is updated, @len is adjusted accordingly.
+ *
+ * Return: %true if a PSB packet is found, %false otherwise.
+ */
+static bool intel_pt_step_psb(unsigned char **buf, size_t *len)
+{
+ unsigned char *next;
+
+ if (!*len)
+ return false;
+
+ next = memmem(*buf + 1, *len - 1, INTEL_PT_PSB_STR, INTEL_PT_PSB_LEN);
+ if (next) {
+ *len -= next - *buf;
+ *buf = next;
+ return true;
+ }
+ return false;
+}
+
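+/*
+ * Typical usage sketch for the two helpers above, iterating over every
+ * PSB in a buffer (process_psb() is a hypothetical callback, not part of
+ * this file):
+ *
+ *	if (intel_pt_next_psb(&buf, &len)) {
+ *		do {
+ *			process_psb(buf, len);
+ *		} while (intel_pt_step_psb(&buf, &len));
+ *	}
+ */
+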
+/**
+ * intel_pt_last_psb - find the last PSB packet in a buffer.
+ * @buf: buffer
+ * @len: size of buffer
+ *
+ * This function finds the last PSB in a buffer.
+ *
+ * Return: A pointer to the last PSB in @buf if found, %NULL otherwise.
+ */
+static unsigned char *intel_pt_last_psb(unsigned char *buf, size_t len)
+{
+ const char *n = INTEL_PT_PSB_STR;
+ unsigned char *p;
+ size_t k;
+
+ if (len < INTEL_PT_PSB_LEN)
+ return NULL;
+
+ k = len - INTEL_PT_PSB_LEN + 1;
+ while (1) {
+ p = memrchr(buf, n[0], k);
+ if (!p)
+ return NULL;
+ if (!memcmp(p + 1, n + 1, INTEL_PT_PSB_LEN - 1))
+ return p;
+ k = p - buf;
+ if (!k)
+ return NULL;
+ }
+}
+
+/**
+ * intel_pt_next_tsc - find and return next TSC.
+ * @buf: buffer
+ * @len: size of buffer
+ * @tsc: TSC value returned
+ * @rem: returns remaining size when TSC is found
+ *
+ * Find a TSC packet in @buf and return the TSC value. This function assumes
+ * that @buf starts at a PSB, and that the PSB+ will contain a TSC, so it
+ * stops if a PSBEND packet is found.
+ *
+ * Return: %true if TSC is found, %false otherwise.
+ */
+static bool intel_pt_next_tsc(unsigned char *buf, size_t len, uint64_t *tsc,
+ size_t *rem)
+{
+ struct intel_pt_pkt packet;
+ int ret;
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet);
+ if (ret <= 0)
+ return false;
+ if (packet.type == INTEL_PT_TSC) {
+ *tsc = packet.payload;
+ *rem = len;
+ return true;
+ }
+ if (packet.type == INTEL_PT_PSBEND)
+ return false;
+ buf += ret;
+ len -= ret;
+ }
+ return false;
+}
+
+/**
+ * intel_pt_tsc_cmp - compare 7-byte TSCs.
+ * @tsc1: first TSC to compare
+ * @tsc2: second TSC to compare
+ *
+ * This function compares 7-byte TSC values allowing for the possibility that
+ * the TSC wrapped around. Generally it is not possible to know whether the
+ * TSC has wrapped around, so this function assumes that the absolute
+ * difference between the two values is less than half the maximum possible
+ * difference.
+ *
+ * Return: %-1 if @tsc1 is before @tsc2, %0 if @tsc1 == @tsc2, %1 if @tsc1 is
+ * after @tsc2.
+ */
+static int intel_pt_tsc_cmp(uint64_t tsc1, uint64_t tsc2)
+{
+ const uint64_t halfway = (1ULL << 55);
+
+ if (tsc1 == tsc2)
+ return 0;
+
+ if (tsc1 < tsc2) {
+ if (tsc2 - tsc1 < halfway)
+ return -1;
+ else
+ return 1;
+ } else {
+ if (tsc1 - tsc2 < halfway)
+ return 1;
+ else
+ return -1;
+ }
+}
+
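+/*
+ * Example: tsc1 == 0xffffffffffff00 (near the top of the 56-bit range)
+ * and tsc2 == 0x100 differ by almost 2^56, more than the halfway point
+ * 2^55, so intel_pt_tsc_cmp() returns -1: tsc2 is taken to be the later
+ * value on the assumption that the counter wrapped.
+ */
+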
+#define MAX_PADDING (PERF_AUXTRACE_RECORD_ALIGNMENT - 1)
+
+/**
+ * adj_for_padding - adjust overlap to account for padding.
+ * @buf_b: second buffer
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ *
+ * @buf_a might have up to 7 bytes of padding appended. Adjust the overlap
+ * accordingly.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts
+ */
+static unsigned char *adj_for_padding(unsigned char *buf_b,
+ unsigned char *buf_a, size_t len_a)
+{
+ unsigned char *p = buf_b - MAX_PADDING;
+ unsigned char *q = buf_a + len_a - MAX_PADDING;
+ int i;
+
+ for (i = MAX_PADDING; i; i--, p++, q++) {
+ if (*p != *q)
+ break;
+ }
+
+ return p;
+}
+
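+/*
+ * Example: @buf_a ends with 3 alignment padding bytes that never appear
+ * in @buf_b, so the provisional start is 3 bytes too far into @buf_b.
+ * The comparison above matches the genuine trailing bytes, mismatches
+ * where padding meets new data, and returns a pointer 3 bytes earlier,
+ * which is the true start of non-overlapped data.
+ */
+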
+/**
+ * intel_pt_find_overlap_tsc - determine start of non-overlapped trace data
+ * using TSC.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
+ *
+ * If the trace contains TSC we can look at the last TSC of @buf_a and the
+ * first TSC of @buf_b in order to determine if the buffers overlap, and then
+ * walk forward in @buf_b until a later TSC is found. A precondition is that
+ * @buf_a and @buf_b are positioned at a PSB.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+static unsigned char *intel_pt_find_overlap_tsc(unsigned char *buf_a,
+ size_t len_a,
+ unsigned char *buf_b,
+ size_t len_b, bool *consecutive)
+{
+ uint64_t tsc_a, tsc_b;
+ unsigned char *p;
+ size_t len, rem_a, rem_b;
+
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No PSB in buf_a => no overlap */
+
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a)) {
+ /* The last PSB+ in buf_a is incomplete, so go back one more */
+ len_a -= len;
+ p = intel_pt_last_psb(buf_a, len_a);
+ if (!p)
+ return buf_b; /* No full PSB+ => assume no overlap */
+ len = len_a - (p - buf_a);
+ if (!intel_pt_next_tsc(p, len, &tsc_a, &rem_a))
+ return buf_b; /* No TSC in buf_a => assume no overlap */
+ }
+
+ while (1) {
+ /* Ignore PSB+ with no TSC */
+ if (intel_pt_next_tsc(buf_b, len_b, &tsc_b, &rem_b)) {
+ int cmp = intel_pt_tsc_cmp(tsc_a, tsc_b);
+
+ /* Same TSC, so buffers are consecutive */
+ if (!cmp && rem_b >= rem_a) {
+ unsigned char *start;
+
+ *consecutive = true;
+ start = buf_b + len_b - (rem_b - rem_a);
+ return adj_for_padding(start, buf_a, len_a);
+ }
+ if (cmp < 0)
+ return buf_b; /* tsc_a < tsc_b => no overlap */
+ }
+
+ if (!intel_pt_step_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB in buf_b => no data */
+ }
+}
+
+/**
+ * intel_pt_find_overlap - determine start of non-overlapped trace data.
+ * @buf_a: first buffer
+ * @len_a: size of first buffer
+ * @buf_b: second buffer
+ * @len_b: size of second buffer
+ * @have_tsc: can use TSC packets to detect overlap
+ * @consecutive: returns true if there is data in buf_b that is consecutive
+ * to buf_a
+ *
+ * When trace samples or snapshots are recorded, there is the possibility that
+ * the data overlaps. Note that, for the purposes of decoding, data is only
+ * useful if it begins with a PSB packet.
+ *
+ * Return: A pointer into @buf_b from where non-overlapped data starts, or
+ * @buf_b + @len_b if there is no non-overlapped data.
+ */
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc, bool *consecutive)
+{
+ unsigned char *found;
+
+ /* Buffer 'b' must start at PSB so throw away everything before that */
+ if (!intel_pt_next_psb(&buf_b, &len_b))
+ return buf_b + len_b; /* No PSB */
+
+ if (!intel_pt_next_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+
+ if (have_tsc) {
+ found = intel_pt_find_overlap_tsc(buf_a, len_a, buf_b, len_b,
+ consecutive);
+ if (found)
+ return found;
+ }
+
+ /*
+ * Buffer 'b' cannot end within buffer 'a' so, for comparison purposes,
+ * we can ignore the first part of buffer 'a'.
+ */
+ while (len_b < len_a) {
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+
+ /* Now len_b >= len_a */
+ while (1) {
+ /* Potential overlap so check the bytes */
+ found = memmem(buf_a, len_a, buf_b, len_a);
+ if (found) {
+ *consecutive = true;
+ return adj_for_padding(buf_b + len_a, buf_a, len_a);
+ }
+
+ /* Try again at next PSB in buffer 'a' */
+ if (!intel_pt_step_psb(&buf_a, &len_a))
+ return buf_b; /* No overlap */
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
new file mode 100644
index 000000000..51c18d67f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -0,0 +1,133 @@
+/*
+ * intel_pt_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_DECODER_H__
+#define INCLUDE__INTEL_PT_DECODER_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include "intel-pt-insn-decoder.h"
+
+#define INTEL_PT_IN_TX (1 << 0)
+#define INTEL_PT_ABORT_TX (1 << 1)
+#define INTEL_PT_ASYNC (1 << 2)
+#define INTEL_PT_FUP_IP (1 << 3)
+
+enum intel_pt_sample_type {
+ INTEL_PT_BRANCH = 1 << 0,
+ INTEL_PT_INSTRUCTION = 1 << 1,
+ INTEL_PT_TRANSACTION = 1 << 2,
+ INTEL_PT_PTW = 1 << 3,
+ INTEL_PT_MWAIT_OP = 1 << 4,
+ INTEL_PT_PWR_ENTRY = 1 << 5,
+ INTEL_PT_EX_STOP = 1 << 6,
+ INTEL_PT_PWR_EXIT = 1 << 7,
+ INTEL_PT_CBR_CHG = 1 << 8,
+};
+
+enum intel_pt_period_type {
+ INTEL_PT_PERIOD_NONE,
+ INTEL_PT_PERIOD_INSTRUCTIONS,
+ INTEL_PT_PERIOD_TICKS,
+ INTEL_PT_PERIOD_MTC,
+};
+
+enum {
+ INTEL_PT_ERR_NOMEM = 1,
+ INTEL_PT_ERR_INTERN,
+ INTEL_PT_ERR_BADPKT,
+ INTEL_PT_ERR_NODATA,
+ INTEL_PT_ERR_NOINSN,
+ INTEL_PT_ERR_MISMAT,
+ INTEL_PT_ERR_OVR,
+ INTEL_PT_ERR_LOST,
+ INTEL_PT_ERR_UNK,
+ INTEL_PT_ERR_NELOOP,
+ INTEL_PT_ERR_MAX,
+};
+
+enum intel_pt_param_flags {
+ /*
+ * FUP packet can contain next linear instruction pointer instead of
+ * current linear instruction pointer.
+ */
+ INTEL_PT_FUP_WITH_NLIP = 1 << 0,
+};
+
+struct intel_pt_state {
+ enum intel_pt_sample_type type;
+ int err;
+ uint64_t from_ip;
+ uint64_t to_ip;
+ uint64_t cr3;
+ uint64_t tot_insn_cnt;
+ uint64_t timestamp;
+ uint64_t est_timestamp;
+ uint64_t trace_nr;
+ uint64_t ptw_payload;
+ uint64_t mwait_payload;
+ uint64_t pwre_payload;
+ uint64_t pwrx_payload;
+ uint64_t cbr_payload;
+ uint32_t flags;
+ enum intel_pt_insn_op insn_op;
+ int insn_len;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+struct intel_pt_insn;
+
+struct intel_pt_buffer {
+ const unsigned char *buf;
+ size_t len;
+ bool consecutive;
+ uint64_t ref_timestamp;
+ uint64_t trace_nr;
+};
+
+struct intel_pt_params {
+ int (*get_trace)(struct intel_pt_buffer *buffer, void *data);
+ int (*walk_insn)(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip, uint64_t to_ip,
+ uint64_t max_insn_cnt, void *data);
+ bool (*pgd_ip)(uint64_t ip, void *data);
+ void *data;
+ bool return_compression;
+ bool branch_enable;
+ uint64_t period;
+ enum intel_pt_period_type period_type;
+ unsigned max_non_turbo_ratio;
+ unsigned int mtc_period;
+ uint32_t tsc_ctc_ratio_n;
+ uint32_t tsc_ctc_ratio_d;
+ enum intel_pt_param_flags flags;
+};
+
+struct intel_pt_decoder;
+
+struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params);
+void intel_pt_decoder_free(struct intel_pt_decoder *decoder);
+
+const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder);
+
+unsigned char *intel_pt_find_overlap(unsigned char *buf_a, size_t len_a,
+ unsigned char *buf_b, size_t len_b,
+ bool have_tsc, bool *consecutive);
+
+int intel_pt__strerror(int code, char *buf, size_t buflen);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
new file mode 100644
index 000000000..548188280
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -0,0 +1,274 @@
+/*
+ * intel_pt_insn_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#include "event.h"
+
+#include "insn.h"
+
+#include "inat.c"
+#include "insn.c"
+
+#include "intel-pt-insn-decoder.h"
+#include "dump-insn.h"
+
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
+#error Instruction buffer size too small
+#endif
+
+/* Based on branch_type() from arch/x86/events/intel/lbr.c */
+static void intel_pt_insn_decoder(struct insn *insn,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ enum intel_pt_insn_op op = INTEL_PT_OP_OTHER;
+ enum intel_pt_insn_branch branch = INTEL_PT_BR_NO_BRANCH;
+ int ext;
+
+ intel_pt_insn->rel = 0;
+
+ if (insn_is_avx(insn)) {
+ intel_pt_insn->op = INTEL_PT_OP_OTHER;
+ intel_pt_insn->branch = INTEL_PT_BR_NO_BRANCH;
+ intel_pt_insn->length = insn->length;
+ return;
+ }
+
+ switch (insn->opcode.bytes[0]) {
+ case 0xf:
+ switch (insn->opcode.bytes[1]) {
+ case 0x05: /* syscall */
+ case 0x34: /* sysenter */
+ op = INTEL_PT_OP_SYSCALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x07: /* sysret */
+ case 0x35: /* sysexit */
+ op = INTEL_PT_OP_SYSRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0x80 ... 0x8f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ default:
+ break;
+ }
+ break;
+ case 0x70 ... 0x7f: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xc2: /* near ret */
+ case 0xc3: /* near ret */
+ case 0xca: /* far ret */
+ case 0xcb: /* far ret */
+ op = INTEL_PT_OP_RET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcf: /* iret */
+ op = INTEL_PT_OP_IRET;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xcc ... 0xce: /* int */
+ op = INTEL_PT_OP_INT;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe8: /* call near rel */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0x9a: /* call far absolute */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xe0 ... 0xe2: /* loop */
+ op = INTEL_PT_OP_LOOP;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe3: /* jcc */
+ op = INTEL_PT_OP_JCC;
+ branch = INTEL_PT_BR_CONDITIONAL;
+ break;
+ case 0xe9: /* jmp */
+ case 0xeb: /* jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_UNCONDITIONAL;
+ break;
+ case 0xea: /* far jmp */
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 0xff: /* call near absolute, call far absolute ind */
+ ext = (insn->modrm.bytes[0] >> 3) & 0x7;
+ switch (ext) {
+ case 2: /* near ind call */
+ case 3: /* far ind call */
+ op = INTEL_PT_OP_CALL;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ case 4:
+ case 5:
+ op = INTEL_PT_OP_JMP;
+ branch = INTEL_PT_BR_INDIRECT;
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ intel_pt_insn->op = op;
+ intel_pt_insn->branch = branch;
+ intel_pt_insn->length = insn->length;
+
+ if (branch == INTEL_PT_BR_CONDITIONAL ||
+ branch == INTEL_PT_BR_UNCONDITIONAL) {
+#if __BYTE_ORDER == __BIG_ENDIAN
+ switch (insn->immediate.nbytes) {
+ case 1:
+ intel_pt_insn->rel = insn->immediate.value;
+ break;
+ case 2:
+ intel_pt_insn->rel =
+ bswap_16((short)insn->immediate.value);
+ break;
+ case 4:
+ intel_pt_insn->rel = bswap_32(insn->immediate.value);
+ break;
+ default:
+ intel_pt_insn->rel = 0;
+ break;
+ }
+#else
+ intel_pt_insn->rel = insn->immediate.value;
+#endif
+ }
+}
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct insn insn;
+
+ insn_init(&insn, buf, len, x86_64);
+ insn_get_length(&insn);
+ if (!insn_complete(&insn) || insn.length > len)
+ return -1;
+ intel_pt_insn_decoder(&insn, intel_pt_insn);
+ if (insn.length < INTEL_PT_INSN_BUF_SZ)
+ memcpy(intel_pt_insn->buf, buf, insn.length);
+ else
+ memcpy(intel_pt_insn->buf, buf, INTEL_PT_INSN_BUF_SZ);
+ return 0;
+}
+
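+/*
+ * Example: decoding the 2-byte conditional jump "je +5" (74 05):
+ *
+ *	const unsigned char code[] = { 0x74, 0x05 };
+ *	struct intel_pt_insn pti;
+ *
+ *	intel_pt_get_insn(code, sizeof(code), 1, &pti);
+ *
+ * leaves pti.op == INTEL_PT_OP_JCC, pti.branch == INTEL_PT_BR_CONDITIONAL,
+ * pti.length == 2 and pti.rel == 5.
+ */
+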
+const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
+ u8 *inbuf, int inlen, int *lenp)
+{
+ struct insn insn;
+ int n, i;
+ int left;
+
+ insn_init(&insn, inbuf, inlen, x->is64bit);
+ insn_get_length(&insn);
+ if (!insn_complete(&insn) || insn.length > inlen)
+ return "<bad>";
+ if (lenp)
+ *lenp = insn.length;
+ left = sizeof(x->out);
+ n = snprintf(x->out, left, "insn: ");
+ left -= n;
+	for (i = 0; i < insn.length; i++) {
+		int printed = snprintf(x->out + n, left, "%02x ", inbuf[i]);
+
+		n += printed;
+		left -= printed;
+	}
+ return x->out;
+}
+
+const char *branch_name[] = {
+ [INTEL_PT_OP_OTHER] = "Other",
+ [INTEL_PT_OP_CALL] = "Call",
+ [INTEL_PT_OP_RET] = "Ret",
+ [INTEL_PT_OP_JCC] = "Jcc",
+ [INTEL_PT_OP_JMP] = "Jmp",
+ [INTEL_PT_OP_LOOP] = "Loop",
+ [INTEL_PT_OP_IRET] = "IRet",
+ [INTEL_PT_OP_INT] = "Int",
+ [INTEL_PT_OP_SYSCALL] = "Syscall",
+ [INTEL_PT_OP_SYSRET] = "Sysret",
+};
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op)
+{
+ return branch_name[op];
+}
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len)
+{
+ switch (intel_pt_insn->branch) {
+ case INTEL_PT_BR_CONDITIONAL:
+ case INTEL_PT_BR_UNCONDITIONAL:
+ return snprintf(buf, buf_len, "%s %s%d",
+ intel_pt_insn_name(intel_pt_insn->op),
+ intel_pt_insn->rel > 0 ? "+" : "",
+ intel_pt_insn->rel);
+ case INTEL_PT_BR_NO_BRANCH:
+ case INTEL_PT_BR_INDIRECT:
+ return snprintf(buf, buf_len, "%s",
+ intel_pt_insn_name(intel_pt_insn->op));
+ default:
+ break;
+ }
+ return 0;
+}
+
+int intel_pt_insn_type(enum intel_pt_insn_op op)
+{
+ switch (op) {
+ case INTEL_PT_OP_OTHER:
+ return 0;
+ case INTEL_PT_OP_CALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL;
+ case INTEL_PT_OP_RET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN;
+ case INTEL_PT_OP_JCC:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_JMP:
+ return PERF_IP_FLAG_BRANCH;
+ case INTEL_PT_OP_LOOP:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CONDITIONAL;
+ case INTEL_PT_OP_IRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_INT:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_INTERRUPT;
+ case INTEL_PT_OP_SYSCALL:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_SYSCALLRET;
+ case INTEL_PT_OP_SYSRET:
+ return PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_SYSCALLRET;
+ default:
+ return 0;
+ }
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
new file mode 100644
index 000000000..37ec5627a
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.h
@@ -0,0 +1,63 @@
+/*
+ * intel_pt_insn_decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_INSN_DECODER_H__
+#define INCLUDE__INTEL_PT_INSN_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_INSN_DESC_MAX 32
+#define INTEL_PT_INSN_BUF_SZ 16
+
+enum intel_pt_insn_op {
+ INTEL_PT_OP_OTHER,
+ INTEL_PT_OP_CALL,
+ INTEL_PT_OP_RET,
+ INTEL_PT_OP_JCC,
+ INTEL_PT_OP_JMP,
+ INTEL_PT_OP_LOOP,
+ INTEL_PT_OP_IRET,
+ INTEL_PT_OP_INT,
+ INTEL_PT_OP_SYSCALL,
+ INTEL_PT_OP_SYSRET,
+};
+
+enum intel_pt_insn_branch {
+ INTEL_PT_BR_NO_BRANCH,
+ INTEL_PT_BR_INDIRECT,
+ INTEL_PT_BR_CONDITIONAL,
+ INTEL_PT_BR_UNCONDITIONAL,
+};
+
+struct intel_pt_insn {
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ int length;
+ int32_t rel;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+};
+
+int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
+ struct intel_pt_insn *intel_pt_insn);
+
+const char *intel_pt_insn_name(enum intel_pt_insn_op op);
+
+int intel_pt_insn_desc(const struct intel_pt_insn *intel_pt_insn, char *buf,
+ size_t buf_len);
+
+int intel_pt_insn_type(enum intel_pt_insn_op op);
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.c b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
new file mode 100644
index 000000000..e02bc7b16
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.c
@@ -0,0 +1,156 @@
+/*
+ * intel_pt_log.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "intel-pt-log.h"
+#include "intel-pt-insn-decoder.h"
+
+#include "intel-pt-pkt-decoder.h"
+
+#define MAX_LOG_NAME 256
+
+static FILE *f;
+static char log_name[MAX_LOG_NAME];
+bool intel_pt_enable_logging;
+
+void intel_pt_log_enable(void)
+{
+ intel_pt_enable_logging = true;
+}
+
+void intel_pt_log_disable(void)
+{
+ if (f)
+ fflush(f);
+ intel_pt_enable_logging = false;
+}
+
+void intel_pt_log_set_name(const char *name)
+{
+ strncpy(log_name, name, MAX_LOG_NAME - 5);
+ strcat(log_name, ".log");
+}
+
+static void intel_pt_print_data(const unsigned char *buf, int len, uint64_t pos,
+ int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < len; i++)
+ fprintf(f, " %02x", buf[i]);
+ for (; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static void intel_pt_print_no_data(uint64_t pos, int indent)
+{
+ int i;
+
+ for (i = 0; i < indent; i++)
+ fprintf(f, " ");
+
+ fprintf(f, " %08" PRIx64 ": ", pos);
+ for (i = 0; i < 16; i++)
+ fprintf(f, " ");
+ fprintf(f, " ");
+}
+
+static int intel_pt_log_open(void)
+{
+ if (!intel_pt_enable_logging)
+ return -1;
+
+ if (f)
+ return 0;
+
+ if (!log_name[0])
+ return -1;
+
+ f = fopen(log_name, "w+");
+ if (!f) {
+ intel_pt_enable_logging = false;
+ return -1;
+ }
+
+ return 0;
+}
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf)
+{
+ char desc[INTEL_PT_PKT_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_data(buf, pkt_len, pos, 0);
+ intel_pt_pkt_desc(packet, desc, INTEL_PT_PKT_DESC_MAX);
+ fprintf(f, "%s\n", desc);
+}
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+ size_t len = intel_pt_insn->length;
+
+ if (intel_pt_log_open())
+ return;
+
+ if (len > INTEL_PT_INSN_BUF_SZ)
+ len = INTEL_PT_INSN_BUF_SZ;
+ intel_pt_print_data(intel_pt_insn->buf, len, ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip)
+{
+ char desc[INTEL_PT_INSN_DESC_MAX];
+
+ if (intel_pt_log_open())
+ return;
+
+ intel_pt_print_no_data(ip, 8);
+ if (intel_pt_insn_desc(intel_pt_insn, desc, INTEL_PT_INSN_DESC_MAX) > 0)
+ fprintf(f, "%s\n", desc);
+ else
+ fprintf(f, "Bad instruction!\n");
+}
+
+void __intel_pt_log(const char *fmt, ...)
+{
+ va_list args;
+
+ if (intel_pt_log_open())
+ return;
+
+ va_start(args, fmt);
+ vfprintf(f, fmt, args);
+ va_end(args);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-log.h b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
new file mode 100644
index 000000000..45b64f93f
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-log.h
@@ -0,0 +1,78 @@
+/*
+ * intel_pt_log.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_LOG_H__
+#define INCLUDE__INTEL_PT_LOG_H__
+
+#include <linux/compiler.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+struct intel_pt_pkt;
+
+void intel_pt_log_enable(void);
+void intel_pt_log_disable(void);
+void intel_pt_log_set_name(const char *name);
+
+void __intel_pt_log_packet(const struct intel_pt_pkt *packet, int pkt_len,
+ uint64_t pos, const unsigned char *buf);
+
+struct intel_pt_insn;
+
+void __intel_pt_log_insn(struct intel_pt_insn *intel_pt_insn, uint64_t ip);
+void __intel_pt_log_insn_no_data(struct intel_pt_insn *intel_pt_insn,
+ uint64_t ip);
+
+void __intel_pt_log(const char *fmt, ...) __printf(1, 2);
+
+#define intel_pt_log(fmt, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_packet(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_packet(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_insn(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_insn(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define intel_pt_log_insn_no_data(arg, ...) \
+ do { \
+ if (intel_pt_enable_logging) \
+ __intel_pt_log_insn_no_data(arg, ##__VA_ARGS__); \
+ } while (0)
+
+#define x64_fmt "0x%" PRIx64
+
+extern bool intel_pt_enable_logging;
+
+static inline void intel_pt_log_at(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s at " x64_fmt "\n", msg, u);
+}
+
+static inline void intel_pt_log_to(const char *msg, uint64_t u)
+{
+ intel_pt_log("%s to " x64_fmt "\n", msg, u);
+}
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
new file mode 100644
index 000000000..d426761a5
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -0,0 +1,638 @@
+/*
+ * intel_pt_pkt_decoder.c: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+#include <linux/compiler.h>
+
+#include "intel-pt-pkt-decoder.h"
+
+#define BIT(n) (1 << (n))
+
+#define BIT63 ((uint64_t)1 << 63)
+
+#define NR_FLAG BIT63
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define le16_to_cpu bswap_16
+#define le32_to_cpu bswap_32
+#define le64_to_cpu bswap_64
+#define memcpy_le64(d, s, n) do { \
+ memcpy((d), (s), (n)); \
+ *(d) = le64_to_cpu(*(d)); \
+} while (0)
+#else
+#define le16_to_cpu
+#define le32_to_cpu
+#define le64_to_cpu
+#define memcpy_le64 memcpy
+#endif
+
+static const char * const packet_name[] = {
+ [INTEL_PT_BAD] = "Bad Packet!",
+ [INTEL_PT_PAD] = "PAD",
+ [INTEL_PT_TNT] = "TNT",
+ [INTEL_PT_TIP_PGD] = "TIP.PGD",
+ [INTEL_PT_TIP_PGE] = "TIP.PGE",
+ [INTEL_PT_TSC] = "TSC",
+ [INTEL_PT_TMA] = "TMA",
+ [INTEL_PT_MODE_EXEC] = "MODE.Exec",
+ [INTEL_PT_MODE_TSX] = "MODE.TSX",
+ [INTEL_PT_MTC] = "MTC",
+ [INTEL_PT_TIP] = "TIP",
+ [INTEL_PT_FUP] = "FUP",
+ [INTEL_PT_CYC] = "CYC",
+ [INTEL_PT_VMCS] = "VMCS",
+ [INTEL_PT_PSB] = "PSB",
+ [INTEL_PT_PSBEND] = "PSBEND",
+ [INTEL_PT_CBR] = "CBR",
+ [INTEL_PT_TRACESTOP] = "TraceSTOP",
+ [INTEL_PT_PIP] = "PIP",
+ [INTEL_PT_OVF] = "OVF",
+ [INTEL_PT_MNT] = "MNT",
+ [INTEL_PT_PTWRITE] = "PTWRITE",
+ [INTEL_PT_PTWRITE_IP] = "PTWRITE",
+ [INTEL_PT_EXSTOP] = "EXSTOP",
+ [INTEL_PT_EXSTOP_IP] = "EXSTOP",
+ [INTEL_PT_MWAIT] = "MWAIT",
+ [INTEL_PT_PWRE] = "PWRE",
+ [INTEL_PT_PWRX] = "PWRX",
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type type)
+{
+ return packet_name[type];
+}
+
+static int intel_pt_get_long_tnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload;
+ int count;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ payload = le64_to_cpu(*(uint64_t *)buf);
+
+ for (count = 47; count; count--) {
+ if (payload & BIT63)
+ break;
+ payload <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = payload << 1;
+ return 8;
+}
+
+static int intel_pt_get_pip(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ uint64_t payload = 0;
+
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_PIP;
+ memcpy_le64(&payload, buf + 2, 6);
+ packet->payload = payload >> 1;
+ if (payload & 1)
+ packet->payload |= NR_FLAG;
+
+ return 8;
+}
+
+static int intel_pt_get_tracestop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_TRACESTOP;
+ return 2;
+}
+
+static int intel_pt_get_cbr(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_CBR;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 2));
+ return 4;
+}
+
+static int intel_pt_get_vmcs(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int count = (52 - 5) >> 3;
+
+ if (count < 1 || count > 7)
+ return INTEL_PT_BAD_PACKET;
+
+ if (len < count + 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_VMCS;
+ packet->count = count;
+ memcpy_le64(&packet->payload, buf + 2, count);
+
+ return count + 2;
+}
+
+static int intel_pt_get_ovf(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_OVF;
+ return 2;
+}
+
+static int intel_pt_get_psb(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int i;
+
+ if (len < 16)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ for (i = 2; i < 16; i += 2) {
+ if (buf[i] != 2 || buf[i + 1] != 0x82)
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = INTEL_PT_PSB;
+ return 16;
+}
+
+static int intel_pt_get_psbend(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PSBEND;
+ return 2;
+}
+
+static int intel_pt_get_tma(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ packet->type = INTEL_PT_TMA;
+ packet->payload = buf[2] | (buf[3] << 8);
+ packet->count = buf[5] | ((buf[6] & BIT(0)) << 8);
+ return 7;
+}
+
+static int intel_pt_get_pad(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_PAD;
+ return 1;
+}
+
+static int intel_pt_get_mnt(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 11)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MNT;
+ memcpy_le64(&packet->payload, buf + 3, 8);
+	return 11;
+}
+
+static int intel_pt_get_3byte(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[2]) {
+ case 0x88: /* MNT */
+ return intel_pt_get_mnt(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_ptwrite(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ packet->count = (buf[1] >> 5) & 0x3;
+ packet->type = buf[1] & BIT(7) ? INTEL_PT_PTWRITE_IP :
+ INTEL_PT_PTWRITE;
+
+ switch (packet->count) {
+ case 0:
+ if (len < 6)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 2));
+ return 6;
+ case 1:
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_exstop(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP;
+ return 2;
+}
+
+static int intel_pt_get_exstop_ip(struct intel_pt_pkt *packet)
+{
+ packet->type = INTEL_PT_EXSTOP_IP;
+ return 2;
+}
+
+static int intel_pt_get_mwait(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 10)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MWAIT;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 2));
+ return 10;
+}
+
+static int intel_pt_get_pwre(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 4)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRE;
+ memcpy_le64(&packet->payload, buf + 2, 2);
+ return 4;
+}
+
+static int intel_pt_get_pwrx(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_PWRX;
+ memcpy_le64(&packet->payload, buf + 2, 5);
+ return 7;
+}
+
+static int intel_pt_get_ext(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ if ((buf[1] & 0x1f) == 0x12)
+ return intel_pt_get_ptwrite(buf, len, packet);
+
+ switch (buf[1]) {
+ case 0xa3: /* Long TNT */
+ return intel_pt_get_long_tnt(buf, len, packet);
+ case 0x43: /* PIP */
+ return intel_pt_get_pip(buf, len, packet);
+ case 0x83: /* TraceStop */
+ return intel_pt_get_tracestop(packet);
+ case 0x03: /* CBR */
+ return intel_pt_get_cbr(buf, len, packet);
+ case 0xc8: /* VMCS */
+ return intel_pt_get_vmcs(buf, len, packet);
+ case 0xf3: /* OVF */
+ return intel_pt_get_ovf(packet);
+ case 0x82: /* PSB */
+ return intel_pt_get_psb(buf, len, packet);
+ case 0x23: /* PSBEND */
+ return intel_pt_get_psbend(packet);
+ case 0x73: /* TMA */
+ return intel_pt_get_tma(buf, len, packet);
+ case 0xC3: /* 3-byte header */
+ return intel_pt_get_3byte(buf, len, packet);
+ case 0x62: /* EXSTOP no IP */
+ return intel_pt_get_exstop(packet);
+ case 0xE2: /* EXSTOP with IP */
+ return intel_pt_get_exstop_ip(packet);
+ case 0xC2: /* MWAIT */
+ return intel_pt_get_mwait(buf, len, packet);
+ case 0x22: /* PWRE */
+ return intel_pt_get_pwre(buf, len, packet);
+ case 0xA2: /* PWRX */
+ return intel_pt_get_pwrx(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
+static int intel_pt_get_short_tnt(unsigned int byte,
+ struct intel_pt_pkt *packet)
+{
+ int count;
+
+ for (count = 6; count; count--) {
+ if (byte & BIT(7))
+ break;
+ byte <<= 1;
+ }
+
+ packet->type = INTEL_PT_TNT;
+ packet->count = count;
+ packet->payload = (uint64_t)byte << 57;
+
+ return 1;
+}
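+
+/*
+ * Worked example (editorial, not in the original source): for the short
+ * TNT byte 0x5C (binary 01011100), the loop above shifts once before
+ * hitting the stop bit, leaving count = 5 and the five TNT bits at bits
+ * 63..59 of the payload, which intel_pt_pkt_desc() renders as "NTTTN".
+ */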
+
+static int intel_pt_get_cyc(unsigned int byte, const unsigned char *buf,
+ size_t len, struct intel_pt_pkt *packet)
+{
+ unsigned int offs = 1, shift;
+ uint64_t payload = byte >> 3;
+
+ byte >>= 2;
+ len -= 1;
+ for (shift = 5; byte & 1; shift += 7) {
+ if (offs > 9)
+ return INTEL_PT_BAD_PACKET;
+ if (len < offs)
+ return INTEL_PT_NEED_MORE_BYTES;
+ byte = buf[offs++];
+ payload |= ((uint64_t)byte >> 1) << shift;
+ }
+
+ packet->type = INTEL_PT_CYC;
+ packet->payload = payload;
+ return offs;
+}
+
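+/*
+ * Editorial summary: the IP compression field in the top three bits of the
+ * first byte selects the payload size: 0 = no IP, 1 = 2 bytes, 2 = 4 bytes,
+ * 3 and 4 = 6 bytes, 6 = 8 bytes; values 5 and 7 are rejected as bad
+ * packets.
+ */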
+static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte,
+ const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int ip_len;
+
+ packet->count = byte >> 5;
+
+ switch (packet->count) {
+ case 0:
+ ip_len = 0;
+ break;
+ case 1:
+ if (len < 3)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 2;
+ packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1));
+ break;
+ case 2:
+ if (len < 5)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 4;
+ packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1));
+ break;
+ case 3:
+ case 4:
+ if (len < 7)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 6;
+ memcpy_le64(&packet->payload, buf + 1, 6);
+ break;
+ case 6:
+ if (len < 9)
+ return INTEL_PT_NEED_MORE_BYTES;
+ ip_len = 8;
+ packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1));
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ packet->type = type;
+
+ return ip_len + 1;
+}
+
+static int intel_pt_get_mode(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ switch (buf[1] >> 5) {
+ case 0:
+ packet->type = INTEL_PT_MODE_EXEC;
+ switch (buf[1] & 3) {
+ case 0:
+ packet->payload = 16;
+ break;
+ case 1:
+ packet->payload = 64;
+ break;
+ case 2:
+ packet->payload = 32;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ break;
+ case 1:
+ packet->type = INTEL_PT_MODE_TSX;
+ if ((buf[1] & 3) == 3)
+ return INTEL_PT_BAD_PACKET;
+ packet->payload = buf[1] & 3;
+ break;
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+
+ return 2;
+}
+
+static int intel_pt_get_tsc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 8)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_TSC;
+ memcpy_le64(&packet->payload, buf + 1, 7);
+ return 8;
+}
+
+static int intel_pt_get_mtc(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ if (len < 2)
+ return INTEL_PT_NEED_MORE_BYTES;
+ packet->type = INTEL_PT_MTC;
+ packet->payload = buf[1];
+ return 2;
+}
+
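+/*
+ * Editorial summary of the dispatch below: if bit 0 of the first byte is
+ * clear, the packet is PAD (0x00), an extended 0x02 opcode, or a short
+ * TNT; otherwise a set bit 1 means CYC, and the low 5 bits select TIP
+ * (0x0D), TIP.PGE (0x11), TIP.PGD (0x01), FUP (0x1D) or the 0x19 group
+ * (MODE 0x99, TSC 0x19, MTC 0x59).
+ */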
+static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ unsigned int byte;
+
+ memset(packet, 0, sizeof(struct intel_pt_pkt));
+
+ if (!len)
+ return INTEL_PT_NEED_MORE_BYTES;
+
+ byte = buf[0];
+ if (!(byte & BIT(0))) {
+ if (byte == 0)
+ return intel_pt_get_pad(packet);
+ if (byte == 2)
+ return intel_pt_get_ext(buf, len, packet);
+ return intel_pt_get_short_tnt(byte, packet);
+ }
+
+ if ((byte & 2))
+ return intel_pt_get_cyc(byte, buf, len, packet);
+
+ switch (byte & 0x1f) {
+ case 0x0D:
+ return intel_pt_get_ip(INTEL_PT_TIP, byte, buf, len, packet);
+ case 0x11:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGE, byte, buf, len,
+ packet);
+ case 0x01:
+ return intel_pt_get_ip(INTEL_PT_TIP_PGD, byte, buf, len,
+ packet);
+ case 0x1D:
+ return intel_pt_get_ip(INTEL_PT_FUP, byte, buf, len, packet);
+ case 0x19:
+ switch (byte) {
+ case 0x99:
+ return intel_pt_get_mode(buf, len, packet);
+ case 0x19:
+ return intel_pt_get_tsc(buf, len, packet);
+ case 0x59:
+ return intel_pt_get_mtc(buf, len, packet);
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+ default:
+ return INTEL_PT_BAD_PACKET;
+ }
+}
+
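+/*
+ * Editorial note: trailing zero bytes are PAD packets, so up to a total of
+ * 8 bytes of padding is folded into the preceding packet's length here;
+ * e.g. a 2-byte PSBEND followed by six zero bytes is consumed by a single
+ * call that returns 8.
+ */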
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet)
+{
+ int ret;
+
+ ret = intel_pt_do_get_packet(buf, len, packet);
+ if (ret > 0) {
+ while (ret < 8 && len > (size_t)ret && !buf[ret])
+ ret += 1;
+ }
+ return ret;
+}
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf,
+ size_t buf_len)
+{
+ int ret, i, nr;
+ unsigned long long payload = packet->payload;
+ const char *name = intel_pt_pkt_name(packet->type);
+
+ switch (packet->type) {
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_PSB:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_OVF:
+ return snprintf(buf, buf_len, "%s", name);
+ case INTEL_PT_TNT: {
+ size_t blen = buf_len;
+
+ ret = snprintf(buf, blen, "%s ", name);
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ for (i = 0; i < packet->count; i++) {
+ if (payload & BIT63)
+ ret = snprintf(buf, blen, "T");
+ else
+ ret = snprintf(buf, blen, "N");
+ if (ret < 0)
+ return ret;
+ buf += ret;
+ blen -= ret;
+ payload <<= 1;
+ }
+ ret = snprintf(buf, blen, " (%d)", packet->count);
+ if (ret < 0)
+ return ret;
+ blen -= ret;
+ return buf_len - blen;
+ }
+ case INTEL_PT_TIP_PGD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TIP:
+ case INTEL_PT_FUP:
+ if (!(packet->count))
+ return snprintf(buf, buf_len, "%s no ip", name);
+ __fallthrough;
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_MTC:
+ case INTEL_PT_MNT:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TSC:
+ return snprintf(buf, buf_len, "%s 0x%llx", name, payload);
+ case INTEL_PT_TMA:
+ return snprintf(buf, buf_len, "%s CTC 0x%x FC 0x%x", name,
+ (unsigned)payload, packet->count);
+ case INTEL_PT_MODE_EXEC:
+ return snprintf(buf, buf_len, "%s %lld", name, payload);
+ case INTEL_PT_MODE_TSX:
+ return snprintf(buf, buf_len, "%s TXAbort:%u InTX:%u",
+ name, (unsigned)(payload >> 1) & 1,
+ (unsigned)payload & 1);
+ case INTEL_PT_PIP:
+ nr = packet->payload & NR_FLAG ? 1 : 0;
+ payload &= ~NR_FLAG;
+ ret = snprintf(buf, buf_len, "%s 0x%llx (NR=%d)",
+ name, payload, nr);
+ return ret;
+ case INTEL_PT_PTWRITE:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:0", name, payload);
+ case INTEL_PT_PTWRITE_IP:
+ return snprintf(buf, buf_len, "%s 0x%llx IP:1", name, payload);
+ case INTEL_PT_EXSTOP:
+ return snprintf(buf, buf_len, "%s IP:0", name);
+ case INTEL_PT_EXSTOP_IP:
+ return snprintf(buf, buf_len, "%s IP:1", name);
+ case INTEL_PT_MWAIT:
+ return snprintf(buf, buf_len, "%s 0x%llx Hints 0x%x Extensions 0x%x",
+ name, payload, (unsigned int)(payload & 0xff),
+ (unsigned int)((payload >> 32) & 0x3));
+ case INTEL_PT_PWRE:
+ return snprintf(buf, buf_len, "%s 0x%llx HW:%u CState:%u Sub-CState:%u",
+ name, payload, !!(payload & 0x80),
+ (unsigned int)((payload >> 12) & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ case INTEL_PT_PWRX:
+ return snprintf(buf, buf_len, "%s 0x%llx Last CState:%u Deepest CState:%u Wake Reason 0x%x",
+ name, payload,
+ (unsigned int)((payload >> 4) & 0xf),
+ (unsigned int)(payload & 0xf),
+ (unsigned int)((payload >> 8) & 0xf));
+ default:
+ break;
+ }
+ return snprintf(buf, buf_len, "%s 0x%llx (%d)",
+ name, payload, packet->count);
+}
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
new file mode 100644
index 000000000..73ddc3a88
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.h
@@ -0,0 +1,77 @@
+/*
+ * intel-pt-pkt-decoder.h: Intel Processor Trace support
+ * Copyright (c) 2013-2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__INTEL_PT_PKT_DECODER_H__
+#define INCLUDE__INTEL_PT_PKT_DECODER_H__
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define INTEL_PT_PKT_DESC_MAX 256
+
+#define INTEL_PT_NEED_MORE_BYTES -1
+#define INTEL_PT_BAD_PACKET -2
+
+#define INTEL_PT_PSB_STR "\002\202\002\202\002\202\002\202" \
+ "\002\202\002\202\002\202\002\202"
+#define INTEL_PT_PSB_LEN 16
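+/*
+ * "\002\202" is octal for the 0x02 0x82 byte pair that intel_pt_get_psb()
+ * checks for, repeated to fill out the 16-byte PSB pattern.
+ */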
+
+#define INTEL_PT_PKT_MAX_SZ 16
+
+enum intel_pt_pkt_type {
+ INTEL_PT_BAD,
+ INTEL_PT_PAD,
+ INTEL_PT_TNT,
+ INTEL_PT_TIP_PGD,
+ INTEL_PT_TIP_PGE,
+ INTEL_PT_TSC,
+ INTEL_PT_TMA,
+ INTEL_PT_MODE_EXEC,
+ INTEL_PT_MODE_TSX,
+ INTEL_PT_MTC,
+ INTEL_PT_TIP,
+ INTEL_PT_FUP,
+ INTEL_PT_CYC,
+ INTEL_PT_VMCS,
+ INTEL_PT_PSB,
+ INTEL_PT_PSBEND,
+ INTEL_PT_CBR,
+ INTEL_PT_TRACESTOP,
+ INTEL_PT_PIP,
+ INTEL_PT_OVF,
+ INTEL_PT_MNT,
+ INTEL_PT_PTWRITE,
+ INTEL_PT_PTWRITE_IP,
+ INTEL_PT_EXSTOP,
+ INTEL_PT_EXSTOP_IP,
+ INTEL_PT_MWAIT,
+ INTEL_PT_PWRE,
+ INTEL_PT_PWRX,
+};
+
+struct intel_pt_pkt {
+ enum intel_pt_pkt_type type;
+ int count;
+ uint64_t payload;
+};
+
+const char *intel_pt_pkt_name(enum intel_pt_pkt_type);
+
+int intel_pt_get_packet(const unsigned char *buf, size_t len,
+ struct intel_pt_pkt *packet);
+
+int intel_pt_pkt_desc(const struct intel_pt_pkt *packet, char *buf, size_t len);
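+
+/*
+ * Example usage (an editorial sketch, not part of the original header):
+ * walk a raw trace buffer one packet at a time and print each packet's
+ * description.  intel_pt_dump() in intel-pt.c does the same thing with
+ * fuller error handling.
+ *
+ *	const unsigned char *buf = ...;
+ *	size_t len = ...;
+ *	struct intel_pt_pkt packet;
+ *	char desc[INTEL_PT_PKT_DESC_MAX];
+ *	int ret;
+ *
+ *	while (len) {
+ *		ret = intel_pt_get_packet(buf, len, &packet);
+ *		if (ret <= 0)
+ *			break;	(needs more bytes, or a bad packet)
+ *		if (intel_pt_pkt_desc(&packet, desc, sizeof(desc)) > 0)
+ *			printf("%s\n", desc);
+ *		buf += ret;
+ *		len -= ret;
+ *	}
+ */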
+
+#endif
diff --git a/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
new file mode 100644
index 000000000..e0b85930d
--- /dev/null
+++ b/tools/perf/util/intel-pt-decoder/x86-opcode-map.txt
@@ -0,0 +1,1072 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on following documentations.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+# (#326018-047US, June 2013)
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+# mnemonics that begin with lowercase 'v' accept a VEX or EVEX prefix
+# mnemonics that begin with lowercase 'k' accept a VEX prefix
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+# (ev): this opcode requires EVEX prefix.
+# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
+# (v): this opcode requires VEX prefix.
+# (v1): this opcode only supports 128-bit VEX.
+#
+# Last Prefix Superscripts
+# - (66): the last prefix is 0x66
+# - (F3): the last prefix is 0xF3
+# - (F2): the last prefix is 0xF2
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
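+#
+# Example (editorial): in the one-byte table below, the line
+#	90: NOP | PAUSE (F3) | XCHG r8,rAX
+# means opcode 0x90 decodes as NOP by default, as PAUSE when the last
+# prefix is 0xF3, and as XCHG r8,rAX when a REX.B prefix is present in
+# 64-bit mode.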
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64) | EVEX (Prefix)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction, but Yv is correct.
+af: SCAS/W/D/Q rAX,Yv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+# With a 0x66 prefix in 64-bit mode, AMD CPUs treat the immediate offset
+# in "near" jumps and calls as 16-bit. For CALL, the pushed return address
+# is 16 bits wide and RSP is decremented by 2, but RSP, unlike RIP, is not
+# truncated to 16 bits.
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+# NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no
+# operands, but they actually do. Also, vmovss and vmovsd only accept
+# 128-bit operands. MOVSS/MOVSD have too many forms (3) in the SDM; this
+# map just shows a typical form. Many AVX instructions lack the (v1)
+# superscript, according to the Intel AVX Programming Reference A.1.
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+18: Grp16 (1A)
+19:
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev | kandw/q Vk,Hk,Uk | kandb/d Vk,Hk,Uk (66)
+42: CMOVB/C/NAE Gv,Ev | kandnw/q Vk,Hk,Uk | kandnb/d Vk,Hk,Uk (66)
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev | knotw/q Vk,Uk | knotb/d Vk,Uk (66)
+45: CMOVNE/NZ Gv,Ev | korw/q Vk,Hk,Uk | korb/d Vk,Hk,Uk (66)
+46: CMOVBE/NA Gv,Ev | kxnorw/q Vk,Hk,Uk | kxnorb/d Vk,Hk,Uk (66)
+47: CMOVA/NBE Gv,Ev | kxorw/q Vk,Hk,Uk | kxorb/d Vk,Hk,Uk (66)
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev | kaddw/q Vk,Hk,Uk | kaddb/d Vk,Hk,Uk (66)
+4b: CMOVNP/PO Gv,Ev | kunpckbw Vk,Hk,Uk (66) | kunpckwd/dq Vk,Hk,Uk
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtqq2ps Vps,Wqq (evo) | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqa32/64 Vx,Wx (66),(evo) | vmovdqu Vx,Wx (F3) | vmovdqu32/64 Vx,Wx (F3),(evo) | vmovdqu8/16 Vx,Wx (F2),(ev)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: (v) is omitted because vzeroall and vzeroupper become emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy | vcvttps2udq/pd2udq Vx,Wpd (evo) | vcvttsd2usi Gv,Wx (F2),(ev) | vcvttss2usi Gv,Wx (F3),(ev) | vcvttps2uqq/pd2uqq Vx,Wx (66),(ev)
+79: VMWRITE Gy,Ey | vcvtps2udq/pd2udq Vx,Wpd (evo) | vcvtsd2usi Gv,Wx (F2),(ev) | vcvtss2usi Gv,Wx (F3),(ev) | vcvtps2uqq/pd2uqq Vx,Wx (66),(ev)
+7a: vcvtudq2pd/uqq2pd Vpd,Wx (F3),(ev) | vcvtudq2ps/uqq2ps Vpd,Wx (F2),(ev) | vcvttps2qq/pd2qq Vx,Wx (66),(ev)
+7b: vcvtusi2sd Vpd,Hpd,Ev (F2),(ev) | vcvtusi2ss Vps,Hps,Ev (F3),(ev) | vcvtps2qq/pd2qq Vx,Wx (66),(ev)
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev)
+# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JA/JNBE Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66)
+91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66)
+92: SETB/C/NAE Eb | kmovw Vk,Rv | kmovb Vk,Rv (66) | kmovq/d Vk,Rv (F2)
+93: SETAE/NB/NC Eb | kmovw Gv,Uk | kmovb Gv,Uk (66) | kmovq/d Gv,Uk (F2)
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb | kortestw/q Vk,Uk | kortestb/d Vk,Uk (66)
+99: SETNS Eb | ktestw/q Vk,Uk | ktestb/d Vk,Uk (66)
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1) | vpandd/q Vx,Hx,Wx (66),(evo)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1) | vpandnd/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtdq2pd/qq2pd Vx,Wdq (F3),(evo) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1) | vpord/q Vx,Hx,Wx (66),(evo)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1) | vpxord/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0xf0-0xff
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+ff: UD0
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66) | vpsrlvw Vx,Hx,Wx (66),(evo) | vpmovuswb Wx,Vx (F3),(ev)
+11: vpmovusdb Wx,Vd (F3),(ev) | vpsravw Vx,Hx,Wx (66),(ev)
+12: vpmovusqb Wx,Vq (F3),(ev) | vpsllvw Vx,Hx,Wx (66),(ev)
+13: vcvtph2ps Vx,Wx (66),(v) | vpmovusdw Wx,Vd (F3),(ev)
+14: blendvps Vdq,Wdq (66) | vpmovusqw Wx,Vq (F3),(ev) | vprorvd/q Vx,Hx,Wx (66),(evo)
+15: blendvpd Vdq,Wdq (66) | vpmovusqd Wx,Vq (F3),(ev) | vprolvd/q Vx,Hx,Wx (66),(evo)
+16: vpermps Vqq,Hqq,Wqq (66),(v) | vpermps/d Vqq,Hqq,Wqq (66),(evo)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v) | vbroadcastf32x2 Vqq,Wq (66),(evo)
+1a: vbroadcastf128 Vqq,Mdq (66),(v) | vbroadcastf32x4/64x2 Vqq,Wq (66),(evo)
+1b: vbroadcastf32x8/64x4 Vqq,Mdq (66),(ev)
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1f: vpabsq Vx,Wx (66),(ev)
+# 0x0f 0x38 0x20-0x2f
+20: vpmovsxbw Vx,Ux/Mq (66),(v1) | vpmovswb Wx,Vx (F3),(ev)
+21: vpmovsxbd Vx,Ux/Md (66),(v1) | vpmovsdb Wx,Vd (F3),(ev)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1) | vpmovsqb Wx,Vq (F3),(ev)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1) | vpmovsdw Wx,Vd (F3),(ev)
+24: vpmovsxwq Vx,Ux/Md (66),(v1) | vpmovsqw Wx,Vq (F3),(ev)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1) | vpmovsqd Wx,Vq (F3),(ev)
+26: vptestmb/w Vk,Hx,Wx (66),(ev) | vptestnmb/w Vk,Hx,Wx (F3),(ev)
+27: vptestmd/q Vk,Hx,Wx (66),(ev) | vptestnmd/q Vk,Hx,Wx (F3),(ev)
+28: vpmuldq Vx,Hx,Wx (66),(v1) | vpmovm2b/w Vx,Uk (F3),(ev)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1) | vpmovb2m/w2m Vk,Ux (F3),(ev)
+2a: vmovntdqa Vx,Mx (66),(v1) | vpbroadcastmb2q Vx,Uk (F3),(ev)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v) | vscalefps/d Vx,Hx,Wx (66),(evo)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v) | vscalefss/d Vx,Hx,Wx (66),(evo)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+# 0x0f 0x38 0x30-0x3f
+30: vpmovzxbw Vx,Ux/Mq (66),(v1) | vpmovwb Wx,Vx (F3),(ev)
+31: vpmovzxbd Vx,Ux/Md (66),(v1) | vpmovdb Wx,Vd (F3),(ev)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1) | vpmovqb Wx,Vq (F3),(ev)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1) | vpmovdw Wx,Vd (F3),(ev)
+34: vpmovzxwq Vx,Ux/Md (66),(v1) | vpmovqw Wx,Vq (F3),(ev)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1) | vpmovqd Wx,Vq (F3),(ev)
+36: vpermd Vqq,Hqq,Wqq (66),(v) | vpermd/q Vqq,Hqq,Wqq (66),(evo)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1) | vpmovm2d/q Vx,Uk (F3),(ev)
+39: vpminsd Vx,Hx,Wx (66),(v1) | vpminsd/q Vx,Hx,Wx (66),(evo) | vpmovd2m/q2m Vk,Ux (F3),(ev)
+3a: vpminuw Vx,Hx,Wx (66),(v1) | vpbroadcastmw2d Vx,Uk (F3),(ev)
+3b: vpminud Vx,Hx,Wx (66),(v1) | vpminud/q Vx,Hx,Wx (66),(evo)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1) | vpmaxsd/q Vx,Hx,Wx (66),(evo)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1) | vpmaxud/q Vx,Hx,Wx (66),(evo)
+# 0x0f 0x38 0x40-0x8f
+40: vpmulld Vx,Hx,Wx (66),(v1) | vpmulld/q Vx,Hx,Wx (66),(evo)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42: vgetexpps/d Vx,Wx (66),(ev)
+43: vgetexpss/d Vx,Hx,Wx (66),(ev)
+44: vplzcntd/q Vx,Wx (66),(ev)
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x4b
+4c: vrcp14ps/d Vpd,Wpd (66),(ev)
+4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev)
+4e: vrsqrt14ps/d Vpd,Wpd (66),(ev)
+4f: vrsqrt14ss/d Vsd,Hsd,Wsd (66),(ev)
+# Skip 0x50-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo)
+5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo)
+5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev)
+# Skip 0x5c-0x63
+64: vpblendmd/q Vx,Hx,Wx (66),(ev)
+65: vblendmps/d Vx,Hx,Wx (66),(ev)
+66: vpblendmb/w Vx,Hx,Wx (66),(ev)
+# Skip 0x67-0x74
+75: vpermi2b/w Vx,Hx,Wx (66),(ev)
+76: vpermi2d/q Vx,Hx,Wx (66),(ev)
+77: vpermi2ps/d Vx,Hx,Wx (66),(ev)
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+7a: vpbroadcastb Vx,Rv (66),(ev)
+7b: vpbroadcastw Vx,Rv (66),(ev)
+7c: vpbroadcastd/q Vx,Rv (66),(ev)
+7d: vpermt2b/w Vx,Hx,Wx (66),(ev)
+7e: vpermt2d/q Vx,Hx,Wx (66),(ev)
+7f: vpermt2ps/d Vx,Hx,Wx (66),(ev)
+80: INVEPT Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+83: vpmultishiftqb Vx,Hx,Wx (66),(ev)
+88: vexpandps/d Vpd,Wpd (66),(ev)
+89: vpexpandd/q Vx,Wx (66),(ev)
+8a: vcompressps/d Wx,Vx (66),(ev)
+8b: vpcompressd/q Wx,Vx (66),(ev)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8d: vpermb/w Vx,Hx,Wx (66),(ev)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+# 0x0f 0x38 0x90-0xbf (FMA)
+90: vgatherdd/q Vx,Hx,Wx (66),(v) | vpgatherdd/q Vx,Wx (66),(evo)
+91: vgatherqd/q Vx,Hx,Wx (66),(v) | vpgatherqd/q Vx,Wx (66),(evo)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a0: vpscatterdd/q Wx,Vx (66),(ev)
+a1: vpscatterqd/q Wx,Vx (66),(ev)
+a2: vscatterdps/d Wx,Vx (66),(ev)
+a3: vscatterqps/d Wx,Vx (66),(ev)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b4: vpmadd52luq Vx,Hx,Wx (66),(ev)
+b5: vpmadd52huq Vx,Hx,Wx (66),(ev)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+# 0x0f 0x38 0xc0-0xff
+c4: vpconflictd/q Vx,Wx (66),(ev)
+c6: Grp18 (1A)
+c7: Grp19 (1A)
+c8: sha1nexte Vdq,Wdq | vexp2ps/d Vx,Wx (66),(ev)
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq | vrcp28ps/d Vx,Wx (66),(ev)
+cb: sha256rnds2 Vdq,Wdq | vrcp28ss/d Vx,Hx,Wx (66),(ev)
+cc: sha256msg1 Vdq,Wdq | vrsqrt28ps/d Vx,Wx (66),(ev)
+cd: sha256msg2 Vdq,Wdq | vrsqrt28ss/d Vx,Hx,Wx (66),(ev)
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03: valignd/q Vx,Hx,Wx,Ib (66),(ev)
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo)
+09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo)
+0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v) | vinsertf32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+19: vextractf128 Wdq,Vqq,Ib (66),(v) | vextractf32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+1a: vinsertf32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+1b: vextractf32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+1e: vpcmpud/q Vk,Hd,Wd,Ib (66),(ev)
+1f: vpcmpd/q Vk,Hd,Wd,Ib (66),(ev)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev)
+26: vgetmantps/d Vx,Wx,Ib (66),(ev)
+27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev)
+30: kshiftrb/w Vk,Uk,Ib (66),(v)
+31: kshiftrd/q Vk,Uk,Ib (66),(v)
+32: kshiftlb/w Vk,Uk,Ib (66),(v)
+33: kshiftld/q Vk,Uk,Ib (66),(v)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v) | vinserti32x4/64x2 Vqq,Hqq,Wqq,Ib (66),(evo)
+39: vextracti128 Wdq,Vqq,Ib (66),(v) | vextracti32x4/64x2 Wdq,Vqq,Ib (66),(evo)
+3a: vinserti32x8/64x4 Vqq,Hqq,Wqq,Ib (66),(ev)
+3b: vextracti32x8/64x4 Wdq,Vqq,Ib (66),(ev)
+3e: vpcmpub/w Vk,Hk,Wx,Ib (66),(ev)
+3f: vpcmpb/w Vk,Hk,Wx,Ib (66),(ev)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1) | vdbpsadbw Vx,Hx,Wx,Ib (66),(evo)
+43: vshufi32x4/64x2 Vx,Hx,Wx,Ib (66),(ev)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+50: vrangeps/d Vx,Hx,Wx,Ib (66),(ev)
+51: vrangess/d Vx,Hx,Wx,Ib (66),(ev)
+54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev)
+55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev)
+56: vreduceps/d Vx,Wx,Ib (66),(ev)
+57: vreducess/d Vx,Hx,Wx,Ib (66),(ev)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+66: vfpclassps/d Vk,Wx,Ib (66),(ev)
+67: vfpclassss/d Vk,Wx,Ib (66),(ev)
+cc: sha1rnds4 Vdq,Wdq,Ib
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1: TEST Eb,Ib
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Mp
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
+EndTable
+
+GrpTable: Grp10
+# all are UD1
+0: UD1
+1: UD1
+2: UD1
+3: UD1
+4: UD1
+5: UD1
+6: UD1
+7: UD1
+EndTable
+
+# Grp11A and Grp11B are expressed as Grp11 in the Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp13
+0: vprord/q Hx,Wx,Ib (66),(ev)
+1: vprold/q Hx,Wx,Ib (66),(ev)
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1) | vpsrad/q Hx,Ux,Ib (66),(evo)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp15
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxrstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+4: XSAVE | ptwrite Ey (F3),(11B)
+5: XRSTOR | lfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
+GrpTable: Grp18
+1: vgatherpf0dps/d Wx (66),(ev)
+2: vgatherpf1dps/d Wx (66),(ev)
+5: vscatterpf0dps/d Wx (66),(ev)
+6: vscatterpf1dps/d Wx (66),(ev)
+EndTable
+
+GrpTable: Grp19
+1: vgatherpf0qps/d Wx (66),(ev)
+2: vgatherpf1qps/d Wx (66),(ev)
+5: vscatterpf0qps/d Wx (66),(ev)
+6: vscatterpf1qps/d Wx (66),(ev)
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
new file mode 100644
index 000000000..256b4755c
--- /dev/null
+++ b/tools/perf/util/intel-pt.c
@@ -0,0 +1,2636 @@
+/*
+ * intel-pt.c: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+#include "session.h"
+#include "machine.h"
+#include "memswap.h"
+#include "sort.h"
+#include "tool.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "map.h"
+#include "color.h"
+#include "util.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "symbol.h"
+#include "callchain.h"
+#include "dso.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "tsc.h"
+#include "intel-pt.h"
+#include "config.h"
+
+#include "intel-pt-decoder/intel-pt-log.h"
+#include "intel-pt-decoder/intel-pt-decoder.h"
+#include "intel-pt-decoder/intel-pt-insn-decoder.h"
+#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
+
+struct intel_pt {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ u32 auxtrace_type;
+ struct perf_session *session;
+ struct machine *machine;
+ struct perf_evsel *switch_evsel;
+ struct thread *unknown_thread;
+ bool timeless_decoding;
+ bool sampling_mode;
+ bool snapshot_mode;
+ bool per_cpu_mmaps;
+ bool have_tsc;
+ bool data_queued;
+ bool est_tsc;
+ bool sync_switch;
+ bool mispred_all;
+ int have_sched_switch;
+ u32 pmu_type;
+ u64 kernel_start;
+ u64 switch_ip;
+ u64 ptss_ip;
+
+ struct perf_tsc_conversion tc;
+ bool cap_user_time_zero;
+
+ struct itrace_synth_opts synth_opts;
+
+ bool sample_instructions;
+ u64 instructions_sample_type;
+ u64 instructions_id;
+
+ bool sample_branches;
+ u32 branches_filter;
+ u64 branches_sample_type;
+ u64 branches_id;
+
+ bool sample_transactions;
+ u64 transactions_sample_type;
+ u64 transactions_id;
+
+ bool sample_ptwrites;
+ u64 ptwrites_sample_type;
+ u64 ptwrites_id;
+
+ bool sample_pwr_events;
+ u64 pwr_events_sample_type;
+ u64 mwait_id;
+ u64 pwre_id;
+ u64 exstop_id;
+ u64 pwrx_id;
+ u64 cbr_id;
+
+ u64 tsc_bit;
+ u64 mtc_bit;
+ u64 mtc_freq_bits;
+ u32 tsc_ctc_ratio_n;
+ u32 tsc_ctc_ratio_d;
+ u64 cyc_bit;
+ u64 noretcomp_bit;
+ unsigned max_non_turbo_ratio;
+ unsigned cbr2khz;
+
+ unsigned long num_events;
+
+ char *filter;
+ struct addr_filters filts;
+};
+
+enum switch_state {
+ INTEL_PT_SS_NOT_TRACING,
+ INTEL_PT_SS_UNKNOWN,
+ INTEL_PT_SS_TRACING,
+ INTEL_PT_SS_EXPECTING_SWITCH_EVENT,
+ INTEL_PT_SS_EXPECTING_SWITCH_IP,
+};
+
+struct intel_pt_queue {
+ struct intel_pt *pt;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_buffer *old_buffer;
+ void *decoder;
+ const struct intel_pt_state *state;
+ struct ip_callchain *chain;
+ struct branch_stack *last_branch;
+ struct branch_stack *last_branch_rb;
+ size_t last_branch_pos;
+ union perf_event *event_buf;
+ bool on_heap;
+ bool stop;
+ bool step_through_buffers;
+ bool use_buffer_pid_tid;
+ bool sync_switch;
+ pid_t pid, tid;
+ int cpu;
+ int switch_state;
+ pid_t next_tid;
+ struct thread *thread;
+ bool exclude_kernel;
+ bool have_sample;
+ u64 time;
+ u64 timestamp;
+ u32 flags;
+ u16 insn_len;
+ u64 last_insn_cnt;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
+ unsigned char *buf, size_t len)
+{
+ struct intel_pt_pkt packet;
+ size_t pos = 0;
+ int ret, pkt_len, i;
+ char desc[INTEL_PT_PKT_DESC_MAX];
+ const char *color = PERF_COLOR_BLUE;
+
+ color_fprintf(stdout, color,
+ ". ... Intel Processor Trace data: size %zu bytes\n",
+ len);
+
+ while (len) {
+ ret = intel_pt_get_packet(buf, len, &packet);
+ if (ret > 0)
+ pkt_len = ret;
+ else
+ pkt_len = 1;
+ printf(".");
+ color_fprintf(stdout, color, " %08x: ", pos);
+ for (i = 0; i < pkt_len; i++)
+ color_fprintf(stdout, color, " %02x", buf[i]);
+ for (; i < 16; i++)
+ color_fprintf(stdout, color, " ");
+ if (ret > 0) {
+ ret = intel_pt_pkt_desc(&packet, desc,
+ INTEL_PT_PKT_DESC_MAX);
+ if (ret > 0)
+ color_fprintf(stdout, color, " %s\n", desc);
+ } else {
+ color_fprintf(stdout, color, " Bad packet!\n");
+ }
+ pos += pkt_len;
+ buf += pkt_len;
+ len -= pkt_len;
+ }
+}
+
+static void intel_pt_dump_event(struct intel_pt *pt, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ intel_pt_dump(pt, buf, len);
+}
+
+static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
+ struct auxtrace_buffer *b)
+{
+ bool consecutive = false;
+ void *start;
+
+ start = intel_pt_find_overlap(a->data, a->size, b->data, b->size,
+ pt->have_tsc, &consecutive);
+ if (!start)
+ return -EINVAL;
+ b->use_size = b->data + b->size - start;
+ b->use_data = start;
+ if (b->use_size && consecutive)
+ b->consecutive = true;
+ return 0;
+}
+
+/* This function assumes data is processed sequentially only */
+static int intel_pt_get_trace(struct intel_pt_buffer *b, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct auxtrace_buffer *buffer = ptq->buffer;
+ struct auxtrace_buffer *old_buffer = ptq->old_buffer;
+ struct auxtrace_queue *queue;
+ bool might_overlap;
+
+ if (ptq->stop) {
+ b->len = 0;
+ return 0;
+ }
+
+ queue = &ptq->pt->queues.queue_array[ptq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ ptq->buffer = buffer;
+
+ if (!buffer->data) {
+ int fd = perf_data__fd(ptq->pt->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ might_overlap = ptq->pt->snapshot_mode || ptq->pt->sampling_mode;
+ if (might_overlap && !buffer->consecutive && old_buffer &&
+ intel_pt_do_fix_overlap(ptq->pt, old_buffer, buffer))
+ return -ENOMEM;
+
+ if (buffer->use_data) {
+ b->len = buffer->use_size;
+ b->buf = buffer->use_data;
+ } else {
+ b->len = buffer->size;
+ b->buf = buffer->data;
+ }
+ b->ref_timestamp = buffer->reference;
+
+ if (!old_buffer || (might_overlap && !buffer->consecutive)) {
+ b->consecutive = false;
+ b->trace_nr = buffer->buffer_nr + 1;
+ } else {
+ b->consecutive = true;
+ }
+
+ if (ptq->step_through_buffers)
+ ptq->stop = true;
+
+ if (b->len) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ ptq->old_buffer = buffer;
+ } else {
+ auxtrace_buffer__drop_data(buffer);
+ return intel_pt_get_trace(b, data);
+ }
+
+ return 0;
+}
+
+struct intel_pt_cache_entry {
+ struct auxtrace_cache_entry entry;
+ u64 insn_cnt;
+ u64 byte_cnt;
+ enum intel_pt_insn_op op;
+ enum intel_pt_insn_branch branch;
+ int length;
+ int32_t rel;
+ char insn[INTEL_PT_INSN_BUF_SZ];
+};
+
+static int intel_pt_config_div(const char *var, const char *value, void *data)
+{
+ int *d = data;
+ long val;
+
+ if (!strcmp(var, "intel-pt.cache-divisor")) {
+ val = strtol(value, NULL, 0);
+ if (val > 0 && val <= INT_MAX)
+ *d = val;
+ }
+
+ return 0;
+}
+
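+/*
+ * The divisor is read from the user's perf config, e.g. (illustrative)
+ * in ~/.perfconfig:
+ *
+ *	[intel-pt]
+ *		cache-divisor = 64
+ */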
+static int intel_pt_cache_divisor(void)
+{
+ static int d;
+
+ if (d)
+ return d;
+
+ perf_config(intel_pt_config_div, &d);
+
+ if (!d)
+ d = 64;
+
+ return d;
+}
+
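+/*
+ * Illustrative sizing (editorial): a 64 MiB DSO with the default divisor
+ * of 64 gives size = 1 MiB, so the function returns
+ * 32 - __builtin_clz(0x100000) = 21 hash bits; very small DSOs get the
+ * floor of 10 bits.
+ */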
+static unsigned int intel_pt_cache_size(struct dso *dso,
+ struct machine *machine)
+{
+ off_t size;
+
+ size = dso__data_size(dso, machine);
+ size /= intel_pt_cache_divisor();
+ if (size < 1000)
+ return 10;
+ if (size > (1 << 21))
+ return 21;
+ return 32 - __builtin_clz(size);
+}
+
+static struct auxtrace_cache *intel_pt_cache(struct dso *dso,
+ struct machine *machine)
+{
+ struct auxtrace_cache *c;
+ unsigned int bits;
+
+ if (dso->auxtrace_cache)
+ return dso->auxtrace_cache;
+
+ bits = intel_pt_cache_size(dso, machine);
+
+ /* Ignoring cache creation failure */
+ c = auxtrace_cache__new(bits, sizeof(struct intel_pt_cache_entry), 200);
+
+ dso->auxtrace_cache = c;
+
+ return c;
+}
+
+static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
+ u64 offset, u64 insn_cnt, u64 byte_cnt,
+ struct intel_pt_insn *intel_pt_insn)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+ struct intel_pt_cache_entry *e;
+ int err;
+
+ if (!c)
+ return -ENOMEM;
+
+ e = auxtrace_cache__alloc_entry(c);
+ if (!e)
+ return -ENOMEM;
+
+ e->insn_cnt = insn_cnt;
+ e->byte_cnt = byte_cnt;
+ e->op = intel_pt_insn->op;
+ e->branch = intel_pt_insn->branch;
+ e->length = intel_pt_insn->length;
+ e->rel = intel_pt_insn->rel;
+ memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
+
+ err = auxtrace_cache__add(c, offset, &e->entry);
+ if (err)
+ auxtrace_cache__free_entry(c, e);
+
+ return err;
+}
+
+static struct intel_pt_cache_entry *
+intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return NULL;
+
+ return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
+}
+
+static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
+{
+ return ip >= pt->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
+ uint64_t *insn_cnt_ptr, uint64_t *ip,
+ uint64_t to_ip, uint64_t max_insn_cnt,
+ void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct machine *machine = ptq->pt->machine;
+ struct thread *thread;
+ struct addr_location al;
+ unsigned char buf[INTEL_PT_INSN_BUF_SZ];
+ ssize_t len;
+ int x86_64;
+ u8 cpumode;
+ u64 offset, start_offset, start_ip;
+ u64 insn_cnt = 0;
+ bool one_map = true;
+
+ intel_pt_insn->length = 0;
+
+ if (to_ip && *ip == to_ip)
+ goto out_no_cache;
+
+ cpumode = intel_pt_cpumode(ptq->pt, *ip);
+
+ thread = ptq->thread;
+ if (!thread) {
+ if (cpumode != PERF_RECORD_MISC_KERNEL)
+ return -EINVAL;
+ thread = ptq->pt->unknown_thread;
+ }
+
+ while (1) {
+ if (!thread__find_map(thread, cpumode, *ip, &al) || !al.map->dso)
+ return -EINVAL;
+
+ if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR &&
+ dso__data_status_seen(al.map->dso,
+ DSO_DATA_STATUS_SEEN_ITRACE))
+ return -ENOENT;
+
+ offset = al.map->map_ip(al.map, *ip);
+
+ if (!to_ip && one_map) {
+ struct intel_pt_cache_entry *e;
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ if (e &&
+ (!max_insn_cnt || e->insn_cnt <= max_insn_cnt)) {
+ *insn_cnt_ptr = e->insn_cnt;
+ *ip += e->byte_cnt;
+ intel_pt_insn->op = e->op;
+ intel_pt_insn->branch = e->branch;
+ intel_pt_insn->length = e->length;
+ intel_pt_insn->rel = e->rel;
+ memcpy(intel_pt_insn->buf, e->insn,
+ INTEL_PT_INSN_BUF_SZ);
+ intel_pt_log_insn_no_data(intel_pt_insn, *ip);
+ return 0;
+ }
+ }
+
+ start_offset = offset;
+ start_ip = *ip;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al.map);
+
+ x86_64 = al.map->dso->is_64_bit;
+
+ while (1) {
+ len = dso__data_read_offset(al.map->dso, machine,
+ offset, buf,
+ INTEL_PT_INSN_BUF_SZ);
+ if (len <= 0)
+ return -EINVAL;
+
+ if (intel_pt_get_insn(buf, len, x86_64, intel_pt_insn))
+ return -EINVAL;
+
+ intel_pt_log_insn(intel_pt_insn, *ip);
+
+ insn_cnt += 1;
+
+ if (intel_pt_insn->branch != INTEL_PT_BR_NO_BRANCH)
+ goto out;
+
+ if (max_insn_cnt && insn_cnt >= max_insn_cnt)
+ goto out_no_cache;
+
+ *ip += intel_pt_insn->length;
+
+ if (to_ip && *ip == to_ip) {
+ intel_pt_insn->length = 0;
+ goto out_no_cache;
+ }
+
+ if (*ip >= al.map->end)
+ break;
+
+ offset += intel_pt_insn->length;
+ }
+ one_map = false;
+ }
+out:
+ *insn_cnt_ptr = insn_cnt;
+
+ if (!one_map)
+ goto out_no_cache;
+
+ /*
+	 * Didn't look up in the 'to_ip' case, so do it now to prevent duplicate
+ * entries.
+ */
+ if (to_ip) {
+ struct intel_pt_cache_entry *e;
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, start_offset);
+ if (e)
+ return 0;
+ }
+
+ /* Ignore cache errors */
+ intel_pt_cache_add(al.map->dso, machine, start_offset, insn_cnt,
+ *ip - start_ip, intel_pt_insn);
+
+ return 0;
+
+out_no_cache:
+ *insn_cnt_ptr = insn_cnt;
+ return 0;
+}
+
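+/*
+ * Decide whether a TIP.PGD destination lies outside the trace, i.e. it hit a
+ * trace-stop region or, when filter regions exist, missed all of them.
+ */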
+static bool intel_pt_match_pgd_ip(struct intel_pt *pt, uint64_t ip,
+ uint64_t offset, const char *filename)
+{
+ struct addr_filter *filt;
+ bool have_filter = false;
+ bool hit_tracestop = false;
+ bool hit_filter = false;
+
+ list_for_each_entry(filt, &pt->filts.head, list) {
+ if (filt->start)
+ have_filter = true;
+
+ if ((filename && !filt->filename) ||
+ (!filename && filt->filename) ||
+ (filename && strcmp(filename, filt->filename)))
+ continue;
+
+ if (!(offset >= filt->addr && offset < filt->addr + filt->size))
+ continue;
+
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s hit filter: %s offset %#"PRIx64" size %#"PRIx64"\n",
+ ip, offset, filename ? filename : "[kernel]",
+ filt->start ? "filter" : "stop",
+ filt->addr, filt->size);
+
+ if (filt->start)
+ hit_filter = true;
+ else
+ hit_tracestop = true;
+ }
+
+ if (!hit_tracestop && !hit_filter)
+ intel_pt_log("TIP.PGD ip %#"PRIx64" offset %#"PRIx64" in %s is not in a filter region\n",
+ ip, offset, filename ? filename : "[kernel]");
+
+ return hit_tracestop || (have_filter && !hit_filter);
+}
+
+static int __intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ struct intel_pt_queue *ptq = data;
+ struct thread *thread;
+ struct addr_location al;
+ u8 cpumode;
+ u64 offset;
+
+ if (ip >= ptq->pt->kernel_start)
+ return intel_pt_match_pgd_ip(ptq->pt, ip, ip, NULL);
+
+ cpumode = PERF_RECORD_MISC_USER;
+
+ thread = ptq->thread;
+ if (!thread)
+ return -EINVAL;
+
+ if (!thread__find_map(thread, cpumode, ip, &al) || !al.map->dso)
+ return -EINVAL;
+
+ offset = al.map->map_ip(al.map, ip);
+
+ return intel_pt_match_pgd_ip(ptq->pt, ip, offset,
+ al.map->dso->long_name);
+}
+
+static bool intel_pt_pgd_ip(uint64_t ip, void *data)
+{
+ return __intel_pt_pgd_ip(ip, data) > 0;
+}
+
+static bool intel_pt_get_config(struct intel_pt *pt,
+ struct perf_event_attr *attr, u64 *config)
+{
+ if (attr->type == pt->pmu_type) {
+ if (config)
+ *config = attr->config;
+ return true;
+ }
+
+ return false;
+}
+
+static bool intel_pt_exclude_kernel(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
+ !evsel->attr.exclude_kernel)
+ return false;
+ }
+ return true;
+}
+
+static bool intel_pt_return_compression(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ u64 config;
+
+ if (!pt->noretcomp_bit)
+ return true;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config) &&
+ (config & pt->noretcomp_bit))
+ return false;
+ }
+ return true;
+}
+
+static bool intel_pt_branch_enable(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ u64 config;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config) &&
+ (config & 1) && !(config & 0x2000))
+ return false;
+ }
+ return true;
+}
+
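+/*
+ * mtc_freq_bits is a mask, so shift the config value down by the position of
+ * the mask's lowest set bit to extract the MTC period.
+ */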
+static unsigned int intel_pt_mtc_period(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ unsigned int shift;
+ u64 config;
+
+ if (!pt->mtc_freq_bits)
+ return 0;
+
+ for (shift = 0, config = pt->mtc_freq_bits; !(config & 1); shift++)
+ config >>= 1;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config))
+ return (config & pt->mtc_freq_bits) >> shift;
+ }
+ return 0;
+}
+
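+/*
+ * Decoding is "timeless" when timestamps cannot be used, in which case queues
+ * are processed per-thread rather than ordered by timestamp.
+ */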
+static bool intel_pt_timeless_decoding(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ bool timeless_decoding = true;
+ u64 config;
+
+ if (!pt->tsc_bit || !pt->cap_user_time_zero)
+ return true;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->attr.sample_type & PERF_SAMPLE_TIME))
+ return true;
+ if (intel_pt_get_config(pt, &evsel->attr, &config)) {
+ if (config & pt->tsc_bit)
+ timeless_decoding = false;
+ else
+ return true;
+ }
+ }
+ return timeless_decoding;
+}
+
+static bool intel_pt_tracing_kernel(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, NULL) &&
+ !evsel->attr.exclude_kernel)
+ return true;
+ }
+ return false;
+}
+
+static bool intel_pt_have_tsc(struct intel_pt *pt)
+{
+ struct perf_evsel *evsel;
+ bool have_tsc = false;
+ u64 config;
+
+ if (!pt->tsc_bit)
+ return false;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (intel_pt_get_config(pt, &evsel->attr, &config)) {
+ if (config & pt->tsc_bit)
+ have_tsc = true;
+ else
+ return false;
+ }
+ }
+ return have_tsc;
+}
+
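+/*
+ * Convert nanoseconds to TSC ticks: the inverse of the perf time conversion
+ * time = (ticks * time_mult) >> time_shift.  Splitting ns into quotient and
+ * remainder avoids overflowing 64 bits when ns is shifted directly.
+ */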
+static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
+{
+ u64 quot, rem;
+
+ quot = ns / pt->tc.time_mult;
+ rem = ns % pt->tc.time_mult;
+ return (quot << pt->tc.time_shift) + (rem << pt->tc.time_shift) /
+ pt->tc.time_mult;
+}
+
+static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
+ unsigned int queue_nr)
+{
+ struct intel_pt_params params = { .get_trace = 0, };
+ struct perf_env *env = pt->machine->env;
+ struct intel_pt_queue *ptq;
+
+ ptq = zalloc(sizeof(struct intel_pt_queue));
+ if (!ptq)
+ return NULL;
+
+ if (pt->synth_opts.callchain) {
+ size_t sz = sizeof(struct ip_callchain);
+
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+ ptq->chain = zalloc(sz);
+ if (!ptq->chain)
+ goto out_free;
+ }
+
+ if (pt->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += pt->synth_opts.last_branch_sz *
+ sizeof(struct branch_entry);
+ ptq->last_branch = zalloc(sz);
+ if (!ptq->last_branch)
+ goto out_free;
+ ptq->last_branch_rb = zalloc(sz);
+ if (!ptq->last_branch_rb)
+ goto out_free;
+ }
+
+ ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!ptq->event_buf)
+ goto out_free;
+
+ ptq->pt = pt;
+ ptq->queue_nr = queue_nr;
+ ptq->exclude_kernel = intel_pt_exclude_kernel(pt);
+ ptq->pid = -1;
+ ptq->tid = -1;
+ ptq->cpu = -1;
+ ptq->next_tid = -1;
+
+ params.get_trace = intel_pt_get_trace;
+ params.walk_insn = intel_pt_walk_next_insn;
+ params.data = ptq;
+ params.return_compression = intel_pt_return_compression(pt);
+ params.branch_enable = intel_pt_branch_enable(pt);
+ params.max_non_turbo_ratio = pt->max_non_turbo_ratio;
+ params.mtc_period = intel_pt_mtc_period(pt);
+ params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
+ params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+
+ if (pt->filts.cnt > 0)
+ params.pgd_ip = intel_pt_pgd_ip;
+
+ if (pt->synth_opts.instructions) {
+ if (pt->synth_opts.period) {
+ switch (pt->synth_opts.period_type) {
+ case PERF_ITRACE_PERIOD_INSTRUCTIONS:
+ params.period_type =
+ INTEL_PT_PERIOD_INSTRUCTIONS;
+ params.period = pt->synth_opts.period;
+ break;
+ case PERF_ITRACE_PERIOD_TICKS:
+ params.period_type = INTEL_PT_PERIOD_TICKS;
+ params.period = pt->synth_opts.period;
+ break;
+ case PERF_ITRACE_PERIOD_NANOSECS:
+ params.period_type = INTEL_PT_PERIOD_TICKS;
+ params.period = intel_pt_ns_to_ticks(pt,
+ pt->synth_opts.period);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (!params.period) {
+ params.period_type = INTEL_PT_PERIOD_INSTRUCTIONS;
+ params.period = 1;
+ }
+ }
+
+ if (env->cpuid && !strncmp(env->cpuid, "GenuineIntel,6,92,", 18))
+ params.flags |= INTEL_PT_FUP_WITH_NLIP;
+
+ ptq->decoder = intel_pt_decoder_new(&params);
+ if (!ptq->decoder)
+ goto out_free;
+
+ return ptq;
+
+out_free:
+ zfree(&ptq->event_buf);
+ zfree(&ptq->last_branch);
+ zfree(&ptq->last_branch_rb);
+ zfree(&ptq->chain);
+ free(ptq);
+ return NULL;
+}
+
+static void intel_pt_free_queue(void *priv)
+{
+ struct intel_pt_queue *ptq = priv;
+
+ if (!ptq)
+ return;
+ thread__zput(ptq->thread);
+ intel_pt_decoder_free(ptq->decoder);
+ zfree(&ptq->event_buf);
+ zfree(&ptq->last_branch);
+ zfree(&ptq->last_branch_rb);
+ zfree(&ptq->chain);
+ free(ptq);
+}
+
+static void intel_pt_set_pid_tid_cpu(struct intel_pt *pt,
+ struct auxtrace_queue *queue)
+{
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (queue->tid == -1 || pt->have_sched_switch) {
+ ptq->tid = machine__get_current_tid(pt->machine, ptq->cpu);
+ if (ptq->tid == -1)
+ ptq->pid = -1;
+ thread__zput(ptq->thread);
+ }
+
+ if (!ptq->thread && ptq->tid != -1)
+ ptq->thread = machine__find_thread(pt->machine, -1, ptq->tid);
+
+ if (ptq->thread) {
+ ptq->pid = ptq->thread->pid_;
+ if (queue->cpu == -1)
+ ptq->cpu = ptq->thread->cpu;
+ }
+}
+
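+/*
+ * Derive the perf branch flags (call, interrupt, TX abort, trace begin/end)
+ * for the next sample from the decoder state.
+ */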
+static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
+{
+ ptq->insn_len = 0;
+ if (ptq->state->flags & INTEL_PT_ABORT_TX) {
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TX_ABORT;
+ } else if (ptq->state->flags & INTEL_PT_ASYNC) {
+ if (ptq->state->to_ip)
+ ptq->flags = PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL |
+ PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT;
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_END;
+ ptq->insn_len = 0;
+ } else {
+ if (ptq->state->from_ip)
+ ptq->flags = intel_pt_insn_type(ptq->state->insn_op);
+ else
+ ptq->flags = PERF_IP_FLAG_BRANCH |
+ PERF_IP_FLAG_TRACE_BEGIN;
+ if (ptq->state->flags & INTEL_PT_IN_TX)
+ ptq->flags |= PERF_IP_FLAG_IN_TX;
+ ptq->insn_len = ptq->state->insn_len;
+ memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
+ }
+}
+
+static int intel_pt_setup_queue(struct intel_pt *pt,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (!ptq) {
+ ptq = intel_pt_alloc_queue(pt, queue_nr);
+ if (!ptq)
+ return -ENOMEM;
+ queue->priv = ptq;
+
+ if (queue->cpu != -1)
+ ptq->cpu = queue->cpu;
+ ptq->tid = queue->tid;
+
+ if (pt->sampling_mode && !pt->snapshot_mode &&
+ pt->timeless_decoding)
+ ptq->step_through_buffers = true;
+
+ ptq->sync_switch = pt->sync_switch;
+ }
+
+ if (!ptq->on_heap &&
+ (!ptq->sync_switch ||
+ ptq->switch_state != INTEL_PT_SS_EXPECTING_SWITCH_EVENT)) {
+ const struct intel_pt_state *state;
+ int ret;
+
+ if (pt->timeless_decoding)
+ return 0;
+
+ intel_pt_log("queue %u getting timestamp\n", queue_nr);
+ intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+ queue_nr, ptq->cpu, ptq->pid, ptq->tid);
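+		/* Decode ahead to the first timestamp so the queue can go on the heap */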
+ while (1) {
+ state = intel_pt_decode(ptq->decoder);
+ if (state->err) {
+ if (state->err == INTEL_PT_ERR_NODATA) {
+ intel_pt_log("queue %u has no timestamp\n",
+ queue_nr);
+ return 0;
+ }
+ continue;
+ }
+ if (state->timestamp)
+ break;
+ }
+
+ ptq->timestamp = state->timestamp;
+ intel_pt_log("queue %u timestamp 0x%" PRIx64 "\n",
+ queue_nr, ptq->timestamp);
+ ptq->state = state;
+ ptq->have_sample = true;
+ intel_pt_sample_flags(ptq);
+ ret = auxtrace_heap__add(&pt->heap, queue_nr, ptq->timestamp);
+ if (ret)
+ return ret;
+ ptq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int intel_pt_setup_queues(struct intel_pt *pt)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ ret = intel_pt_setup_queue(pt, &pt->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
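+/*
+ * The last branch ring buffer holds its newest entry at last_branch_pos.
+ * Copy it out to a linear branch stack, newest entry first, wrapping around
+ * if the ring buffer has filled.
+ */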
+static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
+{
+ struct branch_stack *bs_src = ptq->last_branch_rb;
+ struct branch_stack *bs_dst = ptq->last_branch;
+ size_t nr = 0;
+
+ bs_dst->nr = bs_src->nr;
+
+ if (!bs_src->nr)
+ return;
+
+ nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
+ memcpy(&bs_dst->entries[0],
+ &bs_src->entries[ptq->last_branch_pos],
+ sizeof(struct branch_entry) * nr);
+
+ if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
+ memcpy(&bs_dst->entries[nr],
+ &bs_src->entries[0],
+ sizeof(struct branch_entry) * ptq->last_branch_pos);
+ }
+}
+
+static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
+{
+ ptq->last_branch_pos = 0;
+ ptq->last_branch_rb->nr = 0;
+}
+
+static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct branch_stack *bs = ptq->last_branch_rb;
+ struct branch_entry *be;
+
+ if (!ptq->last_branch_pos)
+ ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
+
+ ptq->last_branch_pos -= 1;
+
+ be = &bs->entries[ptq->last_branch_pos];
+ be->from = state->from_ip;
+ be->to = state->to_ip;
+ be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
+ be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
+ /* No support for mispredict */
+ be->flags.mispred = ptq->pt->mispred_all;
+
+ if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
+ bs->nr += 1;
+}
+
+static inline bool intel_pt_skip_event(struct intel_pt *pt)
+{
+ return pt->synth_opts.initial_skip &&
+ pt->num_events++ < pt->synth_opts.initial_skip;
+}
+
+static void intel_pt_prep_b_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (!pt->timeless_decoding)
+ sample->time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+
+ sample->ip = ptq->state->from_ip;
+ sample->cpumode = intel_pt_cpumode(pt, sample->ip);
+ sample->pid = ptq->pid;
+ sample->tid = ptq->tid;
+ sample->addr = ptq->state->to_ip;
+ sample->period = 1;
+ sample->cpu = ptq->cpu;
+ sample->flags = ptq->flags;
+ sample->insn_len = ptq->insn_len;
+ memcpy(sample->insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = sample->cpumode;
+ event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static int intel_pt_inject_event(union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ event->header.size = perf_event__sample_event_size(sample, type, 0);
+ return perf_event__synthesize_sample(event, type, 0, sample);
+}
+
+static inline int intel_pt_opt_inject(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ if (!pt->synth_opts.inject)
+ return 0;
+
+ return intel_pt_inject_event(event, sample, type);
+}
+
+static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
+{
+ int ret;
+
+ ret = intel_pt_opt_inject(pt, event, sample, type);
+ if (ret)
+ return ret;
+
+ ret = perf_session__deliver_synth_event(pt->session, event, sample);
+ if (ret)
+ pr_err("Intel PT: failed to deliver event, error %d\n", ret);
+
+ return ret;
+}
+
+static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct dummy_branch_stack {
+ u64 nr;
+ struct branch_entry entries;
+ } dummy_bs;
+
+ if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
+ return 0;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_b_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->branches_id;
+ sample.stream_id = ptq->pt->branches_id;
+
+ /*
+ * perf report cannot handle events without a branch stack when using
+	 * SORT_MODE__BRANCH, so make a dummy one.
+ */
+ if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+ dummy_bs = (struct dummy_branch_stack){
+ .nr = 1,
+ .entries = {
+ .from = sample.ip,
+ .to = sample.addr,
+ },
+ };
+ sample.branch_stack = (struct branch_stack *)&dummy_bs;
+ }
+
+ return intel_pt_deliver_synth_b_event(pt, event, &sample,
+ pt->branches_sample_type);
+}
+
+static void intel_pt_prep_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_b_sample(pt, ptq, event, sample);
+
+ if (pt->synth_opts.callchain) {
+ thread_stack__sample(ptq->thread, ptq->chain,
+ pt->synth_opts.callchain_sz + 1,
+ sample->ip, pt->kernel_start);
+ sample->callchain = ptq->chain;
+ }
+
+ if (pt->synth_opts.last_branch) {
+ intel_pt_copy_last_branch_rb(ptq);
+ sample->branch_stack = ptq->last_branch;
+ }
+}
+
+static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample,
+ u64 type)
+{
+ int ret;
+
+ ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
+
+ if (pt->synth_opts.last_branch)
+ intel_pt_reset_last_branch_rb(ptq);
+
+ return ret;
+}
+
+static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->instructions_id;
+ sample.stream_id = ptq->pt->instructions_id;
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+
+ ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->instructions_sample_type);
+}
+
+static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->transactions_id;
+ sample.stream_id = ptq->pt->transactions_id;
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->transactions_sample_type);
+}
+
+static void intel_pt_prep_p_sample(struct intel_pt *pt,
+ struct intel_pt_queue *ptq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ intel_pt_prep_sample(pt, ptq, event, sample);
+
+ /*
+ * Zero IP is used to mean "trace start" but that is not the case for
+ * power or PTWRITE events with no IP, so clear the flags.
+ */
+ if (!sample->ip)
+ sample->flags = 0;
+}
+
+static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_ptwrite raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->ptwrites_id;
+ sample.stream_id = ptq->pt->ptwrites_id;
+
+ raw.flags = 0;
+ raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+ raw.payload = cpu_to_le64(ptq->state->ptw_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->ptwrites_sample_type);
+}
+
+static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_cbr raw;
+ u32 flags;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->cbr_id;
+ sample.stream_id = ptq->pt->cbr_id;
+
+ flags = (u16)ptq->state->cbr_payload | (pt->max_non_turbo_ratio << 16);
+ raw.flags = cpu_to_le32(flags);
+ raw.freq = cpu_to_le32(raw.cbr * pt->cbr2khz);
+ raw.reserved3 = 0;
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_mwait raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->mwait_id;
+ sample.stream_id = ptq->pt->mwait_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->mwait_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_pwre raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->pwre_id;
+ sample.stream_id = ptq->pt->pwre_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->pwre_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_exstop raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->exstop_id;
+ sample.stream_id = ptq->pt->exstop_id;
+
+ raw.flags = 0;
+ raw.ip = !!(ptq->state->flags & INTEL_PT_FUP_IP);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
+{
+ struct intel_pt *pt = ptq->pt;
+ union perf_event *event = ptq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+ struct perf_synth_intel_pwrx raw;
+
+ if (intel_pt_skip_event(pt))
+ return 0;
+
+ intel_pt_prep_p_sample(pt, ptq, event, &sample);
+
+ sample.id = ptq->pt->pwrx_id;
+ sample.stream_id = ptq->pt->pwrx_id;
+
+ raw.reserved = 0;
+ raw.payload = cpu_to_le64(ptq->state->pwrx_payload);
+
+ sample.raw_size = perf_synth__raw_size(raw);
+ sample.raw_data = perf_synth__raw_data(&raw);
+
+ return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ pt->pwr_events_sample_type);
+}
+
+static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
+ pid_t pid, pid_t tid, u64 ip)
+{
+ union perf_event event;
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+ int err;
+
+ intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
+
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ code, cpu, pid, tid, ip, msg);
+
+ err = perf_session__deliver_synth_event(pt->session, &event, NULL);
+ if (err)
+ pr_err("Intel Processor Trace: failed to deliver error event, error %d\n",
+ err);
+
+ return err;
+}
+
+static int intel_pt_next_tid(struct intel_pt *pt, struct intel_pt_queue *ptq)
+{
+ struct auxtrace_queue *queue;
+ pid_t tid = ptq->next_tid;
+ int err;
+
+ if (tid == -1)
+ return 0;
+
+ intel_pt_log("switch: cpu %d tid %d\n", ptq->cpu, tid);
+
+ err = machine__set_current_tid(pt->machine, ptq->cpu, -1, tid);
+
+ queue = &pt->queues.queue_array[ptq->queue_nr];
+ intel_pt_set_pid_tid_cpu(pt, queue);
+
+ ptq->next_tid = -1;
+
+ return err;
+}
+
+static inline bool intel_pt_is_switch_ip(struct intel_pt_queue *ptq, u64 ip)
+{
+ struct intel_pt *pt = ptq->pt;
+
+ return ip == pt->switch_ip &&
+ (ptq->flags & PERF_IP_FLAG_BRANCH) &&
+ !(ptq->flags & (PERF_IP_FLAG_CONDITIONAL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_INTERRUPT | PERF_IP_FLAG_TX_ABORT));
+}
+
+#define INTEL_PT_PWR_EVT (INTEL_PT_MWAIT_OP | INTEL_PT_PWR_ENTRY | \
+ INTEL_PT_EX_STOP | INTEL_PT_PWR_EXIT | \
+ INTEL_PT_CBR_CHG)
+
+static int intel_pt_sample(struct intel_pt_queue *ptq)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct intel_pt *pt = ptq->pt;
+ int err;
+
+ if (!ptq->have_sample)
+ return 0;
+
+ ptq->have_sample = false;
+
+ if (pt->sample_pwr_events && (state->type & INTEL_PT_PWR_EVT)) {
+ if (state->type & INTEL_PT_CBR_CHG) {
+ err = intel_pt_synth_cbr_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_MWAIT_OP) {
+ err = intel_pt_synth_mwait_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_ENTRY) {
+ err = intel_pt_synth_pwre_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_EX_STOP) {
+ err = intel_pt_synth_exstop_sample(ptq);
+ if (err)
+ return err;
+ }
+ if (state->type & INTEL_PT_PWR_EXIT) {
+ err = intel_pt_synth_pwrx_sample(ptq);
+ if (err)
+ return err;
+ }
+ }
+
+ if (pt->sample_instructions && (state->type & INTEL_PT_INSTRUCTION)) {
+ err = intel_pt_synth_instruction_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (pt->sample_transactions && (state->type & INTEL_PT_TRANSACTION)) {
+ err = intel_pt_synth_transaction_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (pt->sample_ptwrites && (state->type & INTEL_PT_PTW)) {
+ err = intel_pt_synth_ptwrite_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (!(state->type & INTEL_PT_BRANCH))
+ return 0;
+
+ if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
+ thread_stack__event(ptq->thread, ptq->flags, state->from_ip,
+ state->to_ip, ptq->insn_len,
+ state->trace_nr);
+ else
+ thread_stack__set_trace_nr(ptq->thread, state->trace_nr);
+
+ if (pt->sample_branches) {
+ err = intel_pt_synth_branch_sample(ptq);
+ if (err)
+ return err;
+ }
+
+ if (pt->synth_opts.last_branch)
+ intel_pt_update_last_branch_rb(ptq);
+
+ if (!ptq->sync_switch)
+ return 0;
+
+ if (intel_pt_is_switch_ip(ptq, state->to_ip)) {
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ err = intel_pt_next_tid(pt, ptq);
+ if (err)
+ return err;
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ default:
+ ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_EVENT;
+ return 1;
+ }
+ } else if (!state->to_ip) {
+ ptq->switch_state = INTEL_PT_SS_NOT_TRACING;
+ } else if (ptq->switch_state == INTEL_PT_SS_NOT_TRACING) {
+ ptq->switch_state = INTEL_PT_SS_UNKNOWN;
+ } else if (ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+ state->to_ip == pt->ptss_ip &&
+ (ptq->flags & PERF_IP_FLAG_CALL)) {
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ }
+
+ return 0;
+}
+
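+/*
+ * Find the kernel address of __switch_to, which identifies context switches
+ * in the trace, and optionally the address at which the sched_switch event
+ * is traced (ptss_ip).
+ */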
+static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
+{
+ struct machine *machine = pt->machine;
+ struct map *map;
+ struct symbol *sym, *start;
+ u64 ip, switch_ip = 0;
+ const char *ptss;
+
+ if (ptss_ip)
+ *ptss_ip = 0;
+
+ map = machine__kernel_map(machine);
+ if (!map)
+ return 0;
+
+ if (map__load(map))
+ return 0;
+
+ start = dso__first_symbol(map->dso);
+
+ for (sym = start; sym; sym = dso__next_symbol(sym)) {
+ if (sym->binding == STB_GLOBAL &&
+ !strcmp(sym->name, "__switch_to")) {
+ ip = map->unmap_ip(map, sym->start);
+ if (ip >= map->start && ip < map->end) {
+ switch_ip = ip;
+ break;
+ }
+ }
+ }
+
+ if (!switch_ip || !ptss_ip)
+ return 0;
+
+ if (pt->have_sched_switch == 1)
+ ptss = "perf_trace_sched_switch";
+ else
+ ptss = "__perf_event_task_sched_out";
+
+ for (sym = start; sym; sym = dso__next_symbol(sym)) {
+ if (!strcmp(sym->name, ptss)) {
+ ip = map->unmap_ip(map, sym->start);
+ if (ip >= map->start && ip < map->end) {
+ *ptss_ip = ip;
+ break;
+ }
+ }
+ }
+
+ return switch_ip;
+}
+
+static void intel_pt_enable_sync_switch(struct intel_pt *pt)
+{
+ unsigned int i;
+
+ pt->sync_switch = true;
+
+ for (i = 0; i < pt->queues.nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq)
+ ptq->sync_switch = true;
+ }
+}
+
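+/*
+ * Decode and sample until the queue's timestamp reaches *timestamp.  Returns
+ * 0 with *timestamp updated, a positive value when decoding should stop for
+ * now (no more data, or a switch event is awaited), or a negative error.
+ */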
+static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
+{
+ const struct intel_pt_state *state = ptq->state;
+ struct intel_pt *pt = ptq->pt;
+ int err;
+
+ if (!pt->kernel_start) {
+ pt->kernel_start = machine__kernel_start(pt->machine);
+ if (pt->per_cpu_mmaps &&
+ (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
+ !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
+ !pt->sampling_mode) {
+ pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
+ if (pt->switch_ip) {
+ intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
+ pt->switch_ip, pt->ptss_ip);
+ intel_pt_enable_sync_switch(pt);
+ }
+ }
+ }
+
+ intel_pt_log("queue %u decoding cpu %d pid %d tid %d\n",
+ ptq->queue_nr, ptq->cpu, ptq->pid, ptq->tid);
+ while (1) {
+ err = intel_pt_sample(ptq);
+ if (err)
+ return err;
+
+ state = intel_pt_decode(ptq->decoder);
+ if (state->err) {
+ if (state->err == INTEL_PT_ERR_NODATA)
+ return 1;
+ if (ptq->sync_switch &&
+ state->from_ip >= pt->kernel_start) {
+ ptq->sync_switch = false;
+ intel_pt_next_tid(pt, ptq);
+ }
+ if (pt->synth_opts.errors) {
+ err = intel_pt_synth_error(pt, state->err,
+ ptq->cpu, ptq->pid,
+ ptq->tid,
+ state->from_ip);
+ if (err)
+ return err;
+ }
+ continue;
+ }
+
+ ptq->state = state;
+ ptq->have_sample = true;
+ intel_pt_sample_flags(ptq);
+
+ /* Use estimated TSC upon return to user space */
+ if (pt->est_tsc &&
+ (state->from_ip >= pt->kernel_start || !state->from_ip) &&
+ state->to_ip && state->to_ip < pt->kernel_start) {
+ intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+ state->timestamp, state->est_timestamp);
+ ptq->timestamp = state->est_timestamp;
+ /* Use estimated TSC in unknown switch state */
+ } else if (ptq->sync_switch &&
+ ptq->switch_state == INTEL_PT_SS_UNKNOWN &&
+ intel_pt_is_switch_ip(ptq, state->to_ip) &&
+ ptq->next_tid == -1) {
+ intel_pt_log("TSC %"PRIx64" est. TSC %"PRIx64"\n",
+ state->timestamp, state->est_timestamp);
+ ptq->timestamp = state->est_timestamp;
+ } else if (state->timestamp > ptq->timestamp) {
+ ptq->timestamp = state->timestamp;
+ }
+
+ if (!pt->timeless_decoding && ptq->timestamp >= *timestamp) {
+ *timestamp = ptq->timestamp;
+ return 0;
+ }
+ }
+ return 0;
+}
+
+static inline int intel_pt_update_queues(struct intel_pt *pt)
+{
+ if (pt->queues.new_data) {
+ pt->queues.new_data = false;
+ return intel_pt_setup_queues(pt);
+ }
+ return 0;
+}
+
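+/*
+ * Repeatedly run the decoder for whichever queue has the oldest timestamp on
+ * the heap, until every queue has advanced to 'timestamp'.
+ */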
+static int intel_pt_process_queues(struct intel_pt *pt, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct intel_pt_queue *ptq;
+
+ if (!pt->heap.heap_cnt)
+ return 0;
+
+ if (pt->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = pt->heap.heap_array[0].queue_nr;
+ queue = &pt->queues.queue_array[queue_nr];
+ ptq = queue->priv;
+
+ intel_pt_log("queue %u processing 0x%" PRIx64 " to 0x%" PRIx64 "\n",
+ queue_nr, pt->heap.heap_array[0].ordinal,
+ timestamp);
+
+ auxtrace_heap__pop(&pt->heap);
+
+ if (pt->heap.heap_cnt) {
+ ts = pt->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ intel_pt_set_pid_tid_cpu(pt, queue);
+
+ ret = intel_pt_run_decoder(ptq, &ts);
+
+ if (ret < 0) {
+ auxtrace_heap__add(&pt->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&pt->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ ptq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int intel_pt_process_timeless_queues(struct intel_pt *pt, pid_t tid,
+ u64 time_)
+{
+ struct auxtrace_queues *queues = &pt->queues;
+ unsigned int i;
+ u64 ts = 0;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &pt->queues.queue_array[i];
+ struct intel_pt_queue *ptq = queue->priv;
+
+ if (ptq && (tid == -1 || ptq->tid == tid)) {
+ ptq->time = time_;
+ intel_pt_set_pid_tid_cpu(pt, queue);
+ intel_pt_run_decoder(ptq, &ts);
+ }
+ }
+ return 0;
+}
+
+static int intel_pt_lost(struct intel_pt *pt, struct perf_sample *sample)
+{
+ return intel_pt_synth_error(pt, INTEL_PT_ERR_LOST, sample->cpu,
+ sample->pid, sample->tid, 0);
+}
+
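+/*
+ * Queues are typically allocated one per CPU in CPU order, so try the queue
+ * whose index matches the CPU first, then search downwards and upwards.
+ */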
+static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
+{
+ unsigned i, j;
+
+ if (cpu < 0 || !pt->queues.nr_queues)
+ return NULL;
+
+ if ((unsigned)cpu >= pt->queues.nr_queues)
+ i = pt->queues.nr_queues - 1;
+ else
+ i = cpu;
+
+ if (pt->queues.queue_array[i].cpu == cpu)
+ return pt->queues.queue_array[i].priv;
+
+ for (j = 0; i > 0; j++) {
+ if (pt->queues.queue_array[--i].cpu == cpu)
+ return pt->queues.queue_array[i].priv;
+ }
+
+ for (; j < pt->queues.nr_queues; j++) {
+ if (pt->queues.queue_array[j].cpu == cpu)
+ return pt->queues.queue_array[j].priv;
+ }
+
+ return NULL;
+}
+
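+/*
+ * Synchronize a context switch event with the decoder.  Returns 1 if the
+ * caller should set the current tid immediately, 0 if that is deferred until
+ * the decoder reaches the switch ip, or a negative error code.
+ */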
+static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
+ u64 timestamp)
+{
+ struct intel_pt_queue *ptq;
+ int err;
+
+ if (!pt->sync_switch)
+ return 1;
+
+ ptq = intel_pt_cpu_to_ptq(pt, cpu);
+ if (!ptq || !ptq->sync_switch)
+ return 1;
+
+ switch (ptq->switch_state) {
+ case INTEL_PT_SS_NOT_TRACING:
+ ptq->next_tid = -1;
+ break;
+ case INTEL_PT_SS_UNKNOWN:
+ case INTEL_PT_SS_TRACING:
+ ptq->next_tid = tid;
+ ptq->switch_state = INTEL_PT_SS_EXPECTING_SWITCH_IP;
+ return 0;
+ case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
+ if (!ptq->on_heap) {
+ ptq->timestamp = perf_time_to_tsc(timestamp,
+ &pt->tc);
+ err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
+ ptq->timestamp);
+ if (err)
+ return err;
+ ptq->on_heap = true;
+ }
+ ptq->switch_state = INTEL_PT_SS_TRACING;
+ break;
+ case INTEL_PT_SS_EXPECTING_SWITCH_IP:
+ ptq->next_tid = tid;
+ intel_pt_log("ERROR: cpu %d expecting switch ip\n", cpu);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int intel_pt_process_switch(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct perf_evsel *evsel;
+ pid_t tid;
+ int cpu, ret;
+
+ evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
+ if (evsel != pt->switch_evsel)
+ return 0;
+
+ tid = perf_evsel__intval(evsel, sample, "next_pid");
+ cpu = sample->cpu;
+
+ intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ cpu, tid, sample->time, perf_time_to_tsc(sample->time,
+ &pt->tc));
+
+ ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+ if (ret <= 0)
+ return ret;
+
+ return machine__set_current_tid(pt->machine, cpu, -1, tid);
+}
+
+static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
+ struct perf_sample *sample)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ pid_t pid, tid;
+ int cpu, ret;
+
+ cpu = sample->cpu;
+
+ if (pt->have_sched_switch == 3) {
+ if (!out)
+ return 0;
+ if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
+ pr_err("Expecting CPU-wide context switch event\n");
+ return -EINVAL;
+ }
+ pid = event->context_switch.next_prev_pid;
+ tid = event->context_switch.next_prev_tid;
+ } else {
+ if (out)
+ return 0;
+ pid = sample->pid;
+ tid = sample->tid;
+ }
+
+ if (tid == -1)
+ intel_pt_log("context_switch event has no tid\n");
+
+ intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
+ &pt->tc));
+
+ ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+ if (ret <= 0)
+ return ret;
+
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
+static int intel_pt_process_itrace_start(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ if (!pt->per_cpu_mmaps)
+ return 0;
+
+ intel_pt_log("itrace_start: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ sample->cpu, event->itrace_start.pid,
+ event->itrace_start.tid, sample->time,
+ perf_time_to_tsc(sample->time, &pt->tc));
+
+ return machine__set_current_tid(pt->machine, sample->cpu,
+ event->itrace_start.pid,
+ event->itrace_start.tid);
+}
+
+static int intel_pt_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ u64 timestamp;
+ int err = 0;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("Intel Processor Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && sample->time != (u64)-1)
+ timestamp = perf_time_to_tsc(sample->time, &pt->tc);
+ else
+ timestamp = 0;
+
+ if (timestamp || pt->timeless_decoding) {
+ err = intel_pt_update_queues(pt);
+ if (err)
+ return err;
+ }
+
+ if (pt->timeless_decoding) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = intel_pt_process_timeless_queues(pt,
+ event->fork.tid,
+ sample->time);
+ }
+ } else if (timestamp) {
+ err = intel_pt_process_queues(pt, timestamp);
+ }
+ if (err)
+ return err;
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
+ pt->synth_opts.errors) {
+ err = intel_pt_lost(pt, sample);
+ if (err)
+ return err;
+ }
+
+ if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
+ err = intel_pt_process_switch(pt, sample);
+ else if (event->header.type == PERF_RECORD_ITRACE_START)
+ err = intel_pt_process_itrace_start(pt, event, sample);
+ else if (event->header.type == PERF_RECORD_SWITCH ||
+ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+ err = intel_pt_context_switch(pt, event, sample);
+
+ intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
+ perf_event__name(event->header.type), event->header.type,
+ sample->cpu, sample->time, timestamp);
+
+ return err;
+}
+
+static int intel_pt_flush(struct perf_session *session, struct perf_tool *tool)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ int ret;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = intel_pt_update_queues(pt);
+ if (ret < 0)
+ return ret;
+
+ if (pt->timeless_decoding)
+ return intel_pt_process_timeless_queues(pt, -1,
+ MAX_TIMESTAMP - 1);
+
+ return intel_pt_process_queues(pt, MAX_TIMESTAMP);
+}
+
+static void intel_pt_free_events(struct perf_session *session)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+ struct auxtrace_queues *queues = &pt->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ intel_pt_free_queue(queues->queue_array[i].priv);
+ queues->queue_array[i].priv = NULL;
+ }
+ intel_pt_log_disable();
+ auxtrace_queues__free(queues);
+}
+
+static void intel_pt_free(struct perf_session *session)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ auxtrace_heap__free(&pt->heap);
+ intel_pt_free_events(session);
+ session->auxtrace = NULL;
+ thread__put(pt->unknown_thread);
+ addr_filters__exit(&pt->filts);
+ zfree(&pt->filter);
+ free(pt);
+}
+
+static int intel_pt_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ if (!pt->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&pt->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+		/* Dump here now that we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ intel_pt_dump_event(pt, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ }
+
+ return 0;
+}
+
+struct intel_pt_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int intel_pt_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct intel_pt_synth *intel_pt_synth =
+ container_of(tool, struct intel_pt_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(intel_pt_synth->session, event,
+ NULL);
+}
+
+static int intel_pt_synth_event(struct perf_session *session, const char *name,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct intel_pt_synth intel_pt_synth;
+ int err;
+
+ pr_debug("Synthesizing '%s' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
+ name, id, (u64)attr->sample_type);
+
+ memset(&intel_pt_synth, 0, sizeof(struct intel_pt_synth));
+ intel_pt_synth.session = session;
+
+ err = perf_event__synthesize_attr(&intel_pt_synth.dummy_tool, attr, 1,
+ &id, intel_pt_event_synth);
+ if (err)
+ pr_err("%s: failed to synthesize '%s' event type\n",
+ __func__, name);
+
+ return err;
+}
+
+static void intel_pt_set_event_name(struct perf_evlist *evlist, u64 id,
+ const char *name)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->id && evsel->id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
+ break;
+ }
+ }
+}
+
+static struct perf_evsel *intel_pt_evsel(struct intel_pt *pt,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type == pt->pmu_type && evsel->ids)
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static int intel_pt_synth_events(struct intel_pt *pt,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_evsel *evsel = intel_pt_evsel(pt, evlist);
+ struct perf_event_attr attr;
+ u64 id;
+ int err;
+
+ if (!evsel) {
+ pr_debug("There are no selected events with Intel Processor Trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (pt->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+ if (!pt->per_cpu_mmaps)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
+ attr.exclude_user = evsel->attr.exclude_user;
+ attr.exclude_kernel = evsel->attr.exclude_kernel;
+ attr.exclude_hv = evsel->attr.exclude_hv;
+ attr.exclude_host = evsel->attr.exclude_host;
+ attr.exclude_guest = evsel->attr.exclude_guest;
+ attr.sample_id_all = evsel->attr.sample_id_all;
+ attr.read_format = evsel->attr.read_format;
+
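+	/* Choose ids for synthesized events well clear of existing sample ids */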
+ id = evsel->id[0] + 1000000000;
+ if (!id)
+ id = 1;
+
+ if (pt->synth_opts.branches) {
+ attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
+ attr.sample_period = 1;
+ attr.sample_type |= PERF_SAMPLE_ADDR;
+ err = intel_pt_synth_event(session, "branches", &attr, id);
+ if (err)
+ return err;
+ pt->sample_branches = true;
+ pt->branches_sample_type = attr.sample_type;
+ pt->branches_id = id;
+ id += 1;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
+ }
+
+ if (pt->synth_opts.callchain)
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (pt->synth_opts.last_branch)
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+
+ if (pt->synth_opts.instructions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ if (pt->synth_opts.period_type == PERF_ITRACE_PERIOD_NANOSECS)
+ attr.sample_period =
+ intel_pt_ns_to_ticks(pt, pt->synth_opts.period);
+ else
+ attr.sample_period = pt->synth_opts.period;
+ err = intel_pt_synth_event(session, "instructions", &attr, id);
+ if (err)
+ return err;
+ pt->sample_instructions = true;
+ pt->instructions_sample_type = attr.sample_type;
+ pt->instructions_id = id;
+ id += 1;
+ }
+
+ attr.sample_type &= ~(u64)PERF_SAMPLE_PERIOD;
+ attr.sample_period = 1;
+
+ if (pt->synth_opts.transactions) {
+ attr.config = PERF_COUNT_HW_INSTRUCTIONS;
+ err = intel_pt_synth_event(session, "transactions", &attr, id);
+ if (err)
+ return err;
+ pt->sample_transactions = true;
+ pt->transactions_sample_type = attr.sample_type;
+ pt->transactions_id = id;
+ intel_pt_set_event_name(evlist, id, "transactions");
+ id += 1;
+ }
+
+ attr.type = PERF_TYPE_SYNTH;
+ attr.sample_type |= PERF_SAMPLE_RAW;
+
+ if (pt->synth_opts.ptwrites) {
+ attr.config = PERF_SYNTH_INTEL_PTWRITE;
+ err = intel_pt_synth_event(session, "ptwrite", &attr, id);
+ if (err)
+ return err;
+ pt->sample_ptwrites = true;
+ pt->ptwrites_sample_type = attr.sample_type;
+ pt->ptwrites_id = id;
+ intel_pt_set_event_name(evlist, id, "ptwrite");
+ id += 1;
+ }
+
+ if (pt->synth_opts.pwr_events) {
+ pt->sample_pwr_events = true;
+ pt->pwr_events_sample_type = attr.sample_type;
+
+ attr.config = PERF_SYNTH_INTEL_CBR;
+ err = intel_pt_synth_event(session, "cbr", &attr, id);
+ if (err)
+ return err;
+ pt->cbr_id = id;
+ intel_pt_set_event_name(evlist, id, "cbr");
+ id += 1;
+ }
+
+ if (pt->synth_opts.pwr_events && (evsel->attr.config & 0x10)) {
+ attr.config = PERF_SYNTH_INTEL_MWAIT;
+ err = intel_pt_synth_event(session, "mwait", &attr, id);
+ if (err)
+ return err;
+ pt->mwait_id = id;
+ intel_pt_set_event_name(evlist, id, "mwait");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PWRE;
+ err = intel_pt_synth_event(session, "pwre", &attr, id);
+ if (err)
+ return err;
+ pt->pwre_id = id;
+ intel_pt_set_event_name(evlist, id, "pwre");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_EXSTOP;
+ err = intel_pt_synth_event(session, "exstop", &attr, id);
+ if (err)
+ return err;
+ pt->exstop_id = id;
+ intel_pt_set_event_name(evlist, id, "exstop");
+ id += 1;
+
+ attr.config = PERF_SYNTH_INTEL_PWRX;
+ err = intel_pt_synth_event(session, "pwrx", &attr, id);
+ if (err)
+ return err;
+ pt->pwrx_id = id;
+ intel_pt_set_event_name(evlist, id, "pwrx");
+ id += 1;
+ }
+
+ return 0;
+}
+
+static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry_reverse(evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "sched:sched_switch"))
+ return evsel;
+ }
+
+ return NULL;
+}
+
+static bool intel_pt_find_switch(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.context_switch)
+ return true;
+ }
+
+ return false;
+}
+
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+ struct intel_pt *pt = data;
+
+ if (!strcmp(var, "intel-pt.mispred-all"))
+ pt->mispred_all = perf_config_bool(var, value);
+
+ return 0;
+}
+
+static const char * const intel_pt_info_fmts[] = {
+ [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
+ [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
+	[INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n",
+ [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n",
+ [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
+ [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n",
+ [INTEL_PT_NORETCOMP_BIT] = " NoRETComp bit %#"PRIx64"\n",
+ [INTEL_PT_HAVE_SCHED_SWITCH] = " Have sched_switch %"PRId64"\n",
+ [INTEL_PT_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
+ [INTEL_PT_PER_CPU_MMAPS] = " Per-cpu maps %"PRId64"\n",
+ [INTEL_PT_MTC_BIT] = " MTC bit %#"PRIx64"\n",
+ [INTEL_PT_TSC_CTC_N] = " TSC:CTC numerator %"PRIu64"\n",
+ [INTEL_PT_TSC_CTC_D] = " TSC:CTC denominator %"PRIu64"\n",
+ [INTEL_PT_CYC_BIT] = " CYC bit %#"PRIx64"\n",
+ [INTEL_PT_MAX_NONTURBO_RATIO] = " Max non-turbo ratio %"PRIu64"\n",
+ [INTEL_PT_FILTER_STR_LEN] = " Filter string len. %"PRIu64"\n",
+};
+
+static void intel_pt_print_info(u64 *arr, int start, int finish)
+{
+ int i;
+
+ if (!dump_trace)
+ return;
+
+ for (i = start; i <= finish; i++)
+ fprintf(stdout, intel_pt_info_fmts[i], arr[i]);
+}
+
+static void intel_pt_print_info_str(const char *name, const char *str)
+{
+ if (!dump_trace)
+ return;
+
+ fprintf(stdout, " %-20s%s\n", name, str ? str : "");
+}
+
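+/*
+ * Return true if the auxtrace_info priv array extends at least to index
+ * 'pos', so that optional fields are read only when present.
+ */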
+static bool intel_pt_has(struct auxtrace_info_event *auxtrace_info, int pos)
+{
+ return auxtrace_info->header.size >=
+ sizeof(struct auxtrace_info_event) + (sizeof(u64) * (pos + 1));
+}
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ size_t min_sz = sizeof(u64) * INTEL_PT_PER_CPU_MMAPS;
+ struct intel_pt *pt;
+ void *info_end;
+ u64 *info;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
+ min_sz)
+ return -EINVAL;
+
+ pt = zalloc(sizeof(struct intel_pt));
+ if (!pt)
+ return -ENOMEM;
+
+ addr_filters__init(&pt->filts);
+
+ err = perf_config(intel_pt_perf_config, pt);
+ if (err)
+ goto err_free;
+
+ err = auxtrace_queues__init(&pt->queues);
+ if (err)
+ goto err_free;
+
+ intel_pt_log_set_name(INTEL_PT_PMU_NAME);
+
+ pt->session = session;
+ pt->machine = &session->machines.host; /* No kvm support */
+ pt->auxtrace_type = auxtrace_info->type;
+ pt->pmu_type = auxtrace_info->priv[INTEL_PT_PMU_TYPE];
+ pt->tc.time_shift = auxtrace_info->priv[INTEL_PT_TIME_SHIFT];
+ pt->tc.time_mult = auxtrace_info->priv[INTEL_PT_TIME_MULT];
+ pt->tc.time_zero = auxtrace_info->priv[INTEL_PT_TIME_ZERO];
+ pt->cap_user_time_zero = auxtrace_info->priv[INTEL_PT_CAP_USER_TIME_ZERO];
+ pt->tsc_bit = auxtrace_info->priv[INTEL_PT_TSC_BIT];
+ pt->noretcomp_bit = auxtrace_info->priv[INTEL_PT_NORETCOMP_BIT];
+ pt->have_sched_switch = auxtrace_info->priv[INTEL_PT_HAVE_SCHED_SWITCH];
+ pt->snapshot_mode = auxtrace_info->priv[INTEL_PT_SNAPSHOT_MODE];
+ pt->per_cpu_mmaps = auxtrace_info->priv[INTEL_PT_PER_CPU_MMAPS];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_PMU_TYPE,
+ INTEL_PT_PER_CPU_MMAPS);
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_CYC_BIT)) {
+ pt->mtc_bit = auxtrace_info->priv[INTEL_PT_MTC_BIT];
+ pt->mtc_freq_bits = auxtrace_info->priv[INTEL_PT_MTC_FREQ_BITS];
+ pt->tsc_ctc_ratio_n = auxtrace_info->priv[INTEL_PT_TSC_CTC_N];
+ pt->tsc_ctc_ratio_d = auxtrace_info->priv[INTEL_PT_TSC_CTC_D];
+ pt->cyc_bit = auxtrace_info->priv[INTEL_PT_CYC_BIT];
+ intel_pt_print_info(&auxtrace_info->priv[0], INTEL_PT_MTC_BIT,
+ INTEL_PT_CYC_BIT);
+ }
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_MAX_NONTURBO_RATIO)) {
+ pt->max_non_turbo_ratio =
+ auxtrace_info->priv[INTEL_PT_MAX_NONTURBO_RATIO];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_MAX_NONTURBO_RATIO);
+ }
+
+ info = &auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN] + 1;
+ info_end = (void *)info + auxtrace_info->header.size;
+
+ if (intel_pt_has(auxtrace_info, INTEL_PT_FILTER_STR_LEN)) {
+ size_t len;
+
+ len = auxtrace_info->priv[INTEL_PT_FILTER_STR_LEN];
+ intel_pt_print_info(&auxtrace_info->priv[0],
+ INTEL_PT_FILTER_STR_LEN,
+ INTEL_PT_FILTER_STR_LEN);
+ if (len) {
+ const char *filter = (const char *)info;
+
+ len = roundup(len + 1, 8);
+ info += len >> 3;
+ if ((void *)info > info_end) {
+ pr_err("%s: bad filter string length\n", __func__);
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ pt->filter = memdup(filter, len);
+ if (!pt->filter) {
+ err = -ENOMEM;
+ goto err_free_queues;
+ }
+ if (session->header.needs_swap)
+ mem_bswap_64(pt->filter, len);
+ if (pt->filter[len - 1]) {
+ pr_err("%s: filter string not null terminated\n", __func__);
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ err = addr_filters__parse_bare_filter(&pt->filts,
+ filter);
+ if (err)
+ goto err_free_queues;
+ }
+ intel_pt_print_info_str("Filter string", pt->filter);
+ }
+
+ pt->timeless_decoding = intel_pt_timeless_decoding(pt);
+ if (pt->timeless_decoding && !pt->tc.time_mult)
+ pt->tc.time_mult = 1;
+ pt->have_tsc = intel_pt_have_tsc(pt);
+ pt->sampling_mode = false;
+ pt->est_tsc = !pt->timeless_decoding;
+
+ pt->unknown_thread = thread__new(999999999, 999999999);
+ if (!pt->unknown_thread) {
+ err = -ENOMEM;
+ goto err_free_queues;
+ }
+
+ /*
+	 * Since this thread will not be kept in any rbtree nor in a
+	 * list, initialize its list node so that at thread__put() the
+	 * current thread lifetime assumption is kept and we don't segfault
+ * at list_del_init().
+ */
+ INIT_LIST_HEAD(&pt->unknown_thread->node);
+
+ err = thread__set_comm(pt->unknown_thread, "unknown", 0);
+ if (err)
+ goto err_delete_thread;
+ if (thread__init_map_groups(pt->unknown_thread, pt->machine)) {
+ err = -ENOMEM;
+ goto err_delete_thread;
+ }
+
+ pt->auxtrace.process_event = intel_pt_process_event;
+ pt->auxtrace.process_auxtrace_event = intel_pt_process_auxtrace_event;
+ pt->auxtrace.flush_events = intel_pt_flush;
+ pt->auxtrace.free_events = intel_pt_free_events;
+ pt->auxtrace.free = intel_pt_free;
+ session->auxtrace = &pt->auxtrace;
+
+ if (dump_trace)
+ return 0;
+
+ if (pt->have_sched_switch == 1) {
+ pt->switch_evsel = intel_pt_find_sched_switch(session->evlist);
+ if (!pt->switch_evsel) {
+ pr_err("%s: missing sched_switch event\n", __func__);
+ err = -EINVAL;
+ goto err_delete_thread;
+ }
+ } else if (pt->have_sched_switch == 2 &&
+ !intel_pt_find_switch(session->evlist)) {
+ pr_err("%s: missing context_switch attribute flag\n", __func__);
+ err = -EINVAL;
+ goto err_delete_thread;
+ }
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
+ pt->synth_opts = *session->itrace_synth_opts;
+ } else {
+ itrace_synth_opts__set_default(&pt->synth_opts);
+ if (use_browser != -1) {
+ pt->synth_opts.branches = false;
+ pt->synth_opts.callchain = true;
+ }
+ if (session->itrace_synth_opts)
+ pt->synth_opts.thread_stack =
+ session->itrace_synth_opts->thread_stack;
+ }
+
+ if (pt->synth_opts.log)
+ intel_pt_log_enable();
+
+ /* Maximum non-turbo ratio is TSC freq / 100 MHz */
+ if (pt->tc.time_mult) {
+ u64 tsc_freq = intel_pt_ns_to_ticks(pt, 1000000000);
+
+ if (!pt->max_non_turbo_ratio)
+ pt->max_non_turbo_ratio =
+ (tsc_freq + 50000000) / 100000000;
+ intel_pt_log("TSC frequency %"PRIu64"\n", tsc_freq);
+ intel_pt_log("Maximum non-turbo ratio %u\n",
+ pt->max_non_turbo_ratio);
+ pt->cbr2khz = tsc_freq / pt->max_non_turbo_ratio / 1000;
+ }
+
+ if (pt->synth_opts.calls)
+ pt->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
+ PERF_IP_FLAG_TRACE_END;
+ if (pt->synth_opts.returns)
+ pt->branches_filter |= PERF_IP_FLAG_RETURN |
+ PERF_IP_FLAG_TRACE_BEGIN;
+
+ if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+ symbol_conf.use_callchain = true;
+ if (callchain_register_param(&callchain_param) < 0) {
+ symbol_conf.use_callchain = false;
+ pt->synth_opts.callchain = false;
+ }
+ }
+
+ err = intel_pt_synth_events(pt, session);
+ if (err)
+ goto err_delete_thread;
+
+ err = auxtrace_queues__process_index(&pt->queues, session);
+ if (err)
+ goto err_delete_thread;
+
+ if (pt->queues.populated)
+ pt->data_queued = true;
+
+ if (pt->timeless_decoding)
+ pr_debug2("Intel PT decoding without timestamps\n");
+
+ return 0;
+
+err_delete_thread:
+ thread__zput(pt->unknown_thread);
+err_free_queues:
+ intel_pt_log_disable();
+ auxtrace_queues__free(&pt->queues);
+ session->auxtrace = NULL;
+err_free:
+ addr_filters__exit(&pt->filts);
+ zfree(&pt->filter);
+ free(pt);
+ return err;
+}
diff --git a/tools/perf/util/intel-pt.h b/tools/perf/util/intel-pt.h
new file mode 100644
index 000000000..e13b14e5a
--- /dev/null
+++ b/tools/perf/util/intel-pt.h
@@ -0,0 +1,56 @@
+/*
+ * intel_pt.h: Intel Processor Trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef INCLUDE__PERF_INTEL_PT_H__
+#define INCLUDE__PERF_INTEL_PT_H__
+
+#define INTEL_PT_PMU_NAME "intel_pt"
+
+enum {
+ INTEL_PT_PMU_TYPE,
+ INTEL_PT_TIME_SHIFT,
+ INTEL_PT_TIME_MULT,
+ INTEL_PT_TIME_ZERO,
+ INTEL_PT_CAP_USER_TIME_ZERO,
+ INTEL_PT_TSC_BIT,
+ INTEL_PT_NORETCOMP_BIT,
+ INTEL_PT_HAVE_SCHED_SWITCH,
+ INTEL_PT_SNAPSHOT_MODE,
+ INTEL_PT_PER_CPU_MMAPS,
+ INTEL_PT_MTC_BIT,
+ INTEL_PT_MTC_FREQ_BITS,
+ INTEL_PT_TSC_CTC_N,
+ INTEL_PT_TSC_CTC_D,
+ INTEL_PT_CYC_BIT,
+ INTEL_PT_MAX_NONTURBO_RATIO,
+ INTEL_PT_FILTER_STR_LEN,
+ INTEL_PT_AUXTRACE_PRIV_MAX,
+};
+
+struct auxtrace_record;
+struct perf_tool;
+union perf_event;
+struct perf_session;
+struct perf_event_attr;
+struct perf_pmu;
+
+struct auxtrace_record *intel_pt_recording_init(int *err);
+
+int intel_pt_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+
+struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+
+#endif
diff --git a/tools/perf/util/intlist.c b/tools/perf/util/intlist.c
new file mode 100644
index 000000000..89715b64a
--- /dev/null
+++ b/tools/perf/util/intlist.c
@@ -0,0 +1,146 @@
+/*
+ * Based on strlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+#include "intlist.h"
+
+static struct rb_node *intlist__node_new(struct rblist *rblist __maybe_unused,
+ const void *entry)
+{
+ int i = (int)((long)entry);
+ struct rb_node *rc = NULL;
+ struct int_node *node = malloc(sizeof(*node));
+
+ if (node != NULL) {
+ node->i = i;
+ node->priv = NULL;
+ rc = &node->rb_node;
+ }
+
+ return rc;
+}
+
+static void int_node__delete(struct int_node *ilist)
+{
+ free(ilist);
+}
+
+static void intlist__node_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+
+ int_node__delete(node);
+}
+
+static int intlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+ int i = (int)((long)entry);
+ struct int_node *node = container_of(rb_node, struct int_node, rb_node);
+
+ return node->i - i;
+}
+
+int intlist__add(struct intlist *ilist, int i)
+{
+ return rblist__add_node(&ilist->rblist, (void *)((long)i));
+}
+
+void intlist__remove(struct intlist *ilist, struct int_node *node)
+{
+ rblist__remove_node(&ilist->rblist, &node->rb_node);
+}
+
+static struct int_node *__intlist__findnew(struct intlist *ilist,
+ int i, bool create)
+{
+ struct int_node *node = NULL;
+ struct rb_node *rb_node;
+
+ if (ilist == NULL)
+ return NULL;
+
+ if (create)
+ rb_node = rblist__findnew(&ilist->rblist, (void *)((long)i));
+ else
+ rb_node = rblist__find(&ilist->rblist, (void *)((long)i));
+
+ if (rb_node)
+ node = container_of(rb_node, struct int_node, rb_node);
+
+ return node;
+}
+
+struct int_node *intlist__find(struct intlist *ilist, int i)
+{
+ return __intlist__findnew(ilist, i, false);
+}
+
+struct int_node *intlist__findnew(struct intlist *ilist, int i)
+{
+ return __intlist__findnew(ilist, i, true);
+}
+
+static int intlist__parse_list(struct intlist *ilist, const char *s)
+{
+ char *sep;
+ int err;
+
+ do {
+ long value = strtol(s, &sep, 10);
+ err = -EINVAL;
+ if (*sep != ',' && *sep != '\0')
+ break;
+ err = intlist__add(ilist, value);
+ if (err)
+ break;
+ s = sep + 1;
+ } while (*sep != '\0');
+
+ return err;
+}
+
+struct intlist *intlist__new(const char *slist)
+{
+ struct intlist *ilist = malloc(sizeof(*ilist));
+
+ if (ilist != NULL) {
+ rblist__init(&ilist->rblist);
+ ilist->rblist.node_cmp = intlist__node_cmp;
+ ilist->rblist.node_new = intlist__node_new;
+ ilist->rblist.node_delete = intlist__node_delete;
+
+ if (slist && intlist__parse_list(ilist, slist))
+ goto out_delete;
+ }
+
+ return ilist;
+out_delete:
+ intlist__delete(ilist);
+ return NULL;
+}
+
+void intlist__delete(struct intlist *ilist)
+{
+ if (ilist != NULL)
+ rblist__delete(&ilist->rblist);
+}
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx)
+{
+ struct int_node *node = NULL;
+ struct rb_node *rb_node;
+
+ rb_node = rblist__entry(&ilist->rblist, idx);
+ if (rb_node)
+ node = container_of(rb_node, struct int_node, rb_node);
+
+ return node;
+}
diff --git a/tools/perf/util/intlist.h b/tools/perf/util/intlist.h
new file mode 100644
index 000000000..85bab8735
--- /dev/null
+++ b/tools/perf/util/intlist.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_INTLIST_H
+#define __PERF_INTLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct int_node {
+ struct rb_node rb_node;
+ int i;
+ void *priv;
+};
+
+struct intlist {
+ struct rblist rblist;
+};
+
+struct intlist *intlist__new(const char *slist);
+void intlist__delete(struct intlist *ilist);
+
+void intlist__remove(struct intlist *ilist, struct int_node *in);
+int intlist__add(struct intlist *ilist, int i);
+
+struct int_node *intlist__entry(const struct intlist *ilist, unsigned int idx);
+struct int_node *intlist__find(struct intlist *ilist, int i);
+struct int_node *intlist__findnew(struct intlist *ilist, int i);
+
+static inline bool intlist__has_entry(struct intlist *ilist, int i)
+{
+ return intlist__find(ilist, i) != NULL;
+}
+
+static inline bool intlist__empty(const struct intlist *ilist)
+{
+ return rblist__empty(&ilist->rblist);
+}
+
+static inline unsigned int intlist__nr_entries(const struct intlist *ilist)
+{
+ return rblist__nr_entries(&ilist->rblist);
+}
+
+/* For intlist iteration */
+static inline struct int_node *intlist__first(struct intlist *ilist)
+{
+ struct rb_node *rn = rb_first(&ilist->rblist.entries);
+ return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
+}
+static inline struct int_node *intlist__next(struct int_node *in)
+{
+ struct rb_node *rn;
+ if (!in)
+ return NULL;
+ rn = rb_next(&in->rb_node);
+ return rn ? rb_entry(rn, struct int_node, rb_node) : NULL;
+}
+
+/**
+ * intlist__for_each_entry - iterate over an intlist
+ * @pos: the &struct int_node to use as a loop cursor.
+ * @ilist: the &struct intlist to iterate over.
+ */
+#define intlist__for_each_entry(pos, ilist) \
+ for (pos = intlist__first(ilist); pos; pos = intlist__next(pos))
+
+/**
+ * intlist__for_each_entry_safe - iterate over an intlist, safe against
+ * removal of int_node
+ * @pos: the &struct int_node to use as a loop cursor.
+ * @n: another &struct int_node to use as temporary storage.
+ * @ilist: the &struct intlist to iterate over.
+ */
+#define intlist__for_each_entry_safe(pos, n, ilist) \
+ for (pos = intlist__first(ilist), n = intlist__next(pos); pos;\
+ pos = n, n = intlist__next(n))
+#endif /* __PERF_INTLIST_H */
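
A brief usage sketch of this API (assuming the program links against intlist.c and perf's rblist): intlist__new() accepts an optional comma-separated list of decimal values, and iteration visits nodes in rbtree (sorted) order.

    #include <stdio.h>
    #include "intlist.h"

    static int intlist_demo(void)
    {
            struct intlist *ilist = intlist__new("3,1,2");
            struct int_node *pos;

            if (ilist == NULL)
                    return -1;

            intlist__for_each_entry(pos, ilist)     /* prints 1, 2, 3 */
                    printf("%d\n", pos->i);

            printf("has 2: %d\n", intlist__has_entry(ilist, 2));
            intlist__delete(ilist);
            return 0;
    }
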
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
new file mode 100644
index 000000000..6817ffc2a
--- /dev/null
+++ b/tools/perf/util/jit.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __JIT_H__
+#define __JIT_H__
+
+#include <data.h>
+
+int jit_process(struct perf_session *session, struct perf_data *output,
+ struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
+
+int jit_inject_record(const char *filename);
+
+#endif /* __JIT_H__ */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
new file mode 100644
index 000000000..663e790c2
--- /dev/null
+++ b/tools/perf/util/jitdump.c
@@ -0,0 +1,793 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <linux/stringify.h>
+
+#include "util.h"
+#include "event.h"
+#include "debug.h"
+#include "evlist.h"
+#include "symbol.h"
+#include <elf.h>
+
+#include "tsc.h"
+#include "session.h"
+#include "jit.h"
+#include "jitdump.h"
+#include "genelf.h"
+#include "../builtin.h"
+
+#include "sane_ctype.h"
+
+struct jit_buf_desc {
+ struct perf_data *output;
+ struct perf_session *session;
+ struct machine *machine;
+ union jr_entry *entry;
+ void *buf;
+ uint64_t sample_type;
+ size_t bufsize;
+ FILE *in;
+ bool needs_bswap; /* handles cross-endianness */
+ bool use_arch_timestamp;
+ void *debug_data;
+ void *unwinding_data;
+ uint64_t unwinding_size;
+ uint64_t unwinding_mapped_size;
+ uint64_t eh_frame_hdr_size;
+ size_t nr_debug_entries;
+ uint32_t code_load_count;
+ u64 bytes_written;
+ struct rb_root code_root;
+ char dir[PATH_MAX];
+};
+
+struct debug_line_info {
+ unsigned long vma;
+ unsigned int lineno;
+ /* The filename format is unspecified: absolute path, relative, etc. */
+ char const filename[0];
+};
+
+struct jit_tool {
+ struct perf_tool tool;
+ struct perf_data output;
+ struct perf_data input;
+ u64 bytes_written;
+};
+
+#define hmax(a, b) ((a) > (b) ? (a) : (b))
+#define get_jit_tool(t) (container_of(tool, struct jit_tool, tool))
+
+static int
+jit_emit_elf(char *filename,
+ const char *sym,
+ uint64_t code_addr,
+ const void *code,
+ int csize,
+ void *debug,
+ int nr_debug_entries,
+ void *unwinding,
+ uint32_t unwinding_header_size,
+ uint32_t unwinding_size)
+{
+ int ret, fd;
+
+ if (verbose > 0)
+ fprintf(stderr, "write ELF image %s\n", filename);
+
+ fd = open(filename, O_CREAT|O_TRUNC|O_WRONLY, 0644);
+ if (fd == -1) {
+ pr_warning("cannot create jit ELF %s: %s\n", filename, strerror(errno));
+ return -1;
+ }
+
+ ret = jit_write_elf(fd, code_addr, sym, (const void *)code, csize, debug, nr_debug_entries,
+ unwinding, unwinding_header_size, unwinding_size);
+
+ close(fd);
+
+ if (ret)
+ unlink(filename);
+
+ return ret;
+}
+
+static void
+jit_close(struct jit_buf_desc *jd)
+{
+ if (!(jd && jd->in))
+ return;
+ funlockfile(jd->in);
+ fclose(jd->in);
+ jd->in = NULL;
+}
+
+static int
+jit_validate_events(struct perf_session *session)
+{
+ struct perf_evsel *evsel;
+
+ /*
+ * check that all events use CLOCK_MONOTONIC
+ */
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.use_clockid == 0 || evsel->attr.clockid != CLOCK_MONOTONIC)
+ return -1;
+ }
+ return 0;
+}
+
+static int
+jit_open(struct jit_buf_desc *jd, const char *name)
+{
+ struct jitheader header;
+ struct jr_prefix *prefix;
+ ssize_t bs, bsz = 0;
+ void *n, *buf = NULL;
+ int ret, retval = -1;
+
+ jd->in = fopen(name, "r");
+ if (!jd->in)
+ return -1;
+
+ bsz = hmax(sizeof(header), sizeof(*prefix));
+
+ buf = malloc(bsz);
+ if (!buf)
+ goto error;
+
+ /*
+ * protect against the writer modifying the file while we are reading it
+ */
+ flockfile(jd->in);
+
+ ret = fread(buf, sizeof(header), 1, jd->in);
+ if (ret != 1)
+ goto error;
+
+ memcpy(&header, buf, sizeof(header));
+
+ if (header.magic != JITHEADER_MAGIC) {
+ if (header.magic != JITHEADER_MAGIC_SW)
+ goto error;
+ jd->needs_bswap = true;
+ }
+
+ if (jd->needs_bswap) {
+ header.version = bswap_32(header.version);
+ header.total_size = bswap_32(header.total_size);
+ header.pid = bswap_32(header.pid);
+ header.elf_mach = bswap_32(header.elf_mach);
+ header.timestamp = bswap_64(header.timestamp);
+ header.flags = bswap_64(header.flags);
+ }
+
+ jd->use_arch_timestamp = header.flags & JITDUMP_FLAGS_ARCH_TIMESTAMP;
+
+ if (verbose > 2)
+ pr_debug("version=%u\nhdr.size=%u\nts=0x%llx\npid=%d\nelf_mach=%d\nuse_arch_timestamp=%d\n",
+ header.version,
+ header.total_size,
+ (unsigned long long)header.timestamp,
+ header.pid,
+ header.elf_mach,
+ jd->use_arch_timestamp);
+
+ if (header.version > JITHEADER_VERSION) {
+ pr_err("wrong jitdump version %u, expected " __stringify(JITHEADER_VERSION),
+ header.version);
+ goto error;
+ }
+
+ if (header.flags & JITDUMP_FLAGS_RESERVED) {
+ pr_err("jitdump file contains invalid or unsupported flags 0x%llx\n",
+ (unsigned long long)header.flags & JITDUMP_FLAGS_RESERVED);
+ goto error;
+ }
+
+ if (jd->use_arch_timestamp && !jd->session->time_conv.time_mult) {
+ pr_err("jitdump file uses arch timestamps but there is no timestamp conversion\n");
+ goto error;
+ }
+
+ /*
+ * validate that events use the correct clockid
+ */
+ if (!jd->use_arch_timestamp && jit_validate_events(jd->session)) {
+ pr_err("error, jitted code must be sampled with perf record -k 1\n");
+ goto error;
+ }
+
+ bs = header.total_size - sizeof(header);
+
+ if (bs > bsz) {
+ n = realloc(buf, bs);
+ if (!n)
+ goto error;
+ bsz = bs;
+ buf = n;
+ /* read extra bytes we do not know about */
+ ret = fread(buf, bs - bsz, 1, jd->in);
+ if (ret != 1)
+ goto error;
+ }
+ /*
+ * keep dirname for generating files and mmap records
+ */
+ strcpy(jd->dir, name);
+ dirname(jd->dir);
+
+ return 0;
+error:
+ funlockfile(jd->in);
+ fclose(jd->in);
+ return retval;
+}
+
+static union jr_entry *
+jit_get_next_entry(struct jit_buf_desc *jd)
+{
+ struct jr_prefix *prefix;
+ union jr_entry *jr;
+ void *addr;
+ size_t bs, size;
+ int id, ret;
+
+ if (!(jd && jd->in))
+ return NULL;
+
+ if (jd->buf == NULL) {
+ size_t sz = getpagesize();
+ if (sz < sizeof(*prefix))
+ sz = sizeof(*prefix);
+
+ jd->buf = malloc(sz);
+ if (jd->buf == NULL)
+ return NULL;
+
+ jd->bufsize = sz;
+ }
+
+ prefix = jd->buf;
+
+ /*
+ * file is still locked at this point
+ */
+ ret = fread(prefix, sizeof(*prefix), 1, jd->in);
+ if (ret != 1)
+ return NULL;
+
+ if (jd->needs_bswap) {
+ prefix->id = bswap_32(prefix->id);
+ prefix->total_size = bswap_32(prefix->total_size);
+ prefix->timestamp = bswap_64(prefix->timestamp);
+ }
+ id = prefix->id;
+ size = prefix->total_size;
+
+ bs = (size_t)size;
+ if (bs < sizeof(*prefix))
+ return NULL;
+
+ if (id >= JIT_CODE_MAX) {
+ pr_warning("next_entry: unknown record type %d, skipping\n", id);
+ }
+ if (bs > jd->bufsize) {
+ void *n;
+ n = realloc(jd->buf, bs);
+ if (!n)
+ return NULL;
+ jd->buf = n;
+ jd->bufsize = bs;
+ }
+
+ addr = ((void *)jd->buf) + sizeof(*prefix);
+
+ ret = fread(addr, bs - sizeof(*prefix), 1, jd->in);
+ if (ret != 1)
+ return NULL;
+
+ jr = (union jr_entry *)jd->buf;
+
+ switch(id) {
+ case JIT_CODE_DEBUG_INFO:
+ if (jd->needs_bswap) {
+ uint64_t n;
+ jr->info.code_addr = bswap_64(jr->info.code_addr);
+ jr->info.nr_entry = bswap_64(jr->info.nr_entry);
+ for (n = 0 ; n < jr->info.nr_entry; n++) {
+ jr->info.entries[n].addr = bswap_64(jr->info.entries[n].addr);
+ jr->info.entries[n].lineno = bswap_32(jr->info.entries[n].lineno);
+ jr->info.entries[n].discrim = bswap_32(jr->info.entries[n].discrim);
+ }
+ }
+ break;
+ case JIT_CODE_UNWINDING_INFO:
+ if (jd->needs_bswap) {
+ jr->unwinding.unwinding_size = bswap_64(jr->unwinding.unwinding_size);
+ jr->unwinding.eh_frame_hdr_size = bswap_64(jr->unwinding.eh_frame_hdr_size);
+ jr->unwinding.mapped_size = bswap_64(jr->unwinding.mapped_size);
+ }
+ break;
+ case JIT_CODE_CLOSE:
+ break;
+ case JIT_CODE_LOAD:
+ if (jd->needs_bswap) {
+ jr->load.pid = bswap_32(jr->load.pid);
+ jr->load.tid = bswap_32(jr->load.tid);
+ jr->load.vma = bswap_64(jr->load.vma);
+ jr->load.code_addr = bswap_64(jr->load.code_addr);
+ jr->load.code_size = bswap_64(jr->load.code_size);
+ jr->load.code_index = bswap_64(jr->load.code_index);
+ }
+ jd->code_load_count++;
+ break;
+ case JIT_CODE_MOVE:
+ if (jd->needs_bswap) {
+ jr->move.pid = bswap_32(jr->move.pid);
+ jr->move.tid = bswap_32(jr->move.tid);
+ jr->move.vma = bswap_64(jr->move.vma);
+ jr->move.old_code_addr = bswap_64(jr->move.old_code_addr);
+ jr->move.new_code_addr = bswap_64(jr->move.new_code_addr);
+ jr->move.code_size = bswap_64(jr->move.code_size);
+ jr->move.code_index = bswap_64(jr->move.code_index);
+ }
+ break;
+ case JIT_CODE_MAX:
+ default:
+ /* skip unknown records (we have already read them) */
+ break;
+ }
+ return jr;
+}
+
+static int
+jit_inject_event(struct jit_buf_desc *jd, union perf_event *event)
+{
+ ssize_t size;
+
+ size = perf_data__write(jd->output, event, event->header.size);
+ if (size < 0)
+ return -1;
+
+ jd->bytes_written += size;
+ return 0;
+}
+
+static uint64_t convert_timestamp(struct jit_buf_desc *jd, uint64_t timestamp)
+{
+ struct perf_tsc_conversion tc;
+
+ if (!jd->use_arch_timestamp)
+ return timestamp;
+
+ tc.time_shift = jd->session->time_conv.time_shift;
+ tc.time_mult = jd->session->time_conv.time_mult;
+ tc.time_zero = jd->session->time_conv.time_zero;
+
+ if (!tc.time_mult)
+ return 0;
+
+ return tsc_to_perf_time(timestamp, &tc);
+}
+
+static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ struct perf_sample sample;
+ union perf_event *event;
+ struct perf_tool *tool = jd->session->tool;
+ uint64_t code, addr;
+ uintptr_t uaddr;
+ char *filename;
+ struct stat st;
+ size_t size;
+ u16 idr_size;
+ const char *sym;
+ uint64_t count;
+ int ret, csize, usize;
+ pid_t pid, tid;
+ struct {
+ u32 pid, tid;
+ u64 time;
+ } *id;
+
+ pid = jr->load.pid;
+ tid = jr->load.tid;
+ csize = jr->load.code_size;
+ usize = jd->unwinding_mapped_size;
+ addr = jr->load.code_addr;
+ sym = (void *)((unsigned long)jr + sizeof(jr->load));
+ code = (unsigned long)jr + jr->load.p.total_size - csize;
+ count = jr->load.code_index;
+ idr_size = jd->machine->id_hdr_size;
+
+ event = calloc(1, sizeof(*event) + idr_size);
+ if (!event)
+ return -1;
+
+ filename = event->mmap2.filename;
+ size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
+ jd->dir,
+ pid,
+ count);
+
+ size++; /* for \0 */
+
+ size = PERF_ALIGN(size, sizeof(u64));
+ uaddr = (uintptr_t)code;
+ ret = jit_emit_elf(filename, sym, addr, (const void *)uaddr, csize, jd->debug_data, jd->nr_debug_entries,
+ jd->unwinding_data, jd->eh_frame_hdr_size, jd->unwinding_size);
+
+ if (jd->debug_data && jd->nr_debug_entries) {
+ free(jd->debug_data);
+ jd->debug_data = NULL;
+ jd->nr_debug_entries = 0;
+ }
+
+ if (jd->unwinding_data && jd->eh_frame_hdr_size) {
+ free(jd->unwinding_data);
+ jd->unwinding_data = NULL;
+ jd->eh_frame_hdr_size = 0;
+ jd->unwinding_mapped_size = 0;
+ jd->unwinding_size = 0;
+ }
+
+ if (ret) {
+ free(event);
+ return -1;
+ }
+ if (stat(filename, &st))
+ memset(&st, 0, sizeof(st));
+
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size) + idr_size);
+
+ event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+ event->mmap2.start = addr;
+ event->mmap2.len = usize ? ALIGN_8(csize) + usize : csize;
+ event->mmap2.pid = pid;
+ event->mmap2.tid = tid;
+ event->mmap2.ino = st.st_ino;
+ event->mmap2.maj = major(st.st_dev);
+ event->mmap2.min = minor(st.st_dev);
+ event->mmap2.prot = st.st_mode;
+ event->mmap2.flags = MAP_SHARED;
+ event->mmap2.ino_generation = 1;
+
+ id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+ if (jd->sample_type & PERF_SAMPLE_TID) {
+ id->pid = pid;
+ id->tid = tid;
+ }
+ if (jd->sample_type & PERF_SAMPLE_TIME)
+ id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+ /*
+ * create a pseudo sample to induce a dso hit increment;
+ * use the first address as the sample address
+ */
+ memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = pid;
+ sample.tid = tid;
+ sample.time = id->time;
+ sample.ip = addr;
+
+ ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+ if (ret)
+ return ret;
+
+ ret = jit_inject_event(jd, event);
+ /*
+ * mark the dso as used, to generate the buildid in the header
+ */
+ if (!ret)
+ build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+ return ret;
+}
+
+static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ struct perf_sample sample;
+ union perf_event *event;
+ struct perf_tool *tool = jd->session->tool;
+ char *filename;
+ size_t size;
+ struct stat st;
+ int usize;
+ u16 idr_size;
+ int ret;
+ pid_t pid, tid;
+ struct {
+ u32 pid, tid;
+ u64 time;
+ } *id;
+
+ pid = jr->move.pid;
+ tid = jr->move.tid;
+ usize = jd->unwinding_mapped_size;
+ idr_size = jd->machine->id_hdr_size;
+
+ /*
+ * +16 to account for sample_id_all (hack)
+ */
+ event = calloc(1, sizeof(*event) + 16);
+ if (!event)
+ return -1;
+
+ filename = event->mmap2.filename;
+ size = snprintf(filename, PATH_MAX, "%s/jitted-%d-%" PRIu64 ".so",
+ jd->dir,
+ pid,
+ jr->move.code_index);
+
+ size++; /* for \0 */
+
+ if (stat(filename, &st))
+ memset(&st, 0, sizeof(st));
+
+ size = PERF_ALIGN(size, sizeof(u64));
+
+ event->mmap2.header.type = PERF_RECORD_MMAP2;
+ event->mmap2.header.misc = PERF_RECORD_MISC_USER;
+ event->mmap2.header.size = (sizeof(event->mmap2) -
+ (sizeof(event->mmap2.filename) - size) + idr_size);
+ event->mmap2.pgoff = GEN_ELF_TEXT_OFFSET;
+ event->mmap2.start = jr->move.new_code_addr;
+ event->mmap2.len = usize ? ALIGN_8(jr->move.code_size) + usize
+ : jr->move.code_size;
+ event->mmap2.pid = pid;
+ event->mmap2.tid = tid;
+ event->mmap2.ino = st.st_ino;
+ event->mmap2.maj = major(st.st_dev);
+ event->mmap2.min = minor(st.st_dev);
+ event->mmap2.prot = st.st_mode;
+ event->mmap2.flags = MAP_SHARED;
+ event->mmap2.ino_generation = 1;
+
+ id = (void *)((unsigned long)event + event->mmap.header.size - idr_size);
+ if (jd->sample_type & PERF_SAMPLE_TID) {
+ id->pid = pid;
+ id->tid = tid;
+ }
+ if (jd->sample_type & PERF_SAMPLE_TIME)
+ id->time = convert_timestamp(jd, jr->load.p.timestamp);
+
+ /*
+ * create a pseudo sample to induce a dso hit increment;
+ * use the first address as the sample address
+ */
+ memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
+ sample.pid = pid;
+ sample.tid = tid;
+ sample.time = id->time;
+ sample.ip = jr->move.new_code_addr;
+
+ ret = perf_event__process_mmap2(tool, event, &sample, jd->machine);
+ if (ret)
+ return ret;
+
+ ret = jit_inject_event(jd, event);
+ if (!ret)
+ build_id__mark_dso_hit(tool, event, &sample, NULL, jd->machine);
+
+ return ret;
+}
+
+static int jit_repipe_debug_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ void *data;
+ size_t sz;
+
+ if (!(jd && jr))
+ return -1;
+
+ sz = jr->prefix.total_size - sizeof(jr->info);
+ data = malloc(sz);
+ if (!data)
+ return -1;
+
+ memcpy(data, &jr->info.entries, sz);
+
+ jd->debug_data = data;
+
+ /*
+ * we must use nr_entry instead of size here because
+ * we cannot otherwise distinguish actual entries from padding
+ */
+ jd->nr_debug_entries = jr->info.nr_entry;
+
+ return 0;
+}
+
+static int
+jit_repipe_unwinding_info(struct jit_buf_desc *jd, union jr_entry *jr)
+{
+ void *unwinding_data;
+ uint32_t unwinding_data_size;
+
+ if (!(jd && jr))
+ return -1;
+
+ unwinding_data_size = jr->prefix.total_size - sizeof(jr->unwinding);
+ unwinding_data = malloc(unwinding_data_size);
+ if (!unwinding_data)
+ return -1;
+
+ memcpy(unwinding_data, &jr->unwinding.unwinding_data,
+ unwinding_data_size);
+
+ jd->eh_frame_hdr_size = jr->unwinding.eh_frame_hdr_size;
+ jd->unwinding_size = jr->unwinding.unwinding_size;
+ jd->unwinding_mapped_size = jr->unwinding.mapped_size;
+ jd->unwinding_data = unwinding_data;
+
+ return 0;
+}
+
+static int
+jit_process_dump(struct jit_buf_desc *jd)
+{
+ union jr_entry *jr;
+ int ret = 0;
+
+ while ((jr = jit_get_next_entry(jd))) {
+ switch(jr->prefix.id) {
+ case JIT_CODE_LOAD:
+ ret = jit_repipe_code_load(jd, jr);
+ break;
+ case JIT_CODE_MOVE:
+ ret = jit_repipe_code_move(jd, jr);
+ break;
+ case JIT_CODE_DEBUG_INFO:
+ ret = jit_repipe_debug_info(jd, jr);
+ break;
+ case JIT_CODE_UNWINDING_INFO:
+ ret = jit_repipe_unwinding_info(jd, jr);
+ break;
+ default:
+ ret = 0;
+ continue;
+ }
+ }
+ return ret;
+}
+
+static int
+jit_inject(struct jit_buf_desc *jd, char *path)
+{
+ int ret;
+
+ if (verbose > 0)
+ fprintf(stderr, "injecting: %s\n", path);
+
+ ret = jit_open(jd, path);
+ if (ret)
+ return -1;
+
+ ret = jit_process_dump(jd);
+
+ jit_close(jd);
+
+ if (verbose > 0)
+ fprintf(stderr, "injected: %s (%d)\n", path, ret);
+
+ return 0;
+}
+
+/*
+ * The file name must match the pattern .../jit-XXXX.dump,
+ * where XXXX is the PID of the process which did the mmap()
+ * as captured in the RECORD_MMAP record
+ */
+static int
+jit_detect(char *mmap_name, pid_t pid)
+{
+ char *p;
+ char *end = NULL;
+ pid_t pid2;
+
+ if (verbose > 2)
+ fprintf(stderr, "jit marker trying : %s\n", mmap_name);
+ /*
+ * get file name
+ */
+ p = strrchr(mmap_name, '/');
+ if (!p)
+ return -1;
+
+ /*
+ * match prefix
+ */
+ if (strncmp(p, "/jit-", 5))
+ return -1;
+
+ /*
+ * skip prefix
+ */
+ p += 5;
+
+ /*
+ * must be followed by a pid
+ */
+ if (!isdigit(*p))
+ return -1;
+
+ pid2 = (int)strtol(p, &end, 10);
+ if (!end)
+ return -1;
+
+ /*
+ * pid does not match mmap pid
+ * pid==0 in system-wide mode (synthesized)
+ */
+ if (pid && pid2 != pid)
+ return -1;
+ /*
+ * validate suffix
+ */
+ if (strcmp(end, ".dump"))
+ return -1;
+
+ if (verbose > 0)
+ fprintf(stderr, "jit marker found: %s\n", mmap_name);
+
+ return 0;
+}
+
+int
+jit_process(struct perf_session *session,
+ struct perf_data *output,
+ struct machine *machine,
+ char *filename,
+ pid_t pid,
+ u64 *nbytes)
+{
+ struct perf_evsel *first;
+ struct jit_buf_desc jd;
+ int ret;
+
+ /*
+ * first, detect marker mmap (i.e., the jitdump mmap)
+ */
+ if (jit_detect(filename, pid))
+ return 0;
+
+ memset(&jd, 0, sizeof(jd));
+
+ jd.session = session;
+ jd.output = output;
+ jd.machine = machine;
+
+ /*
+ * track sample_type to compute id_all layout
+ * perf currently sets the same sample type on all events
+ */
+ first = perf_evlist__first(session->evlist);
+ jd.sample_type = first->attr.sample_type;
+
+ *nbytes = 0;
+
+ ret = jit_inject(&jd, filename);
+ if (!ret) {
+ *nbytes = jd.bytes_written;
+ ret = 1;
+ }
+
+ return ret;
+}
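
For reference, convert_timestamp() above hands arch timestamps to tsc_to_perf_time(); a sketch of that fixed-point conversion, mirroring what util/tsc.c does with the time_shift/time_mult/time_zero triple (shown for illustration only, the struct name here is hypothetical):

    #include <stdint.h>

    struct tsc_conv_sketch {
            uint16_t time_shift;
            uint32_t time_mult;
            uint64_t time_zero;
    };

    /* Convert a raw TSC value to perf time: split into quotient and
     * remainder by time_shift so the multiply is less likely to
     * overflow 64 bits. */
    static uint64_t tsc_to_perf_time_sketch(uint64_t cyc,
                                            const struct tsc_conv_sketch *tc)
    {
            uint64_t quot = cyc >> tc->time_shift;
            uint64_t rem  = cyc & (((uint64_t)1 << tc->time_shift) - 1);

            return tc->time_zero + quot * tc->time_mult +
                   ((rem * tc->time_mult) >> tc->time_shift);
    }
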
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
new file mode 100644
index 000000000..c6b9b67f4
--- /dev/null
+++ b/tools/perf/util/jitdump.h
@@ -0,0 +1,139 @@
+/*
+ * jitdump.h: jitted code info encapsulation file format
+ *
+ * Adapted from OProfile GPLv2 support jidump.h:
+ * Copyright 2007 OProfile authors
+ * Jens Wilke
+ * Daniel Hansel
+ * Copyright IBM Corporation 2007
+ */
+#ifndef JITDUMP_H
+#define JITDUMP_H
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdint.h>
+
+/* JiTD */
+#define JITHEADER_MAGIC 0x4A695444
+#define JITHEADER_MAGIC_SW 0x4454694A
+
+#define PADDING_8ALIGNED(x) ((((x) + 7) & 7) ^ 7)
+#define ALIGN_8(x) (((x) + 7) & (~7))
+
+#define JITHEADER_VERSION 1
+
+enum jitdump_flags_bits {
+ JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT,
+ JITDUMP_FLAGS_MAX_BIT,
+};
+
+#define JITDUMP_FLAGS_ARCH_TIMESTAMP (1ULL << JITDUMP_FLAGS_ARCH_TIMESTAMP_BIT)
+
+#define JITDUMP_FLAGS_RESERVED (JITDUMP_FLAGS_MAX_BIT < 64 ? \
+ (~((1ULL << JITDUMP_FLAGS_MAX_BIT) - 1)) : 0)
+
+struct jitheader {
+ uint32_t magic; /* characters "jItD" */
+ uint32_t version; /* header version */
+ uint32_t total_size; /* total size of header */
+ uint32_t elf_mach; /* elf mach target */
+ uint32_t pad1; /* reserved */
+ uint32_t pid; /* JIT process id */
+ uint64_t timestamp; /* timestamp */
+ uint64_t flags; /* flags */
+};
+
+enum jit_record_type {
+ JIT_CODE_LOAD = 0,
+ JIT_CODE_MOVE = 1,
+ JIT_CODE_DEBUG_INFO = 2,
+ JIT_CODE_CLOSE = 3,
+ JIT_CODE_UNWINDING_INFO = 4,
+
+ JIT_CODE_MAX,
+};
+
+/* record prefix (mandatory in each record) */
+struct jr_prefix {
+ uint32_t id;
+ uint32_t total_size;
+ uint64_t timestamp;
+};
+
+struct jr_code_load {
+ struct jr_prefix p;
+
+ uint32_t pid;
+ uint32_t tid;
+ uint64_t vma;
+ uint64_t code_addr;
+ uint64_t code_size;
+ uint64_t code_index;
+};
+
+struct jr_code_close {
+ struct jr_prefix p;
+};
+
+struct jr_code_move {
+ struct jr_prefix p;
+
+ uint32_t pid;
+ uint32_t tid;
+ uint64_t vma;
+ uint64_t old_code_addr;
+ uint64_t new_code_addr;
+ uint64_t code_size;
+ uint64_t code_index;
+};
+
+struct debug_entry {
+ uint64_t addr;
+ int lineno; /* source line number starting at 1 */
+ int discrim; /* column discriminator, 0 is default */
+ const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+};
+
+struct jr_code_debug_info {
+ struct jr_prefix p;
+
+ uint64_t code_addr;
+ uint64_t nr_entry;
+ struct debug_entry entries[0];
+};
+
+struct jr_code_unwinding_info {
+ struct jr_prefix p;
+
+ uint64_t unwinding_size;
+ uint64_t eh_frame_hdr_size;
+ uint64_t mapped_size;
+ const char unwinding_data[0];
+};
+
+union jr_entry {
+ struct jr_code_debug_info info;
+ struct jr_code_close close;
+ struct jr_code_load load;
+ struct jr_code_move move;
+ struct jr_prefix prefix;
+ struct jr_code_unwinding_info unwinding;
+};
+
+static inline struct debug_entry *
+debug_entry_next(struct debug_entry *ent)
+{
+ void *a = ent + 1;
+ size_t l = strlen(ent->name) + 1;
+ return a + l;
+}
+
+static inline char *
+debug_entry_file(struct debug_entry *ent)
+{
+ void *a = ent + 1;
+ return a;
+}
+
+#endif /* !JITDUMP_H */
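
As a worked example of the format above: a producing JIT writes one struct jitheader, then a stream of records each starting with struct jr_prefix. A minimal, hypothetical header writer might look like this (the elf_mach value and output handling are assumptions):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>
    #include <time.h>
    #include <unistd.h>
    #include "jitdump.h"

    static int write_jitdump_header(FILE *f)
    {
            struct timespec ts;
            struct jitheader h;

            memset(&h, 0, sizeof(h));
            h.magic      = JITHEADER_MAGIC;         /* "JiTD" */
            h.version    = JITHEADER_VERSION;
            h.total_size = sizeof(h);
            h.elf_mach   = 62;                      /* assumption: EM_X86_64 */
            h.pid        = getpid();
            h.flags      = 0;                       /* CLOCK_MONOTONIC timestamps */

            clock_gettime(CLOCK_MONOTONIC, &ts);
            h.timestamp = (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;

            return fwrite(&h, sizeof(h), 1, f) == 1 ? 0 : -1;
    }
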
diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h
new file mode 100644
index 000000000..7b1f06567
--- /dev/null
+++ b/tools/perf/util/kvm-stat.h
@@ -0,0 +1,147 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_KVM_STAT_H
+#define __PERF_KVM_STAT_H
+
+#include "../perf.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "session.h"
+#include "tool.h"
+#include "stat.h"
+
+struct event_key {
+ #define INVALID_KEY (~0ULL)
+ u64 key;
+ int info;
+ struct exit_reasons_table *exit_reasons;
+};
+
+struct kvm_event_stats {
+ u64 time;
+ struct stats stats;
+};
+
+struct kvm_event {
+ struct list_head hash_entry;
+ struct rb_node rb;
+
+ struct event_key key;
+
+ struct kvm_event_stats total;
+
+ #define DEFAULT_VCPU_NUM 8
+ int max_vcpu;
+ struct kvm_event_stats *vcpu;
+};
+
+typedef int (*key_cmp_fun)(struct kvm_event*, struct kvm_event*, int);
+
+struct kvm_event_key {
+ const char *name;
+ key_cmp_fun key;
+};
+
+struct perf_kvm_stat;
+
+struct child_event_ops {
+ void (*get_key)(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+ const char *name;
+};
+
+struct kvm_events_ops {
+ bool (*is_begin_event)(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+ bool (*is_end_event)(struct perf_evsel *evsel,
+ struct perf_sample *sample, struct event_key *key);
+ struct child_event_ops *child_ops;
+ void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
+ char *decode);
+ const char *name;
+};
+
+struct exit_reasons_table {
+ unsigned long exit_code;
+ const char *reason;
+};
+
+#define EVENTS_BITS 12
+#define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS)
+
+struct perf_kvm_stat {
+ struct perf_tool tool;
+ struct record_opts opts;
+ struct perf_evlist *evlist;
+ struct perf_session *session;
+
+ const char *file_name;
+ const char *report_event;
+ const char *sort_key;
+ int trace_vcpu;
+
+ struct exit_reasons_table *exit_reasons;
+ const char *exit_reasons_isa;
+
+ struct kvm_events_ops *events_ops;
+ key_cmp_fun compare;
+ struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
+ u64 total_time;
+ u64 total_count;
+ u64 lost_events;
+ u64 duration;
+
+ struct intlist *pid_list;
+
+ struct rb_root result;
+
+ int timerfd;
+ unsigned int display_time;
+ bool live;
+ bool force;
+};
+
+struct kvm_reg_events_ops {
+ const char *name;
+ struct kvm_events_ops *ops;
+};
+
+void exit_event_get_key(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+bool exit_event_begin(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+bool exit_event_end(struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct event_key *key);
+void exit_event_decode_key(struct perf_kvm_stat *kvm,
+ struct event_key *key,
+ char *decode);
+
+bool kvm_exit_event(struct perf_evsel *evsel);
+bool kvm_entry_event(struct perf_evsel *evsel);
+int setup_kvm_events_tp(struct perf_kvm_stat *kvm);
+
+#define define_exit_reasons_table(name, symbols) \
+ static struct exit_reasons_table name[] = { \
+ symbols, { -1, NULL } \
+ }
+
+/*
+ * arch specific callbacks and data structures
+ */
+int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid);
+
+extern const char *kvm_events_tp[];
+extern struct kvm_reg_events_ops kvm_reg_events_ops[];
+extern const char * const kvm_skip_events[];
+extern const char *vcpu_id_str;
+extern const int decode_str_len;
+extern const char *kvm_exit_reason;
+extern const char *kvm_entry_trace;
+extern const char *kvm_exit_trace;
+
+#endif /* __PERF_KVM_STAT_H */
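
A small, self-contained example of the define_exit_reasons_table() macro above, assuming kvm-stat.h is included; the symbol list here is hypothetical (on x86 the arch code passes lists such as VMX_EXIT_REASONS):

    #define MY_EXIT_REASONS \
            { 0, "EXCEPTION_NMI" }, \
            { 1, "EXTERNAL_INTERRUPT" }

    /* Expands to a static, { -1, NULL }-terminated table of
     * struct exit_reasons_table entries named my_exit_reasons. */
    define_exit_reasons_table(my_exit_reasons, MY_EXIT_REASONS);
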
diff --git a/tools/perf/util/levenshtein.c b/tools/perf/util/levenshtein.c
new file mode 100644
index 000000000..a217ecf03
--- /dev/null
+++ b/tools/perf/util/levenshtein.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "levenshtein.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * This function implements the Damerau-Levenshtein algorithm to
+ * calculate a distance between strings.
+ *
+ * Basically, it says how many letters need to be swapped, substituted,
+ * deleted from, or added to string1, at least, to get string2.
+ *
+ * The idea is to build a distance matrix for the substrings of both
+ * strings. To avoid a large space complexity, only the last three rows
+ * are kept in memory (if swaps had the same or higher cost as one deletion
+ * plus one insertion, only two rows would be needed).
+ *
+ * At any stage, "i + 1" denotes the length of the current substring of
+ * string1 that the distance is calculated for.
+ *
+ * row2 holds the current row, row1 the previous row (i.e. for the substring
+ * of string1 of length "i"), and row0 the row before that.
+ *
+ * In other words, at the start of the big loop, row2[j + 1] contains the
+ * Damerau-Levenshtein distance between the substring of string1 of length
+ * "i" and the substring of string2 of length "j + 1".
+ *
+ * All the big loop does is determine the partial minimum-cost paths.
+ *
+ * It does so by calculating the costs of the path ending in characters
+ * i (in string1) and j (in string2), respectively, given that the last
+ * operation is a substitution, a swap, a deletion, or an insertion.
+ *
+ * This implementation allows the costs to be weighted:
+ *
+ * - w (as in "sWap")
+ * - s (as in "Substitution")
+ * - a (for insertion, AKA "Add")
+ * - d (as in "Deletion")
+ *
+ * Note that this algorithm calculates a distance _iff_ d == a.
+ */
+int levenshtein(const char *string1, const char *string2,
+ int w, int s, int a, int d)
+{
+ int len1 = strlen(string1), len2 = strlen(string2);
+ int *row0 = malloc(sizeof(int) * (len2 + 1));
+ int *row1 = malloc(sizeof(int) * (len2 + 1));
+ int *row2 = malloc(sizeof(int) * (len2 + 1));
+ int i, j;
+
+ for (j = 0; j <= len2; j++)
+ row1[j] = j * a;
+ for (i = 0; i < len1; i++) {
+ int *dummy;
+
+ row2[0] = (i + 1) * d;
+ for (j = 0; j < len2; j++) {
+ /* substitution */
+ row2[j + 1] = row1[j] + s * (string1[i] != string2[j]);
+ /* swap */
+ if (i > 0 && j > 0 && string1[i - 1] == string2[j] &&
+ string1[i] == string2[j - 1] &&
+ row2[j + 1] > row0[j - 1] + w)
+ row2[j + 1] = row0[j - 1] + w;
+ /* deletion */
+ if (row2[j + 1] > row1[j + 1] + d)
+ row2[j + 1] = row1[j + 1] + d;
+ /* insertion */
+ if (row2[j + 1] > row2[j] + a)
+ row2[j + 1] = row2[j] + a;
+ }
+
+ dummy = row0;
+ row0 = row1;
+ row1 = row2;
+ row2 = dummy;
+ }
+
+ i = row1[len2];
+ free(row0);
+ free(row1);
+ free(row2);
+
+ return i;
+}
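
A worked example: with all four weights set to 1 the function computes the familiar Damerau-Levenshtein distance, and raising the substitution weight makes the swap path visibly cheaper:

    #include <assert.h>
    #include "levenshtein.h"

    int main(void)
    {
            /* two substitutions (k->s, e->i) plus one insertion (g) */
            assert(levenshtein("kitten", "sitting", 1, 1, 1, 1) == 3);

            /* one swap at cost w = 1 beats two substitutions at s = 2 each */
            assert(levenshtein("ab", "ba", 1, 2, 2, 2) == 1);

            return 0;
    }
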
diff --git a/tools/perf/util/levenshtein.h b/tools/perf/util/levenshtein.h
new file mode 100644
index 000000000..34ca173c5
--- /dev/null
+++ b/tools/perf/util/levenshtein.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_LEVENSHTEIN_H
+#define __PERF_LEVENSHTEIN_H
+
+int levenshtein(const char *string1, const char *string2,
+ int swap_penalty, int substitution_penalty,
+ int insertion_penalty, int deletion_penalty);
+
+#endif /* __PERF_LEVENSHTEIN_H */
diff --git a/tools/perf/util/libunwind/arm64.c b/tools/perf/util/libunwind/arm64.c
new file mode 100644
index 000000000..66756e6be
--- /dev/null
+++ b/tools/perf/util/libunwind/arm64.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file sets up defines to compile an arch-specific binary from
+ * the generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the definition of this function is included directly from
+ * 'arch/arm64/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each arm64 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind; these must be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arm64_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_ARM64_PC
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_ARM64_SP
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-aarch64.h"
+#include <../../../../arch/arm64/include/uapi/asm/perf_regs.h>
+#include "../../arch/arm64/util/unwind-libunwind.c"
+
+/* NO_LIBUNWIND_DEBUG_FRAME is a feature flag for local libunwind;
+ * we assign NO_LIBUNWIND_DEBUG_FRAME_AARCH64 to it when compiling the
+ * arm64 unwind methods.
+ */
+#undef NO_LIBUNWIND_DEBUG_FRAME
+#ifdef NO_LIBUNWIND_DEBUG_FRAME_AARCH64
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+arm64_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/libunwind/x86_32.c b/tools/perf/util/libunwind/x86_32.c
new file mode 100644
index 000000000..c5e568188
--- /dev/null
+++ b/tools/perf/util/libunwind/x86_32.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This file sets up defines to compile an arch-specific binary from
+ * the generic one.
+ *
+ * The function 'LIBUNWIND__ARCH_REG_ID' name is set according to arch
+ * name and the definition of this function is included directly from
+ * 'arch/x86/util/unwind-libunwind.c', to make sure that this function
+ * is defined no matter what arch the host is.
+ *
+ * Finally, the arch specific unwind methods are exported which will
+ * be assigned to each x86 thread.
+ */
+
+#define REMOTE_UNWIND_LIBUNWIND
+
+/* Define arch specific functions & regs for libunwind; these must be
+ * defined before including "unwind.h"
+ */
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__x86_reg_id(regnum)
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_X86_IP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_X86_SP
+
+#include "unwind.h"
+#include "debug.h"
+#include "libunwind-x86.h"
+#include <../../../../arch/x86/include/uapi/asm/perf_regs.h>
+
+/* HAVE_ARCH_X86_64_SUPPORT is used in 'arch/x86/util/unwind-libunwind.c';
+ * for x86_32, we undef it to compile code for x86_32 only.
+ */
+#undef HAVE_ARCH_X86_64_SUPPORT
+#include "../../arch/x86/util/unwind-libunwind.c"
+
+/* Explicitly define NO_LIBUNWIND_DEBUG_FRAME, because non-ARM has no
+ * dwarf_find_debug_frame() function.
+ */
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+#define NO_LIBUNWIND_DEBUG_FRAME
+#endif
+#include "util/unwind-libunwind-local.c"
+
+struct unwind_libunwind_ops *
+x86_32_unwind_libunwind_ops = &_unwind_libunwind_ops;
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
new file mode 100644
index 000000000..46ec9a1bb
--- /dev/null
+++ b/tools/perf/util/llvm-utils.c
@@ -0,0 +1,579 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/err.h>
+#include "debug.h"
+#include "llvm-utils.h"
+#include "config.h"
+#include "util.h"
+#include <sys/wait.h>
+#include <subcmd/exec-cmd.h>
+
+#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
+ "$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
+ "-DLINUX_VERSION_CODE=$LINUX_VERSION_CODE " \
+ "$CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS " \
+ "-Wno-unused-value -Wno-pointer-sign " \
+ "-working-directory $WORKING_DIR " \
+ "-c \"$CLANG_SOURCE\" -target bpf $CLANG_EMIT_LLVM -O2 -o - $LLVM_OPTIONS_PIPE"
+
+struct llvm_param llvm_param = {
+ .clang_path = "clang",
+ .llc_path = "llc",
+ .clang_bpf_cmd_template = CLANG_BPF_CMD_DEFAULT_TEMPLATE,
+ .clang_opt = NULL,
+ .opts = NULL,
+ .kbuild_dir = NULL,
+ .kbuild_opts = NULL,
+ .user_set_param = false,
+};
+
+int perf_llvm_config(const char *var, const char *value)
+{
+ if (!strstarts(var, "llvm."))
+ return 0;
+ var += sizeof("llvm.") - 1;
+
+ if (!strcmp(var, "clang-path"))
+ llvm_param.clang_path = strdup(value);
+ else if (!strcmp(var, "clang-bpf-cmd-template"))
+ llvm_param.clang_bpf_cmd_template = strdup(value);
+ else if (!strcmp(var, "clang-opt"))
+ llvm_param.clang_opt = strdup(value);
+ else if (!strcmp(var, "kbuild-dir"))
+ llvm_param.kbuild_dir = strdup(value);
+ else if (!strcmp(var, "kbuild-opts"))
+ llvm_param.kbuild_opts = strdup(value);
+ else if (!strcmp(var, "dump-obj"))
+ llvm_param.dump_obj = !!perf_config_bool(var, value);
+ else if (!strcmp(var, "opts"))
+ llvm_param.opts = strdup(value);
+ else {
+ pr_debug("Invalid LLVM config option: %s\n", value);
+ return -1;
+ }
+ llvm_param.user_set_param = true;
+ return 0;
+}
+
+static int
+search_program(const char *def, const char *name,
+ char *output)
+{
+ char *env, *path, *tmp = NULL;
+ char buf[PATH_MAX];
+ int ret;
+
+ output[0] = '\0';
+ if (def && def[0] != '\0') {
+ if (def[0] == '/') {
+ if (access(def, F_OK) == 0) {
+ strlcpy(output, def, PATH_MAX);
+ return 0;
+ }
+ } else if (def[0] != '\0')
+ name = def;
+ }
+
+ env = getenv("PATH");
+ if (!env)
+ return -1;
+ env = strdup(env);
+ if (!env)
+ return -1;
+
+ ret = -ENOENT;
+ path = strtok_r(env, ":", &tmp);
+ while (path) {
+ scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+ if (access(buf, F_OK) == 0) {
+ strlcpy(output, buf, PATH_MAX);
+ ret = 0;
+ break;
+ }
+ path = strtok_r(NULL, ":", &tmp);
+ }
+
+ free(env);
+ return ret;
+}
+
+#define READ_SIZE 4096
+static int
+read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
+{
+ int err = 0;
+ void *buf = NULL;
+ FILE *file = NULL;
+ size_t read_sz = 0, buf_sz = 0;
+ char serr[STRERR_BUFSIZE];
+
+ file = popen(cmd, "r");
+ if (!file) {
+ pr_err("ERROR: unable to popen cmd: %s\n",
+ str_error_r(errno, serr, sizeof(serr)));
+ return -EINVAL;
+ }
+
+ while (!feof(file) && !ferror(file)) {
+ /*
+ * Make buf_sz always have one byte of extra space so we
+ * can put '\0' there.
+ */
+ if (buf_sz - read_sz < READ_SIZE + 1) {
+ void *new_buf;
+
+ buf_sz = read_sz + READ_SIZE + 1;
+ new_buf = realloc(buf, buf_sz);
+
+ if (!new_buf) {
+ pr_err("ERROR: failed to realloc memory\n");
+ err = -ENOMEM;
+ goto errout;
+ }
+
+ buf = new_buf;
+ }
+ read_sz += fread(buf + read_sz, 1, READ_SIZE, file);
+ }
+
+ if (buf_sz - read_sz < 1) {
+ pr_err("ERROR: internal error\n");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (ferror(file)) {
+ pr_err("ERROR: error occurred when reading from pipe: %s\n",
+ str_error_r(errno, serr, sizeof(serr)));
+ err = -EIO;
+ goto errout;
+ }
+
+ err = WEXITSTATUS(pclose(file));
+ file = NULL;
+ if (err) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ /*
+ * If buf is a string, give it a terminating '\0' to make our life
+ * easier. If buf is not a string, that '\0' lies beyond the size
+ * indicated by read_sz, so the caller won't even notice it.
+ */
+ ((char *)buf)[read_sz] = '\0';
+
+ if (!p_buf)
+ free(buf);
+ else
+ *p_buf = buf;
+
+ if (p_read_sz)
+ *p_read_sz = read_sz;
+ return 0;
+
+errout:
+ if (file)
+ pclose(file);
+ free(buf);
+ if (p_buf)
+ *p_buf = NULL;
+ if (p_read_sz)
+ *p_read_sz = 0;
+ return err;
+}
+
+static inline void
+force_set_env(const char *var, const char *value)
+{
+ if (value) {
+ setenv(var, value, 1);
+ pr_debug("set env: %s=%s\n", var, value);
+ } else {
+ unsetenv(var);
+ pr_debug("unset env: %s\n", var);
+ }
+}
+
+static void
+version_notice(void)
+{
+ pr_err(
+" \tLLVM 3.7 or newer is required. Which can be found from http://llvm.org\n"
+" \tYou may want to try git trunk:\n"
+" \t\tgit clone http://llvm.org/git/llvm.git\n"
+" \t\t and\n"
+" \t\tgit clone http://llvm.org/git/clang.git\n\n"
+" \tOr fetch the latest clang/llvm 3.7 from pre-built llvm packages for\n"
+" \tdebian/ubuntu:\n"
+" \t\thttp://llvm.org/apt\n\n"
+" \tIf you are using old version of clang, change 'clang-bpf-cmd-template'\n"
+" \toption in [llvm] section of ~/.perfconfig to:\n\n"
+" \t \"$CLANG_EXEC $CLANG_OPTIONS $KERNEL_INC_OPTIONS $PERF_BPF_INC_OPTIONS \\\n"
+" \t -working-directory $WORKING_DIR -c $CLANG_SOURCE \\\n"
+" \t -emit-llvm -o - | /path/to/llc -march=bpf -filetype=obj -o -\"\n"
+" \t(Replace /path/to/llc with path to your llc)\n\n"
+);
+}
+
+static int detect_kbuild_dir(char **kbuild_dir)
+{
+ const char *test_dir = llvm_param.kbuild_dir;
+ const char *prefix_dir = "";
+ const char *suffix_dir = "";
+
+ /* _UTSNAME_LENGTH is 65 */
+ char release[128];
+
+ char *autoconf_path;
+
+ int err;
+
+ if (!test_dir) {
+ err = fetch_kernel_version(NULL, release,
+ sizeof(release));
+ if (err)
+ return -EINVAL;
+
+ test_dir = release;
+ prefix_dir = "/lib/modules/";
+ suffix_dir = "/build";
+ }
+
+ err = asprintf(&autoconf_path, "%s%s%s/include/generated/autoconf.h",
+ prefix_dir, test_dir, suffix_dir);
+ if (err < 0)
+ return -ENOMEM;
+
+ if (access(autoconf_path, R_OK) == 0) {
+ free(autoconf_path);
+
+ err = asprintf(kbuild_dir, "%s%s%s", prefix_dir, test_dir,
+ suffix_dir);
+ if (err < 0)
+ return -ENOMEM;
+ return 0;
+ }
+ free(autoconf_path);
+ return -ENOENT;
+}
+
+static const char *kinc_fetch_script =
+"#!/usr/bin/env sh\n"
+"if ! test -d \"$KBUILD_DIR\"\n"
+"then\n"
+" exit 1\n"
+"fi\n"
+"if ! test -f \"$KBUILD_DIR/include/generated/autoconf.h\"\n"
+"then\n"
+" exit 1\n"
+"fi\n"
+"TMPDIR=`mktemp -d`\n"
+"if test -z \"$TMPDIR\"\n"
+"then\n"
+" exit 1\n"
+"fi\n"
+"cat << EOF > $TMPDIR/Makefile\n"
+"obj-y := dummy.o\n"
+"\\$(obj)/%.o: \\$(src)/%.c\n"
+"\t@echo -n \"\\$(NOSTDINC_FLAGS) \\$(LINUXINCLUDE) \\$(EXTRA_CFLAGS)\"\n"
+"EOF\n"
+"touch $TMPDIR/dummy.c\n"
+"make -s -C $KBUILD_DIR M=$TMPDIR $KBUILD_OPTS dummy.o 2>/dev/null\n"
+"RET=$?\n"
+"rm -rf $TMPDIR\n"
+"exit $RET\n";
+
+void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
+{
+ static char *saved_kbuild_dir;
+ static char *saved_kbuild_include_opts;
+ int err;
+
+ if (!kbuild_dir || !kbuild_include_opts)
+ return;
+
+ *kbuild_dir = NULL;
+ *kbuild_include_opts = NULL;
+
+ if (saved_kbuild_dir && saved_kbuild_include_opts &&
+ !IS_ERR(saved_kbuild_dir) && !IS_ERR(saved_kbuild_include_opts)) {
+ *kbuild_dir = strdup(saved_kbuild_dir);
+ *kbuild_include_opts = strdup(saved_kbuild_include_opts);
+
+ if (*kbuild_dir && *kbuild_include_opts)
+ return;
+
+ zfree(kbuild_dir);
+ zfree(kbuild_include_opts);
+ /*
+ * Don't fall through: it may break saved_kbuild_dir and
+ * saved_kbuild_include_opts if we detect them again when
+ * memory is low.
+ */
+ return;
+ }
+
+ if (llvm_param.kbuild_dir && !llvm_param.kbuild_dir[0]) {
+ pr_debug("[llvm.kbuild-dir] is set to \"\" deliberately.\n");
+ pr_debug("Skip kbuild options detection.\n");
+ goto errout;
+ }
+
+ err = detect_kbuild_dir(kbuild_dir);
+ if (err) {
+ pr_warning(
+"WARNING:\tunable to get correct kernel building directory.\n"
+"Hint:\tSet correct kbuild directory using 'kbuild-dir' option in [llvm]\n"
+" \tsection of ~/.perfconfig or set it to \"\" to suppress kbuild\n"
+" \tdetection.\n\n");
+ goto errout;
+ }
+
+ pr_debug("Kernel build dir is set to %s\n", *kbuild_dir);
+ force_set_env("KBUILD_DIR", *kbuild_dir);
+ force_set_env("KBUILD_OPTS", llvm_param.kbuild_opts);
+ err = read_from_pipe(kinc_fetch_script,
+ (void **)kbuild_include_opts,
+ NULL);
+ if (err) {
+ pr_warning(
+"WARNING:\tunable to get kernel include directories from '%s'\n"
+"Hint:\tTry set clang include options using 'clang-bpf-cmd-template'\n"
+" \toption in [llvm] section of ~/.perfconfig and set 'kbuild-dir'\n"
+" \toption in [llvm] to \"\" to suppress this detection.\n\n",
+ *kbuild_dir);
+
+ free(*kbuild_dir);
+ *kbuild_dir = NULL;
+ goto errout;
+ }
+
+ pr_debug("include option is set to %s\n", *kbuild_include_opts);
+
+ saved_kbuild_dir = strdup(*kbuild_dir);
+ saved_kbuild_include_opts = strdup(*kbuild_include_opts);
+
+ if (!saved_kbuild_dir || !saved_kbuild_include_opts) {
+ zfree(&saved_kbuild_dir);
+ zfree(&saved_kbuild_include_opts);
+ }
+ return;
+errout:
+ saved_kbuild_dir = ERR_PTR(-EINVAL);
+ saved_kbuild_include_opts = ERR_PTR(-EINVAL);
+}
+
+int llvm__get_nr_cpus(void)
+{
+ static int nr_cpus_avail = 0;
+ char serr[STRERR_BUFSIZE];
+
+ if (nr_cpus_avail > 0)
+ return nr_cpus_avail;
+
+ nr_cpus_avail = sysconf(_SC_NPROCESSORS_CONF);
+ if (nr_cpus_avail <= 0) {
+ pr_err(
+"WARNING:\tunable to get available CPUs in this system: %s\n"
+" \tUse 128 instead.\n", str_error_r(errno, serr, sizeof(serr)));
+ nr_cpus_avail = 128;
+ }
+ return nr_cpus_avail;
+}
+
+void llvm__dump_obj(const char *path, void *obj_buf, size_t size)
+{
+ char *obj_path = strdup(path);
+ FILE *fp;
+ char *p;
+
+ if (!obj_path) {
+ pr_warning("WARNING: Not enough memory, skip object dumping\n");
+ return;
+ }
+
+ p = strrchr(obj_path, '.');
+ if (!p || (strcmp(p, ".c") != 0)) {
+ pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n",
+ obj_path);
+ goto out;
+ }
+
+ p[1] = 'o';
+ fp = fopen(obj_path, "wb");
+ if (!fp) {
+ pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n",
+ obj_path, strerror(errno));
+ goto out;
+ }
+
+ pr_info("LLVM: dumping %s\n", obj_path);
+ if (fwrite(obj_buf, size, 1, fp) != 1)
+ pr_warning("WARNING: failed to write to file '%s': %s, skip object dumping\n",
+ obj_path, strerror(errno));
+ fclose(fp);
+out:
+ free(obj_path);
+}
+
+int llvm__compile_bpf(const char *path, void **p_obj_buf,
+ size_t *p_obj_buf_sz)
+{
+ size_t obj_buf_sz;
+ void *obj_buf = NULL;
+ int err, nr_cpus_avail;
+ unsigned int kernel_version;
+ char linux_version_code_str[64];
+ const char *clang_opt = llvm_param.clang_opt;
+ char clang_path[PATH_MAX], llc_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+ char serr[STRERR_BUFSIZE];
+ char *kbuild_dir = NULL, *kbuild_include_opts = NULL,
+ *perf_bpf_include_opts = NULL;
+ const char *template = llvm_param.clang_bpf_cmd_template;
+ char *pipe_template = NULL;
+ const char *opts = llvm_param.opts;
+ char *command_echo = NULL, *command_out;
+ char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
+
+ if (path[0] != '-' && realpath(path, abspath) == NULL) {
+ err = errno;
+ pr_err("ERROR: problems with path %s: %s\n",
+ path, str_error_r(err, serr, sizeof(serr)));
+ return -err;
+ }
+
+ if (!template)
+ template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
+
+ err = search_program(llvm_param.clang_path,
+ "clang", clang_path);
+ if (err) {
+ pr_err(
+"ERROR:\tunable to find clang.\n"
+"Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+" \tand 'clang-path' option in [llvm] section of ~/.perfconfig.\n");
+ version_notice();
+ return -ENOENT;
+ }
+
+ /*
+ * This is optional work. Even if it fails we can continue.
+ * No need to check the error return.
+ */
+ llvm__get_kbuild_opts(&kbuild_dir, &kbuild_include_opts);
+
+ nr_cpus_avail = llvm__get_nr_cpus();
+ snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
+ nr_cpus_avail);
+
+ if (fetch_kernel_version(&kernel_version, NULL, 0))
+ kernel_version = 0;
+
+ snprintf(linux_version_code_str, sizeof(linux_version_code_str),
+ "0x%x", kernel_version);
+ if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+ goto errout;
+ force_set_env("NR_CPUS", nr_cpus_avail_str);
+ force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
+ force_set_env("CLANG_EXEC", clang_path);
+ force_set_env("CLANG_OPTIONS", clang_opt);
+ force_set_env("KERNEL_INC_OPTIONS", kbuild_include_opts);
+ force_set_env("PERF_BPF_INC_OPTIONS", perf_bpf_include_opts);
+ force_set_env("WORKING_DIR", kbuild_dir ? : ".");
+
+ if (opts) {
+ err = search_program(llvm_param.llc_path, "llc", llc_path);
+ if (err) {
+ pr_err("ERROR:\tunable to find llc.\n"
+ "Hint:\tTry to install latest clang/llvm to support BPF. Check your $PATH\n"
+ " \tand 'llc-path' option in [llvm] section of ~/.perfconfig.\n");
+ version_notice();
+ goto errout;
+ }
+
+ err = -ENOMEM;
+ if (asprintf(&pipe_template, "%s -emit-llvm | %s -march=bpf %s -filetype=obj -o -",
+ template, llc_path, opts) < 0) {
+ pr_err("ERROR:\tnot enough memory to setup command line\n");
+ goto errout;
+ }
+
+ template = pipe_template;
+
+ }
+
+ /*
+ * Since we may reset clang's working dir, the path of the source
+ * file should be converted to an absolute path, unless we want
+ * stdin to be the source file (for testing).
+ */
+ force_set_env("CLANG_SOURCE",
+ (path[0] == '-') ? path : abspath);
+
+ pr_debug("llvm compiling command template: %s\n", template);
+
+ err = -ENOMEM;
+ if (asprintf(&command_echo, "echo -n \"%s\"", template) < 0)
+ goto errout;
+
+ err = read_from_pipe(command_echo, (void **) &command_out, NULL);
+ if (err)
+ goto errout;
+
+ pr_debug("llvm compiling command : %s\n", command_out);
+
+ err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
+ if (err) {
+ pr_err("ERROR:\tunable to compile %s\n", path);
+ pr_err("Hint:\tCheck error message shown above.\n");
+ pr_err("Hint:\tYou can also pre-compile it into .o using:\n");
+ pr_err(" \t\tclang -target bpf -O2 -c %s\n", path);
+ pr_err(" \twith proper -I and -D options.\n");
+ goto errout;
+ }
+
+ free(command_echo);
+ free(command_out);
+ free(kbuild_dir);
+ free(kbuild_include_opts);
+ free(perf_bpf_include_opts);
+ free(perf_include_dir);
+
+ if (!p_obj_buf)
+ free(obj_buf);
+ else
+ *p_obj_buf = obj_buf;
+
+ if (p_obj_buf_sz)
+ *p_obj_buf_sz = obj_buf_sz;
+ return 0;
+errout:
+ free(command_echo);
+ free(kbuild_dir);
+ free(kbuild_include_opts);
+ free(obj_buf);
+ free(perf_bpf_include_opts);
+ free(perf_include_dir);
+ free(pipe_template);
+ if (p_obj_buf)
+ *p_obj_buf = NULL;
+ if (p_obj_buf_sz)
+ *p_obj_buf_sz = 0;
+ return err;
+}
+
+int llvm__search_clang(void)
+{
+ char clang_path[PATH_MAX];
+
+ return search_program(llvm_param.clang_path, "clang", clang_path);
+}
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
new file mode 100644
index 000000000..bf3f3f4c4
--- /dev/null
+++ b/tools/perf/util/llvm-utils.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2015, Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015, Huawei Inc.
+ */
+#ifndef __LLVM_UTILS_H
+#define __LLVM_UTILS_H
+
+#include "debug.h"
+
+struct llvm_param {
+ /* Path of clang executable */
+ const char *clang_path;
+ /* Path of llc executable */
+ const char *llc_path;
+ /*
+ * Template for compiling BPF with clang. 5 environment
+ * variables can be used:
+ * $CLANG_EXEC: Path to clang.
+ * $CLANG_OPTIONS: Extra options to clang.
+ * $KERNEL_INC_OPTIONS: Kernel include directories.
+ * $WORKING_DIR: Kernel source directory.
+ * $CLANG_SOURCE: Source file to be compiled.
+ */
+ const char *clang_bpf_cmd_template;
+ /* Will be filled into $CLANG_OPTIONS */
+ const char *clang_opt;
+ /*
+ * If present it'll add -emit-llvm to $CLANG_OPTIONS to pipe
+ * the clang output to llc, useful for new llvm options not
+ * yet selectable via 'clang -mllvm option', such as -mattr=dwarfris
+ * in clang 6.0/llvm 7
+ */
+ const char *opts;
+ /* Where to find kbuild system */
+ const char *kbuild_dir;
+ /*
+ * Arguments passed to make, like 'ARCH=arm' if doing cross
+ * compiling. Should not be used for dynamic compiling.
+ */
+ const char *kbuild_opts;
+ /*
+ * Default is false. If set to true, write compiling result
+ * to object file.
+ */
+ bool dump_obj;
+ /*
+ * Default is false. If one of the above fields is set by the user
+ * explicitly then user_set_param is set to true. This is used
+ * for perf test. If user doesn't set anything in .perfconfig
+ * and clang is not found, don't trigger llvm test.
+ */
+ bool user_set_param;
+};
+
+extern struct llvm_param llvm_param;
+int perf_llvm_config(const char *var, const char *value);
+
+int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
+
+/* This function is for test__llvm() use only */
+int llvm__search_clang(void);
+
+/* Following functions are reused by builtin clang support */
+void llvm__get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts);
+int llvm__get_nr_cpus(void);
+
+void llvm__dump_obj(const char *path, void *obj_buf, size_t size);
+#endif
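
A minimal caller sketch for this interface; the source path is hypothetical, and clang is located through llvm_param.clang_path or $PATH as implemented in llvm-utils.c:

    #include <stdlib.h>
    #include "llvm-utils.h"

    static int compile_bpf_sketch(void)
    {
            void *obj_buf = NULL;
            size_t obj_buf_sz = 0;
            int err;

            err = llvm__compile_bpf("/tmp/sample_bpf.c", &obj_buf, &obj_buf_sz);
            if (err)
                    return err;

            /* optionally mirror the compile result next to the source */
            if (llvm_param.dump_obj)
                    llvm__dump_obj("/tmp/sample_bpf.c", obj_buf, obj_buf_sz);

            free(obj_buf);
            return 0;
    }
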
diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c
new file mode 100644
index 000000000..6c844110f
--- /dev/null
+++ b/tools/perf/util/lzma.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <lzma.h>
+#include <stdio.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include "compress.h"
+#include "util.h"
+#include "debug.h"
+#include <unistd.h>
+
+#define BUFSIZE 8192
+
+static const char *lzma_strerror(lzma_ret ret)
+{
+ switch ((int) ret) {
+ case LZMA_MEM_ERROR:
+ return "Memory allocation failed";
+ case LZMA_OPTIONS_ERROR:
+ return "Unsupported decompressor flags";
+ case LZMA_FORMAT_ERROR:
+ return "The input is not in the .xz format";
+ case LZMA_DATA_ERROR:
+ return "Compressed file is corrupt";
+ case LZMA_BUF_ERROR:
+ return "Compressed file is truncated or otherwise corrupt";
+ default:
+ return "Unknown error, possibly a bug";
+ }
+}
+
+int lzma_decompress_to_file(const char *input, int output_fd)
+{
+ lzma_action action = LZMA_RUN;
+ lzma_stream strm = LZMA_STREAM_INIT;
+ lzma_ret ret;
+ int err = -1;
+
+ u8 buf_in[BUFSIZE];
+ u8 buf_out[BUFSIZE];
+ FILE *infile;
+
+ infile = fopen(input, "rb");
+ if (!infile) {
+ pr_err("lzma: fopen failed on %s: '%s'\n",
+ input, strerror(errno));
+ return -1;
+ }
+
+ ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED);
+ if (ret != LZMA_OK) {
+ pr_err("lzma: lzma_stream_decoder failed %s (%d)\n",
+ lzma_strerror(ret), ret);
+ goto err_fclose;
+ }
+
+ strm.next_in = NULL;
+ strm.avail_in = 0;
+ strm.next_out = buf_out;
+ strm.avail_out = sizeof(buf_out);
+
+ while (1) {
+ if (strm.avail_in == 0 && !feof(infile)) {
+ strm.next_in = buf_in;
+ strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile);
+
+ if (ferror(infile)) {
+ pr_err("lzma: read error: %s\n", strerror(errno));
+ goto err_lzma_end;
+ }
+
+ if (feof(infile))
+ action = LZMA_FINISH;
+ }
+
+ ret = lzma_code(&strm, action);
+
+ if (strm.avail_out == 0 || ret == LZMA_STREAM_END) {
+ ssize_t write_size = sizeof(buf_out) - strm.avail_out;
+
+ if (writen(output_fd, buf_out, write_size) != write_size) {
+ pr_err("lzma: write error: %s\n", strerror(errno));
+ goto err_lzma_end;
+ }
+
+ strm.next_out = buf_out;
+ strm.avail_out = sizeof(buf_out);
+ }
+
+ if (ret != LZMA_OK) {
+ if (ret == LZMA_STREAM_END)
+ break;
+
+ pr_err("lzma: failed %s\n", lzma_strerror(ret));
+ goto err_lzma_end;
+ }
+ }
+
+ err = 0;
+err_lzma_end:
+ lzma_end(&strm);
+err_fclose:
+ fclose(infile);
+ return err;
+}
+
+bool lzma_is_compressed(const char *input)
+{
+ int fd = open(input, O_RDONLY);
+ const uint8_t magic[6] = { 0xFD, '7', 'z', 'X', 'Z', 0x00 };
+ char buf[6] = { 0 };
+ ssize_t rc;
+
+ if (fd < 0)
+ return false;
+
+ rc = read(fd, buf, sizeof(buf));
+ close(fd);
+ return rc == sizeof(buf) ?
+ memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
new file mode 100644
index 000000000..4ada233b3
--- /dev/null
+++ b/tools/perf/util/machine.c
@@ -0,0 +1,2607 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include "callchain.h"
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "hist.h"
+#include "machine.h"
+#include "map.h"
+#include "sort.h"
+#include "strlist.h"
+#include "thread.h"
+#include "vdso.h"
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "unwind.h"
+#include "linux/hash.h"
+#include "asm/bug.h"
+
+#include "sane_ctype.h"
+#include <symbol/kallsyms.h>
+#include <linux/mman.h>
+
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
+
+static void dsos__init(struct dsos *dsos)
+{
+ INIT_LIST_HEAD(&dsos->head);
+ dsos->root = RB_ROOT;
+ init_rwsem(&dsos->lock);
+}
+
+static void machine__threads_init(struct machine *machine)
+{
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ threads->entries = RB_ROOT;
+ init_rwsem(&threads->lock);
+ threads->nr = 0;
+ INIT_LIST_HEAD(&threads->dead);
+ threads->last_match = NULL;
+ }
+}
+
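+/*
+ * The kernel mmap name depends on the machine type: the host uses
+ * "[kernel.kallsyms]", the default guest "[guest.kernel.kallsyms]",
+ * and other guests embed their pid in the name.
+ */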
+static int machine__set_mmap_name(struct machine *machine)
+{
+ if (machine__is_host(machine))
+ machine->mmap_name = strdup("[kernel.kallsyms]");
+ else if (machine__is_default_guest(machine))
+ machine->mmap_name = strdup("[guest.kernel.kallsyms]");
+ else if (asprintf(&machine->mmap_name, "[guest.kernel.kallsyms.%d]",
+ machine->pid) < 0)
+ machine->mmap_name = NULL;
+
+ return machine->mmap_name ? 0 : -ENOMEM;
+}
+
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
+{
+ int err = -ENOMEM;
+
+ memset(machine, 0, sizeof(*machine));
+ map_groups__init(&machine->kmaps, machine);
+ RB_CLEAR_NODE(&machine->rb_node);
+ dsos__init(&machine->dsos);
+
+ machine__threads_init(machine);
+
+ machine->vdso_info = NULL;
+ machine->env = NULL;
+
+ machine->pid = pid;
+
+ machine->id_hdr_size = 0;
+ machine->kptr_restrict_warned = false;
+ machine->comm_exec = false;
+ machine->kernel_start = 0;
+ machine->vmlinux_map = NULL;
+
+ machine->root_dir = strdup(root_dir);
+ if (machine->root_dir == NULL)
+ return -ENOMEM;
+
+ if (machine__set_mmap_name(machine))
+ goto out;
+
+ if (pid != HOST_KERNEL_ID) {
+ struct thread *thread = machine__findnew_thread(machine, -1,
+ pid);
+ char comm[64];
+
+ if (thread == NULL)
+ goto out;
+
+ snprintf(comm, sizeof(comm), "[guest/%d]", pid);
+ thread__set_comm(thread, comm, 0);
+ thread__put(thread);
+ }
+
+ machine->current_tid = NULL;
+ err = 0;
+
+out:
+ if (err) {
+ zfree(&machine->root_dir);
+ zfree(&machine->mmap_name);
+ }
+ return err;
+}
+
+struct machine *machine__new_host(void)
+{
+ struct machine *machine = malloc(sizeof(*machine));
+
+ if (machine != NULL) {
+ machine__init(machine, "", HOST_KERNEL_ID);
+
+ if (machine__create_kernel_maps(machine) < 0)
+ goto out_delete;
+ }
+
+ return machine;
+out_delete:
+ free(machine);
+ return NULL;
+}
+
+struct machine *machine__new_kallsyms(void)
+{
+ struct machine *machine = machine__new_host();
+ /*
+ * FIXME:
+ * 1) We should switch to machine__load_kallsyms(), i.e. not explicitly
+ * ask for not using the kcore parsing code, once this one is fixed
+ * to create a map per module.
+ */
+ if (machine && machine__load_kallsyms(machine, "/proc/kallsyms") <= 0) {
+ machine__delete(machine);
+ machine = NULL;
+ }
+
+ return machine;
+}
+
+static void dsos__purge(struct dsos *dsos)
+{
+ struct dso *pos, *n;
+
+ down_write(&dsos->lock);
+
+ list_for_each_entry_safe(pos, n, &dsos->head, node) {
+ RB_CLEAR_NODE(&pos->rb_node);
+ pos->root = NULL;
+ list_del_init(&pos->node);
+ dso__put(pos);
+ }
+
+ up_write(&dsos->lock);
+}
+
+static void dsos__exit(struct dsos *dsos)
+{
+ dsos__purge(dsos);
+ exit_rwsem(&dsos->lock);
+}
+
+void machine__delete_threads(struct machine *machine)
+{
+ struct rb_node *nd;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ down_write(&threads->lock);
+ nd = rb_first(&threads->entries);
+ while (nd) {
+ struct thread *t = rb_entry(nd, struct thread, rb_node);
+
+ nd = rb_next(nd);
+ __machine__remove_thread(machine, t, false);
+ }
+ up_write(&threads->lock);
+ }
+}
+
+void machine__exit(struct machine *machine)
+{
+ int i;
+
+ if (machine == NULL)
+ return;
+
+ machine__destroy_kernel_maps(machine);
+ map_groups__exit(&machine->kmaps);
+ dsos__exit(&machine->dsos);
+ machine__exit_vdso(machine);
+ zfree(&machine->root_dir);
+ zfree(&machine->mmap_name);
+ zfree(&machine->current_tid);
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+ exit_rwsem(&threads->lock);
+ }
+}
+
+void machine__delete(struct machine *machine)
+{
+ if (machine) {
+ machine__exit(machine);
+ free(machine);
+ }
+}
+
+void machines__init(struct machines *machines)
+{
+ machine__init(&machines->host, "", HOST_KERNEL_ID);
+ machines->guests = RB_ROOT;
+}
+
+void machines__exit(struct machines *machines)
+{
+ machine__exit(&machines->host);
+ /* XXX exit guest */
+}
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+ const char *root_dir)
+{
+ struct rb_node **p = &machines->guests.rb_node;
+ struct rb_node *parent = NULL;
+ struct machine *pos, *machine = malloc(sizeof(*machine));
+
+ if (machine == NULL)
+ return NULL;
+
+ if (machine__init(machine, root_dir, pid) != 0) {
+ free(machine);
+ return NULL;
+ }
+
+ while (*p != NULL) {
+ parent = *p;
+ pos = rb_entry(parent, struct machine, rb_node);
+ if (pid < pos->pid)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&machine->rb_node, parent, p);
+ rb_insert_color(&machine->rb_node, &machines->guests);
+
+ return machine;
+}
+
+void machines__set_comm_exec(struct machines *machines, bool comm_exec)
+{
+ struct rb_node *nd;
+
+ machines->host.comm_exec = comm_exec;
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+ machine->comm_exec = comm_exec;
+ }
+}
+
+struct machine *machines__find(struct machines *machines, pid_t pid)
+{
+ struct rb_node **p = &machines->guests.rb_node;
+ struct rb_node *parent = NULL;
+ struct machine *machine;
+ struct machine *default_machine = NULL;
+
+ if (pid == HOST_KERNEL_ID)
+ return &machines->host;
+
+ while (*p != NULL) {
+ parent = *p;
+ machine = rb_entry(parent, struct machine, rb_node);
+ if (pid < machine->pid)
+ p = &(*p)->rb_left;
+ else if (pid > machine->pid)
+ p = &(*p)->rb_right;
+ else
+ return machine;
+ if (!machine->pid)
+ default_machine = machine;
+ }
+
+ return default_machine;
+}
+
+struct machine *machines__findnew(struct machines *machines, pid_t pid)
+{
+ char path[PATH_MAX];
+ const char *root_dir = "";
+ struct machine *machine = machines__find(machines, pid);
+
+ if (machine && (machine->pid == pid))
+ goto out;
+
+ if ((pid != HOST_KERNEL_ID) &&
+ (pid != DEFAULT_GUEST_KERNEL_ID) &&
+ (symbol_conf.guestmount)) {
+ snprintf(path, sizeof(path), "%s/%d", symbol_conf.guestmount, pid);
+ if (access(path, R_OK)) {
+ static struct strlist *seen;
+
+ if (!seen)
+ seen = strlist__new(NULL, NULL);
+
+ if (!strlist__has_entry(seen, path)) {
+ pr_err("Can't access file %s\n", path);
+ strlist__add(seen, path);
+ }
+ machine = NULL;
+ goto out;
+ }
+ root_dir = path;
+ }
+
+ machine = machines__add(machines, pid, root_dir);
+out:
+ return machine;
+}
+
+void machines__process_guests(struct machines *machines,
+ machine__process_t process, void *data)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ process(pos, data);
+ }
+}
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size)
+{
+ struct rb_node *node;
+ struct machine *machine;
+
+ machines->host.id_hdr_size = id_hdr_size;
+
+ for (node = rb_first(&machines->guests); node; node = rb_next(node)) {
+ machine = rb_entry(node, struct machine, rb_node);
+ machine->id_hdr_size = id_hdr_size;
+ }
+}
+
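+/*
+ * Threads may be created from events that only carry a tid, so the pid
+ * is filled in lazily here; once the pid is known, a non-leader thread
+ * shares the map groups of its thread group leader.
+ */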
+static void machine__update_thread_pid(struct machine *machine,
+ struct thread *th, pid_t pid)
+{
+ struct thread *leader;
+
+ if (pid == th->pid_ || pid == -1 || th->pid_ != -1)
+ return;
+
+ th->pid_ = pid;
+
+ if (th->pid_ == th->tid)
+ return;
+
+ leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
+ if (!leader)
+ goto out_err;
+
+ if (!leader->mg)
+ leader->mg = map_groups__new(machine);
+
+ if (!leader->mg)
+ goto out_err;
+
+ if (th->mg == leader->mg)
+ return;
+
+ if (th->mg) {
+ /*
+ * Maps are created from MMAP events which provide the pid and
+ * tid. Consequently there never should be any maps on a thread
+ * with an unknown pid. Just print an error if there are.
+ */
+ if (!map_groups__empty(th->mg))
+ pr_err("Discarding thread maps for %d:%d\n",
+ th->pid_, th->tid);
+ map_groups__put(th->mg);
+ }
+
+ th->mg = map_groups__get(leader->mg);
+out_put:
+ thread__put(leader);
+ return;
+out_err:
+ pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
+ goto out_put;
+}
+
+/*
+ * Front-end cache - TID lookups come in blocks,
+ * so most of the time we don't have to look up
+ * the full rbtree:
+ */
+static struct thread*
+__threads__get_last_match(struct threads *threads, struct machine *machine,
+ int pid, int tid)
+{
+ struct thread *th;
+
+ th = threads->last_match;
+ if (th != NULL) {
+ if (th->tid == tid) {
+ machine__update_thread_pid(machine, th, pid);
+ return thread__get(th);
+ }
+
+ threads->last_match = NULL;
+ }
+
+ return NULL;
+}
+
+static struct thread*
+threads__get_last_match(struct threads *threads, struct machine *machine,
+ int pid, int tid)
+{
+ struct thread *th = NULL;
+
+ if (perf_singlethreaded)
+ th = __threads__get_last_match(threads, machine, pid, tid);
+
+ return th;
+}
+
+static void
+__threads__set_last_match(struct threads *threads, struct thread *th)
+{
+ threads->last_match = th;
+}
+
+static void
+threads__set_last_match(struct threads *threads, struct thread *th)
+{
+ if (perf_singlethreaded)
+ __threads__set_last_match(threads, th);
+}
+
+/*
+ * The caller must eventually drop the thread->refcnt returned by a
+ * successful lookup or a newly inserted thread.
+ */
+static struct thread *____machine__findnew_thread(struct machine *machine,
+ struct threads *threads,
+ pid_t pid, pid_t tid,
+ bool create)
+{
+ struct rb_node **p = &threads->entries.rb_node;
+ struct rb_node *parent = NULL;
+ struct thread *th;
+
+ th = threads__get_last_match(threads, machine, pid, tid);
+ if (th)
+ return th;
+
+ while (*p != NULL) {
+ parent = *p;
+ th = rb_entry(parent, struct thread, rb_node);
+
+ if (th->tid == tid) {
+ threads__set_last_match(threads, th);
+ machine__update_thread_pid(machine, th, pid);
+ return thread__get(th);
+ }
+
+ if (tid < th->tid)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ if (!create)
+ return NULL;
+
+ th = thread__new(pid, tid);
+ if (th != NULL) {
+ rb_link_node(&th->rb_node, parent, p);
+ rb_insert_color(&th->rb_node, &threads->entries);
+
+ /*
+ * We have to initialize map_groups separately
+ * after rb tree is updated.
+ *
+ * The reason is that we call machine__findnew_thread
+ * within thread__init_map_groups to find the thread
+ * leader and that would screw up the rb tree.
+ */
+ if (thread__init_map_groups(th, machine)) {
+ rb_erase_init(&th->rb_node, &threads->entries);
+ RB_CLEAR_NODE(&th->rb_node);
+ thread__put(th);
+ return NULL;
+ }
+ /*
+ * It is now in the rbtree, get a ref
+ */
+ thread__get(th);
+ threads__set_last_match(threads, th);
+ ++threads->nr;
+ }
+
+ return th;
+}
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+ return ____machine__findnew_thread(machine, machine__threads(machine, tid), pid, tid, true);
+}
+
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
+ pid_t tid)
+{
+ struct threads *threads = machine__threads(machine, tid);
+ struct thread *th;
+
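+ /* Take the lock for writing: a lookup miss inserts a new thread. */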
+ down_write(&threads->lock);
+ th = __machine__findnew_thread(machine, pid, tid);
+ up_write(&threads->lock);
+ return th;
+}
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+ pid_t tid)
+{
+ struct threads *threads = machine__threads(machine, tid);
+ struct thread *th;
+
+ down_read(&threads->lock);
+ th = ____machine__findnew_thread(machine, threads, pid, tid, false);
+ up_read(&threads->lock);
+ return th;
+}
+
+struct comm *machine__thread_exec_comm(struct machine *machine,
+ struct thread *thread)
+{
+ if (machine->comm_exec)
+ return thread__exec_comm(thread);
+ else
+ return thread__comm(thread);
+}
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->comm.pid,
+ event->comm.tid);
+ bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+ int err = 0;
+
+ if (exec)
+ machine->comm_exec = true;
+
+ if (dump_trace)
+ perf_event__fprintf_comm(event, stdout);
+
+ if (thread == NULL ||
+ __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
+ dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
+ err = -1;
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
+int machine__process_namespaces_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(machine,
+ event->namespaces.pid,
+ event->namespaces.tid);
+ int err = 0;
+
+ WARN_ONCE(event->namespaces.nr_namespaces > NR_NAMESPACES,
+ "\nWARNING: kernel seems to support more namespaces than the perf"
+ " tool.\nTry updating the perf tool.\n\n");
+
+ WARN_ONCE(event->namespaces.nr_namespaces < NR_NAMESPACES,
+ "\nWARNING: perf tool seems to support more namespaces than"
+ " the kernel.\nTry updating the kernel.\n\n");
+
+ if (dump_trace)
+ perf_event__fprintf_namespaces(event, stdout);
+
+ if (thread == NULL ||
+ thread__set_namespaces(thread, sample->time, &event->namespaces)) {
+ dump_printf("problem processing PERF_RECORD_NAMESPACES, skipping event.\n");
+ err = -1;
+ }
+
+ thread__put(thread);
+
+ return err;
+}
+
+int machine__process_lost_event(struct machine *machine __maybe_unused,
+ union perf_event *event, struct perf_sample *sample __maybe_unused)
+{
+ dump_printf(": id:%" PRIu64 ": lost:%" PRIu64 "\n",
+ event->lost.id, event->lost.lost);
+ return 0;
+}
+
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+ union perf_event *event, struct perf_sample *sample)
+{
+ dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
+ sample->id, event->lost_samples.lost);
+ return 0;
+}
+
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+ struct kmod_path *m,
+ const char *filename)
+{
+ struct dso *dso;
+
+ down_write(&machine->dsos.lock);
+
+ dso = __dsos__find(&machine->dsos, m->name, true);
+ if (!dso) {
+ dso = __dsos__addnew(&machine->dsos, m->name);
+ if (dso == NULL)
+ goto out_unlock;
+
+ dso__set_module_info(dso, m, machine);
+ dso__set_long_name(dso, strdup(filename), true);
+ }
+
+ dso__get(dso);
+out_unlock:
+ up_write(&machine->dsos.lock);
+ return dso;
+}
+
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_aux(event, stdout);
+ return 0;
+}
+
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_itrace_start(event, stdout);
+ return 0;
+}
+
+int machine__process_switch_event(struct machine *machine __maybe_unused,
+ union perf_event *event)
+{
+ if (dump_trace)
+ perf_event__fprintf_switch(event, stdout);
+ return 0;
+}
+
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+ const char *filename)
+{
+ struct map *map = NULL;
+ struct dso *dso = NULL;
+ struct kmod_path m;
+
+ if (kmod_path__parse_name(&m, filename))
+ return NULL;
+
+ map = map_groups__find_by_name(&machine->kmaps, m.name);
+ if (map)
+ goto out;
+
+ dso = machine__findnew_module_dso(machine, &m, filename);
+ if (dso == NULL)
+ goto out;
+
+ map = map__new2(start, dso);
+ if (map == NULL)
+ goto out;
+
+ map_groups__insert(&machine->kmaps, map);
+
+ /* Put the map here because map_groups__insert already got it */
+ map__put(map);
+out:
+ /* put the dso here, corresponding to machine__findnew_module_dso */
+ dso__put(dso);
+ free(m.name);
+ return map;
+}
+
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret += __dsos__fprintf(&pos->dsos.head, fp);
+ }
+
+ return ret;
+}
+
+size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
+}
+
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ struct rb_node *nd;
+ size_t ret = machine__fprintf_dsos_buildid(&machines->host, fp, skip, parm);
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *pos = rb_entry(nd, struct machine, rb_node);
+ ret += machine__fprintf_dsos_buildid(pos, fp, skip, parm);
+ }
+ return ret;
+}
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
+{
+ int i;
+ size_t printed = 0;
+ struct dso *kdso = machine__kernel_map(machine)->dso;
+
+ if (kdso->has_build_id) {
+ char filename[PATH_MAX];
+ if (dso__build_id_filename(kdso, filename, sizeof(filename),
+ false))
+ printed += fprintf(fp, "[0] %s\n", filename);
+ }
+
+ for (i = 0; i < vmlinux_path__nr_entries; ++i)
+ printed += fprintf(fp, "[%d] %s\n",
+ i + kdso->has_build_id, vmlinux_path[i]);
+
+ return printed;
+}
+
+size_t machine__fprintf(struct machine *machine, FILE *fp)
+{
+ struct rb_node *nd;
+ size_t ret = 0;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ struct threads *threads = &machine->threads[i];
+
+ down_read(&threads->lock);
+
+ ret += fprintf(fp, "Threads: %u\n", threads->nr);
+
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ struct thread *pos = rb_entry(nd, struct thread, rb_node);
+
+ ret += thread__fprintf(pos, fp);
+ }
+
+ up_read(&threads->lock);
+ }
+ return ret;
+}
+
+static struct dso *machine__get_kernel(struct machine *machine)
+{
+ const char *vmlinux_name = machine->mmap_name;
+ struct dso *kernel;
+
+ if (machine__is_host(machine)) {
+ if (symbol_conf.vmlinux_name)
+ vmlinux_name = symbol_conf.vmlinux_name;
+
+ kernel = machine__findnew_kernel(machine, vmlinux_name,
+ "[kernel]", DSO_TYPE_KERNEL);
+ } else {
+ if (symbol_conf.default_guest_vmlinux_name)
+ vmlinux_name = symbol_conf.default_guest_vmlinux_name;
+
+ kernel = machine__findnew_kernel(machine, vmlinux_name,
+ "[guest.kernel]",
+ DSO_TYPE_GUEST_KERNEL);
+ }
+
+ if (kernel != NULL && (!kernel->has_build_id))
+ dso__read_running_kernel_build_id(kernel, machine);
+
+ return kernel;
+}
+
+struct process_args {
+ u64 start;
+};
+
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+ size_t bufsz)
+{
+ if (machine__is_default_guest(machine))
+ scnprintf(buf, bufsz, "%s", symbol_conf.default_guest_kallsyms);
+ else
+ scnprintf(buf, bufsz, "%s/proc/kallsyms", machine->root_dir);
+}
+
+const char *ref_reloc_sym_names[] = {"_text", "_stext", NULL};
+
+/* Figure out the start address of kernel map from /proc/kallsyms.
+ * Returns the name of the start symbol in *symbol_name. Pass in NULL as
+ * symbol_name if it's not that important.
+ */
+static int machine__get_running_kernel_start(struct machine *machine,
+ const char **symbol_name, u64 *start)
+{
+ char filename[PATH_MAX];
+ int i, err = -1;
+ const char *name;
+ u64 addr = 0;
+
+ machine__get_kallsyms_filename(machine, filename, PATH_MAX);
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return 0;
+
+ for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+ err = kallsyms__get_function_start(filename, name, &addr);
+ if (!err)
+ break;
+ }
+
+ if (err)
+ return -1;
+
+ if (symbol_name)
+ *symbol_name = name;
+
+ *start = addr;
+ return 0;
+}
+
+int machine__create_extra_kernel_map(struct machine *machine,
+ struct dso *kernel,
+ struct extra_kernel_map *xm)
+{
+ struct kmap *kmap;
+ struct map *map;
+
+ map = map__new2(xm->start, kernel);
+ if (!map)
+ return -1;
+
+ map->end = xm->end;
+ map->pgoff = xm->pgoff;
+
+ kmap = map__kmap(map);
+
+ kmap->kmaps = &machine->kmaps;
+ strlcpy(kmap->name, xm->name, KMAP_NAME_LEN);
+
+ map_groups__insert(&machine->kmaps, map);
+
+ pr_debug2("Added extra kernel map %s %" PRIx64 "-%" PRIx64 "\n",
+ kmap->name, map->start, map->end);
+
+ map__put(map);
+
+ return 0;
+}
+
+static u64 find_entry_trampoline(struct dso *dso)
+{
+ /* Duplicates are removed, so look up all aliases */
+ const char *syms[] = {
+ "_entry_trampoline",
+ "__entry_trampoline_start",
+ "entry_SYSCALL_64_trampoline",
+ };
+ struct symbol *sym = dso__first_symbol(dso);
+ unsigned int i;
+
+ for (; sym; sym = dso__next_symbol(sym)) {
+ if (sym->binding != STB_GLOBAL)
+ continue;
+ for (i = 0; i < ARRAY_SIZE(syms); i++) {
+ if (!strcmp(sym->name, syms[i]))
+ return sym->start;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * These values can be used for kernels that do not have symbols for the entry
+ * trampolines in kallsyms.
+ */
+#define X86_64_CPU_ENTRY_AREA_PER_CPU 0xfffffe0000000000ULL
+#define X86_64_CPU_ENTRY_AREA_SIZE 0x2c000
+#define X86_64_ENTRY_TRAMPOLINE 0x6000
+
+/* Map x86_64 PTI entry trampolines */
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+ struct dso *kernel)
+{
+ struct map_groups *kmaps = &machine->kmaps;
+ struct maps *maps = &kmaps->maps;
+ int nr_cpus_avail, cpu;
+ bool found = false;
+ struct map *map;
+ u64 pgoff;
+
+ /*
+ * In the vmlinux case, pgoff is a virtual address which must now be
+ * mapped to a vmlinux offset.
+ */
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ struct kmap *kmap = __map__kmap(map);
+ struct map *dest_map;
+
+ if (!kmap || !is_entry_trampoline(kmap->name))
+ continue;
+
+ dest_map = map_groups__find(kmaps, map->pgoff);
+ if (dest_map != map)
+ map->pgoff = dest_map->map_ip(dest_map, map->pgoff);
+ found = true;
+ }
+ if (found || machine->trampolines_mapped)
+ return 0;
+
+ pgoff = find_entry_trampoline(kernel);
+ if (!pgoff)
+ return 0;
+
+ nr_cpus_avail = machine__nr_cpus_avail(machine);
+
+ /* Add a 1 page map for each CPU's entry trampoline */
+ for (cpu = 0; cpu < nr_cpus_avail; cpu++) {
+ u64 va = X86_64_CPU_ENTRY_AREA_PER_CPU +
+ cpu * X86_64_CPU_ENTRY_AREA_SIZE +
+ X86_64_ENTRY_TRAMPOLINE;
+ struct extra_kernel_map xm = {
+ .start = va,
+ .end = va + page_size,
+ .pgoff = pgoff,
+ };
+
+ strlcpy(xm.name, ENTRY_TRAMPOLINE_NAME, KMAP_NAME_LEN);
+
+ if (machine__create_extra_kernel_map(machine, kernel, &xm) < 0)
+ return -1;
+ }
+
+ machine->trampolines_mapped = nr_cpus_avail;
+
+ return 0;
+}
+
+int __weak machine__create_extra_kernel_maps(struct machine *machine __maybe_unused,
+ struct dso *kernel __maybe_unused)
+{
+ return 0;
+}
+
+static int
+__machine__create_kernel_maps(struct machine *machine, struct dso *kernel)
+{
+ struct kmap *kmap;
+ struct map *map;
+
+ /* In case of renewing the kernel map, destroy the previous one */
+ machine__destroy_kernel_maps(machine);
+
+ machine->vmlinux_map = map__new2(0, kernel);
+ if (machine->vmlinux_map == NULL)
+ return -1;
+
+ machine->vmlinux_map->map_ip = machine->vmlinux_map->unmap_ip = identity__map_ip;
+ map = machine__kernel_map(machine);
+ kmap = map__kmap(map);
+ if (!kmap)
+ return -1;
+
+ kmap->kmaps = &machine->kmaps;
+ map_groups__insert(&machine->kmaps, map);
+
+ return 0;
+}
+
+void machine__destroy_kernel_maps(struct machine *machine)
+{
+ struct kmap *kmap;
+ struct map *map = machine__kernel_map(machine);
+
+ if (map == NULL)
+ return;
+
+ kmap = map__kmap(map);
+ map_groups__remove(&machine->kmaps, map);
+ if (kmap && kmap->ref_reloc_sym) {
+ zfree((char **)&kmap->ref_reloc_sym->name);
+ zfree(&kmap->ref_reloc_sym);
+ }
+
+ map__zput(machine->vmlinux_map);
+}
+
+int machines__create_guest_kernel_maps(struct machines *machines)
+{
+ int ret = 0;
+ struct dirent **namelist = NULL;
+ int i, items = 0;
+ char path[PATH_MAX];
+ pid_t pid;
+ char *endp;
+
+ if (symbol_conf.default_guest_vmlinux_name ||
+ symbol_conf.default_guest_modules ||
+ symbol_conf.default_guest_kallsyms) {
+ machines__create_kernel_maps(machines, DEFAULT_GUEST_KERNEL_ID);
+ }
+
+ if (symbol_conf.guestmount) {
+ items = scandir(symbol_conf.guestmount, &namelist, NULL, NULL);
+ if (items <= 0)
+ return -ENOENT;
+ for (i = 0; i < items; i++) {
+ if (!isdigit(namelist[i]->d_name[0])) {
+ /* Filter out . and .. */
+ continue;
+ }
+ pid = (pid_t)strtol(namelist[i]->d_name, &endp, 10);
+ if ((*endp != '\0') ||
+ (endp == namelist[i]->d_name) ||
+ (errno == ERANGE)) {
+ pr_debug("invalid directory (%s). Skipping.\n",
+ namelist[i]->d_name);
+ continue;
+ }
+ snprintf(path, sizeof(path), "%s/%s/proc/kallsyms",
+ symbol_conf.guestmount,
+ namelist[i]->d_name);
+ ret = access(path, R_OK);
+ if (ret) {
+ pr_debug("Can't access file %s\n", path);
+ goto failure;
+ }
+ machines__create_kernel_maps(machines, pid);
+ }
+failure:
+ free(namelist);
+ }
+
+ return ret;
+}
+
+void machines__destroy_kernel_maps(struct machines *machines)
+{
+ struct rb_node *next = rb_first(&machines->guests);
+
+ machine__destroy_kernel_maps(&machines->host);
+
+ while (next) {
+ struct machine *pos = rb_entry(next, struct machine, rb_node);
+
+ next = rb_next(&pos->rb_node);
+ rb_erase(&pos->rb_node, &machines->guests);
+ machine__delete(pos);
+ }
+}
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid)
+{
+ struct machine *machine = machines__findnew(machines, pid);
+
+ if (machine == NULL)
+ return -1;
+
+ return machine__create_kernel_maps(machine);
+}
+
+int machine__load_kallsyms(struct machine *machine, const char *filename)
+{
+ struct map *map = machine__kernel_map(machine);
+ int ret = __dso__load_kallsyms(map->dso, filename, map, true);
+
+ if (ret > 0) {
+ dso__set_loaded(map->dso);
+ /*
+ * Since /proc/kallsyms will have multiple sections for the
+ * kernel, with modules between them, fixup the end of all
+ * sections.
+ */
+ map_groups__fixup_end(&machine->kmaps);
+ }
+
+ return ret;
+}
+
+int machine__load_vmlinux_path(struct machine *machine)
+{
+ struct map *map = machine__kernel_map(machine);
+ int ret = dso__load_vmlinux_path(map->dso, map);
+
+ if (ret > 0)
+ dso__set_loaded(map->dso);
+
+ return ret;
+}
+
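+/*
+ * Extract the release string from "<root_dir>/proc/version", whose
+ * contents start with "Linux version <release> ...".
+ */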
+static char *get_kernel_version(const char *root_dir)
+{
+ char version[PATH_MAX];
+ FILE *file;
+ char *name, *tmp;
+ const char *prefix = "Linux version ";
+
+ snprintf(version, sizeof(version), "%s/proc/version", root_dir);
+ file = fopen(version, "r");
+ if (!file)
+ return NULL;
+
+ version[0] = '\0';
+ tmp = fgets(version, sizeof(version), file);
+ fclose(file);
+
+ name = strstr(version, prefix);
+ if (!name)
+ return NULL;
+ name += strlen(prefix);
+ tmp = strchr(name, ' ');
+ if (tmp)
+ *tmp = '\0';
+
+ return strdup(name);
+}
+
+static bool is_kmod_dso(struct dso *dso)
+{
+ return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE;
+}
+
+static int map_groups__set_module_path(struct map_groups *mg, const char *path,
+ struct kmod_path *m)
+{
+ char *long_name;
+ struct map *map = map_groups__find_by_name(mg, m->name);
+
+ if (map == NULL)
+ return 0;
+
+ long_name = strdup(path);
+ if (long_name == NULL)
+ return -ENOMEM;
+
+ dso__set_long_name(map->dso, long_name, true);
+ dso__kernel_module_get_build_id(map->dso, "");
+
+ /*
+ * The full name could reveal kmod compression, so
+ * update the symtab_type if needed.
+ */
+ if (m->comp && is_kmod_dso(map->dso)) {
+ map->dso->symtab_type++;
+ map->dso->comp = m->comp;
+ }
+
+ return 0;
+}
+
+static int map_groups__set_modules_path_dir(struct map_groups *mg,
+ const char *dir_name, int depth)
+{
+ struct dirent *dent;
+ DIR *dir = opendir(dir_name);
+ int ret = 0;
+
+ if (!dir) {
+ pr_debug("%s: cannot open %s dir\n", __func__, dir_name);
+ return -1;
+ }
+
+ while ((dent = readdir(dir)) != NULL) {
+ char path[PATH_MAX];
+ struct stat st;
+
+ /* sshfs might return bad dent->d_type, so we have to stat */
+ snprintf(path, sizeof(path), "%s/%s", dir_name, dent->d_name);
+ if (stat(path, &st))
+ continue;
+
+ if (S_ISDIR(st.st_mode)) {
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ /* Do not follow top-level source and build symlinks */
+ if (depth == 0) {
+ if (!strcmp(dent->d_name, "source") ||
+ !strcmp(dent->d_name, "build"))
+ continue;
+ }
+
+ ret = map_groups__set_modules_path_dir(mg, path,
+ depth + 1);
+ if (ret < 0)
+ goto out;
+ } else {
+ struct kmod_path m;
+
+ ret = kmod_path__parse_name(&m, dent->d_name);
+ if (ret)
+ goto out;
+
+ if (m.kmod)
+ ret = map_groups__set_module_path(mg, path, &m);
+
+ free(m.name);
+
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ closedir(dir);
+ return ret;
+}
+
+static int machine__set_modules_path(struct machine *machine)
+{
+ char *version;
+ char modules_path[PATH_MAX];
+
+ version = get_kernel_version(machine->root_dir);
+ if (!version)
+ return -1;
+
+ snprintf(modules_path, sizeof(modules_path), "%s/lib/modules/%s",
+ machine->root_dir, version);
+ free(version);
+
+ return map_groups__set_modules_path_dir(&machine->kmaps, modules_path, 0);
+}
+
+int __weak arch__fix_module_text_start(u64 *start __maybe_unused,
+ u64 *size __maybe_unused,
+ const char *name __maybe_unused)
+{
+ return 0;
+}
+
+static int machine__create_module(void *arg, const char *name, u64 start,
+ u64 size)
+{
+ struct machine *machine = arg;
+ struct map *map;
+
+ if (arch__fix_module_text_start(&start, &size, name) < 0)
+ return -1;
+
+ map = machine__findnew_module_map(machine, start, name);
+ if (map == NULL)
+ return -1;
+ map->end = start + size;
+
+ dso__kernel_module_get_build_id(map->dso, machine->root_dir);
+
+ return 0;
+}
+
+static int machine__create_modules(struct machine *machine)
+{
+ const char *modules;
+ char path[PATH_MAX];
+
+ if (machine__is_default_guest(machine)) {
+ modules = symbol_conf.default_guest_modules;
+ } else {
+ snprintf(path, PATH_MAX, "%s/proc/modules", machine->root_dir);
+ modules = path;
+ }
+
+ if (symbol__restricted_filename(modules, "/proc/modules"))
+ return -1;
+
+ if (modules__parse(modules, machine, machine__create_module))
+ return -1;
+
+ if (!machine__set_modules_path(machine))
+ return 0;
+
+ pr_debug("Problems setting modules path maps, continuing anyway...\n");
+
+ return 0;
+}
+
+static void machine__set_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ machine->vmlinux_map->start = start;
+ machine->vmlinux_map->end = end;
+ /*
+ * Be a bit paranoid here: some perf.data files came with
+ * a zero-sized synthesized MMAP event for the kernel.
+ */
+ if (start == 0 && end == 0)
+ machine->vmlinux_map->end = ~0ULL;
+}
+
+static void machine__update_kernel_mmap(struct machine *machine,
+ u64 start, u64 end)
+{
+ struct map *map = machine__kernel_map(machine);
+
+ map__get(map);
+ map_groups__remove(&machine->kmaps, map);
+
+ machine__set_kernel_mmap(machine, start, end);
+
+ map_groups__insert(&machine->kmaps, map);
+ map__put(map);
+}
+
+int machine__create_kernel_maps(struct machine *machine)
+{
+ struct dso *kernel = machine__get_kernel(machine);
+ const char *name = NULL;
+ struct map *map;
+ u64 addr = 0;
+ int ret;
+
+ if (kernel == NULL)
+ return -1;
+
+ ret = __machine__create_kernel_maps(machine, kernel);
+ if (ret < 0)
+ goto out_put;
+
+ if (symbol_conf.use_modules && machine__create_modules(machine) < 0) {
+ if (machine__is_host(machine))
+ pr_debug("Problems creating module maps, "
+ "continuing anyway...\n");
+ else
+ pr_debug("Problems creating module maps for guest %d, "
+ "continuing anyway...\n", machine->pid);
+ }
+
+ if (!machine__get_running_kernel_start(machine, &name, &addr)) {
+ if (name &&
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map, name, addr)) {
+ machine__destroy_kernel_maps(machine);
+ ret = -1;
+ goto out_put;
+ }
+
+ /*
+ * We have a real start address now, so re-order the kmaps;
+ * assume it's the last in the kmaps.
+ */
+ machine__update_kernel_mmap(machine, addr, ~0ULL);
+ }
+
+ if (machine__create_extra_kernel_maps(machine, kernel))
+ pr_debug("Problems creating extra kernel maps, continuing anyway...\n");
+
+ /* update end address of the kernel map using adjacent module address */
+ map = map__next(machine__kernel_map(machine));
+ if (map)
+ machine__set_kernel_mmap(machine, addr, map->start);
+out_put:
+ dso__put(kernel);
+ return ret;
+}
+
+static bool machine__uses_kcore(struct machine *machine)
+{
+ struct dso *dso;
+
+ list_for_each_entry(dso, &machine->dsos.head, node) {
+ if (dso__is_kcore(dso))
+ return true;
+ }
+
+ return false;
+}
+
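+/*
+ * On x86_64, PTI entry trampoline maps are synthesized as kernel mmap
+ * events whose filename is the trampoline map name rather than a path.
+ */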
+static bool perf_event__is_extra_kernel_mmap(struct machine *machine,
+ union perf_event *event)
+{
+ return machine__is(machine, "x86_64") &&
+ is_entry_trampoline(event->mmap.filename);
+}
+
+static int machine__process_extra_kernel_map(struct machine *machine,
+ union perf_event *event)
+{
+ struct map *kernel_map = machine__kernel_map(machine);
+ struct dso *kernel = kernel_map ? kernel_map->dso : NULL;
+ struct extra_kernel_map xm = {
+ .start = event->mmap.start,
+ .end = event->mmap.start + event->mmap.len,
+ .pgoff = event->mmap.pgoff,
+ };
+
+ if (kernel == NULL)
+ return -1;
+
+ strlcpy(xm.name, event->mmap.filename, KMAP_NAME_LEN);
+
+ return machine__create_extra_kernel_map(machine, kernel, &xm);
+}
+
+static int machine__process_kernel_mmap_event(struct machine *machine,
+ union perf_event *event)
+{
+ struct map *map;
+ enum dso_kernel_type kernel_type;
+ bool is_kernel_mmap;
+
+ /* If we have maps from kcore then we do not need or want any others */
+ if (machine__uses_kcore(machine))
+ return 0;
+
+ if (machine__is_host(machine))
+ kernel_type = DSO_TYPE_KERNEL;
+ else
+ kernel_type = DSO_TYPE_GUEST_KERNEL;
+
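+ /*
+ * Compare without the trailing ']' so that synthesized variants
+ * such as "[kernel.kallsyms]_text" are matched as well.
+ */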
+ is_kernel_mmap = memcmp(event->mmap.filename,
+ machine->mmap_name,
+ strlen(machine->mmap_name) - 1) == 0;
+ if (event->mmap.filename[0] == '/' ||
+ (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
+ map = machine__findnew_module_map(machine, event->mmap.start,
+ event->mmap.filename);
+ if (map == NULL)
+ goto out_problem;
+
+ map->end = map->start + event->mmap.len;
+ } else if (is_kernel_mmap) {
+ const char *symbol_name = (event->mmap.filename +
+ strlen(machine->mmap_name));
+ /*
+ * Should be there already, from the build-id table in
+ * the header.
+ */
+ struct dso *kernel = NULL;
+ struct dso *dso;
+
+ down_read(&machine->dsos.lock);
+
+ list_for_each_entry(dso, &machine->dsos.head, node) {
+
+ /*
+ * The cpumode passed to is_kernel_module is not the
+ * cpumode of *this* event. If we insist on passing the
+ * correct cpumode to is_kernel_module, we should
+ * record the cpumode when we add this dso to the
+ * linked list.
+ *
+ * However we don't really need to pass the correct
+ * cpumode. We know the correct cpumode must be kernel
+ * mode (if not, we should not link it onto the
+ * kernel_dsos list).
+ *
+ * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+ * is_kernel_module() treats it as a kernel cpumode.
+ */
+
+ if (!dso->kernel ||
+ is_kernel_module(dso->long_name,
+ PERF_RECORD_MISC_CPUMODE_UNKNOWN))
+ continue;
+
+ kernel = dso;
+ break;
+ }
+
+ up_read(&machine->dsos.lock);
+
+ if (kernel == NULL)
+ kernel = machine__findnew_dso(machine, machine->mmap_name);
+ if (kernel == NULL)
+ goto out_problem;
+
+ kernel->kernel = kernel_type;
+ if (__machine__create_kernel_maps(machine, kernel) < 0) {
+ dso__put(kernel);
+ goto out_problem;
+ }
+
+ if (strstr(kernel->long_name, "vmlinux"))
+ dso__set_short_name(kernel, "[kernel.vmlinux]", false);
+
+ machine__update_kernel_mmap(machine, event->mmap.start,
+ event->mmap.start + event->mmap.len);
+
+ /*
+ * Avoid using a zero address (kptr_restrict) for the ref reloc
+ * symbol. Effectively having zero here means that at record
+ * time /proc/sys/kernel/kptr_restrict was non zero.
+ */
+ if (event->mmap.pgoff != 0) {
+ map__set_kallsyms_ref_reloc_sym(machine->vmlinux_map,
+ symbol_name,
+ event->mmap.pgoff);
+ }
+
+ if (machine__is_default_guest(machine)) {
+ /*
+ * preload dso of guest kernel and modules
+ */
+ dso__load(kernel, machine__kernel_map(machine));
+ }
+ } else if (perf_event__is_extra_kernel_mmap(machine, event)) {
+ return machine__process_extra_kernel_map(machine, event);
+ }
+ return 0;
+out_problem:
+ return -1;
+}
+
+int machine__process_mmap2_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct map *map;
+ int ret = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_mmap2(event, stdout);
+
+ if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+ ret = machine__process_kernel_mmap_event(machine, event);
+ if (ret < 0)
+ goto out_problem;
+ return 0;
+ }
+
+ thread = machine__findnew_thread(machine, event->mmap2.pid,
+ event->mmap2.tid);
+ if (thread == NULL)
+ goto out_problem;
+
+ map = map__new(machine, event->mmap2.start,
+ event->mmap2.len, event->mmap2.pgoff,
+ event->mmap2.maj,
+ event->mmap2.min, event->mmap2.ino,
+ event->mmap2.ino_generation,
+ event->mmap2.prot,
+ event->mmap2.flags,
+ event->mmap2.filename, thread);
+
+ if (map == NULL)
+ goto out_problem_map;
+
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
+ thread__put(thread);
+ map__put(map);
+ return 0;
+
+out_problem_insert:
+ map__put(map);
+out_problem_map:
+ thread__put(thread);
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
+ return 0;
+}
+
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread;
+ struct map *map;
+ u32 prot = 0;
+ int ret = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_mmap(event, stdout);
+
+ if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ sample->cpumode == PERF_RECORD_MISC_KERNEL) {
+ ret = machine__process_kernel_mmap_event(machine, event);
+ if (ret < 0)
+ goto out_problem;
+ return 0;
+ }
+
+ thread = machine__findnew_thread(machine, event->mmap.pid,
+ event->mmap.tid);
+ if (thread == NULL)
+ goto out_problem;
+
+ if (!(event->header.misc & PERF_RECORD_MISC_MMAP_DATA))
+ prot = PROT_EXEC;
+
+ map = map__new(machine, event->mmap.start,
+ event->mmap.len, event->mmap.pgoff,
+ 0, 0, 0, 0, prot, 0,
+ event->mmap.filename,
+ thread);
+
+ if (map == NULL)
+ goto out_problem_map;
+
+ ret = thread__insert_map(thread, map);
+ if (ret)
+ goto out_problem_insert;
+
+ thread__put(thread);
+ map__put(map);
+ return 0;
+
+out_problem_insert:
+ map__put(map);
+out_problem_map:
+ thread__put(thread);
+out_problem:
+ dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
+ return 0;
+}
+
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
+{
+ struct threads *threads = machine__threads(machine, th->tid);
+
+ if (threads->last_match == th)
+ threads__set_last_match(threads, NULL);
+
+ BUG_ON(refcount_read(&th->refcnt) == 0);
+ if (lock)
+ down_write(&threads->lock);
+ rb_erase_init(&th->rb_node, &threads->entries);
+ RB_CLEAR_NODE(&th->rb_node);
+ --threads->nr;
+ /*
+ * Move it first to the dead_threads list, then drop the reference:
+ * if this is the last reference, the thread__delete destructor
+ * will be called and we will remove it from the dead_threads list.
+ */
+ list_add_tail(&th->node, &threads->dead);
+ if (lock)
+ up_write(&threads->lock);
+ thread__put(th);
+}
+
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+ return __machine__remove_thread(machine, th, true);
+}
+
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__find_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+ struct thread *parent = machine__findnew_thread(machine,
+ event->fork.ppid,
+ event->fork.ptid);
+ int err = 0;
+
+ if (dump_trace)
+ perf_event__fprintf_task(event, stdout);
+
+ /*
+ * There may be an existing thread that is not actually the parent,
+ * either because we are processing events out of order, or because the
+ * (fork) event that would have removed the thread was lost. Assume the
+ * latter case and continue on as best we can.
+ */
+ if (parent->pid_ != (pid_t)event->fork.ppid) {
+ dump_printf("removing erroneous parent thread %d/%d\n",
+ parent->pid_, parent->tid);
+ machine__remove_thread(machine, parent);
+ thread__put(parent);
+ parent = machine__findnew_thread(machine, event->fork.ppid,
+ event->fork.ptid);
+ }
+
+ /* if a thread currently exists for the thread id remove it */
+ if (thread != NULL) {
+ machine__remove_thread(machine, thread);
+ thread__put(thread);
+ }
+
+ thread = machine__findnew_thread(machine, event->fork.pid,
+ event->fork.tid);
+
+ if (thread == NULL || parent == NULL ||
+ thread__fork(thread, parent, sample->time) < 0) {
+ dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
+ err = -1;
+ }
+ thread__put(thread);
+ thread__put(parent);
+
+ return err;
+}
+
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct thread *thread = machine__find_thread(machine,
+ event->fork.pid,
+ event->fork.tid);
+
+ if (dump_trace)
+ perf_event__fprintf_task(event, stdout);
+
+ if (thread != NULL) {
+ thread__exited(thread);
+ thread__put(thread);
+ }
+
+ return 0;
+}
+
+int machine__process_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample)
+{
+ int ret;
+
+ switch (event->header.type) {
+ case PERF_RECORD_COMM:
+ ret = machine__process_comm_event(machine, event, sample); break;
+ case PERF_RECORD_MMAP:
+ ret = machine__process_mmap_event(machine, event, sample); break;
+ case PERF_RECORD_NAMESPACES:
+ ret = machine__process_namespaces_event(machine, event, sample); break;
+ case PERF_RECORD_MMAP2:
+ ret = machine__process_mmap2_event(machine, event, sample); break;
+ case PERF_RECORD_FORK:
+ ret = machine__process_fork_event(machine, event, sample); break;
+ case PERF_RECORD_EXIT:
+ ret = machine__process_exit_event(machine, event, sample); break;
+ case PERF_RECORD_LOST:
+ ret = machine__process_lost_event(machine, event, sample); break;
+ case PERF_RECORD_AUX:
+ ret = machine__process_aux_event(machine, event); break;
+ case PERF_RECORD_ITRACE_START:
+ ret = machine__process_itrace_start_event(machine, event); break;
+ case PERF_RECORD_LOST_SAMPLES:
+ ret = machine__process_lost_samples_event(machine, event, sample); break;
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ ret = machine__process_switch_event(machine, event); break;
+ default:
+ ret = -1;
+ break;
+ }
+
+ return ret;
+}
+
+static bool symbol__match_regex(struct symbol *sym, regex_t *regex)
+{
+ if (!regexec(regex, sym->name, 0, NULL, 0))
+ return 1;
+ return 0;
+}
+
+static void ip__resolve_ams(struct thread *thread,
+ struct addr_map_symbol *ams,
+ u64 ip)
+{
+ struct addr_location al;
+
+ memset(&al, 0, sizeof(al));
+ /*
+ * We cannot use the header.misc hint to determine whether a
+ * branch stack address is user, kernel, guest, hypervisor.
+ * Branches may straddle the kernel/user/hypervisor boundaries.
+ * Thus, we have to try consecutively until we find a match,
+ * or else the symbol is unknown.
+ */
+ thread__find_cpumode_addr_location(thread, ip, &al);
+
+ ams->addr = ip;
+ ams->al_addr = al.addr;
+ ams->sym = al.sym;
+ ams->map = al.map;
+ ams->phys_addr = 0;
+}
+
+static void ip__resolve_data(struct thread *thread,
+ u8 m, struct addr_map_symbol *ams,
+ u64 addr, u64 phys_addr)
+{
+ struct addr_location al;
+
+ memset(&al, 0, sizeof(al));
+
+ thread__find_symbol(thread, m, addr, &al);
+
+ ams->addr = addr;
+ ams->al_addr = al.addr;
+ ams->sym = al.sym;
+ ams->map = al.map;
+ ams->phys_addr = phys_addr;
+}
+
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+ struct addr_location *al)
+{
+ struct mem_info *mi = mem_info__new();
+
+ if (!mi)
+ return NULL;
+
+ ip__resolve_ams(al->thread, &mi->iaddr, sample->ip);
+ ip__resolve_data(al->thread, al->cpumode, &mi->daddr,
+ sample->addr, sample->phys_addr);
+ mi->data_src.val = sample->data_src;
+
+ return mi;
+}
+
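+/*
+ * Resolve the source line for 'ip', caching the result in the DSO's
+ * srclines tree so repeated callchain entries are only resolved once.
+ */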
+static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip)
+{
+ char *srcline = NULL;
+
+ if (!map || callchain_param.key == CCKEY_FUNCTION)
+ return srcline;
+
+ srcline = srcline__tree_find(&map->dso->srclines, ip);
+ if (!srcline) {
+ bool show_sym = false;
+ bool show_addr = callchain_param.key == CCKEY_ADDRESS;
+
+ srcline = get_srcline(map->dso, map__rip_2objdump(map, ip),
+ sym, show_sym, show_addr, ip);
+ srcline__tree_insert(&map->dso->srclines, ip, srcline);
+ }
+
+ return srcline;
+}
+
+struct iterations {
+ int nr_loop_iter;
+ u64 cycles;
+};
+
+static int add_callchain_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode,
+ u64 ip,
+ bool branch,
+ struct branch_flags *flags,
+ struct iterations *iter,
+ u64 branch_from)
+{
+ struct addr_location al;
+ int nr_loop_iter = 0;
+ u64 iter_cycles = 0;
+ const char *srcline = NULL;
+
+ al.filtered = 0;
+ al.sym = NULL;
+ al.srcline = NULL;
+ if (!cpumode) {
+ thread__find_cpumode_addr_location(thread, ip, &al);
+ } else {
+ if (ip >= PERF_CONTEXT_MAX) {
+ switch (ip) {
+ case PERF_CONTEXT_HV:
+ *cpumode = PERF_RECORD_MISC_HYPERVISOR;
+ break;
+ case PERF_CONTEXT_KERNEL:
+ *cpumode = PERF_RECORD_MISC_KERNEL;
+ break;
+ case PERF_CONTEXT_USER:
+ *cpumode = PERF_RECORD_MISC_USER;
+ break;
+ default:
+ pr_debug("invalid callchain context: "
+ "%"PRId64"\n", (s64) ip);
+ /*
+ * It seems the callchain is corrupted.
+ * Discard all.
+ */
+ callchain_cursor_reset(cursor);
+ return 1;
+ }
+ return 0;
+ }
+ thread__find_symbol(thread, *cpumode, ip, &al);
+ }
+
+ if (al.sym != NULL) {
+ if (perf_hpp_list.parent && !*parent &&
+ symbol__match_regex(al.sym, &parent_regex))
+ *parent = al.sym;
+ else if (have_ignore_callees && root_al &&
+ symbol__match_regex(al.sym, &ignore_callees_regex)) {
+ /*
+ * Treat this symbol as the root,
+ * forgetting its callees.
+ */
+ *root_al = al;
+ callchain_cursor_reset(cursor);
+ }
+ }
+
+ if (symbol_conf.hide_unresolved && al.sym == NULL)
+ return 0;
+
+ if (iter) {
+ nr_loop_iter = iter->nr_loop_iter;
+ iter_cycles = iter->cycles;
+ }
+
+ srcline = callchain_srcline(al.map, al.sym, al.addr);
+ return callchain_cursor_append(cursor, ip, al.map, al.sym,
+ branch, flags, nr_loop_iter,
+ iter_cycles, branch_from, srcline);
+}
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+ struct addr_location *al)
+{
+ unsigned int i;
+ const struct branch_stack *bs = sample->branch_stack;
+ struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
+
+ if (!bi)
+ return NULL;
+
+ for (i = 0; i < bs->nr; i++) {
+ ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
+ bi[i].flags = bs->entries[i].flags;
+ }
+ return bi;
+}
+
+static void save_iterations(struct iterations *iter,
+ struct branch_entry *be, int nr)
+{
+ int i;
+
+ iter->nr_loop_iter++;
+ iter->cycles = 0;
+
+ for (i = 0; i < nr; i++)
+ iter->cycles += be[i].flags.cycles;
+}
+
+#define CHASHSZ 127
+#define CHASHBITS 7
+#define NO_ENTRY 0xff
+
+#define PERF_MAX_BRANCH_DEPTH 127
+
+/*
+ * Remove loops: detect repeated runs of branch entries by hashing
+ * their 'from' addresses, collapse each detected repetition and
+ * record the iteration counts and cycles in 'iter'. Returns the new
+ * number of entries.
+ */
+static int remove_loops(struct branch_entry *l, int nr,
+ struct iterations *iter)
+{
+ int i, j, off;
+ unsigned char chash[CHASHSZ];
+
+ memset(chash, NO_ENTRY, sizeof(chash));
+
+ BUG_ON(PERF_MAX_BRANCH_DEPTH > 255);
+
+ for (i = 0; i < nr; i++) {
+ int h = hash_64(l[i].from, CHASHBITS) % CHASHSZ;
+
+ /* no collision handling for now */
+ if (chash[h] == NO_ENTRY) {
+ chash[h] = i;
+ } else if (l[chash[h]].from == l[i].from) {
+ bool is_loop = true;
+ /* check if it is a real loop */
+ off = 0;
+ for (j = chash[h]; j < i && i + off < nr; j++, off++)
+ if (l[j].from != l[i + off].from) {
+ is_loop = false;
+ break;
+ }
+ if (is_loop) {
+ j = nr - (i + off);
+ if (j > 0) {
+ save_iterations(iter + i + off,
+ l + i, off);
+
+ memmove(iter + i, iter + i + off,
+ j * sizeof(*iter));
+
+ memmove(l + i, l + i + off,
+ j * sizeof(*l));
+ }
+
+ nr -= off;
+ }
+ }
+ }
+ return nr;
+}
+
+/*
+ * Resolve LBR callstack chain sample
+ * Return:
+ * 1 on success, got LBR callchain information
+ * 0 no available LBR callchain information, should try fp
+ * negative error code on other errors.
+ */
+static int resolve_lbr_callchain_sample(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ struct ip_callchain *chain = sample->callchain;
+ int chain_nr = min(max_stack, (int)chain->nr), i;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ u64 ip, branch_from = 0;
+
+ for (i = 0; i < chain_nr; i++) {
+ if (chain->ips[i] == PERF_CONTEXT_USER)
+ break;
+ }
+
+ /* LBR only affects the user callchain */
+ if (i != chain_nr) {
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ int lbr_nr = lbr_stack->nr, j, k;
+ bool branch;
+ struct branch_flags *flags;
+ /*
+ * LBR callstack can only get user call chain.
+ * The mix_chain_nr is kernel call chain
+ * number plus LBR user call chain number.
+ * i is kernel call chain number,
+ * 1 is PERF_CONTEXT_USER,
+ * lbr_nr + 1 is the user call chain number.
+ * For details, please refer to the comments
+ * in callchain__printf
+ */
+ int mix_chain_nr = i + 1 + lbr_nr + 1;
+
+ for (j = 0; j < mix_chain_nr; j++) {
+ int err;
+ branch = false;
+ flags = NULL;
+
+ if (callchain_param.order == ORDER_CALLEE) {
+ if (j < i + 1)
+ ip = chain->ips[j];
+ else if (j > i + 1) {
+ k = j - i - 2;
+ ip = lbr_stack->entries[k].from;
+ branch = true;
+ flags = &lbr_stack->entries[k].flags;
+ } else {
+ ip = lbr_stack->entries[0].to;
+ branch = true;
+ flags = &lbr_stack->entries[0].flags;
+ branch_from =
+ lbr_stack->entries[0].from;
+ }
+ } else {
+ if (j < lbr_nr) {
+ k = lbr_nr - j - 1;
+ ip = lbr_stack->entries[k].from;
+ branch = true;
+ flags = &lbr_stack->entries[k].flags;
+ } else if (j > lbr_nr)
+ ip = chain->ips[i + 1 - (j - lbr_nr)];
+ else {
+ ip = lbr_stack->entries[0].to;
+ branch = true;
+ flags = &lbr_stack->entries[0].flags;
+ branch_from =
+ lbr_stack->entries[0].from;
+ }
+ }
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ branch, flags, NULL,
+ branch_from);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
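+/*
+ * Scan backwards from 'ent' for the most recent PERF_CONTEXT_* marker
+ * and feed it to add_callchain_ip() to establish the cpumode.
+ */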
+static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u8 *cpumode, int ent)
+{
+ int err = 0;
+
+ while (--ent >= 0) {
+ u64 ip = chain->ips[ent];
+
+ if (ip >= PERF_CONTEXT_MAX) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, cpumode, ip,
+ false, NULL, NULL, 0);
+ break;
+ }
+ }
+ return err;
+}
+
+static int thread__resolve_callchain_sample(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ struct branch_stack *branch = sample->branch_stack;
+ struct ip_callchain *chain = sample->callchain;
+ int chain_nr = 0;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int i, j, err, nr_entries;
+ int skip_idx = -1;
+ int first_call = 0;
+
+ if (chain)
+ chain_nr = chain->nr;
+
+ if (perf_evsel__has_branch_callstack(evsel)) {
+ err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
+ root_al, max_stack);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+
+ /*
+ * Based on DWARF debug information, some architectures skip
+ * a callchain entry saved by the kernel.
+ */
+ skip_idx = arch_skip_callchain_idx(thread, chain);
+
+ /*
+ * Add branches to call stack for easier browsing. This gives
+ * more context for a sample than just the callers.
+ *
+ * This uses individual histograms of paths compared to the
+ * aggregated histograms the normal LBR mode uses.
+ *
+ * Limitations for now:
+ * - No extra filters
+ * - No annotations (should annotate somehow)
+ */
+
+ if (branch && callchain_param.branch_callstack) {
+ int nr = min(max_stack, (int)branch->nr);
+ struct branch_entry be[nr];
+ struct iterations iter[nr];
+
+ if (branch->nr > PERF_MAX_BRANCH_DEPTH) {
+ pr_warning("corrupted branch chain. skipping...\n");
+ goto check_calls;
+ }
+
+ for (i = 0; i < nr; i++) {
+ if (callchain_param.order == ORDER_CALLEE) {
+ be[i] = branch->entries[i];
+
+ if (chain == NULL)
+ continue;
+
+ /*
+ * Check for overlap into the callchain.
+ * The return address is one off compared to
+ * the branch entry. To adjust for this
+ * assume the calling instruction is not longer
+ * than 8 bytes.
+ */
+ if (i == skip_idx ||
+ chain->ips[first_call] >= PERF_CONTEXT_MAX)
+ first_call++;
+ else if (be[i].from < chain->ips[first_call] &&
+ be[i].from >= chain->ips[first_call] - 8)
+ first_call++;
+ } else
+ be[i] = branch->entries[branch->nr - i - 1];
+ }
+
+ memset(iter, 0, sizeof(struct iterations) * nr);
+ nr = remove_loops(be, nr, iter);
+
+ for (i = 0; i < nr; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al,
+ NULL, be[i].to,
+ true, &be[i].flags,
+ NULL, be[i].from);
+
+ if (!err)
+ err = add_callchain_ip(thread, cursor, parent, root_al,
+ NULL, be[i].from,
+ true, &be[i].flags,
+ &iter[i], 0);
+ if (err == -EINVAL)
+ break;
+ if (err)
+ return err;
+ }
+
+ if (chain_nr == 0)
+ return 0;
+
+ chain_nr -= nr;
+ }
+
+check_calls:
+ if (chain && callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent, root_al,
+ &cpumode, chain->nr - first_call);
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+ for (i = first_call, nr_entries = 0;
+ i < chain_nr && nr_entries < max_stack; i++) {
+ u64 ip;
+
+ if (callchain_param.order == ORDER_CALLEE)
+ j = i;
+ else
+ j = chain->nr - i - 1;
+
+#ifdef HAVE_SKIP_CALLCHAIN_IDX
+ if (j == skip_idx)
+ continue;
+#endif
+ ip = chain->ips[j];
+ if (ip < PERF_CONTEXT_MAX)
+ ++nr_entries;
+ else if (callchain_param.order != ORDER_CALLEE) {
+ err = find_prev_cpumode(chain, thread, cursor, parent,
+ root_al, &cpumode, j);
+ if (err)
+ return (err < 0) ? err : 0;
+ continue;
+ }
+
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ false, NULL, NULL, 0);
+
+ if (err)
+ return (err < 0) ? err : 0;
+ }
+
+ return 0;
+}
+
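+/*
+ * Expand an address into its chain of inlined call frames, using the
+ * DSO's cached inlined_nodes tree, appending each frame to the cursor.
+ */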
+static int append_inlines(struct callchain_cursor *cursor,
+ struct map *map, struct symbol *sym, u64 ip)
+{
+ struct inline_node *inline_node;
+ struct inline_list *ilist;
+ u64 addr;
+ int ret = 1;
+
+ if (!symbol_conf.inline_name || !map || !sym)
+ return ret;
+
+ addr = map__map_ip(map, ip);
+ addr = map__rip_2objdump(map, addr);
+
+ inline_node = inlines__tree_find(&map->dso->inlined_nodes, addr);
+ if (!inline_node) {
+ inline_node = dso__parse_addr_inlines(map->dso, addr, sym);
+ if (!inline_node)
+ return ret;
+ inlines__tree_insert(&map->dso->inlined_nodes, inline_node);
+ }
+
+ list_for_each_entry(ilist, &inline_node->val, list) {
+ ret = callchain_cursor_append(cursor, ip, map,
+ ilist->symbol, false,
+ NULL, 0, 0, 0, ilist->srcline);
+
+ if (ret != 0)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+ struct callchain_cursor *cursor = arg;
+ const char *srcline = NULL;
+ u64 addr = entry->ip;
+
+ if (symbol_conf.hide_unresolved && entry->sym == NULL)
+ return 0;
+
+ if (append_inlines(cursor, entry->map, entry->sym, entry->ip) == 0)
+ return 0;
+
+ /*
+ * Convert entry->ip from a virtual address to an offset in
+ * its corresponding binary.
+ */
+ if (entry->map)
+ addr = map__map_ip(entry->map, entry->ip);
+
+ srcline = callchain_srcline(entry->map, entry->sym, addr);
+ return callchain_cursor_append(cursor, entry->ip,
+ entry->map, entry->sym,
+ false, NULL, 0, 0, 0, srcline);
+}
+
+static int thread__resolve_callchain_unwind(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ int max_stack)
+{
+ /* Can we do dwarf post unwind? */
+ if (!((evsel->attr.sample_type & PERF_SAMPLE_REGS_USER) &&
+ (evsel->attr.sample_type & PERF_SAMPLE_STACK_USER)))
+ return 0;
+
+ /* Bail out if nothing was captured. */
+ if ((!sample->user_regs.regs) ||
+ (!sample->user_stack.size))
+ return 0;
+
+ return unwind__get_entries(unwind_entry, cursor,
+ thread, sample, max_stack);
+}
+
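+/*
+ * Resolve a sample's callchain, combining the chain captured in the sample
+ * itself (branch stack and/or ip chain) with DWARF post-unwinding. The two
+ * passes run in callchain_param.order, so the cursor entries come out in
+ * the order the user requested.
+ */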
+int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack)
+{
+ int ret = 0;
+
+ callchain_cursor_reset(cursor);
+
+ if (callchain_param.order == ORDER_CALLEE) {
+ ret = thread__resolve_callchain_sample(thread, cursor,
+ evsel, sample,
+ parent, root_al,
+ max_stack);
+ if (ret)
+ return ret;
+ ret = thread__resolve_callchain_unwind(thread, cursor,
+ evsel, sample,
+ max_stack);
+ } else {
+ ret = thread__resolve_callchain_unwind(thread, cursor,
+ evsel, sample,
+ max_stack);
+ if (ret)
+ return ret;
+ ret = thread__resolve_callchain_sample(thread, cursor,
+ evsel, sample,
+ parent, root_al,
+ max_stack);
+ }
+
+ return ret;
+}
+
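+/*
+ * Call fn() on every thread in the machine: the live threads in each hash
+ * bucket's rb-tree and the threads on the dead list. Stops and returns the
+ * first non-zero value fn() returns.
+ */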
+int machine__for_each_thread(struct machine *machine,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv)
+{
+ struct threads *threads;
+ struct rb_node *nd;
+ struct thread *thread;
+ int rc = 0;
+ int i;
+
+ for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+ threads = &machine->threads[i];
+ for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+ thread = rb_entry(nd, struct thread, rb_node);
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
+
+ list_for_each_entry(thread, &threads->dead, node) {
+ rc = fn(thread, priv);
+ if (rc != 0)
+ return rc;
+ }
+ }
+ return rc;
+}
+
+int machines__for_each_thread(struct machines *machines,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv)
+{
+ struct rb_node *nd;
+ int rc = 0;
+
+ rc = machine__for_each_thread(&machines->host, fn, priv);
+ if (rc != 0)
+ return rc;
+
+ for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+ struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+ rc = machine__for_each_thread(machine, fn, priv);
+ if (rc != 0)
+ return rc;
+ }
+ return rc;
+}
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+ struct target *target, struct thread_map *threads,
+ perf_event__handler_t process, bool data_mmap,
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize)
+{
+ if (target__has_task(target))
+ return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
+ else if (target__has_cpu(target))
+ return perf_event__synthesize_threads(tool, process,
+ machine, data_mmap,
+ proc_map_timeout,
+ nr_threads_synthesize);
+ /* command specified */
+ return 0;
+}
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu)
+{
+ if (cpu < 0 || cpu >= MAX_NR_CPUS || !machine->current_tid)
+ return -1;
+
+ return machine->current_tid[cpu];
+}
+
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+ pid_t tid)
+{
+ struct thread *thread;
+
+ if (cpu < 0)
+ return -EINVAL;
+
+ if (!machine->current_tid) {
+ int i;
+
+ machine->current_tid = calloc(MAX_NR_CPUS, sizeof(pid_t));
+ if (!machine->current_tid)
+ return -ENOMEM;
+ for (i = 0; i < MAX_NR_CPUS; i++)
+ machine->current_tid[i] = -1;
+ }
+
+ if (cpu >= MAX_NR_CPUS) {
+ pr_err("Requested CPU %d too large. ", cpu);
+ pr_err("Consider raising MAX_NR_CPUS\n");
+ return -EINVAL;
+ }
+
+ machine->current_tid[cpu] = tid;
+
+ thread = machine__findnew_thread(machine, pid, tid);
+ if (!thread)
+ return -ENOMEM;
+
+ thread->cpu = cpu;
+ thread__put(thread);
+
+ return 0;
+}
+
+/*
+ * Compares the raw arch string. N.B. see instead perf_env__arch() if a
+ * normalized arch is needed.
+ */
+bool machine__is(struct machine *machine, const char *arch)
+{
+ return machine && !strcmp(perf_env__raw_arch(machine->env), arch);
+}
+
+int machine__nr_cpus_avail(struct machine *machine)
+{
+ return machine ? perf_env__nr_cpus_avail(machine->env) : 0;
+}
+
+int machine__get_kernel_start(struct machine *machine)
+{
+ struct map *map = machine__kernel_map(machine);
+ int err = 0;
+
+ /*
+ * The only addresses above 2^63 are kernel addresses of a 64-bit
+ * kernel. Note that addresses are unsigned so that on a 32-bit system
+ * all addresses including kernel addresses are less than 2^32. In
+ * that case (32-bit system), if the kernel mapping is unknown, all
+ * addresses will be assumed to be in user space - see
+ * machine__kernel_ip().
+ */
+ machine->kernel_start = 1ULL << 63;
+ if (map) {
+ err = map__load(map);
+ /*
+ * On x86_64, PTI entry trampolines are less than the
+ * start of kernel text, but still above 2^63. So leave
+ * kernel_start = 1ULL << 63 for x86_64.
+ */
+ if (!err && !machine__is(machine, "x86_64"))
+ machine->kernel_start = map->start;
+ }
+ return err;
+}
+
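+/*
+ * With a single address space the sample header's cpumode cannot tell user
+ * from kernel addresses, so classify @addr by comparing it against the
+ * kernel start address instead, preserving the host/guest distinction.
+ */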
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr)
+{
+ u8 addr_cpumode = cpumode;
+ bool kernel_ip;
+
+ if (!machine->single_address_space)
+ goto out;
+
+ kernel_ip = machine__kernel_ip(machine, addr);
+ switch (cpumode) {
+ case PERF_RECORD_MISC_KERNEL:
+ case PERF_RECORD_MISC_USER:
+ addr_cpumode = kernel_ip ? PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+ break;
+ case PERF_RECORD_MISC_GUEST_KERNEL:
+ case PERF_RECORD_MISC_GUEST_USER:
+ addr_cpumode = kernel_ip ? PERF_RECORD_MISC_GUEST_KERNEL :
+ PERF_RECORD_MISC_GUEST_USER;
+ break;
+ default:
+ break;
+ }
+out:
+ return addr_cpumode;
+}
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+ return dsos__findnew(&machine->dsos, filename);
+}
+
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp)
+{
+ struct machine *machine = vmachine;
+ struct map *map;
+ struct symbol *sym = machine__find_kernel_symbol(machine, *addrp, &map);
+
+ if (sym == NULL)
+ return NULL;
+
+ *modp = __map__is_kmodule(map) ? (char *)map->dso->short_name : NULL;
+ *addrp = map->unmap_ip(map, sym->start);
+ return sym->name;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
new file mode 100644
index 000000000..6f3767808
--- /dev/null
+++ b/tools/perf/util/machine.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MACHINE_H
+#define __PERF_MACHINE_H
+
+#include <sys/types.h>
+#include <linux/rbtree.h>
+#include "map.h"
+#include "dso.h"
+#include "event.h"
+#include "rwsem.h"
+
+struct addr_location;
+struct branch_stack;
+struct perf_evsel;
+struct perf_sample;
+struct symbol;
+struct thread;
+union perf_event;
+
+/* Native host kernel uses -1 as pid index in machine */
+#define HOST_KERNEL_ID (-1)
+#define DEFAULT_GUEST_KERNEL_ID (0)
+
+extern const char *ref_reloc_sym_names[];
+
+struct vdso_info;
+
+#define THREADS__TABLE_BITS 8
+#define THREADS__TABLE_SIZE (1 << THREADS__TABLE_BITS)
+
+struct threads {
+ struct rb_root entries;
+ struct rw_semaphore lock;
+ unsigned int nr;
+ struct list_head dead;
+ struct thread *last_match;
+};
+
+struct machine {
+ struct rb_node rb_node;
+ pid_t pid;
+ u16 id_hdr_size;
+ bool comm_exec;
+ bool kptr_restrict_warned;
+ bool single_address_space;
+ char *root_dir;
+ char *mmap_name;
+ struct threads threads[THREADS__TABLE_SIZE];
+ struct vdso_info *vdso_info;
+ struct perf_env *env;
+ struct dsos dsos;
+ struct map_groups kmaps;
+ struct map *vmlinux_map;
+ u64 kernel_start;
+ pid_t *current_tid;
+ union { /* Tool specific area */
+ void *priv;
+ u64 db_id;
+ };
+ bool trampolines_mapped;
+};
+
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+ /* Cast it to handle tid == -1 */
+ return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
+/*
+ * The main kernel (vmlinux) map
+ */
+static inline
+struct map *machine__kernel_map(struct machine *machine)
+{
+ return machine->vmlinux_map;
+}
+
+/*
+ * kernel (the one returned by machine__kernel_map()) plus kernel modules maps
+ */
+static inline
+struct maps *machine__kernel_maps(struct machine *machine)
+{
+ return &machine->kmaps.maps;
+}
+
+int machine__get_kernel_start(struct machine *machine);
+
+static inline u64 machine__kernel_start(struct machine *machine)
+{
+ if (!machine->kernel_start)
+ machine__get_kernel_start(machine);
+ return machine->kernel_start;
+}
+
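+/*
+ * Addresses at or above kernel_start (2^63 by default, see
+ * machine__get_kernel_start()) are treated as kernel addresses.
+ */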
+static inline bool machine__kernel_ip(struct machine *machine, u64 ip)
+{
+ u64 kernel_start = machine__kernel_start(machine);
+
+ return ip >= kernel_start;
+}
+
+u8 machine__addr_cpumode(struct machine *machine, u8 cpumode, u64 addr);
+
+struct thread *machine__find_thread(struct machine *machine, pid_t pid,
+ pid_t tid);
+struct comm *machine__thread_exec_comm(struct machine *machine,
+ struct thread *thread);
+
+int machine__process_comm_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_exit_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_fork_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_lost_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_switch_event(struct machine *machine,
+ union perf_event *event);
+int machine__process_namespaces_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_mmap_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+int machine__process_event(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample);
+
+typedef void (*machine__process_t)(struct machine *machine, void *data);
+
+struct machines {
+ struct machine host;
+ struct rb_root guests;
+};
+
+void machines__init(struct machines *machines);
+void machines__exit(struct machines *machines);
+
+void machines__process_guests(struct machines *machines,
+ machine__process_t process, void *data);
+
+struct machine *machines__add(struct machines *machines, pid_t pid,
+ const char *root_dir);
+struct machine *machines__find_host(struct machines *machines);
+struct machine *machines__find(struct machines *machines, pid_t pid);
+struct machine *machines__findnew(struct machines *machines, pid_t pid);
+
+void machines__set_id_hdr_size(struct machines *machines, u16 id_hdr_size);
+void machines__set_comm_exec(struct machines *machines, bool comm_exec);
+
+struct machine *machine__new_host(void);
+struct machine *machine__new_kallsyms(void);
+int machine__init(struct machine *machine, const char *root_dir, pid_t pid);
+void machine__exit(struct machine *machine);
+void machine__delete_threads(struct machine *machine);
+void machine__delete(struct machine *machine);
+void machine__remove_thread(struct machine *machine, struct thread *th);
+
+struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
+ struct addr_location *al);
+struct mem_info *sample__resolve_mem(struct perf_sample *sample,
+ struct addr_location *al);
+
+struct callchain_cursor;
+
+int thread__resolve_callchain(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_evsel *evsel,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ int max_stack);
+
+/*
+ * Default guest kernel is defined by parameter --guestkallsyms
+ * and --guestmodules
+ */
+static inline bool machine__is_default_guest(struct machine *machine)
+{
+ return machine ? machine->pid == DEFAULT_GUEST_KERNEL_ID : false;
+}
+
+static inline bool machine__is_host(struct machine *machine)
+{
+ return machine ? machine->pid == HOST_KERNEL_ID : false;
+}
+
+bool machine__is(struct machine *machine, const char *arch);
+int machine__nr_cpus_avail(struct machine *machine);
+
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
+
+size_t machine__fprintf(struct machine *machine, FILE *fp);
+
+static inline
+struct symbol *machine__find_kernel_symbol(struct machine *machine, u64 addr,
+ struct map **mapp)
+{
+ return map_groups__find_symbol(&machine->kmaps, addr, mapp);
+}
+
+static inline
+struct symbol *machine__find_kernel_symbol_by_name(struct machine *machine,
+ const char *name,
+ struct map **mapp)
+{
+ return map_groups__find_symbol_by_name(&machine->kmaps, name, mapp);
+}
+
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+ const char *filename);
+int arch__fix_module_text_start(u64 *start, u64 *size, const char *name);
+
+int machine__load_kallsyms(struct machine *machine, const char *filename);
+
+int machine__load_vmlinux_path(struct machine *machine);
+
+size_t machine__fprintf_dsos_buildid(struct machine *machine, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm);
+size_t machines__fprintf_dsos(struct machines *machines, FILE *fp);
+size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm);
+
+void machine__destroy_kernel_maps(struct machine *machine);
+int machine__create_kernel_maps(struct machine *machine);
+
+int machines__create_kernel_maps(struct machines *machines, pid_t pid);
+int machines__create_guest_kernel_maps(struct machines *machines);
+void machines__destroy_kernel_maps(struct machines *machines);
+
+size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
+
+int machine__for_each_thread(struct machine *machine,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv);
+int machines__for_each_thread(struct machines *machines,
+ int (*fn)(struct thread *thread, void *p),
+ void *priv);
+
+int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
+ struct target *target, struct thread_map *threads,
+ perf_event__handler_t process, bool data_mmap,
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize);
+static inline
+int machine__synthesize_threads(struct machine *machine, struct target *target,
+ struct thread_map *threads, bool data_mmap,
+ unsigned int proc_map_timeout,
+ unsigned int nr_threads_synthesize)
+{
+ return __machine__synthesize_threads(machine, NULL, target, threads,
+ perf_event__process, data_mmap,
+ proc_map_timeout,
+ nr_threads_synthesize);
+}
+
+pid_t machine__get_current_tid(struct machine *machine, int cpu);
+int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
+ pid_t tid);
+/*
+ * For use with libtraceevent's tep_set_function_resolver()
+ */
+char *machine__resolve_kernel_addr(void *vmachine, unsigned long long *addrp, char **modp);
+
+void machine__get_kallsyms_filename(struct machine *machine, char *buf,
+ size_t bufsz);
+
+int machine__create_extra_kernel_maps(struct machine *machine,
+ struct dso *kernel);
+
+/* Kernel-space maps for symbols that are outside the main kernel map and module maps */
+struct extra_kernel_map {
+ u64 start;
+ u64 end;
+ u64 pgoff;
+ char name[KMAP_NAME_LEN];
+};
+
+int machine__create_extra_kernel_map(struct machine *machine,
+ struct dso *kernel,
+ struct extra_kernel_map *xm);
+
+int machine__map_x86_64_entry_trampolines(struct machine *machine,
+ struct dso *kernel);
+
+#endif /* __PERF_MACHINE_H */
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
new file mode 100644
index 000000000..769d11575
--- /dev/null
+++ b/tools/perf/util/map.c
@@ -0,0 +1,920 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "symbol.h"
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
+#include "map.h"
+#include "thread.h"
+#include "vdso.h"
+#include "build-id.h"
+#include "util.h"
+#include "debug.h"
+#include "machine.h"
+#include <linux/string.h>
+#include "srcline.h"
+#include "namespaces.h"
+#include "unwind.h"
+
+static void __maps__insert(struct maps *maps, struct map *map);
+
+static inline int is_anon_memory(const char *filename, u32 flags)
+{
+ return flags & MAP_HUGETLB ||
+ !strcmp(filename, "//anon") ||
+ !strncmp(filename, "/dev/zero", sizeof("/dev/zero") - 1) ||
+ !strncmp(filename, "/anon_hugepage", sizeof("/anon_hugepage") - 1);
+}
+
+static inline int is_no_dso_memory(const char *filename)
+{
+ return !strncmp(filename, "[stack", 6) ||
+ !strncmp(filename, "/SYSV",5) ||
+ !strcmp(filename, "[heap]");
+}
+
+static inline int is_android_lib(const char *filename)
+{
+ return !strncmp(filename, "/data/app-lib", 13) ||
+ !strncmp(filename, "/system/lib", 11);
+}
+
+static inline bool replace_android_lib(const char *filename, char *newfilename)
+{
+ const char *libname;
+ char *app_abi;
+ size_t app_abi_length, new_length;
+ size_t lib_length = 0;
+
+ libname = strrchr(filename, '/');
+ if (libname)
+ lib_length = strlen(libname);
+
+ app_abi = getenv("APP_ABI");
+ if (!app_abi)
+ return false;
+
+ app_abi_length = strlen(app_abi);
+
+ if (!strncmp(filename, "/data/app-lib", 13)) {
+ char *apk_path;
+
+ if (!app_abi_length)
+ return false;
+
+ new_length = 7 + app_abi_length + lib_length;
+
+ apk_path = getenv("APK_PATH");
+ if (apk_path) {
+ new_length += strlen(apk_path) + 1;
+ if (new_length > PATH_MAX)
+ return false;
+ snprintf(newfilename, new_length,
+ "%s/libs/%s/%s", apk_path, app_abi, libname);
+ } else {
+ if (new_length > PATH_MAX)
+ return false;
+ snprintf(newfilename, new_length,
+ "libs/%s/%s", app_abi, libname);
+ }
+ return true;
+ }
+
+ if (!strncmp(filename, "/system/lib/", 12)) {
+ char *ndk, *app;
+ const char *arch;
+ int ndk_length, app_length;
+
+ ndk = getenv("NDK_ROOT");
+ app = getenv("APP_PLATFORM");
+
+ if (!(ndk && app))
+ return false;
+
+ ndk_length = strlen(ndk);
+ app_length = strlen(app);
+
+ if (!(ndk_length && app_length && app_abi_length))
+ return false;
+
+ arch = !strncmp(app_abi, "arm", 3) ? "arm" :
+ !strncmp(app_abi, "mips", 4) ? "mips" :
+ !strncmp(app_abi, "x86", 3) ? "x86" : NULL;
+
+ if (!arch)
+ return false;
+
+ new_length = 27 + ndk_length +
+ app_length + lib_length
+ + strlen(arch);
+
+ if (new_length > PATH_MAX)
+ return false;
+ snprintf(newfilename, new_length,
+ "%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+ ndk_length, ndk, app_length, app, arch, libname);
+
+ return true;
+ }
+ return false;
+}
+
+void map__init(struct map *map, u64 start, u64 end, u64 pgoff, struct dso *dso)
+{
+ map->start = start;
+ map->end = end;
+ map->pgoff = pgoff;
+ map->reloc = 0;
+ map->dso = dso__get(dso);
+ map->map_ip = map__map_ip;
+ map->unmap_ip = map__unmap_ip;
+ RB_CLEAR_NODE(&map->rb_node);
+ map->groups = NULL;
+ map->erange_warned = false;
+ refcount_set(&map->refcnt, 1);
+}
+
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+ u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
+ u64 ino_gen, u32 prot, u32 flags, char *filename,
+ struct thread *thread)
+{
+ struct map *map = malloc(sizeof(*map));
+ struct nsinfo *nsi = NULL;
+ struct nsinfo *nnsi;
+
+ if (map != NULL) {
+ char newfilename[PATH_MAX];
+ struct dso *dso;
+ int anon, no_dso, vdso, android;
+
+ android = is_android_lib(filename);
+ anon = is_anon_memory(filename, flags);
+ vdso = is_vdso_map(filename);
+ no_dso = is_no_dso_memory(filename);
+
+ map->maj = d_maj;
+ map->min = d_min;
+ map->ino = ino;
+ map->ino_generation = ino_gen;
+ map->prot = prot;
+ map->flags = flags;
+ nsi = nsinfo__get(thread->nsinfo);
+
+ if ((anon || no_dso) && nsi && (prot & PROT_EXEC)) {
+ snprintf(newfilename, sizeof(newfilename),
+ "/tmp/perf-%d.map", nsi->pid);
+ filename = newfilename;
+ }
+
+ if (android) {
+ if (replace_android_lib(filename, newfilename))
+ filename = newfilename;
+ }
+
+ if (vdso) {
+ /* The vdso maps are always on the host and not the
+ * container. Ensure that we don't use setns to look
+ * them up.
+ */
+ nnsi = nsinfo__copy(nsi);
+ if (nnsi) {
+ nsinfo__put(nsi);
+ nnsi->need_setns = false;
+ nsi = nnsi;
+ }
+ pgoff = 0;
+ dso = machine__findnew_vdso(machine, thread);
+ } else
+ dso = machine__findnew_dso(machine, filename);
+
+ if (dso == NULL)
+ goto out_delete;
+
+ map__init(map, start, start + len, pgoff, dso);
+
+ if (anon || no_dso) {
+ map->map_ip = map->unmap_ip = identity__map_ip;
+
+ /*
+ * Set memory without DSO as loaded. All map__find_*
+ * functions still return NULL, and we avoid the
+ * unnecessary map__load warning.
+ */
+ if (!(prot & PROT_EXEC))
+ dso__set_loaded(dso);
+ }
+ dso->nsinfo = nsi;
+ dso__put(dso);
+ }
+ return map;
+out_delete:
+ nsinfo__put(nsi);
+ free(map);
+ return NULL;
+}
+
+/*
+ * Constructor variant for modules (where we know from /proc/modules where
+ * they are loaded) and for vmlinux, where only after we load all the
+ * symbols we'll know where it starts and ends.
+ */
+struct map *map__new2(u64 start, struct dso *dso)
+{
+ struct map *map = calloc(1, (sizeof(*map) +
+ (dso->kernel ? sizeof(struct kmap) : 0)));
+ if (map != NULL) {
+ /*
+ * ->end will be filled after we load all the symbols
+ */
+ map__init(map, start, 0, 0, dso);
+ }
+
+ return map;
+}
+
+/*
+ * Use this and __map__is_kmodule() for map instances that are in
+ * machine->kmaps, and thus have map->groups->machine all properly set, to
+ * disambiguate between the kernel and modules.
+ *
+ * When the need arises, introduce map__is_{kernel,kmodule}() that
+ * checks (map->groups != NULL && map->groups->machine != NULL &&
+ * map->dso->kernel) before calling __map__is_{kernel,kmodule}().
+ */
+bool __map__is_kernel(const struct map *map)
+{
+ return machine__kernel_map(map->groups->machine) == map;
+}
+
+bool __map__is_extra_kernel_map(const struct map *map)
+{
+ struct kmap *kmap = __map__kmap((struct map *)map);
+
+ return kmap && kmap->name[0];
+}
+
+bool map__has_symbols(const struct map *map)
+{
+ return dso__has_symbols(map->dso);
+}
+
+static void map__exit(struct map *map)
+{
+ BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
+ dso__zput(map->dso);
+}
+
+void map__delete(struct map *map)
+{
+ map__exit(map);
+ free(map);
+}
+
+void map__put(struct map *map)
+{
+ if (map && refcount_dec_and_test(&map->refcnt))
+ map__delete(map);
+}
+
+void map__fixup_start(struct map *map)
+{
+ struct rb_root *symbols = &map->dso->symbols;
+ struct rb_node *nd = rb_first(symbols);
+ if (nd != NULL) {
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+ map->start = sym->start;
+ }
+}
+
+void map__fixup_end(struct map *map)
+{
+ struct rb_root *symbols = &map->dso->symbols;
+ struct rb_node *nd = rb_last(symbols);
+ if (nd != NULL) {
+ struct symbol *sym = rb_entry(nd, struct symbol, rb_node);
+ map->end = sym->end;
+ }
+}
+
+#define DSO__DELETED "(deleted)"
+
+int map__load(struct map *map)
+{
+ const char *name = map->dso->long_name;
+ int nr;
+
+ if (dso__loaded(map->dso))
+ return 0;
+
+ nr = dso__load(map->dso, map);
+ if (nr < 0) {
+ if (map->dso->has_build_id) {
+ char sbuild_id[SBUILD_ID_SIZE];
+
+ build_id__sprintf(map->dso->build_id,
+ sizeof(map->dso->build_id),
+ sbuild_id);
+ pr_warning("%s with build id %s not found",
+ name, sbuild_id);
+ } else
+ pr_warning("Failed to open %s", name);
+
+ pr_warning(", continuing without symbols\n");
+ return -1;
+ } else if (nr == 0) {
+#ifdef HAVE_LIBELF_SUPPORT
+ const size_t len = strlen(name);
+ const size_t real_len = len - sizeof(DSO__DELETED);
+
+ if (len > sizeof(DSO__DELETED) &&
+ strcmp(name + real_len + 1, DSO__DELETED) == 0) {
+ pr_warning("%.*s was updated (is prelink enabled?). "
+ "Restart the long running apps that use it!\n",
+ (int)real_len, name);
+ } else {
+ pr_warning("no symbols found in %s, maybe install "
+ "a debug package?\n", name);
+ }
+#endif
+ return -1;
+ }
+
+ return 0;
+}
+
+struct symbol *map__find_symbol(struct map *map, u64 addr)
+{
+ if (map__load(map) < 0)
+ return NULL;
+
+ return dso__find_symbol(map->dso, addr);
+}
+
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name)
+{
+ if (map__load(map) < 0)
+ return NULL;
+
+ if (!dso__sorted_by_name(map->dso))
+ dso__sort_by_name(map->dso);
+
+ return dso__find_symbol_by_name(map->dso, name);
+}
+
+struct map *map__clone(struct map *from)
+{
+ struct map *map = memdup(from, sizeof(*map));
+
+ if (map != NULL) {
+ refcount_set(&map->refcnt, 1);
+ RB_CLEAR_NODE(&map->rb_node);
+ dso__get(map->dso);
+ map->groups = NULL;
+ }
+
+ return map;
+}
+
+size_t map__fprintf(struct map *map, FILE *fp)
+{
+ return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s\n",
+ map->start, map->end, map->pgoff, map->dso->name);
+}
+
+size_t map__fprintf_dsoname(struct map *map, FILE *fp)
+{
+ const char *dsoname = "[unknown]";
+
+ if (map && map->dso) {
+ if (symbol_conf.show_kernel_path && map->dso->long_name)
+ dsoname = map->dso->long_name;
+ else
+ dsoname = map->dso->name;
+ }
+
+ return fprintf(fp, "%s", dsoname);
+}
+
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym)
+{
+ if (map == NULL)
+ return SRCLINE_UNKNOWN;
+ return get_srcline(map->dso, map__rip_2objdump(map, addr), sym, true, true, addr);
+}
+
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+ FILE *fp)
+{
+ int ret = 0;
+
+ if (map && map->dso) {
+ char *srcline = map__srcline(map, addr, NULL);
+ if (srcline != SRCLINE_UNKNOWN)
+ ret = fprintf(fp, "%s%s", prefix, srcline);
+ free_srcline(srcline);
+ }
+ return ret;
+}
+
+/**
+ * map__rip_2objdump - convert symbol start address to objdump address.
+ * @map: memory map
+ * @rip: symbol start address
+ *
+ * objdump wants/reports absolute IPs for ET_EXEC, and RIPs for ET_DYN.
+ * map->dso->adjust_symbols==1 for ET_EXEC-like cases except ET_REL which is
+ * relative to section start.
+ *
+ * Return: Address suitable for passing to "objdump --start-address="
+ */
+u64 map__rip_2objdump(struct map *map, u64 rip)
+{
+ struct kmap *kmap = __map__kmap(map);
+
+ /*
+ * vmlinux does not have program headers for PTI entry trampolines and
+ * kcore may not either. However the trampoline object code is on the
+ * main kernel map, so just use that instead.
+ */
+ if (kmap && is_entry_trampoline(kmap->name) && kmap->kmaps && kmap->kmaps->machine) {
+ struct map *kernel_map = machine__kernel_map(kmap->kmaps->machine);
+
+ if (kernel_map)
+ map = kernel_map;
+ }
+
+ if (!map->dso->adjust_symbols)
+ return rip;
+
+ if (map->dso->rel)
+ return rip - map->pgoff;
+
+ /*
+ * kernel modules also have DSO_TYPE_USER in dso->kernel,
+ * but all kernel modules are ET_REL, so won't get here.
+ */
+ if (map->dso->kernel == DSO_TYPE_USER)
+ return rip + map->dso->text_offset;
+
+ return map->unmap_ip(map, rip) - map->reloc;
+}
+
+/**
+ * map__objdump_2mem - convert objdump address to a memory address.
+ * @map: memory map
+ * @ip: objdump address
+ *
+ * Closely related to map__rip_2objdump(), this function takes an address from
+ * objdump and converts it to a memory address. Note this assumes that @map
+ * contains the address. To be sure the result is valid, check it forwards,
+ * e.g. map__rip_2objdump(map->map_ip(map, map__objdump_2mem(map, ip))) == ip
+ *
+ * Return: Memory address.
+ */
+u64 map__objdump_2mem(struct map *map, u64 ip)
+{
+ if (!map->dso->adjust_symbols)
+ return map->unmap_ip(map, ip);
+
+ if (map->dso->rel)
+ return map->unmap_ip(map, ip + map->pgoff);
+
+ /*
+ * kernel modules also have DSO_TYPE_USER in dso->kernel,
+ * but all kernel modules are ET_REL, so won't get here.
+ */
+ if (map->dso->kernel == DSO_TYPE_USER)
+ return map->unmap_ip(map, ip - map->dso->text_offset);
+
+ return ip + map->reloc;
+}
+
+static void maps__init(struct maps *maps)
+{
+ maps->entries = RB_ROOT;
+ init_rwsem(&maps->lock);
+}
+
+void map_groups__init(struct map_groups *mg, struct machine *machine)
+{
+ maps__init(&mg->maps);
+ mg->machine = machine;
+ refcount_set(&mg->refcnt, 1);
+}
+
+static void __maps__purge(struct maps *maps)
+{
+ struct rb_root *root = &maps->entries;
+ struct rb_node *next = rb_first(root);
+
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+
+ next = rb_next(&pos->rb_node);
+ rb_erase_init(&pos->rb_node, root);
+ map__put(pos);
+ }
+}
+
+static void maps__exit(struct maps *maps)
+{
+ down_write(&maps->lock);
+ __maps__purge(maps);
+ up_write(&maps->lock);
+}
+
+void map_groups__exit(struct map_groups *mg)
+{
+ maps__exit(&mg->maps);
+}
+
+bool map_groups__empty(struct map_groups *mg)
+{
+ return !maps__first(&mg->maps);
+}
+
+struct map_groups *map_groups__new(struct machine *machine)
+{
+ struct map_groups *mg = malloc(sizeof(*mg));
+
+ if (mg != NULL)
+ map_groups__init(mg, machine);
+
+ return mg;
+}
+
+void map_groups__delete(struct map_groups *mg)
+{
+ map_groups__exit(mg);
+ free(mg);
+}
+
+void map_groups__put(struct map_groups *mg)
+{
+ if (mg && refcount_dec_and_test(&mg->refcnt))
+ map_groups__delete(mg);
+}
+
+struct symbol *map_groups__find_symbol(struct map_groups *mg,
+ u64 addr, struct map **mapp)
+{
+ struct map *map = map_groups__find(mg, addr);
+
+ /* Ensure map is loaded before using map->map_ip */
+ if (map != NULL && map__load(map) >= 0) {
+ if (mapp != NULL)
+ *mapp = map;
+ return map__find_symbol(map, map->map_ip(map, addr));
+ }
+
+ return NULL;
+}
+
+static bool map__contains_symbol(struct map *map, struct symbol *sym)
+{
+ u64 ip = map->unmap_ip(map, sym->start);
+
+ return ip >= map->start && ip < map->end;
+}
+
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
+ struct map **mapp)
+{
+ struct symbol *sym;
+ struct rb_node *nd;
+
+ down_read(&maps->lock);
+
+ for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
+ struct map *pos = rb_entry(nd, struct map, rb_node);
+
+ sym = map__find_symbol_by_name(pos, name);
+
+ if (sym == NULL)
+ continue;
+ if (!map__contains_symbol(pos, sym)) {
+ sym = NULL;
+ continue;
+ }
+ if (mapp != NULL)
+ *mapp = pos;
+ goto out;
+ }
+
+ sym = NULL;
+out:
+ up_read(&maps->lock);
+ return sym;
+}
+
+struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
+ const char *name,
+ struct map **mapp)
+{
+ return maps__find_symbol_by_name(&mg->maps, name, mapp);
+}
+
+int map_groups__find_ams(struct addr_map_symbol *ams)
+{
+ if (ams->addr < ams->map->start || ams->addr >= ams->map->end) {
+ if (ams->map->groups == NULL)
+ return -1;
+ ams->map = map_groups__find(ams->map->groups, ams->addr);
+ if (ams->map == NULL)
+ return -1;
+ }
+
+ ams->al_addr = ams->map->map_ip(ams->map, ams->addr);
+ ams->sym = map__find_symbol(ams->map, ams->al_addr);
+
+ return ams->sym ? 0 : -1;
+}
+
+static size_t maps__fprintf(struct maps *maps, FILE *fp)
+{
+ size_t printed = 0;
+ struct rb_node *nd;
+
+ down_read(&maps->lock);
+
+ for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
+ struct map *pos = rb_entry(nd, struct map, rb_node);
+ printed += fprintf(fp, "Map:");
+ printed += map__fprintf(pos, fp);
+ if (verbose > 2) {
+ printed += dso__fprintf(pos->dso, fp);
+ printed += fprintf(fp, "--\n");
+ }
+ }
+
+ up_read(&maps->lock);
+
+ return printed;
+}
+
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
+{
+ return maps__fprintf(&mg->maps, fp);
+}
+
+static void __map_groups__insert(struct map_groups *mg, struct map *map)
+{
+ __maps__insert(&mg->maps, map);
+ map->groups = mg;
+}
+
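+/*
+ * Drop every existing map that overlaps [map->start, map->end): a map that
+ * is only partially covered is split, and its non-overlapping head and/or
+ * tail is cloned and re-inserted before the overlapped part is released.
+ */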
+static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
+{
+ struct rb_root *root;
+ struct rb_node *next, *first;
+ int err = 0;
+
+ down_write(&maps->lock);
+
+ root = &maps->entries;
+
+ /*
+ * Find first map where end > map->start.
+ * Same as find_vma() in kernel.
+ */
+ next = root->rb_node;
+ first = NULL;
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+
+ if (pos->end > map->start) {
+ first = next;
+ if (pos->start <= map->start)
+ break;
+ next = next->rb_left;
+ } else
+ next = next->rb_right;
+ }
+
+ next = first;
+ while (next) {
+ struct map *pos = rb_entry(next, struct map, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ /*
+ * Stop if current map starts after map->end.
+ * Maps are ordered by start: next will not overlap for sure.
+ */
+ if (pos->start >= map->end)
+ break;
+
+ if (verbose >= 2) {
+
+ if (use_browser) {
+ pr_warning("overlapping maps in %s "
+ "(disable tui for more info)\n",
+ map->dso->name);
+ } else {
+ fputs("overlapping maps:\n", fp);
+ map__fprintf(map, fp);
+ map__fprintf(pos, fp);
+ }
+ }
+
+ rb_erase_init(&pos->rb_node, root);
+ /*
+ * Now check if we need to create new maps for areas not
+ * overlapped by the new map:
+ */
+ if (map->start > pos->start) {
+ struct map *before = map__clone(pos);
+
+ if (before == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ before->end = map->start;
+ __map_groups__insert(pos->groups, before);
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(before, fp);
+ map__put(before);
+ }
+
+ if (map->end < pos->end) {
+ struct map *after = map__clone(pos);
+
+ if (after == NULL) {
+ err = -ENOMEM;
+ goto put_map;
+ }
+
+ after->start = map->end;
+ after->pgoff += map->end - pos->start;
+ assert(pos->map_ip(pos, map->end) == after->map_ip(after, map->end));
+ __map_groups__insert(pos->groups, after);
+ if (verbose >= 2 && !use_browser)
+ map__fprintf(after, fp);
+ map__put(after);
+ }
+put_map:
+ map__put(pos);
+
+ if (err)
+ goto out;
+ }
+
+ err = 0;
+out:
+ up_write(&maps->lock);
+ return err;
+}
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+ FILE *fp)
+{
+ return maps__fixup_overlappings(&mg->maps, map, fp);
+}
+
+/*
+ * XXX This should not really _copy_ the maps, but refcount them.
+ */
+int map_groups__clone(struct thread *thread, struct map_groups *parent)
+{
+ struct map_groups *mg = thread->mg;
+ int err = -ENOMEM;
+ struct map *map;
+ struct maps *maps = &parent->maps;
+
+ down_read(&maps->lock);
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ struct map *new = map__clone(map);
+ if (new == NULL)
+ goto out_unlock;
+
+ err = unwind__prepare_access(thread, new, NULL);
+ if (err)
+ goto out_unlock;
+
+ map_groups__insert(mg, new);
+ map__put(new);
+ }
+
+ err = 0;
+out_unlock:
+ up_read(&maps->lock);
+ return err;
+}
+
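+/* Insert @map into the rb-tree, ordered by start address, and grab a ref. */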
+static void __maps__insert(struct maps *maps, struct map *map)
+{
+ struct rb_node **p = &maps->entries.rb_node;
+ struct rb_node *parent = NULL;
+ const u64 ip = map->start;
+ struct map *m;
+
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct map, rb_node);
+ if (ip < m->start)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&map->rb_node, parent, p);
+ rb_insert_color(&map->rb_node, &maps->entries);
+ map__get(map);
+}
+
+void maps__insert(struct maps *maps, struct map *map)
+{
+ down_write(&maps->lock);
+ __maps__insert(maps, map);
+ up_write(&maps->lock);
+}
+
+static void __maps__remove(struct maps *maps, struct map *map)
+{
+ rb_erase_init(&map->rb_node, &maps->entries);
+ map__put(map);
+}
+
+void maps__remove(struct maps *maps, struct map *map)
+{
+ down_write(&maps->lock);
+ __maps__remove(maps, map);
+ up_write(&maps->lock);
+}
+
+struct map *maps__find(struct maps *maps, u64 ip)
+{
+ struct rb_node **p, *parent = NULL;
+ struct map *m;
+
+ down_read(&maps->lock);
+
+ p = &maps->entries.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct map, rb_node);
+ if (ip < m->start)
+ p = &(*p)->rb_left;
+ else if (ip >= m->end)
+ p = &(*p)->rb_right;
+ else
+ goto out;
+ }
+
+ m = NULL;
+out:
+ up_read(&maps->lock);
+ return m;
+}
+
+struct map *maps__first(struct maps *maps)
+{
+ struct rb_node *first = rb_first(&maps->entries);
+
+ if (first)
+ return rb_entry(first, struct map, rb_node);
+ return NULL;
+}
+
+struct map *map__next(struct map *map)
+{
+ struct rb_node *next = rb_next(&map->rb_node);
+
+ if (next)
+ return rb_entry(next, struct map, rb_node);
+ return NULL;
+}
+
+struct kmap *__map__kmap(struct map *map)
+{
+ if (!map->dso || !map->dso->kernel)
+ return NULL;
+ return (struct kmap *)(map + 1);
+}
+
+struct kmap *map__kmap(struct map *map)
+{
+ struct kmap *kmap = __map__kmap(map);
+
+ if (!kmap)
+ pr_err("Internal error: map__kmap with a non-kernel map\n");
+ return kmap;
+}
+
+struct map_groups *map__kmaps(struct map *map)
+{
+ struct kmap *kmap = map__kmap(map);
+
+ if (!kmap || !kmap->kmaps) {
+ pr_err("Internal error: map__kmaps with a non-kernel map\n");
+ return NULL;
+ }
+ return kmap->kmaps;
+}
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
new file mode 100644
index 000000000..e0f327b51
--- /dev/null
+++ b/tools/perf/util/map.h
@@ -0,0 +1,255 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MAP_H
+#define __PERF_MAP_H
+
+#include <linux/refcount.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include "rwsem.h"
+
+struct dso;
+struct ip_callchain;
+struct ref_reloc_sym;
+struct map_groups;
+struct machine;
+struct perf_evsel;
+
+struct map {
+ union {
+ struct rb_node rb_node;
+ struct list_head node;
+ };
+ u64 start;
+ u64 end;
+ bool erange_warned;
+ u32 priv;
+ u32 prot;
+ u32 flags;
+ u64 pgoff;
+ u64 reloc;
+ u32 maj, min; /* only valid for MMAP2 record */
+ u64 ino; /* only valid for MMAP2 record */
+ u64 ino_generation;/* only valid for MMAP2 record */
+
+ /* ip -> dso rip */
+ u64 (*map_ip)(struct map *, u64);
+ /* dso rip -> ip */
+ u64 (*unmap_ip)(struct map *, u64);
+
+ struct dso *dso;
+ struct map_groups *groups;
+ refcount_t refcnt;
+};
+
+#define KMAP_NAME_LEN 256
+
+struct kmap {
+ struct ref_reloc_sym *ref_reloc_sym;
+ struct map_groups *kmaps;
+ char name[KMAP_NAME_LEN];
+};
+
+struct maps {
+ struct rb_root entries;
+ struct rw_semaphore lock;
+};
+
+struct map_groups {
+ struct maps maps;
+ struct machine *machine;
+ refcount_t refcnt;
+};
+
+struct map_groups *map_groups__new(struct machine *machine);
+void map_groups__delete(struct map_groups *mg);
+bool map_groups__empty(struct map_groups *mg);
+
+static inline struct map_groups *map_groups__get(struct map_groups *mg)
+{
+ if (mg)
+ refcount_inc(&mg->refcnt);
+ return mg;
+}
+
+void map_groups__put(struct map_groups *mg);
+
+struct kmap *__map__kmap(struct map *map);
+struct kmap *map__kmap(struct map *map);
+struct map_groups *map__kmaps(struct map *map);
+
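+/*
+ * Translate between a memory address and a dso-relative address: map_ip
+ * subtracts the mapping start and adds the file offset, unmap_ip does the
+ * reverse.
+ */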
+static inline u64 map__map_ip(struct map *map, u64 ip)
+{
+ return ip - map->start + map->pgoff;
+}
+
+static inline u64 map__unmap_ip(struct map *map, u64 ip)
+{
+ return ip + map->start - map->pgoff;
+}
+
+static inline u64 identity__map_ip(struct map *map __maybe_unused, u64 ip)
+{
+ return ip;
+}
+
+static inline size_t map__size(const struct map *map)
+{
+ return map->end - map->start;
+}
+
+/* rip/ip <-> addr suitable for passing to `objdump --start-address=` */
+u64 map__rip_2objdump(struct map *map, u64 rip);
+
+/* objdump address -> memory address */
+u64 map__objdump_2mem(struct map *map, u64 ip);
+
+struct symbol;
+struct thread;
+
+/* map__for_each_symbol - iterate over the symbols in the given map
+ *
+ * @map: the 'struct map *' in which symbols are iterated
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @n: the 'struct rb_node *' to use as a temporary storage
+ * Note: caller must ensure map->dso is not NULL (map is loaded).
+ */
+#define map__for_each_symbol(map, pos, n) \
+ dso__for_each_symbol(map->dso, pos, n)
+
+/* map__for_each_symbol_with_name - iterate over the symbols in the given map
+ * that have the given name
+ *
+ * @map: the 'struct map *' in which symbols are iterated
+ * @sym_name: the symbol name
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ */
+#define __map__for_each_symbol_by_name(map, sym_name, pos) \
+ for (pos = map__find_symbol_by_name(map, sym_name); \
+ pos && \
+ !symbol__match_symbol_name(pos->name, sym_name, \
+ SYMBOL_TAG_INCLUDE__DEFAULT_ONLY); \
+ pos = symbol__next_by_name(pos))
+
+#define map__for_each_symbol_by_name(map, sym_name, pos) \
+ __map__for_each_symbol_by_name(map, sym_name, (pos))
+
+void map__init(struct map *map,
+ u64 start, u64 end, u64 pgoff, struct dso *dso);
+struct map *map__new(struct machine *machine, u64 start, u64 len,
+ u64 pgoff, u32 d_maj, u32 d_min, u64 ino,
+ u64 ino_gen, u32 prot, u32 flags,
+ char *filename, struct thread *thread);
+struct map *map__new2(u64 start, struct dso *dso);
+void map__delete(struct map *map);
+struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{
+ if (map)
+ refcount_inc(&map->refcnt);
+ return map;
+}
+
+void map__put(struct map *map);
+
+static inline void __map__zput(struct map **map)
+{
+ map__put(*map);
+ *map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
+size_t map__fprintf(struct map *map, FILE *fp);
+size_t map__fprintf_dsoname(struct map *map, FILE *fp);
+char *map__srcline(struct map *map, u64 addr, struct symbol *sym);
+int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix,
+ FILE *fp);
+
+int map__load(struct map *map);
+struct symbol *map__find_symbol(struct map *map, u64 addr);
+struct symbol *map__find_symbol_by_name(struct map *map, const char *name);
+void map__fixup_start(struct map *map);
+void map__fixup_end(struct map *map);
+
+void map__reloc_vmlinux(struct map *map);
+
+void maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
+struct map *map__next(struct map *map);
+struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name,
+ struct map **mapp);
+void map_groups__init(struct map_groups *mg, struct machine *machine);
+void map_groups__exit(struct map_groups *mg);
+int map_groups__clone(struct thread *thread,
+ struct map_groups *parent);
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp);
+
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
+ u64 addr);
+
+static inline void map_groups__insert(struct map_groups *mg, struct map *map)
+{
+ maps__insert(&mg->maps, map);
+ map->groups = mg;
+}
+
+static inline void map_groups__remove(struct map_groups *mg, struct map *map)
+{
+ maps__remove(&mg->maps, map);
+}
+
+static inline struct map *map_groups__find(struct map_groups *mg, u64 addr)
+{
+ return maps__find(&mg->maps, addr);
+}
+
+struct map *map_groups__first(struct map_groups *mg);
+
+static inline struct map *map_groups__next(struct map *map)
+{
+ return map__next(map);
+}
+
+struct symbol *map_groups__find_symbol(struct map_groups *mg,
+ u64 addr, struct map **mapp);
+
+struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
+ const char *name,
+ struct map **mapp);
+
+struct addr_map_symbol;
+
+int map_groups__find_ams(struct addr_map_symbol *ams);
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+ FILE *fp);
+
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name);
+
+bool __map__is_kernel(const struct map *map);
+bool __map__is_extra_kernel_map(const struct map *map);
+
+static inline bool __map__is_kmodule(const struct map *map)
+{
+ return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map);
+}
+
+bool map__has_symbols(const struct map *map);
+
+#define ENTRY_TRAMPOLINE_NAME "__entry_SYSCALL_64_trampoline"
+
+static inline bool is_entry_trampoline(const char *name)
+{
+ return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
+}
+
+#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
new file mode 100644
index 000000000..93f74d8d3
--- /dev/null
+++ b/tools/perf/util/mem-events.c
@@ -0,0 +1,452 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <api/fs/fs.h>
+#include <linux/kernel.h>
+#include "mem-events.h"
+#include "debug.h"
+#include "symbol.h"
+#include "sort.h"
+
+unsigned int perf_mem_events__loads_ldlat = 30;
+
+#define E(t, n, s) { .tag = t, .name = n, .sysfs_name = s }
+
+struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
+ E("ldlat-loads", "cpu/mem-loads,ldlat=%u/P", "mem-loads"),
+ E("ldlat-stores", "cpu/mem-stores/P", "mem-stores"),
+};
+#undef E
+
+static char mem_loads_name[100];
+static bool mem_loads_name__init;
+
+char *perf_mem_events__name(int i)
+{
+ if (i == PERF_MEM_EVENTS__LOAD) {
+ if (!mem_loads_name__init) {
+ mem_loads_name__init = true;
+ scnprintf(mem_loads_name, sizeof(mem_loads_name),
+ perf_mem_events[i].name,
+ perf_mem_events__loads_ldlat);
+ }
+ return mem_loads_name;
+ }
+
+ return (char *)perf_mem_events[i].name;
+}
+
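+/*
+ * Parse a comma separated list of event tags (e.g. "ldlat-loads") and mark
+ * the matching entries in perf_mem_events[] for recording. Returns -1 when
+ * no token matches any known event.
+ */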
+int perf_mem_events__parse(const char *str)
+{
+ char *tok, *saveptr = NULL;
+ bool found = false;
+ char *buf;
+ int j;
+
+ /* We need a buffer that we know we can write to. */
+ buf = malloc(strlen(str) + 1);
+ if (!buf)
+ return -ENOMEM;
+
+ strcpy(buf, str);
+
+ tok = strtok_r((char *)buf, ",", &saveptr);
+
+ while (tok) {
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = &perf_mem_events[j];
+
+ if (strstr(e->tag, tok))
+ e->record = found = true;
+ }
+
+ tok = strtok_r(NULL, ",", &saveptr);
+ }
+
+ free(buf);
+
+ if (found)
+ return 0;
+
+ pr_err("failed: event '%s' not found, use '-e list' to get list of available events\n", str);
+ return -1;
+}
+
+int perf_mem_events__init(void)
+{
+ const char *mnt = sysfs__mount();
+ bool found = false;
+ int j;
+
+ if (!mnt)
+ return -ENOENT;
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ char path[PATH_MAX];
+ struct perf_mem_event *e = &perf_mem_events[j];
+ struct stat st;
+
+ scnprintf(path, PATH_MAX, "%s/devices/cpu/events/%s",
+ mnt, e->sysfs_name);
+
+ if (!stat(path, &st))
+ e->supported = found = true;
+ }
+
+ return found ? 0 : -ENOENT;
+}
+
+static const char * const tlb_access[] = {
+ "N/A",
+ "HIT",
+ "MISS",
+ "L1",
+ "L2",
+ "Walker",
+ "Fault",
+};
+
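+/*
+ * Decode the mem_dtlb bit mask into a human readable string, joining the
+ * matching tlb_access[] names with " or " and appending hit/miss state.
+ */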
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t l = 0, i;
+ u64 m = PERF_MEM_TLB_NA;
+ u64 hit, miss;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ m = mem_info->data_src.mem_dtlb;
+
+ hit = m & PERF_MEM_TLB_HIT;
+ miss = m & PERF_MEM_TLB_MISS;
+
+ /* already taken care of */
+ m &= ~(PERF_MEM_TLB_HIT|PERF_MEM_TLB_MISS);
+
+ for (i = 0; m && i < ARRAY_SIZE(tlb_access); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (l) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, "%s", tlb_access[i]);
+ }
+ if (*out == '\0')
+ l += scnprintf(out, sz - l, "N/A");
+ if (hit)
+ l += scnprintf(out + l, sz - l, " hit");
+ if (miss)
+ l += scnprintf(out + l, sz - l, " miss");
+
+ return l;
+}
+
+static const char * const mem_lvl[] = {
+ "N/A",
+ "HIT",
+ "MISS",
+ "L1",
+ "LFB",
+ "L2",
+ "L3",
+ "Local RAM",
+ "Remote RAM (1 hop)",
+ "Remote RAM (2 hops)",
+ "Remote Cache (1 hop)",
+ "Remote Cache (2 hops)",
+ "I/O",
+ "Uncached",
+};
+
+static const char * const mem_lvlnum[] = {
+ [PERF_MEM_LVLNUM_ANY_CACHE] = "Any cache",
+ [PERF_MEM_LVLNUM_LFB] = "LFB",
+ [PERF_MEM_LVLNUM_RAM] = "RAM",
+ [PERF_MEM_LVLNUM_PMEM] = "PMEM",
+ [PERF_MEM_LVLNUM_NA] = "N/A",
+};
+
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t i, l = 0;
+ u64 m = PERF_MEM_LVL_NA;
+ u64 hit, miss;
+ int printed;
+
+ if (mem_info)
+ m = mem_info->data_src.mem_lvl;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ hit = m & PERF_MEM_LVL_HIT;
+ miss = m & PERF_MEM_LVL_MISS;
+
+ /* already taken care of */
+ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS);
+
+ if (mem_info && mem_info->data_src.mem_remote) {
+ strcat(out, "Remote ");
+ l += 7;
+ }
+
+ printed = 0;
+ for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, "%s", mem_lvl[i]);
+ }
+
+ if (mem_info && mem_info->data_src.mem_lvl_num) {
+ int lvl = mem_info->data_src.mem_lvl_num;
+ if (printed++) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ if (mem_lvlnum[lvl])
+ l += scnprintf(out + l, sz - l, "%s", mem_lvlnum[lvl]);
+ else
+ l += scnprintf(out + l, sz - l, "L%d", lvl);
+ }
+
+ if (l == 0)
+ l += scnprintf(out + l, sz - l, "N/A");
+ if (hit)
+ l += scnprintf(out + l, sz - l, " hit");
+ if (miss)
+ l += scnprintf(out + l, sz - l, " miss");
+
+ return l;
+}
+
+static const char * const snoop_access[] = {
+ "N/A",
+ "None",
+ "Hit",
+ "Miss",
+ "HitM",
+};
+
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ size_t i, l = 0;
+ u64 m = PERF_MEM_SNOOP_NA;
+
+ sz -= 1; /* -1 for null termination */
+ out[0] = '\0';
+
+ if (mem_info)
+ m = mem_info->data_src.mem_snoop;
+
+ for (i = 0; m && i < ARRAY_SIZE(snoop_access); i++, m >>= 1) {
+ if (!(m & 0x1))
+ continue;
+ if (l) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, "%s", snoop_access[i]);
+ }
+ if (mem_info &&
+ (mem_info->data_src.mem_snoopx & PERF_MEM_SNOOPX_FWD)) {
+ if (l) {
+ strcat(out, " or ");
+ l += 4;
+ }
+ l += scnprintf(out + l, sz - l, "Fwd");
+ }
+
+ if (*out == '\0')
+ l += scnprintf(out, sz - l, "N/A");
+
+ return l;
+}
+
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ u64 mask = PERF_MEM_LOCK_NA;
+ int l;
+
+ if (mem_info)
+ mask = mem_info->data_src.mem_lock;
+
+ if (mask & PERF_MEM_LOCK_NA)
+ l = scnprintf(out, sz, "N/A");
+ else if (mask & PERF_MEM_LOCK_LOCKED)
+ l = scnprintf(out, sz, "Yes");
+ else
+ l = scnprintf(out, sz, "No");
+
+ return l;
+}
+
+int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_info)
+{
+ int i = 0;
+
+ i += perf_mem__lvl_scnprintf(out, sz, mem_info);
+ i += scnprintf(out + i, sz - i, "|SNP ");
+ i += perf_mem__snp_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|TLB ");
+ i += perf_mem__tlb_scnprintf(out + i, sz - i, mem_info);
+ i += scnprintf(out + i, sz - i, "|LCK ");
+ i += perf_mem__lck_scnprintf(out + i, sz - i, mem_info);
+
+ return i;
+}
+
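+/*
+ * Decode one sample's memory data source into the c2c_stats counters,
+ * categorizing loads and stores by cache level, snoop result and locking.
+ * Returns -1 when the sample cannot be fully attributed (missing address or
+ * map, unparsable data source), 0 otherwise.
+ */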
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+ union perf_mem_data_src *data_src = &mi->data_src;
+ u64 daddr = mi->daddr.addr;
+ u64 op = data_src->mem_op;
+ u64 lvl = data_src->mem_lvl;
+ u64 snoop = data_src->mem_snoop;
+ u64 lock = data_src->mem_lock;
+ /*
+ * Skylake might report unknown remote level via this
+ * bit, consider it when evaluating remote HITMs.
+ */
+ bool mrem = data_src->mem_remote;
+ int err = 0;
+
+#define HITM_INC(__f) \
+do { \
+ stats->__f++; \
+ stats->tot_hitm++; \
+} while (0)
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+ stats->nr_entries++;
+
+ if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+ if (op & P(OP, LOAD)) {
+ /* load */
+ stats->load++;
+
+ if (!daddr) {
+ stats->ld_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+ if (lvl & P(LVL, IO)) stats->ld_io++;
+ if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+ if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+ if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+ if (lvl & P(LVL, L3 )) {
+ if (snoop & P(SNOOP, HITM))
+ HITM_INC(lcl_hitm);
+ else
+ stats->ld_llchit++;
+ }
+
+ if (lvl & P(LVL, LOC_RAM)) {
+ stats->lcl_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+
+ if ((lvl & P(LVL, REM_RAM1)) ||
+ (lvl & P(LVL, REM_RAM2)) ||
+ mrem) {
+ stats->rmt_dram++;
+ if (snoop & P(SNOOP, HIT))
+ stats->ld_shared++;
+ else
+ stats->ld_excl++;
+ }
+ }
+
+ if ((lvl & P(LVL, REM_CCE1)) ||
+ (lvl & P(LVL, REM_CCE2)) ||
+ mrem) {
+ if (snoop & P(SNOOP, HIT))
+ stats->rmt_hit++;
+ else if (snoop & P(SNOOP, HITM))
+ HITM_INC(rmt_hitm);
+ }
+
+ if ((lvl & P(LVL, MISS)))
+ stats->ld_miss++;
+
+ } else if (op & P(OP, STORE)) {
+ /* store */
+ stats->store++;
+
+ if (!daddr) {
+ stats->st_noadrs++;
+ return -1;
+ }
+
+ if (lvl & P(LVL, HIT)) {
+ if (lvl & P(LVL, UNC)) stats->st_uncache++;
+ if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+ }
+ if (lvl & P(LVL, MISS))
+ if (lvl & P(LVL, L1)) stats->st_l1miss++;
+ } else {
+ /* unparsable data_src? */
+ stats->noparse++;
+ return -1;
+ }
+
+ if (!mi->daddr.map || !mi->iaddr.map) {
+ stats->nomap++;
+ return -1;
+ }
+
+#undef P
+#undef HITM_INC
+ return err;
+}
+
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add)
+{
+ stats->nr_entries += add->nr_entries;
+
+ stats->locks += add->locks;
+ stats->store += add->store;
+ stats->st_uncache += add->st_uncache;
+ stats->st_noadrs += add->st_noadrs;
+ stats->st_l1hit += add->st_l1hit;
+ stats->st_l1miss += add->st_l1miss;
+ stats->load += add->load;
+ stats->ld_excl += add->ld_excl;
+ stats->ld_shared += add->ld_shared;
+ stats->ld_uncache += add->ld_uncache;
+ stats->ld_io += add->ld_io;
+ stats->ld_miss += add->ld_miss;
+ stats->ld_noadrs += add->ld_noadrs;
+ stats->ld_fbhit += add->ld_fbhit;
+ stats->ld_l1hit += add->ld_l1hit;
+ stats->ld_l2hit += add->ld_l2hit;
+ stats->ld_llchit += add->ld_llchit;
+ stats->lcl_hitm += add->lcl_hitm;
+ stats->rmt_hitm += add->rmt_hitm;
+ stats->tot_hitm += add->tot_hitm;
+ stats->rmt_hit += add->rmt_hit;
+ stats->lcl_dram += add->lcl_dram;
+ stats->rmt_dram += add->rmt_dram;
+ stats->nomap += add->nomap;
+ stats->noparse += add->noparse;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
new file mode 100644
index 000000000..a889ec2fa
--- /dev/null
+++ b/tools/perf/util/mem-events.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_MEM_EVENTS_H
+#define __PERF_MEM_EVENTS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include "stat.h"
+
+struct perf_mem_event {
+ bool record;
+ bool supported;
+ const char *tag;
+ const char *name;
+ const char *sysfs_name;
+};
+
+enum {
+ PERF_MEM_EVENTS__LOAD,
+ PERF_MEM_EVENTS__STORE,
+ PERF_MEM_EVENTS__MAX,
+};
+
+extern struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX];
+extern unsigned int perf_mem_events__loads_ldlat;
+
+int perf_mem_events__parse(const char *str);
+int perf_mem_events__init(void);
+
+char *perf_mem_events__name(int i);
+
+struct mem_info;
+int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__snp_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
+
+int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info);
+
+struct c2c_stats {
+ u32 nr_entries;
+
+ u32 locks; /* count of 'lock' transactions */
+ u32 store; /* count of all stores in trace */
+ u32 st_uncache; /* stores to uncacheable address */
+ u32 st_noadrs; /* cacheable store with no address */
+ u32 st_l1hit; /* count of stores that hit L1D */
+ u32 st_l1miss; /* count of stores that miss L1D */
+ u32 load; /* count of all loads in trace */
+ u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */
+ u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */
+ u32 ld_uncache; /* loads to uncacheable address */
+ u32 ld_io; /* loads to io address */
+ u32 ld_miss; /* loads miss */
+ u32 ld_noadrs; /* cacheable load with no address */
+ u32 ld_fbhit; /* count of loads hitting Fill Buffer */
+ u32 ld_l1hit; /* count of loads that hit L1D */
+ u32 ld_l2hit; /* count of loads that hit L2D */
+ u32 ld_llchit; /* count of loads that hit LLC */
+ u32 lcl_hitm; /* count of loads with local HITM */
+ u32 rmt_hitm; /* count of loads with remote HITM */
+ u32 tot_hitm; /* count of loads with local and remote HITM */
+ u32 rmt_hit; /* count of loads with remote hit clean; */
+ u32 lcl_dram; /* count of loads miss to local DRAM */
+ u32 rmt_dram; /* count of loads miss to remote DRAM */
+ u32 nomap; /* count of load/stores with no phys adrs */
+ u32 noparse; /* count of unparsable data sources */
+};
+
+struct hist_entry;
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi);
+void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add);
+
+#endif /* __PERF_MEM_EVENTS_H */
diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
new file mode 100644
index 000000000..81c5a2e43
--- /dev/null
+++ b/tools/perf/util/mem2node.c
@@ -0,0 +1,135 @@
+#include <errno.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include <linux/bitmap.h>
+#include "mem2node.h"
+#include "util.h"
+
+struct phys_entry {
+ struct rb_node rb_node;
+ u64 start;
+ u64 end;
+ u64 node;
+};
+
+static void phys_entry__insert(struct phys_entry *entry, struct rb_root *root)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct phys_entry *e;
+
+ while (*p != NULL) {
+ parent = *p;
+ e = rb_entry(parent, struct phys_entry, rb_node);
+
+ if (entry->start < e->start)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ rb_link_node(&entry->rb_node, parent, p);
+ rb_insert_color(&entry->rb_node, root);
+}
+
+static void
+phys_entry__init(struct phys_entry *entry, u64 start, u64 bsize, u64 node)
+{
+ entry->start = start;
+ entry->end = start + bsize;
+ entry->node = node;
+ RB_CLEAR_NODE(&entry->rb_node);
+}
+
+int mem2node__init(struct mem2node *map, struct perf_env *env)
+{
+ struct memory_node *n, *nodes = &env->memory_nodes[0];
+ struct phys_entry *entries, *tmp_entries;
+ u64 bsize = env->memory_bsize;
+ int i, j = 0, max = 0;
+
+ memset(map, 0x0, sizeof(*map));
+ map->root = RB_ROOT;
+
+ for (i = 0; i < env->nr_memory_nodes; i++) {
+ n = &nodes[i];
+ max += bitmap_weight(n->set, n->size);
+ }
+
+ entries = zalloc(sizeof(*entries) * max);
+ if (!entries)
+ return -ENOMEM;
+
+ for (i = 0; i < env->nr_memory_nodes; i++) {
+ u64 bit;
+
+ n = &nodes[i];
+
+ for (bit = 0; bit < n->size; bit++) {
+ u64 start;
+
+ if (!test_bit(bit, n->set))
+ continue;
+
+ start = bit * bsize;
+
+ /*
+			 * Merge adjacent areas; we walk the bitmap in
+			 * order, so there is no need to sort.
+ */
+ if (j > 0) {
+ struct phys_entry *prev = &entries[j - 1];
+
+ if ((prev->end == start) &&
+ (prev->node == n->node)) {
+ prev->end += bsize;
+ continue;
+ }
+ }
+
+ phys_entry__init(&entries[j++], start, bsize, n->node);
+ }
+ }
+
+	/* Cut entries left unused due to merging. */
+ tmp_entries = realloc(entries, sizeof(*entries) * j);
+ if (tmp_entries || WARN_ON_ONCE(j == 0))
+ entries = tmp_entries;
+
+ for (i = 0; i < j; i++) {
+ pr_debug("mem2node %03" PRIu64 " [0x%016" PRIx64 "-0x%016" PRIx64 "]\n",
+ entries[i].node, entries[i].start, entries[i].end);
+
+ phys_entry__insert(&entries[i], &map->root);
+ }
+
+ map->entries = entries;
+ return 0;
+}
+
+void mem2node__exit(struct mem2node *map)
+{
+ zfree(&map->entries);
+}
+
+int mem2node__node(struct mem2node *map, u64 addr)
+{
+ struct rb_node **p, *parent = NULL;
+ struct phys_entry *entry;
+
+ p = &map->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ entry = rb_entry(parent, struct phys_entry, rb_node);
+ if (addr < entry->start)
+ p = &(*p)->rb_left;
+ else if (addr >= entry->end)
+ p = &(*p)->rb_right;
+ else
+ goto out;
+ }
+
+ entry = NULL;
+out:
+ return entry ? (int) entry->node : -1;
+}
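The map is an interval lookup: contiguous physical ranges from each node's bitmap are merged into [start, end) entries and kept in an rb-tree keyed by start address. A hedged lifecycle sketch (env and phys_addr stand in for a real struct perf_env and a sampled physical address; not part of the patch):

    struct mem2node map;
    int node;

    if (mem2node__init(&map, env) == 0) {
            node = mem2node__node(&map, phys_addr); /* -1 if unmapped */
            /* ... attribute the sample to 'node' ... */
            mem2node__exit(&map);
    }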
diff --git a/tools/perf/util/mem2node.h b/tools/perf/util/mem2node.h
new file mode 100644
index 000000000..59c4752a2
--- /dev/null
+++ b/tools/perf/util/mem2node.h
@@ -0,0 +1,19 @@
+#ifndef __MEM2NODE_H
+#define __MEM2NODE_H
+
+#include <linux/rbtree.h>
+#include "env.h"
+
+struct phys_entry;
+
+struct mem2node {
+ struct rb_root root;
+ struct phys_entry *entries;
+ int cnt;
+};
+
+int mem2node__init(struct mem2node *map, struct perf_env *env);
+void mem2node__exit(struct mem2node *map);
+int mem2node__node(struct mem2node *map, u64 addr);
+
+#endif /* __MEM2NODE_H */
diff --git a/tools/perf/util/memswap.c b/tools/perf/util/memswap.c
new file mode 100644
index 000000000..c1317e498
--- /dev/null
+++ b/tools/perf/util/memswap.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <byteswap.h>
+#include "memswap.h"
+#include <linux/types.h>
+
+void mem_bswap_32(void *src, int byte_size)
+{
+ u32 *m = src;
+ while (byte_size > 0) {
+ *m = bswap_32(*m);
+ byte_size -= sizeof(u32);
+ ++m;
+ }
+}
+
+void mem_bswap_64(void *src, int byte_size)
+{
+ u64 *m = src;
+
+ while (byte_size > 0) {
+ *m = bswap_64(*m);
+ byte_size -= sizeof(u64);
+ ++m;
+ }
+}
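Both helpers swap in place and expect byte_size to be a positive multiple of the element width, since the loop decrements by sizeof(u32) or sizeof(u64). A small sketch (vals is a placeholder buffer read from a cross-endian perf.data file):

    u64 vals[4];

    /* After reading vals from a file with the opposite endianness: */
    mem_bswap_64(vals, sizeof(vals)); /* swaps all four u64s in place */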
diff --git a/tools/perf/util/memswap.h b/tools/perf/util/memswap.h
new file mode 100644
index 000000000..1e29ff903
--- /dev/null
+++ b/tools/perf/util/memswap.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_MEMSWAP_H_
+#define PERF_MEMSWAP_H_
+
+void mem_bswap_64(void *src, int byte_size);
+void mem_bswap_32(void *src, int byte_size);
+
+#endif /* PERF_MEMSWAP_H_ */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
new file mode 100644
index 000000000..6dcc6e118
--- /dev/null
+++ b/tools/perf/util/metricgroup.c
@@ -0,0 +1,540 @@
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+/* Manage metrics and groups of metrics from JSON files */
+
+#include "metricgroup.h"
+#include "evlist.h"
+#include "strbuf.h"
+#include "pmu.h"
+#include "expr.h"
+#include "rblist.h"
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include "pmu-events/pmu-events.h"
+#include "strlist.h"
+#include <assert.h>
+#include <ctype.h>
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+ struct perf_evsel *evsel,
+ bool create)
+{
+ struct rb_node *nd;
+ struct metric_event me = {
+ .evsel = evsel
+ };
+
+ if (!metric_events)
+ return NULL;
+
+ nd = rblist__find(metric_events, &me);
+ if (nd)
+ return container_of(nd, struct metric_event, nd);
+ if (create) {
+ rblist__add_node(metric_events, &me);
+ nd = rblist__find(metric_events, &me);
+ if (nd)
+ return container_of(nd, struct metric_event, nd);
+ }
+ return NULL;
+}
+
+static int metric_event_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct metric_event *a = container_of(rb_node,
+ struct metric_event,
+ nd);
+ const struct metric_event *b = entry;
+
+ if (a->evsel == b->evsel)
+ return 0;
+ if ((char *)a->evsel < (char *)b->evsel)
+ return -1;
+ return +1;
+}
+
+static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
+ const void *entry)
+{
+ struct metric_event *me = malloc(sizeof(struct metric_event));
+
+ if (!me)
+ return NULL;
+ memcpy(me, entry, sizeof(struct metric_event));
+ me->evsel = ((struct metric_event *)entry)->evsel;
+ INIT_LIST_HEAD(&me->head);
+ return &me->nd;
+}
+
+static void metricgroup__rblist_init(struct rblist *metric_events)
+{
+ rblist__init(metric_events);
+ metric_events->node_cmp = metric_event_cmp;
+ metric_events->node_new = metric_event_new;
+}
+
+struct egroup {
+ struct list_head nd;
+ int idnum;
+ const char **ids;
+ const char *metric_name;
+ const char *metric_expr;
+};
+
+static bool record_evsel(int *ind, struct perf_evsel **start,
+ int idnum,
+ struct perf_evsel **metric_events,
+ struct perf_evsel *ev)
+{
+ metric_events[*ind] = ev;
+ if (*ind == 0)
+ *start = ev;
+ if (++*ind == idnum) {
+ metric_events[*ind] = NULL;
+ return true;
+ }
+ return false;
+}
+
+static struct perf_evsel *find_evsel_group(struct perf_evlist *perf_evlist,
+ const char **ids,
+ int idnum,
+ struct perf_evsel **metric_events)
+{
+ struct perf_evsel *ev, *start = NULL;
+ int ind = 0;
+
+ evlist__for_each_entry (perf_evlist, ev) {
+ if (ev->collect_stat)
+ continue;
+ if (!strcmp(ev->name, ids[ind])) {
+ if (record_evsel(&ind, &start, idnum,
+ metric_events, ev))
+ return start;
+ } else {
+ /*
+ * We saw some other event that is not
+ * in our list of events. Discard
+ * the whole match and start again.
+ */
+ ind = 0;
+ start = NULL;
+ if (!strcmp(ev->name, ids[ind])) {
+ if (record_evsel(&ind, &start, idnum,
+ metric_events, ev))
+ return start;
+ }
+ }
+ }
+ /*
+ * This can happen when an alias expands to multiple
+ * events, like for uncore events.
+ * We don't support this case for now.
+ */
+ return NULL;
+}
+
+static int metricgroup__setup_events(struct list_head *groups,
+ struct perf_evlist *perf_evlist,
+ struct rblist *metric_events_list)
+{
+ struct metric_event *me;
+ struct metric_expr *expr;
+ int i = 0;
+ int ret = 0;
+ struct egroup *eg;
+ struct perf_evsel *evsel;
+
+ list_for_each_entry (eg, groups, nd) {
+ struct perf_evsel **metric_events;
+
+ metric_events = calloc(sizeof(void *), eg->idnum + 1);
+ if (!metric_events) {
+ ret = -ENOMEM;
+ break;
+ }
+ evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
+ metric_events);
+ if (!evsel) {
+ pr_debug("Cannot resolve %s: %s\n",
+ eg->metric_name, eg->metric_expr);
+ free(metric_events);
+ continue;
+ }
+ for (i = 0; i < eg->idnum; i++)
+ metric_events[i]->collect_stat = true;
+ me = metricgroup__lookup(metric_events_list, evsel, true);
+ if (!me) {
+ ret = -ENOMEM;
+ free(metric_events);
+ break;
+ }
+ expr = malloc(sizeof(struct metric_expr));
+ if (!expr) {
+ ret = -ENOMEM;
+ free(metric_events);
+ break;
+ }
+ expr->metric_expr = eg->metric_expr;
+ expr->metric_name = eg->metric_name;
+ expr->metric_events = metric_events;
+ list_add(&expr->nd, &me->head);
+ }
+ return ret;
+}
+
+static bool match_metric(const char *n, const char *list)
+{
+ int len;
+ char *m;
+
+ if (!list)
+ return false;
+ if (!strcmp(list, "all"))
+ return true;
+ if (!n)
+ return !strcasecmp(list, "No_group");
+ len = strlen(list);
+ m = strcasestr(n, list);
+ if (!m)
+ return false;
+ if ((m == n || m[-1] == ';' || m[-1] == ' ') &&
+ (m[len] == 0 || m[len] == ';'))
+ return true;
+ return false;
+}
+
+struct mep {
+ struct rb_node nd;
+ const char *name;
+ struct strlist *metrics;
+};
+
+static int mep_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct mep *a = container_of(rb_node, struct mep, nd);
+ struct mep *b = (struct mep *)entry;
+
+ return strcmp(a->name, b->name);
+}
+
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
+ const void *entry)
+{
+ struct mep *me = malloc(sizeof(struct mep));
+
+ if (!me)
+ return NULL;
+ memcpy(me, entry, sizeof(struct mep));
+ me->name = strdup(me->name);
+ if (!me->name)
+ goto out_me;
+ me->metrics = strlist__new(NULL, NULL);
+ if (!me->metrics)
+ goto out_name;
+ return &me->nd;
+out_name:
+ free((char *)me->name);
+out_me:
+ free(me);
+ return NULL;
+}
+
+static struct mep *mep_lookup(struct rblist *groups, const char *name)
+{
+ struct rb_node *nd;
+ struct mep me = {
+ .name = name
+ };
+ nd = rblist__find(groups, &me);
+ if (nd)
+ return container_of(nd, struct mep, nd);
+ rblist__add_node(groups, &me);
+ nd = rblist__find(groups, &me);
+ if (nd)
+ return container_of(nd, struct mep, nd);
+ return NULL;
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+ struct rb_node *nd)
+{
+ struct mep *me = container_of(nd, struct mep, nd);
+
+ strlist__delete(me->metrics);
+ free((void *)me->name);
+ free(me);
+}
+
+static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
+{
+ struct str_node *sn;
+ int n = 0;
+
+ strlist__for_each_entry (sn, metrics) {
+ if (raw)
+ printf("%s%s", n > 0 ? " " : "", sn->s);
+ else
+ printf(" %s\n", sn->s);
+ n++;
+ }
+ if (raw)
+ putchar('\n');
+}
+
+void metricgroup__print(bool metrics, bool metricgroups, char *filter,
+ bool raw)
+{
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
+ struct pmu_event *pe;
+ int i;
+ struct rblist groups;
+ struct rb_node *node, *next;
+ struct strlist *metriclist = NULL;
+
+ if (!map)
+ return;
+
+ if (!metricgroups) {
+ metriclist = strlist__new(NULL, NULL);
+ if (!metriclist)
+ return;
+ }
+
+ rblist__init(&groups);
+ groups.node_new = mep_new;
+ groups.node_cmp = mep_cmp;
+ groups.node_delete = mep_delete;
+ for (i = 0; ; i++) {
+ const char *g;
+ pe = &map->table[i];
+
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ g = pe->metric_group;
+ if (!g && pe->metric_name) {
+ if (pe->name)
+ continue;
+ g = "No_group";
+ }
+ if (g) {
+ char *omg;
+ char *mg = strdup(g);
+
+ if (!mg)
+ return;
+ omg = mg;
+ while ((g = strsep(&mg, ";")) != NULL) {
+ struct mep *me;
+ char *s;
+
+ if (*g == 0)
+ g = "No_group";
+ while (isspace(*g))
+ g++;
+ if (filter && !strstr(g, filter))
+ continue;
+ if (raw)
+ s = (char *)pe->metric_name;
+ else {
+ if (asprintf(&s, "%s\n%*s%s]",
+ pe->metric_name, 8, "[", pe->desc) < 0)
+ return;
+ }
+
+ if (!s)
+ continue;
+
+ if (!metricgroups) {
+ strlist__add(metriclist, s);
+ } else {
+ me = mep_lookup(&groups, g);
+ if (!me)
+ continue;
+ strlist__add(me->metrics, s);
+ }
+ }
+ free(omg);
+ }
+ }
+
+ if (metricgroups && !raw)
+ printf("\nMetric Groups:\n\n");
+ else if (metrics && !raw)
+ printf("\nMetrics:\n\n");
+
+ for (node = rb_first(&groups.entries); node; node = next) {
+ struct mep *me = container_of(node, struct mep, nd);
+
+ if (metricgroups)
+ printf("%s%s%s", me->name, metrics ? ":" : "", raw ? " " : "\n");
+ if (metrics)
+ metricgroup__print_strlist(me->metrics, raw);
+ next = rb_next(node);
+ rblist__remove_node(&groups, node);
+ }
+ if (!metricgroups)
+ metricgroup__print_strlist(metriclist, raw);
+ strlist__delete(metriclist);
+}
+
+static int metricgroup__add_metric(const char *metric, struct strbuf *events,
+ struct list_head *group_list)
+{
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
+ struct pmu_event *pe;
+ int ret = -EINVAL;
+ int i, j;
+
+ if (!map)
+ return 0;
+
+ for (i = 0; ; i++) {
+ pe = &map->table[i];
+
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ if (match_metric(pe->metric_group, metric) ||
+ match_metric(pe->metric_name, metric)) {
+ const char **ids;
+ int idnum;
+ struct egroup *eg;
+
+ pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+ if (expr__find_other(pe->metric_expr,
+ NULL, &ids, &idnum) < 0)
+ continue;
+ if (events->len > 0)
+ strbuf_addf(events, ",");
+ for (j = 0; j < idnum; j++) {
+ pr_debug("found event %s\n", ids[j]);
+ strbuf_addf(events, "%s%s",
+ j == 0 ? "{" : ",",
+ ids[j]);
+ }
+ strbuf_addf(events, "}:W");
+
+ eg = malloc(sizeof(struct egroup));
+ if (!eg) {
+ ret = -ENOMEM;
+ break;
+ }
+ eg->ids = ids;
+ eg->idnum = idnum;
+ eg->metric_name = pe->metric_name;
+ eg->metric_expr = pe->metric_expr;
+ list_add_tail(&eg->nd, group_list);
+ ret = 0;
+ }
+ }
+ return ret;
+}
+
+static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
+ struct list_head *group_list)
+{
+ char *llist, *nlist, *p;
+ int ret = -EINVAL;
+
+ nlist = strdup(list);
+ if (!nlist)
+ return -ENOMEM;
+ llist = nlist;
+
+ strbuf_init(events, 100);
+ strbuf_addf(events, "%s", "");
+
+ while ((p = strsep(&llist, ",")) != NULL) {
+ ret = metricgroup__add_metric(p, events, group_list);
+ if (ret == -EINVAL) {
+ fprintf(stderr, "Cannot find metric or group `%s'\n",
+ p);
+ break;
+ }
+ }
+ free(nlist);
+ return ret;
+}
+
+static void metricgroup__free_egroups(struct list_head *group_list)
+{
+ struct egroup *eg, *egtmp;
+ int i;
+
+ list_for_each_entry_safe (eg, egtmp, group_list, nd) {
+ for (i = 0; i < eg->idnum; i++)
+ free((char *)eg->ids[i]);
+ free(eg->ids);
+ free(eg);
+ }
+}
+
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ struct rblist *metric_events)
+{
+ struct parse_events_error parse_error;
+ struct perf_evlist *perf_evlist = *(struct perf_evlist **)opt->value;
+ struct strbuf extra_events;
+ LIST_HEAD(group_list);
+ int ret;
+
+ if (metric_events->nr_entries == 0)
+ metricgroup__rblist_init(metric_events);
+ ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
+ if (ret)
+ return ret;
+ pr_debug("adding %s\n", extra_events.buf);
+ memset(&parse_error, 0, sizeof(struct parse_events_error));
+ ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+ if (ret) {
+ parse_events_print_error(&parse_error, extra_events.buf);
+ goto out;
+ }
+ strbuf_release(&extra_events);
+ ret = metricgroup__setup_events(&group_list, perf_evlist,
+ metric_events);
+out:
+ metricgroup__free_egroups(&group_list);
+ return ret;
+}
+
+bool metricgroup__has_metric(const char *metric)
+{
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
+ struct pmu_event *pe;
+ int i;
+
+ if (!map)
+ return false;
+
+ for (i = 0; ; i++) {
+ pe = &map->table[i];
+
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ if (match_metric(pe->metric_name, metric))
+ return true;
+ }
+ return false;
+}
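metricgroup__parse_groups() is shaped to back a --metrics style option: opt->value must point at the struct perf_evlist pointer, and parsed metric expressions land in the caller's rblist. A hedged sketch of an option callback wiring this up (parse_metric_groups() and the metric_events rblist are assumed caller state, not defined in this patch):

    static struct rblist metric_events;

    static int parse_metric_groups(const struct option *opt, const char *str,
                                   int unset __maybe_unused)
    {
            return metricgroup__parse_groups(opt, str, &metric_events);
    }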
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
new file mode 100644
index 000000000..8a155dba0
--- /dev/null
+++ b/tools/perf/util/metricgroup.h
@@ -0,0 +1,32 @@
+#ifndef METRICGROUP_H
+#define METRICGROUP_H 1
+
+#include "linux/list.h"
+#include "rblist.h"
+#include <subcmd/parse-options.h>
+#include "evlist.h"
+#include "strbuf.h"
+
+struct metric_event {
+ struct rb_node nd;
+ struct perf_evsel *evsel;
+ struct list_head head; /* list of metric_expr */
+};
+
+struct metric_expr {
+ struct list_head nd;
+ const char *metric_expr;
+ const char *metric_name;
+ struct perf_evsel **metric_events;
+};
+
+struct metric_event *metricgroup__lookup(struct rblist *metric_events,
+ struct perf_evsel *evsel,
+ bool create);
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ struct rblist *metric_events);
+
+void metricgroup__print(bool metrics, bool groups, char *filter, bool raw);
+bool metricgroup__has_metric(const char *metric);
+#endif
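After setup, each evsel that anchors a metric owns a metric_event whose head lists its metric_exprs; consumers walk that list to evaluate each expression over its event group. A sketch (evsel and metric_events come from the caller):

    struct metric_event *me;
    struct metric_expr *expr;

    me = metricgroup__lookup(&metric_events, evsel, false);
    if (me) {
            list_for_each_entry(expr, &me->head, nd) {
                    /* evaluate expr->metric_expr over expr->metric_events */
            }
    }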
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
new file mode 100644
index 000000000..215f69f41
--- /dev/null
+++ b/tools/perf/util/mmap.c
@@ -0,0 +1,339 @@
+/*
+ * Copyright (C) 2011-2017, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Parts came from evlist.c builtin-{top,stat,record}.c, see those files for further
+ * copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include <sys/mman.h>
+#include <inttypes.h>
+#include <asm/bug.h>
+#include "debug.h"
+#include "event.h"
+#include "mmap.h"
+#include "util.h" /* page_size */
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map)
+{
+ return map->mask + 1 + page_size;
+}
+
+/* 'end' must point to a valid entry */
+static union perf_event *perf_mmap__read(struct perf_mmap *map,
+ u64 *startp, u64 end)
+{
+ unsigned char *data = map->base + page_size;
+ union perf_event *event = NULL;
+ int diff = end - *startp;
+
+ if (diff >= (int)sizeof(event->header)) {
+ size_t size;
+
+ event = (union perf_event *)&data[*startp & map->mask];
+ size = event->header.size;
+
+ if (size < sizeof(event->header) || diff < (int)size)
+ return NULL;
+
+ /*
+ * Event straddles the mmap boundary -- header should always
+ * be inside due to u64 alignment of output.
+ */
+ if ((*startp & map->mask) + size != ((*startp + size) & map->mask)) {
+ unsigned int offset = *startp;
+ unsigned int len = min(sizeof(*event), size), cpy;
+ void *dst = map->event_copy;
+
+ do {
+ cpy = min(map->mask + 1 - (offset & map->mask), len);
+ memcpy(dst, &data[offset & map->mask], cpy);
+ offset += cpy;
+ dst += cpy;
+ len -= cpy;
+ } while (len);
+
+ event = (union perf_event *)map->event_copy;
+ }
+
+ *startp += size;
+ }
+
+ return event;
+}
+
+/*
+ * Read events from the ring buffer one by one;
+ * each call returns one event.
+ *
+ * Usage:
+ * perf_mmap__read_init()
+ * while ((event = perf_mmap__read_event()) != NULL) {
+ *	// process the event
+ *	perf_mmap__consume()
+ * }
+ * perf_mmap__read_done()
+ */
+union perf_event *perf_mmap__read_event(struct perf_mmap *map)
+{
+ union perf_event *event;
+
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return NULL;
+
+	/* non-overwrite mode doesn't pause the ring buffer */
+ if (!map->overwrite)
+ map->end = perf_mmap__read_head(map);
+
+ event = perf_mmap__read(map, &map->start, map->end);
+
+ if (!map->overwrite)
+ map->prev = map->start;
+
+ return event;
+}
+
+static bool perf_mmap__empty(struct perf_mmap *map)
+{
+ return perf_mmap__read_head(map) == map->prev && !map->auxtrace_mmap.base;
+}
+
+void perf_mmap__get(struct perf_mmap *map)
+{
+ refcount_inc(&map->refcnt);
+}
+
+void perf_mmap__put(struct perf_mmap *map)
+{
+ BUG_ON(map->base && refcount_read(&map->refcnt) == 0);
+
+ if (refcount_dec_and_test(&map->refcnt))
+ perf_mmap__munmap(map);
+}
+
+void perf_mmap__consume(struct perf_mmap *map)
+{
+ if (!map->overwrite) {
+ u64 old = map->prev;
+
+ perf_mmap__write_tail(map, old);
+ }
+
+ if (refcount_read(&map->refcnt) == 1 && perf_mmap__empty(map))
+ perf_mmap__put(map);
+}
+
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+ struct auxtrace_mmap_params *mp __maybe_unused,
+ void *userpg __maybe_unused,
+ int fd __maybe_unused)
+{
+ return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
+ off_t auxtrace_offset __maybe_unused,
+ unsigned int auxtrace_pages __maybe_unused,
+ bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
+ struct perf_evlist *evlist __maybe_unused,
+ int idx __maybe_unused,
+ bool per_cpu __maybe_unused)
+{
+}
+
+void perf_mmap__munmap(struct perf_mmap *map)
+{
+ if (map->base != NULL) {
+ munmap(map->base, perf_mmap__mmap_len(map));
+ map->base = NULL;
+ map->fd = -1;
+ refcount_set(&map->refcnt, 0);
+ }
+ auxtrace_mmap__munmap(&map->auxtrace_mmap);
+}
+
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu)
+{
+ /*
+	 * The last reference will be dropped in perf_mmap__consume(), so
+	 * that tools are never prevented from consuming every last event
+	 * in the ring buffer.
+	 *
+	 * I.e. we can get a POLLHUP meaning that the fd no longer exists,
+	 * but the last events for it are still in the ring buffer, waiting
+	 * to be consumed.
+	 *
+	 * Tools can choose to ignore this at their own discretion, but the
+	 * evlist layer can't just drop it when filtering events in
+	 * perf_evlist__filter_pollfd().
+ */
+ refcount_set(&map->refcnt, 2);
+ map->prev = 0;
+ map->mask = mp->mask;
+ map->base = mmap(NULL, perf_mmap__mmap_len(map), mp->prot,
+ MAP_SHARED, fd, 0);
+ if (map->base == MAP_FAILED) {
+ pr_debug2("failed to mmap perf event ring buffer, error %d\n",
+ errno);
+ map->base = NULL;
+ return -1;
+ }
+ map->fd = fd;
+ map->cpu = cpu;
+
+ if (auxtrace_mmap__mmap(&map->auxtrace_mmap,
+ &mp->auxtrace_mp, map->base, fd))
+ return -1;
+
+ return 0;
+}
+
+static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
+{
+ struct perf_event_header *pheader;
+ u64 evt_head = *start;
+ int size = mask + 1;
+
+ pr_debug2("%s: buf=%p, start=%"PRIx64"\n", __func__, buf, *start);
+ pheader = (struct perf_event_header *)(buf + (*start & mask));
+ while (true) {
+ if (evt_head - *start >= (unsigned int)size) {
+ pr_debug("Finished reading overwrite ring buffer: rewind\n");
+ if (evt_head - *start > (unsigned int)size)
+ evt_head -= pheader->size;
+ *end = evt_head;
+ return 0;
+ }
+
+ pheader = (struct perf_event_header *)(buf + (evt_head & mask));
+
+ if (pheader->size == 0) {
+ pr_debug("Finished reading overwrite ring buffer: get start\n");
+ *end = evt_head;
+ return 0;
+ }
+
+ evt_head += pheader->size;
+ pr_debug3("move evt_head: %"PRIx64"\n", evt_head);
+ }
+ WARN_ONCE(1, "Shouldn't get here\n");
+ return -1;
+}
+
+/*
+ * Report the start and end of the available data in the ring buffer
+ */
+static int __perf_mmap__read_init(struct perf_mmap *md)
+{
+ u64 head = perf_mmap__read_head(md);
+ u64 old = md->prev;
+ unsigned char *data = md->base + page_size;
+ unsigned long size;
+
+ md->start = md->overwrite ? head : old;
+ md->end = md->overwrite ? old : head;
+
+ if (md->start == md->end)
+ return -EAGAIN;
+
+ size = md->end - md->start;
+ if (size > (unsigned long)(md->mask) + 1) {
+ if (!md->overwrite) {
+ WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n");
+
+ md->prev = head;
+ perf_mmap__consume(md);
+ return -EAGAIN;
+ }
+
+ /*
+ * Backward ring buffer is full. We still have a chance to read
+		 * most of the data from it.
+ */
+ if (overwrite_rb_find_range(data, md->mask, &md->start, &md->end))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int perf_mmap__read_init(struct perf_mmap *map)
+{
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return -ENOENT;
+
+ return __perf_mmap__read_init(map);
+}
+
+int perf_mmap__push(struct perf_mmap *md, void *to,
+ int push(void *to, void *buf, size_t size))
+{
+ u64 head = perf_mmap__read_head(md);
+ unsigned char *data = md->base + page_size;
+ unsigned long size;
+ void *buf;
+ int rc = 0;
+
+ rc = perf_mmap__read_init(md);
+ if (rc < 0)
+ return (rc == -EAGAIN) ? 0 : -1;
+
+ size = md->end - md->start;
+
+ if ((md->start & md->mask) + size != (md->end & md->mask)) {
+ buf = &data[md->start & md->mask];
+ size = md->mask + 1 - (md->start & md->mask);
+ md->start += size;
+
+ if (push(to, buf, size) < 0) {
+ rc = -1;
+ goto out;
+ }
+ }
+
+ buf = &data[md->start & md->mask];
+ size = md->end - md->start;
+ md->start += size;
+
+ if (push(to, buf, size) < 0) {
+ rc = -1;
+ goto out;
+ }
+
+ md->prev = head;
+ perf_mmap__consume(md);
+out:
+ return rc;
+}
+
+/*
+ * Mandatory for overwrite mode.
+ * Overwrite mode reads backward, so the last perf_mmap__read() leaves
+ * the tail in map->prev; map->prev must be corrected to head, which
+ * marks the end of the next read.
+ */
+void perf_mmap__read_done(struct perf_mmap *map)
+{
+ /*
+ * Check if event was unmapped due to a POLLHUP/POLLERR.
+ */
+ if (!refcount_read(&map->refcnt))
+ return;
+
+ map->prev = perf_mmap__read_head(map);
+}
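The read-side API composes into the loop documented above perf_mmap__read_event(). A sketch of draining one map (process_event() is a placeholder for the caller's handler, not part of this patch):

    union perf_event *event;

    if (perf_mmap__read_init(map) < 0)
            return; /* -EAGAIN: nothing to read, -ENOENT: unmapped */

    while ((event = perf_mmap__read_event(map)) != NULL) {
            process_event(event);       /* caller-supplied handler */
            perf_mmap__consume(map);    /* may drop the last reference */
    }
    perf_mmap__read_done(map);          /* required in overwrite mode */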
diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
new file mode 100644
index 000000000..05a6d47c7
--- /dev/null
+++ b/tools/perf/util/mmap.h
@@ -0,0 +1,102 @@
+#ifndef __PERF_MMAP_H
+#define __PERF_MMAP_H 1
+
+#include <linux/compiler.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+#include <asm/barrier.h>
+#include <stdbool.h>
+#include "auxtrace.h"
+#include "event.h"
+
+/**
+ * struct perf_mmap - perf's ring buffer mmap details
+ *
+ * @refcnt: e.g. code using PERF_EVENT_IOC_SET_OUTPUT to share this
+ */
+struct perf_mmap {
+ void *base;
+ int mask;
+ int fd;
+ int cpu;
+ refcount_t refcnt;
+ u64 prev;
+ u64 start;
+ u64 end;
+ bool overwrite;
+ struct auxtrace_mmap auxtrace_mmap;
+ char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8);
+};
+
+/*
+ * State machine of bkw_mmap_state:
+ *
+ * .________________(forbid)_____________.
+ * | V
+ * NOTREADY --(0)--> RUNNING --(1)--> DATA_PENDING --(2)--> EMPTY
+ * ^ ^ | ^ |
+ * | |__(forbid)____/ |___(forbid)___/|
+ * | |
+ * \_________________(3)_______________/
+ *
+ * NOTREADY : Backward ring buffers are not ready
+ * RUNNING : Backward ring buffers are recording
+ * DATA_PENDING : We are required to collect data from backward ring buffers
+ * EMPTY : We have collected data from backward ring buffers.
+ *
+ * (0): Setup backward ring buffer
+ * (1): Pause ring buffers for reading
+ * (2): Read from ring buffers
+ * (3): Resume ring buffers for recording
+ */
+enum bkw_mmap_state {
+ BKW_MMAP_NOTREADY,
+ BKW_MMAP_RUNNING,
+ BKW_MMAP_DATA_PENDING,
+ BKW_MMAP_EMPTY,
+};
+
+struct mmap_params {
+ int prot, mask;
+ struct auxtrace_mmap_params auxtrace_mp;
+};
+
+int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd, int cpu);
+void perf_mmap__munmap(struct perf_mmap *map);
+
+void perf_mmap__get(struct perf_mmap *map);
+void perf_mmap__put(struct perf_mmap *map);
+
+void perf_mmap__consume(struct perf_mmap *map);
+
+static inline u64 perf_mmap__read_head(struct perf_mmap *mm)
+{
+ struct perf_event_mmap_page *pc = mm->base;
+ u64 head = READ_ONCE(pc->data_head);
+ rmb();
+ return head;
+}
+
+static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail)
+{
+ struct perf_event_mmap_page *pc = md->base;
+
+ /*
+	 * Ensure all reads are done before we write the tail out.
+ */
+ mb();
+ pc->data_tail = tail;
+}
+
+union perf_event *perf_mmap__read_forward(struct perf_mmap *map);
+
+union perf_event *perf_mmap__read_event(struct perf_mmap *map);
+
+int perf_mmap__push(struct perf_mmap *md, void *to,
+ int push(void *to, void *buf, size_t size));
+
+size_t perf_mmap__mmap_len(struct perf_mmap *map);
+
+int perf_mmap__read_init(struct perf_mmap *md);
+void perf_mmap__read_done(struct perf_mmap *map);
+#endif /*__PERF_MMAP_H */
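The two inline helpers implement the kernel/user ring-buffer contract: the rmb() in perf_mmap__read_head() orders subsequent data reads after the load of data_head, and the mb() in perf_mmap__write_tail() orders all data reads before the store that publishes data_tail back to the kernel. A sketch of one consumer step under those helpers (this is what perf_mmap__consume() effectively does with map->prev):

    u64 head = perf_mmap__read_head(map); /* rmb(): data reads follow head */
    /* ... copy/process events in (map->prev, head] ... */
    perf_mmap__write_tail(map, head);     /* mb(): reads done before tail store */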
diff --git a/tools/perf/util/namespaces.c b/tools/perf/util/namespaces.c
new file mode 100644
index 000000000..aed170bd4
--- /dev/null
+++ b/tools/perf/util/namespaces.c
@@ -0,0 +1,265 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#include "namespaces.h"
+#include "util.h"
+#include "event.h"
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <asm/bug.h>
+
+struct namespaces *namespaces__new(struct namespaces_event *event)
+{
+ struct namespaces *namespaces;
+ u64 link_info_size = ((event ? event->nr_namespaces : NR_NAMESPACES) *
+ sizeof(struct perf_ns_link_info));
+
+ namespaces = zalloc(sizeof(struct namespaces) + link_info_size);
+ if (!namespaces)
+ return NULL;
+
+ namespaces->end_time = -1;
+
+ if (event)
+ memcpy(namespaces->link_info, event->link_info, link_info_size);
+
+ return namespaces;
+}
+
+void namespaces__free(struct namespaces *namespaces)
+{
+ free(namespaces);
+}
+
+int nsinfo__init(struct nsinfo *nsi)
+{
+ char oldns[PATH_MAX];
+ char spath[PATH_MAX];
+ char *newns = NULL;
+ char *statln = NULL;
+ struct stat old_stat;
+ struct stat new_stat;
+ FILE *f = NULL;
+ size_t linesz = 0;
+ int rv = -1;
+
+ if (snprintf(oldns, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+ return rv;
+
+ if (asprintf(&newns, "/proc/%d/ns/mnt", nsi->pid) == -1)
+ return rv;
+
+ if (stat(oldns, &old_stat) < 0)
+ goto out;
+
+ if (stat(newns, &new_stat) < 0)
+ goto out;
+
+	/* Check if the mount namespaces differ; if so, indicate that we
+ * want to switch as part of looking up dso/map data.
+ */
+ if (old_stat.st_ino != new_stat.st_ino) {
+ nsi->need_setns = true;
+ nsi->mntns_path = newns;
+ newns = NULL;
+ }
+
+ /* If we're dealing with a process that is in a different PID namespace,
+ * attempt to work out the innermost tgid for the process.
+ */
+ if (snprintf(spath, PATH_MAX, "/proc/%d/status", nsi->pid) >= PATH_MAX)
+ goto out;
+
+ f = fopen(spath, "r");
+ if (f == NULL)
+ goto out;
+
+ while (getline(&statln, &linesz, f) != -1) {
+ /* Use tgid if CONFIG_PID_NS is not defined. */
+ if (strstr(statln, "Tgid:") != NULL) {
+ nsi->tgid = (pid_t)strtol(strrchr(statln, '\t'),
+ NULL, 10);
+ nsi->nstgid = nsi->tgid;
+ }
+
+ if (strstr(statln, "NStgid:") != NULL) {
+ nsi->nstgid = (pid_t)strtol(strrchr(statln, '\t'),
+ NULL, 10);
+ break;
+ }
+ }
+ rv = 0;
+
+out:
+ if (f != NULL)
+ (void) fclose(f);
+ free(statln);
+ free(newns);
+ return rv;
+}
+
+struct nsinfo *nsinfo__new(pid_t pid)
+{
+ struct nsinfo *nsi;
+
+ if (pid == 0)
+ return NULL;
+
+ nsi = calloc(1, sizeof(*nsi));
+ if (nsi != NULL) {
+ nsi->pid = pid;
+ nsi->tgid = pid;
+ nsi->nstgid = pid;
+ nsi->need_setns = false;
+ /* Init may fail if the process exits while we're trying to look
+ * at its proc information. In that case, save the pid but
+ * don't try to enter the namespace.
+ */
+ if (nsinfo__init(nsi) == -1)
+ nsi->need_setns = false;
+
+ refcount_set(&nsi->refcnt, 1);
+ }
+
+ return nsi;
+}
+
+struct nsinfo *nsinfo__copy(struct nsinfo *nsi)
+{
+ struct nsinfo *nnsi;
+
+ if (nsi == NULL)
+ return NULL;
+
+ nnsi = calloc(1, sizeof(*nnsi));
+ if (nnsi != NULL) {
+ nnsi->pid = nsi->pid;
+ nnsi->tgid = nsi->tgid;
+ nnsi->nstgid = nsi->nstgid;
+ nnsi->need_setns = nsi->need_setns;
+ if (nsi->mntns_path) {
+ nnsi->mntns_path = strdup(nsi->mntns_path);
+ if (!nnsi->mntns_path) {
+ free(nnsi);
+ return NULL;
+ }
+ }
+ refcount_set(&nnsi->refcnt, 1);
+ }
+
+ return nnsi;
+}
+
+void nsinfo__delete(struct nsinfo *nsi)
+{
+ zfree(&nsi->mntns_path);
+ free(nsi);
+}
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi)
+{
+ if (nsi)
+ refcount_inc(&nsi->refcnt);
+ return nsi;
+}
+
+void nsinfo__put(struct nsinfo *nsi)
+{
+ if (nsi && refcount_dec_and_test(&nsi->refcnt))
+ nsinfo__delete(nsi);
+}
+
+void nsinfo__mountns_enter(struct nsinfo *nsi,
+ struct nscookie *nc)
+{
+ char curpath[PATH_MAX];
+ int oldns = -1;
+ int newns = -1;
+ char *oldcwd = NULL;
+
+ if (nc == NULL)
+ return;
+
+ nc->oldns = -1;
+ nc->newns = -1;
+
+ if (!nsi || !nsi->need_setns)
+ return;
+
+ if (snprintf(curpath, PATH_MAX, "/proc/self/ns/mnt") >= PATH_MAX)
+ return;
+
+ oldcwd = get_current_dir_name();
+ if (!oldcwd)
+ return;
+
+ oldns = open(curpath, O_RDONLY);
+ if (oldns < 0)
+ goto errout;
+
+ newns = open(nsi->mntns_path, O_RDONLY);
+ if (newns < 0)
+ goto errout;
+
+ if (setns(newns, CLONE_NEWNS) < 0)
+ goto errout;
+
+ nc->oldcwd = oldcwd;
+ nc->oldns = oldns;
+ nc->newns = newns;
+ return;
+
+errout:
+ free(oldcwd);
+ if (oldns > -1)
+ close(oldns);
+ if (newns > -1)
+ close(newns);
+}
+
+void nsinfo__mountns_exit(struct nscookie *nc)
+{
+ if (nc == NULL || nc->oldns == -1 || nc->newns == -1 || !nc->oldcwd)
+ return;
+
+ setns(nc->oldns, CLONE_NEWNS);
+
+ if (nc->oldcwd) {
+ WARN_ON_ONCE(chdir(nc->oldcwd));
+ zfree(&nc->oldcwd);
+ }
+
+ if (nc->oldns > -1) {
+ close(nc->oldns);
+ nc->oldns = -1;
+ }
+
+ if (nc->newns > -1) {
+ close(nc->newns);
+ nc->newns = -1;
+ }
+}
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi)
+{
+ char *rpath;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ rpath = realpath(path, NULL);
+ nsinfo__mountns_exit(&nsc);
+
+ return rpath;
+}
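nsinfo__mountns_enter()/nsinfo__mountns_exit() bracket filesystem accesses so paths resolve inside the target process's mount namespace; nsinfo__realpath() above is the canonical pattern. The same bracketing around open() looks like this (dso_path is a placeholder):

    struct nscookie nsc;
    int fd;

    nsinfo__mountns_enter(nsi, &nsc);   /* no-op if !nsi->need_setns */
    fd = open(dso_path, O_RDONLY);      /* resolved in the target mount ns */
    nsinfo__mountns_exit(&nsc);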
diff --git a/tools/perf/util/namespaces.h b/tools/perf/util/namespaces.h
new file mode 100644
index 000000000..d5f46c09e
--- /dev/null
+++ b/tools/perf/util/namespaces.h
@@ -0,0 +1,67 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2017 Hari Bathini, IBM Corporation
+ */
+
+#ifndef __PERF_NAMESPACES_H
+#define __PERF_NAMESPACES_H
+
+#include <sys/types.h>
+#include <linux/stddef.h>
+#include <linux/perf_event.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+
+struct namespaces_event;
+
+struct namespaces {
+ struct list_head list;
+ u64 end_time;
+ struct perf_ns_link_info link_info[];
+};
+
+struct namespaces *namespaces__new(struct namespaces_event *event);
+void namespaces__free(struct namespaces *namespaces);
+
+struct nsinfo {
+ pid_t pid;
+ pid_t tgid;
+ pid_t nstgid;
+ bool need_setns;
+ char *mntns_path;
+ refcount_t refcnt;
+};
+
+struct nscookie {
+ int oldns;
+ int newns;
+ char *oldcwd;
+};
+
+int nsinfo__init(struct nsinfo *nsi);
+struct nsinfo *nsinfo__new(pid_t pid);
+struct nsinfo *nsinfo__copy(struct nsinfo *nsi);
+void nsinfo__delete(struct nsinfo *nsi);
+
+struct nsinfo *nsinfo__get(struct nsinfo *nsi);
+void nsinfo__put(struct nsinfo *nsi);
+
+void nsinfo__mountns_enter(struct nsinfo *nsi, struct nscookie *nc);
+void nsinfo__mountns_exit(struct nscookie *nc);
+
+char *nsinfo__realpath(const char *path, struct nsinfo *nsi);
+
+static inline void __nsinfo__zput(struct nsinfo **nsip)
+{
+ if (nsip) {
+ nsinfo__put(*nsip);
+ *nsip = NULL;
+ }
+}
+
+#define nsinfo__zput(nsi) __nsinfo__zput(&nsi)
+
+#endif /* __PERF_NAMESPACES_H */
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
new file mode 100644
index 000000000..bad9e0296
--- /dev/null
+++ b/tools/perf/util/ordered-events.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <linux/string.h>
+#include "ordered-events.h"
+#include "session.h"
+#include "asm/bug.h"
+#include "debug.h"
+
+#define pr_N(n, fmt, ...) \
+ eprintf(n, debug_ordered_events, fmt, ##__VA_ARGS__)
+
+#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)
+
+static void queue_event(struct ordered_events *oe, struct ordered_event *new)
+{
+ struct ordered_event *last = oe->last;
+ u64 timestamp = new->timestamp;
+ struct list_head *p;
+
+ ++oe->nr_events;
+ oe->last = new;
+
+ pr_oe_time2(timestamp, "queue_event nr_events %u\n", oe->nr_events);
+
+ if (!last) {
+ list_add(&new->list, &oe->events);
+ oe->max_timestamp = timestamp;
+ return;
+ }
+
+ /*
+	 * The last event might point anywhere in the list, since it is
+	 * simply the most recently queued event. We expect the new
+	 * event's timestamp to be close to it.
+ */
+ if (last->timestamp <= timestamp) {
+ while (last->timestamp <= timestamp) {
+ p = last->list.next;
+ if (p == &oe->events) {
+ list_add_tail(&new->list, &oe->events);
+ oe->max_timestamp = timestamp;
+ return;
+ }
+ last = list_entry(p, struct ordered_event, list);
+ }
+ list_add_tail(&new->list, &last->list);
+ } else {
+ while (last->timestamp > timestamp) {
+ p = last->list.prev;
+ if (p == &oe->events) {
+ list_add(&new->list, &oe->events);
+ return;
+ }
+ last = list_entry(p, struct ordered_event, list);
+ }
+ list_add(&new->list, &last->list);
+ }
+}
+
+static union perf_event *__dup_event(struct ordered_events *oe,
+ union perf_event *event)
+{
+ union perf_event *new_event = NULL;
+
+ if (oe->cur_alloc_size < oe->max_alloc_size) {
+ new_event = memdup(event, event->header.size);
+ if (new_event)
+ oe->cur_alloc_size += event->header.size;
+ }
+
+ return new_event;
+}
+
+static union perf_event *dup_event(struct ordered_events *oe,
+ union perf_event *event)
+{
+ return oe->copy_on_queue ? __dup_event(oe, event) : event;
+}
+
+static void free_dup_event(struct ordered_events *oe, union perf_event *event)
+{
+ if (event && oe->copy_on_queue) {
+ oe->cur_alloc_size -= event->header.size;
+ free(event);
+ }
+}
+
+#define MAX_SAMPLE_BUFFER (64 * 1024 / sizeof(struct ordered_event))
+static struct ordered_event *alloc_event(struct ordered_events *oe,
+ union perf_event *event)
+{
+ struct list_head *cache = &oe->cache;
+ struct ordered_event *new = NULL;
+ union perf_event *new_event;
+
+ new_event = dup_event(oe, event);
+ if (!new_event)
+ return NULL;
+
+ if (!list_empty(cache)) {
+ new = list_entry(cache->next, struct ordered_event, list);
+ list_del(&new->list);
+ } else if (oe->buffer) {
+ new = oe->buffer + oe->buffer_idx;
+ if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
+ oe->buffer = NULL;
+ } else if (oe->cur_alloc_size < oe->max_alloc_size) {
+ size_t size = MAX_SAMPLE_BUFFER * sizeof(*new);
+
+ oe->buffer = malloc(size);
+ if (!oe->buffer) {
+ free_dup_event(oe, new_event);
+ return NULL;
+ }
+
+ pr("alloc size %" PRIu64 "B (+%zu), max %" PRIu64 "B\n",
+ oe->cur_alloc_size, size, oe->max_alloc_size);
+
+ oe->cur_alloc_size += size;
+ list_add(&oe->buffer->list, &oe->to_free);
+
+ /* First entry is abused to maintain the to_free list. */
+ oe->buffer_idx = 2;
+ new = oe->buffer + 1;
+ } else {
+ pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
+ }
+
+ new->event = new_event;
+ return new;
+}
+
+static struct ordered_event *
+ordered_events__new_event(struct ordered_events *oe, u64 timestamp,
+ union perf_event *event)
+{
+ struct ordered_event *new;
+
+ new = alloc_event(oe, event);
+ if (new) {
+ new->timestamp = timestamp;
+ queue_event(oe, new);
+ }
+
+ return new;
+}
+
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event)
+{
+ list_move(&event->list, &oe->cache);
+ oe->nr_events--;
+ free_dup_event(oe, event->event);
+ event->event = NULL;
+}
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+ u64 timestamp, u64 file_offset)
+{
+ struct ordered_event *oevent;
+
+ if (!timestamp || timestamp == ~0ULL)
+ return -ETIME;
+
+ if (timestamp < oe->last_flush) {
+ pr_oe_time(timestamp, "out of order event\n");
+ pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
+ oe->last_flush_type);
+
+ oe->nr_unordered_events++;
+ }
+
+ oevent = ordered_events__new_event(oe, timestamp, event);
+ if (!oevent) {
+ ordered_events__flush(oe, OE_FLUSH__HALF);
+ oevent = ordered_events__new_event(oe, timestamp, event);
+ }
+
+ if (!oevent)
+ return -ENOMEM;
+
+ oevent->file_offset = file_offset;
+ return 0;
+}
+
+static int __ordered_events__flush(struct ordered_events *oe)
+{
+ struct list_head *head = &oe->events;
+ struct ordered_event *tmp, *iter;
+ u64 limit = oe->next_flush;
+ u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
+ bool show_progress = limit == ULLONG_MAX;
+ struct ui_progress prog;
+ int ret;
+
+ if (!limit)
+ return 0;
+
+ if (show_progress)
+ ui_progress__init(&prog, oe->nr_events, "Processing time ordered events...");
+
+ list_for_each_entry_safe(iter, tmp, head, list) {
+ if (session_done())
+ return 0;
+
+ if (iter->timestamp > limit)
+ break;
+ ret = oe->deliver(oe, iter);
+ if (ret)
+ return ret;
+
+ ordered_events__delete(oe, iter);
+ oe->last_flush = iter->timestamp;
+
+ if (show_progress)
+ ui_progress__update(&prog, 1);
+ }
+
+ if (list_empty(head))
+ oe->last = NULL;
+ else if (last_ts <= limit)
+ oe->last = list_entry(head->prev, struct ordered_event, list);
+
+ if (show_progress)
+ ui_progress__finish();
+
+ return 0;
+}
+
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
+{
+ static const char * const str[] = {
+ "NONE",
+ "FINAL",
+ "ROUND",
+ "HALF ",
+ };
+ int err;
+
+ if (oe->nr_events == 0)
+ return 0;
+
+ switch (how) {
+ case OE_FLUSH__FINAL:
+ oe->next_flush = ULLONG_MAX;
+ break;
+
+ case OE_FLUSH__HALF:
+ {
+ struct ordered_event *first, *last;
+ struct list_head *head = &oe->events;
+
+ first = list_entry(head->next, struct ordered_event, list);
+ last = oe->last;
+
+ /* Warn if we are called before any event got allocated. */
+ if (WARN_ONCE(!last || list_empty(head), "empty queue"))
+ return 0;
+
+ oe->next_flush = first->timestamp;
+ oe->next_flush += (last->timestamp - first->timestamp) / 2;
+ break;
+ }
+
+ case OE_FLUSH__ROUND:
+ case OE_FLUSH__NONE:
+ default:
+ break;
+	}
+
+ pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n",
+ str[how], oe->nr_events);
+ pr_oe_time(oe->max_timestamp, "max_timestamp\n");
+
+ err = __ordered_events__flush(oe);
+
+ if (!err) {
+ if (how == OE_FLUSH__ROUND)
+ oe->next_flush = oe->max_timestamp;
+
+ oe->last_flush_type = how;
+ }
+
+ pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush POST %s, nr_events %u\n",
+ str[how], oe->nr_events);
+ pr_oe_time(oe->last_flush, "last_flush\n");
+
+ return err;
+}
+
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver)
+{
+ INIT_LIST_HEAD(&oe->events);
+ INIT_LIST_HEAD(&oe->cache);
+ INIT_LIST_HEAD(&oe->to_free);
+ oe->max_alloc_size = (u64) -1;
+ oe->cur_alloc_size = 0;
+ oe->deliver = deliver;
+}
+
+void ordered_events__free(struct ordered_events *oe)
+{
+ while (!list_empty(&oe->to_free)) {
+ struct ordered_event *event;
+
+ event = list_entry(oe->to_free.next, struct ordered_event, list);
+ list_del(&event->list);
+ free_dup_event(oe, event->event);
+ free(event);
+ }
+}
+
+void ordered_events__reinit(struct ordered_events *oe)
+{
+ ordered_events__deliver_t old_deliver = oe->deliver;
+
+ ordered_events__free(oe);
+ memset(oe, '\0', sizeof(*oe));
+ ordered_events__init(oe, old_deliver);
+}
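The typical driver of this machinery: register a deliver callback, queue timestamped events as they arrive, flush ROUND periodically and FINAL at end of session. A hedged sketch (the deliver() body, event, sample_time and file_offset are placeholders for caller state):

    static int deliver(struct ordered_events *oe, struct ordered_event *ev)
    {
            /* consume ev->event here, in timestamp order */
            return 0;
    }

    struct ordered_events oe;

    ordered_events__init(&oe, deliver);
    ordered_events__queue(&oe, event, sample_time, file_offset);
    ordered_events__flush(&oe, OE_FLUSH__ROUND); /* periodic partial flush */
    ordered_events__flush(&oe, OE_FLUSH__FINAL); /* drain at session end */
    ordered_events__free(&oe);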
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
new file mode 100644
index 000000000..8c7a29485
--- /dev/null
+++ b/tools/perf/util/ordered-events.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ORDERED_EVENTS_H
+#define __ORDERED_EVENTS_H
+
+#include <linux/types.h>
+
+struct perf_sample;
+
+struct ordered_event {
+ u64 timestamp;
+ u64 file_offset;
+ union perf_event *event;
+ struct list_head list;
+};
+
+enum oe_flush {
+ OE_FLUSH__NONE,
+ OE_FLUSH__FINAL,
+ OE_FLUSH__ROUND,
+ OE_FLUSH__HALF,
+};
+
+struct ordered_events;
+
+typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
+ struct ordered_event *event);
+
+struct ordered_events {
+ u64 last_flush;
+ u64 next_flush;
+ u64 max_timestamp;
+ u64 max_alloc_size;
+ u64 cur_alloc_size;
+ struct list_head events;
+ struct list_head cache;
+ struct list_head to_free;
+ struct ordered_event *buffer;
+ struct ordered_event *last;
+ ordered_events__deliver_t deliver;
+ int buffer_idx;
+ unsigned int nr_events;
+ enum oe_flush last_flush_type;
+ u32 nr_unordered_events;
+ bool copy_on_queue;
+};
+
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
+ u64 timestamp, u64 file_offset);
+void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event);
+int ordered_events__flush(struct ordered_events *oe, enum oe_flush how);
+void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver);
+void ordered_events__free(struct ordered_events *oe);
+void ordered_events__reinit(struct ordered_events *oe);
+
+static inline
+void ordered_events__set_alloc_size(struct ordered_events *oe, u64 size)
+{
+ oe->max_alloc_size = size;
+}
+
+static inline
+void ordered_events__set_copy_on_queue(struct ordered_events *oe, bool copy)
+{
+ oe->copy_on_queue = copy;
+}
+#endif /* __ORDERED_EVENTS_H */
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644
index 000000000..bd779d9f4
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-branch-options.h"
+
+#define BRANCH_OPT(n, m) \
+ { .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+ const char *name;
+ int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+ BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+ BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+ BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+ BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+ BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+ BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+ BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+ BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+ BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+ BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+ BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+ BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
+ BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
+ BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
+ BRANCH_END
+};
+
+int parse_branch_str(const char *str, __u64 *mode)
+{
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
+ int ret = 0;
+ char *p, *s;
+ char *os = NULL;
+ const struct branch_mode *br;
+
+ if (str == NULL) {
+ *mode = PERF_SAMPLE_BRANCH_ANY;
+ return 0;
+ }
+
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ for (br = branch_modes; br->name; br++) {
+ if (!strcasecmp(s, br->name))
+ break;
+ }
+ if (!br->name) {
+ ret = -1;
+ pr_warning("unknown branch filter %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= br->mode;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+
+ /* default to any branch */
+ if ((*mode & ~ONLY_PLM) == 0) {
+ *mode = PERF_SAMPLE_BRANCH_ANY;
+ }
+error:
+ free(os);
+ return ret;
+}
+
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+ __u64 *mode = (__u64 *)opt->value;
+
+ if (unset)
+ return 0;
+
+ /*
+	 * Cannot set it twice; e.g. -b combined with --branch-filter.
+ */
+ if (*mode)
+ return -1;
+
+ return parse_branch_str(str, mode);
+}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644
index 000000000..11d172273
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+#include <stdint.h>
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+int parse_branch_str(const char *str, __u64 *mode);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */
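parse_branch_str() turns a comma-separated filter list into PERF_SAMPLE_BRANCH_* bits, defaulting to ANY when only privilege levels (u/k/hv) are given. A small sketch (attr is a caller-owned struct perf_event_attr):

    __u64 mode = 0;

    if (parse_branch_str("u,any_call", &mode) == 0)
            attr.branch_sample_type = mode; /* user-level call branches */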
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
new file mode 100644
index 000000000..0eff0c3ba
--- /dev/null
+++ b/tools/perf/util/parse-events.c
@@ -0,0 +1,2768 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/hw_breakpoint.h>
+#include <linux/err.h>
+#include <dirent.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include "term.h"
+#include "../perf.h"
+#include "evlist.h"
+#include "evsel.h"
+#include <subcmd/parse-options.h>
+#include "parse-events.h"
+#include <subcmd/exec-cmd.h>
+#include "string2.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "cache.h"
+#include "header.h"
+#include "bpf-loader.h"
+#include "debug.h"
+#include <api/fs/tracing_path.h>
+#include "parse-events-bison.h"
+#define YY_EXTRA_TYPE int
+#include "parse-events-flex.h"
+#include "pmu.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "probe-file.h"
+#include "asm/bug.h"
+#include "util/parse-branch-options.h"
+#include "metricgroup.h"
+
+#define MAX_NAME_LEN 100
+
+#ifdef PARSER_DEBUG
+extern int parse_events_debug;
+#endif
+int parse_events_parse(void *parse_state, void *scanner);
+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused);
+
+static struct perf_pmu_event_symbol *perf_pmu_events_list;
+/*
+ * Number of supported PMU event symbols:
+ *  0 means not yet initialized and ready to init,
+ * -1 means init failed, don't try again,
+ * >0 is the number of supported PMU event symbols.
+ */
+static int perf_pmu_events_list_num;
+
+struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = {
+ .symbol = "cpu-cycles",
+ .alias = "cycles",
+ },
+ [PERF_COUNT_HW_INSTRUCTIONS] = {
+ .symbol = "instructions",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_CACHE_REFERENCES] = {
+ .symbol = "cache-references",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_CACHE_MISSES] = {
+ .symbol = "cache-misses",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = {
+ .symbol = "branch-instructions",
+ .alias = "branches",
+ },
+ [PERF_COUNT_HW_BRANCH_MISSES] = {
+ .symbol = "branch-misses",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_BUS_CYCLES] = {
+ .symbol = "bus-cycles",
+ .alias = "",
+ },
+ [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = {
+ .symbol = "stalled-cycles-frontend",
+ .alias = "idle-cycles-frontend",
+ },
+ [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = {
+ .symbol = "stalled-cycles-backend",
+ .alias = "idle-cycles-backend",
+ },
+ [PERF_COUNT_HW_REF_CPU_CYCLES] = {
+ .symbol = "ref-cycles",
+ .alias = "",
+ },
+};
+
+struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = {
+ [PERF_COUNT_SW_CPU_CLOCK] = {
+ .symbol = "cpu-clock",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_TASK_CLOCK] = {
+ .symbol = "task-clock",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_PAGE_FAULTS] = {
+ .symbol = "page-faults",
+ .alias = "faults",
+ },
+ [PERF_COUNT_SW_CONTEXT_SWITCHES] = {
+ .symbol = "context-switches",
+ .alias = "cs",
+ },
+ [PERF_COUNT_SW_CPU_MIGRATIONS] = {
+ .symbol = "cpu-migrations",
+ .alias = "migrations",
+ },
+ [PERF_COUNT_SW_PAGE_FAULTS_MIN] = {
+ .symbol = "minor-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_PAGE_FAULTS_MAJ] = {
+ .symbol = "major-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_ALIGNMENT_FAULTS] = {
+ .symbol = "alignment-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_EMULATION_FAULTS] = {
+ .symbol = "emulation-faults",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_DUMMY] = {
+ .symbol = "dummy",
+ .alias = "",
+ },
+ [PERF_COUNT_SW_BPF_OUTPUT] = {
+ .symbol = "bpf-output",
+ .alias = "",
+ },
+};
+
+#define __PERF_EVENT_FIELD(config, name) \
+ ((config & PERF_EVENT_##name##_MASK) >> PERF_EVENT_##name##_SHIFT)
+
+#define PERF_EVENT_RAW(config) __PERF_EVENT_FIELD(config, RAW)
+#define PERF_EVENT_CONFIG(config) __PERF_EVENT_FIELD(config, CONFIG)
+#define PERF_EVENT_TYPE(config) __PERF_EVENT_FIELD(config, TYPE)
+#define PERF_EVENT_ID(config) __PERF_EVENT_FIELD(config, EVENT)
+
+#define for_each_subsystem(sys_dir, sys_dirent) \
+ while ((sys_dirent = readdir(sys_dir)) != NULL) \
+ if (sys_dirent->d_type == DT_DIR && \
+ (strcmp(sys_dirent->d_name, ".")) && \
+ (strcmp(sys_dirent->d_name, "..")))
+
+static int tp_event_has_id(const char *dir_path, struct dirent *evt_dir)
+{
+ char evt_path[MAXPATHLEN];
+ int fd;
+
+ snprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path, evt_dir->d_name);
+ fd = open(evt_path, O_RDONLY);
+ if (fd < 0)
+ return -EINVAL;
+ close(fd);
+
+ return 0;
+}
+
+#define for_each_event(dir_path, evt_dir, evt_dirent) \
+ while ((evt_dirent = readdir(evt_dir)) != NULL) \
+ if (evt_dirent->d_type == DT_DIR && \
+ (strcmp(evt_dirent->d_name, ".")) && \
+ (strcmp(evt_dirent->d_name, "..")) && \
+ (!tp_event_has_id(dir_path, evt_dirent)))
+
+#define MAX_EVENT_LENGTH 512
+
+
+struct tracepoint_path *tracepoint_id_to_path(u64 config)
+{
+ struct tracepoint_path *path = NULL;
+ DIR *sys_dir, *evt_dir;
+ struct dirent *sys_dirent, *evt_dirent;
+ char id_buf[24];
+ int fd;
+ u64 id;
+ char evt_path[MAXPATHLEN];
+ char *dir_path;
+
+ sys_dir = tracing_events__opendir();
+ if (!sys_dir)
+ return NULL;
+
+ for_each_subsystem(sys_dir, sys_dirent) {
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
+ evt_dir = opendir(dir_path);
+ if (!evt_dir)
+ goto next;
+
+ for_each_event(dir_path, evt_dir, evt_dirent) {
+
+ scnprintf(evt_path, MAXPATHLEN, "%s/%s/id", dir_path,
+ evt_dirent->d_name);
+ fd = open(evt_path, O_RDONLY);
+ if (fd < 0)
+ continue;
+ if (read(fd, id_buf, sizeof(id_buf)) < 0) {
+ close(fd);
+ continue;
+ }
+ close(fd);
+ id = atoll(id_buf);
+ if (id == config) {
+ put_events_file(dir_path);
+ closedir(evt_dir);
+ closedir(sys_dir);
+ path = zalloc(sizeof(*path));
+ if (!path)
+ return NULL;
+ path->system = malloc(MAX_EVENT_LENGTH);
+ if (!path->system) {
+ free(path);
+ return NULL;
+ }
+ path->name = malloc(MAX_EVENT_LENGTH);
+ if (!path->name) {
+ zfree(&path->system);
+ free(path);
+ return NULL;
+ }
+ strncpy(path->system, sys_dirent->d_name,
+ MAX_EVENT_LENGTH);
+ strncpy(path->name, evt_dirent->d_name,
+ MAX_EVENT_LENGTH);
+ return path;
+ }
+ }
+ closedir(evt_dir);
+next:
+ put_events_file(dir_path);
+ }
+
+ closedir(sys_dir);
+ return NULL;
+}
+
+struct tracepoint_path *tracepoint_name_to_path(const char *name)
+{
+ struct tracepoint_path *path = zalloc(sizeof(*path));
+ char *str = strchr(name, ':');
+
+ if (path == NULL || str == NULL) {
+ free(path);
+ return NULL;
+ }
+
+ path->system = strndup(name, str - name);
+ path->name = strdup(str+1);
+
+ if (path->system == NULL || path->name == NULL) {
+ zfree(&path->system);
+ zfree(&path->name);
+ zfree(&path);
+ }
+
+ return path;
+}
+
+const char *event_type(int type)
+{
+ switch (type) {
+ case PERF_TYPE_HARDWARE:
+ return "hardware";
+
+ case PERF_TYPE_SOFTWARE:
+ return "software";
+
+ case PERF_TYPE_TRACEPOINT:
+ return "tracepoint";
+
+ case PERF_TYPE_HW_CACHE:
+ return "hardware-cache";
+
+ default:
+ break;
+ }
+
+ return "unknown";
+}
+
+static int parse_events__is_name_term(struct parse_events_term *term)
+{
+ return term->type_term == PARSE_EVENTS__TERM_TYPE_NAME;
+}
+
+static char *get_config_name(struct list_head *head_terms)
+{
+ struct parse_events_term *term;
+
+ if (!head_terms)
+ return NULL;
+
+ list_for_each_entry(term, head_terms, list)
+ if (parse_events__is_name_term(term))
+ return term->val.str;
+
+ return NULL;
+}
+
+static struct perf_evsel *
+__add_event(struct list_head *list, int *idx,
+ struct perf_event_attr *attr,
+ char *name, struct perf_pmu *pmu,
+ struct list_head *config_terms, bool auto_merge_stats)
+{
+ struct perf_evsel *evsel;
+ struct cpu_map *cpus = pmu ? pmu->cpus : NULL;
+
+ event_attr_init(attr);
+
+ evsel = perf_evsel__new_idx(attr, *idx);
+ if (!evsel)
+ return NULL;
+
+ (*idx)++;
+ evsel->cpus = cpu_map__get(cpus);
+ evsel->own_cpus = cpu_map__get(cpus);
+ evsel->system_wide = pmu ? pmu->is_uncore : false;
+ evsel->auto_merge_stats = auto_merge_stats;
+
+ if (name)
+ evsel->name = strdup(name);
+
+ if (config_terms)
+ list_splice(config_terms, &evsel->config_terms);
+
+ list_add_tail(&evsel->node, list);
+ return evsel;
+}
+
+static int add_event(struct list_head *list, int *idx,
+ struct perf_event_attr *attr, char *name,
+ struct list_head *config_terms)
+{
+ return __add_event(list, idx, attr, name, NULL, config_terms, false) ? 0 : -ENOMEM;
+}
+
+static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
+{
+ int i, j;
+ int n, longest = -1;
+
+ for (i = 0; i < size; i++) {
+ for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {
+ n = strlen(names[i][j]);
+ if (n > longest && !strncasecmp(str, names[i][j], n))
+ longest = n;
+ }
+ if (longest > 0)
+ return i;
+ }
+
+ return -1;
+}
+
+typedef int config_term_func_t(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err);
+static int config_term_common(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err);
+static int config_attr(struct perf_event_attr *attr,
+ struct list_head *head,
+ struct parse_events_error *err,
+ config_term_func_t config_term);
+
+int parse_events_add_cache(struct list_head *list, int *idx,
+ char *type, char *op_result1, char *op_result2,
+ struct parse_events_error *err,
+ struct list_head *head_config)
+{
+ struct perf_event_attr attr;
+ LIST_HEAD(config_terms);
+ char name[MAX_NAME_LEN], *config_name;
+ int cache_type = -1, cache_op = -1, cache_result = -1;
+ char *op_result[2] = { op_result1, op_result2 };
+ int i, n;
+
+ /*
+ * No fallback - if we cannot get a clear cache type
+ * then bail out:
+ */
+ cache_type = parse_aliases(type, perf_evsel__hw_cache,
+ PERF_COUNT_HW_CACHE_MAX);
+ if (cache_type == -1)
+ return -EINVAL;
+
+ config_name = get_config_name(head_config);
+ n = snprintf(name, MAX_NAME_LEN, "%s", type);
+
+ for (i = 0; (i < 2) && (op_result[i]); i++) {
+ char *str = op_result[i];
+
+ n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
+
+ if (cache_op == -1) {
+ cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
+ PERF_COUNT_HW_CACHE_OP_MAX);
+ if (cache_op >= 0) {
+ if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
+ return -EINVAL;
+ continue;
+ }
+ }
+
+ if (cache_result == -1) {
+ cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+ PERF_COUNT_HW_CACHE_RESULT_MAX);
+ if (cache_result >= 0)
+ continue;
+ }
+ }
+
+ /*
+ * Fall back to reads:
+ */
+ if (cache_op == -1)
+ cache_op = PERF_COUNT_HW_CACHE_OP_READ;
+
+ /*
+ * Fall back to accesses:
+ */
+ if (cache_result == -1)
+ cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS;
+
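+	/*
+	 * The generic HW cache config packs the cache type into bits 0-7,
+	 * the op into bits 8-15 and the result into bits 16-23; e.g.
+	 * L1-dcache-load-misses is (PERF_COUNT_HW_CACHE_L1D |
+	 * (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+	 * (PERF_COUNT_HW_CACHE_RESULT_MISS << 16)).
+	 */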
+ memset(&attr, 0, sizeof(attr));
+ attr.config = cache_type | (cache_op << 8) | (cache_result << 16);
+ attr.type = PERF_TYPE_HW_CACHE;
+
+ if (head_config) {
+ if (config_attr(&attr, head_config, err,
+ config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+ return add_event(list, idx, &attr, config_name ? : name, &config_terms);
+}
+
+static void tracepoint_error(struct parse_events_error *e, int err,
+ const char *sys, const char *name)
+{
+ char help[BUFSIZ];
+
+ if (!e)
+ return;
+
+ /*
+	 * We get the error directly from the syscall's errno (> 0),
+	 * or from an encoded pointer's error (< 0).
+ */
+ err = abs(err);
+
+ switch (err) {
+ case EACCES:
+ e->str = strdup("can't access trace events");
+ break;
+ case ENOENT:
+ e->str = strdup("unknown tracepoint");
+ break;
+ default:
+ e->str = strdup("failed to add tracepoint");
+ break;
+ }
+
+ tracing_path__strerror_open_tp(err, help, sizeof(help), sys, name);
+ e->help = strdup(help);
+}
+
+static int add_tracepoint(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config)
+{
+ struct perf_evsel *evsel;
+
+ evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++);
+ if (IS_ERR(evsel)) {
+ tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
+ return PTR_ERR(evsel);
+ }
+
+ if (head_config) {
+ LIST_HEAD(config_terms);
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ list_splice(&config_terms, &evsel->config_terms);
+ }
+
+ list_add_tail(&evsel->node, list);
+ return 0;
+}
+
+static int add_tracepoint_multi_event(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config)
+{
+ char *evt_path;
+ struct dirent *evt_ent;
+ DIR *evt_dir;
+ int ret = 0, found = 0;
+
+ evt_path = get_events_file(sys_name);
+ if (!evt_path) {
+ tracepoint_error(err, errno, sys_name, evt_name);
+ return -1;
+ }
+ evt_dir = opendir(evt_path);
+ if (!evt_dir) {
+ put_events_file(evt_path);
+ tracepoint_error(err, errno, sys_name, evt_name);
+ return -1;
+ }
+
+ while (!ret && (evt_ent = readdir(evt_dir))) {
+ if (!strcmp(evt_ent->d_name, ".")
+ || !strcmp(evt_ent->d_name, "..")
+ || !strcmp(evt_ent->d_name, "enable")
+ || !strcmp(evt_ent->d_name, "filter"))
+ continue;
+
+ if (!strglobmatch(evt_ent->d_name, evt_name))
+ continue;
+
+ found++;
+
+ ret = add_tracepoint(list, idx, sys_name, evt_ent->d_name,
+ err, head_config);
+ }
+
+ if (!found) {
+ tracepoint_error(err, ENOENT, sys_name, evt_name);
+ ret = -1;
+ }
+
+ put_events_file(evt_path);
+ closedir(evt_dir);
+ return ret;
+}
+
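+/*
+ * Dispatch on glob characters: e.g. "sched:sched_*" fans out to every
+ * matching event in the sched subsystem.
+ */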
+static int add_tracepoint_event(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config)
+{
+ return strpbrk(evt_name, "*?") ?
+ add_tracepoint_multi_event(list, idx, sys_name, evt_name,
+ err, head_config) :
+ add_tracepoint(list, idx, sys_name, evt_name,
+ err, head_config);
+}
+
+static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
+ const char *sys_name, const char *evt_name,
+ struct parse_events_error *err,
+ struct list_head *head_config)
+{
+ struct dirent *events_ent;
+ DIR *events_dir;
+ int ret = 0;
+
+ events_dir = tracing_events__opendir();
+ if (!events_dir) {
+ tracepoint_error(err, errno, sys_name, evt_name);
+ return -1;
+ }
+
+ while (!ret && (events_ent = readdir(events_dir))) {
+ if (!strcmp(events_ent->d_name, ".")
+ || !strcmp(events_ent->d_name, "..")
+ || !strcmp(events_ent->d_name, "enable")
+ || !strcmp(events_ent->d_name, "header_event")
+ || !strcmp(events_ent->d_name, "header_page"))
+ continue;
+
+ if (!strglobmatch(events_ent->d_name, sys_name))
+ continue;
+
+ ret = add_tracepoint_event(list, idx, events_ent->d_name,
+ evt_name, err, head_config);
+ }
+
+ closedir(events_dir);
+ return ret;
+}
+
+struct __add_bpf_event_param {
+ struct parse_events_state *parse_state;
+ struct list_head *list;
+ struct list_head *head_config;
+};
+
+static int add_bpf_event(const char *group, const char *event, int fd,
+ void *_param)
+{
+ LIST_HEAD(new_evsels);
+ struct __add_bpf_event_param *param = _param;
+ struct parse_events_state *parse_state = param->parse_state;
+ struct list_head *list = param->list;
+ struct perf_evsel *pos;
+ int err;
+
+ pr_debug("add bpf event %s:%s and attach bpf program %d\n",
+ group, event, fd);
+
+ err = parse_events_add_tracepoint(&new_evsels, &parse_state->idx, group,
+ event, parse_state->error,
+ param->head_config);
+ if (err) {
+ struct perf_evsel *evsel, *tmp;
+
+ pr_debug("Failed to add BPF event %s:%s\n",
+ group, event);
+ list_for_each_entry_safe(evsel, tmp, &new_evsels, node) {
+ list_del(&evsel->node);
+ perf_evsel__delete(evsel);
+ }
+ return err;
+ }
+ pr_debug("adding %s:%s\n", group, event);
+
+ list_for_each_entry(pos, &new_evsels, node) {
+ pr_debug("adding %s:%s to %p\n",
+ group, event, pos);
+ pos->bpf_fd = fd;
+ }
+ list_splice(&new_evsels, list);
+ return 0;
+}
+
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+ struct list_head *list,
+ struct bpf_object *obj,
+ struct list_head *head_config)
+{
+ int err;
+ char errbuf[BUFSIZ];
+ struct __add_bpf_event_param param = {parse_state, list, head_config};
+ static bool registered_unprobe_atexit = false;
+
+ if (IS_ERR(obj) || !obj) {
+ snprintf(errbuf, sizeof(errbuf),
+			 "Internal error: cannot load a NULL BPF object");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ /*
+	 * Register the atexit handler before calling bpf__probe() so
+	 * bpf__probe() doesn't need to unprobe the probe points it has
+	 * already created on failure.
+ */
+ if (!registered_unprobe_atexit) {
+ atexit(bpf__clear);
+ registered_unprobe_atexit = true;
+ }
+
+ err = bpf__probe(obj);
+ if (err) {
+ bpf__strerror_probe(obj, err, errbuf, sizeof(errbuf));
+ goto errout;
+ }
+
+ err = bpf__load(obj);
+ if (err) {
+ bpf__strerror_load(obj, err, errbuf, sizeof(errbuf));
+ goto errout;
+ }
+
+ err = bpf__foreach_event(obj, add_bpf_event, &param);
+ if (err) {
+ snprintf(errbuf, sizeof(errbuf),
+			 "Attaching events in BPF object failed");
+ goto errout;
+ }
+
+ return 0;
+errout:
+ parse_state->error->help = strdup("(add -v to see detail)");
+ parse_state->error->str = strdup(errbuf);
+ return err;
+}
+
+static int
+parse_events_config_bpf(struct parse_events_state *parse_state,
+ struct bpf_object *obj,
+ struct list_head *head_config)
+{
+ struct parse_events_term *term;
+ int error_pos;
+
+ if (!head_config || list_empty(head_config))
+ return 0;
+
+ list_for_each_entry(term, head_config, list) {
+ char errbuf[BUFSIZ];
+ int err;
+
+ if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
+ snprintf(errbuf, sizeof(errbuf),
+ "Invalid config term for BPF object");
+ errbuf[BUFSIZ - 1] = '\0';
+
+ parse_state->error->idx = term->err_term;
+ parse_state->error->str = strdup(errbuf);
+ return -EINVAL;
+ }
+
+ err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
+ if (err) {
+ bpf__strerror_config_obj(obj, term, parse_state->evlist,
+ &error_pos, err, errbuf,
+ sizeof(errbuf));
+ parse_state->error->help = strdup(
+"Hint:\tValid config terms:\n"
+" \tmap:[<arraymap>].value<indices>=[value]\n"
+" \tmap:[<eventmap>].event<indices>=[event]\n"
+"\n"
+" \twhere <indices> is something like [0,3...5] or [all]\n"
+" \t(add -v to see detail)");
+ parse_state->error->str = strdup(errbuf);
+ if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+ parse_state->error->idx = term->err_val;
+ else
+ parse_state->error->idx = term->err_term + error_pos;
+ return err;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Split config terms:
+ * perf record -e bpf.c/call-graph=fp,map:array.value[0]=1/ ...
+ *  'call-graph=fp' is an 'evt config' and should be applied to each
+ *  event in bpf.c.
+ *  'map:array.value[0]=1' is an 'obj config' and should be processed
+ *  by parse_events_config_bpf().
+ *
+ * Move object config terms from the first list to obj_head_config.
+ */
+static void
+split_bpf_config_terms(struct list_head *evt_head_config,
+ struct list_head *obj_head_config)
+{
+ struct parse_events_term *term, *temp;
+
+ /*
+	 * Currently, all possible user config terms
+	 * belong to the bpf object. parse_events__is_hardcoded_term()
+	 * happens to be a good flag.
+ *
+ * See parse_events_config_bpf() and
+ * config_term_tracepoint().
+ */
+ list_for_each_entry_safe(term, temp, evt_head_config, list)
+ if (!parse_events__is_hardcoded_term(term))
+ list_move_tail(&term->list, obj_head_config);
+}
+
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+ struct list_head *list,
+ char *bpf_file_name,
+ bool source,
+ struct list_head *head_config)
+{
+ int err;
+ struct bpf_object *obj;
+ LIST_HEAD(obj_head_config);
+
+ if (head_config)
+ split_bpf_config_terms(head_config, &obj_head_config);
+
+ obj = bpf__prepare_load(bpf_file_name, source);
+ if (IS_ERR(obj)) {
+ char errbuf[BUFSIZ];
+
+ err = PTR_ERR(obj);
+
+ if (err == -ENOTSUP)
+ snprintf(errbuf, sizeof(errbuf),
+				 "BPF support is not compiled in");
+ else
+ bpf__strerror_prepare_load(bpf_file_name,
+ source,
+ -err, errbuf,
+ sizeof(errbuf));
+
+ parse_state->error->help = strdup("(add -v to see detail)");
+ parse_state->error->str = strdup(errbuf);
+ return err;
+ }
+
+ err = parse_events_load_bpf_obj(parse_state, list, obj, head_config);
+ if (err)
+ return err;
+ err = parse_events_config_bpf(parse_state, obj, &obj_head_config);
+
+ /*
+ * Caller doesn't know anything about obj_head_config,
+	 * so combine them together again before returning.
+ */
+ if (head_config)
+ list_splice_tail(&obj_head_config, head_config);
+ return err;
+}
+
+static int
+parse_breakpoint_type(const char *type, struct perf_event_attr *attr)
+{
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ if (!type || !type[i])
+ break;
+
+#define CHECK_SET_TYPE(bit) \
+do { \
+ if (attr->bp_type & bit) \
+ return -EINVAL; \
+ else \
+ attr->bp_type |= bit; \
+} while (0)
+
+ switch (type[i]) {
+ case 'r':
+ CHECK_SET_TYPE(HW_BREAKPOINT_R);
+ break;
+ case 'w':
+ CHECK_SET_TYPE(HW_BREAKPOINT_W);
+ break;
+ case 'x':
+ CHECK_SET_TYPE(HW_BREAKPOINT_X);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+#undef CHECK_SET_TYPE
+
+ if (!attr->bp_type) /* Default */
+ attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W;
+
+ return 0;
+}
+
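+/*
+ * Handle the "mem:<addr>[/len][:access]" syntax; e.g. "mem:0x1000:rw"
+ * sets up a read/write breakpoint at 0x1000 with the default length of
+ * HW_BREAKPOINT_LEN_4.
+ */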
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
+ void *ptr, char *type, u64 len)
+{
+ struct perf_event_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.bp_addr = (unsigned long) ptr;
+
+ if (parse_breakpoint_type(type, &attr))
+ return -EINVAL;
+
+ /* Provide some defaults if len is not specified */
+ if (!len) {
+ if (attr.bp_type == HW_BREAKPOINT_X)
+ len = sizeof(long);
+ else
+ len = HW_BREAKPOINT_LEN_4;
+ }
+
+ attr.bp_len = len;
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.sample_period = 1;
+
+ return add_event(list, idx, &attr, NULL, NULL);
+}
+
+static int check_type_val(struct parse_events_term *term,
+ struct parse_events_error *err,
+ int type)
+{
+ if (type == term->type_val)
+ return 0;
+
+ if (err) {
+ err->idx = term->err_val;
+ if (type == PARSE_EVENTS__TERM_TYPE_NUM)
+ err->str = strdup("expected numeric value");
+ else
+ err->str = strdup("expected string value");
+ }
+ return -EINVAL;
+}
+
+/*
+ * Keep this in sync with parse-events.l.
+ */
+static const char *config_term_names[__PARSE_EVENTS__TERM_TYPE_NR] = {
+ [PARSE_EVENTS__TERM_TYPE_USER] = "<sysfs term>",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG] = "config",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG1] = "config1",
+ [PARSE_EVENTS__TERM_TYPE_CONFIG2] = "config2",
+ [PARSE_EVENTS__TERM_TYPE_NAME] = "name",
+ [PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD] = "period",
+ [PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ] = "freq",
+ [PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE] = "branch_type",
+ [PARSE_EVENTS__TERM_TYPE_TIME] = "time",
+ [PARSE_EVENTS__TERM_TYPE_CALLGRAPH] = "call-graph",
+ [PARSE_EVENTS__TERM_TYPE_STACKSIZE] = "stack-size",
+ [PARSE_EVENTS__TERM_TYPE_NOINHERIT] = "no-inherit",
+ [PARSE_EVENTS__TERM_TYPE_INHERIT] = "inherit",
+ [PARSE_EVENTS__TERM_TYPE_MAX_STACK] = "max-stack",
+ [PARSE_EVENTS__TERM_TYPE_OVERWRITE] = "overwrite",
+ [PARSE_EVENTS__TERM_TYPE_NOOVERWRITE] = "no-overwrite",
+ [PARSE_EVENTS__TERM_TYPE_DRV_CFG] = "driver-config",
+};
+
+static bool config_term_shrinked;
+
+static bool
+config_term_avail(int term_type, struct parse_events_error *err)
+{
+ if (term_type < 0 || term_type >= __PARSE_EVENTS__TERM_TYPE_NR) {
+ err->str = strdup("Invalid term_type");
+ return false;
+ }
+ if (!config_term_shrinked)
+ return true;
+
+ switch (term_type) {
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ return true;
+ default:
+ if (!err)
+ return false;
+
+ /* term_type is validated so indexing is safe */
+ if (asprintf(&err->str, "'%s' is not usable in 'perf stat'",
+ config_term_names[term_type]) < 0)
+ err->str = NULL;
+ return false;
+ }
+}
+
+void parse_events__shrink_config_terms(void)
+{
+ config_term_shrinked = true;
+}
+
+static int config_term_common(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+#define CHECK_TYPE_VAL(type) \
+do { \
+ if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+ return -EINVAL; \
+} while (0)
+
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_CONFIG:
+ CHECK_TYPE_VAL(NUM);
+ attr->config = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG1:
+ CHECK_TYPE_VAL(NUM);
+ attr->config1 = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CONFIG2:
+ CHECK_TYPE_VAL(NUM);
+ attr->config2 = term->val.num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ CHECK_TYPE_VAL(STR);
+ if (strcmp(term->val.str, "no") &&
+ parse_branch_str(term->val.str, &attr->branch_sample_type)) {
+ err->str = strdup("invalid branch sample type");
+ err->idx = term->err_val;
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ CHECK_TYPE_VAL(NUM);
+ if (term->val.num > 1) {
+ err->str = strdup("expected 0 or 1");
+ err->idx = term->err_val;
+ return -EINVAL;
+ }
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NAME:
+ CHECK_TYPE_VAL(STR);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ CHECK_TYPE_VAL(NUM);
+ break;
+ default:
+ err->str = strdup("unknown term");
+ err->idx = term->err_term;
+ err->help = parse_events_formats_error_string(NULL);
+ return -EINVAL;
+ }
+
+ /*
+	 * Check term availability after the basic checks so
+	 * PARSE_EVENTS__TERM_TYPE_USER can be found and filtered.
+	 *
+	 * If we checked availability at the entry of this function,
+	 * the user would see "'<sysfs term>' is not usable in 'perf stat'"
+	 * whenever an invalid config term is provided for a legacy event
+	 * (for example, instructions/badterm/...), which is confusing.
+ */
+ if (!config_term_avail(term->type_term, err))
+ return -EINVAL;
+ return 0;
+#undef CHECK_TYPE_VAL
+}
+
+static int config_term_pmu(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+ if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER ||
+ term->type_term == PARSE_EVENTS__TERM_TYPE_DRV_CFG)
+ /*
+		 * Always succeed for sysfs terms, as we don't know
+ * at this point what type they need to have.
+ */
+ return 0;
+ else
+ return config_term_common(attr, term, err);
+}
+
+static int config_term_tracepoint(struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct parse_events_error *err)
+{
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ return config_term_common(attr, term, err);
+ default:
+ if (err) {
+ err->idx = term->err_term;
+ err->str = strdup("unknown term");
+ err->help = strdup("valid terms: call-graph,stack-size\n");
+ }
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int config_attr(struct perf_event_attr *attr,
+ struct list_head *head,
+ struct parse_events_error *err,
+ config_term_func_t config_term)
+{
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head, list)
+ if (config_term(attr, term, err))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int get_config_terms(struct list_head *head_config,
+ struct list_head *head_terms __maybe_unused)
+{
+#define ADD_CONFIG_TERM(__type, __name, __val) \
+do { \
+ struct perf_evsel_config_term *__t; \
+ \
+ __t = zalloc(sizeof(*__t)); \
+ if (!__t) \
+ return -ENOMEM; \
+ \
+ INIT_LIST_HEAD(&__t->list); \
+ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->val.__name = __val; \
+ __t->weak = term->weak; \
+ list_add_tail(&__t->list, head_terms); \
+} while (0)
+
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ switch (term->type_term) {
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD:
+ ADD_CONFIG_TERM(PERIOD, period, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ:
+ ADD_CONFIG_TERM(FREQ, freq, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_TIME:
+ ADD_CONFIG_TERM(TIME, time, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_CALLGRAPH:
+ ADD_CONFIG_TERM(CALLGRAPH, callgraph, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE:
+ ADD_CONFIG_TERM(BRANCH, branch, term->val.str);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STACKSIZE:
+ ADD_CONFIG_TERM(STACK_USER, stack_user, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_INHERIT:
+ ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 1 : 0);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOINHERIT:
+ ADD_CONFIG_TERM(INHERIT, inherit, term->val.num ? 0 : 1);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_MAX_STACK:
+ ADD_CONFIG_TERM(MAX_STACK, max_stack, term->val.num);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_OVERWRITE:
+ ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 1 : 0);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_NOOVERWRITE:
+ ADD_CONFIG_TERM(OVERWRITE, overwrite, term->val.num ? 0 : 1);
+ break;
+ case PARSE_EVENTS__TERM_TYPE_DRV_CFG:
+ ADD_CONFIG_TERM(DRV_CFG, drv_cfg, term->val.str);
+ break;
+ default:
+ break;
+ }
+ }
+#undef ADD_CONFIG_TERM
+ return 0;
+}
+
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+ const char *sys, const char *event,
+ struct parse_events_error *err,
+ struct list_head *head_config)
+{
+ if (head_config) {
+ struct perf_event_attr attr;
+
+ if (config_attr(&attr, head_config, err,
+ config_term_tracepoint))
+ return -EINVAL;
+ }
+
+ if (strpbrk(sys, "*?"))
+ return add_tracepoint_multi_sys(list, idx, sys, event,
+ err, head_config);
+ else
+ return add_tracepoint_event(list, idx, sys, event,
+ err, head_config);
+}
+
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u32 type, u64 config,
+ struct list_head *head_config)
+{
+ struct perf_event_attr attr;
+ LIST_HEAD(config_terms);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.type = type;
+ attr.config = config;
+
+ if (head_config) {
+ if (config_attr(&attr, head_config, parse_state->error,
+ config_term_common))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+ }
+
+ return add_event(list, &parse_state->idx, &attr,
+ get_config_name(head_config), &config_terms);
+}
+
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, char *name,
+ struct list_head *head_config,
+ bool auto_merge_stats,
+ bool use_alias)
+{
+ struct perf_event_attr attr;
+ struct perf_pmu_info info;
+ struct perf_pmu *pmu;
+ struct perf_evsel *evsel;
+ struct parse_events_error *err = parse_state->error;
+ bool use_uncore_alias;
+ LIST_HEAD(config_terms);
+
+ pmu = perf_pmu__find(name);
+ if (!pmu) {
+ if (asprintf(&err->str,
+ "Cannot find PMU `%s'. Missing kernel support?",
+ name) < 0)
+ err->str = NULL;
+ return -EINVAL;
+ }
+
+ if (pmu->default_config) {
+ memcpy(&attr, pmu->default_config,
+ sizeof(struct perf_event_attr));
+ } else {
+ memset(&attr, 0, sizeof(attr));
+ }
+
+ use_uncore_alias = (pmu->is_uncore && use_alias);
+
+ if (!head_config) {
+ attr.type = pmu->type;
+ evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL, auto_merge_stats);
+ if (evsel) {
+ evsel->pmu_name = name ? strdup(name) : NULL;
+ evsel->use_uncore_alias = use_uncore_alias;
+ return 0;
+ } else {
+ return -ENOMEM;
+ }
+ }
+
+ if (perf_pmu__check_alias(pmu, head_config, &info))
+ return -EINVAL;
+
+ /*
+	 * Configure the hardcoded terms first; any invalid term makes
+	 * the whole event definition fail.
+ */
+ if (config_attr(&attr, head_config, parse_state->error, config_term_pmu))
+ return -EINVAL;
+
+ if (get_config_terms(head_config, &config_terms))
+ return -ENOMEM;
+
+ if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+ struct perf_evsel_config_term *pos, *tmp;
+
+ list_for_each_entry_safe(pos, tmp, &config_terms, list) {
+ list_del_init(&pos->list);
+ free(pos);
+ }
+ return -EINVAL;
+ }
+
+ evsel = __add_event(list, &parse_state->idx, &attr,
+ get_config_name(head_config), pmu,
+ &config_terms, auto_merge_stats);
+ if (evsel) {
+ evsel->unit = info.unit;
+ evsel->scale = info.scale;
+ evsel->per_pkg = info.per_pkg;
+ evsel->snapshot = info.snapshot;
+ evsel->metric_expr = info.metric_expr;
+ evsel->metric_name = info.metric_name;
+ evsel->pmu_name = name ? strdup(name) : NULL;
+ evsel->use_uncore_alias = use_uncore_alias;
+ }
+
+ return evsel ? 0 : -ENOMEM;
+}
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+ char *str, struct list_head **listp)
+{
+ struct list_head *head;
+ struct parse_events_term *term;
+ struct list_head *list;
+ struct perf_pmu *pmu = NULL;
+ int ok = 0;
+
+ *listp = NULL;
+ /* Add it for all PMUs that support the alias */
+ list = malloc(sizeof(struct list_head));
+ if (!list)
+ return -1;
+ INIT_LIST_HEAD(list);
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ struct perf_pmu_alias *alias;
+
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ if (!strcasecmp(alias->name, str)) {
+ head = malloc(sizeof(struct list_head));
+ if (!head)
+ return -1;
+ INIT_LIST_HEAD(head);
+ if (parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ str, 1, false, &str, NULL) < 0)
+ return -1;
+ list_add_tail(&term->list, head);
+
+ if (!parse_events_add_pmu(parse_state, list,
+ pmu->name, head,
+ true, true)) {
+ pr_debug("%s -> %s/%s/\n", str,
+ pmu->name, alias->str);
+ ok++;
+ }
+
+ parse_events_terms__delete(head);
+ }
+ }
+ }
+ if (!ok)
+ return -1;
+ *listp = list;
+ return 0;
+}
+
+int parse_events__modifier_group(struct list_head *list,
+ char *event_mod)
+{
+ return parse_events__modifier_event(list, event_mod, true);
+}
+
+/*
+ * Check if the two uncore PMUs are from the same uncore block
+ * The format of the uncore PMU name is uncore_#blockname_#pmuidx
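+ * e.g. uncore_cbox_0 and uncore_cbox_1 belong to the same "cbox" block.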
+ */
+static bool is_same_uncore_block(const char *pmu_name_a, const char *pmu_name_b)
+{
+ char *end_a, *end_b;
+
+ end_a = strrchr(pmu_name_a, '_');
+ end_b = strrchr(pmu_name_b, '_');
+
+ if (!end_a || !end_b)
+ return false;
+
+ if ((end_a - pmu_name_a) != (end_b - pmu_name_b))
+ return false;
+
+ return (strncmp(pmu_name_a, pmu_name_b, end_a - pmu_name_a) == 0);
+}
+
+static int
+parse_events__set_leader_for_uncore_alias(char *name, struct list_head *list,
+ struct parse_events_state *parse_state)
+{
+ struct perf_evsel *evsel, *leader;
+ uintptr_t *leaders;
+ bool is_leader = true;
+ int i, nr_pmu = 0, total_members, ret = 0;
+
+ leader = list_first_entry(list, struct perf_evsel, node);
+ evsel = list_last_entry(list, struct perf_evsel, node);
+ total_members = evsel->idx - leader->idx + 1;
+
+ leaders = calloc(total_members, sizeof(uintptr_t));
+ if (WARN_ON(!leaders))
+ return 0;
+
+ /*
+	 * Go through the whole group and sanity check it:
+	 * all members must use an alias and be from the same uncore block.
+	 * Also store the leader events in an array.
+ */
+ __evlist__for_each_entry(list, evsel) {
+
+ /* Only split the uncore group which members use alias */
+ if (!evsel->use_uncore_alias)
+ goto out;
+
+ /* The events must be from the same uncore block */
+ if (!is_same_uncore_block(leader->pmu_name, evsel->pmu_name))
+ goto out;
+
+ if (!is_leader)
+ continue;
+ /*
+ * If the event's PMU name starts to repeat, it must be a new
+ * event. That can be used to distinguish the leader from
+		 * other members, even if they have the same event name.
+ */
+ if ((leader != evsel) &&
+ !strcmp(leader->pmu_name, evsel->pmu_name)) {
+ is_leader = false;
+ continue;
+ }
+
+ /* Store the leader event for each PMU */
+ leaders[nr_pmu++] = (uintptr_t) evsel;
+ }
+
+ /* only one event alias */
+ if (nr_pmu == total_members) {
+ parse_state->nr_groups--;
+ goto handled;
+ }
+
+ /*
+ * An uncore event alias is a joint name which means the same event
+ * runs on all PMUs of a block.
+ * Perf doesn't support mixed events from different PMUs in the same
+ * group. The big group has to be split into multiple small groups
+ * which only include the events from the same PMU.
+ *
+ * Here the uncore event aliases must be from the same uncore block.
+	 * The number of PMUs must be the same for each alias. The number of
+	 * new small groups equals the number of PMUs.
+	 * Set the leader event for the corresponding members in each group.
+ */
+ i = 0;
+ __evlist__for_each_entry(list, evsel) {
+ if (i >= nr_pmu)
+ i = 0;
+ evsel->leader = (struct perf_evsel *) leaders[i++];
+ }
+
+	/* The number of members and the group name are the same for each group */
+ for (i = 0; i < nr_pmu; i++) {
+ evsel = (struct perf_evsel *) leaders[i];
+ evsel->nr_members = total_members / nr_pmu;
+ evsel->group_name = name ? strdup(name) : NULL;
+ }
+
+ /* Take the new small groups into account */
+ parse_state->nr_groups += nr_pmu - 1;
+
+handled:
+ ret = 1;
+out:
+ free(leaders);
+ return ret;
+}
+
+void parse_events__set_leader(char *name, struct list_head *list,
+ struct parse_events_state *parse_state)
+{
+ struct perf_evsel *leader;
+
+ if (list_empty(list)) {
+ WARN_ONCE(true, "WARNING: failed to set leader: empty list");
+ return;
+ }
+
+	if (parse_events__set_leader_for_uncore_alias(name, list, parse_state))
+ return;
+
+ __perf_evlist__set_leader(list);
+ leader = list_entry(list->next, struct perf_evsel, node);
+ leader->group_name = name ? strdup(name) : NULL;
+}
+
+/* list_event is assumed to point to malloc'ed memory */
+void parse_events_update_lists(struct list_head *list_event,
+ struct list_head *list_all)
+{
+ /*
+	 * Called for a single event definition. Update the
+	 * 'all events' list and free the now-empty 'single event'
+	 * list head before the next event definition.
+ */
+ list_splice_tail(list_event, list_all);
+ free(list_event);
+}
+
+struct event_modifier {
+ int eu;
+ int ek;
+ int eh;
+ int eH;
+ int eG;
+ int eI;
+ int precise;
+ int precise_max;
+ int exclude_GH;
+ int sample_read;
+ int pinned;
+ int weak;
+};
+
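+/*
+ * Decode a modifier string into exclude/precision flags; e.g. the "ukpp"
+ * in "cycles:ukpp" counts user and kernel but not hypervisor, and
+ * requests precise_ip level 2.
+ */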
+static int get_event_modifier(struct event_modifier *mod, char *str,
+ struct perf_evsel *evsel)
+{
+ int eu = evsel ? evsel->attr.exclude_user : 0;
+ int ek = evsel ? evsel->attr.exclude_kernel : 0;
+ int eh = evsel ? evsel->attr.exclude_hv : 0;
+ int eH = evsel ? evsel->attr.exclude_host : 0;
+ int eG = evsel ? evsel->attr.exclude_guest : 0;
+ int eI = evsel ? evsel->attr.exclude_idle : 0;
+ int precise = evsel ? evsel->attr.precise_ip : 0;
+ int precise_max = 0;
+ int sample_read = 0;
+ int pinned = evsel ? evsel->attr.pinned : 0;
+
+ int exclude = eu | ek | eh;
+ int exclude_GH = evsel ? evsel->exclude_GH : 0;
+ int weak = 0;
+
+ memset(mod, 0, sizeof(*mod));
+
+ while (*str) {
+ if (*str == 'u') {
+ if (!exclude)
+ exclude = eu = ek = eh = 1;
+ eu = 0;
+ } else if (*str == 'k') {
+ if (!exclude)
+ exclude = eu = ek = eh = 1;
+ ek = 0;
+ } else if (*str == 'h') {
+ if (!exclude)
+ exclude = eu = ek = eh = 1;
+ eh = 0;
+ } else if (*str == 'G') {
+ if (!exclude_GH)
+ exclude_GH = eG = eH = 1;
+ eG = 0;
+ } else if (*str == 'H') {
+ if (!exclude_GH)
+ exclude_GH = eG = eH = 1;
+ eH = 0;
+ } else if (*str == 'I') {
+ eI = 1;
+ } else if (*str == 'p') {
+ precise++;
+ /* use of precise requires exclude_guest */
+ if (!exclude_GH)
+ eG = 1;
+ } else if (*str == 'P') {
+ precise_max = 1;
+ } else if (*str == 'S') {
+ sample_read = 1;
+ } else if (*str == 'D') {
+ pinned = 1;
+ } else if (*str == 'W') {
+ weak = 1;
+ } else
+ break;
+
+ ++str;
+ }
+
+ /*
+ * precise ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ if (precise > 3)
+ return -EINVAL;
+
+ mod->eu = eu;
+ mod->ek = ek;
+ mod->eh = eh;
+ mod->eH = eH;
+ mod->eG = eG;
+ mod->eI = eI;
+ mod->precise = precise;
+ mod->precise_max = precise_max;
+ mod->exclude_GH = exclude_GH;
+ mod->sample_read = sample_read;
+ mod->pinned = pinned;
+ mod->weak = weak;
+
+ return 0;
+}
+
+/*
+ * Basic modifier sanity check: validate that the string contains at
+ * most one instance of each modifier (apart from 'p').
+ */
+static int check_modifier(char *str)
+{
+ char *p = str;
+
+	/* The sizeof includes the terminating NUL byte as well. */
+ if (strlen(str) > (sizeof("ukhGHpppPSDIW") - 1))
+ return -1;
+
+ while (*p) {
+ if (*p != 'p' && strchr(p + 1, *p))
+ return -1;
+ p++;
+ }
+
+ return 0;
+}
+
+int parse_events__modifier_event(struct list_head *list, char *str, bool add)
+{
+ struct perf_evsel *evsel;
+ struct event_modifier mod;
+
+ if (str == NULL)
+ return 0;
+
+ if (check_modifier(str))
+ return -EINVAL;
+
+ if (!add && get_event_modifier(&mod, str, NULL))
+ return -EINVAL;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (add && get_event_modifier(&mod, str, evsel))
+ return -EINVAL;
+
+ evsel->attr.exclude_user = mod.eu;
+ evsel->attr.exclude_kernel = mod.ek;
+ evsel->attr.exclude_hv = mod.eh;
+ evsel->attr.precise_ip = mod.precise;
+ evsel->attr.exclude_host = mod.eH;
+ evsel->attr.exclude_guest = mod.eG;
+ evsel->attr.exclude_idle = mod.eI;
+ evsel->exclude_GH = mod.exclude_GH;
+ evsel->sample_read = mod.sample_read;
+ evsel->precise_max = mod.precise_max;
+ evsel->weak_group = mod.weak;
+
+ if (perf_evsel__is_group_leader(evsel))
+ evsel->attr.pinned = mod.pinned;
+ }
+
+ return 0;
+}
+
+int parse_events_name(struct list_head *list, char *name)
+{
+ struct perf_evsel *evsel;
+
+ __evlist__for_each_entry(list, evsel) {
+ if (!evsel->name)
+ evsel->name = strdup(name);
+ }
+
+ return 0;
+}
+
+static int
+comp_pmu(const void *p1, const void *p2)
+{
+ struct perf_pmu_event_symbol *pmu1 = (struct perf_pmu_event_symbol *) p1;
+ struct perf_pmu_event_symbol *pmu2 = (struct perf_pmu_event_symbol *) p2;
+
+ return strcasecmp(pmu1->symbol, pmu2->symbol);
+}
+
+static void perf_pmu__parse_cleanup(void)
+{
+ if (perf_pmu_events_list_num > 0) {
+ struct perf_pmu_event_symbol *p;
+ int i;
+
+ for (i = 0; i < perf_pmu_events_list_num; i++) {
+ p = perf_pmu_events_list + i;
+ zfree(&p->symbol);
+ }
+ zfree(&perf_pmu_events_list);
+ perf_pmu_events_list_num = 0;
+ }
+}
+
+#define SET_SYMBOL(str, stype) \
+do { \
+ p->symbol = str; \
+ if (!p->symbol) \
+ goto err; \
+ p->type = stype; \
+} while (0)
+
+/*
+ * Read the PMU events list from sysfs and
+ * save it into perf_pmu_events_list.
+ */
+static void perf_pmu__parse_init(void)
+{
+	struct perf_pmu *pmu = NULL;
+	struct perf_pmu_alias *alias;
+	int len = 0;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ if (strchr(alias->name, '-'))
+ len++;
+ len++;
+ }
+ }
+
+ if (len == 0) {
+ perf_pmu_events_list_num = -1;
+ return;
+ }
+ perf_pmu_events_list = malloc(sizeof(struct perf_pmu_event_symbol) * len);
+ if (!perf_pmu_events_list)
+ return;
+ perf_pmu_events_list_num = len;
+
+ len = 0;
+ pmu = NULL;
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ struct perf_pmu_event_symbol *p = perf_pmu_events_list + len;
+ char *tmp = strchr(alias->name, '-');
+
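+			/*
+			 * An alias containing a dash is indexed twice; e.g. a
+			 * "cache-misses" alias yields the prefix "cache" and
+			 * the suffix "misses", so the lexer can match the two
+			 * halves separately.
+			 */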
+ if (tmp != NULL) {
+ SET_SYMBOL(strndup(alias->name, tmp - alias->name),
+ PMU_EVENT_SYMBOL_PREFIX);
+ p++;
+ SET_SYMBOL(strdup(++tmp), PMU_EVENT_SYMBOL_SUFFIX);
+ len += 2;
+ } else {
+ SET_SYMBOL(strdup(alias->name), PMU_EVENT_SYMBOL);
+ len++;
+ }
+ }
+ }
+ qsort(perf_pmu_events_list, len,
+ sizeof(struct perf_pmu_event_symbol), comp_pmu);
+
+ return;
+err:
+ perf_pmu__parse_cleanup();
+}
+
+enum perf_pmu_event_symbol_type
+perf_pmu__parse_check(const char *name)
+{
+ struct perf_pmu_event_symbol p, *r;
+
+ /* scan kernel pmu events from sysfs if needed */
+ if (perf_pmu_events_list_num == 0)
+ perf_pmu__parse_init();
+ /*
+	 * The name "cpu" could be a prefix of cpu-cycles or cpu// events.
+	 * cpu-cycles has already been handled by hardcoded logic, so here
+	 * it must be a cpu// event, not a kernel PMU event.
+ */
+ if ((perf_pmu_events_list_num <= 0) || !strcmp(name, "cpu"))
+ return PMU_EVENT_SYMBOL_ERR;
+
+ p.symbol = strdup(name);
+ r = bsearch(&p, perf_pmu_events_list,
+ (size_t) perf_pmu_events_list_num,
+ sizeof(struct perf_pmu_event_symbol), comp_pmu);
+ zfree(&p.symbol);
+ return r ? r->type : PMU_EVENT_SYMBOL_ERR;
+}
+
+static int parse_events__scanner(const char *str, void *parse_state, int start_token)
+{
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ ret = parse_events_lex_init_extra(start_token, &scanner);
+ if (ret)
+ return ret;
+
+ buffer = parse_events__scan_string(str, scanner);
+
+#ifdef PARSER_DEBUG
+ parse_events_debug = 1;
+#endif
+ ret = parse_events_parse(parse_state, scanner);
+
+ parse_events__flush_buffer(buffer, scanner);
+ parse_events__delete_buffer(buffer, scanner);
+ parse_events_lex_destroy(scanner);
+ return ret;
+}
+
+/*
+ * Parse an event config string and return a list of event terms.
+ */
+int parse_events_terms(struct list_head *terms, const char *str)
+{
+ struct parse_events_state parse_state = {
+ .terms = NULL,
+ };
+ int ret;
+
+ ret = parse_events__scanner(str, &parse_state, PE_START_TERMS);
+ if (!ret) {
+ list_splice(parse_state.terms, terms);
+ zfree(&parse_state.terms);
+ return 0;
+ }
+
+ parse_events_terms__delete(parse_state.terms);
+ return ret;
+}
+
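+/*
+ * Top-level entry point: parse an event definition string such as
+ * "cycles:u,instructions" and append the resulting evsels to the evlist.
+ */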
+int parse_events(struct perf_evlist *evlist, const char *str,
+ struct parse_events_error *err)
+{
+ struct parse_events_state parse_state = {
+ .list = LIST_HEAD_INIT(parse_state.list),
+ .idx = evlist->nr_entries,
+ .error = err,
+ .evlist = evlist,
+ };
+ int ret;
+
+ ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS);
+ perf_pmu__parse_cleanup();
+
+ if (!ret && list_empty(&parse_state.list)) {
+ WARN_ONCE(true, "WARNING: event parser found nothing\n");
+ return -1;
+ }
+
+ /*
+ * Add list to the evlist even with errors to allow callers to clean up.
+ */
+ perf_evlist__splice_list_tail(evlist, &parse_state.list);
+
+ if (!ret) {
+ struct perf_evsel *last;
+
+ evlist->nr_groups += parse_state.nr_groups;
+ last = perf_evlist__last(evlist);
+ last->cmdline_group_boundary = true;
+
+ return 0;
+ }
+
+ /*
+ * There are 2 users - builtin-record and builtin-test objects.
+	 * There are 2 users - builtin-record and builtin-test objects.
+	 * Both call perf_evlist__delete() in case of error, so we don't
+ */
+ return ret;
+}
+
+#define MAX_WIDTH 1000
+static int get_term_width(void)
+{
+ struct winsize ws;
+
+ get_term_dimensions(&ws);
+ return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
+}
+
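+/*
+ * Print a parse error with a marker under the offending column of the
+ * event string, along the lines of (illustrative):
+ *
+ *   event syntax error: 'cycles:xu'
+ *                              \___ parser error
+ */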
+void parse_events_print_error(struct parse_events_error *err,
+ const char *event)
+{
+ const char *str = "invalid or unsupported event: ";
+ char _buf[MAX_WIDTH];
+ char *buf = (char *) event;
+ int idx = 0;
+
+ if (err->str) {
+ /* -2 for extra '' in the final fprintf */
+ int width = get_term_width() - 2;
+ int len_event = strlen(event);
+ int len_str, max_len, cut = 0;
+
+ /*
+ * Maximum error index indent, we will cut
+ * the event string if it's bigger.
+ */
+ int max_err_idx = 13;
+
+ /*
+ * Let's be specific with the message when
+ * we have the precise error.
+ */
+ str = "event syntax error: ";
+ len_str = strlen(str);
+ max_len = width - len_str;
+
+ buf = _buf;
+
+ /* We're cutting from the beginning. */
+ if (err->idx > max_err_idx)
+ cut = err->idx - max_err_idx;
+
+		strncpy(buf, event + cut, max_len);
+		buf[max_len] = 0; /* strncpy() does not NUL-terminate on truncation */
+
+ /* Mark cut parts with '..' on both sides. */
+ if (cut)
+ buf[0] = buf[1] = '.';
+
+ if ((len_event - cut) > max_len) {
+ buf[max_len - 1] = buf[max_len - 2] = '.';
+ buf[max_len] = 0;
+ }
+
+ idx = len_str + err->idx - cut;
+ }
+
+ fprintf(stderr, "%s'%s'\n", str, buf);
+ if (idx) {
+ fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str);
+ if (err->help)
+ fprintf(stderr, "\n%s\n", err->help);
+ zfree(&err->str);
+ zfree(&err->help);
+ }
+}
+
+#undef MAX_WIDTH
+
+int parse_events_option(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+ struct parse_events_error err = { .idx = 0, };
+ int ret = parse_events(evlist, str, &err);
+
+ if (ret) {
+ parse_events_print_error(&err, str);
+ fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+ }
+
+ return ret;
+}
+
+static int
+foreach_evsel_in_last_glob(struct perf_evlist *evlist,
+ int (*func)(struct perf_evsel *evsel,
+ const void *arg),
+ const void *arg)
+{
+ struct perf_evsel *last = NULL;
+ int err;
+
+ /*
+	 * Don't return when the list is empty; give func a chance to
+	 * report an error when it finds last == NULL.
+	 *
+	 * So no need to WARN here; let *func do it.
+ */
+ if (evlist->nr_entries > 0)
+ last = perf_evlist__last(evlist);
+
+ do {
+ err = (*func)(last, arg);
+ if (err)
+ return -1;
+ if (!last)
+ return 0;
+
+ if (last->node.prev == &evlist->entries)
+ return 0;
+ last = list_entry(last->node.prev, struct perf_evsel, node);
+ } while (!last->cmdline_group_boundary);
+
+ return 0;
+}
+
+static int set_filter(struct perf_evsel *evsel, const void *arg)
+{
+ const char *str = arg;
+ bool found = false;
+ int nr_addr_filters = 0;
+ struct perf_pmu *pmu = NULL;
+
+ if (evsel == NULL) {
+ fprintf(stderr,
+ "--filter option should follow a -e tracepoint or HW tracer option\n");
+ return -1;
+ }
+
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT) {
+ if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+ }
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL)
+ if (pmu->type == evsel->attr.type) {
+ found = true;
+ break;
+ }
+
+ if (found)
+ perf_pmu__scan_file(pmu, "nr_addr_filters",
+ "%d", &nr_addr_filters);
+
+ if (!nr_addr_filters) {
+ fprintf(stderr,
+ "This CPU does not support address filtering\n");
+ return -1;
+ }
+
+ if (perf_evsel__append_addr_filter(evsel, str) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int parse_filter(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+
+ return foreach_evsel_in_last_glob(evlist, set_filter,
+ (const void *)str);
+}
+
+static int add_exclude_perf_filter(struct perf_evsel *evsel,
+ const void *arg __maybe_unused)
+{
+ char new_filter[64];
+
+ if (evsel == NULL || evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ fprintf(stderr,
+ "--exclude-perf option should follow a -e tracepoint option\n");
+ return -1;
+ }
+
+ snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
+
+ if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
+ fprintf(stderr,
+ "not enough memory to hold filter string\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int exclude_perf(const struct option *opt,
+ const char *arg __maybe_unused,
+ int unset __maybe_unused)
+{
+ struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
+
+ return foreach_evsel_in_last_glob(evlist, add_exclude_perf_filter,
+ NULL);
+}
+
+static const char * const event_type_descriptors[] = {
+ "Hardware event",
+ "Software event",
+ "Tracepoint event",
+ "Hardware cache event",
+ "Raw hardware event descriptor",
+ "Hardware breakpoint",
+};
+
+static int cmp_string(const void *a, const void *b)
+{
+ const char * const *as = a;
+ const char * const *bs = b;
+
+ return strcmp(*as, *bs);
+}
+
+/*
+ * Print the events from <debugfs_mount_point>/tracing/events
+ */
+
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
+ bool name_only)
+{
+ DIR *sys_dir, *evt_dir;
+ struct dirent *sys_dirent, *evt_dirent;
+ char evt_path[MAXPATHLEN];
+ char *dir_path;
+ char **evt_list = NULL;
+ unsigned int evt_i = 0, evt_num = 0;
+ bool evt_num_known = false;
+
+restart:
+ sys_dir = tracing_events__opendir();
+ if (!sys_dir)
+ return;
+
+ if (evt_num_known) {
+ evt_list = zalloc(sizeof(char *) * evt_num);
+ if (!evt_list)
+ goto out_close_sys_dir;
+ }
+
+ for_each_subsystem(sys_dir, sys_dirent) {
+ if (subsys_glob != NULL &&
+ !strglobmatch(sys_dirent->d_name, subsys_glob))
+ continue;
+
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
+ evt_dir = opendir(dir_path);
+ if (!evt_dir)
+ goto next;
+
+ for_each_event(dir_path, evt_dir, evt_dirent) {
+ if (event_glob != NULL &&
+ !strglobmatch(evt_dirent->d_name, event_glob))
+ continue;
+
+ if (!evt_num_known) {
+ evt_num++;
+ continue;
+ }
+
+ snprintf(evt_path, MAXPATHLEN, "%s:%s",
+ sys_dirent->d_name, evt_dirent->d_name);
+
+ evt_list[evt_i] = strdup(evt_path);
+ if (evt_list[evt_i] == NULL) {
+ put_events_file(dir_path);
+ goto out_close_evt_dir;
+ }
+ evt_i++;
+ }
+ closedir(evt_dir);
+next:
+ put_events_file(dir_path);
+ }
+ closedir(sys_dir);
+
+ if (!evt_num_known) {
+ evt_num_known = true;
+ goto restart;
+ }
+ qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+ evt_i = 0;
+ while (evt_i < evt_num) {
+ if (name_only) {
+ printf("%s ", evt_list[evt_i++]);
+ continue;
+ }
+ printf(" %-50s [%s]\n", evt_list[evt_i++],
+ event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+ }
+ if (evt_num && pager_in_use())
+ printf("\n");
+
+out_free:
+ evt_num = evt_i;
+ for (evt_i = 0; evt_i < evt_num; evt_i++)
+ zfree(&evt_list[evt_i]);
+ zfree(&evt_list);
+ return;
+
+out_close_evt_dir:
+ closedir(evt_dir);
+out_close_sys_dir:
+ closedir(sys_dir);
+
+ printf("FATAL: not enough memory to print %s\n",
+ event_type_descriptors[PERF_TYPE_TRACEPOINT]);
+ if (evt_list)
+ goto out_free;
+}
+
+/*
+ * Check whether event is in <debugfs_mount_point>/tracing/events
+ */
+
+int is_valid_tracepoint(const char *event_string)
+{
+ DIR *sys_dir, *evt_dir;
+ struct dirent *sys_dirent, *evt_dirent;
+ char evt_path[MAXPATHLEN];
+ char *dir_path;
+
+ sys_dir = tracing_events__opendir();
+ if (!sys_dir)
+ return 0;
+
+ for_each_subsystem(sys_dir, sys_dirent) {
+ dir_path = get_events_file(sys_dirent->d_name);
+ if (!dir_path)
+ continue;
+ evt_dir = opendir(dir_path);
+ if (!evt_dir)
+ goto next;
+
+ for_each_event(dir_path, evt_dir, evt_dirent) {
+ snprintf(evt_path, MAXPATHLEN, "%s:%s",
+ sys_dirent->d_name, evt_dirent->d_name);
+ if (!strcmp(evt_path, event_string)) {
+ closedir(evt_dir);
+ closedir(sys_dir);
+ return 1;
+ }
+ }
+ closedir(evt_dir);
+next:
+ put_events_file(dir_path);
+ }
+ closedir(sys_dir);
+ return 0;
+}
+
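+/*
+ * Probe support by actually opening the event on the current thread:
+ * the event is considered supported if perf_evsel__open() succeeds.
+ */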
+static bool is_event_supported(u8 type, unsigned config)
+{
+ bool ret = true;
+ int open_return;
+ struct perf_evsel *evsel;
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .disabled = 1,
+ };
+ struct thread_map *tmap = thread_map__new_by_tid(0);
+
+ if (tmap == NULL)
+ return false;
+
+ evsel = perf_evsel__new(&attr);
+ if (evsel) {
+ open_return = perf_evsel__open(evsel, NULL, tmap);
+ ret = open_return >= 0;
+
+ if (open_return == -EACCES) {
+ /*
+ * This happens if the paranoid value
+			 * /proc/sys/kernel/perf_event_paranoid is set to 2.
+			 * Re-run with exclude_kernel set; we don't do that
+			 * by default as some ARM machines do not support it.
+ */
+ evsel->attr.exclude_kernel = 1;
+ ret = perf_evsel__open(evsel, NULL, tmap) >= 0;
+ }
+ perf_evsel__delete(evsel);
+ }
+
+ thread_map__put(tmap);
+ return ret;
+}
+
+void print_sdt_events(const char *subsys_glob, const char *event_glob,
+ bool name_only)
+{
+ struct probe_cache *pcache;
+ struct probe_cache_entry *ent;
+ struct strlist *bidlist, *sdtlist;
+ struct strlist_config cfg = {.dont_dupstr = true};
+ struct str_node *nd, *nd2;
+ char *buf, *path, *ptr = NULL;
+ bool show_detail = false;
+ int ret;
+
+ sdtlist = strlist__new(NULL, &cfg);
+ if (!sdtlist) {
+ pr_debug("Failed to allocate new strlist for SDT\n");
+ return;
+ }
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+		pr_debug("Failed to get buildids: %d\n", errno);
+		strlist__delete(sdtlist);
+		return;
+ }
+ strlist__for_each_entry(nd, bidlist) {
+ pcache = probe_cache__new(nd->s, NULL);
+ if (!pcache)
+ continue;
+ list_for_each_entry(ent, &pcache->entries, node) {
+ if (!ent->sdt)
+ continue;
+ if (subsys_glob &&
+ !strglobmatch(ent->pev.group, subsys_glob))
+ continue;
+ if (event_glob &&
+ !strglobmatch(ent->pev.event, event_glob))
+ continue;
+ ret = asprintf(&buf, "%s:%s@%s", ent->pev.group,
+ ent->pev.event, nd->s);
+ if (ret > 0)
+ strlist__add(sdtlist, buf);
+ }
+ probe_cache__delete(pcache);
+ }
+ strlist__delete(bidlist);
+
+ strlist__for_each_entry(nd, sdtlist) {
+ buf = strchr(nd->s, '@');
+ if (buf)
+ *(buf++) = '\0';
+ if (name_only) {
+ printf("%s ", nd->s);
+ continue;
+ }
+ nd2 = strlist__next(nd);
+ if (nd2) {
+ ptr = strchr(nd2->s, '@');
+ if (ptr)
+ *ptr = '\0';
+ if (strcmp(nd->s, nd2->s) == 0)
+ show_detail = true;
+ }
+ if (show_detail) {
+ path = build_id_cache__origname(buf);
+ ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf);
+ if (ret > 0) {
+ printf(" %-50s [%s]\n", buf, "SDT event");
+ free(buf);
+ }
+ free(path);
+ } else
+ printf(" %-50s [%s]\n", nd->s, "SDT event");
+ if (nd2) {
+ if (strcmp(nd->s, nd2->s) != 0)
+ show_detail = false;
+ if (ptr)
+ *ptr = '@';
+ }
+ }
+ strlist__delete(sdtlist);
+}
+
+int print_hwcache_events(const char *event_glob, bool name_only)
+{
+ unsigned int type, op, i, evt_i = 0, evt_num = 0;
+ char name[64];
+ char **evt_list = NULL;
+ bool evt_num_known = false;
+
+restart:
+ if (evt_num_known) {
+ evt_list = zalloc(sizeof(char *) * evt_num);
+ if (!evt_list)
+ goto out_enomem;
+ }
+
+ for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+ for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+ /* skip invalid cache type */
+ if (!perf_evsel__is_cache_op_valid(type, op))
+ continue;
+
+ for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+ __perf_evsel__hw_cache_type_op_res_name(type, op, i,
+ name, sizeof(name));
+ if (event_glob != NULL && !strglobmatch(name, event_glob))
+ continue;
+
+ if (!is_event_supported(PERF_TYPE_HW_CACHE,
+ type | (op << 8) | (i << 16)))
+ continue;
+
+ if (!evt_num_known) {
+ evt_num++;
+ continue;
+ }
+
+ evt_list[evt_i] = strdup(name);
+ if (evt_list[evt_i] == NULL)
+ goto out_enomem;
+ evt_i++;
+ }
+ }
+ }
+
+ if (!evt_num_known) {
+ evt_num_known = true;
+ goto restart;
+ }
+ qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+ evt_i = 0;
+ while (evt_i < evt_num) {
+ if (name_only) {
+ printf("%s ", evt_list[evt_i++]);
+ continue;
+ }
+ printf(" %-50s [%s]\n", evt_list[evt_i++],
+ event_type_descriptors[PERF_TYPE_HW_CACHE]);
+ }
+ if (evt_num && pager_in_use())
+ printf("\n");
+
+out_free:
+ evt_num = evt_i;
+ for (evt_i = 0; evt_i < evt_num; evt_i++)
+ zfree(&evt_list[evt_i]);
+ zfree(&evt_list);
+ return evt_num;
+
+out_enomem:
+ printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]);
+ if (evt_list)
+ goto out_free;
+ return evt_num;
+}
+
+void print_symbol_events(const char *event_glob, unsigned type,
+ struct event_symbol *syms, unsigned max,
+ bool name_only)
+{
+ unsigned int i, evt_i = 0, evt_num = 0;
+ char name[MAX_NAME_LEN];
+ char **evt_list = NULL;
+ bool evt_num_known = false;
+
+restart:
+ if (evt_num_known) {
+ evt_list = zalloc(sizeof(char *) * evt_num);
+ if (!evt_list)
+ goto out_enomem;
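+		/* Rewind to the first symbol for the second (filling) pass. */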
+ syms -= max;
+ }
+
+ for (i = 0; i < max; i++, syms++) {
+
+ if (event_glob != NULL && syms->symbol != NULL &&
+ !(strglobmatch(syms->symbol, event_glob) ||
+ (syms->alias && strglobmatch(syms->alias, event_glob))))
+ continue;
+
+ if (!is_event_supported(type, i))
+ continue;
+
+ if (!evt_num_known) {
+ evt_num++;
+ continue;
+ }
+
+ if (!name_only && strlen(syms->alias))
+ snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
+ else
+ strlcpy(name, syms->symbol, MAX_NAME_LEN);
+
+ evt_list[evt_i] = strdup(name);
+ if (evt_list[evt_i] == NULL)
+ goto out_enomem;
+ evt_i++;
+ }
+
+ if (!evt_num_known) {
+ evt_num_known = true;
+ goto restart;
+ }
+ qsort(evt_list, evt_num, sizeof(char *), cmp_string);
+ evt_i = 0;
+ while (evt_i < evt_num) {
+ if (name_only) {
+ printf("%s ", evt_list[evt_i++]);
+ continue;
+ }
+ printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]);
+ }
+ if (evt_num && pager_in_use())
+ printf("\n");
+
+out_free:
+ evt_num = evt_i;
+ for (evt_i = 0; evt_i < evt_num; evt_i++)
+ zfree(&evt_list[evt_i]);
+ zfree(&evt_list);
+ return;
+
+out_enomem:
+ printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]);
+ if (evt_list)
+ goto out_free;
+}
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(const char *event_glob, bool name_only, bool quiet_flag,
+ bool long_desc, bool details_flag)
+{
+ print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
+ event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
+
+ print_symbol_events(event_glob, PERF_TYPE_SOFTWARE,
+ event_symbols_sw, PERF_COUNT_SW_MAX, name_only);
+
+ print_hwcache_events(event_glob, name_only);
+
+ print_pmu_events(event_glob, name_only, quiet_flag, long_desc,
+ details_flag);
+
+ if (event_glob != NULL)
+ return;
+
+ if (!name_only) {
+ printf(" %-50s [%s]\n",
+ "rNNN",
+ event_type_descriptors[PERF_TYPE_RAW]);
+ printf(" %-50s [%s]\n",
+ "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
+ event_type_descriptors[PERF_TYPE_RAW]);
+ if (pager_in_use())
+ printf(" (see 'man perf-list' on how to encode it)\n\n");
+
+ printf(" %-50s [%s]\n",
+ "mem:<addr>[/len][:access]",
+ event_type_descriptors[PERF_TYPE_BREAKPOINT]);
+ if (pager_in_use())
+ printf("\n");
+ }
+
+ print_tracepoint_events(NULL, NULL, name_only);
+
+ print_sdt_events(NULL, NULL, name_only);
+
+ metricgroup__print(true, true, NULL, name_only);
+}
+
+int parse_events__is_hardcoded_term(struct parse_events_term *term)
+{
+ return term->type_term != PARSE_EVENTS__TERM_TYPE_USER;
+}
+
+static int new_term(struct parse_events_term **_term,
+ struct parse_events_term *temp,
+ char *str, u64 num)
+{
+ struct parse_events_term *term;
+
+ term = malloc(sizeof(*term));
+ if (!term)
+ return -ENOMEM;
+
+ *term = *temp;
+ INIT_LIST_HEAD(&term->list);
+ term->weak = false;
+
+ switch (term->type_val) {
+ case PARSE_EVENTS__TERM_TYPE_NUM:
+ term->val.num = num;
+ break;
+ case PARSE_EVENTS__TERM_TYPE_STR:
+ term->val.str = str;
+ break;
+ default:
+ free(term);
+ return -EINVAL;
+ }
+
+ *_term = term;
+ return 0;
+}
+
+int parse_events_term__num(struct parse_events_term **term,
+ int type_term, char *config, u64 num,
+ bool no_value,
+ void *loc_term_, void *loc_val_)
+{
+ YYLTYPE *loc_term = loc_term_;
+ YYLTYPE *loc_val = loc_val_;
+
+ struct parse_events_term temp = {
+ .type_val = PARSE_EVENTS__TERM_TYPE_NUM,
+ .type_term = type_term,
+ .config = config,
+ .no_value = no_value,
+ .err_term = loc_term ? loc_term->first_column : 0,
+ .err_val = loc_val ? loc_val->first_column : 0,
+ };
+
+ return new_term(term, &temp, NULL, num);
+}
+
+int parse_events_term__str(struct parse_events_term **term,
+ int type_term, char *config, char *str,
+ void *loc_term_, void *loc_val_)
+{
+ YYLTYPE *loc_term = loc_term_;
+ YYLTYPE *loc_val = loc_val_;
+
+ struct parse_events_term temp = {
+ .type_val = PARSE_EVENTS__TERM_TYPE_STR,
+ .type_term = type_term,
+ .config = config,
+ .err_term = loc_term ? loc_term->first_column : 0,
+ .err_val = loc_val ? loc_val->first_column : 0,
+ };
+
+ return new_term(term, &temp, str, 0);
+}
+
+int parse_events_term__sym_hw(struct parse_events_term **term,
+ char *config, unsigned idx)
+{
+ struct event_symbol *sym;
+ struct parse_events_term temp = {
+ .type_val = PARSE_EVENTS__TERM_TYPE_STR,
+ .type_term = PARSE_EVENTS__TERM_TYPE_USER,
+ .config = config ?: (char *) "event",
+ };
+
+ BUG_ON(idx >= PERF_COUNT_HW_MAX);
+ sym = &event_symbols_hw[idx];
+
+ return new_term(term, &temp, (char *) sym->symbol, 0);
+}
+
+int parse_events_term__clone(struct parse_events_term **new,
+ struct parse_events_term *term)
+{
+ struct parse_events_term temp = {
+ .type_val = term->type_val,
+ .type_term = term->type_term,
+ .config = term->config,
+ .err_term = term->err_term,
+ .err_val = term->err_val,
+ };
+
+ return new_term(new, &temp, term->val.str, term->val.num);
+}
+
+int parse_events_copy_term_list(struct list_head *old,
+ struct list_head **new)
+{
+ struct parse_events_term *term, *n;
+ int ret;
+
+ if (!old) {
+ *new = NULL;
+ return 0;
+ }
+
+ *new = malloc(sizeof(struct list_head));
+ if (!*new)
+ return -ENOMEM;
+ INIT_LIST_HEAD(*new);
+
+	list_for_each_entry(term, old, list) {
+		ret = parse_events_term__clone(&n, term);
+		if (ret) {
+			parse_events_terms__delete(*new);
+			*new = NULL;
+			return ret;
+		}
+		list_add_tail(&n->list, *new);
+	}
+ return 0;
+}
+
+void parse_events_terms__purge(struct list_head *terms)
+{
+ struct parse_events_term *term, *h;
+
+ list_for_each_entry_safe(term, h, terms, list) {
+ if (term->array.nr_ranges)
+ zfree(&term->array.ranges);
+ list_del_init(&term->list);
+ free(term);
+ }
+}
+
+void parse_events_terms__delete(struct list_head *terms)
+{
+ if (!terms)
+ return;
+ parse_events_terms__purge(terms);
+ free(terms);
+}
+
+void parse_events__clear_array(struct parse_events_array *a)
+{
+ zfree(&a->ranges);
+}
+
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+ int idx, const char *str)
+{
+ struct parse_events_error *err = parse_state->error;
+
+ if (!err)
+ return;
+ err->idx = idx;
+ err->str = strdup(str);
+ WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
+}
+
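+/*
+ * Build in 'buf' a comma-separated list of the static config term
+ * names a user may set, skipping "<...>" placeholder names and terms
+ * that config_term_avail() reports as unavailable, and stopping before
+ * 'buf_sz' would overflow.
+ */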
+static void config_terms_list(char *buf, size_t buf_sz)
+{
+ int i;
+ bool first = true;
+
+ buf[0] = '\0';
+ for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) {
+ const char *name = config_term_names[i];
+
+ if (!config_term_avail(i, NULL))
+ continue;
+ if (!name)
+ continue;
+ if (name[0] == '<')
+ continue;
+
+ if (strlen(buf) + strlen(name) + 2 >= buf_sz)
+ return;
+
+ if (!first)
+ strcat(buf, ",");
+ else
+ first = false;
+ strcat(buf, name);
+ }
+}
+
+/*
+ * Return a string containing the valid config terms of an event.
+ * @additional_terms: additional terms, such as PMU sysfs terms.
+ */
+char *parse_events_formats_error_string(char *additional_terms)
+{
+ char *str;
+ /* "no-overwrite" is the longest name */
+ char static_terms[__PARSE_EVENTS__TERM_TYPE_NR *
+ (sizeof("no-overwrite") - 1)];
+
+ config_terms_list(static_terms, sizeof(static_terms));
+ /* valid terms */
+ if (additional_terms) {
+ if (asprintf(&str, "valid terms: %s,%s",
+ additional_terms, static_terms) < 0)
+ goto fail;
+ } else {
+ if (asprintf(&str, "valid terms: %s", static_terms) < 0)
+ goto fail;
+ }
+ return str;
+
+fail:
+ return NULL;
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
new file mode 100644
index 000000000..4473dac27
--- /dev/null
+++ b/tools/perf/util/parse-events.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_PARSE_EVENTS_H
+#define __PERF_PARSE_EVENTS_H
+/*
+ * Parse symbolic events/counts passed in as options:
+ */
+
+#include <linux/list.h>
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <string.h>
+
+struct list_head;
+struct perf_evsel;
+struct perf_evlist;
+struct parse_events_error;
+
+struct option;
+
+struct tracepoint_path {
+ char *system;
+ char *name;
+ struct tracepoint_path *next;
+};
+
+struct tracepoint_path *tracepoint_id_to_path(u64 config);
+struct tracepoint_path *tracepoint_name_to_path(const char *name);
+bool have_tracepoints(struct list_head *evlist);
+
+const char *event_type(int type);
+
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events(struct perf_evlist *evlist, const char *str,
+ struct parse_events_error *error);
+int parse_events_terms(struct list_head *terms, const char *str);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
+
+#define EVENTS_HELP_MAX (128*1024)
+
+enum perf_pmu_event_symbol_type {
+ PMU_EVENT_SYMBOL_ERR, /* not a PMU EVENT */
+ PMU_EVENT_SYMBOL, /* normal style PMU event */
+ PMU_EVENT_SYMBOL_PREFIX, /* prefix of pre-suf style event */
+ PMU_EVENT_SYMBOL_SUFFIX, /* suffix of pre-suf style event */
+};
+
+struct perf_pmu_event_symbol {
+ char *symbol;
+ enum perf_pmu_event_symbol_type type;
+};
+
+enum {
+ PARSE_EVENTS__TERM_TYPE_NUM,
+ PARSE_EVENTS__TERM_TYPE_STR,
+};
+
+enum {
+ PARSE_EVENTS__TERM_TYPE_USER,
+ PARSE_EVENTS__TERM_TYPE_CONFIG,
+ PARSE_EVENTS__TERM_TYPE_CONFIG1,
+ PARSE_EVENTS__TERM_TYPE_CONFIG2,
+ PARSE_EVENTS__TERM_TYPE_NAME,
+ PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD,
+ PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ,
+ PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE,
+ PARSE_EVENTS__TERM_TYPE_TIME,
+ PARSE_EVENTS__TERM_TYPE_CALLGRAPH,
+ PARSE_EVENTS__TERM_TYPE_STACKSIZE,
+ PARSE_EVENTS__TERM_TYPE_NOINHERIT,
+ PARSE_EVENTS__TERM_TYPE_INHERIT,
+ PARSE_EVENTS__TERM_TYPE_MAX_STACK,
+ PARSE_EVENTS__TERM_TYPE_NOOVERWRITE,
+ PARSE_EVENTS__TERM_TYPE_OVERWRITE,
+ PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+ __PARSE_EVENTS__TERM_TYPE_NR,
+};
+
+struct parse_events_array {
+ size_t nr_ranges;
+ struct {
+ unsigned int start;
+ size_t length;
+ } *ranges;
+};
+
+struct parse_events_term {
+ char *config;
+ struct parse_events_array array;
+ union {
+ char *str;
+ u64 num;
+ } val;
+ int type_val;
+ int type_term;
+ struct list_head list;
+ bool used;
+ bool no_value;
+
+	/* error string indexes within the parsed string */
+ int err_term;
+ int err_val;
+
+ /* Coming from implicit alias */
+ bool weak;
+};
+
+struct parse_events_error {
+ int idx; /* index in the parsed string */
+ char *str; /* string to display at the index */
+ char *help; /* optional help string */
+};
+
+struct parse_events_state {
+ struct list_head list;
+ int idx;
+ int nr_groups;
+ struct parse_events_error *error;
+ struct perf_evlist *evlist;
+ struct list_head *terms;
+};
+
+void parse_events__shrink_config_terms(void);
+int parse_events__is_hardcoded_term(struct parse_events_term *term);
+int parse_events_term__num(struct parse_events_term **term,
+ int type_term, char *config, u64 num,
+ bool novalue,
+ void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+ int type_term, char *config, char *str,
+ void *loc_term, void *loc_val);
+int parse_events_term__sym_hw(struct parse_events_term **term,
+ char *config, unsigned idx);
+int parse_events_term__clone(struct parse_events_term **new,
+ struct parse_events_term *term);
+void parse_events_terms__delete(struct list_head *terms);
+void parse_events_terms__purge(struct list_head *terms);
+void parse_events__clear_array(struct parse_events_array *a);
+int parse_events__modifier_event(struct list_head *list, char *str, bool add);
+int parse_events__modifier_group(struct list_head *list, char *event_mod);
+int parse_events_name(struct list_head *list, char *name);
+int parse_events_add_tracepoint(struct list_head *list, int *idx,
+ const char *sys, const char *event,
+ struct parse_events_error *error,
+ struct list_head *head_config);
+int parse_events_load_bpf(struct parse_events_state *parse_state,
+ struct list_head *list,
+ char *bpf_file_name,
+ bool source,
+ struct list_head *head_config);
+/* Provide this function for perf test */
+struct bpf_object;
+int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
+ struct list_head *list,
+ struct bpf_object *obj,
+ struct list_head *head_config);
+int parse_events_add_numeric(struct parse_events_state *parse_state,
+ struct list_head *list,
+ u32 type, u64 config,
+ struct list_head *head_config);
+int parse_events_add_cache(struct list_head *list, int *idx,
+ char *type, char *op_result1, char *op_result2,
+ struct parse_events_error *error,
+ struct list_head *head_config);
+int parse_events_add_breakpoint(struct list_head *list, int *idx,
+ void *ptr, char *type, u64 len);
+int parse_events_add_pmu(struct parse_events_state *parse_state,
+ struct list_head *list, char *name,
+ struct list_head *head_config,
+ bool auto_merge_stats,
+ bool use_alias);
+
+int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
+ char *str,
+ struct list_head **listp);
+
+int parse_events_copy_term_list(struct list_head *old,
+ struct list_head **new);
+
+enum perf_pmu_event_symbol_type
+perf_pmu__parse_check(const char *name);
+void parse_events__set_leader(char *name, struct list_head *list,
+ struct parse_events_state *parse_state);
+void parse_events_update_lists(struct list_head *list_event,
+ struct list_head *list_all);
+void parse_events_evlist_error(struct parse_events_state *parse_state,
+ int idx, const char *str);
+
+void print_events(const char *event_glob, bool name_only, bool quiet,
+ bool long_desc, bool details_flag);
+
+struct event_symbol {
+ const char *symbol;
+ const char *alias;
+};
+extern struct event_symbol event_symbols_hw[];
+extern struct event_symbol event_symbols_sw[];
+void print_symbol_events(const char *event_glob, unsigned type,
+ struct event_symbol *syms, unsigned max,
+ bool name_only);
+void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
+ bool name_only);
+int print_hwcache_events(const char *event_glob, bool name_only);
+void print_sdt_events(const char *subsys_glob, const char *event_glob,
+ bool name_only);
+int is_valid_tracepoint(const char *event_string);
+
+int valid_event_mount(const char *eventfs);
+char *parse_events_formats_error_string(char *additional_terms);
+
+void parse_events_print_error(struct parse_events_error *err,
+ const char *event);
+
+#ifdef HAVE_LIBELF_SUPPORT
+/*
+ * If the probe point starts with '%', or starts with "sdt_" and has a
+ * ':' but no '=', then it should be an SDT/cached probe point.
+ */
+static inline bool is_sdt_event(char *str)
+{
+ return (str[0] == '%' ||
+ (!strncmp(str, "sdt_", 4) &&
+ !!strchr(str, ':') && !strchr(str, '=')));
+}
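+
+/*
+ * For example, "%user:probe" and "sdt_libc:setjmp" are SDT/cached
+ * probe points, while "sdt_libc:setjmp=on" is not (it carries a '=').
+ */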
+#else
+static inline bool is_sdt_event(char *str __maybe_unused)
+{
+ return false;
+}
+#endif /* HAVE_LIBELF_SUPPORT */
+
+#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
new file mode 100644
index 000000000..5f761f3ed
--- /dev/null
+++ b/tools/perf/util/parse-events.l
@@ -0,0 +1,378 @@
+
+%option reentrant
+%option bison-bridge
+%option prefix="parse_events_"
+%option stack
+%option bison-locations
+%option yylineno
+%option reject
+
+%{
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "../perf.h"
+#include "parse-events.h"
+#include "parse-events-bison.h"
+
+char *parse_events_get_text(yyscan_t yyscanner);
+YYSTYPE *parse_events_get_lval(yyscan_t yyscanner);
+
+static int __value(YYSTYPE *yylval, char *str, int base, int token)
+{
+ u64 num;
+
+ errno = 0;
+ num = strtoull(str, NULL, base);
+ if (errno)
+ return PE_ERROR;
+
+ yylval->num = num;
+ return token;
+}
+
+static int value(yyscan_t scanner, int base)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ return __value(yylval, text, base, PE_VALUE);
+}
+
+static int raw(yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ return __value(yylval, text + 1, 16, PE_RAW);
+}
+
+static int str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ if (text[0] != '\'') {
+ yylval->str = strdup(text);
+ } else {
+		/*
+		 * If a text tag specified on the command line starts
+		 * with an opening single quote ('), the tag is expected
+		 * to end with a single quote as well, like this:
+		 * name=\'CPU_CLK_UNHALTED.THREAD:cmask=1\'
+		 * The quotes need to be escaped to bypass shell
+		 * processing.
+		 */
+ yylval->str = strndup(&text[1], strlen(text) - 2);
+ }
+
+ return token;
+}
+
+static bool isbpf_suffix(char *text)
+{
+ int len = strlen(text);
+
+ if (len < 2)
+ return false;
+ if ((text[len - 1] == 'c' || text[len - 1] == 'o') &&
+ text[len - 2] == '.')
+ return true;
+ if (len > 4 && !strcmp(text + len - 4, ".obj"))
+ return true;
+ return false;
+}
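+
+/*
+ * For example, "prog.c", "prog.o" and "prog.obj" all pass the suffix
+ * check; isbpf() below additionally requires that the file exists.
+ */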
+
+static bool isbpf(yyscan_t scanner)
+{
+ char *text = parse_events_get_text(scanner);
+ struct stat st;
+
+ if (!isbpf_suffix(text))
+ return false;
+
+ return stat(text, &st) == 0;
+}
+
+/*
+ * This function is called when the parser gets two kinds of input:
+ *
+ * @cfg1 or @cfg2=config
+ *
+ * The leading '@' is stripped off before 'cfg1' and 'cfg2=config' are given to
+ * bison. In the latter case it is necessary to keep the string intact so that
+ * the PMU kernel driver can determine which configurable is associated with
+ * 'config'.
+ */
+static int drv_str(yyscan_t scanner, int token)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ /* Strip off the '@' */
+ yylval->str = strdup(text + 1);
+ return token;
+}
+
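+/*
+ * Rewind the scanner so that the current token is scanned again in a
+ * new start condition: undo the column accounting and push the whole
+ * token back with yyless(0); if __alloc is set, keep a strdup()ed copy
+ * of the text in yylval first.
+ */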
+#define REWIND(__alloc) \
+do { \
+ YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \
+ char *text = parse_events_get_text(yyscanner); \
+ \
+ if (__alloc) \
+ __yylval->str = strdup(text); \
+ \
+ yycolumn -= strlen(text); \
+ yyless(0); \
+} while (0)
+
+static int pmu_str_check(yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ yylval->str = strdup(text);
+ switch (perf_pmu__parse_check(text)) {
+ case PMU_EVENT_SYMBOL_PREFIX:
+ return PE_PMU_EVENT_PRE;
+ case PMU_EVENT_SYMBOL_SUFFIX:
+ return PE_PMU_EVENT_SUF;
+ case PMU_EVENT_SYMBOL:
+ return PE_KERNEL_PMU_EVENT;
+ default:
+ return PE_NAME;
+ }
+}
+
+static int sym(yyscan_t scanner, int type, int config)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+ yylval->num = (type << 16) + config;
+ return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW;
+}
+
+static int term(yyscan_t scanner, int type)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+
+ yylval->num = type;
+ return PE_TERM;
+}
+
+#define YY_USER_ACTION \
+do { \
+ yylloc->last_column = yylloc->first_column; \
+ yylloc->first_column = yycolumn; \
+ yycolumn += yyleng; \
+} while (0);
+
+#define USER_REJECT \
+ yycolumn -= yyleng; \
+ REJECT
+
+%}
+
+%x mem
+%s config
+%x event
+%x array
+
+group [^,{}/]*[{][^}]*[}][^,{}/]*
+event_pmu [^,{}/]+[/][^/]*[/][^,{}/]*
+event [^,{}/]+
+bpf_object [^,{}]+\.(o|bpf)[a-zA-Z0-9._]*
+bpf_source [^,{}]+\.c[a-zA-Z0-9._]*
+
+num_dec [0-9]+
+num_hex 0x[a-fA-F0-9]+
+num_raw_hex [a-fA-F0-9]+
+name [a-zA-Z_*?\[\]][a-zA-Z0-9_*?.\[\]]*
+name_tag [\'][a-zA-Z_*?\[\]][a-zA-Z0-9_*?\-,\.\[\]:=]*[\']
+name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.:]*
+drv_cfg_term [a-zA-Z0-9_\.]+(=[a-zA-Z0-9_*?\.:]+)?
+/* If you add a modifier you need to update check_modifier() */
+modifier_event [ukhpPGHSDIW]+
+modifier_bp [rwx]{1,3}
+
+%%
+
+%{
+ {
+ int start_token;
+
+ start_token = parse_events_get_extra(yyscanner);
+
+ if (start_token == PE_START_TERMS)
+ BEGIN(config);
+ else if (start_token == PE_START_EVENTS)
+ BEGIN(event);
+
+ if (start_token) {
+ parse_events_set_extra(NULL, yyscanner);
+			/*
+			 * The flex parser does not initialize the locations
+			 * variable via the scan_string interface, so we need
+			 * to do the init here.
+			 */
+ yycolumn = 0;
+ return start_token;
+ }
+ }
+%}
+
+<event>{
+
+{group} {
+ BEGIN(INITIAL);
+ REWIND(0);
+ }
+
+{event_pmu} |
+{bpf_object} |
+{bpf_source} |
+{event} {
+ BEGIN(INITIAL);
+ REWIND(1);
+ return PE_EVENT_NAME;
+ }
+
+<<EOF>> {
+ BEGIN(INITIAL);
+ REWIND(0);
+ }
+
+}
+
+<array>{
+"]" { BEGIN(config); return ']'; }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+, { return ','; }
+"\.\.\." { return PE_ARRAY_RANGE; }
+}
+
+<config>{
+	/*
+	 * Please update config_term_names when a new static term is added.
+	 */
+config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
+config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
+config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
+name { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NAME); }
+period { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD); }
+freq { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ); }
+branch_type { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_BRANCH_SAMPLE_TYPE); }
+time { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_TIME); }
+call-graph { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CALLGRAPH); }
+stack-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_STACKSIZE); }
+max-stack { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_MAX_STACK); }
+inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_INHERIT); }
+no-inherit { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOINHERIT); }
+overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_OVERWRITE); }
+no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
+, { return ','; }
+"/" { BEGIN(INITIAL); return '/'; }
+{name_minus} { return str(yyscanner, PE_NAME); }
+\[all\] { return PE_ARRAY_ALL; }
+"[" { BEGIN(array); return '['; }
+@{drv_cfg_term} { return drv_str(yyscanner, PE_DRV_CFG_TERM); }
+}
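+
+	/*
+	 * For example, in "cpu/config=0x1,name=foo,period=1000/u" the
+	 * terms between the slashes are scanned in this <config> state,
+	 * returning PE_TERM for the static names above and PE_NAME for
+	 * anything else.
+	 */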
+
+<mem>{
+{modifier_bp} { return str(yyscanner, PE_MODIFIER_BP); }
+: { return ':'; }
+"/" { return '/'; }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+	/*
+	 * We need a separate 'mem:' scanner state in order to get the
+	 * specific modifier bits parsed out. Otherwise we would need to
+	 * handle PE_NAME and parse it manually. When escaping from the
+	 * <mem> state we need to put the escaping char back, so we don't
+	 * miss it.
+	 */
+. { unput(*yytext); BEGIN(INITIAL); }
+	/*
+	 * We destroy the scanner after reaching EOF, but just to be
+	 * sure, get back to the INITIAL state anyway.
+	 */
+<<EOF>> { BEGIN(INITIAL); }
+}
+
+cpu-cycles|cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); }
+stalled-cycles-frontend|idle-cycles-frontend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); }
+stalled-cycles-backend|idle-cycles-backend { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); }
+instructions { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS); }
+cache-references { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES); }
+cache-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES); }
+branch-instructions|branches { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); }
+branch-misses { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES); }
+bus-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES); }
+ref-cycles { return sym(yyscanner, PERF_TYPE_HARDWARE, PERF_COUNT_HW_REF_CPU_CYCLES); }
+cpu-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK); }
+task-clock { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK); }
+page-faults|faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS); }
+minor-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN); }
+major-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ); }
+context-switches|cs { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES); }
+cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS); }
+alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); }
+emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
+dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+duration_time { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
+bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
+
+	/*
+	 * We have to handle the kernel PMU events cycles-ct/cycles-t/mem-loads/mem-stores
+	 * separately, because the prefix "cycles" is mixed up with cpu-cycles
+	 * and "loads"/"stores" are mixed up with the cache events.
+	 */
+cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+
+L1-dcache|l1-d|l1d|L1-data |
+L1-icache|l1-i|l1i|L1-instruction |
+LLC|L2 |
+dTLB|d-tlb|Data-TLB |
+iTLB|i-tlb|Instruction-TLB |
+branch|branches|bpu|btb|bpc |
+node { return str(yyscanner, PE_NAME_CACHE_TYPE); }
+
+load|loads|read |
+store|stores|write |
+prefetch|prefetches |
+speculative-read|speculative-load |
+refs|Reference|ops|access |
+misses|miss { return str(yyscanner, PE_NAME_CACHE_OP_RESULT); }
+
+mem: { BEGIN(mem); return PE_PREFIX_MEM; }
+r{num_raw_hex} { return raw(yyscanner); }
+{num_dec} { return value(yyscanner, 10); }
+{num_hex} { return value(yyscanner, 16); }
+
+{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
+{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
+{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
+{name} { return pmu_str_check(yyscanner); }
+{name_tag} { return str(yyscanner, PE_NAME); }
+"/" { BEGIN(config); return '/'; }
+- { return '-'; }
+, { BEGIN(event); return ','; }
+: { return ':'; }
+"{" { BEGIN(event); return '{'; }
+"}" { return '}'; }
+= { return '='; }
+\n { }
+. { }
+
+%%
+
+int parse_events_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
new file mode 100644
index 000000000..8d7578be7
--- /dev/null
+++ b/tools/perf/util/parse-events.y
@@ -0,0 +1,714 @@
+%define api.pure full
+%parse-param {void *_parse_state}
+%parse-param {void *scanner}
+%lex-param {void* scanner}
+%locations
+
+%{
+
+#define YYDEBUG 1
+
+#include <fnmatch.h>
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include "util.h"
+#include "pmu.h"
+#include "debug.h"
+#include "parse-events.h"
+#include "parse-events-bison.h"
+
+void parse_events_error(YYLTYPE *loc, void *parse_state, void *scanner, char const *msg);
+
+#define ABORT_ON(val) \
+do { \
+ if (val) \
+ YYABORT; \
+} while (0)
+
+#define ALLOC_LIST(list) \
+do { \
+ list = malloc(sizeof(*list)); \
+ ABORT_ON(!list); \
+ INIT_LIST_HEAD(list); \
+} while (0)
+
+static void inc_group_count(struct list_head *list,
+ struct parse_events_state *parse_state)
+{
+	/* Only count groups that have more than one member */
+ if (!list_is_last(list->next, list))
+ parse_state->nr_groups++;
+}
+
+%}
+
+%token PE_START_EVENTS PE_START_TERMS
+%token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_RAW PE_TERM
+%token PE_EVENT_NAME
+%token PE_NAME
+%token PE_BPF_OBJECT PE_BPF_SOURCE
+%token PE_MODIFIER_EVENT PE_MODIFIER_BP
+%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
+%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
+%token PE_ERROR
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_ARRAY_ALL PE_ARRAY_RANGE
+%token PE_DRV_CFG_TERM
+%type <num> PE_VALUE
+%type <num> PE_VALUE_SYM_HW
+%type <num> PE_VALUE_SYM_SW
+%type <num> PE_RAW
+%type <num> PE_TERM
+%type <str> PE_NAME
+%type <str> PE_BPF_OBJECT
+%type <str> PE_BPF_SOURCE
+%type <str> PE_NAME_CACHE_TYPE
+%type <str> PE_NAME_CACHE_OP_RESULT
+%type <str> PE_MODIFIER_EVENT
+%type <str> PE_MODIFIER_BP
+%type <str> PE_EVENT_NAME
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_DRV_CFG_TERM
+%type <num> value_sym
+%type <head> event_config
+%type <head> opt_event_config
+%type <head> opt_pmu_config
+%type <term> event_term
+%type <head> event_pmu
+%type <head> event_legacy_symbol
+%type <head> event_legacy_cache
+%type <head> event_legacy_mem
+%type <head> event_legacy_tracepoint
+%type <tracepoint_name> tracepoint_name
+%type <head> event_legacy_numeric
+%type <head> event_legacy_raw
+%type <head> event_bpf_file
+%type <head> event_def
+%type <head> event_mod
+%type <head> event_name
+%type <head> event
+%type <head> events
+%type <head> group_def
+%type <head> group
+%type <head> groups
+%type <array> array
+%type <array> array_term
+%type <array> array_terms
+
+%union
+{
+ char *str;
+ u64 num;
+ struct list_head *head;
+ struct parse_events_term *term;
+ struct tracepoint_name {
+ char *sys;
+ char *event;
+ } tracepoint_name;
+ struct parse_events_array array;
+}
+%%
+
+start:
+PE_START_EVENTS start_events
+|
+PE_START_TERMS start_terms
+
+start_events: groups
+{
+ struct parse_events_state *parse_state = _parse_state;
+
+ parse_events_update_lists($1, &parse_state->list);
+}
+
+groups:
+groups ',' group
+{
+ struct list_head *list = $1;
+ struct list_head *group = $3;
+
+ parse_events_update_lists(group, list);
+ $$ = list;
+}
+|
+groups ',' event
+{
+ struct list_head *list = $1;
+ struct list_head *event = $3;
+
+ parse_events_update_lists(event, list);
+ $$ = list;
+}
+|
+group
+|
+event
+
+group:
+group_def ':' PE_MODIFIER_EVENT
+{
+ struct list_head *list = $1;
+
+ ABORT_ON(parse_events__modifier_group(list, $3));
+ $$ = list;
+}
+|
+group_def
+
+group_def:
+PE_NAME '{' events '}'
+{
+ struct list_head *list = $3;
+
+ inc_group_count(list, _parse_state);
+ parse_events__set_leader($1, list, _parse_state);
+ $$ = list;
+}
+|
+'{' events '}'
+{
+ struct list_head *list = $2;
+
+ inc_group_count(list, _parse_state);
+ parse_events__set_leader(NULL, list, _parse_state);
+ $$ = list;
+}
+
+events:
+events ',' event
+{
+ struct list_head *event = $3;
+ struct list_head *list = $1;
+
+ parse_events_update_lists(event, list);
+ $$ = list;
+}
+|
+event
+
+event: event_mod
+
+event_mod:
+event_name PE_MODIFIER_EVENT
+{
+ struct list_head *list = $1;
+
+	/*
+	 * Apply the modifier to all events added by a single event
+	 * definition (there could be more events added for multiple
+	 * tracepoint definitions via '*?').
+	 */
+ ABORT_ON(parse_events__modifier_event(list, $2, false));
+ $$ = list;
+}
+|
+event_name
+
+event_name:
+PE_EVENT_NAME event_def
+{
+ ABORT_ON(parse_events_name($2, $1));
+ free($1);
+ $$ = $2;
+}
+|
+event_def
+
+event_def: event_pmu |
+ event_legacy_symbol |
+ event_legacy_cache sep_dc |
+ event_legacy_mem |
+ event_legacy_tracepoint sep_dc |
+ event_legacy_numeric sep_dc |
+ event_legacy_raw sep_dc |
+ event_bpf_file
+
+event_pmu:
+PE_NAME opt_pmu_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list, *orig_terms, *terms;
+
+ if (parse_events_copy_term_list($2, &orig_terms))
+ YYABORT;
+
+ if (error)
+ error->idx = @1.first_column;
+
+ ALLOC_LIST(list);
+ if (parse_events_add_pmu(_parse_state, list, $1, $2, false, false)) {
+ struct perf_pmu *pmu = NULL;
+ int ok = 0;
+ char *pattern;
+
+ if (asprintf(&pattern, "%s*", $1) < 0)
+ YYABORT;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ char *name = pmu->name;
+
+ if (!strncmp(name, "uncore_", 7) &&
+ strncmp($1, "uncore_", 7))
+ name += 7;
+ if (!fnmatch(pattern, name, 0)) {
+ if (parse_events_copy_term_list(orig_terms, &terms)) {
+ free(pattern);
+ YYABORT;
+ }
+ if (!parse_events_add_pmu(_parse_state, list, pmu->name, terms, true, false))
+ ok++;
+ parse_events_terms__delete(terms);
+ }
+ }
+
+ free(pattern);
+
+ if (!ok)
+ YYABORT;
+ }
+ parse_events_terms__delete($2);
+ parse_events_terms__delete(orig_terms);
+ $$ = list;
+}
+|
+PE_KERNEL_PMU_EVENT sep_dc
+{
+ struct list_head *list;
+
+ if (parse_events_multi_pmu_add(_parse_state, $1, &list) < 0)
+ YYABORT;
+ $$ = list;
+}
+|
+PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
+{
+ struct list_head *list;
+ char pmu_name[128];
+
+	snprintf(pmu_name, sizeof(pmu_name), "%s-%s", $1, $3);
+ if (parse_events_multi_pmu_add(_parse_state, pmu_name, &list) < 0)
+ YYABORT;
+ $$ = list;
+}
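+
+/*
+ * For example, "cbox/event=0x1/" does not name a PMU directly, so the
+ * first event_pmu alternative above retries with the glob "cbox*",
+ * also matching uncore PMUs such as "uncore_cbox_0" with their
+ * "uncore_" prefix stripped.
+ */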
+
+value_sym:
+PE_VALUE_SYM_HW
+|
+PE_VALUE_SYM_SW
+
+event_legacy_symbol:
+value_sym '/' event_config '/'
+{
+ struct list_head *list;
+ int type = $1 >> 16;
+ int config = $1 & 255;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, $3));
+ parse_events_terms__delete($3);
+ $$ = list;
+}
+|
+value_sym sep_slash_dc
+{
+ struct list_head *list;
+ int type = $1 >> 16;
+ int config = $1 & 255;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_numeric(_parse_state, list, type, config, NULL));
+ $$ = list;
+}
+
+event_legacy_cache:
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, $3, $5, error, $6));
+ parse_events_terms__delete($6);
+ $$ = list;
+}
+|
+PE_NAME_CACHE_TYPE '-' PE_NAME_CACHE_OP_RESULT opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, $3, NULL, error, $4));
+ parse_events_terms__delete($4);
+ $$ = list;
+}
+|
+PE_NAME_CACHE_TYPE opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_cache(list, &parse_state->idx, $1, NULL, NULL, error, $2));
+ parse_events_terms__delete($2);
+ $$ = list;
+}
+
+event_legacy_mem:
+PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+ (void *) $2, $6, $4));
+ $$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+ (void *) $2, NULL, $4));
+ $$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+ (void *) $2, $4, 0));
+ $$ = list;
+}
+|
+PE_PREFIX_MEM PE_VALUE sep_dc
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_breakpoint(list, &parse_state->idx,
+ (void *) $2, NULL, 0));
+ $$ = list;
+}
+
+event_legacy_tracepoint:
+tracepoint_name opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ if (error)
+ error->idx = @1.first_column;
+
+ if (parse_events_add_tracepoint(list, &parse_state->idx, $1.sys, $1.event,
+ error, $2))
+ return -1;
+
+ $$ = list;
+}
+
+tracepoint_name:
+PE_NAME '-' PE_NAME ':' PE_NAME
+{
+ char sys_name[128];
+ struct tracepoint_name tracepoint;
+
+	snprintf(sys_name, sizeof(sys_name), "%s-%s", $1, $3);
+	/*
+	 * sys_name is on this action's stack, so take a copy: the struct
+	 * is consumed later by the event_legacy_tracepoint rule.
+	 */
+	tracepoint.sys = strdup(sys_name);
+	ABORT_ON(!tracepoint.sys);
+ tracepoint.event = $5;
+
+ $$ = tracepoint;
+}
+|
+PE_NAME ':' PE_NAME
+{
+ struct tracepoint_name tracepoint = {$1, $3};
+
+ $$ = tracepoint;
+}
+
+event_legacy_numeric:
+PE_VALUE ':' PE_VALUE opt_event_config
+{
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_numeric(_parse_state, list, (u32)$1, $3, $4));
+ parse_events_terms__delete($4);
+ $$ = list;
+}
+
+event_legacy_raw:
+PE_RAW opt_event_config
+{
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_add_numeric(_parse_state, list, PERF_TYPE_RAW, $1, $2));
+ parse_events_terms__delete($2);
+ $$ = list;
+}
+
+event_bpf_file:
+PE_BPF_OBJECT opt_event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ struct parse_events_error *error = parse_state->error;
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_load_bpf(parse_state, list, $1, false, $2));
+ parse_events_terms__delete($2);
+ $$ = list;
+}
+|
+PE_BPF_SOURCE opt_event_config
+{
+ struct list_head *list;
+
+ ALLOC_LIST(list);
+ ABORT_ON(parse_events_load_bpf(_parse_state, list, $1, true, $2));
+ parse_events_terms__delete($2);
+ $$ = list;
+}
+
+opt_event_config:
+'/' event_config '/'
+{
+ $$ = $2;
+}
+|
+'/' '/'
+{
+ $$ = NULL;
+}
+|
+{
+ $$ = NULL;
+}
+
+opt_pmu_config:
+'/' event_config '/'
+{
+ $$ = $2;
+}
+|
+'/' '/'
+{
+ $$ = NULL;
+}
+
+start_terms: event_config
+{
+ struct parse_events_state *parse_state = _parse_state;
+ parse_state->terms = $1;
+}
+
+event_config:
+event_config ',' event_term
+{
+ struct list_head *head = $1;
+ struct parse_events_term *term = $3;
+
+ ABORT_ON(!head);
+ list_add_tail(&term->list, head);
+ $$ = $1;
+}
+|
+event_term
+{
+ struct list_head *head = malloc(sizeof(*head));
+ struct parse_events_term *term = $1;
+
+ ABORT_ON(!head);
+ INIT_LIST_HEAD(head);
+ list_add_tail(&term->list, head);
+ $$ = head;
+}
+
+event_term:
+PE_NAME '=' PE_NAME
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $3, &@1, &@3));
+ $$ = term;
+}
+|
+PE_NAME '=' PE_VALUE
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $3, false, &@1, &@3));
+ $$ = term;
+}
+|
+PE_NAME '=' PE_VALUE_SYM_HW
+{
+ struct parse_events_term *term;
+ int config = $3 & 255;
+
+ ABORT_ON(parse_events_term__sym_hw(&term, $1, config));
+ $$ = term;
+}
+|
+PE_NAME
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, 1, true, &@1, NULL));
+ $$ = term;
+}
+|
+PE_VALUE_SYM_HW
+{
+ struct parse_events_term *term;
+ int config = $1 & 255;
+
+ ABORT_ON(parse_events_term__sym_hw(&term, NULL, config));
+ $$ = term;
+}
+|
+PE_TERM '=' PE_NAME
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3));
+ $$ = term;
+}
+|
+PE_TERM '=' PE_VALUE
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, false, &@1, &@3));
+ $$ = term;
+}
+|
+PE_TERM
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, true, &@1, NULL));
+ $$ = term;
+}
+|
+PE_NAME array '=' PE_NAME
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $4, &@1, &@4));
+
+ term->array = $2;
+ $$ = term;
+}
+|
+PE_NAME array '=' PE_VALUE
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
+ $1, $4, false, &@1, &@4));
+ term->array = $2;
+ $$ = term;
+}
+|
+PE_DRV_CFG_TERM
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_DRV_CFG,
+ $1, $1, &@1, NULL));
+ $$ = term;
+}
+
+array:
+'[' array_terms ']'
+{
+ $$ = $2;
+}
+|
+PE_ARRAY_ALL
+{
+ $$.nr_ranges = 0;
+ $$.ranges = NULL;
+}
+
+array_terms:
+array_terms ',' array_term
+{
+ struct parse_events_array new_array;
+
+ new_array.nr_ranges = $1.nr_ranges + $3.nr_ranges;
+ new_array.ranges = malloc(sizeof(new_array.ranges[0]) *
+ new_array.nr_ranges);
+ ABORT_ON(!new_array.ranges);
+ memcpy(&new_array.ranges[0], $1.ranges,
+ $1.nr_ranges * sizeof(new_array.ranges[0]));
+ memcpy(&new_array.ranges[$1.nr_ranges], $3.ranges,
+ $3.nr_ranges * sizeof(new_array.ranges[0]));
+ free($1.ranges);
+ free($3.ranges);
+ $$ = new_array;
+}
+|
+array_term
+
+array_term:
+PE_VALUE
+{
+ struct parse_events_array array;
+
+ array.nr_ranges = 1;
+ array.ranges = malloc(sizeof(array.ranges[0]));
+ ABORT_ON(!array.ranges);
+ array.ranges[0].start = $1;
+ array.ranges[0].length = 1;
+ $$ = array;
+}
+|
+PE_VALUE PE_ARRAY_RANGE PE_VALUE
+{
+ struct parse_events_array array;
+
+ ABORT_ON($3 < $1);
+ array.nr_ranges = 1;
+ array.ranges = malloc(sizeof(array.ranges[0]));
+ ABORT_ON(!array.ranges);
+ array.ranges[0].start = $1;
+ array.ranges[0].length = $3 - $1 + 1;
+ $$ = array;
+}
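+
+/*
+ * For example, the array "[1,3...5]" produces two ranges:
+ * {start = 1, length = 1} and {start = 3, length = 3}.
+ */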
+
+sep_dc: ':' |
+
+sep_slash_dc: '/' | ':' |
+
+%%
+
+void parse_events_error(YYLTYPE *loc, void *parse_state,
+ void *scanner __maybe_unused,
+ char const *msg __maybe_unused)
+{
+ parse_events_evlist_error(parse_state, loc->last_column, "parser error");
+}
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
new file mode 100644
index 000000000..e5ad120e7
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include <subcmd/parse-options.h>
+#include "util/parse-regs-options.h"
+
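+/*
+ * Option callback for register-list options such as perf record's
+ * -I/--intr-regs: an argument like "ax,sp" sets the matching bits in
+ * *opt->value, "?" prints the available registers and fails, and no
+ * argument at all selects the full PERF_REGS_MASK.
+ */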
+int
+parse_regs(const struct option *opt, const char *str, int unset)
+{
+ uint64_t *mode = (uint64_t *)opt->value;
+ const struct sample_reg *r;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice
+ */
+ if (*mode)
+ return -1;
+
+ /* str may be NULL in case no arg is passed to -I */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ if (!strcmp(s, "?")) {
+ fprintf(stderr, "available registers: ");
+ for (r = sample_reg_masks; r->name; r++) {
+ fprintf(stderr, "%s ", r->name);
+ }
+ fputc('\n', stderr);
+ /* just printing available regs */
+ goto error;
+ }
+ for (r = sample_reg_masks; r->name; r++) {
+ if (!strcasecmp(s, r->name))
+ break;
+ }
+ if (!r->name) {
+ ui__warning("unknown register %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= r->mask;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ /* default to all possible regs */
+ if (*mode == 0)
+ *mode = PERF_REGS_MASK;
+error:
+ free(os);
+ return ret;
+}
diff --git a/tools/perf/util/parse-regs-options.h b/tools/perf/util/parse-regs-options.h
new file mode 100644
index 000000000..cdefb1acf
--- /dev/null
+++ b/tools/perf/util/parse-regs-options.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PARSE_REGS_OPTIONS_H
+#define _PERF_PARSE_REGS_OPTIONS_H 1
+struct option;
+int parse_regs(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_REGS_OPTIONS_H */
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
new file mode 100644
index 000000000..ca56ba2dd
--- /dev/null
+++ b/tools/perf/util/path.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * I'm tired of doing "vsnprintf()" etc just to open a
+ * file, so here's a "return static buffer with printf"
+ * interface for paths.
+ *
+ * It's obviously not thread-safe. Sue me. But it's quite
+ * useful for doing things like
+ *
+ * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY);
+ *
+ * which is what it's designed for.
+ */
+#include "cache.h"
+#include "path.h"
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <unistd.h>
+
+static char bad_path[] = "/bad-path/";
+/*
+ * One hack: rotate through four static buffers, so up to four results
+ * of mkpath() can be in use at the same time.
+ */
+static char *get_pathname(void)
+{
+ static char pathname_array[4][PATH_MAX];
+ static int idx;
+
+ return pathname_array[3 & ++idx];
+}
+
+static char *cleanup_path(char *path)
+{
+ /* Clean it up */
+ if (!memcmp(path, "./", 2)) {
+ path += 2;
+ while (*path == '/')
+ path++;
+ }
+ return path;
+}
+
+char *mkpath(const char *fmt, ...)
+{
+ va_list args;
+ unsigned len;
+ char *pathname = get_pathname();
+
+ va_start(args, fmt);
+ len = vsnprintf(pathname, PATH_MAX, fmt, args);
+ va_end(args);
+ if (len >= PATH_MAX)
+ return bad_path;
+ return cleanup_path(pathname);
+}
+
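+/*
+ * For example, path__join(bf, size, "/sys", "devices") writes
+ * "/sys/devices" into bf; an empty path1 drops the separator.
+ */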
+int path__join(char *bf, size_t size, const char *path1, const char *path2)
+{
+ return scnprintf(bf, size, "%s%s%s", path1, path1[0] ? "/" : "", path2);
+}
+
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3)
+{
+ return scnprintf(bf, size, "%s%s%s%s%s", path1, path1[0] ? "/" : "",
+ path2, path2[0] ? "/" : "", path3);
+}
+
+bool is_regular_file(const char *file)
+{
+ struct stat st;
+
+ if (stat(file, &st))
+ return false;
+
+ return S_ISREG(st.st_mode);
+}
+
+/* Helper function for filesystems that return dent->d_type == DT_UNKNOWN */
+bool is_directory(const char *base_path, const struct dirent *dent)
+{
+ char path[PATH_MAX];
+ struct stat st;
+
+	scnprintf(path, sizeof(path), "%s/%s", base_path, dent->d_name);
+ if (stat(path, &st))
+ return false;
+
+ return S_ISDIR(st.st_mode);
+}
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
new file mode 100644
index 000000000..f014f905d
--- /dev/null
+++ b/tools/perf/util/path.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PATH_H
+#define _PERF_PATH_H
+
+struct dirent;
+
+int path__join(char *bf, size_t size, const char *path1, const char *path2);
+int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3);
+
+bool is_regular_file(const char *file);
+bool is_directory(const char *base_path, const struct dirent *dent);
+
+#endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/perf-hooks-list.h b/tools/perf/util/perf-hooks-list.h
new file mode 100644
index 000000000..2867c07ee
--- /dev/null
+++ b/tools/perf/util/perf-hooks-list.h
@@ -0,0 +1,3 @@
+PERF_HOOK(record_start)
+PERF_HOOK(record_end)
+PERF_HOOK(test)
diff --git a/tools/perf/util/perf-hooks.c b/tools/perf/util/perf-hooks.c
new file mode 100644
index 000000000..4f3aa8d99
--- /dev/null
+++ b/tools/perf/util/perf-hooks.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * perf_hooks.c
+ *
+ * Copyright (C) 2016 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2016 Huawei Inc.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/perf-hooks.h"
+
+static sigjmp_buf jmpbuf;
+static const struct perf_hook_desc *current_perf_hook;
+
+void perf_hooks__invoke(const struct perf_hook_desc *desc)
+{
+ if (!(desc && desc->p_hook_func && *desc->p_hook_func))
+ return;
+
+ if (sigsetjmp(jmpbuf, 1)) {
+ pr_warning("Fatal error (SEGFAULT) in perf hook '%s'\n",
+ desc->hook_name);
+ *(current_perf_hook->p_hook_func) = NULL;
+ } else {
+ current_perf_hook = desc;
+ (**desc->p_hook_func)(desc->hook_ctx);
+ }
+ current_perf_hook = NULL;
+}
+
+void perf_hooks__recover(void)
+{
+ if (current_perf_hook)
+ siglongjmp(jmpbuf, 1);
+}
+
+#define PERF_HOOK(name) \
+perf_hook_func_t __perf_hook_func_##name = NULL; \
+struct perf_hook_desc __perf_hook_desc_##name = \
+ {.hook_name = #name, \
+ .p_hook_func = &__perf_hook_func_##name, \
+ .hook_ctx = NULL};
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+#define PERF_HOOK(name) \
+ &__perf_hook_desc_##name,
+
+static struct perf_hook_desc *perf_hooks[] = {
+#include "perf-hooks-list.h"
+};
+#undef PERF_HOOK
+
+int perf_hooks__set_hook(const char *hook_name,
+ perf_hook_func_t hook_func,
+ void *hook_ctx)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+ if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+ continue;
+
+ if (*(perf_hooks[i]->p_hook_func))
+ pr_warning("Overwrite existing hook: %s\n", hook_name);
+ *(perf_hooks[i]->p_hook_func) = hook_func;
+ perf_hooks[i]->hook_ctx = hook_ctx;
+ return 0;
+ }
+ return -ENOENT;
+}
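+
+/*
+ * Usage sketch: install a function as the 'test' hook and run it via
+ * the wrapper generated from perf-hooks-list.h:
+ *
+ *	static void my_test_hook(void *ctx) { ... }
+ *
+ *	perf_hooks__set_hook("test", my_test_hook, NULL);
+ *	perf_hooks__invoke_test();
+ */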
+
+perf_hook_func_t perf_hooks__get_hook(const char *hook_name)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(perf_hooks); i++) {
+ if (strcmp(hook_name, perf_hooks[i]->hook_name) != 0)
+ continue;
+
+ return *(perf_hooks[i]->p_hook_func);
+ }
+ return ERR_PTR(-ENOENT);
+}
diff --git a/tools/perf/util/perf-hooks.h b/tools/perf/util/perf-hooks.h
new file mode 100644
index 000000000..27fbec62d
--- /dev/null
+++ b/tools/perf/util/perf-hooks.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UTIL_PERF_HOOKS_H
+#define PERF_UTIL_PERF_HOOKS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void (*perf_hook_func_t)(void *ctx);
+struct perf_hook_desc {
+ const char * const hook_name;
+ perf_hook_func_t * const p_hook_func;
+ void *hook_ctx;
+};
+
+extern void perf_hooks__invoke(const struct perf_hook_desc *);
+extern void perf_hooks__recover(void);
+
+#define PERF_HOOK(name) \
+extern struct perf_hook_desc __perf_hook_desc_##name; \
+static inline void perf_hooks__invoke_##name(void) \
+{ \
+ perf_hooks__invoke(&__perf_hook_desc_##name); \
+}
+
+#include "perf-hooks-list.h"
+#undef PERF_HOOK
+
+extern int
+perf_hooks__set_hook(const char *hook_name,
+ perf_hook_func_t hook_func,
+ void *hook_ctx);
+
+extern perf_hook_func_t
+perf_hooks__get_hook(const char *hook_name);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
new file mode 100644
index 000000000..2acfcc527
--- /dev/null
+++ b/tools/perf/util/perf_regs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include "perf_regs.h"
+#include "event.h"
+
+const struct sample_reg __weak sample_reg_masks[] = {
+ SMPL_REG_END
+};
+
+int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
+ char **new_op __maybe_unused)
+{
+ return SDT_ARG_SKIP;
+}
+
+#ifdef HAVE_PERF_REGS_SUPPORT
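+/*
+ * Sampled register values are packed in ascending bit order of
+ * regs->mask; map register 'id' to its packed slot by counting the
+ * mask bits below it, and cache the value so later lookups are cheap.
+ */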
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id)
+{
+ int i, idx = 0;
+ u64 mask = regs->mask;
+
+ if (regs->cache_mask & (1ULL << id))
+ goto out;
+
+ if (!(mask & (1ULL << id)))
+ return -EINVAL;
+
+ for (i = 0; i < id; i++) {
+ if (mask & (1ULL << i))
+ idx++;
+ }
+
+ regs->cache_mask |= (1ULL << id);
+ regs->cache_regs[id] = regs->regs[idx];
+
+out:
+ *valp = regs->cache_regs[id];
+ return 0;
+}
+#endif
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
new file mode 100644
index 000000000..f732e3af2
--- /dev/null
+++ b/tools/perf/util/perf_regs.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_REGS_H
+#define __PERF_REGS_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+
+struct regs_dump;
+
+struct sample_reg {
+ const char *name;
+ uint64_t mask;
+};
+#define SMPL_REG(n, b) { .name = #n, .mask = 1ULL << (b) }
+#define SMPL_REG_END { .name = NULL }
+
+extern const struct sample_reg sample_reg_masks[];
+
+enum {
+ SDT_ARG_VALID = 0,
+ SDT_ARG_SKIP,
+};
+
+int arch_sdt_arg_parse_op(char *old_op, char **new_op);
+
+#ifdef HAVE_PERF_REGS_SUPPORT
+#include <perf_regs.h>
+
+int perf_reg_value(u64 *valp, struct regs_dump *regs, int id);
+
+#else
+#define PERF_REGS_MASK 0
+#define PERF_REGS_MAX 0
+
+static inline const char *perf_reg_name(int id __maybe_unused)
+{
+ return "unknown";
+}
+
+static inline int perf_reg_value(u64 *valp __maybe_unused,
+ struct regs_dump *regs __maybe_unused,
+ int id __maybe_unused)
+{
+ return 0;
+}
+#endif /* HAVE_PERF_REGS_SUPPORT */
+#endif /* __PERF_REGS_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
new file mode 100644
index 000000000..c42054f42
--- /dev/null
+++ b/tools/perf/util/pmu.c
@@ -0,0 +1,1553 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <dirent.h>
+#include <api/fs/fs.h>
+#include <locale.h>
+#include <regex.h>
+#include "util.h"
+#include "pmu.h"
+#include "parse-events.h"
+#include "cpumap.h"
+#include "header.h"
+#include "pmu-events/pmu-events.h"
+#include "cache.h"
+#include "string2.h"
+
+struct perf_pmu_format {
+ char *name;
+ int value;
+ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+ struct list_head list;
+};
+
+#define EVENT_SOURCE_DEVICE_PATH "/bus/event_source/devices/"
+
+int perf_pmu_parse(struct list_head *list, char *name);
+extern FILE *perf_pmu_in;
+
+static LIST_HEAD(pmus);
+
+/*
+ * Parse & process all the sysfs attributes located under
+ * the directory specified in the 'dir' parameter.
+ */
+int perf_pmu__format_parse(char *dir, struct list_head *head)
+{
+ struct dirent *evt_ent;
+ DIR *format_dir;
+ int ret = 0;
+
+ format_dir = opendir(dir);
+ if (!format_dir)
+ return -EINVAL;
+
+ while (!ret && (evt_ent = readdir(format_dir))) {
+ char path[PATH_MAX];
+ char *name = evt_ent->d_name;
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ snprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+ ret = -EINVAL;
+ file = fopen(path, "r");
+ if (!file)
+ break;
+
+ perf_pmu_in = file;
+ ret = perf_pmu_parse(head, name);
+ fclose(file);
+ }
+
+ closedir(format_dir);
+ return ret;
+}
+
+/*
+ * Reading/parsing the default pmu format definition, which should be
+ * located at:
+ * /sys/bus/event_source/devices/<dev>/format as sysfs group attributes.
+ */
+static int pmu_format(const char *name, struct list_head *format)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/format", sysfs, name);
+
+ if (stat(path, &st) < 0)
+ return 0; /* no error if format does not exist */
+
+ if (perf_pmu__format_parse(path, format))
+ return -1;
+
+ return 0;
+}
+
+static int convert_scale(const char *scale, char **end, double *sval)
+{
+ char *lc;
+ int ret = 0;
+
+ /*
+ * save current locale
+ */
+ lc = setlocale(LC_NUMERIC, NULL);
+
+	/*
+	 * The lc string may be allocated in static storage,
+	 * so get a dynamic copy to make it survive the
+	 * setlocale() call below.
+	 */
+ lc = strdup(lc);
+ if (!lc) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+	/*
+	 * Force the C locale to ensure the kernel scale string is
+	 * converted correctly: the kernel uses the default C locale.
+	 */
+ setlocale(LC_NUMERIC, "C");
+
+ *sval = strtod(scale, end);
+
+out:
+ /* restore locale */
+ setlocale(LC_NUMERIC, lc);
+ free(lc);
+ return ret;
+}
+
+static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+ struct stat st;
+ ssize_t sret;
+ char scale[128];
+ int fd, ret = -1;
+ char path[PATH_MAX];
+
+ scnprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ if (fstat(fd, &st) < 0)
+ goto error;
+
+ sret = read(fd, scale, sizeof(scale)-1);
+	if (sret <= 0)
+ goto error;
+
+ if (scale[sret - 1] == '\n')
+ scale[sret - 1] = '\0';
+ else
+ scale[sret] = '\0';
+
+ ret = convert_scale(scale, NULL, &alias->scale);
+error:
+ close(fd);
+ return ret;
+}
+
+static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+ char path[PATH_MAX];
+ ssize_t sret;
+ int fd;
+
+ scnprintf(path, PATH_MAX, "%s/%s.unit", dir, name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ sret = read(fd, alias->unit, UNIT_MAX_LEN);
+	if (sret <= 0)
+ goto error;
+
+ close(fd);
+
+ if (alias->unit[sret - 1] == '\n')
+ alias->unit[sret - 1] = '\0';
+ else
+ alias->unit[sret] = '\0';
+
+ return 0;
+error:
+ close(fd);
+ alias->unit[0] = '\0';
+ return -1;
+}
+
+static int
+perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name)
+{
+ char path[PATH_MAX];
+ int fd;
+
+ scnprintf(path, PATH_MAX, "%s/%s.per-pkg", dir, name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ close(fd);
+
+ alias->per_pkg = true;
+ return 0;
+}
+
+static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias,
+ char *dir, char *name)
+{
+ char path[PATH_MAX];
+ int fd;
+
+ scnprintf(path, PATH_MAX, "%s/%s.snapshot", dir, name);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ alias->snapshot = true;
+ close(fd);
+ return 0;
+}
+
+static void perf_pmu_assign_str(char *name, const char *field, char **old_str,
+ char **new_str)
+{
+ if (!*old_str)
+ goto set_new;
+
+ if (*new_str) { /* Have new string, check with old */
+ if (strcasecmp(*old_str, *new_str))
+ pr_debug("alias %s differs in field '%s'\n",
+ name, field);
+ zfree(old_str);
+ } else /* Nothing new --> keep old string */
+ return;
+set_new:
+ *old_str = *new_str;
+ *new_str = NULL;
+}
+
+static void perf_pmu_update_alias(struct perf_pmu_alias *old,
+ struct perf_pmu_alias *newalias)
+{
+ perf_pmu_assign_str(old->name, "desc", &old->desc, &newalias->desc);
+ perf_pmu_assign_str(old->name, "long_desc", &old->long_desc,
+ &newalias->long_desc);
+ perf_pmu_assign_str(old->name, "topic", &old->topic, &newalias->topic);
+ perf_pmu_assign_str(old->name, "metric_expr", &old->metric_expr,
+ &newalias->metric_expr);
+ perf_pmu_assign_str(old->name, "metric_name", &old->metric_name,
+ &newalias->metric_name);
+ perf_pmu_assign_str(old->name, "value", &old->str, &newalias->str);
+ old->scale = newalias->scale;
+ old->per_pkg = newalias->per_pkg;
+ old->snapshot = newalias->snapshot;
+ memcpy(old->unit, newalias->unit, sizeof(old->unit));
+}
+
+/* Delete an alias entry. */
+static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+{
+ zfree(&newalias->name);
+ zfree(&newalias->desc);
+ zfree(&newalias->long_desc);
+ zfree(&newalias->topic);
+ zfree(&newalias->str);
+ zfree(&newalias->metric_expr);
+ zfree(&newalias->metric_name);
+ parse_events_terms__purge(&newalias->terms);
+ free(newalias);
+}
+
+/*
+ * Merge an alias: search the alias list, and if this name is already
+ * present, merge both entries to combine all information.
+ */
+static bool perf_pmu_merge_alias(struct perf_pmu_alias *newalias,
+ struct list_head *alist)
+{
+ struct perf_pmu_alias *a;
+
+ list_for_each_entry(a, alist, list) {
+ if (!strcasecmp(newalias->name, a->name)) {
+ perf_pmu_update_alias(a, newalias);
+ perf_pmu_free_alias(newalias);
+ return true;
+ }
+ }
+ return false;
+}
+
+static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name,
+ char *desc, char *val,
+ char *long_desc, char *topic,
+ char *unit, char *perpkg,
+ char *metric_expr,
+ char *metric_name)
+{
+ struct parse_events_term *term;
+ struct perf_pmu_alias *alias;
+ int ret;
+ int num;
+ char newval[256];
+
+ alias = malloc(sizeof(*alias));
+ if (!alias)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&alias->terms);
+ alias->scale = 1.0;
+ alias->unit[0] = '\0';
+ alias->per_pkg = false;
+ alias->snapshot = false;
+
+ ret = parse_events_terms(&alias->terms, val);
+ if (ret) {
+ pr_err("Cannot parse alias %s: %d\n", val, ret);
+ free(alias);
+ return ret;
+ }
+
+	/*
+	 * Scan the event terms and remove leading zeroes, spaces and
+	 * newlines: some platforms specify terms as event=0x0091 (read
+	 * from the ../<PMU>/events/<FILE> sysfs files) while others
+	 * specify event=0x91 (read from the JSON files).
+	 *
+	 * Rebuild the string so that the alias->str member is comparable,
+	 * e.g. both forms above end up as "event=0x91".
+	 */
+ memset(newval, 0, sizeof(newval));
+ ret = 0;
+ list_for_each_entry(term, &alias->terms, list) {
+ if (ret)
+ ret += scnprintf(newval + ret, sizeof(newval) - ret,
+ ",");
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM)
+ ret += scnprintf(newval + ret, sizeof(newval) - ret,
+ "%s=%#x", term->config, term->val.num);
+ else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+ ret += scnprintf(newval + ret, sizeof(newval) - ret,
+ "%s=%s", term->config, term->val.str);
+ }
+
+ alias->name = strdup(name);
+ if (dir) {
+ /*
+ * load unit name and scale if available
+ */
+ perf_pmu__parse_unit(alias, dir, name);
+ perf_pmu__parse_scale(alias, dir, name);
+ perf_pmu__parse_per_pkg(alias, dir, name);
+ perf_pmu__parse_snapshot(alias, dir, name);
+ }
+
+ alias->metric_expr = metric_expr ? strdup(metric_expr) : NULL;
+	alias->metric_name = metric_name ? strdup(metric_name) : NULL;
+ alias->desc = desc ? strdup(desc) : NULL;
+ alias->long_desc = long_desc ? strdup(long_desc) :
+ desc ? strdup(desc) : NULL;
+ alias->topic = topic ? strdup(topic) : NULL;
+ if (unit) {
+ if (convert_scale(unit, &unit, &alias->scale) < 0)
+ return -1;
+ snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
+ }
+ alias->per_pkg = perpkg && sscanf(perpkg, "%d", &num) == 1 && num == 1;
+ alias->str = strdup(newval);
+
+ if (!perf_pmu_merge_alias(alias, list))
+ list_add_tail(&alias->list, list);
+
+ return 0;
+}
+
+static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file)
+{
+ char buf[256];
+ int ret;
+
+ ret = fread(buf, 1, sizeof(buf), file);
+ if (ret == 0)
+ return -EINVAL;
+
+ buf[ret] = 0;
+
+ /* Remove trailing newline from sysfs file */
+ rtrim(buf);
+
+ return __perf_pmu__new_alias(list, dir, name, NULL, buf, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+}
+
+static inline bool pmu_alias_info_file(char *name)
+{
+ size_t len;
+
+ len = strlen(name);
+ if (len > 5 && !strcmp(name + len - 5, ".unit"))
+ return true;
+ if (len > 6 && !strcmp(name + len - 6, ".scale"))
+ return true;
+ if (len > 8 && !strcmp(name + len - 8, ".per-pkg"))
+ return true;
+ if (len > 9 && !strcmp(name + len - 9, ".snapshot"))
+ return true;
+
+ return false;
+}
+
+/*
+ * Process all the sysfs attributes located under the directory
+ * specified in the 'dir' parameter.
+ */
+static int pmu_aliases_parse(char *dir, struct list_head *head)
+{
+ struct dirent *evt_ent;
+ DIR *event_dir;
+
+ event_dir = opendir(dir);
+ if (!event_dir)
+ return -EINVAL;
+
+ while ((evt_ent = readdir(event_dir))) {
+ char path[PATH_MAX];
+ char *name = evt_ent->d_name;
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ /*
+ * skip info files parsed in perf_pmu__new_alias()
+ */
+ if (pmu_alias_info_file(name))
+ continue;
+
+ scnprintf(path, PATH_MAX, "%s/%s", dir, name);
+
+ file = fopen(path, "r");
+ if (!file) {
+ pr_debug("Cannot open %s\n", path);
+ continue;
+ }
+
+ if (perf_pmu__new_alias(head, dir, name, file) < 0)
+ pr_debug("Cannot set up %s\n", name);
+ fclose(file);
+ }
+
+ closedir(event_dir);
+ return 0;
+}
+
+/*
+ * Reading the pmu event aliases definition, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/events as sysfs group attributes.
+ */
+static int pmu_aliases(const char *name, struct list_head *head)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(path, PATH_MAX,
+ "%s/bus/event_source/devices/%s/events", sysfs, name);
+
+ if (stat(path, &st) < 0)
+ return 0; /* no error if 'events' does not exist */
+
+ if (pmu_aliases_parse(path, head))
+ return -1;
+
+ return 0;
+}
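+
+/*
+ * A sketch of the layout this walks (paths and contents vary by
+ * platform; the values below are just an illustration):
+ *
+ *	/sys/bus/event_source/devices/cpu/events/cycles
+ *		-> contains e.g. "event=0x3c"
+ *	/sys/bus/event_source/devices/cpu/events/cycles.unit
+ *	/sys/bus/event_source/devices/cpu/events/cycles.scale
+ *
+ * Each non-info file becomes one perf_pmu_alias entry on 'head'.
+ */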
+
+static int pmu_alias_terms(struct perf_pmu_alias *alias,
+ struct list_head *terms)
+{
+ struct parse_events_term *term, *cloned;
+ LIST_HEAD(list);
+ int ret;
+
+ list_for_each_entry(term, &alias->terms, list) {
+ ret = parse_events_term__clone(&cloned, term);
+ if (ret) {
+ parse_events_terms__purge(&list);
+ return ret;
+ }
+ /*
+ * Weak terms don't override command line options,
+ * which we don't want for implicit terms in aliases.
+ */
+ cloned->weak = true;
+ list_add_tail(&cloned->list, &list);
+ }
+ list_splice(&list, terms);
+ return 0;
+}
+
+/*
+ * Read/parse the default PMU type value, which should be located at
+ * /sys/bus/event_source/devices/<dev>/type as a sysfs attribute.
+ */
+static int pmu_type(const char *name, __u32 *type)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ FILE *file;
+ int ret = 0;
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/type", sysfs, name);
+
+ if (stat(path, &st) < 0)
+ return -1;
+
+ file = fopen(path, "r");
+ if (!file)
+ return -EINVAL;
+
+ if (1 != fscanf(file, "%u", type))
+ ret = -1;
+
+ fclose(file);
+ return ret;
+}
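+
+/*
+ * Example (the value is platform dependent): on x86,
+ * /sys/bus/event_source/devices/cpu/type typically contains "4"
+ * (PERF_TYPE_RAW), which ends up in perf_event_attr.type via
+ * perf_pmu__config().
+ */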
+
+/* Add all pmus in sysfs to pmu list: */
+static void pmu_read_sysfs(void)
+{
+ char path[PATH_MAX];
+ DIR *dir;
+ struct dirent *dent;
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return;
+
+ snprintf(path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH, sysfs);
+
+ dir = opendir(path);
+ if (!dir)
+ return;
+
+ while ((dent = readdir(dir))) {
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
+ continue;
+ /* add to static LIST_HEAD(pmus): */
+ perf_pmu__find(dent->d_name);
+ }
+
+ closedir(dir);
+}
+
+static struct cpu_map *__pmu_cpumask(const char *path)
+{
+ FILE *file;
+ struct cpu_map *cpus;
+
+ file = fopen(path, "r");
+ if (!file)
+ return NULL;
+
+ cpus = cpu_map__read(file);
+ fclose(file);
+ return cpus;
+}
+
+/*
+ * Uncore PMUs have a "cpumask" file under sysfs. CPU PMUs (e.g. on arm/arm64)
+ * may have a "cpus" file.
+ */
+#define CPUS_TEMPLATE_UNCORE "%s/bus/event_source/devices/%s/cpumask"
+#define CPUS_TEMPLATE_CPU "%s/bus/event_source/devices/%s/cpus"
+
+static struct cpu_map *pmu_cpumask(const char *name)
+{
+ char path[PATH_MAX];
+ struct cpu_map *cpus;
+ const char *sysfs = sysfs__mountpoint();
+ const char *templates[] = {
+ CPUS_TEMPLATE_UNCORE,
+ CPUS_TEMPLATE_CPU,
+ NULL
+ };
+ const char **template;
+
+ if (!sysfs)
+ return NULL;
+
+ for (template = templates; *template; template++) {
+ snprintf(path, PATH_MAX, *template, sysfs, name);
+ cpus = __pmu_cpumask(path);
+ if (cpus)
+ return cpus;
+ }
+
+ return NULL;
+}
+
+static bool pmu_is_uncore(const char *name)
+{
+ char path[PATH_MAX];
+ struct cpu_map *cpus;
+ const char *sysfs = sysfs__mountpoint();
+
+	/* Guard against an unmounted sysfs, as the other helpers do */
+	if (!sysfs)
+		return false;
+
+	snprintf(path, PATH_MAX, CPUS_TEMPLATE_UNCORE, sysfs, name);
+ cpus = __pmu_cpumask(path);
+ cpu_map__put(cpus);
+
+ return !!cpus;
+}
+
+/*
+ * Core PMU devices may have a name other than "cpu" in sysfs on some
+ * platforms.
+ * Look for known sysfs files to identify an ARM core device.
+ */
+static int is_arm_pmu_core(const char *name)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+
+ if (!sysfs)
+ return 0;
+
+ /* Look for cpu sysfs (specific to arm) */
+ scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus",
+ sysfs, name);
+ if (stat(path, &st) == 0)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Return the CPU id as a raw string.
+ *
+ * Each architecture should provide a more precise id string that
+ * can be used to match the architecture's "mapfile".
+ */
+char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+ return NULL;
+}
+
+/*
+ * Return zero when the cpuid from the mapfile.csv matches the cpuid
+ * string generated on this platform, and non-zero otherwise.
+ */
+int strcmp_cpuid_str(const char *mapcpuid, const char *cpuid)
+{
+ regex_t re;
+ regmatch_t pmatch[1];
+ int match;
+
+ if (regcomp(&re, mapcpuid, REG_EXTENDED) != 0) {
+		/* Warn that we could not compile the regex to match this string. */
+ pr_info("Invalid regular expression %s\n", mapcpuid);
+ return 1;
+ }
+
+ match = !regexec(&re, cpuid, 1, pmatch, 0);
+ regfree(&re);
+ if (match) {
+ size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so);
+
+ /* Verify the entire string matched. */
+ if (match_len == strlen(cpuid))
+ return 0;
+ }
+ return 1;
+}
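+
+/*
+ * Example (hypothetical values): a mapfile.csv entry may carry a regex
+ * such as "GenuineIntel-6-[45]E", so that
+ *
+ *	strcmp_cpuid_str("GenuineIntel-6-[45]E", "GenuineIntel-6-4E") == 0
+ *	strcmp_cpuid_str("GenuineIntel-6-[45]E", "GenuineIntel-6-4E2") != 0
+ *
+ * The second call fails because the match must cover the whole cpuid
+ * string, not just a prefix of it.
+ */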
+
+static char *perf_pmu__getcpuid(struct perf_pmu *pmu)
+{
+ char *cpuid;
+ static bool printed;
+
+ cpuid = getenv("PERF_CPUID");
+ if (cpuid)
+ cpuid = strdup(cpuid);
+ if (!cpuid)
+ cpuid = get_cpuid_str(pmu);
+ if (!cpuid)
+ return NULL;
+
+ if (!printed) {
+ pr_debug("Using CPUID %s\n", cpuid);
+ printed = true;
+ }
+ return cpuid;
+}
+
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
+{
+ struct pmu_events_map *map;
+ char *cpuid = perf_pmu__getcpuid(pmu);
+ int i;
+
+	/*
+	 * On some platforms which use a cpus map, cpuid can be NULL for
+	 * PMUs other than core PMUs.
+	 */
+ if (!cpuid)
+ return NULL;
+
+ i = 0;
+ for (;;) {
+ map = &pmu_events_map[i++];
+ if (!map->table) {
+ map = NULL;
+ break;
+ }
+
+ if (!strcmp_cpuid_str(map->cpuid, cpuid))
+ break;
+ }
+ free(cpuid);
+ return map;
+}
+
+/*
+ * From the pmu_events_map, find the table of PMU events that corresponds
+ * to the current running CPU. Then, add all PMU events from that table
+ * as aliases.
+ */
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+ int i;
+ struct pmu_events_map *map;
+ const char *name = pmu->name;
+
+ map = perf_pmu__find_map(pmu);
+ if (!map)
+ return;
+
+ /*
+	 * Found a matching PMU events table. Create the aliases.
+ */
+ i = 0;
+ while (1) {
+ const char *cpu_name = is_arm_pmu_core(name) ? name : "cpu";
+ struct pmu_event *pe = &map->table[i++];
+ const char *pname = pe->pmu ? pe->pmu : cpu_name;
+
+ if (!pe->name) {
+ if (pe->metric_group || pe->metric_name)
+ continue;
+ break;
+ }
+
+ /*
+		 * An uncore alias may come from a different PMU
+		 * that shares a common name prefix.
+ */
+ if (pmu_is_uncore(name) &&
+ !strncmp(pname, name, strlen(pname)))
+ goto new_alias;
+
+ if (strcmp(pname, name))
+ continue;
+
+new_alias:
+ /* need type casts to override 'const' */
+ __perf_pmu__new_alias(head, NULL, (char *)pe->name,
+ (char *)pe->desc, (char *)pe->event,
+ (char *)pe->long_desc, (char *)pe->topic,
+ (char *)pe->unit, (char *)pe->perpkg,
+ (char *)pe->metric_expr,
+ (char *)pe->metric_name);
+ }
+}
+
+struct perf_event_attr * __weak
+perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
+{
+ return NULL;
+}
+
+static struct perf_pmu *pmu_lookup(const char *name)
+{
+ struct perf_pmu *pmu;
+ LIST_HEAD(format);
+ LIST_HEAD(aliases);
+ __u32 type;
+
+ /*
+	 * The PMU data we store and need consists of the PMU
+	 * type value and the format definitions. Load both
+	 * right now.
+ */
+ if (pmu_format(name, &format))
+ return NULL;
+
+ /*
+ * Check the type first to avoid unnecessary work.
+ */
+ if (pmu_type(name, &type))
+ return NULL;
+
+ if (pmu_aliases(name, &aliases))
+ return NULL;
+
+ pmu = zalloc(sizeof(*pmu));
+ if (!pmu)
+ return NULL;
+
+ pmu->cpus = pmu_cpumask(name);
+ pmu->name = strdup(name);
+ pmu->type = type;
+ pmu->is_uncore = pmu_is_uncore(name);
+ pmu_add_cpu_aliases(&aliases, pmu);
+
+ INIT_LIST_HEAD(&pmu->format);
+ INIT_LIST_HEAD(&pmu->aliases);
+ list_splice(&format, &pmu->format);
+ list_splice(&aliases, &pmu->aliases);
+ list_add_tail(&pmu->list, &pmus);
+
+ pmu->default_config = perf_pmu__get_default_config(pmu);
+
+ return pmu;
+}
+
+static struct perf_pmu *pmu_find(const char *name)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &pmus, list)
+ if (!strcmp(pmu->name, name))
+ return pmu;
+
+ return NULL;
+}
+
+struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
+{
+ /*
+	 * PMU iterator: if pmu is NULL, we start at the beginning,
+	 * otherwise return the next PMU. Returns NULL at the end.
+ */
+ if (!pmu) {
+ pmu_read_sysfs();
+ pmu = list_prepare_entry(pmu, &pmus, list);
+ }
+ list_for_each_entry_continue(pmu, &pmus, list)
+ return pmu;
+ return NULL;
+}
+
+struct perf_pmu *perf_pmu__find(const char *name)
+{
+ struct perf_pmu *pmu;
+
+ /*
+	 * Once a PMU is loaded it stays in the list, which
+	 * saves us from reading/parsing the PMU format
+	 * definitions multiple times.
+ */
+ pmu = pmu_find(name);
+ if (pmu)
+ return pmu;
+
+ return pmu_lookup(name);
+}
+
+static struct perf_pmu_format *
+pmu_find_format(struct list_head *formats, const char *name)
+{
+ struct perf_pmu_format *format;
+
+ list_for_each_entry(format, formats, list)
+ if (!strcmp(format->name, name))
+ return format;
+
+ return NULL;
+}
+
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name)
+{
+ struct perf_pmu_format *format = pmu_find_format(formats, name);
+ __u64 bits = 0;
+ int fbit;
+
+ if (!format)
+ return 0;
+
+ for_each_set_bit(fbit, format->bits, PERF_PMU_FORMAT_BITS)
+ bits |= 1ULL << fbit;
+
+ return bits;
+}
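+
+/*
+ * For instance, assuming a sysfs format file "event" containing
+ * "config:0-7", perf_pmu__format_bits(formats, "event") returns 0xff:
+ * the mask of attr->config bits that the "event" term occupies.
+ */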
+
+/*
+ * Sets the value based on the format definition (format parameter)
+ * and the unformatted value (value parameter).
+ */
+static void pmu_format_value(unsigned long *format, __u64 value, __u64 *v,
+ bool zero)
+{
+ unsigned long fbit, vbit;
+
+ for (fbit = 0, vbit = 0; fbit < PERF_PMU_FORMAT_BITS; fbit++) {
+
+ if (!test_bit(fbit, format))
+ continue;
+
+ if (value & (1llu << vbit++))
+ *v |= (1llu << fbit);
+ else if (zero)
+ *v &= ~(1llu << fbit);
+ }
+}
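+
+/*
+ * Worked example (hypothetical format): with format bits {0-7,32-35},
+ * i.e. "config:0-7,32-35", and value 0x123, the low 8 value bits (0x23)
+ * land in config bits 0-7 and the next 4 bits (0x1) in bits 32-35,
+ * giving *v = 0x100000023.
+ */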
+
+static __u64 pmu_format_max_value(const unsigned long *format)
+{
+ int w;
+
+ w = bitmap_weight(format, PERF_PMU_FORMAT_BITS);
+ if (!w)
+ return 0;
+ if (w < 64)
+ return (1ULL << w) - 1;
+ return -1;
+}
+
+/*
+ * Term is a string term, and might be a param-term. Try to look up its value
+ * in the remaining terms:
+ * - We have a term like "base-or-format-term=param-term",
+ * - We need to find the value supplied for "param-term" (with param-term named
+ * in a config string) later on in the term list.
+ */
+static int pmu_resolve_param_term(struct parse_events_term *term,
+ struct list_head *head_terms,
+ __u64 *value)
+{
+ struct parse_events_term *t;
+
+ list_for_each_entry(t, head_terms, list) {
+ if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (!strcmp(t->config, term->config)) {
+ t->used = true;
+ *value = t->val.num;
+ return 0;
+ }
+ }
+ }
+
+ if (verbose > 0)
+ printf("Required parameter '%s' not specified\n", term->config);
+
+ return -1;
+}
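+
+/*
+ * Example (hypothetical alias): an alias defined as "event=0x91,ldlat=?"
+ * leaves ldlat as a parameter; a user spec such as pmu/the_alias,ldlat=3/
+ * supplies the numeric term "ldlat=3", which this lookup resolves to 3.
+ */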
+
+static char *pmu_formats_string(struct list_head *formats)
+{
+ struct perf_pmu_format *format;
+ char *str = NULL;
+ struct strbuf buf = STRBUF_INIT;
+ unsigned i = 0;
+
+ if (!formats)
+ return NULL;
+
+ /* sysfs exported terms */
+ list_for_each_entry(format, formats, list)
+ if (strbuf_addf(&buf, i++ ? ",%s" : "%s", format->name) < 0)
+ goto error;
+
+ str = strbuf_detach(&buf, NULL);
+error:
+ strbuf_release(&buf);
+
+ return str;
+}
+
+/*
+ * Set up one of the config[12] attr members based on the
+ * user input data (the term parameter).
+ */
+static int pmu_config_term(struct list_head *formats,
+ struct perf_event_attr *attr,
+ struct parse_events_term *term,
+ struct list_head *head_terms,
+ bool zero, struct parse_events_error *err)
+{
+ struct perf_pmu_format *format;
+ __u64 *vp;
+ __u64 val, max_val;
+
+ /*
+ * If this is a parameter we've already used for parameterized-eval,
+ * skip it in normal eval.
+ */
+ if (term->used)
+ return 0;
+
+ /*
+ * Hardcoded terms should be already in, so nothing
+ * to be done for them.
+ */
+ if (parse_events__is_hardcoded_term(term))
+ return 0;
+
+ format = pmu_find_format(formats, term->config);
+ if (!format) {
+ if (verbose > 0)
+ printf("Invalid event/parameter '%s'\n", term->config);
+ if (err) {
+ char *pmu_term = pmu_formats_string(formats);
+
+ err->idx = term->err_term;
+ err->str = strdup("unknown term");
+ err->help = parse_events_formats_error_string(pmu_term);
+ free(pmu_term);
+ }
+ return -EINVAL;
+ }
+
+ switch (format->value) {
+ case PERF_PMU_FORMAT_VALUE_CONFIG:
+ vp = &attr->config;
+ break;
+ case PERF_PMU_FORMAT_VALUE_CONFIG1:
+ vp = &attr->config1;
+ break;
+ case PERF_PMU_FORMAT_VALUE_CONFIG2:
+ vp = &attr->config2;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ /*
+ * Either directly use a numeric term, or try to translate string terms
+ * using event parameters.
+ */
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (term->no_value &&
+ bitmap_weight(format->bits, PERF_PMU_FORMAT_BITS) > 1) {
+ if (err) {
+ err->idx = term->err_val;
+ err->str = strdup("no value assigned for term");
+ }
+ return -EINVAL;
+ }
+
+ val = term->val.num;
+ } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+ if (strcmp(term->val.str, "?")) {
+ if (verbose > 0) {
+ pr_info("Invalid sysfs entry %s=%s\n",
+ term->config, term->val.str);
+ }
+ if (err) {
+ err->idx = term->err_val;
+ err->str = strdup("expected numeric value");
+ }
+ return -EINVAL;
+ }
+
+ if (pmu_resolve_param_term(term, head_terms, &val))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ max_val = pmu_format_max_value(format->bits);
+ if (val > max_val) {
+ if (err) {
+ err->idx = term->err_val;
+ if (asprintf(&err->str,
+ "value too big for format, maximum is %llu",
+ (unsigned long long)max_val) < 0)
+ err->str = strdup("value too big for format");
+ return -EINVAL;
+ }
+ /*
+ * Assume we don't care if !err, in which case the value will be
+ * silently truncated.
+ */
+ }
+
+ pmu_format_value(format->bits, val, vp, zero);
+ return 0;
+}
+
+int perf_pmu__config_terms(struct list_head *formats,
+ struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ bool zero, struct parse_events_error *err)
+{
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_terms, list) {
+ if (pmu_config_term(formats, attr, term, head_terms,
+ zero, err))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Configure the event's 'attr' parameter based on:
+ * 1) the user's input - specified in the head_terms parameter
+ * 2) the PMU format definitions - specified by the pmu parameter
+ */
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ struct parse_events_error *err)
+{
+ bool zero = !!pmu->default_config;
+
+ attr->type = pmu->type;
+ return perf_pmu__config_terms(&pmu->format, attr, head_terms,
+ zero, err);
+}
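+
+/*
+ * A minimal usage sketch (error handling omitted; the "cpu" PMU and a
+ * populated 'head_terms' list are assumed to exist):
+ *
+ *	struct perf_pmu *pmu = perf_pmu__find("cpu");
+ *	struct perf_event_attr attr = {};
+ *
+ *	if (pmu && !perf_pmu__config(pmu, &attr, head_terms, NULL)) {
+ *		// attr.type is now pmu->type and config/config1/config2
+ *		// are filled in from the parsed terms.
+ *	}
+ */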
+
+static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
+ struct parse_events_term *term)
+{
+ struct perf_pmu_alias *alias;
+ char *name;
+
+ if (parse_events__is_hardcoded_term(term))
+ return NULL;
+
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
+ if (term->val.num != 1)
+ return NULL;
+ if (pmu_find_format(&pmu->format, term->config))
+ return NULL;
+ name = term->config;
+ } else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
+ if (strcasecmp(term->config, "event"))
+ return NULL;
+ name = term->val.str;
+ } else {
+ return NULL;
+ }
+
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ if (!strcasecmp(alias->name, name))
+ return alias;
+ }
+ return NULL;
+}
+
+
+static int check_info_data(struct perf_pmu_alias *alias,
+ struct perf_pmu_info *info)
+{
+ /*
+	 * Only one term in an event definition can
+	 * define the unit, scale and snapshot; fail
+	 * if more than one does.
+ */
+ if ((info->unit && alias->unit[0]) ||
+ (info->scale && alias->scale) ||
+ (info->snapshot && alias->snapshot))
+ return -EINVAL;
+
+ if (alias->unit[0])
+ info->unit = alias->unit;
+
+ if (alias->scale)
+ info->scale = alias->scale;
+
+ if (alias->snapshot)
+ info->snapshot = alias->snapshot;
+
+ return 0;
+}
+
+/*
+ * Find alias in the terms list and replace it with the terms
+ * defined for the alias
+ */
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+ struct perf_pmu_info *info)
+{
+ struct parse_events_term *term, *h;
+ struct perf_pmu_alias *alias;
+ int ret;
+
+ info->per_pkg = false;
+
+ /*
+ * Mark unit and scale as not set
+ * (different from default values, see below)
+ */
+ info->unit = NULL;
+ info->scale = 0.0;
+ info->snapshot = false;
+ info->metric_expr = NULL;
+ info->metric_name = NULL;
+
+ list_for_each_entry_safe(term, h, head_terms, list) {
+ alias = pmu_find_alias(pmu, term);
+ if (!alias)
+ continue;
+ ret = pmu_alias_terms(alias, &term->list);
+ if (ret)
+ return ret;
+
+ ret = check_info_data(alias, info);
+ if (ret)
+ return ret;
+
+ if (alias->per_pkg)
+ info->per_pkg = true;
+ info->metric_expr = alias->metric_expr;
+ info->metric_name = alias->metric_name;
+
+ list_del(&term->list);
+ free(term);
+ }
+
+ /*
+	 * If no unit or scale was found in the aliases, then
+	 * set the defaults as for an evsel:
+	 * the unit cannot be left NULL.
+ */
+ if (info->unit == NULL)
+ info->unit = "";
+
+ if (info->scale == 0.0)
+ info->scale = 1.0;
+
+ return 0;
+}
+
+int perf_pmu__new_format(struct list_head *list, char *name,
+ int config, unsigned long *bits)
+{
+ struct perf_pmu_format *format;
+
+ format = zalloc(sizeof(*format));
+ if (!format)
+ return -ENOMEM;
+
+ format->name = strdup(name);
+ format->value = config;
+ memcpy(format->bits, bits, sizeof(format->bits));
+
+ list_add_tail(&format->list, list);
+ return 0;
+}
+
+void perf_pmu__set_format(unsigned long *bits, long from, long to)
+{
+ long b;
+
+ if (!to)
+ to = from;
+
+ memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
+ for (b = from; b <= to; b++)
+ set_bit(b, bits);
+}
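+
+/*
+ * E.g. perf_pmu__set_format(bits, 0, 7) sets bits 0..7, which is what
+ * the format parser produces for a "config:0-7" range; a single value
+ * such as "config:9" comes in as (bits, 9, 0), and 'to' then falls back
+ * to 'from'.
+ */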
+
+void perf_pmu__del_formats(struct list_head *formats)
+{
+ struct perf_pmu_format *fmt, *tmp;
+
+ list_for_each_entry_safe(fmt, tmp, formats, list) {
+ list_del(&fmt->list);
+ free(fmt->name);
+ free(fmt);
+ }
+}
+
+static int sub_non_neg(int a, int b)
+{
+ if (b > a)
+ return 0;
+ return a - b;
+}
+
+static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
+ struct perf_pmu_alias *alias)
+{
+ struct parse_events_term *term;
+ int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
+
+ list_for_each_entry(term, &alias->terms, list) {
+ if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR)
+ used += snprintf(buf + used, sub_non_neg(len, used),
+ ",%s=%s", term->config,
+ term->val.str);
+ }
+
+ if (sub_non_neg(len, used) > 0) {
+ buf[used] = '/';
+ used++;
+ }
+ if (sub_non_neg(len, used) > 0) {
+ buf[used] = '\0';
+ used++;
+ } else
+ buf[len - 1] = '\0';
+
+ return buf;
+}
+
+static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
+ struct perf_pmu_alias *alias)
+{
+ snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
+ return buf;
+}
+
+struct sevent {
+ char *name;
+ char *desc;
+ char *topic;
+ char *str;
+ char *pmu;
+ char *metric_expr;
+ char *metric_name;
+};
+
+static int cmp_sevent(const void *a, const void *b)
+{
+ const struct sevent *as = a;
+ const struct sevent *bs = b;
+
+ /* Put extra events last */
+ if (!!as->desc != !!bs->desc)
+ return !!as->desc - !!bs->desc;
+ if (as->topic && bs->topic) {
+ int n = strcmp(as->topic, bs->topic);
+
+ if (n)
+ return n;
+ }
+ return strcmp(as->name, bs->name);
+}
+
+static void wordwrap(char *s, int start, int max, int corr)
+{
+ int column = start;
+ int n;
+
+ while (*s) {
+ int wlen = strcspn(s, " \t");
+
+ if (column + wlen >= max && column > start) {
+ printf("\n%*s", start, "");
+ column = start + corr;
+ }
+ n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+ if (n <= 0)
+ break;
+ s += wlen;
+ column += n;
+ s = ltrim(s);
+ }
+}
+
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
+ bool long_desc, bool details_flag)
+{
+ struct perf_pmu *pmu;
+ struct perf_pmu_alias *alias;
+ char buf[1024];
+ int printed = 0;
+ int len, j;
+ struct sevent *aliases;
+ int numdesc = 0;
+ int columns = pager_get_columns();
+ char *topic = NULL;
+
+ pmu = NULL;
+ len = 0;
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ list_for_each_entry(alias, &pmu->aliases, list)
+ len++;
+ if (pmu->selectable)
+ len++;
+ }
+ aliases = zalloc(sizeof(struct sevent) * len);
+ if (!aliases)
+ goto out_enomem;
+ pmu = NULL;
+ j = 0;
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ list_for_each_entry(alias, &pmu->aliases, list) {
+ char *name = alias->desc ? alias->name :
+ format_alias(buf, sizeof(buf), pmu, alias);
+ bool is_cpu = !strcmp(pmu->name, "cpu");
+
+ if (event_glob != NULL &&
+ !(strglobmatch_nocase(name, event_glob) ||
+ (!is_cpu && strglobmatch_nocase(alias->name,
+ event_glob)) ||
+ (alias->topic &&
+ strglobmatch_nocase(alias->topic, event_glob))))
+ continue;
+
+			if (is_cpu && !name_only && !alias->desc)
+				name = format_alias_or(buf, sizeof(buf), pmu, alias);
+
+			aliases[j].name = strdup(name);
+ if (!aliases[j].name)
+ goto out_enomem;
+
+ aliases[j].desc = long_desc ? alias->long_desc :
+ alias->desc;
+ aliases[j].topic = alias->topic;
+ aliases[j].str = alias->str;
+ aliases[j].pmu = pmu->name;
+ aliases[j].metric_expr = alias->metric_expr;
+ aliases[j].metric_name = alias->metric_name;
+ j++;
+ }
+ if (pmu->selectable &&
+ (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
+ char *s;
+ if (asprintf(&s, "%s//", pmu->name) < 0)
+ goto out_enomem;
+ aliases[j].name = s;
+ j++;
+ }
+ }
+ len = j;
+ qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
+ for (j = 0; j < len; j++) {
+ /* Skip duplicates */
+ if (j > 0 && !strcmp(aliases[j].name, aliases[j - 1].name))
+ continue;
+ if (name_only) {
+ printf("%s ", aliases[j].name);
+ continue;
+ }
+ if (aliases[j].desc && !quiet_flag) {
+ if (numdesc++ == 0)
+ printf("\n");
+ if (aliases[j].topic && (!topic ||
+ strcmp(topic, aliases[j].topic))) {
+ printf("%s%s:\n", topic ? "\n" : "",
+ aliases[j].topic);
+ topic = aliases[j].topic;
+ }
+ printf(" %-50s\n", aliases[j].name);
+ printf("%*s", 8, "[");
+ wordwrap(aliases[j].desc, 8, columns, 0);
+ printf("]\n");
+ if (details_flag) {
+ printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str);
+ if (aliases[j].metric_name)
+ printf(" MetricName: %s", aliases[j].metric_name);
+ if (aliases[j].metric_expr)
+ printf(" MetricExpr: %s", aliases[j].metric_expr);
+ putchar('\n');
+ }
+ } else
+ printf(" %-50s [Kernel PMU event]\n", aliases[j].name);
+ printed++;
+ }
+ if (printed && pager_in_use())
+ printf("\n");
+out_free:
+ for (j = 0; j < len; j++)
+ zfree(&aliases[j].name);
+ zfree(&aliases);
+ return;
+
+out_enomem:
+ printf("FATAL: not enough memory to print PMU events\n");
+ if (aliases)
+ goto out_free;
+}
+
+bool pmu_have_event(const char *pname, const char *name)
+{
+ struct perf_pmu *pmu;
+ struct perf_pmu_alias *alias;
+
+ pmu = NULL;
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (strcmp(pname, pmu->name))
+ continue;
+ list_for_each_entry(alias, &pmu->aliases, list)
+ if (!strcmp(alias->name, name))
+ return true;
+ }
+ return false;
+}
+
+static FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name)
+{
+ struct stat st;
+ char path[PATH_MAX];
+ const char *sysfs;
+
+ sysfs = sysfs__mountpoint();
+ if (!sysfs)
+ return NULL;
+
+ snprintf(path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/%s", sysfs, pmu->name, name);
+
+ if (stat(path, &st) < 0)
+ return NULL;
+
+ return fopen(path, "r");
+}
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
+ ...)
+{
+ va_list args;
+ FILE *file;
+ int ret = EOF;
+
+ va_start(args, fmt);
+ file = perf_pmu__open_file(pmu, name);
+ if (file) {
+ ret = vfscanf(file, fmt, args);
+ fclose(file);
+ }
+ va_end(args);
+ return ret;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
new file mode 100644
index 000000000..21335425f
--- /dev/null
+++ b/tools/perf/util/pmu.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMU_H
+#define __PMU_H
+
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/perf_event.h>
+#include <stdbool.h>
+#include "evsel.h"
+#include "parse-events.h"
+
+enum {
+ PERF_PMU_FORMAT_VALUE_CONFIG,
+ PERF_PMU_FORMAT_VALUE_CONFIG1,
+ PERF_PMU_FORMAT_VALUE_CONFIG2,
+};
+
+#define PERF_PMU_FORMAT_BITS 64
+
+struct perf_event_attr;
+
+struct perf_pmu {
+ char *name;
+ __u32 type;
+ bool selectable;
+ bool is_uncore;
+ struct perf_event_attr *default_config;
+ struct cpu_map *cpus;
+ struct list_head format; /* HEAD struct perf_pmu_format -> list */
+ struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+ struct list_head list; /* ELEM */
+ int (*set_drv_config) (struct perf_evsel_config_term *term);
+};
+
+struct perf_pmu_info {
+ const char *unit;
+ const char *metric_expr;
+ const char *metric_name;
+ double scale;
+ bool per_pkg;
+ bool snapshot;
+};
+
+#define UNIT_MAX_LEN 31 /* max length for event unit name */
+
+struct perf_pmu_alias {
+ char *name;
+ char *desc;
+ char *long_desc;
+ char *topic;
+ char *str;
+ struct list_head terms; /* HEAD struct parse_events_term -> list */
+ struct list_head list; /* ELEM */
+ char unit[UNIT_MAX_LEN+1];
+ double scale;
+ bool per_pkg;
+ bool snapshot;
+ char *metric_expr;
+ char *metric_name;
+};
+
+struct perf_pmu *perf_pmu__find(const char *name);
+int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ struct parse_events_error *error);
+int perf_pmu__config_terms(struct list_head *formats,
+ struct perf_event_attr *attr,
+ struct list_head *head_terms,
+ bool zero, struct parse_events_error *error);
+__u64 perf_pmu__format_bits(struct list_head *formats, const char *name);
+int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
+ struct perf_pmu_info *info);
+struct list_head *perf_pmu__alias(struct perf_pmu *pmu,
+ struct list_head *head_terms);
+int perf_pmu_wrap(void);
+void perf_pmu_error(struct list_head *list, char *name, char const *msg);
+
+int perf_pmu__new_format(struct list_head *list, char *name,
+ int config, unsigned long *bits);
+void perf_pmu__set_format(unsigned long *bits, long from, long to);
+int perf_pmu__format_parse(char *dir, struct list_head *head);
+void perf_pmu__del_formats(struct list_head *formats);
+
+struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
+
+void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
+ bool long_desc, bool details_flag);
+bool pmu_have_event(const char *pname, const char *name);
+
+int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
+
+int perf_pmu__test(void);
+
+struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+
+struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
+
+#endif /* __PMU_H */
diff --git a/tools/perf/util/pmu.l b/tools/perf/util/pmu.l
new file mode 100644
index 000000000..a15d9fbd7
--- /dev/null
+++ b/tools/perf/util/pmu.l
@@ -0,0 +1,43 @@
+%option prefix="perf_pmu_"
+
+%{
+#include <stdlib.h>
+#include <linux/bitops.h>
+#include "pmu.h"
+#include "pmu-bison.h"
+
+static int value(int base)
+{
+ long num;
+
+ errno = 0;
+ num = strtoul(perf_pmu_text, NULL, base);
+ if (errno)
+ return PP_ERROR;
+
+ perf_pmu_lval.num = num;
+ return PP_VALUE;
+}
+
+%}
+
+num_dec [0-9]+
+
+%%
+
+{num_dec} { return value(10); }
+config { return PP_CONFIG; }
+config1 { return PP_CONFIG1; }
+config2 { return PP_CONFIG2; }
+- { return '-'; }
+: { return ':'; }
+, { return ','; }
+. { ; }
+\n { ; }
+
+%%
+
+int perf_pmu_wrap(void)
+{
+ return 1;
+}
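+
+/*
+ * Illustration: for the format string "config:0-7,32-35" this lexer
+ * emits the token stream
+ *
+ *	PP_CONFIG ':' PP_VALUE '-' PP_VALUE ',' PP_VALUE '-' PP_VALUE
+ *
+ * which pmu.y reduces to a single perf_pmu__new_format() call with
+ * bits 0..7 and 32..35 set.
+ */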
diff --git a/tools/perf/util/pmu.y b/tools/perf/util/pmu.y
new file mode 100644
index 000000000..bfd7e8509
--- /dev/null
+++ b/tools/perf/util/pmu.y
@@ -0,0 +1,92 @@
+
+%parse-param {struct list_head *format}
+%parse-param {char *name}
+
+%{
+
+#include <linux/compiler.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+#include <string.h>
+#include "pmu.h"
+
+extern int perf_pmu_lex (void);
+
+#define ABORT_ON(val) \
+do { \
+ if (val) \
+ YYABORT; \
+} while (0)
+
+%}
+
+%token PP_CONFIG PP_CONFIG1 PP_CONFIG2
+%token PP_VALUE PP_ERROR
+%type <num> PP_VALUE
+%type <bits> bit_term
+%type <bits> bits
+
+%union
+{
+ unsigned long num;
+ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+}
+
+%%
+
+format:
+format format_term
+|
+format_term
+
+format_term:
+PP_CONFIG ':' bits
+{
+ ABORT_ON(perf_pmu__new_format(format, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG,
+ $3));
+}
+|
+PP_CONFIG1 ':' bits
+{
+ ABORT_ON(perf_pmu__new_format(format, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG1,
+ $3));
+}
+|
+PP_CONFIG2 ':' bits
+{
+ ABORT_ON(perf_pmu__new_format(format, name,
+ PERF_PMU_FORMAT_VALUE_CONFIG2,
+ $3));
+}
+
+bits:
+bits ',' bit_term
+{
+ bitmap_or($$, $1, $3, 64);
+}
+|
+bit_term
+{
+ memcpy($$, $1, sizeof($1));
+}
+
+bit_term:
+PP_VALUE '-' PP_VALUE
+{
+ perf_pmu__set_format($$, $1, $3);
+}
+|
+PP_VALUE
+{
+ perf_pmu__set_format($$, $1, 0);
+}
+
+%%
+
+void perf_pmu_error(struct list_head *list __maybe_unused,
+ char *name __maybe_unused,
+ char const *msg __maybe_unused)
+{
+}
diff --git a/tools/perf/util/print_binary.c b/tools/perf/util/print_binary.c
new file mode 100644
index 000000000..71aeaf6f4
--- /dev/null
+++ b/tools/perf/util/print_binary.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "print_binary.h"
+#include <linux/log2.h>
+#include "sane_ctype.h"
+
+int binary__fprintf(unsigned char *data, size_t len,
+ size_t bytes_per_line, binary__fprintf_t printer,
+ void *extra, FILE *fp)
+{
+ size_t i, j, mask;
+ int printed = 0;
+
+ if (!printer)
+ return 0;
+
+ bytes_per_line = roundup_pow_of_two(bytes_per_line);
+ mask = bytes_per_line - 1;
+
+ printed += printer(BINARY_PRINT_DATA_BEGIN, 0, extra, fp);
+ for (i = 0; i < len; i++) {
+ if ((i & mask) == 0) {
+ printed += printer(BINARY_PRINT_LINE_BEGIN, -1, extra, fp);
+ printed += printer(BINARY_PRINT_ADDR, i, extra, fp);
+ }
+
+ printed += printer(BINARY_PRINT_NUM_DATA, data[i], extra, fp);
+
+ if (((i & mask) == mask) || i == len - 1) {
+ for (j = 0; j < mask-(i & mask); j++)
+ printed += printer(BINARY_PRINT_NUM_PAD, -1, extra, fp);
+
+ printer(BINARY_PRINT_SEP, i, extra, fp);
+ for (j = i & ~mask; j <= i; j++)
+ printed += printer(BINARY_PRINT_CHAR_DATA, data[j], extra, fp);
+ for (j = 0; j < mask-(i & mask); j++)
+ printed += printer(BINARY_PRINT_CHAR_PAD, i, extra, fp);
+ printed += printer(BINARY_PRINT_LINE_END, -1, extra, fp);
+ }
+ }
+ printed += printer(BINARY_PRINT_DATA_END, -1, extra, fp);
+ return printed;
+}
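+
+/*
+ * A minimal printer callback sketch (hypothetical, hexdump-style):
+ *
+ *	static int hex_printer(enum binary_printer_ops op, unsigned int val,
+ *			       void *extra __maybe_unused, FILE *fp)
+ *	{
+ *		switch (op) {
+ *		case BINARY_PRINT_NUM_DATA:
+ *			return fprintf(fp, " %02x", val);
+ *		case BINARY_PRINT_LINE_END:
+ *			return fprintf(fp, "\n");
+ *		default:
+ *			return 0;
+ *		}
+ *	}
+ *
+ * binary__fprintf(data, len, 16, hex_printer, NULL, stdout) then dumps
+ * 16 bytes per line.
+ */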
+
+int is_printable_array(char *p, unsigned int len)
+{
+ unsigned int i;
+
+ if (!p || !len || p[len - 1] != 0)
+ return 0;
+
+ len--;
+
+ for (i = 0; i < len && p[i]; i++) {
+ if (!isprint(p[i]) && !isspace(p[i]))
+ return 0;
+ }
+ return 1;
+}
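+
+/*
+ * Note the trailing NUL requirement, e.g.:
+ *
+ *	is_printable_array("abc", 4) == 1
+ *	is_printable_array("ab\x01", 4) == 0	(non-printable byte)
+ *	is_printable_array("abc", 3) == 0	(no terminating NUL in range)
+ */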
diff --git a/tools/perf/util/print_binary.h b/tools/perf/util/print_binary.h
new file mode 100644
index 000000000..2a1554afc
--- /dev/null
+++ b/tools/perf/util/print_binary.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_PRINT_BINARY_H
+#define PERF_PRINT_BINARY_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+enum binary_printer_ops {
+ BINARY_PRINT_DATA_BEGIN,
+ BINARY_PRINT_LINE_BEGIN,
+ BINARY_PRINT_ADDR,
+ BINARY_PRINT_NUM_DATA,
+ BINARY_PRINT_NUM_PAD,
+ BINARY_PRINT_SEP,
+ BINARY_PRINT_CHAR_DATA,
+ BINARY_PRINT_CHAR_PAD,
+ BINARY_PRINT_LINE_END,
+ BINARY_PRINT_DATA_END,
+};
+
+typedef int (*binary__fprintf_t)(enum binary_printer_ops op,
+ unsigned int val, void *extra, FILE *fp);
+
+int binary__fprintf(unsigned char *data, size_t len,
+ size_t bytes_per_line, binary__fprintf_t printer,
+ void *extra, FILE *fp);
+
+static inline void print_binary(unsigned char *data, size_t len,
+ size_t bytes_per_line, binary__fprintf_t printer,
+ void *extra)
+{
+ binary__fprintf(data, len, bytes_per_line, printer, extra, stdout);
+}
+
+int is_printable_array(char *p, unsigned int len);
+
+#endif /* PERF_PRINT_BINARY_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
new file mode 100644
index 000000000..4aeb3e139
--- /dev/null
+++ b/tools/perf/util/probe-event.c
@@ -0,0 +1,3547 @@
+/*
+ * probe-event.c : perf-probe definition to probe_events format converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <limits.h>
+#include <elf.h>
+
+#include "util.h"
+#include "event.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "cache.h"
+#include "color.h"
+#include "symbol.h"
+#include "thread.h"
+#include <api/fs/fs.h>
+#include "trace-event.h" /* For __maybe_unused */
+#include "probe-event.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "session.h"
+#include "string2.h"
+
+#include "sane_ctype.h"
+
+#define PERFPROBE_GROUP "probe"
+
+bool probe_event_dry_run; /* Dry run flag */
+struct probe_conf probe_conf;
+
+#define semantic_error(msg ...) pr_err("Semantic error :" msg)
+
+int e_snprintf(char *str, size_t size, const char *format, ...)
+{
+ int ret;
+ va_list ap;
+ va_start(ap, format);
+ ret = vsnprintf(str, size, format, ap);
+ va_end(ap);
+ if (ret >= (int)size)
+ ret = -E2BIG;
+ return ret;
+}
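+
+/*
+ * e_snprintf() is an snprintf() variant that fails loudly on truncation:
+ * e.g. e_snprintf(buf, 4, "%s", "probe") returns -E2BIG instead of the
+ * would-be output length.
+ */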
+
+static struct machine *host_machine;
+
+/* Initialize symbol maps and path of vmlinux/modules */
+int init_probe_symbol_maps(bool user_only)
+{
+ int ret;
+
+ symbol_conf.sort_by_name = true;
+ symbol_conf.allow_aliases = true;
+ ret = symbol__init(NULL);
+ if (ret < 0) {
+ pr_debug("Failed to init symbol map.\n");
+ goto out;
+ }
+
+ if (host_machine || user_only) /* already initialized */
+ return 0;
+
+ if (symbol_conf.vmlinux_name)
+ pr_debug("Use vmlinux: %s\n", symbol_conf.vmlinux_name);
+
+ host_machine = machine__new_host();
+ if (!host_machine) {
+ pr_debug("machine__new_host() failed.\n");
+ symbol__exit();
+ ret = -1;
+ }
+out:
+ if (ret < 0)
+ pr_warning("Failed to init vmlinux path.\n");
+ return ret;
+}
+
+void exit_probe_symbol_maps(void)
+{
+ machine__delete(host_machine);
+ host_machine = NULL;
+ symbol__exit();
+}
+
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
+{
+ /* kmap->ref_reloc_sym should be set if host_machine is initialized */
+ struct kmap *kmap;
+ struct map *map = machine__kernel_map(host_machine);
+
+ if (map__load(map) < 0)
+ return NULL;
+
+ kmap = map__kmap(map);
+ if (!kmap)
+ return NULL;
+
+ if (pmap)
+ *pmap = map;
+
+ return kmap->ref_reloc_sym;
+}
+
+static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
+ bool reloc, bool reladdr)
+{
+ struct ref_reloc_sym *reloc_sym;
+ struct symbol *sym;
+ struct map *map;
+
+	/* ref_reloc_sym is just a label. Need a special fix */
+ reloc_sym = kernel_get_ref_reloc_sym(NULL);
+ if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
+ *addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
+ else {
+ sym = machine__find_kernel_symbol_by_name(host_machine, name, &map);
+ if (!sym)
+ return -ENOENT;
+ *addr = map->unmap_ip(map, sym->start) -
+ ((reloc) ? 0 : map->reloc) -
+ ((reladdr) ? map->start : 0);
+ }
+ return 0;
+}
+
+static struct map *kernel_get_module_map(const char *module)
+{
+ struct maps *maps = machine__kernel_maps(host_machine);
+ struct map *pos;
+
+ /* A file path -- this is an offline module */
+ if (module && strchr(module, '/'))
+ return dso__new_map(module);
+
+ if (!module) {
+ pos = machine__kernel_map(host_machine);
+ return map__get(pos);
+ }
+
+ for (pos = maps__first(maps); pos; pos = map__next(pos)) {
+ /* short_name is "[module]" */
+ if (strncmp(pos->dso->short_name + 1, module,
+ pos->dso->short_name_len - 2) == 0 &&
+ module[pos->dso->short_name_len - 2] == '\0') {
+ return map__get(pos);
+ }
+ }
+ return NULL;
+}
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user)
+{
+ /* Init maps of given executable or kernel */
+ if (user) {
+ struct map *map;
+
+ map = dso__new_map(target);
+ if (map && map->dso) {
+ nsinfo__put(map->dso->nsinfo);
+ map->dso->nsinfo = nsinfo__get(nsi);
+ }
+ return map;
+ } else {
+ return kernel_get_module_map(target);
+ }
+}
+
+static int convert_exec_to_group(const char *exec, char **result)
+{
+ char *ptr1, *ptr2, *exec_copy;
+ char buf[64];
+ int ret;
+
+ exec_copy = strdup(exec);
+ if (!exec_copy)
+ return -ENOMEM;
+
+ ptr1 = basename(exec_copy);
+ if (!ptr1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ for (ptr2 = ptr1; *ptr2 != '\0'; ptr2++) {
+ if (!isalnum(*ptr2) && *ptr2 != '_') {
+ *ptr2 = '\0';
+ break;
+ }
+ }
+
+ ret = e_snprintf(buf, 64, "%s_%s", PERFPROBE_GROUP, ptr1);
+ if (ret < 0)
+ goto out;
+
+ *result = strdup(buf);
+ ret = *result ? 0 : -ENOMEM;
+
+out:
+ free(exec_copy);
+ return ret;
+}
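+
+/*
+ * Example (hypothetical path): for exec = "/usr/bin/my-app" the basename
+ * is truncated at the first character that is neither alphanumeric nor
+ * '_', so the resulting group name is "probe_my".
+ */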
+
+static void clear_perf_probe_point(struct perf_probe_point *pp)
+{
+ free(pp->file);
+ free(pp->function);
+ free(pp->lazy_line);
+}
+
+static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
+{
+ int i;
+
+ for (i = 0; i < ntevs; i++)
+ clear_probe_trace_event(tevs + i);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address);
+static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
+{
+ struct map *map;
+ bool ret = false;
+
+ map = kernel_get_module_map(NULL);
+ if (map) {
+ ret = address <= map->start || map->end < address;
+ if (ret)
+ pr_warning("%s is out of .text, skip it.\n", symbol);
+ map__put(map);
+ }
+ if (!ret && kprobe_blacklist__listed(address)) {
+ pr_warning("%s is blacklisted function, skip it.\n", symbol);
+ ret = true;
+ }
+
+ return ret;
+}
+
+/*
+ * @module can be a module name or a module file path. In case of a path,
+ * inspect the ELF and find out the actual module name.
+ * The caller has to free mod_name after using it.
+ */
+static char *find_module_name(const char *module)
+{
+ int fd;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ char *mod_name = NULL;
+ int name_offset;
+
+ fd = open(module, O_RDONLY);
+ if (fd < 0)
+ return NULL;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ goto elf_err;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto ret_err;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".gnu.linkonce.this_module", NULL);
+ if (!sec)
+ goto ret_err;
+
+ data = elf_getdata(sec, NULL);
+ if (!data || !data->d_buf)
+ goto ret_err;
+
+ /*
+ * NOTE:
+	 * The '.gnu.linkonce.this_module' section of a kernel module ELF
+	 * directly maps to 'struct module' from linux/module.h. This section
+	 * contains the actual module name, which will be used by the kernel
+	 * after loading it. But we cannot use 'struct module' here since
+	 * linux/module.h is not exposed to user space. The offset of 'name'
+	 * has remained the same for a long time, so it is hardcoded here.
+ */
+ if (ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+ name_offset = 12;
+ else /* expect ELFCLASS64 by default */
+ name_offset = 24;
+
+ mod_name = strdup((char *)data->d_buf + name_offset);
+
+ret_err:
+ elf_end(elf);
+elf_err:
+ close(fd);
+ return mod_name;
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+ struct dso *dso;
+ struct map *map;
+ const char *vmlinux_name;
+ int ret = 0;
+
+ if (module) {
+ char module_name[128];
+
+ snprintf(module_name, sizeof(module_name), "[%s]", module);
+ map = map_groups__find_by_name(&host_machine->kmaps, module_name);
+ if (map) {
+ dso = map->dso;
+ goto found;
+ }
+ pr_debug("Failed to find module %s.\n", module);
+ return -ENOENT;
+ }
+
+ map = machine__kernel_map(host_machine);
+ dso = map->dso;
+
+ vmlinux_name = symbol_conf.vmlinux_name;
+ dso->load_errno = 0;
+ if (vmlinux_name)
+ ret = dso__load_vmlinux(dso, map, vmlinux_name, false);
+ else
+ ret = dso__load_vmlinux_path(dso, map);
+found:
+ *pdso = dso;
+ return ret;
+}
+
+/*
+ * Some binaries like glibc have special symbols which are in the symbol
+ * table, but not in the debuginfo. If we can find the address of the
+ * symbol from map, we can translate the address back to the probe point.
+ */
+static int find_alternative_probe_point(struct debuginfo *dinfo,
+ struct perf_probe_point *pp,
+ struct perf_probe_point *result,
+ const char *target, struct nsinfo *nsi,
+ bool uprobes)
+{
+ struct map *map = NULL;
+ struct symbol *sym;
+ u64 address = 0;
+ int ret = -ENOENT;
+
+ /* This can work only for function-name based one */
+ if (!pp->function || pp->file)
+ return -ENOTSUP;
+
+ map = get_target_map(target, nsi, uprobes);
+ if (!map)
+ return -EINVAL;
+
+ /* Find the address of given function */
+ map__for_each_symbol_by_name(map, pp->function, sym) {
+ if (uprobes)
+ address = sym->start;
+ else
+ address = map->unmap_ip(map, sym->start) - map->reloc;
+ break;
+ }
+ if (!address) {
+ ret = -ENOENT;
+ goto out;
+ }
+ pr_debug("Symbol %s address found : %" PRIx64 "\n",
+ pp->function, address);
+
+ ret = debuginfo__find_probe_point(dinfo, (unsigned long)address,
+ result);
+ if (ret <= 0)
+ ret = (!ret) ? -ENOENT : ret;
+ else {
+ result->offset += pp->offset;
+ result->line += pp->line;
+ result->retprobe = pp->retprobe;
+ ret = 0;
+ }
+
+out:
+ map__put(map);
+ return ret;
+
+}
+
+static int get_alternative_probe_event(struct debuginfo *dinfo,
+ struct perf_probe_event *pev,
+ struct perf_probe_point *tmp)
+{
+ int ret;
+
+ memcpy(tmp, &pev->point, sizeof(*tmp));
+ memset(&pev->point, 0, sizeof(pev->point));
+ ret = find_alternative_probe_point(dinfo, tmp, &pev->point, pev->target,
+ pev->nsi, pev->uprobes);
+ if (ret < 0)
+ memcpy(&pev->point, tmp, sizeof(*tmp));
+
+ return ret;
+}
+
+static int get_alternative_line_range(struct debuginfo *dinfo,
+ struct line_range *lr,
+ const char *target, bool user)
+{
+ struct perf_probe_point pp = { .function = lr->function,
+ .file = lr->file,
+ .line = lr->start };
+ struct perf_probe_point result;
+ int ret, len = 0;
+
+ memset(&result, 0, sizeof(result));
+
+ if (lr->end != INT_MAX)
+ len = lr->end - lr->start;
+ ret = find_alternative_probe_point(dinfo, &pp, &result,
+ target, NULL, user);
+ if (!ret) {
+ lr->function = result.function;
+ lr->file = result.file;
+ lr->start = result.line;
+ if (lr->end != INT_MAX)
+ lr->end = lr->start + len;
+ clear_perf_probe_point(&pp);
+ }
+ return ret;
+}
+
+/* Open new debuginfo of given module */
+static struct debuginfo *open_debuginfo(const char *module, struct nsinfo *nsi,
+ bool silent)
+{
+ const char *path = module;
+ char reason[STRERR_BUFSIZE];
+ struct debuginfo *ret = NULL;
+ struct dso *dso = NULL;
+ struct nscookie nsc;
+ int err;
+
+ if (!module || !strchr(module, '/')) {
+ err = kernel_get_module_dso(module, &dso);
+ if (err < 0) {
+ if (!dso || dso->load_errno == 0) {
+ if (!str_error_r(-err, reason, STRERR_BUFSIZE))
+ strcpy(reason, "(unknown)");
+ } else
+ dso__strerror_load(dso, reason, STRERR_BUFSIZE);
+ if (!silent)
+ pr_err("Failed to find the path for %s: %s\n",
+ module ?: "kernel", reason);
+ return NULL;
+ }
+ path = dso->long_name;
+ }
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = debuginfo__new(path);
+ if (!ret && !silent) {
+ pr_warning("The %s file has no debug information.\n", path);
+ if (!module || !strtailcmp(path, ".ko"))
+ pr_warning("Rebuild with CONFIG_DEBUG_INFO=y, ");
+ else
+ pr_warning("Rebuild with -g, ");
+ pr_warning("or install an appropriate debuginfo package.\n");
+ }
+ nsinfo__mountns_exit(&nsc);
+ return ret;
+}
+
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+ const char *path = module;
+
+ /* If the module is NULL, it should be the kernel. */
+ if (!module)
+ path = "kernel";
+
+ if (debuginfo_cache_path && !strcmp(debuginfo_cache_path, path))
+ goto out;
+
+ /* Copy module path */
+ free(debuginfo_cache_path);
+ debuginfo_cache_path = strdup(path);
+ if (!debuginfo_cache_path) {
+ debuginfo__delete(debuginfo_cache);
+ debuginfo_cache = NULL;
+ goto out;
+ }
+
+ debuginfo_cache = open_debuginfo(module, NULL, silent);
+ if (!debuginfo_cache)
+ zfree(&debuginfo_cache_path);
+out:
+ return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+ debuginfo__delete(debuginfo_cache);
+ debuginfo_cache = NULL;
+ zfree(&debuginfo_cache_path);
+}
+
+
+static int get_text_start_address(const char *exec, unsigned long *address,
+ struct nsinfo *nsi)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ int fd, ret = -ENOENT;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ fd = open(exec, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fd < 0)
+ return -errno;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ ret = -EINVAL;
+ goto out_close;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, ".text", NULL))
+ goto out;
+
+ *address = shdr.sh_addr - shdr.sh_offset;
+ ret = 0;
+out:
+ elf_end(elf);
+out_close:
+ close(fd);
+
+ return ret;
+}
+
+/*
+ * Convert trace point to probe point with debuginfo
+ */
+static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
+ struct perf_probe_point *pp,
+ bool is_kprobe)
+{
+ struct debuginfo *dinfo = NULL;
+ unsigned long stext = 0;
+ u64 addr = tp->address;
+ int ret = -ENOENT;
+
+ /* convert the address to dwarf address */
+ if (!is_kprobe) {
+ if (!addr) {
+ ret = -EINVAL;
+ goto error;
+ }
+ ret = get_text_start_address(tp->module, &stext, NULL);
+ if (ret < 0)
+ goto error;
+ addr += stext;
+ } else if (tp->symbol) {
+		/* If the module is given, this returns a relative address */
+ ret = kernel_get_symbol_address_by_name(tp->symbol, &addr,
+ false, !!tp->module);
+ if (ret != 0)
+ goto error;
+ addr += tp->offset;
+ }
+
+ pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
+ tp->module ? : "kernel");
+
+ dinfo = debuginfo_cache__open(tp->module, verbose <= 0);
+ if (dinfo)
+ ret = debuginfo__find_probe_point(dinfo,
+ (unsigned long)addr, pp);
+ else
+ ret = -ENOENT;
+
+ if (ret > 0) {
+ pp->retprobe = tp->retprobe;
+ return 0;
+ }
+error:
+ pr_debug("Failed to find corresponding probes from debuginfo.\n");
+ return ret ? : -ENOENT;
+}
+
+/* Adjust symbol name and address */
+static int post_process_probe_trace_point(struct probe_trace_point *tp,
+ struct map *map, unsigned long offs)
+{
+ struct symbol *sym;
+ u64 addr = tp->address - offs;
+
+ sym = map__find_symbol(map, addr);
+ if (!sym)
+ return -ENOENT;
+
+ if (strcmp(sym->name, tp->symbol)) {
+ /* If we have no realname, use symbol for it */
+ if (!tp->realname)
+ tp->realname = tp->symbol;
+ else
+ free(tp->symbol);
+ tp->symbol = strdup(sym->name);
+ if (!tp->symbol)
+ return -ENOMEM;
+ }
+ tp->offset = addr - sym->start;
+ tp->address -= offs;
+
+ return 0;
+}
+
+/*
+ * Rename DWARF symbols to ELF symbols -- gcc sometimes optimizes functions
+ * and generates new symbols with suffixes such as .constprop.N or .isra.N,
+ * etc. Since those symbols are not recorded in DWARF, we have to find the
+ * correct generated symbols in the offline ELF binary.
+ * For an online kernel or uprobes we don't need this because those are
+ * rebased on _text, or are already section-relative addresses.
+ */
+static int
+post_process_offline_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs, const char *pathname)
+{
+ struct map *map;
+ unsigned long stext = 0;
+ int i, ret = 0;
+
+ /* Prepare a map for offline binary */
+ map = dso__new_map(pathname);
+ if (!map || get_text_start_address(pathname, &stext, NULL) < 0) {
+ pr_warning("Failed to get ELF symbols for %s\n", pathname);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ntevs; i++) {
+ ret = post_process_probe_trace_point(&tevs[i].point,
+ map, stext);
+ if (ret < 0)
+ break;
+ }
+ map__put(map);
+
+ return ret;
+}
+
+static int add_exec_to_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs, const char *exec,
+ struct nsinfo *nsi)
+{
+ int i, ret = 0;
+ unsigned long stext = 0;
+
+ if (!exec)
+ return 0;
+
+ ret = get_text_start_address(exec, &stext, nsi);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < ntevs && ret >= 0; i++) {
+		/* point.address is the address of point.symbol + point.offset */
+ tevs[i].point.address -= stext;
+ tevs[i].point.module = strdup(exec);
+ if (!tevs[i].point.module) {
+ ret = -ENOMEM;
+ break;
+ }
+ tevs[i].uprobes = true;
+ }
+
+ return ret;
+}
+
+static int
+post_process_module_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs, const char *module,
+ struct debuginfo *dinfo)
+{
+ Dwarf_Addr text_offs = 0;
+ int i, ret = 0;
+ char *mod_name = NULL;
+ struct map *map;
+
+ if (!module)
+ return 0;
+
+ map = get_target_map(module, NULL, false);
+ if (!map || debuginfo__get_text_offset(dinfo, &text_offs, true) < 0) {
+ pr_warning("Failed to get ELF symbols for %s\n", module);
+ return -EINVAL;
+ }
+
+ mod_name = find_module_name(module);
+ for (i = 0; i < ntevs; i++) {
+ ret = post_process_probe_trace_point(&tevs[i].point,
+ map, (unsigned long)text_offs);
+ if (ret < 0)
+ break;
+ tevs[i].point.module =
+ strdup(mod_name ? mod_name : module);
+ if (!tevs[i].point.module) {
+ ret = -ENOMEM;
+ break;
+ }
+ }
+
+ free(mod_name);
+ map__put(map);
+
+ return ret;
+}
+
+static int
+post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
+ int ntevs)
+{
+ struct ref_reloc_sym *reloc_sym;
+ struct map *map;
+ char *tmp;
+ int i, skipped = 0;
+
+	/* Skip post-processing if the target is an offline kernel */
+ if (symbol_conf.ignore_vmlinux_buildid)
+ return post_process_offline_probe_trace_events(tevs, ntevs,
+ symbol_conf.vmlinux_name);
+
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
+ if (!reloc_sym) {
+ pr_warning("Relocated base symbol is not found!\n");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ntevs; i++) {
+ if (!tevs[i].point.address)
+ continue;
+ if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
+ continue;
+ /*
+		 * If we found a wrong one, mark it with a NULL symbol.
+		 * Since addresses in debuginfo are the same as in objdump,
+		 * we need to convert them to in-memory addresses.
+ */
+ if (kprobe_warn_out_range(tevs[i].point.symbol,
+ map__objdump_2mem(map, tevs[i].point.address))) {
+ tmp = NULL;
+ skipped++;
+ } else {
+ tmp = strdup(reloc_sym->name);
+ if (!tmp)
+ return -ENOMEM;
+ }
+ /* If we have no realname, use symbol for it */
+ if (!tevs[i].point.realname)
+ tevs[i].point.realname = tevs[i].point.symbol;
+ else
+ free(tevs[i].point.symbol);
+ tevs[i].point.symbol = tmp;
+ tevs[i].point.offset = tevs[i].point.address -
+ reloc_sym->unrelocated_addr;
+ }
+ return skipped;
+}
+
+void __weak
+arch__post_process_probe_trace_events(struct perf_probe_event *pev __maybe_unused,
+ int ntevs __maybe_unused)
+{
+}
+
+/* Post processing the probe events */
+static int post_process_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs,
+ int ntevs, const char *module,
+ bool uprobe, struct debuginfo *dinfo)
+{
+ int ret;
+
+ if (uprobe)
+ ret = add_exec_to_probe_trace_events(tevs, ntevs, module,
+ pev->nsi);
+ else if (module)
+		/* Currently, ref_reloc_sym based probes are not used for drivers */
+ ret = post_process_module_probe_trace_events(tevs, ntevs,
+ module, dinfo);
+ else
+ ret = post_process_kernel_probe_trace_events(tevs, ntevs);
+
+ if (ret >= 0)
+ arch__post_process_probe_trace_events(pev, ntevs);
+
+ return ret;
+}
+
+/* Try to find perf_probe_event with debuginfo */
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ bool need_dwarf = perf_probe_event_need_dwarf(pev);
+ struct perf_probe_point tmp;
+ struct debuginfo *dinfo;
+ int ntevs, ret = 0;
+
+ dinfo = open_debuginfo(pev->target, pev->nsi, !need_dwarf);
+ if (!dinfo) {
+ if (need_dwarf)
+ return -ENOENT;
+ pr_debug("Could not open debuginfo. Try to use symbols.\n");
+ return 0;
+ }
+
+ pr_debug("Try to find probe point from debuginfo.\n");
+ /* Searching trace events corresponding to a probe event */
+ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+
+ if (ntevs == 0) { /* Not found, retry with an alternative */
+ ret = get_alternative_probe_event(dinfo, pev, &tmp);
+ if (!ret) {
+ ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
+ /*
+ * Write back to the original probe_event for
+ * setting appropriate (user given) event name
+ */
+ clear_perf_probe_point(&pev->point);
+ memcpy(&pev->point, &tmp, sizeof(tmp));
+ }
+ }
+
+	if (ntevs > 0) {	/* Succeeded in finding trace events */
+ pr_debug("Found %d probe_trace_events.\n", ntevs);
+ ret = post_process_probe_trace_events(pev, *tevs, ntevs,
+ pev->target, pev->uprobes, dinfo);
+ if (ret < 0 || ret == ntevs) {
+ pr_debug("Post processing failed or all events are skipped. (%d)\n", ret);
+ clear_probe_trace_events(*tevs, ntevs);
+ zfree(tevs);
+ ntevs = 0;
+ }
+ }
+
+ debuginfo__delete(dinfo);
+
+ if (ntevs == 0) { /* No error but failed to find probe point. */
+ pr_warning("Probe point '%s' not found.\n",
+ synthesize_perf_probe_point(&pev->point));
+ return -ENOENT;
+ } else if (ntevs < 0) {
+ /* Error path : ntevs < 0 */
+ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs);
+ if (ntevs == -EBADF)
+ pr_warning("Warning: No dwarf info found in the vmlinux - "
+ "please rebuild kernel with CONFIG_DEBUG_INFO=y.\n");
+ if (!need_dwarf) {
+ pr_debug("Trying to use symbols.\n");
+ return 0;
+ }
+ }
+ return ntevs;
+}
+
+#define LINEBUF_SIZE 256
+#define NR_ADDITIONAL_LINES 2
+
+static int __show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+ char buf[LINEBUF_SIZE], sbuf[STRERR_BUFSIZE];
+ const char *color = show_num ? "" : PERF_COLOR_BLUE;
+ const char *prefix = NULL;
+
+ do {
+ if (fgets(buf, LINEBUF_SIZE, fp) == NULL)
+ goto error;
+ if (skip)
+ continue;
+ if (!prefix) {
+ prefix = show_num ? "%7d " : " ";
+ color_fprintf(stdout, color, prefix, l);
+ }
+ color_fprintf(stdout, color, "%s", buf);
+
+ } while (strchr(buf, '\n') == NULL);
+
+ return 1;
+error:
+ if (ferror(fp)) {
+ pr_warning("File read error: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -1;
+ }
+ return 0;
+}
+
+static int _show_one_line(FILE *fp, int l, bool skip, bool show_num)
+{
+ int rv = __show_one_line(fp, l, skip, show_num);
+ if (rv == 0) {
+ pr_warning("Source file is shorter than expected.\n");
+ rv = -1;
+ }
+ return rv;
+}
+
+#define show_one_line_with_num(f,l) _show_one_line(f,l,false,true)
+#define show_one_line(f,l) _show_one_line(f,l,false,false)
+#define skip_one_line(f,l) _show_one_line(f,l,true,false)
+#define show_one_line_or_eof(f,l) __show_one_line(f,l,false,false)
+
+/*
+ * Showing a line range always requires debuginfo to find the source
+ * file and line number.
+ */
+static int __show_line_range(struct line_range *lr, const char *module,
+ bool user)
+{
+ int l = 1;
+ struct int_node *ln;
+ struct debuginfo *dinfo;
+ FILE *fp;
+ int ret;
+ char *tmp;
+ char sbuf[STRERR_BUFSIZE];
+
+ /* Search a line range */
+ dinfo = open_debuginfo(module, NULL, false);
+ if (!dinfo)
+ return -ENOENT;
+
+ ret = debuginfo__find_line_range(dinfo, lr);
+ if (!ret) { /* Not found, retry with an alternative */
+ ret = get_alternative_line_range(dinfo, lr, module, user);
+ if (!ret)
+ ret = debuginfo__find_line_range(dinfo, lr);
+ }
+ debuginfo__delete(dinfo);
+ if (ret == 0 || ret == -ENOENT) {
+ pr_warning("Specified source line is not found.\n");
+ return -ENOENT;
+ } else if (ret < 0) {
+ pr_warning("Debuginfo analysis failed.\n");
+ return ret;
+ }
+
+ /* Convert source file path */
+ tmp = lr->path;
+ ret = get_real_path(tmp, lr->comp_dir, &lr->path);
+
+ /* Free old path when new path is assigned */
+ if (tmp != lr->path)
+ free(tmp);
+
+ if (ret < 0) {
+ pr_warning("Failed to find source file path.\n");
+ return ret;
+ }
+
+ setup_pager();
+
+ if (lr->function)
+ fprintf(stdout, "<%s@%s:%d>\n", lr->function, lr->path,
+ lr->start - lr->offset);
+ else
+ fprintf(stdout, "<%s:%d>\n", lr->path, lr->start);
+
+ fp = fopen(lr->path, "r");
+ if (fp == NULL) {
+ pr_warning("Failed to open %s: %s\n", lr->path,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -errno;
+ }
+ /* Skip to starting line number */
+ while (l < lr->start) {
+ ret = skip_one_line(fp, l++);
+ if (ret < 0)
+ goto end;
+ }
+
+ intlist__for_each_entry(ln, lr->line_list) {
+ for (; ln->i > l; l++) {
+ ret = show_one_line(fp, l - lr->offset);
+ if (ret < 0)
+ goto end;
+ }
+ ret = show_one_line_with_num(fp, l++ - lr->offset);
+ if (ret < 0)
+ goto end;
+ }
+
+ if (lr->end == INT_MAX)
+ lr->end = l + NR_ADDITIONAL_LINES;
+ while (l <= lr->end) {
+ ret = show_one_line_or_eof(fp, l++ - lr->offset);
+ if (ret <= 0)
+ break;
+ }
+end:
+ fclose(fp);
+ return ret;
+}
+
+int show_line_range(struct line_range *lr, const char *module,
+ struct nsinfo *nsi, bool user)
+{
+ int ret;
+ struct nscookie nsc;
+
+ ret = init_probe_symbol_maps(user);
+ if (ret < 0)
+ return ret;
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = __show_line_range(lr, module, user);
+ nsinfo__mountns_exit(&nsc);
+ exit_probe_symbol_maps();
+
+ return ret;
+}
+
+static int show_available_vars_at(struct debuginfo *dinfo,
+ struct perf_probe_event *pev,
+ struct strfilter *_filter)
+{
+ char *buf;
+ int ret, i, nvars;
+ struct str_node *node;
+ struct variable_list *vls = NULL, *vl;
+ struct perf_probe_point tmp;
+ const char *var;
+
+ buf = synthesize_perf_probe_point(&pev->point);
+ if (!buf)
+ return -EINVAL;
+ pr_debug("Searching variables at %s\n", buf);
+
+ ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
+ if (!ret) { /* Not found, retry with an alternative */
+ ret = get_alternative_probe_event(dinfo, pev, &tmp);
+ if (!ret) {
+ ret = debuginfo__find_available_vars_at(dinfo, pev,
+ &vls);
+ /* Release the old probe_point */
+ clear_perf_probe_point(&tmp);
+ }
+ }
+ if (ret <= 0) {
+ if (ret == 0 || ret == -ENOENT) {
+ pr_err("Failed to find the address of %s\n", buf);
+ ret = -ENOENT;
+ } else
+ pr_warning("Debuginfo analysis failed.\n");
+ goto end;
+ }
+
+ /* Some variables were found */
+ fprintf(stdout, "Available variables at %s\n", buf);
+ for (i = 0; i < ret; i++) {
+ vl = &vls[i];
+ /*
+ * A probe point might be converted to
+ * several trace points.
+ */
+ fprintf(stdout, "\t@<%s+%lu>\n", vl->point.symbol,
+ vl->point.offset);
+ zfree(&vl->point.symbol);
+ nvars = 0;
+ if (vl->vars) {
+ strlist__for_each_entry(node, vl->vars) {
+ var = strchr(node->s, '\t') + 1;
+ if (strfilter__compare(_filter, var)) {
+ fprintf(stdout, "\t\t%s\n", node->s);
+ nvars++;
+ }
+ }
+ strlist__delete(vl->vars);
+ }
+ if (nvars == 0)
+ fprintf(stdout, "\t\t(No matched variables)\n");
+ }
+ free(vls);
+end:
+ free(buf);
+ return ret;
+}
+
+/* Show available variables at the given probe point */
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ struct strfilter *_filter)
+{
+ int i, ret = 0;
+ struct debuginfo *dinfo;
+
+ ret = init_probe_symbol_maps(pevs->uprobes);
+ if (ret < 0)
+ return ret;
+
+ dinfo = open_debuginfo(pevs->target, pevs->nsi, false);
+ if (!dinfo) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ setup_pager();
+
+ for (i = 0; i < npevs && ret >= 0; i++)
+ ret = show_available_vars_at(dinfo, &pevs[i], _filter);
+
+ debuginfo__delete(dinfo);
+out:
+ exit_probe_symbol_maps();
+ return ret;
+}
+
+#else /* !HAVE_DWARF_SUPPORT */
+
+static void debuginfo_cache__exit(void)
+{
+}
+
+static int
+find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
+ struct perf_probe_point *pp __maybe_unused,
+ bool is_kprobe __maybe_unused)
+{
+ return -ENOSYS;
+}
+
+static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs __maybe_unused)
+{
+ if (perf_probe_event_need_dwarf(pev)) {
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+ }
+
+ return 0;
+}
+
+int show_line_range(struct line_range *lr __maybe_unused,
+ const char *module __maybe_unused,
+ struct nsinfo *nsi __maybe_unused,
+ bool user __maybe_unused)
+{
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+}
+
+int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
+ int npevs __maybe_unused,
+ struct strfilter *filter __maybe_unused)
+{
+ pr_warning("Debuginfo-analysis is not supported.\n");
+ return -ENOSYS;
+}
+#endif
+
+void line_range__clear(struct line_range *lr)
+{
+ free(lr->function);
+ free(lr->file);
+ free(lr->path);
+ free(lr->comp_dir);
+ intlist__delete(lr->line_list);
+ memset(lr, 0, sizeof(*lr));
+}
+
+int line_range__init(struct line_range *lr)
+{
+ memset(lr, 0, sizeof(*lr));
+ lr->line_list = intlist__new(NULL);
+ if (!lr->line_list)
+ return -ENOMEM;
+ else
+ return 0;
+}
+
+static int parse_line_num(char **ptr, int *val, const char *what)
+{
+ const char *start = *ptr;
+
+ errno = 0;
+ *val = strtol(*ptr, ptr, 0);
+ if (errno || *ptr == start) {
+ semantic_error("'%s' is not a valid number.\n", what);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Check whether the name is valid for an event, group or function */
+static bool is_c_func_name(const char *name)
+{
+ if (!isalpha(*name) && *name != '_')
+ return false;
+ while (*++name != '\0') {
+ if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+ return false;
+ }
+ return true;
+}
+
+/*
+ * Stuff 'lr' according to the line range described by 'arg'.
+ * The line range syntax is described by:
+ *
+ * SRC[:SLN[+NUM|-ELN]]
+ * FNC[@SRC][:SLN[+NUM|-ELN]]
+ */
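+/*
+ * Illustrative examples (file and function names are hypothetical):
+ * "fs/namei.c:100+10" - 10 lines of fs/namei.c starting at line 100
+ * "vfs_read:10-20" - lines 10 to 20, relative to vfs_read
+ * "schedule@kernel/sched/core.c" - the whole schedule() function
+ */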
+int parse_line_range_desc(const char *arg, struct line_range *lr)
+{
+ char *range, *file, *name = strdup(arg);
+ int err;
+
+ if (!name)
+ return -ENOMEM;
+
+ lr->start = 0;
+ lr->end = INT_MAX;
+
+ range = strchr(name, ':');
+ if (range) {
+ *range++ = '\0';
+
+ err = parse_line_num(&range, &lr->start, "start line");
+ if (err)
+ goto err;
+
+ if (*range == '+' || *range == '-') {
+ const char c = *range++;
+
+ err = parse_line_num(&range, &lr->end, "end line");
+ if (err)
+ goto err;
+
+ if (c == '+') {
+ lr->end += lr->start;
+ /*
+ * Adjust the number of lines here.
+ * If the number of lines == 1, the
+ * end line should be equal to the
+ * start line.
+ */
+ lr->end--;
+ }
+ }
+
+ pr_debug("Line range is %d to %d\n", lr->start, lr->end);
+
+ err = -EINVAL;
+ if (lr->start > lr->end) {
+ semantic_error("Start line must be smaller"
+ " than end line.\n");
+ goto err;
+ }
+ if (*range != '\0') {
+ semantic_error("Tailing with invalid str '%s'.\n", range);
+ goto err;
+ }
+ }
+
+ file = strchr(name, '@');
+ if (file) {
+ *file = '\0';
+ lr->file = strdup(++file);
+ if (lr->file == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ lr->function = name;
+ } else if (strchr(name, '/') || strchr(name, '.'))
+ lr->file = name;
+ else if (is_c_func_name(name)) /* Reuse the event-name check for the function name */
+ lr->function = name;
+ else { /* Invalid name */
+ semantic_error("'%s' is not a valid function name.\n", name);
+ err = -EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ free(name);
+ return err;
+}
+
+static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev)
+{
+ char *ptr;
+
+ ptr = strpbrk_esc(*arg, ":");
+ if (ptr) {
+ *ptr = '\0';
+ if (!pev->sdt && !is_c_func_name(*arg))
+ goto ng_name;
+ pev->group = strdup_esc(*arg);
+ if (!pev->group)
+ return -ENOMEM;
+ *arg = ptr + 1;
+ } else
+ pev->group = NULL;
+
+ pev->event = strdup_esc(*arg);
+ if (pev->event == NULL)
+ return -ENOMEM;
+
+ if (!pev->sdt && !is_c_func_name(pev->event)) {
+ zfree(&pev->event);
+ng_name:
+ zfree(&pev->group);
+ semantic_error("%s is bad for event name -it must "
+ "follow C symbol-naming rule.\n", *arg);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Parse a probe point definition. */
+static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
+{
+ struct perf_probe_point *pp = &pev->point;
+ char *ptr, *tmp;
+ char c, nc = 0;
+ bool file_spec = false;
+ int ret;
+
+ /*
+ * <Syntax>
+ * perf probe [GRP:][EVENT=]SRC[:LN|;PTN]
+ * perf probe [GRP:][EVENT=]FUNC[@SRC][+OFFS|%return|:LN|;PAT]
+ * perf probe %[GRP:]SDT_EVENT
+ */
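+ /*
+ * Illustrative examples (names are hypothetical):
+ * "vfs_read" - probe at the entry of vfs_read
+ * "vfs_read%return" - probe at the return of vfs_read
+ * "myevent=vfs_read+8" - named event, 8 bytes into vfs_read
+ * "fs/namei.c:100" - probe at line 100 of fs/namei.c
+ * "%sdt_libc:setjmp" - SDT event from a probe cache
+ */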
+ if (!arg)
+ return -EINVAL;
+
+ if (is_sdt_event(arg)) {
+ pev->sdt = true;
+ if (arg[0] == '%')
+ arg++;
+ }
+
+ ptr = strpbrk_esc(arg, ";=@+%");
+ if (pev->sdt) {
+ if (ptr) {
+ if (*ptr != '@') {
+ semantic_error("%s must be an SDT name.\n",
+ arg);
+ return -EINVAL;
+ }
+ /* This must be a target file name or build id */
+ tmp = build_id_cache__complement(ptr + 1);
+ if (tmp) {
+ pev->target = build_id_cache__origname(tmp);
+ free(tmp);
+ } else
+ pev->target = strdup_esc(ptr + 1);
+ if (!pev->target)
+ return -ENOMEM;
+ *ptr = '\0';
+ }
+ ret = parse_perf_probe_event_name(&arg, pev);
+ if (ret == 0) {
+ if (asprintf(&pev->point.function, "%%%s", pev->event) < 0)
+ ret = -errno;
+ }
+ return ret;
+ }
+
+ if (ptr && *ptr == '=') { /* Event name */
+ *ptr = '\0';
+ tmp = ptr + 1;
+ ret = parse_perf_probe_event_name(&arg, pev);
+ if (ret < 0)
+ return ret;
+
+ arg = tmp;
+ }
+
+ /*
+ * Check whether arg is a function or file name and copy it.
+ *
+ * We consider arg to be a file spec if and only if it satisfies
+ * all of the below criteria:
+ * - it does not include any of "+@%",
+ * - it includes one of ":;", and
+ * - it has a period '.' in the name.
+ *
+ * Otherwise, we consider arg to be a function specification.
+ */
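+ /*
+ * For example (hypothetical names): "fs/namei.c:100" and
+ * "fs/namei.c;pattern" are file specs, while "vfs_read:100",
+ * "vfs_read+8" and "foo.isra.0" are treated as function specs.
+ */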
+ if (!strpbrk_esc(arg, "+@%")) {
+ ptr = strpbrk_esc(arg, ";:");
+ /* This is a file spec if it includes a '.' before ; or : */
+ if (ptr && memchr(arg, '.', ptr - arg))
+ file_spec = true;
+ }
+
+ ptr = strpbrk_esc(arg, ";:+@%");
+ if (ptr) {
+ nc = *ptr;
+ *ptr++ = '\0';
+ }
+
+ if (arg[0] == '\0')
+ tmp = NULL;
+ else {
+ tmp = strdup_esc(arg);
+ if (tmp == NULL)
+ return -ENOMEM;
+ }
+
+ if (file_spec)
+ pp->file = tmp;
+ else {
+ pp->function = tmp;
+
+ /*
+ * Keep pp->function even if this is an absolute address,
+ * so it can mark whether abs_address is valid.
+ * This makes 'perf probe lib.bin 0x0' possible.
+ *
+ * Note that checking the length of tmp is not needed
+ * because when we access tmp[1] we know tmp[0] is '0',
+ * so tmp[1] is always valid (but could be '\0').
+ */
+ if (tmp && !strncmp(tmp, "0x", 2)) {
+ pp->abs_address = strtoul(pp->function, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("Invalid absolute address.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ /* Parse other options */
+ while (ptr) {
+ arg = ptr;
+ c = nc;
+ if (c == ';') { /* Lazy pattern must be the last part */
+ pp->lazy_line = strdup(arg); /* leave escapes as they are */
+ if (pp->lazy_line == NULL)
+ return -ENOMEM;
+ break;
+ }
+ ptr = strpbrk_esc(arg, ";:+@%");
+ if (ptr) {
+ nc = *ptr;
+ *ptr++ = '\0';
+ }
+ switch (c) {
+ case ':': /* Line number */
+ pp->line = strtoul(arg, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("There is non-digit char"
+ " in line number.\n");
+ return -EINVAL;
+ }
+ break;
+ case '+': /* Byte offset from a symbol */
+ pp->offset = strtoul(arg, &tmp, 0);
+ if (*tmp != '\0') {
+ semantic_error("There is non-digit character"
+ " in offset.\n");
+ return -EINVAL;
+ }
+ break;
+ case '@': /* File name */
+ if (pp->file) {
+ semantic_error("SRC@SRC is not allowed.\n");
+ return -EINVAL;
+ }
+ pp->file = strdup_esc(arg);
+ if (pp->file == NULL)
+ return -ENOMEM;
+ break;
+ case '%': /* Probe places */
+ if (strcmp(arg, "return") == 0) {
+ pp->retprobe = 1;
+ } else { /* Others not supported yet */
+ semantic_error("%%%s is not supported.\n", arg);
+ return -ENOTSUP;
+ }
+ break;
+ default: /* Buggy case */
+ pr_err("This program has a bug at %s:%d.\n",
+ __FILE__, __LINE__);
+ return -ENOTSUP;
+ break;
+ }
+ }
+
+ /* Exclusion check */
+ if (pp->lazy_line && pp->line) {
+ semantic_error("Lazy pattern can't be used with"
+ " line number.\n");
+ return -EINVAL;
+ }
+
+ if (pp->lazy_line && pp->offset) {
+ semantic_error("Lazy pattern can't be used with offset.\n");
+ return -EINVAL;
+ }
+
+ if (pp->line && pp->offset) {
+ semantic_error("Offset can't be used with line number.\n");
+ return -EINVAL;
+ }
+
+ if (!pp->line && !pp->lazy_line && pp->file && !pp->function) {
+ semantic_error("File always requires line number or "
+ "lazy pattern.\n");
+ return -EINVAL;
+ }
+
+ if (pp->offset && !pp->function) {
+ semantic_error("Offset requires an entry function.\n");
+ return -EINVAL;
+ }
+
+ if ((pp->offset || pp->line || pp->lazy_line) && pp->retprobe) {
+ semantic_error("Offset/Line/Lazy pattern can't be used with "
+ "return probe.\n");
+ return -EINVAL;
+ }
+
+ pr_debug("symbol:%s file:%s line:%d offset:%lu return:%d lazy:%s\n",
+ pp->function, pp->file, pp->line, pp->offset, pp->retprobe,
+ pp->lazy_line);
+ return 0;
+}
+
+/* Parse perf-probe event argument */
+static int parse_perf_probe_arg(char *str, struct perf_probe_arg *arg)
+{
+ char *tmp, *goodname;
+ struct perf_probe_arg_field **fieldp;
+
+ pr_debug("parsing arg: %s into ", str);
+
+ tmp = strchr(str, '=');
+ if (tmp) {
+ arg->name = strndup(str, tmp - str);
+ if (arg->name == NULL)
+ return -ENOMEM;
+ pr_debug("name:%s ", arg->name);
+ str = tmp + 1;
+ }
+
+ tmp = strchr(str, ':');
+ if (tmp) { /* Type setting */
+ *tmp = '\0';
+ arg->type = strdup(tmp + 1);
+ if (arg->type == NULL)
+ return -ENOMEM;
+ pr_debug("type:%s ", arg->type);
+ }
+
+ tmp = strpbrk(str, "-.[");
+ if (!is_c_varname(str) || !tmp) {
+ /* A variable, register, symbol or special value */
+ arg->var = strdup(str);
+ if (arg->var == NULL)
+ return -ENOMEM;
+ pr_debug("%s\n", arg->var);
+ return 0;
+ }
+
+ /* Structure fields or array element */
+ arg->var = strndup(str, tmp - str);
+ if (arg->var == NULL)
+ return -ENOMEM;
+ goodname = arg->var;
+ pr_debug("%s, ", arg->var);
+ fieldp = &arg->field;
+
+ do {
+ *fieldp = zalloc(sizeof(struct perf_probe_arg_field));
+ if (*fieldp == NULL)
+ return -ENOMEM;
+ if (*tmp == '[') { /* Array */
+ str = tmp;
+ (*fieldp)->index = strtol(str + 1, &tmp, 0);
+ (*fieldp)->ref = true;
+ if (*tmp != ']' || tmp == str + 1) {
+ semantic_error("Array index must be a"
+ " number.\n");
+ return -EINVAL;
+ }
+ tmp++;
+ if (*tmp == '\0')
+ tmp = NULL;
+ } else { /* Structure */
+ if (*tmp == '.') {
+ str = tmp + 1;
+ (*fieldp)->ref = false;
+ } else if (tmp[1] == '>') {
+ str = tmp + 2;
+ (*fieldp)->ref = true;
+ } else {
+ semantic_error("Argument parse error: %s\n",
+ str);
+ return -EINVAL;
+ }
+ tmp = strpbrk(str, "-.[");
+ }
+ if (tmp) {
+ (*fieldp)->name = strndup(str, tmp - str);
+ if ((*fieldp)->name == NULL)
+ return -ENOMEM;
+ if (*str != '[')
+ goodname = (*fieldp)->name;
+ pr_debug("%s(%d), ", (*fieldp)->name, (*fieldp)->ref);
+ fieldp = &(*fieldp)->next;
+ }
+ } while (tmp);
+ (*fieldp)->name = strdup(str);
+ if ((*fieldp)->name == NULL)
+ return -ENOMEM;
+ if (*str != '[')
+ goodname = (*fieldp)->name;
+ pr_debug("%s(%d)\n", (*fieldp)->name, (*fieldp)->ref);
+
+ /* If no name is specified, set the last field name (not array index). */
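+ /* e.g. for an argument "foo.bar[0]", the default name becomes "bar" */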
+ if (!arg->name) {
+ arg->name = strdup(goodname);
+ if (arg->name == NULL)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+/* Parse perf-probe event command */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev)
+{
+ char **argv;
+ int argc, i, ret = 0;
+
+ argv = argv_split(cmd, &argc);
+ if (!argv) {
+ pr_debug("Failed to split arguments.\n");
+ return -ENOMEM;
+ }
+ if (argc - 1 > MAX_PROBE_ARGS) {
+ semantic_error("Too many probe arguments (%d).\n", argc - 1);
+ ret = -ERANGE;
+ goto out;
+ }
+ /* Parse probe point */
+ ret = parse_perf_probe_point(argv[0], pev);
+ if (ret < 0)
+ goto out;
+
+ /* Copy arguments and ensure return probe has no C argument */
+ pev->nargs = argc - 1;
+ pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+ if (pev->args == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < pev->nargs && ret >= 0; i++) {
+ ret = parse_perf_probe_arg(argv[i + 1], &pev->args[i]);
+ if (ret >= 0 &&
+ is_c_varname(pev->args[i].var) && pev->point.retprobe) {
+ semantic_error("You can't specify local variable for"
+ " kretprobe.\n");
+ ret = -EINVAL;
+ }
+ }
+out:
+ argv_free(argv);
+
+ return ret;
+}
+
+/* Returns true if *any* ARG is either a C variable, $params or $vars. */
+bool perf_probe_with_var(struct perf_probe_event *pev)
+{
+ int i = 0;
+
+ for (i = 0; i < pev->nargs; i++)
+ if (is_c_varname(pev->args[i].var) ||
+ !strcmp(pev->args[i].var, PROBE_ARG_PARAMS) ||
+ !strcmp(pev->args[i].var, PROBE_ARG_VARS))
+ return true;
+ return false;
+}
+
+/* Return true if this perf_probe_event requires debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev)
+{
+ if (pev->point.file || pev->point.line || pev->point.lazy_line)
+ return true;
+
+ if (perf_probe_with_var(pev))
+ return true;
+
+ return false;
+}
+
+/* Parse probe_events event into struct probe_point */
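+/*
+ * Example probe_events lines (illustrative addresses and registers):
+ * p:probe/vfs_read _text+790416 count=%dx
+ * p:probe_libc/malloc /usr/lib/libc.so.6:0x84130 size=%di
+ */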
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
+{
+ struct probe_trace_point *tp = &tev->point;
+ char pr;
+ char *p;
+ char *argv0_str = NULL, *fmt, *fmt1_str, *fmt2_str, *fmt3_str;
+ int ret, i, argc;
+ char **argv;
+
+ pr_debug("Parsing probe_events: %s\n", cmd);
+ argv = argv_split(cmd, &argc);
+ if (!argv) {
+ pr_debug("Failed to split arguments.\n");
+ return -ENOMEM;
+ }
+ if (argc < 2) {
+ semantic_error("Too few probe arguments.\n");
+ ret = -ERANGE;
+ goto out;
+ }
+
+ /* Scan event and group name. */
+ argv0_str = strdup(argv[0]);
+ if (argv0_str == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fmt1_str = strtok_r(argv0_str, ":", &fmt);
+ fmt2_str = strtok_r(NULL, "/", &fmt);
+ fmt3_str = strtok_r(NULL, " \t", &fmt);
+ if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) {
+ semantic_error("Failed to parse event name: %s\n", argv[0]);
+ ret = -EINVAL;
+ goto out;
+ }
+ pr = fmt1_str[0];
+ tev->group = strdup(fmt2_str);
+ tev->event = strdup(fmt3_str);
+ if (tev->group == NULL || tev->event == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ pr_debug("Group:%s Event:%s probe:%c\n", tev->group, tev->event, pr);
+
+ tp->retprobe = (pr == 'r');
+
+ /* Scan the module name (if present), function name and offset */
+ p = strchr(argv[1], ':');
+ if (p) {
+ tp->module = strndup(argv[1], p - argv[1]);
+ if (!tp->module) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ tev->uprobes = (tp->module[0] == '/');
+ p++;
+ } else
+ p = argv[1];
+ fmt1_str = strtok_r(p, "+", &fmt);
+ /* Only an address starts with "0x" */
+ if (fmt1_str[0] == '0') {
+ /*
+ * Fix a special case:
+ * if address == 0, kernel reports something like:
+ * p:probe_libc/abs_0 /lib/libc-2.18.so:0x (null) arg1=%ax
+ * Newer kernels may fix this, but we also want to
+ * support older kernels.
+ */
+ if (strcmp(fmt1_str, "0x") == 0) {
+ if (!argv[2] || strcmp(argv[2], "(null)")) {
+ ret = -EINVAL;
+ goto out;
+ }
+ tp->address = 0;
+
+ free(argv[2]);
+ for (i = 2; argv[i + 1] != NULL; i++)
+ argv[i] = argv[i + 1];
+
+ argv[i] = NULL;
+ argc -= 1;
+ } else
+ tp->address = strtoul(fmt1_str, NULL, 0);
+ } else {
+ /* Only a symbol-based probe has an offset */
+ tp->symbol = strdup(fmt1_str);
+ if (tp->symbol == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ fmt2_str = strtok_r(NULL, "", &fmt);
+ if (fmt2_str == NULL)
+ tp->offset = 0;
+ else
+ tp->offset = strtoul(fmt2_str, NULL, 10);
+ }
+
+ tev->nargs = argc - 2;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (tev->args == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < tev->nargs; i++) {
+ p = strchr(argv[i + 2], '=');
+ if (p) /* We don't need to know which register is assigned. */
+ *p++ = '\0';
+ else
+ p = argv[i + 2];
+ tev->args[i].name = strdup(argv[i + 2]);
+ /* TODO: parse regs and offset */
+ tev->args[i].value = strdup(p);
+ if (tev->args[i].name == NULL || tev->args[i].value == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ free(argv0_str);
+ argv_free(argv);
+ return ret;
+}
+
+/* Compose only probe arg */
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa)
+{
+ struct perf_probe_arg_field *field = pa->field;
+ struct strbuf buf;
+ char *ret = NULL;
+ int err;
+
+ if (strbuf_init(&buf, 64) < 0)
+ return NULL;
+
+ if (pa->name && pa->var)
+ err = strbuf_addf(&buf, "%s=%s", pa->name, pa->var);
+ else
+ err = strbuf_addstr(&buf, pa->name ?: pa->var);
+ if (err)
+ goto out;
+
+ while (field) {
+ if (field->name[0] == '[')
+ err = strbuf_addstr(&buf, field->name);
+ else
+ err = strbuf_addf(&buf, "%s%s", field->ref ? "->" : ".",
+ field->name);
+ field = field->next;
+ if (err)
+ goto out;
+ }
+
+ if (pa->type)
+ if (strbuf_addf(&buf, ":%s", pa->type) < 0)
+ goto out;
+
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+/* Compose only probe point (not argument) */
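+/*
+ * e.g. a point {function "vfs_read", offset 8} is synthesized as
+ * "vfs_read+8"; a long file path is shortened to at most its last
+ * 30 characters, preferring to cut at a '/' boundary.
+ */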
+char *synthesize_perf_probe_point(struct perf_probe_point *pp)
+{
+ struct strbuf buf;
+ char *tmp, *ret = NULL;
+ int len, err = 0;
+
+ if (strbuf_init(&buf, 64) < 0)
+ return NULL;
+
+ if (pp->function) {
+ if (strbuf_addstr(&buf, pp->function) < 0)
+ goto out;
+ if (pp->offset)
+ err = strbuf_addf(&buf, "+%lu", pp->offset);
+ else if (pp->line)
+ err = strbuf_addf(&buf, ":%d", pp->line);
+ else if (pp->retprobe)
+ err = strbuf_addstr(&buf, "%return");
+ if (err)
+ goto out;
+ }
+ if (pp->file) {
+ tmp = pp->file;
+ len = strlen(tmp);
+ if (len > 30) {
+ tmp = strchr(pp->file + len - 30, '/');
+ tmp = tmp ? tmp + 1 : pp->file + len - 30;
+ }
+ err = strbuf_addf(&buf, "@%s", tmp);
+ if (!err && !pp->function && pp->line)
+ err = strbuf_addf(&buf, ":%d", pp->line);
+ }
+ if (!err)
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+char *synthesize_perf_probe_command(struct perf_probe_event *pev)
+{
+ struct strbuf buf;
+ char *tmp, *ret = NULL;
+ int i;
+
+ if (strbuf_init(&buf, 64))
+ return NULL;
+ if (pev->event)
+ if (strbuf_addf(&buf, "%s:%s=", pev->group ?: PERFPROBE_GROUP,
+ pev->event) < 0)
+ goto out;
+
+ tmp = synthesize_perf_probe_point(&pev->point);
+ if (!tmp || strbuf_addstr(&buf, tmp) < 0)
+ goto out;
+ free(tmp);
+
+ for (i = 0; i < pev->nargs; i++) {
+ tmp = synthesize_perf_probe_arg(pev->args + i);
+ if (!tmp || strbuf_addf(&buf, " %s", tmp) < 0)
+ goto out;
+ free(tmp);
+ }
+
+ ret = strbuf_detach(&buf, NULL);
+out:
+ strbuf_release(&buf);
+ return ret;
+}
+
+static int __synthesize_probe_trace_arg_ref(struct probe_trace_arg_ref *ref,
+ struct strbuf *buf, int depth)
+{
+ int err;
+ if (ref->next) {
+ depth = __synthesize_probe_trace_arg_ref(ref->next, buf,
+ depth + 1);
+ if (depth < 0)
+ return depth;
+ }
+ err = strbuf_addf(buf, "%+ld(", ref->offset);
+ return (err < 0) ? err : depth;
+}
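+/*
+ * Example for synthesize_probe_trace_arg() below: a two-level
+ * dereference chain with offsets 8 and 16 (in list order) around a
+ * value "%ax" is synthesized as "+16(+8(%ax))" - the last ref in the
+ * list becomes the outermost dereference.
+ */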
+
+static int synthesize_probe_trace_arg(struct probe_trace_arg *arg,
+ struct strbuf *buf)
+{
+ struct probe_trace_arg_ref *ref = arg->ref;
+ int depth = 0, err;
+
+ /* Argument name or separator */
+ if (arg->name)
+ err = strbuf_addf(buf, " %s=", arg->name);
+ else
+ err = strbuf_addch(buf, ' ');
+ if (err)
+ return err;
+
+ /* Special case: @XXX */
+ if (arg->value[0] == '@' && arg->ref)
+ ref = ref->next;
+
+ /* Dereferencing arguments */
+ if (ref) {
+ depth = __synthesize_probe_trace_arg_ref(ref, buf, 1);
+ if (depth < 0)
+ return depth;
+ }
+
+ /* Print argument value */
+ if (arg->value[0] == '@' && arg->ref)
+ err = strbuf_addf(buf, "%s%+ld", arg->value, arg->ref->offset);
+ else
+ err = strbuf_addstr(buf, arg->value);
+
+ /* Closing */
+ while (!err && depth--)
+ err = strbuf_addch(buf, ')');
+
+ /* Print argument type */
+ if (!err && arg->type)
+ err = strbuf_addf(buf, ":%s", arg->type);
+
+ return err;
+}
+
+char *synthesize_probe_trace_command(struct probe_trace_event *tev)
+{
+ struct probe_trace_point *tp = &tev->point;
+ struct strbuf buf;
+ char *ret = NULL;
+ int i, err;
+
+ /* Uprobes must have tp->module */
+ if (tev->uprobes && !tp->module)
+ return NULL;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return NULL;
+
+ if (strbuf_addf(&buf, "%c:%s/%s ", tp->retprobe ? 'r' : 'p',
+ tev->group, tev->event) < 0)
+ goto error;
+ /*
+ * If tp->address == 0, then this point must be an
+ * absolute address uprobe.
+ * try_to_find_absolute_address() should have set
+ * tp->symbol to "0x0".
+ */
+ if (tev->uprobes && !tp->address) {
+ if (!tp->symbol || strcmp(tp->symbol, "0x0"))
+ goto error;
+ }
+
+ /* Use the tp->address for uprobes */
+ if (tev->uprobes)
+ err = strbuf_addf(&buf, "%s:0x%lx", tp->module, tp->address);
+ else if (!strncmp(tp->symbol, "0x", 2))
+ /* Absolute address. See try_to_find_absolute_address() */
+ err = strbuf_addf(&buf, "%s%s0x%lx", tp->module ?: "",
+ tp->module ? ":" : "", tp->address);
+ else
+ err = strbuf_addf(&buf, "%s%s%s+%lu", tp->module ?: "",
+ tp->module ? ":" : "", tp->symbol, tp->offset);
+ if (err)
+ goto error;
+
+ for (i = 0; i < tev->nargs; i++)
+ if (synthesize_probe_trace_arg(&tev->args[i], &buf) < 0)
+ goto error;
+
+ ret = strbuf_detach(&buf, NULL);
+error:
+ strbuf_release(&buf);
+ return ret;
+}
+
+static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
+ struct perf_probe_point *pp,
+ bool is_kprobe)
+{
+ struct symbol *sym = NULL;
+ struct map *map = NULL;
+ u64 addr = tp->address;
+ int ret = -ENOENT;
+
+ if (!is_kprobe) {
+ map = dso__new_map(tp->module);
+ if (!map)
+ goto out;
+ sym = map__find_symbol(map, addr);
+ } else {
+ if (tp->symbol && !addr) {
+ if (kernel_get_symbol_address_by_name(tp->symbol,
+ &addr, true, false) < 0)
+ goto out;
+ }
+ if (addr) {
+ addr += tp->offset;
+ sym = machine__find_kernel_symbol(host_machine, addr, &map);
+ }
+ }
+
+ if (!sym)
+ goto out;
+
+ pp->retprobe = tp->retprobe;
+ pp->offset = addr - map->unmap_ip(map, sym->start);
+ pp->function = strdup(sym->name);
+ ret = pp->function ? 0 : -ENOMEM;
+
+out:
+ if (map && !is_kprobe) {
+ map__put(map);
+ }
+
+ return ret;
+}
+
+static int convert_to_perf_probe_point(struct probe_trace_point *tp,
+ struct perf_probe_point *pp,
+ bool is_kprobe)
+{
+ char buf[128];
+ int ret;
+
+ ret = find_perf_probe_point_from_dwarf(tp, pp, is_kprobe);
+ if (!ret)
+ return 0;
+ ret = find_perf_probe_point_from_map(tp, pp, is_kprobe);
+ if (!ret)
+ return 0;
+
+ pr_debug("Failed to find probe point from both of dwarf and map.\n");
+
+ if (tp->symbol) {
+ pp->function = strdup(tp->symbol);
+ pp->offset = tp->offset;
+ } else {
+ ret = e_snprintf(buf, 128, "0x%" PRIx64, (u64)tp->address);
+ if (ret < 0)
+ return ret;
+ pp->function = strdup(buf);
+ pp->offset = 0;
+ }
+ if (pp->function == NULL)
+ return -ENOMEM;
+
+ pp->retprobe = tp->retprobe;
+
+ return 0;
+}
+
+static int convert_to_perf_probe_event(struct probe_trace_event *tev,
+ struct perf_probe_event *pev, bool is_kprobe)
+{
+ struct strbuf buf = STRBUF_INIT;
+ int i, ret;
+
+ /* Convert event/group name */
+ pev->event = strdup(tev->event);
+ pev->group = strdup(tev->group);
+ if (pev->event == NULL || pev->group == NULL)
+ return -ENOMEM;
+
+ /* Convert trace_point to probe_point */
+ ret = convert_to_perf_probe_point(&tev->point, &pev->point, is_kprobe);
+ if (ret < 0)
+ return ret;
+
+ /* Convert trace_arg to probe_arg */
+ pev->nargs = tev->nargs;
+ pev->args = zalloc(sizeof(struct perf_probe_arg) * pev->nargs);
+ if (pev->args == NULL)
+ return -ENOMEM;
+ for (i = 0; i < tev->nargs && ret >= 0; i++) {
+ if (tev->args[i].name)
+ pev->args[i].name = strdup(tev->args[i].name);
+ else {
+ if ((ret = strbuf_init(&buf, 32)) < 0)
+ goto error;
+ ret = synthesize_probe_trace_arg(&tev->args[i], &buf);
+ pev->args[i].name = strbuf_detach(&buf, NULL);
+ }
+ if (pev->args[i].name == NULL && ret >= 0)
+ ret = -ENOMEM;
+ }
+error:
+ if (ret < 0)
+ clear_perf_probe_event(pev);
+
+ return ret;
+}
+
+void clear_perf_probe_event(struct perf_probe_event *pev)
+{
+ struct perf_probe_arg_field *field, *next;
+ int i;
+
+ free(pev->event);
+ free(pev->group);
+ free(pev->target);
+ clear_perf_probe_point(&pev->point);
+
+ for (i = 0; i < pev->nargs; i++) {
+ free(pev->args[i].name);
+ free(pev->args[i].var);
+ free(pev->args[i].type);
+ field = pev->args[i].field;
+ while (field) {
+ next = field->next;
+ zfree(&field->name);
+ free(field);
+ field = next;
+ }
+ }
+ free(pev->args);
+ memset(pev, 0, sizeof(*pev));
+}
+
+#define strdup_or_goto(str, label) \
+({ char *__p = NULL; if (str && !(__p = strdup(str))) goto label; __p; })
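+/*
+ * Note: as a statement expression, the macro above evaluates to a
+ * duplicate of 'str' (or NULL when 'str' is NULL), and jumps to
+ * 'label' only when strdup() itself fails.
+ */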
+
+static int perf_probe_point__copy(struct perf_probe_point *dst,
+ struct perf_probe_point *src)
+{
+ dst->file = strdup_or_goto(src->file, out_err);
+ dst->function = strdup_or_goto(src->function, out_err);
+ dst->lazy_line = strdup_or_goto(src->lazy_line, out_err);
+ dst->line = src->line;
+ dst->retprobe = src->retprobe;
+ dst->offset = src->offset;
+ return 0;
+
+out_err:
+ clear_perf_probe_point(dst);
+ return -ENOMEM;
+}
+
+static int perf_probe_arg__copy(struct perf_probe_arg *dst,
+ struct perf_probe_arg *src)
+{
+ struct perf_probe_arg_field *field, **ppfield;
+
+ dst->name = strdup_or_goto(src->name, out_err);
+ dst->var = strdup_or_goto(src->var, out_err);
+ dst->type = strdup_or_goto(src->type, out_err);
+
+ field = src->field;
+ ppfield = &(dst->field);
+ while (field) {
+ *ppfield = zalloc(sizeof(*field));
+ if (!*ppfield)
+ goto out_err;
+ (*ppfield)->name = strdup_or_goto(field->name, out_err);
+ (*ppfield)->index = field->index;
+ (*ppfield)->ref = field->ref;
+ field = field->next;
+ ppfield = &((*ppfield)->next);
+ }
+ return 0;
+out_err:
+ return -ENOMEM;
+}
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+ struct perf_probe_event *src)
+{
+ int i;
+
+ dst->event = strdup_or_goto(src->event, out_err);
+ dst->group = strdup_or_goto(src->group, out_err);
+ dst->target = strdup_or_goto(src->target, out_err);
+ dst->uprobes = src->uprobes;
+
+ if (perf_probe_point__copy(&dst->point, &src->point) < 0)
+ goto out_err;
+
+ dst->args = zalloc(sizeof(struct perf_probe_arg) * src->nargs);
+ if (!dst->args)
+ goto out_err;
+ dst->nargs = src->nargs;
+
+ for (i = 0; i < src->nargs; i++)
+ if (perf_probe_arg__copy(&dst->args[i], &src->args[i]) < 0)
+ goto out_err;
+ return 0;
+
+out_err:
+ clear_perf_probe_event(dst);
+ return -ENOMEM;
+}
+
+void clear_probe_trace_event(struct probe_trace_event *tev)
+{
+ struct probe_trace_arg_ref *ref, *next;
+ int i;
+
+ free(tev->event);
+ free(tev->group);
+ free(tev->point.symbol);
+ free(tev->point.realname);
+ free(tev->point.module);
+ for (i = 0; i < tev->nargs; i++) {
+ free(tev->args[i].name);
+ free(tev->args[i].value);
+ free(tev->args[i].type);
+ ref = tev->args[i].ref;
+ while (ref) {
+ next = ref->next;
+ free(ref);
+ ref = next;
+ }
+ }
+ free(tev->args);
+ memset(tev, 0, sizeof(*tev));
+}
+
+struct kprobe_blacklist_node {
+ struct list_head list;
+ unsigned long start;
+ unsigned long end;
+ char *symbol;
+};
+
+static void kprobe_blacklist__delete(struct list_head *blacklist)
+{
+ struct kprobe_blacklist_node *node;
+
+ while (!list_empty(blacklist)) {
+ node = list_first_entry(blacklist,
+ struct kprobe_blacklist_node, list);
+ list_del(&node->list);
+ free(node->symbol);
+ free(node);
+ }
+}
+
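+/*
+ * Each line of the kprobes blacklist file is expected to look like
+ * (addresses illustrative):
+ * 0xffffffff81020000-0xffffffff81020100\tpreempt_schedule
+ * i.e. a start-end address range, a tab, then the symbol name.
+ */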
+static int kprobe_blacklist__load(struct list_head *blacklist)
+{
+ struct kprobe_blacklist_node *node;
+ const char *__debugfs = debugfs__mountpoint();
+ char buf[PATH_MAX], *p;
+ FILE *fp;
+ int ret;
+
+ if (__debugfs == NULL)
+ return -ENOTSUP;
+
+ ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(buf, "r");
+ if (!fp)
+ return -errno;
+
+ ret = 0;
+ while (fgets(buf, PATH_MAX, fp)) {
+ node = zalloc(sizeof(*node));
+ if (!node) {
+ ret = -ENOMEM;
+ break;
+ }
+ INIT_LIST_HEAD(&node->list);
+ list_add_tail(&node->list, blacklist);
+ if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) {
+ ret = -EINVAL;
+ break;
+ }
+ p = strchr(buf, '\t');
+ if (p) {
+ p++;
+ if (p[strlen(p) - 1] == '\n')
+ p[strlen(p) - 1] = '\0';
+ } else
+ p = (char *)"unknown";
+ node->symbol = strdup(p);
+ if (!node->symbol) {
+ ret = -ENOMEM;
+ break;
+ }
+ pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n",
+ node->start, node->end, node->symbol);
+ ret++;
+ }
+ if (ret < 0)
+ kprobe_blacklist__delete(blacklist);
+ fclose(fp);
+
+ return ret;
+}
+
+static struct kprobe_blacklist_node *
+kprobe_blacklist__find_by_address(struct list_head *blacklist,
+ unsigned long address)
+{
+ struct kprobe_blacklist_node *node;
+
+ list_for_each_entry(node, blacklist, list) {
+ if (node->start <= address && address < node->end)
+ return node;
+ }
+
+ return NULL;
+}
+
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+ if (!list_empty(&kprobe_blacklist))
+ return;
+
+ if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+ pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+ kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address)
+{
+ return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
+static int perf_probe_event__sprintf(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module,
+ struct strbuf *result)
+{
+ int i, ret;
+ char *buf;
+
+ if (asprintf(&buf, "%s:%s", group, event) < 0)
+ return -errno;
+ ret = strbuf_addf(result, " %-20s (on ", buf);
+ free(buf);
+ if (ret)
+ return ret;
+
+ /* Synthesize only the probe point of the event */
+ buf = synthesize_perf_probe_point(&pev->point);
+ if (!buf)
+ return -ENOMEM;
+ ret = strbuf_addstr(result, buf);
+ free(buf);
+
+ if (!ret && module)
+ ret = strbuf_addf(result, " in %s", module);
+
+ if (!ret && pev->nargs > 0) {
+ ret = strbuf_add(result, " with", 5);
+ for (i = 0; !ret && i < pev->nargs; i++) {
+ buf = synthesize_perf_probe_arg(&pev->args[i]);
+ if (!buf)
+ return -ENOMEM;
+ ret = strbuf_addf(result, " %s", buf);
+ free(buf);
+ }
+ }
+ if (!ret)
+ ret = strbuf_addch(result, ')');
+
+ return ret;
+}
+
+/* Show an event */
+int show_perf_probe_event(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module, bool use_stdout)
+{
+ struct strbuf buf = STRBUF_INIT;
+ int ret;
+
+ ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
+ if (ret >= 0) {
+ if (use_stdout)
+ printf("%s\n", buf.buf);
+ else
+ pr_info("%s\n", buf.buf);
+ }
+ strbuf_release(&buf);
+
+ return ret;
+}
+
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+ struct strfilter *filter)
+{
+ char tmp[128];
+
+ /* First, check the event name itself */
+ if (strfilter__compare(filter, tev->event))
+ return true;
+
+ /* Next, check the combination of name and group */
+ if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
+ return false;
+ return strfilter__compare(filter, tmp);
+}
+
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+ struct strfilter *filter)
+{
+ int ret = 0;
+ struct probe_trace_event tev;
+ struct perf_probe_event pev;
+ struct strlist *rawlist;
+ struct str_node *ent;
+
+ memset(&tev, 0, sizeof(tev));
+ memset(&pev, 0, sizeof(pev));
+
+ rawlist = probe_file__get_rawlist(fd);
+ if (!rawlist)
+ return -ENOMEM;
+
+ strlist__for_each_entry(ent, rawlist) {
+ ret = parse_probe_trace_command(ent->s, &tev);
+ if (ret >= 0) {
+ if (!filter_probe_trace_event(&tev, filter))
+ goto next;
+ ret = convert_to_perf_probe_event(&tev, &pev,
+ is_kprobe);
+ if (ret < 0)
+ goto next;
+ ret = show_perf_probe_event(pev.group, pev.event,
+ &pev, tev.point.module,
+ true);
+ }
+next:
+ clear_perf_probe_event(&pev);
+ clear_probe_trace_event(&tev);
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(rawlist);
+ /* Cleanup cached debuginfo if needed */
+ debuginfo_cache__exit();
+
+ return ret;
+}
+
+/* List current perf-probe events */
+int show_perf_probe_events(struct strfilter *filter)
+{
+ int kp_fd, up_fd, ret;
+
+ setup_pager();
+
+ if (probe_conf.cache)
+ return probe_cache__show_all_caches(filter);
+
+ ret = init_probe_symbol_maps(false);
+ if (ret < 0)
+ return ret;
+
+ ret = probe_file__open_both(&kp_fd, &up_fd, 0);
+ if (ret < 0)
+ return ret;
+
+ if (kp_fd >= 0)
+ ret = __show_perf_probe_events(kp_fd, true, filter);
+ if (up_fd >= 0 && ret >= 0)
+ ret = __show_perf_probe_events(up_fd, false, filter);
+ if (kp_fd > 0)
+ close(kp_fd);
+ if (up_fd > 0)
+ close(up_fd);
+ exit_probe_symbol_maps();
+
+ return ret;
+}
+
+static int get_new_event_name(char *buf, size_t len, const char *base,
+ struct strlist *namelist, bool ret_event,
+ bool allow_suffix)
+{
+ int i, ret;
+ char *p, *nbase;
+
+ if (*base == '.')
+ base++;
+ nbase = strdup(base);
+ if (!nbase)
+ return -ENOMEM;
+
+ /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */
+ p = strpbrk(nbase, ".@");
+ if (p && p != nbase)
+ *p = '\0';
+
+ /* Try no suffix number */
+ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : "");
+ if (ret < 0) {
+ pr_debug("snprintf() failed: %d\n", ret);
+ goto out;
+ }
+ if (!strlist__has_entry(namelist, buf))
+ goto out;
+
+ if (!allow_suffix) {
+ pr_warning("Error: event \"%s\" already exists.\n"
+ " Hint: Remove existing event by 'perf probe -d'\n"
+ " or force duplicates by 'perf probe -f'\n"
+ " or set 'force=yes' in BPF source.\n",
+ buf);
+ ret = -EEXIST;
+ goto out;
+ }
+
+ /* Try to add suffix */
+ for (i = 1; i < MAX_EVENT_INDEX; i++) {
+ ret = e_snprintf(buf, len, "%s_%d", nbase, i);
+ if (ret < 0) {
+ pr_debug("snprintf() failed: %d\n", ret);
+ goto out;
+ }
+ if (!strlist__has_entry(namelist, buf))
+ break;
+ }
+ if (i == MAX_EVENT_INDEX) {
+ pr_warning("Too many events are on the same function.\n");
+ ret = -ERANGE;
+ }
+
+out:
+ free(nbase);
+
+ /* Final validation */
+ if (ret >= 0 && !is_c_func_name(buf)) {
+ pr_warning("Internal error: \"%s\" is an invalid event name.\n",
+ buf);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
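+/*
+ * Example for get_new_event_name() above: with base "vfs_read", the
+ * first name tried is "vfs_read" (or "vfs_read__return" for a return
+ * probe); if that name is taken and suffixes are allowed,
+ * "vfs_read_1", "vfs_read_2", ... are tried in turn.
+ */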
+
+/* Warn if the current kernel's uprobe implementation is old */
+static void warn_uprobe_event_compat(struct probe_trace_event *tev)
+{
+ int i;
+ char *buf = synthesize_probe_trace_command(tev);
+
+ /* Old uprobe events don't support memory dereference */
+ if (!tev->uprobes || tev->nargs == 0 || !buf)
+ goto out;
+
+ for (i = 0; i < tev->nargs; i++)
+ if (strglobmatch(tev->args[i].value, "[$@+-]*")) {
+ pr_warning("Please upgrade your kernel to at least "
+ "3.14 to have access to feature %s\n",
+ tev->args[i].value);
+ break;
+ }
+out:
+ free(buf);
+}
+
+/* Set new name from original perf_probe_event and namelist */
+static int probe_trace_event__set_name(struct probe_trace_event *tev,
+ struct perf_probe_event *pev,
+ struct strlist *namelist,
+ bool allow_suffix)
+{
+ const char *event, *group;
+ char buf[64];
+ int ret;
+
+ /* If the probe_event or trace_event already has a name, reuse it */
+ if (pev->event && !pev->sdt)
+ event = pev->event;
+ else if (tev->event)
+ event = tev->event;
+ else {
+ /* Or generate new one from probe point */
+ if (pev->point.function &&
+ (strncmp(pev->point.function, "0x", 2) != 0) &&
+ !strisglob(pev->point.function))
+ event = pev->point.function;
+ else
+ event = tev->point.realname;
+ }
+ if (pev->group && !pev->sdt)
+ group = pev->group;
+ else if (tev->group)
+ group = tev->group;
+ else
+ group = PERFPROBE_GROUP;
+
+ /* Get an unused new event name */
+ ret = get_new_event_name(buf, 64, event, namelist,
+ tev->point.retprobe, allow_suffix);
+ if (ret < 0)
+ return ret;
+
+ event = buf;
+
+ tev->event = strdup(event);
+ tev->group = strdup(group);
+ if (tev->event == NULL || tev->group == NULL)
+ return -ENOMEM;
+
+ /* Add the new event name to the namelist */
+ strlist__add(namelist, event);
+ return 0;
+}
+
+static int __open_probe_file_and_namelist(bool uprobe,
+ struct strlist **namelist)
+{
+ int fd;
+
+ fd = probe_file__open(PF_FL_RW | (uprobe ? PF_FL_UPROBE : 0));
+ if (fd < 0)
+ return fd;
+
+ /* Get current event names */
+ *namelist = probe_file__get_namelist(fd);
+ if (!(*namelist)) {
+ pr_debug("Failed to get current event list.\n");
+ close(fd);
+ return -ENOMEM;
+ }
+ return fd;
+}
+
+static int __add_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event *tevs,
+ int ntevs, bool allow_suffix)
+{
+ int i, fd[2] = {-1, -1}, up, ret;
+ struct probe_trace_event *tev = NULL;
+ struct probe_cache *cache = NULL;
+ struct strlist *namelist[2] = {NULL, NULL};
+ struct nscookie nsc;
+
+ up = pev->uprobes ? 1 : 0;
+ fd[up] = __open_probe_file_and_namelist(up, &namelist[up]);
+ if (fd[up] < 0)
+ return fd[up];
+
+ ret = 0;
+ for (i = 0; i < ntevs; i++) {
+ tev = &tevs[i];
+ up = tev->uprobes ? 1 : 0;
+ if (fd[up] == -1) { /* Open the kprobe/uprobe_events */
+ fd[up] = __open_probe_file_and_namelist(up,
+ &namelist[up]);
+ if (fd[up] < 0)
+ goto close_out;
+ }
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev, namelist[up],
+ allow_suffix);
+ if (ret < 0)
+ break;
+
+ nsinfo__mountns_enter(pev->nsi, &nsc);
+ ret = probe_file__add_event(fd[up], tev);
+ nsinfo__mountns_exit(&nsc);
+ if (ret < 0)
+ break;
+
+ /*
+ * Probes after the first probe which comes from the same
+ * user input are always allowed to add a suffix, because
+ * there might be several addresses corresponding to
+ * one code line.
+ */
+ allow_suffix = true;
+ }
+ if (ret == -EINVAL && pev->uprobes)
+ warn_uprobe_event_compat(tev);
+ if (ret == 0 && probe_conf.cache) {
+ cache = probe_cache__new(pev->target, pev->nsi);
+ if (!cache ||
+ probe_cache__add_entry(cache, pev, tevs, ntevs) < 0 ||
+ probe_cache__commit(cache) < 0)
+ pr_warning("Failed to add event to probe cache\n");
+ probe_cache__delete(cache);
+ }
+
+close_out:
+ for (up = 0; up < 2; up++) {
+ strlist__delete(namelist[up]);
+ if (fd[up] >= 0)
+ close(fd[up]);
+ }
+ return ret;
+}
+
+static int find_probe_functions(struct map *map, char *name,
+ struct symbol **syms)
+{
+ int found = 0;
+ struct symbol *sym;
+ struct rb_node *tmp;
+ const char *norm, *ver;
+ char *buf = NULL;
+ bool cut_version = true;
+
+ if (map__load(map) < 0)
+ return 0;
+
+ /* If user gives a version, don't cut off the version from symbols */
+ if (strchr(name, '@'))
+ cut_version = false;
+
+ map__for_each_symbol(map, sym, tmp) {
+ norm = arch__normalize_symbol_name(sym->name);
+ if (!norm)
+ continue;
+
+ if (cut_version) {
+ /* We don't care whether it is the default symbol or not */
+ ver = strchr(norm, '@');
+ if (ver) {
+ buf = strndup(norm, ver - norm);
+ if (!buf)
+ return -ENOMEM;
+ norm = buf;
+ }
+ }
+
+ if (strglobmatch(norm, name)) {
+ found++;
+ if (syms && found < probe_conf.max_probes)
+ syms[found - 1] = sym;
+ }
+ if (buf)
+ zfree(&buf);
+ }
+
+ return found;
+}
+
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+ struct probe_trace_event *tev __maybe_unused,
+ struct map *map __maybe_unused,
+ struct symbol *sym __maybe_unused) { }
+
+/*
+ * Find probe function addresses from map.
+ * Return an error or the number of probe_trace_events found.
+ */
+static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct map *map = NULL;
+ struct ref_reloc_sym *reloc_sym = NULL;
+ struct symbol *sym;
+ struct symbol **syms = NULL;
+ struct probe_trace_event *tev;
+ struct perf_probe_point *pp = &pev->point;
+ struct probe_trace_point *tp;
+ int num_matched_functions;
+ int ret, i, j, skipped = 0;
+ char *mod_name;
+
+ map = get_target_map(pev->target, pev->nsi, pev->uprobes);
+ if (!map) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+ if (!syms) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Load matched symbols: since different local symbols may have the
+ * same name but different addresses, this lists all of them.
+ */
+ num_matched_functions = find_probe_functions(map, pp->function, syms);
+ if (num_matched_functions <= 0) {
+ pr_err("Failed to find symbol %s in %s\n", pp->function,
+ pev->target ? : "kernel");
+ ret = -ENOENT;
+ goto out;
+ } else if (num_matched_functions > probe_conf.max_probes) {
+ pr_err("Too many functions matched in %s\n",
+ pev->target ? : "kernel");
+ ret = -E2BIG;
+ goto out;
+ }
+
+ /* Note that the symbols in the kmodule are not relocated */
+ if (!pev->uprobes && !pev->target &&
+ (!pp->retprobe || kretprobe_offset_is_supported())) {
+ reloc_sym = kernel_get_ref_reloc_sym(NULL);
+ if (!reloc_sym) {
+ pr_warning("Relocated base symbol is not found!\n");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Setup result trace-probe-events */
+ *tevs = zalloc(sizeof(*tev) * num_matched_functions);
+ if (!*tevs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = 0;
+
+ for (j = 0; j < num_matched_functions; j++) {
+ sym = syms[j];
+
+ if (sym->type != STT_FUNC)
+ continue;
+
+ tev = (*tevs) + ret;
+ tp = &tev->point;
+ if (ret == num_matched_functions) {
+ pr_warning("Too many symbols are listed. Skip it.\n");
+ break;
+ }
+ ret++;
+
+ if (pp->offset > sym->end - sym->start) {
+ pr_warning("Offset %ld is bigger than the size of %s\n",
+ pp->offset, sym->name);
+ ret = -ENOENT;
+ goto err_out;
+ }
+ /* Add one probe point */
+ tp->address = map->unmap_ip(map, sym->start) + pp->offset;
+
+ /* Check that the kprobe (not in a module) is within .text */
+ if (!pev->uprobes && !pev->target &&
+ kprobe_warn_out_range(sym->name, tp->address)) {
+ tp->symbol = NULL; /* Skip it */
+ skipped++;
+ } else if (reloc_sym) {
+ tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
+ tp->offset = tp->address - reloc_sym->addr;
+ } else {
+ tp->symbol = strdup_or_goto(sym->name, nomem_out);
+ tp->offset = pp->offset;
+ }
+ tp->realname = strdup_or_goto(sym->name, nomem_out);
+
+ tp->retprobe = pp->retprobe;
+ if (pev->target) {
+ if (pev->uprobes) {
+ tev->point.module = strdup_or_goto(pev->target,
+ nomem_out);
+ } else {
+ mod_name = find_module_name(pev->target);
+ tev->point.module =
+ strdup(mod_name ? mod_name : pev->target);
+ free(mod_name);
+ if (!tev->point.module)
+ goto nomem_out;
+ }
+ }
+ tev->uprobes = pev->uprobes;
+ tev->nargs = pev->nargs;
+ if (tev->nargs) {
+ tev->args = zalloc(sizeof(struct probe_trace_arg) *
+ tev->nargs);
+ if (tev->args == NULL)
+ goto nomem_out;
+ }
+ for (i = 0; i < tev->nargs; i++) {
+ if (pev->args[i].name)
+ tev->args[i].name =
+ strdup_or_goto(pev->args[i].name,
+ nomem_out);
+
+ tev->args[i].value = strdup_or_goto(pev->args[i].var,
+ nomem_out);
+ if (pev->args[i].type)
+ tev->args[i].type =
+ strdup_or_goto(pev->args[i].type,
+ nomem_out);
+ }
+ arch__fix_tev_from_maps(pev, tev, map, sym);
+ }
+ if (ret == skipped) {
+ ret = -ENOENT;
+ goto err_out;
+ }
+
+out:
+ map__put(map);
+ free(syms);
+ return ret;
+
+nomem_out:
+ ret = -ENOMEM;
+err_out:
+ clear_probe_trace_events(*tevs, num_matched_functions);
+ zfree(tevs);
+ goto out;
+}
+
+static int try_to_find_absolute_address(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct perf_probe_point *pp = &pev->point;
+ struct probe_trace_event *tev;
+ struct probe_trace_point *tp;
+ int i, err;
+
+ if (!(pev->point.function && !strncmp(pev->point.function, "0x", 2)))
+ return -EINVAL;
+ if (perf_probe_event_need_dwarf(pev))
+ return -EINVAL;
+
+ /*
+ * This is 'perf probe /lib/libc.so 0xabcd'. Try to probe at
+ * an absolute address.
+ *
+ * Only one tev can be generated by this.
+ */
+ *tevs = zalloc(sizeof(*tev));
+ if (!*tevs)
+ return -ENOMEM;
+
+ tev = *tevs;
+ tp = &tev->point;
+
+ /*
+ * Don't use tp->offset, use address directly, because
+ * in synthesize_probe_trace_command() address cannot be
+ * zero.
+ */
+ tp->address = pev->point.abs_address;
+ tp->retprobe = pp->retprobe;
+ tev->uprobes = pev->uprobes;
+
+ err = -ENOMEM;
+ /*
+ * Give it a symbol name with a leading '0x'.
+ * In __add_probe_trace_events(), a NULL symbol is interpreted as
+ * invalid.
+ */
+ if (asprintf(&tp->symbol, "0x%lx", tp->address) < 0)
+ goto errout;
+
+ /* For kprobe, check range */
+ if ((!tev->uprobes) &&
+ (kprobe_warn_out_range(tev->point.symbol,
+ tev->point.address))) {
+ err = -EACCES;
+ goto errout;
+ }
+
+ if (asprintf(&tp->realname, "abs_%lx", tp->address) < 0)
+ goto errout;
+
+ if (pev->target) {
+ tp->module = strdup(pev->target);
+ if (!tp->module)
+ goto errout;
+ }
+
+ if (tev->group) {
+ tev->group = strdup(pev->group);
+ if (!tev->group)
+ goto errout;
+ }
+
+ if (pev->event) {
+ tev->event = strdup(pev->event);
+ if (!tev->event)
+ goto errout;
+ }
+
+ tev->nargs = pev->nargs;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (!tev->args)
+ goto errout;
+
+ for (i = 0; i < tev->nargs; i++)
+ copy_to_probe_trace_arg(&tev->args[i], &pev->args[i]);
+
+ return 1;
+
+errout:
+ clear_probe_trace_events(*tevs, 1);
+ *tevs = NULL;
+ return err;
+}
+
+/* Concatenate two arrays */
+static void *memcat(void *a, size_t sz_a, void *b, size_t sz_b)
+{
+ void *ret;
+
+ ret = malloc(sz_a + sz_b);
+ if (ret) {
+ memcpy(ret, a, sz_a);
+ memcpy(ret + sz_a, b, sz_b);
+ }
+ return ret;
+}
+
+static int
+concat_probe_trace_events(struct probe_trace_event **tevs, int *ntevs,
+ struct probe_trace_event **tevs2, int ntevs2)
+{
+ struct probe_trace_event *new_tevs;
+ int ret = 0;
+
+ if (*ntevs == 0) {
+ *tevs = *tevs2;
+ *ntevs = ntevs2;
+ *tevs2 = NULL;
+ return 0;
+ }
+
+ if (*ntevs + ntevs2 > probe_conf.max_probes)
+ ret = -E2BIG;
+ else {
+ /* Concatenate the arrays of probe_trace_event */
+ new_tevs = memcat(*tevs, (*ntevs) * sizeof(**tevs),
+ *tevs2, ntevs2 * sizeof(**tevs2));
+ if (!new_tevs)
+ ret = -ENOMEM;
+ else {
+ free(*tevs);
+ *tevs = new_tevs;
+ *ntevs += ntevs2;
+ }
+ }
+ if (ret < 0)
+ clear_probe_trace_events(*tevs2, ntevs2);
+ zfree(tevs2);
+
+ return ret;
+}
+
+/*
+ * Try to find probe_trace_events from the given probe caches. Return the
+ * number of cached events found; if an error occurs, return the error.
+ */
+static int find_cached_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs,
+ const char *target)
+{
+ struct probe_cache *cache;
+ struct probe_cache_entry *entry;
+ struct probe_trace_event *tmp_tevs = NULL;
+ int ntevs = 0;
+ int ret = 0;
+
+ cache = probe_cache__new(target, pev->nsi);
+ /* Return 0 ("not found") if the target has no probe cache. */
+ if (!cache)
+ return 0;
+
+ for_each_probe_cache_entry(entry, cache) {
+ /* Skip the cache entry which has no name */
+ if (!entry->pev.event || !entry->pev.group)
+ continue;
+ if ((!pev->group || strglobmatch(entry->pev.group, pev->group)) &&
+ strglobmatch(entry->pev.event, pev->event)) {
+ ret = probe_cache_entry__get_event(entry, &tmp_tevs);
+ if (ret > 0)
+ ret = concat_probe_trace_events(tevs, &ntevs,
+ &tmp_tevs, ret);
+ if (ret < 0)
+ break;
+ }
+ }
+ probe_cache__delete(cache);
+ if (ret < 0) {
+ clear_probe_trace_events(*tevs, ntevs);
+ zfree(tevs);
+ } else {
+ ret = ntevs;
+ if (ntevs > 0 && target && target[0] == '/')
+ pev->uprobes = true;
+ }
+
+ return ret;
+}
+
+/* Try to find probe_trace_event from all probe caches */
+static int find_cached_events_all(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct probe_trace_event *tmp_tevs = NULL;
+ struct strlist *bidlist;
+ struct str_node *nd;
+ char *pathname;
+ int ntevs = 0;
+ int ret;
+
+ /* Get the buildid list of all valid caches */
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ ret = -errno;
+ pr_debug("Failed to get buildids: %d\n", ret);
+ return ret;
+ }
+
+ ret = 0;
+ strlist__for_each_entry(nd, bidlist) {
+ pathname = build_id_cache__origname(nd->s);
+ ret = find_cached_events(pev, &tmp_tevs, pathname);
+ /* In the case of ret == 0 (not found), we just skip it */
+ if (ret > 0)
+ ret = concat_probe_trace_events(tevs, &ntevs,
+ &tmp_tevs, ret);
+ free(pathname);
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(bidlist);
+
+ if (ret < 0) {
+ clear_probe_trace_events(*tevs, ntevs);
+ zfree(tevs);
+ } else
+ ret = ntevs;
+
+ return ret;
+}
+
+static int find_probe_trace_events_from_cache(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct probe_cache *cache;
+ struct probe_cache_entry *entry;
+ struct probe_trace_event *tev;
+ struct str_node *node;
+ int ret, i;
+
+ if (pev->sdt) {
+ /* For SDT/cached events, we use special search functions */
+ if (!pev->target)
+ return find_cached_events_all(pev, tevs);
+ else
+ return find_cached_events(pev, tevs, pev->target);
+ }
+ cache = probe_cache__new(pev->target, pev->nsi);
+ if (!cache)
+ return 0;
+
+ entry = probe_cache__find(cache, pev);
+ if (!entry) {
+ /* SDT must be in the cache */
+ ret = pev->sdt ? -ENOENT : 0;
+ goto out;
+ }
+
+ ret = strlist__nr_entries(entry->tevlist);
+ if (ret > probe_conf.max_probes) {
+ pr_debug("Too many entries matched in the cache of %s\n",
+ pev->target ? : "kernel");
+ ret = -E2BIG;
+ goto out;
+ }
+
+ *tevs = zalloc(ret * sizeof(*tev));
+ if (!*tevs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ i = 0;
+ strlist__for_each_entry(node, entry->tevlist) {
+ tev = &(*tevs)[i++];
+ ret = parse_probe_trace_command(node->s, tev);
+ if (ret < 0)
+ goto out;
+ /* Set the uprobes attribute to the same as the original */
+ tev->uprobes = pev->uprobes;
+ }
+ ret = i;
+
+out:
+ probe_cache__delete(cache);
+ return ret;
+}
+
+static int convert_to_probe_trace_events(struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ int ret;
+
+ if (!pev->group && !pev->sdt) {
+ /* Set group name if not given */
+ if (!pev->uprobes) {
+ pev->group = strdup(PERFPROBE_GROUP);
+ ret = pev->group ? 0 : -ENOMEM;
+ } else
+ ret = convert_exec_to_group(pev->target, &pev->group);
+ if (ret != 0) {
+ pr_warning("Failed to make a group name.\n");
+ return ret;
+ }
+ }
+
+ ret = try_to_find_absolute_address(pev, tevs);
+ if (ret > 0)
+ return ret;
+
+ /* First, we need to look up the cache entry */
+ ret = find_probe_trace_events_from_cache(pev, tevs);
+ if (ret > 0 || pev->sdt) /* SDT can be found only in the cache */
+ return ret == 0 ? -ENOENT : ret; /* Found in probe cache */
+
+ /* Convert perf_probe_event with debuginfo */
+ ret = try_to_find_probe_trace_events(pev, tevs);
+ if (ret != 0)
+ return ret; /* Found in debuginfo or got an error */
+
+ return find_probe_trace_events_from_map(pev, tevs);
+}
+
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int i, ret;
+
+ /* Loop 1: convert all events */
+ for (i = 0; i < npevs; i++) {
+ /* Init kprobe blacklist if needed */
+ if (!pevs[i].uprobes)
+ kprobe_blacklist__init();
+ /* Convert with or without debuginfo */
+ ret = convert_to_probe_trace_events(&pevs[i], &pevs[i].tevs);
+ if (ret < 0)
+ return ret;
+ pevs[i].ntevs = ret;
+ }
+ /* This releases the blacklist only if it was allocated */
+ kprobe_blacklist__release();
+
+ return 0;
+}
+
+static int show_probe_trace_event(struct probe_trace_event *tev)
+{
+ char *buf = synthesize_probe_trace_command(tev);
+
+ if (!buf) {
+ pr_debug("Failed to synthesize probe trace event.\n");
+ return -EINVAL;
+ }
+
+ /* Showing the definition always goes to stdout */
+ printf("%s\n", buf);
+ free(buf);
+
+ return 0;
+}
+
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs)
+{
+ struct strlist *namelist = strlist__new(NULL, NULL);
+ struct probe_trace_event *tev;
+ struct perf_probe_event *pev;
+ int i, j, ret = 0;
+
+ if (!namelist)
+ return -ENOMEM;
+
+ for (j = 0; j < npevs && !ret; j++) {
+ pev = &pevs[j];
+ for (i = 0; i < pev->ntevs && !ret; i++) {
+ tev = &pev->tevs[i];
+ /* Skip if the symbol is out of .text or blacklisted */
+ if (!tev->point.symbol && !pev->uprobes)
+ continue;
+
+ /* Set new name for tev (and update namelist) */
+ ret = probe_trace_event__set_name(tev, pev,
+ namelist, true);
+ if (!ret)
+ ret = show_probe_trace_event(tev);
+ }
+ }
+ strlist__delete(namelist);
+
+ return ret;
+}
+
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int i, ret = 0;
+
+ /* Loop 2: add all events */
+ for (i = 0; i < npevs; i++) {
+ ret = __add_probe_trace_events(&pevs[i], pevs[i].tevs,
+ pevs[i].ntevs,
+ probe_conf.force_add);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int i, j;
+ struct perf_probe_event *pev;
+
+ /* Loop 3: cleanup and free trace events */
+ for (i = 0; i < npevs; i++) {
+ pev = &pevs[i];
+ for (j = 0; j < pevs[i].ntevs; j++)
+ clear_probe_trace_event(&pevs[i].tevs[j]);
+ zfree(&pevs[i].tevs);
+ pevs[i].ntevs = 0;
+ nsinfo__zput(pev->nsi);
+ clear_perf_probe_event(&pevs[i]);
+ }
+}
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
+{
+ int ret;
+
+ ret = init_probe_symbol_maps(pevs->uprobes);
+ if (ret < 0)
+ return ret;
+
+ ret = convert_perf_probe_events(pevs, npevs);
+ if (ret == 0)
+ ret = apply_perf_probe_events(pevs, npevs);
+
+ cleanup_perf_probe_events(pevs, npevs);
+
+ exit_probe_symbol_maps();
+ return ret;
+}
+
+int del_perf_probe_events(struct strfilter *filter)
+{
+ int ret, ret2, ufd = -1, kfd = -1;
+ char *str = strfilter__string(filter);
+
+ if (!str)
+ return -EINVAL;
+
+ /* Get current event names */
+ ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW);
+ if (ret < 0)
+ goto out;
+
+ ret = probe_file__del_events(kfd, filter);
+ if (ret < 0 && ret != -ENOENT)
+ goto error;
+
+ ret2 = probe_file__del_events(ufd, filter);
+ if (ret2 < 0 && ret2 != -ENOENT) {
+ ret = ret2;
+ goto error;
+ }
+ ret = 0;
+
+error:
+ if (kfd >= 0)
+ close(kfd);
+ if (ufd >= 0)
+ close(ufd);
+out:
+ free(str);
+
+ return ret;
+}
+
+int show_available_funcs(const char *target, struct nsinfo *nsi,
+ struct strfilter *_filter, bool user)
+{
+ struct rb_node *nd;
+ struct map *map;
+ int ret;
+
+ ret = init_probe_symbol_maps(user);
+ if (ret < 0)
+ return ret;
+
+ /* Get a symbol map */
+ map = get_target_map(target, nsi, user);
+ if (!map) {
+ pr_err("Failed to get a map for %s\n", (target) ? : "kernel");
+ return -EINVAL;
+ }
+
+ ret = map__load(map);
+ if (ret) {
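+ /* -2 means the symbols were loaded but none matched the filter */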
+ if (ret == -2) {
+ char *str = strfilter__string(_filter);
+ pr_err("Failed to find symbols matched to \"%s\"\n",
+ str);
+ free(str);
+ } else
+ pr_err("Failed to load symbols in %s\n",
+ (target) ? : "kernel");
+ goto end;
+ }
+ if (!dso__sorted_by_name(map->dso))
+ dso__sort_by_name(map->dso);
+
+ /* Show all (filtered) symbols */
+ setup_pager();
+
+ for (nd = rb_first(&map->dso->symbol_names); nd; nd = rb_next(nd)) {
+ struct symbol_name_rb_node *pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+
+ if (strfilter__compare(_filter, pos->sym.name))
+ printf("%s\n", pos->sym.name);
+ }
+end:
+ map__put(map);
+ exit_probe_symbol_maps();
+
+ return ret;
+}
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar)
+{
+ tvar->value = strdup(pvar->var);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+ if (pvar->type) {
+ tvar->type = strdup(pvar->type);
+ if (tvar->type == NULL)
+ return -ENOMEM;
+ }
+ if (pvar->name) {
+ tvar->name = strdup(pvar->name);
+ if (tvar->name == NULL)
+ return -ENOMEM;
+ } else
+ tvar->name = NULL;
+ return 0;
+}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
new file mode 100644
index 000000000..45b14f020
--- /dev/null
+++ b/tools/perf/util/probe-event.h
@@ -0,0 +1,193 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_EVENT_H
+#define _PROBE_EVENT_H
+
+#include <linux/compiler.h>
+#include <stdbool.h>
+#include "intlist.h"
+#include "namespaces.h"
+
+/* Probe related configurations */
+struct probe_conf {
+ bool show_ext_vars;
+ bool show_location_range;
+ bool force_add;
+ bool no_inlines;
+ bool cache;
+ int max_probes;
+};
+extern struct probe_conf probe_conf;
+extern bool probe_event_dry_run;
+
+struct symbol;
+
+/* kprobe-tracer and uprobe-tracer tracing point */
+struct probe_trace_point {
+ char *realname; /* function real name (if needed) */
+ char *symbol; /* Base symbol */
+ char *module; /* Module name */
+ unsigned long offset; /* Offset from symbol */
+ unsigned long address; /* Actual address of the trace point */
+ bool retprobe; /* Return probe flag */
+};
+
+/* probe-tracer tracing argument referencing offset */
+struct probe_trace_arg_ref {
+ struct probe_trace_arg_ref *next; /* Next reference */
+ long offset; /* Offset value */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing argument */
+struct probe_trace_arg {
+ char *name; /* Argument name */
+ char *value; /* Base value */
+ char *type; /* Type name */
+ struct probe_trace_arg_ref *ref; /* Referencing offset */
+};
+
+/* kprobe-tracer and uprobe-tracer tracing event (point + arg) */
+struct probe_trace_event {
+ char *event; /* Event name */
+ char *group; /* Group name */
+ struct probe_trace_point point; /* Trace point */
+ int nargs; /* Number of args */
+ bool uprobes; /* uprobes only */
+ struct probe_trace_arg *args; /* Arguments */
+};
+
+/* Perf probe probing point */
+struct perf_probe_point {
+ char *file; /* File path */
+ char *function; /* Function name */
+ int line; /* Line number */
+ bool retprobe; /* Return probe flag */
+ char *lazy_line; /* Lazy matching pattern */
+ unsigned long offset; /* Offset from function entry */
+ unsigned long abs_address; /* Absolute address of the point */
+};
+
+/* Perf probe probing argument field chain */
+struct perf_probe_arg_field {
+ struct perf_probe_arg_field *next; /* Next field */
+ char *name; /* Name of the field */
+ long index; /* Array index number */
+ bool ref; /* Referencing flag */
+};
+
+/* Perf probe probing argument */
+struct perf_probe_arg {
+ char *name; /* Argument name */
+ char *var; /* Variable name */
+ char *type; /* Type name */
+ struct perf_probe_arg_field *field; /* Structure fields */
+};
+
+/* Perf probe probing event (point + arg) */
+struct perf_probe_event {
+ char *event; /* Event name */
+ char *group; /* Group name */
+ struct perf_probe_point point; /* Probe point */
+ int nargs; /* Number of arguments */
+ bool sdt; /* SDT/cached event flag */
+ bool uprobes; /* Uprobe event flag */
+ char *target; /* Target binary */
+ struct perf_probe_arg *args; /* Arguments */
+ struct probe_trace_event *tevs;
+ int ntevs;
+ struct nsinfo *nsi; /* Target namespace */
+};
+
+/* Line range */
+struct line_range {
+ char *file; /* File name */
+ char *function; /* Function name */
+ int start; /* Start line number */
+ int end; /* End line number */
+ int offset; /* Start line offset */
+ char *path; /* Real path name */
+ char *comp_dir; /* Compile directory */
+ struct intlist *line_list; /* Visible lines */
+};
+
+struct strlist;
+
+/* List of variables */
+struct variable_list {
+ struct probe_trace_point point; /* Actual probepoint */
+ struct strlist *vars; /* Available variables */
+};
+
+struct map;
+int init_probe_symbol_maps(bool user_only);
+void exit_probe_symbol_maps(void);
+
+/* Command string to events */
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
+
+/* Events to command string */
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+char *synthesize_perf_probe_arg(struct perf_probe_arg *pa);
+char *synthesize_perf_probe_point(struct perf_probe_point *pp);
+
+int perf_probe_event__copy(struct perf_probe_event *dst,
+ struct perf_probe_event *src);
+
+bool perf_probe_with_var(struct perf_probe_event *pev);
+
+/* Check the perf_probe_event needs debuginfo */
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+
+/* Release event contents */
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
+
+/* Command string to line-range */
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
+
+/* Release line range members */
+void line_range__clear(struct line_range *lr);
+
+/* Initialize line range */
+int line_range__init(struct line_range *lr);
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int show_probe_trace_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+
+struct strfilter;
+
+int del_perf_probe_events(struct strfilter *filter);
+
+int show_perf_probe_event(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module,
+ struct nsinfo *nsi, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ struct strfilter *filter);
+int show_available_funcs(const char *module, struct nsinfo *nsi,
+ struct strfilter *filter, bool user);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+ struct probe_trace_event *tev, struct map *map,
+ struct symbol *sym);
+
+/* If there is no space to write, returns -E2BIG. */
+int e_snprintf(char *str, size_t size, const char *format, ...) __printf(3, 4);
+
+/* Maximum index number of event-name postfix */
+#define MAX_EVENT_INDEX 1024
+
+int copy_to_probe_trace_arg(struct probe_trace_arg *tvar,
+ struct perf_probe_arg *pvar);
+
+struct map *get_target_map(const char *target, struct nsinfo *nsi, bool user);
+
+void arch__post_process_probe_trace_events(struct perf_probe_event *pev,
+ int ntevs);
+
+#endif /*_PROBE_EVENT_H */
diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c
new file mode 100644
index 000000000..b67ae3b8d
--- /dev/null
+++ b/tools/perf/util/probe-file.c
@@ -0,0 +1,1066 @@
+/*
+ * probe-file.c : operate ftrace k/uprobe events files
+ *
+ * Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include "util.h"
+#include "event.h"
+#include "strlist.h"
+#include "strfilter.h"
+#include "debug.h"
+#include "cache.h"
+#include "color.h"
+#include "symbol.h"
+#include "thread.h"
+#include <api/fs/tracing_path.h>
+#include "probe-event.h"
+#include "probe-file.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "string2.h"
+
+/* 4096 - 2 ('\n' + '\0') */
+#define MAX_CMDLEN 4094
+
+static void print_open_warning(int err, bool uprobe)
+{
+ char sbuf[STRERR_BUFSIZE];
+
+ if (err == -ENOENT) {
+ const char *config;
+
+ if (uprobe)
+ config = "CONFIG_UPROBE_EVENTS";
+ else
+ config = "CONFIG_KPROBE_EVENTS";
+
+ pr_warning("%cprobe_events file does not exist"
+ " - please rebuild kernel with %s.\n",
+ uprobe ? 'u' : 'k', config);
+ } else if (err == -ENOTSUP)
+ pr_warning("Tracefs or debugfs is not mounted.\n");
+ else
+ pr_warning("Failed to open %cprobe_events: %s\n",
+ uprobe ? 'u' : 'k',
+ str_error_r(-err, sbuf, sizeof(sbuf)));
+}
+
+static void print_both_open_warning(int kerr, int uerr)
+{
+ /* Both kprobes and uprobes are disabled, warn about it. */
+ if (kerr == -ENOTSUP && uerr == -ENOTSUP)
+ pr_warning("Tracefs or debugfs is not mounted.\n");
+ else if (kerr == -ENOENT && uerr == -ENOENT)
+ pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS "
+ "or/and CONFIG_UPROBE_EVENTS.\n");
+ else {
+ char sbuf[STRERR_BUFSIZE];
+ pr_warning("Failed to open kprobe events: %s.\n",
+ str_error_r(-kerr, sbuf, sizeof(sbuf)));
+ pr_warning("Failed to open uprobe events: %s.\n",
+ str_error_r(-uerr, sbuf, sizeof(sbuf)));
+ }
+}
+
+int open_trace_file(const char *trace_file, bool readwrite)
+{
+ char buf[PATH_MAX];
+ int ret;
+
+ ret = e_snprintf(buf, PATH_MAX, "%s/%s", tracing_path_mount(), trace_file);
+ if (ret >= 0) {
+ pr_debug("Opening %s write=%d\n", buf, readwrite);
+ if (readwrite && !probe_event_dry_run)
+ ret = open(buf, O_RDWR | O_APPEND, 0);
+ else
+ ret = open(buf, O_RDONLY, 0);
+
+ if (ret < 0)
+ ret = -errno;
+ }
+ return ret;
+}
+
+static int open_kprobe_events(bool readwrite)
+{
+ return open_trace_file("kprobe_events", readwrite);
+}
+
+static int open_uprobe_events(bool readwrite)
+{
+ return open_trace_file("uprobe_events", readwrite);
+}
+
+int probe_file__open(int flag)
+{
+ int fd;
+
+ if (flag & PF_FL_UPROBE)
+ fd = open_uprobe_events(flag & PF_FL_RW);
+ else
+ fd = open_kprobe_events(flag & PF_FL_RW);
+ if (fd < 0)
+ print_open_warning(fd, flag & PF_FL_UPROBE);
+
+ return fd;
+}
+
+int probe_file__open_both(int *kfd, int *ufd, int flag)
+{
+ if (!kfd || !ufd)
+ return -EINVAL;
+
+ *kfd = open_kprobe_events(flag & PF_FL_RW);
+ *ufd = open_uprobe_events(flag & PF_FL_RW);
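+ /* It is enough that one of the two files opens; fail only if both do not */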
+ if (*kfd < 0 && *ufd < 0) {
+ print_both_open_warning(*kfd, *ufd);
+ return *kfd;
+ }
+
+ return 0;
+}
+
+/* Get raw string list of current kprobe_events or uprobe_events */
+struct strlist *probe_file__get_rawlist(int fd)
+{
+ int ret, idx, fddup;
+ FILE *fp;
+ char buf[MAX_CMDLEN];
+ char *p;
+ struct strlist *sl;
+
+ if (fd < 0)
+ return NULL;
+
+ sl = strlist__new(NULL, NULL);
+ if (sl == NULL)
+ return NULL;
+
+ fddup = dup(fd);
+ if (fddup < 0)
+ goto out_free_sl;
+
+ fp = fdopen(fddup, "r");
+ if (!fp)
+ goto out_close_fddup;
+
+ while (!feof(fp)) {
+ p = fgets(buf, MAX_CMDLEN, fp);
+ if (!p)
+ break;
+
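+ /* Chomp the trailing newline before adding the line to the list */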
+ idx = strlen(p) - 1;
+ if (p[idx] == '\n')
+ p[idx] = '\0';
+ ret = strlist__add(sl, buf);
+ if (ret < 0) {
+ pr_debug("strlist__add failed (%d)\n", ret);
+ goto out_close_fp;
+ }
+ }
+ fclose(fp);
+
+ return sl;
+
+out_close_fp:
+ fclose(fp);
+ goto out_free_sl;
+out_close_fddup:
+ close(fddup);
+out_free_sl:
+ strlist__delete(sl);
+ return NULL;
+}
+
+static struct strlist *__probe_file__get_namelist(int fd, bool include_group)
+{
+ char buf[128];
+ struct strlist *sl, *rawlist;
+ struct str_node *ent;
+ struct probe_trace_event tev;
+ int ret = 0;
+
+ memset(&tev, 0, sizeof(tev));
+ rawlist = probe_file__get_rawlist(fd);
+ if (!rawlist)
+ return NULL;
+ sl = strlist__new(NULL, NULL);
+ strlist__for_each_entry(ent, rawlist) {
+ ret = parse_probe_trace_command(ent->s, &tev);
+ if (ret < 0)
+ break;
+ if (include_group) {
+ ret = e_snprintf(buf, 128, "%s:%s", tev.group,
+ tev.event);
+ if (ret >= 0)
+ ret = strlist__add(sl, buf);
+ } else
+ ret = strlist__add(sl, tev.event);
+ clear_probe_trace_event(&tev);
+ if (ret < 0)
+ break;
+ }
+ strlist__delete(rawlist);
+
+ if (ret < 0) {
+ strlist__delete(sl);
+ return NULL;
+ }
+ return sl;
+}
+
+/* Get current perf-probe event names */
+struct strlist *probe_file__get_namelist(int fd)
+{
+ return __probe_file__get_namelist(fd, false);
+}
+
+int probe_file__add_event(int fd, struct probe_trace_event *tev)
+{
+ int ret = 0;
+ char *buf = synthesize_probe_trace_command(tev);
+ char sbuf[STRERR_BUFSIZE];
+
+ if (!buf) {
+ pr_debug("Failed to synthesize probe trace event.\n");
+ return -EINVAL;
+ }
+
+ pr_debug("Writing event: %s\n", buf);
+ if (!probe_event_dry_run) {
+ if (write(fd, buf, strlen(buf)) < (int)strlen(buf)) {
+ ret = -errno;
+ pr_warning("Failed to write event: %s\n",
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ }
+ }
+ free(buf);
+
+ return ret;
+}
+
+static int __del_trace_probe_event(int fd, struct str_node *ent)
+{
+ char *p;
+ char buf[128];
+ int ret;
+
+ /* Convert from perf-probe event to trace-probe event */
+ ret = e_snprintf(buf, 128, "-:%s", ent->s);
+ if (ret < 0)
+ goto error;
+
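+ /* The buffer now holds "-:group:event"; ftrace expects "-:group/event" */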
+ p = strchr(buf + 2, ':');
+ if (!p) {
+ pr_debug("Internal error: %s should have ':' but not.\n",
+ ent->s);
+ ret = -ENOTSUP;
+ goto error;
+ }
+ *p = '/';
+
+ pr_debug("Writing event: %s\n", buf);
+ ret = write(fd, buf, strlen(buf));
+ if (ret < 0) {
+ ret = -errno;
+ goto error;
+ }
+
+ return 0;
+error:
+ pr_warning("Failed to delete event: %s\n",
+ str_error_r(-ret, buf, sizeof(buf)));
+ return ret;
+}
+
+int probe_file__get_events(int fd, struct strfilter *filter,
+ struct strlist *plist)
+{
+ struct strlist *namelist;
+ struct str_node *ent;
+ const char *p;
+ int ret = -ENOENT;
+
+ if (!plist)
+ return -EINVAL;
+
+ namelist = __probe_file__get_namelist(fd, true);
+ if (!namelist)
+ return -ENOENT;
+
+ strlist__for_each_entry(ent, namelist) {
+ p = strchr(ent->s, ':');
+ if ((p && strfilter__compare(filter, p + 1)) ||
+ strfilter__compare(filter, ent->s)) {
+ strlist__add(plist, ent->s);
+ ret = 0;
+ }
+ }
+ strlist__delete(namelist);
+
+ return ret;
+}
+
+int probe_file__del_strlist(int fd, struct strlist *namelist)
+{
+ int ret = 0;
+ struct str_node *ent;
+
+ strlist__for_each_entry(ent, namelist) {
+ ret = __del_trace_probe_event(fd, ent);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+int probe_file__del_events(int fd, struct strfilter *filter)
+{
+ struct strlist *namelist;
+ int ret;
+
+ namelist = strlist__new(NULL, NULL);
+ if (!namelist)
+ return -ENOMEM;
+
+ ret = probe_file__get_events(fd, filter, namelist);
+ if (ret < 0)
+ goto out;
+
+ ret = probe_file__del_strlist(fd, namelist);
+out:
+ strlist__delete(namelist);
+ return ret;
+}
+
+/* The caller must ensure this entry has been removed from its list */
+static void probe_cache_entry__delete(struct probe_cache_entry *entry)
+{
+ if (entry) {
+ BUG_ON(!list_empty(&entry->node));
+
+ strlist__delete(entry->tevlist);
+ clear_perf_probe_event(&entry->pev);
+ zfree(&entry->spev);
+ free(entry);
+ }
+}
+
+static struct probe_cache_entry *
+probe_cache_entry__new(struct perf_probe_event *pev)
+{
+ struct probe_cache_entry *entry = zalloc(sizeof(*entry));
+
+ if (entry) {
+ INIT_LIST_HEAD(&entry->node);
+ entry->tevlist = strlist__new(NULL, NULL);
+ if (!entry->tevlist)
+ zfree(&entry);
+ else if (pev) {
+ entry->spev = synthesize_perf_probe_command(pev);
+ if (!entry->spev ||
+ perf_probe_event__copy(&entry->pev, pev) < 0) {
+ probe_cache_entry__delete(entry);
+ return NULL;
+ }
+ }
+ }
+
+ return entry;
+}
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+ struct probe_trace_event **tevs)
+{
+ struct probe_trace_event *tev;
+ struct str_node *node;
+ int ret, i;
+
+ ret = strlist__nr_entries(entry->tevlist);
+ if (ret > probe_conf.max_probes)
+ return -E2BIG;
+
+ *tevs = zalloc(ret * sizeof(*tev));
+ if (!*tevs)
+ return -ENOMEM;
+
+ i = 0;
+ strlist__for_each_entry(node, entry->tevlist) {
+ tev = &(*tevs)[i++];
+ ret = parse_probe_trace_command(node->s, tev);
+ if (ret < 0)
+ break;
+ }
+ return i;
+}
+
+/* For the kernel probe caches, pass target = NULL or DSO__NAME_KALLSYMS */
+static int probe_cache__open(struct probe_cache *pcache, const char *target,
+ struct nsinfo *nsi)
+{
+ char cpath[PATH_MAX];
+ char sbuildid[SBUILD_ID_SIZE];
+ char *dir_name = NULL;
+ bool is_kallsyms = false;
+ int ret, fd;
+ struct nscookie nsc;
+
+ if (target && build_id_cache__cached(target)) {
+ /* This is a cached buildid */
+ strlcpy(sbuildid, target, SBUILD_ID_SIZE);
+ dir_name = build_id_cache__linkname(sbuildid, NULL, 0);
+ goto found;
+ }
+
+ if (!target || !strcmp(target, DSO__NAME_KALLSYMS)) {
+ target = DSO__NAME_KALLSYMS;
+ is_kallsyms = true;
+ ret = sysfs__sprintf_build_id("/", sbuildid);
+ } else {
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = filename__sprintf_build_id(target, sbuildid);
+ nsinfo__mountns_exit(&nsc);
+ }
+
+ if (ret < 0) {
+ pr_debug("Failed to get build-id from %s.\n", target);
+ return ret;
+ }
+
+ /* If we have no buildid cache, make it */
+ if (!build_id_cache__cached(sbuildid)) {
+ ret = build_id_cache__add_s(sbuildid, target, nsi,
+ is_kallsyms, NULL);
+ if (ret < 0) {
+ pr_debug("Failed to add build-id cache: %s\n", target);
+ return ret;
+ }
+ }
+
+ dir_name = build_id_cache__cachedir(sbuildid, target, nsi, is_kallsyms,
+ false);
+found:
+ if (!dir_name) {
+ pr_debug("Failed to get cache from %s\n", target);
+ return -ENOMEM;
+ }
+
+ snprintf(cpath, PATH_MAX, "%s/probes", dir_name);
+ fd = open(cpath, O_CREAT | O_RDWR, 0644);
+ if (fd < 0)
+ pr_debug("Failed to open cache(%d): %s\n", fd, cpath);
+ free(dir_name);
+ pcache->fd = fd;
+
+ return fd;
+}
+
+static int probe_cache__load(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry = NULL;
+ char buf[MAX_CMDLEN], *p;
+ int ret = 0, fddup;
+ FILE *fp;
+
+ fddup = dup(pcache->fd);
+ if (fddup < 0)
+ return -errno;
+ fp = fdopen(fddup, "r");
+ if (!fp) {
+ close(fddup);
+ return -EINVAL;
+ }
+
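+ /*
+ * The cache file holds "#cmd" (perf-probe) or "%cmd" (SDT) header
+ * lines, each followed by the trace event lines belonging to it.
+ */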
+ while (!feof(fp)) {
+ if (!fgets(buf, MAX_CMDLEN, fp))
+ break;
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ /* #perf_probe_event or %sdt_event */
+ if (buf[0] == '#' || buf[0] == '%') {
+ entry = probe_cache_entry__new(NULL);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (buf[0] == '%')
+ entry->sdt = true;
+ entry->spev = strdup(buf + 1);
+ if (entry->spev)
+ ret = parse_perf_probe_command(buf + 1,
+ &entry->pev);
+ else
+ ret = -ENOMEM;
+ if (ret < 0) {
+ probe_cache_entry__delete(entry);
+ goto out;
+ }
+ list_add_tail(&entry->node, &pcache->entries);
+ } else { /* trace_probe_event */
+ if (!entry) {
+ ret = -EINVAL;
+ goto out;
+ }
+ strlist__add(entry->tevlist, buf);
+ }
+ }
+out:
+ fclose(fp);
+ return ret;
+}
+
+static struct probe_cache *probe_cache__alloc(void)
+{
+ struct probe_cache *pcache = zalloc(sizeof(*pcache));
+
+ if (pcache) {
+ INIT_LIST_HEAD(&pcache->entries);
+ pcache->fd = -EINVAL;
+ }
+ return pcache;
+}
+
+void probe_cache__purge(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry, *n;
+
+ list_for_each_entry_safe(entry, n, &pcache->entries, node) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+}
+
+void probe_cache__delete(struct probe_cache *pcache)
+{
+ if (!pcache)
+ return;
+
+ probe_cache__purge(pcache);
+ if (pcache->fd > 0)
+ close(pcache->fd);
+ free(pcache);
+}
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi)
+{
+ struct probe_cache *pcache = probe_cache__alloc();
+ int ret;
+
+ if (!pcache)
+ return NULL;
+
+ ret = probe_cache__open(pcache, target, nsi);
+ if (ret < 0) {
+ pr_debug("Cache open error: %d\n", ret);
+ goto out_err;
+ }
+
+ ret = probe_cache__load(pcache);
+ if (ret < 0) {
+ pr_debug("Cache read error: %d\n", ret);
+ goto out_err;
+ }
+
+ return pcache;
+
+out_err:
+ probe_cache__delete(pcache);
+ return NULL;
+}
+
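+/* NULL-safe string equality: two NULL strings compare equal */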
+static bool streql(const char *a, const char *b)
+{
+ if (a == b)
+ return true;
+
+ if (!a || !b)
+ return false;
+
+ return !strcmp(a, b);
+}
+
+struct probe_cache_entry *
+probe_cache__find(struct probe_cache *pcache, struct perf_probe_event *pev)
+{
+ struct probe_cache_entry *entry = NULL;
+ char *cmd = synthesize_perf_probe_command(pev);
+
+ if (!cmd)
+ return NULL;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ if (pev->sdt) {
+ if (entry->pev.event &&
+ streql(entry->pev.event, pev->event) &&
+ (!pev->group ||
+ streql(entry->pev.group, pev->group)))
+ goto found;
+
+ continue;
+ }
+ /* Hit if same event name or same command-string */
+ if ((pev->event &&
+ (streql(entry->pev.group, pev->group) &&
+ streql(entry->pev.event, pev->event))) ||
+ (!strcmp(entry->spev, cmd)))
+ goto found;
+ }
+ entry = NULL;
+
+found:
+ free(cmd);
+ return entry;
+}
+
+struct probe_cache_entry *
+probe_cache__find_by_name(struct probe_cache *pcache,
+ const char *group, const char *event)
+{
+ struct probe_cache_entry *entry = NULL;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ /* Hit if both the group and event names match */
+ if (streql(entry->pev.group, group) &&
+ streql(entry->pev.event, event))
+ goto found;
+ }
+ entry = NULL;
+
+found:
+ return entry;
+}
+
+int probe_cache__add_entry(struct probe_cache *pcache,
+ struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs)
+{
+ struct probe_cache_entry *entry = NULL;
+ char *command;
+ int i, ret = 0;
+
+ if (!pcache || !pev || !tevs || ntevs <= 0) {
+ ret = -EINVAL;
+ goto out_err;
+ }
+
+ /* Remove old cache entry */
+ entry = probe_cache__find(pcache, pev);
+ if (entry) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+
+ ret = -ENOMEM;
+ entry = probe_cache_entry__new(pev);
+ if (!entry)
+ goto out_err;
+
+ for (i = 0; i < ntevs; i++) {
+ if (!tevs[i].point.symbol)
+ continue;
+
+ command = synthesize_probe_trace_command(&tevs[i]);
+ if (!command)
+ goto out_err;
+ strlist__add(entry->tevlist, command);
+ free(command);
+ }
+ list_add_tail(&entry->node, &pcache->entries);
+ pr_debug("Added probe cache: %d\n", ntevs);
+ return 0;
+
+out_err:
+ pr_debug("Failed to add probe caches\n");
+ probe_cache_entry__delete(entry);
+ return ret;
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+static unsigned long long sdt_note__get_addr(struct sdt_note *note)
+{
+ return note->bit32 ? (unsigned long long)note->addr.a32[0]
+ : (unsigned long long)note->addr.a64[0];
+}
+
+static const char * const type_to_suffix[] = {
+ ":s64", "", "", "", ":s32", "", ":s16", ":s8",
+ "", ":u8", ":u16", "", ":u32", "", "", "", ":u64"
+};
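+/* e.g. a size string of "-4" selects type_to_suffix[-4 + 8] == ":s32" */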
+
+/*
+ * Parse the size string as a decimal value; it is used as an index
+ * into type_to_suffix to pick the type suffix of the uprobe argument
+ * (defining its type).
+ */
+static int sdt_arg_parse_size(char *n_ptr, const char **suffix)
+{
+ long type_idx;
+
+ type_idx = strtol(n_ptr, NULL, 10);
+ if (type_idx < -8 || type_idx > 8) {
+ pr_debug4("Failed to get a valid sdt type\n");
+ return -1;
+ }
+
+ *suffix = type_to_suffix[type_idx + 8];
+ return 0;
+}
+
+static int synthesize_sdt_probe_arg(struct strbuf *buf, int i, const char *arg)
+{
+ char *op, *desc = strdup(arg), *new_op = NULL;
+ const char *suffix = "";
+ int ret = -1;
+
+ if (desc == NULL) {
+ pr_debug4("Allocation error\n");
+ return ret;
+ }
+
+ /*
+ * Argument is in N@OP format. N is size of the argument and OP is
+ * the actual assembly operand. N can be omitted; in that case the
+ * argument is just OP (without the '@').
+ */
+ op = strchr(desc, '@');
+ if (op) {
+ op[0] = '\0';
+ op++;
+
+ if (sdt_arg_parse_size(desc, &suffix))
+ goto error;
+ } else {
+ op = desc;
+ }
+
+ ret = arch_sdt_arg_parse_op(op, &new_op);
+
+ if (ret < 0)
+ goto error;
+
+ if (ret == SDT_ARG_VALID) {
+ ret = strbuf_addf(buf, " arg%d=%s%s", i + 1, new_op, suffix);
+ if (ret < 0)
+ goto error;
+ }
+
+ ret = 0;
+error:
+ free(desc);
+ free(new_op);
+ return ret;
+}
+
+static char *synthesize_sdt_probe_command(struct sdt_note *note,
+ const char *pathname,
+ const char *sdtgrp)
+{
+ struct strbuf buf;
+ char *ret = NULL, **args;
+ int i, args_count;
+
+ if (strbuf_init(&buf, 32) < 0)
+ return NULL;
+
+ if (strbuf_addf(&buf, "p:%s/%s %s:0x%llx",
+ sdtgrp, note->name, pathname,
+ sdt_note__get_addr(note)) < 0)
+ goto error;
+
+ if (!note->args)
+ goto out;
+
+ /* argv_split() allocates the vector; free it on every path */
+ args = argv_split(note->args, &args_count);
+ if (!args)
+ goto error;
+
+ for (i = 0; i < args_count; ++i) {
+ if (synthesize_sdt_probe_arg(&buf, i, args[i]) < 0) {
+ argv_free(args);
+ goto error;
+ }
+ }
+ argv_free(args);
+
+out:
+ ret = strbuf_detach(&buf, NULL);
+error:
+ strbuf_release(&buf);
+ return ret;
+}
+
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname)
+{
+ struct probe_cache_entry *entry = NULL;
+ struct list_head sdtlist;
+ struct sdt_note *note;
+ char *buf;
+ char sdtgrp[64];
+ int ret;
+
+ INIT_LIST_HEAD(&sdtlist);
+ ret = get_sdt_note_list(&sdtlist, pathname);
+ if (ret < 0) {
+ pr_debug4("Failed to get sdt note: %d\n", ret);
+ return ret;
+ }
+ list_for_each_entry(note, &sdtlist, note_list) {
+ ret = snprintf(sdtgrp, 64, "sdt_%s", note->provider);
+ if (ret < 0)
+ break;
+ /* Try to find same-name entry */
+ entry = probe_cache__find_by_name(pcache, sdtgrp, note->name);
+ if (!entry) {
+ entry = probe_cache_entry__new(NULL);
+ if (!entry) {
+ ret = -ENOMEM;
+ break;
+ }
+ entry->sdt = true;
+ ret = asprintf(&entry->spev, "%s:%s=%s", sdtgrp,
+ note->name, note->name);
+ if (ret < 0)
+ break;
+ entry->pev.event = strdup(note->name);
+ entry->pev.group = strdup(sdtgrp);
+ list_add_tail(&entry->node, &pcache->entries);
+ }
+ buf = synthesize_sdt_probe_command(note, pathname, sdtgrp);
+ if (!buf) {
+ ret = -ENOMEM;
+ break;
+ }
+
+ strlist__add(entry->tevlist, buf);
+ free(buf);
+ entry = NULL;
+ }
+ if (entry) {
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+ cleanup_sdt_note_list(&sdtlist);
+ return ret;
+}
+#endif
+
+static int probe_cache_entry__write(struct probe_cache_entry *entry, int fd)
+{
+ struct str_node *snode;
+ struct stat st;
+ struct iovec iov[3];
+ const char *prefix = entry->sdt ? "%" : "#";
+ int ret;
+ /* Save stat for rollback */
+ ret = fstat(fd, &st);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Writing cache: %s%s\n", prefix, entry->spev);
+ iov[0].iov_base = (void *)prefix; iov[0].iov_len = 1;
+ iov[1].iov_base = entry->spev; iov[1].iov_len = strlen(entry->spev);
+ iov[2].iov_base = (void *)"\n"; iov[2].iov_len = 1;
+ ret = writev(fd, iov, 3);
+ if (ret < (int)iov[1].iov_len + 2)
+ goto rollback;
+
+ strlist__for_each_entry(snode, entry->tevlist) {
+ iov[0].iov_base = (void *)snode->s;
+ iov[0].iov_len = strlen(snode->s);
+ iov[1].iov_base = (void *)"\n"; iov[1].iov_len = 1;
+ ret = writev(fd, iov, 2);
+ if (ret < (int)iov[0].iov_len + 1)
+ goto rollback;
+ }
+ return 0;
+
+rollback:
+ /* Rollback to avoid cache file corruption */
+ if (ret > 0)
+ ret = -1;
+ if (ftruncate(fd, st.st_size) < 0)
+ ret = -2;
+
+ return ret;
+}
+
+int probe_cache__commit(struct probe_cache *pcache)
+{
+ struct probe_cache_entry *entry;
+ int ret = 0;
+
+ /* TBD: if we do not update existing entries, skip it */
+ ret = lseek(pcache->fd, 0, SEEK_SET);
+ if (ret < 0)
+ goto out;
+
+ ret = ftruncate(pcache->fd, 0);
+ if (ret < 0)
+ goto out;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ ret = probe_cache_entry__write(entry, pcache->fd);
+ pr_debug("Cache committed: %d\n", ret);
+ if (ret < 0)
+ break;
+ }
+out:
+ return ret;
+}
+
+static bool probe_cache_entry__compare(struct probe_cache_entry *entry,
+ struct strfilter *filter)
+{
+ char buf[128], *ptr = entry->spev;
+
+ if (entry->pev.event) {
+ snprintf(buf, 128, "%s:%s", entry->pev.group, entry->pev.event);
+ ptr = buf;
+ }
+ return strfilter__compare(filter, ptr);
+}
+
+int probe_cache__filter_purge(struct probe_cache *pcache,
+ struct strfilter *filter)
+{
+ struct probe_cache_entry *entry, *tmp;
+
+ list_for_each_entry_safe(entry, tmp, &pcache->entries, node) {
+ if (probe_cache_entry__compare(entry, filter)) {
+ pr_info("Removed cached event: %s\n", entry->spev);
+ list_del_init(&entry->node);
+ probe_cache_entry__delete(entry);
+ }
+ }
+ return 0;
+}
+
+static int probe_cache__show_entries(struct probe_cache *pcache,
+ struct strfilter *filter)
+{
+ struct probe_cache_entry *entry;
+
+ for_each_probe_cache_entry(entry, pcache) {
+ if (probe_cache_entry__compare(entry, filter))
+ printf("%s\n", entry->spev);
+ }
+ return 0;
+}
+
+/* Show all cached probes */
+int probe_cache__show_all_caches(struct strfilter *filter)
+{
+ struct probe_cache *pcache;
+ struct strlist *bidlist;
+ struct str_node *nd;
+ char *buf = strfilter__string(filter);
+
+ pr_debug("list cache with filter: %s\n", buf);
+ free(buf);
+
+ bidlist = build_id_cache__list_all(true);
+ if (!bidlist) {
+ pr_debug("Failed to get buildids: %d\n", errno);
+ return -EINVAL;
+ }
+ strlist__for_each_entry(nd, bidlist) {
+ pcache = probe_cache__new(nd->s, NULL);
+ if (!pcache)
+ continue;
+ if (!list_empty(&pcache->entries)) {
+ buf = build_id_cache__origname(nd->s);
+ printf("%s (%s):\n", buf, nd->s);
+ free(buf);
+ probe_cache__show_entries(pcache, filter);
+ }
+ probe_cache__delete(pcache);
+ }
+ strlist__delete(bidlist);
+
+ return 0;
+}
+
+enum ftrace_readme {
+ FTRACE_README_PROBE_TYPE_X = 0,
+ FTRACE_README_KRETPROBE_OFFSET,
+ FTRACE_README_END,
+};
+
+static struct {
+ const char *pattern;
+ bool avail;
+} ftrace_readme_table[] = {
+#define DEFINE_TYPE(idx, pat) \
+ [idx] = {.pattern = pat, .avail = false}
+ DEFINE_TYPE(FTRACE_README_PROBE_TYPE_X, "*type: * x8/16/32/64,*"),
+ DEFINE_TYPE(FTRACE_README_KRETPROBE_OFFSET, "*place (kretprobe): *"),
+};
+
+static bool scan_ftrace_readme(enum ftrace_readme type)
+{
+ int fd;
+ FILE *fp;
+ char *buf = NULL;
+ size_t len = 0;
+ bool ret = false;
+ static bool scanned = false;
+
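+ /* Scan the README only once; later calls reuse the cached results */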
+ if (scanned)
+ goto result;
+
+ fd = open_trace_file("README", false);
+ if (fd < 0)
+ return ret;
+
+ fp = fdopen(fd, "r");
+ if (!fp) {
+ close(fd);
+ return ret;
+ }
+
+ while (getline(&buf, &len, fp) > 0)
+ for (enum ftrace_readme i = 0; i < FTRACE_README_END; i++)
+ if (!ftrace_readme_table[i].avail)
+ ftrace_readme_table[i].avail =
+ strglobmatch(buf, ftrace_readme_table[i].pattern);
+ scanned = true;
+
+ fclose(fp);
+ free(buf);
+
+result:
+ if (type >= FTRACE_README_END)
+ return false;
+
+ return ftrace_readme_table[type].avail;
+}
+
+bool probe_type_is_available(enum probe_type type)
+{
+ if (type >= PROBE_TYPE_END)
+ return false;
+ else if (type == PROBE_TYPE_X)
+ return scan_ftrace_readme(FTRACE_README_PROBE_TYPE_X);
+
+ return true;
+}
+
+bool kretprobe_offset_is_supported(void)
+{
+ return scan_ftrace_readme(FTRACE_README_KRETPROBE_OFFSET);
+}
diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h
new file mode 100644
index 000000000..63f29b1d2
--- /dev/null
+++ b/tools/perf/util/probe-file.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PROBE_FILE_H
+#define __PROBE_FILE_H
+
+#include "probe-event.h"
+
+struct strlist;
+struct strfilter;
+
+/* Cache of probe definitions */
+struct probe_cache_entry {
+ struct list_head node;
+ bool sdt;
+ struct perf_probe_event pev;
+ char *spev;
+ struct strlist *tevlist;
+};
+
+struct probe_cache {
+ int fd;
+ struct list_head entries;
+};
+
+enum probe_type {
+ PROBE_TYPE_U = 0,
+ PROBE_TYPE_S,
+ PROBE_TYPE_X,
+ PROBE_TYPE_STRING,
+ PROBE_TYPE_BITFIELD,
+ PROBE_TYPE_END,
+};
+
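+/* Flags for probe_file__open() and probe_file__open_both() */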
+#define PF_FL_UPROBE 1
+#define PF_FL_RW 2
+#define for_each_probe_cache_entry(entry, pcache) \
+ list_for_each_entry(entry, &pcache->entries, node)
+
+/* probe-file.c depends on libelf */
+#ifdef HAVE_LIBELF_SUPPORT
+int open_trace_file(const char *trace_file, bool readwrite);
+int probe_file__open(int flag);
+int probe_file__open_both(int *kfd, int *ufd, int flag);
+struct strlist *probe_file__get_namelist(int fd);
+struct strlist *probe_file__get_rawlist(int fd);
+int probe_file__add_event(int fd, struct probe_trace_event *tev);
+
+int probe_file__del_events(int fd, struct strfilter *filter);
+int probe_file__get_events(int fd, struct strfilter *filter,
+ struct strlist *plist);
+int probe_file__del_strlist(int fd, struct strlist *namelist);
+
+int probe_cache_entry__get_event(struct probe_cache_entry *entry,
+ struct probe_trace_event **tevs);
+
+struct probe_cache *probe_cache__new(const char *target, struct nsinfo *nsi);
+int probe_cache__add_entry(struct probe_cache *pcache,
+ struct perf_probe_event *pev,
+ struct probe_trace_event *tevs, int ntevs);
+int probe_cache__scan_sdt(struct probe_cache *pcache, const char *pathname);
+int probe_cache__commit(struct probe_cache *pcache);
+void probe_cache__purge(struct probe_cache *pcache);
+void probe_cache__delete(struct probe_cache *pcache);
+int probe_cache__filter_purge(struct probe_cache *pcache,
+ struct strfilter *filter);
+struct probe_cache_entry *probe_cache__find(struct probe_cache *pcache,
+ struct perf_probe_event *pev);
+struct probe_cache_entry *probe_cache__find_by_name(struct probe_cache *pcache,
+ const char *group, const char *event);
+int probe_cache__show_all_caches(struct strfilter *filter);
+bool probe_type_is_available(enum probe_type type);
+bool kretprobe_offset_is_supported(void);
+#else /* ! HAVE_LIBELF_SUPPORT */
+static inline struct probe_cache *probe_cache__new(const char *tgt __maybe_unused, struct nsinfo *nsi __maybe_unused)
+{
+ return NULL;
+}
+#define probe_cache__delete(pcache) do {} while (0)
+#endif
+#endif
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
new file mode 100644
index 000000000..4da4ec255
--- /dev/null
+++ b/tools/perf/util/probe-finder.c
@@ -0,0 +1,1910 @@
+/*
+ * probe-finder.c : C expression to kprobe event converter
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <inttypes.h>
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <dwarf-regs.h>
+
+#include <linux/bitops.h>
+#include "event.h"
+#include "dso.h"
+#include "debug.h"
+#include "intlist.h"
+#include "util.h"
+#include "strlist.h"
+#include "symbol.h"
+#include "probe-finder.h"
+#include "probe-file.h"
+#include "string2.h"
+
+/* Kprobe tracer basic type is up to u64 */
+#define MAX_BASIC_TYPE_BITS 64
+
+/* Dwarf FL wrappers */
+static char *debuginfo_path; /* Currently dummy */
+
+static const Dwfl_Callbacks offline_callbacks = {
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+
+ .section_address = dwfl_offline_section_address,
+
+ /* We use this table for core files too. */
+ .find_elf = dwfl_build_id_find_elf,
+};
+
+/* Get a Dwarf from offline image */
+static int debuginfo__init_offline_dwarf(struct debuginfo *dbg,
+ const char *path)
+{
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ dbg->dwfl = dwfl_begin(&offline_callbacks);
+ if (!dbg->dwfl)
+ goto error;
+
+ dwfl_report_begin(dbg->dwfl);
+ dbg->mod = dwfl_report_offline(dbg->dwfl, "", "", fd);
+ if (!dbg->mod)
+ goto error;
+
+ dbg->dbg = dwfl_module_getdwarf(dbg->mod, &dbg->bias);
+ if (!dbg->dbg)
+ goto error;
+
+ dwfl_report_end(dbg->dwfl, NULL, NULL);
+
+ return 0;
+error:
+ if (dbg->dwfl)
+ dwfl_end(dbg->dwfl);
+ else
+ close(fd);
+ memset(dbg, 0, sizeof(*dbg));
+
+ return -ENOENT;
+}
+
+static struct debuginfo *__debuginfo__new(const char *path)
+{
+ struct debuginfo *dbg = zalloc(sizeof(*dbg));
+ if (!dbg)
+ return NULL;
+
+ if (debuginfo__init_offline_dwarf(dbg, path) < 0)
+ zfree(&dbg);
+ if (dbg)
+ pr_debug("Open Debuginfo file: %s\n", path);
+ return dbg;
+}
+
+enum dso_binary_type distro_dwarf_types[] = {
+ DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+struct debuginfo *debuginfo__new(const char *path)
+{
+ enum dso_binary_type *type;
+ char buf[PATH_MAX], nil = '\0';
+ struct dso *dso;
+ struct debuginfo *dinfo = NULL;
+
+ /* Try to open distro debuginfo files */
+ dso = dso__new(path);
+ if (!dso)
+ goto out;
+
+ for (type = distro_dwarf_types;
+ !dinfo && *type != DSO_BINARY_TYPE__NOT_FOUND;
+ type++) {
+ if (dso__read_binary_type_filename(dso, *type, &nil,
+ buf, PATH_MAX) < 0)
+ continue;
+ dinfo = __debuginfo__new(buf);
+ }
+ dso__put(dso);
+
+out:
+ /* if failed to open all distro debuginfo, open given binary */
+ return dinfo ? : __debuginfo__new(path);
+}
+
+void debuginfo__delete(struct debuginfo *dbg)
+{
+ if (dbg) {
+ if (dbg->dwfl)
+ dwfl_end(dbg->dwfl);
+ free(dbg);
+ }
+}
+
+/*
+ * Probe finder related functions
+ */
+
+static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs)
+{
+ struct probe_trace_arg_ref *ref;
+ ref = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (ref != NULL)
+ ref->offset = offs;
+ return ref;
+}
+
+/*
+ * Convert a location into trace_arg.
+ * If tvar == NULL, this just checks whether the variable can be converted.
+ * If fentry == true and vr_die is a parameter, do a heuristic search
+ * for the location fuzzed by function entry mcount.
+ */
+static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
+ Dwarf_Op *fb_ops, Dwarf_Die *sp_die,
+ unsigned int machine,
+ struct probe_trace_arg *tvar)
+{
+ Dwarf_Attribute attr;
+ Dwarf_Addr tmp = 0;
+ Dwarf_Op *op;
+ size_t nops;
+ unsigned int regn;
+ Dwarf_Word offs = 0;
+ bool ref = false;
+ const char *regs;
+ int ret, ret2 = 0;
+
+ if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
+ goto static_var;
+
+ /* TODO: handle more than 1 exprs */
+ if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+ return -EINVAL; /* Broken DIE? */
+ if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
+ ret = dwarf_entrypc(sp_die, &tmp);
+ if (ret)
+ return -ENOENT;
+
+ if (probe_conf.show_location_range &&
+ (dwarf_tag(vr_die) == DW_TAG_variable)) {
+ ret2 = -ERANGE;
+ } else if (addr != tmp ||
+ dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+ return -ENOENT;
+ }
+
+ ret = dwarf_highpc(sp_die, &tmp);
+ if (ret)
+ return -ENOENT;
+ /*
+ * This is fuzzed by fentry mcount. We try to find the
+ * parameter location at the earliest address.
+ */
+ for (addr += 1; addr <= tmp; addr++) {
+ if (dwarf_getlocation_addr(&attr, addr, &op,
+ &nops, 1) > 0)
+ goto found;
+ }
+ return -ENOENT;
+ }
+found:
+ if (nops == 0)
+ /* TODO: Support const_value */
+ return -ENOENT;
+
+ if (op->atom == DW_OP_addr) {
+static_var:
+ if (!tvar)
+ return ret2;
+ /* Static variables on memory (not stack), make @varname */
+ ret = strlen(dwarf_diename(vr_die));
+ tvar->value = zalloc(ret + 2);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+ snprintf(tvar->value, ret + 2, "@%s", dwarf_diename(vr_die));
+ tvar->ref = alloc_trace_arg_ref((long)offs);
+ if (tvar->ref == NULL)
+ return -ENOMEM;
+ return ret2;
+ }
+
+ /* If this is based on frame buffer, set the offset */
+ if (op->atom == DW_OP_fbreg) {
+ if (fb_ops == NULL)
+ return -ENOTSUP;
+ ref = true;
+ offs = op->number;
+ op = &fb_ops[0];
+ }
+
+ if (op->atom >= DW_OP_breg0 && op->atom <= DW_OP_breg31) {
+ regn = op->atom - DW_OP_breg0;
+ offs += op->number;
+ ref = true;
+ } else if (op->atom >= DW_OP_reg0 && op->atom <= DW_OP_reg31) {
+ regn = op->atom - DW_OP_reg0;
+ } else if (op->atom == DW_OP_bregx) {
+ regn = op->number;
+ offs += op->number2;
+ ref = true;
+ } else if (op->atom == DW_OP_regx) {
+ regn = op->number;
+ } else {
+ pr_debug("DW_OP %x is not supported.\n", op->atom);
+ return -ENOTSUP;
+ }
+
+ if (!tvar)
+ return ret2;
+
+ regs = get_dwarf_regstr(regn, machine);
+ if (!regs) {
+ /* This should be a bug in DWARF or this tool */
+ pr_warning("Mapping for the register number %u "
+ "missing on this architecture.\n", regn);
+ return -ENOTSUP;
+ }
+
+ tvar->value = strdup(regs);
+ if (tvar->value == NULL)
+ return -ENOMEM;
+
+ if (ref) {
+ tvar->ref = alloc_trace_arg_ref((long)offs);
+ if (tvar->ref == NULL)
+ return -ENOMEM;
+ }
+ return ret2;
+}
+
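+/* sizeof(long) bytes hold BITS_PER_LONG bits, so this converts bytes to bits */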
+#define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long))
+
+static int convert_variable_type(Dwarf_Die *vr_die,
+ struct probe_trace_arg *tvar,
+ const char *cast)
+{
+ struct probe_trace_arg_ref **ref_ptr = &tvar->ref;
+ Dwarf_Die type;
+ char buf[16];
+ char sbuf[STRERR_BUFSIZE];
+ int bsize, boffs, total;
+ int ret;
+ char prefix;
+
+ /* TODO: check all types */
+ if (cast && strcmp(cast, "string") != 0 && strcmp(cast, "x") != 0 &&
+ strcmp(cast, "s") != 0 && strcmp(cast, "u") != 0) {
+ /* Non string type is OK */
+ /* and respect signedness/hexadecimal cast */
+ tvar->type = strdup(cast);
+ return (tvar->type == NULL) ? -ENOMEM : 0;
+ }
+
+ bsize = dwarf_bitsize(vr_die);
+ if (bsize > 0) {
+ /* This is a bitfield */
+ boffs = dwarf_bitoffset(vr_die);
+ total = dwarf_bytesize(vr_die);
+ if (boffs < 0 || total < 0)
+ return -ENOENT;
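+ /* kprobe-tracer bitfield format: b<bit-size>@<bit-offset>/<container-size-in-bits> */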
+ ret = snprintf(buf, 16, "b%d@%d/%zd", bsize, boffs,
+ BYTES_TO_BITS(total));
+ goto formatted;
+ }
+
+ if (die_get_real_type(vr_die, &type) == NULL) {
+ pr_warning("Failed to get a type information of %s.\n",
+ dwarf_diename(vr_die));
+ return -ENOENT;
+ }
+
+ pr_debug("%s type is %s.\n",
+ dwarf_diename(vr_die), dwarf_diename(&type));
+
+ if (cast && strcmp(cast, "string") == 0) { /* String type */
+ ret = dwarf_tag(&type);
+ if (ret != DW_TAG_pointer_type &&
+ ret != DW_TAG_array_type) {
+ pr_warning("Failed to cast into string: "
+ "%s(%s) is not a pointer nor array.\n",
+ dwarf_diename(vr_die), dwarf_diename(&type));
+ return -EINVAL;
+ }
+ if (die_get_real_type(&type, &type) == NULL) {
+ pr_warning("Failed to get a type"
+ " information.\n");
+ return -ENOENT;
+ }
+ if (ret == DW_TAG_pointer_type) {
+ while (*ref_ptr)
+ ref_ptr = &(*ref_ptr)->next;
+ /* Add new reference with offset +0 */
+ *ref_ptr = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (*ref_ptr == NULL) {
+ pr_warning("Out of memory error\n");
+ return -ENOMEM;
+ }
+ }
+ if (!die_compare_name(&type, "char") &&
+ !die_compare_name(&type, "unsigned char")) {
+ pr_warning("Failed to cast into string: "
+ "%s is not (unsigned) char *.\n",
+ dwarf_diename(vr_die));
+ return -EINVAL;
+ }
+ tvar->type = strdup(cast);
+ return (tvar->type == NULL) ? -ENOMEM : 0;
+ }
+
+ if (cast && (strcmp(cast, "u") == 0))
+ prefix = 'u';
+ else if (cast && (strcmp(cast, "s") == 0))
+ prefix = 's';
+ else if (cast && (strcmp(cast, "x") == 0) &&
+ probe_type_is_available(PROBE_TYPE_X))
+ prefix = 'x';
+ else
+ prefix = die_is_signed_type(&type) ? 's' :
+ probe_type_is_available(PROBE_TYPE_X) ? 'x' : 'u';
+
+ ret = dwarf_bytesize(&type);
+ if (ret <= 0)
+ /* No size ... try to use default type */
+ return 0;
+ ret = BYTES_TO_BITS(ret);
+
+ /* Check the bitwidth */
+ if (ret > MAX_BASIC_TYPE_BITS) {
+ pr_info("%s exceeds max-bitwidth. Cut down to %d bits.\n",
+ dwarf_diename(&type), MAX_BASIC_TYPE_BITS);
+ ret = MAX_BASIC_TYPE_BITS;
+ }
+ ret = snprintf(buf, 16, "%c%d", prefix, ret);
+
+formatted:
+ if (ret < 0 || ret >= 16) {
+ if (ret >= 16)
+ ret = -E2BIG;
+ pr_warning("Failed to convert variable type: %s\n",
+ str_error_r(-ret, sbuf, sizeof(sbuf)));
+ return ret;
+ }
+ tvar->type = strdup(buf);
+ if (tvar->type == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname,
+ struct perf_probe_arg_field *field,
+ struct probe_trace_arg_ref **ref_ptr,
+ Dwarf_Die *die_mem)
+{
+ struct probe_trace_arg_ref *ref = *ref_ptr;
+ Dwarf_Die type;
+ Dwarf_Word offs;
+ int ret, tag;
+
+ pr_debug("converting %s in %s\n", field->name, varname);
+ if (die_get_real_type(vr_die, &type) == NULL) {
+ pr_warning("Failed to get the type of %s.\n", varname);
+ return -ENOENT;
+ }
+ pr_debug2("Var real type: %s (%x)\n", dwarf_diename(&type),
+ (unsigned)dwarf_dieoffset(&type));
+ tag = dwarf_tag(&type);
+
+ if (field->name[0] == '[' &&
+ (tag == DW_TAG_array_type || tag == DW_TAG_pointer_type)) {
+ /* Save original type for next field or type */
+ memcpy(die_mem, &type, sizeof(*die_mem));
+ /* Get the type of this array */
+ if (die_get_real_type(&type, &type) == NULL) {
+ pr_warning("Failed to get the type of %s.\n", varname);
+ return -ENOENT;
+ }
+ pr_debug2("Array real type: %s (%x)\n", dwarf_diename(&type),
+ (unsigned)dwarf_dieoffset(&type));
+ if (tag == DW_TAG_pointer_type) {
+ ref = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (ref == NULL)
+ return -ENOMEM;
+ if (*ref_ptr)
+ (*ref_ptr)->next = ref;
+ else
+ *ref_ptr = ref;
+ }
+ ref->offset += dwarf_bytesize(&type) * field->index;
+ goto next;
+ } else if (tag == DW_TAG_pointer_type) {
+ /* Check the pointer and dereference */
+ if (!field->ref) {
+ pr_err("Semantic error: %s must be referred by '->'\n",
+ field->name);
+ return -EINVAL;
+ }
+ /* Get the type pointed by this pointer */
+ if (die_get_real_type(&type, &type) == NULL) {
+ pr_warning("Failed to get the type of %s.\n", varname);
+ return -ENOENT;
+ }
+ /* Verify it is a data structure */
+ tag = dwarf_tag(&type);
+ if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+ pr_warning("%s is not a data structure nor a union.\n",
+ varname);
+ return -EINVAL;
+ }
+
+ ref = zalloc(sizeof(struct probe_trace_arg_ref));
+ if (ref == NULL)
+ return -ENOMEM;
+ if (*ref_ptr)
+ (*ref_ptr)->next = ref;
+ else
+ *ref_ptr = ref;
+ } else {
+ /* Verify it is a data structure */
+ if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) {
+ pr_warning("%s is not a data structure nor a union.\n",
+ varname);
+ return -EINVAL;
+ }
+ if (field->name[0] == '[') {
+ pr_err("Semantic error: %s is not a pointer"
+ " nor array.\n", varname);
+ return -EINVAL;
+ }
+ /* While processing an unnamed field, we don't care about this */
+ if (field->ref && dwarf_diename(vr_die)) {
+ pr_err("Semantic error: %s must be referred by '.'\n",
+ field->name);
+ return -EINVAL;
+ }
+ if (!ref) {
+ pr_warning("Structure on a register is not "
+ "supported yet.\n");
+ return -ENOTSUP;
+ }
+ }
+
+ if (die_find_member(&type, field->name, die_mem) == NULL) {
+ pr_warning("%s(type:%s) has no member %s.\n", varname,
+ dwarf_diename(&type), field->name);
+ return -EINVAL;
+ }
+
+ /* Get the offset of the field */
+ if (tag == DW_TAG_union_type) {
+ offs = 0;
+ } else {
+ ret = die_get_data_member_location(die_mem, &offs);
+ if (ret < 0) {
+ pr_warning("Failed to get the offset of %s.\n",
+ field->name);
+ return ret;
+ }
+ }
+ ref->offset += (long)offs;
+
+ /* If this member is unnamed, we need to reuse this field */
+ if (!dwarf_diename(die_mem))
+ return convert_variable_fields(die_mem, varname, field,
+ &ref, die_mem);
+
+next:
+ /* Converting next field */
+ if (field->next)
+ return convert_variable_fields(die_mem, field->name,
+ field->next, &ref, die_mem);
+ else
+ return 0;
+}
+
+/* Show a variable in kprobe event format */
+static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
+{
+ Dwarf_Die die_mem;
+ int ret;
+
+ pr_debug("Converting variable %s into trace event.\n",
+ dwarf_diename(vr_die));
+
+ ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
+ &pf->sp_die, pf->machine, pf->tvar);
+ if (ret == -ENOENT || ret == -EINVAL) {
+ pr_err("Failed to find the location of the '%s' variable at this address.\n"
+ " Perhaps it has been optimized out.\n"
+ " Use -V with the --range option to show '%s' location range.\n",
+ pf->pvar->var, pf->pvar->var);
+ } else if (ret == -ENOTSUP)
+ pr_err("Sorry, we don't support this variable location yet.\n");
+ else if (ret == 0 && pf->pvar->field) {
+ ret = convert_variable_fields(vr_die, pf->pvar->var,
+ pf->pvar->field, &pf->tvar->ref,
+ &die_mem);
+ vr_die = &die_mem;
+ }
+ if (ret == 0)
+ ret = convert_variable_type(vr_die, pf->tvar, pf->pvar->type);
+ /* *expr will be cached in libdw. Don't free it. */
+ return ret;
+}
+
+/* Find a variable in a scope DIE */
+static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ Dwarf_Die vr_die;
+ char *buf, *ptr;
+ int ret = 0;
+
+ /* Copy raw parameters */
+ if (!is_c_varname(pf->pvar->var))
+ return copy_to_probe_trace_arg(pf->tvar, pf->pvar);
+
+ if (pf->pvar->name)
+ pf->tvar->name = strdup(pf->pvar->name);
+ else {
+ buf = synthesize_perf_probe_arg(pf->pvar);
+ if (!buf)
+ return -ENOMEM;
+ ptr = strchr(buf, ':'); /* Change type separator to _ */
+ if (ptr)
+ *ptr = '_';
+ pf->tvar->name = buf;
+ }
+ if (pf->tvar->name == NULL)
+ return -ENOMEM;
+
+ pr_debug("Searching '%s' variable in context.\n", pf->pvar->var);
+ /* Search child die for local variables and parameters. */
+ if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) {
+ /* Search again in global variables */
+ if (!die_find_variable_at(&pf->cu_die, pf->pvar->var,
+ 0, &vr_die)) {
+ pr_warning("Failed to find '%s' in this function.\n",
+ pf->pvar->var);
+ ret = -ENOENT;
+ }
+ }
+ if (ret >= 0)
+ ret = convert_variable(&vr_die, pf);
+
+ return ret;
+}
+
+/* Convert subprogram DIE to trace point */
+static int convert_to_trace_point(Dwarf_Die *sp_die, Dwfl_Module *mod,
+ Dwarf_Addr paddr, bool retprobe,
+ const char *function,
+ struct probe_trace_point *tp)
+{
+ Dwarf_Addr eaddr;
+ GElf_Sym sym;
+ const char *symbol;
+
+ /* Verify the address is correct */
+ if (!dwarf_haspc(sp_die, paddr)) {
+ pr_warning("Specified offset is out of %s\n",
+ dwarf_diename(sp_die));
+ return -EINVAL;
+ }
+
+ if (dwarf_entrypc(sp_die, &eaddr) == 0) {
+ /* If the DIE has entrypc, use it. */
+ symbol = dwarf_diename(sp_die);
+ } else {
+ /* Try to get actual symbol name and address from symtab */
+ symbol = dwfl_module_addrsym(mod, paddr, &sym, NULL);
+ eaddr = sym.st_value;
+ }
+ if (!symbol) {
+ pr_warning("Failed to find symbol at 0x%lx\n",
+ (unsigned long)paddr);
+ return -ENOENT;
+ }
+
+ tp->offset = (unsigned long)(paddr - eaddr);
+ tp->address = (unsigned long)paddr;
+ tp->symbol = strdup(symbol);
+ if (!tp->symbol)
+ return -ENOMEM;
+
+ /* Return probe must be on the head of a subprogram */
+ if (retprobe) {
+ if (eaddr != paddr) {
+ pr_warning("Failed to find \"%s%%return\",\n"
+ " because %s is an inlined function and"
+ " has no return point.\n", function,
+ function);
+ return -EINVAL;
+ }
+ tp->retprobe = true;
+ }
+
+ return 0;
+}
+
+/* Call probe_finder callback with scope DIE */
+static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ Dwarf_Attribute fb_attr;
+ Dwarf_Frame *frame = NULL;
+ size_t nops;
+ int ret;
+
+ if (!sc_die) {
+ pr_err("Caller must pass a scope DIE. Program error.\n");
+ return -EINVAL;
+ }
+
+ /* If not a real subprogram, find a real one */
+ if (!die_is_func_def(sc_die)) {
+ if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+ if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+ pr_warning("Ignoring tail call from %s\n",
+ dwarf_diename(&pf->sp_die));
+ return 0;
+ } else {
+ pr_warning("Failed to find probe point in any "
+ "functions.\n");
+ return -ENOENT;
+ }
+ }
+ } else
+ memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));
+
+ /* Get the frame base attribute/ops from subprogram */
+ dwarf_attr(&pf->sp_die, DW_AT_frame_base, &fb_attr);
+ ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1);
+ if (ret <= 0 || nops == 0) {
+ pf->fb_ops = NULL;
+#if _ELFUTILS_PREREQ(0, 142)
+ } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa &&
+ (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) {
+ if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 &&
+ (dwarf_cfi_addrframe(pf->cfi_dbg, pf->addr, &frame) != 0)) ||
+ dwarf_frame_cfa(frame, &pf->fb_ops, &nops) != 0) {
+ pr_warning("Failed to get call frame on 0x%jx\n",
+ (uintmax_t)pf->addr);
+ free(frame);
+ return -ENOENT;
+ }
+#endif
+ }
+
+ /* Call finder's callback handler */
+ ret = pf->callback(sc_die, pf);
+
+ /* Since *pf->fb_ops can be a part of frame, we should free it here. */
+ free(frame);
+ pf->fb_ops = NULL;
+
+ return ret;
+}
+
+struct find_scope_param {
+ const char *function;
+ const char *file;
+ int line;
+ int diff;
+ Dwarf_Die *die_mem;
+ bool found;
+};
+
+static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct find_scope_param *fsp = data;
+ const char *file;
+ int lno;
+
+ /* Skip if declared file name does not match */
+ if (fsp->file) {
+ file = dwarf_decl_file(fn_die);
+ if (!file || strcmp(fsp->file, file) != 0)
+ return 0;
+ }
+ /* If the function name is given, that's what user expects */
+ if (fsp->function) {
+ if (die_match_name(fn_die, fsp->function)) {
+ memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+ fsp->found = true;
+ return 1;
+ }
+ } else {
+ /* With the line number, find the nearest declared DIE */
+ dwarf_decl_line(fn_die, &lno);
+ if (lno < fsp->line && fsp->diff > fsp->line - lno) {
+ /* Keep a candidate and continue */
+ fsp->diff = fsp->line - lno;
+ memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+ fsp->found = true;
+ }
+ }
+ return 0;
+}
+
+/* Return innermost DIE */
+static int find_inner_scope_cb(Dwarf_Die *fn_die, void *data)
+{
+ struct find_scope_param *fsp = data;
+
+ memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
+ fsp->found = true;
+ return 1;
+}
+
+/* Find an appropriate scope that fits the given conditions */
+static Dwarf_Die *find_best_scope(struct probe_finder *pf, Dwarf_Die *die_mem)
+{
+ struct find_scope_param fsp = {
+ .function = pf->pev->point.function,
+ .file = pf->fname,
+ .line = pf->lno,
+ .diff = INT_MAX,
+ .die_mem = die_mem,
+ .found = false,
+ };
+ int ret;
+
+ ret = cu_walk_functions_at(&pf->cu_die, pf->addr, find_best_scope_cb,
+ &fsp);
+ if (!ret && !fsp.found)
+ cu_walk_functions_at(&pf->cu_die, pf->addr,
+ find_inner_scope_cb, &fsp);
+
+ return fsp.found ? die_mem : NULL;
+}
+
+static int probe_point_line_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct probe_finder *pf = data;
+ Dwarf_Die *sc_die, die_mem;
+ int ret;
+
+ if (lineno != pf->lno || strtailcmp(fname, pf->fname) != 0)
+ return 0;
+
+ pf->addr = addr;
+ sc_die = find_best_scope(pf, &die_mem);
+ if (!sc_die) {
+ pr_warning("Failed to find scope of probe point.\n");
+ return -ENOENT;
+ }
+
+ ret = call_probe_finder(sc_die, pf);
+
+ /* Continue if no error, because the line may also be in an inlined function */
+ return ret < 0 ? ret : 0;
+}
+
+/* Find probe point from its line number */
+static int find_probe_point_by_line(struct probe_finder *pf)
+{
+ return die_walk_lines(&pf->cu_die, probe_point_line_walker, pf);
+}
+
+/* Find lines which match lazy pattern */
+static int find_lazy_match_lines(struct intlist *list,
+ const char *fname, const char *pat)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t line_len;
+ ssize_t len;
+ int count = 0, linenum = 1;
+ char sbuf[STRERR_BUFSIZE];
+
+ fp = fopen(fname, "r");
+ if (!fp) {
+ pr_warning("Failed to open %s: %s\n", fname,
+ str_error_r(errno, sbuf, sizeof(sbuf)));
+ return -errno;
+ }
+
+ while ((len = getline(&line, &line_len, fp)) > 0) {
+
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+
+ if (strlazymatch(line, pat)) {
+ intlist__add(list, linenum);
+ count++;
+ }
+ linenum++;
+ }
+
+ if (ferror(fp))
+ count = -errno;
+ free(line);
+ fclose(fp);
+
+ if (count == 0)
+ pr_debug("No matched lines found in %s.\n", fname);
+ return count;
+}
+
+static int probe_point_lazy_walker(const char *fname, int lineno,
+ Dwarf_Addr addr, void *data)
+{
+ struct probe_finder *pf = data;
+ Dwarf_Die *sc_die, die_mem;
+ int ret;
+
+ if (!intlist__has_entry(pf->lcache, lineno) ||
+ strtailcmp(fname, pf->fname) != 0)
+ return 0;
+
+ pr_debug("Probe line found: line:%d addr:0x%llx\n",
+ lineno, (unsigned long long)addr);
+ pf->addr = addr;
+ pf->lno = lineno;
+ sc_die = find_best_scope(pf, &die_mem);
+ if (!sc_die) {
+ pr_warning("Failed to find scope of probe point.\n");
+ return -ENOENT;
+ }
+
+ ret = call_probe_finder(sc_die, pf);
+
+	/*
+	 * Continue if no error, because the lazy pattern may match
+	 * other lines as well
+	 */
+ return ret < 0 ? ret : 0;
+}
+
+/* Find probe points from lazy pattern */
+static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ int ret = 0;
+ char *fpath;
+
+ if (intlist__empty(pf->lcache)) {
+ const char *comp_dir;
+
+ comp_dir = cu_get_comp_dir(&pf->cu_die);
+ ret = get_real_path(pf->fname, comp_dir, &fpath);
+ if (ret < 0) {
+ pr_warning("Failed to find source file path.\n");
+ return ret;
+ }
+
+ /* Matching lazy line pattern */
+ ret = find_lazy_match_lines(pf->lcache, fpath,
+ pf->pev->point.lazy_line);
+ free(fpath);
+ if (ret <= 0)
+ return ret;
+ }
+
+ return die_walk_lines(sp_die, probe_point_lazy_walker, pf);
+}
+
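+/*
+ * For unoptimized uprobes on a plain FUNC probe that reads variables,
+ * move the probe address past the prologue so arguments are in place.
+ */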
+static void skip_prologue(Dwarf_Die *sp_die, struct probe_finder *pf)
+{
+ struct perf_probe_point *pp = &pf->pev->point;
+
+ /* Not uprobe? */
+ if (!pf->pev->uprobes)
+ return;
+
+ /* Compiled with optimization? */
+ if (die_is_optimized_target(&pf->cu_die))
+ return;
+
+ /* Don't know entrypc? */
+ if (!pf->addr)
+ return;
+
+ /* Only FUNC and FUNC@SRC are eligible. */
+ if (!pp->function || pp->line || pp->retprobe || pp->lazy_line ||
+ pp->offset || pp->abs_address)
+ return;
+
+ /* Not interested in func parameter? */
+ if (!perf_probe_with_var(pf->pev))
+ return;
+
+ pr_info("Target program is compiled without optimization. Skipping prologue.\n"
+ "Probe on address 0x%" PRIx64 " to force probing at the function entry.\n\n",
+ pf->addr);
+
+ die_skip_prologue(sp_die, &pf->cu_die, &pf->addr);
+}
+
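+/* Find a probe point within one instance of an inlined function */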
+static int probe_point_inline_cb(Dwarf_Die *in_die, void *data)
+{
+ struct probe_finder *pf = data;
+ struct perf_probe_point *pp = &pf->pev->point;
+ Dwarf_Addr addr;
+ int ret;
+
+ if (pp->lazy_line)
+ ret = find_probe_point_lazy(in_die, pf);
+ else {
+ /* Get probe address */
+ if (die_entrypc(in_die, &addr) != 0) {
+ pr_warning("Failed to get entry address of %s.\n",
+ dwarf_diename(in_die));
+ return -ENOENT;
+ }
+ if (addr == 0) {
+			pr_debug("%s has no valid entry address, skipped.\n",
+ dwarf_diename(in_die));
+ return -ENOENT;
+ }
+ pf->addr = addr;
+ pf->addr += pp->offset;
+ pr_debug("found inline addr: 0x%jx\n",
+ (uintmax_t)pf->addr);
+
+ ret = call_probe_finder(in_die, pf);
+ }
+
+ return ret;
+}
+
+/* Callback parameter with return value for libdw */
+struct dwarf_callback_param {
+ void *data;
+ int retval;
+};
+
+/* Search function from function name */
+static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct dwarf_callback_param *param = data;
+ struct probe_finder *pf = param->data;
+ struct perf_probe_point *pp = &pf->pev->point;
+
+ /* Check tag and diename */
+ if (!die_is_func_def(sp_die) ||
+ !die_match_name(sp_die, pp->function))
+ return DWARF_CB_OK;
+
+ /* Check declared file */
+ if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
+ return DWARF_CB_OK;
+
+ pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
+ (unsigned long)dwarf_dieoffset(sp_die));
+ pf->fname = dwarf_decl_file(sp_die);
+ if (pp->line) { /* Function relative line */
+ dwarf_decl_line(sp_die, &pf->lno);
+ pf->lno += pp->line;
+ param->retval = find_probe_point_by_line(pf);
+ } else if (die_is_func_instance(sp_die)) {
+ /* Instances always have the entry address */
+ die_entrypc(sp_die, &pf->addr);
+		/* But in some cases the entry address is 0 */
+ if (pf->addr == 0) {
+ pr_debug("%s has no entry PC. Skipped\n",
+ dwarf_diename(sp_die));
+ param->retval = 0;
+ /* Real function */
+ } else if (pp->lazy_line)
+ param->retval = find_probe_point_lazy(sp_die, pf);
+ else {
+ skip_prologue(sp_die, pf);
+ pf->addr += pp->offset;
+ /* TODO: Check the address in this function */
+ param->retval = call_probe_finder(sp_die, pf);
+ }
+ } else if (!probe_conf.no_inlines) {
+ /* Inlined function: search instances */
+ param->retval = die_walk_instances(sp_die,
+ probe_point_inline_cb, (void *)pf);
+		/* This could be a non-existent inline definition */
+ if (param->retval == -ENOENT)
+ param->retval = 0;
+ }
+
+ /* We need to find other candidates */
+ if (strisglob(pp->function) && param->retval >= 0) {
+ param->retval = 0; /* We have to clear the result */
+ return DWARF_CB_OK;
+ }
+
+	return DWARF_CB_ABORT; /* Exit; no identical symbol exists in this CU. */
+}
+
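+/* Walk all functions in the CU and probe those matching the event */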
+static int find_probe_point_by_func(struct probe_finder *pf)
+{
+ struct dwarf_callback_param _param = {.data = (void *)pf,
+ .retval = 0};
+ dwarf_getfuncs(&pf->cu_die, probe_point_search_cb, &_param, 0);
+ return _param.retval;
+}
+
+struct pubname_callback_param {
+ char *function;
+ char *file;
+ Dwarf_Die *cu_die;
+ Dwarf_Die *sp_die;
+ int found;
+};
+
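+/* Check whether a pubname entry matches the target function (and file) */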
+static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
+{
+ struct pubname_callback_param *param = data;
+
+ if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) {
+ if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
+ return DWARF_CB_OK;
+
+ if (die_match_name(param->sp_die, param->function)) {
+ if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
+ return DWARF_CB_OK;
+
+ if (param->file &&
+ strtailcmp(param->file, dwarf_decl_file(param->sp_die)))
+ return DWARF_CB_OK;
+
+ param->found = 1;
+ return DWARF_CB_ABORT;
+ }
+ }
+
+ return DWARF_CB_OK;
+}
+
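+/*
+ * Find probe locations, trying the .debug_pubnames fastpath first and
+ * falling back to scanning every CU.
+ */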
+static int debuginfo__find_probe_location(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+ struct perf_probe_point *pp = &pf->pev->point;
+ Dwarf_Off off, noff;
+ size_t cuhl;
+ Dwarf_Die *diep;
+ int ret = 0;
+
+ off = 0;
+ pf->lcache = intlist__new(NULL);
+ if (!pf->lcache)
+ return -ENOMEM;
+
+ /* Fastpath: lookup by function name from .debug_pubnames section */
+ if (pp->function && !strisglob(pp->function)) {
+ struct pubname_callback_param pubname_param = {
+ .function = pp->function,
+ .file = pp->file,
+ .cu_die = &pf->cu_die,
+ .sp_die = &pf->sp_die,
+ .found = 0,
+ };
+ struct dwarf_callback_param probe_param = {
+ .data = pf,
+ };
+
+ dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+ &pubname_param, 0);
+ if (pubname_param.found) {
+ ret = probe_point_search_cb(&pf->sp_die, &probe_param);
+ if (ret)
+ goto found;
+ }
+ }
+
+	/* Loop over CUs (Compilation Units) */
+ while (!dwarf_nextcu(dbg->dbg, off, &noff, &cuhl, NULL, NULL, NULL)) {
+		/* Get the DIE (Debugging Information Entry) of this CU */
+ diep = dwarf_offdie(dbg->dbg, off + cuhl, &pf->cu_die);
+ if (!diep)
+ continue;
+
+ /* Check if target file is included. */
+ if (pp->file)
+ pf->fname = cu_find_realpath(&pf->cu_die, pp->file);
+ else
+ pf->fname = NULL;
+
+ if (!pp->file || pf->fname) {
+ if (pp->function)
+ ret = find_probe_point_by_func(pf);
+ else if (pp->lazy_line)
+ ret = find_probe_point_lazy(&pf->cu_die, pf);
+ else {
+ pf->lno = pp->line;
+ ret = find_probe_point_by_line(pf);
+ }
+ if (ret < 0)
+ break;
+ }
+ off = noff;
+ }
+
+found:
+ intlist__delete(pf->lcache);
+ pf->lcache = NULL;
+
+ return ret;
+}
+
+/* Find probe points from debuginfo */
+static int debuginfo__find_probes(struct debuginfo *dbg,
+ struct probe_finder *pf)
+{
+ int ret = 0;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+
+ if (pf->cfi_eh || pf->cfi_dbg)
+ return debuginfo__find_probe_location(dbg, pf);
+
+ /* Get the call frame information from this dwarf */
+ elf = dwarf_getelf(dbg->dbg);
+ if (elf == NULL)
+ return -EINVAL;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ return -EINVAL;
+
+ pf->machine = ehdr.e_machine;
+
+#if _ELFUTILS_PREREQ(0, 142)
+ do {
+ GElf_Shdr shdr;
+
+ if (elf_section_by_name(elf, &ehdr, &shdr, ".eh_frame", NULL) &&
+ shdr.sh_type == SHT_PROGBITS)
+ pf->cfi_eh = dwarf_getcfi_elf(elf);
+
+ pf->cfi_dbg = dwarf_getcfi(dbg->dbg);
+ } while (0);
+#endif
+
+ ret = debuginfo__find_probe_location(dbg, pf);
+ return ret;
+}
+
+struct local_vars_finder {
+ struct probe_finder *pf;
+ struct perf_probe_arg *args;
+ bool vars;
+ int max_args;
+ int nargs;
+ int ret;
+};
+
+/* Collect available variables in this scope */
+static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct local_vars_finder *vf = data;
+ struct probe_finder *pf = vf->pf;
+ int tag;
+
+ tag = dwarf_tag(die_mem);
+ if (tag == DW_TAG_formal_parameter ||
+ (tag == DW_TAG_variable && vf->vars)) {
+ if (convert_variable_location(die_mem, vf->pf->addr,
+ vf->pf->fb_ops, &pf->sp_die,
+ pf->machine, NULL) == 0) {
+ vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem);
+ if (vf->args[vf->nargs].var == NULL) {
+ vf->ret = -ENOMEM;
+ return DIE_FIND_CB_END;
+ }
+ pr_debug(" %s", vf->args[vf->nargs].var);
+ vf->nargs++;
+ }
+ }
+
+ if (dwarf_haspc(die_mem, vf->pf->addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+}
+
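+/* Expand $vars/$params into concrete arguments; returns the arg count */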
+static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
+ struct perf_probe_arg *args)
+{
+ Dwarf_Die die_mem;
+ int i;
+ int n = 0;
+ struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
+ .max_args = MAX_PROBE_ARGS, .ret = 0};
+
+ for (i = 0; i < pf->pev->nargs; i++) {
+		/* var can never be NULL */
+ if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+ vf.vars = true;
+ else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
+ /* Copy normal argument */
+ args[n] = pf->pev->args[i];
+ n++;
+ continue;
+ }
+ pr_debug("Expanding %s into:", pf->pev->args[i].var);
+ vf.nargs = n;
+ /* Special local variables */
+ die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+ &die_mem);
+ pr_debug(" (%d)\n", vf.nargs - n);
+ if (vf.ret < 0)
+ return vf.ret;
+ n = vf.nargs;
+ }
+ return n;
+}
+
+/* Add a found probe point into trace event list */
+static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ struct trace_event_finder *tf =
+ container_of(pf, struct trace_event_finder, pf);
+ struct perf_probe_point *pp = &pf->pev->point;
+ struct probe_trace_event *tev;
+ struct perf_probe_arg *args = NULL;
+ int ret, i;
+
+ /* Check number of tevs */
+ if (tf->ntevs == tf->max_tevs) {
+		pr_warning("Too many (> %d) probe points found.\n",
+			   tf->max_tevs);
+ return -ERANGE;
+ }
+ tev = &tf->tevs[tf->ntevs++];
+
+ /* Trace point should be converted from subprogram DIE */
+ ret = convert_to_trace_point(&pf->sp_die, tf->mod, pf->addr,
+ pp->retprobe, pp->function, &tev->point);
+ if (ret < 0)
+ goto end;
+
+ tev->point.realname = strdup(dwarf_diename(sc_die));
+ if (!tev->point.realname) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
+ tev->point.offset);
+
+	/* Expand special probe arguments if they exist */
+ args = zalloc(sizeof(struct perf_probe_arg) * MAX_PROBE_ARGS);
+ if (args == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ ret = expand_probe_args(sc_die, pf, args);
+ if (ret < 0)
+ goto end;
+
+ tev->nargs = ret;
+ tev->args = zalloc(sizeof(struct probe_trace_arg) * tev->nargs);
+ if (tev->args == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
+
+ /* Find each argument */
+ for (i = 0; i < tev->nargs; i++) {
+ pf->pvar = &args[i];
+ pf->tvar = &tev->args[i];
+ /* Variable should be found from scope DIE */
+ ret = find_variable(sc_die, pf);
+ if (ret != 0)
+ break;
+ }
+
+end:
+ if (ret) {
+ clear_probe_trace_event(tev);
+ tf->ntevs--;
+ }
+ free(args);
+ return ret;
+}
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct probe_trace_event **tevs)
+{
+ struct trace_event_finder tf = {
+ .pf = {.pev = pev, .callback = add_probe_trace_event},
+ .max_tevs = probe_conf.max_probes, .mod = dbg->mod};
+ int ret, i;
+
+ /* Allocate result tevs array */
+ *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
+ if (*tevs == NULL)
+ return -ENOMEM;
+
+ tf.tevs = *tevs;
+ tf.ntevs = 0;
+
+ ret = debuginfo__find_probes(dbg, &tf.pf);
+ if (ret < 0 || tf.ntevs == 0) {
+ for (i = 0; i < tf.ntevs; i++)
+ clear_probe_trace_event(&tf.tevs[i]);
+ zfree(tevs);
+ return ret;
+ }
+
+ return (ret < 0) ? ret : tf.ntevs;
+}
+
+/* Collect available variables in this scope */
+static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
+{
+ struct available_var_finder *af = data;
+ struct variable_list *vl;
+ struct strbuf buf = STRBUF_INIT;
+ int tag, ret;
+
+ vl = &af->vls[af->nvls - 1];
+
+ tag = dwarf_tag(die_mem);
+ if (tag == DW_TAG_formal_parameter ||
+ tag == DW_TAG_variable) {
+ ret = convert_variable_location(die_mem, af->pf.addr,
+ af->pf.fb_ops, &af->pf.sp_die,
+ af->pf.machine, NULL);
+ if (ret == 0 || ret == -ERANGE) {
+ int ret2;
+ bool externs = !af->child;
+
+ if (strbuf_init(&buf, 64) < 0)
+ goto error;
+
+ if (probe_conf.show_location_range) {
+ if (!externs)
+ ret2 = strbuf_add(&buf,
+ ret ? "[INV]\t" : "[VAL]\t", 6);
+ else
+ ret2 = strbuf_add(&buf, "[EXT]\t", 6);
+ if (ret2)
+ goto error;
+ }
+
+ ret2 = die_get_varname(die_mem, &buf);
+
+ if (!ret2 && probe_conf.show_location_range &&
+ !externs) {
+ if (strbuf_addch(&buf, '\t') < 0)
+ goto error;
+ ret2 = die_get_var_range(&af->pf.sp_die,
+ die_mem, &buf);
+ }
+
+ pr_debug("Add new var: %s\n", buf.buf);
+ if (ret2 == 0) {
+ strlist__add(vl->vars,
+ strbuf_detach(&buf, NULL));
+ }
+ strbuf_release(&buf);
+ }
+ }
+
+ if (af->child && dwarf_haspc(die_mem, af->pf.addr))
+ return DIE_FIND_CB_CONTINUE;
+ else
+ return DIE_FIND_CB_SIBLING;
+error:
+ strbuf_release(&buf);
+ pr_debug("Error in strbuf\n");
+ return DIE_FIND_CB_END;
+}
+
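+/* Check whether this address was already handled by an earlier callback */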
+static bool available_var_finder_overlap(struct available_var_finder *af)
+{
+ int i;
+
+ for (i = 0; i < af->nvls; i++) {
+ if (af->pf.addr == af->vls[i].point.address)
+ return true;
+ }
+ return false;
+
+}
+
+/* Add found variables into the available variables list */
+static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
+{
+ struct available_var_finder *af =
+ container_of(pf, struct available_var_finder, pf);
+ struct perf_probe_point *pp = &pf->pev->point;
+ struct variable_list *vl;
+ Dwarf_Die die_mem;
+ int ret;
+
+	/*
+	 * For some reason (e.g. different columns assigned to the same
+	 * address), this callback can be called with an address which has
+	 * already been handled. Ignore it.
+	 */
+ if (available_var_finder_overlap(af))
+ return 0;
+
+	/* Check the number of variable lists */
+ if (af->nvls == af->max_vls) {
+		pr_warning("Too many (> %d) probe points found.\n", af->max_vls);
+ return -ERANGE;
+ }
+ vl = &af->vls[af->nvls++];
+
+ /* Trace point should be converted from subprogram DIE */
+ ret = convert_to_trace_point(&pf->sp_die, af->mod, pf->addr,
+ pp->retprobe, pp->function, &vl->point);
+ if (ret < 0)
+ return ret;
+
+ pr_debug("Probe point found: %s+%lu\n", vl->point.symbol,
+ vl->point.offset);
+
+ /* Find local variables */
+ vl->vars = strlist__new(NULL, NULL);
+ if (vl->vars == NULL)
+ return -ENOMEM;
+ af->child = true;
+ die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
+
+ /* Find external variables */
+ if (!probe_conf.show_ext_vars)
+ goto out;
+ /* Don't need to search child DIE for external vars. */
+ af->child = false;
+ die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
+
+out:
+ if (strlist__empty(vl->vars)) {
+ strlist__delete(vl->vars);
+ vl->vars = NULL;
+ }
+
+ return ret;
+}
+
+/*
+ * Find available variables at the given probe point.
+ * Return the number of found probe points, 0 if there is no
+ * matching probe point, or <0 if an error occurs.
+ */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct variable_list **vls)
+{
+ struct available_var_finder af = {
+ .pf = {.pev = pev, .callback = add_available_vars},
+ .mod = dbg->mod,
+ .max_vls = probe_conf.max_probes};
+ int ret;
+
+ /* Allocate result vls array */
+ *vls = zalloc(sizeof(struct variable_list) * af.max_vls);
+ if (*vls == NULL)
+ return -ENOMEM;
+
+ af.vls = *vls;
+ af.nvls = 0;
+
+ ret = debuginfo__find_probes(dbg, &af.pf);
+ if (ret < 0) {
+ /* Free vlist for error */
+ while (af.nvls--) {
+ zfree(&af.vls[af.nvls].point.symbol);
+ strlist__delete(af.vls[af.nvls].vars);
+ }
+ zfree(vls);
+ return ret;
+ }
+
+ return (ret < 0) ? ret : af.nvls;
+}
+
+/* For kernel modules, we need special code to get the .text section offset */
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+ bool adjust_offset)
+{
+ int n, i;
+ Elf32_Word shndx;
+ Elf_Scn *scn;
+ Elf *elf;
+ GElf_Shdr mem, *shdr;
+ const char *p;
+
+ elf = dwfl_module_getelf(dbg->mod, &dbg->bias);
+ if (!elf)
+ return -EINVAL;
+
+ /* Get the number of relocations */
+ n = dwfl_module_relocations(dbg->mod);
+ if (n < 0)
+ return -ENOENT;
+	/* Search the relocations for the one describing the .text section */
+ for (i = 0; i < n; i++) {
+ p = dwfl_module_relocation_info(dbg->mod, i, &shndx);
+ if (strcmp(p, ".text") == 0) {
+ /* OK, get the section header */
+ scn = elf_getscn(elf, shndx);
+ if (!scn)
+ return -ENOENT;
+ shdr = gelf_getshdr(scn, &mem);
+ if (!shdr)
+ return -ENOENT;
+ *offs = shdr->sh_addr;
+ if (adjust_offset)
+ *offs -= shdr->sh_offset;
+ }
+ }
+ return 0;
+}
+
+/* Reverse search */
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+ struct perf_probe_point *ppt)
+{
+ Dwarf_Die cudie, spdie, indie;
+ Dwarf_Addr _addr = 0, baseaddr = 0;
+ const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp;
+ int baseline = 0, lineno = 0, ret = 0;
+
+ /* We always need to relocate the address for aranges */
+ if (debuginfo__get_text_offset(dbg, &baseaddr, false) == 0)
+ addr += baseaddr;
+ /* Find cu die */
+ if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) {
+ pr_warning("Failed to find debug information for address %lx\n",
+ addr);
+ ret = -EINVAL;
+ goto end;
+ }
+
+ /* Find a corresponding line (filename and lineno) */
+ cu_find_lineinfo(&cudie, addr, &fname, &lineno);
+ /* Don't care whether it failed or not */
+
+ /* Find a corresponding function (name, baseline and baseaddr) */
+ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) {
+ /* Get function entry information */
+ func = basefunc = dwarf_diename(&spdie);
+ if (!func ||
+ die_entrypc(&spdie, &baseaddr) != 0 ||
+ dwarf_decl_line(&spdie, &baseline) != 0) {
+ lineno = 0;
+ goto post;
+ }
+
+ fname = dwarf_decl_file(&spdie);
+ if (addr == (unsigned long)baseaddr) {
+ /* Function entry - Relative line number is 0 */
+ lineno = baseline;
+ goto post;
+ }
+
+ /* Track down the inline functions step by step */
+ while (die_find_top_inlinefunc(&spdie, (Dwarf_Addr)addr,
+ &indie)) {
+ /* There is an inline function */
+ if (die_entrypc(&indie, &_addr) == 0 &&
+ _addr == addr) {
+ /*
+ * addr is at an inline function entry.
+ * In this case, lineno should be the call-site
+ * line number. (overwrite lineinfo)
+ */
+ lineno = die_get_call_lineno(&indie);
+ fname = die_get_call_file(&indie);
+ break;
+ } else {
+ /*
+ * addr is in an inline function body.
+				 * Since lineno points to one of the lines
+ * of the inline function, baseline should
+ * be the entry line of the inline function.
+ */
+ tmp = dwarf_diename(&indie);
+ if (!tmp ||
+ dwarf_decl_line(&indie, &baseline) != 0)
+ break;
+ func = tmp;
+ spdie = indie;
+ }
+ }
+		/* Verify that lineno and baseline are in the same file */
+ tmp = dwarf_decl_file(&spdie);
+ if (!tmp || strcmp(tmp, fname) != 0)
+ lineno = 0;
+ }
+
+post:
+ /* Make a relative line number or an offset */
+ if (lineno)
+ ppt->line = lineno - baseline;
+ else if (basefunc) {
+ ppt->offset = addr - (unsigned long)baseaddr;
+ func = basefunc;
+ }
+
+ /* Duplicate strings */
+ if (func) {
+ ppt->function = strdup(func);
+ if (ppt->function == NULL) {
+ ret = -ENOMEM;
+ goto end;
+ }
+ }
+ if (fname) {
+ ppt->file = strdup(fname);
+ if (ppt->file == NULL) {
+ zfree(&ppt->function);
+ ret = -ENOMEM;
+ goto end;
+ }
+ }
+end:
+ if (ret == 0 && (fname || func))
+ ret = 1; /* Found a point */
+ return ret;
+}
+
+/* Add a line and store the src path */
+static int line_range_add_line(const char *src, unsigned int lineno,
+ struct line_range *lr)
+{
+ /* Copy source path */
+ if (!lr->path) {
+ lr->path = strdup(src);
+ if (lr->path == NULL)
+ return -ENOMEM;
+ }
+ return intlist__add(lr->line_list, lineno);
+}
+
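+/* Collect lines that fall within the requested file and line range */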
+static int line_range_walk_cb(const char *fname, int lineno,
+ Dwarf_Addr addr __maybe_unused,
+ void *data)
+{
+ struct line_finder *lf = data;
+ int err;
+
+ if ((strtailcmp(fname, lf->fname) != 0) ||
+ (lf->lno_s > lineno || lf->lno_e < lineno))
+ return 0;
+
+ err = line_range_add_line(fname, lineno, lf->lr);
+ if (err < 0 && err != -EEXIST)
+ return err;
+
+ return 0;
+}
+
+/* Find line range from its line number */
+static int find_line_range_by_line(Dwarf_Die *sp_die, struct line_finder *lf)
+{
+ int ret;
+
+ ret = die_walk_lines(sp_die ?: &lf->cu_die, line_range_walk_cb, lf);
+
+	/* Update status */
+	if (ret >= 0) {
+		if (!intlist__empty(lf->lr->line_list))
+			ret = lf->found = 1;
+		else
+			ret = 0;	/* Lines are not found */
+	} else {
+		zfree(&lf->lr->path);
+	}
+ return ret;
+}
+
+static int line_range_inline_cb(Dwarf_Die *in_die, void *data)
+{
+ int ret = find_line_range_by_line(in_die, data);
+
+	/*
+	 * We have to check all instances of an inlined function, because
+	 * some execution paths can be optimized out depending on the
+	 * function arguments of each instance. However, if an error occurs,
+	 * it should be handled by the caller.
+	 */
+ return ret < 0 ? ret : 0;
+}
+
+/* Search function definition from function name */
+static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
+{
+ struct dwarf_callback_param *param = data;
+ struct line_finder *lf = param->data;
+ struct line_range *lr = lf->lr;
+
+ /* Check declared file */
+ if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
+ return DWARF_CB_OK;
+
+ if (die_is_func_def(sp_die) &&
+ die_match_name(sp_die, lr->function)) {
+ lf->fname = dwarf_decl_file(sp_die);
+ dwarf_decl_line(sp_die, &lr->offset);
+ pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
+ lf->lno_s = lr->offset + lr->start;
+ if (lf->lno_s < 0) /* Overflow */
+ lf->lno_s = INT_MAX;
+ lf->lno_e = lr->offset + lr->end;
+ if (lf->lno_e < 0) /* Overflow */
+ lf->lno_e = INT_MAX;
+ pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e);
+ lr->start = lf->lno_s;
+ lr->end = lf->lno_e;
+ if (!die_is_func_instance(sp_die))
+ param->retval = die_walk_instances(sp_die,
+ line_range_inline_cb, lf);
+ else
+ param->retval = find_line_range_by_line(sp_die, lf);
+ return DWARF_CB_ABORT;
+ }
+ return DWARF_CB_OK;
+}
+
+static int find_line_range_by_func(struct line_finder *lf)
+{
+ struct dwarf_callback_param param = {.data = (void *)lf, .retval = 0};
+ dwarf_getfuncs(&lf->cu_die, line_range_search_cb, &param, 0);
+ return param.retval;
+}
+
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr)
+{
+ struct line_finder lf = {.lr = lr, .found = 0};
+ int ret = 0;
+ Dwarf_Off off = 0, noff;
+ size_t cuhl;
+ Dwarf_Die *diep;
+ const char *comp_dir;
+
+ /* Fastpath: lookup by function name from .debug_pubnames section */
+ if (lr->function) {
+ struct pubname_callback_param pubname_param = {
+ .function = lr->function, .file = lr->file,
+ .cu_die = &lf.cu_die, .sp_die = &lf.sp_die, .found = 0};
+ struct dwarf_callback_param line_range_param = {
+ .data = (void *)&lf, .retval = 0};
+
+ dwarf_getpubnames(dbg->dbg, pubname_search_cb,
+ &pubname_param, 0);
+ if (pubname_param.found) {
+ line_range_search_cb(&lf.sp_die, &line_range_param);
+ if (lf.found)
+ goto found;
+ }
+ }
+
+	/* Loop over CUs (Compilation Units) */
+ while (!lf.found && ret >= 0) {
+ if (dwarf_nextcu(dbg->dbg, off, &noff, &cuhl,
+ NULL, NULL, NULL) != 0)
+ break;
+
+		/* Get the DIE (Debugging Information Entry) of this CU */
+ diep = dwarf_offdie(dbg->dbg, off + cuhl, &lf.cu_die);
+ if (!diep)
+ continue;
+
+ /* Check if target file is included. */
+ if (lr->file)
+ lf.fname = cu_find_realpath(&lf.cu_die, lr->file);
+ else
+			lf.fname = NULL;
+
+ if (!lr->file || lf.fname) {
+ if (lr->function)
+ ret = find_line_range_by_func(&lf);
+ else {
+ lf.lno_s = lr->start;
+ lf.lno_e = lr->end;
+ ret = find_line_range_by_line(NULL, &lf);
+ }
+ }
+ off = noff;
+ }
+
+found:
+ /* Store comp_dir */
+ if (lf.found) {
+ comp_dir = cu_get_comp_dir(&lf.cu_die);
+ if (comp_dir) {
+ lr->comp_dir = strdup(comp_dir);
+ if (!lr->comp_dir)
+ ret = -ENOMEM;
+ }
+ }
+
+ pr_debug("path: %s\n", lr->path);
+ return (ret < 0) ? ret : lf.found;
+}
+
+/*
+ * Find a src file from a DWARF tag path. Prepend optional source path prefix
+ * and chop off leading directories that do not exist. Result is passed back as
+ * a newly allocated path on success.
+ * Return 0 if file was found and readable, -errno otherwise.
+ */
+int get_real_path(const char *raw_path, const char *comp_dir,
+ char **new_path)
+{
+ const char *prefix = symbol_conf.source_prefix;
+
+ if (!prefix) {
+ if (raw_path[0] != '/' && comp_dir)
+ /* If not an absolute path, try to use comp_dir */
+ prefix = comp_dir;
+ else {
+ if (access(raw_path, R_OK) == 0) {
+ *new_path = strdup(raw_path);
+ return *new_path ? 0 : -ENOMEM;
+ } else
+ return -errno;
+ }
+ }
+
+ *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2));
+ if (!*new_path)
+ return -ENOMEM;
+
+ for (;;) {
+ sprintf(*new_path, "%s/%s", prefix, raw_path);
+
+ if (access(*new_path, R_OK) == 0)
+ return 0;
+
+ if (!symbol_conf.source_prefix) {
+			/* When we searched via comp_dir, don't retry */
+ zfree(new_path);
+ return -errno;
+ }
+
+ switch (errno) {
+ case ENAMETOOLONG:
+ case ENOENT:
+ case EROFS:
+ case EFAULT:
+ raw_path = strchr(++raw_path, '/');
+ if (!raw_path) {
+ zfree(new_path);
+ return -ENOENT;
+ }
+ continue;
+
+ default:
+ zfree(new_path);
+ return -errno;
+ }
+ }
+}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
new file mode 100644
index 000000000..16252980f
--- /dev/null
+++ b/tools/perf/util/probe-finder.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PROBE_FINDER_H
+#define _PROBE_FINDER_H
+
+#include <stdbool.h>
+#include "intlist.h"
+#include "probe-event.h"
+#include "sane_ctype.h"
+
+#define MAX_PROBE_BUFFER 1024
+#define MAX_PROBES 128
+#define MAX_PROBE_ARGS 128
+
+#define PROBE_ARG_VARS "$vars"
+#define PROBE_ARG_PARAMS "$params"
+
+static inline int is_c_varname(const char *name)
+{
+ /* TODO */
+ return isalpha(name[0]) || name[0] == '_';
+}
+
+#ifdef HAVE_DWARF_SUPPORT
+
+#include "dwarf-aux.h"
+
+/* TODO: export debuginfo data structure even if no dwarf support */
+
+/* debug information structure */
+struct debuginfo {
+ Dwarf *dbg;
+ Dwfl_Module *mod;
+ Dwfl *dwfl;
+ Dwarf_Addr bias;
+};
+
+/* This also tries to open distro debuginfo */
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
+
+/* Find probe_trace_events specified by perf_probe_event from debuginfo */
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct probe_trace_event **tevs);
+
+/* Find a perf_probe_point from debuginfo */
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+ struct perf_probe_point *ppt);
+
+int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs,
+ bool adjust_offset);
+
+/* Find a line range */
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
+
+/* Find available variables */
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct variable_list **vls);
+
+/* Find a src file from a DWARF tag path */
+int get_real_path(const char *raw_path, const char *comp_dir,
+ char **new_path);
+
+struct probe_finder {
+ struct perf_probe_event *pev; /* Target probe event */
+
+ /* Callback when a probe point is found */
+ int (*callback)(Dwarf_Die *sc_die, struct probe_finder *pf);
+
+ /* For function searching */
+ int lno; /* Line number */
+ Dwarf_Addr addr; /* Address */
+ const char *fname; /* Real file name */
+ Dwarf_Die cu_die; /* Current CU */
+ Dwarf_Die sp_die;
+ struct intlist *lcache; /* Line cache for lazy match */
+
+ /* For variable searching */
+#if _ELFUTILS_PREREQ(0, 142)
+ /* Call Frame Information from .eh_frame */
+ Dwarf_CFI *cfi_eh;
+ /* Call Frame Information from .debug_frame */
+ Dwarf_CFI *cfi_dbg;
+#endif
+ Dwarf_Op *fb_ops; /* Frame base attribute */
+ unsigned int machine; /* Target machine arch */
+ struct perf_probe_arg *pvar; /* Current target variable */
+ struct probe_trace_arg *tvar; /* Current result variable */
+};
+
+struct trace_event_finder {
+ struct probe_finder pf;
+ Dwfl_Module *mod; /* For solving symbols */
+ struct probe_trace_event *tevs; /* Found trace events */
+ int ntevs; /* Number of trace events */
+ int max_tevs; /* Max number of trace events */
+};
+
+struct available_var_finder {
+ struct probe_finder pf;
+ Dwfl_Module *mod; /* For solving symbols */
+ struct variable_list *vls; /* Found variable lists */
+ int nvls; /* Number of variable lists */
+ int max_vls; /* Max no. of variable lists */
+ bool child; /* Search child scopes */
+};
+
+struct line_finder {
+ struct line_range *lr; /* Target line range */
+
+ const char *fname; /* File name */
+ int lno_s; /* Start line number */
+ int lno_e; /* End line number */
+ Dwarf_Die cu_die; /* Current CU */
+ Dwarf_Die sp_die;
+ int found;
+};
+
+#endif /* HAVE_DWARF_SUPPORT */
+
+#endif /* _PROBE_FINDER_H */
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
new file mode 100644
index 000000000..797fe1ae2
--- /dev/null
+++ b/tools/perf/util/pstack.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Simple pointer stack
+ *
+ * (c) 2010 Arnaldo Carvalho de Melo <acme@redhat.com>
+ */
+
+#include "util.h"
+#include "pstack.h"
+#include "debug.h"
+#include <linux/kernel.h>
+#include <stdlib.h>
+
+struct pstack {
+ unsigned short top;
+ unsigned short max_nr_entries;
+ void *entries[0];
+};
+
+struct pstack *pstack__new(unsigned short max_nr_entries)
+{
+ struct pstack *pstack = zalloc((sizeof(*pstack) +
+ max_nr_entries * sizeof(void *)));
+ if (pstack != NULL)
+ pstack->max_nr_entries = max_nr_entries;
+ return pstack;
+}
+
+void pstack__delete(struct pstack *pstack)
+{
+ free(pstack);
+}
+
+bool pstack__empty(const struct pstack *pstack)
+{
+ return pstack->top == 0;
+}
+
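+/* Remove a key from anywhere in the stack, keeping the rest in order */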
+void pstack__remove(struct pstack *pstack, void *key)
+{
+ unsigned short i = pstack->top, last_index = pstack->top - 1;
+
+ while (i-- != 0) {
+ if (pstack->entries[i] == key) {
+ if (i < last_index)
+ memmove(pstack->entries + i,
+ pstack->entries + i + 1,
+ (last_index - i) * sizeof(void *));
+ --pstack->top;
+ return;
+ }
+ }
+ pr_err("%s: %p not on the pstack!\n", __func__, key);
+}
+
+void pstack__push(struct pstack *pstack, void *key)
+{
+ if (pstack->top == pstack->max_nr_entries) {
+ pr_err("%s: top=%d, overflow!\n", __func__, pstack->top);
+ return;
+ }
+ pstack->entries[pstack->top++] = key;
+}
+
+void *pstack__pop(struct pstack *pstack)
+{
+ void *ret;
+
+ if (pstack->top == 0) {
+ pr_err("%s: underflow!\n", __func__);
+ return NULL;
+ }
+
+ ret = pstack->entries[--pstack->top];
+ pstack->entries[pstack->top] = NULL;
+ return ret;
+}
+
+void *pstack__peek(struct pstack *pstack)
+{
+ if (pstack->top == 0)
+ return NULL;
+ return pstack->entries[pstack->top - 1];
+}
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
new file mode 100644
index 000000000..8729b8be0
--- /dev/null
+++ b/tools/perf/util/pstack.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_PSTACK_
+#define _PERF_PSTACK_
+
+#include <stdbool.h>
+
+struct pstack;
+struct pstack *pstack__new(unsigned short max_nr_entries);
+void pstack__delete(struct pstack *pstack);
+bool pstack__empty(const struct pstack *pstack);
+void pstack__remove(struct pstack *pstack, void *key);
+void pstack__push(struct pstack *pstack, void *key);
+void *pstack__pop(struct pstack *pstack);
+void *pstack__peek(struct pstack *pstack);
+
+#endif /* _PERF_PSTACK_ */
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
new file mode 100644
index 000000000..7aa0ea645
--- /dev/null
+++ b/tools/perf/util/python-ext-sources
@@ -0,0 +1,33 @@
+#
+# List of files needed by perf python extension
+#
+# Each source file must be placed on its own line so that it can be
+# processed by Makefile and util/setup.py accordingly.
+#
+
+util/python.c
+util/ctype.c
+util/evlist.c
+util/evsel.c
+util/cpumap.c
+util/memswap.c
+util/mmap.c
+util/namespaces.c
+../lib/bitmap.c
+../lib/find_bit.c
+../lib/hweight.c
+../lib/vsprintf.c
+util/thread_map.c
+util/util.c
+util/xyarray.c
+util/cgroup.c
+util/parse-branch-options.c
+util/rblist.c
+util/counts.c
+util/print_binary.c
+util/strlist.c
+util/trace-event.c
+../lib/rbtree.c
+util/string.c
+util/symbol_fprintf.c
+util/units.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
new file mode 100644
index 000000000..69f5f6142
--- /dev/null
+++ b/tools/perf/util/python.c
@@ -0,0 +1,1377 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <Python.h>
+#include <structmember.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <linux/err.h>
+#include "evlist.h"
+#include "callchain.h"
+#include "evsel.h"
+#include "event.h"
+#include "cpumap.h"
+#include "print_binary.h"
+#include "thread_map.h"
+#include "mmap.h"
+
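+/*
+ * Compatibility wrappers mapping the string/integer constructors used
+ * below onto the Python 2 or Python 3 C API.
+ */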
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+ PyString_FromString(arg)
+#define _PyUnicode_AsString(arg) \
+ PyString_AsString(arg)
+#define _PyUnicode_FromFormat(...) \
+ PyString_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+ PyInt_FromLong(arg)
+
+#else
+
+#define _PyUnicode_FromString(arg) \
+ PyUnicode_FromString(arg)
+#define _PyUnicode_FromFormat(...) \
+ PyUnicode_FromFormat(__VA_ARGS__)
+#define _PyLong_FromLong(arg) \
+ PyLong_FromLong(arg)
+#endif
+
+#ifndef Py_TYPE
+#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+#endif
+
+/*
+ * Provide these two so that we don't have to link against callchain.c and
+ * start dragging hist.c, etc.
+ */
+struct callchain_param callchain_param;
+
+int parse_callchain_record(const char *arg __maybe_unused,
+ struct callchain_param *param __maybe_unused)
+{
+ return 0;
+}
+
+/*
+ * Support debug printing even though util/debug.c is not linked. That means
+ * implementing 'verbose' and 'eprintf'.
+ */
+int verbose;
+
+int eprintf(int level, int var, const char *fmt, ...)
+{
+ va_list args;
+ int ret = 0;
+
+ if (var >= level) {
+ va_start(args, fmt);
+ ret = vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+
+ return ret;
+}
+
+/* Define PyVarObject_HEAD_INIT for python 2.5 */
+#ifndef PyVarObject_HEAD_INIT
+# define PyVarObject_HEAD_INIT(type, size) PyObject_HEAD_INIT(type) size,
+#endif
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void);
+#else
+PyMODINIT_FUNC PyInit_perf(void);
+#endif
+
+#define member_def(type, member, ptype, help) \
+ { #member, ptype, \
+ offsetof(struct pyrf_event, event) + offsetof(struct type, member), \
+ 0, help }
+
+#define sample_member_def(name, member, ptype, help) \
+ { #name, ptype, \
+ offsetof(struct pyrf_event, sample) + offsetof(struct perf_sample, member), \
+ 0, help }
+
+struct pyrf_event {
+ PyObject_HEAD
+ struct perf_evsel *evsel;
+ struct perf_sample sample;
+ union perf_event event;
+};
+
+#define sample_members \
+ sample_member_def(sample_ip, ip, T_ULONGLONG, "event type"), \
+ sample_member_def(sample_pid, pid, T_INT, "event pid"), \
+ sample_member_def(sample_tid, tid, T_INT, "event tid"), \
+ sample_member_def(sample_time, time, T_ULONGLONG, "event timestamp"), \
+ sample_member_def(sample_addr, addr, T_ULONGLONG, "event addr"), \
+ sample_member_def(sample_id, id, T_ULONGLONG, "event id"), \
+ sample_member_def(sample_stream_id, stream_id, T_ULONGLONG, "event stream id"), \
+ sample_member_def(sample_period, period, T_ULONGLONG, "event period"), \
+ sample_member_def(sample_cpu, cpu, T_UINT, "event cpu"),
+
+static char pyrf_mmap_event__doc[] = PyDoc_STR("perf mmap event object.");
+
+static PyMemberDef pyrf_mmap_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(perf_event_header, misc, T_UINT, "event misc"),
+ member_def(mmap_event, pid, T_UINT, "event pid"),
+ member_def(mmap_event, tid, T_UINT, "event tid"),
+ member_def(mmap_event, start, T_ULONGLONG, "start of the map"),
+ member_def(mmap_event, len, T_ULONGLONG, "map length"),
+ member_def(mmap_event, pgoff, T_ULONGLONG, "page offset"),
+ member_def(mmap_event, filename, T_STRING_INPLACE, "backing store"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_mmap_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: mmap, pid: %u, tid: %u, start: %#" PRIx64 ", "
+ "length: %#" PRIx64 ", offset: %#" PRIx64 ", "
+ "filename: %s }",
+ pevent->event.mmap.pid, pevent->event.mmap.tid,
+ pevent->event.mmap.start, pevent->event.mmap.len,
+ pevent->event.mmap.pgoff, pevent->event.mmap.filename) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_mmap_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.mmap_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_mmap_event__doc,
+ .tp_members = pyrf_mmap_event__members,
+ .tp_repr = (reprfunc)pyrf_mmap_event__repr,
+};
+
+static char pyrf_task_event__doc[] = PyDoc_STR("perf task (fork/exit) event object.");
+
+static PyMemberDef pyrf_task_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(fork_event, pid, T_UINT, "event pid"),
+ member_def(fork_event, ppid, T_UINT, "event ppid"),
+ member_def(fork_event, tid, T_UINT, "event tid"),
+ member_def(fork_event, ptid, T_UINT, "event ptid"),
+ member_def(fork_event, time, T_ULONGLONG, "timestamp"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_task_event__repr(struct pyrf_event *pevent)
+{
+ return _PyUnicode_FromFormat("{ type: %s, pid: %u, ppid: %u, tid: %u, "
+ "ptid: %u, time: %" PRIu64 "}",
+ pevent->event.header.type == PERF_RECORD_FORK ? "fork" : "exit",
+ pevent->event.fork.pid,
+ pevent->event.fork.ppid,
+ pevent->event.fork.tid,
+ pevent->event.fork.ptid,
+ pevent->event.fork.time);
+}
+
+static PyTypeObject pyrf_task_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.task_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_task_event__doc,
+ .tp_members = pyrf_task_event__members,
+ .tp_repr = (reprfunc)pyrf_task_event__repr,
+};
+
+static char pyrf_comm_event__doc[] = PyDoc_STR("perf comm event object.");
+
+static PyMemberDef pyrf_comm_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(comm_event, pid, T_UINT, "event pid"),
+ member_def(comm_event, tid, T_UINT, "event tid"),
+ member_def(comm_event, comm, T_STRING_INPLACE, "process name"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_comm_event__repr(struct pyrf_event *pevent)
+{
+ return _PyUnicode_FromFormat("{ type: comm, pid: %u, tid: %u, comm: %s }",
+ pevent->event.comm.pid,
+ pevent->event.comm.tid,
+ pevent->event.comm.comm);
+}
+
+static PyTypeObject pyrf_comm_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.comm_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_comm_event__doc,
+ .tp_members = pyrf_comm_event__members,
+ .tp_repr = (reprfunc)pyrf_comm_event__repr,
+};
+
+static char pyrf_throttle_event__doc[] = PyDoc_STR("perf throttle event object.");
+
+static PyMemberDef pyrf_throttle_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(throttle_event, time, T_ULONGLONG, "timestamp"),
+ member_def(throttle_event, id, T_ULONGLONG, "event id"),
+ member_def(throttle_event, stream_id, T_ULONGLONG, "event stream id"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_throttle_event__repr(struct pyrf_event *pevent)
+{
+ struct throttle_event *te = (struct throttle_event *)(&pevent->event.header + 1);
+
+ return _PyUnicode_FromFormat("{ type: %sthrottle, time: %" PRIu64 ", id: %" PRIu64
+ ", stream_id: %" PRIu64 " }",
+ pevent->event.header.type == PERF_RECORD_THROTTLE ? "" : "un",
+ te->time, te->id, te->stream_id);
+}
+
+static PyTypeObject pyrf_throttle_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.throttle_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_throttle_event__doc,
+ .tp_members = pyrf_throttle_event__members,
+ .tp_repr = (reprfunc)pyrf_throttle_event__repr,
+};
+
+static char pyrf_lost_event__doc[] = PyDoc_STR("perf lost event object.");
+
+static PyMemberDef pyrf_lost_event__members[] = {
+ sample_members
+ member_def(lost_event, id, T_ULONGLONG, "event id"),
+ member_def(lost_event, lost, T_ULONGLONG, "number of lost events"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_lost_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: lost, id: %#" PRIx64 ", "
+ "lost: %#" PRIx64 " }",
+ pevent->event.lost.id, pevent->event.lost.lost) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_lost_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.lost_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_lost_event__doc,
+ .tp_members = pyrf_lost_event__members,
+ .tp_repr = (reprfunc)pyrf_lost_event__repr,
+};
+
+static char pyrf_read_event__doc[] = PyDoc_STR("perf read event object.");
+
+static PyMemberDef pyrf_read_event__members[] = {
+ sample_members
+ member_def(read_event, pid, T_UINT, "event pid"),
+ member_def(read_event, tid, T_UINT, "event tid"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_read_event__repr(struct pyrf_event *pevent)
+{
+ return _PyUnicode_FromFormat("{ type: read, pid: %u, tid: %u }",
+ pevent->event.read.pid,
+ pevent->event.read.tid);
+ /*
+ * FIXME: return the array of read values,
+ * making this method useful ;-)
+ */
+}
+
+static PyTypeObject pyrf_read_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.read_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_read_event__doc,
+ .tp_members = pyrf_read_event__members,
+ .tp_repr = (reprfunc)pyrf_read_event__repr,
+};
+
+static char pyrf_sample_event__doc[] = PyDoc_STR("perf sample event object.");
+
+static PyMemberDef pyrf_sample_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: sample }") < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
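+/* Only tracepoint samples carry raw data that decodes into named fields */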
+static bool is_tracepoint(struct pyrf_event *pevent)
+{
+ return pevent->evsel->attr.type == PERF_TYPE_TRACEPOINT;
+}
+
+static PyObject*
+tracepoint_field(struct pyrf_event *pe, struct format_field *field)
+{
+ struct tep_handle *pevent = field->event->pevent;
+ void *data = pe->sample.raw_data;
+ PyObject *ret = NULL;
+ unsigned long long val;
+ unsigned int offset, len;
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & FIELD_IS_DYNAMIC) {
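+			/* A dynamic array slot packs (length << 16) | offset */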
+ val = tep_read_number(pevent, data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+ if (field->flags & FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ ret = _PyUnicode_FromString((char *)data + offset);
+ } else {
+ ret = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+ field->flags &= ~FIELD_IS_STRING;
+ }
+ } else {
+ val = tep_read_number(pevent, data + field->offset,
+ field->size);
+ if (field->flags & FIELD_IS_POINTER)
+ ret = PyLong_FromUnsignedLong((unsigned long) val);
+ else if (field->flags & FIELD_IS_SIGNED)
+ ret = PyLong_FromLong((long) val);
+ else
+ ret = PyLong_FromUnsignedLong((unsigned long) val);
+ }
+
+ return ret;
+}
+
+static PyObject*
+get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
+{
+ const char *str = _PyUnicode_AsString(PyObject_Str(attr_name));
+ struct perf_evsel *evsel = pevent->evsel;
+ struct format_field *field;
+
+ if (!evsel->tp_format) {
+ struct event_format *tp_format;
+
+ tp_format = trace_event__tp_format_id(evsel->attr.config);
+ if (!tp_format)
+ return NULL;
+
+ evsel->tp_format = tp_format;
+ }
+
+ field = tep_find_any_field(evsel->tp_format, str);
+ if (!field)
+ return NULL;
+
+ return tracepoint_field(pevent, field);
+}
+
+static PyObject*
+pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name)
+{
+ PyObject *obj = NULL;
+
+ if (is_tracepoint(pevent))
+ obj = get_tracepoint_field(pevent, attr_name);
+
+ return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name);
+}
+
+static PyTypeObject pyrf_sample_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.sample_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_sample_event__doc,
+ .tp_members = pyrf_sample_event__members,
+ .tp_repr = (reprfunc)pyrf_sample_event__repr,
+ .tp_getattro = (getattrofunc) pyrf_sample_event__getattro,
+};
+
+static char pyrf_context_switch_event__doc[] = PyDoc_STR("perf context_switch event object.");
+
+static PyMemberDef pyrf_context_switch_event__members[] = {
+ sample_members
+ member_def(perf_event_header, type, T_UINT, "event type"),
+ member_def(context_switch_event, next_prev_pid, T_UINT, "next/prev pid"),
+ member_def(context_switch_event, next_prev_tid, T_UINT, "next/prev tid"),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf_context_switch_event__repr(struct pyrf_event *pevent)
+{
+ PyObject *ret;
+ char *s;
+
+ if (asprintf(&s, "{ type: context_switch, next_prev_pid: %u, next_prev_tid: %u, switch_out: %u }",
+ pevent->event.context_switch.next_prev_pid,
+ pevent->event.context_switch.next_prev_tid,
+ !!(pevent->event.header.misc & PERF_RECORD_MISC_SWITCH_OUT)) < 0) {
+ ret = PyErr_NoMemory();
+ } else {
+ ret = _PyUnicode_FromString(s);
+ free(s);
+ }
+ return ret;
+}
+
+static PyTypeObject pyrf_context_switch_event__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.context_switch_event",
+ .tp_basicsize = sizeof(struct pyrf_event),
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_context_switch_event__doc,
+ .tp_members = pyrf_context_switch_event__members,
+ .tp_repr = (reprfunc)pyrf_context_switch_event__repr,
+};
+
+static int pyrf_event__setup_types(void)
+{
+ int err;
+ pyrf_mmap_event__type.tp_new =
+ pyrf_task_event__type.tp_new =
+ pyrf_comm_event__type.tp_new =
+ pyrf_lost_event__type.tp_new =
+ pyrf_read_event__type.tp_new =
+ pyrf_sample_event__type.tp_new =
+ pyrf_context_switch_event__type.tp_new =
+ pyrf_throttle_event__type.tp_new = PyType_GenericNew;
+ err = PyType_Ready(&pyrf_mmap_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_lost_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_task_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_comm_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_throttle_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_read_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_sample_event__type);
+ if (err < 0)
+ goto out;
+ err = PyType_Ready(&pyrf_context_switch_event__type);
+ if (err < 0)
+ goto out;
+out:
+ return err;
+}
+
+static PyTypeObject *pyrf_event__type[] = {
+ [PERF_RECORD_MMAP] = &pyrf_mmap_event__type,
+ [PERF_RECORD_LOST] = &pyrf_lost_event__type,
+ [PERF_RECORD_COMM] = &pyrf_comm_event__type,
+ [PERF_RECORD_EXIT] = &pyrf_task_event__type,
+ [PERF_RECORD_THROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_UNTHROTTLE] = &pyrf_throttle_event__type,
+ [PERF_RECORD_FORK] = &pyrf_task_event__type,
+ [PERF_RECORD_READ] = &pyrf_read_event__type,
+ [PERF_RECORD_SAMPLE] = &pyrf_sample_event__type,
+ [PERF_RECORD_SWITCH] = &pyrf_context_switch_event__type,
+ [PERF_RECORD_SWITCH_CPU_WIDE] = &pyrf_context_switch_event__type,
+};
+
+static PyObject *pyrf_event__new(union perf_event *event)
+{
+ struct pyrf_event *pevent;
+ PyTypeObject *ptype;
+
+ if ((event->header.type < PERF_RECORD_MMAP ||
+ event->header.type > PERF_RECORD_SAMPLE) &&
+ !(event->header.type == PERF_RECORD_SWITCH ||
+ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE))
+ return NULL;
+
+ ptype = pyrf_event__type[event->header.type];
+ pevent = PyObject_New(struct pyrf_event, ptype);
+ if (pevent != NULL)
+ memcpy(&pevent->event, event, event->header.size);
+ return (PyObject *)pevent;
+}
+
+struct pyrf_cpu_map {
+ PyObject_HEAD
+
+ struct cpu_map *cpus;
+};
+
+static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus,
+ PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "cpustr", NULL };
+ char *cpustr = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s",
+ kwlist, &cpustr))
+ return -1;
+
+ pcpus->cpus = cpu_map__new(cpustr);
+ if (pcpus->cpus == NULL)
+ return -1;
+ return 0;
+}
+
+static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus)
+{
+ cpu_map__put(pcpus->cpus);
+ Py_TYPE(pcpus)->tp_free((PyObject*)pcpus);
+}
+
+static Py_ssize_t pyrf_cpu_map__length(PyObject *obj)
+{
+ struct pyrf_cpu_map *pcpus = (void *)obj;
+
+ return pcpus->cpus->nr;
+}
+
+static PyObject *pyrf_cpu_map__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_cpu_map *pcpus = (void *)obj;
+
+ if (i >= pcpus->cpus->nr)
+ return NULL;
+
+ return Py_BuildValue("i", pcpus->cpus->map[i]);
+}
+
+static PySequenceMethods pyrf_cpu_map__sequence_methods = {
+ .sq_length = pyrf_cpu_map__length,
+ .sq_item = pyrf_cpu_map__item,
+};
+
+static char pyrf_cpu_map__doc[] = PyDoc_STR("cpu map object.");
+
+static PyTypeObject pyrf_cpu_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.cpu_map",
+ .tp_basicsize = sizeof(struct pyrf_cpu_map),
+ .tp_dealloc = (destructor)pyrf_cpu_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_cpu_map__doc,
+ .tp_as_sequence = &pyrf_cpu_map__sequence_methods,
+ .tp_init = (initproc)pyrf_cpu_map__init,
+};
+
+static int pyrf_cpu_map__setup_types(void)
+{
+ pyrf_cpu_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_cpu_map__type);
+}
+
+struct pyrf_thread_map {
+ PyObject_HEAD
+
+ struct thread_map *threads;
+};
+
+static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads,
+ PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "pid", "tid", "uid", NULL };
+ int pid = -1, tid = -1, uid = UINT_MAX;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iii",
+ kwlist, &pid, &tid, &uid))
+ return -1;
+
+ pthreads->threads = thread_map__new(pid, tid, uid);
+ if (pthreads->threads == NULL)
+ return -1;
+ return 0;
+}
+
+static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads)
+{
+ thread_map__put(pthreads->threads);
+ Py_TYPE(pthreads)->tp_free((PyObject*)pthreads);
+}
+
+static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
+{
+ struct pyrf_thread_map *pthreads = (void *)obj;
+
+ return pthreads->threads->nr;
+}
+
+static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_thread_map *pthreads = (void *)obj;
+
+ if (i >= pthreads->threads->nr)
+ return NULL;
+
+ return Py_BuildValue("i", pthreads->threads->map[i]);
+}
+
+static PySequenceMethods pyrf_thread_map__sequence_methods = {
+ .sq_length = pyrf_thread_map__length,
+ .sq_item = pyrf_thread_map__item,
+};
+
+static char pyrf_thread_map__doc[] = PyDoc_STR("thread map object.");
+
+static PyTypeObject pyrf_thread_map__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.thread_map",
+ .tp_basicsize = sizeof(struct pyrf_thread_map),
+ .tp_dealloc = (destructor)pyrf_thread_map__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_thread_map__doc,
+ .tp_as_sequence = &pyrf_thread_map__sequence_methods,
+ .tp_init = (initproc)pyrf_thread_map__init,
+};
+
+static int pyrf_thread_map__setup_types(void)
+{
+ pyrf_thread_map__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_thread_map__type);
+}
+
+struct pyrf_evsel {
+ PyObject_HEAD
+
+ struct perf_evsel evsel;
+};
+
+static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_PERIOD | PERF_SAMPLE_TID,
+ };
+ static char *kwlist[] = {
+ "type",
+ "config",
+ "sample_freq",
+ "sample_period",
+ "sample_type",
+ "read_format",
+ "disabled",
+ "inherit",
+ "pinned",
+ "exclusive",
+ "exclude_user",
+ "exclude_kernel",
+ "exclude_hv",
+ "exclude_idle",
+ "mmap",
+ "context_switch",
+ "comm",
+ "freq",
+ "inherit_stat",
+ "enable_on_exec",
+ "task",
+ "watermark",
+ "precise_ip",
+ "mmap_data",
+ "sample_id_all",
+ "wakeup_events",
+ "bp_type",
+ "bp_addr",
+ "bp_len",
+ NULL
+ };
+ u64 sample_period = 0;
+ u32 disabled = 0,
+ inherit = 0,
+ pinned = 0,
+ exclusive = 0,
+ exclude_user = 0,
+ exclude_kernel = 0,
+ exclude_hv = 0,
+ exclude_idle = 0,
+ mmap = 0,
+ context_switch = 0,
+ comm = 0,
+ freq = 1,
+ inherit_stat = 0,
+ enable_on_exec = 0,
+ task = 0,
+ watermark = 0,
+ precise_ip = 0,
+ mmap_data = 0,
+ sample_id_all = 1;
+ int idx = 0;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs,
+ "|iKiKKiiiiiiiiiiiiiiiiiiiiiiKK", kwlist,
+ &attr.type, &attr.config, &attr.sample_freq,
+ &sample_period, &attr.sample_type,
+ &attr.read_format, &disabled, &inherit,
+ &pinned, &exclusive, &exclude_user,
+ &exclude_kernel, &exclude_hv, &exclude_idle,
+ &mmap, &context_switch, &comm, &freq, &inherit_stat,
+ &enable_on_exec, &task, &watermark,
+ &precise_ip, &mmap_data, &sample_id_all,
+ &attr.wakeup_events, &attr.bp_type,
+ &attr.bp_addr, &attr.bp_len, &idx))
+ return -1;
+
+	/* sample_period and sample_freq share a union in perf_event_attr */
+ if (sample_period != 0) {
+ if (attr.sample_freq != 0)
+ return -1; /* FIXME: throw right exception */
+ attr.sample_period = sample_period;
+ }
+
+ /* Bitfields */
+ attr.disabled = disabled;
+ attr.inherit = inherit;
+ attr.pinned = pinned;
+ attr.exclusive = exclusive;
+ attr.exclude_user = exclude_user;
+ attr.exclude_kernel = exclude_kernel;
+ attr.exclude_hv = exclude_hv;
+ attr.exclude_idle = exclude_idle;
+ attr.mmap = mmap;
+ attr.context_switch = context_switch;
+ attr.comm = comm;
+ attr.freq = freq;
+ attr.inherit_stat = inherit_stat;
+ attr.enable_on_exec = enable_on_exec;
+ attr.task = task;
+ attr.watermark = watermark;
+ attr.precise_ip = precise_ip;
+ attr.mmap_data = mmap_data;
+ attr.sample_id_all = sample_id_all;
+ attr.size = sizeof(attr);
+
+ perf_evsel__init(&pevsel->evsel, &attr, idx);
+ return 0;
+}
+
+static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
+{
+ perf_evsel__exit(&pevsel->evsel);
+ Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
+}
+
+static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evsel *evsel = &pevsel->evsel;
+ struct cpu_map *cpus = NULL;
+ struct thread_map *threads = NULL;
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ int group = 0, inherit = 0;
+ static char *kwlist[] = { "cpus", "threads", "group", "inherit", NULL };
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist,
+ &pcpus, &pthreads, &group, &inherit))
+ return NULL;
+
+ if (pthreads != NULL)
+ threads = ((struct pyrf_thread_map *)pthreads)->threads;
+
+ if (pcpus != NULL)
+ cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+
+ evsel->attr.inherit = inherit;
+ /*
+ * This will group just the fds for this single evsel, to group
+ * multiple events, use evlist.open().
+ */
+ if (perf_evsel__open(evsel, cpus, threads) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyMethodDef pyrf_evsel__methods[] = {
+ {
+ .ml_name = "open",
+ .ml_meth = (PyCFunction)pyrf_evsel__open,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("open the event selector file descriptor table.")
+ },
+ { .ml_name = NULL, }
+};
+
+static char pyrf_evsel__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evsel__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evsel",
+ .tp_basicsize = sizeof(struct pyrf_evsel),
+ .tp_dealloc = (destructor)pyrf_evsel__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_doc = pyrf_evsel__doc,
+ .tp_methods = pyrf_evsel__methods,
+ .tp_init = (initproc)pyrf_evsel__init,
+};
+
+static int pyrf_evsel__setup_types(void)
+{
+ pyrf_evsel__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evsel__type);
+}
+
+struct pyrf_evlist {
+ PyObject_HEAD
+
+ struct perf_evlist evlist;
+};
+
+static int pyrf_evlist__init(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs __maybe_unused)
+{
+ PyObject *pcpus = NULL, *pthreads = NULL;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
+
+ if (!PyArg_ParseTuple(args, "OO", &pcpus, &pthreads))
+ return -1;
+
+ threads = ((struct pyrf_thread_map *)pthreads)->threads;
+ cpus = ((struct pyrf_cpu_map *)pcpus)->cpus;
+ perf_evlist__init(&pevlist->evlist, cpus, threads);
+ return 0;
+}
+
+static void pyrf_evlist__delete(struct pyrf_evlist *pevlist)
+{
+ perf_evlist__exit(&pevlist->evlist);
+ Py_TYPE(pevlist)->tp_free((PyObject*)pevlist);
+}
+
+static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ static char *kwlist[] = { "pages", "overwrite", NULL };
+ int pages = 128, overwrite = false;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ii", kwlist,
+ &pages, &overwrite))
+ return NULL;
+
+ if (perf_evlist__mmap(evlist, pages) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__poll(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ static char *kwlist[] = { "timeout", NULL };
+ int timeout = -1, n;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &timeout))
+ return NULL;
+
+ n = perf_evlist__poll(evlist, timeout);
+ if (n < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ return Py_BuildValue("i", n);
+}
+
+static PyObject *pyrf_evlist__get_pollfd(struct pyrf_evlist *pevlist,
+ PyObject *args __maybe_unused,
+ PyObject *kwargs __maybe_unused)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ PyObject *list = PyList_New(0);
+ int i;
+
+ for (i = 0; i < evlist->pollfd.nr; ++i) {
+ PyObject *file;
+#if PY_MAJOR_VERSION < 3
+ FILE *fp = fdopen(evlist->pollfd.entries[i].fd, "r");
+
+ if (fp == NULL)
+ goto free_list;
+
+ file = PyFile_FromFile(fp, "perf", "r", NULL);
+#else
+ file = PyFile_FromFd(evlist->pollfd.entries[i].fd, "perf", "r", -1,
+ NULL, NULL, NULL, 0);
+#endif
+ if (file == NULL)
+ goto free_list;
+
+ if (PyList_Append(list, file) != 0) {
+ Py_DECREF(file);
+ goto free_list;
+ }
+
+ Py_DECREF(file);
+ }
+
+ return list;
+free_list:
+	Py_XDECREF(list);
+	return PyErr_NoMemory();
+}
+
+
+static PyObject *pyrf_evlist__add(struct pyrf_evlist *pevlist,
+ PyObject *args,
+ PyObject *kwargs __maybe_unused)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ PyObject *pevsel;
+ struct perf_evsel *evsel;
+
+ if (!PyArg_ParseTuple(args, "O", &pevsel))
+ return NULL;
+
+ Py_INCREF(pevsel);
+ evsel = &((struct pyrf_evsel *)pevsel)->evsel;
+ evsel->idx = evlist->nr_entries;
+ perf_evlist__add(evlist, evsel);
+
+ return Py_BuildValue("i", evlist->nr_entries);
+}
+
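+/* Find the mmap'ed ring buffer covering the given cpu, or NULL if none. */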
+static struct perf_mmap *get_md(struct perf_evlist *evlist, int cpu)
+{
+ int i;
+
+ for (i = 0; i < evlist->nr_mmaps; i++) {
+ struct perf_mmap *md = &evlist->mmap[i];
+
+ if (md->cpu == cpu)
+ return md;
+ }
+
+ return NULL;
+}
+
+static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ union perf_event *event;
+ int sample_id_all = 1, cpu;
+ static char *kwlist[] = { "cpu", "sample_id_all", NULL };
+ struct perf_mmap *md;
+ int err;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist,
+ &cpu, &sample_id_all))
+ return NULL;
+
+ md = get_md(evlist, cpu);
+ if (!md)
+ return NULL;
+
+ if (perf_mmap__read_init(md) < 0)
+ goto end;
+
+ event = perf_mmap__read_event(md);
+ if (event != NULL) {
+ PyObject *pyevent = pyrf_event__new(event);
+ struct pyrf_event *pevent = (struct pyrf_event *)pyevent;
+ struct perf_evsel *evsel;
+
+ if (pyevent == NULL)
+ return PyErr_NoMemory();
+
+ evsel = perf_evlist__event2evsel(evlist, event);
+ if (!evsel) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+
+ pevent->evsel = evsel;
+
+ err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
+
+		/* Consume the event only after we parsed it out. */
+ perf_mmap__consume(md);
+
+ if (err)
+ return PyErr_Format(PyExc_OSError,
+ "perf: can't parse sample, err=%d", err);
+ return pyevent;
+ }
+end:
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
+ PyObject *args, PyObject *kwargs)
+{
+ struct perf_evlist *evlist = &pevlist->evlist;
+ int group = 0;
+ static char *kwlist[] = { "group", NULL };
+
+	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i", kwlist, &group))
+ return NULL;
+
+ if (group)
+ perf_evlist__set_leader(evlist);
+
+ if (perf_evlist__open(evlist) < 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return NULL;
+ }
+
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+
+static PyMethodDef pyrf_evlist__methods[] = {
+ {
+ .ml_name = "mmap",
+ .ml_meth = (PyCFunction)pyrf_evlist__mmap,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("mmap the file descriptor table.")
+ },
+ {
+ .ml_name = "open",
+ .ml_meth = (PyCFunction)pyrf_evlist__open,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("open the file descriptors.")
+ },
+ {
+ .ml_name = "poll",
+ .ml_meth = (PyCFunction)pyrf_evlist__poll,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("poll the file descriptor table.")
+ },
+ {
+ .ml_name = "get_pollfd",
+ .ml_meth = (PyCFunction)pyrf_evlist__get_pollfd,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("get the poll file descriptor table.")
+ },
+ {
+ .ml_name = "add",
+ .ml_meth = (PyCFunction)pyrf_evlist__add,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("adds an event selector to the list.")
+ },
+ {
+ .ml_name = "read_on_cpu",
+ .ml_meth = (PyCFunction)pyrf_evlist__read_on_cpu,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("reads an event.")
+ },
+ { .ml_name = NULL, }
+};
+
+static Py_ssize_t pyrf_evlist__length(PyObject *obj)
+{
+ struct pyrf_evlist *pevlist = (void *)obj;
+
+ return pevlist->evlist.nr_entries;
+}
+
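+/* Sequence protocol: return the i'th event selector by walking the list. */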
+static PyObject *pyrf_evlist__item(PyObject *obj, Py_ssize_t i)
+{
+ struct pyrf_evlist *pevlist = (void *)obj;
+ struct perf_evsel *pos;
+
+ if (i >= pevlist->evlist.nr_entries)
+ return NULL;
+
+ evlist__for_each_entry(&pevlist->evlist, pos) {
+ if (i-- == 0)
+ break;
+ }
+
+ return Py_BuildValue("O", container_of(pos, struct pyrf_evsel, evsel));
+}
+
+static PySequenceMethods pyrf_evlist__sequence_methods = {
+ .sq_length = pyrf_evlist__length,
+ .sq_item = pyrf_evlist__item,
+};
+
+static char pyrf_evlist__doc[] = PyDoc_STR("perf event selector list object.");
+
+static PyTypeObject pyrf_evlist__type = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "perf.evlist",
+ .tp_basicsize = sizeof(struct pyrf_evlist),
+ .tp_dealloc = (destructor)pyrf_evlist__delete,
+ .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE,
+ .tp_as_sequence = &pyrf_evlist__sequence_methods,
+ .tp_doc = pyrf_evlist__doc,
+ .tp_methods = pyrf_evlist__methods,
+ .tp_init = (initproc)pyrf_evlist__init,
+};
+
+static int pyrf_evlist__setup_types(void)
+{
+ pyrf_evlist__type.tp_new = PyType_GenericNew;
+ return PyType_Ready(&pyrf_evlist__type);
+}
+
+#define PERF_CONST(name) { #name, PERF_##name }
+
+static struct {
+ const char *name;
+ int value;
+} perf__constants[] = {
+ PERF_CONST(TYPE_HARDWARE),
+ PERF_CONST(TYPE_SOFTWARE),
+ PERF_CONST(TYPE_TRACEPOINT),
+ PERF_CONST(TYPE_HW_CACHE),
+ PERF_CONST(TYPE_RAW),
+ PERF_CONST(TYPE_BREAKPOINT),
+
+ PERF_CONST(COUNT_HW_CPU_CYCLES),
+ PERF_CONST(COUNT_HW_INSTRUCTIONS),
+ PERF_CONST(COUNT_HW_CACHE_REFERENCES),
+ PERF_CONST(COUNT_HW_CACHE_MISSES),
+ PERF_CONST(COUNT_HW_BRANCH_INSTRUCTIONS),
+ PERF_CONST(COUNT_HW_BRANCH_MISSES),
+ PERF_CONST(COUNT_HW_BUS_CYCLES),
+ PERF_CONST(COUNT_HW_CACHE_L1D),
+ PERF_CONST(COUNT_HW_CACHE_L1I),
+ PERF_CONST(COUNT_HW_CACHE_LL),
+ PERF_CONST(COUNT_HW_CACHE_DTLB),
+ PERF_CONST(COUNT_HW_CACHE_ITLB),
+ PERF_CONST(COUNT_HW_CACHE_BPU),
+ PERF_CONST(COUNT_HW_CACHE_OP_READ),
+ PERF_CONST(COUNT_HW_CACHE_OP_WRITE),
+ PERF_CONST(COUNT_HW_CACHE_OP_PREFETCH),
+ PERF_CONST(COUNT_HW_CACHE_RESULT_ACCESS),
+ PERF_CONST(COUNT_HW_CACHE_RESULT_MISS),
+
+ PERF_CONST(COUNT_HW_STALLED_CYCLES_FRONTEND),
+ PERF_CONST(COUNT_HW_STALLED_CYCLES_BACKEND),
+
+ PERF_CONST(COUNT_SW_CPU_CLOCK),
+ PERF_CONST(COUNT_SW_TASK_CLOCK),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS),
+ PERF_CONST(COUNT_SW_CONTEXT_SWITCHES),
+ PERF_CONST(COUNT_SW_CPU_MIGRATIONS),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS_MIN),
+ PERF_CONST(COUNT_SW_PAGE_FAULTS_MAJ),
+ PERF_CONST(COUNT_SW_ALIGNMENT_FAULTS),
+ PERF_CONST(COUNT_SW_EMULATION_FAULTS),
+ PERF_CONST(COUNT_SW_DUMMY),
+
+ PERF_CONST(SAMPLE_IP),
+ PERF_CONST(SAMPLE_TID),
+ PERF_CONST(SAMPLE_TIME),
+ PERF_CONST(SAMPLE_ADDR),
+ PERF_CONST(SAMPLE_READ),
+ PERF_CONST(SAMPLE_CALLCHAIN),
+ PERF_CONST(SAMPLE_ID),
+ PERF_CONST(SAMPLE_CPU),
+ PERF_CONST(SAMPLE_PERIOD),
+ PERF_CONST(SAMPLE_STREAM_ID),
+ PERF_CONST(SAMPLE_RAW),
+
+ PERF_CONST(FORMAT_TOTAL_TIME_ENABLED),
+ PERF_CONST(FORMAT_TOTAL_TIME_RUNNING),
+ PERF_CONST(FORMAT_ID),
+ PERF_CONST(FORMAT_GROUP),
+
+ PERF_CONST(RECORD_MMAP),
+ PERF_CONST(RECORD_LOST),
+ PERF_CONST(RECORD_COMM),
+ PERF_CONST(RECORD_EXIT),
+ PERF_CONST(RECORD_THROTTLE),
+ PERF_CONST(RECORD_UNTHROTTLE),
+ PERF_CONST(RECORD_FORK),
+ PERF_CONST(RECORD_READ),
+ PERF_CONST(RECORD_SAMPLE),
+ PERF_CONST(RECORD_MMAP2),
+ PERF_CONST(RECORD_AUX),
+ PERF_CONST(RECORD_ITRACE_START),
+ PERF_CONST(RECORD_LOST_SAMPLES),
+ PERF_CONST(RECORD_SWITCH),
+ PERF_CONST(RECORD_SWITCH_CPU_WIDE),
+
+ PERF_CONST(RECORD_MISC_SWITCH_OUT),
+ { .name = NULL, },
+};
+
+static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
+ PyObject *args, PyObject *kwargs)
+{
+ struct event_format *tp_format;
+ static char *kwlist[] = { "sys", "name", NULL };
+ char *sys = NULL;
+ char *name = NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss", kwlist,
+ &sys, &name))
+ return NULL;
+
+ tp_format = trace_event__tp_format(sys, name);
+ if (IS_ERR(tp_format))
+ return _PyLong_FromLong(-1);
+
+ return _PyLong_FromLong(tp_format->id);
+}
+
+static PyMethodDef perf__methods[] = {
+ {
+ .ml_name = "tracepoint",
+ .ml_meth = (PyCFunction) pyrf__tracepoint,
+ .ml_flags = METH_VARARGS | METH_KEYWORDS,
+ .ml_doc = PyDoc_STR("Get tracepoint config.")
+ },
+ { .ml_name = NULL, }
+};
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initperf(void)
+#else
+PyMODINIT_FUNC PyInit_perf(void)
+#endif
+{
+ PyObject *obj;
+ int i;
+ PyObject *dict;
+#if PY_MAJOR_VERSION < 3
+ PyObject *module = Py_InitModule("perf", perf__methods);
+#else
+ static struct PyModuleDef moduledef = {
+ PyModuleDef_HEAD_INIT,
+ "perf", /* m_name */
+ "", /* m_doc */
+ -1, /* m_size */
+ perf__methods, /* m_methods */
+ NULL, /* m_reload */
+ NULL, /* m_traverse */
+ NULL, /* m_clear */
+ NULL, /* m_free */
+ };
+ PyObject *module = PyModule_Create(&moduledef);
+#endif
+
+ if (module == NULL ||
+ pyrf_event__setup_types() < 0 ||
+ pyrf_evlist__setup_types() < 0 ||
+ pyrf_evsel__setup_types() < 0 ||
+ pyrf_thread_map__setup_types() < 0 ||
+ pyrf_cpu_map__setup_types() < 0)
+#if PY_MAJOR_VERSION < 3
+ return;
+#else
+ return module;
+#endif
+
+ /* The page_size is placed in util object. */
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ Py_INCREF(&pyrf_evlist__type);
+ PyModule_AddObject(module, "evlist", (PyObject*)&pyrf_evlist__type);
+
+ Py_INCREF(&pyrf_evsel__type);
+ PyModule_AddObject(module, "evsel", (PyObject*)&pyrf_evsel__type);
+
+ Py_INCREF(&pyrf_mmap_event__type);
+ PyModule_AddObject(module, "mmap_event", (PyObject *)&pyrf_mmap_event__type);
+
+ Py_INCREF(&pyrf_lost_event__type);
+ PyModule_AddObject(module, "lost_event", (PyObject *)&pyrf_lost_event__type);
+
+ Py_INCREF(&pyrf_comm_event__type);
+ PyModule_AddObject(module, "comm_event", (PyObject *)&pyrf_comm_event__type);
+
+ Py_INCREF(&pyrf_task_event__type);
+ PyModule_AddObject(module, "task_event", (PyObject *)&pyrf_task_event__type);
+
+ Py_INCREF(&pyrf_throttle_event__type);
+ PyModule_AddObject(module, "throttle_event", (PyObject *)&pyrf_throttle_event__type);
+
+ Py_INCREF(&pyrf_read_event__type);
+ PyModule_AddObject(module, "read_event", (PyObject *)&pyrf_read_event__type);
+
+ Py_INCREF(&pyrf_sample_event__type);
+ PyModule_AddObject(module, "sample_event", (PyObject *)&pyrf_sample_event__type);
+
+ Py_INCREF(&pyrf_context_switch_event__type);
+ PyModule_AddObject(module, "switch_event", (PyObject *)&pyrf_context_switch_event__type);
+
+ Py_INCREF(&pyrf_thread_map__type);
+ PyModule_AddObject(module, "thread_map", (PyObject*)&pyrf_thread_map__type);
+
+ Py_INCREF(&pyrf_cpu_map__type);
+ PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type);
+
+ dict = PyModule_GetDict(module);
+ if (dict == NULL)
+ goto error;
+
+ for (i = 0; perf__constants[i].name != NULL; i++) {
+ obj = _PyLong_FromLong(perf__constants[i].value);
+ if (obj == NULL)
+ goto error;
+ PyDict_SetItemString(dict, perf__constants[i].name, obj);
+ Py_DECREF(obj);
+ }
+
+error:
+ if (PyErr_Occurred())
+ PyErr_SetString(PyExc_ImportError, "perf: Init failed!");
+#if PY_MAJOR_VERSION >= 3
+ return module;
+#endif
+}
+
+/*
+ * Dummy, to avoid dragging all the test_attr infrastructure into the python
+ * binding.
+ */
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags)
+{
+}
diff --git a/tools/perf/util/rb_resort.h b/tools/perf/util/rb_resort.h
new file mode 100644
index 000000000..a920f702a
--- /dev/null
+++ b/tools/perf/util/rb_resort.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_RESORT_RB_H_
+#define _PERF_RESORT_RB_H_
+/*
+ * Template for creating a class to resort an existing rb_tree according to
+ * a new sort criterion, which must be present in the entries of the source
+ * rb_tree.
+ *
+ * (c) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Quick example, resorting threads by their shortname:
+ *
+ * First define the prefix (threads) to be used for the functions and data
+ * structures created, and provide an expression for the sorting, then the
+ * fields to be present in each of the entries in the new, sorted, rb_tree.
+ *
+ * The body of the init function should collect the fields, maybe
+ * pre-calculating them from multiple entries in the original 'entry' from
+ * the rb_tree used as a source for the entries to be sorted:
+
+DEFINE_RESORT_RB(threads, strcmp(a->thread->shortname,
+				 b->thread->shortname) < 0,
+ struct thread *thread;
+)
+{
+ entry->thread = rb_entry(nd, struct thread, rb_node);
+}
+
+ * After this it is just a matter of instantiating it and iterating it. For
+ * a few data structures with existing rb_trees, such as 'struct machine',
+ * helpers are available to get the rb_root and the nr_entries:
+
+ DECLARE_RESORT_RB_MACHINE_THREADS(threads, machine_ptr, hash_bucket);
+
+ * This will instantiate the new rb_tree and a cursor for it, that can be used as:
+
+ struct rb_node *nd;
+
+ resort_rb__for_each_entry(nd, threads) {
+ struct thread *t = threads_entry;
+ printf("%s: %d\n", t->shortname, t->tid);
+ }
+
+ * Then delete it:
+
+ resort_rb__delete(threads);
+
+ * The name of the data structures and functions will have a _sorted suffix
+ * right before the method names, i.e. will look like:
+ *
+ * struct threads_sorted_entry {}
+ * threads_sorted__insert()
+ */
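+
+/*
+ * Note: the sorted entries live in one allocation right after the rb_root
+ * (the nd[0] flexible array below), so the nr_entries value passed to the
+ * _sorted__new() constructor must match the number of entries in the
+ * source rb_tree.
+ */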
+
+#define DEFINE_RESORT_RB(__name, __comp, ...) \
+struct __name##_sorted_entry { \
+ struct rb_node rb_node; \
+ __VA_ARGS__ \
+}; \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry); \
+ \
+static int __name##_sorted__cmp(struct rb_node *nda, struct rb_node *ndb) \
+{ \
+ struct __name##_sorted_entry *a, *b; \
+ a = rb_entry(nda, struct __name##_sorted_entry, rb_node); \
+ b = rb_entry(ndb, struct __name##_sorted_entry, rb_node); \
+ return __comp; \
+} \
+ \
+struct __name##_sorted { \
+ struct rb_root entries; \
+ struct __name##_sorted_entry nd[0]; \
+}; \
+ \
+static void __name##_sorted__insert(struct __name##_sorted *sorted, \
+ struct rb_node *sorted_nd) \
+{ \
+ struct rb_node **p = &sorted->entries.rb_node, *parent = NULL; \
+ while (*p != NULL) { \
+ parent = *p; \
+ if (__name##_sorted__cmp(sorted_nd, parent)) \
+ p = &(*p)->rb_left; \
+ else \
+ p = &(*p)->rb_right; \
+ } \
+ rb_link_node(sorted_nd, parent, p); \
+ rb_insert_color(sorted_nd, &sorted->entries); \
+} \
+ \
+static void __name##_sorted__sort(struct __name##_sorted *sorted, \
+ struct rb_root *entries) \
+{ \
+ struct rb_node *nd; \
+ unsigned int i = 0; \
+ for (nd = rb_first(entries); nd; nd = rb_next(nd)) { \
+ struct __name##_sorted_entry *snd = &sorted->nd[i++]; \
+ __name##_sorted__init_entry(nd, snd); \
+ __name##_sorted__insert(sorted, &snd->rb_node); \
+ } \
+} \
+ \
+static struct __name##_sorted *__name##_sorted__new(struct rb_root *entries, \
+ int nr_entries) \
+{ \
+ struct __name##_sorted *sorted; \
+ sorted = malloc(sizeof(*sorted) + sizeof(sorted->nd[0]) * nr_entries); \
+ if (sorted) { \
+ sorted->entries = RB_ROOT; \
+ __name##_sorted__sort(sorted, entries); \
+ } \
+ return sorted; \
+} \
+ \
+static void __name##_sorted__delete(struct __name##_sorted *sorted) \
+{ \
+ free(sorted); \
+} \
+ \
+static void __name##_sorted__init_entry(struct rb_node *nd, \
+ struct __name##_sorted_entry *entry)
+
+#define DECLARE_RESORT_RB(__name) \
+struct __name##_sorted_entry *__name##_entry; \
+struct __name##_sorted *__name = __name##_sorted__new
+
+#define resort_rb__for_each_entry(__nd, __name) \
+ for (__nd = rb_first(&__name->entries); \
+ __name##_entry = rb_entry(__nd, struct __name##_sorted_entry, \
+ rb_node), __nd; \
+ __nd = rb_next(__nd))
+
+#define resort_rb__delete(__name) \
+ __name##_sorted__delete(__name), __name = NULL
+
+/*
+ * Helpers for other classes that contain both an rbtree and the
+ * number of entries in it:
+ */
+
+/* For 'struct intlist' */
+#define DECLARE_RESORT_RB_INTLIST(__name, __ilist) \
+ DECLARE_RESORT_RB(__name)(&__ilist->rblist.entries, \
+ __ilist->rblist.nr_entries)
+
+/* For 'struct machine->threads' */
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket) \
+ DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries, \
+ __machine->threads[hash_bucket].nr)
+
+#endif /* _PERF_RESORT_RB_H_ */
diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c
new file mode 100644
index 000000000..0efc3258c
--- /dev/null
+++ b/tools/perf/util/rblist.c
@@ -0,0 +1,133 @@
+/*
+ * Based on strlist.c by:
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "rblist.h"
+
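+/*
+ * Insert new_entry into the tree. The node_cmp(node, entry) callback follows
+ * memcmp() conventions: a return value > 0 means the node sorts after the
+ * entry (descend left), < 0 means it sorts before (descend right), and 0
+ * means the entry is already present.
+ */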
+int rblist__add_node(struct rblist *rblist, const void *new_entry)
+{
+ struct rb_node **p = &rblist->entries.rb_node;
+ struct rb_node *parent = NULL, *new_node;
+
+ while (*p != NULL) {
+ int rc;
+
+ parent = *p;
+
+ rc = rblist->node_cmp(parent, new_entry);
+ if (rc > 0)
+ p = &(*p)->rb_left;
+ else if (rc < 0)
+ p = &(*p)->rb_right;
+ else
+ return -EEXIST;
+ }
+
+ new_node = rblist->node_new(rblist, new_entry);
+ if (new_node == NULL)
+ return -ENOMEM;
+
+ rb_link_node(new_node, parent, p);
+ rb_insert_color(new_node, &rblist->entries);
+ ++rblist->nr_entries;
+
+ return 0;
+}
+
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node)
+{
+ rb_erase(rb_node, &rblist->entries);
+ --rblist->nr_entries;
+ rblist->node_delete(rblist, rb_node);
+}
+
+static struct rb_node *__rblist__findnew(struct rblist *rblist,
+ const void *entry,
+ bool create)
+{
+ struct rb_node **p = &rblist->entries.rb_node;
+ struct rb_node *parent = NULL, *new_node = NULL;
+
+ while (*p != NULL) {
+ int rc;
+
+ parent = *p;
+
+ rc = rblist->node_cmp(parent, entry);
+ if (rc > 0)
+ p = &(*p)->rb_left;
+ else if (rc < 0)
+ p = &(*p)->rb_right;
+ else
+ return parent;
+ }
+
+ if (create) {
+ new_node = rblist->node_new(rblist, entry);
+ if (new_node) {
+ rb_link_node(new_node, parent, p);
+ rb_insert_color(new_node, &rblist->entries);
+ ++rblist->nr_entries;
+ }
+ }
+
+ return new_node;
+}
+
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry)
+{
+ return __rblist__findnew(rblist, entry, false);
+}
+
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry)
+{
+ return __rblist__findnew(rblist, entry, true);
+}
+
+void rblist__init(struct rblist *rblist)
+{
+ if (rblist != NULL) {
+ rblist->entries = RB_ROOT;
+ rblist->nr_entries = 0;
+ }
+
+ return;
+}
+
+void rblist__exit(struct rblist *rblist)
+{
+ struct rb_node *pos, *next = rb_first(&rblist->entries);
+
+ while (next) {
+ pos = next;
+ next = rb_next(pos);
+ rblist__remove_node(rblist, pos);
+ }
+}
+
+void rblist__delete(struct rblist *rblist)
+{
+ if (rblist != NULL) {
+ rblist__exit(rblist);
+ free(rblist);
+ }
+}
+
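+/*
+ * Return the idx'th node in rb_node sort order, or NULL when idx is out of
+ * range. Note this is a linear walk from the first entry.
+ */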
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx)
+{
+ struct rb_node *node;
+
+ for (node = rb_first(&rblist->entries); node; node = rb_next(node)) {
+ if (!idx--)
+ return node;
+ }
+
+ return NULL;
+}
diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h
new file mode 100644
index 000000000..76df15c27
--- /dev/null
+++ b/tools/perf/util/rblist.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_RBLIST_H
+#define __PERF_RBLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+/*
+ * create node structs of the form:
+ * struct my_node {
+ * struct rb_node rb_node;
+ * ... my data ...
+ * };
+ *
+ * create list structs of the form:
+ * struct mylist {
+ * struct rblist rblist;
+ * ... my data ...
+ * };
+ */
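+
+/*
+ * For illustration only, a minimal sketch of the three callbacks for a
+ * hypothetical rblist keyed by an int. Names such as 'struct my_node' and
+ * 'my_node_cmp' are made up for this example and are not part of this API:
+ *
+ *	struct my_node {
+ *		struct rb_node rb_node;
+ *		int key;
+ *	};
+ *
+ *	static int my_node_cmp(struct rb_node *rbn, const void *entry)
+ *	{
+ *		struct my_node *node = container_of(rbn, struct my_node, rb_node);
+ *
+ *		return node->key - *(const int *)entry;
+ *	}
+ *
+ *	static struct rb_node *my_node_new(struct rblist *rblist,
+ *					   const void *entry)
+ *	{
+ *		struct my_node *node = malloc(sizeof(*node));
+ *
+ *		if (node == NULL)
+ *			return NULL;
+ *		node->key = *(const int *)entry;
+ *		return &node->rb_node;
+ *	}
+ *
+ *	static void my_node_delete(struct rblist *rblist, struct rb_node *rbn)
+ *	{
+ *		free(container_of(rbn, struct my_node, rb_node));
+ *	}
+ */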
+
+struct rblist {
+ struct rb_root entries;
+ unsigned int nr_entries;
+
+ int (*node_cmp)(struct rb_node *rbn, const void *entry);
+ struct rb_node *(*node_new)(struct rblist *rlist, const void *new_entry);
+ void (*node_delete)(struct rblist *rblist, struct rb_node *rb_node);
+};
+
+void rblist__init(struct rblist *rblist);
+void rblist__exit(struct rblist *rblist);
+void rblist__delete(struct rblist *rblist);
+int rblist__add_node(struct rblist *rblist, const void *new_entry);
+void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node);
+struct rb_node *rblist__find(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__findnew(struct rblist *rblist, const void *entry);
+struct rb_node *rblist__entry(const struct rblist *rblist, unsigned int idx);
+
+static inline bool rblist__empty(const struct rblist *rblist)
+{
+ return rblist->nr_entries == 0;
+}
+
+static inline unsigned int rblist__nr_entries(const struct rblist *rblist)
+{
+ return rblist->nr_entries;
+}
+
+#endif /* __PERF_RBLIST_H */
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
new file mode 100644
index 000000000..9cfc7bf16
--- /dev/null
+++ b/tools/perf/util/record.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "evlist.h"
+#include "evsel.h"
+#include "cpumap.h"
+#include "parse-events.h"
+#include <errno.h>
+#include <api/fs/fs.h>
+#include <subcmd/parse-options.h>
+#include "util.h"
+#include "cloexec.h"
+
+typedef void (*setup_probe_fn_t)(struct perf_evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+{
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ unsigned long flags = perf_event_open_cloexec_flag();
+ int err = -EAGAIN, fd;
+	/* sample_period and sample_freq share a union; only one may be set */
+
+ evlist = perf_evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ if (parse_events(evlist, str, NULL))
+ goto out_delete;
+
+ evsel = perf_evlist__first(evlist);
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1, flags);
+ if (fd < 0) {
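+			/*
+			 * Opening system-wide (pid == -1) may be denied due to
+			 * perf_event_paranoid settings; retry restricted to
+			 * the current process (pid 0).
+			 */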
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+
+ fn(evsel);
+
+ fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ err = -EINVAL;
+ goto out_delete;
+ }
+ close(fd);
+ err = 0;
+
+out_delete:
+ perf_evlist__delete(evlist);
+ return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+ const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+ struct cpu_map *cpus;
+ int cpu, ret, i = 0;
+
+ cpus = cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ cpu_map__put(cpus);
+
+ do {
+ ret = perf_do_probe_api(fn, cpu, try[i++]);
+ if (!ret)
+ return true;
+ } while (ret == -EAGAIN && try[i]);
+
+ return false;
+}
+
+static void perf_probe_sample_identifier(struct perf_evsel *evsel)
+{
+ evsel->attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+static void perf_probe_comm_exec(struct perf_evsel *evsel)
+{
+ evsel->attr.comm_exec = 1;
+}
+
+static void perf_probe_context_switch(struct perf_evsel *evsel)
+{
+ evsel->attr.context_switch = 1;
+}
+
+bool perf_can_sample_identifier(void)
+{
+ return perf_probe_api(perf_probe_sample_identifier);
+}
+
+static bool perf_can_comm_exec(void)
+{
+ return perf_probe_api(perf_probe_comm_exec);
+}
+
+bool perf_can_record_switch_events(void)
+{
+ return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ struct cpu_map *cpus;
+ int cpu, fd;
+
+ cpus = cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ cpu_map__put(cpus);
+
+ fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+
+ return true;
+}
+
+void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts,
+ struct callchain_param *callchain)
+{
+ struct perf_evsel *evsel;
+ bool use_sample_identifier = false;
+ bool use_comm_exec;
+ bool sample_id = opts->sample_id;
+
+ /*
+ * Set the evsel leader links before we configure attributes,
+ * since some might depend on this info.
+ */
+ if (opts->group)
+ perf_evlist__set_leader(evlist);
+
+ if (evlist->cpus->map[0] < 0)
+ opts->no_inherit = true;
+
+ use_comm_exec = perf_can_comm_exec();
+
+ evlist__for_each_entry(evlist, evsel) {
+ perf_evsel__config(evsel, opts, callchain);
+ if (evsel->tracking && use_comm_exec)
+ evsel->attr.comm_exec = 1;
+ }
+
+ if (opts->full_auxtrace) {
+ /*
+ * Need to be able to synthesize and parse selected events with
+ * arbitrary sample types, which requires always being able to
+ * match the id.
+ */
+ use_sample_identifier = perf_can_sample_identifier();
+ sample_id = true;
+ } else if (evlist->nr_entries > 1) {
+ struct perf_evsel *first = perf_evlist__first(evlist);
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.sample_type == first->attr.sample_type)
+ continue;
+ use_sample_identifier = perf_can_sample_identifier();
+ break;
+ }
+ sample_id = true;
+ }
+
+ if (sample_id) {
+ evlist__for_each_entry(evlist, evsel)
+ perf_evsel__set_sample_id(evsel, use_sample_identifier);
+ }
+
+ perf_evlist__set_id_pos(evlist);
+}
+
+static int get_max_rate(unsigned int *rate)
+{
+ return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
+}
+
+static int record_opts__config_freq(struct record_opts *opts)
+{
+ bool user_freq = opts->user_freq != UINT_MAX;
+ unsigned int max_rate;
+
+ if (opts->user_interval != ULLONG_MAX)
+ opts->default_interval = opts->user_interval;
+ if (user_freq)
+ opts->freq = opts->user_freq;
+
+ /*
+ * User specified count overrides default frequency.
+ */
+ if (opts->default_interval)
+ opts->freq = 0;
+ else if (opts->freq) {
+ opts->default_interval = opts->freq;
+ } else {
+ pr_err("frequency and count are zero, aborting\n");
+ return -1;
+ }
+
+ if (get_max_rate(&max_rate))
+ return 0;
+
+ /*
+ * User specified frequency is over current maximum.
+ */
+ if (user_freq && (max_rate < opts->freq)) {
+ if (opts->strict_freq) {
+ pr_err("error: Maximum frequency rate (%'u Hz) exceeded.\n"
+ " Please use -F freq option with a lower value or consider\n"
+ " tweaking /proc/sys/kernel/perf_event_max_sample_rate.\n",
+ max_rate);
+ return -1;
+ } else {
+ pr_warning("warning: Maximum frequency rate (%'u Hz) exceeded, throttling from %'u Hz to %'u Hz.\n"
+ " The limit can be raised via /proc/sys/kernel/perf_event_max_sample_rate.\n"
+ " The kernel will lower it when perf's interrupts take too long.\n"
+ " Use --strict-freq to disable this throttling, refusing to record.\n",
+ max_rate, opts->freq, max_rate);
+
+ opts->freq = max_rate;
+ }
+ }
+
+ /*
+ * Default frequency is over current maximum.
+ */
+ if (max_rate < opts->freq) {
+ pr_warning("Lowering default frequency rate to %u.\n"
+ "Please consider tweaking "
+ "/proc/sys/kernel/perf_event_max_sample_rate.\n",
+ max_rate);
+ opts->freq = max_rate;
+ }
+
+ return 0;
+}
+
+int record_opts__config(struct record_opts *opts)
+{
+ return record_opts__config_freq(opts);
+}
+
+bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
+{
+ struct perf_evlist *temp_evlist;
+ struct perf_evsel *evsel;
+ int err, fd, cpu;
+ bool ret = false;
+ pid_t pid = -1;
+
+ temp_evlist = perf_evlist__new();
+ if (!temp_evlist)
+ return false;
+
+ err = parse_events(temp_evlist, str, NULL);
+ if (err)
+ goto out_delete;
+
+ evsel = perf_evlist__last(temp_evlist);
+
+ if (!evlist || cpu_map__empty(evlist->cpus)) {
+ struct cpu_map *cpus = cpu_map__new(NULL);
+
+ cpu = cpus ? cpus->map[0] : 0;
+ cpu_map__put(cpus);
+ } else {
+ cpu = evlist->cpus->map[0];
+ }
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->attr, pid, cpu, -1,
+ perf_event_open_cloexec_flag());
+ if (fd < 0) {
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+ ret = true;
+
+out_delete:
+ perf_evlist__delete(temp_evlist);
+ return ret;
+}
+
+int record__parse_freq(const struct option *opt, const char *str, int unset __maybe_unused)
+{
+ unsigned int freq;
+ struct record_opts *opts = opt->value;
+
+ if (!str)
+ return -EINVAL;
+
+ if (strcasecmp(str, "max") == 0) {
+ if (get_max_rate(&freq)) {
+ pr_err("couldn't read /proc/sys/kernel/perf_event_max_sample_rate\n");
+ return -1;
+ }
+ pr_info("info: Using a maximum frequency rate of %'d Hz\n", freq);
+ } else {
+ freq = atoi(str);
+ }
+
+ opts->user_freq = freq;
+ return 0;
+}
diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c
new file mode 100644
index 000000000..5e52e7baa
--- /dev/null
+++ b/tools/perf/util/rwsem.c
@@ -0,0 +1,32 @@
+#include "util.h"
+#include "rwsem.h"
+
+int init_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_init(&sem->lock, NULL);
+}
+
+int exit_rwsem(struct rw_semaphore *sem)
+{
+ return pthread_rwlock_destroy(&sem->lock);
+}
+
+int down_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_rdlock(&sem->lock);
+}
+
+int up_read(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
+
+int down_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_wrlock(&sem->lock);
+}
+
+int up_write(struct rw_semaphore *sem)
+{
+ return perf_singlethreaded ? 0 : pthread_rwlock_unlock(&sem->lock);
+}
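+
+/*
+ * Illustrative only: usage mirrors the kernel rwsem API, e.g.
+ *
+ *	static struct rw_semaphore lock;
+ *
+ *	init_rwsem(&lock);
+ *	down_read(&lock);
+ *	... read shared state ...
+ *	up_read(&lock);
+ *
+ * When perf_singlethreaded is set, the down/up operations become no-ops.
+ */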
diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h
new file mode 100644
index 000000000..94565ad4d
--- /dev/null
+++ b/tools/perf/util/rwsem.h
@@ -0,0 +1,19 @@
+#ifndef _PERF_RWSEM_H
+#define _PERF_RWSEM_H
+
+#include <pthread.h>
+
+struct rw_semaphore {
+ pthread_rwlock_t lock;
+};
+
+int init_rwsem(struct rw_semaphore *sem);
+int exit_rwsem(struct rw_semaphore *sem);
+
+int down_read(struct rw_semaphore *sem);
+int up_read(struct rw_semaphore *sem);
+
+int down_write(struct rw_semaphore *sem);
+int up_write(struct rw_semaphore *sem);
+
+#endif /* _PERF_RWSEM_H */
diff --git a/tools/perf/util/s390-cpumsf-kernel.h b/tools/perf/util/s390-cpumsf-kernel.h
new file mode 100644
index 000000000..de8c7ad0e
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf-kernel.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Auxtrace support for s390 CPU measurement sampling facility
+ *
+ * Copyright IBM Corp. 2018
+ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
+ * Thomas Richter <tmricht@linux.ibm.com>
+ */
+#ifndef S390_CPUMSF_KERNEL_H
+#define S390_CPUMSF_KERNEL_H
+
+#define S390_CPUMSF_PAGESZ 4096 /* Size of sample block units */
+#define S390_CPUMSF_DIAG_DEF_FIRST 0x8001 /* Diagnostic entry lowest id */
+
+struct hws_basic_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:4; /* 16-19 reserved */
+ unsigned int U:4; /* 20-23 Number of unique instruct. */
+ unsigned int z:2; /* zeros */
+ unsigned int T:1; /* 26 PSW DAT mode */
+ unsigned int W:1; /* 27 PSW wait state */
+ unsigned int P:1; /* 28 PSW Problem state */
+ unsigned int AS:2; /* 29-30 PSW address-space control */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ unsigned int CL:2; /* 32-33 Configuration Level */
+ unsigned int:14;
+ unsigned int prim_asn:16; /* primary ASN */
+ unsigned long long ia; /* Instruction Address */
+ unsigned long long gpp; /* Guest Program Parameter */
+ unsigned long long hpp; /* Host Program Parameter */
+};
+
+struct hws_diag_entry {
+ unsigned int def:16; /* 0-15 Data Entry Format */
+ unsigned int R:15; /* 16-19 and 20-30 reserved */
+ unsigned int I:1; /* 31 entry valid or invalid */
+ u8 data[]; /* Machine-dependent sample data */
+};
+
+struct hws_combined_entry {
+ struct hws_basic_entry basic; /* Basic-sampling data entry */
+ struct hws_diag_entry diag; /* Diagnostic-sampling data entry */
+};
+
+struct hws_trailer_entry {
+ union {
+ struct {
+ unsigned int f:1; /* 0 - Block Full Indicator */
+ unsigned int a:1; /* 1 - Alert request control */
+ unsigned int t:1; /* 2 - Timestamp format */
+ unsigned int:29; /* 3 - 31: Reserved */
+ unsigned int bsdes:16; /* 32-47: size of basic SDE */
+ unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */
+ };
+ unsigned long long flags; /* 0 - 64: All indicators */
+ };
+ unsigned long long overflow; /* 64 - sample Overflow count */
+ unsigned char timestamp[16]; /* 16 - 31 timestamp */
+ unsigned long long reserved1; /* 32 -Reserved */
+ unsigned long long reserved2; /* */
+ union { /* 48 - reserved for programming use */
+ struct {
+ unsigned long long clock_base:1; /* in progusage2 */
+ unsigned long long progusage1:63;
+ unsigned long long progusage2;
+ };
+ unsigned long long progusage[2];
+ };
+};
+
+#endif
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
new file mode 100644
index 000000000..910f2621d
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.c
@@ -0,0 +1,950 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Auxiliary traces are collected during 'perf record' using the rbd000 event.
+ * Several PERF_RECORD_XXX are generated during recording:
+ *
+ * PERF_RECORD_AUX:
+ * Records that new data landed in the AUX buffer part.
+ * PERF_RECORD_AUXTRACE:
+ * Defines auxtrace data. Followed by the actual data. The contents of
+ * the auxtrace data is dependent on the event and the CPU.
+ * This record is generated by perf record command. For details
+ * see Documentation/perf.data-file-format.txt.
+ * PERF_RECORD_AUXTRACE_INFO:
+ * Defines a table of contents for PERF_RECORD_AUXTRACE records. This
+ * record is generated during 'perf record' command. Each record contains up
+ * to 256 entries describing offset and size of the AUXTRACE data in the
+ * perf.data file.
+ * PERF_RECORD_AUXTRACE_ERROR:
+ * Indicates an error during AUXTRACE collection such as buffer overflow.
+ * PERF_RECORD_FINISHED_ROUND:
+ * Perf events are not necessarily in time stamp order, as they can be
+ * collected in parallel on different CPUs. If the events should be
+ * processed in time order they need to be sorted first.
+ * Perf report guarantees that there is no reordering over a
+ * PERF_RECORD_FINISHED_ROUND boundary event. All perf records with a
+ * time stamp lower than this record are processed (and displayed) before
+ * the succeeding perf records are processed.
+ *
+ * These records are evaluated during perf report command.
+ *
+ * 1. PERF_RECORD_AUXTRACE_INFO is used to set up the infrastructure for
+ * auxiliary trace data processing. See s390_cpumsf_process_auxtrace_info()
+ * below.
+ * Auxiliary trace data is collected per CPU. To merge the data into the report
+ * an auxtrace_queue is created for each CPU. It is assumed that the auxtrace
+ * data is in ascending order.
+ *
+ * Each queue has a double linked list of auxtrace_buffers. This list contains
+ * the offset and size of a CPU's auxtrace data. During auxtrace processing
+ * the data portion is mmap()'ed.
+ *
+ * To sort the queues in chronological order, all queue access is controlled
+ * by the auxtrace_heap. This is basically a stack, each stack element has two
+ * entries, the queue number and a time stamp. However, the stack is sorted by
+ * the time stamps. The highest time stamp is at the bottom, the lowest
+ * (nearest) time stamp is at the top. That sort order is maintained at all
+ * times!
+ *
+ * After the auxtrace infrastructure has been set up, the auxtrace queues are
+ * filled with data (offset/size pairs) and the auxtrace_heap is populated.
+ *
+ * 2. PERF_RECORD_XXX processing triggers access to the auxtrace_queues.
+ * Each record is handled by s390_cpumsf_process_event(). The time stamp of
+ * the perf record is compared with the time stamp located on the auxtrace_heap
+ * top element. If that time stamp is lower than the time stamp from the
+ * record sample, the auxtrace queues will be processed. As auxtrace queues
+ * control many auxtrace_buffers and each buffer can be quite large, the
+ * auxtrace buffer might be processed only partially. In this case the
+ * position in the auxtrace_buffer of that queue is remembered and the time
+ * stamp of the last processed entry of the auxtrace_buffer replaces the
+ * current auxtrace_heap top.
+ *
+ * 3. Auxtrace_queues might run out of data and are fed by the
+ * PERF_RECORD_AUXTRACE handling, see s390_cpumsf_process_auxtrace_event().
+ *
+ * Event Generation
+ * Each sampling-data entry in the auxiliary trace data generates a perf
+ * sample. This sample is filled with data from the auxtrace such as PID/TID,
+ * instruction address, CPU state, etc. This sample is processed with
+ * perf_session__deliver_synth_event() to be included into the GUI.
+ *
+ * 4. PERF_RECORD_FINISHED_ROUND event is used to process all the remaining
+ * auxiliary trace entries until the auxtrace_heap top reaches the time stamp
+ * of this record. This is triggered by ordered_event->deliver().
+ *
+ *
+ * Perf event processing.
+ * Event processing of PERF_RECORD_XXX entries relies on time stamp entries.
+ * This is the function call sequence:
+ *
+ * __cmd_report()
+ * |
+ * perf_session__process_events()
+ * |
+ * __perf_session__process_events()
+ * |
+ * perf_session__process_event()
+ * | This function splits the PERF_RECORD_XXX records.
+ * | - Those generated by perf record command (type number equal or higher
+ * | than PERF_RECORD_USER_TYPE_START) are handled by
+ * | perf_session__process_user_event(see below)
+ * | - Those generated by the kernel are handled by
+ * | perf_evlist__parse_sample_timestamp()
+ * |
+ * perf_evlist__parse_sample_timestamp()
+ * | Extract time stamp from sample data.
+ * |
+ * perf_session__queue_event()
+ * | If timestamp is positive the sample is entered into an ordered_event
+ * | list, sorted by timestamp. The event processing is deferred until
+ * | later (see perf_session__process_user_event()).
+ * | Other timestamps (0 or -1) are handled immediately by
+ * | perf_session__deliver_event(). These are events generated at start up
+ * | of command perf record. They create PERF_RECORD_COMM and PERF_RECORD_MMAP*
+ * | records. They are needed to create a list of running processes and their
+ * | memory mappings and layout. They are needed at the beginning to enable
+ * | command perf report to create process trees and memory mappings.
+ * |
+ * perf_session__deliver_event()
+ * | Delivers a PERF_RECORD_XXX entry for handling.
+ * |
+ * auxtrace__process_event()
+ * | The timestamp of the PERF_RECORD_XXX entry is taken to correlate with
+ * | time stamps from the auxiliary trace buffers. This enables
+ * | synchronization between auxiliary trace data and the events on the
+ * | perf.data file.
+ * |
+ * machine__deliver_event()
+ * | Handles the PERF_RECORD_XXX event. This depends on the record type.
+ * It might update the process tree, update a process memory map or enter
+ * a sample with IP and callchain data into the GUI data pool.
+ *
+ *
+ * Deferred processing determined by perf_session__process_user_event() is
+ * finally done when a PERF_RECORD_FINISHED_ROUND is encountered. These
+ * are generated during command perf record.
+ * The timestamp of PERF_RECORD_FINISHED_ROUND event is taken to process all
+ * PERF_RECORD_XXX entries stored in the ordered_event list. This list was
+ * built up while reading the perf.data file.
+ * Each event is now processed by calling perf_session__deliver_event().
+ * This enables time synchronization between the data in the perf.data file and
+ * the data in the auxiliary trace buffers.
+ */
+
+#include <endian.h>
+#include <errno.h>
+#include <byteswap.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "cpumap.h"
+#include "color.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "machine.h"
+#include "session.h"
+#include "util.h"
+#include "thread.h"
+#include "debug.h"
+#include "auxtrace.h"
+#include "s390-cpumsf.h"
+#include "s390-cpumsf-kernel.h"
+
+struct s390_cpumsf {
+ struct auxtrace auxtrace;
+ struct auxtrace_queues queues;
+ struct auxtrace_heap heap;
+ struct perf_session *session;
+ struct machine *machine;
+ u32 auxtrace_type;
+ u32 pmu_type;
+ u16 machine_type;
+ bool data_queued;
+};
+
+struct s390_cpumsf_queue {
+ struct s390_cpumsf *sf;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ int cpu;
+};
+
+/* Display s390 CPU measurement facility basic-sampling data entry */
+static bool s390_cpumsf_basic_show(const char *color, size_t pos,
+ struct hws_basic_entry *basic)
+{
+ if (basic->def != 1) {
+ pr_err("Invalid AUX trace basic entry [%#08zx]\n", pos);
+ return false;
+ }
+ color_fprintf(stdout, color, " [%#08zx] Basic Def:%04x Inst:%#04x"
+ " %c%c%c%c AS:%d ASN:%#04x IA:%#018llx\n"
+ "\t\tCL:%d HPP:%#018llx GPP:%#018llx\n",
+ pos, basic->def, basic->U,
+ basic->T ? 'T' : ' ',
+ basic->W ? 'W' : ' ',
+ basic->P ? 'P' : ' ',
+ basic->I ? 'I' : ' ',
+ basic->AS, basic->prim_asn, basic->ia, basic->CL,
+ basic->hpp, basic->gpp);
+ return true;
+}
+
+/* Display s390 CPU measurement facility diagnostic-sampling data entry */
+static bool s390_cpumsf_diag_show(const char *color, size_t pos,
+ struct hws_diag_entry *diag)
+{
+ if (diag->def < S390_CPUMSF_DIAG_DEF_FIRST) {
+ pr_err("Invalid AUX trace diagnostic entry [%#08zx]\n", pos);
+ return false;
+ }
+ color_fprintf(stdout, color, " [%#08zx] Diag Def:%04x %c\n",
+ pos, diag->def, diag->I ? 'I' : ' ');
+ return true;
+}
+
+/* Return TOD timestamp contained in a trailer entry */
+static unsigned long long trailer_timestamp(struct hws_trailer_entry *te)
+{
+ /* te->t set: TOD in STCKE format, bytes 8-15
+	 * te->t not set: TOD in STCK format, bytes 0-7
+ */
+ unsigned long long ts;
+
+ memcpy(&ts, &te->timestamp[te->t], sizeof(ts));
+ return ts;
+}
+
+/* Display s390 CPU measurement facility trailer entry */
+static bool s390_cpumsf_trailer_show(const char *color, size_t pos,
+ struct hws_trailer_entry *te)
+{
+ if (te->bsdes != sizeof(struct hws_basic_entry)) {
+ pr_err("Invalid AUX trace trailer entry [%#08zx]\n", pos);
+ return false;
+ }
+ color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d"
+ " dsdes:%d Overflow:%lld Time:%#llx\n"
+ "\t\tC:%d TOD:%#lx 1:%#llx 2:%#llx\n",
+ pos,
+ te->f ? 'F' : ' ',
+ te->a ? 'A' : ' ',
+ te->t ? 'T' : ' ',
+ te->bsdes, te->dsdes, te->overflow,
+ trailer_timestamp(te), te->clock_base, te->progusage2,
+ te->progusage[0], te->progusage[1]);
+ return true;
+}
+
+/* Test a sample data block. It must be 4KB or a multiple thereof in size and
+ * 4KB page aligned. Each sample data page has a trailer entry at the
+ * end which contains the sample entry data sizes.
+ *
+ * Return true if the sample data block passes the checks and set the
+ * basic set entry size and diagnostic set entry size.
+ *
+ * Return false on failure.
+ *
+ * Note: Old hardware does not set the basic or diagnostic entry sizes
+ * in the trailer entry. Use the type number instead.
+ */
+static bool s390_cpumsf_validate(int machine_type,
+ unsigned char *buf, size_t len,
+ unsigned short *bsdes,
+ unsigned short *dsdes)
+{
+ struct hws_basic_entry *basic = (struct hws_basic_entry *)buf;
+ struct hws_trailer_entry *te;
+
+ *dsdes = *bsdes = 0;
+ if (len & (S390_CPUMSF_PAGESZ - 1)) /* Illegal size */
+ return false;
+ if (basic->def != 1) /* No basic set entry, must be first */
+ return false;
+ /* Check for trailer entry at end of SDB */
+ te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+ - sizeof(*te));
+ *bsdes = te->bsdes;
+ *dsdes = te->dsdes;
+ if (!te->bsdes && !te->dsdes) {
+ /* Very old hardware, use CPUID */
+ switch (machine_type) {
+ case 2097:
+ case 2098:
+ *dsdes = 64;
+ *bsdes = 32;
+ break;
+ case 2817:
+ case 2818:
+ *dsdes = 74;
+ *bsdes = 32;
+ break;
+ case 2827:
+ case 2828:
+ *dsdes = 85;
+ *bsdes = 32;
+ break;
+ case 2964:
+ case 2965:
+ *dsdes = 112;
+ *bsdes = 32;
+ break;
+ default:
+ /* Illegal trailer entry */
+ return false;
+ }
+ }
+ return true;
+}
+
+/* Return true if there is room for another entry before the trailer
+ * entry of the current 4KB page is reached.
+ */
+static bool s390_cpumsf_reached_trailer(size_t entry_sz, size_t pos)
+{
+ size_t payload = S390_CPUMSF_PAGESZ - sizeof(struct hws_trailer_entry);
+
+ if (payload - (pos & (S390_CPUMSF_PAGESZ - 1)) < entry_sz)
+ return false;
+ return true;
+}
+
+/* Dump an auxiliary buffer. These buffers are multiples of
+ * 4KB SDB pages.
+ */
+static void s390_cpumsf_dump(struct s390_cpumsf *sf,
+ unsigned char *buf, size_t len)
+{
+ const char *color = PERF_COLOR_BLUE;
+ struct hws_basic_entry *basic;
+ struct hws_diag_entry *diag;
+ unsigned short bsdes, dsdes;
+ size_t pos = 0;
+
+ color_fprintf(stdout, color,
+ ". ... s390 AUX data: size %zu bytes\n",
+ len);
+
+ if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+ &dsdes)) {
+ pr_err("Invalid AUX trace data block size:%zu"
+ " (type:%d bsdes:%hd dsdes:%hd)\n",
+ len, sf->machine_type, bsdes, dsdes);
+ return;
+ }
+
+ /* s390 kernel always returns 4KB blocks fully occupied,
+ * no partially filled SDBs.
+ */
+ while (pos < len) {
+ /* Handle Basic entry */
+ basic = (struct hws_basic_entry *)(buf + pos);
+ if (s390_cpumsf_basic_show(color, pos, basic))
+ pos += bsdes;
+ else
+ return;
+
+ /* Handle Diagnostic entry */
+ diag = (struct hws_diag_entry *)(buf + pos);
+ if (s390_cpumsf_diag_show(color, pos, diag))
+ pos += dsdes;
+ else
+ return;
+
+ /* Check for trailer entry */
+ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+ /* Show trailer entry */
+ struct hws_trailer_entry te;
+
+ pos = (pos + S390_CPUMSF_PAGESZ)
+ & ~(S390_CPUMSF_PAGESZ - 1);
+ pos -= sizeof(te);
+ memcpy(&te, buf + pos, sizeof(te));
+ /* Set descriptor sizes in case of old hardware
+ * where these values are not set.
+ */
+ te.bsdes = bsdes;
+ te.dsdes = dsdes;
+ if (s390_cpumsf_trailer_show(color, pos, &te))
+ pos += sizeof(te);
+ else
+ return;
+ }
+ }
+}
+
+static void s390_cpumsf_dump_event(struct s390_cpumsf *sf, unsigned char *buf,
+ size_t len)
+{
+ printf(".\n");
+ s390_cpumsf_dump(sf, buf, len);
+}
+
+#define S390_LPP_PID_MASK 0xffffffff
+
+static bool s390_cpumsf_make_event(size_t pos,
+ struct hws_basic_entry *basic,
+ struct s390_cpumsf_queue *sfq)
+{
+ struct perf_sample sample = {
+ .ip = basic->ia,
+ .pid = basic->hpp & S390_LPP_PID_MASK,
+ .tid = basic->hpp & S390_LPP_PID_MASK,
+ .cpumode = PERF_RECORD_MISC_CPUMODE_UNKNOWN,
+ .cpu = sfq->cpu,
+ .period = 1
+ };
+ union perf_event event;
+
+ memset(&event, 0, sizeof(event));
+ if (basic->CL == 1) /* Native LPAR mode */
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+ else if (basic->CL == 2) /* Guest kernel/user space */
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else if (basic->gpp || basic->prim_asn != 0xffff)
+ /* Use heuristics on old hardware */
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_GUEST_USER
+ : PERF_RECORD_MISC_GUEST_KERNEL;
+ else
+ sample.cpumode = basic->P ? PERF_RECORD_MISC_USER
+ : PERF_RECORD_MISC_KERNEL;
+
+ event.sample.header.type = PERF_RECORD_SAMPLE;
+ event.sample.header.misc = sample.cpumode;
+ event.sample.header.size = sizeof(struct perf_event_header);
+
+ pr_debug4("%s pos:%#zx ip:%#" PRIx64 " P:%d CL:%d pid:%d.%d cpumode:%d cpu:%d\n",
+ __func__, pos, sample.ip, basic->P, basic->CL, sample.pid,
+ sample.tid, sample.cpumode, sample.cpu);
+ if (perf_session__deliver_synth_event(sfq->sf->session, &event,
+ &sample)) {
+ pr_err("s390 Auxiliary Trace: failed to deliver event\n");
+ return false;
+ }
+ return true;
+}
+
+static unsigned long long get_trailer_time(const unsigned char *buf)
+{
+ struct hws_trailer_entry *te;
+ unsigned long long aux_time;
+
+ te = (struct hws_trailer_entry *)(buf + S390_CPUMSF_PAGESZ
+ - sizeof(*te));
+
+ if (!te->clock_base) /* TOD_CLOCK_BASE value missing */
+ return 0;
+
+	/* Convert the time stamp in the trailer entry to nanoseconds
+	 * (taken from the arch/s390 function tod_to_ns()). One TOD clock
+	 * unit corresponds to 125/512 ns, hence the two-step calculation
+	 * below. TOD_CLOCK_BASE is stored in trailer entry member progusage2.
+	 */
+ aux_time = trailer_timestamp(te) - te->progusage2;
+ aux_time = (aux_time >> 9) * 125 + (((aux_time & 0x1ff) * 125) >> 9);
+ return aux_time;
+}
+
+/* Process the data samples of a single queue. The first parameter is a
+ * pointer to the queue, the second parameter is the time stamp. This
+ * is the time stamp:
+ * - of the event that triggered this processing.
+ * - or the time stamp when the last processing of this queue stopped.
+ *   In this case it stopped at a 4KB page boundary and recorded the
+ *   position where to continue processing on the next invocation
+ *   (see buffer->use_data and buffer->use_size).
+ *
+ * When this function returns the second parameter is updated to
+ * reflect the time stamp of the last processed auxiliary data entry
+ * (taken from the trailer entry of that page). The caller uses this
+ * returned time stamp to record the last processed entry in this
+ * queue.
+ *
+ * The function returns:
+ * 0: Processing successful. The second parameter returns the
+ * time stamp from the trailer entry until which position
+ * processing took place. Subsequent calls resume from this
+ * position.
+ * <0: An error occurred during processing. The second parameter
+ * returns the maximum time stamp.
+ * >0: Done on this queue. The second parameter returns the
+ * maximum time stamp.
+ */
+static int s390_cpumsf_samples(struct s390_cpumsf_queue *sfq, u64 *ts)
+{
+ struct s390_cpumsf *sf = sfq->sf;
+ unsigned char *buf = sfq->buffer->use_data;
+ size_t len = sfq->buffer->use_size;
+ struct hws_basic_entry *basic;
+ unsigned short bsdes, dsdes;
+ size_t pos = 0;
+ int err = 1;
+ u64 aux_ts;
+
+ if (!s390_cpumsf_validate(sf->machine_type, buf, len, &bsdes,
+ &dsdes)) {
+ *ts = ~0ULL;
+ return -1;
+ }
+
+ /* Get trailer entry time stamp and check if entries in
+ * this auxiliary page are ready for processing. If the
+ * time stamp of the first entry is too high, whole buffer
+ * can be skipped. In this case return time stamp.
+ */
+ aux_ts = get_trailer_time(buf);
+ if (!aux_ts) {
+ pr_err("[%#08" PRIx64 "] Invalid AUX trailer entry TOD clock base\n",
+ (s64)sfq->buffer->data_offset);
+ aux_ts = ~0ULL;
+ goto out;
+ }
+ if (aux_ts > *ts) {
+ *ts = aux_ts;
+ return 0;
+ }
+
+ while (pos < len) {
+ /* Handle Basic entry */
+ basic = (struct hws_basic_entry *)(buf + pos);
+ if (s390_cpumsf_make_event(pos, basic, sfq))
+ pos += bsdes;
+ else {
+ err = -EBADF;
+ goto out;
+ }
+
+		pos += dsdes;	/* Skip diagnostic entry */
+
+ /* Check for trailer entry */
+ if (!s390_cpumsf_reached_trailer(bsdes + dsdes, pos)) {
+ pos = (pos + S390_CPUMSF_PAGESZ)
+ & ~(S390_CPUMSF_PAGESZ - 1);
+ /* Check existence of next page */
+ if (pos >= len)
+ break;
+ aux_ts = get_trailer_time(buf + pos);
+ if (!aux_ts) {
+ aux_ts = ~0ULL;
+ goto out;
+ }
+ if (aux_ts > *ts) {
+ *ts = aux_ts;
+ sfq->buffer->use_data += pos;
+ sfq->buffer->use_size -= pos;
+ return 0;
+ }
+ }
+ }
+out:
+ *ts = aux_ts;
+ sfq->buffer->use_size = 0;
+ sfq->buffer->use_data = NULL;
+ return err; /* Buffer completely scanned or error */
+}
+
+/* Run the s390 auxiliary trace decoder.
+ * Select the queue buffer to operate on, the caller already selected
+ * the proper queue, depending on second parameter 'ts'.
+ * This is the time stamp until which the auxiliary entries should
+ * be processed. This value is updated by called functions and
+ * returned to the caller.
+ *
+ * Resume processing in the current buffer. If there is no buffer
+ * get a new buffer from the queue and setup start position for
+ * processing.
+ * When a buffer is completely processed remove it from the queue
+ * before returning.
+ *
+ * This function returns
+ * 1: When the queue is empty. Second parameter will be set to
+ * maximum time stamp.
+ * 0: Normal processing done.
+ * <0: Error during queue buffer setup. This causes the caller
+ * to stop processing completely.
+ */
+static int s390_cpumsf_run_decoder(struct s390_cpumsf_queue *sfq,
+ u64 *ts)
+{
+
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_queue *queue;
+ int err;
+
+ queue = &sfq->sf->queues.queue_array[sfq->queue_nr];
+
+ /* Get buffer and last position in buffer to resume
+ * decoding the auxiliary entries. One buffer might be large
+ * and decoding might stop in between. This depends on the time
+ * stamp of the trailer entry in each page of the auxiliary
+ * data and the time stamp of the event triggering the decoding.
+ */
+ if (sfq->buffer == NULL) {
+ sfq->buffer = buffer = auxtrace_buffer__next(queue,
+ sfq->buffer);
+ if (!buffer) {
+ *ts = ~0ULL;
+ return 1; /* Processing done on this queue */
+ }
+ /* Start with a new buffer on this queue */
+ if (buffer->data) {
+ buffer->use_size = buffer->size;
+ buffer->use_data = buffer->data;
+ }
+ } else
+ buffer = sfq->buffer;
+
+ if (!buffer->data) {
+ int fd = perf_data__fd(sfq->sf->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ buffer->use_size = buffer->size;
+ buffer->use_data = buffer->data;
+ }
+ pr_debug4("%s queue_nr:%d buffer:%" PRId64 " offset:%#" PRIx64 " size:%#zx rest:%#zx\n",
+ __func__, sfq->queue_nr, buffer->buffer_nr, buffer->offset,
+ buffer->size, buffer->use_size);
+ err = s390_cpumsf_samples(sfq, ts);
+
+ /* If non-zero, there is either an error (err < 0) or the buffer is
+ * completely done (err > 0). The error is unrecoverable, usually
+ * some descriptors could not be read successfully, so continue with
+ * the next buffer.
+ * In both cases the parameter 'ts' has been updated.
+ */
+ if (err) {
+ sfq->buffer = NULL;
+ list_del(&buffer->list);
+ auxtrace_buffer__free(buffer);
+ if (err > 0) /* Buffer done, no error */
+ err = 0;
+ }
+ return err;
+}
+
+static struct s390_cpumsf_queue *
+s390_cpumsf_alloc_queue(struct s390_cpumsf *sf, unsigned int queue_nr)
+{
+ struct s390_cpumsf_queue *sfq;
+
+ sfq = zalloc(sizeof(struct s390_cpumsf_queue));
+ if (sfq == NULL)
+ return NULL;
+
+ sfq->sf = sf;
+ sfq->queue_nr = queue_nr;
+ sfq->cpu = -1;
+ return sfq;
+}
+
+static int s390_cpumsf_setup_queue(struct s390_cpumsf *sf,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr, u64 ts)
+{
+ struct s390_cpumsf_queue *sfq = queue->priv;
+
+ if (list_empty(&queue->head))
+ return 0;
+
+ if (sfq == NULL) {
+ sfq = s390_cpumsf_alloc_queue(sf, queue_nr);
+ if (!sfq)
+ return -ENOMEM;
+ queue->priv = sfq;
+
+ if (queue->cpu != -1)
+ sfq->cpu = queue->cpu;
+ }
+ return auxtrace_heap__add(&sf->heap, queue_nr, ts);
+}
+
+static int s390_cpumsf_setup_queues(struct s390_cpumsf *sf, u64 ts)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; i < sf->queues.nr_queues; i++) {
+ ret = s390_cpumsf_setup_queue(sf, &sf->queues.queue_array[i],
+ i, ts);
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static int s390_cpumsf_update_queues(struct s390_cpumsf *sf, u64 ts)
+{
+ if (!sf->queues.new_data)
+ return 0;
+
+ sf->queues.new_data = false;
+ return s390_cpumsf_setup_queues(sf, ts);
+}
+
+static int s390_cpumsf_process_queues(struct s390_cpumsf *sf, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct s390_cpumsf_queue *sfq;
+
+ if (!sf->heap.heap_cnt)
+ return 0;
+
+ if (sf->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = sf->heap.heap_array[0].queue_nr;
+ queue = &sf->queues.queue_array[queue_nr];
+ sfq = queue->priv;
+
+ auxtrace_heap__pop(&sf->heap);
+ if (sf->heap.heap_cnt) {
+ ts = sf->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ ret = s390_cpumsf_run_decoder(sfq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&sf->heap, queue_nr, ts);
+ return ret;
+ }
+ if (!ret) {
+ ret = auxtrace_heap__add(&sf->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ }
+ }
+ return 0;
+}
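+
+/* Example of the heap-driven merge above (hypothetical values): with
+ * two queues whose next time stamps are 100 (queue 0) and 130
+ * (queue 1) and timestamp = 150, queue 0 is popped first and decoded
+ * up to ts = min(130 + 1, 150) = 131, then re-added at its new
+ * position; queue 1 follows. This keeps samples from all CPUs
+ * delivered in time order.
+ */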
+
+static int s390_cpumsf_synth_error(struct s390_cpumsf *sf, int code, int cpu,
+ pid_t pid, pid_t tid, u64 ip)
+{
+ char msg[MAX_AUXTRACE_ERROR_MSG];
+ union perf_event event;
+ int err;
+
+ strncpy(msg, "Lost Auxiliary Trace Buffer", sizeof(msg) - 1);
+ auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
+ code, cpu, pid, tid, ip, msg);
+
+ err = perf_session__deliver_synth_event(sf->session, &event, NULL);
+ if (err)
+ pr_err("s390 Auxiliary Trace: failed to deliver error event,"
+ "error %d\n", err);
+ return err;
+}
+
+static int s390_cpumsf_lost(struct s390_cpumsf *sf, struct perf_sample *sample)
+{
+ return s390_cpumsf_synth_error(sf, 1, sample->cpu,
+ sample->pid, sample->tid, 0);
+}
+
+static int
+s390_cpumsf_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+ u64 timestamp = sample->time;
+ int err = 0;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("s390 Auxiliary Trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (event->header.type == PERF_RECORD_AUX &&
+ event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+ return s390_cpumsf_lost(sf, sample);
+
+ if (timestamp) {
+ err = s390_cpumsf_update_queues(sf, timestamp);
+ if (!err)
+ err = s390_cpumsf_process_queues(sf, timestamp);
+ }
+ return err;
+}
+
+struct s390_cpumsf_synth {
+ struct perf_tool cpumsf_tool;
+ struct perf_session *session;
+};
+
+static int
+s390_cpumsf_process_auxtrace_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool __maybe_unused)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+
+ int fd = perf_data__fd(session->data);
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int err;
+
+ if (sf->data_queued)
+ return 0;
+
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
+
+ err = auxtrace_queues__add_event(&sf->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here after copying piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ s390_cpumsf_dump_event(sf, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
+ }
+ return 0;
+}
+
+static void s390_cpumsf_free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static int s390_cpumsf_flush(struct perf_session *session __maybe_unused,
+ struct perf_tool *tool __maybe_unused)
+{
+ return 0;
+}
+
+static void s390_cpumsf_free_queues(struct perf_session *session)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+ struct auxtrace_queues *queues = &sf->queues;
+ unsigned int i;
+
+ for (i = 0; i < queues->nr_queues; i++)
+ zfree(&queues->queue_array[i].priv);
+ auxtrace_queues__free(queues);
+}
+
+static void s390_cpumsf_free(struct perf_session *session)
+{
+ struct s390_cpumsf *sf = container_of(session->auxtrace,
+ struct s390_cpumsf,
+ auxtrace);
+
+ auxtrace_heap__free(&sf->heap);
+ s390_cpumsf_free_queues(session);
+ session->auxtrace = NULL;
+ free(sf);
+}
+
+static int s390_cpumsf_get_type(const char *cpuid)
+{
+ int ret, family = 0;
+
+ ret = sscanf(cpuid, "%*[^,],%u", &family);
+ return (ret == 1) ? family : 0;
+}
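+
+/* Example (hypothetical cpuid string): the s390 cpuid has the form
+ * "<manufacturer>,<machine-type>,...", so "IBM,2964,702,..." would
+ * make the sscanf() above return 1 and yield machine type 2964.
+ */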
+
+/* Check itrace options set on perf report command.
+ * Return true if none are set or if all specified options can be
+ * handled on s390.
+ * Return false otherwise.
+ */
+static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
+{
+ if (!itops || !itops->set)
+ return true;
+ pr_err("No --itrace options supported\n");
+ return false;
+}
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session)
+{
+ struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
+ struct s390_cpumsf *sf;
+ int err;
+
+ if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event))
+ return -EINVAL;
+
+ sf = zalloc(sizeof(struct s390_cpumsf));
+ if (sf == NULL)
+ return -ENOMEM;
+
+ if (!check_auxtrace_itrace(session->itrace_synth_opts)) {
+ err = -EINVAL;
+ goto err_free;
+ }
+
+ err = auxtrace_queues__init(&sf->queues);
+ if (err)
+ goto err_free;
+
+ sf->session = session;
+ sf->machine = &session->machines.host; /* No kvm support */
+ sf->auxtrace_type = auxtrace_info->type;
+ sf->pmu_type = PERF_TYPE_RAW;
+ sf->machine_type = s390_cpumsf_get_type(session->evlist->env->cpuid);
+
+ sf->auxtrace.process_event = s390_cpumsf_process_event;
+ sf->auxtrace.process_auxtrace_event = s390_cpumsf_process_auxtrace_event;
+ sf->auxtrace.flush_events = s390_cpumsf_flush;
+ sf->auxtrace.free_events = s390_cpumsf_free_events;
+ sf->auxtrace.free = s390_cpumsf_free;
+ session->auxtrace = &sf->auxtrace;
+
+ if (dump_trace)
+ return 0;
+
+ err = auxtrace_queues__process_index(&sf->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (sf->queues.populated)
+ sf->data_queued = true;
+
+ return 0;
+
+err_free_queues:
+ auxtrace_queues__free(&sf->queues);
+ session->auxtrace = NULL;
+err_free:
+ free(sf);
+ return err;
+}
diff --git a/tools/perf/util/s390-cpumsf.h b/tools/perf/util/s390-cpumsf.h
new file mode 100644
index 000000000..fb64d1005
--- /dev/null
+++ b/tools/perf/util/s390-cpumsf.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright IBM Corp. 2018
+ * Auxtrace support for s390 CPU-Measurement Sampling Facility
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ */
+
+#ifndef INCLUDE__PERF_S390_CPUMSF_H
+#define INCLUDE__PERF_S390_CPUMSF_H
+
+union perf_event;
+struct perf_session;
+struct perf_pmu;
+
+struct auxtrace_record *
+s390_cpumsf_recording_init(int *err, struct perf_pmu *s390_cpumsf_pmu);
+
+int s390_cpumsf_process_auxtrace_info(union perf_event *event,
+ struct perf_session *session);
+#endif
diff --git a/tools/perf/util/sane_ctype.h b/tools/perf/util/sane_ctype.h
new file mode 100644
index 000000000..c2b42ff9f
--- /dev/null
+++ b/tools/perf/util/sane_ctype.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_SANE_CTYPE_H
+#define _PERF_SANE_CTYPE_H
+
+extern const char *graph_line;
+extern const char *graph_dotted_line;
+extern const char *spaces;
+extern const char *dots;
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isxdigit
+#undef isalpha
+#undef isprint
+#undef isalnum
+#undef islower
+#undef isupper
+#undef tolower
+#undef toupper
+
+extern unsigned char sane_ctype[256];
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define GIT_PRINT_EXTRA 0x20
+#define GIT_PRINT 0x3E
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isxdigit(x) \
+ (sane_istest(toupper(x), GIT_ALPHA | GIT_DIGIT) && toupper(x) < 'G')
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define isprint(x) sane_istest(x,GIT_PRINT)
+#define islower(x) (sane_istest(x,GIT_ALPHA) && (x & 0x20))
+#define isupper(x) (sane_istest(x,GIT_ALPHA) && !(x & 0x20))
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+static inline int sane_case(int x, int high)
+{
+ if (sane_istest(x, GIT_ALPHA))
+ x = (x & ~0x20) | high;
+ return x;
+}
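+
+/*
+ * Illustration: classification is a single table lookup. For example,
+ * isdigit('7') expands to sane_istest('7', GIT_DIGIT), testing bit
+ * 0x02 of sane_ctype['7']. The (unsigned char) cast in sane_istest()
+ * makes these macros safe for plain (possibly signed) char values,
+ * unlike the locale-dependent libc versions.
+ */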
+
+#endif /* _PERF_SANE_CTYPE_H */
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
new file mode 100644
index 000000000..82d28c67e
--- /dev/null
+++ b/tools/perf/util/scripting-engines/Build
@@ -0,0 +1,6 @@
+libperf-$(CONFIG_LIBPERL) += trace-event-perl.o
+libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+
+CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default
+
+CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
new file mode 100644
index 000000000..45484f0f7
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -0,0 +1,752 @@
+/*
+ * trace-event-perl. Feed perf script events to an embedded Perl interpreter.
+ *
+ * Copyright (C) 2009 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/time64.h>
+
+#include <stdbool.h>
+/* perl needs the following define, right after including stdbool.h */
+#define HAS_BOOL
+#include <EXTERN.h>
+#include <perl.h>
+
+#include "../../perf.h"
+#include "../callchain.h"
+#include "../machine.h"
+#include "../thread.h"
+#include "../event.h"
+#include "../trace-event.h"
+#include "../evsel.h"
+#include "../debug.h"
+
+void boot_Perf__Trace__Context(pTHX_ CV *cv);
+void boot_DynaLoader(pTHX_ CV *cv);
+typedef PerlInterpreter * INTERP;
+
+void xs_init(pTHX);
+
+void xs_init(pTHX)
+{
+ const char *file = __FILE__;
+ dXSUB_SYS;
+
+ newXS("Perf::Trace::Context::bootstrap", boot_Perf__Trace__Context,
+ file);
+ newXS("DynaLoader::boot_DynaLoader", boot_DynaLoader, file);
+}
+
+INTERP my_perl;
+
+#define TRACE_EVENT_TYPE_MAX \
+ ((1 << (sizeof(unsigned short) * 8)) - 1)
+
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static void define_symbolic_value(const char *ev_name,
+ const char *field_name,
+ const char *field_value,
+ const char *field_str)
+{
+ unsigned long long value;
+ dSP;
+
+ value = eval_flag(field_value);
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+ XPUSHs(sv_2mortal(newSVuv(value)));
+ XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_symbolic_value", 0))
+ call_pv("main::define_symbolic_value", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
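+
+/* A note on the Perl calling convention used throughout this file:
+ * dSP declares a local copy of the Perl stack pointer, ENTER and
+ * SAVETMPS open a scope for mortal (temporary) values, PUSHMARK
+ * marks the start of the argument list, XPUSHs pushes each argument,
+ * PUTBACK commits the local stack pointer, call_pv() invokes the
+ * named Perl subroutine, and SPAGAIN/FREETMPS/LEAVE refresh the
+ * pointer and close the scope again.
+ */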
+
+static void define_symbolic_values(struct print_flag_sym *field,
+ const char *ev_name,
+ const char *field_name)
+{
+ define_symbolic_value(ev_name, field_name, field->value, field->str);
+ if (field->next)
+ define_symbolic_values(field->next, ev_name, field_name);
+}
+
+static void define_symbolic_field(const char *ev_name,
+ const char *field_name)
+{
+ dSP;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_symbolic_field", 0))
+ call_pv("main::define_symbolic_field", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_flag_value(const char *ev_name,
+ const char *field_name,
+ const char *field_value,
+ const char *field_str)
+{
+ unsigned long long value;
+ dSP;
+
+ value = eval_flag(field_value);
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+ XPUSHs(sv_2mortal(newSVuv(value)));
+ XPUSHs(sv_2mortal(newSVpv(field_str, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_flag_value", 0))
+ call_pv("main::define_flag_value", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_flag_values(struct print_flag_sym *field,
+ const char *ev_name,
+ const char *field_name)
+{
+ define_flag_value(ev_name, field_name, field->value, field->str);
+ if (field->next)
+ define_flag_values(field->next, ev_name, field_name);
+}
+
+static void define_flag_field(const char *ev_name,
+ const char *field_name,
+ const char *delim)
+{
+ dSP;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(ev_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(field_name, 0)));
+ XPUSHs(sv_2mortal(newSVpv(delim, 0)));
+
+ PUTBACK;
+ if (get_cv("main::define_flag_field", 0))
+ call_pv("main::define_flag_field", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void define_event_symbols(struct event_format *event,
+ const char *ev_name,
+ struct print_arg *args)
+{
+ if (args == NULL)
+ return;
+
+ switch (args->type) {
+ case PRINT_NULL:
+ break;
+ case PRINT_ATOM:
+ define_flag_value(ev_name, cur_field_name, "0",
+ args->atom.atom);
+ zero_flag_atom = 0;
+ break;
+ case PRINT_FIELD:
+ free(cur_field_name);
+ cur_field_name = strdup(args->field.name);
+ break;
+ case PRINT_FLAGS:
+ define_event_symbols(event, ev_name, args->flags.field);
+ define_flag_field(ev_name, cur_field_name, args->flags.delim);
+ define_flag_values(args->flags.flags, ev_name, cur_field_name);
+ break;
+ case PRINT_SYMBOL:
+ define_event_symbols(event, ev_name, args->symbol.field);
+ define_symbolic_field(ev_name, cur_field_name);
+ define_symbolic_values(args->symbol.symbols, ev_name,
+ cur_field_name);
+ break;
+ case PRINT_HEX:
+ case PRINT_HEX_STR:
+ define_event_symbols(event, ev_name, args->hex.field);
+ define_event_symbols(event, ev_name, args->hex.size);
+ break;
+ case PRINT_INT_ARRAY:
+ define_event_symbols(event, ev_name, args->int_array.field);
+ define_event_symbols(event, ev_name, args->int_array.count);
+ define_event_symbols(event, ev_name, args->int_array.el_size);
+ break;
+ case PRINT_BSTRING:
+ case PRINT_DYNAMIC_ARRAY:
+ case PRINT_DYNAMIC_ARRAY_LEN:
+ case PRINT_STRING:
+ case PRINT_BITMASK:
+ break;
+ case PRINT_TYPE:
+ define_event_symbols(event, ev_name, args->typecast.item);
+ break;
+ case PRINT_OP:
+ if (strcmp(args->op.op, ":") == 0)
+ zero_flag_atom = 1;
+ define_event_symbols(event, ev_name, args->op.left);
+ define_event_symbols(event, ev_name, args->op.right);
+ break;
+ case PRINT_FUNC:
+ default:
+ pr_err("Unsupported print arg type\n");
+ /* we should warn... */
+ return;
+ }
+
+ if (args->next)
+ define_event_symbols(event, ev_name, args->next);
+}
+
+static SV *perl_process_callchain(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ AV *list;
+
+ list = newAV();
+ if (!list)
+ goto exit;
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ goto exit;
+
+ if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL, scripting_max_stack) != 0) {
+ pr_err("Failed to resolve callchain. Skipping\n");
+ goto exit;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+ while (1) {
+ HV *elem;
+ struct callchain_cursor_node *node;
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+
+ elem = newHV();
+ if (!elem)
+ goto exit;
+
+ if (!hv_stores(elem, "ip", newSVuv(node->ip))) {
+ hv_undef(elem);
+ goto exit;
+ }
+
+ if (node->sym) {
+ HV *sym = newHV();
+ if (!sym) {
+ hv_undef(elem);
+ goto exit;
+ }
+ if (!hv_stores(sym, "start", newSVuv(node->sym->start)) ||
+ !hv_stores(sym, "end", newSVuv(node->sym->end)) ||
+ !hv_stores(sym, "binding", newSVuv(node->sym->binding)) ||
+ !hv_stores(sym, "name", newSVpvn(node->sym->name,
+ node->sym->namelen)) ||
+ !hv_stores(elem, "sym", newRV_noinc((SV*)sym))) {
+ hv_undef(sym);
+ hv_undef(elem);
+ goto exit;
+ }
+ }
+
+ if (node->map) {
+ struct map *map = node->map;
+ const char *dsoname = "[unknown]";
+ if (map && map->dso) {
+ if (symbol_conf.show_kernel_path && map->dso->long_name)
+ dsoname = map->dso->long_name;
+ else
+ dsoname = map->dso->name;
+ }
+ if (!hv_stores(elem, "dso", newSVpv(dsoname,0))) {
+ hv_undef(elem);
+ goto exit;
+ }
+ }
+
+ callchain_cursor_advance(&callchain_cursor);
+ av_push(list, newRV_noinc((SV*)elem));
+ }
+
+exit:
+ return newRV_noinc((SV*)list);
+}
+
+static void perl_process_tracepoint(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ struct thread *thread = al->thread;
+ struct event_format *event = evsel->tp_format;
+ struct format_field *field;
+ static char handler[256];
+ unsigned long long val;
+ unsigned long s, ns;
+ int pid;
+ int cpu = sample->cpu;
+ void *data = sample->raw_data;
+ unsigned long long nsecs = sample->time;
+ const char *comm = thread__comm_str(thread);
+
+ dSP;
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return;
+
+ if (!event) {
+ pr_debug("ug! no event found for type %" PRIu64, (u64)evsel->attr.config);
+ return;
+ }
+
+ pid = raw_field_value(event, "common_pid", data);
+
+ sprintf(handler, "%s::%s", event->system, event->name);
+
+ if (!test_and_set_bit(event->id, events_defined))
+ define_event_symbols(event, handler, event->print_fmt.args);
+
+ s = nsecs / NSEC_PER_SEC;
+ ns = nsecs - s * NSEC_PER_SEC;
+
+ scripting_context->event_data = data;
+ scripting_context->pevent = evsel->tp_format->pevent;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+
+ XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+ XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+ XPUSHs(sv_2mortal(newSVuv(cpu)));
+ XPUSHs(sv_2mortal(newSVuv(s)));
+ XPUSHs(sv_2mortal(newSVuv(ns)));
+ XPUSHs(sv_2mortal(newSViv(pid)));
+ XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+ XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+
+ /* common fields other than pid can be accessed via xsub fns */
+
+ for (field = event->format.fields; field; field = field->next) {
+ if (field->flags & FIELD_IS_STRING) {
+ int offset;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ offset = *(int *)(data + field->offset);
+ offset &= 0xffff;
+ } else
+ offset = field->offset;
+ XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
+ } else { /* FIELD_IS_NUMERIC */
+ val = read_size(event, data + field->offset,
+ field->size);
+ if (field->flags & FIELD_IS_SIGNED) {
+ XPUSHs(sv_2mortal(newSViv(val)));
+ } else {
+ XPUSHs(sv_2mortal(newSVuv(val)));
+ }
+ }
+ }
+
+ PUTBACK;
+
+ if (get_cv(handler, 0))
+ call_pv(handler, G_SCALAR);
+ else if (get_cv("main::trace_unhandled", 0)) {
+ XPUSHs(sv_2mortal(newSVpv(handler, 0)));
+ XPUSHs(sv_2mortal(newSViv(PTR2IV(scripting_context))));
+ XPUSHs(sv_2mortal(newSVuv(cpu)));
+ XPUSHs(sv_2mortal(newSVuv(nsecs)));
+ XPUSHs(sv_2mortal(newSViv(pid)));
+ XPUSHs(sv_2mortal(newSVpv(comm, 0)));
+ XPUSHs(sv_2mortal(perl_process_callchain(sample, evsel, al)));
+ call_pv("main::trace_unhandled", G_SCALAR);
+ }
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void perl_process_event_generic(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ dSP;
+
+ if (!get_cv("process_event", 0))
+ return;
+
+ ENTER;
+ SAVETMPS;
+ PUSHMARK(SP);
+ XPUSHs(sv_2mortal(newSVpvn((const char *)event, event->header.size)));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)&evsel->attr, sizeof(evsel->attr))));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)sample, sizeof(*sample))));
+ XPUSHs(sv_2mortal(newSVpvn((const char *)sample->raw_data, sample->raw_size)));
+ PUTBACK;
+ call_pv("process_event", G_SCALAR);
+ SPAGAIN;
+ PUTBACK;
+ FREETMPS;
+ LEAVE;
+}
+
+static void perl_process_event(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ perl_process_tracepoint(sample, evsel, al);
+ perl_process_event_generic(event, sample, evsel);
+}
+
+static void run_start_sub(void)
+{
+ dSP; /* access to Perl stack */
+ PUSHMARK(SP);
+
+ if (get_cv("main::trace_begin", 0))
+ call_pv("main::trace_begin", G_DISCARD | G_NOARGS);
+}
+
+/*
+ * Start trace script
+ */
+static int perl_start_script(const char *script, int argc, const char **argv)
+{
+ const char **command_line;
+ int i, err = 0;
+
+ command_line = malloc((argc + 2) * sizeof(const char *));
+ if (!command_line)
+ return -1;
+ command_line[0] = "";
+ command_line[1] = script;
+ for (i = 2; i < argc + 2; i++)
+ command_line[i] = argv[i - 2];
+
+ my_perl = perl_alloc();
+ perl_construct(my_perl);
+
+ if (perl_parse(my_perl, xs_init, argc + 2, (char **)command_line,
+ (char **)NULL)) {
+ err = -1;
+ goto error;
+ }
+
+ if (perl_run(my_perl)) {
+ err = -1;
+ goto error;
+ }
+
+ if (SvTRUE(ERRSV)) {
+ err = -1;
+ goto error;
+ }
+
+ run_start_sub();
+
+ free(command_line);
+ return 0;
+error:
+ perl_free(my_perl);
+ free(command_line);
+
+ return err;
+}
+
+static int perl_flush_script(void)
+{
+ return 0;
+}
+
+/*
+ * Stop trace script
+ */
+static int perl_stop_script(void)
+{
+ dSP; /* access to Perl stack */
+ PUSHMARK(SP);
+
+ if (get_cv("main::trace_end", 0))
+ call_pv("main::trace_end", G_DISCARD | G_NOARGS);
+
+ perl_destruct(my_perl);
+ perl_free(my_perl);
+
+ return 0;
+}
+
+static int perl_generate_script(struct tep_handle *pevent, const char *outfile)
+{
+ struct event_format *event = NULL;
+ struct format_field *f;
+ char fname[PATH_MAX];
+ int not_first, count;
+ FILE *ofp;
+
+ sprintf(fname, "%s.pl", outfile);
+ ofp = fopen(fname, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "couldn't open %s\n", fname);
+ return -1;
+ }
+
+ fprintf(ofp, "# perf script event handlers, "
+ "generated by perf script -g perl\n");
+
+ fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+ " License version 2\n\n");
+
+ fprintf(ofp, "# The common_* event handler fields are the most useful "
+ "fields common to\n");
+
+ fprintf(ofp, "# all events. They don't necessarily correspond to "
+ "the 'common_*' fields\n");
+
+ fprintf(ofp, "# in the format files. Those fields not available as "
+ "handler params can\n");
+
+ fprintf(ofp, "# be retrieved using Perl functions of the form "
+ "common_*($context).\n");
+
+ fprintf(ofp, "# See Context.pm for the list of available "
+ "functions.\n\n");
+
+ fprintf(ofp, "use lib \"$ENV{'PERF_EXEC_PATH'}/scripts/perl/"
+ "Perf-Trace-Util/lib\";\n");
+
+ fprintf(ofp, "use lib \"./Perf-Trace-Util/lib\";\n");
+ fprintf(ofp, "use Perf::Trace::Core;\n");
+ fprintf(ofp, "use Perf::Trace::Context;\n");
+ fprintf(ofp, "use Perf::Trace::Util;\n\n");
+
+ fprintf(ofp, "sub trace_begin\n{\n\t# optional\n}\n\n");
+ fprintf(ofp, "sub trace_end\n{\n\t# optional\n}\n");
+
+ fprintf(ofp, "\n\
+sub print_backtrace\n\
+{\n\
+ my $callchain = shift;\n\
+ for my $node (@$callchain)\n\
+ {\n\
+ if(exists $node->{sym})\n\
+ {\n\
+ printf( \"\\t[\\%%x] \\%%s\\n\", $node->{ip}, $node->{sym}{name});\n\
+ }\n\
+ else\n\
+ {\n\
+ printf( \"\\t[\\%%x]\\n\", $node{ip});\n\
+ }\n\
+ }\n\
+}\n\n\
+");
+
+ while ((event = trace_find_next_event(pevent, event))) {
+ fprintf(ofp, "sub %s::%s\n{\n", event->system, event->name);
+ fprintf(ofp, "\tmy (");
+
+ fprintf(ofp, "$event_name, ");
+ fprintf(ofp, "$context, ");
+ fprintf(ofp, "$common_cpu, ");
+ fprintf(ofp, "$common_secs, ");
+ fprintf(ofp, "$common_nsecs,\n");
+ fprintf(ofp, "\t $common_pid, ");
+ fprintf(ofp, "$common_comm, ");
+ fprintf(ofp, "$common_callchain,\n\t ");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t ");
+
+ fprintf(ofp, "$%s", f->name);
+ }
+ fprintf(ofp, ") = @_;\n\n");
+
+ fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+ "$common_secs, $common_nsecs,\n\t "
+ "$common_pid, $common_comm, $common_callchain);\n\n");
+
+ fprintf(ofp, "\tprintf(\"");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (count && count % 4 == 0) {
+ fprintf(ofp, "\".\n\t \"");
+ }
+ count++;
+
+ fprintf(ofp, "%s=", f->name);
+ if (f->flags & FIELD_IS_STRING ||
+ f->flags & FIELD_IS_FLAG ||
+ f->flags & FIELD_IS_SYMBOLIC)
+ fprintf(ofp, "%%s");
+ else if (f->flags & FIELD_IS_SIGNED)
+ fprintf(ofp, "%%d");
+ else
+ fprintf(ofp, "%%u");
+ }
+
+ fprintf(ofp, "\\n\",\n\t ");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t ");
+
+ if (f->flags & FIELD_IS_FLAG) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t ");
+ count = 4;
+ }
+ fprintf(ofp, "flag_str(\"");
+ fprintf(ofp, "%s::%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", $%s)", f->name,
+ f->name);
+ } else if (f->flags & FIELD_IS_SYMBOLIC) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t ");
+ count = 4;
+ }
+ fprintf(ofp, "symbol_str(\"");
+ fprintf(ofp, "%s::%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", $%s)", f->name,
+ f->name);
+ } else
+ fprintf(ofp, "$%s", f->name);
+ }
+
+ fprintf(ofp, ");\n\n");
+
+ fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+
+ fprintf(ofp, "}\n\n");
+ }
+
+ fprintf(ofp, "sub trace_unhandled\n{\n\tmy ($event_name, $context, "
+ "$common_cpu, $common_secs, $common_nsecs,\n\t "
+ "$common_pid, $common_comm, $common_callchain) = @_;\n\n");
+
+ fprintf(ofp, "\tprint_header($event_name, $common_cpu, "
+ "$common_secs, $common_nsecs,\n\t $common_pid, "
+ "$common_comm, $common_callchain);\n");
+ fprintf(ofp, "\tprint_backtrace($common_callchain);\n");
+ fprintf(ofp, "}\n\n");
+
+ fprintf(ofp, "sub print_header\n{\n"
+ "\tmy ($event_name, $cpu, $secs, $nsecs, $pid, $comm) = @_;\n\n"
+ "\tprintf(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \",\n\t "
+ "$event_name, $cpu, $secs, $nsecs, $pid, $comm);\n}\n");
+
+ fprintf(ofp,
+ "\n# Packed byte string args of process_event():\n"
+ "#\n"
+ "# $event:\tunion perf_event\tutil/event.h\n"
+ "# $attr:\tstruct perf_event_attr\tlinux/perf_event.h\n"
+ "# $sample:\tstruct perf_sample\tutil/event.h\n"
+ "# $raw_data:\tperf_sample->raw_data\tutil/event.h\n"
+ "\n"
+ "sub process_event\n"
+ "{\n"
+ "\tmy ($event, $attr, $sample, $raw_data) = @_;\n"
+ "\n"
+ "\tmy @event\t= unpack(\"LSS\", $event);\n"
+ "\tmy @attr\t= unpack(\"LLQQQQQLLQQ\", $attr);\n"
+ "\tmy @sample\t= unpack(\"QLLQQQQQLL\", $sample);\n"
+ "\tmy @raw_data\t= unpack(\"C*\", $raw_data);\n"
+ "\n"
+ "\tuse Data::Dumper;\n"
+ "\tprint Dumper \\@event, \\@attr, \\@sample, \\@raw_data;\n"
+ "}\n");
+
+ fclose(ofp);
+
+ fprintf(stderr, "generated Perl script: %s\n", fname);
+
+ return 0;
+}
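+
+/* For illustration, the generated script contains one handler per
+ * event, named "<system>::<name>". For a hypothetical
+ * sched::sched_switch event the skeleton looks roughly like:
+ *
+ *	sub sched::sched_switch
+ *	{
+ *		my ($event_name, $context, $common_cpu, $common_secs,
+ *		    $common_nsecs, $common_pid, $common_comm,
+ *		    $common_callchain, $prev_comm, ...) = @_;
+ *
+ *		print_header(...);
+ *		printf(...);
+ *		print_backtrace($common_callchain);
+ *	}
+ */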
+
+struct scripting_ops perl_scripting_ops = {
+ .name = "Perl",
+ .start_script = perl_start_script,
+ .flush_script = perl_flush_script,
+ .stop_script = perl_stop_script,
+ .process_event = perl_process_event,
+ .generate_script = perl_generate_script,
+};
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
new file mode 100644
index 000000000..2814251df
--- /dev/null
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -0,0 +1,1784 @@
+/*
+ * trace-event-python. Feed trace events to an embedded Python interpreter.
+ *
+ * Copyright (C) 2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <Python.h>
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/compiler.h>
+#include <linux/time64.h>
+
+#include "../../perf.h"
+#include "../debug.h"
+#include "../callchain.h"
+#include "../evsel.h"
+#include "../util.h"
+#include "../event.h"
+#include "../thread.h"
+#include "../comm.h"
+#include "../machine.h"
+#include "../db-export.h"
+#include "../thread-stack.h"
+#include "../trace-event.h"
+#include "../call-path.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "print_binary.h"
+#include "stat.h"
+#include "mem-events.h"
+
+#if PY_MAJOR_VERSION < 3
+#define _PyUnicode_FromString(arg) \
+ PyString_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+ PyString_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+ PyString_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+ PyInt_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+ PyInt_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+ PyCObject_FromVoidPtr((arg1), (arg2))
+
+PyMODINIT_FUNC initperf_trace_context(void);
+#else
+#define _PyUnicode_FromString(arg) \
+ PyUnicode_FromString(arg)
+#define _PyUnicode_FromStringAndSize(arg1, arg2) \
+ PyUnicode_FromStringAndSize((arg1), (arg2))
+#define _PyBytes_FromStringAndSize(arg1, arg2) \
+ PyBytes_FromStringAndSize((arg1), (arg2))
+#define _PyLong_FromLong(arg) \
+ PyLong_FromLong(arg)
+#define _PyLong_AsLong(arg) \
+ PyLong_AsLong(arg)
+#define _PyCapsule_New(arg1, arg2, arg3) \
+ PyCapsule_New((arg1), (arg2), (arg3))
+
+PyMODINIT_FUNC PyInit_perf_trace_context(void);
+#endif
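+
+/*
+ * The _Py* wrappers above paper over the Python 2/3 C API split:
+ * Python 2 maps strings to PyString_* and small integers to PyInt_*,
+ * while Python 3 distinguishes PyUnicode/PyBytes and only has
+ * PyLong; PyCObject was likewise replaced by PyCapsule. The rest of
+ * this file uses only the wrappers, so it compiles against either
+ * version.
+ */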
+
+#define TRACE_EVENT_TYPE_MAX \
+ ((1 << (sizeof(unsigned short) * 8)) - 1)
+
+static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX);
+
+#define MAX_FIELDS 64
+#define N_COMMON_FIELDS 7
+
+extern struct scripting_context *scripting_context;
+
+static char *cur_field_name;
+static int zero_flag_atom;
+
+static PyObject *main_module, *main_dict;
+
+struct tables {
+ struct db_export dbe;
+ PyObject *evsel_handler;
+ PyObject *machine_handler;
+ PyObject *thread_handler;
+ PyObject *comm_handler;
+ PyObject *comm_thread_handler;
+ PyObject *dso_handler;
+ PyObject *symbol_handler;
+ PyObject *branch_type_handler;
+ PyObject *sample_handler;
+ PyObject *call_path_handler;
+ PyObject *call_return_handler;
+ bool db_export_mode;
+};
+
+static struct tables tables_global;
+
+static void handler_call_die(const char *handler_name) __noreturn;
+static void handler_call_die(const char *handler_name)
+{
+ PyErr_Print();
+ Py_FatalError("problem in Python trace event handler");
+ // Py_FatalError does not return
+ // but we have to make the compiler happy
+ abort();
+}
+
+/*
+ * Insert val into the dictionary and decrement the reference counter.
+ * This is necessary for dictionaries since PyDict_SetItemString() does not
+ * steal a reference, as opposed to PyTuple_SetItem().
+ */
+static void pydict_set_item_string_decref(PyObject *dict, const char *key, PyObject *val)
+{
+ PyDict_SetItemString(dict, key, val);
+ Py_DECREF(val);
+}
+
+static PyObject *get_handler(const char *handler_name)
+{
+ PyObject *handler;
+
+ handler = PyDict_GetItemString(main_dict, handler_name);
+ if (handler && !PyCallable_Check(handler))
+ return NULL;
+ return handler;
+}
+
+static int get_argument_count(PyObject *handler)
+{
+ int arg_count = 0;
+
+ /*
+ * The attribute for the code object is func_code in Python 2,
+ * whereas it is __code__ in Python 3.0+.
+ */
+ PyObject *code_obj = PyObject_GetAttrString(handler,
+ "func_code");
+ if (PyErr_Occurred()) {
+ PyErr_Clear();
+ code_obj = PyObject_GetAttrString(handler,
+ "__code__");
+ }
+ PyErr_Clear();
+ if (code_obj) {
+ PyObject *arg_count_obj = PyObject_GetAttrString(code_obj,
+ "co_argcount");
+ if (arg_count_obj) {
+ arg_count = (int) _PyLong_AsLong(arg_count_obj);
+ Py_DECREF(arg_count_obj);
+ }
+ Py_DECREF(code_obj);
+ }
+ return arg_count;
+}
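+
+/* The argument count matters because a handler may declare one extra
+ * trailing parameter: python_process_tracepoint() below passes the
+ * perf_sample dict as the last argument only when the handler
+ * declares n + 1 arguments.
+ */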
+
+static void call_object(PyObject *handler, PyObject *args, const char *die_msg)
+{
+ PyObject *retval;
+
+ retval = PyObject_CallObject(handler, args);
+ if (retval == NULL)
+ handler_call_die(die_msg);
+ Py_DECREF(retval);
+}
+
+static void try_call_object(const char *handler_name, PyObject *args)
+{
+ PyObject *handler;
+
+ handler = get_handler(handler_name);
+ if (handler)
+ call_object(handler, args, handler_name);
+}
+
+static void define_value(enum print_arg_type field_type,
+ const char *ev_name,
+ const char *field_name,
+ const char *field_value,
+ const char *field_str)
+{
+ const char *handler_name = "define_flag_value";
+ PyObject *t;
+ unsigned long long value;
+ unsigned n = 0;
+
+ if (field_type == PRINT_SYMBOL)
+ handler_name = "define_symbolic_value";
+
+ t = PyTuple_New(4);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ value = eval_flag(field_value);
+
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(value));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_str));
+
+ try_call_object(handler_name, t);
+
+ Py_DECREF(t);
+}
+
+static void define_values(enum print_arg_type field_type,
+ struct print_flag_sym *field,
+ const char *ev_name,
+ const char *field_name)
+{
+ define_value(field_type, ev_name, field_name, field->value,
+ field->str);
+
+ if (field->next)
+ define_values(field_type, field->next, ev_name, field_name);
+}
+
+static void define_field(enum print_arg_type field_type,
+ const char *ev_name,
+ const char *field_name,
+ const char *delim)
+{
+ const char *handler_name = "define_flag_field";
+ PyObject *t;
+ unsigned n = 0;
+
+ if (field_type == PRINT_SYMBOL)
+ handler_name = "define_symbolic_field";
+
+ if (field_type == PRINT_FLAGS)
+ t = PyTuple_New(3);
+ else
+ t = PyTuple_New(2);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(ev_name));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(field_name));
+ if (field_type == PRINT_FLAGS)
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(delim));
+
+ try_call_object(handler_name, t);
+
+ Py_DECREF(t);
+}
+
+static void define_event_symbols(struct event_format *event,
+ const char *ev_name,
+ struct print_arg *args)
+{
+ if (args == NULL)
+ return;
+
+ switch (args->type) {
+ case PRINT_NULL:
+ break;
+ case PRINT_ATOM:
+ define_value(PRINT_FLAGS, ev_name, cur_field_name, "0",
+ args->atom.atom);
+ zero_flag_atom = 0;
+ break;
+ case PRINT_FIELD:
+ free(cur_field_name);
+ cur_field_name = strdup(args->field.name);
+ break;
+ case PRINT_FLAGS:
+ define_event_symbols(event, ev_name, args->flags.field);
+ define_field(PRINT_FLAGS, ev_name, cur_field_name,
+ args->flags.delim);
+ define_values(PRINT_FLAGS, args->flags.flags, ev_name,
+ cur_field_name);
+ break;
+ case PRINT_SYMBOL:
+ define_event_symbols(event, ev_name, args->symbol.field);
+ define_field(PRINT_SYMBOL, ev_name, cur_field_name, NULL);
+ define_values(PRINT_SYMBOL, args->symbol.symbols, ev_name,
+ cur_field_name);
+ break;
+ case PRINT_HEX:
+ case PRINT_HEX_STR:
+ define_event_symbols(event, ev_name, args->hex.field);
+ define_event_symbols(event, ev_name, args->hex.size);
+ break;
+ case PRINT_INT_ARRAY:
+ define_event_symbols(event, ev_name, args->int_array.field);
+ define_event_symbols(event, ev_name, args->int_array.count);
+ define_event_symbols(event, ev_name, args->int_array.el_size);
+ break;
+ case PRINT_STRING:
+ break;
+ case PRINT_TYPE:
+ define_event_symbols(event, ev_name, args->typecast.item);
+ break;
+ case PRINT_OP:
+ if (strcmp(args->op.op, ":") == 0)
+ zero_flag_atom = 1;
+ define_event_symbols(event, ev_name, args->op.left);
+ define_event_symbols(event, ev_name, args->op.right);
+ break;
+ default:
+ /* gcc warns for these? */
+ case PRINT_BSTRING:
+ case PRINT_DYNAMIC_ARRAY:
+ case PRINT_DYNAMIC_ARRAY_LEN:
+ case PRINT_FUNC:
+ case PRINT_BITMASK:
+ /* we should warn... */
+ return;
+ }
+
+ if (args->next)
+ define_event_symbols(event, ev_name, args->next);
+}
+
+static PyObject *get_field_numeric_entry(struct event_format *event,
+ struct format_field *field, void *data)
+{
+ bool is_array = field->flags & FIELD_IS_ARRAY;
+ PyObject *obj = NULL, *list = NULL;
+ unsigned long long val;
+ unsigned int item_size, n_items, i;
+
+ if (is_array) {
+ list = PyList_New(field->arraylen);
+ item_size = field->size / field->arraylen;
+ n_items = field->arraylen;
+ } else {
+ item_size = field->size;
+ n_items = 1;
+ }
+
+ for (i = 0; i < n_items; i++) {
+
+ val = read_size(event, data + field->offset + i * item_size,
+ item_size);
+ if (field->flags & FIELD_IS_SIGNED) {
+ if ((long long)val >= LONG_MIN &&
+ (long long)val <= LONG_MAX)
+ obj = _PyLong_FromLong(val);
+ else
+ obj = PyLong_FromLongLong(val);
+ } else {
+ if (val <= LONG_MAX)
+ obj = _PyLong_FromLong(val);
+ else
+ obj = PyLong_FromUnsignedLongLong(val);
+ }
+ if (is_array)
+ PyList_SET_ITEM(list, i, obj);
+ }
+ if (is_array)
+ obj = list;
+ return obj;
+}
+
+static const char *get_dsoname(struct map *map)
+{
+ const char *dsoname = "[unknown]";
+
+ if (map && map->dso) {
+ if (symbol_conf.show_kernel_path && map->dso->long_name)
+ dsoname = map->dso->long_name;
+ else
+ dsoname = map->dso->name;
+ }
+
+ return dsoname;
+}
+
+static PyObject *python_process_callchain(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ PyObject *pylist;
+
+ pylist = PyList_New(0);
+ if (!pylist)
+ Py_FatalError("couldn't create Python list");
+
+ if (!symbol_conf.use_callchain || !sample->callchain)
+ goto exit;
+
+ if (thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack) != 0) {
+ pr_err("Failed to resolve callchain. Skipping\n");
+ goto exit;
+ }
+ callchain_cursor_commit(&callchain_cursor);
+
+ while (1) {
+ PyObject *pyelem;
+ struct callchain_cursor_node *node;
+ node = callchain_cursor_current(&callchain_cursor);
+ if (!node)
+ break;
+
+ pyelem = PyDict_New();
+ if (!pyelem)
+ Py_FatalError("couldn't create Python dictionary");
+
+ pydict_set_item_string_decref(pyelem, "ip",
+ PyLong_FromUnsignedLongLong(node->ip));
+
+ if (node->sym) {
+ PyObject *pysym = PyDict_New();
+ if (!pysym)
+ Py_FatalError("couldn't create Python dictionary");
+ pydict_set_item_string_decref(pysym, "start",
+ PyLong_FromUnsignedLongLong(node->sym->start));
+ pydict_set_item_string_decref(pysym, "end",
+ PyLong_FromUnsignedLongLong(node->sym->end));
+ pydict_set_item_string_decref(pysym, "binding",
+ _PyLong_FromLong(node->sym->binding));
+ pydict_set_item_string_decref(pysym, "name",
+ _PyUnicode_FromStringAndSize(node->sym->name,
+ node->sym->namelen));
+ pydict_set_item_string_decref(pyelem, "sym", pysym);
+ }
+
+ if (node->map) {
+ const char *dsoname = get_dsoname(node->map);
+
+ pydict_set_item_string_decref(pyelem, "dso",
+ _PyUnicode_FromString(dsoname));
+ }
+
+ callchain_cursor_advance(&callchain_cursor);
+ PyList_Append(pylist, pyelem);
+ Py_DECREF(pyelem);
+ }
+
+exit:
+ return pylist;
+}
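+
+/* Shape of the returned object, for illustration (hypothetical
+ * values): a list of dicts, one per resolved frame, e.g.
+ *
+ *	[{'ip': 0x4005d0,
+ *	  'sym': {'start': ..., 'end': ..., 'binding': ..., 'name': 'main'},
+ *	  'dso': '/usr/bin/example'}, ...]
+ *
+ * The 'sym' and 'dso' keys are present only when symbol and map
+ * resolution succeeded.
+ */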
+
+static PyObject *python_process_brstack(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct branch_stack *br = sample->branch_stack;
+ PyObject *pylist;
+ u64 i;
+
+ pylist = PyList_New(0);
+ if (!pylist)
+ Py_FatalError("couldn't create Python list");
+
+ if (!(br && br->nr))
+ goto exit;
+
+ for (i = 0; i < br->nr; i++) {
+ PyObject *pyelem;
+ struct addr_location al;
+ const char *dsoname;
+
+ pyelem = PyDict_New();
+ if (!pyelem)
+ Py_FatalError("couldn't create Python dictionary");
+
+ pydict_set_item_string_decref(pyelem, "from",
+ PyLong_FromUnsignedLongLong(br->entries[i].from));
+ pydict_set_item_string_decref(pyelem, "to",
+ PyLong_FromUnsignedLongLong(br->entries[i].to));
+ pydict_set_item_string_decref(pyelem, "mispred",
+ PyBool_FromLong(br->entries[i].flags.mispred));
+ pydict_set_item_string_decref(pyelem, "predicted",
+ PyBool_FromLong(br->entries[i].flags.predicted));
+ pydict_set_item_string_decref(pyelem, "in_tx",
+ PyBool_FromLong(br->entries[i].flags.in_tx));
+ pydict_set_item_string_decref(pyelem, "abort",
+ PyBool_FromLong(br->entries[i].flags.abort));
+ pydict_set_item_string_decref(pyelem, "cycles",
+ PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+
+ thread__find_map_fb(thread, sample->cpumode,
+ br->entries[i].from, &al);
+ dsoname = get_dsoname(al.map);
+ pydict_set_item_string_decref(pyelem, "from_dsoname",
+ _PyUnicode_FromString(dsoname));
+
+ thread__find_map_fb(thread, sample->cpumode,
+ br->entries[i].to, &al);
+ dsoname = get_dsoname(al.map);
+ pydict_set_item_string_decref(pyelem, "to_dsoname",
+ _PyUnicode_FromString(dsoname));
+
+ PyList_Append(pylist, pyelem);
+ Py_DECREF(pyelem);
+ }
+
+exit:
+ return pylist;
+}
+
+static unsigned long get_offset(struct symbol *sym, struct addr_location *al)
+{
+ unsigned long offset;
+
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+
+ return offset;
+}
+
+static int get_symoff(struct symbol *sym, struct addr_location *al,
+ bool print_off, char *bf, int size)
+{
+ unsigned long offset;
+
+ if (!sym || !sym->name[0])
+ return scnprintf(bf, size, "%s", "[unknown]");
+
+ if (!print_off)
+ return scnprintf(bf, size, "%s", sym->name);
+
+ offset = get_offset(sym, al);
+
+ return scnprintf(bf, size, "%s+0x%x", sym->name, offset);
+}
+
+static int get_br_mspred(struct branch_flags *flags, char *bf, int size)
+{
+ if (!flags->mispred && !flags->predicted)
+ return scnprintf(bf, size, "%s", "-");
+
+ if (flags->mispred)
+ return scnprintf(bf, size, "%s", "M");
+
+ return scnprintf(bf, size, "%s", "P");
+}
+
+static PyObject *python_process_brstacksym(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct branch_stack *br = sample->branch_stack;
+ PyObject *pylist;
+ u64 i;
+ char bf[512];
+ struct addr_location al;
+
+ pylist = PyList_New(0);
+ if (!pylist)
+ Py_FatalError("couldn't create Python list");
+
+ if (!(br && br->nr))
+ goto exit;
+
+ for (i = 0; i < br->nr; i++) {
+ PyObject *pyelem;
+
+ pyelem = PyDict_New();
+ if (!pyelem)
+ Py_FatalError("couldn't create Python dictionary");
+
+ thread__find_symbol_fb(thread, sample->cpumode,
+ br->entries[i].from, &al);
+ get_symoff(al.sym, &al, true, bf, sizeof(bf));
+ pydict_set_item_string_decref(pyelem, "from",
+ _PyUnicode_FromString(bf));
+
+ thread__find_symbol_fb(thread, sample->cpumode,
+ br->entries[i].to, &al);
+ get_symoff(al.sym, &al, true, bf, sizeof(bf));
+ pydict_set_item_string_decref(pyelem, "to",
+ _PyUnicode_FromString(bf));
+
+ get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+ pydict_set_item_string_decref(pyelem, "pred",
+ _PyUnicode_FromString(bf));
+
+ if (br->entries[i].flags.in_tx) {
+ pydict_set_item_string_decref(pyelem, "in_tx",
+ _PyUnicode_FromString("X"));
+ } else {
+ pydict_set_item_string_decref(pyelem, "in_tx",
+ _PyUnicode_FromString("-"));
+ }
+
+ if (br->entries[i].flags.abort) {
+ pydict_set_item_string_decref(pyelem, "abort",
+ _PyUnicode_FromString("A"));
+ } else {
+ pydict_set_item_string_decref(pyelem, "abort",
+ _PyUnicode_FromString("-"));
+ }
+
+ PyList_Append(pylist, pyelem);
+ Py_DECREF(pyelem);
+ }
+
+exit:
+ return pylist;
+}
+
+static PyObject *get_sample_value_as_tuple(struct sample_read_value *value)
+{
+ PyObject *t;
+
+ t = PyTuple_New(2);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+ PyTuple_SetItem(t, 0, PyLong_FromUnsignedLongLong(value->id));
+ PyTuple_SetItem(t, 1, PyLong_FromUnsignedLongLong(value->value));
+ return t;
+}
+
+static void set_sample_read_in_dict(PyObject *dict_sample,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ u64 read_format = evsel->attr.read_format;
+ PyObject *values;
+ unsigned int i;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ pydict_set_item_string_decref(dict_sample, "time_enabled",
+ PyLong_FromUnsignedLongLong(sample->read.time_enabled));
+ }
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ pydict_set_item_string_decref(dict_sample, "time_running",
+ PyLong_FromUnsignedLongLong(sample->read.time_running));
+ }
+
+ if (read_format & PERF_FORMAT_GROUP)
+ values = PyList_New(sample->read.group.nr);
+ else
+ values = PyList_New(1);
+
+ if (!values)
+ Py_FatalError("couldn't create Python list");
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ for (i = 0; i < sample->read.group.nr; i++) {
+ PyObject *t = get_sample_value_as_tuple(&sample->read.group.values[i]);
+ PyList_SET_ITEM(values, i, t);
+ }
+ } else {
+ PyObject *t = get_sample_value_as_tuple(&sample->read.one);
+ PyList_SET_ITEM(values, 0, t);
+ }
+ pydict_set_item_string_decref(dict_sample, "values", values);
+}
+
+static void set_sample_datasrc_in_dict(PyObject *dict,
+ struct perf_sample *sample)
+{
+ struct mem_info mi = { .data_src.val = sample->data_src };
+ char decode[100];
+
+ pydict_set_item_string_decref(dict, "datasrc",
+ PyLong_FromUnsignedLongLong(sample->data_src));
+
+ perf_script__meminfo_scnprintf(decode, 100, &mi);
+
+ pydict_set_item_string_decref(dict, "datasrc_decode",
+ _PyUnicode_FromString(decode));
+}
+
+static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
+{
+ unsigned int i = 0, r;
+ int printed = 0;
+
+ bf[0] = 0;
+
+ for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs->regs[i++];
+
+ printed += scnprintf(bf + printed, size - printed,
+ "%5s:0x%" PRIx64 " ",
+ perf_reg_name(r), val);
+ }
+
+ return printed;
+}
+
+static void set_regs_in_dict(PyObject *dict,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ char bf[512];
+
+ regs_map(&sample->intr_regs, attr->sample_regs_intr, bf, sizeof(bf));
+
+ pydict_set_item_string_decref(dict, "iregs",
+ _PyUnicode_FromString(bf));
+
+ regs_map(&sample->user_regs, attr->sample_regs_user, bf, sizeof(bf));
+
+ pydict_set_item_string_decref(dict, "uregs",
+ _PyUnicode_FromString(bf));
+}
+
+static PyObject *get_perf_sample_dict(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al,
+ PyObject *callchain)
+{
+ PyObject *dict, *dict_sample, *brstack, *brstacksym;
+
+ dict = PyDict_New();
+ if (!dict)
+ Py_FatalError("couldn't create Python dictionary");
+
+ dict_sample = PyDict_New();
+ if (!dict_sample)
+ Py_FatalError("couldn't create Python dictionary");
+
+ pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
+ pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->attr, sizeof(evsel->attr)));
+
+ pydict_set_item_string_decref(dict_sample, "pid",
+ _PyLong_FromLong(sample->pid));
+ pydict_set_item_string_decref(dict_sample, "tid",
+ _PyLong_FromLong(sample->tid));
+ pydict_set_item_string_decref(dict_sample, "cpu",
+ _PyLong_FromLong(sample->cpu));
+ pydict_set_item_string_decref(dict_sample, "ip",
+ PyLong_FromUnsignedLongLong(sample->ip));
+ pydict_set_item_string_decref(dict_sample, "time",
+ PyLong_FromUnsignedLongLong(sample->time));
+ pydict_set_item_string_decref(dict_sample, "period",
+ PyLong_FromUnsignedLongLong(sample->period));
+ pydict_set_item_string_decref(dict_sample, "phys_addr",
+ PyLong_FromUnsignedLongLong(sample->phys_addr));
+ pydict_set_item_string_decref(dict_sample, "addr",
+ PyLong_FromUnsignedLongLong(sample->addr));
+ set_sample_read_in_dict(dict_sample, sample, evsel);
+ pydict_set_item_string_decref(dict_sample, "weight",
+ PyLong_FromUnsignedLongLong(sample->weight));
+ pydict_set_item_string_decref(dict_sample, "transaction",
+ PyLong_FromUnsignedLongLong(sample->transaction));
+ set_sample_datasrc_in_dict(dict_sample, sample);
+ pydict_set_item_string_decref(dict, "sample", dict_sample);
+
+ pydict_set_item_string_decref(dict, "raw_buf", _PyBytes_FromStringAndSize(
+ (const char *)sample->raw_data, sample->raw_size));
+ pydict_set_item_string_decref(dict, "comm",
+ _PyUnicode_FromString(thread__comm_str(al->thread)));
+ if (al->map) {
+ pydict_set_item_string_decref(dict, "dso",
+ _PyUnicode_FromString(al->map->dso->name));
+ }
+ if (al->sym) {
+ pydict_set_item_string_decref(dict, "symbol",
+ _PyUnicode_FromString(al->sym->name));
+ }
+
+ pydict_set_item_string_decref(dict, "callchain", callchain);
+
+ brstack = python_process_brstack(sample, al->thread);
+ pydict_set_item_string_decref(dict, "brstack", brstack);
+
+ brstacksym = python_process_brstacksym(sample, al->thread);
+ pydict_set_item_string_decref(dict, "brstacksym", brstacksym);
+
+ set_regs_in_dict(dict, sample, evsel);
+
+ return dict;
+}
+
+static void python_process_tracepoint(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ struct event_format *event = evsel->tp_format;
+ PyObject *handler, *context, *t, *obj = NULL, *callchain;
+ PyObject *dict = NULL, *all_entries_dict = NULL;
+ static char handler_name[256];
+ struct format_field *field;
+ unsigned long s, ns;
+ unsigned n = 0;
+ int pid;
+ int cpu = sample->cpu;
+ void *data = sample->raw_data;
+ unsigned long long nsecs = sample->time;
+ const char *comm = thread__comm_str(al->thread);
+ const char *default_handler_name = "trace_unhandled";
+
+ if (!event) {
+ snprintf(handler_name, sizeof(handler_name),
+ "ug! no event found for type %" PRIu64, (u64)evsel->attr.config);
+ Py_FatalError(handler_name);
+ }
+
+ pid = raw_field_value(event, "common_pid", data);
+
+ sprintf(handler_name, "%s__%s", event->system, event->name);
+
+ if (!test_and_set_bit(event->id, events_defined))
+ define_event_symbols(event, handler_name, event->print_fmt.args);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ handler = get_handler(default_handler_name);
+ if (!handler)
+ return;
+ dict = PyDict_New();
+ if (!dict)
+ Py_FatalError("couldn't create Python dict");
+ }
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ s = nsecs / NSEC_PER_SEC;
+ ns = nsecs - s * NSEC_PER_SEC;
+
+ scripting_context->event_data = data;
+ scripting_context->pevent = evsel->tp_format->pevent;
+
+ context = _PyCapsule_New(scripting_context, NULL, NULL);
+
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(handler_name));
+ PyTuple_SetItem(t, n++, context);
+
+ /* ip unwinding */
+ callchain = python_process_callchain(sample, evsel, al);
+ /* Need an additional reference for the perf_sample dict */
+ Py_INCREF(callchain);
+
+ if (!dict) {
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(s));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(ns));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(pid));
+ PyTuple_SetItem(t, n++, _PyUnicode_FromString(comm));
+ PyTuple_SetItem(t, n++, callchain);
+ } else {
+ pydict_set_item_string_decref(dict, "common_cpu", _PyLong_FromLong(cpu));
+ pydict_set_item_string_decref(dict, "common_s", _PyLong_FromLong(s));
+ pydict_set_item_string_decref(dict, "common_ns", _PyLong_FromLong(ns));
+ pydict_set_item_string_decref(dict, "common_pid", _PyLong_FromLong(pid));
+ pydict_set_item_string_decref(dict, "common_comm", _PyUnicode_FromString(comm));
+ pydict_set_item_string_decref(dict, "common_callchain", callchain);
+ }
+ for (field = event->format.fields; field; field = field->next) {
+ unsigned int offset, len;
+ unsigned long long val;
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ val = tep_read_number(scripting_context->pevent,
+ data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+ if (field->flags & FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ obj = _PyUnicode_FromString((char *) data + offset);
+ } else {
+ obj = PyByteArray_FromStringAndSize((const char *) data + offset, len);
+ field->flags &= ~FIELD_IS_STRING;
+ }
+ } else { /* FIELD_IS_NUMERIC */
+ obj = get_field_numeric_entry(event, field, data);
+ }
+ if (!dict)
+ PyTuple_SetItem(t, n++, obj);
+ else
+ pydict_set_item_string_decref(dict, field->name, obj);
+
+ }
+
+ if (dict)
+ PyTuple_SetItem(t, n++, dict);
+
+ if (get_argument_count(handler) == (int) n + 1) {
+ all_entries_dict = get_perf_sample_dict(sample, evsel, al,
+ callchain);
+ PyTuple_SetItem(t, n++, all_entries_dict);
+ } else {
+ Py_DECREF(callchain);
+ }
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ if (!dict)
+ call_object(handler, t, handler_name);
+ else
+ call_object(handler, t, default_handler_name);
+
+ Py_DECREF(t);
+}
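+
+/* For illustration, a script may define one handler per tracepoint,
+ * named "<system>__<name>" (hypothetical sched__sched_switch shown):
+ *
+ *	def sched__sched_switch(event_name, context, common_cpu,
+ *				common_secs, common_nsecs, common_pid,
+ *				common_comm, common_callchain, *args):
+ *		pass
+ *
+ * If no matching handler exists, trace_unhandled(event_name, context,
+ * fields_dict) is called with the event fields collected into a dict
+ * instead. Declaring one extra parameter on either handler makes the
+ * perf_sample dict be passed as the final argument.
+ */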
+
+static PyObject *tuple_new(unsigned int sz)
+{
+ PyObject *t;
+
+ t = PyTuple_New(sz);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+ return t;
+}
+
+static int tuple_set_u64(PyObject *t, unsigned int pos, u64 val)
+{
+#if BITS_PER_LONG == 64
+ return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
+#endif
+#if BITS_PER_LONG == 32
+ return PyTuple_SetItem(t, pos, PyLong_FromLongLong(val));
+#endif
+}
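+
+/* The BITS_PER_LONG split above exists because _PyLong_FromLong()
+ * (PyInt_FromLong on Python 2) takes a C long: on 64-bit builds a
+ * u64 fits in it, while 32-bit builds must use PyLong_FromLongLong()
+ * to avoid truncating the upper half.
+ */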
+
+static int tuple_set_s32(PyObject *t, unsigned int pos, s32 val)
+{
+ return PyTuple_SetItem(t, pos, _PyLong_FromLong(val));
+}
+
+static int tuple_set_string(PyObject *t, unsigned int pos, const char *s)
+{
+ return PyTuple_SetItem(t, pos, _PyUnicode_FromString(s));
+}
+
+static int python_export_evsel(struct db_export *dbe, struct perf_evsel *evsel)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(2);
+
+ tuple_set_u64(t, 0, evsel->db_id);
+ tuple_set_string(t, 1, perf_evsel__name(evsel));
+
+ call_object(tables->evsel_handler, t, "evsel_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_machine(struct db_export *dbe,
+ struct machine *machine)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(3);
+
+ tuple_set_u64(t, 0, machine->db_id);
+ tuple_set_s32(t, 1, machine->pid);
+ tuple_set_string(t, 2, machine->root_dir ? machine->root_dir : "");
+
+ call_object(tables->machine_handler, t, "machine_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_thread(struct db_export *dbe, struct thread *thread,
+ u64 main_thread_db_id, struct machine *machine)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(5);
+
+ tuple_set_u64(t, 0, thread->db_id);
+ tuple_set_u64(t, 1, machine->db_id);
+ tuple_set_u64(t, 2, main_thread_db_id);
+ tuple_set_s32(t, 3, thread->pid_);
+ tuple_set_s32(t, 4, thread->tid);
+
+ call_object(tables->thread_handler, t, "thread_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_comm(struct db_export *dbe, struct comm *comm)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(2);
+
+ tuple_set_u64(t, 0, comm->db_id);
+ tuple_set_string(t, 1, comm__str(comm));
+
+ call_object(tables->comm_handler, t, "comm_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_comm_thread(struct db_export *dbe, u64 db_id,
+ struct comm *comm, struct thread *thread)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(3);
+
+ tuple_set_u64(t, 0, db_id);
+ tuple_set_u64(t, 1, comm->db_id);
+ tuple_set_u64(t, 2, thread->db_id);
+
+ call_object(tables->comm_thread_handler, t, "comm_thread_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_dso(struct db_export *dbe, struct dso *dso,
+ struct machine *machine)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ char sbuild_id[SBUILD_ID_SIZE];
+ PyObject *t;
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
+ t = tuple_new(5);
+
+ tuple_set_u64(t, 0, dso->db_id);
+ tuple_set_u64(t, 1, machine->db_id);
+ tuple_set_string(t, 2, dso->short_name);
+ tuple_set_string(t, 3, dso->long_name);
+ tuple_set_string(t, 4, sbuild_id);
+
+ call_object(tables->dso_handler, t, "dso_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_symbol(struct db_export *dbe, struct symbol *sym,
+ struct dso *dso)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ u64 *sym_db_id = symbol__priv(sym);
+ PyObject *t;
+
+ t = tuple_new(6);
+
+ tuple_set_u64(t, 0, *sym_db_id);
+ tuple_set_u64(t, 1, dso->db_id);
+ tuple_set_u64(t, 2, sym->start);
+ tuple_set_u64(t, 3, sym->end);
+ tuple_set_s32(t, 4, sym->binding);
+ tuple_set_string(t, 5, sym->name);
+
+ call_object(tables->symbol_handler, t, "symbol_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_branch_type(struct db_export *dbe, u32 branch_type,
+ const char *name)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(2);
+
+ tuple_set_s32(t, 0, branch_type);
+ tuple_set_string(t, 1, name);
+
+ call_object(tables->branch_type_handler, t, "branch_type_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_sample(struct db_export *dbe,
+ struct export_sample *es)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+
+ t = tuple_new(22);
+
+ tuple_set_u64(t, 0, es->db_id);
+ tuple_set_u64(t, 1, es->evsel->db_id);
+ tuple_set_u64(t, 2, es->al->machine->db_id);
+ tuple_set_u64(t, 3, es->al->thread->db_id);
+ tuple_set_u64(t, 4, es->comm_db_id);
+ tuple_set_u64(t, 5, es->dso_db_id);
+ tuple_set_u64(t, 6, es->sym_db_id);
+ tuple_set_u64(t, 7, es->offset);
+ tuple_set_u64(t, 8, es->sample->ip);
+ tuple_set_u64(t, 9, es->sample->time);
+ tuple_set_s32(t, 10, es->sample->cpu);
+ tuple_set_u64(t, 11, es->addr_dso_db_id);
+ tuple_set_u64(t, 12, es->addr_sym_db_id);
+ tuple_set_u64(t, 13, es->addr_offset);
+ tuple_set_u64(t, 14, es->sample->addr);
+ tuple_set_u64(t, 15, es->sample->period);
+ tuple_set_u64(t, 16, es->sample->weight);
+ tuple_set_u64(t, 17, es->sample->transaction);
+ tuple_set_u64(t, 18, es->sample->data_src);
+ tuple_set_s32(t, 19, es->sample->flags & PERF_BRANCH_MASK);
+ tuple_set_s32(t, 20, !!(es->sample->flags & PERF_IP_FLAG_IN_TX));
+ tuple_set_u64(t, 21, es->call_path_id);
+
+ call_object(tables->sample_handler, t, "sample_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_call_path(struct db_export *dbe, struct call_path *cp)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ PyObject *t;
+ u64 parent_db_id, sym_db_id;
+
+ parent_db_id = cp->parent ? cp->parent->db_id : 0;
+ sym_db_id = cp->sym ? *(u64 *)symbol__priv(cp->sym) : 0;
+
+ t = tuple_new(4);
+
+ tuple_set_u64(t, 0, cp->db_id);
+ tuple_set_u64(t, 1, parent_db_id);
+ tuple_set_u64(t, 2, sym_db_id);
+ tuple_set_u64(t, 3, cp->ip);
+
+ call_object(tables->call_path_handler, t, "call_path_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_export_call_return(struct db_export *dbe,
+ struct call_return *cr)
+{
+ struct tables *tables = container_of(dbe, struct tables, dbe);
+ u64 comm_db_id = cr->comm ? cr->comm->db_id : 0;
+ PyObject *t;
+
+ t = tuple_new(11);
+
+ tuple_set_u64(t, 0, cr->db_id);
+ tuple_set_u64(t, 1, cr->thread->db_id);
+ tuple_set_u64(t, 2, comm_db_id);
+ tuple_set_u64(t, 3, cr->cp->db_id);
+ tuple_set_u64(t, 4, cr->call_time);
+ tuple_set_u64(t, 5, cr->return_time);
+ tuple_set_u64(t, 6, cr->branch_count);
+ tuple_set_u64(t, 7, cr->call_ref);
+ tuple_set_u64(t, 8, cr->return_ref);
+ tuple_set_u64(t, 9, cr->cp->parent->db_id);
+ tuple_set_s32(t, 10, cr->flags);
+
+ call_object(tables->call_return_handler, t, "call_return_table");
+
+ Py_DECREF(t);
+
+ return 0;
+}
+
+static int python_process_call_return(struct call_return *cr, void *data)
+{
+ struct db_export *dbe = data;
+
+ return db_export__call_return(dbe, cr);
+}
+
+static void python_process_general_event(struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ PyObject *handler, *t, *dict, *callchain;
+ static char handler_name[64];
+ unsigned n = 0;
+
+ snprintf(handler_name, sizeof(handler_name), "%s", "process_event");
+
+ handler = get_handler(handler_name);
+ if (!handler)
+ return;
+
+ /*
+ * Use MAX_FIELDS so the function can be extended later, though
+ * currently the tuple holds only one item.
+ */
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ /* ip unwinding */
+ callchain = python_process_callchain(sample, evsel, al);
+ dict = get_perf_sample_dict(sample, evsel, al, callchain);
+
+ PyTuple_SetItem(t, n++, dict);
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_event(union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al)
+{
+ struct tables *tables = &tables_global;
+
+ switch (evsel->attr.type) {
+ case PERF_TYPE_TRACEPOINT:
+ python_process_tracepoint(sample, evsel, al);
+ break;
+ /* Reserve for future process_hw/sw/raw APIs */
+ default:
+ if (tables->db_export_mode)
+ db_export__sample(&tables->dbe, event, sample, evsel, al);
+ else
+ python_process_general_event(sample, evsel, al);
+ }
+}
+
+static void get_handler_name(char *str, size_t size,
+ struct perf_evsel *evsel)
+{
+ char *p = str;
+
+ scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+
+ while ((p = strchr(p, ':'))) {
+ *p = '_';
+ p++;
+ }
+}
+
+static void
+process_stat(struct perf_evsel *counter, int cpu, int thread, u64 tstamp,
+ struct perf_counts_values *count)
+{
+ PyObject *handler, *t;
+ static char handler_name[256];
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ get_handler_name(handler_name, sizeof(handler_name),
+ counter);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ Py_DECREF(t); /* don't leak the tuple on the error path */
+ return;
+ }
+
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(cpu));
+ PyTuple_SetItem(t, n++, _PyLong_FromLong(thread));
+
+ tuple_set_u64(t, n++, tstamp);
+ tuple_set_u64(t, n++, count->val);
+ tuple_set_u64(t, n++, count->ena);
+ tuple_set_u64(t, n++, count->run);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_stat(struct perf_stat_config *config,
+ struct perf_evsel *counter, u64 tstamp)
+{
+ struct thread_map *threads = counter->threads;
+ struct cpu_map *cpus = counter->cpus;
+ int cpu, thread;
+
+ if (config->aggr_mode == AGGR_GLOBAL) {
+ process_stat(counter, -1, -1, tstamp,
+ &counter->counts->aggr);
+ return;
+ }
+
+ for (thread = 0; thread < threads->nr; thread++) {
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ process_stat(counter, cpus->map[cpu],
+ thread_map__pid(threads, thread), tstamp,
+ perf_counts(counter->counts, cpu, thread));
+ }
+ }
+}
+
+static void python_process_stat_interval(u64 tstamp)
+{
+ PyObject *handler, *t;
+ static const char handler_name[] = "stat__interval";
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ Py_DECREF(t); /* don't leak the tuple on the error path */
+ return;
+ }
+
+ tuple_set_u64(t, n++, tstamp);
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static int run_start_sub(void)
+{
+ main_module = PyImport_AddModule("__main__");
+ if (main_module == NULL)
+ return -1;
+ Py_INCREF(main_module);
+
+ main_dict = PyModule_GetDict(main_module);
+ if (main_dict == NULL)
+ goto error;
+ Py_INCREF(main_dict);
+
+ try_call_object("trace_begin", NULL);
+
+ return 0;
+
+error:
+ Py_XDECREF(main_dict);
+ Py_XDECREF(main_module);
+ return -1;
+}
+
+#define SET_TABLE_HANDLER_(name, handler_name, table_name) do { \
+ tables->handler_name = get_handler(#table_name); \
+ if (tables->handler_name) \
+ tables->dbe.export_ ## name = python_export_ ## name; \
+} while (0)
+
+#define SET_TABLE_HANDLER(name) \
+ SET_TABLE_HANDLER_(name, name ## _handler, name ## _table)
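+
+/*
+ * E.g. SET_TABLE_HANDLER(evsel) expands to:
+ *
+ *   tables->evsel_handler = get_handler("evsel_table");
+ *   if (tables->evsel_handler)
+ *           tables->dbe.export_evsel = python_export_evsel;
+ */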
+
+static void set_table_handlers(struct tables *tables)
+{
+ const char *perf_db_export_mode = "perf_db_export_mode";
+ const char *perf_db_export_calls = "perf_db_export_calls";
+ const char *perf_db_export_callchains = "perf_db_export_callchains";
+ PyObject *db_export_mode, *db_export_calls, *db_export_callchains;
+ bool export_calls = false;
+ bool export_callchains = false;
+ int ret;
+
+ memset(tables, 0, sizeof(struct tables));
+ if (db_export__init(&tables->dbe))
+ Py_FatalError("failed to initialize export");
+
+ db_export_mode = PyDict_GetItemString(main_dict, perf_db_export_mode);
+ if (!db_export_mode)
+ return;
+
+ ret = PyObject_IsTrue(db_export_mode);
+ if (ret == -1)
+ handler_call_die(perf_db_export_mode);
+ if (!ret)
+ return;
+
+ /* handle export calls */
+ tables->dbe.crp = NULL;
+ db_export_calls = PyDict_GetItemString(main_dict, perf_db_export_calls);
+ if (db_export_calls) {
+ ret = PyObject_IsTrue(db_export_calls);
+ if (ret == -1)
+ handler_call_die(perf_db_export_calls);
+ export_calls = !!ret;
+ }
+
+ if (export_calls) {
+ tables->dbe.crp =
+ call_return_processor__new(python_process_call_return,
+ &tables->dbe);
+ if (!tables->dbe.crp)
+ Py_FatalError("failed to create calls processor");
+ }
+
+ /* handle export callchains */
+ tables->dbe.cpr = NULL;
+ db_export_callchains = PyDict_GetItemString(main_dict,
+ perf_db_export_callchains);
+ if (db_export_callchains) {
+ ret = PyObject_IsTrue(db_export_callchains);
+ if (ret == -1)
+ handler_call_die(perf_db_export_callchains);
+ export_callchains = !!ret;
+ }
+
+ if (export_callchains) {
+ /*
+ * Attempt to use the call path root from the call return
+ * processor, if the call return processor is in use. Otherwise,
+ * we allocate a new call path root. This prevents exporting
+ * duplicate call path ids when both are in use simultaneously.
+ */
+ if (tables->dbe.crp)
+ tables->dbe.cpr = tables->dbe.crp->cpr;
+ else
+ tables->dbe.cpr = call_path_root__new();
+
+ if (!tables->dbe.cpr)
+ Py_FatalError("failed to create call path root");
+ }
+
+ tables->db_export_mode = true;
+ /*
+ * Reserve per symbol space for symbol->db_id via symbol__priv()
+ */
+ symbol_conf.priv_size = sizeof(u64);
+
+ SET_TABLE_HANDLER(evsel);
+ SET_TABLE_HANDLER(machine);
+ SET_TABLE_HANDLER(thread);
+ SET_TABLE_HANDLER(comm);
+ SET_TABLE_HANDLER(comm_thread);
+ SET_TABLE_HANDLER(dso);
+ SET_TABLE_HANDLER(symbol);
+ SET_TABLE_HANDLER(branch_type);
+ SET_TABLE_HANDLER(sample);
+ SET_TABLE_HANDLER(call_path);
+ SET_TABLE_HANDLER(call_return);
+}
+
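+/*
+ * With Python 2 the argv strings are borrowed from the caller, so only the
+ * array itself is freed; with Python 3 each entry was allocated by
+ * Py_DecodeLocale() and must be released with PyMem_RawFree().
+ */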
+#if PY_MAJOR_VERSION < 3
+static void _free_command_line(const char **command_line, int num __maybe_unused)
+{
+ free(command_line);
+}
+#else
+static void _free_command_line(wchar_t **command_line, int num)
+{
+ int i;
+ for (i = 0; i < num; i++)
+ PyMem_RawFree(command_line[i]);
+ free(command_line);
+}
+#endif
+
+/*
+ * Start trace script
+ */
+static int python_start_script(const char *script, int argc, const char **argv)
+{
+ struct tables *tables = &tables_global;
+#if PY_MAJOR_VERSION < 3
+ const char **command_line;
+#else
+ wchar_t **command_line;
+#endif
+ /*
+ * Use a non-const name variable to cope with Python 2.6's
+ * PyImport_AppendInittab prototype.
+ */
+ char buf[PATH_MAX], name[19] = "perf_trace_context";
+ int i, err = 0;
+ FILE *fp;
+
+#if PY_MAJOR_VERSION < 3
+ command_line = malloc((argc + 1) * sizeof(const char *));
+ command_line[0] = script;
+ for (i = 1; i < argc + 1; i++)
+ command_line[i] = argv[i - 1];
+ PyImport_AppendInittab(name, initperf_trace_context);
+#else
+ command_line = malloc((argc + 1) * sizeof(wchar_t *));
+ command_line[0] = Py_DecodeLocale(script, NULL);
+ for (i = 1; i < argc + 1; i++)
+ command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+ PyImport_AppendInittab(name, PyInit_perf_trace_context);
+#endif
+ Py_Initialize();
+
+#if PY_MAJOR_VERSION < 3
+ PySys_SetArgv(argc + 1, (char **)command_line);
+#else
+ PySys_SetArgv(argc + 1, command_line);
+#endif
+
+ fp = fopen(script, "r");
+ if (!fp) {
+ snprintf(buf, sizeof(buf), "Can't open python script \"%s\"", script);
+ perror(buf);
+ err = -1;
+ goto error;
+ }
+
+ err = PyRun_SimpleFile(fp, script);
+ if (err) {
+ fprintf(stderr, "Error running python script %s\n", script);
+ goto error;
+ }
+
+ err = run_start_sub();
+ if (err) {
+ fprintf(stderr, "Error starting python script %s\n", script);
+ goto error;
+ }
+
+ set_table_handlers(tables);
+
+ if (tables->db_export_mode) {
+ err = db_export__branch_types(&tables->dbe);
+ if (err)
+ goto error;
+ }
+
+ _free_command_line(command_line, argc + 1);
+
+ return err;
+error:
+ Py_Finalize();
+ _free_command_line(command_line, argc + 1);
+
+ return err;
+}
+
+static int python_flush_script(void)
+{
+ struct tables *tables = &tables_global;
+
+ return db_export__flush(&tables->dbe);
+}
+
+/*
+ * Stop trace script
+ */
+static int python_stop_script(void)
+{
+ struct tables *tables = &tables_global;
+
+ try_call_object("trace_end", NULL);
+
+ db_export__exit(&tables->dbe);
+
+ Py_XDECREF(main_dict);
+ Py_XDECREF(main_module);
+ Py_Finalize();
+
+ return 0;
+}
+
+static int python_generate_script(struct tep_handle *pevent, const char *outfile)
+{
+ struct event_format *event = NULL;
+ struct format_field *f;
+ char fname[PATH_MAX];
+ int not_first, count;
+ FILE *ofp;
+
+ snprintf(fname, sizeof(fname), "%s.py", outfile);
+ ofp = fopen(fname, "w");
+ if (ofp == NULL) {
+ fprintf(stderr, "couldn't open %s\n", fname);
+ return -1;
+ }
+ fprintf(ofp, "# perf script event handlers, "
+ "generated by perf script -g python\n");
+
+ fprintf(ofp, "# Licensed under the terms of the GNU GPL"
+ " License version 2\n\n");
+
+ fprintf(ofp, "# The common_* event handler fields are the most useful "
+ "fields common to\n");
+
+ fprintf(ofp, "# all events. They don't necessarily correspond to "
+ "the 'common_*' fields\n");
+
+ fprintf(ofp, "# in the format files. Those fields not available as "
+ "handler params can\n");
+
+ fprintf(ofp, "# be retrieved using Python functions of the form "
+ "common_*(context).\n");
+
+ fprintf(ofp, "# See the perf-script-python Documentation for the list "
+ "of available functions.\n\n");
+
+ fprintf(ofp, "from __future__ import print_function\n\n");
+ fprintf(ofp, "import os\n");
+ fprintf(ofp, "import sys\n\n");
+
+ fprintf(ofp, "sys.path.append(os.environ['PERF_EXEC_PATH'] + \\\n");
+ fprintf(ofp, "\t'/scripts/python/Perf-Trace-Util/lib/Perf/Trace')\n");
+ fprintf(ofp, "\nfrom perf_trace_context import *\n");
+ fprintf(ofp, "from Core import *\n\n\n");
+
+ fprintf(ofp, "def trace_begin():\n");
+ fprintf(ofp, "\tprint(\"in trace_begin\")\n\n");
+
+ fprintf(ofp, "def trace_end():\n");
+ fprintf(ofp, "\tprint(\"in trace_end\")\n\n");
+
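+ /*
+ * Emit one handler per tracepoint found in the tracing data. As an
+ * illustration, an event in system "sched" named "sched_switch"
+ * (hypothetical here) gets a handler of the form:
+ *
+ *   def sched__sched_switch(event_name, context, common_cpu,
+ *           common_secs, common_nsecs, common_pid, common_comm,
+ *           common_callchain, <event fields>, perf_sample_dict):
+ */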
+ while ((event = trace_find_next_event(pevent, event))) {
+ fprintf(ofp, "def %s__%s(", event->system, event->name);
+ fprintf(ofp, "event_name, ");
+ fprintf(ofp, "context, ");
+ fprintf(ofp, "common_cpu,\n");
+ fprintf(ofp, "\tcommon_secs, ");
+ fprintf(ofp, "common_nsecs, ");
+ fprintf(ofp, "common_pid, ");
+ fprintf(ofp, "common_comm,\n\t");
+ fprintf(ofp, "common_callchain, ");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t");
+
+ fprintf(ofp, "%s", f->name);
+ }
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t\t");
+ fprintf(ofp, "perf_sample_dict");
+
+ fprintf(ofp, "):\n");
+
+ fprintf(ofp, "\t\tprint_header(event_name, common_cpu, "
+ "common_secs, common_nsecs,\n\t\t\t"
+ "common_pid, common_comm)\n\n");
+
+ fprintf(ofp, "\t\tprint(\"");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+ if (count && count % 3 == 0) {
+ fprintf(ofp, "\" \\\n\t\t\"");
+ }
+ count++;
+
+ fprintf(ofp, "%s=", f->name);
+ if (f->flags & FIELD_IS_STRING ||
+ f->flags & FIELD_IS_FLAG ||
+ f->flags & FIELD_IS_ARRAY ||
+ f->flags & FIELD_IS_SYMBOLIC)
+ fprintf(ofp, "%%s");
+ else if (f->flags & FIELD_IS_SIGNED)
+ fprintf(ofp, "%%d");
+ else
+ fprintf(ofp, "%%u");
+ }
+
+ fprintf(ofp, "\" %% \\\n\t\t(");
+
+ not_first = 0;
+ count = 0;
+
+ for (f = event->format.fields; f; f = f->next) {
+ if (not_first++)
+ fprintf(ofp, ", ");
+
+ if (++count % 5 == 0)
+ fprintf(ofp, "\n\t\t");
+
+ if (f->flags & FIELD_IS_FLAG) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t\t");
+ count = 4;
+ }
+ fprintf(ofp, "flag_str(\"");
+ fprintf(ofp, "%s__%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", %s)", f->name,
+ f->name);
+ } else if (f->flags & FIELD_IS_SYMBOLIC) {
+ if ((count - 1) % 5 != 0) {
+ fprintf(ofp, "\n\t\t");
+ count = 4;
+ }
+ fprintf(ofp, "symbol_str(\"");
+ fprintf(ofp, "%s__%s\", ", event->system,
+ event->name);
+ fprintf(ofp, "\"%s\", %s)", f->name,
+ f->name);
+ } else
+ fprintf(ofp, "%s", f->name);
+ }
+
+ fprintf(ofp, "))\n\n");
+
+ fprintf(ofp, "\t\tprint('Sample: {'+"
+ "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
+
+ fprintf(ofp, "\t\tfor node in common_callchain:");
+ fprintf(ofp, "\n\t\t\tif 'sym' in node:");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\\t[%%x] %%s\" %% (node['ip'], node['sym']['name']))");
+ fprintf(ofp, "\n\t\t\telse:");
+ fprintf(ofp, "\n\t\t\t\tprint(\"\t[%%x]\" %% (node['ip']))\n\n");
+ fprintf(ofp, "\t\tprint()\n\n");
+
+ }
+
+ fprintf(ofp, "def trace_unhandled(event_name, context, "
+ "event_fields_dict, perf_sample_dict):\n");
+
+ fprintf(ofp, "\t\tprint(get_dict_as_string(event_fields_dict))\n");
+ fprintf(ofp, "\t\tprint('Sample: {'+"
+ "get_dict_as_string(perf_sample_dict['sample'], ', ')+'}')\n\n");
+
+ fprintf(ofp, "def print_header("
+ "event_name, cpu, secs, nsecs, pid, comm):\n"
+ "\tprint(\"%%-20s %%5u %%05u.%%09u %%8u %%-20s \" %% \\\n\t"
+ "(event_name, cpu, secs, nsecs, pid, comm), end=\"\")\n\n");
+
+ fprintf(ofp, "def get_dict_as_string(a_dict, delimiter=' '):\n"
+ "\treturn delimiter.join"
+ "(['%%s=%%s'%%(k,str(v))for k,v in sorted(a_dict.items())])\n");
+
+ fclose(ofp);
+
+ fprintf(stderr, "generated Python script: %s\n", fname);
+
+ return 0;
+}
+
+struct scripting_ops python_scripting_ops = {
+ .name = "Python",
+ .start_script = python_start_script,
+ .flush_script = python_flush_script,
+ .stop_script = python_stop_script,
+ .process_event = python_process_event,
+ .process_stat = python_process_stat,
+ .process_stat_interval = python_process_stat_interval,
+ .generate_script = python_generate_script,
+};
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
new file mode 100644
index 000000000..3be1534f1
--- /dev/null
+++ b/tools/perf/util/session.c
@@ -0,0 +1,2251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+#include <traceevent/event-parse.h>
+#include <api/fs/fs.h>
+
+#include <byteswap.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+
+#include "evlist.h"
+#include "evsel.h"
+#include "memswap.h"
+#include "session.h"
+#include "tool.h"
+#include "sort.h"
+#include "util.h"
+#include "cpumap.h"
+#include "perf_regs.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "stat.h"
+#include "arch/common.h"
+
+static int perf_session__deliver_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool,
+ u64 file_offset);
+
+static int perf_session__open(struct perf_session *session)
+{
+ struct perf_data *data = session->data;
+
+ if (perf_session__read_header(session) < 0) {
+ pr_err("incompatible file format (rerun with -v to learn more)\n");
+ return -1;
+ }
+
+ if (perf_data__is_pipe(data))
+ return 0;
+
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
+ if (!perf_evlist__valid_sample_type(session->evlist)) {
+ pr_err("non matching sample_type\n");
+ return -1;
+ }
+
+ if (!perf_evlist__valid_sample_id_all(session->evlist)) {
+ pr_err("non matching sample_id_all\n");
+ return -1;
+ }
+
+ if (!perf_evlist__valid_read_format(session->evlist)) {
+ pr_err("non matching read_format\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+void perf_session__set_id_hdr_size(struct perf_session *session)
+{
+ u16 id_hdr_size = perf_evlist__id_hdr_size(session->evlist);
+
+ machines__set_id_hdr_size(&session->machines, id_hdr_size);
+}
+
+int perf_session__create_kernel_maps(struct perf_session *session)
+{
+ int ret = machine__create_kernel_maps(&session->machines.host);
+
+ if (ret >= 0)
+ ret = machines__create_guest_kernel_maps(&session->machines);
+ return ret;
+}
+
+static void perf_session__destroy_kernel_maps(struct perf_session *session)
+{
+ machines__destroy_kernel_maps(&session->machines);
+}
+
+static bool perf_session__has_comm_exec(struct perf_session *session)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.comm_exec)
+ return true;
+ }
+
+ return false;
+}
+
+static void perf_session__set_comm_exec(struct perf_session *session)
+{
+ bool comm_exec = perf_session__has_comm_exec(session);
+
+ machines__set_comm_exec(&session->machines, comm_exec);
+}
+
+static int ordered_events__deliver_event(struct ordered_events *oe,
+ struct ordered_event *event)
+{
+ struct perf_session *session = container_of(oe, struct perf_session,
+ ordered_events);
+
+ return perf_session__deliver_event(session, event->event,
+ session->tool, event->file_offset);
+}
+
+struct perf_session *perf_session__new(struct perf_data *data,
+ bool repipe, struct perf_tool *tool)
+{
+ struct perf_session *session = zalloc(sizeof(*session));
+
+ if (!session)
+ goto out;
+
+ session->repipe = repipe;
+ session->tool = tool;
+ INIT_LIST_HEAD(&session->auxtrace_index);
+ machines__init(&session->machines);
+ ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
+
+ if (data) {
+ if (perf_data__open(data))
+ goto out_delete;
+
+ session->data = data;
+
+ if (perf_data__is_read(data)) {
+ if (perf_session__open(session) < 0)
+ goto out_close;
+
+ /*
+ * set session attributes that are present in perf.data
+ * but not in pipe-mode.
+ */
+ if (!data->is_pipe) {
+ perf_session__set_id_hdr_size(session);
+ perf_session__set_comm_exec(session);
+ }
+ }
+ } else {
+ session->machines.host.env = &perf_env;
+ }
+
+ session->machines.host.single_address_space =
+ perf_env__single_address_space(session->machines.host.env);
+
+ if (!data || perf_data__is_write(data)) {
+ /*
+ * In O_RDONLY mode this will be performed when reading the
+ * kernel MMAP event, in perf_event__process_mmap().
+ */
+ if (perf_session__create_kernel_maps(session) < 0)
+ pr_warning("Cannot read kernel map\n");
+ }
+
+ /*
+ * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
+ * processed, so perf_evlist__sample_id_all is not meaningful here.
+ */
+ if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
+ tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
+ dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
+ tool->ordered_events = false;
+ }
+
+ return session;
+
+ out_close:
+ perf_data__close(data);
+ out_delete:
+ perf_session__delete(session);
+ out:
+ return NULL;
+}
+
+static void perf_session__delete_threads(struct perf_session *session)
+{
+ machine__delete_threads(&session->machines.host);
+}
+
+void perf_session__delete(struct perf_session *session)
+{
+ if (session == NULL)
+ return;
+ auxtrace__free(session);
+ auxtrace_index__free(&session->auxtrace_index);
+ perf_session__destroy_kernel_maps(session);
+ perf_session__delete_threads(session);
+ perf_env__exit(&session->header.env);
+ machines__exit(&session->machines);
+ if (session->data)
+ perf_data__close(session->data);
+ free(session);
+}
+
+static int process_event_synth_tracing_data_stub(struct perf_tool *tool
+ __maybe_unused,
+ union perf_event *event
+ __maybe_unused,
+ struct perf_session *session
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_synth_attr_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_evlist **pevlist
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_synth_event_update_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_evlist **pevlist
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_event_update(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_sample_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_event_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_finished_round_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct ordered_events *oe __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_finished_round(struct perf_tool *tool,
+ union perf_event *event,
+ struct ordered_events *oe);
+
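+/*
+ * Consume n bytes from fd through a bounce buffer; used in place of
+ * lseek() when the input is an unseekable pipe.
+ */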
+static int skipn(int fd, off_t n)
+{
+ char buf[4096];
+ ssize_t ret;
+
+ while (n > 0) {
+ ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+ if (ret <= 0)
+ return ret;
+ n -= ret;
+ }
+
+ return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session
+ __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ if (perf_data__is_pipe(session->data))
+ skipn(perf_data__fd(session->data), event->auxtrace.size);
+ return event->auxtrace.size;
+}
+
+static int process_event_op2_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_thread_map_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_thread_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_cpu_map_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_cpu_map(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static
+int process_event_stat_config_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *session __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_config(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *perf_session
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
+static int process_stat_round_stub(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_session *perf_session
+ __maybe_unused)
+{
+ if (dump_trace)
+ perf_event__fprintf_stat_round(event, stdout);
+
+ dump_printf(": unhandled!\n");
+ return 0;
+}
+
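+/*
+ * Install default or stub implementations for any callbacks the tool left
+ * NULL, so event dispatch never has to check for missing handlers.
+ */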
+void perf_tool__fill_defaults(struct perf_tool *tool)
+{
+ if (tool->sample == NULL)
+ tool->sample = process_event_sample_stub;
+ if (tool->mmap == NULL)
+ tool->mmap = process_event_stub;
+ if (tool->mmap2 == NULL)
+ tool->mmap2 = process_event_stub;
+ if (tool->comm == NULL)
+ tool->comm = process_event_stub;
+ if (tool->namespaces == NULL)
+ tool->namespaces = process_event_stub;
+ if (tool->fork == NULL)
+ tool->fork = process_event_stub;
+ if (tool->exit == NULL)
+ tool->exit = process_event_stub;
+ if (tool->lost == NULL)
+ tool->lost = perf_event__process_lost;
+ if (tool->lost_samples == NULL)
+ tool->lost_samples = perf_event__process_lost_samples;
+ if (tool->aux == NULL)
+ tool->aux = perf_event__process_aux;
+ if (tool->itrace_start == NULL)
+ tool->itrace_start = perf_event__process_itrace_start;
+ if (tool->context_switch == NULL)
+ tool->context_switch = perf_event__process_switch;
+ if (tool->read == NULL)
+ tool->read = process_event_sample_stub;
+ if (tool->throttle == NULL)
+ tool->throttle = process_event_stub;
+ if (tool->unthrottle == NULL)
+ tool->unthrottle = process_event_stub;
+ if (tool->attr == NULL)
+ tool->attr = process_event_synth_attr_stub;
+ if (tool->event_update == NULL)
+ tool->event_update = process_event_synth_event_update_stub;
+ if (tool->tracing_data == NULL)
+ tool->tracing_data = process_event_synth_tracing_data_stub;
+ if (tool->build_id == NULL)
+ tool->build_id = process_event_op2_stub;
+ if (tool->finished_round == NULL) {
+ if (tool->ordered_events)
+ tool->finished_round = process_finished_round;
+ else
+ tool->finished_round = process_finished_round_stub;
+ }
+ if (tool->id_index == NULL)
+ tool->id_index = process_event_op2_stub;
+ if (tool->auxtrace_info == NULL)
+ tool->auxtrace_info = process_event_op2_stub;
+ if (tool->auxtrace == NULL)
+ tool->auxtrace = process_event_auxtrace_stub;
+ if (tool->auxtrace_error == NULL)
+ tool->auxtrace_error = process_event_op2_stub;
+ if (tool->thread_map == NULL)
+ tool->thread_map = process_event_thread_map_stub;
+ if (tool->cpu_map == NULL)
+ tool->cpu_map = process_event_cpu_map_stub;
+ if (tool->stat_config == NULL)
+ tool->stat_config = process_event_stat_config_stub;
+ if (tool->stat == NULL)
+ tool->stat = process_stat_stub;
+ if (tool->stat_round == NULL)
+ tool->stat_round = process_stat_round_stub;
+ if (tool->time_conv == NULL)
+ tool->time_conv = process_event_op2_stub;
+ if (tool->feature == NULL)
+ tool->feature = process_event_op2_stub;
+}
+
+static void swap_sample_id_all(union perf_event *event, void *data)
+{
+ void *end = (void *) event + event->header.size;
+ int size = end - data;
+
+ BUG_ON(size % sizeof(u64));
+ mem_bswap_64(data, size);
+}
+
+static void perf_event__all64_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ struct perf_event_header *hdr = &event->header;
+ mem_bswap_64(hdr + 1, event->header.size - sizeof(*hdr));
+}
+
+static void perf_event__comm_swap(union perf_event *event, bool sample_id_all)
+{
+ event->comm.pid = bswap_32(event->comm.pid);
+ event->comm.tid = bswap_32(event->comm.tid);
+
+ if (sample_id_all) {
+ void *data = &event->comm.comm;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static void perf_event__mmap_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->mmap.pid = bswap_32(event->mmap.pid);
+ event->mmap.tid = bswap_32(event->mmap.tid);
+ event->mmap.start = bswap_64(event->mmap.start);
+ event->mmap.len = bswap_64(event->mmap.len);
+ event->mmap.pgoff = bswap_64(event->mmap.pgoff);
+
+ if (sample_id_all) {
+ void *data = &event->mmap.filename;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static void perf_event__mmap2_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->mmap2.pid = bswap_32(event->mmap2.pid);
+ event->mmap2.tid = bswap_32(event->mmap2.tid);
+ event->mmap2.start = bswap_64(event->mmap2.start);
+ event->mmap2.len = bswap_64(event->mmap2.len);
+ event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
+ event->mmap2.maj = bswap_32(event->mmap2.maj);
+ event->mmap2.min = bswap_32(event->mmap2.min);
+ event->mmap2.ino = bswap_64(event->mmap2.ino);
+ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
+
+ if (sample_id_all) {
+ void *data = &event->mmap2.filename;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
+static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
+{
+ event->fork.pid = bswap_32(event->fork.pid);
+ event->fork.tid = bswap_32(event->fork.tid);
+ event->fork.ppid = bswap_32(event->fork.ppid);
+ event->fork.ptid = bswap_32(event->fork.ptid);
+ event->fork.time = bswap_64(event->fork.time);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->fork + 1);
+}
+
+static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
+{
+ event->read.pid = bswap_32(event->read.pid);
+ event->read.tid = bswap_32(event->read.tid);
+ event->read.value = bswap_64(event->read.value);
+ event->read.time_enabled = bswap_64(event->read.time_enabled);
+ event->read.time_running = bswap_64(event->read.time_running);
+ event->read.id = bswap_64(event->read.id);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->read + 1);
+}
+
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+ event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+ event->aux.aux_size = bswap_64(event->aux.aux_size);
+ event->aux.flags = bswap_64(event->aux.flags);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->aux + 1);
+}
+
+static void perf_event__itrace_start_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->itrace_start.pid = bswap_32(event->itrace_start.pid);
+ event->itrace_start.tid = bswap_32(event->itrace_start.tid);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
+static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
+{
+ if (event->header.type == PERF_RECORD_SWITCH_CPU_WIDE) {
+ event->context_switch.next_prev_pid =
+ bswap_32(event->context_switch.next_prev_pid);
+ event->context_switch.next_prev_tid =
+ bswap_32(event->context_switch.next_prev_tid);
+ }
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->context_switch + 1);
+}
+
+static void perf_event__throttle_swap(union perf_event *event,
+ bool sample_id_all)
+{
+ event->throttle.time = bswap_64(event->throttle.time);
+ event->throttle.id = bswap_64(event->throttle.id);
+ event->throttle.stream_id = bswap_64(event->throttle.stream_id);
+
+ if (sample_id_all)
+ swap_sample_id_all(event, &event->throttle + 1);
+}
+
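+/* Reverse the bit order within a single byte, e.g. 0x80 becomes 0x01. */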
+static u8 revbyte(u8 b)
+{
+ int rev = (b >> 4) | ((b & 0xf) << 4);
+ rev = ((rev & 0xcc) >> 2) | ((rev & 0x33) << 2);
+ rev = ((rev & 0xaa) >> 1) | ((rev & 0x55) << 1);
+ return (u8) rev;
+}
+
+/*
+ * XXX this is a hack in an attempt to carry the flags bitfield
+ * through the endian village. The ABI says:
+ *
+ * Bit-fields are allocated from right to left (least to most significant)
+ * on little-endian implementations and from left to right (most to least
+ * significant) on big-endian implementations.
+ *
+ * The above seems to be byte specific, so we need to reverse each
+ * byte of the bitfield. 'Internet' also says this might be implementation
+ * specific and we probably need a proper fix: carry the perf_event_attr
+ * bitfield flags in a separate data file FEAT_ section. Though this seems
+ * to work for now.
+ */
+static void swap_bitfield(u8 *p, unsigned len)
+{
+ unsigned i;
+
+ for (i = 0; i < len; i++) {
+ *p = revbyte(*p);
+ p++;
+ }
+}
+
+/* exported for swapping attributes in file header */
+void perf_event__attr_swap(struct perf_event_attr *attr)
+{
+ attr->type = bswap_32(attr->type);
+ attr->size = bswap_32(attr->size);
+
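+ /*
+ * bswap_safe(f, n) checks that the on-file attr->size extends beyond
+ * the offset of field f plus n elements of its type, so fields an
+ * older perf.data file never wrote are left untouched.
+ */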
+#define bswap_safe(f, n) \
+ (attr->size > (offsetof(struct perf_event_attr, f) + \
+ sizeof(attr->f) * (n)))
+#define bswap_field(f, sz) \
+do { \
+ if (bswap_safe(f, 0)) \
+ attr->f = bswap_##sz(attr->f); \
+} while(0)
+#define bswap_field_16(f) bswap_field(f, 16)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+ bswap_field_64(config);
+ bswap_field_64(sample_period);
+ bswap_field_64(sample_type);
+ bswap_field_64(read_format);
+ bswap_field_32(wakeup_events);
+ bswap_field_32(bp_type);
+ bswap_field_64(bp_addr);
+ bswap_field_64(bp_len);
+ bswap_field_64(branch_sample_type);
+ bswap_field_64(sample_regs_user);
+ bswap_field_32(sample_stack_user);
+ bswap_field_32(aux_watermark);
+ bswap_field_16(sample_max_stack);
+
+ /*
+ * The bitfields come right after read_format. Check against
+ * read_format because offsetof() cannot be used on a bitfield.
+ */
+ if (bswap_safe(read_format, 1))
+ swap_bitfield((u8 *) (&attr->read_format + 1),
+ sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
+}
+
+static void perf_event__hdr_attr_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ size_t size;
+
+ perf_event__attr_swap(&event->attr.attr);
+
+ size = event->header.size;
+ size -= (void *)&event->attr.id - (void *)event;
+ mem_bswap_64(event->attr.id, size);
+}
+
+static void perf_event__event_update_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->event_update.type = bswap_64(event->event_update.type);
+ event->event_update.id = bswap_64(event->event_update.id);
+}
+
+static void perf_event__event_type_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->event_type.event_type.event_id =
+ bswap_64(event->event_type.event_type.event_id);
+}
+
+static void perf_event__tracing_data_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->tracing_data.size = bswap_32(event->tracing_data.size);
+}
+
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ size_t size;
+
+ event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+ size = event->header.size;
+ size -= (void *)&event->auxtrace_info.priv - (void *)event;
+ mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+static void perf_event__auxtrace_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->auxtrace.size = bswap_64(event->auxtrace.size);
+ event->auxtrace.offset = bswap_64(event->auxtrace.offset);
+ event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+ event->auxtrace.idx = bswap_32(event->auxtrace.idx);
+ event->auxtrace.tid = bswap_32(event->auxtrace.tid);
+ event->auxtrace.cpu = bswap_32(event->auxtrace.cpu);
+}
+
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+ event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+ event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu);
+ event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid);
+ event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid);
+ event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip);
+}
+
+static void perf_event__thread_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ unsigned i;
+
+ event->thread_map.nr = bswap_64(event->thread_map.nr);
+
+ for (i = 0; i < event->thread_map.nr; i++)
+ event->thread_map.entries[i].pid = bswap_64(event->thread_map.entries[i].pid);
+}
+
+static void perf_event__cpu_map_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ struct cpu_map_data *data = &event->cpu_map.data;
+ struct cpu_map_entries *cpus;
+ struct cpu_map_mask *mask;
+ unsigned i;
+
+ data->type = bswap_64(data->type);
+
+ switch (data->type) {
+ case PERF_CPU_MAP__CPUS:
+ cpus = (struct cpu_map_entries *)data->data;
+
+ cpus->nr = bswap_16(cpus->nr);
+
+ for (i = 0; i < cpus->nr; i++)
+ cpus->cpu[i] = bswap_16(cpus->cpu[i]);
+ break;
+ case PERF_CPU_MAP__MASK:
+ mask = (struct cpu_map_mask *) data->data;
+
+ mask->nr = bswap_16(mask->nr);
+ mask->long_size = bswap_16(mask->long_size);
+
+ switch (mask->long_size) {
+ case 4: mem_bswap_32(&mask->mask, mask->nr); break;
+ case 8: mem_bswap_64(&mask->mask, mask->nr); break;
+ default:
+ pr_err("cpu_map swap: unsupported long size\n");
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+static void perf_event__stat_config_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ u64 size;
+
+ size = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+ size += 1; /* nr item itself */
+ mem_bswap_64(&event->stat_config.nr, size);
+}
+
+static void perf_event__stat_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat.id = bswap_64(event->stat.id);
+ event->stat.thread = bswap_32(event->stat.thread);
+ event->stat.cpu = bswap_32(event->stat.cpu);
+ event->stat.val = bswap_64(event->stat.val);
+ event->stat.ena = bswap_64(event->stat.ena);
+ event->stat.run = bswap_64(event->stat.run);
+}
+
+static void perf_event__stat_round_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
+{
+ event->stat_round.type = bswap_64(event->stat_round.type);
+ event->stat_round.time = bswap_64(event->stat_round.time);
+}
+
+typedef void (*perf_event__swap_op)(union perf_event *event,
+ bool sample_id_all);
+
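+/* Byte-swap handlers, indexed by perf_event header type. */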
+static perf_event__swap_op perf_event__swap_ops[] = {
+ [PERF_RECORD_MMAP] = perf_event__mmap_swap,
+ [PERF_RECORD_MMAP2] = perf_event__mmap2_swap,
+ [PERF_RECORD_COMM] = perf_event__comm_swap,
+ [PERF_RECORD_FORK] = perf_event__task_swap,
+ [PERF_RECORD_EXIT] = perf_event__task_swap,
+ [PERF_RECORD_LOST] = perf_event__all64_swap,
+ [PERF_RECORD_READ] = perf_event__read_swap,
+ [PERF_RECORD_THROTTLE] = perf_event__throttle_swap,
+ [PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap,
+ [PERF_RECORD_SAMPLE] = perf_event__all64_swap,
+ [PERF_RECORD_AUX] = perf_event__aux_swap,
+ [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap,
+ [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap,
+ [PERF_RECORD_SWITCH] = perf_event__switch_swap,
+ [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
+ [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
+ [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
+ [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
+ [PERF_RECORD_HEADER_BUILD_ID] = NULL,
+ [PERF_RECORD_ID_INDEX] = perf_event__all64_swap,
+ [PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap,
+ [PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap,
+ [PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap,
+ [PERF_RECORD_THREAD_MAP] = perf_event__thread_map_swap,
+ [PERF_RECORD_CPU_MAP] = perf_event__cpu_map_swap,
+ [PERF_RECORD_STAT_CONFIG] = perf_event__stat_config_swap,
+ [PERF_RECORD_STAT] = perf_event__stat_swap,
+ [PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
+ [PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
+ [PERF_RECORD_TIME_CONV] = perf_event__all64_swap,
+ [PERF_RECORD_HEADER_MAX] = NULL,
+};
+
+/*
+ * When perf record finishes a pass over every buffer, it records this
+ * pseudo event.
+ * We record the max timestamp t found in pass n.
+ * Assuming these timestamps are monotonic across cpus, we know that if
+ * a buffer still has events with timestamps below t, they will all be
+ * available and read in pass n + 1.
+ * Hence when we start to read pass n + 2, we can safely flush every
+ * event with a timestamp below t.
+ *
+ * ============ PASS n =================
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 1 | 2
+ * 2 | 3
+ * - | 4 <--- max recorded
+ *
+ * ============ PASS n + 1 ==============
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 3 | 5
+ * 4 | 6
+ * 5 | 7 <---- max recorded
+ *
+ * Flush every event below timestamp 4
+ *
+ * ============ PASS n + 2 ==============
+ * CPU 0 | CPU 1
+ * |
+ * cnt1 timestamps | cnt2 timestamps
+ * 6 | 8
+ * 7 | 9
+ * - | 10
+ *
+ * Flush every event below timestamp 7
+ * etc...
+ */
+static int process_finished_round(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct ordered_events *oe)
+{
+ if (dump_trace)
+ fprintf(stdout, "\n");
+ return ordered_events__flush(oe, OE_FLUSH__ROUND);
+}
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+ u64 timestamp, u64 file_offset)
+{
+ return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset);
+}
+
+static void callchain__lbr_callstack_printf(struct perf_sample *sample)
+{
+ struct ip_callchain *callchain = sample->callchain;
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ u64 kernel_callchain_nr = callchain->nr;
+ unsigned int i;
+
+ for (i = 0; i < kernel_callchain_nr; i++) {
+ if (callchain->ips[i] == PERF_CONTEXT_USER)
+ break;
+ }
+
+ if ((i != kernel_callchain_nr) && lbr_stack->nr) {
+ u64 total_nr;
+ /*
+ * The LBR callstack can only capture the user call chain;
+ * i is the number of kernel call chain entries,
+ * 1 is for PERF_CONTEXT_USER.
+ *
+ * The user call chain is stored in LBR registers.
+ * LBR entries are register pairs: the caller is stored
+ * in the "from" register, while the callee is stored
+ * in the "to" register.
+ * For example, for a call stack
+ * "A"->"B"->"C"->"D",
+ * the LBR registers will record
+ * "C"->"D", "B"->"C", "A"->"B".
+ * So only the first "to" register and all "from"
+ * registers are needed to reconstruct the whole stack.
+ */
+ total_nr = i + 1 + lbr_stack->nr + 1;
+ kernel_callchain_nr = i + 1;
+
+ printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr);
+
+ for (i = 0; i < kernel_callchain_nr; i++)
+ printf("..... %2d: %016" PRIx64 "\n",
+ i, callchain->ips[i]);
+
+ printf("..... %2d: %016" PRIx64 "\n",
+ (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+ for (i = 0; i < lbr_stack->nr; i++)
+ printf("..... %2d: %016" PRIx64 "\n",
+ (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+ }
+}
+
+static void callchain__printf(struct perf_evsel *evsel,
+ struct perf_sample *sample)
+{
+ unsigned int i;
+ struct ip_callchain *callchain = sample->callchain;
+
+ if (perf_evsel__has_branch_callstack(evsel))
+ callchain__lbr_callstack_printf(sample);
+
+ printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
+
+ for (i = 0; i < callchain->nr; i++)
+ printf("..... %2d: %016" PRIx64 "\n",
+ i, callchain->ips[i]);
+}
+
+static void branch_stack__printf(struct perf_sample *sample)
+{
+ uint64_t i;
+
+ printf("... branch stack: nr:%" PRIu64 "\n", sample->branch_stack->nr);
+
+ for (i = 0; i < sample->branch_stack->nr; i++) {
+ struct branch_entry *e = &sample->branch_stack->entries[i];
+
+ printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
+ i, e->from, e->to,
+ (unsigned short)e->flags.cycles,
+ e->flags.mispred ? "M" : " ",
+ e->flags.predicted ? "P" : " ",
+ e->flags.abort ? "A" : " ",
+ e->flags.in_tx ? "T" : " ",
+ (unsigned)e->flags.reserved);
+ }
+}
+
+static void regs_dump__printf(u64 mask, u64 *regs)
+{
+ unsigned rid, i = 0;
+
+ for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
+ u64 val = regs[i++];
+
+ printf(".... %-5s 0x%" PRIx64 "\n",
+ perf_reg_name(rid), val);
+ }
+}
+
+static const char *regs_abi[] = {
+ [PERF_SAMPLE_REGS_ABI_NONE] = "none",
+ [PERF_SAMPLE_REGS_ABI_32] = "32-bit",
+ [PERF_SAMPLE_REGS_ABI_64] = "64-bit",
+};
+
+static inline const char *regs_dump_abi(struct regs_dump *d)
+{
+ if (d->abi > PERF_SAMPLE_REGS_ABI_64)
+ return "unknown";
+
+ return regs_abi[d->abi];
+}
+
+static void regs__printf(const char *type, struct regs_dump *regs)
+{
+ u64 mask = regs->mask;
+
+ printf("... %s regs: mask 0x%" PRIx64 " ABI %s\n",
+ type,
+ mask,
+ regs_dump_abi(regs));
+
+ regs_dump__printf(mask, regs->regs);
+}
+
+static void regs_user__printf(struct perf_sample *sample)
+{
+ struct regs_dump *user_regs = &sample->user_regs;
+
+ if (user_regs->regs)
+ regs__printf("user", user_regs);
+}
+
+static void regs_intr__printf(struct perf_sample *sample)
+{
+ struct regs_dump *intr_regs = &sample->intr_regs;
+
+ if (intr_regs->regs)
+ regs__printf("intr", intr_regs);
+}
+
+static void stack_user__printf(struct stack_dump *dump)
+{
+ printf("... ustack: size %" PRIu64 ", offset 0x%x\n",
+ dump->size, dump->offset);
+}
+
+static void perf_evlist__print_tstamp(struct perf_evlist *evlist,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ u64 sample_type = __perf_evlist__combined_sample_type(evlist);
+
+ if (event->header.type != PERF_RECORD_SAMPLE &&
+ !perf_evlist__sample_id_all(evlist)) {
+ fputs("-1 -1 ", stdout);
+ return;
+ }
+
+ if ((sample_type & PERF_SAMPLE_CPU))
+ printf("%u ", sample->cpu);
+
+ if (sample_type & PERF_SAMPLE_TIME)
+ printf("%" PRIu64 " ", sample->time);
+}
+
+static void sample_read__printf(struct perf_sample *sample, u64 read_format)
+{
+ printf("... sample_read:\n");
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ printf("...... time enabled %016" PRIx64 "\n",
+ sample->read.time_enabled);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ printf("...... time running %016" PRIx64 "\n",
+ sample->read.time_running);
+
+ if (read_format & PERF_FORMAT_GROUP) {
+ u64 i;
+
+ printf(".... group nr %" PRIu64 "\n", sample->read.group.nr);
+
+ for (i = 0; i < sample->read.group.nr; i++) {
+ struct sample_read_value *value;
+
+ value = &sample->read.group.values[i];
+ printf("..... id %016" PRIx64
+ ", value %016" PRIx64 "\n",
+ value->id, value->value);
+ }
+ } else
+ printf("..... id %016" PRIx64 ", value %016" PRIx64 "\n",
+ sample->read.one.id, sample->read.one.value);
+}
+
+static void dump_event(struct perf_evlist *evlist, union perf_event *event,
+ u64 file_offset, struct perf_sample *sample)
+{
+ if (!dump_trace)
+ return;
+
+ printf("\n%#" PRIx64 " [%#x]: event: %d\n",
+ file_offset, event->header.size, event->header.type);
+
+ trace_event(event);
+
+ if (sample)
+ perf_evlist__print_tstamp(evlist, event, sample);
+
+ printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset,
+ event->header.size, perf_event__name(event->header.type));
+}
+
+static void dump_sample(struct perf_evsel *evsel, union perf_event *event,
+ struct perf_sample *sample)
+{
+ u64 sample_type;
+
+ if (!dump_trace)
+ return;
+
+ printf("(IP, 0x%x): %d/%d: %#" PRIx64 " period: %" PRIu64 " addr: %#" PRIx64 "\n",
+ event->header.misc, sample->pid, sample->tid, sample->ip,
+ sample->period, sample->addr);
+
+ sample_type = evsel->attr.sample_type;
+
+ if (evsel__has_callchain(evsel))
+ callchain__printf(evsel, sample);
+
+ if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !perf_evsel__has_branch_callstack(evsel))
+ branch_stack__printf(sample);
+
+ if (sample_type & PERF_SAMPLE_REGS_USER)
+ regs_user__printf(sample);
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ regs_intr__printf(sample);
+
+ if (sample_type & PERF_SAMPLE_STACK_USER)
+ stack_user__printf(&sample->user_stack);
+
+ if (sample_type & PERF_SAMPLE_WEIGHT)
+ printf("... weight: %" PRIu64 "\n", sample->weight);
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ printf(" . data_src: 0x%"PRIx64"\n", sample->data_src);
+
+ if (sample_type & PERF_SAMPLE_PHYS_ADDR)
+ printf(" .. phys_addr: 0x%"PRIx64"\n", sample->phys_addr);
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
+ printf("... transaction: %" PRIx64 "\n", sample->transaction);
+
+ if (sample_type & PERF_SAMPLE_READ)
+ sample_read__printf(sample, evsel->attr.read_format);
+}
+
+static void dump_read(struct perf_evsel *evsel, union perf_event *event)
+{
+ struct read_event *read_event = &event->read;
+ u64 read_format;
+
+ if (!dump_trace)
+ return;
+
+ printf(": %d %d %s %" PRIu64 "\n", event->read.pid, event->read.tid,
+ evsel ? perf_evsel__name(evsel) : "FAIL",
+ event->read.value);
+
+ if (!evsel)
+ return;
+
+ read_format = evsel->attr.read_format;
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+ printf("... time enabled : %" PRIu64 "\n", read_event->time_enabled);
+
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+ printf("... time running : %" PRIu64 "\n", read_event->time_running);
+
+ if (read_format & PERF_FORMAT_ID)
+ printf("... id : %" PRIu64 "\n", read_event->id);
+}
+
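+/*
+ * Guest samples are resolved to a guest machine by pid (the mmap pid for
+ * MMAP/MMAP2 events), falling back to the default guest machine; all other
+ * samples belong to the host machine.
+ */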
+static struct machine *machines__find_for_cpumode(struct machines *machines,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct machine *machine;
+
+ if (perf_guest &&
+ ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
+ (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
+ u32 pid;
+
+ if (event->header.type == PERF_RECORD_MMAP
+ || event->header.type == PERF_RECORD_MMAP2)
+ pid = event->mmap.pid;
+ else
+ pid = sample->pid;
+
+ machine = machines__find(machines, pid);
+ if (!machine)
+ machine = machines__findnew(machines, DEFAULT_GUEST_KERNEL_ID);
+ return machine;
+ }
+
+ return &machines->host;
+}
+
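+/*
+ * PERF_SAMPLE_READ values arrive as running totals, so the per-sample
+ * period is recovered by subtracting the previous total cached in the
+ * sample id.
+ */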
+static int deliver_sample_value(struct perf_evlist *evlist,
+ struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct sample_read_value *v,
+ struct machine *machine)
+{
+ struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id);
+
+ if (sid) {
+ sample->id = v->id;
+ sample->period = v->value - sid->period;
+ sid->period = v->value;
+ }
+
+ if (!sid || sid->evsel == NULL) {
+ ++evlist->stats.nr_unknown_id;
+ return 0;
+ }
+
+ return tool->sample(tool, event, sample, sid->evsel, machine);
+}
+
+static int deliver_sample_group(struct perf_evlist *evlist,
+ struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int ret = -EINVAL;
+ u64 i;
+
+ for (i = 0; i < sample->read.group.nr; i++) {
+ ret = deliver_sample_value(evlist, tool, event, sample,
+ &sample->read.group.values[i],
+ machine);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+static int
+ perf_evlist__deliver_sample(struct perf_evlist *evlist,
+ struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ /* We know evsel != NULL. */
+ u64 sample_type = evsel->attr.sample_type;
+ u64 read_format = evsel->attr.read_format;
+
+ /* Standard sample delivery. */
+ if (!(sample_type & PERF_SAMPLE_READ))
+ return tool->sample(tool, event, sample, evsel, machine);
+
+ /* For PERF_SAMPLE_READ we have either single or group mode. */
+ if (read_format & PERF_FORMAT_GROUP)
+ return deliver_sample_group(evlist, tool, event, sample,
+ machine);
+ else
+ return deliver_sample_value(evlist, tool, event, sample,
+ &sample->read.one, machine);
+}
+
+static int machines__deliver_event(struct machines *machines,
+ struct perf_evlist *evlist,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool, u64 file_offset)
+{
+ struct perf_evsel *evsel;
+ struct machine *machine;
+
+ dump_event(evlist, event, file_offset, sample);
+
+ evsel = perf_evlist__id2evsel(evlist, sample->id);
+
+ machine = machines__find_for_cpumode(machines, event, sample);
+
+ switch (event->header.type) {
+ case PERF_RECORD_SAMPLE:
+ if (evsel == NULL) {
+ ++evlist->stats.nr_unknown_id;
+ return 0;
+ }
+ dump_sample(evsel, event, sample);
+ if (machine == NULL) {
+ ++evlist->stats.nr_unprocessable_samples;
+ return 0;
+ }
+ return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine);
+ case PERF_RECORD_MMAP:
+ return tool->mmap(tool, event, sample, machine);
+ case PERF_RECORD_MMAP2:
+ if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+ ++evlist->stats.nr_proc_map_timeout;
+ return tool->mmap2(tool, event, sample, machine);
+ case PERF_RECORD_COMM:
+ return tool->comm(tool, event, sample, machine);
+ case PERF_RECORD_NAMESPACES:
+ return tool->namespaces(tool, event, sample, machine);
+ case PERF_RECORD_FORK:
+ return tool->fork(tool, event, sample, machine);
+ case PERF_RECORD_EXIT:
+ return tool->exit(tool, event, sample, machine);
+ case PERF_RECORD_LOST:
+ if (tool->lost == perf_event__process_lost)
+ evlist->stats.total_lost += event->lost.lost;
+ return tool->lost(tool, event, sample, machine);
+ case PERF_RECORD_LOST_SAMPLES:
+ if (tool->lost_samples == perf_event__process_lost_samples)
+ evlist->stats.total_lost_samples += event->lost_samples.lost;
+ return tool->lost_samples(tool, event, sample, machine);
+ case PERF_RECORD_READ:
+ dump_read(evsel, event);
+ return tool->read(tool, event, sample, evsel, machine);
+ case PERF_RECORD_THROTTLE:
+ return tool->throttle(tool, event, sample, machine);
+ case PERF_RECORD_UNTHROTTLE:
+ return tool->unthrottle(tool, event, sample, machine);
+ case PERF_RECORD_AUX:
+ if (tool->aux == perf_event__process_aux) {
+ if (event->aux.flags & PERF_AUX_FLAG_TRUNCATED)
+ evlist->stats.total_aux_lost += 1;
+ if (event->aux.flags & PERF_AUX_FLAG_PARTIAL)
+ evlist->stats.total_aux_partial += 1;
+ }
+ return tool->aux(tool, event, sample, machine);
+ case PERF_RECORD_ITRACE_START:
+ return tool->itrace_start(tool, event, sample, machine);
+ case PERF_RECORD_SWITCH:
+ case PERF_RECORD_SWITCH_CPU_WIDE:
+ return tool->context_switch(tool, event, sample, machine);
+ default:
+ ++evlist->stats.nr_unknown_events;
+ return -1;
+ }
+}
+
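+/*
+ * Parse the raw record into a perf_sample, let auxtrace consume or
+ * filter it (a positive return means it was handled there), then hand
+ * it to the per-machine delivery switch.
+ */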
+static int perf_session__deliver_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_tool *tool,
+ u64 file_offset)
+{
+ struct perf_sample sample;
+ int ret;
+
+ ret = perf_evlist__parse_sample(session->evlist, event, &sample);
+ if (ret) {
+ pr_err("Can't parse sample, err = %d\n", ret);
+ return ret;
+ }
+
+ ret = auxtrace__process_event(session, event, &sample, tool);
+ if (ret < 0)
+ return ret;
+ if (ret > 0)
+ return 0;
+
+ return machines__deliver_event(&session->machines, session->evlist,
+ event, &sample, tool, file_offset);
+}
+
+static s64 perf_session__process_user_event(struct perf_session *session,
+ union perf_event *event,
+ u64 file_offset)
+{
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ struct perf_sample sample = { .time = 0, };
+ int fd = perf_data__fd(session->data);
+ int err;
+
+ dump_event(session->evlist, event, file_offset, &sample);
+
+ /* These events are processed right away */
+ switch (event->header.type) {
+ case PERF_RECORD_HEADER_ATTR:
+ err = tool->attr(tool, event, &session->evlist);
+ if (err == 0) {
+ perf_session__set_id_hdr_size(session);
+ perf_session__set_comm_exec(session);
+ }
+ return err;
+ case PERF_RECORD_EVENT_UPDATE:
+ return tool->event_update(tool, event, &session->evlist);
+ case PERF_RECORD_HEADER_EVENT_TYPE:
+ /*
+ * Deprecated, but we need to handle it for the sake
+ * of old data files created in pipe mode.
+ */
+ return 0;
+ case PERF_RECORD_HEADER_TRACING_DATA:
+ /* setup for reading amidst mmap */
+ lseek(fd, file_offset, SEEK_SET);
+ return tool->tracing_data(tool, event, session);
+ case PERF_RECORD_HEADER_BUILD_ID:
+ return tool->build_id(tool, event, session);
+ case PERF_RECORD_FINISHED_ROUND:
+ return tool->finished_round(tool, event, oe);
+ case PERF_RECORD_ID_INDEX:
+ return tool->id_index(tool, event, session);
+ case PERF_RECORD_AUXTRACE_INFO:
+ return tool->auxtrace_info(tool, event, session);
+ case PERF_RECORD_AUXTRACE:
+ /* setup for reading amidst mmap */
+ lseek(fd, file_offset + event->header.size, SEEK_SET);
+ return tool->auxtrace(tool, event, session);
+ case PERF_RECORD_AUXTRACE_ERROR:
+ perf_session__auxtrace_error_inc(session, event);
+ return tool->auxtrace_error(tool, event, session);
+ case PERF_RECORD_THREAD_MAP:
+ return tool->thread_map(tool, event, session);
+ case PERF_RECORD_CPU_MAP:
+ return tool->cpu_map(tool, event, session);
+ case PERF_RECORD_STAT_CONFIG:
+ return tool->stat_config(tool, event, session);
+ case PERF_RECORD_STAT:
+ return tool->stat(tool, event, session);
+ case PERF_RECORD_STAT_ROUND:
+ return tool->stat_round(tool, event, session);
+ case PERF_RECORD_TIME_CONV:
+ session->time_conv = event->time_conv;
+ return tool->time_conv(tool, event, session);
+ case PERF_RECORD_HEADER_FEATURE:
+ return tool->feature(tool, event, session);
+ default:
+ return -EINVAL;
+ }
+}
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_tool *tool = session->tool;
+
+ events_stats__inc(&evlist->stats, event->header.type);
+
+ if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+ return perf_session__process_user_event(session, event, 0);
+
+ return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0);
+}
+
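+/*
+ * Byte-swap an event read from a file with the opposite endianness,
+ * using the per-record-type table of swap handlers; types without a
+ * handler are left untouched.
+ */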
+static void event_swap(union perf_event *event, bool sample_id_all)
+{
+ perf_event__swap_op swap;
+
+ swap = perf_event__swap_ops[event->header.type];
+ if (swap)
+ swap(event, sample_id_all);
+}
+
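+/*
+ * Read the event at 'file_offset' without disturbing the main event
+ * stream: use the single mmap directly when possible, otherwise seek
+ * and read into the caller's buffer. Not supported for pipes.
+ */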
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+ void *buf, size_t buf_sz,
+ union perf_event **event_ptr,
+ struct perf_sample *sample)
+{
+ union perf_event *event;
+ size_t hdr_sz, rest;
+ int fd;
+
+ if (session->one_mmap && !session->header.needs_swap) {
+ event = file_offset - session->one_mmap_offset +
+ session->one_mmap_addr;
+ goto out_parse_sample;
+ }
+
+ if (perf_data__is_pipe(session->data))
+ return -1;
+
+ fd = perf_data__fd(session->data);
+ hdr_sz = sizeof(struct perf_event_header);
+
+ if (buf_sz < hdr_sz)
+ return -1;
+
+ if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
+ readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
+ return -1;
+
+ event = (union perf_event *)buf;
+
+ if (session->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ if (event->header.size < hdr_sz || event->header.size > buf_sz)
+ return -1;
+
+ buf += hdr_sz;
+ rest = event->header.size - hdr_sz;
+
+ if (readn(fd, buf, rest) != (ssize_t)rest)
+ return -1;
+
+ if (session->header.needs_swap)
+ event_swap(event, perf_evlist__sample_id_all(session->evlist));
+
+out_parse_sample:
+
+ if (sample && event->header.type < PERF_RECORD_USER_TYPE_START &&
+ perf_evlist__parse_sample(session->evlist, event, sample))
+ return -1;
+
+ *event_ptr = event;
+
+ return 0;
+}
+
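+/*
+ * Central entry point for records in the data file: swap if needed,
+ * account the type, process user records immediately, and otherwise
+ * queue by timestamp when the tool asked for ordered delivery; -ETIME
+ * from the queue (no usable timestamp) falls back to direct delivery.
+ */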
+static s64 perf_session__process_event(struct perf_session *session,
+ union perf_event *event, u64 file_offset)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct perf_tool *tool = session->tool;
+ int ret;
+
+ if (session->header.needs_swap)
+ event_swap(event, perf_evlist__sample_id_all(evlist));
+
+ if (event->header.type >= PERF_RECORD_HEADER_MAX)
+ return -EINVAL;
+
+ events_stats__inc(&evlist->stats, event->header.type);
+
+ if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+ return perf_session__process_user_event(session, event, file_offset);
+
+ if (tool->ordered_events) {
+ u64 timestamp = -1ULL;
+
+ ret = perf_evlist__parse_sample_timestamp(evlist, event, &timestamp);
+ if (ret && ret != -1)
+ return ret;
+
+ ret = perf_session__queue_event(session, event, timestamp, file_offset);
+ if (ret != -ETIME)
+ return ret;
+ }
+
+ return perf_session__deliver_event(session, event, tool, file_offset);
+}
+
+void perf_event_header__bswap(struct perf_event_header *hdr)
+{
+ hdr->type = bswap_32(hdr->type);
+ hdr->misc = bswap_16(hdr->misc);
+ hdr->size = bswap_16(hdr->size);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid)
+{
+ return machine__findnew_thread(&session->machines.host, -1, pid);
+}
+
+int perf_session__register_idle_thread(struct perf_session *session)
+{
+ struct thread *thread;
+ int err = 0;
+
+ thread = machine__findnew_thread(&session->machines.host, 0, 0);
+ if (thread == NULL || thread__set_comm(thread, "swapper", 0)) {
+ pr_err("problem inserting idle task.\n");
+ err = -1;
+ }
+
+ if (thread == NULL || thread__set_namespaces(thread, 0, NULL)) {
+ pr_err("problem inserting idle task.\n");
+ err = -1;
+ }
+
+ /* machine__findnew_thread() got the thread, so put it */
+ thread__put(thread);
+ return err;
+}
+
+static void
+perf_session__warn_order(const struct perf_session *session)
+{
+ const struct ordered_events *oe = &session->ordered_events;
+ struct perf_evsel *evsel;
+ bool should_warn = true;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.write_backward)
+ should_warn = false;
+ }
+
+ if (!should_warn)
+ return;
+ if (oe->nr_unordered_events != 0)
+ ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+}
+
+static void perf_session__warn_about_errors(const struct perf_session *session)
+{
+ const struct events_stats *stats = &session->evlist->stats;
+
+ if (session->tool->lost == perf_event__process_lost &&
+ stats->nr_events[PERF_RECORD_LOST] != 0) {
+ ui__warning("Processed %d events and lost %d chunks!\n\n"
+ "Check IO/CPU overload!\n\n",
+ stats->nr_events[0],
+ stats->nr_events[PERF_RECORD_LOST]);
+ }
+
+ if (session->tool->lost_samples == perf_event__process_lost_samples) {
+ double drop_rate;
+
+ drop_rate = (double)stats->total_lost_samples /
+ (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+ if (drop_rate > 0.05) {
+ ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
+ stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+ drop_rate * 100.0);
+ }
+ }
+
+ if (session->tool->aux == perf_event__process_aux &&
+ stats->total_aux_lost != 0) {
+ ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
+ stats->total_aux_lost,
+ stats->nr_events[PERF_RECORD_AUX]);
+ }
+
+ if (session->tool->aux == perf_event__process_aux &&
+ stats->total_aux_partial != 0) {
+ bool vmm_exclusive = false;
+
+ (void)sysfs__read_bool("module/kvm_intel/parameters/vmm_exclusive",
+ &vmm_exclusive);
+
+ ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n"
+ "Are you running a KVM guest in the background?%s\n\n",
+ stats->total_aux_partial,
+ stats->nr_events[PERF_RECORD_AUX],
+ vmm_exclusive ?
+ "\nReloading kvm_intel module with vmm_exclusive=0\n"
+ "will reduce the gaps to only guest's timeslices." :
+ "");
+ }
+
+ if (stats->nr_unknown_events != 0) {
+ ui__warning("Found %u unknown events!\n\n"
+ "Is this an older tool processing a perf.data "
+ "file generated by a more recent tool?\n\n"
+ "If that is not the case, consider "
+ "reporting to linux-kernel@vger.kernel.org.\n\n",
+ stats->nr_unknown_events);
+ }
+
+ if (stats->nr_unknown_id != 0) {
+ ui__warning("%u samples with id not present in the header\n",
+ stats->nr_unknown_id);
+ }
+
+ if (stats->nr_invalid_chains != 0) {
+ ui__warning("Found invalid callchains!\n\n"
+ "%u out of %u events were discarded for this reason.\n\n"
+ "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
+ stats->nr_invalid_chains,
+ stats->nr_events[PERF_RECORD_SAMPLE]);
+ }
+
+ if (stats->nr_unprocessable_samples != 0) {
+ ui__warning("%u unprocessable samples recorded.\n"
+ "Do you have a KVM guest running and not using 'perf kvm'?\n",
+ stats->nr_unprocessable_samples);
+ }
+
+ perf_session__warn_order(session);
+
+ events_stats__auxtrace_error_warn(stats);
+
+ if (stats->nr_proc_map_timeout != 0) {
+ ui__warning("%d map information files for pre-existing threads were\n"
+ "not processed, if there are samples for addresses they\n"
+ "will not be resolved, you may find out which are these\n"
+ "threads by running with -v and redirecting the output\n"
+ "to a file.\n"
+ "The time limit to process proc map is too short?\n"
+ "Increase it by --proc-map-timeout\n",
+ stats->nr_proc_map_timeout);
+ }
+}
+
+static int perf_session__flush_thread_stack(struct thread *thread,
+ void *p __maybe_unused)
+{
+ return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+ return machines__for_each_thread(&session->machines,
+ perf_session__flush_thread_stack,
+ NULL);
+}
+
+volatile int session_done;
+
+static int __perf_session__process_pipe_events(struct perf_session *session)
+{
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ int fd = perf_data__fd(session->data);
+ union perf_event *event;
+ uint32_t size, cur_size = 0;
+ void *buf = NULL;
+ s64 skip = 0;
+ u64 head;
+ ssize_t err;
+ void *p;
+
+ perf_tool__fill_defaults(tool);
+
+ head = 0;
+ cur_size = sizeof(union perf_event);
+
+ buf = malloc(cur_size);
+ if (!buf)
+ return -errno;
+ ordered_events__set_copy_on_queue(oe, true);
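+ /*
+ * Read one record at a time from the pipe: header first, then grow
+ * the buffer on demand for the payload, process it, and loop until
+ * EOF or session_done() is signalled.
+ */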
+more:
+ event = buf;
+ err = readn(fd, event, sizeof(struct perf_event_header));
+ if (err <= 0) {
+ if (err == 0)
+ goto done;
+
+ pr_err("failed to read event header\n");
+ goto out_err;
+ }
+
+ if (session->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ size = event->header.size;
+ if (size < sizeof(struct perf_event_header)) {
+ pr_err("bad event header size\n");
+ goto out_err;
+ }
+
+ if (size > cur_size) {
+ void *new = realloc(buf, size);
+ if (!new) {
+ pr_err("failed to allocate memory to read event\n");
+ goto out_err;
+ }
+ buf = new;
+ cur_size = size;
+ event = buf;
+ }
+ p = event;
+ p += sizeof(struct perf_event_header);
+
+ if (size - sizeof(struct perf_event_header)) {
+ err = readn(fd, p, size - sizeof(struct perf_event_header));
+ if (err <= 0) {
+ if (err == 0) {
+ pr_err("unexpected end of event stream\n");
+ goto done;
+ }
+
+ pr_err("failed to read event data\n");
+ goto out_err;
+ }
+ }
+
+ if ((skip = perf_session__process_event(session, event, head)) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ head, event->header.size, event->header.type);
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ head += size;
+
+ if (skip > 0)
+ head += skip;
+
+ if (!session_done())
+ goto more;
+done:
+ /* do the final flush for ordered samples */
+ err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+ if (err)
+ goto out_err;
+ err = auxtrace__flush_events(session, tool);
+ if (err)
+ goto out_err;
+ err = perf_session__flush_thread_stacks(session);
+out_err:
+ free(buf);
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+ ordered_events__free(&session->ordered_events);
+ auxtrace__free_events(session);
+ return err;
+}
+
+static union perf_event *
+fetch_mmaped_event(struct perf_session *session,
+ u64 head, size_t mmap_size, char *buf)
+{
+ union perf_event *event;
+
+ /*
+ * Ensure we have enough space remaining to read
+ * the size of the event in the headers.
+ */
+ if (head + sizeof(event->header) > mmap_size)
+ return NULL;
+
+ event = (union perf_event *)(buf + head);
+
+ if (session->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+
+ if (head + event->header.size > mmap_size) {
+ /* We're not fetching the event so swap back again */
+ if (session->header.needs_swap)
+ perf_event_header__bswap(&event->header);
+ return NULL;
+ }
+
+ return event;
+}
+
+/*
+ * On 64bit we can mmap the data file in one go. No need for tiny mmap
+ * slices. On 32bit we use 32MB.
+ */
+#if BITS_PER_LONG == 64
+#define MMAP_SIZE ULLONG_MAX
+#define NUM_MMAPS 1
+#else
+#define MMAP_SIZE (32 * 1024 * 1024ULL)
+#define NUM_MMAPS 128
+#endif
+
+static int __perf_session__process_events(struct perf_session *session,
+ u64 data_offset, u64 data_size,
+ u64 file_size)
+{
+ struct ordered_events *oe = &session->ordered_events;
+ struct perf_tool *tool = session->tool;
+ int fd = perf_data__fd(session->data);
+ u64 head, page_offset, file_offset, file_pos, size;
+ int err, mmap_prot, mmap_flags, map_idx = 0;
+ size_t mmap_size;
+ char *buf, *mmaps[NUM_MMAPS];
+ union perf_event *event;
+ struct ui_progress prog;
+ s64 skip;
+
+ perf_tool__fill_defaults(tool);
+
+ page_offset = page_size * (data_offset / page_size);
+ file_offset = page_offset;
+ head = data_offset - page_offset;
+
+ if (data_size == 0)
+ goto out;
+
+ if (data_offset + data_size < file_size)
+ file_size = data_offset + data_size;
+
+ ui_progress__init_size(&prog, file_size, "Processing events...");
+
+ mmap_size = MMAP_SIZE;
+ if (mmap_size > file_size) {
+ mmap_size = file_size;
+ session->one_mmap = true;
+ }
+
+ memset(mmaps, 0, sizeof(mmaps));
+
+ mmap_prot = PROT_READ;
+ mmap_flags = MAP_SHARED;
+
+ if (session->header.needs_swap) {
+ mmap_prot |= PROT_WRITE;
+ mmap_flags = MAP_PRIVATE;
+ }
+remap:
+ buf = mmap(NULL, mmap_size, mmap_prot, mmap_flags, fd,
+ file_offset);
+ if (buf == MAP_FAILED) {
+ pr_err("failed to mmap file\n");
+ err = -errno;
+ goto out_err;
+ }
+ mmaps[map_idx] = buf;
+ map_idx = (map_idx + 1) & (ARRAY_SIZE(mmaps) - 1);
+ file_pos = file_offset + head;
+ if (session->one_mmap) {
+ session->one_mmap_addr = buf;
+ session->one_mmap_offset = file_offset;
+ }
+
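+ /*
+ * Iterate over the events in the current mmap window; a NULL return
+ * from fetch_mmaped_event() means the next event crosses the end of
+ * the window, so unmap it and remap at a page-aligned offset that
+ * covers 'head'.
+ */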
+more:
+ event = fetch_mmaped_event(session, head, mmap_size, buf);
+ if (!event) {
+ if (mmaps[map_idx]) {
+ munmap(mmaps[map_idx], mmap_size);
+ mmaps[map_idx] = NULL;
+ }
+
+ page_offset = page_size * (head / page_size);
+ file_offset += page_offset;
+ head -= page_offset;
+ goto remap;
+ }
+
+ size = event->header.size;
+
+ if (size < sizeof(struct perf_event_header) ||
+ (skip = perf_session__process_event(session, event, file_pos)) < 0) {
+ pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
+ file_offset + head, event->header.size,
+ event->header.type);
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ if (skip)
+ size += skip;
+
+ head += size;
+ file_pos += size;
+
+ ui_progress__update(&prog, size);
+
+ if (session_done())
+ goto out;
+
+ if (file_pos < file_size)
+ goto more;
+
+out:
+ /* do the final flush for ordered samples */
+ err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+ if (err)
+ goto out_err;
+ err = auxtrace__flush_events(session, tool);
+ if (err)
+ goto out_err;
+ err = perf_session__flush_thread_stacks(session);
+out_err:
+ ui_progress__finish();
+ if (!tool->no_warn)
+ perf_session__warn_about_errors(session);
+ /*
+ * We may be switching perf.data output; make ordered_events
+ * reusable.
+ */
+ ordered_events__reinit(&session->ordered_events);
+ auxtrace__free_events(session);
+ session->one_mmap = false;
+ return err;
+}
+
+int perf_session__process_events(struct perf_session *session)
+{
+ u64 size = perf_data__size(session->data);
+ int err;
+
+ if (perf_session__register_idle_thread(session) < 0)
+ return -ENOMEM;
+
+ if (!perf_data__is_pipe(session->data))
+ err = __perf_session__process_events(session,
+ session->header.data_offset,
+ session->header.data_size, size);
+ else
+ err = __perf_session__process_pipe_events(session);
+
+ return err;
+}
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (evsel->attr.type == PERF_TYPE_TRACEPOINT)
+ return true;
+ }
+
+ pr_err("No trace sample to read. Did you call 'perf %s'?\n", msg);
+ return false;
+}
+
+int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name, u64 addr)
+{
+ char *bracket;
+ struct ref_reloc_sym *ref;
+ struct kmap *kmap;
+
+ ref = zalloc(sizeof(struct ref_reloc_sym));
+ if (ref == NULL)
+ return -ENOMEM;
+
+ ref->name = strdup(symbol_name);
+ if (ref->name == NULL) {
+ free(ref);
+ return -ENOMEM;
+ }
+
+ bracket = strchr(ref->name, ']');
+ if (bracket)
+ *bracket = '\0';
+
+ ref->addr = addr;
+
+ kmap = map__kmap(map);
+ if (kmap)
+ kmap->ref_reloc_sym = ref;
+
+ return 0;
+}
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp)
+{
+ return machines__fprintf_dsos(&session->machines, fp);
+}
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+ bool (skip)(struct dso *dso, int parm), int parm)
+{
+ return machines__fprintf_dsos_buildid(&session->machines, fp, skip, parm);
+}
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
+{
+ size_t ret;
+ const char *msg = "";
+
+ if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+ msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+
+ ret = fprintf(fp, "\nAggregated stats:%s\n", msg);
+
+ ret += events_stats__fprintf(&session->evlist->stats, fp);
+ return ret;
+}
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp)
+{
+ /*
+ * FIXME: Here we have to actually print all the machines in this
+ * session, not just the host...
+ */
+ return machine__fprintf(&session->machines.host, fp);
+}
+
+struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
+ unsigned int type)
+{
+ struct perf_evsel *pos;
+
+ evlist__for_each_entry(session->evlist, pos) {
+ if (pos->attr.type == type)
+ return pos;
+ }
+ return NULL;
+}
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+ const char *cpu_list, unsigned long *cpu_bitmap)
+{
+ int i, err = -1;
+ struct cpu_map *map;
+
+ for (i = 0; i < PERF_TYPE_MAX; ++i) {
+ struct perf_evsel *evsel;
+
+ evsel = perf_session__find_first_evtype(session, i);
+ if (!evsel)
+ continue;
+
+ if (!(evsel->attr.sample_type & PERF_SAMPLE_CPU)) {
+ pr_err("File does not contain CPU events. "
+ "Remove -C option to proceed.\n");
+ return -1;
+ }
+ }
+
+ map = cpu_map__new(cpu_list);
+ if (map == NULL) {
+ pr_err("Invalid cpu_list\n");
+ return -1;
+ }
+
+ for (i = 0; i < map->nr; i++) {
+ int cpu = map->map[i];
+
+ if (cpu >= MAX_NR_CPUS) {
+ pr_err("Requested CPU %d too large. "
+ "Consider raising MAX_NR_CPUS\n", cpu);
+ goto out_delete_map;
+ }
+
+ set_bit(cpu, cpu_bitmap);
+ }
+
+ err = 0;
+
+out_delete_map:
+ cpu_map__put(map);
+ return err;
+}
+
+void perf_session__fprintf_info(struct perf_session *session, FILE *fp,
+ bool full)
+{
+ if (session == NULL || fp == NULL)
+ return;
+
+ fprintf(fp, "# ========\n");
+ perf_header__fprintf_info(session, fp, full);
+ fprintf(fp, "# ========\n#\n");
+}
+
+
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+ const struct perf_evsel_str_handler *assocs,
+ size_t nr_assocs)
+{
+ struct perf_evsel *evsel;
+ size_t i;
+ int err;
+
+ for (i = 0; i < nr_assocs; i++) {
+ /*
+ * If we're adding a handler for an event not in this
+ * session, just ignore it.
+ */
+ evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name);
+ if (evsel == NULL)
+ continue;
+
+ err = -EEXIST;
+ if (evsel->handler != NULL)
+ goto out;
+ evsel->handler = assocs[i].handler;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
+int perf_event__process_id_index(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_evlist *evlist = session->evlist;
+ struct id_index_event *ie = &event->id_index;
+ size_t i, nr, max_nr;
+
+ max_nr = (ie->header.size - sizeof(struct id_index_event)) /
+ sizeof(struct id_index_entry);
+ nr = ie->nr;
+ if (nr > max_nr)
+ return -EINVAL;
+
+ if (dump_trace)
+ fprintf(stdout, " nr: %zu\n", nr);
+
+ for (i = 0; i < nr; i++) {
+ struct id_index_entry *e = &ie->entries[i];
+ struct perf_sample_id *sid;
+
+ if (dump_trace) {
+ fprintf(stdout, " ... id: %"PRIu64, e->id);
+ fprintf(stdout, " idx: %"PRIu64, e->idx);
+ fprintf(stdout, " cpu: %"PRId64, e->cpu);
+ fprintf(stdout, " tid: %"PRId64"\n", e->tid);
+ }
+
+ sid = perf_evlist__id2sid(evlist, e->id);
+ if (!sid)
+ return -ENOENT;
+ sid->idx = e->idx;
+ sid->cpu = e->cpu;
+ sid->tid = e->tid;
+ }
+ return 0;
+}
+
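+/*
+ * Emit PERF_RECORD_ID_INDEX records mapping each sample id to its
+ * evsel index, cpu and tid. The entry array is bounded by the u16
+ * header size, so a full record is flushed via process() and reused
+ * whenever that limit is reached.
+ */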
+int perf_event__synthesize_id_index(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct perf_evlist *evlist,
+ struct machine *machine)
+{
+ union perf_event *ev;
+ struct perf_evsel *evsel;
+ size_t nr = 0, i = 0, sz, max_nr, n;
+ int err;
+
+ pr_debug2("Synthesizing id index\n");
+
+ max_nr = (UINT16_MAX - sizeof(struct id_index_event)) /
+ sizeof(struct id_index_entry);
+
+ evlist__for_each_entry(evlist, evsel)
+ nr += evsel->ids;
+
+ n = nr > max_nr ? max_nr : nr;
+ sz = sizeof(struct id_index_event) + n * sizeof(struct id_index_entry);
+ ev = zalloc(sz);
+ if (!ev)
+ return -ENOMEM;
+
+ ev->id_index.header.type = PERF_RECORD_ID_INDEX;
+ ev->id_index.header.size = sz;
+ ev->id_index.nr = n;
+
+ evlist__for_each_entry(evlist, evsel) {
+ u32 j;
+
+ for (j = 0; j < evsel->ids; j++) {
+ struct id_index_entry *e;
+ struct perf_sample_id *sid;
+
+ if (i >= n) {
+ err = process(tool, ev, NULL, machine);
+ if (err)
+ goto out_err;
+ nr -= n;
+ i = 0;
+ }
+
+ e = &ev->id_index.entries[i++];
+
+ e->id = evsel->id[j];
+
+ sid = perf_evlist__id2sid(evlist, e->id);
+ if (!sid) {
+ free(ev);
+ return -ENOENT;
+ }
+
+ e->idx = sid->idx;
+ e->cpu = sid->cpu;
+ e->tid = sid->tid;
+ }
+ }
+
+ sz = sizeof(struct id_index_event) + nr * sizeof(struct id_index_entry);
+ ev->id_index.header.size = sz;
+ ev->id_index.nr = nr;
+
+ err = process(tool, ev, NULL, machine);
+out_err:
+ free(ev);
+
+ return err;
+}
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
new file mode 100644
index 000000000..da40b4b38
--- /dev/null
+++ b/tools/perf/util/session.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SESSION_H
+#define __PERF_SESSION_H
+
+#include "trace-event.h"
+#include "event.h"
+#include "header.h"
+#include "machine.h"
+#include "data.h"
+#include "ordered-events.h"
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/perf_event.h>
+
+struct ip_callchain;
+struct symbol;
+struct thread;
+
+struct auxtrace;
+struct itrace_synth_opts;
+
+struct perf_session {
+ struct perf_header header;
+ struct machines machines;
+ struct perf_evlist *evlist;
+ struct auxtrace *auxtrace;
+ struct itrace_synth_opts *itrace_synth_opts;
+ struct list_head auxtrace_index;
+ struct trace_event tevent;
+ struct time_conv_event time_conv;
+ bool repipe;
+ bool one_mmap;
+ void *one_mmap_addr;
+ u64 one_mmap_offset;
+ struct ordered_events ordered_events;
+ struct perf_data *data;
+ struct perf_tool *tool;
+};
+
+struct perf_tool;
+
+struct perf_session *perf_session__new(struct perf_data *data,
+ bool repipe, struct perf_tool *tool);
+void perf_session__delete(struct perf_session *session);
+
+void perf_event_header__bswap(struct perf_event_header *hdr);
+
+int perf_session__peek_event(struct perf_session *session, off_t file_offset,
+ void *buf, size_t buf_sz,
+ union perf_event **event_ptr,
+ struct perf_sample *sample);
+
+int perf_session__process_events(struct perf_session *session);
+
+int perf_session__queue_event(struct perf_session *s, union perf_event *event,
+ u64 timestamp, u64 file_offset);
+
+void perf_tool__fill_defaults(struct perf_tool *tool);
+
+int perf_session__resolve_callchain(struct perf_session *session,
+ struct perf_evsel *evsel,
+ struct thread *thread,
+ struct ip_callchain *chain,
+ struct symbol **parent);
+
+bool perf_session__has_traces(struct perf_session *session, const char *msg);
+
+void perf_event__attr_swap(struct perf_event_attr *attr);
+
+int perf_session__create_kernel_maps(struct perf_session *session);
+
+void perf_session__set_id_hdr_size(struct perf_session *session);
+
+static inline
+struct machine *perf_session__find_machine(struct perf_session *session, pid_t pid)
+{
+ return machines__find(&session->machines, pid);
+}
+
+static inline
+struct machine *perf_session__findnew_machine(struct perf_session *session, pid_t pid)
+{
+ return machines__findnew(&session->machines, pid);
+}
+
+struct thread *perf_session__findnew(struct perf_session *session, pid_t pid);
+int perf_session__register_idle_thread(struct perf_session *session);
+
+size_t perf_session__fprintf(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos(struct perf_session *session, FILE *fp);
+
+size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp,
+ bool (fn)(struct dso *dso, int parm), int parm);
+
+size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp);
+
+struct perf_evsel *perf_session__find_first_evtype(struct perf_session *session,
+ unsigned int type);
+
+int perf_session__cpu_bitmap(struct perf_session *session,
+ const char *cpu_list, unsigned long *cpu_bitmap);
+
+void perf_session__fprintf_info(struct perf_session *s, FILE *fp, bool full);
+
+struct perf_evsel_str_handler;
+
+int __perf_session__set_tracepoints_handlers(struct perf_session *session,
+ const struct perf_evsel_str_handler *assocs,
+ size_t nr_assocs);
+
+#define perf_session__set_tracepoints_handlers(session, array) \
+ __perf_session__set_tracepoints_handlers(session, array, ARRAY_SIZE(array))
+
+extern volatile int session_done;
+
+#define session_done() READ_ONCE(session_done)
+
+int perf_session__deliver_synth_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample);
+
+int perf_event__process_id_index(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+
+int perf_event__synthesize_id_index(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct perf_evlist *evlist,
+ struct machine *machine);
+
+#endif /* __PERF_SESSION_H */
diff --git a/tools/perf/util/setns.c b/tools/perf/util/setns.c
new file mode 100644
index 000000000..ce8fc290f
--- /dev/null
+++ b/tools/perf/util/setns.c
@@ -0,0 +1,8 @@
+#include "util.h"
+#include <unistd.h>
+#include <sys/syscall.h>
+
+int setns(int fd, int nstype)
+{
+ return syscall(__NR_setns, fd, nstype);
+}
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
new file mode 100644
index 000000000..1942f6dd2
--- /dev/null
+++ b/tools/perf/util/setup.py
@@ -0,0 +1,72 @@
+#!/usr/bin/python
+
+from os import getenv
+from subprocess import Popen, PIPE
+from re import sub
+
+def clang_has_option(option):
+ return [o for o in Popen(['clang', option], stderr=PIPE).stderr.readlines() if "unknown argument" in o] == [ ]
+
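+# When building with clang, scrub gcc-only flags (e.g. -specs=, -mcet,
+# -fcf-protection) that the distribution's Python recorded in its build
+# time CFLAGS, as clang would reject them.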
+cc = getenv("CC")
+if cc == "clang":
+ from _sysconfigdata import build_time_vars
+ build_time_vars["CFLAGS"] = sub("-specs=[^ ]+", "", build_time_vars["CFLAGS"])
+ if not clang_has_option("-mcet"):
+ build_time_vars["CFLAGS"] = sub("-mcet", "", build_time_vars["CFLAGS"])
+ if not clang_has_option("-fcf-protection"):
+ build_time_vars["CFLAGS"] = sub("-fcf-protection", "", build_time_vars["CFLAGS"])
+
+from distutils.core import setup, Extension
+
+from distutils.command.build_ext import build_ext as _build_ext
+from distutils.command.install_lib import install_lib as _install_lib
+
+class build_ext(_build_ext):
+ def finalize_options(self):
+ _build_ext.finalize_options(self)
+ self.build_lib = build_lib
+ self.build_temp = build_tmp
+
+class install_lib(_install_lib):
+ def finalize_options(self):
+ _install_lib.finalize_options(self)
+ self.build_dir = build_lib
+
+
+cflags = getenv('CFLAGS', '').split()
+# switch off several checks (need to be at the end of cflags list)
+cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter', '-Wno-redundant-decls' ]
+if cc != "clang":
+ cflags += ['-Wno-cast-function-type' ]
+
+src_perf = getenv('srctree') + '/tools/perf'
+build_lib = getenv('PYTHON_EXTBUILD_LIB')
+build_tmp = getenv('PYTHON_EXTBUILD_TMP')
+libtraceevent = getenv('LIBTRACEEVENT')
+libapikfs = getenv('LIBAPI')
+
+ext_sources = [f.strip() for f in open('util/python-ext-sources')
+ if len(f.strip()) > 0 and f[0] != '#']
+
+# use full paths with source files
+ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
+
+perf = Extension('perf',
+ sources = ext_sources,
+ include_dirs = ['util/include'],
+ extra_compile_args = cflags,
+ extra_objects = [libtraceevent, libapikfs],
+ )
+
+setup(name='perf',
+ version='0.1',
+ description='Interface with the Linux profiling infrastructure',
+ author='Arnaldo Carvalho de Melo',
+ author_email='acme@redhat.com',
+ license='GPLv2',
+ url='http://perf.wiki.kernel.org',
+ ext_modules=[perf],
+ cmdclass={'build_ext': build_ext, 'install_lib': install_lib})
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
new file mode 100644
index 000000000..453f6f6f2
--- /dev/null
+++ b/tools/perf/util/smt.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/bitops.h>
+#include "api/fs/fs.h"
+#include "smt.h"
+
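+/*
+ * Return 1 if SMT appears to be enabled, i.e. some CPU reports more
+ * than one sibling in its sysfs thread_siblings mask. The result is
+ * computed once and cached for later calls.
+ */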
+int smt_on(void)
+{
+ static bool cached;
+ static int cached_result;
+ int cpu;
+ int ncpu;
+
+ if (cached)
+ return cached_result;
+
+ ncpu = sysconf(_SC_NPROCESSORS_CONF);
+ for (cpu = 0; cpu < ncpu; cpu++) {
+ unsigned long long siblings;
+ char *str;
+ size_t strlen;
+ char fn[256];
+
+ snprintf(fn, sizeof fn,
+ "devices/system/cpu/cpu%d/topology/thread_siblings",
+ cpu);
+ if (sysfs__read_str(fn, &str, &strlen) < 0)
+ continue;
+ /* Entry is hex, but does not have 0x, so need custom parser */
+ siblings = strtoull(str, NULL, 16);
+ free(str);
+ if (hweight64(siblings) > 1) {
+ cached_result = 1;
+ cached = true;
+ break;
+ }
+ }
+ if (!cached) {
+ cached_result = 0;
+ cached = true;
+ }
+ return cached_result;
+}
diff --git a/tools/perf/util/smt.h b/tools/perf/util/smt.h
new file mode 100644
index 000000000..b8414b7bc
--- /dev/null
+++ b/tools/perf/util/smt.h
@@ -0,0 +1,6 @@
+#ifndef SMT_H
+#define SMT_H 1
+
+int smt_on(void);
+
+#endif
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
new file mode 100644
index 000000000..85ff4f68a
--- /dev/null
+++ b/tools/perf/util/sort.c
@@ -0,0 +1,3017 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <regex.h>
+#include <linux/mman.h>
+#include "sort.h"
+#include "hist.h"
+#include "comm.h"
+#include "symbol.h"
+#include "thread.h"
+#include "evsel.h"
+#include "evlist.h"
+#include "strlist.h"
+#include <traceevent/event-parse.h>
+#include "mem-events.h"
+#include <linux/kernel.h>
+
+regex_t parent_regex;
+const char default_parent_pattern[] = "^sys_|^do_page_fault";
+const char *parent_pattern = default_parent_pattern;
+const char *default_sort_order = "comm,dso,symbol";
+const char default_branch_sort_order[] = "comm,dso_from,symbol_from,symbol_to,cycles";
+const char default_mem_sort_order[] = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
+const char default_top_sort_order[] = "dso,symbol";
+const char default_diff_sort_order[] = "dso,symbol";
+const char default_tracepoint_sort_order[] = "trace";
+const char *sort_order;
+const char *field_order;
+regex_t ignore_callees_regex;
+int have_ignore_callees = 0;
+enum sort_mode sort__mode = SORT_MODE__NORMAL;
+
+/*
+ * Replaces all occurrences of the character passed with the:
+ *
+ * -t, --field-separator
+ *
+ * option, which uses a special separator character and doesn't pad with
+ * spaces, replacing all occurrences of this separator in symbol names
+ * (and other output) with a '.' character, so that the separator is the
+ * only character that cannot appear within a field.
+ */
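+/* E.g. with "-t :" a symbol such as "ns::sym" is printed as "ns..sym". */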
+static int repsep_snprintf(char *bf, size_t size, const char *fmt, ...)
+{
+ int n;
+ va_list ap;
+
+ va_start(ap, fmt);
+ n = vsnprintf(bf, size, fmt, ap);
+ if (symbol_conf.field_sep && n > 0) {
+ char *sep = bf;
+
+ while (1) {
+ sep = strchr(sep, *symbol_conf.field_sep);
+ if (sep == NULL)
+ break;
+ *sep = '.';
+ }
+ }
+ va_end(ap);
+
+ if (n >= (int)size)
+ return size - 1;
+ return n;
+}
+
+static int64_t cmp_null(const void *l, const void *r)
+{
+ if (!l && !r)
+ return 0;
+ else if (!l)
+ return -1;
+ else
+ return 1;
+}
+
+/* --sort pid */
+
+static int64_t
+sort__thread_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->thread->tid - left->thread->tid;
+}
+
+static int hist_entry__thread_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ const char *comm = thread__comm_str(he->thread);
+
+ width = max(7U, width) - 8;
+ return repsep_snprintf(bf, size, "%7d:%-*.*s", he->thread->tid,
+ width, width, comm ?: "");
+}
+
+static int hist_entry__thread_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const struct thread *th = arg;
+
+ if (type != HIST_FILTER__THREAD)
+ return -1;
+
+ return th && he->thread != th;
+}
+
+struct sort_entry sort_thread = {
+ .se_header = " Pid:Command",
+ .se_cmp = sort__thread_cmp,
+ .se_snprintf = hist_entry__thread_snprintf,
+ .se_filter = hist_entry__thread_filter,
+ .se_width_idx = HISTC_THREAD,
+};
+
+/* --sort comm */
+
+/*
+ * We can't use pointer comparison in functions below,
+ * because it gives different results based on pointer
+ * values, which could break some sorting assumptions.
+ */
+static int64_t
+sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int64_t
+sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
+{
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int64_t
+sort__comm_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ return strcmp(comm__str(right->comm), comm__str(left->comm));
+}
+
+static int hist_entry__comm_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, comm__str(he->comm));
+}
+
+struct sort_entry sort_comm = {
+ .se_header = "Command",
+ .se_cmp = sort__comm_cmp,
+ .se_collapse = sort__comm_collapse,
+ .se_sort = sort__comm_sort,
+ .se_snprintf = hist_entry__comm_snprintf,
+ .se_filter = hist_entry__thread_filter,
+ .se_width_idx = HISTC_COMM,
+};
+
+/* --sort dso */
+
+static int64_t _sort__dso_cmp(struct map *map_l, struct map *map_r)
+{
+ struct dso *dso_l = map_l ? map_l->dso : NULL;
+ struct dso *dso_r = map_r ? map_r->dso : NULL;
+ const char *dso_name_l, *dso_name_r;
+
+ if (!dso_l || !dso_r)
+ return cmp_null(dso_r, dso_l);
+
+ if (verbose > 0) {
+ dso_name_l = dso_l->long_name;
+ dso_name_r = dso_r->long_name;
+ } else {
+ dso_name_l = dso_l->short_name;
+ dso_name_r = dso_r->short_name;
+ }
+
+ return strcmp(dso_name_l, dso_name_r);
+}
+
+static int64_t
+sort__dso_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__dso_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_snprintf(struct map *map, char *bf,
+ size_t size, unsigned int width)
+{
+ if (map && map->dso) {
+ const char *dso_name = verbose > 0 ? map->dso->long_name :
+ map->dso->short_name;
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, dso_name);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "[unknown]");
+}
+
+static int hist_entry__dso_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__dso_snprintf(he->ms.map, bf, size, width);
+}
+
+static int hist_entry__dso_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const struct dso *dso = arg;
+
+ if (type != HIST_FILTER__DSO)
+ return -1;
+
+ return dso && (!he->ms.map || he->ms.map->dso != dso);
+}
+
+struct sort_entry sort_dso = {
+ .se_header = "Shared Object",
+ .se_cmp = sort__dso_cmp,
+ .se_snprintf = hist_entry__dso_snprintf,
+ .se_filter = hist_entry__dso_filter,
+ .se_width_idx = HISTC_DSO,
+};
+
+/* --sort symbol */
+
+static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
+{
+ return (int64_t)(right_ip - left_ip);
+}
+
+static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+ if (!sym_l || !sym_r)
+ return cmp_null(sym_l, sym_r);
+
+ if (sym_l == sym_r)
+ return 0;
+
+ if (sym_l->inlined || sym_r->inlined) {
+ int ret = strcmp(sym_l->name, sym_r->name);
+
+ if (ret)
+ return ret;
+ if ((sym_l->start <= sym_r->end) && (sym_l->end >= sym_r->start))
+ return 0;
+ }
+
+ if (sym_l->start != sym_r->start)
+ return (int64_t)(sym_r->start - sym_l->start);
+
+ return (int64_t)(sym_r->end - sym_l->end);
+}
+
+static int64_t
+sort__sym_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ if (!left->ms.sym && !right->ms.sym)
+ return _sort__addr_cmp(left->ip, right->ip);
+
+ /*
+ * comparing symbol address alone is not enough since it's a
+ * relative address within a dso.
+ */
+ if (!hists__has(left->hists, dso) || hists__has(right->hists, dso)) {
+ ret = sort__dso_cmp(left, right);
+ if (ret != 0)
+ return ret;
+ }
+
+ return _sort__sym_cmp(left->ms.sym, right->ms.sym);
+}
+
+static int64_t
+sort__sym_sort(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->ms.sym || !right->ms.sym)
+ return cmp_null(left->ms.sym, right->ms.sym);
+
+ return strcmp(right->ms.sym->name, left->ms.sym->name);
+}
+
+static int _hist_entry__sym_snprintf(struct map *map, struct symbol *sym,
+ u64 ip, char level, char *bf, size_t size,
+ unsigned int width)
+{
+ size_t ret = 0;
+
+ if (verbose > 0) {
+ char o = map ? dso__symtab_origin(map->dso) : '!';
+ ret += repsep_snprintf(bf, size, "%-#*llx %c ",
+ BITS_PER_LONG / 4 + 2, ip, o);
+ }
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
+ if (sym && map) {
+ if (sym->type == STT_OBJECT) {
+ ret += repsep_snprintf(bf + ret, size - ret, "%s", sym->name);
+ ret += repsep_snprintf(bf + ret, size - ret, "+0x%llx",
+ ip - map->unmap_ip(map, sym->start));
+ } else {
+ ret += repsep_snprintf(bf + ret, size - ret, "%.*s",
+ width - ret,
+ sym->name);
+ if (sym->inlined)
+ ret += repsep_snprintf(bf + ret, size - ret,
+ " (inlined)");
+ }
+ } else {
+ size_t len = BITS_PER_LONG / 4;
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx",
+ len, ip);
+ }
+
+ return ret;
+}
+
+static int hist_entry__sym_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__sym_snprintf(he->ms.map, he->ms.sym, he->ip,
+ he->level, bf, size, width);
+}
+
+static int hist_entry__sym_filter(struct hist_entry *he, int type, const void *arg)
+{
+ const char *sym = arg;
+
+ if (type != HIST_FILTER__SYMBOL)
+ return -1;
+
+ return sym && (!he->ms.sym || !strstr(he->ms.sym->name, sym));
+}
+
+struct sort_entry sort_sym = {
+ .se_header = "Symbol",
+ .se_cmp = sort__sym_cmp,
+ .se_sort = sort__sym_sort,
+ .se_snprintf = hist_entry__sym_snprintf,
+ .se_filter = hist_entry__sym_filter,
+ .se_width_idx = HISTC_SYMBOL,
+};
+
+/* --sort srcline */
+
+char *hist_entry__srcline(struct hist_entry *he)
+{
+ return map__srcline(he->ms.map, he->ip, he->ms.sym);
+}
+
+static int64_t
+sort__srcline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->srcline)
+ left->srcline = hist_entry__srcline(left);
+ if (!right->srcline)
+ right->srcline = hist_entry__srcline(right);
+
+ return strcmp(right->srcline, left->srcline);
+}
+
+static int hist_entry__srcline_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (!he->srcline)
+ he->srcline = hist_entry__srcline(he);
+
+ return repsep_snprintf(bf, size, "%-.*s", width, he->srcline);
+}
+
+struct sort_entry sort_srcline = {
+ .se_header = "Source:Line",
+ .se_cmp = sort__srcline_cmp,
+ .se_snprintf = hist_entry__srcline_snprintf,
+ .se_width_idx = HISTC_SRCLINE,
+};
+
+/* --sort srcline_from */
+
+static char *addr_map_symbol__srcline(struct addr_map_symbol *ams)
+{
+ return map__srcline(ams->map, ams->al_addr, ams->sym);
+}
+
+static int64_t
+sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info->srcline_from)
+ left->branch_info->srcline_from = addr_map_symbol__srcline(&left->branch_info->from);
+
+ if (!right->branch_info->srcline_from)
+ right->branch_info->srcline_from = addr_map_symbol__srcline(&right->branch_info->from);
+
+ return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from);
+}
+
+static int hist_entry__srcline_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_from);
+}
+
+struct sort_entry sort_srcline_from = {
+ .se_header = "From Source:Line",
+ .se_cmp = sort__srcline_from_cmp,
+ .se_snprintf = hist_entry__srcline_from_snprintf,
+ .se_width_idx = HISTC_SRCLINE_FROM,
+};
+
+/* --sort srcline_to */
+
+static int64_t
+sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info->srcline_to)
+ left->branch_info->srcline_to = addr_map_symbol__srcline(&left->branch_info->to);
+
+ if (!right->branch_info->srcline_to)
+ right->branch_info->srcline_to = addr_map_symbol__srcline(&right->branch_info->to);
+
+ return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to);
+}
+
+static int hist_entry__srcline_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, he->branch_info->srcline_to);
+}
+
+struct sort_entry sort_srcline_to = {
+ .se_header = "To Source:Line",
+ .se_cmp = sort__srcline_to_cmp,
+ .se_snprintf = hist_entry__srcline_to_snprintf,
+ .se_width_idx = HISTC_SRCLINE_TO,
+};
+
+/* --sort srcfile */
+
+static char no_srcfile[1];
+
+static char *hist_entry__get_srcfile(struct hist_entry *e)
+{
+ char *sf, *p;
+ struct map *map = e->ms.map;
+
+ if (!map)
+ return no_srcfile;
+
+ sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip),
+ e->ms.sym, false, true, true, e->ip);
+ if (!strcmp(sf, SRCLINE_UNKNOWN))
+ return no_srcfile;
+ p = strchr(sf, ':');
+ if (p && *sf) {
+ *p = 0;
+ return sf;
+ }
+ free(sf);
+ return no_srcfile;
+}
+
+static int64_t
+sort__srcfile_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->srcfile)
+ left->srcfile = hist_entry__get_srcfile(left);
+ if (!right->srcfile)
+ right->srcfile = hist_entry__get_srcfile(right);
+
+ return strcmp(right->srcfile, left->srcfile);
+}
+
+static int hist_entry__srcfile_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (!he->srcfile)
+ he->srcfile = hist_entry__get_srcfile(he);
+
+ return repsep_snprintf(bf, size, "%-.*s", width, he->srcfile);
+}
+
+struct sort_entry sort_srcfile = {
+ .se_header = "Source File",
+ .se_cmp = sort__srcfile_cmp,
+ .se_snprintf = hist_entry__srcfile_snprintf,
+ .se_width_idx = HISTC_SRCFILE,
+};
+
+/* --sort parent */
+
+static int64_t
+sort__parent_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct symbol *sym_l = left->parent;
+ struct symbol *sym_r = right->parent;
+
+ if (!sym_l || !sym_r)
+ return cmp_null(sym_l, sym_r);
+
+ return strcmp(sym_r->name, sym_l->name);
+}
+
+static int hist_entry__parent_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*.*s", width, width,
+ he->parent ? he->parent->name : "[other]");
+}
+
+struct sort_entry sort_parent = {
+ .se_header = "Parent symbol",
+ .se_cmp = sort__parent_cmp,
+ .se_snprintf = hist_entry__parent_snprintf,
+ .se_width_idx = HISTC_PARENT,
+};
+
+/* --sort cpu */
+
+static int64_t
+sort__cpu_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->cpu - left->cpu;
+}
+
+static int hist_entry__cpu_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*.*d", width, width, he->cpu);
+}
+
+struct sort_entry sort_cpu = {
+ .se_header = "CPU",
+ .se_cmp = sort__cpu_cmp,
+ .se_snprintf = hist_entry__cpu_snprintf,
+ .se_width_idx = HISTC_CPU,
+};
+
+/* --sort cgroup_id */
+
+static int64_t _sort__cgroup_dev_cmp(u64 left_dev, u64 right_dev)
+{
+ return (int64_t)(right_dev - left_dev);
+}
+
+static int64_t _sort__cgroup_inode_cmp(u64 left_ino, u64 right_ino)
+{
+ return (int64_t)(right_ino - left_ino);
+}
+
+static int64_t
+sort__cgroup_id_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ int64_t ret;
+
+ ret = _sort__cgroup_dev_cmp(right->cgroup_id.dev, left->cgroup_id.dev);
+ if (ret != 0)
+ return ret;
+
+ return _sort__cgroup_inode_cmp(right->cgroup_id.ino,
+ left->cgroup_id.ino);
+}
+
+static int hist_entry__cgroup_id_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ return repsep_snprintf(bf, size, "%lu/0x%lx", he->cgroup_id.dev,
+ he->cgroup_id.ino);
+}
+
+struct sort_entry sort_cgroup_id = {
+ .se_header = "cgroup id (dev/inode)",
+ .se_cmp = sort__cgroup_id_cmp,
+ .se_snprintf = hist_entry__cgroup_id_snprintf,
+ .se_width_idx = HISTC_CGROUP_ID,
+};
+
+/* --sort socket */
+
+static int64_t
+sort__socket_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->socket - left->socket;
+}
+
+static int hist_entry__socket_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%*.*d", width, width-3, he->socket);
+}
+
+static int hist_entry__socket_filter(struct hist_entry *he, int type, const void *arg)
+{
+ int sk = *(const int *)arg;
+
+ if (type != HIST_FILTER__SOCKET)
+ return -1;
+
+ return sk >= 0 && he->socket != sk;
+}
+
+struct sort_entry sort_socket = {
+ .se_header = "Socket",
+ .se_cmp = sort__socket_cmp,
+ .se_snprintf = hist_entry__socket_snprintf,
+ .se_filter = hist_entry__socket_filter,
+ .se_width_idx = HISTC_SOCKET,
+};
+
+/* --sort trace */
+
+static char *get_trace_output(struct hist_entry *he)
+{
+ struct trace_seq seq;
+ struct perf_evsel *evsel;
+ struct tep_record rec = {
+ .data = he->raw_data,
+ .size = he->raw_size,
+ };
+
+ evsel = hists_to_evsel(he->hists);
+
+ trace_seq_init(&seq);
+ if (symbol_conf.raw_trace) {
+ tep_print_fields(&seq, he->raw_data, he->raw_size,
+ evsel->tp_format);
+ } else {
+ tep_event_info(&seq, evsel->tp_format, &rec);
+ }
+ /*
+ * Trim the buffer; it starts at 4KB and we're not going to
+ * add anything more to it.
+ */
+ return realloc(seq.buffer, seq.len + 1);
+}
+
+static int64_t
+sort__trace_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct perf_evsel *evsel;
+
+ evsel = hists_to_evsel(left->hists);
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return 0;
+
+ if (left->trace_output == NULL)
+ left->trace_output = get_trace_output(left);
+ if (right->trace_output == NULL)
+ right->trace_output = get_trace_output(right);
+
+ return strcmp(right->trace_output, left->trace_output);
+}
+
+static int hist_entry__trace_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct perf_evsel *evsel;
+
+ evsel = hists_to_evsel(he->hists);
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ return scnprintf(bf, size, "%-.*s", width, "N/A");
+
+ if (he->trace_output == NULL)
+ he->trace_output = get_trace_output(he);
+ return repsep_snprintf(bf, size, "%-.*s", width, he->trace_output);
+}
+
+struct sort_entry sort_trace = {
+ .se_header = "Trace output",
+ .se_cmp = sort__trace_cmp,
+ .se_snprintf = hist_entry__trace_snprintf,
+ .se_width_idx = HISTC_TRACE,
+};
+
+/* sort keys for branch stacks */
+
+static int64_t
+sort__dso_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return _sort__dso_cmp(left->branch_info->from.map,
+ right->branch_info->from.map);
+}
+
+static int hist_entry__dso_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info)
+ return _hist_entry__dso_snprintf(he->branch_info->from.map,
+ bf, size, width);
+ else
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__dso_from_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const struct dso *dso = arg;
+
+ if (type != HIST_FILTER__DSO)
+ return -1;
+
+ return dso && (!he->branch_info || !he->branch_info->from.map ||
+ he->branch_info->from.map->dso != dso);
+}
+
+static int64_t
+sort__dso_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return _sort__dso_cmp(left->branch_info->to.map,
+ right->branch_info->to.map);
+}
+
+static int hist_entry__dso_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info)
+ return _hist_entry__dso_snprintf(he->branch_info->to.map,
+ bf, size, width);
+ else
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__dso_to_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const struct dso *dso = arg;
+
+ if (type != HIST_FILTER__DSO)
+ return -1;
+
+ return dso && (!he->branch_info || !he->branch_info->to.map ||
+ he->branch_info->to.map->dso != dso);
+}
+
+static int64_t
+sort__sym_from_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *from_l, *from_r;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ from_l = &left->branch_info->from;
+ from_r = &right->branch_info->from;
+
+ if (!from_l->sym && !from_r->sym)
+ return _sort__addr_cmp(from_l->addr, from_r->addr);
+
+ return _sort__sym_cmp(from_l->sym, from_r->sym);
+}
+
+static int64_t
+sort__sym_to_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct addr_map_symbol *to_l, *to_r;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ to_l = &left->branch_info->to;
+ to_r = &right->branch_info->to;
+
+ if (!to_l->sym && !to_r->sym)
+ return _sort__addr_cmp(to_l->addr, to_r->addr);
+
+ return _sort__sym_cmp(to_l->sym, to_r->sym);
+}
+
+static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *from = &he->branch_info->from;
+
+ return _hist_entry__sym_snprintf(from->map, from->sym, from->addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (he->branch_info) {
+ struct addr_map_symbol *to = &he->branch_info->to;
+
+ return _hist_entry__sym_snprintf(to->map, to->sym, to->addr,
+ he->level, bf, size, width);
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
+}
+
+static int hist_entry__sym_from_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const char *sym = arg;
+
+ if (type != HIST_FILTER__SYMBOL)
+ return -1;
+
+ return sym && !(he->branch_info && he->branch_info->from.sym &&
+ strstr(he->branch_info->from.sym->name, sym));
+}
+
+static int hist_entry__sym_to_filter(struct hist_entry *he, int type,
+ const void *arg)
+{
+ const char *sym = arg;
+
+ if (type != HIST_FILTER__SYMBOL)
+ return -1;
+
+ return sym && !(he->branch_info && he->branch_info->to.sym &&
+ strstr(he->branch_info->to.sym->name, sym));
+}
+
+struct sort_entry sort_dso_from = {
+ .se_header = "Source Shared Object",
+ .se_cmp = sort__dso_from_cmp,
+ .se_snprintf = hist_entry__dso_from_snprintf,
+ .se_filter = hist_entry__dso_from_filter,
+ .se_width_idx = HISTC_DSO_FROM,
+};
+
+struct sort_entry sort_dso_to = {
+ .se_header = "Target Shared Object",
+ .se_cmp = sort__dso_to_cmp,
+ .se_snprintf = hist_entry__dso_to_snprintf,
+ .se_filter = hist_entry__dso_to_filter,
+ .se_width_idx = HISTC_DSO_TO,
+};
+
+struct sort_entry sort_sym_from = {
+ .se_header = "Source Symbol",
+ .se_cmp = sort__sym_from_cmp,
+ .se_snprintf = hist_entry__sym_from_snprintf,
+ .se_filter = hist_entry__sym_from_filter,
+ .se_width_idx = HISTC_SYMBOL_FROM,
+};
+
+struct sort_entry sort_sym_to = {
+ .se_header = "Target Symbol",
+ .se_cmp = sort__sym_to_cmp,
+ .se_snprintf = hist_entry__sym_to_snprintf,
+ .se_filter = hist_entry__sym_to_filter,
+ .se_width_idx = HISTC_SYMBOL_TO,
+};
+
+static int64_t
+sort__mispredict_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ unsigned char mp, p;
+
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ mp = left->branch_info->flags.mispred != right->branch_info->flags.mispred;
+ p = left->branch_info->flags.predicted != right->branch_info->flags.predicted;
+ return mp || p;
+}
+
+static int hist_entry__mispredict_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ static const char *out = "N/A";
+
+ if (he->branch_info) {
+ if (he->branch_info->flags.predicted)
+ out = "N";
+ else if (he->branch_info->flags.mispred)
+ out = "Y";
+ }
+
+ return repsep_snprintf(bf, size, "%-*.*s", width, width, out);
+}
+
+static int64_t
+sort__cycles_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return left->branch_info->flags.cycles -
+ right->branch_info->flags.cycles;
+}
+
+static int hist_entry__cycles_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ if (!he->branch_info)
+ return scnprintf(bf, size, "%-.*s", width, "N/A");
+ if (he->branch_info->flags.cycles == 0)
+ return repsep_snprintf(bf, size, "%-*s", width, "-");
+ return repsep_snprintf(bf, size, "%-*hd", width,
+ he->branch_info->flags.cycles);
+}
+
+struct sort_entry sort_cycles = {
+ .se_header = "Basic Block Cycles",
+ .se_cmp = sort__cycles_cmp,
+ .se_snprintf = hist_entry__cycles_snprintf,
+ .se_width_idx = HISTC_CYCLES,
+};
+
+/* --sort daddr_sym */
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->daddr.addr;
+ if (right->mem_info)
+ r = right->mem_info->daddr.addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ struct map *map = NULL;
+ struct symbol *sym = NULL;
+
+ if (he->mem_info) {
+ addr = he->mem_info->daddr.addr;
+ map = he->mem_info->daddr.map;
+ sym = he->mem_info->daddr.sym;
+ }
+ return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size,
+ width);
+}
+
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->iaddr.addr;
+ if (right->mem_info)
+ r = right->mem_info->iaddr.addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__iaddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ struct map *map = NULL;
+ struct symbol *sym = NULL;
+
+ if (he->mem_info) {
+ addr = he->mem_info->iaddr.addr;
+ map = he->mem_info->iaddr.map;
+ sym = he->mem_info->iaddr.sym;
+ }
+ return _hist_entry__sym_snprintf(map, sym, addr, he->level, bf, size,
+ width);
+}
+
+static int64_t
+sort__dso_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ struct map *map_l = NULL;
+ struct map *map_r = NULL;
+
+ if (left->mem_info)
+ map_l = left->mem_info->daddr.map;
+ if (right->mem_info)
+ map_r = right->mem_info->daddr.map;
+
+ return _sort__dso_cmp(map_l, map_r);
+}
+
+static int hist_entry__dso_daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ struct map *map = NULL;
+
+ if (he->mem_info)
+ map = he->mem_info->daddr.map;
+
+ return _hist_entry__dso_snprintf(map, bf, size, width);
+}
+
+static int64_t
+sort__locked_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_lock = PERF_MEM_LOCK_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_lock = PERF_MEM_LOCK_NA;
+
+ return (int64_t)(data_src_r.mem_lock - data_src_l.mem_lock);
+}
+
+static int hist_entry__locked_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[10];
+
+ perf_mem__lck_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%.*s", width, out);
+}
+
+static int64_t
+sort__tlb_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_dtlb = PERF_MEM_TLB_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_dtlb = PERF_MEM_TLB_NA;
+
+ return (int64_t)(data_src_r.mem_dtlb - data_src_l.mem_dtlb);
+}
+
+static int hist_entry__tlb_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[64];
+
+ perf_mem__tlb_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+static int64_t
+sort__lvl_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_lvl = PERF_MEM_LVL_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_lvl = PERF_MEM_LVL_NA;
+
+ return (int64_t)(data_src_r.mem_lvl - data_src_l.mem_lvl);
+}
+
+static int hist_entry__lvl_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[64];
+
+ perf_mem__lvl_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+static int64_t
+sort__snoop_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ union perf_mem_data_src data_src_l;
+ union perf_mem_data_src data_src_r;
+
+ if (left->mem_info)
+ data_src_l = left->mem_info->data_src;
+ else
+ data_src_l.mem_snoop = PERF_MEM_SNOOP_NA;
+
+ if (right->mem_info)
+ data_src_r = right->mem_info->data_src;
+ else
+ data_src_r.mem_snoop = PERF_MEM_SNOOP_NA;
+
+ return (int64_t)(data_src_r.mem_snoop - data_src_l.mem_snoop);
+}
+
+static int hist_entry__snoop_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ char out[64];
+
+ perf_mem__snp_scnprintf(out, sizeof(out), he->mem_info);
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ u64 l, r;
+ struct map *l_map, *r_map;
+
+ if (!left->mem_info) return -1;
+ if (!right->mem_info) return 1;
+
+ /* group event types together */
+ if (left->cpumode > right->cpumode) return -1;
+ if (left->cpumode < right->cpumode) return 1;
+
+ l_map = left->mem_info->daddr.map;
+ r_map = right->mem_info->daddr.map;
+
+ /* if both are NULL, jump to sort on al_addr instead */
+ if (!l_map && !r_map)
+ goto addr;
+
+ if (!l_map) return -1;
+ if (!r_map) return 1;
+
+ if (l_map->maj > r_map->maj) return -1;
+ if (l_map->maj < r_map->maj) return 1;
+
+ if (l_map->min > r_map->min) return -1;
+ if (l_map->min < r_map->min) return 1;
+
+ if (l_map->ino > r_map->ino) return -1;
+ if (l_map->ino < r_map->ino) return 1;
+
+ if (l_map->ino_generation > r_map->ino_generation) return -1;
+ if (l_map->ino_generation < r_map->ino_generation) return 1;
+
+ /*
+ * Addresses with no major/minor numbers are assumed to be
+ * anonymous in userspace. Sort those on pid then address.
+ *
+ * The kernel and non-zero major/minor mapped areas are
+ * assumed to be unity mapped. Sort those on address.
+ */
+
+ if ((left->cpumode != PERF_RECORD_MISC_KERNEL) &&
+ (!(l_map->flags & MAP_SHARED)) &&
+ !l_map->maj && !l_map->min && !l_map->ino &&
+ !l_map->ino_generation) {
+ /* userspace anonymous */
+
+ if (left->thread->pid_ > right->thread->pid_) return -1;
+ if (left->thread->pid_ < right->thread->pid_) return 1;
+ }
+
+addr:
+ /* al_addr does all the right addr - start + offset calculations */
+ l = cl_address(left->mem_info->daddr.al_addr);
+ r = cl_address(right->mem_info->daddr.al_addr);
+
+ if (l > r) return -1;
+ if (l < r) return 1;
+
+ return 0;
+}
+
+static int hist_entry__dcacheline_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ struct map *map = NULL;
+ struct symbol *sym = NULL;
+ char level = he->level;
+
+ if (he->mem_info) {
+ addr = cl_address(he->mem_info->daddr.al_addr);
+ map = he->mem_info->daddr.map;
+ sym = he->mem_info->daddr.sym;
+
+ /* print [s] for shared data mmaps */
+ if ((he->cpumode != PERF_RECORD_MISC_KERNEL) &&
+ map && !(map->prot & PROT_EXEC) &&
+ (map->flags & MAP_SHARED) &&
+ (map->maj || map->min || map->ino ||
+ map->ino_generation))
+ level = 's';
+ else if (!map)
+ level = 'X';
+ }
+ return _hist_entry__sym_snprintf(map, sym, addr, level, bf, size,
+ width);
+}
+
+struct sort_entry sort_mispredict = {
+ .se_header = "Branch Mispredicted",
+ .se_cmp = sort__mispredict_cmp,
+ .se_snprintf = hist_entry__mispredict_snprintf,
+ .se_width_idx = HISTC_MISPREDICT,
+};
+
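+
+/* average sample weight of the entry, i.e. total weight over event count */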
+static u64 he_weight(struct hist_entry *he)
+{
+ return he->stat.nr_events ? he->stat.weight / he->stat.nr_events : 0;
+}
+
+static int64_t
+sort__local_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return he_weight(left) - he_weight(right);
+}
+
+static int hist_entry__local_weight_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*llu", width, he_weight(he));
+}
+
+struct sort_entry sort_local_weight = {
+ .se_header = "Local Weight",
+ .se_cmp = sort__local_weight_cmp,
+ .se_snprintf = hist_entry__local_weight_snprintf,
+ .se_width_idx = HISTC_LOCAL_WEIGHT,
+};
+
+static int64_t
+sort__global_weight_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->stat.weight - right->stat.weight;
+}
+
+static int hist_entry__global_weight_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return repsep_snprintf(bf, size, "%-*llu", width, he->stat.weight);
+}
+
+struct sort_entry sort_global_weight = {
+ .se_header = "Weight",
+ .se_cmp = sort__global_weight_cmp,
+ .se_snprintf = hist_entry__global_weight_snprintf,
+ .se_width_idx = HISTC_GLOBAL_WEIGHT,
+};
+
+struct sort_entry sort_mem_daddr_sym = {
+ .se_header = "Data Symbol",
+ .se_cmp = sort__daddr_cmp,
+ .se_snprintf = hist_entry__daddr_snprintf,
+ .se_width_idx = HISTC_MEM_DADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_iaddr_sym = {
+ .se_header = "Code Symbol",
+ .se_cmp = sort__iaddr_cmp,
+ .se_snprintf = hist_entry__iaddr_snprintf,
+ .se_width_idx = HISTC_MEM_IADDR_SYMBOL,
+};
+
+struct sort_entry sort_mem_daddr_dso = {
+ .se_header = "Data Object",
+ .se_cmp = sort__dso_daddr_cmp,
+ .se_snprintf = hist_entry__dso_daddr_snprintf,
+ .se_width_idx = HISTC_MEM_DADDR_DSO,
+};
+
+struct sort_entry sort_mem_locked = {
+ .se_header = "Locked",
+ .se_cmp = sort__locked_cmp,
+ .se_snprintf = hist_entry__locked_snprintf,
+ .se_width_idx = HISTC_MEM_LOCKED,
+};
+
+struct sort_entry sort_mem_tlb = {
+ .se_header = "TLB access",
+ .se_cmp = sort__tlb_cmp,
+ .se_snprintf = hist_entry__tlb_snprintf,
+ .se_width_idx = HISTC_MEM_TLB,
+};
+
+struct sort_entry sort_mem_lvl = {
+ .se_header = "Memory access",
+ .se_cmp = sort__lvl_cmp,
+ .se_snprintf = hist_entry__lvl_snprintf,
+ .se_width_idx = HISTC_MEM_LVL,
+};
+
+struct sort_entry sort_mem_snoop = {
+ .se_header = "Snoop",
+ .se_cmp = sort__snoop_cmp,
+ .se_snprintf = hist_entry__snoop_snprintf,
+ .se_width_idx = HISTC_MEM_SNOOP,
+};
+
+struct sort_entry sort_mem_dcacheline = {
+ .se_header = "Data Cacheline",
+ .se_cmp = sort__dcacheline_cmp,
+ .se_snprintf = hist_entry__dcacheline_snprintf,
+ .se_width_idx = HISTC_MEM_DCACHELINE,
+};
+
+static int64_t
+sort__phys_daddr_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ uint64_t l = 0, r = 0;
+
+ if (left->mem_info)
+ l = left->mem_info->daddr.phys_addr;
+ if (right->mem_info)
+ r = right->mem_info->daddr.phys_addr;
+
+ return (int64_t)(r - l);
+}
+
+static int hist_entry__phys_daddr_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ uint64_t addr = 0;
+ int ret = 0;
+ int len = BITS_PER_LONG / 4;
+
+ addr = he->mem_info->daddr.phys_addr;
+
+ ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", he->level);
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-#.*llx", len, addr);
+
+ ret += repsep_snprintf(bf + ret, size - ret, "%-*s", width - ret, "");
+
+ if (ret > width)
+ bf[width] = '\0';
+
+ return width;
+}
+
+struct sort_entry sort_mem_phys_daddr = {
+ .se_header = "Data Physical Address",
+ .se_cmp = sort__phys_daddr_cmp,
+ .se_snprintf = hist_entry__phys_daddr_snprintf,
+ .se_width_idx = HISTC_MEM_PHYS_DADDR,
+};
+
+static int64_t
+sort__abort_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return left->branch_info->flags.abort !=
+ right->branch_info->flags.abort;
+}
+
+static int hist_entry__abort_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ static const char *out = "N/A";
+
+ if (he->branch_info) {
+ if (he->branch_info->flags.abort)
+ out = "A";
+ else
+ out = ".";
+ }
+
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_abort = {
+ .se_header = "Transaction abort",
+ .se_cmp = sort__abort_cmp,
+ .se_snprintf = hist_entry__abort_snprintf,
+ .se_width_idx = HISTC_ABORT,
+};
+
+static int64_t
+sort__in_tx_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (!left->branch_info || !right->branch_info)
+ return cmp_null(left->branch_info, right->branch_info);
+
+ return left->branch_info->flags.in_tx !=
+ right->branch_info->flags.in_tx;
+}
+
+static int hist_entry__in_tx_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ static const char *out = "N/A";
+
+ if (he->branch_info) {
+ if (he->branch_info->flags.in_tx)
+ out = "T";
+ else
+ out = ".";
+ }
+
+ return repsep_snprintf(bf, size, "%-*s", width, out);
+}
+
+struct sort_entry sort_in_tx = {
+ .se_header = "Branch in transaction",
+ .se_cmp = sort__in_tx_cmp,
+ .se_snprintf = hist_entry__in_tx_snprintf,
+ .se_width_idx = HISTC_IN_TX,
+};
+
+static int64_t
+sort__transaction_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return left->transaction - right->transaction;
+}
+
+static inline char *add_str(char *p, const char *str)
+{
+ strcpy(p, str);
+ return p + strlen(str);
+}
+
+static struct txbit {
+ unsigned flag;
+ const char *name;
+ int skip_for_len;
+} txbits[] = {
+ { PERF_TXN_ELISION, "EL ", 0 },
+ { PERF_TXN_TRANSACTION, "TX ", 1 },
+ { PERF_TXN_SYNC, "SYNC ", 1 },
+ { PERF_TXN_ASYNC, "ASYNC ", 0 },
+ { PERF_TXN_RETRY, "RETRY ", 0 },
+ { PERF_TXN_CONFLICT, "CON ", 0 },
+ { PERF_TXN_CAPACITY_WRITE, "CAP-WRITE ", 1 },
+ { PERF_TXN_CAPACITY_READ, "CAP-READ ", 0 },
+ { 0, NULL, 0 }
+};
+
+int hist_entry__transaction_len(void)
+{
+ int i;
+ int len = 0;
+
+ for (i = 0; txbits[i].name; i++) {
+ if (!txbits[i].skip_for_len)
+ len += strlen(txbits[i].name);
+ }
+ len += 4; /* :XX<space> */
+ return len;
+}
+
+static int hist_entry__transaction_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ u64 t = he->transaction;
+ char buf[128];
+ char *p = buf;
+ int i;
+
+ buf[0] = 0;
+ for (i = 0; txbits[i].name; i++)
+ if (txbits[i].flag & t)
+ p = add_str(p, txbits[i].name);
+ if (t && !(t & (PERF_TXN_SYNC|PERF_TXN_ASYNC)))
+ p = add_str(p, "NEITHER ");
+ if (t & PERF_TXN_ABORT_MASK) {
+ sprintf(p, ":%" PRIx64,
+ (t & PERF_TXN_ABORT_MASK) >>
+ PERF_TXN_ABORT_SHIFT);
+ p += strlen(p);
+ }
+
+ return repsep_snprintf(bf, size, "%-*s", width, buf);
+}
+
+struct sort_entry sort_transaction = {
+ .se_header = "Transaction ",
+ .se_cmp = sort__transaction_cmp,
+ .se_snprintf = hist_entry__transaction_snprintf,
+ .se_width_idx = HISTC_TRANSACTION,
+};
+
+/* --sort symbol_size */
+
+static int64_t _sort__sym_size_cmp(struct symbol *sym_l, struct symbol *sym_r)
+{
+ int64_t size_l = sym_l != NULL ? symbol__size(sym_l) : 0;
+ int64_t size_r = sym_r != NULL ? symbol__size(sym_r) : 0;
+
+ return size_l < size_r ? -1 :
+ size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__sym_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__sym_size_cmp(right->ms.sym, left->ms.sym);
+}
+
+static int _hist_entry__sym_size_snprintf(struct symbol *sym, char *bf,
+ size_t bf_size, unsigned int width)
+{
+ if (sym)
+ return repsep_snprintf(bf, bf_size, "%*d", width, symbol__size(sym));
+
+ return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__sym_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__sym_size_snprintf(he->ms.sym, bf, size, width);
+}
+
+struct sort_entry sort_sym_size = {
+ .se_header = "Symbol size",
+ .se_cmp = sort__sym_size_cmp,
+ .se_snprintf = hist_entry__sym_size_snprintf,
+ .se_width_idx = HISTC_SYM_SIZE,
+};
+
+/* --sort dso_size */
+
+static int64_t _sort__dso_size_cmp(struct map *map_l, struct map *map_r)
+{
+ int64_t size_l = map_l != NULL ? map__size(map_l) : 0;
+ int64_t size_r = map_r != NULL ? map__size(map_r) : 0;
+
+ return size_l < size_r ? -1 :
+ size_l == size_r ? 0 : 1;
+}
+
+static int64_t
+sort__dso_size_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return _sort__dso_size_cmp(right->ms.map, left->ms.map);
+}
+
+static int _hist_entry__dso_size_snprintf(struct map *map, char *bf,
+ size_t bf_size, unsigned int width)
+{
+ if (map && map->dso)
+ return repsep_snprintf(bf, bf_size, "%*d", width,
+ map__size(map));
+
+ return repsep_snprintf(bf, bf_size, "%*s", width, "unknown");
+}
+
+static int hist_entry__dso_size_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width)
+{
+ return _hist_entry__dso_size_snprintf(he->ms.map, bf, size, width);
+}
+
+struct sort_entry sort_dso_size = {
+ .se_header = "DSO size",
+ .se_cmp = sort__dso_size_cmp,
+ .se_snprintf = hist_entry__dso_size_snprintf,
+ .se_width_idx = HISTC_DSO_SIZE,
+};
+
+struct sort_dimension {
+ const char *name;
+ struct sort_entry *entry;
+ int taken;
+};
+
+#define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension common_sort_dimensions[] = {
+ DIM(SORT_PID, "pid", sort_thread),
+ DIM(SORT_COMM, "comm", sort_comm),
+ DIM(SORT_DSO, "dso", sort_dso),
+ DIM(SORT_SYM, "symbol", sort_sym),
+ DIM(SORT_PARENT, "parent", sort_parent),
+ DIM(SORT_CPU, "cpu", sort_cpu),
+ DIM(SORT_SOCKET, "socket", sort_socket),
+ DIM(SORT_SRCLINE, "srcline", sort_srcline),
+ DIM(SORT_SRCFILE, "srcfile", sort_srcfile),
+ DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
+ DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
+ DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+ DIM(SORT_TRACE, "trace", sort_trace),
+ DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
+ DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+ DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
+};
+
+#undef DIM
+
+#define DIM(d, n, func) [d - __SORT_BRANCH_STACK] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension bstack_sort_dimensions[] = {
+ DIM(SORT_DSO_FROM, "dso_from", sort_dso_from),
+ DIM(SORT_DSO_TO, "dso_to", sort_dso_to),
+ DIM(SORT_SYM_FROM, "symbol_from", sort_sym_from),
+ DIM(SORT_SYM_TO, "symbol_to", sort_sym_to),
+ DIM(SORT_MISPREDICT, "mispredict", sort_mispredict),
+ DIM(SORT_IN_TX, "in_tx", sort_in_tx),
+ DIM(SORT_ABORT, "abort", sort_abort),
+ DIM(SORT_CYCLES, "cycles", sort_cycles),
+ DIM(SORT_SRCLINE_FROM, "srcline_from", sort_srcline_from),
+ DIM(SORT_SRCLINE_TO, "srcline_to", sort_srcline_to),
+};
+
+#undef DIM
+
+#define DIM(d, n, func) [d - __SORT_MEMORY_MODE] = { .name = n, .entry = &(func) }
+
+static struct sort_dimension memory_sort_dimensions[] = {
+ DIM(SORT_MEM_DADDR_SYMBOL, "symbol_daddr", sort_mem_daddr_sym),
+ DIM(SORT_MEM_IADDR_SYMBOL, "symbol_iaddr", sort_mem_iaddr_sym),
+ DIM(SORT_MEM_DADDR_DSO, "dso_daddr", sort_mem_daddr_dso),
+ DIM(SORT_MEM_LOCKED, "locked", sort_mem_locked),
+ DIM(SORT_MEM_TLB, "tlb", sort_mem_tlb),
+ DIM(SORT_MEM_LVL, "mem", sort_mem_lvl),
+ DIM(SORT_MEM_SNOOP, "snoop", sort_mem_snoop),
+ DIM(SORT_MEM_DCACHELINE, "dcacheline", sort_mem_dcacheline),
+ DIM(SORT_MEM_PHYS_DADDR, "phys_daddr", sort_mem_phys_daddr),
+};
+
+#undef DIM
+
+struct hpp_dimension {
+ const char *name;
+ struct perf_hpp_fmt *fmt;
+ int taken;
+};
+
+#define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], }
+
+static struct hpp_dimension hpp_sort_dimensions[] = {
+ DIM(PERF_HPP__OVERHEAD, "overhead"),
+ DIM(PERF_HPP__OVERHEAD_SYS, "overhead_sys"),
+ DIM(PERF_HPP__OVERHEAD_US, "overhead_us"),
+ DIM(PERF_HPP__OVERHEAD_GUEST_SYS, "overhead_guest_sys"),
+ DIM(PERF_HPP__OVERHEAD_GUEST_US, "overhead_guest_us"),
+ DIM(PERF_HPP__OVERHEAD_ACC, "overhead_children"),
+ DIM(PERF_HPP__SAMPLES, "sample"),
+ DIM(PERF_HPP__PERIOD, "period"),
+};
+
+#undef DIM
+
+struct hpp_sort_entry {
+ struct perf_hpp_fmt hpp;
+ struct sort_entry *se;
+};
+
+void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ struct hpp_sort_entry *hse;
+
+ if (!perf_hpp__is_sort_entry(fmt))
+ return;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ hists__new_col_len(hists, hse->se->se_width_idx, strlen(fmt->name));
+}
+
+static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists, int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct hpp_sort_entry *hse;
+ size_t len = fmt->user_len;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (!len)
+ len = hists__col_len(hists, hse->se->se_width_idx);
+
+ return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name);
+}
+
+static int __sort__hpp_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists)
+{
+ struct hpp_sort_entry *hse;
+ size_t len = fmt->user_len;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (!len)
+ len = hists__col_len(hists, hse->se->se_width_idx);
+
+ return len;
+}
+
+static int __sort__hpp_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_sort_entry *hse;
+ size_t len = fmt->user_len;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+
+ if (!len)
+ len = hists__col_len(he->hists, hse->se->se_width_idx);
+
+ return hse->se->se_snprintf(he, hpp->buf, hpp->size, len);
+}
+
+static int64_t __sort__hpp_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_sort_entry *hse;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ return hse->se->se_cmp(a, b);
+}
+
+static int64_t __sort__hpp_collapse(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_sort_entry *hse;
+ int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *);
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ collapse_fn = hse->se->se_collapse ?: hse->se->se_cmp;
+ return collapse_fn(a, b);
+}
+
+static int64_t __sort__hpp_sort(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_sort_entry *hse;
+ int64_t (*sort_fn)(struct hist_entry *, struct hist_entry *);
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ sort_fn = hse->se->se_sort ?: hse->se->se_cmp;
+ return sort_fn(a, b);
+}
+
+bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format)
+{
+ return format->header == __sort__hpp_header;
+}
+
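+
+/* generate the perf_hpp__is_<key>_entry() check for each specific sort key */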
+#define MK_SORT_ENTRY_CHK(key) \
+bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt) \
+{ \
+ struct hpp_sort_entry *hse; \
+ \
+ if (!perf_hpp__is_sort_entry(fmt)) \
+ return false; \
+ \
+ hse = container_of(fmt, struct hpp_sort_entry, hpp); \
+ return hse->se == &sort_ ## key ; \
+}
+
+MK_SORT_ENTRY_CHK(trace)
+MK_SORT_ENTRY_CHK(srcline)
+MK_SORT_ENTRY_CHK(srcfile)
+MK_SORT_ENTRY_CHK(thread)
+MK_SORT_ENTRY_CHK(comm)
+MK_SORT_ENTRY_CHK(dso)
+MK_SORT_ENTRY_CHK(sym)
+
+static bool __sort__hpp_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ struct hpp_sort_entry *hse_a;
+ struct hpp_sort_entry *hse_b;
+
+ if (!perf_hpp__is_sort_entry(a) || !perf_hpp__is_sort_entry(b))
+ return false;
+
+ hse_a = container_of(a, struct hpp_sort_entry, hpp);
+ hse_b = container_of(b, struct hpp_sort_entry, hpp);
+
+ return hse_a->se == hse_b->se;
+}
+
+static void hse_free(struct perf_hpp_fmt *fmt)
+{
+ struct hpp_sort_entry *hse;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ free(hse);
+}
+
+static struct hpp_sort_entry *
+__sort_dimension__alloc_hpp(struct sort_dimension *sd, int level)
+{
+ struct hpp_sort_entry *hse;
+
+ hse = malloc(sizeof(*hse));
+ if (hse == NULL) {
+ pr_err("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hse->se = sd->entry;
+ hse->hpp.name = sd->entry->se_header;
+ hse->hpp.header = __sort__hpp_header;
+ hse->hpp.width = __sort__hpp_width;
+ hse->hpp.entry = __sort__hpp_entry;
+ hse->hpp.color = NULL;
+
+ hse->hpp.cmp = __sort__hpp_cmp;
+ hse->hpp.collapse = __sort__hpp_collapse;
+ hse->hpp.sort = __sort__hpp_sort;
+ hse->hpp.equal = __sort__hpp_equal;
+ hse->hpp.free = hse_free;
+
+ INIT_LIST_HEAD(&hse->hpp.list);
+ INIT_LIST_HEAD(&hse->hpp.sort_list);
+ hse->hpp.elide = false;
+ hse->hpp.len = 0;
+ hse->hpp.user_len = 0;
+ hse->hpp.level = level;
+
+ return hse;
+}
+
+static void hpp_free(struct perf_hpp_fmt *fmt)
+{
+ free(fmt);
+}
+
+static struct perf_hpp_fmt *__hpp_dimension__alloc_hpp(struct hpp_dimension *hd,
+ int level)
+{
+ struct perf_hpp_fmt *fmt;
+
+ fmt = memdup(hd->fmt, sizeof(*fmt));
+ if (fmt) {
+ INIT_LIST_HEAD(&fmt->list);
+ INIT_LIST_HEAD(&fmt->sort_list);
+ fmt->free = hpp_free;
+ fmt->level = level;
+ }
+
+ return fmt;
+}
+
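+
+/*
+ * Returns -1 when no sort key in the hpp list handles this filter type,
+ * otherwise 0 if the entry passes every matching filter or a positive
+ * value when it should be filtered out.
+ */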
+int hist_entry__filter(struct hist_entry *he, int type, const void *arg)
+{
+ struct perf_hpp_fmt *fmt;
+ struct hpp_sort_entry *hse;
+ int ret = -1;
+ int r;
+
+ perf_hpp_list__for_each_format(he->hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ if (hse->se->se_filter == NULL)
+ continue;
+
+ /*
+ * A hist entry is filtered if any sort key in the hpp list
+ * applies a filter; non-matching filter types are skipped.
+ */
+ r = hse->se->se_filter(he, type, arg);
+ if (r >= 0) {
+ if (ret < 0)
+ ret = 0;
+ ret |= r;
+ }
+ }
+
+ return ret;
+}
+
+static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd,
+ struct perf_hpp_list *list,
+ int level)
+{
+ struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level);
+
+ if (hse == NULL)
+ return -1;
+
+ perf_hpp_list__register_sort_field(list, &hse->hpp);
+ return 0;
+}
+
+static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
+ struct perf_hpp_list *list)
+{
+ struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0);
+
+ if (hse == NULL)
+ return -1;
+
+ perf_hpp_list__column_register(list, &hse->hpp);
+ return 0;
+}
+
+struct hpp_dynamic_entry {
+ struct perf_hpp_fmt hpp;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ unsigned dynamic_len;
+ bool raw_trace;
+};
+
+static int hde_width(struct hpp_dynamic_entry *hde)
+{
+ if (!hde->hpp.len) {
+ int len = hde->dynamic_len;
+ int namelen = strlen(hde->field->name);
+ int fieldlen = hde->field->size;
+
+ if (namelen > len)
+ len = namelen;
+
+ if (!(hde->field->flags & FIELD_IS_STRING)) {
+ /* length for printing hex numbers */
+ fieldlen = hde->field->size * 2 + 2;
+ }
+ if (fieldlen > len)
+ len = fieldlen;
+
+ hde->hpp.len = len;
+ }
+ return hde->hpp.len;
+}
+
+static void update_dynamic_len(struct hpp_dynamic_entry *hde,
+ struct hist_entry *he)
+{
+ char *str, *pos;
+ struct format_field *field = hde->field;
+ size_t namelen;
+ bool last = false;
+
+ if (hde->raw_trace)
+ return;
+
+ /* parse pretty print result and update max length */
+ if (!he->trace_output)
+ he->trace_output = get_trace_output(he);
+
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ size_t len;
+
+ str += namelen + 1;
+ len = pos - str;
+
+ if (len > hde->dynamic_len)
+ hde->dynamic_len = len;
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+}
+
+static int __sort__hde_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hists *hists __maybe_unused,
+ int line __maybe_unused,
+ int *span __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, hde->field->name);
+}
+
+static int __sort__hde_width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct hists *hists __maybe_unused)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ return len;
+}
+
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt, struct hists *hists)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ return hists_to_evsel(hists) == hde->evsel;
+}
+
+static int __sort__hde_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct hpp_dynamic_entry *hde;
+ size_t len = fmt->user_len;
+ char *str, *pos;
+ struct format_field *field;
+ size_t namelen;
+ bool last = false;
+ int ret;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
+ if (!len)
+ len = hde_width(hde);
+
+ if (hde->raw_trace)
+ goto raw_field;
+
+ if (!he->trace_output)
+ he->trace_output = get_trace_output(he);
+
+ field = hde->field;
+ namelen = strlen(field->name);
+ str = he->trace_output;
+
+ while (str) {
+ pos = strchr(str, ' ');
+ if (pos == NULL) {
+ last = true;
+ pos = str + strlen(str);
+ }
+
+ if (!strncmp(str, field->name, namelen)) {
+ str += namelen + 1;
+ str = strndup(str, pos - str);
+
+ if (str == NULL)
+ return scnprintf(hpp->buf, hpp->size,
+ "%*.*s", len, len, "ERROR");
+ break;
+ }
+
+ if (last)
+ str = NULL;
+ else
+ str = pos + 1;
+ }
+
+ if (str == NULL) {
+ struct trace_seq seq;
+raw_field:
+ trace_seq_init(&seq);
+ tep_print_field(&seq, he->raw_data, hde->field);
+ str = seq.buffer;
+ }
+
+ ret = scnprintf(hpp->buf, hpp->size, "%*.*s", len, len, str);
+ free(str);
+ return ret;
+}
+
+static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
+ struct hist_entry *a, struct hist_entry *b)
+{
+ struct hpp_dynamic_entry *hde;
+ struct format_field *field;
+ unsigned offset, size;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+
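+ /* a NULL 'b' only asks us to update the dynamic column width */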
+ if (b == NULL) {
+ update_dynamic_len(hde, a);
+ return 0;
+ }
+
+ field = hde->field;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ unsigned long long dyn;
+
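+ /* dynamic data: low 16 bits hold the offset, next 16 bits the size */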
+ tep_read_number_field(field, a->raw_data, &dyn);
+ offset = dyn & 0xffff;
+ size = (dyn >> 16) & 0xffff;
+
+ /* record max width for output */
+ if (size > hde->dynamic_len)
+ hde->dynamic_len = size;
+ } else {
+ offset = field->offset;
+ size = field->size;
+ }
+
+ return memcmp(a->raw_data + offset, b->raw_data + offset, size);
+}
+
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt)
+{
+ return fmt->cmp == __sort__hde_cmp;
+}
+
+static bool __sort__hde_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b)
+{
+ struct hpp_dynamic_entry *hde_a;
+ struct hpp_dynamic_entry *hde_b;
+
+ if (!perf_hpp__is_dynamic_entry(a) || !perf_hpp__is_dynamic_entry(b))
+ return false;
+
+ hde_a = container_of(a, struct hpp_dynamic_entry, hpp);
+ hde_b = container_of(b, struct hpp_dynamic_entry, hpp);
+
+ return hde_a->field == hde_b->field;
+}
+
+static void hde_free(struct perf_hpp_fmt *fmt)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+ free(hde);
+}
+
+static struct hpp_dynamic_entry *
+__alloc_dynamic_entry(struct perf_evsel *evsel, struct format_field *field,
+ int level)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = malloc(sizeof(*hde));
+ if (hde == NULL) {
+ pr_debug("Memory allocation failed\n");
+ return NULL;
+ }
+
+ hde->evsel = evsel;
+ hde->field = field;
+ hde->dynamic_len = 0;
+
+ hde->hpp.name = field->name;
+ hde->hpp.header = __sort__hde_header;
+ hde->hpp.width = __sort__hde_width;
+ hde->hpp.entry = __sort__hde_entry;
+ hde->hpp.color = NULL;
+
+ hde->hpp.cmp = __sort__hde_cmp;
+ hde->hpp.collapse = __sort__hde_cmp;
+ hde->hpp.sort = __sort__hde_cmp;
+ hde->hpp.equal = __sort__hde_equal;
+ hde->hpp.free = hde_free;
+
+ INIT_LIST_HEAD(&hde->hpp.list);
+ INIT_LIST_HEAD(&hde->hpp.sort_list);
+ hde->hpp.elide = false;
+ hde->hpp.len = 0;
+ hde->hpp.user_len = 0;
+ hde->hpp.level = level;
+
+ return hde;
+}
+
+struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
+{
+ struct perf_hpp_fmt *new_fmt = NULL;
+
+ if (perf_hpp__is_sort_entry(fmt)) {
+ struct hpp_sort_entry *hse, *new_hse;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ new_hse = memdup(hse, sizeof(*hse));
+ if (new_hse)
+ new_fmt = &new_hse->hpp;
+ } else if (perf_hpp__is_dynamic_entry(fmt)) {
+ struct hpp_dynamic_entry *hde, *new_hde;
+
+ hde = container_of(fmt, struct hpp_dynamic_entry, hpp);
+ new_hde = memdup(hde, sizeof(*hde));
+ if (new_hde)
+ new_fmt = &new_hde->hpp;
+ } else {
+ new_fmt = memdup(fmt, sizeof(*fmt));
+ }
+
+ INIT_LIST_HEAD(&new_fmt->list);
+ INIT_LIST_HEAD(&new_fmt->sort_list);
+
+ return new_fmt;
+}
+
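+
+/*
+ * Split a --sort token of the form [<event>.]<field>[/<opt>] in place,
+ * e.g. "sched:sched_switch.prev_comm/raw".
+ */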
+static int parse_field_name(char *str, char **event, char **field, char **opt)
+{
+ char *event_name, *field_name, *opt_name;
+
+ event_name = str;
+ field_name = strchr(str, '.');
+
+ if (field_name) {
+ *field_name++ = '\0';
+ } else {
+ event_name = NULL;
+ field_name = str;
+ }
+
+ opt_name = strchr(field_name, '/');
+ if (opt_name)
+ *opt_name++ = '\0';
+
+ *event = event_name;
+ *field = field_name;
+ *opt = opt_name;
+
+ return 0;
+}
+
+/* find the matching evsel using a given event name. The event name can be:
+ * 1. '%' + event index (e.g. '%1' for first event)
+ * 2. full event name (e.g. sched:sched_switch)
+ * 3. partial event name (should not contain ':')
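+ * e.g. '%2' picks the second event, 'sched:sched_switch' must match an
+ * event name exactly, and 'switch' matches any event whose name contains it.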
+ */
+static struct perf_evsel *find_evsel(struct perf_evlist *evlist, char *event_name)
+{
+ struct perf_evsel *evsel = NULL;
+ struct perf_evsel *pos;
+ bool full_name;
+
+ /* case 1 */
+ if (event_name[0] == '%') {
+ int nr = strtol(event_name+1, NULL, 0);
+
+ if (nr > evlist->nr_entries)
+ return NULL;
+
+ evsel = perf_evlist__first(evlist);
+ while (--nr > 0)
+ evsel = perf_evsel__next(evsel);
+
+ return evsel;
+ }
+
+ full_name = !!strchr(event_name, ':');
+ evlist__for_each_entry(evlist, pos) {
+ /* case 2 */
+ if (full_name && !strcmp(pos->name, event_name))
+ return pos;
+ /* case 3 */
+ if (!full_name && strstr(pos->name, event_name)) {
+ if (evsel) {
+ pr_debug("'%s' event is ambiguous: it can be %s or %s\n",
+ event_name, evsel->name, pos->name);
+ return NULL;
+ }
+ evsel = pos;
+ }
+ }
+
+ return evsel;
+}
+
+static int __dynamic_dimension__add(struct perf_evsel *evsel,
+ struct format_field *field,
+ bool raw_trace, int level)
+{
+ struct hpp_dynamic_entry *hde;
+
+ hde = __alloc_dynamic_entry(evsel, field, level);
+ if (hde == NULL)
+ return -ENOMEM;
+
+ hde->raw_trace = raw_trace;
+
+ perf_hpp__register_sort_field(&hde->hpp);
+ return 0;
+}
+
+static int add_evsel_fields(struct perf_evsel *evsel, bool raw_trace, int level)
+{
+ int ret;
+ struct format_field *field;
+
+ field = evsel->tp_format->format.fields;
+ while (field) {
+ ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+ if (ret < 0)
+ return ret;
+
+ field = field->next;
+ }
+ return 0;
+}
+
+static int add_all_dynamic_fields(struct perf_evlist *evlist, bool raw_trace,
+ int level)
+{
+ int ret;
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ ret = add_evsel_fields(evsel, raw_trace, level);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+static int add_all_matching_fields(struct perf_evlist *evlist,
+ char *field_name, bool raw_trace, int level)
+{
+ int ret = -ESRCH;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+
+ field = tep_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL)
+ continue;
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
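+
+/*
+ * Add dynamic sort key(s) for a --sort token: "trace_fields" adds every
+ * field of every tracepoint event, "<event>.*" adds all fields of one
+ * event, a bare "<field>" adds all matching fields, and
+ * "<event>.<field>" adds a single field.
+ */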
+static int add_dynamic_entry(struct perf_evlist *evlist, const char *tok,
+ int level)
+{
+ char *str, *event_name, *field_name, *opt_name;
+ struct perf_evsel *evsel;
+ struct format_field *field;
+ bool raw_trace = symbol_conf.raw_trace;
+ int ret = 0;
+
+ if (evlist == NULL)
+ return -ENOENT;
+
+ str = strdup(tok);
+ if (str == NULL)
+ return -ENOMEM;
+
+ if (parse_field_name(str, &event_name, &field_name, &opt_name) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (opt_name) {
+ if (strcmp(opt_name, "raw")) {
+ pr_debug("unsupported field option %s\n", opt_name);
+ ret = -EINVAL;
+ goto out;
+ }
+ raw_trace = true;
+ }
+
+ if (!strcmp(field_name, "trace_fields")) {
+ ret = add_all_dynamic_fields(evlist, raw_trace, level);
+ goto out;
+ }
+
+ if (event_name == NULL) {
+ ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
+ goto out;
+ }
+
+ evsel = find_evsel(evlist, event_name);
+ if (evsel == NULL) {
+ pr_debug("Cannot find event: %s\n", event_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ pr_debug("%s is not a tracepoint event\n", event_name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!strcmp(field_name, "*")) {
+ ret = add_evsel_fields(evsel, raw_trace, level);
+ } else {
+ field = tep_find_any_field(evsel->tp_format, field_name);
+ if (field == NULL) {
+ pr_debug("Cannot find event field for %s.%s\n",
+ event_name, field_name);
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
+ }
+
+out:
+ free(str);
+ return ret;
+}
+
+static int __sort_dimension__add(struct sort_dimension *sd,
+ struct perf_hpp_list *list,
+ int level)
+{
+ if (sd->taken)
+ return 0;
+
+ if (__sort_dimension__add_hpp_sort(sd, list, level) < 0)
+ return -1;
+
+ if (sd->entry->se_collapse)
+ list->need_collapse = 1;
+
+ sd->taken = 1;
+
+ return 0;
+}
+
+static int __hpp_dimension__add(struct hpp_dimension *hd,
+ struct perf_hpp_list *list,
+ int level)
+{
+ struct perf_hpp_fmt *fmt;
+
+ if (hd->taken)
+ return 0;
+
+ fmt = __hpp_dimension__alloc_hpp(hd, level);
+ if (!fmt)
+ return -1;
+
+ hd->taken = 1;
+ perf_hpp_list__register_sort_field(list, fmt);
+ return 0;
+}
+
+static int __sort_dimension__add_output(struct perf_hpp_list *list,
+ struct sort_dimension *sd)
+{
+ if (sd->taken)
+ return 0;
+
+ if (__sort_dimension__add_hpp_output(sd, list) < 0)
+ return -1;
+
+ sd->taken = 1;
+ return 0;
+}
+
+static int __hpp_dimension__add_output(struct perf_hpp_list *list,
+ struct hpp_dimension *hd)
+{
+ struct perf_hpp_fmt *fmt;
+
+ if (hd->taken)
+ return 0;
+
+ fmt = __hpp_dimension__alloc_hpp(hd, 0);
+ if (!fmt)
+ return -1;
+
+ hd->taken = 1;
+ perf_hpp_list__column_register(list, fmt);
+ return 0;
+}
+
+int hpp_dimension__add_output(unsigned col)
+{
+ BUG_ON(col >= PERF_HPP__MAX_INDEX);
+ return __hpp_dimension__add_output(&perf_hpp_list, &hpp_sort_dimensions[col]);
+}
+
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct perf_evlist *evlist,
+ int level)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
+
+ if (strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sd->entry == &sort_parent) {
+ int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED);
+ if (ret) {
+ char err[BUFSIZ];
+
+ regerror(ret, &parent_regex, err, sizeof(err));
+ pr_err("Invalid regex: %s\n%s", parent_pattern, err);
+ return -EINVAL;
+ }
+ list->parent = 1;
+ } else if (sd->entry == &sort_sym) {
+ list->sym = 1;
+ /*
+ * perf diff displays the performance difference amongst
+ * two or more perf.data files. Those files could come
+ * from different binaries. So we should not compare
+ * their ips, but their symbol names.
+ */
+ if (sort__mode == SORT_MODE__DIFF)
+ sd->entry->se_collapse = sort__sym_sort;
+
+ } else if (sd->entry == &sort_dso) {
+ list->dso = 1;
+ } else if (sd->entry == &sort_socket) {
+ list->socket = 1;
+ } else if (sd->entry == &sort_thread) {
+ list->thread = 1;
+ } else if (sd->entry == &sort_comm) {
+ list->comm = 1;
+ }
+
+ return __sort_dimension__add(sd, list, level);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (strncasecmp(tok, hd->name, strlen(tok)))
+ continue;
+
+ return __hpp_dimension__add(hd, list, level);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+ struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+ if (strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sort__mode != SORT_MODE__BRANCH)
+ return -EINVAL;
+
+ if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to)
+ list->sym = 1;
+
+ __sort_dimension__add(sd, list, level);
+ return 0;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+ struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+ if (strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ if (sort__mode != SORT_MODE__MEMORY)
+ return -EINVAL;
+
+ if (sd->entry == &sort_mem_dcacheline && cacheline_size() == 0)
+ return -EINVAL;
+
+ if (sd->entry == &sort_mem_daddr_sym)
+ list->sym = 1;
+
+ __sort_dimension__add(sd, list, level);
+ return 0;
+ }
+
+ if (!add_dynamic_entry(evlist, tok, level))
+ return 0;
+
+ return -ESRCH;
+}
+
+static int setup_sort_list(struct perf_hpp_list *list, char *str,
+ struct perf_evlist *evlist)
+{
+ char *tmp, *tok;
+ int ret = 0;
+ int level = 0;
+ int next_level = 1;
+ bool in_group = false;
+
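+ /*
+ * Sort keys are separated by ',' or ' '. Keys inside a '{}' group
+ * (e.g. "comm,{dso,sym}") stay at the same hierarchy level.
+ */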
+ do {
+ tok = str;
+ tmp = strpbrk(str, "{}, ");
+ if (tmp) {
+ if (in_group)
+ next_level = level;
+ else
+ next_level = level + 1;
+
+ if (*tmp == '{')
+ in_group = true;
+ else if (*tmp == '}')
+ in_group = false;
+
+ *tmp = '\0';
+ str = tmp + 1;
+ }
+
+ if (*tok) {
+ ret = sort_dimension__add(list, tok, evlist, level);
+ if (ret == -EINVAL) {
+ if (!cacheline_size() && !strncasecmp(tok, "dcacheline", strlen(tok)))
+ pr_err("The \"dcacheline\" --sort key needs to know the cacheline size and it couldn't be determined on this system");
+ else
+ pr_err("Invalid --sort key: `%s'", tok);
+ break;
+ } else if (ret == -ESRCH) {
+ pr_err("Unknown --sort key: `%s'", tok);
+ break;
+ }
+ }
+
+ level = next_level;
+ } while (tmp);
+
+ return ret;
+}
+
+static const char *get_default_sort_order(struct perf_evlist *evlist)
+{
+ const char *default_sort_orders[] = {
+ default_sort_order,
+ default_branch_sort_order,
+ default_mem_sort_order,
+ default_top_sort_order,
+ default_diff_sort_order,
+ default_tracepoint_sort_order,
+ };
+ bool use_trace = true;
+ struct perf_evsel *evsel;
+
+ BUG_ON(sort__mode >= ARRAY_SIZE(default_sort_orders));
+
+ if (evlist == NULL || perf_evlist__empty(evlist))
+ goto out_no_evlist;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->attr.type != PERF_TYPE_TRACEPOINT) {
+ use_trace = false;
+ break;
+ }
+ }
+
+ if (use_trace) {
+ sort__mode = SORT_MODE__TRACEPOINT;
+ if (symbol_conf.raw_trace)
+ return "trace_fields";
+ }
+out_no_evlist:
+ return default_sort_orders[sort__mode];
+}
+
+static int setup_sort_order(struct perf_evlist *evlist)
+{
+ char *new_sort_order;
+
+ /*
+ * Append '+'-prefixed sort order to the default sort
+ * order string.
+ */
+ if (!sort_order || is_strict_order(sort_order))
+ return 0;
+
+ if (sort_order[1] == '\0') {
+ pr_err("Invalid --sort key: `+'");
+ return -EINVAL;
+ }
+
+ /*
+ * We allocate a new sort_order string, but we never free it,
+ * because it is referenced throughout the rest of the code.
+ */
+ if (asprintf(&new_sort_order, "%s,%s",
+ get_default_sort_order(evlist), sort_order + 1) < 0) {
+ pr_err("Not enough memory to set up --sort");
+ return -ENOMEM;
+ }
+
+ sort_order = new_sort_order;
+ return 0;
+}
+
+/*
+ * Adds the 'pre,' prefix to 'str' if 'pre' is
+ * not already part of 'str'.
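+ * For example, prefix_if_not_in("overhead", strdup("sym,dso"))
+ * returns a new "overhead,sym,dso" string (illustrative values).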
+ */
+static char *prefix_if_not_in(const char *pre, char *str)
+{
+ char *n;
+
+ if (!str || strstr(str, pre))
+ return str;
+
+ if (asprintf(&n, "%s,%s", pre, str) < 0)
+ n = NULL;
+
+ free(str);
+ return n;
+}
+
+static char *setup_overhead(char *keys)
+{
+ if (sort__mode == SORT_MODE__DIFF)
+ return keys;
+
+ keys = prefix_if_not_in("overhead", keys);
+
+ if (symbol_conf.cumulate_callchain)
+ keys = prefix_if_not_in("overhead_children", keys);
+
+ return keys;
+}
+
+static int __setup_sorting(struct perf_evlist *evlist)
+{
+ char *str;
+ const char *sort_keys;
+ int ret = 0;
+
+ ret = setup_sort_order(evlist);
+ if (ret)
+ return ret;
+
+ sort_keys = sort_order;
+ if (sort_keys == NULL) {
+ if (is_strict_order(field_order)) {
+ /*
+ * If user specified field order but no sort order,
+ * we'll honor it and not add default sort orders.
+ */
+ return 0;
+ }
+
+ sort_keys = get_default_sort_order(evlist);
+ }
+
+ str = strdup(sort_keys);
+ if (str == NULL) {
+ pr_err("Not enough memory to setup sort keys");
+ return -ENOMEM;
+ }
+
+ /*
+ * Prepend overhead fields for backward compatibility.
+ */
+ if (!is_strict_order(field_order)) {
+ str = setup_overhead(str);
+ if (str == NULL) {
+ pr_err("Not enough memory to setup overhead keys");
+ return -ENOMEM;
+ }
+ }
+
+ ret = setup_sort_list(&perf_hpp_list, str, evlist);
+
+ free(str);
+ return ret;
+}
+
+void perf_hpp__set_elide(int idx, bool elide)
+{
+ struct perf_hpp_fmt *fmt;
+ struct hpp_sort_entry *hse;
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ if (hse->se->se_width_idx == idx) {
+ fmt->elide = elide;
+ break;
+ }
+ }
+}
+
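+
+/* a column can be elided when a filter list pins it to a single value */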
+static bool __get_elide(struct strlist *list, const char *list_name, FILE *fp)
+{
+ if (list && strlist__nr_entries(list) == 1) {
+ if (fp != NULL)
+ fprintf(fp, "# %s: %s\n", list_name,
+ strlist__entry(list, 0)->s);
+ return true;
+ }
+ return false;
+}
+
+static bool get_elide(int idx, FILE *output)
+{
+ switch (idx) {
+ case HISTC_SYMBOL:
+ return __get_elide(symbol_conf.sym_list, "symbol", output);
+ case HISTC_DSO:
+ return __get_elide(symbol_conf.dso_list, "dso", output);
+ case HISTC_COMM:
+ return __get_elide(symbol_conf.comm_list, "comm", output);
+ default:
+ break;
+ }
+
+ if (sort__mode != SORT_MODE__BRANCH)
+ return false;
+
+ switch (idx) {
+ case HISTC_SYMBOL_FROM:
+ return __get_elide(symbol_conf.sym_from_list, "sym_from", output);
+ case HISTC_SYMBOL_TO:
+ return __get_elide(symbol_conf.sym_to_list, "sym_to", output);
+ case HISTC_DSO_FROM:
+ return __get_elide(symbol_conf.dso_from_list, "dso_from", output);
+ case HISTC_DSO_TO:
+ return __get_elide(symbol_conf.dso_to_list, "dso_to", output);
+ default:
+ break;
+ }
+
+ return false;
+}
+
+void sort__setup_elide(FILE *output)
+{
+ struct perf_hpp_fmt *fmt;
+ struct hpp_sort_entry *hse;
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ hse = container_of(fmt, struct hpp_sort_entry, hpp);
+ fmt->elide = get_elide(hse->se->se_width_idx, output);
+ }
+
+ /*
+ * It makes no sense to elide all of the sort entries.
+ * Just revert them so they show up again.
+ */
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ if (!fmt->elide)
+ return;
+ }
+
+ perf_hpp_list__for_each_format(&perf_hpp_list, fmt) {
+ if (!perf_hpp__is_sort_entry(fmt))
+ continue;
+
+ fmt->elide = false;
+ }
+}
+
+int output_field_add(struct perf_hpp_list *list, char *tok)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
+ struct sort_dimension *sd = &common_sort_dimensions[i];
+
+ if (strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ return __sort_dimension__add_output(list, sd);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) {
+ struct hpp_dimension *hd = &hpp_sort_dimensions[i];
+
+ if (strncasecmp(tok, hd->name, strlen(tok)))
+ continue;
+
+ return __hpp_dimension__add_output(list, hd);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
+ struct sort_dimension *sd = &bstack_sort_dimensions[i];
+
+ if (strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ return __sort_dimension__add_output(list, sd);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
+ struct sort_dimension *sd = &memory_sort_dimensions[i];
+
+ if (strncasecmp(tok, sd->name, strlen(tok)))
+ continue;
+
+ return __sort_dimension__add_output(list, sd);
+ }
+
+ return -ESRCH;
+}
+
+static int setup_output_list(struct perf_hpp_list *list, char *str)
+{
+ char *tmp, *tok;
+ int ret = 0;
+
+ for (tok = strtok_r(str, ", ", &tmp);
+ tok; tok = strtok_r(NULL, ", ", &tmp)) {
+ ret = output_field_add(list, tok);
+ if (ret == -EINVAL) {
+ ui__error("Invalid --fields key: `%s'", tok);
+ break;
+ } else if (ret == -ESRCH) {
+ ui__error("Unknown --fields key: `%s'", tok);
+ break;
+ }
+ }
+
+ return ret;
+}
+
+void reset_dimensions(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++)
+ common_sort_dimensions[i].taken = 0;
+
+ for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++)
+ hpp_sort_dimensions[i].taken = 0;
+
+ for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++)
+ bstack_sort_dimensions[i].taken = 0;
+
+ for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++)
+ memory_sort_dimensions[i].taken = 0;
+}
+
+bool is_strict_order(const char *order)
+{
+ return order && (*order != '+');
+}
+
+static int __setup_output_field(void)
+{
+ char *str, *strp;
+ int ret = -EINVAL;
+
+ if (field_order == NULL)
+ return 0;
+
+ strp = str = strdup(field_order);
+ if (str == NULL) {
+ pr_err("Not enough memory to setup output fields");
+ return -ENOMEM;
+ }
+
+ if (!is_strict_order(field_order))
+ strp++;
+
+ if (!strlen(strp)) {
+ pr_err("Invalid --fields key: `+'");
+ goto out;
+ }
+
+ ret = setup_output_list(&perf_hpp_list, strp);
+
+out:
+ free(str);
+ return ret;
+}
+
+int setup_sorting(struct perf_evlist *evlist)
+{
+ int err;
+
+ err = __setup_sorting(evlist);
+ if (err < 0)
+ return err;
+
+ if (parent_pattern != default_parent_pattern) {
+ err = sort_dimension__add(&perf_hpp_list, "parent", evlist, -1);
+ if (err < 0)
+ return err;
+ }
+
+ reset_dimensions();
+
+ /*
+ * perf diff doesn't use default hpp output fields.
+ */
+ if (sort__mode != SORT_MODE__DIFF)
+ perf_hpp__init();
+
+ err = __setup_output_field();
+ if (err < 0)
+ return err;
+
+ /* copy sort keys to output fields */
+ perf_hpp__setup_output_field(&perf_hpp_list);
+ /* and then copy output fields to sort keys */
+ perf_hpp__append_sort_keys(&perf_hpp_list);
+
+ /* setup hists-specific output fields */
+ if (perf_hpp__setup_hists_formats(&perf_hpp_list, evlist) < 0)
+ return -1;
+
+ return 0;
+}
+
+void reset_output_field(void)
+{
+ perf_hpp_list.need_collapse = 0;
+ perf_hpp_list.parent = 0;
+ perf_hpp_list.sym = 0;
+ perf_hpp_list.dso = 0;
+
+ field_order = NULL;
+ sort_order = NULL;
+
+ reset_dimensions();
+ perf_hpp__reset_output_field(&perf_hpp_list);
+}
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
new file mode 100644
index 000000000..a97cf8e6b
--- /dev/null
+++ b/tools/perf/util/sort.h
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SORT_H
+#define __PERF_SORT_H
+#include "../builtin.h"
+
+#include <regex.h>
+
+#include "color.h"
+#include <linux/list.h>
+#include "cache.h"
+#include <linux/rbtree.h>
+#include "symbol.h"
+#include "string.h"
+#include "callchain.h"
+#include "values.h"
+
+#include "../perf.h"
+#include "debug.h"
+#include "header.h"
+
+#include <subcmd/parse-options.h>
+#include "parse-events.h"
+#include "hist.h"
+#include "srcline.h"
+
+struct thread;
+
+extern regex_t parent_regex;
+extern const char *sort_order;
+extern const char *field_order;
+extern const char default_parent_pattern[];
+extern const char *parent_pattern;
+extern const char *default_sort_order;
+extern regex_t ignore_callees_regex;
+extern int have_ignore_callees;
+extern enum sort_mode sort__mode;
+extern struct sort_entry sort_comm;
+extern struct sort_entry sort_dso;
+extern struct sort_entry sort_sym;
+extern struct sort_entry sort_parent;
+extern struct sort_entry sort_dso_from;
+extern struct sort_entry sort_dso_to;
+extern struct sort_entry sort_sym_from;
+extern struct sort_entry sort_sym_to;
+extern struct sort_entry sort_srcline;
+extern enum sort_type sort__first_dimension;
+extern const char default_mem_sort_order[];
+
+struct he_stat {
+ u64 period;
+ u64 period_sys;
+ u64 period_us;
+ u64 period_guest_sys;
+ u64 period_guest_us;
+ u64 weight;
+ u32 nr_events;
+};
+
+struct namespace_id {
+ u64 dev;
+ u64 ino;
+};
+
+struct hist_entry_diff {
+ bool computed;
+ union {
+ /* PERF_HPP__DELTA */
+ double period_ratio_delta;
+
+ /* PERF_HPP__RATIO */
+ double period_ratio;
+
+ /* HISTC_WEIGHTED_DIFF */
+ s64 wdiff;
+ };
+};
+
+struct hist_entry_ops {
+ void *(*new)(size_t size);
+ void (*free)(void *ptr);
+};
+
+/**
+ * struct hist_entry - histogram entry
+ *
+ * @row_offset - offset from the first callchain expanded to appear on screen
+ * @nr_rows - rows expanded in callchain, recalculated on folding/unfolding
+ */
+struct hist_entry {
+ struct rb_node rb_node_in;
+ struct rb_node rb_node;
+ union {
+ struct list_head node;
+ struct list_head head;
+ } pairs;
+ struct he_stat stat;
+ struct he_stat *stat_acc;
+ struct map_symbol ms;
+ struct thread *thread;
+ struct comm *comm;
+ struct namespace_id cgroup_id;
+ u64 ip;
+ u64 transaction;
+ s32 socket;
+ s32 cpu;
+ u8 cpumode;
+ u8 depth;
+
+ /* We are added by hists__add_dummy_entry. */
+ bool dummy;
+ bool leaf;
+
+ char level;
+ u8 filtered;
+
+ u16 callchain_size;
+ union {
+ /*
+ * Since perf diff only supports the stdio output, TUI
+ * fields are only accessed from perf report (or perf
+ * top). So make it a union to reduce memory usage.
+ */
+ struct hist_entry_diff diff;
+ struct /* for TUI */ {
+ u16 row_offset;
+ u16 nr_rows;
+ bool init_have_children;
+ bool unfolded;
+ bool has_children;
+ bool has_no_entry;
+ };
+ };
+ char *srcline;
+ char *srcfile;
+ struct symbol *parent;
+ struct branch_info *branch_info;
+ struct hists *hists;
+ struct mem_info *mem_info;
+ void *raw_data;
+ u32 raw_size;
+ void *trace_output;
+ struct perf_hpp_list *hpp_list;
+ struct hist_entry *parent_he;
+ struct hist_entry_ops *ops;
+ union {
+ /* this is for hierarchical entry structure */
+ struct {
+ struct rb_root hroot_in;
+ struct rb_root hroot_out;
+ }; /* non-leaf entries */
+ struct rb_root sorted_chain; /* leaf entry has callchains */
+ };
+ struct callchain_root callchain[0]; /* must be last member */
+};
+
+static __pure inline bool hist_entry__has_callchains(struct hist_entry *he)
+{
+ return he->callchain_size != 0;
+}
+
+static inline bool hist_entry__has_pairs(struct hist_entry *he)
+{
+ return !list_empty(&he->pairs.node);
+}
+
+static inline struct hist_entry *hist_entry__next_pair(struct hist_entry *he)
+{
+ if (hist_entry__has_pairs(he))
+ return list_entry(he->pairs.node.next, struct hist_entry, pairs.node);
+ return NULL;
+}
+
+static inline void hist_entry__add_pair(struct hist_entry *pair,
+ struct hist_entry *he)
+{
+ list_add_tail(&pair->pairs.node, &he->pairs.head);
+}
+
+static inline float hist_entry__get_percent_limit(struct hist_entry *he)
+{
+ u64 period = he->stat.period;
+ u64 total_period = hists__total_period(he->hists);
+
+ if (unlikely(total_period == 0))
+ return 0;
+
+ if (symbol_conf.cumulate_callchain)
+ period = he->stat_acc->period;
+
+ return period * 100.0 / total_period;
+}
+
+static inline u64 cl_address(u64 address)
+{
+ /* return the cacheline of the address */
+ return (address & ~(cacheline_size() - 1));
+}
+
+static inline u64 cl_offset(u64 address)
+{
+ /* return the offset of the address within its cacheline */
+ return (address & (cacheline_size() - 1));
+}
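+
+/*
+ * For example, with 64-byte cachelines cl_address(0x1234) yields 0x1200
+ * and cl_offset(0x1234) yields 0x34.
+ */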
+
+enum sort_mode {
+ SORT_MODE__NORMAL,
+ SORT_MODE__BRANCH,
+ SORT_MODE__MEMORY,
+ SORT_MODE__TOP,
+ SORT_MODE__DIFF,
+ SORT_MODE__TRACEPOINT,
+};
+
+enum sort_type {
+ /* common sort keys */
+ SORT_PID,
+ SORT_COMM,
+ SORT_DSO,
+ SORT_SYM,
+ SORT_PARENT,
+ SORT_CPU,
+ SORT_SOCKET,
+ SORT_SRCLINE,
+ SORT_SRCFILE,
+ SORT_LOCAL_WEIGHT,
+ SORT_GLOBAL_WEIGHT,
+ SORT_TRANSACTION,
+ SORT_TRACE,
+ SORT_SYM_SIZE,
+ SORT_DSO_SIZE,
+ SORT_CGROUP_ID,
+
+ /* branch stack specific sort keys */
+ __SORT_BRANCH_STACK,
+ SORT_DSO_FROM = __SORT_BRANCH_STACK,
+ SORT_DSO_TO,
+ SORT_SYM_FROM,
+ SORT_SYM_TO,
+ SORT_MISPREDICT,
+ SORT_ABORT,
+ SORT_IN_TX,
+ SORT_CYCLES,
+ SORT_SRCLINE_FROM,
+ SORT_SRCLINE_TO,
+
+ /* memory mode specific sort keys */
+ __SORT_MEMORY_MODE,
+ SORT_MEM_DADDR_SYMBOL = __SORT_MEMORY_MODE,
+ SORT_MEM_DADDR_DSO,
+ SORT_MEM_LOCKED,
+ SORT_MEM_TLB,
+ SORT_MEM_LVL,
+ SORT_MEM_SNOOP,
+ SORT_MEM_DCACHELINE,
+ SORT_MEM_IADDR_SYMBOL,
+ SORT_MEM_PHYS_DADDR,
+};
+
+/*
+ * configurable sorting bits
+ */
+
+struct sort_entry {
+ const char *se_header;
+
+ int64_t (*se_cmp)(struct hist_entry *, struct hist_entry *);
+ int64_t (*se_collapse)(struct hist_entry *, struct hist_entry *);
+ int64_t (*se_sort)(struct hist_entry *, struct hist_entry *);
+ int (*se_snprintf)(struct hist_entry *he, char *bf, size_t size,
+ unsigned int width);
+ int (*se_filter)(struct hist_entry *he, int type, const void *arg);
+ u8 se_width_idx;
+};
+
+extern struct sort_entry sort_thread;
+extern struct list_head hist_entry__sort_list;
+
+struct perf_evlist;
+struct tep_handle;
+int setup_sorting(struct perf_evlist *evlist);
+int setup_output_field(void);
+void reset_output_field(void);
+void sort__setup_elide(FILE *fp);
+void perf_hpp__set_elide(int idx, bool elide);
+
+int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset);
+
+bool is_strict_order(const char *order);
+
+int hpp_dimension__add_output(unsigned col);
+void reset_dimensions(void);
+int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
+ struct perf_evlist *evlist,
+ int level);
+int output_field_add(struct perf_hpp_list *list, char *tok);
+int64_t
+sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+char *hist_entry__srcline(struct hist_entry *he);
+#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
new file mode 100644
index 000000000..b8e77617f
--- /dev/null
+++ b/tools/perf/util/srcline.c
@@ -0,0 +1,720 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <linux/kernel.h>
+
+#include "util/dso.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/callchain.h"
+#include "srcline.h"
+#include "string2.h"
+#include "symbol.h"
+
+bool srcline_full_filename;
+
+static const char *dso__name(struct dso *dso)
+{
+ const char *dso_name;
+
+ if (dso->symsrc_filename)
+ dso_name = dso->symsrc_filename;
+ else
+ dso_name = dso->long_name;
+
+ if (dso_name[0] == '[')
+ return NULL;
+
+ if (!strncmp(dso_name, "/tmp/perf-", 10))
+ return NULL;
+
+ return dso_name;
+}
+
+static int inline_list__append(struct symbol *symbol, char *srcline,
+ struct inline_node *node)
+{
+ struct inline_list *ilist;
+
+ ilist = zalloc(sizeof(*ilist));
+ if (ilist == NULL)
+ return -1;
+
+ ilist->symbol = symbol;
+ ilist->srcline = srcline;
+
+ if (callchain_param.order == ORDER_CALLEE)
+ list_add_tail(&ilist->list, &node->val);
+ else
+ list_add(&ilist->list, &node->val);
+
+ return 0;
+}
+
+/* basename version that takes a const input string */
+static const char *gnu_basename(const char *path)
+{
+ const char *base = strrchr(path, '/');
+
+ return base ? base + 1 : path;
+}
+
+static char *srcline_from_fileline(const char *file, unsigned int line)
+{
+ char *srcline;
+
+ if (!file)
+ return NULL;
+
+ if (!srcline_full_filename)
+ file = gnu_basename(file);
+
+ if (asprintf(&srcline, "%s:%u", file, line) < 0)
+ return NULL;
+
+ return srcline;
+}
+
+static struct symbol *new_inline_sym(struct dso *dso,
+ struct symbol *base_sym,
+ const char *funcname)
+{
+ struct symbol *inline_sym;
+ char *demangled = NULL;
+
+ if (!funcname)
+ funcname = "??";
+
+ if (dso) {
+ demangled = dso__demangle_sym(dso, 0, funcname);
+ if (demangled)
+ funcname = demangled;
+ }
+
+ if (base_sym && strcmp(funcname, base_sym->name) == 0) {
+ /* reuse the real, existing symbol */
+ inline_sym = base_sym;
+		/*
+		 * Ensure that we don't alias an inlined symbol, which could
+		 * lead to double frees in inline_node__delete.
+		 */
+ assert(!base_sym->inlined);
+ } else {
+ /* create a fake symbol for the inline frame */
+ inline_sym = symbol__new(base_sym ? base_sym->start : 0,
+ base_sym ? (base_sym->end - base_sym->start) : 0,
+ base_sym ? base_sym->binding : 0,
+ base_sym ? base_sym->type : 0,
+ funcname);
+ if (inline_sym)
+ inline_sym->inlined = 1;
+ }
+
+ free(demangled);
+
+ return inline_sym;
+}
+
+#ifdef HAVE_LIBBFD_SUPPORT
+
+/*
+ * Implement addr2line using libbfd.
+ */
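+/*
+ * bfd.h refuses to compile unless PACKAGE (or PACKAGE_VERSION) is defined;
+ * it normally expects binutils' own config.h to be included first.
+ */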
+#define PACKAGE "perf"
+#include <bfd.h>
+
+struct a2l_data {
+ const char *input;
+ u64 addr;
+
+ bool found;
+ const char *filename;
+ const char *funcname;
+ unsigned line;
+
+ bfd *abfd;
+ asymbol **syms;
+};
+
+static int bfd_error(const char *string)
+{
+ const char *errmsg;
+
+ errmsg = bfd_errmsg(bfd_get_error());
+ fflush(stdout);
+
+ if (string)
+ pr_debug("%s: %s\n", string, errmsg);
+ else
+ pr_debug("%s\n", errmsg);
+
+ return -1;
+}
+
+static int slurp_symtab(bfd *abfd, struct a2l_data *a2l)
+{
+ long storage;
+ long symcount;
+ asymbol **syms;
+ bfd_boolean dynamic = FALSE;
+
+ if ((bfd_get_file_flags(abfd) & HAS_SYMS) == 0)
+ return bfd_error(bfd_get_filename(abfd));
+
+ storage = bfd_get_symtab_upper_bound(abfd);
+ if (storage == 0L) {
+ storage = bfd_get_dynamic_symtab_upper_bound(abfd);
+ dynamic = TRUE;
+ }
+ if (storage < 0L)
+ return bfd_error(bfd_get_filename(abfd));
+
+	syms = malloc(storage);
+	if (!syms)
+		return -1;
+ if (dynamic)
+ symcount = bfd_canonicalize_dynamic_symtab(abfd, syms);
+ else
+ symcount = bfd_canonicalize_symtab(abfd, syms);
+
+ if (symcount < 0) {
+ free(syms);
+ return bfd_error(bfd_get_filename(abfd));
+ }
+
+ a2l->syms = syms;
+ return 0;
+}
+
+static void find_address_in_section(bfd *abfd, asection *section, void *data)
+{
+ bfd_vma pc, vma;
+ bfd_size_type size;
+ struct a2l_data *a2l = data;
+ flagword flags;
+
+ if (a2l->found)
+ return;
+
+#ifdef bfd_get_section_flags
+ flags = bfd_get_section_flags(abfd, section);
+#else
+ flags = bfd_section_flags(section);
+#endif
+ if ((flags & SEC_ALLOC) == 0)
+ return;
+
+ pc = a2l->addr;
+#ifdef bfd_get_section_vma
+ vma = bfd_get_section_vma(abfd, section);
+#else
+ vma = bfd_section_vma(section);
+#endif
+#ifdef bfd_get_section_size
+ size = bfd_get_section_size(section);
+#else
+ size = bfd_section_size(section);
+#endif
+
+ if (pc < vma || pc >= vma + size)
+ return;
+
+ a2l->found = bfd_find_nearest_line(abfd, section, a2l->syms, pc - vma,
+ &a2l->filename, &a2l->funcname,
+ &a2l->line);
+
+ if (a2l->filename && !strlen(a2l->filename))
+ a2l->filename = NULL;
+}
+
+static struct a2l_data *addr2line_init(const char *path)
+{
+ bfd *abfd;
+ struct a2l_data *a2l = NULL;
+
+ abfd = bfd_openr(path, NULL);
+ if (abfd == NULL)
+ return NULL;
+
+ if (!bfd_check_format(abfd, bfd_object))
+ goto out;
+
+ a2l = zalloc(sizeof(*a2l));
+ if (a2l == NULL)
+ goto out;
+
+ a2l->abfd = abfd;
+ a2l->input = strdup(path);
+ if (a2l->input == NULL)
+ goto out;
+
+ if (slurp_symtab(abfd, a2l))
+ goto out;
+
+ return a2l;
+
+out:
+ if (a2l) {
+ zfree((char **)&a2l->input);
+ free(a2l);
+ }
+ bfd_close(abfd);
+ return NULL;
+}
+
+static void addr2line_cleanup(struct a2l_data *a2l)
+{
+ if (a2l->abfd)
+ bfd_close(a2l->abfd);
+ zfree((char **)&a2l->input);
+ zfree(&a2l->syms);
+ free(a2l);
+}
+
+#define MAX_INLINE_NEST 1024
+
+static int inline_list__append_dso_a2l(struct dso *dso,
+ struct inline_node *node,
+ struct symbol *sym)
+{
+ struct a2l_data *a2l = dso->a2l;
+ struct symbol *inline_sym = new_inline_sym(dso, sym, a2l->funcname);
+ char *srcline = NULL;
+
+ if (a2l->filename)
+ srcline = srcline_from_fileline(a2l->filename, a2l->line);
+
+ return inline_list__append(inline_sym, srcline, node);
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line, struct dso *dso,
+ bool unwind_inlines, struct inline_node *node,
+ struct symbol *sym)
+{
+ int ret = 0;
+ struct a2l_data *a2l = dso->a2l;
+
+ if (!a2l) {
+ dso->a2l = addr2line_init(dso_name);
+ a2l = dso->a2l;
+ }
+
+ if (a2l == NULL) {
+ pr_warning("addr2line_init failed for %s\n", dso_name);
+ return 0;
+ }
+
+ a2l->addr = addr;
+ a2l->found = false;
+
+ bfd_map_over_sections(a2l->abfd, find_address_in_section, a2l);
+
+ if (!a2l->found)
+ return 0;
+
+ if (unwind_inlines) {
+ int cnt = 0;
+
+ if (node && inline_list__append_dso_a2l(dso, node, sym))
+ return 0;
+
+ while (bfd_find_inliner_info(a2l->abfd, &a2l->filename,
+ &a2l->funcname, &a2l->line) &&
+ cnt++ < MAX_INLINE_NEST) {
+
+ if (a2l->filename && !strlen(a2l->filename))
+ a2l->filename = NULL;
+
+ if (node != NULL) {
+ if (inline_list__append_dso_a2l(dso, node, sym))
+ return 0;
+				/* found at least one inline frame */
+ ret = 1;
+ }
+ }
+ }
+
+ if (file) {
+ *file = a2l->filename ? strdup(a2l->filename) : NULL;
+ ret = *file ? 1 : 0;
+ }
+
+ if (line)
+ *line = a2l->line;
+
+ return ret;
+}
+
+void dso__free_a2l(struct dso *dso)
+{
+ struct a2l_data *a2l = dso->a2l;
+
+ if (!a2l)
+ return;
+
+ addr2line_cleanup(a2l);
+
+ dso->a2l = NULL;
+}
+
+static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
+ struct dso *dso, struct symbol *sym)
+{
+ struct inline_node *node;
+
+ node = zalloc(sizeof(*node));
+ if (node == NULL) {
+ perror("not enough memory for the inline node");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&node->val);
+ node->addr = addr;
+
+ addr2line(dso_name, addr, NULL, NULL, dso, true, node, sym);
+ return node;
+}
+
+#else /* HAVE_LIBBFD_SUPPORT */
+
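+/*
+ * Split addr2line's "file:line" output in place, e.g. "foo.c:42\n"
+ * becomes "foo.c" with *line_nr = 42; returns 1 on success, 0 for
+ * the unresolved "??:0" marker or malformed input.
+ */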
+static int filename_split(char *filename, unsigned int *line_nr)
+{
+ char *sep;
+
+ sep = strchr(filename, '\n');
+ if (sep)
+ *sep = '\0';
+
+ if (!strcmp(filename, "??:0"))
+ return 0;
+
+ sep = strchr(filename, ':');
+ if (sep) {
+ *sep++ = '\0';
+ *line_nr = strtoul(sep, NULL, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int addr2line(const char *dso_name, u64 addr,
+ char **file, unsigned int *line_nr,
+ struct dso *dso __maybe_unused,
+ bool unwind_inlines __maybe_unused,
+ struct inline_node *node __maybe_unused,
+ struct symbol *sym __maybe_unused)
+{
+ FILE *fp;
+ char cmd[PATH_MAX];
+ char *filename = NULL;
+	size_t len = 0;
+ int ret = 0;
+
+ scnprintf(cmd, sizeof(cmd), "addr2line -e %s %016"PRIx64,
+ dso_name, addr);
+
+ fp = popen(cmd, "r");
+ if (fp == NULL) {
+ pr_warning("popen failed for %s\n", dso_name);
+ return 0;
+ }
+
+ if (getline(&filename, &len, fp) < 0 || !len) {
+ pr_warning("addr2line has no output for %s\n", dso_name);
+ goto out;
+ }
+
+ ret = filename_split(filename, line_nr);
+ if (ret != 1) {
+ free(filename);
+ goto out;
+ }
+
+ *file = filename;
+
+out:
+ pclose(fp);
+ return ret;
+}
+
+void dso__free_a2l(struct dso *dso __maybe_unused)
+{
+}
+
+static struct inline_node *addr2inlines(const char *dso_name, u64 addr,
+ struct dso *dso __maybe_unused,
+ struct symbol *sym)
+{
+ FILE *fp;
+ char cmd[PATH_MAX];
+ struct inline_node *node;
+ char *filename = NULL;
+ char *funcname = NULL;
+	size_t filelen = 0, funclen = 0;
+ unsigned int line_nr = 0;
+
+ scnprintf(cmd, sizeof(cmd), "addr2line -e %s -i -f %016"PRIx64,
+ dso_name, addr);
+
+ fp = popen(cmd, "r");
+ if (fp == NULL) {
+ pr_err("popen failed for %s\n", dso_name);
+ return NULL;
+ }
+
+ node = zalloc(sizeof(*node));
+ if (node == NULL) {
+ perror("not enough memory for the inline node");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&node->val);
+ node->addr = addr;
+
+	/* addr2line -f generates two lines for each inlined function */
+ while (getline(&funcname, &funclen, fp) != -1) {
+ char *srcline;
+ struct symbol *inline_sym;
+
+ rtrim(funcname);
+
+ if (getline(&filename, &filelen, fp) == -1)
+ goto out;
+
+ if (filename_split(filename, &line_nr) != 1)
+ goto out;
+
+ srcline = srcline_from_fileline(filename, line_nr);
+ inline_sym = new_inline_sym(dso, sym, funcname);
+
+ if (inline_list__append(inline_sym, srcline, node) != 0) {
+ free(srcline);
+ if (inline_sym && inline_sym->inlined)
+ symbol__delete(inline_sym);
+ goto out;
+ }
+ }
+
+out:
+ pclose(fp);
+ free(filename);
+ free(funcname);
+
+ return node;
+}
+
+#endif /* HAVE_LIBBFD_SUPPORT */
+
+/*
+ * Number of addr2line failures (without success) before disabling it for that
+ * dso.
+ */
+#define A2L_FAIL_LIMIT 123
+
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, bool unwind_inlines,
+ u64 ip)
+{
+ char *file = NULL;
+ unsigned line = 0;
+ char *srcline;
+ const char *dso_name;
+
+ if (!dso->has_srcline)
+ goto out;
+
+ dso_name = dso__name(dso);
+ if (dso_name == NULL)
+ goto out;
+
+ if (!addr2line(dso_name, addr, &file, &line, dso,
+ unwind_inlines, NULL, sym))
+ goto out;
+
+ srcline = srcline_from_fileline(file, line);
+ free(file);
+
+ if (!srcline)
+ goto out;
+
+ dso->a2l_fails = 0;
+
+ return srcline;
+
+out:
+ if (dso->a2l_fails && ++dso->a2l_fails > A2L_FAIL_LIMIT) {
+ dso->has_srcline = 0;
+ dso__free_a2l(dso);
+ }
+
+ if (!show_addr)
+ return (show_sym && sym) ?
+ strndup(sym->name, sym->namelen) : NULL;
+
+ if (sym) {
+ if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
+ ip - sym->start) < 0)
+ return SRCLINE_UNKNOWN;
+ } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
+ return SRCLINE_UNKNOWN;
+ return srcline;
+}
+
+void free_srcline(char *srcline)
+{
+ if (srcline && strcmp(srcline, SRCLINE_UNKNOWN) != 0)
+ free(srcline);
+}
+
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, u64 ip)
+{
+ return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip);
+}
+
+struct srcline_node {
+ u64 addr;
+ char *srcline;
+ struct rb_node rb_node;
+};
+
+void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline)
+{
+ struct rb_node **p = &tree->rb_node;
+ struct rb_node *parent = NULL;
+ struct srcline_node *i, *node;
+
+ node = zalloc(sizeof(struct srcline_node));
+ if (!node) {
+ perror("not enough memory for the srcline node");
+ return;
+ }
+
+ node->addr = addr;
+ node->srcline = srcline;
+
+ while (*p != NULL) {
+ parent = *p;
+ i = rb_entry(parent, struct srcline_node, rb_node);
+ if (addr < i->addr)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&node->rb_node, parent, p);
+ rb_insert_color(&node->rb_node, tree);
+}
+
+char *srcline__tree_find(struct rb_root *tree, u64 addr)
+{
+ struct rb_node *n = tree->rb_node;
+
+ while (n) {
+ struct srcline_node *i = rb_entry(n, struct srcline_node,
+ rb_node);
+
+ if (addr < i->addr)
+ n = n->rb_left;
+ else if (addr > i->addr)
+ n = n->rb_right;
+ else
+ return i->srcline;
+ }
+
+ return NULL;
+}
+
+void srcline__tree_delete(struct rb_root *tree)
+{
+ struct srcline_node *pos;
+ struct rb_node *next = rb_first(tree);
+
+ while (next) {
+ pos = rb_entry(next, struct srcline_node, rb_node);
+ next = rb_next(&pos->rb_node);
+ rb_erase(&pos->rb_node, tree);
+ free_srcline(pos->srcline);
+ zfree(&pos);
+ }
+}
+
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+ struct symbol *sym)
+{
+ const char *dso_name;
+
+ dso_name = dso__name(dso);
+ if (dso_name == NULL)
+ return NULL;
+
+ return addr2inlines(dso_name, addr, dso, sym);
+}
+
+void inline_node__delete(struct inline_node *node)
+{
+ struct inline_list *ilist, *tmp;
+
+ list_for_each_entry_safe(ilist, tmp, &node->val, list) {
+ list_del_init(&ilist->list);
+ free_srcline(ilist->srcline);
+ /* only the inlined symbols are owned by the list */
+ if (ilist->symbol && ilist->symbol->inlined)
+ symbol__delete(ilist->symbol);
+ free(ilist);
+ }
+
+ free(node);
+}
+
+void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines)
+{
+ struct rb_node **p = &tree->rb_node;
+ struct rb_node *parent = NULL;
+ const u64 addr = inlines->addr;
+ struct inline_node *i;
+
+ while (*p != NULL) {
+ parent = *p;
+ i = rb_entry(parent, struct inline_node, rb_node);
+ if (addr < i->addr)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&inlines->rb_node, parent, p);
+ rb_insert_color(&inlines->rb_node, tree);
+}
+
+struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr)
+{
+ struct rb_node *n = tree->rb_node;
+
+ while (n) {
+ struct inline_node *i = rb_entry(n, struct inline_node,
+ rb_node);
+
+ if (addr < i->addr)
+ n = n->rb_left;
+ else if (addr > i->addr)
+ n = n->rb_right;
+ else
+ return i;
+ }
+
+ return NULL;
+}
+
+void inlines__tree_delete(struct rb_root *tree)
+{
+ struct inline_node *pos;
+ struct rb_node *next = rb_first(tree);
+
+ while (next) {
+ pos = rb_entry(next, struct inline_node, rb_node);
+ next = rb_next(&pos->rb_node);
+ rb_erase(&pos->rb_node, tree);
+ inline_node__delete(pos);
+ }
+}
diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h
new file mode 100644
index 000000000..b2bb5502f
--- /dev/null
+++ b/tools/perf/util/srcline.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_SRCLINE_H
+#define PERF_SRCLINE_H
+
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct dso;
+struct symbol;
+
+extern bool srcline_full_filename;
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, u64 ip);
+char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
+ bool show_sym, bool show_addr, bool unwind_inlines,
+ u64 ip);
+void free_srcline(char *srcline);
+
+/* insert the srcline into the DSO, which will take ownership */
+void srcline__tree_insert(struct rb_root *tree, u64 addr, char *srcline);
+/* find previously inserted srcline */
+char *srcline__tree_find(struct rb_root *tree, u64 addr);
+/* delete all srclines within the tree */
+void srcline__tree_delete(struct rb_root *tree);
+
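+/*
+ * Sentinel for unresolvable locations; free_srcline() matches it by
+ * string comparison and never frees it, so it is safe to return the
+ * literal directly.
+ */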
+#define SRCLINE_UNKNOWN ((char *) "??:0")
+
+struct inline_list {
+ struct symbol *symbol;
+ char *srcline;
+ struct list_head list;
+};
+
+struct inline_node {
+ u64 addr;
+ struct list_head val;
+ struct rb_node rb_node;
+};
+
+/* parse inlined frames for the given address */
+struct inline_node *dso__parse_addr_inlines(struct dso *dso, u64 addr,
+ struct symbol *sym);
+/* free resources associated to the inline node list */
+void inline_node__delete(struct inline_node *node);
+
+/* insert the inline node list into the DSO, which will take ownership */
+void inlines__tree_insert(struct rb_root *tree, struct inline_node *inlines);
+/* find previously inserted inline node list */
+struct inline_node *inlines__tree_find(struct rb_root *tree, u64 addr);
+/* delete all inline_node entries within the tree */
+void inlines__tree_delete(struct rb_root *tree);
+
+#endif /* PERF_SRCLINE_H */
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 000000000..59475287e
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,999 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+#include "pmu.h"
+#include "rblist.h"
+#include "evlist.h"
+#include "expr.h"
+#include "metricgroup.h"
+
+/*
+ * AGGR_GLOBAL: Use CPU 0
+ * AGGR_SOCKET: Use first CPU of socket
+ * AGGR_CORE: Use first CPU of core
+ * AGGR_NONE: Use matching CPU
+ * AGGR_THREAD: Not supported?
+ */
+static bool have_frontend_stalled;
+
+struct runtime_stat rt_stat;
+struct stats walltime_nsecs_stats;
+
+struct saved_value {
+ struct rb_node rb_node;
+ struct perf_evsel *evsel;
+ enum stat_type type;
+ int ctx;
+ int cpu;
+ struct runtime_stat *stat;
+ struct stats stats;
+};
+
+static int saved_value_cmp(struct rb_node *rb_node, const void *entry)
+{
+ struct saved_value *a = container_of(rb_node,
+ struct saved_value,
+ rb_node);
+ const struct saved_value *b = entry;
+
+ if (a->cpu != b->cpu)
+ return a->cpu - b->cpu;
+
+ /*
+ * Previously the rbtree was used to link generic metrics.
+ * The keys were evsel/cpu. Now the rbtree is extended to support
+ * per-thread shadow stats. For shadow stats case, the keys
+ * are cpu/type/ctx/stat (evsel is NULL). For generic metrics
+ * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL).
+ */
+ if (a->type != b->type)
+ return a->type - b->type;
+
+ if (a->ctx != b->ctx)
+ return a->ctx - b->ctx;
+
+ if (a->evsel == NULL && b->evsel == NULL) {
+ if (a->stat == b->stat)
+ return 0;
+
+ if ((char *)a->stat < (char *)b->stat)
+ return -1;
+
+ return 1;
+ }
+
+ if (a->evsel == b->evsel)
+ return 0;
+ if ((char *)a->evsel < (char *)b->evsel)
+ return -1;
+ return +1;
+}
+
+static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused,
+ const void *entry)
+{
+ struct saved_value *nd = malloc(sizeof(struct saved_value));
+
+ if (!nd)
+ return NULL;
+ memcpy(nd, entry, sizeof(struct saved_value));
+ return &nd->rb_node;
+}
+
+static void saved_value_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct saved_value *v;
+
+ BUG_ON(!rb_node);
+ v = container_of(rb_node, struct saved_value, rb_node);
+ free(v);
+}
+
+static struct saved_value *saved_value_lookup(struct perf_evsel *evsel,
+ int cpu,
+ bool create,
+ enum stat_type type,
+ int ctx,
+ struct runtime_stat *st)
+{
+ struct rblist *rblist;
+ struct rb_node *nd;
+ struct saved_value dm = {
+ .cpu = cpu,
+ .evsel = evsel,
+ .type = type,
+ .ctx = ctx,
+ .stat = st,
+ };
+
+ rblist = &st->value_list;
+
+ nd = rblist__find(rblist, &dm);
+ if (nd)
+ return container_of(nd, struct saved_value, rb_node);
+ if (create) {
+ rblist__add_node(rblist, &dm);
+ nd = rblist__find(rblist, &dm);
+ if (nd)
+ return container_of(nd, struct saved_value, rb_node);
+ }
+ return NULL;
+}
+
+void runtime_stat__init(struct runtime_stat *st)
+{
+ struct rblist *rblist = &st->value_list;
+
+ rblist__init(rblist);
+ rblist->node_cmp = saved_value_cmp;
+ rblist->node_new = saved_value_new;
+ rblist->node_delete = saved_value_delete;
+}
+
+void runtime_stat__exit(struct runtime_stat *st)
+{
+ rblist__exit(&st->value_list);
+}
+
+void perf_stat__init_shadow_stats(void)
+{
+ have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend");
+ runtime_stat__init(&rt_stat);
+}
+
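+/*
+ * Encode the evsel's exclude_* bits into a context id, so that shadow
+ * stats from differently-filtered events are kept apart in the lookups.
+ */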
+static int evsel_context(struct perf_evsel *evsel)
+{
+ int ctx = 0;
+
+ if (evsel->attr.exclude_kernel)
+ ctx |= CTX_BIT_KERNEL;
+ if (evsel->attr.exclude_user)
+ ctx |= CTX_BIT_USER;
+ if (evsel->attr.exclude_hv)
+ ctx |= CTX_BIT_HV;
+ if (evsel->attr.exclude_host)
+ ctx |= CTX_BIT_HOST;
+ if (evsel->attr.exclude_idle)
+ ctx |= CTX_BIT_IDLE;
+
+ return ctx;
+}
+
+static void reset_stat(struct runtime_stat *st)
+{
+ struct rblist *rblist;
+ struct rb_node *pos, *next;
+
+ rblist = &st->value_list;
+ next = rb_first(&rblist->entries);
+ while (next) {
+ pos = next;
+ next = rb_next(pos);
+ memset(&container_of(pos, struct saved_value, rb_node)->stats,
+ 0,
+ sizeof(struct stats));
+ }
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+ reset_stat(&rt_stat);
+ memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+}
+
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st)
+{
+ reset_stat(st);
+}
+
+static void update_runtime_stat(struct runtime_stat *st,
+ enum stat_type type,
+ int ctx, int cpu, u64 count)
+{
+ struct saved_value *v = saved_value_lookup(NULL, cpu, true,
+ type, ctx, st);
+
+ if (v)
+ update_stats(&v->stats, count);
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc:
+ */
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
+ int cpu, struct runtime_stat *st)
+{
+ int ctx = evsel_context(counter);
+ u64 count_ns = count;
+
+ count *= counter->scale;
+
+ if (perf_evsel__is_clock(counter))
+ update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
+ else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+ update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+ update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+ update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, ELISION_START))
+ update_runtime_stat(st, STAT_ELISION, ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS))
+ update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED))
+ update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED))
+ update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES))
+ update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES,
+ ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
+ update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
+ ctx, cpu, count);
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
+ ctx, cpu, count);
+ else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
+ ctx, cpu, count);
+ else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
+ else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+ update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+ update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+ update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+ update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+ update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
+ else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+ update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, SMI_NUM))
+ update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
+ else if (perf_stat_evsel__is(counter, APERF))
+ update_runtime_stat(st, STAT_APERF, ctx, cpu, count);
+
+ if (counter->collect_stat) {
+		struct saved_value *v = saved_value_lookup(counter, cpu, true,
+							   STAT_NONE, 0, st);
+
+		if (v)
+			update_stats(&v->stats, count);
+ }
+}
+
+/* used for get_ratio_color() */
+enum grc_type {
+ GRC_STALLED_CYCLES_FE,
+ GRC_STALLED_CYCLES_BE,
+ GRC_CACHE_MISSES,
+ GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+ static const double grc_table[GRC_MAX_NR][3] = {
+ [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+ [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+ [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 },
+ };
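+	/* above [0]: red, above [1]: magenta, above [2]: yellow (in percent) */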
+ const char *color = PERF_COLOR_NORMAL;
+
+ if (ratio > grc_table[type][0])
+ color = PERF_COLOR_RED;
+ else if (ratio > grc_table[type][1])
+ color = PERF_COLOR_MAGENTA;
+ else if (ratio > grc_table[type][2])
+ color = PERF_COLOR_YELLOW;
+
+ return color;
+}
+
+static struct perf_evsel *perf_stat__find_event(struct perf_evlist *evsel_list,
+ const char *name)
+{
+ struct perf_evsel *c2;
+
+ evlist__for_each_entry (evsel_list, c2) {
+ if (!strcasecmp(c2->name, name) && !c2->collect_stat)
+ return c2;
+ }
+ return NULL;
+}
+
+/* Mark MetricExpr target events and link events using them to them. */
+void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list)
+{
+ struct perf_evsel *counter, *leader, **metric_events, *oc;
+ bool found;
+ const char **metric_names;
+ int i;
+ int num_metric_names;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ bool invalid = false;
+
+ leader = counter->leader;
+ if (!counter->metric_expr)
+ continue;
+ metric_events = counter->metric_events;
+ if (!metric_events) {
+ if (expr__find_other(counter->metric_expr, counter->name,
+ &metric_names, &num_metric_names) < 0)
+ continue;
+
+			metric_events = calloc(num_metric_names + 1,
+					       sizeof(struct perf_evsel *));
+ if (!metric_events)
+ return;
+ counter->metric_events = metric_events;
+ }
+
+ for (i = 0; i < num_metric_names; i++) {
+ found = false;
+ if (leader) {
+ /* Search in group */
+ for_each_group_member (oc, leader) {
+ if (!strcasecmp(oc->name, metric_names[i]) &&
+ !oc->collect_stat) {
+ found = true;
+ break;
+ }
+ }
+ }
+ if (!found) {
+ /* Search ignoring groups */
+ oc = perf_stat__find_event(evsel_list, metric_names[i]);
+ }
+ if (!oc) {
+ /* Deduping one is good enough to handle duplicated PMUs. */
+ static char *printed;
+
+ /*
+ * Adding events automatically would be difficult, because
+ * it would risk creating groups that are not schedulable.
+ * perf stat doesn't understand all the scheduling constraints
+ * of events. So we ask the user instead to add the missing
+ * events.
+ */
+ if (!printed || strcasecmp(printed, metric_names[i])) {
+ fprintf(stderr,
+ "Add %s event to groups to get metric expression for %s\n",
+ metric_names[i],
+ counter->name);
+ printed = strdup(metric_names[i]);
+ }
+ invalid = true;
+ continue;
+ }
+ metric_events[i] = oc;
+ oc->collect_stat = true;
+ }
+ metric_events[i] = NULL;
+ free(metric_names);
+ if (invalid) {
+ free(metric_events);
+ counter->metric_events = NULL;
+ counter->metric_expr = NULL;
+ }
+ }
+}
+
+static double runtime_stat_avg(struct runtime_stat *st,
+ enum stat_type type, int ctx, int cpu)
+{
+ struct saved_value *v;
+
+ v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+ if (!v)
+ return 0.0;
+
+ return avg_stats(&v->stats);
+}
+
+static double runtime_stat_n(struct runtime_stat *st,
+ enum stat_type type, int ctx, int cpu)
+{
+ struct saved_value *v;
+
+ v = saved_value_lookup(NULL, cpu, false, type, ctx, st);
+ if (!v)
+ return 0.0;
+
+ return v->stats.n;
+}
+
+static void print_stalled_cycles_frontend(int cpu,
+ struct perf_evsel *evsel, double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+ if (ratio)
+ out->print_metric(out->ctx, color, "%7.2f%%", "frontend cycles idle",
+ ratio);
+ else
+ out->print_metric(out->ctx, NULL, NULL, "frontend cycles idle", 0);
+}
+
+static void print_stalled_cycles_backend(int cpu,
+ struct perf_evsel *evsel, double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+ out->print_metric(out->ctx, color, "%7.2f%%", "backend cycles idle", ratio);
+}
+
+static void print_branch_misses(int cpu,
+ struct perf_evsel *evsel,
+ double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ out->print_metric(out->ctx, color, "%7.2f%%", "of all branches", ratio);
+}
+
+static void print_l1_dcache_misses(int cpu,
+ struct perf_evsel *evsel,
+ double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+ out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+}
+
+static void print_l1_icache_misses(int cpu,
+ struct perf_evsel *evsel,
+ double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+ out->print_metric(out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+}
+
+static void print_dtlb_cache_misses(int cpu,
+ struct perf_evsel *evsel,
+ double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+ out->print_metric(out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+}
+
+static void print_itlb_cache_misses(int cpu,
+ struct perf_evsel *evsel,
+ double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+ out->print_metric(out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+}
+
+static void print_ll_cache_misses(int cpu,
+ struct perf_evsel *evsel,
+ double avg,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double total, ratio = 0.0;
+ const char *color;
+ int ctx = evsel_context(evsel);
+
+ total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu);
+
+ if (total)
+ ratio = avg / total * 100.0;
+
+ color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+ out->print_metric(out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+}
+
+/*
+ * High level "TopDown" CPU core pipeline bottleneck break down.
+ *
+ * Basic concept following
+ * Yasin, "A Top-Down Method for Performance Analysis and Counters
+ * Architecture", ISPASS 2014.
+ *
+ * The CPU pipeline is divided into 4 areas that can be bottlenecks:
+ *
+ * Frontend -> Backend -> Retiring
+ * BadSpeculation additionally covers speculatively executed work that is
+ * thrown away (for example on branch mispredictions).
+ * Frontend is instruction decoding.
+ * Backend is execution, like computation and accessing data in memory
+ * Retiring is good execution that is not directly bottlenecked
+ *
+ * The formulas are computed in slots.
+ * A slot is one pipeline entry per cycle per unit of pipeline width
+ * (for example, a 4-wide pipeline has 4 slots in each cycle).
+ *
+ * Formulas:
+ * BadSpeculation = ((SlotsIssued - SlotsRetired) + RecoveryBubbles) /
+ * TotalSlots
+ * Retiring = SlotsRetired / TotalSlots
+ * FrontendBound = FetchBubbles / TotalSlots
+ * BackendBound = 1.0 - BadSpeculation - Retiring - FrontendBound
+ *
+ * The kernel provides the mapping to the low level CPU events and any scaling
+ * needed for the CPU pipeline width, for example:
+ *
+ * TotalSlots = Cycles * 4
+ *
+ * The scaling factor is communicated in the sysfs unit.
+ *
+ * In some cases the CPU may not be able to measure all the formulas due to
+ * missing events. In this case multiple formulas are combined, where possible.
+ *
+ * Full TopDown supports more levels to sub-divide each area: for example
+ * BackendBound into computing bound and memory bound. For now we only
+ * support Level 1 TopDown.
+ */
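+
+/*
+ * Worked example with hypothetical numbers: TotalSlots = 1000,
+ * SlotsIssued = 600, SlotsRetired = 500, RecoveryBubbles = 100,
+ * FetchBubbles = 200:
+ *
+ *   BadSpeculation = ((600 - 500) + 100) / 1000 = 0.20
+ *   Retiring       = 500 / 1000                 = 0.50
+ *   FrontendBound  = 200 / 1000                 = 0.20
+ *   BackendBound   = 1.0 - 0.20 - 0.50 - 0.20   = 0.10
+ */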
+
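+/*
+ * Counter values can come out marginally negative, presumably due to
+ * measurement skew between the events; clamp anything within -0.02 of
+ * zero to 0.
+ */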
+static double sanitize_val(double x)
+{
+ if (x < 0 && x >= -0.02)
+ return 0.0;
+ return x;
+}
+
+static double td_total_slots(int ctx, int cpu, struct runtime_stat *st)
+{
+ return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu);
+}
+
+static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st)
+{
+ double bad_spec = 0;
+ double total_slots;
+ double total;
+
+ total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) -
+ runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) +
+ runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu);
+
+ total_slots = td_total_slots(ctx, cpu, st);
+ if (total_slots)
+ bad_spec = total / total_slots;
+ return sanitize_val(bad_spec);
+}
+
+static double td_retiring(int ctx, int cpu, struct runtime_stat *st)
+{
+ double retiring = 0;
+ double total_slots = td_total_slots(ctx, cpu, st);
+ double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED,
+ ctx, cpu);
+
+ if (total_slots)
+ retiring = ret_slots / total_slots;
+ return retiring;
+}
+
+static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st)
+{
+ double fe_bound = 0;
+ double total_slots = td_total_slots(ctx, cpu, st);
+ double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES,
+ ctx, cpu);
+
+ if (total_slots)
+ fe_bound = fetch_bub / total_slots;
+ return fe_bound;
+}
+
+static double td_be_bound(int ctx, int cpu, struct runtime_stat *st)
+{
+ double sum = (td_fe_bound(ctx, cpu, st) +
+ td_bad_spec(ctx, cpu, st) +
+ td_retiring(ctx, cpu, st));
+ if (sum == 0)
+ return 0;
+ return sanitize_val(1.0 - sum);
+}
+
+static void print_smi_cost(int cpu, struct perf_evsel *evsel,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ double smi_num, aperf, cycles, cost = 0.0;
+ int ctx = evsel_context(evsel);
+ const char *color = NULL;
+
+ smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu);
+ aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu);
+ cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+ if ((cycles == 0) || (aperf == 0))
+ return;
+
+ if (smi_num)
+ cost = (aperf - cycles) / aperf * 100.00;
+
+ if (cost > 10)
+ color = PERF_COLOR_RED;
+ out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
+ out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
+}
+
+static void generic_metric(const char *metric_expr,
+ struct perf_evsel **metric_events,
+ char *name,
+ const char *metric_name,
+ double avg,
+ int cpu,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ print_metric_t print_metric = out->print_metric;
+ struct parse_ctx pctx;
+ double ratio;
+ int i;
+ void *ctxp = out->ctx;
+
+ expr__ctx_init(&pctx);
+ expr__add_id(&pctx, name, avg);
+ for (i = 0; metric_events[i]; i++) {
+ struct saved_value *v;
+ struct stats *stats;
+ double scale;
+
+ if (!strcmp(metric_events[i]->name, "duration_time")) {
+ stats = &walltime_nsecs_stats;
+ scale = 1e-9;
+ } else {
+ v = saved_value_lookup(metric_events[i], cpu, false,
+ STAT_NONE, 0, st);
+ if (!v)
+ break;
+ stats = &v->stats;
+ scale = 1.0;
+ }
+ expr__add_id(&pctx, metric_events[i]->name, avg_stats(stats)*scale);
+ }
+ if (!metric_events[i]) {
+ const char *p = metric_expr;
+
+ if (expr__parse(&ratio, &pctx, &p) == 0)
+ print_metric(ctxp, NULL, "%8.1f",
+ metric_name ?
+ metric_name :
+ out->force_header ? name : "",
+ ratio);
+ else
+ print_metric(ctxp, NULL, NULL,
+ out->force_header ?
+ (metric_name ? metric_name : name) : "", 0);
+ } else
+ print_metric(ctxp, NULL, NULL, "", 0);
+}
+
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+ double avg, int cpu,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events,
+ struct runtime_stat *st)
+{
+ void *ctxp = out->ctx;
+ print_metric_t print_metric = out->print_metric;
+ double total, ratio = 0.0, total2;
+ const char *color = NULL;
+ int ctx = evsel_context(evsel);
+ struct metric_event *me;
+ int num = 1;
+
+ if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+ if (total) {
+ ratio = avg / total;
+ print_metric(ctxp, NULL, "%7.2f ",
+ "insn per cycle", ratio);
+ } else {
+ print_metric(ctxp, NULL, NULL, "insn per cycle", 0);
+ }
+
+ total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT,
+ ctx, cpu);
+
+ total = max(total, runtime_stat_avg(st,
+ STAT_STALLED_CYCLES_BACK,
+ ctx, cpu));
+
+ if (total && avg) {
+ out->new_line(ctxp);
+ ratio = total / avg;
+ print_metric(ctxp, NULL, "%7.2f ",
+ "stalled cycles per insn",
+ ratio);
+ } else if (have_frontend_stalled) {
+ print_metric(ctxp, NULL, NULL,
+ "stalled cycles per insn", 0);
+ }
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+ if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
+ print_branch_misses(cpu, evsel, avg, out, st);
+ else
+ print_metric(ctxp, NULL, NULL, "of all branches", 0);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+ if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
+ print_l1_dcache_misses(cpu, evsel, avg, out, st);
+ else
+ print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+ if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
+ print_l1_icache_misses(cpu, evsel, avg, out, st);
+ else
+ print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+ if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
+ print_dtlb_cache_misses(cpu, evsel, avg, out, st);
+ else
+ print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+ if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
+ print_itlb_cache_misses(cpu, evsel, avg, out, st);
+ else
+ print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+ } else if (
+ evsel->attr.type == PERF_TYPE_HW_CACHE &&
+ evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL |
+ ((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+ ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) {
+
+ if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
+ print_ll_cache_misses(cpu, evsel, avg, out, st);
+ else
+ print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
+ total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
+
+ if (total)
+ ratio = avg * 100 / total;
+
+ if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0)
+ print_metric(ctxp, NULL, "%8.3f %%",
+ "of all cache refs", ratio);
+ else
+ print_metric(ctxp, NULL, NULL, "of all cache refs", 0);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+ print_stalled_cycles_frontend(cpu, evsel, avg, out, st);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+ print_stalled_cycles_backend(cpu, evsel, avg, out, st);
+ } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+ total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+
+ if (total) {
+ ratio = avg / total;
+ print_metric(ctxp, NULL, "%8.3f", "GHz", ratio);
+ } else {
+			print_metric(ctxp, NULL, NULL, "GHz", 0);
+ }
+ } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+
+ if (total)
+ print_metric(ctxp, NULL,
+ "%7.2f%%", "transactional cycles",
+ 100.0 * (avg / total));
+ else
+ print_metric(ctxp, NULL, NULL, "transactional cycles",
+ 0);
+ } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+ total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
+ total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu);
+
+ if (total2 < avg)
+ total2 = avg;
+ if (total)
+ print_metric(ctxp, NULL, "%7.2f%%", "aborted cycles",
+ 100.0 * ((total2-avg) / total));
+ else
+ print_metric(ctxp, NULL, NULL, "aborted cycles", 0);
+ } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) {
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+ ctx, cpu);
+
+ if (avg)
+ ratio = total / avg;
+
+ if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0)
+ print_metric(ctxp, NULL, "%8.0f",
+ "cycles / transaction", ratio);
+ else
+ print_metric(ctxp, NULL, NULL, "cycles / transaction",
+ 0);
+ } else if (perf_stat_evsel__is(evsel, ELISION_START)) {
+ total = runtime_stat_avg(st, STAT_CYCLES_IN_TX,
+ ctx, cpu);
+
+ if (avg)
+ ratio = total / avg;
+
+ print_metric(ctxp, NULL, "%8.0f", "cycles / elision", ratio);
+ } else if (perf_evsel__is_clock(evsel)) {
+ if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
+ print_metric(ctxp, NULL, "%8.3f", "CPUs utilized",
+ avg / (ratio * evsel->scale));
+ else
+ print_metric(ctxp, NULL, NULL, "CPUs utilized", 0);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) {
+ double fe_bound = td_fe_bound(ctx, cpu, st);
+
+ if (fe_bound > 0.2)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "frontend bound",
+ fe_bound * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) {
+ double retiring = td_retiring(ctx, cpu, st);
+
+ if (retiring > 0.7)
+ color = PERF_COLOR_GREEN;
+ print_metric(ctxp, color, "%8.1f%%", "retiring",
+ retiring * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) {
+ double bad_spec = td_bad_spec(ctx, cpu, st);
+
+ if (bad_spec > 0.1)
+ color = PERF_COLOR_RED;
+ print_metric(ctxp, color, "%8.1f%%", "bad speculation",
+ bad_spec * 100.);
+ } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) {
+ double be_bound = td_be_bound(ctx, cpu, st);
+ const char *name = "backend bound";
+ static int have_recovery_bubbles = -1;
+
+ /* In case the CPU does not support topdown-recovery-bubbles */
+ if (have_recovery_bubbles < 0)
+ have_recovery_bubbles = pmu_have_event("cpu",
+ "topdown-recovery-bubbles");
+ if (!have_recovery_bubbles)
+ name = "backend bound/bad spec";
+
+ if (be_bound > 0.2)
+ color = PERF_COLOR_RED;
+ if (td_total_slots(ctx, cpu, st) > 0)
+ print_metric(ctxp, color, "%8.1f%%", name,
+ be_bound * 100.);
+ else
+ print_metric(ctxp, NULL, NULL, name, 0);
+ } else if (evsel->metric_expr) {
+ generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name,
+ evsel->metric_name, avg, cpu, out, st);
+ } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
+ char unit = 'M';
+ char unit_buf[10];
+
+ total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
+
+ if (total)
+ ratio = 1000.0 * avg / total;
+ if (ratio < 0.001) {
+ ratio *= 1000;
+ unit = 'K';
+ }
+ snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
+ print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
+ } else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
+ print_smi_cost(cpu, evsel, out, st);
+ } else {
+ num = 0;
+ }
+
+ if ((me = metricgroup__lookup(metric_events, evsel, false)) != NULL) {
+ struct metric_expr *mexp;
+
+ list_for_each_entry (mexp, &me->head, nd) {
+ if (num++ > 0)
+ out->new_line(ctxp);
+ generic_metric(mexp->metric_expr, mexp->metric_events,
+ evsel->name, mexp->metric_name,
+ avg, cpu, out, st);
+ }
+ }
+ if (num == 0)
+ print_metric(ctxp, NULL, NULL, NULL, 0);
+}
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
new file mode 100644
index 000000000..b254bee70
--- /dev/null
+++ b/tools/perf/util/stat.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+#include "stat.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "thread_map.h"
+
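+/*
+ * Single-pass mean/variance accumulation (Welford's online algorithm):
+ * M2 keeps the running sum of squared deviations from the current mean,
+ * so the standard deviation can be derived later without storing samples.
+ */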
+void update_stats(struct stats *stats, u64 val)
+{
+ double delta;
+
+ stats->n++;
+ delta = val - stats->mean;
+ stats->mean += delta / stats->n;
+ stats->M2 += delta*(val - stats->mean);
+
+ if (val > stats->max)
+ stats->max = val;
+
+ if (val < stats->min)
+ stats->min = val;
+}
+
+double avg_stats(struct stats *stats)
+{
+ return stats->mean;
+}
+
+/*
+ * http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
+ *
+ * (\Sum n_i^2) - ((\Sum n_i)^2)/n
+ * s^2 = -------------------------------
+ * n - 1
+ *
+ * http://en.wikipedia.org/wiki/Stddev
+ *
+ * The std dev of the mean is related to the std dev by:
+ *
+ * s
+ * s_mean = -------
+ * sqrt(n)
+ *
+ */
+double stddev_stats(struct stats *stats)
+{
+ double variance, variance_mean;
+
+ if (stats->n < 2)
+ return 0.0;
+
+ variance = stats->M2 / (stats->n - 1);
+ variance_mean = variance / stats->n;
+
+ return sqrt(variance_mean);
+}
+
+double rel_stddev_stats(double stddev, double avg)
+{
+ double pct = 0.0;
+
+ if (avg)
+ pct = 100.0 * stddev/avg;
+
+ return pct;
+}
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+ enum perf_stat_evsel_id id)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ return ps->id == id;
+}
+
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+ ID(NONE, x),
+ ID(CYCLES_IN_TX, cpu/cycles-t/),
+ ID(TRANSACTION_START, cpu/tx-start/),
+ ID(ELISION_START, cpu/el-start/),
+ ID(CYCLES_IN_TX_CP, cpu/cycles-ct/),
+ ID(TOPDOWN_TOTAL_SLOTS, topdown-total-slots),
+ ID(TOPDOWN_SLOTS_ISSUED, topdown-slots-issued),
+ ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
+ ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
+ ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
+ ID(SMI_NUM, msr/smi/),
+ ID(APERF, msr/aperf/),
+};
+#undef ID
+
+static void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+ int i;
+
+ /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
+
+ for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
+ if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+ ps->id = i;
+ break;
+ }
+ }
+}
+
+static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
+{
+ int i;
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
+
+ perf_stat_evsel_id_init(evsel);
+}
+
+static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
+{
+ evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
+ if (evsel->stats == NULL)
+ return -ENOMEM;
+ perf_evsel__reset_stat_priv(evsel);
+ return 0;
+}
+
+static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
+{
+ struct perf_stat_evsel *ps = evsel->stats;
+
+ if (ps)
+ free(ps->group_data);
+ zfree(&evsel->stats);
+}
+
+static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel,
+ int ncpus, int nthreads)
+{
+ struct perf_counts *counts;
+
+ counts = perf_counts__new(ncpus, nthreads);
+ if (counts)
+ evsel->prev_raw_counts = counts;
+
+ return counts ? 0 : -ENOMEM;
+}
+
+static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
+{
+ perf_counts__delete(evsel->prev_raw_counts);
+ evsel->prev_raw_counts = NULL;
+}
+
+static void perf_evsel__reset_prev_raw_counts(struct perf_evsel *evsel)
+{
+ if (evsel->prev_raw_counts) {
+ evsel->prev_raw_counts->aggr.val = 0;
+ evsel->prev_raw_counts->aggr.ena = 0;
+ evsel->prev_raw_counts->aggr.run = 0;
+ }
+}
+
+static int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw)
+{
+ int ncpus = perf_evsel__nr_cpus(evsel);
+ int nthreads = thread_map__nr(evsel->threads);
+
+ if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
+ perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+ (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ return -ENOMEM;
+
+ return 0;
+}
+
+int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (perf_evsel__alloc_stats(evsel, alloc_raw))
+ goto out_free;
+ }
+
+ return 0;
+
+out_free:
+ perf_evlist__free_stats(evlist);
+ return -1;
+}
+
+void perf_evlist__free_stats(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ perf_evsel__free_stat_priv(evsel);
+ perf_evsel__free_counts(evsel);
+ perf_evsel__free_prev_raw_counts(evsel);
+ }
+}
+
+void perf_evlist__reset_stats(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ perf_evsel__reset_stat_priv(evsel);
+ perf_evsel__reset_counts(evsel);
+ }
+}
+
+void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ perf_evsel__reset_prev_raw_counts(evsel);
+}
+
+static void zero_per_pkg(struct perf_evsel *counter)
+{
+ if (counter->per_pkg_mask)
+ memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
+}
+
+static int check_per_pkg(struct perf_evsel *counter,
+ struct perf_counts_values *vals, int cpu, bool *skip)
+{
+ unsigned long *mask = counter->per_pkg_mask;
+ struct cpu_map *cpus = perf_evsel__cpus(counter);
+ int s;
+
+ *skip = false;
+
+ if (!counter->per_pkg)
+ return 0;
+
+ if (cpu_map__empty(cpus))
+ return 0;
+
+ if (!mask) {
+ mask = zalloc(MAX_NR_CPUS);
+ if (!mask)
+ return -ENOMEM;
+
+ counter->per_pkg_mask = mask;
+ }
+
+ /*
+ * we do not consider an event that has not run as a good
+ * instance to mark a package as used (skip=1). Otherwise
+ * we may run into a situation where the first CPU in a package
+ * is not running anything, yet the second is, and this function
+ * would mark the package as used after the first CPU and would
+ * not read the values from the second CPU.
+ */
+ if (!(vals->run && vals->ena))
+ return 0;
+
+ s = cpu_map__get_socket(cpus, cpu, NULL);
+ if (s < 0)
+ return -1;
+
+ *skip = test_and_set_bit(s, mask) == 1;
+ return 0;
+}
+
+static int
+process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel,
+ int cpu, int thread,
+ struct perf_counts_values *count)
+{
+ struct perf_counts_values *aggr = &evsel->counts->aggr;
+ static struct perf_counts_values zero;
+ bool skip = false;
+
+ if (check_per_pkg(evsel, count, cpu, &skip)) {
+ pr_err("failed to read per-pkg counter\n");
+ return -1;
+ }
+
+ if (skip)
+ count = &zero;
+
+ switch (config->aggr_mode) {
+ case AGGR_THREAD:
+ case AGGR_CORE:
+ case AGGR_SOCKET:
+ case AGGR_NONE:
+ if (!evsel->snapshot)
+ perf_evsel__compute_deltas(evsel, cpu, thread, count);
+ perf_counts_values__scale(count, config->scale, NULL);
+ if (config->aggr_mode == AGGR_NONE)
+ perf_stat__update_shadow_stats(evsel, count->val, cpu,
+ &rt_stat);
+ if (config->aggr_mode == AGGR_THREAD) {
+ if (config->stats)
+ perf_stat__update_shadow_stats(evsel,
+ count->val, 0, &config->stats[thread]);
+ else
+ perf_stat__update_shadow_stats(evsel,
+ count->val, 0, &rt_stat);
+ }
+ break;
+ case AGGR_GLOBAL:
+ aggr->val += count->val;
+ if (config->scale) {
+ aggr->ena += count->ena;
+ aggr->run += count->run;
+ }
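+		/* fall through */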
+ case AGGR_UNSET:
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int process_counter_maps(struct perf_stat_config *config,
+ struct perf_evsel *counter)
+{
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = perf_evsel__nr_cpus(counter);
+ int cpu, thread;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ if (process_counter_values(config, counter, cpu, thread,
+ perf_counts(counter->counts, cpu, thread)))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+ struct perf_evsel *counter)
+{
+ struct perf_counts_values *aggr = &counter->counts->aggr;
+ struct perf_stat_evsel *ps = counter->stats;
+ u64 *count = counter->counts->aggr.values;
+ int i, ret;
+
+ aggr->val = aggr->ena = aggr->run = 0;
+
+ /*
+	 * We recalculate the counter's data every interval,
+	 * and the display code shows the average of
+	 * ps->res_stats. The stats must be zeroed in
+	 * interval mode, otherwise the running average across
+	 * all intervals would be shown for each interval.
+ */
+ if (config->interval) {
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
+ }
+
+ if (counter->per_pkg)
+ zero_per_pkg(counter);
+
+ ret = process_counter_maps(config, counter);
+ if (ret)
+ return ret;
+
+ if (config->aggr_mode != AGGR_GLOBAL)
+ return 0;
+
+ if (!counter->snapshot)
+ perf_evsel__compute_deltas(counter, -1, -1, aggr);
+ perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
+
+ for (i = 0; i < 3; i++)
+ update_stats(&ps->res_stats[i], count[i]);
+
+ if (verbose > 0) {
+ fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
+ perf_evsel__name(counter), count[0], count[1], count[2]);
+ }
+
+ /*
+ * Save the full runtime - to allow normalization during printout:
+ */
+ perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
+
+ return 0;
+}
+
+int perf_event__process_stat_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct perf_counts_values count;
+ struct stat_event *st = &event->stat;
+ struct perf_evsel *counter;
+
+ count.val = st->val;
+ count.ena = st->ena;
+ count.run = st->run;
+
+ counter = perf_evlist__id2evsel(session->evlist, st->id);
+ if (!counter) {
+ pr_err("Failed to resolve counter for stat event.\n");
+ return -EINVAL;
+ }
+
+ *perf_counts(counter->counts, st->cpu, st->thread) = count;
+ counter->supported = true;
+ return 0;
+}
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp)
+{
+ struct stat_event *st = (struct stat_event *) event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... id %" PRIu64 ", cpu %d, thread %d\n",
+ st->id, st->cpu, st->thread);
+ ret += fprintf(fp, "... value %" PRIu64 ", enabled %" PRIu64 ", running %" PRIu64 "\n",
+ st->val, st->ena, st->run);
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp)
+{
+ struct stat_round_event *rd = (struct stat_round_event *)event;
+ size_t ret;
+
+ ret = fprintf(fp, "\n... time %" PRIu64 ", type %s\n", rd->time,
+ rd->type == PERF_STAT_ROUND_TYPE__FINAL ? "FINAL" : "INTERVAL");
+
+ return ret;
+}
+
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp)
+{
+ struct perf_stat_config sc;
+ size_t ret;
+
+ perf_event__read_stat_config(&sc, &event->stat_config);
+
+ ret = fprintf(fp, "\n");
+ ret += fprintf(fp, "... aggr_mode %d\n", sc.aggr_mode);
+ ret += fprintf(fp, "... scale %d\n", sc.scale);
+ ret += fprintf(fp, "... interval %u\n", sc.interval);
+
+ return ret;
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
new file mode 100644
index 000000000..e19abb163
--- /dev/null
+++ b/tools/perf/util/stat.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STATS_H
+#define __PERF_STATS_H
+
+#include <linux/types.h>
+#include <stdio.h>
+#include "xyarray.h"
+#include "rblist.h"
+
+struct stats {
+ double n, mean, M2;
+ u64 max, min;
+};
+
+enum perf_stat_evsel_id {
+ PERF_STAT_EVSEL_ID__NONE = 0,
+ PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+ PERF_STAT_EVSEL_ID__TRANSACTION_START,
+ PERF_STAT_EVSEL_ID__ELISION_START,
+ PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
+ PERF_STAT_EVSEL_ID__TOPDOWN_TOTAL_SLOTS,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_ISSUED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
+ PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
+ PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
+ PERF_STAT_EVSEL_ID__SMI_NUM,
+ PERF_STAT_EVSEL_ID__APERF,
+ PERF_STAT_EVSEL_ID__MAX,
+};
+
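+/* res_stats[] aggregates the counter's value/enabled/running triple */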
+struct perf_stat_evsel {
+ struct stats res_stats[3];
+ enum perf_stat_evsel_id id;
+ u64 *group_data;
+};
+
+enum aggr_mode {
+ AGGR_NONE,
+ AGGR_GLOBAL,
+ AGGR_SOCKET,
+ AGGR_CORE,
+ AGGR_THREAD,
+ AGGR_UNSET,
+};
+
+enum {
+ CTX_BIT_USER = 1 << 0,
+ CTX_BIT_KERNEL = 1 << 1,
+ CTX_BIT_HV = 1 << 2,
+ CTX_BIT_HOST = 1 << 3,
+ CTX_BIT_IDLE = 1 << 4,
+ CTX_BIT_MAX = 1 << 5,
+};
+
+#define NUM_CTX CTX_BIT_MAX
+
+enum stat_type {
+ STAT_NONE = 0,
+ STAT_NSECS,
+ STAT_CYCLES,
+ STAT_STALLED_CYCLES_FRONT,
+ STAT_STALLED_CYCLES_BACK,
+ STAT_BRANCHES,
+ STAT_CACHEREFS,
+ STAT_L1_DCACHE,
+ STAT_L1_ICACHE,
+ STAT_LL_CACHE,
+ STAT_ITLB_CACHE,
+ STAT_DTLB_CACHE,
+ STAT_CYCLES_IN_TX,
+ STAT_TRANSACTION,
+ STAT_ELISION,
+ STAT_TOPDOWN_TOTAL_SLOTS,
+ STAT_TOPDOWN_SLOTS_ISSUED,
+ STAT_TOPDOWN_SLOTS_RETIRED,
+ STAT_TOPDOWN_FETCH_BUBBLES,
+ STAT_TOPDOWN_RECOVERY_BUBBLES,
+ STAT_SMI_NUM,
+ STAT_APERF,
+ STAT_MAX
+};
+
+struct runtime_stat {
+ struct rblist value_list;
+};
+
+struct perf_stat_config {
+ enum aggr_mode aggr_mode;
+ bool scale;
+ FILE *output;
+ unsigned int interval;
+ unsigned int timeout;
+ int times;
+ struct runtime_stat *stats;
+ int stats_num;
+};
+
+void update_stats(struct stats *stats, u64 val);
+double avg_stats(struct stats *stats);
+double stddev_stats(struct stats *stats);
+double rel_stddev_stats(double stddev, double avg);
+
+static inline void init_stats(struct stats *stats)
+{
+ stats->n = 0.0;
+ stats->mean = 0.0;
+ stats->M2 = 0.0;
+ stats->min = (u64) -1;
+ stats->max = 0;
+}
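+
+/*
+ * The helpers above maintain a running (Welford-style n/mean/M2) mean and
+ * variance, so no sample buffer is kept. A small usage sketch:
+ *
+ *	struct stats st;
+ *
+ *	init_stats(&st);
+ *	update_stats(&st, 100);
+ *	update_stats(&st, 102);
+ *	printf("%f +- %f\n", avg_stats(&st), stddev_stats(&st));
+ */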
+
+struct perf_evsel;
+struct perf_evlist;
+
+struct perf_aggr_thread_value {
+ struct perf_evsel *counter;
+ int id;
+ double uval;
+ u64 val;
+ u64 run;
+ u64 ena;
+};
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+ enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+ __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+extern struct runtime_stat rt_stat;
+extern struct stats walltime_nsecs_stats;
+
+typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit,
+ const char *fmt, double val);
+typedef void (*new_line_t)(void *ctx);
+
+void runtime_stat__init(struct runtime_stat *st);
+void runtime_stat__exit(struct runtime_stat *st);
+void perf_stat__init_shadow_stats(void);
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__reset_shadow_per_stat(struct runtime_stat *st);
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count,
+ int cpu, struct runtime_stat *st);
+struct perf_stat_output_ctx {
+ void *ctx;
+ print_metric_t print_metric;
+ new_line_t new_line;
+ bool force_header;
+};
+
+void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
+ double avg, int cpu,
+ struct perf_stat_output_ctx *out,
+ struct rblist *metric_events,
+ struct runtime_stat *st);
+void perf_stat__collect_metric_expr(struct perf_evlist *);
+
+int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw);
+void perf_evlist__free_stats(struct perf_evlist *evlist);
+void perf_evlist__reset_stats(struct perf_evlist *evlist);
+void perf_evlist__reset_prev_raw_counts(struct perf_evlist *evlist);
+
+int perf_stat_process_counter(struct perf_stat_config *config,
+ struct perf_evsel *counter);
+struct perf_tool;
+union perf_event;
+struct perf_session;
+int perf_event__process_stat_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session);
+
+size_t perf_event__fprintf_stat(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_round(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_stat_config(union perf_event *event, FILE *fp);
+#endif
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
new file mode 100644
index 000000000..23092fd64
--- /dev/null
+++ b/tools/perf/util/strbuf.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "debug.h"
+#include "util.h"
+#include <linux/kernel.h>
+#include <errno.h>
+
+/*
+ * Used as the default ->buf value, so that people can always assume
+ * ->buf is non-NULL and NUL-terminated even for a freshly
+ * initialized strbuf.
+ */
+char strbuf_slopbuf[1];
+
+int strbuf_init(struct strbuf *sb, ssize_t hint)
+{
+ sb->alloc = sb->len = 0;
+ sb->buf = strbuf_slopbuf;
+ if (hint)
+ return strbuf_grow(sb, hint);
+ return 0;
+}
+
+void strbuf_release(struct strbuf *sb)
+{
+ if (sb->alloc) {
+ zfree(&sb->buf);
+ strbuf_init(sb, 0);
+ }
+}
+
+char *strbuf_detach(struct strbuf *sb, size_t *sz)
+{
+ char *res = sb->alloc ? sb->buf : NULL;
+ if (sz)
+ *sz = sb->len;
+ strbuf_init(sb, 0);
+ return res;
+}
+
+int strbuf_grow(struct strbuf *sb, size_t extra)
+{
+ char *buf;
+ size_t nr = sb->len + extra + 1;
+
+ if (nr < sb->alloc)
+ return 0;
+
+ if (nr <= sb->len)
+ return -E2BIG;
+
+ if (alloc_nr(sb->alloc) > nr)
+ nr = alloc_nr(sb->alloc);
+
+ /*
+ * Note that sb->buf == strbuf_slopbuf if sb->alloc == 0, and it is
+ * a static variable. Thus we have to avoid passing it to realloc.
+ */
+ buf = realloc(sb->alloc ? sb->buf : NULL, nr * sizeof(*buf));
+ if (!buf)
+ return -ENOMEM;
+
+ sb->buf = buf;
+ sb->alloc = nr;
+ return 0;
+}
+
+int strbuf_addch(struct strbuf *sb, int c)
+{
+ int ret = strbuf_grow(sb, 1);
+ if (ret)
+ return ret;
+
+ sb->buf[sb->len++] = c;
+ sb->buf[sb->len] = '\0';
+ return 0;
+}
+
+int strbuf_add(struct strbuf *sb, const void *data, size_t len)
+{
+ int ret = strbuf_grow(sb, len);
+ if (ret)
+ return ret;
+
+ memcpy(sb->buf + sb->len, data, len);
+ return strbuf_setlen(sb, sb->len + len);
+}
+
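+/*
+ * Grow-and-retry formatting: vsnprintf() is tried into the available tail
+ * of the buffer first; if the result was truncated, the buffer is grown
+ * and the formatting redone from the copied va_list (a va_list cannot be
+ * reused once vsnprintf() has consumed it, hence the va_copy()).
+ */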
+static int strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+{
+ int len, ret;
+ va_list ap_saved;
+
+ if (!strbuf_avail(sb)) {
+ ret = strbuf_grow(sb, 64);
+ if (ret)
+ return ret;
+ }
+
+ va_copy(ap_saved, ap);
+ len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap);
+ if (len < 0) {
+ va_end(ap_saved);
+ return len;
+ }
+ if (len > strbuf_avail(sb)) {
+ ret = strbuf_grow(sb, len);
+ if (ret) {
+ va_end(ap_saved);
+ return ret;
+ }
+ len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap_saved);
+ if (len > strbuf_avail(sb)) {
+ pr_debug("this should not happen, your vsnprintf is broken");
+ va_end(ap_saved);
+ return -EINVAL;
+ }
+ }
+ va_end(ap_saved);
+ return strbuf_setlen(sb, sb->len + len);
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...)
+{
+ va_list ap;
+ int ret;
+
+ va_start(ap, fmt);
+ ret = strbuf_addv(sb, fmt, ap);
+ va_end(ap);
+ return ret;
+}
+
+ssize_t strbuf_read(struct strbuf *sb, int fd, ssize_t hint)
+{
+ size_t oldlen = sb->len;
+ size_t oldalloc = sb->alloc;
+ int ret;
+
+ ret = strbuf_grow(sb, hint ? hint : 8192);
+ if (ret)
+ return ret;
+
+ for (;;) {
+ ssize_t cnt;
+
+ cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1);
+ if (cnt < 0) {
+ if (oldalloc == 0)
+ strbuf_release(sb);
+ else
+ strbuf_setlen(sb, oldlen);
+ return cnt;
+ }
+ if (!cnt)
+ break;
+ sb->len += cnt;
+ ret = strbuf_grow(sb, 8192);
+ if (ret)
+ return ret;
+ }
+
+ sb->buf[sb->len] = '\0';
+ return sb->len - oldlen;
+}
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
new file mode 100644
index 000000000..ea94d8628
--- /dev/null
+++ b/tools/perf/util/strbuf.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRBUF_H
+#define __PERF_STRBUF_H
+
+/*
+ * Strbufs can be used in many ways: as a byte array, or to store
+ * arbitrarily long, overflow-safe strings.
+ *
+ * Strbufs have some invariants that are very important to keep in mind:
+ *
+ * 1. the ->buf member is always malloc-ed, hence strbufs can be used to
+ * build complex strings/buffers whose final size isn't easily known.
+ *
+ * It is NOT legal to copy the ->buf pointer away.
+ * `strbuf_detach' is the operation that detaches a buffer from its shell
+ * while keeping the shell valid wrt its invariants.
+ *
+ * 2. the ->buf member is a byte array that has at least ->len + 1 bytes
+ * allocated. The extra byte is used to store a '\0', allowing the ->buf
+ * member to be a valid C-string. Every strbuf function ensures this
+ * invariant is preserved.
+ *
+ * Note that it is OK to "play" with the buffer directly if you work it
+ * that way:
+ *
+ * strbuf_grow(sb, SOME_SIZE);
+ * ... Here, the memory array starting at sb->buf, and of length
+ * ... strbuf_avail(sb) is all yours, and you are sure that
+ * ... strbuf_avail(sb) is at least SOME_SIZE.
+ * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE);
+ *
+ * Of course, SOME_OTHER_SIZE must be less than or equal to strbuf_avail(sb).
+ *
+ * Doing so is safe, though if it has to be done in many places, adding the
+ * missing API to the strbuf module is the way to go.
+ *
+ * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1
+ * even if it's true in the current implementation. ->alloc is essentially a
+ * "private" member that should not be messed with.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <string.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+extern char strbuf_slopbuf[];
+struct strbuf {
+ size_t alloc;
+ size_t len;
+ char *buf;
+};
+
+#define STRBUF_INIT { 0, 0, strbuf_slopbuf }
+
+/*----- strbuf life cycle -----*/
+int strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
+
+/*----- strbuf size related -----*/
+static inline ssize_t strbuf_avail(const struct strbuf *sb) {
+ return sb->alloc ? sb->alloc - sb->len - 1 : 0;
+}
+
+int strbuf_grow(struct strbuf *buf, size_t);
+
+static inline int strbuf_setlen(struct strbuf *sb, size_t len) {
+ if (!sb->alloc) {
+ int ret = strbuf_grow(sb, 0);
+ if (ret)
+ return ret;
+ }
+ assert(len < sb->alloc);
+ sb->len = len;
+ sb->buf[len] = '\0';
+ return 0;
+}
+
+/*----- add data in your buffer -----*/
+int strbuf_addch(struct strbuf *sb, int c);
+
+int strbuf_add(struct strbuf *buf, const void *, size_t);
+static inline int strbuf_addstr(struct strbuf *sb, const char *s) {
+ return strbuf_add(sb, s, strlen(s));
+}
+
+int strbuf_addf(struct strbuf *sb, const char *fmt, ...) __printf(2, 3);
+
+/* XXX: if read fails, any partial read is undone */
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+
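+/*
+ * A minimal life-cycle sketch of the API above:
+ *
+ * struct strbuf sb = STRBUF_INIT;
+ * char *res;
+ *
+ * if (strbuf_addf(&sb, "%s:%d", "cpu", 0) < 0)
+ * return -1; (on error sb stays valid; strbuf_release() it)
+ * res = strbuf_detach(&sb, NULL); (caller now owns res, free() it)
+ */
+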
+#endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
new file mode 100644
index 000000000..7f3253d44
--- /dev/null
+++ b/tools/perf/util/strfilter.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "util.h"
+#include "string2.h"
+#include "strfilter.h"
+
+#include <errno.h>
+#include "sane_ctype.h"
+
+/* Operators */
+static const char *OP_and = "&"; /* Logical AND */
+static const char *OP_or = "|"; /* Logical OR */
+static const char *OP_not = "!"; /* Logical NOT */
+
+#define is_operator(c) ((c) == '|' || (c) == '&' || (c) == '!')
+#define is_separator(c) (is_operator(c) || (c) == '(' || (c) == ')')
+
+static void strfilter_node__delete(struct strfilter_node *node)
+{
+ if (node) {
+ if (node->p && !is_operator(*node->p))
+ zfree((char **)&node->p);
+ strfilter_node__delete(node->l);
+ strfilter_node__delete(node->r);
+ free(node);
+ }
+}
+
+void strfilter__delete(struct strfilter *filter)
+{
+ if (filter) {
+ strfilter_node__delete(filter->root);
+ free(filter);
+ }
+}
+
+static const char *get_token(const char *s, const char **e)
+{
+ const char *p;
+
+ while (isspace(*s)) /* Skip spaces */
+ s++;
+
+ if (*s == '\0') {
+ p = s;
+ goto end;
+ }
+
+ p = s + 1;
+ if (!is_separator(*s)) {
+ /* End search */
+retry:
+ while (*p && !is_separator(*p) && !isspace(*p))
+ p++;
+ /* Escape and special case: '!' is also used in glob pattern */
+ if (*(p - 1) == '\\' || (*p == '!' && *(p - 1) == '[')) {
+ p++;
+ goto retry;
+ }
+ }
+end:
+ *e = p;
+ return s;
+}
+
+static struct strfilter_node *strfilter_node__alloc(const char *op,
+ struct strfilter_node *l,
+ struct strfilter_node *r)
+{
+ struct strfilter_node *node = zalloc(sizeof(*node));
+
+ if (node) {
+ node->p = op;
+ node->l = l;
+ node->r = r;
+ }
+
+ return node;
+}
+
+static struct strfilter_node *strfilter_node__new(const char *s,
+ const char **ep)
+{
+ struct strfilter_node root, *cur, *last_op;
+ const char *e;
+
+ if (!s)
+ return NULL;
+
+ memset(&root, 0, sizeof(root));
+ last_op = cur = &root;
+
+ s = get_token(s, &e);
+ while (*s != '\0' && *s != ')') {
+ switch (*s) {
+ case '&': /* Exchange last OP->r with an AND node */
+ if (!cur->r || !last_op->r)
+ goto error;
+ cur = strfilter_node__alloc(OP_and, last_op->r, NULL);
+ if (!cur)
+ goto nomem;
+ last_op->r = cur;
+ last_op = cur;
+ break;
+ case '|': /* Exchange the root with an OR node */
+ if (!cur->r || !root.r)
+ goto error;
+ cur = strfilter_node__alloc(OP_or, root.r, NULL);
+ if (!cur)
+ goto nomem;
+ root.r = cur;
+ last_op = cur;
+ break;
+ case '!': /* Add NOT as a leaf node */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(OP_not, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur = cur->r;
+ break;
+ case '(': /* Recursively parse inside the parentheses */
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__new(s + 1, &s);
+ if (!s)
+ goto nomem;
+ if (!cur->r || *s != ')')
+ goto error;
+ e = s + 1;
+ break;
+ default:
+ if (cur->r)
+ goto error;
+ cur->r = strfilter_node__alloc(NULL, NULL, NULL);
+ if (!cur->r)
+ goto nomem;
+ cur->r->p = strndup(s, e - s);
+ if (!cur->r->p)
+ goto nomem;
+ }
+ s = get_token(e, &e);
+ }
+ if (!cur->r)
+ goto error;
+ *ep = s;
+ return root.r;
+nomem:
+ s = NULL;
+error:
+ *ep = s;
+ strfilter_node__delete(root.r);
+ return NULL;
+}
+
+/*
+ * Parse a filter rule and return a new strfilter.
+ * Returns NULL on failure; *ep is set to NULL if memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err)
+{
+ struct strfilter *filter = zalloc(sizeof(*filter));
+ const char *ep = NULL;
+
+ if (filter)
+ filter->root = strfilter_node__new(rules, &ep);
+
+ if (!filter || !filter->root || *ep != '\0') {
+ if (err)
+ *err = ep;
+ strfilter__delete(filter);
+ filter = NULL;
+ }
+
+ return filter;
+}
+
+static int strfilter__append(struct strfilter *filter, bool _or,
+ const char *rules, const char **err)
+{
+ struct strfilter_node *right, *root;
+ const char *ep = NULL;
+
+ if (!filter || !rules)
+ return -EINVAL;
+
+ right = strfilter_node__new(rules, &ep);
+ if (!right || *ep != '\0') {
+ if (err)
+ *err = ep;
+ goto error;
+ }
+ root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
+ if (!root) {
+ ep = NULL;
+ goto error;
+ }
+
+ filter->root = root;
+ return 0;
+
+error:
+ strfilter_node__delete(right);
+ return ep ? -EINVAL : -ENOMEM;
+}
+
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+ return strfilter__append(filter, true, rules, err);
+}
+
+int strfilter__and(struct strfilter *filter, const char *rules,
+ const char **err)
+{
+ return strfilter__append(filter, false, rules, err);
+}
+
+static bool strfilter_node__compare(struct strfilter_node *node,
+ const char *str)
+{
+ if (!node || !node->p)
+ return false;
+
+ switch (*node->p) {
+ case '|': /* OR */
+ return strfilter_node__compare(node->l, str) ||
+ strfilter_node__compare(node->r, str);
+ case '&': /* AND */
+ return strfilter_node__compare(node->l, str) &&
+ strfilter_node__compare(node->r, str);
+ case '!': /* NOT */
+ return !strfilter_node__compare(node->r, str);
+ default:
+ return strglobmatch(str, node->p);
+ }
+}
+
+/* Return true if STR matches the filter rules */
+bool strfilter__compare(struct strfilter *filter, const char *str)
+{
+ if (!filter)
+ return false;
+ return strfilter_node__compare(filter->root, str);
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/* sprint node in parentheses if needed */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+ int len;
+ int pt = node->r ? 2 : 0; /* don't need to check node->l */
+
+ if (buf && pt)
+ *buf++ = '(';
+ len = strfilter_node__sprint(node, buf);
+ if (len < 0)
+ return len;
+ if (buf && pt)
+ *(buf + len) = ')';
+ return len + pt;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+ int len = 0, rlen;
+
+ if (!node || !node->p)
+ return -EINVAL;
+
+ switch (*node->p) {
+ case '|':
+ case '&':
+ len = strfilter_node__sprint_pt(node->l, buf);
+ if (len < 0)
+ return len;
+ __fallthrough;
+ case '!':
+ if (buf) {
+ *(buf + len++) = *node->p;
+ buf += len;
+ } else
+ len++;
+ rlen = strfilter_node__sprint_pt(node->r, buf);
+ if (rlen < 0)
+ return rlen;
+ len += rlen;
+ break;
+ default:
+ len = strlen(node->p);
+ if (buf)
+ strcpy(buf, node->p);
+ }
+
+ return len;
+}
+
+char *strfilter__string(struct strfilter *filter)
+{
+ int len;
+ char *ret = NULL;
+
+ len = strfilter_node__sprint(filter->root, NULL);
+ if (len < 0)
+ return NULL;
+
+ ret = malloc(len + 1);
+ if (ret)
+ strfilter_node__sprint(filter->root, ret);
+
+ return ret;
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
new file mode 100644
index 000000000..e0c25a40f
--- /dev/null
+++ b/tools/perf/util/strfilter.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRFILTER_H
+#define __PERF_STRFILTER_H
+/* General purpose glob matching filter */
+
+#include <linux/list.h>
+#include <stdbool.h>
+
+/* A node of string filter */
+struct strfilter_node {
+ struct strfilter_node *l; /* Tree left branch (for &,|) */
+ struct strfilter_node *r; /* Tree right branch (for !,&,|) */
+ const char *p; /* Operator or rule */
+};
+
+/* String filter */
+struct strfilter {
+ struct strfilter_node *root;
+};
+
+/**
+ * strfilter__new - Create a new string filter
+ * @rules: Filter rule, which is a combination of glob expressions.
+ * @err: Pointer via which the location of an error in @rules is reported
+ *
+ * Parse @rules and return a new strfilter. Returns NULL if an error is
+ * detected; in that case *@err points at where the error was detected,
+ * and *@err is NULL if a memory allocation failed.
+ */
+struct strfilter *strfilter__new(const char *rules, const char **err);
+
+/**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be joined to @filter under a new
+ * logical-or root node.
+ * @err: Pointer via which the location of an error in @rules is reported
+ *
+ * Parse @rules and join it to @filter by logical-or.
+ * Return 0 on success, or a negative error code.
+ */
+int strfilter__or(struct strfilter *filter,
+ const char *rules, const char **err);
+
+/**
+ * strfilter__and - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be joined to @filter under a new
+ * logical-and root node.
+ * @err: Pointer via which the location of an error in @rules is reported
+ *
+ * Parse @rules and join it to @filter by logical-and.
+ * Return 0 on success, or a negative error code.
+ */
+int strfilter__and(struct strfilter *filter,
+ const char *rules, const char **err);
+
+/**
+ * strfilter__compare - compare given string and a string filter
+ * @filter: String filter
+ * @str: target string
+ *
+ * Compare @str against @filter. Return true if @str matches the rules.
+ */
+bool strfilter__compare(struct strfilter *filter, const char *str);
+
+/**
+ * strfilter__delete - delete a string filter
+ * @filter: String filter to delete
+ *
+ * Delete @filter.
+ */
+void strfilter__delete(struct strfilter *filter);
+
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This is useful for debug
+ * messages. Note that the returned string must be freed by the caller.
+ */
+char *strfilter__string(struct strfilter *filter);
+
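+/*
+ * A short usage sketch; rules combine glob patterns with '!', '&', '|'
+ * and parentheses:
+ *
+ * const char *err = NULL;
+ * struct strfilter *f = strfilter__new("(vfs_*|sys_*)&!*exit*", &err);
+ *
+ * if (f) {
+ * strfilter__compare(f, "vfs_read"); (true)
+ * strfilter__compare(f, "sys_exit"); (false)
+ * strfilter__delete(f);
+ * }
+ */
+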
+#endif
diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c
new file mode 100644
index 000000000..d8bfd0c4d
--- /dev/null
+++ b/tools/perf/util/string.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "string2.h"
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <stdlib.h>
+
+#include "sane_ctype.h"
+
+#define K 1024LL
+/*
+ * perf_atoll()
+ * Parse (\d+)(b|B|kb|KB|mb|MB|gb|GB|tb|TB) (e.g. "256MB")
+ * and return its numeric value
+ */
+s64 perf_atoll(const char *str)
+{
+ s64 length;
+ char *p;
+ char c;
+
+ if (!isdigit(str[0]))
+ goto out_err;
+
+ length = strtoll(str, &p, 10);
+ switch (c = *p++) {
+ case 'b': case 'B':
+ if (*p)
+ goto out_err;
+
+ __fallthrough;
+ case '\0':
+ return length;
+ default:
+ goto out_err;
+ /* two-letter suffixes */
+ case 'k': case 'K':
+ length <<= 10;
+ break;
+ case 'm': case 'M':
+ length <<= 20;
+ break;
+ case 'g': case 'G':
+ length <<= 30;
+ break;
+ case 't': case 'T':
+ length <<= 40;
+ break;
+ }
+ /* we want the cases to match */
+ if (islower(c)) {
+ if (strcmp(p, "b") != 0)
+ goto out_err;
+ } else {
+ if (strcmp(p, "B") != 0)
+ goto out_err;
+ }
+ return length;
+
+out_err:
+ return -1;
+}
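+
+/*
+ * E.g. perf_atoll("2") == 2 and perf_atoll("256MB") == 256 << 20, while
+ * perf_atoll("4k") == -1: the two-letter suffixes need the trailing b/B
+ * in matching case ("4kb").
+ */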
+
+/*
+ * Helper function for splitting a string into an argv-like array.
+ * Originally copied from lib/argv_split.c.
+ */
+static const char *skip_sep(const char *cp)
+{
+ while (*cp && isspace(*cp))
+ cp++;
+
+ return cp;
+}
+
+static const char *skip_arg(const char *cp)
+{
+ while (*cp && !isspace(*cp))
+ cp++;
+
+ return cp;
+}
+
+static int count_argc(const char *str)
+{
+ int count = 0;
+
+ while (*str) {
+ str = skip_sep(str);
+ if (*str) {
+ count++;
+ str = skip_arg(str);
+ }
+ }
+
+ return count;
+}
+
+/**
+ * argv_free - free an argv
+ * @argv: the argument vector to be freed
+ *
+ * Frees an argv and the strings it points to.
+ */
+void argv_free(char **argv)
+{
+ char **p;
+ for (p = argv; *p; p++) {
+ free(*p);
+ *p = NULL;
+ }
+
+ free(argv);
+}
+
+/**
+ * argv_split - split a string at whitespace, returning an argv
+ * @str: the string to be split
+ * @argcp: returned argument count
+ *
+ * Returns an array of pointers to strings which are split out from
+ * @str. This is performed by strictly splitting on white-space; no
+ * quote processing is performed. Multiple whitespace characters are
+ * considered to be a single argument separator. The returned array
+ * is always NULL-terminated. Returns NULL on memory allocation
+ * failure.
+ */
+char **argv_split(const char *str, int *argcp)
+{
+ int argc = count_argc(str);
+ char **argv = calloc(argc + 1, sizeof(*argv));
+ char **argvp;
+
+ if (argv == NULL)
+ goto out;
+
+ if (argcp)
+ *argcp = argc;
+
+ argvp = argv;
+
+ while (*str) {
+ str = skip_sep(str);
+
+ if (*str) {
+ const char *p = str;
+ char *t;
+
+ str = skip_arg(str);
+
+ t = strndup(p, str-p);
+ if (t == NULL)
+ goto fail;
+ *argvp++ = t;
+ }
+ }
+ *argvp = NULL;
+
+out:
+ return argv;
+
+fail:
+ argv_free(argv);
+ return NULL;
+}
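+
+/*
+ * Illustrative usage:
+ *
+ * int argc;
+ * char **argv = argv_split("perf record  -a", &argc);
+ *
+ * yields argc == 3 and argv == { "perf", "record", "-a", NULL };
+ * release it with argv_free(argv).
+ */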
+
+/* Character class matching */
+static bool __match_charclass(const char *pat, char c, const char **npat)
+{
+ bool complement = false, ret = true;
+
+ if (*pat == '!') {
+ complement = true;
+ pat++;
+ }
+ if (*pat++ == c) /* First character is special */
+ goto end;
+
+ while (*pat && *pat != ']') { /* Matching */
+ if (*pat == '-' && *(pat + 1) != ']') { /* Range */
+ if (*(pat - 1) <= c && c <= *(pat + 1))
+ goto end;
+ if (*(pat - 1) > *(pat + 1))
+ goto error;
+ pat += 2;
+ } else if (*pat++ == c)
+ goto end;
+ }
+ if (!*pat)
+ goto error;
+ ret = false;
+
+end:
+ while (*pat && *pat != ']') /* Searching closing */
+ pat++;
+ if (!*pat)
+ goto error;
+ *npat = pat + 1;
+ return complement ? !ret : ret;
+
+error:
+ return false;
+}
+
+/* Glob/lazy pattern matching */
+static bool __match_glob(const char *str, const char *pat, bool ignore_space,
+ bool case_ins)
+{
+ while (*str && *pat && *pat != '*') {
+ if (ignore_space) {
+ /* Ignore spaces for lazy matching */
+ if (isspace(*str)) {
+ str++;
+ continue;
+ }
+ if (isspace(*pat)) {
+ pat++;
+ continue;
+ }
+ }
+ if (*pat == '?') { /* Matches any single character */
+ str++;
+ pat++;
+ continue;
+ } else if (*pat == '[') { /* Character classes/Ranges */
+ if (__match_charclass(pat + 1, *str, &pat)) {
+ str++;
+ continue;
+ } else
+ return false;
+ } else if (*pat == '\\') /* Escaped char match as normal char */
+ pat++;
+ if (case_ins) {
+ if (tolower(*str) != tolower(*pat))
+ return false;
+ } else if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (__match_glob(str++, pat, ignore_space, case_ins))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
+/**
+ * strglobmatch - glob expression pattern matching
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This returns true if @str matches @pat. @pat can include wildcards
+ * ('*','?') and character classes ([CHARS]; complementation and ranges are
+ * also supported). The escape character ('\') makes a special character
+ * match as a normal one.
+ *
+ * Note: if @pat syntax is broken, this always returns false.
+ */
+bool strglobmatch(const char *str, const char *pat)
+{
+ return __match_glob(str, pat, false, false);
+}
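+
+/*
+ * For instance:
+ * strglobmatch("vfs_read", "vfs_*") => true
+ * strglobmatch("sys_open", "sys_?pen") => true
+ * strglobmatch("abc", "[!x-z]bc") => true (complemented range)
+ * strglobmatch("a*c", "a\*c") => true (escaped '*')
+ */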
+
+bool strglobmatch_nocase(const char *str, const char *pat)
+{
+ return __match_glob(str, pat, false, true);
+}
+
+/**
+ * strlazymatch - matching pattern strings lazily with glob pattern
+ * @str: the target string to match
+ * @pat: the pattern string to match
+ *
+ * This is similar to strglobmatch, except this ignores spaces in
+ * the target string.
+ */
+bool strlazymatch(const char *str, const char *pat)
+{
+ return __match_glob(str, pat, true, false);
+}
+
+/**
+ * strtailcmp - Compare the tail of two strings
+ * @s1: 1st string to be compared
+ * @s2: 2nd string to be compared
+ *
+ * Return 0 if the whole of either string matches the tail of the other.
+ */
+int strtailcmp(const char *s1, const char *s2)
+{
+ int i1 = strlen(s1);
+ int i2 = strlen(s2);
+ while (--i1 >= 0 && --i2 >= 0) {
+ if (s1[i1] != s2[i2])
+ return s1[i1] - s2[i2];
+ }
+ return 0;
+}
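+
+/* E.g. strtailcmp("util/string.c", "string.c") == 0, since the shorter
+ * string equals the tail of the longer one. */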
+
+/**
+ * strxfrchar - Locate and replace character in @s
+ * @s: The string to be searched/changed.
+ * @from: Source character to be replaced.
+ * @to: Destination character.
+ *
+ * Return pointer to the changed string.
+ */
+char *strxfrchar(char *s, char from, char to)
+{
+ char *p = s;
+
+ while ((p = strchr(p, from)) != NULL)
+ *p++ = to;
+
+ return s;
+}
+
+/**
+ * ltrim - Removes leading whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Return pointer to the first non-whitespace character in @s.
+ */
+char *ltrim(char *s)
+{
+ while (isspace(*s))
+ s++;
+
+ return s;
+}
+
+/**
+ * rtrim - Removes trailing whitespace from @s.
+ * @s: The string to be stripped.
+ *
+ * Note that the first trailing whitespace is replaced with a %NUL-terminator
+ * in the given string @s. Returns @s.
+ */
+char *rtrim(char *s)
+{
+ size_t size = strlen(s);
+ char *end;
+
+ if (!size)
+ return s;
+
+ end = s + size - 1;
+ while (end >= s && isspace(*end))
+ end--;
+ *(end + 1) = '\0';
+
+ return s;
+}
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints)
+{
+ /*
+ * FIXME: replace this with an expression using log10() when we
+ * find a suitable implementation, maybe the one in the dvb drivers...
+ *
+ * "%s == %d || " = log10(MAXINT) * 2 + 8 chars for the operators
+ */
+ size_t size = nints * 28 + 1; /* \0 */
+ size_t i, printed = 0;
+ char *expr = malloc(size);
+
+ if (expr) {
+ const char *or_and = "||", *eq_neq = "==";
+ char *e = expr;
+
+ if (!in) {
+ or_and = "&&";
+ eq_neq = "!=";
+ }
+
+ for (i = 0; i < nints; ++i) {
+ if (printed == size)
+ goto out_err_overflow;
+
+ if (i > 0)
+ printed += scnprintf(e + printed, size - printed, " %s ", or_and);
+ printed += scnprintf(e + printed, size - printed,
+ "%s %s %d", var, eq_neq, ints[i]);
+ }
+ }
+
+ return expr;
+
+out_err_overflow:
+ free(expr);
+ return NULL;
+}
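+
+/*
+ * E.g. asprintf_expr_inout_ints("id", true, 2, (int []){ 1, 3 }) returns
+ * "id == 1 || id == 3", and with in == false it returns
+ * "id != 1 && id != 3".
+ */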
+
+/* Like strpbrk(), but does not stop at a character that is escaped (i.e. right after a backslash) */
+char *strpbrk_esc(char *str, const char *stopset)
+{
+ char *ptr;
+
+ do {
+ ptr = strpbrk(str, stopset);
+ if (ptr == str ||
+ (ptr == str + 1 && *(ptr - 1) != '\\'))
+ break;
+ str = ptr + 1;
+ } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\');
+
+ return ptr;
+}
+
+/* Like strdup(), but does not copy single (escaping) backslashes */
+char *strdup_esc(const char *str)
+{
+ char *s, *d, *p, *ret = strdup(str);
+
+ if (!ret)
+ return NULL;
+
+ d = strchr(ret, '\\');
+ if (!d)
+ return ret;
+
+ s = d + 1;
+ do {
+ if (*s == '\0') {
+ *d = '\0';
+ break;
+ }
+ p = strchr(s + 1, '\\');
+ if (p) {
+ memmove(d, s, p - s);
+ d += p - s;
+ s = p + 1;
+ } else
+ memmove(d, s, strlen(s) + 1);
+ } while (p);
+
+ return ret;
+}
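+
+/*
+ * E.g. strpbrk_esc("a\,b,c", ",") returns a pointer to the second comma
+ * (the first one is escaped), and strdup_esc("a\,b") yields "a,b".
+ */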
diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h
new file mode 100644
index 000000000..4c68a09b9
--- /dev/null
+++ b/tools/perf/util/string2.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_STRING_H
+#define PERF_STRING_H
+
+#include <linux/types.h>
+#include <stddef.h>
+#include <string.h>
+
+s64 perf_atoll(const char *str);
+char **argv_split(const char *str, int *argcp);
+void argv_free(char **argv);
+bool strglobmatch(const char *str, const char *pat);
+bool strglobmatch_nocase(const char *str, const char *pat);
+bool strlazymatch(const char *str, const char *pat);
+static inline bool strisglob(const char *str)
+{
+ return strpbrk(str, "*?[") != NULL;
+}
+int strtailcmp(const char *s1, const char *s2);
+char *strxfrchar(char *s, char from, char to);
+
+char *ltrim(char *s);
+char *rtrim(char *s);
+
+static inline char *trim(char *s)
+{
+ return ltrim(rtrim(s));
+}
+
+char *asprintf_expr_inout_ints(const char *var, bool in, size_t nints, int *ints);
+
+static inline char *asprintf_expr_in_ints(const char *var, size_t nints, int *ints)
+{
+ return asprintf_expr_inout_ints(var, true, nints, ints);
+}
+
+static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int *ints)
+{
+ return asprintf_expr_inout_ints(var, false, nints, ints);
+}
+
+char *strpbrk_esc(char *str, const char *stopset);
+char *strdup_esc(const char *str);
+
+#endif /* PERF_STRING_H */
diff --git a/tools/perf/util/strlist.c b/tools/perf/util/strlist.c
new file mode 100644
index 000000000..9de5434bb
--- /dev/null
+++ b/tools/perf/util/strlist.c
@@ -0,0 +1,209 @@
+/*
+ * (c) 2009 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Licensed under the GPLv2.
+ */
+
+#include "strlist.h"
+#include "util.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static
+struct rb_node *strlist__node_new(struct rblist *rblist, const void *entry)
+{
+ const char *s = entry;
+ struct rb_node *rc = NULL;
+ struct strlist *strlist = container_of(rblist, struct strlist, rblist);
+ struct str_node *snode = malloc(sizeof(*snode));
+
+ if (snode != NULL) {
+ if (strlist->dupstr) {
+ s = strdup(s);
+ if (s == NULL)
+ goto out_delete;
+ }
+ snode->s = s;
+ rc = &snode->rb_node;
+ }
+
+ return rc;
+
+out_delete:
+ free(snode);
+ return NULL;
+}
+
+static void str_node__delete(struct str_node *snode, bool dupstr)
+{
+ if (dupstr)
+ zfree((char **)&snode->s);
+ free(snode);
+}
+
+static
+void strlist__node_delete(struct rblist *rblist, struct rb_node *rb_node)
+{
+ struct strlist *slist = container_of(rblist, struct strlist, rblist);
+ struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+
+ str_node__delete(snode, slist->dupstr);
+}
+
+static int strlist__node_cmp(struct rb_node *rb_node, const void *entry)
+{
+ const char *str = entry;
+ struct str_node *snode = container_of(rb_node, struct str_node, rb_node);
+
+ return strcmp(snode->s, str);
+}
+
+int strlist__add(struct strlist *slist, const char *new_entry)
+{
+ return rblist__add_node(&slist->rblist, new_entry);
+}
+
+int strlist__load(struct strlist *slist, const char *filename)
+{
+ char entry[1024];
+ int err;
+ FILE *fp = fopen(filename, "r");
+
+ if (fp == NULL)
+ return -errno;
+
+ while (fgets(entry, sizeof(entry), fp) != NULL) {
+ const size_t len = strlen(entry);
+
+ if (len == 0)
+ continue;
+ entry[len - 1] = '\0';
+
+ err = strlist__add(slist, entry);
+ if (err != 0)
+ goto out;
+ }
+
+ err = 0;
+out:
+ fclose(fp);
+ return err;
+}
+
+void strlist__remove(struct strlist *slist, struct str_node *snode)
+{
+ rblist__remove_node(&slist->rblist, &snode->rb_node);
+}
+
+struct str_node *strlist__find(struct strlist *slist, const char *entry)
+{
+ struct str_node *snode = NULL;
+ struct rb_node *rb_node = rblist__find(&slist->rblist, entry);
+
+ if (rb_node)
+ snode = container_of(rb_node, struct str_node, rb_node);
+
+ return snode;
+}
+
+static int strlist__parse_list_entry(struct strlist *slist, const char *s,
+ const char *subst_dir)
+{
+ int err;
+ char *subst = NULL;
+
+ if (strncmp(s, "file://", 7) == 0)
+ return strlist__load(slist, s + 7);
+
+ if (subst_dir) {
+ err = -ENOMEM;
+ if (asprintf(&subst, "%s/%s", subst_dir, s) < 0)
+ goto out;
+
+ if (access(subst, F_OK) == 0) {
+ err = strlist__load(slist, subst);
+ goto out;
+ }
+
+ if (slist->file_only) {
+ err = -ENOENT;
+ goto out;
+ }
+ }
+
+ err = strlist__add(slist, s);
+out:
+ free(subst);
+ return err;
+}
+
+static int strlist__parse_list(struct strlist *slist, const char *s, const char *subst_dir)
+{
+ char *sep;
+ int err;
+
+ while ((sep = strchr(s, ',')) != NULL) {
+ *sep = '\0';
+ err = strlist__parse_list_entry(slist, s, subst_dir);
+ *sep = ',';
+ if (err != 0)
+ return err;
+ s = sep + 1;
+ }
+
+ return *s ? strlist__parse_list_entry(slist, s, subst_dir) : 0;
+}
+
+struct strlist *strlist__new(const char *list, const struct strlist_config *config)
+{
+ struct strlist *slist = malloc(sizeof(*slist));
+
+ if (slist != NULL) {
+ bool dupstr = true;
+ bool file_only = false;
+ const char *dirname = NULL;
+
+ if (config) {
+ dupstr = !config->dont_dupstr;
+ dirname = config->dirname;
+ file_only = config->file_only;
+ }
+
+ rblist__init(&slist->rblist);
+ slist->rblist.node_cmp = strlist__node_cmp;
+ slist->rblist.node_new = strlist__node_new;
+ slist->rblist.node_delete = strlist__node_delete;
+
+ slist->dupstr = dupstr;
+ slist->file_only = file_only;
+
+ if (list && strlist__parse_list(slist, list, dirname) != 0)
+ goto out_error;
+ }
+
+ return slist;
+out_error:
+ free(slist);
+ return NULL;
+}
+
+void strlist__delete(struct strlist *slist)
+{
+ if (slist != NULL)
+ rblist__delete(&slist->rblist);
+}
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx)
+{
+ struct str_node *snode = NULL;
+ struct rb_node *rb_node;
+
+ rb_node = rblist__entry(&slist->rblist, idx);
+ if (rb_node)
+ snode = container_of(rb_node, struct str_node, rb_node);
+
+ return snode;
+}
diff --git a/tools/perf/util/strlist.h b/tools/perf/util/strlist.h
new file mode 100644
index 000000000..d58f1e08b
--- /dev/null
+++ b/tools/perf/util/strlist.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_STRLIST_H
+#define __PERF_STRLIST_H
+
+#include <linux/rbtree.h>
+#include <stdbool.h>
+
+#include "rblist.h"
+
+struct str_node {
+ struct rb_node rb_node;
+ const char *s;
+};
+
+struct strlist {
+ struct rblist rblist;
+ bool dupstr;
+ bool file_only;
+};
+
+/*
+ * @file_only: When dirname is present, treat entries strictly as filenames:
+ * an entry is not added to the list if dirname/entry is not
+ * found
+ */
+struct strlist_config {
+ bool dont_dupstr;
+ bool file_only;
+ const char *dirname;
+};
+
+struct strlist *strlist__new(const char *slist, const struct strlist_config *config);
+void strlist__delete(struct strlist *slist);
+
+void strlist__remove(struct strlist *slist, struct str_node *sn);
+int strlist__load(struct strlist *slist, const char *filename);
+int strlist__add(struct strlist *slist, const char *str);
+
+struct str_node *strlist__entry(const struct strlist *slist, unsigned int idx);
+struct str_node *strlist__find(struct strlist *slist, const char *entry);
+
+static inline bool strlist__has_entry(struct strlist *slist, const char *entry)
+{
+ return strlist__find(slist, entry) != NULL;
+}
+
+static inline bool strlist__empty(const struct strlist *slist)
+{
+ return rblist__empty(&slist->rblist);
+}
+
+static inline unsigned int strlist__nr_entries(const struct strlist *slist)
+{
+ return rblist__nr_entries(&slist->rblist);
+}
+
+/* For strlist iteration */
+static inline struct str_node *strlist__first(struct strlist *slist)
+{
+ struct rb_node *rn = rb_first(&slist->rblist.entries);
+ return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
+}
+static inline struct str_node *strlist__next(struct str_node *sn)
+{
+ struct rb_node *rn;
+ if (!sn)
+ return NULL;
+ rn = rb_next(&sn->rb_node);
+ return rn ? rb_entry(rn, struct str_node, rb_node) : NULL;
+}
+
+/**
+ * strlist__for_each_entry - iterate over a strlist
+ * @pos: the &struct str_node to use as a loop cursor.
+ * @slist: the &struct strlist for loop.
+ */
+#define strlist__for_each_entry(pos, slist) \
+ for (pos = strlist__first(slist); pos; pos = strlist__next(pos))
+
+/**
+ * strlist__for_each_entry_safe - iterate over a strlist safe against removal
+ * of str_node
+ * @pos: the &struct str_node to use as a loop cursor.
+ * @n: another &struct str_node to use as temporary storage.
+ * @slist: the &struct strlist for loop.
+ */
+#define strlist__for_each_entry_safe(pos, n, slist) \
+ for (pos = strlist__first(slist), n = strlist__next(pos); pos;\
+ pos = n, n = strlist__next(n))
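+
+/*
+ * Putting it together, an illustrative sketch:
+ *
+ * struct strlist *slist = strlist__new("sys_open,sys_close", NULL);
+ * struct str_node *pos;
+ *
+ * if (slist) {
+ * strlist__for_each_entry(pos, slist)
+ * printf("%s\n", pos->s);
+ * strlist__delete(slist);
+ * }
+ */
+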
+#endif /* __PERF_STRLIST_H */
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
new file mode 100644
index 000000000..f735ee038
--- /dev/null
+++ b/tools/perf/util/svghelper.c
@@ -0,0 +1,809 @@
+/*
+ * svghelper.c - helper functions for outputting svg
+ *
+ * (C) Copyright 2009 Intel Corporation
+ *
+ * Authors:
+ * Arjan van de Ven <arjan@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <linux/bitmap.h>
+#include <linux/time64.h>
+
+#include "perf.h"
+#include "svghelper.h"
+#include "util.h"
+#include "cpumap.h"
+
+static u64 first_time, last_time;
+static u64 turbo_frequency, max_freq;
+
+
+#define SLOT_MULT 30.0
+#define SLOT_HEIGHT 25.0
+#define SLOT_HALF (SLOT_HEIGHT / 2)
+
+int svg_page_width = 1000;
+u64 svg_highlight;
+const char *svg_highlight_name;
+
+#define MIN_TEXT_SIZE 0.01
+
+static u64 total_height;
+static FILE *svgfile;
+
+static double cpu2slot(int cpu)
+{
+ return 2 * cpu + 1;
+}
+
+static int *topology_map;
+
+static double cpu2y(int cpu)
+{
+ if (topology_map)
+ return cpu2slot(topology_map[cpu]) * SLOT_MULT;
+ else
+ return cpu2slot(cpu) * SLOT_MULT;
+}
+
+static double time2pixels(u64 __time)
+{
+ double X;
+
+ X = 1.0 * svg_page_width * (__time - first_time) / (last_time - first_time);
+ return X;
+}
+
+/*
+ * Round text sizes so that the svg viewer only needs a discrete
+ * number of renderings of the font: sizes >= 10.0 pass through,
+ * smaller ones snap down to 5.0, 2.5, 1.25, ...
+ */
+static double round_text_size(double size)
+{
+ int loop = 100;
+ double target = 10.0;
+
+ if (size >= 10.0)
+ return size;
+ while (loop--) {
+ if (size >= target)
+ return target;
+ target = target / 2.0;
+ }
+ return size;
+}
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end)
+{
+ int new_width;
+
+ svgfile = fopen(filename, "w");
+ if (!svgfile) {
+ fprintf(stderr, "Cannot open %s for output\n", filename);
+ return;
+ }
+ first_time = start;
+ first_time = first_time / 100000000 * 100000000;
+ last_time = end;
+
+ /*
+ * if the recording is short, we default to a width of 1000, but
+ * for longer recordings we want at least 200 units of width per second
+ */
+ new_width = (last_time - first_time) / 5000000;
+
+ if (new_width > svg_page_width)
+ svg_page_width = new_width;
+
+ total_height = (1 + rows + cpu2slot(cpus)) * SLOT_MULT;
+ fprintf(svgfile, "<?xml version=\"1.0\" standalone=\"no\"?> \n");
+ fprintf(svgfile, "<!DOCTYPE svg SYSTEM \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n");
+ fprintf(svgfile, "<svg width=\"%i\" height=\"%" PRIu64 "\" version=\"1.1\" xmlns=\"http://www.w3.org/2000/svg\">\n", svg_page_width, total_height);
+
+ fprintf(svgfile, "<defs>\n <style type=\"text/css\">\n <![CDATA[\n");
+
+ fprintf(svgfile, " rect { stroke-width: 1; }\n");
+ fprintf(svgfile, " rect.process { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:1; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.process2 { fill:rgb(180,180,180); fill-opacity:0.9; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.process3 { fill:rgb(180,180,180); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.sample { fill:rgb( 0, 0,255); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.sample_hi{ fill:rgb(255,128, 0); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.error { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.net { fill:rgb( 0,128, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.disk { fill:rgb( 0, 0,255); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.sync { fill:rgb(128,128, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.poll { fill:rgb( 0,128,128); fill-opacity:0.2; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.blocked { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.waiting { fill:rgb(224,214, 0); fill-opacity:0.8; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.WAITING { fill:rgb(255,214, 48); fill-opacity:0.6; stroke-width:0; stroke:rgb( 0, 0, 0); } \n");
+ fprintf(svgfile, " rect.cpu { fill:rgb(192,192,192); fill-opacity:0.2; stroke-width:0.5; stroke:rgb(128,128,128); } \n");
+ fprintf(svgfile, " rect.pstate { fill:rgb(128,128,128); fill-opacity:0.8; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c1 { fill:rgb(255,214,214); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c2 { fill:rgb(255,172,172); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c3 { fill:rgb(255,130,130); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c4 { fill:rgb(255, 88, 88); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c5 { fill:rgb(255, 44, 44); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " rect.c6 { fill:rgb(255, 0, 0); fill-opacity:0.5; stroke-width:0; } \n");
+ fprintf(svgfile, " line.pstate { stroke:rgb(255,255, 0); stroke-opacity:0.8; stroke-width:2; } \n");
+
+ fprintf(svgfile, " ]]>\n </style>\n</defs>\n");
+}
+
+static double normalize_height(double height)
+{
+ if (height < 0.25)
+ return 0.25;
+ else if (height < 0.50)
+ return 0.50;
+ else if (height < 0.75)
+ return 0.75;
+ else
+ return 1.00; /* full height */
+}
+
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+ double w = time2pixels(end) - time2pixels(start);
+ height = normalize_height(height);
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start),
+ w,
+ Yslot * SLOT_MULT,
+ SLOT_HALF * height,
+ type);
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+ double w = time2pixels(end) - time2pixels(start);
+ height = normalize_height(height);
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start),
+ w,
+ Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HALF * height,
+ SLOT_HALF * height,
+ type);
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges)
+{
+ double w = time2pixels(end) - time2pixels(start);
+ height = normalize_height(height);
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>fd=%d error=%d merges=%d</title>\n", fd, err, merges);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start),
+ w,
+ Yslot * SLOT_MULT + SLOT_HEIGHT - SLOT_HEIGHT * height,
+ SLOT_HEIGHT * height,
+ type);
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_box(int Yslot, u64 start, u64 end, const char *type)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT, type);
+}
+
+static char *time_to_string(u64 duration);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ fprintf(svgfile, "<title>#%d blocked %s</title>\n", cpu,
+ time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Blocked on:\n%s</desc>\n", backtrace);
+ svg_box(Yslot, start, end, "blocked");
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+ double text_size;
+ const char *type;
+
+ if (!svgfile)
+ return;
+
+ if (svg_highlight && end - start > svg_highlight)
+ type = "sample_hi";
+ else
+ type = "sample";
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<title>#%d running %s</title>\n",
+ cpu, time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(start), time2pixels(end)-time2pixels(start), Yslot * SLOT_MULT, SLOT_HEIGHT,
+ type);
+
+ text_size = (time2pixels(end)-time2pixels(start));
+ if (cpu > 9)
+ text_size = text_size/2;
+ if (text_size > 1.25)
+ text_size = 1.25;
+ text_size = round_text_size(text_size);
+
+ if (text_size > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">%i</text>\n",
+ time2pixels(start), Yslot * SLOT_MULT + SLOT_HEIGHT - 1, text_size, cpu + 1);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+static char *time_to_string(u64 duration)
+{
+ static char text[80];
+
+ text[0] = 0;
+
+ if (duration < NSEC_PER_USEC) /* less than 1 usec */
+ return text;
+
+ if (duration < NSEC_PER_MSEC) { /* less than 1 msec */
+ sprintf(text, "%.1f us", duration / (double)NSEC_PER_USEC);
+ return text;
+ }
+ sprintf(text, "%.1f ms", duration / (double)NSEC_PER_MSEC);
+
+ return text;
+}
+
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace)
+{
+ char *text;
+ const char *style;
+ double font_size;
+
+ if (!svgfile)
+ return;
+
+ style = "waiting";
+
+ if (end-start > 10 * NSEC_PER_MSEC) /* 10 msec */
+ style = "WAITING";
+
+ text = time_to_string(end-start);
+
+ font_size = 1.0 * (time2pixels(end)-time2pixels(start));
+
+ if (font_size > 3)
+ font_size = 3;
+
+ font_size = round_text_size(font_size);
+
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), Yslot * SLOT_MULT);
+ fprintf(svgfile, "<title>#%d waiting %s</title>\n", cpu, time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Waiting on:\n%s</desc>\n", backtrace);
+ fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(end)-time2pixels(start), SLOT_HEIGHT, style);
+ if (font_size > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\"> %s</text>\n",
+ font_size, text);
+ fprintf(svgfile, "</g>\n");
+}
+
+static char *cpu_model(void)
+{
+ static char cpu_m[255];
+ char buf[256];
+ FILE *file;
+
+ cpu_m[0] = 0;
+ /* CPU type */
+ file = fopen("/proc/cpuinfo", "r");
+ if (file) {
+ while (fgets(buf, 255, file)) {
+ if (strstr(buf, "model name")) {
+ strlcpy(cpu_m, &buf[13], 255);
+ break;
+ }
+ }
+ fclose(file);
+ }
+
+ /* Max CPU frequency */
+ file = fopen("/sys/devices/system/cpu/cpu0/cpufreq/scaling_available_frequencies", "r");
+ if (file) {
+ while (fgets(buf, 255, file)) {
+ unsigned int freq;
+ freq = strtoull(buf, NULL, 10);
+ if (freq > max_freq)
+ max_freq = freq;
+ }
+ fclose(file);
+ }
+ return cpu_m;
+}
+
+void svg_cpu_box(int cpu, u64 __max_freq, u64 __turbo_freq)
+{
+ char cpu_string[80];
+ if (!svgfile)
+ return;
+
+ max_freq = __max_freq;
+ turbo_frequency = __turbo_freq;
+
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<rect x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\" class=\"cpu\"/>\n",
+ time2pixels(first_time),
+ time2pixels(last_time)-time2pixels(first_time),
+ cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+ sprintf(cpu_string, "CPU %i", (int)cpu);
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+ 10+time2pixels(first_time), cpu2y(cpu) + SLOT_HEIGHT/2, cpu_string);
+
+ fprintf(svgfile, "<text transform=\"translate(%.8f,%.8f)\" font-size=\"1.25pt\">%s</text>\n",
+ 10+time2pixels(first_time), cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - 4, cpu_model());
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace)
+{
+ double width;
+ const char *type;
+
+ if (!svgfile)
+ return;
+
+ if (svg_highlight && end - start >= svg_highlight)
+ type = "sample_hi";
+ else if (svg_highlight_name && strstr(name, svg_highlight_name))
+ type = "sample_hi";
+ else
+ type = "sample";
+
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\">\n", time2pixels(start), cpu2y(cpu));
+ fprintf(svgfile, "<title>%d %s running %s</title>\n", pid, name, time_to_string(end - start));
+ if (backtrace)
+ fprintf(svgfile, "<desc>Switched because:\n%s</desc>\n", backtrace);
+ fprintf(svgfile, "<rect x=\"0\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+ time2pixels(end)-time2pixels(start), SLOT_MULT+SLOT_HEIGHT, type);
+ width = time2pixels(end)-time2pixels(start);
+ if (width > 6)
+ width = 6;
+
+ width = round_text_size(width);
+
+ if (width > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text transform=\"rotate(90)\" font-size=\"%.8fpt\">%s</text>\n",
+ width, name);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_cstate(int cpu, u64 start, u64 end, int type)
+{
+ double width;
+ char style[128];
+
+ if (!svgfile)
+ return;
+
+
+ fprintf(svgfile, "<g>\n");
+
+ if (type > 6)
+ type = 6;
+ sprintf(style, "c%i", type);
+
+ fprintf(svgfile, "<rect class=\"%s\" x=\"%.8f\" width=\"%.8f\" y=\"%.1f\" height=\"%.1f\"/>\n",
+ style,
+ time2pixels(start), time2pixels(end)-time2pixels(start),
+ cpu2y(cpu), SLOT_MULT+SLOT_HEIGHT);
+
+ width = (time2pixels(end)-time2pixels(start))/2.0;
+ if (width > 6)
+ width = 6;
+
+ width = round_text_size(width);
+
+ if (width > MIN_TEXT_SIZE)
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"%.8fpt\">C%i</text>\n",
+ time2pixels(start), cpu2y(cpu)+width, width, type);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+static char *HzToHuman(unsigned long hz)
+{
+ static char buffer[1024];
+ unsigned long long Hz;
+
+ memset(buffer, 0, 1024);
+
+ Hz = hz;
+
+ /* default: just print the number */
+ sprintf(buffer, "%9lli", Hz);
+
+ if (Hz > 1000)
+ sprintf(buffer, " %6lli MHz", (Hz+500)/1000);
+
+ if (Hz > 1500000)
+ sprintf(buffer, " %6.2f GHz", (Hz+5000.0)/1000000);
+
+ if (Hz == turbo_frequency)
+ sprintf(buffer, "Turbo");
+
+ return buffer;
+}
+
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq)
+{
+ double height = 0;
+
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+
+ if (max_freq)
+ height = freq * 1.0 / max_freq * (SLOT_HEIGHT + SLOT_MULT);
+ height = 1 + cpu2y(cpu) + SLOT_MULT + SLOT_HEIGHT - height;
+ fprintf(svgfile, "<line x1=\"%.8f\" x2=\"%.8f\" y1=\"%.1f\" y2=\"%.1f\" class=\"pstate\"/>\n",
+ time2pixels(start), time2pixels(end), height, height);
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\" font-size=\"0.25pt\">%s</text>\n",
+ time2pixels(start), height+0.9, HzToHuman(freq));
+
+ fprintf(svgfile, "</g>\n");
+}
+
+
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace)
+{
+ double height;
+
+ if (!svgfile)
+ return;
+
+
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<title>%s wakes up %s</title>\n",
+ desc1 ? desc1 : "?",
+ desc2 ? desc2 : "?");
+
+ if (backtrace)
+ fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+ if (row1 < row2) {
+ if (row1) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+ if (desc2)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+ time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT + SLOT_HEIGHT/48, desc2);
+ }
+ if (row2) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, time2pixels(start), row2 * SLOT_MULT);
+ if (desc1)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &gt;</text></g>\n",
+ time2pixels(start), row2 * SLOT_MULT - SLOT_MULT/32, desc1);
+ }
+ } else {
+ if (row2) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/32);
+ if (desc1)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+ time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT + SLOT_MULT/48, desc1);
+ }
+ if (row1) {
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row1 * SLOT_MULT - SLOT_MULT/32, time2pixels(start), row1 * SLOT_MULT);
+ if (desc2)
+ fprintf(svgfile, "<g transform=\"translate(%.8f,%.8f)\"><text transform=\"rotate(90)\" font-size=\"0.02pt\">%s &lt;</text></g>\n",
+ time2pixels(start), row1 * SLOT_MULT - SLOT_HEIGHT/32, desc2);
+ }
+ }
+ height = row1 * SLOT_MULT;
+ if (row2 > row1)
+ height += SLOT_HEIGHT;
+ if (row1)
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n",
+ time2pixels(start), height);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace)
+{
+ double height;
+
+ if (!svgfile)
+ return;
+
+
+ fprintf(svgfile, "<g>\n");
+
+ if (backtrace)
+ fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+ if (row1 < row2)
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row1 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row2 * SLOT_MULT);
+ else
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%.2f\" style=\"stroke:rgb(32,255,32);stroke-width:0.009\"/>\n",
+ time2pixels(start), row2 * SLOT_MULT + SLOT_HEIGHT, time2pixels(start), row1 * SLOT_MULT);
+
+ height = row1 * SLOT_MULT;
+ if (row2 > row1)
+ height += SLOT_HEIGHT;
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(32,255,32)\"/>\n",
+ time2pixels(start), height);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_interrupt(u64 start, int row, const char *backtrace)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+
+ fprintf(svgfile, "<title>Wakeup from interrupt</title>\n");
+
+ if (backtrace)
+ fprintf(svgfile, "<desc>%s</desc>\n", backtrace);
+
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n",
+ time2pixels(start), row * SLOT_MULT);
+ fprintf(svgfile, "<circle cx=\"%.8f\" cy=\"%.2f\" r = \"0.01\" style=\"fill:rgb(255,128,128)\"/>\n",
+ time2pixels(start), row * SLOT_MULT + SLOT_HEIGHT);
+
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_text(int Yslot, u64 start, const char *text)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<text x=\"%.8f\" y=\"%.8f\">%s</text>\n",
+ time2pixels(start), Yslot * SLOT_MULT+SLOT_HEIGHT/2, text);
+}
+
+static void svg_legenda_box(int X, const char *text, const char *style)
+{
+ double boxsize;
+ boxsize = SLOT_HEIGHT / 2;
+
+ fprintf(svgfile, "<rect x=\"%i\" width=\"%.8f\" y=\"0\" height=\"%.1f\" class=\"%s\"/>\n",
+ X, boxsize, boxsize, style);
+ fprintf(svgfile, "<text transform=\"translate(%.8f, %.8f)\" font-size=\"%.8fpt\">%s</text>\n",
+ X + boxsize + 5, boxsize, 0.8 * boxsize, text);
+}
+
+void svg_io_legenda(void)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ svg_legenda_box(0, "Disk", "disk");
+ svg_legenda_box(100, "Network", "net");
+ svg_legenda_box(200, "Sync", "sync");
+ svg_legenda_box(300, "Poll", "poll");
+ svg_legenda_box(400, "Error", "error");
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_legenda(void)
+{
+ if (!svgfile)
+ return;
+
+ fprintf(svgfile, "<g>\n");
+ svg_legenda_box(0, "Running", "sample");
+	svg_legenda_box(100, "Idle", "c1");
+ svg_legenda_box(200, "Deeper Idle", "c3");
+ svg_legenda_box(350, "Deepest Idle", "c6");
+ svg_legenda_box(550, "Sleeping", "process2");
+ svg_legenda_box(650, "Waiting for cpu", "waiting");
+ svg_legenda_box(800, "Blocked on IO", "blocked");
+ fprintf(svgfile, "</g>\n");
+}
+
+void svg_time_grid(double min_thickness)
+{
+ u64 i;
+
+ if (!svgfile)
+ return;
+
+ i = first_time;
+ while (i < last_time) {
+ int color = 220;
+ double thickness = 0.075;
+ if ((i % 100000000) == 0) {
+ thickness = 0.5;
+ color = 192;
+ }
+ if ((i % 1000000000) == 0) {
+ thickness = 2.0;
+ color = 128;
+ }
+
+ if (thickness >= min_thickness)
+ fprintf(svgfile, "<line x1=\"%.8f\" y1=\"%.2f\" x2=\"%.8f\" y2=\"%" PRIu64 "\" style=\"stroke:rgb(%i,%i,%i);stroke-width:%.3f\"/>\n",
+ time2pixels(i), SLOT_MULT/2, time2pixels(i),
+ total_height, color, color, color, thickness);
+
+ i += 10000000;
+ }
+}
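+
+/*
+ * Example: with times in nanoseconds, a grid line is drawn every 10 ms.
+ * At i = 1,300,000,000 only the 100 ms test matches, giving a medium
+ * line (thickness 0.5, grey 192); at i = 2,000,000,000 the 1 s test
+ * matches last and wins (thickness 2.0, grey 128). All other ticks get
+ * the faint 0.075 default.
+ */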
+
+void svg_close(void)
+{
+ if (svgfile) {
+ fprintf(svgfile, "</svg>\n");
+ fclose(svgfile);
+ svgfile = NULL;
+ }
+}
+
+#define cpumask_bits(maskp) ((maskp)->bits)
+typedef struct { DECLARE_BITMAP(bits, MAX_NR_CPUS); } cpumask_t;
+
+struct topology {
+ cpumask_t *sib_core;
+ int sib_core_nr;
+ cpumask_t *sib_thr;
+ int sib_thr_nr;
+};
+
+static void scan_thread_topology(int *map, struct topology *t, int cpu, int *pos)
+{
+ int i;
+ int thr;
+
+ for (i = 0; i < t->sib_thr_nr; i++) {
+ if (!test_bit(cpu, cpumask_bits(&t->sib_thr[i])))
+ continue;
+
+ for_each_set_bit(thr,
+ cpumask_bits(&t->sib_thr[i]),
+ MAX_NR_CPUS)
+ if (map[thr] == -1)
+ map[thr] = (*pos)++;
+ }
+}
+
+static void scan_core_topology(int *map, struct topology *t)
+{
+ int pos = 0;
+ int i;
+ int cpu;
+
+ for (i = 0; i < t->sib_core_nr; i++)
+ for_each_set_bit(cpu,
+ cpumask_bits(&t->sib_core[i]),
+ MAX_NR_CPUS)
+ scan_thread_topology(map, t, cpu, &pos);
+}
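+
+/*
+ * Example: on a two-core SMT box where CPUs 0/2 are the threads of one
+ * core and CPUs 1/3 of the other (thread masks {0,2} and {1,3}, core
+ * mask {0,1,2,3}), the scan assigns map[0]=0, map[2]=1, map[1]=2 and
+ * map[3]=3, so hyperthread siblings end up on adjacent rows.
+ */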
+
+static int str_to_bitmap(char *s, cpumask_t *b)
+{
+ int i;
+ int ret = 0;
+ struct cpu_map *m;
+ int c;
+
+ m = cpu_map__new(s);
+ if (!m)
+ return -1;
+
+ for (i = 0; i < m->nr; i++) {
+ c = m->map[i];
+ if (c >= MAX_NR_CPUS) {
+ ret = -1;
+ break;
+ }
+
+ set_bit(c, cpumask_bits(b));
+ }
+
+ cpu_map__put(m);
+
+ return ret;
+}
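+
+/*
+ * Example: str_to_bitmap("0,2-3", &mask) relies on the cpulist syntax
+ * understood by cpu_map__new() and sets bits 0, 2 and 3 in the mask;
+ * any CPU number at or above MAX_NR_CPUS makes it fail with -1.
+ */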
+
+int svg_build_topology_map(char *sib_core, int sib_core_nr,
+ char *sib_thr, int sib_thr_nr)
+{
+ int i;
+ struct topology t;
+
+ t.sib_core_nr = sib_core_nr;
+ t.sib_thr_nr = sib_thr_nr;
+ t.sib_core = calloc(sib_core_nr, sizeof(cpumask_t));
+ t.sib_thr = calloc(sib_thr_nr, sizeof(cpumask_t));
+
+ if (!t.sib_core || !t.sib_thr) {
+ fprintf(stderr, "topology: no memory\n");
+ goto exit;
+ }
+
+ for (i = 0; i < sib_core_nr; i++) {
+ if (str_to_bitmap(sib_core, &t.sib_core[i])) {
+ fprintf(stderr, "topology: can't parse siblings map\n");
+ goto exit;
+ }
+
+ sib_core += strlen(sib_core) + 1;
+ }
+
+ for (i = 0; i < sib_thr_nr; i++) {
+ if (str_to_bitmap(sib_thr, &t.sib_thr[i])) {
+ fprintf(stderr, "topology: can't parse siblings map\n");
+ goto exit;
+ }
+
+ sib_thr += strlen(sib_thr) + 1;
+ }
+
+ topology_map = malloc(sizeof(int) * MAX_NR_CPUS);
+ if (!topology_map) {
+ fprintf(stderr, "topology: no memory\n");
+ goto exit;
+ }
+
+ for (i = 0; i < MAX_NR_CPUS; i++)
+ topology_map[i] = -1;
+
+	scan_core_topology(topology_map, &t);
+
+	/* The sibling masks are only needed to build the map; free them. */
+	zfree(&t.sib_core);
+	zfree(&t.sib_thr);
+
+	return 0;
+
+exit:
+ zfree(&t.sib_core);
+ zfree(&t.sib_thr);
+
+ return -1;
+}
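+
+/*
+ * Note on the calling convention above: sib_core and sib_thr are
+ * sequences of sib_core_nr/sib_thr_nr NUL-terminated cpulist strings
+ * laid out back to back (e.g. "0-3\0" "0-3\0"), which is why the
+ * pointers advance by strlen() + 1 after each parse.
+ */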
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
new file mode 100644
index 000000000..e55338d5c
--- /dev/null
+++ b/tools/perf/util/svghelper.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SVGHELPER_H
+#define __PERF_SVGHELPER_H
+
+#include <linux/types.h>
+
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr);
+
+extern int svg_page_width;
+extern u64 svg_highlight;
+extern const char *svg_highlight_name;
+
+#endif /* __PERF_SVGHELPER_H */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
new file mode 100644
index 000000000..166c621e0
--- /dev/null
+++ b/tools/perf/util/symbol-elf.c
@@ -0,0 +1,2251 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+
+#include "symbol.h"
+#include "demangle-java.h"
+#include "demangle-rust.h"
+#include "machine.h"
+#include "vdso.h"
+#include "debug.h"
+#include "sane_ctype.h"
+#include <symbol/kallsyms.h>
+
+#ifndef EM_AARCH64
+#define EM_AARCH64 183 /* ARM 64 bit */
+#endif
+
+typedef Elf64_Nhdr GElf_Nhdr;
+
+#ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
+extern char *cplus_demangle(const char *, int);
+
+static inline char *bfd_demangle(void __maybe_unused *v, const char *c, int i)
+{
+ return cplus_demangle(c, i);
+}
+#else
+#ifdef NO_DEMANGLE
+static inline char *bfd_demangle(void __maybe_unused *v,
+ const char __maybe_unused *c,
+ int __maybe_unused i)
+{
+ return NULL;
+}
+#else
+#define PACKAGE "perf"
+#include <bfd.h>
+#endif
+#endif
+
+#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
+static int elf_getphdrnum(Elf *elf, size_t *dst)
+{
+ GElf_Ehdr gehdr;
+ GElf_Ehdr *ehdr;
+
+ ehdr = gelf_getehdr(elf, &gehdr);
+ if (!ehdr)
+ return -1;
+
+ *dst = ehdr->e_phnum;
+
+ return 0;
+}
+#endif
+
+#ifndef HAVE_ELF_GETSHDRSTRNDX_SUPPORT
+static int elf_getshdrstrndx(Elf *elf __maybe_unused, size_t *dst __maybe_unused)
+{
+ pr_err("%s: update your libelf to > 0.140, this one lacks elf_getshdrstrndx().\n", __func__);
+ return -1;
+}
+#endif
+
+#ifndef NT_GNU_BUILD_ID
+#define NT_GNU_BUILD_ID 3
+#endif
+
+/**
+ * elf_symtab__for_each_symbol - iterate through all the symbols
+ *
+ * @syms: struct elf_symtab instance to iterate
+ * @nr_syms: number of symbols in @syms
+ * @idx: uint32_t index
+ * @sym: GElf_Sym iterator
+ */
+#define elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) \
+ for (idx = 0, gelf_getsym(syms, idx, &sym);\
+ idx < nr_syms; \
+ idx++, gelf_getsym(syms, idx, &sym))
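+
+/*
+ * Typical use, given an Elf_Data *syms table with nr_syms entries:
+ *
+ *	GElf_Sym sym;
+ *	uint32_t idx;
+ *
+ *	elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+ *		// sym is a copy of entry idx, filled in by gelf_getsym()
+ *	}
+ */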
+
+static inline uint8_t elf_sym__type(const GElf_Sym *sym)
+{
+ return GELF_ST_TYPE(sym->st_info);
+}
+
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+ return GELF_ST_VISIBILITY(sym->st_other);
+}
+
+#ifndef STT_GNU_IFUNC
+#define STT_GNU_IFUNC 10
+#endif
+
+static inline int elf_sym__is_function(const GElf_Sym *sym)
+{
+ return (elf_sym__type(sym) == STT_FUNC ||
+ elf_sym__type(sym) == STT_GNU_IFUNC) &&
+ sym->st_name != 0 &&
+ sym->st_shndx != SHN_UNDEF;
+}
+
+static inline bool elf_sym__is_object(const GElf_Sym *sym)
+{
+ return elf_sym__type(sym) == STT_OBJECT &&
+ sym->st_name != 0 &&
+ sym->st_shndx != SHN_UNDEF;
+}
+
+static inline int elf_sym__is_label(const GElf_Sym *sym)
+{
+ return elf_sym__type(sym) == STT_NOTYPE &&
+ sym->st_name != 0 &&
+ sym->st_shndx != SHN_UNDEF &&
+ sym->st_shndx != SHN_ABS &&
+ elf_sym__visibility(sym) != STV_HIDDEN &&
+ elf_sym__visibility(sym) != STV_INTERNAL;
+}
+
+static bool elf_sym__filter(GElf_Sym *sym)
+{
+ return elf_sym__is_function(sym) || elf_sym__is_object(sym);
+}
+
+static inline const char *elf_sym__name(const GElf_Sym *sym,
+ const Elf_Data *symstrs)
+{
+ return symstrs->d_buf + sym->st_name;
+}
+
+static inline const char *elf_sec__name(const GElf_Shdr *shdr,
+ const Elf_Data *secstrs)
+{
+ return secstrs->d_buf + shdr->sh_name;
+}
+
+static inline int elf_sec__is_text(const GElf_Shdr *shdr,
+ const Elf_Data *secstrs)
+{
+ return strstr(elf_sec__name(shdr, secstrs), "text") != NULL;
+}
+
+static inline bool elf_sec__is_data(const GElf_Shdr *shdr,
+ const Elf_Data *secstrs)
+{
+ return strstr(elf_sec__name(shdr, secstrs), "data") != NULL;
+}
+
+static bool elf_sec__filter(GElf_Shdr *shdr, Elf_Data *secstrs)
+{
+ return elf_sec__is_text(shdr, secstrs) ||
+ elf_sec__is_data(shdr, secstrs);
+}
+
+static size_t elf_addr_to_index(Elf *elf, GElf_Addr addr)
+{
+ Elf_Scn *sec = NULL;
+ GElf_Shdr shdr;
+ size_t cnt = 1;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ gelf_getshdr(sec, &shdr);
+
+ if ((addr >= shdr.sh_addr) &&
+ (addr < (shdr.sh_addr + shdr.sh_size)))
+ return cnt;
+
+ ++cnt;
+ }
+
+ return -1;
+}
+
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ GElf_Shdr *shp, const char *name, size_t *idx)
+{
+ Elf_Scn *sec = NULL;
+ size_t cnt = 1;
+
+ /* Elf is corrupted/truncated, avoid calling elf_strptr. */
+ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL))
+ return NULL;
+
+ while ((sec = elf_nextscn(elf, sec)) != NULL) {
+ char *str;
+
+ gelf_getshdr(sec, shp);
+ str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name);
+ if (str && !strcmp(name, str)) {
+ if (idx)
+ *idx = cnt;
+ return sec;
+ }
+ ++cnt;
+ }
+
+ return NULL;
+}
+
+static bool want_demangle(bool is_kernel_sym)
+{
+ return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
+}
+
+static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+ int demangle_flags = verbose > 0 ? (DMGL_PARAMS | DMGL_ANSI) : DMGL_NO_OPTS;
+ char *demangled = NULL;
+
+ /*
+	 * We need to figure out if the object was created from C++ sources.
+	 * DWARF's DW_TAG_compile_unit has this, but we don't always have
+	 * access to it...
+ */
+ if (!want_demangle(dso->kernel || kmodule))
+ return demangled;
+
+ demangled = bfd_demangle(NULL, elf_name, demangle_flags);
+ if (demangled == NULL)
+ demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
+ else if (rust_is_mangled(demangled))
+ /*
+ * Input to Rust demangling is the BFD-demangled
+ * name which it Rust-demangles in place.
+ */
+ rust_demangle_sym(demangled);
+
+ return demangled;
+}
+
+#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \
+ for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \
+ idx < nr_entries; \
+ ++idx, pos = gelf_getrel(reldata, idx, &pos_mem))
+
+#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \
+ for (idx = 0, pos = gelf_getrela(reldata, 0, &pos_mem); \
+ idx < nr_entries; \
+ ++idx, pos = gelf_getrela(reldata, idx, &pos_mem))
+
+/*
+ * We need to check if we have a .dynsym, so that we can handle the
+ * .plt, synthesizing its symbols, that aren't on the symtabs (be it
+ * .dynsym or .symtab).
+ * And always look at the original dso, not at debuginfo packages, which
+ * have the PLT data stripped out (shdr_rel_plt.sh_type == SHT_NOBITS).
+ */
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss)
+{
+ uint32_t nr_rel_entries, idx;
+ GElf_Sym sym;
+ u64 plt_offset, plt_header_size, plt_entry_size;
+ GElf_Shdr shdr_plt;
+ struct symbol *f;
+ GElf_Shdr shdr_rel_plt, shdr_dynsym;
+ Elf_Data *reldata, *syms, *symstrs;
+ Elf_Scn *scn_plt_rel, *scn_symstrs, *scn_dynsym;
+ size_t dynsym_idx;
+ GElf_Ehdr ehdr;
+ char sympltname[1024];
+ Elf *elf;
+ int nr = 0, symidx, err = 0;
+
+ if (!ss->dynsym)
+ return 0;
+
+ elf = ss->elf;
+ ehdr = ss->ehdr;
+
+ scn_dynsym = ss->dynsym;
+ shdr_dynsym = ss->dynshdr;
+ dynsym_idx = ss->dynsym_idx;
+
+ if (scn_dynsym == NULL)
+ goto out_elf_end;
+
+ scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+ ".rela.plt", NULL);
+ if (scn_plt_rel == NULL) {
+ scn_plt_rel = elf_section_by_name(elf, &ehdr, &shdr_rel_plt,
+ ".rel.plt", NULL);
+ if (scn_plt_rel == NULL)
+ goto out_elf_end;
+ }
+
+ err = -1;
+
+ if (shdr_rel_plt.sh_link != dynsym_idx)
+ goto out_elf_end;
+
+ if (elf_section_by_name(elf, &ehdr, &shdr_plt, ".plt", NULL) == NULL)
+ goto out_elf_end;
+
+ /*
+ * Fetch the relocation section to find the idxes to the GOT
+ * and the symbols in the .dynsym they refer to.
+ */
+ reldata = elf_getdata(scn_plt_rel, NULL);
+ if (reldata == NULL)
+ goto out_elf_end;
+
+ syms = elf_getdata(scn_dynsym, NULL);
+ if (syms == NULL)
+ goto out_elf_end;
+
+ scn_symstrs = elf_getscn(elf, shdr_dynsym.sh_link);
+ if (scn_symstrs == NULL)
+ goto out_elf_end;
+
+ symstrs = elf_getdata(scn_symstrs, NULL);
+ if (symstrs == NULL)
+ goto out_elf_end;
+
+ if (symstrs->d_size == 0)
+ goto out_elf_end;
+
+ nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize;
+ plt_offset = shdr_plt.sh_offset;
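+	/*
+	 * The .plt section starts with one header-sized stub, followed by
+	 * one fixed-size entry per relocation.  Both sizes are architecture
+	 * specific, so pick them by e_machine and fall back to sh_entsize.
+	 */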
+ switch (ehdr.e_machine) {
+ case EM_ARM:
+ plt_header_size = 20;
+ plt_entry_size = 12;
+ break;
+
+ case EM_AARCH64:
+ plt_header_size = 32;
+ plt_entry_size = 16;
+ break;
+
+ case EM_SPARC:
+ plt_header_size = 48;
+ plt_entry_size = 12;
+ break;
+
+ case EM_SPARCV9:
+ plt_header_size = 128;
+ plt_entry_size = 32;
+ break;
+
+	default: /* FIXME: s390/alpha/mips/parisc/powerpc/sh/xtensa need to be checked */
+ plt_header_size = shdr_plt.sh_entsize;
+ plt_entry_size = shdr_plt.sh_entsize;
+ break;
+ }
+ plt_offset += plt_header_size;
+
+ if (shdr_rel_plt.sh_type == SHT_RELA) {
+ GElf_Rela pos_mem, *pos;
+
+ elf_section__for_each_rela(reldata, pos, pos_mem, idx,
+ nr_rel_entries) {
+ const char *elf_name = NULL;
+ char *demangled = NULL;
+ symidx = GELF_R_SYM(pos->r_info);
+ gelf_getsym(syms, symidx, &sym);
+
+ elf_name = elf_sym__name(&sym, symstrs);
+ demangled = demangle_sym(dso, 0, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
+ snprintf(sympltname, sizeof(sympltname),
+ "%s@plt", elf_name);
+ free(demangled);
+
+ f = symbol__new(plt_offset, plt_entry_size,
+ STB_GLOBAL, STT_FUNC, sympltname);
+ if (!f)
+ goto out_elf_end;
+
+ plt_offset += plt_entry_size;
+ symbols__insert(&dso->symbols, f);
+ ++nr;
+ }
+ } else if (shdr_rel_plt.sh_type == SHT_REL) {
+ GElf_Rel pos_mem, *pos;
+ elf_section__for_each_rel(reldata, pos, pos_mem, idx,
+ nr_rel_entries) {
+ const char *elf_name = NULL;
+ char *demangled = NULL;
+ symidx = GELF_R_SYM(pos->r_info);
+ gelf_getsym(syms, symidx, &sym);
+
+ elf_name = elf_sym__name(&sym, symstrs);
+ demangled = demangle_sym(dso, 0, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
+ snprintf(sympltname, sizeof(sympltname),
+ "%s@plt", elf_name);
+ free(demangled);
+
+ f = symbol__new(plt_offset, plt_entry_size,
+ STB_GLOBAL, STT_FUNC, sympltname);
+ if (!f)
+ goto out_elf_end;
+
+ plt_offset += plt_entry_size;
+ symbols__insert(&dso->symbols, f);
+ ++nr;
+ }
+ }
+
+ err = 0;
+out_elf_end:
+ if (err == 0)
+ return nr;
+ pr_debug("%s: problems reading %s PLT info.\n",
+ __func__, dso->long_name);
+ return 0;
+}
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name)
+{
+ return demangle_sym(dso, kmodule, elf_name);
+}
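+
+/*
+ * Example: for a C++ DSO, dso__demangle_sym(dso, 0, "_ZN3foo3barEv")
+ * returns a newly allocated "foo::bar" (parameter types are appended
+ * when verbose mode adds DMGL_PARAMS), or NULL when demangling is
+ * disabled or the name is not mangled; callers must free() the result.
+ */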
+
+/*
+ * Align offset to 4 bytes as needed for note name and descriptor data.
+ */
+#define NOTE_ALIGN(n) (((n) + 3) & -4U)
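+
+/*
+ * Example: NOTE_ALIGN(5) == 8 and NOTE_ALIGN(8) == 8, i.e. sizes are
+ * rounded up to the next multiple of 4, matching the padding rules for
+ * ELF note name and descriptor fields.
+ */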
+
+static int elf_read_build_id(Elf *elf, void *bf, size_t size)
+{
+ int err = -1;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ Elf_Kind ek;
+ void *ptr;
+
+ if (size < BUILD_ID_SIZE)
+ goto out;
+
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ pr_err("%s: cannot get elf header.\n", __func__);
+ goto out;
+ }
+
+ /*
+ * Check following sections for notes:
+ * '.note.gnu.build-id'
+ * '.notes'
+ * '.note' (VDSO specific)
+ */
+ do {
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".note.gnu.build-id", NULL);
+ if (sec)
+ break;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".notes", NULL);
+ if (sec)
+ break;
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".note", NULL);
+ if (sec)
+ break;
+
+ return err;
+
+ } while (0);
+
+ data = elf_getdata(sec, NULL);
+ if (data == NULL)
+ goto out;
+
+ ptr = data->d_buf;
+ while (ptr < (data->d_buf + data->d_size)) {
+ GElf_Nhdr *nhdr = ptr;
+ size_t namesz = NOTE_ALIGN(nhdr->n_namesz),
+ descsz = NOTE_ALIGN(nhdr->n_descsz);
+ const char *name;
+
+ ptr += sizeof(*nhdr);
+ name = ptr;
+ ptr += namesz;
+ if (nhdr->n_type == NT_GNU_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU")) {
+ if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+ size_t sz = min(size, descsz);
+ memcpy(bf, ptr, sz);
+ memset(bf + sz, 0, size - sz);
+ err = descsz;
+ break;
+ }
+ }
+ ptr += descsz;
+ }
+
+out:
+ return err;
+}
+
+int filename__read_build_id(const char *filename, void *bf, size_t size)
+{
+ int fd, err = -1;
+ Elf *elf;
+
+ if (size < BUILD_ID_SIZE)
+ goto out;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ goto out;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ err = elf_read_build_id(elf, bf, size);
+
+ elf_end(elf);
+out_close:
+ close(fd);
+out:
+ return err;
+}
+
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+{
+ int fd, err = -1;
+
+ if (size < BUILD_ID_SIZE)
+ goto out;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ goto out;
+
+ while (1) {
+ char bf[BUFSIZ];
+ GElf_Nhdr nhdr;
+ size_t namesz, descsz;
+
+ if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr))
+ break;
+
+ namesz = NOTE_ALIGN(nhdr.n_namesz);
+ descsz = NOTE_ALIGN(nhdr.n_descsz);
+ if (nhdr.n_type == NT_GNU_BUILD_ID &&
+ nhdr.n_namesz == sizeof("GNU")) {
+ if (read(fd, bf, namesz) != (ssize_t)namesz)
+ break;
+ if (memcmp(bf, "GNU", sizeof("GNU")) == 0) {
+ size_t sz = min(descsz, size);
+ if (read(fd, build_id, sz) == (ssize_t)sz) {
+ memset(build_id + sz, 0, size - sz);
+ err = 0;
+ break;
+ }
+ } else if (read(fd, bf, descsz) != (ssize_t)descsz)
+ break;
+ } else {
+ int n = namesz + descsz;
+
+ if (n > (int)sizeof(bf)) {
+ n = sizeof(bf);
+ pr_debug("%s: truncating reading of build id in sysfs file %s: n_namesz=%u, n_descsz=%u.\n",
+ __func__, filename, nhdr.n_namesz, nhdr.n_descsz);
+ }
+ if (read(fd, bf, n) != n)
+ break;
+ }
+ }
+ close(fd);
+out:
+ return err;
+}
+
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size)
+{
+ int fd, err = -1;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ Elf_Data *data;
+ Elf_Scn *sec;
+ Elf_Kind ek;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ goto out;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ pr_debug2("%s: cannot read %s ELF file.\n", __func__, filename);
+ goto out_close;
+ }
+
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out_elf_end;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ pr_err("%s: cannot get elf header.\n", __func__);
+ goto out_elf_end;
+ }
+
+ sec = elf_section_by_name(elf, &ehdr, &shdr,
+ ".gnu_debuglink", NULL);
+ if (sec == NULL)
+ goto out_elf_end;
+
+ data = elf_getdata(sec, NULL);
+ if (data == NULL)
+ goto out_elf_end;
+
+	/*
+	 * The start of this section is a zero-terminated string; make sure
+	 * the copy stays terminated even if the name fills the buffer.
+	 */
+	strncpy(debuglink, data->d_buf, size);
+	debuglink[size - 1] = '\0';
+
+ err = 0;
+
+out_elf_end:
+ elf_end(elf);
+out_close:
+ close(fd);
+out:
+ return err;
+}
+
+static int dso__swap_init(struct dso *dso, unsigned char eidata)
+{
+ static unsigned int const endian = 1;
+
+ dso->needs_swap = DSO_SWAP__NO;
+
+ switch (eidata) {
+ case ELFDATA2LSB:
+ /* We are big endian, DSO is little endian. */
+ if (*(unsigned char const *)&endian != 1)
+ dso->needs_swap = DSO_SWAP__YES;
+ break;
+
+ case ELFDATA2MSB:
+ /* We are little endian, DSO is big endian. */
+ if (*(unsigned char const *)&endian != 0)
+ dso->needs_swap = DSO_SWAP__YES;
+ break;
+
+ default:
+ pr_err("unrecognized DSO data encoding %d\n", eidata);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+bool symsrc__possibly_runtime(struct symsrc *ss)
+{
+ return ss->dynsym || ss->opdsec;
+}
+
+bool symsrc__has_symtab(struct symsrc *ss)
+{
+ return ss->symtab != NULL;
+}
+
+void symsrc__destroy(struct symsrc *ss)
+{
+ zfree(&ss->name);
+ elf_end(ss->elf);
+ close(ss->fd);
+}
+
+bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+ return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+}
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+ enum dso_binary_type type)
+{
+ int err = -1;
+ GElf_Ehdr ehdr;
+ Elf *elf;
+ int fd;
+
+ if (dso__needs_decompress(dso)) {
+ fd = dso__decompress_kmodule_fd(dso, name);
+ if (fd < 0)
+ return -1;
+
+ type = dso->symtab_type;
+ } else {
+ fd = open(name, O_RDONLY);
+ if (fd < 0) {
+ dso->load_errno = errno;
+ return -1;
+ }
+ }
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL) {
+ pr_debug("%s: cannot read %s ELF file.\n", __func__, name);
+ dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+ goto out_close;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF;
+ pr_debug("%s: cannot get elf header.\n", __func__);
+ goto out_elf_end;
+ }
+
+ if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) {
+ dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR;
+ goto out_elf_end;
+ }
+
+ /* Always reject images with a mismatched build-id: */
+ if (dso->has_build_id && !symbol_conf.ignore_vmlinux_buildid) {
+ u8 build_id[BUILD_ID_SIZE];
+
+ if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) {
+ dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID;
+ goto out_elf_end;
+ }
+
+ if (!dso__build_id_equal(dso, build_id)) {
+ pr_debug("%s: build id mismatch for %s.\n", __func__, name);
+ dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
+ goto out_elf_end;
+ }
+ }
+
+ ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+ ss->symtab = elf_section_by_name(elf, &ehdr, &ss->symshdr, ".symtab",
+ NULL);
+ if (ss->symshdr.sh_type != SHT_SYMTAB)
+ ss->symtab = NULL;
+
+ ss->dynsym_idx = 0;
+ ss->dynsym = elf_section_by_name(elf, &ehdr, &ss->dynshdr, ".dynsym",
+ &ss->dynsym_idx);
+ if (ss->dynshdr.sh_type != SHT_DYNSYM)
+ ss->dynsym = NULL;
+
+ ss->opdidx = 0;
+ ss->opdsec = elf_section_by_name(elf, &ehdr, &ss->opdshdr, ".opd",
+ &ss->opdidx);
+ if (ss->opdshdr.sh_type != SHT_PROGBITS)
+ ss->opdsec = NULL;
+
+ if (dso->kernel == DSO_TYPE_USER)
+ ss->adjust_symbols = true;
+ else
+ ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
+
+ ss->name = strdup(name);
+ if (!ss->name) {
+ dso->load_errno = errno;
+ goto out_elf_end;
+ }
+
+ ss->elf = elf;
+ ss->fd = fd;
+ ss->ehdr = ehdr;
+ ss->type = type;
+
+ return 0;
+
+out_elf_end:
+ elf_end(elf);
+out_close:
+ close(fd);
+ return err;
+}
+
+/**
+ * ref_reloc_sym_not_found - check whether the kernel relocation symbol has not yet been found.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns %true if we are dealing with the kernel maps and the
+ * relocation reference symbol has not yet been found. Otherwise %false is
+ * returned.
+ */
+static bool ref_reloc_sym_not_found(struct kmap *kmap)
+{
+ return kmap && kmap->ref_reloc_sym && kmap->ref_reloc_sym->name &&
+ !kmap->ref_reloc_sym->unrelocated_addr;
+}
+
+/**
+ * ref_reloc - kernel relocation offset.
+ * @kmap: kernel maps and relocation reference symbol
+ *
+ * This function returns the offset of kernel addresses as determined by using
+ * the relocation reference symbol i.e. if the kernel has not been relocated
+ * then the return value is zero.
+ */
+static u64 ref_reloc(struct kmap *kmap)
+{
+ if (kmap && kmap->ref_reloc_sym &&
+ kmap->ref_reloc_sym->unrelocated_addr)
+ return kmap->ref_reloc_sym->addr -
+ kmap->ref_reloc_sym->unrelocated_addr;
+ return 0;
+}
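+
+/*
+ * Example: if kallsyms reports _text at 0xffffffff9e000000 while the
+ * vmlinux image has it at 0xffffffff81000000 (e.g. under KASLR),
+ * ref_reloc() returns 0x1d000000, and that delta is used to relocate
+ * the kernel map.
+ */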
+
+void __weak arch__sym_update(struct symbol *s __maybe_unused,
+ GElf_Sym *sym __maybe_unused) { }
+
+static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
+ GElf_Sym *sym, GElf_Shdr *shdr,
+ struct map_groups *kmaps, struct kmap *kmap,
+ struct dso **curr_dsop, struct map **curr_mapp,
+ const char *section_name,
+ bool adjust_kernel_syms, bool kmodule, bool *remap_kernel)
+{
+ struct dso *curr_dso = *curr_dsop;
+ struct map *curr_map;
+ char dso_name[PATH_MAX];
+
+ /* Adjust symbol to map to file offset */
+ if (adjust_kernel_syms)
+ sym->st_value -= shdr->sh_addr - shdr->sh_offset;
+
+ if (strcmp(section_name, (curr_dso->short_name + dso->short_name_len)) == 0)
+ return 0;
+
+ if (strcmp(section_name, ".text") == 0) {
+ /*
+ * The initial kernel mapping is based on
+ * kallsyms and identity maps. Overwrite it to
+ * map to the kernel dso.
+ */
+ if (*remap_kernel && dso->kernel) {
+ *remap_kernel = false;
+ map->start = shdr->sh_addr + ref_reloc(kmap);
+ map->end = map->start + shdr->sh_size;
+ map->pgoff = shdr->sh_offset;
+ map->map_ip = map__map_ip;
+ map->unmap_ip = map__unmap_ip;
+ /* Ensure maps are correctly ordered */
+ if (kmaps) {
+ map__get(map);
+ map_groups__remove(kmaps, map);
+ map_groups__insert(kmaps, map);
+ map__put(map);
+ }
+ }
+
+ /*
+ * The initial module mapping is based on
+ * /proc/modules mapped to offset zero.
+ * Overwrite it to map to the module dso.
+ */
+ if (*remap_kernel && kmodule) {
+ *remap_kernel = false;
+ map->pgoff = shdr->sh_offset;
+ }
+
+ *curr_mapp = map;
+ *curr_dsop = dso;
+ return 0;
+ }
+
+ if (!kmap)
+ return 0;
+
+ snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name);
+
+ curr_map = map_groups__find_by_name(kmaps, dso_name);
+ if (curr_map == NULL) {
+ u64 start = sym->st_value;
+
+ if (kmodule)
+ start += map->start + shdr->sh_offset;
+
+ curr_dso = dso__new(dso_name);
+ if (curr_dso == NULL)
+ return -1;
+ curr_dso->kernel = dso->kernel;
+ curr_dso->long_name = dso->long_name;
+ curr_dso->long_name_len = dso->long_name_len;
+ curr_map = map__new2(start, curr_dso);
+ dso__put(curr_dso);
+ if (curr_map == NULL)
+ return -1;
+
+ if (adjust_kernel_syms) {
+ curr_map->start = shdr->sh_addr + ref_reloc(kmap);
+ curr_map->end = curr_map->start + shdr->sh_size;
+ curr_map->pgoff = shdr->sh_offset;
+ } else {
+ curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+ }
+ curr_dso->symtab_type = dso->symtab_type;
+ map_groups__insert(kmaps, curr_map);
+ /*
+	 * Add it before we drop the reference to curr_map, i.e. while
+ * we still are sure to have a reference to this DSO via
+ * *curr_map->dso.
+ */
+ dsos__add(&map->groups->machine->dsos, curr_dso);
+ /* kmaps already got it */
+ map__put(curr_map);
+ dso__set_loaded(curr_dso);
+ *curr_mapp = curr_map;
+ *curr_dsop = curr_dso;
+ } else
+ *curr_dsop = curr_map->dso;
+
+ return 0;
+}
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ struct symsrc *runtime_ss, int kmodule)
+{
+ struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL;
+ struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL;
+ struct map *curr_map = map;
+ struct dso *curr_dso = dso;
+ Elf_Data *symstrs, *secstrs;
+ uint32_t nr_syms;
+ int err = -1;
+ uint32_t idx;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ GElf_Shdr tshdr;
+ Elf_Data *syms, *opddata = NULL;
+ GElf_Sym sym;
+ Elf_Scn *sec, *sec_strndx;
+ Elf *elf;
+ int nr = 0;
+ bool remap_kernel = false, adjust_kernel_syms = false;
+
+ if (kmap && !kmaps)
+ return -1;
+
+ dso->symtab_type = syms_ss->type;
+ dso->is_64_bit = syms_ss->is_64_bit;
+ dso->rel = syms_ss->ehdr.e_type == ET_REL;
+
+ /*
+ * Modules may already have symbols from kallsyms, but those symbols
+ * have the wrong values for the dso maps, so remove them.
+ */
+ if (kmodule && syms_ss->symtab)
+ symbols__delete(&dso->symbols);
+
+ if (!syms_ss->symtab) {
+ /*
+ * If the vmlinux is stripped, fail so we will fall back
+ * to using kallsyms. The vmlinux runtime symbols aren't
+ * of much use.
+ */
+ if (dso->kernel)
+ goto out_elf_end;
+
+ syms_ss->symtab = syms_ss->dynsym;
+ syms_ss->symshdr = syms_ss->dynshdr;
+ }
+
+ elf = syms_ss->elf;
+ ehdr = syms_ss->ehdr;
+ sec = syms_ss->symtab;
+ shdr = syms_ss->symshdr;
+
+ if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr,
+ ".text", NULL))
+ dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
+ if (runtime_ss->opdsec)
+ opddata = elf_rawdata(runtime_ss->opdsec, NULL);
+
+ syms = elf_getdata(sec, NULL);
+ if (syms == NULL)
+ goto out_elf_end;
+
+ sec = elf_getscn(elf, shdr.sh_link);
+ if (sec == NULL)
+ goto out_elf_end;
+
+ symstrs = elf_getdata(sec, NULL);
+ if (symstrs == NULL)
+ goto out_elf_end;
+
+ sec_strndx = elf_getscn(runtime_ss->elf, runtime_ss->ehdr.e_shstrndx);
+ if (sec_strndx == NULL)
+ goto out_elf_end;
+
+ secstrs = elf_getdata(sec_strndx, NULL);
+ if (secstrs == NULL)
+ goto out_elf_end;
+
+ nr_syms = shdr.sh_size / shdr.sh_entsize;
+
+ memset(&sym, 0, sizeof(sym));
+
+ /*
+ * The kernel relocation symbol is needed in advance in order to adjust
+ * kernel maps correctly.
+ */
+ if (ref_reloc_sym_not_found(kmap)) {
+ elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+ const char *elf_name = elf_sym__name(&sym, symstrs);
+
+ if (strcmp(elf_name, kmap->ref_reloc_sym->name))
+ continue;
+ kmap->ref_reloc_sym->unrelocated_addr = sym.st_value;
+ map->reloc = kmap->ref_reloc_sym->addr -
+ kmap->ref_reloc_sym->unrelocated_addr;
+ break;
+ }
+ }
+
+ /*
+ * Handle any relocation of vdso necessary because older kernels
+ * attempted to prelink vdso to its virtual address.
+ */
+ if (dso__is_vdso(dso))
+ map->reloc = map->start - dso->text_offset;
+
+ dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
+ /*
+ * Initial kernel and module mappings do not map to the dso.
+ * Flag the fixups.
+ */
+ if (dso->kernel || kmodule) {
+ remap_kernel = true;
+ adjust_kernel_syms = dso->adjust_symbols;
+ }
+ elf_symtab__for_each_symbol(syms, nr_syms, idx, sym) {
+ struct symbol *f;
+ const char *elf_name = elf_sym__name(&sym, symstrs);
+ char *demangled = NULL;
+ int is_label = elf_sym__is_label(&sym);
+ const char *section_name;
+ bool used_opd = false;
+
+ if (!is_label && !elf_sym__filter(&sym))
+ continue;
+
+ /* Reject ARM ELF "mapping symbols": these aren't unique and
+		 * don't identify functions, so they will confuse the profile
+		 * output. */
+ if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) {
+ if (elf_name[0] == '$' && strchr("adtx", elf_name[1])
+ && (elf_name[2] == '\0' || elf_name[2] == '.'))
+ continue;
+ }
+
+ if (runtime_ss->opdsec && sym.st_shndx == runtime_ss->opdidx) {
+ u32 offset = sym.st_value - syms_ss->opdshdr.sh_addr;
+ u64 *opd = opddata->d_buf + offset;
+ sym.st_value = DSO__SWAP(dso, u64, *opd);
+ sym.st_shndx = elf_addr_to_index(runtime_ss->elf,
+ sym.st_value);
+ used_opd = true;
+ }
+ /*
+		 * When loading symbols in a data mapping, ABS symbols (which
+		 * have a value of SHN_ABS in their st_shndx) fail at
+		 * elf_getscn(), which marks the loading as a failure, so
+		 * already loaded symbols cannot be fixed up.
+ *
+ * I'm not sure what should be done. Just ignore them for now.
+ * - Namhyung Kim
+ */
+ if (sym.st_shndx == SHN_ABS)
+ continue;
+
+ sec = elf_getscn(runtime_ss->elf, sym.st_shndx);
+ if (!sec)
+ goto out_elf_end;
+
+ gelf_getshdr(sec, &shdr);
+
+ if (is_label && !elf_sec__filter(&shdr, secstrs))
+ continue;
+
+ section_name = elf_sec__name(&shdr, secstrs);
+
+ /* On ARM, symbols for thumb functions have 1 added to
+ * the symbol address as a flag - remove it */
+ if ((ehdr.e_machine == EM_ARM) &&
+ (GELF_ST_TYPE(sym.st_info) == STT_FUNC) &&
+ (sym.st_value & 1))
+ --sym.st_value;
+
+ if (dso->kernel || kmodule) {
+ if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
+ section_name, adjust_kernel_syms, kmodule, &remap_kernel))
+ goto out_elf_end;
+ } else if ((used_opd && runtime_ss->adjust_symbols) ||
+ (!used_opd && syms_ss->adjust_symbols)) {
+ pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
+ "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
+ (u64)sym.st_value, (u64)shdr.sh_addr,
+ (u64)shdr.sh_offset);
+ sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ }
+
+ demangled = demangle_sym(dso, kmodule, elf_name);
+ if (demangled != NULL)
+ elf_name = demangled;
+
+ f = symbol__new(sym.st_value, sym.st_size,
+ GELF_ST_BIND(sym.st_info),
+ GELF_ST_TYPE(sym.st_info), elf_name);
+ free(demangled);
+ if (!f)
+ goto out_elf_end;
+
+ arch__sym_update(f, &sym);
+
+ __symbols__insert(&curr_dso->symbols, f, dso->kernel);
+ nr++;
+ }
+
+ /*
+ * For misannotated, zeroed, ASM function sizes.
+ */
+ if (nr > 0) {
+ symbols__fixup_end(&dso->symbols);
+ symbols__fixup_duplicate(&dso->symbols);
+ if (kmap) {
+ /*
+ * We need to fixup this here too because we create new
+ * maps here, for things like vsyscall sections.
+ */
+ map_groups__fixup_end(kmaps);
+ }
+ }
+ err = nr;
+out_elf_end:
+ return err;
+}
+
+static int elf_read_maps(Elf *elf, bool exe, mapfn_t mapfn, void *data)
+{
+ GElf_Phdr phdr;
+ size_t i, phdrnum;
+ int err;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, &phdr) == NULL)
+ return -1;
+ if (phdr.p_type != PT_LOAD)
+ continue;
+ if (exe) {
+ if (!(phdr.p_flags & PF_X))
+ continue;
+ } else {
+ if (!(phdr.p_flags & PF_R))
+ continue;
+ }
+ sz = min(phdr.p_memsz, phdr.p_filesz);
+ if (!sz)
+ continue;
+ err = mapfn(phdr.p_vaddr, sz, phdr.p_offset, data);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+ bool *is_64_bit)
+{
+ int err;
+ Elf *elf;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return -1;
+
+ if (is_64_bit)
+ *is_64_bit = (gelf_getclass(elf) == ELFCLASS64);
+
+ err = elf_read_maps(elf, exe, mapfn, data);
+
+ elf_end(elf);
+ return err;
+}
+
+enum dso_type dso__type_fd(int fd)
+{
+ enum dso_type dso_type = DSO__TYPE_UNKNOWN;
+ GElf_Ehdr ehdr;
+ Elf_Kind ek;
+ Elf *elf;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ goto out;
+
+ ek = elf_kind(elf);
+ if (ek != ELF_K_ELF)
+ goto out_end;
+
+ if (gelf_getclass(elf) == ELFCLASS64) {
+ dso_type = DSO__TYPE_64BIT;
+ goto out_end;
+ }
+
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out_end;
+
+ if (ehdr.e_machine == EM_X86_64)
+ dso_type = DSO__TYPE_X32BIT;
+ else
+ dso_type = DSO__TYPE_32BIT;
+out_end:
+ elf_end(elf);
+out:
+ return dso_type;
+}
+
+static int copy_bytes(int from, off_t from_offs, int to, off_t to_offs, u64 len)
+{
+ ssize_t r;
+ size_t n;
+ int err = -1;
+ char *buf = malloc(page_size);
+
+ if (buf == NULL)
+ return -1;
+
+ if (lseek(to, to_offs, SEEK_SET) != to_offs)
+ goto out;
+
+ if (lseek(from, from_offs, SEEK_SET) != from_offs)
+ goto out;
+
+ while (len) {
+ n = page_size;
+ if (len < n)
+ n = len;
+ /* Use read because mmap won't work on proc files */
+ r = read(from, buf, n);
+ if (r < 0)
+ goto out;
+ if (!r)
+ break;
+ n = r;
+ r = write(to, buf, n);
+ if (r < 0)
+ goto out;
+ if ((size_t)r != n)
+ goto out;
+ len -= n;
+ }
+
+ err = 0;
+out:
+ free(buf);
+ return err;
+}
+
+struct kcore {
+ int fd;
+ int elfclass;
+ Elf *elf;
+ GElf_Ehdr ehdr;
+};
+
+static int kcore__open(struct kcore *kcore, const char *filename)
+{
+ GElf_Ehdr *ehdr;
+
+ kcore->fd = open(filename, O_RDONLY);
+ if (kcore->fd == -1)
+ return -1;
+
+ kcore->elf = elf_begin(kcore->fd, ELF_C_READ, NULL);
+ if (!kcore->elf)
+ goto out_close;
+
+ kcore->elfclass = gelf_getclass(kcore->elf);
+ if (kcore->elfclass == ELFCLASSNONE)
+ goto out_end;
+
+ ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
+ if (!ehdr)
+ goto out_end;
+
+ return 0;
+
+out_end:
+ elf_end(kcore->elf);
+out_close:
+ close(kcore->fd);
+ return -1;
+}
+
+static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
+ bool temp)
+{
+ kcore->elfclass = elfclass;
+
+ if (temp)
+ kcore->fd = mkstemp(filename);
+ else
+ kcore->fd = open(filename, O_WRONLY | O_CREAT | O_EXCL, 0400);
+ if (kcore->fd == -1)
+ return -1;
+
+ kcore->elf = elf_begin(kcore->fd, ELF_C_WRITE, NULL);
+ if (!kcore->elf)
+ goto out_close;
+
+ if (!gelf_newehdr(kcore->elf, elfclass))
+ goto out_end;
+
+ memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
+
+ return 0;
+
+out_end:
+ elf_end(kcore->elf);
+out_close:
+ close(kcore->fd);
+ unlink(filename);
+ return -1;
+}
+
+static void kcore__close(struct kcore *kcore)
+{
+ elf_end(kcore->elf);
+ close(kcore->fd);
+}
+
+static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
+{
+ GElf_Ehdr *ehdr = &to->ehdr;
+ GElf_Ehdr *kehdr = &from->ehdr;
+
+ memcpy(ehdr->e_ident, kehdr->e_ident, EI_NIDENT);
+ ehdr->e_type = kehdr->e_type;
+ ehdr->e_machine = kehdr->e_machine;
+ ehdr->e_version = kehdr->e_version;
+ ehdr->e_entry = 0;
+ ehdr->e_shoff = 0;
+ ehdr->e_flags = kehdr->e_flags;
+ ehdr->e_phnum = count;
+ ehdr->e_shentsize = 0;
+ ehdr->e_shnum = 0;
+ ehdr->e_shstrndx = 0;
+
+ if (from->elfclass == ELFCLASS32) {
+ ehdr->e_phoff = sizeof(Elf32_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf32_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf32_Phdr);
+ } else {
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+ }
+
+ if (!gelf_update_ehdr(to->elf, ehdr))
+ return -1;
+
+ if (!gelf_newphdr(to->elf, count))
+ return -1;
+
+ return 0;
+}
+
+static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
+ u64 addr, u64 len)
+{
+ GElf_Phdr phdr = {
+ .p_type = PT_LOAD,
+ .p_flags = PF_R | PF_W | PF_X,
+ .p_offset = offset,
+ .p_vaddr = addr,
+ .p_paddr = 0,
+ .p_filesz = len,
+ .p_memsz = len,
+ .p_align = page_size,
+ };
+
+ if (!gelf_update_phdr(kcore->elf, idx, &phdr))
+ return -1;
+
+ return 0;
+}
+
+static off_t kcore__write(struct kcore *kcore)
+{
+ return elf_update(kcore->elf, ELF_C_WRITE);
+}
+
+struct phdr_data {
+ off_t offset;
+ off_t rel;
+ u64 addr;
+ u64 len;
+ struct list_head node;
+ struct phdr_data *remaps;
+};
+
+struct sym_data {
+ u64 addr;
+ struct list_head node;
+};
+
+struct kcore_copy_info {
+ u64 stext;
+ u64 etext;
+ u64 first_symbol;
+ u64 last_symbol;
+ u64 first_module;
+ u64 first_module_symbol;
+ u64 last_module_symbol;
+ size_t phnum;
+ struct list_head phdrs;
+ struct list_head syms;
+};
+
+#define kcore_copy__for_each_phdr(k, p) \
+ list_for_each_entry((p), &(k)->phdrs, node)
+
+static struct phdr_data *phdr_data__new(u64 addr, u64 len, off_t offset)
+{
+ struct phdr_data *p = zalloc(sizeof(*p));
+
+ if (p) {
+ p->addr = addr;
+ p->len = len;
+ p->offset = offset;
+ }
+
+ return p;
+}
+
+static struct phdr_data *kcore_copy_info__addnew(struct kcore_copy_info *kci,
+ u64 addr, u64 len,
+ off_t offset)
+{
+ struct phdr_data *p = phdr_data__new(addr, len, offset);
+
+ if (p)
+ list_add_tail(&p->node, &kci->phdrs);
+
+ return p;
+}
+
+static void kcore_copy__free_phdrs(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &kci->phdrs, node) {
+ list_del(&p->node);
+ free(p);
+ }
+}
+
+static struct sym_data *kcore_copy__new_sym(struct kcore_copy_info *kci,
+ u64 addr)
+{
+ struct sym_data *s = zalloc(sizeof(*s));
+
+ if (s) {
+ s->addr = addr;
+ list_add_tail(&s->node, &kci->syms);
+ }
+
+ return s;
+}
+
+static void kcore_copy__free_syms(struct kcore_copy_info *kci)
+{
+ struct sym_data *s, *tmp;
+
+ list_for_each_entry_safe(s, tmp, &kci->syms, node) {
+ list_del(&s->node);
+ free(s);
+ }
+}
+
+static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
+ u64 start)
+{
+ struct kcore_copy_info *kci = arg;
+
+ if (!kallsyms__is_function(type))
+ return 0;
+
+ if (strchr(name, '[')) {
+ if (!kci->first_module_symbol || start < kci->first_module_symbol)
+ kci->first_module_symbol = start;
+ if (start > kci->last_module_symbol)
+ kci->last_module_symbol = start;
+ return 0;
+ }
+
+ if (!kci->first_symbol || start < kci->first_symbol)
+ kci->first_symbol = start;
+
+ if (!kci->last_symbol || start > kci->last_symbol)
+ kci->last_symbol = start;
+
+ if (!strcmp(name, "_stext")) {
+ kci->stext = start;
+ return 0;
+ }
+
+ if (!strcmp(name, "_etext")) {
+ kci->etext = start;
+ return 0;
+ }
+
+ if (is_entry_trampoline(name) && !kcore_copy__new_sym(kci, start))
+ return -1;
+
+ return 0;
+}
+
+static int kcore_copy__parse_kallsyms(struct kcore_copy_info *kci,
+ const char *dir)
+{
+ char kallsyms_filename[PATH_MAX];
+
+ scnprintf(kallsyms_filename, PATH_MAX, "%s/kallsyms", dir);
+
+ if (symbol__restricted_filename(kallsyms_filename, "/proc/kallsyms"))
+ return -1;
+
+ if (kallsyms__parse(kallsyms_filename, kci,
+ kcore_copy__process_kallsyms) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int kcore_copy__process_modules(void *arg,
+ const char *name __maybe_unused,
+ u64 start, u64 size __maybe_unused)
+{
+ struct kcore_copy_info *kci = arg;
+
+ if (!kci->first_module || start < kci->first_module)
+ kci->first_module = start;
+
+ return 0;
+}
+
+static int kcore_copy__parse_modules(struct kcore_copy_info *kci,
+ const char *dir)
+{
+ char modules_filename[PATH_MAX];
+
+ scnprintf(modules_filename, PATH_MAX, "%s/modules", dir);
+
+ if (symbol__restricted_filename(modules_filename, "/proc/modules"))
+ return -1;
+
+ if (modules__parse(modules_filename, kci,
+ kcore_copy__process_modules) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int kcore_copy__map(struct kcore_copy_info *kci, u64 start, u64 end,
+ u64 pgoff, u64 s, u64 e)
+{
+ u64 len, offset;
+
+ if (s < start || s >= end)
+ return 0;
+
+ offset = (s - start) + pgoff;
+ len = e < end ? e - s : end - s;
+
+ return kcore_copy_info__addnew(kci, s, len, offset) ? 0 : -1;
+}
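+
+/*
+ * Example: for a program header covering [start 0x1000, end 0x5000) at
+ * file offset pgoff 0x100, a wanted range s = 0x2000, e = 0x6000 yields
+ * offset = 0x1100 and len = end - s = 0x3000, i.e. exactly the part of
+ * the range that this header actually maps.
+ */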
+
+static int kcore_copy__read_map(u64 start, u64 len, u64 pgoff, void *data)
+{
+ struct kcore_copy_info *kci = data;
+ u64 end = start + len;
+ struct sym_data *sdat;
+
+ if (kcore_copy__map(kci, start, end, pgoff, kci->stext, kci->etext))
+ return -1;
+
+ if (kcore_copy__map(kci, start, end, pgoff, kci->first_module,
+ kci->last_module_symbol))
+ return -1;
+
+ list_for_each_entry(sdat, &kci->syms, node) {
+ u64 s = round_down(sdat->addr, page_size);
+
+ if (kcore_copy__map(kci, start, end, pgoff, s, s + len))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int kcore_copy__read_maps(struct kcore_copy_info *kci, Elf *elf)
+{
+ if (elf_read_maps(elf, true, kcore_copy__read_map, kci) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void kcore_copy__find_remaps(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p, *k = NULL;
+ u64 kend;
+
+ if (!kci->stext)
+ return;
+
+ /* Find phdr that corresponds to the kernel map (contains stext) */
+ kcore_copy__for_each_phdr(kci, p) {
+ u64 pend = p->addr + p->len - 1;
+
+ if (p->addr <= kci->stext && pend >= kci->stext) {
+ k = p;
+ break;
+ }
+ }
+
+ if (!k)
+ return;
+
+ kend = k->offset + k->len;
+
+ /* Find phdrs that remap the kernel */
+ kcore_copy__for_each_phdr(kci, p) {
+ u64 pend = p->offset + p->len;
+
+ if (p == k)
+ continue;
+
+ if (p->offset >= k->offset && pend <= kend)
+ p->remaps = k;
+ }
+}
+
+static void kcore_copy__layout(struct kcore_copy_info *kci)
+{
+ struct phdr_data *p;
+ off_t rel = 0;
+
+ kcore_copy__find_remaps(kci);
+
+ kcore_copy__for_each_phdr(kci, p) {
+ if (!p->remaps) {
+ p->rel = rel;
+ rel += p->len;
+ }
+ kci->phnum += 1;
+ }
+
+ kcore_copy__for_each_phdr(kci, p) {
+ struct phdr_data *k = p->remaps;
+
+ if (k)
+ p->rel = p->offset - k->offset + k->rel;
+ }
+}
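+
+/*
+ * Example: with a kernel phdr K (offset 0x0, len 0x1000) and a phdr R
+ * that remaps K's bytes starting at offset 0x200, only K gets fresh
+ * output space (K->rel = 0), while R reuses it with
+ * R->rel = 0x200 - 0x0 + 0 = 0x200; phnum counts both.
+ */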
+
+static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
+ Elf *elf)
+{
+ if (kcore_copy__parse_kallsyms(kci, dir))
+ return -1;
+
+ if (kcore_copy__parse_modules(kci, dir))
+ return -1;
+
+ if (kci->stext)
+ kci->stext = round_down(kci->stext, page_size);
+ else
+ kci->stext = round_down(kci->first_symbol, page_size);
+
+ if (kci->etext) {
+ kci->etext = round_up(kci->etext, page_size);
+ } else if (kci->last_symbol) {
+ kci->etext = round_up(kci->last_symbol, page_size);
+ kci->etext += page_size;
+ }
+
+ if (kci->first_module_symbol &&
+ (!kci->first_module || kci->first_module_symbol < kci->first_module))
+ kci->first_module = kci->first_module_symbol;
+
+ kci->first_module = round_down(kci->first_module, page_size);
+
+ if (kci->last_module_symbol) {
+ kci->last_module_symbol = round_up(kci->last_module_symbol,
+ page_size);
+ kci->last_module_symbol += page_size;
+ }
+
+ if (!kci->stext || !kci->etext)
+ return -1;
+
+ if (kci->first_module && !kci->last_module_symbol)
+ return -1;
+
+ if (kcore_copy__read_maps(kci, elf))
+ return -1;
+
+ kcore_copy__layout(kci);
+
+ return 0;
+}
+
+static int kcore_copy__copy_file(const char *from_dir, const char *to_dir,
+ const char *name)
+{
+ char from_filename[PATH_MAX];
+ char to_filename[PATH_MAX];
+
+ scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+ scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+ return copyfile_mode(from_filename, to_filename, 0400);
+}
+
+static int kcore_copy__unlink(const char *dir, const char *name)
+{
+ char filename[PATH_MAX];
+
+ scnprintf(filename, PATH_MAX, "%s/%s", dir, name);
+
+ return unlink(filename);
+}
+
+static int kcore_copy__compare_fds(int from, int to)
+{
+ char *buf_from;
+ char *buf_to;
+ ssize_t ret;
+ size_t len;
+ int err = -1;
+
+ buf_from = malloc(page_size);
+ buf_to = malloc(page_size);
+ if (!buf_from || !buf_to)
+ goto out;
+
+ while (1) {
+ /* Use read because mmap won't work on proc files */
+ ret = read(from, buf_from, page_size);
+ if (ret < 0)
+ goto out;
+
+ if (!ret)
+ break;
+
+ len = ret;
+
+ if (readn(to, buf_to, len) != (int)len)
+ goto out;
+
+ if (memcmp(buf_from, buf_to, len))
+ goto out;
+ }
+
+ err = 0;
+out:
+ free(buf_to);
+ free(buf_from);
+ return err;
+}
+
+static int kcore_copy__compare_files(const char *from_filename,
+ const char *to_filename)
+{
+ int from, to, err = -1;
+
+ from = open(from_filename, O_RDONLY);
+ if (from < 0)
+ return -1;
+
+ to = open(to_filename, O_RDONLY);
+ if (to < 0)
+ goto out_close_from;
+
+ err = kcore_copy__compare_fds(from, to);
+
+ close(to);
+out_close_from:
+ close(from);
+ return err;
+}
+
+static int kcore_copy__compare_file(const char *from_dir, const char *to_dir,
+ const char *name)
+{
+ char from_filename[PATH_MAX];
+ char to_filename[PATH_MAX];
+
+ scnprintf(from_filename, PATH_MAX, "%s/%s", from_dir, name);
+ scnprintf(to_filename, PATH_MAX, "%s/%s", to_dir, name);
+
+ return kcore_copy__compare_files(from_filename, to_filename);
+}
+
+/**
+ * kcore_copy - copy kallsyms, modules and kcore from one directory to another.
+ * @from_dir: from directory
+ * @to_dir: to directory
+ *
+ * This function copies kallsyms, modules and kcore files from one directory to
+ * another. kallsyms and modules are copied entirely. Only code segments are
+ * copied from kcore. It is assumed that two segments suffice: one for the
+ * kernel proper and one for all the modules. The code segments are determined
+ * from kallsyms and modules files. The kernel map starts at _stext or the
+ * lowest function symbol, and ends at _etext or the highest function symbol.
+ * The module map starts at the lowest module address and ends at the highest
+ * module symbol. Start addresses are rounded down to the nearest page. End
+ * addresses are rounded up to the nearest page. An extra page is added to the
+ * highest kernel symbol and highest module symbol to, hopefully, encompass that
+ * symbol too. Because it contains only code sections, the resulting kcore is
+ * unusual. One significant peculiarity is that the mapping (start -> pgoff)
+ * is not the same for the kernel map and the modules map. That happens because
+ * the data is copied adjacently whereas the original kcore has gaps. Finally,
+ * kallsyms and modules files are compared with their copies to check that
+ * modules have not been loaded or unloaded while the copies were taking place.
+ *
+ * Return: %0 on success, %-1 on failure.
+ */
+int kcore_copy(const char *from_dir, const char *to_dir)
+{
+ struct kcore kcore;
+ struct kcore extract;
+ int idx = 0, err = -1;
+ off_t offset, sz;
+ struct kcore_copy_info kci = { .stext = 0, };
+ char kcore_filename[PATH_MAX];
+ char extract_filename[PATH_MAX];
+ struct phdr_data *p;
+
+ INIT_LIST_HEAD(&kci.phdrs);
+ INIT_LIST_HEAD(&kci.syms);
+
+ if (kcore_copy__copy_file(from_dir, to_dir, "kallsyms"))
+ return -1;
+
+ if (kcore_copy__copy_file(from_dir, to_dir, "modules"))
+ goto out_unlink_kallsyms;
+
+ scnprintf(kcore_filename, PATH_MAX, "%s/kcore", from_dir);
+ scnprintf(extract_filename, PATH_MAX, "%s/kcore", to_dir);
+
+ if (kcore__open(&kcore, kcore_filename))
+ goto out_unlink_modules;
+
+ if (kcore_copy__calc_maps(&kci, from_dir, kcore.elf))
+ goto out_kcore_close;
+
+ if (kcore__init(&extract, extract_filename, kcore.elfclass, false))
+ goto out_kcore_close;
+
+ if (kcore__copy_hdr(&kcore, &extract, kci.phnum))
+ goto out_extract_close;
+
+ offset = gelf_fsize(extract.elf, ELF_T_EHDR, 1, EV_CURRENT) +
+ gelf_fsize(extract.elf, ELF_T_PHDR, kci.phnum, EV_CURRENT);
+ offset = round_up(offset, page_size);
+
+ kcore_copy__for_each_phdr(&kci, p) {
+ off_t offs = p->rel + offset;
+
+ if (kcore__add_phdr(&extract, idx++, offs, p->addr, p->len))
+ goto out_extract_close;
+ }
+
+ sz = kcore__write(&extract);
+ if (sz < 0 || sz > offset)
+ goto out_extract_close;
+
+ kcore_copy__for_each_phdr(&kci, p) {
+ off_t offs = p->rel + offset;
+
+ if (p->remaps)
+ continue;
+ if (copy_bytes(kcore.fd, p->offset, extract.fd, offs, p->len))
+ goto out_extract_close;
+ }
+
+ if (kcore_copy__compare_file(from_dir, to_dir, "modules"))
+ goto out_extract_close;
+
+ if (kcore_copy__compare_file(from_dir, to_dir, "kallsyms"))
+ goto out_extract_close;
+
+ err = 0;
+
+out_extract_close:
+ kcore__close(&extract);
+ if (err)
+ unlink(extract_filename);
+out_kcore_close:
+ kcore__close(&kcore);
+out_unlink_modules:
+ if (err)
+ kcore_copy__unlink(to_dir, "modules");
+out_unlink_kallsyms:
+ if (err)
+ kcore_copy__unlink(to_dir, "kallsyms");
+
+ kcore_copy__free_phdrs(&kci);
+ kcore_copy__free_syms(&kci);
+
+ return err;
+}
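+
+/*
+ * Usage sketch (illustrative; the destination path is hypothetical):
+ *
+ *	if (kcore_copy("/proc", "/tmp/kcore_dir"))
+ *		pr_err("failed to snapshot kcore\n");
+ *
+ * On success /tmp/kcore_dir holds kallsyms, modules and a trimmed kcore
+ * containing just the kernel and module code segments.
+ */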
+
+int kcore_extract__create(struct kcore_extract *kce)
+{
+ struct kcore kcore;
+ struct kcore extract;
+ size_t count = 1;
+ int idx = 0, err = -1;
+ off_t offset = page_size, sz;
+
+ if (kcore__open(&kcore, kce->kcore_filename))
+ return -1;
+
+ strcpy(kce->extract_filename, PERF_KCORE_EXTRACT);
+ if (kcore__init(&extract, kce->extract_filename, kcore.elfclass, true))
+ goto out_kcore_close;
+
+ if (kcore__copy_hdr(&kcore, &extract, count))
+ goto out_extract_close;
+
+ if (kcore__add_phdr(&extract, idx, offset, kce->addr, kce->len))
+ goto out_extract_close;
+
+ sz = kcore__write(&extract);
+ if (sz < 0 || sz > offset)
+ goto out_extract_close;
+
+ if (copy_bytes(kcore.fd, kce->offs, extract.fd, offset, kce->len))
+ goto out_extract_close;
+
+ err = 0;
+
+out_extract_close:
+ kcore__close(&extract);
+ if (err)
+ unlink(kce->extract_filename);
+out_kcore_close:
+ kcore__close(&kcore);
+
+ return err;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce)
+{
+ unlink(kce->extract_filename);
+}
+
+#ifdef HAVE_GELF_GETNOTE_SUPPORT
+/**
+ * populate_sdt_note - parse raw data and identify an SDT note
+ * @elf: elf of the opened file
+ * @data: raw data of a section with description offset applied
+ * @len: note description size
+ * @sdt_notes: list to add the SDT note to
+ *
+ * Responsible for parsing the @data in the .note.stapsdt section of @elf;
+ * if it is an SDT note, it is appended to the @sdt_notes list.
+ */
+static int populate_sdt_note(Elf **elf, const char *data, size_t len,
+ struct list_head *sdt_notes)
+{
+ const char *provider, *name, *args;
+ struct sdt_note *tmp = NULL;
+ GElf_Ehdr ehdr;
+ GElf_Addr base_off = 0;
+ GElf_Shdr shdr;
+ int ret = -EINVAL;
+
+ union {
+ Elf64_Addr a64[NR_ADDR];
+ Elf32_Addr a32[NR_ADDR];
+ } buf;
+
+ Elf_Data dst = {
+ .d_buf = &buf, .d_type = ELF_T_ADDR, .d_version = EV_CURRENT,
+ .d_size = gelf_fsize((*elf), ELF_T_ADDR, NR_ADDR, EV_CURRENT),
+ .d_off = 0, .d_align = 0
+ };
+ Elf_Data src = {
+ .d_buf = (void *) data, .d_type = ELF_T_ADDR,
+ .d_version = EV_CURRENT, .d_size = dst.d_size, .d_off = 0,
+ .d_align = 0
+ };
+
+ tmp = (struct sdt_note *)calloc(1, sizeof(struct sdt_note));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out_err;
+ }
+
+ INIT_LIST_HEAD(&tmp->note_list);
+
+ if (len < dst.d_size + 3)
+ goto out_free_note;
+
+ /* Translation from file representation to memory representation */
+ if (gelf_xlatetom(*elf, &dst, &src,
+ elf_getident(*elf, NULL)[EI_DATA]) == NULL) {
+ pr_err("gelf_xlatetom : %s\n", elf_errmsg(-1));
+ goto out_free_note;
+ }
+
+ /* Populate the fields of sdt_note */
+ provider = data + dst.d_size;
+
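+	/*
+	 * memchr() below returns a pointer to provider's terminating NUL
+	 * (or NULL); the post-increment in the test skips past that NUL so
+	 * that 'name' points at the next string in the note.
+	 */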
+ name = (const char *)memchr(provider, '\0', data + len - provider);
+ if (name++ == NULL)
+ goto out_free_note;
+
+ tmp->provider = strdup(provider);
+ if (!tmp->provider) {
+ ret = -ENOMEM;
+ goto out_free_note;
+ }
+ tmp->name = strdup(name);
+ if (!tmp->name) {
+ ret = -ENOMEM;
+ goto out_free_prov;
+ }
+
+ args = memchr(name, '\0', data + len - name);
+
+ /*
+ * There is no argument if:
+ * - We reached the end of the note;
+ * - There is not enough room to hold a potential string;
+ * - The argument string is empty or just contains ':'.
+ */
+ if (args == NULL || data + len - args < 2 ||
+ args[1] == ':' || args[1] == '\0')
+ tmp->args = NULL;
+ else {
+ tmp->args = strdup(++args);
+ if (!tmp->args) {
+ ret = -ENOMEM;
+ goto out_free_name;
+ }
+ }
+
+ if (gelf_getclass(*elf) == ELFCLASS32) {
+ memcpy(&tmp->addr, &buf, 3 * sizeof(Elf32_Addr));
+ tmp->bit32 = true;
+ } else {
+ memcpy(&tmp->addr, &buf, 3 * sizeof(Elf64_Addr));
+ tmp->bit32 = false;
+ }
+
+ if (!gelf_getehdr(*elf, &ehdr)) {
+ pr_debug("%s : cannot get elf header.\n", __func__);
+ ret = -EBADF;
+ goto out_free_args;
+ }
+
+	/* Adjust for the prelink effect:
+	 * Find the .stapsdt.base section; it helps us handle prelinking
+	 * (if present). Compare the retrieved file offset of the base
+	 * section with the base address in the description of the SDT
+	 * note. If they differ, adjust the note location accordingly.
+ */
+ if (elf_section_by_name(*elf, &ehdr, &shdr, SDT_BASE_SCN, NULL)) {
+ base_off = shdr.sh_offset;
+ if (base_off) {
+ if (tmp->bit32)
+ tmp->addr.a32[0] = tmp->addr.a32[0] + base_off -
+ tmp->addr.a32[1];
+ else
+ tmp->addr.a64[0] = tmp->addr.a64[0] + base_off -
+ tmp->addr.a64[1];
+ }
+ }
+
+ list_add_tail(&tmp->note_list, sdt_notes);
+ return 0;
+
+out_free_args:
+ free(tmp->args);
+out_free_name:
+ free(tmp->name);
+out_free_prov:
+ free(tmp->provider);
+out_free_note:
+ free(tmp);
+out_err:
+ return ret;
+}
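+
+/*
+ * For reference, the descriptor parsed above is laid out as three
+ * address-sized words (location, base, semaphore) followed by
+ * NUL-terminated strings, e.g.:
+ *
+ *	[addr][base][sema]"provider\0""probe_name\0""-4@%rdi\0"
+ *
+ * where the argument string ("-4@%rdi") is optional.
+ */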
+
+/**
+ * construct_sdt_notes_list - construct a list of SDT notes
+ * @elf: elf to look into
+ * @sdt_notes: empty list_head
+ *
+ * Scans the sections in 'elf' for the .note.stapsdt section,
+ * then calls populate_sdt_note() to find the SDT events and
+ * populate 'sdt_notes'.
+ */
+static int construct_sdt_notes_list(Elf *elf, struct list_head *sdt_notes)
+{
+ GElf_Ehdr ehdr;
+ Elf_Scn *scn = NULL;
+ Elf_Data *data;
+ GElf_Shdr shdr;
+ size_t shstrndx, next;
+ GElf_Nhdr nhdr;
+ size_t name_off, desc_off, offset;
+ int ret = 0;
+
+ if (gelf_getehdr(elf, &ehdr) == NULL) {
+ ret = -EBADF;
+ goto out_ret;
+ }
+ if (elf_getshdrstrndx(elf, &shstrndx) != 0) {
+ ret = -EBADF;
+ goto out_ret;
+ }
+
+ /* Look for the required section */
+ scn = elf_section_by_name(elf, &ehdr, &shdr, SDT_NOTE_SCN, NULL);
+ if (!scn) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
+
+ if ((shdr.sh_type != SHT_NOTE) || (shdr.sh_flags & SHF_ALLOC)) {
+ ret = -ENOENT;
+ goto out_ret;
+ }
+
+ data = elf_getdata(scn, NULL);
+
+ /* Get the SDT notes */
+ for (offset = 0; (next = gelf_getnote(data, offset, &nhdr, &name_off,
+ &desc_off)) > 0; offset = next) {
+ if (nhdr.n_namesz == sizeof(SDT_NOTE_NAME) &&
+ !memcmp(data->d_buf + name_off, SDT_NOTE_NAME,
+ sizeof(SDT_NOTE_NAME))) {
+ /* Check the type of the note */
+ if (nhdr.n_type != SDT_NOTE_TYPE)
+ goto out_ret;
+
+ ret = populate_sdt_note(&elf, ((data->d_buf) + desc_off),
+ nhdr.n_descsz, sdt_notes);
+ if (ret < 0)
+ goto out_ret;
+ }
+ }
+ if (list_empty(sdt_notes))
+ ret = -ENOENT;
+
+out_ret:
+ return ret;
+}
+
+/**
+ * get_sdt_note_list - wrapper to construct a list of SDT notes
+ * @head: empty list_head
+ * @target: file to find SDT notes from
+ *
+ * This opens the file, initializes the ELF and then calls
+ * construct_sdt_notes_list().
+ */
+int get_sdt_note_list(struct list_head *head, const char *target)
+{
+ Elf *elf;
+ int fd, ret;
+
+ fd = open(target, O_RDONLY);
+ if (fd < 0)
+ return -EBADF;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (!elf) {
+ ret = -EBADF;
+ goto out_close;
+ }
+ ret = construct_sdt_notes_list(elf, head);
+ elf_end(elf);
+out_close:
+ close(fd);
+ return ret;
+}
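+
+/*
+ * Illustrative sketch (a usage example with a hypothetical target path,
+ * not part of this file): a caller can enumerate the SDT markers of a
+ * binary with the helpers above, assuming the usual tools list.h
+ * primitives:
+ *
+ *	LIST_HEAD(sdt_notes);
+ *
+ *	if (get_sdt_note_list(&sdt_notes, "/usr/bin/prog") == 0) {
+ *		struct sdt_note *pos;
+ *
+ *		list_for_each_entry(pos, &sdt_notes, note_list)
+ *			printf("%s:%s\n", pos->provider, pos->name);
+ *		cleanup_sdt_note_list(&sdt_notes);
+ *	}
+ */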
+
+/**
+ * cleanup_sdt_note_list : free the sdt notes' list
+ * @sdt_notes: sdt notes' list
+ *
+ * Free up the SDT notes in @sdt_notes.
+ * Returns the number of SDT notes freed.
+ */
+int cleanup_sdt_note_list(struct list_head *sdt_notes)
+{
+ struct sdt_note *tmp, *pos;
+ int nr_free = 0;
+
+ list_for_each_entry_safe(pos, tmp, sdt_notes, note_list) {
+ list_del(&pos->note_list);
+ free(pos->name);
+ free(pos->provider);
+ free(pos->args); /* args may be NULL; free(NULL) is a no-op */
+ free(pos);
+ nr_free++;
+ }
+ return nr_free;
+}
+
+/**
+ * sdt_notes__get_count: Counts the number of sdt events
+ * @start: list_head to sdt_notes list
+ *
+ * Returns the number of SDT notes in a list
+ */
+int sdt_notes__get_count(struct list_head *start)
+{
+ struct sdt_note *sdt_ptr;
+ int count = 0;
+
+ list_for_each_entry(sdt_ptr, start, note_list)
+ count++;
+ return count;
+}
+#endif
+
+void symbol__elf_init(void)
+{
+ elf_version(EV_CURRENT);
+}
diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
new file mode 100644
index 000000000..7119df77d
--- /dev/null
+++ b/tools/perf/util/symbol-minimal.c
@@ -0,0 +1,383 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "symbol.h"
+#include "util.h"
+
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <byteswap.h>
+#include <sys/stat.h>
+
+
+static bool check_need_swap(int file_endian)
+{
+ const int data = 1;
+ u8 *check = (u8 *)&data;
+ int host_endian;
+
+ if (check[0] == 1)
+ host_endian = ELFDATA2LSB;
+ else
+ host_endian = ELFDATA2MSB;
+
+ return host_endian != file_endian;
+}
+
+#define NOTE_ALIGN(sz) (((sz) + 3) & ~3)
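+
+/*
+ * Illustrative: NOTE_ALIGN() rounds a note field size up to the next
+ * 4-byte boundary, as the ELF note format requires, e.g.
+ * NOTE_ALIGN(5) == 8 and NOTE_ALIGN(8) == 8.
+ */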
+
+#define NT_GNU_BUILD_ID 3
+
+static int read_build_id(void *note_data, size_t note_len, void *bf,
+ size_t size, bool need_swap)
+{
+ struct {
+ u32 n_namesz;
+ u32 n_descsz;
+ u32 n_type;
+ } *nhdr;
+ void *ptr;
+
+ ptr = note_data;
+ while (ptr < (note_data + note_len)) {
+ const char *name;
+ size_t namesz, descsz;
+
+ nhdr = ptr;
+ if (need_swap) {
+ nhdr->n_namesz = bswap_32(nhdr->n_namesz);
+ nhdr->n_descsz = bswap_32(nhdr->n_descsz);
+ nhdr->n_type = bswap_32(nhdr->n_type);
+ }
+
+ namesz = NOTE_ALIGN(nhdr->n_namesz);
+ descsz = NOTE_ALIGN(nhdr->n_descsz);
+
+ ptr += sizeof(*nhdr);
+ name = ptr;
+ ptr += namesz;
+ if (nhdr->n_type == NT_GNU_BUILD_ID &&
+ nhdr->n_namesz == sizeof("GNU")) {
+ if (memcmp(name, "GNU", sizeof("GNU")) == 0) {
+ size_t sz = min(size, descsz);
+ memcpy(bf, ptr, sz);
+ memset(bf + sz, 0, size - sz);
+ return 0;
+ }
+ }
+ ptr += descsz;
+ }
+
+ return -1;
+}
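+
+/*
+ * Illustrative sketch of the note entry read_build_id() looks for: a
+ * GNU build-id note has n_namesz == 4 ("GNU\0"), n_type ==
+ * NT_GNU_BUILD_ID and the id itself in the descriptor, typically:
+ *
+ *	n_namesz = 4, n_descsz = 20, n_type = 3
+ *	name     = "GNU\0"
+ *	desc     = <20-byte SHA-1 build id>
+ *
+ * Both name and descriptor are padded to 4-byte boundaries in the file,
+ * which is why the walk above uses NOTE_ALIGN() on both sizes.
+ */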
+
+int filename__read_debuglink(const char *filename __maybe_unused,
+ char *debuglink __maybe_unused,
+ size_t size __maybe_unused)
+{
+ return -1;
+}
+
+/*
+ * Only try the PT_NOTE program headers; otherwise fail.
+ */
+int filename__read_build_id(const char *filename, void *bf, size_t size)
+{
+ FILE *fp;
+ int ret = -1;
+ bool need_swap = false;
+ u8 e_ident[EI_NIDENT];
+ size_t buf_size;
+ void *buf;
+ int i;
+
+ fp = fopen(filename, "r");
+ if (fp == NULL)
+ return -1;
+
+ if (fread(e_ident, sizeof(e_ident), 1, fp) != 1)
+ goto out;
+
+ if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+ e_ident[EI_VERSION] != EV_CURRENT)
+ goto out;
+
+ need_swap = check_need_swap(e_ident[EI_DATA]);
+
+ /* for simplicity */
+ fseek(fp, 0, SEEK_SET);
+
+ if (e_ident[EI_CLASS] == ELFCLASS32) {
+ Elf32_Ehdr ehdr;
+ Elf32_Phdr *phdr;
+
+ if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+ goto out;
+
+ if (need_swap) {
+ ehdr.e_phoff = bswap_32(ehdr.e_phoff);
+ ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+ ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ }
+
+ buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+ buf = malloc(buf_size);
+ if (buf == NULL)
+ goto out;
+
+ fseek(fp, ehdr.e_phoff, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ void *tmp;
+ long offset;
+
+ if (need_swap) {
+ phdr->p_type = bswap_32(phdr->p_type);
+ phdr->p_offset = bswap_32(phdr->p_offset);
+ phdr->p_filesz = bswap_32(phdr->p_filesz);
+ }
+
+ if (phdr->p_type != PT_NOTE)
+ continue;
+
+ buf_size = phdr->p_filesz;
+ offset = phdr->p_offset;
+ tmp = realloc(buf, buf_size);
+ if (tmp == NULL)
+ goto out_free;
+
+ buf = tmp;
+ fseek(fp, offset, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ ret = read_build_id(buf, buf_size, bf, size, need_swap);
+ if (ret == 0)
+ ret = size;
+ break;
+ }
+ } else {
+ Elf64_Ehdr ehdr;
+ Elf64_Phdr *phdr;
+
+ if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
+ goto out;
+
+ if (need_swap) {
+ ehdr.e_phoff = bswap_64(ehdr.e_phoff);
+ ehdr.e_phentsize = bswap_16(ehdr.e_phentsize);
+ ehdr.e_phnum = bswap_16(ehdr.e_phnum);
+ }
+
+ buf_size = ehdr.e_phentsize * ehdr.e_phnum;
+ buf = malloc(buf_size);
+ if (buf == NULL)
+ goto out;
+
+ fseek(fp, ehdr.e_phoff, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
+ void *tmp;
+ long offset;
+
+ if (need_swap) {
+ phdr->p_type = bswap_32(phdr->p_type);
+ phdr->p_offset = bswap_64(phdr->p_offset);
+ phdr->p_filesz = bswap_64(phdr->p_filesz);
+ }
+
+ if (phdr->p_type != PT_NOTE)
+ continue;
+
+ buf_size = phdr->p_filesz;
+ offset = phdr->p_offset;
+ tmp = realloc(buf, buf_size);
+ if (tmp == NULL)
+ goto out_free;
+
+ buf = tmp;
+ fseek(fp, offset, SEEK_SET);
+ if (fread(buf, buf_size, 1, fp) != 1)
+ goto out_free;
+
+ ret = read_build_id(buf, buf_size, bf, size, need_swap);
+ if (ret == 0)
+ ret = size;
+ break;
+ }
+ }
+out_free:
+ free(buf);
+out:
+ fclose(fp);
+ return ret;
+}
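+
+/*
+ * Example (illustrative sketch, hypothetical path): printing a file's
+ * build id as hex using the reader above. On success the function
+ * returns the caller's buffer size, with the tail zero-padded:
+ *
+ *	unsigned char bid[BUILD_ID_SIZE];
+ *	int i, len = filename__read_build_id("/bin/ls", bid, sizeof(bid));
+ *
+ *	for (i = 0; i < len; i++)
+ *		printf("%02x", bid[i]);
+ */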
+
+int sysfs__read_build_id(const char *filename, void *build_id, size_t size)
+{
+ int fd;
+ int ret = -1;
+ struct stat stbuf;
+ size_t buf_size;
+ void *buf;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ if (fstat(fd, &stbuf) < 0)
+ goto out;
+
+ buf_size = stbuf.st_size;
+ buf = malloc(buf_size);
+ if (buf == NULL)
+ goto out;
+
+ if (read(fd, buf, buf_size) != (ssize_t) buf_size)
+ goto out_free;
+
+ ret = read_build_id(buf, buf_size, build_id, size, false);
+out_free:
+ free(buf);
+out:
+ close(fd);
+ return ret;
+}
+
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+ enum dso_binary_type type)
+{
+ int fd = open(name, O_RDONLY);
+ if (fd < 0)
+ goto out_errno;
+
+ ss->name = strdup(name);
+ if (!ss->name)
+ goto out_close;
+
+ ss->fd = fd;
+ ss->type = type;
+
+ return 0;
+out_close:
+ close(fd);
+out_errno:
+ dso->load_errno = errno;
+ return -1;
+}
+
+bool symsrc__possibly_runtime(struct symsrc *ss __maybe_unused)
+{
+ /* Assume all sym sources could be a runtime image. */
+ return true;
+}
+
+bool symsrc__has_symtab(struct symsrc *ss __maybe_unused)
+{
+ return false;
+}
+
+void symsrc__destroy(struct symsrc *ss)
+{
+ zfree(&ss->name);
+ close(ss->fd);
+}
+
+int dso__synthesize_plt_symbols(struct dso *dso __maybe_unused,
+ struct symsrc *ss __maybe_unused)
+{
+ return 0;
+}
+
+static int fd__is_64_bit(int fd)
+{
+ u8 e_ident[EI_NIDENT];
+
+ if (lseek(fd, 0, SEEK_SET))
+ return -1;
+
+ if (readn(fd, e_ident, sizeof(e_ident)) != sizeof(e_ident))
+ return -1;
+
+ if (memcmp(e_ident, ELFMAG, SELFMAG) ||
+ e_ident[EI_VERSION] != EV_CURRENT)
+ return -1;
+
+ return e_ident[EI_CLASS] == ELFCLASS64;
+}
+
+enum dso_type dso__type_fd(int fd)
+{
+ Elf64_Ehdr ehdr;
+ int ret;
+
+ ret = fd__is_64_bit(fd);
+ if (ret < 0)
+ return DSO__TYPE_UNKNOWN;
+
+ if (ret)
+ return DSO__TYPE_64BIT;
+
+ if (readn(fd, &ehdr, sizeof(ehdr)) != sizeof(ehdr))
+ return DSO__TYPE_UNKNOWN;
+
+ if (ehdr.e_machine == EM_X86_64)
+ return DSO__TYPE_X32BIT;
+
+ return DSO__TYPE_32BIT;
+}
+
+int dso__load_sym(struct dso *dso, struct map *map __maybe_unused,
+ struct symsrc *ss,
+ struct symsrc *runtime_ss __maybe_unused,
+ int kmodule __maybe_unused)
+{
+ unsigned char build_id[BUILD_ID_SIZE];
+ int ret;
+
+ ret = fd__is_64_bit(ss->fd);
+ if (ret >= 0)
+ dso->is_64_bit = ret;
+
+ if (filename__read_build_id(ss->name, build_id, BUILD_ID_SIZE) > 0) {
+ dso__set_build_id(dso, build_id);
+ }
+ return 0;
+}
+
+int file__read_maps(int fd __maybe_unused, bool exe __maybe_unused,
+ mapfn_t mapfn __maybe_unused, void *data __maybe_unused,
+ bool *is_64_bit __maybe_unused)
+{
+ return -1;
+}
+
+int kcore_extract__create(struct kcore_extract *kce __maybe_unused)
+{
+ return -1;
+}
+
+void kcore_extract__delete(struct kcore_extract *kce __maybe_unused)
+{
+}
+
+int kcore_copy(const char *from_dir __maybe_unused,
+ const char *to_dir __maybe_unused)
+{
+ return -1;
+}
+
+void symbol__elf_init(void)
+{
+}
+
+char *dso__demangle_sym(struct dso *dso __maybe_unused,
+ int kmodule __maybe_unused,
+ const char *elf_name __maybe_unused)
+{
+ return NULL;
+}
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
new file mode 100644
index 000000000..6abd1dce1
--- /dev/null
+++ b/tools/perf/util/symbol.c
@@ -0,0 +1,2257 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include "annotate.h"
+#include "build-id.h"
+#include "util.h"
+#include "debug.h"
+#include "machine.h"
+#include "symbol.h"
+#include "strlist.h"
+#include "intlist.h"
+#include "namespaces.h"
+#include "header.h"
+#include "path.h"
+#include "sane_ctype.h"
+
+#include <elf.h>
+#include <limits.h>
+#include <symbol/kallsyms.h>
+#include <sys/utsname.h>
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map);
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map);
+static bool symbol__is_idle(const char *name);
+
+int vmlinux_path__nr_entries;
+char **vmlinux_path;
+
+struct symbol_conf symbol_conf = {
+ .use_modules = true,
+ .try_vmlinux_path = true,
+ .demangle = true,
+ .demangle_kernel = false,
+ .cumulate_callchain = true,
+ .show_hist_headers = true,
+ .symfs = "",
+ .event_group = true,
+ .inline_name = true,
+};
+
+static enum dso_binary_type binary_type_symtab[] = {
+ DSO_BINARY_TYPE__KALLSYMS,
+ DSO_BINARY_TYPE__GUEST_KALLSYMS,
+ DSO_BINARY_TYPE__JAVA_JIT,
+ DSO_BINARY_TYPE__DEBUGLINK,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE,
+ DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
+ DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
+ DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
+ DSO_BINARY_TYPE__GUEST_KMODULE,
+ DSO_BINARY_TYPE__GUEST_KMODULE_COMP,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
+ DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
+ DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__NOT_FOUND,
+};
+
+#define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab)
+
+static bool symbol_type__filter(char symbol_type)
+{
+ symbol_type = toupper(symbol_type);
+ return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B';
+}
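+
+/*
+ * Illustrative: with the filter above, /proc/kallsyms lines such as
+ *
+ *	ffffffff81000000 T _text
+ *	ffffffff82005000 D jiffies
+ *
+ * are kept (text and data symbols), while e.g. 'r' (read-only data)
+ * and 'a' (absolute) entries are skipped.
+ */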
+
+static int prefix_underscores_count(const char *str)
+{
+ const char *tail = str;
+
+ while (*tail == '_')
+ tail++;
+
+ return tail - str;
+}
+
+void __weak arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+ p->end = c->start;
+}
+
+const char * __weak arch__normalize_symbol_name(const char *name)
+{
+ return name;
+}
+
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+ return strcmp(namea, nameb);
+}
+
+int __weak arch__compare_symbol_names_n(const char *namea, const char *nameb,
+ unsigned int n)
+{
+ return strncmp(namea, nameb, n);
+}
+
+int __weak arch__choose_best_symbol(struct symbol *syma,
+ struct symbol *symb __maybe_unused)
+{
+ /* Avoid "SyS" kernel syscall aliases */
+ if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
+ return SYMBOL_B;
+ if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+ return SYMBOL_B;
+
+ return SYMBOL_A;
+}
+
+static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
+{
+ s64 a;
+ s64 b;
+ size_t na, nb;
+
+ /* Prefer a symbol with non-zero length */
+ a = syma->end - syma->start;
+ b = symb->end - symb->start;
+ if ((b == 0) && (a > 0))
+ return SYMBOL_A;
+ else if ((a == 0) && (b > 0))
+ return SYMBOL_B;
+
+ /* Prefer a non-weak symbol over a weak one */
+ a = syma->binding == STB_WEAK;
+ b = symb->binding == STB_WEAK;
+ if (b && !a)
+ return SYMBOL_A;
+ if (a && !b)
+ return SYMBOL_B;
+
+ /* Prefer a global symbol over a non-global one */
+ a = syma->binding == STB_GLOBAL;
+ b = symb->binding == STB_GLOBAL;
+ if (a && !b)
+ return SYMBOL_A;
+ if (b && !a)
+ return SYMBOL_B;
+
+ /* Prefer the symbol with fewer leading underscores */
+ a = prefix_underscores_count(syma->name);
+ b = prefix_underscores_count(symb->name);
+ if (b > a)
+ return SYMBOL_A;
+ else if (a > b)
+ return SYMBOL_B;
+
+ /* Choose the symbol with the longest name */
+ na = strlen(syma->name);
+ nb = strlen(symb->name);
+ if (na > nb)
+ return SYMBOL_A;
+ else if (na < nb)
+ return SYMBOL_B;
+
+ return arch__choose_best_symbol(syma, symb);
+}
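+
+/*
+ * Illustrative: for duplicate symbols at the same address the rules
+ * above apply in order. A zero-length alias loses to a symbol with a
+ * real size, a weak symbol loses to a global one, "__sys_read" loses
+ * to "sys_read" on the leading-underscore rule, and the default arch
+ * tie-breaker discards "SyS"/"compat_SyS" syscall aliases.
+ */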
+
+void symbols__fixup_duplicate(struct rb_root *symbols)
+{
+ struct rb_node *nd;
+ struct symbol *curr, *next;
+
+ if (symbol_conf.allow_aliases)
+ return;
+
+ nd = rb_first(symbols);
+
+ while (nd) {
+ curr = rb_entry(nd, struct symbol, rb_node);
+again:
+ nd = rb_next(&curr->rb_node);
+ if (!nd)
+ break;
+
+ next = rb_entry(nd, struct symbol, rb_node);
+
+ if (curr->start != next->start)
+ continue;
+
+ if (choose_best_symbol(curr, next) == SYMBOL_A) {
+ rb_erase(&next->rb_node, symbols);
+ symbol__delete(next);
+ goto again;
+ } else {
+ nd = rb_next(&curr->rb_node);
+ rb_erase(&curr->rb_node, symbols);
+ symbol__delete(curr);
+ }
+ }
+}
+
+void symbols__fixup_end(struct rb_root *symbols)
+{
+ struct rb_node *nd, *prevnd = rb_first(symbols);
+ struct symbol *curr, *prev;
+
+ if (prevnd == NULL)
+ return;
+
+ curr = rb_entry(prevnd, struct symbol, rb_node);
+
+ for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
+ prev = curr;
+ curr = rb_entry(nd, struct symbol, rb_node);
+
+ if (prev->end == prev->start || prev->end != curr->start)
+ arch__symbols__fixup_end(prev, curr);
+ }
+
+ /* Last entry */
+ if (curr->end == curr->start)
+ curr->end = roundup(curr->start, 4096) + 4096;
+}
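+
+/*
+ * Illustrative: kallsyms entries carry no size, so symbols arrive here
+ * with end == start. Given sorted starts 0x1000, 0x1040 and 0x10a0,
+ * the pass above sets the first symbol's end to 0x1040 and the
+ * second's to 0x10a0; the last symbol's end is guessed as the next
+ * 4 KiB boundary plus one page (0x3000 here).
+ */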
+
+void map_groups__fixup_end(struct map_groups *mg)
+{
+ struct maps *maps = &mg->maps;
+ struct map *next, *curr;
+
+ down_write(&maps->lock);
+
+ curr = maps__first(maps);
+ if (curr == NULL)
+ goto out_unlock;
+
+ for (next = map__next(curr); next; next = map__next(curr)) {
+ if (!curr->end)
+ curr->end = next->start;
+ curr = next;
+ }
+
+ /*
+ * We still don't have the actual symbols, so guess the
+ * last map's final address.
+ */
+ if (!curr->end)
+ curr->end = ~0ULL;
+
+out_unlock:
+ up_write(&maps->lock);
+}
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name)
+{
+ size_t namelen = strlen(name) + 1;
+ struct symbol *sym = calloc(1, (symbol_conf.priv_size +
+ sizeof(*sym) + namelen));
+ if (sym == NULL)
+ return NULL;
+
+ if (symbol_conf.priv_size) {
+ if (symbol_conf.init_annotation) {
+ struct annotation *notes = (void *)sym;
+ pthread_mutex_init(&notes->lock, NULL);
+ }
+ sym = ((void *)sym) + symbol_conf.priv_size;
+ }
+
+ sym->start = start;
+ sym->end = len ? start + len : start;
+ sym->type = type;
+ sym->binding = binding;
+ sym->namelen = namelen - 1;
+
+ pr_debug4("%s: %s %#" PRIx64 "-%#" PRIx64 "\n",
+ __func__, name, start, sym->end);
+ memcpy(sym->name, name, namelen);
+
+ return sym;
+}
+
+void symbol__delete(struct symbol *sym)
+{
+ free(((void *)sym) - symbol_conf.priv_size);
+}
+
+void symbols__delete(struct rb_root *symbols)
+{
+ struct symbol *pos;
+ struct rb_node *next = rb_first(symbols);
+
+ while (next) {
+ pos = rb_entry(next, struct symbol, rb_node);
+ next = rb_next(&pos->rb_node);
+ rb_erase(&pos->rb_node, symbols);
+ symbol__delete(pos);
+ }
+}
+
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel)
+{
+ struct rb_node **p = &symbols->rb_node;
+ struct rb_node *parent = NULL;
+ const u64 ip = sym->start;
+ struct symbol *s;
+
+ if (kernel) {
+ const char *name = sym->name;
+ /*
+ * ppc64 uses function descriptors and appends a '.' to the
+ * start of every instruction address. Remove it.
+ */
+ if (name[0] == '.')
+ name++;
+ sym->idle = symbol__is_idle(name);
+ }
+
+ while (*p != NULL) {
+ parent = *p;
+ s = rb_entry(parent, struct symbol, rb_node);
+ if (ip < s->start)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&sym->rb_node, parent, p);
+ rb_insert_color(&sym->rb_node, symbols);
+}
+
+void symbols__insert(struct rb_root *symbols, struct symbol *sym)
+{
+ __symbols__insert(symbols, sym, false);
+}
+
+static struct symbol *symbols__find(struct rb_root *symbols, u64 ip)
+{
+ struct rb_node *n;
+
+ if (symbols == NULL)
+ return NULL;
+
+ n = symbols->rb_node;
+
+ while (n) {
+ struct symbol *s = rb_entry(n, struct symbol, rb_node);
+
+ if (ip < s->start)
+ n = n->rb_left;
+ else if (ip > s->end || (ip == s->end && ip != s->start))
+ n = n->rb_right;
+ else
+ return s;
+ }
+
+ return NULL;
+}
+
+static struct symbol *symbols__first(struct rb_root *symbols)
+{
+ struct rb_node *n = rb_first(symbols);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
+static struct symbol *symbols__last(struct rb_root *symbols)
+{
+ struct rb_node *n = rb_last(symbols);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
+static struct symbol *symbols__next(struct symbol *sym)
+{
+ struct rb_node *n = rb_next(&sym->rb_node);
+
+ if (n)
+ return rb_entry(n, struct symbol, rb_node);
+
+ return NULL;
+}
+
+static void symbols__insert_by_name(struct rb_root *symbols, struct symbol *sym)
+{
+ struct rb_node **p = &symbols->rb_node;
+ struct rb_node *parent = NULL;
+ struct symbol_name_rb_node *symn, *s;
+
+ symn = container_of(sym, struct symbol_name_rb_node, sym);
+
+ while (*p != NULL) {
+ parent = *p;
+ s = rb_entry(parent, struct symbol_name_rb_node, rb_node);
+ if (strcmp(sym->name, s->sym.name) < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&symn->rb_node, parent, p);
+ rb_insert_color(&symn->rb_node, symbols);
+}
+
+static void symbols__sort_by_name(struct rb_root *symbols,
+ struct rb_root *source)
+{
+ struct rb_node *nd;
+
+ for (nd = rb_first(source); nd; nd = rb_next(nd)) {
+ struct symbol *pos = rb_entry(nd, struct symbol, rb_node);
+ symbols__insert_by_name(symbols, pos);
+ }
+}
+
+int symbol__match_symbol_name(const char *name, const char *str,
+ enum symbol_tag_include includes)
+{
+ const char *versioning;
+
+ if (includes == SYMBOL_TAG_INCLUDE__DEFAULT_ONLY &&
+ (versioning = strstr(name, "@@"))) {
+ int len = strlen(str);
+
+ if (len < versioning - name)
+ len = versioning - name;
+
+ return arch__compare_symbol_names_n(name, str, len);
+ } else
+ return arch__compare_symbol_names(name, str);
+}
+
+static struct symbol *symbols__find_by_name(struct rb_root *symbols,
+ const char *name,
+ enum symbol_tag_include includes)
+{
+ struct rb_node *n;
+ struct symbol_name_rb_node *s = NULL;
+
+ if (symbols == NULL)
+ return NULL;
+
+ n = symbols->rb_node;
+
+ while (n) {
+ int cmp;
+
+ s = rb_entry(n, struct symbol_name_rb_node, rb_node);
+ cmp = symbol__match_symbol_name(s->sym.name, name, includes);
+
+ if (cmp > 0)
+ n = n->rb_left;
+ else if (cmp < 0)
+ n = n->rb_right;
+ else
+ break;
+ }
+
+ if (n == NULL)
+ return NULL;
+
+ if (includes != SYMBOL_TAG_INCLUDE__DEFAULT_ONLY)
+ /* return first symbol that has same name (if any) */
+ for (n = rb_prev(n); n; n = rb_prev(n)) {
+ struct symbol_name_rb_node *tmp;
+
+ tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
+ if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
+ break;
+
+ s = tmp;
+ }
+
+ return &s->sym;
+}
+
+void dso__reset_find_symbol_cache(struct dso *dso)
+{
+ dso->last_find_result.addr = 0;
+ dso->last_find_result.symbol = NULL;
+}
+
+void dso__insert_symbol(struct dso *dso, struct symbol *sym)
+{
+ __symbols__insert(&dso->symbols, sym, dso->kernel);
+
+ /* update the symbol cache if necessary */
+ if (dso->last_find_result.addr >= sym->start &&
+ (dso->last_find_result.addr < sym->end ||
+ sym->start == sym->end)) {
+ dso->last_find_result.symbol = sym;
+ }
+}
+
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
+{
+ if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
+ dso->last_find_result.addr = addr;
+ dso->last_find_result.symbol = symbols__find(&dso->symbols, addr);
+ }
+
+ return dso->last_find_result.symbol;
+}
+
+struct symbol *dso__first_symbol(struct dso *dso)
+{
+ return symbols__first(&dso->symbols);
+}
+
+struct symbol *dso__last_symbol(struct dso *dso)
+{
+ return symbols__last(&dso->symbols);
+}
+
+struct symbol *dso__next_symbol(struct symbol *sym)
+{
+ return symbols__next(sym);
+}
+
+struct symbol *symbol__next_by_name(struct symbol *sym)
+{
+ struct symbol_name_rb_node *s = container_of(sym, struct symbol_name_rb_node, sym);
+ struct rb_node *n = rb_next(&s->rb_node);
+
+ return n ? &rb_entry(n, struct symbol_name_rb_node, rb_node)->sym : NULL;
+}
+
+ /*
+ * Returns the first symbol that matches @name.
+ */
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name)
+{
+ struct symbol *s = symbols__find_by_name(&dso->symbol_names, name,
+ SYMBOL_TAG_INCLUDE__NONE);
+ if (!s)
+ s = symbols__find_by_name(&dso->symbol_names, name,
+ SYMBOL_TAG_INCLUDE__DEFAULT_ONLY);
+ return s;
+}
+
+void dso__sort_by_name(struct dso *dso)
+{
+ dso__set_sorted_by_name(dso);
+ return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
+}
+
+int modules__parse(const char *filename, void *arg,
+ int (*process_module)(void *arg, const char *name,
+ u64 start, u64 size))
+{
+ char *line = NULL;
+ size_t n;
+ FILE *file;
+ int err = 0;
+
+ file = fopen(filename, "r");
+ if (file == NULL)
+ return -1;
+
+ while (1) {
+ char name[PATH_MAX];
+ u64 start, size;
+ char *sep, *endptr;
+ ssize_t line_len;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0) {
+ if (feof(file))
+ break;
+ err = -1;
+ goto out;
+ }
+
+ if (!line) {
+ err = -1;
+ goto out;
+ }
+
+ line[--line_len] = '\0'; /* \n */
+
+ sep = strrchr(line, 'x');
+ if (sep == NULL)
+ continue;
+
+ hex2u64(sep + 1, &start);
+
+ sep = strchr(line, ' ');
+ if (sep == NULL)
+ continue;
+
+ *sep = '\0';
+
+ scnprintf(name, sizeof(name), "[%s]", line);
+
+ size = strtoul(sep + 1, &endptr, 0);
+ if (*endptr != ' ' && *endptr != '\t')
+ continue;
+
+ err = process_module(arg, name, start, size);
+ if (err)
+ break;
+ }
+out:
+ free(line);
+ fclose(file);
+ return err;
+}
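+
+/*
+ * Illustrative: modules__parse() consumes /proc/modules-style lines,
+ * e.g. (a made-up module):
+ *
+ *	e1000e 245760 0 - Live 0xffffffffc0370000
+ *
+ * which reaches the process_module() callback as name "[e1000e]",
+ * size 245760 and start 0xffffffffc0370000.
+ */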
+
+/*
+ * These are symbols in the kernel image, so make sure that
+ * sym is from a kernel DSO.
+ */
+static bool symbol__is_idle(const char *name)
+{
+ const char * const idle_symbols[] = {
+ "cpu_idle",
+ "cpu_startup_entry",
+ "intel_idle",
+ "default_idle",
+ "native_safe_halt",
+ "enter_idle",
+ "exit_idle",
+ "mwait_idle",
+ "mwait_idle_with_hints",
+ "poll_idle",
+ "ppc64_runlatch_off",
+ "pseries_dedicated_idle_sleep",
+ NULL
+ };
+ int i;
+
+ for (i = 0; idle_symbols[i]; i++) {
+ if (!strcmp(idle_symbols[i], name))
+ return true;
+ }
+
+ return false;
+}
+
+static int map__process_kallsym_symbol(void *arg, const char *name,
+ char type, u64 start)
+{
+ struct symbol *sym;
+ struct dso *dso = arg;
+ struct rb_root *root = &dso->symbols;
+
+ if (!symbol_type__filter(type))
+ return 0;
+
+ /*
+ * module symbols are not sorted so we add all
+ * symbols, setting length to 0, and rely on
+ * symbols__fixup_end() to fix it up.
+ */
+ sym = symbol__new(start, 0, kallsyms2elf_binding(type), kallsyms2elf_type(type), name);
+ if (sym == NULL)
+ return -ENOMEM;
+ /*
+ * We will pass the symbols to the filter later, in
+ * map__split_kallsyms, when we have split the maps per module
+ */
+ __symbols__insert(root, sym, !strchr(name, '['));
+
+ return 0;
+}
+
+/*
+ * Loads the function entries in /proc/kallsyms into kernel_map->dso,
+ * so that we can in the next step set the symbol ->end address and then
+ * call map_groups__split_kallsyms().
+ */
+static int dso__load_all_kallsyms(struct dso *dso, const char *filename)
+{
+ return kallsyms__parse(filename, dso, map__process_kallsym_symbol);
+}
+
+static int map_groups__split_kallsyms_for_kcore(struct map_groups *kmaps, struct dso *dso)
+{
+ struct map *curr_map;
+ struct symbol *pos;
+ int count = 0;
+ struct rb_root old_root = dso->symbols;
+ struct rb_root *root = &dso->symbols;
+ struct rb_node *next = rb_first(root);
+
+ if (!kmaps)
+ return -1;
+
+ *root = RB_ROOT;
+
+ while (next) {
+ char *module;
+
+ pos = rb_entry(next, struct symbol, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ rb_erase_init(&pos->rb_node, &old_root);
+
+ module = strchr(pos->name, '\t');
+ if (module)
+ *module = '\0';
+
+ curr_map = map_groups__find(kmaps, pos->start);
+
+ if (!curr_map) {
+ symbol__delete(pos);
+ continue;
+ }
+
+ pos->start -= curr_map->start - curr_map->pgoff;
+ if (pos->end > curr_map->end)
+ pos->end = curr_map->end;
+ if (pos->end)
+ pos->end -= curr_map->start - curr_map->pgoff;
+ symbols__insert(&curr_map->dso->symbols, pos);
+ ++count;
+ }
+
+ /* Symbols have been adjusted */
+ dso->adjust_symbols = 1;
+
+ return count;
+}
+
+/*
+ * Split the symbols into maps, making sure there are no overlaps, i.e. the
+ * kernel range is broken into several maps, named [kernel].N, as we don't
+ * have the original ELF section names that vmlinux has.
+ */
+static int map_groups__split_kallsyms(struct map_groups *kmaps, struct dso *dso, u64 delta,
+ struct map *initial_map)
+{
+ struct machine *machine;
+ struct map *curr_map = initial_map;
+ struct symbol *pos;
+ int count = 0, moved = 0;
+ struct rb_root *root = &dso->symbols;
+ struct rb_node *next = rb_first(root);
+ int kernel_range = 0;
+ bool x86_64;
+
+ if (!kmaps)
+ return -1;
+
+ machine = kmaps->machine;
+
+ x86_64 = machine__is(machine, "x86_64");
+
+ while (next) {
+ char *module;
+
+ pos = rb_entry(next, struct symbol, rb_node);
+ next = rb_next(&pos->rb_node);
+
+ module = strchr(pos->name, '\t');
+ if (module) {
+ if (!symbol_conf.use_modules)
+ goto discard_symbol;
+
+ *module++ = '\0';
+
+ if (strcmp(curr_map->dso->short_name, module)) {
+ if (curr_map != initial_map &&
+ dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+ machine__is_default_guest(machine)) {
+ /*
+ * We assume all symbols of a module are
+ * contiguous in kallsyms, so curr_map
+ * points to a module and all its
+ * symbols are in its kmap. Mark it as
+ * loaded.
+ */
+ dso__set_loaded(curr_map->dso);
+ }
+
+ curr_map = map_groups__find_by_name(kmaps, module);
+ if (curr_map == NULL) {
+ pr_debug("%s/proc/{kallsyms,modules} "
+ "inconsistency while looking "
+ "for \"%s\" module!\n",
+ machine->root_dir, module);
+ curr_map = initial_map;
+ goto discard_symbol;
+ }
+
+ if (curr_map->dso->loaded &&
+ !machine__is_default_guest(machine))
+ goto discard_symbol;
+ }
+ /*
+ * Adjust so that we look just like what we get from .ko files,
+ * i.e. not prelinked, relative to initial_map->start.
+ */
+ pos->start = curr_map->map_ip(curr_map, pos->start);
+ pos->end = curr_map->map_ip(curr_map, pos->end);
+ } else if (x86_64 && is_entry_trampoline(pos->name)) {
+ /*
+ * These symbols are not needed anymore since the
+ * trampoline maps refer to the text section and its
+ * symbols instead. Avoid having to deal with
+ * relocations, and the assumption that the first symbol
+ * is the start of kernel text, by simply removing the
+ * symbols at this point.
+ */
+ goto discard_symbol;
+ } else if (curr_map != initial_map) {
+ char dso_name[PATH_MAX];
+ struct dso *ndso;
+
+ if (delta) {
+ /* Kernel was relocated at boot time */
+ pos->start -= delta;
+ pos->end -= delta;
+ }
+
+ if (count == 0) {
+ curr_map = initial_map;
+ goto add_symbol;
+ }
+
+ if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ snprintf(dso_name, sizeof(dso_name),
+ "[guest.kernel].%d",
+ kernel_range++);
+ else
+ snprintf(dso_name, sizeof(dso_name),
+ "[kernel].%d",
+ kernel_range++);
+
+ ndso = dso__new(dso_name);
+ if (ndso == NULL)
+ return -1;
+
+ ndso->kernel = dso->kernel;
+
+ curr_map = map__new2(pos->start, ndso);
+ if (curr_map == NULL) {
+ dso__put(ndso);
+ return -1;
+ }
+
+ curr_map->map_ip = curr_map->unmap_ip = identity__map_ip;
+ map_groups__insert(kmaps, curr_map);
+ ++kernel_range;
+ } else if (delta) {
+ /* Kernel was relocated at boot time */
+ pos->start -= delta;
+ pos->end -= delta;
+ }
+add_symbol:
+ if (curr_map != initial_map) {
+ rb_erase(&pos->rb_node, root);
+ symbols__insert(&curr_map->dso->symbols, pos);
+ ++moved;
+ } else
+ ++count;
+
+ continue;
+discard_symbol:
+ rb_erase(&pos->rb_node, root);
+ symbol__delete(pos);
+ }
+
+ if (curr_map != initial_map &&
+ dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+ machine__is_default_guest(kmaps->machine)) {
+ dso__set_loaded(curr_map->dso);
+ }
+
+ return count + moved;
+}
+
+bool symbol__restricted_filename(const char *filename,
+ const char *restricted_filename)
+{
+ bool restricted = false;
+
+ if (symbol_conf.kptr_restrict) {
+ char *r = realpath(filename, NULL);
+
+ if (r != NULL) {
+ restricted = strcmp(r, restricted_filename) == 0;
+ free(r);
+ return restricted;
+ }
+ }
+
+ return restricted;
+}
+
+struct module_info {
+ struct rb_node rb_node;
+ char *name;
+ u64 start;
+};
+
+static void add_module(struct module_info *mi, struct rb_root *modules)
+{
+ struct rb_node **p = &modules->rb_node;
+ struct rb_node *parent = NULL;
+ struct module_info *m;
+
+ while (*p != NULL) {
+ parent = *p;
+ m = rb_entry(parent, struct module_info, rb_node);
+ if (strcmp(mi->name, m->name) < 0)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+ rb_link_node(&mi->rb_node, parent, p);
+ rb_insert_color(&mi->rb_node, modules);
+}
+
+static void delete_modules(struct rb_root *modules)
+{
+ struct module_info *mi;
+ struct rb_node *next = rb_first(modules);
+
+ while (next) {
+ mi = rb_entry(next, struct module_info, rb_node);
+ next = rb_next(&mi->rb_node);
+ rb_erase(&mi->rb_node, modules);
+ zfree(&mi->name);
+ free(mi);
+ }
+}
+
+static struct module_info *find_module(const char *name,
+ struct rb_root *modules)
+{
+ struct rb_node *n = modules->rb_node;
+
+ while (n) {
+ struct module_info *m;
+ int cmp;
+
+ m = rb_entry(n, struct module_info, rb_node);
+ cmp = strcmp(name, m->name);
+ if (cmp < 0)
+ n = n->rb_left;
+ else if (cmp > 0)
+ n = n->rb_right;
+ else
+ return m;
+ }
+
+ return NULL;
+}
+
+static int __read_proc_modules(void *arg, const char *name, u64 start,
+ u64 size __maybe_unused)
+{
+ struct rb_root *modules = arg;
+ struct module_info *mi;
+
+ mi = zalloc(sizeof(struct module_info));
+ if (!mi)
+ return -ENOMEM;
+
+ mi->name = strdup(name);
+ mi->start = start;
+
+ if (!mi->name) {
+ free(mi);
+ return -ENOMEM;
+ }
+
+ add_module(mi, modules);
+
+ return 0;
+}
+
+static int read_proc_modules(const char *filename, struct rb_root *modules)
+{
+ if (symbol__restricted_filename(filename, "/proc/modules"))
+ return -1;
+
+ if (modules__parse(filename, modules, __read_proc_modules)) {
+ delete_modules(modules);
+ return -1;
+ }
+
+ return 0;
+}
+
+int compare_proc_modules(const char *from, const char *to)
+{
+ struct rb_root from_modules = RB_ROOT;
+ struct rb_root to_modules = RB_ROOT;
+ struct rb_node *from_node, *to_node;
+ struct module_info *from_m, *to_m;
+ int ret = -1;
+
+ if (read_proc_modules(from, &from_modules))
+ return -1;
+
+ if (read_proc_modules(to, &to_modules))
+ goto out_delete_from;
+
+ from_node = rb_first(&from_modules);
+ to_node = rb_first(&to_modules);
+ while (from_node) {
+ if (!to_node)
+ break;
+
+ from_m = rb_entry(from_node, struct module_info, rb_node);
+ to_m = rb_entry(to_node, struct module_info, rb_node);
+
+ if (from_m->start != to_m->start ||
+ strcmp(from_m->name, to_m->name))
+ break;
+
+ from_node = rb_next(from_node);
+ to_node = rb_next(to_node);
+ }
+
+ if (!from_node && !to_node)
+ ret = 0;
+
+ delete_modules(&to_modules);
+out_delete_from:
+ delete_modules(&from_modules);
+
+ return ret;
+}
+
+struct map *map_groups__first(struct map_groups *mg)
+{
+ return maps__first(&mg->maps);
+}
+
+static int do_validate_kcore_modules(const char *filename,
+ struct map_groups *kmaps)
+{
+ struct rb_root modules = RB_ROOT;
+ struct map *old_map;
+ int err;
+
+ err = read_proc_modules(filename, &modules);
+ if (err)
+ return err;
+
+ old_map = map_groups__first(kmaps);
+ while (old_map) {
+ struct map *next = map_groups__next(old_map);
+ struct module_info *mi;
+
+ if (!__map__is_kmodule(old_map)) {
+ old_map = next;
+ continue;
+ }
+
+ /* Module must be in memory at the same address */
+ mi = find_module(old_map->dso->short_name, &modules);
+ if (!mi || mi->start != old_map->start) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ old_map = next;
+ }
+out:
+ delete_modules(&modules);
+ return err;
+}
+
+/*
+ * If kallsyms is referenced by name then we look for filename in the same
+ * directory.
+ */
+static bool filename_from_kallsyms_filename(char *filename,
+ const char *base_name,
+ const char *kallsyms_filename)
+{
+ char *name;
+
+ strcpy(filename, kallsyms_filename);
+ name = strrchr(filename, '/');
+ if (!name)
+ return false;
+
+ name += 1;
+
+ if (!strcmp(name, "kallsyms")) {
+ strcpy(name, base_name);
+ return true;
+ }
+
+ return false;
+}
+
+static int validate_kcore_modules(const char *kallsyms_filename,
+ struct map *map)
+{
+ struct map_groups *kmaps = map__kmaps(map);
+ char modules_filename[PATH_MAX];
+
+ if (!kmaps)
+ return -EINVAL;
+
+ if (!filename_from_kallsyms_filename(modules_filename, "modules",
+ kallsyms_filename))
+ return -EINVAL;
+
+ if (do_validate_kcore_modules(modules_filename, kmaps))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int validate_kcore_addresses(const char *kallsyms_filename,
+ struct map *map)
+{
+ struct kmap *kmap = map__kmap(map);
+
+ if (!kmap)
+ return -EINVAL;
+
+ if (kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) {
+ u64 start;
+
+ if (kallsyms__get_function_start(kallsyms_filename,
+ kmap->ref_reloc_sym->name, &start))
+ return -ENOENT;
+ if (start != kmap->ref_reloc_sym->addr)
+ return -EINVAL;
+ }
+
+ return validate_kcore_modules(kallsyms_filename, map);
+}
+
+struct kcore_mapfn_data {
+ struct dso *dso;
+ struct list_head maps;
+};
+
+static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data)
+{
+ struct kcore_mapfn_data *md = data;
+ struct map *map;
+
+ map = map__new2(start, md->dso);
+ if (map == NULL)
+ return -ENOMEM;
+
+ map->end = map->start + len;
+ map->pgoff = pgoff;
+
+ list_add(&map->node, &md->maps);
+
+ return 0;
+}
+
+static int dso__load_kcore(struct dso *dso, struct map *map,
+ const char *kallsyms_filename)
+{
+ struct map_groups *kmaps = map__kmaps(map);
+ struct kcore_mapfn_data md;
+ struct map *old_map, *new_map, *replacement_map = NULL;
+ struct machine *machine;
+ bool is_64_bit;
+ int err, fd;
+ char kcore_filename[PATH_MAX];
+ u64 stext;
+
+ if (!kmaps)
+ return -EINVAL;
+
+ machine = kmaps->machine;
+
+ /* This function requires that the map is the kernel map */
+ if (!__map__is_kernel(map))
+ return -EINVAL;
+
+ if (!filename_from_kallsyms_filename(kcore_filename, "kcore",
+ kallsyms_filename))
+ return -EINVAL;
+
+ /* Modules and kernel must be present at their original addresses */
+ if (validate_kcore_addresses(kallsyms_filename, map))
+ return -EINVAL;
+
+ md.dso = dso;
+ INIT_LIST_HEAD(&md.maps);
+
+ fd = open(kcore_filename, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("Failed to open %s. Note /proc/kcore requires CAP_SYS_RAWIO capability to access.\n",
+ kcore_filename);
+ return -EINVAL;
+ }
+
+ /* Read new maps into temporary lists */
+ err = file__read_maps(fd, map->prot & PROT_EXEC, kcore_mapfn, &md,
+ &is_64_bit);
+ if (err)
+ goto out_err;
+ dso->is_64_bit = is_64_bit;
+
+ if (list_empty(&md.maps)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+
+ /* Remove old maps */
+ old_map = map_groups__first(kmaps);
+ while (old_map) {
+ struct map *next = map_groups__next(old_map);
+
+ if (old_map != map)
+ map_groups__remove(kmaps, old_map);
+ old_map = next;
+ }
+ machine->trampolines_mapped = false;
+
+ /* Find the kernel map using the '_stext' symbol */
+ if (!kallsyms__get_function_start(kallsyms_filename, "_stext", &stext)) {
+ list_for_each_entry(new_map, &md.maps, node) {
+ if (stext >= new_map->start && stext < new_map->end) {
+ replacement_map = new_map;
+ break;
+ }
+ }
+ }
+
+ if (!replacement_map)
+ replacement_map = list_entry(md.maps.next, struct map, node);
+
+ /* Add new maps */
+ while (!list_empty(&md.maps)) {
+ new_map = list_entry(md.maps.next, struct map, node);
+ list_del_init(&new_map->node);
+ if (new_map == replacement_map) {
+ map->start = new_map->start;
+ map->end = new_map->end;
+ map->pgoff = new_map->pgoff;
+ map->map_ip = new_map->map_ip;
+ map->unmap_ip = new_map->unmap_ip;
+ /* Ensure maps are correctly ordered */
+ map__get(map);
+ map_groups__remove(kmaps, map);
+ map_groups__insert(kmaps, map);
+ map__put(map);
+ } else {
+ map_groups__insert(kmaps, new_map);
+ }
+
+ map__put(new_map);
+ }
+
+ if (machine__is(machine, "x86_64")) {
+ u64 addr;
+
+ /*
+ * If one of the corresponding symbols is there, assume the
+ * entry trampoline maps are too.
+ */
+ if (!kallsyms__get_function_start(kallsyms_filename,
+ ENTRY_TRAMPOLINE_NAME,
+ &addr))
+ machine->trampolines_mapped = true;
+ }
+
+ /*
+ * Set the data type and long name so that kcore can be read via
+ * dso__data_read_addr().
+ */
+ if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
+ else
+ dso->binary_type = DSO_BINARY_TYPE__KCORE;
+ dso__set_long_name(dso, strdup(kcore_filename), true);
+
+ close(fd);
+
+ if (map->prot & PROT_EXEC)
+ pr_debug("Using %s for kernel object code\n", kcore_filename);
+ else
+ pr_debug("Using %s for kernel data\n", kcore_filename);
+
+ return 0;
+
+out_err:
+ while (!list_empty(&md.maps)) {
+ map = list_entry(md.maps.next, struct map, node);
+ list_del_init(&map->node);
+ map__put(map);
+ }
+ close(fd);
+ return -EINVAL;
+}
+
+/*
+ * If the kernel is relocated at boot time, kallsyms won't match. Compute the
+ * delta based on the relocation reference symbol.
+ */
+static int kallsyms__delta(struct kmap *kmap, const char *filename, u64 *delta)
+{
+ u64 addr;
+
+ if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name)
+ return 0;
+
+ if (kallsyms__get_function_start(filename, kmap->ref_reloc_sym->name, &addr))
+ return -1;
+
+ *delta = addr - kmap->ref_reloc_sym->addr;
+ return 0;
+}
+
+int __dso__load_kallsyms(struct dso *dso, const char *filename,
+ struct map *map, bool no_kcore)
+{
+ struct kmap *kmap = map__kmap(map);
+ u64 delta = 0;
+
+ if (symbol__restricted_filename(filename, "/proc/kallsyms"))
+ return -1;
+
+ if (!kmap || !kmap->kmaps)
+ return -1;
+
+ if (dso__load_all_kallsyms(dso, filename) < 0)
+ return -1;
+
+ if (kallsyms__delta(kmap, filename, &delta))
+ return -1;
+
+ symbols__fixup_end(&dso->symbols);
+ symbols__fixup_duplicate(&dso->symbols);
+
+ if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+ else
+ dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
+
+ if (!no_kcore && !dso__load_kcore(dso, map, filename))
+ return map_groups__split_kallsyms_for_kcore(kmap->kmaps, dso);
+ else
+ return map_groups__split_kallsyms(kmap->kmaps, dso, delta, map);
+}
+
+int dso__load_kallsyms(struct dso *dso, const char *filename,
+ struct map *map)
+{
+ return __dso__load_kallsyms(dso, filename, map, false);
+}
+
+static int dso__load_perf_map(const char *map_path, struct dso *dso)
+{
+ char *line = NULL;
+ size_t n;
+ FILE *file;
+ int nr_syms = 0;
+
+ file = fopen(map_path, "r");
+ if (file == NULL)
+ goto out_failure;
+
+ while (!feof(file)) {
+ u64 start, size;
+ struct symbol *sym;
+ int line_len, len;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0)
+ break;
+
+ if (!line)
+ goto out_failure;
+
+ line[--line_len] = '\0'; /* \n */
+
+ len = hex2u64(line, &start);
+
+ len++;
+ if (len + 2 >= line_len)
+ continue;
+
+ len += hex2u64(line + len, &size);
+
+ len++;
+ if (len + 2 >= line_len)
+ continue;
+
+ sym = symbol__new(start, size, STB_GLOBAL, STT_FUNC, line + len);
+
+ if (sym == NULL)
+ goto out_delete_line;
+
+ symbols__insert(&dso->symbols, sym);
+ nr_syms++;
+ }
+
+ free(line);
+ fclose(file);
+
+ return nr_syms;
+
+out_delete_line:
+ free(line);
+out_failure:
+ return -1;
+}
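+
+/*
+ * Illustrative: a /tmp/perf-<pid>.map file carries one
+ * "<start> <size> <name>" line per jitted symbol, the first two fields
+ * in hex, e.g. (hypothetical):
+ *
+ *	7f6bf0a21000 180 java.lang.String::hashCode
+ *
+ * which dso__load_perf_map() turns into a global STT_FUNC symbol.
+ */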
+
+static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
+ enum dso_binary_type type)
+{
+ switch (type) {
+ case DSO_BINARY_TYPE__JAVA_JIT:
+ case DSO_BINARY_TYPE__DEBUGLINK:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
+ case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
+ case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
+ case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
+ return !kmod && dso->kernel == DSO_TYPE_USER;
+
+ case DSO_BINARY_TYPE__KALLSYMS:
+ case DSO_BINARY_TYPE__VMLINUX:
+ case DSO_BINARY_TYPE__KCORE:
+ return dso->kernel == DSO_TYPE_KERNEL;
+
+ case DSO_BINARY_TYPE__GUEST_KALLSYMS:
+ case DSO_BINARY_TYPE__GUEST_VMLINUX:
+ case DSO_BINARY_TYPE__GUEST_KCORE:
+ return dso->kernel == DSO_TYPE_GUEST_KERNEL;
+
+ case DSO_BINARY_TYPE__GUEST_KMODULE:
+ case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE:
+ case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
+ /*
+ * kernel modules know their symtab type - it's set when
+ * creating a module dso in machine__findnew_module_map().
+ */
+ return kmod && dso->symtab_type == type;
+
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE:
+ case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO:
+ return true;
+
+ case DSO_BINARY_TYPE__NOT_FOUND:
+ default:
+ return false;
+ }
+}
+
+/* Checks for the existence of the perf-<pid>.map file in two different
+ * locations. First, if the process is in a separate mount namespace, check
+ * in that namespace using the pid of the innermost pid namespace. If it's
+ * not in a namespace, or the file can't be found there, try in the mount
+ * namespace of the tracing process using our view of its pid.
+ */
+static int dso__find_perf_map(char *filebuf, size_t bufsz,
+ struct nsinfo **nsip)
+{
+ struct nscookie nsc;
+ struct nsinfo *nsi;
+ struct nsinfo *nnsi;
+ int rc = -1;
+
+ nsi = *nsip;
+
+ if (nsi->need_setns) {
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nsi->nstgid);
+ nsinfo__mountns_enter(nsi, &nsc);
+ rc = access(filebuf, R_OK);
+ nsinfo__mountns_exit(&nsc);
+ if (rc == 0)
+ return rc;
+ }
+
+ nnsi = nsinfo__copy(nsi);
+ if (nnsi) {
+ nsinfo__put(nsi);
+
+ nnsi->need_setns = false;
+ snprintf(filebuf, bufsz, "/tmp/perf-%d.map", nnsi->tgid);
+ *nsip = nnsi;
+ rc = 0;
+ }
+
+ return rc;
+}
+
+int dso__load(struct dso *dso, struct map *map)
+{
+ char *name;
+ int ret = -1;
+ u_int i;
+ struct machine *machine;
+ char *root_dir = (char *) "";
+ int ss_pos = 0;
+ struct symsrc ss_[2];
+ struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
+ bool kmod;
+ bool perfmap;
+ unsigned char build_id[BUILD_ID_SIZE];
+ struct nscookie nsc;
+ char newmapname[PATH_MAX];
+ const char *map_path = dso->long_name;
+
+ perfmap = strncmp(dso->name, "/tmp/perf-", 10) == 0;
+ if (perfmap) {
+ if (dso->nsinfo && (dso__find_perf_map(newmapname,
+ sizeof(newmapname), &dso->nsinfo) == 0)) {
+ map_path = newmapname;
+ }
+ }
+
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+ pthread_mutex_lock(&dso->lock);
+
+ /* check again under the dso->lock */
+ if (dso__loaded(dso)) {
+ ret = 1;
+ goto out;
+ }
+
+ if (map->groups && map->groups->machine)
+ machine = map->groups->machine;
+ else
+ machine = NULL;
+
+ if (dso->kernel) {
+ if (dso->kernel == DSO_TYPE_KERNEL)
+ ret = dso__load_kernel_sym(dso, map);
+ else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ ret = dso__load_guest_kernel_sym(dso, map);
+
+ if (machine__is(machine, "x86_64"))
+ machine__map_x86_64_entry_trampolines(machine, dso);
+ goto out;
+ }
+
+ dso->adjust_symbols = 0;
+
+ if (perfmap) {
+ struct stat st;
+
+ if (lstat(map_path, &st) < 0)
+ goto out;
+
+ if (!symbol_conf.force && st.st_uid && (st.st_uid != geteuid())) {
+ pr_warning("File %s not owned by current user or root, "
+ "ignoring it (use -f to override).\n", map_path);
+ goto out;
+ }
+
+ ret = dso__load_perf_map(map_path, dso);
+ dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
+ DSO_BINARY_TYPE__NOT_FOUND;
+ goto out;
+ }
+
+ if (machine)
+ root_dir = machine->root_dir;
+
+ name = malloc(PATH_MAX);
+ if (!name)
+ goto out;
+
+ kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE ||
+ dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
+
+
+ /*
+ * Read the build id if possible. This is required for
+ * DSO_BINARY_TYPE__BUILDID_DEBUGINFO to work.
+ */
+ if (!dso->has_build_id &&
+ is_regular_file(dso->long_name)) {
+ __symbol__join_symfs(name, PATH_MAX, dso->long_name);
+ if (filename__read_build_id(name, build_id, BUILD_ID_SIZE) > 0)
+ dso__set_build_id(dso, build_id);
+ }
+
+ /*
+ * Iterate over candidate debug images.
+ * Keep track of "interesting" ones (those which have a symtab, dynsym,
+ * and/or opd section) for processing.
+ */
+ for (i = 0; i < DSO_BINARY_TYPE__SYMTAB_CNT; i++) {
+ struct symsrc *ss = &ss_[ss_pos];
+ bool next_slot = false;
+ bool is_reg;
+ bool nsexit;
+ int sirc = -1;
+
+ enum dso_binary_type symtab_type = binary_type_symtab[i];
+
+ nsexit = (symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE ||
+ symtab_type == DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO);
+
+ if (!dso__is_compatible_symtab_type(dso, kmod, symtab_type))
+ continue;
+
+ if (dso__read_binary_type_filename(dso, symtab_type,
+ root_dir, name, PATH_MAX))
+ continue;
+
+ if (nsexit)
+ nsinfo__mountns_exit(&nsc);
+
+ is_reg = is_regular_file(name);
+ if (is_reg)
+ sirc = symsrc__init(ss, dso, name, symtab_type);
+
+ if (nsexit)
+ nsinfo__mountns_enter(dso->nsinfo, &nsc);
+
+ if (!is_reg || sirc < 0)
+ continue;
+
+ if (!syms_ss && symsrc__has_symtab(ss)) {
+ syms_ss = ss;
+ next_slot = true;
+ if (!dso->symsrc_filename)
+ dso->symsrc_filename = strdup(name);
+ }
+
+ if (!runtime_ss && symsrc__possibly_runtime(ss)) {
+ runtime_ss = ss;
+ next_slot = true;
+ }
+
+ if (next_slot) {
+ ss_pos++;
+
+ if (syms_ss && runtime_ss)
+ break;
+ } else {
+ symsrc__destroy(ss);
+ }
+
+ }
+
+ if (!runtime_ss && !syms_ss)
+ goto out_free;
+
+ if (runtime_ss && !syms_ss) {
+ syms_ss = runtime_ss;
+ }
+
+ /* We'll have to hope for the best */
+ if (!runtime_ss && syms_ss)
+ runtime_ss = syms_ss;
+
+ if (syms_ss)
+ ret = dso__load_sym(dso, map, syms_ss, runtime_ss, kmod);
+ else
+ ret = -1;
+
+ if (ret > 0) {
+ int nr_plt;
+
+ nr_plt = dso__synthesize_plt_symbols(dso, runtime_ss);
+ if (nr_plt > 0)
+ ret += nr_plt;
+ }
+
+ for (; ss_pos > 0; ss_pos--)
+ symsrc__destroy(&ss_[ss_pos - 1]);
+out_free:
+ free(name);
+ if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
+ ret = 0;
+out:
+ dso__set_loaded(dso);
+ pthread_mutex_unlock(&dso->lock);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
+
+struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
+{
+ struct maps *maps = &mg->maps;
+ struct map *map;
+
+ down_read(&maps->lock);
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ if (map->dso && strcmp(map->dso->short_name, name) == 0)
+ goto out_unlock;
+ }
+
+ map = NULL;
+
+out_unlock:
+ up_read(&maps->lock);
+ return map;
+}
+
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+ const char *vmlinux, bool vmlinux_allocated)
+{
+ int err = -1;
+ struct symsrc ss;
+ char symfs_vmlinux[PATH_MAX];
+ enum dso_binary_type symtab_type;
+
+ if (vmlinux[0] == '/')
+ snprintf(symfs_vmlinux, sizeof(symfs_vmlinux), "%s", vmlinux);
+ else
+ symbol__join_symfs(symfs_vmlinux, vmlinux);
+
+ if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+ else
+ symtab_type = DSO_BINARY_TYPE__VMLINUX;
+
+ if (symsrc__init(&ss, dso, symfs_vmlinux, symtab_type))
+ return -1;
+
+ err = dso__load_sym(dso, map, &ss, &ss, 0);
+ symsrc__destroy(&ss);
+
+ if (err > 0) {
+ if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
+ else
+ dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
+ dso__set_long_name(dso, vmlinux, vmlinux_allocated);
+ dso__set_loaded(dso);
+ pr_debug("Using %s for symbols\n", symfs_vmlinux);
+ }
+
+ return err;
+}
+
+int dso__load_vmlinux_path(struct dso *dso, struct map *map)
+{
+ int i, err = 0;
+ char *filename = NULL;
+
+ pr_debug("Looking at the vmlinux_path (%d entries long)\n",
+ vmlinux_path__nr_entries + 1);
+
+ for (i = 0; i < vmlinux_path__nr_entries; ++i) {
+ err = dso__load_vmlinux(dso, map, vmlinux_path[i], false);
+ if (err > 0)
+ goto out;
+ }
+
+ if (!symbol_conf.ignore_vmlinux_buildid)
+ filename = dso__build_id_filename(dso, NULL, 0, false);
+ if (filename != NULL) {
+ err = dso__load_vmlinux(dso, map, filename, true);
+ if (err > 0)
+ goto out;
+ free(filename);
+ }
+out:
+ return err;
+}
+
+static bool visible_dir_filter(const char *name, struct dirent *d)
+{
+ if (d->d_type != DT_DIR)
+ return false;
+ return lsdir_no_dot_filter(name, d);
+}
+
+static int find_matching_kcore(struct map *map, char *dir, size_t dir_sz)
+{
+ char kallsyms_filename[PATH_MAX];
+ int ret = -1;
+ struct strlist *dirs;
+ struct str_node *nd;
+
+ dirs = lsdir(dir, visible_dir_filter);
+ if (!dirs)
+ return -1;
+
+ strlist__for_each_entry(nd, dirs) {
+ scnprintf(kallsyms_filename, sizeof(kallsyms_filename),
+ "%s/%s/kallsyms", dir, nd->s);
+ if (!validate_kcore_addresses(kallsyms_filename, map)) {
+ strlcpy(dir, kallsyms_filename, dir_sz);
+ ret = 0;
+ break;
+ }
+ }
+
+ strlist__delete(dirs);
+
+ return ret;
+}
+
+/*
+ * Use open(O_RDONLY) to check readability directly instead of access(R_OK)
+ * since access(R_OK) only checks with real UID/GID but open() uses effective
+ * UID/GID and actual capabilities (e.g. /proc/kcore requires CAP_SYS_RAWIO).
+ */
+static bool filename__readable(const char *file)
+{
+ int fd = open(file, O_RDONLY);
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
+static char *dso__find_kallsyms(struct dso *dso, struct map *map)
+{
+ u8 host_build_id[BUILD_ID_SIZE];
+ char sbuild_id[SBUILD_ID_SIZE];
+ bool is_host = false;
+ char path[PATH_MAX];
+
+ if (!dso->has_build_id) {
+ /*
+ * Last resort, if we don't have a build-id and couldn't find
+ * any vmlinux file, try the running kernel kallsyms table.
+ */
+ goto proc_kallsyms;
+ }
+
+ if (sysfs__read_build_id("/sys/kernel/notes", host_build_id,
+ sizeof(host_build_id)) == 0)
+ is_host = dso__build_id_equal(dso, host_build_id);
+
+ /* Try a fast path for /proc/kallsyms if possible */
+ if (is_host) {
+ /*
+ * Do not check the build-id cache, unless we know we cannot use
+ * /proc/kcore or the module maps don't match /proc/kallsyms.
+ * To check readability of /proc/kcore, do not use access(R_OK)
+ * since /proc/kcore requires CAP_SYS_RAWIO to read and access
+ * can't check it.
+ */
+ if (filename__readable("/proc/kcore") &&
+ !validate_kcore_addresses("/proc/kallsyms", map))
+ goto proc_kallsyms;
+ }
+
+ build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id);
+
+ /* Find kallsyms in build-id cache with kcore */
+ scnprintf(path, sizeof(path), "%s/%s/%s",
+ buildid_dir, DSO__NAME_KCORE, sbuild_id);
+
+ if (!find_matching_kcore(map, path, sizeof(path)))
+ return strdup(path);
+
+ /* Use current /proc/kallsyms if possible */
+ if (is_host) {
+proc_kallsyms:
+ return strdup("/proc/kallsyms");
+ }
+
+ /* Finally, find a cache of kallsyms */
+ if (!build_id_cache__kallsyms_path(sbuild_id, path, sizeof(path))) {
+ pr_err("No kallsyms or vmlinux with build-id %s was found\n",
+ sbuild_id);
+ return NULL;
+ }
+
+ return strdup(path);
+}
+
+static int dso__load_kernel_sym(struct dso *dso, struct map *map)
+{
+ int err;
+ const char *kallsyms_filename = NULL;
+ char *kallsyms_allocated_filename = NULL;
+ /*
+ * Step 1: if the user specified a kallsyms or vmlinux filename, use
+ * it and only it, reporting errors to the user if it cannot be used.
+ *
+ * For instance, try to analyse an ARM perf.data file _without_ a
+ * build-id, or if the user specifies the wrong path to the right
+ * vmlinux file, obviously we can't fall back to another vmlinux (an
+ * x86_64 one, on the machine where analysis is being performed, say),
+ * or worse, /proc/kallsyms.
+ *
+ * If the specified file _has_ a build-id and there is a build-id
+ * section in the perf.data file, we will still do the expected
+ * validation in dso__load_vmlinux and will bail out if they don't
+ * match.
+ */
+ if (symbol_conf.kallsyms_name != NULL) {
+ kallsyms_filename = symbol_conf.kallsyms_name;
+ goto do_kallsyms;
+ }
+
+ if (!symbol_conf.ignore_vmlinux && symbol_conf.vmlinux_name != NULL) {
+ return dso__load_vmlinux(dso, map, symbol_conf.vmlinux_name, false);
+ }
+
+ if (!symbol_conf.ignore_vmlinux && vmlinux_path != NULL) {
+ err = dso__load_vmlinux_path(dso, map);
+ if (err > 0)
+ return err;
+ }
+
+ /* do not try local files if a symfs was given */
+ if (symbol_conf.symfs[0] != 0)
+ return -1;
+
+ kallsyms_allocated_filename = dso__find_kallsyms(dso, map);
+ if (!kallsyms_allocated_filename)
+ return -1;
+
+ kallsyms_filename = kallsyms_allocated_filename;
+
+do_kallsyms:
+ err = dso__load_kallsyms(dso, kallsyms_filename, map);
+ if (err > 0)
+ pr_debug("Using %s for symbols\n", kallsyms_filename);
+ free(kallsyms_allocated_filename);
+
+ if (err > 0 && !dso__is_kcore(dso)) {
+ dso->binary_type = DSO_BINARY_TYPE__KALLSYMS;
+ dso__set_long_name(dso, DSO__NAME_KALLSYMS, false);
+ map__fixup_start(map);
+ map__fixup_end(map);
+ }
+
+ return err;
+}
+
+static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map)
+{
+ int err;
+ const char *kallsyms_filename = NULL;
+ struct machine *machine;
+ char path[PATH_MAX];
+
+ if (!map->groups) {
+ pr_debug("Guest kernel map doesn't point to any map groups\n");
+ return -1;
+ }
+ machine = map->groups->machine;
+
+ if (machine__is_default_guest(machine)) {
+ /*
+ * If the user specified a vmlinux filename, use it and only
+ * it, reporting errors to the user if it cannot be used.
+ * Otherwise, use the guest_kallsyms file the user supplied on
+ * the command line.
+ */
+ if (symbol_conf.default_guest_vmlinux_name != NULL) {
+ err = dso__load_vmlinux(dso, map,
+ symbol_conf.default_guest_vmlinux_name,
+ false);
+ return err;
+ }
+
+ kallsyms_filename = symbol_conf.default_guest_kallsyms;
+ if (!kallsyms_filename)
+ return -1;
+ } else {
+ sprintf(path, "%s/proc/kallsyms", machine->root_dir);
+ kallsyms_filename = path;
+ }
+
+ err = dso__load_kallsyms(dso, kallsyms_filename, map);
+ if (err > 0)
+ pr_debug("Using %s for symbols\n", kallsyms_filename);
+ if (err > 0 && !dso__is_kcore(dso)) {
+ dso->binary_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
+ dso__set_long_name(dso, machine->mmap_name, false);
+ map__fixup_start(map);
+ map__fixup_end(map);
+ }
+
+ return err;
+}
+
+static void vmlinux_path__exit(void)
+{
+ while (--vmlinux_path__nr_entries >= 0)
+ zfree(&vmlinux_path[vmlinux_path__nr_entries]);
+ vmlinux_path__nr_entries = 0;
+
+ zfree(&vmlinux_path);
+}
+
+static const char * const vmlinux_paths[] = {
+ "vmlinux",
+ "/boot/vmlinux"
+};
+
+static const char * const vmlinux_paths_upd[] = {
+ "/boot/vmlinux-%s",
+ "/usr/lib/debug/boot/vmlinux-%s",
+ "/lib/modules/%s/build/vmlinux",
+ "/usr/lib/debug/lib/modules/%s/vmlinux",
+ "/usr/lib/debug/boot/vmlinux-%s.debug"
+};
+
+static int vmlinux_path__add(const char *new_entry)
+{
+ vmlinux_path[vmlinux_path__nr_entries] = strdup(new_entry);
+ if (vmlinux_path[vmlinux_path__nr_entries] == NULL)
+ return -1;
+ ++vmlinux_path__nr_entries;
+
+ return 0;
+}
+
+static int vmlinux_path__init(struct perf_env *env)
+{
+ struct utsname uts;
+ char bf[PATH_MAX];
+ char *kernel_version;
+ unsigned int i;
+
+ vmlinux_path = malloc(sizeof(char *) * (ARRAY_SIZE(vmlinux_paths) +
+ ARRAY_SIZE(vmlinux_paths_upd)));
+ if (vmlinux_path == NULL)
+ return -1;
+
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths); i++)
+ if (vmlinux_path__add(vmlinux_paths[i]) < 0)
+ goto out_fail;
+
+ /* only try kernel version if no symfs was given */
+ if (symbol_conf.symfs[0] != 0)
+ return 0;
+
+ if (env) {
+ kernel_version = env->os_release;
+ } else {
+ if (uname(&uts) < 0)
+ goto out_fail;
+
+ kernel_version = uts.release;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vmlinux_paths_upd); i++) {
+ snprintf(bf, sizeof(bf), vmlinux_paths_upd[i], kernel_version);
+ if (vmlinux_path__add(bf) < 0)
+ goto out_fail;
+ }
+
+ return 0;
+
+out_fail:
+ vmlinux_path__exit();
+ return -1;
+}
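+
+/*
+ * Illustrative: for a (hypothetical) kernel release "4.19.0" and no
+ * symfs, the resulting search order is:
+ *
+ *	vmlinux
+ *	/boot/vmlinux
+ *	/boot/vmlinux-4.19.0
+ *	/usr/lib/debug/boot/vmlinux-4.19.0
+ *	/lib/modules/4.19.0/build/vmlinux
+ *	/usr/lib/debug/lib/modules/4.19.0/vmlinux
+ *	/usr/lib/debug/boot/vmlinux-4.19.0.debug
+ */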
+
+int setup_list(struct strlist **list, const char *list_str,
+ const char *list_name)
+{
+ if (list_str == NULL)
+ return 0;
+
+ *list = strlist__new(list_str, NULL);
+ if (!*list) {
+ pr_err("problems parsing %s list\n", list_name);
+ return -1;
+ }
+
+ symbol_conf.has_filter = true;
+ return 0;
+}
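+
+/*
+ * Usage sketch (illustrative, the option string is hypothetical): parse a
+ * comma-separated filter, e.g. from a --symbols option, into a strlist:
+ *
+ *	struct strlist *sym_filter = NULL;
+ *
+ *	if (setup_list(&sym_filter, "main,do_work", "symbol") < 0)
+ *		return -1;
+ */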
+
+int setup_intlist(struct intlist **list, const char *list_str,
+ const char *list_name)
+{
+ if (list_str == NULL)
+ return 0;
+
+ *list = intlist__new(list_str);
+ if (!*list) {
+ pr_err("problems parsing %s list\n", list_name);
+ return -1;
+ }
+ return 0;
+}
+
+static bool symbol__read_kptr_restrict(void)
+{
+ bool value = false;
+ FILE *fp = fopen("/proc/sys/kernel/kptr_restrict", "r");
+
+ if (fp != NULL) {
+ char line[8];
+
+ if (fgets(line, sizeof(line), fp) != NULL)
+ value = ((geteuid() != 0) || (getuid() != 0)) ?
+ (atoi(line) != 0) :
+ (atoi(line) == 2);
+
+ fclose(fp);
+ }
+
+ return value;
+}
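+
+/*
+ * Interpretation sketch: kptr_restrict == 0 restricts nobody, 1 restricts
+ * only unprivileged users, and 2 restricts everyone including root; the
+ * check above therefore returns true for a non-root user whenever the
+ * sysctl is non-zero, but for root only when it is 2.
+ */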
+
+int symbol__annotation_init(void)
+{
+ if (symbol_conf.init_annotation)
+ return 0;
+
+ if (symbol_conf.initialized) {
+		pr_err("Annotation needs to be initialized before symbol__init()\n");
+ return -1;
+ }
+
+ symbol_conf.priv_size += sizeof(struct annotation);
+ symbol_conf.init_annotation = true;
+ return 0;
+}
+
+int symbol__init(struct perf_env *env)
+{
+ const char *symfs;
+
+ if (symbol_conf.initialized)
+ return 0;
+
+ symbol_conf.priv_size = PERF_ALIGN(symbol_conf.priv_size, sizeof(u64));
+
+ symbol__elf_init();
+
+ if (symbol_conf.sort_by_name)
+ symbol_conf.priv_size += (sizeof(struct symbol_name_rb_node) -
+ sizeof(struct symbol));
+
+ if (symbol_conf.try_vmlinux_path && vmlinux_path__init(env) < 0)
+ return -1;
+
+ if (symbol_conf.field_sep && *symbol_conf.field_sep == '.') {
+		pr_err("'.' is the only invalid --field-separator argument\n");
+ return -1;
+ }
+
+ if (setup_list(&symbol_conf.dso_list,
+ symbol_conf.dso_list_str, "dso") < 0)
+ return -1;
+
+ if (setup_list(&symbol_conf.comm_list,
+ symbol_conf.comm_list_str, "comm") < 0)
+ goto out_free_dso_list;
+
+ if (setup_intlist(&symbol_conf.pid_list,
+ symbol_conf.pid_list_str, "pid") < 0)
+ goto out_free_comm_list;
+
+ if (setup_intlist(&symbol_conf.tid_list,
+ symbol_conf.tid_list_str, "tid") < 0)
+ goto out_free_pid_list;
+
+ if (setup_list(&symbol_conf.sym_list,
+ symbol_conf.sym_list_str, "symbol") < 0)
+ goto out_free_tid_list;
+
+ if (setup_list(&symbol_conf.bt_stop_list,
+ symbol_conf.bt_stop_list_str, "symbol") < 0)
+ goto out_free_sym_list;
+
+ /*
+	 * A path to symbols of "/" is identical to "";
+	 * reset it here for simplicity.
+ */
+ symfs = realpath(symbol_conf.symfs, NULL);
+ if (symfs == NULL)
+ symfs = symbol_conf.symfs;
+ if (strcmp(symfs, "/") == 0)
+ symbol_conf.symfs = "";
+ if (symfs != symbol_conf.symfs)
+ free((void *)symfs);
+
+ symbol_conf.kptr_restrict = symbol__read_kptr_restrict();
+
+ symbol_conf.initialized = true;
+ return 0;
+
+out_free_sym_list:
+ strlist__delete(symbol_conf.sym_list);
+out_free_tid_list:
+ intlist__delete(symbol_conf.tid_list);
+out_free_pid_list:
+ intlist__delete(symbol_conf.pid_list);
+out_free_comm_list:
+ strlist__delete(symbol_conf.comm_list);
+out_free_dso_list:
+ strlist__delete(symbol_conf.dso_list);
+ return -1;
+}
+
+void symbol__exit(void)
+{
+ if (!symbol_conf.initialized)
+ return;
+ strlist__delete(symbol_conf.bt_stop_list);
+ strlist__delete(symbol_conf.sym_list);
+ strlist__delete(symbol_conf.dso_list);
+ strlist__delete(symbol_conf.comm_list);
+ intlist__delete(symbol_conf.tid_list);
+ intlist__delete(symbol_conf.pid_list);
+ vmlinux_path__exit();
+ symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL;
+ symbol_conf.bt_stop_list = NULL;
+ symbol_conf.initialized = false;
+}
+
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+ const char *dir, int unset __maybe_unused)
+{
+ char *bf = NULL;
+ int ret;
+
+ symbol_conf.symfs = strdup(dir);
+ if (symbol_conf.symfs == NULL)
+ return -ENOMEM;
+
+	/* Skip the locally configured cache if a symfs is given, and
+	 * configure the buildid dir to symfs/.debug.
+ */
+ ret = asprintf(&bf, "%s/%s", dir, ".debug");
+ if (ret < 0)
+ return -ENOMEM;
+
+ set_buildid_dir(bf);
+
+ free(bf);
+ return 0;
+}
+
+struct mem_info *mem_info__get(struct mem_info *mi)
+{
+ if (mi)
+ refcount_inc(&mi->refcnt);
+ return mi;
+}
+
+void mem_info__put(struct mem_info *mi)
+{
+ if (mi && refcount_dec_and_test(&mi->refcnt))
+ free(mi);
+}
+
+struct mem_info *mem_info__new(void)
+{
+ struct mem_info *mi = zalloc(sizeof(*mi));
+
+ if (mi)
+ refcount_set(&mi->refcnt, 1);
+ return mi;
+}
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
new file mode 100644
index 000000000..76ef2facd
--- /dev/null
+++ b/tools/perf/util/symbol.h
@@ -0,0 +1,401 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYMBOL
+#define __PERF_SYMBOL 1
+
+#include <linux/types.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include "map.h"
+#include "../perf.h"
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <stdio.h>
+#include <byteswap.h>
+#include <libgen.h>
+#include "build-id.h"
+#include "event.h"
+#include "path.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+#include <libelf.h>
+#include <gelf.h>
+#endif
+#include <elf.h>
+
+#include "dso.h"
+
+/*
+ * libelf 0.8.x and earlier do not support ELF_C_READ_MMAP;
+ * for newer versions we can use mmap to reduce memory usage:
+ */
+#ifdef HAVE_LIBELF_MMAP_SUPPORT
+# define PERF_ELF_C_READ_MMAP ELF_C_READ_MMAP
+#else
+# define PERF_ELF_C_READ_MMAP ELF_C_READ
+#endif
+
+#ifdef HAVE_LIBELF_SUPPORT
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ GElf_Shdr *shp, const char *name, size_t *idx);
+#endif
+
+#ifndef DMGL_PARAMS
+#define DMGL_NO_OPTS 0 /* For readability... */
+#define DMGL_PARAMS (1 << 0) /* Include function args */
+#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */
+#endif
+
+#define DSO__NAME_KALLSYMS "[kernel.kallsyms]"
+#define DSO__NAME_KCORE "[kernel.kcore]"
+
+/** struct symbol - symtab entry
+ *
+ * @ignore - resolvable but tools ignore it (e.g. idle routines)
+ */
+struct symbol {
+ struct rb_node rb_node;
+ u64 start;
+ u64 end;
+ u16 namelen;
+ u8 type:4;
+ u8 binding:4;
+ u8 idle:1;
+ u8 ignore:1;
+ u8 inlined:1;
+ u8 arch_sym;
+ char name[0];
+};
+
+void symbol__delete(struct symbol *sym);
+void symbols__delete(struct rb_root *symbols);
+
+/* symbols__for_each_entry - iterate over symbols (rb_root)
+ *
+ * @symbols: the rb_root of symbols
+ * @pos: the 'struct symbol *' to use as a loop cursor
+ * @nd: the 'struct rb_node *' to use as a temporary storage
+ */
+#define symbols__for_each_entry(symbols, pos, nd) \
+ for (nd = rb_first(symbols); \
+ nd && (pos = rb_entry(nd, struct symbol, rb_node)); \
+ nd = rb_next(nd))
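+
+/*
+ * Usage sketch (illustrative): walk a populated symbol tree and print
+ * each entry:
+ *
+ *	struct symbol *pos;
+ *	struct rb_node *nd;
+ *
+ *	symbols__for_each_entry(symbols, pos, nd)
+ *		printf("%#" PRIx64 "-%#" PRIx64 " %s\n",
+ *		       pos->start, pos->end, pos->name);
+ */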
+
+static inline size_t symbol__size(const struct symbol *sym)
+{
+ return sym->end - sym->start;
+}
+
+struct strlist;
+struct intlist;
+
+struct symbol_conf {
+ unsigned short priv_size;
+ bool try_vmlinux_path,
+ init_annotation,
+ force,
+ ignore_vmlinux,
+ ignore_vmlinux_buildid,
+ show_kernel_path,
+ use_modules,
+ allow_aliases,
+ sort_by_name,
+ show_nr_samples,
+ show_total_period,
+ use_callchain,
+ cumulate_callchain,
+ show_branchflag_count,
+ exclude_other,
+ show_cpu_utilization,
+ initialized,
+ kptr_restrict,
+ event_group,
+ demangle,
+ demangle_kernel,
+ filter_relative,
+ show_hist_headers,
+ branch_callstack,
+ has_filter,
+ show_ref_callgraph,
+ hide_unresolved,
+ raw_trace,
+ report_hierarchy,
+ inline_name;
+ const char *vmlinux_name,
+ *kallsyms_name,
+ *source_prefix,
+ *field_sep;
+ const char *default_guest_vmlinux_name,
+ *default_guest_kallsyms,
+ *default_guest_modules;
+ const char *guestmount;
+ const char *dso_list_str,
+ *comm_list_str,
+ *pid_list_str,
+ *tid_list_str,
+ *sym_list_str,
+ *col_width_list_str,
+ *bt_stop_list_str;
+ struct strlist *dso_list,
+ *comm_list,
+ *sym_list,
+ *dso_from_list,
+ *dso_to_list,
+ *sym_from_list,
+ *sym_to_list,
+ *bt_stop_list;
+ struct intlist *pid_list,
+ *tid_list;
+ const char *symfs;
+};
+
+extern struct symbol_conf symbol_conf;
+
+struct symbol_name_rb_node {
+ struct rb_node rb_node;
+ struct symbol sym;
+};
+
+static inline int __symbol__join_symfs(char *bf, size_t size, const char *path)
+{
+ return path__join(bf, size, symbol_conf.symfs, path);
+}
+
+#define symbol__join_symfs(bf, path) __symbol__join_symfs(bf, sizeof(bf), path)
+
+extern int vmlinux_path__nr_entries;
+extern char **vmlinux_path;
+
+static inline void *symbol__priv(struct symbol *sym)
+{
+ return ((void *)sym) - symbol_conf.priv_size;
+}
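+
+/*
+ * The private area of symbol_conf.priv_size bytes sits immediately before
+ * the struct symbol itself, so with annotation enabled (sketch):
+ *
+ *	struct annotation *notes = symbol__priv(sym);
+ */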
+
+struct ref_reloc_sym {
+ const char *name;
+ u64 addr;
+ u64 unrelocated_addr;
+};
+
+struct map_symbol {
+ struct map *map;
+ struct symbol *sym;
+};
+
+struct addr_map_symbol {
+ struct map *map;
+ struct symbol *sym;
+ u64 addr;
+ u64 al_addr;
+ u64 phys_addr;
+};
+
+struct branch_info {
+ struct addr_map_symbol from;
+ struct addr_map_symbol to;
+ struct branch_flags flags;
+ char *srcline_from;
+ char *srcline_to;
+};
+
+struct mem_info {
+ struct addr_map_symbol iaddr;
+ struct addr_map_symbol daddr;
+ union perf_mem_data_src data_src;
+ refcount_t refcnt;
+};
+
+struct addr_location {
+ struct machine *machine;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+ const char *srcline;
+ u64 addr;
+ char level;
+ u8 filtered;
+ u8 cpumode;
+ s32 cpu;
+ s32 socket;
+};
+
+struct symsrc {
+ char *name;
+ int fd;
+ enum dso_binary_type type;
+
+#ifdef HAVE_LIBELF_SUPPORT
+ Elf *elf;
+ GElf_Ehdr ehdr;
+
+ Elf_Scn *opdsec;
+ size_t opdidx;
+ GElf_Shdr opdshdr;
+
+ Elf_Scn *symtab;
+ GElf_Shdr symshdr;
+
+ Elf_Scn *dynsym;
+ size_t dynsym_idx;
+ GElf_Shdr dynshdr;
+
+ bool adjust_symbols;
+ bool is_64_bit;
+#endif
+};
+
+void symsrc__destroy(struct symsrc *ss);
+int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
+ enum dso_binary_type type);
+bool symsrc__has_symtab(struct symsrc *ss);
+bool symsrc__possibly_runtime(struct symsrc *ss);
+
+int dso__load(struct dso *dso, struct map *map);
+int dso__load_vmlinux(struct dso *dso, struct map *map,
+ const char *vmlinux, bool vmlinux_allocated);
+int dso__load_vmlinux_path(struct dso *dso, struct map *map);
+int __dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map,
+ bool no_kcore);
+int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
+
+void dso__insert_symbol(struct dso *dso,
+ struct symbol *sym);
+
+struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
+struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
+
+struct symbol *symbol__next_by_name(struct symbol *sym);
+
+struct symbol *dso__first_symbol(struct dso *dso);
+struct symbol *dso__last_symbol(struct dso *dso);
+struct symbol *dso__next_symbol(struct symbol *sym);
+
+enum dso_type dso__type_fd(int fd);
+
+int filename__read_build_id(const char *filename, void *bf, size_t size);
+int sysfs__read_build_id(const char *filename, void *bf, size_t size);
+int modules__parse(const char *filename, void *arg,
+ int (*process_module)(void *arg, const char *name,
+ u64 start, u64 size));
+int filename__read_debuglink(const char *filename, char *debuglink,
+ size_t size);
+
+struct perf_env;
+int symbol__init(struct perf_env *env);
+void symbol__exit(void);
+void symbol__elf_init(void);
+int symbol__annotation_init(void);
+
+struct symbol *symbol__new(u64 start, u64 len, u8 binding, u8 type, const char *name);
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr,
+ bool print_offsets, FILE *fp);
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al, FILE *fp);
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp);
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp);
+size_t symbol__fprintf(struct symbol *sym, FILE *fp);
+bool symbol__restricted_filename(const char *filename,
+ const char *restricted_filename);
+int symbol__config_symfs(const struct option *opt __maybe_unused,
+ const char *dir, int unset __maybe_unused);
+
+int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
+ struct symsrc *runtime_ss, int kmodule);
+int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss);
+
+char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name);
+
+void __symbols__insert(struct rb_root *symbols, struct symbol *sym, bool kernel);
+void symbols__insert(struct rb_root *symbols, struct symbol *sym);
+void symbols__fixup_duplicate(struct rb_root *symbols);
+void symbols__fixup_end(struct rb_root *symbols);
+void map_groups__fixup_end(struct map_groups *mg);
+
+typedef int (*mapfn_t)(u64 start, u64 len, u64 pgoff, void *data);
+int file__read_maps(int fd, bool exe, mapfn_t mapfn, void *data,
+ bool *is_64_bit);
+
+#define PERF_KCORE_EXTRACT "/tmp/perf-kcore-XXXXXX"
+
+struct kcore_extract {
+ char *kcore_filename;
+ u64 addr;
+ u64 offs;
+ u64 len;
+ char extract_filename[sizeof(PERF_KCORE_EXTRACT)];
+ int fd;
+};
+
+int kcore_extract__create(struct kcore_extract *kce);
+void kcore_extract__delete(struct kcore_extract *kce);
+
+int kcore_copy(const char *from_dir, const char *to_dir);
+int compare_proc_modules(const char *from, const char *to);
+
+int setup_list(struct strlist **list, const char *list_str,
+ const char *list_name);
+int setup_intlist(struct intlist **list, const char *list_str,
+ const char *list_name);
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__sym_update(struct symbol *s, GElf_Sym *sym);
+#endif
+
+const char *arch__normalize_symbol_name(const char *name);
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c);
+int arch__compare_symbol_names(const char *namea, const char *nameb);
+int arch__compare_symbol_names_n(const char *namea, const char *nameb,
+ unsigned int n);
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
+enum symbol_tag_include {
+ SYMBOL_TAG_INCLUDE__NONE = 0,
+ SYMBOL_TAG_INCLUDE__DEFAULT_ONLY
+};
+
+int symbol__match_symbol_name(const char *namea, const char *nameb,
+ enum symbol_tag_include includes);
+
+/* structure containing an SDT note's info */
+struct sdt_note {
+ char *name; /* name of the note*/
+ char *provider; /* provider name */
+ char *args;
+ bool bit32; /* whether the location is 32 bits? */
+ union { /* location, base and semaphore addrs */
+ Elf64_Addr a64[3];
+ Elf32_Addr a32[3];
+ } addr;
+ struct list_head note_list; /* SDT notes' list */
+};
+
+int get_sdt_note_list(struct list_head *head, const char *target);
+int cleanup_sdt_note_list(struct list_head *sdt_notes);
+int sdt_notes__get_count(struct list_head *start);
+
+#define SDT_BASE_SCN ".stapsdt.base"
+#define SDT_NOTE_SCN ".note.stapsdt"
+#define SDT_NOTE_TYPE 3
+#define SDT_NOTE_NAME "stapsdt"
+#define NR_ADDR 3
+
+struct mem_info *mem_info__new(void);
+struct mem_info *mem_info__get(struct mem_info *mi);
+void mem_info__put(struct mem_info *mi);
+
+static inline void __mem_info__zput(struct mem_info **mi)
+{
+ mem_info__put(*mi);
+ *mi = NULL;
+}
+
+#define mem_info__zput(mi) __mem_info__zput(&mi)
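+
+/*
+ * Usage sketch (illustrative, assuming a perf_sample carrying data_src):
+ * take a reference for as long as the info is needed, then drop it and
+ * clear the local pointer:
+ *
+ *	struct mem_info *mi = mem_info__new();
+ *
+ *	if (mi) {
+ *		mi->data_src.val = sample->data_src;
+ *		mem_info__zput(mi);
+ *	}
+ */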
+
+#endif /* __PERF_SYMBOL */
diff --git a/tools/perf/util/symbol_fprintf.c b/tools/perf/util/symbol_fprintf.c
new file mode 100644
index 000000000..1fd175bb4
--- /dev/null
+++ b/tools/perf/util/symbol_fprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elf.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "symbol.h"
+
+size_t symbol__fprintf(struct symbol *sym, FILE *fp)
+{
+ return fprintf(fp, " %" PRIx64 "-%" PRIx64 " %c %s\n",
+ sym->start, sym->end,
+ sym->binding == STB_GLOBAL ? 'g' :
+ sym->binding == STB_LOCAL ? 'l' : 'w',
+ sym->name);
+}
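+
+/*
+ * Output sketch (illustrative): a global symbol prints as
+ * " ffffffff81000000-ffffffff81000040 g do_sys_open".
+ */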
+
+size_t __symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr,
+ bool print_offsets, FILE *fp)
+{
+ unsigned long offset;
+ size_t length;
+
+ if (sym) {
+ length = fprintf(fp, "%s", sym->name);
+ if (al && print_offsets) {
+ if (al->addr < sym->end)
+ offset = al->addr - sym->start;
+ else
+ offset = al->addr - al->map->start - sym->start;
+ length += fprintf(fp, "+0x%lx", offset);
+ }
+ return length;
+ } else if (al && unknown_as_addr)
+ return fprintf(fp, "[%#" PRIx64 "]", al->addr);
+ else
+ return fprintf(fp, "[unknown]");
+}
+
+size_t symbol__fprintf_symname_offs(const struct symbol *sym,
+ const struct addr_location *al,
+ FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, al, false, true, fp);
+}
+
+size_t __symbol__fprintf_symname(const struct symbol *sym,
+ const struct addr_location *al,
+ bool unknown_as_addr, FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, al, unknown_as_addr, false, fp);
+}
+
+size_t symbol__fprintf_symname(const struct symbol *sym, FILE *fp)
+{
+ return __symbol__fprintf_symname_offs(sym, NULL, false, false, fp);
+}
+
+size_t dso__fprintf_symbols_by_name(struct dso *dso,
+ FILE *fp)
+{
+ size_t ret = 0;
+ struct rb_node *nd;
+ struct symbol_name_rb_node *pos;
+
+ for (nd = rb_first(&dso->symbol_names); nd; nd = rb_next(nd)) {
+ pos = rb_entry(nd, struct symbol_name_rb_node, rb_node);
+ ret += fprintf(fp, "%s\n", pos->sym.name);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
new file mode 100644
index 000000000..3393d7ee9
--- /dev/null
+++ b/tools/perf/util/syscalltbl.c
@@ -0,0 +1,183 @@
+/*
+ * System call table mapper
+ *
+ * (C) 2016 Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include "syscalltbl.h"
+#include <stdlib.h>
+#include <linux/compiler.h>
+
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+#include <string.h>
+#include "string2.h"
+#include "util.h"
+
+#if defined(__x86_64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_x86_64;
+#elif defined(__s390x__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_s390_64;
+#elif defined(__powerpc64__)
+#include <asm/syscalls_64.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_64;
+#elif defined(__powerpc__)
+#include <asm/syscalls_32.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_POWERPC_32_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_powerpc_32;
+#elif defined(__aarch64__)
+#include <asm/syscalls.c>
+const int syscalltbl_native_max_id = SYSCALLTBL_ARM64_MAX_ID;
+static const char **syscalltbl_native = syscalltbl_arm64;
+#endif
+
+struct syscall {
+ int id;
+ const char *name;
+};
+
+static int syscallcmpname(const void *vkey, const void *ventry)
+{
+ const char *key = vkey;
+ const struct syscall *entry = ventry;
+
+ return strcmp(key, entry->name);
+}
+
+static int syscallcmp(const void *va, const void *vb)
+{
+ const struct syscall *a = va, *b = vb;
+
+ return strcmp(a->name, b->name);
+}
+
+static int syscalltbl__init_native(struct syscalltbl *tbl)
+{
+ int nr_entries = 0, i, j;
+ struct syscall *entries;
+
+ for (i = 0; i <= syscalltbl_native_max_id; ++i)
+ if (syscalltbl_native[i])
+ ++nr_entries;
+
+ entries = tbl->syscalls.entries = malloc(sizeof(struct syscall) * nr_entries);
+ if (tbl->syscalls.entries == NULL)
+ return -1;
+
+ for (i = 0, j = 0; i <= syscalltbl_native_max_id; ++i) {
+ if (syscalltbl_native[i]) {
+ entries[j].name = syscalltbl_native[i];
+ entries[j].id = i;
+ ++j;
+ }
+ }
+
+ qsort(tbl->syscalls.entries, nr_entries, sizeof(struct syscall), syscallcmp);
+ tbl->syscalls.nr_entries = nr_entries;
+ return 0;
+}
+
+struct syscalltbl *syscalltbl__new(void)
+{
+ struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ if (tbl) {
+ if (syscalltbl__init_native(tbl)) {
+ free(tbl);
+ return NULL;
+ }
+ }
+ return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+ zfree(&tbl->syscalls.entries);
+ free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl __maybe_unused, int id)
+{
+	return id <= syscalltbl_native_max_id ? syscalltbl_native[id] : NULL;
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+ struct syscall *sc = bsearch(name, tbl->syscalls.entries,
+ tbl->syscalls.nr_entries, sizeof(*sc),
+ syscallcmpname);
+
+ return sc ? sc->id : -1;
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ int i;
+ struct syscall *syscalls = tbl->syscalls.entries;
+
+ for (i = *idx + 1; i < tbl->syscalls.nr_entries; ++i) {
+ if (strglobmatch(syscalls[i].name, syscall_glob)) {
+ *idx = i;
+ return syscalls[i].id;
+ }
+ }
+
+ return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ *idx = -1;
+ return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
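+
+/*
+ * Usage sketch (illustrative): resolve every syscall matching a glob, in
+ * the style of 'perf trace -e open*':
+ *
+ *	int idx, id;
+ *
+ *	for (id = syscalltbl__strglobmatch_first(tbl, "open*", &idx);
+ *	     id >= 0;
+ *	     id = syscalltbl__strglobmatch_next(tbl, "open*", &idx))
+ *		printf("%d %s\n", id, syscalltbl__name(tbl, id));
+ */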
+
+#else /* HAVE_SYSCALL_TABLE_SUPPORT */
+
+#include <libaudit.h>
+
+struct syscalltbl *syscalltbl__new(void)
+{
+ struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ if (tbl)
+ tbl->audit_machine = audit_detect_machine();
+ return tbl;
+}
+
+void syscalltbl__delete(struct syscalltbl *tbl)
+{
+ free(tbl);
+}
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id)
+{
+ return audit_syscall_to_name(id, tbl->audit_machine);
+}
+
+int syscalltbl__id(struct syscalltbl *tbl, const char *name)
+{
+ return audit_name_to_syscall(name, tbl->audit_machine);
+}
+
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused,
+ const char *syscall_glob __maybe_unused, int *idx __maybe_unused)
+{
+ return -1;
+}
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx)
+{
+ return syscalltbl__strglobmatch_next(tbl, syscall_glob, idx);
+}
+#endif /* HAVE_SYSCALL_TABLE_SUPPORT */
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
new file mode 100644
index 000000000..c8e7e9ce0
--- /dev/null
+++ b/tools/perf/util/syscalltbl.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SYSCALLTBL_H
+#define __PERF_SYSCALLTBL_H
+
+struct syscalltbl {
+ union {
+ int audit_machine;
+ struct {
+ int nr_entries;
+ void *entries;
+ } syscalls;
+ };
+};
+
+struct syscalltbl *syscalltbl__new(void);
+void syscalltbl__delete(struct syscalltbl *tbl);
+
+const char *syscalltbl__name(const struct syscalltbl *tbl, int id);
+int syscalltbl__id(struct syscalltbl *tbl, const char *name);
+
+int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+int syscalltbl__strglobmatch_next(struct syscalltbl *tbl, const char *syscall_glob, int *idx);
+
+#endif /* __PERF_SYSCALLTBL_H */
diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c
new file mode 100644
index 000000000..21c4d9b23
--- /dev/null
+++ b/tools/perf/util/target.c
@@ -0,0 +1,152 @@
+/*
+ * Helper functions for handling target threads/cpus
+ *
+ * Copyright (C) 2012, LG Electronics, Namhyung Kim <namhyung.kim@lge.com>
+ *
+ * Released under the GPL v2.
+ */
+
+#include "target.h"
+#include "util.h"
+#include "debug.h"
+
+#include <pwd.h>
+#include <string.h>
+
+
+enum target_errno target__validate(struct target *target)
+{
+ enum target_errno ret = TARGET_ERRNO__SUCCESS;
+
+ if (target->pid)
+ target->tid = target->pid;
+
+ /* CPU and PID are mutually exclusive */
+ if (target->tid && target->cpu_list) {
+ target->cpu_list = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__PID_OVERRIDE_CPU;
+ }
+
+ /* UID and PID are mutually exclusive */
+ if (target->tid && target->uid_str) {
+ target->uid_str = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__PID_OVERRIDE_UID;
+ }
+
+ /* UID and CPU are mutually exclusive */
+ if (target->uid_str && target->cpu_list) {
+ target->cpu_list = NULL;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__UID_OVERRIDE_CPU;
+ }
+
+ /* PID and SYSTEM are mutually exclusive */
+ if (target->tid && target->system_wide) {
+ target->system_wide = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__PID_OVERRIDE_SYSTEM;
+ }
+
+ /* UID and SYSTEM are mutually exclusive */
+ if (target->uid_str && target->system_wide) {
+ target->system_wide = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__UID_OVERRIDE_SYSTEM;
+ }
+
+ /* THREAD and SYSTEM/CPU are mutually exclusive */
+ if (target->per_thread && (target->system_wide || target->cpu_list)) {
+ target->per_thread = false;
+ if (ret == TARGET_ERRNO__SUCCESS)
+ ret = TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD;
+ }
+
+ return ret;
+}
+
+enum target_errno target__parse_uid(struct target *target)
+{
+ struct passwd pwd, *result;
+ char buf[1024];
+ const char *str = target->uid_str;
+
+ target->uid = UINT_MAX;
+ if (str == NULL)
+ return TARGET_ERRNO__SUCCESS;
+
+ /* Try user name first */
+ getpwnam_r(str, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL) {
+ /*
+		 * The user name was not found. Maybe it's a UID number.
+ */
+ char *endptr;
+ int uid = strtol(str, &endptr, 10);
+
+ if (*endptr != '\0')
+ return TARGET_ERRNO__INVALID_UID;
+
+ getpwuid_r(uid, &pwd, buf, sizeof(buf), &result);
+
+ if (result == NULL)
+ return TARGET_ERRNO__USER_NOT_FOUND;
+ }
+
+ target->uid = result->pw_uid;
+ return TARGET_ERRNO__SUCCESS;
+}
+
+/*
+ * This must have the same ordering as enum target_errno.
+ */
+static const char *target__error_str[] = {
+ "PID/TID switch overriding CPU",
+ "PID/TID switch overriding UID",
+ "UID switch overriding CPU",
+ "PID/TID switch overriding SYSTEM",
+ "UID switch overriding SYSTEM",
+ "SYSTEM/CPU switch overriding PER-THREAD",
+ "Invalid User: %s",
+ "Problems obtaining information for user %s",
+};
+
+int target__strerror(struct target *target, int errnum,
+ char *buf, size_t buflen)
+{
+ int idx;
+ const char *msg;
+
+ BUG_ON(buflen == 0);
+
+ if (errnum >= 0) {
+ str_error_r(errnum, buf, buflen);
+ return 0;
+ }
+
+ if (errnum < __TARGET_ERRNO__START || errnum >= __TARGET_ERRNO__END)
+ return -1;
+
+ idx = errnum - __TARGET_ERRNO__START;
+ msg = target__error_str[idx];
+
+ switch (errnum) {
+ case TARGET_ERRNO__PID_OVERRIDE_CPU ...
+ TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD:
+ snprintf(buf, buflen, "%s", msg);
+ break;
+
+ case TARGET_ERRNO__INVALID_UID:
+ case TARGET_ERRNO__USER_NOT_FOUND:
+ snprintf(buf, buflen, msg, target->uid_str);
+ break;
+
+ default:
+ /* cannot reach here */
+ break;
+ }
+
+ return 0;
+}
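+
+/*
+ * Usage sketch (illustrative): validate a target filled in from command
+ * line options and report any conflicting switches:
+ *
+ *	enum target_errno terr = target__validate(&target);
+ *
+ *	if (terr != TARGET_ERRNO__SUCCESS) {
+ *		char errbuf[BUFSIZ];
+ *
+ *		target__strerror(&target, terr, errbuf, sizeof(errbuf));
+ *		pr_warning("%s\n", errbuf);
+ *	}
+ */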
diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h
new file mode 100644
index 000000000..6ef01a83b
--- /dev/null
+++ b/tools/perf/util/target.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_TARGET_H
+#define _PERF_TARGET_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+struct target {
+ const char *pid;
+ const char *tid;
+ const char *cpu_list;
+ const char *uid_str;
+ uid_t uid;
+ bool system_wide;
+ bool uses_mmap;
+ bool default_per_cpu;
+ bool per_thread;
+};
+
+enum target_errno {
+ TARGET_ERRNO__SUCCESS = 0,
+
+ /*
+	 * Choose an arbitrarily large negative number so as not to clash with
+	 * standard errno, since SUS requires errno to have distinct positive
+	 * values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __TARGET_ERRNO__START = -10000,
+
+ /* for target__validate() */
+ TARGET_ERRNO__PID_OVERRIDE_CPU = __TARGET_ERRNO__START,
+ TARGET_ERRNO__PID_OVERRIDE_UID,
+ TARGET_ERRNO__UID_OVERRIDE_CPU,
+ TARGET_ERRNO__PID_OVERRIDE_SYSTEM,
+ TARGET_ERRNO__UID_OVERRIDE_SYSTEM,
+ TARGET_ERRNO__SYSTEM_OVERRIDE_THREAD,
+
+ /* for target__parse_uid() */
+ TARGET_ERRNO__INVALID_UID,
+ TARGET_ERRNO__USER_NOT_FOUND,
+
+ __TARGET_ERRNO__END,
+};
+
+enum target_errno target__validate(struct target *target);
+enum target_errno target__parse_uid(struct target *target);
+
+int target__strerror(struct target *target, int errnum, char *buf, size_t buflen);
+
+static inline bool target__has_task(struct target *target)
+{
+ return target->tid || target->pid || target->uid_str;
+}
+
+static inline bool target__has_cpu(struct target *target)
+{
+ return target->system_wide || target->cpu_list;
+}
+
+static inline bool target__none(struct target *target)
+{
+ return !target__has_task(target) && !target__has_cpu(target);
+}
+
+static inline bool target__has_per_thread(struct target *target)
+{
+ return target->system_wide && target->per_thread;
+}
+
+static inline bool target__uses_dummy_map(struct target *target)
+{
+ bool use_dummy = false;
+
+ if (target->default_per_cpu)
+ use_dummy = target->per_thread ? true : false;
+ else if (target__has_task(target) ||
+ (!target__has_cpu(target) && !target->uses_mmap))
+ use_dummy = true;
+ else if (target__has_per_thread(target))
+ use_dummy = true;
+
+ return use_dummy;
+}
+
+#endif /* _PERF_TARGET_H */
diff --git a/tools/perf/util/term.c b/tools/perf/util/term.c
new file mode 100644
index 000000000..e7aa82c06
--- /dev/null
+++ b/tools/perf/util/term.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "term.h"
+#include <stdlib.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+void get_term_dimensions(struct winsize *ws)
+{
+ char *s = getenv("LINES");
+
+ if (s != NULL) {
+ ws->ws_row = atoi(s);
+ s = getenv("COLUMNS");
+ if (s != NULL) {
+ ws->ws_col = atoi(s);
+ if (ws->ws_row && ws->ws_col)
+ return;
+ }
+ }
+#ifdef TIOCGWINSZ
+ if (ioctl(1, TIOCGWINSZ, ws) == 0 &&
+ ws->ws_row && ws->ws_col)
+ return;
+#endif
+ ws->ws_row = 25;
+ ws->ws_col = 80;
+}
+
+void set_term_quiet_input(struct termios *old)
+{
+ struct termios tc;
+
+ tcgetattr(0, old);
+ tc = *old;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+ tcsetattr(0, TCSANOW, &tc);
+}
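+
+/*
+ * Usage sketch (illustrative; 'done' and handle_keypress() are
+ * hypothetical): switch to quiet input around a TUI-style polling loop,
+ * then restore the saved settings:
+ *
+ *	struct termios save;
+ *
+ *	set_term_quiet_input(&save);
+ *	while (!done)
+ *		handle_keypress();
+ *	tcsetattr(0, TCSAFLUSH, &save);
+ */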
diff --git a/tools/perf/util/term.h b/tools/perf/util/term.h
new file mode 100644
index 000000000..607b170a9
--- /dev/null
+++ b/tools/perf/util/term.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TERM_H
+#define __PERF_TERM_H
+
+struct termios;
+struct winsize;
+
+void get_term_dimensions(struct winsize *ws);
+void set_term_quiet_input(struct termios *old);
+
+#endif /* __PERF_TERM_H */
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
new file mode 100644
index 000000000..a5669d05e
--- /dev/null
+++ b/tools/perf/util/thread-stack.c
@@ -0,0 +1,652 @@
+/*
+ * thread-stack.c: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <errno.h>
+#include "thread.h"
+#include "event.h"
+#include "machine.h"
+#include "util.h"
+#include "debug.h"
+#include "symbol.h"
+#include "comm.h"
+#include "call-path.h"
+#include "thread-stack.h"
+
+#define STACK_GROWTH 2048
+
+/**
+ * struct thread_stack_entry - thread stack entry.
+ * @ret_addr: return address
+ * @timestamp: timestamp (if known)
+ * @ref: external reference (e.g. db_id of sample)
+ * @branch_count: the branch count when the entry was created
+ * @cp: call path
+ * @no_call: a 'call' was not seen
+ */
+struct thread_stack_entry {
+ u64 ret_addr;
+ u64 timestamp;
+ u64 ref;
+ u64 branch_count;
+ struct call_path *cp;
+ bool no_call;
+};
+
+/**
+ * struct thread_stack - thread stack constructed from 'call' and 'return'
+ * branch samples.
+ * @stack: array that holds the stack
+ * @cnt: number of entries in the stack
+ * @sz: current maximum stack size
+ * @trace_nr: current trace number
+ * @branch_count: running branch count
+ * @kernel_start: kernel start address
+ * @last_time: last timestamp
+ * @crp: call/return processor
+ * @comm: current comm
+ */
+struct thread_stack {
+ struct thread_stack_entry *stack;
+ size_t cnt;
+ size_t sz;
+ u64 trace_nr;
+ u64 branch_count;
+ u64 kernel_start;
+ u64 last_time;
+ struct call_return_processor *crp;
+ struct comm *comm;
+};
+
+static int thread_stack__grow(struct thread_stack *ts)
+{
+ struct thread_stack_entry *new_stack;
+ size_t sz, new_sz;
+
+ new_sz = ts->sz + STACK_GROWTH;
+ sz = new_sz * sizeof(struct thread_stack_entry);
+
+ new_stack = realloc(ts->stack, sz);
+ if (!new_stack)
+ return -ENOMEM;
+
+ ts->stack = new_stack;
+ ts->sz = new_sz;
+
+ return 0;
+}
+
+static struct thread_stack *thread_stack__new(struct thread *thread,
+ struct call_return_processor *crp)
+{
+ struct thread_stack *ts;
+
+ ts = zalloc(sizeof(struct thread_stack));
+ if (!ts)
+ return NULL;
+
+ if (thread_stack__grow(ts)) {
+ free(ts);
+ return NULL;
+ }
+
+ if (thread->mg && thread->mg->machine)
+ ts->kernel_start = machine__kernel_start(thread->mg->machine);
+ else
+ ts->kernel_start = 1ULL << 63;
+ ts->crp = crp;
+
+ return ts;
+}
+
+static int thread_stack__push(struct thread_stack *ts, u64 ret_addr)
+{
+ int err = 0;
+
+ if (ts->cnt == ts->sz) {
+ err = thread_stack__grow(ts);
+ if (err) {
+ pr_warning("Out of memory: discarding thread stack\n");
+ ts->cnt = 0;
+ }
+ }
+
+ ts->stack[ts->cnt++].ret_addr = ret_addr;
+
+ return err;
+}
+
+static void thread_stack__pop(struct thread_stack *ts, u64 ret_addr)
+{
+ size_t i;
+
+ /*
+ * In some cases there may be functions which are not seen to return.
+	 * For example when setjmp / longjmp has been used, or when the
+	 * perf context switch in the kernel doesn't stop and start tracing
+	 * in exactly the same code path. When that happens the return
+	 * address will be
+ * further down the stack. If the return address is not found at all,
+ * we assume the opposite (i.e. this is a return for a call that wasn't
+ * seen for some reason) and leave the stack alone.
+ */
+ for (i = ts->cnt; i; ) {
+ if (ts->stack[--i].ret_addr == ret_addr) {
+ ts->cnt = i;
+ return;
+ }
+ }
+}
+
+static bool thread_stack__in_kernel(struct thread_stack *ts)
+{
+ if (!ts->cnt)
+ return false;
+
+ return ts->stack[ts->cnt - 1].cp->in_kernel;
+}
+
+static int thread_stack__call_return(struct thread *thread,
+ struct thread_stack *ts, size_t idx,
+ u64 timestamp, u64 ref, bool no_return)
+{
+ struct call_return_processor *crp = ts->crp;
+ struct thread_stack_entry *tse;
+ struct call_return cr = {
+ .thread = thread,
+ .comm = ts->comm,
+ .db_id = 0,
+ };
+
+ tse = &ts->stack[idx];
+ cr.cp = tse->cp;
+ cr.call_time = tse->timestamp;
+ cr.return_time = timestamp;
+ cr.branch_count = ts->branch_count - tse->branch_count;
+ cr.call_ref = tse->ref;
+ cr.return_ref = ref;
+ if (tse->no_call)
+ cr.flags |= CALL_RETURN_NO_CALL;
+ if (no_return)
+ cr.flags |= CALL_RETURN_NO_RETURN;
+
+ return crp->process(&cr, crp->data);
+}
+
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+{
+ struct call_return_processor *crp = ts->crp;
+ int err;
+
+ if (!crp) {
+ ts->cnt = 0;
+ return 0;
+ }
+
+ while (ts->cnt) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ ts->last_time, 0, true);
+ if (err) {
+ pr_err("Error flushing thread stack!\n");
+ ts->cnt = 0;
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int thread_stack__flush(struct thread *thread)
+{
+ if (thread->ts)
+ return __thread_stack__flush(thread, thread->ts);
+
+ return 0;
+}
+
+int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+ u64 to_ip, u16 insn_len, u64 trace_nr)
+{
+ if (!thread)
+ return -EINVAL;
+
+ if (!thread->ts) {
+ thread->ts = thread_stack__new(thread, NULL);
+ if (!thread->ts) {
+ pr_warning("Out of memory: no thread stack\n");
+ return -ENOMEM;
+ }
+ thread->ts->trace_nr = trace_nr;
+ }
+
+ /*
+ * When the trace is discontinuous, the trace_nr changes. In that case
+ * the stack might be completely invalid. Better to report nothing than
+ * to report something misleading, so flush the stack.
+ */
+ if (trace_nr != thread->ts->trace_nr) {
+ if (thread->ts->trace_nr)
+ __thread_stack__flush(thread, thread->ts);
+ thread->ts->trace_nr = trace_nr;
+ }
+
+ /* Stop here if thread_stack__process() is in use */
+ if (thread->ts->crp)
+ return 0;
+
+ if (flags & PERF_IP_FLAG_CALL) {
+ u64 ret_addr;
+
+ if (!to_ip)
+ return 0;
+ ret_addr = from_ip + insn_len;
+ if (ret_addr == to_ip)
+ return 0; /* Zero-length calls are excluded */
+ return thread_stack__push(thread->ts, ret_addr);
+ } else if (flags & PERF_IP_FLAG_RETURN) {
+ if (!from_ip)
+ return 0;
+ thread_stack__pop(thread->ts, to_ip);
+ }
+
+ return 0;
+}
+
+void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
+{
+ if (!thread || !thread->ts)
+ return;
+
+ if (trace_nr != thread->ts->trace_nr) {
+ if (thread->ts->trace_nr)
+ __thread_stack__flush(thread, thread->ts);
+ thread->ts->trace_nr = trace_nr;
+ }
+}
+
+void thread_stack__free(struct thread *thread)
+{
+ if (thread->ts) {
+ __thread_stack__flush(thread, thread->ts);
+ zfree(&thread->ts->stack);
+ zfree(&thread->ts);
+ }
+}
+
+static inline u64 callchain_context(u64 ip, u64 kernel_start)
+{
+ return ip < kernel_start ? PERF_CONTEXT_USER : PERF_CONTEXT_KERNEL;
+}
+
+void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start)
+{
+ u64 context = callchain_context(ip, kernel_start);
+ u64 last_context;
+ size_t i, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ chain->ips[0] = context;
+ chain->ips[1] = ip;
+
+ if (!thread || !thread->ts) {
+ chain->nr = 2;
+ return;
+ }
+
+ last_context = context;
+
+ for (i = 2, j = 1; i < sz && j <= thread->ts->cnt; i++, j++) {
+ ip = thread->ts->stack[thread->ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (i >= sz - 1)
+ break;
+ chain->ips[i++] = context;
+ last_context = context;
+ }
+ chain->ips[i] = ip;
+ }
+
+ chain->nr = i;
+}
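+
+/*
+ * Usage sketch (illustrative): synthesize a callchain for a sample from
+ * the per-thread stack, assuming 'chain' has room for 'sz' entries:
+ *
+ *	thread_stack__sample(thread, chain, sz, sample->ip, kernel_start);
+ *
+ * chain->ips[] then starts with a PERF_CONTEXT_* marker followed by the
+ * sample ip and the cached return addresses, innermost first.
+ */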
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+ void *data)
+{
+ struct call_return_processor *crp;
+
+ crp = zalloc(sizeof(struct call_return_processor));
+ if (!crp)
+ return NULL;
+ crp->cpr = call_path_root__new();
+ if (!crp->cpr)
+ goto out_free;
+ crp->process = process;
+ crp->data = data;
+ return crp;
+
+out_free:
+ free(crp);
+ return NULL;
+}
+
+void call_return_processor__free(struct call_return_processor *crp)
+{
+ if (crp) {
+ call_path_root__free(crp->cpr);
+ free(crp);
+ }
+}
+
+static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr,
+ u64 timestamp, u64 ref, struct call_path *cp,
+ bool no_call)
+{
+ struct thread_stack_entry *tse;
+ int err;
+
+ if (ts->cnt == ts->sz) {
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+ }
+
+ tse = &ts->stack[ts->cnt++];
+ tse->ret_addr = ret_addr;
+ tse->timestamp = timestamp;
+ tse->ref = ref;
+ tse->branch_count = ts->branch_count;
+ tse->cp = cp;
+ tse->no_call = no_call;
+
+ return 0;
+}
+
+static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts,
+ u64 ret_addr, u64 timestamp, u64 ref,
+ struct symbol *sym)
+{
+ int err;
+
+ if (!ts->cnt)
+ return 1;
+
+ if (ts->cnt == 1) {
+ struct thread_stack_entry *tse = &ts->stack[0];
+
+ if (tse->cp->sym == sym)
+ return thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ }
+
+ if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) {
+ return thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ } else {
+ size_t i = ts->cnt - 1;
+
+ while (i--) {
+ if (ts->stack[i].ret_addr != ret_addr)
+ continue;
+ i += 1;
+ while (ts->cnt > i) {
+ err = thread_stack__call_return(thread, ts,
+ --ts->cnt,
+ timestamp, ref,
+ true);
+ if (err)
+ return err;
+ }
+ return thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ }
+ }
+
+ return 1;
+}
+
+static int thread_stack__bottom(struct thread *thread, struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref)
+{
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+ struct symbol *sym;
+ u64 ip;
+
+ if (sample->ip) {
+ ip = sample->ip;
+ sym = from_al->sym;
+ } else if (sample->addr) {
+ ip = sample->addr;
+ sym = to_al->sym;
+ } else {
+ return 0;
+ }
+
+ cp = call_path__findnew(cpr, &cpr->call_path, sym, ip,
+ ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+
+ return thread_stack__push_cp(thread->ts, ip, sample->time, ref, cp,
+ true);
+}
+
+static int thread_stack__no_call_return(struct thread *thread,
+ struct thread_stack *ts,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref)
+{
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp, *parent;
+ u64 ks = ts->kernel_start;
+ int err;
+
+ if (sample->ip >= ks && sample->addr < ks) {
+ /* Return to userspace, so pop all kernel addresses */
+ while (thread_stack__in_kernel(ts)) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ sample->time, ref,
+ true);
+ if (err)
+ return err;
+ }
+
+ /* If the stack is empty, push the userspace address */
+ if (!ts->cnt) {
+ cp = call_path__findnew(cpr, &cpr->call_path,
+ to_al->sym, sample->addr,
+ ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+ return thread_stack__push_cp(ts, 0, sample->time, ref,
+ cp, true);
+ }
+ } else if (thread_stack__in_kernel(ts) && sample->ip < ks) {
+ /* Return to userspace, so pop all kernel addresses */
+ while (thread_stack__in_kernel(ts)) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ sample->time, ref,
+ true);
+ if (err)
+ return err;
+ }
+ }
+
+ if (ts->cnt)
+ parent = ts->stack[ts->cnt - 1].cp;
+ else
+ parent = &cpr->call_path;
+
+ /* This 'return' had no 'call', so push and pop top of stack */
+ cp = call_path__findnew(cpr, parent, from_al->sym, sample->ip,
+ ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+
+ err = thread_stack__push_cp(ts, sample->addr, sample->time, ref, cp,
+ true);
+ if (err)
+ return err;
+
+ return thread_stack__pop_cp(thread, ts, sample->addr, sample->time, ref,
+ to_al->sym);
+}
+
+static int thread_stack__trace_begin(struct thread *thread,
+ struct thread_stack *ts, u64 timestamp,
+ u64 ref)
+{
+ struct thread_stack_entry *tse;
+ int err;
+
+ if (!ts->cnt)
+ return 0;
+
+ /* Pop trace end */
+ tse = &ts->stack[ts->cnt - 1];
+ if (tse->cp->sym == NULL && tse->cp->ip == 0) {
+ err = thread_stack__call_return(thread, ts, --ts->cnt,
+ timestamp, ref, false);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int thread_stack__trace_end(struct thread_stack *ts,
+ struct perf_sample *sample, u64 ref)
+{
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+ u64 ret_addr;
+
+ /* No point having 'trace end' on the bottom of the stack */
+ if (!ts->cnt || (ts->cnt == 1 && ts->stack[0].ref == ref))
+ return 0;
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, NULL, 0,
+ ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+
+ ret_addr = sample->ip + sample->insn_len;
+
+ return thread_stack__push_cp(ts, ret_addr, sample->time, ref, cp,
+ false);
+}
+
+int thread_stack__process(struct thread *thread, struct comm *comm,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref,
+ struct call_return_processor *crp)
+{
+ struct thread_stack *ts = thread->ts;
+ int err = 0;
+
+ if (ts) {
+ if (!ts->crp) {
+ /* Supersede thread_stack__event() */
+ thread_stack__free(thread);
+ thread->ts = thread_stack__new(thread, crp);
+ if (!thread->ts)
+ return -ENOMEM;
+ ts = thread->ts;
+ ts->comm = comm;
+ }
+ } else {
+ thread->ts = thread_stack__new(thread, crp);
+ if (!thread->ts)
+ return -ENOMEM;
+ ts = thread->ts;
+ ts->comm = comm;
+ }
+
+ /* Flush stack on exec */
+ if (ts->comm != comm && thread->pid_ == thread->tid) {
+ err = __thread_stack__flush(thread, ts);
+ if (err)
+ return err;
+ ts->comm = comm;
+ }
+
+ /* If the stack is empty, put the current symbol on the stack */
+ if (!ts->cnt) {
+ err = thread_stack__bottom(thread, ts, sample, from_al, to_al,
+ ref);
+ if (err)
+ return err;
+ }
+
+ ts->branch_count += 1;
+ ts->last_time = sample->time;
+
+ if (sample->flags & PERF_IP_FLAG_CALL) {
+ struct call_path_root *cpr = ts->crp->cpr;
+ struct call_path *cp;
+ u64 ret_addr;
+
+ if (!sample->ip || !sample->addr)
+ return 0;
+
+ ret_addr = sample->ip + sample->insn_len;
+ if (ret_addr == sample->addr)
+ return 0; /* Zero-length calls are excluded */
+
+ cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp,
+ to_al->sym, sample->addr,
+ ts->kernel_start);
+ if (!cp)
+ return -ENOMEM;
+ err = thread_stack__push_cp(ts, ret_addr, sample->time, ref,
+ cp, false);
+ } else if (sample->flags & PERF_IP_FLAG_RETURN) {
+ if (!sample->ip || !sample->addr)
+ return 0;
+
+ err = thread_stack__pop_cp(thread, ts, sample->addr,
+ sample->time, ref, from_al->sym);
+ if (err) {
+ if (err < 0)
+ return err;
+ err = thread_stack__no_call_return(thread, ts, sample,
+ from_al, to_al, ref);
+ }
+ } else if (sample->flags & PERF_IP_FLAG_TRACE_BEGIN) {
+ err = thread_stack__trace_begin(thread, ts, sample->time, ref);
+ } else if (sample->flags & PERF_IP_FLAG_TRACE_END) {
+ err = thread_stack__trace_end(ts, sample, ref);
+ }
+
+ return err;
+}
+
+size_t thread_stack__depth(struct thread *thread)
+{
+ if (!thread->ts)
+ return 0;
+ return thread->ts->cnt;
+}
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
new file mode 100644
index 000000000..f97c00a8c
--- /dev/null
+++ b/tools/perf/util/thread-stack.h
@@ -0,0 +1,102 @@
+/*
+ * thread-stack.h: Synthesize a thread's stack using call / return events
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_THREAD_STACK_H
+#define __PERF_THREAD_STACK_H
+
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+struct thread;
+struct comm;
+struct ip_callchain;
+struct symbol;
+struct dso;
+struct comm;
+struct perf_sample;
+struct addr_location;
+struct call_path;
+
+/*
+ * Call/Return flags.
+ *
+ * CALL_RETURN_NO_CALL: 'return' but no matching 'call'
+ * CALL_RETURN_NO_RETURN: 'call' but no matching 'return'
+ */
+enum {
+ CALL_RETURN_NO_CALL = 1 << 0,
+ CALL_RETURN_NO_RETURN = 1 << 1,
+};
+
+/**
+ * struct call_return - paired call/return information.
+ * @thread: thread in which call/return occurred
+ * @comm: comm in which call/return occurred
+ * @cp: call path
+ * @call_time: timestamp of call (if known)
+ * @return_time: timestamp of return (if known)
+ * @branch_count: number of branches seen between call and return
+ * @call_ref: external reference to 'call' sample (e.g. db_id)
+ * @return_ref: external reference to 'return' sample (e.g. db_id)
+ * @db_id: id used for db-export
+ * @flags: Call/Return flags
+ */
+struct call_return {
+ struct thread *thread;
+ struct comm *comm;
+ struct call_path *cp;
+ u64 call_time;
+ u64 return_time;
+ u64 branch_count;
+ u64 call_ref;
+ u64 return_ref;
+ u64 db_id;
+ u32 flags;
+};
+
+/**
+ * struct call_return_processor - provides a call-back to consume call-return
+ * information.
+ * @cpr: call path root
+ * @process: call-back that accepts call/return information
+ * @data: anonymous data for call-back
+ */
+struct call_return_processor {
+ struct call_path_root *cpr;
+ int (*process)(struct call_return *cr, void *data);
+ void *data;
+};
+
+int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
+ u64 to_ip, u16 insn_len, u64 trace_nr);
+void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
+void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
+ size_t sz, u64 ip, u64 kernel_start);
+int thread_stack__flush(struct thread *thread);
+void thread_stack__free(struct thread *thread);
+size_t thread_stack__depth(struct thread *thread);
+
+struct call_return_processor *
+call_return_processor__new(int (*process)(struct call_return *cr, void *data),
+ void *data);
+void call_return_processor__free(struct call_return_processor *crp);
+int thread_stack__process(struct thread *thread, struct comm *comm,
+ struct perf_sample *sample,
+ struct addr_location *from_al,
+ struct addr_location *to_al, u64 ref,
+ struct call_return_processor *crp);
+
+#endif
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
new file mode 100644
index 000000000..2c146d0c2
--- /dev/null
+++ b/tools/perf/util/thread.c
@@ -0,0 +1,416 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../perf.h"
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include "session.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "util.h"
+#include "debug.h"
+#include "namespaces.h"
+#include "comm.h"
+#include "unwind.h"
+
+#include <api/fs/fs.h>
+
+int thread__init_map_groups(struct thread *thread, struct machine *machine)
+{
+ pid_t pid = thread->pid_;
+
+ if (pid == thread->tid || pid == -1) {
+ thread->mg = map_groups__new(machine);
+ } else {
+ struct thread *leader = __machine__findnew_thread(machine, pid, pid);
+ if (leader) {
+ thread->mg = map_groups__get(leader->mg);
+ thread__put(leader);
+ }
+ }
+
+ return thread->mg ? 0 : -1;
+}
+
+struct thread *thread__new(pid_t pid, pid_t tid)
+{
+ char *comm_str;
+ struct comm *comm;
+ struct thread *thread = zalloc(sizeof(*thread));
+
+ if (thread != NULL) {
+ thread->pid_ = pid;
+ thread->tid = tid;
+ thread->ppid = -1;
+ thread->cpu = -1;
+ INIT_LIST_HEAD(&thread->namespaces_list);
+ INIT_LIST_HEAD(&thread->comm_list);
+ init_rwsem(&thread->namespaces_lock);
+ init_rwsem(&thread->comm_lock);
+
+ comm_str = malloc(32);
+ if (!comm_str)
+ goto err_thread;
+
+ snprintf(comm_str, 32, ":%d", tid);
+ comm = comm__new(comm_str, 0, false);
+ free(comm_str);
+ if (!comm)
+ goto err_thread;
+
+ list_add(&comm->list, &thread->comm_list);
+ refcount_set(&thread->refcnt, 1);
+ RB_CLEAR_NODE(&thread->rb_node);
+ /* Thread holds first ref to nsdata. */
+ thread->nsinfo = nsinfo__new(pid);
+ }
+
+ return thread;
+
+err_thread:
+ free(thread);
+ return NULL;
+}
+
+void thread__delete(struct thread *thread)
+{
+ struct namespaces *namespaces, *tmp_namespaces;
+ struct comm *comm, *tmp_comm;
+
+ BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
+
+ thread_stack__free(thread);
+
+ if (thread->mg) {
+ map_groups__put(thread->mg);
+ thread->mg = NULL;
+ }
+ down_write(&thread->namespaces_lock);
+ list_for_each_entry_safe(namespaces, tmp_namespaces,
+ &thread->namespaces_list, list) {
+ list_del(&namespaces->list);
+ namespaces__free(namespaces);
+ }
+ up_write(&thread->namespaces_lock);
+
+ down_write(&thread->comm_lock);
+ list_for_each_entry_safe(comm, tmp_comm, &thread->comm_list, list) {
+ list_del(&comm->list);
+ comm__free(comm);
+ }
+ up_write(&thread->comm_lock);
+
+ unwind__finish_access(thread);
+ nsinfo__zput(thread->nsinfo);
+
+ exit_rwsem(&thread->namespaces_lock);
+ exit_rwsem(&thread->comm_lock);
+ free(thread);
+}
+
+struct thread *thread__get(struct thread *thread)
+{
+ if (thread)
+ refcount_inc(&thread->refcnt);
+ return thread;
+}
+
+void thread__put(struct thread *thread)
+{
+ if (thread && refcount_dec_and_test(&thread->refcnt)) {
+ /*
+ * Remove it from the dead_threads list, as last reference
+ * is gone.
+ */
+ list_del_init(&thread->node);
+ thread__delete(thread);
+ }
+}
+
+static struct namespaces *__thread__namespaces(const struct thread *thread)
+{
+ if (list_empty(&thread->namespaces_list))
+ return NULL;
+
+ return list_first_entry(&thread->namespaces_list, struct namespaces, list);
+}
+
+struct namespaces *thread__namespaces(const struct thread *thread)
+{
+ struct namespaces *ns;
+
+ down_read((struct rw_semaphore *)&thread->namespaces_lock);
+ ns = __thread__namespaces(thread);
+ up_read((struct rw_semaphore *)&thread->namespaces_lock);
+
+ return ns;
+}
+
+static int __thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct namespaces_event *event)
+{
+ struct namespaces *new, *curr = __thread__namespaces(thread);
+
+ new = namespaces__new(event);
+ if (!new)
+ return -ENOMEM;
+
+ list_add(&new->list, &thread->namespaces_list);
+
+ if (timestamp && curr) {
+ /*
+		 * A setns syscall must have changed some or all of the
+		 * namespaces of this thread. Update the end time for the
+		 * namespaces previously used.
+ */
+ curr = list_next_entry(new, list);
+ curr->end_time = timestamp;
+ }
+
+ return 0;
+}
+
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct namespaces_event *event)
+{
+ int ret;
+
+ down_write(&thread->namespaces_lock);
+ ret = __thread__set_namespaces(thread, timestamp, event);
+ up_write(&thread->namespaces_lock);
+ return ret;
+}
+
+struct comm *thread__comm(const struct thread *thread)
+{
+ if (list_empty(&thread->comm_list))
+ return NULL;
+
+ return list_first_entry(&thread->comm_list, struct comm, list);
+}
+
+struct comm *thread__exec_comm(const struct thread *thread)
+{
+ struct comm *comm, *last = NULL, *second_last = NULL;
+
+ list_for_each_entry(comm, &thread->comm_list, list) {
+ if (comm->exec)
+ return comm;
+ second_last = last;
+ last = comm;
+ }
+
+ /*
+ * 'last' with no start time might be the parent's comm of a synthesized
+ * thread (created by processing a synthesized fork event). For a main
+ * thread, that is very probably wrong. Prefer a later comm to avoid
+ * that case.
+ */
+ if (second_last && !last->start && thread->pid_ == thread->tid)
+ return second_last;
+
+ return last;
+}
+
+static int ____thread__set_comm(struct thread *thread, const char *str,
+ u64 timestamp, bool exec)
+{
+ struct comm *new, *curr = thread__comm(thread);
+
+ /* Override the default :tid entry */
+ if (!thread->comm_set) {
+ int err = comm__override(curr, str, timestamp, exec);
+ if (err)
+ return err;
+ } else {
+ new = comm__new(str, timestamp, exec);
+ if (!new)
+ return -ENOMEM;
+ list_add(&new->list, &thread->comm_list);
+
+ if (exec)
+ unwind__flush_access(thread);
+ }
+
+ thread->comm_set = true;
+
+ return 0;
+}
+
+int __thread__set_comm(struct thread *thread, const char *str, u64 timestamp,
+ bool exec)
+{
+ int ret;
+
+ down_write(&thread->comm_lock);
+ ret = ____thread__set_comm(thread, str, timestamp, exec);
+ up_write(&thread->comm_lock);
+ return ret;
+}
+
+int thread__set_comm_from_proc(struct thread *thread)
+{
+ char path[64];
+ char *comm = NULL;
+ size_t sz;
+ int err = -1;
+
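+	/* path is relative to /proc; reject truncation, then read the comm file */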
+ if (!(snprintf(path, sizeof(path), "%d/task/%d/comm",
+ thread->pid_, thread->tid) >= (int)sizeof(path)) &&
+ procfs__read_str(path, &comm, &sz) == 0) {
+ comm[sz - 1] = '\0';
+ err = thread__set_comm(thread, comm, 0);
+ }
+
+ return err;
+}
+
+static const char *__thread__comm_str(const struct thread *thread)
+{
+ const struct comm *comm = thread__comm(thread);
+
+ if (!comm)
+ return NULL;
+
+ return comm__str(comm);
+}
+
+const char *thread__comm_str(const struct thread *thread)
+{
+ const char *str;
+
+ down_read((struct rw_semaphore *)&thread->comm_lock);
+ str = __thread__comm_str(thread);
+ up_read((struct rw_semaphore *)&thread->comm_lock);
+
+ return str;
+}
+
+/* CHECKME: it should probably return the max comm len from its comm list */
+int thread__comm_len(struct thread *thread)
+{
+ if (!thread->comm_len) {
+ const char *comm = thread__comm_str(thread);
+ if (!comm)
+ return 0;
+ thread->comm_len = strlen(comm);
+ }
+
+ return thread->comm_len;
+}
+
+size_t thread__fprintf(struct thread *thread, FILE *fp)
+{
+ return fprintf(fp, "Thread %d %s\n", thread->tid, thread__comm_str(thread)) +
+ map_groups__fprintf(thread->mg, fp);
+}
+
+int thread__insert_map(struct thread *thread, struct map *map)
+{
+ int ret;
+
+ ret = unwind__prepare_access(thread, map, NULL);
+ if (ret)
+ return ret;
+
+ map_groups__fixup_overlappings(thread->mg, map, stderr);
+ map_groups__insert(thread->mg, map);
+
+ return 0;
+}
+
+static int __thread__prepare_access(struct thread *thread)
+{
+ bool initialized = false;
+ int err = 0;
+ struct maps *maps = &thread->mg->maps;
+ struct map *map;
+
+ down_read(&maps->lock);
+
+ for (map = maps__first(maps); map; map = map__next(map)) {
+ err = unwind__prepare_access(thread, map, &initialized);
+ if (err || initialized)
+ break;
+ }
+
+ up_read(&maps->lock);
+
+ return err;
+}
+
+static int thread__prepare_access(struct thread *thread)
+{
+ int err = 0;
+
+ if (symbol_conf.use_callchain)
+ err = __thread__prepare_access(thread);
+
+ return err;
+}
+
+static int thread__clone_map_groups(struct thread *thread,
+ struct thread *parent)
+{
+ /* This is a new thread in an existing process, so share its map groups. */
+ if (thread->pid_ == parent->pid_)
+ return thread__prepare_access(thread);
+
+ if (thread->mg == parent->mg) {
+ pr_debug("broken map groups on thread %d/%d parent %d/%d\n",
+ thread->pid_, thread->tid, parent->pid_, parent->tid);
+ return 0;
+ }
+
+ /* But this one is a new process, so copy the maps. */
+ if (map_groups__clone(thread, parent->mg) < 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+{
+ if (parent->comm_set) {
+ const char *comm = thread__comm_str(parent);
+ int err;
+ if (!comm)
+ return -ENOMEM;
+ err = thread__set_comm(thread, comm, timestamp);
+ if (err)
+ return err;
+ }
+
+ thread->ppid = parent->tid;
+ return thread__clone_map_groups(thread, parent);
+}
+
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
+ struct addr_location *al)
+{
+ size_t i;
+ const u8 cpumodes[] = {
+ PERF_RECORD_MISC_USER,
+ PERF_RECORD_MISC_KERNEL,
+ PERF_RECORD_MISC_GUEST_USER,
+ PERF_RECORD_MISC_GUEST_KERNEL
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cpumodes); i++) {
+ thread__find_symbol(thread, cpumodes[i], addr, al);
+ if (al->map)
+ break;
+ }
+}
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread)
+{
+ if (thread->pid_ == thread->tid)
+ return thread__get(thread);
+
+ if (thread->pid_ == -1)
+ return NULL;
+
+ return machine__find_thread(machine, thread->pid_, thread->pid_);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
new file mode 100644
index 000000000..4e2c3cbde
--- /dev/null
+++ b/tools/perf/util/thread.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_H
+#define __PERF_THREAD_H
+
+#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include <linux/list.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "symbol.h"
+#include <strlist.h>
+#include <intlist.h>
+#include "rwsem.h"
+
+struct thread_stack;
+struct unwind_libunwind_ops;
+
+struct thread {
+ union {
+ struct rb_node rb_node;
+ struct list_head node;
+ };
+ struct map_groups *mg;
+ pid_t pid_; /* Not all tools update this */
+ pid_t tid;
+ pid_t ppid;
+ int cpu;
+ refcount_t refcnt;
+ bool comm_set;
+ int comm_len;
+ bool dead; /* if set thread has exited */
+ struct list_head namespaces_list;
+ struct rw_semaphore namespaces_lock;
+ struct list_head comm_list;
+ struct rw_semaphore comm_lock;
+ u64 db_id;
+
+ void *priv;
+ struct thread_stack *ts;
+ struct nsinfo *nsinfo;
+#ifdef HAVE_LIBUNWIND_SUPPORT
+ void *addr_space;
+ struct unwind_libunwind_ops *unwind_libunwind_ops;
+#endif
+};
+
+struct machine;
+struct namespaces;
+struct comm;
+
+struct thread *thread__new(pid_t pid, pid_t tid);
+int thread__init_map_groups(struct thread *thread, struct machine *machine);
+void thread__delete(struct thread *thread);
+
+struct thread *thread__get(struct thread *thread);
+void thread__put(struct thread *thread);
+
+static inline void __thread__zput(struct thread **thread)
+{
+ thread__put(*thread);
+ *thread = NULL;
+}
+
+#define thread__zput(thread) __thread__zput(&thread)
+
+static inline void thread__exited(struct thread *thread)
+{
+ thread->dead = true;
+}
+
+struct namespaces *thread__namespaces(const struct thread *thread);
+int thread__set_namespaces(struct thread *thread, u64 timestamp,
+ struct namespaces_event *event);
+
+int __thread__set_comm(struct thread *thread, const char *comm, u64 timestamp,
+ bool exec);
+static inline int thread__set_comm(struct thread *thread, const char *comm,
+ u64 timestamp)
+{
+ return __thread__set_comm(thread, comm, timestamp, false);
+}
+
+int thread__set_comm_from_proc(struct thread *thread);
+
+int thread__comm_len(struct thread *thread);
+struct comm *thread__comm(const struct thread *thread);
+struct comm *thread__exec_comm(const struct thread *thread);
+const char *thread__comm_str(const struct thread *thread);
+int thread__insert_map(struct thread *thread, struct map *map);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+size_t thread__fprintf(struct thread *thread, FILE *fp);
+
+struct thread *thread__main_thread(struct machine *machine, struct thread *thread);
+
+struct map *thread__find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al);
+struct map *thread__find_map_fb(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al);
+
+struct symbol *thread__find_symbol(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al);
+struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode,
+ u64 addr, struct addr_location *al);
+
+void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
+ struct addr_location *al);
+
+static inline void *thread__priv(struct thread *thread)
+{
+ return thread->priv;
+}
+
+static inline void thread__set_priv(struct thread *thread, void *p)
+{
+ thread->priv = p;
+}
+
+static inline bool thread__is_filtered(struct thread *thread)
+{
+ if (symbol_conf.comm_list &&
+ !strlist__has_entry(symbol_conf.comm_list, thread__comm_str(thread))) {
+ return true;
+ }
+
+ if (symbol_conf.pid_list &&
+ !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) {
+ return true;
+ }
+
+ if (symbol_conf.tid_list &&
+ !intlist__has_entry(symbol_conf.tid_list, thread->tid)) {
+ return true;
+ }
+
+ return false;
+}
+
+#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
new file mode 100644
index 000000000..5d467d8ae
--- /dev/null
+++ b/tools/perf/util/thread_map.c
@@ -0,0 +1,489 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include "string2.h"
+#include "strlist.h"
+#include <string.h>
+#include <api/fs/fs.h>
+#include "asm/bug.h"
+#include "thread_map.h"
+#include "util.h"
+#include "debug.h"
+#include "event.h"
+
+/* Skip "." and ".." (and any other dot-prefixed entries) */
+static int filter(const struct dirent *dir)
+{
+ if (dir->d_name[0] == '.')
+ return 0;
+ else
+ return 1;
+}
+
+static void thread_map__reset(struct thread_map *map, int start, int nr)
+{
+ size_t size = (nr - start) * sizeof(map->map[0]);
+
+ memset(&map->map[start], 0, size);
+ map->err_thread = -1;
+}
+
+static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
+{
+ size_t size = sizeof(*map) + sizeof(map->map[0]) * nr;
+ int start = map ? map->nr : 0;
+
+ map = realloc(map, size);
+ /*
+ * We only realloc to add more items, let's reset new items.
+ */
+ if (map)
+ thread_map__reset(map, start, nr);
+
+ return map;
+}
+
+#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
+
+struct thread_map *thread_map__new_by_pid(pid_t pid)
+{
+ struct thread_map *threads;
+ char name[256];
+ int items;
+ struct dirent **namelist = NULL;
+ int i;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ return NULL;
+
+ threads = thread_map__alloc(items);
+ if (threads != NULL) {
+ for (i = 0; i < items; i++)
+ thread_map__set_pid(threads, i, atoi(namelist[i]->d_name));
+ threads->nr = items;
+ refcount_set(&threads->refcnt, 1);
+ }
+
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+ return threads;
+}
+
+struct thread_map *thread_map__new_by_tid(pid_t tid)
+{
+ struct thread_map *threads = thread_map__alloc(1);
+
+ if (threads != NULL) {
+ thread_map__set_pid(threads, 0, tid);
+ threads->nr = 1;
+ refcount_set(&threads->refcnt, 1);
+ }
+
+ return threads;
+}
+
+static struct thread_map *__thread_map__new_all_cpus(uid_t uid)
+{
+ DIR *proc;
+ int max_threads = 32, items, i;
+ char path[NAME_MAX + 1 + 6];
+ struct dirent *dirent, **namelist = NULL;
+ struct thread_map *threads = thread_map__alloc(max_threads);
+
+ if (threads == NULL)
+ goto out;
+
+ proc = opendir("/proc");
+ if (proc == NULL)
+ goto out_free_threads;
+
+ threads->nr = 0;
+ refcount_set(&threads->refcnt, 1);
+
+ while ((dirent = readdir(proc)) != NULL) {
+ char *end;
+ bool grow = false;
+ pid_t pid = strtol(dirent->d_name, &end, 10);
+
+ if (*end) /* only interested in proper numerical dirents */
+ continue;
+
+ snprintf(path, sizeof(path), "/proc/%s", dirent->d_name);
+
+ if (uid != UINT_MAX) {
+ struct stat st;
+
+ if (stat(path, &st) != 0 || st.st_uid != uid)
+ continue;
+ }
+
+ snprintf(path, sizeof(path), "/proc/%d/task", pid);
+ items = scandir(path, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_closedir;
+
+ while (threads->nr + items >= max_threads) {
+ max_threads *= 2;
+ grow = true;
+ }
+
+ if (grow) {
+ struct thread_map *tmp;
+
+ tmp = thread_map__realloc(threads, max_threads);
+ if (tmp == NULL)
+ goto out_free_namelist;
+
+ threads = tmp;
+ }
+
+ for (i = 0; i < items; i++) {
+ thread_map__set_pid(threads, threads->nr + i,
+ atoi(namelist[i]->d_name));
+ }
+
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+ threads->nr += items;
+ }
+
+out_closedir:
+ closedir(proc);
+out:
+ return threads;
+
+out_free_threads:
+ free(threads);
+ return NULL;
+
+out_free_namelist:
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+out_free_closedir:
+ zfree(&threads);
+ goto out_closedir;
+}
+
+struct thread_map *thread_map__new_all_cpus(void)
+{
+ return __thread_map__new_all_cpus(UINT_MAX);
+}
+
+struct thread_map *thread_map__new_by_uid(uid_t uid)
+{
+ return __thread_map__new_all_cpus(uid);
+}
+
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid)
+{
+ if (pid != -1)
+ return thread_map__new_by_pid(pid);
+
+ if (tid == -1 && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
+ return thread_map__new_by_tid(tid);
+}
+
+static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
+{
+ struct thread_map *threads = NULL, *nt;
+ char name[256];
+ int items, total_tasks = 0;
+ struct dirent **namelist = NULL;
+ int i, j = 0;
+ pid_t pid, prev_pid = INT_MAX;
+ char *end_ptr;
+ struct str_node *pos;
+ struct strlist_config slist_config = { .dont_dupstr = true, };
+ struct strlist *slist = strlist__new(pid_str, &slist_config);
+
+ if (!slist)
+ return NULL;
+
+ strlist__for_each_entry(pos, slist) {
+ pid = strtol(pos->s, &end_ptr, 10);
+
+ if (pid == INT_MIN || pid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ goto out_free_threads;
+
+ if (pid == prev_pid)
+ continue;
+
+ sprintf(name, "/proc/%d/task", pid);
+ items = scandir(name, &namelist, filter, NULL);
+ if (items <= 0)
+ goto out_free_threads;
+
+ total_tasks += items;
+ nt = thread_map__realloc(threads, total_tasks);
+ if (nt == NULL)
+ goto out_free_namelist;
+
+ threads = nt;
+
+ for (i = 0; i < items; i++) {
+ thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name));
+ zfree(&namelist[i]);
+ }
+ threads->nr = total_tasks;
+ free(namelist);
+ }
+
+out:
+ strlist__delete(slist);
+ if (threads)
+ refcount_set(&threads->refcnt, 1);
+ return threads;
+
+out_free_namelist:
+ for (i = 0; i < items; i++)
+ zfree(&namelist[i]);
+ free(namelist);
+
+out_free_threads:
+ zfree(&threads);
+ goto out;
+}
+
+struct thread_map *thread_map__new_dummy(void)
+{
+ struct thread_map *threads = thread_map__alloc(1);
+
+ if (threads != NULL) {
+ thread_map__set_pid(threads, 0, -1);
+ threads->nr = 1;
+ refcount_set(&threads->refcnt, 1);
+ }
+ return threads;
+}
+
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
+{
+ struct thread_map *threads = NULL, *nt;
+ int ntasks = 0;
+ pid_t tid, prev_tid = INT_MAX;
+ char *end_ptr;
+ struct str_node *pos;
+ struct strlist_config slist_config = { .dont_dupstr = true, };
+ struct strlist *slist;
+
+ /* perf-stat expects threads to be generated even if tid not given */
+ if (!tid_str)
+ return thread_map__new_dummy();
+
+ slist = strlist__new(tid_str, &slist_config);
+ if (!slist)
+ return NULL;
+
+ strlist__for_each_entry(pos, slist) {
+ tid = strtol(pos->s, &end_ptr, 10);
+
+ if (tid == INT_MIN || tid == INT_MAX ||
+ (*end_ptr != '\0' && *end_ptr != ','))
+ goto out_free_threads;
+
+ if (tid == prev_tid)
+ continue;
+
+ ntasks++;
+ nt = thread_map__realloc(threads, ntasks);
+
+ if (nt == NULL)
+ goto out_free_threads;
+
+ threads = nt;
+ thread_map__set_pid(threads, ntasks - 1, tid);
+ threads->nr = ntasks;
+ }
+out:
+ strlist__delete(slist);
+ if (threads)
+ refcount_set(&threads->refcnt, 1);
+ return threads;
+
+out_free_threads:
+ zfree(&threads);
+ goto out;
+}
+
+struct thread_map *thread_map__new_str(const char *pid, const char *tid,
+ uid_t uid, bool all_threads)
+{
+ if (pid)
+ return thread_map__new_by_pid_str(pid);
+
+ if (!tid && uid != UINT_MAX)
+ return thread_map__new_by_uid(uid);
+
+ if (all_threads)
+ return thread_map__new_all_cpus();
+
+ return thread_map__new_by_tid_str(tid);
+}
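+
+/*
+ * Usage sketch (illustrative values): thread_map__new_str("1234,5678",
+ * NULL, UINT_MAX, false) maps every task of pids 1234 and 5678, while
+ * thread_map__new_str(NULL, NULL, UINT_MAX, true) falls back to
+ * thread_map__new_all_cpus() and scans all of /proc.
+ */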
+
+static void thread_map__delete(struct thread_map *threads)
+{
+ if (threads) {
+ int i;
+
+ WARN_ONCE(refcount_read(&threads->refcnt) != 0,
+ "thread map refcnt unbalanced\n");
+ for (i = 0; i < threads->nr; i++)
+ free(thread_map__comm(threads, i));
+ free(threads);
+ }
+}
+
+struct thread_map *thread_map__get(struct thread_map *map)
+{
+ if (map)
+ refcount_inc(&map->refcnt);
+ return map;
+}
+
+void thread_map__put(struct thread_map *map)
+{
+ if (map && refcount_dec_and_test(&map->refcnt))
+ thread_map__delete(map);
+}
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp)
+{
+ int i;
+ size_t printed = fprintf(fp, "%d thread%s: ",
+ threads->nr, threads->nr > 1 ? "s" : "");
+ for (i = 0; i < threads->nr; ++i)
+ printed += fprintf(fp, "%s%d", i ? ", " : "", thread_map__pid(threads, i));
+
+ return printed + fprintf(fp, "\n");
+}
+
+static int get_comm(char **comm, pid_t pid)
+{
+ char *path;
+ size_t size;
+ int err;
+
+ if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1)
+ return -ENOMEM;
+
+ err = filename__read_str(path, comm, &size);
+ if (!err) {
+ /*
+ * We're reading 16 bytes, while filename__read_str
+ * allocates data per BUFSIZ bytes, so we can safely
+ * mark the end of the string.
+ */
+ (*comm)[size] = 0;
+ rtrim(*comm);
+ }
+
+ free(path);
+ return err;
+}
+
+static void comm_init(struct thread_map *map, int i)
+{
+ pid_t pid = thread_map__pid(map, i);
+ char *comm = NULL;
+
+ /* dummy pid comm initialization */
+ if (pid == -1) {
+ map->map[i].comm = strdup("dummy");
+ return;
+ }
+
+ /*
+ * The comm name is an extra bonus ;-),
+ * so just warn if we fail for any reason.
+ */
+ if (get_comm(&comm, pid))
+ pr_warning("Couldn't resolve comm name for pid %d\n", pid);
+
+ map->map[i].comm = comm;
+}
+
+void thread_map__read_comms(struct thread_map *threads)
+{
+ int i;
+
+ for (i = 0; i < threads->nr; ++i)
+ comm_init(threads, i);
+}
+
+static void thread_map__copy_event(struct thread_map *threads,
+ struct thread_map_event *event)
+{
+ unsigned i;
+
+ threads->nr = (int) event->nr;
+
+ for (i = 0; i < event->nr; i++) {
+ thread_map__set_pid(threads, i, (pid_t) event->entries[i].pid);
+ threads->map[i].comm = strndup(event->entries[i].comm, 16);
+ }
+
+ refcount_set(&threads->refcnt, 1);
+}
+
+struct thread_map *thread_map__new_event(struct thread_map_event *event)
+{
+ struct thread_map *threads;
+
+ threads = thread_map__alloc(event->nr);
+ if (threads)
+ thread_map__copy_event(threads, event);
+
+ return threads;
+}
+
+bool thread_map__has(struct thread_map *threads, pid_t pid)
+{
+ int i;
+
+ for (i = 0; i < threads->nr; ++i) {
+ if (threads->map[i].pid == pid)
+ return true;
+ }
+
+ return false;
+}
+
+int thread_map__remove(struct thread_map *threads, int idx)
+{
+ int i;
+
+ if (threads->nr < 1)
+ return -EINVAL;
+
+ if (idx >= threads->nr)
+ return -EINVAL;
+
+ /*
+ * Free the 'idx' item and shift the rest up.
+ */
+ free(threads->map[idx].comm);
+
+ for (i = idx; i < threads->nr - 1; i++)
+ threads->map[i] = threads->map[i + 1];
+
+ threads->nr--;
+ return 0;
+}
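+
+/*
+ * Example: removing idx 1 from the pid list {10, 20, 30} frees the comm
+ * of entry 20 and shifts 30 down, leaving {10, 30} with nr == 2.
+ */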
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
new file mode 100644
index 000000000..2f689c90a
--- /dev/null
+++ b/tools/perf/util/thread_map.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_THREAD_MAP_H
+#define __PERF_THREAD_MAP_H
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <linux/refcount.h>
+
+struct thread_map_data {
+ pid_t pid;
+ char *comm;
+};
+
+struct thread_map {
+ refcount_t refcnt;
+ int nr;
+ int err_thread;
+ struct thread_map_data map[];
+};
+
+struct thread_map_event;
+
+struct thread_map *thread_map__new_dummy(void);
+struct thread_map *thread_map__new_by_pid(pid_t pid);
+struct thread_map *thread_map__new_by_tid(pid_t tid);
+struct thread_map *thread_map__new_by_uid(uid_t uid);
+struct thread_map *thread_map__new_all_cpus(void);
+struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid);
+struct thread_map *thread_map__new_event(struct thread_map_event *event);
+
+struct thread_map *thread_map__get(struct thread_map *map);
+void thread_map__put(struct thread_map *map);
+
+struct thread_map *thread_map__new_str(const char *pid,
+ const char *tid, uid_t uid, bool all_threads);
+
+struct thread_map *thread_map__new_by_tid_str(const char *tid_str);
+
+size_t thread_map__fprintf(struct thread_map *threads, FILE *fp);
+
+static inline int thread_map__nr(struct thread_map *threads)
+{
+ return threads ? threads->nr : 1;
+}
+
+static inline pid_t thread_map__pid(struct thread_map *map, int thread)
+{
+ return map->map[thread].pid;
+}
+
+static inline void
+thread_map__set_pid(struct thread_map *map, int thread, pid_t pid)
+{
+ map->map[thread].pid = pid;
+}
+
+static inline char *thread_map__comm(struct thread_map *map, int thread)
+{
+ return map->map[thread].comm;
+}
+
+void thread_map__read_comms(struct thread_map *threads);
+bool thread_map__has(struct thread_map *threads, pid_t pid);
+int thread_map__remove(struct thread_map *threads, int idx);
+#endif /* __PERF_THREAD_MAP_H */
diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c
new file mode 100644
index 000000000..6193b4605
--- /dev/null
+++ b/tools/perf/util/time-utils.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <linux/time64.h>
+#include <time.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <math.h>
+
+#include "perf.h"
+#include "debug.h"
+#include "time-utils.h"
+
+int parse_nsec_time(const char *str, u64 *ptime)
+{
+ u64 time_sec, time_nsec;
+ char *end;
+
+ time_sec = strtoul(str, &end, 10);
+ if (*end != '.' && *end != '\0')
+ return -1;
+
+ if (*end == '.') {
+ int i;
+ char nsec_buf[10];
+
+ if (strlen(++end) > 9)
+ return -1;
+
+ strncpy(nsec_buf, end, 9);
+ nsec_buf[9] = '\0';
+
+ /* make it nsec precision */
+ for (i = strlen(nsec_buf); i < 9; i++)
+ nsec_buf[i] = '0';
+
+ time_nsec = strtoul(nsec_buf, &end, 10);
+ if (*end != '\0')
+ return -1;
+ } else
+ time_nsec = 0;
+
+ *ptime = time_sec * NSEC_PER_SEC + time_nsec;
+ return 0;
+}
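+
+/*
+ * Examples: "10" yields 10000000000 (10s in nanoseconds), "10.5" yields
+ * 10500000000, and "10.000000123" yields 10000000123; fractional parts
+ * are right-padded with zeros to nanosecond precision.
+ */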
+
+static int parse_timestr_sec_nsec(struct perf_time_interval *ptime,
+ char *start_str, char *end_str)
+{
+ if (start_str && (*start_str != '\0') &&
+ (parse_nsec_time(start_str, &ptime->start) != 0)) {
+ return -1;
+ }
+
+ if (end_str && (*end_str != '\0') &&
+ (parse_nsec_time(end_str, &ptime->end) != 0)) {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int split_start_end(char **start, char **end, const char *ostr, char ch)
+{
+ char *start_str, *end_str;
+ char *d, *str;
+
+ if (ostr == NULL || *ostr == '\0')
+ return 0;
+
+ /* copy original string because we need to modify it */
+ str = strdup(ostr);
+ if (str == NULL)
+ return -ENOMEM;
+
+ start_str = str;
+ d = strchr(start_str, ch);
+ if (d) {
+ *d = '\0';
+ ++d;
+ }
+ end_str = d;
+
+ *start = start_str;
+ *end = end_str;
+
+ return 0;
+}
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr)
+{
+ char *start_str = NULL, *end_str;
+ int rc;
+
+ rc = split_start_end(&start_str, &end_str, ostr, ',');
+ if (rc || !start_str)
+ return rc;
+
+ ptime->start = 0;
+ ptime->end = 0;
+
+ rc = parse_timestr_sec_nsec(ptime, start_str, end_str);
+
+ free(start_str);
+
+ /* make sure end time is after start time if it was given */
+ if (rc == 0 && ptime->end && ptime->end < ptime->start)
+ return -EINVAL;
+
+ pr_debug("start time %" PRIu64 ", ", ptime->start);
+ pr_debug("end time %" PRIu64 "\n", ptime->end);
+
+ return rc;
+}
+
+static int parse_percent(double *pcnt, char *str)
+{
+ char *c, *endptr;
+ double d;
+
+ c = strchr(str, '%');
+ if (c)
+ *c = '\0';
+ else
+ return -1;
+
+ d = strtod(str, &endptr);
+ if (endptr != str + strlen(str))
+ return -1;
+
+ *pcnt = d / 100.0;
+ return 0;
+}
+
+static int percent_slash_split(char *str, struct perf_time_interval *ptime,
+ u64 start, u64 end)
+{
+ char *p, *end_str;
+ double pcnt, start_pcnt, end_pcnt;
+ u64 total = end - start;
+ int i;
+
+ /*
+ * Example:
+ * 10%/2: select the second 10% slice
+ */
+
+ /* We can modify this string since the original one is copied */
+ p = strchr(str, '/');
+ if (!p)
+ return -1;
+
+ *p = '\0';
+ if (parse_percent(&pcnt, str) < 0)
+ return -1;
+
+ p++;
+ i = (int)strtol(p, &end_str, 10);
+ if (*end_str)
+ return -1;
+
+ if (pcnt <= 0.0)
+ return -1;
+
+ start_pcnt = pcnt * (i - 1);
+ end_pcnt = pcnt * i;
+
+ if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+ end_pcnt < 0.0 || end_pcnt > 1.0) {
+ return -1;
+ }
+
+ ptime->start = start + round(start_pcnt * total);
+ ptime->end = start + round(end_pcnt * total);
+
+ return 0;
+}
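+
+/*
+ * Worked example: with start == 0 and end == 1000000000 (a 1s window),
+ * "10%/2" selects the second 10% slice, i.e. ptime->start == 100000000
+ * and ptime->end == 200000000.
+ */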
+
+static int percent_dash_split(char *str, struct perf_time_interval *ptime,
+ u64 start, u64 end)
+{
+ char *start_str = NULL, *end_str;
+ double start_pcnt, end_pcnt;
+ u64 total = end - start;
+ int ret;
+
+ /*
+ * Example: 0%-10%
+ */
+
+ ret = split_start_end(&start_str, &end_str, str, '-');
+ if (ret || !start_str)
+ return ret;
+
+ if ((parse_percent(&start_pcnt, start_str) != 0) ||
+ (parse_percent(&end_pcnt, end_str) != 0)) {
+ free(start_str);
+ return -1;
+ }
+
+ free(start_str);
+
+ if (start_pcnt < 0.0 || start_pcnt > 1.0 ||
+ end_pcnt < 0.0 || end_pcnt > 1.0 ||
+ start_pcnt > end_pcnt) {
+ return -1;
+ }
+
+ ptime->start = start + round(start_pcnt * total);
+ ptime->end = start + round(end_pcnt * total);
+
+ return 0;
+}
+
+typedef int (*time_percent_split)(char *, struct perf_time_interval *,
+ u64 start, u64 end);
+
+static int percent_comma_split(struct perf_time_interval *ptime_buf, int num,
+ const char *ostr, u64 start, u64 end,
+ time_percent_split func)
+{
+ char *str, *p1, *p2;
+ int len, ret, i = 0;
+
+ str = strdup(ostr);
+ if (str == NULL)
+ return -ENOMEM;
+
+ len = strlen(str);
+ p1 = str;
+
+ while (p1 < str + len) {
+ if (i >= num) {
+ free(str);
+ return -1;
+ }
+
+ p2 = strchr(p1, ',');
+ if (p2)
+ *p2 = '\0';
+
+ ret = (func)(p1, &ptime_buf[i], start, end);
+ if (ret < 0) {
+ free(str);
+ return -1;
+ }
+
+ pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start);
+ pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end);
+
+ i++;
+
+ if (p2)
+ p1 = p2 + 1;
+ else
+ break;
+ }
+
+ free(str);
+ return i;
+}
+
+static int one_percent_convert(struct perf_time_interval *ptime_buf,
+ const char *ostr, u64 start, u64 end, char *c)
+{
+ char *str;
+ int len = strlen(ostr), ret;
+
+ /*
+ * c points to '%'.
+ * '%' should be the last character
+ */
+ if (ostr + len - 1 != c)
+ return -1;
+
+ /*
+ * Construct a string like "xx%/1"
+ */
+ str = malloc(len + 3);
+ if (str == NULL)
+ return -ENOMEM;
+
+ memcpy(str, ostr, len);
+ strcpy(str + len, "/1");
+
+ ret = percent_slash_split(str, ptime_buf, start, end);
+ if (ret == 0)
+ ret = 1;
+
+ free(str);
+ return ret;
+}
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+ const char *ostr, u64 start, u64 end)
+{
+ char *c;
+
+ /*
+ * ostr example:
+ * 10%/2,10%/3: select the second 10% slice and the third 10% slice
+ * 0%-10%,30%-40%: multiple time range
+ * 50%: just one percent
+ */
+
+ memset(ptime_buf, 0, sizeof(*ptime_buf) * num);
+
+ c = strchr(ostr, '/');
+ if (c) {
+ return percent_comma_split(ptime_buf, num, ostr, start,
+ end, percent_slash_split);
+ }
+
+ c = strchr(ostr, '-');
+ if (c) {
+ return percent_comma_split(ptime_buf, num, ostr, start,
+ end, percent_dash_split);
+ }
+
+ c = strchr(ostr, '%');
+ if (c)
+ return one_percent_convert(ptime_buf, ostr, start, end, c);
+
+ return -1;
+}
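+
+/*
+ * Dispatch sketch: "10%/2,10%/3" is handled term by term via
+ * percent_slash_split(), "0%-10%,30%-40%" via percent_dash_split(), and
+ * a lone "50%" is rewritten as "50%/1", i.e. the first 50% slice.
+ */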
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size)
+{
+ const char *p1, *p2;
+ int i = 1;
+ struct perf_time_interval *ptime;
+
+ /*
+ * At least allocate one time range.
+ */
+ if (!ostr)
+ goto alloc;
+
+ p1 = ostr;
+ while (p1 < ostr + strlen(ostr)) {
+ p2 = strchr(p1, ',');
+ if (!p2)
+ break;
+
+ p1 = p2 + 1;
+ i++;
+ }
+
+alloc:
+ *size = i;
+ ptime = calloc(i, sizeof(*ptime));
+ return ptime;
+}
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp)
+{
+ /* if time is not set don't drop sample */
+ if (timestamp == 0)
+ return false;
+
+ /* otherwise compare sample time to time window */
+ if ((ptime->start && timestamp < ptime->start) ||
+ (ptime->end && timestamp > ptime->end)) {
+ return true;
+ }
+
+ return false;
+}
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+ int num, u64 timestamp)
+{
+ struct perf_time_interval *ptime;
+ int i;
+
+ if ((timestamp == 0) || (num == 0))
+ return false;
+
+ if (num == 1)
+ return perf_time__skip_sample(&ptime_buf[0], timestamp);
+
+ /*
+ * start/end of multiple time ranges must be valid.
+ */
+ for (i = 0; i < num; i++) {
+ ptime = &ptime_buf[i];
+
+ if (timestamp >= ptime->start &&
+ ((timestamp < ptime->end && i < num - 1) ||
+ (timestamp <= ptime->end && i == num - 1))) {
+ break;
+ }
+ }
+
+ return (i == num) ? true : false;
+}
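+
+/*
+ * Note on boundaries: with multiple ranges each one matches as
+ * [start, end) except the last, which matches as [start, end], so
+ * back-to-back ranges never claim the same timestamp twice.
+ */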
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz)
+{
+ u64 sec = timestamp / NSEC_PER_SEC;
+ u64 usec = (timestamp % NSEC_PER_SEC) / NSEC_PER_USEC;
+
+ return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec);
+}
+
+int fetch_current_timestamp(char *buf, size_t sz)
+{
+ struct timeval tv;
+ struct tm tm;
+ char dt[32];
+
+ if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+ return -1;
+
+ if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+ return -1;
+
+ scnprintf(buf, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+ return 0;
+}
diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h
new file mode 100644
index 000000000..70b177d2b
--- /dev/null
+++ b/tools/perf/util/time-utils.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TIME_UTILS_H_
+#define _TIME_UTILS_H_
+
+#include <stddef.h>
+#include <linux/types.h>
+
+struct perf_time_interval {
+ u64 start, end;
+};
+
+int parse_nsec_time(const char *str, u64 *ptime);
+
+int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr);
+
+int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num,
+ const char *ostr, u64 start, u64 end);
+
+struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size);
+
+bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp);
+
+bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf,
+ int num, u64 timestamp);
+
+int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz);
+
+int fetch_current_timestamp(char *buf, size_t sz);
+
+#endif
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
new file mode 100644
index 000000000..183c91453
--- /dev/null
+++ b/tools/perf/util/tool.h
@@ -0,0 +1,83 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOOL_H
+#define __PERF_TOOL_H
+
+#include <stdbool.h>
+
+#include <linux/types.h>
+
+struct perf_session;
+union perf_event;
+struct perf_evlist;
+struct perf_evsel;
+struct perf_sample;
+struct perf_tool;
+struct machine;
+struct ordered_events;
+
+typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel, struct machine *machine);
+
+typedef int (*event_op)(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine);
+
+typedef int (*event_attr_op)(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evlist **pevlist);
+
+typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
+ struct perf_session *session);
+
+typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
+ struct ordered_events *oe);
+
+typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
+ struct perf_session *session);
+
+enum show_feature_header {
+ SHOW_FEAT_NO_HEADER = 0,
+ SHOW_FEAT_HEADER,
+ SHOW_FEAT_HEADER_FULL_INFO,
+};
+
+struct perf_tool {
+ event_sample sample,
+ read;
+ event_op mmap,
+ mmap2,
+ comm,
+ namespaces,
+ fork,
+ exit,
+ lost,
+ lost_samples,
+ aux,
+ itrace_start,
+ context_switch,
+ throttle,
+ unthrottle;
+ event_attr_op attr;
+ event_attr_op event_update;
+ event_op2 tracing_data;
+ event_oe finished_round;
+ event_op2 build_id,
+ id_index,
+ auxtrace_info,
+ auxtrace_error,
+ time_conv,
+ thread_map,
+ cpu_map,
+ stat_config,
+ stat,
+ stat_round,
+ feature;
+ event_op3 auxtrace;
+ bool ordered_events;
+ bool ordering_requires_timestamps;
+ bool namespace_events;
+ bool no_warn;
+ enum show_feature_header show_feat_hdr;
+};
+
+#endif /* __PERF_TOOL_H */
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
new file mode 100644
index 000000000..8e517def9
--- /dev/null
+++ b/tools/perf/util/top.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2011, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+ *
+ * Refactored from builtin-top.c, see that file for further copyright notes.
+ *
+ * Released under the GPL v2. (and only v2, not any later version)
+ */
+
+#include "cpumap.h"
+#include "event.h"
+#include "evlist.h"
+#include "evsel.h"
+#include "parse-events.h"
+#include "symbol.h"
+#include "top.h"
+#include <inttypes.h>
+
+#define SNPRINTF(buf, size, fmt, args...) \
+({ \
+ size_t r = snprintf(buf, size, fmt, ## args); \
+ r > size ? size : r; \
+})
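+
+/*
+ * snprintf() returns the length the output would have had, which can
+ * exceed 'size' on truncation; clamping the result keeps the running
+ * 'ret' offsets below from stepping past the end of 'bf'.
+ */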
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
+{
+ float samples_per_sec;
+ float ksamples_per_sec;
+ float esamples_percent;
+ struct record_opts *opts = &top->record_opts;
+ struct target *target = &opts->target;
+ size_t ret = 0;
+
+ if (top->samples) {
+ samples_per_sec = top->samples / top->delay_secs;
+ ksamples_per_sec = top->kernel_samples / top->delay_secs;
+ esamples_percent = (100.0 * top->exact_samples) / top->samples;
+ } else {
+ samples_per_sec = ksamples_per_sec = esamples_percent = 0.0;
+ }
+
+ if (!perf_guest) {
+ float ksamples_percent = 0.0;
+
+ if (samples_per_sec)
+ ksamples_percent = (100.0 * ksamples_per_sec) /
+ samples_per_sec;
+ ret = SNPRINTF(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ ksamples_percent, esamples_percent);
+ } else {
+ float us_samples_per_sec = top->us_samples / top->delay_secs;
+ float guest_kernel_samples_per_sec = top->guest_kernel_samples / top->delay_secs;
+ float guest_us_samples_per_sec = top->guest_us_samples / top->delay_secs;
+
+ ret = SNPRINTF(bf, size,
+ " PerfTop:%8.0f irqs/sec kernel:%4.1f%% us:%4.1f%%"
+ " guest kernel:%4.1f%% guest us:%4.1f%%"
+ " exact: %4.1f%% [", samples_per_sec,
+ 100.0 - (100.0 * ((samples_per_sec - ksamples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec - us_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_kernel_samples_per_sec) /
+ samples_per_sec)),
+ 100.0 - (100.0 * ((samples_per_sec -
+ guest_us_samples_per_sec) /
+ samples_per_sec)),
+ esamples_percent);
+ }
+
+ if (top->evlist->nr_entries == 1) {
+ struct perf_evsel *first = perf_evlist__first(top->evlist);
+ ret += SNPRINTF(bf + ret, size - ret, "%" PRIu64 "%s ",
+ (uint64_t)first->attr.sample_period,
+ opts->freq ? "Hz" : "");
+ }
+
+ ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
+
+ ret += SNPRINTF(bf + ret, size - ret, "], ");
+
+ if (target->pid)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_pid: %s",
+ target->pid);
+ else if (target->tid)
+ ret += SNPRINTF(bf + ret, size - ret, " (target_tid: %s",
+ target->tid);
+ else if (target->uid_str != NULL)
+ ret += SNPRINTF(bf + ret, size - ret, " (uid: %s",
+ target->uid_str);
+ else
+ ret += SNPRINTF(bf + ret, size - ret, " (all");
+
+ if (target->cpu_list)
+ ret += SNPRINTF(bf + ret, size - ret, ", CPU%s: %s)",
+ top->evlist->cpus->nr > 1 ? "s" : "",
+ target->cpu_list);
+ else {
+ if (target->tid)
+ ret += SNPRINTF(bf + ret, size - ret, ")");
+ else
+ ret += SNPRINTF(bf + ret, size - ret, ", %d CPU%s)",
+ top->evlist->cpus->nr,
+ top->evlist->cpus->nr > 1 ? "s" : "");
+ }
+
+ return ret;
+}
+
+void perf_top__reset_sample_counters(struct perf_top *top)
+{
+ top->samples = top->us_samples = top->kernel_samples =
+ top->exact_samples = top->guest_kernel_samples =
+ top->guest_us_samples = 0;
+}
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
new file mode 100644
index 000000000..9add1f72c
--- /dev/null
+++ b/tools/perf/util/top.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TOP_H
+#define __PERF_TOP_H 1
+
+#include "tool.h"
+#include "annotate.h"
+#include <linux/types.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ioctl.h>
+
+struct perf_evlist;
+struct perf_evsel;
+struct perf_session;
+
+struct perf_top {
+ struct perf_tool tool;
+ struct perf_evlist *evlist;
+ struct record_opts record_opts;
+ struct annotation_options annotation_opts;
+ /*
+ * Symbols will be added here in perf_event__process_sample and will
+ * be removed once they have decayed.
+ */
+ u64 samples;
+ u64 kernel_samples, us_samples;
+ u64 exact_samples;
+ u64 guest_us_samples, guest_kernel_samples;
+ int print_entries, count_filter, delay_secs;
+ int max_stack;
+ bool hide_kernel_symbols, hide_user_symbols, zero;
+ bool use_tui, use_stdio;
+ bool vmlinux_warned;
+ bool dump_symtab;
+ struct hist_entry *sym_filter_entry;
+ struct perf_evsel *sym_evsel;
+ struct perf_session *session;
+ struct winsize winsize;
+ int realtime_prio;
+ const char *sym_filter;
+ float min_percent;
+ unsigned int nr_threads_synthesize;
+};
+
+#define CONSOLE_CLEAR ""
+
+size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size);
+void perf_top__reset_sample_counters(struct perf_top *top);
+#endif /* __PERF_TOP_H */
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
new file mode 100644
index 000000000..8ad8e7551
--- /dev/null
+++ b/tools/perf/util/trace-event-info.c
@@ -0,0 +1,618 @@
+/*
+ * Copyright (C) 2008,2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include "util.h"
+#include <dirent.h>
+#include <mntent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/kernel.h>
+
+#include "../perf.h"
+#include "trace-event.h"
+#include <api/fs/tracing_path.h>
+#include "evsel.h"
+#include "debug.h"
+
+#define VERSION "0.6"
+
+static int output_fd;
+
+
+int bigendian(void)
+{
+ unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
+ unsigned int *ptr;
+
+ ptr = (unsigned int *)(void *)str;
+ return *ptr == 0x01020304;
+}
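+
+/*
+ * Example: the first four bytes 0x01 0x02 0x03 0x04 read back as
+ * 0x01020304 on a big-endian host and 0x04030201 on a little-endian
+ * one, so the comparison holds only on big-endian machines.
+ */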
+
+/* unfortunately, you cannot stat debugfs or proc files for size */
+static int record_file(const char *file, ssize_t hdr_sz)
+{
+ unsigned long long size = 0;
+ char buf[BUFSIZ], *sizep;
+ off_t hdr_pos = lseek(output_fd, 0, SEEK_CUR);
+ int r, fd;
+ int err = -EIO;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("Can't read '%s'", file);
+ return -errno;
+ }
+
+ /* put in zeros for file size, then fill true size later */
+ if (hdr_sz) {
+ if (write(output_fd, &size, hdr_sz) != hdr_sz)
+ goto out;
+ }
+
+ do {
+ r = read(fd, buf, BUFSIZ);
+ if (r > 0) {
+ size += r;
+ if (write(output_fd, buf, r) != r)
+ goto out;
+ }
+ } while (r > 0);
+
+ /* ugh, handle big-endian hdr_size == 4 */
+ sizep = (char*)&size;
+ if (bigendian())
+ sizep += sizeof(u64) - hdr_sz;
+
+ if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) {
+ pr_debug("writing file size failed\n");
+ goto out;
+ }
+
+ err = 0;
+out:
+ close(fd);
+ return err;
+}
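+
+/*
+ * Note: 'size' is a 64-bit counter but only hdr_sz bytes of it are
+ * written back at hdr_pos; on big-endian hosts sizep is advanced first
+ * so that those bytes are the least significant ones of the value.
+ */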
+
+static int record_header_files(void)
+{
+ char *path = get_events_file("header_page");
+ struct stat st;
+ int err = -EIO;
+
+ if (!path) {
+ pr_debug("can't get tracing/events/header_page");
+ return -ENOMEM;
+ }
+
+ if (stat(path, &st) < 0) {
+ pr_debug("can't read '%s'", path);
+ goto out;
+ }
+
+ if (write(output_fd, "header_page", 12) != 12) {
+ pr_debug("can't write header_page\n");
+ goto out;
+ }
+
+ if (record_file(path, 8) < 0) {
+ pr_debug("can't record header_page file\n");
+ goto out;
+ }
+
+ put_events_file(path);
+
+ path = get_events_file("header_event");
+ if (!path) {
+ pr_debug("can't get tracing/events/header_event");
+ err = -ENOMEM;
+ goto out;
+ }
+
+ if (stat(path, &st) < 0) {
+ pr_debug("can't read '%s'", path);
+ goto out;
+ }
+
+ if (write(output_fd, "header_event", 13) != 13) {
+ pr_debug("can't write header_event\n");
+ goto out;
+ }
+
+ if (record_file(path, 8) < 0) {
+ pr_debug("can't record header_event file\n");
+ goto out;
+ }
+
+ err = 0;
+out:
+ put_events_file(path);
+ return err;
+}
+
+static bool name_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+ while (tps) {
+ if (!strcmp(sys, tps->name))
+ return true;
+ tps = tps->next;
+ }
+
+ return false;
+}
+
+#define for_each_event(dir, dent, tps) \
+ while ((dent = readdir(dir))) \
+ if (dent->d_type == DT_DIR && \
+ (strcmp(dent->d_name, ".")) && \
+ (strcmp(dent->d_name, ".."))) \
+
+static int copy_event_system(const char *sys, struct tracepoint_path *tps)
+{
+ struct dirent *dent;
+ struct stat st;
+ char *format;
+ DIR *dir;
+ int count = 0;
+ int ret;
+ int err;
+
+ dir = opendir(sys);
+ if (!dir) {
+ pr_debug("can't read directory '%s'", sys);
+ return -errno;
+ }
+
+ for_each_event(dir, dent, tps) {
+ if (!name_in_tp_list(dent->d_name, tps))
+ continue;
+
+ if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ ret = stat(format, &st);
+ free(format);
+ if (ret < 0)
+ continue;
+ count++;
+ }
+
+ if (write(output_fd, &count, 4) != 4) {
+ err = -EIO;
+ pr_debug("can't write count\n");
+ goto out;
+ }
+
+ rewinddir(dir);
+ for_each_event(dir, dent, tps) {
+ if (!name_in_tp_list(dent->d_name, tps))
+ continue;
+
+ if (asprintf(&format, "%s/%s/format", sys, dent->d_name) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ ret = stat(format, &st);
+
+ if (ret >= 0) {
+ err = record_file(format, 8);
+ if (err) {
+ free(format);
+ goto out;
+ }
+ }
+ free(format);
+ }
+ err = 0;
+out:
+ closedir(dir);
+ return err;
+}
+
+static int record_ftrace_files(struct tracepoint_path *tps)
+{
+ char *path;
+ int ret;
+
+ path = get_events_file("ftrace");
+ if (!path) {
+ pr_debug("can't get tracing/events/ftrace");
+ return -ENOMEM;
+ }
+
+ ret = copy_event_system(path, tps);
+
+ put_tracing_file(path);
+
+ return ret;
+}
+
+static bool system_in_tp_list(char *sys, struct tracepoint_path *tps)
+{
+ while (tps) {
+ if (!strcmp(sys, tps->system))
+ return true;
+ tps = tps->next;
+ }
+
+ return false;
+}
+
+static int record_event_files(struct tracepoint_path *tps)
+{
+ struct dirent *dent;
+ struct stat st;
+ char *path;
+ char *sys;
+ DIR *dir;
+ int count = 0;
+ int ret;
+ int err;
+
+ path = get_tracing_file("events");
+ if (!path) {
+ pr_debug("can't get tracing/events");
+ return -ENOMEM;
+ }
+
+ dir = opendir(path);
+ if (!dir) {
+ err = -errno;
+ pr_debug("can't read directory '%s'", path);
+ goto out;
+ }
+
+ for_each_event(dir, dent, tps) {
+ if (strcmp(dent->d_name, "ftrace") == 0 ||
+ !system_in_tp_list(dent->d_name, tps))
+ continue;
+
+ count++;
+ }
+
+ if (write(output_fd, &count, 4) != 4) {
+ err = -EIO;
+ pr_debug("can't write count\n");
+ goto out;
+ }
+
+ rewinddir(dir);
+ for_each_event(dir, dent, tps) {
+ if (strcmp(dent->d_name, "ftrace") == 0 ||
+ !system_in_tp_list(dent->d_name, tps))
+ continue;
+
+ if (asprintf(&sys, "%s/%s", path, dent->d_name) < 0) {
+ err = -ENOMEM;
+ goto out;
+ }
+ ret = stat(sys, &st);
+ if (ret >= 0) {
+ ssize_t size = strlen(dent->d_name) + 1;
+
+ if (write(output_fd, dent->d_name, size) != size ||
+ copy_event_system(sys, tps) < 0) {
+ err = -EIO;
+ free(sys);
+ goto out;
+ }
+ }
+ free(sys);
+ }
+ err = 0;
+out:
+ closedir(dir);
+ put_tracing_file(path);
+
+ return err;
+}
+
+static int record_proc_kallsyms(void)
+{
+ unsigned long long size = 0;
+ /*
+ * Just to keep older perf.data file parsers happy, record a zero
+ * sized kallsyms file, i.e. do the same thing that was done when
+ * /proc/kallsyms (or something specified via --kallsyms, in a
+ * different path) couldn't be read.
+ */
+ return write(output_fd, &size, 4) != 4 ? -EIO : 0;
+}
+
+static int record_ftrace_printk(void)
+{
+ unsigned int size;
+ char *path;
+ struct stat st;
+ int ret, err = 0;
+
+ path = get_tracing_file("printk_formats");
+ if (!path) {
+ pr_debug("can't get tracing/printk_formats");
+ return -ENOMEM;
+ }
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ /* not found */
+ size = 0;
+ if (write(output_fd, &size, 4) != 4)
+ err = -EIO;
+ goto out;
+ }
+ err = record_file(path, 4);
+
+out:
+ put_tracing_file(path);
+ return err;
+}
+
+static int record_saved_cmdline(void)
+{
+ unsigned long long size;
+ char *path;
+ struct stat st;
+ int ret, err = 0;
+
+ path = get_tracing_file("saved_cmdlines");
+ if (!path) {
+ pr_debug("can't get tracing/saved_cmdlines");
+ return -ENOMEM;
+ }
+
+ ret = stat(path, &st);
+ if (ret < 0) {
+ /* not found */
+ size = 0;
+ if (write(output_fd, &size, 8) != 8)
+ err = -EIO;
+ goto out;
+ }
+ err = record_file(path, 8);
+
+out:
+ put_tracing_file(path);
+ return err;
+}
+
+static void
+put_tracepoints_path(struct tracepoint_path *tps)
+{
+ while (tps) {
+ struct tracepoint_path *t = tps;
+
+ tps = tps->next;
+ zfree(&t->name);
+ zfree(&t->system);
+ free(t);
+ }
+}
+
+static struct tracepoint_path *
+get_tracepoints_path(struct list_head *pattrs)
+{
+ struct tracepoint_path path, *ppath = &path;
+ struct perf_evsel *pos;
+ int nr_tracepoints = 0;
+
+ list_for_each_entry(pos, pattrs, node) {
+ if (pos->attr.type != PERF_TYPE_TRACEPOINT)
+ continue;
+ ++nr_tracepoints;
+
+ if (pos->name) {
+ ppath->next = tracepoint_name_to_path(pos->name);
+ if (ppath->next)
+ goto next;
+
+ if (strchr(pos->name, ':') == NULL)
+ goto try_id;
+
+ goto error;
+ }
+
+try_id:
+ ppath->next = tracepoint_id_to_path(pos->attr.config);
+ if (!ppath->next) {
+error:
+ pr_debug("No memory to alloc tracepoints list\n");
+ put_tracepoints_path(&path);
+ return NULL;
+ }
+next:
+ ppath = ppath->next;
+ }
+
+ return nr_tracepoints > 0 ? path.next : NULL;
+}
+
+bool have_tracepoints(struct list_head *pattrs)
+{
+ struct perf_evsel *pos;
+
+ list_for_each_entry(pos, pattrs, node)
+ if (pos->attr.type == PERF_TYPE_TRACEPOINT)
+ return true;
+
+ return false;
+}
+
+static int tracing_data_header(void)
+{
+ char buf[20];
+ ssize_t size;
+
+ /* just guessing this is someone's birthday.. ;) */
+ buf[0] = 23;
+ buf[1] = 8;
+ buf[2] = 68;
+ memcpy(buf + 3, "tracing", 7);
+
+ if (write(output_fd, buf, 10) != 10)
+ return -1;
+
+ size = strlen(VERSION) + 1;
+ if (write(output_fd, VERSION, size) != size)
+ return -1;
+
+ /* save endian */
+ if (bigendian())
+ buf[0] = 1;
+ else
+ buf[0] = 0;
+
+ if (write(output_fd, buf, 1) != 1)
+ return -1;
+
+ /* save size of long */
+ buf[0] = sizeof(long);
+ if (write(output_fd, buf, 1) != 1)
+ return -1;
+
+ /* save page_size */
+ if (write(output_fd, &page_size, 4) != 4)
+ return -1;
+
+ return 0;
+}
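+
+/*
+ * Resulting header layout: three magic bytes {23, 8, 68} plus the
+ * literal "tracing" (10 bytes), the NUL-terminated VERSION string, one
+ * endianness byte, one sizeof(long) byte, then the 4-byte page size.
+ */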
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+ int fd, bool temp)
+{
+ struct tracepoint_path *tps;
+ struct tracing_data *tdata;
+ int err;
+
+ output_fd = fd;
+
+ tps = get_tracepoints_path(pattrs);
+ if (!tps)
+ return NULL;
+
+ tdata = malloc(sizeof(*tdata));
+ if (!tdata)
+ return NULL;
+
+ tdata->temp = temp;
+ tdata->size = 0;
+
+ if (temp) {
+ int temp_fd;
+
+ snprintf(tdata->temp_file, sizeof(tdata->temp_file),
+ "/tmp/perf-XXXXXX");
+ temp_fd = mkstemp(tdata->temp_file);
+ if (temp_fd < 0) {
+ pr_debug("Can't make temp file");
+ free(tdata);
+ return NULL;
+ }
+
+ /*
+ * Set the temp file the default output, so all the
+ * tracing data are stored into it.
+ */
+ output_fd = temp_fd;
+ }
+
+ err = tracing_data_header();
+ if (err)
+ goto out;
+ err = record_header_files();
+ if (err)
+ goto out;
+ err = record_ftrace_files(tps);
+ if (err)
+ goto out;
+ err = record_event_files(tps);
+ if (err)
+ goto out;
+ err = record_proc_kallsyms();
+ if (err)
+ goto out;
+ err = record_ftrace_printk();
+ if (err)
+ goto out;
+ err = record_saved_cmdline();
+
+out:
+ /*
+ * All tracing data are stored by now, we can restore
+ * the default output file in case we used temp file.
+ */
+ if (temp) {
+ tdata->size = lseek(output_fd, 0, SEEK_CUR);
+ close(output_fd);
+ output_fd = fd;
+ }
+
+ if (err)
+ zfree(&tdata);
+
+ put_tracepoints_path(tps);
+ return tdata;
+}
+
+int tracing_data_put(struct tracing_data *tdata)
+{
+ int err = 0;
+
+ if (tdata->temp) {
+ err = record_file(tdata->temp_file, 0);
+ unlink(tdata->temp_file);
+ }
+
+ free(tdata);
+ return err;
+}
+
+int read_tracing_data(int fd, struct list_head *pattrs)
+{
+ int err;
+ struct tracing_data *tdata;
+
+ /*
+ * We work over the real file, so we can write data
+ * directly, no temp file is needed.
+ */
+ tdata = tracing_data_get(pattrs, fd, false);
+ if (!tdata)
+ return -ENOMEM;
+
+ err = tracing_data_put(tdata);
+ return err;
+}
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
new file mode 100644
index 000000000..b15a9bf18
--- /dev/null
+++ b/tools/perf/util/trace-event-parse.c
@@ -0,0 +1,257 @@
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "debug.h"
+#include "trace-event.h"
+
+#include "sane_ctype.h"
+
+static int get_common_field(struct scripting_context *context,
+ int *offset, int *size, const char *type)
+{
+ struct tep_handle *pevent = context->pevent;
+ struct event_format *event;
+ struct format_field *field;
+
+ if (!*size) {
+ if (!pevent->events)
+ return 0;
+
+ event = pevent->events[0];
+ field = tep_find_common_field(event, type);
+ if (!field)
+ return 0;
+ *offset = field->offset;
+ *size = field->size;
+ }
+
+ return tep_read_number(pevent, context->event_data + *offset, *size);
+}
+
+int common_lock_depth(struct scripting_context *context)
+{
+ static int offset;
+ static int size;
+ int ret;
+
+ ret = get_common_field(context, &size, &offset,
+ "common_lock_depth");
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+int common_flags(struct scripting_context *context)
+{
+ static int offset;
+ static int size;
+ int ret;
+
+ ret = get_common_field(context, &size, &offset,
+ "common_flags");
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+int common_pc(struct scripting_context *context)
+{
+ static int offset;
+ static int size;
+ int ret;
+
+ ret = get_common_field(context, &size, &offset,
+ "common_preempt_count");
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+unsigned long long
+raw_field_value(struct event_format *event, const char *name, void *data)
+{
+ struct format_field *field;
+ unsigned long long val;
+
+ field = tep_find_any_field(event, name);
+ if (!field)
+ return 0ULL;
+
+ tep_read_number_field(field, data, &val);
+
+ return val;
+}
+
+unsigned long long read_size(struct event_format *event, void *ptr, int size)
+{
+ return tep_read_number(event->pevent, ptr, size);
+}
+
+void event_format__fprintf(struct event_format *event,
+ int cpu, void *data, int size, FILE *fp)
+{
+ struct tep_record record;
+ struct trace_seq s;
+
+ memset(&record, 0, sizeof(record));
+ record.cpu = cpu;
+ record.size = size;
+ record.data = data;
+
+ trace_seq_init(&s);
+ tep_event_info(&s, event, &record);
+ trace_seq_do_fprintf(&s, fp);
+ trace_seq_destroy(&s);
+}
+
+void event_format__print(struct event_format *event,
+ int cpu, void *data, int size)
+{
+ event_format__fprintf(event, cpu, data, size, stdout);
+}
+
+void parse_ftrace_printk(struct tep_handle *pevent,
+ char *file, unsigned int size __maybe_unused)
+{
+ unsigned long long addr;
+ char *printk;
+ char *line;
+ char *next = NULL;
+ char *addr_str;
+ char *fmt = NULL;
+
+ line = strtok_r(file, "\n", &next);
+ while (line) {
+ addr_str = strtok_r(line, ":", &fmt);
+ if (!addr_str) {
+ pr_warning("printk format with empty entry");
+ break;
+ }
+ addr = strtoull(addr_str, NULL, 16);
+ /* fmt still has a space, skip it */
+ printk = strdup(fmt+1);
+ line = strtok_r(NULL, "\n", &next);
+ tep_register_print_string(pevent, printk, addr);
+ free(printk);
+ }
+}
+
+void parse_saved_cmdline(struct tep_handle *pevent,
+ char *file, unsigned int size __maybe_unused)
+{
+ char comm[17]; /* Max comm length in the kernel is 16. */
+ char *line;
+ char *next = NULL;
+ int pid;
+
+ line = strtok_r(file, "\n", &next);
+ while (line) {
+ if (sscanf(line, "%d %16s", &pid, comm) == 2)
+ tep_register_comm(pevent, comm, pid);
+ line = strtok_r(NULL, "\n", &next);
+ }
+}
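+
+/*
+ * Example input, one "<pid> <comm>" pair per line as found in
+ * tracing/saved_cmdlines:
+ *
+ *    1234 bash
+ *    5678 perf
+ */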
+
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size)
+{
+ return tep_parse_event(pevent, buf, size, "ftrace");
+}
+
+int parse_event_file(struct tep_handle *pevent,
+ char *buf, unsigned long size, char *sys)
+{
+ return tep_parse_event(pevent, buf, size, sys);
+}
+
+struct event_format *trace_find_next_event(struct tep_handle *pevent,
+ struct event_format *event)
+{
+ static int idx;
+
+ if (!pevent || !pevent->events)
+ return NULL;
+
+ if (!event) {
+ idx = 0;
+ return pevent->events[0];
+ }
+
+ if (idx < pevent->nr_events && event == pevent->events[idx]) {
+ idx++;
+ if (idx == pevent->nr_events)
+ return NULL;
+ return pevent->events[idx];
+ }
+
+ for (idx = 1; idx < pevent->nr_events; idx++) {
+ if (event == pevent->events[idx - 1])
+ return pevent->events[idx];
+ }
+ return NULL;
+}
+
+struct flag {
+ const char *name;
+ unsigned long long value;
+};
+
+static const struct flag flags[] = {
+ { "HI_SOFTIRQ", 0 },
+ { "TIMER_SOFTIRQ", 1 },
+ { "NET_TX_SOFTIRQ", 2 },
+ { "NET_RX_SOFTIRQ", 3 },
+ { "BLOCK_SOFTIRQ", 4 },
+ { "IRQ_POLL_SOFTIRQ", 5 },
+ { "TASKLET_SOFTIRQ", 6 },
+ { "SCHED_SOFTIRQ", 7 },
+ { "HRTIMER_SOFTIRQ", 8 },
+ { "RCU_SOFTIRQ", 9 },
+
+ { "HRTIMER_NORESTART", 0 },
+ { "HRTIMER_RESTART", 1 },
+};
+
+unsigned long long eval_flag(const char *flag)
+{
+ int i;
+
+ /*
+ * Some flags in the format files do not get converted.
+ * If the flag is not numeric, see if it is something that
+ * we already know about.
+ */
+ if (isdigit(flag[0]))
+ return strtoull(flag, NULL, 0);
+
+ for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+ if (strcmp(flags[i].name, flag) == 0)
+ return flags[i].value;
+
+ return 0;
+}
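+
+/*
+ * Examples: eval_flag("3") returns 3, eval_flag("NET_RX_SOFTIRQ")
+ * returns 3 via the table above, and an unknown name returns 0.
+ */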
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
new file mode 100644
index 000000000..12324325e
--- /dev/null
+++ b/tools/perf/util/trace-event-read.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (C) 2009, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "trace-event.h"
+#include "debug.h"
+
+static int input_fd;
+
+static ssize_t trace_data_size;
+static bool repipe;
+
+static int __do_read(int fd, void *buf, int size)
+{
+ int rsize = size;
+
+ while (size) {
+ int ret = read(fd, buf, size);
+
+ if (ret <= 0)
+ return -1;
+
+ if (repipe) {
+ int retw = write(STDOUT_FILENO, buf, ret);
+
+ if (retw <= 0 || retw != ret) {
+ pr_debug("repiping input file");
+ return -1;
+ }
+ }
+
+ size -= ret;
+ buf += ret;
+ }
+
+ return rsize;
+}
+
+static int do_read(void *data, int size)
+{
+ int r;
+
+ r = __do_read(input_fd, data, size);
+ if (r <= 0) {
+ pr_debug("reading input file (size expected=%d received=%d)",
+ size, r);
+ return -1;
+ }
+
+ trace_data_size += r;
+
+ return r;
+}
+
+/* If it fails, the next read will report it */
+static void skip(int size)
+{
+ char buf[BUFSIZ];
+ int r;
+
+ while (size) {
+ r = size > BUFSIZ ? BUFSIZ : size;
+ do_read(buf, r);
+ size -= r;
+ }
+}
+
+static unsigned int read4(struct tep_handle *pevent)
+{
+ unsigned int data;
+
+ if (do_read(&data, 4) < 0)
+ return 0;
+ return __data2host4(pevent, data);
+}
+
+static unsigned long long read8(struct tep_handle *pevent)
+{
+ unsigned long long data;
+
+ if (do_read(&data, 8) < 0)
+ return 0;
+ return __data2host8(pevent, data);
+}
+
+static char *read_string(void)
+{
+ char buf[BUFSIZ];
+ char *str = NULL;
+ int size = 0;
+ off_t r;
+ char c;
+
+ for (;;) {
+ r = read(input_fd, &c, 1);
+ if (r < 0) {
+ pr_debug("reading input file");
+ goto out;
+ }
+
+ if (!r) {
+ pr_debug("no data");
+ goto out;
+ }
+
+ if (repipe) {
+ int retw = write(STDOUT_FILENO, &c, 1);
+
+ if (retw <= 0 || retw != r) {
+ pr_debug("repiping input file string");
+ goto out;
+ }
+ }
+
+ buf[size++] = c;
+
+ if (!c)
+ break;
+ }
+
+ trace_data_size += size;
+
+ str = malloc(size);
+ if (str)
+ memcpy(str, buf, size);
+out:
+ return str;
+}
+
+static int read_proc_kallsyms(struct tep_handle *pevent)
+{
+ unsigned int size;
+
+ size = read4(pevent);
+ if (!size)
+ return 0;
+ /*
+ * Just skip it, now that we configure libtraceevent to use the
+ * tools/perf/ symbol resolver.
+ *
+	 * We need to skip it so that we can continue parsing old perf.data
+	 * files that contain this /proc/kallsyms payload.
+	 *
+	 * Newer perf.data files will have just a 4-byte zero "kallsyms
+	 * payload", so that older tools can continue reading them and
+	 * interpret it as "no kallsyms payload is present".
+ */
+ lseek(input_fd, size, SEEK_CUR);
+ trace_data_size += size;
+ return 0;
+}
+
+static int read_ftrace_printk(struct tep_handle *pevent)
+{
+ unsigned int size;
+ char *buf;
+
+ /* it can have 0 size */
+ size = read4(pevent);
+ if (!size)
+ return 0;
+
+ buf = malloc(size + 1);
+ if (buf == NULL)
+ return -1;
+
+ if (do_read(buf, size) < 0) {
+ free(buf);
+ return -1;
+ }
+
+ buf[size] = '\0';
+
+ parse_ftrace_printk(pevent, buf, size);
+
+ free(buf);
+ return 0;
+}
+
+static int read_header_files(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ char *header_page;
+ char buf[BUFSIZ];
+ int ret = 0;
+
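+	/* "header_page" plus its NUL terminator is 12 bytes */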
+ if (do_read(buf, 12) < 0)
+ return -1;
+
+ if (memcmp(buf, "header_page", 12) != 0) {
+ pr_debug("did not read header page");
+ return -1;
+ }
+
+ size = read8(pevent);
+
+ header_page = malloc(size);
+ if (header_page == NULL)
+ return -1;
+
+ if (do_read(header_page, size) < 0) {
+ pr_debug("did not read header page");
+ free(header_page);
+ return -1;
+ }
+
+ if (!tep_parse_header_page(pevent, header_page, size,
+ tep_get_long_size(pevent))) {
+ /*
+ * The commit field in the page is of type long,
+ * use that instead, since it represents the kernel.
+ */
+ tep_set_long_size(pevent, pevent->header_page_size_size);
+ }
+ free(header_page);
+
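+	/* "header_event" plus its NUL terminator is 13 bytes */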
+ if (do_read(buf, 13) < 0)
+ return -1;
+
+ if (memcmp(buf, "header_event", 13) != 0) {
+ pr_debug("did not read header event");
+ return -1;
+ }
+
+ size = read8(pevent);
+ skip(size);
+
+ return ret;
+}
+
+static int read_ftrace_file(struct tep_handle *pevent, unsigned long long size)
+{
+ int ret;
+ char *buf;
+
+ buf = malloc(size);
+ if (buf == NULL) {
+ pr_debug("memory allocation failure\n");
+ return -1;
+ }
+
+ ret = do_read(buf, size);
+ if (ret < 0) {
+ pr_debug("error reading ftrace file.\n");
+ goto out;
+ }
+
+ ret = parse_ftrace_file(pevent, buf, size);
+ if (ret < 0)
+ pr_debug("error parsing ftrace file.\n");
+out:
+ free(buf);
+ return ret;
+}
+
+static int read_event_file(struct tep_handle *pevent, char *sys,
+ unsigned long long size)
+{
+ int ret;
+ char *buf;
+
+ buf = malloc(size);
+ if (buf == NULL) {
+ pr_debug("memory allocation failure\n");
+ return -1;
+ }
+
+ ret = do_read(buf, size);
+ if (ret < 0) {
+ free(buf);
+ goto out;
+ }
+
+ ret = parse_event_file(pevent, buf, size, sys);
+ if (ret < 0)
+ pr_debug("error parsing event file.\n");
+out:
+ free(buf);
+ return ret;
+}
+
+static int read_ftrace_files(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ int count;
+ int i;
+ int ret;
+
+ count = read4(pevent);
+
+ for (i = 0; i < count; i++) {
+ size = read8(pevent);
+ ret = read_ftrace_file(pevent, size);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
+static int read_event_files(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ char *sys;
+ int systems;
+ int count;
+	int i, x;
+ int ret;
+
+ systems = read4(pevent);
+
+ for (i = 0; i < systems; i++) {
+ sys = read_string();
+ if (sys == NULL)
+ return -1;
+
+ count = read4(pevent);
+
+		for (x = 0; x < count; x++) {
+ size = read8(pevent);
+ ret = read_event_file(pevent, sys, size);
+ if (ret) {
+ free(sys);
+ return ret;
+ }
+ }
+ free(sys);
+ }
+ return 0;
+}
+
+static int read_saved_cmdline(struct tep_handle *pevent)
+{
+ unsigned long long size;
+ char *buf;
+ int ret;
+
+ /* it can have 0 size */
+ size = read8(pevent);
+ if (!size)
+ return 0;
+
+ buf = malloc(size + 1);
+ if (buf == NULL) {
+ pr_debug("memory allocation failure\n");
+ return -1;
+ }
+
+ ret = do_read(buf, size);
+ if (ret < 0) {
+ pr_debug("error reading saved cmdlines\n");
+ goto out;
+ }
+ buf[ret] = '\0';
+
+ parse_saved_cmdline(pevent, buf, size);
+ ret = 0;
+out:
+ free(buf);
+ return ret;
+}
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
+{
+ char buf[BUFSIZ];
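+	/* { 0x17, 0x08, 'D' }: magic bytes at the start of tracing data */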
+ char test[] = { 23, 8, 68 };
+ char *version;
+ int show_version = 0;
+ int show_funcs = 0;
+ int show_printk = 0;
+ ssize_t size = -1;
+ int file_bigendian;
+ int host_bigendian;
+ int file_long_size;
+ int file_page_size;
+ struct tep_handle *pevent = NULL;
+ int err;
+
+ repipe = __repipe;
+ input_fd = fd;
+
+ if (do_read(buf, 3) < 0)
+ return -1;
+ if (memcmp(buf, test, 3) != 0) {
+ pr_debug("no trace data in the file");
+ return -1;
+ }
+
+ if (do_read(buf, 7) < 0)
+ return -1;
+ if (memcmp(buf, "tracing", 7) != 0) {
+ pr_debug("not a trace file (missing 'tracing' tag)");
+ return -1;
+ }
+
+ version = read_string();
+ if (version == NULL)
+ return -1;
+ if (show_version)
+ printf("version = %s\n", version);
+
+ if (do_read(buf, 1) < 0) {
+ free(version);
+ return -1;
+ }
+ file_bigendian = buf[0];
+ host_bigendian = bigendian();
+
+ if (trace_event__init(tevent)) {
+ pr_debug("trace_event__init failed");
+ goto out;
+ }
+
+ pevent = tevent->pevent;
+
+ tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+ tep_set_file_bigendian(pevent, file_bigendian);
+ tep_set_host_bigendian(pevent, host_bigendian);
+
+ if (do_read(buf, 1) < 0)
+ goto out;
+ file_long_size = buf[0];
+
+ file_page_size = read4(pevent);
+ if (!file_page_size)
+ goto out;
+
+ tep_set_long_size(pevent, file_long_size);
+ tep_set_page_size(pevent, file_page_size);
+
+ err = read_header_files(pevent);
+ if (err)
+ goto out;
+ err = read_ftrace_files(pevent);
+ if (err)
+ goto out;
+ err = read_event_files(pevent);
+ if (err)
+ goto out;
+ err = read_proc_kallsyms(pevent);
+ if (err)
+ goto out;
+ err = read_ftrace_printk(pevent);
+ if (err)
+ goto out;
+ if (atof(version) >= 0.6) {
+ err = read_saved_cmdline(pevent);
+ if (err)
+ goto out;
+ }
+
+ size = trace_data_size;
+ repipe = false;
+
+ if (show_funcs) {
+ tep_print_funcs(pevent);
+ } else if (show_printk) {
+ tep_print_printk(pevent);
+ }
+
+ pevent = NULL;
+
+out:
+ if (pevent)
+ trace_event__cleanup(tevent);
+ free(version);
+ return size;
+}
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
new file mode 100644
index 000000000..b749f812a
--- /dev/null
+++ b/tools/perf/util/trace-event-scripting.c
@@ -0,0 +1,176 @@
+/*
+ * trace-event-scripting. Scripting engine common and initialization code.
+ *
+ * Copyright (C) 2009-2010 Tom Zanussi <tzanussi@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "../perf.h"
+#include "debug.h"
+#include "util.h"
+#include "trace-event.h"
+
+struct scripting_context *scripting_context;
+
+static int flush_script_unsupported(void)
+{
+ return 0;
+}
+
+static int stop_script_unsupported(void)
+{
+ return 0;
+}
+
+static void process_event_unsupported(union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused,
+ struct addr_location *al __maybe_unused)
+{
+}
+
+static void print_python_unsupported_msg(void)
+{
+ fprintf(stderr, "Python scripting not supported."
+ " Install libpython and rebuild perf to enable it.\n"
+		"For example:\n # apt-get install python-dev (Ubuntu)"
+ "\n # yum install python-devel (Fedora)"
+ "\n etc.\n");
+}
+
+static int python_start_script_unsupported(const char *script __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ print_python_unsupported_msg();
+
+ return -1;
+}
+
+static int python_generate_script_unsupported(struct tep_handle *pevent
+ __maybe_unused,
+ const char *outfile
+ __maybe_unused)
+{
+ print_python_unsupported_msg();
+
+ return -1;
+}
+
+struct scripting_ops python_scripting_unsupported_ops = {
+ .name = "Python",
+ .start_script = python_start_script_unsupported,
+ .flush_script = flush_script_unsupported,
+ .stop_script = stop_script_unsupported,
+ .process_event = process_event_unsupported,
+ .generate_script = python_generate_script_unsupported,
+};
+
+static void register_python_scripting(struct scripting_ops *scripting_ops)
+{
+ if (scripting_context == NULL)
+ scripting_context = malloc(sizeof(*scripting_context));
+
+ if (scripting_context == NULL ||
+ script_spec_register("Python", scripting_ops) ||
+ script_spec_register("py", scripting_ops)) {
+ pr_err("Error registering Python script extension: disabling it\n");
+ zfree(&scripting_context);
+ }
+}
+
+#ifndef HAVE_LIBPYTHON_SUPPORT
+void setup_python_scripting(void)
+{
+ register_python_scripting(&python_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops python_scripting_ops;
+
+void setup_python_scripting(void)
+{
+ register_python_scripting(&python_scripting_ops);
+}
+#endif
+
+static void print_perl_unsupported_msg(void)
+{
+ fprintf(stderr, "Perl scripting not supported."
+ " Install libperl and rebuild perf to enable it.\n"
+ "For example:\n # apt-get install libperl-dev (ubuntu)"
+ "\n # yum install 'perl(ExtUtils::Embed)' (Fedora)"
+ "\n etc.\n");
+}
+
+static int perl_start_script_unsupported(const char *script __maybe_unused,
+ int argc __maybe_unused,
+ const char **argv __maybe_unused)
+{
+ print_perl_unsupported_msg();
+
+ return -1;
+}
+
+static int perl_generate_script_unsupported(struct tep_handle *pevent
+ __maybe_unused,
+ const char *outfile __maybe_unused)
+{
+ print_perl_unsupported_msg();
+
+ return -1;
+}
+
+struct scripting_ops perl_scripting_unsupported_ops = {
+ .name = "Perl",
+ .start_script = perl_start_script_unsupported,
+ .flush_script = flush_script_unsupported,
+ .stop_script = stop_script_unsupported,
+ .process_event = process_event_unsupported,
+ .generate_script = perl_generate_script_unsupported,
+};
+
+static void register_perl_scripting(struct scripting_ops *scripting_ops)
+{
+ if (scripting_context == NULL)
+ scripting_context = malloc(sizeof(*scripting_context));
+
+ if (scripting_context == NULL ||
+ script_spec_register("Perl", scripting_ops) ||
+ script_spec_register("pl", scripting_ops)) {
+ pr_err("Error registering Perl script extension: disabling it\n");
+ zfree(&scripting_context);
+ }
+}
+
+#ifndef HAVE_LIBPERL_SUPPORT
+void setup_perl_scripting(void)
+{
+ register_perl_scripting(&perl_scripting_unsupported_ops);
+}
+#else
+extern struct scripting_ops perl_scripting_ops;
+
+void setup_perl_scripting(void)
+{
+ register_perl_scripting(&perl_scripting_ops);
+}
+#endif
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
new file mode 100644
index 000000000..58bb72f26
--- /dev/null
+++ b/tools/perf/util/trace-event.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <traceevent/event-parse.h>
+#include <api/fs/tracing_path.h>
+#include <api/fs/fs.h>
+#include "trace-event.h"
+#include "machine.h"
+#include "util.h"
+
+/*
+ * global trace_event object used by trace_event__tp_format
+ *
+ * TODO There's no cleanup call for this. Add some sort of
+ * __exit function support and call trace_event__cleanup
+ * there.
+ */
+static struct trace_event tevent;
+static bool tevent_initialized;
+
+int trace_event__init(struct trace_event *t)
+{
+ struct tep_handle *pevent = tep_alloc();
+
+ if (pevent) {
+ t->plugin_list = tep_load_plugins(pevent);
+ t->pevent = pevent;
+ }
+
+ return pevent ? 0 : -1;
+}
+
+static int trace_event__init2(void)
+{
+ int be = tep_host_bigendian();
+ struct tep_handle *pevent;
+
+ if (trace_event__init(&tevent))
+ return -1;
+
+ pevent = tevent.pevent;
+ tep_set_flag(pevent, TEP_NSEC_OUTPUT);
+ tep_set_file_bigendian(pevent, be);
+ tep_set_host_bigendian(pevent, be);
+ tevent_initialized = true;
+ return 0;
+}
+
+int trace_event__register_resolver(struct machine *machine,
+ tep_func_resolver_t *func)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return -1;
+
+ return tep_set_function_resolver(tevent.pevent, func, machine);
+}
+
+void trace_event__cleanup(struct trace_event *t)
+{
+ tep_unload_plugins(t->plugin_list, t->pevent);
+ tep_free(t->pevent);
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+static struct event_format*
+tp_format(const char *sys, const char *name)
+{
+ char *tp_dir = get_events_file(sys);
+ struct tep_handle *pevent = tevent.pevent;
+ struct event_format *event = NULL;
+ char path[PATH_MAX];
+ size_t size;
+ char *data;
+ int err;
+
+ if (!tp_dir)
+ return ERR_PTR(-errno);
+
+ scnprintf(path, PATH_MAX, "%s/%s/format", tp_dir, name);
+ put_events_file(tp_dir);
+
+ err = filename__read_str(path, &data, &size);
+ if (err)
+ return ERR_PTR(err);
+
+ tep_parse_format(pevent, &event, data, size, sys);
+
+ free(data);
+ return event;
+}
+
+/*
+ * Returns pointer with encoded error via <linux/err.h> interface.
+ */
+struct event_format*
+trace_event__tp_format(const char *sys, const char *name)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return ERR_PTR(-ENOMEM);
+
+ return tp_format(sys, name);
+}
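+
+/*
+ * For illustration, a caller checks the <linux/err.h>-encoded result
+ * ("sched"/"sched_switch" being just an example tracepoint):
+ *
+ *	struct event_format *fmt = trace_event__tp_format("sched", "sched_switch");
+ *
+ *	if (IS_ERR(fmt))
+ *		return PTR_ERR(fmt);
+ */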
+
+struct event_format *trace_event__tp_format_id(int id)
+{
+ if (!tevent_initialized && trace_event__init2())
+ return ERR_PTR(-ENOMEM);
+
+ return tep_find_event(tevent.pevent, id);
+}
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
new file mode 100644
index 000000000..40204ec3a
--- /dev/null
+++ b/tools/perf/util/trace-event.h
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_UTIL_TRACE_EVENT_H
+#define _PERF_UTIL_TRACE_EVENT_H
+
+#include <traceevent/event-parse.h>
+#include "parse-events.h"
+
+struct machine;
+struct perf_sample;
+union perf_event;
+struct perf_tool;
+struct thread;
+struct plugin_list;
+
+struct trace_event {
+ struct tep_handle *pevent;
+ struct plugin_list *plugin_list;
+};
+
+int trace_event__init(struct trace_event *t);
+void trace_event__cleanup(struct trace_event *t);
+int trace_event__register_resolver(struct machine *machine,
+ tep_func_resolver_t *func);
+struct event_format*
+trace_event__tp_format(const char *sys, const char *name);
+
+struct event_format *trace_event__tp_format_id(int id);
+
+int bigendian(void);
+
+void event_format__fprintf(struct event_format *event,
+ int cpu, void *data, int size, FILE *fp);
+
+void event_format__print(struct event_format *event,
+ int cpu, void *data, int size);
+
+int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size);
+int parse_event_file(struct tep_handle *pevent,
+ char *buf, unsigned long size, char *sys);
+
+unsigned long long
+raw_field_value(struct event_format *event, const char *name, void *data);
+
+void parse_proc_kallsyms(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_ftrace_printk(struct tep_handle *pevent, char *file, unsigned int size);
+void parse_saved_cmdline(struct tep_handle *pevent, char *file, unsigned int size);
+
+ssize_t trace_report(int fd, struct trace_event *tevent, bool repipe);
+
+struct event_format *trace_find_next_event(struct tep_handle *pevent,
+ struct event_format *event);
+unsigned long long read_size(struct event_format *event, void *ptr, int size);
+unsigned long long eval_flag(const char *flag);
+
+int read_tracing_data(int fd, struct list_head *pattrs);
+
+struct tracing_data {
+ /* size is only valid if temp is 'true' */
+ ssize_t size;
+ bool temp;
+ char temp_file[50];
+};
+
+struct tracing_data *tracing_data_get(struct list_head *pattrs,
+ int fd, bool temp);
+int tracing_data_put(struct tracing_data *tdata);
+
+
+struct addr_location;
+
+struct perf_session;
+struct perf_stat_config;
+
+struct scripting_ops {
+ const char *name;
+ int (*start_script) (const char *script, int argc, const char **argv);
+ int (*flush_script) (void);
+ int (*stop_script) (void);
+ void (*process_event) (union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct addr_location *al);
+ void (*process_stat)(struct perf_stat_config *config,
+ struct perf_evsel *evsel, u64 tstamp);
+ void (*process_stat_interval)(u64 tstamp);
+ int (*generate_script) (struct tep_handle *pevent, const char *outfile);
+};
+
+extern unsigned int scripting_max_stack;
+
+int script_spec_register(const char *spec, struct scripting_ops *ops);
+
+void setup_perl_scripting(void);
+void setup_python_scripting(void);
+
+struct scripting_context {
+ struct tep_handle *pevent;
+ void *event_data;
+};
+
+int common_pc(struct scripting_context *context);
+int common_flags(struct scripting_context *context);
+int common_lock_depth(struct scripting_context *context);
+
+#endif /* _PERF_UTIL_TRACE_EVENT_H */
diff --git a/tools/perf/util/trigger.h b/tools/perf/util/trigger.h
new file mode 100644
index 000000000..88223bc7c
--- /dev/null
+++ b/tools/perf/util/trigger.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRIGGER_H_
+#define __TRIGGER_H_ 1
+
+#include "util/debug.h"
+#include "asm/bug.h"
+
+/*
+ * Use trigger to model operations which need to be executed when
+ * an event (a signal, for example) is observed.
+ *
+ * States and transitions:
+ *
+ *
+ * OFF--> ON --> READY --(hit)--> HIT
+ * ^ |
+ * | (ready)
+ * | |
+ * \_____________/
+ *
+ * is_hit and is_ready are two key functions to query the state of
+ * a trigger. is_hit means the event has already happened; is_ready means
+ * the trigger is waiting for the event.
+ */
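+
+/*
+ * A minimal usage sketch (the names below are illustrative only):
+ *
+ *	static DEFINE_TRIGGER(dump_trigger);
+ *
+ *	setup:			trigger_on(&dump_trigger);
+ *	signal handler:		trigger_hit(&dump_trigger);
+ *	before waiting:		trigger_ready(&dump_trigger);
+ *	after each event:	if (trigger_is_hit(&dump_trigger))
+ *					do_dump_and_ready();
+ */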
+
+struct trigger {
+ volatile enum {
+ TRIGGER_ERROR = -2,
+ TRIGGER_OFF = -1,
+ TRIGGER_ON = 0,
+ TRIGGER_READY = 1,
+ TRIGGER_HIT = 2,
+ } state;
+ const char *name;
+};
+
+#define TRIGGER_WARN_ONCE(t, exp) \
+	WARN_ONCE(t->state != exp, "trigger '%s' state transition error: %d in %s()\n", \
+ t->name, t->state, __func__)
+
+static inline bool trigger_is_available(struct trigger *t)
+{
+ return t->state >= 0;
+}
+
+static inline bool trigger_is_error(struct trigger *t)
+{
+ return t->state <= TRIGGER_ERROR;
+}
+
+static inline void trigger_on(struct trigger *t)
+{
+ TRIGGER_WARN_ONCE(t, TRIGGER_OFF);
+ t->state = TRIGGER_ON;
+}
+
+static inline void trigger_ready(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ t->state = TRIGGER_READY;
+}
+
+static inline void trigger_hit(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ TRIGGER_WARN_ONCE(t, TRIGGER_READY);
+ t->state = TRIGGER_HIT;
+}
+
+static inline void trigger_off(struct trigger *t)
+{
+ if (!trigger_is_available(t))
+ return;
+ t->state = TRIGGER_OFF;
+}
+
+static inline void trigger_error(struct trigger *t)
+{
+ t->state = TRIGGER_ERROR;
+}
+
+static inline bool trigger_is_ready(struct trigger *t)
+{
+ return t->state == TRIGGER_READY;
+}
+
+static inline bool trigger_is_hit(struct trigger *t)
+{
+ return t->state == TRIGGER_HIT;
+}
+
+#define DEFINE_TRIGGER(n) \
+struct trigger n = {.state = TRIGGER_OFF, .name = #n}
+#endif
diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c
new file mode 100644
index 000000000..bfa782421
--- /dev/null
+++ b/tools/perf/util/tsc.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+#include "tsc.h"
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+ u64 t, quot, rem;
+
+ t = ns - tc->time_zero;
+ quot = t / tc->time_mult;
+ rem = t % tc->time_mult;
+ return (quot << tc->time_shift) +
+ (rem << tc->time_shift) / tc->time_mult;
+}
+
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+ u64 quot, rem;
+
+ quot = cyc >> tc->time_shift;
+ rem = cyc & (((u64)1 << tc->time_shift) - 1);
+ return tc->time_zero + quot * tc->time_mult +
+ ((rem * tc->time_mult) >> tc->time_shift);
+}
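+
+/*
+ * The two functions above (approximately) invert each other, both
+ * implementing the kernel's mapping
+ *
+ *	time = time_zero + ((cyc * time_mult) >> time_shift)
+ *
+ * while avoiding 64-bit overflow by splitting into quotient and
+ * remainder.  Illustrative (not real calibration) numbers: with
+ * time_mult = 3, time_shift = 10, time_zero = 0, perf_time_to_tsc(2053)
+ * = (684 << 10) + (1 << 10) / 3 = 700757 cycles.
+ */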
+
+u64 __weak rdtsc(void)
+{
+ return 0;
+}
diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h
new file mode 100644
index 000000000..e0c3af34a
--- /dev/null
+++ b/tools/perf/util/tsc.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_TSC_H
+#define __PERF_TSC_H
+
+#include <linux/types.h>
+
+#include "event.h"
+
+struct perf_tsc_conversion {
+ u16 time_shift;
+ u32 time_mult;
+ u64 time_zero;
+};
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+u64 rdtsc(void);
+
+struct perf_tool;
+struct machine;
+
+int perf_event__synth_time_conv(const struct perf_event_mmap_page *pc,
+ struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine);
+
+#endif
diff --git a/tools/perf/util/units.c b/tools/perf/util/units.c
new file mode 100644
index 000000000..a46762aec
--- /dev/null
+++ b/tools/perf/util/units.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "units.h"
+#include <inttypes.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/kernel.h>
+#include <linux/time64.h>
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags)
+{
+ struct parse_tag *i = tags;
+
+ while (i->tag) {
+ char *s = strchr(str, i->tag);
+
+ if (s) {
+ unsigned long int value;
+ char *endptr;
+
+ value = strtoul(str, &endptr, 10);
+ if (s != endptr)
+ break;
+
+ if (value > ULONG_MAX / i->mult)
+ break;
+ value *= i->mult;
+ return value;
+ }
+ i++;
+ }
+
+ return (unsigned long) -1;
+}
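+
+/*
+ * Example (a sketch; the tag table comes from the caller):
+ *
+ *	struct parse_tag tags[] = {
+ *		{ .tag = 'K', .mult = 1 << 10 },
+ *		{ .tag = 'M', .mult = 1 << 20 },
+ *		{ .tag = 0 },
+ *	};
+ *
+ *	parse_tag_value("32M", tags) returns 32 << 20; an unknown or
+ *	misplaced tag yields (unsigned long)-1.
+ */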
+
+unsigned long convert_unit(unsigned long value, char *unit)
+{
+ *unit = ' ';
+
+ if (value > 1000) {
+ value /= 1000;
+ *unit = 'K';
+ }
+
+ if (value > 1000) {
+ value /= 1000;
+ *unit = 'M';
+ }
+
+ if (value > 1000) {
+ value /= 1000;
+ *unit = 'G';
+ }
+
+ return value;
+}
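+
+/*
+ * For illustration: convert_unit(1234567, &unit) returns 1 with
+ * unit = 'M' (each step divides by 1000; values <= 1000 keep unit ' ').
+ */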
+
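+/*
+ * e.g. unit_number__scnprintf(buf, sizeof(buf), 4096) formats "4K":
+ * powers of 1024 with a suffix from "BKMG".
+ */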
+int unit_number__scnprintf(char *buf, size_t size, u64 n)
+{
+ char unit[4] = "BKMG";
+ int i = 0;
+
+ while (((n / 1024) > 1) && (i < 3)) {
+ n /= 1024;
+ i++;
+ }
+
+ return scnprintf(buf, size, "%" PRIu64 "%c", n, unit[i]);
+}
diff --git a/tools/perf/util/units.h b/tools/perf/util/units.h
new file mode 100644
index 000000000..99263b6a2
--- /dev/null
+++ b/tools/perf/util/units.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERF_UNIT_H
+#define PERF_UNIT_H
+
+#include <stddef.h>
+#include <linux/types.h>
+
+struct parse_tag {
+ char tag;
+ int mult;
+};
+
+unsigned long parse_tag_value(const char *str, struct parse_tag *tags);
+
+unsigned long convert_unit(unsigned long value, char *unit);
+int unit_number__scnprintf(char *buf, size_t size, u64 n);
+
+#endif /* PERF_UNIT_H */
diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c
new file mode 100644
index 000000000..5eff9bfc5
--- /dev/null
+++ b/tools/perf/util/unwind-libdw.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <elfutils/libdw.h>
+#include <elfutils/libdwfl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include "debug.h"
+#include "unwind.h"
+#include "unwind-libdw.h"
+#include "machine.h"
+#include "thread.h"
+#include <linux/types.h>
+#include "event.h"
+#include "perf_regs.h"
+#include "callchain.h"
+#include "util.h"
+
+static char *debuginfo_path;
+
+static const Dwfl_Callbacks offline_callbacks = {
+ .find_debuginfo = dwfl_standard_find_debuginfo,
+ .debuginfo_path = &debuginfo_path,
+ .section_address = dwfl_offline_section_address,
+};
+
+static int __report_module(struct addr_location *al, u64 ip,
+ struct unwind_info *ui)
+{
+ Dwfl_Module *mod;
+ struct dso *dso = NULL;
+ /*
+ * Some callers will use al->sym, so we can't just use the
+ * cheaper thread__find_map() here.
+ */
+ thread__find_symbol(ui->thread, PERF_RECORD_MISC_USER, ip, al);
+
+ if (al->map)
+ dso = al->map->dso;
+
+ if (!dso)
+ return 0;
+
+ mod = dwfl_addrmodule(ui->dwfl, ip);
+ if (mod) {
+ Dwarf_Addr s;
+
+ dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL);
+ if (s != al->map->start - al->map->pgoff)
+			mod = NULL;
+ }
+
+ if (!mod)
+		mod = dwfl_report_elf(ui->dwfl, dso->short_name,
+				      dso->symsrc_filename ?: dso->long_name,
+				      -1, al->map->start - al->map->pgoff,
+				      false);
+
+ return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1;
+}
+
+static int report_module(u64 ip, struct unwind_info *ui)
+{
+ struct addr_location al;
+
+ return __report_module(&al, ip, ui);
+}
+
+/*
+ * Store all entries in the entries array;
+ * we will process them after the unwind finishes.
+ */
+static int entry(u64 ip, struct unwind_info *ui)
+{
+ struct unwind_entry *e = &ui->entries[ui->idx++];
+ struct addr_location al;
+
+ if (__report_module(&al, ip, ui))
+ return -1;
+
+ e->ip = ip;
+ e->map = al.map;
+ e->sym = al.sym;
+
+ pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+ al.sym ? al.sym->name : "''",
+ ip,
+ al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+ return 0;
+}
+
+static pid_t next_thread(Dwfl *dwfl, void *arg, void **thread_argp)
+{
+	/* We want only a single thread to be processed. */
+ if (*thread_argp != NULL)
+ return 0;
+
+ *thread_argp = arg;
+ return dwfl_pid(dwfl);
+}
+
+static int access_dso_mem(struct unwind_info *ui, Dwarf_Addr addr,
+ Dwarf_Word *data)
+{
+ struct addr_location al;
+ ssize_t size;
+
+ if (!thread__find_map(ui->thread, PERF_RECORD_MISC_USER, addr, &al)) {
+ pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ return -1;
+ }
+
+ if (!al.map->dso)
+ return -1;
+
+ size = dso__data_read_addr(al.map->dso, al.map, ui->machine,
+ addr, (u8 *) data, sizeof(*data));
+
+ return !(size == sizeof(*data));
+}
+
+static bool memory_read(Dwfl *dwfl __maybe_unused, Dwarf_Addr addr, Dwarf_Word *result,
+ void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct stack_dump *stack = &ui->sample->user_stack;
+ u64 start, end;
+ int offset;
+ int ret;
+
+ ret = perf_reg_value(&start, &ui->sample->user_regs, PERF_REG_SP);
+ if (ret)
+ return false;
+
+ end = start + stack->size;
+
+ /* Check overflow. */
+ if (addr + sizeof(Dwarf_Word) < addr)
+ return false;
+
+ if (addr < start || addr + sizeof(Dwarf_Word) > end) {
+ ret = access_dso_mem(ui, addr, result);
+ if (ret) {
+ pr_debug("unwind: access_mem 0x%" PRIx64 " not inside range"
+ " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ addr, start, end);
+ return false;
+ }
+ return true;
+ }
+
+ offset = addr - start;
+ *result = *(Dwarf_Word *)&stack->data[offset];
+ pr_debug("unwind: access_mem addr 0x%" PRIx64 ", val %lx, offset %d\n",
+ addr, (unsigned long)*result, offset);
+ return true;
+}
+
+static const Dwfl_Thread_Callbacks callbacks = {
+ .next_thread = next_thread,
+ .memory_read = memory_read,
+ .set_initial_registers = libdw__arch_set_initial_registers,
+};
+
+static int
+frame_callback(Dwfl_Frame *state, void *arg)
+{
+ struct unwind_info *ui = arg;
+ Dwarf_Addr pc;
+ bool isactivation;
+
+ if (!dwfl_frame_pc(state, &pc, NULL)) {
+ pr_err("%s", dwfl_errmsg(-1));
+ return DWARF_CB_ABORT;
+ }
+
+ // report the module before we query for isactivation
+ report_module(pc, ui);
+
+ if (!dwfl_frame_pc(state, &pc, &isactivation)) {
+ pr_err("%s", dwfl_errmsg(-1));
+ return DWARF_CB_ABORT;
+ }
+
+ if (!isactivation)
+ --pc;
+
+ return entry(pc, ui) || !(--ui->max_stack) ?
+ DWARF_CB_ABORT : DWARF_CB_OK;
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data,
+ int max_stack)
+{
+ struct unwind_info *ui, ui_buf = {
+ .sample = data,
+ .thread = thread,
+ .machine = thread->mg->machine,
+ .cb = cb,
+ .arg = arg,
+ .max_stack = max_stack,
+ };
+ Dwarf_Word ip;
+ int err = -EINVAL, i;
+
+ if (!data->user_regs.regs)
+ return -EINVAL;
+
+ ui = zalloc(sizeof(ui_buf) + sizeof(ui_buf.entries[0]) * max_stack);
+ if (!ui)
+ return -ENOMEM;
+
+ *ui = ui_buf;
+
+ ui->dwfl = dwfl_begin(&offline_callbacks);
+ if (!ui->dwfl)
+ goto out;
+
+ err = perf_reg_value(&ip, &data->user_regs, PERF_REG_IP);
+ if (err)
+ goto out;
+
+ err = report_module(ip, ui);
+ if (err)
+ goto out;
+
+ err = !dwfl_attach_state(ui->dwfl, EM_NONE, thread->tid, &callbacks, ui);
+ if (err)
+ goto out;
+
+ err = dwfl_getthread_frames(ui->dwfl, thread->tid, frame_callback, ui);
+
+ if (err && ui->max_stack != max_stack)
+ err = 0;
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < ui->idx && !err; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = ui->idx - i - 1;
+
+ err = ui->entries[j].ip ? ui->cb(&ui->entries[j], ui->arg) : 0;
+ }
+
+ out:
+ if (err)
+ pr_debug("unwind: failed with '%s'\n", dwfl_errmsg(-1));
+
+ dwfl_end(ui->dwfl);
+ free(ui);
+ return 0;
+}
diff --git a/tools/perf/util/unwind-libdw.h b/tools/perf/util/unwind-libdw.h
new file mode 100644
index 000000000..0cbd2650e
--- /dev/null
+++ b/tools/perf/util/unwind-libdw.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UNWIND_LIBDW_H
+#define __PERF_UNWIND_LIBDW_H
+
+#include <elfutils/libdwfl.h>
+#include "unwind.h"
+
+struct machine;
+struct perf_sample;
+struct thread;
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg);
+
+struct unwind_info {
+ Dwfl *dwfl;
+ struct perf_sample *sample;
+ struct machine *machine;
+ struct thread *thread;
+ unwind_entry_cb_t cb;
+ void *arg;
+ int max_stack;
+ int idx;
+ struct unwind_entry entries[];
+};
+
+#endif /* __PERF_UNWIND_LIBDW_H */
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
new file mode 100644
index 000000000..79f521a55
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -0,0 +1,739 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Post mortem Dwarf CFI based unwinding on top of regs and stack dumps.
+ *
+ * Much of this code has been borrowed from, or heavily inspired by, parts
+ * of the libunwind 0.99 code, which is (amongst other contributors I may
+ * have forgotten):
+ *
+ * Copyright (C) 2002-2007 Hewlett-Packard Co
+ * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * And the bugs have been added by:
+ *
+ * Copyright (C) 2010, Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (C) 2012, Jiri Olsa <jolsa@redhat.com>
+ *
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <linux/list.h>
+#ifndef REMOTE_UNWIND_LIBUNWIND
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+#include "callchain.h"
+#include "thread.h"
+#include "session.h"
+#include "perf_regs.h"
+#include "unwind.h"
+#include "symbol.h"
+#include "util.h"
+#include "debug.h"
+#include "asm/bug.h"
+#include "dso.h"
+
+extern int
+UNW_OBJ(dwarf_search_unwind_table) (unw_addr_space_t as,
+ unw_word_t ip,
+ unw_dyn_info_t *di,
+ unw_proc_info_t *pi,
+ int need_unwind_info, void *arg);
+
+#define dwarf_search_unwind_table UNW_OBJ(dwarf_search_unwind_table)
+
+extern int
+UNW_OBJ(dwarf_find_debug_frame) (int found, unw_dyn_info_t *di_debug,
+ unw_word_t ip,
+ unw_word_t segbase,
+ const char *obj_name, unw_word_t start,
+ unw_word_t end);
+
+#define dwarf_find_debug_frame UNW_OBJ(dwarf_find_debug_frame)
+
+#define DW_EH_PE_FORMAT_MASK 0x0f /* format of the encoded value */
+#define DW_EH_PE_APPL_MASK 0x70 /* how the value is to be applied */
+
+/* Pointer-encoding formats: */
+#define DW_EH_PE_omit 0xff
+#define DW_EH_PE_ptr 0x00 /* pointer-sized unsigned value */
+#define DW_EH_PE_udata4 0x03 /* unsigned 32-bit value */
+#define DW_EH_PE_udata8 0x04 /* unsigned 64-bit value */
+#define DW_EH_PE_sdata4 0x0b /* signed 32-bit value */
+#define DW_EH_PE_sdata8 0x0c /* signed 64-bit value */
+
+/* Pointer-encoding application: */
+#define DW_EH_PE_absptr 0x00 /* absolute value */
+#define DW_EH_PE_pcrel 0x10 /* rel. to addr. of encoded value */
+
+/*
+ * The following are not documented by LSB v1.3, yet they are used by
+ * GCC; presumably they aren't documented by LSB since they aren't
+ * used on Linux:
+ */
+#define DW_EH_PE_funcrel 0x40 /* start-of-procedure-relative */
+#define DW_EH_PE_aligned 0x50 /* aligned pointer */
+
+/* Flags intentionally not handled, since they're not needed:
+ * #define DW_EH_PE_indirect 0x80
+ * #define DW_EH_PE_uleb128 0x01
+ * #define DW_EH_PE_udata2 0x02
+ * #define DW_EH_PE_sleb128 0x09
+ * #define DW_EH_PE_sdata2 0x0a
+ * #define DW_EH_PE_textrel 0x20
+ * #define DW_EH_PE_datarel 0x30
+ */
+
+struct unwind_info {
+ struct perf_sample *sample;
+ struct machine *machine;
+ struct thread *thread;
+};
+
+#define dw_read(ptr, type, end) ({ \
+ type *__p = (type *) ptr; \
+ type __v; \
+ if ((__p + 1) > (type *) end) \
+ return -EINVAL; \
+ __v = *__p++; \
+ ptr = (typeof(ptr)) __p; \
+ __v; \
+ })
+
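+/*
+ * Note that dw_read() above pulls one 'type'-sized value out of the byte
+ * stream at 'ptr', bounds-checked against 'end', advancing 'ptr' past it;
+ * on overrun it executes 'return -EINVAL' from the *calling* function, so
+ * it is only usable inside functions that return int.
+ */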
+static int __dw_read_encoded_value(u8 **p, u8 *end, u64 *val,
+ u8 encoding)
+{
+ u8 *cur = *p;
+ *val = 0;
+
+ switch (encoding) {
+ case DW_EH_PE_omit:
+ *val = 0;
+ goto out;
+ case DW_EH_PE_ptr:
+ *val = dw_read(cur, unsigned long, end);
+ goto out;
+ default:
+ break;
+ }
+
+ switch (encoding & DW_EH_PE_APPL_MASK) {
+ case DW_EH_PE_absptr:
+ break;
+ case DW_EH_PE_pcrel:
+ *val = (unsigned long) cur;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if ((encoding & 0x07) == 0x00)
+ encoding |= DW_EH_PE_udata4;
+
+ switch (encoding & DW_EH_PE_FORMAT_MASK) {
+ case DW_EH_PE_sdata4:
+ *val += dw_read(cur, s32, end);
+ break;
+ case DW_EH_PE_udata4:
+ *val += dw_read(cur, u32, end);
+ break;
+ case DW_EH_PE_sdata8:
+ *val += dw_read(cur, s64, end);
+ break;
+ case DW_EH_PE_udata8:
+ *val += dw_read(cur, u64, end);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ out:
+ *p = cur;
+ return 0;
+}
+
+#define dw_read_encoded_value(ptr, end, enc) ({ \
+ u64 __v; \
+ if (__dw_read_encoded_value(&ptr, end, &__v, enc)) { \
+ return -EINVAL; \
+ } \
+ __v; \
+ })
+
+static u64 elf_section_offset(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ GElf_Shdr shdr;
+ u64 offset = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+
+ do {
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ break;
+
+ if (!elf_section_by_name(elf, &ehdr, &shdr, name, NULL))
+ break;
+
+ offset = shdr.sh_offset;
+ } while (0);
+
+ elf_end(elf);
+ return offset;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int elf_is_exec(int fd, const char *name)
+{
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ int retval = 0;
+
+ elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
+ if (elf == NULL)
+ return 0;
+ if (gelf_getehdr(elf, &ehdr) == NULL)
+ goto out;
+
+ retval = (ehdr.e_type == ET_EXEC);
+
+out:
+ elf_end(elf);
+ pr_debug("unwind: elf_is_exec(%s): %d\n", name, retval);
+ return retval;
+}
+#endif
+
+struct table_entry {
+ u32 start_ip_offset;
+ u32 fde_offset;
+};
+
+struct eh_frame_hdr {
+ unsigned char version;
+ unsigned char eh_frame_ptr_enc;
+ unsigned char fde_count_enc;
+ unsigned char table_enc;
+
+ /*
+ * The rest of the header is variable-length and consists of the
+ * following members:
+ *
+ * encoded_t eh_frame_ptr;
+ * encoded_t fde_count;
+ */
+
+ /* A single encoded pointer should not be more than 8 bytes. */
+ u64 enc[2];
+
+ /*
+ * struct {
+ * encoded_t start_ip;
+ * encoded_t fde_addr;
+ * } binary_search_table[fde_count];
+ */
+ char data[0];
+} __packed;
+
+static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
+ u64 offset, u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ struct eh_frame_hdr hdr;
+ u8 *enc = (u8 *) &hdr.enc;
+ u8 *end = (u8 *) &hdr.data;
+ ssize_t r;
+
+ r = dso__data_read_offset(dso, machine, offset,
+ (u8 *) &hdr, sizeof(hdr));
+ if (r != sizeof(hdr))
+ return -EINVAL;
+
+	/* We don't need eh_frame_ptr, just skip it. */
+ dw_read_encoded_value(enc, end, hdr.eh_frame_ptr_enc);
+
+ *fde_count = dw_read_encoded_value(enc, end, hdr.fde_count_enc);
+ *segbase = offset;
+ *table_data = (enc - (u8 *) &hdr) + offset;
+ return 0;
+}
+
+static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
+ u64 *table_data, u64 *segbase,
+ u64 *fde_count)
+{
+ int ret = -EINVAL, fd;
+ u64 offset = dso->data.eh_frame_hdr_offset;
+
+ if (offset == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd < 0)
+ return -EINVAL;
+
+ /* Check the .eh_frame section for unwinding info */
+ offset = elf_section_offset(fd, ".eh_frame_hdr");
+ dso->data.eh_frame_hdr_offset = offset;
+ dso__data_put_fd(dso);
+ }
+
+ if (offset)
+ ret = unwind_spec_ehframe(dso, machine, offset,
+ table_data, segbase,
+ fde_count);
+
+ return ret;
+}
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+static int read_unwind_spec_debug_frame(struct dso *dso,
+ struct machine *machine, u64 *offset)
+{
+ int fd;
+ u64 ofs = dso->data.debug_frame_offset;
+
+	/* debug_frame can reside in:
+	 *  - the dso itself
+	 *  - the debug file pointed to by symsrc_filename
+	 *  - gnu_debuglink, which doesn't necessarily have
+	 *    to be pointed to by symsrc_filename
+	 */
+ if (ofs == 0) {
+ fd = dso__data_get_fd(dso, machine);
+ if (fd >= 0) {
+ ofs = elf_section_offset(fd, ".debug_frame");
+ dso__data_put_fd(dso);
+ }
+
+ if (ofs <= 0) {
+ fd = open(dso->symsrc_filename, O_RDONLY);
+ if (fd >= 0) {
+ ofs = elf_section_offset(fd, ".debug_frame");
+ close(fd);
+ }
+ }
+
+ if (ofs <= 0) {
+ char *debuglink = malloc(PATH_MAX);
+ int ret = 0;
+
+ ret = dso__read_binary_type_filename(
+ dso, DSO_BINARY_TYPE__DEBUGLINK,
+ machine->root_dir, debuglink, PATH_MAX);
+ if (!ret) {
+ fd = open(debuglink, O_RDONLY);
+ if (fd >= 0) {
+ ofs = elf_section_offset(fd,
+ ".debug_frame");
+ close(fd);
+ }
+ }
+ if (ofs > 0) {
+ if (dso->symsrc_filename != NULL) {
+ pr_warning(
+ "%s: overwrite symsrc(%s,%s)\n",
+ __func__,
+ dso->symsrc_filename,
+ debuglink);
+ free(dso->symsrc_filename);
+ }
+ dso->symsrc_filename = debuglink;
+ } else {
+ free(debuglink);
+ }
+ }
+
+ dso->data.debug_frame_offset = ofs;
+ }
+
+ *offset = ofs;
+ if (*offset)
+ return 0;
+
+ return -EINVAL;
+}
+#endif
+
+static struct map *find_map(unw_word_t ip, struct unwind_info *ui)
+{
+ struct addr_location al;
+ return thread__find_map(ui->thread, PERF_RECORD_MISC_USER, ip, &al);
+}
+
+static int
+find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
+ int need_unwind_info, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct map *map;
+ unw_dyn_info_t di;
+ u64 table_data, segbase, fde_count;
+ int ret = -EINVAL;
+
+ map = find_map(ip, ui);
+ if (!map || !map->dso)
+ return -EINVAL;
+
+ pr_debug("unwind: find_proc_info dso %s\n", map->dso->name);
+
+ /* Check the .eh_frame section for unwinding info */
+ if (!read_unwind_spec_eh_frame(map->dso, ui->machine,
+ &table_data, &segbase, &fde_count)) {
+ memset(&di, 0, sizeof(di));
+ di.format = UNW_INFO_FORMAT_REMOTE_TABLE;
+ di.start_ip = map->start;
+ di.end_ip = map->end;
+ di.u.rti.segbase = map->start + segbase - map->pgoff;
+ di.u.rti.table_data = map->start + table_data - map->pgoff;
+ di.u.rti.table_len = fde_count * sizeof(struct table_entry)
+ / sizeof(unw_word_t);
+ ret = dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+
+#ifndef NO_LIBUNWIND_DEBUG_FRAME
+ /* Check the .debug_frame section for unwinding info */
+ if (ret < 0 &&
+ !read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
+ int fd = dso__data_get_fd(map->dso, ui->machine);
+ int is_exec = elf_is_exec(fd, map->dso->name);
+ unw_word_t base = is_exec ? 0 : map->start;
+ const char *symfile;
+
+ if (fd >= 0)
+ dso__data_put_fd(map->dso);
+
+ symfile = map->dso->symsrc_filename ?: map->dso->name;
+
+ memset(&di, 0, sizeof(di));
+ if (dwarf_find_debug_frame(0, &di, ip, base, symfile,
+ map->start, map->end))
+ return dwarf_search_unwind_table(as, ip, &di, pi,
+ need_unwind_info, arg);
+ }
+#endif
+
+ return ret;
+}
+
+static int access_fpreg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t __maybe_unused num,
+ unw_fpreg_t __maybe_unused *val,
+ int __maybe_unused __write,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: access_fpreg unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int get_dyn_info_list_addr(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused *dil_addr,
+ void __maybe_unused *arg)
+{
+ return -UNW_ENOINFO;
+}
+
+static int resume(unw_addr_space_t __maybe_unused as,
+ unw_cursor_t __maybe_unused *cu,
+ void __maybe_unused *arg)
+{
+ pr_err("unwind: resume unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int
+get_proc_name(unw_addr_space_t __maybe_unused as,
+ unw_word_t __maybe_unused addr,
+ char __maybe_unused *bufp, size_t __maybe_unused buf_len,
+ unw_word_t __maybe_unused *offp, void __maybe_unused *arg)
+{
+ pr_err("unwind: get_proc_name unsupported\n");
+ return -UNW_EINVAL;
+}
+
+static int access_dso_mem(struct unwind_info *ui, unw_word_t addr,
+ unw_word_t *data)
+{
+ struct map *map;
+ ssize_t size;
+
+ map = find_map(addr, ui);
+ if (!map) {
+ pr_debug("unwind: no map for %lx\n", (unsigned long)addr);
+ return -1;
+ }
+
+ if (!map->dso)
+ return -1;
+
+ size = dso__data_read_addr(map->dso, map, ui->machine,
+ addr, (u8 *) data, sizeof(*data));
+
+ return !(size == sizeof(*data));
+}
+
+static int access_mem(unw_addr_space_t __maybe_unused as,
+ unw_word_t addr, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct stack_dump *stack = &ui->sample->user_stack;
+ u64 start, end;
+ int offset;
+ int ret;
+
+ /* Don't support write, probably not needed. */
+ if (__write || !stack || !ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ ret = perf_reg_value(&start, &ui->sample->user_regs,
+ LIBUNWIND__ARCH_REG_SP);
+ if (ret)
+ return ret;
+
+ end = start + stack->size;
+
+ /* Check overflow. */
+ if (addr + sizeof(unw_word_t) < addr)
+ return -EINVAL;
+
+ if (addr < start || addr + sizeof(unw_word_t) >= end) {
+ ret = access_dso_mem(ui, addr, valp);
+ if (ret) {
+ pr_debug("unwind: access_mem %p not inside range"
+ " 0x%" PRIx64 "-0x%" PRIx64 "\n",
+ (void *) (uintptr_t) addr, start, end);
+ *valp = 0;
+ return ret;
+ }
+ return 0;
+ }
+
+ offset = addr - start;
+ *valp = *(unw_word_t *)&stack->data[offset];
+ pr_debug("unwind: access_mem addr %p val %lx, offset %d\n",
+ (void *) (uintptr_t) addr, (unsigned long)*valp, offset);
+ return 0;
+}
+
+static int access_reg(unw_addr_space_t __maybe_unused as,
+ unw_regnum_t regnum, unw_word_t *valp,
+ int __write, void *arg)
+{
+ struct unwind_info *ui = arg;
+ int id, ret;
+ u64 val;
+
+ /* Don't support write, I suspect we don't need it. */
+ if (__write) {
+ pr_err("unwind: access_reg w %d\n", regnum);
+ return 0;
+ }
+
+ if (!ui->sample->user_regs.regs) {
+ *valp = 0;
+ return 0;
+ }
+
+ id = LIBUNWIND__ARCH_REG_ID(regnum);
+ if (id < 0)
+ return -EINVAL;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs, id);
+ if (ret) {
+ pr_err("unwind: can't read reg %d\n", regnum);
+ return ret;
+ }
+
+ *valp = (unw_word_t) val;
+ pr_debug("unwind: reg %d, val %lx\n", regnum, (unsigned long)*valp);
+ return 0;
+}
+
+static void put_unwind_info(unw_addr_space_t __maybe_unused as,
+ unw_proc_info_t *pi __maybe_unused,
+ void *arg __maybe_unused)
+{
+ pr_debug("unwind: put_unwind_info called\n");
+}
+
+static int entry(u64 ip, struct thread *thread,
+ unwind_entry_cb_t cb, void *arg)
+{
+ struct unwind_entry e;
+ struct addr_location al;
+
+ e.sym = thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
+ e.ip = ip;
+ e.map = al.map;
+
+ pr_debug("unwind: %s:ip = 0x%" PRIx64 " (0x%" PRIx64 ")\n",
+ al.sym ? al.sym->name : "''",
+ ip,
+ al.map ? al.map->map_ip(al.map, ip) : (u64) 0);
+
+ return cb(&e, arg);
+}
+
+static void display_error(int err)
+{
+ switch (err) {
+ case UNW_EINVAL:
+ pr_err("unwind: Only supports local.\n");
+ break;
+ case UNW_EUNSPEC:
+ pr_err("unwind: Unspecified error.\n");
+ break;
+ case UNW_EBADREG:
+ pr_err("unwind: Register unavailable.\n");
+ break;
+ default:
+ break;
+ }
+}
+
+static unw_accessors_t accessors = {
+ .find_proc_info = find_proc_info,
+ .put_unwind_info = put_unwind_info,
+ .get_dyn_info_list_addr = get_dyn_info_list_addr,
+ .access_mem = access_mem,
+ .access_reg = access_reg,
+ .access_fpreg = access_fpreg,
+ .resume = resume,
+ .get_proc_name = get_proc_name,
+};
+
+static int _unwind__prepare_access(struct thread *thread)
+{
+ if (!dwarf_callchain_users)
+ return 0;
+ thread->addr_space = unw_create_addr_space(&accessors, 0);
+ if (!thread->addr_space) {
+ pr_err("unwind: Can't create unwind address space.\n");
+ return -ENOMEM;
+ }
+
+ unw_set_caching_policy(thread->addr_space, UNW_CACHE_GLOBAL);
+ return 0;
+}
+
+static void _unwind__flush_access(struct thread *thread)
+{
+ if (!dwarf_callchain_users)
+ return;
+ unw_flush_cache(thread->addr_space, 0, 0);
+}
+
+static void _unwind__finish_access(struct thread *thread)
+{
+ if (!dwarf_callchain_users)
+ return;
+ unw_destroy_addr_space(thread->addr_space);
+}
+
+static int get_entries(struct unwind_info *ui, unwind_entry_cb_t cb,
+ void *arg, int max_stack)
+{
+ u64 val;
+ unw_word_t ips[max_stack];
+ unw_addr_space_t addr_space;
+ unw_cursor_t c;
+ int ret, i = 0;
+
+ ret = perf_reg_value(&val, &ui->sample->user_regs,
+ LIBUNWIND__ARCH_REG_IP);
+ if (ret)
+ return ret;
+
+ ips[i++] = (unw_word_t) val;
+
+ /*
+ * If we need more than one entry, do the DWARF
+ * unwind itself.
+ */
+ if (max_stack - 1 > 0) {
+ WARN_ONCE(!ui->thread, "WARNING: ui->thread is NULL");
+ addr_space = ui->thread->addr_space;
+
+ if (addr_space == NULL)
+ return -1;
+
+ ret = unw_init_remote(&c, addr_space, ui);
+ if (ret)
+ display_error(ret);
+
+ while (!ret && (unw_step(&c) > 0) && i < max_stack) {
+ unw_get_reg(&c, UNW_REG_IP, &ips[i]);
+
+ /*
+ * Decrement the IP for any non-activation frames.
+			 * This is required to properly find the srcline
+ * for caller frames.
+ * See also the documentation for dwfl_frame_pc(),
+ * which this code tries to replicate.
+ */
+ if (unw_is_signal_frame(&c) <= 0)
+ --ips[i];
+
+ ++i;
+ }
+
+ max_stack = i;
+ }
+
+ /*
+ * Display what we got based on the order setup.
+ */
+ for (i = 0; i < max_stack && !ret; i++) {
+ int j = i;
+
+ if (callchain_param.order == ORDER_CALLER)
+ j = max_stack - i - 1;
+ ret = ips[j] ? entry(ips[j], ui->thread, cb, arg) : 0;
+ }
+
+ return ret;
+}
+
+static int _unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack)
+{
+ struct unwind_info ui = {
+ .sample = data,
+ .thread = thread,
+ .machine = thread->mg->machine,
+ };
+
+ if (!data->user_regs.regs)
+ return -EINVAL;
+
+ if (max_stack <= 0)
+ return -EINVAL;
+
+ return get_entries(&ui, cb, arg, max_stack);
+}
+
+static struct unwind_libunwind_ops
+_unwind_libunwind_ops = {
+ .prepare_access = _unwind__prepare_access,
+ .flush_access = _unwind__flush_access,
+ .finish_access = _unwind__finish_access,
+ .get_entries = _unwind__get_entries,
+};
+
+#ifndef REMOTE_UNWIND_LIBUNWIND
+struct unwind_libunwind_ops *
+local_unwind_libunwind_ops = &_unwind_libunwind_ops;
+#endif
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
new file mode 100644
index 000000000..b029a5e9a
--- /dev/null
+++ b/tools/perf/util/unwind-libunwind.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "unwind.h"
+#include "thread.h"
+#include "session.h"
+#include "debug.h"
+#include "env.h"
+
+struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops;
+struct unwind_libunwind_ops __weak *arm64_unwind_libunwind_ops;
+
+static void unwind__register_ops(struct thread *thread,
+ struct unwind_libunwind_ops *ops)
+{
+ thread->unwind_libunwind_ops = ops;
+}
+
+int unwind__prepare_access(struct thread *thread, struct map *map,
+ bool *initialized)
+{
+ const char *arch;
+ enum dso_type dso_type;
+ struct unwind_libunwind_ops *ops = local_unwind_libunwind_ops;
+ int err;
+
+ if (thread->addr_space) {
+ pr_debug("unwind: thread map already set, dso=%s\n",
+ map->dso->name);
+ if (initialized)
+ *initialized = true;
+ return 0;
+ }
+
+	/* env->arch is NULL in live mode (e.g. perf top) */
+ if (!thread->mg->machine->env || !thread->mg->machine->env->arch)
+ goto out_register;
+
+ dso_type = dso__type(map->dso, thread->mg->machine);
+ if (dso_type == DSO__TYPE_UNKNOWN)
+ return 0;
+
+ arch = perf_env__arch(thread->mg->machine->env);
+
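+	/*
+	 * Default to the local unwinder; 32-bit x86 and 64-bit arm targets
+	 * get the (weakly defined, possibly absent) compat unwinders, with
+	 * a missing one reported below.
+	 */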
+ if (!strcmp(arch, "x86")) {
+ if (dso_type != DSO__TYPE_64BIT)
+ ops = x86_32_unwind_libunwind_ops;
+ } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+ if (dso_type == DSO__TYPE_64BIT)
+ ops = arm64_unwind_libunwind_ops;
+ }
+
+ if (!ops) {
+ pr_err("unwind: target platform=%s is not supported\n", arch);
+ return 0;
+ }
+out_register:
+ unwind__register_ops(thread, ops);
+
+ err = thread->unwind_libunwind_ops->prepare_access(thread);
+ if (initialized)
+ *initialized = err ? false : true;
+ return err;
+}
+
+void unwind__flush_access(struct thread *thread)
+{
+ if (thread->unwind_libunwind_ops)
+ thread->unwind_libunwind_ops->flush_access(thread);
+}
+
+void unwind__finish_access(struct thread *thread)
+{
+ if (thread->unwind_libunwind_ops)
+ thread->unwind_libunwind_ops->finish_access(thread);
+}
+
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack)
+{
+ if (thread->unwind_libunwind_ops)
+ return thread->unwind_libunwind_ops->get_entries(cb, arg, thread, data, max_stack);
+ return 0;
+}
diff --git a/tools/perf/util/unwind.h b/tools/perf/util/unwind.h
new file mode 100644
index 000000000..8a44a1569
--- /dev/null
+++ b/tools/perf/util/unwind.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __UNWIND_H
+#define __UNWIND_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+struct map;
+struct perf_sample;
+struct symbol;
+struct thread;
+
+struct unwind_entry {
+ struct map *map;
+ struct symbol *sym;
+ u64 ip;
+};
+
+typedef int (*unwind_entry_cb_t)(struct unwind_entry *entry, void *arg);
+
+struct unwind_libunwind_ops {
+ int (*prepare_access)(struct thread *thread);
+ void (*flush_access)(struct thread *thread);
+ void (*finish_access)(struct thread *thread);
+ int (*get_entries)(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack);
+};
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+int unwind__get_entries(unwind_entry_cb_t cb, void *arg,
+ struct thread *thread,
+ struct perf_sample *data, int max_stack);
+/* libunwind specific */
+#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifndef LIBUNWIND__ARCH_REG_ID
+#define LIBUNWIND__ARCH_REG_ID(regnum) libunwind__arch_reg_id(regnum)
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_SP
+#define LIBUNWIND__ARCH_REG_SP PERF_REG_SP
+#endif
+
+#ifndef LIBUNWIND__ARCH_REG_IP
+#define LIBUNWIND__ARCH_REG_IP PERF_REG_IP
+#endif
+
+int LIBUNWIND__ARCH_REG_ID(int regnum);
+int unwind__prepare_access(struct thread *thread, struct map *map,
+ bool *initialized);
+void unwind__flush_access(struct thread *thread);
+void unwind__finish_access(struct thread *thread);
+#else
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+ struct map *map __maybe_unused,
+ bool *initialized __maybe_unused)
+{
+ return 0;
+}
+
+static inline void unwind__flush_access(struct thread *thread __maybe_unused) {}
+static inline void unwind__finish_access(struct thread *thread __maybe_unused) {}
+#endif
+#else
+static inline int
+unwind__get_entries(unwind_entry_cb_t cb __maybe_unused,
+ void *arg __maybe_unused,
+ struct thread *thread __maybe_unused,
+ struct perf_sample *data __maybe_unused,
+ int max_stack __maybe_unused)
+{
+ return 0;
+}
+
+static inline int unwind__prepare_access(struct thread *thread __maybe_unused,
+ struct map *map __maybe_unused,
+ bool *initialized __maybe_unused)
+{
+ return 0;
+}
+
+static inline void unwind__flush_access(struct thread *thread __maybe_unused) {}
+static inline void unwind__finish_access(struct thread *thread __maybe_unused) {}
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
+#endif /* __UNWIND_H */
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
new file mode 100644
index 000000000..070d25cee
--- /dev/null
+++ b/tools/perf/util/usage.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * usage.c
+ *
+ * Various reporting routines.
+ * Originally copied from GIT source.
+ *
+ * Copyright (C) Linus Torvalds, 2005
+ */
+#include "util.h"
+#include "debug.h"
+
+static __noreturn void usage_builtin(const char *err)
+{
+ fprintf(stderr, "\n Usage: %s\n", err);
+ exit(129);
+}
+
+/* If we are in a dlopen()ed .so, a write to a global variable would
+ * segfault (ugh), so keep things static. */
+static void (*usage_routine)(const char *err) __noreturn = usage_builtin;
+
+void usage(const char *err)
+{
+ usage_routine(err);
+}
diff --git a/tools/perf/util/util-cxx.h b/tools/perf/util/util-cxx.h
new file mode 100644
index 000000000..80a99e458
--- /dev/null
+++ b/tools/perf/util/util-cxx.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support C++ source use utilities defined in util.h
+ */
+
+#ifndef PERF_UTIL_UTIL_CXX_H
+#define PERF_UTIL_UTIL_CXX_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Now 'new' is the only C++ keyword found in util.h:
+ * in tools/include/linux/rbtree.h
+ *
+ * Other keywords, like class and delete, should be
+ * redefined if necessary.
+ */
+#define new _new
+#include "util.h"
+#undef new
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
new file mode 100644
index 000000000..eac5b858a
--- /dev/null
+++ b/tools/perf/util/util.c
@@ -0,0 +1,508 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../perf.h"
+#include "util.h"
+#include "debug.h"
+#include <api/fs/fs.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/time64.h>
+#include <unistd.h>
+#include "strlist.h"
+
+/*
+ * XXX We need to find a better place for these things...
+ */
+
+bool perf_singlethreaded = true;
+
+void perf_set_singlethreaded(void)
+{
+ perf_singlethreaded = true;
+}
+
+void perf_set_multithreaded(void)
+{
+ perf_singlethreaded = false;
+}
+
+unsigned int page_size;
+
+#ifdef _SC_LEVEL1_DCACHE_LINESIZE
+#define cache_line_size(cacheline_sizep) *cacheline_sizep = sysconf(_SC_LEVEL1_DCACHE_LINESIZE)
+#else
+static void cache_line_size(int *cacheline_sizep)
+{
+ if (sysfs__read_int("devices/system/cpu/cpu0/cache/index0/coherency_line_size", cacheline_sizep))
+ pr_debug("cannot determine cache line size");
+}
+#endif
+
+int cacheline_size(void)
+{
+ static int size;
+
+ if (!size)
+ cache_line_size(&size);
+
+ return size;
+}
+
+int sysctl_perf_event_max_stack = PERF_MAX_STACK_DEPTH;
+int sysctl_perf_event_max_contexts_per_stack = PERF_MAX_CONTEXTS_PER_STACK;
+
+int sysctl__max_stack(void)
+{
+ int value;
+
+ if (sysctl__read_int("kernel/perf_event_max_stack", &value) == 0)
+ sysctl_perf_event_max_stack = value;
+
+ if (sysctl__read_int("kernel/perf_event_max_contexts_per_stack", &value) == 0)
+ sysctl_perf_event_max_contexts_per_stack = value;
+
+ return sysctl_perf_event_max_stack;
+}
+
+bool test_attr__enabled;
+
+bool perf_host = true;
+bool perf_guest = false;
+
+void event_attr_init(struct perf_event_attr *attr)
+{
+ if (!perf_host)
+ attr->exclude_host = 1;
+ if (!perf_guest)
+ attr->exclude_guest = 1;
+ /* to capture ABI version */
+ attr->size = sizeof(*attr);
+}
+
+int mkdir_p(char *path, mode_t mode)
+{
+ struct stat st;
+ int err;
+ char *d = path;
+
+ if (*d != '/')
+ return -1;
+
+ if (stat(path, &st) == 0)
+ return 0;
+
+ while (*++d == '/');
+
+ while ((d = strchr(d, '/'))) {
+ *d = '\0';
+ err = stat(path, &st) && mkdir(path, mode);
+ *d++ = '/';
+ if (err)
+ return -1;
+ while (*d == '/')
+ ++d;
+ }
+ return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
+}
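+
+/*
+ * mkdir_p() above works like 'mkdir -p' for absolute paths, creating each
+ * missing component in turn, e.g. mkdir_p("/tmp/perf/a/b", 0755);
+ * relative paths are rejected with -1.
+ */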
+
+int rm_rf(const char *path)
+{
+ DIR *dir;
+ int ret = 0;
+ struct dirent *d;
+ char namebuf[PATH_MAX];
+
+ dir = opendir(path);
+ if (dir == NULL)
+ return 0;
+
+ while ((d = readdir(dir)) != NULL && !ret) {
+ struct stat statbuf;
+
+ if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+ continue;
+
+ scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+ path, d->d_name);
+
+		"For example:\n # apt-get install libperl-dev (Ubuntu)"
+ ret = lstat(namebuf, &statbuf);
+ if (ret < 0) {
+ pr_debug("stat failed: %s\n", namebuf);
+ break;
+ }
+
+ if (S_ISDIR(statbuf.st_mode))
+ ret = rm_rf(namebuf);
+ else
+ ret = unlink(namebuf);
+ }
+ closedir(dir);
+
+ if (ret < 0)
+ return ret;
+
+ return rmdir(path);
+}
+
+/* A filter which removes dot files */
+bool lsdir_no_dot_filter(const char *name __maybe_unused, struct dirent *d)
+{
+ return d->d_name[0] != '.';
+}
+
+/* lsdir reads a directory and stores its entries in a strlist */
+struct strlist *lsdir(const char *name,
+ bool (*filter)(const char *, struct dirent *))
+{
+ struct strlist *list = NULL;
+ DIR *dir;
+ struct dirent *d;
+
+ dir = opendir(name);
+ if (!dir)
+ return NULL;
+
+ list = strlist__new(NULL, NULL);
+ if (!list) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ while ((d = readdir(dir)) != NULL) {
+ if (!filter || filter(name, d))
+ strlist__add(list, d->d_name);
+ }
+
+out:
+ closedir(dir);
+ return list;
+}
+
+static int slow_copyfile(const char *from, const char *to, struct nsinfo *nsi)
+{
+ int err = -1;
+ char *line = NULL;
+ size_t n;
+ FILE *from_fp, *to_fp;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ from_fp = fopen(from, "r");
+ nsinfo__mountns_exit(&nsc);
+ if (from_fp == NULL)
+ goto out;
+
+ to_fp = fopen(to, "w");
+ if (to_fp == NULL)
+ goto out_fclose_from;
+
+ while (getline(&line, &n, from_fp) > 0)
+ if (fputs(line, to_fp) == EOF)
+ goto out_fclose_to;
+ err = 0;
+out_fclose_to:
+ fclose(to_fp);
+ free(line);
+out_fclose_from:
+ fclose(from_fp);
+out:
+ return err;
+}
+
+static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+ void *ptr;
+ loff_t pgoff;
+
+ pgoff = off_in & ~(page_size - 1);
+ off_in -= pgoff;
+
+ ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+ if (ptr == MAP_FAILED)
+ return -1;
+
+ while (size) {
+ ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+ if (ret < 0 && errno == EINTR)
+ continue;
+ if (ret <= 0)
+ break;
+
+ size -= ret;
+ off_in += ret;
+ off_out += ret;
+ }
+ munmap(ptr, off_in + size);
+
+ return size ? -1 : 0;
+}
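Because mmap() requires a page-aligned file offset, copyfile_offset() maps from the enclosing page boundary and carries the residue as an in-page offset. As a worked example with page_size = 4096 and off_in = 5000: pgoff = 5000 & ~4095 = 4096, the residual off_in becomes 5000 - 4096 = 904, and the mapping therefore spans 904 + size bytes starting at file offset 4096.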
+
+static int copyfile_mode_ns(const char *from, const char *to, mode_t mode,
+ struct nsinfo *nsi)
+{
+ int fromfd, tofd;
+ struct stat st;
+ int err;
+ char *tmp = NULL, *ptr = NULL;
+ struct nscookie nsc;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ err = stat(from, &st);
+ nsinfo__mountns_exit(&nsc);
+ if (err)
+ goto out;
+ err = -1;
+
+ /* extra 'x' at the end is to reserve space for '.' */
+ if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+ tmp = NULL;
+ goto out;
+ }
+ ptr = strrchr(tmp, '/');
+ if (!ptr)
+ goto out;
+ ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+ *ptr = '.';
+
+ tofd = mkstemp(tmp);
+ if (tofd < 0)
+ goto out;
+
+ if (fchmod(tofd, mode))
+ goto out_close_to;
+
+ if (st.st_size == 0) { /* /proc? do it slowly... */
+ err = slow_copyfile(from, tmp, nsi);
+ goto out_close_to;
+ }
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ fromfd = open(from, O_RDONLY);
+ nsinfo__mountns_exit(&nsc);
+ if (fromfd < 0)
+ goto out_close_to;
+
+ err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
+
+ close(fromfd);
+out_close_to:
+ close(tofd);
+ if (!err)
+ err = link(tmp, to);
+ unlink(tmp);
+out:
+ free(tmp);
+ return err;
+}
+
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi)
+{
+ return copyfile_mode_ns(from, to, 0755, nsi);
+}
+
+int copyfile_mode(const char *from, const char *to, mode_t mode)
+{
+ return copyfile_mode_ns(from, to, mode, NULL);
+}
+
+int copyfile(const char *from, const char *to)
+{
+ return copyfile_mode(from, to, 0755);
+}
+
+static ssize_t ion(bool is_read, int fd, void *buf, size_t n)
+{
+ void *buf_start = buf;
+ size_t left = n;
+
+ while (left) {
+ /* buf must be treated as const if !is_read. */
+ ssize_t ret = is_read ? read(fd, buf, left) :
+ write(fd, buf, left);
+
+ if (ret < 0 && errno == EINTR)
+ continue;
+ if (ret <= 0)
+ return ret;
+
+ left -= ret;
+ buf += ret;
+ }
+
+ BUG_ON((size_t)(buf - buf_start) != n);
+ return n;
+}
+
+/*
+ * Read exactly 'n' bytes or return an error.
+ */
+ssize_t readn(int fd, void *buf, size_t n)
+{
+ return ion(true, fd, buf, n);
+}
+
+/*
+ * Write exactly 'n' bytes or return an error.
+ */
+ssize_t writen(int fd, const void *buf, size_t n)
+{
+ /* ion does not modify buf. */
+ return ion(false, fd, (void *)buf, n);
+}
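A minimal usage sketch, assuming fd is an already-open descriptor (hypothetical): because readn()/writen() retry on EINTR and on short transfers, any return value other than n can be treated as failure.

    char hdr[64];

    if (readn(fd, hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr))
            pr_debug("short read or I/O error\n");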
+
+size_t hex_width(u64 v)
+{
+ size_t n = 1;
+
+ while ((v >>= 4))
+ ++n;
+
+ return n;
+}
+
+/*
+ * Parse leading hexadecimal characters into *long_val.
+ * Return the number of characters processed.
+ */
+int hex2u64(const char *ptr, u64 *long_val)
+{
+ char *p;
+
+ *long_val = strtoull(ptr, &p, 16);
+
+ return p - ptr;
+}
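Worked examples of the two hex helpers: hex_width(0xdeadbeef) returns 8, one column per populated nibble, and hex2u64("1a2bzz", &v) stores 0x1a2b in v and returns 4, the number of characters it consumed.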
+
+int perf_event_paranoid(void)
+{
+ int value;
+
+ if (sysctl__read_int("kernel/perf_event_paranoid", &value))
+ return INT_MAX;
+
+ return value;
+}
+
+static int
+fetch_ubuntu_kernel_version(unsigned int *puint)
+{
+ ssize_t len;
+ size_t line_len = 0;
+ char *ptr, *line = NULL;
+ int version, patchlevel, sublevel, err;
+ FILE *vsig;
+
+ if (!puint)
+ return 0;
+
+ vsig = fopen("/proc/version_signature", "r");
+ if (!vsig) {
+		pr_debug("Opening /proc/version_signature failed: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ len = getline(&line, &line_len, vsig);
+ fclose(vsig);
+ err = -1;
+ if (len <= 0) {
+ pr_debug("Reading from /proc/version_signature failed: %s\n",
+ strerror(errno));
+ goto errout;
+ }
+
+ ptr = strrchr(line, ' ');
+ if (!ptr) {
+ pr_debug("Parsing /proc/version_signature failed: %s\n", line);
+ goto errout;
+ }
+
+ err = sscanf(ptr + 1, "%d.%d.%d",
+ &version, &patchlevel, &sublevel);
+ if (err != 3) {
+ pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n",
+ line);
+ goto errout;
+ }
+
+ *puint = (version << 16) + (patchlevel << 8) + sublevel;
+ err = 0;
+errout:
+ free(line);
+ return err;
+}
+
+int
+fetch_kernel_version(unsigned int *puint, char *str,
+ size_t str_size)
+{
+ struct utsname utsname;
+ int version, patchlevel, sublevel, err;
+ bool int_ver_ready = false;
+
+ if (access("/proc/version_signature", R_OK) == 0)
+ if (!fetch_ubuntu_kernel_version(puint))
+ int_ver_ready = true;
+
+ if (uname(&utsname))
+ return -1;
+
+ if (str && str_size) {
+ strncpy(str, utsname.release, str_size);
+ str[str_size - 1] = '\0';
+ }
+
+ if (!puint || int_ver_ready)
+ return 0;
+
+ err = sscanf(utsname.release, "%d.%d.%d",
+ &version, &patchlevel, &sublevel);
+
+ if (err != 3) {
+ pr_debug("Unable to get kernel version from uname '%s'\n",
+ utsname.release);
+ return -1;
+ }
+
+ *puint = (version << 16) + (patchlevel << 8) + sublevel;
+ return 0;
+}
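A worked example of the packing: the release string "4.19.249" encodes as (4 << 16) + (19 << 8) + 249 = 0x0413f9, and the KVER_* macros in util.h unpack it again. A minimal round-trip sketch:

    unsigned int ver;
    char rel[64];

    if (!fetch_kernel_version(&ver, rel, sizeof(rel)))
            printf("kernel " KVER_FMT " (%s)\n", KVER_PARAM(ver), rel);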
+
+const char *perf_tip(const char *dirpath)
+{
+ struct strlist *tips;
+ struct str_node *node;
+ char *tip = NULL;
+ struct strlist_config conf = {
+ .dirname = dirpath,
+ .file_only = true,
+ };
+
+ tips = strlist__new("tips.txt", &conf);
+ if (tips == NULL)
+ return errno == ENOENT ? NULL :
+ "Tip: check path of tips.txt or get more memory! ;-p";
+
+ if (strlist__nr_entries(tips) == 0)
+ goto out;
+
+ node = strlist__entry(tips, random() % strlist__nr_entries(tips));
+ if (asprintf(&tip, "Tip: %s", node->s) < 0)
+ tip = (char *)"Tip: get more memory! ;-)";
+
+out:
+ strlist__delete(tips);
+
+ return tip;
+}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
new file mode 100644
index 000000000..2efec9e77
--- /dev/null
+++ b/tools/perf/util/util.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef GIT_COMPAT_UTIL_H
+#define GIT_COMPAT_UTIL_H
+
+#define _BSD_SOURCE 1
+/* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
+#define _DEFAULT_SOURCE 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+/* General helper functions */
+void usage(const char *err) __noreturn;
+void die(const char *err, ...) __noreturn __printf(1, 2);
+
+static inline void *zalloc(size_t size)
+{
+ return calloc(1, size);
+}
+
+#define zfree(ptr) ({ free((void *)*ptr); *ptr = NULL; })
+
+struct dirent;
+struct nsinfo;
+struct strlist;
+
+int mkdir_p(char *path, mode_t mode);
+int rm_rf(const char *path);
+struct strlist *lsdir(const char *name, bool (*filter)(const char *, struct dirent *));
+bool lsdir_no_dot_filter(const char *name, struct dirent *d);
+int copyfile(const char *from, const char *to);
+int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_ns(const char *from, const char *to, struct nsinfo *nsi);
+
+ssize_t readn(int fd, void *buf, size_t n);
+ssize_t writen(int fd, const void *buf, size_t n);
+
+size_t hex_width(u64 v);
+int hex2u64(const char *ptr, u64 *val);
+
+extern unsigned int page_size;
+int __pure cacheline_size(void);
+
+int sysctl__max_stack(void);
+
+int fetch_kernel_version(unsigned int *puint,
+ char *str, size_t str_sz);
+#define KVER_VERSION(x) (((x) >> 16) & 0xff)
+#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff)
+#define KVER_SUBLEVEL(x) ((x) & 0xff)
+#define KVER_FMT "%d.%d.%d"
+#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x)
+
+const char *perf_tip(const char *dirpath);
+
+#ifndef HAVE_GET_CURRENT_DIR_NAME
+char *get_current_dir_name(void);
+#endif
+
+#ifndef HAVE_SCHED_GETCPU_SUPPORT
+int sched_getcpu(void);
+#endif
+
+#ifndef HAVE_SETNS_SUPPORT
+int setns(int fd, int nstype);
+#endif
+
+extern bool perf_singlethreaded;
+
+void perf_set_singlethreaded(void);
+void perf_set_multithreaded(void);
+
+#ifndef O_CLOEXEC
+#ifdef __sparc__
+#define O_CLOEXEC 0x400000
+#elif defined(__alpha__) || defined(__hppa__)
+#define O_CLOEXEC 010000000
+#else
+#define O_CLOEXEC 02000000
+#endif
+#endif
+
+#endif /* GIT_COMPAT_UTIL_H */
diff --git a/tools/perf/util/values.c b/tools/perf/util/values.c
new file mode 100644
index 000000000..4b7a303e4
--- /dev/null
+++ b/tools/perf/util/values.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "util.h"
+#include "values.h"
+#include "debug.h"
+
+int perf_read_values_init(struct perf_read_values *values)
+{
+ values->threads_max = 16;
+ values->pid = malloc(values->threads_max * sizeof(*values->pid));
+ values->tid = malloc(values->threads_max * sizeof(*values->tid));
+ values->value = zalloc(values->threads_max * sizeof(*values->value));
+ if (!values->pid || !values->tid || !values->value) {
+ pr_debug("failed to allocate read_values threads arrays");
+ goto out_free_pid;
+ }
+ values->threads = 0;
+
+ values->counters_max = 16;
+ values->counterrawid = malloc(values->counters_max
+ * sizeof(*values->counterrawid));
+ values->countername = malloc(values->counters_max
+ * sizeof(*values->countername));
+ if (!values->counterrawid || !values->countername) {
+ pr_debug("failed to allocate read_values counters arrays");
+ goto out_free_counter;
+ }
+ values->counters = 0;
+
+ return 0;
+
+out_free_counter:
+ zfree(&values->counterrawid);
+ zfree(&values->countername);
+out_free_pid:
+ zfree(&values->pid);
+ zfree(&values->tid);
+ zfree(&values->value);
+ return -ENOMEM;
+}
+
+void perf_read_values_destroy(struct perf_read_values *values)
+{
+ int i;
+
+ if (!values->threads_max || !values->counters_max)
+ return;
+
+ for (i = 0; i < values->threads; i++)
+ zfree(&values->value[i]);
+ zfree(&values->value);
+ zfree(&values->pid);
+ zfree(&values->tid);
+ zfree(&values->counterrawid);
+ for (i = 0; i < values->counters; i++)
+ zfree(&values->countername[i]);
+ zfree(&values->countername);
+}
+
+static int perf_read_values__enlarge_threads(struct perf_read_values *values)
+{
+ int nthreads_max = values->threads_max * 2;
+ void *npid = realloc(values->pid, nthreads_max * sizeof(*values->pid)),
+ *ntid = realloc(values->tid, nthreads_max * sizeof(*values->tid)),
+ *nvalue = realloc(values->value, nthreads_max * sizeof(*values->value));
+
+ if (!npid || !ntid || !nvalue)
+ goto out_err;
+
+ values->threads_max = nthreads_max;
+ values->pid = npid;
+ values->tid = ntid;
+ values->value = nvalue;
+ return 0;
+out_err:
+ free(npid);
+ free(ntid);
+ free(nvalue);
+ pr_debug("failed to enlarge read_values threads arrays");
+ return -ENOMEM;
+}
+
+static int perf_read_values__findnew_thread(struct perf_read_values *values,
+ u32 pid, u32 tid)
+{
+ int i;
+
+ for (i = 0; i < values->threads; i++)
+ if (values->pid[i] == pid && values->tid[i] == tid)
+ return i;
+
+ if (values->threads == values->threads_max) {
+ i = perf_read_values__enlarge_threads(values);
+ if (i < 0)
+ return i;
+ }
+
+ i = values->threads;
+
+ values->value[i] = zalloc(values->counters_max * sizeof(**values->value));
+ if (!values->value[i]) {
+ pr_debug("failed to allocate read_values counters array");
+ return -ENOMEM;
+ }
+ values->pid[i] = pid;
+ values->tid[i] = tid;
+ values->threads = i + 1;
+
+ return i;
+}
+
+static int perf_read_values__enlarge_counters(struct perf_read_values *values)
+{
+ char **countername;
+ int i, counters_max = values->counters_max * 2;
+ u64 *counterrawid = realloc(values->counterrawid, counters_max * sizeof(*values->counterrawid));
+
+ if (!counterrawid) {
+ pr_debug("failed to enlarge read_values rawid array");
+ goto out_enomem;
+ }
+
+ countername = realloc(values->countername, counters_max * sizeof(*values->countername));
+ if (!countername) {
+		pr_debug("failed to enlarge read_values countername array");
+ goto out_free_rawid;
+ }
+
+ for (i = 0; i < values->threads; i++) {
+ u64 *value = realloc(values->value[i], counters_max * sizeof(**values->value));
+ int j;
+
+ if (!value) {
+ pr_debug("failed to enlarge read_values ->values array");
+ goto out_free_name;
+ }
+
+ for (j = values->counters_max; j < counters_max; j++)
+ value[j] = 0;
+
+ values->value[i] = value;
+ }
+
+ values->counters_max = counters_max;
+ values->counterrawid = counterrawid;
+ values->countername = countername;
+
+ return 0;
+out_free_name:
+ free(countername);
+out_free_rawid:
+ free(counterrawid);
+out_enomem:
+ return -ENOMEM;
+}
+
+static int perf_read_values__findnew_counter(struct perf_read_values *values,
+ u64 rawid, const char *name)
+{
+ int i;
+
+ for (i = 0; i < values->counters; i++)
+ if (values->counterrawid[i] == rawid)
+ return i;
+
+ if (values->counters == values->counters_max) {
+ i = perf_read_values__enlarge_counters(values);
+ if (i)
+ return i;
+ }
+
+ i = values->counters++;
+ values->counterrawid[i] = rawid;
+ values->countername[i] = strdup(name);
+
+ return i;
+}
+
+int perf_read_values_add_value(struct perf_read_values *values,
+ u32 pid, u32 tid,
+ u64 rawid, const char *name, u64 value)
+{
+ int tindex, cindex;
+
+ tindex = perf_read_values__findnew_thread(values, pid, tid);
+ if (tindex < 0)
+ return tindex;
+ cindex = perf_read_values__findnew_counter(values, rawid, name);
+ if (cindex < 0)
+ return cindex;
+
+ values->value[tindex][cindex] += value;
+ return 0;
+}
+
+static void perf_read_values__display_pretty(FILE *fp,
+ struct perf_read_values *values)
+{
+ int i, j;
+ int pidwidth, tidwidth;
+ int *counterwidth;
+
+ counterwidth = malloc(values->counters * sizeof(*counterwidth));
+ if (!counterwidth) {
+ fprintf(fp, "INTERNAL ERROR: Failed to allocate counterwidth array\n");
+ return;
+ }
+ tidwidth = 3;
+ pidwidth = 3;
+ for (j = 0; j < values->counters; j++)
+ counterwidth[j] = strlen(values->countername[j]);
+ for (i = 0; i < values->threads; i++) {
+ int width;
+
+ width = snprintf(NULL, 0, "%d", values->pid[i]);
+ if (width > pidwidth)
+ pidwidth = width;
+ width = snprintf(NULL, 0, "%d", values->tid[i]);
+ if (width > tidwidth)
+ tidwidth = width;
+ for (j = 0; j < values->counters; j++) {
+ width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+ if (width > counterwidth[j])
+ counterwidth[j] = width;
+ }
+ }
+
+ fprintf(fp, "# %*s %*s", pidwidth, "PID", tidwidth, "TID");
+ for (j = 0; j < values->counters; j++)
+ fprintf(fp, " %*s", counterwidth[j], values->countername[j]);
+ fprintf(fp, "\n");
+
+ for (i = 0; i < values->threads; i++) {
+ fprintf(fp, " %*d %*d", pidwidth, values->pid[i],
+ tidwidth, values->tid[i]);
+ for (j = 0; j < values->counters; j++)
+ fprintf(fp, " %*" PRIu64,
+ counterwidth[j], values->value[i][j]);
+ fprintf(fp, "\n");
+ }
+ free(counterwidth);
+}
+
+static void perf_read_values__display_raw(FILE *fp,
+ struct perf_read_values *values)
+{
+ int width, pidwidth, tidwidth, namewidth, rawwidth, countwidth;
+ int i, j;
+
+ tidwidth = 3; /* TID */
+ pidwidth = 3; /* PID */
+ namewidth = 4; /* "Name" */
+ rawwidth = 3; /* "Raw" */
+ countwidth = 5; /* "Count" */
+
+ for (i = 0; i < values->threads; i++) {
+ width = snprintf(NULL, 0, "%d", values->pid[i]);
+ if (width > pidwidth)
+ pidwidth = width;
+ width = snprintf(NULL, 0, "%d", values->tid[i]);
+ if (width > tidwidth)
+ tidwidth = width;
+ }
+ for (j = 0; j < values->counters; j++) {
+ width = strlen(values->countername[j]);
+ if (width > namewidth)
+ namewidth = width;
+ width = snprintf(NULL, 0, "%" PRIx64, values->counterrawid[j]);
+ if (width > rawwidth)
+ rawwidth = width;
+ }
+ for (i = 0; i < values->threads; i++) {
+ for (j = 0; j < values->counters; j++) {
+ width = snprintf(NULL, 0, "%" PRIu64, values->value[i][j]);
+ if (width > countwidth)
+ countwidth = width;
+ }
+ }
+
+ fprintf(fp, "# %*s %*s %*s %*s %*s\n",
+ pidwidth, "PID", tidwidth, "TID",
+ namewidth, "Name", rawwidth, "Raw",
+ countwidth, "Count");
+ for (i = 0; i < values->threads; i++)
+ for (j = 0; j < values->counters; j++)
+ fprintf(fp, " %*d %*d %*s %*" PRIx64 " %*" PRIu64,
+ pidwidth, values->pid[i],
+ tidwidth, values->tid[i],
+ namewidth, values->countername[j],
+ rawwidth, values->counterrawid[j],
+ countwidth, values->value[i][j]);
+}
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values, int raw)
+{
+ if (raw)
+ perf_read_values__display_raw(fp, values);
+ else
+ perf_read_values__display_pretty(fp, values);
+}
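A minimal sketch of the life cycle of this API; the pid/tid, raw id, and counts below are made up. Note that perf_read_values_add_value() accumulates into an existing cell rather than overwriting it:

    struct perf_read_values vals;

    if (perf_read_values_init(&vals) == 0) {
            perf_read_values_add_value(&vals, 1234, 1234, 0x10, "cycles", 1000000);
            perf_read_values_add_value(&vals, 1234, 1234, 0x10, "cycles", 500000);
            perf_read_values_display(stdout, &vals, 0);     /* 0 = pretty format */
            perf_read_values_destroy(&vals);
    }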
diff --git a/tools/perf/util/values.h b/tools/perf/util/values.h
new file mode 100644
index 000000000..8c41f22f4
--- /dev/null
+++ b/tools/perf/util/values.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VALUES_H
+#define __PERF_VALUES_H
+
+#include <linux/types.h>
+
+struct perf_read_values {
+ int threads;
+ int threads_max;
+ u32 *pid, *tid;
+ int counters;
+ int counters_max;
+ u64 *counterrawid;
+ char **countername;
+ u64 **value;
+};
+
+int perf_read_values_init(struct perf_read_values *values);
+void perf_read_values_destroy(struct perf_read_values *values);
+
+int perf_read_values_add_value(struct perf_read_values *values,
+ u32 pid, u32 tid,
+ u64 rawid, const char *name, u64 value);
+
+void perf_read_values_display(FILE *fp, struct perf_read_values *values,
+ int raw);
+
+#endif /* __PERF_VALUES_H */
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
new file mode 100644
index 000000000..741af209b
--- /dev/null
+++ b/tools/perf/util/vdso.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <linux/kernel.h>
+
+#include "vdso.h"
+#include "util.h"
+#include "symbol.h"
+#include "machine.h"
+#include "thread.h"
+#include "linux/string.h"
+#include "debug.h"
+
+/*
+ * Include definition of find_vdso_map() also used in perf-read-vdso.c for
+ * building perf-read-vdso32 and perf-read-vdsox32.
+ */
+#include "find-vdso-map.c"
+
+#define VDSO__TEMP_FILE_NAME "/tmp/perf-vdso.so-XXXXXX"
+
+struct vdso_file {
+ bool found;
+ bool error;
+ char temp_file_name[sizeof(VDSO__TEMP_FILE_NAME)];
+ const char *dso_name;
+ const char *read_prog;
+};
+
+struct vdso_info {
+ struct vdso_file vdso;
+#if BITS_PER_LONG == 64
+ struct vdso_file vdso32;
+ struct vdso_file vdsox32;
+#endif
+};
+
+static struct vdso_info *vdso_info__new(void)
+{
+ static const struct vdso_info vdso_info_init = {
+ .vdso = {
+ .temp_file_name = VDSO__TEMP_FILE_NAME,
+ .dso_name = DSO__NAME_VDSO,
+ },
+#if BITS_PER_LONG == 64
+ .vdso32 = {
+ .temp_file_name = VDSO__TEMP_FILE_NAME,
+ .dso_name = DSO__NAME_VDSO32,
+ .read_prog = "perf-read-vdso32",
+ },
+ .vdsox32 = {
+ .temp_file_name = VDSO__TEMP_FILE_NAME,
+ .dso_name = DSO__NAME_VDSOX32,
+ .read_prog = "perf-read-vdsox32",
+ },
+#endif
+ };
+
+ return memdup(&vdso_info_init, sizeof(vdso_info_init));
+}
+
+static char *get_file(struct vdso_file *vdso_file)
+{
+ char *vdso = NULL;
+ char *buf = NULL;
+ void *start, *end;
+ size_t size;
+ int fd;
+
+ if (vdso_file->found)
+ return vdso_file->temp_file_name;
+
+ if (vdso_file->error || find_vdso_map(&start, &end))
+ return NULL;
+
+ size = end - start;
+
+ buf = memdup(start, size);
+ if (!buf)
+ return NULL;
+
+ fd = mkstemp(vdso_file->temp_file_name);
+ if (fd < 0)
+ goto out;
+
+ if (size == (size_t) write(fd, buf, size))
+ vdso = vdso_file->temp_file_name;
+
+ close(fd);
+
+ out:
+ free(buf);
+
+ vdso_file->found = (vdso != NULL);
+ vdso_file->error = !vdso_file->found;
+ return vdso;
+}
+
+void machine__exit_vdso(struct machine *machine)
+{
+ struct vdso_info *vdso_info = machine->vdso_info;
+
+ if (!vdso_info)
+ return;
+
+ if (vdso_info->vdso.found)
+ unlink(vdso_info->vdso.temp_file_name);
+#if BITS_PER_LONG == 64
+ if (vdso_info->vdso32.found)
+ unlink(vdso_info->vdso32.temp_file_name);
+ if (vdso_info->vdsox32.found)
+ unlink(vdso_info->vdsox32.temp_file_name);
+#endif
+
+ zfree(&machine->vdso_info);
+}
+
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+ const char *long_name)
+{
+ struct dso *dso;
+
+ dso = dso__new(short_name);
+ if (dso != NULL) {
+ __dsos__add(&machine->dsos, dso);
+ dso__set_long_name(dso, long_name, false);
+ }
+
+ return dso;
+}
+
+static enum dso_type machine__thread_dso_type(struct machine *machine,
+ struct thread *thread)
+{
+ enum dso_type dso_type = DSO__TYPE_UNKNOWN;
+ struct map *map = map_groups__first(thread->mg);
+
+ for (; map ; map = map_groups__next(map)) {
+ struct dso *dso = map->dso;
+ if (!dso || dso->long_name[0] != '/')
+ continue;
+ dso_type = dso__type(dso, machine);
+ if (dso_type != DSO__TYPE_UNKNOWN)
+ break;
+ }
+
+ return dso_type;
+}
+
+#if BITS_PER_LONG == 64
+
+static int vdso__do_copy_compat(FILE *f, int fd)
+{
+ char buf[4096];
+ size_t count;
+
+ while (1) {
+ count = fread(buf, 1, sizeof(buf), f);
+ if (ferror(f))
+ return -errno;
+ if (feof(f))
+ break;
+ if (count && writen(fd, buf, count) != (ssize_t)count)
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int vdso__copy_compat(const char *prog, int fd)
+{
+ FILE *f;
+ int err;
+
+ f = popen(prog, "r");
+ if (!f)
+ return -errno;
+
+ err = vdso__do_copy_compat(f, fd);
+
+ if (pclose(f) == -1)
+ return -errno;
+
+ return err;
+}
+
+static int vdso__create_compat_file(const char *prog, char *temp_name)
+{
+ int fd, err;
+
+ fd = mkstemp(temp_name);
+ if (fd < 0)
+ return -errno;
+
+ err = vdso__copy_compat(prog, fd);
+
+ if (close(fd) == -1)
+ return -errno;
+
+ return err;
+}
+
+static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
+{
+ int err;
+
+ if (vdso_file->found)
+ return vdso_file->temp_file_name;
+
+ if (vdso_file->error)
+ return NULL;
+
+ err = vdso__create_compat_file(vdso_file->read_prog,
+ vdso_file->temp_file_name);
+ if (err) {
+ pr_err("%s failed, error %d\n", vdso_file->read_prog, err);
+ vdso_file->error = true;
+ return NULL;
+ }
+
+ vdso_file->found = true;
+
+ return vdso_file->temp_file_name;
+}
+
+static struct dso *__machine__findnew_compat(struct machine *machine,
+ struct vdso_file *vdso_file)
+{
+ const char *file_name;
+ struct dso *dso;
+
+ dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
+ if (dso)
+ goto out;
+
+ file_name = vdso__get_compat_file(vdso_file);
+ if (!file_name)
+ goto out;
+
+ dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out:
+ return dso;
+}
+
+static int __machine__findnew_vdso_compat(struct machine *machine,
+ struct thread *thread,
+ struct vdso_info *vdso_info,
+ struct dso **dso)
+{
+ enum dso_type dso_type;
+
+ dso_type = machine__thread_dso_type(machine, thread);
+
+#ifndef HAVE_PERF_READ_VDSO32
+ if (dso_type == DSO__TYPE_32BIT)
+ return 0;
+#endif
+#ifndef HAVE_PERF_READ_VDSOX32
+ if (dso_type == DSO__TYPE_X32BIT)
+ return 0;
+#endif
+
+ switch (dso_type) {
+ case DSO__TYPE_32BIT:
+ *dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
+ return 1;
+ case DSO__TYPE_X32BIT:
+ *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
+ return 1;
+ case DSO__TYPE_UNKNOWN:
+ case DSO__TYPE_64BIT:
+ default:
+ return 0;
+ }
+}
+
+#endif
+
+static struct dso *machine__find_vdso(struct machine *machine,
+ struct thread *thread)
+{
+ struct dso *dso = NULL;
+ enum dso_type dso_type;
+
+ dso_type = machine__thread_dso_type(machine, thread);
+ switch (dso_type) {
+ case DSO__TYPE_32BIT:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO32, true);
+ if (!dso) {
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO,
+ true);
+ if (dso && dso_type != dso__type(dso, machine))
+ dso = NULL;
+ }
+ break;
+ case DSO__TYPE_X32BIT:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSOX32, true);
+ break;
+ case DSO__TYPE_64BIT:
+ case DSO__TYPE_UNKNOWN:
+ default:
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ break;
+ }
+
+ return dso;
+}
+
+struct dso *machine__findnew_vdso(struct machine *machine,
+ struct thread *thread)
+{
+ struct vdso_info *vdso_info;
+ struct dso *dso = NULL;
+
+ down_write(&machine->dsos.lock);
+ if (!machine->vdso_info)
+ machine->vdso_info = vdso_info__new();
+
+ vdso_info = machine->vdso_info;
+ if (!vdso_info)
+ goto out_unlock;
+
+ dso = machine__find_vdso(machine, thread);
+ if (dso)
+ goto out_unlock;
+
+#if BITS_PER_LONG == 64
+ if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+ goto out_unlock;
+#endif
+
+ dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+ if (!dso) {
+ char *file;
+
+ file = get_file(&vdso_info->vdso);
+ if (file)
+ dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
+ }
+
+out_unlock:
+ dso__get(dso);
+ up_write(&machine->dsos.lock);
+ return dso;
+}
+
+bool dso__is_vdso(struct dso *dso)
+{
+ return !strcmp(dso->short_name, DSO__NAME_VDSO) ||
+ !strcmp(dso->short_name, DSO__NAME_VDSO32) ||
+ !strcmp(dso->short_name, DSO__NAME_VDSOX32);
+}
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
new file mode 100644
index 000000000..bc74ace60
--- /dev/null
+++ b/tools/perf/util/vdso.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_VDSO__
+#define __PERF_VDSO__
+
+#include <linux/types.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define VDSO__MAP_NAME "[vdso]"
+
+#define DSO__NAME_VDSO "[vdso]"
+#define DSO__NAME_VDSO32 "[vdso32]"
+#define DSO__NAME_VDSOX32 "[vdsox32]"
+
+static inline bool is_vdso_map(const char *filename)
+{
+ return !strcmp(filename, VDSO__MAP_NAME);
+}
+
+struct dso;
+
+bool dso__is_vdso(struct dso *dso);
+
+struct machine;
+struct thread;
+
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
+
+#endif /* __PERF_VDSO__ */
diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
new file mode 100644
index 000000000..dc95154f5
--- /dev/null
+++ b/tools/perf/util/xyarray.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "xyarray.h"
+#include "util.h"
+#include <stdlib.h>
+#include <string.h>
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
+{
+ size_t row_size = ylen * entry_size;
+ struct xyarray *xy = zalloc(sizeof(*xy) + xlen * row_size);
+
+ if (xy != NULL) {
+ xy->entry_size = entry_size;
+ xy->row_size = row_size;
+ xy->entries = xlen * ylen;
+ xy->max_x = xlen;
+ xy->max_y = ylen;
+ }
+
+ return xy;
+}
+
+void xyarray__reset(struct xyarray *xy)
+{
+ size_t n = xy->entries * xy->entry_size;
+
+ memset(xy->contents, 0, n);
+}
+
+void xyarray__delete(struct xyarray *xy)
+{
+ free(xy);
+}
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
new file mode 100644
index 000000000..2627b038b
--- /dev/null
+++ b/tools/perf/util/xyarray.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PERF_XYARRAY_H_
+#define _PERF_XYARRAY_H_ 1
+
+#include <linux/compiler.h>
+#include <sys/types.h>
+
+struct xyarray {
+ size_t row_size;
+ size_t entry_size;
+ size_t entries;
+ size_t max_x;
+ size_t max_y;
+ char contents[] __aligned(8);
+};
+
+struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
+void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
+
+static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
+{
+ return &xy->contents[x * xy->row_size + y * xy->entry_size];
+}
+
+static inline int xyarray__max_y(struct xyarray *xy)
+{
+ return xy->max_y;
+}
+
+static inline int xyarray__max_x(struct xyarray *xy)
+{
+ return xy->max_x;
+}
+
+#endif /* _PERF_XYARRAY_H_ */
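A minimal usage sketch: a 2x3 grid of u64 counters. xyarray__new() zero-fills the contents (it allocates with zalloc()), and xyarray__entry() computes the cell address as x * row_size + y * entry_size:

    struct xyarray *xy = xyarray__new(2, 3, sizeof(u64));

    if (xy) {
            u64 *cell = xyarray__entry(xy, 1, 2);   /* row 1, column 2 */
            *cell += 1;
            xyarray__reset(xy);                     /* zero all 2*3 entries */
            xyarray__delete(xy);
    }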
diff --git a/tools/perf/util/zlib.c b/tools/perf/util/zlib.c
new file mode 100644
index 000000000..902ce6384
--- /dev/null
+++ b/tools/perf/util/zlib.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <zlib.h>
+#include <linux/compiler.h>
+#include <unistd.h>
+
+#include "util/compress.h"
+#include "util/util.h"
+#include "util/debug.h"
+
+
+#define CHUNK_SIZE 16384
+
+int gzip_decompress_to_file(const char *input, int output_fd)
+{
+ int ret = Z_STREAM_ERROR;
+ int input_fd;
+ void *ptr;
+ int len;
+ struct stat stbuf;
+ unsigned char buf[CHUNK_SIZE];
+ z_stream zs = {
+ .zalloc = Z_NULL,
+ .zfree = Z_NULL,
+ .opaque = Z_NULL,
+ .avail_in = 0,
+ .next_in = Z_NULL,
+ };
+
+ input_fd = open(input, O_RDONLY);
+ if (input_fd < 0)
+ return -1;
+
+ if (fstat(input_fd, &stbuf) < 0)
+ goto out_close;
+
+ ptr = mmap(NULL, stbuf.st_size, PROT_READ, MAP_PRIVATE, input_fd, 0);
+ if (ptr == MAP_FAILED)
+ goto out_close;
+
+ if (inflateInit2(&zs, 16 + MAX_WBITS) != Z_OK)
+ goto out_unmap;
+
+ zs.next_in = ptr;
+ zs.avail_in = stbuf.st_size;
+
+ do {
+ zs.next_out = buf;
+ zs.avail_out = CHUNK_SIZE;
+
+ ret = inflate(&zs, Z_NO_FLUSH);
+ switch (ret) {
+ case Z_NEED_DICT:
+ ret = Z_DATA_ERROR;
+ /* fall through */
+ case Z_DATA_ERROR:
+ case Z_MEM_ERROR:
+ goto out;
+ default:
+ break;
+ }
+
+ len = CHUNK_SIZE - zs.avail_out;
+ if (writen(output_fd, buf, len) != len) {
+ ret = Z_DATA_ERROR;
+ goto out;
+ }
+
+ } while (ret != Z_STREAM_END);
+
+out:
+ inflateEnd(&zs);
+out_unmap:
+ munmap(ptr, stbuf.st_size);
+out_close:
+ close(input_fd);
+
+ return ret == Z_STREAM_END ? 0 : -1;
+}
+
+bool gzip_is_compressed(const char *input)
+{
+ int fd = open(input, O_RDONLY);
+ const uint8_t magic[2] = { 0x1f, 0x8b };
+ char buf[2] = { 0 };
+ ssize_t rc;
+
+ if (fd < 0)
+		return false;
+
+ rc = read(fd, buf, sizeof(buf));
+ close(fd);
+ return rc == sizeof(buf) ?
+ memcmp(buf, magic, sizeof(buf)) == 0 : false;
+}
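A minimal sketch combining the two entry points; the paths are hypothetical:

    int out = open("/tmp/kcore", O_WRONLY | O_CREAT | O_TRUNC, 0644);

    if (out >= 0) {
            if (gzip_is_compressed("/tmp/kcore.gz") &&
                gzip_decompress_to_file("/tmp/kcore.gz", out) == 0)
                    pr_debug("decompressed OK\n");
            close(out);
    }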
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
new file mode 100644
index 000000000..cba3d9949
--- /dev/null
+++ b/tools/power/acpi/.gitignore
@@ -0,0 +1,4 @@
+acpidbg
+acpidump
+ec
+include
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
new file mode 100644
index 000000000..a8bf90815
--- /dev/null
+++ b/tools/power/acpi/Makefile
@@ -0,0 +1,27 @@
+# tools/power/acpi/Makefile - ACPI tool Makefile
+#
+# Copyright (c) 2013, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+include ../../scripts/Makefile.include
+
+all: acpidbg acpidump ec
+clean: acpidbg_clean acpidump_clean ec_clean
+install: acpidbg_install acpidump_install ec_install
+uninstall: acpidbg_uninstall acpidump_uninstall ec_uninstall
+
+acpidbg acpidump ec: FORCE
+ $(call descend,tools/$@,all)
+acpidbg_clean acpidump_clean ec_clean:
+ $(call descend,tools/$(@:_clean=),clean)
+acpidbg_install acpidump_install ec_install:
+ $(call descend,tools/$(@:_install=),install)
+acpidbg_uninstall acpidump_uninstall ec_uninstall:
+ $(call descend,tools/$(@:_uninstall=),uninstall)
+
+.PHONY: FORCE
diff --git a/tools/power/acpi/Makefile.config b/tools/power/acpi/Makefile.config
new file mode 100644
index 000000000..32ff7baf3
--- /dev/null
+++ b/tools/power/acpi/Makefile.config
@@ -0,0 +1,91 @@
+# tools/power/acpi/Makefile.config - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+include $(srctree)/../../scripts/Makefile.include
+
+OUTPUT=$(srctree)/
+ifeq ("$(origin O)", "command line")
+ OUTPUT := $(O)/tools/power/acpi/
+endif
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+
+# --- CONFIGURATION BEGIN ---
+
+# Set the following to `true' to make an unstripped, unoptimized
+# binary. Leave this set to `false' for production use.
+DEBUG ?= true
+
+# Make the build silent. Set this to something else to make it noisy again.
+V ?= false
+
+# Prefix to the directories we're installing to
+DESTDIR ?=
+
+# --- CONFIGURATION END ---
+
+# Directory definitions. These are default and most probably
+# do not need to be changed. Please note that DESTDIR is
+# added in front of any of them
+
+bindir ?= /usr/bin
+sbindir ?= /usr/sbin
+mandir ?= /usr/man
+
+# Toolchain: what tools do we use, and what options do they need:
+
+INSTALL = /usr/bin/install -c
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_SCRIPT = ${INSTALL_PROGRAM}
+
+# If you are running a cross compiler, you may want to set this
+# to something more interesting, like "arm-linux-". If you want
+# to compile against uClibc, that can be done here as well.
+CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
+CROSS_COMPILE ?= $(CROSS)
+LD = $(CC)
+
+# check if compiler option is supported
+cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+
+# use '-Os' optimization if available, else use -O2
+OPTIMIZATION := $(call cc-supports,-Os,-O2)
+
+WARNINGS := -Wall
+WARNINGS += $(call cc-supports,-Wstrict-prototypes)
+WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
+
+KERNEL_INCLUDE := $(OUTPUT)include
+ACPICA_INCLUDE := $(srctree)/../../../drivers/acpi/acpica
+CFLAGS += -D_LINUX -I$(KERNEL_INCLUDE) -I$(ACPICA_INCLUDE)
+CFLAGS += $(WARNINGS)
+
+ifeq ($(strip $(V)),false)
+ QUIET=@
+ ECHO=@echo
+else
+ QUIET=
+ ECHO=@\#
+endif
+
+# if DEBUG is enabled, then we do not strip or optimize
+ifeq ($(strip $(DEBUG)),true)
+ CFLAGS += -O1 -g -DDEBUG
+ STRIPCMD = /bin/true -Since_we_are_debugging
+else
+ CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
+ STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment
+endif
diff --git a/tools/power/acpi/Makefile.rules b/tools/power/acpi/Makefile.rules
new file mode 100644
index 000000000..373738338
--- /dev/null
+++ b/tools/power/acpi/Makefile.rules
@@ -0,0 +1,51 @@
+# tools/power/acpi/Makefile.rules - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+objdir := $(OUTPUT)tools/$(TOOL)/
+toolobjs := $(addprefix $(objdir),$(TOOL_OBJS))
+$(OUTPUT)$(TOOL): $(toolobjs) FORCE
+ $(ECHO) " LD " $(subst $(OUTPUT),,$@)
+ $(QUIET) $(LD) $(CFLAGS) $(LDFLAGS) $(toolobjs) -L$(OUTPUT) -o $@
+ $(ECHO) " STRIP " $(subst $(OUTPUT),,$@)
+ $(QUIET) $(STRIPCMD) $@
+
+$(KERNEL_INCLUDE):
+ $(ECHO) " MKDIR " $(subst $(OUTPUT),,$@)
+ $(QUIET) mkdir -p $(KERNEL_INCLUDE)
+ $(ECHO) " CP " $(subst $(OUTPUT),,$@)
+ $(QUIET) cp -rf $(srctree)/../../../include/acpi $(KERNEL_INCLUDE)/
+
+$(objdir)%.o: %.c $(KERNEL_INCLUDE)
+ $(ECHO) " CC " $(subst $(OUTPUT),,$@)
+ $(QUIET) $(CC) -c $(CFLAGS) -o $@ $<
+
+all: $(OUTPUT)$(TOOL)
+clean:
+ $(ECHO) " RMOBJ " $(subst $(OUTPUT),,$(objdir))
+ $(QUIET) find $(objdir) \( -not -type d \)\
+ -and \( -name '*~' -o -name '*.[oas]' \)\
+ -type f -print | xargs rm -f
+ $(ECHO) " RM " $(TOOL)
+ $(QUIET) rm -f $(OUTPUT)$(TOOL)
+ $(ECHO) " RMINC " $(subst $(OUTPUT),,$(KERNEL_INCLUDE))
+ $(QUIET) rm -rf $(KERNEL_INCLUDE)
+
+install-tools:
+ $(ECHO) " INST " $(TOOL)
+ $(QUIET) $(INSTALL) -d $(DESTDIR)$(sbindir)
+ $(QUIET) $(INSTALL_PROGRAM) $(OUTPUT)$(TOOL) $(DESTDIR)$(sbindir)
+uninstall-tools:
+ $(ECHO) " UNINST " $(TOOL)
+ $(QUIET) rm -f $(DESTDIR)$(sbindir)/$(TOOL)
+
+install: all install-tools $(EXTRA_INSTALL)
+uninstall: uninstall-tools $(EXTRA_UNINSTALL)
+
+.PHONY: FORCE
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
new file mode 100644
index 000000000..ff8025de8
--- /dev/null
+++ b/tools/power/acpi/common/cmfsize.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: cfsize - Common get file size function
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acapps.h"
+
+#define _COMPONENT ACPI_TOOLS
+ACPI_MODULE_NAME("cmfsize")
+
+/*******************************************************************************
+ *
+ * FUNCTION: cm_get_file_size
+ *
+ * PARAMETERS: file - Open file descriptor
+ *
+ * RETURN: File Size. On error, -1 (ACPI_UINT32_MAX)
+ *
+ * DESCRIPTION: Get the size of a file. Uses seek-to-EOF. File must be open.
+ * Does not disturb the current file pointer.
+ *
+ ******************************************************************************/
+u32 cm_get_file_size(ACPI_FILE file)
+{
+ long file_size;
+ long current_offset;
+ acpi_status status;
+
+ /* Save the current file pointer, seek to EOF to obtain file size */
+
+ current_offset = ftell(file);
+ if (current_offset < 0) {
+ goto offset_error;
+ }
+
+ status = fseek(file, 0, SEEK_END);
+ if (ACPI_FAILURE(status)) {
+ goto seek_error;
+ }
+
+ file_size = ftell(file);
+ if (file_size < 0) {
+ goto offset_error;
+ }
+
+ /* Restore original file pointer */
+
+ status = fseek(file, current_offset, SEEK_SET);
+ if (ACPI_FAILURE(status)) {
+ goto seek_error;
+ }
+
+ return ((u32)file_size);
+
+offset_error:
+ fprintf(stderr, "Could not get file offset\n");
+ return (ACPI_UINT32_MAX);
+
+seek_error:
+ fprintf(stderr, "Could not set file offset\n");
+ return (ACPI_UINT32_MAX);
+}
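A minimal usage sketch, assuming an already-open table file (the path is hypothetical); note that the function restores the original file position on success:

    FILE *f = fopen("/tmp/FACP.dat", "rb");

    if (f) {
            u32 size = cm_get_file_size(f);

            if (size != ACPI_UINT32_MAX)
                    fprintf(stderr, "table is %u bytes\n", size);
            fclose(f);
    }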
diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c
new file mode 100644
index 000000000..7be89b873
--- /dev/null
+++ b/tools/power/acpi/common/getopt.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: getopt
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+/*
+ * ACPICA getopt() implementation
+ *
+ * Option strings:
+ * "f" - Option has no arguments
+ * "f:" - Option requires an argument
+ * "f+" - Option has an optional argument
+ * "f^" - Option has optional single-char sub-options
+ * "f|" - Option has required single-char sub-options
+ */
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "acapps.h"
+
+#define ACPI_OPTION_ERROR(msg, badchar) \
+ if (acpi_gbl_opterr) {fprintf (stderr, "%s%c\n", msg, badchar);}
+
+int acpi_gbl_opterr = 1;
+int acpi_gbl_optind = 1;
+int acpi_gbl_sub_opt_char = 0;
+char *acpi_gbl_optarg;
+
+static int current_char_ptr = 1;
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_getopt_argument
+ *
+ * PARAMETERS: argc, argv - from main
+ *
+ * RETURN: 0 if an argument was found, -1 otherwise. Sets acpi_gbl_Optarg
+ * to point to the next argument.
+ *
+ * DESCRIPTION: Get the next argument. Used to obtain arguments for the
+ * two-character options after the original call to acpi_getopt.
+ * Note: Either the argument starts at the next character after
+ * the option, or it is pointed to by the next argv entry.
+ * (After call to acpi_getopt, we need to backup to the previous
+ * argv entry).
+ *
+ ******************************************************************************/
+
+int acpi_getopt_argument(int argc, char **argv)
+{
+
+ acpi_gbl_optind--;
+ current_char_ptr++;
+
+ if (argv[acpi_gbl_optind][(int)(current_char_ptr + 1)] != '\0') {
+ acpi_gbl_optarg =
+ &argv[acpi_gbl_optind++][(int)(current_char_ptr + 1)];
+ } else if (++acpi_gbl_optind >= argc) {
+ ACPI_OPTION_ERROR("\nOption requires an argument", 0);
+
+ current_char_ptr = 1;
+ return (-1);
+ } else {
+ acpi_gbl_optarg = argv[acpi_gbl_optind++];
+ }
+
+ current_char_ptr = 1;
+ return (0);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_getopt
+ *
+ * PARAMETERS: argc, argv - from main
+ * opts - options info list
+ *
+ * RETURN: Option character or ACPI_OPT_END
+ *
+ * DESCRIPTION: Get the next option
+ *
+ ******************************************************************************/
+
+int acpi_getopt(int argc, char **argv, char *opts)
+{
+ int current_char;
+ char *opts_ptr;
+
+ if (current_char_ptr == 1) {
+ if (acpi_gbl_optind >= argc ||
+ argv[acpi_gbl_optind][0] != '-' ||
+ argv[acpi_gbl_optind][1] == '\0') {
+ return (ACPI_OPT_END);
+ } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) {
+ acpi_gbl_optind++;
+ return (ACPI_OPT_END);
+ }
+ }
+
+ /* Get the option */
+
+ current_char = argv[acpi_gbl_optind][current_char_ptr];
+
+ /* Make sure that the option is legal */
+
+ if (current_char == ':' ||
+ (opts_ptr = strchr(opts, current_char)) == NULL) {
+ ACPI_OPTION_ERROR("Illegal option: -", current_char);
+
+ if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') {
+ acpi_gbl_optind++;
+ current_char_ptr = 1;
+ }
+
+ return ('?');
+ }
+
+ /* Option requires an argument? */
+
+ if (*++opts_ptr == ':') {
+ if (argv[acpi_gbl_optind][(int)(current_char_ptr + 1)] != '\0') {
+ acpi_gbl_optarg =
+ &argv[acpi_gbl_optind++][(int)
+ (current_char_ptr + 1)];
+ } else if (++acpi_gbl_optind >= argc) {
+ ACPI_OPTION_ERROR("Option requires an argument: -",
+ current_char);
+
+ current_char_ptr = 1;
+ return ('?');
+ } else {
+ acpi_gbl_optarg = argv[acpi_gbl_optind++];
+ }
+
+ current_char_ptr = 1;
+ }
+
+ /* Option has an optional argument? */
+
+ else if (*opts_ptr == '+') {
+ if (argv[acpi_gbl_optind][(int)(current_char_ptr + 1)] != '\0') {
+ acpi_gbl_optarg =
+ &argv[acpi_gbl_optind++][(int)
+ (current_char_ptr + 1)];
+ } else if (++acpi_gbl_optind >= argc) {
+ acpi_gbl_optarg = NULL;
+ } else {
+ acpi_gbl_optarg = argv[acpi_gbl_optind++];
+ }
+
+ current_char_ptr = 1;
+ }
+
+ /* Option has optional single-char arguments? */
+
+ else if (*opts_ptr == '^') {
+ if (argv[acpi_gbl_optind][(int)(current_char_ptr + 1)] != '\0') {
+ acpi_gbl_optarg =
+ &argv[acpi_gbl_optind][(int)(current_char_ptr + 1)];
+ } else {
+ acpi_gbl_optarg = "^";
+ }
+
+ acpi_gbl_sub_opt_char = acpi_gbl_optarg[0];
+ acpi_gbl_optind++;
+ current_char_ptr = 1;
+ }
+
+ /* Option has a required single-char argument? */
+
+ else if (*opts_ptr == '|') {
+ if (argv[acpi_gbl_optind][(int)(current_char_ptr + 1)] != '\0') {
+ acpi_gbl_optarg =
+ &argv[acpi_gbl_optind][(int)(current_char_ptr + 1)];
+ } else {
+ ACPI_OPTION_ERROR
+ ("Option requires a single-character suboption: -",
+ current_char);
+
+ current_char_ptr = 1;
+ return ('?');
+ }
+
+ acpi_gbl_sub_opt_char = acpi_gbl_optarg[0];
+ acpi_gbl_optind++;
+ current_char_ptr = 1;
+ }
+
+ /* Option with no arguments */
+
+ else {
+ if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') {
+ current_char_ptr = 1;
+ acpi_gbl_optind++;
+ }
+
+ acpi_gbl_optarg = NULL;
+ }
+
+ return (current_char);
+}
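A minimal sketch of a caller's loop; the option string is made up to exercise three of the syntaxes described above ("b" takes no argument, "o:" requires one, "c+" takes an optional one):

    int j;

    while ((j = acpi_getopt(argc, argv, "bo:c+")) != ACPI_OPT_END) {
            switch (j) {
            case 'b':
                    break;          /* simple flag */
            case 'o':
                    /* acpi_gbl_optarg points at the required argument */
                    break;
            case 'c':
                    /* acpi_gbl_optarg may be NULL if the argument was omitted */
                    break;
            default:
                    return -1;      /* acpi_getopt() returned '?' */
            }
    }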
diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8
new file mode 100644
index 000000000..79e2d1d43
--- /dev/null
+++ b/tools/power/acpi/man/acpidump.8
@@ -0,0 +1,127 @@
+.TH ACPIDUMP 8
+.SH NAME
+acpidump \- dump a system's ACPI tables to an ASCII file
+
+.SH SYNOPSIS
+.B acpidump
+.RI [ options ]
+.br
+
+.SH DESCRIPTION
+.B acpidump
+dumps the system's ACPI tables to an ASCII file appropriate for
+attaching to a bug report.
+
+Subsequently, they can be processed by utilities in the ACPICA package.
+
+.SH OPTIONS
+acpidump options are as follows:
+.TP
+.B Options
+.TP
+.B \-b
+Dump tables to binary files
+.TP
+.B \-h \-?
+This help message
+.TP
+.B \-o <File>
+Redirect output to file
+.TP
+.B \-r <Address>
+Dump tables from specified RSDP
+.TP
+.B \-s
+Print table summaries only
+.TP
+.B \-v
+Display version information
+.TP
+.B \-z
+Verbose mode
+.TP
+.B Table Options
+.TP
+.B \-a <Address>
+Get table via a physical address
+.TP
+.B \-c <on|off>
+Turning on/off customized table dumping
+.TP
+.B \-f <BinaryFile>
+Get table via a binary file
+.TP
+.B \-n <Signature>
+Get table via a name/signature
+.TP
+.B \-x
+Do not use the XSDT, but still dump it
+.TP
+.B \-x \-x
+Do not use or dump the XSDT
+.TP
+.fi
+Invocation without parameters dumps all available tables.
+.TP
+Multiple mixed instances of -a, -f, and -n are supported.
+
+.SH EXAMPLES
+
+.nf
+# acpidump > acpidump.out
+
+$ acpixtract -a acpidump.out
+ Acpi table [DSDT] - 15974 bytes written to DSDT.dat
+ Acpi table [FACS] - 64 bytes written to FACS.dat
+ Acpi table [FACP] - 116 bytes written to FACP.dat
+ Acpi table [APIC] - 120 bytes written to APIC.dat
+ Acpi table [MCFG] - 60 bytes written to MCFG.dat
+ Acpi table [SSDT] - 444 bytes written to SSDT1.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT2.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT3.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT4.dat
+ Acpi table [SSDT] - 439 bytes written to SSDT5.dat
+ Acpi table [RSDT] - 76 bytes written to RSDT.dat
+ Acpi table [RSDP] - 20 bytes written to RSDP.dat
+
+$ iasl -d *.dat
+...
+.fi
+creates *.dsl, a human-readable form which can be edited
+and compiled using iasl.
+
+
+.SH NOTES
+
+.B "acpidump "
+must be run as root.
+
+.SH REFERENCES
+ACPICA: https://acpica.org/
+
+.SH FILES
+.ta
+.nf
+/dev/mem
+/sys/firmware/acpi/tables/*
+/sys/firmware/acpi/tables/dynamic/*
+/sys/firmware/efi/systab
+.fi
+
+.SH AUTHOR
+.TP
+Original by:
+ Len Brown <len.brown@intel.com>
+.TP
+Written by:
+ Chao Guan <chao.guan@intel.com>
+.TP
+Updated by:
+ Bob Moore <robert.moore@intel.com>
+ Lv Zheng <lv.zheng@intel.com>
+
+.SH SEE ALSO
+\&\fIacpixtract\fR\|(8), \fIiasl\fR\|(8).
+
+.SH COPYRIGHT
+COPYRIGHT (c) 2013, Intel Corporation.
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
new file mode 100644
index 000000000..a20c703f8
--- /dev/null
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -0,0 +1,1372 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: oslinuxtbl - Linux OSL for obtaining ACPI tables
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#include "acpidump.h"
+
+#define _COMPONENT ACPI_OS_SERVICES
+ACPI_MODULE_NAME("oslinuxtbl")
+
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+/* List of information about obtained ACPI tables */
+typedef struct osl_table_info {
+ struct osl_table_info *next;
+ u32 instance;
+ char signature[ACPI_NAME_SIZE];
+
+} osl_table_info;
+
+/* Local prototypes */
+
+static acpi_status osl_table_initialize(void);
+
+static acpi_status
+osl_table_name_from_file(char *filename, char *signature, u32 *instance);
+
+static acpi_status osl_add_table_to_list(char *signature, u32 instance);
+
+static acpi_status
+osl_read_table_from_file(char *filename,
+ acpi_size file_offset,
+ char *signature, struct acpi_table_header **table);
+
+static acpi_status
+osl_map_table(acpi_size address,
+ char *signature, struct acpi_table_header **table);
+
+static void osl_unmap_table(struct acpi_table_header *table);
+
+static acpi_physical_address
+osl_find_rsdp_via_efi_by_keyword(FILE * file, const char *keyword);
+
+static acpi_physical_address osl_find_rsdp_via_efi(void);
+
+static acpi_status osl_load_rsdp(void);
+
+static acpi_status osl_list_customized_tables(char *directory);
+
+static acpi_status
+osl_get_customized_table(char *pathname,
+ char *signature,
+ u32 instance,
+ struct acpi_table_header **table,
+ acpi_physical_address *address);
+
+static acpi_status osl_list_bios_tables(void);
+
+static acpi_status
+osl_get_bios_table(char *signature,
+ u32 instance,
+ struct acpi_table_header **table,
+ acpi_physical_address *address);
+
+static acpi_status osl_get_last_status(acpi_status default_status);
+
+/* File locations */
+
+#define DYNAMIC_TABLE_DIR "/sys/firmware/acpi/tables/dynamic"
+#define STATIC_TABLE_DIR "/sys/firmware/acpi/tables"
+#define EFI_SYSTAB "/sys/firmware/efi/systab"
+
+/* Should we get dynamically loaded SSDTs from DYNAMIC_TABLE_DIR? */
+
+u8 gbl_dump_dynamic_tables = TRUE;
+
+/* Initialization flags */
+
+u8 gbl_table_list_initialized = FALSE;
+
+/* Local copies of main ACPI tables */
+
+struct acpi_table_rsdp gbl_rsdp;
+struct acpi_table_fadt *gbl_fadt = NULL;
+struct acpi_table_rsdt *gbl_rsdt = NULL;
+struct acpi_table_xsdt *gbl_xsdt = NULL;
+
+/* Table addresses */
+
+acpi_physical_address gbl_fadt_address = 0;
+acpi_physical_address gbl_rsdp_address = 0;
+
+/* Revision of RSD PTR */
+
+u8 gbl_revision = 0;
+
+struct osl_table_info *gbl_table_list_head = NULL;
+u32 gbl_table_count = 0;
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_get_last_status
+ *
+ * PARAMETERS: default_status - Default error status to return
+ *
+ * RETURN: Status; Converted from errno.
+ *
+ * DESCRIPTION: Get the last errno and convert it to an acpi_status.
+ *
+ *****************************************************************************/
+
+static acpi_status osl_get_last_status(acpi_status default_status)
+{
+
+ switch (errno) {
+ case EACCES:
+ case EPERM:
+
+ return (AE_ACCESS);
+
+ case ENOENT:
+
+ return (AE_NOT_FOUND);
+
+ case ENOMEM:
+
+ return (AE_NO_MEMORY);
+
+ default:
+
+ return (default_status);
+ }
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_table_by_address
+ *
+ * PARAMETERS: address - Physical address of the ACPI table
+ * table - Where a pointer to the table is returned
+ *
+ * RETURN: Status; Table buffer is returned if AE_OK.
+ * AE_NOT_FOUND: A valid table was not found at the address
+ *
+ * DESCRIPTION: Get an ACPI table via a physical memory address.
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_get_table_by_address(acpi_physical_address address,
+ struct acpi_table_header **table)
+{
+ u32 table_length;
+ struct acpi_table_header *mapped_table;
+ struct acpi_table_header *local_table = NULL;
+ acpi_status status = AE_OK;
+
+ /* Get main ACPI tables from memory on first invocation of this function */
+
+ status = osl_table_initialize();
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Map the table and validate it */
+
+ status = osl_map_table(address, NULL, &mapped_table);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Copy table to local buffer and return it */
+
+ table_length = ap_get_table_length(mapped_table);
+ if (table_length == 0) {
+ status = AE_BAD_HEADER;
+ goto exit;
+ }
+
+ local_table = calloc(1, table_length);
+ if (!local_table) {
+ status = AE_NO_MEMORY;
+ goto exit;
+ }
+
+ memcpy(local_table, mapped_table, table_length);
+
+exit:
+ osl_unmap_table(mapped_table);
+ *table = local_table;
+ return (status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_table_by_name
+ *
+ * PARAMETERS: signature - ACPI Signature for desired table. Must be
+ * a null terminated 4-character string.
+ * instance - Multiple table support for SSDT/UEFI (0...n)
+ * Must be 0 for other tables.
+ * table - Where a pointer to the table is returned
+ * address - Where the table physical address is returned
+ *
+ * RETURN: Status; Table buffer and physical address returned if AE_OK.
+ * AE_LIMIT: Instance is beyond valid limit
+ * AE_NOT_FOUND: A table with the signature was not found
+ *
+ * NOTE: Assumes the input signature is uppercase.
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_get_table_by_name(char *signature,
+ u32 instance,
+ struct acpi_table_header **table,
+ acpi_physical_address *address)
+{
+ acpi_status status;
+
+ /* Get main ACPI tables from memory on first invocation of this function */
+
+ status = osl_table_initialize();
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Not a main ACPI table, attempt to extract it from the RSDT/XSDT */
+
+ if (!gbl_dump_customized_tables) {
+
+ /* Attempt to get the table from the memory */
+
+ status =
+ osl_get_bios_table(signature, instance, table, address);
+ } else {
+ /* Attempt to get the table from the static directory */
+
+ status = osl_get_customized_table(STATIC_TABLE_DIR, signature,
+ instance, table, address);
+ }
+
+ if (ACPI_FAILURE(status) && status == AE_LIMIT) {
+ if (gbl_dump_dynamic_tables) {
+
+ /* Attempt to get a dynamic table */
+
+ status =
+ osl_get_customized_table(DYNAMIC_TABLE_DIR,
+ signature, instance, table,
+ address);
+ }
+ }
+
+ return (status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_add_table_to_list
+ *
+ * PARAMETERS: signature - Table signature
+ * instance - Table instance
+ *
+ * RETURN: Status; Successfully added if AE_OK.
+ * AE_NO_MEMORY: Memory allocation error
+ *
+ * DESCRIPTION: Insert a table structure into OSL table list.
+ *
+ *****************************************************************************/
+
+static acpi_status osl_add_table_to_list(char *signature, u32 instance)
+{
+ struct osl_table_info *new_info;
+ struct osl_table_info *next;
+ u32 next_instance = 0;
+ u8 found = FALSE;
+
+ new_info = calloc(1, sizeof(struct osl_table_info));
+ if (!new_info) {
+ return (AE_NO_MEMORY);
+ }
+
+ ACPI_MOVE_NAME(new_info->signature, signature);
+
+ if (!gbl_table_list_head) {
+ gbl_table_list_head = new_info;
+ } else {
+ next = gbl_table_list_head;
+ while (1) {
+ if (ACPI_COMPARE_NAME(next->signature, signature)) {
+ if (next->instance == instance) {
+ found = TRUE;
+ }
+ if (next->instance >= next_instance) {
+ next_instance = next->instance + 1;
+ }
+ }
+
+ if (!next->next) {
+ break;
+ }
+ next = next->next;
+ }
+ next->next = new_info;
+ }
+
+ if (found) {
+ if (instance) {
+ fprintf(stderr,
+				"%4.4s: Warning: unmatched table instance %d, expected %d\n",
+ signature, instance, next_instance);
+ }
+ instance = next_instance;
+ }
+
+ new_info->instance = instance;
+ gbl_table_count++;
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_table_by_index
+ *
+ * PARAMETERS: index - Which table to get
+ * table - Where a pointer to the table is returned
+ * instance - Where a pointer to the table instance no. is
+ * returned
+ * address - Where the table physical address is returned
+ *
+ * RETURN: Status; Table buffer and physical address returned if AE_OK.
+ * AE_LIMIT: Index is beyond valid limit
+ *
+ * DESCRIPTION: Get an ACPI table via an index value (0 through n). Returns
+ * AE_LIMIT when an invalid index is reached. Index is not
+ * necessarily an index into the RSDT/XSDT.
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_get_table_by_index(u32 index,
+ struct acpi_table_header **table,
+ u32 *instance, acpi_physical_address *address)
+{
+ struct osl_table_info *info;
+ acpi_status status;
+ u32 i;
+
+ /* Get main ACPI tables from memory on first invocation of this function */
+
+ status = osl_table_initialize();
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Validate Index */
+
+ if (index >= gbl_table_count) {
+ return (AE_LIMIT);
+ }
+
+ /* Point to the table list entry specified by the Index argument */
+
+ info = gbl_table_list_head;
+ for (i = 0; i < index; i++) {
+ info = info->next;
+ }
+
+ /* Now we can just get the table via the signature */
+
+ status = acpi_os_get_table_by_name(info->signature, info->instance,
+ table, address);
+
+ if (ACPI_SUCCESS(status)) {
+ *instance = info->instance;
+ }
+ return (status);
+}
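+
+/*
+ * Usage sketch (illustrative only, not called by the OSL itself):
+ * enumerate every table exposed by this layer by walking the index
+ * space until AE_LIMIT is returned. The returned buffers are heap
+ * copies owned by the caller and must be freed.
+ */
+static void osl_example_dump_table_list(void)
+{
+	struct acpi_table_header *table;
+	acpi_physical_address address;
+	acpi_status status;
+	u32 instance;
+	u32 index;
+
+	for (index = 0; ; index++) {
+		status = acpi_os_get_table_by_index(index, &table,
+						    &instance, &address);
+		if (ACPI_FAILURE(status)) {
+			break;	/* AE_LIMIT marks the end of the list */
+		}
+
+		fprintf(stdout, "%4.4s (instance %u) at 0x%8.8X%8.8X\n",
+			table->signature, instance,
+			ACPI_FORMAT_UINT64(address));
+		free(table);
+	}
+}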
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_find_rsdp_via_efi_by_keyword
+ *
+ * PARAMETERS: keyword - Character string indicating ACPI GUID version
+ * in the EFI table
+ *
+ * RETURN: RSDP address if found
+ *
+ * DESCRIPTION: Find RSDP address via EFI using keyword indicating the ACPI
+ * GUID version.
+ *
+ *****************************************************************************/
+
+static acpi_physical_address
+osl_find_rsdp_via_efi_by_keyword(FILE * file, const char *keyword)
+{
+ char buffer[80];
+ unsigned long long address = 0;
+ char format[32];
+
+ snprintf(format, 32, "%s=%s", keyword, "%llx");
+ fseek(file, 0, SEEK_SET);
+ while (fgets(buffer, 80, file)) {
+ if (sscanf(buffer, format, &address) == 1) {
+ break;
+ }
+ }
+
+ return ((acpi_physical_address)(address));
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_find_rsdp_via_efi
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: RSDP address if found
+ *
+ * DESCRIPTION: Find RSDP address via EFI.
+ *
+ *****************************************************************************/
+
+static acpi_physical_address osl_find_rsdp_via_efi(void)
+{
+ FILE *file;
+ acpi_physical_address address = 0;
+
+ file = fopen(EFI_SYSTAB, "r");
+ if (file) {
+ address = osl_find_rsdp_via_efi_by_keyword(file, "ACPI20");
+ if (!address) {
+ address =
+ osl_find_rsdp_via_efi_by_keyword(file, "ACPI");
+ }
+ fclose(file);
+ }
+
+ return (address);
+}
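+
+/*
+ * Example of the input parsed above (assuming the typical Linux sysfs
+ * location behind EFI_SYSTAB): the file contains "keyword=value" lines
+ * such as:
+ *
+ *	ACPI20=0x7b7e6014
+ *	ACPI=0x7b7e6000
+ *	SMBIOS=0xf0000
+ *
+ * The "ACPI20" (ACPI 2.0+) entry is preferred; "ACPI" is the ACPI 1.0
+ * fallback.
+ */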
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_load_rsdp
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Scan and load RSDP.
+ *
+ *****************************************************************************/
+
+static acpi_status osl_load_rsdp(void)
+{
+ struct acpi_table_header *mapped_table;
+ u8 *rsdp_address;
+ acpi_physical_address rsdp_base;
+ acpi_size rsdp_size;
+
+ /* Get RSDP from memory */
+
+ rsdp_size = sizeof(struct acpi_table_rsdp);
+ if (gbl_rsdp_base) {
+ rsdp_base = gbl_rsdp_base;
+ } else {
+ rsdp_base = osl_find_rsdp_via_efi();
+ }
+
+ if (!rsdp_base) {
+ rsdp_base = ACPI_HI_RSDP_WINDOW_BASE;
+ rsdp_size = ACPI_HI_RSDP_WINDOW_SIZE;
+ }
+
+ rsdp_address = acpi_os_map_memory(rsdp_base, rsdp_size);
+ if (!rsdp_address) {
+ return (osl_get_last_status(AE_BAD_ADDRESS));
+ }
+
+ /* Search low memory for the RSDP */
+
+ mapped_table = ACPI_CAST_PTR(struct acpi_table_header,
+ acpi_tb_scan_memory_for_rsdp(rsdp_address,
+ rsdp_size));
+ if (!mapped_table) {
+ acpi_os_unmap_memory(rsdp_address, rsdp_size);
+ return (AE_NOT_FOUND);
+ }
+
+ gbl_rsdp_address =
+ rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address);
+
+ memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp));
+ acpi_os_unmap_memory(rsdp_address, rsdp_size);
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_can_use_xsdt
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: TRUE if XSDT is allowed to be used.
+ *
+ * DESCRIPTION: This function collects logic that can be used to determine if
+ * XSDT should be used instead of RSDT.
+ *
+ *****************************************************************************/
+
+static u8 osl_can_use_xsdt(void)
+{
+ if (gbl_revision && !acpi_gbl_do_not_use_xsdt) {
+ return (TRUE);
+ } else {
+ return (FALSE);
+ }
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_table_initialize
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Initialize ACPI table data. Get and store main ACPI tables to
+ * local variables. Main ACPI tables include RSDP, FADT, RSDT,
+ * and/or XSDT.
+ *
+ *****************************************************************************/
+
+static acpi_status osl_table_initialize(void)
+{
+ acpi_status status;
+ acpi_physical_address address;
+
+ if (gbl_table_list_initialized) {
+ return (AE_OK);
+ }
+
+ if (!gbl_dump_customized_tables) {
+
+ /* Get RSDP from memory */
+
+ status = osl_load_rsdp();
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Get XSDT from memory */
+
+ if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) {
+ if (gbl_xsdt) {
+ free(gbl_xsdt);
+ gbl_xsdt = NULL;
+ }
+
+ gbl_revision = 2;
+ status = osl_get_bios_table(ACPI_SIG_XSDT, 0,
+ ACPI_CAST_PTR(struct
+ acpi_table_header
+ *, &gbl_xsdt),
+ &address);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ }
+
+ /* Get RSDT from memory */
+
+ if (gbl_rsdp.rsdt_physical_address) {
+ if (gbl_rsdt) {
+ free(gbl_rsdt);
+ gbl_rsdt = NULL;
+ }
+
+ status = osl_get_bios_table(ACPI_SIG_RSDT, 0,
+ ACPI_CAST_PTR(struct
+ acpi_table_header
+ *, &gbl_rsdt),
+ &address);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ }
+
+ /* Get FADT from memory */
+
+ if (gbl_fadt) {
+ free(gbl_fadt);
+ gbl_fadt = NULL;
+ }
+
+ status = osl_get_bios_table(ACPI_SIG_FADT, 0,
+ ACPI_CAST_PTR(struct
+ acpi_table_header *,
+ &gbl_fadt),
+ &gbl_fadt_address);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Add mandatory tables to global table list first */
+
+ status = osl_add_table_to_list(ACPI_RSDP_NAME, 0);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ status = osl_add_table_to_list(ACPI_SIG_RSDT, 0);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ if (gbl_revision == 2) {
+ status = osl_add_table_to_list(ACPI_SIG_XSDT, 0);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ }
+
+ status = osl_add_table_to_list(ACPI_SIG_DSDT, 0);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ status = osl_add_table_to_list(ACPI_SIG_FACS, 0);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ /* Add all tables found in the memory */
+
+ status = osl_list_bios_tables();
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ } else {
+ /* Add all tables found in the static directory */
+
+ status = osl_list_customized_tables(STATIC_TABLE_DIR);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ }
+
+ if (gbl_dump_dynamic_tables) {
+
+ /* Add all dynamically loaded tables in the dynamic directory */
+
+ status = osl_list_customized_tables(DYNAMIC_TABLE_DIR);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ }
+
+ gbl_table_list_initialized = TRUE;
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_list_bios_tables
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status; Table list is initialized if AE_OK.
+ *
+ * DESCRIPTION: Add ACPI tables to the table list from memory.
+ *
+ * NOTE: This works on Linux as table customization does not modify the
+ * addresses stored in RSDP/RSDT/XSDT/FADT.
+ *
+ *****************************************************************************/
+
+static acpi_status osl_list_bios_tables(void)
+{
+ struct acpi_table_header *mapped_table = NULL;
+ u8 *table_data;
+ u8 number_of_tables;
+ u8 item_size;
+ acpi_physical_address table_address = 0;
+ acpi_status status = AE_OK;
+ u32 i;
+
+ if (osl_can_use_xsdt()) {
+ item_size = sizeof(u64);
+ table_data =
+ ACPI_CAST8(gbl_xsdt) + sizeof(struct acpi_table_header);
+ number_of_tables =
+ (u8)((gbl_xsdt->header.length -
+ sizeof(struct acpi_table_header))
+ / item_size);
+ } else { /* Use RSDT if XSDT is not available */
+
+ item_size = sizeof(u32);
+ table_data =
+ ACPI_CAST8(gbl_rsdt) + sizeof(struct acpi_table_header);
+ number_of_tables =
+ (u8)((gbl_rsdt->header.length -
+ sizeof(struct acpi_table_header))
+ / item_size);
+ }
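+
+	/*
+	 * Worked example: a 132-byte XSDT carries (132 - 36) / 8 = 12
+	 * entries, since the ACPI table header is 36 bytes and each XSDT
+	 * entry is a 64-bit physical address (32-bit for the RSDT).
+	 */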
+
+ /* Search RSDT/XSDT for the requested table */
+
+ for (i = 0; i < number_of_tables; ++i, table_data += item_size) {
+ if (osl_can_use_xsdt()) {
+ table_address =
+ (acpi_physical_address)(*ACPI_CAST64(table_data));
+ } else {
+ table_address =
+ (acpi_physical_address)(*ACPI_CAST32(table_data));
+ }
+
+ /* Skip NULL entries in RSDT/XSDT */
+
+ if (table_address == 0) {
+ continue;
+ }
+
+ status = osl_map_table(table_address, NULL, &mapped_table);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ osl_add_table_to_list(mapped_table->signature, 0);
+ osl_unmap_table(mapped_table);
+ }
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_get_bios_table
+ *
+ * PARAMETERS: signature - ACPI Signature for common table. Must be
+ * a null terminated 4-character string.
+ * instance - Multiple table support for SSDT/UEFI (0...n)
+ * Must be 0 for other tables.
+ * table - Where a pointer to the table is returned
+ * address - Where the table physical address is returned
+ *
+ * RETURN: Status; Table buffer and physical address returned if AE_OK.
+ * AE_LIMIT: Instance is beyond valid limit
+ * AE_NOT_FOUND: A table with the signature was not found
+ *
+ * DESCRIPTION: Get a BIOS provided ACPI table
+ *
+ * NOTE: Assumes the input signature is uppercase.
+ *
+ *****************************************************************************/
+
+static acpi_status
+osl_get_bios_table(char *signature,
+ u32 instance,
+ struct acpi_table_header **table,
+ acpi_physical_address *address)
+{
+ struct acpi_table_header *local_table = NULL;
+ struct acpi_table_header *mapped_table = NULL;
+ u8 *table_data;
+ u8 number_of_tables;
+ u8 item_size;
+ u32 current_instance = 0;
+ acpi_physical_address table_address;
+ acpi_physical_address first_table_address = 0;
+ u32 table_length = 0;
+ acpi_status status = AE_OK;
+ u32 i;
+
+ /* Handle special tables whose addresses are not in RSDT/XSDT */
+
+ if (ACPI_COMPARE_NAME(signature, ACPI_RSDP_NAME) ||
+ ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT) ||
+ ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT) ||
+ ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT) ||
+ ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) {
+
+find_next_instance:
+
+ table_address = 0;
+
+ /*
+ * Get the appropriate address, either 32-bit or 64-bit. Be very
+ * careful about the FADT length and validate table addresses.
+ * Note: The 64-bit addresses have priority.
+ */
+ if (ACPI_COMPARE_NAME(signature, ACPI_SIG_DSDT)) {
+ if (current_instance < 2) {
+ if ((gbl_fadt->header.length >=
+ MIN_FADT_FOR_XDSDT) && gbl_fadt->Xdsdt
+ && current_instance == 0) {
+ table_address =
+ (acpi_physical_address)gbl_fadt->
+ Xdsdt;
+ } else
+ if ((gbl_fadt->header.length >=
+ MIN_FADT_FOR_DSDT)
+ && gbl_fadt->dsdt !=
+ first_table_address) {
+ table_address =
+ (acpi_physical_address)gbl_fadt->
+ dsdt;
+ }
+ }
+ } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_FACS)) {
+ if (current_instance < 2) {
+ if ((gbl_fadt->header.length >=
+ MIN_FADT_FOR_XFACS) && gbl_fadt->Xfacs
+ && current_instance == 0) {
+ table_address =
+ (acpi_physical_address)gbl_fadt->
+ Xfacs;
+ } else
+ if ((gbl_fadt->header.length >=
+ MIN_FADT_FOR_FACS)
+ && gbl_fadt->facs !=
+ first_table_address) {
+ table_address =
+ (acpi_physical_address)gbl_fadt->
+ facs;
+ }
+ }
+ } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_XSDT)) {
+ if (!gbl_revision) {
+ return (AE_BAD_SIGNATURE);
+ }
+ if (current_instance == 0) {
+ table_address =
+ (acpi_physical_address)gbl_rsdp.
+ xsdt_physical_address;
+ }
+ } else if (ACPI_COMPARE_NAME(signature, ACPI_SIG_RSDT)) {
+ if (current_instance == 0) {
+ table_address =
+ (acpi_physical_address)gbl_rsdp.
+ rsdt_physical_address;
+ }
+ } else {
+ if (current_instance == 0) {
+ table_address =
+ (acpi_physical_address)gbl_rsdp_address;
+ signature = ACPI_SIG_RSDP;
+ }
+ }
+
+ if (table_address == 0) {
+ goto exit_find_table;
+ }
+
+ /* Now we can get the requested special table */
+
+ status = osl_map_table(table_address, signature, &mapped_table);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ table_length = ap_get_table_length(mapped_table);
+ if (first_table_address == 0) {
+ first_table_address = table_address;
+ }
+
+ /* Match table instance */
+
+ if (current_instance != instance) {
+ osl_unmap_table(mapped_table);
+ mapped_table = NULL;
+ current_instance++;
+ goto find_next_instance;
+ }
+ } else { /* Case for a normal ACPI table */
+
+ if (osl_can_use_xsdt()) {
+ item_size = sizeof(u64);
+ table_data =
+ ACPI_CAST8(gbl_xsdt) +
+ sizeof(struct acpi_table_header);
+ number_of_tables =
+ (u8)((gbl_xsdt->header.length -
+ sizeof(struct acpi_table_header))
+ / item_size);
+ } else { /* Use RSDT if XSDT is not available */
+
+ item_size = sizeof(u32);
+ table_data =
+ ACPI_CAST8(gbl_rsdt) +
+ sizeof(struct acpi_table_header);
+ number_of_tables =
+ (u8)((gbl_rsdt->header.length -
+ sizeof(struct acpi_table_header))
+ / item_size);
+ }
+
+ /* Search RSDT/XSDT for the requested table */
+
+ for (i = 0; i < number_of_tables; ++i, table_data += item_size) {
+ if (osl_can_use_xsdt()) {
+ table_address =
+ (acpi_physical_address)(*ACPI_CAST64
+ (table_data));
+ } else {
+ table_address =
+ (acpi_physical_address)(*ACPI_CAST32
+ (table_data));
+ }
+
+ /* Skip NULL entries in RSDT/XSDT */
+
+ if (table_address == 0) {
+ continue;
+ }
+
+ status =
+ osl_map_table(table_address, NULL, &mapped_table);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+ table_length = mapped_table->length;
+
+ /* Does this table match the requested signature? */
+
+ if (!ACPI_COMPARE_NAME
+ (mapped_table->signature, signature)) {
+ osl_unmap_table(mapped_table);
+ mapped_table = NULL;
+ continue;
+ }
+
+ /* Match table instance (for SSDT/UEFI tables) */
+
+ if (current_instance != instance) {
+ osl_unmap_table(mapped_table);
+ mapped_table = NULL;
+ current_instance++;
+ continue;
+ }
+
+ break;
+ }
+ }
+
+exit_find_table:
+
+ if (!mapped_table) {
+ return (AE_LIMIT);
+ }
+
+ if (table_length == 0) {
+ status = AE_BAD_HEADER;
+ goto exit;
+ }
+
+ /* Copy table to local buffer and return it */
+
+ local_table = calloc(1, table_length);
+ if (!local_table) {
+ status = AE_NO_MEMORY;
+ goto exit;
+ }
+
+ memcpy(local_table, mapped_table, table_length);
+ *address = table_address;
+ *table = local_table;
+
+exit:
+ osl_unmap_table(mapped_table);
+ return (status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_list_customized_tables
+ *
+ * PARAMETERS: directory - Directory that contains the tables
+ *
+ * RETURN: Status; Table list is initialized if AE_OK.
+ *
+ * DESCRIPTION: Add ACPI tables to the table list from a directory.
+ *
+ *****************************************************************************/
+
+static acpi_status osl_list_customized_tables(char *directory)
+{
+ void *table_dir;
+ u32 instance;
+ char temp_name[ACPI_NAME_SIZE];
+ char *filename;
+ acpi_status status = AE_OK;
+
+ /* Open the requested directory */
+
+ table_dir = acpi_os_open_directory(directory, "*", REQUEST_FILE_ONLY);
+ if (!table_dir) {
+ return (osl_get_last_status(AE_NOT_FOUND));
+ }
+
+ /* Examine all entries in this directory */
+
+ while ((filename = acpi_os_get_next_filename(table_dir))) {
+
+ /* Extract table name and instance number */
+
+ status =
+ osl_table_name_from_file(filename, temp_name, &instance);
+
+ /* Ignore meaningless files */
+
+ if (ACPI_FAILURE(status)) {
+ continue;
+ }
+
+ /* Add new info node to global table list */
+
+ status = osl_add_table_to_list(temp_name, instance);
+ if (ACPI_FAILURE(status)) {
+ break;
+ }
+ }
+
+ acpi_os_close_directory(table_dir);
+ return (status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_map_table
+ *
+ * PARAMETERS: address - Address of the table in memory
+ * signature - Optional ACPI Signature for desired table.
+ * Null terminated 4-character string.
+ * table - Where a pointer to the mapped table is
+ * returned
+ *
+ * RETURN: Status; Mapped table is returned if AE_OK.
+ * AE_NOT_FOUND: A valid table was not found at the address
+ *
+ * DESCRIPTION: Map entire ACPI table into caller's address space.
+ *
+ *****************************************************************************/
+
+static acpi_status
+osl_map_table(acpi_size address,
+ char *signature, struct acpi_table_header **table)
+{
+ struct acpi_table_header *mapped_table;
+ u32 length;
+
+ if (!address) {
+ return (AE_BAD_ADDRESS);
+ }
+
+ /*
+ * Map the header so we can get the table length.
+ * Use sizeof (struct acpi_table_header) as:
+ * 1. it is bigger than 24 to include RSDP->Length
+ * 2. it is not larger than sizeof (struct acpi_table_rsdp)
+ */
+ mapped_table =
+ acpi_os_map_memory(address, sizeof(struct acpi_table_header));
+ if (!mapped_table) {
+ fprintf(stderr, "Could not map table header at 0x%8.8X%8.8X\n",
+ ACPI_FORMAT_UINT64(address));
+ return (osl_get_last_status(AE_BAD_ADDRESS));
+ }
+
+ /* If specified, signature must match */
+
+ if (signature) {
+ if (ACPI_VALIDATE_RSDP_SIG(signature)) {
+ if (!ACPI_VALIDATE_RSDP_SIG(mapped_table->signature)) {
+ acpi_os_unmap_memory(mapped_table,
+ sizeof(struct
+ acpi_table_header));
+ return (AE_BAD_SIGNATURE);
+ }
+ } else
+ if (!ACPI_COMPARE_NAME(signature, mapped_table->signature))
+ {
+ acpi_os_unmap_memory(mapped_table,
+ sizeof(struct acpi_table_header));
+ return (AE_BAD_SIGNATURE);
+ }
+ }
+
+ /* Map the entire table */
+
+ length = ap_get_table_length(mapped_table);
+ acpi_os_unmap_memory(mapped_table, sizeof(struct acpi_table_header));
+ if (length == 0) {
+ return (AE_BAD_HEADER);
+ }
+
+ mapped_table = acpi_os_map_memory(address, length);
+ if (!mapped_table) {
+ fprintf(stderr,
+ "Could not map table at 0x%8.8X%8.8X length %8.8X\n",
+ ACPI_FORMAT_UINT64(address), length);
+ return (osl_get_last_status(AE_INVALID_TABLE_LENGTH));
+ }
+
+ (void)ap_is_valid_checksum(mapped_table);
+
+ *table = mapped_table;
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_unmap_table
+ *
+ * PARAMETERS: table - A pointer to the mapped table
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Unmap entire ACPI table.
+ *
+ *****************************************************************************/
+
+static void osl_unmap_table(struct acpi_table_header *table)
+{
+ if (table) {
+ acpi_os_unmap_memory(table, ap_get_table_length(table));
+ }
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_table_name_from_file
+ *
+ * PARAMETERS: filename - File that contains the desired table
+ * signature - Pointer to 4-character buffer to store
+ * extracted table signature.
+ * instance - Pointer to integer to store extracted
+ * table instance number.
+ *
+ * RETURN: Status; Table name is extracted if AE_OK.
+ *
+ * DESCRIPTION: Extract table signature and instance number from a table file
+ * name.
+ *
+ *****************************************************************************/
+
+static acpi_status
+osl_table_name_from_file(char *filename, char *signature, u32 *instance)
+{
+
+ /* Ignore meaningless files */
+
+ if (strlen(filename) < ACPI_NAME_SIZE) {
+ return (AE_BAD_SIGNATURE);
+ }
+
+ /* Extract instance number */
+
+ if (isdigit((int)filename[ACPI_NAME_SIZE])) {
+ sscanf(&filename[ACPI_NAME_SIZE], "%u", instance);
+ } else if (strlen(filename) != ACPI_NAME_SIZE) {
+ return (AE_BAD_SIGNATURE);
+ } else {
+ *instance = 0;
+ }
+
+ /* Extract signature */
+
+ ACPI_MOVE_NAME(signature, filename);
+ return (AE_OK);
+}
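+
+/*
+ * Examples of the file naming accepted above (ACPI_NAME_SIZE is 4):
+ *
+ *	"APIC"   -> signature "APIC", instance 0
+ *	"SSDT2"  -> signature "SSDT", instance 2
+ *	"SSDT10" -> signature "SSDT", instance 10
+ *	"DSD"    -> rejected, shorter than ACPI_NAME_SIZE
+ *	"APIC.x" -> rejected, trailing characters are not digits
+ */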
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_read_table_from_file
+ *
+ * PARAMETERS: filename - File that contains the desired table
+ * file_offset - Offset of the table in file
+ * signature - Optional ACPI Signature for desired table.
+ * A null terminated 4-character string.
+ * table - Where a pointer to the table is returned
+ *
+ * RETURN: Status; Table buffer is returned if AE_OK.
+ *
+ * DESCRIPTION: Read an ACPI table from a file.
+ *
+ *****************************************************************************/
+
+static acpi_status
+osl_read_table_from_file(char *filename,
+ acpi_size file_offset,
+ char *signature, struct acpi_table_header **table)
+{
+ FILE *table_file;
+ struct acpi_table_header header;
+ struct acpi_table_header *local_table = NULL;
+ u32 table_length;
+ s32 count;
+ acpi_status status = AE_OK;
+
+ /* Open the file */
+
+ table_file = fopen(filename, "rb");
+ if (table_file == NULL) {
+ fprintf(stderr, "Could not open table file: %s\n", filename);
+ return (osl_get_last_status(AE_NOT_FOUND));
+ }
+
+ fseek(table_file, file_offset, SEEK_SET);
+
+ /* Read the Table header to get the table length */
+
+ count = fread(&header, 1, sizeof(struct acpi_table_header), table_file);
+ if (count != sizeof(struct acpi_table_header)) {
+ fprintf(stderr, "Could not read table header: %s\n", filename);
+ status = AE_BAD_HEADER;
+ goto exit;
+ }
+
+ /* If signature is specified, it must match the table */
+
+ if (signature) {
+ if (ACPI_VALIDATE_RSDP_SIG(signature)) {
+ if (!ACPI_VALIDATE_RSDP_SIG(header.signature)) {
+ fprintf(stderr,
+ "Incorrect RSDP signature: found %8.8s\n",
+ header.signature);
+ status = AE_BAD_SIGNATURE;
+ goto exit;
+ }
+ } else if (!ACPI_COMPARE_NAME(signature, header.signature)) {
+ fprintf(stderr,
+ "Incorrect signature: Expecting %4.4s, found %4.4s\n",
+ signature, header.signature);
+ status = AE_BAD_SIGNATURE;
+ goto exit;
+ }
+ }
+
+ table_length = ap_get_table_length(&header);
+ if (table_length == 0) {
+ status = AE_BAD_HEADER;
+ goto exit;
+ }
+
+ /* Read the entire table into a local buffer */
+
+ local_table = calloc(1, table_length);
+ if (!local_table) {
+ fprintf(stderr,
+ "%4.4s: Could not allocate buffer for table of length %X\n",
+ header.signature, table_length);
+ status = AE_NO_MEMORY;
+ goto exit;
+ }
+
+ fseek(table_file, file_offset, SEEK_SET);
+
+ count = fread(local_table, 1, table_length, table_file);
+ if (count != table_length) {
+ fprintf(stderr, "%4.4s: Could not read table content\n",
+ header.signature);
+ status = AE_INVALID_TABLE_LENGTH;
+ goto exit;
+ }
+
+ /* Validate checksum */
+
+ (void)ap_is_valid_checksum(local_table);
+
+exit:
+ fclose(table_file);
+ *table = local_table;
+ return (status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: osl_get_customized_table
+ *
+ * PARAMETERS: pathname - Directory to find Linux customized table
+ * signature - ACPI Signature for desired table. Must be
+ * a null terminated 4-character string.
+ * instance - Multiple table support for SSDT/UEFI (0...n)
+ * Must be 0 for other tables.
+ * table - Where a pointer to the table is returned
+ * address - Where the table physical address is returned
+ *
+ * RETURN: Status; Table buffer is returned if AE_OK.
+ * AE_LIMIT: Instance is beyond valid limit
+ * AE_NOT_FOUND: A table with the signature was not found
+ *
+ * DESCRIPTION: Get an OS customized table.
+ *
+ *****************************************************************************/
+
+static acpi_status
+osl_get_customized_table(char *pathname,
+ char *signature,
+ u32 instance,
+ struct acpi_table_header **table,
+ acpi_physical_address *address)
+{
+ void *table_dir;
+ u32 current_instance = 0;
+ char temp_name[ACPI_NAME_SIZE];
+ char table_filename[PATH_MAX];
+ char *filename;
+ acpi_status status;
+
+ /* Open the directory for customized tables */
+
+ table_dir = acpi_os_open_directory(pathname, "*", REQUEST_FILE_ONLY);
+ if (!table_dir) {
+ return (osl_get_last_status(AE_NOT_FOUND));
+ }
+
+ /* Attempt to find the table in the directory */
+
+ while ((filename = acpi_os_get_next_filename(table_dir))) {
+
+ /* Ignore meaningless files */
+
+ if (!ACPI_COMPARE_NAME(filename, signature)) {
+ continue;
+ }
+
+ /* Extract table name and instance number */
+
+ status =
+ osl_table_name_from_file(filename, temp_name,
+ &current_instance);
+
+ /* Ignore meaningless files */
+
+ if (ACPI_FAILURE(status) || current_instance != instance) {
+ continue;
+ }
+
+ /* Create the table pathname */
+
+ if (instance != 0) {
+ sprintf(table_filename, "%s/%4.4s%d", pathname,
+ temp_name, instance);
+ } else {
+ sprintf(table_filename, "%s/%4.4s", pathname,
+ temp_name);
+ }
+ break;
+ }
+
+ acpi_os_close_directory(table_dir);
+
+ if (!filename) {
+ return (AE_LIMIT);
+ }
+
+ /* There is no physical address saved for customized tables, use zero */
+
+ *address = 0;
+ status = osl_read_table_from_file(table_filename, 0, NULL, table);
+
+ return (status);
+}
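+
+/*
+ * Example layout (assuming the usual Linux sysfs paths behind
+ * STATIC_TABLE_DIR and DYNAMIC_TABLE_DIR):
+ *
+ *	/sys/firmware/acpi/tables/DSDT           - "DSDT", instance 0
+ *	/sys/firmware/acpi/tables/SSDT1          - "SSDT", instance 1
+ *	/sys/firmware/acpi/tables/dynamic/SSDT5  - runtime-loaded "SSDT"
+ *
+ * A request for ("SSDT", 1) therefore opens "<pathname>/SSDT1", while
+ * instance 0 omits the numeric suffix.
+ */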
diff --git a/tools/power/acpi/os_specific/service_layers/osunixdir.c b/tools/power/acpi/os_specific/service_layers/osunixdir.c
new file mode 100644
index 000000000..4fd44e218
--- /dev/null
+++ b/tools/power/acpi/os_specific/service_layers/osunixdir.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: osunixdir - Unix directory access interfaces
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#include <acpi/acpi.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <fnmatch.h>
+#include <ctype.h>
+#include <sys/stat.h>
+
+/*
+ * Allocated structure returned from os_open_directory
+ */
+typedef struct external_find_info {
+ char *dir_pathname;
+ DIR *dir_ptr;
+ char temp_buffer[256];
+ char *wildcard_spec;
+ char requested_file_type;
+
+} external_find_info;
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_os_open_directory
+ *
+ * PARAMETERS: dir_pathname - Full pathname to the directory
+ * wildcard_spec - string of the form "*.c", etc.
+ * requested_file_type - REQUEST_FILE_ONLY or REQUEST_DIR_ONLY
+ *
+ * RETURN: A directory "handle" to be used in subsequent search operations.
+ * NULL returned on failure.
+ *
+ * DESCRIPTION: Open a directory in preparation for a wildcard search
+ *
+ ******************************************************************************/
+
+void *acpi_os_open_directory(char *dir_pathname,
+ char *wildcard_spec, char requested_file_type)
+{
+ struct external_find_info *external_info;
+ DIR *dir;
+
+ /* Allocate the info struct that will be returned to the caller */
+
+ external_info = calloc(1, sizeof(struct external_find_info));
+ if (!external_info) {
+ return (NULL);
+ }
+
+ /* Get the directory stream */
+
+ dir = opendir(dir_pathname);
+ if (!dir) {
+ fprintf(stderr, "Cannot open directory - %s\n", dir_pathname);
+ free(external_info);
+ return (NULL);
+ }
+
+ /* Save the info in the return structure */
+
+ external_info->wildcard_spec = wildcard_spec;
+ external_info->requested_file_type = requested_file_type;
+ external_info->dir_pathname = dir_pathname;
+ external_info->dir_ptr = dir;
+ return (external_info);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_next_filename
+ *
+ * PARAMETERS: dir_handle - Created via acpi_os_open_directory
+ *
+ * RETURN: Next filename matched. NULL if no more matches.
+ *
+ * DESCRIPTION: Get the next file in the directory that matches the wildcard
+ * specification.
+ *
+ ******************************************************************************/
+
+char *acpi_os_get_next_filename(void *dir_handle)
+{
+ struct external_find_info *external_info = dir_handle;
+ struct dirent *dir_entry;
+ char *temp_str;
+ int str_len;
+ struct stat temp_stat;
+ int err;
+
+ while ((dir_entry = readdir(external_info->dir_ptr))) {
+ if (!fnmatch
+ (external_info->wildcard_spec, dir_entry->d_name, 0)) {
+ if (dir_entry->d_name[0] == '.') {
+ continue;
+ }
+
+ str_len = strlen(dir_entry->d_name) +
+ strlen(external_info->dir_pathname) + 2;
+
+ temp_str = calloc(str_len, 1);
+ if (!temp_str) {
+ fprintf(stderr,
+ "Could not allocate buffer for temporary string\n");
+ return (NULL);
+ }
+
+ strcpy(temp_str, external_info->dir_pathname);
+ strcat(temp_str, "/");
+ strcat(temp_str, dir_entry->d_name);
+
+ err = stat(temp_str, &temp_stat);
+ if (err == -1) {
+ fprintf(stderr,
+ "Cannot stat file (should not happen) - %s\n",
+ temp_str);
+ free(temp_str);
+ return (NULL);
+ }
+
+ free(temp_str);
+
+ if ((S_ISDIR(temp_stat.st_mode)
+ && (external_info->requested_file_type ==
+ REQUEST_DIR_ONLY))
+ || ((!S_ISDIR(temp_stat.st_mode)
+ && external_info->requested_file_type ==
+ REQUEST_FILE_ONLY))) {
+
+ /* copy to a temp buffer because dir_entry struct is on the stack */
+
+ strcpy(external_info->temp_buffer,
+ dir_entry->d_name);
+ return (external_info->temp_buffer);
+ }
+ }
+ }
+
+ return (NULL);
+}
+
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_os_close_directory
+ *
+ * PARAMETERS: dir_handle - Created via acpi_os_open_directory
+ *
+ * RETURN: None.
+ *
+ * DESCRIPTION: Close the open directory and cleanup.
+ *
+ ******************************************************************************/
+
+void acpi_os_close_directory(void *dir_handle)
+{
+ struct external_find_info *external_info = dir_handle;
+
+ /* Close the directory and free allocations */
+
+ closedir(external_info->dir_ptr);
+ free(dir_handle);
+}
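+
+/*
+ * Usage sketch (illustrative only): list the regular files in a
+ * directory using the three interfaces above.
+ */
+static void os_example_list_files(char *dir_pathname)
+{
+	void *dir_handle;
+	char *filename;
+
+	dir_handle = acpi_os_open_directory(dir_pathname, "*",
+					    REQUEST_FILE_ONLY);
+	if (!dir_handle) {
+		return;
+	}
+
+	while ((filename = acpi_os_get_next_filename(dir_handle))) {
+		printf("%s\n", filename);
+	}
+
+	acpi_os_close_directory(dir_handle);
+}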
diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c
new file mode 100644
index 000000000..93d835930
--- /dev/null
+++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: osunixmap - Unix OSL for file mappings
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#include "acpidump.h"
+#include <unistd.h>
+#include <sys/mman.h>
+#ifdef _free_BSD
+#include <sys/param.h>
+#endif
+
+#define _COMPONENT ACPI_OS_SERVICES
+ACPI_MODULE_NAME("osunixmap")
+
+#ifndef O_BINARY
+#define O_BINARY 0
+#endif
+#if defined(_dragon_fly) || defined(_free_BSD) || defined(_QNX)
+#define MMAP_FLAGS MAP_SHARED
+#else
+#define MMAP_FLAGS MAP_PRIVATE
+#endif
+#define SYSTEM_MEMORY "/dev/mem"
+/*******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_page_size
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Page size of the platform.
+ *
+ * DESCRIPTION: Obtain page size of the platform.
+ *
+ ******************************************************************************/
+static acpi_size acpi_os_get_page_size(void)
+{
+
+#ifdef PAGE_SIZE
+ return PAGE_SIZE;
+#else
+ return sysconf(_SC_PAGESIZE);
+#endif
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_map_memory
+ *
+ * PARAMETERS: where - Physical address of memory to be mapped
+ * length - How much memory to map
+ *
+ * RETURN: Pointer to mapped memory. Null on error.
+ *
+ * DESCRIPTION: Map physical memory into local address space.
+ *
+ *****************************************************************************/
+
+void *acpi_os_map_memory(acpi_physical_address where, acpi_size length)
+{
+ u8 *mapped_memory;
+ acpi_physical_address offset;
+ acpi_size page_size;
+ int fd;
+
+ fd = open(SYSTEM_MEMORY, O_RDONLY | O_BINARY);
+ if (fd < 0) {
+ fprintf(stderr, "Cannot open %s\n", SYSTEM_MEMORY);
+ return (NULL);
+ }
+
+ /* Align the offset to use mmap */
+
+ page_size = acpi_os_get_page_size();
+ offset = where % page_size;
+
+ /* Map the table header to get the length of the full table */
+
+ mapped_memory = mmap(NULL, (length + offset), PROT_READ, MMAP_FLAGS,
+ fd, (where - offset));
+ if (mapped_memory == MAP_FAILED) {
+ fprintf(stderr, "Cannot map %s\n", SYSTEM_MEMORY);
+ close(fd);
+ return (NULL);
+ }
+
+ close(fd);
+ return (ACPI_CAST8(mapped_memory + offset));
+}
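+
+/*
+ * Worked example of the alignment above, assuming a 4096-byte page:
+ * mapping 52 bytes at physical 0x7B7E6014 gives offset 0x14, so mmap()
+ * covers (52 + 0x14) bytes starting at 0x7B7E6000, and the returned
+ * pointer is advanced by 0x14 bytes to reach the requested address.
+ */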
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_unmap_memory
+ *
+ * PARAMETERS: where - Logical address of memory to be unmapped
+ * length - How much memory to unmap
+ *
+ * RETURN: None.
+ *
+ * DESCRIPTION: Delete a previously created mapping. Where and Length must
+ * correspond to a previous mapping exactly.
+ *
+ *****************************************************************************/
+
+void acpi_os_unmap_memory(void *where, acpi_size length)
+{
+ acpi_physical_address offset;
+ acpi_size page_size;
+
+ page_size = acpi_os_get_page_size();
+ offset = ACPI_TO_INTEGER(where) % page_size;
+ munmap((u8 *)where - offset, (length + offset));
+}
diff --git a/tools/power/acpi/os_specific/service_layers/osunixxf.c b/tools/power/acpi/os_specific/service_layers/osunixxf.c
new file mode 100644
index 000000000..8a88e8777
--- /dev/null
+++ b/tools/power/acpi/os_specific/service_layers/osunixxf.c
@@ -0,0 +1,1317 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: osunixxf - UNIX OSL interfaces
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+/*
+ * These interfaces are required in order to compile the ASL compiler and the
+ * various ACPICA tools under Linux or other Unix-like systems.
+ */
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "amlcode.h"
+#include "acparser.h"
+#include "acdebug.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <errno.h>
+
+#define _COMPONENT ACPI_OS_SERVICES
+ACPI_MODULE_NAME("osunixxf")
+
+/* Upcalls to acpi_exec */
+void
+ae_table_override(struct acpi_table_header *existing_table,
+ struct acpi_table_header **new_table);
+
+typedef void *(*PTHREAD_CALLBACK) (void *);
+
+/* Buffer used by acpi_os_vprintf */
+
+#define ACPI_VPRINTF_BUFFER_SIZE 512
+#define _ASCII_NEWLINE '\n'
+
+/* Terminal support for acpi_exec only */
+
+#ifdef ACPI_EXEC_APP
+#include <termios.h>
+
+struct termios original_term_attributes;
+int term_attributes_were_set = 0;
+
+acpi_status acpi_ut_read_line(char *buffer, u32 buffer_length, u32 *bytes_read);
+
+static void os_enter_line_edit_mode(void);
+
+static void os_exit_line_edit_mode(void);
+
+/******************************************************************************
+ *
+ * FUNCTION: os_enter_line_edit_mode, os_exit_line_edit_mode
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Enter/Exit the raw character input mode for the terminal.
+ *
+ * Interactive line-editing support for the AML debugger. Used with the
+ * common/acgetline module.
+ *
+ * readline() is not used because of non-portability. It is not available
+ * on all systems, and if it is, often the package must be manually installed.
+ *
+ * Therefore, we use the POSIX tcgetattr/tcsetattr and do the minimal line
+ * editing that we need in acpi_os_get_line.
+ *
+ * If the POSIX tcgetattr/tcsetattr interfaces are unavailable, these
+ * calls will also work:
+ * For os_enter_line_edit_mode: system ("stty cbreak -echo")
+ * For os_exit_line_edit_mode: system ("stty cooked echo")
+ *
+ *****************************************************************************/
+
+static void os_enter_line_edit_mode(void)
+{
+ struct termios local_term_attributes;
+
+ term_attributes_were_set = 0;
+
+ /* STDIN must be a terminal */
+
+ if (!isatty(STDIN_FILENO)) {
+ return;
+ }
+
+ /* Get and keep the original attributes */
+
+ if (tcgetattr(STDIN_FILENO, &original_term_attributes)) {
+ fprintf(stderr, "Could not get terminal attributes!\n");
+ return;
+ }
+
+ /* Set the new attributes to enable raw character input */
+
+ memcpy(&local_term_attributes, &original_term_attributes,
+ sizeof(struct termios));
+
+ local_term_attributes.c_lflag &= ~(ICANON | ECHO);
+ local_term_attributes.c_cc[VMIN] = 1;
+ local_term_attributes.c_cc[VTIME] = 0;
+
+ if (tcsetattr(STDIN_FILENO, TCSANOW, &local_term_attributes)) {
+ fprintf(stderr, "Could not set terminal attributes!\n");
+ return;
+ }
+
+ term_attributes_were_set = 1;
+}
+
+static void os_exit_line_edit_mode(void)
+{
+
+ if (!term_attributes_were_set) {
+ return;
+ }
+
+ /* Set terminal attributes back to the original values */
+
+ if (tcsetattr(STDIN_FILENO, TCSANOW, &original_term_attributes)) {
+ fprintf(stderr, "Could not restore terminal attributes!\n");
+ }
+}
+
+#else
+
+/* These functions are not needed for other ACPICA utilities */
+
+#define os_enter_line_edit_mode()
+#define os_exit_line_edit_mode()
+#endif
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_initialize, acpi_os_terminate
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Initialize and terminate this module.
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_initialize(void)
+{
+ acpi_status status;
+
+ acpi_gbl_output_file = stdout;
+
+ os_enter_line_edit_mode();
+
+ status = acpi_os_create_lock(&acpi_gbl_print_lock);
+ if (ACPI_FAILURE(status)) {
+ return (status);
+ }
+
+ return (AE_OK);
+}
+
+acpi_status acpi_os_terminate(void)
+{
+
+ os_exit_line_edit_mode();
+ return (AE_OK);
+}
+
+#ifndef ACPI_USE_NATIVE_RSDP_POINTER
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_root_pointer
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: RSDP physical address
+ *
+ * DESCRIPTION: Gets the ACPI root pointer (RSDP)
+ *
+ *****************************************************************************/
+
+acpi_physical_address acpi_os_get_root_pointer(void)
+{
+
+ return (0);
+}
+#endif
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_predefined_override
+ *
+ * PARAMETERS: init_val - Initial value of the predefined object
+ * new_val - The new value for the object
+ *
+ * RETURN: Status, pointer to value. Null pointer returned if not
+ * overriding.
+ *
+ * DESCRIPTION: Allow the OS to override predefined names
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_predefined_override(const struct acpi_predefined_names *init_val,
+ acpi_string *new_val)
+{
+
+ if (!init_val || !new_val) {
+ return (AE_BAD_PARAMETER);
+ }
+
+ *new_val = NULL;
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_table_override
+ *
+ * PARAMETERS: existing_table - Header of current table (probably
+ * firmware)
+ * new_table - Where an entire new table is returned.
+ *
+ * RETURN: Status, pointer to new table. Null pointer returned if no
+ * table is available to override
+ *
+ * DESCRIPTION: Return a different version of a table if one is available
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_table_override(struct acpi_table_header *existing_table,
+ struct acpi_table_header **new_table)
+{
+
+ if (!existing_table || !new_table) {
+ return (AE_BAD_PARAMETER);
+ }
+
+ *new_table = NULL;
+
+#ifdef ACPI_EXEC_APP
+
+ ae_table_override(existing_table, new_table);
+ return (AE_OK);
+#else
+
+ return (AE_NO_ACPI_TABLES);
+#endif
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_physical_table_override
+ *
+ * PARAMETERS: existing_table - Header of current table (probably firmware)
+ * new_address - Where new table address is returned
+ * (Physical address)
+ * new_table_length - Where new table length is returned
+ *
+ * RETURN: Status, address/length of new table. Null pointer returned
+ * if no table is available to override.
+ *
+ * DESCRIPTION: Returns AE_SUPPORT, function not used in user space.
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_physical_table_override(struct acpi_table_header *existing_table,
+ acpi_physical_address *new_address,
+ u32 *new_table_length)
+{
+
+ return (AE_SUPPORT);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_enter_sleep
+ *
+ * PARAMETERS: sleep_state - Which sleep state to enter
+ * rega_value - Register A value
+ * regb_value - Register B value
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: A hook before writing sleep registers to enter the sleep
+ * state. Return AE_CTRL_TERMINATE to skip further sleep register
+ * writes.
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_enter_sleep(u8 sleep_state, u32 rega_value, u32 regb_value)
+{
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_redirect_output
+ *
+ * PARAMETERS: destination - An open file handle/pointer
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Causes redirect of acpi_os_printf and acpi_os_vprintf
+ *
+ *****************************************************************************/
+
+void acpi_os_redirect_output(void *destination)
+{
+
+ acpi_gbl_output_file = destination;
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_printf
+ *
+ * PARAMETERS: fmt, ... - Standard printf format
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Formatted output. Note: kept very similar to
+ * acpi_os_vprintf for performance reasons; changes should be tracked
+ * in both functions.
+ *
+ *****************************************************************************/
+
+void ACPI_INTERNAL_VAR_XFACE acpi_os_printf(const char *fmt, ...)
+{
+ va_list args;
+ u8 flags;
+
+ flags = acpi_gbl_db_output_flags;
+ if (flags & ACPI_DB_REDIRECTABLE_OUTPUT) {
+
+ /* Output is directable to either a file (if open) or the console */
+
+ if (acpi_gbl_debug_file) {
+
+ /* Output file is open, send the output there */
+
+ va_start(args, fmt);
+ vfprintf(acpi_gbl_debug_file, fmt, args);
+ va_end(args);
+ } else {
+ /* No redirection, send output to console (once only!) */
+
+ flags |= ACPI_DB_CONSOLE_OUTPUT;
+ }
+ }
+
+ if (flags & ACPI_DB_CONSOLE_OUTPUT) {
+ va_start(args, fmt);
+ vfprintf(acpi_gbl_output_file, fmt, args);
+ va_end(args);
+ }
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_vprintf
+ *
+ * PARAMETERS: fmt - Standard printf format
+ * args - Argument list
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Formatted output with argument list pointer. Note: very
+ * similar to acpi_os_printf, changes should be tracked in both
+ * functions.
+ *
+ *****************************************************************************/
+
+void acpi_os_vprintf(const char *fmt, va_list args)
+{
+ u8 flags;
+ char buffer[ACPI_VPRINTF_BUFFER_SIZE];
+
+ /*
+ * We build the output string in a local buffer because we may be
+ * outputting the buffer twice. Using vfprintf is problematic because
+ * some implementations modify the args pointer/structure during
+ * execution. Thus, we use the local buffer for portability.
+ *
+ * Note: Since this module is intended for use by the various ACPICA
+ * utilities/applications, we can safely declare the buffer on the stack.
+ * Also, this function is used for relatively small error messages only.
+ */
+ vsnprintf(buffer, ACPI_VPRINTF_BUFFER_SIZE, fmt, args);
+
+ flags = acpi_gbl_db_output_flags;
+ if (flags & ACPI_DB_REDIRECTABLE_OUTPUT) {
+
+ /* Output is directable to either a file (if open) or the console */
+
+ if (acpi_gbl_debug_file) {
+
+ /* Output file is open, send the output there */
+
+ fputs(buffer, acpi_gbl_debug_file);
+ } else {
+ /* No redirection, send output to console (once only!) */
+
+ flags |= ACPI_DB_CONSOLE_OUTPUT;
+ }
+ }
+
+ if (flags & ACPI_DB_CONSOLE_OUTPUT) {
+ fputs(buffer, acpi_gbl_output_file);
+ }
+}
+
+#ifndef ACPI_EXEC_APP
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_line
+ *
+ * PARAMETERS: buffer - Where to return the command line
+ * buffer_length - Maximum length of Buffer
+ * bytes_read - Where the actual byte count is returned
+ *
+ * RETURN: Status and actual bytes read
+ *
+ * DESCRIPTION: Get the next input line from the terminal. NOTE: For the
+ * acpi_exec utility, we use the acgetline module instead to
+ * provide line-editing and history support.
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_get_line(char *buffer, u32 buffer_length, u32 *bytes_read)
+{
+ int input_char;
+ u32 end_of_line;
+
+ /* Standard acpi_os_get_line for all utilities except acpi_exec */
+
+ for (end_of_line = 0;; end_of_line++) {
+ if (end_of_line >= buffer_length) {
+ return (AE_BUFFER_OVERFLOW);
+ }
+
+ if ((input_char = getchar()) == EOF) {
+ return (AE_ERROR);
+ }
+
+ if (!input_char || input_char == _ASCII_NEWLINE) {
+ break;
+ }
+
+ buffer[end_of_line] = (char)input_char;
+ }
+
+ /* Null terminate the buffer */
+
+ buffer[end_of_line] = 0;
+
+ /* Return the number of bytes in the string */
+
+ if (bytes_read) {
+ *bytes_read = end_of_line;
+ }
+
+ return (AE_OK);
+}
+#endif
+
+#ifndef ACPI_USE_NATIVE_MEMORY_MAPPING
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_map_memory
+ *
+ * PARAMETERS: where - Physical address of memory to be mapped
+ * length - How much memory to map
+ *
+ * RETURN: Pointer to mapped memory. Null on error.
+ *
+ * DESCRIPTION: Map physical memory into caller's address space
+ *
+ *****************************************************************************/
+
+void *acpi_os_map_memory(acpi_physical_address where, acpi_size length)
+{
+
+ return (ACPI_TO_POINTER((acpi_size)where));
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_unmap_memory
+ *
+ * PARAMETERS: where - Logical address of memory to be unmapped
+ * length - How much memory to unmap
+ *
+ * RETURN: None.
+ *
+ * DESCRIPTION: Delete a previously created mapping. Where and Length must
+ * correspond to a previous mapping exactly.
+ *
+ *****************************************************************************/
+
+void acpi_os_unmap_memory(void *where, acpi_size length)
+{
+
+ return;
+}
+#endif
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_allocate
+ *
+ * PARAMETERS: size - Amount to allocate, in bytes
+ *
+ * RETURN: Pointer to the new allocation. Null on error.
+ *
+ * DESCRIPTION: Allocate memory. Algorithm is dependent on the OS.
+ *
+ *****************************************************************************/
+
+void *acpi_os_allocate(acpi_size size)
+{
+ void *mem;
+
+ mem = (void *)malloc((size_t) size);
+ return (mem);
+}
+
+#ifdef USE_NATIVE_ALLOCATE_ZEROED
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_allocate_zeroed
+ *
+ * PARAMETERS: size - Amount to allocate, in bytes
+ *
+ * RETURN: Pointer to the new allocation. Null on error.
+ *
+ * DESCRIPTION: Allocate and zero memory. Algorithm is dependent on the OS.
+ *
+ *****************************************************************************/
+
+void *acpi_os_allocate_zeroed(acpi_size size)
+{
+ void *mem;
+
+ mem = (void *)calloc(1, (size_t) size);
+ return (mem);
+}
+#endif
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_free
+ *
+ * PARAMETERS: mem - Pointer to previously allocated memory
+ *
+ * RETURN: None.
+ *
+ * DESCRIPTION: Free memory allocated via acpi_os_allocate
+ *
+ *****************************************************************************/
+
+void acpi_os_free(void *mem)
+{
+
+ free(mem);
+}
+
+#ifdef ACPI_SINGLE_THREADED
+/******************************************************************************
+ *
+ * FUNCTION: Semaphore stub functions
+ *
+ * DESCRIPTION: Stub functions used for single-thread applications that do
+ * not require semaphore synchronization. Full implementations
+ * of these functions appear after the stubs.
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_create_semaphore(u32 max_units,
+ u32 initial_units, acpi_handle *out_handle)
+{
+ *out_handle = (acpi_handle)1;
+ return (AE_OK);
+}
+
+acpi_status acpi_os_delete_semaphore(acpi_handle handle)
+{
+ return (AE_OK);
+}
+
+acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout)
+{
+ return (AE_OK);
+}
+
+acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units)
+{
+ return (AE_OK);
+}
+
+#else
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_create_semaphore
+ *
+ * PARAMETERS: max_units - Maximum semaphore units (not used here)
+ * initial_units - Units to be assigned to the new semaphore
+ * out_handle - Where a handle will be returned
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Create an OS semaphore
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_create_semaphore(u32 max_units,
+ u32 initial_units, acpi_handle *out_handle)
+{
+ sem_t *sem;
+
+ if (!out_handle) {
+ return (AE_BAD_PARAMETER);
+ }
+#ifdef __APPLE__
+ {
+ static int semaphore_count = 0;
+ char semaphore_name[32];
+
+ snprintf(semaphore_name, sizeof(semaphore_name), "acpi_sem_%d",
+ semaphore_count++);
+ printf("%s\n", semaphore_name);
+ sem =
+ sem_open(semaphore_name, O_EXCL | O_CREAT, 0755,
+ initial_units);
+ if (!sem) {
+ return (AE_NO_MEMORY);
+ }
+ sem_unlink(semaphore_name); /* This just deletes the name */
+ }
+
+#else
+ sem = acpi_os_allocate(sizeof(sem_t));
+ if (!sem) {
+ return (AE_NO_MEMORY);
+ }
+
+ if (sem_init(sem, 0, initial_units) == -1) {
+ acpi_os_free(sem);
+ return (AE_BAD_PARAMETER);
+ }
+#endif
+
+ *out_handle = (acpi_handle)sem;
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_delete_semaphore
+ *
+ * PARAMETERS: handle - Handle returned by acpi_os_create_semaphore
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Delete an OS semaphore
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_delete_semaphore(acpi_handle handle)
+{
+ sem_t *sem = (sem_t *) handle;
+
+ if (!sem) {
+ return (AE_BAD_PARAMETER);
+ }
+#ifdef __APPLE__
+ if (sem_close(sem) == -1) {
+ return (AE_BAD_PARAMETER);
+ }
+#else
+ if (sem_destroy(sem) == -1) {
+ return (AE_BAD_PARAMETER);
+ }
+#endif
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_wait_semaphore
+ *
+ * PARAMETERS: handle - Handle returned by acpi_os_create_semaphore
+ * units - How many units to wait for
+ * msec_timeout - How long to wait (milliseconds)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Wait for units
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 msec_timeout)
+{
+ acpi_status status = AE_OK;
+ sem_t *sem = (sem_t *) handle;
+ int ret_val;
+#ifndef ACPI_USE_ALTERNATE_TIMEOUT
+ struct timespec time;
+#endif
+
+ if (!sem) {
+ return (AE_BAD_PARAMETER);
+ }
+
+ switch (msec_timeout) {
+ /*
+ * No Wait:
+ * --------
+ * A zero timeout value indicates that we shouldn't wait - just
+ * acquire the semaphore if it is available; otherwise return AE_TIME
+ * (a.k.a. 'would block').
+ */
+ case 0:
+
+ if (sem_trywait(sem) == -1) {
+ status = (AE_TIME);
+ }
+ break;
+
+ /* Wait Indefinitely */
+
+ case ACPI_WAIT_FOREVER:
+
+ while (((ret_val = sem_wait(sem)) == -1) && (errno == EINTR)) {
+ continue; /* Restart if interrupted */
+ }
+ if (ret_val != 0) {
+ status = (AE_TIME);
+ }
+ break;
+
+ /* Wait with msec_timeout */
+
+ default:
+
+#ifdef ACPI_USE_ALTERNATE_TIMEOUT
+ /*
+ * Alternate timeout mechanism for environments where
+ * sem_timedwait is not available or does not work properly.
+ */
+ while (msec_timeout) {
+ if (sem_trywait(sem) == 0) {
+
+ /* Got the semaphore */
+ return (AE_OK);
+ }
+
+ if (msec_timeout >= 10) {
+ msec_timeout -= 10;
+ usleep(10 * ACPI_USEC_PER_MSEC); /* ten milliseconds */
+ } else {
+ msec_timeout--;
+ usleep(ACPI_USEC_PER_MSEC); /* one millisecond */
+ }
+ }
+ status = (AE_TIME);
+#else
+ /*
+ * The interface to sem_timedwait is an absolute time, so we need to
+ * get the current time, then add in the millisecond Timeout value.
+ */
+ if (clock_gettime(CLOCK_REALTIME, &time) == -1) {
+ perror("clock_gettime");
+ return (AE_TIME);
+ }
+
+ time.tv_sec += (msec_timeout / ACPI_MSEC_PER_SEC);
+ time.tv_nsec +=
+ ((msec_timeout % ACPI_MSEC_PER_SEC) * ACPI_NSEC_PER_MSEC);
+
+ /* Handle nanosecond overflow (field must be less than one second) */
+
+ if (time.tv_nsec >= ACPI_NSEC_PER_SEC) {
+ time.tv_sec += (time.tv_nsec / ACPI_NSEC_PER_SEC);
+ time.tv_nsec = (time.tv_nsec % ACPI_NSEC_PER_SEC);
+ }
+
+ while (((ret_val = sem_timedwait(sem, &time)) == -1)
+ && (errno == EINTR)) {
+ continue; /* Restart if interrupted */
+
+ }
+
+ if (ret_val != 0) {
+ if (errno != ETIMEDOUT) {
+ perror("sem_timedwait");
+ }
+ status = (AE_TIME);
+ }
+#endif
+ break;
+ }
+
+ return (status);
+}
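+
+/*
+ * Worked example of the absolute-time conversion above: with a current
+ * time of tv_sec = 100, tv_nsec = 900000000 and msec_timeout = 1500,
+ * the additions give tv_sec = 101, tv_nsec = 1400000000; the overflow
+ * branch then normalizes this to tv_sec = 102, tv_nsec = 400000000
+ * before sem_timedwait() is called.
+ */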
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_signal_semaphore
+ *
+ * PARAMETERS: handle - Handle returned by acpi_os_create_semaphore
+ * units - Number of units to send
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Send units
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units)
+{
+ sem_t *sem = (sem_t *) handle;
+
+ if (!sem) {
+ return (AE_BAD_PARAMETER);
+ }
+
+ if (sem_post(sem) == -1) {
+ return (AE_LIMIT);
+ }
+
+ return (AE_OK);
+}
+
+#endif /* ACPI_SINGLE_THREADED */
+
+/******************************************************************************
+ *
+ * FUNCTION: Spinlock interfaces
+ *
+ * DESCRIPTION: Map these interfaces to semaphore interfaces
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_create_lock(acpi_spinlock * out_handle)
+{
+
+ return (acpi_os_create_semaphore(1, 1, out_handle));
+}
+
+void acpi_os_delete_lock(acpi_spinlock handle)
+{
+ acpi_os_delete_semaphore(handle);
+}
+
+acpi_cpu_flags acpi_os_acquire_lock(acpi_handle handle)
+{
+ acpi_os_wait_semaphore(handle, 1, 0xFFFF);
+ return (0);
+}
+
+void acpi_os_release_lock(acpi_spinlock handle, acpi_cpu_flags flags)
+{
+ acpi_os_signal_semaphore(handle, 1);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_install_interrupt_handler
+ *
+ * PARAMETERS: interrupt_number - Interrupt level the handler responds to
+ * service_routine - Address of the ACPI interrupt handler
+ * context - Context passed to the handler when invoked
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Install an interrupt handler. Used to install the ACPI
+ * OS-independent handler.
+ *
+ *****************************************************************************/
+
+u32
+acpi_os_install_interrupt_handler(u32 interrupt_number,
+ acpi_osd_handler service_routine,
+ void *context)
+{
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_remove_interrupt_handler
+ *
+ * PARAMETERS: handle - Returned when handler was installed
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Uninstall an interrupt handler.
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_remove_interrupt_handler(u32 interrupt_number,
+ acpi_osd_handler service_routine)
+{
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_stall
+ *
+ * PARAMETERS: microseconds - Time to sleep
+ *
+ * RETURN: None; blocks until the sleep is completed.
+ *
+ * DESCRIPTION: Sleep at microsecond granularity
+ *
+ *****************************************************************************/
+
+void acpi_os_stall(u32 microseconds)
+{
+
+ if (microseconds) {
+ usleep(microseconds);
+ }
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_sleep
+ *
+ * PARAMETERS: milliseconds - Time to sleep
+ *
+ * RETURN: None; blocks until the sleep is completed.
+ *
+ * DESCRIPTION: Sleep at millisecond granularity
+ *
+ *****************************************************************************/
+
+void acpi_os_sleep(u64 milliseconds)
+{
+
+ /* Sleep for whole seconds */
+
+ sleep(milliseconds / ACPI_MSEC_PER_SEC);
+
+ /*
+ * Sleep for remaining microseconds.
+ * Arg to usleep() is in usecs and must be less than 1,000,000 (1 second).
+ */
+ usleep((milliseconds % ACPI_MSEC_PER_SEC) * ACPI_USEC_PER_MSEC);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_timer
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Current time in 100 nanosecond units
+ *
+ * DESCRIPTION: Get the current system time
+ *
+ *****************************************************************************/
+
+u64 acpi_os_get_timer(void)
+{
+ struct timeval time;
+
+ /* This timer has sufficient resolution for user-space application code */
+
+ gettimeofday(&time, NULL);
+
+ /* (Seconds * 10^7 = 100ns(10^-7)) + (Microseconds(10^-6) * 10^1 = 100ns) */
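+ /* Example: tv_sec = 1, tv_usec = 5 yields 10000000 + 50 = 10000050 (100 ns units) */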
+
+ return (((u64)time.tv_sec * ACPI_100NSEC_PER_SEC) +
+ ((u64)time.tv_usec * ACPI_100NSEC_PER_USEC));
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_read_pci_configuration
+ *
+ * PARAMETERS: pci_id - Seg/Bus/Dev
+ * pci_register - Device Register
+ * value - Buffer where value is placed
+ * width - Number of bits
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Read data from PCI configuration space
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_read_pci_configuration(struct acpi_pci_id *pci_id,
+ u32 pci_register, u64 *value, u32 width)
+{
+
+ *value = 0;
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_write_pci_configuration
+ *
+ * PARAMETERS: pci_id - Seg/Bus/Dev
+ * pci_register - Device Register
+ * value - Value to be written
+ * width - Number of bits
+ *
+ * RETURN: Status.
+ *
+ * DESCRIPTION: Write data to PCI configuration space
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_write_pci_configuration(struct acpi_pci_id *pci_id,
+ u32 pci_register, u64 value, u32 width)
+{
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_read_port
+ *
+ * PARAMETERS: address - Address of I/O port/register to read
+ * value - Where value is placed
+ * width - Number of bits
+ *
+ * RETURN: Status. The value read is placed in *value.
+ *
+ * DESCRIPTION: Read data from an I/O port or register
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_read_port(acpi_io_address address, u32 *value, u32 width)
+{
+
+ switch (width) {
+ case 8:
+
+ *value = 0xFF;
+ break;
+
+ case 16:
+
+ *value = 0xFFFF;
+ break;
+
+ case 32:
+
+ *value = 0xFFFFFFFF;
+ break;
+
+ default:
+
+ return (AE_BAD_PARAMETER);
+ }
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_write_port
+ *
+ * PARAMETERS: address - Address of I/O port/register to write
+ * value - Value to write
+ * width - Number of bits
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Write data to an I/O port or register
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_write_port(acpi_io_address address, u32 value, u32 width)
+{
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_read_memory
+ *
+ * PARAMETERS: address - Physical Memory Address to read
+ * value - Where value is placed
+ * width - Number of bits (8,16,32, or 64)
+ *
+ * RETURN: Status. The value read is returned through *value, always
+ * widened to a 64-bit integer regardless of the read width.
+ *
+ * DESCRIPTION: Read data from a physical memory address
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_read_memory(acpi_physical_address address, u64 *value, u32 width)
+{
+
+ switch (width) {
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+
+ *value = 0;
+ break;
+
+ default:
+
+ return (AE_BAD_PARAMETER);
+ }
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_write_memory
+ *
+ * PARAMETERS: address - Physical Memory Address to write
+ * value - Value to write
+ * width - Number of bits (8,16,32, or 64)
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Write data to a physical memory address
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_write_memory(acpi_physical_address address, u64 value, u32 width)
+{
+
+ return (AE_OK);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_readable
+ *
+ * PARAMETERS: pointer - Area to be verified
+ * length - Size of area
+ *
+ * RETURN: TRUE if readable for entire length
+ *
+ * DESCRIPTION: Verify that a pointer is valid for reading
+ *
+ *****************************************************************************/
+
+u8 acpi_os_readable(void *pointer, acpi_size length)
+{
+
+ return (TRUE);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_writable
+ *
+ * PARAMETERS: pointer - Area to be verified
+ * length - Size of area
+ *
+ * RETURN: TRUE if writable for entire length
+ *
+ * DESCRIPTION: Verify that a pointer is valid for writing
+ *
+ *****************************************************************************/
+
+u8 acpi_os_writable(void *pointer, acpi_size length)
+{
+
+ return (TRUE);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_signal
+ *
+ * PARAMETERS: function - ACPI signal function code
+ * info - Pointer to function-dependent structure
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Miscellaneous functions. Example implementation only.
+ *
+ *****************************************************************************/
+
+acpi_status acpi_os_signal(u32 function, void *info)
+{
+
+ switch (function) {
+ case ACPI_SIGNAL_FATAL:
+
+ break;
+
+ case ACPI_SIGNAL_BREAKPOINT:
+
+ break;
+
+ default:
+
+ break;
+ }
+
+ return (AE_OK);
+}
+
+/* Optional multi-thread support */
+
+#ifndef ACPI_SINGLE_THREADED
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_get_thread_id
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Id of the running thread
+ *
+ * DESCRIPTION: Get the ID of the current (running) thread
+ *
+ *****************************************************************************/
+
+acpi_thread_id acpi_os_get_thread_id(void)
+{
+ pthread_t thread;
+
+ thread = pthread_self();
+ return (ACPI_CAST_PTHREAD_T(thread));
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_execute
+ *
+ * PARAMETERS: type - Type of execution
+ * function - Address of the function to execute
+ * context - Passed as a parameter to the function
+ *
+ * RETURN: Status.
+ *
+ * DESCRIPTION: Execute a new thread
+ *
+ *****************************************************************************/
+
+acpi_status
+acpi_os_execute(acpi_execute_type type,
+ acpi_osd_exec_callback function, void *context)
+{
+ pthread_t thread;
+ int ret;
+
+ ret =
+ pthread_create(&thread, NULL, (PTHREAD_CALLBACK) function, context);
+ if (ret) {
+ acpi_os_printf("Could not create thread\n");
+ return (AE_ERROR);
+ }
+
+ return (AE_OK);
+}
+
+#else /* ACPI_SINGLE_THREADED */
+acpi_thread_id acpi_os_get_thread_id(void)
+{
+ return (1);
+}
+
+acpi_status
+acpi_os_execute(acpi_execute_type type,
+ acpi_osd_exec_callback function, void *context)
+{
+
+ function(context);
+
+ return (AE_OK);
+}
+
+#endif /* ACPI_SINGLE_THREADED */
+
+/******************************************************************************
+ *
+ * FUNCTION: acpi_os_wait_events_complete
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Wait for all asynchronous events to complete. This
+ * implementation does nothing.
+ *
+ *****************************************************************************/
+
+void acpi_os_wait_events_complete(void)
+{
+ return;
+}
diff --git a/tools/power/acpi/tools/acpidbg/Makefile b/tools/power/acpi/tools/acpidbg/Makefile
new file mode 100644
index 000000000..f2d06e773
--- /dev/null
+++ b/tools/power/acpi/tools/acpidbg/Makefile
@@ -0,0 +1,25 @@
+# tools/power/acpi/tools/acpidbg/Makefile - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+include ../../Makefile.config
+
+TOOL = acpidbg
+vpath %.c \
+ ../../../../../drivers/acpi/acpica\
+ ../../common\
+ ../../os_specific/service_layers\
+ .
+CFLAGS += -DACPI_APPLICATION -DACPI_SINGLE_THREAD -DACPI_DEBUGGER\
+ -I.
+LDFLAGS += -lpthread
+TOOL_OBJS = \
+ acpidbg.o
+
+include ../../Makefile.rules
diff --git a/tools/power/acpi/tools/acpidbg/acpidbg.c b/tools/power/acpi/tools/acpidbg/acpidbg.c
new file mode 100644
index 000000000..4308362d7
--- /dev/null
+++ b/tools/power/acpi/tools/acpidbg/acpidbg.c
@@ -0,0 +1,444 @@
+/*
+ * ACPI AML interfacing userspace utility
+ *
+ * Copyright (C) 2015, Intel Corporation
+ * Authors: Lv Zheng <lv.zheng@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <acpi/acpi.h>
+
+/* Headers not included by include/acpi/platform/aclinux.h */
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <error.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/select.h>
+#include "../../../../../include/linux/circ_buf.h"
+
+#define ACPI_AML_FILE "/sys/kernel/debug/acpi/acpidbg"
+#define ACPI_AML_SEC_TICK 1
+#define ACPI_AML_USEC_PEEK 200
+#define ACPI_AML_BUF_SIZE 4096
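+/* Must be a power of two: indices below are advanced with "& (ACPI_AML_BUF_SIZE - 1)" */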
+
+#define ACPI_AML_BATCH_WRITE_CMD 0x00 /* Write command to kernel */
+#define ACPI_AML_BATCH_READ_LOG 0x01 /* Read log from kernel */
+#define ACPI_AML_BATCH_WRITE_LOG 0x02 /* Write log to console */
+
+#define ACPI_AML_LOG_START 0x00
+#define ACPI_AML_PROMPT_START 0x01
+#define ACPI_AML_PROMPT_STOP 0x02
+#define ACPI_AML_LOG_STOP 0x03
+#define ACPI_AML_PROMPT_ROLL 0x04
+
+#define ACPI_AML_INTERACTIVE 0x00
+#define ACPI_AML_BATCH 0x01
+
+#define circ_count(circ) \
+ (CIRC_CNT((circ)->head, (circ)->tail, ACPI_AML_BUF_SIZE))
+#define circ_count_to_end(circ) \
+ (CIRC_CNT_TO_END((circ)->head, (circ)->tail, ACPI_AML_BUF_SIZE))
+#define circ_space(circ) \
+ (CIRC_SPACE((circ)->head, (circ)->tail, ACPI_AML_BUF_SIZE))
+#define circ_space_to_end(circ) \
+ (CIRC_SPACE_TO_END((circ)->head, (circ)->tail, ACPI_AML_BUF_SIZE))
+
+#define acpi_aml_cmd_count() circ_count(&acpi_aml_cmd_crc)
+#define acpi_aml_log_count() circ_count(&acpi_aml_log_crc)
+#define acpi_aml_cmd_space() circ_space(&acpi_aml_cmd_crc)
+#define acpi_aml_log_space() circ_space(&acpi_aml_log_crc)
+
+#define ACPI_AML_DO(_fd, _op, _buf, _ret) \
+ do { \
+ _ret = acpi_aml_##_op(_fd, &acpi_aml_##_buf##_crc); \
+ if (_ret == 0) { \
+ fprintf(stderr, \
+ "%s %s pipe closed.\n", #_buf, #_op); \
+ return; \
+ } \
+ } while (0)
+#define ACPI_AML_BATCH_DO(_fd, _op, _buf, _ret) \
+ do { \
+ _ret = acpi_aml_##_op##_batch_##_buf(_fd, \
+ &acpi_aml_##_buf##_crc); \
+ if (_ret == 0) \
+ return; \
+ } while (0)
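+/*
+ * For example, ACPI_AML_DO(fd, read, log, ret) expands to a call to
+ * acpi_aml_read(fd, &acpi_aml_log_crc), and ACPI_AML_BATCH_DO(fd, read,
+ * log, ret) to acpi_aml_read_batch_log(fd, &acpi_aml_log_crc).
+ */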
+
+
+static char acpi_aml_cmd_buf[ACPI_AML_BUF_SIZE];
+static char acpi_aml_log_buf[ACPI_AML_BUF_SIZE];
+static struct circ_buf acpi_aml_cmd_crc = {
+ .buf = acpi_aml_cmd_buf,
+ .head = 0,
+ .tail = 0,
+};
+static struct circ_buf acpi_aml_log_crc = {
+ .buf = acpi_aml_log_buf,
+ .head = 0,
+ .tail = 0,
+};
+static const char *acpi_aml_file_path = ACPI_AML_FILE;
+static unsigned long acpi_aml_mode = ACPI_AML_INTERACTIVE;
+static bool acpi_aml_exit;
+
+static bool acpi_aml_batch_drain;
+static unsigned long acpi_aml_batch_state;
+static char acpi_aml_batch_prompt;
+static char acpi_aml_batch_roll;
+static unsigned long acpi_aml_log_state;
+static char *acpi_aml_batch_cmd = NULL;
+static char *acpi_aml_batch_pos = NULL;
+
+static int acpi_aml_set_fl(int fd, int flags)
+{
+ int ret;
+
+ ret = fcntl(fd, F_GETFL, 0);
+ if (ret < 0) {
+ perror("fcntl(F_GETFL)");
+ return ret;
+ }
+ flags |= ret;
+ ret = fcntl(fd, F_SETFL, flags);
+ if (ret < 0) {
+ perror("fcntl(F_SETFL)");
+ return ret;
+ }
+ return ret;
+}
+
+static int acpi_aml_set_fd(int fd, int maxfd, fd_set *set)
+{
+ if (fd > maxfd)
+ maxfd = fd;
+ FD_SET(fd, set);
+ return maxfd;
+}
+
+static int acpi_aml_read(int fd, struct circ_buf *crc)
+{
+ char *p;
+ int len;
+
+ p = &crc->buf[crc->head];
+ len = circ_space_to_end(crc);
+ len = read(fd, p, len);
+ if (len < 0)
+ perror("read");
+ else if (len > 0)
+ crc->head = (crc->head + len) & (ACPI_AML_BUF_SIZE - 1);
+ return len;
+}
+
+static int acpi_aml_read_batch_cmd(int unused, struct circ_buf *crc)
+{
+ char *p;
+ int len;
+ int remained = strlen(acpi_aml_batch_pos);
+
+ p = &crc->buf[crc->head];
+ len = circ_space_to_end(crc);
+ if (len > remained) {
+ memcpy(p, acpi_aml_batch_pos, remained);
+ acpi_aml_batch_pos += remained;
+ len = remained;
+ } else {
+ memcpy(p, acpi_aml_batch_pos, len);
+ acpi_aml_batch_pos += len;
+ }
+ if (len > 0)
+ crc->head = (crc->head + len) & (ACPI_AML_BUF_SIZE - 1);
+ return len;
+}
+
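+/*
+ * Read kernel debugger output one byte at a time, running a small state
+ * machine over it: LOG_START consumes regular log lines, PROMPT_START
+ * watches the first character after a newline for a debugger prompt,
+ * PROMPT_STOP checks the character after the prompt (a space means the
+ * command has completed), and PROMPT_ROLL re-emits a mis-detected prompt
+ * character on the next iteration.
+ */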
+static int acpi_aml_read_batch_log(int fd, struct circ_buf *crc)
+{
+ char *p;
+ int len;
+ int ret = 0;
+
+ p = &crc->buf[crc->head];
+ len = circ_space_to_end(crc);
+ while (ret < len && acpi_aml_log_state != ACPI_AML_LOG_STOP) {
+ if (acpi_aml_log_state == ACPI_AML_PROMPT_ROLL) {
+ *p = acpi_aml_batch_roll;
+ len = 1;
+ crc->head = (crc->head + 1) & (ACPI_AML_BUF_SIZE - 1);
+ ret += 1;
+ acpi_aml_log_state = ACPI_AML_LOG_START;
+ } else {
+ len = read(fd, p, 1);
+ if (len <= 0) {
+ if (len < 0)
+ perror("read");
+ ret = len;
+ break;
+ }
+ }
+ switch (acpi_aml_log_state) {
+ case ACPI_AML_LOG_START:
+ if (*p == '\n')
+ acpi_aml_log_state = ACPI_AML_PROMPT_START;
+ crc->head = (crc->head + 1) & (ACPI_AML_BUF_SIZE - 1);
+ ret += 1;
+ break;
+ case ACPI_AML_PROMPT_START:
+ if (*p == ACPI_DEBUGGER_COMMAND_PROMPT ||
+ *p == ACPI_DEBUGGER_EXECUTE_PROMPT) {
+ acpi_aml_batch_prompt = *p;
+ acpi_aml_log_state = ACPI_AML_PROMPT_STOP;
+ } else {
+ if (*p != '\n')
+ acpi_aml_log_state = ACPI_AML_LOG_START;
+ crc->head = (crc->head + 1) & (ACPI_AML_BUF_SIZE - 1);
+ ret += 1;
+ }
+ break;
+ case ACPI_AML_PROMPT_STOP:
+ if (*p == ' ') {
+ acpi_aml_log_state = ACPI_AML_LOG_STOP;
+ acpi_aml_exit = true;
+ } else {
+ /* Roll back */
+ acpi_aml_log_state = ACPI_AML_PROMPT_ROLL;
+ acpi_aml_batch_roll = *p;
+ *p = acpi_aml_batch_prompt;
+ crc->head = (crc->head + 1) & (ACPI_AML_BUF_SIZE - 1);
+ ret += 1;
+ }
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+ return ret;
+}
+
+static int acpi_aml_write(int fd, struct circ_buf *crc)
+{
+ char *p;
+ int len;
+
+ p = &crc->buf[crc->tail];
+ len = circ_count_to_end(crc);
+ len = write(fd, p, len);
+ if (len < 0)
+ perror("write");
+ else if (len > 0)
+ crc->tail = (crc->tail + len) & (ACPI_AML_BUF_SIZE - 1);
+ return len;
+}
+
+static int acpi_aml_write_batch_log(int fd, struct circ_buf *crc)
+{
+ char *p;
+ int len;
+
+ p = &crc->buf[crc->tail];
+ len = circ_count_to_end(crc);
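+ /* In drain mode, the buffered log is discarded instead of being written */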
+ if (!acpi_aml_batch_drain) {
+ len = write(fd, p, len);
+ if (len < 0)
+ perror("write");
+ }
+ if (len > 0)
+ crc->tail = (crc->tail + len) & (ACPI_AML_BUF_SIZE - 1);
+ return len;
+}
+
+static int acpi_aml_write_batch_cmd(int fd, struct circ_buf *crc)
+{
+ int len;
+
+ len = acpi_aml_write(fd, crc);
+ if (circ_count_to_end(crc) == 0)
+ acpi_aml_batch_state = ACPI_AML_BATCH_READ_LOG;
+ return len;
+}
+
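+/*
+ * Main event loop. Data flows in two directions: stdin (or the batch
+ * command string) -> cmd circular buffer -> kernel debugger file, and
+ * kernel debugger file -> log circular buffer -> stdout. select() only
+ * watches a descriptor when the corresponding buffer has data to write
+ * or space to read into, and the 1 s tick lets the loop re-check
+ * acpi_aml_exit.
+ */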
+static void acpi_aml_loop(int fd)
+{
+ fd_set rfds;
+ fd_set wfds;
+ struct timeval tv;
+ int ret;
+ int maxfd = 0;
+
+ if (acpi_aml_mode == ACPI_AML_BATCH) {
+ acpi_aml_log_state = ACPI_AML_LOG_START;
+ acpi_aml_batch_pos = acpi_aml_batch_cmd;
+ if (acpi_aml_batch_drain)
+ acpi_aml_batch_state = ACPI_AML_BATCH_READ_LOG;
+ else
+ acpi_aml_batch_state = ACPI_AML_BATCH_WRITE_CMD;
+ }
+ acpi_aml_exit = false;
+ while (!acpi_aml_exit) {
+ tv.tv_sec = ACPI_AML_SEC_TICK;
+ tv.tv_usec = 0;
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+
+ if (acpi_aml_cmd_space()) {
+ if (acpi_aml_mode == ACPI_AML_INTERACTIVE)
+ maxfd = acpi_aml_set_fd(STDIN_FILENO, maxfd, &rfds);
+ else if (strlen(acpi_aml_batch_pos) &&
+ acpi_aml_batch_state == ACPI_AML_BATCH_WRITE_CMD)
+ ACPI_AML_BATCH_DO(STDIN_FILENO, read, cmd, ret);
+ }
+ if (acpi_aml_cmd_count() &&
+ (acpi_aml_mode == ACPI_AML_INTERACTIVE ||
+ acpi_aml_batch_state == ACPI_AML_BATCH_WRITE_CMD))
+ maxfd = acpi_aml_set_fd(fd, maxfd, &wfds);
+ if (acpi_aml_log_space() &&
+ (acpi_aml_mode == ACPI_AML_INTERACTIVE ||
+ acpi_aml_batch_state == ACPI_AML_BATCH_READ_LOG))
+ maxfd = acpi_aml_set_fd(fd, maxfd, &rfds);
+ if (acpi_aml_log_count())
+ maxfd = acpi_aml_set_fd(STDOUT_FILENO, maxfd, &wfds);
+
+ ret = select(maxfd+1, &rfds, &wfds, NULL, &tv);
+ if (ret < 0) {
+ perror("select");
+ break;
+ }
+ if (ret > 0) {
+ if (FD_ISSET(STDIN_FILENO, &rfds))
+ ACPI_AML_DO(STDIN_FILENO, read, cmd, ret);
+ if (FD_ISSET(fd, &wfds)) {
+ if (acpi_aml_mode == ACPI_AML_BATCH)
+ ACPI_AML_BATCH_DO(fd, write, cmd, ret);
+ else
+ ACPI_AML_DO(fd, write, cmd, ret);
+ }
+ if (FD_ISSET(fd, &rfds)) {
+ if (acpi_aml_mode == ACPI_AML_BATCH)
+ ACPI_AML_BATCH_DO(fd, read, log, ret);
+ else
+ ACPI_AML_DO(fd, read, log, ret);
+ }
+ if (FD_ISSET(STDOUT_FILENO, &wfds)) {
+ if (acpi_aml_mode == ACPI_AML_BATCH)
+ ACPI_AML_BATCH_DO(STDOUT_FILENO, write, log, ret);
+ else
+ ACPI_AML_DO(STDOUT_FILENO, write, log, ret);
+ }
+ }
+ }
+}
+
+static bool acpi_aml_readable(int fd)
+{
+ fd_set rfds;
+ struct timeval tv;
+ int ret;
+ int maxfd = 0;
+
+ tv.tv_sec = 0;
+ tv.tv_usec = ACPI_AML_USEC_PEEK;
+ FD_ZERO(&rfds);
+ maxfd = acpi_aml_set_fd(fd, maxfd, &rfds);
+ ret = select(maxfd+1, &rfds, NULL, NULL, &tv);
+ if (ret < 0)
+ perror("select");
+ if (ret > 0 && FD_ISSET(fd, &rfds))
+ return true;
+ return false;
+}
+
+/*
+ * This is a userspace I/O flush implementation, relying on the prompt
+ * characters. It can be turned into a real flush() call once the kernel
+ * implements the .flush() file operation.
+ */
+static void acpi_aml_flush(int fd)
+{
+ while (acpi_aml_readable(fd)) {
+ acpi_aml_batch_drain = true;
+ acpi_aml_loop(fd);
+ acpi_aml_batch_drain = false;
+ }
+}
+
+void usage(FILE *file, char *progname)
+{
+ fprintf(file, "usage: %s [-b cmd] [-f file] [-h]\n", progname);
+ fprintf(file, "\nOptions:\n");
+ fprintf(file, " -b Specify command to be executed in batch mode\n");
+ fprintf(file, " -f Specify interface file other than");
+ fprintf(file, " /sys/kernel/debug/acpi/acpidbg\n");
+ fprintf(file, " -h Print this help message\n");
+}
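+/*
+ * Example invocations (assuming a kernel with CONFIG_ACPI_DEBUGGER_USER
+ * and debugfs mounted, so that /sys/kernel/debug/acpi/acpidbg exists):
+ * acpidbg - start an interactive debugging session
+ * acpidbg -b "help" - run a single debugger command in batch mode
+ */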
+
+int main(int argc, char **argv)
+{
+ int fd = -1;
+ int ch;
+ int len;
+ int ret = EXIT_SUCCESS;
+
+ while ((ch = getopt(argc, argv, "b:f:h")) != -1) {
+ switch (ch) {
+ case 'b':
+ if (acpi_aml_batch_cmd) {
+ fprintf(stderr, "Already specify %s\n",
+ acpi_aml_batch_cmd);
+ ret = EXIT_FAILURE;
+ goto exit;
+ }
+ len = strlen(optarg);
+ acpi_aml_batch_cmd = calloc(len + 2, 1);
+ if (!acpi_aml_batch_cmd) {
+ perror("calloc");
+ ret = EXIT_FAILURE;
+ goto exit;
+ }
+ memcpy(acpi_aml_batch_cmd, optarg, len);
+ acpi_aml_batch_cmd[len] = '\n';
+ acpi_aml_mode = ACPI_AML_BATCH;
+ break;
+ case 'f':
+ acpi_aml_file_path = optarg;
+ break;
+ case 'h':
+ usage(stdout, argv[0]);
+ goto exit;
+ break;
+ case '?':
+ default:
+ usage(stderr, argv[0]);
+ ret = EXIT_FAILURE;
+ goto exit;
+ break;
+ }
+ }
+
+ fd = open(acpi_aml_file_path, O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ perror("open");
+ ret = EXIT_FAILURE;
+ goto exit;
+ }
+ acpi_aml_set_fl(STDIN_FILENO, O_NONBLOCK);
+ acpi_aml_set_fl(STDOUT_FILENO, O_NONBLOCK);
+
+ if (acpi_aml_mode == ACPI_AML_BATCH)
+ acpi_aml_flush(fd);
+ acpi_aml_loop(fd);
+
+exit:
+ if (fd >= 0)
+ close(fd);
+ if (acpi_aml_batch_cmd)
+ free(acpi_aml_batch_cmd);
+ return ret;
+}
diff --git a/tools/power/acpi/tools/acpidump/Makefile b/tools/power/acpi/tools/acpidump/Makefile
new file mode 100644
index 000000000..b436f8675
--- /dev/null
+++ b/tools/power/acpi/tools/acpidump/Makefile
@@ -0,0 +1,56 @@
+# tools/power/acpi/tools/acpidump/Makefile - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+include ../../Makefile.config
+
+TOOL = acpidump
+EXTRA_INSTALL = install-man
+EXTRA_UNINSTALL = uninstall-man
+
+vpath %.c \
+ ../../../../../drivers/acpi/acpica\
+ ./\
+ ../../common\
+ ../../os_specific/service_layers
+CFLAGS += -DACPI_DUMP_APP -I.
+TOOL_OBJS = \
+ apdump.o\
+ apfiles.o\
+ apmain.o\
+ osunixdir.o\
+ osunixmap.o\
+ osunixxf.o\
+ tbprint.o\
+ tbxfroot.o\
+ utascii.o\
+ utbuffer.o\
+ utdebug.o\
+ utexcep.o\
+ utglobal.o\
+ uthex.o\
+ utmath.o\
+ utnonansi.o\
+ utprint.o\
+ utstring.o\
+ utstrsuppt.o\
+ utstrtoul64.o\
+ utxferror.o\
+ oslinuxtbl.o\
+ cmfsize.o\
+ getopt.o
+
+include ../../Makefile.rules
+
+install-man: $(srctree)/man/acpidump.8
+ $(ECHO) " INST " acpidump.8
+ $(QUIET) $(INSTALL_DATA) -D $< $(DESTDIR)$(mandir)/man8/acpidump.8
+uninstall-man:
+ $(ECHO) " UNINST " acpidump.8
+ $(QUIET) rm -f $(DESTDIR)$(mandir)/man8/acpidump.8
diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h
new file mode 100644
index 000000000..f69f1f559
--- /dev/null
+++ b/tools/power/acpi/tools/acpidump/acpidump.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0 */
+/******************************************************************************
+ *
+ * Module Name: acpidump.h - Include file for acpi_dump utility
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+/*
+ * Global variables. Defined in main.c only, externed in all other files
+ */
+#ifdef _DECLARE_GLOBALS
+#define EXTERN
+#define INIT_GLOBAL(a,b) a=b
+#else
+#define EXTERN extern
+#define INIT_GLOBAL(a,b) a
+#endif
+
+#include <acpi/acpi.h>
+#include "accommon.h"
+#include "actables.h"
+#include "acapps.h"
+
+/* Globals */
+
+EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE);
+EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE);
+EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE);
+EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE);
+EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE);
+EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL);
+EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL);
+EXTERN u64 INIT_GLOBAL(gbl_rsdp_base, 0);
+
+/* Action table used to defer requested options */
+
+struct ap_dump_action {
+ char *argument;
+ u32 to_be_done;
+};
+
+#define AP_MAX_ACTIONS 32
+
+#define AP_DUMP_ALL_TABLES 0
+#define AP_DUMP_TABLE_BY_ADDRESS 1
+#define AP_DUMP_TABLE_BY_NAME 2
+#define AP_DUMP_TABLE_BY_FILE 3
+
+#define AP_MAX_ACPI_FILES 256 /* Prevent infinite loops */
+
+/* Minimum FADT sizes for various table addresses */
+
+#define MIN_FADT_FOR_DSDT (ACPI_FADT_OFFSET (dsdt) + sizeof (u32))
+#define MIN_FADT_FOR_FACS (ACPI_FADT_OFFSET (facs) + sizeof (u32))
+#define MIN_FADT_FOR_XDSDT (ACPI_FADT_OFFSET (Xdsdt) + sizeof (u64))
+#define MIN_FADT_FOR_XFACS (ACPI_FADT_OFFSET (Xfacs) + sizeof (u64))
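+/* (the FADT must be at least this long before the given field may be read) */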
+
+/*
+ * apdump - Table get/dump routines
+ */
+int ap_dump_table_from_file(char *pathname);
+
+int ap_dump_table_by_name(char *signature);
+
+int ap_dump_table_by_address(char *ascii_address);
+
+int ap_dump_all_tables(void);
+
+u8 ap_is_valid_header(struct acpi_table_header *table);
+
+u8 ap_is_valid_checksum(struct acpi_table_header *table);
+
+u32 ap_get_table_length(struct acpi_table_header *table);
+
+/*
+ * apfiles - File I/O utilities
+ */
+int ap_open_output_file(char *pathname);
+
+int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance);
+
+struct acpi_table_header *ap_get_table_from_file(char *pathname,
+ u32 *file_size);
diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c
new file mode 100644
index 000000000..a9848de32
--- /dev/null
+++ b/tools/power/acpi/tools/acpidump/apdump.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: apdump - Dump routines for ACPI tables (acpidump)
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#include "acpidump.h"
+
+/* Local prototypes */
+
+static int
+ap_dump_table_buffer(struct acpi_table_header *table,
+ u32 instance, acpi_physical_address address);
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_is_valid_header
+ *
+ * PARAMETERS: table - Pointer to table to be validated
+ *
+ * RETURN: TRUE if the header appears to be valid. FALSE otherwise
+ *
+ * DESCRIPTION: Check for a valid ACPI table header
+ *
+ ******************************************************************************/
+
+u8 ap_is_valid_header(struct acpi_table_header *table)
+{
+
+ if (!ACPI_VALIDATE_RSDP_SIG(table->signature)) {
+
+ /* Make sure signature is all ASCII and a valid ACPI name */
+
+ if (!acpi_ut_valid_nameseg(table->signature)) {
+ fprintf(stderr,
+ "Table signature (0x%8.8X) is invalid\n",
+ *(u32 *)table->signature);
+ return (FALSE);
+ }
+
+ /* Check for minimum table length */
+
+ if (table->length < sizeof(struct acpi_table_header)) {
+ fprintf(stderr, "Table length (0x%8.8X) is invalid\n",
+ table->length);
+ return (FALSE);
+ }
+ }
+
+ return (TRUE);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_is_valid_checksum
+ *
+ * PARAMETERS: table - Pointer to table to be validated
+ *
+ * RETURN: TRUE if the checksum appears to be valid. FALSE otherwise.
+ *
+ * DESCRIPTION: Check for a valid ACPI table checksum.
+ *
+ ******************************************************************************/
+
+u8 ap_is_valid_checksum(struct acpi_table_header *table)
+{
+ acpi_status status;
+ struct acpi_table_rsdp *rsdp;
+
+ if (ACPI_VALIDATE_RSDP_SIG(table->signature)) {
+ /*
+ * Checksum for RSDP.
+ * Note: Other checksums are computed during the table dump.
+ */
+ rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, table);
+ status = acpi_tb_validate_rsdp(rsdp);
+ } else {
+ status = acpi_tb_verify_checksum(table, table->length);
+ }
+
+ if (ACPI_FAILURE(status)) {
+ fprintf(stderr, "%4.4s: Warning: wrong checksum in table\n",
+ table->signature);
+ return (FALSE);
+ }
+
+ return (TRUE);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_get_table_length
+ *
+ * PARAMETERS: table - Pointer to the table
+ *
+ * RETURN: Table length
+ *
+ * DESCRIPTION: Obtain table length according to table signature.
+ *
+ ******************************************************************************/
+
+u32 ap_get_table_length(struct acpi_table_header *table)
+{
+ struct acpi_table_rsdp *rsdp;
+
+ /* Check if table is valid */
+
+ if (!ap_is_valid_header(table)) {
+ return (0);
+ }
+
+ if (ACPI_VALIDATE_RSDP_SIG(table->signature)) {
+ rsdp = ACPI_CAST_PTR(struct acpi_table_rsdp, table);
+ return (acpi_tb_get_rsdp_length(rsdp));
+ }
+
+ /* Normal ACPI table */
+
+ return (table->length);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_dump_table_buffer
+ *
+ * PARAMETERS: table - ACPI table to be dumped
+ * instance - ACPI table instance no. to be dumped
+ * address - Physical address of the table
+ *
+ * RETURN: None
+ *
+ * DESCRIPTION: Dump an ACPI table in standard ASCII hex format, with a
+ * header that is compatible with the acpi_xtract utility.
+ *
+ ******************************************************************************/
+
+static int
+ap_dump_table_buffer(struct acpi_table_header *table,
+ u32 instance, acpi_physical_address address)
+{
+ u32 table_length;
+
+ table_length = ap_get_table_length(table);
+
+ /* Print only the header if requested */
+
+ if (gbl_summary_mode) {
+ acpi_tb_print_table_header(address, table);
+ return (0);
+ }
+
+ /* Dump to binary file if requested */
+
+ if (gbl_binary_mode) {
+ return (ap_write_to_binary_file(table, instance));
+ }
+
+ /*
+ * Dump the table with header for use with acpixtract utility.
+ * Note: simplest to just always emit a 64-bit address. acpi_xtract
+ * utility can handle this.
+ */
+ fprintf(gbl_output_file, "%4.4s @ 0x%8.8X%8.8X\n",
+ table->signature, ACPI_FORMAT_UINT64(address));
+
+ acpi_ut_dump_buffer_to_file(gbl_output_file,
+ ACPI_CAST_PTR(u8, table), table_length,
+ DB_BYTE_DISPLAY, 0);
+ fprintf(gbl_output_file, "\n");
+ return (0);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_dump_all_tables
+ *
+ * PARAMETERS: None
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Get all tables from the RSDT/XSDT (or at least all of the
+ * tables that we can possibly get).
+ *
+ ******************************************************************************/
+
+int ap_dump_all_tables(void)
+{
+ struct acpi_table_header *table;
+ u32 instance = 0;
+ acpi_physical_address address;
+ acpi_status status;
+ int table_status;
+ u32 i;
+
+ /* Get and dump all available ACPI tables */
+
+ for (i = 0; i < AP_MAX_ACPI_FILES; i++) {
+ status =
+ acpi_os_get_table_by_index(i, &table, &instance, &address);
+ if (ACPI_FAILURE(status)) {
+
+ /* AE_LIMIT means that no more tables are available */
+
+ if (status == AE_LIMIT) {
+ return (0);
+ } else if (i == 0) {
+ fprintf(stderr,
+ "Could not get ACPI tables, %s\n",
+ acpi_format_exception(status));
+ return (-1);
+ } else {
+ fprintf(stderr,
+ "Could not get ACPI table at index %u, %s\n",
+ i, acpi_format_exception(status));
+ continue;
+ }
+ }
+
+ table_status = ap_dump_table_buffer(table, instance, address);
+ ACPI_FREE(table);
+
+ if (table_status) {
+ break;
+ }
+ }
+
+ /* Something seriously bad happened if the loop terminates here */
+
+ return (-1);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_dump_table_by_address
+ *
+ * PARAMETERS: ascii_address - Address for requested ACPI table
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Get an ACPI table via a physical address and dump it.
+ *
+ ******************************************************************************/
+
+int ap_dump_table_by_address(char *ascii_address)
+{
+ acpi_physical_address address;
+ struct acpi_table_header *table;
+ acpi_status status;
+ int table_status;
+ u64 long_address;
+
+ /* Convert argument to an integer physical address */
+
+ status = acpi_ut_strtoul64(ascii_address, &long_address);
+ if (ACPI_FAILURE(status)) {
+ fprintf(stderr, "%s: Could not convert to a physical address\n",
+ ascii_address);
+ return (-1);
+ }
+
+ address = (acpi_physical_address)long_address;
+ status = acpi_os_get_table_by_address(address, &table);
+ if (ACPI_FAILURE(status)) {
+ fprintf(stderr, "Could not get table at 0x%8.8X%8.8X, %s\n",
+ ACPI_FORMAT_UINT64(address),
+ acpi_format_exception(status));
+ return (-1);
+ }
+
+ table_status = ap_dump_table_buffer(table, 0, address);
+ ACPI_FREE(table);
+ return (table_status);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_dump_table_by_name
+ *
+ * PARAMETERS: signature - Requested ACPI table signature
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Get an ACPI table via a signature and dump it. Handles
+ * multiple tables with the same signature (SSDTs).
+ *
+ ******************************************************************************/
+
+int ap_dump_table_by_name(char *signature)
+{
+ char local_signature[ACPI_NAME_SIZE + 1];
+ u32 instance;
+ struct acpi_table_header *table;
+ acpi_physical_address address;
+ acpi_status status;
+ int table_status;
+
+ if (strlen(signature) != ACPI_NAME_SIZE) {
+ fprintf(stderr,
+ "Invalid table signature [%s]: must be exactly 4 characters\n",
+ signature);
+ return (-1);
+ }
+
+ /* Table signatures are expected to be uppercase */
+
+ strcpy(local_signature, signature);
+ acpi_ut_strupr(local_signature);
+
+ /* To be friendly, handle tables whose signatures do not match the name */
+
+ if (ACPI_COMPARE_NAME(local_signature, "FADT")) {
+ strcpy(local_signature, ACPI_SIG_FADT);
+ } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) {
+ strcpy(local_signature, ACPI_SIG_MADT);
+ }
+
+ /* Dump all instances of this signature (to handle multiple SSDTs) */
+
+ for (instance = 0; instance < AP_MAX_ACPI_FILES; instance++) {
+ status = acpi_os_get_table_by_name(local_signature, instance,
+ &table, &address);
+ if (ACPI_FAILURE(status)) {
+
+ /* AE_LIMIT means that no more tables are available */
+
+ if (status == AE_LIMIT) {
+ return (0);
+ }
+
+ fprintf(stderr,
+ "Could not get ACPI table with signature [%s], %s\n",
+ local_signature, acpi_format_exception(status));
+ return (-1);
+ }
+
+ table_status = ap_dump_table_buffer(table, instance, address);
+ ACPI_FREE(table);
+
+ if (table_status) {
+ break;
+ }
+ }
+
+ /* Something seriously bad happened if the loop terminates here */
+
+ return (-1);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_dump_table_from_file
+ *
+ * PARAMETERS: pathname - File containing the binary ACPI table
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Dump an ACPI table from a binary file
+ *
+ ******************************************************************************/
+
+int ap_dump_table_from_file(char *pathname)
+{
+ struct acpi_table_header *table;
+ u32 file_size = 0;
+ int table_status = -1;
+
+ /* Get the entire ACPI table from the file */
+
+ table = ap_get_table_from_file(pathname, &file_size);
+ if (!table) {
+ return (-1);
+ }
+
+ if (!acpi_ut_valid_nameseg(table->signature)) {
+ fprintf(stderr,
+ "No valid ACPI signature was found in input file %s\n",
+ pathname);
+ }
+
+ /* File must be at least as long as the table length */
+
+ if (table->length > file_size) {
+ fprintf(stderr,
+ "Table length (0x%X) is too large for input file (0x%X) %s\n",
+ table->length, file_size, pathname);
+ goto exit;
+ }
+
+ if (gbl_verbose_mode) {
+ fprintf(stderr,
+ "Input file: %s contains table [%4.4s], 0x%X (%u) bytes\n",
+ pathname, table->signature, file_size, file_size);
+ }
+
+ table_status = ap_dump_table_buffer(table, 0, 0);
+
+exit:
+ ACPI_FREE(table);
+ return (table_status);
+}
diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c
new file mode 100644
index 000000000..e86207e5a
--- /dev/null
+++ b/tools/power/acpi/tools/acpidump/apfiles.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: apfiles - File-related functions for acpidump utility
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#include "acpidump.h"
+
+/* Local prototypes */
+
+static int ap_is_existing_file(char *pathname);
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_is_existing_file
+ *
+ * PARAMETERS: pathname - Output filename
+ *
+ * RETURN: 0 on success
+ *
+ * DESCRIPTION: Query for file overwrite if it already exists.
+ *
+ ******************************************************************************/
+
+static int ap_is_existing_file(char *pathname)
+{
+#if !defined(_GNU_EFI) && !defined(_EDK2_EFI)
+ struct stat stat_info;
+
+ if (!stat(pathname, &stat_info)) {
+ fprintf(stderr,
+ "Target path already exists, overwrite? [y|n] ");
+
+ if (getchar() != 'y') {
+ return (-1);
+ }
+ }
+#endif
+
+ return (0);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_open_output_file
+ *
+ * PARAMETERS: pathname - Output filename
+ *
+ * RETURN: Open file handle
+ *
+ * DESCRIPTION: Open a text output file for acpidump. Checks if file already
+ * exists.
+ *
+ ******************************************************************************/
+
+int ap_open_output_file(char *pathname)
+{
+ ACPI_FILE file;
+
+ /* If file exists, prompt for overwrite */
+
+ if (ap_is_existing_file(pathname) != 0) {
+ return (-1);
+ }
+
+ /* Point stdout to the file */
+
+ file = fopen(pathname, "w");
+ if (!file) {
+ fprintf(stderr, "Could not open output file: %s\n", pathname);
+ return (-1);
+ }
+
+ /* Save the file and path */
+
+ gbl_output_file = file;
+ gbl_output_filename = pathname;
+ return (0);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_write_to_binary_file
+ *
+ * PARAMETERS: table - ACPI table to be written
+ * instance - ACPI table instance no. to be written
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Write an ACPI table to a binary file. Builds the output
+ * filename from the table signature.
+ *
+ ******************************************************************************/
+
+int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance)
+{
+ char filename[ACPI_NAME_SIZE + 16];
+ char instance_str[16];
+ ACPI_FILE file;
+ acpi_size actual;
+ u32 table_length;
+
+ /* Obtain table length */
+
+ table_length = ap_get_table_length(table);
+
+ /* Construct lower-case filename from the table local signature */
+
+ if (ACPI_VALIDATE_RSDP_SIG(table->signature)) {
+ ACPI_MOVE_NAME(filename, ACPI_RSDP_NAME);
+ } else {
+ ACPI_MOVE_NAME(filename, table->signature);
+ }
+
+ filename[0] = (char)tolower((int)filename[0]);
+ filename[1] = (char)tolower((int)filename[1]);
+ filename[2] = (char)tolower((int)filename[2]);
+ filename[3] = (char)tolower((int)filename[3]);
+ filename[ACPI_NAME_SIZE] = 0;
+
+ /* Handle multiple SSDTs - create different filenames for each */
+
+ if (instance > 0) {
+ snprintf(instance_str, sizeof(instance_str), "%u", instance);
+ strcat(filename, instance_str);
+ }
+
+ strcat(filename, FILE_SUFFIX_BINARY_TABLE);
+
+ if (gbl_verbose_mode) {
+ fprintf(stderr,
+ "Writing [%4.4s] to binary file: %s 0x%X (%u) bytes\n",
+ table->signature, filename, table->length,
+ table->length);
+ }
+
+ /* Open the file and dump the entire table in binary mode */
+
+ file = fopen(filename, "wb");
+ if (!file) {
+ fprintf(stderr, "Could not open output file: %s\n", filename);
+ return (-1);
+ }
+
+ actual = fwrite(table, 1, table_length, file);
+ if (actual != table_length) {
+ fprintf(stderr, "Error writing binary output file: %s\n",
+ filename);
+ fclose(file);
+ return (-1);
+ }
+
+ fclose(file);
+ return (0);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_get_table_from_file
+ *
+ * PARAMETERS: pathname - File containing the binary ACPI table
+ * out_file_size - Where the file size is returned
+ *
+ * RETURN: Buffer containing the ACPI table. NULL on error.
+ *
+ * DESCRIPTION: Open a file and read it entirely into a new buffer
+ *
+ ******************************************************************************/
+
+struct acpi_table_header *ap_get_table_from_file(char *pathname,
+ u32 *out_file_size)
+{
+ struct acpi_table_header *buffer = NULL;
+ ACPI_FILE file;
+ u32 file_size;
+ acpi_size actual;
+
+ /* Must use binary mode */
+
+ file = fopen(pathname, "rb");
+ if (!file) {
+ fprintf(stderr, "Could not open input file: %s\n", pathname);
+ return (NULL);
+ }
+
+ /* Need file size to allocate a buffer */
+
+ file_size = cm_get_file_size(file);
+ if (file_size == ACPI_UINT32_MAX) {
+ fprintf(stderr,
+ "Could not get input file size: %s\n", pathname);
+ goto cleanup;
+ }
+
+ /* Allocate a buffer for the entire file */
+
+ buffer = ACPI_ALLOCATE_ZEROED(file_size);
+ if (!buffer) {
+ fprintf(stderr,
+ "Could not allocate file buffer of size: %u\n",
+ file_size);
+ goto cleanup;
+ }
+
+ /* Read the entire file */
+
+ actual = fread(buffer, 1, file_size, file);
+ if (actual != file_size) {
+ fprintf(stderr, "Could not read input file: %s\n", pathname);
+ ACPI_FREE(buffer);
+ buffer = NULL;
+ goto cleanup;
+ }
+
+ *out_file_size = file_size;
+
+cleanup:
+ fclose(file);
+ return (buffer);
+}
diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c
new file mode 100644
index 000000000..2d9b94b63
--- /dev/null
+++ b/tools/power/acpi/tools/acpidump/apmain.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/******************************************************************************
+ *
+ * Module Name: apmain - Main module for the acpidump utility
+ *
+ * Copyright (C) 2000 - 2018, Intel Corp.
+ *
+ *****************************************************************************/
+
+#define _DECLARE_GLOBALS
+#include "acpidump.h"
+
+/*
+ * acpidump - A portable utility for obtaining system ACPI tables and dumping
+ * them in an ASCII hex format suitable for binary extraction via acpixtract.
+ *
+ * Obtaining the system ACPI tables is an OS-specific operation.
+ *
+ * This utility can be ported to any host operating system by providing a
+ * module containing system-specific versions of these interfaces:
+ *
+ * acpi_os_get_table_by_address
+ * acpi_os_get_table_by_index
+ * acpi_os_get_table_by_name
+ *
+ * See the ACPICA Reference Guide for the exact definitions of these
+ * interfaces. Also, see these ACPICA source code modules for example
+ * implementations:
+ *
+ * source/os_specific/service_layers/oswintbl.c
+ * source/os_specific/service_layers/oslinuxtbl.c
+ */
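+/*
+ * Typical usage:
+ * acpidump > acpi.dat - dump all tables in ASCII hex format
+ * acpixtract -a acpi.dat - extract the binary tables from the dump
+ */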
+
+/* Local prototypes */
+
+static void ap_display_usage(void);
+
+static int ap_do_options(int argc, char **argv);
+
+static int ap_insert_action(char *argument, u32 to_be_done);
+
+/* Table for deferred actions from command line options */
+
+struct ap_dump_action action_table[AP_MAX_ACTIONS];
+u32 current_action = 0;
+
+#define AP_UTILITY_NAME "ACPI Binary Table Dump Utility"
+#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:sv^xz"
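+/*
+ * In the options string, a ':' means the preceding option takes an
+ * argument; the '^' after 'v' marks it as taking an optional
+ * single-character sub-option (as in -vd), with acpi_gbl_optarg set to
+ * "^" when none is given (this is how the string is interpreted by the
+ * 'v' case below).
+ */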
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_display_usage
+ *
+ * DESCRIPTION: Usage message for the acpi_dump utility
+ *
+ ******************************************************************************/
+
+static void ap_display_usage(void)
+{
+
+ ACPI_USAGE_HEADER("acpidump [options]");
+
+ ACPI_OPTION("-b", "Dump tables to binary files");
+ ACPI_OPTION("-h -?", "This help message");
+ ACPI_OPTION("-o <File>", "Redirect output to file");
+ ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP");
+ ACPI_OPTION("-s", "Print table summaries only");
+ ACPI_OPTION("-v", "Display version information");
+ ACPI_OPTION("-vd", "Display build date and time");
+ ACPI_OPTION("-z", "Verbose mode");
+
+ ACPI_USAGE_TEXT("\nTable Options:\n");
+
+ ACPI_OPTION("-a <Address>", "Get table via a physical address");
+ ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping");
+ ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file");
+ ACPI_OPTION("-n <Signature>", "Get table via a name/signature");
+ ACPI_OPTION("-x", "Do not use but dump XSDT");
+ ACPI_OPTION("-x -x", "Do not use or dump XSDT");
+
+ ACPI_USAGE_TEXT("\n"
+ "Invocation without parameters dumps all available tables\n"
+ "Multiple mixed instances of -a, -f, and -n are supported\n\n");
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_insert_action
+ *
+ * PARAMETERS: argument - Pointer to the argument for this action
+ * to_be_done - What to do to process this action
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Add an action item to the action table
+ *
+ ******************************************************************************/
+
+static int ap_insert_action(char *argument, u32 to_be_done)
+{
+
+ /* Check for table overflow before inserting the action */
+
+ if (current_action >= AP_MAX_ACTIONS) {
+ fprintf(stderr, "Too many table options (max %d)\n",
+ AP_MAX_ACTIONS);
+ return (-1);
+ }
+
+ action_table[current_action].argument = argument;
+ action_table[current_action].to_be_done = to_be_done;
+ current_action++;
+
+ return (0);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: ap_do_options
+ *
+ * PARAMETERS: argc/argv - Standard argc/argv
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: Command line option processing. The main actions for getting
+ * and dumping tables are deferred via the action table.
+ *
+ *****************************************************************************/
+
+static int ap_do_options(int argc, char **argv)
+{
+ int j;
+ acpi_status status;
+
+ /* Command line options */
+
+ while ((j =
+ acpi_getopt(argc, argv, AP_SUPPORTED_OPTIONS)) != ACPI_OPT_END)
+ switch (j) {
+ /*
+ * Global options
+ */
+ case 'b': /* Dump all input tables to binary files */
+
+ gbl_binary_mode = TRUE;
+ continue;
+
+ case 'c': /* Dump customized tables */
+
+ if (!strcmp(acpi_gbl_optarg, "on")) {
+ gbl_dump_customized_tables = TRUE;
+ } else if (!strcmp(acpi_gbl_optarg, "off")) {
+ gbl_dump_customized_tables = FALSE;
+ } else {
+ fprintf(stderr,
+ "%s: Cannot handle this switch, please use on|off\n",
+ acpi_gbl_optarg);
+ return (-1);
+ }
+ continue;
+
+ case 'h':
+ case '?':
+
+ ap_display_usage();
+ return (1);
+
+ case 'o': /* Redirect output to a single file */
+
+ if (ap_open_output_file(acpi_gbl_optarg)) {
+ return (-1);
+ }
+ continue;
+
+ case 'r': /* Dump tables from specified RSDP */
+
+ status =
+ acpi_ut_strtoul64(acpi_gbl_optarg, &gbl_rsdp_base);
+ if (ACPI_FAILURE(status)) {
+ fprintf(stderr,
+ "%s: Could not convert to a physical address\n",
+ acpi_gbl_optarg);
+ return (-1);
+ }
+ continue;
+
+ case 's': /* Print table summaries only */
+
+ gbl_summary_mode = TRUE;
+ continue;
+
+ case 'x': /* Do not use XSDT */
+
+ if (!acpi_gbl_do_not_use_xsdt) {
+ acpi_gbl_do_not_use_xsdt = TRUE;
+ } else {
+ gbl_do_not_dump_xsdt = TRUE;
+ }
+ continue;
+
+ case 'v': /* -v: (Version): signon already emitted, just exit */
+
+ switch (acpi_gbl_optarg[0]) {
+ case '^': /* -v: (Version) */
+
+ fprintf(stderr,
+ ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
+ return (1);
+
+ case 'd':
+
+ fprintf(stderr,
+ ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
+ printf(ACPI_COMMON_BUILD_TIME);
+ return (1);
+
+ default:
+
+ printf("Unknown option: -v%s\n",
+ acpi_gbl_optarg);
+ return (-1);
+ }
+ break;
+
+ case 'z': /* Verbose mode */
+
+ gbl_verbose_mode = TRUE;
+ fprintf(stderr, ACPI_COMMON_SIGNON(AP_UTILITY_NAME));
+ continue;
+
+ /*
+ * Table options
+ */
+ case 'a': /* Get table by physical address */
+
+ if (ap_insert_action
+ (acpi_gbl_optarg, AP_DUMP_TABLE_BY_ADDRESS)) {
+ return (-1);
+ }
+ break;
+
+ case 'f': /* Get table from a file */
+
+ if (ap_insert_action
+ (acpi_gbl_optarg, AP_DUMP_TABLE_BY_FILE)) {
+ return (-1);
+ }
+ break;
+
+ case 'n': /* Get table by input name (signature) */
+
+ if (ap_insert_action
+ (acpi_gbl_optarg, AP_DUMP_TABLE_BY_NAME)) {
+ return (-1);
+ }
+ break;
+
+ default:
+
+ ap_display_usage();
+ return (-1);
+ }
+
+ /* If there are no actions, this means "get/dump all tables" */
+
+ if (current_action == 0) {
+ if (ap_insert_action(NULL, AP_DUMP_ALL_TABLES)) {
+ return (-1);
+ }
+ }
+
+ return (0);
+}
+
+/******************************************************************************
+ *
+ * FUNCTION: main
+ *
+ * PARAMETERS: argc/argv - Standard argc/argv
+ *
+ * RETURN: Status
+ *
+ * DESCRIPTION: C main function for acpidump utility
+ *
+ ******************************************************************************/
+
+#if !defined(_GNU_EFI) && !defined(_EDK2_EFI)
+int ACPI_SYSTEM_XFACE main(int argc, char *argv[])
+#else
+int ACPI_SYSTEM_XFACE acpi_main(int argc, char *argv[])
+#endif
+{
+ int status = 0;
+ struct ap_dump_action *action;
+ u32 file_size;
+ u32 i;
+
+ ACPI_DEBUG_INITIALIZE(); /* For debug version only */
+ acpi_os_initialize();
+ gbl_output_file = ACPI_FILE_OUT;
+ acpi_gbl_integer_byte_width = 8;
+
+ /* Process command line options */
+
+ status = ap_do_options(argc, argv);
+ if (status > 0) {
+ return (0);
+ }
+ if (status < 0) {
+ return (status);
+ }
+
+ /* Get/dump ACPI table(s) as requested */
+
+ for (i = 0; i < current_action; i++) {
+ action = &action_table[i];
+ switch (action->to_be_done) {
+ case AP_DUMP_ALL_TABLES:
+
+ status = ap_dump_all_tables();
+ break;
+
+ case AP_DUMP_TABLE_BY_ADDRESS:
+
+ status = ap_dump_table_by_address(action->argument);
+ break;
+
+ case AP_DUMP_TABLE_BY_NAME:
+
+ status = ap_dump_table_by_name(action->argument);
+ break;
+
+ case AP_DUMP_TABLE_BY_FILE:
+
+ status = ap_dump_table_from_file(action->argument);
+ break;
+
+ default:
+
+ fprintf(stderr,
+ "Internal error, invalid action: 0x%X\n",
+ action->to_be_done);
+ return (-1);
+ }
+
+ if (status) {
+ return (status);
+ }
+ }
+
+ if (gbl_output_filename) {
+ if (gbl_verbose_mode) {
+
+ /* Summary for the output file */
+
+ file_size = cm_get_file_size(gbl_output_file);
+ fprintf(stderr,
+ "Output file %s contains 0x%X (%u) bytes\n\n",
+ gbl_output_filename, file_size, file_size);
+ }
+
+ fclose(gbl_output_file);
+ }
+
+ return (status);
+}
diff --git a/tools/power/acpi/tools/ec/Makefile b/tools/power/acpi/tools/ec/Makefile
new file mode 100644
index 000000000..75d8a127b
--- /dev/null
+++ b/tools/power/acpi/tools/ec/Makefile
@@ -0,0 +1,17 @@
+# tools/power/acpi/tools/ec/Makefile - ACPI tool Makefile
+#
+# Copyright (c) 2015, Intel Corporation
+# Author: Lv Zheng <lv.zheng@intel.com>
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; version 2
+# of the License.
+
+include ../../Makefile.config
+
+TOOL = ec
+TOOL_OBJS = \
+ ec_access.o
+
+include ../../Makefile.rules
diff --git a/tools/power/acpi/tools/ec/ec_access.c b/tools/power/acpi/tools/ec/ec_access.c
new file mode 100644
index 000000000..5f5064238
--- /dev/null
+++ b/tools/power/acpi/tools/ec/ec_access.c
@@ -0,0 +1,238 @@
+/*
+ * ec_access.c
+ *
+ * Copyright (C) 2010 SUSE Linux Products GmbH
+ * Author:
+ * Thomas Renninger <trenn@suse.de>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include <fcntl.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+
+#define EC_SPACE_SIZE 256
+#define SYSFS_PATH "/sys/kernel/debug/ec/ec0/io"
+
+/* TBD/Enhancements:
+ - Provide param for accessing different ECs (not supported by kernel yet)
+*/
+
+static int read_mode = -1;
+static int sleep_time;
+static int write_byte_offset = -1;
+static int read_byte_offset = -1;
+static int write_value = -1;
+
+void usage(char progname[], int exit_status)
+{
+ printf("Usage:\n");
+ printf("1) %s -r [-s sleep]\n", basename(progname));
+ printf("2) %s -b byte_offset\n", basename(progname));
+ printf("3) %s -w byte_offset -v value\n\n", basename(progname));
+
+ puts("\t-r [-s sleep] : Dump EC registers");
+ puts("\t If sleep is given, sleep x seconds,");
+ puts("\t re-read EC registers and show changes");
+ puts("\t-b offset : Read value at byte_offset (in hex)");
+ puts("\t-w offset -v value : Write value at byte_offset");
+ puts("\t-h : Print this help\n\n");
+ puts("Offsets and values are in hexadecimal number system.");
+ puts("The offset and value must be between 0 and 0xff.");
+ exit(exit_status);
+}
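+/*
+ * Example invocations (offsets and values in hex):
+ * ec_access -r - dump all 256 EC registers
+ * ec_access -b 2f - read the byte at offset 0x2f
+ * ec_access -w 2f -v 1a - write 0x1a to offset 0x2f
+ */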
+
+void parse_opts(int argc, char *argv[])
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "rs:b:w:v:h")) != -1) {
+
+ switch (c) {
+ case 'r':
+ if (read_mode != -1)
+ usage(argv[0], EXIT_FAILURE);
+ read_mode = 1;
+ break;
+ case 's':
+ if (read_mode != -1 && read_mode != 1)
+ usage(argv[0], EXIT_FAILURE);
+
+ sleep_time = atoi(optarg);
+ if (sleep_time <= 0) {
+ sleep_time = 0;
+ fprintf(stderr, "Bad sleep time: %s\n", optarg);
+ usage(argv[0], EXIT_FAILURE);
+ }
+ break;
+ case 'b':
+ if (read_mode != -1)
+ usage(argv[0], EXIT_FAILURE);
+ read_mode = 1;
+ read_byte_offset = strtoul(optarg, NULL, 16);
+ break;
+ case 'w':
+ if (read_mode != -1)
+ usage(argv[0], EXIT_FAILURE);
+ read_mode = 0;
+ write_byte_offset = strtoul(optarg, NULL, 16);
+ break;
+ case 'v':
+ write_value = strtoul(optarg, NULL, 16);
+ break;
+ case 'h':
+ usage(argv[0], EXIT_SUCCESS);
+ default:
+ fprintf(stderr, "Unknown option!\n");
+ usage(argv[0], EXIT_FAILURE);
+ }
+ }
+ if (read_mode == 0) {
+ if (write_byte_offset < 0 ||
+ write_byte_offset >= EC_SPACE_SIZE) {
+ fprintf(stderr, "Wrong byte offset 0x%.2x, valid: "
+ "[0-0x%.2x]\n",
+ write_byte_offset, EC_SPACE_SIZE - 1);
+ usage(argv[0], EXIT_FAILURE);
+ }
+ if (write_value < 0 ||
+ write_value > 0xff) {
+ fprintf(stderr, "Wrong write value 0x%.2x, valid: "
+ "[0-0xff]\n", write_value);
+ usage(argv[0], EXIT_FAILURE);
+ }
+ }
+ if (read_mode == 1 && read_byte_offset != -1) {
+ if (read_byte_offset < -1 ||
+ read_byte_offset >= EC_SPACE_SIZE) {
+ fprintf(stderr, "Wrong byte offset 0x%.2x, valid: "
+ "[0-0x%.2x]\n",
+ read_byte_offset, EC_SPACE_SIZE - 1);
+ usage(argv[0], EXIT_FAILURE);
+ }
+ }
+ /* Add additional parameter checks here */
+}
+
+void dump_ec(int fd)
+{
+ char buf[EC_SPACE_SIZE];
+ char buf2[EC_SPACE_SIZE];
+ int byte_off, bytes_read;
+
+ bytes_read = read(fd, buf, EC_SPACE_SIZE);
+
+ if (bytes_read == -1)
+ err(EXIT_FAILURE, "Could not read from %s\n", SYSFS_PATH);
+
+ if (bytes_read != EC_SPACE_SIZE)
+ fprintf(stderr, "Could only read %d bytes\n", bytes_read);
+
+ printf(" 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F");
+ for (byte_off = 0; byte_off < bytes_read; byte_off++) {
+ if ((byte_off % 16) == 0)
+ printf("\n%.2X: ", byte_off);
+ printf(" %.2x ", (uint8_t)buf[byte_off]);
+ }
+ printf("\n");
+
+ if (!sleep_time)
+ return;
+
+ printf("\n");
+ lseek(fd, 0, SEEK_SET);
+ sleep(sleep_time);
+
+ bytes_read = read(fd, buf2, EC_SPACE_SIZE);
+
+ if (bytes_read == -1)
+ err(EXIT_FAILURE, "Could not read from %s\n", SYSFS_PATH);
+
+ if (bytes_read != EC_SPACE_SIZE)
+ fprintf(stderr, "Could only read %d bytes\n", bytes_read);
+
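+ /* A leading '*' marks a byte whose value changed during the sleep */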
+ printf(" 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F");
+ for (byte_off = 0; byte_off < bytes_read; byte_off++) {
+ if ((byte_off % 16) == 0)
+ printf("\n%.2X: ", byte_off);
+
+ if (buf[byte_off] == buf2[byte_off])
+ printf(" %.2x ", (uint8_t)buf2[byte_off]);
+ else
+ printf("*%.2x ", (uint8_t)buf2[byte_off]);
+ }
+ printf("\n");
+}
+
+void read_ec_val(int fd, int byte_offset)
+{
+ uint8_t buf;
+ int error;
+
+ error = lseek(fd, byte_offset, SEEK_SET);
+ if (error != byte_offset)
+ err(EXIT_FAILURE, "Cannot set offset to 0x%.2x", byte_offset);
+
+ error = read(fd, &buf, 1);
+ if (error != 1)
+ err(EXIT_FAILURE, "Could not read byte 0x%.2x from %s\n",
+ byte_offset, SYSFS_PATH);
+ printf("0x%.2x\n", buf);
+ return;
+}
+
+void write_ec_val(int fd, int byte_offset, uint8_t value)
+{
+ int error;
+
+ error = lseek(fd, byte_offset, SEEK_SET);
+ if (error != byte_offset)
+ err(EXIT_FAILURE, "Cannot set offset to 0x%.2x", byte_offset);
+
+ error = write(fd, &value, 1);
+ if (error != 1)
+ err(EXIT_FAILURE, "Cannot write value 0x%.2x to offset 0x%.2x",
+ value, byte_offset);
+}
+
+int main(int argc, char *argv[])
+{
+ int file_mode = O_RDONLY;
+ int fd;
+
+ parse_opts(argc, argv);
+
+ if (read_mode == 0)
+ file_mode = O_WRONLY;
+ else if (read_mode == 1)
+ file_mode = O_RDONLY;
+ else
+ usage(argv[0], EXIT_FAILURE);
+
+ fd = open(SYSFS_PATH, file_mode);
+ if (fd == -1)
+ err(EXIT_FAILURE, "%s", SYSFS_PATH);
+
+ if (read_mode)
+ if (read_byte_offset == -1)
+ dump_ec(fd);
+ else if (read_byte_offset < 0 ||
+ read_byte_offset >= EC_SPACE_SIZE)
+ usage(argv[0], EXIT_FAILURE);
+ else
+ read_ec_val(fd, read_byte_offset);
+ else
+ write_ec_val(fd, write_byte_offset, write_value);
+ close(fd);
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore
new file mode 100644
index 000000000..1f9977cc6
--- /dev/null
+++ b/tools/power/cpupower/.gitignore
@@ -0,0 +1,28 @@
+.libs
+libcpupower.so
+libcpupower.so.*
+build/ccdv
+cpufreq-info
+cpufreq-set
+cpufreq-aperf
+lib/.libs
+lib/cpufreq.lo
+lib/cpufreq.o
+lib/proc.lo
+lib/proc.o
+lib/sysfs.lo
+lib/sysfs.o
+po/cpupowerutils.pot
+po/*.gmo
+utils/cpufreq-info.o
+utils/cpufreq-set.o
+utils/cpufreq-aperf.o
+cpupower
+bench/cpufreq-bench
+debug/kernel/Module.symvers
+debug/i386/centrino-decode
+debug/i386/dump_psb
+debug/i386/intel_gsic
+debug/i386/powernow-k8-decode
+debug/x86_64/centrino-decode
+debug/x86_64/powernow-k8-decode
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
new file mode 100644
index 000000000..1dd5f4fcf
--- /dev/null
+++ b/tools/power/cpupower/Makefile
@@ -0,0 +1,334 @@
+# Makefile for cpupower
+#
+# Copyright (C) 2005,2006 Dominik Brodowski <linux@dominikbrodowski.net>
+#
+# Based largely on the Makefile for udev by:
+#
+# Copyright (C) 2003,2004 Greg Kroah-Hartman <greg@kroah.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+OUTPUT=./
+ifeq ("$(origin O)", "command line")
+ OUTPUT := $(O)/
+endif
+
+ifneq ($(OUTPUT),)
+# check that the output directory actually exists
+OUTDIR := $(shell cd $(OUTPUT) && pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
+include ../../scripts/Makefile.arch
+
+# --- CONFIGURATION BEGIN ---
+
+# Set the following to `true' to make an unstripped, unoptimized
+# binary. Leave this set to `false' for production use.
+DEBUG ?= true
+
+# make the build silent. Set this to something else to make it noisy again.
+V ?= false
+
+# Internationalization support (output in different languages).
+# Requires gettext.
+NLS ?= true
+
+# Set the following to 'true' to build/install the
+# cpufreq-bench benchmarking tool
+CPUFREQ_BENCH ?= true
+
+# Do not link against the shared libraries, but build the code in
+# statically. The libraries are still built, otherwise the Makefile
+# code would be rather ugly.
+export STATIC ?= false
+
+# Prefix to the directories we're installing to
+DESTDIR ?=
+
+# --- CONFIGURATION END ---
+
+
+
+# Package-related definitions. Distributions can modify the version
+# and _should_ modify the PACKAGE_BUGREPORT definition
+
+VERSION= $(shell ./utils/version-gen.sh)
+LIB_MAJ= 0.0.1
+LIB_MIN= 0
+
+PACKAGE = cpupower
+PACKAGE_BUGREPORT = linux-pm@vger.kernel.org
+LANGUAGES = de fr it cs pt
+
+
+# Directory definitions. These are default and most probably
+# do not need to be changed. Please note that DESTDIR is
+# added in front of any of them
+
+bindir ?= /usr/bin
+sbindir ?= /usr/sbin
+mandir ?= /usr/man
+includedir ?= /usr/include
+ifeq ($(IS_64_BIT), 1)
+libdir ?= /usr/lib64
+else
+libdir ?= /usr/lib
+endif
+localedir ?= /usr/share/locale
+docdir ?= /usr/share/doc/packages/cpupower
+confdir ?= /etc/
+
+# Toolchain: what tools do we use, and what options do they need:
+
+CP = cp -fpR
+INSTALL = /usr/bin/install -c
+INSTALL_PROGRAM = ${INSTALL}
+INSTALL_DATA = ${INSTALL} -m 644
+INSTALL_SCRIPT = ${INSTALL_PROGRAM}
+
+# If you are running a cross compiler, you may want to set this
+# to something more interesting, like "arm-linux-". If you want
+# to compile vs uClibc, that can be done here as well.
+CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc-
+CC = $(CROSS)gcc
+LD = $(CROSS)gcc
+AR = $(CROSS)ar
+STRIP = $(CROSS)strip
+RANLIB = $(CROSS)ranlib
+HOSTCC = gcc
+MKDIR = mkdir
+
+
+# Now we set up the build system
+#
+
+GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo; done;}
+
+export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS
+
+# check if compiler option is supported
+cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+
+# use '-Os' optimization if available, else fall back to -O2
+# (cc-supports expands to its argument only when supported, so the
+# fallback needs to be explicit)
+OPTIMIZATION := $(if $(call cc-supports,-Os),-Os,-O2)
+
+WARNINGS := -Wall -Wchar-subscripts -Wpointer-arith -Wsign-compare
+WARNINGS += $(call cc-supports,-Wno-pointer-sign)
+WARNINGS += $(call cc-supports,-Wdeclaration-after-statement)
+WARNINGS += -Wshadow
+
+CFLAGS += -DVERSION=\"$(VERSION)\" -DPACKAGE=\"$(PACKAGE)\" \
+ -DPACKAGE_BUGREPORT=\"$(PACKAGE_BUGREPORT)\" -D_GNU_SOURCE
+
+UTIL_OBJS = utils/helpers/amd.o utils/helpers/msr.o \
+ utils/helpers/sysfs.o utils/helpers/misc.o utils/helpers/cpuid.o \
+ utils/helpers/pci.o utils/helpers/bitmask.o \
+ utils/idle_monitor/nhm_idle.o utils/idle_monitor/snb_idle.o \
+ utils/idle_monitor/hsw_ext_idle.o \
+ utils/idle_monitor/amd_fam14h_idle.o utils/idle_monitor/cpuidle_sysfs.o \
+ utils/idle_monitor/mperf_monitor.o utils/idle_monitor/cpupower-monitor.o \
+ utils/cpupower.o utils/cpufreq-info.o utils/cpufreq-set.o \
+ utils/cpupower-set.o utils/cpupower-info.o utils/cpuidle-info.o \
+ utils/cpuidle-set.o
+
+UTIL_SRC := $(UTIL_OBJS:.o=.c)
+
+UTIL_OBJS := $(addprefix $(OUTPUT),$(UTIL_OBJS))
+
+UTIL_HEADERS = utils/helpers/helpers.h utils/idle_monitor/cpupower-monitor.h \
+ utils/helpers/bitmask.h \
+ utils/idle_monitor/idle_monitors.h utils/idle_monitor/idle_monitors.def
+
+LIB_HEADERS = lib/cpufreq.h lib/cpupower.h lib/cpuidle.h
+LIB_SRC = lib/cpufreq.c lib/cpupower.c lib/cpuidle.c
+LIB_OBJS = lib/cpufreq.o lib/cpupower.o lib/cpuidle.o
+LIB_OBJS := $(addprefix $(OUTPUT),$(LIB_OBJS))
+
+CFLAGS += -pipe
+
+ifeq ($(strip $(NLS)),true)
+ INSTALL_NLS += install-gmo
+ COMPILE_NLS += create-gmo
+ CFLAGS += -DNLS
+endif
+
+ifeq ($(strip $(CPUFREQ_BENCH)),true)
+ INSTALL_BENCH += install-bench
+ COMPILE_BENCH += compile-bench
+endif
+
+ifeq ($(strip $(STATIC)),true)
+ UTIL_OBJS += $(LIB_OBJS)
+ UTIL_HEADERS += $(LIB_HEADERS)
+ UTIL_SRC += $(LIB_SRC)
+endif
+
+CFLAGS += $(WARNINGS)
+
+ifeq ($(strip $(V)),false)
+ QUIET=@
+ ECHO=@echo
+else
+ QUIET=
+ ECHO=@\#
+endif
+export QUIET ECHO
+
+# if DEBUG is enabled, then we do not strip or optimize
+ifeq ($(strip $(DEBUG)),true)
+ CFLAGS += -O1 -g -DDEBUG
+ STRIPCMD = /bin/true -Since_we_are_debugging
+else
+ CFLAGS += $(OPTIMIZATION) -fomit-frame-pointer
+ STRIPCMD = $(STRIP) -s --remove-section=.note --remove-section=.comment
+endif
+
+
+# the actual make rules
+
+all: libcpupower $(OUTPUT)cpupower $(COMPILE_NLS) $(COMPILE_BENCH)
+
+$(OUTPUT)lib/%.o: $(LIB_SRC) $(LIB_HEADERS)
+ $(ECHO) " CC " $@
+ $(QUIET) $(CC) $(CFLAGS) -fPIC -o $@ -c lib/$*.c
+
+$(OUTPUT)libcpupower.so.$(LIB_MAJ): $(LIB_OBJS)
+ $(ECHO) " LD " $@
+ $(QUIET) $(CC) -shared $(CFLAGS) $(LDFLAGS) -o $@ \
+ -Wl,-soname,libcpupower.so.$(LIB_MIN) $(LIB_OBJS)
+ @ln -sf $(@F) $(OUTPUT)libcpupower.so
+ @ln -sf $(@F) $(OUTPUT)libcpupower.so.$(LIB_MIN)
+
+libcpupower: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+
+# Let each .o file depend on its .c file and all headers
+# Might be worth putting this into utils/Makefile at some point
+$(UTIL_OBJS): $(UTIL_HEADERS)
+
+$(OUTPUT)%.o: %.c
+ $(ECHO) " CC " $@
+ $(QUIET) $(CC) $(CFLAGS) -I./lib -I ./utils -o $@ -c $*.c
+
+$(OUTPUT)cpupower: $(UTIL_OBJS) $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+ $(ECHO) " CC " $@
+ifeq ($(strip $(STATIC)),true)
+ $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lrt -lpci -L$(OUTPUT) -o $@
+else
+ $(QUIET) $(CC) $(CFLAGS) $(LDFLAGS) $(UTIL_OBJS) -lcpupower -lrt -lpci -L$(OUTPUT) -o $@
+endif
+ $(QUIET) $(STRIPCMD) $@
+
+$(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC)
+ $(ECHO) " GETTEXT " $@
+ $(QUIET) xgettext --default-domain=$(PACKAGE) --add-comments \
+ --keyword=_ --keyword=N_ $(UTIL_SRC) -p $(@D) -o $(@F)
+
+$(OUTPUT)po/%.gmo: po/%.po
+ $(ECHO) " MSGFMT " $@
+ $(QUIET) msgfmt -o $@ po/$*.po
+
+create-gmo: ${GMO_FILES}
+
+update-po: $(OUTPUT)po/$(PACKAGE).pot
+ $(ECHO) " MSGMRG " $@
+ $(QUIET) @for HLANG in $(LANGUAGES); do \
+ echo -n "Updating $$HLANG "; \
+ if msgmerge po/$$HLANG.po $< -o \
+ $(OUTPUT)po/$$HLANG.new.po; then \
+ mv -f $(OUTPUT)po/$$HLANG.new.po $(OUTPUT)po/$$HLANG.po; \
+ else \
+ echo "msgmerge for $$HLANG failed!"; \
+ rm -f $(OUTPUT)po/$$HLANG.new.po; \
+ fi; \
+ done;
+
+compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ)
+ @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT)
+
+# we compile into subdirectories. if the target directory is not the
+# source directory, they might not exist. So we make the various
+# files depend on their directories.
+DIRECTORY_DEPS = $(LIB_OBJS) $(UTIL_OBJS) $(GMO_FILES)
+$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS)))
+
+# In the second step, we make a rule to actually create these directories
+$(sort $(dir $(DIRECTORY_DEPS))):
+ $(ECHO) " MKDIR " $@
+ $(QUIET) $(MKDIR) -p $@ 2>/dev/null
+
+clean:
+ -find $(OUTPUT) \( -not -type d \) -and \( -name '*~' -o -name '*.[oas]' \) -type f -print \
+ | xargs rm -f
+ -rm -f $(OUTPUT)cpupower
+ -rm -f $(OUTPUT)libcpupower.so*
+ -rm -rf $(OUTPUT)po/*.gmo
+ -rm -rf $(OUTPUT)po/*.pot
+ $(MAKE) -C bench O=$(OUTPUT) clean
+
+
+install-lib:
+ $(INSTALL) -d $(DESTDIR)${libdir}
+ $(CP) $(OUTPUT)libcpupower.so* $(DESTDIR)${libdir}/
+ $(INSTALL) -d $(DESTDIR)${includedir}
+ $(INSTALL_DATA) lib/cpufreq.h $(DESTDIR)${includedir}/cpufreq.h
+ $(INSTALL_DATA) lib/cpuidle.h $(DESTDIR)${includedir}/cpuidle.h
+
+install-tools:
+ $(INSTALL) -d $(DESTDIR)${bindir}
+ $(INSTALL_PROGRAM) $(OUTPUT)cpupower $(DESTDIR)${bindir}
+
+install-man:
+ $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1
+ $(INSTALL_DATA) -D man/cpupower-frequency-set.1 $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
+ $(INSTALL_DATA) -D man/cpupower-frequency-info.1 $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1
+ $(INSTALL_DATA) -D man/cpupower-idle-set.1 $(DESTDIR)${mandir}/man1/cpupower-idle-set.1
+ $(INSTALL_DATA) -D man/cpupower-idle-info.1 $(DESTDIR)${mandir}/man1/cpupower-idle-info.1
+ $(INSTALL_DATA) -D man/cpupower-set.1 $(DESTDIR)${mandir}/man1/cpupower-set.1
+ $(INSTALL_DATA) -D man/cpupower-info.1 $(DESTDIR)${mandir}/man1/cpupower-info.1
+ $(INSTALL_DATA) -D man/cpupower-monitor.1 $(DESTDIR)${mandir}/man1/cpupower-monitor.1
+
+install-gmo:
+ $(INSTALL) -d $(DESTDIR)${localedir}
+ for HLANG in $(LANGUAGES); do \
+ echo '$(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo'; \
+ $(INSTALL_DATA) -D $(OUTPUT)po/$$HLANG.gmo $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \
+ done;
+
+install-bench:
+ @#DESTDIR must be set from outside to survive
+ @sbindir=$(sbindir) bindir=$(bindir) docdir=$(docdir) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) install
+
+ifeq ($(strip $(STATIC)),true)
+install: all install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH)
+else
+install: all install-lib install-tools install-man $(INSTALL_NLS) $(INSTALL_BENCH)
+endif
+
+uninstall:
+ - rm -f $(DESTDIR)${libdir}/libcpupower.*
+ - rm -f $(DESTDIR)${includedir}/cpufreq.h
+ - rm -f $(DESTDIR)${includedir}/cpuidle.h
+	- rm -f $(DESTDIR)${bindir}/cpupower
+ - rm -f $(DESTDIR)${mandir}/man1/cpupower.1
+ - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1
+ - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1
+ - rm -f $(DESTDIR)${mandir}/man1/cpupower-set.1
+ - rm -f $(DESTDIR)${mandir}/man1/cpupower-info.1
+ - rm -f $(DESTDIR)${mandir}/man1/cpupower-monitor.1
+ - for HLANG in $(LANGUAGES); do \
+ rm -f $(DESTDIR)${localedir}/$$HLANG/LC_MESSAGES/cpupower.mo; \
+ done;
+
+.PHONY: all utils libcpupower update-po create-gmo install-lib install-tools install-man install-gmo install uninstall clean
diff --git a/tools/power/cpupower/README b/tools/power/cpupower/README
new file mode 100644
index 000000000..1c68f4766
--- /dev/null
+++ b/tools/power/cpupower/README
@@ -0,0 +1,47 @@
+The cpupower package consists of the following elements:
+
+requirements
+------------
+
+On x86 pciutils is needed at runtime (-lpci).
+For compilation pciutils-devel (pci/pci.h) and a gcc version
+providing cpuid.h is needed.
+Neither is explicitly checked for (yet).
+
+
+libcpupower
+-----------
+
+"libcpupower" is a library which offers a unified access method for userspace
+tools and programs to the cpufreq core and drivers in the Linux kernel. This
+allows for code reduction in userspace tools, a clean implementation of
+the interaction to the cpufreq core, and support for both the sysfs and proc
+interfaces [depending on configuration, see below].
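+
+A minimal sketch of a library user (error handling omitted; the governor
+name is only an example, cf. the cpufreq-bench sources in this package):
+
+	#include <cpufreq.h>
+
+	int main(void)
+	{
+		/* switch CPU 0 to the ondemand governor */
+		if (cpufreq_modify_policy_governor(0, "ondemand") != 0)
+			return 1;
+		return 0;
+	}
+
+compiled with something like: gcc -o test test.c -lcpupower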
+
+
+compilation and installation
+----------------------------
+
+make
+su
+make install
+
+should suffice on most systems. It builds libcpupower to put in
+/usr/lib; cpupower, cpufreq-bench_plot.sh to put in /usr/bin; and
+cpufreq-bench to put in /usr/sbin. If you want to set up the paths
+differently and/or want to configure the package to your specific
+needs, you need to open "Makefile" with an editor of your choice and
+edit the block marked CONFIGURATION.
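+
+For example, a build without translations and without the benchmark,
+installed into a staging directory, could look like this (the staging
+path is only an illustration; the variables are the ones from the
+CONFIGURATION block):
+
+make NLS=false CPUFREQ_BENCH=false
+make NLS=false CPUFREQ_BENCH=false DESTDIR=/tmp/cpupower-root install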
+
+
+THANKS
+------
+Many thanks to Mattia Dongili who wrote the autotoolization and
+libtoolization, the manpages and the Italian language file for cpupower;
+to Dave Jones for his feedback and his dump_psb tool; to Bruno Ducrot for his
+powernow-k8-decode and intel_gsic tools as well as the French language file;
+and to various others commenting on the previous (pre-)releases of
+cpupower.
+
+
+ Dominik Brodowski
diff --git a/tools/power/cpupower/ToDo b/tools/power/cpupower/ToDo
new file mode 100644
index 000000000..6e8b89f28
--- /dev/null
+++ b/tools/power/cpupower/ToDo
@@ -0,0 +1,10 @@
+ToDos sorted by priority:
+
+- Use bitmask functions to parse the CPU topology more robustly
+  (the current implementation has issues on AMD)
+- Try to read out boost states and frequencies on Intel
+- Somewhere I saw the ability to read out the power consumption of
+  RAM from HW on Intel SandyBridge -> another monitor?
+- Add another c1e debug idle monitor
+ -> Is by design racy with BIOS, but could be added
+ with a --force option and some "be careful" messages
diff --git a/tools/power/cpupower/bench/Makefile b/tools/power/cpupower/bench/Makefile
new file mode 100644
index 000000000..f68b4bc55
--- /dev/null
+++ b/tools/power/cpupower/bench/Makefile
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+OUTPUT := ./
+ifeq ("$(origin O)", "command line")
+ifneq ($(O),)
+ OUTPUT := $(O)/
+endif
+endif
+
+ifeq ($(strip $(STATIC)),true)
+LIBS = -L../ -L$(OUTPUT) -lm
+OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o \
+ $(OUTPUT)../lib/cpufreq.o $(OUTPUT)../lib/cpupower.o
+else
+LIBS = -L../ -L$(OUTPUT) -lm -lcpupower
+OBJS = $(OUTPUT)main.o $(OUTPUT)parse.o $(OUTPUT)system.o $(OUTPUT)benchmark.o
+endif
+
+CFLAGS += -D_GNU_SOURCE -I../lib -DDEFAULT_CONFIG_FILE=\"$(confdir)/cpufreq-bench.conf\"
+
+$(OUTPUT)%.o : %.c
+ $(ECHO) " CC " $@
+ $(QUIET) $(CC) -c $(CFLAGS) $< -o $@
+
+$(OUTPUT)cpufreq-bench: $(OBJS)
+ $(ECHO) " CC " $@
+ $(QUIET) $(CC) -o $@ $(CFLAGS) $(LDFLAGS) $(OBJS) $(LIBS)
+
+all: $(OUTPUT)cpufreq-bench
+
+install:
+ mkdir -p $(DESTDIR)/$(sbindir)
+ mkdir -p $(DESTDIR)/$(bindir)
+ mkdir -p $(DESTDIR)/$(docdir)
+ mkdir -p $(DESTDIR)/$(confdir)
+ install -m 755 $(OUTPUT)cpufreq-bench $(DESTDIR)/$(sbindir)/cpufreq-bench
+ install -m 755 cpufreq-bench_plot.sh $(DESTDIR)/$(bindir)/cpufreq-bench_plot.sh
+ install -m 644 README-BENCH $(DESTDIR)/$(docdir)/README-BENCH
+ install -m 755 cpufreq-bench_script.sh $(DESTDIR)/$(docdir)/cpufreq-bench_script.sh
+ install -m 644 example.cfg $(DESTDIR)/$(confdir)/cpufreq-bench.conf
+
+clean:
+ rm -f $(OUTPUT)*.o
+ rm -f $(OUTPUT)cpufreq-bench
diff --git a/tools/power/cpupower/bench/README-BENCH b/tools/power/cpupower/bench/README-BENCH
new file mode 100644
index 000000000..97727aed6
--- /dev/null
+++ b/tools/power/cpupower/bench/README-BENCH
@@ -0,0 +1,124 @@
+This is cpufreq-bench, a microbenchmark for the cpufreq framework.
+
+Purpose
+=======
+
+What is this benchmark for:
+ - Identify worst case performance loss when doing dynamic frequency
+ scaling using Linux kernel governors
+ - Identify average reaction time of a governor to CPU load changes
+ - (Stress) Testing whether a cpufreq low level driver or governor works
+ as expected
+ - Identify cpufreq related performance regressions between kernels
+ - Possibly real-time priority testing? -> what happens if there are
+   processes with a higher prio than the governor's kernel thread
+ - ...
+
+What this benchmark does *not* cover:
+ - Power saving related regressions (in fact, the better the performance
+   throughput is, the worse the power savings will be; but the former
+   should usually count more...)
+ - Real-world workloads
+
+
+Description
+===========
+
+cpufreq-bench helps to test the behaviour of a given cpufreq governor.
+For that purpose, it compares the performance governor to a configured
+powersave governor (the governor under test).
+
+
+How it works
+============
+You can specify load (100% CPU load) and sleep (0% CPU load) times in us,
+which will be run X times in a row (cycles):
+
+ sleep = 25000
+ load = 25000
+ cycles = 20
+
+This part of the configuration file will create 25ms load/sleep turns,
+repeated 20 times.
+
+Adding this:
+ sleep_step = 25000
+ load_step = 25000
+ rounds = 5
+will increase load and sleep time by 25ms with each round, for 5 rounds in
+total. Together you get the following test:
+25ms load/sleep time repeated 20 times (cycles).
+50ms load/sleep time repeated 20 times (cycles).
+..
+125ms load/sleep time repeated 20 times (cycles).
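+
+Put together, the config file for this example test would contain (note
+the spaces around '=', which the config parser in bench/parse.c expects;
+cf. example.cfg for the remaining options):
+
+ sleep = 25000
+ load = 25000
+ cycles = 20
+ sleep_step = 25000
+ load_step = 25000
+ rounds = 5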
+
+First, it is calibrated how many loop iterations of a specific CPU
+intensive calculation are needed to match the requested load time on
+this machine, using the performance governor.
+Then the above test runs are processed using the performance governor
+and the governor to test. The time the calculation really needed
+with the dynamic frequency scaling governor is compared with the time
+needed at full performance, and you get the overall performance loss.
+
+
+Example of expected results with ondemand governor:
+
+This shows the expected results of the first two test-run rounds from the
+above config; there you have:
+
+100% CPU load (load) | 0 % CPU load (sleep) | round
+ 25 ms | 25 ms | 1
+ 50 ms | 50 ms | 2
+
+For example, if the ondemand governor is configured with a 50ms
+sampling rate, you get:
+
+In round 1, ondemand should see a rather static 50% load and probably
+won't ever switch up (as long as up_threshold is above that).
+
+In round 2, if the ondemand sampling times exactly match the load/sleep
+trigger of the cpufreq-bench, you will see no performance loss (compare
+with the possible ondemand sample kick-ins (1) below):
+
+But if ondemand always kicks in in the middle of the load/sleep cycles, it
+will always see 50% load and you get the worst performance impact, never
+switching up (compare with the possible ondemand sample kick-ins (2) below):
+
+ 50 50 50 50ms ->time
+load -----| |-----| |-----| |-----|
+ | | | | | | |
+sleep |-----| |-----| |-----| |----
+ |-----|-----|-----|-----|-----|-----|-----|---- ondemand sampling (1)
+ 100 0 100 0 100 0 100 load seen by ondemand(%)
+ |-----|-----|-----|-----|-----|-----|-----|-- ondemand sampling (2)
+ 50 50 50 50 50 50 50 load seen by ondemand(%)
+
+You can easily test all kinds of load/sleep times and check whether your
+governor on average behaves as expected.
+
+
+ToDo
+====
+
+Provide a gnuplot utility script for easy generation of plots to present
+the outcome nicely.
+
+
+cpufreq-bench Command Usage
+===========================
+-l, --load=<long int> initial load time in us
+-s, --sleep=<long int> initial sleep time in us
+-x, --load-step=<long int> time to be added to load time, in us
+-y, --sleep-step=<long int> time to be added to sleep time, in us
+-c, --cpu=<unsigned int> CPU Number to use, starting at 0
+-p, --prio=<priority> scheduler priority, HIGH, LOW or DEFAULT
+-g, --governor=<governor> cpufreq governor to test
+-n, --cycles=<int> load/sleep cycles to get an average value to compare
+-r, --rounds=<int>           load/sleep rounds
+-f, --file=<configfile> config file to use
+-o, --output=<dir> output dir, must exist
+-v, --verbose verbose output on/off
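+
+A possible invocation for the example configuration above (the output
+directory is only an illustration):
+
+cpufreq-bench -l 25000 -s 25000 -x 25000 -y 25000 -n 20 -r 5 \
+              -g ondemand -o /var/log/cpufreq-bench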
+
+Due to the high priority, the system may not be responsive for some time.
+After the benchmark, the logfile is saved to OUTPUTDIR/benchmark_TIMESTAMP.log
+
diff --git a/tools/power/cpupower/bench/benchmark.c b/tools/power/cpupower/bench/benchmark.c
new file mode 100644
index 000000000..429d51ab8
--- /dev/null
+++ b/tools/power/cpupower/bench/benchmark.c
@@ -0,0 +1,194 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "config.h"
+#include "system.h"
+#include "benchmark.h"
+
+/* Print out progress if we log into a file */
+#define show_progress(total_time, progress_time) \
+if (config->output != stdout) { \
+ fprintf(stdout, "Progress: %02lu %%\r", \
+ (progress_time * 100) / total_time); \
+ fflush(stdout); \
+}
+
+/**
+ * compute how many rounds of calculation we should do
+ * to get the given load time
+ *
+ * @param load aimed load time in µs
+ *
+ * @retval rounds of calculation
+ **/
+
+unsigned int calculate_timespace(long load, struct config *config)
+{
+ int i;
+ long long now, then;
+ unsigned int estimated = GAUGECOUNT;
+ unsigned int rounds = 0;
+ unsigned int timed = 0;
+
+ if (config->verbose)
+ printf("calibrating load of %lius, please wait...\n", load);
+
+ /* get the initial calculation time for a specific number of rounds */
+ now = get_time();
+ ROUNDS(estimated);
+ then = get_time();
+
+ timed = (unsigned int)(then - now);
+
+ /* approximation of the wanted load time by comparing with the
+ * initial calculation time */
+ for (i = 0; i < 4; i++) {
+ rounds = (unsigned int)(load * estimated / timed);
+ dprintf("calibrating with %u rounds\n", rounds);
+ now = get_time();
+ ROUNDS(rounds);
+ then = get_time();
+
+ timed = (unsigned int)(then - now);
+ estimated = rounds;
+ }
+ if (config->verbose)
+ printf("calibration done\n");
+
+ return estimated;
+}
+
+/**
+ * benchmark
+ * generates a specific sleep and load time with the performance
+ * governor and compares the time used for the same calculations done
+ * with the configured powersave governor
+ *
+ * @param config config values for the benchmark
+ *
+ **/
+
+void start_benchmark(struct config *config)
+{
+ unsigned int _round, cycle;
+ long long now, then;
+ long sleep_time = 0, load_time = 0;
+ long performance_time = 0, powersave_time = 0;
+ unsigned int calculations;
+ unsigned long total_time = 0, progress_time = 0;
+
+ sleep_time = config->sleep;
+ load_time = config->load;
+
+ /* For the progress bar */
+ for (_round = 1; _round <= config->rounds; _round++)
+ total_time += _round * (config->sleep + config->load);
+ total_time *= 2; /* powersave and performance cycles */
+
+ for (_round = 0; _round < config->rounds; _round++) {
+ performance_time = 0LL;
+ powersave_time = 0LL;
+
+ show_progress(total_time, progress_time);
+
+ /* set the cpufreq governor to "performance" which disables
+ * P-State switching. */
+ if (set_cpufreq_governor("performance", config->cpu) != 0)
+ return;
+
+ /* calibrate the calculation time. the resulting calculation
+ * _rounds should produce a load which matches the configured
+ * load time */
+ calculations = calculate_timespace(load_time, config);
+
+ if (config->verbose)
+ printf("_round %i: doing %u cycles with %u calculations"
+ " for %lius\n", _round + 1, config->cycles,
+ calculations, load_time);
+
+ fprintf(config->output, "%u %li %li ",
+ _round, load_time, sleep_time);
+
+ if (config->verbose)
+ printf("average: %lius, rps:%li\n",
+ load_time / calculations,
+ 1000000 * calculations / load_time);
+
+ /* do some sleep/load cycles with the performance governor */
+ for (cycle = 0; cycle < config->cycles; cycle++) {
+ now = get_time();
+ usleep(sleep_time);
+ ROUNDS(calculations);
+ then = get_time();
+ performance_time += then - now - sleep_time;
+ if (config->verbose)
+ printf("performance cycle took %lius, "
+ "sleep: %lius, "
+ "load: %lius, rounds: %u\n",
+ (long)(then - now), sleep_time,
+ load_time, calculations);
+ }
+ fprintf(config->output, "%li ",
+ performance_time / config->cycles);
+
+ progress_time += sleep_time + load_time;
+ show_progress(total_time, progress_time);
+
+ /* set the powersave governor which activates P-State switching
+ * again */
+ if (set_cpufreq_governor(config->governor, config->cpu) != 0)
+ return;
+
+ /* again, do some sleep/load cycles with the
+ * powersave governor */
+ for (cycle = 0; cycle < config->cycles; cycle++) {
+ now = get_time();
+ usleep(sleep_time);
+ ROUNDS(calculations);
+ then = get_time();
+ powersave_time += then - now - sleep_time;
+ if (config->verbose)
+ printf("powersave cycle took %lius, "
+ "sleep: %lius, "
+ "load: %lius, rounds: %u\n",
+ (long)(then - now), sleep_time,
+ load_time, calculations);
+ }
+
+ progress_time += sleep_time + load_time;
+
+ /* compare the average sleep/load cycles */
+ fprintf(config->output, "%li ",
+ powersave_time / config->cycles);
+ fprintf(config->output, "%.3f\n",
+ performance_time * 100.0 / powersave_time);
+ fflush(config->output);
+
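+		/* performance_time / powersave_time < 1 means the tested
+		 * governor needed more time for the same work, so values
+		 * below 100% indicate a performance loss. */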
+ if (config->verbose)
+ printf("performance is at %.2f%%\n",
+ performance_time * 100.0 / powersave_time);
+
+ sleep_time += config->sleep_step;
+ load_time += config->load_step;
+ }
+}
diff --git a/tools/power/cpupower/bench/benchmark.h b/tools/power/cpupower/bench/benchmark.h
new file mode 100644
index 000000000..51d7f50ac
--- /dev/null
+++ b/tools/power/cpupower/bench/benchmark.h
@@ -0,0 +1,29 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* load loop, this should take about 1 to 2ms to complete */
+#define ROUNDS(x) {unsigned int rcnt; \
+ for (rcnt = 0; rcnt < x*1000; rcnt++) { \
+ (void)(((int)(pow(rcnt, rcnt) * \
+ sqrt(rcnt*7230970)) ^ 7230716) ^ \
+ (int)atan2(rcnt, rcnt)); \
+ } } \
+
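+/*
+ * Typical use (cf. benchmark.c): calibrate the round count for the
+ * configured load time first, then burn CPU for roughly that long:
+ *
+ *	calculations = calculate_timespace(config->load, config);
+ *	ROUNDS(calculations);
+ */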
+
+void start_benchmark(struct config *config);
diff --git a/tools/power/cpupower/bench/config.h b/tools/power/cpupower/bench/config.h
new file mode 100644
index 000000000..ee6f258e5
--- /dev/null
+++ b/tools/power/cpupower/bench/config.h
@@ -0,0 +1,36 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* initial loop count for the load calibration */
+#define GAUGECOUNT 1500
+
+/* default scheduling policy SCHED_OTHER */
+#define SCHEDULER SCHED_OTHER
+
+#define PRIORITY_DEFAULT 0
+#define PRIORITY_HIGH sched_get_priority_max(SCHEDULER)
+#define PRIORITY_LOW sched_get_priority_min(SCHEDULER)
+
+/* enable further debug messages */
+#ifdef DEBUG
+#define dprintf printf
+#else
+#define dprintf(...) do { } while (0)
+#endif
+
diff --git a/tools/power/cpupower/bench/cpufreq-bench_plot.sh b/tools/power/cpupower/bench/cpufreq-bench_plot.sh
new file mode 100644
index 000000000..410021a12
--- /dev/null
+++ b/tools/power/cpupower/bench/cpufreq-bench_plot.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+
+# Author/Copyright(c): 2009, Thomas Renninger <trenn@suse.de>, Novell Inc.
+
+# Helper script to easily create nice plots of your cpufreq-bench results
+
+dir=`mktemp -d`
+output_file="cpufreq-bench.png"
+global_title="cpufreq-bench plot"
+picture_type="jpeg"
+file[0]=""
+
+function usage()
+{
+ echo "cpufreq-bench_plot.sh [OPTIONS] logfile [measure_title] [logfile [measure_title]] ...]"
+ echo
+ echo "Options"
+ echo " -o output_file"
+ echo " -t global_title"
+ echo " -p picture_type [jpeg|gif|png|postscript|...]"
+ exit 1
+}
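+
+# Example invocation (log file names are hypothetical): compare two
+# benchmark result files in one png plot:
+#	cpufreq-bench_plot.sh -p png -o comparison -t "ondemand vs. conservative" \
+#		ondemand.log "ondemand" conservative.log "conservative"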
+
+if [ $# -eq 0 ];then
+ echo "No benchmark results file provided"
+ echo
+ usage
+fi
+
+while getopts o:t:p: name ; do
+ case $name in
+ o)
+ output_file="$OPTARG".$picture_type
+ ;;
+ t)
+ global_title="$OPTARG"
+ ;;
+ p)
+ picture_type="$OPTARG"
+ ;;
+ ?)
+ usage
+ ;;
+ esac
+done
+shift $(($OPTIND -1))
+
+plots=0
+while [ "$1" ];do
+ if [ ! -f "$1" ];then
+ echo "File $1 does not exist"
+ usage
+ fi
+ file[$plots]="$1"
+ title[$plots]="$2"
+ # echo "File: ${file[$plots]} - ${title[plots]}"
+ shift;shift
+ plots=$((plots + 1))
+done
+
+echo "set terminal $picture_type" >> $dir/plot_script.gpl
+echo "set output \"$output_file\"" >> $dir/plot_script.gpl
+echo "set title \"$global_title\"" >> $dir/plot_script.gpl
+echo "set xlabel \"sleep/load time\"" >> $dir/plot_script.gpl
+echo "set ylabel \"Performance (%)\"" >> $dir/plot_script.gpl
+
+for((plot=0;plot<$plots;plot++));do
+
+ # Sanity check
+	###### I am too dumb to get this redirected to stderr/stdout in one awk call... #####
+	cat ${file[$plot]} |grep -v "^#" |awk '{if ($2 != $3) printf("Error in measure %d: Load time %s does not equal sleep time %s, plot will not be correct\n", $1, $2, $3); ERR=1}'
+	###### I am too dumb to get this redirected in one awk call... #####
+
+ # Parse out load time (which must be equal to sleep time for a plot), divide it by 1000
+ # to get ms and parse out the performance in percentage and write it to a temp file for plotting
+ cat ${file[$plot]} |grep -v "^#" |awk '{printf "%lu %.1f\n",$2/1000, $6}' >$dir/data_$plot
+
+ if [ $plot -eq 0 ];then
+ echo -n "plot " >> $dir/plot_script.gpl
+ fi
+ echo -n "\"$dir/data_$plot\" title \"${title[$plot]}\" with lines" >> $dir/plot_script.gpl
+ if [ $(($plot + 1)) -ne $plots ];then
+ echo -n ", " >> $dir/plot_script.gpl
+ fi
+done
+echo >> $dir/plot_script.gpl
+
+gnuplot $dir/plot_script.gpl
+rm -r $dir
\ No newline at end of file
diff --git a/tools/power/cpupower/bench/cpufreq-bench_script.sh b/tools/power/cpupower/bench/cpufreq-bench_script.sh
new file mode 100644
index 000000000..de20d2a06
--- /dev/null
+++ b/tools/power/cpupower/bench/cpufreq-bench_script.sh
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+# 02110-1301, USA.
+
+# Author/Copyright(c): 2009, Thomas Renninger <trenn@suse.de>, Novell Inc.
+
+# Ondemand up_threshold and sampling rate test script for cpufreq-bench
+# microbenchmark.
+# Modify the general variables at the top or extend or copy out parts
+# if you want to test other things
+#
+
+# Default with latest kernels is 95, before micro account patches
+# it was 80, cmp. with git commit 808009131046b62ac434dbc796
+UP_THRESHOLD="60 80 95"
+# Depending on the kernel and the HW sampling rate could be restricted
+# and cannot be set that low...
+# E.g. before git commit cef9615a853ebc4972084f7 one could only set
+# min sampling rate of 80000 if CONFIG_HZ=250
+SAMPLING_RATE="20000 80000"
+
+function measure()
+{
+ local -i up_threshold_set
+ local -i sampling_rate_set
+
+ for up_threshold in $UP_THRESHOLD;do
+ for sampling_rate in $SAMPLING_RATE;do
+ # Set values in sysfs
+ echo $up_threshold >/sys/devices/system/cpu/cpu0/cpufreq/ondemand/up_threshold
+ echo $sampling_rate >/sys/devices/system/cpu/cpu0/cpufreq/ondemand/sampling_rate
+ up_threshold_set=$(cat /sys/devices/system/cpu/cpu0/cpufreq/ondemand/up_threshold)
+ sampling_rate_set=$(cat /sys/devices/system/cpu/cpu0/cpufreq/ondemand/sampling_rate)
+
+ # Verify set values in sysfs
+ if [ ${up_threshold_set} -eq ${up_threshold} ];then
+ echo "up_threshold: $up_threshold, set in sysfs: ${up_threshold_set}"
+ else
+ echo "WARNING: Tried to set up_threshold: $up_threshold, set in sysfs: ${up_threshold_set}"
+ fi
+ if [ ${sampling_rate_set} -eq ${sampling_rate} ];then
+ echo "sampling_rate: $sampling_rate, set in sysfs: ${sampling_rate_set}"
+ else
+ echo "WARNING: Tried to set sampling_rate: $sampling_rate, set in sysfs: ${sampling_rate_set}"
+ fi
+
+ # Benchmark
+ cpufreq-bench -o /var/log/cpufreq-bench/up_threshold_${up_threshold}_sampling_rate_${sampling_rate}
+ done
+ done
+}
+
+function create_plots()
+{
+ local command
+
+ for up_threshold in $UP_THRESHOLD;do
+ command="cpufreq-bench_plot.sh -o \"sampling_rate_${SAMPLING_RATE}_up_threshold_${up_threshold}\" -t \"Ondemand sampling_rate: ${SAMPLING_RATE} comparison - Up_threshold: $up_threshold %\""
+ for sampling_rate in $SAMPLING_RATE;do
+ command="${command} /var/log/cpufreq-bench/up_threshold_${up_threshold}_sampling_rate_${sampling_rate}/* \"sampling_rate = $sampling_rate\""
+ done
+ echo $command
+ eval "$command"
+ echo
+ done
+
+ for sampling_rate in $SAMPLING_RATE;do
+ command="cpufreq-bench_plot.sh -o \"up_threshold_${UP_THRESHOLD}_sampling_rate_${sampling_rate}\" -t \"Ondemand up_threshold: ${UP_THRESHOLD} % comparison - sampling_rate: $sampling_rate\""
+ for up_threshold in $UP_THRESHOLD;do
+ command="${command} /var/log/cpufreq-bench/up_threshold_${up_threshold}_sampling_rate_${sampling_rate}/* \"up_threshold = $up_threshold\""
+ done
+ echo $command
+ eval "$command"
+ echo
+ done
+
+ command="cpufreq-bench_plot.sh -o \"up_threshold_${UP_THRESHOLD}_sampling_rate_${SAMPLING_RATE}\" -t \"Ondemand up_threshold: ${UP_THRESHOLD} and sampling_rate ${SAMPLING_RATE} comparison\""
+ for sampling_rate in $SAMPLING_RATE;do
+ for up_threshold in $UP_THRESHOLD;do
+ command="${command} /var/log/cpufreq-bench/up_threshold_${up_threshold}_sampling_rate_${sampling_rate}/* \"up_threshold = $up_threshold - sampling_rate = $sampling_rate\""
+ done
+ done
+ echo "$command"
+ eval "$command"
+}
+
+measure
+create_plots
\ No newline at end of file
diff --git a/tools/power/cpupower/bench/example.cfg b/tools/power/cpupower/bench/example.cfg
new file mode 100644
index 000000000..f91f64360
--- /dev/null
+++ b/tools/power/cpupower/bench/example.cfg
@@ -0,0 +1,11 @@
+sleep = 50000
+load = 50000
+cpu = 0
+priority = LOW
+output = /var/log/cpufreq-bench
+sleep_step = 50000
+load_step = 50000
+cycles = 20
+rounds = 40
+verbose = 0
+governor = ondemand
diff --git a/tools/power/cpupower/bench/main.c b/tools/power/cpupower/bench/main.c
new file mode 100644
index 000000000..24910313a
--- /dev/null
+++ b/tools/power/cpupower/bench/main.c
@@ -0,0 +1,202 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "config.h"
+#include "system.h"
+#include "benchmark.h"
+
+static struct option long_options[] = {
+ {"output", 1, 0, 'o'},
+ {"sleep", 1, 0, 's'},
+ {"load", 1, 0, 'l'},
+ {"verbose", 0, 0, 'v'},
+ {"cpu", 1, 0, 'c'},
+ {"governor", 1, 0, 'g'},
+ {"prio", 1, 0, 'p'},
+ {"file", 1, 0, 'f'},
+ {"cycles", 1, 0, 'n'},
+ {"rounds", 1, 0, 'r'},
+ {"load-step", 1, 0, 'x'},
+ {"sleep-step", 1, 0, 'y'},
+ {"help", 0, 0, 'h'},
+ {0, 0, 0, 0}
+};
+
+/*******************************************************************
+ usage
+*******************************************************************/
+
+void usage()
+{
+ printf("usage: ./bench\n");
+ printf("Options:\n");
+ printf(" -l, --load=<long int>\t\tinitial load time in us\n");
+ printf(" -s, --sleep=<long int>\t\tinitial sleep time in us\n");
+ printf(" -x, --load-step=<long int>\ttime to be added to load time, in us\n");
+ printf(" -y, --sleep-step=<long int>\ttime to be added to sleep time, in us\n");
+ printf(" -c, --cpu=<cpu #>\t\t\tCPU Nr. to use, starting at 0\n");
+ printf(" -p, --prio=<priority>\t\t\tscheduler priority, HIGH, LOW or DEFAULT\n");
+ printf(" -g, --governor=<governor>\t\tcpufreq governor to test\n");
+ printf(" -n, --cycles=<int>\t\t\tload/sleep cycles\n");
+ printf(" -r, --rounds<int>\t\t\tload/sleep rounds\n");
+ printf(" -f, --file=<configfile>\t\tconfig file to use\n");
+ printf(" -o, --output=<dir>\t\t\toutput path. Filename will be OUTPUTPATH/benchmark_TIMESTAMP.log\n");
+ printf(" -v, --verbose\t\t\t\tverbose output on/off\n");
+ printf(" -h, --help\t\t\t\tPrint this help screen\n");
+ exit(1);
+}
+
+/*******************************************************************
+ main
+*******************************************************************/
+
+int main(int argc, char **argv)
+{
+ int c;
+ int option_index = 0;
+ struct config *config = NULL;
+
+ config = prepare_default_config();
+
+ if (config == NULL)
+ return EXIT_FAILURE;
+
+ while (1) {
+ c = getopt_long (argc, argv, "hg:o:s:l:vc:p:f:n:r:x:y:",
+ long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'o':
+ if (config->output != NULL)
+ fclose(config->output);
+
+ config->output = prepare_output(optarg);
+
+ if (config->output == NULL)
+ return EXIT_FAILURE;
+
+ dprintf("user output path -> %s\n", optarg);
+ break;
+ case 's':
+ sscanf(optarg, "%li", &config->sleep);
+ dprintf("user sleep time -> %s\n", optarg);
+ break;
+ case 'l':
+ sscanf(optarg, "%li", &config->load);
+ dprintf("user load time -> %s\n", optarg);
+ break;
+ case 'c':
+ sscanf(optarg, "%u", &config->cpu);
+ dprintf("user cpu -> %s\n", optarg);
+ break;
+ case 'g':
+			strncpy(config->governor, optarg,
+				sizeof(config->governor) - 1);
+			config->governor[sizeof(config->governor) - 1] = '\0';
+ dprintf("user governor -> %s\n", optarg);
+ break;
+ case 'p':
+ if (string_to_prio(optarg) != SCHED_ERR) {
+ config->prio = string_to_prio(optarg);
+ dprintf("user prio -> %s\n", optarg);
+ } else {
+ if (config != NULL) {
+ if (config->output != NULL)
+ fclose(config->output);
+ free(config);
+ }
+ usage();
+ }
+ break;
+ case 'n':
+ sscanf(optarg, "%u", &config->cycles);
+ dprintf("user cycles -> %s\n", optarg);
+ break;
+ case 'r':
+ sscanf(optarg, "%u", &config->rounds);
+ dprintf("user rounds -> %s\n", optarg);
+ break;
+ case 'x':
+ sscanf(optarg, "%li", &config->load_step);
+ dprintf("user load_step -> %s\n", optarg);
+ break;
+ case 'y':
+ sscanf(optarg, "%li", &config->sleep_step);
+ dprintf("user sleep_step -> %s\n", optarg);
+ break;
+ case 'f':
+ if (prepare_config(optarg, config))
+ return EXIT_FAILURE;
+ break;
+ case 'v':
+ config->verbose = 1;
+ dprintf("verbose output enabled\n");
+ break;
+ case 'h':
+ case '?':
+ default:
+ if (config != NULL) {
+ if (config->output != NULL)
+ fclose(config->output);
+ free(config);
+ }
+ usage();
+ }
+ }
+
+ if (config->verbose) {
+ printf("starting benchmark with parameters:\n");
+ printf("config:\n\t"
+ "sleep=%li\n\t"
+ "load=%li\n\t"
+ "sleep_step=%li\n\t"
+ "load_step=%li\n\t"
+ "cpu=%u\n\t"
+ "cycles=%u\n\t"
+ "rounds=%u\n\t"
+ "governor=%s\n\n",
+ config->sleep,
+ config->load,
+ config->sleep_step,
+ config->load_step,
+ config->cpu,
+ config->cycles,
+ config->rounds,
+ config->governor);
+ }
+
+ prepare_user(config);
+ prepare_system(config);
+ start_benchmark(config);
+
+ if (config->output != stdout)
+ fclose(config->output);
+
+ free(config);
+
+ return EXIT_SUCCESS;
+}
+
diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
new file mode 100644
index 000000000..9ba8a44ad
--- /dev/null
+++ b/tools/power/cpupower/bench/parse.c
@@ -0,0 +1,238 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <time.h>
+#include <dirent.h>
+
+#include <sys/utsname.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "parse.h"
+#include "config.h"
+
+/**
+ * converts priority string to priority
+ *
+ * @param str string that represents a scheduler priority
+ *
+ * @retval priority
+ * @retval SCHED_ERR when the priority doesn't exist
+ **/
+
+enum sched_prio string_to_prio(const char *str)
+{
+ if (strncasecmp("high", str, strlen(str)) == 0)
+ return SCHED_HIGH;
+ else if (strncasecmp("default", str, strlen(str)) == 0)
+ return SCHED_DEFAULT;
+ else if (strncasecmp("low", str, strlen(str)) == 0)
+ return SCHED_LOW;
+ else
+ return SCHED_ERR;
+}
+
+/**
+ * create and open logfile
+ *
+ * @param dir directory in which the logfile should be created
+ *
+ * @retval logfile on success
+ * @retval NULL when the file can't be created
+ **/
+
+FILE *prepare_output(const char *dirname)
+{
+ FILE *output = NULL;
+ int len;
+ char *filename, *filename_tmp;
+ struct utsname sysdata;
+ DIR *dir;
+
+ dir = opendir(dirname);
+ if (dir == NULL) {
+ if (mkdir(dirname, 0755)) {
+ perror("mkdir");
+ fprintf(stderr, "error: Cannot create dir %s\n",
+ dirname);
+ return NULL;
+ }
+ }
+
+ len = strlen(dirname) + 30;
+ filename = malloc(sizeof(char) * len);
+ if (!filename) {
+ perror("malloc");
+ goto out_dir;
+ }
+
+ if (uname(&sysdata) == 0) {
+ len += strlen(sysdata.nodename) + strlen(sysdata.release);
+ filename_tmp = realloc(filename, sizeof(*filename) * len);
+
+ if (filename_tmp == NULL) {
+ free(filename);
+ perror("realloc");
+ goto out_dir;
+ }
+
+ filename = filename_tmp;
+ snprintf(filename, len - 1, "%s/benchmark_%s_%s_%li.log",
+ dirname, sysdata.nodename, sysdata.release, time(NULL));
+ } else {
+ snprintf(filename, len - 1, "%s/benchmark_%li.log",
+ dirname, time(NULL));
+ }
+
+ dprintf("logfilename: %s\n", filename);
+
+ output = fopen(filename, "w+");
+ if (output == NULL) {
+ perror("fopen");
+ fprintf(stderr, "error: unable to open logfile\n");
+ goto out;
+ }
+
+ fprintf(stdout, "Logfile: %s\n", filename);
+
+ fprintf(output, "#round load sleep performance powersave percentage\n");
+out:
+ free(filename);
+out_dir:
+ closedir(dir);
+ return output;
+}
+
+/**
+ * returns the default config
+ *
+ * @retval default config on success
+ * @retval NULL when the output file can't be created
+ **/
+
+struct config *prepare_default_config()
+{
+	struct config *config = malloc(sizeof(struct config));
+
+	if (config == NULL)
+		return NULL;
+
+	dprintf("loading defaults\n");
+
+ config->sleep = 500000;
+ config->load = 500000;
+ config->sleep_step = 500000;
+ config->load_step = 500000;
+ config->cycles = 5;
+ config->rounds = 50;
+ config->cpu = 0;
+ config->prio = SCHED_HIGH;
+ config->verbose = 0;
+ strncpy(config->governor, "ondemand", 8);
+
+ config->output = stdout;
+
+#ifdef DEFAULT_CONFIG_FILE
+ if (prepare_config(DEFAULT_CONFIG_FILE, config))
+ return NULL;
+#endif
+ return config;
+}
+
+/**
+ * parses config file and returns the config to the caller
+ *
+ * @param path config file name
+ *
+ * @retval 1 on error
+ * @retval 0 on success
+ **/
+
+int prepare_config(const char *path, struct config *config)
+{
+ size_t len = 0;
+ char opt[16], val[32], *line = NULL;
+ FILE *configfile;
+
+ if (config == NULL) {
+ fprintf(stderr, "error: config is NULL\n");
+ return 1;
+ }
+
+ configfile = fopen(path, "r");
+ if (configfile == NULL) {
+ perror("fopen");
+ fprintf(stderr, "error: unable to read configfile\n");
+ free(config);
+ return 1;
+ }
+
+ while (getline(&line, &len, configfile) != -1) {
+ if (line[0] == '#' || line[0] == ' ' || line[0] == '\n')
+ continue;
+
+ if (sscanf(line, "%14s = %30s", opt, val) < 2)
+ continue;
+
+ dprintf("parsing: %s -> %s\n", opt, val);
+
+ if (strcmp("sleep", opt) == 0)
+ sscanf(val, "%li", &config->sleep);
+
+ else if (strcmp("load", opt) == 0)
+ sscanf(val, "%li", &config->load);
+
+ else if (strcmp("load_step", opt) == 0)
+ sscanf(val, "%li", &config->load_step);
+
+ else if (strcmp("sleep_step", opt) == 0)
+ sscanf(val, "%li", &config->sleep_step);
+
+ else if (strcmp("cycles", opt) == 0)
+ sscanf(val, "%u", &config->cycles);
+
+ else if (strcmp("rounds", opt) == 0)
+ sscanf(val, "%u", &config->rounds);
+
+ else if (strcmp("verbose", opt) == 0)
+ sscanf(val, "%u", &config->verbose);
+
+ else if (strcmp("output", opt) == 0)
+ config->output = prepare_output(val);
+
+ else if (strcmp("cpu", opt) == 0)
+ sscanf(val, "%u", &config->cpu);
+
+ else if (strcmp("governor", opt) == 0) {
+ strncpy(config->governor, val,
+ sizeof(config->governor));
+ config->governor[sizeof(config->governor) - 1] = '\0';
+ }
+
+ else if (strcmp("priority", opt) == 0) {
+ if (string_to_prio(val) != SCHED_ERR)
+ config->prio = string_to_prio(val);
+ }
+ }
+
+ free(line);
+
+ return 0;
+}
diff --git a/tools/power/cpupower/bench/parse.h b/tools/power/cpupower/bench/parse.h
new file mode 100644
index 000000000..a8dc632d9
--- /dev/null
+++ b/tools/power/cpupower/bench/parse.h
@@ -0,0 +1,53 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+/* struct that holds the required config parameters */
+struct config
+{
+ long sleep; /* sleep time in µs */
+ long load; /* load time in µs */
+ long sleep_step; /* time value which changes the
+ * sleep time after every round in µs */
+ long load_step; /* time value which changes the
+ * load time after every round in µs */
+ unsigned int cycles; /* calculation cycles with the same sleep/load time */
+ unsigned int rounds; /* calculation rounds with iterated sleep/load time */
+ unsigned int cpu; /* cpu for which the affinity is set */
+ char governor[15]; /* cpufreq governor */
+ enum sched_prio /* possible scheduler priorities */
+ {
+ SCHED_ERR = -1,
+ SCHED_HIGH,
+ SCHED_DEFAULT,
+ SCHED_LOW
+ } prio;
+
+ unsigned int verbose; /* verbose output */
+ FILE *output; /* logfile */
+ char *output_filename; /* logfile name, must be freed at the end
+ if output != NULL and output != stdout*/
+};
+
+enum sched_prio string_to_prio(const char *str);
+
+FILE *prepare_output(const char *dir);
+
+int prepare_config(const char *path, struct config *config);
+struct config *prepare_default_config();
+
diff --git a/tools/power/cpupower/bench/system.c b/tools/power/cpupower/bench/system.c
new file mode 100644
index 000000000..2bb3eef7d
--- /dev/null
+++ b/tools/power/cpupower/bench/system.c
@@ -0,0 +1,192 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <sched.h>
+
+#include <cpufreq.h>
+#include <cpupower.h>
+
+#include "config.h"
+#include "system.h"
+
+/**
+ * returns time since epoch in µs
+ *
+ * @retval time
+ **/
+
+long long int get_time()
+{
+ struct timeval now;
+
+ gettimeofday(&now, NULL);
+
+ return (long long int)(now.tv_sec * 1000000LL + now.tv_usec);
+}
+
+/**
+ * sets the cpufreq governor
+ *
+ * @param governor cpufreq governor name
+ * @param cpu cpu for which the governor should be set
+ *
+ * @retval 0 on success
+ * @retval -1 when failed
+ **/
+
+int set_cpufreq_governor(char *governor, unsigned int cpu)
+{
+
+ dprintf("set %s as cpufreq governor\n", governor);
+
+ if (cpupower_is_cpu_online(cpu) != 1) {
+ perror("cpufreq_cpu_exists");
+ fprintf(stderr, "error: cpu %u does not exist\n", cpu);
+ return -1;
+ }
+
+ if (cpufreq_modify_policy_governor(cpu, governor) != 0) {
+ perror("cpufreq_modify_policy_governor");
+ fprintf(stderr, "error: unable to set %s governor\n", governor);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * sets cpu affinity for the process
+ *
+ * @param cpu cpu# to which the affinity should be set
+ *
+ * @retval 0 on success
+ * @retval -1 when setting the affinity failed
+ **/
+
+int set_cpu_affinity(unsigned int cpu)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ dprintf("set affinity to cpu #%u\n", cpu);
+
+ if (sched_setaffinity(getpid(), sizeof(cpu_set_t), &cpuset) < 0) {
+ perror("sched_setaffinity");
+ fprintf(stderr, "warning: unable to set cpu affinity\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * sets the process priority parameter
+ *
+ * @param priority priority value
+ *
+ * @retval 0 on success
+ * @retval -1 when setting the priority failed
+ **/
+
+int set_process_priority(int priority)
+{
+ struct sched_param param;
+
+ dprintf("set scheduler priority to %i\n", priority);
+
+ param.sched_priority = priority;
+
+ if (sched_setscheduler(0, SCHEDULER, &param) < 0) {
+ perror("sched_setscheduler");
+ fprintf(stderr, "warning: unable to set scheduler priority\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * notifies the user that the benchmark may run some time
+ *
+ * @param config benchmark config values
+ *
+ **/
+
+void prepare_user(const struct config *config)
+{
+ unsigned long sleep_time = 0;
+ unsigned long load_time = 0;
+ unsigned int round;
+
+ for (round = 0; round < config->rounds; round++) {
+ sleep_time += 2 * config->cycles *
+ (config->sleep + config->sleep_step * round);
+ load_time += 2 * config->cycles *
+ (config->load + config->load_step * round) +
+ (config->load + config->load_step * round * 4);
+ }
+
+ if (config->verbose || config->output != stdout)
+ printf("approx. test duration: %im\n",
+ (int)((sleep_time + load_time) / 60000000));
+}
+
+/**
+ * sets up the cpu affinity and scheduler priority
+ *
+ * @param config benchmark config values
+ *
+ **/
+
+void prepare_system(const struct config *config)
+{
+ if (config->verbose)
+ printf("set cpu affinity to cpu #%u\n", config->cpu);
+
+ set_cpu_affinity(config->cpu);
+
+ switch (config->prio) {
+ case SCHED_HIGH:
+ if (config->verbose)
+ printf("high priority condition requested\n");
+
+ set_process_priority(PRIORITY_HIGH);
+ break;
+ case SCHED_LOW:
+ if (config->verbose)
+ printf("low priority condition requested\n");
+
+ set_process_priority(PRIORITY_LOW);
+ break;
+ default:
+ if (config->verbose)
+ printf("default priority condition requested\n");
+
+ set_process_priority(PRIORITY_DEFAULT);
+ }
+}
+
diff --git a/tools/power/cpupower/bench/system.h b/tools/power/cpupower/bench/system.h
new file mode 100644
index 000000000..3a8c858b7
--- /dev/null
+++ b/tools/power/cpupower/bench/system.h
@@ -0,0 +1,29 @@
+/* cpufreq-bench CPUFreq microbenchmark
+ *
+ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include "parse.h"
+
+long long get_time();
+
+int set_cpufreq_governor(char *governor, unsigned int cpu);
+int set_cpu_affinity(unsigned int cpu);
+int set_process_priority(int priority);
+
+void prepare_user(const struct config *config);
+void prepare_system(const struct config *config);
diff --git a/tools/power/cpupower/debug/i386/Makefile b/tools/power/cpupower/debug/i386/Makefile
new file mode 100644
index 000000000..b3f771039
--- /dev/null
+++ b/tools/power/cpupower/debug/i386/Makefile
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0
+OUTPUT=./
+ifeq ("$(origin O)", "command line")
+ OUTPUT := $(O)/
+endif
+
+DESTDIR =
+bindir = /usr/bin
+
+INSTALL = /usr/bin/install
+
+
+default: all
+
+$(OUTPUT)centrino-decode: centrino-decode.c
+ $(CC) $(CFLAGS) -o $@ centrino-decode.c
+
+$(OUTPUT)dump_psb: dump_psb.c
+ $(CC) $(CFLAGS) -o $@ dump_psb.c
+
+$(OUTPUT)intel_gsic: intel_gsic.c
+ $(CC) $(CFLAGS) -o $@ -llrmi intel_gsic.c
+
+$(OUTPUT)powernow-k8-decode: powernow-k8-decode.c
+ $(CC) $(CFLAGS) -o $@ powernow-k8-decode.c
+
+all: $(OUTPUT)centrino-decode $(OUTPUT)dump_psb $(OUTPUT)intel_gsic $(OUTPUT)powernow-k8-decode
+
+clean:
+ rm -rf $(OUTPUT)centrino-decode
+ rm -rf $(OUTPUT)dump_psb
+ rm -rf $(OUTPUT)intel_gsic
+ rm -rf $(OUTPUT)powernow-k8-decode
+
+install:
+ $(INSTALL) -d $(DESTDIR)${bindir}
+ $(INSTALL) $(OUTPUT)centrino-decode $(DESTDIR)${bindir}
+ $(INSTALL) $(OUTPUT)powernow-k8-decode $(DESTDIR)${bindir}
+ $(INSTALL) $(OUTPUT)dump_psb $(DESTDIR)${bindir}
+ $(INSTALL) $(OUTPUT)intel_gsic $(DESTDIR)${bindir}
+
+.PHONY: all default clean install
diff --git a/tools/power/cpupower/debug/i386/centrino-decode.c b/tools/power/cpupower/debug/i386/centrino-decode.c
new file mode 100644
index 000000000..7ef24cce4
--- /dev/null
+++ b/tools/power/cpupower/debug/i386/centrino-decode.c
@@ -0,0 +1,113 @@
+/*
+ * (C) 2003 - 2004 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on code found in
+ * linux/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c
+ * and originally developed by Jeremy Fitzhardinge.
+ *
+ * USAGE: run it without arguments to decode the current settings on
+ * CPU 0, pass a CPU number as the argument, or pass the raw MSR
+ * contents as the argument (values >= 32 are treated as MSR contents).
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define MCPU 32
+
+#define MSR_IA32_PERF_STATUS 0x198
+
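+/*
+ * Reads an MSR through the msr driver: /dev/cpu/N/msr exposes the MSR
+ * address space as a sparse file, so seeking to the register number
+ * and reading eight bytes returns that MSR's 64-bit value.
+ */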
+static int rdmsr(unsigned int cpu, unsigned int msr,
+ unsigned int *lo, unsigned int *hi)
+{
+ int fd;
+ char file[20];
+ unsigned long long val;
+ int retval = -1;
+
+ *lo = *hi = 0;
+
+ if (cpu > MCPU)
+ goto err1;
+
+ sprintf(file, "/dev/cpu/%d/msr", cpu);
+ fd = open(file, O_RDONLY);
+
+ if (fd < 0)
+ goto err1;
+
+ if (lseek(fd, msr, SEEK_CUR) == -1)
+ goto err2;
+
+ if (read(fd, &val, 8) != 8)
+ goto err2;
+
+ *lo = (uint32_t )(val & 0xffffffffull);
+ *hi = (uint32_t )(val>>32 & 0xffffffffull);
+
+ retval = 0;
+err2:
+ close(fd);
+err1:
+ return retval;
+}
+
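+/*
+ * Worked example (illustrative only): an MSR value of 0x0f13 decodes to
+ * multiplier 0x0f (15) and voltage (0x13 * 16) + 700 = 1004 mV.
+ */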
+static void decode(unsigned int msr)
+{
+ unsigned int multiplier;
+ unsigned int mv;
+
+ multiplier = ((msr >> 8) & 0xFF);
+
+ mv = (((msr & 0xFF) * 16) + 700);
+
+ printf("0x%x means multiplier %d @ %d mV\n", msr, multiplier, mv);
+}
+
+static int decode_live(unsigned int cpu)
+{
+ unsigned int lo, hi;
+ int err;
+
+ err = rdmsr(cpu, MSR_IA32_PERF_STATUS, &lo, &hi);
+
+ if (err) {
+ printf("can't get MSR_IA32_PERF_STATUS for cpu %d\n", cpu);
+		printf("Possible trouble: you are not running an Enhanced SpeedStep capable cpu,\n");
+		printf("you are not root, or the msr driver is not present\n");
+ return 1;
+ }
+
+ decode(lo);
+
+ return 0;
+}
+
+int main (int argc, char **argv)
+{
+ unsigned int cpu, mode = 0;
+
+ if (argc < 2)
+ cpu = 0;
+ else {
+ cpu = strtoul(argv[1], NULL, 0);
+ if (cpu >= MCPU)
+ mode = 1;
+ }
+
+ if (mode)
+ decode(cpu);
+ else
+ decode_live(cpu);
+
+ return 0;
+}
diff --git a/tools/power/cpupower/debug/i386/dump_psb.c b/tools/power/cpupower/debug/i386/dump_psb.c
new file mode 100644
index 000000000..2c768cf70
--- /dev/null
+++ b/tools/power/cpupower/debug/i386/dump_psb.c
@@ -0,0 +1,196 @@
+/*
+ * dump_psb. (c) 2004, Dave Jones, Red Hat Inc.
+ * Licensed under the GPL v2.
+ */
+
+#define _GNU_SOURCE	/* must be defined before any include to take effect */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <getopt.h>
+
+#include <sys/mman.h>
+
+#define LEN (0x100000 - 0xc0000)
+#define OFFSET (0xc0000)
+
+#ifndef __packed
+#define __packed __attribute((packed))
+#endif
+
+static long relevant;
+
+static const int fid_to_mult[32] = {
+ 110, 115, 120, 125, 50, 55, 60, 65,
+ 70, 75, 80, 85, 90, 95, 100, 105,
+ 30, 190, 40, 200, 130, 135, 140, 210,
+ 150, 225, 160, 165, 170, 180, -1, -1,
+};
+
+static const int vid_to_voltage[32] = {
+ 2000, 1950, 1900, 1850, 1800, 1750, 1700, 1650,
+ 1600, 1550, 1500, 1450, 1400, 1350, 1300, 0,
+ 1275, 1250, 1225, 1200, 1175, 1150, 1125, 1100,
+ 1075, 1050, 1024, 1000, 975, 950, 925, 0,
+};
+
+struct psb_header {
+ char signature[10];
+ u_char version;
+ u_char flags;
+ u_short settlingtime;
+ u_char res1;
+ u_char numpst;
+} __packed;
+
+struct pst_header {
+ u_int32_t cpuid;
+ u_char fsb;
+ u_char maxfid;
+ u_char startvid;
+ u_char numpstates;
+} __packed;
+
+static u_int fsb;
+static u_int sgtc;
+
+static int
+decode_pst(char *p, int npstates)
+{
+ int i;
+ int freq, fid, vid;
+
+ for (i = 0; i < npstates; ++i) {
+ fid = *p++;
+ vid = *p++;
+ freq = 100 * fid_to_mult[fid] * fsb;
+
+ printf(" %2d %8dkHz FID %02x (%2d.%01d) VID %02x (%4dmV)\n",
+ i,
+ freq,
+ fid, fid_to_mult[fid]/10, fid_to_mult[fid]%10,
+ vid, vid_to_voltage[vid]);
+ }
+
+ return 0;
+}
+
+static
+void decode_psb(char *p, int numpst)
+{
+ int i;
+ struct psb_header *psb;
+ struct pst_header *pst;
+
+ psb = (struct psb_header*) p;
+
+ if (psb->version != 0x12)
+ return;
+
+ printf("PSB version: %hhx flags: %hhx settling time %hhuus res1 %hhx num pst %hhu\n",
+ psb->version,
+ psb->flags,
+ psb->settlingtime,
+ psb->res1,
+ psb->numpst);
+ sgtc = psb->settlingtime * 100;
+
+ if (sgtc < 10000)
+ sgtc = 10000;
+
+ p = ((char *) psb) + sizeof(struct psb_header);
+
+ if (numpst < 0)
+ numpst = psb->numpst;
+ else
+		printf("Overriding number of PSTs: %d\n", numpst);
+
+ for (i = 0; i < numpst; i++) {
+ pst = (struct pst_header*) p;
+
+ if (relevant != 0) {
+			if (relevant != pst->cpuid)
+ goto next_one;
+ }
+
+ printf(" PST %d cpuid %.3x fsb %hhu mfid %hhx svid %hhx numberstates %hhu\n",
+ i+1,
+ pst->cpuid,
+ pst->fsb,
+ pst->maxfid,
+ pst->startvid,
+ pst->numpstates);
+
+ fsb = pst->fsb;
+ decode_pst(p + sizeof(struct pst_header), pst->numpstates);
+
+next_one:
+ p += sizeof(struct pst_header) + 2*pst->numpstates;
+ }
+
+}
+
+static struct option info_opts[] = {
+	{"numpst", no_argument, NULL, 'n'},
+	{"relevant", required_argument, NULL, 'r'},
+	{NULL, 0, NULL, 0},	/* getopt_long() requires a terminating entry */
+};
+
+void print_help(void)
+{
+ printf ("Usage: dump_psb [options]\n");
+ printf ("Options:\n");
+ printf (" -n, --numpst Set number of PST tables to scan\n");
+ printf (" -r, --relevant Only display PSTs relevant to cpuid N\n");
+}
+
+int
+main(int argc, char *argv[])
+{
+ int fd;
+ int numpst=-1;
+ int ret=0, cont=1;
+ char *mem = NULL;
+ char *p;
+
+ do {
+ ret = getopt_long(argc, argv, "hr:n:", info_opts, NULL);
+ switch (ret){
+ case '?':
+ case 'h':
+ print_help();
+ cont = 0;
+ break;
+ case 'r':
+ relevant = strtol(optarg, NULL, 16);
+ break;
+ case 'n':
+ numpst = strtol(optarg, NULL, 10);
+ break;
+ case -1:
+ cont = 0;
+ break;
+ }
+
+ } while(cont);
+
+ fd = open("/dev/mem", O_RDONLY);
+ if (fd < 0) {
+ printf ("Couldn't open /dev/mem. Are you root?\n");
+ exit(1);
+ }
+
+	mem = mmap(mem, LEN, PROT_READ, MAP_SHARED, fd, OFFSET);
+ close(fd);
+
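+	/*
+	 * The PowerNow! PSB table lives somewhere in the BIOS ROM area
+	 * (0xc0000-0xfffff); scan it on 16-byte boundaries for the
+	 * "AMDK7PNOW!" signature.
+	 */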
+ for (p = mem; p - mem < LEN; p+=16) {
+ if (memcmp(p, "AMDK7PNOW!", 10) == 0) {
+ decode_psb(p, numpst);
+ break;
+ }
+ }
+
+ munmap(mem, LEN);
+ return 0;
+}
diff --git a/tools/power/cpupower/debug/i386/intel_gsic.c b/tools/power/cpupower/debug/i386/intel_gsic.c
new file mode 100644
index 000000000..d032c826d
--- /dev/null
+++ b/tools/power/cpupower/debug/i386/intel_gsic.c
@@ -0,0 +1,78 @@
+/*
+ * (C) 2003 Bruno Ducrot
+ * (C) 2004 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on code found in
+ * linux/include/asm-i386/ist.h and linux/arch/i386/kernel/setup.c
+ * and originally developed by Andy Grover <andrew.grover@intel.com>
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <lrmi.h>
+
+int main (void)
+{
+ struct LRMI_regs r;
+ int retval;
+
+ if (!LRMI_init())
+ return 0;
+
+ memset(&r, 0, sizeof(r));
+
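+	/*
+	 * 0x47534943 is ASCII "GSIC"; int 0x15 with eax = 0x0000E980 is
+	 * the BIOS query for the Intel SpeedStep (IST/GSIC) SMI interface.
+	 */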
+ r.eax = 0x0000E980;
+ r.edx = 0x47534943;
+
+ retval = LRMI_int(0x15, &r);
+
+ if (!retval) {
+ printf("Failed!\n");
+ return 0;
+ }
+ if (r.eax == 0x47534943) {
+ printf("BIOS supports GSIC call:\n");
+ printf("\tsignature: %c%c%c%c\n",
+ (r.eax >> 24) & 0xff,
+ (r.eax >> 16) & 0xff,
+ (r.eax >> 8) & 0xff,
+ (r.eax) & 0xff);
+ printf("\tcommand port = 0x%.4x\n",
+ r.ebx & 0xffff);
+ printf("\tcommand = 0x%.4x\n",
+ (r.ebx >> 16) & 0xffff);
+ printf("\tevent port = 0x%.8x\n", r.ecx);
+ printf("\tflags = 0x%.8x\n", r.edx);
+		if (((r.ebx >> 16) & 0xffff) != 0x82) {
+			printf("non-default command value. If speedstep-smi "
+			       "doesn't work out of the box,\nyou may want to "
+			       "try out the default value by passing "
+			       "smi_cmd=0x82 to the module\n AT YOUR OWN "
+			       "RISK.\n");
+		}
+		if ((r.ebx & 0xffff) != 0xb2) {
+			printf("non-default command port. If speedstep-smi "
+			       "doesn't work out of the box,\nyou may want to "
+			       "try out the default value by passing "
+			       "smi_port=0xb2 to the module\n AT YOUR OWN "
+			       "RISK.\n");
+		}
+ } else {
+ printf("BIOS DOES NOT support GSIC call. Dumping registers anyway:\n");
+ printf("eax = 0x%.8x\n", r.eax);
+ printf("ebx = 0x%.8x\n", r.ebx);
+ printf("ecx = 0x%.8x\n", r.ecx);
+ printf("edx = 0x%.8x\n", r.edx);
+		printf("Note also that some BIOSes do not support the initial "
+		       "GSIC call, but the newer\nspeedstep-smi driver may "
+		       "work.\nFor this, you need to pass some arguments to "
+		       "the speedstep-smi driver:\n");
+		printf("\tsmi_cmd=0x?? smi_port=0x?? smi_sig=1\n");
+		printf("\nUnfortunately, you have to know what exactly "
+		       "smi_cmd and smi_port are, and this\nis system "
+		       "dependent.\n");
+ }
+ return 1;
+}
diff --git a/tools/power/cpupower/debug/i386/powernow-k8-decode.c b/tools/power/cpupower/debug/i386/powernow-k8-decode.c
new file mode 100644
index 000000000..638a6b3bf
--- /dev/null
+++ b/tools/power/cpupower/debug/i386/powernow-k8-decode.c
@@ -0,0 +1,96 @@
+/*
+ * (C) 2004 Bruno Ducrot <ducrot@poupinou.org>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on code found in
+ * linux/arch/i386/kernel/cpu/cpufreq/powernow-k8.c
+ * and originally developed by Paul Devriendt
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#define MCPU 32
+
+#define MSR_FIDVID_STATUS 0xc0010042
+
+#define MSR_S_HI_CURRENT_VID 0x0000001f
+#define MSR_S_LO_CURRENT_FID 0x0000003f
+
+static int get_fidvid(uint32_t cpu, uint32_t *fid, uint32_t *vid)
+{
+ int err = 1;
+ uint64_t msr = 0;
+ int fd;
+ char file[20];
+
+ if (cpu > MCPU)
+ goto out;
+
+ sprintf(file, "/dev/cpu/%d/msr", cpu);
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ goto out;
+ lseek(fd, MSR_FIDVID_STATUS, SEEK_CUR);
+ if (read(fd, &msr, 8) != 8)
+ goto err1;
+
+ *fid = ((uint32_t )(msr & 0xffffffffull)) & MSR_S_LO_CURRENT_FID;
+ *vid = ((uint32_t )(msr>>32 & 0xffffffffull)) & MSR_S_HI_CURRENT_VID;
+ err = 0;
+err1:
+ close(fd);
+out:
+ return err;
+}
+
+
+/* Return a frequency in MHz, given an input fid */
+static uint32_t find_freq_from_fid(uint32_t fid)
+{
+ return 800 + (fid * 100);
+}
+
+/* Return a voltage in millivolts, given an input vid */
+static uint32_t find_millivolts_from_vid(uint32_t vid)
+{
+	return 1550 - vid * 25;
+}
+
+int main (int argc, char *argv[])
+{
+ int err;
+ int cpu;
+ uint32_t fid, vid;
+
+ if (argc < 2)
+ cpu = 0;
+ else
+ cpu = strtoul(argv[1], NULL, 0);
+
+ err = get_fidvid(cpu, &fid, &vid);
+
+ if (err) {
+ printf("can't get fid, vid from MSR\n");
+		printf("Possible trouble: you are not running a powernow-k8 capable cpu,\n");
+		printf("you are not root, or the msr driver is not present\n");
+ exit(1);
+ }
+
+
+ printf("cpu %d currently at %d MHz and %d mV\n",
+ cpu,
+ find_freq_from_fid(fid),
+ find_millivolts_from_vid(vid));
+
+ return 0;
+}
diff --git a/tools/power/cpupower/debug/kernel/Makefile b/tools/power/cpupower/debug/kernel/Makefile
new file mode 100644
index 000000000..c23e5a6ce
--- /dev/null
+++ b/tools/power/cpupower/debug/kernel/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-m :=
+
+KDIR := /lib/modules/$(shell uname -r)/build
+KMISC := /lib/modules/$(shell uname -r)/cpufrequtils/
+
+ifeq ("$(CONFIG_X86_TSC)", "y")
+ obj-m += cpufreq-test_tsc.o
+endif
+
+default:
+ $(MAKE) -C $(KDIR) M=$(CURDIR)
+
+clean:
+ - rm -rf *.o *.ko .tmp-versions .*.cmd .*.mod.* *.mod.c
+ - rm -rf .tmp_versions* Module.symvers modules.order
+
+install: default
+ install -d $(KMISC)
+ install -m 644 -c *.ko $(KMISC)
+ /sbin/depmod -a
+
+all: default
diff --git a/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c
new file mode 100644
index 000000000..6ff8383f2
--- /dev/null
+++ b/tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c
@@ -0,0 +1,110 @@
+/*
+ * test module to check whether the TSC-based delay routine continues
+ * to work properly after cpufreq transitions. Needs ACPI to work
+ * properly.
+ *
+ * Based partly on the Power Management Timer (PMTMR) code to be found
+ * in arch/i386/kernel/timers/timer_pm.c on recent 2.6. kernels, especially
+ * code written by John Stultz. The read_pmtmr function was copied verbatim
+ * from that file.
+ *
+ * (C) 2004 Dominik Brodowski
+ *
+ * To use:
+ * 1.) pass clock=tsc to the kernel on your bootloader
+ * 2.) modprobe this module (it'll fail)
+ * 3.) change CPU frequency
+ * 4.) modprobe this module again
+ * 5.) if the third value, "diff_pmtmr", changes between steps 2.) and 4.),
+ *     the TSC-based delay routine in the Linux kernel does not correctly
+ *     handle the cpufreq transition. Please report this to
+ * linux-pm@vger.kernel.org
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/acpi.h>
+#include <asm/io.h>
+
+static int pm_tmr_ioport = 0;
+
+/*helper function to safely read acpi pm timesource*/
+static u32 read_pmtmr(void)
+{
+	u32 v1 = 0, v2 = 0, v3 = 0;
+	/* It has been reported that on various broken chipsets
+	 * (ICH4, PIIX4 and PIIX4E) the ACPI PM time source is
+	 * not latched, so you must read it multiple times to
+	 * ensure a safe value is read.
+	 */
+ do {
+ v1 = inl(pm_tmr_ioport);
+ v2 = inl(pm_tmr_ioport);
+ v3 = inl(pm_tmr_ioport);
+ } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1)
+ || (v3 > v1 && v3 < v2));
+
+ /* mask the output to 24 bits */
+ return (v2 & 0xFFFFFF);
+}
+
+static int __init cpufreq_test_tsc(void)
+{
+ u32 now, then, diff;
+ u64 now_tsc, then_tsc, diff_tsc;
+ int i;
+
+	/* the following code snippet is copied from arch/x86/kernel/acpi/boot.c
+	   of Linux v2.6.25. */
+
+ /* detect the location of the ACPI PM Timer */
+ if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID) {
+ /* FADT rev. 2 */
+ if (acpi_gbl_FADT.xpm_timer_block.space_id !=
+ ACPI_ADR_SPACE_SYSTEM_IO)
+ return 0;
+
+ pm_tmr_ioport = acpi_gbl_FADT.xpm_timer_block.address;
+ /*
+ * "X" fields are optional extensions to the original V1.0
+ * fields, so we must selectively expand V1.0 fields if the
+ * corresponding X field is zero.
+ */
+ if (!pm_tmr_ioport)
+ pm_tmr_ioport = acpi_gbl_FADT.pm_timer_block;
+ } else {
+ /* FADT rev. 1 */
+ pm_tmr_ioport = acpi_gbl_FADT.pm_timer_block;
+ }
+
+ printk(KERN_DEBUG "start--> \n");
+ then = read_pmtmr();
+ then_tsc = rdtsc();
+	for (i = 0; i < 20; i++) {
+ mdelay(100);
+ now = read_pmtmr();
+ now_tsc = rdtsc();
+ diff = (now - then) & 0xFFFFFF;
+ diff_tsc = now_tsc - then_tsc;
+ printk(KERN_DEBUG "t1: %08u t2: %08u diff_pmtmr: %08u diff_tsc: %016llu\n", then, now, diff, diff_tsc);
+ then = now;
+ then_tsc = now_tsc;
+ }
+ printk(KERN_DEBUG "<-- end \n");
+ return -ENODEV;
+}
+
+static void __exit cpufreq_none(void)
+{
+ return;
+}
+
+module_init(cpufreq_test_tsc)
+module_exit(cpufreq_none)
+
+
+MODULE_AUTHOR("Dominik Brodowski");
+MODULE_DESCRIPTION("Verify the TSC cpufreq notifier working correctly -- needs ACPI-enabled system");
+MODULE_LICENSE ("GPL");
diff --git a/tools/power/cpupower/debug/x86_64/Makefile b/tools/power/cpupower/debug/x86_64/Makefile
new file mode 100644
index 000000000..59af84b8e
--- /dev/null
+++ b/tools/power/cpupower/debug/x86_64/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+OUTPUT=./
+ifeq ("$(origin O)", "command line")
+ OUTPUT := $(O)/
+endif
+
+DESTDIR =
+bindir = /usr/bin
+
+INSTALL = /usr/bin/install
+
+
+default: all
+
+$(OUTPUT)centrino-decode: ../i386/centrino-decode.c
+ $(CC) $(CFLAGS) -o $@ $<
+
+$(OUTPUT)powernow-k8-decode: ../i386/powernow-k8-decode.c
+ $(CC) $(CFLAGS) -o $@ $<
+
+all: $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode
+
+clean:
+ rm -rf $(OUTPUT)centrino-decode $(OUTPUT)powernow-k8-decode
+
+install:
+ $(INSTALL) -d $(DESTDIR)${bindir}
+ $(INSTALL) $(OUTPUT)centrino-decode $(DESTDIR)${bindir}
+ $(INSTALL) $(OUTPUT)powernow-k8-decode $(DESTDIR)${bindir}
+
+.PHONY: all default clean install
diff --git a/tools/power/cpupower/lib/cpufreq.c b/tools/power/cpupower/lib/cpufreq.c
new file mode 100644
index 000000000..0c0f3e3f0
--- /dev/null
+++ b/tools/power/cpupower/lib/cpufreq.c
@@ -0,0 +1,716 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpufreq.h"
+#include "cpupower_intern.h"
+
+/* CPUFREQ sysfs access **************************************************/
+
+/* helper function to read file from /sys into given buffer */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_read_file(unsigned int cpu, const char *fname,
+ char *buf, size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+ cpu, fname);
+ return cpupower_read_sysfs(path, buf, buflen);
+}
+
+/* helper function to write a new value to a /sys file */
+/* fname is a relative path under "cpuX/cpufreq" dir */
+static unsigned int sysfs_cpufreq_write_file(unsigned int cpu,
+ const char *fname,
+ const char *value, size_t len)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numwrite;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpufreq/%s",
+ cpu, fname);
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwrite = write(fd, value, len);
+ if (numwrite < 1) {
+ close(fd);
+ return 0;
+ }
+
+ close(fd);
+
+ return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum cpufreq_value {
+ CPUINFO_CUR_FREQ,
+ CPUINFO_MIN_FREQ,
+ CPUINFO_MAX_FREQ,
+ CPUINFO_LATENCY,
+ SCALING_CUR_FREQ,
+ SCALING_MIN_FREQ,
+ SCALING_MAX_FREQ,
+ STATS_NUM_TRANSITIONS,
+ MAX_CPUFREQ_VALUE_READ_FILES
+};
+
+static const char *cpufreq_value_files[MAX_CPUFREQ_VALUE_READ_FILES] = {
+ [CPUINFO_CUR_FREQ] = "cpuinfo_cur_freq",
+ [CPUINFO_MIN_FREQ] = "cpuinfo_min_freq",
+ [CPUINFO_MAX_FREQ] = "cpuinfo_max_freq",
+ [CPUINFO_LATENCY] = "cpuinfo_transition_latency",
+ [SCALING_CUR_FREQ] = "scaling_cur_freq",
+ [SCALING_MIN_FREQ] = "scaling_min_freq",
+ [SCALING_MAX_FREQ] = "scaling_max_freq",
+ [STATS_NUM_TRANSITIONS] = "stats/total_trans"
+};
+
+
+static unsigned long sysfs_cpufreq_get_one_value(unsigned int cpu,
+ enum cpufreq_value which)
+{
+ unsigned long value;
+ unsigned int len;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+
+ if (which >= MAX_CPUFREQ_VALUE_READ_FILES)
+ return 0;
+
+ len = sysfs_cpufreq_read_file(cpu, cpufreq_value_files[which],
+ linebuf, sizeof(linebuf));
+
+ if (len == 0)
+ return 0;
+
+ value = strtoul(linebuf, &endp, 0);
+
+ if (endp == linebuf || errno == ERANGE)
+ return 0;
+
+ return value;
+}
+
+/* read access to files which contain one string */
+
+enum cpufreq_string {
+ SCALING_DRIVER,
+ SCALING_GOVERNOR,
+ MAX_CPUFREQ_STRING_FILES
+};
+
+static const char *cpufreq_string_files[MAX_CPUFREQ_STRING_FILES] = {
+ [SCALING_DRIVER] = "scaling_driver",
+ [SCALING_GOVERNOR] = "scaling_governor",
+};
+
+
+static char *sysfs_cpufreq_get_one_string(unsigned int cpu,
+ enum cpufreq_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_CPUFREQ_STRING_FILES)
+ return NULL;
+
+ len = sysfs_cpufreq_read_file(cpu, cpufreq_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+/* write access */
+
+enum cpufreq_write {
+ WRITE_SCALING_MIN_FREQ,
+ WRITE_SCALING_MAX_FREQ,
+ WRITE_SCALING_GOVERNOR,
+ WRITE_SCALING_SET_SPEED,
+ MAX_CPUFREQ_WRITE_FILES
+};
+
+static const char *cpufreq_write_files[MAX_CPUFREQ_WRITE_FILES] = {
+ [WRITE_SCALING_MIN_FREQ] = "scaling_min_freq",
+ [WRITE_SCALING_MAX_FREQ] = "scaling_max_freq",
+ [WRITE_SCALING_GOVERNOR] = "scaling_governor",
+ [WRITE_SCALING_SET_SPEED] = "scaling_setspeed",
+};
+
+static int sysfs_cpufreq_write_one_value(unsigned int cpu,
+ enum cpufreq_write which,
+ const char *new_value, size_t len)
+{
+ if (which >= MAX_CPUFREQ_WRITE_FILES)
+ return 0;
+
+ if (sysfs_cpufreq_write_file(cpu, cpufreq_write_files[which],
+ new_value, len) != len)
+ return -ENODEV;
+
+ return 0;
+};
+
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu)
+{
+ return sysfs_cpufreq_get_one_value(cpu, SCALING_CUR_FREQ);
+}
+
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu)
+{
+ return sysfs_cpufreq_get_one_value(cpu, CPUINFO_CUR_FREQ);
+}
+
+unsigned long cpufreq_get_transition_latency(unsigned int cpu)
+{
+ return sysfs_cpufreq_get_one_value(cpu, CPUINFO_LATENCY);
+}
+
+int cpufreq_get_hardware_limits(unsigned int cpu,
+ unsigned long *min,
+ unsigned long *max)
+{
+ if ((!min) || (!max))
+ return -EINVAL;
+
+ *min = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MIN_FREQ);
+ if (!*min)
+ return -ENODEV;
+
+ *max = sysfs_cpufreq_get_one_value(cpu, CPUINFO_MAX_FREQ);
+ if (!*max)
+ return -ENODEV;
+
+ return 0;
+}
+
+char *cpufreq_get_driver(unsigned int cpu)
+{
+ return sysfs_cpufreq_get_one_string(cpu, SCALING_DRIVER);
+}
+
+void cpufreq_put_driver(char *ptr)
+{
+ if (!ptr)
+ return;
+ free(ptr);
+}
+
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu)
+{
+ struct cpufreq_policy *policy;
+
+ policy = malloc(sizeof(struct cpufreq_policy));
+ if (!policy)
+ return NULL;
+
+ policy->governor = sysfs_cpufreq_get_one_string(cpu, SCALING_GOVERNOR);
+ if (!policy->governor) {
+ free(policy);
+ return NULL;
+ }
+ policy->min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+ policy->max = sysfs_cpufreq_get_one_value(cpu, SCALING_MAX_FREQ);
+ if ((!policy->min) || (!policy->max)) {
+ free(policy->governor);
+ free(policy);
+ return NULL;
+ }
+
+ return policy;
+}
+
+void cpufreq_put_policy(struct cpufreq_policy *policy)
+{
+ if ((!policy) || (!policy->governor))
+ return;
+
+ free(policy->governor);
+ policy->governor = NULL;
+ free(policy);
+}
+
+struct cpufreq_available_governors *
+cpufreq_get_available_governors(unsigned int cpu)
+{
+ struct cpufreq_available_governors *first = NULL;
+ struct cpufreq_available_governors *current = NULL;
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "scaling_available_governors",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ current->governor = malloc(i - pos + 1);
+ if (!current->governor)
+ goto error_out;
+
+ memcpy(current->governor, linebuf + pos, i - pos);
+ current->governor[i - pos] = '\0';
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ if (first->governor)
+ free(first->governor);
+ free(first);
+ first = current;
+ }
+ return NULL;
+}
+
+void cpufreq_put_available_governors(struct cpufreq_available_governors *any)
+{
+ struct cpufreq_available_governors *tmp, *next;
+
+ if (!any)
+ return;
+
+ tmp = any->first;
+ while (tmp) {
+ next = tmp->next;
+ if (tmp->governor)
+ free(tmp->governor);
+ free(tmp);
+ tmp = next;
+ }
+}
+
+
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu)
+{
+ struct cpufreq_available_frequencies *first = NULL;
+ struct cpufreq_available_frequencies *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "scaling_available_frequencies",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ pos = 0;
+ for (i = 0; i < len; i++) {
+ if (linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if (i - pos >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+ if (sscanf(one_value, "%lu", &current->frequency) != 1)
+ goto error_out;
+
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
+}
+
+void cpufreq_put_available_frequencies(struct cpufreq_available_frequencies
+				       *any)
+{
+ struct cpufreq_available_frequencies *tmp, *next;
+
+ if (!any)
+ return;
+
+ tmp = any->first;
+ while (tmp) {
+ next = tmp->next;
+ free(tmp);
+ tmp = next;
+ }
+}
+
+static struct cpufreq_affected_cpus *sysfs_get_cpu_list(unsigned int cpu,
+ const char *file)
+{
+ struct cpufreq_affected_cpus *first = NULL;
+ struct cpufreq_affected_cpus *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, file, linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ pos = 0;
+	/* i == len handles a trailing token without a separator */
+	for (i = 0; i <= len; i++) {
+ if (i == len || linebuf[i] == ' ' || linebuf[i] == '\n') {
+ if (i - pos < 1)
+ continue;
+ if (i - pos >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+
+ if (sscanf(one_value, "%u", &current->cpu) != 1)
+ goto error_out;
+
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
+}
+
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu)
+{
+ return sysfs_get_cpu_list(cpu, "affected_cpus");
+}
+
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *any)
+{
+ struct cpufreq_affected_cpus *tmp, *next;
+
+ if (!any)
+ return;
+
+ tmp = any->first;
+ while (tmp) {
+ next = tmp->next;
+ free(tmp);
+ tmp = next;
+ }
+}
+
+
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu)
+{
+ return sysfs_get_cpu_list(cpu, "related_cpus");
+}
+
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *any)
+{
+ cpufreq_put_affected_cpus(any);
+}
+
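+/*
+ * Copies passed_gov into the 20-byte new_gov buffer and verifies that
+ * it contains nothing but [a-zA-Z_-]; returns -EINVAL otherwise.
+ */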
+static int verify_gov(char *new_gov, char *passed_gov)
+{
+ unsigned int i, j = 0;
+
+ if (!passed_gov || (strlen(passed_gov) > 19))
+ return -EINVAL;
+
+ strncpy(new_gov, passed_gov, 20);
+ for (i = 0; i < 20; i++) {
+ if (j) {
+ new_gov[i] = '\0';
+ continue;
+ }
+ if ((new_gov[i] >= 'a') && (new_gov[i] <= 'z'))
+ continue;
+
+ if ((new_gov[i] >= 'A') && (new_gov[i] <= 'Z'))
+ continue;
+
+ if (new_gov[i] == '-')
+ continue;
+
+ if (new_gov[i] == '_')
+ continue;
+
+ if (new_gov[i] == '\0') {
+ j = 1;
+ continue;
+ }
+ return -EINVAL;
+ }
+ new_gov[19] = '\0';
+ return 0;
+}
+
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy)
+{
+ char min[SYSFS_PATH_MAX];
+ char max[SYSFS_PATH_MAX];
+ char gov[SYSFS_PATH_MAX];
+ int ret;
+ unsigned long old_min;
+ int write_max_first;
+
+ if (!policy || !(policy->governor))
+ return -EINVAL;
+
+ if (policy->max < policy->min)
+ return -EINVAL;
+
+ if (verify_gov(gov, policy->governor))
+ return -EINVAL;
+
+ snprintf(min, SYSFS_PATH_MAX, "%lu", policy->min);
+ snprintf(max, SYSFS_PATH_MAX, "%lu", policy->max);
+
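+	/*
+	 * The kernel rejects scaling_max_freq below scaling_min_freq, so
+	 * if the new max would undercut the old min, write the new min
+	 * first; otherwise write the new max first.
+	 */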
+ old_min = sysfs_cpufreq_get_one_value(cpu, SCALING_MIN_FREQ);
+ write_max_first = (old_min && (policy->max < old_min) ? 0 : 1);
+
+ if (write_max_first) {
+ ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+ max, strlen(max));
+ if (ret)
+ return ret;
+ }
+
+ ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ, min,
+ strlen(min));
+ if (ret)
+ return ret;
+
+ if (!write_max_first) {
+ ret = sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+ max, strlen(max));
+ if (ret)
+ return ret;
+ }
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+ gov, strlen(gov));
+}
+
+
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq)
+{
+ char value[SYSFS_PATH_MAX];
+
+ snprintf(value, SYSFS_PATH_MAX, "%lu", min_freq);
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MIN_FREQ,
+ value, strlen(value));
+}
+
+
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq)
+{
+ char value[SYSFS_PATH_MAX];
+
+ snprintf(value, SYSFS_PATH_MAX, "%lu", max_freq);
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_MAX_FREQ,
+ value, strlen(value));
+}
+
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor)
+{
+ char new_gov[SYSFS_PATH_MAX];
+
+ if ((!governor) || (strlen(governor) > 19))
+ return -EINVAL;
+
+ if (verify_gov(new_gov, governor))
+ return -EINVAL;
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_GOVERNOR,
+ new_gov, strlen(new_gov));
+}
+
+int cpufreq_set_frequency(unsigned int cpu, unsigned long target_frequency)
+{
+ struct cpufreq_policy *pol = cpufreq_get_policy(cpu);
+ char userspace_gov[] = "userspace";
+ char freq[SYSFS_PATH_MAX];
+ int ret;
+
+ if (!pol)
+ return -ENODEV;
+
+ if (strncmp(pol->governor, userspace_gov, 9) != 0) {
+ ret = cpufreq_modify_policy_governor(cpu, userspace_gov);
+ if (ret) {
+ cpufreq_put_policy(pol);
+ return ret;
+ }
+ }
+
+ cpufreq_put_policy(pol);
+
+ snprintf(freq, SYSFS_PATH_MAX, "%lu", target_frequency);
+
+ return sysfs_cpufreq_write_one_value(cpu, WRITE_SCALING_SET_SPEED,
+ freq, strlen(freq));
+}
+
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+ unsigned long long *total_time)
+{
+ struct cpufreq_stats *first = NULL;
+ struct cpufreq_stats *current = NULL;
+ char one_value[SYSFS_PATH_MAX];
+ char linebuf[MAX_LINE_LEN];
+ unsigned int pos, i;
+ unsigned int len;
+
+ len = sysfs_cpufreq_read_file(cpu, "stats/time_in_state",
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ *total_time = 0;
+ pos = 0;
+	for (i = 0; i <= len; i++) {
+		if (i == len || linebuf[i] == '\n') {
+ if (i - pos < 2)
+ continue;
+ if ((i - pos) >= SYSFS_PATH_MAX)
+ goto error_out;
+ if (current) {
+ current->next = malloc(sizeof(*current));
+ if (!current->next)
+ goto error_out;
+ current = current->next;
+ } else {
+ first = malloc(sizeof(*first));
+ if (!first)
+ goto error_out;
+ current = first;
+ }
+ current->first = first;
+ current->next = NULL;
+
+ memcpy(one_value, linebuf + pos, i - pos);
+ one_value[i - pos] = '\0';
+ if (sscanf(one_value, "%lu %llu",
+ &current->frequency,
+ &current->time_in_state) != 2)
+ goto error_out;
+
+ *total_time = *total_time + current->time_in_state;
+ pos = i + 1;
+ }
+ }
+
+ return first;
+
+ error_out:
+ while (first) {
+ current = first->next;
+ free(first);
+ first = current;
+ }
+ return NULL;
+}
+
+void cpufreq_put_stats(struct cpufreq_stats *any)
+{
+ struct cpufreq_stats *tmp, *next;
+
+ if (!any)
+ return;
+
+ tmp = any->first;
+ while (tmp) {
+ next = tmp->next;
+ free(tmp);
+ tmp = next;
+ }
+}
+
+unsigned long cpufreq_get_transitions(unsigned int cpu)
+{
+ return sysfs_cpufreq_get_one_value(cpu, STATS_NUM_TRANSITIONS);
+}
diff --git a/tools/power/cpupower/lib/cpufreq.h b/tools/power/cpupower/lib/cpufreq.h
new file mode 100644
index 000000000..60beaf5ed
--- /dev/null
+++ b/tools/power/cpupower/lib/cpufreq.h
@@ -0,0 +1,212 @@
+/*
+ * cpufreq.h - definitions for libcpufreq
+ *
+ * Copyright (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __CPUPOWER_CPUFREQ_H__
+#define __CPUPOWER_CPUFREQ_H__
+
+struct cpufreq_policy {
+ unsigned long min;
+ unsigned long max;
+ char *governor;
+};
+
+struct cpufreq_available_governors {
+ char *governor;
+ struct cpufreq_available_governors *next;
+ struct cpufreq_available_governors *first;
+};
+
+struct cpufreq_available_frequencies {
+ unsigned long frequency;
+ struct cpufreq_available_frequencies *next;
+ struct cpufreq_available_frequencies *first;
+};
+
+
+struct cpufreq_affected_cpus {
+ unsigned int cpu;
+ struct cpufreq_affected_cpus *next;
+ struct cpufreq_affected_cpus *first;
+};
+
+struct cpufreq_stats {
+ unsigned long frequency;
+ unsigned long long time_in_state;
+ struct cpufreq_stats *next;
+ struct cpufreq_stats *first;
+};
+
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* determine current CPU frequency
+ * - _kernel variant means kernel's opinion of CPU frequency
+ * - _hardware variant means actual hardware CPU frequency,
+ * which is only available to root.
+ *
+ * returns 0 on failure, else frequency in kHz.
+ */
+
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+
+#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu)
+
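+/* Illustrative use (sketch; the cpu number is an example):
+ *
+ *	unsigned long khz = cpufreq_get(0);
+ *
+ * A return value of 0 means failure: the CPU is offline, has no
+ * cpufreq support, or the caller lacks the needed permissions.
+ */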
+
+/* determine CPU transition latency
+ *
+ * returns 0 on failure, else transition latency in 10^(-9) s = nanoseconds
+ */
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+
+
+/* determine hardware CPU frequency limits
+ *
+ * These may be limited further by thermal, energy or other
+ * considerations by cpufreq policy notifiers in the kernel.
+ */
+
+int cpufreq_get_hardware_limits(unsigned int cpu,
+ unsigned long *min,
+ unsigned long *max);
+
+
+/* determine CPUfreq driver used
+ *
+ * Remember to call cpufreq_put_driver when no longer needed
+ * to avoid memory leakage, please.
+ */
+
+char *cpufreq_get_driver(unsigned int cpu);
+
+void cpufreq_put_driver(char *ptr);
+
+
+/* determine CPUfreq policy currently used
+ *
+ * Remember to call cpufreq_put_policy when no longer needed
+ * to avoid memory leakage, please.
+ */
+
+
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+
+void cpufreq_put_policy(struct cpufreq_policy *policy);
+
+
+/* determine CPUfreq governors currently available
+ *
+ * may be modified by modprobe'ing or rmmod'ing other governors. Please
+ * free allocated memory by calling cpufreq_put_available_governors
+ * after use.
+ */
+
+
+struct cpufreq_available_governors
+*cpufreq_get_available_governors(unsigned int cpu);
+
+void cpufreq_put_available_governors(
+ struct cpufreq_available_governors *first);
+
+
+/* determine CPU frequency states available
+ *
+ * Only present on _some_ ->target() cpufreq drivers. For information purposes
+ * only. Please free allocated memory by calling
+ * cpufreq_put_available_frequencies after use.
+ */
+
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu);
+
+void cpufreq_put_available_frequencies(
+ struct cpufreq_available_frequencies *first);
+
+
+/* determine affected CPUs
+ *
+ * Remember to call cpufreq_put_affected_cpus when no longer needed
+ * to avoid memory leakage, please.
+ */
+
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned int cpu);
+
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+
+
+/* determine related CPUs
+ *
+ * Remember to call cpufreq_put_related_cpus when no longer needed
+ * to avoid memory leakage, please.
+ */
+
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned int cpu);
+
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+
+
+/* determine stats for cpufreq subsystem
+ *
+ * This is not available in all kernel versions or configurations.
+ */
+
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+ unsigned long long *total_time);
+
+void cpufreq_put_stats(struct cpufreq_stats *stats);
+
+unsigned long cpufreq_get_transitions(unsigned int cpu);
+
+
+/* set new cpufreq policy
+ *
+ * Tries to apply the passed policy as the new policy, as closely as
+ * possible; results may differ depending, e.g., on which governors
+ * are available.
+ */
+
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+
+
+/* modify a policy by only changing min/max freq or governor
+ *
+ * Does not check whether result is what was intended.
+ */
+
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+
+
+/* set a specific frequency
+ *
+ * Only works if the userspace governor can be used and no external
+ * interference (other calls to this function or to set/modify_policy)
+ * occurs. Also does not work on ->range() cpufreq drivers.
+ */
+
+int cpufreq_set_frequency(unsigned int cpu,
+ unsigned long target_frequency);
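+/* Illustrative call (sketch; the cpu and frequency values are examples
+ * and must match what the system actually supports):
+ *
+ *	int err = cpufreq_set_frequency(0, 1400000);
+ *
+ * A non-zero return means the userspace governor could not be set or
+ * the sysfs write failed.
+ */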
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CPUPOWER_CPUFREQ_H__ */
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
new file mode 100644
index 000000000..852d25462
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -0,0 +1,380 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ * (C) 2011 Thomas Renninger <trenn@novell.com> Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "cpuidle.h"
+#include "cpupower_intern.h"
+
+/*
+ * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir
+ * exists.
+ * For example, the functionality to disable c-states was introduced in later
+ * kernel versions; this function can be used to explicitly check for this
+ * feature.
+ *
+ * returns 1 if the file exists, 0 otherwise.
+ */
+static
+unsigned int cpuidle_state_file_exists(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname)
+{
+ char path[SYSFS_PATH_MAX];
+ struct stat statbuf;
+
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+ if (stat(path, &statbuf) != 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpuX/cpuidle/stateX/" dir
+ * cstates start at 0; C0 is not counted as a cstate.
+ * This means that if you want C1 info, pass 0 as the idlestate param.
+ */
+static
+unsigned int cpuidle_state_read_file(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname, char *buf,
+ size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numread;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+/*
+ * helper function to write a new value to a /sys file
+ * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir
+ *
+ * Returns the number of bytes written or 0 on error
+ */
+static
+unsigned int cpuidle_state_write_file(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname,
+ const char *value, size_t len)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numwrite;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwrite = write(fd, value, len);
+ if (numwrite < 1) {
+ close(fd);
+ return 0;
+ }
+
+ close(fd);
+
+ return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum idlestate_value {
+ IDLESTATE_USAGE,
+ IDLESTATE_POWER,
+ IDLESTATE_LATENCY,
+ IDLESTATE_TIME,
+ IDLESTATE_DISABLE,
+ MAX_IDLESTATE_VALUE_FILES
+};
+
+static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
+ [IDLESTATE_USAGE] = "usage",
+ [IDLESTATE_POWER] = "power",
+ [IDLESTATE_LATENCY] = "latency",
+ [IDLESTATE_TIME] = "time",
+ [IDLESTATE_DISABLE] = "disable",
+};
+
+static
+unsigned long long cpuidle_state_get_one_value(unsigned int cpu,
+ unsigned int idlestate,
+ enum idlestate_value which)
+{
+ unsigned long long value;
+ unsigned int len;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+
+ if (which >= MAX_IDLESTATE_VALUE_FILES)
+ return 0;
+
+ len = cpuidle_state_read_file(cpu, idlestate,
+ idlestate_value_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return 0;
+
+ value = strtoull(linebuf, &endp, 0);
+
+ if (endp == linebuf || errno == ERANGE)
+ return 0;
+
+ return value;
+}
+
+/* read access to files which contain one string */
+
+enum idlestate_string {
+ IDLESTATE_DESC,
+ IDLESTATE_NAME,
+ MAX_IDLESTATE_STRING_FILES
+};
+
+static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = {
+ [IDLESTATE_DESC] = "desc",
+ [IDLESTATE_NAME] = "name",
+};
+
+
+static char *cpuidle_state_get_one_string(unsigned int cpu,
+ unsigned int idlestate,
+ enum idlestate_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_IDLESTATE_STRING_FILES)
+ return NULL;
+
+ len = cpuidle_state_read_file(cpu, idlestate,
+ idlestate_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+/*
+ * Returns:
+ * 1 if disabled
+ * 0 if enabled
+ * -1 if idlestate is not available
+ * -2 if disabling is not supported by the kernel
+ */
+int cpuidle_is_state_disabled(unsigned int cpu,
+ unsigned int idlestate)
+{
+ if (cpuidle_state_count(cpu) <= idlestate)
+ return -1;
+
+ if (!cpuidle_state_file_exists(cpu, idlestate,
+ idlestate_value_files[IDLESTATE_DISABLE]))
+ return -2;
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_DISABLE);
+}
+
+/*
+ * Pass 1 as last argument to disable or 0 to enable the state
+ * Returns:
+ * 0 on success
+ * negative values on error, for example:
+ * -1 if idlestate is not available
+ * -2 if disabling is not supported by the kernel
+ * -3 No write access to disable/enable C-states
+ */
+int cpuidle_state_disable(unsigned int cpu,
+ unsigned int idlestate,
+ unsigned int disable)
+{
+ char value[SYSFS_PATH_MAX];
+ int bytes_written;
+
+ if (cpuidle_state_count(cpu) <= idlestate)
+ return -1;
+
+ if (!cpuidle_state_file_exists(cpu, idlestate,
+ idlestate_value_files[IDLESTATE_DISABLE]))
+ return -2;
+
+ snprintf(value, SYSFS_PATH_MAX, "%u", disable);
+
+ bytes_written = cpuidle_state_write_file(cpu, idlestate, "disable",
+						 value, strlen(value));
+ if (bytes_written)
+ return 0;
+ return -3;
+}
+
+unsigned long cpuidle_state_latency(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
+}
+
+unsigned long cpuidle_state_usage(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_USAGE);
+}
+
+unsigned long long cpuidle_state_time(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_TIME);
+}
+
+char *cpuidle_state_name(unsigned int cpu, unsigned int idlestate)
+{
+ return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_NAME);
+}
+
+char *cpuidle_state_desc(unsigned int cpu, unsigned int idlestate)
+{
+ return cpuidle_state_get_one_string(cpu, idlestate, IDLESTATE_DESC);
+}
+
+/*
+ * Returns the number of supported C-states of CPU core cpu, or zero
+ * if cpuidle is unavailable or does not export any C-states. (The
+ * return type is unsigned, so no negative error values are returned.)
+ */
+unsigned int cpuidle_state_count(unsigned int cpu)
+{
+ char file[SYSFS_PATH_MAX];
+ struct stat statbuf;
+ int idlestates = 1;
+
+
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle");
+ if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+ return 0;
+
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu);
+ if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+ return 0;
+
+ while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) {
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU
+ "cpu%u/cpuidle/state%d", cpu, idlestates);
+ idlestates++;
+ }
+ idlestates--;
+ return idlestates;
+}
+
+/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpu/cpuidle/" dir
+ */
+static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
+ size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
+
+ return cpupower_read_sysfs(path, buf, buflen);
+}
+
+
+
+/* read access to files which contain one string */
+
+enum cpuidle_string {
+ CPUIDLE_GOVERNOR,
+ CPUIDLE_GOVERNOR_RO,
+ CPUIDLE_DRIVER,
+ MAX_CPUIDLE_STRING_FILES
+};
+
+static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = {
+ [CPUIDLE_GOVERNOR] = "current_governor",
+ [CPUIDLE_GOVERNOR_RO] = "current_governor_ro",
+ [CPUIDLE_DRIVER] = "current_driver",
+};
+
+
+static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_CPUIDLE_STRING_FILES)
+ return NULL;
+
+ len = sysfs_cpuidle_read_file(cpuidle_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+char *cpuidle_get_governor(void)
+{
+ char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO);
+ if (!tmp)
+ return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR);
+ else
+ return tmp;
+}
+
+char *cpuidle_get_driver(void)
+{
+ return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER);
+}
+/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
new file mode 100644
index 000000000..2e10fead2
--- /dev/null
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CPUPOWER_CPUIDLE_H__
+#define __CPUPOWER_CPUIDLE_H__
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+ unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+ unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+ unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+char *cpuidle_get_driver(void);
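+
+/* Illustrative loop over the C-states of cpu 0 (sketch; error handling
+ * mostly omitted). The returned name is strdup()ed and must be freed:
+ *
+ *	unsigned int i, count = cpuidle_state_count(0);
+ *	for (i = 0; i < count; i++) {
+ *		char *name = cpuidle_state_name(0, i);
+ *		if (name) {
+ *			printf("state%u: %s\n", i, name);
+ *			free(name);
+ *		}
+ *	}
+ */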
+
+#endif /* __CPUPOWER_CPUIDLE_H__ */
diff --git a/tools/power/cpupower/lib/cpupower.c b/tools/power/cpupower/lib/cpupower.c
new file mode 100644
index 000000000..9711d628b
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.c
@@ -0,0 +1,192 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "cpupower.h"
+#include "cpupower_intern.h"
+
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ * 1 -> if CPU is online
+ * 0 -> if CPU is offline
+ * negative errno values in error case
+ */
+int cpupower_is_cpu_online(unsigned int cpu)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numread;
+ unsigned long long value;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+ struct stat statbuf;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+ if (stat(path, &statbuf) != 0)
+ return 0;
+
+ /*
+ * kernel without CONFIG_HOTPLUG_CPU
+ * -> cpuX directory exists, but not cpuX/online file
+ */
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+ if (stat(path, &statbuf) != 0)
+ return 1;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -errno;
+
+ numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+ if (numread < 1) {
+ close(fd);
+ return -EIO;
+ }
+ linebuf[numread] = '\0';
+ close(fd);
+
+ value = strtoull(linebuf, &endp, 0);
+ if (value > 1)
+ return -EINVAL;
+
+ return value;
+}
+
+/* returns -1 on failure, 0 on success */
+static int sysfs_topology_read_file(unsigned int cpu, const char *fname, int *result)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/topology/%s",
+ cpu, fname);
+ if (cpupower_read_sysfs(path, linebuf, MAX_LINE_LEN) == 0)
+ return -1;
+ *result = strtol(linebuf, &endp, 0);
+ if (endp == linebuf || errno == ERANGE)
+ return -1;
+ return 0;
+}
+
+static int __compare(const void *t1, const void *t2)
+{
+ struct cpuid_core_info *top1 = (struct cpuid_core_info *)t1;
+ struct cpuid_core_info *top2 = (struct cpuid_core_info *)t2;
+ if (top1->pkg < top2->pkg)
+ return -1;
+ else if (top1->pkg > top2->pkg)
+ return 1;
+ else if (top1->core < top2->core)
+ return -1;
+ else if (top1->core > top2->core)
+ return 1;
+ else if (top1->cpu < top2->cpu)
+ return -1;
+ else if (top1->cpu > top2->cpu)
+ return 1;
+ else
+ return 0;
+}
+
+/*
+ * Returns the number of CPUs, or a negative value on error; cpu_top
+ * must be passed to cpu_topology_release() to free its resources
+ *
+ * The array is sorted by ->pkg, then ->core, then ->cpu
+ */
+int get_cpu_topology(struct cpupower_topology *cpu_top)
+{
+ int cpu, last_pkg, cpus = sysconf(_SC_NPROCESSORS_CONF);
+
+ cpu_top->core_info = malloc(sizeof(struct cpuid_core_info) * cpus);
+ if (cpu_top->core_info == NULL)
+ return -ENOMEM;
+ cpu_top->pkgs = cpu_top->cores = 0;
+ for (cpu = 0; cpu < cpus; cpu++) {
+ cpu_top->core_info[cpu].cpu = cpu;
+ cpu_top->core_info[cpu].is_online = cpupower_is_cpu_online(cpu);
+ if(sysfs_topology_read_file(
+ cpu,
+ "physical_package_id",
+ &(cpu_top->core_info[cpu].pkg)) < 0) {
+ cpu_top->core_info[cpu].pkg = -1;
+ cpu_top->core_info[cpu].core = -1;
+ continue;
+ }
+ if(sysfs_topology_read_file(
+ cpu,
+ "core_id",
+ &(cpu_top->core_info[cpu].core)) < 0) {
+ cpu_top->core_info[cpu].pkg = -1;
+ cpu_top->core_info[cpu].core = -1;
+ continue;
+ }
+ }
+
+ qsort(cpu_top->core_info, cpus, sizeof(struct cpuid_core_info),
+ __compare);
+
+ /* Count the number of distinct pkgs values. This works
+ because the primary sort of the core_info struct was just
+ done by pkg value. */
+ last_pkg = cpu_top->core_info[0].pkg;
+ for(cpu = 1; cpu < cpus; cpu++) {
+ if (cpu_top->core_info[cpu].pkg != last_pkg &&
+ cpu_top->core_info[cpu].pkg != -1) {
+
+ last_pkg = cpu_top->core_info[cpu].pkg;
+ cpu_top->pkgs++;
+ }
+ }
+	if (cpu_top->core_info[0].pkg != -1)
+ cpu_top->pkgs++;
+
+	/* Intel's core IDs are not numbered consecutively: there may be
+	 * a core_id of 3, but none of 2. Assume core 0 always exists and
+	 * count its duplicates per package to get the number of cores:
+	 *
+	 * for (cpu = 0; cpu_top->core_info[cpu].pkg == 0 && cpu < cpus; cpu++)
+	 *	if (cpu_top->core_info[cpu].core == 0)
+	 *		cpu_top->cores++;
+	 */
+ return cpus;
+}
+
+void cpu_topology_release(struct cpupower_topology cpu_top)
+{
+ free(cpu_top.core_info);
+}
diff --git a/tools/power/cpupower/lib/cpupower.h b/tools/power/cpupower/lib/cpupower.h
new file mode 100644
index 000000000..e4e4292ea
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CPUPOWER_CPUPOWER_H__
+#define __CPUPOWER_CPUPOWER_H__
+
+struct cpupower_topology {
+	/* Number of CPU cores, packages and threads per core in the system */
+ unsigned int cores;
+ unsigned int pkgs;
+ unsigned int threads; /* per core */
+
+	/* Array is allocated with one entry per CPU, holding per-core info */
+ struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+ int pkg;
+ int core;
+ int cpu;
+
+ /* flags */
+ unsigned int is_online:1;
+};
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+void cpu_topology_release(struct cpupower_topology cpu_top);
+int cpupower_is_cpu_online(unsigned int cpu);
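+
+/* Illustrative use (sketch): enumerate the topology, then release it:
+ *
+ *	struct cpupower_topology top;
+ *	int cpus = get_cpu_topology(&top);
+ *
+ *	if (cpus > 0)
+ *		cpu_topology_release(top);
+ */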
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/power/cpupower/lib/cpupower_intern.h b/tools/power/cpupower/lib/cpupower_intern.h
new file mode 100644
index 000000000..4887c76d2
--- /dev/null
+++ b/tools/power/cpupower/lib/cpupower_intern.h
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
diff --git a/tools/power/cpupower/man/cpupower-frequency-info.1 b/tools/power/cpupower/man/cpupower-frequency-info.1
new file mode 100644
index 000000000..6aa8d239d
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-frequency-info.1
@@ -0,0 +1,77 @@
+.TH "CPUPOWER\-FREQUENCY\-INFO" "1" "0.1" "" "cpupower Manual"
+.SH "NAME"
+.LP
+cpupower\-frequency\-info \- Utility to retrieve cpufreq kernel information
+.SH "SYNTAX"
+.LP
+cpupower [ \-c cpulist ] frequency\-info [\fIoptions\fP]
+.SH "DESCRIPTION"
+.LP
+A small tool which prints out cpufreq information helpful to developers and interested users.
+.SH "OPTIONS"
+.LP
+.TP
+\fB\-e\fR \fB\-\-debug\fR
+Prints out debug information.
+.TP
+\fB\-f\fR \fB\-\-freq\fR
+Get frequency the CPU currently runs at, according to the cpufreq core.
+.TP
+\fB\-w\fR \fB\-\-hwfreq\fR
+Get frequency the CPU currently runs at, by reading it from hardware (only available to root).
+.TP
+\fB\-l\fR \fB\-\-hwlimits\fR
+Determine the minimum and maximum CPU frequency allowed.
+.TP
+\fB\-d\fR \fB\-\-driver\fR
+Determines the used cpufreq kernel driver.
+.TP
+\fB\-p\fR \fB\-\-policy\fR
+Gets the currently used cpufreq policy.
+.TP
+\fB\-g\fR \fB\-\-governors\fR
+Determines available cpufreq governors.
+.TP
+\fB\-r\fR \fB\-\-related\-cpus\fR
+Determines which CPUs run at the same hardware frequency.
+.TP
+\fB\-a\fR \fB\-\-affected\-cpus\fR
+Determines which CPUs need to have their frequency coordinated by software.
+.TP
+\fB\-s\fR \fB\-\-stats\fR
+Shows cpufreq statistics if available.
+.TP
+\fB\-y\fR \fB\-\-latency\fR
+Determines the maximum latency on CPU frequency changes.
+.TP
+\fB\-o\fR \fB\-\-proc\fR
+Prints out information like provided by the /proc/cpufreq interface in 2.4. and early 2.6. kernels.
+.TP
+\fB\-m\fR \fB\-\-human\fR
+human\-readable output for the \-f, \-w, \-s and \-y parameters.
+.TP
+\fB\-n\fR \fB\-\-no-rounding\fR
+Output frequencies and latencies without rounding off values.
+.SH "REMARKS"
+.LP
+By default only values of core zero are displayed. How to display settings of
+other cores is described in the cpupower(1) manpage in the \-\-cpu option section.
+.LP
+You can't specify more than one of the output specific options \-o \-e \-a \-g \-p \-d \-l \-w \-f \-y.
+.LP
+You also can't specify the \-o option combined with the \-c option.
+.SH "FILES"
+.nf
+\fI/sys/devices/system/cpu/cpu*/cpufreq/\fP
+\fI/proc/cpufreq\fP (deprecated)
+\fI/proc/sys/cpu/\fP (deprecated)
+.fi
+.SH "AUTHORS"
+.nf
+Dominik Brodowski <linux@brodo.de> \- author
+Mattia Dongili <malattia@gmail.com> \- first autolibtoolization
+.fi
+.SH "SEE ALSO"
+.LP
+cpupower\-frequency\-set(1), cpupower(1)
diff --git a/tools/power/cpupower/man/cpupower-frequency-set.1 b/tools/power/cpupower/man/cpupower-frequency-set.1
new file mode 100644
index 000000000..b50570221
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-frequency-set.1
@@ -0,0 +1,52 @@
+.TH "CPUPOWER\-FREQUENCY\-SET" "1" "0.1" "" "cpupower Manual"
+.SH "NAME"
+.LP
+cpupower\-frequency\-set \- A small tool to modify cpufreq settings
+.SH "SYNTAX"
+.LP
+cpupower [ \-c cpu ] frequency\-set [\fIoptions\fP]
+.SH "DESCRIPTION"
+.LP
+cpupower frequency\-set allows you to modify cpufreq settings without having to type e.g. "/sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed" all the time.
+.SH "OPTIONS"
+.LP
+.TP
+\fB\-d\fR \fB\-\-min\fR <FREQ>
+new minimum CPU frequency the governor may select.
+.TP
+\fB\-u\fR \fB\-\-max\fR <FREQ>
+new maximum CPU frequency the governor may select.
+.TP
+\fB\-g\fR \fB\-\-governor\fR <GOV>
+new cpufreq governor.
+.TP
+\fB\-f\fR \fB\-\-freq\fR <FREQ>
+specific frequency to be set. Requires userspace governor to be available and loaded.
+.TP
+\fB\-r\fR \fB\-\-related\fR
+modify all hardware-related CPUs at the same time.
+.SH "REMARKS"
+.LP
+By default values are applied on all cores. How to modify single core
+configurations is described in the cpupower(1) manpage in the \-\-cpu option section.
+.LP
+The \-f FREQ, \-\-freq FREQ parameter cannot be combined with any other parameter.
+.LP
+FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz by postfixing
+the value with the wanted unit name, without any space (a frequency given in
+kHz equals the value in Hz * 0.001, in MHz * 1000, and in GHz * 1000000).
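+For example, \-\-max 1500MHz is equivalent to \-\-max 1500000 and to \-\-max 1500000kHz.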
+.LP
+On Linux kernels up to 2.6.29, the \-r or \-\-related parameter is ignored.
+.SH "FILES"
+.nf
+\fI/sys/devices/system/cpu/cpu*/cpufreq/\fP
+\fI/proc/cpufreq\fP (deprecated)
+\fI/proc/sys/cpu/\fP (deprecated)
+.fi
+.SH "AUTHORS"
+.nf
+Dominik Brodowski <linux@brodo.de> \- author
+Mattia Dongili <malattia@gmail.com> \- first autolibtoolization
+.fi
+.SH "SEE ALSO"
+.LP
+cpupower\-frequency\-info(1), cpupower(1)
diff --git a/tools/power/cpupower/man/cpupower-idle-info.1 b/tools/power/cpupower/man/cpupower-idle-info.1
new file mode 100644
index 000000000..80a1311fa
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-idle-info.1
@@ -0,0 +1,91 @@
+.TH "CPUPOWER-IDLE-INFO" "1" "0.1" "" "cpupower Manual"
+.SH "NAME"
+.LP
+cpupower\-idle\-info \- Utility to retrieve cpu idle kernel information
+.SH "SYNTAX"
+.LP
+cpupower [ \-c cpulist ] idle\-info [\fIoptions\fP]
+.SH "DESCRIPTION"
+.LP
+A tool which prints out per\-cpu idle information helpful to developers and interested users.
+.SH "OPTIONS"
+.LP
+.TP
+\fB\-s\fR \fB\-\-silent\fR
+Only print a summary of all available C-states in the system.
+.TP
+\fB\-o\fR \fB\-\-proc\fR
+Deprecated.
+Prints out idle information in the old /proc/acpi/processor/*/power format. This
+interface has been removed from the kernel quite some time ago; do not let
+new code depend on this option, and best do not use it.
+
+.SH IDLE\-INFO DESCRIPTIONS
+CPU sleep state statistics and descriptions are retrieved from sysfs files,
+exported by the cpuidle kernel subsystem. The kernel only updates these
+statistics when it enters or leaves an idle state; therefore, on a very idle or
+a very busy system, these statistics may not be accurate. They still provide a
+good overview of the usage and availability of processor sleep states on
+the platform.
+
+Be aware that the sleep states as exported by the hardware or BIOS and used by
+the Linux kernel may not exactly reflect the capabilities of the
+processor. This is often the case on the X86 architecture when the acpi_idle
+driver is used. It is also possible that the hardware overrules the kernel
+requests, due to internal activity monitors or other reasons.
+On recent X86 platforms it is often possible to read out hardware registers
+which monitor the duration of sleep states the processor resided in. The
+cpupower monitor tool (cpupower\-monitor(1)) can be used to show real sleep
+state residencies. Please refer to the architecture specific description
+section below.
+
+.SH IDLE\-INFO ARCHITECTURE SPECIFIC DESCRIPTIONS
+.SS "X86"
+POLL idle state
+
+If cpuidle is active, X86 platforms have one special idle state.
+The POLL idle state is not a real idle state; it does not save any
+power. Instead, a busy loop doing nothing is executed for a short period of
+time. This state is used if the kernel knows that work has to be processed
+very soon and entering any real hardware idle state may result in a slight
+performance penalty.
+
+There are two different cpuidle drivers on the X86 architecture:
+
+"acpi_idle" cpuidle driver
+
+The acpi_idle cpuidle driver retrieves available sleep states (C\-states) from
+the ACPI BIOS tables (from the _CST ACPI function on recent platforms or from
+the FADT BIOS table on older ones).
+The C1 state is not retrieved from ACPI tables. If the C1 state is entered,
+the kernel will call the hlt instruction (or mwait on Intel).
+
+"intel_idle" cpuidle driver
+
+In kernel 2.6.36 the intel_idle driver was introduced.
+It only serves recent Intel CPUs (Nehalem, Westmere, Sandybridge, Atoms or
+newer). On older Intel CPUs the acpi_idle driver is still used (if the BIOS
+provides C\-state ACPI tables).
+The intel_idle driver knows the sleep state capabilities of the processor and
+ignores the processor sleep state tables exported by the ACPI BIOS.
+
+.SH "REMARKS"
+.LP
+By default only values of core zero are displayed. How to display settings of
+other cores is described in the cpupower(1) manpage in the \-\-cpu option
+section.
+.SH REFERENCES
+http://www.acpi.info/spec.htm
+.SH "FILES"
+.nf
+\fI/sys/devices/system/cpu/cpu*/cpuidle/state*\fP
+\fI/sys/devices/system/cpu/cpuidle/*\fP
+.fi
+.SH "AUTHORS"
+.nf
+Thomas Renninger <trenn@suse.de>
+.fi
+.SH "SEE ALSO"
+.LP
+cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1),
+cpupower\-idle\-set(1)
diff --git a/tools/power/cpupower/man/cpupower-idle-set.1 b/tools/power/cpupower/man/cpupower-idle-set.1
new file mode 100644
index 000000000..21916cff7
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-idle-set.1
@@ -0,0 +1,79 @@
+.TH "CPUPOWER-IDLE-SET" "1" "0.1" "" "cpupower Manual"
+.SH "NAME"
+.LP
+cpupower\-idle\-set \- Utility to set cpu idle state specific kernel options
+.SH "SYNTAX"
+.LP
+cpupower [ \-c cpulist ] idle\-set [\fIoptions\fP]
+.SH "DESCRIPTION"
+.LP
+The cpupower idle\-set subcommand allows setting cpu idle, also called cpu
+sleep state, specific options offered by the kernel. One example is disabling
+sleep states. This can be handy for power vs. performance tuning.
+.SH "OPTIONS"
+.LP
+.TP
+\fB\-d\fR \fB\-\-disable\fR <STATE_NO>
+Disable a specific processor sleep state.
+.TP
+\fB\-e\fR \fB\-\-enable\fR <STATE_NO>
+Enable a specific processor sleep state.
+.TP
+\fB\-D\fR \fB\-\-disable-by-latency\fR <LATENCY>
+Disable all idle states with an equal or higher latency than <LATENCY>; see the example below.
+
+Enable all idle states with a latency lower than <LATENCY>.
+.TP
+\fB\-E\fR \fB\-\-enable-all\fR
+Enable all idle states if not enabled already.
+
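+.LP
+For example, \fBcpupower idle\-set \-D 100\fP disables every idle state whose
+latency, as reported by cpupower\-idle\-info(1), is equal to or higher than 100.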
+.SH "REMARKS"
+.LP
+Cpuidle Governors Policy on Disabling Sleep States
+
+.RS 4
+Depending on the cpuidle governor in use, which implements the kernel policy
+for choosing sleep states, subsequent sleep states on this core might get
+disabled as well.
+
+There are two cpuidle governors, ladder and menu. While the ladder
+governor is always available if CONFIG_CPU_IDLE is selected, the
+menu governor additionally requires CONFIG_NO_HZ.
+
+The behavior and the effect of the disable variable depends on the
+implementation of a particular governor. In the ladder governor, for
+example, it is not coherent, i.e. if one is disabling a light state,
+then all deeper states are disabled as well. Likewise, if one enables a
+deep state while a lighter state is still disabled, then this has no effect.
+.RE
+.LP
+Disabling the Lightest Sleep State may not have any Effect
+
+.RS 4
+If the criteria for entering deeper sleep states are not met and the lightest
+sleep state is chosen when idle, the kernel may still enter this sleep state,
+irrespective of whether it is disabled or not. This is also reflected in
+the usage count of the disabled sleep state when using the cpupower idle-info
+command.
+.RE
+.LP
+Selecting specific CPU Cores
+
+.RS 4
+By default processor sleep states of all CPU cores are set. Please refer
+to the cpupower(1) manpage in the \-\-cpu option section for how to disable
+C-states of specific cores.
+.RE
+.SH "FILES"
+.nf
+\fI/sys/devices/system/cpu/cpu*/cpuidle/state*\fP
+\fI/sys/devices/system/cpu/cpuidle/*\fP
+.fi
+.SH "AUTHORS"
+.nf
+Thomas Renninger <trenn@suse.de>
+.fi
+.SH "SEE ALSO"
+.LP
+cpupower(1), cpupower\-monitor(1), cpupower\-info(1), cpupower\-set(1),
+cpupower\-idle\-info(1)
diff --git a/tools/power/cpupower/man/cpupower-info.1 b/tools/power/cpupower/man/cpupower-info.1
new file mode 100644
index 000000000..340bcd0be
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-info.1
@@ -0,0 +1,19 @@
+.TH CPUPOWER\-INFO "1" "22/02/2011" "" "cpupower Manual"
+.SH NAME
+cpupower\-info \- Shows processor power related kernel or hardware configurations
+.SH SYNOPSIS
+.ft B
+.B cpupower info [ \-b ]
+
+.SH DESCRIPTION
+\fBcpupower info \fP shows kernel configurations or processor hardware
+registers affecting processor power saving policies.
+
+Some options are platform wide, some affect single cores. By default, only
+values of core zero are displayed. cpupower \-\-cpu all info will show the
+settings of all cores; see cpupower(1) for how to choose specific cores.
+
+.SH "SEE ALSO"
+Options are described in detail in:
+
+cpupower(1), cpupower-set(1)
diff --git a/tools/power/cpupower/man/cpupower-monitor.1 b/tools/power/cpupower/man/cpupower-monitor.1
new file mode 100644
index 000000000..914cbb9d9
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-monitor.1
@@ -0,0 +1,198 @@
+.TH CPUPOWER\-MONITOR "1" "22/02/2011" "" "cpupower Manual"
+.SH NAME
+cpupower\-monitor \- Report processor frequency and idle statistics
+.SH SYNOPSIS
+.ft B
+.B cpupower monitor
+.RB "\-l"
+
+.B cpupower monitor
+.RB [ -c ] [ "\-m <mon1>," [ "<mon2>,..." ] ]
+.RB [ "\-i seconds" ]
+.br
+.B cpupower monitor
+.RB [ -c ] [ "\-m <mon1>," [ "<mon2>,..." ] ]
+.RB command
+.br
+.SH DESCRIPTION
+\fBcpupower-monitor \fP reports processor topology, frequency and idle power
+state statistics. Either \fBcommand\fP is forked and
+statistics are printed upon its completion, or statistics are printed periodically.
+
+\fBcpupower-monitor \fP implements independent processor sleep state and
+frequency counters. Some are retrieved from kernel statistics, some read
+hardware registers directly. Use \-l to get an overview of which monitors are
+supported on your system.
+
+.SH Options
+.PP
+\-l
+.RS 4
+List available monitors on your system. Additional details about each monitor
+are shown:
+.RS 2
+.IP \(bu
+The name in quotation marks which can be passed to the \-m parameter.
+.IP \(bu
+The number of different counters the monitor supports in brackets.
+.IP \(bu
+The time in seconds after which the counters might overflow, due to
+implementation constraints.
+.IP \(bu
+The name and a description of each counter and its processor hierarchy level
+coverage in square brackets:
+.RS 4
+.IP \(bu
+[T] \-> Thread
+.IP \(bu
+[C] \-> Core
+.IP \(bu
+[P] \-> Processor Package (Socket)
+.IP \(bu
+[M] \-> Machine/Platform wide counter
+.RE
+.RE
+.RE
+.PP
+\-m <mon1>,<mon2>,...
+.RS 4
+Only display specific monitors. Use the monitor string(s) provided by the \-l option.
+.RE
+.PP
+\-i seconds
+.RS 4
+Measurement interval in seconds (default 1).
+.RE
+.PP
+\-c
+.RS 4
+Schedule the process on every core before starting and ending the measurement.
+This could be needed for the Idle_Stats monitor when no other MSR-based
+monitor (which has to be run on the core that is measured) runs in parallel.
+This is to wake up the processors from deeper sleep states and let the
+kernel re-account its cpuidle (C-state) information before reading the
+cpuidle timings from sysfs.
+.RE
+.PP
+command
+.RS 4
+Measure idle and frequency characteristics of an arbitrary command/workload.
+The executable \fBcommand\fP is forked and upon its exit, statistics gathered since it was
+forked are displayed.
+.RE
+.PP
+\-v
+.RS 4
+Increase verbosity if the binary was compiled with the DEBUG option set.
+.RE
+
+.SH MONITOR DESCRIPTIONS
+.SS "Idle_Stats"
+Shows statistics of the cpuidle kernel subsystem. Values are retrieved from
+/sys/devices/system/cpu/cpu*/cpuidle/state*/.
+The kernel updates these values every time an idle state is entered or
+left. Therefore there can be some inaccuracy when cores are in an idle
+state for some time when the measurement starts or ends. In the worst case it
+can happen that one core stayed in an idle state for the whole measurement
+time and the idle state usage time as exported by the kernel did not get
+updated. In this case a state residency of 0 percent is shown, even though it
+was 100 percent.
+
+.SS "Mperf"
+The name comes from the aperf/mperf (average and maximum) MSR registers,
+which are available on recent X86 processors. It shows the average frequency
+(including boost frequencies).
+Because on all recent hardware the mperf timer stops ticking in any idle
+state, it is also used to show C0 (processor is active) and Cx (processor is in
+any sleep state) times. These counters do not have the inaccuracy restrictions
+the "Idle_Stats" counters may show.
+May work poorly on Linux-2.6.20 through 2.6.29, as the \fBacpi-cpufreq \fP
+kernel frequency driver periodically cleared aperf/mperf registers in those
+kernels.
+
+.SS "Nehalem" "SandyBridge" "HaswellExtended"
+Intel Core and Package sleep state counters.
+Threads (hyperthreaded cores) may not be able to enter deeper core states if
+their sibling is utilized.
+Deepest package sleep states may in reality show up as machine/platform wide
+sleep states and can only be entered if all cores are idle. Look up Intel
+manuals (some are provided in the References section) for further details.
+The monitors are named after the CPU family where the sleep state capabilities
+got introduced and may not match exactly the CPU name of the platform.
+For example an IvyBridge processor has sleep state capabilities which got
+introduced in Nehalem and SandyBridge processor families.
+Thus on an IvyBridge processor one will get Nehalem and SandyBridge sleep
+state monitors.
+HaswellExtended extra package sleep state capabilities are available only in a
+specific Haswell model (family 0x45) and probably also in future processors.
+
+.SS "Fam_12h" "Fam_14h"
+AMD laptop and desktop processor (family 12h and 14h) sleep state counters.
+The registers are accessed via PCI and therefore can still be read out even
+when cores have been offlined.
+
+There is one special counter: NBP1 (North Bridge P1).
+This one always returns 0 or 1, depending on whether the North Bridge P1
+power state got entered at least once during the measurement time.
+Being able to enter NBP1 state also depends on graphics power management.
+Therefore this counter can be used to verify whether the graphics driver's
+power management is working as expected.
+
+.SH EXAMPLES
+
+cpupower monitor -l" may show:
+.RS 4
+Monitor "Mperf" (3 states) \- Might overflow after 922000000 s
+
+ ...
+
+Monitor "Idle_Stats" (3 states) \- Might overflow after 4294967295 s
+
+ ...
+
+.RE
+cpupower monitor \-m "Idle_Stats,Mperf" scp /tmp/test /nfs/tmp
+
+Monitor the scp command and show both Mperf and Idle_Stats counter
+statistics, in the order passed via the \-m option.
+
+Be careful: the typical command to fully utilize one CPU,
+
+cpupower monitor cat /dev/zero >/dev/null
+
+does not work as expected, because the monitor output is also redirected to
+/dev/null. This can be worked around by putting the line into its own tiny
+shell script. Hit CTRL\-c to terminate the command and get the measurement
+output displayed.
+
+.SH REFERENCES
+"BIOS and Kernel Developer’s Guide (BKDG) for AMD Family 14h Processors"
+http://support.amd.com/us/Processor_TechDocs/43170.pdf
+
+"Intel® Turbo Boost Technology
+in Intel® Core™ Microarchitecture (Nehalem) Based Processors"
+http://download.intel.com/design/processor/applnots/320354.pdf
+
+"Intel® 64 and IA-32 Architectures Software Developer's Manual
+Volume 3B: System Programming Guide"
+http://www.intel.com/products/processor/manuals
+
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+/sys/devices/system/cpu/cpu*/cpuidle/state*/.
+.fi
+
+.SH "SEE ALSO"
+powertop(8), msr(4), vmstat(8)
+.PP
+.SH AUTHORS
+.nf
+Written by Thomas Renninger <trenn@suse.de>
+
+Nehalem, SandyBridge monitors and command passing
+based on turbostat.8 from Len Brown <len.brown@intel.com>
diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1
new file mode 100644
index 000000000..2bcc696f4
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower-set.1
@@ -0,0 +1,65 @@
+.TH CPUPOWER\-SET "1" "22/02/2011" "" "cpupower Manual"
+.SH NAME
+cpupower\-set \- Set processor power related kernel or hardware configurations
+.SH SYNOPSIS
+.ft B
+.B cpupower set [ \-b VAL ]
+
+
+.SH DESCRIPTION
+\fBcpupower set \fP sets kernel configurations or directly accesses hardware
+registers affecting processor power saving policies.
+
+Some options are platform wide, some affect single cores. By default values
+are applied on all cores. How to modify single core configurations is
+described in the cpupower(1) manpage in the \-\-cpu option section. Whether an
+option affects the whole system or can be applied to individual cores is
+described in the Options sections.
+
+Use \fBcpupower info \fP to read out current settings and whether they are
+supported on the system at all.
+
+.SH Options
+.PP
+\-\-perf-bias, \-b
+.RS 4
+Sets a register on supported Intel processors which allows software to convey
+its policy for the relative importance of performance versus energy savings to
+the processor.
+
+The range of valid numbers is 0-15, where 0 is maximum
+performance and 15 is maximum energy efficiency.
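+
+For example, \fBcpupower set \-b 15\fP requests maximum energy savings,
+while \fBcpupower set \-b 0\fP requests maximum performance.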
+
+The processor uses this information in model-specific ways
+when it must select trade-offs between performance and
+energy efficiency.
+
+This policy hint does not supersede Processor Performance states
+(P-states) or CPU Idle power states (C-states), but allows
+software to have influence where it would otherwise be unable
+to express a preference.
+
+For example, this setting may tell the hardware how
+aggressively or conservatively to control frequency
+in the "turbo range" above the explicitly OS-controlled
+P-state frequency range. It may also tell the hardware
+how aggressively it should enter the OS requested C-states.
+
+This option can be applied to individual cores only via the \-\-cpu option;
+see cpupower(1).
+
+Setting the performance bias value on one CPU can modify the setting on
+related CPUs as well (for example all CPUs on one socket), because of
+hardware restrictions.
+Use \fBcpupower -c all info -b\fP to verify.
+
+This option needs the msr kernel driver (CONFIG_X86_MSR) loaded.
+.RE
+
+.SH "SEE ALSO"
+cpupower-info(1), cpupower-monitor(1), powertop(1)
+.PP
+.SH AUTHORS
+.nf
+\-\-perf\-bias parts written by Len Brown <len.brown@intel.com>
+Thomas Renninger <trenn@suse.de>
diff --git a/tools/power/cpupower/man/cpupower.1 b/tools/power/cpupower/man/cpupower.1
new file mode 100644
index 000000000..baf741d06
--- /dev/null
+++ b/tools/power/cpupower/man/cpupower.1
@@ -0,0 +1,72 @@
+.TH CPUPOWER "1" "07/03/2011" "" "cpupower Manual"
+.SH NAME
+cpupower \- Shows and sets processor power related values
+.SH SYNOPSIS
+.ft B
+.B cpupower [ \-c cpulist ] <command> [ARGS]
+
+.B cpupower \-v|\-\-version
+
+.B cpupower \-h|\-\-help
+
+.SH DESCRIPTION
+\fBcpupower \fP is a collection of tools to examine and tune power saving
+related features of your processor.
+
+The manpages of the commands (cpupower\-<command>(1)) provide detailed
+descriptions of supported features. Run \fBcpupower help\fP to get an overview
+of supported commands.
+
+.SH Options
+.PP
+\-\-help, \-h
+.RS 4
+Shows supported commands and general usage.
+.RE
+.PP
+\-\-cpu cpulist, \-c cpulist
+.RS 4
+Only show or set values for specific cores.
+This option is not supported by all commands; details can be found in the
+manpages of the commands.
+
+Some commands access all cores (typically the *\-set commands), some only
+the first core (typically the *\-info commands) by default.
+
+The syntax for <cpulist> is based on how the kernel exports CPU bitmasks via
+sysfs files. Some examples:
+.RS 4
+.TP 16
+Input
+Equivalent to
+.TP
+all
+all cores
+.TP
+0\-3
+0,1,2,3
+.TP
+0\-7:2
+0,2,4,6
+.TP
+1,3,5-7
+1,3,5,6,7
+.TP
+0\-3:2,8\-15:4
+0,2,8,12
+.RE
+.RE
+.PP
+\-\-version, \-v
+.RS 4
+Print the package name and version number.
+
+.SH "SEE ALSO"
+cpupower-set(1), cpupower-info(1), cpupower-idle-info(1), cpupower-idle-set(1),
+cpupower-frequency-set(1), cpupower-frequency-info(1), cpupower-monitor(1),
+powertop(1)
+.PP
+.SH AUTHORS
+.nf
+\-\-perf\-bias parts written by Len Brown <len.brown@intel.com>
+Thomas Renninger <trenn@suse.de>
diff --git a/tools/power/cpupower/po/cs.po b/tools/power/cpupower/po/cs.po
new file mode 100644
index 000000000..cb22c45c5
--- /dev/null
+++ b/tools/power/cpupower/po/cs.po
@@ -0,0 +1,944 @@
+# translation of cs.po to Czech
+# Czech translation for cpufrequtils package
+# Czech messages for cpufrequtils.
+# Copyright (C) 2007 kavol
+# This file is distributed under the same license as the cpufrequtils package.
+#
+# Karel Volný <kavol@seznam.cz>, 2007, 2008.
+msgid ""
+msgstr ""
+"Project-Id-Version: cs\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2011-03-08 17:03+0100\n"
+"PO-Revision-Date: 2008-06-11 16:26+0200\n"
+"Last-Translator: Karel Volný <kavol@seznam.cz>\n"
+"Language-Team: Czech <diskuze@lists.l10n.cz>\n"
+"Language: cs\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=3; plural=(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2;\n"
+"X-Generator: KBabel 1.11.4\n"
+
+#: utils/idle_monitor/nhm_idle.c:36
+msgid "Processor Core C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:43
+msgid "Processor Core C6"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:51
+msgid "Processor Package C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70
+msgid "Processor Package C6"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:33
+msgid "Processor Core C7"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:40
+msgid "Processor Package C2"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:47
+msgid "Processor Package C7"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:56
+msgid "Package in sleep state (PC1 or deeper)"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:63
+msgid "Processor Package C1"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:77
+msgid "North Bridge P1 boolean counter (returns 0 or 1)"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:35
+msgid "Processor Core not idle"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:42
+msgid "Processor Core in an idle state"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:50
+msgid "Average Frequency (including boost) in MHz"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:66
+#, c-format
+msgid ""
+"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:69
+#, c-format
+msgid ""
+"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:71
+#, c-format
+msgid "\t -v: be more verbose\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:73
+#, c-format
+msgid "\t -h: print this help\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:74
+#, c-format
+msgid "\t -i: time intervall to measure for in seconds (default 1)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:75
+#, c-format
+msgid "\t -t: show CPU topology/hierarchy\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:76
+#, c-format
+msgid "\t -l: list available CPU sleep monitors (for use with -m)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:77
+#, c-format
+msgid "\t -m: show specific CPU sleep monitors only (in same order)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:79
+#, c-format
+msgid ""
+"only one of: -t, -l, -m are allowed\n"
+"If none of them is passed,"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:80
+#, c-format
+msgid " all supported monitors are shown\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:197
+#, c-format
+msgid "Monitor %s, Counter %s has no count function. Implementation error\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:207
+#, c-format
+msgid " *is offline\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:236
+#, c-format
+msgid "%s: max monitor name length (%d) exceeded\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:250
+#, c-format
+msgid "No matching monitor found in %s, try -l option\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:266
+#, c-format
+msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:319
+#, c-format
+msgid "%s took %.5f seconds and exited with status %d\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:406
+#, c-format
+msgid "Cannot read number of available processors\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:417
+#, c-format
+msgid "Available monitor %s needs root access\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:428
+#, c-format
+msgid "No HW Cstate monitors found\n"
+msgstr ""
+
+#: utils/cpupower.c:78
+#, c-format
+msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n"
+msgstr ""
+
+#: utils/cpupower.c:79
+#, c-format
+msgid "cpupower --version\n"
+msgstr ""
+
+#: utils/cpupower.c:80
+#, c-format
+msgid "Supported subcommands are:\n"
+msgstr ""
+
+#: utils/cpupower.c:83
+#, c-format
+msgid ""
+"\n"
+"Some subcommands can make use of the -c cpulist option.\n"
+msgstr ""
+
+#: utils/cpupower.c:84
+#, c-format
+msgid "Look at the general cpupower manpage how to use it\n"
+msgstr ""
+
+#: utils/cpupower.c:85
+#, c-format
+msgid "and read up the subcommand's manpage whether it is supported.\n"
+msgstr ""
+
+#: utils/cpupower.c:86
+#, c-format
+msgid ""
+"\n"
+"Use cpupower help subcommand for getting help for above subcommands.\n"
+msgstr ""
+
+#: utils/cpupower.c:91
+#, c-format
+msgid "Report errors and bugs to %s, please.\n"
+msgstr ""
+"Chyby v programu prosím hlaste na %s (anglicky).\n"
+"Chyby v překladu prosím hlaste na kavol@seznam.cz (česky ;-)\n"
+
+#: utils/cpupower.c:114
+#, c-format
+msgid "Error parsing cpu list\n"
+msgstr ""
+
+#: utils/cpupower.c:172
+#, c-format
+msgid "Subcommand %s needs root privileges\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:31
+#, c-format
+msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n"
+msgstr "Nelze zjistit počet CPU (%s: %s), předpokládá se 1.\n"
+
+#: utils/cpufreq-info.c:63
+#, c-format
+msgid ""
+" minimum CPU frequency - maximum CPU frequency - governor\n"
+msgstr ""
+" minimální frekvence CPU - maximální frekvence CPU - regulátor\n"
+
+#: utils/cpufreq-info.c:151
+#, c-format
+msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n"
+msgstr ""
+
+#. P state changes via MSR are identified via cpuid 80000007
+#. on Intel and AMD, but we assume boost capable machines can do that
+#. if (cpuid_eax(0x80000000) >= 0x80000007
+#. && (cpuid_edx(0x80000007) & (1 << 7)))
+#.
+#: utils/cpufreq-info.c:161
+#, c-format
+msgid " boost state support: \n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163
+#, c-format
+msgid " Supported: %s\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "yes"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "no"
+msgstr ""
+
+#: utils/cpufreq-info.c:164
+#, fuzzy, c-format
+msgid " Active: %s\n"
+msgstr " ovladač: %s\n"
+
+#: utils/cpufreq-info.c:177
+#, c-format
+msgid " Boost States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:178
+#, c-format
+msgid " Total States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:181
+#, c-format
+msgid " Pstate-Pb%d: %luMHz (boost state)\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:184
+#, c-format
+msgid " Pstate-P%d: %luMHz\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:211
+#, c-format
+msgid " no or unknown cpufreq driver is active on this CPU\n"
+msgstr " pro tento CPU není aktivní žádný známý ovladač cpufreq\n"
+
+#: utils/cpufreq-info.c:213
+#, c-format
+msgid " driver: %s\n"
+msgstr " ovladač: %s\n"
+
+#: utils/cpufreq-info.c:219
+#, fuzzy, c-format
+msgid " CPUs which run at the same hardware frequency: "
+msgstr " CPU, které musí měnit frekvenci zároveň: "
+
+#: utils/cpufreq-info.c:230
+#, fuzzy, c-format
+msgid " CPUs which need to have their frequency coordinated by software: "
+msgstr " CPU, které musí měnit frekvenci zároveň: "
+
+#: utils/cpufreq-info.c:241
+#, c-format
+msgid " maximum transition latency: "
+msgstr ""
+
+#: utils/cpufreq-info.c:247
+#, c-format
+msgid " hardware limits: "
+msgstr " hardwarové meze: "
+
+#: utils/cpufreq-info.c:256
+#, c-format
+msgid " available frequency steps: "
+msgstr " dostupné frekvence: "
+
+#: utils/cpufreq-info.c:269
+#, c-format
+msgid " available cpufreq governors: "
+msgstr " dostupné regulátory: "
+
+#: utils/cpufreq-info.c:280
+#, c-format
+msgid " current policy: frequency should be within "
+msgstr " současná taktika: frekvence by měla být mezi "
+
+#: utils/cpufreq-info.c:282
+#, c-format
+msgid " and "
+msgstr " a "
+
+#: utils/cpufreq-info.c:286
+#, c-format
+msgid ""
+"The governor \"%s\" may decide which speed to use\n"
+" within this range.\n"
+msgstr ""
+" Regulátor \"%s\" může rozhodnout jakou frekvenci použít\n"
+" v těchto mezích.\n"
+
+#: utils/cpufreq-info.c:293
+#, c-format
+msgid " current CPU frequency is "
+msgstr " současná frekvence CPU je "
+
+#: utils/cpufreq-info.c:296
+#, c-format
+msgid " (asserted by call to hardware)"
+msgstr " (zjištěno hardwarovým voláním)"
+
+#: utils/cpufreq-info.c:304
+#, c-format
+msgid " cpufreq stats: "
+msgstr " statistika cpufreq: "
+
+#: utils/cpufreq-info.c:472
+#, fuzzy, c-format
+msgid "Usage: cpupower freqinfo [options]\n"
+msgstr "Užití: cpufreq-info [přepínače]\n"
+
+#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23
+#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148
+#, c-format
+msgid "Options:\n"
+msgstr "Přepínače:\n"
+
+#: utils/cpufreq-info.c:474
+#, fuzzy, c-format
+msgid " -e, --debug Prints out debug information [default]\n"
+msgstr " -e, --debug Vypíše ladicí informace\n"
+
+#: utils/cpufreq-info.c:475
+#, c-format
+msgid ""
+" -f, --freq Get frequency the CPU currently runs at, according\n"
+" to the cpufreq core *\n"
+msgstr ""
+" -f, --freq Zjistí aktuální frekvenci, na které CPU běží\n"
+" podle cpufreq *\n"
+
+#: utils/cpufreq-info.c:477
+#, c-format
+msgid ""
+" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n"
+" it from hardware (only available to root) *\n"
+msgstr ""
+" -w, --hwfreq Zjistí aktuální frekvenci, na které CPU běží\n"
+" z hardware (dostupné jen uživateli root) *\n"
+
+#: utils/cpufreq-info.c:479
+#, c-format
+msgid ""
+" -l, --hwlimits Determine the minimum and maximum CPU frequency "
+"allowed *\n"
+msgstr ""
+" -l, --hwlimits Zjistí minimální a maximální dostupnou frekvenci CPU "
+"*\n"
+
+#: utils/cpufreq-info.c:480
+#, c-format
+msgid " -d, --driver Determines the used cpufreq kernel driver *\n"
+msgstr " -d, --driver Zjistí aktivní ovladač cpufreq *\n"
+
+#: utils/cpufreq-info.c:481
+#, c-format
+msgid " -p, --policy Gets the currently used cpufreq policy *\n"
+msgstr " -p, --policy Zjistí aktuální taktiku cpufreq *\n"
+
+#: utils/cpufreq-info.c:482
+#, c-format
+msgid " -g, --governors Determines available cpufreq governors *\n"
+msgstr " -g, --governors Zjistí dostupné regulátory cpufreq *\n"
+
+#: utils/cpufreq-info.c:483
+#, fuzzy, c-format
+msgid ""
+" -r, --related-cpus Determines which CPUs run at the same hardware "
+"frequency *\n"
+msgstr ""
+" -a, --affected-cpus Zjistí, které CPU musí měnit frekvenci zároveň *\n"
+
+#: utils/cpufreq-info.c:484
+#, fuzzy, c-format
+msgid ""
+" -a, --affected-cpus Determines which CPUs need to have their frequency\n"
+" coordinated by software *\n"
+msgstr ""
+" -a, --affected-cpus Zjistí, které CPU musí měnit frekvenci zároveň *\n"
+
+#: utils/cpufreq-info.c:486
+#, c-format
+msgid " -s, --stats Shows cpufreq statistics if available\n"
+msgstr " -s, --stats Zobrazí statistiku cpufreq, je-li dostupná\n"
+
+#: utils/cpufreq-info.c:487
+#, fuzzy, c-format
+msgid ""
+" -y, --latency Determines the maximum latency on CPU frequency "
+"changes *\n"
+msgstr ""
+" -l, --hwlimits Zjistí minimální a maximální dostupnou frekvenci CPU "
+"*\n"
+
+#: utils/cpufreq-info.c:488
+#, c-format
+msgid " -b, --boost Checks for turbo or boost modes *\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:489
+#, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"cpufreq\n"
+" interface in 2.4. and early 2.6. kernels\n"
+msgstr ""
+" -o, --proc Vypíše informace ve formátu, jaký používalo rozhraní\n"
+" /proc/cpufreq v kernelech řady 2.4 a časné 2.6\n"
+
+#: utils/cpufreq-info.c:491
+#, fuzzy, c-format
+msgid ""
+" -m, --human human-readable output for the -f, -w, -s and -y "
+"parameters\n"
+msgstr ""
+" -m, --human Výstup parametrů -f, -w a -s v „lidmi čitelném“ "
+"formátu\n"
+
+#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Vypíše tuto nápovědu\n"
+
+#: utils/cpufreq-info.c:495
+#, c-format
+msgid ""
+"If no argument or only the -c, --cpu parameter is given, debug output about\n"
+"cpufreq is printed which is useful e.g. for reporting bugs.\n"
+msgstr ""
+"Není-li zadán žádný parametr nebo je-li zadán pouze přepínač -c, --cpu, "
+"jsou\n"
+"vypsány ladicí informace, což může být užitečné například při hlášení chyb.\n"
+
+#: utils/cpufreq-info.c:497
+#, c-format
+msgid ""
+"For the arguments marked with *, omitting the -c or --cpu argument is\n"
+"equivalent to setting it to zero\n"
+msgstr ""
+"Není-li při použití přepínačů označených * zadán parametr -c nebo --cpu,\n"
+"předpokládá se jeho hodnota 0.\n"
+
+#: utils/cpufreq-info.c:580
+#, c-format
+msgid ""
+"The argument passed to this tool can't be combined with passing a --cpu "
+"argument\n"
+msgstr "Zadaný parametr nemůže být použit zároveň s přepínačem -c nebo --cpu\n"
+
+#: utils/cpufreq-info.c:596
+#, c-format
+msgid ""
+"You can't specify more than one --cpu parameter and/or\n"
+"more than one output-specific argument\n"
+msgstr ""
+"Nelze zadat více než jeden parametr -c nebo --cpu\n"
+"anebo více než jeden parametr určující výstup\n"
+
+#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42
+#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213
+#, c-format
+msgid "invalid or unknown argument\n"
+msgstr "neplatný nebo neznámý parametr\n"
+
+#: utils/cpufreq-info.c:617
+#, c-format
+msgid "couldn't analyze CPU %d as it doesn't seem to be present\n"
+msgstr "nelze analyzovat CPU %d, vypadá to, že není přítomen\n"
+
+#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142
+#, c-format
+msgid "analyzing CPU %d:\n"
+msgstr "analyzuji CPU %d:\n"
+
+#: utils/cpufreq-set.c:25
+#, fuzzy, c-format
+msgid "Usage: cpupower frequency-set [options]\n"
+msgstr "Užití: cpufreq-set [přepínače]\n"
+
+#: utils/cpufreq-set.c:27
+#, c-format
+msgid ""
+" -d FREQ, --min FREQ new minimum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -d FREQ, --min FREQ Nová nejnižší frekvence, kterou může regulátor "
+"vybrat\n"
+
+#: utils/cpufreq-set.c:28
+#, c-format
+msgid ""
+" -u FREQ, --max FREQ new maximum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -u FREQ, --max FREQ Nová nejvyšší frekvence, kterou může regulátor "
+"zvolit\n"
+
+#: utils/cpufreq-set.c:29
+#, c-format
+msgid " -g GOV, --governor GOV new cpufreq governor\n"
+msgstr " -g GOV, --governors GOV Nový regulátor cpufreq\n"
+
+#: utils/cpufreq-set.c:30
+#, c-format
+msgid ""
+" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n"
+" governor to be available and loaded\n"
+msgstr ""
+" -f FREQ, --freq FREQ Frekvence, která má být nastavena. Vyžaduje, aby "
+"byl\n"
+" v jádře nahrán regulátor ‚userspace‘.\n"
+
+#: utils/cpufreq-set.c:32
+#, c-format
+msgid " -r, --related Switches all hardware-related CPUs\n"
+msgstr ""
+
+#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
+#, fuzzy, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Vypíše tuto nápovědu\n"
+
+#: utils/cpufreq-set.c:35
+#, fuzzy, c-format
+msgid ""
+"Notes:\n"
+"1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"
+msgstr ""
+"Není-li při použití přepínačů označených * zadán parametr -c nebo --cpu,\n"
+"předpokládá se jeho hodnota 0.\n"
+
+#: utils/cpufreq-set.c:37
+#, fuzzy, c-format
+msgid ""
+"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other "
+"parameter\n"
+" except the -c CPU, --cpu CPU parameter\n"
+"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
+" by postfixing the value with the wanted unit name, without any space\n"
+" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+msgstr ""
+"Poznámky:\n"
+"1. Vynechání parametru -c nebo --cpu je ekvivalentní jeho nastavení na 0\n"
+"2. Přepínač -f nebo --freq nemůže být použit zároveň s žádným jiným vyjma -"
+"c\n"
+" nebo --cpu\n"
+"3. Frekvence (FREQ) mohou být zadány v Hz, kHz (výchozí), MHz, GHz nebo THz\n"
+" připojením názvu jednotky bez mezery mezi číslem a jednotkou\n"
+" (FREQ v kHz =^ Hz * 0,001 = ^ MHz * 1000 =^ GHz * 1000000)\n"
+
+#: utils/cpufreq-set.c:57
+#, c-format
+msgid ""
+"Error setting new values. Common errors:\n"
+"- Do you have proper administration rights? (super-user?)\n"
+"- Is the governor you requested available and modprobed?\n"
+"- Trying to set an invalid policy?\n"
+"- Trying to set a specific frequency, but userspace governor is not "
+"available,\n"
+" for example because of hardware which cannot be set to a specific "
+"frequency\n"
+" or because the userspace governor isn't loaded?\n"
+msgstr ""
+"Chyba při nastavování nových hodnot. Obvyklé problémy:\n"
+"- Máte patřičná administrátorská práva? (root?)\n"
+"- Je požadovaný regulátor dostupný v jádře? (modprobe?)\n"
+"- Snažíte se nastavit neplatnou taktiku?\n"
+"- Snažíte se nastavit určitou frekvenci, ale není dostupný\n"
+" regulátor ‚userspace‘, například protože není nahrán v jádře,\n"
+" nebo nelze na tomto hardware nastavit určitou frekvenci?\n"
+
+#: utils/cpufreq-set.c:170
+#, c-format
+msgid "wrong, unknown or unhandled CPU?\n"
+msgstr "neznámý nebo nepodporovaný CPU?\n"
+
+#: utils/cpufreq-set.c:302
+#, c-format
+msgid ""
+"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n"
+"-g/--governor parameters\n"
+msgstr ""
+"přepínač -f/--freq nemůže být použit zároveň\n"
+"s přepínačem -d/--min, -u/--max nebo -g/--governor\n"
+
+#: utils/cpufreq-set.c:308
+#, c-format
+msgid ""
+"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
+"-g/--governor must be passed\n"
+msgstr ""
+"Musí být zadán alespoň jeden přepínač\n"
+"-f/--freq, -d/--min, -u/--max nebo -g/--governor\n"
+
+#: utils/cpufreq-set.c:347
+#, c-format
+msgid "Setting cpu: %d\n"
+msgstr ""
+
+#: utils/cpupower-set.c:22
+#, c-format
+msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:24
+#, c-format
+msgid ""
+" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-set.c:26
+#, c-format
+msgid ""
+" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:27
+#, c-format
+msgid ""
+" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler "
+"policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:80
+#, c-format
+msgid "--perf-bias param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:91
+#, c-format
+msgid "--sched-mc param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:102
+#, c-format
+msgid "--sched-smt param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:121
+#, c-format
+msgid "Error setting sched-mc %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:127
+#, c-format
+msgid "Error setting sched-smt %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:146
+#, c-format
+msgid "Error setting perf-bias value on CPU %d\n"
+msgstr ""
+
+#: utils/cpupower-info.c:21
+#, c-format
+msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"
+msgstr ""
+
+#: utils/cpupower-info.c:23
+#, c-format
+msgid ""
+" -b, --perf-bias Gets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-info.c:25
+#, fuzzy, c-format
+msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n"
+msgstr " -p, --policy Zjistí aktuální taktiku cpufreq *\n"
+
+#: utils/cpupower-info.c:26
+#, c-format
+msgid ""
+" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-info.c:28
+#, c-format
+msgid ""
+"\n"
+"Passing no option will show all info, by default only on core 0\n"
+msgstr ""
+
+#: utils/cpupower-info.c:102
+#, c-format
+msgid "System's multi core scheduler setting: "
+msgstr ""
+
+#. if sysfs file is missing it's: errno == ENOENT
+#: utils/cpupower-info.c:105 utils/cpupower-info.c:114
+#, c-format
+msgid "not supported\n"
+msgstr ""
+
+#: utils/cpupower-info.c:111
+#, c-format
+msgid "System's thread sibling scheduler setting: "
+msgstr ""
+
+#: utils/cpupower-info.c:126
+#, c-format
+msgid "Intel's performance bias setting needs root privileges\n"
+msgstr ""
+
+#: utils/cpupower-info.c:128
+#, c-format
+msgid "System does not support Intel's performance bias setting\n"
+msgstr ""
+
+#: utils/cpupower-info.c:147
+#, c-format
+msgid "Could not read perf-bias value\n"
+msgstr ""
+
+#: utils/cpupower-info.c:150
+#, c-format
+msgid "perf-bias: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:28
+#, fuzzy, c-format
+msgid "Analyzing CPU %d:\n"
+msgstr "analyzuji CPU %d:\n"
+
+#: utils/cpuidle-info.c:32
+#, c-format
+msgid "CPU %u: No idle states\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:36
+#, c-format
+msgid "CPU %u: Can't read idle state info\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:41
+#, c-format
+msgid "Could not determine max idle state %u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:46
+#, c-format
+msgid "Number of idle states: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:48
+#, fuzzy, c-format
+msgid "Available idle states:"
+msgstr " dostupné frekvence: "
+
+#: utils/cpuidle-info.c:71
+#, c-format
+msgid "Flags/Description: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:74
+#, c-format
+msgid "Latency: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:76
+#, c-format
+msgid "Usage: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:78
+#, c-format
+msgid "Duration: %llu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:90
+#, c-format
+msgid "Could not determine cpuidle driver\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:94
+#, fuzzy, c-format
+msgid "CPUidle driver: %s\n"
+msgstr " ovladač: %s\n"
+
+#: utils/cpuidle-info.c:99
+#, c-format
+msgid "Could not determine cpuidle governor\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:103
+#, c-format
+msgid "CPUidle governor: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:122
+#, c-format
+msgid "CPU %u: Can't read C-state info\n"
+msgstr ""
+
+#. printf("Cstates: %d\n", cstates);
+#: utils/cpuidle-info.c:127
+#, c-format
+msgid "active state: C0\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:128
+#, c-format
+msgid "max_cstate: C%u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:129
+#, c-format
+msgid "maximum allowed latency: %lu usec\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:130
+#, c-format
+msgid "states:\t\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:132
+#, c-format
+msgid " C%d: type[C%d] "
+msgstr ""
+
+#: utils/cpuidle-info.c:134
+#, c-format
+msgid "promotion[--] demotion[--] "
+msgstr ""
+
+#: utils/cpuidle-info.c:135
+#, c-format
+msgid "latency[%03lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:137
+#, c-format
+msgid "usage[%08lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:139
+#, c-format
+msgid "duration[%020Lu] \n"
+msgstr ""
+
+#: utils/cpuidle-info.c:147
+#, fuzzy, c-format
+msgid "Usage: cpupower idleinfo [options]\n"
+msgstr "Užití: cpufreq-info [přepínače]\n"
+
+#: utils/cpuidle-info.c:149
+#, fuzzy, c-format
+msgid " -s, --silent Only show general C-state information\n"
+msgstr " -e, --debug Vypíše ladicí informace\n"
+
+#: utils/cpuidle-info.c:150
+#, fuzzy, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"acpi/processor/*/power\n"
+" interface in older kernels\n"
+msgstr ""
+" -o, --proc Vypíše informace ve formátu, jaký používalo rozhraní\n"
+" /proc/cpufreq v kernelech řady 2.4 a časné 2.6\n"
+
+#: utils/cpuidle-info.c:209
+#, fuzzy, c-format
+msgid "You can't specify more than one output-specific argument\n"
+msgstr ""
+"Nelze zadat více než jeden parametr -c nebo --cpu\n"
+"anebo více než jeden parametr určující výstup\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU CPU number which information shall be determined "
+#~ "about\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU Číslo CPU, o kterém se mají zjistit informace\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be "
+#~ "modified\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU Číslo CPU pro který se má provést nastavení "
+#~ "cpufreq\n"
diff --git a/tools/power/cpupower/po/de.po b/tools/power/cpupower/po/de.po
new file mode 100644
index 000000000..840c17cc4
--- /dev/null
+++ b/tools/power/cpupower/po/de.po
@@ -0,0 +1,961 @@
+# German translations for cpufrequtils package
+# German messages for cpufrequtils.
+# Copyright (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.net>
+# This file is distributed under the same license as the cpufrequtils package.
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: cpufrequtils 006\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2011-03-08 17:03+0100\n"
+"PO-Revision-Date: 2009-08-08 17:18+0100\n"
+"Last-Translator: <linux@dominikbrodowski.net>\n"
+"Language-Team: NONE\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+"Plural-Forms: nplurals=2; plural=(n != 1);\n"
+
+#: utils/idle_monitor/nhm_idle.c:36
+msgid "Processor Core C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:43
+msgid "Processor Core C6"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:51
+msgid "Processor Package C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70
+msgid "Processor Package C6"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:33
+msgid "Processor Core C7"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:40
+msgid "Processor Package C2"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:47
+msgid "Processor Package C7"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:56
+msgid "Package in sleep state (PC1 or deeper)"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:63
+msgid "Processor Package C1"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:77
+msgid "North Bridge P1 boolean counter (returns 0 or 1)"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:35
+msgid "Processor Core not idle"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:42
+msgid "Processor Core in an idle state"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:50
+msgid "Average Frequency (including boost) in MHz"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:66
+#, c-format
+msgid ""
+"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:69
+#, c-format
+msgid ""
+"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:71
+#, c-format
+msgid "\t -v: be more verbose\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:73
+#, c-format
+msgid "\t -h: print this help\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:74
+#, c-format
+msgid "\t -i: time intervall to measure for in seconds (default 1)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:75
+#, c-format
+msgid "\t -t: show CPU topology/hierarchy\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:76
+#, c-format
+msgid "\t -l: list available CPU sleep monitors (for use with -m)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:77
+#, c-format
+msgid "\t -m: show specific CPU sleep monitors only (in same order)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:79
+#, c-format
+msgid ""
+"only one of: -t, -l, -m are allowed\n"
+"If none of them is passed,"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:80
+#, c-format
+msgid " all supported monitors are shown\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:197
+#, c-format
+msgid "Monitor %s, Counter %s has no count function. Implementation error\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:207
+#, c-format
+msgid " *is offline\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:236
+#, c-format
+msgid "%s: max monitor name length (%d) exceeded\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:250
+#, c-format
+msgid "No matching monitor found in %s, try -l option\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:266
+#, c-format
+msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:319
+#, c-format
+msgid "%s took %.5f seconds and exited with status %d\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:406
+#, c-format
+msgid "Cannot read number of available processors\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:417
+#, c-format
+msgid "Available monitor %s needs root access\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:428
+#, c-format
+msgid "No HW Cstate monitors found\n"
+msgstr ""
+
+#: utils/cpupower.c:78
+#, c-format
+msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n"
+msgstr ""
+
+#: utils/cpupower.c:79
+#, c-format
+msgid "cpupower --version\n"
+msgstr ""
+
+#: utils/cpupower.c:80
+#, c-format
+msgid "Supported subcommands are:\n"
+msgstr ""
+
+#: utils/cpupower.c:83
+#, c-format
+msgid ""
+"\n"
+"Some subcommands can make use of the -c cpulist option.\n"
+msgstr ""
+
+#: utils/cpupower.c:84
+#, c-format
+msgid "Look at the general cpupower manpage how to use it\n"
+msgstr ""
+
+#: utils/cpupower.c:85
+#, c-format
+msgid "and read up the subcommand's manpage whether it is supported.\n"
+msgstr ""
+
+#: utils/cpupower.c:86
+#, c-format
+msgid ""
+"\n"
+"Use cpupower help subcommand for getting help for above subcommands.\n"
+msgstr ""
+
+#: utils/cpupower.c:91
+#, c-format
+msgid "Report errors and bugs to %s, please.\n"
+msgstr "Bitte melden Sie Fehler an %s.\n"
+
+#: utils/cpupower.c:114
+#, c-format
+msgid "Error parsing cpu list\n"
+msgstr ""
+
+#: utils/cpupower.c:172
+#, c-format
+msgid "Subcommand %s needs root privileges\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:31
+#, c-format
+msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n"
+msgstr ""
+"Konnte nicht die Anzahl der CPUs herausfinden (%s : %s), nehme daher 1 an.\n"
+
+#: utils/cpufreq-info.c:63
+#, c-format
+msgid ""
+" minimum CPU frequency - maximum CPU frequency - governor\n"
+msgstr ""
+" minimale CPU-Taktfreq. - maximale CPU-Taktfreq. - Regler \n"
+
+#: utils/cpufreq-info.c:151
+#, c-format
+msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n"
+msgstr ""
+
+#. P state changes via MSR are identified via cpuid 80000007
+#. on Intel and AMD, but we assume boost capable machines can do that
+#. if (cpuid_eax(0x80000000) >= 0x80000007
+#. && (cpuid_edx(0x80000007) & (1 << 7)))
+#.
+#: utils/cpufreq-info.c:161
+#, c-format
+msgid " boost state support: \n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163
+#, c-format
+msgid " Supported: %s\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "yes"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "no"
+msgstr ""
+
+#: utils/cpufreq-info.c:164
+#, fuzzy, c-format
+msgid " Active: %s\n"
+msgstr " Treiber: %s\n"
+
+#: utils/cpufreq-info.c:177
+#, c-format
+msgid " Boost States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:178
+#, c-format
+msgid " Total States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:181
+#, c-format
+msgid " Pstate-Pb%d: %luMHz (boost state)\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:184
+#, c-format
+msgid " Pstate-P%d: %luMHz\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:211
+#, c-format
+msgid " no or unknown cpufreq driver is active on this CPU\n"
+msgstr " kein oder nicht bestimmbarer cpufreq-Treiber aktiv\n"
+
+#: utils/cpufreq-info.c:213
+#, c-format
+msgid " driver: %s\n"
+msgstr " Treiber: %s\n"
+
+#: utils/cpufreq-info.c:219
+#, c-format
+msgid " CPUs which run at the same hardware frequency: "
+msgstr " Folgende CPUs laufen mit der gleichen Hardware-Taktfrequenz: "
+
+#: utils/cpufreq-info.c:230
+#, c-format
+msgid " CPUs which need to have their frequency coordinated by software: "
+msgstr " Die Taktfrequenz folgender CPUs werden per Software koordiniert: "
+
+#: utils/cpufreq-info.c:241
+#, c-format
+msgid " maximum transition latency: "
+msgstr " Maximale Dauer eines Taktfrequenzwechsels: "
+
+#: utils/cpufreq-info.c:247
+#, c-format
+msgid " hardware limits: "
+msgstr " Hardwarebedingte Grenzen der Taktfrequenz: "
+
+#: utils/cpufreq-info.c:256
+#, c-format
+msgid " available frequency steps: "
+msgstr " mögliche Taktfrequenzen: "
+
+#: utils/cpufreq-info.c:269
+#, c-format
+msgid " available cpufreq governors: "
+msgstr " mögliche Regler: "
+
+#: utils/cpufreq-info.c:280
+#, c-format
+msgid " current policy: frequency should be within "
+msgstr " momentane Taktik: die Frequenz soll innerhalb "
+
+#: utils/cpufreq-info.c:282
+#, c-format
+msgid " and "
+msgstr " und "
+
+#: utils/cpufreq-info.c:286
+#, c-format
+msgid ""
+"The governor \"%s\" may decide which speed to use\n"
+" within this range.\n"
+msgstr ""
+" liegen. Der Regler \"%s\" kann frei entscheiden,\n"
+" welche Taktfrequenz innerhalb dieser Grenze verwendet "
+"wird.\n"
+
+#: utils/cpufreq-info.c:293
+#, c-format
+msgid " current CPU frequency is "
+msgstr " momentane Taktfrequenz ist "
+
+#: utils/cpufreq-info.c:296
+#, c-format
+msgid " (asserted by call to hardware)"
+msgstr " (verifiziert durch Nachfrage bei der Hardware)"
+
+#: utils/cpufreq-info.c:304
+#, c-format
+msgid " cpufreq stats: "
+msgstr " Statistik:"
+
+#: utils/cpufreq-info.c:472
+#, fuzzy, c-format
+msgid "Usage: cpupower freqinfo [options]\n"
+msgstr "Aufruf: cpufreq-info [Optionen]\n"
+
+#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23
+#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148
+#, c-format
+msgid "Options:\n"
+msgstr "Optionen:\n"
+
+#: utils/cpufreq-info.c:474
+#, fuzzy, c-format
+msgid " -e, --debug Prints out debug information [default]\n"
+msgstr ""
+" -e, --debug Erzeugt detaillierte Informationen, hilfreich\n"
+" zum Aufspüren von Fehlern\n"
+
+#: utils/cpufreq-info.c:475
+#, c-format
+msgid ""
+" -f, --freq Get frequency the CPU currently runs at, according\n"
+" to the cpufreq core *\n"
+msgstr ""
+" -f, --freq Findet die momentane CPU-Taktfrquenz heraus (nach\n"
+" Meinung des Betriebssystems) *\n"
+
+#: utils/cpufreq-info.c:477
+#, c-format
+msgid ""
+" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n"
+" it from hardware (only available to root) *\n"
+msgstr ""
+" -w, --hwfreq Findet die momentane CPU-Taktfrequenz heraus\n"
+" (verifiziert durch Nachfrage bei der Hardware)\n"
+" [nur der Administrator kann dies tun] *\n"
+
+#: utils/cpufreq-info.c:479
+#, c-format
+msgid ""
+" -l, --hwlimits Determine the minimum and maximum CPU frequency "
+"allowed *\n"
+msgstr ""
+" -l, --hwlimits Findet die minimale und maximale Taktfrequenz heraus "
+"*\n"
+
+#: utils/cpufreq-info.c:480
+#, c-format
+msgid " -d, --driver Determines the used cpufreq kernel driver *\n"
+msgstr " -d, --driver Findet den momentanen Treiber heraus *\n"
+
+#: utils/cpufreq-info.c:481
+#, c-format
+msgid " -p, --policy Gets the currently used cpufreq policy *\n"
+msgstr " -p, --policy Findet die momentane Taktik heraus *\n"
+
+#: utils/cpufreq-info.c:482
+#, c-format
+msgid " -g, --governors Determines available cpufreq governors *\n"
+msgstr " -g, --governors Erzeugt eine Liste mit verfügbaren Reglern *\n"
+
+#: utils/cpufreq-info.c:483
+#, c-format
+msgid ""
+" -r, --related-cpus Determines which CPUs run at the same hardware "
+"frequency *\n"
+msgstr ""
+" -r, --related-cpus Findet heraus, welche CPUs mit derselben "
+"physikalischen\n"
+" Taktfrequenz laufen *\n"
+
+#: utils/cpufreq-info.c:484
+#, c-format
+msgid ""
+" -a, --affected-cpus Determines which CPUs need to have their frequency\n"
+" coordinated by software *\n"
+msgstr ""
+" -a, --affected-cpus Findet heraus, von welchen CPUs die Taktfrequenz "
+"durch\n"
+" Software koordiniert werden muss *\n"
+
+#: utils/cpufreq-info.c:486
+#, c-format
+msgid " -s, --stats Shows cpufreq statistics if available\n"
+msgstr ""
+" -s, --stats Zeigt, sofern möglich, Statistiken über cpufreq an.\n"
+
+#: utils/cpufreq-info.c:487
+#, c-format
+msgid ""
+" -y, --latency Determines the maximum latency on CPU frequency "
+"changes *\n"
+msgstr ""
+" -y, --latency Findet die maximale Dauer eines Taktfrequenzwechsels "
+"heraus *\n"
+
+#: utils/cpufreq-info.c:488
+#, c-format
+msgid " -b, --boost Checks for turbo or boost modes *\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:489
+#, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"cpufreq\n"
+" interface in 2.4. and early 2.6. kernels\n"
+msgstr ""
+" -o, --proc Erzeugt Informationen in einem ähnlichem Format zu "
+"dem\n"
+" der /proc/cpufreq-Datei in 2.4. und frühen 2.6.\n"
+" Kernel-Versionen\n"
+
+#: utils/cpufreq-info.c:491
+#, c-format
+msgid ""
+" -m, --human human-readable output for the -f, -w, -s and -y "
+"parameters\n"
+msgstr ""
+" -m, --human Formatiert Taktfrequenz- und Zeitdauerangaben in "
+"besser\n"
+" lesbarer Form (MHz, GHz; us, ms)\n"
+
+#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Gibt diese Kurzübersicht aus\n"
+
+#: utils/cpufreq-info.c:495
+#, c-format
+msgid ""
+"If no argument or only the -c, --cpu parameter is given, debug output about\n"
+"cpufreq is printed which is useful e.g. for reporting bugs.\n"
+msgstr ""
+"Sofern kein anderer Parameter als '-c, --cpu' angegeben wird, liefert "
+"dieses\n"
+"Programm Informationen, die z.B. zum Berichten von Fehlern nützlich sind.\n"
+
+#: utils/cpufreq-info.c:497
+#, c-format
+msgid ""
+"For the arguments marked with *, omitting the -c or --cpu argument is\n"
+"equivalent to setting it to zero\n"
+msgstr ""
+"Bei den mit * markierten Parametern wird '--cpu 0' angenommen, soweit nicht\n"
+"mittels -c oder --cpu etwas anderes angegeben wird\n"
+
+#: utils/cpufreq-info.c:580
+#, c-format
+msgid ""
+"The argument passed to this tool can't be combined with passing a --cpu "
+"argument\n"
+msgstr "Diese Option kann nicht mit der --cpu-Option kombiniert werden\n"
+
+#: utils/cpufreq-info.c:596
+#, c-format
+msgid ""
+"You can't specify more than one --cpu parameter and/or\n"
+"more than one output-specific argument\n"
+msgstr ""
+"Man kann nicht mehr als einen --cpu-Parameter und/oder mehr als einen\n"
+"informationsspezifischen Parameter gleichzeitig angeben\n"
+
+#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42
+#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213
+#, c-format
+msgid "invalid or unknown argument\n"
+msgstr "unbekannter oder falscher Parameter\n"
+
+#: utils/cpufreq-info.c:617
+#, c-format
+msgid "couldn't analyze CPU %d as it doesn't seem to be present\n"
+msgstr ""
+"Konnte nicht die CPU %d analysieren, da sie (scheinbar?) nicht existiert.\n"
+
+#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142
+#, c-format
+msgid "analyzing CPU %d:\n"
+msgstr "analysiere CPU %d:\n"
+
+#: utils/cpufreq-set.c:25
+#, fuzzy, c-format
+msgid "Usage: cpupower frequency-set [options]\n"
+msgstr "Aufruf: cpufreq-set [Optionen]\n"
+
+#: utils/cpufreq-set.c:27
+#, c-format
+msgid ""
+" -d FREQ, --min FREQ new minimum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -d FREQ, --min FREQ neue minimale Taktfrequenz, die der Regler\n"
+" auswählen darf\n"
+
+#: utils/cpufreq-set.c:28
+#, c-format
+msgid ""
+" -u FREQ, --max FREQ new maximum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -u FREQ, --max FREQ neue maximale Taktfrequenz, die der Regler\n"
+" auswählen darf\n"
+
+#: utils/cpufreq-set.c:29
+#, c-format
+msgid " -g GOV, --governor GOV new cpufreq governor\n"
+msgstr " -g GOV, --governors GOV wechsle zu Regler GOV\n"
+
+#: utils/cpufreq-set.c:30
+#, c-format
+msgid ""
+" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n"
+" governor to be available and loaded\n"
+msgstr ""
+" -f FREQ, --freq FREQ setze exakte Taktfrequenz. Benötigt den Regler\n"
+" 'userspace'.\n"
+
+#: utils/cpufreq-set.c:32
+#, c-format
+msgid " -r, --related Switches all hardware-related CPUs\n"
+msgstr ""
+" -r, --related Setze Werte für alle CPUs, deren Taktfrequenz\n"
+" hardwarebedingt identisch ist.\n"
+
+#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Gibt diese Kurzübersicht aus\n"
+
+#: utils/cpufreq-set.c:35
+#, fuzzy, c-format
+msgid ""
+"Notes:\n"
+"1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"
+msgstr ""
+"Bei den mit * markierten Parametern wird '--cpu 0' angenommen, soweit nicht\n"
+"mittels -c oder --cpu etwas anderes angegeben wird\n"
+
+#: utils/cpufreq-set.c:37
+#, fuzzy, c-format
+msgid ""
+"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other "
+"parameter\n"
+" except the -c CPU, --cpu CPU parameter\n"
+"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
+" by postfixing the value with the wanted unit name, without any space\n"
+" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+msgstr ""
+"Hinweise:\n"
+"1. Sofern kein -c oder --cpu-Parameter angegeben ist, wird '--cpu 0'\n"
+" angenommen\n"
+"2. Der Parameter -f bzw. --freq kann mit keinem anderen als dem Parameter\n"
+" -c bzw. --cpu kombiniert werden\n"
+"3. FREQuenzen können in Hz, kHz (Standard), MHz, GHz oder THz eingegeben\n"
+" werden, indem der Wert und unmittelbar anschließend (ohne Leerzeichen!)\n"
+" die Einheit angegeben werden. (Bsp: 1GHz )\n"
+" (FREQuenz in kHz =^ MHz * 1000 =^ GHz * 1000000).\n"
+
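+# Translator note, illustrating the FREQ unit rule above (the example
+# invocations are hypothetical and not part of the upstream catalog):
+# "cpupower frequency-set -u 1GHz" requests the same maximum frequency as
+# "cpupower frequency-set -u 1000000", since kHz is the default unit and
+# the unit name is appended to the value without a space.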
+#: utils/cpufreq-set.c:57
+#, c-format
+msgid ""
+"Error setting new values. Common errors:\n"
+"- Do you have proper administration rights? (super-user?)\n"
+"- Is the governor you requested available and modprobed?\n"
+"- Trying to set an invalid policy?\n"
+"- Trying to set a specific frequency, but userspace governor is not "
+"available,\n"
+" for example because of hardware which cannot be set to a specific "
+"frequency\n"
+" or because the userspace governor isn't loaded?\n"
+msgstr ""
+"Beim Einstellen ist ein Fehler aufgetreten. Typische Fehlerquellen sind:\n"
+"- nicht ausreichende Rechte (Administrator)\n"
+"- der Regler ist nicht verfügbar bzw. nicht geladen\n"
+"- die angegebene Taktik ist inkorrekt\n"
+"- eine spezifische Frequenz wurde angegeben, aber der Regler 'userspace'\n"
+" kann entweder hardwarebedingt nicht genutzt werden oder ist nicht geladen\n"
+
+#: utils/cpufreq-set.c:170
+#, c-format
+msgid "wrong, unknown or unhandled CPU?\n"
+msgstr "unbekannte oder nicht regelbare CPU\n"
+
+#: utils/cpufreq-set.c:302
+#, c-format
+msgid ""
+"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n"
+"-g/--governor parameters\n"
+msgstr ""
+"Der -f bzw. --freq-Parameter kann nicht mit den Parametern -d/--min, -u/--"
+"max\n"
+"oder -g/--governor kombiniert werden\n"
+
+#: utils/cpufreq-set.c:308
+#, c-format
+msgid ""
+"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
+"-g/--governor must be passed\n"
+msgstr ""
+"Es muss mindestens ein Parameter aus -f/--freq, -d/--min, -u/--max oder\n"
+"-g/--governor angegeben werden.\n"
+
+#: utils/cpufreq-set.c:347
+#, c-format
+msgid "Setting cpu: %d\n"
+msgstr ""
+
+#: utils/cpupower-set.c:22
+#, c-format
+msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:24
+#, c-format
+msgid ""
+" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
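+# Translator note on the [0-15] range (Intel EPB convention; the invocation
+# below is a hypothetical example): 0 biases the CPU toward maximum
+# performance and 15 toward maximum energy saving, so e.g.
+# "cpupower set -b 8" selects a balanced policy on supported Intel models.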
+#: utils/cpupower-set.c:26
+#, c-format
+msgid ""
+" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:27
+#, c-format
+msgid ""
+" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler "
+"policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:80
+#, c-format
+msgid "--perf-bias param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:91
+#, c-format
+msgid "--sched-mc param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:102
+#, c-format
+msgid "--sched-smt param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:121
+#, c-format
+msgid "Error setting sched-mc %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:127
+#, c-format
+msgid "Error setting sched-smt %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:146
+#, c-format
+msgid "Error setting perf-bias value on CPU %d\n"
+msgstr ""
+
+#: utils/cpupower-info.c:21
+#, c-format
+msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"
+msgstr ""
+
+#: utils/cpupower-info.c:23
+#, c-format
+msgid ""
+" -b, --perf-bias Gets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-info.c:25
+#, fuzzy, c-format
+msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n"
+msgstr " -p, --policy Findet die momentane Taktik heraus *\n"
+
+#: utils/cpupower-info.c:26
+#, c-format
+msgid ""
+" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-info.c:28
+#, c-format
+msgid ""
+"\n"
+"Passing no option will show all info, by default only on core 0\n"
+msgstr ""
+
+#: utils/cpupower-info.c:102
+#, c-format
+msgid "System's multi core scheduler setting: "
+msgstr ""
+
+#. if sysfs file is missing it's: errno == ENOENT
+#: utils/cpupower-info.c:105 utils/cpupower-info.c:114
+#, c-format
+msgid "not supported\n"
+msgstr ""
+
+#: utils/cpupower-info.c:111
+#, c-format
+msgid "System's thread sibling scheduler setting: "
+msgstr ""
+
+#: utils/cpupower-info.c:126
+#, c-format
+msgid "Intel's performance bias setting needs root privileges\n"
+msgstr ""
+
+#: utils/cpupower-info.c:128
+#, c-format
+msgid "System does not support Intel's performance bias setting\n"
+msgstr ""
+
+#: utils/cpupower-info.c:147
+#, c-format
+msgid "Could not read perf-bias value\n"
+msgstr ""
+
+#: utils/cpupower-info.c:150
+#, c-format
+msgid "perf-bias: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:28
+#, fuzzy, c-format
+msgid "Analyzing CPU %d:\n"
+msgstr "analysiere CPU %d:\n"
+
+#: utils/cpuidle-info.c:32
+#, c-format
+msgid "CPU %u: No idle states\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:36
+#, c-format
+msgid "CPU %u: Can't read idle state info\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:41
+#, c-format
+msgid "Could not determine max idle state %u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:46
+#, c-format
+msgid "Number of idle states: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:48
+#, fuzzy, c-format
+msgid "Available idle states:"
+msgstr " mögliche Taktfrequenzen: "
+
+#: utils/cpuidle-info.c:71
+#, c-format
+msgid "Flags/Description: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:74
+#, c-format
+msgid "Latency: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:76
+#, c-format
+msgid "Usage: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:78
+#, c-format
+msgid "Duration: %llu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:90
+#, c-format
+msgid "Could not determine cpuidle driver\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:94
+#, fuzzy, c-format
+msgid "CPUidle driver: %s\n"
+msgstr " Treiber: %s\n"
+
+#: utils/cpuidle-info.c:99
+#, c-format
+msgid "Could not determine cpuidle governor\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:103
+#, c-format
+msgid "CPUidle governor: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:122
+#, c-format
+msgid "CPU %u: Can't read C-state info\n"
+msgstr ""
+
+#. printf("Cstates: %d\n", cstates);
+#: utils/cpuidle-info.c:127
+#, c-format
+msgid "active state: C0\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:128
+#, c-format
+msgid "max_cstate: C%u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:129
+#, fuzzy, c-format
+msgid "maximum allowed latency: %lu usec\n"
+msgstr " Maximale Dauer eines Taktfrequenzwechsels: "
+
+#: utils/cpuidle-info.c:130
+#, c-format
+msgid "states:\t\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:132
+#, c-format
+msgid " C%d: type[C%d] "
+msgstr ""
+
+#: utils/cpuidle-info.c:134
+#, c-format
+msgid "promotion[--] demotion[--] "
+msgstr ""
+
+#: utils/cpuidle-info.c:135
+#, c-format
+msgid "latency[%03lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:137
+#, c-format
+msgid "usage[%08lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:139
+#, c-format
+msgid "duration[%020Lu] \n"
+msgstr ""
+
+#: utils/cpuidle-info.c:147
+#, fuzzy, c-format
+msgid "Usage: cpupower idleinfo [options]\n"
+msgstr "Aufruf: cpufreq-info [Optionen]\n"
+
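+# Translator note (hypothetical invocation): plain "cpupower idleinfo"
+# prints the full idle-state report; the options below only narrow or
+# reformat that output.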
+#: utils/cpuidle-info.c:149
+#, fuzzy, c-format
+msgid " -s, --silent Only show general C-state information\n"
+msgstr ""
+" -e, --debug Erzeugt detaillierte Informationen, hilfreich\n"
+" zum Aufspüren von Fehlern\n"
+
+#: utils/cpuidle-info.c:150
+#, fuzzy, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"acpi/processor/*/power\n"
+" interface in older kernels\n"
+msgstr ""
+" -o, --proc Erzeugt Informationen in einem ähnlichem Format zu "
+"dem\n"
+" der /proc/cpufreq-Datei in 2.4. und frühen 2.6.\n"
+" Kernel-Versionen\n"
+
+#: utils/cpuidle-info.c:209
+#, fuzzy, c-format
+msgid "You can't specify more than one output-specific argument\n"
+msgstr ""
+"Man kann nicht mehr als einen --cpu-Parameter und/oder mehr als einen\n"
+"informationsspezifischen Parameter gleichzeitig angeben\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU CPU number which information shall be determined "
+#~ "about\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU Nummer der CPU, über die Informationen "
+#~ "herausgefunden werden sollen\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be "
+#~ "modified\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU Nummer der CPU, deren Taktfrequenz-"
+#~ "Einstellung\n"
+#~ " werden soll\n"
diff --git a/tools/power/cpupower/po/fr.po b/tools/power/cpupower/po/fr.po
new file mode 100644
index 000000000..b46ca2548
--- /dev/null
+++ b/tools/power/cpupower/po/fr.po
@@ -0,0 +1,947 @@
+# French translations for cpufrequtils package
+# Copyright (C) 2004 THE PACKAGE'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the cpufrequtils package.
+# Ducrot Bruno <ducrot@poupinou.org>, 2004.
+#
+#, fuzzy
+msgid ""
+msgstr ""
+"Project-Id-Version: cpufrequtils 0.1-pre2\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2011-03-08 17:03+0100\n"
+"PO-Revision-Date: 2004-11-17 15:53+1000\n"
+"Last-Translator: Bruno Ducrot <ducrot@poupinou.org>\n"
+"Language-Team: NONE\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=ISO-8859-1\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: utils/idle_monitor/nhm_idle.c:36
+msgid "Processor Core C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:43
+msgid "Processor Core C6"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:51
+msgid "Processor Package C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70
+msgid "Processor Package C6"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:33
+msgid "Processor Core C7"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:40
+msgid "Processor Package C2"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:47
+msgid "Processor Package C7"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:56
+msgid "Package in sleep state (PC1 or deeper)"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:63
+msgid "Processor Package C1"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:77
+msgid "North Bridge P1 boolean counter (returns 0 or 1)"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:35
+msgid "Processor Core not idle"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:42
+msgid "Processor Core in an idle state"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:50
+msgid "Average Frequency (including boost) in MHz"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:66
+#, c-format
+msgid ""
+"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:69
+#, c-format
+msgid ""
+"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:71
+#, c-format
+msgid "\t -v: be more verbose\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:73
+#, c-format
+msgid "\t -h: print this help\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:74
+#, c-format
+msgid "\t -i: time intervall to measure for in seconds (default 1)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:75
+#, c-format
+msgid "\t -t: show CPU topology/hierarchy\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:76
+#, c-format
+msgid "\t -l: list available CPU sleep monitors (for use with -m)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:77
+#, c-format
+msgid "\t -m: show specific CPU sleep monitors only (in same order)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:79
+#, c-format
+msgid ""
+"only one of: -t, -l, -m are allowed\n"
+"If none of them is passed,"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:80
+#, c-format
+msgid " all supported monitors are shown\n"
+msgstr ""
+
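+# Translator note (hypothetical invocations derived from the usage strings
+# above): "cpupower monitor -l" lists the sleep monitors supported on the
+# running machine, while "cpupower monitor -i 5" measures all supported
+# monitors over a 5 second interval.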
+#: utils/idle_monitor/cpupower-monitor.c:197
+#, c-format
+msgid "Monitor %s, Counter %s has no count function. Implementation error\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:207
+#, c-format
+msgid " *is offline\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:236
+#, c-format
+msgid "%s: max monitor name length (%d) exceeded\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:250
+#, c-format
+msgid "No matching monitor found in %s, try -l option\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:266
+#, c-format
+msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:319
+#, c-format
+msgid "%s took %.5f seconds and exited with status %d\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:406
+#, c-format
+msgid "Cannot read number of available processors\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:417
+#, c-format
+msgid "Available monitor %s needs root access\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:428
+#, c-format
+msgid "No HW Cstate monitors found\n"
+msgstr ""
+
+#: utils/cpupower.c:78
+#, c-format
+msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n"
+msgstr ""
+
+#: utils/cpupower.c:79
+#, c-format
+msgid "cpupower --version\n"
+msgstr ""
+
+#: utils/cpupower.c:80
+#, c-format
+msgid "Supported subcommands are:\n"
+msgstr ""
+
+#: utils/cpupower.c:83
+#, c-format
+msgid ""
+"\n"
+"Some subcommands can make use of the -c cpulist option.\n"
+msgstr ""
+
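+# Translator note (hypothetical invocation): subcommands that accept the
+# -c option take a cpulist, e.g. "cpupower -c 0,1 freqinfo" restricts the
+# report to CPUs 0 and 1; see the cpupower manpage referenced above.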
+#: utils/cpupower.c:84
+#, c-format
+msgid "Look at the general cpupower manpage how to use it\n"
+msgstr ""
+
+#: utils/cpupower.c:85
+#, c-format
+msgid "and read up the subcommand's manpage whether it is supported.\n"
+msgstr ""
+
+#: utils/cpupower.c:86
+#, c-format
+msgid ""
+"\n"
+"Use cpupower help subcommand for getting help for above subcommands.\n"
+msgstr ""
+
+#: utils/cpupower.c:91
+#, c-format
+msgid "Report errors and bugs to %s, please.\n"
+msgstr "Veuillez rapportez les erreurs et les bogues à %s, s'il vous plait.\n"
+
+#: utils/cpupower.c:114
+#, c-format
+msgid "Error parsing cpu list\n"
+msgstr ""
+
+#: utils/cpupower.c:172
+#, c-format
+msgid "Subcommand %s needs root privileges\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:31
+#, c-format
+msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n"
+msgstr "Détermination du nombre de CPUs (%s : %s) impossible. Assume 1\n"
+
+#: utils/cpufreq-info.c:63
+#, c-format
+msgid ""
+" minimum CPU frequency - maximum CPU frequency - governor\n"
+msgstr ""
+" Fréquence CPU minimale - Fréquence CPU maximale - régulateur\n"
+
+#: utils/cpufreq-info.c:151
+#, c-format
+msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n"
+msgstr ""
+
+#. P state changes via MSR are identified via cpuid 80000007
+#. on Intel and AMD, but we assume boost capable machines can do that
+#. if (cpuid_eax(0x80000000) >= 0x80000007
+#. && (cpuid_edx(0x80000007) & (1 << 7)))
+#.
+#: utils/cpufreq-info.c:161
+#, c-format
+msgid " boost state support: \n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163
+#, c-format
+msgid " Supported: %s\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "yes"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "no"
+msgstr ""
+
+#: utils/cpufreq-info.c:164
+#, fuzzy, c-format
+msgid " Active: %s\n"
+msgstr " pilote : %s\n"
+
+#: utils/cpufreq-info.c:177
+#, c-format
+msgid " Boost States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:178
+#, c-format
+msgid " Total States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:181
+#, c-format
+msgid " Pstate-Pb%d: %luMHz (boost state)\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:184
+#, c-format
+msgid " Pstate-P%d: %luMHz\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:211
+#, c-format
+msgid " no or unknown cpufreq driver is active on this CPU\n"
+msgstr " pas de pilotes cpufreq reconnu pour ce CPU\n"
+
+#: utils/cpufreq-info.c:213
+#, c-format
+msgid " driver: %s\n"
+msgstr " pilote : %s\n"
+
+#: utils/cpufreq-info.c:219
+#, fuzzy, c-format
+msgid " CPUs which run at the same hardware frequency: "
+msgstr " CPUs qui doivent changer de fréquences en même temps : "
+
+#: utils/cpufreq-info.c:230
+#, fuzzy, c-format
+msgid " CPUs which need to have their frequency coordinated by software: "
+msgstr " CPUs qui doivent changer de fréquences en même temps : "
+
+#: utils/cpufreq-info.c:241
+#, c-format
+msgid " maximum transition latency: "
+msgstr ""
+
+#: utils/cpufreq-info.c:247
+#, c-format
+msgid " hardware limits: "
+msgstr " limitation matérielle : "
+
+#: utils/cpufreq-info.c:256
+#, c-format
+msgid " available frequency steps: "
+msgstr " plage de fréquence : "
+
+#: utils/cpufreq-info.c:269
+#, c-format
+msgid " available cpufreq governors: "
+msgstr " régulateurs disponibles : "
+
+#: utils/cpufreq-info.c:280
+#, c-format
+msgid " current policy: frequency should be within "
+msgstr " tactique actuelle : la fréquence doit être comprise entre "
+
+#: utils/cpufreq-info.c:282
+#, c-format
+msgid " and "
+msgstr " et "
+
+#: utils/cpufreq-info.c:286
+#, c-format
+msgid ""
+"The governor \"%s\" may decide which speed to use\n"
+" within this range.\n"
+msgstr ""
+"Le régulateur \"%s\" est libre de choisir la vitesse\n"
+" dans cette plage de fréquences.\n"
+
+#: utils/cpufreq-info.c:293
+#, c-format
+msgid " current CPU frequency is "
+msgstr " la fréquence actuelle de ce CPU est "
+
+#: utils/cpufreq-info.c:296
+#, c-format
+msgid " (asserted by call to hardware)"
+msgstr " (vérifié par un appel direct du matériel)"
+
+#: utils/cpufreq-info.c:304
+#, c-format
+msgid " cpufreq stats: "
+msgstr " des statistique concernant cpufreq:"
+
+#: utils/cpufreq-info.c:472
+#, fuzzy, c-format
+msgid "Usage: cpupower freqinfo [options]\n"
+msgstr "Usage : cpufreq-info [options]\n"
+
+#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23
+#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148
+#, c-format
+msgid "Options:\n"
+msgstr "Options :\n"
+
+#: utils/cpufreq-info.c:474
+#, fuzzy, c-format
+msgid " -e, --debug Prints out debug information [default]\n"
+msgstr " -e, --debug Afficher les informations de déboguage\n"
+
+#: utils/cpufreq-info.c:475
+#, c-format
+msgid ""
+" -f, --freq Get frequency the CPU currently runs at, according\n"
+" to the cpufreq core *\n"
+msgstr ""
+" -f, --freq Obtenir la fréquence actuelle du CPU selon le point\n"
+" de vue du coeur du système de cpufreq *\n"
+
+#: utils/cpufreq-info.c:477
+#, c-format
+msgid ""
+" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n"
+" it from hardware (only available to root) *\n"
+msgstr ""
+" -w, --hwfreq Obtenir la fréquence actuelle du CPU directement par\n"
+" le matériel (doit être root) *\n"
+
+#: utils/cpufreq-info.c:479
+#, c-format
+msgid ""
+" -l, --hwlimits Determine the minimum and maximum CPU frequency "
+"allowed *\n"
+msgstr ""
+" -l, --hwlimits Affiche les fréquences minimales et maximales du CPU "
+"*\n"
+
+#: utils/cpufreq-info.c:480
+#, c-format
+msgid " -d, --driver Determines the used cpufreq kernel driver *\n"
+msgstr " -d, --driver Affiche le pilote cpufreq utilisé *\n"
+
+#: utils/cpufreq-info.c:481
+#, c-format
+msgid " -p, --policy Gets the currently used cpufreq policy *\n"
+msgstr " -p, --policy Affiche la tactique actuelle de cpufreq *\n"
+
+#: utils/cpufreq-info.c:482
+#, c-format
+msgid " -g, --governors Determines available cpufreq governors *\n"
+msgstr ""
+" -g, --governors Affiche les régulateurs disponibles de cpufreq *\n"
+
+#: utils/cpufreq-info.c:483
+#, fuzzy, c-format
+msgid ""
+" -r, --related-cpus Determines which CPUs run at the same hardware "
+"frequency *\n"
+msgstr ""
+" -a, --affected-cpus Affiche quels sont les CPUs qui doivent changer de\n"
+" fréquences en même temps *\n"
+
+#: utils/cpufreq-info.c:484
+#, fuzzy, c-format
+msgid ""
+" -a, --affected-cpus Determines which CPUs need to have their frequency\n"
+" coordinated by software *\n"
+msgstr ""
+" -a, --affected-cpus Affiche quels sont les CPUs qui doivent changer de\n"
+" fréquences en même temps *\n"
+
+#: utils/cpufreq-info.c:486
+#, c-format
+msgid " -s, --stats Shows cpufreq statistics if available\n"
+msgstr ""
+" -s, --stats Indique des statistiques concernant cpufreq, si\n"
+" disponibles\n"
+
+#: utils/cpufreq-info.c:487
+#, fuzzy, c-format
+msgid ""
+" -y, --latency Determines the maximum latency on CPU frequency "
+"changes *\n"
+msgstr ""
+" -l, --hwlimits Affiche les fréquences minimales et maximales du CPU "
+"*\n"
+
+#: utils/cpufreq-info.c:488
+#, c-format
+msgid " -b, --boost Checks for turbo or boost modes *\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:489
+#, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"cpufreq\n"
+" interface in 2.4. and early 2.6. kernels\n"
+msgstr ""
+" -o, --proc Affiche les informations en utilisant l'interface\n"
+" fournie par /proc/cpufreq, présente dans les "
+"versions\n"
+" 2.4 et les anciennes versions 2.6 du noyau\n"
+
+#: utils/cpufreq-info.c:491
+#, fuzzy, c-format
+msgid ""
+" -m, --human human-readable output for the -f, -w, -s and -y "
+"parameters\n"
+msgstr ""
+" -m, --human affiche dans un format lisible pour un humain\n"
+" pour les options -f, -w et -s (MHz, GHz)\n"
+
+#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help affiche l'aide-mémoire\n"
+
+#: utils/cpufreq-info.c:495
+#, c-format
+msgid ""
+"If no argument or only the -c, --cpu parameter is given, debug output about\n"
+"cpufreq is printed which is useful e.g. for reporting bugs.\n"
+msgstr ""
+"Par défaut, les informations de déboguage seront affichées si aucun\n"
+"argument, ou bien si seulement l'argument -c (--cpu) est donné, afin de\n"
+"faciliter les rapports de bogues par exemple\n"
+
+#: utils/cpufreq-info.c:497
+#, c-format
+msgid ""
+"For the arguments marked with *, omitting the -c or --cpu argument is\n"
+"equivalent to setting it to zero\n"
+msgstr "Les arguments avec un * utiliseront le CPU 0 si -c (--cpu) est omis\n"
+
+#: utils/cpufreq-info.c:580
+#, c-format
+msgid ""
+"The argument passed to this tool can't be combined with passing a --cpu "
+"argument\n"
+msgstr "Cette option est incompatible avec --cpu\n"
+
+#: utils/cpufreq-info.c:596
+#, c-format
+msgid ""
+"You can't specify more than one --cpu parameter and/or\n"
+"more than one output-specific argument\n"
+msgstr ""
+"On ne peut indiquer plus d'un paramètre --cpu, tout comme l'on ne peut\n"
+"spécifier plus d'un argument de formatage\n"
+
+#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42
+#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213
+#, c-format
+msgid "invalid or unknown argument\n"
+msgstr "option invalide\n"
+
+#: utils/cpufreq-info.c:617
+#, c-format
+msgid "couldn't analyze CPU %d as it doesn't seem to be present\n"
+msgstr "analyse du CPU %d impossible puisqu'il ne semble pas être présent\n"
+
+#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142
+#, c-format
+msgid "analyzing CPU %d:\n"
+msgstr "analyse du CPU %d :\n"
+
+#: utils/cpufreq-set.c:25
+#, fuzzy, c-format
+msgid "Usage: cpupower frequency-set [options]\n"
+msgstr "Usage : cpufreq-set [options]\n"
+
+#: utils/cpufreq-set.c:27
+#, c-format
+msgid ""
+" -d FREQ, --min FREQ new minimum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -d FREQ, --min FREQ nouvelle fréquence minimale du CPU à utiliser\n"
+" par le régulateur\n"
+
+#: utils/cpufreq-set.c:28
+#, c-format
+msgid ""
+" -u FREQ, --max FREQ new maximum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -u FREQ, --max FREQ nouvelle fréquence maximale du CPU à utiliser\n"
+" par le régulateur\n"
+
+#: utils/cpufreq-set.c:29
+#, c-format
+msgid " -g GOV, --governor GOV new cpufreq governor\n"
+msgstr " -g GOV, --governor GOV active le régulateur GOV\n"
+
+#: utils/cpufreq-set.c:30
+#, c-format
+msgid ""
+" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n"
+" governor to be available and loaded\n"
+msgstr ""
+" -f FREQ, --freq FREQ fixe la fréquence du processeur à FREQ. Il faut\n"
+" que le régulateur « userspace » soit disponible \n"
+" et activé.\n"
+
+#: utils/cpufreq-set.c:32
+#, c-format
+msgid " -r, --related Switches all hardware-related CPUs\n"
+msgstr ""
+
+#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
+#, fuzzy, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help affiche l'aide-mémoire\n"
+
+#: utils/cpufreq-set.c:35
+#, fuzzy, c-format
+msgid ""
+"Notes:\n"
+"1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"
+msgstr "Les arguments avec un * utiliseront le CPU 0 si -c (--cpu) est omis\n"
+
+#: utils/cpufreq-set.c:37
+#, fuzzy, c-format
+msgid ""
+"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other "
+"parameter\n"
+" except the -c CPU, --cpu CPU parameter\n"
+"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
+" by postfixing the value with the wanted unit name, without any space\n"
+" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+msgstr ""
+"Remarque :\n"
+"1. Le CPU numéro 0 sera utilisé par défaut si -c (ou --cpu) est omis ;\n"
+"2. l'argument -f FREQ (ou --freq FREQ) ne peut être utilisé qu'avec --cpu ;\n"
+"3. on pourra préciser l'unité des fréquences en postfixant sans aucune "
+"espace\n"
+" les valeurs par hz, kHz (par défaut), MHz, GHz ou THz\n"
+" (kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+
+#: utils/cpufreq-set.c:57
+#, c-format
+msgid ""
+"Error setting new values. Common errors:\n"
+"- Do you have proper administration rights? (super-user?)\n"
+"- Is the governor you requested available and modprobed?\n"
+"- Trying to set an invalid policy?\n"
+"- Trying to set a specific frequency, but userspace governor is not "
+"available,\n"
+" for example because of hardware which cannot be set to a specific "
+"frequency\n"
+" or because the userspace governor isn't loaded?\n"
+msgstr ""
+"En ajustant les nouveaux paramètres, une erreur est apparue. Les sources\n"
+"d'erreur typique sont :\n"
+"- droit d'administration insuffisant (êtes-vous root ?) ;\n"
+"- le régulateur choisi n'est pas disponible, ou bien n'est pas disponible "
+"en\n"
+" tant que module noyau ;\n"
+"- la tactique n'est pas disponible ;\n"
+"- vous voulez utiliser l'option -f/--freq, mais le régulateur « userspace »\n"
+" n'est pas disponible, par exemple parce que le matériel ne le supporte\n"
+" pas, ou bien n'est tout simplement pas chargé.\n"
+
+#: utils/cpufreq-set.c:170
+#, c-format
+msgid "wrong, unknown or unhandled CPU?\n"
+msgstr "CPU inconnu ou non supporté ?\n"
+
+#: utils/cpufreq-set.c:302
+#, c-format
+msgid ""
+"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n"
+"-g/--governor parameters\n"
+msgstr ""
+"l'option -f/--freq est incompatible avec les options -d/--min, -u/--max et\n"
+"-g/--governor\n"
+
+#: utils/cpufreq-set.c:308
+#, c-format
+msgid ""
+"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
+"-g/--governor must be passed\n"
+msgstr ""
+"L'un de ces paramètres est obligatoire : -f/--freq, -d/--min, -u/--max et\n"
+"-g/--governor\n"
+
+#: utils/cpufreq-set.c:347
+#, c-format
+msgid "Setting cpu: %d\n"
+msgstr ""
+
+#: utils/cpupower-set.c:22
+#, c-format
+msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:24
+#, c-format
+msgid ""
+" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-set.c:26
+#, c-format
+msgid ""
+" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:27
+#, c-format
+msgid ""
+" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler "
+"policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:80
+#, c-format
+msgid "--perf-bias param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:91
+#, c-format
+msgid "--sched-mc param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:102
+#, c-format
+msgid "--sched-smt param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:121
+#, c-format
+msgid "Error setting sched-mc %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:127
+#, c-format
+msgid "Error setting sched-smt %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:146
+#, c-format
+msgid "Error setting perf-bias value on CPU %d\n"
+msgstr ""
+
+#: utils/cpupower-info.c:21
+#, c-format
+msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"
+msgstr ""
+
+#: utils/cpupower-info.c:23
+#, c-format
+msgid ""
+" -b, --perf-bias Gets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-info.c:25
+#, fuzzy, c-format
+msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n"
+msgstr " -p, --policy Affiche la tactique actuelle de cpufreq *\n"
+
+#: utils/cpupower-info.c:26
+#, c-format
+msgid ""
+" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-info.c:28
+#, c-format
+msgid ""
+"\n"
+"Passing no option will show all info, by default only on core 0\n"
+msgstr ""
+
+#: utils/cpupower-info.c:102
+#, c-format
+msgid "System's multi core scheduler setting: "
+msgstr ""
+
+#. if sysfs file is missing it's: errno == ENOENT
+#: utils/cpupower-info.c:105 utils/cpupower-info.c:114
+#, c-format
+msgid "not supported\n"
+msgstr ""
+
+#: utils/cpupower-info.c:111
+#, c-format
+msgid "System's thread sibling scheduler setting: "
+msgstr ""
+
+#: utils/cpupower-info.c:126
+#, c-format
+msgid "Intel's performance bias setting needs root privileges\n"
+msgstr ""
+
+#: utils/cpupower-info.c:128
+#, c-format
+msgid "System does not support Intel's performance bias setting\n"
+msgstr ""
+
+#: utils/cpupower-info.c:147
+#, c-format
+msgid "Could not read perf-bias value\n"
+msgstr ""
+
+#: utils/cpupower-info.c:150
+#, c-format
+msgid "perf-bias: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:28
+#, fuzzy, c-format
+msgid "Analyzing CPU %d:\n"
+msgstr "analyse du CPU %d :\n"
+
+#: utils/cpuidle-info.c:32
+#, c-format
+msgid "CPU %u: No idle states\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:36
+#, c-format
+msgid "CPU %u: Can't read idle state info\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:41
+#, c-format
+msgid "Could not determine max idle state %u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:46
+#, c-format
+msgid "Number of idle states: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:48
+#, fuzzy, c-format
+msgid "Available idle states:"
+msgstr " plage de fréquence : "
+
+#: utils/cpuidle-info.c:71
+#, c-format
+msgid "Flags/Description: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:74
+#, c-format
+msgid "Latency: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:76
+#, c-format
+msgid "Usage: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:78
+#, c-format
+msgid "Duration: %llu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:90
+#, c-format
+msgid "Could not determine cpuidle driver\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:94
+#, fuzzy, c-format
+msgid "CPUidle driver: %s\n"
+msgstr " pilote : %s\n"
+
+#: utils/cpuidle-info.c:99
+#, c-format
+msgid "Could not determine cpuidle governor\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:103
+#, c-format
+msgid "CPUidle governor: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:122
+#, c-format
+msgid "CPU %u: Can't read C-state info\n"
+msgstr ""
+
+#. printf("Cstates: %d\n", cstates);
+#: utils/cpuidle-info.c:127
+#, c-format
+msgid "active state: C0\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:128
+#, c-format
+msgid "max_cstate: C%u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:129
+#, c-format
+msgid "maximum allowed latency: %lu usec\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:130
+#, c-format
+msgid "states:\t\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:132
+#, c-format
+msgid " C%d: type[C%d] "
+msgstr ""
+
+#: utils/cpuidle-info.c:134
+#, c-format
+msgid "promotion[--] demotion[--] "
+msgstr ""
+
+#: utils/cpuidle-info.c:135
+#, c-format
+msgid "latency[%03lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:137
+#, c-format
+msgid "usage[%08lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:139
+#, c-format
+msgid "duration[%020Lu] \n"
+msgstr ""
+
+#: utils/cpuidle-info.c:147
+#, fuzzy, c-format
+msgid "Usage: cpupower idleinfo [options]\n"
+msgstr "Usage : cpufreq-info [options]\n"
+
+#: utils/cpuidle-info.c:149
+#, fuzzy, c-format
+msgid " -s, --silent Only show general C-state information\n"
+msgstr " -e, --debug Afficher les informations de déboguage\n"
+
+#: utils/cpuidle-info.c:150
+#, fuzzy, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"acpi/processor/*/power\n"
+" interface in older kernels\n"
+msgstr ""
+" -o, --proc Affiche les informations en utilisant l'interface\n"
+" fournie par /proc/cpufreq, présente dans les "
+"versions\n"
+" 2.4 et les anciennes versions 2.6 du noyau\n"
+
+#: utils/cpuidle-info.c:209
+#, fuzzy, c-format
+msgid "You can't specify more than one output-specific argument\n"
+msgstr ""
+"On ne peut indiquer plus d'un paramètre --cpu, tout comme l'on ne peut\n"
+"spécifier plus d'un argument de formatage\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU CPU number which information shall be determined "
+#~ "about\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU Numéro du CPU pour lequel l'information sera "
+#~ "affichée\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be "
+#~ "modified\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU numéro du CPU à prendre en compte pour les\n"
+#~ " changements\n"
diff --git a/tools/power/cpupower/po/it.po b/tools/power/cpupower/po/it.po
new file mode 100644
index 000000000..f80c4ddb9
--- /dev/null
+++ b/tools/power/cpupower/po/it.po
@@ -0,0 +1,961 @@
+# Italian translations for cpufrequtils package
+# Copyright (C) 2004-2009
+# This file is distributed under the same license as the cpufrequtils package.
+# Mattia Dongili <malattia@gmail.com>.
+#
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: cpufrequtils 0.3\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2011-03-08 17:03+0100\n"
+"PO-Revision-Date: 2009-08-15 12:00+0900\n"
+"Last-Translator: Mattia Dongili <malattia@gmail.com>\n"
+"Language-Team: NONE\n"
+"Language: \n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: utils/idle_monitor/nhm_idle.c:36
+msgid "Processor Core C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:43
+msgid "Processor Core C6"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:51
+msgid "Processor Package C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70
+msgid "Processor Package C6"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:33
+msgid "Processor Core C7"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:40
+msgid "Processor Package C2"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:47
+msgid "Processor Package C7"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:56
+msgid "Package in sleep state (PC1 or deeper)"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:63
+msgid "Processor Package C1"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:77
+msgid "North Bridge P1 boolean counter (returns 0 or 1)"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:35
+msgid "Processor Core not idle"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:42
+msgid "Processor Core in an idle state"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:50
+msgid "Average Frequency (including boost) in MHz"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:66
+#, c-format
+msgid ""
+"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:69
+#, c-format
+msgid ""
+"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:71
+#, c-format
+msgid "\t -v: be more verbose\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:73
+#, c-format
+msgid "\t -h: print this help\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:74
+#, c-format
+msgid "\t -i: time intervall to measure for in seconds (default 1)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:75
+#, c-format
+msgid "\t -t: show CPU topology/hierarchy\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:76
+#, c-format
+msgid "\t -l: list available CPU sleep monitors (for use with -m)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:77
+#, c-format
+msgid "\t -m: show specific CPU sleep monitors only (in same order)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:79
+#, c-format
+msgid ""
+"only one of: -t, -l, -m are allowed\n"
+"If none of them is passed,"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:80
+#, c-format
+msgid " all supported monitors are shown\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:197
+#, c-format
+msgid "Monitor %s, Counter %s has no count function. Implementation error\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:207
+#, c-format
+msgid " *is offline\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:236
+#, c-format
+msgid "%s: max monitor name length (%d) exceeded\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:250
+#, c-format
+msgid "No matching monitor found in %s, try -l option\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:266
+#, c-format
+msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:319
+#, c-format
+msgid "%s took %.5f seconds and exited with status %d\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:406
+#, c-format
+msgid "Cannot read number of available processors\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:417
+#, c-format
+msgid "Available monitor %s needs root access\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:428
+#, c-format
+msgid "No HW Cstate monitors found\n"
+msgstr ""
+
+#: utils/cpupower.c:78
+#, c-format
+msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n"
+msgstr ""
+
+#: utils/cpupower.c:79
+#, c-format
+msgid "cpupower --version\n"
+msgstr ""
+
+#: utils/cpupower.c:80
+#, c-format
+msgid "Supported subcommands are:\n"
+msgstr ""
+
+#: utils/cpupower.c:83
+#, c-format
+msgid ""
+"\n"
+"Some subcommands can make use of the -c cpulist option.\n"
+msgstr ""
+
+#: utils/cpupower.c:84
+#, c-format
+msgid "Look at the general cpupower manpage how to use it\n"
+msgstr ""
+
+#: utils/cpupower.c:85
+#, c-format
+msgid "and read up the subcommand's manpage whether it is supported.\n"
+msgstr ""
+
+#: utils/cpupower.c:86
+#, c-format
+msgid ""
+"\n"
+"Use cpupower help subcommand for getting help for above subcommands.\n"
+msgstr ""
+
+#: utils/cpupower.c:91
+#, c-format
+msgid "Report errors and bugs to %s, please.\n"
+msgstr "Per favore, comunicare errori e malfunzionamenti a %s.\n"
+
+#: utils/cpupower.c:114
+#, c-format
+msgid "Error parsing cpu list\n"
+msgstr ""
+
+#: utils/cpupower.c:172
+#, c-format
+msgid "Subcommand %s needs root privileges\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:31
+#, c-format
+msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n"
+msgstr "Impossibile determinare il numero di CPU (%s: %s), assumo sia 1\n"
+
+#: utils/cpufreq-info.c:63
+#, c-format
+msgid ""
+" minimum CPU frequency - maximum CPU frequency - governor\n"
+msgstr ""
+" frequenza minima CPU - frequenza massima CPU - gestore\n"
+
+#: utils/cpufreq-info.c:151
+#, c-format
+msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n"
+msgstr ""
+
+#. P state changes via MSR are identified via cpuid 80000007
+#. on Intel and AMD, but we assume boost capable machines can do that
+#. if (cpuid_eax(0x80000000) >= 0x80000007
+#. && (cpuid_edx(0x80000007) & (1 << 7)))
+#.
+#: utils/cpufreq-info.c:161
+#, c-format
+msgid " boost state support: \n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163
+#, c-format
+msgid " Supported: %s\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "yes"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "no"
+msgstr ""
+
+#: utils/cpufreq-info.c:164
+#, fuzzy, c-format
+msgid " Active: %s\n"
+msgstr " modulo %s\n"
+
+#: utils/cpufreq-info.c:177
+#, c-format
+msgid " Boost States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:178
+#, c-format
+msgid " Total States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:181
+#, c-format
+msgid " Pstate-Pb%d: %luMHz (boost state)\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:184
+#, c-format
+msgid " Pstate-P%d: %luMHz\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:211
+#, c-format
+msgid " no or unknown cpufreq driver is active on this CPU\n"
+msgstr " nessun modulo o modulo cpufreq sconosciuto per questa CPU\n"
+
+#: utils/cpufreq-info.c:213
+#, c-format
+msgid " driver: %s\n"
+msgstr " modulo %s\n"
+
+#: utils/cpufreq-info.c:219
+#, c-format
+msgid " CPUs which run at the same hardware frequency: "
+msgstr " CPU che operano alla stessa frequenza hardware: "
+
+#: utils/cpufreq-info.c:230
+#, c-format
+msgid " CPUs which need to have their frequency coordinated by software: "
+msgstr " CPU che è necessario siano coordinate dal software: "
+
+#: utils/cpufreq-info.c:241
+#, c-format
+msgid " maximum transition latency: "
+msgstr " latenza massima durante la transizione: "
+
+#: utils/cpufreq-info.c:247
+#, c-format
+msgid " hardware limits: "
+msgstr " limiti hardware: "
+
+#: utils/cpufreq-info.c:256
+#, c-format
+msgid " available frequency steps: "
+msgstr " frequenze disponibili: "
+
+#: utils/cpufreq-info.c:269
+#, c-format
+msgid " available cpufreq governors: "
+msgstr " gestori disponibili: "
+
+#: utils/cpufreq-info.c:280
+#, c-format
+msgid " current policy: frequency should be within "
+msgstr " gestore attuale: la frequenza deve mantenersi tra "
+
+#: utils/cpufreq-info.c:282
+#, c-format
+msgid " and "
+msgstr " e "
+
+#: utils/cpufreq-info.c:286
+#, c-format
+msgid ""
+"The governor \"%s\" may decide which speed to use\n"
+" within this range.\n"
+msgstr ""
+" Il gestore \"%s\" può decidere quale velocità usare\n"
+" in questo intervallo.\n"
+
+#: utils/cpufreq-info.c:293
+#, c-format
+msgid " current CPU frequency is "
+msgstr " la frequenza attuale della CPU è "
+
+#: utils/cpufreq-info.c:296
+#, c-format
+msgid " (asserted by call to hardware)"
+msgstr " (ottenuta da una chiamata diretta all'hardware)"
+
+#: utils/cpufreq-info.c:304
+#, c-format
+msgid " cpufreq stats: "
+msgstr " statistiche cpufreq:"
+
+#: utils/cpufreq-info.c:472
+#, fuzzy, c-format
+msgid "Usage: cpupower freqinfo [options]\n"
+msgstr "Uso: cpufreq-info [opzioni]\n"
+
+#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23
+#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148
+#, c-format
+msgid "Options:\n"
+msgstr "Opzioni:\n"
+
+#: utils/cpufreq-info.c:474
+#, fuzzy, c-format
+msgid " -e, --debug Prints out debug information [default]\n"
+msgstr " -e, --debug Mostra informazioni di debug\n"
+
+#: utils/cpufreq-info.c:475
+#, c-format
+msgid ""
+" -f, --freq Get frequency the CPU currently runs at, according\n"
+" to the cpufreq core *\n"
+msgstr ""
+" -f, --freq Mostra la frequenza attuale della CPU secondo\n"
+" il modulo cpufreq *\n"
+
+#: utils/cpufreq-info.c:477
+#, c-format
+msgid ""
+" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n"
+" it from hardware (only available to root) *\n"
+msgstr ""
+" -w, --hwfreq Mostra la frequenza attuale della CPU leggendola\n"
+" dall'hardware (disponibile solo per l'utente root) *\n"
+
+#: utils/cpufreq-info.c:479
+#, c-format
+msgid ""
+" -l, --hwlimits Determine the minimum and maximum CPU frequency "
+"allowed *\n"
+msgstr ""
+" -l, --hwlimits Determina le frequenze minima e massima possibili per "
+"la CPU *\n"
+
+#: utils/cpufreq-info.c:480
+#, c-format
+msgid " -d, --driver Determines the used cpufreq kernel driver *\n"
+msgstr ""
+" -d, --driver Determina il modulo cpufreq del kernel in uso *\n"
+
+#: utils/cpufreq-info.c:481
+#, c-format
+msgid " -p, --policy Gets the currently used cpufreq policy *\n"
+msgstr ""
+" -p, --policy Mostra il gestore cpufreq attualmente in uso *\n"
+
+#: utils/cpufreq-info.c:482
+#, c-format
+msgid " -g, --governors Determines available cpufreq governors *\n"
+msgstr " -g, --governors Determina i gestori cpufreq disponibili *\n"
+
+#: utils/cpufreq-info.c:483
+#, c-format
+msgid ""
+" -r, --related-cpus Determines which CPUs run at the same hardware "
+"frequency *\n"
+msgstr ""
+" -r, --related-cpus Determina quali CPU operano alla stessa frequenza *\n"
+
+#: utils/cpufreq-info.c:484
+#, c-format
+msgid ""
+" -a, --affected-cpus Determines which CPUs need to have their frequency\n"
+" coordinated by software *\n"
+msgstr ""
+" -a, --affected-cpus Determina quali CPU devono avere la frequenza\n"
+" coordinata dal software *\n"
+
+#: utils/cpufreq-info.c:486
+#, c-format
+msgid " -s, --stats Shows cpufreq statistics if available\n"
+msgstr " -s, --stats Mostra le statistiche se disponibili\n"
+
+#: utils/cpufreq-info.c:487
+#, c-format
+msgid ""
+" -y, --latency Determines the maximum latency on CPU frequency "
+"changes *\n"
+msgstr ""
+" -y, --latency Determina la latenza massima durante i cambi di "
+"frequenza *\n"
+
+#: utils/cpufreq-info.c:488
+#, c-format
+msgid " -b, --boost Checks for turbo or boost modes *\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:489
+#, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"cpufreq\n"
+" interface in 2.4. and early 2.6. kernels\n"
+msgstr ""
+" -o, --proc Stampa le informazioni come se provenissero dalla\n"
+" interfaccia cpufreq /proc/ presente nei kernel\n"
+" 2.4 ed i primi 2.6\n"
+
+#: utils/cpufreq-info.c:491
+#, c-format
+msgid ""
+" -m, --human human-readable output for the -f, -w, -s and -y "
+"parameters\n"
+msgstr ""
+" -m, --human formatta l'output delle opzioni -f, -w, -s e -y in "
+"maniera\n"
+" leggibile da un essere umano\n"
+
+#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Stampa questa schermata\n"
+
+#: utils/cpufreq-info.c:495
+#, c-format
+msgid ""
+"If no argument or only the -c, --cpu parameter is given, debug output about\n"
+"cpufreq is printed which is useful e.g. for reporting bugs.\n"
+msgstr ""
+"Se non viene specificata nessuna opzione o viene specificata solo l'opzione -"
+"c, --cpu,\n"
+"le informazioni di debug per cpufreq saranno utili ad esempio a riportare i "
+"bug.\n"
+
+#: utils/cpufreq-info.c:497
+#, c-format
+msgid ""
+"For the arguments marked with *, omitting the -c or --cpu argument is\n"
+"equivalent to setting it to zero\n"
+msgstr ""
+"Per le opzioni segnalate con *, omettere l'opzione -c o --cpu è come "
+"specificarla\n"
+"con il valore 0\n"
+
+#: utils/cpufreq-info.c:580
+#, c-format
+msgid ""
+"The argument passed to this tool can't be combined with passing a --cpu "
+"argument\n"
+msgstr ""
+"L'opzione specificata a questo programma non può essere combinata con --cpu\n"
+
+#: utils/cpufreq-info.c:596
+#, c-format
+msgid ""
+"You can't specify more than one --cpu parameter and/or\n"
+"more than one output-specific argument\n"
+msgstr ""
+"Non è possibile specificare più di una volta l'opzione --cpu e/o\n"
+"specificare più di un parametro di output specifico\n"
+
+#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42
+#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213
+#, c-format
+msgid "invalid or unknown argument\n"
+msgstr "opzione sconosciuta o non valida\n"
+
+#: utils/cpufreq-info.c:617
+#, c-format
+msgid "couldn't analyze CPU %d as it doesn't seem to be present\n"
+msgstr "impossibile analizzare la CPU %d poiché non sembra essere presente\n"
+
+#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142
+#, c-format
+msgid "analyzing CPU %d:\n"
+msgstr "analisi della CPU %d:\n"
+
+#: utils/cpufreq-set.c:25
+#, fuzzy, c-format
+msgid "Usage: cpupower frequency-set [options]\n"
+msgstr "Uso: cpufreq-set [opzioni]\n"
+
+#: utils/cpufreq-set.c:27
+#, c-format
+msgid ""
+" -d FREQ, --min FREQ new minimum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -d FREQ, --min FREQ la nuova frequenza minima che il gestore cpufreq "
+"può scegliere\n"
+
+#: utils/cpufreq-set.c:28
+#, c-format
+msgid ""
+" -u FREQ, --max FREQ new maximum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -u FREQ, --max FREQ la nuova frequenza massima che il gestore cpufreq "
+"può scegliere\n"
+
+#: utils/cpufreq-set.c:29
+#, c-format
+msgid " -g GOV, --governor GOV new cpufreq governor\n"
+msgstr " -g GOV, --governor GOV nuovo gestore cpufreq\n"
+
+#: utils/cpufreq-set.c:30
+#, c-format
+msgid ""
+" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n"
+" governor to be available and loaded\n"
+msgstr ""
+" -f FREQ, --freq FREQ specifica la frequenza a cui impostare la CPU.\n"
+" È necessario che il gestore userspace sia "
+"disponibile e caricato\n"
+
+#: utils/cpufreq-set.c:32
+#, c-format
+msgid " -r, --related Switches all hardware-related CPUs\n"
+msgstr ""
+" -r, --related Modifica tutte le CPU coordinate dall'hardware\n"
+
+#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Stampa questa schermata\n"
+
+#: utils/cpufreq-set.c:35
+#, fuzzy, c-format
+msgid ""
+"Notes:\n"
+"1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"
+msgstr ""
+"Per le opzioni segnalate con *, omettere l'opzione -c o --cpu è come "
+"specificarla\n"
+"con il valore 0\n"
+
+#: utils/cpufreq-set.c:37
+#, fuzzy, c-format
+msgid ""
+"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other "
+"parameter\n"
+" except the -c CPU, --cpu CPU parameter\n"
+"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
+" by postfixing the value with the wanted unit name, without any space\n"
+" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+msgstr ""
+"Note:\n"
+"1. Omettere l'opzione -c o --cpu è equivalente a impostarlo a 0\n"
+"2. l'opzione -f FREQ, --freq FREQ non può essere specificata con altre "
+"opzioni\n"
+" ad eccezione dell'opzione -c CPU o --cpu CPU\n"
+"3. le FREQuenze possono essere specuficate in Hz, kHz (default), MHz, GHz, "
+"or THz\n"
+" postponendo l'unità di misura al valore senza nessuno spazio fra loro\n"
+" (FREQuenza in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+
+#: utils/cpufreq-set.c:57
+#, c-format
+msgid ""
+"Error setting new values. Common errors:\n"
+"- Do you have proper administration rights? (super-user?)\n"
+"- Is the governor you requested available and modprobed?\n"
+"- Trying to set an invalid policy?\n"
+"- Trying to set a specific frequency, but userspace governor is not "
+"available,\n"
+" for example because of hardware which cannot be set to a specific "
+"frequency\n"
+" or because the userspace governor isn't loaded?\n"
+msgstr ""
+"Si sono verificati degli errori impostando i nuovi valori.\n"
+"Alcuni errori comuni possono essere:\n"
+"- Hai i necessari diritti di amministrazione? (super-user?)\n"
+"- Il gestore che hai richiesto è disponibile e caricato?\n"
+"- Stai provando ad impostare una politica di gestione non valida?\n"
+"- Stai provando a impostare una specifica frequenza ma il gestore\n"
+" userspace non è disponibile, per esempio a causa dell'hardware\n"
+" che non supporta frequenze fisse o a causa del fatto che\n"
+" il gestore userspace non è caricato?\n"
+
+#: utils/cpufreq-set.c:170
+#, c-format
+msgid "wrong, unknown or unhandled CPU?\n"
+msgstr "CPU errata, sconosciuta o non gestita?\n"
+
+#: utils/cpufreq-set.c:302
+#, c-format
+msgid ""
+"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n"
+"-g/--governor parameters\n"
+msgstr ""
+"l'opzione -f/--freq non può venire combinata con i parametri\n"
+" -d/--min, -u/--max o -g/--governor\n"
+
+#: utils/cpufreq-set.c:308
+#, c-format
+msgid ""
+"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
+"-g/--governor must be passed\n"
+msgstr ""
+"Almeno una delle opzioni -f/--freq, -d/--min, -u/--max, e -g/--governor\n"
+"deve essere specificata\n"
+
+#: utils/cpufreq-set.c:347
+#, c-format
+msgid "Setting cpu: %d\n"
+msgstr ""
+
+#: utils/cpupower-set.c:22
+#, c-format
+msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:24
+#, c-format
+msgid ""
+" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-set.c:26
+#, c-format
+msgid ""
+" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:27
+#, c-format
+msgid ""
+" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler "
+"policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:80
+#, c-format
+msgid "--perf-bias param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:91
+#, c-format
+msgid "--sched-mc param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:102
+#, c-format
+msgid "--sched-smt param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:121
+#, c-format
+msgid "Error setting sched-mc %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:127
+#, c-format
+msgid "Error setting sched-smt %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:146
+#, c-format
+msgid "Error setting perf-bias value on CPU %d\n"
+msgstr ""
+
+#: utils/cpupower-info.c:21
+#, c-format
+msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"
+msgstr ""
+
+#: utils/cpupower-info.c:23
+#, c-format
+msgid ""
+" -b, --perf-bias Gets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-info.c:25
+#, fuzzy, c-format
+msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n"
+msgstr ""
+" -p, --policy Mostra il gestore cpufreq attualmente in uso *\n"
+
+#: utils/cpupower-info.c:26
+#, c-format
+msgid ""
+" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-info.c:28
+#, c-format
+msgid ""
+"\n"
+"Passing no option will show all info, by default only on core 0\n"
+msgstr ""
+
+#: utils/cpupower-info.c:102
+#, c-format
+msgid "System's multi core scheduler setting: "
+msgstr ""
+
+#. if sysfs file is missing it's: errno == ENOENT
+#: utils/cpupower-info.c:105 utils/cpupower-info.c:114
+#, c-format
+msgid "not supported\n"
+msgstr ""
+
+#: utils/cpupower-info.c:111
+#, c-format
+msgid "System's thread sibling scheduler setting: "
+msgstr ""
+
+#: utils/cpupower-info.c:126
+#, c-format
+msgid "Intel's performance bias setting needs root privileges\n"
+msgstr ""
+
+#: utils/cpupower-info.c:128
+#, c-format
+msgid "System does not support Intel's performance bias setting\n"
+msgstr ""
+
+#: utils/cpupower-info.c:147
+#, c-format
+msgid "Could not read perf-bias value\n"
+msgstr ""
+
+#: utils/cpupower-info.c:150
+#, c-format
+msgid "perf-bias: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:28
+#, fuzzy, c-format
+msgid "Analyzing CPU %d:\n"
+msgstr "analisi della CPU %d:\n"
+
+#: utils/cpuidle-info.c:32
+#, c-format
+msgid "CPU %u: No idle states\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:36
+#, c-format
+msgid "CPU %u: Can't read idle state info\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:41
+#, c-format
+msgid "Could not determine max idle state %u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:46
+#, c-format
+msgid "Number of idle states: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:48
+#, fuzzy, c-format
+msgid "Available idle states:"
+msgstr " frequenze disponibili: "
+
+#: utils/cpuidle-info.c:71
+#, c-format
+msgid "Flags/Description: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:74
+#, c-format
+msgid "Latency: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:76
+#, c-format
+msgid "Usage: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:78
+#, c-format
+msgid "Duration: %llu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:90
+#, c-format
+msgid "Could not determine cpuidle driver\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:94
+#, fuzzy, c-format
+msgid "CPUidle driver: %s\n"
+msgstr " modulo %s\n"
+
+#: utils/cpuidle-info.c:99
+#, c-format
+msgid "Could not determine cpuidle governor\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:103
+#, c-format
+msgid "CPUidle governor: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:122
+#, c-format
+msgid "CPU %u: Can't read C-state info\n"
+msgstr ""
+
+#. printf("Cstates: %d\n", cstates);
+#: utils/cpuidle-info.c:127
+#, c-format
+msgid "active state: C0\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:128
+#, c-format
+msgid "max_cstate: C%u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:129
+#, fuzzy, c-format
+msgid "maximum allowed latency: %lu usec\n"
+msgstr " latenza massima durante la transizione: "
+
+#: utils/cpuidle-info.c:130
+#, c-format
+msgid "states:\t\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:132
+#, c-format
+msgid " C%d: type[C%d] "
+msgstr ""
+
+#: utils/cpuidle-info.c:134
+#, c-format
+msgid "promotion[--] demotion[--] "
+msgstr ""
+
+#: utils/cpuidle-info.c:135
+#, c-format
+msgid "latency[%03lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:137
+#, c-format
+msgid "usage[%08lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:139
+#, c-format
+msgid "duration[%020Lu] \n"
+msgstr ""
+
+#: utils/cpuidle-info.c:147
+#, fuzzy, c-format
+msgid "Usage: cpupower idleinfo [options]\n"
+msgstr "Uso: cpufreq-info [opzioni]\n"
+
+#: utils/cpuidle-info.c:149
+#, fuzzy, c-format
+msgid " -s, --silent Only show general C-state information\n"
+msgstr " -e, --debug Mostra informazioni di debug\n"
+
+#: utils/cpuidle-info.c:150
+#, fuzzy, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"acpi/processor/*/power\n"
+" interface in older kernels\n"
+msgstr ""
+" -o, --proc Stampa le informazioni come se provenissero dalla\n"
+" interfaccia cpufreq /proc/ presente nei kernel\n"
+" 2.4 ed i primi 2.6\n"
+
+#: utils/cpuidle-info.c:209
+#, fuzzy, c-format
+msgid "You can't specify more than one output-specific argument\n"
+msgstr ""
+"Non è possibile specificare più di una volta l'opzione --cpu e/o\n"
+"specificare più di un parametro di output specifico\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU CPU number which information shall be determined "
+#~ "about\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU Numero di CPU per la quale ottenere le "
+#~ "informazioni\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be "
+#~ "modified\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU numero di CPU per la quale modificare le "
+#~ "impostazioni\n"
+
+#, fuzzy
+#~ msgid " CPUs which coordinate software frequency requirements: "
+#~ msgstr ""
+#~ " CPU per le quali e` necessario cambiare la frequenza "
+#~ "contemporaneamente: "
diff --git a/tools/power/cpupower/po/pt.po b/tools/power/cpupower/po/pt.po
new file mode 100644
index 000000000..990f5267f
--- /dev/null
+++ b/tools/power/cpupower/po/pt.po
@@ -0,0 +1,957 @@
+# Brazilian Portuguese translations for cpufrequtils package
+# Copyright (C) 2008 THE cpufrequtils'S COPYRIGHT HOLDER
+# This file is distributed under the same license as the cpufrequtils package.
+# Claudio Eduardo <claudioeddy@gmail.com>, 2009.
+#
+#
+msgid ""
+msgstr ""
+"Project-Id-Version: cpufrequtils 004\n"
+"Report-Msgid-Bugs-To: \n"
+"POT-Creation-Date: 2011-03-08 17:03+0100\n"
+"PO-Revision-Date: 2008-06-14 22:16-0400\n"
+"Last-Translator: Claudio Eduardo <claudioeddy@gmail.com>\n"
+"MIME-Version: 1.0\n"
+"Content-Type: text/plain; charset=UTF-8\n"
+"Content-Transfer-Encoding: 8bit\n"
+
+#: utils/idle_monitor/nhm_idle.c:36
+msgid "Processor Core C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:43
+msgid "Processor Core C6"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:51
+msgid "Processor Package C3"
+msgstr ""
+
+#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70
+msgid "Processor Package C6"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:33
+msgid "Processor Core C7"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:40
+msgid "Processor Package C2"
+msgstr ""
+
+#: utils/idle_monitor/snb_idle.c:47
+msgid "Processor Package C7"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:56
+msgid "Package in sleep state (PC1 or deeper)"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:63
+msgid "Processor Package C1"
+msgstr ""
+
+#: utils/idle_monitor/amd_fam14h_idle.c:77
+msgid "North Bridge P1 boolean counter (returns 0 or 1)"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:35
+msgid "Processor Core not idle"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:42
+msgid "Processor Core in an idle state"
+msgstr ""
+
+#: utils/idle_monitor/mperf_monitor.c:50
+msgid "Average Frequency (including boost) in MHz"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:66
+#, c-format
+msgid ""
+"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:69
+#, c-format
+msgid ""
+"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i "
+"interval_sec | -c command ...]\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:71
+#, c-format
+msgid "\t -v: be more verbose\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:73
+#, c-format
+msgid "\t -h: print this help\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:74
+#, c-format
+msgid "\t -i: time intervall to measure for in seconds (default 1)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:75
+#, c-format
+msgid "\t -t: show CPU topology/hierarchy\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:76
+#, c-format
+msgid "\t -l: list available CPU sleep monitors (for use with -m)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:77
+#, c-format
+msgid "\t -m: show specific CPU sleep monitors only (in same order)\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:79
+#, c-format
+msgid ""
+"only one of: -t, -l, -m are allowed\n"
+"If none of them is passed,"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:80
+#, c-format
+msgid " all supported monitors are shown\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:197
+#, c-format
+msgid "Monitor %s, Counter %s has no count function. Implementation error\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:207
+#, c-format
+msgid " *is offline\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:236
+#, c-format
+msgid "%s: max monitor name length (%d) exceeded\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:250
+#, c-format
+msgid "No matching monitor found in %s, try -l option\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:266
+#, c-format
+msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:319
+#, c-format
+msgid "%s took %.5f seconds and exited with status %d\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:406
+#, c-format
+msgid "Cannot read number of available processors\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:417
+#, c-format
+msgid "Available monitor %s needs root access\n"
+msgstr ""
+
+#: utils/idle_monitor/cpupower-monitor.c:428
+#, c-format
+msgid "No HW Cstate monitors found\n"
+msgstr ""
+
+#: utils/cpupower.c:78
+#, c-format
+msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n"
+msgstr ""
+
+#: utils/cpupower.c:79
+#, c-format
+msgid "cpupower --version\n"
+msgstr ""
+
+#: utils/cpupower.c:80
+#, c-format
+msgid "Supported subcommands are:\n"
+msgstr ""
+
+#: utils/cpupower.c:83
+#, c-format
+msgid ""
+"\n"
+"Some subcommands can make use of the -c cpulist option.\n"
+msgstr ""
+
+#: utils/cpupower.c:84
+#, c-format
+msgid "Look at the general cpupower manpage how to use it\n"
+msgstr ""
+
+#: utils/cpupower.c:85
+#, c-format
+msgid "and read up the subcommand's manpage whether it is supported.\n"
+msgstr ""
+
+#: utils/cpupower.c:86
+#, c-format
+msgid ""
+"\n"
+"Use cpupower help subcommand for getting help for above subcommands.\n"
+msgstr ""
+
+#: utils/cpupower.c:91
+#, c-format
+msgid "Report errors and bugs to %s, please.\n"
+msgstr "Reporte erros e bugs para %s, por favor.\n"
+
+#: utils/cpupower.c:114
+#, c-format
+msgid "Error parsing cpu list\n"
+msgstr ""
+
+#: utils/cpupower.c:172
+#, c-format
+msgid "Subcommand %s needs root privileges\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:31
+#, c-format
+msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n"
+msgstr "Não foi possível contar o número de CPUs (%s: %s), assumindo 1\n"
+
+#: utils/cpufreq-info.c:63
+#, c-format
+msgid ""
+" minimum CPU frequency - maximum CPU frequency - governor\n"
+msgstr ""
+" frequência mínina do CPU - frequência máxima do CPU - "
+"regulador\n"
+
+#: utils/cpufreq-info.c:151
+#, c-format
+msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n"
+msgstr ""
+
+#. P state changes via MSR are identified via cpuid 80000007
+#. on Intel and AMD, but we assume boost capable machines can do that
+#. if (cpuid_eax(0x80000000) >= 0x80000007
+#. && (cpuid_edx(0x80000007) & (1 << 7)))
+#.
+#: utils/cpufreq-info.c:161
+#, c-format
+msgid " boost state support: \n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163
+#, c-format
+msgid " Supported: %s\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "yes"
+msgstr ""
+
+#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164
+msgid "no"
+msgstr ""
+
+#: utils/cpufreq-info.c:164
+#, fuzzy, c-format
+msgid " Active: %s\n"
+msgstr " driver: %s\n"
+
+#: utils/cpufreq-info.c:177
+#, c-format
+msgid " Boost States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:178
+#, c-format
+msgid " Total States: %d\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:181
+#, c-format
+msgid " Pstate-Pb%d: %luMHz (boost state)\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:184
+#, c-format
+msgid " Pstate-P%d: %luMHz\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:211
+#, c-format
+msgid " no or unknown cpufreq driver is active on this CPU\n"
+msgstr " nenhum ou driver do cpufreq deconhecido está ativo nesse CPU\n"
+
+#: utils/cpufreq-info.c:213
+#, c-format
+msgid " driver: %s\n"
+msgstr " driver: %s\n"
+
+#: utils/cpufreq-info.c:219
+#, c-format
+msgid " CPUs which run at the same hardware frequency: "
+msgstr " CPUs que rodam na mesma frequência de hardware: "
+
+#: utils/cpufreq-info.c:230
+#, c-format
+msgid " CPUs which need to have their frequency coordinated by software: "
+msgstr " CPUs que precisam ter suas frequências coordenadas por software: "
+
+#: utils/cpufreq-info.c:241
+#, c-format
+msgid " maximum transition latency: "
+msgstr " maior latência de transição: "
+
+#: utils/cpufreq-info.c:247
+#, c-format
+msgid " hardware limits: "
+msgstr " limites do hardware: "
+
+#: utils/cpufreq-info.c:256
+#, c-format
+msgid " available frequency steps: "
+msgstr " níveis de frequência disponíveis: "
+
+#: utils/cpufreq-info.c:269
+#, c-format
+msgid " available cpufreq governors: "
+msgstr " reguladores do cpufreq disponíveis: "
+
+#: utils/cpufreq-info.c:280
+#, c-format
+msgid " current policy: frequency should be within "
+msgstr " política de frequência atual deve estar entre "
+
+#: utils/cpufreq-info.c:282
+#, c-format
+msgid " and "
+msgstr " e "
+
+#: utils/cpufreq-info.c:286
+#, c-format
+msgid ""
+"The governor \"%s\" may decide which speed to use\n"
+" within this range.\n"
+msgstr ""
+"O regulador \"%s\" deve decidir qual velocidade usar\n"
+" dentro desse limite.\n"
+
+#: utils/cpufreq-info.c:293
+#, c-format
+msgid " current CPU frequency is "
+msgstr " frequência atual do CPU é "
+
+#: utils/cpufreq-info.c:296
+#, c-format
+msgid " (asserted by call to hardware)"
+msgstr " (declarado por chamada ao hardware)"
+
+#: utils/cpufreq-info.c:304
+#, c-format
+msgid " cpufreq stats: "
+msgstr " status do cpufreq: "
+
+#: utils/cpufreq-info.c:472
+#, fuzzy, c-format
+msgid "Usage: cpupower freqinfo [options]\n"
+msgstr "Uso: cpufreq-info [opções]\n"
+
+#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23
+#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148
+#, c-format
+msgid "Options:\n"
+msgstr "Opções:\n"
+
+#: utils/cpufreq-info.c:474
+#, fuzzy, c-format
+msgid " -e, --debug Prints out debug information [default]\n"
+msgstr " -e, --debug Mostra informação de debug\n"
+
+#: utils/cpufreq-info.c:475
+#, c-format
+msgid ""
+" -f, --freq Get frequency the CPU currently runs at, according\n"
+" to the cpufreq core *\n"
+msgstr ""
+" -f, --freq Obtem a frequência na qual o CPU roda no momento, de "
+"acordo\n"
+" com o núcleo do cpufreq *\n"
+
+#: utils/cpufreq-info.c:477
+#, c-format
+msgid ""
+" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n"
+" it from hardware (only available to root) *\n"
+msgstr ""
+" -w, --hwfreq Obtem a frequência na qual o CPU está operando no "
+"momento,\n"
+" através de leitura no hardware (disponível somente "
+"para root) *\n"
+
+#: utils/cpufreq-info.c:479
+#, c-format
+msgid ""
+" -l, --hwlimits Determine the minimum and maximum CPU frequency "
+"allowed *\n"
+msgstr ""
+" -l, --hwlimits Determina a frequência mínima e máxima do CPU "
+"permitida *\n"
+
+#: utils/cpufreq-info.c:480
+#, c-format
+msgid " -d, --driver Determines the used cpufreq kernel driver *\n"
+msgstr ""
+" -d, --driver Determina o driver do kernel do cpufreq usado *\n"
+
+#: utils/cpufreq-info.c:481
+#, c-format
+msgid " -p, --policy Gets the currently used cpufreq policy *\n"
+msgstr ""
+"--p, --policy Obtem a política do cpufreq em uso no momento *\n"
+
+#: utils/cpufreq-info.c:482
+#, c-format
+msgid " -g, --governors Determines available cpufreq governors *\n"
+msgstr ""
+" -g, --governors Determina reguladores do cpufreq disponíveis *\n"
+
+#: utils/cpufreq-info.c:483
+#, c-format
+msgid ""
+" -r, --related-cpus Determines which CPUs run at the same hardware "
+"frequency *\n"
+msgstr ""
+" -r, --related-cpus Determina quais CPUs rodam na mesma frequência de "
+"hardware *\n"
+
+#: utils/cpufreq-info.c:484
+#, c-format
+msgid ""
+" -a, --affected-cpus Determines which CPUs need to have their frequency\n"
+" coordinated by software *\n"
+msgstr ""
+" -a, --affected-cpus Determina quais CPUs precisam ter suas frequências\n"
+" coordenadas por software *\n"
+
+#: utils/cpufreq-info.c:486
+#, c-format
+msgid " -s, --stats Shows cpufreq statistics if available\n"
+msgstr " -s, --stats Mostra estatísticas do cpufreq se disponíveis\n"
+
+#: utils/cpufreq-info.c:487
+#, c-format
+msgid ""
+" -y, --latency Determines the maximum latency on CPU frequency "
+"changes *\n"
+msgstr ""
+" -y, --latency Determina a latência máxima nas trocas de frequência "
+"do CPU *\n"
+
+#: utils/cpufreq-info.c:488
+#, c-format
+msgid " -b, --boost Checks for turbo or boost modes *\n"
+msgstr ""
+
+#: utils/cpufreq-info.c:489
+#, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"cpufreq\n"
+" interface in 2.4. and early 2.6. kernels\n"
+msgstr ""
+" -o, --proc Mostra informação do tipo provida pela interface /"
+"proc/cpufreq\n"
+" em kernels 2.4. e mais recentes 2.6\n"
+
+#: utils/cpufreq-info.c:491
+#, c-format
+msgid ""
+" -m, --human human-readable output for the -f, -w, -s and -y "
+"parameters\n"
+msgstr ""
+" -m, --human saída legível para humanos para os parâmetros -f, -w, "
+"-s e -y\n"
+
+#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Imprime essa tela\n"
+
+#: utils/cpufreq-info.c:495
+#, c-format
+msgid ""
+"If no argument or only the -c, --cpu parameter is given, debug output about\n"
+"cpufreq is printed which is useful e.g. for reporting bugs.\n"
+msgstr ""
+"Se nenhum argumento ou somente o parâmetro -c, --cpu é dado, informação de "
+"debug sobre\n"
+"o cpufreq é mostrada, o que é útil por exemplo para reportar bugs.\n"
+
+#: utils/cpufreq-info.c:497
+#, c-format
+msgid ""
+"For the arguments marked with *, omitting the -c or --cpu argument is\n"
+"equivalent to setting it to zero\n"
+msgstr ""
+"Para os argumentos marcados com *, omitir o argumento -c ou --cpu é\n"
+"equivalente a setá-lo como zero\n"
+
+#: utils/cpufreq-info.c:580
+#, c-format
+msgid ""
+"The argument passed to this tool can't be combined with passing a --cpu "
+"argument\n"
+msgstr ""
+"O argumento usado pra essa ferramenta não pode ser combinado com um "
+"argumento --cpu\n"
+
+#: utils/cpufreq-info.c:596
+#, c-format
+msgid ""
+"You can't specify more than one --cpu parameter and/or\n"
+"more than one output-specific argument\n"
+msgstr ""
+"Você não pode especificar mais do que um parâmetro --cpu e/ou\n"
+"mais do que um argumento de saída específico\n"
+
+#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42
+#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213
+#, c-format
+msgid "invalid or unknown argument\n"
+msgstr "argumento inválido ou desconhecido\n"
+
+#: utils/cpufreq-info.c:617
+#, c-format
+msgid "couldn't analyze CPU %d as it doesn't seem to be present\n"
+msgstr ""
+"não foi possível analisar o CPU % já que o mesmo parece não estar presente\n"
+
+#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142
+#, c-format
+msgid "analyzing CPU %d:\n"
+msgstr "analisando o CPU %d:\n"
+
+#: utils/cpufreq-set.c:25
+#, fuzzy, c-format
+msgid "Usage: cpupower frequency-set [options]\n"
+msgstr "Uso: cpufreq-set [opções]\n"
+
+#: utils/cpufreq-set.c:27
+#, c-format
+msgid ""
+" -d FREQ, --min FREQ new minimum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -d FREQ, --min FREQ nova frequência mínima do CPU que o regulador "
+"deve selecionar\n"
+
+#: utils/cpufreq-set.c:28
+#, c-format
+msgid ""
+" -u FREQ, --max FREQ new maximum CPU frequency the governor may "
+"select\n"
+msgstr ""
+" -u FREQ, --max FREQ nova frequência máxima do CPU que o regulador "
+"deve escolher\n"
+
+#: utils/cpufreq-set.c:29
+#, c-format
+msgid " -g GOV, --governor GOV new cpufreq governor\n"
+msgstr " -g GOV, --governor GOV novo regulador do cpufreq\n"
+
+#: utils/cpufreq-set.c:30
+#, c-format
+msgid ""
+" -f FREQ, --freq FREQ specific frequency to be set. Requires userspace\n"
+" governor to be available and loaded\n"
+msgstr ""
+" -f FREQ, --freq FREQ frequência específica para ser setada. Necessita "
+"que o regulador em\n"
+" nível de usuário esteja disponível e carregado\n"
+
+#: utils/cpufreq-set.c:32
+#, c-format
+msgid " -r, --related Switches all hardware-related CPUs\n"
+msgstr ""
+" -r, --related Modifica todos os CPUs relacionados ao hardware\n"
+
+#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27
+#, c-format
+msgid " -h, --help Prints out this screen\n"
+msgstr " -h, --help Mostra essa tela\n"
+
+#: utils/cpufreq-set.c:35
+#, fuzzy, c-format
+msgid ""
+"Notes:\n"
+"1. Omitting the -c or --cpu argument is equivalent to setting it to \"all\"\n"
+msgstr ""
+"Para os argumentos marcados com *, omitir o argumento -c ou --cpu é\n"
+"equivalente a setá-lo como zero\n"
+
+#: utils/cpufreq-set.c:37
+#, fuzzy, c-format
+msgid ""
+"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other "
+"parameter\n"
+" except the -c CPU, --cpu CPU parameter\n"
+"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n"
+" by postfixing the value with the wanted unit name, without any space\n"
+" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+msgstr ""
+"Notas:\n"
+"1. Omitir o argumento -c or --cpu é equivalente a setá-lo como zero\n"
+"2. O parâmetro -f FREQ, --freq FREQ não pode ser combinado com qualquer "
+"outro parâmetro\n"
+" exceto com o parâmetro -c CPU, --cpu CPU\n"
+"3. FREQuências podem ser usadas em Hz, kHz (padrão), MHz, GHz, o THz\n"
+" colocando o nome desejado da unidade após o valor, sem qualquer espaço\n"
+" (FREQuência em kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n"
+
+#: utils/cpufreq-set.c:57
+#, c-format
+msgid ""
+"Error setting new values. Common errors:\n"
+"- Do you have proper administration rights? (super-user?)\n"
+"- Is the governor you requested available and modprobed?\n"
+"- Trying to set an invalid policy?\n"
+"- Trying to set a specific frequency, but userspace governor is not "
+"available,\n"
+" for example because of hardware which cannot be set to a specific "
+"frequency\n"
+" or because the userspace governor isn't loaded?\n"
+msgstr ""
+"Erro ao setar novos valores. Erros comuns:\n"
+"- Você tem direitos administrativos necessários? (super-usuário?)\n"
+"- O regulador que você requesitou está disponível e foi \"modprobed\"?\n"
+"- Tentando setar uma política inválida?\n"
+"- Tentando setar uma frequência específica, mas o regulador em nível de "
+"usuário não está disponível,\n"
+" por exemplo devido ao hardware que não pode ser setado pra uma frequência "
+"específica\n"
+" ou porque o regulador em nível de usuário não foi carregado?\n"
+
+#: utils/cpufreq-set.c:170
+#, c-format
+msgid "wrong, unknown or unhandled CPU?\n"
+msgstr "CPU errado, desconhecido ou inesperado?\n"
+
+#: utils/cpufreq-set.c:302
+#, c-format
+msgid ""
+"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n"
+"-g/--governor parameters\n"
+msgstr ""
+"o parâmetro -f/--freq não pode ser combinado com os parâmetros -d/--min, -"
+"u/--max ou\n"
+"-g/--governor\n"
+
+#: utils/cpufreq-set.c:308
+#, c-format
+msgid ""
+"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
+"-g/--governor must be passed\n"
+msgstr ""
+"Pelo menos um parâmetro entre -f/--freq, -d/--min, -u/--max, e\n"
+"-g/--governor deve ser usado\n"
+
+#: utils/cpufreq-set.c:347
+#, c-format
+msgid "Setting cpu: %d\n"
+msgstr ""
+
+#: utils/cpupower-set.c:22
+#, c-format
+msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:24
+#, c-format
+msgid ""
+" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-set.c:26
+#, c-format
+msgid ""
+" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:27
+#, c-format
+msgid ""
+" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler "
+"policy.\n"
+msgstr ""
+
+#: utils/cpupower-set.c:80
+#, c-format
+msgid "--perf-bias param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:91
+#, c-format
+msgid "--sched-mc param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:102
+#, c-format
+msgid "--sched-smt param out of range [0-%d]\n"
+msgstr ""
+
+#: utils/cpupower-set.c:121
+#, c-format
+msgid "Error setting sched-mc %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:127
+#, c-format
+msgid "Error setting sched-smt %s\n"
+msgstr ""
+
+#: utils/cpupower-set.c:146
+#, c-format
+msgid "Error setting perf-bias value on CPU %d\n"
+msgstr ""
+
+#: utils/cpupower-info.c:21
+#, c-format
+msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n"
+msgstr ""
+
+#: utils/cpupower-info.c:23
+#, c-format
+msgid ""
+" -b, --perf-bias Gets CPU's power vs performance policy on some\n"
+" Intel models [0-15], see manpage for details\n"
+msgstr ""
+
+#: utils/cpupower-info.c:25
+#, fuzzy, c-format
+msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n"
+msgstr ""
+"--p, --policy Obtem a política do cpufreq em uso no momento *\n"
+
+#: utils/cpupower-info.c:26
+#, c-format
+msgid ""
+" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n"
+msgstr ""
+
+#: utils/cpupower-info.c:28
+#, c-format
+msgid ""
+"\n"
+"Passing no option will show all info, by default only on core 0\n"
+msgstr ""
+
+#: utils/cpupower-info.c:102
+#, c-format
+msgid "System's multi core scheduler setting: "
+msgstr ""
+
+#. if sysfs file is missing it's: errno == ENOENT
+#: utils/cpupower-info.c:105 utils/cpupower-info.c:114
+#, c-format
+msgid "not supported\n"
+msgstr ""
+
+#: utils/cpupower-info.c:111
+#, c-format
+msgid "System's thread sibling scheduler setting: "
+msgstr ""
+
+#: utils/cpupower-info.c:126
+#, c-format
+msgid "Intel's performance bias setting needs root privileges\n"
+msgstr ""
+
+#: utils/cpupower-info.c:128
+#, c-format
+msgid "System does not support Intel's performance bias setting\n"
+msgstr ""
+
+#: utils/cpupower-info.c:147
+#, c-format
+msgid "Could not read perf-bias value\n"
+msgstr ""
+
+#: utils/cpupower-info.c:150
+#, c-format
+msgid "perf-bias: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:28
+#, fuzzy, c-format
+msgid "Analyzing CPU %d:\n"
+msgstr "analisando o CPU %d:\n"
+
+#: utils/cpuidle-info.c:32
+#, c-format
+msgid "CPU %u: No idle states\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:36
+#, c-format
+msgid "CPU %u: Can't read idle state info\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:41
+#, c-format
+msgid "Could not determine max idle state %u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:46
+#, c-format
+msgid "Number of idle states: %d\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:48
+#, fuzzy, c-format
+msgid "Available idle states:"
+msgstr " níveis de frequência disponíveis: "
+
+#: utils/cpuidle-info.c:71
+#, c-format
+msgid "Flags/Description: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:74
+#, c-format
+msgid "Latency: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:76
+#, c-format
+msgid "Usage: %lu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:78
+#, c-format
+msgid "Duration: %llu\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:90
+#, c-format
+msgid "Could not determine cpuidle driver\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:94
+#, fuzzy, c-format
+msgid "CPUidle driver: %s\n"
+msgstr " driver: %s\n"
+
+#: utils/cpuidle-info.c:99
+#, c-format
+msgid "Could not determine cpuidle governor\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:103
+#, c-format
+msgid "CPUidle governor: %s\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:122
+#, c-format
+msgid "CPU %u: Can't read C-state info\n"
+msgstr ""
+
+#. printf("Cstates: %d\n", cstates);
+#: utils/cpuidle-info.c:127
+#, c-format
+msgid "active state: C0\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:128
+#, c-format
+msgid "max_cstate: C%u\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:129
+#, fuzzy, c-format
+msgid "maximum allowed latency: %lu usec\n"
+msgstr " maior latência de transição: "
+
+#: utils/cpuidle-info.c:130
+#, c-format
+msgid "states:\t\n"
+msgstr ""
+
+#: utils/cpuidle-info.c:132
+#, c-format
+msgid " C%d: type[C%d] "
+msgstr ""
+
+#: utils/cpuidle-info.c:134
+#, c-format
+msgid "promotion[--] demotion[--] "
+msgstr ""
+
+#: utils/cpuidle-info.c:135
+#, c-format
+msgid "latency[%03lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:137
+#, c-format
+msgid "usage[%08lu] "
+msgstr ""
+
+#: utils/cpuidle-info.c:139
+#, c-format
+msgid "duration[%020Lu] \n"
+msgstr ""
+
+#: utils/cpuidle-info.c:147
+#, fuzzy, c-format
+msgid "Usage: cpupower idleinfo [options]\n"
+msgstr "Uso: cpufreq-info [opções]\n"
+
+#: utils/cpuidle-info.c:149
+#, fuzzy, c-format
+msgid " -s, --silent Only show general C-state information\n"
+msgstr " -e, --debug Mostra informação de debug\n"
+
+#: utils/cpuidle-info.c:150
+#, fuzzy, c-format
+msgid ""
+" -o, --proc Prints out information like provided by the /proc/"
+"acpi/processor/*/power\n"
+" interface in older kernels\n"
+msgstr ""
+" -o, --proc Mostra informação do tipo provida pela interface /"
+"proc/cpufreq\n"
+" em kernels 2.4. e mais recentes 2.6\n"
+
+#: utils/cpuidle-info.c:209
+#, fuzzy, c-format
+msgid "You can't specify more than one output-specific argument\n"
+msgstr ""
+"Você não pode especificar mais do que um parâmetro --cpu e/ou\n"
+"mais do que um argumento de saída específico\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU CPU number which information shall be determined "
+#~ "about\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU número do CPU sobre o qual as inforções devem ser "
+#~ "determinadas\n"
+
+#~ msgid ""
+#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be "
+#~ "modified\n"
+#~ msgstr ""
+#~ " -c CPU, --cpu CPU número do CPU onde as configurações do cpufreq "
+#~ "vão ser modificadas\n"
diff --git a/tools/power/cpupower/utils/builtin.h b/tools/power/cpupower/utils/builtin.h
new file mode 100644
index 000000000..f7065ae60
--- /dev/null
+++ b/tools/power/cpupower/utils/builtin.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUILTIN_H
+#define BUILTIN_H
+
+extern int cmd_set(int argc, const char **argv);
+extern int cmd_info(int argc, const char **argv);
+extern int cmd_freq_set(int argc, const char **argv);
+extern int cmd_freq_info(int argc, const char **argv);
+extern int cmd_idle_set(int argc, const char **argv);
+extern int cmd_idle_info(int argc, const char **argv);
+extern int cmd_monitor(int argc, const char **argv);
+
+#endif
diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
new file mode 100644
index 000000000..ccd08dd00
--- /dev/null
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -0,0 +1,648 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include <getopt.h>
+
+#include "cpufreq.h"
+#include "helpers/sysfs.h"
+#include "helpers/helpers.h"
+#include "helpers/bitmask.h"
+
+#define LINE_LEN 10
+
+static unsigned int count_cpus(void)
+{
+ FILE *fp;
+ char value[LINE_LEN];
+ unsigned int ret = 0;
+ unsigned int cpunr = 0;
+
+ fp = fopen("/proc/stat", "r");
+ if (!fp) {
+ printf(_("Couldn't count the number of CPUs (%s: %s), assuming 1\n"), "/proc/stat", strerror(errno));
+ return 1;
+ }
+
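+	/*
+	 * Scan /proc/stat for per-CPU "cpuN" lines and remember the
+	 * highest CPU number seen; the aggregate "cpu " line is skipped
+	 * by the strstr() check below.
+	 */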
+ while (!feof(fp)) {
+ if (!fgets(value, LINE_LEN, fp))
+ continue;
+ value[LINE_LEN - 1] = '\0';
+ if (strlen(value) < (LINE_LEN - 2))
+ continue;
+ if (strstr(value, "cpu "))
+ continue;
+ if (sscanf(value, "cpu%d ", &cpunr) != 1)
+ continue;
+ if (cpunr > ret)
+ ret = cpunr;
+ }
+ fclose(fp);
+
+ /* cpu count starts from 0, on error return 1 (UP) */
+ return ret + 1;
+}
+
+
+static void proc_cpufreq_output(void)
+{
+ unsigned int cpu, nr_cpus;
+ struct cpufreq_policy *policy;
+ unsigned int min_pctg = 0;
+ unsigned int max_pctg = 0;
+ unsigned long min, max;
+
+ printf(_(" minimum CPU frequency - maximum CPU frequency - governor\n"));
+
+ nr_cpus = count_cpus();
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ policy = cpufreq_get_policy(cpu);
+ if (!policy)
+ continue;
+
+ if (cpufreq_get_hardware_limits(cpu, &min, &max)) {
+ max = 0;
+ } else {
+ min_pctg = (policy->min * 100) / max;
+ max_pctg = (policy->max * 100) / max;
+ }
+ printf("CPU%3d %9lu kHz (%3d %%) - %9lu kHz (%3d %%) - %s\n",
+		cpu, policy->min, max ? min_pctg : 0, policy->max,
+ max ? max_pctg : 0, policy->governor);
+
+ cpufreq_put_policy(policy);
+ }
+}
+
+static int no_rounding;
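+/*
+ * cpufreq reports speeds in kHz; print_speed() scales the value to a
+ * human-readable unit. Unless --no-rounding was passed, the value is
+ * rounded half-up to the precision that is actually printed.
+ */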
+static void print_speed(unsigned long speed)
+{
+ unsigned long tmp;
+
+ if (no_rounding) {
+ if (speed > 1000000)
+ printf("%u.%06u GHz", ((unsigned int) speed/1000000),
+ ((unsigned int) speed%1000000));
+ else if (speed > 1000)
+ printf("%u.%03u MHz", ((unsigned int) speed/1000),
+ (unsigned int) (speed%1000));
+ else
+ printf("%lu kHz", speed);
+ } else {
+ if (speed > 1000000) {
+ tmp = speed%10000;
+ if (tmp >= 5000)
+ speed += 10000;
+ printf("%u.%02u GHz", ((unsigned int) speed/1000000),
+ ((unsigned int) (speed%1000000)/10000));
+ } else if (speed > 100000) {
+ tmp = speed%1000;
+ if (tmp >= 500)
+ speed += 1000;
+ printf("%u MHz", ((unsigned int) speed/1000));
+ } else if (speed > 1000) {
+ tmp = speed%100;
+ if (tmp >= 50)
+ speed += 100;
+ printf("%u.%01u MHz", ((unsigned int) speed/1000),
+ ((unsigned int) (speed%1000)/100));
+		} else
+			/* fall back to plain kHz, mirroring print_duration() */
+			printf("%lu kHz", speed);
+ }
+
+ return;
+}
+
+static void print_duration(unsigned long duration)
+{
+ unsigned long tmp;
+
+ if (no_rounding) {
+ if (duration > 1000000)
+ printf("%u.%06u ms", ((unsigned int) duration/1000000),
+ ((unsigned int) duration%1000000));
+ else if (duration > 100000)
+ printf("%u us", ((unsigned int) duration/1000));
+ else if (duration > 1000)
+ printf("%u.%03u us", ((unsigned int) duration/1000),
+ ((unsigned int) duration%1000));
+ else
+ printf("%lu ns", duration);
+ } else {
+ if (duration > 1000000) {
+ tmp = duration%10000;
+ if (tmp >= 5000)
+ duration += 10000;
+ printf("%u.%02u ms", ((unsigned int) duration/1000000),
+ ((unsigned int) (duration%1000000)/10000));
+ } else if (duration > 100000) {
+ tmp = duration%1000;
+ if (tmp >= 500)
+ duration += 1000;
+ printf("%u us", ((unsigned int) duration / 1000));
+ } else if (duration > 1000) {
+ tmp = duration%100;
+ if (tmp >= 50)
+ duration += 100;
+ printf("%u.%01u us", ((unsigned int) duration/1000),
+ ((unsigned int) (duration%1000)/100));
+ } else
+ printf("%lu ns", duration);
+ }
+ return;
+}
+
+/* --boost / -b */
+
+static int get_boost_mode(unsigned int cpu)
+{
+ int support, active, b_states = 0, ret, pstate_no, i;
+ /* ToDo: Make this more global */
+ unsigned long pstates[MAX_HW_PSTATES] = {0,};
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_AMD &&
+ cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
+ return 0;
+
+ ret = cpufreq_has_boost_support(cpu, &support, &active, &b_states);
+ if (ret) {
+ printf(_("Error while evaluating Boost Capabilities"
+ " on CPU %d -- are you root?\n"), cpu);
+ return ret;
+ }
+ /* P state changes via MSR are identified via cpuid 80000007
+ on Intel and AMD, but we assume boost capable machines can do that
+ if (cpuid_eax(0x80000000) >= 0x80000007
+ && (cpuid_edx(0x80000007) & (1 << 7)))
+ */
+
+ printf(_(" boost state support:\n"));
+
+ printf(_(" Supported: %s\n"), support ? _("yes") : _("no"));
+ printf(_(" Active: %s\n"), active ? _("yes") : _("no"));
+
+ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD &&
+ cpupower_cpu_info.family >= 0x10) {
+ ret = decode_pstates(cpu, cpupower_cpu_info.family, b_states,
+ pstates, &pstate_no);
+ if (ret)
+ return ret;
+
+ printf(_(" Boost States: %d\n"), b_states);
+ printf(_(" Total States: %d\n"), pstate_no);
+ for (i = 0; i < pstate_no; i++) {
+ if (!pstates[i])
+ continue;
+ if (i < b_states)
+ printf(_(" Pstate-Pb%d: %luMHz (boost state)"
+ "\n"), i, pstates[i]);
+ else
+ printf(_(" Pstate-P%d: %luMHz\n"),
+ i - b_states, pstates[i]);
+ }
+ } else if (cpupower_cpu_info.caps & CPUPOWER_CAP_HAS_TURBO_RATIO) {
+ double bclk;
+ unsigned long long intel_turbo_ratio = 0;
+ unsigned int ratio;
+
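+		/*
+		 * Each byte of the turbo ratio MSR holds the maximum boost
+		 * ratio for 4, 3, 2 and 1 active cores (highest byte first);
+		 * multiplying a ratio by the base clock gives the MHz value.
+		 */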
+ /* Any way to autodetect this ? */
+ if (cpupower_cpu_info.caps & CPUPOWER_CAP_IS_SNB)
+ bclk = 100.00;
+ else
+ bclk = 133.33;
+ intel_turbo_ratio = msr_intel_get_turbo_ratio(cpu);
+ dprint (" Ratio: 0x%llx - bclk: %f\n",
+ intel_turbo_ratio, bclk);
+
+ ratio = (intel_turbo_ratio >> 24) & 0xFF;
+ if (ratio)
+ printf(_(" %.0f MHz max turbo 4 active cores\n"),
+ ratio * bclk);
+
+ ratio = (intel_turbo_ratio >> 16) & 0xFF;
+ if (ratio)
+ printf(_(" %.0f MHz max turbo 3 active cores\n"),
+ ratio * bclk);
+
+ ratio = (intel_turbo_ratio >> 8) & 0xFF;
+ if (ratio)
+ printf(_(" %.0f MHz max turbo 2 active cores\n"),
+ ratio * bclk);
+
+ ratio = (intel_turbo_ratio >> 0) & 0xFF;
+ if (ratio)
+ printf(_(" %.0f MHz max turbo 1 active cores\n"),
+ ratio * bclk);
+ }
+ return 0;
+}
+
+/* --freq / -f */
+
+static int get_freq_kernel(unsigned int cpu, unsigned int human)
+{
+ unsigned long freq = cpufreq_get_freq_kernel(cpu);
+ printf(_(" current CPU frequency: "));
+ if (!freq) {
+ printf(_(" Unable to call to kernel\n"));
+ return -EINVAL;
+ }
+ if (human) {
+ print_speed(freq);
+ } else
+ printf("%lu", freq);
+ printf(_(" (asserted by call to kernel)\n"));
+ return 0;
+}
+
+
+/* --hwfreq / -w */
+
+static int get_freq_hardware(unsigned int cpu, unsigned int human)
+{
+ unsigned long freq = cpufreq_get_freq_hardware(cpu);
+ printf(_(" current CPU frequency: "));
+ if (!freq) {
+ printf("Unable to call hardware\n");
+ return -EINVAL;
+ }
+ if (human) {
+ print_speed(freq);
+ } else
+ printf("%lu", freq);
+ printf(_(" (asserted by call to hardware)\n"));
+ return 0;
+}
+
+/* --hwlimits / -l */
+
+static int get_hardware_limits(unsigned int cpu, unsigned int human)
+{
+ unsigned long min, max;
+
+ if (cpufreq_get_hardware_limits(cpu, &min, &max)) {
+ printf(_("Not Available\n"));
+ return -EINVAL;
+ }
+
+ if (human) {
+ printf(_(" hardware limits: "));
+ print_speed(min);
+ printf(" - ");
+ print_speed(max);
+ printf("\n");
+ } else {
+ printf("%lu %lu\n", min, max);
+ }
+ return 0;
+}
+
+/* --driver / -d */
+
+static int get_driver(unsigned int cpu)
+{
+ char *driver = cpufreq_get_driver(cpu);
+ if (!driver) {
+ printf(_(" no or unknown cpufreq driver is active on this CPU\n"));
+ return -EINVAL;
+ }
+ printf(" driver: %s\n", driver);
+ cpufreq_put_driver(driver);
+ return 0;
+}
+
+/* --policy / -p */
+
+static int get_policy(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_get_policy(cpu);
+ if (!policy) {
+ printf(_(" Unable to determine current policy\n"));
+ return -EINVAL;
+ }
+ printf(_(" current policy: frequency should be within "));
+ print_speed(policy->min);
+ printf(_(" and "));
+ print_speed(policy->max);
+
+ printf(".\n ");
+ printf(_("The governor \"%s\" may decide which speed to use\n"
+ " within this range.\n"),
+ policy->governor);
+ cpufreq_put_policy(policy);
+ return 0;
+}
+
+/* --governors / -g */
+
+static int get_available_governors(unsigned int cpu)
+{
+ struct cpufreq_available_governors *governors =
+ cpufreq_get_available_governors(cpu);
+
+ printf(_(" available cpufreq governors: "));
+ if (!governors) {
+ printf(_("Not Available\n"));
+ return -EINVAL;
+ }
+
+ while (governors->next) {
+ printf("%s ", governors->governor);
+ governors = governors->next;
+ }
+ printf("%s\n", governors->governor);
+ cpufreq_put_available_governors(governors);
+ return 0;
+}
+
+
+/* --affected-cpus / -a */
+
+static int get_affected_cpus(unsigned int cpu)
+{
+ struct cpufreq_affected_cpus *cpus = cpufreq_get_affected_cpus(cpu);
+
+ printf(_(" CPUs which need to have their frequency coordinated by software: "));
+ if (!cpus) {
+ printf(_("Not Available\n"));
+ return -EINVAL;
+ }
+
+ while (cpus->next) {
+ printf("%d ", cpus->cpu);
+ cpus = cpus->next;
+ }
+ printf("%d\n", cpus->cpu);
+ cpufreq_put_affected_cpus(cpus);
+ return 0;
+}
+
+/* --related-cpus / -r */
+
+static int get_related_cpus(unsigned int cpu)
+{
+ struct cpufreq_affected_cpus *cpus = cpufreq_get_related_cpus(cpu);
+
+ printf(_(" CPUs which run at the same hardware frequency: "));
+ if (!cpus) {
+ printf(_("Not Available\n"));
+ return -EINVAL;
+ }
+
+ while (cpus->next) {
+ printf("%d ", cpus->cpu);
+ cpus = cpus->next;
+ }
+ printf("%d\n", cpus->cpu);
+ cpufreq_put_related_cpus(cpus);
+ return 0;
+}
+
+/* --stats / -s */
+
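+/*
+ * Print one "frequency:time-in-state" pair per frequency (time as a
+ * percentage of the total with --human), followed by the total number
+ * of frequency transitions in parentheses.
+ */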
+static int get_freq_stats(unsigned int cpu, unsigned int human)
+{
+ unsigned long total_trans = cpufreq_get_transitions(cpu);
+ unsigned long long total_time;
+ struct cpufreq_stats *stats = cpufreq_get_stats(cpu, &total_time);
+ while (stats) {
+ if (human) {
+ print_speed(stats->frequency);
+ printf(":%.2f%%",
+ (100.0 * stats->time_in_state) / total_time);
+ } else
+ printf("%lu:%llu",
+ stats->frequency, stats->time_in_state);
+ stats = stats->next;
+ if (stats)
+ printf(", ");
+ }
+ cpufreq_put_stats(stats);
+ if (total_trans)
+ printf(" (%lu)\n", total_trans);
+ return 0;
+}
+
+/* --latency / -y */
+
+static int get_latency(unsigned int cpu, unsigned int human)
+{
+ unsigned long latency = cpufreq_get_transition_latency(cpu);
+
+ printf(_(" maximum transition latency: "));
+ if (!latency || latency == UINT_MAX) {
+ printf(_(" Cannot determine or is not supported.\n"));
+ return -EINVAL;
+ }
+
+ if (human) {
+ print_duration(latency);
+ printf("\n");
+ } else
+ printf("%lu\n", latency);
+ return 0;
+}
+
+static void debug_output_one(unsigned int cpu)
+{
+ struct cpufreq_available_frequencies *freqs;
+
+ get_driver(cpu);
+ get_related_cpus(cpu);
+ get_affected_cpus(cpu);
+ get_latency(cpu, 1);
+ get_hardware_limits(cpu, 1);
+
+ freqs = cpufreq_get_available_frequencies(cpu);
+ if (freqs) {
+ printf(_(" available frequency steps: "));
+ while (freqs->next) {
+ print_speed(freqs->frequency);
+ printf(", ");
+ freqs = freqs->next;
+ }
+ print_speed(freqs->frequency);
+ printf("\n");
+ cpufreq_put_available_frequencies(freqs);
+ }
+
+ get_available_governors(cpu);
+ get_policy(cpu);
+ if (get_freq_hardware(cpu, 1) < 0)
+ get_freq_kernel(cpu, 1);
+ get_boost_mode(cpu);
+}
+
+static struct option info_opts[] = {
+ {"debug", no_argument, NULL, 'e'},
+ {"boost", no_argument, NULL, 'b'},
+ {"freq", no_argument, NULL, 'f'},
+ {"hwfreq", no_argument, NULL, 'w'},
+ {"hwlimits", no_argument, NULL, 'l'},
+ {"driver", no_argument, NULL, 'd'},
+ {"policy", no_argument, NULL, 'p'},
+ {"governors", no_argument, NULL, 'g'},
+ {"related-cpus", no_argument, NULL, 'r'},
+ {"affected-cpus", no_argument, NULL, 'a'},
+ {"stats", no_argument, NULL, 's'},
+ {"latency", no_argument, NULL, 'y'},
+ {"proc", no_argument, NULL, 'o'},
+ {"human", no_argument, NULL, 'm'},
+ {"no-rounding", no_argument, NULL, 'n'},
+ { },
+};
+
+int cmd_freq_info(int argc, char **argv)
+{
+ extern char *optarg;
+ extern int optind, opterr, optopt;
+ int ret = 0, cont = 1;
+ unsigned int cpu = 0;
+ unsigned int human = 0;
+ int output_param = 0;
+
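+	/*
+	 * Each output-selecting option is recorded in output_param; a
+	 * second one sets it to -1, which is reported as an error once
+	 * option parsing has finished.
+	 */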
+ do {
+ ret = getopt_long(argc, argv, "oefwldpgrasmybn", info_opts,
+ NULL);
+ switch (ret) {
+ case '?':
+ output_param = '?';
+ cont = 0;
+ break;
+ case -1:
+ cont = 0;
+ break;
+ case 'b':
+ case 'o':
+ case 'a':
+ case 'r':
+ case 'g':
+ case 'p':
+ case 'd':
+ case 'l':
+ case 'w':
+ case 'f':
+ case 'e':
+ case 's':
+ case 'y':
+ if (output_param) {
+ output_param = -1;
+ cont = 0;
+ break;
+ }
+ output_param = ret;
+ break;
+ case 'm':
+ if (human) {
+ output_param = -1;
+ cont = 0;
+ break;
+ }
+ human = 1;
+ break;
+ case 'n':
+ no_rounding = 1;
+ break;
+ default:
+ fprintf(stderr, "invalid or unknown argument\n");
+ return EXIT_FAILURE;
+ }
+ } while (cont);
+
+ switch (output_param) {
+ case 'o':
+ if (!bitmask_isallclear(cpus_chosen)) {
+ printf(_("The argument passed to this tool can't be "
+ "combined with passing a --cpu argument\n"));
+ return -EINVAL;
+ }
+ break;
+ case 0:
+ output_param = 'e';
+ }
+
+ ret = 0;
+
+ /* Default is: show output of CPU 0 only */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setbit(cpus_chosen, 0);
+
+ switch (output_param) {
+ case -1:
+ printf(_("You can't specify more than one --cpu parameter and/or\n"
+ "more than one output-specific argument\n"));
+ return -EINVAL;
+ case '?':
+ printf(_("invalid or unknown argument\n"));
+ return -EINVAL;
+ case 'o':
+ proc_cpufreq_output();
+ return EXIT_SUCCESS;
+ }
+
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu))
+ continue;
+
+ printf(_("analyzing CPU %d:\n"), cpu);
+
+ if (sysfs_is_cpu_online(cpu) != 1) {
+ printf(_(" *is offline\n"));
+ printf("\n");
+ continue;
+ }
+
+ switch (output_param) {
+ case 'b':
+ get_boost_mode(cpu);
+ break;
+ case 'e':
+ debug_output_one(cpu);
+ break;
+ case 'a':
+ ret = get_affected_cpus(cpu);
+ break;
+ case 'r':
+ ret = get_related_cpus(cpu);
+ break;
+ case 'g':
+ ret = get_available_governors(cpu);
+ break;
+ case 'p':
+ ret = get_policy(cpu);
+ break;
+ case 'd':
+ ret = get_driver(cpu);
+ break;
+ case 'l':
+ ret = get_hardware_limits(cpu, human);
+ break;
+ case 'w':
+ ret = get_freq_hardware(cpu, human);
+ break;
+ case 'f':
+ ret = get_freq_kernel(cpu, human);
+ break;
+ case 's':
+ ret = get_freq_stats(cpu, human);
+ break;
+ case 'y':
+ ret = get_latency(cpu, human);
+ break;
+ }
+ if (ret)
+ return ret;
+ }
+ return ret;
+}
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
new file mode 100644
index 000000000..08a405593
--- /dev/null
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -0,0 +1,333 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <ctype.h>
+
+#include <getopt.h>
+
+#include "cpufreq.h"
+#include "cpuidle.h"
+#include "helpers/helpers.h"
+
+#define NORM_FREQ_LEN 32
+
+static struct option set_opts[] = {
+ {"min", required_argument, NULL, 'd'},
+ {"max", required_argument, NULL, 'u'},
+ {"governor", required_argument, NULL, 'g'},
+ {"freq", required_argument, NULL, 'f'},
+ {"related", no_argument, NULL, 'r'},
+ { },
+};
+
+static void print_error(void)
+{
+ printf(_("Error setting new values. Common errors:\n"
+ "- Do you have proper administration rights? (super-user?)\n"
+ "- Is the governor you requested available and modprobed?\n"
+ "- Trying to set an invalid policy?\n"
+ "- Trying to set a specific frequency, but userspace governor is not available,\n"
+ " for example because of hardware which cannot be set to a specific frequency\n"
+ " or because the userspace governor isn't loaded?\n"));
+}
+
+struct freq_units {
+ char *str_unit;
+ int power_of_ten;
+};
+
+const struct freq_units def_units[] = {
+ {"hz", -3},
+ {"khz", 0}, /* default */
+ {"mhz", 3},
+ {"ghz", 6},
+ {"thz", 9},
+ {NULL, 0}
+};
+
+static void print_unknown_arg(void)
+{
+ printf(_("invalid or unknown argument\n"));
+}
+
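+/*
+ * Parse a frequency given as a decimal number with an optional unit
+ * suffix (hz, khz, mhz, ghz, thz; kHz is the default) and normalize it
+ * to kHz by shifting the decimal point, rounding half-up. Returns 0 on
+ * any parse error, which callers treat as an invalid argument.
+ */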
+static unsigned long string_to_frequency(const char *str)
+{
+ char normalized[NORM_FREQ_LEN];
+ const struct freq_units *unit;
+ const char *scan;
+ char *end;
+ unsigned long freq;
+ int power = 0, match_count = 0, i, cp, pad;
+
+ while (*str == '0')
+ str++;
+
+ for (scan = str; isdigit(*scan) || *scan == '.'; scan++) {
+ if (*scan == '.' && match_count == 0)
+ match_count = 1;
+ else if (*scan == '.' && match_count == 1)
+ return 0;
+ }
+
+ if (*scan) {
+ match_count = 0;
+ for (unit = def_units; unit->str_unit; unit++) {
+ for (i = 0;
+ scan[i] && tolower(scan[i]) == unit->str_unit[i];
+ ++i)
+ continue;
+ if (scan[i])
+ continue;
+ match_count++;
+ power = unit->power_of_ten;
+ }
+ if (match_count != 1)
+ return 0;
+ }
+
+ /* count the number of digits to be copied */
+ for (cp = 0; isdigit(str[cp]); cp++)
+ continue;
+
+ if (str[cp] == '.') {
+ while (power > -1 && isdigit(str[cp+1]))
+ cp++, power--;
+ }
+ if (power >= -1) /* not enough => pad */
+ pad = power + 1;
+	else /* too much => strip */
+ pad = 0, cp += power + 1;
+ /* check bounds */
+ if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1)
+ return 0;
+
+ /* copy digits */
+ for (i = 0; i < cp; i++, str++) {
+ if (*str == '.')
+ str++;
+ normalized[i] = *str;
+ }
+ /* and pad */
+ for (; i < cp + pad; i++)
+ normalized[i] = '0';
+
+ /* round up, down ? */
+ match_count = (normalized[i-1] >= '5');
+ /* and drop the decimal part */
+ normalized[i-1] = 0; /* cp > 0 && pad >= 0 ==> i > 0 */
+
+ /* final conversion (and applying rounding) */
+ errno = 0;
+ freq = strtoul(normalized, &end, 10);
+ if (errno)
+ return 0;
+ else {
+ if (match_count && freq != ULONG_MAX)
+ freq++;
+ return freq;
+ }
+}
+
+static int do_new_policy(unsigned int cpu, struct cpufreq_policy *new_pol)
+{
+ struct cpufreq_policy *cur_pol = cpufreq_get_policy(cpu);
+ int ret;
+
+ if (!cur_pol) {
+ printf(_("wrong, unknown or unhandled CPU?\n"));
+ return -EINVAL;
+ }
+
+ if (!new_pol->min)
+ new_pol->min = cur_pol->min;
+
+ if (!new_pol->max)
+ new_pol->max = cur_pol->max;
+
+ if (!new_pol->governor)
+ new_pol->governor = cur_pol->governor;
+
+ ret = cpufreq_set_policy(cpu, new_pol);
+
+ cpufreq_put_policy(cur_pol);
+
+ return ret;
+}
+
+
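+/*
+ * pc counts how many policy members (-d/-u/-g) were given on the
+ * command line: 0 means a fixed frequency is set instead, 1 allows a
+ * fast path that writes a single sysfs attribute, and anything else
+ * rewrites the whole policy via do_new_policy().
+ */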
+static int do_one_cpu(unsigned int cpu, struct cpufreq_policy *new_pol,
+ unsigned long freq, unsigned int pc)
+{
+ switch (pc) {
+ case 0:
+ return cpufreq_set_frequency(cpu, freq);
+
+ case 1:
+ /* if only one value of a policy is to be changed, we can
+ * use a "fast path".
+ */
+ if (new_pol->min)
+ return cpufreq_modify_policy_min(cpu, new_pol->min);
+ else if (new_pol->max)
+ return cpufreq_modify_policy_max(cpu, new_pol->max);
+ else if (new_pol->governor)
+ return cpufreq_modify_policy_governor(cpu,
+ new_pol->governor);
+
+ default:
+ /* slow path */
+ return do_new_policy(cpu, new_pol);
+ }
+}
+
+int cmd_freq_set(int argc, char **argv)
+{
+ extern char *optarg;
+ extern int optind, opterr, optopt;
+ int ret = 0, cont = 1;
+ int double_parm = 0, related = 0, policychange = 0;
+ unsigned long freq = 0;
+ char gov[20];
+ unsigned int cpu;
+
+ struct cpufreq_policy new_pol = {
+ .min = 0,
+ .max = 0,
+ .governor = NULL,
+ };
+
+ /* parameter parsing */
+ do {
+ ret = getopt_long(argc, argv, "d:u:g:f:r", set_opts, NULL);
+ switch (ret) {
+ case '?':
+ print_unknown_arg();
+ return -EINVAL;
+ case -1:
+ cont = 0;
+ break;
+ case 'r':
+ if (related)
+ double_parm++;
+ related++;
+ break;
+ case 'd':
+ if (new_pol.min)
+ double_parm++;
+ policychange++;
+ new_pol.min = string_to_frequency(optarg);
+ if (new_pol.min == 0) {
+ print_unknown_arg();
+ return -EINVAL;
+ }
+ break;
+ case 'u':
+ if (new_pol.max)
+ double_parm++;
+ policychange++;
+ new_pol.max = string_to_frequency(optarg);
+ if (new_pol.max == 0) {
+ print_unknown_arg();
+ return -EINVAL;
+ }
+ break;
+ case 'f':
+ if (freq)
+ double_parm++;
+ freq = string_to_frequency(optarg);
+ if (freq == 0) {
+ print_unknown_arg();
+ return -EINVAL;
+ }
+ break;
+ case 'g':
+ if (new_pol.governor)
+ double_parm++;
+ policychange++;
+ if ((strlen(optarg) < 3) || (strlen(optarg) > 18)) {
+ print_unknown_arg();
+ return -EINVAL;
+ }
+ if ((sscanf(optarg, "%19s", gov)) != 1) {
+ print_unknown_arg();
+ return -EINVAL;
+ }
+ new_pol.governor = gov;
+ break;
+ }
+ } while (cont);
+
+ /* parameter checking */
+ if (double_parm) {
+ printf("the same parameter was passed more than once\n");
+ return -EINVAL;
+ }
+
+ if (freq && policychange) {
+ printf(_("the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n"
+ "-g/--governor parameters\n"));
+ return -EINVAL;
+ }
+
+ if (!freq && !policychange) {
+ printf(_("At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n"
+ "-g/--governor must be passed\n"));
+ return -EINVAL;
+ }
+
+ /* Default is: set all CPUs */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setall(cpus_chosen);
+
+ /* Also set frequency settings for related CPUs if -r is passed */
+ if (related) {
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+ struct cpufreq_affected_cpus *cpus;
+
+ if (!bitmask_isbitset(cpus_chosen, cpu) ||
+ cpupower_is_cpu_online(cpu) != 1)
+ continue;
+
+ cpus = cpufreq_get_related_cpus(cpu);
+ if (!cpus)
+ break;
+ while (cpus->next) {
+ bitmask_setbit(cpus_chosen, cpus->cpu);
+ cpus = cpus->next;
+ }
+ /* Set the last cpu in related cpus list */
+ bitmask_setbit(cpus_chosen, cpus->cpu);
+ cpufreq_put_related_cpus(cpus);
+ }
+ }
+
+
+ /* loop over CPUs */
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu) ||
+ cpupower_is_cpu_online(cpu) != 1)
+ continue;
+
+ printf(_("Setting cpu: %d\n"), cpu);
+ ret = do_one_cpu(cpu, &new_pol, freq, policychange);
+ if (ret) {
+ print_error();
+ return ret;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
new file mode 100644
index 000000000..b59c85def
--- /dev/null
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -0,0 +1,214 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ * (C) 2010 Thomas Renninger <trenn@suse.de>
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+#include <cpuidle.h>
+
+#include "helpers/sysfs.h"
+#include "helpers/helpers.h"
+#include "helpers/bitmask.h"
+
+#define LINE_LEN 10
+
+static void cpuidle_cpu_output(unsigned int cpu, int verbose)
+{
+ unsigned int idlestates, idlestate;
+ char *tmp;
+
+ idlestates = cpuidle_state_count(cpu);
+ if (idlestates == 0) {
+ printf(_("CPU %u: No idle states\n"), cpu);
+ return;
+ }
+
+ printf(_("Number of idle states: %d\n"), idlestates);
+ printf(_("Available idle states:"));
+ for (idlestate = 0; idlestate < idlestates; idlestate++) {
+ tmp = cpuidle_state_name(cpu, idlestate);
+ if (!tmp)
+ continue;
+ printf(" %s", tmp);
+ free(tmp);
+ }
+ printf("\n");
+
+ if (!verbose)
+ return;
+
+ for (idlestate = 0; idlestate < idlestates; idlestate++) {
+ int disabled = cpuidle_is_state_disabled(cpu, idlestate);
+ /* Disabled interface not supported on older kernels */
+ if (disabled < 0)
+ disabled = 0;
+ tmp = cpuidle_state_name(cpu, idlestate);
+ if (!tmp)
+ continue;
+ printf("%s%s:\n", tmp, (disabled) ? " (DISABLED) " : "");
+ free(tmp);
+
+ tmp = cpuidle_state_desc(cpu, idlestate);
+ if (!tmp)
+ continue;
+ printf(_("Flags/Description: %s\n"), tmp);
+ free(tmp);
+
+ printf(_("Latency: %lu\n"),
+ cpuidle_state_latency(cpu, idlestate));
+ printf(_("Usage: %lu\n"),
+ cpuidle_state_usage(cpu, idlestate));
+ printf(_("Duration: %llu\n"),
+ cpuidle_state_time(cpu, idlestate));
+ }
+}
+
+static void cpuidle_general_output(void)
+{
+ char *tmp;
+
+ tmp = cpuidle_get_driver();
+ if (!tmp) {
+ printf(_("Could not determine cpuidle driver\n"));
+ return;
+ }
+
+ printf(_("CPUidle driver: %s\n"), tmp);
+ free(tmp);
+
+ tmp = cpuidle_get_governor();
+ if (!tmp) {
+ printf(_("Could not determine cpuidle governor\n"));
+ return;
+ }
+
+ printf(_("CPUidle governor: %s\n"), tmp);
+ free(tmp);
+}
+
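+/* Print C-state info in the style of the old /proc/acpi processor output */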
+static void proc_cpuidle_cpu_output(unsigned int cpu)
+{
+	unsigned long max_allowed_latency = 2000000000;
+ unsigned int cstate, cstates;
+
+ cstates = cpuidle_state_count(cpu);
+ if (cstates == 0) {
+ printf(_("CPU %u: No C-states info\n"), cpu);
+ return;
+ }
+
+ printf(_("active state: C0\n"));
+ printf(_("max_cstate: C%u\n"), cstates-1);
+	printf(_("maximum allowed latency: %lu usec\n"), max_allowed_latency);
+ printf(_("states:\t\n"));
+ for (cstate = 1; cstate < cstates; cstate++) {
+ printf(_(" C%d: "
+ "type[C%d] "), cstate, cstate);
+ printf(_("promotion[--] demotion[--] "));
+ printf(_("latency[%03lu] "),
+ cpuidle_state_latency(cpu, cstate));
+ printf(_("usage[%08lu] "),
+ cpuidle_state_usage(cpu, cstate));
+		printf(_("duration[%020llu] \n"),
+ cpuidle_state_time(cpu, cstate));
+ }
+}
+
+static struct option info_opts[] = {
+ {"silent", no_argument, NULL, 's'},
+ {"proc", no_argument, NULL, 'o'},
+ { },
+};
+
+static inline void cpuidle_exit(int fail)
+{
+	exit(fail);
+}
+
+int cmd_idle_info(int argc, char **argv)
+{
+ extern char *optarg;
+ extern int optind, opterr, optopt;
+ int ret = 0, cont = 1, output_param = 0, verbose = 1;
+ unsigned int cpu = 0;
+
+ do {
+ ret = getopt_long(argc, argv, "os", info_opts, NULL);
+ if (ret == -1)
+ break;
+ switch (ret) {
+ case '?':
+ output_param = '?';
+ cont = 0;
+ break;
+ case 's':
+ verbose = 0;
+ break;
+ case 'o':
+ if (output_param) {
+ output_param = -1;
+ cont = 0;
+ break;
+ }
+ output_param = ret;
+ break;
+ }
+ } while (cont);
+
+ switch (output_param) {
+ case -1:
+ printf(_("You can't specify more than one "
+ "output-specific argument\n"));
+ cpuidle_exit(EXIT_FAILURE);
+ case '?':
+ printf(_("invalid or unknown argument\n"));
+ cpuidle_exit(EXIT_FAILURE);
+ }
+
+ /* Default is: show output of CPU 0 only */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setbit(cpus_chosen, 0);
+
+ if (output_param == 0)
+ cpuidle_general_output();
+
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu))
+ continue;
+
+ printf(_("analyzing CPU %d:\n"), cpu);
+
+ if (sysfs_is_cpu_online(cpu) != 1) {
+ printf(_(" *is offline\n"));
+ printf("\n");
+ continue;
+ }
+
+ switch (output_param) {
+
+ case 'o':
+ proc_cpuidle_cpu_output(cpu);
+ break;
+ case 0:
+ printf("\n");
+ cpuidle_cpu_output(cpu, verbose);
+ break;
+ }
+ printf("\n");
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/tools/power/cpupower/utils/cpuidle-set.c b/tools/power/cpupower/utils/cpuidle-set.c
new file mode 100644
index 000000000..569f268f4
--- /dev/null
+++ b/tools/power/cpupower/utils/cpuidle-set.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <ctype.h>
+#include <getopt.h>
+
+#include <cpufreq.h>
+#include <cpuidle.h>
+
+#include "helpers/helpers.h"
+
+static struct option info_opts[] = {
+ {"disable", required_argument, NULL, 'd'},
+ {"enable", required_argument, NULL, 'e'},
+ {"disable-by-latency", required_argument, NULL, 'D'},
+ {"enable-all", no_argument, NULL, 'E'},
+ { },
+};
+
+
+int cmd_idle_set(int argc, char **argv)
+{
+ extern char *optarg;
+ extern int optind, opterr, optopt;
+ int ret = 0, cont = 1, param = 0, disabled;
+ unsigned long long latency = 0, state_latency;
+ unsigned int cpu = 0, idlestate = 0, idlestates = 0;
+ char *endptr;
+
+ do {
+ ret = getopt_long(argc, argv, "d:e:ED:", info_opts, NULL);
+ if (ret == -1)
+ break;
+ switch (ret) {
+ case '?':
+ param = '?';
+ cont = 0;
+ break;
+ case 'd':
+ if (param) {
+ param = -1;
+ cont = 0;
+ break;
+ }
+ param = ret;
+ idlestate = atoi(optarg);
+ break;
+ case 'e':
+ if (param) {
+ param = -1;
+ cont = 0;
+ break;
+ }
+ param = ret;
+ idlestate = atoi(optarg);
+ break;
+ case 'D':
+ if (param) {
+ param = -1;
+ cont = 0;
+ break;
+ }
+ param = ret;
+ latency = strtoull(optarg, &endptr, 10);
+ if (*endptr != '\0') {
+ printf(_("Bad latency value: %s\n"), optarg);
+ exit(EXIT_FAILURE);
+ }
+ break;
+ case 'E':
+ if (param) {
+ param = -1;
+ cont = 0;
+ break;
+ }
+ param = ret;
+ break;
+ }
+ } while (cont);
+
+ switch (param) {
+ case -1:
+		printf(_("You can't specify more than one "
+			 "argument at the same time\n"));
+ exit(EXIT_FAILURE);
+ case '?':
+ printf(_("invalid or unknown argument\n"));
+ exit(EXIT_FAILURE);
+ }
+
+ /* Default is: set all CPUs */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setall(cpus_chosen);
+
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu))
+ continue;
+
+ if (cpupower_is_cpu_online(cpu) != 1)
+ continue;
+
+ idlestates = cpuidle_state_count(cpu);
+ if (idlestates <= 0)
+ continue;
+
+ switch (param) {
+ case 'd':
+ ret = cpuidle_state_disable(cpu, idlestate, 1);
+ if (ret == 0)
+ printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
+ else if (ret == -1)
+ printf(_("Idlestate %u not available on CPU %u\n"),
+ idlestate, cpu);
+ else if (ret == -2)
+ printf(_("Idlestate disabling not supported by kernel\n"));
+ else
+ printf(_("Idlestate %u not disabled on CPU %u\n"),
+ idlestate, cpu);
+ break;
+ case 'e':
+ ret = cpuidle_state_disable(cpu, idlestate, 0);
+ if (ret == 0)
+ printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
+ else if (ret == -1)
+ printf(_("Idlestate %u not available on CPU %u\n"),
+ idlestate, cpu);
+ else if (ret == -2)
+ printf(_("Idlestate enabling not supported by kernel\n"));
+ else
+ printf(_("Idlestate %u not enabled on CPU %u\n"),
+ idlestate, cpu);
+ break;
+ case 'D':
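+			/*
+			 * Disable every idle state whose exit latency is at
+			 * or above the given threshold; re-enable disabled
+			 * states whose latency is below it.
+			 */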
+ for (idlestate = 0; idlestate < idlestates; idlestate++) {
+ disabled = cpuidle_is_state_disabled
+ (cpu, idlestate);
+ state_latency = cpuidle_state_latency
+ (cpu, idlestate);
+ if (disabled == 1) {
+				if (latency > state_latency) {
+ ret = cpuidle_state_disable
+ (cpu, idlestate, 0);
+ if (ret == 0)
+ printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
+ }
+ continue;
+ }
+			if (latency <= state_latency) {
+ ret = cpuidle_state_disable
+ (cpu, idlestate, 1);
+ if (ret == 0)
+ printf(_("Idlestate %u disabled on CPU %u\n"), idlestate, cpu);
+ }
+ }
+ break;
+ case 'E':
+ for (idlestate = 0; idlestate < idlestates; idlestate++) {
+ disabled = cpuidle_is_state_disabled
+ (cpu, idlestate);
+ if (disabled == 1) {
+ ret = cpuidle_state_disable
+ (cpu, idlestate, 0);
+ if (ret == 0)
+ printf(_("Idlestate %u enabled on CPU %u\n"), idlestate, cpu);
+ }
+ }
+ break;
+ default:
+ /* Not reachable with proper args checking */
+ printf(_("Invalid or unknown argument\n"));
+ exit(EXIT_FAILURE);
+ break;
+ }
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
new file mode 100644
index 000000000..c7caa8eaa
--- /dev/null
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -0,0 +1,106 @@
+/*
+ * (C) 2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+
+#include "helpers/helpers.h"
+#include "helpers/sysfs.h"
+
+static struct option set_opts[] = {
+ {"perf-bias", optional_argument, NULL, 'b'},
+ { },
+};
+
+static void print_wrong_arg_exit(void)
+{
+ printf(_("invalid or unknown argument\n"));
+ exit(EXIT_FAILURE);
+}
+
+int cmd_info(int argc, char **argv)
+{
+ extern char *optarg;
+ extern int optind, opterr, optopt;
+ unsigned int cpu;
+
+ union {
+ struct {
+ int perf_bias:1;
+ };
+ int params;
+ } params = {};
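+	/* Bitfields flag single options; .params tests if any was set */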
+ int ret = 0;
+
+ setlocale(LC_ALL, "");
+ textdomain(PACKAGE);
+
+ /* parameter parsing */
+ while ((ret = getopt_long(argc, argv, "b", set_opts, NULL)) != -1) {
+ switch (ret) {
+ case 'b':
+ if (params.perf_bias)
+ print_wrong_arg_exit();
+ params.perf_bias = 1;
+ break;
+ default:
+ print_wrong_arg_exit();
+ }
+	}
+
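+	/* No option given: default to showing all supported information */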
+ if (!params.params)
+ params.params = 0x7;
+
+ /* Default is: show output of CPU 0 only */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setbit(cpus_chosen, 0);
+
+ /* Add more per cpu options here */
+ if (!params.perf_bias)
+ return ret;
+
+ if (params.perf_bias) {
+ if (!run_as_root) {
+ params.perf_bias = 0;
+ printf(_("Intel's performance bias setting needs root privileges\n"));
+ } else if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS)) {
+ printf(_("System does not support Intel's performance"
+ " bias setting\n"));
+ params.perf_bias = 0;
+ }
+ }
+
+ /* loop over CPUs */
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu))
+ continue;
+
+ printf(_("analyzing CPU %d:\n"), cpu);
+
+		if (sysfs_is_cpu_online(cpu) != 1) {
+ printf(_(" *is offline\n"));
+ continue;
+ }
+
+ if (params.perf_bias) {
+ ret = msr_intel_get_perf_bias(cpu);
+ if (ret < 0) {
+ fprintf(stderr,
+ _("Could not read perf-bias value[%d]\n"), ret);
+ exit(EXIT_FAILURE);
+ } else
+ printf(_("perf-bias: %d\n"), ret);
+ }
+ }
+ return 0;
+}
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
new file mode 100644
index 000000000..532f46b9a
--- /dev/null
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -0,0 +1,99 @@
+/*
+ * (C) 2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+
+#include "helpers/helpers.h"
+#include "helpers/sysfs.h"
+#include "helpers/bitmask.h"
+
+static struct option set_opts[] = {
+ {"perf-bias", required_argument, NULL, 'b'},
+ { },
+};
+
+static void print_wrong_arg_exit(void)
+{
+ printf(_("invalid or unknown argument\n"));
+ exit(EXIT_FAILURE);
+}
+
+int cmd_set(int argc, char **argv)
+{
+ extern char *optarg;
+ extern int optind, opterr, optopt;
+ unsigned int cpu;
+
+ union {
+ struct {
+ int perf_bias:1;
+ };
+ int params;
+ } params;
+ int perf_bias = 0;
+ int ret = 0;
+
+ setlocale(LC_ALL, "");
+ textdomain(PACKAGE);
+
+ params.params = 0;
+ /* parameter parsing */
+ while ((ret = getopt_long(argc, argv, "b:",
+ set_opts, NULL)) != -1) {
+ switch (ret) {
+ case 'b':
+ if (params.perf_bias)
+ print_wrong_arg_exit();
+ perf_bias = atoi(optarg);
+ if (perf_bias < 0 || perf_bias > 15) {
+ printf(_("--perf-bias param out "
+ "of range [0-%d]\n"), 15);
+ print_wrong_arg_exit();
+ }
+ params.perf_bias = 1;
+ break;
+ default:
+ print_wrong_arg_exit();
+ }
+	}
+
+ if (!params.params)
+ print_wrong_arg_exit();
+
+ /* Default is: set all CPUs */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setall(cpus_chosen);
+
+ /* loop over CPUs */
+ for (cpu = bitmask_first(cpus_chosen);
+ cpu <= bitmask_last(cpus_chosen); cpu++) {
+
+ if (!bitmask_isbitset(cpus_chosen, cpu))
+ continue;
+
+		if (sysfs_is_cpu_online(cpu) != 1) {
+ fprintf(stderr, _("Cannot set values on CPU %d:"), cpu);
+ fprintf(stderr, _(" *is offline\n"));
+ continue;
+ }
+
+ if (params.perf_bias) {
+ ret = msr_intel_set_perf_bias(cpu, perf_bias);
+ if (ret) {
+ fprintf(stderr, _("Error setting perf-bias "
+ "value on CPU %d\n"), cpu);
+ break;
+ }
+ }
+ }
+ return ret;
+}
diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c
new file mode 100644
index 000000000..2dccf4998
--- /dev/null
+++ b/tools/power/cpupower/utils/cpupower.c
@@ -0,0 +1,238 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Ideas taken over from the perf userspace tool (included in the Linux
+ * kernel git repo): subcommand builtins and param parsing.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/utsname.h>
+
+#include "builtin.h"
+#include "helpers/helpers.h"
+#include "helpers/bitmask.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+static int cmd_help(int argc, const char **argv);
+
+/* Global cpu_info object available for all binaries
+ * Info is only retrieved from CPU 0
+ *
+ * Values will be zero/unknown on non-x86 archs
+ */
+struct cpupower_cpu_info cpupower_cpu_info;
+int run_as_root;
+int base_cpu;
+/* Affected cpus chosen by -c/--cpu param */
+struct bitmask *cpus_chosen;
+
+#ifdef DEBUG
+int be_verbose;
+#endif
+
+static void print_help(void);
+
+struct cmd_struct {
+ const char *cmd;
+ int (*main)(int, const char **);
+ int needs_root;
+};
+
+static struct cmd_struct commands[] = {
+ { "frequency-info", cmd_freq_info, 0 },
+ { "frequency-set", cmd_freq_set, 1 },
+ { "idle-info", cmd_idle_info, 0 },
+ { "idle-set", cmd_idle_set, 1 },
+ { "set", cmd_set, 1 },
+ { "info", cmd_info, 0 },
+ { "monitor", cmd_monitor, 0 },
+ { "help", cmd_help, 0 },
+ /* { "bench", cmd_bench, 1 }, */
+};
+
+static void print_help(void)
+{
+ unsigned int i;
+
+#ifdef DEBUG
+ printf(_("Usage:\tcpupower [-d|--debug] [-c|--cpu cpulist ] <command> [<args>]\n"));
+#else
+ printf(_("Usage:\tcpupower [-c|--cpu cpulist ] <command> [<args>]\n"));
+#endif
+ printf(_("Supported commands are:\n"));
+ for (i = 0; i < ARRAY_SIZE(commands); i++)
+ printf("\t%s\n", commands[i].cmd);
+ printf(_("\nNot all commands can make use of the -c cpulist option.\n"));
+ printf(_("\nUse 'cpupower help <command>' for getting help for above commands.\n"));
+}
+
+static int print_man_page(const char *subpage)
+{
+ int len;
+ char *page;
+
+	len = 10; /* "cpupower-" + terminating NUL */
+ if (subpage != NULL)
+ len += strlen(subpage);
+
+ page = malloc(len);
+ if (!page)
+ return -ENOMEM;
+
+ sprintf(page, "cpupower");
+ if ((subpage != NULL) && strcmp(subpage, "help")) {
+ strcat(page, "-");
+ strcat(page, subpage);
+ }
+
+ execlp("man", "man", page, NULL);
+
+ /* should not be reached */
+ return -EINVAL;
+}
+
+static int cmd_help(int argc, const char **argv)
+{
+ if (argc > 1) {
+ print_man_page(argv[1]); /* exits within execlp() */
+ return EXIT_FAILURE;
+ }
+
+ print_help();
+ return EXIT_SUCCESS;
+}
+
+static void print_version(void)
+{
+ printf(PACKAGE " " VERSION "\n");
+ printf(_("Report errors and bugs to %s, please.\n"), PACKAGE_BUGREPORT);
+}
+
+static void handle_options(int *argc, const char ***argv)
+{
+ int ret, x, new_argc = 0;
+
+ if (*argc < 1)
+ return;
+
+ for (x = 0; x < *argc && ((*argv)[x])[0] == '-'; x++) {
+ const char *param = (*argv)[x];
+ if (!strcmp(param, "-h") || !strcmp(param, "--help")) {
+ print_help();
+ exit(EXIT_SUCCESS);
+ } else if (!strcmp(param, "-c") || !strcmp(param, "--cpu")) {
+			if (x + 1 >= *argc) {
+ print_help();
+ exit(EXIT_FAILURE);
+ }
+ if (!strcmp((*argv)[x+1], "all"))
+ bitmask_setall(cpus_chosen);
+ else {
+ ret = bitmask_parselist(
+ (*argv)[x+1], cpus_chosen);
+ if (ret < 0) {
+ fprintf(stderr, _("Error parsing cpu "
+ "list\n"));
+ exit(EXIT_FAILURE);
+ }
+ }
+ x += 1;
+ /* Cut out param: cpupower -c 1 info -> cpupower info */
+ new_argc += 2;
+ continue;
+ } else if (!strcmp(param, "-v") ||
+ !strcmp(param, "--version")) {
+ print_version();
+ exit(EXIT_SUCCESS);
+#ifdef DEBUG
+ } else if (!strcmp(param, "-d") || !strcmp(param, "--debug")) {
+ be_verbose = 1;
+ new_argc++;
+ continue;
+#endif
+ } else {
+ fprintf(stderr, "Unknown option: %s\n", param);
+ print_help();
+ exit(EXIT_FAILURE);
+ }
+ }
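+	/* Strip the consumed global options so that subcommands only see
+	 * their own arguments */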
+ *argc -= new_argc;
+ *argv += new_argc;
+}
+
+int main(int argc, const char *argv[])
+{
+ const char *cmd;
+ unsigned int i, ret;
+ struct stat statbuf;
+ struct utsname uts;
+ char pathname[32];
+
+	cpus_chosen = bitmask_alloc(sysconf(_SC_NPROCESSORS_CONF));
+	if (!cpus_chosen) {
+		fprintf(stderr, _("Out of memory\n"));
+		return EXIT_FAILURE;
+	}
+
+ argc--;
+ argv += 1;
+
+ handle_options(&argc, &argv);
+
+	if (argc < 1) {
+		print_help();
+		return EXIT_FAILURE;
+	}
+
+	cmd = argv[0];
+
+ setlocale(LC_ALL, "");
+ textdomain(PACKAGE);
+
+	/* Turn "cpupower cmd --help" into "cpupower help cmd" */
+ if (argc > 1 && !strcmp(argv[1], "--help")) {
+ argv[1] = argv[0];
+ argv[0] = cmd = "help";
+ }
+
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0) {
+ fprintf(stderr, _("No valid cpus found.\n"));
+ return EXIT_FAILURE;
+ }
+
+ get_cpu_info(&cpupower_cpu_info);
+ run_as_root = !geteuid();
+ if (run_as_root) {
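+		/*
+		 * On x86_64, try to load the msr module when the MSR device
+		 * node of the base CPU is not available yet.
+		 */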
+ ret = uname(&uts);
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (!ret && !strcmp(uts.machine, "x86_64") &&
+ stat(pathname, &statbuf) != 0) {
+ if (system("modprobe msr") == -1)
+ fprintf(stderr, _("MSR access not available.\n"));
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(commands); i++) {
+ struct cmd_struct *p = commands + i;
+ if (strcmp(p->cmd, cmd))
+ continue;
+ if (!run_as_root && p->needs_root) {
+ fprintf(stderr, _("Subcommand %s needs root "
+ "privileges\n"), cmd);
+ return EXIT_FAILURE;
+ }
+ ret = p->main(argc, argv);
+ if (cpus_chosen)
+ bitmask_free(cpus_chosen);
+ return ret;
+ }
+ print_help();
+ return EXIT_FAILURE;
+}
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
new file mode 100644
index 000000000..9607ada5b
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+#if defined(__i386__) || defined(__x86_64__)
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include <pci/pci.h>
+
+#include "helpers/helpers.h"
+
+#define MSR_AMD_PSTATE_STATUS 0xc0010063
+#define MSR_AMD_PSTATE 0xc0010064
+#define MSR_AMD_PSTATE_LIMIT 0xc0010061
+
+union msr_pstate {
+ struct {
+ unsigned fid:6;
+ unsigned did:3;
+ unsigned vid:7;
+ unsigned res1:6;
+ unsigned nbdid:1;
+ unsigned res2:2;
+ unsigned nbvid:7;
+ unsigned iddval:8;
+ unsigned idddiv:2;
+ unsigned res3:21;
+ unsigned en:1;
+ } bits;
+ struct {
+ unsigned fid:8;
+ unsigned did:6;
+ unsigned vid:8;
+ unsigned iddval:8;
+ unsigned idddiv:2;
+ unsigned res1:31;
+ unsigned en:1;
+ } fam17h_bits;
+ unsigned long long val;
+};
+
+static int get_did(int family, union msr_pstate pstate)
+{
+ int t;
+
+ if (family == 0x12)
+ t = pstate.val & 0xf;
+ else if (family == 0x17)
+ t = pstate.fam17h_bits.did;
+ else
+ t = pstate.bits.did;
+
+ return t;
+}
+
+static int get_cof(int family, union msr_pstate pstate)
+{
+ int t;
+ int fid, did, cof;
+
+ did = get_did(family, pstate);
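+	/*
+	 * Family 0x17: core COF = 200 MHz * FID / DID.
+	 * Older families: COF = 100 MHz * (FID + offset) >> DID, with an
+	 * offset of 0x8 on family 0x11 and 0x10 otherwise.
+	 */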
+ if (family == 0x17) {
+ fid = pstate.fam17h_bits.fid;
+ cof = 200 * fid / did;
+ } else {
+ t = 0x10;
+ fid = pstate.bits.fid;
+ if (family == 0x11)
+ t = 0x8;
+ cof = (100 * (fid + t)) >> did;
+ }
+ return cof;
+}
+
+/* Needs:
+ * cpu -> the cpu that gets evaluated
+ * cpu_family -> The cpu's family (0x10, 0x12,...)
+ * boost_states -> how many boost states the machine supports
+ *
+ * Fills up:
+ * pstates -> a pointer to an array of size MAX_HW_PSTATES
+ * must be initialized with zeros.
+ * All available HW pstates (including boost states)
+ * no -> number of pstates the above array was filled with
+ *
+ * returns zero on success, -1 on failure
+ */
+int decode_pstates(unsigned int cpu, unsigned int cpu_family,
+ int boost_states, unsigned long *pstates, int *no)
+{
+ int i, psmax, pscur;
+ union msr_pstate pstate;
+ unsigned long long val;
+
+	/*
+	 * Only read out frequencies from HW if the CPU might be boostable,
+	 * to keep the code as short and clean as possible.
+	 * Otherwise frequencies are exported via ACPI tables.
+	 */
+ if (cpu_family < 0x10 || cpu_family == 0x14)
+ return -1;
+
+ if (read_msr(cpu, MSR_AMD_PSTATE_LIMIT, &val))
+ return -1;
+
+ psmax = (val >> 4) & 0x7;
+
+ if (read_msr(cpu, MSR_AMD_PSTATE_STATUS, &val))
+ return -1;
+
+ pscur = val & 0x7;
+
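+	/* The limit/status MSRs use software P-state numbering, which
+	 * excludes boost states; convert to hardware numbering */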
+ pscur += boost_states;
+ psmax += boost_states;
+ for (i = 0; i <= psmax; i++) {
+ if (i >= MAX_HW_PSTATES) {
+ fprintf(stderr, "HW pstates [%d] exceeding max [%d]\n",
+ psmax, MAX_HW_PSTATES);
+ return -1;
+ }
+ if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val))
+ return -1;
+ if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en))
+ continue;
+ else if (!pstate.bits.en)
+ continue;
+
+ pstates[i] = get_cof(cpu_family, pstate);
+ }
+ *no = i;
+ return 0;
+}
+
+int amd_pci_get_num_boost_states(int *active, int *states)
+{
+ struct pci_access *pci_acc;
+ struct pci_dev *device;
+ uint8_t val = 0;
+
+ *active = *states = 0;
+
+ device = pci_slot_func_init(&pci_acc, 0x18, 4);
+
+ if (device == NULL)
+ return -ENODEV;
+
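+	/* val & 3: boost enabled; bits [4:2]: number of boost states */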
+ val = pci_read_byte(device, 0x15c);
+ if (val & 3)
+ *active = 1;
+ else
+ *active = 0;
+ *states = (val >> 2) & 7;
+
+ pci_cleanup(pci_acc);
+ return 0;
+}
+#endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/bitmask.c b/tools/power/cpupower/utils/helpers/bitmask.c
new file mode 100644
index 000000000..6c7932f5b
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/bitmask.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <helpers/bitmask.h>
+
+/* How many bits in an unsigned long */
+#define bitsperlong (8 * sizeof(unsigned long))
+
+/* howmany(a,b) : how many elements of size b needed to hold all of a */
+#define howmany(x, y) (((x)+((y)-1))/(y))
+
+/* How many longs in mask of n bits */
+#define longsperbits(n) howmany(n, bitsperlong)
+
+#define max(a, b) ((a) > (b) ? (a) : (b))
+
+/*
+ * Allocate and free `struct bitmask *`
+ */
+
+/* Allocate a new `struct bitmask` with a size of n bits */
+struct bitmask *bitmask_alloc(unsigned int n)
+{
+ struct bitmask *bmp;
+
+ bmp = malloc(sizeof(*bmp));
+ if (bmp == 0)
+ return 0;
+ bmp->size = n;
+ bmp->maskp = calloc(longsperbits(n), sizeof(unsigned long));
+ if (bmp->maskp == 0) {
+ free(bmp);
+ return 0;
+ }
+ return bmp;
+}
+
+/* Free `struct bitmask` */
+void bitmask_free(struct bitmask *bmp)
+{
+ if (bmp == 0)
+ return;
+ free(bmp->maskp);
+ bmp->maskp = (unsigned long *)0xdeadcdef; /* double free tripwire */
+ free(bmp);
+}
+
+/*
+ * The routines _getbit() and _setbit() are the only
+ * routines that actually understand the layout of bmp->maskp[].
+ *
+ * On little endian architectures, this could simply be an array of
+ * bytes. But the kernel layout of bitmasks _is_ visible to userspace
+ * via the sched_(set/get)affinity calls in Linux 2.6, and on big
+ * endian architectures, it is painfully obvious that this is an
+ * array of unsigned longs.
+ */
+
+/* Return the value (0 or 1) of bit n in bitmask bmp */
+static unsigned int _getbit(const struct bitmask *bmp, unsigned int n)
+{
+ if (n < bmp->size)
+ return (bmp->maskp[n/bitsperlong] >> (n % bitsperlong)) & 1;
+ else
+ return 0;
+}
+
+/* Set bit n in bitmask bmp to value v (0 or 1) */
+static void _setbit(struct bitmask *bmp, unsigned int n, unsigned int v)
+{
+ if (n < bmp->size) {
+ if (v)
+ bmp->maskp[n/bitsperlong] |= 1UL << (n % bitsperlong);
+ else
+ bmp->maskp[n/bitsperlong] &=
+ ~(1UL << (n % bitsperlong));
+ }
+}
+
+/*
+ * When parsing bitmask lists, only allow numbers, separated by one
+ * of the allowed next characters.
+ *
+ * The parameter 'sret' is the return from a sscanf "%u%c". It is
+ * -1 if the sscanf input string was empty. It is 0 if the first
+ * character in the sscanf input string was not a decimal number.
+ * It is 1 if the unsigned number matching the "%u" was the end of the
+ * input string. It is 2 if one or more additional characters followed
+ * the matched unsigned number. If it is 2, then 'nextc' is the first
+ * character following the number. The parameter 'ok_next_chars'
+ * is the nul-terminated list of allowed next characters.
+ *
+ * The mask term just scanned was ok if and only if either the numbers
+ * matching the %u were all of the input or if the next character in
+ * the input past the numbers was one of the allowed next characters.
+ */
+static int scan_was_ok(int sret, char nextc, const char *ok_next_chars)
+{
+ return sret == 1 ||
+ (sret == 2 && strchr(ok_next_chars, nextc) != NULL);
+}
+
+static const char *nexttoken(const char *q, int sep)
+{
+ if (q)
+ q = strchr(q, sep);
+ if (q)
+ q++;
+ return q;
+}
+
+/* Set a single bit i in bitmask */
+struct bitmask *bitmask_setbit(struct bitmask *bmp, unsigned int i)
+{
+ _setbit(bmp, i, 1);
+ return bmp;
+}
+
+/* Set all bits in bitmask: bmp = ~0 */
+struct bitmask *bitmask_setall(struct bitmask *bmp)
+{
+ unsigned int i;
+ for (i = 0; i < bmp->size; i++)
+ _setbit(bmp, i, 1);
+ return bmp;
+}
+
+/* Clear all bits in bitmask: bmp = 0 */
+struct bitmask *bitmask_clearall(struct bitmask *bmp)
+{
+ unsigned int i;
+ for (i = 0; i < bmp->size; i++)
+ _setbit(bmp, i, 0);
+ return bmp;
+}
+
+/* True if all bits are clear */
+int bitmask_isallclear(const struct bitmask *bmp)
+{
+ unsigned int i;
+ for (i = 0; i < bmp->size; i++)
+ if (_getbit(bmp, i))
+ return 0;
+ return 1;
+}
+
+/* True if specified bit i is set */
+int bitmask_isbitset(const struct bitmask *bmp, unsigned int i)
+{
+ return _getbit(bmp, i);
+}
+
+/* Number of lowest set bit (min) */
+unsigned int bitmask_first(const struct bitmask *bmp)
+{
+ return bitmask_next(bmp, 0);
+}
+
+/* Number of highest set bit (max) */
+unsigned int bitmask_last(const struct bitmask *bmp)
+{
+ unsigned int i;
+ unsigned int m = bmp->size;
+ for (i = 0; i < bmp->size; i++)
+ if (_getbit(bmp, i))
+ m = i;
+ return m;
+}
+
+/* Number of next set bit at or above given bit i */
+unsigned int bitmask_next(const struct bitmask *bmp, unsigned int i)
+{
+ unsigned int n;
+ for (n = i; n < bmp->size; n++)
+ if (_getbit(bmp, n))
+ break;
+ return n;
+}
+
+/*
+ * Parses a comma-separated list of numbers and ranges of numbers,
+ * with optional ':%u' strides modifying ranges, into provided bitmask.
+ * Some examples of input lists and their equivalent simple list:
+ * Input Equivalent to
+ * 0-3 0,1,2,3
+ * 0-7:2 0,2,4,6
+ * 1,3,5-7 1,3,5,6,7
+ * 0-3:2,8-15:4 0,2,8,12
+ */
+int bitmask_parselist(const char *buf, struct bitmask *bmp)
+{
+ const char *p, *q;
+
+ bitmask_clearall(bmp);
+
+ q = buf;
+ while (p = q, q = nexttoken(q, ','), p) {
+ unsigned int a; /* begin of range */
+ unsigned int b; /* end of range */
+ unsigned int s; /* stride */
+ const char *c1, *c2; /* next tokens after '-' or ',' */
+ char nextc; /* char after sscanf %u match */
+ int sret; /* sscanf return (number of matches) */
+
+ sret = sscanf(p, "%u%c", &a, &nextc);
+ if (!scan_was_ok(sret, nextc, ",-"))
+ goto err;
+ b = a;
+ s = 1;
+ c1 = nexttoken(p, '-');
+ c2 = nexttoken(p, ',');
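+		/* A '-' before the next ',' makes this term a range */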
+ if (c1 != NULL && (c2 == NULL || c1 < c2)) {
+ sret = sscanf(c1, "%u%c", &b, &nextc);
+ if (!scan_was_ok(sret, nextc, ",:"))
+ goto err;
+ c1 = nexttoken(c1, ':');
+ if (c1 != NULL && (c2 == NULL || c1 < c2)) {
+ sret = sscanf(c1, "%u%c", &s, &nextc);
+ if (!scan_was_ok(sret, nextc, ","))
+ goto err;
+ }
+ }
+ if (!(a <= b))
+ goto err;
+ if (b >= bmp->size)
+ goto err;
+ while (a <= b) {
+ _setbit(bmp, a, 1);
+ a += s;
+ }
+ }
+ return 0;
+err:
+ bitmask_clearall(bmp);
+ return -1;
+}
+
+/*
+ * emit(buf, buflen, rbot, rtop, len)
+ *
+ * Helper routine for bitmask_displaylist(). Write decimal number
+ * or range to buf+len, suppressing output past buf+buflen, with optional
+ * comma-prefix. Return len of what would be written to buf, if it
+ * all fit.
+ */
+
+static inline int emit(char *buf, int buflen, int rbot, int rtop, int len)
+{
+ if (len > 0)
+ len += snprintf(buf + len, max(buflen - len, 0), ",");
+ if (rbot == rtop)
+ len += snprintf(buf + len, max(buflen - len, 0), "%d", rbot);
+ else
+ len += snprintf(buf + len, max(buflen - len, 0), "%d-%d",
+ rbot, rtop);
+ return len;
+}
+
+/*
+ * Write decimal list representation of bmp to buf.
+ *
+ * Output format is a comma-separated list of decimal numbers and
+ * ranges. Consecutively set bits are shown as two hyphen-separated
+ * decimal numbers, the smallest and largest bit numbers set in
+ * the range. Output format is compatible with the format
+ * accepted as input by bitmap_parselist().
+ *
+ * The return value is the number of characters which would be
+ * generated for the given input, excluding the trailing '\0', as
+ * per ISO C99.
+ */
+
+int bitmask_displaylist(char *buf, int buflen, const struct bitmask *bmp)
+{
+ int len = 0;
+ /* current bit is 'cur', most recently seen range is [rbot, rtop] */
+ unsigned int cur, rbot, rtop;
+
+ if (buflen > 0)
+ *buf = 0;
+ rbot = cur = bitmask_first(bmp);
+ while (cur < bmp->size) {
+ rtop = cur;
+ cur = bitmask_next(bmp, cur+1);
+ if (cur >= bmp->size || cur > rtop + 1) {
+ len = emit(buf, buflen, rbot, rtop, len);
+ rbot = cur;
+ }
+ }
+ return len;
+}
diff --git a/tools/power/cpupower/utils/helpers/bitmask.h b/tools/power/cpupower/utils/helpers/bitmask.h
new file mode 100644
index 000000000..b98d93a44
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/bitmask.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CPUPOWER_BITMASK__
+#define __CPUPOWER_BITMASK__
+
+/* Taken over from libbitmask, a project initiated from sgi:
+ * Url: http://oss.sgi.com/projects/cpusets/
+ * Unfortunately it's not very widespread, therefore relevant parts are
+ * pasted here.
+ */
+
+struct bitmask {
+ unsigned int size;
+ unsigned long *maskp;
+};
+
+struct bitmask *bitmask_alloc(unsigned int n);
+void bitmask_free(struct bitmask *bmp);
+
+struct bitmask *bitmask_setbit(struct bitmask *bmp, unsigned int i);
+struct bitmask *bitmask_setall(struct bitmask *bmp);
+struct bitmask *bitmask_clearall(struct bitmask *bmp);
+
+unsigned int bitmask_first(const struct bitmask *bmp);
+unsigned int bitmask_next(const struct bitmask *bmp, unsigned int i);
+unsigned int bitmask_last(const struct bitmask *bmp);
+int bitmask_isallclear(const struct bitmask *bmp);
+int bitmask_isbitset(const struct bitmask *bmp, unsigned int i);
+
+int bitmask_parselist(const char *buf, struct bitmask *bmp);
+int bitmask_displaylist(char *buf, int len, const struct bitmask *bmp);
+
+
+
+#endif /*__CPUPOWER_BITMASK__ */
diff --git a/tools/power/cpupower/utils/helpers/cpuid.c b/tools/power/cpupower/utils/helpers/cpuid.c
new file mode 100644
index 000000000..732b0b41b
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/cpuid.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include "helpers/helpers.h"
+
+static const char *cpu_vendor_table[X86_VENDOR_MAX] = {
+ "Unknown", "GenuineIntel", "AuthenticAMD",
+};
+
+#if defined(__i386__) || defined(__x86_64__)
+
+/* from gcc */
+#include <cpuid.h>
+
+/*
+ * CPUID functions returning a single datum
+ *
+ * Define unsigned int cpuid_e[abcd]x(unsigned int op)
+ */
+#define cpuid_func(reg) \
+ unsigned int cpuid_##reg(unsigned int op) \
+ { \
+ unsigned int eax, ebx, ecx, edx; \
+ __cpuid(op, eax, ebx, ecx, edx); \
+ return reg; \
+ }
+cpuid_func(eax);
+cpuid_func(ebx);
+cpuid_func(ecx);
+cpuid_func(edx);
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+/* get_cpu_info
+ *
+ * Extract CPU vendor, family, model, stepping info from /proc/cpuinfo
+ *
+ * Returns 0 on success or a negative error code
+ *
+ * TBD: Should there be a cpuid alternative for this if /proc is not mounted?
+ */
+int get_cpu_info(struct cpupower_cpu_info *cpu_info)
+{
+ FILE *fp;
+ char value[64];
+ unsigned int proc, x;
+ unsigned int unknown = 0xffffff;
+ unsigned int cpuid_level, ext_cpuid_level;
+
+ int ret = -EINVAL;
+
+ cpu_info->vendor = X86_VENDOR_UNKNOWN;
+ cpu_info->family = unknown;
+ cpu_info->model = unknown;
+ cpu_info->stepping = unknown;
+ cpu_info->caps = 0;
+
+ fp = fopen("/proc/cpuinfo", "r");
+ if (!fp)
+ return -EIO;
+
+ while (!feof(fp)) {
+ if (!fgets(value, 64, fp))
+ continue;
+		value[sizeof(value) - 1] = '\0';
+
+ if (!strncmp(value, "processor\t: ", 12))
+ sscanf(value, "processor\t: %u", &proc);
+
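+		/* Only parse the cpuinfo block belonging to base_cpu */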
+ if (proc != (unsigned int)base_cpu)
+ continue;
+
+ /* Get CPU vendor */
+ if (!strncmp(value, "vendor_id", 9)) {
+ for (x = 1; x < X86_VENDOR_MAX; x++) {
+ if (strstr(value, cpu_vendor_table[x]))
+ cpu_info->vendor = x;
+ }
+ /* Get CPU family, etc. */
+ } else if (!strncmp(value, "cpu family\t: ", 13)) {
+ sscanf(value, "cpu family\t: %u",
+ &cpu_info->family);
+ } else if (!strncmp(value, "model\t\t: ", 9)) {
+ sscanf(value, "model\t\t: %u",
+ &cpu_info->model);
+ } else if (!strncmp(value, "stepping\t: ", 10)) {
+ sscanf(value, "stepping\t: %u",
+ &cpu_info->stepping);
+
+ /* Exit -> all values must have been set */
+ if (cpu_info->vendor == X86_VENDOR_UNKNOWN ||
+ cpu_info->family == unknown ||
+ cpu_info->model == unknown ||
+ cpu_info->stepping == unknown) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+ }
+ ret = -ENODEV;
+out:
+ fclose(fp);
+ /* Get some useful CPU capabilities from cpuid */
+ if (cpu_info->vendor != X86_VENDOR_AMD &&
+ cpu_info->vendor != X86_VENDOR_INTEL)
+ return ret;
+
+ cpuid_level = cpuid_eax(0);
+ ext_cpuid_level = cpuid_eax(0x80000000);
+
+ /* Invariant TSC */
+ if (ext_cpuid_level >= 0x80000007 &&
+ (cpuid_edx(0x80000007) & (1 << 8)))
+ cpu_info->caps |= CPUPOWER_CAP_INV_TSC;
+
+ /* Aperf/Mperf registers support */
+ if (cpuid_level >= 6 && (cpuid_ecx(6) & 0x1))
+ cpu_info->caps |= CPUPOWER_CAP_APERF;
+
+ /* AMD Boost state enable/disable register */
+ if (cpu_info->vendor == X86_VENDOR_AMD) {
+ if (ext_cpuid_level >= 0x80000007 &&
+ (cpuid_edx(0x80000007) & (1 << 9)))
+ cpu_info->caps |= CPUPOWER_CAP_AMD_CBP;
+ }
+
+ if (cpu_info->vendor == X86_VENDOR_INTEL) {
+ if (cpuid_level >= 6 &&
+ (cpuid_eax(6) & (1 << 1)))
+ cpu_info->caps |= CPUPOWER_CAP_INTEL_IDA;
+ }
+
+ if (cpu_info->vendor == X86_VENDOR_INTEL) {
+ /* Intel's perf-bias MSR support */
+ if (cpuid_level >= 6 && (cpuid_ecx(6) & (1 << 3)))
+ cpu_info->caps |= CPUPOWER_CAP_PERF_BIAS;
+
+ /* Intel's Turbo Ratio Limit support */
+ if (cpu_info->family == 6) {
+ switch (cpu_info->model) {
+ case 0x1A: /* Core i7, Xeon 5500 series
+				   * Bloomfield, Gainestown NHM-EP
+ */
+ case 0x1E: /* Core i7 and i5 Processor
+ * Clarksfield, Lynnfield, Jasper Forest
+ */
+ case 0x1F: /* Core i7 and i5 Processor - Nehalem */
+ case 0x25: /* Westmere Client
+ * Clarkdale, Arrandale
+ */
+ case 0x2C: /* Westmere EP - Gulftown */
+ cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO;
+ break;
+ case 0x2A: /* SNB */
+ case 0x2D: /* SNB Xeon */
+ case 0x3A: /* IVB */
+ case 0x3E: /* IVB Xeon */
+ cpu_info->caps |= CPUPOWER_CAP_HAS_TURBO_RATIO;
+ cpu_info->caps |= CPUPOWER_CAP_IS_SNB;
+ break;
+ case 0x2E: /* Nehalem-EX Xeon - Beckton */
+ case 0x2F: /* Westmere-EX Xeon - Eagleton */
+ default:
+ break;
+ }
+ }
+ }
+
+ /* printf("ID: %u - Extid: 0x%x - Caps: 0x%llx\n",
+ cpuid_level, ext_cpuid_level, cpu_info->caps);
+ */
+ return ret;
+}
diff --git a/tools/power/cpupower/utils/helpers/helpers.h b/tools/power/cpupower/utils/helpers/helpers.h
new file mode 100644
index 000000000..41da392be
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/helpers.h
@@ -0,0 +1,174 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Miscellaneous helpers which do not fit elsewhere or are
+ * not worth putting into separate headers
+ */
+
+#ifndef __CPUPOWERUTILS_HELPERS__
+#define __CPUPOWERUTILS_HELPERS__
+
+#include <libintl.h>
+#include <locale.h>
+
+#include "helpers/bitmask.h"
+#include <cpupower.h>
+
+/* Internationalization ****************************/
+#ifdef NLS
+
+#define _(String) gettext(String)
+#ifndef gettext_noop
+#define gettext_noop(String) String
+#endif
+#define N_(String) gettext_noop(String)
+
+#else /* !NLS */
+
+#define _(String) String
+#define N_(String) String
+
+#endif
+/* Internationalization ****************************/
+
+extern int run_as_root;
+extern int base_cpu;
+extern struct bitmask *cpus_chosen;
+
+/* Global verbose (-d) stuff *********************************/
+/*
+ * define DEBUG via global Makefile variable
+ * Debug output is sent to stderr, do:
+ * cpupower monitor 2>/tmp/debug
+ * to split debug output away from normal output
+*/
+#ifdef DEBUG
+extern int be_verbose;
+
+#define dprint(fmt, ...) { \
+ if (be_verbose) { \
+ fprintf(stderr, "%s: " fmt, \
+ __func__, ##__VA_ARGS__); \
+ } \
+ }
+#else
+static inline void dprint(const char *fmt, ...) { }
+#endif
+/* Global verbose (-d) stuff *********************************/
+
+/* cpuid and cpuinfo helpers **************************/
+enum cpupower_cpu_vendor {X86_VENDOR_UNKNOWN = 0, X86_VENDOR_INTEL,
+ X86_VENDOR_AMD, X86_VENDOR_MAX};
+
+#define CPUPOWER_CAP_INV_TSC 0x00000001
+#define CPUPOWER_CAP_APERF 0x00000002
+#define CPUPOWER_CAP_AMD_CBP 0x00000004
+#define CPUPOWER_CAP_PERF_BIAS 0x00000008
+#define CPUPOWER_CAP_HAS_TURBO_RATIO 0x00000010
+#define CPUPOWER_CAP_IS_SNB 0x00000020
+#define CPUPOWER_CAP_INTEL_IDA 0x00000040
+
+#define CPUPOWER_AMD_CPBDIS 0x02000000
+
+#define MAX_HW_PSTATES 10
+
+struct cpupower_cpu_info {
+ enum cpupower_cpu_vendor vendor;
+ unsigned int family;
+ unsigned int model;
+ unsigned int stepping;
+ /* CPU capabilities read out from cpuid */
+ unsigned long long caps;
+};
+
+/* get_cpu_info
+ *
+ * Extract CPU vendor, family, model, stepping info from /proc/cpuinfo
+ *
+ * Returns 0 on success or a negative error code
+ * Only used on x86; on other archs the values of the global struct
+ * below are zero/unknown
+ */
+extern int get_cpu_info(struct cpupower_cpu_info *cpu_info);
+extern struct cpupower_cpu_info cpupower_cpu_info;
+/* cpuid and cpuinfo helpers **************************/
+
+/* X86 ONLY ****************************************/
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <pci/pci.h>
+
+/* Read/Write msr ****************************/
+extern int read_msr(int cpu, unsigned int idx, unsigned long long *val);
+extern int write_msr(int cpu, unsigned int idx, unsigned long long val);
+
+extern int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val);
+extern int msr_intel_get_perf_bias(unsigned int cpu);
+extern unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu);
+
+/* Read/Write msr ****************************/
+
+/* PCI stuff ****************************/
+extern int amd_pci_get_num_boost_states(int *active, int *states);
+extern struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain,
+ int bus, int slot, int func, int vendor,
+ int dev);
+extern struct pci_dev *pci_slot_func_init(struct pci_access **pacc,
+ int slot, int func);
+
+/* PCI stuff ****************************/
+
+/* AMD HW pstate decoding **************************/
+
+extern int decode_pstates(unsigned int cpu, unsigned int cpu_family,
+ int boost_states, unsigned long *pstates, int *no);
+
+/* AMD HW pstate decoding **************************/
+
+extern int cpufreq_has_boost_support(unsigned int cpu, int *support,
+ int *active, int * states);
+/*
+ * CPUID functions returning a single datum
+ */
+unsigned int cpuid_eax(unsigned int op);
+unsigned int cpuid_ebx(unsigned int op);
+unsigned int cpuid_ecx(unsigned int op);
+unsigned int cpuid_edx(unsigned int op);
+
+/* cpuid and cpuinfo helpers **************************/
+/* X86 ONLY ********************************************/
+#else
+static inline int decode_pstates(unsigned int cpu, unsigned int cpu_family,
+ int boost_states, unsigned long *pstates,
+ int *no)
+{ return -1; };
+
+static inline int read_msr(int cpu, unsigned int idx, unsigned long long *val)
+{ return -1; };
+static inline int write_msr(int cpu, unsigned int idx, unsigned long long val)
+{ return -1; };
+static inline int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
+{ return -1; };
+static inline int msr_intel_get_perf_bias(unsigned int cpu)
+{ return -1; };
+static inline unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
+{ return 0; };
+
+/* Read/Write msr ****************************/
+
+static inline int cpufreq_has_boost_support(unsigned int cpu, int *support,
+ int *active, int * states)
+{ return -1; }
+
+/* cpuid and cpuinfo helpers **************************/
+
+static inline unsigned int cpuid_eax(unsigned int op) { return 0; };
+static inline unsigned int cpuid_ebx(unsigned int op) { return 0; };
+static inline unsigned int cpuid_ecx(unsigned int op) { return 0; };
+static inline unsigned int cpuid_edx(unsigned int op) { return 0; };
+#endif /* defined(__i386__) || defined(__x86_64__) */
+
+#endif /* __CPUPOWERUTILS_HELPERS__ */
diff --git a/tools/power/cpupower/utils/helpers/misc.c b/tools/power/cpupower/utils/helpers/misc.c
new file mode 100644
index 000000000..80fdf55f4
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/misc.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+#if defined(__i386__) || defined(__x86_64__)
+
+#include "helpers/helpers.h"
+
+#define MSR_AMD_HWCR 0xc0010015
+
+int cpufreq_has_boost_support(unsigned int cpu, int *support, int *active,
+ int *states)
+{
+ struct cpupower_cpu_info cpu_info;
+ int ret;
+ unsigned long long val;
+
+ *support = *active = *states = 0;
+
+ ret = get_cpu_info(&cpu_info);
+ if (ret)
+ return ret;
+
+	if (cpu_info.caps & CPUPOWER_CAP_AMD_CBP) {
+		*support = 1;
+
+		/* AMD family 0x17 does not utilize PCI D18F4 like prior
+		 * families and has no fixed discrete boost states, but
+		 * hardware-determined variable increments instead.
+		 */
+
+ if (cpu_info.family == 0x17) {
+ if (!read_msr(cpu, MSR_AMD_HWCR, &val)) {
+ if (!(val & CPUPOWER_AMD_CPBDIS))
+ *active = 1;
+ }
+ } else {
+ ret = amd_pci_get_num_boost_states(active, states);
+ if (ret)
+ return ret;
+ }
+	} else if (cpu_info.caps & CPUPOWER_CAP_INTEL_IDA)
+ *support = *active = 1;
+ return 0;
+}
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/msr.c b/tools/power/cpupower/utils/helpers/msr.c
new file mode 100644
index 000000000..ab9950748
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/msr.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdint.h>
+
+#include "helpers/helpers.h"
+
+/* Intel specific MSRs */
+#define MSR_IA32_PERF_STATUS 0x198
+#define MSR_IA32_MISC_ENABLES 0x1a0
+#define MSR_IA32_ENERGY_PERF_BIAS 0x1b0
+#define MSR_NEHALEM_TURBO_RATIO_LIMIT 0x1ad
+
+/*
+ * read_msr
+ *
+ * Will return 0 on success and -1 on failure.
+ * Possible errno values could be:
+ * EFAULT -If the read/write did not fully complete
+ * EIO -If the CPU does not support MSRs
+ * ENXIO -If the CPU does not exist
+ */
+
+int read_msr(int cpu, unsigned int idx, unsigned long long *val)
+{
+ int fd;
+ char msr_file_name[64];
+
+ sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_file_name, O_RDONLY);
+ if (fd < 0)
+ return -1;
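+	/* The MSR index is addressed via the file offset of /dev/cpu/N/msr */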
+ if (lseek(fd, idx, SEEK_CUR) == -1)
+ goto err;
+ if (read(fd, val, sizeof *val) != sizeof *val)
+ goto err;
+ close(fd);
+ return 0;
+ err:
+ close(fd);
+ return -1;
+}
+
+/*
+ * write_msr
+ *
+ * Will return 0 on success and -1 on failure.
+ * Possible errno values could be:
+ * EFAULT -If the read/write did not fully complete
+ * EIO -If the CPU does not support MSRs
+ * ENXIO -If the CPU does not exist
+ */
+int write_msr(int cpu, unsigned int idx, unsigned long long val)
+{
+ int fd;
+ char msr_file_name[64];
+
+ sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_file_name, O_WRONLY);
+ if (fd < 0)
+ return -1;
+ if (lseek(fd, idx, SEEK_CUR) == -1)
+ goto err;
+ if (write(fd, &val, sizeof val) != sizeof val)
+ goto err;
+ close(fd);
+ return 0;
+ err:
+ close(fd);
+ return -1;
+}
+
+int msr_intel_get_perf_bias(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
+ return -1;
+
+ ret = read_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &val);
+ if (ret)
+ return ret;
+ return val;
+}
+
+int msr_intel_set_perf_bias(unsigned int cpu, unsigned int val)
+{
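+	/* val: 0 = highest performance preference, 15 = maximum energy saving */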
+ int ret;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_PERF_BIAS))
+ return -1;
+
+ ret = write_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, val);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+unsigned long long msr_intel_get_turbo_ratio(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_HAS_TURBO_RATIO))
+ return -1;
+
+ ret = read_msr(cpu, MSR_NEHALEM_TURBO_RATIO_LIMIT, &val);
+ if (ret)
+ return ret;
+ return val;
+}
+#endif
diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c
new file mode 100644
index 000000000..113f3da2a
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/pci.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <helpers/helpers.h>
+
+/*
+ * pci_acc_init
+ *
+ * PCI access helper function depending on libpci
+ *
+ * **pacc : if a valid pci_dev is returned
+ * *pacc must be passed to pci_cleanup to free it
+ *
+ * domain: domain
+ * bus: bus
+ * slot: slot
+ * func: func
+ * vendor: vendor
+ * device: device
+ * Pass -1 for one of the six above to match any
+ *
+ * Returns :
+ * struct pci_dev which can be used with pci_{read,write}_* functions
+ * to access the PCI config space of matching pci devices
+ */
+struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus,
+ int slot, int func, int vendor, int dev)
+{
+ struct pci_filter filter_nb_link;
+ struct pci_dev *device;
+
+ *pacc = pci_alloc();
+ if (*pacc == NULL)
+ return NULL;
+
+ pci_filter_init(*pacc, &filter_nb_link);
+ filter_nb_link.domain = domain;
+ filter_nb_link.bus = bus;
+ filter_nb_link.slot = slot;
+ filter_nb_link.func = func;
+ filter_nb_link.vendor = vendor;
+ filter_nb_link.device = dev;
+
+ pci_init(*pacc);
+ pci_scan_bus(*pacc);
+
+ for (device = (*pacc)->devices; device; device = device->next) {
+ if (pci_filter_match(&filter_nb_link, device))
+ return device;
+ }
+ pci_cleanup(*pacc);
+ return NULL;
+}
+
+/* Typically one wants to get a specific slot(device)/func of the root domain
+ and bus */
+struct pci_dev *pci_slot_func_init(struct pci_access **pacc, int slot,
+ int func)
+{
+ return pci_acc_init(pacc, 0, 0, slot, func, -1, -1);
+}
+
+#endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
new file mode 100644
index 000000000..4e8fe2c7b
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/sysfs.c
@@ -0,0 +1,472 @@
+/*
+ * (C) 2004-2009 Dominik Brodowski <linux@dominikbrodowski.de>
+ * (C) 2011 Thomas Renninger <trenn@novell.com> Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "helpers/sysfs.h"
+
+unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+/*
+ * Detect whether a CPU is online
+ *
+ * Returns:
+ * 1 -> if CPU is online
+ * 0 -> if CPU is offline
+ * negative errno values in error case
+ */
+int sysfs_is_cpu_online(unsigned int cpu)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numread;
+ unsigned long long value;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+ struct stat statbuf;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u", cpu);
+
+ if (stat(path, &statbuf) != 0)
+ return 0;
+
+ /*
+ * kernel without CONFIG_HOTPLUG_CPU
+ * -> cpuX directory exists, but not cpuX/online file
+ */
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/online", cpu);
+ if (stat(path, &statbuf) != 0)
+ return 1;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return -errno;
+
+ numread = read(fd, linebuf, MAX_LINE_LEN - 1);
+ if (numread < 1) {
+ close(fd);
+ return -EIO;
+ }
+ linebuf[numread] = '\0';
+ close(fd);
+
+ value = strtoull(linebuf, &endp, 0);
+ if (value > 1)
+ return -EINVAL;
+
+ return value;
+}
+
+/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
+
+/*
+ * helper function to check whether a file under "../cpuX/cpuidle/stateX/" dir
+ * exists.
+ * For example, the functionality to disable C-states was introduced in
+ * later kernel versions; this function can be used to explicitly check
+ * for this feature.
+ *
+ * returns 1 if the file exists, 0 otherwise.
+ */
+unsigned int sysfs_idlestate_file_exists(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname)
+{
+ char path[SYSFS_PATH_MAX];
+ struct stat statbuf;
+
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+ if (stat(path, &statbuf) != 0)
+ return 0;
+ return 1;
+}
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpuX/cpuidle/stateX/" dir
+ * cstates start at 0; C0 is not counted as a cstate.
+ * This means if you want C1 info, pass 0 as the idlestate param.
+ */
+unsigned int sysfs_idlestate_read_file(unsigned int cpu, unsigned int idlestate,
+ const char *fname, char *buf, size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numread;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+/*
+ * helper function to write a new value to a /sys file
+ * fname is a relative path under "../cpuX/cpuidle/cstateY/" dir
+ *
+ * Returns the number of bytes written or 0 on error
+ */
+static
+unsigned int sysfs_idlestate_write_file(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname,
+ const char *value, size_t len)
+{
+ char path[SYSFS_PATH_MAX];
+ int fd;
+ ssize_t numwrite;
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpu%u/cpuidle/state%u/%s",
+ cpu, idlestate, fname);
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwrite = write(fd, value, len);
+ if (numwrite < 1) {
+ close(fd);
+ return 0;
+ }
+
+ close(fd);
+
+ return (unsigned int) numwrite;
+}
+
+/* read access to files which contain one numeric value */
+
+enum idlestate_value {
+ IDLESTATE_USAGE,
+ IDLESTATE_POWER,
+ IDLESTATE_LATENCY,
+ IDLESTATE_TIME,
+ IDLESTATE_DISABLE,
+ MAX_IDLESTATE_VALUE_FILES
+};
+
+static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
+ [IDLESTATE_USAGE] = "usage",
+ [IDLESTATE_POWER] = "power",
+ [IDLESTATE_LATENCY] = "latency",
+ [IDLESTATE_TIME] = "time",
+ [IDLESTATE_DISABLE] = "disable",
+};
+
+static unsigned long long sysfs_idlestate_get_one_value(unsigned int cpu,
+ unsigned int idlestate,
+ enum idlestate_value which)
+{
+ unsigned long long value;
+ unsigned int len;
+ char linebuf[MAX_LINE_LEN];
+ char *endp;
+
+ if (which >= MAX_IDLESTATE_VALUE_FILES)
+ return 0;
+
+ len = sysfs_idlestate_read_file(cpu, idlestate,
+ idlestate_value_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return 0;
+
+ value = strtoull(linebuf, &endp, 0);
+
+ if (endp == linebuf || errno == ERANGE)
+ return 0;
+
+ return value;
+}
+
+/* read access to files which contain one string */
+
+enum idlestate_string {
+ IDLESTATE_DESC,
+ IDLESTATE_NAME,
+ MAX_IDLESTATE_STRING_FILES
+};
+
+static const char *idlestate_string_files[MAX_IDLESTATE_STRING_FILES] = {
+ [IDLESTATE_DESC] = "desc",
+ [IDLESTATE_NAME] = "name",
+};
+
+
+static char *sysfs_idlestate_get_one_string(unsigned int cpu,
+ unsigned int idlestate,
+ enum idlestate_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_IDLESTATE_STRING_FILES)
+ return NULL;
+
+ len = sysfs_idlestate_read_file(cpu, idlestate,
+ idlestate_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+/*
+ * Returns:
+ * 1 if disabled
+ * 0 if enabled
+ * -1 if idlestate is not available
+ * -2 if disabling is not supported by the kernel
+ */
+int sysfs_is_idlestate_disabled(unsigned int cpu,
+ unsigned int idlestate)
+{
+ if (sysfs_get_idlestate_count(cpu) <= idlestate)
+ return -1;
+
+ if (!sysfs_idlestate_file_exists(cpu, idlestate,
+ idlestate_value_files[IDLESTATE_DISABLE]))
+ return -2;
+ return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_DISABLE);
+}
+
+/*
+ * Pass 1 as last argument to disable or 0 to enable the state
+ * Returns:
+ * 0 on success
+ * negative values on error, for example:
+ * -1 if idlestate is not available
+ * -2 if disabling is not supported by the kernel
+ * -3 No write access to disable/enable C-states
+ */
+int sysfs_idlestate_disable(unsigned int cpu,
+ unsigned int idlestate,
+ unsigned int disable)
+{
+ char value[SYSFS_PATH_MAX];
+ int bytes_written;
+
+ if (sysfs_get_idlestate_count(cpu) <= idlestate)
+ return -1;
+
+ if (!sysfs_idlestate_file_exists(cpu, idlestate,
+ idlestate_value_files[IDLESTATE_DISABLE]))
+ return -2;
+
+ snprintf(value, SYSFS_PATH_MAX, "%u", disable);
+
+ bytes_written = sysfs_idlestate_write_file(cpu, idlestate, "disable",
+						   value, strlen(value));
+ if (bytes_written)
+ return 0;
+ return -3;
+}
+
+unsigned long sysfs_get_idlestate_latency(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
+}
+
+unsigned long sysfs_get_idlestate_usage(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_USAGE);
+}
+
+unsigned long long sysfs_get_idlestate_time(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return sysfs_idlestate_get_one_value(cpu, idlestate, IDLESTATE_TIME);
+}
+
+char *sysfs_get_idlestate_name(unsigned int cpu, unsigned int idlestate)
+{
+ return sysfs_idlestate_get_one_string(cpu, idlestate, IDLESTATE_NAME);
+}
+
+char *sysfs_get_idlestate_desc(unsigned int cpu, unsigned int idlestate)
+{
+ return sysfs_idlestate_get_one_string(cpu, idlestate, IDLESTATE_DESC);
+}
+
+/*
+ * Returns number of supported C-states of CPU core cpu
+ * Negative in error case
+ * Zero if cpuidle does not export any C-states
+ */
+unsigned int sysfs_get_idlestate_count(unsigned int cpu)
+{
+ char file[SYSFS_PATH_MAX];
+ struct stat statbuf;
+ int idlestates = 1;
+
+
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle");
+ if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+ return 0;
+
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu);
+ if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
+ return 0;
+
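+	/* Count stateX directories until the first missing one */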
+ while (stat(file, &statbuf) == 0 && S_ISDIR(statbuf.st_mode)) {
+ snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU
+ "cpu%u/cpuidle/state%d", cpu, idlestates);
+ idlestates++;
+ }
+ idlestates--;
+ return idlestates;
+}
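+
+/*
+ * Worked trace (illustrative): with state0..state3 present, the loop
+ * above stats state0..state3 successfully, incrementing idlestates to
+ * 5 while preparing the next path; the stat of state4 then fails, the
+ * loop exits and the final idlestates-- yields the correct count of 4.
+ */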
+
+/* CPUidle general /sys/devices/system/cpu/cpuidle/ sysfs access ********/
+
+/*
+ * helper function to read file from /sys into given buffer
+ * fname is a relative path under "cpu/cpuidle/" dir
+ */
+static unsigned int sysfs_cpuidle_read_file(const char *fname, char *buf,
+ size_t buflen)
+{
+ char path[SYSFS_PATH_MAX];
+
+ snprintf(path, sizeof(path), PATH_TO_CPU "cpuidle/%s", fname);
+
+ return sysfs_read_file(path, buf, buflen);
+}
+
+/* read access to files which contain one string */
+
+enum cpuidle_string {
+ CPUIDLE_GOVERNOR,
+ CPUIDLE_GOVERNOR_RO,
+ CPUIDLE_DRIVER,
+ MAX_CPUIDLE_STRING_FILES
+};
+
+static const char *cpuidle_string_files[MAX_CPUIDLE_STRING_FILES] = {
+ [CPUIDLE_GOVERNOR] = "current_governor",
+ [CPUIDLE_GOVERNOR_RO] = "current_governor_ro",
+ [CPUIDLE_DRIVER] = "current_driver",
+};
+
+static char *sysfs_cpuidle_get_one_string(enum cpuidle_string which)
+{
+ char linebuf[MAX_LINE_LEN];
+ char *result;
+ unsigned int len;
+
+ if (which >= MAX_CPUIDLE_STRING_FILES)
+ return NULL;
+
+ len = sysfs_cpuidle_read_file(cpuidle_string_files[which],
+ linebuf, sizeof(linebuf));
+ if (len == 0)
+ return NULL;
+
+ result = strdup(linebuf);
+ if (result == NULL)
+ return NULL;
+
+ if (result[strlen(result) - 1] == '\n')
+ result[strlen(result) - 1] = '\0';
+
+ return result;
+}
+
+char *sysfs_get_cpuidle_governor(void)
+{
+ char *tmp = sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR_RO);
+ if (!tmp)
+ return sysfs_cpuidle_get_one_string(CPUIDLE_GOVERNOR);
+ else
+ return tmp;
+}
+
+char *sysfs_get_cpuidle_driver(void)
+{
+ return sysfs_cpuidle_get_one_string(CPUIDLE_DRIVER);
+}
+/* CPUidle idlestate specific /sys/devices/system/cpu/cpuX/cpuidle/ access */
+
+/*
+ * Get sched_mc or sched_smt settings
+ * Pass "mc" or "smt" as argument
+ *
+ * Returns negative value on failure
+ */
+int sysfs_get_sched(const char *smt_mc)
+{
+ return -ENODEV;
+}
+
+/*
+ * Get sched_mc or sched_smt settings
+ * Pass "mc" or "smt" as argument
+ *
+ * Returns negative value on failure
+ */
+int sysfs_set_sched(const char *smt_mc, int val)
+{
+ return -ENODEV;
+}
diff --git a/tools/power/cpupower/utils/helpers/sysfs.h b/tools/power/cpupower/utils/helpers/sysfs.h
new file mode 100644
index 000000000..0f0b9ad97
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/sysfs.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CPUPOWER_HELPERS_SYSFS_H__
+#define __CPUPOWER_HELPERS_SYSFS_H__
+
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 255
+#define SYSFS_PATH_MAX 255
+
+extern unsigned int sysfs_read_file(const char *path, char *buf, size_t buflen);
+
+extern unsigned int sysfs_idlestate_file_exists(unsigned int cpu,
+ unsigned int idlestate,
+ const char *fname);
+
+extern int sysfs_is_cpu_online(unsigned int cpu);
+
+extern int sysfs_is_idlestate_disabled(unsigned int cpu,
+ unsigned int idlestate);
+extern int sysfs_idlestate_disable(unsigned int cpu, unsigned int idlestate,
+ unsigned int disable);
+extern unsigned long sysfs_get_idlestate_latency(unsigned int cpu,
+ unsigned int idlestate);
+extern unsigned long sysfs_get_idlestate_usage(unsigned int cpu,
+ unsigned int idlestate);
+extern unsigned long long sysfs_get_idlestate_time(unsigned int cpu,
+ unsigned int idlestate);
+extern char *sysfs_get_idlestate_name(unsigned int cpu,
+ unsigned int idlestate);
+extern char *sysfs_get_idlestate_desc(unsigned int cpu,
+ unsigned int idlestate);
+extern unsigned int sysfs_get_idlestate_count(unsigned int cpu);
+
+extern char *sysfs_get_cpuidle_governor(void);
+extern char *sysfs_get_cpuidle_driver(void);
+
+extern int sysfs_get_sched(const char *smt_mc);
+extern int sysfs_set_sched(const char *smt_mc, int val);
+
+#endif /* __CPUPOWER_HELPERS_SYSFS_H__ */
diff --git a/tools/power/cpupower/utils/helpers/topology.c b/tools/power/cpupower/utils/helpers/topology.c
new file mode 100644
index 000000000..a1a6c6041
--- /dev/null
+++ b/tools/power/cpupower/utils/helpers/topology.c
@@ -0,0 +1,22 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * ToDo: Needs to be done more properly for AMD/Intel specifics
+ */
+
+/* Helper struct for qsort, must be in sync with cpupower_topology.cpu_info */
+/* Be careful: the sort must be passed unsigned values so that offlined
+   cores end up at the end, but double-check for -1 for offlined CPUs
+   elsewhere */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+
+#include <cpuidle.h>
+
+/* CPU topology/hierarchy parsing ******************/
+
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
new file mode 100644
index 000000000..c097a3748
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -0,0 +1,335 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * PCI initialization based on example code from:
+ * Andreas Herrmann <andreas.herrmann3@amd.com>
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+
+#include <pci/pci.h>
+
+#include "idle_monitor/cpupower-monitor.h"
+#include "helpers/helpers.h"
+
+#define PCI_NON_PC0_OFFSET 0xb0
+#define PCI_PC1_OFFSET 0xb4
+#define PCI_PC6_OFFSET 0xb8
+
+#define PCI_MONITOR_ENABLE_REG 0xe0
+
+#define PCI_NON_PC0_ENABLE_BIT 0
+#define PCI_PC1_ENABLE_BIT 1
+#define PCI_PC6_ENABLE_BIT 2
+
+#define PCI_NBP1_STAT_OFFSET 0x98
+#define PCI_NBP1_ACTIVE_BIT 2
+#define PCI_NBP1_ENTERED_BIT 1
+
+#define PCI_NBP1_CAP_OFFSET 0x90
+#define PCI_NBP1_CAPABLE_BIT 31
+
+#define OVERFLOW_MS 343597 /* 32 bit register filled at 12.5 MHz
+ (1 tick per 80ns) */
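+/*
+ * Sanity check of the constant above (illustrative): 2^32 ticks *
+ * 80ns/tick = 343597383680ns ~= 343597ms before the counter wraps.
+ */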
+
+enum amd_fam14h_states {NON_PC0 = 0, PC1, PC6, NBP1,
+ AMD_FAM14H_STATE_NUM};
+
+static int fam14h_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
+ unsigned int cpu);
+
+static cstate_t amd_fam14h_cstates[AMD_FAM14H_STATE_NUM] = {
+ {
+ .name = "!PC0",
+ .desc = N_("Package in sleep state (PC1 or deeper)"),
+ .id = NON_PC0,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "PC1",
+ .desc = N_("Processor Package C1"),
+ .id = PC1,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "PC6",
+ .desc = N_("Processor Package C6"),
+ .id = PC6,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = fam14h_get_count_percent,
+ },
+ {
+ .name = "NBP1",
+ .desc = N_("North Bridge P1 boolean counter (returns 0 or 1)"),
+ .id = NBP1,
+ .range = RANGE_PACKAGE,
+ .get_count = fam14h_nbp1_count,
+ },
+};
+
+static struct pci_access *pci_acc;
+static struct pci_dev *amd_fam14h_pci_dev;
+static int nbp1_entered;
+
+static struct timespec start_time;
+static unsigned long long timediff;
+
+#ifdef DEBUG
+struct timespec dbg_time;
+long dbg_timediff;
+#endif
+
+static unsigned long long *previous_count[AMD_FAM14H_STATE_NUM];
+static unsigned long long *current_count[AMD_FAM14H_STATE_NUM];
+
+static int amd_fam14h_get_pci_info(struct cstate *state,
+ unsigned int *pci_offset,
+ unsigned int *enable_bit,
+ unsigned int cpu)
+{
+ switch (state->id) {
+ case NON_PC0:
+ *enable_bit = PCI_NON_PC0_ENABLE_BIT;
+ *pci_offset = PCI_NON_PC0_OFFSET;
+ break;
+ case PC1:
+ *enable_bit = PCI_PC1_ENABLE_BIT;
+ *pci_offset = PCI_PC1_OFFSET;
+ break;
+ case PC6:
+ *enable_bit = PCI_PC6_ENABLE_BIT;
+ *pci_offset = PCI_PC6_OFFSET;
+ break;
+ case NBP1:
+ *enable_bit = PCI_NBP1_ENTERED_BIT;
+ *pci_offset = PCI_NBP1_STAT_OFFSET;
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+
+static int amd_fam14h_init(cstate_t *state, unsigned int cpu)
+{
+ int enable_bit, pci_offset, ret;
+ uint32_t val;
+
+ ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
+ if (ret)
+ return ret;
+
+ /* NBP1 needs extra treatment: write 1 to D18F6x98 bit 1 to initialize it */
+ if (state->id == NBP1) {
+ val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
+ val |= 1 << enable_bit;
+  pci_write_long(amd_fam14h_pci_dev, pci_offset, val);
+ return ret;
+ }
+
+ /* Enable monitor */
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
+ dprint("Init %s: read at offset: 0x%x val: %u\n", state->name,
+ PCI_MONITOR_ENABLE_REG, (unsigned int) val);
+ val |= 1 << enable_bit;
+ pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);
+
+ dprint("Init %s: offset: 0x%x enable_bit: %d - val: %u (%u)\n",
+ state->name, PCI_MONITOR_ENABLE_REG, enable_bit,
+ (unsigned int) val, cpu);
+
+ /* Set counter to zero */
+ pci_write_long(amd_fam14h_pci_dev, pci_offset, 0);
+ previous_count[state->id][cpu] = 0;
+
+ return 0;
+}
+
+static int amd_fam14h_disable(cstate_t *state, unsigned int cpu)
+{
+ int enable_bit, pci_offset, ret;
+ uint32_t val;
+
+ ret = amd_fam14h_get_pci_info(state, &pci_offset, &enable_bit, cpu);
+ if (ret)
+ return ret;
+
+ val = pci_read_long(amd_fam14h_pci_dev, pci_offset);
+ dprint("%s: offset: 0x%x %u\n", state->name, pci_offset, val);
+ if (state->id == NBP1) {
+  /* were the NBP1 active/entered status bits set? */
+ nbp1_entered = (val & (1 << PCI_NBP1_ACTIVE_BIT)) |
+ (val & (1 << PCI_NBP1_ENTERED_BIT));
+
+ dprint("NBP1 was %sentered - 0x%x - enable_bit: "
+ "%d - pci_offset: 0x%x\n",
+ nbp1_entered ? "" : "not ",
+ val, enable_bit, pci_offset);
+ return ret;
+ }
+ current_count[state->id][cpu] = val;
+
+ dprint("%s: Current - %llu (%u)\n", state->name,
+ current_count[state->id][cpu], cpu);
+ dprint("%s: Previous - %llu (%u)\n", state->name,
+ previous_count[state->id][cpu], cpu);
+
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG);
+ val &= ~(1 << enable_bit);
+ pci_write_long(amd_fam14h_pci_dev, PCI_MONITOR_ENABLE_REG, val);
+
+ return 0;
+}
+
+static int fam14h_nbp1_count(unsigned int id, unsigned long long *count,
+ unsigned int cpu)
+{
+ if (id == NBP1) {
+ if (nbp1_entered)
+ *count = 1;
+ else
+ *count = 0;
+ return 0;
+ }
+ return -1;
+}
+static int fam14h_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long diff;
+
+ if (id >= AMD_FAM14H_STATE_NUM)
+ return -1;
+ /* residency count is in 80ns units -> divide by 12.5 for us residency */
+ diff = current_count[id][cpu] - previous_count[id][cpu];
+
+ if (timediff == 0)
+ *percent = 0.0;
+ else
+ *percent = 100.0 * diff / timediff / 12.5;
+
+ dprint("Timediff: %llu - res~: %lu us - percent: %.2f %%\n",
+ timediff, diff * 10 / 125, *percent);
+
+ return 0;
+}
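+
+/*
+ * Worked example (illustrative): a counter diff of 6250000 ticks of
+ * 80ns equals 0.5s of residency; over a timediff of 1000000us this
+ * yields 100 * 6250000 / 1000000 / 12.5 = 50 percent.
+ */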
+
+static int amd_fam14h_start(void)
+{
+ int num, cpu;
+ clock_gettime(CLOCK_REALTIME, &start_time);
+ for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ amd_fam14h_init(&amd_fam14h_cstates[num], cpu);
+ }
+#ifdef DEBUG
+ clock_gettime(CLOCK_REALTIME, &dbg_time);
+ dbg_timediff = timespec_diff_us(start_time, dbg_time);
+ dprint("Enabling counters took: %lu us\n",
+ dbg_timediff);
+#endif
+ return 0;
+}
+
+static int amd_fam14h_stop(void)
+{
+ int num, cpu;
+ struct timespec end_time;
+
+ clock_gettime(CLOCK_REALTIME, &end_time);
+
+ for (num = 0; num < AMD_FAM14H_STATE_NUM; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ amd_fam14h_disable(&amd_fam14h_cstates[num], cpu);
+ }
+#ifdef DEBUG
+ clock_gettime(CLOCK_REALTIME, &dbg_time);
+ dbg_timediff = timespec_diff_us(end_time, dbg_time);
+ dprint("Disabling counters took: %lu ns\n", dbg_timediff);
+#endif
+ timediff = timespec_diff_us(start_time, end_time);
+ if (timediff / 1000 > OVERFLOW_MS)
+ print_overflow_err((unsigned int)timediff / 1000000,
+ OVERFLOW_MS / 1000);
+
+ return 0;
+}
+
+static int is_nbp1_capable(void)
+{
+ uint32_t val;
+ val = pci_read_long(amd_fam14h_pci_dev, PCI_NBP1_CAP_OFFSET);
+ return val & (1 << PCI_NBP1_CAPABLE_BIT);
+}
+
+struct cpuidle_monitor *amd_fam14h_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_AMD)
+ return NULL;
+
+ if (cpupower_cpu_info.family == 0x14)
+ strncpy(amd_fam14h_monitor.name, "Fam_14h",
+ MONITOR_NAME_LEN - 1);
+ else if (cpupower_cpu_info.family == 0x12)
+ strncpy(amd_fam14h_monitor.name, "Fam_12h",
+ MONITOR_NAME_LEN - 1);
+ else
+ return NULL;
+
+ /* We do not allocate for the NBP1 machine-wide counter */
+ for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+
+ /* We need the PCI device at slot 0x18, func 6; compare with the
+    BKDG for fam 12h/14h */
+ amd_fam14h_pci_dev = pci_slot_func_init(&pci_acc, 0x18, 6);
+ if (amd_fam14h_pci_dev == NULL || pci_acc == NULL)
+ return NULL;
+
+ if (!is_nbp1_capable())
+ amd_fam14h_monitor.hw_states_num = AMD_FAM14H_STATE_NUM - 1;
+
+ amd_fam14h_monitor.name_len = strlen(amd_fam14h_monitor.name);
+ return &amd_fam14h_monitor;
+}
+
+static void amd_fam14h_unregister(void)
+{
+ int num;
+ for (num = 0; num < AMD_FAM14H_STATE_NUM - 1; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ pci_cleanup(pci_acc);
+}
+
+struct cpuidle_monitor amd_fam14h_monitor = {
+ .name = "",
+ .hw_states = amd_fam14h_cstates,
+ .hw_states_num = AMD_FAM14H_STATE_NUM,
+ .start = amd_fam14h_start,
+ .stop = amd_fam14h_stop,
+ .do_register = amd_fam14h_register,
+ .unregister = amd_fam14h_unregister,
+ .needs_root = 1,
+ .overflow_s = OVERFLOW_MS / 1000,
+};
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
new file mode 100644
index 000000000..85a8f0cc0
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -0,0 +1,214 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <cpuidle.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define CPUIDLE_STATES_MAX 10
+static cstate_t cpuidle_cstates[CPUIDLE_STATES_MAX];
+struct cpuidle_monitor cpuidle_sysfs_monitor;
+
+static unsigned long long **previous_count;
+static unsigned long long **current_count;
+static struct timespec start_time;
+static unsigned long long timediff;
+
+static int cpuidle_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long long statediff = current_count[cpu][id]
+ - previous_count[cpu][id];
+ dprint("%s: - diff: %llu - percent: %f (%u)\n",
+ cpuidle_cstates[id].name, timediff, *percent, cpu);
+
+ if (timediff == 0)
+ *percent = 0.0;
+ else
+ *percent = ((100.0 * statediff) / timediff);
+
+ dprint("%s: - timediff: %llu - statediff: %llu - percent: %f (%u)\n",
+ cpuidle_cstates[id].name, timediff, statediff, *percent, cpu);
+
+ return 0;
+}
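+
+/*
+ * Example (illustrative): the cpuidle sysfs "time" files count in us,
+ * so statediff = 800000us over timediff = 1000000us yields 80 percent.
+ */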
+
+static int cpuidle_start(void)
+{
+ int cpu, state;
+ clock_gettime(CLOCK_REALTIME, &start_time);
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
+ state++) {
+ previous_count[cpu][state] =
+ cpuidle_state_time(cpu, state);
+ dprint("CPU %d - State: %d - Val: %llu\n",
+ cpu, state, previous_count[cpu][state]);
+ }
+ }
+ return 0;
+}
+
+static int cpuidle_stop(void)
+{
+ int cpu, state;
+ struct timespec end_time;
+ clock_gettime(CLOCK_REALTIME, &end_time);
+ timediff = timespec_diff_us(start_time, end_time);
+
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ for (state = 0; state < cpuidle_sysfs_monitor.hw_states_num;
+ state++) {
+ current_count[cpu][state] =
+ cpuidle_state_time(cpu, state);
+ dprint("CPU %d - State: %d - Val: %llu\n",
+    cpu, state, current_count[cpu][state]);
+ }
+ }
+ return 0;
+}
+
+void fix_up_intel_idle_driver_name(char *tmp, int num)
+{
+ /* fix up cpuidle name for intel idle driver */
+ if (!strncmp(tmp, "NHM-", 4)) {
+ switch (num) {
+ case 1:
+ strcpy(tmp, "C1");
+ break;
+ case 2:
+ strcpy(tmp, "C3");
+ break;
+ case 3:
+ strcpy(tmp, "C6");
+ break;
+ }
+ } else if (!strncmp(tmp, "SNB-", 4)) {
+ switch (num) {
+ case 1:
+ strcpy(tmp, "C1");
+ break;
+ case 2:
+ strcpy(tmp, "C3");
+ break;
+ case 3:
+ strcpy(tmp, "C6");
+ break;
+ case 4:
+ strcpy(tmp, "C7");
+ break;
+ }
+ } else if (!strncmp(tmp, "ATM-", 4)) {
+ switch (num) {
+ case 1:
+ strcpy(tmp, "C1");
+ break;
+ case 2:
+ strcpy(tmp, "C2");
+ break;
+ case 3:
+ strcpy(tmp, "C4");
+ break;
+ case 4:
+ strcpy(tmp, "C6");
+ break;
+ }
+ }
+}
+
+#ifdef __powerpc__
+void map_power_idle_state_name(char *tmp)
+{
+ if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN))
+ strcpy(tmp, "stop0L");
+ else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN))
+ strcpy(tmp, "stop1L");
+ else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN))
+ strcpy(tmp, "stop2L");
+}
+#else
+void map_power_idle_state_name(char *tmp) { }
+#endif
+
+static struct cpuidle_monitor *cpuidle_register(void)
+{
+ int num;
+ char *tmp;
+ int this_cpu;
+
+ this_cpu = sched_getcpu();
+
+ /* Assume idle state count is the same for all CPUs */
+ cpuidle_sysfs_monitor.hw_states_num = cpuidle_state_count(this_cpu);
+
+ if (cpuidle_sysfs_monitor.hw_states_num <= 0)
+ return NULL;
+
+ for (num = 0; num < cpuidle_sysfs_monitor.hw_states_num; num++) {
+ tmp = cpuidle_state_name(this_cpu, num);
+ if (tmp == NULL)
+ continue;
+
+ map_power_idle_state_name(tmp);
+ fix_up_intel_idle_driver_name(tmp, num);
+ strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
+ free(tmp);
+
+ tmp = cpuidle_state_desc(this_cpu, num);
+ if (tmp == NULL)
+ continue;
+ strncpy(cpuidle_cstates[num].desc, tmp, CSTATE_DESC_LEN - 1);
+ free(tmp);
+
+ cpuidle_cstates[num].range = RANGE_THREAD;
+ cpuidle_cstates[num].id = num;
+ cpuidle_cstates[num].get_count_percent =
+ cpuidle_get_count_percent;
+ }
+
+ /* Free this at program termination */
+ previous_count = malloc(sizeof(long long *) * cpu_count);
+ current_count = malloc(sizeof(long long *) * cpu_count);
+ for (num = 0; num < cpu_count; num++) {
+ previous_count[num] = malloc(sizeof(long long) *
+ cpuidle_sysfs_monitor.hw_states_num);
+ current_count[num] = malloc(sizeof(long long) *
+ cpuidle_sysfs_monitor.hw_states_num);
+ }
+
+ cpuidle_sysfs_monitor.name_len = strlen(cpuidle_sysfs_monitor.name);
+ return &cpuidle_sysfs_monitor;
+}
+
+void cpuidle_unregister(void)
+{
+ int num;
+
+ for (num = 0; num < cpu_count; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ free(previous_count);
+ free(current_count);
+}
+
+struct cpuidle_monitor cpuidle_sysfs_monitor = {
+ .name = "Idle_Stats",
+ .hw_states = cpuidle_cstates,
+ .start = cpuidle_start,
+ .stop = cpuidle_stop,
+ .do_register = cpuidle_register,
+ .unregister = cpuidle_unregister,
+ .needs_root = 0,
+ .overflow_s = UINT_MAX,
+};
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
new file mode 100644
index 000000000..4a27c55d5
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -0,0 +1,469 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Output format inspired by Len Brown's <lenb@kernel.org> turbostat tool.
+ *
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <libgen.h>
+
+#include "idle_monitor/cpupower-monitor.h"
+#include "idle_monitor/idle_monitors.h"
+#include "helpers/helpers.h"
+
+/* Define pointers to all monitors. */
+#define DEF(x) & x ## _monitor ,
+struct cpuidle_monitor *all_monitors[] = {
+#include "idle_monitors.def"
+0
+};
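+
+/*
+ * X-macro sketch: with DEF(x) defined as above, including
+ * idle_monitors.def expands e.g. DEF(mperf) into "&mperf_monitor,",
+ * so all_monitors ends up as a 0-terminated pointer array.
+ */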
+
+int cpu_count;
+
+static struct cpuidle_monitor *monitors[MONITORS_MAX];
+static unsigned int avail_monitors;
+
+static char *progname;
+
+enum operation_mode_e { list = 1, show, show_all };
+static int mode;
+static int interval = 1;
+static char *show_monitors_param;
+static struct cpupower_topology cpu_top;
+static unsigned int wake_cpus;
+
+/* ToDo: Document this in the manpage */
+static char range_abbr[RANGE_MAX] = { 'T', 'C', 'P', 'M', };
+
+static void print_wrong_arg_exit(void)
+{
+ printf(_("invalid or unknown argument\n"));
+ exit(EXIT_FAILURE);
+}
+
+long long timespec_diff_us(struct timespec start, struct timespec end)
+{
+ struct timespec temp;
+ if ((end.tv_nsec - start.tv_nsec) < 0) {
+ temp.tv_sec = end.tv_sec - start.tv_sec - 1;
+ temp.tv_nsec = 1000000000 + end.tv_nsec - start.tv_nsec;
+ } else {
+ temp.tv_sec = end.tv_sec - start.tv_sec;
+ temp.tv_nsec = end.tv_nsec - start.tv_nsec;
+ }
+ return (temp.tv_sec * 1000000) + (temp.tv_nsec / 1000);
+}
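+
+/*
+ * Example (illustrative): start = {2s, 900000000ns} and
+ * end = {3s, 100000000ns} take the borrow branch, giving
+ * temp = {0s, 200000000ns} and a return value of 200000us.
+ */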
+
+void print_n_spaces(int n)
+{
+ int x;
+ for (x = 0; x < n; x++)
+ printf(" ");
+}
+
+/*
+ * s is padded with leading and trailing spaces so that its length
+ * becomes at least n (not counting the terminating NUL)
+ */
+int fill_string_with_spaces(char *s, int n)
+{
+ char *temp;
+ int len = strlen(s);
+
+ if (len >= n)
+ return -1;
+
+ temp = malloc(sizeof(char) * (n+1));
+ for (; len < n; len++)
+ s[len] = ' ';
+ s[len] = '\0';
+ snprintf(temp, n+1, " %s", s);
+ strcpy(s, temp);
+ free(temp);
+ return 0;
+}
+
+#define MAX_COL_WIDTH 6
+void print_header(int topology_depth)
+{
+ unsigned int mon;
+ int state, need_len;
+ cstate_t s;
+ char buf[128] = "";
+
+ fill_string_with_spaces(buf, topology_depth * 5 - 1);
+ printf("%s|", buf);
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1)
+ - 1;
+ if (mon != 0)
+ printf("||");
+ sprintf(buf, "%s", monitors[mon]->name);
+ fill_string_with_spaces(buf, need_len);
+ printf("%s", buf);
+ }
+ printf("\n");
+
+ if (topology_depth > 2)
+ printf(" PKG|");
+ if (topology_depth > 1)
+ printf("CORE|");
+ if (topology_depth > 0)
+ printf(" CPU|");
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ if (mon != 0)
+ printf("||");
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ if (state != 0)
+ printf("|");
+ s = monitors[mon]->hw_states[state];
+ sprintf(buf, "%s", s.name);
+ fill_string_with_spaces(buf, MAX_COL_WIDTH);
+ printf("%s", buf);
+ }
+ printf(" ");
+ }
+ printf("\n");
+}
+
+void print_results(int topology_depth, int cpu)
+{
+ unsigned int mon;
+ int state, ret;
+ double percent;
+ unsigned long long result;
+ cstate_t s;
+
+ /* Be careful: CPUs may have been re-sorted by pkg value; do not just use cpu */
+ if (!bitmask_isbitset(cpus_chosen, cpu_top.core_info[cpu].cpu))
+ return;
+ if (!cpu_top.core_info[cpu].is_online &&
+ cpu_top.core_info[cpu].pkg == -1)
+ return;
+
+ if (topology_depth > 2)
+ printf("%4d|", cpu_top.core_info[cpu].pkg);
+ if (topology_depth > 1)
+ printf("%4d|", cpu_top.core_info[cpu].core);
+ if (topology_depth > 0)
+ printf("%4d|", cpu_top.core_info[cpu].cpu);
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ if (mon != 0)
+ printf("||");
+
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ if (state != 0)
+ printf("|");
+
+ s = monitors[mon]->hw_states[state];
+
+ if (s.get_count_percent) {
+ ret = s.get_count_percent(s.id, &percent,
+ cpu_top.core_info[cpu].cpu);
+ if (ret)
+ printf("******");
+ else if (percent >= 100.0)
+ printf("%6.1f", percent);
+ else
+ printf("%6.2f", percent);
+ } else if (s.get_count) {
+ ret = s.get_count(s.id, &result,
+ cpu_top.core_info[cpu].cpu);
+ if (ret)
+ printf("******");
+ else
+ printf("%6llu", result);
+ } else {
+ printf(_("Monitor %s, Counter %s has no count "
+ "function. Implementation error\n"),
+ monitors[mon]->name, s.name);
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ /*
+ * The monitor could still provide useful data, for example
+ * AMD HW counters partly sit in PCI config space.
+ * It's up to the monitor plug-in to check .is_online, this one
+ * is just for additional info.
+ */
+ if (!cpu_top.core_info[cpu].is_online &&
+     cpu_top.core_info[cpu].pkg != -1) {
+  printf(_(" *is offline\n"));
+  return;
+ }
+ printf("\n");
+}
+
+/*
+ * param: string passed via the -m option (the list of monitors to show)
+ *
+ * Monitors must have been registered already; matching monitors
+ * are picked out and the available monitors array is overridden
+ * with the matching ones.
+ *
+ * Monitors are sorted in the order the user passed them.
+ */
+
+static void parse_monitor_param(char *param)
+{
+ unsigned int num;
+ int mon, hits = 0;
+ char *tmp = param, *token;
+ struct cpuidle_monitor *tmp_mons[MONITORS_MAX];
+
+ for (mon = 0; mon < MONITORS_MAX; mon++, tmp = NULL) {
+ token = strtok(tmp, ",");
+ if (token == NULL)
+ break;
+ if (strlen(token) >= MONITOR_NAME_LEN) {
+ printf(_("%s: max monitor name length"
+ " (%d) exceeded\n"), token, MONITOR_NAME_LEN);
+ continue;
+ }
+
+ for (num = 0; num < avail_monitors; num++) {
+ if (!strcmp(monitors[num]->name, token)) {
+ dprint("Found requested monitor: %s\n", token);
+ tmp_mons[hits] = monitors[num];
+ hits++;
+ }
+ }
+ }
+ if (hits == 0) {
+ printf(_("No matching monitor found in %s, "
+ "try -l option\n"), param);
+ exit(EXIT_FAILURE);
+ }
+ /* Override the detected/registered monitors array with the requested ones */
+ memcpy(monitors, tmp_mons,
+ sizeof(struct cpuidle_monitor *) * MONITORS_MAX);
+ avail_monitors = hits;
+}
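+
+/*
+ * Example (illustrative): "cpupower monitor -m Mperf,Idle_Stats"
+ * makes parse_monitor_param() keep only those two monitors, shown
+ * in exactly that order.
+ */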
+
+void list_monitors(void)
+{
+ unsigned int mon;
+ int state;
+ cstate_t s;
+
+ for (mon = 0; mon < avail_monitors; mon++) {
+ printf(_("Monitor \"%s\" (%d states) - Might overflow after %u "
+ "s\n"),
+ monitors[mon]->name, monitors[mon]->hw_states_num,
+ monitors[mon]->overflow_s);
+
+ for (state = 0; state < monitors[mon]->hw_states_num; state++) {
+ s = monitors[mon]->hw_states[state];
+ /*
+ * ToDo show more state capabilities:
+    * percent, time (granularity)
+ */
+ printf("%s\t[%c] -> %s\n", s.name, range_abbr[s.range],
+ gettext(s.desc));
+ }
+ }
+}
+
+int fork_it(char **argv)
+{
+ int status;
+ unsigned int num;
+ unsigned long long timediff;
+ pid_t child_pid;
+ struct timespec start, end;
+
+ child_pid = fork();
+ clock_gettime(CLOCK_REALTIME, &start);
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->start();
+
+ if (!child_pid) {
+  /* child */
+  execvp(argv[0], argv);
+  /* reached only if execvp() fails */
+  perror("execvp");
+  exit(1);
+ } else {
+ /* parent */
+ if (child_pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ signal(SIGINT, SIG_IGN);
+ signal(SIGQUIT, SIG_IGN);
+ if (waitpid(child_pid, &status, 0) == -1) {
+ perror("wait");
+ exit(1);
+ }
+ }
+ clock_gettime(CLOCK_REALTIME, &end);
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->stop();
+
+ timediff = timespec_diff_us(start, end);
+ if (WIFEXITED(status))
+ printf(_("%s took %.5f seconds and exited with status %d\n"),
+ argv[0], timediff / (1000.0 * 1000),
+ WEXITSTATUS(status));
+ return 0;
+}
+
+int do_interval_measure(int i)
+{
+ unsigned int num;
+ int cpu;
+
+ if (wake_cpus)
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ bind_cpu(cpu);
+
+ for (num = 0; num < avail_monitors; num++) {
+ dprint("HW C-state residency monitor: %s - States: %d\n",
+ monitors[num]->name, monitors[num]->hw_states_num);
+ monitors[num]->start();
+ }
+
+ sleep(i);
+
+ if (wake_cpus)
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ bind_cpu(cpu);
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->stop();
+
+ return 0;
+}
+
+static void cmdline(int argc, char *argv[])
+{
+ int opt;
+ progname = basename(argv[0]);
+
+ while ((opt = getopt(argc, argv, "+lci:m:")) != -1) {
+ switch (opt) {
+ case 'l':
+ if (mode)
+ print_wrong_arg_exit();
+ mode = list;
+ break;
+ case 'i':
+ /* only allow -i with -m or no option */
+ if (mode && mode != show)
+ print_wrong_arg_exit();
+ interval = atoi(optarg);
+ break;
+ case 'm':
+ if (mode)
+ print_wrong_arg_exit();
+ mode = show;
+ show_monitors_param = optarg;
+ break;
+ case 'c':
+ wake_cpus = 1;
+ break;
+ default:
+ print_wrong_arg_exit();
+ }
+ }
+ if (!mode)
+ mode = show_all;
+}
+
+int cmd_monitor(int argc, char **argv)
+{
+ unsigned int num;
+ struct cpuidle_monitor *test_mon;
+ int cpu;
+
+ cmdline(argc, argv);
+ cpu_count = get_cpu_topology(&cpu_top);
+ if (cpu_count < 0) {
+ printf(_("Cannot read number of available processors\n"));
+ return EXIT_FAILURE;
+ }
+
+ if (!cpu_top.core_info[0].is_online)
+ printf("WARNING: at least one cpu is offline\n");
+
+ /* Default is: monitor all CPUs */
+ if (bitmask_isallclear(cpus_chosen))
+ bitmask_setall(cpus_chosen);
+
+ dprint("System has up to %d CPU cores\n", cpu_count);
+
+ for (num = 0; all_monitors[num]; num++) {
+ dprint("Try to register: %s\n", all_monitors[num]->name);
+ test_mon = all_monitors[num]->do_register();
+ if (test_mon) {
+ if (test_mon->needs_root && !run_as_root) {
+ fprintf(stderr, _("Available monitor %s needs "
+ "root access\n"), test_mon->name);
+ continue;
+ }
+ monitors[avail_monitors] = test_mon;
+ dprint("%s registered\n", all_monitors[num]->name);
+ avail_monitors++;
+ }
+ }
+
+ if (avail_monitors == 0) {
+ printf(_("No HW Cstate monitors found\n"));
+ return 1;
+ }
+
+ if (mode == list) {
+ list_monitors();
+ exit(EXIT_SUCCESS);
+ }
+
+ if (mode == show)
+ parse_monitor_param(show_monitors_param);
+
+ dprint("Packages: %d - Cores: %d - CPUs: %d\n",
+ cpu_top.pkgs, cpu_top.cores, cpu_count);
+
+ /*
+ * if any params left, it must be a command to fork
+ */
+ if (argc - optind)
+ fork_it(argv + optind);
+ else
+ do_interval_measure(interval);
+
+ /* ToDo: Topology parsing needs fixing first to do
+ this more generically */
+ if (cpu_top.pkgs > 1)
+ print_header(3);
+ else
+ print_header(1);
+
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ if (cpu_top.pkgs > 1)
+ print_results(3, cpu);
+ else
+ print_results(1, cpu);
+ }
+
+ for (num = 0; num < avail_monitors; num++)
+ monitors[num]->unregister();
+
+ cpu_topology_release(cpu_top);
+ return 0;
+}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
new file mode 100644
index 000000000..06b3cd6de
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -0,0 +1,94 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ */
+
+#ifndef __CPUIDLE_INFO_HW__
+#define __CPUIDLE_INFO_HW__
+
+#include <stdarg.h>
+#include <time.h>
+
+#include "idle_monitor/idle_monitors.h"
+
+#define MONITORS_MAX 20
+#define MONITOR_NAME_LEN 20
+
+/* CSTATE_NAME_LEN is limited by header field width defined
+ * in cpupower-monitor.c. Header field width is defined to be
+ * sum of percent width and two spaces for padding.
+ */
+#ifdef __powerpc__
+#define CSTATE_NAME_LEN 7
+#else
+#define CSTATE_NAME_LEN 5
+#endif
+#define CSTATE_DESC_LEN 60
+
+extern int cpu_count;
+
+/* Hard to define the right names ...: */
+enum power_range_e {
+ RANGE_THREAD, /* Lowest in topology hierarchy, AMD: core, Intel: thread
+ kernel sysfs: cpu */
+ RANGE_CORE, /* AMD: unit, Intel: core, kernel_sysfs: core_id */
+ RANGE_PACKAGE, /* Package, processor socket */
+ RANGE_MACHINE, /* Machine, platform wide */
+ RANGE_MAX };
+
+typedef struct cstate {
+ int id;
+ enum power_range_e range;
+ char name[CSTATE_NAME_LEN];
+ char desc[CSTATE_DESC_LEN];
+
+ /* either provide a percentage or a general count */
+ int (*get_count_percent)(unsigned int self_id, double *percent,
+ unsigned int cpu);
+ int (*get_count)(unsigned int self_id, unsigned long long *count,
+ unsigned int cpu);
+} cstate_t;
+
+struct cpuidle_monitor {
+ /* Name must not contain whitespaces */
+ char name[MONITOR_NAME_LEN];
+ int name_len;
+ int hw_states_num;
+ cstate_t *hw_states;
+ int (*start) (void);
+ int (*stop) (void);
+ struct cpuidle_monitor* (*do_register) (void);
+ void (*unregister)(void);
+ unsigned int overflow_s;
+ int needs_root;
+};
+
+extern long long timespec_diff_us(struct timespec start, struct timespec end);
+
+#define print_overflow_err(mes, ov) \
+{ \
+ fprintf(stderr, gettext("Measure took %u seconds, but registers could " \
+ "overflow at %u seconds, results " \
+ "could be inaccurate\n"), mes, ov); \
+}
+
+/* Taken over from x86info project sources -> return 0 on success */
+#include <sched.h>
+#include <sys/types.h>
+#include <unistd.h>
+static inline int bind_cpu(int cpu)
+{
+ cpu_set_t set;
+
+ if (sched_getaffinity(getpid(), sizeof(set), &set) == 0) {
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ return sched_setaffinity(getpid(), sizeof(set), &set);
+ }
+ return 1;
+}
+
+#endif /* __CPUIDLE_INFO_HW__ */
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
new file mode 100644
index 000000000..3e4ff4a1c
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -0,0 +1,195 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on SandyBridge monitor. Implements the new package C-states
+ * (PC8, PC9, PC10) coming with a specific Haswell (family 0x45) CPU.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C8_RESIDENCY 0x00000630
+#define MSR_PKG_C9_RESIDENCY 0x00000631
+#define MSR_PKG_C10_RESIDENCY 0x00000632
+
+#define MSR_TSC 0x10
+
+enum intel_hsw_ext_id { PC8 = 0, PC9, PC10, HSW_EXT_CSTATE_COUNT,
+ TSC = 0xFFFF };
+
+static int hsw_ext_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t hsw_ext_cstates[HSW_EXT_CSTATE_COUNT] = {
+ {
+ .name = "PC8",
+ .desc = N_("Processor Package C8"),
+ .id = PC8,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = hsw_ext_get_count_percent,
+ },
+ {
+ .name = "PC9",
+ .desc = N_("Processor Package C9"),
+ .id = PC9,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = hsw_ext_get_count_percent,
+ },
+ {
+ .name = "PC10",
+ .desc = N_("Processor Package C10"),
+ .id = PC10,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = hsw_ext_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[HSW_EXT_CSTATE_COUNT];
+static unsigned long long *current_count[HSW_EXT_CSTATE_COUNT];
+/* valid flag for all CPUs. If an MSR read failed it will be zero */
+static int *is_valid;
+
+static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val,
+ unsigned int cpu)
+{
+ int msr;
+
+ switch (id) {
+ case PC8:
+ msr = MSR_PKG_C8_RESIDENCY;
+ break;
+ case PC9:
+ msr = MSR_PKG_C9_RESIDENCY;
+ break;
+ case PC10:
+ msr = MSR_PKG_C10_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ }
+ if (read_msr(cpu, msr, val))
+ return -1;
+ return 0;
+}
+
+static int hsw_ext_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 *
+ (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ hsw_ext_cstates[id].name, previous_count[id][cpu],
+ current_count[id][cpu], cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ hsw_ext_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
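+
+/*
+ * Worked example (illustrative): count_diff = 500000000 residency
+ * ticks over tsc_diff = 2000000000 TSC ticks gives
+ * 100.0 * 500000000 / 2000000000 = 25 percent.
+ */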
+
+static int hsw_ext_start(void)
+{
+ int num, cpu;
+ unsigned long long val;
+
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ hsw_ext_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ hsw_ext_get_count(TSC, &tsc_at_measure_start, base_cpu);
+ return 0;
+}
+
+static int hsw_ext_stop(void)
+{
+ unsigned long long val;
+ int num, cpu;
+
+ hsw_ext_get_count(TSC, &tsc_at_measure_end, base_cpu);
+
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !hsw_ext_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ return 0;
+}
+
+struct cpuidle_monitor intel_hsw_ext_monitor;
+
+static struct cpuidle_monitor *hsw_ext_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL
+ || cpupower_cpu_info.family != 6)
+ return NULL;
+
+ switch (cpupower_cpu_info.model) {
+ case 0x45: /* HSW */
+ break;
+ default:
+ return NULL;
+ }
+
+ is_valid = calloc(cpu_count, sizeof(int));
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+ intel_hsw_ext_monitor.name_len = strlen(intel_hsw_ext_monitor.name);
+ return &intel_hsw_ext_monitor;
+}
+
+void hsw_ext_unregister(void)
+{
+ int num;
+ free(is_valid);
+ for (num = 0; num < HSW_EXT_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+}
+
+struct cpuidle_monitor intel_hsw_ext_monitor = {
+ .name = "HaswellExtended",
+ .hw_states = hsw_ext_cstates,
+ .hw_states_num = HSW_EXT_CSTATE_COUNT,
+ .start = hsw_ext_start,
+ .stop = hsw_ext_stop,
+ .do_register = hsw_ext_register,
+ .unregister = hsw_ext_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.def b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def
new file mode 100644
index 000000000..0d6ba4dbb
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.def
@@ -0,0 +1,8 @@
+#if defined(__i386__) || defined(__x86_64__)
+DEF(amd_fam14h)
+DEF(intel_nhm)
+DEF(intel_snb)
+DEF(intel_hsw_ext)
+DEF(mperf)
+#endif
+DEF(cpuidle_sysfs)
diff --git a/tools/power/cpupower/utils/idle_monitor/idle_monitors.h b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h
new file mode 100644
index 000000000..4fcdeb1e0
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/idle_monitors.h
@@ -0,0 +1,18 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on the idea from Michael Matz <matz@suse.de>
+ *
+ */
+
+#ifndef _CPUIDLE_IDLE_MONITORS_H_
+#define _CPUIDLE_IDLE_MONITORS_H_
+
+#define DEF(x) extern struct cpuidle_monitor x ##_monitor;
+#include "idle_monitors.def"
+#undef DEF
+extern struct cpuidle_monitor *all_monitors[];
+
+#endif /* _CPUIDLE_IDLE_MONITORS_H_ */
diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
new file mode 100644
index 000000000..d7c2a6d13
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c
@@ -0,0 +1,340 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include <cpufreq.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_APERF 0xE8
+#define MSR_MPERF 0xE7
+
+#define MSR_TSC 0x10
+
+#define MSR_AMD_HWCR 0xc0010015
+
+enum mperf_id { C0 = 0, Cx, AVG_FREQ, MPERF_CSTATE_COUNT };
+
+static int mperf_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ unsigned int cpu);
+static struct timespec time_start, time_end;
+
+static cstate_t mperf_cstates[MPERF_CSTATE_COUNT] = {
+ {
+ .name = "C0",
+ .desc = N_("Processor Core not idle"),
+ .id = C0,
+ .range = RANGE_THREAD,
+ .get_count_percent = mperf_get_count_percent,
+ },
+ {
+ .name = "Cx",
+ .desc = N_("Processor Core in an idle state"),
+ .id = Cx,
+ .range = RANGE_THREAD,
+ .get_count_percent = mperf_get_count_percent,
+ },
+
+ {
+ .name = "Freq",
+ .desc = N_("Average Frequency (including boost) in MHz"),
+ .id = AVG_FREQ,
+ .range = RANGE_THREAD,
+ .get_count = mperf_get_count_freq,
+ },
+};
+
+enum MAX_FREQ_MODE { MAX_FREQ_SYSFS, MAX_FREQ_TSC_REF };
+static int max_freq_mode;
+/*
+ * The max frequency mperf is ticking at (in C0), retrieved either via:
+ * 1) calculation after the measurement, if we know the TSC ticks at
+ *    mperf/P0 frequency
+ * 2) cpufreq /sys/devices/.../cpu0/cpufreq/cpuinfo_max_freq at init time
+ * (1) is preferred as it also works without the cpufreq subsystem
+ * (e.g. on Xen)
+ */
+static unsigned long max_frequency;
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *mperf_previous_count;
+static unsigned long long *aperf_previous_count;
+static unsigned long long *mperf_current_count;
+static unsigned long long *aperf_current_count;
+
+/* valid flag for all CPUs. If an MSR read failed it will be zero */
+static int *is_valid;
+
+static int mperf_get_tsc(unsigned long long *tsc)
+{
+ int ret;
+
+ ret = read_msr(base_cpu, MSR_TSC, tsc);
+ if (ret)
+ dprint("Reading TSC MSR failed, returning %llu\n", *tsc);
+ return ret;
+}
+
+static int mperf_init_stats(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ ret = read_msr(cpu, MSR_APERF, &val);
+ aperf_previous_count[cpu] = val;
+ ret |= read_msr(cpu, MSR_MPERF, &val);
+ mperf_previous_count[cpu] = val;
+ is_valid[cpu] = !ret;
+
+ return 0;
+}
+
+static int mperf_measure_stats(unsigned int cpu)
+{
+ unsigned long long val;
+ int ret;
+
+ ret = read_msr(cpu, MSR_APERF, &val);
+ aperf_current_count[cpu] = val;
+ ret |= read_msr(cpu, MSR_MPERF, &val);
+ mperf_current_count[cpu] = val;
+ is_valid[cpu] = !ret;
+
+ return 0;
+}
+
+static int mperf_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ unsigned long long aperf_diff, mperf_diff, tsc_diff;
+ unsigned long long timediff;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ if (id != C0 && id != Cx)
+ return -1;
+
+ mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+
+ if (max_freq_mode == MAX_FREQ_TSC_REF) {
+ tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+ *percent = 100.0 * mperf_diff / tsc_diff;
+ dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n",
+ mperf_cstates[id].name, mperf_diff, tsc_diff);
+ } else if (max_freq_mode == MAX_FREQ_SYSFS) {
+ timediff = max_frequency * timespec_diff_us(time_start, time_end);
+ *percent = 100.0 * mperf_diff / timediff;
+ dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n",
+ mperf_cstates[id].name, mperf_diff, timediff);
+ } else
+ return -1;
+
+ if (id == Cx)
+ *percent = 100.0 - *percent;
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ mperf_cstates[id].name, mperf_diff, aperf_diff, cpu);
+ dprint("%s: %f\n", mperf_cstates[id].name, *percent);
+ return 0;
+}
+
+static int mperf_get_count_freq(unsigned int id, unsigned long long *count,
+ unsigned int cpu)
+{
+ unsigned long long aperf_diff, mperf_diff, time_diff, tsc_diff;
+
+ if (id != AVG_FREQ)
+ return 1;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ mperf_diff = mperf_current_count[cpu] - mperf_previous_count[cpu];
+ aperf_diff = aperf_current_count[cpu] - aperf_previous_count[cpu];
+
+ if (max_freq_mode == MAX_FREQ_TSC_REF) {
+ /* Calculate max_freq from TSC count */
+ tsc_diff = tsc_at_measure_end - tsc_at_measure_start;
+ time_diff = timespec_diff_us(time_start, time_end);
+ max_frequency = tsc_diff / time_diff;
+ }
+
+ *count = max_frequency * ((double)aperf_diff / mperf_diff);
+ dprint("%s: Average freq based on %s maximum frequency:\n",
+ mperf_cstates[id].name,
+ (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read");
+ dprint("max_frequency: %lu\n", max_frequency);
+ dprint("aperf_diff: %llu\n", aperf_diff);
+ dprint("mperf_diff: %llu\n", mperf_diff);
+ dprint("avg freq: %llu\n", *count);
+ return 0;
+}
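+
+/*
+ * Worked examples (illustrative): with tsc_diff = 2000000000 and
+ * mperf_diff = 1000000000 the C0 share is 50 percent (Cx = 100 - 50).
+ * With max_frequency = 2000 MHz, aperf_diff = 3000000000 and
+ * mperf_diff = 2000000000 the average frequency is
+ * 2000 * 3000000000 / 2000000000 = 3000 MHz.
+ */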
+
+static int mperf_start(void)
+{
+ int cpu;
+ unsigned long long dbg;
+
+ clock_gettime(CLOCK_REALTIME, &time_start);
+ mperf_get_tsc(&tsc_at_measure_start);
+
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ mperf_init_stats(cpu);
+
+ mperf_get_tsc(&dbg);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+}
+
+static int mperf_stop(void)
+{
+ unsigned long long dbg;
+ int cpu;
+
+ for (cpu = 0; cpu < cpu_count; cpu++)
+ mperf_measure_stats(cpu);
+
+ mperf_get_tsc(&tsc_at_measure_end);
+ clock_gettime(CLOCK_REALTIME, &time_end);
+
+ mperf_get_tsc(&dbg);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+
+ return 0;
+}
+
+/*
+ * The mperf register is defined to tick at P0 (maximum) frequency.
+ *
+ * Instead of reading out P0, which can be tricky to get from HW,
+ * we use the TSC counter if it reliably ticks at P0/mperf frequency.
+ *
+ * Still try to fall back to:
+ * /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq
+ * on older Intel HW without the invariant TSC feature,
+ * or on AMD machines where the TSC does not tick at P0 (none exist
+ * yet, but this is still double-checked via MSR_AMD_HWCR).
+ *
+ * On these machines the user still gets useful mperf
+ * stats when the acpi-cpufreq driver is loaded.
+ */
+static int init_maxfreq_mode(void)
+{
+ int ret;
+ unsigned long long hwcr;
+ unsigned long min;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
+ goto use_sysfs;
+
+ if (cpupower_cpu_info.vendor == X86_VENDOR_AMD) {
+  /* MSR_AMD_HWCR bit 24 (TscFreqSel) tells us whether the
+   * TSC runs at P0/mperf frequency.
+   * A test whether hwcr is accessible/available would be:
+   * (cpupower_cpu_info.family > 0x10 ||
+   *   cpupower_cpu_info.family == 0x10 &&
+   *   cpupower_cpu_info.model >= 0x2)
+   * This should be the case for all aperf/mperf
+   * capable AMD machines and is therefore safe to test here.
+   * Compare with Linux kernel git commit: acf01734b1747b1ec4
+   */
+ ret = read_msr(0, MSR_AMD_HWCR, &hwcr);
+ /*
+ * If the MSR read failed, assume a Xen system that did
+ * not explicitly provide access to it and assume TSC works
+ */
+ if (ret != 0) {
+ dprint("TSC read 0x%x failed - assume TSC working\n",
+ MSR_AMD_HWCR);
+ return 0;
+ } else if (1 & (hwcr >> 24)) {
+ max_freq_mode = MAX_FREQ_TSC_REF;
+ return 0;
+ } else { /* Use sysfs max frequency if available */ }
+ } else if (cpupower_cpu_info.vendor == X86_VENDOR_INTEL) {
+ /*
+   * On Intel we assume mperf (in C0) ticks at the same
+   * rate as the TSC
+ */
+ max_freq_mode = MAX_FREQ_TSC_REF;
+ return 0;
+ }
+use_sysfs:
+ if (cpufreq_get_hardware_limits(0, &min, &max_frequency)) {
+ dprint("Cannot retrieve max freq from cpufreq kernel "
+ "subsystem\n");
+ return -1;
+ }
+ max_freq_mode = MAX_FREQ_SYSFS;
+ max_frequency /= 1000; /* cpufreq reports kHz -> convert to MHz */
+ return 0;
+}
+
+/*
+ * This monitor provides:
+ *
+ * 1) Average frequency a CPU resided in
+ *    This always works if the CPU has aperf/mperf capabilities
+ *
+ * 2) C0 and Cx (any sleep state) time a CPU resided in
+ *    Works if the mperf timer stops ticking in sleep states, which
+ *    seems to be the case on all current HW.
+ *
+ * Both are retrieved directly from HW registers and are independent
+ * of kernel statistics.
+ */
+struct cpuidle_monitor mperf_monitor;
+struct cpuidle_monitor *mperf_register(void)
+{
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+ return NULL;
+
+ if (init_maxfreq_mode())
+ return NULL;
+
+ /* Free this at program termination */
+ is_valid = calloc(cpu_count, sizeof(int));
+ mperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
+ aperf_previous_count = calloc(cpu_count, sizeof(unsigned long long));
+ mperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
+ aperf_current_count = calloc(cpu_count, sizeof(unsigned long long));
+
+ mperf_monitor.name_len = strlen(mperf_monitor.name);
+ return &mperf_monitor;
+}
+
+void mperf_unregister(void)
+{
+ free(mperf_previous_count);
+ free(aperf_previous_count);
+ free(mperf_current_count);
+ free(aperf_current_count);
+ free(is_valid);
+}
+
+struct cpuidle_monitor mperf_monitor = {
+ .name = "Mperf",
+ .hw_states_num = MPERF_CSTATE_COUNT,
+ .hw_states = mperf_cstates,
+ .start = mperf_start,
+ .stop = mperf_stop,
+ .do_register = mperf_register,
+ .unregister = mperf_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* #if defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
new file mode 100644
index 000000000..abf8cb5f7
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -0,0 +1,216 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on Len Brown's <lenb@kernel.org> turbostat tool.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C3_RESIDENCY 0x3F8
+#define MSR_PKG_C6_RESIDENCY 0x3F9
+#define MSR_CORE_C3_RESIDENCY 0x3FC
+#define MSR_CORE_C6_RESIDENCY 0x3FD
+
+#define MSR_TSC 0x10
+
+#define NHM_CSTATE_COUNT 4
+
+enum intel_nhm_id { C3 = 0, C6, PC3, PC6, TSC = 0xFFFF };
+
+static int nhm_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t nhm_cstates[NHM_CSTATE_COUNT] = {
+ {
+ .name = "C3",
+ .desc = N_("Processor Core C3"),
+ .id = C3,
+ .range = RANGE_CORE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+ {
+ .name = "C6",
+ .desc = N_("Processor Core C6"),
+ .id = C6,
+ .range = RANGE_CORE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+
+ {
+ .name = "PC3",
+ .desc = N_("Processor Package C3"),
+ .id = PC3,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+ {
+ .name = "PC6",
+ .desc = N_("Processor Package C6"),
+ .id = PC6,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = nhm_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[NHM_CSTATE_COUNT];
+static unsigned long long *current_count[NHM_CSTATE_COUNT];
+/* valid flag for all CPUs. If an MSR read failed it will be zero */
+static int *is_valid;
+
+static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val,
+ unsigned int cpu)
+{
+ int msr;
+
+ switch (id) {
+ case C3:
+ msr = MSR_CORE_C3_RESIDENCY;
+ break;
+ case C6:
+ msr = MSR_CORE_C6_RESIDENCY;
+ break;
+ case PC3:
+ msr = MSR_PKG_C3_RESIDENCY;
+ break;
+ case PC6:
+ msr = MSR_PKG_C6_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ }
+ if (read_msr(cpu, msr, val))
+ return -1;
+
+ return 0;
+}
+
+static int nhm_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 *
+ (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ nhm_cstates[id].name, previous_count[id][cpu],
+ current_count[id][cpu], cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ nhm_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int nhm_start(void)
+{
+ int num, cpu;
+ unsigned long long dbg, val;
+
+ nhm_get_count(TSC, &tsc_at_measure_start, base_cpu);
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ nhm_get_count(TSC, &dbg, base_cpu);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_start);
+ return 0;
+}
+
+static int nhm_stop(void)
+{
+ unsigned long long val;
+ unsigned long long dbg;
+ int num, cpu;
+
+ nhm_get_count(TSC, &tsc_at_measure_end, base_cpu);
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !nhm_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ nhm_get_count(TSC, &dbg, base_cpu);
+ dprint("TSC diff: %llu\n", dbg - tsc_at_measure_end);
+
+ return 0;
+}
+
+struct cpuidle_monitor intel_nhm_monitor;
+
+struct cpuidle_monitor *intel_nhm_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL)
+ return NULL;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_INV_TSC))
+ return NULL;
+
+ if (!(cpupower_cpu_info.caps & CPUPOWER_CAP_APERF))
+ return NULL;
+
+ /* Free this at program termination */
+ is_valid = calloc(cpu_count, sizeof(int));
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+
+ intel_nhm_monitor.name_len = strlen(intel_nhm_monitor.name);
+ return &intel_nhm_monitor;
+}
+
+void intel_nhm_unregister(void)
+{
+ int num;
+
+ for (num = 0; num < NHM_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+ free(is_valid);
+}
+
+struct cpuidle_monitor intel_nhm_monitor = {
+ .name = "Nehalem",
+ .hw_states_num = NHM_CSTATE_COUNT,
+ .hw_states = nhm_cstates,
+ .start = nhm_start,
+ .stop = nhm_stop,
+ .do_register = intel_nhm_register,
+ .unregister = intel_nhm_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
new file mode 100644
index 000000000..a2b452196
--- /dev/null
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -0,0 +1,200 @@
+/*
+ * (C) 2010,2011 Thomas Renninger <trenn@suse.de>, Novell Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2.
+ *
+ * Based on Len Brown's <lenb@kernel.org> turbostat tool.
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "helpers/helpers.h"
+#include "idle_monitor/cpupower-monitor.h"
+
+#define MSR_PKG_C2_RESIDENCY 0x60D
+#define MSR_PKG_C7_RESIDENCY 0x3FA
+#define MSR_CORE_C7_RESIDENCY 0x3FE
+
+#define MSR_TSC 0x10
+
+enum intel_snb_id { C7 = 0, PC2, PC7, SNB_CSTATE_COUNT, TSC = 0xFFFF };
+
+static int snb_get_count_percent(unsigned int self_id, double *percent,
+ unsigned int cpu);
+
+static cstate_t snb_cstates[SNB_CSTATE_COUNT] = {
+ {
+ .name = "C7",
+ .desc = N_("Processor Core C7"),
+ .id = C7,
+ .range = RANGE_CORE,
+ .get_count_percent = snb_get_count_percent,
+ },
+ {
+ .name = "PC2",
+ .desc = N_("Processor Package C2"),
+ .id = PC2,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = snb_get_count_percent,
+ },
+ {
+ .name = "PC7",
+ .desc = N_("Processor Package C7"),
+ .id = PC7,
+ .range = RANGE_PACKAGE,
+ .get_count_percent = snb_get_count_percent,
+ },
+};
+
+static unsigned long long tsc_at_measure_start;
+static unsigned long long tsc_at_measure_end;
+static unsigned long long *previous_count[SNB_CSTATE_COUNT];
+static unsigned long long *current_count[SNB_CSTATE_COUNT];
+/* valid flag for all CPUs. If an MSR read failed it will be zero */
+static int *is_valid;
+
+static int snb_get_count(enum intel_snb_id id, unsigned long long *val,
+ unsigned int cpu)
+{
+ int msr;
+
+ switch (id) {
+ case C7:
+ msr = MSR_CORE_C7_RESIDENCY;
+ break;
+ case PC2:
+ msr = MSR_PKG_C2_RESIDENCY;
+ break;
+ case PC7:
+ msr = MSR_PKG_C7_RESIDENCY;
+ break;
+ case TSC:
+ msr = MSR_TSC;
+ break;
+ default:
+ return -1;
+ }
+ if (read_msr(cpu, msr, val))
+ return -1;
+ return 0;
+}
+
+static int snb_get_count_percent(unsigned int id, double *percent,
+ unsigned int cpu)
+{
+ *percent = 0.0;
+
+ if (!is_valid[cpu])
+ return -1;
+
+ *percent = (100.0 *
+ (current_count[id][cpu] - previous_count[id][cpu])) /
+ (tsc_at_measure_end - tsc_at_measure_start);
+
+ dprint("%s: previous: %llu - current: %llu - (%u)\n",
+ snb_cstates[id].name, previous_count[id][cpu],
+ current_count[id][cpu], cpu);
+
+ dprint("%s: tsc_diff: %llu - count_diff: %llu - percent: %2.f (%u)\n",
+ snb_cstates[id].name,
+ (unsigned long long) tsc_at_measure_end - tsc_at_measure_start,
+ current_count[id][cpu] - previous_count[id][cpu],
+ *percent, cpu);
+
+ return 0;
+}
+
+static int snb_start(void)
+{
+ int num, cpu;
+ unsigned long long val;
+
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ snb_get_count(num, &val, cpu);
+ previous_count[num][cpu] = val;
+ }
+ }
+ snb_get_count(TSC, &tsc_at_measure_start, base_cpu);
+ return 0;
+}
+
+static int snb_stop(void)
+{
+ unsigned long long val;
+ int num, cpu;
+
+ snb_get_count(TSC, &tsc_at_measure_end, base_cpu);
+
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ for (cpu = 0; cpu < cpu_count; cpu++) {
+ is_valid[cpu] = !snb_get_count(num, &val, cpu);
+ current_count[num][cpu] = val;
+ }
+ }
+ return 0;
+}
+
+struct cpuidle_monitor intel_snb_monitor;
+
+static struct cpuidle_monitor *snb_register(void)
+{
+ int num;
+
+ if (cpupower_cpu_info.vendor != X86_VENDOR_INTEL
+ || cpupower_cpu_info.family != 6)
+ return NULL;
+
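+ /* CPUID model numbers of the supported SandyBridge/IvyBridge/Haswell parts */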
+ switch (cpupower_cpu_info.model) {
+ case 0x2A: /* SNB */
+ case 0x2D: /* SNB Xeon */
+ case 0x3A: /* IVB */
+ case 0x3E: /* IVB Xeon */
+ case 0x3C: /* HSW */
+ case 0x3F: /* HSW */
+ case 0x45: /* HSW */
+ case 0x46: /* HSW */
+ break;
+ default:
+ return NULL;
+ }
+
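+ /* per-cpu validity flags and counter snapshots for each tracked C-state */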
+ is_valid = calloc(cpu_count, sizeof(int));
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ previous_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ current_count[num] = calloc(cpu_count,
+ sizeof(unsigned long long));
+ }
+ intel_snb_monitor.name_len = strlen(intel_snb_monitor.name);
+ return &intel_snb_monitor;
+}
+
+void snb_unregister(void)
+{
+ int num;
+ free(is_valid);
+ for (num = 0; num < SNB_CSTATE_COUNT; num++) {
+ free(previous_count[num]);
+ free(current_count[num]);
+ }
+}
+
+struct cpuidle_monitor intel_snb_monitor = {
+ .name = "SandyBridge",
+ .hw_states = snb_cstates,
+ .hw_states_num = SNB_CSTATE_COUNT,
+ .start = snb_start,
+ .stop = snb_stop,
+ .do_register = snb_register,
+ .unregister = snb_unregister,
+ .needs_root = 1,
+ .overflow_s = 922000000 /* 922337203 seconds TSC overflow
+ at 20GHz */
+};
+#endif /* defined(__i386__) || defined(__x86_64__) */
diff --git a/tools/power/cpupower/utils/version-gen.sh b/tools/power/cpupower/utils/version-gen.sh
new file mode 100755
index 000000000..c70de2eab
--- /dev/null
+++ b/tools/power/cpupower/utils/version-gen.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Script which prints out the version to use for building cpupowerutils.
+# Must be called from tools/power/cpupower/
+#
+# Heavily based on tools/perf/util/PERF-VERSION-GEN .
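+#
+# Example (run from tools/power/cpupower/):
+#   $ ./utils/version-gen.sh
+#   4.19.249
+# The version comes from the kernel Makefile, or from "git describe"
+# with dashes mapped to dots when run inside a git tree.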
+
+LF='
+'
+
+# First check if there is a .git to get the version from git describe
+# otherwise try to get the version from the kernel makefile
+if test -d ../../../.git -o -f ../../../.git &&
+ VN=$(git describe --abbrev=4 HEAD 2>/dev/null) &&
+ case "$VN" in
+ *$LF*) (exit 1) ;;
+ v[0-9]*)
+ git update-index -q --refresh
+ test -z "$(git diff-index --name-only HEAD --)" ||
+ VN="$VN-dirty" ;;
+ esac
+then
+ VN=$(echo "$VN" | sed -e 's/-/./g');
+else
+ eval $(grep '^VERSION[[:space:]]*=' ../../../Makefile|tr -d ' ')
+ eval $(grep '^PATCHLEVEL[[:space:]]*=' ../../../Makefile|tr -d ' ')
+ eval $(grep '^SUBLEVEL[[:space:]]*=' ../../../Makefile|tr -d ' ')
+ eval $(grep '^EXTRAVERSION[[:space:]]*=' ../../../Makefile|tr -d ' ')
+
+ VN="${VERSION}.${PATCHLEVEL}.${SUBLEVEL}${EXTRAVERSION}"
+fi
+
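+# strip a leading "v" (git tags look like v4.x, the Makefile version does not)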
+VN=$(expr "$VN" : v*'\(.*\)')
+
+echo $VN
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
new file mode 100644
index 000000000..c1899cd72
--- /dev/null
+++ b/tools/power/pm-graph/Makefile
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: GPL-2.0
+PREFIX ?= /usr
+DESTDIR ?=
+
+all:
+ @echo "Nothing to build"
+
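+# install depends on uninstall so any previously installed copy is removed first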
+install : uninstall
+ install -d $(DESTDIR)$(PREFIX)/lib/pm-graph
+ install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
+ install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
+ install -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+ install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
+
+ install -d $(DESTDIR)$(PREFIX)/bin
+ ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
+ ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
+
+ install -d $(DESTDIR)$(PREFIX)/share/man/man8
+ install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+ install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+
+uninstall :
+ rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
+ rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+
+ rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph
+ rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph
+
+ rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/*
+ if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \
+ rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \
+ fi;
+ rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*
+ if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \
+ rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \
+ fi;
diff --git a/tools/power/pm-graph/bootgraph.8 b/tools/power/pm-graph/bootgraph.8
new file mode 100644
index 000000000..64d513f80
--- /dev/null
+++ b/tools/power/pm-graph/bootgraph.8
@@ -0,0 +1,173 @@
+.TH BOOTGRAPH 8
+.SH NAME
+bootgraph \- Kernel boot timing analysis
+.SH SYNOPSIS
+.ft B
+.B bootgraph
+.RB [ OPTIONS ]
+.RB [ COMMAND ]
+.SH DESCRIPTION
+\fBbootgraph \fP reads the dmesg log from kernel boot and
+creates an html representation of the initcall timeline. It graphs
+every module init call found, through both kernel and user modes. The
+timeline is split into two phases: kernel mode and user mode. Kernel mode
+represents a single process running on a single CPU with serial init calls.
+Once user mode begins, the init process is called, and the init calls
+start working in parallel.
+.PP
+If no specific command is given, the tool reads the current dmesg log and
+outputs a new timeline.
+.PP
+The tool can also augment the timeline with ftrace data on custom target
+functions as well as full trace callgraphs.
+.PP
+Generates output files in subdirectory: boot-yymmdd-HHMMSS
+ html timeline : <hostname>_boot.html
+ raw dmesg file : <hostname>_boot_dmesg.txt
+ raw ftrace file : <hostname>_boot_ftrace.txt
+.SH OPTIONS
+.TP
+\fB-h\fR
+Print this help text
+.TP
+\fB-v\fR
+Print the current tool version
+.TP
+\fB-addlogs\fR
+Add the dmesg log to the html output. It will be viewable by
+clicking a button in the timeline.
+.TP
+\fB-result \fIfile\fR
+Export a results table to a text file for parsing.
+.TP
+\fB-o \fIname\fR
+Overrides the output subdirectory name when running a new test.
+Use {date}, {time}, {hostname} for current values.
+.sp
+e.g. boot-{hostname}-{date}-{time}
+.SS "advanced"
+.TP
+\fB-f or -callgraph\fR
+Use ftrace to create initcall callgraphs (default: disabled). If -func
+is not used there will be one callgraph per initcall. This can produce
+very large outputs, e.g. 10MB - 100MB.
+.TP
+\fB-fstat\fR
+Use ftrace to add function detail (default: disabled)
+.TP
+\fB-maxdepth \fIlevel\fR
+limit the callgraph trace depth to \fIlevel\fR (default: 2). This is
+the best way to limit the output size when using -callgraph.
+.TP
+\fB-mincg \fIt\fR
+Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0=all).
+This reduces the html file size as there can be many tiny callgraphs
+which are barely visible in the timeline.
+The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-cgfilter \fI"func1,func2,..."\fR
+Reduce callgraph output in the timeline by limiting it to a list of calls. The
+argument can be a single function name or a comma delimited list.
+(default: none)
+.TP
+\fB-cgskip \fIfile\fR
+Reduce callgraph output in the timeline by skipping over uninteresting
+functions in the trace, e.g. printk or console_unlock. The functions listed
+in this file will show up as empty leaves in the callgraph with only the start/end
+times displayed.
+(default: none)
+.TP
+\fB-timeprec \fIn\fR
+Number of significant digits in timestamps (0:S, 3:ms, [6:us])
+.TP
+\fB-expandcg\fR
+pre-expand the callgraph data in the html output (default: disabled)
+.TP
+\fB-func \fI"func1,func2,..."\fR
+Instead of tracing each initcall, trace a custom list of functions (default: do_one_initcall)
+.TP
+\fB-reboot\fR
+Reboot the machine and generate a new timeline automatically. Works in 4 steps.
+ 1. updates grub with the required kernel parameters
+ 2. installs a cron job which re-runs the tool after reboot
+ 3. reboots the system
+ 4. after startup, extracts the data and generates the timeline
+.TP
+\fB-manual\fR
+Show the requirements to generate a new timeline manually. Requires 3 steps.
+ 1. append the printed CMDLINE string to the kernel command line via your native boot manager.
+ 2. reboot the system
+ 3. after startup, re-run the tool with the same arguments and no command
+
+.SH COMMANDS
+.SS "rebuild"
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file (used with -dmesg).
+.SS "other"
+.TP
+\fB-flistall\fR
+Print all ftrace functions capable of being captured. These are all the
+possible values you can add to trace via the -func argument.
+.TP
+\fB-sysinfo\fR
+Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
+
+.SH EXAMPLES
+Create a timeline using the current dmesg log.
+.IP
+\f(CW$ bootgraph\fR
+.PP
+Create a timeline using the current dmesg and ftrace log.
+.IP
+\f(CW$ bootgraph -callgraph\fR
+.PP
+Create a timeline using the current dmesg, add the log to the html and change the folder.
+.IP
+\f(CW$ bootgraph -addlogs -o "myboot-{date}-{time}"\fR
+.PP
+Capture a new boot timeline by automatically rebooting the machine.
+.IP
+\f(CW$ sudo bootgraph -reboot -addlogs -o "latest-{hostname}"\fR
+.PP
+Capture a new boot timeline with function trace data.
+.IP
+\f(CW$ sudo bootgraph -reboot -f\fR
+.PP
+Capture a new boot timeline with trace & callgraph data. Skip callgraphs smaller than 5ms.
+.IP
+\f(CW$ sudo bootgraph -reboot -callgraph -mincg 5\fR
+.PP
+Capture a new boot timeline with callgraph data over custom functions.
+.IP
+\f(CW$ sudo bootgraph -reboot -callgraph -func "acpi_ps_parse_aml,msleep"\fR
+.PP
+Capture a brand new boot timeline with manual reboot.
+.IP
+\f(CW$ sudo bootgraph -callgraph -manual\fR
+.IP
+\f(CW$ vi /etc/default/grub # add the CMDLINE string to your kernel params\fR
+.IP
+\f(CW$ sudo reboot # reboot the machine\fR
+.IP
+\f(CW$ sudo bootgraph -callgraph # re-run the tool after restart\fR
+.PP
+.SS "rebuild timeline from logs"
+.PP
+Rebuild the html from a previous run's logs, using the same options.
+.IP
+\f(CW$ bootgraph -dmesg dmesg.txt -ftrace ftrace.txt -callgraph\fR
+.PP
+Rebuild the html with different options.
+.IP
+\f(CW$ bootgraph -dmesg dmesg.txt -ftrace ftrace.txt -addlogs\fR
+
+.SH "SEE ALSO"
+dmesg(1), update-grub(8), crontab(1), reboot(8)
+.PP
+.SH AUTHOR
+.nf
+Written by Todd Brandt <todd.e.brandt@linux.intel.com>
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
new file mode 100755
index 000000000..8ee626c0f
--- /dev/null
+++ b/tools/power/pm-graph/bootgraph.py
@@ -0,0 +1,1085 @@
+#!/usr/bin/python2
+#
+# Tool for analyzing boot timing
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Authors:
+# Todd Brandt <todd.e.brandt@linux.intel.com>
+#
+# Description:
+# This tool is designed to assist kernel and OS developers in optimizing
+# their linux stack's boot time. It creates an html representation of
+# the kernel boot timeline up to the start of the init process.
+#
+
+# ----------------- LIBRARIES --------------------
+
+import sys
+import time
+import os
+import string
+import re
+import platform
+import shutil
+from datetime import datetime, timedelta
+from subprocess import call, Popen, PIPE
+import sleepgraph as aslib
+
+# ----------------- CLASSES --------------------
+
+# Class: SystemValues
+# Description:
+# A global, single-instance container used to
+# store system values and test parameters
+class SystemValues(aslib.SystemValues):
+ title = 'BootGraph'
+ version = '2.2'
+ hostname = 'localhost'
+ testtime = ''
+ kernel = ''
+ dmesgfile = ''
+ ftracefile = ''
+ htmlfile = 'bootgraph.html'
+ testdir = ''
+ kparams = ''
+ result = ''
+ useftrace = False
+ usecallgraph = False
+ suspendmode = 'boot'
+ max_graph_depth = 2
+ graph_filter = 'do_one_initcall'
+ reboot = False
+ manual = False
+ iscronjob = False
+ timeformat = '%.6f'
+ bootloader = 'grub'
+ blexec = []
+ def __init__(self):
+ self.hostname = platform.node()
+ self.testtime = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+ if os.path.exists('/proc/version'):
+ fp = open('/proc/version', 'r')
+ val = fp.read().strip()
+ fp.close()
+ self.kernel = self.kernelVersion(val)
+ else:
+ self.kernel = 'unknown'
+ self.testdir = datetime.now().strftime('boot-%y%m%d-%H%M%S')
+ def kernelVersion(self, msg):
+ return msg.split()[2]
+ def checkFtraceKernelVersion(self):
+ val = tuple(map(int, self.kernel.split('-')[0].split('.')))
+ if val >= (4, 10, 0):
+ return True
+ return False
+ def kernelParams(self):
+ cmdline = 'initcall_debug log_buf_len=32M'
+ if self.useftrace:
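+ # memtotal is in KB: use at most half of it (capped at 2GB) for the
+ # trace buffer, split evenly across cpus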
+ if self.cpucount > 0:
+ bs = min(self.memtotal / 2, 2*1024*1024) / self.cpucount
+ else:
+ bs = 131072
+ cmdline += ' trace_buf_size=%dK trace_clock=global '\
+ 'trace_options=nooverwrite,funcgraph-abstime,funcgraph-cpu,'\
+ 'funcgraph-duration,funcgraph-proc,funcgraph-tail,'\
+ 'nofuncgraph-overhead,context-info,graph-time '\
+ 'ftrace=function_graph '\
+ 'ftrace_graph_max_depth=%d '\
+ 'ftrace_graph_filter=%s' % \
+ (bs, self.max_graph_depth, self.graph_filter)
+ return cmdline
+ def setGraphFilter(self, val):
+ master = self.getBootFtraceFilterFunctions()
+ fs = ''
+ for i in val.split(','):
+ func = i.strip()
+ if func == '':
+ doError('badly formatted filter function string')
+ if '[' in func or ']' in func:
+ doError('loadable module functions not allowed - "%s"' % func)
+ if ' ' in func:
+ doError('spaces found in filter functions - "%s"' % func)
+ if func not in master:
+ doError('function "%s" not available for ftrace' % func)
+ if not fs:
+ fs = func
+ else:
+ fs += ','+func
+ if not fs:
+ doError('badly formatted filter function string')
+ self.graph_filter = fs
+ def getBootFtraceFilterFunctions(self):
+ self.rootCheck(True)
+ fp = open(self.tpath+'available_filter_functions')
+ fulllist = fp.read().split('\n')
+ fp.close()
+ list = []
+ for i in fulllist:
+ if not i or ' ' in i or '[' in i or ']' in i:
+ continue
+ list.append(i)
+ return list
+ def myCronJob(self, line):
+ if '@reboot' not in line:
+ return False
+ if 'bootgraph' in line or 'analyze_boot.py' in line or '-cronjob' in line:
+ return True
+ return False
+ def cronjobCmdString(self):
+ cmdline = '%s -cronjob' % os.path.abspath(sys.argv[0])
+ args = iter(sys.argv[1:])
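+ # rebuild the original command line, dropping one-shot args that
+ # must not repeat in the post-reboot cron run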
+ for arg in args:
+ if arg in ['-h', '-v', '-cronjob', '-reboot', '-verbose']:
+ continue
+ elif arg in ['-o', '-dmesg', '-ftrace', '-func']:
+ args.next()
+ continue
+ elif arg == '-result':
+ cmdline += ' %s "%s"' % (arg, os.path.abspath(args.next()))
+ continue
+ elif arg == '-cgskip':
+ file = self.configFile(args.next())
+ cmdline += ' %s "%s"' % (arg, os.path.abspath(file))
+ continue
+ cmdline += ' '+arg
+ if self.graph_filter != 'do_one_initcall':
+ cmdline += ' -func "%s"' % self.graph_filter
+ cmdline += ' -o "%s"' % os.path.abspath(self.testdir)
+ return cmdline
+ def manualRebootRequired(self):
+ cmdline = self.kernelParams()
+ print 'To generate a new timeline manually, follow these steps:\n'
+ print '1. Add the CMDLINE string to your kernel command line.'
+ print '2. Reboot the system.'
+ print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
+ print 'CMDLINE="%s"' % cmdline
+ sys.exit()
+ def blGrub(self):
+ blcmd = ''
+ for cmd in ['update-grub', 'grub-mkconfig', 'grub2-mkconfig']:
+ if blcmd:
+ break
+ blcmd = self.getExec(cmd)
+ if not blcmd:
+ doError('[GRUB] missing update command')
+ if not os.path.exists('/etc/default/grub'):
+ doError('[GRUB] missing /etc/default/grub')
+ if 'grub2' in blcmd:
+ cfg = '/boot/grub2/grub.cfg'
+ else:
+ cfg = '/boot/grub/grub.cfg'
+ if not os.path.exists(cfg):
+ doError('[GRUB] missing %s' % cfg)
+ if 'update-grub' in blcmd:
+ self.blexec = [blcmd]
+ else:
+ self.blexec = [blcmd, '-o', cfg]
+ def getBootLoader(self):
+ if self.bootloader == 'grub':
+ self.blGrub()
+ else:
+ doError('unknown boot loader: %s' % self.bootloader)
+ def writeDatafileHeader(self, filename):
+ self.kparams = open('/proc/cmdline', 'r').read().strip()
+ fp = open(filename, 'w')
+ fp.write(self.teststamp+'\n')
+ fp.write(self.sysstamp+'\n')
+ fp.write('# command | %s\n' % self.cmdline)
+ fp.write('# kparams | %s\n' % self.kparams)
+ fp.close()
+
+sysvals = SystemValues()
+
+# Class: Data
+# Description:
+# The primary container for test data.
+class Data(aslib.Data):
+ dmesg = {} # root data structure
+ start = 0.0 # test start
+ end = 0.0 # test end
+ dmesgtext = [] # dmesg text file in memory
+ testnumber = 0
+ idstr = ''
+ html_device_id = 0
+ valid = False
+ tUserMode = 0.0
+ boottime = ''
+ phases = ['kernel', 'user']
+ do_one_initcall = False
+ def __init__(self, num):
+ self.testnumber = num
+ self.idstr = 'a'
+ self.dmesgtext = []
+ self.dmesg = {
+ 'kernel': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0,
+ 'order': 0, 'color': 'linear-gradient(to bottom, #fff, #bcf)'},
+ 'user': {'list': dict(), 'start': -1.0, 'end': -1.0, 'row': 0,
+ 'order': 1, 'color': '#fff'}
+ }
+ def deviceTopology(self):
+ return ''
+ def newAction(self, phase, name, pid, start, end, ret, ulen):
+ # new device callback for a specific phase
+ self.html_device_id += 1
+ devid = '%s%d' % (self.idstr, self.html_device_id)
+ list = self.dmesg[phase]['list']
+ length = -1.0
+ if(start >= 0 and end >= 0):
+ length = end - start
+ i = 2
+ origname = name
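+ # initcall names can repeat; make the key unique as name[2], name[3], ...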
+ while(name in list):
+ name = '%s[%d]' % (origname, i)
+ i += 1
+ list[name] = {'name': name, 'start': start, 'end': end,
+ 'pid': pid, 'length': length, 'row': 0, 'id': devid,
+ 'ret': ret, 'ulen': ulen }
+ return name
+ def deviceMatch(self, pid, cg):
+ if cg.end - cg.start == 0:
+ return ''
+ for p in self.phases:
+ list = self.dmesg[p]['list']
+ for devname in list:
+ dev = list[devname]
+ if pid != dev['pid']:
+ continue
+ if cg.name == 'do_one_initcall':
+ if(cg.start <= dev['start'] and cg.end >= dev['end'] and dev['length'] > 0):
+ dev['ftrace'] = cg
+ self.do_one_initcall = True
+ return devname
+ else:
+ if(cg.start > dev['start'] and cg.end < dev['end']):
+ if 'ftraces' not in dev:
+ dev['ftraces'] = []
+ dev['ftraces'].append(cg)
+ return devname
+ return ''
+ def printDetails(self):
+ sysvals.vprint('Timeline Details:')
+ sysvals.vprint(' Host: %s' % sysvals.hostname)
+ sysvals.vprint(' Kernel: %s' % sysvals.kernel)
+ sysvals.vprint(' Test time: %s' % sysvals.testtime)
+ sysvals.vprint(' Boot time: %s' % self.boottime)
+ for phase in self.phases:
+ dc = len(self.dmesg[phase]['list'])
+ sysvals.vprint('%9s mode: %.3f - %.3f (%d initcalls)' % (phase,
+ self.dmesg[phase]['start']*1000,
+ self.dmesg[phase]['end']*1000, dc))
+
+# ----------------- FUNCTIONS --------------------
+
+# Function: parseKernelLog
+# Description:
+# parse a kernel log for boot data
+def parseKernelLog():
+ sysvals.vprint('Analyzing the dmesg data (%s)...' % \
+ os.path.basename(sysvals.dmesgfile))
+ phase = 'kernel'
+ data = Data(0)
+ data.dmesg['kernel']['start'] = data.start = ktime = 0.0
+ sysvals.stamp = {
+ 'time': datetime.now().strftime('%B %d %Y, %I:%M:%S %p'),
+ 'host': sysvals.hostname,
+ 'mode': 'boot', 'kernel': ''}
+
+ tp = aslib.TestProps()
+ devtemp = dict()
+ if(sysvals.dmesgfile):
+ lf = open(sysvals.dmesgfile, 'r')
+ else:
+ lf = Popen('dmesg', stdout=PIPE).stdout
+ for line in lf:
+ line = line.replace('\r\n', '')
+ # grab the stamp and sysinfo
+ if re.match(tp.stampfmt, line):
+ tp.stamp = line
+ continue
+ elif re.match(tp.sysinfofmt, line):
+ tp.sysinfo = line
+ continue
+ elif re.match(tp.cmdlinefmt, line):
+ tp.cmdline = line
+ continue
+ elif re.match(tp.kparamsfmt, line):
+ tp.kparams = line
+ continue
+ idx = line.find('[')
+ if idx > 1:
+ line = line[idx:]
+ m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+ if(not m):
+ continue
+ ktime = float(m.group('ktime'))
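+ # boot messages past 120s are assumed irrelevant; stop parsing there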
+ if(ktime > 120):
+ break
+ msg = m.group('msg')
+ data.dmesgtext.append(line)
+ if(ktime == 0.0 and re.match('^Linux version .*', msg)):
+ if(not sysvals.stamp['kernel']):
+ sysvals.stamp['kernel'] = sysvals.kernelVersion(msg)
+ continue
+ m = re.match('.* setting system clock to (?P<t>.*) UTC.*', msg)
+ if(m):
+ bt = datetime.strptime(m.group('t'), '%Y-%m-%d %H:%M:%S')
+ bt = bt - timedelta(seconds=int(ktime))
+ data.boottime = bt.strftime('%Y-%m-%d_%H:%M:%S')
+ sysvals.stamp['time'] = bt.strftime('%B %d %Y, %I:%M:%S %p')
+ continue
+ m = re.match('^calling *(?P<f>.*)\+.* @ (?P<p>[0-9]*)', msg)
+ if(m):
+ func = m.group('f')
+ pid = int(m.group('p'))
+ devtemp[func] = (ktime, pid)
+ continue
+ m = re.match('^initcall *(?P<f>.*)\+.* returned (?P<r>.*) after (?P<t>.*) usecs', msg)
+ if(m):
+ data.valid = True
+ data.end = ktime
+ f, r, t = m.group('f', 'r', 't')
+ if(f in devtemp):
+ start, pid = devtemp[f]
+ data.newAction(phase, f, pid, start, ktime, int(r), int(t))
+ del devtemp[f]
+ continue
+ if(re.match('^Freeing unused kernel memory.*', msg)):
+ data.tUserMode = ktime
+ data.dmesg['kernel']['end'] = ktime
+ data.dmesg['user']['start'] = ktime
+ phase = 'user'
+
+ if tp.stamp:
+ sysvals.stamp = 0
+ tp.parseStamp(data, sysvals)
+ data.dmesg['user']['end'] = data.end
+ lf.close()
+ return data
+
+# Function: parseTraceLog
+# Description:
+# parse the ftrace log and attach callgraph data to the timeline devices
+def parseTraceLog(data):
+ sysvals.vprint('Analyzing the ftrace data (%s)...' % \
+ os.path.basename(sysvals.ftracefile))
+ # if available, calculate cgfilter allowable ranges
+ cgfilter = []
+ if len(sysvals.cgfilter) > 0:
+ for p in data.phases:
+ list = data.dmesg[p]['list']
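+ # pad each allowed window slightly so events at the exact edges are kept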
+ for i in sysvals.cgfilter:
+ if i in list:
+ cgfilter.append([list[i]['start']-0.0001,
+ list[i]['end']+0.0001])
+ # parse the trace log
+ ftemp = dict()
+ tp = aslib.TestProps()
+ tp.setTracerType('function_graph')
+ tf = open(sysvals.ftracefile, 'r')
+ for line in tf:
+ if line[0] == '#':
+ continue
+ m = re.match(tp.ftrace_line_fmt, line.strip())
+ if(not m):
+ continue
+ m_time, m_proc, m_pid, m_msg, m_dur = \
+ m.group('time', 'proc', 'pid', 'msg', 'dur')
+ t = float(m_time)
+ if len(cgfilter) > 0:
+ allow = False
+ for r in cgfilter:
+ if t >= r[0] and t < r[1]:
+ allow = True
+ break
+ if not allow:
+ continue
+ if t > data.end:
+ break
+ if(m_time and m_pid and m_msg):
+ t = aslib.FTraceLine(m_time, m_msg, m_dur)
+ pid = int(m_pid)
+ else:
+ continue
+ if t.fevent or t.fkprobe:
+ continue
+ key = (m_proc, pid)
+ if(key not in ftemp):
+ ftemp[key] = []
+ ftemp[key].append(aslib.FTraceCallGraph(pid, sysvals))
+ cg = ftemp[key][-1]
+ res = cg.addLine(t)
+ if(res != 0):
+ ftemp[key].append(aslib.FTraceCallGraph(pid, sysvals))
+ if(res == -1):
+ ftemp[key][-1].addLine(t)
+
+ tf.close()
+
+ # add the callgraph data to the device hierarchy
+ for key in ftemp:
+ proc, pid = key
+ for cg in ftemp[key]:
+ if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0):
+ continue
+ if(not cg.postProcess()):
+ print('Sanity check failed for %s-%d' % (proc, pid))
+ continue
+ # match cg data to devices
+ devname = data.deviceMatch(pid, cg)
+ if not devname:
+ kind = 'Orphan'
+ if cg.partial:
+ kind = 'Partial'
+ sysvals.vprint('%s callgraph found for %s %s-%d [%f - %f]' %\
+ (kind, cg.name, proc, pid, cg.start, cg.end))
+ elif len(cg.list) > 1000000:
+ print 'WARNING: the callgraph found for %s is massive! (%d lines)' %\
+ (devname, len(cg.list))
+
+# Function: retrieveLogs
+# Description:
+# Create copies of dmesg and/or ftrace for later processing
+def retrieveLogs():
+ # check ftrace is configured first
+ if sysvals.useftrace:
+ tracer = sysvals.fgetVal('current_tracer').strip()
+ if tracer != 'function_graph':
+ doError('ftrace not configured for a boot callgraph')
+ # create the folder and get dmesg
+ sysvals.systemInfo(aslib.dmidecode(sysvals.mempath))
+ sysvals.initTestOutput('boot')
+ sysvals.writeDatafileHeader(sysvals.dmesgfile)
+ call('dmesg >> '+sysvals.dmesgfile, shell=True)
+ if not sysvals.useftrace:
+ return
+ # get ftrace
+ sysvals.writeDatafileHeader(sysvals.ftracefile)
+ call('cat '+sysvals.tpath+'trace >> '+sysvals.ftracefile, shell=True)
+
+# Function: colorForName
+# Description:
+# Generate a repeatable color from a list for a given name
+def colorForName(name):
+ list = [
+ ('c1', '#ec9999'),
+ ('c2', '#ffc1a6'),
+ ('c3', '#fff0a6'),
+ ('c4', '#adf199'),
+ ('c5', '#9fadea'),
+ ('c6', '#a699c1'),
+ ('c7', '#ad99b4'),
+ ('c8', '#eaffea'),
+ ('c9', '#dcecfb'),
+ ('c10', '#ffffea')
+ ]
+ i = 0
+ total = 0
+ count = len(list)
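+ # simple deterministic hash: the sum of character codes selects the color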
+ while i < len(name):
+ total += ord(name[i])
+ i += 1
+ return list[total % count]
+
+def cgOverview(cg, minlen):
+ stats = dict()
+ large = []
+ for l in cg.list:
+ if l.fcall and l.depth == 1:
+ if l.length >= minlen:
+ large.append(l)
+ if l.name not in stats:
+ stats[l.name] = [0, 0.0]
+ stats[l.name][0] += (l.length * 1000.0)
+ stats[l.name][1] += 1
+ return (large, stats)
+
+# Function: createBootGraph
+# Description:
+# Create the output html file from the resident test data
+# Arguments:
+# data: Data object from parseKernelLog, optionally augmented by parseTraceLog
+# Output:
+# True if the html file was created, false if it failed
+def createBootGraph(data):
+ # html function templates
+ html_srccall = '<div id={6} title="{5}" class="srccall" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;">{0}</div>\n'
+ html_timetotal = '<table class="time1">\n<tr>'\
+ '<td class="blue">Init process starts @ <b>{0} ms</b></td>'\
+ '<td class="blue">Last initcall ends @ <b>{1} ms</b></td>'\
+ '</tr>\n</table>\n'
+
+ # device timeline
+ devtl = aslib.Timeline(100, 20)
+
+ # write the test title and general info header
+ devtl.createHeader(sysvals, sysvals.stamp)
+
+ # Generate the header for this timeline
+ t0 = data.start
+ tMax = data.end
+ tTotal = tMax - t0
+ if(tTotal == 0):
+ print('ERROR: No timeline data')
+ return False
+ user_mode = '%.0f'%(data.tUserMode*1000)
+ last_init = '%.0f'%(tTotal*1000)
+ devtl.html += html_timetotal.format(user_mode, last_init)
+
+ # determine the maximum number of rows we need to draw
+ devlist = []
+ for p in data.phases:
+ list = data.dmesg[p]['list']
+ for devname in list:
+ d = aslib.DevItem(0, p, list[devname])
+ devlist.append(d)
+ devtl.getPhaseRows(devlist, 0, 'start')
+ devtl.calcTotalRows()
+
+ # draw the timeline background
+ devtl.createZoomBox()
+ devtl.html += devtl.html_tblock.format('boot', '0', '100', devtl.scaleH)
+ for p in data.phases:
+ phase = data.dmesg[p]
+ length = phase['end']-phase['start']
+ left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
+ width = '%.3f' % ((length*100.0)/tTotal)
+ devtl.html += devtl.html_phase.format(left, width, \
+ '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
+ phase['color'], '')
+
+ # draw the device timeline
+ num = 0
+ devstats = dict()
+ for phase in data.phases:
+ list = data.dmesg[phase]['list']
+ for devname in sorted(list):
+ cls, color = colorForName(devname)
+ dev = list[devname]
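+ # stat string format: @|start(ms)|end(ms)|ulen(ms)|return-value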
+ info = '@|%.3f|%.3f|%.3f|%d' % (dev['start']*1000.0, dev['end']*1000.0,
+ dev['ulen']/1000.0, dev['ret'])
+ devstats[dev['id']] = {'info':info}
+ dev['color'] = color
+ height = devtl.phaseRowHeight(0, phase, dev['row'])
+ top = '%.6f' % ((dev['row']*height) + devtl.scaleH)
+ left = '%.6f' % (((dev['start']-t0)*100)/tTotal)
+ width = '%.6f' % (((dev['end']-dev['start'])*100)/tTotal)
+ length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
+ devtl.html += devtl.html_device.format(dev['id'],
+ devname+length+phase+'_mode', left, top, '%.3f'%height,
+ width, devname, ' '+cls, '')
+ rowtop = devtl.phaseRowTop(0, phase, dev['row'])
+ height = '%.6f' % (devtl.rowH / 2)
+ top = '%.6f' % (rowtop + devtl.scaleH + (devtl.rowH / 2))
+ if data.do_one_initcall:
+ if('ftrace' not in dev):
+ continue
+ cg = dev['ftrace']
+ large, stats = cgOverview(cg, 0.001)
+ devstats[dev['id']]['fstat'] = stats
+ for l in large:
+ left = '%f' % (((l.time-t0)*100)/tTotal)
+ width = '%f' % (l.length*100/tTotal)
+ title = '%s (%0.3fms)' % (l.name, l.length * 1000.0)
+ devtl.html += html_srccall.format(l.name, left,
+ top, height, width, title, 'x%d'%num)
+ num += 1
+ continue
+ if('ftraces' not in dev):
+ continue
+ for cg in dev['ftraces']:
+ left = '%f' % (((cg.start-t0)*100)/tTotal)
+ width = '%f' % ((cg.end-cg.start)*100/tTotal)
+ cglen = (cg.end - cg.start) * 1000.0
+ title = '%s (%0.3fms)' % (cg.name, cglen)
+ cg.id = 'x%d' % num
+ devtl.html += html_srccall.format(cg.name, left,
+ top, height, width, title, dev['id']+cg.id)
+ num += 1
+
+ # draw the time scale, try to make the number of labels readable
+ devtl.createTimeScale(t0, tMax, tTotal, 'boot')
+ devtl.html += '</div>\n'
+
+ # timeline is finished
+ devtl.html += '</div>\n</div>\n'
+
+ # draw a legend which describes the phases by color
+ devtl.html += '<div class="legend">\n'
+ pdelta = 20.0
+ pmargin = 36.0
+ for phase in data.phases:
+ order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+ devtl.html += devtl.html_legend.format(order, \
+ data.dmesg[phase]['color'], phase+'_mode', phase[0])
+ devtl.html += '</div>\n'
+
+ hf = open(sysvals.htmlfile, 'w')
+
+ # add the css
+ extra = '\
+ .c1 {background:rgba(209,0,0,0.4);}\n\
+ .c2 {background:rgba(255,102,34,0.4);}\n\
+ .c3 {background:rgba(255,218,33,0.4);}\n\
+ .c4 {background:rgba(51,221,0,0.4);}\n\
+ .c5 {background:rgba(17,51,204,0.4);}\n\
+ .c6 {background:rgba(34,0,102,0.4);}\n\
+ .c7 {background:rgba(51,0,68,0.4);}\n\
+ .c8 {background:rgba(204,255,204,0.4);}\n\
+ .c9 {background:rgba(169,208,245,0.4);}\n\
+ .c10 {background:rgba(255,255,204,0.4);}\n\
+ .vt {transform:rotate(-60deg);transform-origin:0 0;}\n\
+ table.fstat {table-layout:fixed;padding:150px 15px 0 0;font-size:10px;column-width:30px;}\n\
+ .fstat th {width:55px;}\n\
+ .fstat td {text-align:left;width:35px;}\n\
+ .srccall {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
+ .srccall:hover {color:white;font-weight:bold;border:1px solid white;}\n'
+ aslib.addCSS(hf, sysvals, 1, False, extra)
+
+ # write the device timeline
+ hf.write(devtl.html)
+
+ # add boot specific html
+ statinfo = 'var devstats = {\n'
+ for n in sorted(devstats):
+ statinfo += '\t"%s": [\n\t\t"%s",\n' % (n, devstats[n]['info'])
+ if 'fstat' in devstats[n]:
+ funcs = devstats[n]['fstat']
+ for f in sorted(funcs, key=funcs.get, reverse=True):
+ if funcs[f][0] < 0.01 and len(funcs) > 10:
+ break
+ statinfo += '\t\t"%f|%s|%d",\n' % (funcs[f][0], f, funcs[f][1])
+ statinfo += '\t],\n'
+ statinfo += '};\n'
+ html = \
+ '<div id="devicedetailtitle"></div>\n'\
+ '<div id="devicedetail" style="display:none;">\n'\
+ '<div id="devicedetail0">\n'
+ for p in data.phases:
+ phase = data.dmesg[p]
+ html += devtl.html_phaselet.format(p+'_mode', '0', '100', phase['color'])
+ html += '</div>\n</div>\n'\
+ '<script type="text/javascript">\n'+statinfo+\
+ '</script>\n'
+ hf.write(html)
+
+ # add the callgraph html
+ if(sysvals.usecallgraph):
+ aslib.addCallgraphs(sysvals, hf, data)
+
+ # add the test log as a hidden div
+ if sysvals.testlog and sysvals.logmsg:
+ hf.write('<div id="testlog" style="display:none;">\n'+sysvals.logmsg+'</div>\n')
+ # add the dmesg log as a hidden div
+ if sysvals.dmesglog:
+ hf.write('<div id="dmesglog" style="display:none;">\n')
+ for line in data.dmesgtext:
+ line = line.replace('<', '&lt;').replace('>', '&gt;')
+ hf.write(line)
+ hf.write('</div>\n')
+
+ # write the footer and close
+ aslib.addScriptCode(hf, [data])
+ hf.write('</body>\n</html>\n')
+ hf.close()
+ return True
+
+# Function: updateCron
+# Description:
+# (restore=False) Set the tool to run automatically on reboot
+# (restore=True) Restore the original crontab
+def updateCron(restore=False):
+ if not restore:
+ sysvals.rootUser(True)
+ crondir = '/var/spool/cron/crontabs/'
+ if not os.path.exists(crondir):
+ crondir = '/var/spool/cron/'
+ if not os.path.exists(crondir):
+ doError('%s not found' % crondir)
+ cronfile = crondir+'root'
+ backfile = crondir+'root-analyze_boot-backup'
+ cmd = sysvals.getExec('crontab')
+ if not cmd:
+ doError('crontab not found')
+ # on restore: move the backup cron back into place
+ if restore:
+ if os.path.exists(backfile):
+ shutil.move(backfile, cronfile)
+ call([cmd, cronfile])
+ return
+ # backup current cron and install new one with reboot
+ if os.path.exists(cronfile):
+ shutil.move(cronfile, backfile)
+ else:
+ fp = open(backfile, 'w')
+ fp.close()
+ res = -1
+ try:
+ fp = open(backfile, 'r')
+ op = open(cronfile, 'w')
+ for line in fp:
+ if not sysvals.myCronJob(line):
+ op.write(line)
+ continue
+ fp.close()
+ op.write('@reboot python %s\n' % sysvals.cronjobCmdString())
+ op.close()
+ res = call([cmd, cronfile])
+ except Exception, e:
+ print 'Exception: %s' % str(e)
+ shutil.move(backfile, cronfile)
+ res = -1
+ if res != 0:
+ doError('crontab failed')
+
+# Function: updateGrub
+# Description:
+# update grub.cfg for all kernels with our parameters
+def updateGrub(restore=False):
+ # call update-grub on restore
+ if restore:
+ try:
+ call(sysvals.blexec, stderr=PIPE, stdout=PIPE,
+ env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
+ except Exception, e:
+ print 'Exception: %s\n' % str(e)
+ return
+ # extract the option and create a grub config without it
+ sysvals.rootUser(True)
+ tgtopt = 'GRUB_CMDLINE_LINUX_DEFAULT'
+ cmdline = ''
+ grubfile = '/etc/default/grub'
+ tempfile = '/etc/default/grub.analyze_boot'
+ shutil.move(grubfile, tempfile)
+ res = -1
+ try:
+ fp = open(tempfile, 'r')
+ op = open(grubfile, 'w')
+ cont = False
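+ # copy all lines except the target option; a trailing backslash
+ # marks a continuation that folds into the accumulated value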
+ for line in fp:
+ line = line.strip()
+ if len(line) == 0 or line[0] == '#':
+ continue
+ opt = line.split('=')[0].strip()
+ if opt == tgtopt:
+ cmdline = line.split('=', 1)[1].strip('\\')
+ if line[-1] == '\\':
+ cont = True
+ elif cont:
+ cmdline += line.strip('\\')
+ if line[-1] != '\\':
+ cont = False
+ else:
+ op.write('%s\n' % line)
+ fp.close()
+ # if the target option value is in quotes, strip them
+ sp = '"'
+ val = cmdline.strip()
+ if val and (val[0] == '\'' or val[0] == '"'):
+ sp = val[0]
+ val = val.strip(sp)
+ cmdline = val
+ # append our cmd line options
+ if len(cmdline) > 0:
+ cmdline += ' '
+ cmdline += sysvals.kernelParams()
+ # write out the updated target option
+ op.write('\n%s=%s%s%s\n' % (tgtopt, sp, cmdline, sp))
+ op.close()
+ res = call(sysvals.blexec)
+ os.remove(grubfile)
+ except Exception, e:
+ print 'Exception: %s' % str(e)
+ res = -1
+ # cleanup
+ shutil.move(tempfile, grubfile)
+ if res != 0:
+ doError('update grub failed')
+
+# Function: updateKernelParams
+# Description:
+# update boot conf for all kernels with our parameters
+def updateKernelParams(restore=False):
+ # find the boot loader
+ sysvals.getBootLoader()
+ if sysvals.bootloader == 'grub':
+ updateGrub(restore)
+
+# Function: doError
+# Description:
+# generic error function for catastrophic failures
+# Arguments:
+# msg: the error message to print
+# help: True if printHelp should be called after, False otherwise
+def doError(msg, help=False):
+ if help == True:
+ printHelp()
+ print 'ERROR: %s\n' % msg
+ sysvals.outputResult({'error':msg})
+ sys.exit()
+
+# Function: printHelp
+# Description:
+# print out the help text
+def printHelp():
+ print('')
+ print('%s v%s' % (sysvals.title, sysvals.version))
+ print('Usage: bootgraph <options> <command>')
+ print('')
+ print('Description:')
+ print(' This tool reads in a dmesg log of linux kernel boot and')
+ print(' creates an html representation of the boot timeline up to')
+ print(' the start of the init process.')
+ print('')
+ print(' If no specific command is given the tool reads the current dmesg')
+ print(' and/or ftrace log and creates a timeline')
+ print('')
+ print(' Generates output files in subdirectory: boot-yymmdd-HHMMSS')
+ print(' HTML output: <hostname>_boot.html')
+ print(' raw dmesg output: <hostname>_boot_dmesg.txt')
+ print(' raw ftrace output: <hostname>_boot_ftrace.txt')
+ print('')
+ print('Options:')
+ print(' -h Print this help text')
+ print(' -v Print the current tool version')
+ print(' -verbose Print extra information during execution and analysis')
+ print(' -addlogs Add the dmesg log to the html output')
+ print(' -result fn Export a results table to a text file for parsing.')
+ print(' -o name Overrides the output subdirectory name when running a new test')
+ print(' default: boot-{date}-{time}')
+ print(' [advanced]')
+ print(' -fstat Use ftrace to add function detail and statistics (default: disabled)')
+ print(' -f/-callgraph Add callgraph detail, can be very large (default: disabled)')
+ print(' -maxdepth N limit the callgraph data to N call levels (default: 2)')
+ print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
+ print(' -timeprec N Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
+ print(' -expandcg pre-expand the callgraph data in the html output (default: disabled)')
+ print(' -func list Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
+ print(' -cgfilter S Filter the callgraph output in the timeline')
+ print(' -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)')
+ print(' -bl name Use the following boot loader for kernel params (default: grub)')
+ print(' -reboot Reboot the machine automatically and generate a new timeline')
+ print(' -manual Show the steps to generate a new timeline manually (used with -reboot)')
+ print('')
+ print('Other commands:')
+ print(' -flistall Print all functions capable of being captured in ftrace')
+ print(' -sysinfo Print out system info extracted from BIOS')
+ print(' [redo]')
+ print(' -dmesg file Create HTML output using dmesg input (used with -ftrace)')
+ print(' -ftrace file Create HTML output using ftrace input (used with -dmesg)')
+ print('')
+ return True
+
+# ----------------- MAIN --------------------
+# exec start (skipped if script is loaded as library)
+if __name__ == '__main__':
+ # loop through the command line arguments
+ cmd = ''
+ testrun = True
+ switchoff = ['disable', 'off', 'false', '0']
+ simplecmds = ['-sysinfo', '-kpupdate', '-flistall', '-checkbl']
+ cgskip = ''
+ if '-f' in sys.argv:
+ cgskip = sysvals.configFile('cgskip.txt')
+ args = iter(sys.argv[1:])
+ mdset = False
+ for arg in args:
+ if(arg == '-h'):
+ printHelp()
+ sys.exit()
+ elif(arg == '-v'):
+ print("Version %s" % sysvals.version)
+ sys.exit()
+ elif(arg == '-verbose'):
+ sysvals.verbose = True
+ elif(arg in simplecmds):
+ cmd = arg[1:]
+ elif(arg == '-fstat'):
+ sysvals.useftrace = True
+ elif(arg == '-callgraph' or arg == '-f'):
+ sysvals.useftrace = True
+ sysvals.usecallgraph = True
+ elif(arg == '-cgdump'):
+ sysvals.cgdump = True
+ elif(arg == '-mincg'):
+ sysvals.mincglen = aslib.getArgFloat('-mincg', args, 0.0, 10000.0)
+ elif(arg == '-cgfilter'):
+ try:
+ val = args.next()
+ except:
+ doError('No callgraph functions supplied', True)
+ sysvals.setCallgraphFilter(val)
+ elif(arg == '-cgskip'):
+ try:
+ val = args.next()
+ except:
+ doError('No file supplied', True)
+ if val.lower() in switchoff:
+ cgskip = ''
+ else:
+ cgskip = sysvals.configFile(val)
+ if(not cgskip):
+ doError('%s does not exist' % cgskip)
+ elif(arg == '-bl'):
+ try:
+ val = args.next()
+ except:
+ doError('No boot loader name supplied', True)
+ if val.lower() not in ['grub']:
+ doError('Unknown boot loader: %s' % val, True)
+ sysvals.bootloader = val.lower()
+ elif(arg == '-timeprec'):
+ sysvals.setPrecision(aslib.getArgInt('-timeprec', args, 0, 6))
+ elif(arg == '-maxdepth'):
+ mdset = True
+ sysvals.max_graph_depth = aslib.getArgInt('-maxdepth', args, 0, 1000)
+ elif(arg == '-func'):
+ try:
+ val = args.next()
+ except:
+ doError('No filter functions supplied', True)
+ sysvals.useftrace = True
+ sysvals.usecallgraph = True
+ sysvals.rootCheck(True)
+ sysvals.setGraphFilter(val)
+ elif(arg == '-ftrace'):
+ try:
+ val = args.next()
+ except:
+ doError('No ftrace file supplied', True)
+ if(os.path.exists(val) == False):
+ doError('%s does not exist' % val)
+ testrun = False
+ sysvals.ftracefile = val
+ elif(arg == '-addlogs'):
+ sysvals.dmesglog = True
+ elif(arg == '-expandcg'):
+ sysvals.cgexp = True
+ elif(arg == '-dmesg'):
+ try:
+ val = args.next()
+ except:
+ doError('No dmesg file supplied', True)
+ if(os.path.exists(val) == False):
+ doError('%s does not exist' % val)
+ testrun = False
+ sysvals.dmesgfile = val
+ elif(arg == '-o'):
+ try:
+ val = args.next()
+ except:
+ doError('No subdirectory name supplied', True)
+ sysvals.testdir = sysvals.setOutputFolder(val)
+ elif(arg == '-result'):
+ try:
+ val = args.next()
+ except:
+ doError('No result file supplied', True)
+ sysvals.result = val
+ elif(arg == '-reboot'):
+ sysvals.reboot = True
+ elif(arg == '-manual'):
+ sysvals.reboot = True
+ sysvals.manual = True
+ # remaining options are only for cron job use
+ elif(arg == '-cronjob'):
+ sysvals.iscronjob = True
+ else:
+ doError('Invalid argument: '+arg, True)
+
+ # compatibility errors and access checks
+ if(sysvals.iscronjob and (sysvals.reboot or \
+ sysvals.dmesgfile or sysvals.ftracefile or cmd)):
+ doError('-cronjob is meant for batch purposes only')
+ if(sysvals.reboot and (sysvals.dmesgfile or sysvals.ftracefile)):
+ doError('-reboot and -dmesg/-ftrace are incompatible')
+ if cmd or sysvals.reboot or sysvals.iscronjob or testrun:
+ sysvals.rootCheck(True)
+ if (testrun and sysvals.useftrace) or cmd == 'flistall':
+ if not sysvals.verifyFtrace():
+ doError('Ftrace is not properly enabled')
+
+ # run utility commands
+ sysvals.cpuInfo()
+ if cmd != '':
+ if cmd == 'kpupdate':
+ updateKernelParams()
+ elif cmd == 'flistall':
+ for f in sysvals.getBootFtraceFilterFunctions():
+ print f
+ elif cmd == 'checkbl':
+ sysvals.getBootLoader()
+ print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)
+ elif(cmd == 'sysinfo'):
+ sysvals.printSystemInfo(True)
+ sys.exit()
+
+ # reboot: update grub, setup a cronjob, and reboot
+ if sysvals.reboot:
+ if (sysvals.useftrace or sysvals.usecallgraph) and \
+ not sysvals.checkFtraceKernelVersion():
+ doError('Ftrace functionality requires kernel v4.10 or newer')
+ if not sysvals.manual:
+ updateKernelParams()
+ updateCron()
+ call('reboot')
+ else:
+ sysvals.manualRebootRequired()
+ sys.exit()
+
+ if sysvals.usecallgraph and cgskip:
+ sysvals.vprint('Using cgskip file: %s' % cgskip)
+ sysvals.setCallgraphBlacklist(cgskip)
+
+ # cronjob: remove the cronjob, grub changes, and disable ftrace
+ if sysvals.iscronjob:
+ updateCron(True)
+ updateKernelParams(True)
+ try:
+ sysvals.fsetVal('0', 'tracing_on')
+ except:
+ pass
+
+ # testrun: generate copies of the logs
+ if testrun:
+ retrieveLogs()
+ else:
+ sysvals.setOutputFile()
+
+ # process the log data
+ if sysvals.dmesgfile:
+ if not mdset:
+ sysvals.max_graph_depth = 0
+ data = parseKernelLog()
+ if(not data.valid):
+ doError('No initcall data found in %s' % sysvals.dmesgfile)
+ if sysvals.useftrace and sysvals.ftracefile:
+ parseTraceLog(data)
+ if sysvals.cgdump:
+ data.debugPrint()
+ sys.exit()
+ else:
+ doError('dmesg file required')
+
+ sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
+ sysvals.vprint('Command:\n %s' % sysvals.cmdline)
+ sysvals.vprint('Kernel parameters:\n %s' % sysvals.kparams)
+ data.printDetails()
+ createBootGraph(data)
+
+ # if running as root, change output dir owner to sudo_user
+ if testrun and os.path.isdir(sysvals.testdir) and \
+ os.getuid() == 0 and 'SUDO_USER' in os.environ:
+ cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
+ call(cmd.format(os.environ['SUDO_USER'], sysvals.testdir), shell=True)
+
+ sysvals.stamp['boot'] = (data.tUserMode - data.start) * 1000
+ sysvals.stamp['lastinit'] = data.end * 1000
+ sysvals.outputResult(sysvals.stamp)
diff --git a/tools/power/pm-graph/config/cgskip.txt b/tools/power/pm-graph/config/cgskip.txt
new file mode 100644
index 000000000..e48d588fb
--- /dev/null
+++ b/tools/power/pm-graph/config/cgskip.txt
@@ -0,0 +1,65 @@
+# -----------------------------------------------
+# CallGraph function skip list
+#
+# This file contains a list of functions which are
+# meant to be skipped in the callgraph trace. It reduces
+# the callgraph html file size by treating these functions
+# as leaves with no child calls. It can be edited by
+# adding or removing function symbol names.
+#
+# The sleepgraph tool automatically pulls this file in when
+# it is found in the config folder. It can be ignored if
+# the tool is called with "-cgskip off".
+# -----------------------------------------------
+
+# low level scheduling and timing
+up
+down_timeout
+mutex_lock
+down_read
+complete_all
+schedule_timeout
+wake_up_process
+msleep
+__udelay
+ktime_get
+
+# console calls
+printk
+dev_printk
+console_unlock
+
+# memory handling
+__kmalloc
+__kmalloc_track_caller
+kmem_cache_alloc
+kmem_cache_alloc_trace
+kmem_cache_free
+kstrdup
+kstrdup_const
+kmalloc_slab
+new_slab
+__slab_alloc
+__slab_free
+raw_pci_read
+pci_read
+alloc_pages_current
+
+# debugfs and sysfs setup
+debugfs_remove_recursive
+debugfs_create_dir
+debugfs_create_files
+debugfs_get_inode
+sysfs_add_file_mode_ns
+sysfs_add_file
+sysfs_create_dir_ns
+sysfs_create_link
+sysfs_create_group
+sysfs_create_groups
+sysfs_create_bin_file
+dpm_sysfs_add
+sysfs_create_file_ns
+sysfs_merge_group
+sysfs_add_link_to_group
+sysfs_create_link_sd
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
new file mode 100644
index 000000000..f8fcb06fd
--- /dev/null
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -0,0 +1,205 @@
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config thisfile.txt
+#
+
+[Settings]
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: mem
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: true
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: suspend-{hostname}-{date}-{time}-custom
+
+# Override default timeline entries
+# Do not use the internal default functions for timeline entries (default: false)
+# Set this to true if you intend to only use the ones defined in this config
+override-timeline-functions: true
+
+# Override default dev timeline entries
+# Do not use the internal default functions for dev timeline entries (default: false)
+# Set this to true if you intend to only use the ones defined in this config
+override-dev-timeline-functions: true
+
+[timeline_functions_x86_64]
+#
+# Function calls to display in the timeline alongside device callbacks.
+# The tool has an internal set of these functions which should cover the
+# whole of kernel execution, but you can append or override here.
+#
+# This is a list of kprobes which use both symbol data and function arg data.
+# The function calls are displayed on the timeline alongside the device blocks.
+# The args are pulled directly from the stack using this architecture's registers
+# and stack formatting. Three pieces of info are required. The function name,
+# a format string, and an argument list
+#
+# Entry format:
+#
+# function: format{fn_arg1}_{fn_arg2} fn_arg1 fn_arg2 ... [color=purple]
+#
+# Required Arguments:
+#
+# function: The symbol name for the function you want probed, this is the
+# minimum required for an entry, it will show up as the function
+# name with no arguments.
+#
+# example: _cpu_up:
+#
+# Optional Arguments:
+#
+# format: The format to display the data on the timeline in. Use braces to
+# enclose the arg names.
+#
+# example: CPU_ON[{cpu}]
+#
+# color: The color of the entry block in the timeline. The default color is
+# transparent, so the entry shares the phase color. The color is an
+# html color string, either a word, or an RGB.
+#
+# example: [color=#CC00CC]
+#
+# arglist: A list of arguments from registers/stack addresses. See URL:
+# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
+#
+# example: cpu=%di:s32
+#
+# Example: Display cpu resume in the timeline
+#
+# _cpu_up: CPU_ON[{cpu}] cpu=%di:s32 [color=orange]
+#
+_cpu_down: CPU_OFF[{cpu}] cpu=%di:s32
+_cpu_up: CPU_ON[{cpu}] cpu=%di:s32
+sys_sync:
+pm_prepare_console:
+pm_notifier_call_chain:
+freeze_processes:
+freeze_kernel_threads:
+pm_restrict_gfp_mask:
+acpi_suspend_begin:
+suspend_console:
+acpi_pm_prepare:
+syscore_suspend:
+arch_enable_nonboot_cpus_end:
+syscore_resume:
+acpi_pm_finish:
+resume_console:
+acpi_pm_end:
+pm_restore_gfp_mask:
+thaw_processes:
+pm_restore_console:
+
+[dev_timeline_functions_x86_64]
+#
+# Dev mode function calls to display inside timeline entries
+#
+# This is a list of kprobes which use both symbol data and function arg data.
+# The function calls are displayed on the timeline alongside the device blocks.
+# The args are pulled directly from the stack using this architecture's registers
+# and stack formatting. Three pieces of info are required. The function name,
+# a format string, and an argument list
+#
+# Entry format:
+#
+# function: format{fn_arg1}_{fn_arg2} fn_arg1 fn_arg2 ... [color=purple]
+#
+# Required Arguments:
+#
+# function: The symbol name for the function you want probed, this is the
+# minimum required for an entry, it will show up as the function
+# name with no arguments.
+#
+# example: ata_eh_recover:
+#
+# Optional Arguments:
+#
+# format: The format to display the data on the timeline in. Use braces to
+# enclose the arg names.
+#
+# example: ata{port}_port_reset
+#
+# color: The color of the entry block in the timeline. The default color is
+# transparent, so the entry shares the phase color. The color is an
+# html color string, either a word, or an RGB.
+#
+# example: [color=#CC00CC]
+#
+# arglist: A list of arguments from registers/stack addresses. See URL:
+# https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
+#
+# example: port=+36(%di):s32
+#
+# Example: Display ATA port reset as ataN_port_reset in the timeline
+#
+# ata_eh_recover: ata{port}_port_reset port=+36(%di):s32
+#
+msleep: msleep time=%di:s32
+schedule_timeout_uninterruptible: schedule_timeout_uninterruptible timeout=%di:s32
+schedule_timeout: schedule_timeout timeout=%di:s32
+usleep_range: usleep_range min=%di:s32 max=%si:s32
+__const_udelay: udelay loops=%di:s32
+__mutex_lock_slowpath: mutex_lock_slowpath
+ata_eh_recover: ata_eh_recover port=+36(%di):s32
+acpi_os_stall:
+acpi_resume_power_resources:
+acpi_ps_parse_aml:
+ext4_sync_fs:
+i915_gem_resume:
+i915_restore_state:
+intel_opregion_setup:
+g4x_pre_enable_dp:
+vlv_pre_enable_dp:
+chv_pre_enable_dp:
+g4x_enable_dp:
+vlv_enable_dp:
+intel_hpd_init:
+intel_opregion_register:
+intel_dp_detect:
+intel_hdmi_detect:
+intel_opregion_init:
+intel_fbdev_set_suspend:
diff --git a/tools/power/pm-graph/config/example.cfg b/tools/power/pm-graph/config/example.cfg
new file mode 100644
index 000000000..05b2efb9b
--- /dev/null
+++ b/tools/power/pm-graph/config/example.cfg
@@ -0,0 +1,133 @@
+#
+# Generic S3 (Suspend to Mem) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/example.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: mem
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: suspend-{hostname}-{date}-{time}
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: true
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# Skip HTML generation
+# Only capture the logs, don't generate the html timeline (default: false)
+skiphtml: false
+
+# Sync filesystem before suspend
+# run sync before the test, minimizes sys_sync call time (default: false)
+sync: true
+
+# Runtime suspend enable/disable
+# Enable/disable runtime suspend for all devices, restore all after test (default: no-action)
+# rs: disable
+
+# Turn display on/off for test
+# Switch the display on/off for the test using xset (default: no-action)
+# display: on
+
+# Print results to text file
+# Print the status of the test run in the given file (default: no-action)
+result: result.txt
+
+# Gzip the log files to save space
+# Gzip the generated log files, and read gzipped log files (default: false)
+gzip: true
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of suspend (default: "")
+# command: echo mem > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Multiple test runs
+# Run N tests D seconds apart, generates separate outputs with a summary (default: false)
+# multi: 3 5
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back and display in the same timeline (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 0.001
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Callgraph phase filter
+# Only enable callgraphs for one phase, i.e. resume_noirq (default: all)
+cgphase: suspend
+
+# Callgraph x2 test filter
+# Only enable callgraphs test 0 or 1 when using -x2 (default: 1)
+cgtest: 0
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 6
+
+# Device Filter
+# show only devices whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
+
+# Add kprobe functions to the timeline
+# Add functions to the timeline from a text file (default: no-action)
+# fadd: file.txt
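+#
+# A sketch of the expected fadd file format (the function names below are
+# illustrative): one kernel function per line; blank lines and lines
+# beginning with # are ignored when the file is read.
+#
+# pm_suspend
+# dpm_resume_early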
diff --git a/tools/power/pm-graph/config/freeze-callgraph.cfg b/tools/power/pm-graph/config/freeze-callgraph.cfg
new file mode 100644
index 000000000..f692821c4
--- /dev/null
+++ b/tools/power/pm-graph/config/freeze-callgraph.cfg
@@ -0,0 +1,94 @@
+#
+# Full Callgraph for S2 (Freeze) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/freeze-callgraph.cfg
+#
+# NOTE: the output of this test is very large (> 30MB)
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: freeze
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: freeze-{hostname}-{date}-{time}-cg
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of freeze (default: "")
+# command: echo freeze > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: true
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 6
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/freeze-dev.cfg b/tools/power/pm-graph/config/freeze-dev.cfg
new file mode 100644
index 000000000..c4ad5cea3
--- /dev/null
+++ b/tools/power/pm-graph/config/freeze-dev.cfg
@@ -0,0 +1,93 @@
+#
+# Dev S2 (Freeze) test - includes src calls / kernel threads
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/freeze-dev.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: freeze
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: freeze-{hostname}-{date}-{time}-dev
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of freeze (default: "")
+# command: echo freeze > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: true
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/freeze.cfg b/tools/power/pm-graph/config/freeze.cfg
new file mode 100644
index 000000000..0b70e0b74
--- /dev/null
+++ b/tools/power/pm-graph/config/freeze.cfg
@@ -0,0 +1,93 @@
+#
+# Generic S2 (Freeze) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/freeze.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: freeze
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: freeze-{hostname}-{date}-{time}
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of freeze (default: "")
+# command: echo freeze > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 0.001
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/standby-callgraph.cfg b/tools/power/pm-graph/config/standby-callgraph.cfg
new file mode 100644
index 000000000..f52a6b9d5
--- /dev/null
+++ b/tools/power/pm-graph/config/standby-callgraph.cfg
@@ -0,0 +1,94 @@
+#
+# Full Callgraph for S1 (Standby) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/standby-callgraph.cfg
+#
+# NOTE: the output of this test is very large (> 30MB)
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: standby
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: standby-{hostname}-{date}-{time}-cg
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of standby (default: "")
+# command: echo standby > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: true
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 6
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/standby-dev.cfg b/tools/power/pm-graph/config/standby-dev.cfg
new file mode 100644
index 000000000..a5498ece3
--- /dev/null
+++ b/tools/power/pm-graph/config/standby-dev.cfg
@@ -0,0 +1,93 @@
+#
+# Dev S1 (Standby) test - includes src calls / kernel threads
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/standby-dev.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: standby
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: standby-{hostname}-{date}-{time}-dev
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of standby (default: "")
+# command: echo standby > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: true
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/standby.cfg b/tools/power/pm-graph/config/standby.cfg
new file mode 100644
index 000000000..f0dd264df
--- /dev/null
+++ b/tools/power/pm-graph/config/standby.cfg
@@ -0,0 +1,93 @@
+#
+# Generic S1 (Standby) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/standby.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: standby
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: standby-{hostname}-{date}-{time}
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of standby (default: "")
+# command: echo standby > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 0.001
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/suspend-callgraph.cfg b/tools/power/pm-graph/config/suspend-callgraph.cfg
new file mode 100644
index 000000000..11b8cbc12
--- /dev/null
+++ b/tools/power/pm-graph/config/suspend-callgraph.cfg
@@ -0,0 +1,98 @@
+#
+# Full Callgraph for S3 (Suspend to Mem) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/suspend-callgraph.cfg
+#
+# NOTE: the output of this test is very large (> 30MB)
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: mem
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: suspend-{hostname}-{date}-{time}-cg
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of suspend (default: "")
+# command: echo mem > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 0.001
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: true
+
+# Max graph depth
+# limit the callgraph trace to this depth (default: 0 = all)
+maxdepth: 5
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 6
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/suspend-dev.cfg b/tools/power/pm-graph/config/suspend-dev.cfg
new file mode 100644
index 000000000..56f1d21cc
--- /dev/null
+++ b/tools/power/pm-graph/config/suspend-dev.cfg
@@ -0,0 +1,93 @@
+#
+# Dev S3 (Suspend to Mem) test - includes src calls / kernel threads
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/suspend-dev.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: mem
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: suspend-{hostname}-{date}-{time}-dev
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of suspend (default: "")
+# command: echo mem > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: true
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/suspend-x2-proc.cfg b/tools/power/pm-graph/config/suspend-x2-proc.cfg
new file mode 100644
index 000000000..0ecca0ede
--- /dev/null
+++ b/tools/power/pm-graph/config/suspend-x2-proc.cfg
@@ -0,0 +1,93 @@
+#
+# Proc S3 (Suspend to Mem) x2 test - includes user processes
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/suspend-x2-proc.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: mem
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: suspend-{hostname}-{date}-{time}-x2-proc
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of suspend (default: "")
+# command: echo mem > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: true
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: true
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 1000
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 1000
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 1000
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 1
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/config/suspend.cfg b/tools/power/pm-graph/config/suspend.cfg
new file mode 100644
index 000000000..70d293231
--- /dev/null
+++ b/tools/power/pm-graph/config/suspend.cfg
@@ -0,0 +1,93 @@
+#
+# Generic S3 (Suspend to Mem) test
+#
+# This is the configuration file for sleepgraph. It contains
+# all the tool arguments so that they don't have to be given on the
+# command line. It also includes advanced settings for functions
+# and kprobes. It is run like this
+#
+# sudo ./sleepgraph.py -config config/suspend.cfg
+#
+
+[Settings]
+
+# ---- General Options ----
+
+# Verbosity
+# print verbose messages (default: false)
+verbose: false
+
+# Suspend Mode
+# e.g. standby, mem, freeze, disk (default: mem)
+mode: mem
+
+# Output Directory Format
+# output folder for html, ftrace, and dmesg. Use {date} and {time} for current values
+output-dir: suspend-{hostname}-{date}-{time}
+
+# Automatic Wakeup
+# Use rtcwake to autoresume after X seconds, or off to disable (default: 15)
+rtcwake: 15
+
+# Add Logs
+# add the dmesg and ftrace log to the html output (default: false)
+addlogs: false
+
+# Suspend/Resume Gap
+# insert a small visible gap between suspend and resume on the timeline (default: false)
+srgap: false
+
+# ---- Advanced Options ----
+
+# Command to execute in lieu of suspend (default: "")
+# command: echo mem > /sys/power/state
+
+# Display user processes
+# graph user processes and cpu usage in the timeline (default: false)
+proc: false
+
+# Display function calls
+# graph source functions in the timeline (default: false)
+dev: false
+
+# Back to Back Suspend/Resume
+# Run two suspend/resumes back to back (default: false)
+x2: false
+
+# Back to Back Suspend Delay
+# Time delay between the two test runs in ms (default: 0 ms)
+x2delay: 0
+
+# Pre Suspend Delay
+# Include an N ms delay before (1st) suspend (default: 0 ms)
+predelay: 0
+
+# Post Resume Delay
+# Include an N ms delay after (last) resume (default: 0 ms)
+postdelay: 0
+
+# Minimum Device Length
+# graph only devices longer than min in the timeline (default: 0.001 ms)
+mindev: 0.001
+
+# ---- Debug Options ----
+
+# Callgraph
+# gather detailed ftrace callgraph data on all timeline events (default: false)
+callgraph: false
+
+# Expand Callgraph
+# pre-expand the callgraph data in the html output (default: disabled)
+expandcg: false
+
+# Minimum Callgraph Length
+# provide callgraph data for blocks longer than min (default: 0.001 ms)
+mincg: 1
+
+# Timestamp Precision
+# Number of significant digits in timestamps (0:S, [3:ms], 6:us)
+timeprec: 3
+
+# Device Filter
+# show only devs whose name/driver includes one of these strings
+# devicefilter: _cpu_up,_cpu_down,i915,usb
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
new file mode 100644
index 000000000..070be2cf7
--- /dev/null
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -0,0 +1,292 @@
+.TH SLEEPGRAPH 8
+.SH NAME
+sleepgraph \- Suspend/Resume timing analysis
+.SH SYNOPSIS
+.ft B
+.B sleepgraph
+.RB [ OPTIONS ]
+.RB [ COMMAND ]
+.SH DESCRIPTION
+\fBsleepgraph \fP is designed to assist kernel and OS developers
+in optimizing their Linux stack's suspend/resume time. Using a kernel
+image built with a few extra options enabled, the tool will execute a
+suspend and capture dmesg and ftrace data until resume is complete.
+This data is transformed into a device timeline and an optional
+callgraph to give a detailed view of which devices/subsystems are
+taking the most time in suspend/resume.
+.PP
+If no specific command is given, the default behavior is to initiate
+a suspend/resume.
+.PP
+Generates output files in subdirectory: suspend-yymmdd-HHMMSS
+ html timeline : <hostname>_<mode>.html
+ raw dmesg file : <hostname>_<mode>_dmesg.txt
+ raw ftrace file : <hostname>_<mode>_ftrace.txt
+.SH OPTIONS
+.TP
+\fB-h\fR
+Print the help text.
+.TP
+\fB-v\fR
+Print the current tool version.
+.TP
+\fB-verbose\fR
+Print extra information during execution and analysis.
+.TP
+\fB-config \fIfile\fR
+Pull arguments and config options from a file.
+.TP
+\fB-m \fImode\fR
+Mode to initiate for suspend e.g. standby, freeze, mem (default: mem).
+.TP
+\fB-o \fIname\fR
+Overrides the output subdirectory name when running a new test.
+Use {date}, {time}, {hostname} for current values.
+.sp
+e.g. suspend-{hostname}-{date}-{time}
+.TP
+\fB-rtcwake \fIt\fR | off
+Use rtcwake to autoresume after \fIt\fR seconds (default: 15). Set t to "off" to
+disable rtcwake and require a user keypress to resume.
+.TP
+\fB-addlogs\fR
+Add the dmesg and ftrace logs to the html output. They will be viewable by
+clicking buttons in the timeline.
+.TP
+\fB-result \fIfile\fR
+Export a results table to a text file for parsing.
+.TP
+\fB-sync\fR
+Sync the filesystems before starting the test. This reduces the size of
+the sys_sync call which happens in the suspend_prepare phase.
+.TP
+\fB-rs \fIenable/disable\fR
+During test, enable/disable runtime suspend for all devices. The test is delayed
+by 5 seconds to allow runtime suspend changes to occur. The settings are restored
+after the test is complete.
+.TP
+\fB-display \fIon/off\fR
+Turn the display on or off for the test using the xset command. This helps
+maintain the consistency of test data for better comparison.
+.TP
+\fB-skiphtml\fR
+Run the test and capture the trace logs, but skip the timeline generation.
+
+.SS "advanced"
+.TP
+\fB-gzip\fR
+Gzip the trace and dmesg logs to save space. The tool can also read in gzipped
+logs for processing.
+.TP
+\fB-cmd \fIstr\fR
+Run the timeline over a custom suspend command, e.g. pm-suspend. By default
+the tool forces suspend via /sys/power/state so this allows testing over
+an OS's official suspend method. The output file will change to
+hostname_command.html and will autodetect which suspend mode was triggered.
+.TP
+\fB-filter \fI"d1,d2,..."\fR
+Filter out all but these device callbacks. These strings can be device names
+or module names. e.g. 0000:00:02.0, ata5, i915, usb, etc.
+.TP
+\fB-mindev \fIt\fR
+Discard all device callbacks shorter than \fIt\fR milliseconds (default: 0.0).
+This reduces the html file size as there can be many tiny callbacks which are barely
+visible. The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-proc\fR
+Add usermode process info into the timeline (default: disabled).
+.TP
+\fB-dev\fR
+Add kernel source calls and threads to the timeline (default: disabled).
+.TP
+\fB-x2\fR
+Run two suspend/resumes back to back (default: disabled).
+.TP
+\fB-x2delay \fIt\fR
+Include \fIt\fR ms delay between multiple test runs (default: 0 ms).
+.TP
+\fB-predelay \fIt\fR
+Include \fIt\fR ms delay before 1st suspend (default: 0 ms).
+.TP
+\fB-postdelay \fIt\fR
+Include \fIt\fR ms delay after last resume (default: 0 ms).
+.TP
+\fB-multi \fIn d\fR
+Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will
+be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}.
+
+.SS "ftrace debug"
+.TP
+\fB-f\fR
+Use ftrace to create device callgraphs (default: disabled). This can produce
+very large outputs, i.e. 10MB - 100MB.
+.TP
+\fB-maxdepth \fIlevel\fR
+Limit the callgraph trace depth to \fIlevel\fR (default: 0=all). This is
+the best way to limit the output size when using callgraphs via -f.
+.TP
+\fB-expandcg\fR
+Pre-expand the callgraph data in the html output (default: disabled).
+.TP
+\fB-fadd \fIfile\fR
+Add functions to be graphed in the timeline from a list in a text file.
+.TP
+\fB-mincg \fIt\fR
+Discard all callgraphs shorter than \fIt\fR milliseconds (default: 0.0).
+This reduces the html file size as there can be many tiny callgraphs
+which are barely visible in the timeline.
+The value is a float: e.g. 0.001 represents 1 us.
+.TP
+\fB-cgfilter \fI"func1,func2,..."\fR
+Reduce callgraph output in the timeline by limiting it to a list of calls. The
+argument can be a single function name or a comma delimited list.
+(default: none)
+.TP
+\fB-cgskip \fIfile\fR
+Reduce callgraph timeline size by skipping over uninteresting functions
+in the trace, e.g. printk or console_unlock. The functions listed
+in this file will show up as empty leaves in the callgraph with only the start/end
+times displayed. cgskip.txt is used automatically if found in the path, so
+use "off" to disable completely (default: cgskip.txt)
+.TP
+\fB-cgphase \fIp\fR
+Only show callgraph data for phase \fIp\fR (e.g. suspend_late).
+.TP
+\fB-cgtest \fIn\fR
+In an x2 run, only show callgraph data for test \fIn\fR (e.g. 0 or 1).
+.TP
+\fB-timeprec \fIn\fR
+Number of significant digits in timestamps (0:S, [3:ms], 6:us).
+.TP
+\fB-bufsize \fIN\fR
+Set the trace buffer size to \fIN\fR kilobytes (default: all of free memory up to 3GB).
+
+.SH COMMANDS
+.TP
+\fB-summary \fIindir\fR
+Create a summary page of all tests in \fIindir\fR. Creates summary.html
+in the current folder. The output page is a table of tests with
+suspend and resume values sorted by suspend mode, host, and kernel.
+Includes test averages by mode and links to the test html files.
+Use -genhtml to include tests with missing html.
+.TP
+\fB-modes\fR
+List available suspend modes.
+.TP
+\fB-status\fR
+Test to see if the system is able to run this tool. Use this along
+with any options you intend to use to see if they will work.
+.TP
+\fB-fpdt\fR
+Print out the contents of the ACPI Firmware Performance Data Table.
+.TP
+\fB-battery\fR
+Print out battery status and current charge.
+.TP
+\fB-sysinfo\fR
+Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
+.TP
+\fB-devinfo\fR
+Print out the pm settings of all devices which support runtime suspend.
+.TP
+\fB-flist\fR
+Print the list of ftrace functions currently being captured. Functions
+that are not available as symbols in the current kernel are shown in red.
+By default, the tool traces a list of important suspend/resume functions
+in order to better fill out the timeline. If the user has added their own
+with -fadd they will also be checked.
+.TP
+\fB-flistall\fR
+Print all ftrace functions capable of being captured. These are all the
+possible values you can add to trace via the -fadd argument.
+.SS "rebuild"
+.TP
+\fB-ftrace \fIfile\fR
+Create HTML output from an existing ftrace file.
+.TP
+\fB-dmesg \fIfile\fR
+Create HTML output from an existing dmesg file.
+
+.SH EXAMPLES
+.SS "simple commands"
+Check which suspend modes are currently supported.
+.IP
+\f(CW$ sleepgraph -modes\fR
+.PP
+Read the Firmware Performance Data Table (FPDT)
+.IP
+\f(CW$ sudo sleepgraph -fpdt\fR
+.PP
+Print out the current USB power topology
+.IP
+\f(CW$ sleepgraph -usbtopo\fR
+.PP
+Verify that you can run a command with a set of arguments
+.IP
+\f(CW$ sudo sleepgraph -f -rtcwake 30 -status\fR
+.PP
+Generate a summary of all timelines in a particular folder.
+.IP
+\f(CW$ sleepgraph -summary ~/workspace/myresults/\fR
+.PP
+
+.SS "capturing basic timelines"
+Execute a mem suspend with a 15 second wakeup. Include the logs in the html.
+.IP
+\f(CW$ sudo sleepgraph -rtcwake 15 -addlogs\fR
+.PP
+Execute a standby with a 15 second wakeup. Change the output folder name.
+.IP
+\f(CW$ sudo sleepgraph -m standby -rtcwake 15 -o "standby-{host}-{date}-{time}"\fR
+.PP
+Execute a freeze with no wakeup (require keypress). Change output folder name.
+.IP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake off -o "freeze-{hostname}-{date}-{time}"\fR
+.PP
+
+.SS "capturing advanced timelines"
+Execute a suspend & include dev mode source calls, limit callbacks to 5ms or larger.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -dev -mindev 5\fR
+.PP
+Run two suspends back to back, include a 500ms delay before, after, and in between runs.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -x2 -predelay 500 -x2delay 500 -postdelay 500\fR
+.PP
+Do a batch run of 10 freezes with 30 seconds delay between runs.
+.IP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 10 30\fR
+.PP
+Execute a suspend using a custom command.
+.IP
+\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR
+.PP
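+Disable runtime suspend on all devices for the duration of the test
+(a usage sketch of the \fB-rs\fR option; settings are restored afterward).
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -rs disable\fR
+.PP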
+
+.SS "adding callgraph data"
+Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f -maxdepth 5 -mincg 10\fR
+.PP
+Capture a full callgraph across all suspend, then filter the html by a single phase.
+.IP
+\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -f\fR
+.IP
+\f(CW$ sleepgraph -dmesg host_mem_dmesg.txt -ftrace host_mem_ftrace.txt -f -cgphase resume\fR
+.PP
+
+.SS "rebuild timeline from logs"
+.PP
+Rebuild the html from a previous run's logs, using the same options.
+.IP
+\f(CW$ sleepgraph -dmesg dmesg.txt -ftrace ftrace.txt -callgraph\fR
+.PP
+Rebuild the html with different options.
+.IP
+\f(CW$ sleepgraph -dmesg dmesg.txt -ftrace ftrace.txt -addlogs -srgap\fR
+
+.SH "SEE ALSO"
+dmesg(1)
+.PP
+.SH AUTHOR
+.nf
+Written by Todd Brandt <todd.e.brandt@linux.intel.com>
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
new file mode 100755
index 000000000..0c760478f
--- /dev/null
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -0,0 +1,6057 @@
+#!/usr/bin/python2
+#
+# Tool for analyzing suspend/resume timing
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Authors:
+# Todd Brandt <todd.e.brandt@linux.intel.com>
+#
+# Links:
+# Home Page
+# https://01.org/suspendresume
+# Source repo
+# git@github.com:01org/pm-graph
+#
+# Description:
+# This tool is designed to assist kernel and OS developers in optimizing
+# their linux stack's suspend/resume time. Using a kernel image built
+# with a few extra options enabled, the tool will execute a suspend and
+# will capture dmesg and ftrace data until resume is complete. This data
+# is transformed into a device timeline and a callgraph to give a quick
+# and detailed view of which devices and callbacks are taking the most
+# time in suspend/resume. The output is a single html file which can be
+# viewed in firefox or chrome.
+#
+# The following kernel build options are required:
+# CONFIG_PM_DEBUG=y
+# CONFIG_PM_SLEEP_DEBUG=y
+# CONFIG_FTRACE=y
+# CONFIG_FUNCTION_TRACER=y
+# CONFIG_FUNCTION_GRAPH_TRACER=y
+# CONFIG_KPROBES=y
+# CONFIG_KPROBES_ON_FTRACE=y
+#
+# For kernel versions older than 3.15:
+# The following additional kernel parameters are required:
+# (e.g. in file /etc/default/grub)
+# GRUB_CMDLINE_LINUX_DEFAULT="... initcall_debug log_buf_len=16M ..."
+#
+
+# ----------------- LIBRARIES --------------------
+
+import sys
+import time
+import os
+import string
+import re
+import platform
+from datetime import datetime
+import struct
+import ConfigParser
+import gzip
+from threading import Thread
+from subprocess import call, Popen, PIPE
+
+# ----------------- CLASSES --------------------
+
+# Class: SystemValues
+# Description:
+# A global, single-instance container used to
+# store system values and test parameters
+class SystemValues:
+ title = 'SleepGraph'
+ version = '5.1'
+ ansi = False
+ rs = 0
+ display = 0
+ gzip = False
+ sync = False
+ verbose = False
+ testlog = True
+ dmesglog = False
+ ftracelog = False
+ mindevlen = 0.0
+ mincglen = 0.0
+ cgphase = ''
+ cgtest = -1
+ cgskip = ''
+ multitest = {'run': False, 'count': 0, 'delay': 0}
+ max_graph_depth = 0
+ callloopmaxgap = 0.0001
+ callloopmaxlen = 0.005
+ bufsize = 0
+ cpucount = 0
+ memtotal = 204800
+ memfree = 204800
+ srgap = 0
+ cgexp = False
+ testdir = ''
+ outdir = ''
+ tpath = '/sys/kernel/debug/tracing/'
+ fpdtpath = '/sys/firmware/acpi/tables/FPDT'
+ epath = '/sys/kernel/debug/tracing/events/power/'
+ traceevents = [
+ 'suspend_resume',
+ 'device_pm_callback_end',
+ 'device_pm_callback_start'
+ ]
+ logmsg = ''
+ testcommand = ''
+ mempath = '/dev/mem'
+ powerfile = '/sys/power/state'
+ mempowerfile = '/sys/power/mem_sleep'
+ suspendmode = 'mem'
+ memmode = ''
+ hostname = 'localhost'
+ prefix = 'test'
+ teststamp = ''
+ sysstamp = ''
+ dmesgstart = 0.0
+ dmesgfile = ''
+ ftracefile = ''
+ htmlfile = 'output.html'
+ result = ''
+ rtcwake = True
+ rtcwaketime = 15
+ rtcpath = ''
+ devicefilter = []
+ cgfilter = []
+ stamp = 0
+ execcount = 1
+ x2delay = 0
+ skiphtml = False
+ usecallgraph = False
+ usetraceevents = False
+ usetracemarkers = True
+ usekprobes = True
+ usedevsrc = False
+ useprocmon = False
+ notestrun = False
+ cgdump = False
+ mixedphaseheight = True
+ devprops = dict()
+ predelay = 0
+ postdelay = 0
+ procexecfmt = 'ps - (?P<ps>.*)$'
+ devpropfmt = '# Device Properties: .*'
+ tracertypefmt = '# tracer: (?P<t>.*)'
+ firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+ tracefuncs = {
+ 'sys_sync': {},
+ '__pm_notifier_call_chain': {},
+ 'pm_prepare_console': {},
+ 'pm_notifier_call_chain': {},
+ 'freeze_processes': {},
+ 'freeze_kernel_threads': {},
+ 'pm_restrict_gfp_mask': {},
+ 'acpi_suspend_begin': {},
+ 'acpi_hibernation_begin': {},
+ 'acpi_hibernation_enter': {},
+ 'acpi_hibernation_leave': {},
+ 'acpi_pm_freeze': {},
+ 'acpi_pm_thaw': {},
+ 'hibernate_preallocate_memory': {},
+ 'create_basic_memory_bitmaps': {},
+ 'swsusp_write': {},
+ 'suspend_console': {},
+ 'acpi_pm_prepare': {},
+ 'syscore_suspend': {},
+ 'arch_enable_nonboot_cpus_end': {},
+ 'syscore_resume': {},
+ 'acpi_pm_finish': {},
+ 'resume_console': {},
+ 'acpi_pm_end': {},
+ 'pm_restore_gfp_mask': {},
+ 'thaw_processes': {},
+ 'pm_restore_console': {},
+ 'CPU_OFF': {
+ 'func':'_cpu_down',
+ 'args_x86_64': {'cpu':'%di:s32'},
+ 'format': 'CPU_OFF[{cpu}]'
+ },
+ 'CPU_ON': {
+ 'func':'_cpu_up',
+ 'args_x86_64': {'cpu':'%di:s32'},
+ 'format': 'CPU_ON[{cpu}]'
+ },
+ }
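+	# (illustrative note) entries may carry a 'func' key to probe a different
+	# symbol, per-arch register args, and a 'format' string for the timeline label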
+ dev_tracefuncs = {
+ # general wait/delay/sleep
+ 'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 },
+ 'schedule_timeout_uninterruptible': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
+ 'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
+ 'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 },
+ 'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 },
+ 'mutex_lock_slowpath': { 'func':'__mutex_lock_slowpath', 'ub': 1 },
+ 'acpi_os_stall': {'ub': 1},
+ # ACPI
+ 'acpi_resume_power_resources': {},
+ 'acpi_ps_parse_aml': {},
+ # filesystem
+ 'ext4_sync_fs': {},
+ # 80211
+ 'iwlagn_mac_start': {},
+ 'iwlagn_alloc_bcast_station': {},
+ 'iwl_trans_pcie_start_hw': {},
+ 'iwl_trans_pcie_start_fw': {},
+ 'iwl_run_init_ucode': {},
+ 'iwl_load_ucode_wait_alive': {},
+ 'iwl_alive_start': {},
+ 'iwlagn_mac_stop': {},
+ 'iwlagn_mac_suspend': {},
+ 'iwlagn_mac_resume': {},
+ 'iwlagn_mac_add_interface': {},
+ 'iwlagn_mac_remove_interface': {},
+ 'iwlagn_mac_change_interface': {},
+ 'iwlagn_mac_config': {},
+ 'iwlagn_configure_filter': {},
+ 'iwlagn_mac_hw_scan': {},
+ 'iwlagn_bss_info_changed': {},
+ 'iwlagn_mac_channel_switch': {},
+ 'iwlagn_mac_flush': {},
+ # ATA
+ 'ata_eh_recover': { 'args_x86_64': {'port':'+36(%di):s32'} },
+ # i915
+ 'i915_gem_resume': {},
+ 'i915_restore_state': {},
+ 'intel_opregion_setup': {},
+ 'g4x_pre_enable_dp': {},
+ 'vlv_pre_enable_dp': {},
+ 'chv_pre_enable_dp': {},
+ 'g4x_enable_dp': {},
+ 'vlv_enable_dp': {},
+ 'intel_hpd_init': {},
+ 'intel_opregion_register': {},
+ 'intel_dp_detect': {},
+ 'intel_hdmi_detect': {},
+ 'intel_opregion_init': {},
+ 'intel_fbdev_set_suspend': {},
+ }
+ cgblacklist = []
+ kprobes = dict()
+ timeformat = '%.3f'
+ cmdline = '%s %s' % \
+ (os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
+ def __init__(self):
+ self.archargs = 'args_'+platform.machine()
+ self.hostname = platform.node()
+ if(self.hostname == ''):
+ self.hostname = 'localhost'
+ rtc = "rtc0"
+ if os.path.exists('/dev/rtc'):
+ rtc = os.readlink('/dev/rtc')
+ rtc = '/sys/class/rtc/'+rtc
+ if os.path.exists(rtc) and os.path.exists(rtc+'/date') and \
+ os.path.exists(rtc+'/time') and os.path.exists(rtc+'/wakealarm'):
+ self.rtcpath = rtc
+ if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
+ self.ansi = True
+ self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S')
+ def vprint(self, msg):
+ self.logmsg += msg+'\n'
+ if(self.verbose):
+ print(msg)
+ def rootCheck(self, fatal=True):
+ if(os.access(self.powerfile, os.W_OK)):
+ return True
+ if fatal:
+ msg = 'This command requires sysfs mount and root access'
+			print('ERROR: %s\n' % msg)
+ self.outputResult({'error':msg})
+ sys.exit()
+ return False
+ def rootUser(self, fatal=False):
+ if 'USER' in os.environ and os.environ['USER'] == 'root':
+ return True
+ if fatal:
+ msg = 'This command must be run as root'
+			print('ERROR: %s\n' % msg)
+ self.outputResult({'error':msg})
+ sys.exit()
+ return False
+ def getExec(self, cmd):
+ dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin',
+ '/usr/local/sbin', '/usr/local/bin']
+ for path in dirlist:
+ cmdfull = os.path.join(path, cmd)
+ if os.path.exists(cmdfull):
+ return cmdfull
+ return ''
+ def setPrecision(self, num):
+ if num < 0 or num > 6:
+ return
+ self.timeformat = '%.{0}f'.format(num)
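+		# e.g. num=3 gives '%.3f' (ms precision), num=6 gives '%.6f' (us)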
+ def setOutputFolder(self, value):
+ args = dict()
+ n = datetime.now()
+ args['date'] = n.strftime('%y%m%d')
+ args['time'] = n.strftime('%H%M%S')
+ args['hostname'] = args['host'] = self.hostname
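+		# illustrative: 'suspend-{host}-{date}-{time}' becomes something
+		# like 'suspend-myhost-180101-123456'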
+ return value.format(**args)
+ def setOutputFile(self):
+ if self.dmesgfile != '':
+ m = re.match('(?P<name>.*)_dmesg\.txt.*', self.dmesgfile)
+ if(m):
+ self.htmlfile = m.group('name')+'.html'
+ if self.ftracefile != '':
+ m = re.match('(?P<name>.*)_ftrace\.txt.*', self.ftracefile)
+ if(m):
+ self.htmlfile = m.group('name')+'.html'
+ def systemInfo(self, info):
+ p = c = m = b = ''
+ if 'baseboard-manufacturer' in info:
+ m = info['baseboard-manufacturer']
+ elif 'system-manufacturer' in info:
+ m = info['system-manufacturer']
+ if 'baseboard-product-name' in info:
+ p = info['baseboard-product-name']
+ elif 'system-product-name' in info:
+ p = info['system-product-name']
+ if 'processor-version' in info:
+ c = info['processor-version']
+ if 'bios-version' in info:
+ b = info['bios-version']
+ self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | numcpu:%d | memsz:%d | memfr:%d' % \
+ (m, p, c, b, self.cpucount, self.memtotal, self.memfree)
+ def printSystemInfo(self, fatal=False):
+ self.rootCheck(True)
+ out = dmidecode(self.mempath, fatal)
+ if len(out) < 1:
+ return
+ fmt = '%-24s: %s'
+ for name in sorted(out):
+ print fmt % (name, out[name])
+ print fmt % ('cpucount', ('%d' % self.cpucount))
+ print fmt % ('memtotal', ('%d kB' % self.memtotal))
+ print fmt % ('memfree', ('%d kB' % self.memfree))
+ def cpuInfo(self):
+ self.cpucount = 0
+ fp = open('/proc/cpuinfo', 'r')
+ for line in fp:
+ if re.match('^processor[ \t]*:[ \t]*[0-9]*', line):
+ self.cpucount += 1
+ fp.close()
+ fp = open('/proc/meminfo', 'r')
+ for line in fp:
+ m = re.match('^MemTotal:[ \t]*(?P<sz>[0-9]*) *kB', line)
+ if m:
+ self.memtotal = int(m.group('sz'))
+ m = re.match('^MemFree:[ \t]*(?P<sz>[0-9]*) *kB', line)
+ if m:
+ self.memfree = int(m.group('sz'))
+ fp.close()
+ def initTestOutput(self, name):
+ self.prefix = self.hostname
+ v = open('/proc/version', 'r').read().strip()
+ kver = string.split(v)[2]
+ fmt = name+'-%m%d%y-%H%M%S'
+ testtime = datetime.now().strftime(fmt)
+ self.teststamp = \
+ '# '+testtime+' '+self.prefix+' '+self.suspendmode+' '+kver
+ ext = ''
+ if self.gzip:
+ ext = '.gz'
+ self.dmesgfile = \
+ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_dmesg.txt'+ext
+ self.ftracefile = \
+ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_ftrace.txt'+ext
+ self.htmlfile = \
+ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html'
+ if not os.path.isdir(self.testdir):
+ os.mkdir(self.testdir)
+ def getValueList(self, value):
+ out = []
+ for i in value.split(','):
+ if i.strip():
+ out.append(i.strip())
+ return out
+ def setDeviceFilter(self, value):
+ self.devicefilter = self.getValueList(value)
+ def setCallgraphFilter(self, value):
+ self.cgfilter = self.getValueList(value)
+ def setCallgraphBlacklist(self, file):
+ self.cgblacklist = self.listFromFile(file)
+ def rtcWakeAlarmOn(self):
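+		# clear any pending alarm, then arm a new one 'rtcwaketime'
+		# seconds from now via the rtc sysfs wakealarm interface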
+ call('echo 0 > '+self.rtcpath+'/wakealarm', shell=True)
+ nowtime = open(self.rtcpath+'/since_epoch', 'r').read().strip()
+ if nowtime:
+ nowtime = int(nowtime)
+ else:
+ # if hardware time fails, use the software time
+ nowtime = int(datetime.now().strftime('%s'))
+ alarm = nowtime + self.rtcwaketime
+ call('echo %d > %s/wakealarm' % (alarm, self.rtcpath), shell=True)
+ def rtcWakeAlarmOff(self):
+ call('echo 0 > %s/wakealarm' % self.rtcpath, shell=True)
+ def initdmesg(self):
+ # get the latest time stamp from the dmesg log
+ fp = Popen('dmesg', stdout=PIPE).stdout
+ ktime = '0'
+ for line in fp:
+ line = line.replace('\r\n', '')
+ idx = line.find('[')
+ if idx > 1:
+ line = line[idx:]
+ m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+ if(m):
+ ktime = m.group('ktime')
+ fp.close()
+ self.dmesgstart = float(ktime)
+ def getdmesg(self, fwdata=[]):
+ op = self.writeDatafileHeader(sysvals.dmesgfile, fwdata)
+ # store all new dmesg lines since initdmesg was called
+ fp = Popen('dmesg', stdout=PIPE).stdout
+ for line in fp:
+ line = line.replace('\r\n', '')
+ idx = line.find('[')
+ if idx > 1:
+ line = line[idx:]
+ m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+ if(not m):
+ continue
+ ktime = float(m.group('ktime'))
+ if ktime > self.dmesgstart:
+ op.write(line)
+ fp.close()
+ op.close()
+ def listFromFile(self, file):
+ list = []
+ fp = open(file)
+ for i in fp.read().split('\n'):
+ i = i.strip()
+ if i and i[0] != '#':
+ list.append(i)
+ fp.close()
+ return list
+ def addFtraceFilterFunctions(self, file):
+ for i in self.listFromFile(file):
+ if len(i) < 2:
+ continue
+ self.tracefuncs[i] = dict()
+ def getFtraceFilterFunctions(self, current):
+ self.rootCheck(True)
+ if not current:
+ call('cat '+self.tpath+'available_filter_functions', shell=True)
+ return
+ master = self.listFromFile(self.tpath+'available_filter_functions')
+ for i in self.tracefuncs:
+ if 'func' in self.tracefuncs[i]:
+ i = self.tracefuncs[i]['func']
+ if i in master:
+ print i
+ else:
+ print self.colorText(i)
+ def setFtraceFilterFunctions(self, list):
+ master = self.listFromFile(self.tpath+'available_filter_functions')
+ flist = ''
+ for i in list:
+ if i not in master:
+ continue
+ if ' [' in i:
+ flist += i.split(' ')[0]+'\n'
+ else:
+ flist += i+'\n'
+ fp = open(self.tpath+'set_graph_function', 'w')
+ fp.write(flist)
+ fp.close()
+ def basicKprobe(self, name):
+ self.kprobes[name] = {'name': name,'func': name,'args': dict(),'format': name}
+ def defaultKprobe(self, name, kdata):
+ k = kdata
+ for field in ['name', 'format', 'func']:
+ if field not in k:
+ k[field] = name
+ if self.archargs in k:
+ k['args'] = k[self.archargs]
+ else:
+ k['args'] = dict()
+ k['format'] = name
+ self.kprobes[name] = k
+ def kprobeColor(self, name):
+ if name not in self.kprobes or 'color' not in self.kprobes[name]:
+ return ''
+ return self.kprobes[name]['color']
+ def kprobeDisplayName(self, name, dataraw):
+ if name not in self.kprobes:
+ self.basicKprobe(name)
+ data = ''
+ quote=0
+		# first remove any spaces inside quotes, and the quotes
+ for c in dataraw:
+ if c == '"':
+ quote = (quote + 1) % 2
+ if quote and c == ' ':
+ data += '_'
+ elif c != '"':
+ data += c
+ fmt, args = self.kprobes[name]['format'], self.kprobes[name]['args']
+ arglist = dict()
+ # now process the args
+ for arg in sorted(args):
+ arglist[arg] = ''
+ m = re.match('.* '+arg+'=(?P<arg>.*) ', data);
+ if m:
+ arglist[arg] = m.group('arg')
+ else:
+ m = re.match('.* '+arg+'=(?P<arg>.*)', data);
+ if m:
+ arglist[arg] = m.group('arg')
+ out = fmt.format(**arglist)
+ out = out.replace(' ', '_').replace('"', '')
+ return out
+ def kprobeText(self, kname, kprobe):
+ name = fmt = func = kname
+ args = dict()
+ if 'name' in kprobe:
+ name = kprobe['name']
+ if 'format' in kprobe:
+ fmt = kprobe['format']
+ if 'func' in kprobe:
+ func = kprobe['func']
+ if self.archargs in kprobe:
+ args = kprobe[self.archargs]
+ if 'args' in kprobe:
+ args = kprobe['args']
+ if re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', func):
+ doError('Kprobe "%s" has format info in the function name "%s"' % (name, func))
+ for arg in re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', fmt):
+ if arg not in args:
+ doError('Kprobe "%s" is missing argument "%s"' % (name, arg))
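+		# build the kprobe_events text: one entry probe and one return probe
+		# per function; e.g. for msleep this yields (illustrative):
+		#   p:msleep_cal msleep time=%di:s32
+		#   r:msleep_ret msleep $retval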
+ val = 'p:%s_cal %s' % (name, func)
+ for i in sorted(args):
+ val += ' %s=%s' % (i, args[i])
+ val += '\nr:%s_ret %s $retval\n' % (name, func)
+ return val
+ def addKprobes(self, output=False):
+ if len(self.kprobes) < 1:
+ return
+ if output:
+ print(' kprobe functions in this kernel:')
+ # first test each kprobe
+ rejects = []
+ # sort kprobes: trace, ub-dev, custom, dev
+ kpl = [[], [], [], []]
+ linesout = len(self.kprobes)
+ for name in sorted(self.kprobes):
+ res = self.colorText('YES', 32)
+ if not self.testKprobe(name, self.kprobes[name]):
+ res = self.colorText('NO')
+ rejects.append(name)
+ else:
+ if name in self.tracefuncs:
+ kpl[0].append(name)
+ elif name in self.dev_tracefuncs:
+ if 'ub' in self.dev_tracefuncs[name]:
+ kpl[1].append(name)
+ else:
+ kpl[3].append(name)
+ else:
+ kpl[2].append(name)
+ if output:
+ print(' %s: %s' % (name, res))
+ kplist = kpl[0] + kpl[1] + kpl[2] + kpl[3]
+ # remove all failed ones from the list
+ for name in rejects:
+ self.kprobes.pop(name)
+ # set the kprobes all at once
+ self.fsetVal('', 'kprobe_events')
+ kprobeevents = ''
+ for kp in kplist:
+ kprobeevents += self.kprobeText(kp, self.kprobes[kp])
+ self.fsetVal(kprobeevents, 'kprobe_events')
+ if output:
+ check = self.fgetVal('kprobe_events')
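+			# each kprobe contributes two lines: an entry and a return probe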
+ linesack = (len(check.split('\n')) - 1) / 2
+ print(' kprobe functions enabled: %d/%d' % (linesack, linesout))
+ self.fsetVal('1', 'events/kprobes/enable')
+ def testKprobe(self, kname, kprobe):
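+		# verify the kernel accepts this kprobe: write it to kprobe_events
+		# and check that the readback contains at least as many lines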
+ self.fsetVal('0', 'events/kprobes/enable')
+ kprobeevents = self.kprobeText(kname, kprobe)
+ if not kprobeevents:
+ return False
+ try:
+ self.fsetVal(kprobeevents, 'kprobe_events')
+ check = self.fgetVal('kprobe_events')
+ except:
+ return False
+ linesout = len(kprobeevents.split('\n'))
+ linesack = len(check.split('\n'))
+ if linesack < linesout:
+ return False
+ return True
+ def setVal(self, val, file, mode='w'):
+ if not os.path.exists(file):
+ return False
+ try:
+ fp = open(file, mode, 0)
+ fp.write(val)
+ fp.flush()
+ fp.close()
+ except:
+ return False
+ return True
+ def fsetVal(self, val, path, mode='w'):
+ return self.setVal(val, self.tpath+path, mode)
+ def getVal(self, file):
+ res = ''
+ if not os.path.exists(file):
+ return res
+ try:
+ fp = open(file, 'r')
+ res = fp.read()
+ fp.close()
+ except:
+ pass
+ return res
+ def fgetVal(self, path):
+ return self.getVal(self.tpath+path)
+ def cleanupFtrace(self):
+ if(self.usecallgraph or self.usetraceevents or self.usedevsrc):
+ self.fsetVal('0', 'events/kprobes/enable')
+ self.fsetVal('', 'kprobe_events')
+ self.fsetVal('1024', 'buffer_size_kb')
+ def setupAllKprobes(self):
+ for name in self.tracefuncs:
+ self.defaultKprobe(name, self.tracefuncs[name])
+ for name in self.dev_tracefuncs:
+ self.defaultKprobe(name, self.dev_tracefuncs[name])
+ def isCallgraphFunc(self, name):
+ if len(self.tracefuncs) < 1 and self.suspendmode == 'command':
+ return True
+ for i in self.tracefuncs:
+ if 'func' in self.tracefuncs[i]:
+ f = self.tracefuncs[i]['func']
+ else:
+ f = i
+ if name == f:
+ return True
+ return False
+ def initFtrace(self):
+ self.printSystemInfo(False)
+ print('INITIALIZING FTRACE...')
+ # turn trace off
+ self.fsetVal('0', 'tracing_on')
+ self.cleanupFtrace()
+ # set the trace clock to global
+ self.fsetVal('global', 'trace_clock')
+ self.fsetVal('nop', 'current_tracer')
+ # set trace buffer to an appropriate value
+ cpus = max(1, self.cpucount)
+ if self.bufsize > 0:
+ tgtsize = self.bufsize
+ elif self.usecallgraph or self.usedevsrc:
+ tgtsize = min(self.memfree, 3*1024*1024)
+ else:
+ tgtsize = 65536
+ while not self.fsetVal('%d' % (tgtsize / cpus), 'buffer_size_kb'):
+ # if the size failed to set, lower it and keep trying
+ tgtsize -= 65536
+ if tgtsize < 65536:
+ tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus
+ break
+		print('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus))
+ # initialize the callgraph trace
+ if(self.usecallgraph):
+ # set trace type
+ self.fsetVal('function_graph', 'current_tracer')
+ self.fsetVal('', 'set_ftrace_filter')
+ # set trace format options
+ self.fsetVal('print-parent', 'trace_options')
+ self.fsetVal('funcgraph-abstime', 'trace_options')
+ self.fsetVal('funcgraph-cpu', 'trace_options')
+ self.fsetVal('funcgraph-duration', 'trace_options')
+ self.fsetVal('funcgraph-proc', 'trace_options')
+ self.fsetVal('funcgraph-tail', 'trace_options')
+ self.fsetVal('nofuncgraph-overhead', 'trace_options')
+ self.fsetVal('context-info', 'trace_options')
+ self.fsetVal('graph-time', 'trace_options')
+ self.fsetVal('%d' % self.max_graph_depth, 'max_graph_depth')
+ cf = ['dpm_run_callback']
+ if(self.usetraceevents):
+ cf += ['dpm_prepare', 'dpm_complete']
+ for fn in self.tracefuncs:
+ if 'func' in self.tracefuncs[fn]:
+ cf.append(self.tracefuncs[fn]['func'])
+ else:
+ cf.append(fn)
+ self.setFtraceFilterFunctions(cf)
+ # initialize the kprobe trace
+ elif self.usekprobes:
+ for name in self.tracefuncs:
+ self.defaultKprobe(name, self.tracefuncs[name])
+ if self.usedevsrc:
+ for name in self.dev_tracefuncs:
+ self.defaultKprobe(name, self.dev_tracefuncs[name])
+ print('INITIALIZING KPROBES...')
+ self.addKprobes(self.verbose)
+ if(self.usetraceevents):
+ # turn trace events on
+ events = iter(self.traceevents)
+ for e in events:
+ self.fsetVal('1', 'events/power/'+e+'/enable')
+ # clear the trace buffer
+ self.fsetVal('', 'trace')
+ def verifyFtrace(self):
+ # files needed for any trace data
+ files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock',
+ 'trace_marker', 'trace_options', 'tracing_on']
+ # files needed for callgraph trace data
+ tp = self.tpath
+ if(self.usecallgraph):
+ files += [
+ 'available_filter_functions',
+ 'set_ftrace_filter',
+ 'set_graph_function'
+ ]
+ for f in files:
+ if(os.path.exists(tp+f) == False):
+ return False
+ return True
+ def verifyKprobes(self):
+ # files needed for kprobes to work
+ files = ['kprobe_events', 'events']
+ tp = self.tpath
+ for f in files:
+ if(os.path.exists(tp+f) == False):
+ return False
+ return True
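+	# colorText illustration: with ansi enabled, colorText('YES', 32)
+	# returns '\x1B[32;40mYES\x1B[m' (green on black); the default color
+	# code 31 is red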
+ def colorText(self, str, color=31):
+ if not self.ansi:
+ return str
+ return '\x1B[%d;40m%s\x1B[m' % (color, str)
+ def writeDatafileHeader(self, filename, fwdata=[]):
+ fp = self.openlog(filename, 'w')
+ fp.write('%s\n%s\n# command | %s\n' % (self.teststamp, self.sysstamp, self.cmdline))
+ if(self.suspendmode == 'mem' or self.suspendmode == 'command'):
+ for fw in fwdata:
+ if(fw):
+ fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
+ return fp
+ def sudouser(self, dir):
+ if os.path.exists(dir) and os.getuid() == 0 and \
+ 'SUDO_USER' in os.environ:
+ cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
+ call(cmd.format(os.environ['SUDO_USER'], dir), shell=True)
+ def outputResult(self, testdata, num=0):
+ if not self.result:
+ return
+ n = ''
+ if num > 0:
+ n = '%d' % num
+ fp = open(self.result, 'a')
+ if 'error' in testdata:
+ fp.write('result%s: fail\n' % n)
+ fp.write('error%s: %s\n' % (n, testdata['error']))
+ else:
+ fp.write('result%s: pass\n' % n)
+ for v in ['suspend', 'resume', 'boot', 'lastinit']:
+ if v in testdata:
+ fp.write('%s%s: %.3f\n' % (v, n, testdata[v]))
+ for v in ['fwsuspend', 'fwresume']:
+ if v in testdata:
+ fp.write('%s%s: %.3f\n' % (v, n, testdata[v] / 1000000.0))
+ if 'bugurl' in testdata:
+ fp.write('url%s: %s\n' % (n, testdata['bugurl']))
+ fp.close()
+ self.sudouser(self.result)
+ def configFile(self, file):
+ dir = os.path.dirname(os.path.realpath(__file__))
+ if os.path.exists(file):
+ return file
+ elif os.path.exists(dir+'/'+file):
+ return dir+'/'+file
+ elif os.path.exists(dir+'/config/'+file):
+ return dir+'/config/'+file
+ return ''
+ def openlog(self, filename, mode):
+ isgz = self.gzip
+ if mode == 'r':
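+			# sniff for gzip by attempting a short compressed read; if it
+			# fails, assume the log is plain text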
+ try:
+ with gzip.open(filename, mode+'b') as fp:
+ test = fp.read(64)
+ isgz = True
+ except:
+ isgz = False
+ if isgz:
+ return gzip.open(filename, mode+'b')
+ return open(filename, mode)
+
+sysvals = SystemValues()
+switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
+switchoff = ['disable', 'off', 'false', '0']
+suspendmodename = {
+ 'freeze': 'Freeze (S0)',
+ 'standby': 'Standby (S1)',
+ 'mem': 'Suspend (S3)',
+ 'disk': 'Hibernate (S4)'
+}
+
+# Class: DevProps
+# Description:
+# Simple class which holds property values collected
+# for all the devices used in the timeline.
+class DevProps:
+ syspath = ''
+ altname = ''
+ async = True
+ xtraclass = ''
+ xtrainfo = ''
+ def out(self, dev):
+ return '%s,%s,%d;' % (dev, self.altname, self.async)
+ def debug(self, dev):
+		print('%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.async))
+ def altName(self, dev):
+ if not self.altname or self.altname == dev:
+ return dev
+ return '%s [%s]' % (self.altname, dev)
+ def xtraClass(self):
+ if self.xtraclass:
+ return ' '+self.xtraclass
+ if not self.async:
+ return ' sync'
+ return ''
+ def xtraInfo(self):
+ if self.xtraclass:
+ return ' '+self.xtraclass
+ if self.async:
+ return ' async_device'
+ return ' sync_device'
+
+# Class: DeviceNode
+# Description:
+#	A container used to create a device hierarchy, with a single root node
+# and a tree of child nodes. Used by Data.deviceTopology()
+class DeviceNode:
+ name = ''
+ children = 0
+ depth = 0
+ def __init__(self, nodename, nodedepth):
+ self.name = nodename
+ self.children = []
+ self.depth = nodedepth
+
+# Class: Data
+# Description:
+# The primary container for suspend/resume test data. There is one for
+#	each test run. The data is organized into a chronological hierarchy:
+# Data.dmesg {
+# phases {
+# 10 sequential, non-overlapping phases of S/R
+# contents: times for phase start/end, order/color data for html
+# devlist {
+# device callback or action list for this phase
+# device {
+# a single device callback or generic action
+# contents: start/stop times, pid/cpu/driver info
+# parents/children, html id for timeline/callgraph
+# optionally includes an ftrace callgraph
+# optionally includes dev/ps data
+# }
+# }
+# }
+# }
+#
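+# For example (illustrative access path, assuming the layout above), the
+# start time of a device callback named devname in the suspend phase is:
+#   data.dmesg['suspend']['list'][devname]['start']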
+class Data:
+ dmesg = {} # root data structure
+ phases = [] # ordered list of phases
+ start = 0.0 # test start
+ end = 0.0 # test end
+ tSuspended = 0.0 # low-level suspend start
+ tResumed = 0.0 # low-level resume start
+ tKernSus = 0.0 # kernel level suspend start
+ tKernRes = 0.0 # kernel level resume end
+ tLow = 0.0 # time spent in low-level suspend (standby/freeze)
+ fwValid = False # is firmware data available
+ fwSuspend = 0 # time spent in firmware suspend
+ fwResume = 0 # time spent in firmware resume
+ dmesgtext = [] # dmesg text file in memory
+ pstl = 0 # process timeline
+ testnumber = 0
+ idstr = ''
+ html_device_id = 0
+ stamp = 0
+ outfile = ''
+ devpids = []
+ kerror = False
+ def __init__(self, num):
+ idchar = 'abcdefghij'
+ self.pstl = dict()
+ self.testnumber = num
+ self.idstr = idchar[num]
+ self.dmesgtext = []
+ self.phases = []
+ self.dmesg = { # fixed list of 10 phases
+ 'suspend_prepare': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#CCFFCC', 'order': 0},
+ 'suspend': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#88FF88', 'order': 1},
+ 'suspend_late': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#00AA00', 'order': 2},
+ 'suspend_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#008888', 'order': 3},
+ 'suspend_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#0000FF', 'order': 4},
+ 'resume_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#FF0000', 'order': 5},
+ 'resume_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#FF9900', 'order': 6},
+ 'resume_early': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#FFCC00', 'order': 7},
+ 'resume': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#FFFF88', 'order': 8},
+ 'resume_complete': {'list': dict(), 'start': -1.0, 'end': -1.0,
+ 'row': 0, 'color': '#FFFFCC', 'order': 9}
+ }
+ self.phases = self.sortedPhases()
+ self.devicegroups = []
+ for phase in self.phases:
+ self.devicegroups.append([phase])
+ self.errorinfo = {'suspend':[],'resume':[]}
+ def extractErrorInfo(self):
+ elist = {
+ 'HWERROR' : '.*\[ *Hardware Error *\].*',
+ 'FWBUG' : '.*\[ *Firmware Bug *\].*',
+ 'BUG' : '.*BUG.*',
+ 'ERROR' : '.*ERROR.*',
+ 'WARNING' : '.*WARNING.*',
+ 'IRQ' : '.*genirq: .*',
+ 'TASKFAIL': '.*Freezing of tasks failed.*',
+ }
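+		# e.g. a (hypothetical) dmesg line such as
+		#   [  512.873986] WARNING: CPU: 2 PID: 1 at ...
+		# with a timestamp inside the test window is recorded as a WARNING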
+ lf = sysvals.openlog(sysvals.dmesgfile, 'r')
+ i = 0
+ list = []
+ for line in lf:
+ i += 1
+ m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+ if not m:
+ continue
+ t = float(m.group('ktime'))
+ if t < self.start or t > self.end:
+ continue
+ dir = 'suspend' if t < self.tSuspended else 'resume'
+ msg = m.group('msg')
+ for err in elist:
+ if re.match(elist[err], msg):
+ list.append((err, dir, t, i, i))
+ self.kerror = True
+ break
+ for e in list:
+ type, dir, t, idx1, idx2 = e
+ sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t))
+ self.errorinfo[dir].append((type, t, idx1, idx2))
+ if self.kerror:
+ sysvals.dmesglog = True
+ lf.close()
+ def setStart(self, time):
+ self.start = time
+ def setEnd(self, time):
+ self.end = time
+ def isTraceEventOutsideDeviceCalls(self, pid, time):
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ for dev in list:
+ d = list[dev]
+ if(d['pid'] == pid and time >= d['start'] and
+ time < d['end']):
+ return False
+ return True
+ def phaseCollision(self, phase, isbegin, line):
+ key = 'end'
+ if isbegin:
+ key = 'start'
+ if self.dmesg[phase][key] >= 0:
+ sysvals.vprint('IGNORE: %s' % line.strip())
+ return True
+ return False
+ def sourcePhase(self, start):
+ for phase in self.phases:
+ pend = self.dmesg[phase]['end']
+ if start <= pend:
+ return phase
+ return 'resume_complete'
+ def sourceDevice(self, phaselist, start, end, pid, type):
+ tgtdev = ''
+ for phase in phaselist:
+ list = self.dmesg[phase]['list']
+ for devname in list:
+ dev = list[devname]
+ # pid must match
+ if dev['pid'] != pid:
+ continue
+ devS = dev['start']
+ devE = dev['end']
+ if type == 'device':
+ # device target event is entirely inside the source boundary
+ if(start < devS or start >= devE or end <= devS or end > devE):
+ continue
+ elif type == 'thread':
+ # thread target event will expand the source boundary
+ if start < devS:
+ dev['start'] = start
+ if end > devE:
+ dev['end'] = end
+ tgtdev = dev
+ break
+ return tgtdev
+ def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata):
+ # try to place the call in a device
+ tgtdev = self.sourceDevice(self.phases, start, end, pid, 'device')
+ # calls with device pids that occur outside device bounds are dropped
+ # TODO: include these somehow
+ if not tgtdev and pid in self.devpids:
+ return False
+ # try to place the call in a thread
+ if not tgtdev:
+ tgtdev = self.sourceDevice(self.phases, start, end, pid, 'thread')
+ # create new thread blocks, expand as new calls are found
+ if not tgtdev:
+ if proc == '<...>':
+ threadname = 'kthread-%d' % (pid)
+ else:
+ threadname = '%s-%d' % (proc, pid)
+ tgtphase = self.sourcePhase(start)
+ self.newAction(tgtphase, threadname, pid, '', start, end, '', ' kth', '')
+ return self.addDeviceFunctionCall(displayname, kprobename, proc, pid, start, end, cdata, rdata)
+ # this should not happen
+ if not tgtdev:
+ sysvals.vprint('[%f - %f] %s-%d %s %s %s' % \
+ (start, end, proc, pid, kprobename, cdata, rdata))
+ return False
+ # place the call data inside the src element of the tgtdev
+ if('src' not in tgtdev):
+ tgtdev['src'] = []
+ dtf = sysvals.dev_tracefuncs
+ ubiquitous = False
+ if kprobename in dtf and 'ub' in dtf[kprobename]:
+ ubiquitous = True
+ title = cdata+' '+rdata
+ mstr = '\(.*\) *(?P<args>.*) *\((?P<caller>.*)\+.* arg1=(?P<ret>.*)'
+ m = re.match(mstr, title)
+ if m:
+ c = m.group('caller')
+ a = m.group('args').strip()
+ r = m.group('ret')
+ if len(r) > 6:
+ r = ''
+ else:
+ r = 'ret=%s ' % r
+ if ubiquitous and c in dtf and 'ub' in dtf[c]:
+ return False
+ color = sysvals.kprobeColor(kprobename)
+ e = DevFunction(displayname, a, c, r, start, end, ubiquitous, proc, pid, color)
+ tgtdev['src'].append(e)
+ return True
+ def overflowDevices(self):
+ # get a list of devices that extend beyond the end of this test run
+ devlist = []
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ for devname in list:
+ dev = list[devname]
+ if dev['end'] > self.end:
+ devlist.append(dev)
+ return devlist
+ def mergeOverlapDevices(self, devlist):
+ # merge any devices that overlap devlist
+ for dev in devlist:
+ devname = dev['name']
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ if devname not in list:
+ continue
+ tdev = list[devname]
+ o = min(dev['end'], tdev['end']) - max(dev['start'], tdev['start'])
+ if o <= 0:
+ continue
+ dev['end'] = tdev['end']
+ if 'src' not in dev or 'src' not in tdev:
+ continue
+ dev['src'] += tdev['src']
+ del list[devname]
+ def usurpTouchingThread(self, name, dev):
+		# the caller test has priority over this thread, so transfer it there
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ if name in list:
+ tdev = list[name]
+ if tdev['start'] - dev['end'] < 0.1:
+ dev['end'] = tdev['end']
+ if 'src' not in dev:
+ dev['src'] = []
+ if 'src' in tdev:
+ dev['src'] += tdev['src']
+ del list[name]
+ break
+ def stitchTouchingThreads(self, testlist):
+ # merge any threads between tests that touch
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ for devname in list:
+ dev = list[devname]
+ if 'htmlclass' not in dev or 'kth' not in dev['htmlclass']:
+ continue
+ for data in testlist:
+ data.usurpTouchingThread(devname, dev)
+ def optimizeDevSrc(self):
+ # merge any src call loops to reduce timeline size
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ for dev in list:
+ if 'src' not in list[dev]:
+ continue
+ src = list[dev]['src']
+ p = 0
+ for e in sorted(src, key=lambda event: event.time):
+ if not p or not e.repeat(p):
+ p = e
+ continue
+ # e is another iteration of p, move it into p
+ p.end = e.end
+ p.length = p.end - p.time
+ p.count += 1
+ src.remove(e)
+ def trimTimeVal(self, t, t0, dT, left):
+ if left:
+ if(t > t0):
+ if(t - dT < t0):
+ return t0
+ return t - dT
+ else:
+ return t
+ else:
+ if(t < t0 + dT):
+ if(t > t0):
+ return t0 + dT
+ return t + dT
+ else:
+ return t
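+	# trimTimeVal illustration: removing dT=1.0 of clock time at t0=2.0 with
+	# left=True maps t=5.0 to 4.0, while t=1.5 (before the cut) is unchanged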
+ def trimTime(self, t0, dT, left):
+ self.tSuspended = self.trimTimeVal(self.tSuspended, t0, dT, left)
+ self.tResumed = self.trimTimeVal(self.tResumed, t0, dT, left)
+ self.start = self.trimTimeVal(self.start, t0, dT, left)
+ self.tKernSus = self.trimTimeVal(self.tKernSus, t0, dT, left)
+ self.tKernRes = self.trimTimeVal(self.tKernRes, t0, dT, left)
+ self.end = self.trimTimeVal(self.end, t0, dT, left)
+ for phase in self.phases:
+ p = self.dmesg[phase]
+ p['start'] = self.trimTimeVal(p['start'], t0, dT, left)
+ p['end'] = self.trimTimeVal(p['end'], t0, dT, left)
+ list = p['list']
+ for name in list:
+ d = list[name]
+ d['start'] = self.trimTimeVal(d['start'], t0, dT, left)
+ d['end'] = self.trimTimeVal(d['end'], t0, dT, left)
+ if('ftrace' in d):
+ cg = d['ftrace']
+ cg.start = self.trimTimeVal(cg.start, t0, dT, left)
+ cg.end = self.trimTimeVal(cg.end, t0, dT, left)
+ for line in cg.list:
+ line.time = self.trimTimeVal(line.time, t0, dT, left)
+ if('src' in d):
+ for e in d['src']:
+ e.time = self.trimTimeVal(e.time, t0, dT, left)
+ for dir in ['suspend', 'resume']:
+ list = []
+ for e in self.errorinfo[dir]:
+ type, tm, idx1, idx2 = e
+ tm = self.trimTimeVal(tm, t0, dT, left)
+ list.append((type, tm, idx1, idx2))
+ self.errorinfo[dir] = list
+ def normalizeTime(self, tZero):
+ # trim out any standby or freeze clock time
+ if(self.tSuspended != self.tResumed):
+ if(self.tResumed > tZero):
+ self.trimTime(self.tSuspended, \
+ self.tResumed-self.tSuspended, True)
+ else:
+ self.trimTime(self.tSuspended, \
+ self.tResumed-self.tSuspended, False)
+ def getTimeValues(self):
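+		# kernel suspend and resume times, both returned in milliseconds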
+ sktime = (self.dmesg['suspend_machine']['end'] - \
+ self.tKernSus) * 1000
+ rktime = (self.dmesg['resume_complete']['end'] - \
+ self.dmesg['resume_machine']['start']) * 1000
+ return (sktime, rktime)
+ def setPhase(self, phase, ktime, isbegin):
+ if(isbegin):
+ self.dmesg[phase]['start'] = ktime
+ else:
+ self.dmesg[phase]['end'] = ktime
+ def dmesgSortVal(self, phase):
+ return self.dmesg[phase]['order']
+ def sortedPhases(self):
+ return sorted(self.dmesg, key=self.dmesgSortVal)
+ def sortedDevices(self, phase):
+ list = self.dmesg[phase]['list']
+ slist = []
+ tmp = dict()
+ for devname in list:
+ dev = list[devname]
+ if dev['length'] == 0:
+ continue
+ tmp[dev['start']] = devname
+ for t in sorted(tmp):
+ slist.append(tmp[t])
+ return slist
+ def fixupInitcalls(self, phase):
+ # if any calls never returned, clip them at system resume end
+ phaselist = self.dmesg[phase]['list']
+ for devname in phaselist:
+ dev = phaselist[devname]
+ if(dev['end'] < 0):
+ for p in self.phases:
+ if self.dmesg[p]['end'] > dev['start']:
+ dev['end'] = self.dmesg[p]['end']
+ break
+				sysvals.vprint('%s (%s): callback did not return' % (devname, phase))
+ def deviceFilter(self, devicefilter):
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ rmlist = []
+ for name in list:
+ keep = False
+ for filter in devicefilter:
+ if filter in name or \
+ ('drv' in list[name] and filter in list[name]['drv']):
+ keep = True
+ if not keep:
+ rmlist.append(name)
+ for name in rmlist:
+ del list[name]
+ def fixupInitcallsThatDidntReturn(self):
+ # if any calls never returned, clip them at system resume end
+ for phase in self.phases:
+ self.fixupInitcalls(phase)
+ def phaseOverlap(self, phases):
+ rmgroups = []
+ newgroup = []
+ for group in self.devicegroups:
+ for phase in phases:
+ if phase not in group:
+ continue
+ for p in group:
+ if p not in newgroup:
+ newgroup.append(p)
+ if group not in rmgroups:
+ rmgroups.append(group)
+ for group in rmgroups:
+ self.devicegroups.remove(group)
+ self.devicegroups.append(newgroup)
+ def newActionGlobal(self, name, start, end, pid=-1, color=''):
+ # which phase is this device callback or action in
+ targetphase = 'none'
+ htmlclass = ''
+ overlap = 0.0
+ phases = []
+ for phase in self.phases:
+ pstart = self.dmesg[phase]['start']
+ pend = self.dmesg[phase]['end']
+ # see if the action overlaps this phase
+ o = max(0, min(end, pend) - max(start, pstart))
+ if o > 0:
+ phases.append(phase)
+ # set the target phase to the one that overlaps most
+ if o > overlap:
+ if overlap > 0 and phase == 'post_resume':
+ continue
+ targetphase = phase
+ overlap = o
+ # if no target phase was found, pin it to the edge
+ if targetphase == 'none':
+ p0start = self.dmesg[self.phases[0]]['start']
+ if start <= p0start:
+ targetphase = self.phases[0]
+ else:
+ targetphase = self.phases[-1]
+ if pid == -2:
+ htmlclass = ' bg'
+ elif pid == -3:
+ htmlclass = ' ps'
+ if len(phases) > 1:
+ htmlclass = ' bg'
+ self.phaseOverlap(phases)
+ if targetphase in self.phases:
+ newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color)
+ return (targetphase, newname)
+ return False
+ def newAction(self, phase, name, pid, parent, start, end, drv, htmlclass='', color=''):
+ # new device callback for a specific phase
+ self.html_device_id += 1
+ devid = '%s%d' % (self.idstr, self.html_device_id)
+ list = self.dmesg[phase]['list']
+ length = -1.0
+ if(start >= 0 and end >= 0):
+ length = end - start
+ if pid == -2:
+ i = 2
+ origname = name
+ while(name in list):
+ name = '%s[%d]' % (origname, i)
+ i += 1
+ list[name] = {'name': name, 'start': start, 'end': end, 'pid': pid,
+ 'par': parent, 'length': length, 'row': 0, 'id': devid, 'drv': drv }
+ if htmlclass:
+ list[name]['htmlclass'] = htmlclass
+ if color:
+ list[name]['color'] = color
+ return name
+ def deviceChildren(self, devname, phase):
+ devlist = []
+ list = self.dmesg[phase]['list']
+ for child in list:
+ if(list[child]['par'] == devname):
+ devlist.append(child)
+ return devlist
+ def printDetails(self):
+ sysvals.vprint('Timeline Details:')
+ sysvals.vprint(' test start: %f' % self.start)
+ sysvals.vprint('kernel suspend start: %f' % self.tKernSus)
+ for phase in self.phases:
+ dc = len(self.dmesg[phase]['list'])
+ sysvals.vprint(' %16s: %f - %f (%d devices)' % (phase, \
+ self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc))
+ sysvals.vprint(' kernel resume end: %f' % self.tKernRes)
+ sysvals.vprint(' test end: %f' % self.end)
+ def deviceChildrenAllPhases(self, devname):
+ devlist = []
+ for phase in self.phases:
+ list = self.deviceChildren(devname, phase)
+ for dev in list:
+ if dev not in devlist:
+ devlist.append(dev)
+ return devlist
+ def masterTopology(self, name, list, depth):
+ node = DeviceNode(name, depth)
+ for cname in list:
+			# avoid recursion
+ if name == cname:
+ continue
+ clist = self.deviceChildrenAllPhases(cname)
+ cnode = self.masterTopology(cname, clist, depth+1)
+ node.children.append(cnode)
+ return node
+ def printTopology(self, node):
+ html = ''
+ if node.name:
+ info = ''
+ drv = ''
+ for phase in self.phases:
+ list = self.dmesg[phase]['list']
+ if node.name in list:
+ s = list[node.name]['start']
+ e = list[node.name]['end']
+ if list[node.name]['drv']:
+ drv = ' {'+list[node.name]['drv']+'}'
+ info += ('<li>%s: %.3fms</li>' % (phase, (e-s)*1000))
+ html += '<li><b>'+node.name+drv+'</b>'
+ if info:
+ html += '<ul>'+info+'</ul>'
+ html += '</li>'
+ if len(node.children) > 0:
+ html += '<ul>'
+ for cnode in node.children:
+ html += self.printTopology(cnode)
+ html += '</ul>'
+ return html
+ def rootDeviceList(self):
+ # list of devices graphed
+ real = []
+ for phase in self.dmesg:
+ list = self.dmesg[phase]['list']
+ for dev in list:
+ if list[dev]['pid'] >= 0 and dev not in real:
+ real.append(dev)
+ # list of top-most root devices
+ rootlist = []
+ for phase in self.dmesg:
+ list = self.dmesg[phase]['list']
+ for dev in list:
+ pdev = list[dev]['par']
+ pid = list[dev]['pid']
+ if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)):
+ continue
+ if pdev and pdev not in real and pdev not in rootlist:
+ rootlist.append(pdev)
+ return rootlist
+ def deviceTopology(self):
+ rootlist = self.rootDeviceList()
+ master = self.masterTopology('', rootlist, 0)
+ return self.printTopology(master)
+ def selectTimelineDevices(self, widfmt, tTotal, mindevlen):
+ # only select devices that will actually show up in html
+ self.tdevlist = dict()
+ for phase in self.dmesg:
+ devlist = []
+ list = self.dmesg[phase]['list']
+ for dev in list:
+ length = (list[dev]['end'] - list[dev]['start']) * 1000
+ width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal)
+ if width != '0.000000' and length >= mindevlen:
+ devlist.append(dev)
+ self.tdevlist[phase] = devlist
+ def addHorizontalDivider(self, devname, devend):
+ phase = 'suspend_prepare'
+ self.newAction(phase, devname, -2, '', \
+ self.start, devend, '', ' sec', '')
+ if phase not in self.tdevlist:
+ self.tdevlist[phase] = []
+ self.tdevlist[phase].append(devname)
+ d = DevItem(0, phase, self.dmesg[phase]['list'][devname])
+ return d
+ def addProcessUsageEvent(self, name, times):
+ # get the start and end times for this process
+ maxC = 0
+ tlast = 0
+ start = -1
+ end = -1
+ for t in sorted(times):
+ if tlast == 0:
+ tlast = t
+ continue
+ if name in self.pstl[t]:
+ if start == -1 or tlast < start:
+ start = tlast
+ if end == -1 or t > end:
+ end = t
+ tlast = t
+ if start == -1 or end == -1:
+ return 0
+ # add a new action for this process and get the object
+ out = self.newActionGlobal(name, start, end, -3)
+ if not out:
+ return 0
+ phase, devname = out
+ dev = self.dmesg[phase]['list'][devname]
+ # get the cpu exec data
+ tlast = 0
+ clast = 0
+ cpuexec = dict()
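+		# cpuexec maps each sample interval (tlast, t) to the jiffies this
+		# process consumed over that interval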
+ for t in sorted(times):
+ if tlast == 0 or t <= start or t > end:
+ tlast = t
+ continue
+ list = self.pstl[t]
+ c = 0
+ if name in list:
+ c = list[name]
+ if c > maxC:
+ maxC = c
+ if c != clast:
+ key = (tlast, t)
+ cpuexec[key] = c
+ tlast = t
+ clast = c
+ dev['cpuexec'] = cpuexec
+ return maxC
+ def createProcessUsageEvents(self):
+ # get an array of process names
+ proclist = []
+ for t in self.pstl:
+ pslist = self.pstl[t]
+ for ps in pslist:
+ if ps not in proclist:
+ proclist.append(ps)
+ # get a list of data points for suspend and resume
+ tsus = []
+ tres = []
+ for t in sorted(self.pstl):
+ if t < self.tSuspended:
+ tsus.append(t)
+ else:
+ tres.append(t)
+ # process the events for suspend and resume
+ if len(proclist) > 0:
+ sysvals.vprint('Process Execution:')
+ for ps in proclist:
+ c = self.addProcessUsageEvent(ps, tsus)
+ if c > 0:
+ sysvals.vprint('%25s (sus): %d' % (ps, c))
+ c = self.addProcessUsageEvent(ps, tres)
+ if c > 0:
+ sysvals.vprint('%25s (res): %d' % (ps, c))
+ def debugPrint(self):
+ for p in self.phases:
+ list = self.dmesg[p]['list']
+ for devname in list:
+ dev = list[devname]
+ if 'ftrace' in dev:
+ dev['ftrace'].debugPrint(' [%s]' % devname)
+
+# Class: DevFunction
+# Description:
+# A container for kprobe function data we want in the dev timeline
+class DevFunction:
+ row = 0
+ count = 1
+ def __init__(self, name, args, caller, ret, start, end, u, proc, pid, color):
+ self.name = name
+ self.args = args
+ self.caller = caller
+ self.ret = ret
+ self.time = start
+ self.length = end - start
+ self.end = end
+ self.ubiquitous = u
+ self.proc = proc
+ self.pid = pid
+ self.color = color
+ def title(self):
+ cnt = ''
+ if self.count > 1:
+ cnt = '(x%d)' % self.count
+ l = '%0.3fms' % (self.length * 1000)
+ if self.ubiquitous:
+ title = '%s(%s)%s <- %s, %s(%s)' % \
+ (self.name, self.args, cnt, self.caller, self.ret, l)
+ else:
+ title = '%s(%s) %s%s(%s)' % (self.name, self.args, self.ret, cnt, l)
+ return title.replace('"', '')
+ def text(self):
+ if self.count > 1:
+ text = '%s(x%d)' % (self.name, self.count)
+ else:
+ text = self.name
+ return text
+ def repeat(self, tgt):
+		# is the tgt call just a repeat of this call (i.e., are we in a loop)?
+ dt = self.time - tgt.end
+ # only combine calls if -all- attributes are identical
+ if tgt.caller == self.caller and \
+ tgt.name == self.name and tgt.args == self.args and \
+ tgt.proc == self.proc and tgt.pid == self.pid and \
+ tgt.ret == self.ret and dt >= 0 and \
+ dt <= sysvals.callloopmaxgap and \
+ self.length < sysvals.callloopmaxlen:
+ return True
+ return False
+
+# Class: FTraceLine
+# Description:
+# A container for a single line of ftrace data. There are six basic types:
+# callgraph line:
+# call: " dpm_run_callback() {"
+# return: " }"
+# leaf: " dpm_run_callback();"
+# trace event:
+# tracing_mark_write: SUSPEND START or RESUME COMPLETE
+# suspend_resume: phase or custom exec block data
+# device_pm_callback: device callback info
+class FTraceLine:
+ time = 0.0
+ length = 0.0
+ fcall = False
+ freturn = False
+ fevent = False
+ fkprobe = False
+ depth = 0
+ name = ''
+ type = ''
+ def __init__(self, t, m='', d=''):
+ self.time = float(t)
+ if not m and not d:
+ return
+ # is this a trace event
+ if(d == 'traceevent' or re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m)):
+ if(d == 'traceevent'):
+ # nop format trace event
+ msg = m
+ else:
+ # function_graph format trace event
+ em = re.match('^ *\/\* *(?P<msg>.*) \*\/ *$', m)
+ msg = em.group('msg')
+
+ emm = re.match('^(?P<call>.*?): (?P<msg>.*)', msg)
+ if(emm):
+ self.name = emm.group('msg')
+ self.type = emm.group('call')
+ else:
+ self.name = msg
+ km = re.match('^(?P<n>.*)_cal$', self.type)
+ if km:
+ self.fcall = True
+ self.fkprobe = True
+ self.type = km.group('n')
+ return
+ km = re.match('^(?P<n>.*)_ret$', self.type)
+ if km:
+ self.freturn = True
+ self.fkprobe = True
+ self.type = km.group('n')
+ return
+ self.fevent = True
+ return
+ # convert the duration to seconds
+ if(d):
+ self.length = float(d)/1000000
+ # the indentation determines the depth
+ match = re.match('^(?P<d> *)(?P<o>.*)$', m)
+ if(not match):
+ return
+ self.depth = self.getDepth(match.group('d'))
+ m = match.group('o')
+ # function return
+ if(m[0] == '}'):
+ self.freturn = True
+ if(len(m) > 1):
+ # includes comment with function name
+ match = re.match('^} *\/\* *(?P<n>.*) *\*\/$', m)
+ if(match):
+ self.name = match.group('n').strip()
+ # function call
+ else:
+ self.fcall = True
+ # function call with children
+ if(m[-1] == '{'):
+ match = re.match('^(?P<n>.*) *\(.*', m)
+ if(match):
+ self.name = match.group('n').strip()
+ # function call with no children (leaf)
+ elif(m[-1] == ';'):
+ self.freturn = True
+ match = re.match('^(?P<n>.*) *\(.*', m)
+ if(match):
+ self.name = match.group('n').strip()
+ # something else (possibly a trace marker)
+ else:
+ self.name = m
+ def isCall(self):
+ return self.fcall and not self.freturn
+ def isReturn(self):
+ return self.freturn and not self.fcall
+ def isLeaf(self):
+ return self.fcall and self.freturn
+ def getDepth(self, str):
+ return len(str)/2
+ def debugPrint(self, info=''):
+ if self.isLeaf():
+ print(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
+ self.depth, self.name, self.length*1000000, info))
+ elif self.freturn:
+ print(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
+ self.depth, self.name, self.length*1000000, info))
+ else:
+ print(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
+ self.depth, self.name, self.length*1000000, info))
+ def startMarker(self):
+ # Is this the starting line of a suspend?
+ if not self.fevent:
+ return False
+ if sysvals.usetracemarkers:
+ if(self.name == 'SUSPEND START'):
+ return True
+ return False
+ else:
+ if(self.type == 'suspend_resume' and
+ re.match('suspend_enter\[.*\] begin', self.name)):
+ return True
+ return False
+ def endMarker(self):
+ # Is this the ending line of a resume?
+ if not self.fevent:
+ return False
+ if sysvals.usetracemarkers:
+ if(self.name == 'RESUME COMPLETE'):
+ return True
+ return False
+ else:
+ if(self.type == 'suspend_resume' and
+ re.match('thaw_processes\[.*\] end', self.name)):
+ return True
+ return False
+
+# Class: FTraceCallGraph
+# Description:
+# A container for the ftrace callgraph of a single recursive function.
+# This can be a dpm_run_callback, dpm_prepare, or dpm_complete callgraph
+# Each instance is tied to a single device in a single phase, and is
+#	made up of an ordered list of FTraceLine objects
+class FTraceCallGraph:
+ id = ''
+ start = -1.0
+ end = -1.0
+ list = []
+ invalid = False
+ depth = 0
+ pid = 0
+ name = ''
+ partial = False
+ vfname = 'missing_function_name'
+ ignore = False
+ sv = 0
+ def __init__(self, pid, sv):
+ self.start = -1.0
+ self.end = -1.0
+ self.list = []
+ self.depth = 0
+ self.pid = pid
+ self.sv = sv
+ def addLine(self, line):
+ # if this is already invalid, just leave
+ if(self.invalid):
+ if(line.depth == 0 and line.freturn):
+ return 1
+ return 0
+ # invalidate on bad depth
+ if(self.depth < 0):
+ self.invalidate(line)
+ return 0
+		# ignore data until we return to the current depth
+ if self.ignore:
+ if line.depth > self.depth:
+ return 0
+ else:
+ self.list[-1].freturn = True
+ self.list[-1].length = line.time - self.list[-1].time
+ self.ignore = False
+ # if this is a return at self.depth, no more work is needed
+ if line.depth == self.depth and line.isReturn():
+ if line.depth == 0:
+ self.end = line.time
+ return 1
+ return 0
+		# compare current depth with this line's pre-call depth
+ prelinedep = line.depth
+ if line.isReturn():
+ prelinedep += 1
+ last = 0
+ lasttime = line.time
+ if len(self.list) > 0:
+ last = self.list[-1]
+ lasttime = last.time
+ if last.isLeaf():
+ lasttime += last.length
+ # handle low misalignments by inserting returns
+ mismatch = prelinedep - self.depth
+ warning = self.sv.verbose and abs(mismatch) > 1
+ info = []
+ if mismatch < 0:
+ idx = 0
+ # add return calls to get the depth down
+ while prelinedep < self.depth:
+ self.depth -= 1
+ if idx == 0 and last and last.isCall():
+ # special case, turn last call into a leaf
+ last.depth = self.depth
+ last.freturn = True
+ last.length = line.time - last.time
+ if warning:
+ info.append(('[make leaf]', last))
+ else:
+ vline = FTraceLine(lasttime)
+ vline.depth = self.depth
+ vline.name = self.vfname
+ vline.freturn = True
+ self.list.append(vline)
+ if warning:
+ if idx == 0:
+ info.append(('', last))
+ info.append(('[add return]', vline))
+ idx += 1
+ if warning:
+ info.append(('', line))
+ # handle high misalignments by inserting calls
+ elif mismatch > 0:
+ idx = 0
+ if warning:
+ info.append(('', last))
+ # add calls to get the depth up
+ while prelinedep > self.depth:
+ if idx == 0 and line.isReturn():
+ # special case, turn this return into a leaf
+ line.fcall = True
+ prelinedep -= 1
+ if warning:
+ info.append(('[make leaf]', line))
+ else:
+ vline = FTraceLine(lasttime)
+ vline.depth = self.depth
+ vline.name = self.vfname
+ vline.fcall = True
+ self.list.append(vline)
+ self.depth += 1
+ if not last:
+ self.start = vline.time
+ if warning:
+ info.append(('[add call]', vline))
+ idx += 1
+ if warning and ('[make leaf]', line) not in info:
+ info.append(('', line))
+ if warning:
+			print('WARNING: ftrace data missing, corrections made:')
+ for i in info:
+ t, obj = i
+ if obj:
+ obj.debugPrint(t)
+ # process the call and set the new depth
+ skipadd = False
+ md = self.sv.max_graph_depth
+ if line.isCall():
+ # ignore blacklisted/overdepth funcs
+ if (md and self.depth >= md - 1) or (line.name in self.sv.cgblacklist):
+ self.ignore = True
+ else:
+ self.depth += 1
+ elif line.isReturn():
+ self.depth -= 1
+ # remove blacklisted/overdepth/empty funcs that slipped through
+ if (last and last.isCall() and last.depth == line.depth) or \
+ (md and last and last.depth >= md) or \
+ (line.name in self.sv.cgblacklist):
+ while len(self.list) > 0 and self.list[-1].depth > line.depth:
+ self.list.pop(-1)
+ if len(self.list) == 0:
+ self.invalid = True
+ return 1
+ self.list[-1].freturn = True
+ self.list[-1].length = line.time - self.list[-1].time
+ self.list[-1].name = line.name
+ skipadd = True
+ if len(self.list) < 1:
+ self.start = line.time
+ # check for a mismatch that returned all the way to callgraph end
+ res = 1
+ if mismatch < 0 and self.list[-1].depth == 0 and self.list[-1].freturn:
+ line = self.list[-1]
+ skipadd = True
+ res = -1
+ if not skipadd:
+ self.list.append(line)
+ if(line.depth == 0 and line.freturn):
+ if(self.start < 0):
+ self.start = line.time
+ self.end = line.time
+ if line.fcall:
+ self.end += line.length
+ if self.list[0].name == self.vfname:
+ self.invalid = True
+ if res == -1:
+ self.partial = True
+ return res
+ return 0
+ def invalidate(self, line):
+ if(len(self.list) > 0):
+ first = self.list[0]
+ self.list = []
+ self.list.append(first)
+ self.invalid = True
+ id = 'task %s' % (self.pid)
+ window = '(%f - %f)' % (self.start, line.time)
+ if(self.depth < 0):
+ print('Data misalignment for '+id+\
+ ' (buffer overflow), ignoring this callback')
+ else:
+ print('Too much data for '+id+\
+ ' '+window+', ignoring this callback')
+ def slice(self, dev):
+ minicg = FTraceCallGraph(dev['pid'], self.sv)
+ minicg.name = self.name
+ mydepth = -1
+ good = False
+ for l in self.list:
+ if(l.time < dev['start'] or l.time > dev['end']):
+ continue
+ if mydepth < 0:
+ if l.name == 'mutex_lock' and l.freturn:
+ mydepth = l.depth
+ continue
+ elif l.depth == mydepth and l.name == 'mutex_unlock' and l.fcall:
+ good = True
+ break
+ l.depth -= mydepth
+ minicg.addLine(l)
+ if not good or len(minicg.list) < 1:
+ return 0
+ return minicg
+ def repair(self, enddepth):
+ # bring the depth back to 0 with additional returns
+ fixed = False
+ last = self.list[-1]
+ for i in reversed(range(enddepth)):
+ t = FTraceLine(last.time)
+ t.depth = i
+ t.freturn = True
+ fixed = self.addLine(t)
+ if fixed != 0:
+ self.end = last.time
+ return True
+ return False
+ def postProcess(self):
+ if len(self.list) > 0:
+ self.name = self.list[0].name
+ stack = dict()
+ cnt = 0
+ last = 0
+ for l in self.list:
+ # ftrace bug: reported duration is not reliable
+ # check each leaf and clip it at max possible length
+ if last and last.isLeaf():
+ if last.length > l.time - last.time:
+ last.length = l.time - last.time
+ if l.isCall():
+ stack[l.depth] = l
+ cnt += 1
+ elif l.isReturn():
+ if(l.depth not in stack):
+ if self.sv.verbose:
+						print('Post Process Error: Depth missing')
+ l.debugPrint()
+ return False
+ # calculate call length from call/return lines
+ cl = stack[l.depth]
+ cl.length = l.time - cl.time
+ if cl.name == self.vfname:
+ cl.name = l.name
+ stack.pop(l.depth)
+ l.length = 0
+ cnt -= 1
+ last = l
+ if(cnt == 0):
+ # trace caught the whole call tree
+ return True
+ elif(cnt < 0):
+ if self.sv.verbose:
+				print('Post Process Error: Depth is less than 0')
+ return False
+ # trace ended before call tree finished
+ return self.repair(cnt)
+ def deviceMatch(self, pid, data):
+ found = ''
+ # add the callgraph data to the device hierarchy
+ borderphase = {
+ 'dpm_prepare': 'suspend_prepare',
+ 'dpm_complete': 'resume_complete'
+ }
+ if(self.name in borderphase):
+ p = borderphase[self.name]
+ list = data.dmesg[p]['list']
+ for devname in list:
+ dev = list[devname]
+ if(pid == dev['pid'] and
+ self.start <= dev['start'] and
+ self.end >= dev['end']):
+ cg = self.slice(dev)
+ if cg:
+ dev['ftrace'] = cg
+ found = devname
+ return found
+ for p in data.phases:
+ if(data.dmesg[p]['start'] <= self.start and
+ self.start <= data.dmesg[p]['end']):
+ list = data.dmesg[p]['list']
+ for devname in list:
+ dev = list[devname]
+ if(pid == dev['pid'] and
+ self.start <= dev['start'] and
+ self.end >= dev['end']):
+ dev['ftrace'] = self
+ found = devname
+ break
+ break
+ return found
+ def newActionFromFunction(self, data):
+ name = self.name
+ if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']:
+ return
+ fs = self.start
+ fe = self.end
+ if fs < data.start or fe > data.end:
+ return
+ phase = ''
+ for p in data.phases:
+ if(data.dmesg[p]['start'] <= self.start and
+ self.start < data.dmesg[p]['end']):
+ phase = p
+ break
+ if not phase:
+ return
+ out = data.newActionGlobal(name, fs, fe, -2)
+ if out:
+ phase, myname = out
+ data.dmesg[phase]['list'][myname]['ftrace'] = self
+ def debugPrint(self, info=''):
+		print('%s pid=%d [%f - %f] %.3f us' % \
+			(self.name, self.pid, self.start, self.end,
+			(self.end - self.start)*1000000))
+ for l in self.list:
+ if l.isLeaf():
+ print('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
+ l.depth, l.name, l.length*1000000, info))
+ elif l.freturn:
+ print('%f (%02d): %s} (%.3f us)%s' % (l.time, \
+ l.depth, l.name, l.length*1000000, info))
+ else:
+ print('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
+ l.depth, l.name, l.length*1000000, info))
+ print(' ')
+
+class DevItem:
+ def __init__(self, test, phase, dev):
+ self.test = test
+ self.phase = phase
+ self.dev = dev
+ def isa(self, cls):
+ if 'htmlclass' in self.dev and cls in self.dev['htmlclass']:
+ return True
+ return False
+
+# Class: Timeline
+# Description:
+# A container for a device timeline which calculates
+# all the html properties to display it correctly
+class Timeline:
+ html = ''
+ height = 0 # total timeline height
+ scaleH = 20 # timescale (top) row height
+ rowH = 30 # device row height
+ bodyH = 0 # body height
+ rows = 0 # total timeline rows
+ rowlines = dict()
+ rowheight = dict()
+ html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
+ html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
+ html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
+ html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
+ html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
+ def __init__(self, rowheight, scaleheight):
+ self.rowH = rowheight
+ self.scaleH = scaleheight
+ self.html = ''
+ def createHeader(self, sv, stamp):
+ if(not stamp['time']):
+ return
+ self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \
+ % (sv.title, sv.version)
+ if sv.logmsg and sv.testlog:
+ self.html += '<button id="showtest" class="logbtn btnfmt">log</button>'
+ if sv.dmesglog:
+ self.html += '<button id="showdmesg" class="logbtn btnfmt">dmesg</button>'
+ if sv.ftracelog:
+ self.html += '<button id="showftrace" class="logbtn btnfmt">ftrace</button>'
+ headline_stamp = '<div class="stamp">{0} {1} {2} {3}</div>\n'
+ self.html += headline_stamp.format(stamp['host'], stamp['kernel'],
+ stamp['mode'], stamp['time'])
+ if 'man' in stamp and 'plat' in stamp and 'cpu' in stamp and \
+ stamp['man'] and stamp['plat'] and stamp['cpu']:
+ headline_sysinfo = '<div class="stamp sysinfo">{0} {1} <i>with</i> {2}</div>\n'
+ self.html += headline_sysinfo.format(stamp['man'], stamp['plat'], stamp['cpu'])
+
+ # Function: getDeviceRows
+ # Description:
+	#	 determine how many rows the device funcs will take
+ # Arguments:
+ # rawlist: the list of devices/actions for a single phase
+ # Output:
+ # The total number of rows needed to display this phase of the timeline
+ def getDeviceRows(self, rawlist):
+ # clear all rows and set them to undefined
+ sortdict = dict()
+ for item in rawlist:
+ item.row = -1
+ sortdict[item] = item.length
+ sortlist = sorted(sortdict, key=sortdict.get, reverse=True)
+ remaining = len(sortlist)
+ rowdata = dict()
+ row = 1
+ # try to pack each row with as many ranges as possible
+ while(remaining > 0):
+ if(row not in rowdata):
+ rowdata[row] = []
+ for i in sortlist:
+ if(i.row >= 0):
+ continue
+ s = i.time
+ e = i.time + i.length
+ valid = True
+ for ritem in rowdata[row]:
+ rs = ritem.time
+ re = ritem.time + ritem.length
+ if(not (((s <= rs) and (e <= rs)) or
+ ((s >= re) and (e >= re)))):
+ valid = False
+ break
+ if(valid):
+ rowdata[row].append(i)
+ i.row = row
+ remaining -= 1
+ row += 1
+ return row
+ # Function: getPhaseRows
+ # Description:
+ # Organize the timeline entries into the smallest
+ # number of rows possible, with no entry overlapping
+ # Arguments:
+ # devlist: the list of devices/actions in a group of contiguous phases
+ # Output:
+ # The total number of rows needed to display this phase of the timeline
+ def getPhaseRows(self, devlist, row=0, sortby='length'):
+ # clear all rows and set them to undefined
+ remaining = len(devlist)
+ rowdata = dict()
+ sortdict = dict()
+ myphases = []
+ # initialize all device rows to -1 and calculate devrows
+ for item in devlist:
+ dev = item.dev
+ tp = (item.test, item.phase)
+ if tp not in myphases:
+ myphases.append(tp)
+ dev['row'] = -1
+ if sortby == 'start':
+ # sort by start 1st, then length 2nd
+ sortdict[item] = (-1*float(dev['start']), float(dev['end']) - float(dev['start']))
+ else:
+ # sort by length 1st, then name 2nd
+ sortdict[item] = (float(dev['end']) - float(dev['start']), item.dev['name'])
+ if 'src' in dev:
+ dev['devrows'] = self.getDeviceRows(dev['src'])
+ # sort the devlist by length so that large items graph on top
+ sortlist = sorted(sortdict, key=sortdict.get, reverse=True)
+ orderedlist = []
+ for item in sortlist:
+ if item.dev['pid'] == -2:
+ orderedlist.append(item)
+ for item in sortlist:
+ if item not in orderedlist:
+ orderedlist.append(item)
+ # try to pack each row with as many devices as possible
+ while(remaining > 0):
+ rowheight = 1
+ if(row not in rowdata):
+ rowdata[row] = []
+ for item in orderedlist:
+ dev = item.dev
+ if(dev['row'] < 0):
+ s = dev['start']
+ e = dev['end']
+ valid = True
+ for ritem in rowdata[row]:
+ rs = ritem.dev['start']
+ re = ritem.dev['end']
+ if(not (((s <= rs) and (e <= rs)) or
+ ((s >= re) and (e >= re)))):
+ valid = False
+ break
+ if(valid):
+ rowdata[row].append(item)
+ dev['row'] = row
+ remaining -= 1
+ if 'devrows' in dev and dev['devrows'] > rowheight:
+ rowheight = dev['devrows']
+ for t, p in myphases:
+ if t not in self.rowlines or t not in self.rowheight:
+ self.rowlines[t] = dict()
+ self.rowheight[t] = dict()
+ if p not in self.rowlines[t] or p not in self.rowheight[t]:
+ self.rowlines[t][p] = dict()
+ self.rowheight[t][p] = dict()
+ rh = self.rowH
+ # section headers should use a different row height
+ if len(rowdata[row]) == 1 and \
+ 'htmlclass' in rowdata[row][0].dev and \
+ 'sec' in rowdata[row][0].dev['htmlclass']:
+ rh = 15
+ self.rowlines[t][p][row] = rowheight
+ self.rowheight[t][p][row] = rowheight * rh
+ row += 1
+ if(row > self.rows):
+ self.rows = int(row)
+ return row
+ def phaseRowHeight(self, test, phase, row):
+ return self.rowheight[test][phase][row]
+ def phaseRowTop(self, test, phase, row):
+ top = 0
+ for i in sorted(self.rowheight[test][phase]):
+ if i >= row:
+ break
+ top += self.rowheight[test][phase][i]
+ return top
+ def calcTotalRows(self):
+ # Calculate the heights and offsets for the header and rows
+ maxrows = 0
+ standardphases = []
+ for t in self.rowlines:
+ for p in self.rowlines[t]:
+ total = 0
+ for i in sorted(self.rowlines[t][p]):
+ total += self.rowlines[t][p][i]
+ if total > maxrows:
+ maxrows = total
+ if total == len(self.rowlines[t][p]):
+ standardphases.append((t, p))
+ self.height = self.scaleH + (maxrows*self.rowH)
+ self.bodyH = self.height - self.scaleH
+ # if there is 1 line per row, draw them the standard way
+ for t, p in standardphases:
+ for i in sorted(self.rowheight[t][p]):
+ self.rowheight[t][p][i] = self.bodyH/len(self.rowlines[t][p])
+ def createZoomBox(self, mode='command', testcount=1):
+ # Create bounding box, add buttons
+ html_zoombox = '<center><button id="zoomin">ZOOM IN +</button><button id="zoomout">ZOOM OUT -</button><button id="zoomdef">ZOOM 1:1</button></center>\n'
+ html_timeline = '<div id="dmesgzoombox" class="zoombox">\n<div id="{0}" class="timeline" style="height:{1}px">\n'
+ html_devlist1 = '<button id="devlist1" class="devlist" style="float:left;">Device Detail{0}</button>'
+ html_devlist2 = '<button id="devlist2" class="devlist" style="float:right;">Device Detail2</button>\n'
+ if mode != 'command':
+ if testcount > 1:
+ self.html += html_devlist2
+ self.html += html_devlist1.format('1')
+ else:
+ self.html += html_devlist1.format('')
+ self.html += html_zoombox
+ self.html += html_timeline.format('dmesg', self.height)
+ # Function: createTimeScale
+ # Description:
+ # Create the timescale for a timeline block
+ # Arguments:
+ # m0: start time (mode begin)
+ # mMax: end time (mode end)
+ # tTotal: total timeline time
+ # mode: suspend or resume
+ # Output:
+ # The html code needed to display the time scale
+ def createTimeScale(self, m0, mMax, tTotal, mode):
+ timescale = '<div class="t" style="right:{0}%">{1}</div>\n'
+ rline = '<div class="t" style="left:0;border-left:1px solid black;border-right:0;">{0}</div>\n'
+ output = '<div class="timescale">\n'
+ # set scale for timeline
+ mTotal = mMax - m0
+ tS = 0.1
+ if(tTotal <= 0):
+ return output+'</div>\n'
+ if(tTotal > 4):
+ tS = 1
+ divTotal = int(mTotal/tS) + 1
+ divEdge = (mTotal - tS*(divTotal-1))*100/mTotal
+ for i in range(divTotal):
+ htmlline = ''
+ if(mode == 'suspend'):
+ pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge)
+ val = '%0.fms' % (float(i-divTotal+1)*tS*1000)
+ if(i == divTotal - 1):
+ val = mode
+ htmlline = timescale.format(pos, val)
+ else:
+ pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal))
+ val = '%0.fms' % (float(i)*tS*1000)
+ htmlline = timescale.format(pos, val)
+ if(i == 0):
+ htmlline = rline.format(mode)
+ output += htmlline
+ self.html += output+'</div>\n'
+
+# Class: TestProps
+# Description:
+# A list of values describing the properties of these test runs
+class TestProps:
+ stamp = ''
+ sysinfo = ''
+ cmdline = ''
+ kparams = ''
+ S0i3 = False
+ fwdata = []
+ stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
+ '(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
+ ' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
+ sysinfofmt = '^# sysinfo .*'
+ cmdlinefmt = '^# command \| (?P<cmd>.*)'
+ kparamsfmt = '^# kparams \| (?P<kp>.*)'
+ ftrace_line_fmt_fg = \
+ '^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
+ ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
+ '[ +!#\*@$]*(?P<dur>[0-9\.]*) .*\| (?P<msg>.*)'
+ ftrace_line_fmt_nop = \
+ ' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\
+ '(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\
+ '(?P<msg>.*)'
+ ftrace_line_fmt = ftrace_line_fmt_nop
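+	# a hypothetical nop-format line these patterns are written to match:
+	#   rtcwake-1234  [002] d..3  795.122030: tracing_mark_write: SUSPEND START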
+ cgformat = False
+ data = 0
+ ktemp = dict()
+ def __init__(self):
+ self.ktemp = dict()
+ def setTracerType(self, tracer):
+ if(tracer == 'function_graph'):
+ self.cgformat = True
+ self.ftrace_line_fmt = self.ftrace_line_fmt_fg
+ elif(tracer == 'nop'):
+ self.ftrace_line_fmt = self.ftrace_line_fmt_nop
+ else:
+ doError('Invalid tracer format: [%s]' % tracer)
+ def parseStamp(self, data, sv):
+ m = re.match(self.stampfmt, self.stamp)
+ data.stamp = {'time': '', 'host': '', 'mode': ''}
+ dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
+ int(m.group('d')), int(m.group('H')), int(m.group('M')),
+ int(m.group('S')))
+ data.stamp['time'] = dt.strftime('%B %d %Y, %I:%M:%S %p')
+ data.stamp['host'] = m.group('host')
+ data.stamp['mode'] = m.group('mode')
+ data.stamp['kernel'] = m.group('kernel')
+ if re.match(self.sysinfofmt, self.sysinfo):
+ for f in self.sysinfo.split('|'):
+ if '#' in f:
+ continue
+ tmp = f.strip().split(':', 1)
+ key = tmp[0]
+ val = tmp[1]
+ data.stamp[key] = val
+ sv.hostname = data.stamp['host']
+ sv.suspendmode = data.stamp['mode']
+ if sv.suspendmode == 'command' and sv.ftracefile != '':
+ modes = ['on', 'freeze', 'standby', 'mem', 'disk']
+ fp = sysvals.openlog(sv.ftracefile, 'r')
+ for line in fp:
+ m = re.match('.* machine_suspend\[(?P<mode>.*)\]', line)
+ if m and m.group('mode') in ['1', '2', '3', '4']:
+ sv.suspendmode = modes[int(m.group('mode'))]
+ data.stamp['mode'] = sv.suspendmode
+ break
+ fp.close()
+ m = re.match(self.cmdlinefmt, self.cmdline)
+ if m:
+ sv.cmdline = m.group('cmd')
+ if self.kparams:
+ m = re.match(self.kparamsfmt, self.kparams)
+ if m:
+ sv.kparams = m.group('kp')
+ if not sv.stamp:
+ sv.stamp = data.stamp
+
+# Class: TestRun
+# Description:
+# A container for a suspend/resume test run. This is necessary as
+# there could be more than one, and they need to be separate.
+class TestRun:
+ ftemp = dict()
+ ttemp = dict()
+ data = 0
+ def __init__(self, dataobj):
+ self.data = dataobj
+ self.ftemp = dict()
+ self.ttemp = dict()
+
+class ProcessMonitor:
+ proclist = dict()
+ running = False
+ def procstat(self):
+ c = ['cat /proc/[1-9]*/stat 2>/dev/null']
+ process = Popen(c, shell=True, stdout=PIPE)
+ running = dict()
+ for line in process.stdout:
+ data = line.split()
+ pid = data[0]
+ name = re.sub('[()]', '', data[1])
+ user = int(data[13])
+ kern = int(data[14])
+ kjiff = ujiff = 0
+ if pid not in self.proclist:
+ self.proclist[pid] = {'name' : name, 'user' : user, 'kern' : kern}
+ else:
+ val = self.proclist[pid]
+ ujiff = user - val['user']
+ kjiff = kern - val['kern']
+ val['user'] = user
+ val['kern'] = kern
+ if ujiff > 0 or kjiff > 0:
+ running[pid] = ujiff + kjiff
+ process.wait()
+ out = ''
+ for pid in running:
+ jiffies = running[pid]
+ val = self.proclist[pid]
+ if out:
+ out += ','
+ out += '%s-%s %d' % (val['name'], pid, jiffies)
+ return 'ps - '+out
+ def processMonitor(self, tid):
+ while self.running:
+ out = self.procstat()
+ if out:
+ sysvals.fsetVal(out, 'trace_marker')
+ def start(self):
+ self.thread = Thread(target=self.processMonitor, args=(0,))
+ self.running = True
+ self.thread.start()
+ def stop(self):
+ self.running = False
+
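+# ProcessMonitor usage sketch (illustrative): create one instance, call
+# start() just before the suspend begins and stop() after resume completes;
+# each sample is written into the trace as a 'ps - name-pid jiffies' marker.
+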
+# ----------------- FUNCTIONS --------------------
+
+# Function: doesTraceLogHaveTraceEvents
+# Description:
+# Quickly determine if the ftrace log has all of the trace events,
+# markers, and/or kprobes required for primary parsing.
+def doesTraceLogHaveTraceEvents():
+ kpcheck = ['_cal: (', '_cpu_down()']
+ techeck = ['suspend_resume']
+ tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
+ sysvals.usekprobes = False
+ fp = sysvals.openlog(sysvals.ftracefile, 'r')
+ for line in fp:
+ # check for kprobes
+ if not sysvals.usekprobes:
+ for i in kpcheck:
+ if i in line:
+ sysvals.usekprobes = True
+ # check for all necessary trace events
+ check = techeck[:]
+ for i in techeck:
+ if i in line:
+ check.remove(i)
+ techeck = check
+ # check for all necessary trace markers
+ check = tmcheck[:]
+ for i in tmcheck:
+ if i in line:
+ check.remove(i)
+ tmcheck = check
+ fp.close()
+ if len(techeck) == 0:
+ sysvals.usetraceevents = True
+ else:
+ sysvals.usetraceevents = False
+ if len(tmcheck) == 0:
+ sysvals.usetracemarkers = True
+ else:
+ sysvals.usetracemarkers = False
+
+# Function: appendIncompleteTraceLog
+# Description:
+# [deprecated for kernel 3.15 or newer]
+# Legacy support of ftrace outputs that lack the device_pm_callback
+# and/or suspend_resume trace events. The primary data should be
+# taken from dmesg, and this ftrace is used only for callgraph data
+# or custom actions in the timeline. The data is appended to the Data
+# objects provided.
+# Arguments:
+# testruns: the array of Data objects obtained from parseKernelLog
+def appendIncompleteTraceLog(testruns):
+ # create TestRun vessels for ftrace parsing
+ testcnt = len(testruns)
+ testidx = 0
+ testrun = []
+ for data in testruns:
+ testrun.append(TestRun(data))
+
+ # extract the callgraph and traceevent data
+ sysvals.vprint('Analyzing the ftrace data (%s)...' % \
+ os.path.basename(sysvals.ftracefile))
+ tp = TestProps()
+ tf = sysvals.openlog(sysvals.ftracefile, 'r')
+ data = 0
+ for line in tf:
+ # remove any latent carriage returns
+ line = line.replace('\r\n', '')
+ # grab the stamp and sysinfo
+ if re.match(tp.stampfmt, line):
+ tp.stamp = line
+ continue
+ elif re.match(tp.sysinfofmt, line):
+ tp.sysinfo = line
+ continue
+ elif re.match(tp.cmdlinefmt, line):
+ tp.cmdline = line
+ continue
+ # determine the trace data type (required for further parsing)
+ m = re.match(sysvals.tracertypefmt, line)
+ if(m):
+ tp.setTracerType(m.group('t'))
+ continue
+ # device properties line
+ if(re.match(sysvals.devpropfmt, line)):
+ devProps(line)
+ continue
+ # parse only valid lines, if this is not one move on
+ m = re.match(tp.ftrace_line_fmt, line)
+ if(not m):
+ continue
+ # gather the basic message data from the line
+ m_time = m.group('time')
+ m_pid = m.group('pid')
+ m_msg = m.group('msg')
+ if(tp.cgformat):
+ m_param3 = m.group('dur')
+ else:
+ m_param3 = 'traceevent'
+ if(m_time and m_pid and m_msg):
+ t = FTraceLine(m_time, m_msg, m_param3)
+ pid = int(m_pid)
+ else:
+ continue
+ # the line should be a call, return, or event
+ if(not t.fcall and not t.freturn and not t.fevent):
+ continue
+ # look for the suspend start marker
+ if(t.startMarker()):
+ data = testrun[testidx].data
+ tp.parseStamp(data, sysvals)
+ data.setStart(t.time)
+ continue
+ if(not data):
+ continue
+ # find the end of resume
+ if(t.endMarker()):
+ data.setEnd(t.time)
+ testidx += 1
+ if(testidx >= testcnt):
+ break
+ continue
+ # trace event processing
+ if(t.fevent):
+ # general trace events have two types, begin and end
+ if(re.match('(?P<name>.*) begin$', t.name)):
+ isbegin = True
+ elif(re.match('(?P<name>.*) end$', t.name)):
+ isbegin = False
+ else:
+ continue
+ m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
+ if(m):
+ val = m.group('val')
+ if val == '0':
+ name = m.group('name')
+ else:
+ name = m.group('name')+'['+val+']'
+ else:
+ m = re.match('(?P<name>.*) .*', t.name)
+ name = m.group('name')
+ # special processing for trace events
+ if re.match('dpm_prepare\[.*', name):
+ continue
+ elif re.match('machine_suspend.*', name):
+ continue
+ elif re.match('suspend_enter\[.*', name):
+ if(not isbegin):
+ data.dmesg['suspend_prepare']['end'] = t.time
+ continue
+ elif re.match('dpm_suspend\[.*', name):
+ if(not isbegin):
+ data.dmesg['suspend']['end'] = t.time
+ continue
+ elif re.match('dpm_suspend_late\[.*', name):
+ if(isbegin):
+ data.dmesg['suspend_late']['start'] = t.time
+ else:
+ data.dmesg['suspend_late']['end'] = t.time
+ continue
+ elif re.match('dpm_suspend_noirq\[.*', name):
+ if(isbegin):
+ data.dmesg['suspend_noirq']['start'] = t.time
+ else:
+ data.dmesg['suspend_noirq']['end'] = t.time
+ continue
+ elif re.match('dpm_resume_noirq\[.*', name):
+ if(isbegin):
+ data.dmesg['resume_machine']['end'] = t.time
+ data.dmesg['resume_noirq']['start'] = t.time
+ else:
+ data.dmesg['resume_noirq']['end'] = t.time
+ continue
+ elif re.match('dpm_resume_early\[.*', name):
+ if(isbegin):
+ data.dmesg['resume_early']['start'] = t.time
+ else:
+ data.dmesg['resume_early']['end'] = t.time
+ continue
+ elif re.match('dpm_resume\[.*', name):
+ if(isbegin):
+ data.dmesg['resume']['start'] = t.time
+ else:
+ data.dmesg['resume']['end'] = t.time
+ continue
+ elif re.match('dpm_complete\[.*', name):
+ if(isbegin):
+ data.dmesg['resume_complete']['start'] = t.time
+ else:
+ data.dmesg['resume_complete']['end'] = t.time
+ continue
+ # skip trace events inside devices calls
+ if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
+ continue
+ # global events (outside device calls) are simply graphed
+ if(isbegin):
+ # store each trace event in ttemp
+ if(name not in testrun[testidx].ttemp):
+ testrun[testidx].ttemp[name] = []
+ testrun[testidx].ttemp[name].append(\
+ {'begin': t.time, 'end': t.time})
+ else:
+ # finish off matching trace event in ttemp
+ if(name in testrun[testidx].ttemp):
+ testrun[testidx].ttemp[name][-1]['end'] = t.time
+ # call/return processing
+ elif sysvals.usecallgraph:
+ # create a callgraph object for the data
+ if(pid not in testrun[testidx].ftemp):
+ testrun[testidx].ftemp[pid] = []
+ testrun[testidx].ftemp[pid].append(FTraceCallGraph(pid, sysvals))
+ # when the call is finished, see which device matches it
+ cg = testrun[testidx].ftemp[pid][-1]
+ res = cg.addLine(t)
+ if(res != 0):
+ testrun[testidx].ftemp[pid].append(FTraceCallGraph(pid, sysvals))
+ if(res == -1):
+ testrun[testidx].ftemp[pid][-1].addLine(t)
+ tf.close()
+
+ for test in testrun:
+ # add the traceevent data to the device hierarchy
+ if(sysvals.usetraceevents):
+ for name in test.ttemp:
+ for event in test.ttemp[name]:
+ test.data.newActionGlobal(name, event['begin'], event['end'])
+
+ # add the callgraph data to the device hierarchy
+ for pid in test.ftemp:
+ for cg in test.ftemp[pid]:
+ if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0):
+ continue
+ if(not cg.postProcess()):
+ # note: pid alone identifies the cg here; the line match 'm' is stale by now
+ id = 'task %s' % (pid)
+ sysvals.vprint('Sanity check failed for '+\
+ id+', ignoring this callback')
+ continue
+ callstart = cg.start
+ callend = cg.end
+ for p in test.data.phases:
+ if(test.data.dmesg[p]['start'] <= callstart and
+ callstart <= test.data.dmesg[p]['end']):
+ list = test.data.dmesg[p]['list']
+ for devname in list:
+ dev = list[devname]
+ if(pid == dev['pid'] and
+ callstart <= dev['start'] and
+ callend >= dev['end']):
+ dev['ftrace'] = cg
+ break
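+
+# Usage sketch for the legacy path (hedged: the tool wires this up itself
+# elsewhere): the Data objects come from the dmesg side, and the ftrace data
+# is layered onto them in place:
+#
+#     testruns = loadKernelLog()
+#     for data in testruns:
+#         parseKernelLog(data)
+#     if sysvals.ftracefile:
+#         appendIncompleteTraceLog(testruns)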
+
+# Function: parseTraceLog
+# Description:
+# Analyze an ftrace log output file generated from this app during
+# the execution phase. Used when the ftrace log is the primary data source
+# and includes the suspend_resume and device_pm_callback trace events.
+# The ftrace filename is taken from sysvals.
+# Output:
+# A tuple: an array of Data objects, plus a string listing any phase errors
+def parseTraceLog(live=False):
+ sysvals.vprint('Analyzing the ftrace data (%s)...' % \
+ os.path.basename(sysvals.ftracefile))
+ if(os.path.exists(sysvals.ftracefile) == False):
+ doError('%s does not exist' % sysvals.ftracefile)
+ if not live:
+ sysvals.setupAllKprobes()
+ tracewatch = []
+ if sysvals.usekprobes:
+ tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
+ 'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', 'CPU_OFF']
+
+ # extract the callgraph and traceevent data
+ tp = TestProps()
+ testruns = []
+ testdata = []
+ testrun = 0
+ data = 0
+ tf = sysvals.openlog(sysvals.ftracefile, 'r')
+ phase = 'suspend_prepare'
+ for line in tf:
+ # remove any latent carriage returns
+ line = line.replace('\r\n', '')
+ # stamp and sysinfo lines
+ if re.match(tp.stampfmt, line):
+ tp.stamp = line
+ continue
+ elif re.match(tp.sysinfofmt, line):
+ tp.sysinfo = line
+ continue
+ elif re.match(tp.cmdlinefmt, line):
+ tp.cmdline = line
+ continue
+ # firmware line: pull out any firmware data
+ m = re.match(sysvals.firmwarefmt, line)
+ if(m):
+ tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
+ continue
+ # tracer type line: determine the trace data type
+ m = re.match(sysvals.tracertypefmt, line)
+ if(m):
+ tp.setTracerType(m.group('t'))
+ continue
+ # device properties line
+ if(re.match(sysvals.devpropfmt, line)):
+ devProps(line)
+ continue
+ # ignore all other commented lines
+ if line[0] == '#':
+ continue
+ # ftrace line: parse only valid lines
+ m = re.match(tp.ftrace_line_fmt, line)
+ if(not m):
+ continue
+ # gather the basic message data from the line
+ m_time = m.group('time')
+ m_proc = m.group('proc')
+ m_pid = m.group('pid')
+ m_msg = m.group('msg')
+ if(tp.cgformat):
+ m_param3 = m.group('dur')
+ else:
+ m_param3 = 'traceevent'
+ if(m_time and m_pid and m_msg):
+ t = FTraceLine(m_time, m_msg, m_param3)
+ pid = int(m_pid)
+ else:
+ continue
+ # the line should be a call, return, or event
+ if(not t.fcall and not t.freturn and not t.fevent):
+ continue
+ # find the start of suspend
+ if(t.startMarker()):
+ phase = 'suspend_prepare'
+ data = Data(len(testdata))
+ testdata.append(data)
+ testrun = TestRun(data)
+ testruns.append(testrun)
+ tp.parseStamp(data, sysvals)
+ data.setStart(t.time)
+ data.tKernSus = t.time
+ continue
+ if(not data):
+ continue
+ # process cpu exec line
+ if t.type == 'tracing_mark_write':
+ m = re.match(sysvals.procexecfmt, t.name)
+ if(m):
+ proclist = dict()
+ for ps in m.group('ps').split(','):
+ val = ps.split()
+ if not val:
+ continue
+ name = val[0].replace('--', '-')
+ proclist[name] = int(val[1])
+ data.pstl[t.time] = proclist
+ continue
+ # find the end of resume
+ if(t.endMarker()):
+ data.setEnd(t.time)
+ if data.tKernRes == 0.0:
+ data.tKernRes = t.time
+ if data.dmesg['resume_complete']['end'] < 0:
+ data.dmesg['resume_complete']['end'] = t.time
+ if sysvals.suspendmode == 'mem' and len(tp.fwdata) > data.testnumber:
+ data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
+ if(data.tSuspended != 0 and data.tResumed != 0 and \
+ (data.fwSuspend > 0 or data.fwResume > 0)):
+ data.fwValid = True
+ if(not sysvals.usetracemarkers):
+ # no trace markers? then quit and be sure to finish recording
+ # the event we used to trigger resume end
+ if('thaw_processes' in testrun.ttemp and len(testrun.ttemp['thaw_processes']) > 0):
+ # if an entry exists, assume this is its end
+ testrun.ttemp['thaw_processes'][-1]['end'] = t.time
+ break
+ continue
+ # trace event processing
+ if(t.fevent):
+ if(phase == 'post_resume'):
+ data.setEnd(t.time)
+ if(t.type == 'suspend_resume'):
+ # suspend_resume trace events have two types, begin and end
+ if(re.match('(?P<name>.*) begin$', t.name)):
+ isbegin = True
+ elif(re.match('(?P<name>.*) end$', t.name)):
+ isbegin = False
+ else:
+ continue
+ m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
+ if(m):
+ val = m.group('val')
+ if val == '0':
+ name = m.group('name')
+ else:
+ name = m.group('name')+'['+val+']'
+ else:
+ m = re.match('(?P<name>.*) .*', t.name)
+ name = m.group('name')
+ # ignore these events
+ if(name.split('[')[0] in tracewatch):
+ continue
+ # -- phase changes --
+ # start of kernel suspend
+ if(re.match('suspend_enter\[.*', t.name)):
+ if(isbegin and data.start == data.tKernSus):
+ data.dmesg[phase]['start'] = t.time
+ data.tKernSus = t.time
+ continue
+ # suspend_prepare start
+ elif(re.match('dpm_prepare\[.*', t.name)):
+ phase = 'suspend_prepare'
+ if(not isbegin):
+ data.dmesg[phase]['end'] = t.time
+ if data.dmesg[phase]['start'] < 0:
+ data.dmesg[phase]['start'] = data.start
+ continue
+ # suspend start
+ elif(re.match('dpm_suspend\[.*', t.name)):
+ phase = 'suspend'
+ data.setPhase(phase, t.time, isbegin)
+ continue
+ # suspend_late start
+ elif(re.match('dpm_suspend_late\[.*', t.name)):
+ phase = 'suspend_late'
+ data.setPhase(phase, t.time, isbegin)
+ continue
+ # suspend_noirq start
+ elif(re.match('dpm_suspend_noirq\[.*', t.name)):
+ if data.phaseCollision('suspend_noirq', isbegin, line):
+ continue
+ phase = 'suspend_noirq'
+ data.setPhase(phase, t.time, isbegin)
+ if(not isbegin):
+ phase = 'suspend_machine'
+ data.dmesg[phase]['start'] = t.time
+ continue
+ # suspend_machine/resume_machine
+ elif(re.match('machine_suspend\[.*', t.name)):
+ if(isbegin):
+ phase = 'suspend_machine'
+ data.dmesg[phase]['end'] = t.time
+ data.tSuspended = t.time
+ else:
+ if(sysvals.suspendmode in ['mem', 'disk'] and not tp.S0i3):
+ data.dmesg['suspend_machine']['end'] = t.time
+ data.tSuspended = t.time
+ phase = 'resume_machine'
+ data.dmesg[phase]['start'] = t.time
+ data.tResumed = t.time
+ data.tLow = data.tResumed - data.tSuspended
+ continue
+ # acpi_suspend
+ elif(re.match('acpi_suspend\[.*', t.name)):
+ # acpi_suspend[0] S0i3
+ if(re.match('acpi_suspend\[0\] begin', t.name)):
+ if(sysvals.suspendmode == 'mem'):
+ tp.S0i3 = True
+ data.dmesg['suspend_machine']['end'] = t.time
+ data.tSuspended = t.time
+ continue
+ # resume_noirq start
+ elif(re.match('dpm_resume_noirq\[.*', t.name)):
+ if data.phaseCollision('resume_noirq', isbegin, line):
+ continue
+ phase = 'resume_noirq'
+ data.setPhase(phase, t.time, isbegin)
+ if(isbegin):
+ data.dmesg['resume_machine']['end'] = t.time
+ continue
+ # resume_early start
+ elif(re.match('dpm_resume_early\[.*', t.name)):
+ phase = 'resume_early'
+ data.setPhase(phase, t.time, isbegin)
+ continue
+ # resume start
+ elif(re.match('dpm_resume\[.*', t.name)):
+ phase = 'resume'
+ data.setPhase(phase, t.time, isbegin)
+ continue
+ # resume complete start
+ elif(re.match('dpm_complete\[.*', t.name)):
+ phase = 'resume_complete'
+ if(isbegin):
+ data.dmesg[phase]['start'] = t.time
+ continue
+ # skip trace events inside devices calls
+ if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
+ continue
+ # global events (outside device calls) are graphed
+ if(name not in testrun.ttemp):
+ testrun.ttemp[name] = []
+ if(isbegin):
+ # create a new list entry
+ testrun.ttemp[name].append(\
+ {'begin': t.time, 'end': t.time, 'pid': pid})
+ else:
+ if(len(testrun.ttemp[name]) > 0):
+ # if an entry exists, assume this is its end
+ testrun.ttemp[name][-1]['end'] = t.time
+ elif(phase == 'post_resume'):
+ # post resume events can just have ends
+ testrun.ttemp[name].append({
+ 'begin': data.dmesg[phase]['start'],
+ 'end': t.time})
+ # device callback start
+ elif(t.type == 'device_pm_callback_start'):
+ m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\
+ t.name)
+ if(not m):
+ continue
+ drv = m.group('drv')
+ n = m.group('d')
+ p = m.group('p')
+ if(n and p):
+ data.newAction(phase, n, pid, p, t.time, -1, drv)
+ if pid not in data.devpids:
+ data.devpids.append(pid)
+ # device callback finish
+ elif(t.type == 'device_pm_callback_end'):
+ m = re.match('(?P<drv>.*) (?P<d>.*), err.*', t.name)
+ if(not m):
+ continue
+ n = m.group('d')
+ list = data.dmesg[phase]['list']
+ if(n in list):
+ dev = list[n]
+ dev['length'] = t.time - dev['start']
+ dev['end'] = t.time
+ # kprobe event processing
+ elif(t.fkprobe):
+ kprobename = t.type
+ kprobedata = t.name
+ key = (kprobename, pid)
+ # displayname is generated from kprobe data
+ displayname = ''
+ if(t.fcall):
+ displayname = sysvals.kprobeDisplayName(kprobename, kprobedata)
+ if not displayname:
+ continue
+ if(key not in tp.ktemp):
+ tp.ktemp[key] = []
+ tp.ktemp[key].append({
+ 'pid': pid,
+ 'begin': t.time,
+ 'end': t.time,
+ 'name': displayname,
+ 'cdata': kprobedata,
+ 'proc': m_proc,
+ })
+ elif(t.freturn):
+ if(key not in tp.ktemp or len(tp.ktemp[key]) < 1):
+ continue
+ e = tp.ktemp[key][-1]
+ if e['begin'] < 0.0 or t.time - e['begin'] < 0.000001:
+ tp.ktemp[key].pop()
+ else:
+ e['end'] = t.time
+ e['rdata'] = kprobedata
+ # end of kernel resume
+ if(kprobename == 'pm_notifier_call_chain' or \
+ kprobename == 'pm_restore_console'):
+ data.dmesg[phase]['end'] = t.time
+ data.tKernRes = t.time
+
+ # callgraph processing
+ elif sysvals.usecallgraph:
+ # create a callgraph object for the data
+ key = (m_proc, pid)
+ if(key not in testrun.ftemp):
+ testrun.ftemp[key] = []
+ testrun.ftemp[key].append(FTraceCallGraph(pid, sysvals))
+ # when the call is finished, see which device matches it
+ cg = testrun.ftemp[key][-1]
+ res = cg.addLine(t)
+ if(res != 0):
+ testrun.ftemp[key].append(FTraceCallGraph(pid, sysvals))
+ if(res == -1):
+ testrun.ftemp[key][-1].addLine(t)
+ tf.close()
+
+ if sysvals.suspendmode == 'command':
+ for test in testruns:
+ for p in test.data.phases:
+ if p == 'suspend_prepare':
+ test.data.dmesg[p]['start'] = test.data.start
+ test.data.dmesg[p]['end'] = test.data.end
+ else:
+ test.data.dmesg[p]['start'] = test.data.end
+ test.data.dmesg[p]['end'] = test.data.end
+ test.data.tSuspended = test.data.end
+ test.data.tResumed = test.data.end
+ test.data.tLow = 0
+ test.data.fwValid = False
+
+ # dev source and procmon events can be unreadable with mixed phase height
+ if sysvals.usedevsrc or sysvals.useprocmon:
+ sysvals.mixedphaseheight = False
+
+ for i in range(len(testruns)):
+ test = testruns[i]
+ data = test.data
+ # find the total time range for this test (begin, end)
+ tlb, tle = data.start, data.end
+ if i < len(testruns) - 1:
+ tle = testruns[i+1].data.start
+ # add the process usage data to the timeline
+ if sysvals.useprocmon:
+ data.createProcessUsageEvents()
+ # add the traceevent data to the device hierarchy
+ if(sysvals.usetraceevents):
+ # add actual trace funcs
+ for name in test.ttemp:
+ for event in test.ttemp[name]:
+ data.newActionGlobal(name, event['begin'], event['end'], event['pid'])
+ # add the kprobe based virtual tracefuncs as actual devices
+ for key in tp.ktemp:
+ name, pid = key
+ if name not in sysvals.tracefuncs:
+ continue
+ for e in tp.ktemp[key]:
+ kb, ke = e['begin'], e['end']
+ if kb == ke or tlb > kb or tle <= kb:
+ continue
+ color = sysvals.kprobeColor(name)
+ data.newActionGlobal(e['name'], kb, ke, pid, color)
+ # add config base kprobes and dev kprobes
+ if sysvals.usedevsrc:
+ for key in tp.ktemp:
+ name, pid = key
+ if name in sysvals.tracefuncs or name not in sysvals.dev_tracefuncs:
+ continue
+ for e in tp.ktemp[key]:
+ kb, ke = e['begin'], e['end']
+ if kb == ke or tlb > kb or tle <= kb:
+ continue
+ data.addDeviceFunctionCall(e['name'], name, e['proc'], pid, kb,
+ ke, e['cdata'], e['rdata'])
+ if sysvals.usecallgraph:
+ # add the callgraph data to the device hierarchy
+ sortlist = dict()
+ for key in test.ftemp:
+ proc, pid = key
+ for cg in test.ftemp[key]:
+ if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0):
+ continue
+ if(not cg.postProcess()):
+ id = 'task %s' % (pid)
+ sysvals.vprint('Sanity check failed for '+\
+ id+', ignoring this callback')
+ continue
+ # match cg data to devices
+ devname = ''
+ if sysvals.suspendmode != 'command':
+ devname = cg.deviceMatch(pid, data)
+ if not devname:
+ sortkey = '%f%f%d' % (cg.start, cg.end, pid)
+ sortlist[sortkey] = cg
+ elif len(cg.list) > 1000000:
+ print('WARNING: the callgraph for %s is massive (%d lines)' %
+ (devname, len(cg.list)))
+ # create blocks for orphan cg data
+ for sortkey in sorted(sortlist):
+ cg = sortlist[sortkey]
+ name = cg.name
+ if sysvals.isCallgraphFunc(name):
+ sysvals.vprint('Callgraph found for task %d: %.3fms, %s' % (cg.pid, (cg.end - cg.start)*1000, name))
+ cg.newActionFromFunction(data)
+ if sysvals.suspendmode == 'command':
+ return (testdata, '')
+
+ # fill in any missing phases
+ error = []
+ for data in testdata:
+ tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1))
+ terr = ''
+ lp = data.phases[0]
+ for p in data.phases:
+ if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
+ if not terr:
+ print('TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp))
+ terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
+ error.append(terr)
+ sysvals.vprint('WARNING: phase "%s" is missing!' % p)
+ if(data.dmesg[p]['start'] < 0):
+ data.dmesg[p]['start'] = data.dmesg[lp]['end']
+ if(p == 'resume_machine'):
+ data.tSuspended = data.dmesg[lp]['end']
+ data.tResumed = data.dmesg[lp]['end']
+ data.tLow = 0
+ if(data.dmesg[p]['end'] < 0):
+ data.dmesg[p]['end'] = data.dmesg[p]['start']
+ if(p != lp and not ('machine' in p and 'machine' in lp)):
+ data.dmesg[lp]['end'] = data.dmesg[p]['start']
+ lp = p
+
+ if(len(sysvals.devicefilter) > 0):
+ data.deviceFilter(sysvals.devicefilter)
+ data.fixupInitcallsThatDidntReturn()
+ if sysvals.usedevsrc:
+ data.optimizeDevSrc()
+
+ # x2: merge any overlapping devices between test runs
+ if sysvals.usedevsrc and len(testdata) > 1:
+ tc = len(testdata)
+ for i in range(tc - 1):
+ devlist = testdata[i].overflowDevices()
+ for j in range(i + 1, tc):
+ testdata[j].mergeOverlapDevices(devlist)
+ testdata[0].stitchTouchingThreads(testdata[1:])
+ return (testdata, ', '.join(error))
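+
+# Usage sketch: the returned tuple carries the parsed test runs plus a comma-
+# separated summary of any phases that failed (an empty string if none did):
+#
+#     testdata, errstr = parseTraceLog()
+#     if errstr:
+#         sysvals.vprint('phase errors: ' + errstr)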
+
+# Function: loadKernelLog
+# Description:
+# [deprecated for kernel 3.15.0 or newer]
+# Load the dmesg file into memory and fix up any ordering issues.
+# The dmesg filename is taken from sysvals.
+# Output:
+# An array of empty Data objects with only their dmesgtext attributes set
+def loadKernelLog():
+ sysvals.vprint('Analyzing the dmesg data (%s)...' % \
+ os.path.basename(sysvals.dmesgfile))
+ if(os.path.exists(sysvals.dmesgfile) == False):
+ doError('%s does not exist' % sysvals.dmesgfile)
+
+ # there can be multiple test runs in a single file
+ tp = TestProps()
+ tp.stamp = datetime.now().strftime('# suspend-%m%d%y-%H%M%S localhost mem unknown')
+ testruns = []
+ data = 0
+ lf = sysvals.openlog(sysvals.dmesgfile, 'r')
+ for line in lf:
+ line = line.replace('\r\n', '')
+ idx = line.find('[')
+ if idx > 1:
+ line = line[idx:]
+ # grab the stamp and sysinfo
+ if re.match(tp.stampfmt, line):
+ tp.stamp = line
+ continue
+ elif re.match(tp.sysinfofmt, line):
+ tp.sysinfo = line
+ continue
+ elif re.match(tp.cmdlinefmt, line):
+ tp.cmdline = line
+ continue
+ m = re.match(sysvals.firmwarefmt, line)
+ if(m):
+ tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
+ continue
+ m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+ if(not m):
+ continue
+ msg = m.group("msg")
+ if(re.match('PM: Syncing filesystems.*', msg)):
+ if(data):
+ testruns.append(data)
+ data = Data(len(testruns))
+ tp.parseStamp(data, sysvals)
+ if len(tp.fwdata) > data.testnumber:
+ data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
+ if(data.fwSuspend > 0 or data.fwResume > 0):
+ data.fwValid = True
+ if(not data):
+ continue
+ m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg)
+ if(m):
+ sysvals.stamp['kernel'] = m.group('k')
+ m = re.match('PM: Preparing system for (?P<m>.*) sleep', msg)
+ if(m):
+ sysvals.stamp['mode'] = sysvals.suspendmode = m.group('m')
+ data.dmesgtext.append(line)
+ lf.close()
+
+ if data:
+ testruns.append(data)
+ if len(testruns) < 1:
+ print('ERROR: dmesg log has no suspend/resume data: %s' \
+ % sysvals.dmesgfile)
+
+ # fix lines with same timestamp/function with the call and return swapped
+ for data in testruns:
+ last = ''
+ for line in data.dmesgtext:
+ mc = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) calling '+\
+ '(?P<f>.*)\+ @ .*, parent: .*', line)
+ mr = re.match('.*(\[ *)(?P<t>[0-9\.]*)(\]) call '+\
+ '(?P<f>.*)\+ returned .* after (?P<dt>.*) usecs', last)
+ if(mc and mr and (mc.group('t') == mr.group('t')) and
+ (mc.group('f') == mr.group('f'))):
+ i = data.dmesgtext.index(last)
+ j = data.dmesgtext.index(line)
+ data.dmesgtext[i] = line
+ data.dmesgtext[j] = last
+ last = line
+ return testruns
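+
+# For reference, the swap fix above targets adjacent dmesg lines like these
+# (made-up timestamps and device), where a callback's return is logged just
+# before its own call with the same timestamp:
+#
+#     [  100.001000] call usb1+ returned 0 after 100 usecs
+#     [  100.001000] calling usb1+ @ 231, parent: pci0000:00
+#
+# loadKernelLog reorders them so the "calling" line comes first.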
+
+# Function: parseKernelLog
+# Description:
+# [deprecated for kernel 3.15.0 or newer]
+# Analyze a dmesg log output file generated from this app during
+# the execution phase. Create a set of device structures in memory
+# for subsequent formatting in the html output file.
+# This call is only for legacy support on kernels where the ftrace
+# data lacks the suspend_resume or device_pm_callback trace events.
+# Arguments:
+# data: an empty Data object (with dmesgtext) obtained from loadKernelLog
+# Output:
+# The filled Data object
+def parseKernelLog(data):
+ phase = 'suspend_runtime'
+
+ if(data.fwValid):
+ sysvals.vprint('Firmware Suspend = %u ns, Firmware Resume = %u ns' % \
+ (data.fwSuspend, data.fwResume))
+
+ # dmesg phase match table
+ dm = {
+ 'suspend_prepare': 'PM: Syncing filesystems.*',
+ 'suspend': 'PM: Entering [a-z]* sleep.*',
+ 'suspend_late': 'PM: suspend of devices complete after.*',
+ 'suspend_noirq': 'PM: late suspend of devices complete after.*',
+ 'suspend_machine': 'PM: noirq suspend of devices complete after.*',
+ 'resume_machine': 'ACPI: Low-level resume complete.*',
+ 'resume_noirq': 'ACPI: Waking up from system sleep state.*',
+ 'resume_early': 'PM: noirq resume of devices complete after.*',
+ 'resume': 'PM: early resume of devices complete after.*',
+ 'resume_complete': 'PM: resume of devices complete after.*',
+ 'post_resume': '.*Restarting tasks \.\.\..*',
+ }
+ if(sysvals.suspendmode == 'standby'):
+ dm['resume_machine'] = 'PM: Restoring platform NVS memory'
+ elif(sysvals.suspendmode == 'disk'):
+ dm['suspend_late'] = 'PM: freeze of devices complete after.*'
+ dm['suspend_noirq'] = 'PM: late freeze of devices complete after.*'
+ dm['suspend_machine'] = 'PM: noirq freeze of devices complete after.*'
+ dm['resume_machine'] = 'PM: Restoring platform NVS memory'
+ dm['resume_early'] = 'PM: noirq restore of devices complete after.*'
+ dm['resume'] = 'PM: early restore of devices complete after.*'
+ dm['resume_complete'] = 'PM: restore of devices complete after.*'
+ elif(sysvals.suspendmode == 'freeze'):
+ dm['resume_machine'] = 'ACPI: resume from mwait'
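+ # example (made-up timestamp): with the default table above, a line like
+ #   [  512.337000] PM: noirq suspend of devices complete after 2.456 msecs
+ # matches dm['suspend_machine'] and moves the parser into that phase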
+
+ # action table (expected events that occur and show up in dmesg)
+ at = {
+ 'sync_filesystems': {
+ 'smsg': 'PM: Syncing filesystems.*',
+ 'emsg': 'PM: Preparing system for mem sleep.*' },
+ 'freeze_user_processes': {
+ 'smsg': 'Freezing user space processes .*',
+ 'emsg': 'Freezing remaining freezable tasks.*' },
+ 'freeze_tasks': {
+ 'smsg': 'Freezing remaining freezable tasks.*',
+ 'emsg': 'PM: Entering (?P<mode>[a-z,A-Z]*) sleep.*' },
+ 'ACPI prepare': {
+ 'smsg': 'ACPI: Preparing to enter system sleep state.*',
+ 'emsg': 'PM: Saving platform NVS memory.*' },
+ 'PM nvs': {
+ 'smsg': 'PM: Saving platform NVS memory.*',
+ 'emsg': 'Disabling non-boot CPUs .*' },
+ }
+
+ t0 = -1.0
+ cpu_start = -1.0
+ prevktime = -1.0
+ actions = dict()
+ for line in data.dmesgtext:
+ # parse each dmesg line into the time and message
+ m = re.match('[ \t]*(\[ *)(?P<ktime>[0-9\.]*)(\]) (?P<msg>.*)', line)
+ if(m):
+ val = m.group('ktime')
+ try:
+ ktime = float(val)
+ except ValueError:
+ continue
+ msg = m.group('msg')
+ # initialize data start to first line time
+ if t0 < 0:
+ data.setStart(ktime)
+ t0 = ktime
+ else:
+ continue
+
+ # hack for determining resume_machine end for freeze
+ if(not sysvals.usetraceevents and sysvals.suspendmode == 'freeze' \
+ and phase == 'resume_machine' and \
+ re.match('calling (?P<f>.*)\+ @ .*, parent: .*', msg)):
+ data.dmesg['resume_machine']['end'] = ktime
+ phase = 'resume_noirq'
+ data.dmesg[phase]['start'] = ktime
+
+ # suspend_prepare start
+ if(re.match(dm['suspend_prepare'], msg)):
+ phase = 'suspend_prepare'
+ data.dmesg[phase]['start'] = ktime
+ data.setStart(ktime)
+ data.tKernSus = ktime
+ # suspend start
+ elif(re.match(dm['suspend'], msg)):
+ data.dmesg['suspend_prepare']['end'] = ktime
+ phase = 'suspend'
+ data.dmesg[phase]['start'] = ktime
+ # suspend_late start
+ elif(re.match(dm['suspend_late'], msg)):
+ data.dmesg['suspend']['end'] = ktime
+ phase = 'suspend_late'
+ data.dmesg[phase]['start'] = ktime
+ # suspend_noirq start
+ elif(re.match(dm['suspend_noirq'], msg)):
+ data.dmesg['suspend_late']['end'] = ktime
+ phase = 'suspend_noirq'
+ data.dmesg[phase]['start'] = ktime
+ # suspend_machine start
+ elif(re.match(dm['suspend_machine'], msg)):
+ data.dmesg['suspend_noirq']['end'] = ktime
+ phase = 'suspend_machine'
+ data.dmesg[phase]['start'] = ktime
+ # resume_machine start
+ elif(re.match(dm['resume_machine'], msg)):
+ if(sysvals.suspendmode in ['freeze', 'standby']):
+ data.tSuspended = prevktime
+ data.dmesg['suspend_machine']['end'] = prevktime
+ else:
+ data.tSuspended = ktime
+ data.dmesg['suspend_machine']['end'] = ktime
+ phase = 'resume_machine'
+ data.tResumed = ktime
+ data.tLow = data.tResumed - data.tSuspended
+ data.dmesg[phase]['start'] = ktime
+ # resume_noirq start
+ elif(re.match(dm['resume_noirq'], msg)):
+ data.dmesg['resume_machine']['end'] = ktime
+ phase = 'resume_noirq'
+ data.dmesg[phase]['start'] = ktime
+ # resume_early start
+ elif(re.match(dm['resume_early'], msg)):
+ data.dmesg['resume_noirq']['end'] = ktime
+ phase = 'resume_early'
+ data.dmesg[phase]['start'] = ktime
+ # resume start
+ elif(re.match(dm['resume'], msg)):
+ data.dmesg['resume_early']['end'] = ktime
+ phase = 'resume'
+ data.dmesg[phase]['start'] = ktime
+ # resume complete start
+ elif(re.match(dm['resume_complete'], msg)):
+ data.dmesg['resume']['end'] = ktime
+ phase = 'resume_complete'
+ data.dmesg[phase]['start'] = ktime
+ # post resume start
+ elif(re.match(dm['post_resume'], msg)):
+ data.dmesg['resume_complete']['end'] = ktime
+ data.setEnd(ktime)
+ data.tKernRes = ktime
+ break
+
+ # -- device callbacks --
+ if(phase in data.phases):
+ # device init call
+ if(re.match('calling (?P<f>.*)\+ @ .*, parent: .*', msg)):
+ sm = re.match('calling (?P<f>.*)\+ @ '+\
+ '(?P<n>.*), parent: (?P<p>.*)', msg)
+ f = sm.group('f')
+ n = sm.group('n')
+ p = sm.group('p')
+ if(f and n and p):
+ data.newAction(phase, f, int(n), p, ktime, -1, '')
+ # device init return
+ elif(re.match('call (?P<f>.*)\+ returned .* after '+\
+ '(?P<t>.*) usecs', msg)):
+ sm = re.match('call (?P<f>.*)\+ returned .* after '+\
+ '(?P<t>.*) usecs(?P<a>.*)', msg)
+ f = sm.group('f')
+ t = sm.group('t')
+ list = data.dmesg[phase]['list']
+ if(f in list):
+ dev = list[f]
+ dev['length'] = int(t)
+ dev['end'] = ktime
+
+ # if trace events are not available, these are better than nothing
+ if(not sysvals.usetraceevents):
+ # look for known actions
+ for a in at:
+ if(re.match(at[a]['smsg'], msg)):
+ if(a not in actions):
+ actions[a] = []
+ actions[a].append({'begin': ktime, 'end': ktime})
+ if(re.match(at[a]['emsg'], msg)):
+ if(a in actions):
+ actions[a][-1]['end'] = ktime
+ # now look for CPU on/off events
+ if(re.match('Disabling non-boot CPUs .*', msg)):
+ # start of first cpu suspend
+ cpu_start = ktime
+ elif(re.match('Enabling non-boot CPUs .*', msg)):
+ # start of first cpu resume
+ cpu_start = ktime
+ elif(re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg)):
+ # end of a cpu suspend, start of the next
+ m = re.match('smpboot: CPU (?P<cpu>[0-9]*) is now offline', msg)
+ cpu = 'CPU'+m.group('cpu')
+ if(cpu not in actions):
+ actions[cpu] = []
+ actions[cpu].append({'begin': cpu_start, 'end': ktime})
+ cpu_start = ktime
+ elif(re.match('CPU(?P<cpu>[0-9]*) is up', msg)):
+ # end of a cpu resume, start of the next
+ m = re.match('CPU(?P<cpu>[0-9]*) is up', msg)
+ cpu = 'CPU'+m.group('cpu')
+ if(cpu not in actions):
+ actions[cpu] = []
+ actions[cpu].append({'begin': cpu_start, 'end': ktime})
+ cpu_start = ktime
+ prevktime = ktime
+
+ # fill in any missing phases
+ lp = data.phases[0]
+ for p in data.phases:
+ if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
+ print('WARNING: phase "%s" is missing, something went wrong!' % p)
+ print(' In %s, this dmesg line denotes the start of %s:' % \
+ (sysvals.suspendmode, p))
+ print(' "%s"' % dm[p])
+ if(data.dmesg[p]['start'] < 0):
+ data.dmesg[p]['start'] = data.dmesg[lp]['end']
+ if(p == 'resume_machine'):
+ data.tSuspended = data.dmesg[lp]['end']
+ data.tResumed = data.dmesg[lp]['end']
+ data.tLow = 0
+ if(data.dmesg[p]['end'] < 0):
+ data.dmesg[p]['end'] = data.dmesg[p]['start']
+ lp = p
+
+ # fill in any actions we've found
+ for name in actions:
+ for event in actions[name]:
+ data.newActionGlobal(name, event['begin'], event['end'])
+
+ if(len(sysvals.devicefilter) > 0):
+ data.deviceFilter(sysvals.devicefilter)
+ data.fixupInitcallsThatDidntReturn()
+ return True
+
+def callgraphHTML(sv, hf, num, cg, title, color, devid):
+ html_func_top = '<article id="{0}" class="atop" style="background:{1}">\n<input type="checkbox" class="pf" id="f{2}" checked/><label for="f{2}">{3} {4}</label>\n'
+ html_func_start = '<article>\n<input type="checkbox" class="pf" id="f{0}" checked/><label for="f{0}">{1} {2}</label>\n'
+ html_func_end = '</article>\n'
+ html_func_leaf = '<article>{0} {1}</article>\n'
+
+ cgid = devid
+ if cg.id:
+ cgid += cg.id
+ cglen = (cg.end - cg.start) * 1000
+ if cglen < sv.mincglen:
+ return num
+
+ fmt = '<r>(%.3f ms @ '+sv.timeformat+' to '+sv.timeformat+')</r>'
+ flen = fmt % (cglen, cg.start, cg.end)
+ hf.write(html_func_top.format(cgid, color, num, title, flen))
+ num += 1
+ for line in cg.list:
+ if(line.length < 0.000000001):
+ flen = ''
+ else:
+ fmt = '<n>(%.3f ms @ '+sv.timeformat+')</n>'
+ flen = fmt % (line.length*1000, line.time)
+ if line.isLeaf():
+ hf.write(html_func_leaf.format(line.name, flen))
+ elif line.freturn:
+ hf.write(html_func_end)
+ else:
+ hf.write(html_func_start.format(num, line.name, flen))
+ num += 1
+ hf.write(html_func_end)
+ return num
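+
+# Usage sketch (hypothetical title, color, and device id): each call emits one
+# expandable callgraph <article> and returns the next free checkbox number:
+#
+#     num = 0
+#     num = callgraphHTML(sysvals, hf, num, cg, 'usb1 resume', 'white', 'dev42')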
+
+def addCallgraphs(sv, hf, data):
+ hf.write('<section id="callgraphs" class="callgraph">\n')
+ # write out the ftrace data converted to html
+ num = 0
+ for p in data.phases:
+ if sv.cgphase and p != sv.cgphase:
+ continue
+ list = data.dmesg[p]['list']
+ for devname in data.sortedDevices(p):
+ if len(sv.cgfilter) > 0 and devname not in sv.cgfilter:
+ continue
+ dev = list[devname]
+ color = 'white'
+ if 'color' in data.dmesg[p]:
+ color = data.dmesg[p]['color']
+ if 'color' in dev:
+ color = dev['color']
+ name = devname
+ if(devname in sv.devprops):
+ name = sv.devprops[devname].altName(devname)
+ if sv.suspendmode in suspendmodename:
+ name += ' '+p
+ if('ftrace' in dev):
+ cg = dev['ftrace']
+ num = callgraphHTML(sv, hf, num, cg,
+ name, color, dev['id'])
+ if('ftraces' in dev):
+ for cg in dev['ftraces']:
+ num = callgraphHTML(sv, hf, num, cg,
+ name+' &rarr; '+cg.name, color, dev['id'])
+ hf.write('\n\n </section>\n')
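+
+# Usage sketch (hypothetical phase and device filters): the output can be
+# narrowed via sysvals before writing the callgraph section:
+#
+#     sysvals.cgphase = 'suspend'
+#     sysvals.cgfilter = ['usb1', 'i915']
+#     addCallgraphs(sysvals, hf, data)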
+
+# Function: createHTMLSummarySimple
+# Description:
+# Create summary html file for a series of tests
+# Arguments:
+# testruns: array of per-test summary dictionaries with 'mode', 'host',
+# 'kernel', 'time', 'suspend', 'resume', 'result', 'issues', and 'url' keys
+def createHTMLSummarySimple(testruns, htmlfile, folder):
+ # write the html header first (html head, css code, up to body start)
+ html = '<!DOCTYPE html>\n<html>\n<head>\n\
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
+ <title>SleepGraph Summary</title>\n\
+ <style type=\'text/css\'>\n\
+ .stamp {width: 100%;text-align:center;background:#888;line-height:30px;color:white;font: 25px Arial;}\n\
+ table {width:100%;border-collapse: collapse;}\n\
+ .summary {border:1px solid;}\n\
+ th {border: 1px solid black;background:#222;color:white;}\n\
+ td {font: 16px "Times New Roman";text-align: center;}\n\
+ tr.head td {border: 1px solid black;background:#aaa;}\n\
+ tr.alt {background-color:#ddd;}\n\
+ tr.notice {color:red;}\n\
+ .minval {background-color:#BBFFBB;}\n\
+ .medval {background-color:#BBBBFF;}\n\
+ .maxval {background-color:#FFBBBB;}\n\
+ .head a {color:#000;text-decoration: none;}\n\
+ </style>\n</head>\n<body>\n'
+
+ # extract the test data into list
+ list = dict()
+ tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
+ iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
+ num = 0
+ lastmode = ''
+ cnt = {'pass':0, 'fail':0, 'hang':0}
+ for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
+ mode = data['mode']
+ if mode not in list:
+ list[mode] = {'data': [], 'avg': [0,0], 'min': [0,0], 'max': [0,0], 'med': [0,0]}
+ if lastmode and lastmode != mode and num > 0:
+ for i in range(2):
+ s = sorted(tMed[i])
+ list[lastmode]['med'][i] = s[int(len(s)/2)]
+ iMed[i] = tMed[i].index(list[lastmode]['med'][i])
+ list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
+ list[lastmode]['min'] = tMin
+ list[lastmode]['max'] = tMax
+ list[lastmode]['idx'] = (iMin, iMed, iMax)
+ tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [[], []]
+ iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
+ num = 0
+ tVal = [float(data['suspend']), float(data['resume'])]
+ list[mode]['data'].append([data['host'], data['kernel'],
+ data['time'], tVal[0], tVal[1], data['url'], data['result'],
+ data['issues']])
+ idx = len(list[mode]['data']) - 1
+ if data['result'] == 'pass':
+ cnt['pass'] += 1
+ for i in range(2):
+ tMed[i].append(tVal[i])
+ tAvg[i] += tVal[i]
+ if tMin[i] == 0 or tVal[i] < tMin[i]:
+ iMin[i] = idx
+ tMin[i] = tVal[i]
+ if tMax[i] == 0 or tVal[i] > tMax[i]:
+ iMax[i] = idx
+ tMax[i] = tVal[i]
+ num += 1
+ elif data['result'] == 'hang':
+ cnt['hang'] += 1
+ elif data['result'] == 'fail':
+ cnt['fail'] += 1
+ lastmode = mode
+ if lastmode and num > 0:
+ for i in range(2):
+ s = sorted(tMed[i])
+ list[lastmode]['med'][i] = s[int(len(s)/2)]
+ iMed[i] = tMed[i].index(list[lastmode]['med'][i])
+ list[lastmode]['avg'] = [tAvg[0] / num, tAvg[1] / num]
+ list[lastmode]['min'] = tMin
+ list[lastmode]['max'] = tMax
+ list[lastmode]['idx'] = (iMin, iMed, iMax)
+
+ # group test header
+ desc = []
+ for ilk in sorted(cnt, reverse=True):
+ if cnt[ilk] > 0:
+ desc.append('%d %s' % (cnt[ilk], ilk))
+ html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (folder, len(testruns), ', '.join(desc))
+ th = '\t<th>{0}</th>\n'
+ td = '\t<td>{0}</td>\n'
+ tdh = '\t<td{1}>{0}</td>\n'
+ tdlink = '\t<td><a href="{0}">html</a></td>\n'
+
+ # table header
+ html += '<table class="summary">\n<tr>\n' + th.format('#') +\
+ th.format('Mode') + th.format('Host') + th.format('Kernel') +\
+ th.format('Test Time') + th.format('Result') + th.format('Issues') +\
+ th.format('Suspend') + th.format('Resume') + th.format('Detail') + '</tr>\n'
+
+ # export list into html
+ head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
+ '<td colspan=8 class="sus">Suspend Avg={2} '+\
+ '<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\
+ '<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\
+ '<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\
+ 'Resume Avg={6} '+\
+ '<span class=minval><a href="#r{10}min">Min={7}</a></span> '+\
+ '<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\
+ '<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\
+ '</tr>\n'
+ headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=8></td></tr>\n'
+ for mode in list:
+ # header line for each suspend mode
+ num = 0
+ tAvg, tMin, tMax, tMed = list[mode]['avg'], list[mode]['min'],\
+ list[mode]['max'], list[mode]['med']
+ count = len(list[mode]['data'])
+ if 'idx' in list[mode]:
+ iMin, iMed, iMax = list[mode]['idx']
+ html += head.format('%d' % count, mode.upper(),
+ '%.3f' % tAvg[0], '%.3f' % tMin[0], '%.3f' % tMed[0], '%.3f' % tMax[0],
+ '%.3f' % tAvg[1], '%.3f' % tMin[1], '%.3f' % tMed[1], '%.3f' % tMax[1],
+ mode.lower()
+ )
+ else:
+ iMin = iMed = iMax = [-1, -1, -1]
+ html += headnone.format('%d' % count, mode.upper())
+ for d in list[mode]['data']:
+ # row classes - alternate row color
+ rcls = ['alt'] if num % 2 == 1 else []
+ if d[6] != 'pass':
+ rcls.append('notice')
+ html += '<tr class="'+(' '.join(rcls))+'">\n' if len(rcls) > 0 else '<tr>\n'
+ # figure out if the line has sus or res highlighted
+ idx = list[mode]['data'].index(d)
+ tHigh = ['', '']
+ for i in range(2):
+ tag = 's%s' % mode if i == 0 else 'r%s' % mode
+ if idx == iMin[i]:
+ tHigh[i] = ' id="%smin" class=minval title="Minimum"' % tag
+ elif idx == iMax[i]:
+ tHigh[i] = ' id="%smax" class=maxval title="Maximum"' % tag
+ elif idx == iMed[i]:
+ tHigh[i] = ' id="%smed" class=medval title="Median"' % tag
+ html += td.format("%d" % (list[mode]['data'].index(d) + 1)) # row
+ html += td.format(mode) # mode
+ html += td.format(d[0]) # host
+ html += td.format(d[1]) # kernel
+ html += td.format(d[2]) # time
+ html += td.format(d[6]) # result
+ html += td.format(d[7]) # issues
+ html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('') # suspend
+ html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('') # resume
+ html += tdlink.format(d[5]) if d[5] else td.format('') # url
+ html += '</tr>\n'
+ num += 1
+
+ # flush the data to file
+ hf = open(htmlfile, 'w')
+ hf.write(html+'</table>\n</body>\n</html>\n')
+ hf.close()
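+
+# Usage sketch (hypothetical values; each dict carries the keys read above):
+#
+#     runs = [{'mode': 'mem', 'host': 'hostA', 'kernel': '4.19.0',
+#              'time': '2018/01/01 12:00:00', 'suspend': '512.3',
+#              'resume': '840.1', 'result': 'pass', 'issues': '',
+#              'url': 'suspend-test/output.html'}]
+#     createHTMLSummarySimple(runs, 'summary.html', 'suspend-tests')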
+
+def ordinal(value):
+ suffix = 'th'
+ if value < 10 or value > 19:
+ if value % 10 == 1:
+ suffix = 'st'
+ elif value % 10 == 2:
+ suffix = 'nd'
+ elif value % 10 == 3:
+ suffix = 'rd'
+ return '%d%s' % (value, suffix)
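+
+# For example: ordinal(1) == '1st', ordinal(2) == '2nd', ordinal(3) == '3rd',
+# ordinal(4) == '4th'; the teens stay regular (ordinal(11) == '11th',
+# ordinal(13) == '13th') while ordinal(21) == '21st' again.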
+
+# Function: createHTML
+# Description:
+# Create the output html file from the resident test data
+# Arguments:
+# testruns: array of Data objects from parseKernelLog or parseTraceLog
+# Output:
+# True if the html file was created, false if it failed
+def createHTML(testruns, testfail):
+ if len(testruns) < 1:
+ print('ERROR: Not enough test data to build a timeline')
+ return
+
+ kerror = False
+ for data in testruns:
+ if data.kerror:
+ kerror = True
+ data.normalizeTime(testruns[-1].tSuspended)
+
+ # html function templates
+ html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">{2}&rarr;</div>\n'
+ html_traceevent = '<div title="{0}" class="traceevent{6}" style="left:{1}%;top:{2}px;height:{3}px;width:{4}%;line-height:{3}px;{7}">{5}</div>\n'
+ html_cpuexec = '<div class="jiffie" style="left:{0}%;top:{1}px;height:{2}px;width:{3}%;background:{4};"></div>\n'
+ html_timetotal = '<table class="time1">\n<tr>'\
+ '<td class="green" title="{3}">{2} Suspend Time: <b>{0} ms</b></td>'\
+ '<td class="yellow" title="{4}">{2} Resume Time: <b>{1} ms</b></td>'\
+ '</tr>\n</table>\n'
+ html_timetotal2 = '<table class="time1">\n<tr>'\
+ '<td class="green" title="{4}">{3} Suspend Time: <b>{0} ms</b></td>'\
+ '<td class="gray" title="time spent in low-power mode with clock running">'+sysvals.suspendmode+' time: <b>{1} ms</b></td>'\
+ '<td class="yellow" title="{5}">{3} Resume Time: <b>{2} ms</b></td>'\
+ '</tr>\n</table>\n'
+ html_timetotal3 = '<table class="time1">\n<tr>'\
+ '<td class="green">Execution Time: <b>{0} ms</b></td>'\
+ '<td class="yellow">Command: <b>{1}</b></td>'\
+ '</tr>\n</table>\n'
+ html_timegroups = '<table class="time2">\n<tr>'\
+ '<td class="green" title="time from kernel enter_state({5}) to firmware mode [kernel time only]">{4}Kernel Suspend: {0} ms</td>'\
+ '<td class="purple">{4}Firmware Suspend: {1} ms</td>'\
+ '<td class="purple">{4}Firmware Resume: {2} ms</td>'\
+ '<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\
+ '</tr>\n</table>\n'
+ html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n'
+
+ # html format variables
+ scaleH = 20
+ if kerror:
+ scaleH = 40
+
+ # device timeline
+ devtl = Timeline(30, scaleH)
+
+ # write the test title and general info header
+ devtl.createHeader(sysvals, testruns[0].stamp)
+
+ # Generate the header for this timeline
+ for data in testruns:
+ tTotal = data.end - data.start
+ sktime, rktime = data.getTimeValues()
+ if(tTotal == 0):
+ doError('No timeline data')
+ if(data.tLow > 0):
+ low_time = '%.0f'%(data.tLow*1000)
+ if sysvals.suspendmode == 'command':
+ run_time = '%.0f'%((data.end-data.start)*1000)
+ if sysvals.testcommand:
+ testdesc = sysvals.testcommand
+ else:
+ testdesc = 'unknown'
+ if(len(testruns) > 1):
+ testdesc = ordinal(data.testnumber+1)+' '+testdesc
+ thtml = html_timetotal3.format(run_time, testdesc)
+ devtl.html += thtml
+ elif data.fwValid:
+ suspend_time = '%.0f'%(sktime + (data.fwSuspend/1000000.0))
+ resume_time = '%.0f'%(rktime + (data.fwResume/1000000.0))
+ testdesc1 = 'Total'
+ testdesc2 = ''
+ stitle = 'time from kernel enter_state(%s) to low-power mode [kernel & firmware time]' % sysvals.suspendmode
+ rtitle = 'time from low-power mode to return from kernel enter_state(%s) [firmware & kernel time]' % sysvals.suspendmode
+ if(len(testruns) > 1):
+ testdesc1 = testdesc2 = ordinal(data.testnumber+1)
+ testdesc2 += ' '
+ if(data.tLow == 0):
+ thtml = html_timetotal.format(suspend_time, \
+ resume_time, testdesc1, stitle, rtitle)
+ else:
+ thtml = html_timetotal2.format(suspend_time, low_time, \
+ resume_time, testdesc1, stitle, rtitle)
+ devtl.html += thtml
+ sftime = '%.3f'%(data.fwSuspend / 1000000.0)
+ rftime = '%.3f'%(data.fwResume / 1000000.0)
+ devtl.html += html_timegroups.format('%.3f'%sktime, \
+ sftime, rftime, '%.3f'%rktime, testdesc2, sysvals.suspendmode)
+ else:
+ suspend_time = '%.3f' % sktime
+ resume_time = '%.3f' % rktime
+ testdesc = 'Kernel'
+ stitle = 'time from kernel enter_state(%s) to firmware mode [kernel time only]' % sysvals.suspendmode
+ rtitle = 'time from firmware mode to return from kernel enter_state(%s) [kernel time only]' % sysvals.suspendmode
+ if(len(testruns) > 1):
+ testdesc = ordinal(data.testnumber+1)+' '+testdesc
+ if(data.tLow == 0):
+ thtml = html_timetotal.format(suspend_time, \
+ resume_time, testdesc, stitle, rtitle)
+ else:
+ thtml = html_timetotal2.format(suspend_time, low_time, \
+ resume_time, testdesc, stitle, rtitle)
+ devtl.html += thtml
+
+ if testfail:
+ devtl.html += html_fail.format(testfail)
+
+ # time scale for potentially multiple datasets
+ t0 = testruns[0].start
+ tMax = testruns[-1].end
+ tTotal = tMax - t0
+
+ # determine the maximum number of rows we need to draw
+ fulllist = []
+ threadlist = []
+ pscnt = 0
+ devcnt = 0
+ for data in testruns:
+ data.selectTimelineDevices('%f', tTotal, sysvals.mindevlen)
+ for group in data.devicegroups:
+ devlist = []
+ for phase in group:
+ for devname in data.tdevlist[phase]:
+ d = DevItem(data.testnumber, phase, data.dmesg[phase]['list'][devname])
+ devlist.append(d)
+ if d.isa('kth'):
+ threadlist.append(d)
+ else:
+ if d.isa('ps'):
+ pscnt += 1
+ else:
+ devcnt += 1
+ fulllist.append(d)
+ if sysvals.mixedphaseheight:
+ devtl.getPhaseRows(devlist)
+ if not sysvals.mixedphaseheight:
+ if len(threadlist) > 0 and len(fulllist) > 0:
+ if pscnt > 0 and devcnt > 0:
+ msg = 'user processes & device pm callbacks'
+ elif pscnt > 0:
+ msg = 'user processes'
+ else:
+ msg = 'device pm callbacks'
+ d = testruns[0].addHorizontalDivider(msg, testruns[-1].end)
+ fulllist.insert(0, d)
+ devtl.getPhaseRows(fulllist)
+ if len(threadlist) > 0:
+ d = testruns[0].addHorizontalDivider('asynchronous kernel threads', testruns[-1].end)
+ threadlist.insert(0, d)
+ devtl.getPhaseRows(threadlist, devtl.rows)
+ devtl.calcTotalRows()
+
+ # draw the full timeline
+ devtl.createZoomBox(sysvals.suspendmode, len(testruns))
+ phases = {'suspend':[],'resume':[]}
+ for phase in data.dmesg:
+ if 'resume' in phase:
+ phases['resume'].append(phase)
+ else:
+ phases['suspend'].append(phase)
+
+ # draw each test run chronologically
+ for data in testruns:
+ # now draw the actual timeline blocks
+ for dir in phases:
+ # draw suspend and resume blocks separately
+ bname = '%s%d' % (dir[0], data.testnumber)
+ if dir == 'suspend':
+ m0 = data.start
+ mMax = data.tSuspended
+ left = '%f' % (((m0-t0)*100.0)/tTotal)
+ else:
+ m0 = data.tSuspended
+ mMax = data.end
+ # in an x2 run, remove any gap between blocks
+ if len(testruns) > 1 and data.testnumber == 0:
+ mMax = testruns[1].start
+ left = '%f' % ((((m0-t0)*100.0)+sysvals.srgap/2)/tTotal)
+ mTotal = mMax - m0
+ # if a timeline block is 0 length, skip altogether
+ if mTotal == 0:
+ continue
+ width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
+ devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH)
+ for b in sorted(phases[dir]):
+ # draw the phase color background
+ phase = data.dmesg[b]
+ length = phase['end']-phase['start']
+ left = '%f' % (((phase['start']-m0)*100.0)/mTotal)
+ width = '%f' % ((length*100.0)/mTotal)
+ devtl.html += devtl.html_phase.format(left, width, \
+ '%.3f'%devtl.scaleH, '%.3f'%devtl.bodyH, \
+ data.dmesg[b]['color'], '')
+ for e in data.errorinfo[dir]:
+ # draw red lines for any kernel errors found
+ type, t, idx1, idx2 = e
+ id = '%d_%d' % (idx1, idx2)
+ right = '%f' % (((mMax-t)*100.0)/mTotal)
+ devtl.html += html_error.format(right, id, type)
+ for b in sorted(phases[dir]):
+ # draw the devices for this phase
+ phaselist = data.dmesg[b]['list']
+ for d in data.tdevlist[b]:
+ name = d
+ drv = ''
+ dev = phaselist[d]
+ xtraclass = ''
+ xtrainfo = ''
+ xtrastyle = ''
+ if 'htmlclass' in dev:
+ xtraclass = dev['htmlclass']
+ if 'color' in dev:
+ xtrastyle = 'background:%s;' % dev['color']
+ if(d in sysvals.devprops):
+ name = sysvals.devprops[d].altName(d)
+ xtraclass = sysvals.devprops[d].xtraClass()
+ xtrainfo = sysvals.devprops[d].xtraInfo()
+ elif xtraclass == ' kth':
+ xtrainfo = ' kernel_thread'
+ if('drv' in dev and dev['drv']):
+ drv = ' {%s}' % dev['drv']
+ rowheight = devtl.phaseRowHeight(data.testnumber, b, dev['row'])
+ rowtop = devtl.phaseRowTop(data.testnumber, b, dev['row'])
+ top = '%.3f' % (rowtop + devtl.scaleH)
+ left = '%f' % (((dev['start']-m0)*100)/mTotal)
+ width = '%f' % (((dev['end']-dev['start'])*100)/mTotal)
+ length = ' (%0.3f ms) ' % ((dev['end']-dev['start'])*1000)
+ title = name+drv+xtrainfo+length
+ if sysvals.suspendmode == 'command':
+ title += sysvals.testcommand
+ elif xtraclass == ' ps':
+ if 'suspend' in b:
+ title += 'pre_suspend_process'
+ else:
+ title += 'post_resume_process'
+ else:
+ title += b
+ devtl.html += devtl.html_device.format(dev['id'], \
+ title, left, top, '%.3f'%rowheight, width, \
+ d+drv, xtraclass, xtrastyle)
+ if('cpuexec' in dev):
+ for t in sorted(dev['cpuexec']):
+ start, end = t
+ j = float(dev['cpuexec'][t]) / 5
+ if j > 1.0:
+ j = 1.0
+ height = '%.3f' % (rowheight/3)
+ top = '%.3f' % (rowtop + devtl.scaleH + 2*rowheight/3)
+ left = '%f' % (((start-m0)*100)/mTotal)
+ width = '%f' % ((end-start)*100/mTotal)
+ color = 'rgba(255, 0, 0, %f)' % j
+ devtl.html += \
+ html_cpuexec.format(left, top, height, width, color)
+ if('src' not in dev):
+ continue
+ # draw any trace events for this device
+ for e in dev['src']:
+ height = '%.3f' % devtl.rowH
+ top = '%.3f' % (rowtop + devtl.scaleH + (e.row*devtl.rowH))
+ left = '%f' % (((e.time-m0)*100)/mTotal)
+ width = '%f' % (e.length*100/mTotal)
+ xtrastyle = ''
+ if e.color:
+ xtrastyle = 'background:%s;' % e.color
+ devtl.html += \
+ html_traceevent.format(e.title(), \
+ left, top, height, width, e.text(), '', xtrastyle)
+ # draw the time scale, try to make the number of labels readable
+ devtl.createTimeScale(m0, mMax, tTotal, dir)
+ devtl.html += '</div>\n'
+
+ # timeline is finished
+ devtl.html += '</div>\n</div>\n'
+
+ # draw a legend which describes the phases by color
+ if sysvals.suspendmode != 'command':
+ data = testruns[-1]
+ devtl.html += '<div class="legend">\n'
+ pdelta = 100.0/len(data.phases)
+ pmargin = pdelta / 4.0
+ for phase in data.phases:
+ tmp = phase.split('_')
+ id = tmp[0][0]
+ if(len(tmp) > 1):
+ id += tmp[1][0]
+ order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+ name = phase.replace('_', ' &nbsp;')
+ devtl.html += devtl.html_legend.format(order, \
+ data.dmesg[phase]['color'], name, id)
+ devtl.html += '</div>\n'
+
+ hf = open(sysvals.htmlfile, 'w')
+ addCSS(hf, sysvals, len(testruns), kerror)
+
+ # write the device timeline
+ hf.write(devtl.html)
+ hf.write('<div id="devicedetailtitle"></div>\n')
+ hf.write('<div id="devicedetail" style="display:none;">\n')
+ # draw the colored boxes for the device detail section
+ for data in testruns:
+ hf.write('<div id="devicedetail%d">\n' % data.testnumber)
+ pscolor = 'linear-gradient(to top left, #ccc, #eee)'
+ hf.write(devtl.html_phaselet.format('pre_suspend_process', \
+ '0', '0', pscolor))
+ for b in data.phases:
+ phase = data.dmesg[b]
+ length = phase['end']-phase['start']
+ left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
+ width = '%.3f' % ((length*100.0)/tTotal)
+ hf.write(devtl.html_phaselet.format(b, left, width, \
+ data.dmesg[b]['color']))
+ hf.write(devtl.html_phaselet.format('post_resume_process', \
+ '0', '0', pscolor))
+ if sysvals.suspendmode == 'command':
+ hf.write(devtl.html_phaselet.format('cmdexec', '0', '0', pscolor))
+ hf.write('</div>\n')
+ hf.write('</div>\n')
+
+ # write the ftrace data (callgraph)
+ if sysvals.cgtest >= 0 and len(testruns) > sysvals.cgtest:
+ data = testruns[sysvals.cgtest]
+ else:
+ data = testruns[-1]
+ if sysvals.usecallgraph:
+ addCallgraphs(sysvals, hf, data)
+
+ # add the test log as a hidden div
+ if sysvals.testlog and sysvals.logmsg:
+ hf.write('<div id="testlog" style="display:none;">\n'+sysvals.logmsg+'</div>\n')
+ # add the dmesg log as a hidden div
+ if sysvals.dmesglog and sysvals.dmesgfile:
+ hf.write('<div id="dmesglog" style="display:none;">\n')
+ lf = sysvals.openlog(sysvals.dmesgfile, 'r')
+ for line in lf:
+ line = line.replace('<', '&lt;').replace('>', '&gt;')
+ hf.write(line)
+ lf.close()
+ hf.write('</div>\n')
+ # add the ftrace log as a hidden div
+ if sysvals.ftracelog and sysvals.ftracefile:
+ hf.write('<div id="ftracelog" style="display:none;">\n')
+ lf = sysvals.openlog(sysvals.ftracefile, 'r')
+ for line in lf:
+ hf.write(line)
+ lf.close()
+ hf.write('</div>\n')
+
+ # write the footer and close
+ addScriptCode(hf, testruns)
+ hf.write('</body>\n</html>\n')
+ hf.close()
+ return True
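+
+# Usage sketch (hypothetical output path): rendering a report from parsed
+# ftrace data in one pass:
+#
+#     sysvals.htmlfile = 'output.html'
+#     testdata, errstr = parseTraceLog()
+#     createHTML(testdata, errstr)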
+
+def addCSS(hf, sv, testcount=1, kerror=False, extra=''):
+ kernel = sv.stamp['kernel']
+ host = sv.hostname[0].upper()+sv.hostname[1:]
+ mode = sv.suspendmode
+ if sv.suspendmode in suspendmodename:
+ mode = suspendmodename[sv.suspendmode]
+ title = host+' '+mode+' '+kernel
+
+ # various format changes by flags
+ cgchk = 'checked'
+ cgnchk = 'not(:checked)'
+ if sv.cgexp:
+ cgchk = 'not(:checked)'
+ cgnchk = 'checked'
+
+ hoverZ = 'z-index:8;'
+ if sv.usedevsrc:
+ hoverZ = ''
+
+ devlistpos = 'absolute'
+ if testcount > 1:
+ devlistpos = 'relative'
+
+ scaleTH = 20
+ if kerror:
+ scaleTH = 60
+
+ # write the html header first (html head, css code, up to body start)
+ html_header = '<!DOCTYPE html>\n<html>\n<head>\n\
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
+ <title>'+title+'</title>\n\
+ <style type=\'text/css\'>\n\
+ body {overflow-y:scroll;}\n\
+ .stamp {width:100%;text-align:center;background:gray;line-height:30px;color:white;font:25px Arial;}\n\
+ .stamp.sysinfo {font:10px Arial;}\n\
+ .callgraph {margin-top:30px;box-shadow:5px 5px 20px black;}\n\
+ .callgraph article * {padding-left:28px;}\n\
+ h1 {color:black;font:bold 30px Times;}\n\
+ t0 {color:black;font:bold 30px Times;}\n\
+ t1 {color:black;font:30px Times;}\n\
+ t2 {color:black;font:25px Times;}\n\
+ t3 {color:black;font:20px Times;white-space:nowrap;}\n\
+ t4 {color:black;font:bold 30px Times;line-height:60px;white-space:nowrap;}\n\
+ cS {font:bold 13px Times;}\n\
+ table {width:100%;}\n\
+ .gray {background:rgba(80,80,80,0.1);}\n\
+ .green {background:rgba(204,255,204,0.4);}\n\
+ .purple {background:rgba(128,0,128,0.2);}\n\
+ .yellow {background:rgba(255,255,204,0.4);}\n\
+ .blue {background:rgba(169,208,245,0.4);}\n\
+ .time1 {font:22px Arial;border:1px solid;}\n\
+ .time2 {font:15px Arial;border-bottom:1px solid;border-left:1px solid;border-right:1px solid;}\n\
+ .testfail {font:bold 22px Arial;color:red;border:1px dashed;}\n\
+ td {text-align:center;}\n\
+ r {color:#500000;font:15px Tahoma;}\n\
+ n {color:#505050;font:15px Tahoma;}\n\
+ .tdhl {color:red;}\n\
+ .hide {display:none;}\n\
+ .pf {display:none;}\n\
+ .pf:'+cgchk+' + label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/><rect x="8" y="4" width="2" height="10" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+ .pf:'+cgnchk+' ~ label {background:url(\'data:image/svg+xml;utf,<?xml version="1.0" standalone="no"?><svg xmlns="http://www.w3.org/2000/svg" height="18" width="18" version="1.1"><circle cx="9" cy="9" r="8" stroke="black" stroke-width="1" fill="white"/><rect x="4" y="8" width="10" height="2" style="fill:black;stroke-width:0"/></svg>\') no-repeat left center;}\n\
+ .pf:'+cgchk+' ~ *:not(:nth-child(2)) {display:none;}\n\
+ .zoombox {position:relative;width:100%;overflow-x:scroll;-webkit-user-select:none;-moz-user-select:none;user-select:none;}\n\
+ .timeline {position:relative;font-size:14px;cursor:pointer;width:100%; overflow:hidden;background:linear-gradient(#cccccc, white);}\n\
+ .thread {position:absolute;height:0%;overflow:hidden;z-index:7;line-height:30px;font-size:14px;border:1px solid;text-align:center;white-space:nowrap;}\n\
+ .thread.ps {border-radius:3px;background:linear-gradient(to top, #ccc, #eee);}\n\
+ .thread:hover {background:white;border:1px solid red;'+hoverZ+'}\n\
+ .thread.sec,.thread.sec:hover {background:black;border:0;color:white;line-height:15px;font-size:10px;}\n\
+ .hover {background:white;border:1px solid red;'+hoverZ+'}\n\
+ .hover.sync {background:white;}\n\
+ .hover.bg,.hover.kth,.hover.sync,.hover.ps {background:white;}\n\
+ .jiffie {position:absolute;pointer-events: none;z-index:8;}\n\
+ .traceevent {position:absolute;font-size:10px;z-index:7;overflow:hidden;color:black;text-align:center;white-space:nowrap;border-radius:5px;border:1px solid black;background:linear-gradient(to bottom right,#CCC,#969696);}\n\
+ .traceevent:hover {color:white;font-weight:bold;border:1px solid white;}\n\
+ .phase {position:absolute;overflow:hidden;border:0px;text-align:center;}\n\
+ .phaselet {float:left;overflow:hidden;border:0px;text-align:center;min-height:100px;font-size:24px;}\n\
+ .t {position:absolute;line-height:'+('%d'%scaleTH)+'px;pointer-events:none;top:0;height:100%;border-right:1px solid black;z-index:6;}\n\
+ .err {position:absolute;top:0%;height:100%;border-right:3px solid red;color:red;font:bold 14px Times;line-height:18px;}\n\
+ .legend {position:relative; width:100%; height:40px; text-align:center;margin-bottom:20px}\n\
+ .legend .square {position:absolute;cursor:pointer;top:10px; width:0px;height:20px;border:1px solid;padding-left:20px;}\n\
+ button {height:40px;width:200px;margin-bottom:20px;margin-top:20px;font-size:24px;}\n\
+ .btnfmt {position:relative;float:right;height:25px;width:auto;margin-top:3px;margin-bottom:0;font-size:10px;text-align:center;}\n\
+ .devlist {position:'+devlistpos+';width:190px;}\n\
+ a:link {color:white;text-decoration:none;}\n\
+ a:visited {color:white;}\n\
+ a:hover {color:white;}\n\
+ a:active {color:white;}\n\
+ .version {position:relative;float:left;color:white;font-size:10px;line-height:30px;margin-left:10px;}\n\
+ #devicedetail {min-height:100px;box-shadow:5px 5px 20px black;}\n\
+ .tblock {position:absolute;height:100%;background:#ddd;}\n\
+ .tback {position:absolute;width:100%;background:linear-gradient(#ccc, #ddd);}\n\
+ .bg {z-index:1;}\n\
+'+extra+'\
+ </style>\n</head>\n<body>\n'
+ hf.write(html_header)
+
+# Function: addScriptCode
+# Description:
+# Adds the javascript code to the output html
+# Arguments:
+# hf: the open html file pointer
+# testruns: array of Data objects from parseKernelLog or parseTraceLog
+def addScriptCode(hf, testruns):
+ t0 = testruns[0].start * 1000
+ tMax = testruns[-1].end * 1000
+ # create an array in javascript memory with the device details
+ detail = ' var devtable = [];\n'
+ for data in testruns:
+ topo = data.deviceTopology()
+ detail += ' devtable[%d] = "%s";\n' % (data.testnumber, topo)
+ detail += ' var bounds = [%f,%f];\n' % (t0, tMax)
+ # add the code which will manipulate the data in the browser
+ script_code = \
+ '<script type="text/javascript">\n'+detail+\
+ ' var resolution = -1;\n'\
+ ' var dragval = [0, 0];\n'\
+ ' function redrawTimescale(t0, tMax, tS) {\n'\
+ ' var rline = \'<div class="t" style="left:0;border-left:1px solid black;border-right:0;">\';\n'\
+ ' var tTotal = tMax - t0;\n'\
+ ' var list = document.getElementsByClassName("tblock");\n'\
+ ' for (var i = 0; i < list.length; i++) {\n'\
+ ' var timescale = list[i].getElementsByClassName("timescale")[0];\n'\
+ ' var m0 = t0 + (tTotal*parseFloat(list[i].style.left)/100);\n'\
+ ' var mTotal = tTotal*parseFloat(list[i].style.width)/100;\n'\
+ ' var mMax = m0 + mTotal;\n'\
+ ' var html = "";\n'\
+ ' var divTotal = Math.floor(mTotal/tS) + 1;\n'\
+ ' if(divTotal > 1000) continue;\n'\
+ ' var divEdge = (mTotal - tS*(divTotal-1))*100/mTotal;\n'\
+ ' var pos = 0.0, val = 0.0;\n'\
+ ' for (var j = 0; j < divTotal; j++) {\n'\
+ ' var htmlline = "";\n'\
+ ' var mode = list[i].id[5];\n'\
+ ' if(mode == "s") {\n'\
+ ' pos = 100 - (((j)*tS*100)/mTotal) - divEdge;\n'\
+ ' val = (j-divTotal+1)*tS;\n'\
+ ' if(j == divTotal - 1)\n'\
+ ' htmlline = \'<div class="t" style="right:\'+pos+\'%"><cS>S&rarr;</cS></div>\';\n'\
+ ' else\n'\
+ ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+ ' } else {\n'\
+ ' pos = 100 - (((j)*tS*100)/mTotal);\n'\
+ ' val = (j)*tS;\n'\
+ ' htmlline = \'<div class="t" style="right:\'+pos+\'%">\'+val+\'ms</div>\';\n'\
+ ' if(j == 0)\n'\
+ ' if(mode == "r")\n'\
+ ' htmlline = rline+"<cS>&larr;R</cS></div>";\n'\
+ ' else\n'\
+ ' htmlline = rline+"<cS>0ms</div>";\n'\
+ ' }\n'\
+ ' html += htmlline;\n'\
+ ' }\n'\
+ ' timescale.innerHTML = html;\n'\
+ ' }\n'\
+ ' }\n'\
+ ' function zoomTimeline() {\n'\
+ ' var dmesg = document.getElementById("dmesg");\n'\
+ ' var zoombox = document.getElementById("dmesgzoombox");\n'\
+ ' var left = zoombox.scrollLeft;\n'\
+ ' var val = parseFloat(dmesg.style.width);\n'\
+ ' var newval = 100;\n'\
+ ' var sh = window.outerWidth / 2;\n'\
+ ' if(this.id == "zoomin") {\n'\
+ ' newval = val * 1.2;\n'\
+ ' if(newval > 910034) newval = 910034;\n'\
+ ' dmesg.style.width = newval+"%";\n'\
+ ' zoombox.scrollLeft = ((left + sh) * newval / val) - sh;\n'\
+ ' } else if (this.id == "zoomout") {\n'\
+ ' newval = val / 1.2;\n'\
+ ' if(newval < 100) newval = 100;\n'\
+ ' dmesg.style.width = newval+"%";\n'\
+ ' zoombox.scrollLeft = ((left + sh) * newval / val) - sh;\n'\
+ ' } else {\n'\
+ ' zoombox.scrollLeft = 0;\n'\
+ ' dmesg.style.width = "100%";\n'\
+ ' }\n'\
+ ' var tS = [10000, 5000, 2000, 1000, 500, 200, 100, 50, 20, 10, 5, 2, 1];\n'\
+ ' var t0 = bounds[0];\n'\
+ ' var tMax = bounds[1];\n'\
+ ' var tTotal = tMax - t0;\n'\
+ ' var wTotal = tTotal * 100.0 / newval;\n'\
+ ' var idx = 7*window.innerWidth/1100;\n'\
+ ' for(var i = 0; (i < tS.length)&&((wTotal / tS[i]) < idx); i++);\n'\
+ ' if(i >= tS.length) i = tS.length - 1;\n'\
+ ' if(tS[i] == resolution) return;\n'\
+ ' resolution = tS[i];\n'\
+ ' redrawTimescale(t0, tMax, tS[i]);\n'\
+ ' }\n'\
+ ' function deviceName(title) {\n'\
+ ' var name = title.slice(0, title.indexOf(" ("));\n'\
+ ' return name;\n'\
+ ' }\n'\
+ ' function deviceHover() {\n'\
+ ' var name = deviceName(this.title);\n'\
+ ' var dmesg = document.getElementById("dmesg");\n'\
+ ' var dev = dmesg.getElementsByClassName("thread");\n'\
+ ' var cpu = -1;\n'\
+ ' if(name.match("CPU_ON\[[0-9]*\]"))\n'\
+ ' cpu = parseInt(name.slice(7));\n'\
+ ' else if(name.match("CPU_OFF\[[0-9]*\]"))\n'\
+ ' cpu = parseInt(name.slice(8));\n'\
+ ' for (var i = 0; i < dev.length; i++) {\n'\
+ ' dname = deviceName(dev[i].title);\n'\
+ ' var cname = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\
+ ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\
+ ' (name == dname))\n'\
+ ' {\n'\
+ ' dev[i].className = "hover "+cname;\n'\
+ ' } else {\n'\
+ ' dev[i].className = cname;\n'\
+ ' }\n'\
+ ' }\n'\
+ ' }\n'\
+ ' function deviceUnhover() {\n'\
+ ' var dmesg = document.getElementById("dmesg");\n'\
+ ' var dev = dmesg.getElementsByClassName("thread");\n'\
+ ' for (var i = 0; i < dev.length; i++) {\n'\
+ ' dev[i].className = dev[i].className.slice(dev[i].className.indexOf("thread"));\n'\
+ ' }\n'\
+ ' }\n'\
+ ' function deviceTitle(title, total, cpu) {\n'\
+ ' var prefix = "Total";\n'\
+ ' if(total.length > 3) {\n'\
+ ' prefix = "Average";\n'\
+ ' total[1] = (total[1]+total[3])/2;\n'\
+ ' total[2] = (total[2]+total[4])/2;\n'\
+ ' }\n'\
+ ' var devtitle = document.getElementById("devicedetailtitle");\n'\
+ ' var name = deviceName(title);\n'\
+ ' if(cpu >= 0) name = "CPU"+cpu;\n'\
+ ' var driver = "";\n'\
+ ' var tS = "<t2>(</t2>";\n'\
+ ' var tR = "<t2>)</t2>";\n'\
+ ' if(total[1] > 0)\n'\
+ ' tS = "<t2>("+prefix+" Suspend:</t2><t0> "+total[1].toFixed(3)+" ms</t0> ";\n'\
+ ' if(total[2] > 0)\n'\
+ ' tR = " <t2>"+prefix+" Resume:</t2><t0> "+total[2].toFixed(3)+" ms<t2>)</t2></t0>";\n'\
+ ' var s = title.indexOf("{");\n'\
+ ' var e = title.indexOf("}");\n'\
+ ' if((s >= 0) && (e >= 0))\n'\
+ ' driver = title.slice(s+1, e) + " <t1>@</t1> ";\n'\
+ ' if(total[1] > 0 && total[2] > 0)\n'\
+ ' devtitle.innerHTML = "<t0>"+driver+name+"</t0> "+tS+tR;\n'\
+ ' else\n'\
+ ' devtitle.innerHTML = "<t0>"+title+"</t0>";\n'\
+ ' return name;\n'\
+ ' }\n'\
+ ' function deviceDetail() {\n'\
+ ' var devinfo = document.getElementById("devicedetail");\n'\
+ ' devinfo.style.display = "block";\n'\
+ ' var name = deviceName(this.title);\n'\
+ ' var cpu = -1;\n'\
+ ' if(name.match("CPU_ON\[[0-9]*\]"))\n'\
+ ' cpu = parseInt(name.slice(7));\n'\
+ ' else if(name.match("CPU_OFF\[[0-9]*\]"))\n'\
+ ' cpu = parseInt(name.slice(8));\n'\
+ ' var dmesg = document.getElementById("dmesg");\n'\
+ ' var dev = dmesg.getElementsByClassName("thread");\n'\
+ ' var idlist = [];\n'\
+ ' var pdata = [[]];\n'\
+ ' if(document.getElementById("devicedetail1"))\n'\
+ ' pdata = [[], []];\n'\
+ ' var pd = pdata[0];\n'\
+ ' var total = [0.0, 0.0, 0.0];\n'\
+ ' for (var i = 0; i < dev.length; i++) {\n'\
+ ' dname = deviceName(dev[i].title);\n'\
+ ' if((cpu >= 0 && dname.match("CPU_O[NF]*\\\[*"+cpu+"\\\]")) ||\n'\
+ ' (name == dname))\n'\
+ ' {\n'\
+ ' idlist[idlist.length] = dev[i].id;\n'\
+ ' var tidx = 1;\n'\
+ ' if(dev[i].id[0] == "a") {\n'\
+ ' pd = pdata[0];\n'\
+ ' } else {\n'\
+ ' if(pdata.length == 1) pdata[1] = [];\n'\
+ ' if(total.length == 3) total[3]=total[4]=0.0;\n'\
+ ' pd = pdata[1];\n'\
+ ' tidx = 3;\n'\
+ ' }\n'\
+ ' var info = dev[i].title.split(" ");\n'\
+ ' var pname = info[info.length-1];\n'\
+ ' pd[pname] = parseFloat(info[info.length-3].slice(1));\n'\
+ ' total[0] += pd[pname];\n'\
+ ' if(pname.indexOf("suspend") >= 0)\n'\
+ ' total[tidx] += pd[pname];\n'\
+ ' else\n'\
+ ' total[tidx+1] += pd[pname];\n'\
+ ' }\n'\
+ ' }\n'\
+ ' var devname = deviceTitle(this.title, total, cpu);\n'\
+ ' var left = 0.0;\n'\
+ ' for (var t = 0; t < pdata.length; t++) {\n'\
+ ' pd = pdata[t];\n'\
+ ' devinfo = document.getElementById("devicedetail"+t);\n'\
+ ' var phases = devinfo.getElementsByClassName("phaselet");\n'\
+ ' for (var i = 0; i < phases.length; i++) {\n'\
+ ' if(phases[i].id in pd) {\n'\
+ ' var w = 100.0*pd[phases[i].id]/total[0];\n'\
+ ' var fs = 32;\n'\
+ ' if(w < 8) fs = 4*w | 0;\n'\
+ ' var fs2 = fs*3/4;\n'\
+ ' phases[i].style.width = w+"%";\n'\
+ ' phases[i].style.left = left+"%";\n'\
+ ' phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms";\n'\
+ ' left += w;\n'\
+ ' var time = "<t4 style=\\"font-size:"+fs+"px\\">"+pd[phases[i].id]+" ms<br></t4>";\n'\
+ ' var pname = "<t3 style=\\"font-size:"+fs2+"px\\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>";\n'\
+ ' phases[i].innerHTML = time+pname;\n'\
+ ' } else {\n'\
+ ' phases[i].style.width = "0%";\n'\
+ ' phases[i].style.left = left+"%";\n'\
+ ' }\n'\
+ ' }\n'\
+ ' }\n'\
+ ' if(typeof devstats !== \'undefined\')\n'\
+ ' callDetail(this.id, this.title);\n'\
+ ' var cglist = document.getElementById("callgraphs");\n'\
+ ' if(!cglist) return;\n'\
+ ' var cg = cglist.getElementsByClassName("atop");\n'\
+ ' if(cg.length < 10) return;\n'\
+ ' for (var i = 0; i < cg.length; i++) {\n'\
+ ' var cgid = cg[i].id.split("x")[0];\n'\
+ ' if(idlist.indexOf(cgid) >= 0) {\n'\
+ ' cg[i].style.display = "block";\n'\
+ ' } else {\n'\
+ ' cg[i].style.display = "none";\n'\
+ ' }\n'\
+ ' }\n'\
+ ' }\n'\
+ ' function callDetail(devid, devtitle) {\n'\
+ ' if(!(devid in devstats) || devstats[devid].length < 1)\n'\
+ ' return;\n'\
+ ' var list = devstats[devid];\n'\
+ ' var tmp = devtitle.split(" ");\n'\
+ ' var name = tmp[0], phase = tmp[tmp.length-1];\n'\
+ ' var dd = document.getElementById(phase);\n'\
+ ' var total = parseFloat(tmp[1].slice(1));\n'\
+ ' var mlist = [];\n'\
+ ' var maxlen = 0;\n'\
+ ' var info = [];\n'\
+ ' for(var i in list) {\n'\
+ ' if(list[i][0] == "@") {\n'\
+ ' info = list[i].split("|");\n'\
+ ' continue;\n'\
+ ' }\n'\
+ ' var tmp = list[i].split("|");\n'\
+ ' var t = parseFloat(tmp[0]), f = tmp[1], c = parseInt(tmp[2]);\n'\
+ ' var p = (t*100.0/total).toFixed(2);\n'\
+ ' mlist[mlist.length] = [f, c, t.toFixed(2), p+"%"];\n'\
+ ' if(f.length > maxlen)\n'\
+ ' maxlen = f.length;\n'\
+ ' }\n'\
+ ' var pad = 5;\n'\
+ ' if(mlist.length == 0) pad = 30;\n'\
+ ' var html = \'<div style="padding-top:\'+pad+\'px"><t3> <b>\'+name+\':</b>\';\n'\
+ ' if(info.length > 2)\n'\
+ ' html += " start=<b>"+info[1]+"</b>, end=<b>"+info[2]+"</b>";\n'\
+ ' if(info.length > 3)\n'\
+ ' html += ", length<i>(w/o overhead)</i>=<b>"+info[3]+" ms</b>";\n'\
+ ' if(info.length > 4)\n'\
+ ' html += ", return=<b>"+info[4]+"</b>";\n'\
+ ' html += "</t3></div>";\n'\
+ ' if(mlist.length > 0) {\n'\
+ ' html += \'<table class=fstat style="padding-top:\'+(maxlen*5)+\'px;"><tr><th>Function</th>\';\n'\
+ ' for(var i in mlist)\n'\
+ ' html += "<td class=vt>"+mlist[i][0]+"</td>";\n'\
+ ' html += "</tr><tr><th>Calls</th>";\n'\
+ ' for(var i in mlist)\n'\
+ ' html += "<td>"+mlist[i][1]+"</td>";\n'\
+ ' html += "</tr><tr><th>Time(ms)</th>";\n'\
+ ' for(var i in mlist)\n'\
+ ' html += "<td>"+mlist[i][2]+"</td>";\n'\
+ ' html += "</tr><tr><th>Percent</th>";\n'\
+ ' for(var i in mlist)\n'\
+ ' html += "<td>"+mlist[i][3]+"</td>";\n'\
+ ' html += "</tr></table>";\n'\
+ ' }\n'\
+ ' dd.innerHTML = html;\n'\
+ ' var height = (maxlen*5)+100;\n'\
+ ' dd.style.height = height+"px";\n'\
+ ' document.getElementById("devicedetail").style.height = height+"px";\n'\
+ ' }\n'\
+ ' function callSelect() {\n'\
+ ' var cglist = document.getElementById("callgraphs");\n'\
+ ' if(!cglist) return;\n'\
+ ' var cg = cglist.getElementsByClassName("atop");\n'\
+ ' for (var i = 0; i < cg.length; i++) {\n'\
+ ' if(this.id == cg[i].id) {\n'\
+ ' cg[i].style.display = "block";\n'\
+ ' } else {\n'\
+ ' cg[i].style.display = "none";\n'\
+ ' }\n'\
+ ' }\n'\
+ ' }\n'\
+ ' function devListWindow(e) {\n'\
+ ' var win = window.open();\n'\
+ ' var html = "<title>"+e.target.innerHTML+"</title>"+\n'\
+ ' "<style type=\\"text/css\\">"+\n'\
+ ' " ul {list-style-type:circle;padding-left:10px;margin-left:10px;}"+\n'\
+ ' "</style>"\n'\
+ ' var dt = devtable[0];\n'\
+ ' if(e.target.id != "devlist1")\n'\
+ ' dt = devtable[1];\n'\
+ ' win.document.write(html+dt);\n'\
+ ' }\n'\
+ ' function errWindow() {\n'\
+ ' var range = this.id.split("_");\n'\
+ ' var idx1 = parseInt(range[0]);\n'\
+ ' var idx2 = parseInt(range[1]);\n'\
+ ' var win = window.open();\n'\
+ ' var log = document.getElementById("dmesglog");\n'\
+ ' var title = "<title>dmesg log</title>";\n'\
+ ' var text = log.innerHTML.split("\\n");\n'\
+ ' var html = "";\n'\
+ ' for(var i = 0; i < text.length; i++) {\n'\
+ ' if(i == idx1) {\n'\
+ ' html += "<e id=target>"+text[i]+"</e>\\n";\n'\
+ ' } else if(i > idx1 && i <= idx2) {\n'\
+ ' html += "<e>"+text[i]+"</e>\\n";\n'\
+ ' } else {\n'\
+ ' html += text[i]+"\\n";\n'\
+ ' }\n'\
+ ' }\n'\
+ ' win.document.write("<style>e{color:red}</style>"+title+"<pre>"+html+"</pre>");\n'\
+ ' win.location.hash = "#target";\n'\
+ ' win.document.close();\n'\
+ ' }\n'\
+ ' function logWindow(e) {\n'\
+ ' var name = e.target.id.slice(4);\n'\
+ ' var win = window.open();\n'\
+ ' var log = document.getElementById(name+"log");\n'\
+ ' var title = "<title>"+document.title.split(" ")[0]+" "+name+" log</title>";\n'\
+ ' win.document.write(title+"<pre>"+log.innerHTML+"</pre>");\n'\
+ ' win.document.close();\n'\
+ ' }\n'\
+ ' function onMouseDown(e) {\n'\
+ ' dragval[0] = e.clientX;\n'\
+ ' dragval[1] = document.getElementById("dmesgzoombox").scrollLeft;\n'\
+ ' document.onmousemove = onMouseMove;\n'\
+ ' }\n'\
+ ' function onMouseMove(e) {\n'\
+ ' var zoombox = document.getElementById("dmesgzoombox");\n'\
+ ' zoombox.scrollLeft = dragval[1] + dragval[0] - e.clientX;\n'\
+ ' }\n'\
+ ' function onMouseUp(e) {\n'\
+ ' document.onmousemove = null;\n'\
+ ' }\n'\
+ ' function onKeyPress(e) {\n'\
+ ' var c = e.charCode;\n'\
+ ' if(c != 42 && c != 43 && c != 45) return;\n'\
+ ' var click = document.createEvent("Events");\n'\
+ ' click.initEvent("click", true, false);\n'\
+ ' if(c == 43) \n'\
+ ' document.getElementById("zoomin").dispatchEvent(click);\n'\
+ ' else if(c == 45)\n'\
+ ' document.getElementById("zoomout").dispatchEvent(click);\n'\
+ ' else if(c == 42)\n'\
+ ' document.getElementById("zoomdef").dispatchEvent(click);\n'\
+ ' }\n'\
+ ' window.addEventListener("resize", function () {zoomTimeline();});\n'\
+ ' window.addEventListener("load", function () {\n'\
+ ' var dmesg = document.getElementById("dmesg");\n'\
+ ' dmesg.style.width = "100%"\n'\
+ ' dmesg.onmousedown = onMouseDown;\n'\
+ ' document.onmouseup = onMouseUp;\n'\
+ ' document.onkeypress = onKeyPress;\n'\
+ ' document.getElementById("zoomin").onclick = zoomTimeline;\n'\
+ ' document.getElementById("zoomout").onclick = zoomTimeline;\n'\
+ ' document.getElementById("zoomdef").onclick = zoomTimeline;\n'\
+ ' var list = document.getElementsByClassName("err");\n'\
+ ' for (var i = 0; i < list.length; i++)\n'\
+ ' list[i].onclick = errWindow;\n'\
+ ' var list = document.getElementsByClassName("logbtn");\n'\
+ ' for (var i = 0; i < list.length; i++)\n'\
+ ' list[i].onclick = logWindow;\n'\
+ ' list = document.getElementsByClassName("devlist");\n'\
+ ' for (var i = 0; i < list.length; i++)\n'\
+ ' list[i].onclick = devListWindow;\n'\
+ ' var dev = dmesg.getElementsByClassName("thread");\n'\
+ ' for (var i = 0; i < dev.length; i++) {\n'\
+ ' dev[i].onclick = deviceDetail;\n'\
+ ' dev[i].onmouseover = deviceHover;\n'\
+ ' dev[i].onmouseout = deviceUnhover;\n'\
+ ' }\n'\
+ ' var dev = dmesg.getElementsByClassName("srccall");\n'\
+ ' for (var i = 0; i < dev.length; i++)\n'\
+ ' dev[i].onclick = callSelect;\n'\
+ ' zoomTimeline();\n'\
+ ' });\n'\
+ '</script>\n'
+ hf.write(script_code)
+
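+# Usage sketch (illustrative): createHTML earlier in this file calls this
+# while assembling the timeline, roughly:
+#   hf = open(sysvals.htmlfile, 'w')    # path shown for illustration only
+#   addScriptCode(hf, testruns)         # testruns from parseTraceLog/parseKernelLog
+#   hf.close()
+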
+def setRuntimeSuspend(before=True):
+ global sysvals
+ sv = sysvals
+ if sv.rs == 0:
+ return
+ if before:
+ # runtime suspend disable or enable
+ if sv.rs > 0:
+ sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled'
+ else:
+ sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled'
+ print('CONFIGURING RUNTIME SUSPEND...')
+ sv.rslist = deviceInfo(sv.rstgt)
+ for i in sv.rslist:
+ sv.setVal(sv.rsval, i)
+ print('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
+ print('waiting 5 seconds...')
+ time.sleep(5)
+ else:
+ # runtime suspend re-enable or re-disable
+ for i in sv.rslist:
+ sv.setVal(sv.rstgt, i)
+ print('runtime suspend settings restored on %d devices' % len(sv.rslist))
+
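+# Usage sketch: called around a test run to force runtime suspend state and
+# then restore it, e.g.
+#   setRuntimeSuspend(True)    # flip matching power/control files ('on'<->'auto')
+#   runTest()
+#   setRuntimeSuspend(False)   # write the saved target values back
+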
+# Function: executeSuspend
+# Description:
+# Execute system suspend through the sysfs interface, then copy the output
+# dmesg and ftrace files to the test output directory.
+def executeSuspend():
+ pm = ProcessMonitor()
+ tp = sysvals.tpath
+ fwdata = []
+ # run these commands to prepare the system for suspend
+ if sysvals.display:
+ if sysvals.display > 0:
+ print('TURN DISPLAY ON')
+ call('xset -d :0.0 dpms force suspend', shell=True)
+ call('xset -d :0.0 dpms force on', shell=True)
+ else:
+ print('TURN DISPLAY OFF')
+ call('xset -d :0.0 dpms force suspend', shell=True)
+ time.sleep(1)
+ if sysvals.sync:
+ print('SYNCING FILESYSTEMS')
+ call('sync', shell=True)
+ # mark the start point in the kernel ring buffer just as we start
+ sysvals.initdmesg()
+ # start ftrace
+ if(sysvals.usecallgraph or sysvals.usetraceevents):
+ print('START TRACING')
+ sysvals.fsetVal('1', 'tracing_on')
+ if sysvals.useprocmon:
+ pm.start()
+ # execute however many s/r runs requested
+ for count in range(1,sysvals.execcount+1):
+ # x2delay in between test runs
+ if(count > 1 and sysvals.x2delay > 0):
+ sysvals.fsetVal('WAIT %d' % sysvals.x2delay, 'trace_marker')
+ time.sleep(sysvals.x2delay/1000.0)
+ sysvals.fsetVal('WAIT END', 'trace_marker')
+ # start message
+ if sysvals.testcommand != '':
+ print('COMMAND START')
+ else:
+ if(sysvals.rtcwake):
+ print('SUSPEND START')
+ else:
+ print('SUSPEND START (press a key to resume)')
+ # set rtcwake
+ if(sysvals.rtcwake):
+ print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
+ sysvals.rtcWakeAlarmOn()
+ # start of suspend trace marker
+ if(sysvals.usecallgraph or sysvals.usetraceevents):
+ sysvals.fsetVal('SUSPEND START', 'trace_marker')
+ # predelay delay
+ if(count == 1 and sysvals.predelay > 0):
+ sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker')
+ time.sleep(sysvals.predelay/1000.0)
+ sysvals.fsetVal('WAIT END', 'trace_marker')
+ # initiate suspend or command
+ if sysvals.testcommand != '':
+ call(sysvals.testcommand+' 2>&1', shell=True)
+ else:
+ mode = sysvals.suspendmode
+ if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
+ mode = 'mem'
+ pf = open(sysvals.mempowerfile, 'w')
+ pf.write(sysvals.memmode)
+ pf.close()
+ pf = open(sysvals.powerfile, 'w')
+ pf.write(mode)
+ # execution will pause here
+ try:
+ pf.close()
+ except:
+ pass
+ if(sysvals.rtcwake):
+ sysvals.rtcWakeAlarmOff()
+ # postdelay delay
+ if(count == sysvals.execcount and sysvals.postdelay > 0):
+ sysvals.fsetVal('WAIT %d' % sysvals.postdelay, 'trace_marker')
+ time.sleep(sysvals.postdelay/1000.0)
+ sysvals.fsetVal('WAIT END', 'trace_marker')
+ # return from suspend
+ print('RESUME COMPLETE')
+ if(sysvals.usecallgraph or sysvals.usetraceevents):
+ sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
+ if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
+ fwdata.append(getFPDT(False))
+ # stop ftrace
+ if(sysvals.usecallgraph or sysvals.usetraceevents):
+ if sysvals.useprocmon:
+ pm.stop()
+ sysvals.fsetVal('0', 'tracing_on')
+ print('CAPTURING TRACE')
+ op = sysvals.writeDatafileHeader(sysvals.ftracefile, fwdata)
+ fp = open(tp+'trace', 'r')
+ for line in fp:
+ op.write(line)
+ op.close()
+ sysvals.fsetVal('', 'trace')
+ devProps()
+ # grab a copy of the dmesg output
+ print('CAPTURING DMESG')
+ sysvals.getdmesg(fwdata)
+
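+# With no test command, the suspend above reduces to the standard sysfs
+# sequence, roughly:
+#   echo s2idle > /sys/power/mem_sleep   # only for mem-* sub-modes
+#   echo mem > /sys/power/state          # blocks here until resume
+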
+def readFile(file):
+ if os.path.islink(file):
+ return os.readlink(file).split('/')[-1]
+ else:
+ return sysvals.getVal(file).strip()
+
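+# e.g. readFile on a 'driver' symlink yields the driver name (the link
+# target's basename), while a regular sysfs file is read and stripped
+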
+# Function: ms2nice
+# Description:
+# Convert a millisecond count into a concise clock-style time string
+# Output:
+# The time string, e.g. "1:30:15" (h:m:s), "02:35" (m:s), or "45s"
+def ms2nice(val):
+ val = int(val)
+ h = val / 3600000
+ m = (val / 60000) % 60
+ s = (val / 1000) % 60
+ if h > 0:
+ return '%d:%02d:%02d' % (h, m, s)
+ if m > 0:
+ return '%02d:%02d' % (m, s)
+ return '%ds' % s
+
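+# Worked example: ms2nice(5415000) -> '1:30:15' (1 h, 30 min, 15 s),
+# ms2nice(83000) -> '01:23', ms2nice(9000) -> '9s'
+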
+def yesno(val):
+ list = {'enabled':'A', 'disabled':'S', 'auto':'E', 'on':'D',
+ 'active':'A', 'suspended':'S', 'suspending':'S'}
+ if val not in list:
+ return ' '
+ return list[val]
+
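+# e.g. yesno('auto') -> 'E' (runtime suspend enabled), yesno('on') -> 'D'
+# (disabled), and any unrecognized value -> ' '
+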
+# Function: deviceInfo
+# Description:
+# Detect all devices that support runtime suspend; print their current PM
+# settings as a table, or return their power/control paths when output is set
+def deviceInfo(output=''):
+ if not output:
+ print('LEGEND')
+ print('---------------------------------------------------------------------------------------------')
+ print(' A = async/sync PM queue (A/S) C = runtime active children')
+ print(' R = runtime suspend enabled/disabled (E/D) rACTIVE = runtime active (min/sec)')
+ print(' S = runtime status active/suspended (A/S) rSUSPEND = runtime suspend (min/sec)')
+ print(' U = runtime usage count')
+ print('---------------------------------------------------------------------------------------------')
+ print('DEVICE NAME A R S U C rACTIVE rSUSPEND')
+ print('---------------------------------------------------------------------------------------------')
+
+ res = []
+ tgtval = 'runtime_status'
+ lines = dict()
+ for dirname, dirnames, filenames in os.walk('/sys/devices'):
+ if(not re.match('.*/power', dirname) or
+ 'control' not in filenames or
+ tgtval not in filenames):
+ continue
+ name = ''
+ dirname = dirname[:-6]
+ device = dirname.split('/')[-1]
+ power = dict()
+ power[tgtval] = readFile('%s/power/%s' % (dirname, tgtval))
+ # only list devices which support runtime suspend
+ if power[tgtval] not in ['active', 'suspended', 'suspending']:
+ continue
+ for i in ['product', 'driver', 'subsystem']:
+ file = '%s/%s' % (dirname, i)
+ if os.path.exists(file):
+ name = readFile(file)
+ break
+ for i in ['async', 'control', 'runtime_status', 'runtime_usage',
+ 'runtime_active_kids', 'runtime_active_time',
+ 'runtime_suspended_time']:
+ if i in filenames:
+ power[i] = readFile('%s/power/%s' % (dirname, i))
+ if output:
+ if power['control'] == output:
+ res.append('%s/power/control' % dirname)
+ continue
+ lines[dirname] = '%-26s %-26s %1s %1s %1s %1s %1s %10s %10s' % \
+ (device[:26], name[:26],
+ yesno(power['async']), \
+ yesno(power['control']), \
+ yesno(power['runtime_status']), \
+ power['runtime_usage'], \
+ power['runtime_active_kids'], \
+ ms2nice(power['runtime_active_time']), \
+ ms2nice(power['runtime_suspended_time']))
+ for i in sorted(lines):
+ print(lines[i])
+ return res
+
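+# Usage sketch: deviceInfo() prints the legend and table above for every
+# runtime-suspend capable device; deviceInfo('on') instead returns the
+# power/control file paths whose current value is 'on' (see setRuntimeSuspend)
+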
+# Function: devProps
+# Description:
+# Retrieve a list of properties for all devices in the trace log
+def devProps(data=0):
+ props = dict()
+
+ if data:
+ idx = data.index(': ') + 2
+ if idx >= len(data):
+ return
+ devlist = data[idx:].split(';')
+ for dev in devlist:
+ f = dev.split(',')
+ if len(f) < 3:
+ continue
+ dev = f[0]
+ props[dev] = DevProps()
+ props[dev].altname = f[1]
+ if int(f[2]):
+ props[dev].async = True
+ else:
+ props[dev].async = False
+ sysvals.devprops = props
+ if sysvals.suspendmode == 'command' and 'testcommandstring' in props:
+ sysvals.testcommand = props['testcommandstring'].altname
+ return
+
+ if(os.path.exists(sysvals.ftracefile) == False):
+ doError('%s does not exist' % sysvals.ftracefile)
+
+ # first get the list of devices we need properties for
+ msghead = 'Additional data added by AnalyzeSuspend'
+ alreadystamped = False
+ tp = TestProps()
+ tf = sysvals.openlog(sysvals.ftracefile, 'r')
+ for line in tf:
+ if msghead in line:
+ alreadystamped = True
+ continue
+ # determine the trace data type (required for further parsing)
+ m = re.match(sysvals.tracertypefmt, line)
+ if(m):
+ tp.setTracerType(m.group('t'))
+ continue
+ # parse only valid lines, if this is not one move on
+ m = re.match(tp.ftrace_line_fmt, line)
+ if(not m or 'device_pm_callback_start' not in line):
+ continue
+ m = re.match('.*: (?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*', m.group('msg'))
+ if(not m):
+ continue
+ dev = m.group('d')
+ if dev not in props:
+ props[dev] = DevProps()
+ tf.close()
+
+ if not alreadystamped and sysvals.suspendmode == 'command':
+ out = '#\n# '+msghead+'\n# Device Properties: '
+ out += 'testcommandstring,%s,0;' % (sysvals.testcommand)
+ with sysvals.openlog(sysvals.ftracefile, 'a') as fp:
+ fp.write(out+'\n')
+ sysvals.devprops = props
+ return
+
+ # now get the syspath for each of our target devices
+ for dirname, dirnames, filenames in os.walk('/sys/devices'):
+ if(re.match('.*/power', dirname) and 'async' in filenames):
+ dev = dirname.split('/')[-2]
+ if dev in props and (not props[dev].syspath or len(dirname) < len(props[dev].syspath)):
+ props[dev].syspath = dirname[:-6]
+
+ # now fill in the properties for our target devices
+ for dev in props:
+ dirname = props[dev].syspath
+ if not dirname or not os.path.exists(dirname):
+ continue
+ with open(dirname+'/power/async') as fp:
+ text = fp.read()
+ props[dev].async = False
+ if 'enabled' in text:
+ props[dev].async = True
+ fields = os.listdir(dirname)
+ if 'product' in fields:
+ with open(dirname+'/product') as fp:
+ props[dev].altname = fp.read()
+ elif 'name' in fields:
+ with open(dirname+'/name') as fp:
+ props[dev].altname = fp.read()
+ elif 'model' in fields:
+ with open(dirname+'/model') as fp:
+ props[dev].altname = fp.read()
+ elif 'description' in fields:
+ with open(dirname+'/description') as fp:
+ props[dev].altname = fp.read()
+ elif 'id' in fields:
+ with open(dirname+'/id') as fp:
+ props[dev].altname = fp.read()
+ elif 'idVendor' in fields and 'idProduct' in fields:
+ idv, idp = '', ''
+ with open(dirname+'/idVendor') as fp:
+ idv = fp.read().strip()
+ with open(dirname+'/idProduct') as fp:
+ idp = fp.read().strip()
+ props[dev].altname = '%s:%s' % (idv, idp)
+
+ if props[dev].altname:
+ out = props[dev].altname.strip().replace('\n', ' ')
+ out = out.replace(',', ' ')
+ out = out.replace(';', ' ')
+ props[dev].altname = out
+
+ # and now write the data to the ftrace file
+ if not alreadystamped:
+ out = '#\n# '+msghead+'\n# Device Properties: '
+ for dev in sorted(props):
+ out += props[dev].out(dev)
+ with sysvals.openlog(sysvals.ftracefile, 'a') as fp:
+ fp.write(out+'\n')
+
+ sysvals.devprops = props
+
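+# The stamp appended above is one comment line of "name,altname,isasync;"
+# entries; in command mode, for example, it always includes
+# "testcommandstring,<the test command>,0;"
+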
+# Function: getModes
+# Description:
+# Determine the supported power modes on this system
+# Output:
+# A string list of the available modes
+def getModes():
+ modes = []
+ if(os.path.exists(sysvals.powerfile)):
+ fp = open(sysvals.powerfile, 'r')
+ modes = string.split(fp.read())
+ fp.close()
+ if(os.path.exists(sysvals.mempowerfile)):
+ deep = False
+ fp = open(sysvals.mempowerfile, 'r')
+ for m in string.split(fp.read()):
+ memmode = m.strip('[]')
+ if memmode == 'deep':
+ deep = True
+ else:
+ modes.append('mem-%s' % memmode)
+ fp.close()
+ if 'mem' in modes and not deep:
+ modes.remove('mem')
+ return modes
+
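+# Worked example: with /sys/power/state containing "freeze mem" and
+# /sys/power/mem_sleep containing "s2idle [deep]", getModes() returns
+# ['freeze', 'mem', 'mem-s2idle']; without a 'deep' entry, plain 'mem'
+# is dropped from the list
+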
+# Function: dmidecode
+# Description:
+# Read the bios tables and pull out system info
+# Arguments:
+# mempath: /dev/mem or custom mem path
+# fatal: True to exit on error, False to return empty dict
+# Output:
+# A dict object with all available key/values
+def dmidecode(mempath, fatal=False):
+ out = dict()
+
+ # the list of values to retrieve, with hardcoded (type, idx)
+ info = {
+ 'bios-vendor': (0, 4),
+ 'bios-version': (0, 5),
+ 'bios-release-date': (0, 8),
+ 'system-manufacturer': (1, 4),
+ 'system-product-name': (1, 5),
+ 'system-version': (1, 6),
+ 'system-serial-number': (1, 7),
+ 'baseboard-manufacturer': (2, 4),
+ 'baseboard-product-name': (2, 5),
+ 'baseboard-version': (2, 6),
+ 'baseboard-serial-number': (2, 7),
+ 'chassis-manufacturer': (3, 4),
+ 'chassis-type': (3, 5),
+ 'chassis-version': (3, 6),
+ 'chassis-serial-number': (3, 7),
+ 'processor-manufacturer': (4, 7),
+ 'processor-version': (4, 16),
+ }
+ if(not os.path.exists(mempath)):
+ if(fatal):
+ doError('file does not exist: %s' % mempath)
+ return out
+ if(not os.access(mempath, os.R_OK)):
+ if(fatal):
+ doError('file is not readable: %s' % mempath)
+ return out
+
+ # by default use legacy scan, but try to use EFI first
+ memaddr = 0xf0000
+ memsize = 0x10000
+ for ep in ['/sys/firmware/efi/systab', '/proc/efi/systab']:
+ if not os.path.exists(ep) or not os.access(ep, os.R_OK):
+ continue
+ fp = open(ep, 'r')
+ buf = fp.read()
+ fp.close()
+ i = buf.find('SMBIOS=')
+ if i >= 0:
+ try:
+ memaddr = int(buf[i+7:], 16)
+ memsize = 0x20
+ except:
+ continue
+
+ # read in the memory for scanning
+ fp = open(mempath, 'rb')
+ try:
+ fp.seek(memaddr)
+ buf = fp.read(memsize)
+ except:
+ if(fatal):
+ doError('DMI table is unreachable, sorry')
+ else:
+ return out
+ fp.close()
+
+ # search for either an SM table or DMI table
+ i = base = length = num = 0
+ while(i < memsize):
+ if buf[i:i+4] == '_SM_' and i < memsize - 16:
+ length = struct.unpack('H', buf[i+22:i+24])[0]
+ base, num = struct.unpack('IH', buf[i+24:i+30])
+ break
+ elif buf[i:i+5] == '_DMI_':
+ length = struct.unpack('H', buf[i+6:i+8])[0]
+ base, num = struct.unpack('IH', buf[i+8:i+14])
+ break
+ i += 16
+ if base == 0 and length == 0 and num == 0:
+ if(fatal):
+ doError('Neither SMBIOS nor DMI were found')
+ else:
+ return out
+
+ # read in the SM or DMI table
+ fp = open(mempath, 'rb')
+ try:
+ fp.seek(base)
+ buf = fp.read(length)
+ except:
+ if(fatal):
+ doError('DMI table is unreachable, sorry')
+ else:
+ return out
+ fp.close()
+
+ # scan the table for the values we want
+ count = i = 0
+ while(count < num and i <= len(buf) - 4):
+ type, size, handle = struct.unpack('BBH', buf[i:i+4])
+ n = i + size
+ while n < len(buf) - 1:
+ if 0 == struct.unpack('H', buf[n:n+2])[0]:
+ break
+ n += 1
+ data = buf[i+size:n+2].split('\0')
+ for name in info:
+ itype, idxadr = info[name]
+ if itype == type:
+ idx = struct.unpack('B', buf[i+idxadr])[0]
+ if idx > 0 and idx < len(data) - 1:
+ s = data[idx-1].strip()
+ if s and s.lower() != 'to be filled by o.e.m.':
+ out[name] = data[idx-1]
+ i = n + 2
+ count += 1
+ return out
+
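+# Usage sketch (requires root for /dev/mem):
+#   info = dmidecode(sysvals.mempath)          # {} on failure when fatal=False
+#   print(info.get('system-product-name', ''))
+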
+def getBattery():
+ p = '/sys/class/power_supply'
+ bat = dict()
+ for d in os.listdir(p):
+ type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
+ if type != 'battery':
+ continue
+ for v in ['status', 'energy_now', 'capacity_now']:
+ bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
+ break
+ ac = True
+ if 'status' in bat and 'discharging' in bat['status']:
+ ac = False
+ charge = 0
+ for v in ['energy_now', 'capacity_now']:
+ if v in bat and bat[v]:
+ charge = int(bat[v])
+ return (ac, charge)
+
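+# e.g. (ac, charge) = getBattery(); ac is False only when the battery status
+# reads 'discharging', and charge is the raw energy_now/capacity_now value
+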
+# Function: getFPDT
+# Description:
+# Read the acpi bios tables and pull out FPDT, the firmware data
+# Arguments:
+# output: True to output the info to stdout, False otherwise
+def getFPDT(output):
+ rectype = {}
+ rectype[0] = 'Firmware Basic Boot Performance Record'
+ rectype[1] = 'S3 Performance Table Record'
+ prectype = {}
+ prectype[0] = 'Basic S3 Resume Performance Record'
+ prectype[1] = 'Basic S3 Suspend Performance Record'
+
+ sysvals.rootCheck(True)
+ if(not os.path.exists(sysvals.fpdtpath)):
+ if(output):
+ doError('file does not exist: %s' % sysvals.fpdtpath)
+ return False
+ if(not os.access(sysvals.fpdtpath, os.R_OK)):
+ if(output):
+ doError('file is not readable: %s' % sysvals.fpdtpath)
+ return False
+ if(not os.path.exists(sysvals.mempath)):
+ if(output):
+ doError('file does not exist: %s' % sysvals.mempath)
+ return False
+ if(not os.access(sysvals.mempath, os.R_OK)):
+ if(output):
+ doError('file is not readable: %s' % sysvals.mempath)
+ return False
+
+ fp = open(sysvals.fpdtpath, 'rb')
+ buf = fp.read()
+ fp.close()
+
+ if(len(buf) < 36):
+ if(output):
+ doError('Invalid FPDT table data, should '+\
+ 'be at least 36 bytes')
+ return False
+
+ table = struct.unpack('4sIBB6s8sI4sI', buf[0:36])
+ if(output):
+ print('')
+ print('Firmware Performance Data Table (%s)' % table[0])
+ print(' Signature : %s' % table[0])
+ print(' Table Length : %u' % table[1])
+ print(' Revision : %u' % table[2])
+ print(' Checksum : 0x%x' % table[3])
+ print(' OEM ID : %s' % table[4])
+ print(' OEM Table ID : %s' % table[5])
+ print(' OEM Revision : %u' % table[6])
+ print(' Creator ID : %s' % table[7])
+ print(' Creator Revision : 0x%x' % table[8])
+ print('')
+
+ if(table[0] != 'FPDT'):
+ if(output):
+ doError('Invalid FPDT table')
+ return False
+ if(len(buf) <= 36):
+ return False
+ i = 0
+ fwData = [0, 0]
+ records = buf[36:]
+ fp = open(sysvals.mempath, 'rb')
+ while(i < len(records)):
+ header = struct.unpack('HBB', records[i:i+4])
+ if(header[0] not in rectype):
+ i += header[1]
+ continue
+ if(header[1] != 16):
+ i += header[1]
+ continue
+ addr = struct.unpack('Q', records[i+8:i+16])[0]
+ try:
+ fp.seek(addr)
+ first = fp.read(8)
+ except:
+ if(output):
+ print('Bad address 0x%x in %s' % (addr, sysvals.mempath))
+ return [0, 0]
+ rechead = struct.unpack('4sI', first)
+ recdata = fp.read(rechead[1]-8)
+ if(rechead[0] == 'FBPT'):
+ record = struct.unpack('HBBIQQQQQ', recdata)
+ if(output):
+ print('%s (%s)' % (rectype[header[0]], rechead[0]))
+ print(' Reset END : %u ns' % record[4])
+ print(' OS Loader LoadImage Start : %u ns' % record[5])
+ print(' OS Loader StartImage Start : %u ns' % record[6])
+ print(' ExitBootServices Entry : %u ns' % record[7])
+ print(' ExitBootServices Exit : %u ns' % record[8])
+ elif(rechead[0] == 'S3PT'):
+ if(output):
+ print('%s (%s)' % (rectype[header[0]], rechead[0]))
+ j = 0
+ while(j < len(recdata)):
+ prechead = struct.unpack('HBB', recdata[j:j+4])
+ if(prechead[0] not in prectype):
+ # unknown record type: stop scanning to avoid looping forever
+ break
+ if(prechead[0] == 0):
+ record = struct.unpack('IIQQ', recdata[j:j+prechead[1]])
+ fwData[1] = record[2]
+ if(output):
+ print(' %s' % prectype[prechead[0]])
+ print(' Resume Count : %u' % \
+ record[1])
+ print(' FullResume : %u ns' % \
+ record[2])
+ print(' AverageResume : %u ns' % \
+ record[3])
+ elif(prechead[0] == 1):
+ record = struct.unpack('QQ', recdata[j+4:j+prechead[1]])
+ fwData[0] = record[1] - record[0]
+ if(output):
+ print(' %s' % prectype[prechead[0]])
+ print(' SuspendStart : %u ns' % \
+ record[0])
+ print(' SuspendEnd : %u ns' % \
+ record[1])
+ print(' SuspendTime : %u ns' % \
+ fwData[0])
+ j += prechead[1]
+ if(output):
+ print('')
+ i += header[1]
+ fp.close()
+ return fwData
+
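+# The return value is fwData = [fwsuspend_ns, fwresume_ns] taken from the
+# S3PT records; executeSuspend collects one pair per run with getFPDT(False),
+# while the -fpdt command prints the whole table with getFPDT(True)
+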
+# Function: statusCheck
+# Description:
+# Verify that the requested command and options will work, and
+# print the results to the terminal
+# Output:
+# True if the test will work, False if not
+def statusCheck(probecheck=False):
+ status = True
+
+ print('Checking this system (%s)...' % platform.node())
+
+ # check we have root access
+ res = sysvals.colorText('NO (No features of this tool will work!)')
+ if(sysvals.rootCheck(False)):
+ res = 'YES'
+ print(' have root access: %s' % res)
+ if(res != 'YES'):
+ print(' Try running this script with sudo')
+ return False
+
+ # check sysfs is mounted
+ res = sysvals.colorText('NO (No features of this tool will work!)')
+ if(os.path.exists(sysvals.powerfile)):
+ res = 'YES'
+ print(' is sysfs mounted: %s' % res)
+ if(res != 'YES'):
+ return False
+
+ # check target mode is a valid mode
+ if sysvals.suspendmode != 'command':
+ res = sysvals.colorText('NO')
+ modes = getModes()
+ if(sysvals.suspendmode in modes):
+ res = 'YES'
+ else:
+ status = False
+ print(' is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
+ if(res == 'NO'):
+ print(' valid power modes are: %s' % modes)
+ print(' please choose one with -m')
+
+ # check if ftrace is available
+ res = sysvals.colorText('NO')
+ ftgood = sysvals.verifyFtrace()
+ if(ftgood):
+ res = 'YES'
+ elif(sysvals.usecallgraph):
+ status = False
+ print(' is ftrace supported: %s' % res)
+
+ # check if kprobes are available
+ res = sysvals.colorText('NO')
+ sysvals.usekprobes = sysvals.verifyKprobes()
+ if(sysvals.usekprobes):
+ res = 'YES'
+ else:
+ sysvals.usedevsrc = False
+ print(' are kprobes supported: %s' % res)
+
+ # what data source are we using
+ res = 'DMESG'
+ if(ftgood):
+ sysvals.usetraceevents = True
+ for e in sysvals.traceevents:
+ if not os.path.exists(sysvals.epath+e):
+ sysvals.usetraceevents = False
+ if(sysvals.usetraceevents):
+ res = 'FTRACE (all trace events found)'
+ print(' timeline data source: %s' % res)
+
+ # check if rtcwake
+ res = sysvals.colorText('NO')
+ if(sysvals.rtcpath != ''):
+ res = 'YES'
+ elif(sysvals.rtcwake):
+ status = False
+ print(' is rtcwake supported: %s' % res)
+
+ if not probecheck:
+ return status
+
+ # verify kprobes
+ if sysvals.usekprobes:
+ for name in sysvals.tracefuncs:
+ sysvals.defaultKprobe(name, sysvals.tracefuncs[name])
+ if sysvals.usedevsrc:
+ for name in sysvals.dev_tracefuncs:
+ sysvals.defaultKprobe(name, sysvals.dev_tracefuncs[name])
+ sysvals.addKprobes(True)
+
+ return status
+
+# Function: doError
+# Description:
+# generic error function for catastrophic failures
+# Arguments:
+# msg: the error message to print
+# help: True if printHelp should be called after, False otherwise
+def doError(msg, help=False):
+ if(help == True):
+ printHelp()
+ print('ERROR: %s\n' % msg)
+ sysvals.outputResult({'error':msg})
+ sys.exit()
+
+# Function: getArgInt
+# Description:
+# pull out an integer argument from the command line with checks
+def getArgInt(name, args, min, max, main=True):
+ if main:
+ try:
+ arg = args.next()
+ except:
+ doError(name+': no argument supplied', True)
+ else:
+ arg = args
+ try:
+ val = int(arg)
+ except:
+ doError(name+': non-integer value given', True)
+ if(val < min or val > max):
+ doError(name+': value should be between %d and %d' % (min, max), True)
+ return val
+
+# Function: getArgFloat
+# Description:
+# pull out a float argument from the command line with checks
+def getArgFloat(name, args, min, max, main=True):
+ if main:
+ try:
+ arg = args.next()
+ except:
+ doError(name+': no argument supplied', True)
+ else:
+ arg = args
+ try:
+ val = float(arg)
+ except:
+ doError(name+': non-numerical value given', True)
+ if(val < min or val > max):
+ doError(name+': value should be between %f and %f' % (min, max), True)
+ return val
+
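+# e.g. getArgInt('-timeprec', args, 0, 6) consumes and checks the next
+# command line token, while getArgInt('cgtest', value, 0, 1, False) checks a
+# value already in hand, as configFromFile does below
+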
+def processData(live=False):
+ print('PROCESSING DATA')
+ error = ''
+ if(sysvals.usetraceevents):
+ testruns, error = parseTraceLog(live)
+ if sysvals.dmesgfile:
+ for data in testruns:
+ data.extractErrorInfo()
+ else:
+ testruns = loadKernelLog()
+ for data in testruns:
+ parseKernelLog(data)
+ if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)):
+ appendIncompleteTraceLog(testruns)
+ sysvals.vprint('Command:\n %s' % sysvals.cmdline)
+ for data in testruns:
+ data.printDetails()
+ if sysvals.cgdump:
+ for data in testruns:
+ data.debugPrint()
+ sys.exit()
+ if len(testruns) < 1:
+ return (testruns, {'error': 'timeline generation failed'})
+ sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
+ createHTML(testruns, error)
+ print('DONE')
+ data = testruns[0]
+ stamp = data.stamp
+ stamp['suspend'], stamp['resume'] = data.getTimeValues()
+ if data.fwValid:
+ stamp['fwsuspend'], stamp['fwresume'] = data.fwSuspend, data.fwResume
+ if error:
+ stamp['error'] = error
+ return (testruns, stamp)
+
+# Function: rerunTest
+# Description:
+# generate an output from an existing set of ftrace/dmesg logs
+def rerunTest():
+ if sysvals.ftracefile:
+ doesTraceLogHaveTraceEvents()
+ if not sysvals.dmesgfile and not sysvals.usetraceevents:
+ doError('recreating this html output requires a dmesg file')
+ sysvals.setOutputFile()
+ if os.path.exists(sysvals.htmlfile):
+ if not os.path.isfile(sysvals.htmlfile):
+ doError('a directory already exists with this name: %s' % sysvals.htmlfile)
+ elif not os.access(sysvals.htmlfile, os.W_OK):
+ doError('missing permission to write to %s' % sysvals.htmlfile)
+ testruns, stamp = processData(False)
+ return stamp
+
+# Function: runTest
+# Description:
+# execute a suspend/resume, gather the logs, and generate the output
+def runTest(n=0):
+ # prepare for the test
+ sysvals.initFtrace()
+ sysvals.initTestOutput('suspend')
+
+ # execute the test
+ executeSuspend()
+ sysvals.cleanupFtrace()
+ if sysvals.skiphtml:
+ sysvals.sudouser(sysvals.testdir)
+ return
+ testruns, stamp = processData(True)
+ for data in testruns:
+ del data
+ sysvals.sudouser(sysvals.testdir)
+ sysvals.outputResult(stamp, n)
+
+def find_in_html(html, start, end, firstonly=True):
+ n, out = 0, []
+ while n < len(html):
+ m = re.search(start, html[n:])
+ if not m:
+ break
+ i = m.end()
+ m = re.search(end, html[n+i:])
+ if not m:
+ break
+ j = m.start()
+ str = html[n+i:n+i+j]
+ if end == 'ms':
+ num = re.search(r'[-+]?\d*\.\d+|\d+', str)
+ str = num.group() if num else 'NaN'
+ if firstonly:
+ return str
+ out.append(str)
+ n += i+j
+ if firstonly:
+ return ''
+ return out
+
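+# e.g. find_in_html(html, 'Kernel Suspend', 'ms') returns just the number
+# between the two markers ('NaN' if none is found); with firstonly=False a
+# list of every match comes back instead
+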
+# Function: runSummary
+# Description:
+# create a summary of tests in a sub-directory
+def runSummary(subdir, local=True, genhtml=False):
+ inpath = os.path.abspath(subdir)
+ outpath = inpath
+ if local:
+ outpath = os.path.abspath('.')
+ print('Generating a summary of folder "%s"' % inpath)
+ if genhtml:
+ for dirname, dirnames, filenames in os.walk(subdir):
+ sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
+ for filename in filenames:
+ if(re.match('.*_dmesg.txt', filename)):
+ sysvals.dmesgfile = os.path.join(dirname, filename)
+ elif(re.match('.*_ftrace.txt', filename)):
+ sysvals.ftracefile = os.path.join(dirname, filename)
+ sysvals.setOutputFile()
+ if sysvals.ftracefile and sysvals.htmlfile and \
+ not os.path.exists(sysvals.htmlfile):
+ print('FTRACE: %s' % sysvals.ftracefile)
+ if sysvals.dmesgfile:
+ print('DMESG : %s' % sysvals.dmesgfile)
+ rerunTest()
+ testruns = []
+ for dirname, dirnames, filenames in os.walk(subdir):
+ for filename in filenames:
+ if(not re.match('.*\.html', filename)):
+ continue
+ file = os.path.join(dirname, filename)
+ html = open(file, 'r').read()
+ suspend = find_in_html(html, 'Kernel Suspend', 'ms')
+ resume = find_in_html(html, 'Kernel Resume', 'ms')
+ line = find_in_html(html, '<div class="stamp">', '</div>')
+ stmp = line.split()
+ if not suspend or not resume or len(stmp) != 8:
+ continue
+ try:
+ dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
+ except:
+ continue
+ tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
+ error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
+ result = 'fail' if error else 'pass'
+ ilist = []
+ e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
+ for i in list(set(e)):
+ ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
+ data = {
+ 'mode': stmp[2],
+ 'host': stmp[0],
+ 'kernel': stmp[1],
+ 'time': tstr,
+ 'result': result,
+ 'issues': ','.join(ilist),
+ 'suspend': suspend,
+ 'resume': resume,
+ 'url': os.path.relpath(file, outpath),
+ }
+ testruns.append(data)
+ outfile = os.path.join(outpath, 'summary.html')
+ print('Summary file: %s' % outfile)
+ createHTMLSummarySimple(testruns, outfile, inpath)
+
+# Function: checkArgBool
+# Description:
+# check if a boolean string value is true or false
+def checkArgBool(name, value):
+ if value in switchvalues:
+ if value in switchoff:
+ return False
+ return True
+ doError('invalid boolean --> (%s: %s), use "true/false" or "1/0"' % (name, value), True)
+ return False
+
+# Function: configFromFile
+# Description:
+# Configure the script via the info in a config file
+def configFromFile(file):
+ Config = ConfigParser.ConfigParser()
+
+ Config.read(file)
+ sections = Config.sections()
+ overridekprobes = False
+ overridedevkprobes = False
+ if 'Settings' in sections:
+ for opt in Config.options('Settings'):
+ value = Config.get('Settings', opt).lower()
+ option = opt.lower()
+ if(option == 'verbose'):
+ sysvals.verbose = checkArgBool(option, value)
+ elif(option == 'addlogs'):
+ sysvals.dmesglog = sysvals.ftracelog = checkArgBool(option, value)
+ elif(option == 'dev'):
+ sysvals.usedevsrc = checkArgBool(option, value)
+ elif(option == 'proc'):
+ sysvals.useprocmon = checkArgBool(option, value)
+ elif(option == 'x2'):
+ if checkArgBool(option, value):
+ sysvals.execcount = 2
+ elif(option == 'callgraph'):
+ sysvals.usecallgraph = checkArgBool(option, value)
+ elif(option == 'override-timeline-functions'):
+ overridekprobes = checkArgBool(option, value)
+ elif(option == 'override-dev-timeline-functions'):
+ overridedevkprobes = checkArgBool(option, value)
+ elif(option == 'skiphtml'):
+ sysvals.skiphtml = checkArgBool(option, value)
+ elif(option == 'sync'):
+ sysvals.sync = checkArgBool(option, value)
+ elif(option == 'rs' or option == 'runtimesuspend'):
+ if value in switchvalues:
+ if value in switchoff:
+ sysvals.rs = -1
+ else:
+ sysvals.rs = 1
+ else:
+ doError('invalid value --> (%s: %s), use "enable/disable"' % (option, value), True)
+ elif(option == 'display'):
+ if value in switchvalues:
+ if value in switchoff:
+ sysvals.display = -1
+ else:
+ sysvals.display = 1
+ else:
+ doError('invalid value --> (%s: %s), use "on/off"' % (option, value), True)
+ elif(option == 'gzip'):
+ sysvals.gzip = checkArgBool(option, value)
+ elif(option == 'cgfilter'):
+ sysvals.setCallgraphFilter(value)
+ elif(option == 'cgskip'):
+ if value in switchoff:
+ sysvals.cgskip = ''
+ else:
+ sysvals.cgskip = sysvals.configFile(value)
+ if(not sysvals.cgskip):
+ doError('%s does not exist' % value)
+ elif(option == 'cgtest'):
+ sysvals.cgtest = getArgInt('cgtest', value, 0, 1, False)
+ elif(option == 'cgphase'):
+ d = Data(0)
+ if value not in d.phases:
+ doError('invalid phase --> (%s: %s), valid phases are %s'\
+ % (option, value, d.phases), True)
+ sysvals.cgphase = value
+ elif(option == 'fadd'):
+ file = sysvals.configFile(value)
+ if(not file):
+ doError('%s does not exist' % value)
+ sysvals.addFtraceFilterFunctions(file)
+ elif(option == 'result'):
+ sysvals.result = value
+ elif(option == 'multi'):
+ nums = value.split()
+ if len(nums) != 2:
+ doError('multi requires 2 integers (exec_count and delay)', True)
+ sysvals.multitest['run'] = True
+ sysvals.multitest['count'] = getArgInt('multi: n d (exec count)', nums[0], 2, 1000000, False)
+ sysvals.multitest['delay'] = getArgInt('multi: n d (delay between tests)', nums[1], 0, 3600, False)
+ elif(option == 'devicefilter'):
+ sysvals.setDeviceFilter(value)
+ elif(option == 'expandcg'):
+ sysvals.cgexp = checkArgBool(option, value)
+ elif(option == 'srgap'):
+ if checkArgBool(option, value):
+ sysvals.srgap = 5
+ elif(option == 'mode'):
+ sysvals.suspendmode = value
+ elif(option == 'command' or option == 'cmd'):
+ sysvals.testcommand = value
+ elif(option == 'x2delay'):
+ sysvals.x2delay = getArgInt('x2delay', value, 0, 60000, False)
+ elif(option == 'predelay'):
+ sysvals.predelay = getArgInt('predelay', value, 0, 60000, False)
+ elif(option == 'postdelay'):
+ sysvals.postdelay = getArgInt('postdelay', value, 0, 60000, False)
+ elif(option == 'maxdepth'):
+ sysvals.max_graph_depth = getArgInt('maxdepth', value, 0, 1000, False)
+ elif(option == 'rtcwake'):
+ if value in switchoff:
+ sysvals.rtcwake = False
+ else:
+ sysvals.rtcwake = True
+ sysvals.rtcwaketime = getArgInt('rtcwake', value, 0, 3600, False)
+ elif(option == 'timeprec'):
+ sysvals.setPrecision(getArgInt('timeprec', value, 0, 6, False))
+ elif(option == 'mindev'):
+ sysvals.mindevlen = getArgFloat('mindev', value, 0.0, 10000.0, False)
+ elif(option == 'callloop-maxgap'):
+ sysvals.callloopmaxgap = getArgFloat('callloop-maxgap', value, 0.0, 1.0, False)
+ elif(option == 'callloop-maxlen'):
+ sysvals.callloopmaxlen = getArgFloat('callloop-maxlen', value, 0.0, 1.0, False)
+ elif(option == 'mincg'):
+ sysvals.mincglen = getArgFloat('mincg', value, 0.0, 10000.0, False)
+ elif(option == 'bufsize'):
+ sysvals.bufsize = getArgInt('bufsize', value, 1, 1024*1024*8, False)
+ elif(option == 'output-dir'):
+ sysvals.outdir = sysvals.setOutputFolder(value)
+
+ if sysvals.suspendmode == 'command' and not sysvals.testcommand:
+ doError('No command supplied for mode "command"')
+
+ # compatibility errors
+ if sysvals.usedevsrc and sysvals.usecallgraph:
+ doError('-dev is not compatible with -f')
+ if sysvals.usecallgraph and sysvals.useprocmon:
+ doError('-proc is not compatible with -f')
+
+ if overridekprobes:
+ sysvals.tracefuncs = dict()
+ if overridedevkprobes:
+ sysvals.dev_tracefuncs = dict()
+
+ kprobes = dict()
+ kprobesec = 'dev_timeline_functions_'+platform.machine()
+ if kprobesec in sections:
+ for name in Config.options(kprobesec):
+ text = Config.get(kprobesec, name)
+ kprobes[name] = (text, True)
+ kprobesec = 'timeline_functions_'+platform.machine()
+ if kprobesec in sections:
+ for name in Config.options(kprobesec):
+ if name in kprobes:
+ doError('Duplicate timeline function found "%s"' % (name))
+ text = Config.get(kprobesec, name)
+ kprobes[name] = (text, False)
+
+ for name in kprobes:
+ function = name
+ format = name
+ color = ''
+ args = dict()
+ text, dev = kprobes[name]
+ data = text.split()
+ i = 0
+ for val in data:
+ # bracketed strings are special formatting, read them separately
+ if val[0] == '[' and val[-1] == ']':
+ for prop in val[1:-1].split(','):
+ p = prop.split('=')
+ if p[0] == 'color':
+ try:
+ color = int(p[1], 16)
+ color = '#'+p[1]
+ except:
+ color = p[1]
+ continue
+ # first real arg should be the format string
+ if i == 0:
+ format = val
+ # all other args are actual function args
+ else:
+ d = val.split('=')
+ args[d[0]] = d[1]
+ i += 1
+ if not function or not format:
+ doError('Invalid kprobe: %s' % name)
+ for arg in re.findall('{(?P<n>[a-z,A-Z,0-9]*)}', format):
+ if arg not in args:
+ doError('Kprobe "%s" is missing argument "%s"' % (name, arg))
+ if (dev and name in sysvals.dev_tracefuncs) or (not dev and name in sysvals.tracefuncs):
+ doError('Duplicate timeline function found "%s"' % (name))
+
+ kp = {
+ 'name': name,
+ 'func': function,
+ 'format': format,
+ sysvals.archargs: args
+ }
+ if color:
+ kp['color'] = color
+ if dev:
+ sysvals.dev_tracefuncs[name] = kp
+ else:
+ sysvals.tracefuncs[name] = kp
+
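+# A minimal config file for configFromFile might look like this
+# (illustrative values):
+#   [Settings]
+#   mode: mem
+#   rtcwake: 15
+#   sync: true
+#   output-dir: suspend-test
+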
+# Function: printHelp
+# Description:
+# print out the help text
+def printHelp():
+ print('')
+ print('%s v%s' % (sysvals.title, sysvals.version))
+ print('Usage: sudo sleepgraph <options> <commands>')
+ print('')
+ print('Description:')
+ print(' This tool is designed to assist kernel and OS developers in optimizing')
+ print(' their linux stack\'s suspend/resume time. Using a kernel image built')
+ print(' with a few extra options enabled, the tool will execute a suspend and')
+ print(' capture dmesg and ftrace data until resume is complete. This data is')
+ print(' transformed into a device timeline and an optional callgraph to give')
+ print(' a detailed view of which devices/subsystems are taking the most')
+ print(' time in suspend/resume.')
+ print('')
+ print(' If no specific command is given, the default behavior is to initiate')
+ print(' a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
+ print('')
+ print(' Generates output files in subdirectory: suspend-yymmdd-HHMMSS')
+ print(' HTML output: <hostname>_<mode>.html')
+ print(' raw dmesg output: <hostname>_<mode>_dmesg.txt')
+ print(' raw ftrace output: <hostname>_<mode>_ftrace.txt')
+ print('')
+ print('Options:')
+ print(' -h Print this help text')
+ print(' -v Print the current tool version')
+ print(' -config fn Pull arguments and config options from file fn')
+ print(' -verbose Print extra information during execution and analysis')
+ print(' -m mode Mode to initiate for suspend (default: %s)' % (sysvals.suspendmode))
+ print(' -o name Overrides the output subdirectory name when running a new test')
+ print(' default: suspend-{date}-{time}')
+ print(' -rtcwake t Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
+ print(' -addlogs Add the dmesg and ftrace logs to the html output')
+ print(' -srgap Add a visible gap in the timeline between sus/res (default: disabled)')
+ print(' -skiphtml Run the test and capture the trace logs, but skip the timeline (default: disabled)')
+ print(' -result fn Export a results table to a text file for parsing.')
+ print(' [testprep]')
+ print(' -sync Sync the filesystems before starting the test')
+ print(' -rs on/off Enable/disable runtime suspend for all devices, restore all after test')
+ print(' -display on/off Turn the display on or off for the test')
+ print(' [advanced]')
+ print(' -gzip Gzip the trace and dmesg logs to save space')
+ print(' -cmd {s} Run the timeline over a custom command, e.g. "sync -d"')
+ print(' -proc Add usermode process info into the timeline (default: disabled)')
+ print(' -dev Add kernel function calls and threads to the timeline (default: disabled)')
+ print(' -x2 Run two suspend/resumes back to back (default: disabled)')
+ print(' -x2delay t Include t ms delay between multiple test runs (default: 0 ms)')
+ print(' -predelay t Include t ms delay before 1st suspend (default: 0 ms)')
+ print(' -postdelay t Include t ms delay after last resume (default: 0 ms)')
+ print(' -mindev ms Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)')
+ print(' -multi n d Execute <n> consecutive tests at <d> seconds intervals. The outputs will')
+ print(' be created in a new subdirectory with a summary page.')
+ print(' [debug]')
+ print(' -f Use ftrace to create device callgraphs (default: disabled)')
+ print(' -maxdepth N Limit the callgraph data to N call levels (default: 0=all)')
+ print(' -expandcg Pre-expand the callgraph data in the html output (default: disabled)')
+ print(' -fadd file Add functions to be graphed in the timeline from a list in a text file')
+ print(' -filter "d1,d2,..." Filter out all but this comma-delimited list of device names')
+ print(' -mincg ms Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
+ print(' -cgphase P Only show callgraph data for phase P (e.g. suspend_late)')
+ print(' -cgtest N Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
+ print(' -timeprec N Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
+ print(' -cgfilter S Filter the callgraph output in the timeline')
+ print(' -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)')
+ print(' -bufsize N Set trace buffer size to N kilo-bytes (default: all of free memory)')
+ print('')
+ print('Other commands:')
+ print(' -modes List available suspend modes')
+ print(' -status Test to see if the system is enabled to run this tool')
+ print(' -fpdt Print out the contents of the ACPI Firmware Performance Data Table')
+ print(' -battery Print out battery info (if available)')
+ print(' -sysinfo Print out system info extracted from BIOS')
+ print(' -devinfo Print out the pm settings of all devices which support runtime suspend')
+ print(' -flist Print the list of functions currently being captured in ftrace')
+ print(' -flistall Print all functions capable of being captured in ftrace')
+ print(' -summary dir Create a summary of tests in this dir [-genhtml builds missing html]')
+ print(' [redo]')
+ print(' -ftrace ftracefile Create HTML output using ftrace input (used with -dmesg)')
+ print(' -dmesg dmesgfile Create HTML output using dmesg (used with -ftrace)')
+ print('')
+ return True
+
+# ----------------- MAIN --------------------
+# exec start (skipped if script is loaded as library)
+if __name__ == '__main__':
+ genhtml = False
+ cmd = ''
+ simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery']
+ if '-f' in sys.argv:
+ sysvals.cgskip = sysvals.configFile('cgskip.txt')
+ # loop through the command line arguments
+ args = iter(sys.argv[1:])
+ for arg in args:
+ if(arg == '-m'):
+ try:
+ val = args.next()
+ except:
+ doError('No mode supplied', True)
+ if val == 'command' and not sysvals.testcommand:
+ doError('No command supplied for mode "command"', True)
+ sysvals.suspendmode = val
+ elif(arg in simplecmds):
+ cmd = arg[1:]
+ elif(arg == '-h'):
+ printHelp()
+ sys.exit()
+ elif(arg == '-v'):
+ print("Version %s" % sysvals.version)
+ sys.exit()
+ elif(arg == '-x2'):
+ sysvals.execcount = 2
+ elif(arg == '-x2delay'):
+ sysvals.x2delay = getArgInt('-x2delay', args, 0, 60000)
+ elif(arg == '-predelay'):
+ sysvals.predelay = getArgInt('-predelay', args, 0, 60000)
+ elif(arg == '-postdelay'):
+ sysvals.postdelay = getArgInt('-postdelay', args, 0, 60000)
+ elif(arg == '-f'):
+ sysvals.usecallgraph = True
+ elif(arg == '-skiphtml'):
+ sysvals.skiphtml = True
+ elif(arg == '-cgdump'):
+ sysvals.cgdump = True
+ elif(arg == '-genhtml'):
+ genhtml = True
+ elif(arg == '-addlogs'):
+ sysvals.dmesglog = sysvals.ftracelog = True
+ elif(arg == '-verbose'):
+ sysvals.verbose = True
+ elif(arg == '-proc'):
+ sysvals.useprocmon = True
+ elif(arg == '-dev'):
+ sysvals.usedevsrc = True
+ elif(arg == '-sync'):
+ sysvals.sync = True
+ elif(arg == '-gzip'):
+ sysvals.gzip = True
+ elif(arg == '-rs'):
+ try:
+ val = args.next()
+ except:
+ doError('-rs requires "enable" or "disable"', True)
+ if val.lower() in switchvalues:
+ if val.lower() in switchoff:
+ sysvals.rs = -1
+ else:
+ sysvals.rs = 1
+ else:
+ doError('invalid option: %s, use "enable/disable" or "on/off"' % val, True)
+ elif(arg == '-display'):
+ try:
+ val = args.next()
+ except:
+ doError('-display requires "on" or "off"', True)
+ if val.lower() in switchvalues:
+ if val.lower() in switchoff:
+ sysvals.display = -1
+ else:
+ sysvals.display = 1
+ else:
+ doError('invalid option: %s, use "on/off"' % val, True)
+ elif(arg == '-maxdepth'):
+ sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000)
+ elif(arg == '-rtcwake'):
+ try:
+ val = args.next()
+ except:
+ doError('No rtcwake time supplied', True)
+ if val.lower() in switchoff:
+ sysvals.rtcwake = False
+ else:
+ sysvals.rtcwake = True
+ sysvals.rtcwaketime = getArgInt('-rtcwake', val, 0, 3600, False)
+ elif(arg == '-timeprec'):
+ sysvals.setPrecision(getArgInt('-timeprec', args, 0, 6))
+ elif(arg == '-mindev'):
+ sysvals.mindevlen = getArgFloat('-mindev', args, 0.0, 10000.0)
+ elif(arg == '-mincg'):
+ sysvals.mincglen = getArgFloat('-mincg', args, 0.0, 10000.0)
+ elif(arg == '-bufsize'):
+ sysvals.bufsize = getArgInt('-bufsize', args, 1, 1024*1024*8)
+ elif(arg == '-cgtest'):
+ sysvals.cgtest = getArgInt('-cgtest', args, 0, 1)
+ elif(arg == '-cgphase'):
+ try:
+ val = args.next()
+ except:
+ doError('No phase name supplied', True)
+ d = Data(0)
+ if val not in d.phases:
+ doError('invalid phase --> (%s: %s), valid phases are %s'\
+ % (arg, val, d.phases), True)
+ sysvals.cgphase = val
+ elif(arg == '-cgfilter'):
+ try:
+ val = args.next()
+ except:
+ doError('No callgraph functions supplied', True)
+ sysvals.setCallgraphFilter(val)
+ elif(arg == '-cgskip'):
+ try:
+ val = args.next()
+ except:
+ doError('No file supplied', True)
+ if val.lower() in switchoff:
+ sysvals.cgskip = ''
+ else:
+ sysvals.cgskip = sysvals.configFile(val)
+ if(not sysvals.cgskip):
+				doError('%s does not exist' % val)
+ elif(arg == '-callloop-maxgap'):
+ sysvals.callloopmaxgap = getArgFloat('-callloop-maxgap', args, 0.0, 1.0)
+ elif(arg == '-callloop-maxlen'):
+ sysvals.callloopmaxlen = getArgFloat('-callloop-maxlen', args, 0.0, 1.0)
+ elif(arg == '-cmd'):
+ try:
+ val = args.next()
+ except:
+ doError('No command string supplied', True)
+ sysvals.testcommand = val
+ sysvals.suspendmode = 'command'
+ elif(arg == '-expandcg'):
+ sysvals.cgexp = True
+ elif(arg == '-srgap'):
+ sysvals.srgap = 5
+ elif(arg == '-multi'):
+ sysvals.multitest['run'] = True
+ sysvals.multitest['count'] = getArgInt('-multi n d (exec count)', args, 2, 1000000)
+ sysvals.multitest['delay'] = getArgInt('-multi n d (delay between tests)', args, 0, 3600)
+ elif(arg == '-o'):
+ try:
+ val = args.next()
+ except:
+ doError('No subdirectory name supplied', True)
+ sysvals.outdir = sysvals.setOutputFolder(val)
+ elif(arg == '-config'):
+ try:
+ val = args.next()
+ except:
+ doError('No text file supplied', True)
+ file = sysvals.configFile(val)
+ if(not file):
+ doError('%s does not exist' % val)
+ configFromFile(file)
+ elif(arg == '-fadd'):
+ try:
+ val = args.next()
+ except:
+ doError('No text file supplied', True)
+ file = sysvals.configFile(val)
+ if(not file):
+ doError('%s does not exist' % val)
+ sysvals.addFtraceFilterFunctions(file)
+ elif(arg == '-dmesg'):
+ try:
+ val = args.next()
+ except:
+ doError('No dmesg file supplied', True)
+ sysvals.notestrun = True
+ sysvals.dmesgfile = val
+ if(os.path.exists(sysvals.dmesgfile) == False):
+ doError('%s does not exist' % sysvals.dmesgfile)
+ elif(arg == '-ftrace'):
+ try:
+ val = args.next()
+ except:
+ doError('No ftrace file supplied', True)
+ sysvals.notestrun = True
+ sysvals.ftracefile = val
+ if(os.path.exists(sysvals.ftracefile) == False):
+ doError('%s does not exist' % sysvals.ftracefile)
+ elif(arg == '-summary'):
+ try:
+ val = args.next()
+ except:
+ doError('No directory supplied', True)
+ cmd = 'summary'
+ sysvals.outdir = val
+ sysvals.notestrun = True
+ if(os.path.isdir(val) == False):
+				doError('%s is not accessible' % val)
+ elif(arg == '-filter'):
+ try:
+ val = args.next()
+ except:
+ doError('No devnames supplied', True)
+ sysvals.setDeviceFilter(val)
+ elif(arg == '-result'):
+ try:
+ val = args.next()
+ except:
+ doError('No result file supplied', True)
+ sysvals.result = val
+ else:
+ doError('Invalid argument: '+arg, True)
+
+ # compatibility errors
+ if(sysvals.usecallgraph and sysvals.usedevsrc):
+ doError('-dev is not compatible with -f')
+ if(sysvals.usecallgraph and sysvals.useprocmon):
+ doError('-proc is not compatible with -f')
+
+ if sysvals.usecallgraph and sysvals.cgskip:
+ sysvals.vprint('Using cgskip file: %s' % sysvals.cgskip)
+ sysvals.setCallgraphBlacklist(sysvals.cgskip)
+
+ # callgraph size cannot exceed device size
+ if sysvals.mincglen < sysvals.mindevlen:
+ sysvals.mincglen = sysvals.mindevlen
+
+ # remove existing buffers before calculating memory
+ if(sysvals.usecallgraph or sysvals.usedevsrc):
+ sysvals.fsetVal('16', 'buffer_size_kb')
+ sysvals.cpuInfo()
+
+ # just run a utility command and exit
+ if(cmd != ''):
+ if(cmd == 'status'):
+ statusCheck(True)
+ elif(cmd == 'fpdt'):
+ getFPDT(True)
+ elif(cmd == 'battery'):
+ print 'AC Connect: %s\nCharge: %d' % getBattery()
+ elif(cmd == 'sysinfo'):
+ sysvals.printSystemInfo(True)
+ elif(cmd == 'devinfo'):
+ deviceInfo()
+ elif(cmd == 'modes'):
+ print getModes()
+ elif(cmd == 'flist'):
+ sysvals.getFtraceFilterFunctions(True)
+ elif(cmd == 'flistall'):
+ sysvals.getFtraceFilterFunctions(False)
+ elif(cmd == 'summary'):
+ runSummary(sysvals.outdir, True, genhtml)
+ sys.exit()
+
+ # if instructed, re-analyze existing data files
+ if(sysvals.notestrun):
+ stamp = rerunTest()
+ sysvals.outputResult(stamp)
+ sys.exit()
+
+ # verify that we can run a test
+ if(not statusCheck()):
+ doError('Check FAILED, aborting the test run!')
+
+ # extract mem modes and convert
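+	# e.g. suspendmode 'mem-shallow' maps to memmode 'shallow', kernel mode 'standby'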
+ mode = sysvals.suspendmode
+ if 'mem' == mode[:3]:
+ if '-' in mode:
+ memmode = mode.split('-')[-1]
+ else:
+ memmode = 'deep'
+ if memmode == 'shallow':
+ mode = 'standby'
+ elif memmode == 's2idle':
+ mode = 'freeze'
+ else:
+ mode = 'mem'
+ sysvals.memmode = memmode
+ sysvals.suspendmode = mode
+
+ sysvals.systemInfo(dmidecode(sysvals.mempath))
+
+ setRuntimeSuspend(True)
+ if sysvals.display:
+ call('xset -d :0.0 dpms 0 0 0', shell=True)
+ call('xset -d :0.0 s off', shell=True)
+ if sysvals.multitest['run']:
+ # run multiple tests in a separate subdirectory
+ if not sysvals.outdir:
+ s = 'suspend-x%d' % sysvals.multitest['count']
+ sysvals.outdir = datetime.now().strftime(s+'-%y%m%d-%H%M%S')
+ if not os.path.isdir(sysvals.outdir):
+ os.mkdir(sysvals.outdir)
+ for i in range(sysvals.multitest['count']):
+ if(i != 0):
+ print('Waiting %d seconds...' % (sysvals.multitest['delay']))
+ time.sleep(sysvals.multitest['delay'])
+ print('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
+ fmt = 'suspend-%y%m%d-%H%M%S'
+ sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
+ runTest(i+1)
+ print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
+ sysvals.logmsg = ''
+ if not sysvals.skiphtml:
+ runSummary(sysvals.outdir, False, False)
+ sysvals.sudouser(sysvals.outdir)
+ else:
+ if sysvals.outdir:
+ sysvals.testdir = sysvals.outdir
+ # run the test in the current directory
+ runTest()
+ if sysvals.display:
+ call('xset -d :0.0 s reset', shell=True)
+ setRuntimeSuspend(False)
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
new file mode 100755
index 000000000..dbed3d213
--- /dev/null
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -0,0 +1,617 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+#
+""" This utility can be used to debug and tune the performance of the
+intel_pstate driver. This utility can be used in two ways:
+- If there is Linux trace file with pstate_sample events enabled, then
+this utility can parse the trace file and generate performance plots.
+- If user has not specified a trace file as input via command line parameters,
+then this utility enables and collects trace data for a user specified interval
+and generates performance plots.
+
+Prerequisites:
+ Python version 2.7.x or higher
+ gnuplot 5.0 or higher
+ gnuplot-py 1.8 or higher
+ (Most of the distributions have these required packages. They may be called
+ gnuplot-py, python-gnuplot or python3-gnuplot, gnuplot-nox, ... )
+
+ HWP (Hardware P-States) is disabled
+ Kernel config for Linux trace is enabled
+
+ see print_help() for Usage and Output details
+
+"""
+from __future__ import print_function
+from datetime import datetime
+import subprocess
+import os
+import time
+import re
+import signal
+import sys
+import getopt
+import Gnuplot
+from numpy import *
+from decimal import *
+
+__author__ = "Srinivas Pandruvada"
+__copyright__ = " Copyright (c) 2017, Intel Corporation. "
+__license__ = "GPL version 2"
+
+
+MAX_CPUS = 256
+
+# Define the csv file columns
+C_COMM = 18
+C_GHZ = 17
+C_ELAPSED = 16
+C_SAMPLE = 15
+C_DURATION = 14
+C_LOAD = 13
+C_BOOST = 12
+C_FREQ = 11
+C_TSC = 10
+C_APERF = 9
+C_MPERF = 8
+C_TO = 7
+C_FROM = 6
+C_SCALED = 5
+C_CORE = 4
+C_USEC = 3
+C_SEC = 2
+C_CPU = 1
+
+global sample_num, last_sec_cpu, last_usec_cpu, start_time, testname
+
+# 11 digits covers uptime to 115 days
+getcontext().prec = 11
+
+sample_num =0
+last_sec_cpu = [0] * MAX_CPUS
+last_usec_cpu = [0] * MAX_CPUS
+
+def print_help():
+ print('intel_pstate_tracer.py:')
+ print(' Usage:')
+ print(' If the trace file is available, then to simply parse and plot, use (sudo not required):')
+ print(' ./intel_pstate_tracer.py [-c cpus] -t <trace_file> -n <test_name>')
+ print(' Or')
+ print(' ./intel_pstate_tracer.py [--cpu cpus] --trace_file <trace_file> --name <test_name>')
+ print(' To generate trace file, parse and plot, use (sudo required):')
+ print(' sudo ./intel_pstate_tracer.py [-c cpus] -i <interval> -n <test_name> -m <kbytes>')
+ print(' Or')
+ print(' sudo ./intel_pstate_tracer.py [--cpu cpus] --interval <interval> --name <test_name> --memory <kbytes>')
+ print(' Optional argument:')
+ print(' cpus: comma separated list of CPUs')
+ print(' kbytes: Kilo bytes of memory per CPU to allocate to the trace buffer. Default: 10240')
+ print(' Output:')
+ print(' If not already present, creates a "results/test_name" folder in the current working directory with:')
+ print(' cpu.csv - comma separated values file with trace contents and some additional calculations.')
+ print(' cpu???.csv - comma separated values file for CPU number ???.')
+ print(' *.png - a variety of PNG format plot files created from the trace contents and the additional calculations.')
+ print(' Notes:')
+ print(' Avoid the use of _ (underscore) in test names, because in gnuplot it is a subscript directive.')
+ print(' Maximum number of CPUs is {0:d}. If there are more, the script will abort with an error.'.format(MAX_CPUS))
+ print(' Off-line CPUs cause the script to list some warnings, and create some empty files. Use the CPU mask feature for a clean run.')
+ print(' Empty y range warnings for autoscaled plots can occur and can be ignored.')
+
+def plot_perf_busy_with_sample(cpu_index):
+ """ Plot per cpu performance and busy information vs. sample number """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_perf_busy_vs_samples.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+ g_plot('set y2range [0:200]')
+ g_plot('set y2tics 0, 10')
+ g_plot('set title "{} : cpu perf busy vs. sample : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+# Override common
+ g_plot('set xlabel "Samples"')
+ g_plot('set ylabel "P-State"')
+ g_plot('set y2label "Scaled Busy/performance/io-busy(%)"')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y2 title "performance",\\'.format(C_SAMPLE, C_CORE))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 2 axis x1y2 title "scaled-busy",\\'.format(C_SAMPLE, C_SCALED))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 3 axis x1y2 title "io-boost",\\'.format(C_SAMPLE, C_BOOST))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 4 axis x1y1 title "P-State"'.format(C_SAMPLE, C_TO))
+
+def plot_perf_busy(cpu_index):
+ """ Plot some per cpu information """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_perf_busy.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+ g_plot('set y2range [0:200]')
+ g_plot('set y2tics 0, 10')
+ g_plot('set title "{} : perf busy : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+ g_plot('set ylabel "P-State"')
+ g_plot('set y2label "Scaled Busy/performance/io-busy(%)"')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y2 title "performance",\\'.format(C_ELAPSED, C_CORE))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 2 axis x1y2 title "scaled-busy",\\'.format(C_ELAPSED, C_SCALED))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 3 axis x1y2 title "io-boost",\\'.format(C_ELAPSED, C_BOOST))
+ g_plot('"' + file_name + '" using {:d}:{:d} with linespoints linestyle 4 axis x1y1 title "P-State"'.format(C_ELAPSED, C_TO))
+
+def plot_durations(cpu_index):
+ """ Plot per cpu durations """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_durations.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+# Should autoscale be used here? Should seconds be used here?
+ g_plot('set yrange [0:5000]')
+ g_plot('set ytics 0, 500')
+ g_plot('set title "{} : durations : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+ g_plot('set ylabel "Timer Duration (MilliSeconds)"')
+# override common
+ g_plot('set key off')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_DURATION))
+
+def plot_loads(cpu_index):
+ """ Plot per cpu loads """
+
+ file_name = 'cpu{:0>3}.csv'.format(cpu_index)
+ if os.path.exists(file_name):
+ output_png = "cpu%03d_loads.png" % cpu_index
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:100]')
+ g_plot('set ytics 0, 10')
+ g_plot('set title "{} : loads : CPU {:0>3} : {:%F %H:%M}"'.format(testname, cpu_index, datetime.now()))
+ g_plot('set ylabel "CPU load (percent)"')
+# override common
+ g_plot('set key off')
+ set_4_plot_linestyles(g_plot)
+ g_plot('plot "' + file_name + '" using {:d}:{:d} with linespoints linestyle 1 axis x1y1'.format(C_ELAPSED, C_LOAD))
+
+def plot_pstate_cpu_with_sample():
+ """ Plot all cpu information """
+
+ if os.path.exists('cpu.csv'):
+ output_png = 'all_cpu_pstates_vs_samples.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+# override common
+ g_plot('set xlabel "Samples"')
+ g_plot('set ylabel "P-State"')
+ g_plot('set title "{} : cpu pstate vs. sample : {:%F %H:%M}"'.format(testname, datetime.now()))
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_SAMPLE, C_TO)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_pstate_cpu():
+ """ Plot all cpu information from csv files """
+
+ output_png = 'all_cpu_pstates.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:40]')
+ g_plot('set ylabel "P-State"')
+ g_plot('set title "{} : cpu pstates : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+# the following command is really cool, but doesn't work with the CPU masking option because it aborts on the first missing file.
+# plot_str = 'plot for [i=0:*] file=sprintf("cpu%03d.csv",i) title_s=sprintf("cpu%03d",i) file using 16:7 pt 7 ps 1 title title_s'
+#
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_TO)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_load_cpu():
+ """ Plot all cpu loads """
+
+ output_png = 'all_cpu_loads.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:100]')
+ g_plot('set ylabel "CPU load (percent)"')
+ g_plot('set title "{} : cpu loads : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_LOAD)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_frequency_cpu():
+ """ Plot all cpu frequencies """
+
+ output_png = 'all_cpu_frequencies.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:4]')
+ g_plot('set ylabel "CPU Frequency (GHz)"')
+ g_plot('set title "{} : cpu frequencies : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_FREQ)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_duration_cpu():
+ """ Plot all cpu durations """
+
+ output_png = 'all_cpu_durations.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:5000]')
+ g_plot('set ytics 0, 500')
+ g_plot('set ylabel "Timer Duration (MilliSeconds)"')
+ g_plot('set title "{} : cpu durations : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_DURATION)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_scaled_cpu():
+ """ Plot all cpu scaled busy """
+
+ output_png = 'all_cpu_scaled.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+# autoscale this one, no set y range
+ g_plot('set ylabel "Scaled Busy (Unitless)"')
+ g_plot('set title "{} : cpu scaled busy : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_SCALED)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_boost_cpu():
+ """ Plot all cpu IO Boosts """
+
+ output_png = 'all_cpu_boost.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+ g_plot('set yrange [0:100]')
+ g_plot('set ylabel "CPU IO Boost (percent)"')
+ g_plot('set title "{} : cpu io boost : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_BOOST)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def plot_ghz_cpu():
+ """ Plot all cpu tsc ghz """
+
+ output_png = 'all_cpu_ghz.png'
+ g_plot = common_all_gnuplot_settings(output_png)
+# autoscale this one, no set y range
+ g_plot('set ylabel "TSC Frequency (GHz)"')
+ g_plot('set title "{} : cpu TSC Frequencies (Sanity check calculation) : {:%F %H:%M}"'.format(testname, datetime.now()))
+
+ title_list = subprocess.check_output('ls cpu???.csv | sed -e \'s/.csv//\'',shell=True).decode('utf-8').replace('\n', ' ')
+ plot_str = "plot for [i in title_list] i.'.csv' using {:d}:{:d} pt 7 ps 1 title i".format(C_ELAPSED, C_GHZ)
+ g_plot('title_list = "{}"'.format(title_list))
+ g_plot(plot_str)
+
+def common_all_gnuplot_settings(output_png):
+ """ common gnuplot settings for multiple CPUs on one graph. """
+
+ g_plot = common_gnuplot_settings()
+ g_plot('set output "' + output_png + '"')
+ return(g_plot)
+
+def common_gnuplot_settings():
+ """ common gnuplot settings. """
+
+ g_plot = Gnuplot.Gnuplot(persist=1)
+# The following line is for rigor only. It seems to be assumed for .csv files
+ g_plot('set datafile separator \",\"')
+ g_plot('set ytics nomirror')
+ g_plot('set xtics nomirror')
+ g_plot('set xtics font ", 10"')
+ g_plot('set ytics font ", 10"')
+ g_plot('set tics out scale 1.0')
+ g_plot('set grid')
+ g_plot('set key out horiz')
+ g_plot('set key bot center')
+ g_plot('set key samplen 2 spacing .8 font ", 9"')
+ g_plot('set term png size 1200, 600')
+ g_plot('set title font ", 11"')
+ g_plot('set ylabel font ", 10"')
+ g_plot('set xlabel font ", 10"')
+ g_plot('set xlabel offset 0, 0.5')
+ g_plot('set xlabel "Elapsed Time (Seconds)"')
+ return(g_plot)
+
+def set_4_plot_linestyles(g_plot):
+ """ set the linestyles used for 4 plots in 1 graph. """
+
+ g_plot('set style line 1 linetype 1 linecolor rgb "green" pointtype -1')
+ g_plot('set style line 2 linetype 1 linecolor rgb "red" pointtype -1')
+ g_plot('set style line 3 linetype 1 linecolor rgb "purple" pointtype -1')
+ g_plot('set style line 4 linetype 1 linecolor rgb "blue" pointtype -1')
+
+def store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz):
+ """ Store master csv file information """
+
+ global graph_data_present
+
+ if cpu_mask[cpu_int] == 0:
+ return
+
+ try:
+ f_handle = open('cpu.csv', 'a')
+ string_buffer = "CPU_%03u, %05u, %06u, %u, %u, %u, %u, %u, %u, %u, %.4f, %u, %.2f, %.3f, %u, %.3f, %.3f, %s\n" % (cpu_int, int(time_pre_dec), int(time_post_dec), int(core_busy), int(scaled), int(_from), int(_to), int(mperf), int(aperf), int(tsc), freq_ghz, int(io_boost), load, duration_ms, sample_num, elapsed_time, tsc_ghz, common_comm)
+ f_handle.write(string_buffer)
+ f_handle.close()
+ except:
+ print('IO error cpu.csv')
+ return
+
+ graph_data_present = True
+
+def split_csv():
+ """ separate the master csv file into per CPU csv files. """
+
+ global current_max_cpu
+
+ if os.path.exists('cpu.csv'):
+ for index in range(0, current_max_cpu + 1):
+ if cpu_mask[int(index)] != 0:
+ os.system('grep -m 1 common_cpu cpu.csv > cpu{:0>3}.csv'.format(index))
+ os.system('grep CPU_{:0>3} cpu.csv >> cpu{:0>3}.csv'.format(index, index))
+
+def fix_ownership(path):
+ """Change the owner of the file to SUDO_UID, if required"""
+
+ uid = os.environ.get('SUDO_UID')
+ gid = os.environ.get('SUDO_GID')
+ if uid is not None:
+ os.chown(path, int(uid), int(gid))
+
+def cleanup_data_files():
+ """ clean up existing data files """
+
+ if os.path.exists('cpu.csv'):
+ os.remove('cpu.csv')
+ f_handle = open('cpu.csv', 'a')
+ f_handle.write('common_cpu, common_secs, common_usecs, core_busy, scaled_busy, from, to, mperf, aperf, tsc, freq, boost, load, duration_ms, sample_num, elapsed_time, tsc_ghz, common_comm')
+ f_handle.write('\n')
+ f_handle.close()
+
+def clear_trace_file():
+ """ Clear trace file """
+
+ try:
+ f_handle = open('/sys/kernel/debug/tracing/trace', 'w')
+ f_handle.close()
+ except:
+ print('IO error clearing trace file ')
+ sys.exit(2)
+
+def enable_trace():
+ """ Enable trace """
+
+ try:
+ open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
+ , 'w').write("1")
+ except:
+ print('IO error enabling trace ')
+ sys.exit(2)
+
+def disable_trace():
+ """ Disable trace """
+
+ try:
+ open('/sys/kernel/debug/tracing/events/power/pstate_sample/enable'
+ , 'w').write("0")
+ except:
+ print('IO error disabling trace ')
+ sys.exit(2)
+
+def set_trace_buffer_size():
+ """ Set trace buffer size """
+
+ try:
+ with open('/sys/kernel/debug/tracing/buffer_size_kb', 'w') as fp:
+ fp.write(memory)
+ except:
+ print('IO error setting trace buffer size ')
+ sys.exit(2)
+
+def free_trace_buffer():
+ """ Free the trace buffer memory """
+
+ try:
+ open('/sys/kernel/debug/tracing/buffer_size_kb'
+ , 'w').write("1")
+ except:
+ print('IO error freeing trace buffer ')
+ sys.exit(2)
+
+def read_trace_data(filename):
+ """ Read and parse trace data """
+
+ global current_max_cpu
+ global sample_num, last_sec_cpu, last_usec_cpu, start_time
+
+ try:
+ data = open(filename, 'r').read()
+ except:
+ print('Error opening ', filename)
+ sys.exit(2)
+
+ for line in data.splitlines():
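+ # An illustrative pstate_sample line this regex is meant to match
+ # (approximate format; values hypothetical):
+ # <idle>-0 [003] d.h. 153.456789: pstate_sample: core_busy=20
+ # scaled=19 from=12 to=12 mperf=20886 aperf=20469 tsc=748401 freq=1200000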
+ search_obj = \
+ re.search(r'(^(.*?)\[)((\d+)[^\]])(.*?)(\d+)([.])(\d+)(.*?core_busy=)(\d+)(.*?scaled=)(\d+)(.*?from=)(\d+)(.*?to=)(\d+)(.*?mperf=)(\d+)(.*?aperf=)(\d+)(.*?tsc=)(\d+)(.*?freq=)(\d+)'
+ , line)
+
+ if search_obj:
+ cpu = search_obj.group(3)
+ cpu_int = int(cpu)
+ cpu = str(cpu_int)
+
+ time_pre_dec = search_obj.group(6)
+ time_post_dec = search_obj.group(8)
+ core_busy = search_obj.group(10)
+ scaled = search_obj.group(12)
+ _from = search_obj.group(14)
+ _to = search_obj.group(16)
+ mperf = search_obj.group(18)
+ aperf = search_obj.group(20)
+ tsc = search_obj.group(22)
+ freq = search_obj.group(24)
+ common_comm = search_obj.group(2).replace(' ', '')
+
+ # Not all kernel versions have io_boost field
+ io_boost = '0'
+ search_obj = re.search(r'.*?io_boost=(\d+)', line)
+ if search_obj:
+ io_boost = search_obj.group(1)
+
+ if sample_num == 0 :
+ start_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000)
+ sample_num += 1
+
+ if last_sec_cpu[cpu_int] == 0 :
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ else :
+ duration_us = (int(time_pre_dec) - int(last_sec_cpu[cpu_int])) * 1000000 + (int(time_post_dec) - int(last_usec_cpu[cpu_int]))
+ duration_ms = Decimal(duration_us) / Decimal(1000)
+ last_sec_cpu[cpu_int] = time_pre_dec
+ last_usec_cpu[cpu_int] = time_post_dec
+ elapsed_time = Decimal(time_pre_dec) + Decimal(time_post_dec) / Decimal(1000000) - start_time
+ load = Decimal(int(mperf)*100)/ Decimal(tsc)
+ freq_ghz = Decimal(freq)/Decimal(1000000)
+# Sanity check calculation, typically anomalies indicate missed samples
+# However, check for 0 (should never occur)
+ tsc_ghz = Decimal(0)
+ if duration_ms != Decimal(0) :
+ tsc_ghz = Decimal(tsc)/duration_ms/Decimal(1000000)
+ store_csv(cpu_int, time_pre_dec, time_post_dec, core_busy, scaled, _from, _to, mperf, aperf, tsc, freq_ghz, io_boost, common_comm, load, duration_ms, sample_num, elapsed_time, tsc_ghz)
+
+ if cpu_int > current_max_cpu:
+ current_max_cpu = cpu_int
+# End of for each trace line loop
+# Now separate the main overall csv file into per CPU csv files.
+ split_csv()
+
+def signal_handler(signal, frame):
+ print(' SIGINT: Forcing cleanup before exit.')
+ if interval:
+ disable_trace()
+ clear_trace_file()
+ # Free the memory
+ free_trace_buffer()
+ sys.exit(0)
+
+signal.signal(signal.SIGINT, signal_handler)
+
+interval = ""
+filename = ""
+cpu_list = ""
+testname = ""
+memory = "10240"
+graph_data_present = False
+
+valid1 = False
+valid2 = False
+
+cpu_mask = zeros((MAX_CPUS,), dtype=int)
+
+try:
+ opts, args = getopt.getopt(sys.argv[1:],"ht:i:c:n:m:",["help","trace_file=","interval=","cpu=","name=","memory="])
+except getopt.GetoptError:
+ print_help()
+ sys.exit(2)
+for opt, arg in opts:
+ if opt == '-h':
+ print_help()
+ sys.exit()
+ elif opt in ("-t", "--trace_file"):
+ valid1 = True
+ location = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
+ filename = os.path.join(location, arg)
+ elif opt in ("-i", "--interval"):
+ valid1 = True
+ interval = arg
+ elif opt in ("-c", "--cpu"):
+ cpu_list = arg
+ elif opt in ("-n", "--name"):
+ valid2 = True
+ testname = arg
+ elif opt in ("-m", "--memory"):
+ memory = arg
+
+if not (valid1 and valid2):
+ print_help()
+ sys.exit()
+
+if cpu_list:
+ for p in re.split("[,]", cpu_list):
+ if int(p) < MAX_CPUS :
+ cpu_mask[int(p)] = 1
+else:
+ for i in range (0, MAX_CPUS):
+ cpu_mask[i] = 1
+
+if not os.path.exists('results'):
+ os.mkdir('results')
+ # The regular user needs to own the directory, not root.
+ fix_ownership('results')
+
+os.chdir('results')
+if os.path.exists(testname):
+ print('The test name directory already exists. Please provide a unique test name. Test re-run not supported, yet.')
+ sys.exit()
+os.mkdir(testname)
+# The regular user needs to own the directory, not root.
+fix_ownership(testname)
+os.chdir(testname)
+
+# Temporary (or perhaps not)
+cur_version = sys.version_info
+print('python version (should be >= 2.7):')
+print(cur_version)
+
+# Left as "cleanup" for potential future re-run ability.
+cleanup_data_files()
+
+if interval:
+ filename = "/sys/kernel/debug/tracing/trace"
+ clear_trace_file()
+ set_trace_buffer_size()
+ enable_trace()
+ print('Sleeping for ', interval, 'seconds')
+ time.sleep(int(interval))
+ disable_trace()
+
+current_max_cpu = 0
+
+read_trace_data(filename)
+
+if interval:
+ clear_trace_file()
+ # Free the memory
+ free_trace_buffer()
+
+if graph_data_present == False:
+ print('No valid data to plot')
+ sys.exit(2)
+
+for cpu_no in range(0, current_max_cpu + 1):
+ plot_perf_busy_with_sample(cpu_no)
+ plot_perf_busy(cpu_no)
+ plot_durations(cpu_no)
+ plot_loads(cpu_no)
+
+plot_pstate_cpu_with_sample()
+plot_pstate_cpu()
+plot_load_cpu()
+plot_frequency_cpu()
+plot_duration_cpu()
+plot_scaled_cpu()
+plot_boost_cpu()
+plot_ghz_cpu()
+
+# It is preferable, but not necessary, that the regular user owns the files, not root.
+for root, dirs, files in os.walk('.'):
+ for f in files:
+ fix_ownership(f)
+
+os.chdir('../../')
diff --git a/tools/power/x86/turbostat/.gitignore b/tools/power/x86/turbostat/.gitignore
new file mode 100644
index 000000000..7521370d3
--- /dev/null
+++ b/tools/power/x86/turbostat/.gitignore
@@ -0,0 +1 @@
+turbostat
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
new file mode 100644
index 000000000..ff058bfbc
--- /dev/null
+++ b/tools/power/x86/turbostat/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+CC = $(CROSS_COMPILE)gcc
+BUILD_OUTPUT := $(CURDIR)
+PREFIX ?= /usr
+DESTDIR ?=
+
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
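+
+# Example: "make O=/tmp/ts" (illustrative path) builds turbostat into /tmp/ts.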
+
+turbostat : turbostat.c
+CFLAGS += -Wall -I../../../include
+CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
+CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+
+%: %.c
+ @mkdir -p $(BUILD_OUTPUT)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
+
+.PHONY : clean
+clean :
+ @rm -f $(BUILD_OUTPUT)/turbostat
+
+install : turbostat
+ install -d $(DESTDIR)$(PREFIX)/bin
+ install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat
+ install -d $(DESTDIR)$(PREFIX)/share/man/man8
+ install -m 644 turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
new file mode 100644
index 000000000..a6db83a88
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -0,0 +1,351 @@
+.TH TURBOSTAT 8
+.SH NAME
+turbostat \- Report processor frequency and idle statistics
+.SH SYNOPSIS
+.ft B
+.B turbostat
+.RB [ Options ]
+.RB command
+.br
+.B turbostat
+.RB [ Options ]
+.RB [ "\--interval seconds" ]
+.SH DESCRIPTION
+\fBturbostat \fP reports processor topology, frequency,
+idle power-state statistics, temperature and power on X86 processors.
+There are two ways to invoke turbostat.
+The first method is to supply a
+\fBcommand\fP, which is forked and statistics are printed
+in one-shot upon its completion.
+The second method is to omit the command,
+and turbostat displays statistics every 5 seconds.
+The 5-second interval can be changed using the --interval option.
+.PP
+Some information is not available on older processors.
+.SS Options
+Options can be specified with a single or double '-', and only as much of the option
+name as is needed to disambiguate it from others is required. Note that options are case-sensitive.
+.PP
+\fB--add attributes\fP adds a column with a counter having the specified 'attributes'. The 'location' attribute is required; all others are optional.
+.nf
+ location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP}
+ msrDDD is a decimal offset, eg. msr16
+ msr0xXXX is a hex offset, eg. msr0x10
+ /sys/path... is an absolute path to a sysfs attribute
+
+ scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP}
+ sample and print the counter for every cpu, core, or package.
+ default: cpu
+
+ size: {\fBu32\fP | \fBu64\fP }
+ MSRs are read as 64-bits, u32 truncates the displayed value to 32-bits.
+ default: u64
+
+ format: {\fBraw\fP | \fBdelta\fP | \fBpercent\fP}
+ 'raw' shows the MSR contents in hex.
+ 'delta' shows the difference in values during the measurement interval.
+ 'percent' shows the delta as a percentage of the cycles elapsed.
+ default: delta
+
+ name: "name_string"
+ Any string that does not match a key-word above is used
+ as the column header.
+.fi
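+.PP
+For example, "--add msr0x199,u32,raw,PRF_CTRL" defines a column named
+"PRF_CTRL" showing the raw 32-bit contents of MSR 0x199 on every cpu;
+see the ADD COUNTER EXAMPLE section below.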
+.PP
+\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44
+.PP
+\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group.
+.PP
+\fB--enable column\fP show the specified built-in columns, which are otherwise disabled by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
+The column name "all" can be used to enable all disabled-by-default built-in counters.
+.PP
+\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group.
+.PP
+\fB--Dump\fP displays the raw counter values.
+.PP
+\fB--quiet\fP Do not decode and print the system configuration header information.
+.PP
+\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
+.PP
+\fB--num_iterations num\fP number of measurement iterations.
+.PP
+\fB--out output_file\fP turbostat output is written to the specified output_file.
+The file is truncated if it already exists, and it is created if it does not exist.
+.PP
+\fB--help\fP displays usage for the most common parameters.
+.PP
+\fB--Joules\fP displays energy in Joules, rather than dividing Joules by time to print power in Watts.
+.PP
+\fB--list\fP display column header names available for use by --show and --hide, then exit.
+.PP
+\fB--Summary\fP limits output to a 1-line System Summary for each interval.
+.PP
+\fB--TCC temperature\fP sets the Thermal Control Circuit temperature for systems which do not export that value. This is used for making sense of the Digital Thermal Sensor outputs, as they return degrees Celsius below the TCC activation temperature.
+.PP
+\fB--version\fP displays the version.
+.PP
+The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit,
+displays the statistics gathered since it was forked.
+.PP
+.SH ROW DESCRIPTIONS
+The system configuration dump (if --quiet is not used) is followed by statistics. The first row of the statistics labels the content of each column (below). The second row of statistics is the system summary line. The system summary line has a '-' in the columns for the Package, Core, and CPU. The contents of the system summary line depend on the type of column. Columns that count items (eg. IRQ) show the sum across all CPUs in the system. Columns that show a percentage show the average across all CPUs in the system. Columns that dump raw MSR values simply show 0 in the summary. After the system summary row, each row describes a specific Package/Core/CPU. Note that if the --cpu parameter is used to limit which specific CPUs are displayed, turbostat will still collect statistics for all CPUs in the system and will still show the system summary for all CPUs in the system.
+.SH COLUMN DESCRIPTIONS
+.nf
+\fBusec\fP For each CPU, the number of microseconds elapsed during counter collection, including thread migration -- if any. This counter is disabled by default, and is enabled with "--enable usec", or --debug. On the summary row, usec refers to the total elapsed time to collect the counters on all cpus.
+\fBTime_Of_Day_Seconds\fP For each CPU, the gettimeofday(2) value (seconds.subsec since Epoch) when the counters ending the measurement interval were collected. This column is disabled by default, and can be enabled with "--enable Time_Of_Day_Seconds" or "--debug". On the summary row, Time_Of_Day_Seconds refers to the timestamp following collection of counters on the last CPU.
+\fBCore\fP processor core number. Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT).
+\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
+\fBPackage\fP processor package number -- not present on systems with a single processor package.
+\fBAvg_MHz\fP number of cycles executed divided by time elapsed. Note that this includes idle-time when 0 instructions are executed.
+\fBBusy%\fP percent of the measurement interval that the CPU executes instructions, aka. % of time in "C0" state.
+\fBBzy_MHz\fP average clock rate while the CPU was not idle (ie. in "c0" state).
+\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
+\fBIRQ\fP The number of interrupts serviced by that CPU during the measurement interval. The system total line is the sum of interrupts serviced across all CPUs. turbostat parses /proc/interrupts to generate this summary.
+\fBSMI\fP The number of System Management Interrupts serviced by that CPU during the measurement interval. While this counter is actually per-CPU, SMI are triggered on all processors, so the number should be the same for all CPUs.
+\fBC1, C2, C3...\fP The number of times Linux requested the C1, C2, C3 idle state during the measurement interval. The system summary line shows the sum for all CPUs. These are C-state names as exported in /sys/devices/system/cpu/cpu*/cpuidle/state*/name. While their names are generic, their attributes are processor specific. The system description section of the output shows which MWAIT sub-states they are mapped to on each system.
+\fBC1%, C2%, C3%\fP The residency percentage that Linux requested C1, C2, C3.... The system summary is the average of all CPUs in the system. Note that these are software counters, reflecting what was requested. The hardware counters reflect what was actually achieved.
+\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. These numbers are from hardware residency counters.
+\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor.
+\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor.
+\fBGFX%rc6\fP The percentage of time the GPU is in the "render C6" state, rc6, during the measurement interval. From /sys/class/drm/card0/power/rc6_residency_ms.
+\fBGFXMHz\fP Instantaneous snapshot of what sysfs presents at the end of the measurement interval. From /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz.
+\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. These numbers are from hardware residency counters.
+\fBPkgWatt\fP Watts consumed by the whole package.
+\fBCorWatt\fP Watts consumed by the core part of the package.
+\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors.
+\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors.
+\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. Note that the system summary is the sum of the package throttling time, and thus may be higher than 100% on a multi-package system. Note that the meaning of this field is model specific. For example, some hardware increments this counter when RAPL responds to thermal limits, but does not increment this counter when RAPL responds to power limits. Comparing PkgWatt and PkgTmp to system limits is necessary.
+\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
+.fi
+.SH TOO MUCH INFORMATION EXAMPLE
+By default, turbostat dumps all possible information -- a system configuration header, followed by columns for all counters.
+This is ideal for remote debugging: use the "--out" option to save everything to a text file, and get that file to the expert helping you debug.
+.PP
+When you are not interested in all that information, there are several ways to see only what you want. First, the "--quiet" option will skip the configuration information, and turbostat will show only the counter columns. Second, you can reduce the columns with the "--hide" and "--show" options. If you use the "--show" option, then turbostat will show only the columns you list. If you use the "--hide" option, turbostat will show all columns, except the ones you list.
+.PP
+To find out what columns are available for --show and --hide, the "--list" option is available. For convenience, the special string "sysfs" can be used to refer to all of the sysfs C-state counters at once:
+.nf
+sudo ./turbostat --show sysfs --quiet sleep 10
+10.003837 sec
+ C1 C1E C3 C6 C7s C1% C1E% C3% C6% C7s%
+ 4 21 2 2 459 0.14 0.82 0.00 0.00 98.93
+ 1 17 2 2 130 0.00 0.02 0.00 0.00 99.80
+ 0 0 0 0 31 0.00 0.00 0.00 0.00 99.95
+ 2 1 0 0 52 1.14 6.49 0.00 0.00 92.21
+ 1 2 0 0 52 0.00 0.08 0.00 0.00 99.86
+ 0 0 0 0 71 0.00 0.00 0.00 0.00 99.89
+ 0 0 0 0 25 0.00 0.00 0.00 0.00 99.96
+ 0 0 0 0 74 0.00 0.00 0.00 0.00 99.94
+ 0 1 0 0 24 0.00 0.00 0.00 0.00 99.84
+.fi
+.PP
+.SH ONE SHOT COMMAND EXAMPLE
+If turbostat is invoked with a command, it will fork that command
+and output the statistics gathered after the command exits.
+In this case, turbostat output goes to stderr, by default.
+Output can instead be saved to a file using the --out option.
+In this example, the "sleep 10" command is forked, and turbostat waits for it to complete before saving all statistics into "ts.out". Note that "sleep 10" is not part of turbostat, but is simply an example of a command that turbostat can fork. The "ts.out" file is what you want to edit in a very wide window, paste into a spreadsheet, or attach to a bugzilla entry.
+
+.nf
+[root@hsw]# ./turbostat -o ts.out sleep 10
+[root@hsw]#
+.fi
+
+.SH PERIODIC INTERVAL EXAMPLE
+Without a command to fork, turbostat displays statistics every 5 seconds.
+Periodic output goes to stdout, by default, unless --out is used to specify an output file.
+The 5-second interval can be changed with the "-i sec" option.
+.nf
+sudo ./turbostat --quiet --hide sysfs,IRQ,SMI,CoreTmp,PkgTmp,GFX%rc6,GFXMHz,PkgWatt,CorWatt,GFXWatt
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
+ - - 488 12.52 3900 3498 12.50 0.00 0.00 74.98
+ 0 0 5 0.13 3900 3498 99.87 0.00 0.00 0.00
+ 0 4 3897 99.99 3900 3498 0.01
+ 1 1 0 0.00 3856 3498 0.01 0.00 0.00 99.98
+ 1 5 0 0.00 3861 3498 0.01
+ 2 2 1 0.02 3889 3498 0.03 0.00 0.00 99.95
+ 2 6 0 0.00 3863 3498 0.05
+ 3 3 0 0.01 3869 3498 0.02 0.00 0.00 99.97
+ 3 7 0 0.00 3878 3498 0.03
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz CPU%c1 CPU%c3 CPU%c6 CPU%c7
+ - - 491 12.59 3900 3498 12.42 0.00 0.00 74.99
+ 0 0 27 0.69 3900 3498 99.31 0.00 0.00 0.00
+ 0 4 3898 99.99 3900 3498 0.01
+ 1 1 0 0.00 3883 3498 0.01 0.00 0.00 99.99
+ 1 5 0 0.00 3898 3498 0.01
+ 2 2 0 0.01 3889 3498 0.02 0.00 0.00 99.98
+ 2 6 0 0.00 3889 3498 0.02
+ 3 3 0 0.00 3856 3498 0.01 0.00 0.00 99.99
+ 3 7 0 0.00 3897 3498 0.01
+.fi
+This example also shows the use of the --hide option to skip columns that are not wanted.
+Note that cpu4 in this example is 99.99% busy, while the other CPUs are all under 1% busy.
+Notice that cpu4's HT sibling is cpu0, which is under 1% busy, but can get into CPU%c1 only,
+because cpu4's activity on shared hardware keeps it from entering a deeper C-state.
+
+.SH SYSTEM CONFIGURATION INFORMATION EXAMPLE
+
+By default, turbostat always dumps system configuration information
+before taking measurements. In the example above, "--quiet" is used
+to suppress that output. Here is an example of the configuration information:
+.nf
+turbostat version 2017.02.15 - Len Brown <lenb@kernel.org>
+CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3)
+CPUID(1): SSE3 MONITOR - EIST TM2 TSC MSR ACPI-TM TM
+CPUID(6): APERF, TURBO, DTS, PTM, No-HWP, No-HWPnotify, No-HWPwindow, No-HWPepp, No-HWPpkg, EPB
+cpu4: MSR_IA32_MISC_ENABLE: 0x00850089 (TCC EIST No-MWAIT PREFETCH TURBO)
+CPUID(7): No-SGX
+cpu4: MSR_MISC_PWR_MGMT: 0x00400000 (ENable-EIST_Coordination DISable-EPB DISable-OOB)
+RAPL: 3121 sec. Joule Counter Range, at 84 Watts
+cpu4: MSR_PLATFORM_INFO: 0x80838f3012300
+8 * 100.0 = 800.0 MHz max efficiency frequency
+35 * 100.0 = 3500.0 MHz base frequency
+cpu4: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled)
+cpu4: MSR_TURBO_RATIO_LIMIT: 0x25262727
+37 * 100.0 = 3700.0 MHz max turbo 4 active cores
+38 * 100.0 = 3800.0 MHz max turbo 3 active cores
+39 * 100.0 = 3900.0 MHz max turbo 2 active cores
+39 * 100.0 = 3900.0 MHz max turbo 1 active cores
+cpu4: MSR_CONFIG_TDP_NOMINAL: 0x00000023 (base_ratio=35)
+cpu4: MSR_CONFIG_TDP_LEVEL_1: 0x00000000 ()
+cpu4: MSR_CONFIG_TDP_LEVEL_2: 0x00000000 ()
+cpu4: MSR_CONFIG_TDP_CONTROL: 0x80000000 ( lock=1)
+cpu4: MSR_TURBO_ACTIVATION_RATIO: 0x00000000 (MAX_NON_TURBO_RATIO=0 lock=0)
+cpu4: MSR_PKG_CST_CONFIG_CONTROL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0)
+cpu4: POLL: CPUIDLE CORE POLL IDLE
+cpu4: C1: MWAIT 0x00
+cpu4: C1E: MWAIT 0x01
+cpu4: C3: MWAIT 0x10
+cpu4: C6: MWAIT 0x20
+cpu4: C7s: MWAIT 0x32
+cpu4: MSR_MISC_FEATURE_CONTROL: 0x00000000 (L2-Prefetch L2-Prefetch-pair L1-Prefetch L1-IP-Prefetch)
+cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced)
+cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Transitions, MultiCoreTurbo, Amps, Auto-HWP, )
+cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: )
+cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, )
+cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.)
+cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.)
+cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked)
+cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled)
+cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled)
+cpu0: MSR_PP0_POLICY: 0
+cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked)
+cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: MSR_PP1_POLICY: 0
+cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked)
+cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled)
+cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C)
+cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884c0800 (24 C)
+cpu0: MSR_IA32_THERM_STATUS: 0x884c0000 (24 C +/- 1)
+cpu1: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
+cpu2: MSR_IA32_THERM_STATUS: 0x884e0000 (22 C +/- 1)
+cpu3: MSR_IA32_THERM_STATUS: 0x88510000 (19 C +/- 1)
+cpu4: MSR_PKGC3_IRTL: 0x00008842 (valid, 67584 ns)
+cpu4: MSR_PKGC6_IRTL: 0x00008873 (valid, 117760 ns)
+cpu4: MSR_PKGC7_IRTL: 0x00008891 (valid, 148480 ns)
+.fi
+The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency
+available at the minimum package voltage. The \fBTSC frequency\fP is the base
+frequency of the processor -- this should match the brand string
+in /proc/cpuinfo. This base frequency
+should be sustainable on all CPUs indefinitely, given nominal power and cooling.
+The remaining rows show what maximum turbo frequency is possible
+depending on the number of idle cores. Note that not all information is
+available on all processors.
+.SH ADD COUNTER EXAMPLE
+Here we limit turbostat to showing just the CPU number for cpu0 - cpu3.
+We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL),
+labeling it with the column header "PRF_CTRL", and displaying it only once,
+after the conclusion of a 0.1 second sleep.
+.nf
+sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1
+0.101604 sec
+CPU PRF_CTRL
+- 0x00000000
+0 0x00000c00
+1 0x00000800
+2 0x00000a00
+3 0x00000800
+
+.fi
+
+.SH INPUT
+
+For interval-mode, turbostat will immediately end the current interval
+when it sees a newline on standard input.
+turbostat will then start the next interval.
+Control-C sends a SIGINT to turbostat,
+which immediately aborts the program with no further processing.
+.SH SIGNALS
+
+SIGINT will interrupt interval-mode.
+The end-of-interval data will be collected and displayed before turbostat exits.
+
+SIGUSR1 will end the current interval;
+end-of-interval data will be collected and displayed before turbostat
+starts a new interval.
+.SH NOTES
+
+.B "turbostat "
+must be run as root.
+Alternatively, non-root users can be enabled to run turbostat this way:
+
+# setcap cap_sys_rawio=ep ./turbostat
+
+# chmod +r /dev/cpu/*/msr
+
+.B "turbostat "
+reads hardware counters, but doesn't write them.
+So it will not interfere with the OS or other programs, including
+multiple invocations of itself.
+
+\fBturbostat \fP
+may work poorly on Linux-2.6.20 through 2.6.29,
+as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF MSRs
+in those kernels.
+
+AVG_MHz = APERF_delta/measurement_interval. This is the actual
+number of elapsed cycles divided by the entire sample interval --
+including idle time. Note that this calculation is resilient
+to systems lacking a non-stop TSC.
+
+TSC_MHz = TSC_delta/measurement_interval.
+On a system with an invariant TSC, this value will be constant
+and will closely match the base frequency value shown
+in the brand string in /proc/cpuinfo. On a system where
+the TSC stops in idle, TSC_MHz will drop
+below the processor's base frequency.
+
+Busy% = MPERF_delta/TSC_delta
+
+Bzy_MHz = TSC_delta*APERF_delta/MPERF_delta/measurement_interval
+
+Note that these calculations depend on TSC_delta, so they
+are not reliable during intervals when the TSC is not running at the base frequency.
+
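+The formulas above amount to the following sketch (illustrative C, not
+turbostat source; the *_delta and interval_sec variables are hypothetical):
+.nf
+double avg_mhz  = aperf_delta / 1e6 / interval_sec;
+double busy_pct = 100.0 * mperf_delta / tsc_delta;
+double tsc_mhz  = tsc_delta / 1e6 / interval_sec;
+double bzy_mhz  = tsc_delta * aperf_delta / mperf_delta / 1e6 / interval_sec;
+.fi
+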
+Turbostat data collection is not atomic.
+Extremely short measurement intervals (much less than 1 second),
+or system activity that prevents turbostat from being able
+to run on all CPUs to quickly collect data, will result in
+inconsistent results.
+
+The APERF, MPERF MSRs are defined to count non-halted cycles.
+Although it is not guaranteed by the architecture, turbostat assumes
+that they count at TSC rate, which is true on all processors tested to date.
+
+.SH REFERENCES
+Volume 3B: System Programming Guide
+http://www.intel.com/products/processor/manuals/
+
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+
+.SH "SEE ALSO"
+msr(4), vmstat(8)
+.PP
+.SH AUTHOR
+.nf
+Written by Len Brown <len.brown@intel.com>
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
new file mode 100644
index 000000000..2233cf722
--- /dev/null
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -0,0 +1,5620 @@
+/*
+ * turbostat -- show CPU frequency and C-state residency
+ * on modern Intel and AMD processors.
+ *
+ * Copyright (c) 2013 Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+#include MSRHEADER
+#include INTEL_FAMILY_HEADER
+#include <stdarg.h>
+#include <stdio.h>
+#include <err.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/select.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <dirent.h>
+#include <string.h>
+#include <ctype.h>
+#include <sched.h>
+#include <time.h>
+#include <cpuid.h>
+#include <linux/capability.h>
+#include <errno.h>
+
+char *proc_stat = "/proc/stat";
+FILE *outf;
+int *fd_percpu;
+struct timeval interval_tv = {5, 0};
+struct timespec interval_ts = {5, 0};
+struct timespec one_msec = {0, 1000000};
+unsigned int num_iterations;
+unsigned int debug;
+unsigned int quiet;
+unsigned int shown;
+unsigned int sums_need_wide_columns;
+unsigned int rapl_joules;
+unsigned int summary_only;
+unsigned int list_header_only;
+unsigned int dump_only;
+unsigned int do_snb_cstates;
+unsigned int do_knl_cstates;
+unsigned int do_slm_cstates;
+unsigned int do_cnl_cstates;
+unsigned int use_c1_residency_msr;
+unsigned int has_aperf;
+unsigned int has_epb;
+unsigned int do_irtl_snb;
+unsigned int do_irtl_hsw;
+unsigned int units = 1000000; /* MHz etc */
+unsigned int genuine_intel;
+unsigned int authentic_amd;
+unsigned int max_level, max_extended_level;
+unsigned int has_invariant_tsc;
+unsigned int do_nhm_platform_info;
+unsigned int no_MSR_MISC_PWR_MGMT;
+unsigned int aperf_mperf_multiplier = 1;
+double bclk;
+double base_hz;
+unsigned int has_base_hz;
+double tsc_tweak = 1.0;
+unsigned int show_pkg_only;
+unsigned int show_core_only;
+char *output_buffer, *outp;
+unsigned int do_rapl;
+unsigned int do_dts;
+unsigned int do_ptm;
+unsigned long long gfx_cur_rc6_ms;
+unsigned long long cpuidle_cur_cpu_lpi_us;
+unsigned long long cpuidle_cur_sys_lpi_us;
+unsigned int gfx_cur_mhz;
+unsigned int tcc_activation_temp;
+unsigned int tcc_activation_temp_override;
+double rapl_power_units, rapl_time_units;
+double rapl_dram_energy_units, rapl_energy_units;
+double rapl_joule_counter_range;
+unsigned int do_core_perf_limit_reasons;
+unsigned int has_automatic_cstate_conversion;
+unsigned int do_gfx_perf_limit_reasons;
+unsigned int do_ring_perf_limit_reasons;
+unsigned int crystal_hz;
+unsigned long long tsc_hz;
+int base_cpu;
+double discover_bclk(unsigned int family, unsigned int model);
+unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
+unsigned int has_misc_feature_control;
+unsigned int first_counter_read = 1;
+
+#define RAPL_PKG (1 << 0)
+ /* 0x610 MSR_PKG_POWER_LIMIT */
+ /* 0x611 MSR_PKG_ENERGY_STATUS */
+#define RAPL_PKG_PERF_STATUS (1 << 1)
+ /* 0x613 MSR_PKG_PERF_STATUS */
+#define RAPL_PKG_POWER_INFO (1 << 2)
+ /* 0x614 MSR_PKG_POWER_INFO */
+
+#define RAPL_DRAM (1 << 3)
+ /* 0x618 MSR_DRAM_POWER_LIMIT */
+ /* 0x619 MSR_DRAM_ENERGY_STATUS */
+#define RAPL_DRAM_PERF_STATUS (1 << 4)
+ /* 0x61b MSR_DRAM_PERF_STATUS */
+#define RAPL_DRAM_POWER_INFO (1 << 5)
+ /* 0x61c MSR_DRAM_POWER_INFO */
+
+#define RAPL_CORES_POWER_LIMIT (1 << 6)
+ /* 0x638 MSR_PP0_POWER_LIMIT */
+#define RAPL_CORE_POLICY (1 << 7)
+ /* 0x63a MSR_PP0_POLICY */
+
+#define RAPL_GFX (1 << 8)
+ /* 0x640 MSR_PP1_POWER_LIMIT */
+ /* 0x641 MSR_PP1_ENERGY_STATUS */
+ /* 0x642 MSR_PP1_POLICY */
+
+#define RAPL_CORES_ENERGY_STATUS (1 << 9)
+ /* 0x639 MSR_PP0_ENERGY_STATUS */
+#define RAPL_CORES (RAPL_CORES_ENERGY_STATUS | RAPL_CORES_POWER_LIMIT)
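+/*
+ * The do_rapl variable declared above is a bitmask of these flags;
+ * an illustrative (hypothetical) probe result:
+ * do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO | RAPL_DRAM;
+ */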
+#define TJMAX_DEFAULT 100
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+/*
+ * buffer size used by sscanf() for added column names
+ * Usually truncated to 7 characters, but also handles 18-character columns for raw 64-bit counters
+ */
+#define NAME_BYTES 20
+#define PATH_BYTES 128
+
+int backwards_count;
+char *progname;
+
+#define CPU_SUBSET_MAXCPUS 1024 /* need to use before probe... */
+cpu_set_t *cpu_present_set, *cpu_affinity_set, *cpu_subset;
+size_t cpu_present_setsize, cpu_affinity_setsize, cpu_subset_size;
+#define MAX_ADDED_COUNTERS 8
+#define MAX_ADDED_THREAD_COUNTERS 24
+#define BITMASK_SIZE 32
+
+struct thread_data {
+ struct timeval tv_begin;
+ struct timeval tv_end;
+ unsigned long long tsc;
+ unsigned long long aperf;
+ unsigned long long mperf;
+ unsigned long long c1;
+ unsigned long long irq_count;
+ unsigned int smi_count;
+ unsigned int cpu_id;
+ unsigned int apic_id;
+ unsigned int x2apic_id;
+ unsigned int flags;
+#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
+#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
+ unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
+} *thread_even, *thread_odd;
+
+struct core_data {
+ unsigned long long c3;
+ unsigned long long c6;
+ unsigned long long c7;
+ unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */
+ unsigned int core_temp_c;
+ unsigned int core_id;
+ unsigned long long counter[MAX_ADDED_COUNTERS];
+} *core_even, *core_odd;
+
+struct pkg_data {
+ unsigned long long pc2;
+ unsigned long long pc3;
+ unsigned long long pc6;
+ unsigned long long pc7;
+ unsigned long long pc8;
+ unsigned long long pc9;
+ unsigned long long pc10;
+ unsigned long long cpu_lpi;
+ unsigned long long sys_lpi;
+ unsigned long long pkg_wtd_core_c0;
+ unsigned long long pkg_any_core_c0;
+ unsigned long long pkg_any_gfxe_c0;
+ unsigned long long pkg_both_core_gfxe_c0;
+ long long gfx_rc6_ms;
+ unsigned int gfx_mhz;
+ unsigned int package_id;
+ unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ unsigned int pkg_temp_c;
+ unsigned long long counter[MAX_ADDED_COUNTERS];
+} *package_even, *package_odd;
+
+#define ODD_COUNTERS thread_odd, core_odd, package_odd
+#define EVEN_COUNTERS thread_even, core_even, package_even
+
+#define GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no) \
+ ((thread_base) + \
+ ((pkg_no) * \
+ topo.nodes_per_pkg * topo.cores_per_node * topo.threads_per_core) + \
+ ((node_no) * topo.cores_per_node * topo.threads_per_core) + \
+ ((core_no) * topo.threads_per_core) + \
+ (thread_no))
+
+#define GET_CORE(core_base, core_no, node_no, pkg_no) \
+ ((core_base) + \
+ ((pkg_no) * topo.nodes_per_pkg * topo.cores_per_node) + \
+ ((node_no) * topo.cores_per_node) + \
+ (core_no))
+
+
+#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
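+
+/*
+ * Illustrative example (hypothetical topology: 2 packages, 1 node/pkg,
+ * 4 cores/node, 2 threads/core) of the flat-array indexing above:
+ *	GET_THREAD(base, thread_no = 1, core_no = 2, node_no = 0, pkg_no = 1)
+ *	= base + (1 * 1*4*2) + (0 * 4*2) + (2 * 2) + 1 = base + 13
+ * i.e. threads are stored package-major, then node, then core, then thread.
+ */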
+
+enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
+enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
+enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
+
+struct msr_counter {
+ unsigned int msr_num;
+ char name[NAME_BYTES];
+ char path[PATH_BYTES];
+ unsigned int width;
+ enum counter_type type;
+ enum counter_format format;
+ struct msr_counter *next;
+ unsigned int flags;
+#define FLAGS_HIDE (1 << 0)
+#define FLAGS_SHOW (1 << 1)
+#define SYSFS_PERCPU (1 << 1)
+};
+
+struct sys_counters {
+ unsigned int added_thread_counters;
+ unsigned int added_core_counters;
+ unsigned int added_package_counters;
+ struct msr_counter *tp;
+ struct msr_counter *cp;
+ struct msr_counter *pp;
+} sys;
+
+struct system_summary {
+ struct thread_data threads;
+ struct core_data cores;
+ struct pkg_data packages;
+} average;
+
+struct cpu_topology {
+ int physical_package_id;
+ int logical_cpu_id;
+ int physical_node_id;
+ int logical_node_id; /* 0-based count within the package */
+ int physical_core_id;
+ int thread_id;
+ cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
+} *cpus;
+
+struct topo_params {
+ int num_packages;
+ int num_cpus;
+ int num_cores;
+ int max_cpu_num;
+ int max_node_num;
+ int nodes_per_pkg;
+ int cores_per_node;
+ int threads_per_core;
+} topo;
+
+struct timeval tv_even, tv_odd, tv_delta;
+
+int *irq_column_2_cpu; /* /proc/interrupts column numbers */
+int *irqs_per_cpu; /* indexed by cpu_num */
+
+void setup_all_buffers(void);
+
+char *sys_lpi_file;
+char *sys_lpi_file_sysfs = "/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us";
+char *sys_lpi_file_debugfs = "/sys/kernel/debug/pmc_core/slp_s0_residency_usec";
+
+int cpu_is_not_present(int cpu)
+{
+ return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
+}
+/*
+ * run func(thread, core, package) in topology order
+ * skip non-present cpus
+ */
+
+int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
+ struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
+{
+ int retval, pkg_no, core_no, thread_no, node_no;
+
+ for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
+ for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
+ for (node_no = 0; node_no < topo.nodes_per_pkg;
+ node_no++) {
+ for (thread_no = 0; thread_no <
+ topo.threads_per_core; ++thread_no) {
+ struct thread_data *t;
+ struct core_data *c;
+ struct pkg_data *p;
+
+ t = GET_THREAD(thread_base, thread_no,
+ core_no, node_no,
+ pkg_no);
+
+ if (cpu_is_not_present(t->cpu_id))
+ continue;
+
+ c = GET_CORE(core_base, core_no,
+ node_no, pkg_no);
+ p = GET_PKG(pkg_base, pkg_no);
+
+ retval = func(t, c, p);
+ if (retval)
+ return retval;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int cpu_migrate(int cpu)
+{
+ CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
+ CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set);
+ if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1)
+ return -1;
+ else
+ return 0;
+}
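+
+/*
+ * Illustrative note: get_counters() pins itself to the target CPU with
+ * cpu_migrate() before using RDTSC and /dev/cpu/N/msr, e.g.
+ *	if (cpu_migrate(3) == 0)
+ *		tsc = rdtsc();	// now executing on cpu3
+ */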
+int get_msr_fd(int cpu)
+{
+ char pathname[32];
+ int fd;
+
+ fd = fd_percpu[cpu];
+
+ if (fd)
+ return fd;
+
+ sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ fd = open(pathname, O_RDONLY);
+ if (fd < 0)
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+
+ fd_percpu[cpu] = fd;
+
+ return fd;
+}
+
+int get_msr(int cpu, off_t offset, unsigned long long *msr)
+{
+ ssize_t retval;
+
+ retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
+
+ if (retval != sizeof *msr)
+ err(-1, "cpu%d: msr offset 0x%llx read failed", cpu, (unsigned long long)offset);
+
+ return 0;
+}
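+
+/*
+ * Illustrative example: MSRs are read with pread() on /dev/cpu/N/msr,
+ * using the MSR address as the file offset, e.g.
+ *	unsigned long long mperf;
+ *	get_msr(0, MSR_IA32_MPERF, &mperf);	// offset 0xE7 on cpu0
+ * A short read is fatal via err(), so a 0 return means all 8 bytes arrived.
+ */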
+
+/*
+ * This list matches the column headers, except:
+ * 1. built-in only -- the sysfs counters are not here; we learn of those at run-time
+ * 2. Core and CPU are moved to the end, so that strings containing them
+ *    do not match on them for --show and --hide.
+ */
+struct msr_counter bic[] = {
+ { 0x0, "usec" },
+ { 0x0, "Time_Of_Day_Seconds" },
+ { 0x0, "Package" },
+ { 0x0, "Node" },
+ { 0x0, "Avg_MHz" },
+ { 0x0, "Busy%" },
+ { 0x0, "Bzy_MHz" },
+ { 0x0, "TSC_MHz" },
+ { 0x0, "IRQ" },
+	{ 0x0, "SMI", "", 32, COUNTER_ITEMS, FORMAT_DELTA, NULL},
+ { 0x0, "sysfs" },
+ { 0x0, "CPU%c1" },
+ { 0x0, "CPU%c3" },
+ { 0x0, "CPU%c6" },
+ { 0x0, "CPU%c7" },
+ { 0x0, "ThreadC" },
+ { 0x0, "CoreTmp" },
+ { 0x0, "CoreCnt" },
+ { 0x0, "PkgTmp" },
+ { 0x0, "GFX%rc6" },
+ { 0x0, "GFXMHz" },
+ { 0x0, "Pkg%pc2" },
+ { 0x0, "Pkg%pc3" },
+ { 0x0, "Pkg%pc6" },
+ { 0x0, "Pkg%pc7" },
+ { 0x0, "Pkg%pc8" },
+ { 0x0, "Pkg%pc9" },
+ { 0x0, "Pk%pc10" },
+ { 0x0, "CPU%LPI" },
+ { 0x0, "SYS%LPI" },
+ { 0x0, "PkgWatt" },
+ { 0x0, "CorWatt" },
+ { 0x0, "GFXWatt" },
+ { 0x0, "PkgCnt" },
+ { 0x0, "RAMWatt" },
+ { 0x0, "PKG_%" },
+ { 0x0, "RAM_%" },
+ { 0x0, "Pkg_J" },
+ { 0x0, "Cor_J" },
+ { 0x0, "GFX_J" },
+ { 0x0, "RAM_J" },
+ { 0x0, "Mod%c6" },
+ { 0x0, "Totl%C0" },
+ { 0x0, "Any%C0" },
+ { 0x0, "GFX%C0" },
+ { 0x0, "CPUGFX%" },
+ { 0x0, "Core" },
+ { 0x0, "CPU" },
+ { 0x0, "APIC" },
+ { 0x0, "X2APIC" },
+};
+
+#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
+#define BIC_USEC (1ULL << 0)
+#define BIC_TOD (1ULL << 1)
+#define BIC_Package (1ULL << 2)
+#define BIC_Node (1ULL << 3)
+#define BIC_Avg_MHz (1ULL << 4)
+#define BIC_Busy (1ULL << 5)
+#define BIC_Bzy_MHz (1ULL << 6)
+#define BIC_TSC_MHz (1ULL << 7)
+#define BIC_IRQ (1ULL << 8)
+#define BIC_SMI (1ULL << 9)
+#define BIC_sysfs (1ULL << 10)
+#define BIC_CPU_c1 (1ULL << 11)
+#define BIC_CPU_c3 (1ULL << 12)
+#define BIC_CPU_c6 (1ULL << 13)
+#define BIC_CPU_c7 (1ULL << 14)
+#define BIC_ThreadC (1ULL << 15)
+#define BIC_CoreTmp (1ULL << 16)
+#define BIC_CoreCnt (1ULL << 17)
+#define BIC_PkgTmp (1ULL << 18)
+#define BIC_GFX_rc6 (1ULL << 19)
+#define BIC_GFXMHz (1ULL << 20)
+#define BIC_Pkgpc2 (1ULL << 21)
+#define BIC_Pkgpc3 (1ULL << 22)
+#define BIC_Pkgpc6 (1ULL << 23)
+#define BIC_Pkgpc7 (1ULL << 24)
+#define BIC_Pkgpc8 (1ULL << 25)
+#define BIC_Pkgpc9 (1ULL << 26)
+#define BIC_Pkgpc10 (1ULL << 27)
+#define BIC_CPU_LPI (1ULL << 28)
+#define BIC_SYS_LPI (1ULL << 29)
+#define BIC_PkgWatt (1ULL << 30)
+#define BIC_CorWatt (1ULL << 31)
+#define BIC_GFXWatt (1ULL << 32)
+#define BIC_PkgCnt (1ULL << 33)
+#define BIC_RAMWatt (1ULL << 34)
+#define BIC_PKG__ (1ULL << 35)
+#define BIC_RAM__ (1ULL << 36)
+#define BIC_Pkg_J (1ULL << 37)
+#define BIC_Cor_J (1ULL << 38)
+#define BIC_GFX_J (1ULL << 39)
+#define BIC_RAM_J (1ULL << 40)
+#define BIC_Mod_c6 (1ULL << 41)
+#define BIC_Totl_c0 (1ULL << 42)
+#define BIC_Any_c0 (1ULL << 43)
+#define BIC_GFX_c0 (1ULL << 44)
+#define BIC_CPUGFX (1ULL << 45)
+#define BIC_Core (1ULL << 46)
+#define BIC_CPU (1ULL << 47)
+#define BIC_APIC (1ULL << 48)
+#define BIC_X2APIC (1ULL << 49)
+
+#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
+
+unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
+
+#define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
+#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
+#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
+#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
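+
+/*
+ * Illustrative example: a column is printed only when it is both enabled
+ * (not hidden by the user) and present (discovered during probing):
+ *	BIC_PRESENT(BIC_IRQ);		// probe: bic_present |= BIC_IRQ
+ *	if (DO_BIC(BIC_IRQ))		// true: IRQ is enabled by default
+ *		... emit the IRQ column ...
+ */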
+
+
+#define MAX_DEFERRED 16
+char *deferred_skip_names[MAX_DEFERRED];
+int deferred_skip_index;
+
+/*
+ * HIDE_LIST - hide this list of counters, show the rest [default]
+ * SHOW_LIST - show this list of counters, hide the rest
+ */
+enum show_hide_mode { SHOW_LIST, HIDE_LIST } global_show_hide_mode = HIDE_LIST;
+
+void help(void)
+{
+ fprintf(outf,
+ "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
+ "\n"
+ "Turbostat forks the specified COMMAND and prints statistics\n"
+ "when COMMAND completes.\n"
+	"If no COMMAND is specified, turbostat wakes every 5 seconds\n"
+ "to print statistics, until interrupted.\n"
+ " -a, --add add a counter\n"
+ " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+ " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
+ " {core | package | j,k,l..m,n-p }\n"
+ " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
+ " -D, --Dump displays the raw counter values\n"
+ " -e, --enable [all | column]\n"
+ " shows all or the specified disabled column\n"
+ " -H, --hide [column|column,column,...]\n"
+ " hide the specified column(s)\n"
+ " -i, --interval sec.subsec\n"
+ " Override default 5-second measurement interval\n"
+ " -J, --Joules displays energy in Joules instead of Watts\n"
+ " -l, --list list column headers only\n"
+ " -n, --num_iterations num\n"
+	"		number of measurement iterations\n"
+ " -o, --out file\n"
+ " create or truncate \"file\" for all output\n"
+ " -q, --quiet skip decoding system configuration header\n"
+ " -s, --show [column|column,column,...]\n"
+ " show only the specified column(s)\n"
+ " -S, --Summary\n"
+ " limits output to 1-line system summary per interval\n"
+ " -T, --TCC temperature\n"
+ " sets the Thermal Control Circuit temperature in\n"
+ " degrees Celsius\n"
+ " -h, --help print this help message\n"
+ " -v, --version print version information\n"
+ "\n"
+ "For more help, run \"man turbostat\"\n");
+}
+
+/*
+ * bic_lookup
+ * for each string in the comma-separated name_list,
+ * set the appropriate bit in the return value.
+ */
+unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode)
+{
+ int i;
+ unsigned long long retval = 0;
+
+ while (name_list) {
+ char *comma;
+
+ comma = strchr(name_list, ',');
+
+ if (comma)
+ *comma = '\0';
+
+ if (!strcmp(name_list, "all"))
+			return ~0ULL;
+
+ for (i = 0; i < MAX_BIC; ++i) {
+ if (!strcmp(name_list, bic[i].name)) {
+ retval |= (1ULL << i);
+ break;
+ }
+ }
+ if (i == MAX_BIC) {
+ if (mode == SHOW_LIST) {
+ fprintf(stderr, "Invalid counter name: %s\n", name_list);
+ exit(-1);
+ }
+ deferred_skip_names[deferred_skip_index++] = name_list;
+ if (debug)
+ fprintf(stderr, "deferred \"%s\"\n", name_list);
+ if (deferred_skip_index >= MAX_DEFERRED) {
+				fprintf(stderr, "More than max %d unrecognized --skip options '%s'\n",
+ MAX_DEFERRED, name_list);
+ help();
+ exit(1);
+ }
+ }
+
+ name_list = comma;
+ if (name_list)
+ name_list++;
+
+ }
+ return retval;
+}
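+
+/*
+ * Illustrative example: bic_lookup("Busy%,IRQ", SHOW_LIST) returns
+ * BIC_Busy | BIC_IRQ, since "Busy%" and "IRQ" are bic[] entries 5 and 8.
+ * In SHOW_LIST mode an unrecognized name is fatal; in HIDE_LIST mode it
+ * is deferred to deferred_skip_names[] instead.
+ */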
+
+
+void print_header(char *delim)
+{
+ struct msr_counter *mp;
+ int printed = 0;
+
+ if (DO_BIC(BIC_USEC))
+ outp += sprintf(outp, "%susec", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_TOD))
+ outp += sprintf(outp, "%sTime_Of_Day_Seconds", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Package))
+ outp += sprintf(outp, "%sPackage", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Node))
+ outp += sprintf(outp, "%sNode", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Core))
+ outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU))
+ outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_APIC))
+ outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_X2APIC))
+ outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Avg_MHz))
+ outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Busy))
+ outp += sprintf(outp, "%sBusy%%", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Bzy_MHz))
+ outp += sprintf(outp, "%sBzy_MHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_TSC_MHz))
+ outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_IRQ)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s IRQ", (printed++ ? delim : ""));
+ else
+ outp += sprintf(outp, "%sIRQ", (printed++ ? delim : ""));
+ }
+
+ if (DO_BIC(BIC_SMI))
+ outp += sprintf(outp, "%sSMI", (printed++ ? delim : ""));
+
+ for (mp = sys.tp; mp; mp = mp->next) {
+
+ if (mp->format == FORMAT_RAW) {
+ if (mp->width == 64)
+ outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), mp->name);
+ else
+ outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), mp->name);
+ } else {
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), mp->name);
+ else
+ outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), mp->name);
+ }
+ }
+
+ if (DO_BIC(BIC_CPU_c1))
+ outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
+ outp += sprintf(outp, "%sCPU%%c3", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_c6))
+ outp += sprintf(outp, "%sCPU%%c6", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_c7))
+ outp += sprintf(outp, "%sCPU%%c7", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_Mod_c6))
+ outp += sprintf(outp, "%sMod%%c6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_CoreTmp))
+ outp += sprintf(outp, "%sCoreTmp", (printed++ ? delim : ""));
+
+ for (mp = sys.cp; mp; mp = mp->next) {
+ if (mp->format == FORMAT_RAW) {
+ if (mp->width == 64)
+ outp += sprintf(outp, "%s%18.18s", delim, mp->name);
+ else
+ outp += sprintf(outp, "%s%10.10s", delim, mp->name);
+ } else {
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8s", delim, mp->name);
+ else
+ outp += sprintf(outp, "%s%s", delim, mp->name);
+ }
+ }
+
+ if (DO_BIC(BIC_PkgTmp))
+ outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_GFX_rc6))
+ outp += sprintf(outp, "%sGFX%%rc6", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_GFXMHz))
+ outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_Totl_c0))
+ outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Any_c0))
+ outp += sprintf(outp, "%sAny%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFX_c0))
+ outp += sprintf(outp, "%sGFX%%C0", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPUGFX))
+ outp += sprintf(outp, "%sCPUGFX%%", (printed++ ? delim : ""));
+
+ if (DO_BIC(BIC_Pkgpc2))
+ outp += sprintf(outp, "%sPkg%%pc2", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc3))
+ outp += sprintf(outp, "%sPkg%%pc3", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc6))
+ outp += sprintf(outp, "%sPkg%%pc6", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc7))
+ outp += sprintf(outp, "%sPkg%%pc7", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc8))
+ outp += sprintf(outp, "%sPkg%%pc8", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc9))
+ outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Pkgpc10))
+ outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU_LPI))
+ outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_SYS_LPI))
+ outp += sprintf(outp, "%sSYS%%LPI", (printed++ ? delim : ""));
+
+ if (do_rapl && !rapl_joules) {
+ if (DO_BIC(BIC_PkgWatt))
+ outp += sprintf(outp, "%sPkgWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CorWatt))
+ outp += sprintf(outp, "%sCorWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFXWatt))
+ outp += sprintf(outp, "%sGFXWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAMWatt))
+ outp += sprintf(outp, "%sRAMWatt", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_PKG__))
+ outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAM__))
+ outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
+ } else if (do_rapl && rapl_joules) {
+ if (DO_BIC(BIC_Pkg_J))
+ outp += sprintf(outp, "%sPkg_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Cor_J))
+ outp += sprintf(outp, "%sCor_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFX_J))
+ outp += sprintf(outp, "%sGFX_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAM_J))
+ outp += sprintf(outp, "%sRAM_J", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_PKG__))
+ outp += sprintf(outp, "%sPKG_%%", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_RAM__))
+ outp += sprintf(outp, "%sRAM_%%", (printed++ ? delim : ""));
+ }
+ for (mp = sys.pp; mp; mp = mp->next) {
+ if (mp->format == FORMAT_RAW) {
+ if (mp->width == 64)
+ outp += sprintf(outp, "%s%18.18s", delim, mp->name);
+ else
+ outp += sprintf(outp, "%s%10.10s", delim, mp->name);
+ } else {
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8s", delim, mp->name);
+ else
+ outp += sprintf(outp, "%s%s", delim, mp->name);
+ }
+ }
+
+ outp += sprintf(outp, "\n");
+}
+
+int dump_counters(struct thread_data *t, struct core_data *c,
+ struct pkg_data *p)
+{
+ int i;
+ struct msr_counter *mp;
+
+ outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
+
+ if (t) {
+ outp += sprintf(outp, "CPU: %d flags 0x%x\n",
+ t->cpu_id, t->flags);
+ outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
+ outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
+ outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
+ outp += sprintf(outp, "c1: %016llX\n", t->c1);
+
+ if (DO_BIC(BIC_IRQ))
+ outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
+ if (DO_BIC(BIC_SMI))
+ outp += sprintf(outp, "SMI: %d\n", t->smi_count);
+
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
+ i, mp->msr_num, t->counter[i]);
+ }
+ }
+
+ if (c) {
+ outp += sprintf(outp, "core: %d\n", c->core_id);
+ outp += sprintf(outp, "c3: %016llX\n", c->c3);
+ outp += sprintf(outp, "c6: %016llX\n", c->c6);
+ outp += sprintf(outp, "c7: %016llX\n", c->c7);
+ outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c);
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
+ i, mp->msr_num, c->counter[i]);
+ }
+ outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
+ }
+
+ if (p) {
+ outp += sprintf(outp, "package: %d\n", p->package_id);
+
+ outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0);
+ outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0);
+ outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0);
+ outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0);
+
+ outp += sprintf(outp, "pc2: %016llX\n", p->pc2);
+ if (DO_BIC(BIC_Pkgpc3))
+ outp += sprintf(outp, "pc3: %016llX\n", p->pc3);
+ if (DO_BIC(BIC_Pkgpc6))
+ outp += sprintf(outp, "pc6: %016llX\n", p->pc6);
+ if (DO_BIC(BIC_Pkgpc7))
+ outp += sprintf(outp, "pc7: %016llX\n", p->pc7);
+ outp += sprintf(outp, "pc8: %016llX\n", p->pc8);
+ outp += sprintf(outp, "pc9: %016llX\n", p->pc9);
+ outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
+ outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
+ outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
+ outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
+ outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
+ outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
+ outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
+ outp += sprintf(outp, "Throttle PKG: %0X\n",
+ p->rapl_pkg_perf_status);
+ outp += sprintf(outp, "Throttle RAM: %0X\n",
+ p->rapl_dram_perf_status);
+ outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
+
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+ outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
+ i, mp->msr_num, p->counter[i]);
+ }
+ }
+
+ outp += sprintf(outp, "\n");
+
+ return 0;
+}
+
+/*
+ * column formatting convention & formats
+ */
+int format_counters(struct thread_data *t, struct core_data *c,
+ struct pkg_data *p)
+{
+ double interval_float, tsc;
+ char *fmt8;
+ int i;
+ struct msr_counter *mp;
+ char *delim = "\t";
+ int printed = 0;
+
+ /* if showing only 1st thread in core and this isn't one, bail out */
+ if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ return 0;
+
+ /* if showing only 1st thread in pkg and this isn't one, bail out */
+ if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+	/* if not a summary line and --cpu is used */
+ if ((t != &average.threads) &&
+ (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
+ return 0;
+
+ if (DO_BIC(BIC_USEC)) {
+		/* on each row, print how many usec it took to gather this row's counters */
+ struct timeval tv;
+
+ timersub(&t->tv_end, &t->tv_begin, &tv);
+ outp += sprintf(outp, "%5ld\t", tv.tv_sec * 1000000 + tv.tv_usec);
+ }
+
+	/* Time_Of_Day_Seconds: on each row, print the sec.usec of the last timestamp taken */
+ if (DO_BIC(BIC_TOD))
+ outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
+
+ interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0;
+
+ tsc = t->tsc * tsc_tweak;
+
+ /* topo columns, print blanks on 1st (average) line */
+ if (t == &average.threads) {
+ if (DO_BIC(BIC_Package))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Node))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_Core))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_CPU))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_APIC))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_X2APIC))
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ } else {
+ if (DO_BIC(BIC_Package)) {
+ if (p)
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->package_id);
+ else
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_Node)) {
+ if (t)
+ outp += sprintf(outp, "%s%d",
+ (printed++ ? delim : ""),
+ cpus[t->cpu_id].physical_node_id);
+ else
+ outp += sprintf(outp, "%s-",
+ (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_Core)) {
+ if (c)
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_id);
+ else
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+ }
+ if (DO_BIC(BIC_CPU))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
+ if (DO_BIC(BIC_APIC))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
+ if (DO_BIC(BIC_X2APIC))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
+ }
+
+ if (DO_BIC(BIC_Avg_MHz))
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
+ 1.0 / units * t->aperf / interval_float);
+
+ if (DO_BIC(BIC_Busy))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
+
+ if (DO_BIC(BIC_Bzy_MHz)) {
+ if (has_base_hz)
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
+ else
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
+ tsc / units * t->aperf / t->mperf / interval_float);
+ }
+
+ if (DO_BIC(BIC_TSC_MHz))
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
+
+ /* IRQ */
+ if (DO_BIC(BIC_IRQ)) {
+ if (sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->irq_count);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->irq_count);
+ }
+
+ /* SMI */
+ if (DO_BIC(BIC_SMI))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->smi_count);
+
+ /* Added counters */
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW) {
+ if (mp->width == 32)
+ outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
+ else
+ outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
+ } else if (mp->format == FORMAT_DELTA) {
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->counter[i]);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
+ } else if (mp->format == FORMAT_PERCENT) {
+ if (mp->type == COUNTER_USEC)
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
+ else
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
+ }
+ }
+
+ /* C1 */
+ if (DO_BIC(BIC_CPU_c1))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
+
+
+ /* print per-core data only for 1st thread in core */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ goto done;
+
+ if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates)
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
+ if (DO_BIC(BIC_CPU_c6))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
+ if (DO_BIC(BIC_CPU_c7))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
+
+ /* Mod%c6 */
+ if (DO_BIC(BIC_Mod_c6))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->mc6_us / tsc);
+
+ if (DO_BIC(BIC_CoreTmp))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), c->core_temp_c);
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW) {
+ if (mp->width == 32)
+ outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
+ else
+ outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
+ } else if (mp->format == FORMAT_DELTA) {
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->counter[i]);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
+ } else if (mp->format == FORMAT_PERCENT) {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
+ }
+ }
+
+ /* print per-package data only for 1st core in package */
+ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ goto done;
+
+ /* PkgTmp */
+ if (DO_BIC(BIC_PkgTmp))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->pkg_temp_c);
+
+ /* GFXrc6 */
+ if (DO_BIC(BIC_GFX_rc6)) {
+ if (p->gfx_rc6_ms == -1) { /* detect GFX counter reset */
+ outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
+ } else {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ p->gfx_rc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* GFXMHz */
+ if (DO_BIC(BIC_GFXMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
+
+ /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
+ if (DO_BIC(BIC_Totl_c0))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
+ if (DO_BIC(BIC_Any_c0))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
+ if (DO_BIC(BIC_GFX_c0))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
+ if (DO_BIC(BIC_CPUGFX))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
+
+ if (DO_BIC(BIC_Pkgpc2))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
+ if (DO_BIC(BIC_Pkgpc3))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
+ if (DO_BIC(BIC_Pkgpc6))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
+ if (DO_BIC(BIC_Pkgpc7))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
+ if (DO_BIC(BIC_Pkgpc8))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
+ if (DO_BIC(BIC_Pkgpc9))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
+ if (DO_BIC(BIC_Pkgpc10))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
+
+ if (DO_BIC(BIC_CPU_LPI))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ if (DO_BIC(BIC_SYS_LPI))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+
+	/*
+	 * If the measurement interval exceeds the minimum RAPL Joule Counter
+	 * range, indicate that results are suspect by printing "**" in the
+	 * fraction place.
+	 */
+ if (interval_float < rapl_joule_counter_range)
+ fmt8 = "%s%.2f";
+ else
+		fmt8 = "%s%6.0f**";
+
+ if (DO_BIC(BIC_PkgWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_CorWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_GFXWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ if (DO_BIC(BIC_RAMWatt))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
+ if (DO_BIC(BIC_Pkg_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
+ if (DO_BIC(BIC_Cor_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units);
+ if (DO_BIC(BIC_GFX_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units);
+ if (DO_BIC(BIC_RAM_J))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
+ if (DO_BIC(BIC_PKG__))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ if (DO_BIC(BIC_RAM__))
+ outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW) {
+ if (mp->width == 32)
+ outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
+ else
+ outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
+ } else if (mp->format == FORMAT_DELTA) {
+ if ((mp->type == COUNTER_ITEMS) && sums_need_wide_columns)
+ outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->counter[i]);
+ else
+ outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
+ } else if (mp->format == FORMAT_PERCENT) {
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
+ }
+ }
+
+done:
+ if (*(outp - 1) != '\n')
+ outp += sprintf(outp, "\n");
+
+ return 0;
+}
+
+void flush_output_stdout(void)
+{
+ FILE *filep;
+
+ if (outf == stderr)
+ filep = stdout;
+ else
+ filep = outf;
+
+ fputs(output_buffer, filep);
+ fflush(filep);
+
+ outp = output_buffer;
+}
+void flush_output_stderr(void)
+{
+ fputs(output_buffer, outf);
+ fflush(outf);
+ outp = output_buffer;
+}
+void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ static int printed;
+
+ if (!printed || !summary_only)
+ print_header("\t");
+
+ format_counters(&average.threads, &average.cores, &average.packages);
+
+ printed = 1;
+
+ if (summary_only)
+ return;
+
+ for_all_cpus(format_counters, t, c, p);
+}
+
+#define DELTA_WRAP32(new, old) \
+ if (new > old) { \
+ old = new - old; \
+ } else { \
+ old = 0x100000000 + new - old; \
+ }
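+
+/*
+ * Illustrative example: the RAPL energy-status registers are 32-bit
+ * counters, so DELTA_WRAP32() reconstructs a delta across one wrap:
+ *	old = 0xFFFFFFF0, new = 0x10
+ *	-> old = 0x100000000 + 0x10 - 0xFFFFFFF0 = 0x20
+ * (valid only if the counter wrapped at most once during the interval)
+ */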
+
+int
+delta_package(struct pkg_data *new, struct pkg_data *old)
+{
+ int i;
+ struct msr_counter *mp;
+
+
+ if (DO_BIC(BIC_Totl_c0))
+ old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
+ if (DO_BIC(BIC_Any_c0))
+ old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0;
+ if (DO_BIC(BIC_GFX_c0))
+ old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0;
+ if (DO_BIC(BIC_CPUGFX))
+ old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0;
+
+ old->pc2 = new->pc2 - old->pc2;
+ if (DO_BIC(BIC_Pkgpc3))
+ old->pc3 = new->pc3 - old->pc3;
+ if (DO_BIC(BIC_Pkgpc6))
+ old->pc6 = new->pc6 - old->pc6;
+ if (DO_BIC(BIC_Pkgpc7))
+ old->pc7 = new->pc7 - old->pc7;
+ old->pc8 = new->pc8 - old->pc8;
+ old->pc9 = new->pc9 - old->pc9;
+ old->pc10 = new->pc10 - old->pc10;
+ old->cpu_lpi = new->cpu_lpi - old->cpu_lpi;
+ old->sys_lpi = new->sys_lpi - old->sys_lpi;
+ old->pkg_temp_c = new->pkg_temp_c;
+
+ /* flag an error when rc6 counter resets/wraps */
+ if (old->gfx_rc6_ms > new->gfx_rc6_ms)
+ old->gfx_rc6_ms = -1;
+ else
+ old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
+
+ old->gfx_mhz = new->gfx_mhz;
+
+ DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
+ DELTA_WRAP32(new->energy_cores, old->energy_cores);
+ DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
+ DELTA_WRAP32(new->energy_dram, old->energy_dram);
+ DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
+ DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
+
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ old->counter[i] = new->counter[i];
+ else
+ old->counter[i] = new->counter[i] - old->counter[i];
+ }
+
+ return 0;
+}
+
+void
+delta_core(struct core_data *new, struct core_data *old)
+{
+ int i;
+ struct msr_counter *mp;
+
+ old->c3 = new->c3 - old->c3;
+ old->c6 = new->c6 - old->c6;
+ old->c7 = new->c7 - old->c7;
+ old->core_temp_c = new->core_temp_c;
+ old->mc6_us = new->mc6_us - old->mc6_us;
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ old->counter[i] = new->counter[i];
+ else
+ old->counter[i] = new->counter[i] - old->counter[i];
+ }
+}
+
+/*
+ * old = new - old
+ */
+int
+delta_thread(struct thread_data *new, struct thread_data *old,
+ struct core_data *core_delta)
+{
+ int i;
+ struct msr_counter *mp;
+
+	/* we run cpuid just the 1st time; copy the saved results */
+ if (DO_BIC(BIC_APIC))
+ new->apic_id = old->apic_id;
+ if (DO_BIC(BIC_X2APIC))
+ new->x2apic_id = old->x2apic_id;
+
+	/*
+	 * the timestamps from the start of the measurement interval are in "old"
+	 * the timestamps from the end of the measurement interval are in "new"
+	 * overwrite old with new so we can print end-of-interval values
+	 */
+
+ old->tv_begin = new->tv_begin;
+ old->tv_end = new->tv_end;
+
+ old->tsc = new->tsc - old->tsc;
+
+ /* check for TSC < 1 Mcycles over interval */
+ if (old->tsc < (1000 * 1000))
+ errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n"
+ "You can disable all c-states by booting with \"idle=poll\"\n"
+ "or just the deep ones with \"processor.max_cstate=1\"");
+
+ old->c1 = new->c1 - old->c1;
+
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
+ if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
+ old->aperf = new->aperf - old->aperf;
+ old->mperf = new->mperf - old->mperf;
+ } else {
+ return -1;
+ }
+ }
+
+
+ if (use_c1_residency_msr) {
+ /*
+ * Some models have a dedicated C1 residency MSR,
+ * which should be more accurate than the derivation below.
+ */
+ } else {
+ /*
+ * As counter collection is not atomic,
+ * it is possible for mperf's non-halted cycles + idle states
+ * to exceed TSC's all cycles: show c1 = 0% in that case.
+ */
+ if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > (old->tsc * tsc_tweak))
+ old->c1 = 0;
+ else {
+ /* normal case, derive c1 */
+ old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
+ - core_delta->c6 - core_delta->c7;
+ }
+ }
+
+ if (old->mperf == 0) {
+ if (debug > 1)
+ fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
+ old->mperf = 1; /* divide by 0 protection */
+ }
+
+ if (DO_BIC(BIC_IRQ))
+ old->irq_count = new->irq_count - old->irq_count;
+
+ if (DO_BIC(BIC_SMI))
+ old->smi_count = new->smi_count - old->smi_count;
+
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ old->counter[i] = new->counter[i];
+ else
+ old->counter[i] = new->counter[i] - old->counter[i];
+ }
+ return 0;
+}
+
+int delta_cpu(struct thread_data *t, struct core_data *c,
+ struct pkg_data *p, struct thread_data *t2,
+ struct core_data *c2, struct pkg_data *p2)
+{
+ int retval = 0;
+
+ /* calculate core delta only for 1st thread in core */
+ if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE)
+ delta_core(c, c2);
+
+ /* always calculate thread delta */
+ retval = delta_thread(t, t2, c2); /* c2 is core delta */
+ if (retval)
+ return retval;
+
+ /* calculate package delta only for 1st core in package */
+ if (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)
+ retval = delta_package(p, p2);
+
+ return retval;
+}
+
+void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ int i;
+ struct msr_counter *mp;
+
+ t->tv_begin.tv_sec = 0;
+ t->tv_begin.tv_usec = 0;
+ t->tv_end.tv_sec = 0;
+ t->tv_end.tv_usec = 0;
+
+ t->tsc = 0;
+ t->aperf = 0;
+ t->mperf = 0;
+ t->c1 = 0;
+
+ t->irq_count = 0;
+ t->smi_count = 0;
+
+ /* tells format_counters to dump all fields from this set */
+ t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
+
+ c->c3 = 0;
+ c->c6 = 0;
+ c->c7 = 0;
+ c->mc6_us = 0;
+ c->core_temp_c = 0;
+
+ p->pkg_wtd_core_c0 = 0;
+ p->pkg_any_core_c0 = 0;
+ p->pkg_any_gfxe_c0 = 0;
+ p->pkg_both_core_gfxe_c0 = 0;
+
+ p->pc2 = 0;
+ if (DO_BIC(BIC_Pkgpc3))
+ p->pc3 = 0;
+ if (DO_BIC(BIC_Pkgpc6))
+ p->pc6 = 0;
+ if (DO_BIC(BIC_Pkgpc7))
+ p->pc7 = 0;
+ p->pc8 = 0;
+ p->pc9 = 0;
+ p->pc10 = 0;
+ p->cpu_lpi = 0;
+ p->sys_lpi = 0;
+
+ p->energy_pkg = 0;
+ p->energy_dram = 0;
+ p->energy_cores = 0;
+ p->energy_gfx = 0;
+ p->rapl_pkg_perf_status = 0;
+ p->rapl_dram_perf_status = 0;
+ p->pkg_temp_c = 0;
+
+ p->gfx_rc6_ms = 0;
+ p->gfx_mhz = 0;
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
+ t->counter[i] = 0;
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next)
+ c->counter[i] = 0;
+
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
+ p->counter[i] = 0;
+}
+int sum_counters(struct thread_data *t, struct core_data *c,
+ struct pkg_data *p)
+{
+ int i;
+ struct msr_counter *mp;
+
+	/* copy the unchanging apic_ids */
+ if (DO_BIC(BIC_APIC))
+ average.threads.apic_id = t->apic_id;
+ if (DO_BIC(BIC_X2APIC))
+ average.threads.x2apic_id = t->x2apic_id;
+
+ /* remember first tv_begin */
+ if (average.threads.tv_begin.tv_sec == 0)
+ average.threads.tv_begin = t->tv_begin;
+
+ /* remember last tv_end */
+ average.threads.tv_end = t->tv_end;
+
+ average.threads.tsc += t->tsc;
+ average.threads.aperf += t->aperf;
+ average.threads.mperf += t->mperf;
+ average.threads.c1 += t->c1;
+
+ average.threads.irq_count += t->irq_count;
+ average.threads.smi_count += t->smi_count;
+
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ continue;
+ average.threads.counter[i] += t->counter[i];
+ }
+
+ /* sum per-core values only for 1st thread in core */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ return 0;
+
+ average.cores.c3 += c->c3;
+ average.cores.c6 += c->c6;
+ average.cores.c7 += c->c7;
+ average.cores.mc6_us += c->mc6_us;
+
+ average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c);
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ continue;
+ average.cores.counter[i] += c->counter[i];
+ }
+
+ /* sum per-pkg values only for 1st core in pkg */
+ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ if (DO_BIC(BIC_Totl_c0))
+ average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0;
+ if (DO_BIC(BIC_Any_c0))
+ average.packages.pkg_any_core_c0 += p->pkg_any_core_c0;
+ if (DO_BIC(BIC_GFX_c0))
+ average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0;
+ if (DO_BIC(BIC_CPUGFX))
+ average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0;
+
+ average.packages.pc2 += p->pc2;
+ if (DO_BIC(BIC_Pkgpc3))
+ average.packages.pc3 += p->pc3;
+ if (DO_BIC(BIC_Pkgpc6))
+ average.packages.pc6 += p->pc6;
+ if (DO_BIC(BIC_Pkgpc7))
+ average.packages.pc7 += p->pc7;
+ average.packages.pc8 += p->pc8;
+ average.packages.pc9 += p->pc9;
+ average.packages.pc10 += p->pc10;
+
+ average.packages.cpu_lpi = p->cpu_lpi;
+ average.packages.sys_lpi = p->sys_lpi;
+
+ average.packages.energy_pkg += p->energy_pkg;
+ average.packages.energy_dram += p->energy_dram;
+ average.packages.energy_cores += p->energy_cores;
+ average.packages.energy_gfx += p->energy_gfx;
+
+ average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
+ average.packages.gfx_mhz = p->gfx_mhz;
+
+ average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
+
+ average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
+ average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status;
+
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ continue;
+ average.packages.counter[i] += p->counter[i];
+ }
+ return 0;
+}
+/*
+ * sum the counters for all cpus in the system
+ * compute the weighted average
+ */
+void compute_average(struct thread_data *t, struct core_data *c,
+ struct pkg_data *p)
+{
+ int i;
+ struct msr_counter *mp;
+
+ clear_counters(&average.threads, &average.cores, &average.packages);
+
+ for_all_cpus(sum_counters, t, c, p);
+
+ average.threads.tsc /= topo.num_cpus;
+ average.threads.aperf /= topo.num_cpus;
+ average.threads.mperf /= topo.num_cpus;
+ average.threads.c1 /= topo.num_cpus;
+
+ if (average.threads.irq_count > 9999999)
+ sums_need_wide_columns = 1;
+
+ average.cores.c3 /= topo.num_cores;
+ average.cores.c6 /= topo.num_cores;
+ average.cores.c7 /= topo.num_cores;
+ average.cores.mc6_us /= topo.num_cores;
+
+ if (DO_BIC(BIC_Totl_c0))
+ average.packages.pkg_wtd_core_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_Any_c0))
+ average.packages.pkg_any_core_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_GFX_c0))
+ average.packages.pkg_any_gfxe_c0 /= topo.num_packages;
+ if (DO_BIC(BIC_CPUGFX))
+ average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages;
+
+ average.packages.pc2 /= topo.num_packages;
+ if (DO_BIC(BIC_Pkgpc3))
+ average.packages.pc3 /= topo.num_packages;
+ if (DO_BIC(BIC_Pkgpc6))
+ average.packages.pc6 /= topo.num_packages;
+ if (DO_BIC(BIC_Pkgpc7))
+ average.packages.pc7 /= topo.num_packages;
+
+ average.packages.pc8 /= topo.num_packages;
+ average.packages.pc9 /= topo.num_packages;
+ average.packages.pc10 /= topo.num_packages;
+
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ continue;
+ if (mp->type == COUNTER_ITEMS) {
+ if (average.threads.counter[i] > 9999999)
+ sums_need_wide_columns = 1;
+ continue;
+ }
+ average.threads.counter[i] /= topo.num_cpus;
+ }
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ continue;
+ if (mp->type == COUNTER_ITEMS) {
+ if (average.cores.counter[i] > 9999999)
+ sums_need_wide_columns = 1;
+ }
+ average.cores.counter[i] /= topo.num_cores;
+ }
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+ if (mp->format == FORMAT_RAW)
+ continue;
+ if (mp->type == COUNTER_ITEMS) {
+ if (average.packages.counter[i] > 9999999)
+ sums_need_wide_columns = 1;
+ }
+ average.packages.counter[i] /= topo.num_packages;
+ }
+}
+
+static unsigned long long rdtsc(void)
+{
+ unsigned int low, high;
+
+ asm volatile("rdtsc" : "=a" (low), "=d" (high));
+
+ return low | ((unsigned long long)high) << 32;
+}
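+
+/*
+ * Illustrative note: RDTSC returns the 64-bit timestamp counter split
+ * across EDX:EAX; e.g. high = 0x12 and low = 0x34 combine to
+ * (0x12ULL << 32) | 0x34 = 0x0000001200000034.
+ */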
+
+/*
+ * Open a file, and exit on failure
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+ FILE *filep = fopen(path, mode);
+
+ if (!filep)
+ err(1, "%s: open failed", path);
+ return filep;
+}
+/*
+ * snapshot_sysfs_counter()
+ *
+ * return snapshot of given counter
+ */
+unsigned long long snapshot_sysfs_counter(char *path)
+{
+ FILE *fp;
+ int retval;
+ unsigned long long counter;
+
+ fp = fopen_or_die(path, "r");
+
+	retval = fscanf(fp, "%llu", &counter);
+ if (retval != 1)
+ err(1, "snapshot_sysfs_counter(%s)", path);
+
+ fclose(fp);
+
+ return counter;
+}
+
+int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
+{
+ if (mp->msr_num != 0) {
+ if (get_msr(cpu, mp->msr_num, counterp))
+ return -1;
+ } else {
+ char path[128 + PATH_BYTES];
+
+ if (mp->flags & SYSFS_PERCPU) {
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
+ cpu, mp->path);
+
+ *counterp = snapshot_sysfs_counter(path);
+ } else {
+ *counterp = snapshot_sysfs_counter(mp->path);
+ }
+ }
+
+ return 0;
+}
+
+void get_apic_id(struct thread_data *t)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (DO_BIC(BIC_APIC)) {
+ eax = ebx = ecx = edx = 0;
+ __cpuid(1, eax, ebx, ecx, edx);
+
+ t->apic_id = (ebx >> 24) & 0xff;
+ }
+
+ if (!DO_BIC(BIC_X2APIC))
+ return;
+
+ if (authentic_amd) {
+ unsigned int topology_extensions;
+
+ if (max_extended_level < 0x8000001e)
+ return;
+
+ eax = ebx = ecx = edx = 0;
+ __cpuid(0x80000001, eax, ebx, ecx, edx);
+ topology_extensions = ecx & (1 << 22);
+
+ if (topology_extensions == 0)
+ return;
+
+ eax = ebx = ecx = edx = 0;
+ __cpuid(0x8000001e, eax, ebx, ecx, edx);
+
+ t->x2apic_id = eax;
+ return;
+ }
+
+ if (!genuine_intel)
+ return;
+
+ if (max_level < 0xb)
+ return;
+
+ ecx = 0;
+ __cpuid(0xb, eax, ebx, ecx, edx);
+ t->x2apic_id = edx;
+
+ if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
+ fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
+ t->cpu_id, t->apic_id, t->x2apic_id);
+}
+
+/*
+ * get_counters(...)
+ * migrate to cpu
+ * acquire and record local counters for that cpu
+ */
+int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ int cpu = t->cpu_id;
+ unsigned long long msr;
+ int aperf_mperf_retry_count = 0;
+ struct msr_counter *mp;
+ int i;
+
+ gettimeofday(&t->tv_begin, (struct timezone *)NULL);
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (first_counter_read)
+ get_apic_id(t);
+retry:
+ t->tsc = rdtsc(); /* we are running on local CPU of interest */
+
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz)) {
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+		 * individual system calls, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ tsc_between = rdtsc();
+
+ if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
+ return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld",
+ cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
+ t->aperf = t->aperf * aperf_mperf_multiplier;
+ t->mperf = t->mperf * aperf_mperf_multiplier;
+ }
+
+ if (DO_BIC(BIC_IRQ))
+ t->irq_count = irqs_per_cpu[cpu];
+ if (DO_BIC(BIC_SMI)) {
+ if (get_msr(cpu, MSR_SMI_COUNT, &msr))
+ return -5;
+ t->smi_count = msr & 0xFFFFFFFF;
+ }
+ if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) {
+ if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1))
+ return -6;
+ }
+
+ for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
+ if (get_mp(cpu, mp, &t->counter[i]))
+ return -10;
+ }
+
+ /* collect core counters only for 1st thread in core */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ goto done;
+
+ if (DO_BIC(BIC_CPU_c3) && !do_slm_cstates && !do_knl_cstates && !do_cnl_cstates) {
+ if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3))
+ return -6;
+ }
+
+ if (DO_BIC(BIC_CPU_c6) && !do_knl_cstates) {
+ if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
+ } else if (do_knl_cstates) {
+ if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6))
+ return -7;
+ }
+
+ if (DO_BIC(BIC_CPU_c7))
+ if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
+ return -8;
+
+ if (DO_BIC(BIC_Mod_c6))
+ if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
+ return -8;
+
+ if (DO_BIC(BIC_CoreTmp)) {
+ if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+ return -9;
+ c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
+ }
+
+ for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
+ if (get_mp(cpu, mp, &c->counter[i]))
+ return -10;
+ }
+
+ /* collect package counters only for 1st core in package */
+ if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ goto done;
+
+ if (DO_BIC(BIC_Totl_c0)) {
+ if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0))
+ return -10;
+ }
+ if (DO_BIC(BIC_Any_c0)) {
+ if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0))
+ return -11;
+ }
+ if (DO_BIC(BIC_GFX_c0)) {
+ if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0))
+ return -12;
+ }
+ if (DO_BIC(BIC_CPUGFX)) {
+ if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0))
+ return -13;
+ }
+ if (DO_BIC(BIC_Pkgpc3))
+ if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3))
+ return -9;
+ if (DO_BIC(BIC_Pkgpc6)) {
+ if (do_slm_cstates) {
+ if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6))
+ return -10;
+ } else {
+ if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6))
+ return -10;
+ }
+ }
+
+ if (DO_BIC(BIC_Pkgpc2))
+ if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2))
+ return -11;
+ if (DO_BIC(BIC_Pkgpc7))
+ if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7))
+ return -12;
+ if (DO_BIC(BIC_Pkgpc8))
+ if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8))
+ return -13;
+ if (DO_BIC(BIC_Pkgpc9))
+ if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9))
+ return -13;
+ if (DO_BIC(BIC_Pkgpc10))
+ if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10))
+ return -13;
+
+ if (DO_BIC(BIC_CPU_LPI))
+ p->cpu_lpi = cpuidle_cur_cpu_lpi_us;
+ if (DO_BIC(BIC_SYS_LPI))
+ p->sys_lpi = cpuidle_cur_sys_lpi_us;
+
+ if (do_rapl & RAPL_PKG) {
+ if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
+ return -13;
+ p->energy_pkg = msr & 0xFFFFFFFF;
+ }
+ if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
+ if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
+ return -14;
+ p->energy_cores = msr & 0xFFFFFFFF;
+ }
+ if (do_rapl & RAPL_DRAM) {
+ if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
+ return -15;
+ p->energy_dram = msr & 0xFFFFFFFF;
+ }
+ if (do_rapl & RAPL_GFX) {
+ if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
+ return -16;
+ p->energy_gfx = msr & 0xFFFFFFFF;
+ }
+ if (do_rapl & RAPL_PKG_PERF_STATUS) {
+ if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
+ return -16;
+ p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
+ }
+ if (do_rapl & RAPL_DRAM_PERF_STATUS) {
+ if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
+ return -16;
+ p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+ }
+ if (DO_BIC(BIC_PkgTmp)) {
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+ return -17;
+ p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
+ }
+
+ if (DO_BIC(BIC_GFX_rc6))
+ p->gfx_rc6_ms = gfx_cur_rc6_ms;
+
+ if (DO_BIC(BIC_GFXMHz))
+ p->gfx_mhz = gfx_cur_mhz;
+
+ for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
+ if (get_mp(cpu, mp, &p->counter[i]))
+ return -10;
+ }
+done:
+ gettimeofday(&t->tv_end, (struct timezone *)NULL);
+
+ return 0;
+}
+
+/*
+ * MSR_PKG_CST_CONFIG_CONTROL decoding for pkg_cstate_limit:
+ * If you change the values, note they are used both in comparisons
+ * (>= PCL__7) and to index pkg_cstate_limit_strings[].
+ */
+
+#define PCLUKN 0 /* Unknown */
+#define PCLRSV 1 /* Reserved */
+#define PCL__0 2 /* PC0 */
+#define PCL__1 3 /* PC1 */
+#define PCL__2 4 /* PC2 */
+#define PCL__3 5 /* PC3 */
+#define PCL__4 6 /* PC4 */
+#define PCL__6 7 /* PC6 */
+#define PCL_6N 8 /* PC6 No Retention */
+#define PCL_6R 9 /* PC6 Retention */
+#define PCL__7 10 /* PC7 */
+#define PCL_7S 11 /* PC7 Shrink */
+#define PCL__8 12 /* PC8 */
+#define PCL__9 13 /* PC9 */
+#define PCL_10 14 /* PC10 */
+#define PCLUNL 15 /* Unlimited */
+
+int pkg_cstate_limit = PCLUKN;
+char *pkg_cstate_limit_strings[] = { "unknown", "reserved", "pc0", "pc1", "pc2",
+	"pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"};
+
+int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
+int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
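+
+/*
+ * Illustrative example: the package C-state limit field of
+ * MSR_PKG_CST_CONFIG_CONTROL indexes the per-model table above,
+ * e.g. a raw field value of 2 on Nehalem decodes as:
+ *	nhm_pkg_cstate_limits[2] = PCL__3
+ *	pkg_cstate_limit_strings[PCL__3] = "pc3"
+ */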
+
+
+static void
+calculate_tsc_tweak(void)
+{
+ tsc_tweak = base_hz / tsc_hz;
+}
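+
+/*
+ * Illustrative example (hypothetical values): with base_hz = 2.4e9 and
+ * tsc_hz = 2.496e9, tsc_tweak = 2.4e9 / 2.496e9 ~= 0.9615, which scales
+ * raw TSC deltas into base-clock cycles for the %Busy / %c1 math.
+ */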
+
+static void
+dump_nhm_platform_info(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
+
+ fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
+
+ ratio = (msr >> 40) & 0xFF;
+ fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0xFF;
+ fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
+ fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
+ base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
+
+ return;
+}
+
+static void
+dump_hsw_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
+
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
+
+ ratio = (msr >> 8) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
+ ratio, bclk, ratio * bclk);
+ return;
+}
+
+static void
+dump_ivt_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
+
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
+
+ ratio = (msr >> 56) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 48) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 40) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 32) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 24) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
+ ratio, bclk, ratio * bclk);
+ return;
+}
+int has_turbo_ratio_group_limits(int family, int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
+ return 1;
+ }
+ return 0;
+}
+
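+/*
+ * When per-group core counts are unavailable (no MSR_TURBO_RATIO_LIMIT1
+ * group data), the default 0x0807060504030201 assigns one byte per group:
+ * 1 active core for the lowest byte up through 8 for the highest.
+ */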
+static void
+dump_turbo_ratio_limits(int family, int model)
+{
+ unsigned long long msr, core_counts;
+ unsigned int ratio, group_size;
+ int shift;
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
+
+ if (has_turbo_ratio_group_limits(family, model)) {
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, core_counts);
+ } else {
+ core_counts = 0x0807060504030201;
+ }
+
+ for (shift = 56; shift >= 0; shift -= 8) {
+ ratio = (msr >> shift) & 0xFF;
+ group_size = (core_counts >> shift) & 0xFF;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio, bclk, ratio * bclk, group_size);
+ }
+}
+
+static void
+dump_atom_turbo_ratio_limits(void)
+{
+ unsigned long long msr;
+ unsigned int ratio;
+
+ get_msr(base_cpu, MSR_ATOM_CORE_RATIOS, &msr);
+ fprintf(outf, "cpu%d: MSR_ATOM_CORE_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
+
+ ratio = (msr >> 0) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
+ ratio, bclk, ratio * bclk);
+
+ get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
+ fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
+
+ ratio = (msr >> 24) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 16) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 8) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
+ ratio, bclk, ratio * bclk);
+
+ ratio = (msr >> 0) & 0x3F;
+ if (ratio)
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
+ ratio, bclk, ratio * bclk);
+}
+
+static void
+dump_knl_turbo_ratio_limits(void)
+{
+ const unsigned int buckets_no = 7;
+
+ unsigned long long msr;
+ int delta_cores, delta_ratio;
+ int i, b_nr;
+ unsigned int cores[buckets_no];
+ unsigned int ratio[buckets_no];
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
+
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
+ base_cpu, msr);
+
+ /*
+ * Turbo encoding in KNL is as follows:
+ * [0] -- Reserved
+ * [7:1] -- Base value of number of active cores of bucket 1.
+ * [15:8] -- Base value of freq ratio of bucket 1.
+ * [20:16] -- +ve delta of number of active cores of bucket 2.
+ * i.e. active cores of bucket 2 =
+ * active cores of bucket 1 + delta
+ * [23:21] -- Negative delta of freq ratio of bucket 2.
+ * i.e. freq ratio of bucket 2 =
+ * freq ratio of bucket 1 - delta
+ * [28:24] -- +ve delta of number of active cores of bucket 3.
+ * [31:29] -- -ve delta of freq ratio of bucket 3.
+ * [36:32] -- +ve delta of number of active cores of bucket 4.
+ * [39:37] -- -ve delta of freq ratio of bucket 4.
+ * [44:40] -- +ve delta of number of active cores of bucket 5.
+ * [47:45] -- -ve delta of freq ratio of bucket 5.
+ * [52:48] -- +ve delta of number of active cores of bucket 6.
+ * [55:53] -- -ve delta of freq ratio of bucket 6.
+ * [60:56] -- +ve delta of number of active cores of bucket 7.
+ * [63:61] -- -ve delta of freq ratio of bucket 7.
+ */
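+ /*
+ * Worked example (hypothetical value): msr = 0x1E04 encodes bucket 1
+ * as 2 cores (0x04 >> 1) at ratio 30 (0x1E); a bucket-2 field of
+ * delta_cores = 2, delta_ratio = 1 would then yield 4 cores at ratio 29.
+ */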
+
+ b_nr = 0;
+ cores[b_nr] = (msr & 0xFF) >> 1;
+ ratio[b_nr] = (msr >> 8) & 0xFF;
+
+ for (i = 16; i < 64; i += 8) {
+ delta_cores = (msr >> i) & 0x1F;
+ delta_ratio = (msr >> (i + 5)) & 0x7;
+
+ cores[b_nr + 1] = cores[b_nr] + delta_cores;
+ ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
+ b_nr++;
+ }
+
+ for (i = buckets_no - 1; i >= 0; i--)
+ if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
+ fprintf(outf,
+ "%d * %.1f = %.1f MHz max turbo %d active cores\n",
+ ratio[i], bclk, ratio[i] * bclk, cores[i]);
+}
+
+static void
+dump_nhm_cst_cfg(void)
+{
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+
+ fprintf(outf, "cpu%d: MSR_PKG_CST_CONFIG_CONTROL: 0x%08llx", base_cpu, msr);
+
+ fprintf(outf, " (%s%s%s%s%slocked, pkg-cstate-limit=%d (%s)",
+ (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
+ (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
+ (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
+ (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
+ (msr & (1 << 15)) ? "" : "UN",
+ (unsigned int)msr & 0xF,
+ pkg_cstate_limit_strings[pkg_cstate_limit]);
+
+#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
+ if (has_automatic_cstate_conversion) {
+ fprintf(outf, ", automatic c-state conversion=%s",
+ (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
+ }
+
+ fprintf(outf, ")\n");
+}
+
+static void
+dump_config_tdp(void)
+{
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
+ fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
+ if (msr) {
+ fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
+ fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
+ fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
+ fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
+ }
+ fprintf(outf, ")\n");
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
+ if (msr) {
+ fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
+ fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
+ fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
+ fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
+ }
+ fprintf(outf, ")\n");
+
+ get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
+ if ((msr) & 0x3)
+ fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
+ fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
+ fprintf(outf, ")\n");
+
+ get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
+ fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
+ fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
+ fprintf(outf, ")\n");
+}
+
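+/* IRTL time-unit field i selects 32^i ns: 1 ns, 32 ns, ~1 us, ~33 us, ~1 ms, ~34 ms */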
+unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
+
+void print_irtl(void)
+{
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
+
+ get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
+
+ get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
+
+ if (!do_irtl_hsw)
+ return;
+
+ get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
+
+ get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
+
+ get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x7]);
+}
+
+void free_fd_percpu(void)
+{
+ int i;
+
+ for (i = 0; i < topo.max_cpu_num + 1; ++i) {
+ if (fd_percpu[i] != 0)
+ close(fd_percpu[i]);
+ }
+
+ free(fd_percpu);
+}
+
+void free_all_buffers(void)
+{
+ int i;
+
+ CPU_FREE(cpu_present_set);
+ cpu_present_set = NULL;
+ cpu_present_setsize = 0;
+
+ CPU_FREE(cpu_affinity_set);
+ cpu_affinity_set = NULL;
+ cpu_affinity_setsize = 0;
+
+ free(thread_even);
+ free(core_even);
+ free(package_even);
+
+ thread_even = NULL;
+ core_even = NULL;
+ package_even = NULL;
+
+ free(thread_odd);
+ free(core_odd);
+ free(package_odd);
+
+ thread_odd = NULL;
+ core_odd = NULL;
+ package_odd = NULL;
+
+ free(output_buffer);
+ output_buffer = NULL;
+ outp = NULL;
+
+ free_fd_percpu();
+
+ free(irq_column_2_cpu);
+ free(irqs_per_cpu);
+
+ for (i = 0; i <= topo.max_cpu_num; ++i) {
+ if (cpus[i].put_ids)
+ CPU_FREE(cpus[i].put_ids);
+ }
+ free(cpus);
+}
+
+/*
+ * Parse a file containing a single int.
+ */
+int parse_int_file(const char *fmt, ...)
+{
+ va_list args;
+ char path[PATH_MAX];
+ FILE *filep;
+ int value;
+
+ va_start(args, fmt);
+ vsnprintf(path, sizeof(path), fmt, args);
+ va_end(args);
+ filep = fopen_or_die(path, "r");
+ if (fscanf(filep, "%d", &value) != 1)
+ err(1, "%s: failed to parse number from file", path);
+ fclose(filep);
+ return value;
+}
+
+/*
+ * cpu_is_first_core_in_package(cpu)
+ * return 1 if given CPU is 1st core in package
+ */
+int cpu_is_first_core_in_package(int cpu)
+{
+ return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu);
+}
+
+int get_physical_package_id(int cpu)
+{
+ return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+}
+
+int get_core_id(int cpu)
+{
+ return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+}
+
+void set_node_data(void)
+{
+ int pkg, node, lnode, cpu, cpux;
+ int cpu_count;
+
+ /* initialize logical_node_id */
+ for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu)
+ cpus[cpu].logical_node_id = -1;
+
+ cpu_count = 0;
+ for (pkg = 0; pkg < topo.num_packages; pkg++) {
+ lnode = 0;
+ for (cpu = 0; cpu <= topo.max_cpu_num; ++cpu) {
+ if (cpus[cpu].physical_package_id != pkg)
+ continue;
+ /* find a cpu with an unset logical_node_id */
+ if (cpus[cpu].logical_node_id != -1)
+ continue;
+ cpus[cpu].logical_node_id = lnode;
+ node = cpus[cpu].physical_node_id;
+ cpu_count++;
+ /*
+ * find all matching cpus on this pkg and set
+ * the logical_node_id
+ */
+ for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
+ if ((cpus[cpux].physical_package_id == pkg) &&
+ (cpus[cpux].physical_node_id == node)) {
+ cpus[cpux].logical_node_id = lnode;
+ cpu_count++;
+ }
+ }
+ lnode++;
+ if (lnode > topo.nodes_per_pkg)
+ topo.nodes_per_pkg = lnode;
+ }
+ if (cpu_count >= topo.max_cpu_num)
+ break;
+ }
+}
+
+int get_physical_node_id(struct cpu_topology *thiscpu)
+{
+ char path[80];
+ FILE *filep;
+ int i;
+ int cpu = thiscpu->logical_cpu_id;
+
+ for (i = 0; i <= topo.max_cpu_num; i++) {
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
+ cpu, i);
+ filep = fopen(path, "r");
+ if (!filep)
+ continue;
+ fclose(filep);
+ return i;
+ }
+ return -1;
+}
+
+int get_thread_siblings(struct cpu_topology *thiscpu)
+{
+ char path[80], character;
+ FILE *filep;
+ unsigned long map;
+ int so, shift, sib_core;
+ int cpu = thiscpu->logical_cpu_id;
+ int offset = topo.max_cpu_num + 1;
+ size_t size;
+ int thread_id = 0;
+
+ thiscpu->put_ids = CPU_ALLOC(topo.max_cpu_num + 1);
+ if (thiscpu->thread_id < 0)
+ thiscpu->thread_id = thread_id++;
+ if (!thiscpu->put_ids)
+ return -1;
+
+ size = CPU_ALLOC_SIZE(topo.max_cpu_num + 1);
+ CPU_ZERO_S(size, thiscpu->put_ids);
+
+ sprintf(path,
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+ filep = fopen_or_die(path, "r");
+ do {
+ offset -= BITMASK_SIZE;
+ fscanf(filep, "%lx%c", &map, &character);
+ for (shift = 0; shift < BITMASK_SIZE; shift++) {
+ if ((map >> shift) & 0x1) {
+ so = shift + offset;
+ sib_core = get_core_id(so);
+ if (sib_core == thiscpu->physical_core_id) {
+ CPU_SET_S(so, size, thiscpu->put_ids);
+ if ((so != cpu) &&
+ (cpus[so].thread_id < 0))
+ cpus[so].thread_id =
+ thread_id++;
+ }
+ }
+ }
+ } while (!strncmp(&character, ",", 1));
+ fclose(filep);
+
+ return CPU_COUNT_S(size, thiscpu->put_ids);
+}
+
+/*
+ * run func(t, c, p, t2, c2, p2) across two counter snapshots,
+ * in topology order; skip non-present cpus
+ */
+
+int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
+ struct pkg_data *, struct thread_data *, struct core_data *,
+ struct pkg_data *), struct thread_data *thread_base,
+ struct core_data *core_base, struct pkg_data *pkg_base,
+ struct thread_data *thread_base2, struct core_data *core_base2,
+ struct pkg_data *pkg_base2)
+{
+ int retval, pkg_no, node_no, core_no, thread_no;
+
+ for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
+ for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
+ for (core_no = 0; core_no < topo.cores_per_node;
+ ++core_no) {
+ for (thread_no = 0; thread_no <
+ topo.threads_per_core; ++thread_no) {
+ struct thread_data *t, *t2;
+ struct core_data *c, *c2;
+ struct pkg_data *p, *p2;
+
+ t = GET_THREAD(thread_base, thread_no,
+ core_no, node_no,
+ pkg_no);
+
+ if (cpu_is_not_present(t->cpu_id))
+ continue;
+
+ t2 = GET_THREAD(thread_base2, thread_no,
+ core_no, node_no,
+ pkg_no);
+
+ c = GET_CORE(core_base, core_no,
+ node_no, pkg_no);
+ c2 = GET_CORE(core_base2, core_no,
+ node_no,
+ pkg_no);
+
+ p = GET_PKG(pkg_base, pkg_no);
+ p2 = GET_PKG(pkg_base2, pkg_no);
+
+ retval = func(t, c, p, t2, c2, p2);
+ if (retval)
+ return retval;
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/*
+ * run func(cpu) on every cpu in /proc/stat
+ * return 0, or the first non-zero value returned by func()
+ */
+int for_all_proc_cpus(int (func)(int))
+{
+ FILE *fp;
+ int cpu_num;
+ int retval;
+
+ fp = fopen_or_die(proc_stat, "r");
+
+ retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+ if (retval != 0)
+ err(1, "%s: failed to parse format", proc_stat);
+
+ while (1) {
+ retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
+ if (retval != 1)
+ break;
+
+ retval = func(cpu_num);
+ if (retval) {
+ fclose(fp);
+ return retval;
+ }
+ }
+ fclose(fp);
+ return 0;
+}
+
+void re_initialize(void)
+{
+ free_all_buffers();
+ setup_all_buffers();
+ printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+}
+
+void set_max_cpu_num(void)
+{
+ FILE *filep;
+ unsigned long dummy;
+
+ topo.max_cpu_num = 0;
+ filep = fopen_or_die(
+ "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
+ "r");
+ while (fscanf(filep, "%lx,", &dummy) == 1)
+ topo.max_cpu_num += BITMASK_SIZE;
+ fclose(filep);
+ topo.max_cpu_num--; /* 0 based */
+}
+
+/*
+ * count_cpus()
+ * increment topo.num_cpus for each cpu found in /proc/stat
+ */
+int count_cpus(int cpu)
+{
+ topo.num_cpus++;
+ return 0;
+}
+
+int mark_cpu_present(int cpu)
+{
+ CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
+ return 0;
+}
+
+int init_thread_id(int cpu)
+{
+ cpus[cpu].thread_id = -1;
+ return 0;
+}
+
+/*
+ * snapshot_proc_interrupts()
+ *
+ * read and record summary of /proc/interrupts
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_proc_interrupts(void)
+{
+ static FILE *fp;
+ int column, retval;
+
+ if (fp == NULL)
+ fp = fopen_or_die("/proc/interrupts", "r");
+ else
+ rewind(fp);
+
+ /* read 1st line of /proc/interrupts to get cpu* name for each column */
+ for (column = 0; column < topo.num_cpus; ++column) {
+ int cpu_number;
+
+ retval = fscanf(fp, " CPU%d", &cpu_number);
+ if (retval != 1)
+ break;
+
+ if (cpu_number > topo.max_cpu_num) {
+ warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
+ return 1;
+ }
+
+ irq_column_2_cpu[column] = cpu_number;
+ irqs_per_cpu[cpu_number] = 0;
+ }
+
+ /* read /proc/interrupt count lines and sum up irqs per cpu */
+ while (1) {
+ int column;
+ char buf[64];
+
+ retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */
+ if (retval != 1)
+ break;
+
+ /* read the count per cpu */
+ for (column = 0; column < topo.num_cpus; ++column) {
+
+ int cpu_number, irq_count;
+
+ retval = fscanf(fp, " %d", &irq_count);
+ if (retval != 1)
+ break;
+
+ cpu_number = irq_column_2_cpu[column];
+ irqs_per_cpu[cpu_number] += irq_count;
+
+ }
+
+ while (getc(fp) != '\n')
+ ; /* flush interrupt description */
+
+ }
+ return 0;
+}
+
+/*
+ * snapshot_gfx_rc6_ms()
+ *
+ * record snapshot of
+ * /sys/class/drm/card0/power/rc6_residency_ms
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_rc6_ms(void)
+{
+ FILE *fp;
+ int retval;
+
+ fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
+
+ retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
+ if (retval != 1)
+ err(1, "GFX rc6");
+
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * snapshot_gfx_mhz()
+ *
+ * record snapshot of
+ * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_mhz(void)
+{
+ static FILE *fp;
+ int retval;
+
+ if (fp == NULL)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+ else {
+ rewind(fp);
+ fflush(fp);
+ }
+
+ retval = fscanf(fp, "%d", &gfx_cur_mhz);
+ if (retval != 1)
+ err(1, "GFX MHz");
+
+ return 0;
+}
+
+/*
+ * snapshot_cpu_lpi()
+ *
+ * record snapshot of
+ * /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
+ */
+int snapshot_cpu_lpi_us(void)
+{
+ FILE *fp;
+ int retval;
+
+ fp = fopen_or_die("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", "r");
+
+ retval = fscanf(fp, "%lld", &cpuidle_cur_cpu_lpi_us);
+ if (retval != 1)
+ err(1, "CPU LPI");
+
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * snapshot_sys_lpi()
+ *
+ * record snapshot of sys_lpi_file
+ */
+int snapshot_sys_lpi_us(void)
+{
+ FILE *fp;
+ int retval;
+
+ fp = fopen_or_die(sys_lpi_file, "r");
+
+ retval = fscanf(fp, "%lld", &cpuidle_cur_sys_lpi_us);
+ if (retval != 1)
+ err(1, "SYS LPI");
+
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * snapshot /proc and /sys files
+ *
+ * return 1 if configuration restart needed, else return 0
+ */
+int snapshot_proc_sysfs_files(void)
+{
+ if (DO_BIC(BIC_IRQ))
+ if (snapshot_proc_interrupts())
+ return 1;
+
+ if (DO_BIC(BIC_GFX_rc6))
+ snapshot_gfx_rc6_ms();
+
+ if (DO_BIC(BIC_GFXMHz))
+ snapshot_gfx_mhz();
+
+ if (DO_BIC(BIC_CPU_LPI))
+ snapshot_cpu_lpi_us();
+
+ if (DO_BIC(BIC_SYS_LPI))
+ snapshot_sys_lpi_us();
+
+ return 0;
+}
+
+int exit_requested;
+
+static void signal_handler (int signal)
+{
+ switch (signal) {
+ case SIGINT:
+ exit_requested = 1;
+ if (debug)
+ fprintf(stderr, " SIGINT\n");
+ break;
+ case SIGUSR1:
+ if (debug > 1)
+ fprintf(stderr, "SIGUSR1\n");
+ break;
+ }
+ /* make sure this manually-invoked interval is at least 1ms long */
+ nanosleep(&one_msec, NULL);
+}
+
+void setup_signal_handler(void)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sa_handler = &signal_handler;
+
+ if (sigaction(SIGINT, &sa, NULL) < 0)
+ err(1, "sigaction SIGINT");
+ if (sigaction(SIGUSR1, &sa, NULL) < 0)
+ err(1, "sigaction SIGUSR1");
+}
+
+void do_sleep(void)
+{
+ struct timeval select_timeout;
+ fd_set readfds;
+ int retval;
+
+ FD_ZERO(&readfds);
+ FD_SET(0, &readfds);
+
+ if (!isatty(fileno(stdin))) {
+ nanosleep(&interval_ts, NULL);
+ return;
+ }
+
+ select_timeout = interval_tv;
+ retval = select(1, &readfds, NULL, NULL, &select_timeout);
+
+ if (retval == 1) {
+ switch (getc(stdin)) {
+ case 'q':
+ exit_requested = 1;
+ break;
+ }
+ /* make sure this manually-invoked interval is at least 1ms long */
+ nanosleep(&one_msec, NULL);
+ }
+}
+
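+/*
+ * Counters are kept in two banks (EVEN/ODD): each iteration reads fresh
+ * counters into one bank and computes deltas against the other, so no
+ * snapshot copying is needed between intervals.
+ */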
+void turbostat_loop(void)
+{
+ int retval;
+ int restarted = 0;
+ int done_iters = 0;
+
+ setup_signal_handler();
+
+restart:
+ restarted++;
+
+ snapshot_proc_sysfs_files();
+ retval = for_all_cpus(get_counters, EVEN_COUNTERS);
+ first_counter_read = 0;
+ if (retval < -1) {
+ exit(retval);
+ } else if (retval == -1) {
+ if (restarted > 1) {
+ exit(retval);
+ }
+ re_initialize();
+ goto restart;
+ }
+ restarted = 0;
+ done_iters = 0;
+ gettimeofday(&tv_even, (struct timezone *)NULL);
+
+ while (1) {
+ if (for_all_proc_cpus(cpu_is_not_present)) {
+ re_initialize();
+ goto restart;
+ }
+ do_sleep();
+ if (snapshot_proc_sysfs_files())
+ goto restart;
+ retval = for_all_cpus(get_counters, ODD_COUNTERS);
+ if (retval < -1) {
+ exit(retval);
+ } else if (retval == -1) {
+ re_initialize();
+ goto restart;
+ }
+ gettimeofday(&tv_odd, (struct timezone *)NULL);
+ timersub(&tv_odd, &tv_even, &tv_delta);
+ if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS)) {
+ re_initialize();
+ goto restart;
+ }
+ compute_average(EVEN_COUNTERS);
+ format_all_counters(EVEN_COUNTERS);
+ flush_output_stdout();
+ if (exit_requested)
+ break;
+ if (num_iterations && ++done_iters >= num_iterations)
+ break;
+ do_sleep();
+ if (snapshot_proc_sysfs_files())
+ goto restart;
+ retval = for_all_cpus(get_counters, EVEN_COUNTERS);
+ if (retval < -1) {
+ exit(retval);
+ } else if (retval == -1) {
+ re_initialize();
+ goto restart;
+ }
+ gettimeofday(&tv_even, (struct timezone *)NULL);
+ timersub(&tv_even, &tv_odd, &tv_delta);
+ if (for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS)) {
+ re_initialize();
+ goto restart;
+ }
+ compute_average(ODD_COUNTERS);
+ format_all_counters(ODD_COUNTERS);
+ flush_output_stdout();
+ if (exit_requested)
+ break;
+ if (num_iterations && ++done_iters >= num_iterations)
+ break;
+ }
+}
+
+void check_dev_msr(void)
+{
+ struct stat sb;
+ char pathname[32];
+
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb))
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+ err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
+}
+
+void check_permissions(void)
+{
+ struct __user_cap_header_struct cap_header_data;
+ cap_user_header_t cap_header = &cap_header_data;
+ struct __user_cap_data_struct cap_data_data;
+ cap_user_data_t cap_data = &cap_data_data;
+ extern int capget(cap_user_header_t hdrp, cap_user_data_t datap);
+ int do_exit = 0;
+ char pathname[32];
+
+ /* check for CAP_SYS_RAWIO */
+ cap_header->pid = getpid();
+ cap_header->version = _LINUX_CAPABILITY_VERSION;
+ if (capget(cap_header, cap_data) < 0)
+ err(-6, "capget(2) failed");
+
+ if ((cap_data->effective & (1 << CAP_SYS_RAWIO)) == 0) {
+ do_exit++;
+ warnx("capget(CAP_SYS_RAWIO) failed,"
+ " try \"# setcap cap_sys_rawio=ep %s\"", progname);
+ }
+
+ /* test file permissions */
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (euidaccess(pathname, R_OK)) {
+ do_exit++;
+ warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr");
+ }
+
+ /* if all else fails, tell them to be root */
+ if (do_exit)
+ if (getuid() != 0)
+ warnx("... or simply run as root");
+
+ if (do_exit)
+ exit(-6);
+}
+
+/*
+ * NHM adds support for additional MSRs:
+ *
+ * MSR_SMI_COUNT 0x00000034
+ *
+ * MSR_PLATFORM_INFO 0x000000ce
+ * MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
+ *
+ * MSR_MISC_PWR_MGMT 0x000001aa
+ *
+ * MSR_PKG_C3_RESIDENCY 0x000003f8
+ * MSR_PKG_C6_RESIDENCY 0x000003f9
+ * MSR_CORE_C3_RESIDENCY 0x000003fc
+ * MSR_CORE_C6_RESIDENCY 0x000003fd
+ *
+ * Side effect:
+ * sets global pkg_cstate_limit to decode MSR_PKG_CST_CONFIG_CONTROL
+ * sets has_misc_feature_control
+ */
+int probe_nhm_msrs(unsigned int family, unsigned int model)
+{
+ unsigned long long msr;
+ unsigned int base_ratio;
+ int *pkg_cstate_limits;
+
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ bclk = discover_bclk(family, model);
+
+ switch (model) {
+ case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
+ case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
+ case 0x1F: /* Core i7 and i5 Processor - Nehalem */
+ case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */
+ case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */
+ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
+ case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */
+ pkg_cstate_limits = nhm_pkg_cstate_limits;
+ break;
+ case INTEL_FAM6_SANDYBRIDGE: /* SNB */
+ case INTEL_FAM6_SANDYBRIDGE_X: /* SNB Xeon */
+ case INTEL_FAM6_IVYBRIDGE: /* IVB */
+ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
+ pkg_cstate_limits = snb_pkg_cstate_limits;
+ has_misc_feature_control = 1;
+ break;
+ case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
+ case INTEL_FAM6_HASWELL_GT3E: /* HSW */
+ case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ pkg_cstate_limits = hsw_pkg_cstate_limits;
+ has_misc_feature_control = 1;
+ break;
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ pkg_cstate_limits = skx_pkg_cstate_limits;
+ has_misc_feature_control = 1;
+ break;
+ case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
+ no_MSR_MISC_PWR_MGMT = 1;
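+ /* fall through */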
+ case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
+ pkg_cstate_limits = slv_pkg_cstate_limits;
+ break;
+ case INTEL_FAM6_ATOM_AIRMONT: /* AMT */
+ pkg_cstate_limits = amt_pkg_cstate_limits;
+ no_MSR_MISC_PWR_MGMT = 1;
+ break;
+ case INTEL_FAM6_XEON_PHI_KNL: /* PHI */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ pkg_cstate_limits = phi_pkg_cstate_limits;
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
+ pkg_cstate_limits = glm_pkg_cstate_limits;
+ break;
+ default:
+ return 0;
+ }
+ get_msr(base_cpu, MSR_PKG_CST_CONFIG_CONTROL, &msr);
+ pkg_cstate_limit = pkg_cstate_limits[msr & 0xF];
+
+ get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
+ base_ratio = (msr >> 8) & 0xFF;
+
+ base_hz = base_ratio * bclk * 1000000;
+ has_base_hz = 1;
+ return 1;
+}
+
+/*
+ * SLV client has support for unique MSRs:
+ *
+ * MSR_CC6_DEMOTION_POLICY_CONFIG
+ * MSR_MC6_DEMOTION_POLICY_CONFIG
+ */
+
+int has_slv_msrs(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_MID:
+ case INTEL_FAM6_ATOM_AIRMONT_MID:
+ return 1;
+ }
+ return 0;
+}
+
+int is_dnv(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_GOLDMONT_X:
+ return 1;
+ }
+ return 0;
+}
+
+int is_bdx(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_BROADWELL_X:
+ case INTEL_FAM6_BROADWELL_XEON_D:
+ return 1;
+ }
+ return 0;
+}
+
+int is_skx(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_SKYLAKE_X:
+ return 1;
+ }
+ return 0;
+}
+
+int has_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (has_slv_msrs(family, model))
+ return 0;
+
+ switch (model) {
+ /* Nehalem compatible, but do not include turbo-ratio limit support */
+ case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
+ case INTEL_FAM6_WESTMERE_EX: /* Westmere-EX Xeon - Eagleton */
+ case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (has_slv_msrs(family, model))
+ return 1;
+
+ return 0;
+}
+
+int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
+ case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_HASWELL_X: /* HSW Xeon */
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_GOLDMONT:
+ case INTEL_FAM6_SKYLAKE_X:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+int has_config_tdp(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ if (family != 6)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_IVYBRIDGE: /* IVB */
+ case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
+ case INTEL_FAM6_HASWELL_GT3E: /* HSW */
+ case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
+
+ case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+static void
+dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
+{
+ if (!do_nhm_platform_info)
+ return;
+
+ dump_nhm_platform_info();
+
+ if (has_hsw_turbo_ratio_limit(family, model))
+ dump_hsw_turbo_ratio_limits();
+
+ if (has_ivt_turbo_ratio_limit(family, model))
+ dump_ivt_turbo_ratio_limits();
+
+ if (has_turbo_ratio_limit(family, model))
+ dump_turbo_ratio_limits(family, model);
+
+ if (has_atom_turbo_ratio_limit(family, model))
+ dump_atom_turbo_ratio_limits();
+
+ if (has_knl_turbo_ratio_limit(family, model))
+ dump_knl_turbo_ratio_limits();
+
+ if (has_config_tdp(family, model))
+ dump_config_tdp();
+
+ dump_nhm_cst_cfg();
+}
+
+static void
+dump_sysfs_cstate_config(void)
+{
+ char path[64];
+ char name_buf[16];
+ char desc[64];
+ FILE *input;
+ int state;
+ char *sp;
+
+ if (!DO_BIC(BIC_sysfs))
+ return;
+
+ for (state = 0; state < 10; ++state) {
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+ fgets(name_buf, sizeof(name_buf), input);
+
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+ *sp = '\0';
+
+ fclose(input);
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+ fgets(desc, sizeof(desc), input);
+
+ fprintf(outf, "cpu%d: %s: %s", base_cpu, name_buf, desc);
+ fclose(input);
+ }
+}
+
+static void
+dump_sysfs_pstate_config(void)
+{
+ char path[64];
+ char driver_buf[64];
+ char governor_buf[64];
+ FILE *input;
+ int turbo;
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
+ base_cpu);
+ input = fopen(path, "r");
+ if (input == NULL) {
+ fprintf(stderr, "NSFOD %s\n", path);
+ return;
+ }
+ fgets(driver_buf, sizeof(driver_buf), input);
+ fclose(input);
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
+ base_cpu);
+ input = fopen(path, "r");
+ if (input == NULL) {
+ fprintf(stderr, "NSFOD %s\n", path);
+ return;
+ }
+ fgets(governor_buf, sizeof(governor_buf), input);
+ fclose(input);
+
+ fprintf(outf, "cpu%d: cpufreq driver: %s", base_cpu, driver_buf);
+ fprintf(outf, "cpu%d: cpufreq governor: %s", base_cpu, governor_buf);
+
+ sprintf(path, "/sys/devices/system/cpu/cpufreq/boost");
+ input = fopen(path, "r");
+ if (input != NULL) {
+ fscanf(input, "%d", &turbo);
+ fprintf(outf, "cpufreq boost: %d\n", turbo);
+ fclose(input);
+ }
+
+ sprintf(path, "/sys/devices/system/cpu/intel_pstate/no_turbo");
+ input = fopen(path, "r");
+ if (input != NULL) {
+ fscanf(input, "%d", &turbo);
+ fprintf(outf, "cpufreq intel_pstate no_turbo: %d\n", turbo);
+ fclose(input);
+ }
+}
+
+
+/*
+ * print_epb()
+ * Decode the ENERGY_PERF_BIAS MSR
+ */
+int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ char *epb_string;
+ int cpu;
+
+ if (!has_epb)
+ return 0;
+
+ cpu = t->cpu_id;
+
+ /* EPB is per-package */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr))
+ return 0;
+
+ switch (msr & 0xF) {
+ case ENERGY_PERF_BIAS_PERFORMANCE:
+ epb_string = "performance";
+ break;
+ case ENERGY_PERF_BIAS_NORMAL:
+ epb_string = "balanced";
+ break;
+ case ENERGY_PERF_BIAS_POWERSAVE:
+ epb_string = "powersave";
+ break;
+ default:
+ epb_string = "custom";
+ break;
+ }
+ fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+
+ return 0;
+}
+
+/*
+ * print_hwp()
+ * Decode the MSR_HWP_CAPABILITIES
+ */
+int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ int cpu;
+
+ if (!has_hwp)
+ return 0;
+
+ cpu = t->cpu_id;
+
+ /* MSR_HWP_CAPABILITIES is per-package */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (get_msr(cpu, MSR_PM_ENABLE, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
+ cpu, msr, (msr & (1 << 0)) ? "" : "No-");
+
+ /* MSR_PM_ENABLE[0] == 1 if HWP is enabled and MSRs visible */
+ if ((msr & (1 << 0)) == 0)
+ return 0;
+
+ if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
+ "(high %d guar %d eff %d low %d)\n",
+ cpu, msr,
+ (unsigned int)HWP_HIGHEST_PERF(msr),
+ (unsigned int)HWP_GUARANTEED_PERF(msr),
+ (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
+ (unsigned int)HWP_LOWEST_PERF(msr));
+
+ if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
+ "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
+ cpu, msr,
+ (unsigned int)(((msr) >> 0) & 0xff),
+ (unsigned int)(((msr) >> 8) & 0xff),
+ (unsigned int)(((msr) >> 16) & 0xff),
+ (unsigned int)(((msr) >> 24) & 0xff),
+ (unsigned int)(((msr) >> 32) & 0x3ff),
+ (unsigned int)(((msr) >> 42) & 0x1));
+
+ if (has_hwp_pkg) {
+ if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
+ "(min %d max %d des %d epp 0x%x window 0x%x)\n",
+ cpu, msr,
+ (unsigned int)(((msr) >> 0) & 0xff),
+ (unsigned int)(((msr) >> 8) & 0xff),
+ (unsigned int)(((msr) >> 16) & 0xff),
+ (unsigned int)(((msr) >> 24) & 0xff),
+ (unsigned int)(((msr) >> 32) & 0x3ff));
+ }
+ if (has_hwp_notify) {
+ if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
+ "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
+ cpu, msr,
+ ((msr) & 0x1) ? "EN" : "Dis",
+ ((msr) & 0x2) ? "EN" : "Dis");
+ }
+ if (get_msr(cpu, MSR_HWP_STATUS, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
+ "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
+ cpu, msr,
+ ((msr) & 0x1) ? "" : "No-",
+ ((msr) & 0x2) ? "" : "No-");
+
+ return 0;
+}
+
+/*
+ * print_perf_limit()
+ */
+int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ int cpu;
+
+ cpu = t->cpu_id;
+
+ /* per-package */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (do_core_perf_limit_reasons) {
+ get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
+ fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
+ (msr & 1 << 15) ? "bit15, " : "",
+ (msr & 1 << 14) ? "bit14, " : "",
+ (msr & 1 << 13) ? "Transitions, " : "",
+ (msr & 1 << 12) ? "MultiCoreTurbo, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 9) ? "CorePwr, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 5) ? "Auto-HWP, " : "",
+ (msr & 1 << 4) ? "Graphics, " : "",
+ (msr & 1 << 2) ? "bit2, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 0) ? "PROCHOT, " : "");
+ fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+ (msr & 1 << 31) ? "bit31, " : "",
+ (msr & 1 << 30) ? "bit30, " : "",
+ (msr & 1 << 29) ? "Transitions, " : "",
+ (msr & 1 << 28) ? "MultiCoreTurbo, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 25) ? "CorePwr, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 21) ? "Auto-HWP, " : "",
+ (msr & 1 << 20) ? "Graphics, " : "",
+ (msr & 1 << 18) ? "bit18, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 16) ? "PROCHOT, " : "");
+
+ }
+ if (do_gfx_perf_limit_reasons) {
+ get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
+ fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
+ (msr & 1 << 0) ? "PROCHOT, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 4) ? "Graphics, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 9) ? "GFXPwr, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
+ (msr & 1 << 16) ? "PROCHOT, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 20) ? "Graphics, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 25) ? "GFXPwr, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ }
+ if (do_ring_perf_limit_reasons) {
+ get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
+ fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(outf, " (Active: %s%s%s%s%s%s)",
+ (msr & 1 << 0) ? "PROCHOT, " : "",
+ (msr & 1 << 1) ? "ThermStatus, " : "",
+ (msr & 1 << 6) ? "VR-Therm, " : "",
+ (msr & 1 << 8) ? "Amps, " : "",
+ (msr & 1 << 10) ? "PkgPwrL1, " : "",
+ (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
+ (msr & 1 << 16) ? "PROCHOT, " : "",
+ (msr & 1 << 17) ? "ThermStatus, " : "",
+ (msr & 1 << 22) ? "VR-Therm, " : "",
+ (msr & 1 << 24) ? "Amps, " : "",
+ (msr & 1 << 26) ? "PkgPwrL1, " : "",
+ (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ }
+ return 0;
+}
+
+#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
+#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
+
+double get_tdp(unsigned int model)
+{
+ unsigned long long msr;
+
+ if (do_rapl & RAPL_PKG_POWER_INFO)
+ if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr))
+ return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_SILVERMONT:
+ case INTEL_FAM6_ATOM_SILVERMONT_X:
+ return 30.0;
+ default:
+ return 135.0;
+ }
+}
+
+/*
+ * rapl_dram_energy_units_probe()
+ * Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
+ */
+static double
+rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+{
+ /* only called for genuine_intel, family 6 */
+
+ switch (model) {
+ case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
+ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ return 15.3 / 1000000;
+ default:
+ return rapl_energy_units;
+ }
+}
+
+/*
+ * rapl_probe()
+ *
+ * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units
+ */
+void rapl_probe(unsigned int family, unsigned int model)
+{
+ unsigned long long msr;
+ unsigned int time_unit;
+ double tdp;
+
+ if (!genuine_intel)
+ return;
+
+ if (family != 6)
+ return;
+
+ switch (model) {
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_IVYBRIDGE:
+ case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
+ case INTEL_FAM6_HASWELL_GT3E: /* HSW */
+ case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_GFX_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_GFXWatt);
+ }
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
+ if (rapl_joules)
+ BIC_PRESENT(BIC_Pkg_J);
+ else
+ BIC_PRESENT(BIC_PkgWatt);
+ break;
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ BIC_PRESENT(BIC_GFX_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ BIC_PRESENT(BIC_GFXWatt);
+ }
+ break;
+ case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_RAM_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ }
+ break;
+ case INTEL_FAM6_SANDYBRIDGE_X:
+ case INTEL_FAM6_IVYBRIDGE_X:
+ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ }
+ break;
+ case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
+ case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
+ do_rapl = RAPL_PKG | RAPL_CORES;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ }
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
+ do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
+ BIC_PRESENT(BIC_PKG__);
+ BIC_PRESENT(BIC_RAM__);
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ BIC_PRESENT(BIC_RAM_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
+ BIC_PRESENT(BIC_RAMWatt);
+ }
+ break;
+ default:
+ return;
+ }
+
+ /* units on package 0, verify later other packages match */
+ if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr))
+ return;
+
+ rapl_power_units = 1.0 / (1 << (msr & 0xF));
+ if (model == INTEL_FAM6_ATOM_SILVERMONT)
+ rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000;
+ else
+ rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));
+
+ rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units);
+
+ time_unit = msr >> 16 & 0xF;
+ if (time_unit == 0)
+ time_unit = 0xA;
+
+ rapl_time_units = 1.0 / (1 << (time_unit));
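+ /*
+ * Example: a typical MSR_RAPL_POWER_UNIT value of 0x000A0E03 decodes
+ * to 1/8 W power units, 2^-14 J (~61 uJ) energy units, and
+ * 2^-10 s (~977 us) time units.
+ */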
+
+ tdp = get_tdp(model);
+
+ rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
+ if (!quiet)
+ fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
+}
+
+void perf_limit_reasons_probe(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return;
+
+ if (family != 6)
+ return;
+
+ switch (model) {
+ case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
+ case INTEL_FAM6_HASWELL_GT3E: /* HSW */
+ do_gfx_perf_limit_reasons = 1;
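+ /* fall through: HSW client parts also decode core/ring limits */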
+ case INTEL_FAM6_HASWELL_X: /* HSX */
+ do_core_perf_limit_reasons = 1;
+ do_ring_perf_limit_reasons = 1;
+ default:
+ return;
+ }
+}
+
+void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
+{
+ if (is_skx(family, model) || is_bdx(family, model))
+ has_automatic_cstate_conversion = 1;
+}
+
+int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ unsigned int dts, dts2;
+ int cpu;
+
+ if (!(do_dts || do_ptm))
+ return 0;
+
+ cpu = t->cpu_id;
+
+ /* DTS is per-core, no need to print for each thread */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) {
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
+ cpu, msr, tcc_activation_temp - dts);
+
+ if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ dts2 = (msr >> 8) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
+ }
+
+
+ if (do_dts && debug) {
+ unsigned int resolution;
+
+ if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ resolution = (msr >> 27) & 0xF;
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+ cpu, msr, tcc_activation_temp - dts, resolution);
+
+ if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
+ return 0;
+
+ dts = (msr >> 16) & 0x7F;
+ dts2 = (msr >> 8) & 0x7F;
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
+ }
+
+ return 0;
+}
+
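+/*
+ * RAPL time window = (1 + Y/4) * 2^Z * rapl_time_units, with Y in bits
+ * 23:22 and Z in bits 21:17 of the limit MSR; e.g. Y=0, Z=0 with
+ * 2^-10 s units is ~1 ms.
+ */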
+void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
+{
+ fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+ cpu, label,
+ ((msr >> 15) & 1) ? "EN" : "DIS",
+ ((msr >> 0) & 0x7FFF) * rapl_power_units,
+ (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
+ (((msr >> 16) & 1) ? "EN" : "DIS"));
+}
+
+int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ int cpu;
+
+ if (!do_rapl)
+ return 0;
+
+ /* RAPL counters are per package, so print only for 1st thread/package */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ cpu = t->cpu_id;
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr))
+ return -1;
+
+ fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx (%f Watts, %f Joules, %f sec.)\n", cpu, msr,
+ rapl_power_units, rapl_energy_units, rapl_time_units);
+
+ if (do_rapl & RAPL_PKG_POWER_INFO) {
+
+ if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
+ return -5;
+
+
+ fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+ cpu, msr,
+ ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+
+ }
+ if (do_rapl & RAPL_PKG) {
+
+ if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
+ return -9;
+
+ fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ cpu, msr, (msr >> 63) & 1 ? "" : "UN");
+
+ print_power_limit_msr(cpu, msr, "PKG Limit #1");
+ fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+ cpu,
+ ((msr >> 47) & 1) ? "EN" : "DIS",
+ ((msr >> 32) & 0x7FFF) * rapl_power_units,
+ (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
+ ((msr >> 48) & 1) ? "EN" : "DIS");
+ }
+
+ if (do_rapl & RAPL_DRAM_POWER_INFO) {
+ if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
+ return -6;
+
+ fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+ cpu, msr,
+ ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
+ }
+ if (do_rapl & RAPL_DRAM) {
+ if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
+ return -9;
+ fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+
+ print_power_limit_msr(cpu, msr, "DRAM Limit");
+ }
+ if (do_rapl & RAPL_CORE_POLICY) {
+ if (get_msr(cpu, MSR_PP0_POLICY, &msr))
+ return -7;
+
+ fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+ }
+ if (do_rapl & RAPL_CORES_POWER_LIMIT) {
+ if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
+ return -9;
+ fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ print_power_limit_msr(cpu, msr, "Cores Limit");
+ }
+ if (do_rapl & RAPL_GFX) {
+ if (get_msr(cpu, MSR_PP1_POLICY, &msr))
+ return -8;
+
+ fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
+
+ if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
+ return -9;
+ fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ print_power_limit_msr(cpu, msr, "GFX Limit");
+ }
+ return 0;
+}
+
+/*
+ * SNB adds support for additional MSRs:
+ *
+ * MSR_PKG_C7_RESIDENCY 0x000003fa
+ * MSR_CORE_C7_RESIDENCY 0x000003fe
+ * MSR_PKG_C2_RESIDENCY 0x0000060d
+ */
+
+int has_snb_msrs(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_SANDYBRIDGE:
+ case INTEL_FAM6_SANDYBRIDGE_X:
+ case INTEL_FAM6_IVYBRIDGE: /* IVB */
+ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
+ case INTEL_FAM6_HASWELL_CORE: /* HSW */
+ case INTEL_FAM6_HASWELL_X: /* HSX */
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
+ case INTEL_FAM6_HASWELL_GT3E: /* HSW */
+ case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+ case INTEL_FAM6_BROADWELL_GT3E: /* BDW */
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_BROADWELL_XEON_D: /* BDX-DE */
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * HSW adds support for additional MSRs:
+ *
+ * MSR_PKG_C8_RESIDENCY 0x00000630
+ * MSR_PKG_C9_RESIDENCY 0x00000631
+ * MSR_PKG_C10_RESIDENCY 0x00000632
+ *
+ * MSR_PKGC8_IRTL 0x00000633
+ * MSR_PKGC9_IRTL 0x00000634
+ * MSR_PKGC10_IRTL 0x00000635
+ *
+ */
+int has_hsw_msrs(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_HASWELL_ULT: /* HSW */
+ case INTEL_FAM6_BROADWELL_CORE: /* BDW */
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * SKL adds support for additional MSRs:
+ *
+ * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+ * MSR_PKG_ANY_CORE_C0_RES 0x00000659
+ * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+ * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+ */
+int has_skl_msrs(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ return 1;
+ }
+ return 0;
+}
+
+int is_slm(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
+ case INTEL_FAM6_ATOM_SILVERMONT_X: /* AVN */
+ return 1;
+ }
+ return 0;
+}
+
+int is_knl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+ switch (model) {
+ case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
+ case INTEL_FAM6_XEON_PHI_KNM:
+ return 1;
+ }
+ return 0;
+}
+
+int is_cnl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_CANNONLAKE_MOBILE: /* CNL */
+ return 1;
+ }
+
+ return 0;
+}
+
+unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
+{
+ if (is_knl(family, model))
+ return 1024;
+ return 1;
+}
+
+#define SLM_BCLK_FREQS 5
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+
+double slm_bclk(void)
+{
+ unsigned long long msr = 3;
+ unsigned int i;
+ double freq;
+
+ if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
+ fprintf(outf, "SLM BCLK: unknown\n");
+
+ i = msr & 0xf;
+ if (i >= SLM_BCLK_FREQS) {
+ fprintf(outf, "SLM BCLK[%d] invalid\n", i);
+ i = 3;
+ }
+ freq = slm_freq_table[i];
+
+ if (!quiet)
+ fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
+
+ return freq;
+}
+
+double discover_bclk(unsigned int family, unsigned int model)
+{
+ if (has_snb_msrs(family, model) || is_knl(family, model))
+ return 100.00;
+ else if (is_slm(family, model))
+ return slm_bclk();
+ else
+ return 133.33;
+}
+
+/*
+ * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
+ * the Thermal Control Circuit (TCC) activates.
+ * This is usually equal to tjMax.
+ *
+ * Older processors do not have this MSR, so there we guess,
+ * but also allow cmdline over-ride with -T.
+ *
+ * Several MSR temperature values are in units of degrees-C
+ * below this value, including the Digital Thermal Sensor (DTS),
+ * Package Thermal Management Sensor (PTM), and thermal event thresholds.
+ */
+int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ unsigned int target_c_local;
+ int cpu;
+
+ /* tcc_activation_temp is used only for dts or ptm */
+ if (!(do_dts || do_ptm))
+ return 0;
+
+ /* this is a per-package concept */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ cpu = t->cpu_id;
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (tcc_activation_temp_override != 0) {
+ tcc_activation_temp = tcc_activation_temp_override;
+ fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
+ cpu, tcc_activation_temp);
+ return 0;
+ }
+
+ /* Temperature Target MSR is Nehalem and newer only */
+ if (!do_nhm_platform_info)
+ goto guess;
+
+ if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ goto guess;
+
+ target_c_local = (msr >> 16) & 0xFF;
+
+ if (!quiet)
+ fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
+ cpu, msr, target_c_local);
+
+ if (!target_c_local)
+ goto guess;
+
+ tcc_activation_temp = target_c_local;
+
+ return 0;
+
+guess:
+ tcc_activation_temp = TJMAX_DEFAULT;
+	fprintf(outf, "cpu%d: Guessing tjMax %d C; please use -T to specify\n",
+ cpu, tcc_activation_temp);
+
+ return 0;
+}
+
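+/*
+ * Worked example (a sketch, assuming the usual DTS readout in
+ * MSR_IA32_THERM_STATUS bits [22:16]): the readout counts degrees
+ * below tcc_activation_temp, so a readout of 38 with tjMax 100
+ * means the core is running at 100 - 38 = 62 C.
+ */
+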
+void decode_feature_control_msr(void)
+{
+ unsigned long long msr;
+
+ if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
+ fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
+ base_cpu, msr,
+ msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
+ msr & (1 << 18) ? "SGX" : "");
+}
+
+void decode_misc_enable_msr(void)
+{
+ unsigned long long msr;
+
+ if (!genuine_intel)
+ return;
+
+ if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
+ fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%sTCC %sEIST %sMWAIT %sPREFETCH %sTURBO)\n",
+ base_cpu, msr,
+ msr & MSR_IA32_MISC_ENABLE_TM1 ? "" : "No-",
+ msr & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP ? "" : "No-",
+ msr & MSR_IA32_MISC_ENABLE_MWAIT ? "" : "No-",
+ msr & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE ? "No-" : "",
+ msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE ? "No-" : "");
+}
+
+void decode_misc_feature_control(void)
+{
+ unsigned long long msr;
+
+ if (!has_misc_feature_control)
+ return;
+
+ if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
+ fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
+ base_cpu, msr,
+			msr & (1 << 0) ? "No-" : "",
+			msr & (1 << 1) ? "No-" : "",
+			msr & (1 << 2) ? "No-" : "",
+			msr & (1 << 3) ? "No-" : "");
+}
+/*
+ * Decode MSR_MISC_PWR_MGMT
+ *
+ * Decode the bits according to the Nehalem documentation
+ * bit[0] seems to continue to have same meaning going forward
+ * bit[1] less so...
+ */
+void decode_misc_pwr_mgmt_msr(void)
+{
+ unsigned long long msr;
+
+ if (!do_nhm_platform_info)
+ return;
+
+ if (no_MSR_MISC_PWR_MGMT)
+ return;
+
+ if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
+ fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
+ base_cpu, msr,
+ msr & (1 << 0) ? "DIS" : "EN",
+ msr & (1 << 1) ? "EN" : "DIS",
+ msr & (1 << 8) ? "EN" : "DIS");
+}
+/*
+ * Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
+ *
+ * These MSRs are present on Silvermont processors,
+ * the Intel Atom processor E3000 series (Bay Trail), and friends.
+ */
+void decode_c6_demotion_policy_msr(void)
+{
+ unsigned long long msr;
+
+ if (!get_msr(base_cpu, MSR_CC6_DEMOTION_POLICY_CONFIG, &msr))
+ fprintf(outf, "cpu%d: MSR_CC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-CC6-Demotion)\n",
+ base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
+
+ if (!get_msr(base_cpu, MSR_MC6_DEMOTION_POLICY_CONFIG, &msr))
+ fprintf(outf, "cpu%d: MSR_MC6_DEMOTION_POLICY_CONFIG: 0x%08llx (%sable-MC6-Demotion)\n",
+ base_cpu, msr, msr & (1 << 0) ? "EN" : "DIS");
+}
+
+void process_cpuid()
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
+ unsigned int has_turbo;
+
+ eax = ebx = ecx = edx = 0;
+
+ __cpuid(0, max_level, ebx, ecx, edx);
+
+ if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
+ genuine_intel = 1;
+ else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
+ authentic_amd = 1;
+
+ if (!quiet)
+ fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
+ (char *)&ebx, (char *)&edx, (char *)&ecx);
+
+ __cpuid(1, fms, ebx, ecx, edx);
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ stepping = fms & 0xf;
+ if (family == 0xf)
+ family += (fms >> 20) & 0xff;
+ if (family >= 6)
+ model += ((fms >> 16) & 0xf) << 4;
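+	/*
+	 * Worked example: fms = 0x000906ea decodes to family 0x6,
+	 * stepping 0xa, and model 0x9e (extended-model 0x9 << 4
+	 * merged with low model 0xe).
+	 */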
+ ecx_flags = ecx;
+ edx_flags = edx;
+
+ /*
+ * check max extended function levels of CPUID.
+ * This is needed to check for invariant TSC.
+ * This check is valid for both Intel and AMD.
+ */
+ ebx = ecx = edx = 0;
+ __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
+
+ if (!quiet) {
+ fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+ max_level, max_extended_level, family, model, stepping, family, model, stepping);
+ fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
+ ecx_flags & (1 << 0) ? "SSE3" : "-",
+ ecx_flags & (1 << 3) ? "MONITOR" : "-",
+ ecx_flags & (1 << 6) ? "SMX" : "-",
+ ecx_flags & (1 << 7) ? "EIST" : "-",
+ ecx_flags & (1 << 8) ? "TM2" : "-",
+ edx_flags & (1 << 4) ? "TSC" : "-",
+ edx_flags & (1 << 5) ? "MSR" : "-",
+ edx_flags & (1 << 22) ? "ACPI-TM" : "-",
+ edx_flags & (1 << 28) ? "HT" : "-",
+ edx_flags & (1 << 29) ? "TM" : "-");
+ }
+
+ if (!(edx_flags & (1 << 5)))
+ errx(1, "CPUID: no MSR");
+
+ if (max_extended_level >= 0x80000007) {
+
+ /*
+ * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
+ * this check is valid for both Intel and AMD
+ */
+ __cpuid(0x80000007, eax, ebx, ecx, edx);
+ has_invariant_tsc = edx & (1 << 8);
+ }
+
+ /*
+ * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0
+ * this check is valid for both Intel and AMD
+ */
+
+ __cpuid(0x6, eax, ebx, ecx, edx);
+ has_aperf = ecx & (1 << 0);
+ if (has_aperf) {
+ BIC_PRESENT(BIC_Avg_MHz);
+ BIC_PRESENT(BIC_Busy);
+ BIC_PRESENT(BIC_Bzy_MHz);
+ }
+ do_dts = eax & (1 << 0);
+ if (do_dts)
+ BIC_PRESENT(BIC_CoreTmp);
+ has_turbo = eax & (1 << 1);
+ do_ptm = eax & (1 << 6);
+ if (do_ptm)
+ BIC_PRESENT(BIC_PkgTmp);
+ has_hwp = eax & (1 << 7);
+ has_hwp_notify = eax & (1 << 8);
+ has_hwp_activity_window = eax & (1 << 9);
+ has_hwp_epp = eax & (1 << 10);
+ has_hwp_pkg = eax & (1 << 11);
+ has_epb = ecx & (1 << 3);
+
+ if (!quiet)
+ fprintf(outf, "CPUID(6): %sAPERF, %sTURBO, %sDTS, %sPTM, %sHWP, "
+ "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
+ has_aperf ? "" : "No-",
+ has_turbo ? "" : "No-",
+ do_dts ? "" : "No-",
+ do_ptm ? "" : "No-",
+ has_hwp ? "" : "No-",
+ has_hwp_notify ? "" : "No-",
+ has_hwp_activity_window ? "" : "No-",
+ has_hwp_epp ? "" : "No-",
+ has_hwp_pkg ? "" : "No-",
+ has_epb ? "" : "No-");
+
+ if (!quiet)
+ decode_misc_enable_msr();
+
+
+ if (max_level >= 0x7 && !quiet) {
+ int has_sgx;
+
+ ecx = 0;
+
+ __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
+
+ has_sgx = ebx & (1 << 2);
+ fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
+
+ if (has_sgx)
+ decode_feature_control_msr();
+ }
+
+ if (max_level >= 0x15) {
+ unsigned int eax_crystal;
+ unsigned int ebx_tsc;
+
+ /*
+ * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
+ */
+ eax_crystal = ebx_tsc = crystal_hz = edx = 0;
+ __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
+
+ if (ebx_tsc != 0) {
+
+			if (!quiet)
+ fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
+ eax_crystal, ebx_tsc, crystal_hz);
+
+ if (crystal_hz == 0)
+				switch (model) {
+ case INTEL_FAM6_SKYLAKE_MOBILE: /* SKL */
+ case INTEL_FAM6_SKYLAKE_DESKTOP: /* SKL */
+ case INTEL_FAM6_KABYLAKE_MOBILE: /* KBL */
+ case INTEL_FAM6_KABYLAKE_DESKTOP: /* KBL */
+ crystal_hz = 24000000; /* 24.0 MHz */
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT_X: /* DNV */
+ crystal_hz = 25000000; /* 25.0 MHz */
+ break;
+ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
+ crystal_hz = 19200000; /* 19.2 MHz */
+ break;
+ default:
+ crystal_hz = 0;
+ }
+
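+			/*
+			 * Worked example: a 24 MHz crystal with
+			 * ebx_tsc/eax_crystal = 216/2 gives
+			 * tsc_hz = 24000000 * 216 / 2 = 2592 MHz.
+			 */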
+ if (crystal_hz) {
+ tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
+ if (!quiet)
+ fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
+ tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
+ }
+ }
+ }
+ if (max_level >= 0x16) {
+ unsigned int base_mhz, max_mhz, bus_mhz, edx;
+
+ /*
+ * CPUID 16H Base MHz, Max MHz, Bus MHz
+ */
+ base_mhz = max_mhz = bus_mhz = edx = 0;
+
+ __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+ if (!quiet)
+ fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
+ base_mhz, max_mhz, bus_mhz);
+ }
+
+ if (has_aperf)
+ aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
+
+ BIC_PRESENT(BIC_IRQ);
+ BIC_PRESENT(BIC_TSC_MHz);
+
+ if (probe_nhm_msrs(family, model)) {
+ do_nhm_platform_info = 1;
+ BIC_PRESENT(BIC_CPU_c1);
+ BIC_PRESENT(BIC_CPU_c3);
+ BIC_PRESENT(BIC_CPU_c6);
+ BIC_PRESENT(BIC_SMI);
+ }
+ do_snb_cstates = has_snb_msrs(family, model);
+
+ if (do_snb_cstates)
+ BIC_PRESENT(BIC_CPU_c7);
+
+ do_irtl_snb = has_snb_msrs(family, model);
+ if (do_snb_cstates && (pkg_cstate_limit >= PCL__2))
+ BIC_PRESENT(BIC_Pkgpc2);
+ if (pkg_cstate_limit >= PCL__3)
+ BIC_PRESENT(BIC_Pkgpc3);
+ if (pkg_cstate_limit >= PCL__6)
+ BIC_PRESENT(BIC_Pkgpc6);
+ if (do_snb_cstates && (pkg_cstate_limit >= PCL__7))
+ BIC_PRESENT(BIC_Pkgpc7);
+ if (has_slv_msrs(family, model)) {
+ BIC_NOT_PRESENT(BIC_Pkgpc2);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_PRESENT(BIC_Pkgpc6);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ BIC_PRESENT(BIC_Mod_c6);
+ use_c1_residency_msr = 1;
+ }
+ if (is_dnv(family, model)) {
+ BIC_PRESENT(BIC_CPU_c1);
+ BIC_NOT_PRESENT(BIC_CPU_c3);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ use_c1_residency_msr = 1;
+ }
+ if (is_skx(family, model)) {
+ BIC_NOT_PRESENT(BIC_CPU_c3);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ }
+ if (is_bdx(family, model)) {
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ }
+ if (has_hsw_msrs(family, model)) {
+ BIC_PRESENT(BIC_Pkgpc8);
+ BIC_PRESENT(BIC_Pkgpc9);
+ BIC_PRESENT(BIC_Pkgpc10);
+ }
+ do_irtl_hsw = has_hsw_msrs(family, model);
+ if (has_skl_msrs(family, model)) {
+ BIC_PRESENT(BIC_Totl_c0);
+ BIC_PRESENT(BIC_Any_c0);
+ BIC_PRESENT(BIC_GFX_c0);
+ BIC_PRESENT(BIC_CPUGFX);
+ }
+ do_slm_cstates = is_slm(family, model);
+ do_knl_cstates = is_knl(family, model);
+ do_cnl_cstates = is_cnl(family, model);
+
+ if (!quiet)
+ decode_misc_pwr_mgmt_msr();
+
+ if (!quiet && has_slv_msrs(family, model))
+ decode_c6_demotion_policy_msr();
+
+ rapl_probe(family, model);
+ perf_limit_reasons_probe(family, model);
+ automatic_cstate_conversion_probe(family, model);
+
+ if (!quiet)
+ dump_cstate_pstate_config_info(family, model);
+
+ if (!quiet)
+ dump_sysfs_cstate_config();
+ if (!quiet)
+ dump_sysfs_pstate_config();
+
+ if (has_skl_msrs(family, model))
+ calculate_tsc_tweak();
+
+ if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
+ BIC_PRESENT(BIC_GFX_rc6);
+
+ if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXMHz);
+
+ if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
+ BIC_PRESENT(BIC_CPU_LPI);
+ else
+ BIC_NOT_PRESENT(BIC_CPU_LPI);
+
+ if (!access(sys_lpi_file_sysfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_sysfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else if (!access(sys_lpi_file_debugfs, R_OK)) {
+ sys_lpi_file = sys_lpi_file_debugfs;
+ BIC_PRESENT(BIC_SYS_LPI);
+ } else {
+ sys_lpi_file_sysfs = NULL;
+ BIC_NOT_PRESENT(BIC_SYS_LPI);
+ }
+
+ if (!quiet)
+ decode_misc_feature_control();
+
+ return;
+}
+
+/*
+ * In /dev/cpu/, return success for names that are numbers,
+ * i.e. filter out ".", "..", and "microcode".
+ */
+int dir_filter(const struct dirent *dirp)
+{
+ if (isdigit(dirp->d_name[0]))
+ return 1;
+ else
+ return 0;
+}
+
+int open_dev_cpu_msr(int dummy1)
+{
+ return 0;
+}
+
+void topology_probe()
+{
+ int i;
+ int max_core_id = 0;
+ int max_package_id = 0;
+ int max_siblings = 0;
+
+ /* Initialize num_cpus, max_cpu_num */
+ set_max_cpu_num();
+ topo.num_cpus = 0;
+ for_all_proc_cpus(count_cpus);
+ if (!summary_only && topo.num_cpus > 1)
+ BIC_PRESENT(BIC_CPU);
+
+ if (debug > 1)
+ fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
+
+ cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
+ if (cpus == NULL)
+ err(1, "calloc cpus");
+
+ /*
+ * Allocate and initialize cpu_present_set
+ */
+ cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_present_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_present_setsize, cpu_present_set);
+ for_all_proc_cpus(mark_cpu_present);
+
+ /*
+ * Validate that all cpus in cpu_subset are also in cpu_present_set
+ */
+ for (i = 0; i < CPU_SUBSET_MAXCPUS; ++i) {
+ if (CPU_ISSET_S(i, cpu_subset_size, cpu_subset))
+ if (!CPU_ISSET_S(i, cpu_present_setsize, cpu_present_set))
+ err(1, "cpu%d not present", i);
+ }
+
+ /*
+ * Allocate and initialize cpu_affinity_set
+ */
+ cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1));
+ if (cpu_affinity_set == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
+ CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set);
+
+ for_all_proc_cpus(init_thread_id);
+
+ /*
+ * For online cpus
+ * find max_core_id, max_package_id
+ */
+ for (i = 0; i <= topo.max_cpu_num; ++i) {
+ int siblings;
+
+ if (cpu_is_not_present(i)) {
+ if (debug > 1)
+ fprintf(outf, "cpu%d NOT PRESENT\n", i);
+ continue;
+ }
+
+ cpus[i].logical_cpu_id = i;
+
+ /* get package information */
+ cpus[i].physical_package_id = get_physical_package_id(i);
+ if (cpus[i].physical_package_id > max_package_id)
+ max_package_id = cpus[i].physical_package_id;
+
+ /* get numa node information */
+ cpus[i].physical_node_id = get_physical_node_id(&cpus[i]);
+ if (cpus[i].physical_node_id > topo.max_node_num)
+ topo.max_node_num = cpus[i].physical_node_id;
+
+ /* get core information */
+ cpus[i].physical_core_id = get_core_id(i);
+ if (cpus[i].physical_core_id > max_core_id)
+ max_core_id = cpus[i].physical_core_id;
+
+ /* get thread information */
+ siblings = get_thread_siblings(&cpus[i]);
+ if (siblings > max_siblings)
+ max_siblings = siblings;
+ if (cpus[i].thread_id == 0)
+ topo.num_cores++;
+ }
+
+ topo.cores_per_node = max_core_id + 1;
+ if (debug > 1)
+ fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
+ max_core_id, topo.cores_per_node);
+ if (!summary_only && topo.cores_per_node > 1)
+ BIC_PRESENT(BIC_Core);
+
+ topo.num_packages = max_package_id + 1;
+ if (debug > 1)
+ fprintf(outf, "max_package_id %d, sizing for %d packages\n",
+ max_package_id, topo.num_packages);
+ if (!summary_only && topo.num_packages > 1)
+ BIC_PRESENT(BIC_Package);
+
+ set_node_data();
+ if (debug > 1)
+ fprintf(outf, "nodes_per_pkg %d\n", topo.nodes_per_pkg);
+ if (!summary_only && topo.nodes_per_pkg > 1)
+ BIC_PRESENT(BIC_Node);
+
+ topo.threads_per_core = max_siblings;
+ if (debug > 1)
+ fprintf(outf, "max_siblings %d\n", max_siblings);
+
+ if (debug < 1)
+ return;
+
+ for (i = 0; i <= topo.max_cpu_num; ++i) {
+ fprintf(outf,
+ "cpu %d pkg %d node %d lnode %d core %d thread %d\n",
+ i, cpus[i].physical_package_id,
+ cpus[i].physical_node_id,
+ cpus[i].logical_node_id,
+ cpus[i].physical_core_id,
+ cpus[i].thread_id);
+ }
+
+}
+
+void
+allocate_counters(struct thread_data **t, struct core_data **c,
+ struct pkg_data **p)
+{
+ int i;
+ int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
+ topo.num_packages;
+ int num_threads = topo.threads_per_core * num_cores;
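+	/*
+	 * Sizing example (hypothetical topology): 2 packages,
+	 * 1 node/package, 28 cores/node, and 2 threads/core allocate
+	 * 2 * 1 * 28 = 56 core_data and 112 thread_data entries.
+	 */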
+
+ *t = calloc(num_threads, sizeof(struct thread_data));
+ if (*t == NULL)
+ goto error;
+
+ for (i = 0; i < num_threads; i++)
+ (*t)[i].cpu_id = -1;
+
+ *c = calloc(num_cores, sizeof(struct core_data));
+ if (*c == NULL)
+ goto error;
+
+ for (i = 0; i < num_cores; i++)
+ (*c)[i].core_id = -1;
+
+ *p = calloc(topo.num_packages, sizeof(struct pkg_data));
+ if (*p == NULL)
+ goto error;
+
+ for (i = 0; i < topo.num_packages; i++)
+ (*p)[i].package_id = i;
+
+ return;
+error:
+ err(1, "calloc counters");
+}
+/*
+ * init_counter()
+ *
+ * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
+ */
+void init_counter(struct thread_data *thread_base, struct core_data *core_base,
+ struct pkg_data *pkg_base, int cpu_id)
+{
+ int pkg_id = cpus[cpu_id].physical_package_id;
+ int node_id = cpus[cpu_id].logical_node_id;
+ int core_id = cpus[cpu_id].physical_core_id;
+ int thread_id = cpus[cpu_id].thread_id;
+ struct thread_data *t;
+ struct core_data *c;
+ struct pkg_data *p;
+
+
+ /* Workaround for systems where physical_node_id==-1
+ * and logical_node_id==(-1 - topo.num_cpus)
+ */
+ if (node_id < 0)
+ node_id = 0;
+
+ t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
+ c = GET_CORE(core_base, core_id, node_id, pkg_id);
+ p = GET_PKG(pkg_base, pkg_id);
+
+ t->cpu_id = cpu_id;
+ if (thread_id == 0) {
+ t->flags |= CPU_IS_FIRST_THREAD_IN_CORE;
+ if (cpu_is_first_core_in_package(cpu_id))
+ t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE;
+ }
+
+ c->core_id = core_id;
+ p->package_id = pkg_id;
+}
+
+
+int initialize_counters(int cpu_id)
+{
+ init_counter(EVEN_COUNTERS, cpu_id);
+ init_counter(ODD_COUNTERS, cpu_id);
+ return 0;
+}
+
+void allocate_output_buffer()
+{
+	output_buffer = calloc(1, (1 + topo.num_cpus) * 2048);
+	if (output_buffer == NULL)
+		err(-1, "calloc output buffer");
+	outp = output_buffer;
+}
+void allocate_fd_percpu(void)
+{
+ fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_percpu == NULL)
+ err(-1, "calloc fd_percpu");
+}
+void allocate_irq_buffers(void)
+{
+ irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
+ if (irq_column_2_cpu == NULL)
+ err(-1, "calloc %d", topo.num_cpus);
+
+ irqs_per_cpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (irqs_per_cpu == NULL)
+ err(-1, "calloc %d", topo.max_cpu_num + 1);
+}
+void setup_all_buffers(void)
+{
+ topology_probe();
+ allocate_irq_buffers();
+ allocate_fd_percpu();
+ allocate_counters(&thread_even, &core_even, &package_even);
+ allocate_counters(&thread_odd, &core_odd, &package_odd);
+ allocate_output_buffer();
+ for_all_proc_cpus(initialize_counters);
+}
+
+void set_base_cpu(void)
+{
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(-ENODEV, "No valid cpus found");
+
+ if (debug > 1)
+ fprintf(outf, "base_cpu = %d\n", base_cpu);
+}
+
+void turbostat_init()
+{
+ setup_all_buffers();
+ set_base_cpu();
+ check_dev_msr();
+ check_permissions();
+ process_cpuid();
+
+
+ if (!quiet)
+ for_all_cpus(print_hwp, ODD_COUNTERS);
+
+ if (!quiet)
+ for_all_cpus(print_epb, ODD_COUNTERS);
+
+ if (!quiet)
+ for_all_cpus(print_perf_limit, ODD_COUNTERS);
+
+ if (!quiet)
+ for_all_cpus(print_rapl, ODD_COUNTERS);
+
+ for_all_cpus(set_temperature_target, ODD_COUNTERS);
+
+ if (!quiet)
+ for_all_cpus(print_thermal, ODD_COUNTERS);
+
+ if (!quiet && do_irtl_snb)
+ print_irtl();
+}
+
+int fork_it(char **argv)
+{
+ pid_t child_pid;
+ int status;
+
+ snapshot_proc_sysfs_files();
+ status = for_all_cpus(get_counters, EVEN_COUNTERS);
+ first_counter_read = 0;
+ if (status)
+ exit(status);
+ /* clear affinity side-effect of get_counters() */
+ sched_setaffinity(0, cpu_present_setsize, cpu_present_set);
+ gettimeofday(&tv_even, (struct timezone *)NULL);
+
+ child_pid = fork();
+ if (!child_pid) {
+ /* child */
+ execvp(argv[0], argv);
+ err(errno, "exec %s", argv[0]);
+ } else {
+
+ /* parent */
+ if (child_pid == -1)
+ err(1, "fork");
+
+ signal(SIGINT, SIG_IGN);
+ signal(SIGQUIT, SIG_IGN);
+ if (waitpid(child_pid, &status, 0) == -1)
+ err(status, "waitpid");
+
+ if (WIFEXITED(status))
+ status = WEXITSTATUS(status);
+ }
+ /*
+ * n.b. fork_it() does not check for errors from for_all_cpus()
+ * because re-starting is problematic when forking
+ */
+ snapshot_proc_sysfs_files();
+ for_all_cpus(get_counters, ODD_COUNTERS);
+ gettimeofday(&tv_odd, (struct timezone *)NULL);
+ timersub(&tv_odd, &tv_even, &tv_delta);
+ if (for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS))
+ fprintf(outf, "%s: Counter reset detected\n", progname);
+ else {
+ compute_average(EVEN_COUNTERS);
+ format_all_counters(EVEN_COUNTERS);
+ }
+
+ fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+
+ flush_output_stderr();
+
+ return status;
+}
+
+int get_and_dump_counters(void)
+{
+ int status;
+
+ snapshot_proc_sysfs_files();
+ status = for_all_cpus(get_counters, ODD_COUNTERS);
+ if (status)
+ return status;
+
+ status = for_all_cpus(dump_counters, ODD_COUNTERS);
+ if (status)
+ return status;
+
+ flush_output_stdout();
+
+ return status;
+}
+
+void print_version() {
+ fprintf(outf, "turbostat version 18.07.27"
+ " - Len Brown <lenb@kernel.org>\n");
+}
+
+int add_counter(unsigned int msr_num, char *path, char *name,
+ unsigned int width, enum counter_scope scope,
+ enum counter_type type, enum counter_format format, int flags)
+{
+ struct msr_counter *msrp;
+
+ msrp = calloc(1, sizeof(struct msr_counter));
+ if (msrp == NULL) {
+ perror("calloc");
+ exit(1);
+ }
+
+ msrp->msr_num = msr_num;
+ strncpy(msrp->name, name, NAME_BYTES - 1);
+ if (path)
+ strncpy(msrp->path, path, PATH_BYTES - 1);
+ msrp->width = width;
+ msrp->type = type;
+ msrp->format = format;
+ msrp->flags = flags;
+
+ switch (scope) {
+
+ case SCOPE_CPU:
+ msrp->next = sys.tp;
+ sys.tp = msrp;
+ sys.added_thread_counters++;
+ if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
+ fprintf(stderr, "exceeded max %d added thread counters\n",
+				MAX_ADDED_THREAD_COUNTERS);
+ exit(-1);
+ }
+ break;
+
+ case SCOPE_CORE:
+ msrp->next = sys.cp;
+ sys.cp = msrp;
+ sys.added_core_counters++;
+ if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
+ fprintf(stderr, "exceeded max %d added core counters\n",
+ MAX_ADDED_COUNTERS);
+ exit(-1);
+ }
+ break;
+
+ case SCOPE_PACKAGE:
+ msrp->next = sys.pp;
+ sys.pp = msrp;
+ sys.added_package_counters++;
+ if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
+ fprintf(stderr, "exceeded max %d added package counters\n",
+ MAX_ADDED_COUNTERS);
+ exit(-1);
+ }
+ break;
+ }
+
+ return 0;
+}
+
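+/*
+ * Example (hypothetical counter): "--add msr0x10,u64,cpu,delta,MY_TSC"
+ * parses to msr_num 0x10, width 64, SCOPE_CPU, FORMAT_DELTA,
+ * with column header "MY_TSC".
+ */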
+void parse_add_command(char *add_command)
+{
+ int msr_num = 0;
+ char *path = NULL;
+ char name_buffer[NAME_BYTES] = "";
+ int width = 64;
+ int fail = 0;
+ enum counter_scope scope = SCOPE_CPU;
+ enum counter_type type = COUNTER_CYCLES;
+ enum counter_format format = FORMAT_DELTA;
+
+ while (add_command) {
+
+ if (sscanf(add_command, "msr0x%x", &msr_num) == 1)
+ goto next;
+
+ if (sscanf(add_command, "msr%d", &msr_num) == 1)
+ goto next;
+
+ if (*add_command == '/') {
+ path = add_command;
+ goto next;
+ }
+
+ if (sscanf(add_command, "u%d", &width) == 1) {
+ if ((width == 32) || (width == 64))
+ goto next;
+ width = 64;
+ }
+ if (!strncmp(add_command, "cpu", strlen("cpu"))) {
+ scope = SCOPE_CPU;
+ goto next;
+ }
+ if (!strncmp(add_command, "core", strlen("core"))) {
+ scope = SCOPE_CORE;
+ goto next;
+ }
+ if (!strncmp(add_command, "package", strlen("package"))) {
+ scope = SCOPE_PACKAGE;
+ goto next;
+ }
+ if (!strncmp(add_command, "cycles", strlen("cycles"))) {
+ type = COUNTER_CYCLES;
+ goto next;
+ }
+ if (!strncmp(add_command, "seconds", strlen("seconds"))) {
+ type = COUNTER_SECONDS;
+ goto next;
+ }
+ if (!strncmp(add_command, "usec", strlen("usec"))) {
+ type = COUNTER_USEC;
+ goto next;
+ }
+ if (!strncmp(add_command, "raw", strlen("raw"))) {
+ format = FORMAT_RAW;
+ goto next;
+ }
+ if (!strncmp(add_command, "delta", strlen("delta"))) {
+ format = FORMAT_DELTA;
+ goto next;
+ }
+ if (!strncmp(add_command, "percent", strlen("percent"))) {
+ format = FORMAT_PERCENT;
+ goto next;
+ }
+
+ if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */
+ char *eos;
+
+ eos = strchr(name_buffer, ',');
+ if (eos)
+ *eos = '\0';
+ goto next;
+ }
+
+next:
+ add_command = strchr(add_command, ',');
+ if (add_command) {
+ *add_command = '\0';
+ add_command++;
+ }
+
+ }
+ if ((msr_num == 0) && (path == NULL)) {
+ fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n");
+ fail++;
+ }
+
+ /* generate default column header */
+ if (*name_buffer == '\0') {
+ if (width == 32)
+ sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
+ else
+ sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : "");
+ }
+
+ if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0))
+ fail++;
+
+ if (fail) {
+ help();
+ exit(1);
+ }
+}
+
+int is_deferred_skip(char *name)
+{
+ int i;
+
+ for (i = 0; i < deferred_skip_index; ++i)
+ if (!strcmp(name, deferred_skip_names[i]))
+ return 1;
+ return 0;
+}
+
+void probe_sysfs(void)
+{
+ char path[64];
+ char name_buf[16];
+ FILE *input;
+ int state;
+ char *sp;
+
+ if (!DO_BIC(BIC_sysfs))
+ return;
+
+ for (state = 10; state >= 0; --state) {
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+		if (!fgets(name_buf, sizeof(name_buf), input))
+			err(1, "%s: failed to read file", path);
+
+		/* truncate "C1-HSW\n" to "C1%", or truncate "C1\n" to "C1%" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+ *sp = '%';
+ *(sp + 1) = '\0';
+
+ fclose(input);
+
+ sprintf(path, "cpuidle/state%d/time", state);
+
+ if (is_deferred_skip(name_buf))
+ continue;
+
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
+ FORMAT_PERCENT, SYSFS_PERCPU);
+ }
+
+ for (state = 10; state >= 0; --state) {
+
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
+ base_cpu, state);
+ input = fopen(path, "r");
+ if (input == NULL)
+ continue;
+		if (!fgets(name_buf, sizeof(name_buf), input))
+			err(1, "%s: failed to read file", path);
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ sp = strchr(name_buf, '-');
+ if (!sp)
+ sp = strchrnul(name_buf, '\n');
+ *sp = '\0';
+ fclose(input);
+
+ sprintf(path, "cpuidle/state%d/usage", state);
+
+ if (is_deferred_skip(name_buf))
+ continue;
+
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
+ FORMAT_DELTA, SYSFS_PERCPU);
+ }
+
+}
+
+
+/*
+ * Parse a cpu-set with the following syntax,
+ * e.g. 1,2,4..6,8-10, and set the corresponding bits in cpu_subset.
+ */
+void parse_cpu_command(char *optarg)
+{
+ unsigned int start, end;
+ char *next;
+
+ if (!strcmp(optarg, "core")) {
+ if (cpu_subset)
+ goto error;
+ show_core_only++;
+ return;
+ }
+ if (!strcmp(optarg, "package")) {
+ if (cpu_subset)
+ goto error;
+ show_pkg_only++;
+ return;
+ }
+ if (show_core_only || show_pkg_only)
+ goto error;
+
+ cpu_subset = CPU_ALLOC(CPU_SUBSET_MAXCPUS);
+ if (cpu_subset == NULL)
+ err(3, "CPU_ALLOC");
+ cpu_subset_size = CPU_ALLOC_SIZE(CPU_SUBSET_MAXCPUS);
+
+ CPU_ZERO_S(cpu_subset_size, cpu_subset);
+
+ next = optarg;
+
+ while (next && *next) {
+
+ if (*next == '-') /* no negative cpu numbers */
+ goto error;
+
+ start = strtoul(next, &next, 10);
+
+ if (start >= CPU_SUBSET_MAXCPUS)
+ goto error;
+ CPU_SET_S(start, cpu_subset_size, cpu_subset);
+
+ if (*next == '\0')
+ break;
+
+ if (*next == ',') {
+ next += 1;
+ continue;
+ }
+
+ if (*next == '-') {
+ next += 1; /* start range */
+ } else if (*next == '.') {
+ next += 1;
+ if (*next == '.')
+ next += 1; /* start range */
+ else
+ goto error;
+ }
+
+ end = strtoul(next, &next, 10);
+ if (end <= start)
+ goto error;
+
+ while (++start <= end) {
+ if (start >= CPU_SUBSET_MAXCPUS)
+ goto error;
+ CPU_SET_S(start, cpu_subset_size, cpu_subset);
+ }
+
+ if (*next == ',')
+ next += 1;
+ else if (*next != '\0')
+ goto error;
+ }
+
+ return;
+
+error:
+ fprintf(stderr, "\"--cpu %s\" malformed\n", optarg);
+ help();
+ exit(-1);
+}
+
+
+void cmdline(int argc, char **argv)
+{
+ int opt;
+ int option_index = 0;
+ static struct option long_options[] = {
+ {"add", required_argument, 0, 'a'},
+ {"cpu", required_argument, 0, 'c'},
+ {"Dump", no_argument, 0, 'D'},
+ {"debug", no_argument, 0, 'd'}, /* internal, not documented */
+ {"enable", required_argument, 0, 'e'},
+ {"interval", required_argument, 0, 'i'},
+ {"num_iterations", required_argument, 0, 'n'},
+ {"help", no_argument, 0, 'h'},
+ {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help
+ {"Joules", no_argument, 0, 'J'},
+ {"list", no_argument, 0, 'l'},
+ {"out", required_argument, 0, 'o'},
+ {"quiet", no_argument, 0, 'q'},
+ {"show", required_argument, 0, 's'},
+ {"Summary", no_argument, 0, 'S'},
+ {"TCC", required_argument, 0, 'T'},
+ {"version", no_argument, 0, 'v' },
+ {0, 0, 0, 0 }
+ };
+
+ progname = argv[0];
+
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
+ long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'a':
+ parse_add_command(optarg);
+ break;
+ case 'c':
+ parse_cpu_command(optarg);
+ break;
+ case 'D':
+ dump_only++;
+ break;
+ case 'e':
+ /* --enable specified counter */
+ bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
+ break;
+ case 'd':
+ debug++;
+ ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
+ break;
+ case 'H':
+ /*
+			 * --hide: do not show those specified;
+ * multiple invocations simply clear more bits in enabled mask
+ */
+ bic_enabled &= ~bic_lookup(optarg, HIDE_LIST);
+ break;
+ case 'h':
+ default:
+ help();
+ exit(1);
+ case 'i':
+ {
+ double interval = strtod(optarg, NULL);
+
+ if (interval < 0.001) {
+ fprintf(outf, "interval %f seconds is too small\n",
+ interval);
+ exit(2);
+ }
+
+ interval_tv.tv_sec = interval_ts.tv_sec = interval;
+ interval_tv.tv_usec = (interval - interval_tv.tv_sec) * 1000000;
+ interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
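+
+			/* e.g. "-i 2.5" yields tv_sec 2, tv_usec 500000, tv_nsec 500000000 */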
+ }
+ break;
+ case 'J':
+ rapl_joules++;
+ break;
+ case 'l':
+ ENABLE_BIC(BIC_DISABLED_BY_DEFAULT);
+ list_header_only++;
+ quiet++;
+ break;
+ case 'o':
+ outf = fopen_or_die(optarg, "w");
+ break;
+ case 'q':
+ quiet = 1;
+ break;
+ case 'n':
+ num_iterations = strtod(optarg, NULL);
+
+ if (num_iterations <= 0) {
+				fprintf(outf, "iterations %d should be a positive number\n",
+ num_iterations);
+ exit(2);
+ }
+ break;
+ case 's':
+ /*
+ * --show: show only those specified
+			 * The 1st invocation will clear and replace the enabled mask;
+ * subsequent invocations can add to it.
+ */
+ if (shown == 0)
+ bic_enabled = bic_lookup(optarg, SHOW_LIST);
+ else
+ bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+ shown = 1;
+ break;
+ case 'S':
+ summary_only++;
+ break;
+ case 'T':
+ tcc_activation_temp_override = atoi(optarg);
+ break;
+ case 'v':
+ print_version();
+ exit(0);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ outf = stderr;
+ cmdline(argc, argv);
+
+ if (!quiet)
+ print_version();
+
+ probe_sysfs();
+
+ turbostat_init();
+
+ /* dump counters and exit */
+ if (dump_only)
+ return get_and_dump_counters();
+
+ /* list header and exit */
+ if (list_header_only) {
+ print_header(",");
+ flush_output_stdout();
+ return 0;
+ }
+
+ /*
+ * if any params left, it must be a command to fork
+ */
+ if (argc - optind)
+ return fork_it(argv + optind);
+ else
+ turbostat_loop();
+
+ return 0;
+}
diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile
new file mode 100644
index 000000000..da781b430
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+CC = $(CROSS_COMPILE)gcc
+BUILD_OUTPUT := $(CURDIR)
+PREFIX := /usr
+DESTDIR :=
+
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
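+# e.g. "make O=/tmp/build" places the x86_energy_perf_policy binary in /tmp/build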
+
+x86_energy_perf_policy : x86_energy_perf_policy.c
+CFLAGS += -Wall -I../../../include
+CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
+
+%: %.c
+ @mkdir -p $(BUILD_OUTPUT)
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@
+
+.PHONY : clean
+clean :
+ @rm -f $(BUILD_OUTPUT)/x86_energy_perf_policy
+
+install : x86_energy_perf_policy
+ install -d $(DESTDIR)$(PREFIX)/bin
+ install $(BUILD_OUTPUT)/x86_energy_perf_policy $(DESTDIR)$(PREFIX)/bin/x86_energy_perf_policy
+ install -d $(DESTDIR)$(PREFIX)/share/man/man8
+ install -m 644 x86_energy_perf_policy.8 $(DESTDIR)$(PREFIX)/share/man/man8
+
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
new file mode 100644
index 000000000..17db1c3af
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8
@@ -0,0 +1,213 @@
+.\" This page Copyright (C) 2010 - 2015 Len Brown <len.brown@intel.com>
+.\" Distributed under the GPL, Copyleft 1994.
+.TH X86_ENERGY_PERF_POLICY 8
+.SH NAME
+x86_energy_perf_policy \- Manage Energy vs. Performance Policy via x86 Model Specific Registers
+.SH SYNOPSIS
+.B x86_energy_perf_policy
+.RB "[ options ] [ scope ] [field \ value]"
+.br
+.RB "scope: \-\-cpu\ cpu-list | \-\-pkg\ pkg-list"
+.br
+.RB "cpu-list, pkg-list: # | #,# | #-# | all"
+.br
+.RB "field: \-\-all | \-\-epb | \-\-hwp-epp | \-\-hwp-min | \-\-hwp-max | \-\-hwp-desired"
+.br
+.RB "other: (\-\-force | \-\-hwp-enable | \-\-turbo-enable) value"
+.br
+.RB "value: # | default | performance | balance-performance | balance-power | power"
+.SH DESCRIPTION
+\fBx86_energy_perf_policy\fP
+displays and updates energy-performance policy settings specific to
+Intel Architecture Processors. Settings are accessed via Model Specific Register (MSR)
+updates, whether or not the Linux cpufreq sub-system is enabled.
+
+Policy in MSR_IA32_ENERGY_PERF_BIAS (EPB)
+may affect a wide range of hardware decisions,
+such as how aggressively the hardware enters and exits CPU idle states (C-states)
+and Processor Performance States (P-states).
+This policy hint does not replace explicit OS C-state and P-state selection.
+Rather, it tells the hardware how aggressively to implement those selections.
+Further, it allows the OS to influence energy/performance trade-offs where there
+is no software interface, such as in the opportunistic "turbo-mode" P-state range.
+Note that MSR_IA32_ENERGY_PERF_BIAS is defined per CPU,
+but some implementations
+share a single MSR among all CPUs in each processor package.
+On those systems, a write to EPB on one processor will
+be visible, and will have an effect, on all CPUs
+in the same processor package.
+
+Hardware P-States (HWP) are effectively an expansion of hardware
+P-state control from the opportunistic turbo-mode P-state range
+to include the entire range of available P-states.
+On Broadwell Xeon, the initial HWP implementation, EPB influenced HWP.
+That influence was removed in subsequent generations,
+where it was moved to the
+Energy_Performance_Preference (EPP) field in
+a pair of dedicated MSRs -- MSR_IA32_HWP_REQUEST and MSR_IA32_HWP_REQUEST_PKG.
+
+EPP is the most commonly managed knob in HWP mode,
+but MSR_IA32_HWP_REQUEST also allows the user to specify
+minimum-frequency for Quality-of-Service,
+and maximum-frequency for power-capping.
+MSR_IA32_HWP_REQUEST is defined per-CPU.
+
+MSR_IA32_HWP_REQUEST_PKG has the same capability as MSR_IA32_HWP_REQUEST,
+but it can simultaneously set the default policy for all CPUs within a package.
+A bit in the per-CPU MSR_IA32_HWP_REQUEST indicates whether it is
+overruled by, or exempt from, MSR_IA32_HWP_REQUEST_PKG.
+
+MSR_HWP_CAPABILITIES shows the default values for the fields
+in MSR_IA32_HWP_REQUEST. It is displayed when no values
+are being written.
+
+.SS SCOPE OPTIONS
+.PP
+\fB-c, --cpu\fP Operate on the MSR_IA32_HWP_REQUEST for each CPU in a CPU-list.
+The CPU-list may contain comma-separated CPU numbers, dash-separated ranges,
+or the string "all". E.g. '--cpu 1,4,6-8' or '--cpu all'.
+When --cpu is used, \fB--hwp-use-pkg\fP is available, which specifies whether the per-cpu
+MSR_IA32_HWP_REQUEST should be over-ruled by MSR_IA32_HWP_REQUEST_PKG (1),
+or exempt from MSR_IA32_HWP_REQUEST_PKG (0).
+
+\fB-p, --pkg\fP Operate on the MSR_IA32_HWP_REQUEST_PKG for each package in the package-list.
+The list is a string of individual package numbers separated
+by commas, and/or ranges of package numbers separated by a dash,
+or the string "all".
+For example, '--pkg 1,3' or '--pkg all'.
+
+.SS VALUE OPTIONS
+.PP
+.I normal | default
+Set a policy with a normal balance between performance and energy efficiency.
+The processor will tolerate minor performance compromise
+for potentially significant energy savings.
+This is a reasonable default for most desktops and servers.
+"default" is a synonym for "normal".
+.PP
+.I performance
+Set a policy for maximum performance,
+accepting no performance sacrifice for the benefit of energy efficiency.
+.PP
+.I balance-performance
+Set a policy with a high priority on performance,
+but allowing some performance loss to benefit energy efficiency.
+.PP
+.I balance-power
+Set a policy where the performance and power are balanced.
+This is the default.
+.PP
+.I power
+Set a policy where the processor can accept
+a measurable performance impact to maximize energy efficiency.
+
+.PP
+The following table shows the mapping from the value strings above to actual MSR values.
+This mapping is defined in the Linux-kernel header, msr-index.h.
+
+.nf
+VALUE STRING EPB EPP
+performance 0 0
+balance-performance 4 128
+normal, default 6 128
+balance-power 8 192
+power 15 255
+.fi
+.PP
+For MSR_IA32_HWP_REQUEST performance fields
+(--hwp-min, --hwp-max, --hwp-desired), the value option
+is in units of 100 MHz. E.g. 12 signifies 1200 MHz.
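+.PP
+For example, a hypothetical invocation requesting a 1.2 GHz frequency floor
+on CPU 2:
+.nf
+# x86_energy_perf_policy --cpu 2 --hwp-min 12
+.fi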
+
+.SS FIELD OPTIONS
+\fB-a, --all value-string\fP Sets all EPB and EPP and HWP limit fields to the value associated with
+the value-string. In addition, it enables turbo-mode and HWP-mode, if they were previously disabled.
+Thus "--all normal" will set a system without cpufreq into a well known configuration.
+.PP
+\fB-B, --epb\fP set EPB per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-d, --debug\fP debug increases verbosity. By default
+x86_energy_perf_policy is silent for updates,
+and verbose for read-only mode.
+.PP
+\fB-P, --hwp-epp\fP set HWP.EPP per-core or per-package.
+See value strings in the table above.
+.PP
+\fB-m, --hwp-min\fP request HWP to not go below the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.min.
+.PP
+\fB-M, --hwp-max\fP request HWP to not exceed the specified core/bus ratio.
+The "default" is the value found in IA32_HWP_CAPABILITIES.max.
+.PP
+\fB-D, --hwp-desired\fP request HWP 'desired' frequency.
+The "normal" setting is 0, which
+corresponds to 'full autonomous' HWP control.
+Non-zero performance values request a specific performance
+level on this processor, specified in multiples of 100 MHz.
+.PP
+\fB-w, --hwp-window\fP specify an integer number of microseconds
+for the sliding window that HWP uses to maintain average frequency.
+This parameter is meaningful only when the "desired" field above is non-zero.
+Default is 0, allowing the HW to choose.
+.SH OTHER OPTIONS
+.PP
+\fB-f, --force\fP writes the specified values without bounds checking.
+.PP
+\fB-U, --hwp-use-pkg\fP (0 | 1), when used in conjunction with --cpu,
+indicates whether the per-CPU MSR_IA32_HWP_REQUEST should be overruled (1)
+or exempt (0) from per-Package MSR_IA32_HWP_REQUEST_PKG settings.
+The default is exempt.
+.PP
+\fB-H, --hwp-enable\fP enable HardWare-P-state (HWP) mode. Once enabled, system RESET is required to disable HWP mode.
+.PP
+\fB-t, --turbo-enable\fP enable (1) or disable (0) turbo mode.
+.PP
+\fB-v, --version\fP print version and exit.
+.PP
+If no request to change policy is made,
+the default behavior is to read
+and display the current system state,
+including the default capabilities.
+.SH WARNING
+.PP
+This utility writes directly to Model Specific Registers.
+There is no locking or coordination should this utility
+be used to modify HWP limit fields at the same time that
+intel_pstate's sysfs attributes access the same MSRs.
+.PP
+Note that --hwp-desired and --hwp-window are considered experimental.
+Future versions of Linux reserve the right to access these
+fields internally -- potentially conflicting with user-space access.
+.SH EXAMPLE
+.nf
+# sudo x86_energy_perf_policy
+cpu0: EPB 6
+cpu0: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu0: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu1: EPB 6
+cpu1: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu1: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu2: EPB 6
+cpu2: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu2: HWP_CAP: low 1 eff 8 guar 27 high 35
+cpu3: EPB 6
+cpu3: HWP_REQ: min 6 max 35 des 0 epp 128 window 0x0 (0*10^0us) use_pkg 0
+cpu3: HWP_CAP: low 1 eff 8 guar 27 high 35
+.fi
+.SH NOTES
+.B "x86_energy_perf_policy"
+runs only as root.
+.SH FILES
+.ta
+.nf
+/dev/cpu/*/msr
+.fi
+.SH "SEE ALSO"
+.nf
+msr(4)
+Intel(R) 64 and IA-32 Architectures Software Developer's Manual
+.fi
+.PP
+.SH AUTHORS
+.nf
+Len Brown
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
new file mode 100644
index 000000000..2aba622d1
--- /dev/null
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -0,0 +1,1430 @@
+/*
+ * x86_energy_perf_policy -- set the energy versus performance
+ * policy preference bias on recent X86 processors.
+ */
+/*
+ * Copyright (c) 2010 - 2017 Intel Corporation.
+ * Len Brown <len.brown@intel.com>
+ *
+ * This program is released under GPL v2
+ */
+
+#define _GNU_SOURCE
+#include MSRHEADER
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/resource.h>
+#include <getopt.h>
+#include <err.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <sys/time.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <cpuid.h>
+#include <errno.h>
+
+#define OPTARG_NORMAL (INT_MAX - 1)
+#define OPTARG_POWER (INT_MAX - 2)
+#define OPTARG_BALANCE_POWER (INT_MAX - 3)
+#define OPTARG_BALANCE_PERFORMANCE (INT_MAX - 4)
+#define OPTARG_PERFORMANCE (INT_MAX - 5)
+
+struct msr_hwp_cap {
+ unsigned char highest;
+ unsigned char guaranteed;
+ unsigned char efficient;
+ unsigned char lowest;
+};
+
+struct msr_hwp_request {
+ unsigned char hwp_min;
+ unsigned char hwp_max;
+ unsigned char hwp_desired;
+ unsigned char hwp_epp;
+ unsigned int hwp_window;
+ unsigned char hwp_use_pkg;
+} req_update;
+
+unsigned int debug;
+unsigned int verbose;
+unsigned int force;
+char *progname;
+int base_cpu;
+unsigned char update_epb;
+unsigned long long new_epb;
+unsigned char turbo_is_enabled;
+unsigned char update_turbo;
+unsigned char turbo_update_value;
+unsigned char update_hwp_epp;
+unsigned char update_hwp_min;
+unsigned char update_hwp_max;
+unsigned char update_hwp_desired;
+unsigned char update_hwp_window;
+unsigned char update_hwp_use_pkg;
+unsigned char update_hwp_enable;
+#define hwp_update_enabled() (update_hwp_enable | update_hwp_epp | update_hwp_max | update_hwp_min | update_hwp_desired | update_hwp_window | update_hwp_use_pkg)
+int max_cpu_num;
+int max_pkg_num;
+#define MAX_PACKAGES 64
+unsigned int first_cpu_in_pkg[MAX_PACKAGES];
+unsigned long long pkg_present_set;
+unsigned long long pkg_selected_set;
+cpu_set_t *cpu_present_set;
+cpu_set_t *cpu_selected_set;
+int genuine_intel;
+
+size_t cpu_setsize;
+
+char *proc_stat = "/proc/stat";
+
+unsigned int has_epb; /* MSR_IA32_ENERGY_PERF_BIAS */
+unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_request_pkg; /* IA32_HWP_REQUEST_PKG */
+
+unsigned int bdx_highest_ratio;
+
+/*
+ * maintain compatibility with original implementation, but don't document it:
+ */
+void usage(void)
+{
+	fprintf(stderr, "%s [options] [scope] [field value]\n", progname);
+ fprintf(stderr, "scope: --cpu cpu-list [--hwp-use-pkg #] | --pkg pkg-list\n");
+ fprintf(stderr, "field: --all | --epb | --hwp-epp | --hwp-min | --hwp-max | --hwp-desired\n");
+ fprintf(stderr, "other: --hwp-enable | --turbo-enable (0 | 1) | --help | --force\n");
+ fprintf(stderr,
+		"value: ( # | \"normal\" | \"performance\" | \"balance-performance\" | \"balance-power\" | \"power\")\n");
+ fprintf(stderr, "--hwp-window usec\n");
+
+ fprintf(stderr, "Specify only Energy Performance BIAS (legacy usage):\n");
+ fprintf(stderr, "%s: [-c cpu] [-v] (-r | policy-value )\n", progname);
+
+ exit(1);
+}
+
+/*
+ * If bdx_highest_ratio is set,
+ * then we must translate between MSR format and simple ratio
+ * used on the cmdline.
+ */
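+/*
+ * Worked example (assuming bdx_highest_ratio = 36):
+ * ratio_2_msr_perf(18) = 18 * 255 / 36 = 127, and
+ * msr_perf_2_ratio(127) = round(127 * 36 / 255.0) = 18.
+ */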
+int ratio_2_msr_perf(int ratio)
+{
+ int msr_perf;
+
+ if (!bdx_highest_ratio)
+ return ratio;
+
+ msr_perf = ratio * 255 / bdx_highest_ratio;
+
+ if (debug)
+ fprintf(stderr, "%d = ratio_to_msr_perf(%d)\n", msr_perf, ratio);
+
+ return msr_perf;
+}
+int msr_perf_2_ratio(int msr_perf)
+{
+ int ratio;
+ double d;
+
+ if (!bdx_highest_ratio)
+ return msr_perf;
+
+ d = (double)msr_perf * (double) bdx_highest_ratio / 255.0;
+ d = d + 0.5; /* round */
+ ratio = (int)d;
+
+ if (debug)
+ fprintf(stderr, "%d = msr_perf_ratio(%d) {%f}\n", ratio, msr_perf, d);
+
+ return ratio;
+}
+int parse_cmdline_epb(int i)
+{
+ if (!has_epb)
+ errx(1, "EPB not enabled on this platform");
+
+ update_epb = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return ENERGY_PERF_BIAS_POWERSAVE;
+ case OPTARG_BALANCE_POWER:
+ return ENERGY_PERF_BIAS_BALANCE_POWERSAVE;
+ case OPTARG_NORMAL:
+ return ENERGY_PERF_BIAS_NORMAL;
+ case OPTARG_BALANCE_PERFORMANCE:
+ return ENERGY_PERF_BIAS_BALANCE_PERFORMANCE;
+ case OPTARG_PERFORMANCE:
+ return ENERGY_PERF_BIAS_PERFORMANCE;
+ }
+ if (i < 0 || i > ENERGY_PERF_BIAS_POWERSAVE)
+ errx(1, "--epb must be from 0 to 15");
+ return i;
+}
+
+#define HWP_CAP_LOWEST 0
+#define HWP_CAP_HIGHEST 255
+
+/*
+ * "performance" changes hwp_min to cap.highest
+ * All others leave it at cap.lowest
+ */
+int parse_cmdline_hwp_min(int i)
+{
+ update_hwp_min = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_PERFORMANCE:
+ return HWP_CAP_LOWEST;
+ case OPTARG_PERFORMANCE:
+ return HWP_CAP_HIGHEST;
+ }
+ return i;
+}
+/*
+ * "power" changes hwp_max to cap.lowest
+ * All others leave it at cap.highest
+ */
+int parse_cmdline_hwp_max(int i)
+{
+ update_hwp_max = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return HWP_CAP_LOWEST;
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_PERFORMANCE:
+ return HWP_CAP_HIGHEST;
+ }
+ return i;
+}
+/*
+ * for --hwp-des, all strings leave it in autonomous mode
+ * If you want to change it, you need to explicitly pick a value
+ */
+int parse_cmdline_hwp_desired(int i)
+{
+ update_hwp_desired = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_NORMAL:
+ case OPTARG_PERFORMANCE:
+ return 0; /* autonomous */
+ }
+ return i;
+}
+
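+/*
+ * The HWP activity window is a 7-bit mantissa plus a 3-bit
+ * power-of-10 exponent, in microseconds.
+ * Worked example: 5000 usec -> mantissa 50, exponent 2,
+ * encoded below as (2 << 7) | 50 = 0x132.
+ */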
+int parse_cmdline_hwp_window(int i)
+{
+ unsigned int exponent;
+
+ update_hwp_window = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_PERFORMANCE:
+ return 0;
+ }
+ if (i < 0 || i > 1270000000) {
+ fprintf(stderr, "--hwp-window: 0 for auto; 1 - 1270000000 usec for window duration\n");
+ usage();
+ }
+ for (exponent = 0; ; ++exponent) {
+ if (debug)
+ printf("%d 10^%d\n", i, exponent);
+
+ if (i <= 127)
+ break;
+
+ i = i / 10;
+ }
+ if (debug)
+ fprintf(stderr, "%d*10^%d: 0x%x\n", i, exponent, (exponent << 7) | i);
+
+ return (exponent << 7) | i;
+}
+int parse_cmdline_hwp_epp(int i)
+{
+ update_hwp_epp = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return HWP_EPP_POWERSAVE;
+ case OPTARG_BALANCE_POWER:
+ return HWP_EPP_BALANCE_POWERSAVE;
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_PERFORMANCE:
+ return HWP_EPP_BALANCE_PERFORMANCE;
+ case OPTARG_PERFORMANCE:
+ return HWP_EPP_PERFORMANCE;
+ }
+ if (i < 0 || i > 0xff) {
+ fprintf(stderr, "--hwp-epp must be from 0 to 0xff\n");
+ usage();
+ }
+ return i;
+}
+int parse_cmdline_turbo(int i)
+{
+ update_turbo = 1;
+
+ switch (i) {
+ case OPTARG_POWER:
+ return 0;
+ case OPTARG_NORMAL:
+ case OPTARG_BALANCE_POWER:
+ case OPTARG_BALANCE_PERFORMANCE:
+ case OPTARG_PERFORMANCE:
+ return 1;
+ }
+ if (i < 0 || i > 1) {
+ fprintf(stderr, "--turbo-enable: 1 to enable, 0 to disable\n");
+ usage();
+ }
+ return i;
+}
+
+int parse_optarg_string(char *s)
+{
+	long val;
+ char *endptr;
+
+ if (!strncmp(s, "default", 7))
+ return OPTARG_NORMAL;
+
+ if (!strncmp(s, "normal", 6))
+ return OPTARG_NORMAL;
+
+ if (!strncmp(s, "power", 9))
+ return OPTARG_POWER;
+
+ if (!strncmp(s, "balance-power", 17))
+ return OPTARG_BALANCE_POWER;
+
+ if (!strncmp(s, "balance-performance", 19))
+ return OPTARG_BALANCE_PERFORMANCE;
+
+ if (!strncmp(s, "performance", 11))
+ return OPTARG_PERFORMANCE;
+
+	errno = 0;
+	val = strtol(s, &endptr, 0);
+	if (s == endptr) {
+		fprintf(stderr, "no digits in \"%s\"\n", s);
+		usage();
+	}
+	if (errno == ERANGE)
+		errx(-1, "%s", s);
+
+	if (val > 0xFF)
+		errx(-1, "%ld (0x%lx) must be < 256", val, val);
+
+	if (val < 0)
+		errx(-1, "%ld (0x%lx) must be >= 0", val, val);
+	return (int)val;
+}
+
+void parse_cmdline_all(char *s)
+{
+ force++;
+ update_hwp_enable = 1;
+ req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(s));
+ req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(s));
+ req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(s));
+ if (has_epb)
+ new_epb = parse_cmdline_epb(parse_optarg_string(s));
+ turbo_update_value = parse_cmdline_turbo(parse_optarg_string(s));
+ req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(s));
+ req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(s));
+}
+
+void validate_cpu_selected_set(void)
+{
+ int cpu;
+
+ if (CPU_COUNT_S(cpu_setsize, cpu_selected_set) == 0)
+		errx(1, "no CPUs requested");
+
+ for (cpu = 0; cpu <= max_cpu_num; ++cpu) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_selected_set))
+ if (!CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+				errx(1, "Requested cpu%d is not present", cpu);
+ }
+}
+
+void parse_cmdline_cpu(char *s)
+{
+ char *startp, *endp;
+ int cpu = 0;
+
+ if (pkg_selected_set) {
+ usage();
+ errx(1, "--cpu | --pkg");
+ }
+ cpu_selected_set = CPU_ALLOC((max_cpu_num + 1));
+ if (cpu_selected_set == NULL)
+ err(1, "cpu_selected_set");
+ CPU_ZERO_S(cpu_setsize, cpu_selected_set);
+
+ for (startp = s; startp && *startp;) {
+
+ if (*startp == ',') {
+ startp++;
+ continue;
+ }
+
+ if (*startp == '-') {
+ int end_cpu;
+
+ startp++;
+ end_cpu = strtol(startp, &endp, 10);
+ if (startp == endp)
+ continue;
+
+ while (cpu <= end_cpu) {
+ if (cpu > max_cpu_num)
+ errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ cpu++;
+ }
+ startp = endp;
+ continue;
+ }
+
+ if (strncmp(startp, "all", 3) == 0) {
+ for (cpu = 0; cpu <= max_cpu_num; cpu += 1) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ }
+ startp += 3;
+ if (*startp == 0)
+ break;
+ }
+ /* "--cpu even" is not documented */
+ if (strncmp(startp, "even", 4) == 0) {
+ for (cpu = 0; cpu <= max_cpu_num; cpu += 2) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ }
+ startp += 4;
+ if (*startp == 0)
+ break;
+ }
+
+ /* "--cpu odd" is not documented */
+ if (strncmp(startp, "odd", 3) == 0) {
+ for (cpu = 1; cpu <= max_cpu_num; cpu += 2) {
+ if (CPU_ISSET_S(cpu, cpu_setsize, cpu_present_set))
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ }
+ startp += 3;
+ if (*startp == 0)
+ break;
+ }
+
+ cpu = strtol(startp, &endp, 10);
+ if (startp == endp)
+ errx(1, "--cpu cpu-set: confused by '%s'", startp);
+ if (cpu > max_cpu_num)
+ errx(1, "Requested cpu%d exceeds max cpu%d", cpu, max_cpu_num);
+ CPU_SET_S(cpu, cpu_setsize, cpu_selected_set);
+ startp = endp;
+ }
+
+ validate_cpu_selected_set();
+
+}
+
+void parse_cmdline_pkg(char *s)
+{
+ char *startp, *endp;
+ int pkg = 0;
+
+ if (cpu_selected_set) {
+ usage();
+ errx(1, "--pkg | --cpu");
+ }
+ pkg_selected_set = 0;
+
+ for (startp = s; startp && *startp;) {
+
+ if (*startp == ',') {
+ startp++;
+ continue;
+ }
+
+ if (*startp == '-') {
+ int end_pkg;
+
+ startp++;
+ end_pkg = strtol(startp, &endp, 10);
+ if (startp == endp)
+ continue;
+
+ while (pkg <= end_pkg) {
+ if (pkg > max_pkg_num)
+ errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num);
+				pkg_selected_set |= 1ULL << pkg;
+ pkg++;
+ }
+ startp = endp;
+ continue;
+ }
+
+ if (strncmp(startp, "all", 3) == 0) {
+ pkg_selected_set = pkg_present_set;
+ return;
+ }
+
+ pkg = strtol(startp, &endp, 10);
+ if (pkg > max_pkg_num)
+			errx(1, "Requested pkg%d exceeds max pkg%d", pkg, max_pkg_num);
+		pkg_selected_set |= 1ULL << pkg;
+ startp = endp;
+ }
+}
+
+void for_packages(unsigned long long pkg_set, int (func)(int))
+{
+ int pkg_num;
+
+ for (pkg_num = 0; pkg_num <= max_pkg_num; ++pkg_num) {
+		if (pkg_set & (1ULL << pkg_num))
+ func(pkg_num);
+ }
+}
+
+void print_version(void)
+{
+ printf("x86_energy_perf_policy 17.05.11 (C) Len Brown <len.brown@intel.com>\n");
+}
+
+void cmdline(int argc, char **argv)
+{
+ int opt;
+ int option_index = 0;
+
+ static struct option long_options[] = {
+ {"all", required_argument, 0, 'a'},
+ {"cpu", required_argument, 0, 'c'},
+ {"pkg", required_argument, 0, 'p'},
+ {"debug", no_argument, 0, 'd'},
+ {"hwp-desired", required_argument, 0, 'D'},
+ {"epb", required_argument, 0, 'B'},
+ {"force", no_argument, 0, 'f'},
+ {"hwp-enable", no_argument, 0, 'e'},
+ {"help", no_argument, 0, 'h'},
+ {"hwp-epp", required_argument, 0, 'P'},
+ {"hwp-min", required_argument, 0, 'm'},
+ {"hwp-max", required_argument, 0, 'M'},
+ {"read", no_argument, 0, 'r'},
+ {"turbo-enable", required_argument, 0, 't'},
+ {"hwp-use-pkg", required_argument, 0, 'u'},
+ {"version", no_argument, 0, 'v'},
+ {"hwp-window", required_argument, 0, 'w'},
+ {0, 0, 0, 0 }
+ };
+
+ progname = argv[0];
+
+ while ((opt = getopt_long_only(argc, argv, "+a:c:dD:E:e:f:m:M:rt:u:vw:",
+ long_options, &option_index)) != -1) {
+ switch (opt) {
+ case 'a':
+ parse_cmdline_all(optarg);
+ break;
+ case 'B':
+ new_epb = parse_cmdline_epb(parse_optarg_string(optarg));
+ break;
+ case 'c':
+ parse_cmdline_cpu(optarg);
+ break;
+ case 'e':
+ update_hwp_enable = 1;
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'd':
+ debug++;
+ verbose++;
+ break;
+ case 'f':
+ force++;
+ break;
+ case 'D':
+ req_update.hwp_desired = parse_cmdline_hwp_desired(parse_optarg_string(optarg));
+ break;
+ case 'm':
+ req_update.hwp_min = parse_cmdline_hwp_min(parse_optarg_string(optarg));
+ break;
+ case 'M':
+ req_update.hwp_max = parse_cmdline_hwp_max(parse_optarg_string(optarg));
+ break;
+ case 'p':
+ parse_cmdline_pkg(optarg);
+ break;
+ case 'P':
+ req_update.hwp_epp = parse_cmdline_hwp_epp(parse_optarg_string(optarg));
+ break;
+ case 'r':
+ /* v1 used -r to specify read-only mode, now the default */
+ break;
+ case 't':
+ turbo_update_value = parse_cmdline_turbo(parse_optarg_string(optarg));
+ break;
+ case 'u':
+ update_hwp_use_pkg++;
+ if (atoi(optarg) == 0)
+ req_update.hwp_use_pkg = 0;
+ else
+ req_update.hwp_use_pkg = 1;
+ break;
+ case 'v':
+ print_version();
+ exit(0);
+ break;
+ case 'w':
+ req_update.hwp_window = parse_cmdline_hwp_window(parse_optarg_string(optarg));
+ break;
+ default:
+ usage();
+ }
+ }
+ /*
+ * v1 allowed "performance"|"normal"|"power" with no policy specifier
+ * to update BIAS. Continue to support that, even though no longer documented.
+ */
+ if (argc == optind + 1)
+ new_epb = parse_cmdline_epb(parse_optarg_string(argv[optind]));
+
+ if (argc > optind + 1) {
+ fprintf(stderr, "stray parameter '%s'\n", argv[optind + 1]);
+ usage();
+ }
+}
+
+
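+/*
+ * MSR access goes through the msr(4) character device: the pread/pwrite
+ * file offset selects the MSR number. E.g. reading 8 bytes at offset
+ * 0x1b0 from /dev/cpu/0/msr returns cpu0's MSR_IA32_ENERGY_PERF_BIAS.
+ */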
+int get_msr(int cpu, int offset, unsigned long long *msr)
+{
+ int retval;
+ char pathname[32];
+ int fd;
+
+ sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ fd = open(pathname, O_RDONLY);
+ if (fd < 0)
+ err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
+
+ retval = pread(fd, msr, sizeof(*msr), offset);
+ if (retval != sizeof(*msr))
+ err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+
+ if (debug > 1)
+ fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);
+
+ close(fd);
+ return 0;
+}
+
+int put_msr(int cpu, int offset, unsigned long long new_msr)
+{
+ char pathname[32];
+ int retval;
+ int fd;
+
+ sprintf(pathname, "/dev/cpu/%d/msr", cpu);
+ fd = open(pathname, O_RDWR);
+ if (fd < 0)
+		err(-1, "%s open failed, try chown or chmod +rw /dev/cpu/*/msr, or run as root", pathname);
+
+ retval = pwrite(fd, &new_msr, sizeof(new_msr), offset);
+ if (retval != sizeof(new_msr))
+ err(-2, "pwrite(cpu%d, offset 0x%x, 0x%llx) = %d", cpu, offset, new_msr, retval);
+
+ close(fd);
+
+ if (debug > 1)
+ fprintf(stderr, "put_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, new_msr);
+
+ return 0;
+}
+
+void print_hwp_cap(int cpu, struct msr_hwp_cap *cap, char *str)
+{
+ if (cpu != -1)
+ printf("cpu%d: ", cpu);
+
+ printf("HWP_CAP: low %d eff %d guar %d high %d\n",
+ cap->lowest, cap->efficient, cap->guaranteed, cap->highest);
+}
+void read_hwp_cap(int cpu, struct msr_hwp_cap *cap, unsigned int msr_offset)
+{
+ unsigned long long msr;
+
+ get_msr(cpu, msr_offset, &msr);
+
+ cap->highest = msr_perf_2_ratio(HWP_HIGHEST_PERF(msr));
+ cap->guaranteed = msr_perf_2_ratio(HWP_GUARANTEED_PERF(msr));
+ cap->efficient = msr_perf_2_ratio(HWP_MOSTEFFICIENT_PERF(msr));
+ cap->lowest = msr_perf_2_ratio(HWP_LOWEST_PERF(msr));
+}
+
+void print_hwp_request(int cpu, struct msr_hwp_request *h, char *str)
+{
+ if (cpu != -1)
+ printf("cpu%d: ", cpu);
+
+ if (str)
+ printf("%s", str);
+
+ printf("HWP_REQ: min %d max %d des %d epp %d window 0x%x (%d*10^%dus) use_pkg %d\n",
+ h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
+ h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7, h->hwp_use_pkg);
+}
+void print_hwp_request_pkg(int pkg, struct msr_hwp_request *h, char *str)
+{
+ printf("pkg%d: ", pkg);
+
+ if (str)
+ printf("%s", str);
+
+ printf("HWP_REQ_PKG: min %d max %d des %d epp %d window 0x%x (%d*10^%dus)\n",
+ h->hwp_min, h->hwp_max, h->hwp_desired, h->hwp_epp,
+ h->hwp_window, h->hwp_window & 0x7F, (h->hwp_window >> 7) & 0x7);
+}
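+
+/*
+ * IA32_HWP_REQUEST layout, as decoded below: bits 0-7 minimum, 8-15
+ * maximum, 16-23 desired, 24-31 EPP, 32-41 activity window (7-bit
+ * mantissa plus 3-bit power-of-10 exponent, in usec), bit 42 package
+ * control.
+ */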
+void read_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+{
+ unsigned long long msr;
+
+ get_msr(cpu, msr_offset, &msr);
+
+ hwp_req->hwp_min = msr_perf_2_ratio((((msr) >> 0) & 0xff));
+ hwp_req->hwp_max = msr_perf_2_ratio((((msr) >> 8) & 0xff));
+ hwp_req->hwp_desired = msr_perf_2_ratio((((msr) >> 16) & 0xff));
+ hwp_req->hwp_epp = (((msr) >> 24) & 0xff);
+ hwp_req->hwp_window = (((msr) >> 32) & 0x3ff);
+ hwp_req->hwp_use_pkg = (((msr) >> 42) & 0x1);
+}
+
+void write_hwp_request(int cpu, struct msr_hwp_request *hwp_req, unsigned int msr_offset)
+{
+ unsigned long long msr = 0;
+
+ if (debug > 1)
+ printf("cpu%d: requesting min %d max %d des %d epp %d window 0x%0x use_pkg %d\n",
+ cpu, hwp_req->hwp_min, hwp_req->hwp_max,
+ hwp_req->hwp_desired, hwp_req->hwp_epp,
+ hwp_req->hwp_window, hwp_req->hwp_use_pkg);
+
+ msr |= HWP_MIN_PERF(ratio_2_msr_perf(hwp_req->hwp_min));
+ msr |= HWP_MAX_PERF(ratio_2_msr_perf(hwp_req->hwp_max));
+ msr |= HWP_DESIRED_PERF(ratio_2_msr_perf(hwp_req->hwp_desired));
+ msr |= HWP_ENERGY_PERF_PREFERENCE(hwp_req->hwp_epp);
+ msr |= HWP_ACTIVITY_WINDOW(hwp_req->hwp_window);
+ msr |= HWP_PACKAGE_CONTROL(hwp_req->hwp_use_pkg);
+
+ put_msr(cpu, msr_offset, msr);
+}
+
+int print_cpu_msrs(int cpu)
+{
+ unsigned long long msr;
+ struct msr_hwp_request req;
+ struct msr_hwp_cap cap;
+
+ if (has_epb) {
+ get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+
+ printf("cpu%d: EPB %u\n", cpu, (unsigned int) msr);
+ }
+
+ if (!has_hwp)
+ return 0;
+
+ read_hwp_request(cpu, &req, MSR_HWP_REQUEST);
+ print_hwp_request(cpu, &req, "");
+
+ read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+ print_hwp_cap(cpu, &cap, "");
+
+ return 0;
+}
+
+int print_pkg_msrs(int pkg)
+{
+ struct msr_hwp_request req;
+ unsigned long long msr;
+
+ if (!has_hwp)
+ return 0;
+
+ read_hwp_request(first_cpu_in_pkg[pkg], &req, MSR_HWP_REQUEST_PKG);
+ print_hwp_request_pkg(pkg, &req, "");
+
+ if (has_hwp_notify) {
+ get_msr(first_cpu_in_pkg[pkg], MSR_HWP_INTERRUPT, &msr);
+ fprintf(stderr,
+ "pkg%d: MSR_HWP_INTERRUPT: 0x%08llx (Excursion_Min-%sabled, Guaranteed_Perf_Change-%sabled)\n",
+ pkg, msr,
+ ((msr) & 0x2) ? "EN" : "Dis",
+ ((msr) & 0x1) ? "EN" : "Dis");
+ }
+ get_msr(first_cpu_in_pkg[pkg], MSR_HWP_STATUS, &msr);
+ fprintf(stderr,
+ "pkg%d: MSR_HWP_STATUS: 0x%08llx (%sExcursion_Min, %sGuaranteed_Perf_Change)\n",
+ pkg, msr,
+ ((msr) & 0x4) ? "" : "No-",
+ ((msr) & 0x1) ? "" : "No-");
+
+ return 0;
+}
+
+/*
+ * Assumption: All HWP systems have 100 MHz bus clock
+ */
+int ratio_2_sysfs_khz(int ratio)
+{
+ int bclk_khz = 100 * 1000; /* 100,000 KHz = 100 MHz */
+
+ return ratio * bclk_khz;
+}
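+/* e.g. ratio 24 -> 24 * 100,000 kHz = 2,400,000 kHz (2.4 GHz) */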
+/*
+ * If HWP is enabled and cpufreq sysfs attributes are present,
+ * then update sysfs, so that it will not become
+ * stale when we write to MSRs.
+ * (intel_pstate's max_perf_pct and min_perf_pct will follow cpufreq,
+ * so we don't have to touch that.)
+ */
+void update_cpufreq_scaling_freq(int is_max, int cpu, unsigned int ratio)
+{
+ char pathname[64];
+ FILE *fp;
+ int retval;
+ int khz;
+
+ sprintf(pathname, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_%s_freq",
+ cpu, is_max ? "max" : "min");
+
+ fp = fopen(pathname, "w");
+ if (!fp) {
+ if (debug)
+ perror(pathname);
+ return;
+ }
+
+ khz = ratio_2_sysfs_khz(ratio);
+ retval = fprintf(fp, "%d", khz);
+ if (retval < 0)
+ if (debug)
+ perror("fprintf");
+ if (debug)
+ printf("echo %d > %s\n", khz, pathname);
+
+ fclose(fp);
+}
+
+/*
+ * We update all sysfs before updating any MSRs because of
+ * bugs in cpufreq/intel_pstate where the sysfs writes
+ * for a CPU may change the min/max values on other CPUS.
+ */
+
+int update_sysfs(int cpu)
+{
+ if (!has_hwp)
+ return 0;
+
+ if (!hwp_update_enabled())
+ return 0;
+
+ if (access("/sys/devices/system/cpu/cpu0/cpufreq", F_OK))
+ return 0;
+
+ if (update_hwp_min)
+ update_cpufreq_scaling_freq(0, cpu, req_update.hwp_min);
+
+ if (update_hwp_max)
+ update_cpufreq_scaling_freq(1, cpu, req_update.hwp_max);
+
+ return 0;
+}
+
+int verify_hwp_req_self_consistency(int cpu, struct msr_hwp_request *req)
+{
+ /* fail if min > max requested */
+ if (req->hwp_min > req->hwp_max) {
+ errx(1, "cpu%d: requested hwp-min %d > hwp_max %d",
+ cpu, req->hwp_min, req->hwp_max);
+ }
+
+	/* fail if desired > max requested */
+ if (req->hwp_desired && (req->hwp_desired > req->hwp_max)) {
+ errx(1, "cpu%d: requested hwp-desired %d > hwp_max %d",
+ cpu, req->hwp_desired, req->hwp_max);
+ }
+	/* fail if desired < min requested */
+ if (req->hwp_desired && (req->hwp_desired < req->hwp_min)) {
+ errx(1, "cpu%d: requested hwp-desired %d < requested hwp_min %d",
+ cpu, req->hwp_desired, req->hwp_min);
+ }
+
+ return 0;
+}
+
+int check_hwp_request_v_hwp_capabilities(int cpu, struct msr_hwp_request *req, struct msr_hwp_cap *cap)
+{
+ if (update_hwp_max) {
+ if (req->hwp_max > cap->highest)
+ errx(1, "cpu%d: requested max %d > capabilities highest %d, use --force?",
+ cpu, req->hwp_max, cap->highest);
+ if (req->hwp_max < cap->lowest)
+ errx(1, "cpu%d: requested max %d < capabilities lowest %d, use --force?",
+ cpu, req->hwp_max, cap->lowest);
+ }
+
+ if (update_hwp_min) {
+ if (req->hwp_min > cap->highest)
+ errx(1, "cpu%d: requested min %d > capabilities highest %d, use --force?",
+ cpu, req->hwp_min, cap->highest);
+ if (req->hwp_min < cap->lowest)
+ errx(1, "cpu%d: requested min %d < capabilities lowest %d, use --force?",
+ cpu, req->hwp_min, cap->lowest);
+ }
+
+ if (update_hwp_min && update_hwp_max && (req->hwp_min > req->hwp_max))
+ errx(1, "cpu%d: requested min %d > requested max %d",
+ cpu, req->hwp_min, req->hwp_max);
+
+ if (update_hwp_desired && req->hwp_desired) {
+ if (req->hwp_desired > req->hwp_max)
+ errx(1, "cpu%d: requested desired %d > requested max %d, use --force?",
+ cpu, req->hwp_desired, req->hwp_max);
+ if (req->hwp_desired < req->hwp_min)
+ errx(1, "cpu%d: requested desired %d < requested min %d, use --force?",
+ cpu, req->hwp_desired, req->hwp_min);
+ if (req->hwp_desired < cap->lowest)
+ errx(1, "cpu%d: requested desired %d < capabilities lowest %d, use --force?",
+ cpu, req->hwp_desired, cap->lowest);
+ if (req->hwp_desired > cap->highest)
+ errx(1, "cpu%d: requested desired %d > capabilities highest %d, use --force?",
+ cpu, req->hwp_desired, cap->highest);
+ }
+
+ return 0;
+}
+
+int update_hwp_request(int cpu)
+{
+ struct msr_hwp_request req;
+ struct msr_hwp_cap cap;
+
+ int msr_offset = MSR_HWP_REQUEST;
+
+ read_hwp_request(cpu, &req, msr_offset);
+ if (debug)
+ print_hwp_request(cpu, &req, "old: ");
+
+ if (update_hwp_min)
+ req.hwp_min = req_update.hwp_min;
+
+ if (update_hwp_max)
+ req.hwp_max = req_update.hwp_max;
+
+ if (update_hwp_desired)
+ req.hwp_desired = req_update.hwp_desired;
+
+ if (update_hwp_window)
+ req.hwp_window = req_update.hwp_window;
+
+ if (update_hwp_epp)
+ req.hwp_epp = req_update.hwp_epp;
+
+ req.hwp_use_pkg = req_update.hwp_use_pkg;
+
+ read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+ if (debug)
+ print_hwp_cap(cpu, &cap, "");
+
+ if (!force)
+ check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
+
+ verify_hwp_req_self_consistency(cpu, &req);
+
+ write_hwp_request(cpu, &req, msr_offset);
+
+ if (debug) {
+ read_hwp_request(cpu, &req, msr_offset);
+ print_hwp_request(cpu, &req, "new: ");
+ }
+ return 0;
+}
+int update_hwp_request_pkg(int pkg)
+{
+ struct msr_hwp_request req;
+ struct msr_hwp_cap cap;
+ int cpu = first_cpu_in_pkg[pkg];
+
+ int msr_offset = MSR_HWP_REQUEST_PKG;
+
+ read_hwp_request(cpu, &req, msr_offset);
+ if (debug)
+ print_hwp_request_pkg(pkg, &req, "old: ");
+
+ if (update_hwp_min)
+ req.hwp_min = req_update.hwp_min;
+
+ if (update_hwp_max)
+ req.hwp_max = req_update.hwp_max;
+
+ if (update_hwp_desired)
+ req.hwp_desired = req_update.hwp_desired;
+
+ if (update_hwp_window)
+ req.hwp_window = req_update.hwp_window;
+
+ if (update_hwp_epp)
+ req.hwp_epp = req_update.hwp_epp;
+
+ read_hwp_cap(cpu, &cap, MSR_HWP_CAPABILITIES);
+ if (debug)
+ print_hwp_cap(cpu, &cap, "");
+
+ if (!force)
+ check_hwp_request_v_hwp_capabilities(cpu, &req, &cap);
+
+ verify_hwp_req_self_consistency(cpu, &req);
+
+ write_hwp_request(cpu, &req, msr_offset);
+
+ if (debug) {
+ read_hwp_request(cpu, &req, msr_offset);
+ print_hwp_request_pkg(pkg, &req, "new: ");
+ }
+ return 0;
+}
+
+int enable_hwp_on_cpu(int cpu)
+{
+ unsigned long long msr;
+
+ get_msr(cpu, MSR_PM_ENABLE, &msr);
+ put_msr(cpu, MSR_PM_ENABLE, 1);
+
+ if (verbose)
+ printf("cpu%d: MSR_PM_ENABLE old: %d new: %d\n", cpu, (unsigned int) msr, 1);
+
+ return 0;
+}
+
+int update_cpu_msrs(int cpu)
+{
+ unsigned long long msr;
+
+
+ if (update_epb) {
+ get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr);
+ put_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, new_epb);
+
+ if (verbose)
+ printf("cpu%d: ENERGY_PERF_BIAS old: %d new: %d\n",
+ cpu, (unsigned int) msr, (unsigned int) new_epb);
+ }
+
+ if (update_turbo) {
+ int turbo_is_present_and_disabled;
+
+ get_msr(cpu, MSR_IA32_MISC_ENABLE, &msr);
+
+ turbo_is_present_and_disabled = ((msr & MSR_IA32_MISC_ENABLE_TURBO_DISABLE) != 0);
+
+ if (turbo_update_value == 1) {
+ if (turbo_is_present_and_disabled) {
+ msr &= ~MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
+ if (verbose)
+ printf("cpu%d: turbo ENABLE\n", cpu);
+ }
+ } else {
+ /*
+			 * If "turbo_is_enabled" were known to describe this
+			 * CPU, we could use it here to skip redundant disable
+			 * requests; but the CPU may be in a different package,
+			 * so we always write.
+ */
+ msr |= MSR_IA32_MISC_ENABLE_TURBO_DISABLE;
+ put_msr(cpu, MSR_IA32_MISC_ENABLE, msr);
+ if (verbose)
+ printf("cpu%d: turbo DISABLE\n", cpu);
+ }
+ }
+
+ if (!has_hwp)
+ return 0;
+
+ if (!hwp_update_enabled())
+ return 0;
+
+ update_hwp_request(cpu);
+ return 0;
+}
+
+/*
+ * Open a file, and exit on failure
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+	FILE *filep = fopen(path, mode);
+
+ if (!filep)
+ err(1, "%s: open failed", path);
+ return filep;
+}
+
+unsigned int get_pkg_num(int cpu)
+{
+ FILE *fp;
+ char pathname[128];
+ unsigned int pkg;
+ int retval;
+
+ sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu);
+
+ fp = fopen_or_die(pathname, "r");
+	retval = fscanf(fp, "%u\n", &pkg);
+	if (retval != 1)
+		errx(1, "%s: failed to parse", pathname);
+	fclose(fp);
+	return pkg;
+}
+
+int set_max_cpu_pkg_num(int cpu)
+{
+ unsigned int pkg;
+
+ if (max_cpu_num < cpu)
+ max_cpu_num = cpu;
+
+ pkg = get_pkg_num(cpu);
+
+ if (pkg >= MAX_PACKAGES)
+ errx(1, "cpu%d: %d >= MAX_PACKAGES (%d)", cpu, pkg, MAX_PACKAGES);
+
+ if (pkg > max_pkg_num)
+ max_pkg_num = pkg;
+
+ if ((pkg_present_set & (1ULL << pkg)) == 0) {
+ pkg_present_set |= (1ULL << pkg);
+ first_cpu_in_pkg[pkg] = cpu;
+ }
+
+ return 0;
+}
+int mark_cpu_present(int cpu)
+{
+ CPU_SET_S(cpu, cpu_setsize, cpu_present_set);
+ return 0;
+}
+
+/*
+ * run func(cpu) on every cpu in /proc/stat
+ * return 0, or the first non-zero value returned by func
+ */
+int for_all_proc_cpus(int (func)(int))
+{
+ FILE *fp;
+ int cpu_num;
+ int retval;
+
+ fp = fopen_or_die(proc_stat, "r");
+
+ retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n");
+ if (retval != 0)
+ err(1, "%s: failed to parse format", proc_stat);
+
+ while (1) {
+ retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num);
+ if (retval != 1)
+ break;
+
+ retval = func(cpu_num);
+ if (retval) {
+ fclose(fp);
+ return retval;
+ }
+ }
+ fclose(fp);
+ return 0;
+}
+
+void for_all_cpus_in_set(size_t set_size, cpu_set_t *cpu_set, int (func)(int))
+{
+ int cpu_num;
+
+ for (cpu_num = 0; cpu_num <= max_cpu_num; ++cpu_num)
+ if (CPU_ISSET_S(cpu_num, set_size, cpu_set))
+ func(cpu_num);
+}
+
+void init_data_structures(void)
+{
+ for_all_proc_cpus(set_max_cpu_pkg_num);
+
+ cpu_setsize = CPU_ALLOC_SIZE((max_cpu_num + 1));
+
+ cpu_present_set = CPU_ALLOC((max_cpu_num + 1));
+ if (cpu_present_set == NULL)
+ err(3, "CPU_ALLOC");
+ CPU_ZERO_S(cpu_setsize, cpu_present_set);
+ for_all_proc_cpus(mark_cpu_present);
+}
+
+/* clear has_hwp if it is not enabled (or being enabled) */
+
+void verify_hwp_is_enabled(void)
+{
+ unsigned long long msr;
+
+ if (!has_hwp) /* set in early_cpuid() */
+ return;
+
+	/* MSR_PM_ENABLE bit 0 == 1 if HWP is enabled and its MSRs are visible */
+ get_msr(base_cpu, MSR_PM_ENABLE, &msr);
+ if ((msr & 1) == 0) {
+ fprintf(stderr, "HWP can be enabled using '--hwp-enable'\n");
+ has_hwp = 0;
+ return;
+ }
+}
+
+int req_update_bounds_check(void)
+{
+ if (!hwp_update_enabled())
+ return 0;
+
+ /* fail if min > max requested */
+ if ((update_hwp_max && update_hwp_min) &&
+ (req_update.hwp_min > req_update.hwp_max)) {
+ printf("hwp-min %d > hwp_max %d\n", req_update.hwp_min, req_update.hwp_max);
+ return -EINVAL;
+ }
+
+	/* fail if desired > max requested */
+ if (req_update.hwp_desired && update_hwp_max &&
+ (req_update.hwp_desired > req_update.hwp_max)) {
+ printf("hwp-desired cannot be greater than hwp_max\n");
+ return -EINVAL;
+ }
+	/* fail if desired < min requested */
+ if (req_update.hwp_desired && update_hwp_min &&
+ (req_update.hwp_desired < req_update.hwp_min)) {
+ printf("hwp-desired cannot be less than hwp_min\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+void set_base_cpu(void)
+{
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(-ENODEV, "No valid cpus found");
+}
+
+
+void probe_dev_msr(void)
+{
+ struct stat sb;
+ char pathname[32];
+
+ sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
+ if (stat(pathname, &sb))
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+			err(-5, "no /dev/cpu/0/msr; try \"modprobe msr\"");
+}
+
+static void get_cpuid_or_exit(unsigned int leaf,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ if (!__get_cpuid(leaf, eax, ebx, ecx, edx))
+ errx(1, "Processor not supported\n");
+}
+
+/*
+ * early_cpuid()
+ * initialize turbo_is_enabled, has_hwp, has_epb
+ * before cmdline is parsed
+ */
+void early_cpuid(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int fms, family, model;
+
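+	/*
+	 * Decode family/model from CPUID.1 EAX; the extended model
+	 * bits apply only on family 6 and 0xf.
+	 */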
+ get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx);
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ if (family == 6 || family == 0xf)
+ model += ((fms >> 16) & 0xf) << 4;
+
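+	/* 0x4F is Broadwell Xeon (BDX): cache its 1-core turbo ratio limit */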
+ if (model == 0x4F) {
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
+
+ bdx_highest_ratio = msr & 0xFF;
+ }
+
+ get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx);
+ turbo_is_enabled = (eax >> 1) & 1;
+ has_hwp = (eax >> 7) & 1;
+ has_epb = (ecx >> 3) & 1;
+}
+
+/*
+ * parse_cpuid()
+ * set
+ * has_hwp, has_hwp_notify, has_hwp_activity_window, has_hwp_epp, has_hwp_request_pkg, has_epb
+ */
+void parse_cpuid(void)
+{
+ unsigned int eax, ebx, ecx, edx, max_level;
+ unsigned int fms, family, model, stepping;
+
+ eax = ebx = ecx = edx = 0;
+
+ get_cpuid_or_exit(0, &max_level, &ebx, &ecx, &edx);
+
+ if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
+ genuine_intel = 1;
+
+ if (debug)
+ fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
+ (char *)&ebx, (char *)&edx, (char *)&ecx);
+
+ get_cpuid_or_exit(1, &fms, &ebx, &ecx, &edx);
+ family = (fms >> 8) & 0xf;
+ model = (fms >> 4) & 0xf;
+ stepping = fms & 0xf;
+ if (family == 6 || family == 0xf)
+ model += ((fms >> 16) & 0xf) << 4;
+
+ if (debug) {
+ fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+ max_level, family, model, stepping, family, model, stepping);
+ fprintf(stderr, "CPUID(1): %s %s %s %s %s %s %s %s\n",
+ ecx & (1 << 0) ? "SSE3" : "-",
+ ecx & (1 << 3) ? "MONITOR" : "-",
+ ecx & (1 << 7) ? "EIST" : "-",
+ ecx & (1 << 8) ? "TM2" : "-",
+ edx & (1 << 4) ? "TSC" : "-",
+ edx & (1 << 5) ? "MSR" : "-",
+ edx & (1 << 22) ? "ACPI-TM" : "-",
+ edx & (1 << 29) ? "TM" : "-");
+ }
+
+ if (!(edx & (1 << 5)))
+ errx(1, "CPUID: no MSR");
+
+
+ get_cpuid_or_exit(0x6, &eax, &ebx, &ecx, &edx);
+ /* turbo_is_enabled already set */
+ /* has_hwp already set */
+ has_hwp_notify = eax & (1 << 8);
+ has_hwp_activity_window = eax & (1 << 9);
+ has_hwp_epp = eax & (1 << 10);
+ has_hwp_request_pkg = eax & (1 << 11);
+
+ if (!has_hwp_request_pkg && update_hwp_use_pkg)
+ errx(1, "--hwp-use-pkg is not available on this hardware");
+
+ /* has_epb already set */
+
+ if (debug)
+ fprintf(stderr,
+ "CPUID(6): %sTURBO, %sHWP, %sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
+ turbo_is_enabled ? "" : "No-",
+ has_hwp ? "" : "No-",
+ has_hwp_notify ? "" : "No-",
+ has_hwp_activity_window ? "" : "No-",
+ has_hwp_epp ? "" : "No-",
+ has_hwp_request_pkg ? "" : "No-",
+ has_epb ? "" : "No-");
+
+ return; /* success */
+}
+
+int main(int argc, char **argv)
+{
+ set_base_cpu();
+ probe_dev_msr();
+ init_data_structures();
+
+ early_cpuid(); /* initial cpuid parse before cmdline */
+
+ cmdline(argc, argv);
+
+ if (debug)
+ print_version();
+
+ parse_cpuid();
+
+ /* If CPU-set and PKG-set are not initialized, default to all CPUs */
+ if ((cpu_selected_set == 0) && (pkg_selected_set == 0))
+ cpu_selected_set = cpu_present_set;
+
+ /*
+ * If HWP is being enabled, do it now, so that subsequent operations
+ * that access HWP registers can work.
+ */
+ if (update_hwp_enable)
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, enable_hwp_on_cpu);
+
+ /* If HWP present, but disabled, warn and ignore from here forward */
+ verify_hwp_is_enabled();
+
+ if (req_update_bounds_check())
+ return -EINVAL;
+
+ /* display information only, no updates to settings */
+ if (!update_epb && !update_turbo && !hwp_update_enabled()) {
+ if (cpu_selected_set)
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, print_cpu_msrs);
+
+ if (has_hwp_request_pkg) {
+ if (pkg_selected_set == 0)
+ pkg_selected_set = pkg_present_set;
+
+ for_packages(pkg_selected_set, print_pkg_msrs);
+ }
+
+ return 0;
+ }
+
+ /* update CPU set */
+ if (cpu_selected_set) {
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_sysfs);
+ for_all_cpus_in_set(cpu_setsize, cpu_selected_set, update_cpu_msrs);
+ } else if (pkg_selected_set)
+ for_packages(pkg_selected_set, update_hwp_request_pkg);
+
+ return 0;
+}
diff --git a/tools/scripts/Makefile.arch b/tools/scripts/Makefile.arch
new file mode 100644
index 000000000..b10b7a27c
--- /dev/null
+++ b/tools/scripts/Makefile.arch
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0
+HOSTARCH := $(shell uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ \
+ -e s/sun4u/sparc/ -e s/sparc64/sparc/ \
+ -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \
+ -e s/s390x/s390/ -e s/parisc64/parisc/ \
+ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
+ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
+
+ifndef ARCH
+ARCH := $(HOSTARCH)
+endif
+
+SRCARCH := $(ARCH)
+
+# Additional ARCH settings for x86
+ifeq ($(ARCH),i386)
+ SRCARCH := x86
+endif
+ifeq ($(ARCH),x86_64)
+ SRCARCH := x86
+endif
+
+# Additional ARCH settings for sparc
+ifeq ($(ARCH),sparc32)
+ SRCARCH := sparc
+endif
+ifeq ($(ARCH),sparc64)
+ SRCARCH := sparc
+endif
+
+# Additional ARCH settings for sh
+ifeq ($(ARCH),sh64)
+ SRCARCH := sh
+endif
+
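+# Probe the target word size: preprocessing "__LP64__" yields 1 on
+# 64-bit targets, while on 32-bit targets the macro is undefined and
+# the token survives unexpanded.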
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+ IS_64_BIT := 1
+else
+ IS_64_BIT := 0
+endif
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
new file mode 100644
index 000000000..42dbe05b1
--- /dev/null
+++ b/tools/scripts/Makefile.include
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+ifneq ($(O),)
+ifeq ($(origin O), command line)
+ dummy := $(if $(shell cd $(PWD); test -d $(O) || echo $(O)),$(error O=$(O) does not exist),)
+ ABSOLUTE_O := $(shell cd $(PWD); cd $(O) ; pwd)
+ OUTPUT := $(ABSOLUTE_O)/$(if $(subdir),$(subdir)/)
+ COMMAND_O := O=$(ABSOLUTE_O)
+ifeq ($(objtree),)
+ objtree := $(O)
+endif
+endif
+endif
+
+# check that the output directory actually exists
+ifneq ($(OUTPUT),)
+OUTDIR := $(shell cd $(OUTPUT) && pwd)
+$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist))
+endif
+
+#
+# Include saner warnings here, which can catch bugs:
+#
+EXTRA_WARNINGS := -Wbad-function-cast
+EXTRA_WARNINGS += -Wdeclaration-after-statement
+EXTRA_WARNINGS += -Wformat-security
+EXTRA_WARNINGS += -Wformat-y2k
+EXTRA_WARNINGS += -Winit-self
+EXTRA_WARNINGS += -Wmissing-declarations
+EXTRA_WARNINGS += -Wmissing-prototypes
+EXTRA_WARNINGS += -Wnested-externs
+EXTRA_WARNINGS += -Wno-system-headers
+EXTRA_WARNINGS += -Wold-style-definition
+EXTRA_WARNINGS += -Wpacked
+EXTRA_WARNINGS += -Wredundant-decls
+EXTRA_WARNINGS += -Wshadow
+EXTRA_WARNINGS += -Wstrict-prototypes
+EXTRA_WARNINGS += -Wswitch-default
+EXTRA_WARNINGS += -Wswitch-enum
+EXTRA_WARNINGS += -Wundef
+EXTRA_WARNINGS += -Wwrite-strings
+EXTRA_WARNINGS += -Wformat
+
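+# CC_NO_CLANG is 1 when $(CC) is not clang: grep exits 0 only when
+# __clang__ is among the predefined macros, and that status is echoed.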
+CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,LD,$(CROSS_COMPILE)ld)
+$(call allow-override,CXX,$(CROSS_COMPILE)g++)
+$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+
+ifneq ($(LLVM),)
+HOSTAR ?= llvm-ar
+HOSTCC ?= clang
+HOSTLD ?= ld.lld
+else
+HOSTAR ?= ar
+HOSTCC ?= gcc
+HOSTLD ?= ld
+endif
+
+ifeq ($(CC_NO_CLANG), 1)
+EXTRA_WARNINGS += -Wstrict-aliasing=3
+endif
+
+# Hack to avoid type-punned warnings on old systems such as RHEL5:
+# We should be changing CFLAGS and checking gcc version, but this
+# will do for now and keep the above -Wstrict-aliasing=3 in place
+# in newer systems.
+# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
+ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3
+EXTRA_WARNINGS += -fno-strict-aliasing
+endif
+
+ifneq ($(findstring $(MAKEFLAGS), w),w)
+PRINT_DIR = --no-print-directory
+else
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
+ silent=1
+endif
+
+#
+# Define a callable command for descending to a new directory
+#
+# Call by doing: $(call descend,directory[,target])
+#
+descend = \
+ +mkdir -p $(OUTPUT)$(1) && \
+ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
+
+QUIET_SUBDIR0 = +$(MAKE) $(COMMAND_O) -C # space to separate -C and subdir
+QUIET_SUBDIR1 =
+
+ifneq ($(silent),1)
+ ifneq ($(V),1)
+ QUIET_CC = @echo ' CC '$@;
+ QUIET_CC_FPIC = @echo ' CC FPIC '$@;
+ QUIET_AR = @echo ' AR '$@;
+ QUIET_LINK = @echo ' LINK '$@;
+ QUIET_MKDIR = @echo ' MKDIR '$@;
+ QUIET_GEN = @echo ' GEN '$@;
+ QUIET_SUBDIR0 = +@subdir=
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
+ echo ' SUBDIR '$$subdir; \
+ $(MAKE) $(PRINT_DIR) -C $$subdir
+ QUIET_FLEX = @echo ' FLEX '$@;
+ QUIET_BISON = @echo ' BISON '$@;
+
+ descend = \
+ +@echo ' DESCEND '$(1); \
+ mkdir -p $(OUTPUT)$(1) && \
+ $(MAKE) $(COMMAND_O) subdir=$(if $(subdir),$(subdir)/$(1),$(1)) $(PRINT_DIR) -C $(1) $(2)
+
+ QUIET_CLEAN = @printf ' CLEAN %s\n' $1;
+ QUIET_INSTALL = @printf ' INSTALL %s\n' $1;
+ QUIET_UNINST = @printf ' UNINST %s\n' $1;
+ endif
+endif
+
+pound := \#
diff --git a/tools/scripts/utilities.mak b/tools/scripts/utilities.mak
new file mode 100644
index 000000000..c16ce8330
--- /dev/null
+++ b/tools/scripts/utilities.mak
@@ -0,0 +1,179 @@
+# This allows us to work with the newline character:
+define newline
+
+
+endef
+newline := $(newline)
+
+# nl-escape
+#
+# Usage: escape = $(call nl-escape[,escape])
+#
+# This is used as the common way to specify
+# what should replace a newline when escaping
+# newlines; the default is a bizarre string.
+#
+nl-escape = $(if $(1),$(1),m822df3020w6a44id34bt574ctac44eb9f4n)
+
+# escape-nl
+#
+# Usage: escaped-text = $(call escape-nl,text[,escape])
+#
+# GNU make's $(shell ...) function converts to a
+# single space each newline character in the output
+# produced during the expansion; this may not be
+# desirable.
+#
+# The only solution is to change each newline into
+# something that won't be converted, so that the
+# information can be recovered later with
+# $(call unescape-nl...)
+#
+escape-nl = $(subst $(newline),$(call nl-escape,$(2)),$(1))
+
+# unescape-nl
+#
+# Usage: text = $(call unescape-nl,escaped-text[,escape])
+#
+# See escape-nl.
+#
+unescape-nl = $(subst $(call nl-escape,$(2)),$(newline),$(1))
+
+# shell-escape-nl
+#
+# Usage: $(shell some-command | $(call shell-escape-nl[,escape]))
+#
+# Use this to escape newlines from within a shell call;
+# the default escape is a bizarre string.
+#
+# NOTE: The escape is used directly as a string constant
+# in an `awk' program that is delimited by shell
+# single-quotes, so be wary of the characters
+# that are chosen.
+#
+define shell-escape-nl
+awk 'NR==1 {t=$$0} NR>1 {t=t "$(nl-escape)" $$0} END {printf t}'
+endef
+
+# shell-unescape-nl
+#
+# Usage: $(shell some-command | $(call shell-unescape-nl[,escape]))
+#
+# Use this to unescape newlines from within a shell call;
+# the default escape is a bizarre string.
+#
+# NOTE: The escape is used directly as an extended regular
+# expression constant in an `awk' program that is
+# delimited by shell single-quotes, so be wary
+# of the characters that are chosen.
+#
+# (The bash shell has a bug where `{gsub(...),...}' is
+# misinterpreted as a brace expansion; this can be
+# overcome by putting a space between `{' and `gsub').
+#
+define shell-unescape-nl
+awk 'NR==1 {t=$$0} NR>1 {t=t "\n" $$0} END { gsub(/$(nl-escape)/,"\n",t); printf t }'
+endef
+
+# escape-for-shell-sq
+#
+# Usage: embeddable-text = $(call escape-for-shell-sq,text)
+#
+# This function produces text that is suitable for
+# embedding in a shell string that is delimited by
+# single-quotes.
+#
+escape-for-shell-sq = $(subst ','\'',$(1))
+
+# shell-sq
+#
+# Usage: single-quoted-and-escaped-text = $(call shell-sq,text)
+#
+shell-sq = '$(escape-for-shell-sq)'
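+#
+# For example, $(call shell-sq,don't) produces 'don'\''t', which the
+# shell reads back as the original text.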
+
+# shell-wordify
+#
+# Usage: wordified-text = $(call shell-wordify,text)
+#
+# For instance:
+#
+# |define text
+# |hello
+# |world
+# |endef
+# |
+# |target:
+# | echo $(call shell-wordify,$(text))
+#
+# At least GNU make gets confused by expanding a newline
+# within the context of a command line of a makefile rule
+# (this is in contrast to a `$(shell ...)' function call,
+# which can handle it just fine).
+#
+# This function avoids the problem by producing a string
+# that works as a shell word, regardless of whether or
+# not it contains a newline.
+#
+# If the text to be wordified contains a newline, then
+# an intricate shell command substitution is constructed
+# to render the text as a single line; when the shell
+# processes the resulting escaped text, it transforms
+# it into the original unescaped text.
+#
+# If the text does not contain a newline, then this function
+# produces the same results as the `$(shell-sq)' function.
+#
+shell-wordify = $(if $(findstring $(newline),$(1)),$(_sw-esc-nl),$(shell-sq))
+define _sw-esc-nl
+"$$(echo $(call escape-nl,$(shell-sq),$(2)) | $(call shell-unescape-nl,$(2)))"
+endef
+
+# is-absolute
+#
+# Usage: bool-value = $(call is-absolute,path)
+#
+is-absolute = $(shell echo $(shell-sq) | grep -q ^/ && echo y)
+
+# lookup
+#
+# Usage: absolute-executable-path-or-empty = $(call lookup,path)
+#
+# (It's necessary to use `sh -c' because GNU make messes up by
+# trying too hard and getting things wrong).
+#
+lookup = $(call unescape-nl,$(shell sh -c $(_l-sh)))
+_l-sh = $(call shell-sq,command -v $(shell-sq) | $(call shell-escape-nl,))
+
+# is-executable
+#
+# Usage: bool-value = $(call is-executable,path)
+#
+# (It's necessary to use `sh -c' because GNU make messes up by
+# trying too hard and getting things wrong).
+#
+is-executable = $(call _is-executable-helper,$(shell-sq))
+_is-executable-helper = $(shell sh -c $(_is-executable-sh))
+_is-executable-sh = $(call shell-sq,test -f $(1) -a -x $(1) && echo y)
+
+# get-executable
+#
+# Usage: absolute-executable-path-or-empty = $(call get-executable,path)
+#
+# The goal is to get an absolute path for an executable;
+# the `command -v' is defined by POSIX, but it's not
+# necessarily very portable, so it's only used if
+# relative path resolution is requested, as determined
+# by the presence of a leading `/'.
+#
+get-executable = $(if $(1),$(if $(is-absolute),$(_ge-abspath),$(lookup)))
+_ge-abspath = $(if $(is-executable),$(1))
+
+# get-executable-or-default
+#
+# Usage: absolute-executable-path-or-empty = $(call get-executable-or-default,variable,default)
+#
+define get-executable-or-default
+$(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
+endef
+_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
+_gea_err = $(if $(1),$(error Please set '$(1)' appropriately))
diff --git a/tools/spi/.gitignore b/tools/spi/.gitignore
new file mode 100644
index 000000000..428057639
--- /dev/null
+++ b/tools/spi/.gitignore
@@ -0,0 +1,2 @@
+spidev_fdx
+spidev_test
diff --git a/tools/spi/Build b/tools/spi/Build
new file mode 100644
index 000000000..8e8466033
--- /dev/null
+++ b/tools/spi/Build
@@ -0,0 +1,2 @@
+spidev_test-y += spidev_test.o
+spidev_fdx-y += spidev_fdx.o
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
new file mode 100644
index 000000000..815d15589
--- /dev/null
+++ b/tools/spi/Makefile
@@ -0,0 +1,64 @@
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := spidev_test spidev_fdx
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following to be outside of kernel tree
+#
+$(OUTPUT)include/linux/spi/spidev.h: ../../include/uapi/linux/spi/spidev.h
+ mkdir -p $(OUTPUT)include/linux/spi 2>&1 || true
+ ln -sf $(CURDIR)/../../include/uapi/linux/spi/spidev.h $@
+
+prepare: $(OUTPUT)include/linux/spi/spidev.h
+
+#
+# spidev_test
+#
+SPIDEV_TEST_IN := $(OUTPUT)spidev_test-in.o
+$(SPIDEV_TEST_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=spidev_test
+$(OUTPUT)spidev_test: $(SPIDEV_TEST_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+#
+# spidev_fdx
+#
+SPIDEV_FDX_IN := $(OUTPUT)spidev_fdx-in.o
+$(SPIDEV_FDX_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=spidev_fdx
+$(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+clean:
+ rm -f $(ALL_PROGRAMS)
+ rm -f $(OUTPUT)include/linux/spi/spidev.h
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/spi/spidev_fdx.c b/tools/spi/spidev_fdx.c
new file mode 100644
index 000000000..7d2a867cd
--- /dev/null
+++ b/tools/spi/spidev_fdx.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/types.h>
+#include <linux/spi/spidev.h>
+
+
+static int verbose;
+
+static void do_read(int fd, int len)
+{
+ unsigned char buf[32], *bp;
+ int status;
+
+ /* read at least 2 bytes, no more than 32 */
+ if (len < 2)
+ len = 2;
+ else if (len > sizeof(buf))
+ len = sizeof(buf);
+ memset(buf, 0, sizeof buf);
+
+ status = read(fd, buf, len);
+ if (status < 0) {
+ perror("read");
+ return;
+ }
+ if (status != len) {
+ fprintf(stderr, "short read\n");
+ return;
+ }
+
+ printf("read(%2d, %2d): %02x %02x,", len, status,
+ buf[0], buf[1]);
+ status -= 2;
+ bp = buf + 2;
+ while (status-- > 0)
+ printf(" %02x", *bp++);
+ printf("\n");
+}
+
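+/*
+ * Issue one full-duplex message of two chained transfers: the first
+ * clocks out a single 0xaa command byte, the second clocks in up to
+ * 32 bytes of response into the same buffer.
+ */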
+static void do_msg(int fd, int len)
+{
+ struct spi_ioc_transfer xfer[2];
+ unsigned char buf[32], *bp;
+ int status;
+
+ memset(xfer, 0, sizeof xfer);
+ memset(buf, 0, sizeof buf);
+
+ if (len > sizeof buf)
+ len = sizeof buf;
+
+ buf[0] = 0xaa;
+ xfer[0].tx_buf = (unsigned long)buf;
+ xfer[0].len = 1;
+
+ xfer[1].rx_buf = (unsigned long) buf;
+ xfer[1].len = len;
+
+ status = ioctl(fd, SPI_IOC_MESSAGE(2), xfer);
+ if (status < 0) {
+ perror("SPI_IOC_MESSAGE");
+ return;
+ }
+
+ printf("response(%2d, %2d): ", len, status);
+ for (bp = buf; len; len--)
+ printf(" %02x", *bp++);
+ printf("\n");
+}
+
+static void dumpstat(const char *name, int fd)
+{
+ __u8 lsb, bits;
+ __u32 mode, speed;
+
+ if (ioctl(fd, SPI_IOC_RD_MODE32, &mode) < 0) {
+ perror("SPI rd_mode");
+ return;
+ }
+ if (ioctl(fd, SPI_IOC_RD_LSB_FIRST, &lsb) < 0) {
+		perror("SPI rd_lsb_first");
+ return;
+ }
+ if (ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits) < 0) {
+ perror("SPI bits_per_word");
+ return;
+ }
+ if (ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed) < 0) {
+ perror("SPI max_speed_hz");
+ return;
+ }
+
+ printf("%s: spi mode 0x%x, %d bits %sper word, %d Hz max\n",
+ name, mode, bits, lsb ? "(lsb first) " : "", speed);
+}
+
+int main(int argc, char **argv)
+{
+ int c;
+ int readcount = 0;
+ int msglen = 0;
+ int fd;
+ const char *name;
+
+ while ((c = getopt(argc, argv, "hm:r:v")) != EOF) {
+ switch (c) {
+ case 'm':
+ msglen = atoi(optarg);
+ if (msglen < 0)
+ goto usage;
+ continue;
+ case 'r':
+ readcount = atoi(optarg);
+ if (readcount < 0)
+ goto usage;
+ continue;
+ case 'v':
+ verbose++;
+ continue;
+ case 'h':
+ case '?':
+usage:
+ fprintf(stderr,
+ "usage: %s [-h] [-m N] [-r N] /dev/spidevB.D\n",
+ argv[0]);
+ return 1;
+ }
+ }
+
+ if ((optind + 1) != argc)
+ goto usage;
+ name = argv[optind];
+
+ fd = open(name, O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return 1;
+ }
+
+ dumpstat(name, fd);
+
+ if (msglen)
+ do_msg(fd, msglen);
+
+ if (readcount)
+ do_read(fd, readcount);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
new file mode 100644
index 000000000..8c590cd11
--- /dev/null
+++ b/tools/spi/spidev_test.c
@@ -0,0 +1,483 @@
+/*
+ * SPI testing utility (using spidev driver)
+ *
+ * Copyright (c) 2007 MontaVista Software, Inc.
+ * Copyright (c) 2007 Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * Cross-compile with cross-gcc -I/path/to/cross-kernel/include
+ */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <fcntl.h>
+#include <time.h>
+#include <sys/ioctl.h>
+#include <linux/ioctl.h>
+#include <sys/stat.h>
+#include <linux/types.h>
+#include <linux/spi/spidev.h>
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
+
+static void pabort(const char *s)
+{
+ perror(s);
+ abort();
+}
+
+static const char *device = "/dev/spidev1.1";
+static uint32_t mode;
+static uint8_t bits = 8;
+static char *input_file;
+static char *output_file;
+static uint32_t speed = 500000;
+static uint16_t delay;
+static int verbose;
+static int transfer_size;
+static int iterations;
+static int interval = 5; /* interval in seconds for showing transfer rate */
+
+uint8_t default_tx[] = {
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x40, 0x00, 0x00, 0x00, 0x00, 0x95,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+ 0xF0, 0x0D,
+};
+
+uint8_t default_rx[ARRAY_SIZE(default_tx)] = {0, };
+char *input_tx;
+
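+/*
+ * Dump a buffer as hex bytes, line_size bytes per line, padding the
+ * final line and appending a printable-ASCII column ('.' stands in
+ * for non-printable bytes).
+ */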
+static void hex_dump(const void *src, size_t length, size_t line_size,
+ char *prefix)
+{
+ int i = 0;
+ const unsigned char *address = src;
+ const unsigned char *line = address;
+ unsigned char c;
+
+ printf("%s | ", prefix);
+ while (length-- > 0) {
+ printf("%02X ", *address++);
+ if (!(++i % line_size) || (length == 0 && i % line_size)) {
+ if (length == 0) {
+ while (i++ % line_size)
+ printf("__ ");
+ }
+ printf(" | "); /* right close */
+ while (line < address) {
+ c = *line++;
+ printf("%c", (c < 33 || c == 255) ? 0x2E : c);
+ }
+ printf("\n");
+ if (length > 0)
+ printf("%s | ", prefix);
+ }
+ }
+}
+
+/*
+ * Unescape - process hexadecimal escape character
+ * converts shell input "\x23" -> 0x23
+ */
+static int unescape(char *_dst, char *_src, size_t len)
+{
+ int ret = 0;
+ int match;
+ char *src = _src;
+ char *dst = _dst;
+ unsigned int ch;
+
+ while (*src) {
+ if (*src == '\\' && *(src+1) == 'x') {
+ match = sscanf(src + 2, "%2x", &ch);
+ if (!match)
+ pabort("malformed input string");
+
+ src += 4;
+ *dst++ = (unsigned char)ch;
+ } else {
+ *dst++ = *src++;
+ }
+ ret++;
+ }
+ return ret;
+}
+
+static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
+{
+ int ret;
+ int out_fd;
+ struct spi_ioc_transfer tr = {
+ .tx_buf = (unsigned long)tx,
+ .rx_buf = (unsigned long)rx,
+ .len = len,
+ .delay_usecs = delay,
+ .speed_hz = speed,
+ .bits_per_word = bits,
+ };
+
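+	/* widen the I/O lanes when a dual or quad SPI mode was requested */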
+ if (mode & SPI_TX_QUAD)
+ tr.tx_nbits = 4;
+ else if (mode & SPI_TX_DUAL)
+ tr.tx_nbits = 2;
+ if (mode & SPI_RX_QUAD)
+ tr.rx_nbits = 4;
+ else if (mode & SPI_RX_DUAL)
+ tr.rx_nbits = 2;
+ if (!(mode & SPI_LOOP)) {
+ if (mode & (SPI_TX_QUAD | SPI_TX_DUAL))
+ tr.rx_buf = 0;
+ else if (mode & (SPI_RX_QUAD | SPI_RX_DUAL))
+ tr.tx_buf = 0;
+ }
+
+ ret = ioctl(fd, SPI_IOC_MESSAGE(1), &tr);
+ if (ret < 1)
+ pabort("can't send spi message");
+
+ if (verbose)
+ hex_dump(tx, len, 32, "TX");
+
+ if (output_file) {
+ out_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC, 0666);
+ if (out_fd < 0)
+ pabort("could not open output file");
+
+ ret = write(out_fd, rx, len);
+ if (ret != len)
+ pabort("not all bytes written to output file");
+
+ close(out_fd);
+ }
+
+ if (verbose)
+ hex_dump(rx, len, 32, "RX");
+}
+
+static void print_usage(const char *prog)
+{
+ printf("Usage: %s [-DsbdlHOLC3vpNR24SI]\n", prog);
+ puts(" -D --device device to use (default /dev/spidev1.1)\n"
+ " -s --speed max speed (Hz)\n"
+ " -d --delay delay (usec)\n"
+ " -b --bpw bits per word\n"
+ " -i --input input data from a file (e.g. \"test.bin\")\n"
+ " -o --output output data to a file (e.g. \"results.bin\")\n"
+ " -l --loop loopback\n"
+ " -H --cpha clock phase\n"
+ " -O --cpol clock polarity\n"
+ " -L --lsb least significant bit first\n"
+ " -C --cs-high chip select active high\n"
+ " -3 --3wire SI/SO signals shared\n"
+ " -v --verbose Verbose (show tx buffer)\n"
+ " -p Send data (e.g. \"1234\\xde\\xad\")\n"
+ " -N --no-cs no chip select\n"
+ " -R --ready slave pulls low to pause\n"
+ " -2 --dual dual transfer\n"
+ " -4 --quad quad transfer\n"
+ " -S --size transfer size\n"
+ " -I --iter iterations\n");
+ exit(1);
+}
+
+static void parse_opts(int argc, char *argv[])
+{
+ while (1) {
+ static const struct option lopts[] = {
+ { "device", 1, 0, 'D' },
+ { "speed", 1, 0, 's' },
+ { "delay", 1, 0, 'd' },
+ { "bpw", 1, 0, 'b' },
+ { "input", 1, 0, 'i' },
+ { "output", 1, 0, 'o' },
+ { "loop", 0, 0, 'l' },
+ { "cpha", 0, 0, 'H' },
+ { "cpol", 0, 0, 'O' },
+ { "lsb", 0, 0, 'L' },
+ { "cs-high", 0, 0, 'C' },
+ { "3wire", 0, 0, '3' },
+ { "no-cs", 0, 0, 'N' },
+ { "ready", 0, 0, 'R' },
+ { "dual", 0, 0, '2' },
+ { "verbose", 0, 0, 'v' },
+ { "quad", 0, 0, '4' },
+ { "size", 1, 0, 'S' },
+ { "iter", 1, 0, 'I' },
+ { NULL, 0, 0, 0 },
+ };
+ int c;
+
+ c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:vS:I:",
+ lopts, NULL);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'D':
+ device = optarg;
+ break;
+ case 's':
+ speed = atoi(optarg);
+ break;
+ case 'd':
+ delay = atoi(optarg);
+ break;
+ case 'b':
+ bits = atoi(optarg);
+ break;
+ case 'i':
+ input_file = optarg;
+ break;
+ case 'o':
+ output_file = optarg;
+ break;
+ case 'l':
+ mode |= SPI_LOOP;
+ break;
+ case 'H':
+ mode |= SPI_CPHA;
+ break;
+ case 'O':
+ mode |= SPI_CPOL;
+ break;
+ case 'L':
+ mode |= SPI_LSB_FIRST;
+ break;
+ case 'C':
+ mode |= SPI_CS_HIGH;
+ break;
+ case '3':
+ mode |= SPI_3WIRE;
+ break;
+ case 'N':
+ mode |= SPI_NO_CS;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'R':
+ mode |= SPI_READY;
+ break;
+ case 'p':
+ input_tx = optarg;
+ break;
+ case '2':
+ mode |= SPI_TX_DUAL;
+ break;
+ case '4':
+ mode |= SPI_TX_QUAD;
+ break;
+ case 'S':
+ transfer_size = atoi(optarg);
+ break;
+ case 'I':
+ iterations = atoi(optarg);
+ break;
+ default:
+ print_usage(argv[0]);
+ break;
+ }
+ }
+ if (mode & SPI_LOOP) {
+ if (mode & SPI_TX_DUAL)
+ mode |= SPI_RX_DUAL;
+ if (mode & SPI_TX_QUAD)
+ mode |= SPI_RX_QUAD;
+ }
+}
+
+static void transfer_escaped_string(int fd, char *str)
+{
+ size_t size = strlen(str);
+ uint8_t *tx;
+ uint8_t *rx;
+
+ tx = malloc(size);
+ if (!tx)
+ pabort("can't allocate tx buffer");
+
+ rx = malloc(size);
+ if (!rx)
+ pabort("can't allocate rx buffer");
+
+ size = unescape((char *)tx, str, size);
+ transfer(fd, tx, rx, size);
+ free(rx);
+ free(tx);
+}
+
+static void transfer_file(int fd, char *filename)
+{
+ ssize_t bytes;
+ struct stat sb;
+ int tx_fd;
+ uint8_t *tx;
+ uint8_t *rx;
+
+ if (stat(filename, &sb) == -1)
+ pabort("can't stat input file");
+
+ tx_fd = open(filename, O_RDONLY);
+ if (tx_fd < 0)
+ pabort("can't open input file");
+
+ tx = malloc(sb.st_size);
+ if (!tx)
+ pabort("can't allocate tx buffer");
+
+ rx = malloc(sb.st_size);
+ if (!rx)
+ pabort("can't allocate rx buffer");
+
+ bytes = read(tx_fd, tx, sb.st_size);
+ if (bytes != sb.st_size)
+ pabort("failed to read input file");
+
+ transfer(fd, tx, rx, sb.st_size);
+ free(rx);
+ free(tx);
+ close(tx_fd);
+}
+
+static uint64_t _read_count;
+static uint64_t _write_count;
+
+static void show_transfer_rate(void)
+{
+ static uint64_t prev_read_count, prev_write_count;
+ double rx_rate, tx_rate;
+
+ rx_rate = ((_read_count - prev_read_count) * 8) / (interval*1000.0);
+ tx_rate = ((_write_count - prev_write_count) * 8) / (interval*1000.0);
+
+	printf("rate: tx %.1fkbps, rx %.1fkbps\n", tx_rate, rx_rate);
+
+ prev_read_count = _read_count;
+ prev_write_count = _write_count;
+}
+
+static void transfer_buf(int fd, int len)
+{
+ uint8_t *tx;
+ uint8_t *rx;
+ int i;
+
+ tx = malloc(len);
+ if (!tx)
+ pabort("can't allocate tx buffer");
+ for (i = 0; i < len; i++)
+ tx[i] = random();
+
+ rx = malloc(len);
+ if (!rx)
+ pabort("can't allocate rx buffer");
+
+ transfer(fd, tx, rx, len);
+
+ _write_count += len;
+ _read_count += len;
+
+ if (mode & SPI_LOOP) {
+ if (memcmp(tx, rx, len)) {
+ fprintf(stderr, "transfer error !\n");
+ hex_dump(tx, len, 32, "TX");
+ hex_dump(rx, len, 32, "RX");
+ exit(1);
+ }
+ }
+
+ free(rx);
+ free(tx);
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ int fd;
+
+ parse_opts(argc, argv);
+
+ fd = open(device, O_RDWR);
+ if (fd < 0)
+ pabort("can't open device");
+
+ /*
+ * spi mode
+ */
+ ret = ioctl(fd, SPI_IOC_WR_MODE32, &mode);
+ if (ret == -1)
+ pabort("can't set spi mode");
+
+ ret = ioctl(fd, SPI_IOC_RD_MODE32, &mode);
+ if (ret == -1)
+ pabort("can't get spi mode");
+
+ /*
+ * bits per word
+ */
+ ret = ioctl(fd, SPI_IOC_WR_BITS_PER_WORD, &bits);
+ if (ret == -1)
+ pabort("can't set bits per word");
+
+ ret = ioctl(fd, SPI_IOC_RD_BITS_PER_WORD, &bits);
+ if (ret == -1)
+ pabort("can't get bits per word");
+
+ /*
+ * max speed hz
+ */
+ ret = ioctl(fd, SPI_IOC_WR_MAX_SPEED_HZ, &speed);
+ if (ret == -1)
+ pabort("can't set max speed hz");
+
+ ret = ioctl(fd, SPI_IOC_RD_MAX_SPEED_HZ, &speed);
+ if (ret == -1)
+ pabort("can't get max speed hz");
+
+ printf("spi mode: 0x%x\n", mode);
+ printf("bits per word: %d\n", bits);
+	printf("max speed: %d Hz (%d kHz)\n", speed, speed/1000);
+
+ if (input_tx && input_file)
+ pabort("only one of -p and --input may be selected");
+
+ if (input_tx)
+ transfer_escaped_string(fd, input_tx);
+ else if (input_file)
+ transfer_file(fd, input_file);
+ else if (transfer_size) {
+ struct timespec last_stat;
+
+ clock_gettime(CLOCK_MONOTONIC, &last_stat);
+
+ while (iterations-- > 0) {
+ struct timespec current;
+
+ transfer_buf(fd, transfer_size);
+
+ clock_gettime(CLOCK_MONOTONIC, &current);
+ if (current.tv_sec - last_stat.tv_sec > interval) {
+ show_transfer_rate();
+ last_stat = current;
+ }
+ }
+ printf("total: tx %.1fKB, rx %.1fKB\n",
+ _write_count/1024.0, _read_count/1024.0);
+ } else
+ transfer(fd, default_tx, default_rx, sizeof(default_tx));
+
+ close(fd);
+
+ return ret;
+}
diff --git a/tools/testing/fault-injection/failcmd.sh b/tools/testing/fault-injection/failcmd.sh
new file mode 100644
index 000000000..29a6c63c5
--- /dev/null
+++ b/tools/testing/fault-injection/failcmd.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# NAME
+# failcmd.sh - run a command with injecting slab/page allocation failures
+#
+# SYNOPSIS
+# failcmd.sh --help
+# failcmd.sh [<options>] command [arguments]
+#
+# DESCRIPTION
+# Run command with injecting slab/page allocation failures by fault
+# injection.
+#
+# NOTE: you need to run this script as root.
+#
+
+usage()
+{
+ cat >&2 <<EOF
+Usage: $0 [options] command [arguments]
+
+OPTIONS
+ -p percent
+ --probability=percent
+ likelihood of failure injection, in percent.
+ Default value is 1
+
+ -t value
+ --times=value
+ specifies how many times failures may happen at most.
+ Default value is 1
+
+ --oom-kill-allocating-task=value
+ set /proc/sys/vm/oom_kill_allocating_task to specified value
+ before running the command.
+ Default value is 1
+
+ -h, --help
+ Display a usage message and exit
+
+ --interval=value, --space=value, --verbose=value, --task-filter=value,
+ --stacktrace-depth=value, --require-start=value, --require-end=value,
+ --reject-start=value, --reject-end=value, --ignore-gfp-wait=value
+ See Documentation/fault-injection/fault-injection.txt for more
+ information
+
+ failslab options:
+ --cache-filter=value
+
+ fail_page_alloc options:
+ --ignore-gfp-highmem=value, --min-order=value
+
+ENVIRONMENT
+ FAILCMD_TYPE
+ The following values for FAILCMD_TYPE are recognized:
+
+ failslab
+ inject slab allocation failures
+ fail_page_alloc
+ inject page allocation failures
+
+ If FAILCMD_TYPE is not defined, then failslab is used.
+EOF
+}
+
+if [ $UID != 0 ]; then
+ echo must be run as root >&2
+ exit 1
+fi
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3}'`
+
+if [ ! -d "$DEBUGFS" ]; then
+ echo debugfs is not mounted >&2
+ exit 1
+fi
+
+FAILCMD_TYPE=${FAILCMD_TYPE:-failslab}
+FAULTATTR=$DEBUGFS/$FAILCMD_TYPE
+
+if [ ! -d $FAULTATTR ]; then
+ echo $FAILCMD_TYPE is not available >&2
+ exit 1
+fi
+
+LONGOPTS=probability:,interval:,times:,space:,verbose:,task-filter:
+LONGOPTS=$LONGOPTS,stacktrace-depth:,require-start:,require-end:
+LONGOPTS=$LONGOPTS,reject-start:,reject-end:,oom-kill-allocating-task:,help
+
+if [ $FAILCMD_TYPE = failslab ]; then
+ LONGOPTS=$LONGOPTS,ignore-gfp-wait:,cache-filter:
+elif [ $FAILCMD_TYPE = fail_page_alloc ]; then
+ LONGOPTS=$LONGOPTS,ignore-gfp-wait:,ignore-gfp-highmem:,min-order:
+fi
+
+TEMP=`getopt -o p:i:t:s:v:h --long $LONGOPTS -n 'failcmd.sh' -- "$@"`
+
+if [ $? != 0 ]; then
+ usage
+ exit 1
+fi
+
+eval set -- "$TEMP"
+
+fault_attr_default()
+{
+ echo N > $FAULTATTR/task-filter
+ echo 0 > $FAULTATTR/probability
+ echo 1 > $FAULTATTR/times
+}
+
+fault_attr_default
+
+oom_kill_allocating_task_saved=`cat /proc/sys/vm/oom_kill_allocating_task`
+
+restore_values()
+{
+ fault_attr_default
+ echo $oom_kill_allocating_task_saved \
+ > /proc/sys/vm/oom_kill_allocating_task
+}
+
+#
+# Default options
+#
+declare -i oom_kill_allocating_task=1
+declare task_filter=Y
+declare -i probability=1
+declare -i times=1
+
+while true; do
+ case "$1" in
+ -p|--probability)
+ probability=$2
+ shift 2
+ ;;
+ -i|--interval)
+ echo $2 > $FAULTATTR/interval
+ shift 2
+ ;;
+ -t|--times)
+ times=$2
+ shift 2
+ ;;
+ -s|--space)
+ echo $2 > $FAULTATTR/space
+ shift 2
+ ;;
+ -v|--verbose)
+ echo $2 > $FAULTATTR/verbose
+ shift 2
+ ;;
+ --task-filter)
+ task_filter=$2
+ shift 2
+ ;;
+ --stacktrace-depth)
+ echo $2 > $FAULTATTR/stacktrace-depth
+ shift 2
+ ;;
+ --require-start)
+ echo $2 > $FAULTATTR/require-start
+ shift 2
+ ;;
+ --require-end)
+ echo $2 > $FAULTATTR/require-end
+ shift 2
+ ;;
+ --reject-start)
+ echo $2 > $FAULTATTR/reject-start
+ shift 2
+ ;;
+ --reject-end)
+ echo $2 > $FAULTATTR/reject-end
+ shift 2
+ ;;
+ --oom-kill-allocating-task)
+ oom_kill_allocating_task=$2
+ shift 2
+ ;;
+ --ignore-gfp-wait)
+ echo $2 > $FAULTATTR/ignore-gfp-wait
+ shift 2
+ ;;
+ --cache-filter)
+ echo $2 > $FAULTATTR/cache_filter
+ shift 2
+ ;;
+ --ignore-gfp-highmem)
+ echo $2 > $FAULTATTR/ignore-gfp-highmem
+ shift 2
+ ;;
+ --min-order)
+ echo $2 > $FAULTATTR/min-order
+ shift 2
+ ;;
+ -h|--help)
+ usage
+ exit 0
+ ;;
+ --)
+ shift
+ break
+ ;;
+ esac
+done
+
+[ -z "$1" ] && exit 0
+
+echo $oom_kill_allocating_task > /proc/sys/vm/oom_kill_allocating_task
+echo $task_filter > $FAULTATTR/task-filter
+echo $probability > $FAULTATTR/probability
+echo $times > $FAULTATTR/times
+
+trap "restore_values" SIGINT SIGTERM EXIT
+
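+# writing 1 to /proc/self/make-it-fail marks this task (and the command
+# it execs) as a fault-injection target for the task-filter setting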
+cmd="echo 1 > /proc/self/make-it-fail && exec $@"
+bash -c "$cmd"
diff --git a/tools/testing/ktest/compare-ktest-sample.pl b/tools/testing/ktest/compare-ktest-sample.pl
new file mode 100755
index 000000000..ebea21d0a
--- /dev/null
+++ b/tools/testing/ktest/compare-ktest-sample.pl
@@ -0,0 +1,33 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+
+open (IN,"ktest.pl");
+while (<IN>) {
+ # hashes are now used
+ if (/\$opt\{"?([A-Z].*?)(\[.*\])?"?\}/ ||
+ /^\s*"?([A-Z].*?)"?\s*=>\s*/ ||
+ /set_test_option\("(.*?)"/) {
+ $opt{$1} = 1;
+ }
+}
+close IN;
+
+open (IN, "sample.conf");
+while (<IN>) {
+ if (/^\s*#?\s*([A-Z]\S*)\s*=/) {
+ $samp{$1} = 1;
+ }
+}
+close IN;
+
+foreach $opt (keys %opt) {
+ if (!defined($samp{$opt})) {
+ print "opt = $opt\n";
+ }
+}
+
+foreach $samp (keys %samp) {
+ if (!defined($opt{$samp})) {
+ print "samp = $samp\n";
+ }
+}
diff --git a/tools/testing/ktest/config-bisect.pl b/tools/testing/ktest/config-bisect.pl
new file mode 100755
index 000000000..b28feea7c
--- /dev/null
+++ b/tools/testing/ktest/config-bisect.pl
@@ -0,0 +1,770 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2015 - Steven Rostedt, Red Hat Inc.
+# Copyright 2017 - Steven Rostedt, VMware, Inc.
+#
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+# usage:
+# config-bisect.pl [options] good-config bad-config [good|bad]
+#
+
+# Compares a good config to a bad config, then takes half of the diffs
+# and produces a config that is somewhere between the good config and
+# the bad config. That is, the resulting config will start with the
+# good config and will try to make half of the differences between
+# the good and bad configs match the bad config. It only "tries"
+# because, due to dependencies between the two configs, it may not be
+# able to change exactly half of the options that differ between the
+# two config files.
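+#
+# For example, if 16 config options differ, the first run flips roughly
+# half of them; each subsequent good/bad verdict halves the remaining
+# set, so about log2(16) = 4 test builds isolate the offending option.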
+
+# Here's a normal way to use it:
+#
+# $ cd /path/to/linux/kernel
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config
+
+# This will now pull in the good config (blowing away .config in that
+# directory, so do not make .config one of the good or bad configs), and
+# then build it with "make oldconfig" to make sure it matches the
+# current kernel. It then stores the result as the good config. It does
+# the same for the bad config as well.
+# The algorithm will run, merging half of the differences between
+# the two configs and building them with "make oldconfig" to make sure
+# the result changes (dependencies may reset changes the tool had made).
+# It then copies the result of its good config to /path/to/good/config.tmp
+# and the bad config to /path/to/bad/config.tmp (just appends ".tmp" to the
+# files passed in). And the ".config" that you should test will be in
+# the build directory.
+
+# After the first run, determine if the result is good or bad then
+# run the same command appending the result
+
+# For good results:
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config good
+
+# For bad results:
+# $ config-bisect.pl /path/to/good/config /path/to/bad/config bad
+
+# Do not change the good-config or bad-config, config-bisect.pl will
+# copy the good-config to a temp file with the same name as good-config
+# but with a ".tmp" after it. It will do the same with the bad-config.
+
+# If "good" or "bad" is not stated at the end, it will copy the good and
+# bad configs to the .tmp versions. If a .tmp version already exists, it will
+# warn before writing over them (-r will not warn, and just write over them).
+# If the last config is labeled "good", then it will copy it to the good .tmp
+# version. If the last config is labeled "bad", it will copy it to the bad
+# .tmp version. It will continue this until it cannot merge the two any more
+# without the result being equal to either the good or bad .tmp configs.
+
+my $start = 0;
+my $val = "";
+
+my $pwd = `pwd`;
+chomp $pwd;
+my $tree = $pwd;
+my $build;
+
+my $output_config;
+my $reset_bisect;
+
+sub usage {
+ print << "EOF"
+
+usage: config-bisect.pl [-l linux-tree][-b build-dir] good-config bad-config [good|bad]
+ -l [optional] define location of linux-tree (default is current directory)
+ -b [optional] define location to build (O=build-dir) (default is linux-tree)
+ good-config the config that is considered good
+ bad-config the config that does not work
+ "good" add this if the last run produced a good config
+ "bad" add this if the last run produced a bad config
+ If "good" or "bad" is not specified, then it is the start of a new bisect
+
+ Note, each run will create copy of good and bad configs with ".tmp" appended.
+
+EOF
+;
+
+ exit(-1);
+}
+
+sub doprint {
+ print @_;
+}
+
+sub dodie {
+ doprint "CRITICAL FAILURE... ", @_, "\n";
+
+ die @_, "\n";
+}
+
+sub expand_path {
+ my ($file) = @_;
+
+ if ($file =~ m,^/,) {
+ return $file;
+ }
+ return "$pwd/$file";
+}
+
+sub read_prompt {
+ my ($cancel, $prompt) = @_;
+
+ my $ans;
+
+ for (;;) {
+ if ($cancel) {
+ print "$prompt [y/n/C] ";
+ } else {
+ print "$prompt [y/N] ";
+ }
+ $ans = <STDIN>;
+ chomp $ans;
+ if ($ans =~ /^\s*$/) {
+ if ($cancel) {
+ $ans = "c";
+ } else {
+ $ans = "n";
+ }
+ }
+ last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+ if ($cancel) {
+ last if ($ans =~ /^c$/i);
+ print "Please answer either 'y', 'n' or 'c'.\n";
+ } else {
+ print "Please answer either 'y' or 'n'.\n";
+ }
+ }
+ if ($ans =~ /^c/i) {
+ exit;
+ }
+ if ($ans !~ /^y$/i) {
+ return 0;
+ }
+ return 1;
+}
+
+sub read_yn {
+ my ($prompt) = @_;
+
+ return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+ my ($prompt) = @_;
+
+ return read_prompt 1, $prompt;
+}
+
+sub run_command {
+ my ($command, $redirect) = @_;
+ my $start_time;
+ my $end_time;
+ my $dord = 0;
+ my $pid;
+
+ $start_time = time;
+
+ doprint("$command ... ");
+
+ $pid = open(CMD, "$command 2>&1 |") or
+ dodie "unable to exec $command";
+
+ if (defined($redirect)) {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
+
+ while (<CMD>) {
+ print RD if ($dord);
+ }
+
+ waitpid($pid, 0);
+ my $failed = $?;
+
+ close(CMD);
+ close(RD) if ($dord);
+
+ $end_time = time;
+ my $delta = $end_time - $start_time;
+
+ if ($delta == 1) {
+ doprint "[1 second] ";
+ } else {
+ doprint "[$delta seconds] ";
+ }
+
+ if ($failed) {
+ doprint "FAILED!\n";
+ } else {
+ doprint "SUCCESS\n";
+ }
+
+ return !$failed;
+}
+
+###### CONFIG BISECT ######
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+my $make;
+
+sub make_oldconfig {
+
+ if (!run_command "$make olddefconfig") {
+		# Perhaps olddefconfig doesn't exist in this version of the kernel;
+		# try oldnoconfig.
+ doprint "olddefconfig failed, trying make oldnoconfig\n";
+ if (!run_command "$make oldnoconfig") {
+ doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+ # try a yes '' | oldconfig
+ run_command "yes '' | $make oldconfig" or
+ dodie "failed make config oldconfig";
+ }
+ }
+}
+
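+# Read a config file into a hash keyed by config name. For example
+# (illustrative), a file containing:
+#
+#   CONFIG_SMP=y
+#   # CONFIG_DEBUG_INFO is not set
+#
+# produces:
+#
+#   CONFIG_SMP        => "CONFIG_SMP=y"
+#   CONFIG_DEBUG_INFO => "# CONFIG_DEBUG_INFO is not set"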
+sub assign_configs {
+ my ($hash, $config) = @_;
+
+ doprint "Reading configs from $config\n";
+
+ open (IN, $config)
+ or dodie "Failed to read $config";
+
+ while (<IN>) {
+ chomp;
+ if (/^((CONFIG\S*)=.*)/) {
+ ${$hash}{$2} = $1;
+ } elsif (/^(# (CONFIG\S*) is not set)/) {
+ ${$hash}{$2} = $1;
+ }
+ }
+
+ close(IN);
+}
+
+sub process_config_ignore {
+ my ($config) = @_;
+
+ assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+ my ($config) = @_;
+
+ my $arr = $dependency{$config};
+ if (!defined($arr)) {
+ return ();
+ }
+
+ my @deps = @{$arr};
+
+ foreach my $dep (@{$arr}) {
+ print "ADD DEP $dep\n";
+ @deps = (@deps, get_dependencies $dep);
+ }
+
+ return @deps;
+}
+
+sub save_config {
+ my ($pc, $file) = @_;
+
+ my %configs = %{$pc};
+
+ doprint "Saving configs into $file\n";
+
+ open(OUT, ">$file") or dodie "Can not write to $file";
+
+ foreach my $config (keys %configs) {
+ print OUT "$configs{$config}\n";
+ }
+ close(OUT);
+}
+
+sub create_config {
+ my ($name, $pc) = @_;
+
+ doprint "Creating old config from $name configs\n";
+
+ save_config $pc, $output_config;
+
+ make_oldconfig;
+}
+
+# Compare two config hashes and return the configs whose values differ.
+# The returned values are B's, but you can look up A to see what A had.
+sub diff_config_vals {
+ my ($pa, $pb) = @_;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ my %ret;
+
+ foreach my $item (keys %a) {
+ if (defined($b{$item}) && $b{$item} ne $a{$item}) {
+ $ret{$item} = $b{$item};
+ }
+ }
+
+ return %ret;
+}
+
+# compare two config hashes and return the configs in B but not A
+sub diff_configs {
+ my ($pa, $pb) = @_;
+
+ my %ret;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ foreach my $item (keys %b) {
+ if (!defined($a{$item})) {
+ $ret{$item} = $b{$item};
+ }
+ }
+
+ return %ret;
+}
+
+# Return whether two configs are equal or not:
+#  0 if they are equal
+# +1 if b has an item a does not, or if an item differs between a and b
+# -1 if a has an item b does not
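+#
+# For example (illustrative), if a = { X => "CONFIG_X=y" } and
+# b = { X => "CONFIG_X=y", Y => "CONFIG_Y=m" }, this returns +1,
+# as b has an item that a does not.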
+sub compare_configs {
+ my ($pa, $pb) = @_;
+
+ my %ret;
+
+ # crappy Perl way to pass in hashes.
+ my %a = %{$pa};
+ my %b = %{$pb};
+
+ foreach my $item (keys %b) {
+ if (!defined($a{$item})) {
+ return 1;
+ }
+ if ($a{$item} ne $b{$item}) {
+ return 1;
+ }
+ }
+
+ foreach my $item (keys %a) {
+ if (!defined($b{$item})) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+sub process_failed {
+ my ($config) = @_;
+
+ doprint "\n\n***************************************\n";
+ doprint "Found bad config: $config\n";
+ doprint "***************************************\n\n";
+}
+
+sub process_new_config {
+ my ($tc, $nc, $gc, $bc) = @_;
+
+ my %tmp_config = %{$tc};
+ my %good_configs = %{$gc};
+ my %bad_configs = %{$bc};
+
+ my %new_configs;
+
+ my $runtest = 1;
+ my $ret;
+
+ create_config "tmp_configs", \%tmp_config;
+ assign_configs \%new_configs, $output_config;
+
+ $ret = compare_configs \%new_configs, \%bad_configs;
+ if (!$ret) {
+ doprint "New config equals bad config, try next test\n";
+ $runtest = 0;
+ }
+
+ if ($runtest) {
+ $ret = compare_configs \%new_configs, \%good_configs;
+ if (!$ret) {
+ doprint "New config equals good config, try next test\n";
+ $runtest = 0;
+ }
+ }
+
+ %{$nc} = %new_configs;
+
+ return $runtest;
+}
+
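+# Strip a config line down to a short "NAME=val" form for printing,
+# e.g. "CONFIG_SMP=y" becomes "SMP=y" and
+# "# CONFIG_DEBUG_INFO is not set" becomes "DEBUG_INFO=n".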
+sub convert_config {
+ my ($config) = @_;
+
+ if ($config =~ /^# (.*) is not set/) {
+ $config = "$1=n";
+ }
+
+ $config =~ s/^CONFIG_//;
+ return $config;
+}
+
+sub print_config {
+ my ($sym, $config) = @_;
+
+ $config = convert_config $config;
+ doprint "$sym$config\n";
+}
+
+sub print_config_compare {
+ my ($good_config, $bad_config) = @_;
+
+ $good_config = convert_config $good_config;
+ $bad_config = convert_config $bad_config;
+
+ my $good_value = $good_config;
+ my $bad_value = $bad_config;
+ $good_value =~ s/(.*)=//;
+ my $config = $1;
+
+ $bad_value =~ s/.*=//;
+
+ doprint " $config $good_value -> $bad_value\n";
+}
+
+# Pass in:
+# $phalf: half of the configs names you want to add
+# $oconfigs: The original configs to start with
+# $sconfigs: The source to update $oconfigs with (from $phalf)
+# $which: The name of the half being updated (top / bottom)
+# $type: The name of the source type (good / bad)
+sub make_half {
+ my ($phalf, $oconfigs, $sconfigs, $which, $type) = @_;
+
+ my @half = @{$phalf};
+ my %orig_configs = %{$oconfigs};
+ my %source_configs = %{$sconfigs};
+
+ my %tmp_config = %orig_configs;
+
+ doprint "Settings bisect with $which half of $type configs:\n";
+ foreach my $item (@half) {
+ doprint "Updating $item to $source_configs{$item}\n";
+ $tmp_config{$item} = $source_configs{$item};
+ }
+
+ return %tmp_config;
+}
+
+sub run_config_bisect {
+ my ($pgood, $pbad) = @_;
+
+ my %good_configs = %{$pgood};
+ my %bad_configs = %{$pbad};
+
+ my %diff_configs = diff_config_vals \%good_configs, \%bad_configs;
+ my %b_configs = diff_configs \%good_configs, \%bad_configs;
+ my %g_configs = diff_configs \%bad_configs, \%good_configs;
+
+	# diff_arr is what is in both good and bad but with different values (e.g. y->n)
+ my @diff_arr = keys %diff_configs;
+ my $len_diff = $#diff_arr + 1;
+
+ # b_arr is what is in bad but not in good (has depends)
+ my @b_arr = keys %b_configs;
+ my $len_b = $#b_arr + 1;
+
+ # g_arr is what is in good but not in bad
+ my @g_arr = keys %g_configs;
+ my $len_g = $#g_arr + 1;
+
+ my $runtest = 0;
+ my %new_configs;
+ my $ret;
+
+ # Look at the configs that are different between good and bad.
+ # This does not include those that depend on other configs
+ # (configs depending on other configs that are not set would
+	# not show up even as a "# CONFIG_FOO is not set").
+
+
+ doprint "# of configs to check: $len_diff\n";
+ doprint "# of configs showing only in good: $len_g\n";
+ doprint "# of configs showing only in bad: $len_b\n";
+
+ if ($len_diff > 0) {
+ # Now test for different values
+
+ doprint "Configs left to check:\n";
+ doprint " Good Config\t\t\tBad Config\n";
+ doprint " -----------\t\t\t----------\n";
+ foreach my $item (@diff_arr) {
+ doprint " $good_configs{$item}\t$bad_configs{$item}\n";
+ }
+
+ my $half = int($#diff_arr / 2);
+ my @tophalf = @diff_arr[0 .. $half];
+
+ doprint "Set tmp config to be good config with some bad config values\n";
+
+ my %tmp_config = make_half \@tophalf, \%good_configs,
+ \%bad_configs, "top", "bad";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+
+ if (!$runtest) {
+ doprint "Set tmp config to be bad config with some good config values\n";
+
+ my %tmp_config = make_half \@tophalf, \%bad_configs,
+ \%good_configs, "top", "good";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+ }
+ }
+
+ if (!$runtest && $len_diff > 0) {
+ # do the same thing, but this time with bottom half
+
+ my $half = int($#diff_arr / 2);
+ my @bottomhalf = @diff_arr[$half+1 .. $#diff_arr];
+
+ doprint "Set tmp config to be good config with some bad config values\n";
+
+ my %tmp_config = make_half \@bottomhalf, \%good_configs,
+ \%bad_configs, "bottom", "bad";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+
+ if (!$runtest) {
+ doprint "Set tmp config to be bad config with some good config values\n";
+
+ my %tmp_config = make_half \@bottomhalf, \%bad_configs,
+ \%good_configs, "bottom", "good";
+
+ $runtest = process_new_config \%tmp_config, \%new_configs,
+ \%good_configs, \%bad_configs;
+ }
+ }
+
+ if ($runtest) {
+ make_oldconfig;
+ doprint "READY TO TEST .config IN $build\n";
+ return 0;
+ }
+
+ doprint "\n%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+ doprint "Hmm, can't make any more changes without making good == bad?\n";
+ doprint "Difference between good (+) and bad (-)\n";
+
+ foreach my $item (keys %bad_configs) {
+ if (!defined($good_configs{$item})) {
+ print_config "-", $bad_configs{$item};
+ }
+ }
+
+ foreach my $item (keys %good_configs) {
+ next if (!defined($bad_configs{$item}));
+ if ($good_configs{$item} ne $bad_configs{$item}) {
+ print_config_compare $good_configs{$item}, $bad_configs{$item};
+ }
+ }
+
+ foreach my $item (keys %good_configs) {
+ if (!defined($bad_configs{$item})) {
+ print_config "+", $good_configs{$item};
+ }
+ }
+ return -1;
+}
+
+sub config_bisect {
+ my ($good_config, $bad_config) = @_;
+ my $ret;
+
+ my %good_configs;
+ my %bad_configs;
+ my %tmp_configs;
+
+ doprint "Run good configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $good_config;
+ create_config "$good_config", \%tmp_configs;
+ assign_configs \%good_configs, $output_config;
+
+ doprint "Run bad configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $bad_config;
+ create_config "$bad_config", \%tmp_configs;
+ assign_configs \%bad_configs, $output_config;
+
+ save_config \%good_configs, $good_config;
+ save_config \%bad_configs, $bad_config;
+
+ return run_config_bisect \%good_configs, \%bad_configs;
+}
+
+while ($#ARGV >= 0) {
+ if ($ARGV[0] !~ m/^-/) {
+ last;
+ }
+ my $opt = shift @ARGV;
+
+ if ($opt eq "-b") {
+ $val = shift @ARGV;
+ if (!defined($val)) {
+ die "-b requires value\n";
+ }
+ $build = $val;
+ }
+
+ elsif ($opt eq "-l") {
+ $val = shift @ARGV;
+ if (!defined($val)) {
+ die "-l requires value\n";
+ }
+ $tree = $val;
+ }
+
+ elsif ($opt eq "-r") {
+ $reset_bisect = 1;
+ }
+
+ elsif ($opt eq "-h") {
+ usage;
+ }
+
+ else {
+ die "Unknow option $opt\n";
+ }
+}
+
+$build = $tree if (!defined($build));
+
+$tree = expand_path $tree;
+$build = expand_path $build;
+
+if ( ! -d $tree ) {
+ die "$tree not a directory\n";
+}
+
+if ( ! -d $build ) {
+ die "$build not a directory\n";
+}
+
+usage if $#ARGV < 1;
+
+if ($#ARGV == 1) {
+ $start = 1;
+} elsif ($#ARGV == 2) {
+ $val = $ARGV[2];
+ if ($val ne "good" && $val ne "bad") {
+ die "Unknown command '$val', bust be either \"good\" or \"bad\"\n";
+ }
+} else {
+ usage;
+}
+
+my $good_start = expand_path $ARGV[0];
+my $bad_start = expand_path $ARGV[1];
+
+my $good = "$good_start.tmp";
+my $bad = "$bad_start.tmp";
+
+$make = "make";
+
+if ($build ne $tree) {
+	$make = "make O=$build";
+}
+
+$output_config = "$build/.config";
+
+if ($start) {
+ if ( ! -f $good_start ) {
+ die "$good_start not found\n";
+ }
+ if ( ! -f $bad_start ) {
+ die "$bad_start not found\n";
+ }
+ if ( -f $good || -f $bad ) {
+ my $p = "";
+
+ if ( -f $good ) {
+ $p = "$good exists\n";
+ }
+
+ if ( -f $bad ) {
+ $p = "$p$bad exists\n";
+ }
+
+ if (!defined($reset_bisect)) {
+ if (!read_yn "${p}Overwrite and start new bisect anyway?") {
+ exit (-1);
+ }
+ }
+ }
+ run_command "cp $good_start $good" or die "failed to copy to $good\n";
+ run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+} else {
+ if ( ! -f $good ) {
+ die "Can not find file $good\n";
+ }
+ if ( ! -f $bad ) {
+ die "Can not find file $bad\n";
+ }
+ if ($val eq "good") {
+ run_command "cp $output_config $good" or die "failed to copy $config to $good\n";
+ } elsif ($val eq "bad") {
+ run_command "cp $output_config $bad" or die "failed to copy $config to $bad\n";
+ }
+}
+
+chdir $tree or die "can't change directory to $tree";
+
+my $ret = config_bisect $good, $bad;
+
+if (!$ret) {
+ exit(0);
+}
+
+if ($ret > 0) {
+ doprint "Cleaning temp files\n";
+ run_command "rm $good";
+ run_command "rm $bad";
+ exit(1);
+} else {
+ doprint "See good and bad configs for details:\n";
+ doprint "good: $good\n";
+ doprint "bad: $bad\n";
+ doprint "%%%%%%%% FAILED TO FIND SINGLE BAD CONFIG %%%%%%%%\n";
+}
+exit(2);
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
new file mode 100644
index 000000000..a12d295a0
--- /dev/null
+++ b/tools/testing/ktest/examples/README
@@ -0,0 +1,32 @@
+This directory contains example configs to use ktest for various tasks.
+The configs still need to be customized for your environment, but they
+are broken up by task, which makes it easier to understand how to set up
+ktest.
+
+The configs are based on real working configs but have been modified
+and commented to show more generic use cases that are more helpful for
+developers.
+
+crosstests.conf - this config shows an example of testing a git repo against
+ lots of different architectures. It only does build tests, but makes
+ it easy to compile test different archs. You can download the arch
+ cross compilers from:
+ http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+
+test.conf - A generic example of a config. This is based on an actual config
+ used to perform real testing.
+
+kvm.conf - An example of a config that is used to test a virtual guest running
+ on a host.
+
+snowball.conf - An example config that was used to demo ktest.pl against
+ a snowball ARM board.
+
+include/ - The include directory holds default configs that can be
+ included into other configs. This is a real use example that shows how
+	to reuse configs for various machines or setups. The files here
+ are included by other config files, where the other config files define
+ options and variables that will make the included config work for the
+ given environment.
+
+
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
new file mode 100644
index 000000000..6907f3259
--- /dev/null
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -0,0 +1,225 @@
+#
+# Example config for cross compiling
+#
+# In this config, it is expected that the tool chains from:
+#
+# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+#
+# running on a x86_64 system have been downloaded and installed into:
+#
+# /usr/local/
+#
+# such that the compiler binaries are something like:
+#
+# /usr/local/gcc-4.5.2-nolibc/mips-linux/bin/mips-linux-gcc
+#
+# Some of the archs will use gcc-4.5.1 instead of gcc-4.5.2
+# this config uses variables to differentiate them.
+#
+# Comments describe some of the options, but full descriptions of
+# options are described in the samples.conf file.
+
+# ${PWD} is defined by ktest.pl to be the directory that the user
+# was in when they executed ktest.pl. It may be better to hardcode the
+# path name here. THIS_DIR is the variable used throughout the config file
+# in case you want to change it.
+
+THIS_DIR := ${PWD}
+
+# Update the BUILD_DIR option to the location of your git repo you want to test.
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# The build will go into this directory. It will be created when you run the test.
+OUTPUT_DIR = ${THIS_DIR}/cross-compile
+
+# The build will be compiled with -j8
+BUILD_OPTIONS = -j8
+
+# The test will not stop when it hits a failure.
+DIE_ON_FAILURE = 0
+
+# If you want to have ktest.pl store the failure somewhere, uncomment this option
+# and change the directory where ktest should store the failures.
+#STORE_FAILURES = ${THIS_DIR}/failures
+
+# The log file is stored in the OUTPUT_DIR called cross.log
+# If you enable this, you need to create the OUTPUT_DIR. It won't be created for you.
+LOG_FILE = ${OUTPUT_DIR}/cross.log
+
+# The log file will be cleared each time you run ktest.
+CLEAR_LOG = 1
+
+# As some archs do not build with the defconfig, they have been marked
+# to be ignored. If you want to test them anyway, change DO_FAILED to 1.
+# If a test that has been marked as DO_FAILED passes, then you should change
+# that test to be DO_DEFAULT
+
+DO_FAILED := 0
+DO_DEFAULT := 1
+
+# By setting both DO_FAILED and DO_DEFAULT to zero, you can pick a single
+# arch that you want to test. (uncomment RUN and choose your arch)
+#RUN := arm
+
+# At the bottom of the config file exists a bisect test. You can update that
+# test and set DO_FAILED and DO_DEFAULT to zero, and uncomment this variable
+# to run the bisect on the arch.
+#RUN := bisect
+
+# By default all tests will be running gcc 4.5.2. Some tests are using 4.5.1
+# and they select that in the test.
+# Note: GCC_VER is declared as an option and not a variable ('=' instead of ':=').
+# This is important. A variable is used only while parsing the config file, and if
+# it is set, it stays that way for the rest of the config file until it is changed
+# again. Here we want GCC_VER to remain persistent and to change for each test, as
+# it is used in the MAKE_CMD. By using '=' instead of ':=' we achieve our goal.
+
+GCC_VER = 4.5.2
+MAKE_CMD = PATH=/usr/local/gcc-${GCC_VER}-nolibc/${CROSS}/bin:$PATH CROSS_COMPILE=${CROSS}- make ARCH=${ARCH}
+
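+# For example (an illustrative sketch), a test that needs the older
+# compiler would select it like this:
+#
+#   TEST_START IF ${RUN} == mips-old-gcc
+#   GCC_VER = 4.5.1
+#   CROSS = mips-linux
+#   ARCH = mips
+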
+# all tests are only doing builds.
+TEST_TYPE = build
+
+# If you want to add configs on top of the defconfig, you can add those configs into
+# the add-config file and uncomment this option. This is useful if you want to test
+# all cross compiles with PREEMPT set, or TRACING on, etc.
+#ADD_CONFIG = ${THIS_DIR}/add-config
+
+# All tests are using defconfig
+BUILD_TYPE = defconfig
+
+# The test names will have the arch and cross compiler used. This will be shown in
+# the results.
+TEST_NAME = ${ARCH} ${CROSS}
+
+# alpha
+TEST_START IF ${RUN} == alpha || ${DO_DEFAULT}
+# Notice that CROSS and ARCH are also options and not variables (again '=' instead
+# of ':='). This is because TEST_NAME and MAKE_CMD will use them for each test.
+# Only options are available during runs. Variables are only present in parsing the
+# config file.
+CROSS = alpha-linux
+ARCH = alpha
+
+# arm
+TEST_START IF ${RUN} == arm || ${DO_DEFAULT}
+CROSS = arm-unknown-linux-gnueabi
+ARCH = arm
+
+# ia64
+TEST_START IF ${RUN} == ia64 || ${DO_DEFAULT}
+CROSS = ia64-linux
+ARCH = ia64
+
+# m68k fails with error?
+TEST_START IF ${RUN} == m68k || ${DO_DEFAULT}
+CROSS = m68k-linux
+ARCH = m68k
+
+# mips64
+TEST_START IF ${RUN} == mips || ${RUN} == mips64 || ${DO_DEFAULT}
+CROSS = mips64-linux
+ARCH = mips
+
+# mips32
+TEST_START IF ${RUN} == mips || ${RUN} == mips32 || ${DO_DEFAULT}
+CROSS = mips-linux
+ARCH = mips
+
+# parisc64 failed?
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa64 || ${DO_FAILED}
+CROSS = hppa64-linux
+ARCH = parisc
+
+# parisc
+TEST_START IF ${RUN} == hppa || ${RUN} == hppa32 || ${DO_FAILED}
+CROSS = hppa-linux
+ARCH = parisc
+
+# ppc
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc32 || ${DO_DEFAULT}
+CROSS = powerpc-linux
+ARCH = powerpc
+
+# ppc64
+TEST_START IF ${RUN} == ppc || ${RUN} == ppc64 || ${DO_DEFAULT}
+CROSS = powerpc64-linux
+ARCH = powerpc
+
+# s390
+TEST_START IF ${RUN} == s390 || ${DO_DEFAULT}
+CROSS = s390x-linux
+ARCH = s390
+
+# sh
+TEST_START IF ${RUN} == sh || ${DO_DEFAULT}
+CROSS = sh4-linux
+ARCH = sh
+
+# sparc64
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc64 || ${DO_DEFAULT}
+CROSS = sparc64-linux
+ARCH = sparc64
+
+# sparc
+TEST_START IF ${RUN} == sparc || ${RUN} == sparc32 || ${DO_DEFAULT}
+CROSS = sparc-linux
+ARCH = sparc
+
+# xtensa failed
+TEST_START IF ${RUN} == xtensa || ${DO_FAILED}
+CROSS = xtensa-linux
+ARCH = xtensa
+
+# UML
+TEST_START IF ${RUN} == uml || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=um SUBARCH=x86_64
+ARCH = uml
+CROSS =
+
+TEST_START IF ${RUN} == x86 || ${RUN} == i386 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=i386
+ARCH = i386
+CROSS =
+
+TEST_START IF ${RUN} == x86 || ${RUN} == x86_64 || ${DO_DEFAULT}
+MAKE_CMD = make ARCH=x86_64
+ARCH = x86_64
+CROSS =
+
+#################################
+
+# This is a bisect if needed. You need to give it a MIN_CONFIG that
+# will be the config file it uses. Basically, just copy the created defconfig
+# for the arch someplace and point MIN_CONFIG to it.
+TEST_START IF ${RUN} == bisect
+MIN_CONFIG = ${THIS_DIR}/min-config
+CROSS = s390x-linux
+ARCH = s390
+TEST_TYPE = bisect
+BISECT_TYPE = build
+BISECT_GOOD = v3.1
+BISECT_BAD = v3.2
+CHECKOUT = v3.2
+
+#################################
+
+# These defaults are needed to keep ktest.pl from complaining. They are
+# ignored because the tests do not go past the build. There is no install
+# or booting of the target images.
+
+DEFAULTS
+MACHINE = crosstest
+SSH_USER = root
+BUILD_TARGET = cross
+TARGET_IMAGE = image
+POWER_CYCLE = cycle
+CONSOLE = console
+LOCALVERSION = version
+GRUB_MENU = grub
+
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+REBOOT_ON_SUCCESS = 0
+
diff --git a/tools/testing/ktest/examples/include/bisect.conf b/tools/testing/ktest/examples/include/bisect.conf
new file mode 100644
index 000000000..009bea65b
--- /dev/null
+++ b/tools/testing/ktest/examples/include/bisect.conf
@@ -0,0 +1,90 @@
+#
+# This example shows the bisect tests (git bisect and config bisect)
+#
+
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# Set TEST to 'bisect' to do a normal git bisect. You need
+# to modify the options below to make it bisect the exact
+# commits you are interested in.
+#
+TEST_START IF ${TEST} == bisect
+TEST_TYPE = bisect
+# You must set the commit that was considered good (git bisect good)
+BISECT_GOOD = v3.3
+# You must set the commit that was considered bad (git bisect bad)
+BISECT_BAD = HEAD
+# It's best to specify the branch to checkout before starting the bisect.
+CHECKOUT = origin/master
+# This can be build, boot, or test. Here we are doing a bisect
+# that requires running a test to know if the commit was good or bad.
+# The test should exit with 0 on good, non-zero for bad. But see
+# the BISECT_RET_* options in samples.conf to override this.
+BISECT_TYPE = test
+TEST = ${RUN_TEST}
+# It is usually a good idea to confirm that the GOOD and the BAD
+# commits are truly good and bad respectively. Having BISECT_CHECK
+# set to 1 will check both that the good commit works and the bad
+# commit fails. If you only want to check one or the other,
+# set BISECT_CHECK to 'good' or to 'bad'.
+BISECT_CHECK = 1
+#BISECT_CHECK = good
+#BISECT_CHECK = bad
+
+# Usually it's a good idea to specify the exact config you
+# want to use throughout the entire bisect. Here we placed
+# it in the directory we called ktest.pl from and named it
+# 'config-bisect'.
+MIN_CONFIG = ${THIS_DIR}/config-bisect
+# By default, if we are doing a BISECT_TYPE = test run but the
+# build or boot fails, ktest.pl will do a 'git bisect skip'.
+# Uncomment the below option to make ktest stop testing on such
+# an error.
+#BISECT_SKIP = 0
+# Now if you had BISECT_SKIP = 0 and the test fails, you can
+# examine what happened and then do 'git bisect log > /tmp/replay'.
+# Set BISECT_REPLAY to /tmp/replay and ktest.pl will run the
+# 'git bisect replay /tmp/replay' before continuing the bisect test.
+#BISECT_REPLAY = /tmp/replay
+# If you used BISECT_REPLAY after the bisect test failed, you may
+# not want to continue the bisect on the commit that failed.
+# By setting BISECT_START to a new commit, ktest.pl will check out
+# that commit after it has performed the 'git bisect replay' but
+# before it continues running the bisect test.
+#BISECT_START = 2545eb6198e7e1ec50daa0cfc64a4cdfecf24ec9
+
+# Now if you don't trust ktest.pl to make the decisions for you, then
+# set BISECT_MANUAL to 1. This will cause ktest.pl not to decide
+# if the commit was good or bad. Instead, it will ask you to tell
+# it if the current commit was good. In the meantime, you could
+# take the result and load it on any machine you want, run several tests,
+# or whatever you feel like. Then, when you are happy, you can tell
+# ktest if you think it was good or not and ktest.pl will continue
+# the git bisect. You can even change what commit it is currently at.
+#BISECT_MANUAL = 1
+
+
+# One of the unique tests that ktest does is the config bisect.
+# Currently (which hopefully will be fixed soon), the bad config
+# must be a superset of the good config. This is because it only
+# searches for a config that causes the target to fail. If the
+# good config is not a subset of the bad config, or if the target
+# fails because of a lack of a config, then it will not find
+# the config for you.
+TEST_START IF ${TEST} == config-bisect
+TEST_TYPE = config_bisect
+# set to build, boot, test
+CONFIG_BISECT_TYPE = boot
+# Set the config that is considered bad.
+CONFIG_BISECT = ${THIS_DIR}/config-bad
+# This config is optional. By default it uses the
+# MIN_CONFIG as the good config.
+CONFIG_BISECT_GOOD = ${THIS_DIR}/config-good
diff --git a/tools/testing/ktest/examples/include/defaults.conf b/tools/testing/ktest/examples/include/defaults.conf
new file mode 100644
index 000000000..63a1a83f4
--- /dev/null
+++ b/tools/testing/ktest/examples/include/defaults.conf
@@ -0,0 +1,157 @@
+# This file holds defaults for most the tests. It defines the options that
+# are most common to tests that are likely to be shared.
+#
+# Note, after including this file, a config file may override any option
+# with a DEFAULTS OVERRIDE section.
+#
+
+# For those cases that use the same machine to boot a 64 bit
+# and a 32 bit version, the MACHINE is the DNS name to get to the
+# box (usually different for the 64 bit and 32 bit installs) but the
+# BOX here is defined as a variable that will be the name of the box
+# itself. It is useful for calling scripts that will power cycle
+# the box, as only one script needs to be created to power cycle
+# even though the box itself has multiple operating systems on it.
+# By default, BOX and MACHINE are the same.
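+#
+# For example (illustrative), a dual-boot test box might use:
+#
+#   MACHINE := testbox-64	# DNS name of the 64 bit install
+#   BOX := testbox		# name used by the power cycle scripts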
+
+DEFAULTS IF NOT DEFINED BOX
+BOX := ${MACHINE}
+
+
+# Consider each box as 64 bit box, unless the config including this file
+# has defined BITS = 32
+
+DEFAULTS IF NOT DEFINED BITS
+BITS := 64
+
+
+DEFAULTS
+
+# THIS_DIR is used throughout the configs and defaults to ${PWD} which
+# is the directory that ktest.pl was called from.
+
+THIS_DIR := ${PWD}
+
+
+# To organize your configs, it is useful to have each machine save its
+# configs into a separate directory.
+CONFIG_DIR := ${THIS_DIR}/configs/${MACHINE}
+
+# Reset the log before running each test.
+CLEAR_LOG = 1
+
+# As installing kernels usually requires root privilege, default the
+# user on the target to root. It is also required that the target
+# allows ssh to root from the host without asking for a password.
+
+SSH_USER = root
+
+# For accessing the machine, we will ssh to root@machine.
+SSH := ssh ${SSH_USER}@${MACHINE}
+
+# Update this. The default here is that ktest will ssh to the target box
+# and run a script called 'run-test' located on that box.
+TEST = ${SSH} run-test
+
+# Point build dir to the git repo you use
+BUILD_DIR = ${THIS_DIR}/linux.git
+
+# Each machine will have its own output build directory.
+OUTPUT_DIR = ${THIS_DIR}/build/${MACHINE}
+
+# Yes this config is focused on x86 (but ktest works for other archs too)
+BUILD_TARGET = arch/x86/boot/bzImage
+TARGET_IMAGE = /boot/vmlinuz-test
+
+# Have a directory for the scripts that reboot and power cycle the boxes.
+SCRIPTS_DIR := ${THIS_DIR}/scripts
+
+# You can have each box/machine have a script to power cycle it.
+# Name your script <box>-cycle.
+POWER_CYCLE = ${SCRIPTS_DIR}/${BOX}-cycle
+
+# This script is used to power off the box.
+POWER_OFF = ${SCRIPTS_DIR}/${BOX}-poweroff
+
+# Keep your test kernels separate from your other kernels.
+LOCALVERSION = -test
+
+# The /boot/grub/menu.lst is searched for the line:
+# title Test Kernel
+# and ktest will use that kernel to reboot into.
+# For grub2 or other boot loaders, you need to set BOOT_TYPE
+# to 'script' and define other ways to load the kernel.
+# See snowball.conf example.
+#
+GRUB_MENU = Test Kernel
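+
+# For example (illustrative), a matching /boot/grub/menu.lst entry:
+#
+#   title Test Kernel
+#	kernel /boot/vmlinuz-test ro root=/dev/sda1
+#	initrd /boot/initramfs-test.img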
+
+# The kernel build will use this option.
+BUILD_OPTIONS = -j8
+
+# Keeping the log file with the output dir is convenient.
+LOG_FILE = ${OUTPUT_DIR}/${MACHINE}.log
+
+# Each box should have its own minimum configuration
+# See min-config.conf
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+
+# For things like randconfigs, there may be configs you find that
+# are already broken, or there may be some configs that you always
+# want set. Uncomment ADD_CONFIG and point it to the make config files
+# that set the configs you want to keep on (or off) in your build.
+# ADD_CONFIG is usually used to add configs to all machines,
+# whereas MIN_CONFIG is specific per machine.
+#ADD_CONFIG = ${THIS_DIR}/config-broken ${THIS_DIR}/config-general
+
+# To speed up reboots for bisects and patchcheck, instead of
+# waiting 60 seconds for the console to be idle, if this line is
+# seen in the console output, ktest will know the good kernel has
+# finished rebooting and it will be able to continue the tests.
+REBOOT_SUCCESS_LINE = ${MACHINE} login:
+
+# The following are different ways to end the test.
+# By setting the variable REBOOT to none, error, fail, or
+# something else, ktest will power cycle or reboot the target box
+# at the end of the tests.
+#
+# REBOOT := none
+# Don't do anything at the end of the test.
+#
+# REBOOT := error
+# Reboot the box if ktest detects an error
+#
+# REBOOT := fail
+#   Do not stop on failure, and after all tests are complete
+#   power off the box (for both success and error).
+#   This is good for runs over a weekend when you don't want to
+#   waste electricity.
+#
+
+DEFAULTS IF ${REBOOT} == none
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 0
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == error
+REBOOT_ON_SUCCESS = 0
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
+
+DEFAULTS ELSE IF ${REBOOT} == fail
+REBOOT_ON_SUCCESS = 0
+POWEROFF_ON_ERROR = 1
+POWEROFF_ON_SUCCESS = 1
+POWEROFF_AFTER_HALT = 120
+DIE_ON_FAILURE = 0
+
+# Store the failure information into this directory
+# such as the .config, dmesg, and build log.
+STORE_FAILURES = ${THIS_DIR}/failures
+
+DEFAULTS ELSE
+REBOOT_ON_SUCCESS = 1
+REBOOT_ON_ERROR = 1
+POWEROFF_ON_ERROR = 0
+POWEROFF_ON_SUCCESS = 0
diff --git a/tools/testing/ktest/examples/include/min-config.conf b/tools/testing/ktest/examples/include/min-config.conf
new file mode 100644
index 000000000..c703cc46d
--- /dev/null
+++ b/tools/testing/ktest/examples/include/min-config.conf
@@ -0,0 +1,60 @@
+#
+# This file has some examples for creating a MIN_CONFIG.
+# (A .config file that is the minimum for a machine to boot, or
+# to boot and make a network connection.)
+#
+# A MIN_CONFIG is very useful as it is the minimum configuration
+# needed to boot a given machine. You can debug someone else's
+# .config by only setting the configs in your MIN_CONFIG. The closer
+# your MIN_CONFIG is to the true minimum set of configs needed to
+# boot your machine, the closer the config you test with will be
+# to the users config that had the failure.
+#
+# The make_min_config test allows you to create a MIN_CONFIG that
+# is truly the minimum set of configs needed to boot a box.
+#
+# In this example, the final config will reside in
+# ${CONFIG_DIR}/config-new-min and ${CONFIG_DIR}/config-new-min-net.
+# Just move one to the location you have set for MIN_CONFIG.
+#
+# The first test creates a MIN_CONFIG that will be the minimum
+# configuration to boot ${MACHINE} and be able to ssh to it.
+#
+# The second test creates a MIN_CONFIG that will only boot
+# the target and most likely will not let you ssh to it. (Notice
+# how the second test uses the first test's result to continue from.
+# This is because the second test's config is a subset of the first's.)
+#
+# The ${CONFIG_DIR}/config-skip (and -net) will hold the configs
+# that ktest.pl found would not boot the target without them set.
+# The config-new-min holds configs that ktest.pl could not test
+# directly because another config that was needed to boot the box
+# selected them. Sometimes it is possible that this file will hold
+# the true minimum configuration. You can test to see if this is
+# the case by running the boot test with BOOT_TYPE = allnoconfig and
+# setting the MIN_CONFIG to ${CONFIG_DIR}/config-skip. If the
+# machine still boots, then you can use the config-skip as your MIN_CONFIG.
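+# Such a check might look like this (an illustrative sketch, not one of
+# the tests defined below):
+#
+#   TEST_START IF ${TEST} == check-skip
+#   TEST_TYPE = boot
+#   BUILD_TYPE = allnoconfig
+#   MIN_CONFIG = ${CONFIG_DIR}/config-skip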
+#
+# These tests can run for several hours (and perhaps days).
+# It's OK to kill the test with a Ctrl-C. By restarting without
+# modifying this config, ktest.pl will notice that the config-new-min(-net)
+# exists, and will use that instead as the starting point.
+# The USE_OUTPUT_MIN_CONFIG is set to 1 to keep ktest.pl from asking
+# you if you want to use the OUTPUT_MIN_CONFIG as the starting point.
+# Using the OUTPUT_MIN_CONFIG as the starting point allows ktest.pl to
+# start almost where it left off.
+#
+TEST_START IF ${TEST} == min-config
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip-net
+MIN_CONFIG_TYPE = test
+TEST = ${SSH} echo hi
+USE_OUTPUT_MIN_CONFIG = 1
+
+TEST_START IF ${TEST} == min-config && ${MULTI}
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${CONFIG_DIR}/config-new-min
+IGNORE_CONFIG = ${CONFIG_DIR}/config-skip
+MIN_CONFIG = ${CONFIG_DIR}/config-new-min-net
+USE_OUTPUT_MIN_CONFIG = 1
diff --git a/tools/testing/ktest/examples/include/patchcheck.conf b/tools/testing/ktest/examples/include/patchcheck.conf
new file mode 100644
index 000000000..0eb0a5ac7
--- /dev/null
+++ b/tools/testing/ktest/examples/include/patchcheck.conf
@@ -0,0 +1,111 @@
+# patchcheck.conf
+#
+# This contains a test that takes two git commits and will test each
+# commit between the two. The build test will look at what files the
+# commit has touched, and if any of those files produce a warning, then
+# the build will fail.
+
+
+# PATCH_START is the commit to begin with and PATCH_END is the commit
+# to end with (inclusive). This is similar to doing a git rebase -i PATCH_START~1
+# and then testing each commit and doing a git rebase --continue.
+# You can use a SHA1, a git tag, or anything that git will accept for a checkout
+
+PATCH_START := HEAD~3
+PATCH_END := HEAD
+
+# Use oldconfig if BUILD_TYPE wasn't defined
+DEFAULTS IF NOT DEFINED BUILD_TYPE
+DO_BUILD_TYPE := oldconfig
+
+DEFAULTS ELSE
+DO_BUILD_TYPE := ${BUILD_TYPE}
+
+DEFAULTS
+
+
+# Change PATCH_CHECKOUT to be the branch you want to test. The test will
+# do a git checkout of this branch before starting. Obviously both
+# PATCH_START and PATCH_END must be in this branch (and PATCH_START must
+# be contained by PATCH_END).
+
+PATCH_CHECKOUT := test/branch
+
+# Usually it's a good idea to have a set config to use for testing individual
+# patches.
+PATCH_CONFIG := ${CONFIG_DIR}/config-patchcheck
+
+# Change PATCH_TEST to run some test for each patch. For each commit
+# that is tested, after it is built and installed on the test machine,
+# this command will be executed. Usually what is done is to ssh to the target box and
+# run some test scripts. If you just want to boot test your patches
+# comment PATCH_TEST out.
+PATCH_TEST := ${SSH} "/usr/local/bin/ktest-test-script"
+
+DEFAULTS IF DEFINED PATCH_TEST
+PATCH_TEST_TYPE := test
+
+DEFAULTS ELSE
+PATCH_TEST_TYPE := boot
+
+# If for some reason a file touched by one of your patches has a warning
+# that you do not care about, set IGNORE_WARNINGS to that commit (or
+# those commits, space delimited)
+#IGNORE_WARNINGS = 39eaf7ef884dcc44f7ff1bac803ca2a1dcf43544 6edb2a8a385f0cdef51dae37ff23e74d76d8a6ce
+
+# Instead of just checking for warnings to files that are changed
+# it can be advantageous to check for any new warnings. If a
+# header file is changed, it could cause a warning in a file not
+# touched by the commit. To detect these kinds of warnings, you
+# can use the WARNINGS_FILE option.
+#
+# If the variable CREATE_WARNINGS_FILE is set, this config will
+# enable the WARNINGS_FILE during the patchcheck test. Also,
+# before running the patchcheck test, it will create the
+# warnings file.
+#
+DEFAULTS IF DEFINED CREATE_WARNINGS_FILE
+WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+
+TEST_START IF DEFINED CREATE_WARNINGS_FILE
+# WARNINGS_FILE is already set by the DEFAULTS above
+TEST_TYPE = make_warnings_file
+# Checkout the commit before the patches to test,
+# and record all the warnings that exist before the patches
+# to test are added
+CHECKOUT = ${PATCHCHECK_START}~1
+# Force a full build
+BUILD_NOCLEAN = 0
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+# If you are running a multi test and the first test failed on, say,
+# the 5th patch, you can restart from that patch by setting PATCH_START1.
+# This will make the first test start from this commit instead of the
+# PATCH_START commit.
+# Note, do not change this option. Just define PATCH_START1 in the
+# top config (the one you pass to ktest.pl), and this will use it,
+# otherwise it will just use PATCH_START if PATCH_START1 is not defined.
+DEFAULTS IF NOT DEFINED PATCH_START1
+PATCH_START1 := ${PATCH_START}
+
+TEST_START IF ${TEST} == patchcheck
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START1}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+BUILD_TYPE = ${DO_BUILD_TYPE}
+
+TEST_START IF ${TEST} == patchcheck && ${MULTI}
+TEST_TYPE = patchcheck
+MIN_CONFIG = ${PATCH_CONFIG}
+TEST = ${PATCH_TEST}
+PATCHCHECK_TYPE = ${PATCH_TEST_TYPE}
+PATCHCHECK_START = ${PATCH_START}
+PATCHCHECK_END = ${PATCH_END}
+CHECKOUT = ${PATCH_CHECKOUT}
+# Use multi to test different compilers?
+MAKE_CMD = CC=gcc-4.5.1 make
+BUILD_TYPE = ${DO_BUILD_TYPE}
diff --git a/tools/testing/ktest/examples/include/tests.conf b/tools/testing/ktest/examples/include/tests.conf
new file mode 100644
index 000000000..60cedb1a1
--- /dev/null
+++ b/tools/testing/ktest/examples/include/tests.conf
@@ -0,0 +1,74 @@
+#
+# This is an example of various tests that you can run
+#
+# The variable TEST can be one of boot, build, randconfig, or test.
+#
+# Note that TEST is a variable created with ':=' and only exists
+# throughout the config processing (not during the tests themselves).
+#
+# The TEST option (defined with '=') is used to tell ktest.pl
+# what test to run after a successful boot. The TEST option
+# persists into the test runs.
+#
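+# For example (illustrative):
+#
+#   TEST := boot		# variable: gone once the config is parsed
+#   TEST = ${SSH} run-test	# option: kept and run after a successful boot
+#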
+
+# The config that includes this file may define a BOOT_TYPE
+# variable that tells this config what type of boot test to run.
+# If it's not defined, the below DEFAULTS will set the default
+# to 'oldconfig'.
+#
+DEFAULTS IF NOT DEFINED BOOT_TYPE
+BOOT_TYPE := oldconfig
+
+# The config that includes this file may define a RUN_TEST
+# variable that will tell this config what test to run.
+# (what to set the TEST option to).
+#
+DEFAULTS IF NOT DEFINED RUN_TEST
+# Requires that hackbench is in the PATH
+RUN_TEST := ${SSH} hackbench 50
+
+
+# If TEST is set to 'boot' then just build a kernel and boot
+# the target.
+TEST_START IF ${TEST} == boot
+TEST_TYPE = boot
+# Notice how we set the BUILD_TYPE option to the BOOT_TYPE variable.
+BUILD_TYPE = ${BOOT_TYPE}
+# Do not do a make mrproper.
+BUILD_NOCLEAN = 1
+
+# If you only want to build the kernel, and perhaps install
+# and test it yourself, then just set TEST to build.
+TEST_START IF ${TEST} == build
+TEST_TYPE = build
+BUILD_TYPE = ${BOOT_TYPE}
+BUILD_NOCLEAN = 1
+
+# Build, install, boot and test with a randconfig 10 times.
+# It is important that you have set MIN_CONFIG in the config
+# that includes this file otherwise it is likely that the
+# randconfig will not have the necessary configs needed to
+# boot your box. This version of the test requires a min
+# config that has enough to make sure the target has network
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min-net
+TEST_TYPE = test
+BUILD_TYPE = randconfig
+TEST = ${RUN_TEST}
+
+# This is the same as above, but only tests to a boot prompt.
+# The MIN_CONFIG used here does not need to have networking
+# working.
+TEST_START ITERATE 10 IF ${TEST} == randconfig && ${MULTI}
+TEST_TYPE = boot
+BUILD_TYPE = randconfig
+MIN_CONFIG = ${CONFIG_DIR}/config-min
+MAKE_CMD = make
+
+# This builds, installs, boots and tests the target.
+TEST_START IF ${TEST} == test
+TEST_TYPE = test
+BUILD_TYPE = ${BOOT_TYPE}
+TEST = ${RUN_TEST}
+BUILD_NOCLEAN = 1
diff --git a/tools/testing/ktest/examples/kvm.conf b/tools/testing/ktest/examples/kvm.conf
new file mode 100644
index 000000000..fbc134f9a
--- /dev/null
+++ b/tools/testing/ktest/examples/kvm.conf
@@ -0,0 +1,92 @@
+#
+# This config is an example usage of ktest.pl with a kvm guest
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+
+# Use virsh to read the serial console of the guest
+CONSOLE = virsh console ${MACHINE}
+
+# Use SIGKILL to terminate virsh console. We can't kill virsh console
+# by the default signal, SIGINT.
+CLOSE_CONSOLE_SIGNAL = KILL
+
+#*************************************#
+# This part is the same as test.conf #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initramfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules is complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# As the defaults.conf file already has a POWER_CYCLE option defined,
+# and options can not be defined in the same section more than once
+# (all DEFAULTS sections are considered the same), we use
+# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previously defined
+# options in favor of the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use virsh to cycle it. To do so, we destroy
+# the guest, wait 5 seconds, and then start it up again.
+# Crude, but effective.
+#
+POWER_CYCLE = virsh destroy ${MACHINE}; sleep 5; virsh start ${MACHINE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
diff --git a/tools/testing/ktest/examples/snowball.conf b/tools/testing/ktest/examples/snowball.conf
new file mode 100644
index 000000000..a82a3c5bc
--- /dev/null
+++ b/tools/testing/ktest/examples/snowball.conf
@@ -0,0 +1,53 @@
+# This example was used to boot the snowball ARM board.
+# See http://people.redhat.com/srostedt/ktest-embedded-2012/
+
+# PWD is a ktest.pl variable that resolves to the working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := /home/rostedt/work/demo/ktest-embed
+LOG_FILE = ${OUTPUT_DIR}/snowball.log
+CLEAR_LOG = 1
+MAKE_CMD = PATH=/usr/local/gcc-4.5.2-nolibc/arm-unknown-linux-gnueabi/bin:$PATH CROSS_COMPILE=arm-unknown-linux-gnueabi- make ARCH=arm
+ADD_CONFIG = ${THIS_DIR}/addconfig
+
+SCP_TO_TARGET = echo "don't do scp"
+
+TFTPBOOT := /var/lib/tftpboot
+TFTPDEF := ${TFTPBOOT}/snowball-default
+TFTPTEST := ${OUTPUT_DIR}/${BUILD_TARGET}
+
+SWITCH_TO_GOOD = cp ${TFTPDEF} ${TARGET_IMAGE}
+SWITCH_TO_TEST = cp ${TFTPTEST} ${TARGET_IMAGE}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START SKIP
+TEST_TYPE = boot
+BUILD_TYPE = u8500_defconfig
+BUILD_NOCLEAN = 1
+
+TEST_START
+TEST_TYPE = make_min_config
+OUTPUT_MIN_CONFIG = ${THIS_DIR}/config.newmin
+START_MIN_CONFIG = ${THIS_DIR}/config.orig
+IGNORE_CONFIG = ${THIS_DIR}/config.ignore
+BUILD_NOCLEAN = 1
+
+
+DEFAULTS
+LOCALVERSION = -test
+POWER_CYCLE = echo use the thumb luke; read a
+CONSOLE = cat ${THIS_DIR}/snowball-cat
+REBOOT_TYPE = script
+SSH_USER = root
+BUILD_OPTIONS = -j8 uImage
+BUILD_DIR = ${THIS_DIR}/linux.git
+OUTPUT_DIR = ${THIS_DIR}/snowball-build
+MACHINE = snowball
+TARGET_IMAGE = /var/lib/tftpboot/snowball-image
+BUILD_TARGET = arch/arm/boot/uImage
diff --git a/tools/testing/ktest/examples/test.conf b/tools/testing/ktest/examples/test.conf
new file mode 100644
index 000000000..b725210ef
--- /dev/null
+++ b/tools/testing/ktest/examples/test.conf
@@ -0,0 +1,62 @@
+#
+# Generic config for a machine
+#
+
+# Name your machine (the DNS name, what you ssh to)
+MACHINE = foo
+
+# BOX can be different from foo, if the machine BOX has
+# multiple partitions with different systems installed. For example,
+# you may have an i386 and x86_64 installation on a test box.
+# If this is the case, MACHINE defines the way to connect to the
+# machine, which may be different between which system the machine
+# is booting into. BOX is used for the scripts to reboot and power cycle
+# the machine, where it does not matter which system the machine boots into.
+#
+#BOX := bar
+
+# Define a way to read the console
+CONSOLE = stty -F /dev/ttyS0 115200 parodd; cat /dev/ttyS0
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+# In case you need to add a patch for a bisect or something
+#PRE_BUILD = patch -p1 < ${THIS_DIR}/fix.patch
+
+# Reset the repo after the build and remove all 'test' modules from the target
+# Notice that DO_POST_BUILD is a variable (defined by ':=') and POST_BUILD
+# is the option (defined by '=')
+
+DO_POST_BUILD := git reset --hard
+POST_BUILD = ${SSH} 'rm -rf /lib/modules/*-test*'; ${DO_POST_BUILD}
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
+
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
new file mode 100755
index 000000000..406401f1a
--- /dev/null
+++ b/tools/testing/ktest/ktest.pl
@@ -0,0 +1,4425 @@
+#!/usr/bin/perl -w
+#
+# Copyright 2010 - Steven Rostedt <srostedt@redhat.com>, Red Hat Inc.
+# Licensed under the terms of the GNU GPL License version 2
+#
+
+use strict;
+use IPC::Open2;
+use Fcntl qw(F_GETFL F_SETFL O_NONBLOCK);
+use File::Path qw(mkpath);
+use File::Copy qw(cp);
+use FileHandle;
+use FindBin;
+
+my $VERSION = "0.2";
+
+$| = 1;
+
+my %opt;
+my %repeat_tests;
+my %repeats;
+my %evals;
+
+#default opts
+my %default = (
+ "MAILER" => "sendmail", # default mailer
+ "EMAIL_ON_ERROR" => 1,
+ "EMAIL_WHEN_FINISHED" => 1,
+ "EMAIL_WHEN_CANCELED" => 0,
+ "EMAIL_WHEN_STARTED" => 0,
+ "NUM_TESTS" => 1,
+ "TEST_TYPE" => "build",
+ "BUILD_TYPE" => "randconfig",
+ "MAKE_CMD" => "make",
+ "CLOSE_CONSOLE_SIGNAL" => "INT",
+ "TIMEOUT" => 120,
+ "TMP_DIR" => "/tmp/ktest/\${MACHINE}",
+ "SLEEP_TIME" => 60, # sleep time between tests
+ "BUILD_NOCLEAN" => 0,
+ "REBOOT_ON_ERROR" => 0,
+ "POWEROFF_ON_ERROR" => 0,
+ "REBOOT_ON_SUCCESS" => 1,
+ "POWEROFF_ON_SUCCESS" => 0,
+ "BUILD_OPTIONS" => "",
+ "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
+ "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
+ "CLEAR_LOG" => 0,
+ "BISECT_MANUAL" => 0,
+ "BISECT_SKIP" => 1,
+ "BISECT_TRIES" => 1,
+ "MIN_CONFIG_TYPE" => "boot",
+ "SUCCESS_LINE" => "login:",
+ "DETECT_TRIPLE_FAULT" => 1,
+ "NO_INSTALL" => 0,
+ "BOOTED_TIMEOUT" => 1,
+ "DIE_ON_FAILURE" => 1,
+ "SSH_EXEC" => "ssh \$SSH_USER\@\$MACHINE \$SSH_COMMAND",
+ "SCP_TO_TARGET" => "scp \$SRC_FILE \$SSH_USER\@\$MACHINE:\$DST_FILE",
+ "SCP_TO_TARGET_INSTALL" => "\${SCP_TO_TARGET}",
+ "REBOOT" => "ssh \$SSH_USER\@\$MACHINE reboot",
+ "STOP_AFTER_SUCCESS" => 10,
+ "STOP_AFTER_FAILURE" => 60,
+ "STOP_TEST_AFTER" => 600,
+ "MAX_MONITOR_WAIT" => 1800,
+ "GRUB_REBOOT" => "grub2-reboot",
+ "SYSLINUX" => "extlinux",
+ "SYSLINUX_PATH" => "/boot/extlinux",
+ "CONNECT_TIMEOUT" => 25,
+
+# These are required, and we will ask users for them if they are not set, but we
+# keep the default values set to something common.
+ "REBOOT_TYPE" => "grub",
+ "LOCALVERSION" => "-test",
+ "SSH_USER" => "root",
+ "BUILD_TARGET" => "arch/x86/boot/bzImage",
+ "TARGET_IMAGE" => "/boot/vmlinuz-test",
+
+ "LOG_FILE" => undef,
+ "IGNORE_UNUSED" => 0,
+);
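+
+# Any of the defaults above can be overridden in the user's ktest config
+# file, e.g. (illustrative):
+#
+#   TIMEOUT = 300
+#   BUILD_OPTIONS = -j16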
+
+my $ktest_config = "ktest.conf";
+my $version;
+my $have_version = 0;
+my $machine;
+my $last_machine;
+my $ssh_user;
+my $tmpdir;
+my $builddir;
+my $outputdir;
+my $output_config;
+my $test_type;
+my $build_type;
+my $build_options;
+my $final_post_ktest;
+my $pre_ktest;
+my $post_ktest;
+my $pre_test;
+my $post_test;
+my $pre_build;
+my $post_build;
+my $pre_build_die;
+my $post_build_die;
+my $reboot_type;
+my $reboot_script;
+my $power_cycle;
+my $reboot;
+my $reboot_on_error;
+my $switch_to_good;
+my $switch_to_test;
+my $poweroff_on_error;
+my $reboot_on_success;
+my $die_on_failure;
+my $powercycle_after_reboot;
+my $poweroff_after_halt;
+my $max_monitor_wait;
+my $ssh_exec;
+my $scp_to_target;
+my $scp_to_target_install;
+my $power_off;
+my $grub_menu;
+my $last_grub_menu;
+my $grub_file;
+my $grub_number;
+my $grub_reboot;
+my $syslinux;
+my $syslinux_path;
+my $syslinux_label;
+my $target;
+my $make;
+my $pre_install;
+my $post_install;
+my $no_install;
+my $noclean;
+my $minconfig;
+my $start_minconfig;
+my $start_minconfig_defined;
+my $output_minconfig;
+my $minconfig_type;
+my $use_output_minconfig;
+my $warnings_file;
+my $ignore_config;
+my $ignore_errors;
+my $addconfig;
+my $in_bisect = 0;
+my $bisect_bad_commit = "";
+my $reverse_bisect;
+my $bisect_manual;
+my $bisect_skip;
+my $bisect_tries;
+my $config_bisect_good;
+my $bisect_ret_good;
+my $bisect_ret_bad;
+my $bisect_ret_skip;
+my $bisect_ret_abort;
+my $bisect_ret_default;
+my $in_patchcheck = 0;
+my $run_test;
+my $buildlog;
+my $testlog;
+my $dmesg;
+my $monitor_fp;
+my $monitor_pid;
+my $monitor_cnt = 0;
+my $sleep_time;
+my $bisect_sleep_time;
+my $patchcheck_sleep_time;
+my $ignore_warnings;
+my $store_failures;
+my $store_successes;
+my $test_name;
+my $timeout;
+my $connect_timeout;
+my $config_bisect_exec;
+my $booted_timeout;
+my $detect_triplefault;
+my $console;
+my $close_console_signal;
+my $reboot_success_line;
+my $success_line;
+my $stop_after_success;
+my $stop_after_failure;
+my $stop_test_after;
+my $build_target;
+my $target_image;
+my $checkout;
+my $localversion;
+my $iteration = 0;
+my $successes = 0;
+my $stty_orig;
+my $run_command_status = 0;
+
+my $bisect_good;
+my $bisect_bad;
+my $bisect_type;
+my $bisect_start;
+my $bisect_replay;
+my $bisect_files;
+my $bisect_reverse;
+my $bisect_check;
+
+my $config_bisect;
+my $config_bisect_type;
+my $config_bisect_check;
+
+my $patchcheck_type;
+my $patchcheck_start;
+my $patchcheck_cherry;
+my $patchcheck_end;
+
+my $build_time;
+my $install_time;
+my $reboot_time;
+my $test_time;
+
+my $pwd;
+my $dirname = $FindBin::Bin;
+
+my $mailto;
+my $mailer;
+my $mail_path;
+my $mail_command;
+my $email_on_error;
+my $email_when_finished;
+my $email_when_started;
+my $email_when_canceled;
+
+my $script_start_time = localtime();
+
+# set when a test is something other than just building or installing,
+# which would require more options.
+my $buildonly = 1;
+
+# tell build not to worry about warnings, even when WARNINGS_FILE is set
+my $warnings_ok = 0;
+
+# set when creating a new config
+my $newconfig = 0;
+
+my %entered_configs;
+my %config_help;
+my %variable;
+
+# force_config is the list of configs that we force enabled (or disabled)
+# in a .config file. The MIN_CONFIG and ADD_CONFIG configs.
+my %force_config;
+
+# do not force reboots on config problems
+my $no_reboot = 1;
+
+# reboot on success
+my $reboot_success = 0;
+
+my %option_map = (
+ "MAILTO" => \$mailto,
+ "MAILER" => \$mailer,
+ "MAIL_PATH" => \$mail_path,
+ "MAIL_COMMAND" => \$mail_command,
+ "EMAIL_ON_ERROR" => \$email_on_error,
+ "EMAIL_WHEN_FINISHED" => \$email_when_finished,
+ "EMAIL_WHEN_STARTED" => \$email_when_started,
+ "EMAIL_WHEN_CANCELED" => \$email_when_canceled,
+ "MACHINE" => \$machine,
+ "SSH_USER" => \$ssh_user,
+ "TMP_DIR" => \$tmpdir,
+ "OUTPUT_DIR" => \$outputdir,
+ "BUILD_DIR" => \$builddir,
+ "TEST_TYPE" => \$test_type,
+ "PRE_KTEST" => \$pre_ktest,
+ "POST_KTEST" => \$post_ktest,
+ "PRE_TEST" => \$pre_test,
+ "POST_TEST" => \$post_test,
+ "BUILD_TYPE" => \$build_type,
+ "BUILD_OPTIONS" => \$build_options,
+ "PRE_BUILD" => \$pre_build,
+ "POST_BUILD" => \$post_build,
+ "PRE_BUILD_DIE" => \$pre_build_die,
+ "POST_BUILD_DIE" => \$post_build_die,
+ "POWER_CYCLE" => \$power_cycle,
+ "REBOOT" => \$reboot,
+ "BUILD_NOCLEAN" => \$noclean,
+ "MIN_CONFIG" => \$minconfig,
+ "OUTPUT_MIN_CONFIG" => \$output_minconfig,
+ "START_MIN_CONFIG" => \$start_minconfig,
+ "MIN_CONFIG_TYPE" => \$minconfig_type,
+ "USE_OUTPUT_MIN_CONFIG" => \$use_output_minconfig,
+ "WARNINGS_FILE" => \$warnings_file,
+ "IGNORE_CONFIG" => \$ignore_config,
+ "TEST" => \$run_test,
+ "ADD_CONFIG" => \$addconfig,
+ "REBOOT_TYPE" => \$reboot_type,
+ "GRUB_MENU" => \$grub_menu,
+ "GRUB_FILE" => \$grub_file,
+ "GRUB_REBOOT" => \$grub_reboot,
+ "SYSLINUX" => \$syslinux,
+ "SYSLINUX_PATH" => \$syslinux_path,
+ "SYSLINUX_LABEL" => \$syslinux_label,
+ "PRE_INSTALL" => \$pre_install,
+ "POST_INSTALL" => \$post_install,
+ "NO_INSTALL" => \$no_install,
+ "REBOOT_SCRIPT" => \$reboot_script,
+ "REBOOT_ON_ERROR" => \$reboot_on_error,
+ "SWITCH_TO_GOOD" => \$switch_to_good,
+ "SWITCH_TO_TEST" => \$switch_to_test,
+ "POWEROFF_ON_ERROR" => \$poweroff_on_error,
+ "REBOOT_ON_SUCCESS" => \$reboot_on_success,
+ "DIE_ON_FAILURE" => \$die_on_failure,
+ "POWER_OFF" => \$power_off,
+ "POWERCYCLE_AFTER_REBOOT" => \$powercycle_after_reboot,
+ "POWEROFF_AFTER_HALT" => \$poweroff_after_halt,
+ "MAX_MONITOR_WAIT" => \$max_monitor_wait,
+ "SLEEP_TIME" => \$sleep_time,
+ "BISECT_SLEEP_TIME" => \$bisect_sleep_time,
+ "PATCHCHECK_SLEEP_TIME" => \$patchcheck_sleep_time,
+ "IGNORE_WARNINGS" => \$ignore_warnings,
+ "IGNORE_ERRORS" => \$ignore_errors,
+ "BISECT_MANUAL" => \$bisect_manual,
+ "BISECT_SKIP" => \$bisect_skip,
+ "BISECT_TRIES" => \$bisect_tries,
+ "CONFIG_BISECT_GOOD" => \$config_bisect_good,
+ "BISECT_RET_GOOD" => \$bisect_ret_good,
+ "BISECT_RET_BAD" => \$bisect_ret_bad,
+ "BISECT_RET_SKIP" => \$bisect_ret_skip,
+ "BISECT_RET_ABORT" => \$bisect_ret_abort,
+ "BISECT_RET_DEFAULT" => \$bisect_ret_default,
+ "STORE_FAILURES" => \$store_failures,
+ "STORE_SUCCESSES" => \$store_successes,
+ "TEST_NAME" => \$test_name,
+ "TIMEOUT" => \$timeout,
+ "CONNECT_TIMEOUT" => \$connect_timeout,
+ "CONFIG_BISECT_EXEC" => \$config_bisect_exec,
+ "BOOTED_TIMEOUT" => \$booted_timeout,
+ "CONSOLE" => \$console,
+ "CLOSE_CONSOLE_SIGNAL" => \$close_console_signal,
+ "DETECT_TRIPLE_FAULT" => \$detect_triplefault,
+ "SUCCESS_LINE" => \$success_line,
+ "REBOOT_SUCCESS_LINE" => \$reboot_success_line,
+ "STOP_AFTER_SUCCESS" => \$stop_after_success,
+ "STOP_AFTER_FAILURE" => \$stop_after_failure,
+ "STOP_TEST_AFTER" => \$stop_test_after,
+ "BUILD_TARGET" => \$build_target,
+ "SSH_EXEC" => \$ssh_exec,
+ "SCP_TO_TARGET" => \$scp_to_target,
+ "SCP_TO_TARGET_INSTALL" => \$scp_to_target_install,
+ "CHECKOUT" => \$checkout,
+ "TARGET_IMAGE" => \$target_image,
+ "LOCALVERSION" => \$localversion,
+
+ "BISECT_GOOD" => \$bisect_good,
+ "BISECT_BAD" => \$bisect_bad,
+ "BISECT_TYPE" => \$bisect_type,
+ "BISECT_START" => \$bisect_start,
+ "BISECT_REPLAY" => \$bisect_replay,
+ "BISECT_FILES" => \$bisect_files,
+ "BISECT_REVERSE" => \$bisect_reverse,
+ "BISECT_CHECK" => \$bisect_check,
+
+ "CONFIG_BISECT" => \$config_bisect,
+ "CONFIG_BISECT_TYPE" => \$config_bisect_type,
+ "CONFIG_BISECT_CHECK" => \$config_bisect_check,
+
+ "PATCHCHECK_TYPE" => \$patchcheck_type,
+ "PATCHCHECK_START" => \$patchcheck_start,
+ "PATCHCHECK_CHERRY" => \$patchcheck_cherry,
+ "PATCHCHECK_END" => \$patchcheck_end,
+);
+
+# Options may be used by other options; record them.
+my %used_options;
+
+# default variables that can be used
+chomp ($variable{"PWD"} = `pwd`);
+$pwd = $variable{"PWD"};
+
+$config_help{"MACHINE"} = << "EOF"
+ The machine hostname that you will test.
+ For build-only tests, it is still needed to differentiate log files.
+EOF
+ ;
+$config_help{"SSH_USER"} = << "EOF"
+ The box is expected to have ssh running on normal bootup. Provide the
+ user name to log in as (most likely root, since you need privileged operations)
+EOF
+ ;
+$config_help{"BUILD_DIR"} = << "EOF"
+ The directory that contains the Linux source code (full path).
+ You can use \${PWD}, which will be the path where ktest.pl is run,
+ or \${THIS_DIR}, which is assigned \${PWD} but may be changed later.
+EOF
+ ;
+$config_help{"OUTPUT_DIR"} = << "EOF"
+ The directory where the objects will be built (full path).
+ (cannot be the same as BUILD_DIR)
+ You can use \${PWD}, which will be the path where ktest.pl is run,
+ or \${THIS_DIR}, which is assigned \${PWD} but may be changed later.
+EOF
+ ;
+$config_help{"BUILD_TARGET"} = << "EOF"
+ The location of the compiled file to copy to the target.
+ (relative to OUTPUT_DIR)
+EOF
+ ;
+$config_help{"BUILD_OPTIONS"} = << "EOF"
+ Options to add to \"make\" when building.
+ e.g. -j20
+EOF
+ ;
+$config_help{"TARGET_IMAGE"} = << "EOF"
+ The place to put your image on the test machine.
+EOF
+ ;
+$config_help{"POWER_CYCLE"} = << "EOF"
+ A script or command to reboot the box.
+
+ Here is a Digital Loggers power switch example:
+ POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin\@power/outlet?5=CCL'
+
+ Here is an example that reboots a virtual machine named "Guest"
+ on the current host:
+ POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+EOF
+ ;
+$config_help{"CONSOLE"} = << "EOF"
+ The script or command that reads the console.
+
+ If you use a ttywatch server, something like the following would work:
+CONSOLE = nc -d localhost 3001
+
+ For a virtual machine with the guest name "Guest":
+CONSOLE = virsh console Guest
+EOF
+ ;
+$config_help{"LOCALVERSION"} = << "EOF"
+ The version suffix required to differentiate the test kernel
+ from other Linux builds on the system.
+EOF
+ ;
+$config_help{"REBOOT_TYPE"} = << "EOF"
+ The way to reboot the box into the test kernel.
+ Only valid options so far are "grub", "grub2", "syslinux", and "script".
+
+ If you specify grub, it will assume grub version 1
+ and will search /boot/grub/menu.lst for the title \$GRUB_MENU,
+ selecting that entry to reboot into the test kernel. If this is
+ not your setup, then specify "script" and have a command or
+ script specified in REBOOT_SCRIPT to boot to the target.
+
+ The entry in /boot/grub/menu.lst must be entered manually.
+ The test will not modify that file.
+
+ If you specify grub2, then you also need to specify both \$GRUB_MENU
+ and \$GRUB_FILE.
+
+ If you specify syslinux, then you may use SYSLINUX to define the syslinux
+ command (defaults to extlinux), and SYSLINUX_PATH to specify the path to
+ the syslinux install (defaults to /boot/extlinux). But you have to specify
+ SYSLINUX_LABEL to define the label to boot to for the test kernel.
+EOF
+ ;
+$config_help{"GRUB_MENU"} = << "EOF"
+ The grub title name for the test kernel to boot
+ (Only mandatory if REBOOT_TYPE = grub or grub2)
+
+ Note, ktest.pl will not update the grub menu.lst, you need to
+ manually add an option for the test. ktest.pl will search
+ the grub menu.lst for this option to find what kernel to
+ reboot into.
+
+ For example, if the test kernel entry in /boot/grub/menu.lst is:
+ title Test Kernel
+ kernel vmlinuz-test
+ GRUB_MENU = Test Kernel
+
+ For grub2, a search of \$GRUB_FILE is performed for the lines
+ that begin with "menuentry". It will not detect submenus. The
+ menu must be a non-nested menu. Add the quotes used in the menu
+ to guarantee your selection, as the first menuentry found that
+ contains \$GRUB_MENU will be used.
+EOF
+ ;
+$config_help{"GRUB_FILE"} = << "EOF"
+ If grub2 is used, the full path for the grub.cfg file is placed
+ here. Use something like /boot/grub2/grub.cfg to search.
+EOF
+ ;
+$config_help{"SYSLINUX_LABEL"} = << "EOF"
+ If syslinux is used, the label that boots the target kernel must
+ be specified with SYSLINUX_LABEL.
+EOF
+ ;
+$config_help{"REBOOT_SCRIPT"} = << "EOF"
+ A script to reboot the target into the test kernel
+ (Only mandatory if REBOOT_TYPE = script)
+EOF
+ ;
+
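+# Logging helpers: _logit() appends to LOG_FILE when one is defined,
+# logit() writes to the log if LOG_FILE is set and to stdout otherwise,
+# and doprint() always writes to both.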
+sub _logit {
+ if (defined($opt{"LOG_FILE"})) {
+ open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ print OUT @_;
+ close(OUT);
+ }
+}
+
+sub logit {
+ if (defined($opt{"LOG_FILE"})) {
+ _logit @_;
+ } else {
+ print @_;
+ }
+}
+
+sub doprint {
+ print @_;
+ _logit @_;
+}
+
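+# Ask the user a yes/no question (yes/no/cancel when $cancel is set).
+# An empty answer defaults to "y" ("c" when cancel is allowed), cancel
+# exits the script, and the return value is 1 for yes and 0 for no.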
+sub read_prompt {
+ my ($cancel, $prompt) = @_;
+
+ my $ans;
+
+ for (;;) {
+ if ($cancel) {
+ print "$prompt [y/n/C] ";
+ } else {
+ print "$prompt [Y/n] ";
+ }
+ $ans = <STDIN>;
+ chomp $ans;
+ if ($ans =~ /^\s*$/) {
+ if ($cancel) {
+ $ans = "c";
+ } else {
+ $ans = "y";
+ }
+ }
+ last if ($ans =~ /^y$/i || $ans =~ /^n$/i);
+ if ($cancel) {
+ last if ($ans =~ /^c$/i);
+ print "Please answer either 'y', 'n' or 'c'.\n";
+ } else {
+ print "Please answer either 'y' or 'n'.\n";
+ }
+ }
+ if ($ans =~ /^c/i) {
+ exit;
+ }
+ if ($ans !~ /^y$/i) {
+ return 0;
+ }
+ return 1;
+}
+
+sub read_yn {
+ my ($prompt) = @_;
+
+ return read_prompt 0, $prompt;
+}
+
+sub read_ync {
+ my ($prompt) = @_;
+
+ return read_prompt 1, $prompt;
+}
+
+sub get_mandatory_config {
+ my ($config) = @_;
+ my $ans;
+
+ return if (defined($opt{$config}));
+
+ if (defined($config_help{$config})) {
+ print "\n";
+ print $config_help{$config};
+ }
+
+ for (;;) {
+ print "$config = ";
+ if (defined($default{$config}) && length($default{$config})) {
+ print "\[$default{$config}\] ";
+ }
+ $ans = <STDIN>;
+ $ans =~ s/^\s*(.*\S)\s*$/$1/;
+ if ($ans =~ /^\s*$/) {
+ if ($default{$config}) {
+ $ans = $default{$config};
+ } else {
+ print "Your answer can not be blank\n";
+ next;
+ }
+ }
+ $entered_configs{$config} = ${ans};
+ last;
+ }
+}
+
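+# Print a duration in human readable form; for example, show_time(3725)
+# prints "1 hour 2 minutes 5 seconds".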
+sub show_time {
+ my ($time) = @_;
+
+ my $hours = 0;
+ my $minutes = 0;
+
+ if ($time > 3600) {
+ $hours = int($time / 3600);
+ $time -= $hours * 3600;
+ }
+ if ($time > 60) {
+ $minutes = int($time / 60);
+ $time -= $minutes * 60;
+ }
+
+ if ($hours > 0) {
+ doprint "$hours hour";
+ doprint "s" if ($hours > 1);
+ doprint " ";
+ }
+
+ if ($minutes > 0) {
+ doprint "$minutes minute";
+ doprint "s" if ($minutes > 1);
+ doprint " ";
+ }
+
+ doprint "$time second";
+ doprint "s" if ($time != 1);
+}
+
+sub print_times {
+ doprint "\n";
+ if ($build_time) {
+ doprint "Build time: ";
+ show_time($build_time);
+ doprint "\n";
+ }
+ if ($install_time) {
+ doprint "Install time: ";
+ show_time($install_time);
+ doprint "\n";
+ }
+ if ($reboot_time) {
+ doprint "Reboot time: ";
+ show_time($reboot_time);
+ doprint "\n";
+ }
+ if ($test_time) {
+ doprint "Test time: ";
+ show_time($test_time);
+ doprint "\n";
+ }
+ # reset for iterations like bisect
+ $build_time = 0;
+ $install_time = 0;
+ $reboot_time = 0;
+ $test_time = 0;
+}
+
+sub get_mandatory_configs {
+ get_mandatory_config("MACHINE");
+ get_mandatory_config("BUILD_DIR");
+ get_mandatory_config("OUTPUT_DIR");
+
+ if ($newconfig) {
+ get_mandatory_config("BUILD_OPTIONS");
+ }
+
+ # options required for other than just building a kernel
+ if (!$buildonly) {
+ get_mandatory_config("POWER_CYCLE");
+ get_mandatory_config("CONSOLE");
+ }
+
+ # options required for install and more
+ if ($buildonly != 1) {
+ get_mandatory_config("SSH_USER");
+ get_mandatory_config("BUILD_TARGET");
+ get_mandatory_config("TARGET_IMAGE");
+ }
+
+ get_mandatory_config("LOCALVERSION");
+
+ return if ($buildonly);
+
+ my $rtype = $opt{"REBOOT_TYPE"};
+
+ if (!defined($rtype)) {
+ if (!defined($opt{"GRUB_MENU"})) {
+ get_mandatory_config("REBOOT_TYPE");
+ $rtype = $entered_configs{"REBOOT_TYPE"};
+ } else {
+ $rtype = "grub";
+ }
+ }
+
+ if ($rtype eq "grub") {
+ get_mandatory_config("GRUB_MENU");
+ }
+
+ if ($rtype eq "grub2") {
+ get_mandatory_config("GRUB_MENU");
+ get_mandatory_config("GRUB_FILE");
+ }
+
+ if ($rtype eq "syslinux") {
+ get_mandatory_config("SYSLINUX_LABEL");
+ }
+}
+
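+# Expand ${VAR} references using the config variables defined so far.
+# For example, with "PWD := /work/ktest" in the config (the path is
+# illustrative), the value "${PWD}/linux" becomes "/work/ktest/linux".
+# Escaped (\${VAR}) and unknown variables are left untouched.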
+sub process_variables {
+ my ($value, $remove_undef) = @_;
+ my $retval = "";
+
+ # We want to check for '\', and it is just easier
+ # to check the character preceding '$' than to worry
+ # about '$' being the first character. By adding
+ # a space to $value, we can just check [^\\]\$ and
+ # it will still work.
+ $value = " $value";
+
+ while ($value =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ my $begin = $1;
+ my $var = $2;
+ my $end = $3;
+ # append beginning of value to retval
+ $retval = "$retval$begin";
+ if (defined($variable{$var})) {
+ $retval = "$retval$variable{$var}";
+ } elsif (defined($remove_undef) && $remove_undef) {
+ # for if statements, any variable that is not defined
+ # is simply converted to 0
+ $retval = "${retval}0";
+ } else {
+ # put back the original piece.
+ $retval = "$retval\$\{$var\}";
+ # This could be an option that is used later, save
+ # it so we don't warn if this option is not one of
+ # ktest's options.
+ $used_options{$var} = 1;
+ }
+ $value = $end;
+ }
+ $retval = "$retval$value";
+
+ # remove the space added in the beginning
+ $retval =~ s/ //;
+
+ return "$retval"
+}
+
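+# Record an option assignment from the config file into %opt. Setting a
+# test type other than build drops $buildonly (to 0, or to 2 for
+# install-only tests) so that the matching mandatory options will be
+# asked for. Defining an option twice is fatal unless it happens in an
+# OVERRIDE section.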
+sub set_value {
+ my ($lvalue, $rvalue, $override, $overrides, $name) = @_;
+
+ my $prvalue = process_variables($rvalue);
+
+ if ($lvalue =~ /^(TEST|BISECT|CONFIG_BISECT)_TYPE(\[.*\])?$/ &&
+ $prvalue !~ /^(config_|)bisect$/ &&
+ $prvalue !~ /^build$/ &&
+ $buildonly) {
+
+ # Note if a test is something other than build, then we
+ # will need other mandatory options.
+ if ($prvalue ne "install") {
+ $buildonly = 0;
+ } else {
+ # install still limits some mandatory options.
+ $buildonly = 2;
+ }
+ }
+
+ if (defined($opt{$lvalue})) {
+ if (!$override || defined(${$overrides}{$lvalue})) {
+ my $extra = "";
+ if ($override) {
+ $extra = "In the same override section!\n";
+ }
+ die "$name: $.: Option $lvalue defined more than once!\n$extra";
+ }
+ ${$overrides}{$lvalue} = $prvalue;
+ }
+
+ $opt{$lvalue} = $prvalue;
+}
+
+sub set_eval {
+ my ($lvalue, $rvalue, $name) = @_;
+
+ my $prvalue = process_variables($rvalue);
+ my $arr;
+
+ if (defined($evals{$lvalue})) {
+ $arr = $evals{$lvalue};
+ } else {
+ $arr = [];
+ $evals{$lvalue} = $arr;
+ }
+
+ push @{$arr}, $rvalue;
+}
+
+sub set_variable {
+ my ($lvalue, $rvalue) = @_;
+
+ if ($rvalue =~ /^\s*$/) {
+ delete $variable{$lvalue};
+ } else {
+ $rvalue = process_variables($rvalue);
+ $variable{$lvalue} = $rvalue;
+ }
+}
+
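+# Evaluate a single comparison from an IF expression. ==, !=, =~ and
+# !~ are handled as string/regex compares; any other operator (<, >=,
+# ...) is handed to eval as a numeric expression. Returns -1 if the
+# eval fails.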
+sub process_compare {
+ my ($lval, $cmp, $rval) = @_;
+
+ # remove whitespace
+
+ $lval =~ s/^\s*//;
+ $lval =~ s/\s*$//;
+
+ $rval =~ s/^\s*//;
+ $rval =~ s/\s*$//;
+
+ if ($cmp eq "==") {
+ return $lval eq $rval;
+ } elsif ($cmp eq "!=") {
+ return $lval ne $rval;
+ } elsif ($cmp eq "=~") {
+ return $lval =~ m/$rval/;
+ } elsif ($cmp eq "!~") {
+ return $lval !~ m/$rval/;
+ }
+
+ my $statement = "$lval $cmp $rval";
+ my $ret = eval $statement;
+
+ # $@ stores error of eval
+ if ($@) {
+ return -1;
+ }
+
+ return $ret;
+}
+
+sub value_defined {
+ my ($val) = @_;
+
+ return defined($variable{$val}) ||
+ defined($opt{$val});
+}
+
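+# process_expression() below evaluates an IF expression recursively:
+# innermost parentheses are reduced to 1 or 0 first, then || and &&
+# are processed left to right with short-circuiting, e.g.:
+#   IF (${A} == 1 || ${B} == 2) && DEFINED SSH_USER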
+my $d = 0;
+sub process_expression {
+ my ($name, $val) = @_;
+
+ my $c = $d++;
+
+ while ($val =~ s/\(([^\(]*?)\)/\&\&\&\&VAL\&\&\&\&/) {
+ my $express = $1;
+
+ if (process_expression($name, $express)) {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 1 /;
+ } else {
+ $val =~ s/\&\&\&\&VAL\&\&\&\&/ 0 /;
+ }
+ }
+
+ $d--;
+ my $OR = "\\|\\|";
+ my $AND = "\\&\\&";
+
+ while ($val =~ s/^(.*?)($OR|$AND)//) {
+ my $express = $1;
+ my $op = $2;
+
+ if (process_expression($name, $express)) {
+ if ($op eq "||") {
+ return 1;
+ }
+ } else {
+ if ($op eq "&&") {
+ return 0;
+ }
+ }
+ }
+
+ if ($val =~ /(.*)(==|\!=|>=|<=|>|<|=~|\!~)(.*)/) {
+ my $ret = process_compare($1, $2, $3);
+ if ($ret < 0) {
+ die "$name: $.: Unable to process comparison\n";
+ }
+ return $ret;
+ }
+
+ if ($val =~ /^\s*(NOT\s*)?DEFINED\s+(\S+)\s*$/) {
+ if (defined $1) {
+ return !value_defined($2);
+ } else {
+ return value_defined($2);
+ }
+ }
+
+ if ($val =~ /^\s*0\s*$/) {
+ return 0;
+ } elsif ($val =~ /^\s*\d+\s*$/) {
+ return 1;
+ }
+
+ die ("$name: $.: Undefined content $val in if statement\n");
+}
+
+sub process_if {
+ my ($name, $value) = @_;
+
+ # Convert variables and replace undefined ones with 0
+ my $val = process_variables($value, 1);
+ my $ret = process_expression $name, $val;
+
+ return $ret;
+}
+
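+# Parse a ktest config file. A config is made up of DEFAULTS and
+# TEST_START sections containing OPTION = value assignments, with
+# optional IF/ELSE conditionals, INCLUDE files, := config variables
+# and =~ evals. A minimal example (values are illustrative):
+#
+#   DEFAULTS
+#   MACHINE = testbox
+#
+#   TEST_START
+#   TEST_TYPE = build
+#   BUILD_TYPE = defconfig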
+sub __read_config {
+ my ($config, $current_test_num) = @_;
+
+ my $in;
+ open($in, $config) || die "can't read file $config";
+
+ my $name = $config;
+ $name =~ s,.*/(.*),$1,;
+
+ my $test_num = $$current_test_num;
+ my $default = 1;
+ my $repeat = 1;
+ my $num_tests_set = 0;
+ my $skip = 0;
+ my $rest;
+ my $line;
+ my $test_case = 0;
+ my $if = 0;
+ my $if_set = 0;
+ my $override = 0;
+
+ my %overrides;
+
+ while (<$in>) {
+
+ # ignore blank lines and comments
+ next if (/^\s*$/ || /\s*\#/);
+
+ if (/^\s*(TEST_START|DEFAULTS)\b(.*)/) {
+
+ my $type = $1;
+ $rest = $2;
+ $line = $2;
+
+ my $old_test_num;
+ my $old_repeat;
+ $override = 0;
+
+ if ($type eq "TEST_START") {
+
+ if ($num_tests_set) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
+
+ $old_test_num = $test_num;
+ $old_repeat = $repeat;
+
+ $test_num += $repeat;
+ $default = 0;
+ $repeat = 1;
+ } else {
+ $default = 1;
+ }
+
+ # If SKIP is anywhere in the line, the section will be skipped
+ if ($rest =~ s/\s+SKIP\b//) {
+ $skip = 1;
+ } else {
+ $test_case = 1;
+ $skip = 0;
+ }
+
+ if ($rest =~ s/\sELSE\b//) {
+ if (!$if) {
+ die "$name: $.: ELSE found with out matching IF section\n$_";
+ }
+ $if = 0;
+
+ if ($if_set) {
+ $skip = 1;
+ } else {
+ $skip = 0;
+ }
+ }
+
+ if ($rest =~ s/\sIF\s+(.*)//) {
+ if (process_if($name, $1)) {
+ $if_set = 1;
+ } else {
+ $skip = 1;
+ }
+ $if = 1;
+ } else {
+ $if = 0;
+ $if_set = 0;
+ }
+
+ if (!$skip) {
+ if ($type eq "TEST_START") {
+ if ($rest =~ s/\s+ITERATE\s+(\d+)//) {
+ $repeat = $1;
+ $repeat_tests{"$test_num"} = $repeat;
+ }
+ } elsif ($rest =~ s/\sOVERRIDE\b//) {
+ # DEFAULT only
+ $override = 1;
+ # Clear previous overrides
+ %overrides = ();
+ }
+ }
+
+ if (!$skip && $rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after $type\n$_";
+ }
+
+ if ($skip && $type eq "TEST_START") {
+ $test_num = $old_test_num;
+ $repeat = $old_repeat;
+ }
+
+ } elsif (/^\s*ELSE\b(.*)$/) {
+ if (!$if) {
+ die "$name: $.: ELSE found with out matching IF section\n$_";
+ }
+ $rest = $1;
+ if ($if_set) {
+ $skip = 1;
+ $rest = "";
+ } else {
+ $skip = 0;
+
+ if ($rest =~ /\sIF\s+(.*)/) {
+ # May be a ELSE IF section.
+ if (process_if($name, $1)) {
+ $if_set = 1;
+ } else {
+ $skip = 1;
+ }
+ $rest = "";
+ } else {
+ $if = 0;
+ }
+ }
+
+ if ($rest !~ /^\s*$/) {
+ die "$name: $.: Gargbage found after DEFAULTS\n$_";
+ }
+
+ } elsif (/^\s*INCLUDE\s+(\S+)/) {
+
+ next if ($skip);
+
+ if (!$default) {
+ die "$name: $.: INCLUDE can only be done in default sections\n$_";
+ }
+
+ my $file = process_variables($1);
+
+ if ($file !~ m,^/,) {
+ # check the path of the config file first
+ if ($config =~ m,(.*)/,) {
+ if (-f "$1/$file") {
+ $file = "$1/$file";
+ }
+ }
+ }
+
+ if ( ! -r $file ) {
+ die "$name: $.: Can't read file $file\n$_";
+ }
+
+ if (__read_config($file, \$test_num)) {
+ $test_case = 1;
+ }
+
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*=~\s*(.*?)\s*$/) {
+
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ if ($default || $lvalue =~ /\[\d+\]$/) {
+ set_eval($lvalue, $rvalue, $name);
+ } else {
+ my $val = "$lvalue\[$test_num\]";
+ set_eval($val, $rvalue, $name);
+ }
+
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) {
+
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ if (!$default &&
+ ($lvalue eq "NUM_TESTS" ||
+ $lvalue eq "LOG_FILE" ||
+ $lvalue eq "CLEAR_LOG")) {
+ die "$name: $.: $lvalue must be set in DEFAULTS section\n";
+ }
+
+ if ($lvalue eq "NUM_TESTS") {
+ if ($test_num) {
+ die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
+ }
+ if (!$default) {
+ die "$name: $.: NUM_TESTS must be set in default section\n";
+ }
+ $num_tests_set = 1;
+ }
+
+ if ($default || $lvalue =~ /\[\d+\]$/) {
+ set_value($lvalue, $rvalue, $override, \%overrides, $name);
+ } else {
+ my $val = "$lvalue\[$test_num\]";
+ set_value($val, $rvalue, $override, \%overrides, $name);
+
+ if ($repeat > 1) {
+ $repeats{$val} = $repeat;
+ }
+ }
+ } elsif (/^\s*([A-Z_\[\]\d]+)\s*:=\s*(.*?)\s*$/) {
+ next if ($skip);
+
+ my $lvalue = $1;
+ my $rvalue = $2;
+
+ # Process config variables.
+ # Config variables are only active while reading the
+ # config and can be defined anywhere. They also ignore
+ # TEST_START and DEFAULTS, but are skipped if they are
+ # in one of those sections that has SKIP defined.
+ # The same variable can be defined multiple times and
+ # the new one simply overrides the previous one.
+ set_variable($lvalue, $rvalue);
+
+ } else {
+ die "$name: $.: Garbage found in config\n$_";
+ }
+ }
+
+ if ($test_num) {
+ $test_num += $repeat - 1;
+ $opt{"NUM_TESTS"} = $test_num;
+ }
+
+ close($in);
+
+ $$current_test_num = $test_num;
+
+ return $test_case;
+}
+
+sub get_test_case {
+ print "What test case would you like to run?\n";
+ print " (build, install or boot)\n";
+ print " Other tests are available but require editing ktest.conf\n";
+ print " (see tools/testing/ktest/sample.conf)\n";
+ my $ans = <STDIN>;
+ chomp $ans;
+ $default{"TEST_TYPE"} = $ans;
+}
+
+sub read_config {
+ my ($config) = @_;
+
+ my $test_case;
+ my $test_num = 0;
+
+ $test_case = __read_config $config, \$test_num;
+
+ # make sure we have all mandatory configs
+ get_mandatory_configs;
+
+ # was a test specified?
+ if (!$test_case) {
+ print "No test case specified.\n";
+ get_test_case;
+ }
+
+ # set any defaults
+
+ foreach my $default (keys %default) {
+ if (!defined($opt{$default})) {
+ $opt{$default} = $default{$default};
+ }
+ }
+
+ if ($opt{"IGNORE_UNUSED"} == 1) {
+ return;
+ }
+
+ my %not_used;
+
+ # check if there are any stragglers (typos?)
+ foreach my $option (keys %opt) {
+ my $op = $option;
+ # remove per test labels.
+ $op =~ s/\[.*\]//;
+ if (!exists($option_map{$op}) &&
+ !exists($default{$op}) &&
+ !exists($used_options{$op})) {
+ $not_used{$op} = 1;
+ }
+ }
+
+ if (%not_used) {
+ my $s = "s are";
+ $s = " is" if (keys %not_used == 1);
+ print "The following option$s not used; could be a typo:\n";
+ foreach my $option (keys %not_used) {
+ print "$option\n";
+ }
+ print "Set IGRNORE_UNUSED = 1 to have ktest ignore unused variables\n";
+ if (!read_yn "Do you want to continue?") {
+ exit -1;
+ }
+ }
+}
+
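+# Resolve ${OPT} references in an option value for test $i. Lookup
+# order: a self reference uses the default OPT, then OPT[$i], then the
+# parent entry of a repeated test, then the plain OPT, and finally the
+# special KERNEL_VERSION. Unresolved references are left as-is.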
+sub __eval_option {
+ my ($name, $option, $i) = @_;
+
+ # Add space to evaluate the character before $
+ $option = " $option";
+ my $retval = "";
+ my $repeated = 0;
+ my $parent = 0;
+
+ foreach my $test (keys %repeat_tests) {
+ if ($i >= $test &&
+ $i < $test + $repeat_tests{$test}) {
+
+ $repeated = 1;
+ $parent = $test;
+ last;
+ }
+ }
+
+ while ($option =~ /(.*?[^\\])\$\{(.*?)\}(.*)/) {
+ my $start = $1;
+ my $var = $2;
+ my $end = $3;
+
+ # Append beginning of line
+ $retval = "$retval$start";
+
+ # If the iteration option OPT[$i] exists, then use that.
+ # Otherwise, see if the default OPT (without [$i]) exists.
+
+ my $o = "$var\[$i\]";
+ my $parento = "$var\[$parent\]";
+
+ # If a variable contains itself, use the default var
+ if (($var eq $name) && defined($opt{$var})) {
+ $o = $opt{$var};
+ $retval = "$retval$o";
+ } elsif (defined($opt{$o})) {
+ $o = $opt{$o};
+ $retval = "$retval$o";
+ } elsif ($repeated && defined($opt{$parento})) {
+ $o = $opt{$parento};
+ $retval = "$retval$o";
+ } elsif (defined($opt{$var})) {
+ $o = $opt{$var};
+ $retval = "$retval$o";
+ } elsif ($var eq "KERNEL_VERSION" && defined($make)) {
+ # special option KERNEL_VERSION uses kernel version
+ get_version();
+ $retval = "$retval$version";
+ } else {
+ $retval = "$retval\$\{$var\}";
+ }
+
+ $option = $end;
+ }
+
+ $retval = "$retval$option";
+
+ $retval =~ s/^ //;
+
+ return $retval;
+}
+
+sub process_evals {
+ my ($name, $option, $i) = @_;
+
+ my $option_name = "$name\[$i\]";
+ my $ev;
+
+ my $old_option = $option;
+
+ if (defined($evals{$option_name})) {
+ $ev = $evals{$option_name};
+ } elsif (defined($evals{$name})) {
+ $ev = $evals{$name};
+ } else {
+ return $option;
+ }
+
+ for my $e (@{$ev}) {
+ eval "\$option =~ $e";
+ }
+
+ if ($option ne $old_option) {
+ doprint("$name changed from '$old_option' to '$option'\n");
+ }
+
+ return $option;
+}
+
+sub eval_option {
+ my ($name, $option, $i) = @_;
+
+ my $prev = "";
+
+ # Since an option can evaluate to another option,
+ # keep iterating until we do not evaluate any more
+ # options.
+ my $r = 0;
+ while ($prev ne $option) {
+ # Check for recursive evaluations.
+ # 100 deep should be more than enough.
+ if ($r++ > 100) {
+ die "Over 100 evaluations accurred with $option\n" .
+ "Check for recursive variables\n";
+ }
+ $prev = $option;
+ $option = __eval_option($name, $option, $i);
+ }
+
+ $option = process_evals($name, $option, $i);
+
+ return $option;
+}
+
+sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
+
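+# Reboot the machine. If it cannot be reached over ssh, it is power
+# cycled instead. When $time is given, the console is monitored for
+# the kernel banner and REBOOT_SUCCESS_LINE, and a power cycle is
+# forced if the reboot gets stuck.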
+sub reboot {
+ my ($time) = @_;
+ my $powercycle = 0;
+
+ # test if the machine can be connected to within a few seconds
+ my $stat = run_ssh("echo check machine status", $connect_timeout);
+ if (!$stat) {
+ doprint("power cycle\n");
+ $powercycle = 1;
+ }
+
+ if ($powercycle) {
+ run_command "$power_cycle";
+
+ start_monitor;
+ # flush out current monitor
+ # May contain the reboot success line
+ wait_for_monitor 1;
+
+ } else {
+ # Make sure everything has been written to disk
+ run_ssh("sync", 10);
+
+ if (defined($time)) {
+ start_monitor;
+ # flush out current monitor
+ # May contain the reboot success line
+ wait_for_monitor 1;
+ }
+
+ # try to reboot normally
+ if (run_command $reboot) {
+ if (defined($powercycle_after_reboot)) {
+ sleep $powercycle_after_reboot;
+ run_command "$power_cycle";
+ }
+ } else {
+ # nope? power cycle it.
+ run_command "$power_cycle";
+ }
+ }
+
+ if (defined($time)) {
+
+ # We only want to get to the new kernel, don't fail
+ # if we stumble over a call trace.
+ my $save_ignore_errors = $ignore_errors;
+ $ignore_errors = 1;
+
+ # Look for the good kernel to boot
+ if (wait_for_monitor($time, "Linux version")) {
+ # reboot got stuck?
+ doprint "Reboot did not finish. Forcing power cycle\n";
+ run_command "$power_cycle";
+ }
+
+ $ignore_errors = $save_ignore_errors;
+
+ # Still need to wait for the reboot to finish
+ wait_for_monitor($time, $reboot_success_line);
+
+ end_monitor;
+ }
+}
+
+sub reboot_to_good {
+ my ($time) = @_;
+
+ if (defined($switch_to_good)) {
+ run_command $switch_to_good;
+ }
+
+ reboot $time;
+}
+
+sub do_not_reboot {
+ my $i = $iteration;
+
+ return $test_type eq "build" || $no_reboot ||
+ ($test_type eq "patchcheck" && $opt{"PATCHCHECK_TYPE[$i]"} eq "build") ||
+ ($test_type eq "bisect" && $opt{"BISECT_TYPE[$i]"} eq "build") ||
+ ($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
+}
+
+my $in_die = 0;
+
+sub dodie {
+
+ # avoid recursion
+ return if ($in_die);
+ $in_die = 1;
+
+ doprint "CRITICAL FAILURE... ", @_, "\n";
+
+ my $i = $iteration;
+
+ if ($reboot_on_error && !do_not_reboot) {
+
+ doprint "REBOOTING\n";
+ reboot_to_good;
+
+ } elsif ($poweroff_on_error && defined($power_off)) {
+ doprint "POWERING OFF\n";
+ `$power_off`;
+ }
+
+ if (defined($opt{"LOG_FILE"})) {
+ print " See $opt{LOG_FILE} for more info.\n";
+ }
+
+ if ($email_on_error) {
+ send_email("KTEST: critical failure for your [$test_type] test",
+ "Your test started at $script_start_time has failed with:\n@_\n");
+ }
+
+ if ($monitor_cnt) {
+ # restore terminal settings
+ system("stty $stty_orig");
+ }
+
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
+
+ die @_, "\n";
+}
+
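+# Open a pseudo terminal pair for the console reader. The raw ioctls
+# below are the equivalents of unlockpt(3) and ptsname(3), which are
+# not readily available from perl.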
+sub create_pty {
+ my ($ptm, $pts) = @_;
+ my $tmp;
+ my $TIOCSPTLCK = 0x40045431;
+ my $TIOCGPTN = 0x80045430;
+
+ sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
+ dodie "Cant open /dev/ptmx";
+
+ # unlockpt()
+ $tmp = pack("i", 0);
+ ioctl($ptm, $TIOCSPTLCK, $tmp) or
+ dodie "ioctl TIOCSPTLCK for /dev/ptmx failed";
+
+ # ptsname()
+ ioctl($ptm, $TIOCGPTN, $tmp) or
+ dodie "ioctl TIOCGPTN for /dev/ptmx failed";
+ $tmp = unpack("i", $tmp);
+
+ sysopen($pts, "/dev/pts/$tmp", O_RDWR | O_NONBLOCK) or
+ dodie "Can't open /dev/pts/$tmp";
+}
+
+sub exec_console {
+ my ($ptm, $pts) = @_;
+
+ close($ptm);
+
+ close(\*STDIN);
+ close(\*STDOUT);
+ close(\*STDERR);
+
+ open(\*STDIN, '<&', $pts);
+ open(\*STDOUT, '>&', $pts);
+ open(\*STDERR, '>&', $pts);
+
+ close($pts);
+
+ exec $console or
+ dodie "Can't open console $console";
+}
+
+sub open_console {
+ my ($ptm) = @_;
+ my $pts = \*PTSFD;
+ my $pid;
+
+ # save terminal settings
+ $stty_orig = `stty -g`;
+
+ # place terminal in cbreak mode so that stdin can be read one character at
+ # a time without having to wait for a newline
+ system("stty -icanon -echo -icrnl");
+
+ create_pty($ptm, $pts);
+
+ $pid = fork;
+
+ if (!$pid) {
+ # child
+ exec_console($ptm, $pts);
+ }
+
+ # parent
+ close($pts);
+
+ return $pid;
+
+ open(PTSFD, "Stop perl from warning about single use of PTSFD");
+}
+
+sub close_console {
+ my ($fp, $pid) = @_;
+
+ doprint "kill child process $pid\n";
+ kill $close_console_signal, $pid;
+
+ doprint "wait for child process $pid to exit\n";
+ waitpid($pid, 0);
+
+ print "closing!\n";
+ close($fp);
+
+ # restore terminal settings
+ system("stty $stty_orig");
+}
+
+sub start_monitor {
+ if ($monitor_cnt++) {
+ return;
+ }
+ $monitor_fp = \*MONFD;
+ $monitor_pid = open_console $monitor_fp;
+
+ return;
+
+ open(MONFD, "Stop perl from warning about single use of MONFD");
+}
+
+sub end_monitor {
+ return if (!defined $console);
+ if (--$monitor_cnt) {
+ return;
+ }
+ close_console($monitor_fp, $monitor_pid);
+}
+
+sub wait_for_monitor {
+ my ($time, $stop) = @_;
+ my $full_line = "";
+ my $line;
+ my $booted = 0;
+ my $start_time = time;
+ my $skip_call_trace = 0;
+ my $bug = 0;
+ my $bug_ignored = 0;
+ my $now;
+
+ doprint "** Wait for monitor to settle down **\n";
+
+ # read the monitor and wait for the system to calm down
+ while (!$booted) {
+ $line = wait_for_input($monitor_fp, $time);
+ last if (!defined($line));
+ print "$line";
+ $full_line .= $line;
+
+ if (defined($stop) && $full_line =~ /$stop/) {
+ doprint "wait for monitor detected $stop\n";
+ $booted = 1;
+ }
+
+ if ($full_line =~ /\[ backtrace testing \]/) {
+ $skip_call_trace = 1;
+ }
+
+ if ($full_line =~ /call trace:/i) {
+ if (!$bug && !$skip_call_trace) {
+ if ($ignore_errors) {
+ $bug_ignored = 1;
+ } else {
+ $bug = 1;
+ }
+ }
+ }
+
+ if ($full_line =~ /\[ end of backtrace testing \]/) {
+ $skip_call_trace = 0;
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $bug = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ $now = time;
+ if ($now - $start_time >= $max_monitor_wait) {
+ doprint "Exiting monitor flush due to hitting MAX_MONITOR_WAIT\n";
+ return 1;
+ }
+ }
+ print "** Monitor flushed **\n";
+
+ # if stop is defined but wasn't hit, return error
+ # used by reboot (which wants to see a reboot)
+ if (defined($stop) && !$booted) {
+ $bug = 1;
+ }
+ return $bug;
+}
+
+sub save_logs {
+ my ($result, $basedir) = @_;
+ my @t = localtime;
+ my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+ 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+
+ my $type = $build_type;
+ if ($type =~ /useconfig/) {
+ $type = "useconfig";
+ }
+
+ my $dir = "$machine-$test_type-$type-$result-$date";
+
+ $dir = "$basedir/$dir";
+
+ if (!-d $dir) {
+ mkpath($dir) or
+ dodie "can't create $dir";
+ }
+
+ my %files = (
+ "config" => $output_config,
+ "buildlog" => $buildlog,
+ "dmesg" => $dmesg,
+ "testlog" => $testlog,
+ );
+
+ while (my ($name, $source) = each(%files)) {
+ if (-f "$source") {
+ cp "$source", "$dir/$name" or
+ dodie "failed to copy $source";
+ }
+ }
+
+ doprint "*** Saved info to $dir ***\n";
+}
+
+sub fail {
+
+ if ($die_on_failure) {
+ dodie @_;
+ }
+
+ doprint "FAILED\n";
+
+ my $i = $iteration;
+
+ # no need to reboot for just building.
+ if (!do_not_reboot) {
+ doprint "REBOOTING\n";
+ reboot_to_good $sleep_time;
+ }
+
+ my $name = "";
+
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
+
+ print_times;
+
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+
+ if (defined($store_failures)) {
+ save_logs "fail", $store_failures;
+ }
+
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
+
+ return 1;
+}
+
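+# Run a shell command, teeing its output to the log file. $redirect
+# may be 1 (echo the output to stdout) or a file name to write the
+# output to, and $timeout kills the command if it stalls for too long.
+# Returns 1 on success, 0 on failure ($run_command_status has the
+# exit code).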
+sub run_command {
+ my ($command, $redirect, $timeout) = @_;
+ my $start_time;
+ my $end_time;
+ my $dolog = 0;
+ my $dord = 0;
+ my $dostdout = 0;
+ my $pid;
+
+ $command =~ s/\$SSH_USER/$ssh_user/g;
+ $command =~ s/\$MACHINE/$machine/g;
+
+ doprint("$command ... ");
+ $start_time = time;
+
+ $pid = open(CMD, "$command 2>&1 |") or
+ (fail "unable to exec $command" and return 0);
+
+ if (defined($opt{"LOG_FILE"})) {
+ open(LOG, ">>$opt{LOG_FILE}") or
+ dodie "failed to write to log";
+ $dolog = 1;
+ }
+
+ if (defined($redirect)) {
+ if ($redirect eq 1) {
+ $dostdout = 1;
+ # Have the output of the command on its own line
+ doprint "\n";
+ } else {
+ open (RD, ">$redirect") or
+ dodie "failed to write to redirect $redirect";
+ $dord = 1;
+ }
+ }
+
+ my $hit_timeout = 0;
+
+ while (1) {
+ my $fp = \*CMD;
+ if (defined($timeout)) {
+ doprint "timeout = $timeout\n";
+ }
+ my $line = wait_for_input($fp, $timeout);
+ if (!defined($line)) {
+ my $now = time;
+ if (defined($timeout) && (($now - $start_time) >= $timeout)) {
+ doprint "Hit timeout of $timeout, killing process\n";
+ $hit_timeout = 1;
+ kill 9, $pid;
+ }
+ last;
+ }
+ print LOG $line if ($dolog);
+ print RD $line if ($dord);
+ print $line if ($dostdout);
+ }
+
+ waitpid($pid, 0);
+ # shift 8 for real exit status
+ $run_command_status = $? >> 8;
+
+ close(CMD);
+ close(LOG) if ($dolog);
+ close(RD) if ($dord);
+
+ $end_time = time;
+ my $delta = $end_time - $start_time;
+
+ if ($delta == 1) {
+ doprint "[1 second] ";
+ } else {
+ doprint "[$delta seconds] ";
+ }
+
+ if ($hit_timeout) {
+ $run_command_status = 1;
+ }
+
+ if ($run_command_status) {
+ doprint "FAILED!\n";
+ } else {
+ doprint "SUCCESS\n";
+ }
+
+ return !$run_command_status;
+}
+
+sub run_ssh {
+ my ($cmd, $timeout) = @_;
+ my $cp_exec = $ssh_exec;
+
+ $cp_exec =~ s/\$SSH_COMMAND/$cmd/g;
+ return run_command "$cp_exec", undef , $timeout;
+}
+
+sub run_scp {
+ my ($src, $dst, $cp_scp) = @_;
+
+ $cp_scp =~ s/\$SRC_FILE/$src/g;
+ $cp_scp =~ s/\$DST_FILE/$dst/g;
+
+ return run_command "$cp_scp";
+}
+
+sub run_scp_install {
+ my ($src, $dst) = @_;
+
+ my $cp_scp = $scp_to_target_install;
+
+ return run_scp($src, $dst, $cp_scp);
+}
+
+sub run_scp_mod {
+ my ($src, $dst) = @_;
+
+ my $cp_scp = $scp_to_target;
+
+ return run_scp($src, $dst, $cp_scp);
+}
+
+sub get_grub2_index {
+
+ return if (defined($grub_number) && defined($last_grub_menu) &&
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+ $last_machine eq $machine);
+
+ doprint "Find grub2 menu ... ";
+ $grub_number = -1;
+
+ my $ssh_grub = $ssh_exec;
+ $ssh_grub =~ s,\$SSH_COMMAND,cat $grub_file,g;
+
+ open(IN, "$ssh_grub |")
+ or dodie "unable to get $grub_file";
+
+ my $found = 0;
+
+ while (<IN>) {
+ if (/^menuentry.*$grub_menu/) {
+ $grub_number++;
+ $found = 1;
+ last;
+ } elsif (/^menuentry\s|^submenu\s/) {
+ $grub_number++;
+ }
+ }
+ close(IN);
+
+ dodie "Could not find '$grub_menu' in $grub_file on $machine"
+ if (!$found);
+ doprint "$grub_number\n";
+ $last_grub_menu = $grub_menu;
+ $last_machine = $machine;
+}
+
+sub get_grub_index {
+
+ if ($reboot_type eq "grub2") {
+ get_grub2_index;
+ return;
+ }
+
+ if ($reboot_type ne "grub") {
+ return;
+ }
+ return if (defined($grub_number) && defined($last_grub_menu) &&
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+ $last_machine eq $machine);
+
+ doprint "Find grub menu ... ";
+ $grub_number = -1;
+
+ my $ssh_grub = $ssh_exec;
+ $ssh_grub =~ s,\$SSH_COMMAND,cat /boot/grub/menu.lst,g;
+
+ open(IN, "$ssh_grub |")
+ or dodie "unable to get menu.lst";
+
+ my $found = 0;
+
+ while (<IN>) {
+ if (/^\s*title\s+$grub_menu\s*$/) {
+ $grub_number++;
+ $found = 1;
+ last;
+ } elsif (/^\s*title\s/) {
+ $grub_number++;
+ }
+ }
+ close(IN);
+
+ dodie "Could not find '$grub_menu' in /boot/grub/menu on $machine"
+ if (!$found);
+ doprint "$grub_number\n";
+ $last_grub_menu = $grub_menu;
+ $last_machine = $machine;
+}
+
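+# Wait up to $time seconds (TIMEOUT by default) for a line of input
+# from $fp. Anything typed on stdin is forwarded to the console, so
+# the user can interact with the target while ktest watches it.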
+sub wait_for_input
+{
+ my ($fp, $time) = @_;
+ my $start_time;
+ my $rin;
+ my $rout;
+ my $nr;
+ my $buf;
+ my $line;
+ my $ch;
+
+ if (!defined($time)) {
+ $time = $timeout;
+ }
+
+ $rin = '';
+ vec($rin, fileno($fp), 1) = 1;
+ vec($rin, fileno(\*STDIN), 1) = 1;
+
+ $start_time = time;
+
+ while (1) {
+ $nr = select($rout=$rin, undef, undef, $time);
+
+ last if ($nr <= 0);
+
+ # copy data from stdin to the console
+ if (vec($rout, fileno(\*STDIN), 1) == 1) {
+ $nr = sysread(\*STDIN, $buf, 1000);
+ syswrite($fp, $buf, $nr) if ($nr > 0);
+ }
+
+ # The timeout is based on time waiting for the fp data
+ if (vec($rout, fileno($fp), 1) != 1) {
+ last if (defined($time) && (time - $start_time > $time));
+ next;
+ }
+
+ $line = "";
+
+ # try to read one char at a time
+ while (sysread $fp, $ch, 1) {
+ $line .= $ch;
+ last if ($ch eq "\n");
+ }
+
+ last if (!length($line));
+
+ return $line;
+ }
+ return undef;
+}
+
+sub reboot_to {
+ if (defined($switch_to_test)) {
+ run_command $switch_to_test;
+ }
+
+ if ($reboot_type eq "grub") {
+ run_ssh "'(echo \"savedefault --default=$grub_number --once\" | grub --batch)'";
+ } elsif ($reboot_type eq "grub2") {
+ run_ssh "$grub_reboot $grub_number";
+ } elsif ($reboot_type eq "syslinux") {
+ run_ssh "$syslinux --once \\\"$syslinux_label\\\" $syslinux_path";
+ } elsif (defined $reboot_script) {
+ run_command "$reboot_script";
+ }
+ reboot;
+}
+
+sub get_sha1 {
+ my ($commit) = @_;
+
+ doprint "git rev-list --max-count=1 $commit ... ";
+ my $sha1 = `git rev-list --max-count=1 $commit`;
+ my $ret = $?;
+
+ logit $sha1;
+
+ if ($ret) {
+ doprint "FAILED\n";
+ dodie "Failed to get git $commit";
+ }
+
+ print "SUCCESS\n";
+
+ chomp $sha1;
+
+ return $sha1;
+}
+
+sub monitor {
+ my $booted = 0;
+ my $bug = 0;
+ my $bug_ignored = 0;
+ my $skip_call_trace = 0;
+ my $loops;
+
+ my $start_time = time;
+
+ wait_for_monitor 5;
+
+ my $line;
+ my $full_line = "";
+
+ open(DMESG, "> $dmesg") or
+ dodie "unable to write to $dmesg";
+
+ reboot_to;
+
+ my $success_start;
+ my $failure_start;
+ my $monitor_start = time;
+ my $done = 0;
+ my $version_found = 0;
+
+ while (!$done) {
+
+ if ($bug && defined($stop_after_failure) &&
+ $stop_after_failure >= 0) {
+ my $time = $stop_after_failure - (time - $failure_start);
+ $line = wait_for_input($monitor_fp, $time);
+ if (!defined($line)) {
+ doprint "bug timed out after $booted_timeout seconds\n";
+ doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+ last;
+ }
+ } elsif ($booted) {
+ $line = wait_for_input($monitor_fp, $booted_timeout);
+ if (!defined($line)) {
+ my $s = $booted_timeout == 1 ? "" : "s";
+ doprint "Successful boot found: break after $booted_timeout second$s\n";
+ last;
+ }
+ } else {
+ $line = wait_for_input($monitor_fp);
+ if (!defined($line)) {
+ my $s = $timeout == 1 ? "" : "s";
+ doprint "Timed out after $timeout second$s\n";
+ last;
+ }
+ }
+
+ doprint $line;
+ print DMESG $line;
+
+ # we are not guaranteed to get a full line
+ $full_line .= $line;
+
+ if ($full_line =~ /$success_line/) {
+ $booted = 1;
+ $success_start = time;
+ }
+
+ if ($booted && defined($stop_after_success) &&
+ $stop_after_success >= 0) {
+ my $now = time;
+ if ($now - $success_start >= $stop_after_success) {
+ doprint "Test forced to stop after $stop_after_success seconds after success\n";
+ last;
+ }
+ }
+
+ if ($full_line =~ /\[ backtrace testing \]/) {
+ $skip_call_trace = 1;
+ }
+
+ if ($full_line =~ /call trace:/i) {
+ if (!$bug && !$skip_call_trace) {
+ if ($ignore_errors) {
+ $bug_ignored = 1;
+ } else {
+ $bug = 1;
+ $failure_start = time;
+ }
+ }
+ }
+
+ if ($bug && defined($stop_after_failure) &&
+ $stop_after_failure >= 0) {
+ my $now = time;
+ if ($now - $failure_start >= $stop_after_failure) {
+ doprint "Test forced to stop after $stop_after_failure seconds after failure\n";
+ last;
+ }
+ }
+
+ if ($full_line =~ /\[ end of backtrace testing \]/) {
+ $skip_call_trace = 0;
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $failure_start = time;
+ $bug = 1;
+ }
+
+ # Detect triple faults by testing the banner
+ if ($full_line =~ /\bLinux version (\S+).*\n/) {
+ if ($1 eq $version) {
+ $version_found = 1;
+ } elsif ($version_found && $detect_triplefault) {
+ # We already booted into the kernel we are testing,
+ # but now we booted into another kernel?
+ # Consider this a triple fault.
+ doprint "Already booted in Linux kernel $version, but now\n";
+ doprint "we booted into Linux kernel $1.\n";
+ doprint "Assuming that this is a triple fault.\n";
+ doprint "To disable this: set DETECT_TRIPLE_FAULT to 0\n";
+ last;
+ }
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+
+ if ($stop_test_after > 0 && !$booted && !$bug) {
+ if (time - $monitor_start > $stop_test_after) {
+ doprint "STOP_TEST_AFTER ($stop_test_after seconds) timed out\n";
+ $done = 1;
+ }
+ }
+ }
+
+ my $end_time = time;
+ $reboot_time = $end_time - $start_time;
+
+ close(DMESG);
+
+ if ($bug) {
+ return 0 if ($in_bisect);
+ fail "failed - got a bug report" and return 0;
+ }
+
+ if (!$booted) {
+ return 0 if ($in_bisect);
+ fail "failed - never got a boot prompt." and return 0;
+ }
+
+ if ($bug_ignored) {
+ doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+ }
+
+ return 1;
+}
+
+sub eval_kernel_version {
+ my ($option) = @_;
+
+ $option =~ s/\$KERNEL_VERSION/$version/g;
+
+ return $option;
+}
+
+sub do_post_install {
+
+ return if (!defined($post_install));
+
+ my $cp_post_install = eval_kernel_version $post_install;
+ run_command "$cp_post_install" or
+ dodie "Failed to run post install";
+}
+
+# Sometimes the reboot fails and hangs. We try to ssh to the box,
+# and if that fails, we force another reboot, which should power cycle it.
+sub test_booted {
+ if (!run_ssh "echo testing connection") {
+ reboot $sleep_time;
+ }
+}
+
+sub install {
+
+ return if ($no_install);
+
+ my $start_time = time;
+
+ if (defined($pre_install)) {
+ my $cp_pre_install = eval_kernel_version $pre_install;
+ run_command "$cp_pre_install" or
+ dodie "Failed to run pre install";
+ }
+
+ my $cp_target = eval_kernel_version $target_image;
+
+ test_booted;
+
+ run_scp_install "$outputdir/$build_target", "$cp_target" or
+ dodie "failed to copy image";
+
+ # should we process modules?
+ my $install_mods = 0;
+ open(IN, "$output_config") or dodie("Can't read config file");
+ while (<IN>) {
+ if (/CONFIG_MODULES(=y)?/) {
+ if (defined($1)) {
+ $install_mods = 1;
+ last;
+ }
+ }
+ }
+ close(IN);
+
+ if (!$install_mods) {
+ do_post_install;
+ doprint "No modules needed\n";
+ my $end_time = time;
+ $install_time = $end_time - $start_time;
+ return;
+ }
+
+ run_command "$make INSTALL_MOD_STRIP=1 INSTALL_MOD_PATH=$tmpdir modules_install" or
+ dodie "Failed to install modules";
+
+ my $modlib = "/lib/modules/$version";
+ my $modtar = "ktest-mods.tar.bz2";
+
+ run_ssh "rm -rf $modlib" or
+ dodie "failed to remove old mods: $modlib";
+
+ # would be nice if scp -r did not follow symbolic links
+ run_command "cd $tmpdir && tar -cjf $modtar lib/modules/$version" or
+ dodie "making tarball";
+
+ run_scp_mod "$tmpdir/$modtar", "/tmp" or
+ dodie "failed to copy modules";
+
+ unlink "$tmpdir/$modtar";
+
+ run_ssh "'(cd / && tar xjf /tmp/$modtar)'" or
+ dodie "failed to tar modules";
+
+ run_ssh "rm -f /tmp/$modtar";
+
+ do_post_install;
+
+ my $end_time = time;
+ $install_time = $end_time - $start_time;
+}
+
+sub get_version {
+ # get the release name
+ return if ($have_version);
+ doprint "$make kernelrelease ... ";
+ $version = `$make -s kernelrelease | tail -1`;
+ chomp($version);
+ doprint "$version\n";
+ $have_version = 1;
+}
+
+sub start_monitor_and_install {
+ # Make sure the stable kernel has finished booting
+
+ # Install bisects don't need a console
+ if (defined $console) {
+ start_monitor;
+ wait_for_monitor 5;
+ end_monitor;
+ }
+
+ get_grub_index;
+ get_version;
+ install;
+
+ start_monitor if (defined $console);
+ return monitor;
+}
+
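+# $utf8_quote below matches the UTF-8 byte sequences of the curly
+# quotes U+2018 and U+2019 that some compilers use in diagnostics.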
+my $check_build_re = ".*:.*(warning|error|Error):.*";
+my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
+
+sub process_warning_line {
+ my ($line) = @_;
+
+ chomp $line;
+
+ # for distcc heterogeneous systems, some compilers
+ # do things differently causing warning lines
+ # to be slightly different. This makes an attempt
+ # to fix those issues.
+
+ # chop off the index into the line
+ # using distcc, some compilers give different indexes
+ # depending on white space
+ $line =~ s/^(\s*\S+:\d+:)\d+/$1/;
+
+ # Some compilers use UTF-8 extended for quotes and some don't.
+ $line =~ s/$utf8_quote/'/g;
+
+ return $line;
+}
+
+# Read buildlog and check against warnings file for any
+# new warnings.
+#
+# Returns 1 if OK
+# 0 otherwise
+sub check_buildlog {
+ return 1 if (!defined $warnings_file);
+
+ my %warnings_list;
+
+ # Failed builds should not reboot the target
+ my $save_no_reboot = $no_reboot;
+ $no_reboot = 1;
+
+ if (-f $warnings_file) {
+ open(IN, $warnings_file) or
+ dodie "Error opening $warnings_file";
+
+ while (<IN>) {
+ if (/$check_build_re/) {
+ my $warning = process_warning_line $_;
+
+ $warnings_list{$warning} = 1;
+ }
+ }
+ close(IN);
+ }
+
+ # If the warnings file didn't exist but WARNINGS_FILE is set,
+ # then we fail on any warning!
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+ if (/$check_build_re/) {
+ my $warning = process_warning_line $_;
+
+ if (!defined $warnings_list{$warning}) {
+ fail "New warning found (not in $warnings_file)\n$_\n";
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
+ }
+ }
+ $no_reboot = $save_no_reboot;
+ close(IN);
+
+ return 1;
+}
+
+sub check_patch_buildlog {
+ my ($patch) = @_;
+
+ my @files = `git show $patch | diffstat -l`;
+
+ foreach my $file (@files) {
+ chomp $file;
+ }
+
+ open(IN, "git show $patch |") or
+ dodie "failed to show $patch";
+ while (<IN>) {
+ if (m,^--- a/(.*),) {
+ chomp $1;
+ $files[$#files] = $1;
+ }
+ }
+ close(IN);
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+ if (/^\s*(.*?):.*(warning|error)/) {
+ my $err = $1;
+ foreach my $file (@files) {
+ my $fullpath = "$builddir/$file";
+ if ($file eq $err || $fullpath eq $err) {
+ fail "$file built with warnings" and return 0;
+ }
+ }
+ }
+ }
+ close(IN);
+
+ return 1;
+}
+
+sub apply_min_config {
+ my $outconfig = "$output_config.new";
+
+ # Read the config file and remove anything that
+ # is in the force_config hash (from minconfig and others)
+ # then add the force config back.
+
+ doprint "Applying minimum configurations into $output_config.new\n";
+
+ open (OUT, ">$outconfig") or
+ dodie "Can't create $outconfig";
+
+ if (-f $output_config) {
+ open (IN, $output_config) or
+ dodie "Failed to open $output_config";
+ while (<IN>) {
+ if (/^(# )?(CONFIG_[^\s=]*)/) {
+ next if (defined($force_config{$2}));
+ }
+ print OUT;
+ }
+ close IN;
+ }
+ foreach my $config (keys %force_config) {
+ print OUT "$force_config{$config}\n";
+ }
+ close OUT;
+
+ run_command "mv $outconfig $output_config";
+}
+
+sub make_oldconfig {
+
+ my @force_list = keys %force_config;
+
+ if ($#force_list >= 0) {
+ apply_min_config;
+ }
+
+ if (!run_command "$make olddefconfig") {
+ # Perhaps olddefconfig doesn't exist in this version of the kernel
+ # try oldnoconfig
+ doprint "olddefconfig failed, trying make oldnoconfig\n";
+ if (!run_command "$make oldnoconfig") {
+ doprint "oldnoconfig failed, trying yes '' | make oldconfig\n";
+ # try a yes '' | oldconfig
+ run_command "yes '' | $make oldconfig" or
+ dodie "failed make config oldconfig";
+ }
+ }
+}
+
+# read a config file and use this to force new configs.
+sub load_force_config {
+ my ($config) = @_;
+
+ doprint "Loading force configs from $config\n";
+ open(IN, $config) or
+ dodie "failed to read $config";
+ while (<IN>) {
+ chomp;
+ if (/^(CONFIG[^\s=]*)(\s*=.*)/) {
+ $force_config{$1} = $_;
+ } elsif (/^# (CONFIG_\S*) is not set/) {
+ $force_config{$1} = $_;
+ }
+ }
+ close IN;
+}
+
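+# Build the kernel. $type is a make config target (defconfig,
+# randconfig, oldconfig, ...) or "useconfig:<file>" to start from an
+# existing .config; the min config is folded back in via make_oldconfig.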
+sub build {
+ my ($type) = @_;
+
+ unlink $buildlog;
+
+ my $start_time = time;
+
+ # Failed builds should not reboot the target
+ my $save_no_reboot = $no_reboot;
+ $no_reboot = 1;
+
+ # Calculate a new version from here.
+ $have_version = 0;
+
+ if (defined($pre_build)) {
+ my $ret = run_command $pre_build;
+ if (!$ret && defined($pre_build_die) &&
+ $pre_build_die) {
+ dodie "failed to pre_build\n";
+ }
+ }
+
+ if ($type =~ /^useconfig:(.*)/) {
+ run_command "cp $1 $output_config" or
+ dodie "could not copy $1 to .config";
+
+ $type = "oldconfig";
+ }
+
+ # old config can ask questions
+ if ($type eq "oldconfig") {
+ $type = "olddefconfig";
+
+ # allow for empty configs
+ run_command "touch $output_config";
+
+ if (!$noclean) {
+ run_command "mv $output_config $outputdir/config_temp" or
+ dodie "moving .config";
+
+ run_command "$make mrproper" or dodie "make mrproper";
+
+ run_command "mv $outputdir/config_temp $output_config" or
+ dodie "moving config_temp";
+ }
+
+ } elsif (!$noclean) {
+ unlink "$output_config";
+ run_command "$make mrproper" or
+ dodie "make mrproper";
+ }
+
+ # add something to distinguish this build
+ open(OUT, "> $outputdir/localversion") or dodie("Can't make localversion file");
+ print OUT "$localversion\n";
+ close(OUT);
+
+ if (defined($minconfig)) {
+ load_force_config($minconfig);
+ }
+
+ if ($type ne "olddefconfig") {
+ run_command "$make $type" or
+ dodie "failed make config";
+ }
+ # Run old config regardless, to enforce min configurations
+ make_oldconfig;
+
+ my $build_ret = run_command "$make $build_options", $buildlog;
+
+ if (defined($post_build)) {
+ # Because a post build may change the kernel version
+ # do it now.
+ get_version;
+ my $ret = run_command $post_build;
+ if (!$ret && defined($post_build_die) &&
+ $post_build_die) {
+ dodie "failed to post_build\n";
+ }
+ }
+
+ if (!$build_ret) {
+ # bisect may need this to pass
+ if ($in_bisect) {
+ $no_reboot = $save_no_reboot;
+ return 0;
+ }
+ fail "failed build" and return 0;
+ }
+
+ $no_reboot = $save_no_reboot;
+
+ my $end_time = time;
+ $build_time = $end_time - $start_time;
+
+ return 1;
+}
+
+sub halt {
+ if (!run_ssh "halt" or defined($power_off)) {
+ if (defined($poweroff_after_halt)) {
+ sleep $poweroff_after_halt;
+ run_command "$power_off";
+ }
+ } else {
+ # nope? then zap it!
+ run_command "$power_off";
+ }
+}
+
+sub success {
+ my ($i) = @_;
+
+ $successes++;
+
+ my $name = "";
+
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
+
+ print_times;
+
+ doprint "\n\n*******************************************\n";
+ doprint "*******************************************\n";
+ doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
+
+ if (defined($store_successes)) {
+ save_logs "success", $store_successes;
+ }
+
+ if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
+ doprint "Reboot and wait $sleep_time seconds\n";
+ reboot_to_good $sleep_time;
+ }
+
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
+}
+
+sub answer_bisect {
+ for (;;) {
+ doprint "Pass, fail, or skip? [p/f/s]";
+ my $ans = <STDIN>;
+ chomp $ans;
+ if ($ans eq "p" || $ans eq "P") {
+ return 1;
+ } elsif ($ans eq "f" || $ans eq "F") {
+ return 0;
+ } elsif ($ans eq "s" || $ans eq "S") {
+ return -1;
+ } else {
+ print "Please answer 'p', 'f', or 's'\n";
+ }
+ }
+}
+
+sub child_run_test {
+
+ # child should have no power
+ $reboot_on_error = 0;
+ $poweroff_on_error = 0;
+ $die_on_failure = 1;
+
+ run_command $run_test, $testlog;
+
+ exit $run_command_status;
+}
+
+my $child_done;
+
+sub child_finished {
+ $child_done = 1;
+}
+
+sub do_run_test {
+ my $child_pid;
+ my $child_exit;
+ my $line;
+ my $full_line;
+ my $bug = 0;
+ my $bug_ignored = 0;
+
+ my $start_time = time;
+
+ wait_for_monitor 1;
+
+ doprint "run test $run_test\n";
+
+ $child_done = 0;
+
+ $SIG{CHLD} = "child_finished";
+
+ $child_pid = fork;
+
+ child_run_test if (!$child_pid);
+
+ $full_line = "";
+
+ do {
+ $line = wait_for_input($monitor_fp, 1);
+ if (defined($line)) {
+
+ # we are not guaranteed to get a full line
+ $full_line .= $line;
+ doprint $line;
+
+ if ($full_line =~ /call trace:/i) {
+ if ($ignore_errors) {
+ $bug_ignored = 1;
+ } else {
+ $bug = 1;
+ }
+ }
+
+ if ($full_line =~ /Kernel panic -/) {
+ $bug = 1;
+ }
+
+ if ($line =~ /\n/) {
+ $full_line = "";
+ }
+ }
+ } while (!$child_done && !$bug);
+
+ if (!$bug && $bug_ignored) {
+ doprint "WARNING: Call Trace detected but ignored due to IGNORE_ERRORS=1\n";
+ }
+
+ if ($bug) {
+ my $failure_start = time;
+ my $now;
+ do {
+ $line = wait_for_input($monitor_fp, 1);
+ if (defined($line)) {
+ doprint $line;
+ }
+ $now = time;
+ if ($now - $failure_start >= $stop_after_failure) {
+ last;
+ }
+ } while (defined($line));
+
+ doprint "Detected kernel crash!\n";
+ # kill the child with extreme prejudice
+ kill 9, $child_pid;
+ }
+
+ waitpid $child_pid, 0;
+ $child_exit = $? >> 8;
+
+ my $end_time = time;
+ $test_time = $end_time - $start_time;
+
+ if (!$bug && $in_bisect) {
+ if (defined($bisect_ret_good)) {
+ if ($child_exit == $bisect_ret_good) {
+ return 1;
+ }
+ }
+ if (defined($bisect_ret_skip)) {
+ if ($child_exit == $bisect_ret_skip) {
+ return -1;
+ }
+ }
+ if (defined($bisect_ret_abort)) {
+ if ($child_exit == $bisect_ret_abort) {
+ fail "test abort" and return -2;
+ }
+ }
+ if (defined($bisect_ret_bad)) {
+ if ($child_exit == $bisect_ret_bad) {
+ return 0;
+ }
+ }
+ if (defined($bisect_ret_default)) {
+ if ($bisect_ret_default eq "good") {
+ return 1;
+ } elsif ($bisect_ret_default eq "bad") {
+ return 0;
+ } elsif ($bisect_ret_default eq "skip") {
+ return -1;
+ } elsif ($bisect_ret_default eq "abort") {
+ return -2;
+ } else {
+ fail "unknown default action: $bisect_ret_default"
+ and return -2;
+ }
+ }
+ }
+
+ if ($bug || $child_exit) {
+ return 0 if $in_bisect;
+ fail "test failed" and return 0;
+ }
+ return 1;
+}
+
+sub run_git_bisect {
+ my ($command) = @_;
+
+ doprint "$command ... ";
+
+ my $output = `$command 2>&1`;
+ my $ret = $?;
+
+ logit $output;
+
+ if ($ret) {
+ doprint "FAILED\n";
+ dodie "Failed to git bisect";
+ }
+
+ doprint "SUCCESS\n";
+ if ($output =~ m/^(Bisecting: .*\(roughly \d+ steps?\))\s+\[([[:xdigit:]]+)\]/) {
+ doprint "$1 [$2]\n";
+ } elsif ($output =~ m/^([[:xdigit:]]+) is the first bad commit/) {
+ $bisect_bad_commit = $1;
+ doprint "Found bad commit... $1\n";
+ return 0;
+ } else {
+ # we already logged it, just print it now.
+ print $output;
+ }
+
+ return 1;
+}
+
+sub bisect_reboot {
+ doprint "Reboot and sleep $bisect_sleep_time seconds\n";
+ reboot_to_good $bisect_sleep_time;
+}
+
+# returns 1 on success, 0 on failure, -1 on skip
+sub run_bisect_test {
+ my ($type, $buildtype) = @_;
+
+ my $failed = 0;
+ my $result;
+ my $output;
+ my $ret;
+
+ $in_bisect = 1;
+
+ build $buildtype or $failed = 1;
+
+ if ($type ne "build") {
+ if ($failed && $bisect_skip) {
+ $in_bisect = 0;
+ return -1;
+ }
+ dodie "Failed on build" if $failed;
+
+ # Now boot the box
+ start_monitor_and_install or $failed = 1;
+
+ if ($type ne "boot") {
+ if ($failed && $bisect_skip) {
+ end_monitor;
+ bisect_reboot;
+ $in_bisect = 0;
+ return -1;
+ }
+ dodie "Failed on boot" if $failed;
+
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ }
+
+ if ($failed) {
+ $result = 0;
+ } else {
+ $result = 1;
+ }
+
+ # reboot the box to a kernel we can ssh to
+ if ($type ne "build") {
+ bisect_reboot;
+ }
+ $in_bisect = 0;
+
+ return $result;
+}
+
+sub run_bisect {
+ my ($type) = @_;
+ my $buildtype = "oldconfig";
+
+ # If we have a minconfig, use it for the build.
+ if (defined($minconfig)) {
+ $buildtype = "useconfig:$minconfig";
+ }
+
+ # If the user sets BISECT_TRIES to less than 1, then running
+ # no tries counts as a success.
+ my $ret = 1;
+
+ # Still let the user manually decide that though.
+ if ($bisect_tries < 1 && $bisect_manual) {
+ $ret = answer_bisect;
+ }
+
+ for (my $i = 0; $i < $bisect_tries; $i++) {
+ if ($bisect_tries > 1) {
+ my $t = $i + 1;
+ doprint("Running bisect trial $t of $bisect_tries:\n");
+ }
+ $ret = run_bisect_test $type, $buildtype;
+
+ if ($bisect_manual) {
+ $ret = answer_bisect;
+ }
+
+ last if (!$ret);
+ }
+
+ # Are we looking for where it worked, not failed?
+ if ($reverse_bisect && $ret >= 0) {
+ $ret = !$ret;
+ }
+
+ if ($ret > 0) {
+ return "good";
+ } elsif ($ret == 0) {
+ return "bad";
+ } elsif ($bisect_skip) {
+ doprint "HIT A BAD COMMIT ... SKIPPING\n";
+ return "skip";
+ }
+}
+
+sub update_bisect_replay {
+ my $tmp_log = "$tmpdir/ktest_bisect_log";
+ run_command "git bisect log > $tmp_log" or
+ dodie "can't create bisect log";
+ return $tmp_log;
+}
+
+sub bisect {
+ my ($i) = @_;
+
+ my $result;
+
+ dodie "BISECT_GOOD[$i] not defined\n" if (!defined($bisect_good));
+ dodie "BISECT_BAD[$i] not defined\n" if (!defined($bisect_bad));
+ dodie "BISECT_TYPE[$i] not defined\n" if (!defined($bisect_type));
+
+ my $good = $bisect_good;
+ my $bad = $bisect_bad;
+ my $type = $bisect_type;
+ my $start = $bisect_start;
+ my $replay = $bisect_replay;
+ my $start_files = $bisect_files;
+
+ if (defined($start_files)) {
+ $start_files = " -- " . $start_files;
+ } else {
+ $start_files = "";
+ }
+
+ # convert to true sha1's
+ $good = get_sha1($good);
+ $bad = get_sha1($bad);
+
+ if (defined($bisect_reverse) && $bisect_reverse == 1) {
+ doprint "Performing a reverse bisect (bad is good, good is bad!)\n";
+ $reverse_bisect = 1;
+ } else {
+ $reverse_bisect = 0;
+ }
+
+ # Can't have a test without having a test to run
+ if ($type eq "test" && !defined($run_test)) {
+ $type = "boot";
+ }
+
+ # Check if a bisect was running
+ my $bisect_start_file = "$builddir/.git/BISECT_START";
+
+ my $check = $bisect_check;
+ my $do_check = defined($check) && $check ne "0";
+
+ if ( -f $bisect_start_file ) {
+ print "Bisect in progress found\n";
+ if ($do_check) {
+ print " If you say yes, then no checks of good or bad will be done\n";
+ }
+ if (defined($replay)) {
+ print "** BISECT_REPLAY is defined in config file **";
+ print " Ignore config option and perform new git bisect log?\n";
+ if (read_ync " (yes, no, or cancel) ") {
+ $replay = update_bisect_replay;
+ $do_check = 0;
+ }
+ } elsif (read_yn "read git log and continue?") {
+ $replay = update_bisect_replay;
+ $do_check = 0;
+ }
+ }
+
+ if ($do_check) {
+
+ # get current HEAD
+ my $head = get_sha1("HEAD");
+
+ if ($check ne "good") {
+ doprint "TESTING BISECT BAD [$bad]\n";
+ run_command "git checkout $bad" or
+ dodie "Failed to checkout $bad";
+
+ $result = run_bisect $type;
+
+ if ($result ne "bad") {
+ fail "Tested BISECT_BAD [$bad] and it succeeded" and return 0;
+ }
+ }
+
+ if ($check ne "bad") {
+ doprint "TESTING BISECT GOOD [$good]\n";
+ run_command "git checkout $good" or
+ dodie "Failed to checkout $good";
+
+ $result = run_bisect $type;
+
+ if ($result ne "good") {
+ fail "Tested BISECT_GOOD [$good] and it failed" and return 0;
+ }
+ }
+
+ # checkout where we started
+ run_command "git checkout $head" or
+ dodie "Failed to checkout $head";
+ }
+
+ run_command "git bisect start$start_files" or
+ dodie "could not start bisect";
+
+ if (defined($replay)) {
+ run_command "git bisect replay $replay" or
+ dodie "failed to run replay";
+ } else {
+
+ run_command "git bisect good $good" or
+ dodie "could not set bisect good to $good";
+
+ run_git_bisect "git bisect bad $bad" or
+ dodie "could not set bisect bad to $bad";
+
+ }
+
+ if (defined($start)) {
+ run_command "git checkout $start" or
+ dodie "failed to checkout $start";
+ }
+
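+	# Bisect loop: test the current checkout and feed the result back to
+	# "git bisect", which checks out the next candidate. run_git_bisect
+	# returns false once git reports the first bad commit, ending the loop.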
+ my $test;
+ do {
+ $result = run_bisect $type;
+ $test = run_git_bisect "git bisect $result";
+ print_times;
+ } while ($test);
+
+ run_command "git bisect log" or
+ dodie "could not capture git bisect log";
+
+ run_command "git bisect reset" or
+ dodie "could not reset git bisect";
+
+ doprint "Bad commit was [$bisect_bad_commit]\n";
+
+ success $i;
+}
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+sub assign_configs {
+ my ($hash, $config) = @_;
+
+ doprint "Reading configs from $config\n";
+
+ open (IN, $config)
+ or dodie "Failed to read $config";
+
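+	# A .config line is either "CONFIG_FOO=..." or "# CONFIG_FOO is not set";
+	# store the full line keyed by the config name for both forms.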
+ while (<IN>) {
+ chomp;
+ if (/^((CONFIG\S*)=.*)/) {
+ ${$hash}{$2} = $1;
+ } elsif (/^(# (CONFIG\S*) is not set)/) {
+ ${$hash}{$2} = $1;
+ }
+ }
+
+ close(IN);
+}
+
+sub process_config_ignore {
+ my ($config) = @_;
+
+ assign_configs \%config_ignore, $config;
+}
+
+sub get_dependencies {
+ my ($config) = @_;
+
+ my $arr = $dependency{$config};
+ if (!defined($arr)) {
+ return ();
+ }
+
+ my @deps = @{$arr};
+
+ foreach my $dep (@{$arr}) {
+ print "ADD DEP $dep\n";
+ @deps = (@deps, get_dependencies $dep);
+ }
+
+ return @deps;
+}
+
+sub save_config {
+ my ($pc, $file) = @_;
+
+ my %configs = %{$pc};
+
+ doprint "Saving configs into $file\n";
+
+ open(OUT, ">$file") or dodie "Can not write to $file";
+
+ foreach my $config (keys %configs) {
+ print OUT "$configs{$config}\n";
+ }
+ close(OUT);
+}
+
+sub create_config {
+ my ($name, $pc) = @_;
+
+ doprint "Creating old config from $name configs\n";
+
+ save_config $pc, $output_config;
+
+ make_oldconfig;
+}
+
+sub run_config_bisect_test {
+ my ($type) = @_;
+
+ my $ret = run_bisect_test $type, "oldconfig";
+
+ if ($bisect_manual) {
+ $ret = answer_bisect;
+ }
+
+ return $ret;
+}
+
+sub config_bisect_end {
+ my ($good, $bad) = @_;
+ my $diffexec = "diff -u";
+
+ if (-f "$builddir/scripts/diffconfig") {
+ $diffexec = "$builddir/scripts/diffconfig";
+ }
+ doprint "\n\n***************************************\n";
+ doprint "No more config bisecting possible.\n";
+ run_command "$diffexec $good $bad", 1;
+ doprint "***************************************\n\n";
+}
+
+sub run_config_bisect {
+ my ($good, $bad, $last_result) = @_;
+ my $reset = "";
+ my $cmd;
+ my $ret;
+
+ if (!length($last_result)) {
+ $reset = "-r";
+ }
+ run_command "$config_bisect_exec $reset -b $outputdir $good $bad $last_result", 1;
+
+ # config-bisect returns:
+ # 0 if there is more to bisect
+ # 1 for finding a good config
+ # 2 if it can not find any more configs
+ # -1 (255) on error
+ if ($run_command_status) {
+ return $run_command_status;
+ }
+
+ $ret = run_config_bisect_test $config_bisect_type;
+ if ($ret) {
+ doprint "NEW GOOD CONFIG\n";
+ # Return 3 for good config
+ return 3;
+ } else {
+ doprint "NEW BAD CONFIG\n";
+ # Return 4 for bad config
+ return 4;
+ }
+}
+
+sub config_bisect {
+ my ($i) = @_;
+
+ my $good_config;
+ my $bad_config;
+
+ my $type = $config_bisect_type;
+ my $ret;
+
+ $bad_config = $config_bisect;
+
+ if (defined($config_bisect_good)) {
+ $good_config = $config_bisect_good;
+ } elsif (defined($minconfig)) {
+ $good_config = $minconfig;
+ } else {
+	doprint "No config specified, checking if defconfig works\n";
+ $ret = run_bisect_test $type, "defconfig";
+ if (!$ret) {
+ fail "Have no good config to compare with, please set CONFIG_BISECT_GOOD";
+ return 1;
+ }
+ $good_config = $output_config;
+ }
+
+ if (!defined($config_bisect_exec)) {
+	# First check the likely locations of config-bisect.pl, starting
+	# with the directory that ktest.pl was run from
+ my @locations = ( "$pwd/config-bisect.pl",
+ "$dirname/config-bisect.pl",
+ "$builddir/tools/testing/ktest/config-bisect.pl",
+ undef );
+ foreach my $loc (@locations) {
+	    doprint "loc = $loc\n" if (defined($loc));
+	    $config_bisect_exec = $loc;
+	    last if (defined($config_bisect_exec) && -x $config_bisect_exec);
+ }
+ if (!defined($config_bisect_exec)) {
+ fail "Could not find an executable config-bisect.pl\n",
+ " Set CONFIG_BISECT_EXEC to point to config-bisect.pl";
+ return 1;
+ }
+ }
+
+ # we don't want min configs to cause issues here.
+ doprint "Disabling 'MIN_CONFIG' for this test\n";
+ undef $minconfig;
+
+ my %good_configs;
+ my %bad_configs;
+ my %tmp_configs;
+
+ if (-f "$tmpdir/good_config.tmp" || -f "$tmpdir/bad_config.tmp") {
+ if (read_yn "Interrupted config-bisect. Continue (n - will start new)?") {
+ if (-f "$tmpdir/good_config.tmp") {
+ $good_config = "$tmpdir/good_config.tmp";
+ } else {
+ $good_config = "$tmpdir/good_config";
+ }
+ if (-f "$tmpdir/bad_config.tmp") {
+ $bad_config = "$tmpdir/bad_config.tmp";
+ } else {
+ $bad_config = "$tmpdir/bad_config";
+ }
+ }
+ }
+ doprint "Run good configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $good_config;
+ create_config "$good_config", \%tmp_configs;
+ $good_config = "$tmpdir/good_config";
+ system("cp $output_config $good_config") == 0 or dodie "cp good config";
+
+ doprint "Run bad configs through make oldconfig\n";
+ assign_configs \%tmp_configs, $bad_config;
+ create_config "$bad_config", \%tmp_configs;
+ $bad_config = "$tmpdir/bad_config";
+ system("cp $output_config $bad_config") == 0 or dodie "cp bad config";
+
+ if (defined($config_bisect_check) && $config_bisect_check ne "0") {
+ if ($config_bisect_check ne "good") {
+ doprint "Testing bad config\n";
+
+ $ret = run_bisect_test $type, "useconfig:$bad_config";
+ if ($ret) {
+ fail "Bad config succeeded when expected to fail!";
+ return 0;
+ }
+ }
+ if ($config_bisect_check ne "bad") {
+ doprint "Testing good config\n";
+
+ $ret = run_bisect_test $type, "useconfig:$good_config";
+ if (!$ret) {
+ fail "Good config failed when expected to succeed!";
+ return 0;
+ }
+ }
+ }
+
+ my $last_run = "";
+
+ do {
+ $ret = run_config_bisect $good_config, $bad_config, $last_run;
+ if ($ret == 3) {
+ $last_run = "good";
+ } elsif ($ret == 4) {
+ $last_run = "bad";
+ }
+ print_times;
+ } while ($ret == 3 || $ret == 4);
+
+ if ($ret == 2) {
+ config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+ }
+
+ return $ret if ($ret < 0);
+
+ success $i;
+}
+
+sub patchcheck_reboot {
+ doprint "Reboot and sleep $patchcheck_sleep_time seconds\n";
+ reboot_to_good $patchcheck_sleep_time;
+}
+
+sub patchcheck {
+ my ($i) = @_;
+
+ dodie "PATCHCHECK_START[$i] not defined\n"
+ if (!defined($patchcheck_start));
+ dodie "PATCHCHECK_TYPE[$i] not defined\n"
+ if (!defined($patchcheck_type));
+
+ my $start = $patchcheck_start;
+
+ my $cherry = $patchcheck_cherry;
+ if (!defined($cherry)) {
+ $cherry = 0;
+ }
+
+ my $end = "HEAD";
+ if (defined($patchcheck_end)) {
+ $end = $patchcheck_end;
+ } elsif ($cherry) {
+ dodie "PATCHCHECK_END must be defined with PATCHCHECK_CHERRY\n";
+ }
+
+ # Get the true sha1's since we can use things like HEAD~3
+ $start = get_sha1($start);
+ $end = get_sha1($end);
+
+ my $type = $patchcheck_type;
+
+	# Can't run a "test" type test without a test to run; fall back to boot
+ if ($type eq "test" && !defined($run_test)) {
+ $type = "boot";
+ }
+
+ if ($cherry) {
+ open (IN, "git cherry -v $start $end|") or
+ dodie "could not get git list";
+ } else {
+ open (IN, "git log --pretty=oneline $end|") or
+ dodie "could not get git list";
+ }
+
+ my @list;
+
+ while (<IN>) {
+ chomp;
+ # git cherry adds a '+' we want to remove
+ s/^\+ //;
+ $list[$#list+1] = $_;
+ last if (/^$start/);
+ }
+ close(IN);
+
+ if (!$cherry) {
+ if ($list[$#list] !~ /^$start/) {
+ fail "SHA1 $start not found";
+ }
+
+ # go backwards in the list
+ @list = reverse @list;
+ }
+
+ doprint("Going to test the following commits:\n");
+ foreach my $l (@list) {
+ doprint "$l\n";
+ }
+
+ my $save_clean = $noclean;
+ my %ignored_warnings;
+
+ if (defined($ignore_warnings)) {
+ foreach my $sha1 (split /\s+/, $ignore_warnings) {
+ $ignored_warnings{$sha1} = 1;
+ }
+ }
+
+ $in_patchcheck = 1;
+ foreach my $item (@list) {
+ my $sha1 = $item;
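+	# keep only the leading sha1 of the "sha1 subject ..." line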
+ $sha1 =~ s/^([[:xdigit:]]+).*/$1/;
+
+ doprint "\nProcessing commit \"$item\"\n\n";
+
+ run_command "git checkout $sha1" or
+ dodie "Failed to checkout $sha1";
+
+ # only clean on the first and last patch
+ if ($item eq $list[0] ||
+ $item eq $list[$#list]) {
+ $noclean = $save_clean;
+ } else {
+ $noclean = 1;
+ }
+
+ if (defined($minconfig)) {
+ build "useconfig:$minconfig" or return 0;
+ } else {
+	    # No config specified; fall back to oldconfig
+ build "oldconfig" or return 0;
+ }
+
+ # No need to do per patch checking if warnings file exists
+ if (!defined($warnings_file) && !defined($ignored_warnings{$sha1})) {
+ check_patch_buildlog $sha1 or return 0;
+ }
+
+ check_buildlog or return 0;
+
+ next if ($type eq "build");
+
+ my $failed = 0;
+
+ start_monitor_and_install or $failed = 1;
+
+ if (!$failed && $type ne "boot"){
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ if ($failed) {
+ print_times;
+ return 0;
+ }
+ patchcheck_reboot;
+ print_times;
+ }
+ $in_patchcheck = 0;
+ success $i;
+
+ return 1;
+}
+
+my %depends;
+my %depcount;
+my $iflevel = 0;
+my @ifdeps;
+
+# prevent recursion
+my %read_kconfigs;
+
+sub add_dep {
+ # $config depends on $dep
+ my ($config, $dep) = @_;
+
+ if (defined($depends{$config})) {
+ $depends{$config} .= " " . $dep;
+ } else {
+ $depends{$config} = $dep;
+ }
+
+ # record the number of configs depending on $dep
+ if (defined $depcount{$dep}) {
+ $depcount{$dep}++;
+ } else {
+ $depcount{$dep} = 1;
+ }
+}
+
+# taken from streamline_config.pl
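+#
+# Given an illustrative Kconfig entry such as:
+#
+#   config FOO
+#           depends on BAR
+#           select BAZ
+#
+# this records that FOO depends on BAR, and that BAZ is treated as
+# depending on FOO (FOO selecting BAZ means FOO pulls it in).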
+sub read_kconfig {
+ my ($kconfig) = @_;
+
+ my $state = "NONE";
+ my $config;
+ my @kconfigs;
+
+ my $cont = 0;
+ my $line;
+
+ if (! -f $kconfig) {
+ doprint "file $kconfig does not exist, skipping\n";
+ return;
+ }
+
+ open(KIN, "$kconfig")
+ or dodie "Can't open $kconfig";
+ while (<KIN>) {
+ chomp;
+
+ # Make sure that lines ending with \ continue
+ if ($cont) {
+ $_ = $line . " " . $_;
+ }
+
+ if (s/\\$//) {
+ $cont = 1;
+ $line = $_;
+ next;
+ }
+
+ $cont = 0;
+
+ # collect any Kconfig sources
+ if (/^source\s*"(.*)"/) {
+ $kconfigs[$#kconfigs+1] = $1;
+ }
+
+ # configs found
+ if (/^\s*(menu)?config\s+(\S+)\s*$/) {
+ $state = "NEW";
+ $config = $2;
+
+ for (my $i = 0; $i < $iflevel; $i++) {
+ add_dep $config, $ifdeps[$i];
+ }
+
+ # collect the depends for the config
+ } elsif ($state eq "NEW" && /^\s*depends\s+on\s+(.*)$/) {
+
+ add_dep $config, $1;
+
+ # Get the configs that select this config
+ } elsif ($state eq "NEW" && /^\s*select\s+(\S+)/) {
+
+	    # a config that is selected by $config is treated as depending on $config
+ add_dep $1, $config;
+
+ # Check for if statements
+ } elsif (/^if\s+(.*\S)\s*$/) {
+ my $deps = $1;
+	    # strip leading and trailing non-identifier characters
+ $deps =~ s/^[^a-zA-Z0-9_]*//;
+ $deps =~ s/[^a-zA-Z0-9_]*$//;
+
+ my @deps = split /[^a-zA-Z0-9_]+/, $deps;
+
+ $ifdeps[$iflevel++] = join ':', @deps;
+
+ } elsif (/^endif/) {
+
+ $iflevel-- if ($iflevel);
+
+ # stop on "help"
+ } elsif (/^\s*help\s*$/) {
+ $state = "NONE";
+ }
+ }
+ close(KIN);
+
+ # read in any configs that were found.
+ foreach $kconfig (@kconfigs) {
+ if (!defined($read_kconfigs{$kconfig})) {
+ $read_kconfigs{$kconfig} = 1;
+ read_kconfig("$builddir/$kconfig");
+ }
+ }
+}
+
+sub read_depends {
+    # find out which arch this is from the .config file
+ open (IN, $output_config)
+ or dodie "Failed to read $output_config";
+ my $arch;
+ while (<IN>) {
+ if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) {
+ $arch = $1;
+ last;
+ }
+ }
+ close IN;
+
+ if (!defined($arch)) {
+ doprint "Could not find arch from config file\n";
+ doprint "no dependencies used\n";
+ return;
+ }
+
+ # arch is really the subarch, we need to know
+ # what directory to look at.
+ if ($arch eq "i386" || $arch eq "x86_64") {
+ $arch = "x86";
+ }
+
+ my $kconfig = "$builddir/arch/$arch/Kconfig";
+
+ if (! -f $kconfig && $arch =~ /\d$/) {
+ my $orig = $arch;
+ # some subarchs have numbers, truncate them
+ $arch =~ s/\d*$//;
+ $kconfig = "$builddir/arch/$arch/Kconfig";
+ if (! -f $kconfig) {
+ doprint "No idea what arch dir $orig is for\n";
+ doprint "no dependencies used\n";
+ return;
+ }
+ }
+
+ read_kconfig($kconfig);
+}
+
+sub make_new_config {
+ my @configs = @_;
+
+ open (OUT, ">$output_config")
+ or dodie "Failed to write $output_config";
+
+ foreach my $config (@configs) {
+ print OUT "$config\n";
+ }
+ close OUT;
+}
+
+sub chomp_config {
+ my ($config) = @_;
+
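+    # strip the CONFIG_ prefix: "CONFIG_SMP" becomes "SMP"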
+ $config =~ s/CONFIG_//;
+
+ return $config;
+}
+
+sub get_depends {
+ my ($dep) = @_;
+
+ my $kconfig = chomp_config $dep;
+
+ $dep = $depends{"$kconfig"};
+
+    # The dep string saves the dependencies as they were found,
+    # including expression syntax like !, && and ||. We want to
+    # split this out into just an array of configs.
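+    # e.g. "FOO && (BAR || !BAZ)" becomes (CONFIG_FOO, CONFIG_BAR, CONFIG_BAZ).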
+
+ my $valid = "A-Za-z_0-9";
+
+ my @configs;
+
+ while ($dep =~ /[$valid]/) {
+
+ if ($dep =~ /^[^$valid]*([$valid]+)/) {
+ my $conf = "CONFIG_" . $1;
+
+ $configs[$#configs + 1] = $conf;
+
+ $dep =~ s/^[^$valid]*[$valid]+//;
+ } else {
+ dodie "this should never happen";
+ }
+ }
+
+ return @configs;
+}
+
+my %min_configs;
+my %keep_configs;
+my %save_configs;
+my %processed_configs;
+my %nochange_config;
+
+sub test_this_config {
+ my ($config) = @_;
+
+ my $found;
+
+ # if we already processed this config, skip it
+ if (defined($processed_configs{$config})) {
+ return undef;
+ }
+ $processed_configs{$config} = 1;
+
+ # if this config failed during this round, skip it
+ if (defined($nochange_config{$config})) {
+ return undef;
+ }
+
+ my $kconfig = chomp_config $config;
+
+ # Test dependencies first
+ if (defined($depends{"$kconfig"})) {
+ my @parents = get_depends $config;
+ foreach my $parent (@parents) {
+ # if the parent is in the min config, check it first
+ next if (!defined($min_configs{$parent}));
+ $found = test_this_config($parent);
+ if (defined($found)) {
+ return $found;
+ }
+ }
+ }
+
+    # Remove this config from the list of configs, run a make
+    # olddefconfig, and then read the resulting .config to make
+    # sure the config we removed is indeed gone.
+ my %configs = %min_configs;
+ delete $configs{$config};
+ make_new_config ((values %configs), (values %keep_configs));
+ make_oldconfig;
+ undef %configs;
+ assign_configs \%configs, $output_config;
+
+ if (!defined($configs{$config}) || $configs{$config} =~ /^#/) {
+ return $config;
+ }
+
+ doprint "disabling config $config did not change .config\n";
+
+ $nochange_config{$config} = 1;
+
+ return undef;
+}
+
+sub make_min_config {
+ my ($i) = @_;
+
+ my $type = $minconfig_type;
+ if ($type ne "boot" && $type ne "test") {
+ fail "Invalid MIN_CONFIG_TYPE '$minconfig_type'\n" .
+ " make_min_config works only with 'boot' and 'test'\n" and return;
+ }
+
+ if (!defined($output_minconfig)) {
+ fail "OUTPUT_MIN_CONFIG not defined" and return;
+ }
+
+    # If output_minconfig exists, and the start_minconfig
+    # came from min_config, then ask if we should use
+    # that instead.
+ if (-f $output_minconfig && !$start_minconfig_defined) {
+ print "$output_minconfig exists\n";
+ if (!defined($use_output_minconfig)) {
+ if (read_yn " Use it as minconfig?") {
+ $start_minconfig = $output_minconfig;
+ }
+ } elsif ($use_output_minconfig > 0) {
+ doprint "Using $output_minconfig as MIN_CONFIG\n";
+ $start_minconfig = $output_minconfig;
+ } else {
+ doprint "Set to still use MIN_CONFIG as starting point\n";
+ }
+ }
+
+ if (!defined($start_minconfig)) {
+ fail "START_MIN_CONFIG or MIN_CONFIG not defined" and return;
+ }
+
+ my $temp_config = "$tmpdir/temp_config";
+
+ # First things first. We build an allnoconfig to find
+ # out what the defaults are that we can't touch.
+ # Some are selections, but we really can't handle selections.
+
+ my $save_minconfig = $minconfig;
+ undef $minconfig;
+
+ run_command "$make allnoconfig" or return 0;
+
+ read_depends;
+
+ process_config_ignore $output_config;
+
+ undef %save_configs;
+ undef %min_configs;
+
+ if (defined($ignore_config)) {
+ # make sure the file exists
+ `touch $ignore_config`;
+ assign_configs \%save_configs, $ignore_config;
+ }
+
+ %keep_configs = %save_configs;
+
+ doprint "Load initial configs from $start_minconfig\n";
+
+ # Look at the current min configs, and save off all the
+ # ones that were set via the allnoconfig
+ assign_configs \%min_configs, $start_minconfig;
+
+ my @config_keys = keys %min_configs;
+
+ # All configs need a depcount
+ foreach my $config (@config_keys) {
+ my $kconfig = chomp_config $config;
+ if (!defined $depcount{$kconfig}) {
+ $depcount{$kconfig} = 0;
+ }
+ }
+
+    # Remove anything that was set by the make allnoconfig;
+    # we shouldn't need those options as they get set for us anyway.
+ foreach my $config (@config_keys) {
+ # Remove anything in the ignore_config
+ if (defined($keep_configs{$config})) {
+ my $file = $ignore_config;
+ $file =~ s,.*/(.*?)$,$1,;
+ doprint "$config set by $file ... ignored\n";
+ delete $min_configs{$config};
+ next;
+ }
+ # But make sure the settings are the same. If a min config
+ # sets a selection, we do not want to get rid of it if
+ # it is not the same as what we have. Just move it into
+ # the keep configs.
+ if (defined($config_ignore{$config})) {
+ if ($config_ignore{$config} ne $min_configs{$config}) {
+ doprint "$config is in allnoconfig as '$config_ignore{$config}'";
+ doprint " but it is '$min_configs{$config}' in minconfig .. keeping\n";
+ $keep_configs{$config} = $min_configs{$config};
+ } else {
+ doprint "$config set by allnoconfig ... ignored\n";
+ }
+ delete $min_configs{$config};
+ }
+ }
+
+ my $done = 0;
+ my $take_two = 0;
+
+ while (!$done) {
+
+ my $config;
+ my $found;
+
+ # Now disable each config one by one and do a make oldconfig
+ # till we find a config that changes our list.
+
+ my @test_configs = keys %min_configs;
+
+	# Sort keys by how many configs depend on them (most depended-on first)
+ @test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
+ @test_configs ;
+
+ # Put configs that did not modify the config at the end.
+ my $reset = 1;
+ for (my $i = 0; $i < $#test_configs; $i++) {
+ if (!defined($nochange_config{$test_configs[0]})) {
+ $reset = 0;
+ last;
+ }
+ # This config didn't change the .config last time.
+ # Place it at the end
+ my $config = shift @test_configs;
+ push @test_configs, $config;
+ }
+
+ # if every test config has failed to modify the .config file
+ # in the past, then reset and start over.
+ if ($reset) {
+ undef %nochange_config;
+ }
+
+ undef %processed_configs;
+
+ foreach my $config (@test_configs) {
+
+ $found = test_this_config $config;
+
+ last if (defined($found));
+
+ # oh well, try another config
+ }
+
+ if (!defined($found)) {
+ # we could have failed due to the nochange_config hash
+ # reset and try again
+ if (!$take_two) {
+ undef %nochange_config;
+ $take_two = 1;
+ next;
+ }
+ doprint "No more configs found that we can disable\n";
+ $done = 1;
+ last;
+ }
+ $take_two = 0;
+
+ $config = $found;
+
+ doprint "Test with $config disabled\n";
+
+	# set in_bisect to keep build and monitor from dying
+ $in_bisect = 1;
+
+ my $failed = 0;
+ build "oldconfig" or $failed = 1;
+ if (!$failed) {
+ start_monitor_and_install or $failed = 1;
+
+ if ($type eq "test" && !$failed) {
+ do_run_test or $failed = 1;
+ }
+
+ end_monitor;
+ }
+
+ $in_bisect = 0;
+
+ if ($failed) {
+ doprint "$min_configs{$config} is needed to boot the box... keeping\n";
+ # this config is needed, add it to the ignore list.
+ $keep_configs{$config} = $min_configs{$config};
+ $save_configs{$config} = $min_configs{$config};
+ delete $min_configs{$config};
+
+ # update new ignore configs
+ if (defined($ignore_config)) {
+ open (OUT, ">$temp_config")
+ or dodie "Can't write to $temp_config";
+ foreach my $config (keys %save_configs) {
+ print OUT "$save_configs{$config}\n";
+ }
+ close OUT;
+		run_command "mv $temp_config $ignore_config" or
+		    dodie "failed to move update to $ignore_config";
+ }
+
+ } else {
+ # We booted without this config, remove it from the minconfigs.
+ doprint "$config is not needed, disabling\n";
+
+ delete $min_configs{$config};
+
+ # Also disable anything that is not enabled in this config
+ my %configs;
+ assign_configs \%configs, $output_config;
+ my @config_keys = keys %min_configs;
+ foreach my $config (@config_keys) {
+ if (!defined($configs{$config})) {
+ doprint "$config is not set, disabling\n";
+ delete $min_configs{$config};
+ }
+ }
+
+ # Save off all the current mandatory configs
+ open (OUT, ">$temp_config")
+ or dodie "Can't write to $temp_config";
+ foreach my $config (keys %keep_configs) {
+ print OUT "$keep_configs{$config}\n";
+ }
+ foreach my $config (keys %min_configs) {
+ print OUT "$min_configs{$config}\n";
+ }
+ close OUT;
+
+	    run_command "mv $temp_config $output_minconfig" or
+		dodie "failed to move update to $output_minconfig";
+ }
+
+ doprint "Reboot and wait $sleep_time seconds\n";
+ reboot_to_good $sleep_time;
+ }
+
+ success $i;
+ return 1;
+}
+
+sub make_warnings_file {
+ my ($i) = @_;
+
+ if (!defined($warnings_file)) {
+ dodie "Must define WARNINGS_FILE for make_warnings_file test";
+ }
+
+ if ($build_type eq "nobuild") {
+ dodie "BUILD_TYPE can not be 'nobuild' for make_warnings_file test";
+ }
+
+ build $build_type or dodie "Failed to build";
+
+ open(OUT, ">$warnings_file") or dodie "Can't create $warnings_file";
+
+ open(IN, $buildlog) or dodie "Can't open $buildlog";
+ while (<IN>) {
+
+	# Some compilers use UTF-8 extended quotes; on distcc
+	# heterogeneous systems this causes issues
+ s/$utf8_quote/'/g;
+
+ if (/$check_build_re/) {
+ print OUT;
+ }
+ }
+ close(IN);
+
+ close(OUT);
+
+ success $i;
+}
+
+$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n";
+
+if ($#ARGV == 0) {
+ $ktest_config = $ARGV[0];
+ if (! -f $ktest_config) {
+ print "$ktest_config does not exist.\n";
+ if (!read_yn "Create it?") {
+ exit 0;
+ }
+ }
+}
+
+if (! -f $ktest_config) {
+ $newconfig = 1;
+ get_test_case;
+ open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+ print OUT << "EOF"
+# Generated by ktest.pl
+#
+
+# PWD is a ktest.pl variable that evaluates to the working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := $variable{"PWD"}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+TEST_TYPE = $default{"TEST_TYPE"}
+
+DEFAULTS
+EOF
+;
+ close(OUT);
+}
+read_config $ktest_config;
+
+if (defined($opt{"LOG_FILE"})) {
+ $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
+}
+
+# Append any configs entered manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+    print "\nAppending manually entered configs to $ktest_config\n";
+ open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+ foreach my $config (@new_configs) {
+ print OUT "$config = $entered_configs{$config}\n";
+ $opt{$config} = process_variables($entered_configs{$config});
+ }
+}
+
+if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
+ unlink $opt{"LOG_FILE"};
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+ if (!$i) {
+ doprint "DEFAULT OPTIONS:\n";
+ } else {
+ doprint "\nTEST $i OPTIONS";
+ if (defined($repeat_tests{$i})) {
+ $repeat = $repeat_tests{$i};
+ doprint " ITERATE $repeat";
+ }
+ doprint "\n";
+ }
+
+ foreach my $option (sort keys %opt) {
+
+ if ($option =~ /\[(\d+)\]$/) {
+ next if ($i != $1);
+ } else {
+ next if ($i);
+ }
+
+ doprint "$option = $opt{$option}\n";
+ }
+}
+
+sub option_defined {
+ my ($option) = @_;
+
+ if (defined($opt{$option}) && $opt{$option} !~ /^\s*$/) {
+ return 1;
+ }
+
+ return 0;
+}
+
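+# Resolve a test option: first look for an exact per-test value NAME[i],
+# then for a value inherited from a repeating test, and finally fall
+# back to the test-wide default NAME.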
+sub __set_test_option {
+ my ($name, $i) = @_;
+
+ my $option = "$name\[$i\]";
+
+ if (option_defined($option)) {
+ return $opt{$option};
+ }
+
+ foreach my $test (keys %repeat_tests) {
+ if ($i >= $test &&
+ $i < $test + $repeat_tests{$test}) {
+ $option = "$name\[$test\]";
+ if (option_defined($option)) {
+ return $opt{$option};
+ }
+ }
+ }
+
+ if (option_defined($name)) {
+ return $opt{$name};
+ }
+
+ return undef;
+}
+
+sub set_test_option {
+ my ($name, $i) = @_;
+
+ my $option = __set_test_option($name, $i);
+ return $option if (!defined($option));
+
+ return eval_option($name, $option, $i);
+}
+
+sub find_mailer {
+ my ($mailer) = @_;
+
+ my @paths = split /:/, $ENV{PATH};
+
+ # sendmail is usually in /usr/sbin
+ $paths[$#paths + 1] = "/usr/sbin";
+
+ foreach my $path (@paths) {
+ if (-x "$path/$mailer") {
+ return $path;
+ }
+ }
+
+ return undef;
+}
+
+sub do_send_mail {
+    my ($subject, $message, $file) = @_;
+
+ if (!defined($mail_path)) {
+ # find the mailer
+ $mail_path = find_mailer $mailer;
+ if (!defined($mail_path)) {
+ die "\nCan not find $mailer in PATH\n";
+ }
+ }
+
+ if (!defined($mail_command)) {
+ if ($mailer eq "mail" || $mailer eq "mailx") {
+ $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+ } elsif ($mailer eq "sendmail" ) {
+ $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+ } else {
+ die "\nYour mailer: $mailer is not supported.\n";
+ }
+ }
+
+ $mail_command =~ s/\$MAILER/$mailer/g;
+ $mail_command =~ s/\$MAIL_PATH/$mail_path/g;
+ $mail_command =~ s/\$MAILTO/$mailto/g;
+ $mail_command =~ s/\$SUBJECT/$subject/g;
+ $mail_command =~ s/\$MESSAGE/$message/g;
+
+ my $ret = run_command $mail_command;
+ if (!$ret && defined($file)) {
+ # try again without the file
+ $message .= "\n\n*** FAILED TO SEND LOG ***\n\n";
+	do_send_mail($subject, $message);
+ }
+}
+
+sub send_email {
+
+ if (defined($mailto)) {
+ if (!defined($mailer)) {
+ doprint "No email sent: email or mailer not specified in config.\n";
+ return;
+ }
+ do_send_mail @_;
+ }
+}
+
+sub cancel_test {
+ if ($email_when_canceled) {
+ send_email("KTEST: Your [$test_type] test was cancelled",
+ "Your test started at $script_start_time was cancelled: sig int");
+ }
+ die "\nCaught Sig Int, test interrupted: $!\n"
+}
+
+$SIG{INT} = qw(cancel_test);
+
+# The first thing we need to do is the builds
+for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
+
+ # Do not reboot on failing test options
+ $no_reboot = 1;
+ $reboot_success = 0;
+
+ $have_version = 0;
+
+ $iteration = $i;
+
+ $build_time = 0;
+ $install_time = 0;
+ $reboot_time = 0;
+ $test_time = 0;
+
+ undef %force_config;
+
+ my $makecmd = set_test_option("MAKE_CMD", $i);
+
+ $outputdir = set_test_option("OUTPUT_DIR", $i);
+ $builddir = set_test_option("BUILD_DIR", $i);
+
+ chdir $builddir || dodie "can't change directory to $builddir";
+
+ if (!-d $outputdir) {
+ mkpath($outputdir) or
+ dodie "can't create $outputdir";
+ }
+
+ $make = "$makecmd O=$outputdir";
+
+ # Load all the options into their mapped variable names
+ foreach my $opt (keys %option_map) {
+ ${$option_map{$opt}} = set_test_option($opt, $i);
+ }
+
+ $start_minconfig_defined = 1;
+
+ # The first test may override the PRE_KTEST option
+ if ($i == 1) {
+ if (defined($pre_ktest)) {
+ doprint "\n";
+ run_command $pre_ktest;
+ }
+ if ($email_when_started) {
+ send_email("KTEST: Your [$test_type] test was started",
+ "Your test was started on $script_start_time");
+ }
+ }
+
+ # Any test can override the POST_KTEST option
+ # The last test takes precedence.
+ if (defined($post_ktest)) {
+ $final_post_ktest = $post_ktest;
+ }
+
+ if (!defined($start_minconfig)) {
+ $start_minconfig_defined = 0;
+ $start_minconfig = $minconfig;
+ }
+
+ if (!-d $tmpdir) {
+ mkpath($tmpdir) or
+ dodie "can't create $tmpdir";
+ }
+
+ $ENV{"SSH_USER"} = $ssh_user;
+ $ENV{"MACHINE"} = $machine;
+
+ $buildlog = "$tmpdir/buildlog-$machine";
+ $testlog = "$tmpdir/testlog-$machine";
+ $dmesg = "$tmpdir/dmesg-$machine";
+ $output_config = "$outputdir/.config";
+
+ if (!$buildonly) {
+ $target = "$ssh_user\@$machine";
+ if ($reboot_type eq "grub") {
+ dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+ } elsif ($reboot_type eq "grub2") {
+ dodie "GRUB_MENU not defined" if (!defined($grub_menu));
+ dodie "GRUB_FILE not defined" if (!defined($grub_file));
+ } elsif ($reboot_type eq "syslinux") {
+ dodie "SYSLINUX_LABEL not defined" if (!defined($syslinux_label));
+ }
+ }
+
+ my $run_type = $build_type;
+ if ($test_type eq "patchcheck") {
+ $run_type = $patchcheck_type;
+ } elsif ($test_type eq "bisect") {
+ $run_type = $bisect_type;
+ } elsif ($test_type eq "config_bisect") {
+ $run_type = $config_bisect_type;
+ } elsif ($test_type eq "make_min_config") {
+ $run_type = "";
+ } elsif ($test_type eq "make_warnings_file") {
+ $run_type = "";
+ }
+
+ # mistake in config file?
+ if (!defined($run_type)) {
+ $run_type = "ERROR";
+ }
+
+ my $installme = "";
+ $installme = " no_install" if ($no_install);
+
+ my $name = "";
+
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
+
+ doprint "\n\n";
+ doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
+
+ if (defined($pre_test)) {
+ run_command $pre_test;
+ }
+
+ unlink $dmesg;
+ unlink $buildlog;
+ unlink $testlog;
+
+ if (defined($addconfig)) {
+ my $min = $minconfig;
+ if (!defined($minconfig)) {
+ $min = "";
+ }
+ run_command "cat $addconfig $min > $tmpdir/add_config" or
+ dodie "Failed to create temp config";
+ $minconfig = "$tmpdir/add_config";
+ }
+
+ if (defined($checkout)) {
+ run_command "git checkout $checkout" or
+ dodie "failed to checkout $checkout";
+ }
+
+ $no_reboot = 0;
+
+ # A test may opt to not reboot the box
+ if ($reboot_on_success) {
+ $reboot_success = 1;
+ }
+
+ if ($test_type eq "bisect") {
+ bisect $i;
+ next;
+ } elsif ($test_type eq "config_bisect") {
+ config_bisect $i;
+ next;
+ } elsif ($test_type eq "patchcheck") {
+ patchcheck $i;
+ next;
+ } elsif ($test_type eq "make_min_config") {
+ make_min_config $i;
+ next;
+ } elsif ($test_type eq "make_warnings_file") {
+ $no_reboot = 1;
+ make_warnings_file $i;
+ next;
+ }
+
+ if ($build_type ne "nobuild") {
+ build $build_type or next;
+ check_buildlog or next;
+ }
+
+ if ($test_type eq "install") {
+ get_version;
+ install;
+ success $i;
+ next;
+ }
+
+ if ($test_type ne "build") {
+ my $failed = 0;
+ start_monitor_and_install or $failed = 1;
+
+ if (!$failed && $test_type ne "boot" && defined($run_test)) {
+ do_run_test or $failed = 1;
+ }
+ end_monitor;
+ if ($failed) {
+ print_times;
+ next;
+ }
+ }
+
+ print_times;
+
+ success $i;
+}
+
+if (defined($final_post_ktest)) {
+ run_command $final_post_ktest;
+}
+
+if ($opt{"POWEROFF_ON_SUCCESS"}) {
+ halt;
+} elsif ($opt{"REBOOT_ON_SUCCESS"} && !do_not_reboot && $reboot_success) {
+ reboot_to_good;
+} elsif (defined($switch_to_good)) {
+ # still need to get to the good kernel
+ run_command $switch_to_good;
+}
+
+
+doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
+
+if ($email_when_finished) {
+ send_email("KTEST: Your [$test_type] test has finished!",
+ "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+}
+exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
new file mode 100644
index 000000000..6ca6ca0ce
--- /dev/null
+++ b/tools/testing/ktest/sample.conf
@@ -0,0 +1,1348 @@
+#
+# Config file for ktest.pl
+#
+# Place your customized version of this in the working directory that
+# ktest.pl is run from. By default, ktest.pl will look for a file
+# called "ktest.conf", but you can name it anything you like and specify
+# the name of your config file as the first argument of ktest.pl.
+#
+# Note, all paths must be absolute
+#
+
+# Options set in the beginning of the file are considered to be
+# default options. These options can be overridden by test specific
+# options, with the following exceptions:
+#
+# LOG_FILE
+# CLEAR_LOG
+# POWEROFF_ON_SUCCESS
+# REBOOT_ON_SUCCESS
+#
+# Test specific options are set after the label:
+#
+# TEST_START
+#
+# The options after a TEST_START label are specific to that test.
+# Each TEST_START label will set up a new test. If you want to
+# perform a test more than once, you can add the ITERATE label
+# to it followed by the number of times you want that test
+# to iterate. If the ITERATE is left off, the test will only
+# be performed once.
+#
+# TEST_START ITERATE 10
+#
+# You can skip a test by adding SKIP (before or after the ITERATE
+# and number)
+#
+# TEST_START SKIP
+#
+# TEST_START SKIP ITERATE 10
+#
+# TEST_START ITERATE 10 SKIP
+#
+# The SKIP label causes the options and the test itself to be ignored.
+# This is useful to set up several different tests in one config file, and
+# enable only the ones you want to use for a current test run.
+#
+# You can add default options anywhere in the file as well
+# with the DEFAULTS tag. This allows you to have default options
+# after the test options to keep the test options at the top
+# of the file. You can even place the DEFAULTS tag between
+# test cases (but not in the middle of a single test case)
+#
+# TEST_START
+# MIN_CONFIG = /home/test/config-test1
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-default
+#
+# TEST_START ITERATE 10
+#
+# The above will run the first test with MIN_CONFIG set to
+# /home/test/config-test1. Then 10 tests will be executed
+# with MIN_CONFIG set to /home/test/config-default.
+#
+# You can also disable defaults with the SKIP option
+#
+# DEFAULTS SKIP
+# MIN_CONFIG = /home/test/config-use-sometimes
+#
+# DEFAULTS
+# MIN_CONFIG = /home/test/config-most-times
+#
+# The above will ignore the first MIN_CONFIG. If you want to
+# use the first MIN_CONFIG, remove the SKIP from the first
+# DEFAULTS tag and add it to the second. Be careful, options
+# may only be declared once per test or default. If you have
+# the same option name under the same test or as default
+# ktest will fail to execute, and no tests will run.
+#
+# DEFAULTS OVERRIDE
+#
+# Options defined in the DEFAULTS section can not be duplicated
+# even if they are defined in two different DEFAULT sections.
+# This is done to catch mistakes where an option is added but
+# the previous option was forgotten about and not commented.
+#
+# The OVERRIDE keyword can be added to a section to allow this
+# section to override other DEFAULT sections values that have
+# been defined previously. It will only override options that
+# have been defined before its use. Options defined later
+# in a non override section will still error. The same option
+# can not be defined in the same section even if that section
+# is marked OVERRIDE.
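+#
+# For example (hypothetical values):
+#
+# DEFAULTS
+# BUILD_OPTIONS = -j8
+#
+# DEFAULTS OVERRIDE
+# BUILD_OPTIONS = -j64
+#
+# Here the OVERRIDE section replaces the BUILD_OPTIONS value that the
+# earlier DEFAULTS section defined.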
+#
+#
+#
+# Both TEST_START and DEFAULTS sections can also have the IF keyword
+# The value after the IF must evaluate to 0 or a positive non-zero
+# integer, and can use the config variables (explained below).
+#
+# DEFAULTS IF ${IS_X86_32}
+#
+# The above will process the DEFAULTS section if the config
+# variable IS_X86_32 evaluates to a non-zero positive integer;
+# otherwise, if it evaluates to zero, it will act the same
+# as if the SKIP keyword was used.
+#
+# The ELSE keyword can be used directly after a section with
+# an IF statement.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE
+#
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-normal
+#
+#
+# The ELSE keyword can also contain an IF statement to allow multiple
+# if/then/else sections. But all the sections must be either
+# DEFAULTS or TEST_START; they can not be a mixture.
+#
+# TEST_START IF ${RUN_NET_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# ELSE IF ${RUN_DISK_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-tests
+#
+# ELSE IF ${RUN_CPU_TESTS}
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-cpu
+#
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-network
+#
+# The if statement may also contain comparisons. For
+# == and !=, strings may be used on both sides.
+#
+# BOX_TYPE := x86_32
+#
+# DEFAULTS IF ${BOX_TYPE} == x86_32
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-32
+# ELSE
+# BUILD_TYPE = useconfig:${CONFIG_DIR}/config-64
+#
+# The DEFINED keyword can be used by the IF statements too.
+# It returns true if the given config variable or option has been defined
+# or false otherwise.
+#
+#
+# DEFAULTS IF DEFINED USE_CC
+# CC := ${USE_CC}
+# ELSE
+# CC := gcc
+#
+#
+# As well as NOT DEFINED.
+#
+# DEFAULTS IF NOT DEFINED MAKE_CMD
+# MAKE_CMD := make ARCH=x86
+#
+#
+# And/or ops (&&,||) may also be used to make complex conditionals.
+#
+# TEST_START IF (DEFINED ALL_TESTS || ${MYTEST} == boottest) && ${MACHINE} == gandalf
+#
+# Notice the use of parentheses. Without any parentheses the above would be
+# processed the same as:
+#
+# TEST_START IF DEFINED ALL_TESTS || (${MYTEST} == boottest && ${MACHINE} == gandalf)
+#
+#
+#
+# INCLUDE file
+#
+# The INCLUDE keyword may be used in DEFAULTS sections. This will
+# read another config file and process that file as well. The included
+# file can include other files, add new test cases or default
+# statements. Config variables will be passed to these files and changes
+# to config variables will be seen by top level config files. Including
+# a file is processed just as if the contents of the file were cut and
+# pasted into the top level file, except that include files ending with
+# TEST_START sections will have that section closed at the end of
+# the include file. That is, an included file behaves as if it were
+# followed by a DEFAULTS keyword.
+#
+# Unlike other files referenced in this config, the file path does not need
+# to be absolute. If the file does not start with '/', then the directory
+# that the current config file was located in is used. If no config by the
+# given name is found there, then the current directory is searched.
+#
+# INCLUDE myfile
+# DEFAULTS
+#
+# is the same as:
+#
+# INCLUDE myfile
+#
+# Note, if the include file does not contain a full path, the file is
+# searched first by the location of the original include file, and then
+# by the location that ktest.pl was executed in.
+#
+
+#### Config variables ####
+#
+# This config file can also contain "config variables".
+# These are assigned with ":=" instead of the ktest option
+# assignment "=".
+#
+# The difference between ktest options and config variables
+# is that config variables can be used multiple times, where
+# each instance overrides the previous instance, and that they
+# only live while this config file is being processed.
+#
+# The advantage of config variables is that they can be used
+# by any option or any other config variable to define things
+# that you may use over and over again in the options.
+#
+# For example:
+#
+# USER := root
+# TARGET := mybox
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_CASE := ssh ${USER}@${TARGET} /path/to/my/test2
+#
+# TEST_START
+# MIN_CONFIG = config1
+# TEST = ${TEST_CASE}
+#
+# TEST_START
+# MIN_CONFIG = config2
+# TEST = ${TEST_CASE}
+#
+# TEST_DIR := /home/me/test
+#
+# BUILD_DIR = ${TEST_DIR}/linux.git
+# OUTPUT_DIR = ${TEST_DIR}/test
+#
+# Note, the config variables are evaluated immediately, thus
+# updating TARGET after TEST_CASE has been assigned does nothing
+# to TEST_CASE.
+#
+# As shown in the example, to evaluate a config variable, you
+# use the ${X} convention. A simple $X will not work.
+#
+# If the config variable does not exist, the ${X} will not
+# be evaluated. Thus:
+#
+# MAKE_CMD = PATH=/mypath:${PATH} make
+#
+# If PATH is not a config variable, then the ${PATH} in
+# the MAKE_CMD option will be evaluated by the shell when
+# the MAKE_CMD option is passed into shell processing.
+
+#### Using options in other options ####
+#
+# Options that are defined in the config file may also be used
+# by other options. All options are evaluated at time of
+# use (except that config variables are evaluated at config
+# processing time).
+#
+# If a ktest option is used within another option, instead of
+# typing it again in that option you can simply use the option
+# just like you can config variables.
+#
+# MACHINE = mybox
+#
+# TEST = ssh root@${MACHINE} /path/to/test
+#
+# The option will be used per test case. Thus:
+#
+# TEST_TYPE = test
+# TEST = ssh root@${MACHINE}
+#
+# TEST_START
+# MACHINE = box1
+#
+# TEST_START
+# MACHINE = box2
+#
+# For both test cases, MACHINE will be evaluated at the time
+# of the test case. The first test will run ssh root@box1
+# and the second will run ssh root@box2.
+
+#### Mandatory Default Options ####
+
+# These options must be in the default section, although most
+# may be overridden by test options.
+
+# The machine hostname that you will test
+#MACHINE = target
+
+# The box is expected to have ssh on normal bootup, provide the user
+# (most likely root, since you need privileged operations)
+#SSH_USER = root
+
+# The directory that contains the Linux source code
+#BUILD_DIR = /home/test/linux.git
+
+# The directory where the objects will be built
+# (can not be same as BUILD_DIR)
+#OUTPUT_DIR = /home/test/build/target
+
+# The location of the compiled file to copy to the target
+# (relative to OUTPUT_DIR)
+#BUILD_TARGET = arch/x86/boot/bzImage
+
+# The place to put your image on the test machine
+#TARGET_IMAGE = /boot/vmlinuz-test
+
+# A script or command to reboot the box
+#
+# Here is a digital loggers power switch example
+#POWER_CYCLE = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=CCL'
+#
+# Here is an example to reboot a virtual box on the current host
+# with the name "Guest".
+#POWER_CYCLE = virsh destroy Guest; sleep 5; virsh start Guest
+
+# The script or command that reads the console
+#
+# If you use ttywatch server, something like the following would work.
+#CONSOLE = nc -d localhost 3001
+#
+# For a virtual machine with guest name "Guest".
+#CONSOLE = virsh console Guest
+
+# Signal to send to kill console.
+# ktest.pl will create a child process to monitor the console.
+# When the console is finished, ktest will kill the child process
+# with this signal.
+# (default INT)
+#CLOSE_CONSOLE_SIGNAL = HUP
+
+# Required version ending to differentiate the test
+# from other linux builds on the system.
+#LOCALVERSION = -test
+
+# For REBOOT_TYPE = grub2, you must specify where the grub.cfg
+# file is. This is the file that is searched to find the menu
+# option to boot to with GRUB_REBOOT
+#GRUB_FILE = /boot/grub2/grub.cfg
+
+# The tool for REBOOT_TYPE = grub2 to set the next reboot kernel
+# to boot into (one shot mode).
+# (default grub2_reboot)
+#GRUB_REBOOT = grub2_reboot
+
+# The grub title name for the test kernel to boot
+# (Only mandatory if REBOOT_TYPE = grub or grub2)
+#
+# Note, ktest.pl will not update the grub menu.lst, you need to
+# manually add an option for the test. ktest.pl will search
+# the grub menu.lst for this option to find what kernel to
+# reboot into.
+#
+# For example, if in the /boot/grub/menu.lst the test kernel title has:
+# title Test Kernel
+# kernel vmlinuz-test
+#
+# For grub2, a search of the top level "menuentry"s is done. No
+# submenu is searched. The menu is found by searching for the
+# contents of GRUB_MENU in the line that starts with "menuentry".
+# You may want to include the quotes around the option. For example:
+# for: menuentry 'Test Kernel'
+# do a: GRUB_MENU = 'Test Kernel'
+# For customizing, add your entry in /etc/grub.d/40_custom.
+#
+#GRUB_MENU = Test Kernel
+
+# For REBOOT_TYPE = syslinux, the name of the syslinux executable
+# (on the target) to use to set up the next reboot to boot the
+# test kernel.
+# (default extlinux)
+#SYSLINUX = syslinux
+
+# For REBOOT_TYPE = syslinux, the path that is passed to the
+# syslinux command where syslinux is installed.
+# (default /boot/extlinux)
+#SYSLINUX_PATH = /boot/syslinux
+
+# For REBOOT_TYPE = syslinux, the syslinux label that references the
+# test kernel in the syslinux config file.
+# (default undefined)
+#SYSLINUX_LABEL = "test-kernel"
+
+# A script to reboot the target into the test kernel
+# This and SWITCH_TO_TEST are about the same, except
+# SWITCH_TO_TEST is run even for REBOOT_TYPE = grub.
+# This may be left undefined.
+# (default undefined)
+#REBOOT_SCRIPT =
+
+#### Optional Config Options (all have defaults) ####
+
+# Email options for receiving notifications. Users must set up
+# the specified mailer prior to using this feature.
+#
+# (default undefined)
+#MAILTO =
+#
+# Supported mailers: sendmail, mail, mailx
+# (default sendmail)
+#MAILER = sendmail
+#
+# The executable to run
+# (default: for sendmail "/usr/sbin/sendmail", otherwise equals ${MAILER})
+#MAIL_EXEC = /usr/sbin/sendmail
+#
+# The command used to send mail, which uses the above options
+# can be modified. By default if the mailer is "sendmail" then
+# MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+# For mail or mailx:
+# MAIL_COMMAND = $MAIL_PATH/$MAILER -s \'$SUBJECT\' $MAILTO <<< \'$MESSAGE\'
+# ktest.pl will do the substitution for MAIL_PATH, MAILER, MAILTO at the time
+# it sends the mail if "$FOO" format is used. If "${FOO}" format is used,
+# then the substitutions will occur at the time the config file is read.
+# But note, MAIL_PATH and MAILER require being set by the config file if
+# ${MAIL_PATH} or ${MAILER} are used, but not if $MAIL_PATH or $MAILER are.
+#MAIL_COMMAND = echo \'Subject: $SUBJECT\n\n$MESSAGE\' | $MAIL_PATH/$MAILER -t $MAILTO
+#
+# Errors are defined as those that would terminate the script
+# (default 1)
+#EMAIL_ON_ERROR = 1
+# (default 1)
+#EMAIL_WHEN_FINISHED = 1
+# (default 0)
+#EMAIL_WHEN_STARTED = 1
+#
+# Users can cancel the test with Ctrl-C
+# (default 0)
+#EMAIL_WHEN_CANCELED = 1
+
+# Start a test setup. If you leave this off, all options
+# will be default and the test will run once.
+# This is a label and not really an option (it takes no value).
+# You can append ITERATE and a number after it to iterate the
+# test a number of times, or SKIP to ignore this test.
+#
+#TEST_START
+#TEST_START ITERATE 5
+#TEST_START SKIP
+
+# Have the following options as default again. Used after tests
+# have already been defined by TEST_START. Optionally, you can
+# just define all default options before the first TEST_START
+# and you do not need this option.
+#
+# This is a label and not really an option (it takes no value).
+# You can append SKIP to this label and the options within this
+# section will be ignored.
+#
+# DEFAULTS
+# DEFAULTS SKIP
+
+# If you want to execute some command before the first test runs
+# you can set this option. Note, it can be set as a default option
+# or an option in the first test case. All other test cases will
+# ignore it. If both the default and first test have this option
+# set, then the first test will take precedence.
+#
+# default (undefined)
+#PRE_KTEST = ${SSH} ~/set_up_test
+
+# If you want to execute some command after all the tests have
+# completed, you can set this option. Note, it can be set as a
+# default or any test case can override it. If multiple test cases
+# set this option, then the last test case that set it will take
+# precedence
+#
+# default (undefined)
+#POST_KTEST = ${SSH} ~/dismantle_test
+
+# The default test type (default test)
+# The test types may be:
+# build - only build the kernel, do nothing else
+# install - build and install, but do nothing else (does not reboot)
+# boot - build, install, and boot the kernel
+# test - build, boot and if TEST is set, run the test script
+# (If TEST is not set, it defaults back to boot)
+# bisect - Perform a bisect on the kernel (see BISECT_TYPE below)
+# patchcheck - Do a test on a series of commits in git (see PATCHCHECK below)
+#TEST_TYPE = test
+
+# Test to run if there is a successful boot and TEST_TYPE is test.
+# Must exit with 0 on success and non zero on error
+# default (undefined)
+#TEST = ssh user@machine /root/run_test
+
+# The build type is any make config type or special command
+# (default randconfig)
+# nobuild - skip the clean and build step
+# useconfig:/path/to/config - use the given config and run
+# oldconfig on it.
+# This option is ignored if TEST_TYPE is patchcheck or bisect
+#BUILD_TYPE = randconfig
+
+# The make command (default make)
+# If you are building a 32bit x86 on a 64 bit host
+#MAKE_CMD = CC=i386-gcc AS=i386-as make ARCH=i386
+
+# Any build options for the make of the kernel (not for other makes, like configs)
+# (default "")
+#BUILD_OPTIONS = -j20
+
+# If you need to do some special handling before installing
+# you can add a script with this option.
+# The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used.
+#
+# default (undefined)
+#PRE_INSTALL = ssh user@target rm -rf '/lib/modules/*-test*'
+
+# If you need an initrd, you can add a script or code here to install
+# it. The environment variable KERNEL_VERSION will be set to the
+# kernel version that is used. Remember to add the initrd line
+# to your grub menu.lst file.
+#
+# Here's a couple of examples to use:
+#POST_INSTALL = ssh user@target /sbin/mkinitrd --allow-missing -f /boot/initramfs-test.img $KERNEL_VERSION
+#
+# or on some systems:
+#POST_INSTALL = ssh user@target /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# If for some reason you just want to boot the kernel and you do not
+# want the test to install anything new (for example, you may just want
+# to boot test the same kernel over and over without the hassle of
+# installing anything), you can set this option to 1
+# (default 0)
+#NO_INSTALL = 1
+
+# If there is a command that you want to run before the individual test
+# case executes, then you can set this option
+#
+# default (undefined)
+#PRE_TEST = ${SSH} reboot_to_special_kernel
+
+# If there is a command you want to run after the individual test case
+# completes, then you can set this option.
+#
+# default (undefined)
+#POST_TEST = cd ${BUILD_DIR}; git reset --hard
+
+# If there is a script that you require to run before the build is done
+# you can specify it with PRE_BUILD.
+#
+# One example may be if you must add a temporary patch to the build to
+# fix an unrelated bug to perform a patchcheck test. This will apply the
+# patch before each build that is made. Use the POST_BUILD to do a git reset --hard
+# to remove the patch.
+#
+# (default undef)
+#PRE_BUILD = cd ${BUILD_DIR} && patch -p1 < /tmp/temp.patch
+
+# To specify if the test should fail if the PRE_BUILD fails,
+# PRE_BUILD_DIE needs to be set to 1. Otherwise the PRE_BUILD
+# result is ignored.
+# (default 0)
+#PRE_BUILD_DIE = 1
+
+# If there is a script that should run after the build is done
+# you can specify it with POST_BUILD.
+#
+# As in the PRE_BUILD example, POST_BUILD can be used to reset
+# modifications made by PRE_BUILD.
+#
+# (default undef)
+#POST_BUILD = cd ${BUILD_DIR} && git reset --hard
+
+# To specify if the test should fail if the POST_BUILD fails,
+# POST_BUILD_DIE needs to be set to 1. Otherwise the POST_BUILD
+# result is ignored.
+# (default 0)
+#POST_BUILD_DIE = 1
+
+# Way to reboot the box to the test kernel.
+# Only valid options so far are "grub", "grub2", "syslinux" and "script"
+# (default grub)
+# If you specify grub, it will assume grub version 1
+# and will search in /boot/grub/menu.lst for the title $GRUB_MENU
+# and select that target to reboot to the kernel. If this is not
+# your setup, then specify "script" and have a command or script
+# specified in REBOOT_SCRIPT to boot to the target.
+#
+# For REBOOT_TYPE = grub2, you must define both GRUB_MENU and
+# GRUB_FILE.
+#
+# For REBOOT_TYPE = syslinux, you must define SYSLINUX_LABEL, and
+# perhaps modify SYSLINUX (default extlinux) and SYSLINUX_PATH
+# (default /boot/extlinux)
+#
+# The entry in /boot/grub/menu.lst must be entered in manually.
+# The test will not modify that file.
+#REBOOT_TYPE = grub
+
+# If you are using a machine that doesn't boot with grub, and
+# perhaps gets its kernel from a remote server (tftp), then
+# you can use this option to update the target image with the
+# test image.
+#
+# You could also do the same with POST_INSTALL, but the difference
+# between that option and this option is that POST_INSTALL runs
+# after the install, where this one runs just before a reboot.
+# (default undefined)
+#SWITCH_TO_TEST = cp ${OUTPUT_DIR}/${BUILD_TARGET} ${TARGET_IMAGE}
+
+# If you are using a machine that doesn't boot with grub, and
+# perhaps gets its kernel from a remote server (tftp), then
+# you can use this option to update the target image with the
+# known good image to reboot safely back into.
+#
+# This option holds a command that will execute before needing
+# to reboot to a good known image.
+# (default undefined)
+#SWITCH_TO_GOOD = ssh ${SSH_USER}@${MACHINE} cp good_image ${TARGET_IMAGE}
+
+# The min config that is needed to build for the machine
+# A nice way to create this is with the following:
+#
+# $ ssh target
+# $ lsmod > mymods
+# $ scp mymods host:/tmp
+# $ exit
+# $ cd linux.git
+# $ rm .config
+# $ make LSMOD=mymods localyesconfig
+# $ grep '^CONFIG' .config > /home/test/config-min
+#
+# If you want even less configs:
+#
+# log in directly to target (do not ssh)
+#
+# $ su
+# # lsmod | cut -d' ' -f1 | xargs rmmod
+#
+# repeat the above several times
+#
+# # lsmod > mymods
+# # reboot
+#
+# You may need to reboot to get your network back to copy the mymods
+# file to the host, and then remove the previous .config and run the
+# localyesconfig again. The MIN_CONFIG generated like this will
+# not guarantee network activity to the box, so the TEST_TYPE of
+# test may fail.
+#
+# You might also want to set:
+# CONFIG_CMDLINE="<your options here>"
+# randconfig may set the above and override your real command
+# line options.
+# (default undefined)
+#MIN_CONFIG = /home/test/config-min
+
+# Sometimes there are options that just break the boot and
+# that you do not care about. Here are a few:
+# # CONFIG_STAGING is not set
+# Staging drivers are horrible, and can break the build.
+# # CONFIG_SCSI_DEBUG is not set
+# SCSI_DEBUG may change your root partition
+# # CONFIG_KGDB_SERIAL_CONSOLE is not set
+# KGDB may cause oops waiting for a connection that's not there.
+# This option points to the file containing config options that will be prepended
+# to the MIN_CONFIG (or be the MIN_CONFIG if it is not set)
+#
+# Note, config options in MIN_CONFIG will override these options.
+#
+# (default undefined)
+#ADD_CONFIG = /home/test/config-broken
+
+# The location on the host where to write temp files
+# (default /tmp/ktest/${MACHINE})
+#TMP_DIR = /tmp/ktest/${MACHINE}
+
+# Optional log file to write the status (recommended)
+# Note, this is a DEFAULT section only option.
+# (default undefined)
+#LOG_FILE = /home/test/logfiles/target.log
+
+# Remove old logfile if it exists before starting all tests.
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#CLEAR_LOG = 0
+
+# Line to define a successful boot up in console output.
+# This is what the line contains, not the entire line. If you need
+# the entire line to match, then use regular expression syntax like:
+# (do not add any quotes around it)
+#
+# SUCCESS_LINE = ^MyBox Login:$
+#
+# (default "login:")
+#SUCCESS_LINE = login:
+
+# To speed up between reboots, define a line that the default
+# kernel produces when it has successfully booted and can be
+# used to pass a new test kernel to it. Otherwise ktest.pl will
+# wait till SLEEP_TIME to continue.
+# (default undefined)
+#REBOOT_SUCCESS_LINE = login:
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after success is recommended.
+# (in seconds)
+# (default 10)
+#STOP_AFTER_SUCCESS = 10
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test after failure is recommended.
+# (in seconds)
+# (default 60)
+#STOP_AFTER_FAILURE = 60
+
+# In case the console constantly fills the screen, having
+# a specified time to stop the test if it never succeeds nor fails
+# is recommended.
+# Note: this is ignored if a success or failure is detected.
+# (in seconds)
+# (default 600, -1 is to never stop)
+#STOP_TEST_AFTER = 600
+
+# Stop testing if a build fails. If set, the script will end if
+# a failure is detected, otherwise it will save off the .config,
+# dmesg and bootlog in a directory called
+# MACHINE-TEST_TYPE_BUILD_TYPE-fail-yyyymmddhhmmss
+# if the STORE_FAILURES directory is set.
+# (default 1)
+# Note, even if this is set to zero, there are some errors that still
+# stop the tests.
+#DIE_ON_FAILURE = 1
+
+# Directory to store failure directories on failure. If this is not
+# set, DIE_ON_FAILURE=0 will not save off the .config, dmesg and
+# bootlog. This option is ignored if DIE_ON_FAILURE is not set.
+# (default undefined)
+#STORE_FAILURES = /home/test/failures
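+#
+# For example, a hypothetical failure directory for machine "mybox"
+# running a boot test with an oldconfig build might be named:
+#   mybox-boot_oldconfig-fail-20240506010230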
+
+# Directory to store success directories on success. If this is not
+# set, the .config, dmesg and bootlog will not be saved if a
+# test succeeds.
+# (default undefined)
+#STORE_SUCCESSES = /home/test/successes
+
+# Build without doing a make mrproper, or removing .config
+# (default 0)
+#BUILD_NOCLEAN = 0
+
+# As the test reads the console, after it hits the SUCCESS_LINE
+# the time it waits for the monitor to settle down between reads
+# can usually be lowered.
+# (in seconds) (default 1)
+#BOOTED_TIMEOUT = 1
+
+# The timeout in seconds when we consider the box hung after
+# the console stop producing output. Be sure to leave enough
+# time here to get past a reboot. Some machines may not produce
+# any console output for a long time during a reboot. You do
+# not want the test to fail just because the system was in
+# the process of rebooting to the test kernel.
+# (default 120)
+#TIMEOUT = 120
+
+# The timeout in seconds when to test if the box can be rebooted
+# or not. Before issuing the reboot command, a ssh connection
+# is attempted to see if the target machine is still active.
+# If the target does not connect within this timeout, a power cycle
+# is issued instead of a reboot.
+# (default 25)
+#CONNECT_TIMEOUT = 25
+
+# In between tests, a reboot of the box may occur, and this
+# is the time to wait for the console after it stops producing
+# output. Some machines may produce a large lag on reboot,
+# so this should accommodate it.
+# The difference between this and TIMEOUT, is that TIMEOUT happens
+# when rebooting to the test kernel. This sleep time happens
+# after a test has completed and we are about to start running
+# another test. If a reboot to the reliable kernel happens,
+# we wait SLEEP_TIME for the console to stop producing output
+# before starting the next test.
+#
+# You can speed up reboot times even more by setting REBOOT_SUCCESS_LINE.
+# (default 60)
+#SLEEP_TIME = 60
+
+# The time in between bisects to sleep (in seconds)
+# (default 60)
+#BISECT_SLEEP_TIME = 60
+
+# The max wait time (in seconds) for waiting for the console to finish.
+# If for some reason, the console is outputting content without
+# ever finishing, this will cause ktest to get stuck. This
+# option is the max time ktest will wait for the monitor (console)
+# to settle down before continuing.
+# (default 1800)
+#MAX_MONITOR_WAIT = 1800
+
+# The time in between patch checks to sleep (in seconds)
+# (default 60)
+#PATCHCHECK_SLEEP_TIME = 60
+
+# Reboot the target box on error (default 0)
+#REBOOT_ON_ERROR = 0
+
+# Power off the target on error (ignored if REBOOT_ON_ERROR is set)
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_ERROR = 0
+
+# Power off the target after all tests have completed successfully
+# Note, this is a DEFAULT section only option.
+# (default 0)
+#POWEROFF_ON_SUCCESS = 0
+
+# Reboot the target after all test completed successfully (default 1)
+# (ignored if POWEROFF_ON_SUCCESS is set)
+#REBOOT_ON_SUCCESS = 1
+
+# In case there are issues with rebooting, you can specify this
+# to always powercycle after this amount of time after calling
+# reboot.
+# Note, POWERCYCLE_AFTER_REBOOT = 0 does NOT disable it. It just
+# makes it powercycle immediately after rebooting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWERCYCLE_AFTER_REBOOT = 5
+
+# In case there are issues with halting, you can specify this
+# to always poweroff after this amount of time after calling
+# halt.
+# Note, POWEROFF_AFTER_HALT = 0 does NOT disable it. It just
+# makes it poweroff immediately after halting. Do not define
+# it if you do not want it.
+# (default undefined)
+#POWEROFF_AFTER_HALT = 20
+
+# A script or command to power off the box (default undefined)
+# Needed for POWEROFF_ON_ERROR and POWEROFF_ON_SUCCESS
+#
+# Example for digital loggers power switch:
+#POWER_OFF = wget --no-proxy -O /dev/null -q --auth-no-challenge 'http://admin:admin@power/outlet?5=OFF'
+#
+# Example for a virtual guest called "Guest".
+#POWER_OFF = virsh destroy Guest
+
+# To have the build fail on "new" warnings, create a file that
+# contains a list of all known warnings (they must exactly match
+# the line with 'warning:', 'error:' or 'Error:'). If the option
+# WARNINGS_FILE is set, then that file will be read, and if the
+# build detects a warning, it will examine this file and if the
+# warning does not exist in it, it will fail the build.
+#
+# Note, if this option is defined to a file that does not exist
+# then any warning will fail the build.
+# (see make_warnings_file below)
+#
+# (optional, default undefined)
+#WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
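+#
+# A line in the warnings file might look like this hypothetical
+# gcc warning (it must match the build output exactly):
+#   kernel/foo.c:123:9: warning: unused variable 'bar'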
+
+# The way to execute a command on the target
+# (default ssh $SSH_USER@$MACHINE $SSH_COMMAND)
+# The variables SSH_USER, MACHINE and SSH_COMMAND are defined
+#SSH_EXEC = ssh $SSH_USER@$MACHINE $SSH_COMMAND
+
+# The way to copy a file to the target (install and modules)
+# The variables SSH_USER and MACHINE are defined by the config.
+# SRC_FILE and DST_FILE are ktest internal variables and
+# should only have '$' and not the '${}' notation.
+# (default scp $SRC_FILE ${SSH_USER}@${MACHINE}:$DST_FILE)
+#SCP_TO_TARGET = echo skip scp for $SRC_FILE $DST_FILE
+
+# If install needs to be different than modules, then this
+# option will override the SCP_TO_TARGET for installation.
+# (default ${SCP_TO_TARGET} )
+#SCP_TO_TARGET_INSTALL = scp $SRC_FILE tftp@tftpserver:$DST_FILE
+
+# The nice way to reboot the target
+# (default ssh $SSH_USER@$MACHINE reboot)
+# The variables SSH_USER and MACHINE are defined.
+#REBOOT = ssh $SSH_USER@$MACHINE reboot
+
+# The way triple faults are detected is by testing the kernel
+# banner. If the kernel banner for the kernel we are testing is
+# found, and then later a kernel banner for another kernel version
+# is found, it is considered that we encountered a triple fault,
+# and there is no panic or callback, but simply a reboot.
+# To disable this (because it gave a false positive) set the following
+# to 0.
+# (default 1)
+#DETECT_TRIPLE_FAULT = 0
+
+# All options in the config file should either be used by ktest
+# or be used within the value of another option. If an option
+# in the config file is not used, ktest will warn about it and ask
+# if you want to continue.
+#
+# If you don't care about unused options, enable this
+# option. Be careful though: an unused option is usually a sign
+# of a mistyped option name.
+# (default 0)
+#IGNORE_UNUSED = 1
+
+# When testing a kernel that happens to produce WARNINGs and call
+# traces, ktest.pl will detect these and fail a boot or test run
+# because of them. By setting this option, ktest will ignore
+# call traces, and will not fail a test if the kernel produces
+# an oops. Use this option with care.
+# (default 0)
+#IGNORE_ERRORS = 1
+
+#### Per test run options ####
+# The following options are only allowed in TEST_START sections.
+# They are ignored in the DEFAULTS sections.
+#
+# All of these are optional and undefined by default, although
+# some of these options are required for TEST_TYPE of patchcheck
+# and bisect.
+#
+#
+# CHECKOUT = branch
+#
+# If the BUILD_DIR is a git repository, then you can set this option
+# to check out the given branch before running the TEST. If you
+# specify this for the first run, that branch will be used for
+# all subsequent tests until a new CHECKOUT is set.
+#
+#
+# TEST_NAME = name
+#
+# If you want the test to have a name that is displayed in
+# the test result banner at the end of the test, then use this
+# option. This is useful to search for the RESULT keyword and
+# not have to translate a test number to a test in the config.
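+#
+# A minimal sketch using both options:
+#
+# TEST_START
+# TEST_TYPE = boot
+# CHECKOUT = mybranch
+# TEST_NAME = boot mybranch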
+#
+# For TEST_TYPE = patchcheck
+#
+# This expects the BUILD_DIR to be a git repository, and
+# will checkout the PATCHCHECK_START commit.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# The MIN_CONFIG will be used for all builds of the patchcheck. The build type
+# used for patchcheck is oldconfig.
+#
+# PATCHCHECK_START is required and is the first patch to
+# test (the SHA1 of the commit). You may also specify anything
+# that git checkout allows (branch name, tag, HEAD~3).
+#
+# PATCHCHECK_END is the last patch to check (default HEAD)
+#
+# PATCHCHECK_CHERRY if set to non-zero, then git cherry will be
+# performed against PATCHCHECK_START and PATCHCHECK_END. That is
+#
+# git cherry ${PATCHCHECK_START} ${PATCHCHECK_END}
+#
+# Then the changes found will be tested.
+#
+# Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined.
+# (default 0)
+#
+# PATCHCHECK_TYPE is required and is the type of test to run:
+# build, boot, test.
+#
+# Note, the build test will look for warnings: if a warning occurs
+# in a file that a commit touches, the build will fail, unless
+# IGNORE_WARNINGS is set for the given commit's SHA1.
+#
+# IGNORE_WARNINGS can be used to disable the failure of patchcheck
+# on a particular commit (SHA1). You can add more than one commit
+# by adding a list of SHA1s that are space delimited.
+#
+# If BUILD_NOCLEAN is set, then make mrproper will not be run on
+# any of the builds, just like all other TEST_TYPE tests. But
+# what makes patchcheck different from the other tests is that if
+# BUILD_NOCLEAN is not set, only the first and last patches run
+# make mrproper. This helps speed up the test.
+#
+# Example:
+# TEST_START
+# TEST_TYPE = patchcheck
+# CHECKOUT = mybranch
+# PATCHCHECK_TYPE = boot
+# PATCHCHECK_START = 747e94ae3d1b4c9bf5380e569f614eb9040b79e7
+# PATCHCHECK_END = HEAD~2
+# IGNORE_WARNINGS = 42f9c6b69b54946ffc0515f57d01dc7f5c0e4712 0c17ca2c7187f431d8ffc79e81addc730f33d128
+#
+#
+#
+# For TEST_TYPE = bisect
+#
+# You can specify a git bisect if the BUILD_DIR is a git repository.
+# The MIN_CONFIG will be used for all builds of the bisect. The build type
+# used for bisecting is oldconfig.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# BISECT_TYPE is the type of test to perform:
+# build - bad fails to build
+# boot - bad builds but fails to boot
+# test - bad boots but fails a test
+#
+# BISECT_GOOD is the commit (SHA1) to label as good (accepts all git good commit types)
+# BISECT_BAD is the commit to label as bad (accepts all git bad commit types)
+#
+# The above three options are required for a bisect operation.
+#
+# BISECT_REPLAY = /path/to/replay/file (optional, default undefined)
+#
+# If an operation in the bisect fails that was not expected to
+# fail, then the test ends. The state of the BUILD_DIR will be
+# left at where the failure occurred. You can examine the
+# reason for the failure, and perhaps even find a git commit
+# that would work to continue with. You can run:
+#
+# git bisect log > /path/to/replay/file
+#
+# Then add:
+#
+#  BISECT_REPLAY = /path/to/replay/file
+#
+# and run the test again. The test will perform the initial
+# git bisect start, git bisect good, and git bisect bad, and
+# then it will run git bisect replay on this file, before
+# continuing with the bisect.
+#
+# BISECT_START = commit (optional, default undefined)
+#
+# As with BISECT_REPLAY, use this if the bisect landed on a commit
+# that just happens to be broken for an unrelated reason and you
+# need to skip it. If BISECT_START is defined, it
+# will checkout that commit after doing the initial git bisect start,
+# git bisect good, git bisect bad, and running the git bisect replay
+# if the BISECT_REPLAY is set.
+#
+# BISECT_SKIP = 1 (optional, default 0)
+#
+# If BISECT_TYPE is set to test but the build fails, ktest will
+# simply fail the test and end there. You could use BISECT_REPLAY
+# and BISECT_START to resume after you found a new starting point,
+# or you could set BISECT_SKIP to 1. If BISECT_SKIP is set to 1,
+# when something other than the BISECT_TYPE fails, ktest.pl will
+# run "git bisect skip" and try again.
+#
+# BISECT_FILES = <path> (optional, default undefined)
+#
+# To just run the git bisect on a specific path, set BISECT_FILES.
+# For example:
+#
+# BISECT_FILES = arch/x86 kernel/time
+#
+# Will run the bisect with "git bisect start -- arch/x86 kernel/time"
+#
+# BISECT_REVERSE = 1 (optional, default 0)
+#
+# In those strange instances where it was broken forever
+# and you are trying to find where it started to work!
+# Set BISECT_GOOD to the commit that was last known to fail.
+# Set BISECT_BAD to the commit that is known to start working.
+# With BISECT_REVERSE = 1, the test will consider failures as
+# good, and successes as bad.
+#
+# BISECT_MANUAL = 1 (optional, default 0)
+#
+# In case there's a problem with automating the bisect for
+# whatever reason (can't reboot, want to inspect each iteration),
+# setting BISECT_MANUAL will have the test wait for you to
+# tell it whether the test passed or failed after each iteration.
+# This is basically the same as running git bisect yourself,
+# but ktest will rebuild and install the kernel for you.
+#
+# BISECT_CHECK = 1 (optional, default 0)
+#
+# Just to be sure the good is good and the bad is bad, setting
+# BISECT_CHECK to 1 will start the bisect by first checking
+# out BISECT_BAD and making sure it fails, then checking
+# out BISECT_GOOD and making sure it succeeds, before starting
+# the bisect (it works for BISECT_REVERSE too).
+#
+# You can limit the test to just check BISECT_GOOD or
+# BISECT_BAD with BISECT_CHECK = good or
+# BISECT_CHECK = bad, respectively.
+#
+# BISECT_TRIES = 5 (optional, default 1)
+#
+# For those cases where it takes several tries to hit a bug,
+# BISECT_TRIES is useful. It is the number of times the
+# test is run before the kernel is declared good. The first failure
+# will stop the retries and mark the current SHA1 as bad.
+#
+# Note, as with all race bugs, there's no guarantee that if
+# it succeeds, it is really a good bisect. But it helps in case
+# the bug is somewhat reliable.
+#
+# You can set BISECT_TRIES to zero, and all tests will be considered
+# good, unless you also set BISECT_MANUAL.
+#
+# BISECT_RET_GOOD = 0 (optional, default undefined)
+#
+# In case the specified test returns something other than just
+# 0 for good, and non-zero for bad, you can override 0 being
+# good by defining BISECT_RET_GOOD.
+#
+# BISECT_RET_BAD = 1 (optional, default undefined)
+#
+# In case the specified test returns something other than just
+# 0 for good, and non-zero for bad, you can override non-zero being
+# bad by defining BISECT_RET_BAD.
+#
+# BISECT_RET_ABORT = 255 (optional, default undefined)
+#
+# If you need to abort the bisect if the test discovers something
+# that was wrong, you can define BISECT_RET_ABORT to be the error
+# code returned by the test in order to abort the bisect.
+#
+# BISECT_RET_SKIP = 2 (optional, default undefined)
+#
+# If the test detects that the current commit is neither good
+# nor bad, but something else happened (another bug detected)
+# you can specify BISECT_RET_SKIP to an error code that the
+# test returns when it should skip the current commit.
+#
+# BISECT_RET_DEFAULT = good (optional, default undefined)
+#
+# You can override the default of what to do when the above
+# options are not hit. This may be one of, "good", "bad",
+# "abort" or "skip" (without the quotes).
+#
+# Note, if you do not define any of the previous BISECT_RET_*
+# options but do define BISECT_RET_DEFAULT, all bisect results
+# will do what the BISECT_RET_DEFAULT specifies.
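+#
+# For example, for a hypothetical test script that returns 0 for
+# good, 1 for bad, 2 for skip and 255 to abort:
+#
+#  BISECT_RET_GOOD = 0
+#  BISECT_RET_BAD = 1
+#  BISECT_RET_SKIP = 2
+#  BISECT_RET_ABORT = 255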
+#
+#
+# Example:
+# TEST_START
+# TEST_TYPE = bisect
+# BISECT_GOOD = v2.6.36
+# BISECT_BAD = b5153163ed580e00c67bdfecb02b2e3843817b3e
+# BISECT_TYPE = build
+# MIN_CONFIG = /home/test/config-bisect
+#
+#
+#
+# For TEST_TYPE = config_bisect
+#
+# In those cases where you have two different configs: one of them
+# works, the other does not, and you do not know which config option
+# causes the problem.
+# The TEST_TYPE config_bisect will bisect the bad config looking for
+# what config causes the failure.
+#
+# The way it works is this:
+#
+# You can specify a good config with CONFIG_BISECT_GOOD, otherwise it
+# will use the MIN_CONFIG, and if that's not specified, it will use
+# the config that comes with "make defconfig".
+#
+# It runs both the good and bad configs through a make oldconfig to
+# make sure that they are set up for the kernel that is checked out.
+#
+# It then reads the configs that are set, as well as the ones that are
+# not set for both the good and bad configs, and then compares them.
+# It will set half of the good configs within the bad config (note,
+# "set" means to make the bad config match the good config: a config
+# that is off in the good config will be turned off in the bad
+# config as well. That is considered a "set").
+#
+# It tests this new config and if it works, it becomes the new good
+# config, otherwise it becomes the new bad config. It continues this
+# process until there's only one config left and it will report that
+# config.
+#
+# The "bad config" can also be a config that is needed to boot but was
+# disabled because it depended on something that wasn't set.
+#
+# During this process, it saves the current good and bad configs in
+# ${TMP_DIR}/good_config and ${TMP_DIR}/bad_config respectively.
+# If you stop the test, you can copy them to a new location to
+# reuse them again.
+#
+# Although the MIN_CONFIG may be the config the bisect starts with,
+# the MIN_CONFIG option is otherwise ignored.
+#
+# The option BUILD_TYPE will be ignored.
+#
+# CONFIG_BISECT_TYPE is the type of test to perform:
+# build - bad fails to build
+# boot - bad builds but fails to boot
+# test - bad boots but fails a test
+#
+# CONFIG_BISECT is the config that failed to boot
+#
+# If BISECT_MANUAL is set, it will pause between iterations.
+# This is useful if you want to use ktest.pl just for the config
+# bisect. If you set it to build, it will run the bisect and you can
+# control what happens in between iterations. It will ask you whether
+# the test succeeded or not and continue the config bisect.
+#
+# CONFIG_BISECT_GOOD (optional)
+# If you have a good config to start with, then you
+# can specify it with CONFIG_BISECT_GOOD. Otherwise
+# the MIN_CONFIG is the base; if MIN_CONFIG is not set,
+# it will build a config with "make defconfig".
+#
+# CONFIG_BISECT_CHECK (optional)
+# Set this to 1 if you want to confirm that the config ktest
+# generates (the bad config with the min config) is still bad.
+# It may be that the min config fixes what broke the bad config
+# and the test will not return a result.
+# Set it to "good" to test only the good config and set it
+# to "bad" to only test the bad config.
+#
+# CONFIG_BISECT_EXEC (optional)
+# The config bisect is a separate program that comes with ktest.pl.
+# By default, it will look for:
+# `pwd`/config-bisect.pl # the location ktest.pl was executed from.
+# If it does not find it there, it will look for:
+# `dirname <ktest.pl>`/config-bisect.pl # The directory that holds ktest.pl
+# If it does not find it there, it will look for:
+# ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl
+# Setting CONFIG_BISECT_EXEC will override where it looks.
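+# For example (a sketch):
+#  CONFIG_BISECT_EXEC = ${BUILD_DIR}/tools/testing/ktest/config-bisect.pl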
+#
+# Example:
+# TEST_START
+# TEST_TYPE = config_bisect
+# CONFIG_BISECT_TYPE = build
+# CONFIG_BISECT = /home/test/config-bad
+# MIN_CONFIG = /home/test/config-min
+# BISECT_MANUAL = 1
+#
+#
+#
+# For TEST_TYPE = make_min_config
+#
+# After doing a make localyesconfig, your kernel configuration may
+# not be the most useful minimum configuration. Having a true minimum
+# config that you can use against other configs is very useful if
+# someone else has a config that breaks on your code. Forcing only
+# those configurations that are truly required to boot your machine
+# gives you less of a chance that one of your set configurations
+# will make the bug go away. This gives you a better chance to
+# reproduce the reported bug with the broken config.
+#
+# Note, this does take some time, and may require you to run the
+# test overnight, or perhaps over the weekend. But it also allows
+# you to interrupt it, and gives you the current minimum config
+# that was found up to that point.
+#
+# Note, this test automatically assumes a BUILD_TYPE of oldconfig
+# and its test type acts like boot.
+# TODO: add a test version that makes the config do more than just
+# boot, like having network access.
+#
+# To save time, the test does not just grab any option and test
+# it. The Kconfig files are examined to determine the dependencies
+# of the configs. If a config is chosen that depends on another
+# config, that config will be checked first. By checking the
+# parents first, we can eliminate whole groups of configs that
+# may have been enabled.
+#
+# For example, if a USB device config is chosen and depends on CONFIG_USB,
+# the CONFIG_USB will be tested before the device. If CONFIG_USB is
+# found not to be needed, it, as well as all configs that depend on
+# it, will be disabled and removed from the current min_config.
+#
+# OUTPUT_MIN_CONFIG is the path and filename of the file that will
+# be created from the MIN_CONFIG. If you interrupt the test, set
+# this file as your new min config, and use it to continue the test.
+# This file does not need to exist on start of test.
+# This file is not created until a config is found that can be removed.
+# If this file exists, you will be prompted if you want to use it
+# as the min_config (overriding MIN_CONFIG) if START_MIN_CONFIG
+# is not defined.
+# (required field)
+#
+# START_MIN_CONFIG is the config to use to start the test with.
+# You can set this to the same file as OUTPUT_MIN_CONFIG, but if
+# you do, the OUTPUT_MIN_CONFIG file must exist.
+# (default MIN_CONFIG)
+#
+# IGNORE_CONFIG is used to specify a config file that has configs that
+# you already know must be set. Configs are written here that have
+# been tested and proved to be required. It is best to define this
+# file if you intend to interrupt the test and resume it where
+# it left off. New configs that it finds will be written to this file
+# and will not be tested again in later runs.
+# (optional)
+#
+# MIN_CONFIG_TYPE can be either 'boot' or 'test'. With 'boot' it will
+# test if the created config can just boot the machine. If this is
+# set to 'test', then the TEST option must be defined and the created
+# config will not only boot the target, but also make sure that the
+# config lets the test succeed. This is useful to make sure the final
+# config that is generated allows network activity (ssh).
+# (optional)
+#
+# USE_OUTPUT_MIN_CONFIG set this to 1 if you do not want to be prompted
+# about using the OUTPUT_MIN_CONFIG as the MIN_CONFIG starting
+# point. Set it to 0 if you want to always just use the given MIN_CONFIG.
+# If it is not defined, it will prompt you to pick which config
+# to start with (MIN_CONFIG or OUTPUT_MIN_CONFIG).
+#
+# Example:
+#
+# TEST_TYPE = make_min_config
+# OUTPUT_MIN_CONFIG = /path/to/config-new-min
+# START_MIN_CONFIG = /path/to/config-min
+# IGNORE_CONFIG = /path/to/config-tested
+# MIN_CONFIG_TYPE = test
+# TEST = ssh ${USER}@${MACHINE} echo hi
+#
+#
+#
+#
+# For TEST_TYPE = make_warnings_file
+#
+# If you want the build to fail when a new warning is discovered,
+# set the WARNINGS_FILE to point to a file of known warnings.
+#
+# The test "make_warnings_file" will let you create a new warnings
+# file before you run other tests, like patchcheck.
+#
+# What this test does is run just a build; you still need to
+# specify BUILD_TYPE to tell the test what type of config to use.
+# A BUILD_TYPE of nobuild will fail this test.
+#
+# The test will do the build and scan for all warnings. Any warning
+# it discovers will be saved to the file given by the WARNINGS_FILE
+# (required) option.
+#
+# It is recommended (but not necessary) to make sure BUILD_NOCLEAN is
+# off, so that a full build is done (make mrproper is performed).
+# That way, all warnings will be captured.
+#
+# Example:
+#
+# TEST_TYPE = make_warnings_file
+# WARNINGS_FILE = ${OUTPUT_DIR}/warnings_file
+# BUILD_TYPE = useconfig:oldconfig
+# CHECKOUT = v3.8
+# BUILD_NOCLEAN = 0
+#
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
new file mode 100644
index 000000000..0392153a0
--- /dev/null
+++ b/tools/testing/nvdimm/Kbuild
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
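+# Each --wrap=<symbol> below makes the linker redirect calls to
+# <symbol> into __wrap_<symbol> (see test/iomap.c), letting nfit_test
+# interpose on memory mapping and resource APIs.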
+ldflags-y += --wrap=ioremap_wc
+ldflags-y += --wrap=memremap
+ldflags-y += --wrap=devm_ioremap_nocache
+ldflags-y += --wrap=devm_memremap
+ldflags-y += --wrap=devm_memunmap
+ldflags-y += --wrap=ioremap_nocache
+ldflags-y += --wrap=iounmap
+ldflags-y += --wrap=memunmap
+ldflags-y += --wrap=__devm_request_region
+ldflags-y += --wrap=__devm_release_region
+ldflags-y += --wrap=__request_region
+ldflags-y += --wrap=__release_region
+ldflags-y += --wrap=devm_memremap_pages
+ldflags-y += --wrap=insert_resource
+ldflags-y += --wrap=remove_resource
+ldflags-y += --wrap=acpi_evaluate_object
+ldflags-y += --wrap=acpi_evaluate_dsm
+
+DRIVERS := ../../../drivers
+NVDIMM_SRC := $(DRIVERS)/nvdimm
+ACPI_SRC := $(DRIVERS)/acpi/nfit
+DAX_SRC := $(DRIVERS)/dax
+ccflags-y := -I$(src)/$(NVDIMM_SRC)/
+
+obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
+obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
+obj-$(CONFIG_ND_BTT) += nd_btt.o
+obj-$(CONFIG_ND_BLK) += nd_blk.o
+obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_ACPI_NFIT) += nfit.o
+ifeq ($(CONFIG_DAX),m)
+obj-$(CONFIG_DAX) += dax.o
+endif
+obj-$(CONFIG_DEV_DAX) += device_dax.o
+obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
+
+nfit-y := $(ACPI_SRC)/core.o
+nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
+nfit-y += config_check.o
+
+nd_pmem-y := $(NVDIMM_SRC)/pmem.o
+nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
+nd_pmem-y += config_check.o
+
+nd_btt-y := $(NVDIMM_SRC)/btt.o
+nd_btt-y += config_check.o
+
+nd_blk-y := $(NVDIMM_SRC)/blk.o
+nd_blk-y += config_check.o
+
+nd_e820-y := $(NVDIMM_SRC)/e820.o
+nd_e820-y += config_check.o
+
+dax-y := $(DAX_SRC)/super.o
+dax-y += config_check.o
+
+device_dax-y := $(DAX_SRC)/device.o
+device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
+device_dax-y += config_check.o
+
+dax_pmem-y := $(DAX_SRC)/pmem.o
+dax_pmem-y += config_check.o
+
+libnvdimm-y := $(NVDIMM_SRC)/core.o
+libnvdimm-y += $(NVDIMM_SRC)/bus.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/dimm.o
+libnvdimm-y += $(NVDIMM_SRC)/region_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/region.o
+libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o
+libnvdimm-y += $(NVDIMM_SRC)/label.o
+libnvdimm-y += $(NVDIMM_SRC)/badrange.o
+libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
+libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
+libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
+libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
+libnvdimm-y += config_check.o
+
+obj-m += test/
diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile
new file mode 100644
index 000000000..c37a6a0bd
--- /dev/null
+++ b/tools/testing/nvdimm/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
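+# KDIR is the kernel tree to build these out-of-tree test modules
+# against (in-tree by default); it can be overridden, e.g. with a
+# hypothetical "make KDIR=/lib/modules/$(uname -r)/build".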
+KDIR ?= ../../../
+
+default:
+ $(MAKE) -C $(KDIR) M=$$PWD
+
+install: default
+ $(MAKE) -C $(KDIR) M=$$PWD modules_install
diff --git a/tools/testing/nvdimm/acpi_nfit_test.c b/tools/testing/nvdimm/acpi_nfit_test.c
new file mode 100644
index 000000000..43521512e
--- /dev/null
+++ b/tools/testing/nvdimm/acpi_nfit_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(acpi_nfit);
diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c
new file mode 100644
index 000000000..cac891028
--- /dev/null
+++ b/tools/testing/nvdimm/config_check.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+
+void check(void)
+{
+ /*
+ * These kconfig symbols must be set to "m" for nfit_test to
+ * load and operate.
+ */
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_PFN));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX));
+ BUILD_BUG_ON(!IS_MODULE(CONFIG_DEV_DAX_PMEM));
+}
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
new file mode 100644
index 000000000..36ee3d879
--- /dev/null
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/mm.h>
+#include "../../../drivers/dax/dax-private.h"
+
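+/*
+ * Test-environment version of dax_pgoff_to_phys(): walk the device's
+ * resources to translate a page offset into a physical address, and
+ * for addresses backed by an emulated nfit_test resource (vmalloc
+ * memory) return the physical page that actually backs the buffer.
+ */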
+phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
+ unsigned long size)
+{
+ struct resource *res;
+ phys_addr_t addr;
+ int i;
+
+ for (i = 0; i < dev_dax->num_resources; i++) {
+ res = &dev_dax->res[i];
+ addr = pgoff * PAGE_SIZE + res->start;
+ if (addr >= res->start && addr <= res->end)
+ break;
+ pgoff -= PHYS_PFN(resource_size(res));
+ }
+
+ if (i < dev_dax->num_resources) {
+ res = &dev_dax->res[i];
+ if (addr + size - 1 <= res->end) {
+ if (get_nfit_res(addr)) {
+ struct page *page;
+
+ if (dev_dax->region->align > PAGE_SIZE)
+ return -1;
+
+ page = vmalloc_to_page((void *)addr);
+ return PFN_PHYS(page_to_pfn(page));
+ } else
+ return addr;
+ }
+ }
+
+ return -1;
+}
diff --git a/tools/testing/nvdimm/device_dax_test.c b/tools/testing/nvdimm/device_dax_test.c
new file mode 100644
index 000000000..24b17bf42
--- /dev/null
+++ b/tools/testing/nvdimm/device_dax_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(device_dax);
diff --git a/tools/testing/nvdimm/libnvdimm_test.c b/tools/testing/nvdimm/libnvdimm_test.c
new file mode 100644
index 000000000..00ca30b23
--- /dev/null
+++ b/tools/testing/nvdimm/libnvdimm_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(libnvdimm);
diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c
new file mode 100644
index 000000000..2e7fd8227
--- /dev/null
+++ b/tools/testing/nvdimm/pmem-dax.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014-2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include "test/nfit_test.h"
+#include <linux/blkdev.h>
+#include <pmem.h>
+#include <nd.h>
+
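+/*
+ * Test-environment version of pmem's direct_access helper: for ranges
+ * emulated by nfit_test (vmalloc-backed), map a single page at a time
+ * via vmalloc_to_page(); otherwise mirror the real implementation.
+ */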
+long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
+ long nr_pages, void **kaddr, pfn_t *pfn)
+{
+ resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
+
+ if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
+ PFN_PHYS(nr_pages))))
+ return -EIO;
+
+ /*
+ * Limit dax to a single page at a time given vmalloc()-backed
+ * in the nfit_test case.
+ */
+ if (get_nfit_res(pmem->phys_addr + offset)) {
+ struct page *page;
+
+ if (kaddr)
+ *kaddr = pmem->virt_addr + offset;
+ page = vmalloc_to_page(pmem->virt_addr + offset);
+ if (pfn)
+ *pfn = page_to_pfn_t(page);
+ pr_debug_ratelimited("%s: pmem: %p pgoff: %#lx pfn: %#lx\n",
+ __func__, pmem, pgoff, page_to_pfn(page));
+
+ return 1;
+ }
+
+ if (kaddr)
+ *kaddr = pmem->virt_addr + offset;
+ if (pfn)
+ *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+ /*
+ * If badblocks are present, limit known good range to the
+ * requested range.
+ */
+ if (unlikely(pmem->bb.count))
+ return nr_pages;
+ return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
+}
diff --git a/tools/testing/nvdimm/pmem_test.c b/tools/testing/nvdimm/pmem_test.c
new file mode 100644
index 000000000..fd38f9227
--- /dev/null
+++ b/tools/testing/nvdimm/pmem_test.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+
+#include <linux/module.h>
+#include <linux/printk.h>
+#include "watermark.h"
+
+nfit_test_watermark(pmem);
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
new file mode 100644
index 000000000..fb3c3d7cd
--- /dev/null
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+ccflags-y := -I$(src)/../../../../drivers/nvdimm/
+ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
+
+obj-m += nfit_test.o
+obj-m += nfit_test_iomap.o
+
+nfit_test-y := nfit.o
+nfit_test_iomap-y := iomap.o
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
new file mode 100644
index 000000000..c6635fee2
--- /dev/null
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -0,0 +1,403 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/memremap.h>
+#include <linux/rculist.h>
+#include <linux/export.h>
+#include <linux/ioport.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pfn_t.h>
+#include <linux/acpi.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include "nfit_test.h"
+
+static LIST_HEAD(iomap_head);
+
+static struct iomap_ops {
+ nfit_test_lookup_fn nfit_test_lookup;
+ nfit_test_evaluate_dsm_fn evaluate_dsm;
+ struct list_head list;
+} iomap_ops = {
+ .list = LIST_HEAD_INIT(iomap_ops.list),
+};
+
+void nfit_test_setup(nfit_test_lookup_fn lookup,
+ nfit_test_evaluate_dsm_fn evaluate)
+{
+ iomap_ops.nfit_test_lookup = lookup;
+ iomap_ops.evaluate_dsm = evaluate;
+ list_add_rcu(&iomap_ops.list, &iomap_head);
+}
+EXPORT_SYMBOL(nfit_test_setup);
+
+void nfit_test_teardown(void)
+{
+ list_del_rcu(&iomap_ops.list);
+ synchronize_rcu();
+}
+EXPORT_SYMBOL(nfit_test_teardown);
+
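+/*
+ * Look up the emulated resource covering @resource via the ops
+ * registered in nfit_test_setup(); callers must hold rcu_read_lock().
+ */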
+static struct nfit_test_resource *__get_nfit_res(resource_size_t resource)
+{
+ struct iomap_ops *ops;
+
+ ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+ if (ops)
+ return ops->nfit_test_lookup(resource);
+ return NULL;
+}
+
+struct nfit_test_resource *get_nfit_res(resource_size_t resource)
+{
+ struct nfit_test_resource *res;
+
+ rcu_read_lock();
+ res = __get_nfit_res(resource);
+ rcu_read_unlock();
+
+ return res;
+}
+EXPORT_SYMBOL(get_nfit_res);
+
+void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+ void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return (void __iomem *) nfit_res->buf + offset
+ - nfit_res->res.start;
+ return fallback_fn(offset, size);
+}
+
+void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
+ resource_size_t offset, unsigned long size)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return (void __iomem *) nfit_res->buf + offset
+ - nfit_res->res.start;
+ return devm_ioremap_nocache(dev, offset, size);
+}
+EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
+
+void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
+ size_t size, unsigned long flags)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return nfit_res->buf + offset - nfit_res->res.start;
+ return devm_memremap(dev, offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_devm_memremap);
+
+static void nfit_test_kill(void *_pgmap)
+{
+ struct dev_pagemap *pgmap = _pgmap;
+
+ pgmap->kill(pgmap->ref);
+}
+
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
+{
+ resource_size_t offset = pgmap->res.start;
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res) {
+ int rc;
+
+ rc = devm_add_action_or_reset(dev, nfit_test_kill, pgmap);
+ if (rc)
+ return ERR_PTR(rc);
+ return nfit_res->buf + offset - nfit_res->res.start;
+ }
+ return devm_memremap_pages(dev, pgmap);
+}
+EXPORT_SYMBOL_GPL(__wrap_devm_memremap_pages);
+
+pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(addr);
+
+ if (nfit_res)
+ flags &= ~PFN_MAP;
+ return phys_to_pfn_t(addr, flags);
+}
+EXPORT_SYMBOL(__wrap_phys_to_pfn_t);
+
+void *__wrap_memremap(resource_size_t offset, size_t size,
+ unsigned long flags)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res(offset);
+
+ if (nfit_res)
+ return nfit_res->buf + offset - nfit_res->res.start;
+ return memremap(offset, size, flags);
+}
+EXPORT_SYMBOL(__wrap_memremap);
+
+void __wrap_devm_memunmap(struct device *dev, void *addr)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+
+ if (nfit_res)
+ return;
+ return devm_memunmap(dev, addr);
+}
+EXPORT_SYMBOL(__wrap_devm_memunmap);
+
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_nocache);
+}
+EXPORT_SYMBOL(__wrap_ioremap_nocache);
+
+void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size)
+{
+ return __nfit_test_ioremap(offset, size, ioremap_wc);
+}
+EXPORT_SYMBOL(__wrap_ioremap_wc);
+
+void __wrap_iounmap(volatile void __iomem *addr)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+ if (nfit_res)
+ return;
+ return iounmap(addr);
+}
+EXPORT_SYMBOL(__wrap_iounmap);
+
+void __wrap_memunmap(void *addr)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) addr);
+
+ if (nfit_res)
+ return;
+ return memunmap(addr);
+}
+EXPORT_SYMBOL(__wrap_memunmap);
+
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n);
+
+static void nfit_devres_release(struct device *dev, void *data)
+{
+ struct resource *res = *((struct resource **) data);
+
+ WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
+ resource_size(res)));
+}
+
+static int match(struct device *dev, void *__res, void *match_data)
+{
+ struct resource *res = *((struct resource **) __res);
+ resource_size_t start = *((resource_size_t *) match_data);
+
+ return res->start == start;
+}
+
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ if (parent == &iomem_resource) {
+ struct nfit_test_resource *nfit_res = get_nfit_res(start);
+
+ if (nfit_res) {
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
+
+ if (dev) {
+ devres_release(dev, nfit_devres_release, match,
+ &start);
+ return true;
+ }
+
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (req->res.start == start) {
+ res = &req->res;
+ list_del(&req->list);
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ WARN(!res || resource_size(res) != n,
+ "%s: start: %llx n: %llx mismatch: %pr\n",
+ __func__, start, n, res);
+ if (res)
+ kfree(req);
+ return true;
+ }
+ }
+ return false;
+}
+
+static struct resource *nfit_test_request_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n, const char *name, int flags)
+{
+ struct nfit_test_resource *nfit_res;
+
+ if (parent == &iomem_resource) {
+ nfit_res = get_nfit_res(start);
+ if (nfit_res) {
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
+
+ if (start + n > nfit_res->res.start
+ + resource_size(&nfit_res->res)) {
+ pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
+ __func__, start, n,
+ &nfit_res->res);
+ return NULL;
+ }
+
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (start == req->res.start) {
+ res = &req->res;
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ if (res) {
+ WARN(1, "%pr already busy\n", res);
+ return NULL;
+ }
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return NULL;
+ INIT_LIST_HEAD(&req->list);
+ res = &req->res;
+
+ res->start = start;
+ res->end = start + n - 1;
+ res->name = name;
+ res->flags = resource_type(parent);
+ res->flags |= IORESOURCE_BUSY | flags;
+ spin_lock(&nfit_res->lock);
+ list_add(&req->list, &nfit_res->requests);
+ spin_unlock(&nfit_res->lock);
+
+ if (dev) {
+ struct resource **d;
+
+ d = devres_alloc(nfit_devres_release,
+ sizeof(struct resource *),
+ GFP_KERNEL);
+ if (!d)
+ return NULL;
+ *d = res;
+ devres_add(dev, d);
+ }
+
+ pr_debug("%s: %pr\n", __func__, res);
+ return res;
+ }
+ }
+ if (dev)
+ return __devm_request_region(dev, parent, start, n, name);
+ return __request_region(parent, start, n, name, flags);
+}
+
+struct resource *__wrap___request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n, const char *name,
+ int flags)
+{
+ return nfit_test_request_region(NULL, parent, start, n, name, flags);
+}
+EXPORT_SYMBOL(__wrap___request_region);
+
+int __wrap_insert_resource(struct resource *parent, struct resource *res)
+{
+ if (get_nfit_res(res->start))
+ return 0;
+ return insert_resource(parent, res);
+}
+EXPORT_SYMBOL(__wrap_insert_resource);
+
+int __wrap_remove_resource(struct resource *res)
+{
+ if (get_nfit_res(res->start))
+ return 0;
+ return remove_resource(res);
+}
+EXPORT_SYMBOL(__wrap_remove_resource);
+
+struct resource *__wrap___devm_request_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n, const char *name)
+{
+ if (!dev)
+ return NULL;
+ return nfit_test_request_region(dev, parent, start, n, name, 0);
+}
+EXPORT_SYMBOL(__wrap___devm_request_region);
+
+void __wrap___release_region(struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ if (!nfit_test_release_region(NULL, parent, start, n))
+ __release_region(parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___release_region);
+
+void __wrap___devm_release_region(struct device *dev, struct resource *parent,
+ resource_size_t start, resource_size_t n)
+{
+ if (!nfit_test_release_region(dev, parent, start, n))
+ __devm_release_region(dev, parent, start, n);
+}
+EXPORT_SYMBOL(__wrap___devm_release_region);
+
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+ struct acpi_object_list *p, struct acpi_buffer *buf)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
+ union acpi_object **obj;
+
+ if (!nfit_res || strcmp(path, "_FIT") || !buf)
+ return acpi_evaluate_object(handle, path, p, buf);
+
+ obj = nfit_res->buf;
+ buf->length = sizeof(union acpi_object);
+ buf->pointer = *obj;
+ return AE_OK;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
+
+union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid,
+ u64 rev, u64 func, union acpi_object *argv4)
+{
+ union acpi_object *obj = ERR_PTR(-ENXIO);
+ struct iomap_ops *ops;
+
+ rcu_read_lock();
+ ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list);
+ if (ops)
+ obj = ops->evaluate_dsm(handle, guid, rev, func, argv4);
+ rcu_read_unlock();
+
+ if (IS_ERR(obj))
+ return acpi_evaluate_dsm(handle, guid, rev, func, argv4);
+ return obj;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
+
+MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
new file mode 100644
index 000000000..fa763dbfd
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -0,0 +1,2957 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/platform_device.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/libnvdimm.h>
+#include <linux/genalloc.h>
+#include <linux/vmalloc.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <nd-core.h>
+#include <nfit.h>
+#include <nd.h>
+#include "nfit_test.h"
+#include "../watermark.h"
+
+#include <asm/mcsafe_test.h>
+
+/*
+ * Generate an NFIT table to describe the following topology:
+ *
+ * BUS0: Interleaved PMEM regions, and aliasing with BLK regions
+ *
+ * (a) (b) DIMM BLK-REGION
+ * +----------+--------------+----------+---------+
+ * +------+ | blk2.0 | pm0.0 | blk2.1 | pm1.0 | 0 region2
+ * | imc0 +--+- - - - - region0 - - - -+----------+ +
+ * +--+---+ | blk3.0 | pm0.0 | blk3.1 | pm1.0 | 1 region3
+ * | +----------+--------------v----------v v
+ * +--+---+ | |
+ * | cpu0 | region1
+ * +--+---+ | |
+ * | +-------------------------^----------^ ^
+ * +--+---+ | blk4.0 | pm1.0 | 2 region4
+ * | imc1 +--+-------------------------+----------+ +
+ * +------+ | blk5.0 | pm1.0 | 3 region5
+ * +-------------------------+----------+-+-------+
+ *
+ * +--+---+
+ * | cpu1 |
+ * +--+---+ (Hotplug DIMM)
+ * | +----------------------------------------------+
+ * +--+---+ | blk6.0/pm7.0 | 4 region6/7
+ * | imc0 +--+----------------------------------------------+
+ * +------+
+ *
+ *
+ * *) In this layout we have four dimms and two memory controllers in one
+ * socket. Each unique interface (BLK or PMEM) to DPA space
+ * is identified by a region device with a dynamically assigned id.
+ *
+ * *) The first portions of dimm0 and dimm1 are interleaved as REGION0.
+ * A single PMEM namespace "pm0.0" is created using half of the
+ * REGION0 SPA-range. REGION0 spans dimm0 and dimm1. PMEM namespaces
+ * allocate from the bottom of a region. The unallocated
+ * portion of REGION0 aliases with REGION2 and REGION3. That
+ * unallocated capacity is reclaimed as BLK namespaces ("blk2.0" and
+ * "blk3.0") starting at the base of each DIMM to offset (a) in those
+ * DIMMs. "pm0.0", "blk2.0" and "blk3.0" are free-form readable
+ * names that can be assigned to a namespace.
+ *
+ * *) In the last portion of dimm0 and dimm1 we have an interleaved
+ * SPA range, REGION1, that spans those two dimms as well as dimm2
+ * and dimm3. Some of REGION1 is allocated to a PMEM namespace named
+ * "pm1.0"; the rest is reclaimed in 4 BLK namespaces (for each
+ * dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and
+ * "blk5.0".
+ *
+ * *) The portions of dimm2 and dimm3 that do not participate in the
+ * REGION1 interleaved SPA range (i.e. the DPA addresses below offset
+ * (b)) are also included in the "blk4.0" and "blk5.0" namespaces.
+ * Note, that BLK namespaces need not be contiguous in DPA-space, and
+ * can consume aliased capacity from multiple interleave sets.
+ *
+ * BUS1: Legacy NVDIMM (single contiguous range)
+ *
+ * region2
+ * +---------------------+
+ * |---------------------|
+ * || pm2.0 ||
+ * |---------------------|
+ * +---------------------+
+ *
+ * *) An NFIT table may describe a simple system-physical-address range
+ * with no BLK aliasing. This type of region may optionally
+ * reference an NVDIMM.
+ */
+enum {
+ NUM_PM = 3,
+ NUM_DCR = 5,
+ NUM_HINTS = 8,
+ NUM_BDW = NUM_DCR,
+ NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
+ NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+ + 4 /* spa1 iset */ + 1 /* spa11 iset */,
+ DIMM_SIZE = SZ_32M,
+ LABEL_SIZE = SZ_128K,
+ SPA_VCD_SIZE = SZ_4M,
+ SPA0_SIZE = DIMM_SIZE,
+ SPA1_SIZE = DIMM_SIZE*2,
+ SPA2_SIZE = DIMM_SIZE,
+ BDW_SIZE = 64 << 8,
+ DCR_SIZE = 12,
+ NUM_NFITS = 2, /* permit testing multiple NFITs per system */
+};
+
+struct nfit_test_dcr {
+ __le64 bdw_addr;
+ __le32 bdw_status;
+ __u8 aperature[BDW_SIZE];
+};
+
+#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \
+ (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \
+ | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf))
+
+static u32 handle[] = {
+ [0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0),
+ [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1),
+ [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0),
+ [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1),
+ [4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
+ [5] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 0),
+ [6] = NFIT_DIMM_HANDLE(1, 0, 0, 0, 1),
+};
+
+static unsigned long dimm_fail_cmd_flags[ARRAY_SIZE(handle)];
+static int dimm_fail_cmd_code[ARRAY_SIZE(handle)];
+
+static const struct nd_intel_smart smart_def = {
+ .flags = ND_INTEL_SMART_HEALTH_VALID
+ | ND_INTEL_SMART_SPARES_VALID
+ | ND_INTEL_SMART_ALARM_VALID
+ | ND_INTEL_SMART_USED_VALID
+ | ND_INTEL_SMART_SHUTDOWN_VALID
+ | ND_INTEL_SMART_MTEMP_VALID
+ | ND_INTEL_SMART_CTEMP_VALID,
+ .health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
+ .media_temperature = 23 * 16,
+ .ctrl_temperature = 25 * 16,
+ .pmic_temperature = 40 * 16,
+ .spares = 75,
+ .alarm_flags = ND_INTEL_SMART_SPARE_TRIP
+ | ND_INTEL_SMART_TEMP_TRIP,
+ .ait_status = 1,
+ .life_used = 5,
+ .shutdown_state = 0,
+ .vendor_size = 0,
+ .shutdown_count = 100,
+};
+
+struct nfit_test_fw {
+ enum intel_fw_update_state state;
+ u32 context;
+ u64 version;
+ u32 size_received;
+ u64 end_time;
+};
+
+struct nfit_test {
+ struct acpi_nfit_desc acpi_desc;
+ struct platform_device pdev;
+ struct list_head resources;
+ void *nfit_buf;
+ dma_addr_t nfit_dma;
+ size_t nfit_size;
+ size_t nfit_filled;
+ int dcr_idx;
+ int num_dcr;
+ int num_pm;
+ void **dimm;
+ dma_addr_t *dimm_dma;
+ void **flush;
+ dma_addr_t *flush_dma;
+ void **label;
+ dma_addr_t *label_dma;
+ void **spa_set;
+ dma_addr_t *spa_set_dma;
+ struct nfit_test_dcr **dcr;
+ dma_addr_t *dcr_dma;
+ int (*alloc)(struct nfit_test *t);
+ void (*setup)(struct nfit_test *t);
+ int setup_hotplug;
+ union acpi_object **_fit;
+ dma_addr_t _fit_dma;
+ struct ars_state {
+ struct nd_cmd_ars_status *ars_status;
+ unsigned long deadline;
+ spinlock_t lock;
+ } ars_state;
+ struct device *dimm_dev[ARRAY_SIZE(handle)];
+ struct nd_intel_smart *smart;
+ struct nd_intel_smart_threshold *smart_threshold;
+ struct badrange badrange;
+ struct work_struct work;
+ struct nfit_test_fw *fw;
+};
+
+static struct workqueue_struct *nfit_wq;
+
+static struct gen_pool *nfit_pool;
+
+static struct nfit_test *to_nfit_test(struct device *dev)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+
+ return container_of(pdev, struct nfit_test, pdev);
+}
+
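+/*
+ * Emulated Intel DIMM firmware-update state machine:
+ *   FW_STATE_NEW -(start)-> FW_STATE_IN_PROGRESS -(finish)->
+ *   FW_STATE_VERIFY -(query after ~1s)-> FW_STATE_UPDATED
+ * Finishing with the abort flag returns to FW_STATE_NEW.
+ */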
+static int nd_intel_test_get_fw_info(struct nfit_test *t,
+ struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
+ int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
+ nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
+ nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
+ nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
+ nd_cmd->update_cap = 0;
+ nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
+ nd_cmd->run_version = 0;
+ nd_cmd->updated_version = fw->version;
+
+ return 0;
+}
+
+static int nd_intel_test_start_update(struct nfit_test *t,
+ struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
+ int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ if (fw->state != FW_STATE_NEW) {
+ /* extended status, FW update in progress */
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+
+ fw->state = FW_STATE_IN_PROGRESS;
+ fw->context++;
+ fw->size_received = 0;
+ nd_cmd->status = 0;
+ nd_cmd->context = fw->context;
+
+ dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
+
+ return 0;
+}
+
+static int nd_intel_test_send_data(struct nfit_test *t,
+ struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
+ int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+ u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+
+ dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
+ dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
+ dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
+ nd_cmd->data[nd_cmd->length-1]);
+
+ if (fw->state != FW_STATE_IN_PROGRESS) {
+ dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
+ *status = 0x5;
+ return 0;
+ }
+
+ if (nd_cmd->context != fw->context) {
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context, fw->context);
+ *status = 0x10007;
+ return 0;
+ }
+
+ /*
+ * check offset + len > size of fw storage
+ * check length is > max send length
+ */
+ if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
+ nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
+ *status = 0x3;
+ dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
+ return 0;
+ }
+
+ fw->size_received += nd_cmd->length;
+ dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
+ __func__, nd_cmd->length, fw->size_received);
+ *status = 0;
+ return 0;
+}
+
+static int nd_intel_test_finish_fw(struct nfit_test *t,
+ struct nd_intel_fw_finish_update *nd_cmd,
+ unsigned int buf_len, int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (fw->state == FW_STATE_UPDATED) {
+ /* update already done, need cold boot */
+ nd_cmd->status = 0x20007;
+ return 0;
+ }
+
+ dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n",
+ __func__, nd_cmd->context, nd_cmd->ctrl_flags);
+
+ switch (nd_cmd->ctrl_flags) {
+ case 0: /* finish */
+ if (nd_cmd->context != fw->context) {
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context,
+ fw->context);
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+ nd_cmd->status = 0;
+ fw->state = FW_STATE_VERIFY;
+ /* set 1 second of time for firmware "update" */
+ fw->end_time = jiffies + HZ;
+ break;
+
+ case 1: /* abort */
+ fw->size_received = 0;
+ /* successfully aborted status */
+ nd_cmd->status = 0x40007;
+ fw->state = FW_STATE_NEW;
+ dev_dbg(dev, "%s: abort successful\n", __func__);
+ break;
+
+ default: /* bad control flag */
+ dev_warn(dev, "%s: unknown control flag: %#x\n",
+ __func__, nd_cmd->ctrl_flags);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_finish_query(struct nfit_test *t,
+ struct nd_intel_fw_finish_query *nd_cmd,
+ unsigned int buf_len, int idx)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_fw *fw = &t->fw[idx];
+
+ dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
+ __func__, t, nd_cmd, buf_len, idx);
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ if (nd_cmd->context != fw->context) {
+ dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
+ __func__, nd_cmd->context, fw->context);
+ nd_cmd->status = 0x10007;
+ return 0;
+ }
+
+ dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
+
+ switch (fw->state) {
+ case FW_STATE_NEW:
+ nd_cmd->updated_fw_rev = 0;
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: new state\n", __func__);
+ break;
+
+ case FW_STATE_IN_PROGRESS:
+ /* sequencing error */
+ nd_cmd->status = 0x40007;
+ nd_cmd->updated_fw_rev = 0;
+ dev_dbg(dev, "%s: sequence error\n", __func__);
+ break;
+
+ case FW_STATE_VERIFY:
+ if (time_is_after_jiffies64(fw->end_time)) {
+ nd_cmd->updated_fw_rev = 0;
+ nd_cmd->status = 0x20007;
+ dev_dbg(dev, "%s: still verifying\n", __func__);
+ break;
+ }
+
+ dev_dbg(dev, "%s: transition out verify\n", __func__);
+ fw->state = FW_STATE_UPDATED;
+ /* fall through once the "update" is done */
+ case FW_STATE_UPDATED:
+ nd_cmd->status = 0;
+ /* bogus test version */
+ fw->version = nd_cmd->updated_fw_rev =
+ INTEL_FW_FAKE_VERSION;
+ dev_dbg(dev, "%s: updated\n", __func__);
+ break;
+
+ default: /* we should never get here */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
+ unsigned int buf_len)
+{
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ nd_cmd->config_size = LABEL_SIZE;
+ nd_cmd->max_xfer = SZ_4K;
+
+ return 0;
+}
+
+static int nfit_test_cmd_get_config_data(struct nd_cmd_get_config_data_hdr
+ *nd_cmd, unsigned int buf_len, void *label)
+{
+ unsigned int len, offset = nd_cmd->in_offset;
+ int rc;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ if (offset >= LABEL_SIZE)
+ return -EINVAL;
+ if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len)
+ return -EINVAL;
+
+ nd_cmd->status = 0;
+ len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+ memcpy(nd_cmd->out_buf, label + offset, len);
+ rc = buf_len - sizeof(*nd_cmd) - len;
+
+ return rc;
+}
+
+static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
+ unsigned int buf_len, void *label)
+{
+ unsigned int len, offset = nd_cmd->in_offset;
+ u32 *status;
+ int rc;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+ if (offset >= LABEL_SIZE)
+ return -EINVAL;
+ if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len)
+ return -EINVAL;
+
+ status = (void *)nd_cmd + nd_cmd->in_length + sizeof(*nd_cmd);
+ *status = 0;
+ len = min(nd_cmd->in_length, LABEL_SIZE - offset);
+ memcpy(label + offset, nd_cmd->in_buf, len);
+ rc = buf_len - sizeof(*nd_cmd) - (len + 4);
+
+ return rc;
+}
+
+#define NFIT_TEST_CLEAR_ERR_UNIT 256
+
+static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
+ unsigned int buf_len)
+{
+ int ars_recs;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ /* for testing, only store up to n records that fit within 4k */
+ ars_recs = SZ_4K / sizeof(struct nd_ars_record);
+
+ nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+ + ars_recs * sizeof(struct nd_ars_record);
+ nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+ nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT;
+
+ return 0;
+}
+
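+/*
+ * Build a canned ars_status reply by clipping the current badrange
+ * entries to the scrubbed window, and arm a one second "scrub in
+ * progress" deadline.
+ */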
+static void post_ars_status(struct ars_state *ars_state,
+ struct badrange *badrange, u64 addr, u64 len)
+{
+ struct nd_cmd_ars_status *ars_status;
+ struct nd_ars_record *ars_record;
+ struct badrange_entry *be;
+ u64 end = addr + len - 1;
+ int i = 0;
+
+ ars_state->deadline = jiffies + 1*HZ;
+ ars_status = ars_state->ars_status;
+ ars_status->status = 0;
+ ars_status->address = addr;
+ ars_status->length = len;
+ ars_status->type = ND_ARS_PERSISTENT;
+
+ spin_lock(&badrange->lock);
+ list_for_each_entry(be, &badrange->list, list) {
+ u64 be_end = be->start + be->length - 1;
+ u64 rstart, rend;
+
+ /* skip entries outside the range */
+ if (be_end < addr || be->start > end)
+ continue;
+
+ rstart = (be->start < addr) ? addr : be->start;
+ rend = (be_end < end) ? be_end : end;
+ ars_record = &ars_status->records[i];
+ ars_record->handle = 0;
+ ars_record->err_address = rstart;
+ ars_record->length = rend - rstart + 1;
+ i++;
+ }
+ spin_unlock(&badrange->lock);
+ ars_status->num_records = i;
+ ars_status->out_length = sizeof(struct nd_cmd_ars_status)
+ + i * sizeof(struct nd_ars_record);
+}
+
+static int nfit_test_cmd_ars_start(struct nfit_test *t,
+ struct ars_state *ars_state,
+ struct nd_cmd_ars_start *ars_start, unsigned int buf_len,
+ int *cmd_rc)
+{
+ if (buf_len < sizeof(*ars_start))
+ return -EINVAL;
+
+ spin_lock(&ars_state->lock);
+ if (time_before(jiffies, ars_state->deadline)) {
+ ars_start->status = NFIT_ARS_START_BUSY;
+ *cmd_rc = -EBUSY;
+ } else {
+ ars_start->status = 0;
+ ars_start->scrub_time = 1;
+ post_ars_status(ars_state, &t->badrange, ars_start->address,
+ ars_start->length);
+ *cmd_rc = 0;
+ }
+ spin_unlock(&ars_state->lock);
+
+ return 0;
+}
+
+static int nfit_test_cmd_ars_status(struct ars_state *ars_state,
+ struct nd_cmd_ars_status *ars_status, unsigned int buf_len,
+ int *cmd_rc)
+{
+ if (buf_len < ars_state->ars_status->out_length)
+ return -EINVAL;
+
+ spin_lock(&ars_state->lock);
+ if (time_before(jiffies, ars_state->deadline)) {
+ memset(ars_status, 0, buf_len);
+ ars_status->status = NFIT_ARS_STATUS_BUSY;
+ ars_status->out_length = sizeof(*ars_status);
+ *cmd_rc = -EBUSY;
+ } else {
+ memcpy(ars_status, ars_state->ars_status,
+ ars_state->ars_status->out_length);
+ *cmd_rc = 0;
+ }
+ spin_unlock(&ars_state->lock);
+ return 0;
+}
+
+static int nfit_test_cmd_clear_error(struct nfit_test *t,
+ struct nd_cmd_clear_error *clear_err,
+ unsigned int buf_len, int *cmd_rc)
+{
+ const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1;
+ if (buf_len < sizeof(*clear_err))
+ return -EINVAL;
+
+ if ((clear_err->address & mask) || (clear_err->length & mask))
+ return -EINVAL;
+
+ badrange_forget(&t->badrange, clear_err->address, clear_err->length);
+ clear_err->status = 0;
+ clear_err->cleared = clear_err->length;
+ *cmd_rc = 0;
+ return 0;
+}
+
+struct region_search_spa {
+ u64 addr;
+ struct nd_region *region;
+};
+
+static int is_region_device(struct device *dev)
+{
+ return !strncmp(dev->kobj.name, "region", 6);
+}
+
+static int nfit_test_search_region_spa(struct device *dev, void *data)
+{
+ struct region_search_spa *ctx = data;
+ struct nd_region *nd_region;
+ resource_size_t ndr_end;
+
+ if (!is_region_device(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ ndr_end = nd_region->ndr_start + nd_region->ndr_size;
+
+ if (ctx->addr >= nd_region->ndr_start && ctx->addr < ndr_end) {
+ ctx->region = nd_region;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nfit_test_search_spa(struct nvdimm_bus *bus,
+ struct nd_cmd_translate_spa *spa)
+{
+ int ret;
+ struct nd_region *nd_region = NULL;
+ struct nvdimm *nvdimm = NULL;
+ struct nd_mapping *nd_mapping = NULL;
+ struct region_search_spa ctx = {
+ .addr = spa->spa,
+ .region = NULL,
+ };
+ u64 dpa;
+
+ ret = device_for_each_child(&bus->dev, &ctx,
+ nfit_test_search_region_spa);
+
+ if (!ret)
+ return -ENODEV;
+
+ nd_region = ctx.region;
+
+ dpa = ctx.addr - nd_region->ndr_start;
+
+ /* the last dimm in the region is selected for the test */
+ nd_mapping = &nd_region->mapping[nd_region->ndr_mappings - 1];
+ nvdimm = nd_mapping->nvdimm;
+
+ spa->devices[0].nfit_device_handle = handle[nvdimm->id];
+ spa->num_nvdimms = 1;
+ spa->devices[0].dpa = dpa;
+
+ return 0;
+}
+
+static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
+ struct nd_cmd_translate_spa *spa, unsigned int buf_len)
+{
+ if (buf_len < spa->translate_length)
+ return -EINVAL;
+
+ if (nfit_test_search_spa(bus, spa) < 0 || !spa->num_nvdimms)
+ spa->status = 2;
+
+ return 0;
+}
+
+static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
+ struct nd_intel_smart *smart_data)
+{
+ if (buf_len < sizeof(*smart))
+ return -EINVAL;
+ memcpy(smart, smart_data, sizeof(*smart));
+ return 0;
+}
+
+static int nfit_test_cmd_smart_threshold(
+ struct nd_intel_smart_threshold *out,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *smart_t)
+{
+ if (buf_len < sizeof(*smart_t))
+ return -EINVAL;
+ memcpy(out, smart_t, sizeof(*smart_t));
+ return 0;
+}
+
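+/*
+ * Fire the 0x81 health event notification when an armed spare or
+ * temperature threshold is tripped, when health is anything but
+ * non-critical, or when an unsafe shutdown is flagged.
+ */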
+static void smart_notify(struct device *bus_dev,
+ struct device *dimm_dev, struct nd_intel_smart *smart,
+ struct nd_intel_smart_threshold *thresh)
+{
+ dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
+ __func__, thresh->alarm_control, thresh->spares,
+ smart->spares, thresh->media_temperature,
+ smart->media_temperature, thresh->ctrl_temperature,
+ smart->ctrl_temperature);
+ if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
+ && smart->spares
+ <= thresh->spares)
+ || ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
+ && smart->media_temperature
+ >= thresh->media_temperature)
+ || ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
+ && smart->ctrl_temperature
+ >= thresh->ctrl_temperature)
+ || (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+ || (smart->shutdown_state != 0)) {
+ device_lock(bus_dev);
+ __acpi_nvdimm_notify(dimm_dev, 0x81);
+ device_unlock(bus_dev);
+ }
+}
+
+static int nfit_test_cmd_smart_set_threshold(
+ struct nd_intel_smart_set_threshold *in,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *thresh,
+ struct nd_intel_smart *smart,
+ struct device *bus_dev, struct device *dimm_dev)
+{
+ unsigned int size;
+
+ size = sizeof(*in) - 4;
+ if (buf_len < size)
+ return -EINVAL;
+ memcpy(thresh->data, in, size);
+ in->status = 0;
+ smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+ return 0;
+}
+
+static int nfit_test_cmd_smart_inject(
+ struct nd_intel_smart_inject *inj,
+ unsigned int buf_len,
+ struct nd_intel_smart_threshold *thresh,
+ struct nd_intel_smart *smart,
+ struct device *bus_dev, struct device *dimm_dev)
+{
+ if (buf_len != sizeof(*inj))
+ return -EINVAL;
+
+ if (inj->flags & ND_INTEL_SMART_INJECT_MTEMP) {
+ if (inj->mtemp_enable)
+ smart->media_temperature = inj->media_temperature;
+ else
+ smart->media_temperature = smart_def.media_temperature;
+ }
+ if (inj->flags & ND_INTEL_SMART_INJECT_SPARE) {
+ if (inj->spare_enable)
+ smart->spares = inj->spares;
+ else
+ smart->spares = smart_def.spares;
+ }
+ if (inj->flags & ND_INTEL_SMART_INJECT_FATAL) {
+ if (inj->fatal_enable)
+ smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+ else
+ smart->health = ND_INTEL_SMART_NON_CRITICAL_HEALTH;
+ }
+ if (inj->flags & ND_INTEL_SMART_INJECT_SHUTDOWN) {
+ if (inj->unsafe_shutdown_enable) {
+ smart->shutdown_state = 1;
+ smart->shutdown_count++;
+ } else
+ smart->shutdown_state = 0;
+ }
+ inj->status = 0;
+ smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+ return 0;
+}
+
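+/* deferred bus-level notification for injected uncorrectable errors */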
+static void uc_error_notify(struct work_struct *work)
+{
+ struct nfit_test *t = container_of(work, typeof(*t), work);
+
+ __acpi_nfit_notify(&t->pdev.dev, t, NFIT_NOTIFY_UC_MEMORY_ERROR);
+}
+
+static int nfit_test_cmd_ars_error_inject(struct nfit_test *t,
+ struct nd_cmd_ars_err_inj *err_inj, unsigned int buf_len)
+{
+ int rc;
+
+ if (buf_len != sizeof(*err_inj)) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (err_inj->err_inj_spa_range_length <= 0) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ rc = badrange_add(&t->badrange, err_inj->err_inj_spa_range_base,
+ err_inj->err_inj_spa_range_length);
+ if (rc < 0)
+ goto err;
+
+ if (err_inj->err_inj_options & (1 << ND_ARS_ERR_INJ_OPT_NOTIFY))
+ queue_work(nfit_wq, &t->work);
+
+ err_inj->status = 0;
+ return 0;
+
+err:
+ err_inj->status = NFIT_ARS_INJECT_INVALID;
+ return rc;
+}
+
+static int nfit_test_cmd_ars_inject_clear(struct nfit_test *t,
+ struct nd_cmd_ars_err_inj_clr *err_clr, unsigned int buf_len)
+{
+ int rc;
+
+ if (buf_len != sizeof(*err_clr)) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ if (err_clr->err_inj_clr_spa_range_length <= 0) {
+ rc = -EINVAL;
+ goto err;
+ }
+
+ badrange_forget(&t->badrange, err_clr->err_inj_clr_spa_range_base,
+ err_clr->err_inj_clr_spa_range_length);
+
+ err_clr->status = 0;
+ return 0;
+
+err:
+ err_clr->status = NFIT_ARS_INJECT_INVALID;
+ return rc;
+}
+
+static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
+ struct nd_cmd_ars_err_inj_stat *err_stat,
+ unsigned int buf_len)
+{
+ struct badrange_entry *be;
+ int max = SZ_4K / sizeof(struct nd_error_stat_query_record);
+ int i = 0;
+
+ err_stat->status = 0;
+ spin_lock(&t->badrange.lock);
+ list_for_each_entry(be, &t->badrange.list, list) {
+ err_stat->record[i].err_inj_stat_spa_range_base = be->start;
+ err_stat->record[i].err_inj_stat_spa_range_length = be->length;
+ i++;
+ if (i >= max)
+ break;
+ }
+ spin_unlock(&t->badrange.lock);
+ err_stat->inj_err_rec_count = i;
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
+ struct nd_intel_lss *nd_cmd, unsigned int buf_len)
+{
+ struct device *dev = &t->pdev.dev;
+
+ if (buf_len < sizeof(*nd_cmd))
+ return -EINVAL;
+
+ switch (nd_cmd->enable) {
+ case 0:
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
+ __func__);
+ break;
+ case 1:
+ nd_cmd->status = 0;
+ dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
+ __func__);
+ break;
+ default:
+ dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
+ nd_cmd->status = 0x3;
+ break;
+ }
+
+ return 0;
+}
+
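+/*
+ * apply per-dimm fault injection: if the fail_cmd sysfs mask selects
+ * this function number, return the injected error code (or -EIO)
+ * instead of the real result
+ */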
+static int override_return_code(int dimm, unsigned int func, int rc)
+{
+ if ((1 << func) & dimm_fail_cmd_flags[dimm]) {
+ if (dimm_fail_cmd_code[dimm])
+ return dimm_fail_cmd_code[dimm];
+ return -EIO;
+ }
+ return rc;
+}
+
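+/* translate a dimm's NFIT device handle into an index into handle[] */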
+static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
+{
+ int i;
+
+ /* lookup per-dimm data */
+ for (i = 0; i < ARRAY_SIZE(handle); i++)
+ if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
+ break;
+ if (i >= ARRAY_SIZE(handle))
+ return -ENXIO;
+ return i;
+}
+
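+/*
+ * Top-level command dispatcher for the emulated bus: ND_CMD_CALL
+ * packages are unwrapped to their embedded payload and function
+ * number, dimm-scoped commands are routed by the handle-derived
+ * index, and bus-scoped ARS commands operate on the shared
+ * ars_state.
+ */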
+static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int buf_len, int *cmd_rc)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+ struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+ unsigned int func = cmd;
+ int i, rc = 0, __cmd_rc;
+
+ if (!cmd_rc)
+ cmd_rc = &__cmd_rc;
+ *cmd_rc = 0;
+
+ if (nvdimm) {
+ struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+ unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
+
+ if (!nfit_mem)
+ return -ENOTTY;
+
+ if (cmd == ND_CMD_CALL) {
+ struct nd_cmd_pkg *call_pkg = buf;
+
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+ func = call_pkg->nd_command;
+ if (call_pkg->nd_family != nfit_mem->family)
+ return -ENOTTY;
+
+ i = get_dimm(nfit_mem, func);
+ if (i < 0)
+ return i;
+
+ switch (func) {
+ case ND_INTEL_ENABLE_LSS_STATUS:
+ rc = nd_intel_test_cmd_set_lss_status(t,
+ buf, buf_len);
+ break;
+ case ND_INTEL_FW_GET_INFO:
+ rc = nd_intel_test_get_fw_info(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_START_UPDATE:
+ rc = nd_intel_test_start_update(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_SEND_DATA:
+ rc = nd_intel_test_send_data(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_FINISH_UPDATE:
+ rc = nd_intel_test_finish_fw(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_FW_FINISH_QUERY:
+ rc = nd_intel_test_finish_query(t, buf,
+ buf_len, i - t->dcr_idx);
+ break;
+ case ND_INTEL_SMART:
+ rc = nfit_test_cmd_smart(buf, buf_len,
+ &t->smart[i - t->dcr_idx]);
+ break;
+ case ND_INTEL_SMART_THRESHOLD:
+ rc = nfit_test_cmd_smart_threshold(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx]);
+ break;
+ case ND_INTEL_SMART_SET_THRESHOLD:
+ rc = nfit_test_cmd_smart_set_threshold(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx],
+ &t->smart[i - t->dcr_idx],
+ &t->pdev.dev, t->dimm_dev[i]);
+ break;
+ case ND_INTEL_SMART_INJECT:
+ rc = nfit_test_cmd_smart_inject(buf,
+ buf_len,
+ &t->smart_threshold[i -
+ t->dcr_idx],
+ &t->smart[i - t->dcr_idx],
+ &t->pdev.dev, t->dimm_dev[i]);
+ break;
+ default:
+ return -ENOTTY;
+ }
+ return override_return_code(i, func, rc);
+ }
+
+ if (!test_bit(cmd, &cmd_mask)
+ || !test_bit(func, &nfit_mem->dsm_mask))
+ return -ENOTTY;
+
+ i = get_dimm(nfit_mem, func);
+ if (i < 0)
+ return i;
+
+ switch (func) {
+ case ND_CMD_GET_CONFIG_SIZE:
+ rc = nfit_test_cmd_get_config_size(buf, buf_len);
+ break;
+ case ND_CMD_GET_CONFIG_DATA:
+ rc = nfit_test_cmd_get_config_data(buf, buf_len,
+ t->label[i - t->dcr_idx]);
+ break;
+ case ND_CMD_SET_CONFIG_DATA:
+ rc = nfit_test_cmd_set_config_data(buf, buf_len,
+ t->label[i - t->dcr_idx]);
+ break;
+ default:
+ return -ENOTTY;
+ }
+ return override_return_code(i, func, rc);
+ } else {
+ struct ars_state *ars_state = &t->ars_state;
+ struct nd_cmd_pkg *call_pkg = buf;
+
+ if (!nd_desc)
+ return -ENOTTY;
+
+ if (cmd == ND_CMD_CALL) {
+ func = call_pkg->nd_command;
+
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+
+ switch (func) {
+ case NFIT_CMD_TRANSLATE_SPA:
+ rc = nfit_test_cmd_translate_spa(
+ acpi_desc->nvdimm_bus, buf, buf_len);
+ return rc;
+ case NFIT_CMD_ARS_INJECT_SET:
+ rc = nfit_test_cmd_ars_error_inject(t, buf,
+ buf_len);
+ return rc;
+ case NFIT_CMD_ARS_INJECT_CLEAR:
+ rc = nfit_test_cmd_ars_inject_clear(t, buf,
+ buf_len);
+ return rc;
+ case NFIT_CMD_ARS_INJECT_GET:
+ rc = nfit_test_cmd_ars_inject_status(t, buf,
+ buf_len);
+ return rc;
+ default:
+ return -ENOTTY;
+ }
+ }
+
+ if (!test_bit(cmd, &nd_desc->cmd_mask))
+ return -ENOTTY;
+
+ switch (func) {
+ case ND_CMD_ARS_CAP:
+ rc = nfit_test_cmd_ars_cap(buf, buf_len);
+ break;
+ case ND_CMD_ARS_START:
+ rc = nfit_test_cmd_ars_start(t, ars_state, buf,
+ buf_len, cmd_rc);
+ break;
+ case ND_CMD_ARS_STATUS:
+ rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len,
+ cmd_rc);
+ break;
+ case ND_CMD_CLEAR_ERROR:
+ rc = nfit_test_cmd_clear_error(t, buf, buf_len, cmd_rc);
+ break;
+ default:
+ return -ENOTTY;
+ }
+ }
+
+ return rc;
+}
+
+static DEFINE_SPINLOCK(nfit_test_lock);
+static struct nfit_test *instances[NUM_NFITS];
+
+static void release_nfit_res(void *data)
+{
+ struct nfit_test_resource *nfit_res = data;
+
+ spin_lock(&nfit_test_lock);
+ list_del(&nfit_res->list);
+ spin_unlock(&nfit_test_lock);
+
+ if (resource_size(&nfit_res->res) >= DIMM_SIZE)
+ gen_pool_free(nfit_pool, nfit_res->res.start,
+ resource_size(&nfit_res->res));
+ vfree(nfit_res->buf);
+ kfree(nfit_res);
+}
+
+static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
+ void *buf)
+{
+ struct device *dev = &t->pdev.dev;
+ struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
+ GFP_KERNEL);
+ int rc;
+
+ if (!buf || !nfit_res || !*dma)
+ goto err;
+ rc = devm_add_action(dev, release_nfit_res, nfit_res);
+ if (rc)
+ goto err;
+ INIT_LIST_HEAD(&nfit_res->list);
+ memset(buf, 0, size);
+ nfit_res->dev = dev;
+ nfit_res->buf = buf;
+ nfit_res->res.start = *dma;
+ nfit_res->res.end = *dma + size - 1;
+ nfit_res->res.name = "NFIT";
+ spin_lock_init(&nfit_res->lock);
+ INIT_LIST_HEAD(&nfit_res->requests);
+ spin_lock(&nfit_test_lock);
+ list_add(&nfit_res->list, &t->resources);
+ spin_unlock(&nfit_test_lock);
+
+ return nfit_res->buf;
+ err:
+ if (*dma && size >= DIMM_SIZE)
+ gen_pool_free(nfit_pool, *dma, size);
+ if (buf)
+ vfree(buf);
+ kfree(nfit_res);
+ return NULL;
+}
+
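+/*
+ * DIMM-sized allocations take their fake DMA address from nfit_pool
+ * with 128M alignment; smaller allocations simply reuse the vmalloc
+ * address.
+ */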
+static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma)
+{
+ struct genpool_data_align data = {
+ .align = SZ_128M,
+ };
+ void *buf = vmalloc(size);
+
+ if (size >= DIMM_SIZE)
+ *dma = gen_pool_alloc_algo(nfit_pool, size,
+ gen_pool_first_fit_align, &data);
+ else
+ *dma = (unsigned long) buf;
+ return __test_alloc(t, size, dma, buf);
+}
+
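+/*
+ * Resolve an address to a test resource by matching either the fake
+ * physical range or the backing vmalloc buffer.
+ */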
+static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(instances); i++) {
+ struct nfit_test_resource *n, *nfit_res = NULL;
+ struct nfit_test *t = instances[i];
+
+ if (!t)
+ continue;
+ spin_lock(&nfit_test_lock);
+ list_for_each_entry(n, &t->resources, list) {
+ if (addr >= n->res.start && (addr < n->res.start
+ + resource_size(&n->res))) {
+ nfit_res = n;
+ break;
+ } else if (addr >= (unsigned long) n->buf
+ && (addr < (unsigned long) n->buf
+ + resource_size(&n->res))) {
+ nfit_res = n;
+ break;
+ }
+ }
+ spin_unlock(&nfit_test_lock);
+ if (nfit_res)
+ return nfit_res;
+ }
+
+ return NULL;
+}
+
+static int ars_state_init(struct device *dev, struct ars_state *ars_state)
+{
+ /* for testing, only store up to n records that fit within 4k */
+ ars_state->ars_status = devm_kzalloc(dev,
+ sizeof(struct nd_cmd_ars_status) + SZ_4K, GFP_KERNEL);
+ if (!ars_state->ars_status)
+ return -ENOMEM;
+ spin_lock_init(&ars_state->lock);
+ return 0;
+}
+
+static void put_dimms(void *data)
+{
+ struct nfit_test *t = data;
+ int i;
+
+ for (i = 0; i < t->num_dcr; i++)
+ if (t->dimm_dev[i])
+ device_unregister(t->dimm_dev[i]);
+}
+
+static struct class *nfit_test_dimm;
+
+static int dimm_name_to_id(struct device *dev)
+{
+ int dimm;
+
+ if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1)
+ return -ENXIO;
+ return dimm;
+}
+
+static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#x\n", handle[dimm]);
+}
+static DEVICE_ATTR_RO(handle);
+
+static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]);
+}
+
+static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtoul(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_flags[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd);
+
+static ssize_t fail_cmd_code_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%d\n", dimm_fail_cmd_code[dimm]);
+}
+
+static ssize_t fail_cmd_code_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtoul(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_code[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd_code);
+
+static struct attribute *nfit_test_dimm_attributes[] = {
+ &dev_attr_fail_cmd.attr,
+ &dev_attr_fail_cmd_code.attr,
+ &dev_attr_handle.attr,
+ NULL,
+};
+
+static struct attribute_group nfit_test_dimm_attribute_group = {
+ .attrs = nfit_test_dimm_attributes,
+};
+
+static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
+ &nfit_test_dimm_attribute_group,
+ NULL,
+};
+
+static int nfit_test_dimm_init(struct nfit_test *t)
+{
+ int i;
+
+ if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t))
+ return -ENOMEM;
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+ &t->pdev.dev, 0, NULL,
+ nfit_test_dimm_attribute_groups,
+ "test_dimm%d", i + t->dcr_idx);
+ if (!t->dimm_dev[i])
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static void smart_init(struct nfit_test *t)
+{
+ int i;
+ const struct nd_intel_smart_threshold smart_t_data = {
+ .alarm_control = ND_INTEL_SMART_SPARE_TRIP
+ | ND_INTEL_SMART_TEMP_TRIP,
+ .media_temperature = 40 * 16,
+ .ctrl_temperature = 30 * 16,
+ .spares = 5,
+ };
+
+ for (i = 0; i < t->num_dcr; i++) {
+ memcpy(&t->smart[i], &smart_def, sizeof(smart_def));
+ memcpy(&t->smart_threshold[i], &smart_t_data,
+ sizeof(smart_t_data));
+ }
+}
+
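+/*
+ * Size the NFIT buffer for every table that nfit_test0_setup() will
+ * emit, then allocate the backing SPA, dimm, label, flush-hint and
+ * control-region buffers.
+ */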
+static int nfit_test0_alloc(struct nfit_test *t)
+{
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ + sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+ + sizeof(struct acpi_nfit_control_region) * NUM_DCR
+ + offsetof(struct acpi_nfit_control_region,
+ window_size) * NUM_DCR
+ + sizeof(struct acpi_nfit_data_region) * NUM_BDW
+ + (sizeof(struct acpi_nfit_flush_address)
+ + sizeof(u64) * NUM_HINTS) * NUM_DCR
+ + sizeof(struct acpi_nfit_capabilities);
+ int i;
+
+ t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+ if (!t->nfit_buf)
+ return -ENOMEM;
+ t->nfit_size = nfit_size;
+
+ t->spa_set[0] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[0]);
+ if (!t->spa_set[0])
+ return -ENOMEM;
+
+ t->spa_set[1] = test_alloc(t, SPA1_SIZE, &t->spa_set_dma[1]);
+ if (!t->spa_set[1])
+ return -ENOMEM;
+
+ t->spa_set[2] = test_alloc(t, SPA0_SIZE, &t->spa_set_dma[2]);
+ if (!t->spa_set[2])
+ return -ENOMEM;
+
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]);
+ if (!t->dimm[i])
+ return -ENOMEM;
+
+ t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+ if (!t->label[i])
+ return -ENOMEM;
+ sprintf(t->label[i], "label%d", i);
+
+ t->flush[i] = test_alloc(t, max(PAGE_SIZE,
+ sizeof(u64) * NUM_HINTS),
+ &t->flush_dma[i]);
+ if (!t->flush[i])
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < t->num_dcr; i++) {
+ t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]);
+ if (!t->dcr[i])
+ return -ENOMEM;
+ }
+
+ t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
+ if (!t->_fit)
+ return -ENOMEM;
+
+ if (nfit_test_dimm_init(t))
+ return -ENOMEM;
+ smart_init(t);
+ return ars_state_init(&t->pdev.dev, &t->ars_state);
+}
+
+static int nfit_test1_alloc(struct nfit_test *t)
+{
+ size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+ + sizeof(struct acpi_nfit_memory_map) * 2
+ + offsetof(struct acpi_nfit_control_region, window_size) * 2;
+ int i;
+
+ t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
+ if (!t->nfit_buf)
+ return -ENOMEM;
+ t->nfit_size = nfit_size;
+
+ t->spa_set[0] = test_alloc(t, SPA2_SIZE, &t->spa_set_dma[0]);
+ if (!t->spa_set[0])
+ return -ENOMEM;
+
+ for (i = 0; i < t->num_dcr; i++) {
+ t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]);
+ if (!t->label[i])
+ return -ENOMEM;
+ sprintf(t->label[i], "label%d", i);
+ }
+
+ t->spa_set[1] = test_alloc(t, SPA_VCD_SIZE, &t->spa_set_dma[1]);
+ if (!t->spa_set[1])
+ return -ENOMEM;
+
+ if (nfit_test_dimm_init(t))
+ return -ENOMEM;
+ smart_init(t);
+ return ars_state_init(&t->pdev.dev, &t->ars_state);
+}
+
+static void dcr_common_init(struct acpi_nfit_control_region *dcr)
+{
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->valid_fields = 1;
+ dcr->manufacturing_location = 0xa;
+ dcr->manufacturing_date = cpu_to_be16(2016);
+}
+
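+/*
+ * Lay out the instance-0 topology: spa0 interleaves the first half
+ * of dimm0/dimm1, spa1 interleaves the last half of all four dimms,
+ * and each dimm additionally gets control-region, block-data-window
+ * and flush-hint tables.
+ */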
+static void nfit_test0_setup(struct nfit_test *t)
+{
+ const int flush_hint_size = sizeof(struct acpi_nfit_flush_address)
+ + (sizeof(u64) * NUM_HINTS);
+ struct acpi_nfit_desc *acpi_desc;
+ struct acpi_nfit_memory_map *memdev;
+ void *nfit_buf = t->nfit_buf;
+ struct acpi_nfit_system_address *spa;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_data_region *bdw;
+ struct acpi_nfit_flush_address *flush;
+ struct acpi_nfit_capabilities *pcap;
+ unsigned int offset = 0, i;
+
+ /*
+ * spa0 (interleave first half of dimm0 and dimm1, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 0+1;
+ spa->address = t->spa_set_dma[0];
+ spa->length = SPA0_SIZE;
+ offset += spa->header.length;
+
+ /*
+ * spa1 (interleave last half of the 4 DIMMS, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 1+1;
+ spa->address = t->spa_set_dma[1];
+ spa->length = SPA1_SIZE;
+ offset += spa->header.length;
+
+ /* spa2 (dcr0) dimm0 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 2+1;
+ spa->address = t->dcr_dma[0];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa3 (dcr1) dimm1 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 3+1;
+ spa->address = t->dcr_dma[1];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa4 (dcr2) dimm2 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 4+1;
+ spa->address = t->dcr_dma[2];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa5 (dcr3) dimm3 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 5+1;
+ spa->address = t->dcr_dma[3];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /* spa6 (bdw for dcr0) dimm0 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 6+1;
+ spa->address = t->dimm_dma[0];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* spa7 (bdw for dcr1) dimm1 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 7+1;
+ spa->address = t->dimm_dma[1];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* spa8 (bdw for dcr2) dimm2 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 8+1;
+ spa->address = t->dimm_dma[2];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* spa9 (bdw for dcr3) dimm3 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 9+1;
+ spa->address = t->dimm_dma[3];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* mem-region0 (spa0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 4+1;
+ memdev->region_size = SPA0_SIZE/2;
+ memdev->region_offset = 1;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 2;
+ offset += memdev->header.length;
+
+ /* mem-region1 (spa0, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 5+1;
+ memdev->region_size = SPA0_SIZE/2;
+ memdev->region_offset = (1 << 8);
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 2;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region2 (spa1, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 1;
+ memdev->range_index = 1+1;
+ memdev->region_index = 4+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1 << 16);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region3 (spa1, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 1;
+ memdev->range_index = 1+1;
+ memdev->region_index = 5+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1 << 24);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ offset += memdev->header.length;
+
+ /* mem-region4 (spa1, dimm2) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 1+1;
+ memdev->region_index = 6+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1ULL << 32);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region5 (spa1, dimm3) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 1+1;
+ memdev->region_index = 7+1;
+ memdev->region_size = SPA1_SIZE/4;
+ memdev->region_offset = (1ULL << 40);
+ memdev->address = SPA0_SIZE/2;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 4;
+ offset += memdev->header.length;
+
+ /* mem-region6 (spa/dcr0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 2+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region7 (spa/dcr1, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 3+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region8 (spa/dcr2, dimm2) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 4+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region9 (spa/dcr3, dimm3) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 5+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region10 (spa/bdw0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[0];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 6+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region11 (spa/bdw1, dimm1) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[1];
+ memdev->physical_id = 1;
+ memdev->region_id = 0;
+ memdev->range_index = 7+1;
+ memdev->region_index = 1+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region12 (spa/bdw2, dimm2) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[2];
+ memdev->physical_id = 2;
+ memdev->region_id = 0;
+ memdev->range_index = 8+1;
+ memdev->region_index = 2+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region13 (spa/bdw3, dimm3) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[3];
+ memdev->physical_id = 3;
+ memdev->region_id = 0;
+ memdev->range_index = 9+1;
+ memdev->region_index = 3+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* dcr-descriptor0: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 0+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[0];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor1: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 1+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[1];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor2: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 2+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[2];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor3: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 3+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[3];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor0: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 4+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[0];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor1: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 5+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[1];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor2: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 6+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[2];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor3: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 7+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[3];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* bdw0 (spa/dcr0, dimm0) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 0+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* bdw1 (spa/dcr1, dimm1) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 1+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* bdw2 (spa/dcr2, dimm2) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 2+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* bdw3 (spa/dcr3, dimm3) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 3+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* flush0 (dimm0) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[0];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* flush1 (dimm1) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[1];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* flush2 (dimm2) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[2];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* flush3 (dimm3) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[3];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* platform capabilities */
+ pcap = nfit_buf + offset;
+ pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
+ pcap->header.length = sizeof(*pcap);
+ pcap->highest_capability = 1;
+ pcap->capabilities = ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+ offset += pcap->header.length;
+
+ if (t->setup_hotplug) {
+ /* dcr-descriptor4: blk */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = sizeof(*dcr);
+ dcr->region_index = 8+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[4];
+ dcr->code = NFIT_FIC_BLK;
+ dcr->windows = 1;
+ dcr->window_size = DCR_SIZE;
+ dcr->command_offset = 0;
+ dcr->command_size = 8;
+ dcr->status_offset = 8;
+ dcr->status_size = 4;
+ offset += dcr->header.length;
+
+ /* dcr-descriptor4: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 9+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[4];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* bdw4 (spa/dcr4, dimm4) */
+ bdw = nfit_buf + offset;
+ bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
+ bdw->header.length = sizeof(*bdw);
+ bdw->region_index = 8+1;
+ bdw->windows = 1;
+ bdw->offset = 0;
+ bdw->size = BDW_SIZE;
+ bdw->capacity = DIMM_SIZE;
+ bdw->start_address = 0;
+ offset += bdw->header.length;
+
+ /* spa10 (dcr4) dimm4 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
+ spa->range_index = 10+1;
+ spa->address = t->dcr_dma[4];
+ spa->length = DCR_SIZE;
+ offset += spa->header.length;
+
+ /*
+ * spa11 (single-dimm interleave for hotplug, note storage
+ * does not actually alias the related block-data-window
+ * regions)
+ */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 11+1;
+ spa->address = t->spa_set_dma[2];
+ spa->length = SPA0_SIZE;
+ offset += spa->header.length;
+
+ /* spa12 (bdw for dcr4) dimm4 */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
+ spa->range_index = 12+1;
+ spa->address = t->dimm_dma[4];
+ spa->length = DIMM_SIZE;
+ offset += spa->header.length;
+
+ /* mem-region14 (spa/dcr4, dimm4) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[4];
+ memdev->physical_id = 4;
+ memdev->region_id = 0;
+ memdev->range_index = 10+1;
+ memdev->region_index = 8+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* mem-region15 (spa11, dimm4) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[4];
+ memdev->physical_id = 4;
+ memdev->region_id = 0;
+ memdev->range_index = 11+1;
+ memdev->region_index = 9+1;
+ memdev->region_size = SPA0_SIZE;
+ memdev->region_offset = (1ULL << 48);
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+ offset += memdev->header.length;
+
+ /* mem-region16 (spa/bdw4, dimm4) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[4];
+ memdev->physical_id = 4;
+ memdev->region_id = 0;
+ memdev->range_index = 12+1;
+ memdev->region_index = 8+1;
+ memdev->region_size = 0;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ offset += memdev->header.length;
+
+ /* flush4 (dimm4) */
+ flush = nfit_buf + offset;
+ flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
+ flush->header.length = flush_hint_size;
+ flush->device_handle = handle[4];
+ flush->hint_count = NUM_HINTS;
+ for (i = 0; i < NUM_HINTS; i++)
+ flush->hint_address[i] = t->flush_dma[4]
+ + i * sizeof(u64);
+ offset += flush->header.length;
+
+ /* sanity check to make sure we've filled the buffer */
+ WARN_ON(offset != t->nfit_size);
+ }
+
+ t->nfit_filled = offset;
+
+ post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
+ SPA0_SIZE);
+
+ acpi_desc = &t->acpi_desc;
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
+ set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+}
+
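+/*
+ * Lay out the instance-1 topology: a flat pmem range with no
+ * block-data-window aliasing, a virtual CD region, and two dimms
+ * that exercise the various ACPI_NFIT_MEM_* failure flags.
+ */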
+static void nfit_test1_setup(struct nfit_test *t)
+{
+ size_t offset;
+ void *nfit_buf = t->nfit_buf;
+ struct acpi_nfit_memory_map *memdev;
+ struct acpi_nfit_control_region *dcr;
+ struct acpi_nfit_system_address *spa;
+ struct acpi_nfit_desc *acpi_desc;
+
+ offset = 0;
+ /* spa0 (flat range with no bdw aliasing) */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
+ spa->range_index = 0+1;
+ spa->address = t->spa_set_dma[0];
+ spa->length = SPA2_SIZE;
+ offset += spa->header.length;
+
+ /* virtual cd region */
+ spa = nfit_buf + offset;
+ spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
+ spa->header.length = sizeof(*spa);
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
+ spa->range_index = 0;
+ spa->address = t->spa_set_dma[1];
+ spa->length = SPA_VCD_SIZE;
+ offset += spa->header.length;
+
+ /* mem-region0 (spa0, dimm0) */
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[5];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0+1;
+ memdev->region_index = 0+1;
+ memdev->region_size = SPA2_SIZE;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
+ | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
+ | ACPI_NFIT_MEM_NOT_ARMED;
+ offset += memdev->header.length;
+
+ /* dcr-descriptor0 */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 0+1;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[5];
+ dcr->code = NFIT_FIC_BYTE;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
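+ /* mem-region1 (no spa, handle[6]): exercises ACPI_NFIT_MEM_MAP_FAILED */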
+ memdev = nfit_buf + offset;
+ memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
+ memdev->header.length = sizeof(*memdev);
+ memdev->device_handle = handle[6];
+ memdev->physical_id = 0;
+ memdev->region_id = 0;
+ memdev->range_index = 0;
+ memdev->region_index = 0+2;
+ memdev->region_size = SPA2_SIZE;
+ memdev->region_offset = 0;
+ memdev->address = 0;
+ memdev->interleave_index = 0;
+ memdev->interleave_ways = 1;
+ memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+ offset += memdev->header.length;
+
+ /* dcr-descriptor1 */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 0+2;
+ dcr_common_init(dcr);
+ dcr->serial_number = ~handle[6];
+ dcr->code = NFIT_FIC_BYTE;
+ dcr->windows = 0;
+ offset += dcr->header.length;
+
+ /* sanity check to make sure we've filled the buffer */
+ WARN_ON(offset != t->nfit_size);
+
+ t->nfit_filled = offset;
+
+ post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
+ SPA2_SIZE);
+
+ acpi_desc = &t->acpi_desc;
+ set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
+ set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+ set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
+}
+
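+/*
+ * Block-window I/O against the emulated dimms is a simple memcpy to
+ * or from the vmalloc'd backing store, with a region lane held
+ * around the copy.
+ */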
+static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
+ void *iobuf, u64 len, int rw)
+{
+ struct nfit_blk *nfit_blk = ndbr->blk_provider_data;
+ struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW];
+ struct nd_region *nd_region = &ndbr->nd_region;
+ unsigned int lane;
+
+ lane = nd_region_acquire_lane(nd_region);
+ if (rw)
+ memcpy(mmio->addr.base + dpa, iobuf, len);
+ else {
+ memcpy(iobuf, mmio->addr.base + dpa, len);
+
+ /* give us some coverage of the arch_invalidate_pmem() API */
+ arch_invalidate_pmem(mmio->addr.base + dpa, len);
+ }
+ nd_region_release_lane(nd_region, lane);
+
+ return 0;
+}
+
+static unsigned long nfit_ctl_handle;
+
+static union acpi_object *result;
+
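+/* stub _DSM evaluator: hand back whatever setup_result() staged */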
+static union acpi_object *nfit_test_evaluate_dsm(acpi_handle handle,
+ const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4)
+{
+ if (handle != &nfit_ctl_handle)
+ return ERR_PTR(-ENXIO);
+
+ return result;
+}
+
+static int setup_result(void *buf, size_t size)
+{
+ result = kmalloc(sizeof(union acpi_object) + size, GFP_KERNEL);
+ if (!result)
+ return -ENOMEM;
+ result->buffer.type = ACPI_TYPE_BUFFER;
+ result->buffer.pointer = (void *) (result + 1);
+ result->buffer.length = size;
+ memcpy(result->buffer.pointer, buf, size);
+ memset(buf, 0, size);
+ return 0;
+}
+
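+/*
+ * Self-test of acpi_nfit_ctl() against the stub _DSM evaluator: each
+ * case stages a canned ACPI reply via setup_result() and checks the
+ * translated rc/cmd_rc pair.
+ */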
+static int nfit_ctl_test(struct device *dev)
+{
+ int rc, cmd_rc;
+ struct nvdimm *nvdimm;
+ struct acpi_device *adev;
+ struct nfit_mem *nfit_mem;
+ struct nd_ars_record *record;
+ struct acpi_nfit_desc *acpi_desc;
+ const u64 test_val = 0x0123456789abcdefULL;
+ unsigned long mask, cmd_size, offset;
+ union {
+ struct nd_cmd_get_config_size cfg_size;
+ struct nd_cmd_clear_error clear_err;
+ struct nd_cmd_ars_status ars_stat;
+ struct nd_cmd_ars_cap ars_cap;
+ char buf[sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record)];
+ } cmds;
+
+ adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+ *adev = (struct acpi_device) {
+ .handle = &nfit_ctl_handle,
+ .dev = {
+ .init_name = "test-adev",
+ },
+ };
+
+ acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+ if (!acpi_desc)
+ return -ENOMEM;
+ *acpi_desc = (struct acpi_nfit_desc) {
+ .nd_desc = {
+ .cmd_mask = 1UL << ND_CMD_ARS_CAP
+ | 1UL << ND_CMD_ARS_START
+ | 1UL << ND_CMD_ARS_STATUS
+ | 1UL << ND_CMD_CLEAR_ERROR
+ | 1UL << ND_CMD_CALL,
+ .module = THIS_MODULE,
+ .provider_name = "ACPI.NFIT",
+ .ndctl = acpi_nfit_ctl,
+ .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+ | 1UL << NFIT_CMD_ARS_INJECT_SET
+ | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+ | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ },
+ .dev = &adev->dev,
+ };
+
+ nfit_mem = devm_kzalloc(dev, sizeof(*nfit_mem), GFP_KERNEL);
+ if (!nfit_mem)
+ return -ENOMEM;
+
+ mask = 1UL << ND_CMD_SMART | 1UL << ND_CMD_SMART_THRESHOLD
+ | 1UL << ND_CMD_DIMM_FLAGS | 1UL << ND_CMD_GET_CONFIG_SIZE
+ | 1UL << ND_CMD_GET_CONFIG_DATA | 1UL << ND_CMD_SET_CONFIG_DATA
+ | 1UL << ND_CMD_VENDOR;
+ *nfit_mem = (struct nfit_mem) {
+ .adev = adev,
+ .family = NVDIMM_FAMILY_INTEL,
+ .dsm_mask = mask,
+ };
+
+ nvdimm = devm_kzalloc(dev, sizeof(*nvdimm), GFP_KERNEL);
+ if (!nvdimm)
+ return -ENOMEM;
+ *nvdimm = (struct nvdimm) {
+ .provider_data = nfit_mem,
+ .cmd_mask = mask,
+ .dev = {
+ .init_name = "test-dimm",
+ },
+ };
+
+
+ /* basic checkout of a typical 'get config size' command */
+ cmd_size = sizeof(cmds.cfg_size);
+ cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ .status = 0,
+ .config_size = SZ_128K,
+ .max_xfer = SZ_4K,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
+ || cmds.cfg_size.config_size != SZ_128K
+ || cmds.cfg_size.max_xfer != SZ_4K) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_status with zero output */
+ cmd_size = offsetof(struct nd_cmd_ars_status, address);
+ cmds.ars_stat = (struct nd_cmd_ars_status) {
+ .out_length = 0,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_cap with benign extended status */
+ cmd_size = sizeof(cmds.ars_cap);
+ cmds.ars_cap = (struct nd_cmd_ars_cap) {
+ .status = ND_ARS_PERSISTENT << 16,
+ };
+ offset = offsetof(struct nd_cmd_ars_cap, status);
+ rc = setup_result(cmds.buf + offset, cmd_size - offset);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_status with 'status' trimmed from 'out_length' */
+ cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
+ cmds.ars_stat = (struct nd_cmd_ars_status) {
+ .out_length = cmd_size - 4,
+ };
+ record = &cmds.ars_stat.records[0];
+ *record = (struct nd_ars_record) {
+ .length = test_val,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc || record->length != test_val) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test ars_status with 'Output (Size)' including 'status' */
+ cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
+ cmds.ars_stat = (struct nd_cmd_ars_status) {
+ .out_length = cmd_size,
+ };
+ record = &cmds.ars_stat.records[0];
+ *record = (struct nd_ars_record) {
+ .length = test_val,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc || record->length != test_val) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+
+ /* test that an extended status from get_config_size results in failure */
+ cmd_size = sizeof(cmds.cfg_size);
+ cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ .status = 1 << 16,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
+ cmds.buf, cmd_size, &cmd_rc);
+
+ if (rc < 0 || cmd_rc >= 0) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ /* test clear error */
+ cmd_size = sizeof(cmds.clear_err);
+ cmds.clear_err = (struct nd_cmd_clear_error) {
+ .length = 512,
+ .cleared = 512,
+ };
+ rc = setup_result(cmds.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
+ cmds.buf, cmd_size, &cmd_rc);
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ return 0;
+}
+
+static int nfit_test_probe(struct platform_device *pdev)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ struct acpi_nfit_desc *acpi_desc;
+ struct device *dev = &pdev->dev;
+ struct nfit_test *nfit_test;
+ struct nfit_mem *nfit_mem;
+ union acpi_object *obj;
+ int rc;
+
+ if (strcmp(dev_name(&pdev->dev), "nfit_test.0") == 0) {
+ rc = nfit_ctl_test(&pdev->dev);
+ if (rc)
+ return rc;
+ }
+
+ nfit_test = to_nfit_test(&pdev->dev);
+
+ /* common alloc */
+ if (nfit_test->num_dcr) {
+ int num = nfit_test->num_dcr;
+
+ nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t),
+ GFP_KERNEL);
+ nfit_test->label = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->label_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ nfit_test->dcr = devm_kcalloc(dev, num,
+ sizeof(struct nfit_test_dcr *), GFP_KERNEL);
+ nfit_test->dcr_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ nfit_test->smart = devm_kcalloc(dev, num,
+ sizeof(struct nd_intel_smart), GFP_KERNEL);
+ nfit_test->smart_threshold = devm_kcalloc(dev, num,
+ sizeof(struct nd_intel_smart_threshold),
+ GFP_KERNEL);
+ nfit_test->fw = devm_kcalloc(dev, num,
+ sizeof(struct nfit_test_fw), GFP_KERNEL);
+ if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
+ && nfit_test->label_dma && nfit_test->dcr
+ && nfit_test->dcr_dma && nfit_test->flush
+ && nfit_test->flush_dma
+ && nfit_test->fw)
+ /* pass */;
+ else
+ return -ENOMEM;
+ }
+
+ if (nfit_test->num_pm) {
+ int num = nfit_test->num_pm;
+
+ nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *),
+ GFP_KERNEL);
+ nfit_test->spa_set_dma = devm_kcalloc(dev, num,
+ sizeof(dma_addr_t), GFP_KERNEL);
+ if (nfit_test->spa_set && nfit_test->spa_set_dma)
+ /* pass */;
+ else
+ return -ENOMEM;
+ }
+
+ /* per-nfit specific alloc */
+ if (nfit_test->alloc(nfit_test))
+ return -ENOMEM;
+
+ nfit_test->setup(nfit_test);
+ acpi_desc = &nfit_test->acpi_desc;
+ acpi_nfit_desc_init(acpi_desc, &pdev->dev);
+ acpi_desc->blk_do_io = nfit_test_blk_do_io;
+ nd_desc = &acpi_desc->nd_desc;
+ nd_desc->provider_name = NULL;
+ nd_desc->module = THIS_MODULE;
+ nd_desc->ndctl = nfit_test_ctl;
+
+ rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
+ nfit_test->nfit_filled);
+ if (rc)
+ return rc;
+
+ rc = devm_add_action_or_reset(&pdev->dev, acpi_nfit_shutdown, acpi_desc);
+ if (rc)
+ return rc;
+
+ if (nfit_test->setup != nfit_test0_setup)
+ return 0;
+
+ nfit_test->setup_hotplug = 1;
+ nfit_test->setup(nfit_test);
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = nfit_test->nfit_size;
+ obj->buffer.pointer = nfit_test->nfit_buf;
+ *(nfit_test->_fit) = obj;
+ __acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
+
+ /* associate dimm devices with nfit_mem data for notification testing */
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(handle); i++)
+ if (nfit_handle == handle[i])
+ dev_set_drvdata(nfit_test->dimm_dev[i],
+ nfit_mem);
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+
+ return 0;
+}
+
+static int nfit_test_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static void nfit_test_release(struct device *dev)
+{
+ struct nfit_test *nfit_test = to_nfit_test(dev);
+
+ kfree(nfit_test);
+}
+
+static const struct platform_device_id nfit_test_id[] = {
+ { KBUILD_MODNAME },
+ { },
+};
+
+static struct platform_driver nfit_test_driver = {
+ .probe = nfit_test_probe,
+ .remove = nfit_test_remove,
+ .driver = {
+ .name = KBUILD_MODNAME,
+ },
+ .id_table = nfit_test_id,
+};
+
+static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+
+enum INJECT {
+ INJECT_NONE,
+ INJECT_SRC,
+ INJECT_DST,
+};
+
+static void mcsafe_test_init(char *dst, char *src, size_t size)
+{
+ size_t i;
+
+ memset(dst, 0xff, size);
+ for (i = 0; i < size; i++)
+ src[i] = (char) i;
+}
+
+static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+ size_t size, unsigned long rem)
+{
+ size_t i;
+
+ for (i = 0; i < size - rem; i++)
+ if (dst[i] != (unsigned char) i) {
+ pr_info_once("%s:%d: offset: %zd got: %#x expect: %#x\n",
+ __func__, __LINE__, i, dst[i],
+ (unsigned char) i);
+ return false;
+ }
+ for (i = size - rem; i < size; i++)
+ if (dst[i] != 0xffU) {
+ pr_info_once("%s:%d: offset: %zd got: %#x expect: 0xff\n",
+ __func__, __LINE__, i, dst[i]);
+ return false;
+ }
+ return true;
+}
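+
+/*
+ * The contract checked above, assuming mcsafe_test_init() ran first:
+ * the leading (size - rem) bytes of dst must carry the ascending
+ * source pattern and the trailing rem bytes must still hold the 0xff
+ * fill. For example, with size = 512 and rem = 2, dst[0..509] must
+ * equal (unsigned char)index and dst[510..511] must remain 0xff.
+ */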
+
+void mcsafe_test(void)
+{
+ char *inject_desc[] = { "none", "source", "destination" };
+ enum INJECT inj;
+
+ if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ pr_info("%s: run...\n", __func__);
+ } else {
+ pr_info("%s: disabled, skip.\n", __func__);
+ return;
+ }
+
+ for (inj = INJECT_NONE; inj <= INJECT_DST; inj++) {
+ int i;
+
+ pr_info("%s: inject: %s\n", __func__, inject_desc[inj]);
+ for (i = 0; i < 512; i++) {
+ unsigned long expect, rem;
+ void *src, *dst;
+ bool valid;
+
+ switch (inj) {
+ case INJECT_NONE:
+ mcsafe_inject_src(NULL);
+ mcsafe_inject_dst(NULL);
+ dst = &mcsafe_buf[2048];
+ src = &mcsafe_buf[1024 - i];
+ expect = 0;
+ break;
+ case INJECT_SRC:
+ mcsafe_inject_src(&mcsafe_buf[1024]);
+ mcsafe_inject_dst(NULL);
+ dst = &mcsafe_buf[2048];
+ src = &mcsafe_buf[1024 - i];
+ expect = 512 - i;
+ break;
+ case INJECT_DST:
+ mcsafe_inject_src(NULL);
+ mcsafe_inject_dst(&mcsafe_buf[2048]);
+ dst = &mcsafe_buf[2048 - i];
+ src = &mcsafe_buf[1024];
+ expect = 512 - i;
+ break;
+ }
+
+ mcsafe_test_init(dst, src, 512);
+ rem = __memcpy_mcsafe(dst, src, 512);
+ valid = mcsafe_test_validate(dst, src, 512, expect);
+ if (rem == expect && valid)
+ continue;
+ pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
+ __func__,
+ ((unsigned long) dst) & ~PAGE_MASK,
+ ((unsigned long) src) & ~PAGE_MASK,
+ 512, i, rem, valid ? "valid" : "bad",
+ expect);
+ }
+ }
+
+ mcsafe_inject_src(NULL);
+ mcsafe_inject_dst(NULL);
+}
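+
+/*
+ * Worked example of the 'expect' arithmetic above: with poison
+ * injected at src offset 1024 and the copy starting at 1024 - i, the
+ * first i bytes precede the poison and copy cleanly, so
+ * __memcpy_mcsafe() should report 512 - i bytes uncopied. For i = 100
+ * that is expect = 412; at i = 0 the very first byte faults and all
+ * 512 bytes remain uncopied.
+ */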
+
+static __init int nfit_test_init(void)
+{
+ int rc, i;
+
+ pmem_test();
+ libnvdimm_test();
+ acpi_nfit_test();
+ device_dax_test();
+ mcsafe_test();
+
+ nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
+
+ nfit_wq = create_singlethread_workqueue("nfit");
+ if (!nfit_wq)
+ return -ENOMEM;
+
+ nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
+ if (IS_ERR(nfit_test_dimm)) {
+ rc = PTR_ERR(nfit_test_dimm);
+ goto err_register;
+ }
+
+ nfit_pool = gen_pool_create(ilog2(SZ_4M), NUMA_NO_NODE);
+ if (!nfit_pool) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+
+ if (gen_pool_add(nfit_pool, SZ_4G, SZ_4G, NUMA_NO_NODE)) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+
+ for (i = 0; i < NUM_NFITS; i++) {
+ struct nfit_test *nfit_test;
+ struct platform_device *pdev;
+
+ nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL);
+ if (!nfit_test) {
+ rc = -ENOMEM;
+ goto err_register;
+ }
+ INIT_LIST_HEAD(&nfit_test->resources);
+ badrange_init(&nfit_test->badrange);
+ switch (i) {
+ case 0:
+ nfit_test->num_pm = NUM_PM;
+ nfit_test->dcr_idx = 0;
+ nfit_test->num_dcr = NUM_DCR;
+ nfit_test->alloc = nfit_test0_alloc;
+ nfit_test->setup = nfit_test0_setup;
+ break;
+ case 1:
+ nfit_test->num_pm = 2;
+ nfit_test->dcr_idx = NUM_DCR;
+ nfit_test->num_dcr = 2;
+ nfit_test->alloc = nfit_test1_alloc;
+ nfit_test->setup = nfit_test1_setup;
+ break;
+ default:
+ rc = -EINVAL;
+ goto err_register;
+ }
+ pdev = &nfit_test->pdev;
+ pdev->name = KBUILD_MODNAME;
+ pdev->id = i;
+ pdev->dev.release = nfit_test_release;
+ rc = platform_device_register(pdev);
+ if (rc) {
+ put_device(&pdev->dev);
+ goto err_register;
+ }
+ get_device(&pdev->dev);
+
+ rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
+ if (rc)
+ goto err_register;
+
+ instances[i] = nfit_test;
+ INIT_WORK(&nfit_test->work, uc_error_notify);
+ }
+
+ rc = platform_driver_register(&nfit_test_driver);
+ if (rc)
+ goto err_register;
+ return 0;
+
+ err_register:
+ if (nfit_pool)
+ gen_pool_destroy(nfit_pool);
+
+ destroy_workqueue(nfit_wq);
+ for (i = 0; i < NUM_NFITS; i++)
+ if (instances[i])
+ platform_device_unregister(&instances[i]->pdev);
+ nfit_test_teardown();
+ for (i = 0; i < NUM_NFITS; i++)
+ if (instances[i])
+ put_device(&instances[i]->pdev.dev);
+
+ return rc;
+}
+
+static __exit void nfit_test_exit(void)
+{
+ int i;
+
+ flush_workqueue(nfit_wq);
+ destroy_workqueue(nfit_wq);
+ for (i = 0; i < NUM_NFITS; i++)
+ platform_device_unregister(&instances[i]->pdev);
+ platform_driver_unregister(&nfit_test_driver);
+ nfit_test_teardown();
+
+ gen_pool_destroy(nfit_pool);
+
+ for (i = 0; i < NUM_NFITS; i++)
+ put_device(&instances[i]->pdev.dev);
+ class_destroy(nfit_test_dimm);
+}
+
+module_init(nfit_test_init);
+module_exit(nfit_test_exit);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
new file mode 100644
index 000000000..3de57cc87
--- /dev/null
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -0,0 +1,249 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#ifndef __NFIT_TEST_H__
+#define __NFIT_TEST_H__
+#include <linux/acpi.h>
+#include <linux/list.h>
+#include <linux/uuid.h>
+#include <linux/ioport.h>
+#include <linux/spinlock_types.h>
+
+struct nfit_test_request {
+ struct list_head list;
+ struct resource res;
+};
+
+struct nfit_test_resource {
+ struct list_head requests;
+ struct list_head list;
+ struct resource res;
+ struct device *dev;
+ spinlock_t lock;
+ int req_count;
+ void *buf;
+};
+
+#define ND_TRANSLATE_SPA_STATUS_INVALID_SPA 2
+#define NFIT_ARS_INJECT_INVALID 2
+
+enum err_inj_options {
+ ND_ARS_ERR_INJ_OPT_NOTIFY = 0,
+};
+
+/* nfit commands */
+enum nfit_cmd_num {
+ NFIT_CMD_TRANSLATE_SPA = 5,
+ NFIT_CMD_ARS_INJECT_SET = 7,
+ NFIT_CMD_ARS_INJECT_CLEAR = 8,
+ NFIT_CMD_ARS_INJECT_GET = 9,
+};
+
+struct nd_cmd_translate_spa {
+ __u64 spa;
+ __u32 status;
+ __u8 flags;
+ __u8 _reserved[3];
+ __u64 translate_length;
+ __u32 num_nvdimms;
+ struct nd_nvdimm_device {
+ __u32 nfit_device_handle;
+ __u32 _reserved;
+ __u64 dpa;
+ } __packed devices[0];
+
+} __packed;
+
+struct nd_cmd_ars_err_inj {
+ __u64 err_inj_spa_range_base;
+ __u64 err_inj_spa_range_length;
+ __u8 err_inj_options;
+ __u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_clr {
+ __u64 err_inj_clr_spa_range_base;
+ __u64 err_inj_clr_spa_range_length;
+ __u32 status;
+} __packed;
+
+struct nd_cmd_ars_err_inj_stat {
+ __u32 status;
+ __u32 inj_err_rec_count;
+ struct nd_error_stat_query_record {
+ __u64 err_inj_stat_spa_range_base;
+ __u64 err_inj_stat_spa_range_length;
+ } __packed record[0];
+} __packed;
+
+#define ND_INTEL_SMART 1
+#define ND_INTEL_SMART_THRESHOLD 2
+#define ND_INTEL_ENABLE_LSS_STATUS 10
+#define ND_INTEL_FW_GET_INFO 12
+#define ND_INTEL_FW_START_UPDATE 13
+#define ND_INTEL_FW_SEND_DATA 14
+#define ND_INTEL_FW_FINISH_UPDATE 15
+#define ND_INTEL_FW_FINISH_QUERY 16
+#define ND_INTEL_SMART_SET_THRESHOLD 17
+#define ND_INTEL_SMART_INJECT 18
+
+#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
+#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
+#define ND_INTEL_SMART_USED_VALID (1 << 2)
+#define ND_INTEL_SMART_MTEMP_VALID (1 << 3)
+#define ND_INTEL_SMART_CTEMP_VALID (1 << 4)
+#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5)
+#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6)
+#define ND_INTEL_SMART_PTEMP_VALID (1 << 7)
+#define ND_INTEL_SMART_ALARM_VALID (1 << 9)
+#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10)
+#define ND_INTEL_SMART_VENDOR_VALID (1 << 11)
+#define ND_INTEL_SMART_SPARE_TRIP (1 << 0)
+#define ND_INTEL_SMART_TEMP_TRIP (1 << 1)
+#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2)
+#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
+#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
+#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP (1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE (1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL (1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN (1 << 3)
+
+struct nd_intel_smart {
+ __u32 status;
+ union {
+ struct {
+ __u32 flags;
+ __u8 reserved0[4];
+ __u8 health;
+ __u8 spares;
+ __u8 life_used;
+ __u8 alarm_flags;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u32 shutdown_count;
+ __u8 ait_status;
+ __u16 pmic_temperature;
+ __u8 reserved1[8];
+ __u8 shutdown_state;
+ __u32 vendor_size;
+ __u8 vendor_data[92];
+ } __packed;
+ __u8 data[128];
+ };
+} __packed;
+
+struct nd_intel_smart_threshold {
+ __u32 status;
+ union {
+ struct {
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u8 reserved[1];
+ } __packed;
+ __u8 data[8];
+ };
+} __packed;
+
+struct nd_intel_smart_set_threshold {
+ __u16 alarm_control;
+ __u8 spares;
+ __u16 media_temperature;
+ __u16 ctrl_temperature;
+ __u32 status;
+} __packed;
+
+struct nd_intel_smart_inject {
+ __u64 flags;
+ __u8 mtemp_enable;
+ __u16 media_temperature;
+ __u8 spare_enable;
+ __u8 spares;
+ __u8 fatal_enable;
+ __u8 unsafe_shutdown_enable;
+ __u32 status;
+} __packed;
+
+#define INTEL_FW_STORAGE_SIZE 0x100000
+#define INTEL_FW_MAX_SEND_LEN 0xFFEC
+#define INTEL_FW_QUERY_INTERVAL 250000
+#define INTEL_FW_QUERY_MAX_TIME 3000000
+#define INTEL_FW_FIS_VERSION 0x0105
+#define INTEL_FW_FAKE_VERSION 0xffffffffabcd
+
+enum intel_fw_update_state {
+ FW_STATE_NEW = 0,
+ FW_STATE_IN_PROGRESS,
+ FW_STATE_VERIFY,
+ FW_STATE_UPDATED,
+};
+
+struct nd_intel_fw_info {
+ __u32 status;
+ __u32 storage_size;
+ __u32 max_send_len;
+ __u32 query_interval;
+ __u32 max_query_time;
+ __u8 update_cap;
+ __u8 reserved[3];
+ __u32 fis_version;
+ __u64 run_version;
+ __u64 updated_version;
+} __packed;
+
+struct nd_intel_fw_start {
+ __u32 status;
+ __u32 context;
+} __packed;
+
+/* this one has the output first because of the variable input data size */
+struct nd_intel_fw_send_data {
+ __u32 context;
+ __u32 offset;
+ __u32 length;
+ __u8 data[0];
+/* this field is not declared due to the variable-sized input data */
+/* __u32 status; */
+} __packed;
+
+struct nd_intel_fw_finish_update {
+ __u8 ctrl_flags;
+ __u8 reserved[3];
+ __u32 context;
+ __u32 status;
+} __packed;
+
+struct nd_intel_fw_finish_query {
+ __u32 context;
+ __u32 status;
+ __u64 updated_fw_rev;
+} __packed;
+
+struct nd_intel_lss {
+ __u8 enable;
+ __u32 status;
+} __packed;
+
+typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t);
+typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle,
+ const guid_t *guid, u64 rev, u64 func,
+ union acpi_object *argv4);
+void __iomem *__wrap_ioremap_nocache(resource_size_t offset,
+ unsigned long size);
+void __wrap_iounmap(volatile void __iomem *addr);
+void nfit_test_setup(nfit_test_lookup_fn lookup,
+ nfit_test_evaluate_dsm_fn evaluate);
+void nfit_test_teardown(void);
+struct nfit_test_resource *get_nfit_res(resource_size_t resource);
+#endif
diff --git a/tools/testing/nvdimm/watermark.h b/tools/testing/nvdimm/watermark.h
new file mode 100644
index 000000000..ed0528757
--- /dev/null
+++ b/tools/testing/nvdimm/watermark.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+#ifndef _TEST_NVDIMM_WATERMARK_H_
+#define _TEST_NVDIMM_WATERMARK_H_
+int pmem_test(void);
+int libnvdimm_test(void);
+int acpi_nfit_test(void);
+int device_dax_test(void);
+
+/*
+ * dummy routine for nfit_test to validate it is linking to the properly
+ * mocked module and not the standard one from the base tree.
+ */
+#define nfit_test_watermark(x) \
+int x##_test(void) \
+{ \
+ pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \
+ return 0; \
+} \
+EXPORT_SYMBOL(x##_test)
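+
+/*
+ * For reference, an instantiation such as nfit_test_watermark(pmem);
+ * in the mocked pmem module expands to roughly:
+ *
+ *   int pmem_test(void)
+ *   {
+ *           pr_debug("%s for nfit_test\n", KBUILD_MODNAME);
+ *           return 0;
+ *   }
+ *   EXPORT_SYMBOL(pmem_test);
+ *
+ * so nfit_test, which calls pmem_test() at init, can only resolve that
+ * symbol against the mocked module, not the base-tree one.
+ */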
+#endif /* _TEST_NVDIMM_WATERMARK_H_ */
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
new file mode 100644
index 000000000..d4706c0ff
--- /dev/null
+++ b/tools/testing/radix-tree/.gitignore
@@ -0,0 +1,6 @@
+generated/map-shift.h
+idr.c
+idr-test
+main
+multiorder
+radix-tree.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
new file mode 100644
index 000000000..37baecc37
--- /dev/null
+++ b/tools/testing/radix-tree/Makefile
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I. -I../../include -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address \
+ -fsanitize=undefined
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+LDLIBS+= -lpthread -lurcu
+TARGETS = main idr-test multiorder
+CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
+OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
+ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+
+ifndef SHIFT
+ SHIFT=3
+endif
+
+ifeq ($(BUILD), 32)
+ CFLAGS += -m32
+ LDFLAGS += -m32
+endif
+
+targets: generated/map-shift.h $(TARGETS)
+
+main: $(OFILES)
+
+idr-test.o: ../../../lib/test_ida.c
+idr-test: idr-test.o $(CORE_OFILES)
+
+multiorder: multiorder.o $(CORE_OFILES)
+
+clean:
+ $(RM) $(TARGETS) *.o radix-tree.c idr.c generated/map-shift.h
+
+vpath %.c ../../lib
+
+$(OFILES): Makefile *.h */*.h generated/map-shift.h \
+ ../../include/linux/*.h \
+ ../../include/asm/*.h \
+ ../../../include/linux/radix-tree.h \
+ ../../../include/linux/idr.h
+
+radix-tree.c: ../../../lib/radix-tree.c
+ sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+idr.c: ../../../lib/idr.c
+ sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+generated/map-shift.h:
+ @if ! grep -qws $(SHIFT) generated/map-shift.h; then \
+ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \
+ generated/map-shift.h; \
+ fi
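+
+# The rule above rewrites generated/map-shift.h only when the requested
+# SHIFT is not already present, so an unchanged SHIFT leaves the header
+# timestamp alone and incremental builds stay incremental. A narrower
+# node fanout for debugging tree growth, for example:
+#
+#   make SHIFT=6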
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
new file mode 100644
index 000000000..99c40f3ed
--- /dev/null
+++ b/tools/testing/radix-tree/benchmark.c
@@ -0,0 +1,257 @@
+/*
+ * benchmark.c:
+ * Author: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <time.h>
+#include "test.h"
+
+#define for_each_index(i, base, order) \
+ for (i = base; i < base + (1 << order); i++)
+
+#define NSEC_PER_SEC 1000000000L
+
+static long long benchmark_iter(struct radix_tree_root *root, bool tagged)
+{
+ volatile unsigned long sink = 0;
+ struct radix_tree_iter iter;
+ struct timespec start, finish;
+ long long nsec;
+ int l, loops = 1;
+ void **slot;
+
+#ifdef BENCHMARK
+again:
+#endif
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (l = 0; l < loops; l++) {
+ if (tagged) {
+ radix_tree_for_each_tagged(slot, root, &iter, 0, 0)
+ sink ^= (unsigned long)slot;
+ } else {
+ radix_tree_for_each_slot(slot, root, &iter, 0)
+ sink ^= (unsigned long)slot;
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+#ifdef BENCHMARK
+ if (loops == 1 && nsec * 5 < NSEC_PER_SEC) {
+ loops = NSEC_PER_SEC / nsec / 4 + 1;
+ goto again;
+ }
+#endif
+
+ nsec /= loops;
+ return nsec;
+}
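+
+/*
+ * A worked example of the BENCHMARK calibration above: if the first
+ * timed pass takes nsec = 10,000,000 (10ms), then nsec * 5 < 1s, so
+ * loops becomes 1,000,000,000 / 10,000,000 / 4 + 1 = 26 and the pass
+ * is retried, targeting roughly a quarter second of total runtime
+ * before the per-loop average is reported.
+ */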
+
+static void benchmark_insert(struct radix_tree_root *root,
+ unsigned long size, unsigned long step, int order)
+{
+ struct timespec start, finish;
+ unsigned long index;
+ long long nsec;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (index = 0 ; index < size ; index += step)
+ item_insert_order(root, index, order);
+
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, insertion: %15lld ns\n",
+ size, step, order, nsec);
+}
+
+static void benchmark_tagging(struct radix_tree_root *root,
+ unsigned long size, unsigned long step, int order)
+{
+ struct timespec start, finish;
+ unsigned long index;
+ long long nsec;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (index = 0 ; index < size ; index += step)
+ radix_tree_tag_set(root, index, 0);
+
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, tagging: %17lld ns\n",
+ size, step, order, nsec);
+}
+
+static void benchmark_delete(struct radix_tree_root *root,
+ unsigned long size, unsigned long step, int order)
+{
+ struct timespec start, finish;
+ unsigned long index, i;
+ long long nsec;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (index = 0 ; index < size ; index += step)
+ for_each_index(i, index, order)
+ item_delete(root, i);
+
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, deletion: %16lld ns\n",
+ size, step, order, nsec);
+}
+
+static void benchmark_size(unsigned long size, unsigned long step, int order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ long long normal, tagged;
+
+ benchmark_insert(&tree, size, step, order);
+ benchmark_tagging(&tree, size, step, order);
+
+ tagged = benchmark_iter(&tree, true);
+ normal = benchmark_iter(&tree, false);
+
+ printv(2, "Size: %8ld, step: %8ld, order: %d, tagged iteration: %8lld ns\n",
+ size, step, order, tagged);
+ printv(2, "Size: %8ld, step: %8ld, order: %d, normal iteration: %8lld ns\n",
+ size, step, order, normal);
+
+ benchmark_delete(&tree, size, step, order);
+
+ item_kill_tree(&tree);
+ rcu_barrier();
+}
+
+static long long __benchmark_split(unsigned long index,
+ int old_order, int new_order)
+{
+ struct timespec start, finish;
+ long long nsec;
+ RADIX_TREE(tree, GFP_ATOMIC);
+
+ item_insert_order(&tree, index, old_order);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ radix_tree_split(&tree, index, new_order);
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ item_kill_tree(&tree);
+
+ return nsec;
+
+}
+
+static void benchmark_split(unsigned long size, unsigned long step)
+{
+ int i, j, idx;
+ long long nsec = 0;
+
+
+ for (idx = 0; idx < size; idx += step) {
+ for (i = 3; i < 11; i++) {
+ for (j = 0; j < i; j++) {
+ nsec += __benchmark_split(idx, i, j);
+ }
+ }
+ }
+
+ printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
+ size, step, nsec);
+
+}
+
+static long long __benchmark_join(unsigned long index,
+ unsigned order1, unsigned order2)
+{
+ unsigned long loc;
+ struct timespec start, finish;
+ long long nsec;
+ void *item, *item2 = item_create(index + 1, order1);
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert_order(&tree, index, order2);
+ item = radix_tree_lookup(&tree, index);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ radix_tree_join(&tree, index + 1, order1, item2);
+ clock_gettime(CLOCK_MONOTONIC, &finish);
+ nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
+ (finish.tv_nsec - start.tv_nsec);
+
+ loc = find_item(&tree, item);
+ if (loc == -1)
+ free(item);
+
+ item_kill_tree(&tree);
+
+ return nsec;
+}
+
+static void benchmark_join(unsigned long step)
+{
+ int i, j, idx;
+ long long nsec = 0;
+
+ for (idx = 0; idx < 1 << 10; idx += step) {
+ for (i = 1; i < 15; i++) {
+ for (j = 0; j < i; j++) {
+ nsec += __benchmark_join(idx, i, j);
+ }
+ }
+ }
+
+ printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
+ 1 << 10, step, nsec);
+}
+
+void benchmark(void)
+{
+ unsigned long size[] = {1 << 10, 1 << 20, 0};
+ unsigned long step[] = {1, 2, 7, 15, 63, 64, 65,
+ 128, 256, 512, 12345, 0};
+ int c, s;
+
+ printv(1, "starting benchmarks\n");
+ printv(1, "RADIX_TREE_MAP_SHIFT = %d\n", RADIX_TREE_MAP_SHIFT);
+
+ for (c = 0; size[c]; c++)
+ for (s = 0; step[s]; s++)
+ benchmark_size(size[c], step[s], 0);
+
+ for (c = 0; size[c]; c++)
+ for (s = 0; step[s]; s++)
+ benchmark_size(size[c], step[s] << 9, 9);
+
+ for (c = 0; size[c]; c++)
+ for (s = 0; step[s]; s++)
+ benchmark_split(size[c], step[s]);
+
+ for (s = 0; step[s]; s++)
+ benchmark_join(step[s]);
+}
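+
+/*
+ * Note on the order-9 runs above: shifting the step left by 9 scales
+ * it by the 512 indices each multiorder entry covers, so the populated
+ * fraction of the index space matches the order-0 runs and the timings
+ * stay comparable across orders.
+ */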
diff --git a/tools/testing/radix-tree/generated/autoconf.h b/tools/testing/radix-tree/generated/autoconf.h
new file mode 100644
index 000000000..cf88dc5b8
--- /dev/null
+++ b/tools/testing/radix-tree/generated/autoconf.h
@@ -0,0 +1 @@
+#define CONFIG_RADIX_TREE_MULTIORDER 1
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
new file mode 100644
index 000000000..235eef71f
--- /dev/null
+++ b/tools/testing/radix-tree/idr-test.c
@@ -0,0 +1,512 @@
+/*
+ * idr-test.c: Test the IDR API
+ * Copyright (c) 2016 Matthew Wilcox <willy@infradead.org>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/bitmap.h>
+#include <linux/idr.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+
+#include "test.h"
+
+#define DUMMY_PTR ((void *)0x12)
+
+int item_idr_free(int id, void *p, void *data)
+{
+ struct item *item = p;
+ assert(item->index == id);
+ free(p);
+
+ return 0;
+}
+
+void item_idr_remove(struct idr *idr, int id)
+{
+ struct item *item = idr_find(idr, id);
+ assert(item->index == id);
+ idr_remove(idr, id);
+ free(item);
+}
+
+void idr_alloc_test(void)
+{
+ unsigned long i;
+ DEFINE_IDR(idr);
+
+ assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0, 0x4000, GFP_KERNEL) == 0);
+ assert(idr_alloc_cyclic(&idr, DUMMY_PTR, 0x3ffd, 0x4000, GFP_KERNEL) == 0x3ffd);
+ idr_remove(&idr, 0x3ffd);
+ idr_remove(&idr, 0);
+
+ for (i = 0x3ffe; i < 0x4003; i++) {
+ int id;
+ struct item *item;
+
+ if (i < 0x4000)
+ item = item_create(i, 0);
+ else
+ item = item_create(i - 0x3fff, 0);
+
+ id = idr_alloc_cyclic(&idr, item, 1, 0x4000, GFP_KERNEL);
+ assert(id == item->index);
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+}
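+
+/*
+ * Worked example of the wrap exercised above: allocating cyclically in
+ * [1, 0x4000) hands out 0x3ffe and 0x3fff for the first two loop
+ * iterations, then wraps to 1, 2, 3; hence items for i >= 0x4000 are
+ * created with index i - 0x3fff.
+ */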
+
+void idr_replace_test(void)
+{
+ DEFINE_IDR(idr);
+
+ idr_alloc(&idr, (void *)-1, 10, 11, GFP_KERNEL);
+ idr_replace(&idr, &idr, 10);
+
+ idr_destroy(&idr);
+}
+
+/*
+ * Unlike the radix tree, you can put a NULL pointer -- with care -- into
+ * the IDR. Some interfaces, like idr_find(), do not distinguish between
+ * "present, value is NULL" and "not present", but that's exactly what some
+ * users want.
+ */
+void idr_null_test(void)
+{
+ int i;
+ DEFINE_IDR(idr);
+
+ assert(idr_is_empty(&idr));
+
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+ assert(!idr_is_empty(&idr));
+ idr_remove(&idr, 0);
+ assert(idr_is_empty(&idr));
+
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+ assert(!idr_is_empty(&idr));
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+
+ for (i = 0; i < 10; i++) {
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == i);
+ }
+
+ assert(idr_replace(&idr, DUMMY_PTR, 3) == NULL);
+ assert(idr_replace(&idr, DUMMY_PTR, 4) == NULL);
+ assert(idr_replace(&idr, NULL, 4) == DUMMY_PTR);
+ assert(idr_replace(&idr, DUMMY_PTR, 11) == ERR_PTR(-ENOENT));
+ idr_remove(&idr, 5);
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 5);
+ idr_remove(&idr, 5);
+
+ for (i = 0; i < 9; i++) {
+ idr_remove(&idr, i);
+ assert(!idr_is_empty(&idr));
+ }
+ idr_remove(&idr, 8);
+ assert(!idr_is_empty(&idr));
+ idr_remove(&idr, 9);
+ assert(idr_is_empty(&idr));
+
+ assert(idr_alloc(&idr, NULL, 0, 0, GFP_KERNEL) == 0);
+ assert(idr_replace(&idr, DUMMY_PTR, 3) == ERR_PTR(-ENOENT));
+ assert(idr_replace(&idr, DUMMY_PTR, 0) == NULL);
+ assert(idr_replace(&idr, NULL, 0) == DUMMY_PTR);
+
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+
+ for (i = 1; i < 10; i++) {
+ assert(idr_alloc(&idr, NULL, 1, 0, GFP_KERNEL) == i);
+ }
+
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+}
+
+void idr_nowait_test(void)
+{
+ unsigned int i;
+ DEFINE_IDR(idr);
+
+ idr_preload(GFP_KERNEL);
+
+ for (i = 0; i < 3; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, i, i + 1, GFP_NOWAIT) == i);
+ }
+
+ idr_preload_end();
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+}
+
+void idr_get_next_test(int base)
+{
+ unsigned long i;
+ int nextid;
+ DEFINE_IDR(idr);
+ idr_init_base(&idr, base);
+
+ int indices[] = {4, 7, 9, 15, 65, 128, 1000, 99999, 0};
+
+ for (i = 0; indices[i]; i++) {
+ struct item *item = item_create(indices[i], 0);
+ assert(idr_alloc(&idr, item, indices[i], indices[i+1],
+ GFP_KERNEL) == indices[i]);
+ }
+
+ for (i = 0, nextid = 0; indices[i]; i++) {
+ idr_get_next(&idr, &nextid);
+ assert(nextid == indices[i]);
+ nextid++;
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+}
+
+int idr_u32_cb(int id, void *ptr, void *data)
+{
+ BUG_ON(id < 0);
+ BUG_ON(ptr != DUMMY_PTR);
+ return 0;
+}
+
+void idr_u32_test1(struct idr *idr, u32 handle)
+{
+ static bool warned = false;
+ u32 id = handle;
+ int sid = 0;
+ void *ptr;
+
+ BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL));
+ BUG_ON(id != handle);
+ BUG_ON(idr_alloc_u32(idr, DUMMY_PTR, &id, id, GFP_KERNEL) != -ENOSPC);
+ BUG_ON(id != handle);
+ if (!warned && id > INT_MAX)
+ printk("vvv Ignore these warnings\n");
+ ptr = idr_get_next(idr, &sid);
+ if (id > INT_MAX) {
+ BUG_ON(ptr != NULL);
+ BUG_ON(sid != 0);
+ } else {
+ BUG_ON(ptr != DUMMY_PTR);
+ BUG_ON(sid != id);
+ }
+ idr_for_each(idr, idr_u32_cb, NULL);
+ if (!warned && id > INT_MAX) {
+ printk("^^^ Warnings over\n");
+ warned = true;
+ }
+ BUG_ON(idr_remove(idr, id) != DUMMY_PTR);
+ BUG_ON(!idr_is_empty(idr));
+}
+
+void idr_u32_test(int base)
+{
+ DEFINE_IDR(idr);
+ idr_init_base(&idr, base);
+ idr_u32_test1(&idr, 10);
+ idr_u32_test1(&idr, 0x7fffffff);
+ idr_u32_test1(&idr, 0x80000000);
+ idr_u32_test1(&idr, 0x80000001);
+ idr_u32_test1(&idr, 0xffe00000);
+ idr_u32_test1(&idr, 0xffffffff);
+}
+
+static inline void *idr_mk_value(unsigned long v)
+{
+ BUG_ON((long)v < 0);
+ return (void *)((v & 1) | 2 | (v << 1));
+}
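+
+/*
+ * The encoding above maps an even v to 2v + 2 and an odd v to 2v + 1,
+ * so distinct non-negative ids stay distinct, while bit 1 (the radix
+ * tree's exceptional-entry bit) is always set so a stored value can
+ * never be mistaken for a real object pointer. For example, v = 5
+ * (0b101) encodes to 0b1011 = 11 and v = 4 (0b100) to 0b1010 = 10.
+ */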
+
+DEFINE_IDR(find_idr);
+
+static void *idr_throbber(void *arg)
+{
+ time_t start = time(NULL);
+ int id = *(int *)arg;
+
+ rcu_register_thread();
+ do {
+ idr_alloc(&find_idr, idr_mk_value(id), id, id + 1, GFP_KERNEL);
+ idr_remove(&find_idr, id);
+ } while (time(NULL) < start + 10);
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+void idr_find_test_1(int anchor_id, int throbber_id)
+{
+ pthread_t throbber;
+ time_t start = time(NULL);
+
+ pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+ BUG_ON(idr_alloc(&find_idr, idr_mk_value(anchor_id), anchor_id,
+ anchor_id + 1, GFP_KERNEL) != anchor_id);
+
+ do {
+ int id = 0;
+ void *entry = idr_get_next(&find_idr, &id);
+ BUG_ON(entry != idr_mk_value(id));
+ } while (time(NULL) < start + 11);
+
+ pthread_join(throbber, NULL);
+
+ idr_remove(&find_idr, anchor_id);
+ BUG_ON(!idr_is_empty(&find_idr));
+}
+
+void idr_find_test(void)
+{
+ idr_find_test_1(100000, 0);
+ idr_find_test_1(0, 100000);
+}
+
+void idr_checks(void)
+{
+ unsigned long i;
+ DEFINE_IDR(idr);
+
+ for (i = 0; i < 10000; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, 0, 20000, GFP_KERNEL) == i);
+ }
+
+ assert(idr_alloc(&idr, DUMMY_PTR, 5, 30, GFP_KERNEL) < 0);
+
+ for (i = 0; i < 5000; i++)
+ item_idr_remove(&idr, i);
+
+ idr_remove(&idr, 3);
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+
+ assert(idr_is_empty(&idr));
+
+ idr_remove(&idr, 3);
+ idr_remove(&idr, 0);
+
+ assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == 0);
+ idr_remove(&idr, 1);
+ for (i = 1; i < RADIX_TREE_MAP_SIZE; i++)
+ assert(idr_alloc(&idr, DUMMY_PTR, 0, 0, GFP_KERNEL) == i);
+ idr_remove(&idr, 1 << 30);
+ idr_destroy(&idr);
+
+ for (i = INT_MAX - 3UL; i < INT_MAX + 1UL; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, i, i + 10, GFP_KERNEL) == i);
+ }
+ assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i, GFP_KERNEL) == -ENOSPC);
+ assert(idr_alloc(&idr, DUMMY_PTR, i - 2, i + 10, GFP_KERNEL) == -ENOSPC);
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+ idr_destroy(&idr);
+
+ assert(idr_is_empty(&idr));
+
+ idr_set_cursor(&idr, INT_MAX - 3UL);
+ for (i = INT_MAX - 3UL; i < INT_MAX + 3UL; i++) {
+ struct item *item;
+ unsigned int id;
+ if (i <= INT_MAX)
+ item = item_create(i, 0);
+ else
+ item = item_create(i - INT_MAX - 1, 0);
+
+ id = idr_alloc_cyclic(&idr, item, 0, 0, GFP_KERNEL);
+ assert(id == item->index);
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+ assert(idr_is_empty(&idr));
+
+ for (i = 1; i < 10000; i++) {
+ struct item *item = item_create(i, 0);
+ assert(idr_alloc(&idr, item, 1, 20000, GFP_KERNEL) == i);
+ }
+
+ idr_for_each(&idr, item_idr_free, &idr);
+ idr_destroy(&idr);
+
+ idr_replace_test();
+ idr_alloc_test();
+ idr_null_test();
+ idr_nowait_test();
+ idr_get_next_test(0);
+ idr_get_next_test(1);
+ idr_get_next_test(4);
+ idr_u32_test(4);
+ idr_u32_test(1);
+ idr_u32_test(0);
+ idr_find_test();
+}
+
+#define module_init(x)
+#define module_exit(x)
+#define MODULE_AUTHOR(x)
+#define MODULE_LICENSE(x)
+#define dump_stack() assert(0)
+void ida_dump(struct ida *);
+
+#include "../../../lib/test_ida.c"
+
+/*
+ * Check that we get the correct error when we run out of memory doing
+ * allocations. In userspace, GFP_NOWAIT will always fail an allocation.
+ * The first test is for not having a bitmap available, and the second test
+ * is for not being able to allocate a level of the radix tree.
+ */
+void ida_check_nomem(void)
+{
+ DEFINE_IDA(ida);
+ int id;
+
+ id = ida_alloc_min(&ida, 256, GFP_NOWAIT);
+ IDA_BUG_ON(&ida, id != -ENOMEM);
+ id = ida_alloc_min(&ida, 1UL << 30, GFP_NOWAIT);
+ IDA_BUG_ON(&ida, id != -ENOMEM);
+ IDA_BUG_ON(&ida, !ida_is_empty(&ida));
+}
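+
+/*
+ * Why GFP_NOWAIT reliably fails here: the userspace slab mocks in
+ * linux.c bail out before allocating unless __GFP_DIRECT_RECLAIM is
+ * set, and in this suite's linux/gfp.h GFP_NOWAIT carries only
+ * __GFP_KSWAPD_RECLAIM:
+ *
+ *   if (!(gfp & __GFP_DIRECT_RECLAIM))
+ *           return NULL;
+ *
+ * so both the bitmap and the radix-tree-node allocation attempts above
+ * observe -ENOMEM.
+ */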
+
+/*
+ * Check handling of conversions between exceptional entries and full bitmaps.
+ */
+void ida_check_conv_user(void)
+{
+ DEFINE_IDA(ida);
+ unsigned long i;
+
+ radix_tree_cpu_dead(1);
+ for (i = 0; i < 1000000; i++) {
+ int id = ida_alloc(&ida, GFP_NOWAIT);
+ if (id == -ENOMEM) {
+ IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) !=
+ BITS_PER_LONG - 2);
+ id = ida_alloc(&ida, GFP_KERNEL);
+ } else {
+ IDA_BUG_ON(&ida, (i % IDA_BITMAP_BITS) ==
+ BITS_PER_LONG - 2);
+ }
+ IDA_BUG_ON(&ida, id != i);
+ }
+ ida_destroy(&ida);
+}
+
+void ida_check_random(void)
+{
+ DEFINE_IDA(ida);
+ DECLARE_BITMAP(bitmap, 2048);
+ unsigned int i;
+ time_t s = time(NULL);
+
+ repeat:
+ memset(bitmap, 0, sizeof(bitmap));
+ for (i = 0; i < 100000; i++) {
+ int i = rand();
+ int bit = i & 2047;
+ if (test_bit(bit, bitmap)) {
+ __clear_bit(bit, bitmap);
+ ida_free(&ida, bit);
+ } else {
+ __set_bit(bit, bitmap);
+ IDA_BUG_ON(&ida, ida_alloc_min(&ida, bit, GFP_KERNEL)
+ != bit);
+ }
+ }
+ ida_destroy(&ida);
+ if (time(NULL) < s + 10)
+ goto repeat;
+}
+
+void ida_simple_get_remove_test(void)
+{
+ DEFINE_IDA(ida);
+ unsigned long i;
+
+ for (i = 0; i < 10000; i++) {
+ assert(ida_simple_get(&ida, 0, 20000, GFP_KERNEL) == i);
+ }
+ assert(ida_simple_get(&ida, 5, 30, GFP_KERNEL) < 0);
+
+ for (i = 0; i < 10000; i++) {
+ ida_simple_remove(&ida, i);
+ }
+ assert(ida_is_empty(&ida));
+
+ ida_destroy(&ida);
+}
+
+void user_ida_checks(void)
+{
+ radix_tree_cpu_dead(1);
+
+ ida_check_nomem();
+ ida_check_conv_user();
+ ida_check_random();
+ ida_simple_get_remove_test();
+
+ radix_tree_cpu_dead(1);
+}
+
+static void *ida_random_fn(void *arg)
+{
+ rcu_register_thread();
+ ida_check_random();
+ rcu_unregister_thread();
+ return NULL;
+}
+
+void ida_thread_tests(void)
+{
+ pthread_t threads[20];
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++)
+ if (pthread_create(&threads[i], NULL, ida_random_fn, NULL)) {
+ perror("creating ida thread");
+ exit(1);
+ }
+
+ while (i--)
+ pthread_join(threads[i], NULL);
+}
+
+void ida_tests(void)
+{
+ user_ida_checks();
+ ida_checks();
+ ida_exit();
+ ida_thread_tests();
+}
+
+int __weak main(void)
+{
+ radix_tree_init();
+ idr_checks();
+ ida_tests();
+ radix_tree_cpu_dead(1);
+ rcu_barrier();
+ if (nr_allocated)
+ printf("nr_allocated = %d\n", nr_allocated);
+ return 0;
+}
diff --git a/tools/testing/radix-tree/iteration_check.c b/tools/testing/radix-tree/iteration_check.c
new file mode 100644
index 000000000..a92bab513
--- /dev/null
+++ b/tools/testing/radix-tree/iteration_check.c
@@ -0,0 +1,221 @@
+/*
+ * iteration_check.c: test races having to do with radix tree iteration
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <pthread.h>
+#include "test.h"
+
+#define NUM_THREADS 5
+#define MAX_IDX 100
+#define TAG 0
+#define NEW_TAG 1
+
+static pthread_mutex_t tree_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_t threads[NUM_THREADS];
+static unsigned int seeds[3];
+static RADIX_TREE(tree, GFP_KERNEL);
+static bool test_complete;
+static int max_order;
+
+/* relentlessly fill the tree with tagged entries */
+static void *add_entries_fn(void *arg)
+{
+ rcu_register_thread();
+
+ while (!test_complete) {
+ unsigned long pgoff;
+ int order;
+
+ for (pgoff = 0; pgoff < MAX_IDX; pgoff++) {
+ pthread_mutex_lock(&tree_lock);
+ for (order = max_order; order >= 0; order--) {
+ if (item_insert_order(&tree, pgoff, order)
+ == 0) {
+ item_tag_set(&tree, pgoff, TAG);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&tree_lock);
+ }
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+/*
+ * Iterate over the tagged entries, doing a radix_tree_iter_retry() as we find
+ * things that have been removed and randomly resetting our iteration to the
+ * next chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *tagged_iteration_fn(void *arg)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+
+ rcu_register_thread();
+
+ while (!test_complete) {
+ rcu_read_lock();
+ radix_tree_for_each_tagged(slot, &tree, &iter, 0, TAG) {
+ void *entry = radix_tree_deref_slot(slot);
+ if (unlikely(!entry))
+ continue;
+
+ if (radix_tree_deref_retry(entry)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (rand_r(&seeds[0]) % 50 == 0) {
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+ rcu_barrier();
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+/*
+ * Iterate over the entries, doing a radix_tree_iter_retry() as we find things
+ * that have been removed and randomly resetting our iteration to the next
+ * chunk with radix_tree_iter_resume(). Both radix_tree_iter_retry() and
+ * radix_tree_iter_resume() cause radix_tree_next_slot() to be called with a
+ * NULL 'slot' variable.
+ */
+static void *untagged_iteration_fn(void *arg)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+
+ rcu_register_thread();
+
+ while (!test_complete) {
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ void *entry = radix_tree_deref_slot(slot);
+ if (unlikely(!entry))
+ continue;
+
+ if (radix_tree_deref_retry(entry)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ if (rand_r(&seeds[1]) % 50 == 0) {
+ slot = radix_tree_iter_resume(slot, &iter);
+ rcu_read_unlock();
+ rcu_barrier();
+ rcu_read_lock();
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+/*
+ * Randomly remove entries to help induce radix_tree_iter_retry() calls in the
+ * two iteration functions.
+ */
+static void *remove_entries_fn(void *arg)
+{
+ rcu_register_thread();
+
+ while (!test_complete) {
+ int pgoff;
+
+ pgoff = rand_r(&seeds[2]) % MAX_IDX;
+
+ pthread_mutex_lock(&tree_lock);
+ item_delete(&tree, pgoff);
+ pthread_mutex_unlock(&tree_lock);
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static void *tag_entries_fn(void *arg)
+{
+ rcu_register_thread();
+
+ while (!test_complete) {
+ tag_tagged_items(&tree, &tree_lock, 0, MAX_IDX, 10, TAG,
+ NEW_TAG);
+ }
+ rcu_unregister_thread();
+ return NULL;
+}
+
+/* This is a unit test for a bug found by the syzkaller tester */
+void iteration_test(unsigned order, unsigned test_duration)
+{
+ int i;
+
+ printv(1, "Running %siteration tests for %d seconds\n",
+ order > 0 ? "multiorder " : "", test_duration);
+
+ max_order = order;
+ test_complete = false;
+
+ for (i = 0; i < 3; i++)
+ seeds[i] = rand();
+
+ if (pthread_create(&threads[0], NULL, tagged_iteration_fn, NULL)) {
+ perror("create tagged iteration thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[1], NULL, untagged_iteration_fn, NULL)) {
+ perror("create untagged iteration thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[2], NULL, add_entries_fn, NULL)) {
+ perror("create add entry thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[3], NULL, remove_entries_fn, NULL)) {
+ perror("create remove entry thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[4], NULL, tag_entries_fn, NULL)) {
+ perror("create tag entry thread");
+ exit(1);
+ }
+
+ sleep(test_duration);
+ test_complete = true;
+
+ for (i = 0; i < NUM_THREADS; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ item_kill_tree(&tree);
+}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
new file mode 100644
index 000000000..44a0d1ad4
--- /dev/null
+++ b/tools/testing/radix-tree/linux.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <linux/gfp.h>
+#include <linux/poison.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <urcu/uatomic.h>
+
+int nr_allocated;
+int preempt_count;
+int kmalloc_verbose;
+int test_verbose;
+
+struct kmem_cache {
+ pthread_mutex_t lock;
+ int size;
+ int nr_objs;
+ void *objs;
+ void (*ctor)(void *);
+};
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+ struct radix_tree_node *node;
+
+ if (!(flags & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ pthread_mutex_lock(&cachep->lock);
+ if (cachep->nr_objs) {
+ cachep->nr_objs--;
+ node = cachep->objs;
+ cachep->objs = node->parent;
+ pthread_mutex_unlock(&cachep->lock);
+ node->parent = NULL;
+ } else {
+ pthread_mutex_unlock(&cachep->lock);
+ node = malloc(cachep->size);
+ if (cachep->ctor)
+ cachep->ctor(node);
+ }
+
+ uatomic_inc(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from slab\n", node);
+ return node;
+}
+
+void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+{
+ assert(objp);
+ uatomic_dec(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Freeing %p to slab\n", objp);
+ pthread_mutex_lock(&cachep->lock);
+ if (cachep->nr_objs > 10) {
+ memset(objp, POISON_FREE, cachep->size);
+ free(objp);
+ } else {
+ struct radix_tree_node *node = objp;
+ cachep->nr_objs++;
+ node->parent = cachep->objs;
+ cachep->objs = node;
+ }
+ pthread_mutex_unlock(&cachep->lock);
+}
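+
+/*
+ * The cache above keeps a small pool of quiescent objects, threading
+ * the freelist through each radix_tree_node's 'parent' pointer so no
+ * extra bookkeeping allocation is needed; once more than ten objects
+ * are pooled, further frees are poisoned and handed back to libc.
+ * A sketch of the resulting singly linked list, head at 'objs':
+ *
+ *   cachep->objs -> node A -> node B -> NULL
+ *   (each arrow is the node's parent field)
+ */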
+
+void *kmalloc(size_t size, gfp_t gfp)
+{
+ void *ret;
+
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
+ return NULL;
+
+ ret = malloc(size);
+ uatomic_inc(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Allocating %p from malloc\n", ret);
+ if (gfp & __GFP_ZERO)
+ memset(ret, 0, size);
+ return ret;
+}
+
+void kfree(void *p)
+{
+ if (!p)
+ return;
+ uatomic_dec(&nr_allocated);
+ if (kmalloc_verbose)
+ printf("Freeing %p to malloc\n", p);
+ free(p);
+}
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void *))
+{
+ struct kmem_cache *ret = malloc(sizeof(*ret));
+
+ pthread_mutex_init(&ret->lock, NULL);
+ ret->size = size;
+ ret->nr_objs = 0;
+ ret->objs = NULL;
+ ret->ctor = ctor;
+ return ret;
+}
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
new file mode 100644
index 000000000..23b8ed52f
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -0,0 +1 @@
+#include "asm/bug.h"
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/radix-tree/linux/compiler_types.h
diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h
new file mode 100644
index 000000000..a45530d78
--- /dev/null
+++ b/tools/testing/radix-tree/linux/cpu.h
@@ -0,0 +1 @@
+#define cpuhp_setup_state_nocalls(a, b, c, d) (0)
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
new file mode 100644
index 000000000..32159c08a
--- /dev/null
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _GFP_H
+#define _GFP_H
+
+#include <linux/types.h>
+
+#define __GFP_BITS_SHIFT 26
+#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+#define __GFP_HIGH 0x20u
+#define __GFP_IO 0x40u
+#define __GFP_FS 0x80u
+#define __GFP_NOWARN 0x200u
+#define __GFP_ZERO 0x8000u
+#define __GFP_ATOMIC 0x80000u
+#define __GFP_ACCOUNT 0x100000u
+#define __GFP_DIRECT_RECLAIM 0x400000u
+#define __GFP_KSWAPD_RECLAIM 0x2000000u
+
+#define __GFP_RECLAIM (__GFP_DIRECT_RECLAIM|__GFP_KSWAPD_RECLAIM)
+
+#define GFP_ZONEMASK 0x0fu
+#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM)
+
+
+static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags)
+{
+ return !!(gfp_flags & __GFP_DIRECT_RECLAIM);
+}
+
+#endif
diff --git a/tools/testing/radix-tree/linux/idr.h b/tools/testing/radix-tree/linux/idr.h
new file mode 100644
index 000000000..4e342f2e3
--- /dev/null
+++ b/tools/testing/radix-tree/linux/idr.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/idr.h"
diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h
new file mode 100644
index 000000000..1bb0afc21
--- /dev/null
+++ b/tools/testing/radix-tree/linux/init.h
@@ -0,0 +1 @@
+#define __init
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
new file mode 100644
index 000000000..426f32f28
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _KERNEL_H
+#define _KERNEL_H
+
+#include "../../include/linux/kernel.h"
+#include <string.h>
+#include <stdio.h>
+#include <limits.h>
+
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+#include "../../../include/linux/kconfig.h"
+
+#define printk printf
+#define pr_debug printk
+#define pr_cont printk
+
+#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/radix-tree/linux/kmemleak.h
new file mode 100644
index 000000000..155f11278
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kmemleak.h
@@ -0,0 +1 @@
+static inline void kmemleak_update_trace(const void *ptr) { }
diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h
new file mode 100644
index 000000000..b2403aa74
--- /dev/null
+++ b/tools/testing/radix-tree/linux/percpu.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define DECLARE_PER_CPU(type, val) extern type val
+#define DEFINE_PER_CPU(type, val) type val
+
+#define __get_cpu_var(var) var
+#define this_cpu_ptr(var) var
+#define this_cpu_read(var) var
+#define this_cpu_xchg(var, val) uatomic_xchg(&var, val)
+#define this_cpu_cmpxchg(var, old, new) uatomic_cmpxchg(&var, old, new)
+#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu))
diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h
new file mode 100644
index 000000000..edb10302b
--- /dev/null
+++ b/tools/testing/radix-tree/linux/preempt.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_PREEMPT_H
+#define __LINUX_PREEMPT_H
+
+extern int preempt_count;
+
+#define preempt_disable() uatomic_inc(&preempt_count)
+#define preempt_enable() uatomic_dec(&preempt_count)
+
+static inline int in_interrupt(void)
+{
+ return 0;
+}
+
+#endif /* __LINUX_PREEMPT_H */
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
new file mode 100644
index 000000000..24f13d27a
--- /dev/null
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TEST_RADIX_TREE_H
+#define _TEST_RADIX_TREE_H
+
+#include "generated/map-shift.h"
+#include "../../../../include/linux/radix-tree.h"
+
+extern int kmalloc_verbose;
+extern int test_verbose;
+
+static inline void trace_call_rcu(struct rcu_head *head,
+ void (*func)(struct rcu_head *head))
+{
+ if (kmalloc_verbose)
+ printf("Delaying free of %p to slab\n", (char *)head -
+ offsetof(struct radix_tree_node, rcu_head));
+ call_rcu(head, func);
+}
+
+#define printv(verbosity_level, fmt, ...) do { \
+ if (test_verbose >= verbosity_level) \
+ printf(fmt, ##__VA_ARGS__); \
+} while (0)
+
+#undef call_rcu
+#define call_rcu(x, y) trace_call_rcu(x, y)
+
+#endif /* _TEST_RADIX_TREE_H */
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
new file mode 100644
index 000000000..73ed33658
--- /dev/null
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _RCUPDATE_H
+#define _RCUPDATE_H
+
+#include <urcu.h>
+
+#define rcu_dereference_raw(p) rcu_dereference(p)
+#define rcu_dereference_protected(p, cond) rcu_dereference(p)
+
+#endif
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
new file mode 100644
index 000000000..a037def0d
--- /dev/null
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SLAB_H
+#define SLAB_H
+
+#include <linux/types.h>
+#include <linux/gfp.h>
+
+#define SLAB_HWCACHE_ALIGN 1
+#define SLAB_PANIC 2
+#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
+
+void *kmalloc(size_t size, gfp_t);
+void kfree(void *);
+
+static inline void *kzalloc(size_t size, gfp_t gfp)
+{
+ return kmalloc(size, gfp | __GFP_ZERO);
+}
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+void kmem_cache_free(struct kmem_cache *cachep, void *objp);
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void *));
+
+#endif /* SLAB_H */
diff --git a/tools/testing/radix-tree/linux/xarray.h b/tools/testing/radix-tree/linux/xarray.h
new file mode 100644
index 000000000..df3812cda
--- /dev/null
+++ b/tools/testing/radix-tree/linux/xarray.h
@@ -0,0 +1,2 @@
+#include "generated/map-shift.h"
+#include "../../../../include/linux/xarray.h"
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
new file mode 100644
index 000000000..b741686e5
--- /dev/null
+++ b/tools/testing/radix-tree/main.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <assert.h>
+#include <limits.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+#include "regression.h"
+
+void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
+{
+ long idx;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ middle = 1 << 30;
+
+ for (idx = -down; idx < up; idx++)
+ item_insert(&tree, middle + idx);
+
+ item_check_absent(&tree, middle - down - 1);
+ for (idx = -down; idx < up; idx++)
+ item_check_present(&tree, middle + idx);
+ item_check_absent(&tree, middle + up);
+
+ if (chunk > 0) {
+ item_gang_check_present(&tree, middle - down, up + down,
+ chunk, hop);
+ item_full_scan(&tree, middle - down, down + up, chunk);
+ }
+ item_kill_tree(&tree);
+}
+
+void gang_check(void)
+{
+ __gang_check(1UL << 30, 128, 128, 35, 2);
+ __gang_check(1UL << 31, 128, 128, 32, 32);
+ __gang_check(1UL << 31, 128, 128, 32, 100);
+ __gang_check(1UL << 31, 128, 128, 17, 7);
+ __gang_check(0xffff0000UL, 0, 65536, 17, 7);
+ __gang_check(0xfffffffeUL, 1, 1, 17, 7);
+}
+
+void __big_gang_check(void)
+{
+ unsigned long start;
+ int wrapped = 0;
+
+ start = 0;
+ do {
+ unsigned long old_start;
+
+// printf("0x%08lx\n", start);
+ __gang_check(start, rand() % 113 + 1, rand() % 71,
+ rand() % 157, rand() % 91 + 1);
+ old_start = start;
+ start += rand() % 1000000;
+ start %= 1ULL << 33;
+ if (start < old_start)
+ wrapped = 1;
+ } while (!wrapped);
+}
+
+void big_gang_check(bool long_run)
+{
+ int i;
+
+ for (i = 0; i < (long_run ? 1000 : 3); i++) {
+ __big_gang_check();
+ printv(2, "%d ", i);
+ fflush(stdout);
+ }
+}
+
+void add_and_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 44);
+ item_check_present(&tree, 44);
+ item_check_absent(&tree, 43);
+ item_kill_tree(&tree);
+}
+
+void dynamic_height_check(void)
+{
+ int i;
+ RADIX_TREE(tree, GFP_KERNEL);
+ tree_verify_min_height(&tree, 0);
+
+ item_insert(&tree, 42);
+ tree_verify_min_height(&tree, 42);
+
+ item_insert(&tree, 1000000);
+ tree_verify_min_height(&tree, 1000000);
+
+ assert(item_delete(&tree, 1000000));
+ tree_verify_min_height(&tree, 42);
+
+ assert(item_delete(&tree, 42));
+ tree_verify_min_height(&tree, 0);
+
+ for (i = 0; i < 1000; i++) {
+ item_insert(&tree, i);
+ tree_verify_min_height(&tree, i);
+ }
+
+ i--;
+ for (;;) {
+ assert(item_delete(&tree, i));
+ if (i == 0) {
+ tree_verify_min_height(&tree, 0);
+ break;
+ }
+ i--;
+ tree_verify_min_height(&tree, i);
+ }
+
+ item_kill_tree(&tree);
+}
+
+void check_copied_tags(struct radix_tree_root *tree, unsigned long start,
+		unsigned long end, unsigned long *idx, int count,
+		int fromtag, int totag)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+/* if (i % 1000 == 0)
+ putchar('.'); */
+ if (idx[i] < start || idx[i] > end) {
+ if (item_tag_get(tree, idx[i], totag)) {
+ printv(2, "%lu-%lu: %lu, tags %d-%d\n", start,
+ end, idx[i], item_tag_get(tree, idx[i],
+ fromtag),
+ item_tag_get(tree, idx[i], totag));
+ }
+ assert(!item_tag_get(tree, idx[i], totag));
+ continue;
+ }
+ if (item_tag_get(tree, idx[i], fromtag) ^
+ item_tag_get(tree, idx[i], totag)) {
+ printv(2, "%lu-%lu: %lu, tags %d-%d\n", start, end,
+ idx[i], item_tag_get(tree, idx[i], fromtag),
+ item_tag_get(tree, idx[i], totag));
+ }
+ assert(!(item_tag_get(tree, idx[i], fromtag) ^
+ item_tag_get(tree, idx[i], totag)));
+ }
+}
+
+#define ITEMS 50000
+
+void copy_tag_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned long idx[ITEMS];
+ unsigned long start, end, count = 0, tagged, cur, tmp;
+ int i;
+
+// printf("generating radix tree indices...\n");
+ start = rand();
+ end = rand();
+ if (start > end && (rand() % 10)) {
+ cur = start;
+ start = end;
+ end = cur;
+ }
+	/* Specifically create items around the start and the end of the range
+	 * with high probability to check for off-by-one errors */
+ cur = rand();
+ if (cur & 1) {
+ item_insert(&tree, start);
+ if (cur & 2) {
+ if (start <= end)
+ count++;
+ item_tag_set(&tree, start, 0);
+ }
+ }
+ if (cur & 4) {
+ item_insert(&tree, start-1);
+ if (cur & 8)
+ item_tag_set(&tree, start-1, 0);
+ }
+ if (cur & 16) {
+ item_insert(&tree, end);
+ if (cur & 32) {
+ if (start <= end)
+ count++;
+ item_tag_set(&tree, end, 0);
+ }
+ }
+ if (cur & 64) {
+ item_insert(&tree, end+1);
+ if (cur & 128)
+ item_tag_set(&tree, end+1, 0);
+ }
+
+ for (i = 0; i < ITEMS; i++) {
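+		/* rejection-sample an index that is not already in the tree */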
+ do {
+ idx[i] = rand();
+ } while (item_lookup(&tree, idx[i]));
+
+ item_insert(&tree, idx[i]);
+ if (rand() & 1) {
+ item_tag_set(&tree, idx[i], 0);
+ if (idx[i] >= start && idx[i] <= end)
+ count++;
+ }
+/* if (i % 1000 == 0)
+ putchar('.'); */
+ }
+
+// printf("\ncopying tags...\n");
+ tagged = tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+
+// printf("checking copied tags\n");
+ assert(tagged == count);
+ check_copied_tags(&tree, start, end, idx, ITEMS, 0, 1);
+
+ /* Copy tags in several rounds */
+// printf("\ncopying tags...\n");
+ tmp = rand() % (count / 10 + 2);
+ tagged = tag_tagged_items(&tree, NULL, start, end, tmp, 0, 2);
+ assert(tagged == count);
+
+// printf("%lu %lu %lu\n", tagged, tmp, count);
+// printf("checking copied tags\n");
+ check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2);
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ verify_tag_consistency(&tree, 2);
+// printf("\n");
+ item_kill_tree(&tree);
+}
+
+static void __locate_check(struct radix_tree_root *tree, unsigned long index,
+ unsigned order)
+{
+ struct item *item;
+ unsigned long index2;
+
+ item_insert_order(tree, index, order);
+ item = item_lookup(tree, index);
+ index2 = find_item(tree, item);
+ if (index != index2) {
+ printv(2, "index %ld order %d inserted; found %ld\n",
+ index, order, index2);
+ abort();
+ }
+}
+
+static void __order_0_locate_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ int i;
+
+ for (i = 0; i < 50; i++)
+ __locate_check(&tree, rand() % INT_MAX, 0);
+
+ item_kill_tree(&tree);
+}
+
+static void locate_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned order;
+ unsigned long offset, index;
+
+ __order_0_locate_check();
+
+ for (order = 0; order < 20; order++) {
+ for (offset = 0; offset < (1 << (order + 3));
+ offset += (1UL << order)) {
+ for (index = 0; index < (1UL << (order + 5));
+ index += (1UL << order)) {
+ __locate_check(&tree, index + offset, order);
+ }
+ if (find_item(&tree, &tree) != -1)
+ abort();
+
+ item_kill_tree(&tree);
+ }
+ }
+
+ if (find_item(&tree, &tree) != -1)
+ abort();
+ __locate_check(&tree, -1, 0);
+ if (find_item(&tree, &tree) != -1)
+ abort();
+ item_kill_tree(&tree);
+}
+
+static void single_thread_tests(bool long_run)
+{
+ int i;
+
+ printv(1, "starting single_thread_tests: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ multiorder_checks();
+ rcu_barrier();
+ printv(2, "after multiorder_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ locate_check();
+ rcu_barrier();
+ printv(2, "after locate_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ tag_check();
+ rcu_barrier();
+ printv(2, "after tag_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ gang_check();
+ rcu_barrier();
+ printv(2, "after gang_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ add_and_check();
+ rcu_barrier();
+ printv(2, "after add_and_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ dynamic_height_check();
+ rcu_barrier();
+ printv(2, "after dynamic_height_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ idr_checks();
+ ida_tests();
+ rcu_barrier();
+ printv(2, "after idr_checks: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ big_gang_check(long_run);
+ rcu_barrier();
+ printv(2, "after big_gang_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ for (i = 0; i < (long_run ? 2000 : 3); i++) {
+ copy_tag_check();
+ printv(2, "%d ", i);
+ fflush(stdout);
+ }
+ rcu_barrier();
+ printv(2, "after copy_tag_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+}
+
+int main(int argc, char **argv)
+{
+ bool long_run = false;
+ int opt;
+ unsigned int seed = time(NULL);
+
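+	/* -l: long run; -s <seed>: fix the RNG seed; -v: raise verbosity (repeatable) */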
+ while ((opt = getopt(argc, argv, "ls:v")) != -1) {
+ if (opt == 'l')
+ long_run = true;
+ else if (opt == 's')
+ seed = strtoul(optarg, NULL, 0);
+ else if (opt == 'v')
+ test_verbose++;
+ }
+
+ printf("random seed %u\n", seed);
+ srand(seed);
+
+ printf("running tests\n");
+
+ rcu_register_thread();
+ radix_tree_init();
+
+ regression1_test();
+ regression2_test();
+ regression3_test();
+ iteration_test(0, 10 + 90 * long_run);
+ iteration_test(7, 10 + 90 * long_run);
+ single_thread_tests(long_run);
+
+ /* Free any remaining preallocated nodes */
+ radix_tree_cpu_dead(0);
+
+ benchmark();
+
+ rcu_barrier();
+ printv(2, "after rcu_barrier: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
+ rcu_unregister_thread();
+
+ printf("tests completed\n");
+
+ exit(0);
+}
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
new file mode 100644
index 000000000..7bf405638
--- /dev/null
+++ b/tools/testing/radix-tree/multiorder.c
@@ -0,0 +1,719 @@
+/*
+ * multiorder.c: Multi-order radix tree entry testing
+ * Copyright (c) 2016 Intel Corporation
+ * Author: Ross Zwisler <ross.zwisler@linux.intel.com>
+ * Author: Matthew Wilcox <matthew.r.wilcox@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/radix-tree.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <pthread.h>
+#include <unistd.h>	/* sysconf() used by multiorder_iteration_race() */
+
+#include "test.h"
+
+#define for_each_index(i, base, order) \
+ for (i = base; i < base + (1 << order); i++)
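+/* e.g. for_each_index(i, 8, 3) visits indices 8 through 15 */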
+
+static void __multiorder_tag_test(int index, int order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ int base, err, i;
+
+ /* our canonical entry */
+ base = index & ~((1 << order) - 1);
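+	/* e.g. index 29 with order 5 gives base 0; index 44 gives base 32 */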
+
+ printv(2, "Multiorder tag test with index %d, canonical entry %d\n",
+ index, base);
+
+ err = item_insert_order(&tree, index, order);
+ assert(!err);
+
+ /*
+ * Verify we get collisions for covered indices. We try and fail to
+ * insert an exceptional entry so we don't leak memory via
+ * item_insert_order().
+ */
+ for_each_index(i, base, order) {
+ err = __radix_tree_insert(&tree, i, order,
+ (void *)(0xA0 | RADIX_TREE_EXCEPTIONAL_ENTRY));
+ assert(err == -EEXIST);
+ }
+
+ for_each_index(i, base, order) {
+ assert(!radix_tree_tag_get(&tree, i, 0));
+ assert(!radix_tree_tag_get(&tree, i, 1));
+ }
+
+ assert(radix_tree_tag_set(&tree, index, 0));
+
+ for_each_index(i, base, order) {
+ assert(radix_tree_tag_get(&tree, i, 0));
+ assert(!radix_tree_tag_get(&tree, i, 1));
+ }
+
+ assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 1);
+ assert(radix_tree_tag_clear(&tree, index, 0));
+
+ for_each_index(i, base, order) {
+ assert(!radix_tree_tag_get(&tree, i, 0));
+ assert(radix_tree_tag_get(&tree, i, 1));
+ }
+
+ assert(radix_tree_tag_clear(&tree, index, 1));
+
+ assert(!radix_tree_tagged(&tree, 0));
+ assert(!radix_tree_tagged(&tree, 1));
+
+ item_kill_tree(&tree);
+}
+
+static void __multiorder_tag_test2(unsigned order, unsigned long index2)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned long index = (1 << order);
+ index2 += index;
+
+ assert(item_insert_order(&tree, 0, order) == 0);
+ assert(item_insert(&tree, index2) == 0);
+
+ assert(radix_tree_tag_set(&tree, 0, 0));
+ assert(radix_tree_tag_set(&tree, index2, 0));
+
+ assert(tag_tagged_items(&tree, NULL, 0, ~0UL, 10, 0, 1) == 2);
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_tag_tests(void)
+{
+ int i, j;
+
+ /* test multi-order entry for indices 0-7 with no sibling pointers */
+ __multiorder_tag_test(0, 3);
+ __multiorder_tag_test(5, 3);
+
+ /* test multi-order entry for indices 8-15 with no sibling pointers */
+ __multiorder_tag_test(8, 3);
+ __multiorder_tag_test(15, 3);
+
+ /*
+ * Our order 5 entry covers indices 0-31 in a tree with height=2.
+ * This is broken up as follows:
+ * 0-7: canonical entry
+ * 8-15: sibling 1
+ * 16-23: sibling 2
+ * 24-31: sibling 3
+ */
+ __multiorder_tag_test(0, 5);
+ __multiorder_tag_test(29, 5);
+
+ /* same test, but with indices 32-63 */
+ __multiorder_tag_test(32, 5);
+ __multiorder_tag_test(44, 5);
+
+ /*
+ * Our order 8 entry covers indices 0-255 in a tree with height=3.
+ * This is broken up as follows:
+ * 0-63: canonical entry
+ * 64-127: sibling 1
+ * 128-191: sibling 2
+ * 192-255: sibling 3
+ */
+ __multiorder_tag_test(0, 8);
+ __multiorder_tag_test(190, 8);
+
+ /* same test, but with indices 256-511 */
+ __multiorder_tag_test(256, 8);
+ __multiorder_tag_test(300, 8);
+
+ __multiorder_tag_test(0x12345678UL, 8);
+
+ for (i = 1; i < 10; i++)
+ for (j = 0; j < (10 << i); j++)
+ __multiorder_tag_test2(i, j);
+}
+
+static void multiorder_check(unsigned long index, int order)
+{
+ unsigned long i;
+ unsigned long min = index & ~((1UL << order) - 1);
+ unsigned long max = min + (1UL << order);
+ void **slot;
+ struct item *item2 = item_create(min, order);
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ printv(2, "Multiorder index %ld, order %d\n", index, order);
+
+ assert(item_insert_order(&tree, index, order) == 0);
+
+ for (i = min; i < max; i++) {
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == index);
+ }
+ for (i = 0; i < min; i++)
+ item_check_absent(&tree, i);
+ for (i = max; i < 2*max; i++)
+ item_check_absent(&tree, i);
+ for (i = min; i < max; i++)
+ assert(radix_tree_insert(&tree, i, item2) == -EEXIST);
+
+ slot = radix_tree_lookup_slot(&tree, index);
+ free(*slot);
+ radix_tree_replace_slot(&tree, slot, item2);
+ for (i = min; i < max; i++) {
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == min);
+ }
+
+ assert(item_delete(&tree, min) != 0);
+
+ for (i = 0; i < 2*max; i++)
+ item_check_absent(&tree, i);
+}
+
+static void multiorder_shrink(unsigned long index, int order)
+{
+ unsigned long i;
+ unsigned long max = 1 << order;
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+
+ printv(2, "Multiorder shrink index %ld, order %d\n", index, order);
+
+ assert(item_insert_order(&tree, 0, order) == 0);
+
+ node = tree.rnode;
+
+ assert(item_insert(&tree, index) == 0);
+ assert(node != tree.rnode);
+
+ assert(item_delete(&tree, index) != 0);
+ assert(node == tree.rnode);
+
+ for (i = 0; i < max; i++) {
+ struct item *item = item_lookup(&tree, i);
+ assert(item != 0);
+ assert(item->index == 0);
+ }
+ for (i = max; i < 2*max; i++)
+ item_check_absent(&tree, i);
+
+ if (!item_delete(&tree, 0)) {
+ printv(2, "failed to delete index %ld (order %d)\n", index, order);
+ abort();
+ }
+
+ for (i = 0; i < 2*max; i++)
+ item_check_absent(&tree, i);
+}
+
+static void multiorder_insert_bug(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 0);
+ radix_tree_tag_set(&tree, 0, 0);
+ item_insert_order(&tree, 3 << 6, 6);
+
+ item_kill_tree(&tree);
+}
+
+void multiorder_iteration(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_iter iter;
+ void **slot;
+ int i, j, err;
+
+ printv(1, "Multiorder iteration test\n");
+
+#define NUM_ENTRIES 11
+ int index[NUM_ENTRIES] = {0, 2, 4, 8, 16, 32, 34, 36, 64, 72, 128};
+ int order[NUM_ENTRIES] = {1, 1, 2, 3, 4, 1, 0, 1, 3, 0, 7};
+
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ err = item_insert_order(&tree, index[i], order[i]);
+ assert(!err);
+ }
+
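+	/*
+	 * For each start index j, advance i to the first entry whose covered
+	 * range, index[i] through (index[i] | ((1 << order[i]) - 1)), ends at
+	 * or after j; iterating from j must yield that entry first.
+	 */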
+ for (j = 0; j < 256; j++) {
+ for (i = 0; i < NUM_ENTRIES; i++)
+ if (j <= (index[i] | ((1 << order[i]) - 1)))
+ break;
+
+ radix_tree_for_each_slot(slot, &tree, &iter, j) {
+ int height = order[i] / RADIX_TREE_MAP_SHIFT;
+ int shift = height * RADIX_TREE_MAP_SHIFT;
+ unsigned long mask = (1UL << order[i]) - 1;
+ struct item *item = *slot;
+
+ assert((iter.index | mask) == (index[i] | mask));
+ assert(iter.shift == shift);
+ assert(!radix_tree_is_internal_node(item));
+ assert((item->index | mask) == (index[i] | mask));
+ assert(item->order == order[i]);
+ i++;
+ }
+ }
+
+ item_kill_tree(&tree);
+}
+
+void multiorder_tagged_iteration(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_iter iter;
+ void **slot;
+ int i, j;
+
+ printv(1, "Multiorder tagged iteration test\n");
+
+#define MT_NUM_ENTRIES 9
+ int index[MT_NUM_ENTRIES] = {0, 2, 4, 16, 32, 40, 64, 72, 128};
+ int order[MT_NUM_ENTRIES] = {1, 0, 2, 4, 3, 1, 3, 0, 7};
+
+#define TAG_ENTRIES 7
+ int tag_index[TAG_ENTRIES] = {0, 4, 16, 40, 64, 72, 128};
+
+ for (i = 0; i < MT_NUM_ENTRIES; i++)
+ assert(!item_insert_order(&tree, index[i], order[i]));
+
+ assert(!radix_tree_tagged(&tree, 1));
+
+ for (i = 0; i < TAG_ENTRIES; i++)
+ assert(radix_tree_tag_set(&tree, tag_index[i], 1));
+
+ for (j = 0; j < 256; j++) {
+ int k;
+
+ for (i = 0; i < TAG_ENTRIES; i++) {
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ if (j <= (index[k] | ((1 << order[k]) - 1)))
+ break;
+ }
+
+ radix_tree_for_each_tagged(slot, &tree, &iter, j, 1) {
+ unsigned long mask;
+ struct item *item = *slot;
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ mask = (1UL << order[k]) - 1;
+
+ assert((iter.index | mask) == (tag_index[i] | mask));
+ assert(!radix_tree_is_internal_node(item));
+ assert((item->index | mask) == (tag_index[i] | mask));
+ assert(item->order == order[k]);
+ i++;
+ }
+ }
+
+ assert(tag_tagged_items(&tree, NULL, 0, ~0UL, TAG_ENTRIES, 1, 2) ==
+ TAG_ENTRIES);
+
+ for (j = 0; j < 256; j++) {
+ int mask, k;
+
+ for (i = 0; i < TAG_ENTRIES; i++) {
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ if (j <= (index[k] | ((1 << order[k]) - 1)))
+ break;
+ }
+
+ radix_tree_for_each_tagged(slot, &tree, &iter, j, 2) {
+ struct item *item = *slot;
+ for (k = i; index[k] < tag_index[i]; k++)
+ ;
+ mask = (1 << order[k]) - 1;
+
+ assert((iter.index | mask) == (tag_index[i] | mask));
+ assert(!radix_tree_is_internal_node(item));
+ assert((item->index | mask) == (tag_index[i] | mask));
+ assert(item->order == order[k]);
+ i++;
+ }
+ }
+
+ assert(tag_tagged_items(&tree, NULL, 1, ~0UL, MT_NUM_ENTRIES * 2, 1, 0)
+ == TAG_ENTRIES);
+ i = 0;
+ radix_tree_for_each_tagged(slot, &tree, &iter, 0, 0) {
+ assert(iter.index == tag_index[i]);
+ i++;
+ }
+
+ item_kill_tree(&tree);
+}
+
+/*
+ * Basic join checks: make sure we can't find an entry in the tree after
+ * a larger entry has replaced it
+ */
+static void multiorder_join1(unsigned long index,
+ unsigned order1, unsigned order2)
+{
+ unsigned long loc;
+ void *item, *item2 = item_create(index + 1, order1);
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert_order(&tree, index, order2);
+ item = radix_tree_lookup(&tree, index);
+ radix_tree_join(&tree, index + 1, order1, item2);
+ loc = find_item(&tree, item);
+ if (loc == -1)
+ free(item);
+ item = radix_tree_lookup(&tree, index + 1);
+ assert(item == item2);
+ item_kill_tree(&tree);
+}
+
+/*
+ * Check that the accounting of exceptional entries is handled correctly
+ * by joining an exceptional entry to a normal pointer.
+ */
+static void multiorder_join2(unsigned order1, unsigned order2)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+ void *item1 = item_create(0, order1);
+ void *item2;
+
+ item_insert_order(&tree, 0, order2);
+ radix_tree_insert(&tree, 1 << order2, (void *)0x12UL);
+ item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+ assert(item2 == (void *)0x12UL);
+ assert(node->exceptional == 1);
+
+ item2 = radix_tree_lookup(&tree, 0);
+ free(item2);
+
+ radix_tree_join(&tree, 0, order1, item1);
+ item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
+ assert(item2 == item1);
+ assert(node->exceptional == 0);
+ item_kill_tree(&tree);
+}
+
+/*
+ * This test revealed an accounting bug for exceptional entries at one point.
+ * Nodes were being freed back into the pool with an elevated exception count
+ * by radix_tree_join() and then radix_tree_split() was failing to zero the
+ * count of exceptional entries.
+ */
+static void multiorder_join3(unsigned int order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+ void **slot;
+ struct radix_tree_iter iter;
+ unsigned long i;
+
+ for (i = 0; i < (1 << order); i++) {
+ radix_tree_insert(&tree, i, (void *)0x12UL);
+ }
+
+ radix_tree_join(&tree, 0, order, (void *)0x16UL);
+ rcu_barrier();
+
+ radix_tree_split(&tree, 0, 0);
+
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot, (void *)0x12UL);
+ }
+
+ __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(node->exceptional == node->count);
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_join(void)
+{
+ int i, j, idx;
+
+ for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
+ for (i = 1; i < 15; i++) {
+ for (j = 0; j < i; j++) {
+ multiorder_join1(idx, i, j);
+ }
+ }
+ }
+
+ for (i = 1; i < 15; i++) {
+ for (j = 0; j < i; j++) {
+ multiorder_join2(i, j);
+ }
+ }
+
+ for (i = 3; i < 10; i++) {
+ multiorder_join3(i);
+ }
+}
+
+static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
+{
+ struct radix_tree_preload *rtp = &radix_tree_preloads;
+ if (rtp->nr != 0)
+ printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
+ rtp->nr);
+ /*
+ * Can't check for equality here as some nodes may have been
+ * RCU-freed while we ran. But we should never finish with more
+ * nodes allocated since they should have all been preloaded.
+ */
+ if (nr_allocated > alloc)
+ printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
+ alloc, nr_allocated);
+}
+
+static void __multiorder_split(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_ATOMIC);
+ void **slot;
+ struct radix_tree_iter iter;
+ unsigned alloc;
+ struct item *item;
+
+ radix_tree_preload(GFP_KERNEL);
+ assert(item_insert_order(&tree, 0, old_order) == 0);
+ radix_tree_preload_end();
+
+ /* Wipe out the preloaded cache or it'll confuse check_mem() */
+ radix_tree_cpu_dead(0);
+
+ item = radix_tree_tag_set(&tree, 0, 2);
+
+ radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
+ alloc = nr_allocated;
+ radix_tree_split(&tree, 0, new_order);
+ check_mem(old_order, new_order, alloc);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot,
+ item_create(iter.index, new_order));
+ }
+ radix_tree_preload_end();
+
+ item_kill_tree(&tree);
+ free(item);
+}
+
+static void __multiorder_split2(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ void **slot;
+ struct radix_tree_iter iter;
+ struct radix_tree_node *node;
+ void *item;
+
+ __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x12);
+ assert(node->exceptional > 0);
+
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot,
+ item_create(iter.index, new_order));
+ }
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item != (void *)0x12);
+ assert(node->exceptional == 0);
+
+ item_kill_tree(&tree);
+}
+
+static void __multiorder_split3(int old_order, int new_order)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ void **slot;
+ struct radix_tree_iter iter;
+ struct radix_tree_node *node;
+ void *item;
+
+ __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x12);
+ assert(node->exceptional > 0);
+
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
+ }
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x16);
+ assert(node->exceptional > 0);
+
+ item_kill_tree(&tree);
+
+ __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
+
+ item = __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(item == (void *)0x12);
+ assert(node->exceptional > 0);
+
+ radix_tree_split(&tree, 0, new_order);
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ if (iter.index == (1 << new_order))
+ radix_tree_iter_replace(&tree, &iter, slot,
+ (void *)0x16);
+ else
+ radix_tree_iter_replace(&tree, &iter, slot, NULL);
+ }
+
+ item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
+ assert(item == (void *)0x16);
+ assert(node->count == node->exceptional);
+ do {
+ node = node->parent;
+ if (!node)
+ break;
+ assert(node->count == 1);
+ assert(node->exceptional == 0);
+ } while (1);
+
+ item_kill_tree(&tree);
+}
+
+static void multiorder_split(void)
+{
+ int i, j;
+
+ for (i = 3; i < 11; i++)
+ for (j = 0; j < i; j++) {
+ __multiorder_split(i, j);
+ __multiorder_split2(i, j);
+ __multiorder_split3(i, j);
+ }
+}
+
+static void multiorder_account(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ struct radix_tree_node *node;
+ void **slot;
+
+ item_insert_order(&tree, 0, 5);
+
+ __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+ __radix_tree_lookup(&tree, 0, &node, NULL);
+ assert(node->count == node->exceptional * 2);
+ radix_tree_delete(&tree, 1 << 5);
+ assert(node->exceptional == 0);
+
+ __radix_tree_insert(&tree, 1 << 5, 5, (void *)0x12);
+ __radix_tree_lookup(&tree, 1 << 5, &node, &slot);
+ assert(node->count == node->exceptional * 2);
+ __radix_tree_replace(&tree, node, slot, NULL, NULL);
+ assert(node->exceptional == 0);
+
+ item_kill_tree(&tree);
+}
+
+bool stop_iteration = false;
+
+static void *creator_func(void *ptr)
+{
+ /* 'order' is set up to ensure we have sibling entries */
+ unsigned int order = RADIX_TREE_MAP_SHIFT - 1;
+ struct radix_tree_root *tree = ptr;
+ int i;
+
+ for (i = 0; i < 10000; i++) {
+ item_insert_order(tree, 0, order);
+ item_delete_rcu(tree, 0);
+ }
+
+ stop_iteration = true;
+ return NULL;
+}
+
+static void *iterator_func(void *ptr)
+{
+ struct radix_tree_root *tree = ptr;
+ struct radix_tree_iter iter;
+ struct item *item;
+ void **slot;
+
+ while (!stop_iteration) {
+ rcu_read_lock();
+ radix_tree_for_each_slot(slot, tree, &iter, 0) {
+ item = radix_tree_deref_slot(slot);
+
+ if (!item)
+ continue;
+ if (radix_tree_deref_retry(item)) {
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+
+ item_sanity(item, iter.index);
+ }
+ rcu_read_unlock();
+ }
+ return NULL;
+}
+
+static void multiorder_iteration_race(void)
+{
+ const int num_threads = sysconf(_SC_NPROCESSORS_ONLN);
+ pthread_t worker_thread[num_threads];
+ RADIX_TREE(tree, GFP_KERNEL);
+ int i;
+
+ pthread_create(&worker_thread[0], NULL, &creator_func, &tree);
+ for (i = 1; i < num_threads; i++)
+ pthread_create(&worker_thread[i], NULL, &iterator_func, &tree);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(worker_thread[i], NULL);
+
+ item_kill_tree(&tree);
+}
+
+void multiorder_checks(void)
+{
+ int i;
+
+ for (i = 0; i < 20; i++) {
+ multiorder_check(200, i);
+ multiorder_check(0, i);
+ multiorder_check((1UL << i) + 1, i);
+ }
+
+ for (i = 0; i < 15; i++)
+ multiorder_shrink((1UL << (i + RADIX_TREE_MAP_SHIFT)), i);
+
+ multiorder_insert_bug();
+ multiorder_tag_tests();
+ multiorder_iteration();
+ multiorder_tagged_iteration();
+ multiorder_join();
+ multiorder_split();
+ multiorder_account();
+ multiorder_iteration_race();
+
+ radix_tree_cpu_dead(0);
+}
+
+int __weak main(void)
+{
+ radix_tree_init();
+ multiorder_checks();
+ return 0;
+}
diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h
new file mode 100644
index 000000000..3c8a1584e
--- /dev/null
+++ b/tools/testing/radix-tree/regression.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __REGRESSION_H__
+#define __REGRESSION_H__
+
+void regression1_test(void);
+void regression2_test(void);
+void regression3_test(void);
+
+#endif
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
new file mode 100644
index 000000000..0aece092f
--- /dev/null
+++ b/tools/testing/radix-tree/regression1.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression1
+ * Description:
+ * Salman Qazi describes the following radix-tree bug:
+ *
+ * In the following case, we can get a deadlock:
+ *
+ * 0. The radix tree contains two items, one has the index 0.
+ * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
+ * 2. The reader acquires slot(s) for item(s) including the index 0 item.
+ * 3. The non-zero index item is deleted, and as a consequence the other item
+ * is moved to the root of the tree. The place where it used to be is queued
+ * for deletion after the readers finish.
+ * 3b. The zero item is deleted, removing it from the direct slot; it remains
+ * in the rcu-delayed indirect node.
+ * 4. The reader looks at the index 0 slot, and finds that the page has 0 ref
+ * count
+ * 5. The reader looks at it again, hoping that the item will either be freed
+ * or the ref count will increase. This never happens, as the slot it is
+ * looking at will never be updated. Also, this slot can never be reclaimed
+ * because the reader is holding rcu_read_lock and is in an infinite loop.
+ *
+ * The fix is to reuse the existing "indirect" pointer case, which already
+ * requires a slot-lookup retry, as a general "retry the lookup" bit.
+ *
+ * Running:
+ * This test should run to completion in a few seconds. The above bug would
+ * cause it to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "regression.h"
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct page {
+ pthread_mutex_t lock;
+ struct rcu_head rcu;
+ int count;
+ unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+ struct page *p;
+ p = malloc(sizeof(struct page));
+ p->count = 1;
+ p->index = 1;
+ pthread_mutex_init(&p->lock, NULL);
+
+ return p;
+}
+
+static void page_rcu_free(struct rcu_head *rcu)
+{
+ struct page *p = container_of(rcu, struct page, rcu);
+ assert(!p->count);
+ pthread_mutex_destroy(&p->lock);
+ free(p);
+}
+
+static void page_free(struct page *p)
+{
+ call_rcu(&p->rcu, page_rcu_free);
+}
+
+static unsigned find_get_pages(unsigned long start,
+ unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+ unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mt_tree,
+ (void ***)pages, NULL, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ assert((start | i) == 0);
+ goto restart;
+ }
+ /*
+ * No exceptional entries are inserted in this test.
+ */
+ assert(0);
+ }
+
+ pthread_mutex_lock(&page->lock);
+ if (!page->count) {
+ pthread_mutex_unlock(&page->lock);
+ goto repeat;
+ }
+ /* don't actually update page refcount */
+ pthread_mutex_unlock(&page->lock);
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static pthread_barrier_t worker_barrier;
+
+static void *regression1_fn(void *arg)
+{
+ rcu_register_thread();
+
+ if (pthread_barrier_wait(&worker_barrier) ==
+ PTHREAD_BARRIER_SERIAL_THREAD) {
+ int j;
+
+ for (j = 0; j < 1000000; j++) {
+ struct page *p;
+
+ p = page_alloc();
+ pthread_mutex_lock(&mt_lock);
+ radix_tree_insert(&mt_tree, 0, p);
+ pthread_mutex_unlock(&mt_lock);
+
+ p = page_alloc();
+ pthread_mutex_lock(&mt_lock);
+ radix_tree_insert(&mt_tree, 1, p);
+ pthread_mutex_unlock(&mt_lock);
+
+ pthread_mutex_lock(&mt_lock);
+ p = radix_tree_delete(&mt_tree, 1);
+ pthread_mutex_lock(&p->lock);
+ p->count--;
+ pthread_mutex_unlock(&p->lock);
+ pthread_mutex_unlock(&mt_lock);
+ page_free(p);
+
+ pthread_mutex_lock(&mt_lock);
+ p = radix_tree_delete(&mt_tree, 0);
+ pthread_mutex_lock(&p->lock);
+ p->count--;
+ pthread_mutex_unlock(&p->lock);
+ pthread_mutex_unlock(&mt_lock);
+ page_free(p);
+ }
+ } else {
+ int j;
+
+ for (j = 0; j < 100000000; j++) {
+ struct page *pages[10];
+
+ find_get_pages(0, 10, pages);
+ }
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static pthread_t *threads;
+void regression1_test(void)
+{
+ int nr_threads;
+ int i;
+ long arg;
+
+ /* Regression #1 */
+ printv(1, "running regression test 1, should finish in under a minute\n");
+ nr_threads = 2;
+ pthread_barrier_init(&worker_barrier, NULL, nr_threads);
+
+	threads = malloc(nr_threads * sizeof(*threads));
+
+ for (i = 0; i < nr_threads; i++) {
+ arg = i;
+ if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ free(threads);
+
+ printv(1, "regression test 1, done\n");
+}
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
new file mode 100644
index 000000000..424b91c77
--- /dev/null
+++ b/tools/testing/radix-tree/regression2.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression2
+ * Description:
+ * Toshiyuki Okajima describes the following radix-tree bug:
+ *
+ * In the following case, we can get a hangup on
+ * radix_tree_gang_lookup_tag_slot.
+ *
+ * 0. The radix tree contains RADIX_TREE_MAP_SIZE items. And the tag of
+ * a certain item has PAGECACHE_TAG_DIRTY.
+ * 1. radix_tree_range_tag_if_tagged(, start, end, , PAGECACHE_TAG_DIRTY,
+ * PAGECACHE_TAG_TOWRITE) is called to add PAGECACHE_TAG_TOWRITE tag
+ * for the tag which has PAGECACHE_TAG_DIRTY. However, there is no tag with
+ * PAGECACHE_TAG_DIRTY within the range from start to end. As a result,
+ * there is no tag with PAGECACHE_TAG_TOWRITE but the root tag has
+ * PAGECACHE_TAG_TOWRITE.
+ * 2. An item is added into the radix tree and its height is extended from
+ * 1 to 2. At that time, the new radix tree node inherits the tag status of
+ * the root tag. Therefore the tag of the new radix tree node has
+ * PAGECACHE_TAG_TOWRITE but there is no slot with a PAGECACHE_TAG_TOWRITE
+ * tag in the child node of the new radix tree node.
+ * 3. The tag of a certain item is cleared with PAGECACHE_TAG_DIRTY.
+ * 4. All items within the index range from 0 to RADIX_TREE_MAP_SIZE - 1 are
+ * released. (Only the item whose index is RADIX_TREE_MAP_SIZE exists in the
+ * radix tree.) As a result, the slot of the radix tree node is NULL but
+ * the tag which corresponds to the slot has PAGECACHE_TAG_TOWRITE.
+ * 5. radix_tree_gang_lookup_tag_slot(PAGECACHE_TAG_TOWRITE) calls
+ * __lookup_tag, which returns 0 without changing the index (its input and
+ * output parameter), because the first slot of the radix tree node is NULL
+ * while the tag which corresponds to that slot has PAGECACHE_TAG_TOWRITE.
+ * Therefore radix_tree_gang_lookup_tag_slot keeps calling __lookup_tag to
+ * get some items, but it never gets any.
+ *
+ * The fix is to make radix_tree_range_tag_if_tagged not set the root tag
+ * when it did not set any tags within the specified range.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would cause it
+ * to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+#include "test.h"
+
+#define PAGECACHE_TAG_DIRTY 0
+#define PAGECACHE_TAG_WRITEBACK 1
+#define PAGECACHE_TAG_TOWRITE 2
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+unsigned long page_count = 0;
+
+struct page {
+ unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+ struct page *p;
+ p = malloc(sizeof(struct page));
+ p->index = page_count++;
+
+ return p;
+}
+
+void regression2_test(void)
+{
+ int i;
+ struct page *p;
+ int max_slots = RADIX_TREE_MAP_SIZE;
+ unsigned long int start, end;
+ struct page *pages[1];
+
+ printv(1, "running regression test 2 (should take milliseconds)\n");
+ /* 0. */
+ for (i = 0; i <= max_slots - 1; i++) {
+ p = page_alloc();
+ radix_tree_insert(&mt_tree, i, p);
+ }
+ radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+ /* 1. */
+ start = 0;
+ end = max_slots - 2;
+ tag_tagged_items(&mt_tree, NULL, start, end, 1,
+ PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+
+ /* 2. */
+ p = page_alloc();
+ radix_tree_insert(&mt_tree, max_slots, p);
+
+ /* 3. */
+ radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+ /* 4. */
+ for (i = max_slots - 1; i >= 0; i--)
+ free(radix_tree_delete(&mt_tree, i));
+
+ /* 5. */
+	// NOTE: start should not be 0, because then
+	// radix_tree_gang_lookup_tag_slot can return without exercising the bug.
+ start = 1;
+ end = max_slots - 2;
+ radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end,
+ PAGECACHE_TAG_TOWRITE);
+
+	/* Remove all the remaining nodes */
+ free(radix_tree_delete(&mt_tree, max_slots));
+
+ BUG_ON(!radix_tree_empty(&mt_tree));
+
+ printv(1, "regression test 2, done\n");
+}
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
new file mode 100644
index 000000000..ace2543c3
--- /dev/null
+++ b/tools/testing/radix-tree/regression3.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Regression3
+ * Description:
+ * The helper radix_tree_iter_retry resets next_index to the current index,
+ * so in the following radix_tree_next_slot the current chunk size becomes
+ * zero. This isn't checked, and it tries to dereference a NULL pointer in
+ * slot.
+ *
+ * The helper radix_tree_iter_resume resets slot to NULL and next_index to
+ * index + 1; for tagged iteration it must also reset the tags cached in the
+ * iterator, to abort the next radix_tree_next_slot and fall back to the
+ * slow path in radix_tree_next_chunk.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would
+ * cause it to segfault.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+void regression3_test(void)
+{
+ RADIX_TREE(root, GFP_KERNEL);
+ void *ptr0 = (void *)4ul;
+ void *ptr = (void *)8ul;
+ struct radix_tree_iter iter;
+ void **slot;
+ bool first;
+
+ printv(1, "running regression test 3 (should take milliseconds)\n");
+
+ radix_tree_insert(&root, 0, ptr0);
+ radix_tree_tag_set(&root, 0, 0);
+
+ first = true;
+ radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+ printv(2, "tagged %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ radix_tree_tag_set(&root, 1, 0);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printv(2, "retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+ radix_tree_delete(&root, 1);
+
+ first = true;
+ radix_tree_for_each_slot(slot, &root, &iter, 0) {
+ printv(2, "slot %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printv(2, "retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+ radix_tree_delete(&root, 1);
+
+ first = true;
+ radix_tree_for_each_contig(slot, &root, &iter, 0) {
+ printv(2, "contig %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printv(2, "retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+
+ radix_tree_for_each_slot(slot, &root, &iter, 0) {
+ printv(2, "slot %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printv(2, "next at %ld\n", iter.index);
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+ }
+
+ radix_tree_for_each_contig(slot, &root, &iter, 0) {
+ printv(2, "contig %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printv(2, "next at %ld\n", iter.index);
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+ }
+
+ radix_tree_tag_set(&root, 0, 0);
+ radix_tree_tag_set(&root, 1, 0);
+ radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+ printv(2, "tagged %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printv(2, "next at %ld\n", iter.index);
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+ }
+
+ radix_tree_delete(&root, 0);
+ radix_tree_delete(&root, 1);
+
+ printv(1, "regression test 3 passed\n");
+}
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
new file mode 100644
index 000000000..543181e48
--- /dev/null
+++ b/tools/testing/radix-tree/tag_check.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+
+
+static void
+__simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
+{
+ unsigned long first = 0;
+ int ret;
+
+ item_check_absent(tree, index);
+ assert(item_tag_get(tree, index, tag) == 0);
+
+ item_insert(tree, index);
+ assert(item_tag_get(tree, index, tag) == 0);
+ item_tag_set(tree, index, tag);
+ ret = item_tag_get(tree, index, tag);
+ assert(ret != 0);
+ ret = tag_tagged_items(tree, NULL, first, ~0UL, 10, tag, !tag);
+ assert(ret == 1);
+ ret = item_tag_get(tree, index, !tag);
+ assert(ret != 0);
+ ret = item_delete(tree, index);
+ assert(ret != 0);
+ item_insert(tree, index);
+ ret = item_tag_get(tree, index, tag);
+ assert(ret == 0);
+ ret = item_delete(tree, index);
+ assert(ret != 0);
+ ret = item_delete(tree, index);
+ assert(ret == 0);
+}
+
+void simple_checks(void)
+{
+ unsigned long index;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ for (index = 0; index < 10000; index++) {
+ __simple_checks(&tree, index, 0);
+ __simple_checks(&tree, index, 1);
+ }
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ printv(2, "before item_kill_tree: %d allocated\n", nr_allocated);
+ item_kill_tree(&tree);
+ rcu_barrier();
+ printv(2, "after item_kill_tree: %d allocated\n", nr_allocated);
+}
+
+/*
+ * Check that tags propagate correctly when extending a tree.
+ */
+static void extend_checks(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 43);
+ assert(item_tag_get(&tree, 43, 0) == 0);
+ item_tag_set(&tree, 43, 0);
+ assert(item_tag_get(&tree, 43, 0) == 1);
+ item_insert(&tree, 1000000);
+ assert(item_tag_get(&tree, 43, 0) == 1);
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ item_delete(&tree, 1000000);
+ assert(item_tag_get(&tree, 43, 0) != 0);
+ item_delete(&tree, 43);
+ assert(item_tag_get(&tree, 43, 0) == 0); /* crash */
+ assert(item_tag_get(&tree, 0, 0) == 1);
+
+ verify_tag_consistency(&tree, 0);
+
+ item_kill_tree(&tree);
+}
+
+/*
+ * Check that tags propagate correctly when contracting a tree.
+ */
+static void contract_checks(void)
+{
+ struct item *item;
+ int tmp;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ tmp = 1<<RADIX_TREE_MAP_SHIFT;
+ item_insert(&tree, tmp);
+ item_insert(&tree, tmp+1);
+ item_tag_set(&tree, tmp, 0);
+ item_tag_set(&tree, tmp, 1);
+ item_tag_set(&tree, tmp+1, 0);
+ item_delete(&tree, tmp+1);
+ item_tag_clear(&tree, tmp, 1);
+
+ assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 0) == 1);
+ assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 1) == 0);
+
+ assert(item_tag_get(&tree, tmp, 0) == 1);
+ assert(item_tag_get(&tree, tmp, 1) == 0);
+
+ verify_tag_consistency(&tree, 0);
+ item_kill_tree(&tree);
+}
+
+/*
+ * Stupid tag thrasher
+ *
+ * Create a large linear array corresponding to the tree. Each element in
+ * the array is kept coherent with the corresponding index in the tree.
+ */
+
+enum {
+ NODE_ABSENT = 0,
+ NODE_PRESENT = 1,
+ NODE_TAGGED = 2,
+};
+
+#define THRASH_SIZE (1000 * 1000)
+#define N 127
+#define BATCH 33
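+/*
+ * With N = 127, each chunk loop in do_thrash() steps through sizes 1, 127
+ * and 16129 before exceeding THRASH_SIZE.
+ */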
+
+static void gang_check(struct radix_tree_root *tree,
+ char *thrash_state, int tag)
+{
+ struct item *items[BATCH];
+ int nr_found;
+ unsigned long index = 0;
+ unsigned long last_index = 0;
+
+ while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items,
+ index, BATCH, tag))) {
+ int i;
+
+ for (i = 0; i < nr_found; i++) {
+ struct item *item = items[i];
+
+ while (last_index < item->index) {
+ assert(thrash_state[last_index] != NODE_TAGGED);
+ last_index++;
+ }
+ assert(thrash_state[last_index] == NODE_TAGGED);
+ last_index++;
+ }
+ index = items[nr_found - 1]->index + 1;
+ }
+}
+
+static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
+{
+ int insert_chunk;
+ int delete_chunk;
+ int tag_chunk;
+ int untag_chunk;
+ int total_tagged = 0;
+ int total_present = 0;
+
+ for (insert_chunk = 1; insert_chunk < THRASH_SIZE; insert_chunk *= N)
+ for (delete_chunk = 1; delete_chunk < THRASH_SIZE; delete_chunk *= N)
+ for (tag_chunk = 1; tag_chunk < THRASH_SIZE; tag_chunk *= N)
+ for (untag_chunk = 1; untag_chunk < THRASH_SIZE; untag_chunk *= N) {
+ int i;
+ unsigned long index;
+ int nr_inserted = 0;
+ int nr_deleted = 0;
+ int nr_tagged = 0;
+ int nr_untagged = 0;
+ int actual_total_tagged;
+ int actual_total_present;
+
+ for (i = 0; i < insert_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_ABSENT)
+ continue;
+ item_check_absent(tree, index);
+ item_insert(tree, index);
+ assert(thrash_state[index] != NODE_PRESENT);
+ thrash_state[index] = NODE_PRESENT;
+ nr_inserted++;
+ total_present++;
+ }
+
+ for (i = 0; i < delete_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] == NODE_ABSENT)
+ continue;
+ item_check_present(tree, index);
+ if (item_tag_get(tree, index, tag)) {
+ assert(thrash_state[index] == NODE_TAGGED);
+ total_tagged--;
+ } else {
+ assert(thrash_state[index] == NODE_PRESENT);
+ }
+ item_delete(tree, index);
+ assert(thrash_state[index] != NODE_ABSENT);
+ thrash_state[index] = NODE_ABSENT;
+ nr_deleted++;
+ total_present--;
+ }
+
+ for (i = 0; i < tag_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_PRESENT) {
+ if (item_lookup(tree, index))
+ assert(item_tag_get(tree, index, tag));
+ continue;
+ }
+ item_tag_set(tree, index, tag);
+ item_tag_set(tree, index, tag);
+ assert(thrash_state[index] != NODE_TAGGED);
+ thrash_state[index] = NODE_TAGGED;
+ nr_tagged++;
+ total_tagged++;
+ }
+
+ for (i = 0; i < untag_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_TAGGED)
+ continue;
+ item_check_present(tree, index);
+ assert(item_tag_get(tree, index, tag));
+ item_tag_clear(tree, index, tag);
+ item_tag_clear(tree, index, tag);
+ assert(thrash_state[index] != NODE_PRESENT);
+ thrash_state[index] = NODE_PRESENT;
+ nr_untagged++;
+ total_tagged--;
+ }
+
+ actual_total_tagged = 0;
+ actual_total_present = 0;
+ for (index = 0; index < THRASH_SIZE; index++) {
+ switch (thrash_state[index]) {
+ case NODE_ABSENT:
+ item_check_absent(tree, index);
+ break;
+ case NODE_PRESENT:
+ item_check_present(tree, index);
+ assert(!item_tag_get(tree, index, tag));
+ actual_total_present++;
+ break;
+ case NODE_TAGGED:
+ item_check_present(tree, index);
+ assert(item_tag_get(tree, index, tag));
+ actual_total_present++;
+ actual_total_tagged++;
+ break;
+ }
+ }
+
+ gang_check(tree, thrash_state, tag);
+
+ printv(2, "%d(%d) %d(%d) %d(%d) %d(%d) / "
+ "%d(%d) present, %d(%d) tagged\n",
+ insert_chunk, nr_inserted,
+ delete_chunk, nr_deleted,
+ tag_chunk, nr_tagged,
+ untag_chunk, nr_untagged,
+ total_present, actual_total_present,
+ total_tagged, actual_total_tagged);
+ }
+}
+
+static void thrash_tags(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ char *thrash_state;
+
+ thrash_state = malloc(THRASH_SIZE);
+ memset(thrash_state, 0, THRASH_SIZE);
+
+ do_thrash(&tree, thrash_state, 0);
+
+ verify_tag_consistency(&tree, 0);
+ item_kill_tree(&tree);
+ free(thrash_state);
+}
+
+static void leak_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 1000000);
+ item_delete(&tree, 1000000);
+ item_kill_tree(&tree);
+}
+
+static void __leak_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_insert(&tree, 1000000);
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_delete(&tree, 1000000);
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_kill_tree(&tree);
+ printv(2, "%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+}
+
+static void single_check(void)
+{
+ struct item *items[BATCH];
+ RADIX_TREE(tree, GFP_KERNEL);
+ int ret;
+ unsigned long first = 0;
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+ assert(ret == 1);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
+ assert(ret == 0);
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ ret = tag_tagged_items(&tree, NULL, first, 10, 10, 0, 1);
+ assert(ret == 1);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 1);
+ assert(ret == 1);
+ item_tag_clear(&tree, 0, 0);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+ assert(ret == 0);
+ item_kill_tree(&tree);
+}
+
+void radix_tree_clear_tags_test(void)
+{
+ unsigned long index;
+ struct radix_tree_node *node;
+ struct radix_tree_iter iter;
+ void **slot;
+
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ __radix_tree_lookup(&tree, 0, &node, &slot);
+ radix_tree_clear_tags(&tree, node, slot);
+ assert(item_tag_get(&tree, 0, 0) == 0);
+
+ for (index = 0; index < 1000; index++) {
+ item_insert(&tree, index);
+ item_tag_set(&tree, index, 0);
+ }
+
+ radix_tree_for_each_slot(slot, &tree, &iter, 0) {
+ radix_tree_clear_tags(&tree, iter.node, slot);
+ assert(item_tag_get(&tree, iter.index, 0) == 0);
+ }
+
+ item_kill_tree(&tree);
+}
+
+void tag_check(void)
+{
+ single_check();
+ extend_checks();
+ contract_checks();
+ rcu_barrier();
+ printv(2, "after extend_checks: %d allocated\n", nr_allocated);
+ __leak_check();
+ leak_check();
+ rcu_barrier();
+ printv(2, "after leak_check: %d allocated\n", nr_allocated);
+ simple_checks();
+ rcu_barrier();
+ printv(2, "after simple_checks: %d allocated\n", nr_allocated);
+ thrash_tags();
+ rcu_barrier();
+ printv(2, "after thrash_tags: %d allocated\n", nr_allocated);
+ radix_tree_clear_tags_test();
+}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
new file mode 100644
index 000000000..def601557
--- /dev/null
+++ b/tools/testing/radix-tree/test.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "test.h"
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_set(root, index, tag);
+}
+
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_clear(root, index, tag);
+}
+
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_get(root, index, tag);
+}
+
+int __item_insert(struct radix_tree_root *root, struct item *item)
+{
+ return __radix_tree_insert(root, item->index, item->order, item);
+}
+
+struct item *item_create(unsigned long index, unsigned int order)
+{
+ struct item *ret = malloc(sizeof(*ret));
+
+ ret->index = index;
+ ret->order = order;
+ return ret;
+}
+
+int item_insert_order(struct radix_tree_root *root, unsigned long index,
+ unsigned order)
+{
+ struct item *item = item_create(index, order);
+ int err = __item_insert(root, item);
+ if (err)
+ free(item);
+ return err;
+}
+
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+ return item_insert_order(root, index, 0);
+}
+
+void item_sanity(struct item *item, unsigned long index)
+{
+ unsigned long mask;
+ assert(!radix_tree_is_internal_node(item));
+ assert(item->order < BITS_PER_LONG);
+ mask = (1UL << item->order) - 1;
+ assert((item->index | mask) == (index | mask));
+}
+
+int item_delete(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item = radix_tree_delete(root, index);
+
+ if (item) {
+ item_sanity(item, index);
+ free(item);
+ return 1;
+ }
+ return 0;
+}
+
+static void item_free_rcu(struct rcu_head *head)
+{
+ struct item *item = container_of(head, struct item, rcu_head);
+
+ free(item);
+}
+
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item = radix_tree_delete(root, index);
+
+ if (item) {
+ item_sanity(item, index);
+ call_rcu(&item->rcu_head, item_free_rcu);
+ return 1;
+ }
+ return 0;
+}
+
+void item_check_present(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item;
+
+ item = radix_tree_lookup(root, index);
+ assert(item != NULL);
+ item_sanity(item, index);
+}
+
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index)
+{
+ return radix_tree_lookup(root, index);
+}
+
+void item_check_absent(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item;
+
+ item = radix_tree_lookup(root, index);
+ assert(item == NULL);
+}
+
+/*
+ * Scan only the passed range [start, start+nr) for present items
+ */
+void item_gang_check_present(struct radix_tree_root *root,
+ unsigned long start, unsigned long nr,
+ int chunk, int hop)
+{
+ struct item *items[chunk];
+ unsigned long into;
+
+ for (into = 0; into < nr; ) {
+ int nfound;
+ int nr_to_find = chunk;
+ int i;
+
+ if (nr_to_find > (nr - into))
+ nr_to_find = nr - into;
+
+ nfound = radix_tree_gang_lookup(root, (void **)items,
+ start + into, nr_to_find);
+ assert(nfound == nr_to_find);
+ for (i = 0; i < nfound; i++)
+ assert(items[i]->index == start + into + i);
+ into += hop;
+ }
+}
+
+/*
+ * Scan the entire tree, expecting present items only in [start, start+nr)
+ */
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+ unsigned long nr, int chunk)
+{
+ struct item *items[chunk];
+ unsigned long into = 0;
+ unsigned long this_index = start;
+ int nfound;
+ int i;
+
+// printf("%s(0x%08lx, 0x%08lx, %d)\n", __FUNCTION__, start, nr, chunk);
+
+ while ((nfound = radix_tree_gang_lookup(root, (void **)items, into,
+ chunk))) {
+// printf("At 0x%08lx, nfound=%d\n", into, nfound);
+ for (i = 0; i < nfound; i++) {
+ assert(items[i]->index == this_index);
+ this_index++;
+ }
+// printf("Found 0x%08lx->0x%08lx\n",
+// items[0]->index, items[nfound-1]->index);
+ into = this_index;
+ }
+ if (chunk)
+ assert(this_index == start + nr);
+ nfound = radix_tree_gang_lookup(root, (void **)items,
+ this_index, chunk);
+ assert(nfound == 0);
+}
+
+/* Use the same pattern as tag_pages_for_writeback() in mm/page-writeback.c */
+int tag_tagged_items(struct radix_tree_root *root, pthread_mutex_t *lock,
+ unsigned long start, unsigned long end, unsigned batch,
+ unsigned iftag, unsigned thentag)
+{
+ unsigned long tagged = 0;
+ struct radix_tree_iter iter;
+ void **slot;
+
+ if (batch == 0)
+ batch = 1;
+
+ if (lock)
+ pthread_mutex_lock(lock);
+ radix_tree_for_each_tagged(slot, root, &iter, start, iftag) {
+ if (iter.index > end)
+ break;
+ radix_tree_iter_tag_set(root, &iter, thentag);
+ tagged++;
+ if ((tagged % batch) != 0)
+ continue;
+ slot = radix_tree_iter_resume(slot, &iter);
+ if (lock) {
+ pthread_mutex_unlock(lock);
+ rcu_barrier();
+ pthread_mutex_lock(lock);
+ }
+ }
+ if (lock)
+ pthread_mutex_unlock(lock);
+
+ return tagged;
+}
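+
+/*
+ * Example (as in copy_tag_check() in main.c): copy tag 0 onto tag 1 for
+ * every item in [start, end], resuming the iterator every ITEMS entries:
+ *
+ *	tag_tagged_items(&tree, NULL, start, end, ITEMS, 0, 1);
+ */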
+
+/* Use the same pattern as find_swap_entry() in mm/shmem.c */
+unsigned long find_item(struct radix_tree_root *root, void *item)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ unsigned long found = -1;
+ unsigned long checked = 0;
+
+ radix_tree_for_each_slot(slot, root, &iter, 0) {
+ if (*slot == item) {
+ found = iter.index;
+ break;
+ }
+ checked++;
+ if ((checked % 4) != 0)
+ continue;
+ slot = radix_tree_iter_resume(slot, &iter);
+ }
+
+ return found;
+}
+
+static int verify_node(struct radix_tree_node *slot, unsigned int tag,
+ int tagged)
+{
+ int anyset = 0;
+ int i;
+ int j;
+
+ slot = entry_to_node(slot);
+
+ /* Verify consistency at this level */
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) {
+ if (slot->tags[tag][i]) {
+ anyset = 1;
+ break;
+ }
+ }
+ if (tagged != anyset) {
+ printf("tag: %u, shift %u, tagged: %d, anyset: %d\n",
+ tag, slot->shift, tagged, anyset);
+ for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+ printf("tag %d: ", j);
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+ printf("%016lx ", slot->tags[j][i]);
+ printf("\n");
+ }
+ return 1;
+ }
+ assert(tagged == anyset);
+
+ /* Go for next level */
+ if (slot->shift > 0) {
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+ if (slot->slots[i])
+ if (verify_node(slot->slots[i], tag,
+ !!test_bit(i, slot->tags[tag]))) {
+ printf("Failure at off %d\n", i);
+ for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+ printf("tag %d: ", j);
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+ printf("%016lx ", slot->tags[j][i]);
+ printf("\n");
+ }
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
+{
+ struct radix_tree_node *node = root->rnode;
+ if (!radix_tree_is_internal_node(node))
+ return;
+ verify_node(node, tag, !!root_tag_get(root, tag));
+}
+
+void item_kill_tree(struct radix_tree_root *root)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ struct item *items[32];
+ int nfound;
+
+ radix_tree_for_each_slot(slot, root, &iter, 0) {
+ if (radix_tree_exceptional_entry(*slot))
+ radix_tree_delete(root, iter.index);
+ }
+
+ while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
+ int i;
+
+ for (i = 0; i < nfound; i++) {
+ void *ret;
+
+ ret = radix_tree_delete(root, items[i]->index);
+ assert(ret == items[i]);
+ free(items[i]);
+ }
+ }
+ assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
+ assert(root->rnode == NULL);
+}
+
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
+{
+ unsigned shift;
+ struct radix_tree_node *node = root->rnode;
+ if (!radix_tree_is_internal_node(node)) {
+ assert(maxindex == 0);
+ return;
+ }
+
+ node = entry_to_node(node);
+ assert(maxindex <= node_maxindex(node));
+
+ shift = node->shift;
+ if (shift > 0)
+ assert(maxindex > shift_maxindex(shift - RADIX_TREE_MAP_SHIFT));
+ else
+ assert(maxindex > 0);
+}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
new file mode 100644
index 000000000..92d901eac
--- /dev/null
+++ b/tools/testing/radix-tree/test.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <pthread.h>
+
+struct item {
+ struct rcu_head rcu_head;
+ unsigned long index;
+ unsigned int order;
+};
+
+struct item *item_create(unsigned long index, unsigned int order);
+int __item_insert(struct radix_tree_root *root, struct item *item);
+int item_insert(struct radix_tree_root *root, unsigned long index);
+void item_sanity(struct item *item, unsigned long index);
+int item_insert_order(struct radix_tree_root *root, unsigned long index,
+ unsigned order);
+int item_delete(struct radix_tree_root *root, unsigned long index);
+int item_delete_rcu(struct radix_tree_root *root, unsigned long index);
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
+
+void item_check_present(struct radix_tree_root *root, unsigned long index);
+void item_check_absent(struct radix_tree_root *root, unsigned long index);
+void item_gang_check_present(struct radix_tree_root *root,
+ unsigned long start, unsigned long nr,
+ int chunk, int hop);
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+ unsigned long nr, int chunk);
+void item_kill_tree(struct radix_tree_root *root);
+
+int tag_tagged_items(struct radix_tree_root *, pthread_mutex_t *,
+ unsigned long start, unsigned long end, unsigned batch,
+ unsigned iftag, unsigned thentag);
+unsigned long find_item(struct radix_tree_root *, void *item);
+
+void tag_check(void);
+void multiorder_checks(void);
+void iteration_test(unsigned order, unsigned duration);
+void benchmark(void);
+void idr_checks(void);
+void ida_tests(void);
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag);
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag);
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex);
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag);
+
+extern int nr_allocated;
+
+/* Normally private parts of lib/radix-tree.c */
+struct radix_tree_node *entry_to_node(void *ptr);
+void radix_tree_dump(struct radix_tree_root *root);
+int root_tag_get(struct radix_tree_root *root, unsigned int tag);
+unsigned long node_maxindex(struct radix_tree_node *);
+unsigned long shift_maxindex(unsigned int shift);
+int radix_tree_cpu_dead(unsigned int cpu);
+struct radix_tree_preload {
+ unsigned nr;
+ struct radix_tree_node *nodes;
+};
+extern struct radix_tree_preload radix_tree_preloads;
diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile
new file mode 100644
index 000000000..933c3a6e4
--- /dev/null
+++ b/tools/testing/scatterlist/Makefile
@@ -0,0 +1,30 @@
+CFLAGS += -I. -I../../include -g -O2 -Wall -fsanitize=address
+LDFLAGS += -fsanitize=address -fsanitize=undefined
+TARGETS = main
+OFILES = main.o scatterlist.o
+
+ifeq ($(BUILD), 32)
+ CFLAGS += -m32
+ LDFLAGS += -m32
+endif
+
+targets: include $(TARGETS)
+
+main: $(OFILES)
+
+clean:
+ $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h
+ @rmdir asm
+
+scatterlist.c: ../../../lib/scatterlist.c
+ @sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
+
+.PHONY: include
+
+include: ../../../include/linux/scatterlist.h
+ @mkdir -p linux
+ @mkdir -p asm
+ @touch asm/io.h
+ @touch linux/highmem.h
+ @touch linux/kmemleak.h
+ @cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
new file mode 100644
index 000000000..6f9ac14aa
--- /dev/null
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -0,0 +1,125 @@
+#ifndef _LINUX_MM_H
+#define _LINUX_MM_H
+
+#include <assert.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+
+typedef unsigned long dma_addr_t;
+
+struct page;
+
+#define unlikely
+
+#define BUG_ON(x) assert(!(x))
+
+#define WARN_ON(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+
+#define WARN_ON_ONCE(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ assert(0); \
+ unlikely(__ret_warn_on); \
+})
+
+#define PAGE_SIZE (4096)
+#define PAGE_SHIFT (12)
+#define PAGE_MASK (~(PAGE_SIZE-1))
+
+#define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask))
+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
+
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
+#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
+
+#define virt_to_page(x) ((void *)(x))
+#define page_address(x) ((void *)(x))
+
+static inline unsigned long page_to_phys(struct page *page)
+{
+ assert(0);
+
+ return 0;
+}
+
+#define page_to_pfn(page) ((unsigned long)(page) / PAGE_SIZE)
+#define pfn_to_page(pfn) (void *)((pfn) * PAGE_SIZE)
+#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+
+#define __min(t1, t2, min1, min2, x, y) ({ \
+ t1 min1 = (x); \
+ t2 min2 = (y); \
+ (void) (&min1 == &min2); \
+ min1 < min2 ? min1 : min2; })
+
+#define ___PASTE(a,b) a##b
+#define __PASTE(a,b) ___PASTE(a,b)
+
+#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__)
+
+#define min(x, y) \
+ __min(typeof(x), typeof(y), \
+ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
+ x, y)
+
+#define min_t(type, x, y) \
+ __min(type, type, \
+ __UNIQUE_ID(min1_), __UNIQUE_ID(min2_), \
+ x, y)
+
+#define preemptible() (1)
+
+static inline void *kmap(struct page *page)
+{
+ assert(0);
+
+ return NULL;
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+ assert(0);
+
+ return NULL;
+}
+
+static inline void kunmap(void *addr)
+{
+ assert(0);
+}
+
+static inline void kunmap_atomic(void *addr)
+{
+ assert(0);
+}
+
+static inline unsigned long __get_free_page(unsigned int flags)
+{
+ return (unsigned long)malloc(PAGE_SIZE);
+}
+
+static inline void free_page(unsigned long page)
+{
+ free((void *)page);
+}
+
+static inline void *kmalloc(unsigned int size, unsigned int flags)
+{
+ return malloc(size);
+}
+
+#define kfree(x) free(x)
+
+#define kmemleak_alloc(a, b, c, d)
+#define kmemleak_free(a)
+
+#define PageSlab(p) (0)
+#define flush_kernel_dcache_page(p)
+
+#endif
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
new file mode 100644
index 000000000..0a1464181
--- /dev/null
+++ b/tools/testing/scatterlist/main.c
@@ -0,0 +1,79 @@
+#include <stdio.h>
+#include <assert.h>
+
+#include <linux/scatterlist.h>
+
+#define MAX_PAGES (64)
+
+static void set_pages(struct page **pages, const unsigned *array, unsigned num)
+{
+ unsigned int i;
+
+ assert(num < MAX_PAGES);
+ for (i = 0; i < num; i++)
+ pages[i] = (struct page *)(unsigned long)
+ ((1 + array[i]) * PAGE_SIZE);
+}
+
+#define pfn(...) (unsigned []){ __VA_ARGS__ }
+
+int main(void)
+{
+ const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
+ struct test {
+ int alloc_ret;
+ unsigned num_pages;
+ unsigned *pfn;
+ unsigned size;
+ unsigned int max_seg;
+ unsigned int expected_segments;
+ } *test, tests[] = {
+ { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
+ { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
+ { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
+ { 0, 1, pfn(0), PAGE_SIZE, sgmax, 1 },
+ { 0, 1, pfn(0), 1, sgmax, 1 },
+ { 0, 2, pfn(0, 1), 2 * PAGE_SIZE, sgmax, 1 },
+ { 0, 2, pfn(1, 0), 2 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(0, 1, 2), 3 * PAGE_SIZE, sgmax, 1 },
+ { 0, 3, pfn(0, 2, 1), 3 * PAGE_SIZE, sgmax, 3 },
+ { 0, 3, pfn(0, 1, 3), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(1, 2, 4), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 3, pfn(1, 3, 4), 3 * PAGE_SIZE, sgmax, 2 },
+ { 0, 4, pfn(0, 1, 3, 4), 4 * PAGE_SIZE, sgmax, 2 },
+ { 0, 5, pfn(0, 1, 3, 4, 5), 5 * PAGE_SIZE, sgmax, 2 },
+ { 0, 5, pfn(0, 1, 3, 4, 6), 5 * PAGE_SIZE, sgmax, 3 },
+ { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, sgmax, 1 },
+ { 0, 5, pfn(0, 1, 2, 3, 4), 5 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+ { 0, 6, pfn(0, 1, 2, 3, 4, 5), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+ { 0, 6, pfn(0, 2, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 4 },
+ { 0, 6, pfn(0, 1, 3, 4, 5, 6), 6 * PAGE_SIZE, 2 * PAGE_SIZE, 3 },
+ { 0, 0, NULL, 0, 0, 0 },
+ };
+ unsigned int i;
+
+ for (i = 0, test = tests; test->expected_segments; test++, i++) {
+ struct page *pages[MAX_PAGES];
+ struct sg_table st;
+ int ret;
+
+ set_pages(pages, test->pfn, test->num_pages);
+
+ ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
+ 0, test->size, test->max_seg,
+ GFP_KERNEL);
+ assert(ret == test->alloc_ret);
+
+ if (test->alloc_ret)
+ continue;
+
+ assert(st.nents == test->expected_segments);
+ assert(st.orig_nents == test->expected_segments);
+
+ sg_free_table(&st);
+ }
+
+ assert(i == (sizeof(tests) / sizeof(tests[0])) - 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
new file mode 100644
index 000000000..917503524
--- /dev/null
+++ b/tools/testing/selftests/.gitignore
@@ -0,0 +1,5 @@
+kselftest
+gpiogpio-event-mon
+gpiogpio-hammer
+gpioinclude/
+gpiolsgpio
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
new file mode 100644
index 000000000..f0017c831
--- /dev/null
+++ b/tools/testing/selftests/Makefile
@@ -0,0 +1,167 @@
+# SPDX-License-Identifier: GPL-2.0
+TARGETS = android
+TARGETS += bpf
+TARGETS += breakpoints
+TARGETS += capabilities
+TARGETS += cgroup
+TARGETS += cpufreq
+TARGETS += cpu-hotplug
+TARGETS += efivarfs
+TARGETS += exec
+TARGETS += filesystems
+TARGETS += firmware
+TARGETS += ftrace
+TARGETS += futex
+TARGETS += gpio
+TARGETS += intel_pstate
+TARGETS += ipc
+TARGETS += kcmp
+TARGETS += kvm
+TARGETS += lib
+TARGETS += membarrier
+TARGETS += memfd
+TARGETS += memory-hotplug
+TARGETS += mount
+TARGETS += mqueue
+TARGETS += net
+TARGETS += netfilter
+TARGETS += nsfs
+TARGETS += powerpc
+TARGETS += proc
+TARGETS += pstore
+TARGETS += ptrace
+TARGETS += rseq
+TARGETS += rtc
+TARGETS += seccomp
+TARGETS += sigaltstack
+TARGETS += size
+TARGETS += sparc64
+TARGETS += splice
+TARGETS += static_keys
+TARGETS += sync
+TARGETS += sysctl
+ifneq (1, $(quicktest))
+TARGETS += timers
+endif
+TARGETS += user
+TARGETS += vm
+TARGETS += x86
+TARGETS += zram
+# Please keep the TARGETS list alphabetically sorted
+# Run "make quicktest=1 run_tests" or
+# "make quicktest=1 kselftest" from top level Makefile
+
+TARGETS_HOTPLUG = cpu-hotplug
+TARGETS_HOTPLUG += memory-hotplug
+
+# Clear LDFLAGS and MAKEFLAGS if called from main
+# Makefile to avoid test build failures when test
+# Makefile doesn't have explicit build rules.
+ifeq (1,$(MAKELEVEL))
+override LDFLAGS =
+override MAKEFLAGS =
+endif
+
+ifneq ($(KBUILD_SRC),)
+override LDFLAGS =
+endif
+
+BUILD := $(O)
+ifndef BUILD
+ BUILD := $(KBUILD_OUTPUT)
+endif
+ifndef BUILD
+ BUILD := $(shell pwd)
+endif
+
+# KSFT_TAP_LEVEL is used from KSFT framework to prevent nested TAP header
+# printing from tests. Applicable to run_tests case where run_tests adds
+# TAP header prior running tests and when a test program invokes another
+# with system() call. Export it here to cover override RUN_TESTS defines.
+export KSFT_TAP_LEVEL=`echo 1`
+
+export BUILD
+all:
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+ done;
+
+run_tests: all
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ done;
+
+hotplug:
+ @for TARGET in $(TARGETS_HOTPLUG); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET;\
+ done;
+
+run_hotplug: hotplug
+ @for TARGET in $(TARGETS_HOTPLUG); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET run_full_test;\
+ done;
+
+clean_hotplug:
+ @for TARGET in $(TARGETS_HOTPLUG); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+ done;
+
+run_pstore_crash:
+ make -C pstore run_crash
+
+INSTALL_PATH ?= install
+INSTALL_PATH := $(abspath $(INSTALL_PATH))
+ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+
+install:
+ifdef INSTALL_PATH
+ @# Ask all targets to install their files
+ mkdir -p $(INSTALL_PATH)
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \
+ done;
+
+ @# Ask all targets to emit their test scripts
+ echo "#!/bin/sh" > $(ALL_SCRIPT)
+ echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
+ echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
+ echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
+ echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
+ echo " OUTPUT=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
+ echo " cat /dev/null > \$$OUTPUT" >> $(ALL_SCRIPT)
+ echo "else" >> $(ALL_SCRIPT)
+ echo " OUTPUT=/dev/stdout" >> $(ALL_SCRIPT)
+ echo "fi" >> $(ALL_SCRIPT)
+ echo "export KSFT_TAP_LEVEL=1" >> $(ALL_SCRIPT)
+ echo "export skip=4" >> $(ALL_SCRIPT)
+
+ for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ echo "echo ; echo TAP version 13" >> $(ALL_SCRIPT); \
+ echo "echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \
+ echo "echo ========================================" >> $(ALL_SCRIPT); \
+ echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
+ echo "cd $$TARGET" >> $(ALL_SCRIPT); \
+ make -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
+ echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+ done;
+
+ chmod u+x $(ALL_SCRIPT)
+else
+ $(error Error: set INSTALL_PATH to use install)
+endif
+
+clean:
+ @for TARGET in $(TARGETS); do \
+ BUILD_TARGET=$$BUILD/$$TARGET; \
+ make OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
+ done;
+
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
new file mode 100644
index 000000000..72c25a3cb
--- /dev/null
+++ b/tools/testing/selftests/android/Makefile
@@ -0,0 +1,38 @@
+SUBDIRS := ion
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+ TEST=$$DIR"_test.sh"; \
+ if [ -e $$DIR/$$TEST ]; then \
+ rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+ fi \
+ done
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+endef
diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config
new file mode 100644
index 000000000..b4ad748a9
--- /dev/null
+++ b/tools/testing/selftests/android/config
@@ -0,0 +1,5 @@
+CONFIG_ANDROID=y
+CONFIG_STAGING=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
new file mode 100644
index 000000000..95e8f4561
--- /dev/null
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -0,0 +1,3 @@
+ionapp_export
+ionapp_import
+ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
new file mode 100644
index 000000000..88cfe88e4
--- /dev/null
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -0,0 +1,19 @@
+
+INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): ipcsocket.c ionutils.c
+
+TEST_PROGS := ion_test.sh
+
+KSFT_KHDR_INSTALL := 1
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README
new file mode 100644
index 000000000..21783e9c4
--- /dev/null
+++ b/tools/testing/selftests/android/ion/README
@@ -0,0 +1,101 @@
+ION BUFFER SHARING UTILITY
+==========================
+File: ion_test.sh: Utility to test the ION driver buffer sharing mechanism.
+Author: Pintu Kumar <pintu.ping@gmail.com>
+
+Introduction:
+-------------
+This is a test utility to verify ION buffer sharing in user space
+between two independent processes.
+It uses a unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD
+to another process so that both processes share the same buffer.
+This utility demonstrates how ION buffer sharing can be implemented
+between two user space processes, using various heap types.
+The following heap types are supported by the ION driver:
+ION_HEAP_TYPE_SYSTEM (0)
+ION_HEAP_TYPE_SYSTEM_CONTIG (1)
+ION_HEAP_TYPE_CARVEOUT (2)
+ION_HEAP_TYPE_CHUNK (3)
+ION_HEAP_TYPE_DMA (4)
+
+By default only the SYSTEM and SYSTEM_CONTIG heaps are supported.
+Each heap type is associated with a respective heap id.
+This utility is designed as a client/server program.
+The server part (ionapp_export) is the exporter of the buffer.
+It is responsible for creating an ION client, allocating the buffer
+based on the heap id, writing some data to this buffer and then
+exporting the FD (associated with this buffer) to another process
+using socket IPC.
+This FD is called the buffer FD (which is different from the ION
+client FD).
+
+The client part (ionapp_import) is the importer of the buffer.
+It retrieves the FD from the socket data and installs it into its own
+address space.
+This new FD internally points to the same kernel buffer.
+So first it reads the data stored in this buffer and prints it.
+Then it writes data of a different size (and possibly different
+content) to the same buffer.
+Finally, the buffer FD must be closed by both the exporter and the
+importer.
+Thus the same kernel buffer is shared between two user space processes
+using the ION driver, with only a single allocation.
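+
+For reference, the FD transfer with SCM_RIGHTS boils down to sending a
+control message like the following (a minimal sketch with error
+handling omitted; buffer_fd and sockfd are illustrative names, see
+ipcsocket.c in this directory for the complete version):
+
+	char cmsg_buf[CMSG_SPACE(sizeof(int))];
+	char token = 'F';
+	struct iovec iov = { .iov_base = &token, .iov_len = 1 };
+	struct msghdr msgh = {
+		.msg_iov = &iov, .msg_iovlen = 1,
+		.msg_control = cmsg_buf, .msg_controllen = sizeof(cmsg_buf),
+	};
+	struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msgh);
+
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SCM_RIGHTS;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+	memcpy(CMSG_DATA(cmsg), &buffer_fd, sizeof(int));
+	sendmsg(sockfd, &msgh, 0);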
+
+Prerequisite:
+-------------
+This utility works only if the /dev/ion interface is present.
+The following configs need to be enabled in the kernel to include the
+ion driver:
+CONFIG_ANDROID=y
+CONFIG_STAGING=y
+CONFIG_ION=y
+CONFIG_ION_SYSTEM_HEAP=y
+
+This utility must be run as the root user.
+
+
+Compile and test:
+-----------------
+This utility is meant to be run as part of the kernel's kselftest
+framework.
+To compile and run using kselftest you can simply do the following from
+the kernel top directory:
+linux$ make TARGETS=android kselftest
+Or you can also use:
+linux$ make -C tools/testing/selftests TARGETS=android run_tests
+The selftest directly executes the ion_test.sh script to test buffer
+sharing using the ion system heap.
+The heap size used by the test is hard coded inside this script.
+You need to be root to run it under selftest.
+
+You can also compile and test manually using the following steps:
+ion$ make
+This will generate the executables: ionapp_export, ionapp_import and
+ionmap_test.
+Now you can run the export and import manually by specifying the heap
+type and the heap size, as in the example below.
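+For example (heap type 0, a 4096-byte buffer; the values here are only
+illustrative):
+ion$ sudo ./ionapp_export -i 0 -s 4096 &
+ion$ sudo ./ionapp_import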
+You can also execute the shell script directly to run the test
+automatically:
+ion$ sudo ./ion_test.sh
+
+Test Results:
+-------------
+The utility has been verified on a 32-bit Ubuntu system with Linux
+kernel 4.14.
+Here is a snapshot of the test result using kselftest:
+
+linux# make TARGETS=android kselftest
+heap_type: 0, heap_size: 10
+--------------------------------------
+heap type: 0
+ heap id: 1
+heap name: ion_system_heap
+--------------------------------------
+Fill buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+Sharing fd: 6, Client fd: 5
+<ion_close_buffer_fd>: buffer release successfully....
+Received buffer fd: 4
+Read buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0
+0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
+Fill buffer content:
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
+0xfd 0xfd
+<ion_close_buffer_fd>: buffer release successfully....
+ion_test.sh: heap_type: 0 - [PASS]
+
+ion_test.sh: done
diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h
new file mode 100644
index 000000000..f7021ac51
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ion.h
@@ -0,0 +1,143 @@
+/*
+ * ion.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* This file is copied from drivers/staging/android/uapi/ion.h
+ * This local copy is required for the selftest to pass, when build
+ * outside the kernel source tree.
+ * Please keep this file in sync with its original file until the
+ * ion driver is moved outside the staging tree.
+ */
+
+#ifndef _UAPI_LINUX_ION_H
+#define _UAPI_LINUX_ION_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * enum ion_heap_types - list of all possible types of heaps
+ * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc
+ * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
+ * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved
+ * carveout heap, allocations are physically
+ * contiguous
+ * @ION_HEAP_TYPE_DMA: memory allocated via DMA API
+ * @ION_NUM_HEAPS: helper for iterating over heaps, a bit mask
+ * is used to identify the heaps, so only 32
+ * total heap types are supported
+ */
+enum ion_heap_type {
+ ION_HEAP_TYPE_SYSTEM,
+ ION_HEAP_TYPE_SYSTEM_CONTIG,
+ ION_HEAP_TYPE_CARVEOUT,
+ ION_HEAP_TYPE_CHUNK,
+ ION_HEAP_TYPE_DMA,
+ ION_HEAP_TYPE_CUSTOM, /*
+ * must be last so device specific heaps always
+ * are at the end of this enum
+ */
+};
+
+#define ION_NUM_HEAP_IDS (sizeof(unsigned int) * 8)
+
+/**
+ * allocation flags - the lower 16 bits are used by core ion, the upper 16
+ * bits are reserved for use by the heaps themselves.
+ */
+
+/*
+ * mappings of this buffer should be cached, ion will do cache maintenance
+ * when the buffer is mapped for dma
+ */
+#define ION_FLAG_CACHED 1
+
+/**
+ * DOC: Ion Userspace API
+ *
+ * create a client by opening /dev/ion
+ * most operations handled via following ioctls
+ *
+ */
+
+/**
+ * struct ion_allocation_data - metadata passed from userspace for allocations
+ * @len: size of the allocation
+ * @heap_id_mask: mask of heap ids to allocate from
+ * @flags: flags passed to heap
+ * @fd:			file descriptor that will be populated with a
+ *			dma-buf fd referring to this allocation
+ *
+ * Provided by userspace as an argument to the ioctl
+ */
+struct ion_allocation_data {
+ __u64 len;
+ __u32 heap_id_mask;
+ __u32 flags;
+ __u32 fd;
+ __u32 unused;
+};
+
+#define MAX_HEAP_NAME 32
+
+/**
+ * struct ion_heap_data - data about a heap
+ * @name - first 32 characters of the heap name
+ * @type - heap type
+ * @heap_id - heap id for the heap
+ */
+struct ion_heap_data {
+ char name[MAX_HEAP_NAME];
+ __u32 type;
+ __u32 heap_id;
+ __u32 reserved0;
+ __u32 reserved1;
+ __u32 reserved2;
+};
+
+/**
+ * struct ion_heap_query - collection of data about all heaps
+ * @cnt - total number of heaps to be copied
+ * @heaps - buffer to copy heap data
+ */
+struct ion_heap_query {
+ __u32 cnt; /* Total number of heaps to be copied */
+ __u32 reserved0; /* align to 64bits */
+ __u64 heaps; /* buffer to be populated */
+ __u32 reserved1;
+ __u32 reserved2;
+};
+
+#define ION_IOC_MAGIC 'I'
+
+/**
+ * DOC: ION_IOC_ALLOC - allocate memory
+ *
+ * Takes an ion_allocation_data struct and returns it with the fd field
+ * populated with a dma-buf fd for the allocation.
+ */
+#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \
+ struct ion_allocation_data)
+
+/**
+ * DOC: ION_IOC_HEAP_QUERY - information about available heaps
+ *
+ * Takes an ion_heap_query structure and populates information about
+ * available Ion heaps.
+ */
+#define ION_IOC_HEAP_QUERY _IOWR(ION_IOC_MAGIC, 8, \
+ struct ion_heap_query)
+
+#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
new file mode 100755
index 000000000..69e676cfc
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ion_test.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+heapsize=4096
+TCID="ion_test.sh"
+errcode=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+run_test()
+{
+ heaptype=$1
+ ./ionapp_export -i $heaptype -s $heapsize &
+ sleep 1
+ ./ionapp_import
+ if [ $? -ne 0 ]; then
+ echo "$TCID: heap_type: $heaptype - [FAIL]"
+ errcode=1
+ else
+ echo "$TCID: heap_type: $heaptype - [PASS]"
+ fi
+ sleep 1
+ echo ""
+}
+
+check_root()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo $TCID: must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
+check_device()
+{
+ DEVICE=/dev/ion
+ if [ ! -e $DEVICE ]; then
+ echo $TCID: No $DEVICE device found >&2
+		echo $TCID: Maybe CONFIG_ION is not set >&2
+ exit $ksft_skip
+ fi
+}
+
+main_function()
+{
+ check_device
+ check_root
+
+ # ION_SYSTEM_HEAP TEST
+ run_test 0
+ # ION_SYSTEM_CONTIG_HEAP TEST
+ run_test 1
+}
+
+main_function
+echo "$TCID: done"
+exit $errcode
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
new file mode 100644
index 000000000..b5fa0a2dc
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionapp_export.c
@@ -0,0 +1,136 @@
+/*
+ * ionapp_export.c
+ *
+ * A user space utility to create an android ion memory buffer and
+ * export its fd to another process using a unix domain socket as IPC.
+ * This acts as the server for ionapp_import (the client), so the
+ * server has to be started before the client.
+ *
+ * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/time.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+void print_usage(int argc, char *argv[])
+{
+ printf("Usage: %s [-h <help>] [-i <heap id>] [-s <size in bytes>]\n",
+ argv[0]);
+}
+
+int main(int argc, char *argv[])
+{
+ int opt, ret, status, heapid;
+ int sockfd, client_fd, shared_fd;
+ unsigned char *map_buf;
+ unsigned long map_len, heap_type, heap_size, flags;
+ struct ion_buffer_info info;
+ struct socket_info skinfo;
+
+ if (argc < 2) {
+ print_usage(argc, argv);
+ return -1;
+ }
+
+ heap_size = 0;
+ flags = 0;
+ heap_type = ION_HEAP_TYPE_SYSTEM;
+
+ while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
+ switch (opt) {
+ case 'h':
+ print_usage(argc, argv);
+ exit(0);
+ break;
+ case 'i':
+ heapid = atoi(optarg);
+ switch (heapid) {
+ case 0:
+ heap_type = ION_HEAP_TYPE_SYSTEM;
+ break;
+ case 1:
+ heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG;
+ break;
+ default:
+ printf("ERROR: heap type not supported\n");
+ exit(1);
+ }
+ break;
+ case 's':
+ heap_size = atoi(optarg);
+ break;
+ default:
+ print_usage(argc, argv);
+ exit(1);
+ break;
+ }
+ }
+
+ if (heap_size <= 0) {
+ printf("heap_size cannot be 0\n");
+ print_usage(argc, argv);
+ exit(1);
+ }
+
+ printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size);
+ info.heap_type = heap_type;
+ info.heap_size = heap_size;
+ info.flag_type = flags;
+
+	/* This is the server: open the socket connection first */
+	/* Here, 1 indicates the server (exporter) */
+	status = opensocket(&sockfd, SOCKET_NAME, 1);
+	if (status < 0) {
+		fprintf(stderr, "<%s>: Failed opensocket.\n", __func__);
+		ret = status;
+		goto err_socket;
+	}
+ skinfo.sockfd = sockfd;
+
+ ret = ion_export_buffer_fd(&info);
+ if (ret < 0) {
+		fprintf(stderr, "FAILED: ion_export_buffer_fd\n");
+ goto err_export;
+ }
+ client_fd = info.ionfd;
+ shared_fd = info.buffd;
+ map_buf = info.buffer;
+ map_len = info.buflen;
+ write_buffer(map_buf, map_len);
+
+ /* share ion buf fd with other user process */
+ printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd);
+ skinfo.datafd = shared_fd;
+ skinfo.buflen = map_len;
+
+ ret = socket_send_fd(&skinfo);
+ if (ret < 0) {
+ fprintf(stderr, "FAILED: socket_send_fd\n");
+ goto err_send;
+ }
+
+err_send:
+err_export:
+ ion_close_buffer_fd(&info);
+
+err_socket:
+ closesocket(sockfd, SOCKET_NAME);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c
new file mode 100644
index 000000000..ae2d704cf
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionapp_import.c
@@ -0,0 +1,88 @@
+/*
+ * ionapp_import.c
+ *
+ * A user space utility to receive an android ion memory buffer fd
+ * over unix domain socket IPC, as exported by ionapp_export.
+ * This acts as the client for ionapp_export.
+ *
+ * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+int main(void)
+{
+ int ret, status;
+ int sockfd, shared_fd;
+ unsigned char *map_buf;
+ unsigned long map_len;
+ struct ion_buffer_info info;
+ struct socket_info skinfo;
+
+ /* This is the client part. Here 0 means client or importer */
+ status = opensocket(&sockfd, SOCKET_NAME, 0);
+ if (status < 0) {
+ fprintf(stderr, "No exporter exists...\n");
+ ret = status;
+ goto err_socket;
+ }
+
+ skinfo.sockfd = sockfd;
+
+ ret = socket_receive_fd(&skinfo);
+ if (ret < 0) {
+ fprintf(stderr, "Failed: socket_receive_fd\n");
+ goto err_recv;
+ }
+
+ shared_fd = skinfo.datafd;
+ printf("Received buffer fd: %d\n", shared_fd);
+ if (shared_fd <= 0) {
+ fprintf(stderr, "ERROR: improper buf fd\n");
+ ret = -1;
+ goto err_fd;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.buffd = shared_fd;
+ info.buflen = ION_BUFFER_LEN;
+
+ ret = ion_import_buffer_fd(&info);
+ if (ret < 0) {
+		fprintf(stderr, "Failed: ion_import_buffer_fd\n");
+ goto err_import;
+ }
+
+ map_buf = info.buffer;
+ map_len = info.buflen;
+ read_buffer(map_buf, map_len);
+
+	/* Write new data (possibly of a different size) to the same buffer */
+ map_len = ION_BUFFER_LEN;
+ write_buffer(map_buf, map_len);
+
+err_import:
+ ion_close_buffer_fd(&info);
+err_fd:
+err_recv:
+err_socket:
+ closesocket(sockfd, SOCKET_NAME);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
new file mode 100644
index 000000000..dab36b06b
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionmap_test.c
@@ -0,0 +1,136 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/dma-buf.h>
+
+#include <drm/drm.h>
+
+#include "ion.h"
+#include "ionutils.h"
+
+int check_vgem(int fd)
+{
+ drm_version_t version = { 0 };
+ char name[5];
+ int ret;
+
+ version.name_len = 4;
+ version.name = name;
+
+	ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
+	if (ret)
+		return 1;
+
+	/* the ioctl does not NUL-terminate the name for us */
+	name[4] = '\0';
+	return strcmp(name, "vgem");
+}
+
+int open_vgem(void)
+{
+ int i, fd;
+ const char *drmstr = "/dev/dri/card";
+
+ fd = -1;
+ for (i = 0; i < 16; i++) {
+ char name[80];
+
+ sprintf(name, "%s%u", drmstr, i);
+
+ fd = open(name, O_RDWR);
+ if (fd < 0)
+ continue;
+
+ if (check_vgem(fd)) {
+ close(fd);
+ continue;
+ } else {
+ break;
+ }
+
+ }
+ return fd;
+}
+
+int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
+{
+ struct drm_prime_handle import_handle = { 0 };
+ int ret;
+
+ import_handle.fd = dma_buf_fd;
+ import_handle.flags = 0;
+ import_handle.handle = 0;
+
+ ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
+ if (ret == 0)
+ *handle = import_handle.handle;
+ return ret;
+}
+
+void close_handle(int vgem_fd, uint32_t handle)
+{
+ struct drm_gem_close close = { 0 };
+
+ close.handle = handle;
+ ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+
+int main(void)
+{
+ int ret, vgem_fd;
+ struct ion_buffer_info info;
+ uint32_t handle = 0;
+ struct dma_buf_sync sync = { 0 };
+
+ info.heap_type = ION_HEAP_TYPE_SYSTEM;
+ info.heap_size = 4096;
+ info.flag_type = ION_FLAG_CACHED;
+
+ ret = ion_export_buffer_fd(&info);
+ if (ret < 0) {
+ printf("ion buffer alloc failed\n");
+ return -1;
+ }
+
+ vgem_fd = open_vgem();
+ if (vgem_fd < 0) {
+ ret = vgem_fd;
+ printf("Failed to open vgem\n");
+ goto out_ion;
+ }
+
+ ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
+
+ if (ret < 0) {
+ printf("Failed to import buffer\n");
+ goto out_vgem;
+ }
+
+ sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync start failed %d\n", errno);
+
+ memset(info.buffer, 0xff, 4096);
+
+ sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
+ ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
+ if (ret)
+ printf("sync end failed %d\n", errno);
+
+ close_handle(vgem_fd, handle);
+ ret = 0;
+
+out_vgem:
+ close(vgem_fd);
+out_ion:
+ ion_close_buffer_fd(&info);
+ printf("done.\n");
+ return ret;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
new file mode 100644
index 000000000..7d1d37c4e
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionutils.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include "ionutils.h"
+#include "ipcsocket.h"
+
+
+void write_buffer(void *buffer, unsigned long len)
+{
+ int i;
+ unsigned char *ptr = (unsigned char *)buffer;
+
+ if (!ptr) {
+ fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
+ return;
+ }
+
+ printf("Fill buffer content:\n");
+ memset(ptr, 0xfd, len);
+ for (i = 0; i < len; i++)
+ printf("0x%x ", ptr[i]);
+ printf("\n");
+}
+
+void read_buffer(void *buffer, unsigned long len)
+{
+ int i;
+ unsigned char *ptr = (unsigned char *)buffer;
+
+ if (!ptr) {
+ fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
+ return;
+ }
+
+ printf("Read buffer content:\n");
+ for (i = 0; i < len; i++)
+ printf("0x%x ", ptr[i]);
+ printf("\n");
+}
+
+int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
+{
+ int i, ret, ionfd, buffer_fd;
+ unsigned int heap_id;
+ unsigned long maplen;
+ unsigned char *map_buffer;
+ struct ion_allocation_data alloc_data;
+ struct ion_heap_query query;
+ struct ion_heap_data heap_data[MAX_HEAP_COUNT];
+
+ if (!ion_info) {
+ fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
+ return -1;
+ }
+
+ /* Create an ION client */
+ ionfd = open(ION_DEVICE, O_RDWR);
+ if (ionfd < 0) {
+ fprintf(stderr, "<%s>: Failed to open ion client: %s\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ memset(&query, 0, sizeof(query));
+ query.cnt = MAX_HEAP_COUNT;
+ query.heaps = (unsigned long int)&heap_data[0];
+ /* Query ION heap_id_mask from ION heap */
+ ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n",
+ __func__, strerror(errno));
+ goto err_query;
+ }
+
+ heap_id = MAX_HEAP_COUNT + 1;
+ for (i = 0; i < query.cnt; i++) {
+ if (heap_data[i].type == ion_info->heap_type) {
+ heap_id = heap_data[i].heap_id;
+ break;
+ }
+ }
+
+ if (heap_id > MAX_HEAP_COUNT) {
+		fprintf(stderr, "<%s>: ERROR: heap type does not exist\n",
+ __func__);
+ goto err_heap;
+ }
+
+ alloc_data.len = ion_info->heap_size;
+ alloc_data.heap_id_mask = 1 << heap_id;
+ alloc_data.flags = ion_info->flag_type;
+
+ /* Allocate memory for this ION client as per heap_type */
+ ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n",
+ __func__, strerror(errno));
+ goto err_alloc;
+ }
+
+ /* This will return a valid buffer fd */
+ buffer_fd = alloc_data.fd;
+ maplen = alloc_data.len;
+
+ if (buffer_fd < 0 || maplen <= 0) {
+ fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
+ __func__, buffer_fd, maplen);
+ goto err_fd_data;
+ }
+
+ /* Create memory mapped buffer for the buffer fd */
+ map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE,
+ MAP_SHARED, buffer_fd, 0);
+ if (map_buffer == MAP_FAILED) {
+ fprintf(stderr, "<%s>: Failed: mmap: %s\n",
+ __func__, strerror(errno));
+ goto err_mmap;
+ }
+
+ ion_info->ionfd = ionfd;
+ ion_info->buffd = buffer_fd;
+ ion_info->buffer = map_buffer;
+ ion_info->buflen = maplen;
+
+ return 0;
+
+err_fd_data:
+err_mmap:
+ /* in case of error: close the buffer fd */
+ if (buffer_fd)
+ close(buffer_fd);
+
+err_query:
+err_heap:
+err_alloc:
+ /* In case of error: close the ion client fd */
+ if (ionfd)
+ close(ionfd);
+
+ return -1;
+}
+
+int ion_import_buffer_fd(struct ion_buffer_info *ion_info)
+{
+ int buffd;
+ unsigned char *map_buf;
+ unsigned long map_len;
+
+ if (!ion_info) {
+ fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
+ return -1;
+ }
+
+ map_len = ion_info->buflen;
+ buffd = ion_info->buffd;
+
+ if (buffd < 0 || map_len <= 0) {
+ fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
+ __func__, buffd, map_len);
+ goto err_buffd;
+ }
+
+ map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE,
+ MAP_SHARED, buffd, 0);
+ if (map_buf == MAP_FAILED) {
+ printf("<%s>: Failed - mmap: %s\n",
+ __func__, strerror(errno));
+ goto err_mmap;
+ }
+
+ ion_info->buffer = map_buf;
+ ion_info->buflen = map_len;
+
+ return 0;
+
+err_mmap:
+ if (buffd)
+ close(buffd);
+
+err_buffd:
+ return -1;
+}
+
+void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
+{
+ if (ion_info) {
+ /* unmap the buffer properly in the end */
+ munmap(ion_info->buffer, ion_info->buflen);
+ /* close the buffer fd */
+ if (ion_info->buffd > 0)
+ close(ion_info->buffd);
+ /* Finally, close the client fd */
+ if (ion_info->ionfd > 0)
+ close(ion_info->ionfd);
+ }
+}
+
+int socket_send_fd(struct socket_info *info)
+{
+ int status;
+ int fd, sockfd;
+ struct socketdata skdata;
+
+ if (!info) {
+ fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
+ return -1;
+ }
+
+ sockfd = info->sockfd;
+ fd = info->datafd;
+ memset(&skdata, 0, sizeof(skdata));
+ skdata.data = fd;
+ skdata.len = sizeof(skdata.data);
+ status = sendtosocket(sockfd, &skdata);
+ if (status < 0) {
+ fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__);
+ return -1;
+ }
+
+ return 0;
+}
+
+int socket_receive_fd(struct socket_info *info)
+{
+ int status;
+ int fd, sockfd;
+ struct socketdata skdata;
+
+ if (!info) {
+ fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
+ return -1;
+ }
+
+ sockfd = info->sockfd;
+ memset(&skdata, 0, sizeof(skdata));
+ status = receivefromsocket(sockfd, &skdata);
+ if (status < 0) {
+ fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__);
+ return -1;
+ }
+
+ fd = (int)skdata.data;
+ info->datafd = fd;
+
+ return status;
+}
diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h
new file mode 100644
index 000000000..9941eb858
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ionutils.h
@@ -0,0 +1,55 @@
+#ifndef __ION_UTILS_H
+#define __ION_UTILS_H
+
+#include "ion.h"
+
+#define SOCKET_NAME "ion_socket"
+#define ION_DEVICE "/dev/ion"
+
+#define ION_BUFFER_LEN 4096
+#define MAX_HEAP_COUNT ION_HEAP_TYPE_CUSTOM
+
+struct socket_info {
+ int sockfd;
+ int datafd;
+ unsigned long buflen;
+};
+
+struct ion_buffer_info {
+ int ionfd;
+ int buffd;
+ unsigned int heap_type;
+ unsigned int flag_type;
+ unsigned long heap_size;
+ unsigned long buflen;
+ unsigned char *buffer;
+};
+
+
+/* This is used to fill the data into the mapped buffer */
+void write_buffer(void *buffer, unsigned long len);
+
+/* This is used to read the data from the exported buffer */
+void read_buffer(void *buffer, unsigned long len);
+
+/* This is used to create an ION buffer FD for the kernel buffer,
+ * so you can export this same buffer to other processes in the
+ * form of an FD.
+ */
+int ion_export_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to import or map an exported FD,
+ * so we point to the same buffer without making a copy; hence
+ * zero-copy.
+ */
+int ion_import_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to close all references for the ION client */
+void ion_close_buffer_fd(struct ion_buffer_info *ion_info);
+
+/* This is used to send FD to another process using socket IPC */
+int socket_send_fd(struct socket_info *skinfo);
+
+/* This is used to receive FD from another process using socket IPC */
+int socket_receive_fd(struct socket_info *skinfo);
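+
+/* Typical exporter flow, shown as a sketch with error handling
+ * omitted (opensocket()/socket_send_fd() come from ipcsocket.h;
+ * this mirrors ionapp_export.c):
+ *
+ *	struct ion_buffer_info info;
+ *	struct socket_info skinfo;
+ *
+ *	memset(&info, 0, sizeof(info));
+ *	info.heap_type = ION_HEAP_TYPE_SYSTEM;
+ *	info.heap_size = 4096;
+ *	opensocket(&skinfo.sockfd, SOCKET_NAME, 1);
+ *	ion_export_buffer_fd(&info);
+ *	write_buffer(info.buffer, info.buflen);
+ *	skinfo.datafd = info.buffd;
+ *	skinfo.buflen = info.buflen;
+ *	socket_send_fd(&skinfo);
+ *	ion_close_buffer_fd(&info);
+ */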
+
+
+#endif
diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c
new file mode 100644
index 000000000..7dc521002
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ipcsocket.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/un.h>
+#include <errno.h>
+
+#include "ipcsocket.h"
+
+
+int opensocket(int *sockfd, const char *name, int connecttype)
+{
+ int ret, temp = 1;
+
+ if (!name || strlen(name) > MAX_SOCK_NAME_LEN) {
+ fprintf(stderr, "<%s>: Invalid socket name.\n", __func__);
+ return -1;
+ }
+
+ ret = socket(PF_LOCAL, SOCK_STREAM, 0);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed socket: <%s>\n",
+ __func__, strerror(errno));
+ return ret;
+ }
+
+ *sockfd = ret;
+ if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR,
+ (char *)&temp, sizeof(int)) < 0) {
+ fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+	snprintf(sock_name, sizeof(sock_name), "/tmp/%s", name);
+
+ if (connecttype == 1) {
+ /* This is for Server connection */
+ struct sockaddr_un skaddr;
+ int clientfd;
+ socklen_t sklen;
+
+ unlink(sock_name);
+ memset(&skaddr, 0, sizeof(skaddr));
+ skaddr.sun_family = AF_LOCAL;
+ strcpy(skaddr.sun_path, sock_name);
+
+ ret = bind(*sockfd, (struct sockaddr *)&skaddr,
+ SUN_LEN(&skaddr));
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed bind: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ ret = listen(*sockfd, 5);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed listen: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ memset(&skaddr, 0, sizeof(skaddr));
+ sklen = sizeof(skaddr);
+
+ ret = accept(*sockfd, (struct sockaddr *)&skaddr,
+ (socklen_t *)&sklen);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed accept: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+
+ clientfd = ret;
+ *sockfd = clientfd;
+ } else {
+ /* This is for client connection */
+ struct sockaddr_un skaddr;
+
+ memset(&skaddr, 0, sizeof(skaddr));
+ skaddr.sun_family = AF_LOCAL;
+ strcpy(skaddr.sun_path, sock_name);
+
+ ret = connect(*sockfd, (struct sockaddr *)&skaddr,
+ SUN_LEN(&skaddr));
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed connect: <%s>\n",
+ __func__, strerror(errno));
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ if (*sockfd)
+ close(*sockfd);
+
+ return ret;
+}
+
+int sendtosocket(int sockfd, struct socketdata *skdata)
+{
+ int ret, buffd;
+ unsigned int len;
+ char cmsg_b[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+ struct msghdr msgh;
+ struct iovec iov;
+ struct timeval timeout;
+ fd_set selFDs;
+
+ if (!skdata) {
+ fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
+ return -1;
+ }
+
+ FD_ZERO(&selFDs);
+ FD_SET(0, &selFDs);
+ FD_SET(sockfd, &selFDs);
+ timeout.tv_sec = 20;
+ timeout.tv_usec = 0;
+
+ ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed select: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ if (FD_ISSET(sockfd, &selFDs)) {
+ buffd = skdata->data;
+ len = skdata->len;
+ memset(&msgh, 0, sizeof(msgh));
+ msgh.msg_control = &cmsg_b;
+ msgh.msg_controllen = CMSG_LEN(len);
+ iov.iov_base = "OK";
+ iov.iov_len = 2;
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(len);
+ memcpy(CMSG_DATA(cmsg), &buffd, len);
+
+ ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int receivefromsocket(int sockfd, struct socketdata *skdata)
+{
+ int ret, buffd;
+ unsigned int len = 0;
+ char cmsg_b[CMSG_SPACE(sizeof(int))];
+ struct cmsghdr *cmsg;
+ struct msghdr msgh;
+ struct iovec iov;
+ fd_set recvFDs;
+ char data[32];
+
+ if (!skdata) {
+ fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
+ return -1;
+ }
+
+ FD_ZERO(&recvFDs);
+ FD_SET(0, &recvFDs);
+ FD_SET(sockfd, &recvFDs);
+
+ ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed select: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ if (FD_ISSET(sockfd, &recvFDs)) {
+ len = sizeof(buffd);
+ memset(&msgh, 0, sizeof(msgh));
+ msgh.msg_control = &cmsg_b;
+ msgh.msg_controllen = CMSG_LEN(len);
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data)-1;
+ msgh.msg_iov = &iov;
+ msgh.msg_iovlen = 1;
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(len);
+
+ ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT);
+ if (ret < 0) {
+ fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n",
+ __func__, strerror(errno));
+ return -1;
+ }
+
+ memcpy(&buffd, CMSG_DATA(cmsg), len);
+ skdata->data = buffd;
+ skdata->len = len;
+ }
+ return 0;
+}
+
+int closesocket(int sockfd, char *name)
+{
+ char sockname[MAX_SOCK_NAME_LEN];
+
+	if (sockfd) {
+		/* shut down before close; shutting down an already
+		 * closed fd would just fail with EBADF
+		 */
+		shutdown(sockfd, SHUT_RDWR);
+		close(sockfd);
+	}
+	snprintf(sockname, sizeof(sockname), "/tmp/%s", name);
+	unlink(sockname);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h
new file mode 100644
index 000000000..b3e84498a
--- /dev/null
+++ b/tools/testing/selftests/android/ion/ipcsocket.h
@@ -0,0 +1,35 @@
+
+#ifndef _IPCSOCKET_H
+#define _IPCSOCKET_H
+
+
+#define MAX_SOCK_NAME_LEN 64
+
+char sock_name[MAX_SOCK_NAME_LEN];
+
+/* This structure holds the IPC data:
+ * data: holds the buffer fd
+ * len: the length of the fd (a 32-bit integer)
+ */
+struct socketdata {
+ int data;
+ unsigned int len;
+};
+
+/* This API is used to open the IPC socket connection
+ * name: implies a unique socket name in the system
+ * connecttype: server (1) or client (0)
+ */
+int opensocket(int *sockfd, const char *name, int connecttype);
+
+/* This is the API to send socket data over IPC socket */
+int sendtosocket(int sockfd, struct socketdata *data);
+
+/* This is the API to receive socket data over IPC socket */
+int receivefromsocket(int sockfd, struct socketdata *data);
+
+/* This is the API to close the socket connection */
+int closesocket(int sockfd, char *name);
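+
+/* Typical importer (client) usage, as a sketch with error checks
+ * omitted (mirrors ionapp_import.c):
+ *
+ *	int fd;
+ *	struct socketdata skdata;
+ *
+ *	opensocket(&fd, "ion_socket", 0);
+ *	receivefromsocket(fd, &skdata);
+ *	... skdata.data now holds the received buffer fd ...
+ *	closesocket(fd, "ion_socket");
+ */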
+
+
+#endif
diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh
new file mode 100755
index 000000000..dd8edf291
--- /dev/null
+++ b/tools/testing/selftests/android/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd ion; ./ion_test.sh)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
new file mode 100644
index 000000000..49938d72c
--- /dev/null
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -0,0 +1,21 @@
+test_verifier
+test_maps
+test_lru_map
+test_lpm_map
+test_tag
+FEATURE-DUMP.libbpf
+fixdep
+test_align
+test_dev_cgroup
+test_progs
+test_tcpbpf_user
+test_verifier_log
+feature
+test_libbpf_open
+test_sock
+test_sock_addr
+urandom_read
+test_btf
+test_sockmap
+test_lirc_mode2_user
+get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
new file mode 100644
index 000000000..f3f874ba1
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile
@@ -0,0 +1,145 @@
+# SPDX-License-Identifier: GPL-2.0
+
+LIBDIR := ../../../lib
+BPFDIR := $(LIBDIR)/bpf
+APIDIR := ../../../include/uapi
+GENDIR := ../../../../include/generated
+GENHDR := $(GENDIR)/autoconf.h
+
+ifneq ($(wildcard $(GENHDR)),)
+ GENFLAGS := -DHAVE_GENHDR
+endif
+
+CFLAGS += -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include
+LDLIBS += -lcap -lelf -lrt -lpthread
+
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+all: $(TEST_CUSTOM_PROGS)
+
+$(TEST_CUSTOM_PROGS): $(OUTPUT)/%: %.c
+ $(CC) -o $(TEST_CUSTOM_PROGS) -static $< -Wl,--build-id
+
+# Order corresponds to 'make run_tests' order
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+ test_sock test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user \
+ test_socket_cookie test_cgroup_storage test_select_reuseport
+
+TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \
+ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \
+ sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o test_tracepoint.o \
+ test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
+ sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
+ sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
+ test_btf_haskv.o test_btf_nokv.o test_sockmap_kern.o test_tunnel_kern.o \
+ test_get_stack_rawtp.o test_sockmap_kern.o test_sockhash_kern.o \
+ test_lwt_seg6local.o sendmsg4_prog.o sendmsg6_prog.o test_lirc_mode2_kern.o \
+ get_cgroup_id_kern.o socket_cookie_prog.o test_select_reuseport_kern.o \
+ test_skb_cgroup_id_kern.o
+
+# Order corresponds to 'make run_tests' order
+TEST_PROGS := test_kmod.sh \
+ test_libbpf.sh \
+ test_xdp_redirect.sh \
+ test_xdp_meta.sh \
+ test_offload.py \
+ test_sock_addr.sh \
+ test_tunnel.sh \
+ test_lwt_seg6local.sh \
+ test_lirc_mode2.sh \
+ test_skb_cgroup_id.sh
+
+# Compile but not part of 'make run_tests'
+TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user
+
+include ../lib.mk
+
+BPFOBJ := $(OUTPUT)/libbpf.a
+
+$(TEST_GEN_PROGS): $(BPFOBJ)
+
+$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
+
+$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
+$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_sock: cgroup_helpers.c
+$(OUTPUT)/test_sock_addr: cgroup_helpers.c
+$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
+$(OUTPUT)/test_sockmap: cgroup_helpers.c
+$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
+$(OUTPUT)/test_progs: trace_helpers.c
+$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
+$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
+
+.PHONY: force
+
+# force a rebuild of BPFOBJ when its dependencies are updated
+force:
+
+$(BPFOBJ): force
+ $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/
+
+CLANG ?= clang
+LLC ?= llc
+LLVM_OBJCOPY ?= llvm-objcopy
+BTF_PAHOLE ?= pahole
+
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+# Let newer LLVM versions transparently probe the kernel for availability
+# of full BPF instruction set.
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I./include/uapi -I../../../include/uapi \
+ $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/test_l4lb_noinline.o: CLANG_FLAGS += -fno-inline
+$(OUTPUT)/test_xdp_noinline.o: CLANG_FLAGS += -fno-inline
+
+BTF_LLC_PROBE := $(shell $(LLC) -march=bpf -mattr=help 2>&1 | grep dwarfris)
+BTF_PAHOLE_PROBE := $(shell $(BTF_PAHOLE) --help 2>&1 | grep BTF)
+BTF_OBJCOPY_PROBE := $(shell $(LLVM_OBJCOPY) --help 2>&1 | grep -i 'usage.*llvm')
+
+ifneq ($(BTF_LLC_PROBE),)
+ifneq ($(BTF_PAHOLE_PROBE),)
+ifneq ($(BTF_OBJCOPY_PROBE),)
+ CLANG_FLAGS += -g
+ LLC_FLAGS += -mattr=dwarfris
+ DWARF2BTF = y
+endif
+endif
+endif
+
+# Have one program compiled without "-target bpf" to test whether libbpf loads
+# it successfully
+$(OUTPUT)/test_xdp.o: test_xdp.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
+ifeq ($(DWARF2BTF),y)
+ $(BTF_PAHOLE) -J $@
+endif
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
diff --git a/tools/testing/selftests/bpf/bpf_endian.h b/tools/testing/selftests/bpf/bpf_endian.h
new file mode 100644
index 000000000..b25595ea4
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_endian.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_ENDIAN__
+#define __BPF_ENDIAN__
+
+#include <linux/swab.h>
+
+/* LLVM's BPF target selects the endianness of the CPU
+ * it compiles on, unless the user specifies one explicitly
+ * (bpfel/bpfeb). The __BYTE_ORDER__ used here is defined by
+ * the compiler; we cannot rely on __BYTE_ORDER from libc
+ * headers, since it doesn't reflect the actually requested
+ * byte order.
+ *
+ * Note that LLVM's BPF target has different __builtin_bswapX()
+ * semantics: in both the bpfel and bpfeb case it maps to
+ * BPF_ALU | BPF_END | BPF_TO_BE, which means that below we
+ * effectively map to cpu_to_be16(). We could use it
+ * unconditionally in the BPF case, but it is better not to
+ * rely on that, so that this header can be used from both the
+ * application side and the BPF program side, which use
+ * different targets.
+ */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+# define __bpf_ntohs(x) __builtin_bswap16(x)
+# define __bpf_htons(x) __builtin_bswap16(x)
+# define __bpf_constant_ntohs(x) ___constant_swab16(x)
+# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_ntohl(x) __builtin_bswap32(x)
+# define __bpf_htonl(x) __builtin_bswap32(x)
+# define __bpf_constant_ntohl(x) ___constant_swab32(x)
+# define __bpf_constant_htonl(x) ___constant_swab32(x)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+# define __bpf_ntohs(x) (x)
+# define __bpf_htons(x) (x)
+# define __bpf_constant_ntohs(x) (x)
+# define __bpf_constant_htons(x) (x)
+# define __bpf_ntohl(x) (x)
+# define __bpf_htonl(x) (x)
+# define __bpf_constant_ntohl(x) (x)
+# define __bpf_constant_htonl(x) (x)
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#define bpf_htons(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htons(x) : __bpf_htons(x))
+#define bpf_ntohs(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohs(x) : __bpf_ntohs(x))
+#define bpf_htonl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_htonl(x) : __bpf_htonl(x))
+#define bpf_ntohl(x) \
+ (__builtin_constant_p(x) ? \
+ __bpf_constant_ntohl(x) : __bpf_ntohl(x))
+
+#endif /* __BPF_ENDIAN__ */
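
A hypothetical usage sketch (not part of this patch): because the bpf_htons()/bpf_ntohs() wrappers above dispatch on __builtin_constant_p(), a constant operand folds at compile time while a variable operand goes through __builtin_bswapX() on little-endian targets. File and function names below are illustrative.

/* endian_demo.c - hypothetical sketch built against bpf_endian.h */
#include "bpf_endian.h"

static int is_web_port(unsigned short port_net)
{
	/* constant operand: __bpf_constant_htons(80) folds at compile time */
	if (port_net == bpf_htons(80))
		return 1;
	/* variable operand: __bpf_ntohs() emits a byte swap on bpfel targets */
	return bpf_ntohs(port_net) == 8080;
}
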
diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h
new file mode 100644
index 000000000..e4be77302
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_helpers.h
@@ -0,0 +1,336 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_HELPERS_H
+#define __BPF_HELPERS_H
+
+/* helper macro to place programs, maps, license in
+ * different sections in elf_bpf file. Section names
+ * are interpreted by elf_bpf loader
+ */
+#define SEC(NAME) __attribute__((section(NAME), used))
+
+/* helper functions called from eBPF programs written in C */
+static void *(*bpf_map_lookup_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_lookup_elem;
+static int (*bpf_map_update_elem)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_map_update_elem;
+static int (*bpf_map_delete_elem)(void *map, void *key) =
+ (void *) BPF_FUNC_map_delete_elem;
+static int (*bpf_probe_read)(void *dst, int size, void *unsafe_ptr) =
+ (void *) BPF_FUNC_probe_read;
+static unsigned long long (*bpf_ktime_get_ns)(void) =
+ (void *) BPF_FUNC_ktime_get_ns;
+static int (*bpf_trace_printk)(const char *fmt, int fmt_size, ...) =
+ (void *) BPF_FUNC_trace_printk;
+static void (*bpf_tail_call)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_tail_call;
+static unsigned long long (*bpf_get_smp_processor_id)(void) =
+ (void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+ (void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+ (void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+ (void *) BPF_FUNC_get_current_comm;
+static unsigned long long (*bpf_perf_event_read)(void *map,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_perf_event_read;
+static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) =
+ (void *) BPF_FUNC_clone_redirect;
+static int (*bpf_redirect)(int ifindex, int flags) =
+ (void *) BPF_FUNC_redirect;
+static int (*bpf_redirect_map)(void *map, int key, int flags) =
+ (void *) BPF_FUNC_redirect_map;
+static int (*bpf_perf_event_output)(void *ctx, void *map,
+ unsigned long long flags, void *data,
+ int size) =
+ (void *) BPF_FUNC_perf_event_output;
+static int (*bpf_get_stackid)(void *ctx, void *map, int flags) =
+ (void *) BPF_FUNC_get_stackid;
+static int (*bpf_probe_write_user)(void *dst, void *src, int size) =
+ (void *) BPF_FUNC_probe_write_user;
+static int (*bpf_current_task_under_cgroup)(void *map, int index) =
+ (void *) BPF_FUNC_current_task_under_cgroup;
+static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_get_tunnel_key;
+static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) =
+ (void *) BPF_FUNC_skb_set_tunnel_key;
+static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_get_tunnel_opt;
+static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
+ (void *) BPF_FUNC_skb_set_tunnel_opt;
+static unsigned long long (*bpf_get_prandom_u32)(void) =
+ (void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_head;
+static int (*bpf_xdp_adjust_meta)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_meta;
+static int (*bpf_get_socket_cookie)(void *ctx) =
+ (void *) BPF_FUNC_get_socket_cookie;
+static int (*bpf_setsockopt)(void *ctx, int level, int optname, void *optval,
+ int optlen) =
+ (void *) BPF_FUNC_setsockopt;
+static int (*bpf_getsockopt)(void *ctx, int level, int optname, void *optval,
+ int optlen) =
+ (void *) BPF_FUNC_getsockopt;
+static int (*bpf_sock_ops_cb_flags_set)(void *ctx, int flags) =
+ (void *) BPF_FUNC_sock_ops_cb_flags_set;
+static int (*bpf_sk_redirect_map)(void *ctx, void *map, int key, int flags) =
+ (void *) BPF_FUNC_sk_redirect_map;
+static int (*bpf_sk_redirect_hash)(void *ctx, void *map, void *key, int flags) =
+ (void *) BPF_FUNC_sk_redirect_hash;
+static int (*bpf_sock_map_update)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sock_map_update;
+static int (*bpf_sock_hash_update)(void *map, void *key, void *value,
+ unsigned long long flags) =
+ (void *) BPF_FUNC_sock_hash_update;
+static int (*bpf_perf_event_read_value)(void *map, unsigned long long flags,
+ void *buf, unsigned int buf_size) =
+ (void *) BPF_FUNC_perf_event_read_value;
+static int (*bpf_perf_prog_read_value)(void *ctx, void *buf,
+ unsigned int buf_size) =
+ (void *) BPF_FUNC_perf_prog_read_value;
+static int (*bpf_override_return)(void *ctx, unsigned long rc) =
+ (void *) BPF_FUNC_override_return;
+static int (*bpf_msg_redirect_map)(void *ctx, void *map, int key, int flags) =
+ (void *) BPF_FUNC_msg_redirect_map;
+static int (*bpf_msg_redirect_hash)(void *ctx,
+ void *map, void *key, int flags) =
+ (void *) BPF_FUNC_msg_redirect_hash;
+static int (*bpf_msg_apply_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_apply_bytes;
+static int (*bpf_msg_cork_bytes)(void *ctx, int len) =
+ (void *) BPF_FUNC_msg_cork_bytes;
+static int (*bpf_msg_pull_data)(void *ctx, int start, int end, int flags) =
+ (void *) BPF_FUNC_msg_pull_data;
+static int (*bpf_bind)(void *ctx, void *addr, int addr_len) =
+ (void *) BPF_FUNC_bind;
+static int (*bpf_xdp_adjust_tail)(void *ctx, int offset) =
+ (void *) BPF_FUNC_xdp_adjust_tail;
+static int (*bpf_skb_get_xfrm_state)(void *ctx, int index, void *state,
+ int size, int flags) =
+ (void *) BPF_FUNC_skb_get_xfrm_state;
+static int (*bpf_sk_select_reuseport)(void *ctx, void *map, void *key, __u32 flags) =
+ (void *) BPF_FUNC_sk_select_reuseport;
+static int (*bpf_get_stack)(void *ctx, void *buf, int size, int flags) =
+ (void *) BPF_FUNC_get_stack;
+static int (*bpf_fib_lookup)(void *ctx, struct bpf_fib_lookup *params,
+ int plen, __u32 flags) =
+ (void *) BPF_FUNC_fib_lookup;
+static int (*bpf_lwt_push_encap)(void *ctx, unsigned int type, void *hdr,
+ unsigned int len) =
+ (void *) BPF_FUNC_lwt_push_encap;
+static int (*bpf_lwt_seg6_store_bytes)(void *ctx, unsigned int offset,
+ void *from, unsigned int len) =
+ (void *) BPF_FUNC_lwt_seg6_store_bytes;
+static int (*bpf_lwt_seg6_action)(void *ctx, unsigned int action, void *param,
+ unsigned int param_len) =
+ (void *) BPF_FUNC_lwt_seg6_action;
+static int (*bpf_lwt_seg6_adjust_srh)(void *ctx, unsigned int offset,
+ unsigned int len) =
+ (void *) BPF_FUNC_lwt_seg6_adjust_srh;
+static int (*bpf_rc_repeat)(void *ctx) =
+ (void *) BPF_FUNC_rc_repeat;
+static int (*bpf_rc_keydown)(void *ctx, unsigned int protocol,
+ unsigned long long scancode, unsigned int toggle) =
+ (void *) BPF_FUNC_rc_keydown;
+static unsigned long long (*bpf_get_current_cgroup_id)(void) =
+ (void *) BPF_FUNC_get_current_cgroup_id;
+static void *(*bpf_get_local_storage)(void *map, unsigned long long flags) =
+ (void *) BPF_FUNC_get_local_storage;
+static unsigned long long (*bpf_skb_cgroup_id)(void *ctx) =
+ (void *) BPF_FUNC_skb_cgroup_id;
+static unsigned long long (*bpf_skb_ancestor_cgroup_id)(void *ctx, int level) =
+ (void *) BPF_FUNC_skb_ancestor_cgroup_id;
+
+/* llvm builtin functions that eBPF C program may use to
+ * emit BPF_LD_ABS and BPF_LD_IND instructions
+ */
+struct sk_buff;
+unsigned long long load_byte(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb,
+ unsigned long long off) asm("llvm.bpf.load.word");
+
+/* a helper structure used by eBPF C program
+ * to describe map attributes to elf_bpf loader
+ */
+struct bpf_map_def {
+ unsigned int type;
+ unsigned int key_size;
+ unsigned int value_size;
+ unsigned int max_entries;
+ unsigned int map_flags;
+ unsigned int inner_map_idx;
+ unsigned int numa_node;
+};
+
+#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
+ struct ____btf_map_##name { \
+ type_key key; \
+ type_val value; \
+ }; \
+ struct ____btf_map_##name \
+ __attribute__ ((section(".maps." #name), used)) \
+ ____btf_map_##name = { }
+
+static int (*bpf_skb_load_bytes)(void *ctx, int off, void *to, int len) =
+ (void *) BPF_FUNC_skb_load_bytes;
+static int (*bpf_skb_load_bytes_relative)(void *ctx, int off, void *to, int len, __u32 start_header) =
+ (void *) BPF_FUNC_skb_load_bytes_relative;
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+ (void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+ (void *) BPF_FUNC_l4_csum_replace;
+static int (*bpf_csum_diff)(void *from, int from_size, void *to, int to_size, int seed) =
+ (void *) BPF_FUNC_csum_diff;
+static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
+ (void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_skb_change_head)(void *, int len, int flags) =
+ (void *) BPF_FUNC_skb_change_head;
+static int (*bpf_skb_pull_data)(void *, int len) =
+ (void *) BPF_FUNC_skb_pull_data;
+
+/* Scan the ARCH passed in from ARCH env variable (see Makefile) */
+#if defined(__TARGET_ARCH_x86)
+ #define bpf_target_x86
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_s390x)
+	#define bpf_target_s390x
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_arm64)
+ #define bpf_target_arm64
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_mips)
+ #define bpf_target_mips
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_powerpc)
+ #define bpf_target_powerpc
+ #define bpf_target_defined
+#elif defined(__TARGET_ARCH_sparc)
+ #define bpf_target_sparc
+ #define bpf_target_defined
+#else
+ #undef bpf_target_defined
+#endif
+
+/* Fall back to what the compiler says */
+#ifndef bpf_target_defined
+#if defined(__x86_64__)
+ #define bpf_target_x86
+#elif defined(__s390x__)
+	#define bpf_target_s390x
+#elif defined(__aarch64__)
+ #define bpf_target_arm64
+#elif defined(__mips__)
+ #define bpf_target_mips
+#elif defined(__powerpc__)
+ #define bpf_target_powerpc
+#elif defined(__sparc__)
+ #define bpf_target_sparc
+#endif
+#endif
+
+#if defined(bpf_target_x86)
+
+#define PT_REGS_PARM1(x) ((x)->di)
+#define PT_REGS_PARM2(x) ((x)->si)
+#define PT_REGS_PARM3(x) ((x)->dx)
+#define PT_REGS_PARM4(x) ((x)->cx)
+#define PT_REGS_PARM5(x) ((x)->r8)
+#define PT_REGS_RET(x) ((x)->sp)
+#define PT_REGS_FP(x) ((x)->bp)
+#define PT_REGS_RC(x) ((x)->ax)
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->ip)
+
+#elif defined(bpf_target_s390x)
+
+#define PT_REGS_PARM1(x) ((x)->gprs[2])
+#define PT_REGS_PARM2(x) ((x)->gprs[3])
+#define PT_REGS_PARM3(x) ((x)->gprs[4])
+#define PT_REGS_PARM4(x) ((x)->gprs[5])
+#define PT_REGS_PARM5(x) ((x)->gprs[6])
+#define PT_REGS_RET(x) ((x)->gprs[14])
+#define PT_REGS_FP(x) ((x)->gprs[11]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->gprs[2])
+#define PT_REGS_SP(x) ((x)->gprs[15])
+#define PT_REGS_IP(x) ((x)->psw.addr)
+
+#elif defined(bpf_target_arm64)
+
+#define PT_REGS_PARM1(x) ((x)->regs[0])
+#define PT_REGS_PARM2(x) ((x)->regs[1])
+#define PT_REGS_PARM3(x) ((x)->regs[2])
+#define PT_REGS_PARM4(x) ((x)->regs[3])
+#define PT_REGS_PARM5(x) ((x)->regs[4])
+#define PT_REGS_RET(x) ((x)->regs[30])
+#define PT_REGS_FP(x) ((x)->regs[29]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[0])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->pc)
+
+#elif defined(bpf_target_mips)
+
+#define PT_REGS_PARM1(x) ((x)->regs[4])
+#define PT_REGS_PARM2(x) ((x)->regs[5])
+#define PT_REGS_PARM3(x) ((x)->regs[6])
+#define PT_REGS_PARM4(x) ((x)->regs[7])
+#define PT_REGS_PARM5(x) ((x)->regs[8])
+#define PT_REGS_RET(x) ((x)->regs[31])
+#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */
+#define PT_REGS_RC(x) ((x)->regs[1])
+#define PT_REGS_SP(x) ((x)->regs[29])
+#define PT_REGS_IP(x) ((x)->cp0_epc)
+
+#elif defined(bpf_target_powerpc)
+
+#define PT_REGS_PARM1(x) ((x)->gpr[3])
+#define PT_REGS_PARM2(x) ((x)->gpr[4])
+#define PT_REGS_PARM3(x) ((x)->gpr[5])
+#define PT_REGS_PARM4(x) ((x)->gpr[6])
+#define PT_REGS_PARM5(x) ((x)->gpr[7])
+#define PT_REGS_RC(x) ((x)->gpr[3])
+#define PT_REGS_SP(x) ((x)->sp)
+#define PT_REGS_IP(x) ((x)->nip)
+
+#elif defined(bpf_target_sparc)
+
+#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1])
+#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2])
+#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3])
+#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4])
+#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7])
+#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
+#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+
+/* Should this also be a bpf_target check for the sparc case? */
+#if defined(__arch64__)
+#define PT_REGS_IP(x) ((x)->tpc)
+#else
+#define PT_REGS_IP(x) ((x)->pc)
+#endif
+
+#endif
+
+#ifdef bpf_target_powerpc
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = (ctx)->link; })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#elif defined(bpf_target_sparc)
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ (ip) = PT_REGS_RET(ctx); })
+#define BPF_KRETPROBE_READ_RET_IP BPF_KPROBE_READ_RET_IP
+#else
+#define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })
+#define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ \
+ bpf_probe_read(&(ip), sizeof(ip), \
+ (void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
+#endif
+
+#endif
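
To see how the pieces above fit together, here is a hypothetical minimal program written against this header; the map name, section name, and counting logic are illustrative only, but the SEC()/bpf_map_def/helper-stub pattern is the one the selftests in this patch use.

/* count_demo.c - hypothetical sketch; compile like the other selftests,
 * i.e. clang -O2 -target bpf -c count_demo.c -o count_demo.o
 */
#include <linux/bpf.h>
#include "bpf_helpers.h"

struct bpf_map_def SEC("maps") counters = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(__u64),
	.max_entries = 1,
};

SEC("xdp_count")
int count_packets(struct xdp_md *ctx)
{
	__u32 key = 0;
	__u64 *val = bpf_map_lookup_elem(&counters, &key);

	if (val)
		__sync_fetch_and_add(val, 1);	/* compiles to BPF_XADD */
	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
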
diff --git a/tools/testing/selftests/bpf/bpf_rand.h b/tools/testing/selftests/bpf/bpf_rand.h
new file mode 100644
index 000000000..59bf3e1a9
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rand.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_RAND__
+#define __BPF_RAND__
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <time.h>
+
+static inline uint64_t bpf_rand_mask(uint64_t mask)
+{
+ return (((uint64_t)(uint32_t)rand()) |
+ ((uint64_t)(uint32_t)rand() << 32)) & mask;
+}
+
+#define bpf_rand_ux(x, m) \
+static inline uint64_t bpf_rand_u##x(int shift) \
+{ \
+ return bpf_rand_mask((m)) << shift; \
+}
+
+bpf_rand_ux( 8, 0xffULL)
+bpf_rand_ux(16, 0xffffULL)
+bpf_rand_ux(24, 0xffffffULL)
+bpf_rand_ux(32, 0xffffffffULL)
+bpf_rand_ux(40, 0xffffffffffULL)
+bpf_rand_ux(48, 0xffffffffffffULL)
+bpf_rand_ux(56, 0xffffffffffffffULL)
+bpf_rand_ux(64, 0xffffffffffffffffULL)
+
+static inline void bpf_semi_rand_init(void)
+{
+ srand(time(NULL));
+}
+
+static inline uint64_t bpf_semi_rand_get(void)
+{
+ switch (rand() % 39) {
+ case 0: return 0x000000ff00000000ULL | bpf_rand_u8(0);
+ case 1: return 0xffffffff00000000ULL | bpf_rand_u16(0);
+ case 2: return 0x00000000ffff0000ULL | bpf_rand_u16(0);
+ case 3: return 0x8000000000000000ULL | bpf_rand_u32(0);
+ case 4: return 0x00000000f0000000ULL | bpf_rand_u32(0);
+ case 5: return 0x0000000100000000ULL | bpf_rand_u24(0);
+ case 6: return 0x800ff00000000000ULL | bpf_rand_u32(0);
+ case 7: return 0x7fffffff00000000ULL | bpf_rand_u32(0);
+ case 8: return 0xffffffffffffff00ULL ^ bpf_rand_u32(24);
+ case 9: return 0xffffffffffffff00ULL | bpf_rand_u8(0);
+ case 10: return 0x0000000010000000ULL | bpf_rand_u32(0);
+ case 11: return 0xf000000000000000ULL | bpf_rand_u8(0);
+ case 12: return 0x0000f00000000000ULL | bpf_rand_u8(8);
+ case 13: return 0x000000000f000000ULL | bpf_rand_u8(16);
+ case 14: return 0x0000000000000f00ULL | bpf_rand_u8(32);
+ case 15: return 0x00fff00000000f00ULL | bpf_rand_u8(48);
+ case 16: return 0x00007fffffffffffULL ^ bpf_rand_u32(1);
+ case 17: return 0xffff800000000000ULL | bpf_rand_u8(4);
+ case 18: return 0xffff800000000000ULL | bpf_rand_u8(20);
+ case 19: return (0xffffffc000000000ULL + 0x80000ULL) | bpf_rand_u32(0);
+ case 20: return (0xffffffc000000000ULL - 0x04000000ULL) | bpf_rand_u32(0);
+ case 21: return 0x0000000000000000ULL | bpf_rand_u8(55) | bpf_rand_u32(20);
+ case 22: return 0xffffffffffffffffULL ^ bpf_rand_u8(3) ^ bpf_rand_u32(40);
+ case 23: return 0x0000000000000000ULL | bpf_rand_u8(bpf_rand_u8(0) % 64);
+ case 24: return 0x0000000000000000ULL | bpf_rand_u16(bpf_rand_u8(0) % 64);
+ case 25: return 0xffffffffffffffffULL ^ bpf_rand_u8(bpf_rand_u8(0) % 64);
+ case 26: return 0xffffffffffffffffULL ^ bpf_rand_u40(bpf_rand_u8(0) % 64);
+ case 27: return 0x0000800000000000ULL;
+ case 28: return 0x8000000000000000ULL;
+ case 29: return 0x0000000000000000ULL;
+ case 30: return 0xffffffffffffffffULL;
+ case 31: return bpf_rand_u16(bpf_rand_u8(0) % 64);
+ case 32: return bpf_rand_u24(bpf_rand_u8(0) % 64);
+ case 33: return bpf_rand_u32(bpf_rand_u8(0) % 64);
+ case 34: return bpf_rand_u40(bpf_rand_u8(0) % 64);
+ case 35: return bpf_rand_u48(bpf_rand_u8(0) % 64);
+ case 36: return bpf_rand_u56(bpf_rand_u8(0) % 64);
+ case 37: return bpf_rand_u64(bpf_rand_u8(0) % 64);
+ default: return bpf_rand_u64(0);
+ }
+}
+
+#endif /* __BPF_RAND__ */
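
A hypothetical harness around these generators (not from the patch): seed once via bpf_semi_rand_init(), then draw 64-bit immediates that are biased toward interesting boundary bit patterns rather than uniformly random values.

/* rand_demo.c - hypothetical sketch using bpf_rand.h */
#include <stdio.h>
#include "bpf_rand.h"

int main(void)
{
	int i;

	bpf_semi_rand_init();	/* srand(time(NULL)) under the hood */
	for (i = 0; i < 4; i++)
		printf("imm64: %#llx\n",
		       (unsigned long long)bpf_semi_rand_get());
	return 0;
}
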
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
new file mode 100644
index 000000000..9dac9b30f
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_rlimit.h
@@ -0,0 +1,28 @@
+#include <sys/resource.h>
+#include <stdio.h>
+
+static __attribute__((constructor)) void bpf_rlimit_ctor(void)
+{
+ struct rlimit rlim_old, rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+ /* For the sake of running the test cases, we temporarily
+ * set rlimit to infinity in order for kernel to focus on
+ * errors from actual test cases and not getting noise
+ * from hitting memlock limits. The limit is on per-process
+ * basis and not a global one, hence destructor not really
+ * needed here.
+ */
+ if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
+ perror("Unable to lift memlock rlimit");
+ /* Trying out lower limit, but expect potential test
+ * case failures from this!
+ */
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
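
Usage is implicit: a test only needs to include the header, and the constructor raises RLIMIT_MEMLOCK before main() runs. A hypothetical minimal consumer:

/* rlimit_demo.c - hypothetical; bpf_rlimit_ctor() runs before main() */
#include "bpf_rlimit.h"

int main(void)
{
	/* BPF map/program creation here can charge locked memory freely */
	return 0;
}
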
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
new file mode 100644
index 000000000..84fd6f1bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_UTIL__
+#define __BPF_UTIL__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+static inline unsigned int bpf_num_possible_cpus(void)
+{
+ static const char *fcpu = "/sys/devices/system/cpu/possible";
+ unsigned int start, end, possible_cpus = 0;
+ char buff[128];
+ FILE *fp;
+ int len, n, i, j = 0;
+
+ fp = fopen(fcpu, "r");
+ if (!fp) {
+ printf("Failed to open %s: '%s'!\n", fcpu, strerror(errno));
+ exit(1);
+ }
+
+ if (!fgets(buff, sizeof(buff), fp)) {
+ printf("Failed to read %s!\n", fcpu);
+ exit(1);
+ }
+
+ len = strlen(buff);
+ for (i = 0; i <= len; i++) {
+ if (buff[i] == ',' || buff[i] == '\0') {
+ buff[i] = '\0';
+ n = sscanf(&buff[j], "%u-%u", &start, &end);
+ if (n <= 0) {
+ printf("Failed to retrieve # possible CPUs!\n");
+ exit(1);
+ } else if (n == 1) {
+ end = start;
+ }
+ possible_cpus += end - start + 1;
+ j = i + 1;
+ }
+ }
+
+ fclose(fp);
+
+ return possible_cpus;
+}
+
+#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
+
+#define BPF_DECLARE_PERCPU(type, name) \
+ struct { type v; /* padding */ } __bpf_percpu_val_align \
+ name[bpf_num_possible_cpus()]
+#define bpf_percpu(name, cpu) name[(cpu)].v
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#endif /* __BPF_UTIL__ */
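
A hypothetical sketch of how the per-CPU macros above are meant to be used from user space, assuming map_fd refers to a BPF_MAP_TYPE_PERCPU_ARRAY with u64 values (the kernel copies one 8-byte-aligned slot per possible CPU on lookup):

/* percpu_demo.c - hypothetical sketch using bpf_util.h */
#include <bpf/bpf.h>
#include "bpf_util.h"

static unsigned long long sum_percpu(int map_fd)
{
	unsigned int i, ncpus = bpf_num_possible_cpus();
	BPF_DECLARE_PERCPU(unsigned long long, values);
	unsigned long long total = 0;
	__u32 key = 0;

	if (bpf_map_lookup_elem(map_fd, &key, values))
		return 0;
	for (i = 0; i < ncpus; i++)
		total += bpf_percpu(values, i);	/* slot for CPU i */
	return total;
}
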
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
new file mode 100644
index 000000000..6af24f9a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/sched.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ftw.h>
+
+
+#include "cgroup_helpers.h"
+
+/*
+ * To avoid relying on the system setup, when setup_cgroup_environment()
+ * is called we create a new mount namespace and a new cgroup namespace.
+ * The cgroup2 root is mounted at CGROUP_MOUNT_PATH.
+ *
+ * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
+ * It's easier to create our own mount namespace and manage it ourselves.
+ *
+ * We assume /mnt exists.
+ */
+
+#define WALK_FD_LIMIT 16
+#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+#define format_cgroup_path(buf, path) \
+ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
+ CGROUP_WORK_DIR, path)
+
+/**
+ * setup_cgroup_environment() - Setup the cgroup environment
+ *
+ * After calling this function, cleanup_cgroup_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to set up the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX - 24];
+
+ format_cgroup_path(cgroup_workdir, "");
+
+ if (unshare(CLONE_NEWNS)) {
+ log_err("unshare");
+ return 1;
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
+ log_err("mount fakeroot");
+ return 1;
+ }
+
+ if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
+ log_err("mount cgroup2");
+ return 1;
+ }
+
+	/* Clean up existing failed runs, now that the environment is set up */
+ cleanup_cgroup_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int nftwfunc(const char *filename, const struct stat *statptr,
+ int fileflags, struct FTW *pfwt)
+{
+ if ((fileflags & FTW_D) && rmdir(filename))
+ log_err("Removing cgroup: %s", filename);
+ return 0;
+}
+
+
+static int join_cgroup_from_top(char *cgroup_path)
+{
+ char cgroup_procs_path[PATH_MAX + 1];
+ pid_t pid = getpid();
+ int fd, rc = 0;
+
+ snprintf(cgroup_procs_path, sizeof(cgroup_procs_path),
+ "%s/cgroup.procs", cgroup_path);
+
+ fd = open(cgroup_procs_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening Cgroup Procs: %s", cgroup_procs_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%d\n", pid) < 0) {
+ log_err("Joining Cgroup");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_cgroup() - Join a cgroup
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir, and it joins it. For example, passing "/my-cgroup" as the path
+ * would actually put the calling process into the cgroup
+ * "/cgroup-test-work-dir/my-cgroup"
+ *
+ * On success it returns 0; on failure it returns 1.
+ */
+int join_cgroup(const char *path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, path);
+ return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
+ *
+ * This is an idempotent function to delete all temporary cgroups that
+ * have been created during the test, including the cgroup testing work
+ * directory.
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process. It is idempotent, and should not fail, unless
+ * a process is lingering.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+ join_cgroup_from_top(CGROUP_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
+ * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * This function creates a cgroup under the top level workdir and returns the
+ * file descriptor. It is idempotent.
+ *
+ * On success, it returns the file descriptor. On failure it returns 0.
+ * If there is a failure, it prints the error to stderr.
+ */
+int create_and_get_cgroup(const char *path)
+{
+ char cgroup_path[PATH_MAX + 1];
+ int fd;
+
+ format_cgroup_path(cgroup_path, path);
+ if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup %s .. %s", path, cgroup_path);
+ return 0;
+ }
+
+ fd = open(cgroup_path, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening Cgroup");
+ return 0;
+ }
+
+ return fd;
+}
+
+/**
+ * get_cgroup_id() - Get cgroup id for a particular cgroup path
+ * @path: The cgroup path, relative to the workdir, to join
+ *
+ * On success, it returns the cgroup id. On failure it returns 0,
+ * which is an invalid cgroup id.
+ * If there is a failure, it prints the error to stderr.
+ */
+unsigned long long get_cgroup_id(const char *path)
+{
+ int dirfd, err, flags, mount_id, fhsize;
+ union {
+ unsigned long long cgid;
+ unsigned char raw_bytes[8];
+ } id;
+ char cgroup_workdir[PATH_MAX + 1];
+ struct file_handle *fhp, *fhp2;
+ unsigned long long ret = 0;
+
+ format_cgroup_path(cgroup_workdir, path);
+
+ dirfd = AT_FDCWD;
+ flags = 0;
+ fhsize = sizeof(*fhp);
+ fhp = calloc(1, fhsize);
+ if (!fhp) {
+ log_err("calloc");
+ return 0;
+ }
+ err = name_to_handle_at(dirfd, cgroup_workdir, fhp, &mount_id, flags);
+ if (err >= 0 || fhp->handle_bytes != 8) {
+ log_err("name_to_handle_at");
+ goto free_mem;
+ }
+
+ fhsize = sizeof(struct file_handle) + fhp->handle_bytes;
+ fhp2 = realloc(fhp, fhsize);
+ if (!fhp2) {
+ log_err("realloc");
+ goto free_mem;
+ }
+ err = name_to_handle_at(dirfd, cgroup_workdir, fhp2, &mount_id, flags);
+ fhp = fhp2;
+ if (err < 0) {
+ log_err("name_to_handle_at");
+ goto free_mem;
+ }
+
+ memcpy(id.raw_bytes, fhp->f_handle, 8);
+ ret = id.cgid;
+
+free_mem:
+ free(fhp);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
new file mode 100644
index 000000000..d64bb8957
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CGROUP_HELPERS_H
+#define __CGROUP_HELPERS_H
+#include <errno.h>
+#include <string.h>
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+
+int create_and_get_cgroup(const char *path);
+int join_cgroup(const char *path);
+int setup_cgroup_environment(void);
+void cleanup_cgroup_environment(void);
+unsigned long long get_cgroup_id(const char *path);
+
+#endif
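
The helpers above are meant to be called in a fixed order; a hypothetical skeleton (mirroring what get_cgroup_id_user.c below does):

/* cgroup_demo.c - hypothetical skeleton using cgroup_helpers.h */
#include "cgroup_helpers.h"

int main(void)
{
	int cgroup_fd;

	if (setup_cgroup_environment())		/* mount cgroup2, make workdir */
		return 1;
	cgroup_fd = create_and_get_cgroup("/demo");
	if (!cgroup_fd)				/* 0 signals failure here */
		goto out;
	if (join_cgroup("/demo"))		/* move this process into it */
		goto out;
	/* ... attach BPF programs to cgroup_fd and run assertions ... */
out:
	cleanup_cgroup_environment();		/* idempotent teardown */
	return 0;
}
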
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
new file mode 100644
index 000000000..b4994a949
--- /dev/null
+++ b/tools/testing/selftests/bpf/config
@@ -0,0 +1,20 @@
+CONFIG_BPF=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_NET_CLS_BPF=m
+CONFIG_BPF_EVENTS=y
+CONFIG_TEST_BPF=m
+CONFIG_CGROUP_BPF=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_IPIP=y
+CONFIG_IPV6=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_IPV6_GRE=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
diff --git a/tools/testing/selftests/bpf/connect4_prog.c b/tools/testing/selftests/bpf/connect4_prog.c
new file mode 100644
index 000000000..5a88a681d
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect4_prog.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa;
+
+ /* Rewrite destination. */
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+
+ if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+		/* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(0);
+ sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
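
A hypothetical user-space counterpart for the program above, using the same legacy libbpf entry points the other tests in this patch use (bpf_prog_load() plus bpf_prog_attach() with BPF_CGROUP_INET4_CONNECT); names and paths are illustrative:

/* load_connect4.c - hypothetical loader sketch */
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"

static int attach_connect4(const char *cgroup_path)
{
	struct bpf_object *obj;
	int cgroup_fd, prog_fd;

	cgroup_fd = create_and_get_cgroup(cgroup_path);
	if (!cgroup_fd)
		return -1;
	if (bpf_prog_load("connect4_prog.o", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
			  &obj, &prog_fd))
		return -1;
	/* every connect() in the cgroup now runs connect_v4_prog first */
	return bpf_prog_attach(prog_fd, cgroup_fd,
			       BPF_CGROUP_INET4_CONNECT, 0);
}
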
diff --git a/tools/testing/selftests/bpf/connect6_prog.c b/tools/testing/selftests/bpf/connect6_prog.c
new file mode 100644
index 000000000..8ea3f7d12
--- /dev/null
+++ b/tools/testing/selftests/bpf/connect6_prog.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa;
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ if (ctx->type == SOCK_DGRAM || ctx->type == SOCK_STREAM) {
+ /* Rewrite source. */
+ memset(&sa, 0, sizeof(sa));
+
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = bpf_htons(0);
+
+ sa.sin6_addr.s6_addr32[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ sa.sin6_addr.s6_addr32[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ sa.sin6_addr.s6_addr32[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/dev_cgroup.c b/tools/testing/selftests/bpf/dev_cgroup.c
new file mode 100644
index 000000000..ce41a3475
--- /dev/null
+++ b/tools/testing/selftests/bpf/dev_cgroup.c
@@ -0,0 +1,60 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/version.h>
+#include "bpf_helpers.h"
+
+SEC("cgroup/dev")
+int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
+{
+ short type = ctx->access_type & 0xFFFF;
+#ifdef DEBUG
+ short access = ctx->access_type >> 16;
+ char fmt[] = " %d:%d \n";
+
+ switch (type) {
+ case BPF_DEVCG_DEV_BLOCK:
+ fmt[0] = 'b';
+ break;
+ case BPF_DEVCG_DEV_CHAR:
+ fmt[0] = 'c';
+ break;
+ default:
+ fmt[0] = '?';
+ break;
+ }
+
+ if (access & BPF_DEVCG_ACC_READ)
+ fmt[8] = 'r';
+
+ if (access & BPF_DEVCG_ACC_WRITE)
+ fmt[9] = 'w';
+
+ if (access & BPF_DEVCG_ACC_MKNOD)
+ fmt[10] = 'm';
+
+ bpf_trace_printk(fmt, sizeof(fmt), ctx->major, ctx->minor);
+#endif
+
+	/* Allow access to /dev/zero and /dev/urandom.
+	 * Forbid everything else.
+	 */
+ if (ctx->major != 1 || type != BPF_DEVCG_DEV_CHAR)
+ return 0;
+
+ switch (ctx->minor) {
+ case 5: /* 1:5 /dev/zero */
+ case 9: /* 1:9 /dev/urandom */
+ return 1;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = LINUX_VERSION_CODE;
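
For reference, ctx->access_type packs the device type in the low 16 bits and the BPF_DEVCG_ACC_* bitmask in the high 16 bits, which is what the shift and mask above decode. A hypothetical predicate in the same style:

/* hypothetical sketch of decoding bpf_cgroup_dev_ctx::access_type */
static int is_char_dev_read(struct bpf_cgroup_dev_ctx *ctx)
{
	short type = ctx->access_type & 0xFFFF;	/* BPF_DEVCG_DEV_* */
	short access = ctx->access_type >> 16;	/* BPF_DEVCG_ACC_* mask */

	return type == BPF_DEVCG_DEV_CHAR && (access & BPF_DEVCG_ACC_READ);
}
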
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
new file mode 100644
index 000000000..014dba10b
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_kern.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") cg_ids = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") pidmap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int trace(void *ctx)
+{
+ __u32 pid = bpf_get_current_pid_tgid();
+ __u32 key = 0, *expected_pid;
+ __u64 *val;
+
+ expected_pid = bpf_map_lookup_elem(&pidmap, &key);
+ if (!expected_pid || *expected_pid != pid)
+ return 0;
+
+ val = bpf_map_lookup_elem(&cg_ids, &key);
+ if (val)
+ *val = bpf_get_current_cgroup_id();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
new file mode 100644
index 000000000..e8da7b391
--- /dev/null
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map)
+ return -1;
+ return bpf_map__fd(map);
+}
+
+#define TEST_CGROUP "/test-bpf-get-cgroup-id/"
+
+int main(int argc, char **argv)
+{
+ const char *probe_name = "syscalls/sys_enter_nanosleep";
+ const char *file = "get_cgroup_id_kern.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ int cgroup_fd, cgidmap_fd, pidmap_fd;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj;
+ __u64 kcgid = 0, ucgid;
+ __u32 key = 0, pid;
+ int exit_code = 1;
+ char buf[256];
+
+ err = setup_cgroup_environment();
+ if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
+ errno))
+ return 1;
+
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
+ cgroup_fd, errno))
+ goto cleanup_cgroup_env;
+
+ err = join_cgroup(TEST_CGROUP);
+ if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup_cgroup_env;
+
+ cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
+ if (CHECK(cgidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ cgidmap_fd, errno))
+ goto close_prog;
+
+ pidmap_fd = bpf_find_map(__func__, obj, "pidmap");
+ if (CHECK(pidmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ pidmap_fd, errno))
+ goto close_prog;
+
+ pid = getpid();
+ bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+	bytes = read(efd, buf, sizeof(buf));
+	close(efd);
+	if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+		  "bytes %d errno %d\n", bytes, errno))
+		goto close_prog;
+	buf[bytes] = '\0';	/* ensure strtol() sees a terminated string */
+
+	attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+	/* attach to this pid so that all bpf invocations will be in the
+	 * cgroup associated with this pid.
+	 */
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* trigger some syscalls */
+ sleep(1);
+
+ err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
+ if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ ucgid = get_cgroup_id(TEST_CGROUP);
+ if (CHECK(kcgid != ucgid, "compare_cgroup_id",
+ "kern cgid %llx user cgid %llx", kcgid, ucgid))
+ goto close_pmu;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+cleanup_cgroup_env:
+ cleanup_cgroup_environment();
+ return exit_code;
+}
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
new file mode 100644
index 000000000..719225b16
--- /dev/null
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -0,0 +1 @@
+/* dummy .h to trick /usr/include/features.h into working with 'clang -target bpf' */
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/testing/selftests/bpf/include/uapi/linux/types.h
new file mode 100644
index 000000000..91fa51a9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/include/uapi/linux/types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _UAPI_LINUX_TYPES_H
+#define _UAPI_LINUX_TYPES_H
+
+#include <asm-generic/int-ll64.h>
+
+/* copied from linux:include/uapi/linux/types.h */
+#define __bitwise
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+typedef __u16 __bitwise __sum16;
+typedef __u32 __bitwise __wsum;
+
+#define __aligned_u64 __u64 __attribute__((aligned(8)))
+#define __aligned_be64 __be64 __attribute__((aligned(8)))
+#define __aligned_le64 __le64 __attribute__((aligned(8)))
+
+#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/tools/testing/selftests/bpf/sample_map_ret0.c b/tools/testing/selftests/bpf/sample_map_ret0.c
new file mode 100644
index 000000000..075630367
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_map_ret0.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+struct bpf_map_def SEC("maps") htab = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(long),
+ .max_entries = 2,
+};
+
+/* Sample program which should always load for testing control paths. */
+SEC(".text") int func()
+{
+ __u64 key64 = 0;
+ __u32 key = 0;
+ long *value;
+
+ value = bpf_map_lookup_elem(&htab, &key);
+ if (!value)
+ return 1;
+ value = bpf_map_lookup_elem(&array, &key64);
+ if (!value)
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c
new file mode 100644
index 000000000..fec99750d
--- /dev/null
+++ b/tools/testing/selftests/bpf/sample_ret0.c
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+/* Sample program which should always load for testing control paths. */
+int func()
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/sendmsg4_prog.c b/tools/testing/selftests/bpf/sendmsg4_prog.c
new file mode 100644
index 000000000..a91536b1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg4_prog.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
+#define SRC2_IP4 0x00000000U
+#define SRC_REWRITE_IP4 0x7f000004U
+#define DST_IP4 0xC0A801FEU /* 192.168.1.254 */
+#define DST_REWRITE_IP4 0x7f000001U
+#define DST_PORT 4040
+#define DST_REWRITE_PORT4 4444
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_DGRAM)
+ return 0;
+
+ /* Rewrite source. */
+ if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
+ ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
+ ctx->msg_src_ip4 = bpf_htonl(SRC_REWRITE_IP4);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ /* Rewrite destination. */
+ if ((ctx->user_ip4 >> 24) == (bpf_htonl(DST_IP4) >> 24) &&
+ ctx->user_port == bpf_htons(DST_PORT)) {
+ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
+ } else {
+		/* Unexpected destination. Reject sendmsg. */
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sendmsg6_prog.c b/tools/testing/selftests/bpf/sendmsg6_prog.c
new file mode 100644
index 000000000..a68062820
--- /dev/null
+++ b/tools/testing/selftests/bpf/sendmsg6_prog.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define SRC_REWRITE_IP6_0 0
+#define SRC_REWRITE_IP6_1 0
+#define SRC_REWRITE_IP6_2 0
+#define SRC_REWRITE_IP6_3 6
+
+#define DST_REWRITE_IP6_0 0
+#define DST_REWRITE_IP6_1 0
+#define DST_REWRITE_IP6_2 0
+#define DST_REWRITE_IP6_3 1
+
+#define DST_REWRITE_PORT6 6666
+
+int _version SEC("version") = 1;
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_DGRAM)
+ return 0;
+
+ /* Rewrite source. */
+ if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
+ ctx->msg_src_ip6[3] == bpf_htonl(0)) {
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+ } else {
+ /* Unexpected source. Reject sendmsg. */
+ return 0;
+ }
+
+ /* Rewrite destination. */
+ if (ctx->user_ip6[0] == bpf_htonl(0xFACEB00C)) {
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+ } else {
+ /* Unexpected destination. Reject sendmsg. */
+ return 0;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/socket_cookie_prog.c b/tools/testing/selftests/bpf/socket_cookie_prog.c
new file mode 100644
index 000000000..9ff8ac4b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/socket_cookie_prog.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <sys/socket.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+struct bpf_map_def SEC("maps") socket_cookies = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u64),
+ .value_size = sizeof(__u32),
+ .max_entries = 1 << 8,
+};
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+ __u32 cookie_value = 0xFF;
+ __u64 cookie_key;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ cookie_key = bpf_get_socket_cookie(ctx);
+ if (bpf_map_update_elem(&socket_cookies, &cookie_key, &cookie_value, 0))
+ return 0;
+
+ return 1;
+}
+
+SEC("sockops")
+int update_cookie(struct bpf_sock_ops *ctx)
+{
+ __u32 new_cookie_value;
+ __u32 *cookie_value;
+ __u64 cookie_key;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ cookie_key = bpf_get_socket_cookie(ctx);
+
+ cookie_value = bpf_map_lookup_elem(&socket_cookies, &cookie_key);
+ if (!cookie_value)
+ return 1;
+
+ new_cookie_value = (ctx->local_port << 8) | *cookie_value;
+ bpf_map_update_elem(&socket_cookies, &cookie_key, &new_cookie_value, 0);
+
+ return 1;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
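
A hypothetical user-space check for this test (not part of the patch): walk the socket_cookies hash from a loader and print each cookie/value pair, assuming map_fd was obtained via bpf_object__find_map_by_name() as elsewhere in this patch.

/* cookie_dump.c - hypothetical sketch */
#include <stdio.h>
#include <bpf/bpf.h>

static void dump_cookies(int map_fd)
{
	__u64 key = 0, next_key;
	__u32 value;

	/* iterate the hash map; keys are socket cookies */
	while (!bpf_map_get_next_key(map_fd, &key, &next_key)) {
		if (!bpf_map_lookup_elem(map_fd, &next_key, &value))
			printf("cookie %llu -> %#x\n",
			       (unsigned long long)next_key, value);
		key = next_key;
	}
}
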
diff --git a/tools/testing/selftests/bpf/sockmap_parse_prog.c b/tools/testing/selftests/bpf/sockmap_parse_prog.c
new file mode 100644
index 000000000..0f92858f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_parse_prog.c
@@ -0,0 +1,46 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u32 len = (__u32) data_end - (__u32) data;
+ int err;
+
+ if (data + 10 > data_end) {
+ err = bpf_skb_pull_data(skb, 10);
+ if (err)
+ return SK_DROP;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+ if (data + 10 > data_end)
+ return SK_DROP;
+ }
+
+ /* This write/read is a bit pointless but tests the verifier and
+ * strparser handler for read/write pkt data and access into sk
+ * fields.
+ */
+ d = data;
+ d[7] = 1;
+ return skb->len;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
new file mode 100644
index 000000000..12a7b5c82
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_tcp_msg_prog.c
@@ -0,0 +1,33 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+SEC("sk_msg1")
+int bpf_prog1(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+
+ char *d;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
+ d = (char *)data;
+ bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
new file mode 100644
index 000000000..2ce7634a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/sockmap_verdict_prog.c
@@ -0,0 +1,73 @@
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+struct bpf_map_def SEC("maps") sock_map_rx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_tx = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_msg = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_break = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long) skb->data_end;
+ void *data = (void *)(long) skb->data;
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ __u8 *d = data;
+ __u8 sk, map;
+
+ if (data + 8 > data_end)
+ return SK_DROP;
+
+ map = d[0];
+ sk = d[1];
+
+ d[0] = 0xd;
+ d[1] = 0xe;
+ d[2] = 0xa;
+ d[3] = 0xd;
+ d[4] = 0xb;
+ d[5] = 0xe;
+ d[6] = 0xe;
+ d[7] = 0xf;
+
+ if (!map)
+ return bpf_sk_redirect_map(skb, &sock_map_rx, sk, 0);
+ return bpf_sk_redirect_map(skb, &sock_map_tx, sk, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
new file mode 100755
index 000000000..7f8200a87
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_client.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = b''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error) as e: return b''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error) as e: n = 0
+ if n == 0:
+			return count
+ count += n
+ return count
+
+
+serverPort = int(sys.argv[1])
+HostName = socket.gethostname()
+
+# create active socket
+sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+try:
+ sock.connect((HostName, serverPort))
+except socket.error as e:
+ sys.exit(1)
+
+buf = b''
+n = 0
+while n < 1000:
+ buf += b'+'
+ n += 1
+
+sock.settimeout(1)
+n = send(sock, buf)
+n = read(sock, 500)
+sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
new file mode 100755
index 000000000..b39903fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/tcp_server.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+
+import sys, os, os.path, getopt
+import socket, time
+import subprocess
+import select
+
+def read(sock, n):
+ buf = b''
+ while len(buf) < n:
+ rem = n - len(buf)
+ try: s = sock.recv(rem)
+ except (socket.error) as e: return b''
+ buf += s
+ return buf
+
+def send(sock, s):
+ total = len(s)
+ count = 0
+ while count < total:
+ try: n = sock.send(s)
+ except (socket.error) as e: n = 0
+ if n == 0:
+			return count
+ count += n
+ return count
+
+
+SERVER_PORT = 12877
+MAX_PORTS = 2
+
+serverPort = SERVER_PORT
+serverSocket = None
+
+HostName = socket.gethostname()
+
+# create passive socket
+serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+host = socket.gethostname()
+
+try: serverSocket.bind((host, 0))
+except socket.error as msg:
+ print('bind fails: ' + str(msg))
+
+sn = serverSocket.getsockname()
+serverPort = sn[1]
+
+cmdStr = ("./tcp_client.py %d &") % (serverPort)
+os.system(cmdStr)
+
+buf = b''
+n = 0
+while n < 500:
+ buf += b'.'
+ n += 1
+
+serverSocket.listen(MAX_PORTS)
+readList = [serverSocket]
+
+while True:
+ readyRead, readyWrite, inError = \
+ select.select(readList, [], [], 2)
+
+ if len(readyRead) > 0:
+ waitCount = 0
+ for sock in readyRead:
+ if sock == serverSocket:
+ (clientSocket, address) = serverSocket.accept()
+ address = str(address[0])
+ readList.append(clientSocket)
+ else:
+				sock.settimeout(1)
+ s = read(sock, 1000)
+ n = send(sock, buf)
+ sock.close()
+ serverSocket.close()
+ sys.exit(0)
+ else:
+ print('Select timeout!')
+ sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_adjust_tail.c b/tools/testing/selftests/bpf/test_adjust_tail.c
new file mode 100644
index 000000000..4cd5e860c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_adjust_tail.c
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail")
+int _xdp_adjust_tail(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ int offset = 0;
+
+ if (data_end - data == 54)
+ offset = 256;
+ else
+ offset = 20;
+ if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+ return XDP_DROP;
+ return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
new file mode 100644
index 000000000..3c789d03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -0,0 +1,719 @@
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#define MAX_INSNS 512
+#define MAX_MATCHES 16
+
+struct bpf_reg_match {
+ unsigned int line;
+ const char *match;
+};
+
+struct bpf_align_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ enum {
+ UNDEF,
+ ACCEPT,
+ REJECT
+ } result;
+ enum bpf_prog_type prog_type;
+ /* Matches must be in order of increasing line */
+ struct bpf_reg_match matches[MAX_MATCHES];
+};
+
+static struct bpf_align_test tests[] = {
+ /* Four tests of known constants. These aren't staggeringly
+ * interesting since we track exact values now.
+ */
+ {
+ .descr = "mov",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 16),
+ BPF_MOV64_IMM(BPF_REG_3, 32),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv2"},
+ {2, "R3_w=inv4"},
+ {3, "R3_w=inv8"},
+ {4, "R3_w=inv16"},
+ {5, "R3_w=inv32"},
+ },
+ },
+ {
+ .descr = "shift",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 32),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv1"},
+ {2, "R3_w=inv2"},
+ {3, "R3_w=inv4"},
+ {4, "R3_w=inv8"},
+ {5, "R3_w=inv16"},
+ {6, "R3_w=inv1"},
+ {7, "R4_w=inv32"},
+ {8, "R4_w=inv16"},
+ {9, "R4_w=inv8"},
+ {10, "R4_w=inv4"},
+ {11, "R4_w=inv2"},
+ },
+ },
+ {
+ .descr = "addsub",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv4"},
+ {2, "R3_w=inv8"},
+ {3, "R3_w=inv10"},
+ {4, "R4_w=inv8"},
+ {5, "R4_w=inv12"},
+ {6, "R4_w=inv14"},
+ },
+ },
+ {
+ .descr = "mul",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {1, "R1=ctx(id=0,off=0,imm=0)"},
+ {1, "R10=fp0"},
+ {1, "R3_w=inv7"},
+ {2, "R3_w=inv7"},
+ {3, "R3_w=inv14"},
+ {4, "R3_w=inv56"},
+ },
+ },
+
+ /* Tests using unknown values */
+#define PREP_PKT_POINTERS \
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
+ offsetof(struct __sk_buff, data)), \
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
+ offsetof(struct __sk_buff, data_end))
+
+#define LOAD_UNKNOWN(DST_REG) \
+ PREP_PKT_POINTERS, \
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
+ BPF_EXIT_INSN(), \
+ BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
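+
+/* PREP_PKT_POINTERS loads skb->data into R2 and skb->data_end into R3;
+ * LOAD_UNKNOWN additionally bounds-checks that 8 bytes are available
+ * and reads one packet byte into DST_REG, leaving an unknown value in
+ * [0, 255] for the tests below to shift, mask and accumulate.
+ */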
+
+ {
+ .descr = "unknown shift",
+ .insns = {
+ LOAD_UNKNOWN(BPF_REG_3),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+ LOAD_UNKNOWN(BPF_REG_4),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {7, "R0=pkt(id=0,off=8,r=8,imm=0)"},
+ {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {18, "R3=pkt_end(id=0,off=0,imm=0)"},
+ {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+ {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ },
+ },
+ {
+ .descr = "unknown mul",
+ .insns = {
+ LOAD_UNKNOWN(BPF_REG_3),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+ {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ },
+ },
+ {
+ .descr = "packet const offset",
+ .insns = {
+ PREP_PKT_POINTERS,
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+
+ /* Skip over ethernet header. */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
+ BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+ {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
+ {10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+ {10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
+ {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+ {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ },
+ },
+ {
+ .descr = "packet variable offset",
+ .insns = {
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+
+ /* First, add a constant to the R5 packet pointer,
+ * then a variable with a known alignment.
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ /* Now, test in the other direction. Adding first
+ * the variable offset to R5, then the constant.
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ /* Test multiple accumulations of unknown values
+ * into a packet pointer.
+ */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Offset is added to packet pointer R5, resulting in
+ * known fixed offset, and variable offset from R6.
+ */
+ {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* At the time the word size load is performed from R5,
+		 * its total offset is NET_IP_ALIGN + reg->off (0) +
+ * reg->aux_off (14) which is 16. Then the variable
+ * offset is considered using reg->aux_off_align which
+ * is 4 and meets the load's requirements.
+ */
+ {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Variable offset is added to R5 packet pointer,
+ * resulting in auxiliary alignment of 4.
+ */
+ {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant offset is added to R5, resulting in
+ * reg->off of 14.
+ */
+ {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off
+ * (14) which is 16. Then the variable offset is 4-byte
+ * aligned, so the total offset is 4-byte aligned and
+ * meets the load's requirements.
+ */
+ {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant offset is added to R5 packet pointer,
+ * resulting in reg->off value of 14.
+ */
+ {26, "R5_w=pkt(id=0,off=14,r=8"},
+ /* Variable offset is added to R5, resulting in a
+ * variable offset of (4n).
+ */
+ {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Constant is added to R5 again, setting reg->off to 18. */
+ {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* And once more we add a variable; resulting var_off
+ * is still (4n), fixed offset is not changed.
+ * Also, we create a new reg->id.
+ */
+ {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+ * which is 20. Then the variable offset is (4n), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ },
+ },
+ {
+ .descr = "packet variable offset 2",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Add it to the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ /* Make a (4n) offset from the value we just read */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ /* Add it to the packet pointer */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* Packet pointer has (4n+2) offset */
+ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* Newly read value in R6 was shifted left by 2, so has
+ * known alignment of 4.
+ */
+ {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Added (4n) to packet pointer's (4n+2) var_off, giving
+ * another (4n+2).
+ */
+ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ },
+ },
+ {
+ .descr = "dubious pointer arithmetic",
+ .insns = {
+ PREP_PKT_POINTERS,
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* (ptr - ptr) << 2 */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+ /* We have a (4n) value. Let's make a packet offset
+ * out of it. First add 14, to make it a (4n+2)
+ */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ /* Then make sure it's nonnegative */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+ BPF_EXIT_INSN(),
+ /* Add it to packet pointer */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .matches = {
+ {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ /* (ptr - ptr) << 2 == unknown, (4n) */
+ {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ /* (4n) + 14 == (4n+2). We blow our bounds, because
+ * the add could overflow.
+ */
+ {7, "R5=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ /* Checked s>=0 */
+ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* packet pointer + nonnegative (4n+2) */
+ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+ * We checked the bounds, but it might have been able
+ * to overflow if the packet pointer started in the
+ * upper half of the address space.
+ * So we did not get a 'range' on R6, and the access
+ * attempt will fail.
+ */
+ {15, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ }
+ },
+ {
+ .descr = "variable subtraction",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Create another unknown, (4n)-aligned, and subtract
+ * it from the first one
+ */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+ /* Bounds-check the result */
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+ BPF_EXIT_INSN(),
+ /* Add it to the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* New unknown value in R7 is (4n) */
+ {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ /* Subtracting it from R6 blows our unsigned bounds */
+ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+		/* Checked s>=0 */
+ {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ },
+ },
+ {
+ .descr = "pointer variable subtraction",
+ .insns = {
+ /* Create an unknown offset, (4n+2)-aligned and bounded
+ * to [14,74]
+ */
+ LOAD_UNKNOWN(BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+ /* Subtract it from the packet pointer */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+ /* Create another unknown, (4n)-aligned and >= 74.
+ * That in fact means >= 76, since 74 % 4 == 2
+ */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+ /* Add it to the packet pointer */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+ /* Check bounds and perform a read */
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .matches = {
+ /* Calculated offset in R6 has unknown value, but known
+ * alignment of 4.
+ */
+ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"},
+ {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ /* Adding 14 makes R6 be (4n+2) */
+ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ /* Subtracting from packet pointer overflows ubounds */
+ {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ /* New unknown value in R7 is (4n), >= 76 */
+ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ /* Adding it to packet pointer gives nice bounds again */
+ {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ /* At the time the word size load is performed from R5,
+ * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+ * which is 2. Then the variable offset is (4n+2), so
+ * the total offset is 4-byte aligned and meets the
+ * load's requirements.
+ */
+ {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ },
+ },
+};
+
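+/* The insns arrays are zero-padded out to MAX_INSNS, so scan backwards
+ * for the last non-zero instruction to recover the real program length
+ * (+1 so the count includes that final instruction).
+ */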
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+ int len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
+
+static char bpf_vlog[32768];
+
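+/* Load each program with BPF_F_STRICT_ALIGNMENT and verifier log level
+ * 2, which makes the verifier dump its tracked register state as it
+ * walks the instructions; the test's matches are then checked against
+ * that dump line by line.
+ */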
+static int do_test_single(struct bpf_align_test *test)
+{
+ struct bpf_insn *prog = test->insns;
+ int prog_type = test->prog_type;
+ char bpf_vlog_copy[32768];
+ const char *line_ptr;
+ int cur_line = -1;
+ int prog_len, i;
+ int fd_prog;
+ int ret;
+
+ prog_len = probe_filter_length(prog);
+ fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+ prog, prog_len, BPF_F_STRICT_ALIGNMENT,
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+ if (fd_prog < 0 && test->result != REJECT) {
+ printf("Failed to load program.\n");
+ printf("%s", bpf_vlog);
+ ret = 1;
+ } else if (fd_prog >= 0 && test->result == REJECT) {
+ printf("Unexpected success to load!\n");
+ printf("%s", bpf_vlog);
+ ret = 1;
+ close(fd_prog);
+ } else {
+ ret = 0;
+ /* We make a local copy so that we can strtok() it */
+ strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
+ line_ptr = strtok(bpf_vlog_copy, "\n");
+ for (i = 0; i < MAX_MATCHES; i++) {
+ struct bpf_reg_match m = test->matches[i];
+
+ if (!m.match)
+ break;
+ while (line_ptr) {
+ cur_line = -1;
+ sscanf(line_ptr, "%u: ", &cur_line);
+ if (cur_line == m.line)
+ break;
+ line_ptr = strtok(NULL, "\n");
+ }
+ if (!line_ptr) {
+ printf("Failed to find line %u for match: %s\n",
+ m.line, m.match);
+ ret = 1;
+ printf("%s", bpf_vlog);
+ break;
+ }
+ if (!strstr(line_ptr, m.match)) {
+ printf("Failed to find match %u: %s\n",
+ m.line, m.match);
+ ret = 1;
+ printf("%s", bpf_vlog);
+ break;
+ }
+ }
+ if (fd_prog >= 0)
+ close(fd_prog);
+ }
+ return ret;
+}
+
+static int do_test(unsigned int from, unsigned int to)
+{
+ int all_pass = 0;
+ int all_fail = 0;
+ unsigned int i;
+
+ for (i = from; i < to; i++) {
+ struct bpf_align_test *test = &tests[i];
+ int fail;
+
+ printf("Test %3d: %s ... ",
+ i, test->descr);
+ fail = do_test_single(test);
+ if (fail) {
+ all_fail++;
+ printf("FAIL\n");
+ } else {
+ all_pass++;
+ printf("PASS\n");
+ }
+ }
+ printf("Results: %d pass %d fail\n",
+ all_pass, all_fail);
+ return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
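+/* Argument handling, summarized:
+ *	(no args)	run all tests
+ *	N		run only test N
+ *	L U		run tests L through U inclusive
+ */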
+int main(int argc, char **argv)
+{
+ unsigned int from = 0, to = ARRAY_SIZE(tests);
+
+ if (argc == 3) {
+ unsigned int l = atoi(argv[argc - 2]);
+ unsigned int u = atoi(argv[argc - 1]);
+
+ if (l < to && u < to) {
+ from = l;
+ to = u + 1;
+ }
+ } else if (argc == 2) {
+ unsigned int t = atoi(argv[argc - 1]);
+
+ if (t < to) {
+ from = t;
+ to = t + 1;
+ }
+ }
+ return do_test(from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
new file mode 100644
index 000000000..29116366a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -0,0 +1,2840 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <sys/resource.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+static uint32_t pass_cnt;
+static uint32_t error_cnt;
+static uint32_t skip_cnt;
+
+#define CHECK(condition, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
+ fprintf(stderr, format); \
+ } \
+ __ret; \
+})
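+
+/* CHECK() logs the failing condition and evaluates to its truth value,
+ * so a caller can both report and bail out in one step, e.g. (a
+ * hypothetical caller, not taken from this file):
+ *
+ *	if (CHECK(fd < 0, "btf load failed: %d", fd))
+ *		return fd;
+ */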
+
+static int count_result(int err)
+{
+ if (err)
+ error_cnt++;
+ else
+ pass_cnt++;
+
+ fprintf(stderr, "\n");
+ return err;
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
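+/* vfprintf(stderr, ...) wrapper, presumably installed as the print
+ * callback for the libbpf/BTF APIs when verbose logging is requested.
+ */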
+__printf(1, 2)
+static int __base_pr(const char *format, ...)
+{
+ va_list args;
+ int err;
+
+ va_start(args, format);
+ err = vfprintf(stderr, format, args);
+ va_end(args);
+ return err;
+}
+
+#define BTF_INFO_ENC(kind, root, vlen) \
+ ((!!(root) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+
+#define BTF_TYPE_ENC(name, info, size_or_type) \
+ (name), (info), (size_or_type)
+
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+#define BTF_ARRAY_ENC(type, index_type, nr_elems) \
+ (type), (index_type), (nr_elems)
+#define BTF_TYPE_ARRAY_ENC(type, index_type, nr_elems) \
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), \
+ BTF_ARRAY_ENC(type, index_type, nr_elems)
+
+#define BTF_MEMBER_ENC(name, type, bits_offset) \
+ (name), (type), (bits_offset)
+#define BTF_ENUM_ENC(name, val) (name), (val)
+
+#define BTF_TYPEDEF_ENC(name, type) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), type)
+
+#define BTF_PTR_ENC(name, type) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), type)
+
+#define BTF_END_RAW 0xdeadbeef
+#define NAME_TBD 0xdeadb33f
+
+#define MAX_NR_RAW_TYPES 1024
+#define BTF_LOG_BUF_SIZE 65535
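+
+/* As an illustration of the encoding macros, the anonymous 32-bit
+ * signed int used throughout the tests,
+ *
+ *	BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ *
+ * expands to four __u32 words: the three-word btf_type (empty name,
+ * BTF_KIND_INT info, size 4) followed by one int_data word packing the
+ * encoding, bit offset and bit width.
+ */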
+
+static struct args {
+ unsigned int raw_test_num;
+ unsigned int file_test_num;
+ unsigned int get_info_test_num;
+ bool raw_test;
+ bool file_test;
+ bool get_info_test;
+ bool pprint_test;
+ bool always_log;
+} args;
+
+static char btf_log_buf[BTF_LOG_BUF_SIZE];
+
+static struct btf_header hdr_tmpl = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+};
+
+struct btf_raw_test {
+ const char *descr;
+ const char *str_sec;
+ const char *map_name;
+ const char *err_str;
+ __u32 raw_types[MAX_NR_RAW_TYPES];
+ __u32 str_sec_size;
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 key_type_id;
+ __u32 value_type_id;
+ __u32 max_entries;
+ bool btf_load_err;
+ bool map_create_err;
+ bool ordered_map;
+ bool lossless_map;
+ int hdr_len_delta;
+ int type_off_delta;
+ int str_off_delta;
+ int str_len_delta;
+};
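+
+/* The *_delta fields, when non-zero, are presumably applied by the
+ * test setup (not shown in this hunk) to the corresponding btf_header
+ * fields, deliberately corrupting hdr_len or the section offsets and
+ * lengths so the header validation paths can be exercised.
+ */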
+
+static struct btf_raw_test raw_tests[] = {
+/* enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * unsigned long long m;
+ * int n;
+ * char o;
+ * [3 bytes hole]
+ * int p[8];
+ * int q[4][8];
+ * enum E r;
+ * };
+ */
+{
+ .descr = "struct test #1",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* unsigned long long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ /* char */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 8, 1), /* [3] */
+ /* int[8] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 8), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 6), 180),
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* unsigned long long m;*/
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64),/* int n; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 96),/* char o; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 128),/* int p[8] */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 384),/* int q[4][8] */
+ BTF_MEMBER_ENC(NAME_TBD, 7, 1408), /* enum E r */
+ /* } */
+ /* int[4][8] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [6] */
+ /* enum E */ /* [7] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0p\0q\0r\0E\0E0\0E1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test1_map",
+ .key_size = sizeof(int),
+ .value_size = 180,
+ .key_type_id = 1,
+ .value_type_id = 5,
+ .max_entries = 4,
+},
+
+/* typedef struct B Struct_B;
+ *
+ * struct A {
+ * int m;
+ *	struct B n[4];
+ * const Struct_B o[4];
+ * };
+ *
+ * struct B {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "struct test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+		/* struct B [4] */	/* [2] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4),
+
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 3), 68),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct B n[4] */
+ BTF_MEMBER_ENC(NAME_TBD, 8, 288),/* const Struct_B o[4];*/
+ /* } */
+
+ /* struct B { */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+ /* } */
+
+ /* const int */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+		/* typedef struct B Struct_B */	/* [6] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 4),
+ /* const Struct_B */ /* [7] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 6),
+ /* const Struct_B [4] */ /* [8] */
+ BTF_TYPE_ARRAY_ENC(7, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0o\0B\0m\0n\0Struct_B",
+ .str_sec_size = sizeof("\0A\0m\0n\0o\0B\0m\0n\0Struct_B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test2_map",
+ .key_size = sizeof(int),
+ .value_size = 68,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+},
+
+{
+ .descr = "struct test #3 Invalid member offset",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int64 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8),
+
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 16),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 64), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0), /* int64 n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n\0",
+ .str_sec_size = sizeof("\0A\0m\0n\0"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_test3_map",
+ .key_size = sizeof(int),
+ .value_size = 16,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid member bits_offset",
+},
+
+/* Test member exceeds the size of struct.
+ *
+ * struct A {
+ * int m;
+ * int n;
+ * };
+ */
+{
+ .descr = "size check test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),/* int n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check1_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* Test member exceeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * int n[2];
+ * };
+ */
+{
+ .descr = "size check test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* int[2] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 2),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 3 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* int n[2]; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check2_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* Test member exceeds the size of struct
+ *
+ * struct A {
+ * int m;
+ * void *n;
+ * };
+ */
+{
+ .descr = "size check test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* void* */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) + sizeof(void *) - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* void *n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0n",
+ .str_sec_size = sizeof("\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check3_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* Test member exceeds the size of struct
+ *
+ * enum E {
+ * E0,
+ * E1,
+ * };
+ *
+ * struct A {
+ * int m;
+ * enum E n;
+ * };
+ */
+{
+ .descr = "size check test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, sizeof(int)),
+ /* enum E { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 2), sizeof(int)),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ /* } */
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), sizeof(int) * 2 - 1),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* enum E n; */
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0E\0E0\0E1\0A\0m\0n",
+ .str_sec_size = sizeof("\0E\0E0\0E1\0A\0m\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "size_check4_map",
+ .key_size = sizeof(int),
+ .value_size = 1,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+
+/* typedef const void * const_void_ptr;
+ * struct A {
+ * const_void_ptr m;
+ * };
+ */
+{
+ .descr = "void test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ /* typedef const void * const_void_ptr */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */
+ /* struct A { */ /* [5] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const_void_ptr m; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0const_void_ptr\0A\0m",
+ .str_sec_size = sizeof("\0const_void_ptr\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test1_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 4,
+ .max_entries = 4,
+},
+
+/* struct A {
+ * const void m;
+ * };
+ */
+{
+ .descr = "void test #2",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* struct A { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 8),
+ /* const void m; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ /* } */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test2_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid member",
+},
+
+/* typedef const void * const_void_ptr;
+ * const_void_ptr[4]
+ */
+{
+ .descr = "void test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ /* typedef const void * const_void_ptr */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [4] */
+ /* const_void_ptr[4] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 4), /* [5] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0const_void_ptr",
+ .str_sec_size = sizeof("\0const_void_ptr"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test3_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *) * 4,
+ .key_type_id = 1,
+ .value_type_id = 5,
+ .max_entries = 4,
+},
+
+/* const void[4] */
+{
+ .descr = "void test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ /* const void[4] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "void_test4_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *) * 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
+
+/* Array_A <------------------+
+ * elem_type == Array_B |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ *                elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* Array_B */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test1_map",
+ .key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* typedef is _before_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A <------------------+
+ * elem_type == int_array |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #2",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* typedef Array_B int_array */
+ BTF_TYPEDEF_ENC(1, 4), /* [2] */
+ /* Array_A */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8), /* [3] */
+ /* Array_B */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8), /* [4] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int_array\0",
+ .str_sec_size = sizeof("\0int_array"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test2_map",
+ .key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* Array_A <------------------+
+ * elem_type == Array_B |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* Array_B */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test3_map",
+ .key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* typedef is _between_ the BTF type of Array_A and Array_B
+ *
+ * typedef Array_B int_array;
+ *
+ * Array_A <------------------+
+ * elem_type == int_array |
+ * | |
+ * | |
+ * Array_B <-------- + |
+ * elem_type == Array_A --+
+ */
+{
+ .descr = "loop test #4",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* Array_A */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 8),
+ /* typedef Array_B int_array */ /* [3] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4),
+ /* Array_B */ /* [4] */
+ BTF_TYPE_ARRAY_ENC(2, 1, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int_array\0",
+ .str_sec_size = sizeof("\0int_array"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test4_map",
+ .key_size = sizeof(int),
+	.value_size = sizeof(int) * 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* typedef struct B Struct_B;
+ *
+ * struct A {
+ * int x;
+ * Struct_B y;
+ * };
+ *
+ * struct B {
+ * int x;
+ * struct A y;
+ * };
+ */
+{
+ .descr = "loop test #5",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* struct A */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),/* Struct_B y; */
+ /* typedef struct B Struct_B */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4), /* [3] */
+ /* struct B */ /* [4] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A y; */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0x\0y\0Struct_B\0B\0x\0y",
+ .str_sec_size = sizeof("\0A\0x\0y\0Struct_B\0B\0x\0y"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test5_map",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+/* struct A {
+ * int x;
+ * struct A array_a[4];
+ * };
+ */
+{
+ .descr = "loop test #6",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 4), /* [2] */
+ /* struct A */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0), /* int x; */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 32),/* struct A array_a[4]; */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0x\0y",
+ .str_sec_size = sizeof("\0A\0x\0y"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test6_map",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "loop test #7",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *m; */
+ BTF_MEMBER_ENC(NAME_TBD, 3, 0),
+ /* CONST type_id=3 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* PTR type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 3),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m",
+ .str_sec_size = sizeof("\0A\0m"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test7_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "loop test #8",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* struct A { */ /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *m; */
+ BTF_MEMBER_ENC(NAME_TBD, 4, 0),
+ /* struct B { */ /* [3] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), sizeof(void *)),
+ /* const void *n; */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 0),
+ /* CONST type_id=5 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 5),
+ /* PTR type_id=6 */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 6),
+ /* CONST type_id=7 */ /* [6] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 7),
+ /* PTR type_id=4 */ /* [7] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0m\0B\0n",
+ .str_sec_size = sizeof("\0A\0m\0B\0n"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "loop_test8_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void *),
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Loop detected",
+},
+
+{
+ .descr = "string section does not end with null",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int") - 1,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty string section",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = 0,
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid string section",
+},
+
+{
+ .descr = "empty type section",
+ .raw_types = {
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "No type found",
+},
+
+{
+ .descr = "btf_header test. Longer hdr_len",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .hdr_len_delta = 4,
+ .err_str = "Unsupported btf_header",
+},
+
+{
+ .descr = "btf_header test. Gap between hdr and type",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .type_off_delta = 4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Gap between type and str",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_off_delta = 4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Overlap between type and str",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_off_delta = -4,
+ .err_str = "Section overlap found",
+},
+
+{
+ .descr = "btf_header test. Larger BTF size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = -4,
+ .err_str = "Unsupported section found",
+},
+
+{
+ .descr = "btf_header test. Smaller BTF size",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0int",
+ .str_sec_size = sizeof("\0int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "hdr_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .str_len_delta = 4,
+ .err_str = "Total section length too long",
+},
+
+{
+ .descr = "array test. index_type/elem_type \"int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 1, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type/elem_type \"const int\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=1 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type \"const int:31\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int:31 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(1, 4, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. elem_type \"const int:31\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int:31 */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 31, 4),
+ /* int[16] */ /* [3] */
+ BTF_TYPE_ARRAY_ENC(4, 1, 16),
+ /* CONST type_id=2 */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid array of int",
+},
+
+{
+ .descr = "array test. index_type \"void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 0, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. index_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(1, 3, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. elem_type \"const void\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 16),
+ /* CONST type_id=0 (void) */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid elem",
+},
+
+{
+ .descr = "array test. elem_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 1, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "array test. index_type \"const void *\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* const void *[16] */ /* [2] */
+ BTF_TYPE_ARRAY_ENC(3, 3, 16),
+ /* CONST type_id=4 */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 4),
+ /* void* */ /* [4] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid index",
+},
+
+{
+ .descr = "array test. t->size != 0\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int[16] */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 1),
+ BTF_ARRAY_ENC(1, 1, 16),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "size != 0",
+},
+
+{
+ .descr = "int test. invalid int_data",
+ .raw_types = {
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), 4),
+ 0x10000000,
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid int_data",
+},
+
+{
+ .descr = "invalid BTF_INFO",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info",
+},
+
+{
+ .descr = "fwd test. t->type != 0\"",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* fwd type */ /* [2] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "fwd_test_map",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "type != 0",
+},
+
+{
+ .descr = "typedef (invalid name, name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(0, 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "typedef_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "typedef (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__!int",
+ .str_sec_size = sizeof("\0__!int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "typedef_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "ptr type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "ptr_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "volatile type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_VOLATILE, 0, 0), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "volatile_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "const type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "const_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "restrict type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_RESTRICT, 0, 0), 2), /* [3] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__int",
+ .str_sec_size = sizeof("\0__int"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "restrict_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "fwd type (invalid name, name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__skb",
+ .str_sec_size = sizeof("\0__skb"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "fwd_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "fwd type (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_FWD, 0, 0), 0), /* [2] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__!skb",
+ .str_sec_size = sizeof("\0__!skb"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "fwd_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "array type (invalid name, name_off <> 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_ARRAY, 0, 0), 0), /* [2] */
+ BTF_ARRAY_ENC(1, 1, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0__skb",
+ .str_sec_size = sizeof("\0__skb"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "struct type (name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A",
+ .str_sec_size = sizeof("\0A"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "struct type (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A!\0B",
+ .str_sec_size = sizeof("\0A!\0B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "struct member (name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A",
+ .str_sec_size = sizeof("\0A"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "struct member (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0B*",
+ .str_sec_size = sizeof("\0A\0B*"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "struct_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "enum type (name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A\0B",
+ .str_sec_size = sizeof("\0A\0B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+},
+
+{
+ .descr = "enum type (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A!\0B",
+ .str_sec_size = sizeof("\0A!\0B"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "enum member (invalid name, name_off = 0)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(0, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+
+{
+ .descr = "enum member (invalid name, invalid identifier)",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(0,
+ BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1),
+ sizeof(int)), /* [2] */
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_END_RAW,
+ },
+ .str_sec = "\0A!",
+ .str_sec_size = sizeof("\0A!"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "enum_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .btf_load_err = true,
+ .err_str = "Invalid name",
+},
+{
+ .descr = "arraymap invalid btf key (a bit field)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* 32 bit int with 32 bit offset */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 32, 32, 8),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 2,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+{
+ .descr = "arraymap invalid btf key (!= 32 bits)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* 16 bit int with 0 bit offset */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 16, 2),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 2,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+{
+ .descr = "arraymap invalid btf value (too small)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ /* btf_value_size < map->value_size */
+ .value_size = sizeof(__u64),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+{
+ .descr = "arraymap invalid btf value (too big)",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "array_map_check_btf",
+ .key_size = sizeof(int),
+ /* btf_value_size > map->value_size */
+ .value_size = sizeof(__u16),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 4,
+ .map_create_err = true,
+},
+
+}; /* struct btf_raw_test raw_tests[] */
+
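+/* Return the string following @start in the string section, or NULL at the end */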
+static const char *get_next_str(const char *start, const char *end)
+{
+ return start < end - 1 ? start + 1 : NULL;
+}
+
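+/* Scan backwards for the BTF_END_RAW terminator and return the type
+ * section size in bytes, or a negative value if the terminator is missing.
+ */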
+static int get_type_sec_size(const __u32 *raw_types)
+{
+ int i;
+
+ for (i = MAX_NR_RAW_TYPES - 1;
+ i >= 0 && raw_types[i] != BTF_END_RAW;
+ i--)
+ ;
+
+ return i < 0 ? i : i * sizeof(raw_types[0]);
+}
+
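+/* Assemble a raw BTF blob in memory: the header, the type section (with
+ * NAME_TBD entries patched to real string offsets), then the string section.
+ */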
+static void *btf_raw_create(const struct btf_header *hdr,
+ const __u32 *raw_types,
+ const char *str,
+ unsigned int str_sec_size,
+ unsigned int *btf_size)
+{
+ const char *next_str = str, *end_str = str + str_sec_size;
+ unsigned int size_needed, offset;
+ struct btf_header *ret_hdr;
+ int i, type_sec_size;
+ uint32_t *ret_types;
+ void *raw_btf;
+
+ type_sec_size = get_type_sec_size(raw_types);
+ if (CHECK(type_sec_size < 0, "Cannot get nr_raw_types"))
+ return NULL;
+
+ size_needed = sizeof(*hdr) + type_sec_size + str_sec_size;
+ raw_btf = malloc(size_needed);
+ if (CHECK(!raw_btf, "Cannot allocate memory for raw_btf"))
+ return NULL;
+
+ /* Copy header */
+ memcpy(raw_btf, hdr, sizeof(*hdr));
+ offset = sizeof(*hdr);
+
+ /* Copy type section */
+ ret_types = raw_btf + offset;
+ for (i = 0; i < type_sec_size / sizeof(raw_types[0]); i++) {
+ if (raw_types[i] == NAME_TBD) {
+ next_str = get_next_str(next_str, end_str);
+ if (CHECK(!next_str, "Error in getting next_str")) {
+ free(raw_btf);
+ return NULL;
+ }
+ ret_types[i] = next_str - str;
+ next_str += strlen(next_str);
+ } else {
+ ret_types[i] = raw_types[i];
+ }
+ }
+ offset += type_sec_size;
+
+ /* Copy string section */
+ memcpy(raw_btf + offset, str, str_sec_size);
+
+ ret_hdr = (struct btf_header *)raw_btf;
+ ret_hdr->type_len = type_sec_size;
+ ret_hdr->str_off = type_sec_size;
+ ret_hdr->str_len = str_sec_size;
+
+ *btf_size = size_needed;
+
+ return raw_btf;
+}
+
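+/* Load one test's raw BTF (with its header deltas applied) and, when the
+ * load is expected to succeed, create a map that uses the loaded BTF.
+ */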
+static int do_test_raw(unsigned int test_num)
+{
+ struct btf_raw_test *test = &raw_tests[test_num - 1];
+ struct bpf_create_map_attr create_attr = {};
+ int map_fd = -1, btf_fd = -1;
+ unsigned int raw_btf_size;
+ struct btf_header *hdr;
+ void *raw_btf;
+ int err;
+
+ fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ hdr = raw_btf;
+
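+	/* Apply the test's deltas to exercise BTF header validation */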
+ hdr->hdr_len = (int)hdr->hdr_len + test->hdr_len_delta;
+ hdr->type_off = (int)hdr->type_off + test->type_off_delta;
+ hdr->str_off = (int)hdr->str_off + test->str_off_delta;
+ hdr->str_len = (int)hdr->str_len + test->str_len_delta;
+
+ *btf_log_buf = '\0';
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ free(raw_btf);
+
+ err = ((btf_fd == -1) != test->btf_load_err);
+ if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
+ btf_fd, test->btf_load_err) ||
+ CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
+ "expected err_str:%s", test->err_str)) {
+ err = -1;
+ goto done;
+ }
+
+ if (err || btf_fd == -1)
+ goto done;
+
+ create_attr.name = test->map_name;
+ create_attr.map_type = test->map_type;
+ create_attr.key_size = test->key_size;
+ create_attr.value_size = test->value_size;
+ create_attr.max_entries = test->max_entries;
+ create_attr.btf_fd = btf_fd;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+
+ err = ((map_fd == -1) != test->map_create_err);
+ CHECK(err, "map_fd:%d test->map_create_err:%u",
+ map_fd, test->map_create_err);
+
+done:
+ if (!err)
+ fprintf(stderr, "OK");
+
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+ if (map_fd != -1)
+ close(map_fd);
+
+ return err;
+}
+
+static int test_raw(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.raw_test_num)
+ return count_result(do_test_raw(args.raw_test_num));
+
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ err |= count_result(do_test_raw(i));
+
+ return err;
+}
+
+struct btf_get_info_test {
+ const char *descr;
+ const char *str_sec;
+ __u32 raw_types[MAX_NR_RAW_TYPES];
+ __u32 str_sec_size;
+ int btf_size_delta;
+ int (*special_test)(unsigned int test_num);
+};
+
+static int test_big_btf_info(unsigned int test_num);
+static int test_btf_id(unsigned int test_num);
+
+const struct btf_get_info_test get_info_tests[] = {
+{
+ .descr = "== raw_btf_size+1",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .btf_size_delta = 1,
+},
+{
+ .descr = "== raw_btf_size-3",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .btf_size_delta = -3,
+},
+{
+ .descr = "Large bpf_btf_info",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .special_test = test_big_btf_info,
+},
+{
+ .descr = "BTF ID",
+ .raw_types = {
+ /* int */ /* [1] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned int */ /* [2] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),
+ BTF_END_RAW,
+ },
+ .str_sec = "",
+ .str_sec_size = sizeof(""),
+ .special_test = test_btf_id,
+},
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
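+/* GET_INFO with a bpf_btf_info larger than the kernel's: it must fail
+ * while the trailing bytes hold garbage and succeed once they are zeroed.
+ */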
+static int test_big_btf_info(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ uint8_t *raw_btf = NULL, *user_btf = NULL;
+ unsigned int raw_btf_size;
+ struct {
+ struct bpf_btf_info info;
+ uint64_t garbage;
+ } info_garbage;
+ struct bpf_btf_info *info;
+ int btf_fd = -1, err;
+ uint32_t info_len;
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ user_btf = malloc(raw_btf_size);
+ if (CHECK(!user_btf, "!user_btf")) {
+ err = -1;
+ goto done;
+ }
+
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ /*
+ * GET_INFO should error out if the userspace info
+	 * has non-zero trailing bytes.
+ */
+ info = &info_garbage.info;
+ memset(info, 0, sizeof(*info));
+ info_garbage.garbage = 0xdeadbeef;
+ info_len = sizeof(info_garbage);
+ info->btf = ptr_to_u64(user_btf);
+ info->btf_size = raw_btf_size;
+
+ err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ if (CHECK(!err, "!err")) {
+ err = -1;
+ goto done;
+ }
+
+ /*
+	 * GET_INFO should succeed even if info_len is larger than
+	 * what the kernel supports, as long as the trailing bytes
+	 * are zero.  The kernel-supported info len should also be
+	 * returned to userspace.
+ */
+ info_garbage.garbage = 0;
+ err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ if (CHECK(err || info_len != sizeof(*info),
+ "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+ err, errno, info_len, sizeof(*info))) {
+ err = -1;
+ goto done;
+ }
+
+ fprintf(stderr, "OK");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ free(raw_btf);
+ free(user_btf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+
+ return err;
+}
+
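+/* Exercise the BTF ID plumbing: GET_FD_BY_ID, the btf fields in
+ * bpf_map_info, and release of the ID once the last reference is gone.
+ */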
+static int test_btf_id(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ struct bpf_create_map_attr create_attr = {};
+ uint8_t *raw_btf = NULL, *user_btf[2] = {};
+ int btf_fd[2] = {-1, -1}, map_fd = -1;
+ struct bpf_map_info map_info = {};
+ struct bpf_btf_info info[2] = {};
+ unsigned int raw_btf_size;
+ uint32_t info_len;
+ int err, i, ret;
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ for (i = 0; i < 2; i++) {
+ user_btf[i] = malloc(raw_btf_size);
+ if (CHECK(!user_btf[i], "!user_btf[%d]", i)) {
+ err = -1;
+ goto done;
+ }
+ info[i].btf = ptr_to_u64(user_btf[i]);
+ info[i].btf_size = raw_btf_size;
+ }
+
+ btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ /* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
+ info_len = sizeof(info[0]);
+ err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+ if (CHECK(err, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
+ if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ ret = 0;
+ err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+ if (CHECK(err || info[0].id != info[1].id ||
+ info[0].btf_size != info[1].btf_size ||
+ (ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
+ "err:%d errno:%d id0:%u id1:%u btf_size0:%u btf_size1:%u memcmp:%d",
+ err, errno, info[0].id, info[1].id,
+ info[0].btf_size, info[1].btf_size, ret)) {
+ err = -1;
+ goto done;
+ }
+
+ /* Test btf members in struct bpf_map_info */
+ create_attr.name = "test_btf_id";
+ create_attr.map_type = BPF_MAP_TYPE_ARRAY;
+ create_attr.key_size = sizeof(int);
+ create_attr.value_size = sizeof(unsigned int);
+ create_attr.max_entries = 4;
+ create_attr.btf_fd = btf_fd[0];
+ create_attr.btf_key_type_id = 1;
+ create_attr.btf_value_type_id = 2;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+ if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ if (CHECK(err || map_info.btf_id != info[0].id ||
+ map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
+ "err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
+ err, errno, info[0].id, map_info.btf_id, map_info.btf_key_type_id,
+ map_info.btf_value_type_id)) {
+ err = -1;
+ goto done;
+ }
+
+ for (i = 0; i < 2; i++) {
+ close(btf_fd[i]);
+ btf_fd[i] = -1;
+ }
+
+ /* Test BTF ID is removed from the kernel */
+ btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+ if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+ close(btf_fd[0]);
+ btf_fd[0] = -1;
+
+ /* The map holds the last ref to BTF and its btf_id */
+ close(map_fd);
+ map_fd = -1;
+ btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
+ if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
+ err = -1;
+ goto done;
+ }
+
+ fprintf(stderr, "OK");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ free(raw_btf);
+ if (map_fd != -1)
+ close(map_fd);
+ for (i = 0; i < 2; i++) {
+ free(user_btf[i]);
+ if (btf_fd[i] != -1)
+ close(btf_fd[i]);
+ }
+
+ return err;
+}
+
+static int do_test_get_info(unsigned int test_num)
+{
+ const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
+ unsigned int raw_btf_size, user_btf_size, expected_nbytes;
+ uint8_t *raw_btf = NULL, *user_btf = NULL;
+ struct bpf_btf_info info = {};
+ int btf_fd = -1, err, ret;
+ uint32_t info_len;
+
+ fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
+ test_num, test->descr);
+
+ if (test->special_test)
+ return test->special_test(test_num);
+
+ raw_btf = btf_raw_create(&hdr_tmpl,
+ test->raw_types,
+ test->str_sec,
+ test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+
+ user_btf = malloc(raw_btf_size);
+ if (CHECK(!user_btf, "!user_btf")) {
+ err = -1;
+ goto done;
+ }
+
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
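+	/* Poison the tail the kernel must not write so it can be checked below */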
+ user_btf_size = (int)raw_btf_size + test->btf_size_delta;
+ expected_nbytes = min(raw_btf_size, user_btf_size);
+ if (raw_btf_size > expected_nbytes)
+ memset(user_btf + expected_nbytes, 0xff,
+ raw_btf_size - expected_nbytes);
+
+ info_len = sizeof(info);
+ info.btf = ptr_to_u64(user_btf);
+ info.btf_size = user_btf_size;
+
+ ret = 0;
+ err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+ if (CHECK(err || !info.id || info_len != sizeof(info) ||
+ info.btf_size != raw_btf_size ||
+ (ret = memcmp(raw_btf, user_btf, expected_nbytes)),
+ "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+ err, errno, info.id, info_len, sizeof(info),
+ raw_btf_size, info.btf_size, expected_nbytes, ret)) {
+ err = -1;
+ goto done;
+ }
+
+ while (expected_nbytes < raw_btf_size) {
+ fprintf(stderr, "%u...", expected_nbytes);
+ if (CHECK(user_btf[expected_nbytes++] != 0xff,
+ "user_btf[%u]:%x != 0xff", expected_nbytes - 1,
+ user_btf[expected_nbytes - 1])) {
+ err = -1;
+ goto done;
+ }
+ }
+
+ fprintf(stderr, "OK");
+
+done:
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+
+ free(raw_btf);
+ free(user_btf);
+
+ if (btf_fd != -1)
+ close(btf_fd);
+
+ return err;
+}
+
+static int test_get_info(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.get_info_test_num)
+ return count_result(do_test_get_info(args.get_info_test_num));
+
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ err |= count_result(do_test_get_info(i));
+
+ return err;
+}
+
+struct btf_file_test {
+ const char *file;
+ bool btf_kv_notfound;
+};
+
+static struct btf_file_test file_tests[] = {
+{
+ .file = "test_btf_haskv.o",
+},
+{
+ .file = "test_btf_nokv.o",
+ .btf_kv_notfound = true,
+},
+};
+
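+/* Return 1 if @fn has a .BTF ELF section, 0 if it does not, -1 on error */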
+static int file_has_btf_elf(const char *fn)
+{
+ Elf_Scn *scn = NULL;
+ GElf_Ehdr ehdr;
+ int elf_fd;
+ Elf *elf;
+ int ret;
+
+ if (CHECK(elf_version(EV_CURRENT) == EV_NONE,
+ "elf_version(EV_CURRENT) == EV_NONE"))
+ return -1;
+
+ elf_fd = open(fn, O_RDONLY);
+ if (CHECK(elf_fd == -1, "open(%s): errno:%d", fn, errno))
+ return -1;
+
+ elf = elf_begin(elf_fd, ELF_C_READ, NULL);
+ if (CHECK(!elf, "elf_begin(%s): %s", fn, elf_errmsg(elf_errno()))) {
+ ret = -1;
+ goto done;
+ }
+
+ if (CHECK(!gelf_getehdr(elf, &ehdr), "!gelf_getehdr(%s)", fn)) {
+ ret = -1;
+ goto done;
+ }
+
+ while ((scn = elf_nextscn(elf, scn))) {
+ const char *sh_name;
+ GElf_Shdr sh;
+
+ if (CHECK(gelf_getshdr(scn, &sh) != &sh,
+ "file:%s gelf_getshdr != &sh", fn)) {
+ ret = -1;
+ goto done;
+ }
+
+ sh_name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);
+ if (!strcmp(sh_name, BTF_ELF_SEC)) {
+ ret = 1;
+ goto done;
+ }
+ }
+
+ ret = 0;
+
+done:
+ close(elf_fd);
+ elf_end(elf);
+ return ret;
+}
+
+static int do_test_file(unsigned int test_num)
+{
+ const struct btf_file_test *test = &file_tests[test_num - 1];
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+
+ fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
+ test->file);
+
+ err = file_has_btf_elf(test->file);
+ if (err == -1)
+ return err;
+
+ if (err == 0) {
+ fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
+ skip_cnt++;
+ return 0;
+ }
+
+ obj = bpf_object__open(test->file);
+ if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
+ return PTR_ERR(obj);
+
+ err = bpf_object__btf_fd(obj);
+ if (CHECK(err == -1, "bpf_object__btf_fd: -1"))
+ goto done;
+
+ prog = bpf_program__next(NULL, obj);
+ if (CHECK(!prog, "Cannot find bpf_prog")) {
+ err = -1;
+ goto done;
+ }
+
+ bpf_program__set_type(prog, BPF_PROG_TYPE_TRACEPOINT);
+ err = bpf_object__load(obj);
+ if (CHECK(err < 0, "bpf_object__load: %d", err))
+ goto done;
+
+ map = bpf_object__find_map_by_name(obj, "btf_map");
+ if (CHECK(!map, "btf_map not found")) {
+ err = -1;
+ goto done;
+ }
+
+ err = (bpf_map__btf_key_type_id(map) == 0 || bpf_map__btf_value_type_id(map) == 0)
+ != test->btf_kv_notfound;
+ if (CHECK(err, "btf_key_type_id:%u btf_value_type_id:%u test->btf_kv_notfound:%u",
+ bpf_map__btf_key_type_id(map), bpf_map__btf_value_type_id(map),
+ test->btf_kv_notfound))
+ goto done;
+
+ fprintf(stderr, "OK");
+
+done:
+ bpf_object__close(obj);
+ return err;
+}
+
+static int test_file(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ if (args.file_test_num)
+ return count_result(do_test_file(args.file_test_num));
+
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ err |= count_result(do_test_file(i));
+
+ return err;
+}
+
+const char *pprint_enum_str[] = {
+ "ENUM_ZERO",
+ "ENUM_ONE",
+ "ENUM_TWO",
+ "ENUM_THREE",
+};
+
+struct pprint_mapv {
+ uint32_t ui32;
+ uint16_t ui16;
+ /* 2 bytes hole */
+ int32_t si32;
+ uint32_t unused_bits2a:2,
+ bits28:28,
+ unused_bits2b:2;
+ union {
+ uint64_t ui64;
+ uint8_t ui8a[8];
+ };
+ enum {
+ ENUM_ZERO,
+ ENUM_ONE,
+ ENUM_TWO,
+ ENUM_THREE,
+ } aenum;
+};
+
+static struct btf_raw_test pprint_test_template = {
+ .raw_types = {
+		/* unsigned char */			/* [1] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 8, 1),
+ /* unsigned short */ /* [2] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 16, 2),
+ /* unsigned int */ /* [3] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 32, 4),
+ /* int */ /* [4] */
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* unsigned long long */ /* [5] */
+ BTF_TYPE_INT_ENC(NAME_TBD, 0, 0, 64, 8),
+ /* 2 bits */ /* [6] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 2, 2),
+ /* 28 bits */ /* [7] */
+ BTF_TYPE_INT_ENC(0, 0, 0, 28, 4),
+ /* uint8_t[8] */ /* [8] */
+ BTF_TYPE_ARRAY_ENC(9, 1, 8),
+ /* typedef unsigned char uint8_t */ /* [9] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1),
+ /* typedef unsigned short uint16_t */ /* [10] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 2),
+ /* typedef unsigned int uint32_t */ /* [11] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3),
+ /* typedef int int32_t */ /* [12] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 4),
+ /* typedef unsigned long long uint64_t *//* [13] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 5),
+ /* union (anon) */ /* [14] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_UNION, 0, 2), 8),
+ BTF_MEMBER_ENC(NAME_TBD, 13, 0),/* uint64_t ui64; */
+ BTF_MEMBER_ENC(NAME_TBD, 8, 0), /* uint8_t ui8a[8]; */
+ /* enum (anon) */ /* [15] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 0, 4), 4),
+ BTF_ENUM_ENC(NAME_TBD, 0),
+ BTF_ENUM_ENC(NAME_TBD, 1),
+ BTF_ENUM_ENC(NAME_TBD, 2),
+ BTF_ENUM_ENC(NAME_TBD, 3),
+ /* struct pprint_mapv */ /* [16] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 8), 32),
+ BTF_MEMBER_ENC(NAME_TBD, 11, 0), /* uint32_t ui32 */
+ BTF_MEMBER_ENC(NAME_TBD, 10, 32), /* uint16_t ui16 */
+ BTF_MEMBER_ENC(NAME_TBD, 12, 64), /* int32_t si32 */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 96), /* unused_bits2a */
+ BTF_MEMBER_ENC(NAME_TBD, 7, 98), /* bits28 */
+ BTF_MEMBER_ENC(NAME_TBD, 6, 126), /* unused_bits2b */
+ BTF_MEMBER_ENC(0, 14, 128), /* union (anon) */
+ BTF_MEMBER_ENC(NAME_TBD, 15, 192), /* aenum */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum",
+ .str_sec_size = sizeof("\0unsigned char\0unsigned short\0unsigned int\0int\0unsigned long long\0uint8_t\0uint16_t\0uint32_t\0int32_t\0uint64_t\0ui64\0ui8a\0ENUM_ZERO\0ENUM_ONE\0ENUM_TWO\0ENUM_THREE\0pprint_mapv\0ui32\0ui16\0si32\0unused_bits2a\0bits28\0unused_bits2b\0aenum"),
+ .key_size = sizeof(unsigned int),
+ .value_size = sizeof(struct pprint_mapv),
+ .key_type_id = 3, /* unsigned int */
+ .value_type_id = 16, /* struct pprint_mapv */
+ .max_entries = 128 * 1024,
+};
+
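+/* Per-map-type variants; test_pprint() copies each entry into
+ * pprint_test_template before running it.
+ */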
+static struct btf_pprint_test_meta {
+ const char *descr;
+ enum bpf_map_type map_type;
+ const char *map_name;
+ bool ordered_map;
+ bool lossless_map;
+} pprint_tests_meta[] = {
+{
+ .descr = "BTF pretty print array",
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "pprint_test_array",
+ .ordered_map = true,
+ .lossless_map = true,
+},
+
+{
+ .descr = "BTF pretty print hash",
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "pprint_test_hash",
+ .ordered_map = false,
+ .lossless_map = true,
+},
+
+{
+ .descr = "BTF pretty print lru hash",
+ .map_type = BPF_MAP_TYPE_LRU_HASH,
+ .map_name = "pprint_test_lru_hash",
+ .ordered_map = false,
+ .lossless_map = false,
+},
+
+};
+
+
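+/* Derive the value deterministically from the key so the expected
+ * pretty-print output can be recomputed when reading the map back.
+ */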
+static void set_pprint_mapv(struct pprint_mapv *v, uint32_t i)
+{
+ v->ui32 = i;
+ v->si32 = -i;
+ v->unused_bits2a = 3;
+ v->bits28 = i;
+ v->unused_bits2b = 3;
+ v->ui64 = i;
+ v->aenum = i & 0x03;
+}
+
+static int do_test_pprint(void)
+{
+ const struct btf_raw_test *test = &pprint_test_template;
+ struct bpf_create_map_attr create_attr = {};
+ unsigned int key, nr_read_elems;
+ bool ordered_map, lossless_map;
+ int map_fd = -1, btf_fd = -1;
+ struct pprint_mapv mapv = {};
+ unsigned int raw_btf_size;
+ char expected_line[255];
+ FILE *pin_file = NULL;
+ char pin_path[255];
+ size_t line_len = 0;
+ char *line = NULL;
+ uint8_t *raw_btf;
+ ssize_t nread;
+ int err, ret;
+
+ fprintf(stderr, "%s......", test->descr);
+ raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
+ test->str_sec, test->str_sec_size,
+ &raw_btf_size);
+
+ if (!raw_btf)
+ return -1;
+
+ *btf_log_buf = '\0';
+ btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
+ btf_log_buf, BTF_LOG_BUF_SIZE,
+ args.always_log);
+ free(raw_btf);
+
+ if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ create_attr.name = test->map_name;
+ create_attr.map_type = test->map_type;
+ create_attr.key_size = test->key_size;
+ create_attr.value_size = test->value_size;
+ create_attr.max_entries = test->max_entries;
+ create_attr.btf_fd = btf_fd;
+ create_attr.btf_key_type_id = test->key_type_id;
+ create_attr.btf_value_type_id = test->value_type_id;
+
+ map_fd = bpf_create_map_xattr(&create_attr);
+ if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ err = -1;
+ goto done;
+ }
+
+ ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
+ "/sys/fs/bpf", test->map_name);
+
+	if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long",
+ "/sys/fs/bpf", test->map_name)) {
+ err = -1;
+ goto done;
+ }
+
+ err = bpf_obj_pin(map_fd, pin_path);
+ if (CHECK(err, "bpf_obj_pin(%s): errno:%d.", pin_path, errno))
+ goto done;
+
+ for (key = 0; key < test->max_entries; key++) {
+ set_pprint_mapv(&mapv, key);
+ bpf_map_update_elem(map_fd, &key, &mapv, 0);
+ }
+
+ pin_file = fopen(pin_path, "r");
+ if (CHECK(!pin_file, "fopen(%s): errno:%d", pin_path, errno)) {
+ err = -1;
+ goto done;
+ }
+
+	/* Skip lines starting with '#' */
+ while ((nread = getline(&line, &line_len, pin_file)) > 0 &&
+ *line == '#')
+ ;
+
+ if (CHECK(nread <= 0, "Unexpected EOF")) {
+ err = -1;
+ goto done;
+ }
+
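+	/* Recompute each value from the key read back and compare it
+	 * against the pinned file's pretty-print output.
+	 */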
+ nr_read_elems = 0;
+ ordered_map = test->ordered_map;
+ lossless_map = test->lossless_map;
+ do {
+ ssize_t nexpected_line;
+ unsigned int next_key;
+
+ next_key = ordered_map ? nr_read_elems : atoi(line);
+ set_pprint_mapv(&mapv, next_key);
+ nexpected_line = snprintf(expected_line, sizeof(expected_line),
+ "%u: {%u,0,%d,0x%x,0x%x,0x%x,{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s}\n",
+ next_key,
+ mapv.ui32, mapv.si32,
+ mapv.unused_bits2a, mapv.bits28, mapv.unused_bits2b,
+ mapv.ui64,
+ mapv.ui8a[0], mapv.ui8a[1], mapv.ui8a[2], mapv.ui8a[3],
+ mapv.ui8a[4], mapv.ui8a[5], mapv.ui8a[6], mapv.ui8a[7],
+ pprint_enum_str[mapv.aenum]);
+
+		if (CHECK(nexpected_line >= sizeof(expected_line),
+ "expected_line is too long")) {
+ err = -1;
+ goto done;
+ }
+
+ if (strcmp(expected_line, line)) {
+ err = -1;
+ fprintf(stderr, "unexpected pprint output\n");
+ fprintf(stderr, "expected: %s", expected_line);
+ fprintf(stderr, " read: %s", line);
+ goto done;
+ }
+
+ nread = getline(&line, &line_len, pin_file);
+ } while (++nr_read_elems < test->max_entries && nread > 0);
+
+ if (lossless_map &&
+ CHECK(nr_read_elems < test->max_entries,
+ "Unexpected EOF. nr_read_elems:%u test->max_entries:%u",
+ nr_read_elems, test->max_entries)) {
+ err = -1;
+ goto done;
+ }
+
+ if (CHECK(nread > 0, "Unexpected extra pprint output: %s", line)) {
+ err = -1;
+ goto done;
+ }
+
+ err = 0;
+
+done:
+ if (!err)
+ fprintf(stderr, "OK");
+ if (*btf_log_buf && (err || args.always_log))
+ fprintf(stderr, "\n%s", btf_log_buf);
+ if (btf_fd != -1)
+ close(btf_fd);
+ if (map_fd != -1)
+ close(map_fd);
+ if (pin_file)
+ fclose(pin_file);
+ unlink(pin_path);
+ free(line);
+
+ return err;
+}
+
+static int test_pprint(void)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
+ pprint_test_template.descr = pprint_tests_meta[i].descr;
+ pprint_test_template.map_type = pprint_tests_meta[i].map_type;
+ pprint_test_template.map_name = pprint_tests_meta[i].map_name;
+ pprint_test_template.ordered_map = pprint_tests_meta[i].ordered_map;
+ pprint_test_template.lossless_map = pprint_tests_meta[i].lossless_map;
+
+ err |= count_result(do_test_pprint());
+ }
+
+ return err;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr, "Usage: %s [-l] [[-r test_num (1 - %zu)] | [-g test_num (1 - %zu)] | [-f test_num (1 - %zu)] | [-p]]\n",
+ cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
+ ARRAY_SIZE(file_tests));
+}
+
+static int parse_args(int argc, char **argv)
+{
+ const char *optstr = "lpf:r:g:";
+ int opt;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 'l':
+ args.always_log = true;
+ break;
+ case 'f':
+ args.file_test_num = atoi(optarg);
+ args.file_test = true;
+ break;
+ case 'r':
+ args.raw_test_num = atoi(optarg);
+ args.raw_test = true;
+ break;
+ case 'g':
+ args.get_info_test_num = atoi(optarg);
+ args.get_info_test = true;
+ break;
+ case 'p':
+ args.pprint_test = true;
+ break;
+ case 'h':
+ usage(argv[0]);
+ exit(0);
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ if (args.raw_test_num &&
+ (args.raw_test_num < 1 ||
+ args.raw_test_num > ARRAY_SIZE(raw_tests))) {
+ fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
+ ARRAY_SIZE(raw_tests));
+ return -1;
+ }
+
+ if (args.file_test_num &&
+ (args.file_test_num < 1 ||
+ args.file_test_num > ARRAY_SIZE(file_tests))) {
+ fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
+ ARRAY_SIZE(file_tests));
+ return -1;
+ }
+
+ if (args.get_info_test_num &&
+ (args.get_info_test_num < 1 ||
+ args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
+ fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
+ ARRAY_SIZE(get_info_tests));
+ return -1;
+ }
+
+ return 0;
+}
+
+static void print_summary(void)
+{
+ fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
+ pass_cnt - skip_cnt, skip_cnt, error_cnt);
+}
+
+int main(int argc, char **argv)
+{
+ int err = 0;
+
+ err = parse_args(argc, argv);
+ if (err)
+ return err;
+
+ if (args.always_log)
+ libbpf_set_print(__base_pr, __base_pr, __base_pr);
+
+ if (args.raw_test)
+ err |= test_raw();
+
+ if (args.get_info_test)
+ err |= test_get_info();
+
+ if (args.file_test)
+ err |= test_file();
+
+ if (args.pprint_test)
+ err |= test_pprint();
+
+ if (args.raw_test || args.get_info_test || args.file_test ||
+ args.pprint_test)
+ goto done;
+
+ err |= test_raw();
+ err |= test_get_info();
+ err |= test_file();
+
+done:
+ print_summary();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_btf_haskv.c b/tools/testing/selftests/bpf/test_btf_haskv.c
new file mode 100644
index 000000000..b21b876f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_haskv.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ struct ipv_counts *counts;
+ int key = 0;
+
+ if (!arg->sock)
+ return 0;
+
+ counts = bpf_map_lookup_elem(&btf_map, &key);
+ if (!counts)
+ return 0;
+
+ counts->v6++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_btf_nokv.c b/tools/testing/selftests/bpf/test_btf_nokv.c
new file mode 100644
index 000000000..0ed8e088e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_btf_nokv.c
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+struct ipv_counts {
+ unsigned int v4;
+ unsigned int v6;
+};
+
+struct bpf_map_def SEC("maps") btf_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(struct ipv_counts),
+ .max_entries = 4,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ struct ipv_counts *counts;
+ int key = 0;
+
+ if (!arg->sock)
+ return 0;
+
+ counts = bpf_map_lookup_elem(&btf_map, &key);
+ if (!counts)
+ return 0;
+
+ counts->v6++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
new file mode 100644
index 000000000..4e196e3bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <assert.h>
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+#define TEST_CGROUP "/test-bpf-cgroup-storage-buf/"
+
+int main(int argc, char **argv)
+{
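+	/* Program: atomically bump the per-cgroup counter and return
+	 * (counter & 1) as the packet verdict.
+	 */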
+ struct bpf_insn prog[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0), /* map fd */
+ BPF_MOV64_IMM(BPF_REG_2, 0), /* flags, not used */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ };
+ size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ int error = EXIT_FAILURE;
+ int map_fd, prog_fd, cgroup_fd;
+ struct bpf_cgroup_storage_key key;
+ unsigned long long value;
+
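+	/* The key is a (cgroup inode id, attach type) pair filled in by
+	 * the kernel; it is discovered below via bpf_map_get_next_key().
+	 */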
+ map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
+ sizeof(value), 0, 0);
+ if (map_fd < 0) {
+ printf("Failed to create map: %s\n", strerror(errno));
+ goto out;
+ }
+
+ prog[0].imm = map_fd;
+ prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog, insns_cnt, "GPL", 0,
+ bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (prog_fd < 0) {
+ printf("Failed to load bpf program: %s\n", bpf_log_buf);
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+ printf("Failed to setup cgroup environment\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach the bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
+ printf("Failed to attach bpf program\n");
+ goto err;
+ }
+
+ if (bpf_map_get_next_key(map_fd, NULL, &key)) {
+ printf("Failed to get the first key in cgroup storage\n");
+ goto err;
+ }
+
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 3) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ /* Bump the counter in the cgroup local storage */
+ value++;
+ if (bpf_map_update_elem(map_fd, &key, &value, 0)) {
+ printf("Failed to update the data in the cgroup storage\n");
+ goto err;
+ }
+
+ /* Every second packet should be dropped */
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null"));
+ assert(system("ping localhost -c 1 -W 1 -q > /dev/null") == 0);
+
+ /* Check the final value of the counter in the cgroup local storage */
+ if (bpf_map_lookup_elem(map_fd, &key, &value)) {
+ printf("Failed to lookup the cgroup storage\n");
+ goto err;
+ }
+
+ if (value != 7) {
+ printf("Unexpected data in the cgroup storage: %llu\n", value);
+ goto err;
+ }
+
+ error = 0;
+ printf("test_cgroup_storage:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
new file mode 100644
index 000000000..9c8b50bac
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -0,0 +1,96 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+
+#define DEV_CGROUP_PROG "./dev_cgroup.o"
+
+#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
+
+int main(int argc, char **argv)
+{
+ struct bpf_object *obj;
+ int error = EXIT_FAILURE;
+ int prog_fd, cgroup_fd;
+ __u32 prog_cnt;
+
+ if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
+ &obj, &prog_fd)) {
+ printf("Failed to load DEV_CGROUP program\n");
+ goto out;
+ }
+
+ if (setup_cgroup_environment()) {
+		printf("Failed to setup cgroup environment\n");
+ goto err;
+ }
+
+ /* Create a cgroup, get fd, and join it */
+ cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ if (!cgroup_fd) {
+ printf("Failed to create test cgroup\n");
+ goto err;
+ }
+
+ if (join_cgroup(TEST_CGROUP)) {
+ printf("Failed to join cgroup\n");
+ goto err;
+ }
+
+ /* Attach bpf program */
+ if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_DEVICE, 0)) {
+ printf("Failed to attach DEV_CGROUP program");
+ goto err;
+ }
+
+ if (bpf_prog_query(cgroup_fd, BPF_CGROUP_DEVICE, 0, NULL, NULL,
+ &prog_cnt)) {
+ printf("Failed to query attached programs");
+ goto err;
+ }
+
+	/* All operations with /dev/zero and /dev/urandom are allowed,
+ * everything else is forbidden.
+ */
+ assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+ assert(system("mknod /tmp/test_dev_cgroup_null c 1 3"));
+ assert(system("rm -f /tmp/test_dev_cgroup_null") == 0);
+
+ /* /dev/zero is whitelisted */
+ assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+ assert(system("mknod /tmp/test_dev_cgroup_zero c 1 5") == 0);
+ assert(system("rm -f /tmp/test_dev_cgroup_zero") == 0);
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=64") == 0);
+
+ /* src is allowed, target is forbidden */
+ assert(system("dd if=/dev/urandom of=/dev/full count=64"));
+
+ /* src is forbidden, target is allowed */
+ assert(system("dd if=/dev/random of=/dev/zero count=64"));
+
+ error = 0;
+ printf("test_dev_cgroup:PASS\n");
+
+err:
+ cleanup_cgroup_environment();
+
+out:
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
new file mode 100644
index 000000000..f6d9f238e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* Permit pretty deep stack traces */
+#define MAX_STACK_RAWTP 100
+struct stack_trace_t {
+ int pid;
+ int kern_stack_size;
+ int user_stack_size;
+ int user_stack_buildid_size;
+ __u64 kern_stack[MAX_STACK_RAWTP];
+ __u64 user_stack[MAX_STACK_RAWTP];
+ struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+struct bpf_map_def SEC("maps") perfmap = {
+ .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(__u32),
+ .max_entries = 2,
+};
+
+struct bpf_map_def SEC("maps") stackdata_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct stack_trace_t),
+ .max_entries = 1,
+};
+
+/* Allocate twice the needed per-cpu space. For the code below
+ * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ * if (usize < 0)
+ * return 0;
+ * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ *
+ * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), the
+ * verifier will complain that the access "raw_data + usize"
+ * with size "max_len - usize" may be out of bounds.
+ * The maximum "raw_data + usize" is "raw_data + max_len"
+ * and the maximum "max_len - usize" is "max_len", so the verifier
+ * concludes that the maximum buffer access range is
+ * "raw_data[0...max_len * 2 - 1]" and hence rejects the program.
+ *
+ * Doubling the buffer size to be used works around this verifier
+ * issue and avoids complicated massaging of the C code.
+ * This is an acceptable workaround since there is only one entry here.
+ */
+struct bpf_map_def SEC("maps") rawdata_map = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
+ .max_entries = 1,
+};
+
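+/* Collect kernel, user and build-id stacks into the per-cpu map and
+ * stream them to userspace through the perf event array.
+ */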
+SEC("tracepoint/raw_syscalls/sys_enter")
+int bpf_prog1(void *ctx)
+{
+ int max_len, max_buildid_len, usize, ksize, total_size;
+ struct stack_trace_t *data;
+ void *raw_data;
+ __u32 key = 0;
+
+ data = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!data)
+ return 0;
+
+ max_len = MAX_STACK_RAWTP * sizeof(__u64);
+ max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
+ data->pid = bpf_get_current_pid_tgid();
+ data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
+ max_len, 0);
+ data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
+ BPF_F_USER_STACK);
+ data->user_stack_buildid_size = bpf_get_stack(
+ ctx, data->user_stack_buildid, max_buildid_len,
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+ bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
+
+ /* write both kernel and user stacks to the same buffer */
+ raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
+ if (!raw_data)
+ return 0;
+
+ usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
+ if (usize < 0)
+ return 0;
+
+ ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
+ if (ksize < 0)
+ return 0;
+
+ total_size = usize + ksize;
+ if (total_size > 0 && total_size <= max_len)
+ bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_iptunnel_common.h b/tools/testing/selftests/bpf/test_iptunnel_common.h
new file mode 100644
index 000000000..e4cd252a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_iptunnel_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _TEST_IPTNL_COMMON_H
+#define _TEST_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 dport;
+ __u16 family;
+ __u8 protocol;
+};
+
+struct iptnl_info {
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } saddr;
+ union {
+ __u32 v6[4];
+ __u32 v4;
+ } daddr;
+ __u16 family;
+ __u8 dmac[6];
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
new file mode 100755
index 000000000..9df0d2ac4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ "$(id -u)" != "0" ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+SRC_TREE=../../../../
+
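+# Run the test_bpf module self-tests under the given JIT enable/harden settings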
+test_run()
+{
+ sysctl -w net.core.bpf_jit_enable=$1 2>&1 > /dev/null
+ sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null
+
+ echo "[ JIT enabled:$1 hardened:$2 ]"
+ dmesg -C
+ if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
+ insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+ if [ $? -ne 0 ]; then
+ rc=1
+ fi
+ else
+ # Use modprobe dry run to check for missing test_bpf module
+ if ! /sbin/modprobe -q -n test_bpf; then
+ echo "test_bpf: [SKIP]"
+ elif /sbin/modprobe -q test_bpf; then
+ echo "test_bpf: ok"
+ else
+ echo "test_bpf: [FAIL]"
+ rc=1
+ fi
+ fi
+ rmmod test_bpf 2> /dev/null
+ dmesg | grep FAIL
+}
+
+test_save()
+{
+ JE=`sysctl -n net.core.bpf_jit_enable`
+ JH=`sysctl -n net.core.bpf_jit_harden`
+}
+
+test_restore()
+{
+ sysctl -w net.core.bpf_jit_enable=$JE 2>&1 > /dev/null
+ sysctl -w net.core.bpf_jit_harden=$JH 2>&1 > /dev/null
+}
+
+rc=0
+test_save
+test_run 0 0
+test_run 1 0
+test_run 1 1
+test_run 1 2
+test_restore
+exit $rc
diff --git a/tools/testing/selftests/bpf/test_l4lb.c b/tools/testing/selftests/bpf/test_l4lb.c
new file mode 100644
index 000000000..1e10c9590
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb.c
@@ -0,0 +1,473 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static inline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static inline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+
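+/* Hash the flow (source address and ports) for consistent-hashing
+ * real-server selection.
+ */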
+static __always_inline __u32 get_packet_hash(struct packet_description *pckt,
+ bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
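+/* Consistent hashing: map the flow hash to a per-VIP ring slot, then
+ * look up the real server that slot points to.
+ */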
+static __always_inline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6) % RING_SIZE;
+ __u32 key = RING_SIZE * vip_info->vip_num + hash;
+ __u32 *real_pos;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static __always_inline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static __always_inline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static __always_inline bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static __always_inline bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
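+/* Parse the L3/L4 headers, look up the VIP, pick a real server via
+ * consistent hashing, account stats and redirect through an IP tunnel.
+ */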
+static __always_inline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
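+	/* test-only quirk: the first 4 bytes of the dst MAC are overwritten
+	 * with the v4 tunnel daddr so the chosen destination is visible in
+	 * the output packet
+	 */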
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ return process_packet(data, nh_off, data_end, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ return process_packet(data, nh_off, data_end, true, ctx);
+ else
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_l4lb_noinline.c b/tools/testing/selftests/bpf/test_l4lb_noinline.c
new file mode 100644
index 000000000..ba44a14e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_l4lb_noinline.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+#include "test_iptunnel_common.h"
+#include "bpf_endian.h"
+
+int _version SEC("version") = 1;
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy-paste of jhash from kernel sources, to make sure llvm
+ * can compile it into a valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct bpf_map_def SEC("maps") vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = CH_RINGS_SIZE,
+};
+
+struct bpf_map_def SEC("maps") reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = MAX_REALS,
+};
+
+struct bpf_map_def SEC("maps") stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct vip_stats),
+ .max_entries = MAX_VIPS,
+};
+
+struct bpf_map_def SEC("maps") ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = CTL_MAP_SIZE,
+};
+
+static __u32 get_packet_hash(struct packet_description *pckt,
+ bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
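+	/* presumably a guard so that only the canned test packets proceed,
+	 * keeping the outcome deterministic for the test harness
+	 */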
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+		return false;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
+{
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ void *pkt_start = (void *)(long)skb->data;
+ struct packet_description pckt = {};
+ struct eth_hdr *eth = pkt_start;
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ if (iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, off, data_end, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("l4lb-demo")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ return process_packet(data, nh_off, data_end, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ return process_packet(data, nh_off, data_end, true, ctx);
+ else
+ return TC_ACT_SHOT;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh
new file mode 100755
index 000000000..2989b2e2d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+export TESTNAME=test_libbpf
+
+# Determine selftest success via shell exit code
+exit_handler()
+{
+ if [ $? -eq 0 ]; then
+ echo "selftests: $TESTNAME [PASS]";
+ else
+ echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2
+ echo "selftests: $TESTNAME [FAILED]";
+ fi
+}
+
+libbpf_open_file()
+{
+ LAST_LOADED=$1
+ if [ -n "$VERBOSE" ]; then
+ ./test_libbpf_open $1
+ else
+ ./test_libbpf_open --quiet $1
+ fi
+}
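+
+# Set VERBOSE=1 in the environment to see the loader output for each file.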
+
+# Exit the script immediately (caught by the trap handler above) if any
+# command exits with a non-zero status.
+set -e
+
+# (Use 'trap -l' to list the meaning of the signal numbers)
+trap exit_handler 0 2 3 6 9
+
+libbpf_open_file test_l4lb.o
+
+# Load a program with BPF-to-BPF calls
+libbpf_open_file test_l4lb_noinline.o
+
+# Load a program compiled without the "-target bpf" flag
+libbpf_open_file test_xdp.o
+
+# Success
+exit 0
diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c
new file mode 100644
index 000000000..cbd55f5f8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_libbpf_open.c
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc.
+ */
+static const char *__doc__ =
+ "Libbpf test program for loading BPF ELF object files";
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <bpf/libbpf.h>
+#include <getopt.h>
+
+#include "bpf_rlimit.h"
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"debug", no_argument, NULL, 'D' },
+ {"quiet", no_argument, NULL, 'q' },
+ {0, 0, NULL, 0 }
+};
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf("\nDOCUMENTATION:\n%s\n\n", __doc__);
+ printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]);
+ printf(" Listing options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ printf(" short-option: -%c",
+ long_options[i].val);
+ printf("\n");
+ }
+ printf("\n");
+}
+
+#define DEFINE_PRINT_FN(name, enabled) \
+static int libbpf_##name(const char *fmt, ...) \
+{ \
+ va_list args; \
+	int ret = 0;						\
+ \
+ va_start(args, fmt); \
+ if (enabled) { \
+ fprintf(stderr, "[" #name "] "); \
+ ret = vfprintf(stderr, fmt, args); \
+ } \
+ va_end(args); \
+ return ret; \
+}
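+/* print handlers handed to libbpf via libbpf_set_print() in main() */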
+DEFINE_PRINT_FN(warning, 1)
+DEFINE_PRINT_FN(info, 1)
+DEFINE_PRINT_FN(debug, 1)
+
+#define EXIT_FAIL_LIBBPF EXIT_FAILURE
+#define EXIT_FAIL_OPTION 2
+
+int test_walk_progs(struct bpf_object *obj, bool verbose)
+{
+ struct bpf_program *prog;
+ int cnt = 0;
+
+ bpf_object__for_each_program(prog, obj) {
+ cnt++;
+ if (verbose)
+ printf("Prog (count:%d) section_name: %s\n", cnt,
+ bpf_program__title(prog, false));
+ }
+ return 0;
+}
+
+int test_walk_maps(struct bpf_object *obj, bool verbose)
+{
+ struct bpf_map *map;
+ int cnt = 0;
+
+ bpf_map__for_each(map, obj) {
+ cnt++;
+ if (verbose)
+ printf("Map (count:%d) name: %s\n", cnt,
+ bpf_map__name(map));
+ }
+ return 0;
+}
+
+int test_open_file(char *filename, bool verbose)
+{
+ struct bpf_object *bpfobj = NULL;
+ long err;
+
+ if (verbose)
+ printf("Open BPF ELF-file with libbpf: %s\n", filename);
+
+ /* Load BPF ELF object file and check for errors */
+ bpfobj = bpf_object__open(filename);
+ err = libbpf_get_error(bpfobj);
+ if (err) {
+ char err_buf[128];
+ libbpf_strerror(err, err_buf, sizeof(err_buf));
+ if (verbose)
+ printf("Unable to load eBPF objects in file '%s': %s\n",
+ filename, err_buf);
+ return EXIT_FAIL_LIBBPF;
+ }
+ test_walk_progs(bpfobj, verbose);
+ test_walk_maps(bpfobj, verbose);
+
+ if (verbose)
+ printf("Close BPF ELF-file with libbpf: %s\n",
+ bpf_object__name(bpfobj));
+ bpf_object__close(bpfobj);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ char filename[1024] = { 0 };
+	bool verbose = true;
+ int longindex = 0;
+ int opt;
+
+ libbpf_set_print(libbpf_warning, libbpf_info, NULL);
+
+	/* Parse command line args */
+ while ((opt = getopt_long(argc, argv, "hDq",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 'D':
+ libbpf_set_print(libbpf_warning, libbpf_info,
+ libbpf_debug);
+ break;
+ case 'q': /* Use in scripting mode */
+ verbose = 0;
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return EXIT_FAIL_OPTION;
+ }
+ }
+ if (optind >= argc) {
+ usage(argv);
+ printf("ERROR: Expected BPF_FILE argument after options\n");
+ return EXIT_FAIL_OPTION;
+ }
+ snprintf(filename, sizeof(filename), "%s", argv[optind]);
+
+ return test_open_file(filename, verbose);
+}
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
new file mode 100755
index 000000000..795e56e3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=$ksft_skip
+
+msg="skip all tests:"
+if [ "$UID" != 0 ]; then
+	echo "$msg please run this as root" >&2
+ exit $ksft_skip
+fi
+
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+modprobe rc-loopback
+
+for i in /sys/class/rc/rc*
+do
+ if grep -q DRV_NAME=rc-loopback $i/uevent
+ then
+ LIRCDEV=$(grep DEVNAME= $i/lirc*/uevent | sed sQDEVNAME=Q/dev/Q)
+ fi
+done
+
+if [ -n "$LIRCDEV" ]
+then
+ TYPE=lirc_mode2
+ ./test_lirc_mode2_user $LIRCDEV
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ else
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_kern.c b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
new file mode 100644
index 000000000..ba2685556
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_kern.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include "bpf_helpers.h"
+
+SEC("lirc_mode2")
+int bpf_decoder(unsigned int *sample)
+{
+ if (LIRC_IS_PULSE(*sample)) {
+ unsigned int duration = LIRC_VALUE(*sample);
+
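+		/* bit 16 of the pulse duration gates the decode; the low
+		 * 16 bits become the scancode, reported with protocol 0x40
+		 * and toggle 0
+		 */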
+ if (duration & 0x10000)
+ bpf_rc_keydown(sample, 0x40, duration & 0xffff, 0);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
new file mode 100644
index 000000000..d470d63c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+// test ir decoder
+//
+// Copyright (C) 2018 Sean Young <sean@mess.org>
+
+// A lirc chardev represents a consumer IR (cir) device which can receive
+// infrared signals from a remote control and/or transmit IR.
+//
+// IR is sent as a series of pulses and spaces, somewhat like Morse code. The
+// BPF program can decode this into scancodes so that rc-core can translate
+// them into input key codes using the rc keymap.
+//
+// This test works by sending IR over rc-loopback, so the IR is processed by
+// BPF and then decoded into scancodes. The lirc chardev must be the one
+// associated with rc-loopback; see the output of ir-keytable(1).
+//
+// The following CONFIG options must be enabled for the test to succeed:
+// CONFIG_RC_CORE=y
+// CONFIG_BPF_RAWIR_EVENT=y
+// CONFIG_RC_LOOPBACK=y
+
+// Steps:
+// 1. Open the /dev/lircN device for rc-loopback (given on the command line)
+// 2. Attach the bpf_lirc_mode2 program, which decodes some IR.
+// 3. Send some IR to the same IR device; since it is loopback, this will
+//    end up in the bpf program
+// 4. The bpf program should decode the IR and report the keycode
+// 5. Read the keycode back from the same /dev/lirc device
+
+#include <linux/bpf.h>
+#include <linux/lirc.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+int main(int argc, char **argv)
+{
+ struct bpf_object *obj;
+ int ret, lircfd, progfd, mode;
+ int testir = 0x1dead;
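+	/* a single raw pulse: bit 16 is set so the looped-back decoder
+	 * fires, and the low 16 bits (0xdead) become the reported scancode
+	 */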
+ u32 prog_ids[10], prog_flags[10], prog_cnt;
+
+ if (argc != 2) {
+ printf("Usage: %s /dev/lircN\n", argv[0]);
+ return 2;
+ }
+
+ ret = bpf_prog_load("test_lirc_mode2_kern.o",
+ BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+ if (ret) {
+ printf("Failed to load bpf program\n");
+ return 1;
+ }
+
+ lircfd = open(argv[1], O_RDWR | O_NONBLOCK);
+ if (lircfd == -1) {
+ printf("failed to open lirc device %s: %m\n", argv[1]);
+ return 1;
+ }
+
+	/* Try detaching it before it has ever been attached */
+ ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+ if (ret != -1 || errno != ENOENT) {
+		printf("bpf_prog_detach2 should fail when not attached: %m\n");
+ return 1;
+ }
+
+ mode = LIRC_MODE_SCANCODE;
+ if (ioctl(lircfd, LIRC_SET_REC_MODE, &mode)) {
+ printf("failed to set rec mode: %m\n");
+ return 1;
+ }
+
+ prog_cnt = 10;
+ ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+ &prog_cnt);
+ if (ret) {
+ printf("Failed to query bpf programs on lirc device: %m\n");
+ return 1;
+ }
+
+ if (prog_cnt != 0) {
+ printf("Expected nothing to be attached\n");
+ return 1;
+ }
+
+ ret = bpf_prog_attach(progfd, lircfd, BPF_LIRC_MODE2, 0);
+ if (ret) {
+ printf("Failed to attach bpf to lirc device: %m\n");
+ return 1;
+ }
+
+ /* Write raw IR */
+ ret = write(lircfd, &testir, sizeof(testir));
+ if (ret != sizeof(testir)) {
+ printf("Failed to send test IR message: %m\n");
+ return 1;
+ }
+
+ struct pollfd pfd = { .fd = lircfd, .events = POLLIN };
+ struct lirc_scancode lsc;
+
+ poll(&pfd, 1, 100);
+
+ /* Read decoded IR */
+ ret = read(lircfd, &lsc, sizeof(lsc));
+ if (ret != sizeof(lsc)) {
+ printf("Failed to read decoded IR: %m\n");
+ return 1;
+ }
+
+ if (lsc.scancode != 0xdead || lsc.rc_proto != 64) {
+ printf("Incorrect scancode decoded\n");
+ return 1;
+ }
+
+ prog_cnt = 10;
+ ret = bpf_prog_query(lircfd, BPF_LIRC_MODE2, 0, prog_flags, prog_ids,
+ &prog_cnt);
+ if (ret) {
+ printf("Failed to query bpf programs on lirc device: %m\n");
+ return 1;
+ }
+
+ if (prog_cnt != 1) {
+ printf("Expected one program to be attached\n");
+ return 1;
+ }
+
+	/* Now try detaching it while it is actually attached */
+ ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
+ if (ret) {
+ printf("bpf_prog_detach2: returned %m\n");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
new file mode 100644
index 000000000..006be3963
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -0,0 +1,804 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Randomized tests for eBPF longest-prefix-match maps
+ *
+ * This program runs randomized tests against the lpm-bpf-map. It implements a
+ * "Trivial Longest Prefix Match" (tlpm) based on simple, linear, singly linked
+ * lists. The implementation should be pretty straightforward.
+ *
+ * Based on tlpm, this inserts randomized data into bpf-lpm-maps and verifies
+ * the trie-based bpf-map implementation behaves the same way as tlpm.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/time.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+struct tlpm_node {
+ struct tlpm_node *next;
+ size_t n_bits;
+ uint8_t key[];
+};
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits);
+
+static struct tlpm_node *tlpm_add(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *node;
+ size_t n;
+
+ n = (n_bits + 7) / 8;
+
+ /* 'overwrite' an equivalent entry if one already exists */
+ node = tlpm_match(list, key, n_bits);
+ if (node && node->n_bits == n_bits) {
+ memcpy(node->key, key, n);
+ return list;
+ }
+
+ /* add new entry with @key/@n_bits to @list and return new head */
+
+ node = malloc(sizeof(*node) + n);
+ assert(node);
+
+ node->next = list;
+ node->n_bits = n_bits;
+ memcpy(node->key, key, n);
+
+ return node;
+}
+
+static void tlpm_clear(struct tlpm_node *list)
+{
+ struct tlpm_node *node;
+
+ /* free all entries in @list */
+
+ while ((node = list)) {
+ list = list->next;
+ free(node);
+ }
+}
+
+static struct tlpm_node *tlpm_match(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *best = NULL;
+ size_t i;
+
+ /* Perform longest prefix-match on @key/@n_bits. That is, iterate all
+ * entries and match each prefix against @key. Remember the "best"
+ * entry we find (i.e., the longest prefix that matches) and return it
+ * to the caller when done.
+ */
+
+ for ( ; list; list = list->next) {
+ for (i = 0; i < n_bits && i < list->n_bits; ++i) {
+ if ((key[i / 8] & (1 << (7 - i % 8))) !=
+ (list->key[i / 8] & (1 << (7 - i % 8))))
+ break;
+ }
+
+ if (i >= list->n_bits) {
+ if (!best || i > best->n_bits)
+ best = list;
+ }
+ }
+
+ return best;
+}
+
+static struct tlpm_node *tlpm_delete(struct tlpm_node *list,
+ const uint8_t *key,
+ size_t n_bits)
+{
+ struct tlpm_node *best = tlpm_match(list, key, n_bits);
+ struct tlpm_node *node;
+
+ if (!best || best->n_bits != n_bits)
+ return list;
+
+ if (best == list) {
+ node = best->next;
+ free(best);
+ return node;
+ }
+
+ for (node = list; node; node = node->next) {
+ if (node->next == best) {
+ node->next = best->next;
+ free(best);
+ return list;
+ }
+ }
+ /* should never get here */
+ assert(0);
+ return list;
+}
+
+static void test_lpm_basic(void)
+{
+ struct tlpm_node *list = NULL, *t1, *t2;
+
+ /* very basic, static tests to verify tlpm works as expected */
+
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+ t1 = list = tlpm_add(list, (uint8_t[]){ 0xff }, 8);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0x00 }, 16));
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f }, 8));
+ assert(!tlpm_match(list, (uint8_t[]){ 0xfe }, 8));
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 7));
+
+ t2 = list = tlpm_add(list, (uint8_t[]){ 0xff, 0xff }, 16);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t2 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 15));
+ assert(!tlpm_match(list, (uint8_t[]){ 0x7f, 0xff }, 16));
+
+ list = tlpm_delete(list, (uint8_t[]){ 0xff, 0xff }, 16);
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+ assert(t1 == tlpm_match(list, (uint8_t[]){ 0xff, 0xff }, 16));
+
+ list = tlpm_delete(list, (uint8_t[]){ 0xff }, 8);
+ assert(!tlpm_match(list, (uint8_t[]){ 0xff }, 8));
+
+ tlpm_clear(list);
+}
+
+static void test_lpm_order(void)
+{
+ struct tlpm_node *t1, *t2, *l1 = NULL, *l2 = NULL;
+ size_t i, j;
+
+ /* Verify the tlpm implementation works correctly regardless of the
+ * order of entries. Insert a random set of entries into @l1, and copy
+ * the same data in reverse order into @l2. Then verify a lookup of
+ * random keys will yield the same result in both sets.
+ */
+
+ for (i = 0; i < (1 << 12); ++i)
+ l1 = tlpm_add(l1, (uint8_t[]){
+ rand() % 0xff,
+ rand() % 0xff,
+ }, rand() % 16 + 1);
+
+ for (t1 = l1; t1; t1 = t1->next)
+ l2 = tlpm_add(l2, t1->key, t1->n_bits);
+
+ for (i = 0; i < (1 << 8); ++i) {
+ uint8_t key[] = { rand() % 0xff, rand() % 0xff };
+
+ t1 = tlpm_match(l1, key, 16);
+ t2 = tlpm_match(l2, key, 16);
+
+ assert(!t1 == !t2);
+ if (t1) {
+ assert(t1->n_bits == t2->n_bits);
+ for (j = 0; j < t1->n_bits; ++j)
+ assert((t1->key[j / 8] & (1 << (7 - j % 8))) ==
+ (t2->key[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ tlpm_clear(l1);
+ tlpm_clear(l2);
+}
+
+static void test_lpm_map(int keysize)
+{
+ size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+ struct tlpm_node *t, *list = NULL;
+ struct bpf_lpm_trie_key *key;
+ uint8_t *data, *value;
+ int r, map;
+
+ /* Compare behavior of tlpm vs. bpf-lpm. Create a randomized set of
+ * prefixes and insert it into both tlpm and bpf-lpm. Then run some
+ * randomized lookups and verify both maps return the same result.
+ */
+
+ n_matches = 0;
+ n_matches_after_delete = 0;
+ n_nodes = 1 << 8;
+ n_lookups = 1 << 16;
+
+ data = alloca(keysize);
+ memset(data, 0, keysize);
+
+ value = alloca(keysize + 1);
+ memset(value, 0, keysize + 1);
+
+ key = alloca(sizeof(*key) + keysize);
+ memset(key, 0, sizeof(*key) + keysize);
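+	/* struct bpf_lpm_trie_key is a __u32 prefixlen followed by a
+	 * flexible data[] member, hence the sizeof(*key) + keysize sizing
+	 */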
+
+ map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ sizeof(*key) + keysize,
+ keysize + 1,
+ 4096,
+ BPF_F_NO_PREALLOC);
+ assert(map >= 0);
+
+ for (i = 0; i < n_nodes; ++i) {
+ for (j = 0; j < keysize; ++j)
+ value[j] = rand() & 0xff;
+ value[keysize] = rand() % (8 * keysize + 1);
+
+ list = tlpm_add(list, value, value[keysize]);
+
+ key->prefixlen = value[keysize];
+ memcpy(key->data, value, keysize);
+ r = bpf_map_update_elem(map, key, value, 0);
+ assert(!r);
+ }
+
+ for (i = 0; i < n_lookups; ++i) {
+ for (j = 0; j < keysize; ++j)
+ data[j] = rand() & 0xff;
+
+ t = tlpm_match(list, data, 8 * keysize);
+
+ key->prefixlen = 8 * keysize;
+ memcpy(key->data, data, keysize);
+ r = bpf_map_lookup_elem(map, key, value);
+ assert(!r || errno == ENOENT);
+ assert(!t == !!r);
+
+ if (t) {
+ ++n_matches;
+ assert(t->n_bits == value[keysize]);
+ for (j = 0; j < t->n_bits; ++j)
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+ (value[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ /* Remove the first half of the elements in the tlpm and the
+ * corresponding nodes from the bpf-lpm. Then run the same
+ * large number of random lookups in both and make sure they match.
+ * Note: we need to count the number of nodes actually inserted
+ * since there may have been duplicates.
+ */
+ for (i = 0, t = list; t; i++, t = t->next)
+ ;
+ for (j = 0; j < i / 2; ++j) {
+ key->prefixlen = list->n_bits;
+ memcpy(key->data, list->key, keysize);
+ r = bpf_map_delete_elem(map, key);
+ assert(!r);
+ list = tlpm_delete(list, list->key, list->n_bits);
+ assert(list);
+ }
+ for (i = 0; i < n_lookups; ++i) {
+ for (j = 0; j < keysize; ++j)
+ data[j] = rand() & 0xff;
+
+ t = tlpm_match(list, data, 8 * keysize);
+
+ key->prefixlen = 8 * keysize;
+ memcpy(key->data, data, keysize);
+ r = bpf_map_lookup_elem(map, key, value);
+ assert(!r || errno == ENOENT);
+ assert(!t == !!r);
+
+ if (t) {
+ ++n_matches_after_delete;
+ assert(t->n_bits == value[keysize]);
+ for (j = 0; j < t->n_bits; ++j)
+ assert((t->key[j / 8] & (1 << (7 - j % 8))) ==
+ (value[j / 8] & (1 << (7 - j % 8))));
+ }
+ }
+
+ close(map);
+ tlpm_clear(list);
+
+ /* With 255 random nodes in the map, we are pretty likely to match
+ * something on every lookup. For statistics, use this:
+ *
+ * printf(" nodes: %zu\n"
+ * " lookups: %zu\n"
+ * " matches: %zu\n"
+ * "matches(delete): %zu\n",
+ * n_nodes, n_lookups, n_matches, n_matches_after_delete);
+ */
+}
+
+/* Test the implementation with some 'real world' examples */
+
+static void test_lpm_ipaddr(void)
+{
+ struct bpf_lpm_trie_key *key_ipv4;
+ struct bpf_lpm_trie_key *key_ipv6;
+ size_t key_size_ipv4;
+ size_t key_size_ipv6;
+ int map_fd_ipv4;
+ int map_fd_ipv6;
+ __u64 value;
+
+ key_size_ipv4 = sizeof(*key_ipv4) + sizeof(__u32);
+ key_size_ipv6 = sizeof(*key_ipv6) + sizeof(__u32) * 4;
+ key_ipv4 = alloca(key_size_ipv4);
+ key_ipv6 = alloca(key_size_ipv6);
+
+ map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size_ipv4, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd_ipv4 >= 0);
+
+ map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size_ipv6, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd_ipv6 >= 0);
+
+	/* Fill in some IPv4 and IPv6 address ranges */
+ value = 1;
+ key_ipv4->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 2;
+ key_ipv4->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 3;
+ key_ipv4->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 5;
+ key_ipv4->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 4;
+ key_ipv4->prefixlen = 23;
+ inet_pton(AF_INET, "192.168.0.0", key_ipv4->data);
+ assert(bpf_map_update_elem(map_fd_ipv4, key_ipv4, &value, 0) == 0);
+
+ value = 0xdeadbeef;
+ key_ipv6->prefixlen = 64;
+ inet_pton(AF_INET6, "2a00:1450:4001:814::200e", key_ipv6->data);
+ assert(bpf_map_update_elem(map_fd_ipv6, key_ipv6, &value, 0) == 0);
+
+	/* Set prefixlen to maximum for lookups */
+ key_ipv4->prefixlen = 32;
+ key_ipv6->prefixlen = 128;
+
+ /* Test some lookups that should come back with a value */
+ inet_pton(AF_INET, "192.168.128.23", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
+ assert(value == 3);
+
+ inet_pton(AF_INET, "192.168.0.1", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == 0);
+ assert(value == 2);
+
+ inet_pton(AF_INET6, "2a00:1450:4001:814::", key_ipv6->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
+ assert(value == 0xdeadbeef);
+
+ inet_pton(AF_INET6, "2a00:1450:4001:814::1", key_ipv6->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == 0);
+ assert(value == 0xdeadbeef);
+
+ /* Test some lookups that should not match any entry */
+ inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
+ errno == ENOENT);
+
+ inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
+ errno == ENOENT);
+
+ inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
+ errno == ENOENT);
+
+ close(map_fd_ipv4);
+ close(map_fd_ipv6);
+}
+
+static void test_lpm_delete(void)
+{
+ struct bpf_lpm_trie_key *key;
+ size_t key_size;
+ int map_fd;
+ __u64 value;
+
+ key_size = sizeof(*key) + sizeof(__u32);
+ key = alloca(key_size);
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ key_size, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd >= 0);
+
+ /* Add nodes:
+ * 192.168.0.0/16 (1)
+ * 192.168.0.0/24 (2)
+ * 192.168.128.0/24 (3)
+ * 192.168.1.0/24 (4)
+ *
+ * (1)
+ * / \
+ * (IM) (3)
+ * / \
+ * (2) (4)
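+	 *
+	 * ((IM) is an intermediate node created internally by the trie)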
+ */
+ value = 1;
+ key->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 2;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 3;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+ value = 4;
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key->data);
+ assert(bpf_map_update_elem(map_fd, key, &value, 0) == 0);
+
+	/* look up and remove non-existent nodes */
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "10.0.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+ errno == ENOENT);
+
+	key->prefixlen = 30; /* unused prefix so far */
+ inet_pton(AF_INET, "192.255.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+ errno == ENOENT);
+
+	key->prefixlen = 16; /* same prefix as the root node */
+ inet_pton(AF_INET, "192.255.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == -1 &&
+ errno == ENOENT);
+
+ /* assert initial lookup */
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 2);
+
+ /* remove leaf node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.0.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 1);
+
+ /* remove leaf (and intermediary) node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.1.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 1);
+
+ /* remove root node */
+ key->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.128.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == 0);
+ assert(value == 3);
+
+ /* remove last node */
+ key->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key->data);
+ assert(bpf_map_delete_elem(map_fd, key) == 0);
+
+ key->prefixlen = 32;
+ inet_pton(AF_INET, "192.168.128.1", key->data);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
+ errno == ENOENT);
+
+ close(map_fd);
+}
+
+static void test_lpm_get_next_key(void)
+{
+ struct bpf_lpm_trie_key *key_p, *next_key_p;
+ size_t key_size;
+ __u32 value = 0;
+ int map_fd;
+
+ key_size = sizeof(*key_p) + sizeof(__u32);
+ key_p = alloca(key_size);
+ next_key_p = alloca(key_size);
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
+ 100, BPF_F_NO_PREALLOC);
+ assert(map_fd >= 0);
+
+ /* empty tree. get_next_key should return ENOENT */
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
+ errno == ENOENT);
+
+	/* get and verify the first key; getting the second one should fail. */
+ key_p->prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168);
+
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+	/* with no exact matching key, get_next_key should return the first key in post order. */
+ key_p->prefixlen = 8;
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168);
+
+ /* add one more element (total two) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 128);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total three) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total four) */
+ key_p->prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+ /* Add one more element (total five) */
+ key_p->prefixlen = 28;
+ inet_pton(AF_INET, "192.168.1.128", key_p->data);
+ assert(bpf_map_update_elem(map_fd, key_p, &value, 0) == 0);
+
+ memset(key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == 0);
+ assert(key_p->prefixlen == 24 && key_p->data[0] == 192 &&
+ key_p->data[1] == 168 && key_p->data[2] == 0);
+
+ memset(next_key_p, 0, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 28 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1 &&
+ next_key_p->data[3] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 1);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 128);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 16 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168);
+
+ memcpy(key_p, next_key_p, key_size);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
+ errno == ENOENT);
+
+	/* with no exact matching key, get_next_key should return the first key in post order */
+ key_p->prefixlen = 22;
+ inet_pton(AF_INET, "192.168.1.0", key_p->data);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == 0);
+ assert(next_key_p->prefixlen == 24 && next_key_p->data[0] == 192 &&
+ next_key_p->data[1] == 168 && next_key_p->data[2] == 0);
+
+ close(map_fd);
+}
+
+#define MAX_TEST_KEYS 4
+struct lpm_mt_test_info {
+ int cmd; /* 0: update, 1: delete, 2: lookup, 3: get_next_key */
+ int iter;
+ int map_fd;
+ struct {
+ __u32 prefixlen;
+ __u32 data;
+ } key[MAX_TEST_KEYS];
+};
+
+static void *lpm_test_command(void *arg)
+{
+ int i, j, ret, iter, key_size;
+ struct lpm_mt_test_info *info = arg;
+ struct bpf_lpm_trie_key *key_p;
+
+ key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_p = alloca(key_size);
+ for (iter = 0; iter < info->iter; iter++)
+ for (i = 0; i < MAX_TEST_KEYS; i++) {
+ /* first half of iterations in forward order,
+ * and second half in backward order.
+ */
+ j = (iter < (info->iter / 2)) ? i : MAX_TEST_KEYS - i - 1;
+ key_p->prefixlen = info->key[j].prefixlen;
+ memcpy(key_p->data, &info->key[j].data, sizeof(__u32));
+ if (info->cmd == 0) {
+ __u32 value = j;
+ /* update must succeed */
+ assert(bpf_map_update_elem(info->map_fd, key_p, &value, 0) == 0);
+ } else if (info->cmd == 1) {
+ ret = bpf_map_delete_elem(info->map_fd, key_p);
+ assert(ret == 0 || errno == ENOENT);
+ } else if (info->cmd == 2) {
+ __u32 value;
+ ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
+ assert(ret == 0 || errno == ENOENT);
+ } else {
+ struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
+ assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
+ }
+ }
+
+	/* Pass successful exit info back to the main thread */
+ pthread_exit((void *)info);
+}
+
+static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
+{
+ info->iter = 2000;
+ info->map_fd = map_fd;
+ info->key[0].prefixlen = 16;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[0].data);
+ info->key[1].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.0.0", &info->key[1].data);
+ info->key[2].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.128.0", &info->key[2].data);
+ info->key[3].prefixlen = 24;
+ inet_pton(AF_INET, "192.168.1.0", &info->key[3].data);
+}
+
+static void test_lpm_multi_thread(void)
+{
+ struct lpm_mt_test_info info[4];
+ size_t key_size, value_size;
+ pthread_t thread_id[4];
+ int i, map_fd;
+ void *ret;
+
+ /* create a trie */
+ value_size = sizeof(__u32);
+ key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
+ map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
+ 100, BPF_F_NO_PREALLOC);
+
+ /* create 4 threads to test update, delete, lookup and get_next_key */
+ setup_lpm_mt_test_info(&info[0], map_fd);
+ for (i = 0; i < 4; i++) {
+ if (i != 0)
+ memcpy(&info[i], &info[0], sizeof(info[i]));
+ info[i].cmd = i;
+ assert(pthread_create(&thread_id[i], NULL, &lpm_test_command, &info[i]) == 0);
+ }
+
+ for (i = 0; i < 4; i++)
+ assert(pthread_join(thread_id[i], &ret) == 0 && ret == (void *)&info[i]);
+
+ close(map_fd);
+}
+
+int main(void)
+{
+ int i;
+
+	/* we want predictable, pseudo-random tests */
+ srand(0xf00ba1);
+
+ test_lpm_basic();
+ test_lpm_order();
+
+	/* Test with 1 to 16 byte keys (8 to 128 bit prefix lengths) */
+ for (i = 1; i <= 16; ++i)
+ test_lpm_map(i);
+
+ test_lpm_ipaddr();
+ test_lpm_delete();
+ test_lpm_get_next_key();
+ test_lpm_multi_thread();
+
+ printf("test_lpm: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
new file mode 100644
index 000000000..781c7de34
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -0,0 +1,646 @@
+/*
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <sys/wait.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+#define LOCAL_FREE_TARGET (128)
+#define PERCPU_FREE_TARGET (4)
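+
+/* these are expected to mirror the LRU free-target constants used
+ * internally by the kernel (kernel/bpf/bpf_lru_list.h)
+ */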
+
+static int nr_cpus;
+
+static int create_map(int map_type, int map_flags, unsigned int size)
+{
+ int map_fd;
+
+ map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, map_flags);
+
+ if (map_fd == -1)
+ perror("bpf_create_map");
+
+ return map_fd;
+}
+
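+/* return 1 iff every key in map1 is present in map0 with the same value[0] */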
+static int map_subset(int map0, int map1)
+{
+ unsigned long long next_key = 0;
+ unsigned long long value0[nr_cpus], value1[nr_cpus];
+ int ret;
+
+ while (!bpf_map_get_next_key(map1, &next_key, &next_key)) {
+ assert(!bpf_map_lookup_elem(map1, &next_key, value1));
+ ret = bpf_map_lookup_elem(map0, &next_key, value0);
+ if (ret) {
+ printf("key:%llu not found from map. %s(%d)\n",
+ next_key, strerror(errno), errno);
+ return 0;
+ }
+ if (value0[0] != value1[0]) {
+ printf("key:%llu value0:%llu != value1:%llu\n",
+ next_key, value0[0], value1[0]);
+ return 0;
+ }
+ }
+ return 1;
+}
+
+static int map_equal(int lru_map, int expected)
+{
+ return map_subset(lru_map, expected) && map_subset(expected, lru_map);
+}
+
+static int sched_next_online(int pid, int *next_to_try)
+{
+ cpu_set_t cpuset;
+ int next = *next_to_try;
+ int ret = -1;
+
+ while (next < nr_cpus) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(next++, &cpuset);
+ if (!sched_setaffinity(pid, sizeof(cpuset), &cpuset)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ *next_to_try = next;
+ return ret;
+}
+
+/* Size of the LRU map is 2
+ * Add key=1 (+1 key)
+ * Add key=2 (+1 key)
+ * Lookup key=1
+ * Add key=3
+ *   => key=2 will be removed by LRU
+ * Iterate the map; only key=1 and key=3 are found
+ */
+static void test_lru_sanity0(int map_type, int map_flags)
+{
+ unsigned long long key, value[nr_cpus];
+ int lru_map_fd, expected_map_fd;
+ int next_cpu = 0;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ lru_map_fd = create_map(map_type, map_flags, 2 * nr_cpus);
+ else
+ lru_map_fd = create_map(map_type, map_flags, 2);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, 2);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* insert key=1 element */
+
+ key = 1;
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* BPF_NOEXIST means: add new element if it doesn't exist */
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
+ /* key=1 already exists */
+ && errno == EEXIST);
+
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* insert key=2 element */
+
+ /* check that key=2 is not found */
+ key = 2;
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
+ errno == ENOENT);
+
+ /* BPF_EXIST means: update existing element */
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
+ /* key=2 is not there */
+ errno == ENOENT);
+
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+
+ /* insert key=3 element */
+
+ /* check that key=3 is not found */
+ key = 3;
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
+ errno == ENOENT);
+
+ /* check that key=1 can be found and mark the ref bit to
+ * stop LRU from removing key=1
+ */
+ key = 1;
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(value[0] == 1234);
+
+ key = 3;
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* key=2 has been removed from the LRU */
+ key = 2;
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1);
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Size of the LRU map is 1.5*tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Lookup 1 to tgt_free/2
+ * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
+ * => 1+tgt_free/2 to tgt_free will be removed by LRU
+ */
+static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
+{
+ unsigned long long key, end_key, value[nr_cpus];
+ int lru_map_fd, expected_map_fd;
+ unsigned int batch_size;
+ unsigned int map_size;
+ int next_cpu = 0;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ /* This test is only applicable to common LRU list */
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ batch_size = tgt_free / 2;
+ assert(batch_size * 2 == tgt_free);
+
+ map_size = tgt_free + batch_size;
+ lru_map_fd = create_map(map_type, map_flags, map_size);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* Insert 1 to tgt_free (+tgt_free keys) */
+ end_key = 1 + tgt_free;
+ for (key = 1; key < end_key; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Lookup 1 to tgt_free/2 */
+ end_key = 1 + batch_size;
+ for (key = 1; key < end_key; key++) {
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ /* Insert 1+tgt_free to 2*tgt_free
+	 * => 1+tgt_free/2 to tgt_free will be
+ * removed by LRU
+ */
+ key = 1 + tgt_free;
+ end_key = key + tgt_free;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Size of the LRU map is 1.5 * tgt_free
+ * Insert 1 to tgt_free (+tgt_free keys)
+ * Update 1 to tgt_free/2
+ *   => The original 1 to tgt_free/2 will be removed due to
+ *      the LRU shrink process
+ * Re-insert 1 to tgt_free/2 again and do a lookup immediately
+ * Insert 1+tgt_free to tgt_free*3/2
+ * Insert 1+tgt_free*3/2 to tgt_free*5/2
+ *   => Keys 1+tgt_free to tgt_free*3/2
+ *      will be removed from the LRU because they have never
+ *      been looked up and their ref bits are not set
+ */
+static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
+{
+ unsigned long long key, value[nr_cpus];
+ unsigned long long end_key;
+ int lru_map_fd, expected_map_fd;
+ unsigned int batch_size;
+ unsigned int map_size;
+ int next_cpu = 0;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ /* This test is only applicable to common LRU list */
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ batch_size = tgt_free / 2;
+ assert(batch_size * 2 == tgt_free);
+
+ map_size = tgt_free + batch_size;
+ lru_map_fd = create_map(map_type, map_flags, map_size);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* Insert 1 to tgt_free (+tgt_free keys) */
+ end_key = 1 + tgt_free;
+ for (key = 1; key < end_key; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Any bpf_map_update_elem first has to acquire a new node
+ * from the LRU.
+ *
+ * The local list is running out of free nodes.
+ * It gets nodes from the global LRU list, which tries to
+ * shrink the inactive list to get tgt_free
+ * number of free nodes.
+ *
+ * Hence, the oldest keys, 1 to tgt_free/2,
+ * are removed from the LRU list.
+ */
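+ /* For the common LRU case below, the BPF_EXIST update on key=1
+ * itself triggers that shrink before the hash lookup runs, so the
+ * update is expected to fail: key=1 is already gone by the time the
+ * update searches for it (hence the bare assert()).
+ */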
+ key = 1;
+ if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_delete_elem(lru_map_fd, &key));
+ } else {
+ assert(bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_EXIST));
+ }
+
+ /* Re-insert 1 to tgt_free/2 again and do a lookup
+ * immediately.
+ */
+ end_key = 1 + batch_size;
+ value[0] = 4321;
+ for (key = 1; key < end_key; key++) {
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(value[0] == 4321);
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ value[0] = 1234;
+
+ /* Insert 1+tgt_free to tgt_free*3/2 */
+ end_key = 1 + tgt_free + batch_size;
+ for (key = 1 + tgt_free; key < end_key; key++)
+ /* These newly added but not referenced keys will be
+ * gone during the next LRU shrink.
+ */
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
+ end_key = key + tgt_free;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Size of the LRU map is 2*tgt_free
+ * It is to test the active/inactive list rotation
+ * Insert 1 to 2*tgt_free (+2*tgt_free keys)
+ * Lookup key 1 to tgt_free*3/2
+ * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
+ * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+ */
+static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
+{
+ unsigned long long key, end_key, value[nr_cpus];
+ int lru_map_fd, expected_map_fd;
+ unsigned int batch_size;
+ unsigned int map_size;
+ int next_cpu = 0;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ /* This test is only applicable to the common LRU list */
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ batch_size = tgt_free / 2;
+ assert(batch_size * 2 == tgt_free);
+
+ map_size = tgt_free * 2;
+ lru_map_fd = create_map(map_type, map_flags, map_size);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
+ end_key = 1 + (2 * tgt_free);
+ for (key = 1; key < end_key; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ /* Lookup key 1 to tgt_free*3/2 */
+ end_key = tgt_free + batch_size;
+ for (key = 1; key < end_key; key++) {
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ /* Add 1+2*tgt_free to tgt_free*5/2
+ * (+tgt_free/2 keys)
+ */
+ key = 2 * tgt_free + 1;
+ end_key = key + batch_size;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+/* Test deletion */
+static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
+{
+ int lru_map_fd, expected_map_fd;
+ unsigned long long key, value[nr_cpus];
+ unsigned long long end_key;
+ int next_cpu = 0;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * tgt_free * nr_cpus);
+ else
+ lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ assert(lru_map_fd != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
+ 3 * tgt_free);
+ assert(expected_map_fd != -1);
+
+ value[0] = 1234;
+
+ for (key = 1; key <= 2 * tgt_free; key++)
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+
+ key = 1;
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
+
+ for (key = 1; key <= tgt_free; key++) {
+ assert(!bpf_map_lookup_elem(lru_map_fd, &key, value));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ for (; key <= 2 * tgt_free; key++) {
+ assert(!bpf_map_delete_elem(lru_map_fd, &key));
+ assert(bpf_map_delete_elem(lru_map_fd, &key));
+ }
+
+ end_key = key + 2 * tgt_free;
+ for (; key < end_key; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
+{
+ unsigned long long key, value[nr_cpus];
+
+ /* Ensure the last key inserted by the previous CPU can be found */
+ assert(!bpf_map_lookup_elem(map_fd, &last_key, value));
+
+ value[0] = 1234;
+
+ key = last_key + 1;
+ assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
+ assert(!bpf_map_lookup_elem(map_fd, &key, value));
+
+ /* Cannot find the last key because it was removed by the LRU */
+ assert(bpf_map_lookup_elem(map_fd, &last_key, value));
+}
+
+/* Test map with only one element */
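+/* Each iteration moves the parent's CPU affinity forward via
+ * sched_next_online() and forks a child, which then checks that the
+ * single slot always follows the most recent insertion.
+ */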
+static void test_lru_sanity5(int map_type, int map_flags)
+{
+ unsigned long long key, value[nr_cpus];
+ int next_cpu = 0;
+ int map_fd;
+
+ if (map_flags & BPF_F_NO_COMMON_LRU)
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ map_fd = create_map(map_type, map_flags, 1);
+ assert(map_fd != -1);
+
+ value[0] = 1234;
+ key = 0;
+ assert(!bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST));
+
+ while (sched_next_online(0, &next_cpu) != -1) {
+ pid_t pid;
+
+ pid = fork();
+ if (pid == 0) {
+ do_test_lru_sanity5(key, map_fd);
+ exit(0);
+ } else if (pid == -1) {
+ printf("couldn't spawn process to test key:%llu\n",
+ key);
+ exit(1);
+ } else {
+ int status;
+
+ assert(waitpid(pid, &status, 0) == pid);
+ assert(status == 0);
+ key++;
+ }
+ }
+
+ close(map_fd);
+ /* At least one key should be tested */
+ assert(key > 0);
+
+ printf("Pass\n");
+}
+
+/* Test list rotation for BPF_F_NO_COMMON_LRU map */
+static void test_lru_sanity6(int map_type, int map_flags, int tgt_free)
+{
+ int lru_map_fd, expected_map_fd;
+ unsigned long long key, value[nr_cpus];
+ unsigned int map_size = tgt_free * 2;
+ int next_cpu = 0;
+
+ if (!(map_flags & BPF_F_NO_COMMON_LRU))
+ return;
+
+ printf("%s (map_type:%d map_flags:0x%X): ", __func__, map_type,
+ map_flags);
+
+ assert(sched_next_online(0, &next_cpu) != -1);
+
+ expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, map_size);
+ assert(expected_map_fd != -1);
+
+ lru_map_fd = create_map(map_type, map_flags, map_size * nr_cpus);
+ assert(lru_map_fd != -1);
+
+ value[0] = 1234;
+
+ for (key = 1; key <= tgt_free; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ for (; key <= tgt_free * 2; key++) {
+ unsigned long long stable_key;
+
+ /* Make ref bit sticky for key: [1, tgt_free] */
+ for (stable_key = 1; stable_key <= tgt_free; stable_key++) {
+ /* Mark the ref bit */
+ assert(!bpf_map_lookup_elem(lru_map_fd, &stable_key,
+ value));
+ }
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ for (; key <= tgt_free * 3; key++) {
+ assert(!bpf_map_update_elem(lru_map_fd, &key, value,
+ BPF_NOEXIST));
+ assert(!bpf_map_update_elem(expected_map_fd, &key, value,
+ BPF_NOEXIST));
+ }
+
+ assert(map_equal(lru_map_fd, expected_map_fd));
+
+ close(expected_map_fd);
+ close(lru_map_fd);
+
+ printf("Pass\n");
+}
+
+int main(int argc, char **argv)
+{
+ int map_types[] = {BPF_MAP_TYPE_LRU_HASH,
+ BPF_MAP_TYPE_LRU_PERCPU_HASH};
+ int map_flags[] = {0, BPF_F_NO_COMMON_LRU};
+ int t, f;
+
+ setbuf(stdout, NULL);
+
+ nr_cpus = bpf_num_possible_cpus();
+ assert(nr_cpus != -1);
+ printf("nr_cpus:%d\n\n", nr_cpus);
+
+ for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
+ PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
+
+ for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+ test_lru_sanity0(map_types[t], map_flags[f]);
+ test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity3(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity4(map_types[t], map_flags[f], tgt_free);
+ test_lru_sanity5(map_types[t], map_flags[f]);
+ test_lru_sanity6(map_types[t], map_flags[f], tgt_free);
+
+ printf("\n");
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.c b/tools/testing/selftests/bpf/test_lwt_seg6local.c
new file mode 100644
index 000000000..e2f6ed0a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.c
@@ -0,0 +1,437 @@
+#include <stddef.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <linux/seg6_local.h>
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+/* Packet parsing state machine helpers. */
+#define cursor_advance(_cursor, _len) \
+ ({ void *_tmp = _cursor; _cursor += _len; _tmp; })
+
+#define SR6_FLAG_ALERT (1 << 4)
+
+#define htonll(x) ((bpf_htonl(1)) == 1 ? (x) : ((uint64_t)bpf_htonl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_htonl((x) >> 32))
+#define ntohll(x) ((bpf_ntohl(1)) == 1 ? (x) : ((uint64_t)bpf_ntohl((x) & \
+ 0xFFFFFFFF) << 32) | bpf_ntohl((x) >> 32))
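+/* htonll/ntohll compose a 64-bit byte swap from two 32-bit swaps; when
+ * bpf_htonl(1) == 1 the host is big-endian and the value passes through
+ * unchanged.
+ */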
+#define BPF_PACKET_HEADER __attribute__((packed))
+
+struct ip6_t {
+ unsigned int ver:4;
+ unsigned int priority:8;
+ unsigned int flow_label:20;
+ unsigned short payload_len;
+ unsigned char next_header;
+ unsigned char hop_limit;
+ unsigned long long src_hi;
+ unsigned long long src_lo;
+ unsigned long long dst_hi;
+ unsigned long long dst_lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_addr_t {
+ unsigned long long hi;
+ unsigned long long lo;
+} BPF_PACKET_HEADER;
+
+struct ip6_srh_t {
+ unsigned char nexthdr;
+ unsigned char hdrlen;
+ unsigned char type;
+ unsigned char segments_left;
+ unsigned char first_segment;
+ unsigned char flags;
+ unsigned short tag;
+
+ struct ip6_addr_t segments[0];
+} BPF_PACKET_HEADER;
+
+struct sr6_tlv_t {
+ unsigned char type;
+ unsigned char len;
+ unsigned char value[0];
+} BPF_PACKET_HEADER;
+
+static __always_inline struct ip6_srh_t *get_srh(struct __sk_buff *skb)
+{
+ void *cursor, *data_end;
+ struct ip6_srh_t *srh;
+ struct ip6_t *ip;
+ uint8_t *ipver;
+
+ data_end = (void *)(long)skb->data_end;
+ cursor = (void *)(long)skb->data;
+ ipver = (uint8_t *)cursor;
+
+ if ((void *)ipver + sizeof(*ipver) > data_end)
+ return NULL;
+
+ if ((*ipver >> 4) != 6)
+ return NULL;
+
+ ip = cursor_advance(cursor, sizeof(*ip));
+ if ((void *)ip + sizeof(*ip) > data_end)
+ return NULL;
+
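+ /* next_header 43 is the IPv6 Routing header (IPPROTO_ROUTING) */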
+ if (ip->next_header != 43)
+ return NULL;
+
+ srh = cursor_advance(cursor, sizeof(*srh));
+ if ((void *)srh + sizeof(*srh) > data_end)
+ return NULL;
+
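+ /* routing type 4 is the Segment Routing header (SRH) */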
+ if (srh->type != 4)
+ return NULL;
+
+ return srh;
+}
+
+static __always_inline
+int update_tlv_pad(struct __sk_buff *skb, uint32_t new_pad,
+ uint32_t old_pad, uint32_t pad_off)
+{
+ int err;
+
+ if (new_pad != old_pad) {
+ err = bpf_lwt_seg6_adjust_srh(skb, pad_off,
+ (int) new_pad - (int) old_pad);
+ if (err)
+ return err;
+ }
+
+ if (new_pad > 0) {
+ char pad_tlv_buf[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0};
+ struct sr6_tlv_t *pad_tlv = (struct sr6_tlv_t *) pad_tlv_buf;
+
+ pad_tlv->type = SR6_TLV_PADDING;
+ pad_tlv->len = new_pad - 2;
+
+ err = bpf_lwt_seg6_store_bytes(skb, pad_off,
+ (void *)pad_tlv_buf, new_pad);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static __always_inline
+int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t *tlv_off, uint32_t *pad_size,
+ uint32_t *pad_off)
+{
+ uint32_t srh_off, cur_off;
+ int offset_valid = 0;
+ int err;
+
+ srh_off = (char *)srh - (char *)(long)skb->data;
+ // cur_off = end of segments, start of possible TLVs
+ cur_off = srh_off + sizeof(*srh) +
+ sizeof(struct ip6_addr_t) * (srh->first_segment + 1);
+
+ *pad_off = 0;
+
+ // we can only go as far as ~10 TLVs due to the BPF max stack size
+ #pragma clang loop unroll(full)
+ for (int i = 0; i < 10; i++) {
+ struct sr6_tlv_t tlv;
+
+ if (cur_off == *tlv_off)
+ offset_valid = 1;
+
+ if (cur_off >= srh_off + ((srh->hdrlen + 1) << 3))
+ break;
+
+ err = bpf_skb_load_bytes(skb, cur_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ if (tlv.type == SR6_TLV_PADDING) {
+ *pad_size = tlv.len + sizeof(tlv);
+ *pad_off = cur_off;
+
+ if (*tlv_off == srh_off) {
+ *tlv_off = cur_off;
+ offset_valid = 1;
+ }
+ break;
+
+ } else if (tlv.type == SR6_TLV_HMAC) {
+ break;
+ }
+
+ cur_off += sizeof(tlv) + tlv.len;
+ } // we reached the padding or HMAC TLVs, or the end of the SRH
+
+ if (*pad_off == 0)
+ *pad_off = cur_off;
+
+ if (*tlv_off == -1)
+ *tlv_off = cur_off;
+ else if (!offset_valid)
+ return -EINVAL;
+
+ return 0;
+}
+
+static __always_inline
+int add_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh, uint32_t tlv_off,
+ struct sr6_tlv_t *itlv, uint8_t tlv_size)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ uint32_t partial_srh_len;
+ int err;
+
+ if (tlv_off != -1)
+ tlv_off += srh_off;
+
+ if (itlv->type == SR6_TLV_PADDING || itlv->type == SR6_TLV_HMAC)
+ return -EINVAL;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, sizeof(*itlv) + itlv->len);
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_store_bytes(skb, tlv_off, (void *)itlv, tlv_size);
+ if (err)
+ return err;
+
+ // the following can't be moved inside update_tlv_pad because the
+ // bpf verifier has some issues with it
+ pad_off += sizeof(*itlv) + itlv->len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
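+ /* e.g. partial_srh_len = 23: len_remaining = 7, new_pad = 1, bumped
+ * to 9 since a PadN TLV needs at least 2 bytes (type + len);
+ * 23 + 9 = 32 keeps the SRH length a multiple of 8.
+ */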
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+static __always_inline
+int delete_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh,
+ uint32_t tlv_off)
+{
+ uint32_t srh_off = (char *)srh - (char *)(long)skb->data;
+ uint8_t len_remaining, new_pad;
+ uint32_t partial_srh_len;
+ uint32_t pad_off = 0;
+ uint32_t pad_size = 0;
+ struct sr6_tlv_t tlv;
+ int err;
+
+ tlv_off += srh_off;
+
+ err = is_valid_tlv_boundary(skb, srh, &tlv_off, &pad_size, &pad_off);
+ if (err)
+ return err;
+
+ err = bpf_skb_load_bytes(skb, tlv_off, &tlv, sizeof(tlv));
+ if (err)
+ return err;
+
+ err = bpf_lwt_seg6_adjust_srh(skb, tlv_off, -(sizeof(tlv) + tlv.len));
+ if (err)
+ return err;
+
+ pad_off -= sizeof(tlv) + tlv.len;
+ partial_srh_len = pad_off - srh_off;
+ len_remaining = partial_srh_len % 8;
+ new_pad = 8 - len_remaining;
+ if (new_pad == 1) // cannot pad for 1 byte only
+ new_pad = 9;
+ else if (new_pad == 8)
+ new_pad = 0;
+
+ return update_tlv_pad(skb, new_pad, pad_size, pad_off);
+}
+
+static __always_inline
+int has_egr_tlv(struct __sk_buff *skb, struct ip6_srh_t *srh)
+{
+ int tlv_offset = sizeof(struct ip6_t) + sizeof(struct ip6_srh_t) +
+ ((srh->first_segment + 1) << 4);
+ struct sr6_tlv_t tlv;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset, &tlv, sizeof(struct sr6_tlv_t)))
+ return 0;
+
+ if (tlv.type == SR6_TLV_EGRESS && tlv.len == 18) {
+ struct ip6_addr_t egr_addr;
+
+ if (bpf_skb_load_bytes(skb, tlv_offset + 4, &egr_addr, 16))
+ return 0;
+
+ // check if egress TLV value is correct
+ if (ntohll(egr_addr.hi) == 0xfd00000000000000 &&
+ ntohll(egr_addr.lo) == 0x4)
+ return 1;
+ }
+
+ return 0;
+}
+
+// This function will push an SRH with segments fd00::1, fd00::2, fd00::3,
+// fd00::4
+SEC("encap_srh")
+int __encap_srh(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfd00000000000000;
+ struct ip6_addr_t *seg;
+ struct ip6_srh_t *srh;
+ char srh_buf[72]; // room for 4 segments
+ int err;
+
+ srh = (struct ip6_srh_t *)srh_buf;
+ srh->nexthdr = 0;
+ srh->hdrlen = 8;
+ srh->type = 4;
+ srh->segments_left = 3;
+ srh->first_segment = 3;
+ srh->flags = 0;
+ srh->tag = 0;
+
+ seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
+
+ #pragma clang loop unroll(full)
+ for (unsigned long long lo = 0; lo < 4; lo++) {
+ seg->lo = htonll(4 - lo);
+ seg->hi = htonll(hi);
+ seg = (struct ip6_addr_t *)((char *)seg + sizeof(*seg));
+ }
+
+ err = bpf_lwt_push_encap(skb, 0, (void *)srh, sizeof(srh_buf));
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+// Add an Egress TLV fd00::4, set the Alert flag,
+// and apply an End.X action to fc42::1
+SEC("add_egr_x")
+int __add_egr_x(struct __sk_buff *skb)
+{
+ unsigned long long hi = 0xfc42000000000000;
+ unsigned long long lo = 0x1;
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint8_t new_flags = SR6_FLAG_ALERT;
+ struct ip6_addr_t addr;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ uint8_t tlv[20] = {2, 18, 0, 0, 0xfd, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x4};
+
+ err = add_tlv(skb, srh, (srh->hdrlen+1) << 3,
+ (struct sr6_tlv_t *)&tlv, 20);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ err = bpf_lwt_seg6_store_bytes(skb, offset,
+ (void *)&new_flags, sizeof(new_flags));
+ if (err)
+ return BPF_DROP;
+
+ addr.lo = htonll(lo);
+ addr.hi = htonll(hi);
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_X,
+ (void *)&addr, sizeof(addr));
+ if (err)
+ return BPF_DROP;
+ return BPF_REDIRECT;
+}
+
+// Pop the Egress TLV, reset the flags, change the tag to 2442 and finally do
+// a simple End action
+SEC("pop_egr")
+int __pop_egr(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ uint16_t new_tag = bpf_htons(2442);
+ uint8_t new_flags = 0;
+ int err, offset;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != SR6_FLAG_ALERT)
+ return BPF_DROP;
+
+ if (srh->hdrlen != 11) // 4 segments + Egress TLV + Padding TLV
+ return BPF_DROP;
+
+ if (!has_egr_tlv(skb, srh))
+ return BPF_DROP;
+
+ err = delete_tlv(skb, srh, 8 + (srh->first_segment + 1) * 16);
+ if (err)
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, flags);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_flags,
+ sizeof(new_flags)))
+ return BPF_DROP;
+
+ offset = sizeof(struct ip6_t) + offsetof(struct ip6_srh_t, tag);
+ if (bpf_lwt_seg6_store_bytes(skb, offset, (void *)&new_tag,
+ sizeof(new_tag)))
+ return BPF_DROP;
+
+ return BPF_OK;
+}
+
+// Check that the Egress TLV and flag have been removed and that the tag is
+// correct, then apply an End.T action to reach the last segment
+SEC("inspect_t")
+int __inspect_t(struct __sk_buff *skb)
+{
+ struct ip6_srh_t *srh = get_srh(skb);
+ int table = 117;
+ int err;
+
+ if (srh == NULL)
+ return BPF_DROP;
+
+ if (srh->flags != 0)
+ return BPF_DROP;
+
+ if (srh->tag != bpf_htons(2442))
+ return BPF_DROP;
+
+ if (srh->hdrlen != 8) // 4 segments
+ return BPF_DROP;
+
+ err = bpf_lwt_seg6_action(skb, SEG6_LOCAL_ACTION_END_T,
+ (void *)&table, sizeof(table));
+
+ if (err)
+ return BPF_DROP;
+
+ return BPF_REDIRECT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
new file mode 100755
index 000000000..785eabf2a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+# Connects 6 network namespaces through veths.
+# Each NS may have different IPv6 global-scope addresses:
+# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
+# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
+# fc42::1 fd00::4
+#
+# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
+# IPv6 header with a Segment Routing Header, with segments:
+# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+#
+# 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
+# - fd00::1 : add a TLV, change the flags and apply an End.X action to fc42::1
+# - fd00::2 : remove the TLV, change the flags, add a tag
+# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+#
+# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+# Each End.BPF action will validate the operations applied on the SRH by the
+# previous BPF program in the chain; otherwise the packet is dropped.
+#
+# A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+# datagram can be read on NS6 when binding to fb00::6.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+msg="skip all tests:"
+if [ $UID != 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_lwt_seg6local [PASS]";
+ else
+ echo "selftests: test_lwt_seg6local [FAILED]";
+ fi
+
+ set +e
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+ ip netns del ns3 2> /dev/null
+ ip netns del ns4 2> /dev/null
+ ip netns del ns5 2> /dev/null
+ ip netns del ns6 2> /dev/null
+ rm -f $TMP_FILE
+}
+
+set -e
+
+ip netns add ns1
+ip netns add ns2
+ip netns add ns3
+ip netns add ns4
+ip netns add ns5
+ip netns add ns6
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+ip link add veth3 type veth peer name veth4
+ip link add veth5 type veth peer name veth6
+ip link add veth7 type veth peer name veth8
+ip link add veth9 type veth peer name veth10
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+ip link set veth3 netns ns2
+ip link set veth4 netns ns3
+ip link set veth5 netns ns3
+ip link set veth6 netns ns4
+ip link set veth7 netns ns4
+ip link set veth8 netns ns5
+ip link set veth9 netns ns5
+ip link set veth10 netns ns6
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ns2 ip link set dev veth3 up
+ip netns exec ns3 ip link set dev veth4 up
+ip netns exec ns3 ip link set dev veth5 up
+ip netns exec ns4 ip link set dev veth6 up
+ip netns exec ns4 ip link set dev veth7 up
+ip netns exec ns5 ip link set dev veth8 up
+ip netns exec ns5 ip link set dev veth9 up
+ip netns exec ns6 ip link set dev veth10 up
+ip netns exec ns6 ip link set dev lo up
+
+# All link scope addresses and routes required between veths
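+# (Convention: fb00::MN is configured on vethM, whose peer is vethN,
+# e.g. fb00::12 on veth1 and fb00::21 on veth2.)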
+ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
+ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
+ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
+
+ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
+ip netns exec ns4 ip -6 addr add fc42::1 dev lo
+ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
+
+ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
+ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+sleep 5 # wait enough time to ensure the UDP datagram arrived at the last segment
+kill -INT $!
+
+if [[ $(< $TMP_FILE) != "foobar" ]]; then
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
new file mode 100644
index 000000000..87ba89df9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -0,0 +1,1451 @@
+/*
+ * Testsuite for eBPF maps
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#include <sys/wait.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static int map_flags;
+
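+/* CHECK(): if the condition is true, print the tag plus the formatted
+ * details and abort the whole test run.
+ */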
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
+ printf(format); \
+ exit(-1); \
+ } \
+})
+
+static void test_hashmap(int task, void *data)
+{
+ long long key, next_key, first_key, value;
+ int fd;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ 2, map_flags);
+ if (fd < 0) {
+ printf("Failed to create hashmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ /* BPF_NOEXIST means add new element if it doesn't exist. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists. */
+ errno == EEXIST);
+
+ /* -1 is an invalid flag. */
+ assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* Check that key=1 can be found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
+
+ key = 2;
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means update existing element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ /* key=2 is not there. */
+ errno == ENOENT);
+
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be
+ * inserted due to max_entries limit.
+ */
+ key = 0;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Update existing element, though the map is full. */
+ key = 1;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+ key = 2;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+ key = 3;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 0 doesn't exist. */
+ key = 0;
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+ (first_key == 1 || first_key == 2));
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+ (next_key == first_key));
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+ (next_key == 1 || next_key == 2) &&
+ (next_key != first_key));
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* Delete both elements. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ key = 2;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* Check that map is empty. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ close(fd);
+}
+
+static void test_hashmap_sizes(int task, void *data)
+{
+ int fd, i, j;
+
+ for (i = 1; i <= 512; i <<= 1)
+ for (j = 1; j <= 1 << 18; j <<= 1) {
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
+ 2, map_flags);
+ if (fd < 0) {
+ if (errno == ENOMEM)
+ return;
+ printf("Failed to create hashmap key=%d value=%d '%s'\n",
+ i, j, strerror(errno));
+ exit(1);
+ }
+ close(fd);
+ usleep(10); /* give kernel time to destroy */
+ }
+}
+
+static void test_hashmap_percpu(int task, void *data)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ BPF_DECLARE_PERCPU(long, value);
+ long long key, next_key, first_key;
+ int expected_key_mask = 0;
+ int fd, i;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
+ sizeof(bpf_percpu(value, 0)), 2, map_flags);
+ if (fd < 0) {
+ printf("Failed to create hashmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value, i) = i + 100;
+
+ key = 1;
+ /* Insert key=1 element. */
+ assert(!(expected_key_mask & key));
+ assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+ expected_key_mask |= key;
+
+ /* BPF_NOEXIST means add new element if it doesn't exist. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ /* key=1 already exists. */
+ errno == EEXIST);
+
+ /* -1 is an invalid flag. */
+ assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ errno == EINVAL);
+
+ /* Check that key=1 can be found. Value could be 0 if the lookup
+ * was run from a different CPU.
+ */
+ bpf_percpu(value, 0) = 1;
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value, 0) == 100);
+
+ key = 2;
+ /* Check that key=2 is not found. */
+ assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+
+ /* BPF_EXIST means update existing element. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ /* key=2 is not there. */
+ errno == ENOENT);
+
+ /* Insert key=2 element. */
+ assert(!(expected_key_mask & key));
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
+ expected_key_mask |= key;
+
+ /* key=1 and key=2 were inserted, check that key=0 cannot be
+ * inserted due to max_entries limit.
+ */
+ key = 0;
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 0 doesn't exist. */
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
+ ((expected_key_mask & first_key) == first_key));
+ while (!bpf_map_get_next_key(fd, &key, &next_key)) {
+ if (first_key) {
+ assert(next_key == first_key);
+ first_key = 0;
+ }
+ assert((expected_key_mask & next_key) == next_key);
+ expected_key_mask &= ~next_key;
+
+ assert(bpf_map_lookup_elem(fd, &next_key, value) == 0);
+
+ for (i = 0; i < nr_cpus; i++)
+ assert(bpf_percpu(value, i) == i + 100);
+
+ key = next_key;
+ }
+ assert(errno == ENOENT);
+
+ /* Update with BPF_EXIST. */
+ key = 1;
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
+
+ /* Delete both elements. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ key = 2;
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+
+ key = 0;
+ /* Check that map is empty. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ close(fd);
+}
+
+static void test_hashmap_walk(int task, void *data)
+{
+ int fd, i, max_entries = 1000;
+ long long key, value, next_key;
+ bool next_key_valid = true;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ max_entries, map_flags);
+ if (fd < 0) {
+ printf("Failed to create hashmap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < max_entries; i++) {
+ key = i; value = key;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+ }
+
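+ /* First walk: visit every key via get_next_key and verify it can
+ * be looked up.
+ */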
+ for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
+ &next_key) == 0; i++) {
+ key = next_key;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ }
+
+ assert(i == max_entries);
+
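+ /* Second walk: increment every value in place while iterating. */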
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ for (i = 0; next_key_valid; i++) {
+ next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ value++;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+ key = next_key;
+ }
+
+ assert(i == max_entries);
+
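+ /* Third walk: verify each value was incremented exactly once,
+ * i.e. value == key + 1.
+ */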
+ for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
+ &next_key) == 0; i++) {
+ key = next_key;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ assert(value - 1 == key);
+ }
+
+ assert(i == max_entries);
+ close(fd);
+}
+
+static void test_arraymap(int task, void *data)
+{
+ int key, next_key, fd;
+ long long value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
+ 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ value = 0;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* Check that key=1 can be found. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
+
+ key = 0;
+ /* Check that key=0 is also found and zero initialized. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
+
+ /* key=0 and key=1 were inserted, check that key=2 cannot be inserted
+ * due to max_entries limit.
+ */
+ key = 2;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 2 doesn't exist. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* Delete shouldn't succeed. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_arraymap_percpu(int task, void *data)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ BPF_DECLARE_PERCPU(long, values);
+ int key, next_key, fd, i;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(values, i) = i + 100;
+
+ key = 1;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
+
+ bpf_percpu(values, 0) = 0;
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* Check that key=1 can be found. */
+ assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+ bpf_percpu(values, 0) == 100);
+
+ key = 0;
+ /* Check that key=0 is also found and zero initialized. */
+ assert(bpf_map_lookup_elem(fd, &key, values) == 0 &&
+ bpf_percpu(values, 0) == 0 &&
+ bpf_percpu(values, nr_cpus - 1) == 0);
+
+ /* Check that key=2 cannot be inserted due to max_entries limit. */
+ key = 2;
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Check that key = 2 doesn't exist. */
+ assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+
+ /* Iterate over two elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &key, &next_key) == 0 &&
+ next_key == 0);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
+ next_key == 1);
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ errno == ENOENT);
+
+ /* Delete shouldn't succeed. */
+ key = 1;
+ assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+
+ close(fd);
+}
+
+static void test_arraymap_percpu_many_keys(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ BPF_DECLARE_PERCPU(long, values);
+ /* nr_keys is kept moderate; otherwise the test stresses the percpu
+ * allocator more than anything else
+ */
+ unsigned int nr_keys = 2000;
+ int key, fd, i;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+ if (fd < 0) {
+ printf("Failed to create per-cpu arraymap '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(values, i) = i + 10;
+
+ for (key = 0; key < nr_keys; key++)
+ assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
+
+ for (key = 0; key < nr_keys; key++) {
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(values, i) = 0;
+
+ assert(bpf_map_lookup_elem(fd, &key, values) == 0);
+
+ for (i = 0; i < nr_cpus; i++)
+ assert(bpf_percpu(values, i) == i + 10);
+ }
+
+ close(fd);
+}
+
+static void test_devmap(int task, void *data)
+{
+ int fd;
+ __u32 key, value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
+ 2, 0);
+ if (fd < 0) {
+ printf("Failed to create arraymap '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ close(fd);
+}
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <sys/select.h>
+#include <linux/err.h>
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+static void test_sockmap(int tasks, void *data)
+{
+ struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
+ int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+ int ports[] = {50200, 50201, 50202, 50204};
+ int err, i, fd, udp, sfd[6] = {0xdeadbeef};
+ u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
+ int parse_prog, verdict_prog, msg_prog;
+ struct sockaddr_in addr;
+ int one = 1, s, sc, rc;
+ struct bpf_object *obj;
+ struct timeval to;
+ __u32 key, value;
+ pid_t pid[tasks];
+ fd_set w;
+
+ /* Create some sockets to use with sockmap */
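+ /* sfd[0..1] listen, sfd[2..3] connect to them, and sfd[4..5] are
+ * the accepted peer sockets.
+ */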
+ for (i = 0; i < 2; i++) {
+ sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (sfd[i] < 0)
+ goto out;
+ err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ printf("failed to setsockopt\n");
+ goto out;
+ }
+ err = ioctl(sfd[i], FIONBIO, (char *)&one);
+ if (err < 0) {
+ printf("failed to ioctl\n");
+ goto out;
+ }
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = htons(ports[i]);
+ err = bind(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ printf("failed to bind: err %i: %i:%i\n",
+ err, i, sfd[i]);
+ goto out;
+ }
+ err = listen(sfd[i], 32);
+ if (err < 0) {
+ printf("failed to listen\n");
+ goto out;
+ }
+ }
+
+ for (i = 2; i < 4; i++) {
+ sfd[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (sfd[i] < 0)
+ goto out;
+ err = setsockopt(sfd[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ printf("set sock opt\n");
+ goto out;
+ }
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ addr.sin_port = htons(ports[i - 2]);
+ err = connect(sfd[i], (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ printf("failed to connect\n");
+ goto out;
+ }
+ }
+
+
+ for (i = 4; i < 6; i++) {
+ sfd[i] = accept(sfd[i - 4], NULL, NULL);
+ if (sfd[i] < 0) {
+ printf("accept failed\n");
+ goto out;
+ }
+ }
+
+ /* Test sockmap with connected sockets */
+ fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+ sizeof(key), sizeof(value),
+ 6, 0);
+ if (fd < 0) {
+ printf("Failed to create sockmap %i\n", fd);
+ goto out_sockmap;
+ }
+
+ /* Test update with unsupported UDP socket */
+ udp = socket(AF_INET, SOCK_DGRAM, 0);
+ i = 0;
+ err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
+ if (!err) {
+ printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ i, udp);
+ goto out_sockmap;
+ }
+
+ /* Test update without programs */
+ for (i = 0; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (i < 2 && !err) {
+ printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
+ i, sfd[i]);
+ goto out_sockmap;
+ } else if (i >= 2 && err) {
+ printf("Failed noprog update sockmap '%i:%i'\n",
+ i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test attaching/detaching bad fds */
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Failed invalid parser prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, BPF_SK_MSG_VERDICT, 0);
+ if (!err) {
+ printf("Failed invalid msg verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(-1, fd, __MAX_BPF_ATTACH_TYPE, 0);
+ if (!err) {
+ printf("Failed unknown prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
+ if (err) {
+ printf("Failed empty parser prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
+ if (err) {
+ printf("Failed empty verdict prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
+ if (err) {
+ printf("Failed empty msg verdict prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(fd, __MAX_BPF_ATTACH_TYPE);
+ if (!err) {
+ printf("Detach invalid prog successful\n");
+ goto out_sockmap;
+ }
+
+ /* Load SK_SKB program and Attach */
+ err = bpf_prog_load(SOCKMAP_PARSE_PROG,
+ BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+ if (err) {
+ printf("Failed to load SK_SKB parse prog\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
+ BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+ if (err) {
+ printf("Failed to load SK_MSG prog\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
+ BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+ if (err) {
+ printf("Failed to load SK_SKB verdict prog\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+ if (IS_ERR(bpf_map_rx)) {
+ printf("Failed to load map rx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_rx = bpf_map__fd(bpf_map_rx);
+ if (map_fd_rx < 0) {
+ printf("Failed to get map rx fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+ if (IS_ERR(bpf_map_tx)) {
+ printf("Failed to load map tx from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_tx = bpf_map__fd(bpf_map_tx);
+ if (map_fd_tx < 0) {
+ printf("Failed to get map tx fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+ if (IS_ERR(bpf_map_msg)) {
+ printf("Failed to load map msg from msg_verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_msg = bpf_map__fd(bpf_map_msg);
+ if (map_fd_msg < 0) {
+ printf("Failed to get map msg fd\n");
+ goto out_sockmap;
+ }
+
+ bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+ if (IS_ERR(bpf_map_break)) {
+ printf("Failed to load map break from verdict prog\n");
+ goto out_sockmap;
+ }
+
+ map_fd_break = bpf_map__fd(bpf_map_break);
+ if (map_fd_break < 0) {
+ printf("Failed to get map break fd\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_break,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!err) {
+ printf("Allowed attaching SK_SKB program to invalid map\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(parse_prog, map_fd_rx,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed stream parser bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(verdict_prog, map_fd_rx,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ printf("Failed stream verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(msg_prog, map_fd_msg, BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ printf("Failed msg verdict bpf prog attach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_attach(verdict_prog, map_fd_rx,
+ __MAX_BPF_ATTACH_TYPE, 0);
+ if (!err) {
+ printf("Attached unknown bpf prog\n");
+ goto out_sockmap;
+ }
+
+ /* Test map update elem; afterwards the socket fds live in both fd
+ * and the map_fd_rx/tx maps
+ */
+ for (i = 2; i < 6; i++) {
+ err = bpf_map_update_elem(map_fd_rx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_rx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(map_fd_tx, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_tx update sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test map delete elem and remove send/recv sockets */
+ for (i = 2; i < 4; i++) {
+ err = bpf_map_delete_elem(map_fd_rx, &i);
+ if (err) {
+ printf("Failed delete sockmap rx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_delete_elem(map_fd_tx, &i);
+ if (err) {
+ printf("Failed delete sockmap tx %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Put sfd[2] (sending fd below) into msg map to test sendmsg bpf */
+ i = 0;
+ err = bpf_map_update_elem(map_fd_msg, &i, &sfd[2], BPF_ANY);
+ if (err) {
+ printf("Failed map_fd_msg update sockmap %i\n", err);
+ goto out_sockmap;
+ }
+
+ /* Test map send/recv */
+ for (i = 0; i < 2; i++) {
+ buf[0] = i;
+ buf[1] = 0x5;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
+
+ FD_ZERO(&w);
+ FD_SET(sfd[3], &w);
+ to.tv_sec = 30;
+ to.tv_usec = 0;
+ s = select(sfd[3] + 1, &w, NULL, NULL, &to);
+ if (s == -1) {
+ perror("Failed sockmap select()");
+ goto out_sockmap;
+ } else if (!s) {
+ printf("Failed sockmap unexpected timeout\n");
+ goto out_sockmap;
+ }
+
+ if (!FD_ISSET(sfd[3], &w)) {
+ printf("Failed sockmap select/recv\n");
+ goto out_sockmap;
+ }
+
+ rc = recv(sfd[3], buf, sizeof(buf), 0);
+ if (rc < 0) {
+ printf("Failed sockmap recv\n");
+ goto out_sockmap;
+ }
+ }
+
+ /* Negative test: a NULL entry lookup from the datapath should
+ * cause the packet to be dropped
+ */
+ buf[0] = 1;
+ buf[1] = 12;
+ sc = send(sfd[2], buf, 20, 0);
+ if (sc < 0) {
+ printf("Failed sockmap send\n");
+ goto out_sockmap;
+ }
+
+ /* Push fd into same slot */
+ i = 2;
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed sockmap dup slot BPF_NOEXIST\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (err) {
+ printf("Failed sockmap update new slot BPF_ANY\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (err) {
+ printf("Failed sockmap update new slot BPF_EXIST\n");
+ goto out_sockmap;
+ }
+
+ /* Delete the elems without programs */
+ for (i = 2; i < 6; i++) {
+ err = bpf_map_delete_elem(fd, &i);
+ if (err) {
+ printf("Failed delete sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ }
+ }
+
+ /* Test having multiple maps open and set with programs on same fds */
+ err = bpf_prog_attach(parse_prog, fd,
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ printf("Failed fd bpf parse prog attach\n");
+ goto out_sockmap;
+ }
+ err = bpf_prog_attach(verdict_prog, fd,
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ printf("Failed fd bpf verdict prog attach\n");
+ goto out_sockmap;
+ }
+
+ for (i = 4; i < 6; i++) {
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
+ if (!err) {
+ printf("Failed allowed duplicate programs in update ANY sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_NOEXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update NOEXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_EXIST);
+ if (!err) {
+ printf("Failed allowed duplicate program in update EXIST sockmap %i '%i:%i'\n",
+ err, i, sfd[i]);
+ goto out_sockmap;
+ }
+ }
+
+ /* Test tasks number of forked operations */
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
+ bpf_map_update_elem(map_fd_tx, &i,
+ &sfd[i], BPF_ANY);
+ bpf_map_update_elem(map_fd_rx, &i,
+ &sfd[i], BPF_ANY);
+ }
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("Couldn't spawn #%d process!\n", i);
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+
+ err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+ if (!err) {
+ printf("Detached an invalid prog type.\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+ if (err) {
+ printf("Failed parser prog detach\n");
+ goto out_sockmap;
+ }
+
+ err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+ if (err) {
+ printf("Failed verdict prog detach\n");
+ goto out_sockmap;
+ }
+
+ /* Test map close sockets and empty maps */
+ for (i = 0; i < 6; i++) {
+ bpf_map_delete_elem(map_fd_tx, &i);
+ bpf_map_delete_elem(map_fd_rx, &i);
+ close(sfd[i]);
+ }
+ close(fd);
+ close(map_fd_rx);
+ bpf_object__close(obj);
+ return;
+out:
+ for (i = 0; i < 6; i++)
+ close(sfd[i]);
+ printf("Failed to create sockmap '%i:%s'!\n", i, strerror(errno));
+ exit(1);
+out_sockmap:
+ for (i = 0; i < 6; i++) {
+ if (map_fd_tx)
+ bpf_map_delete_elem(map_fd_tx, &i);
+ if (map_fd_rx)
+ bpf_map_delete_elem(map_fd_rx, &i);
+ close(sfd[i]);
+ }
+ close(fd);
+ exit(1);
+}
+
+#define MAP_SIZE (32 * 1024)
+
+static void test_map_large(void)
+{
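+ /* 128-byte keys (4 + 116 + 8 bytes): exercise hashing of large keys */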
+ struct bigkey {
+ int a;
+ char b[116];
+ long long c;
+ } key;
+ int fd, i, value;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags);
+ if (fd < 0) {
+ printf("Failed to create large map '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = (struct bigkey) { .c = i };
+ value = i;
+
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == 0);
+ }
+
+ key.c = -1;
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == E2BIG);
+
+ /* Iterate through all elements. */
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ key.c = -1;
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+ assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+
+ key.c = 0;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
+ key.a = 1;
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(fd);
+}
+
+#define run_parallel(N, FN, DATA) \
+ printf("Fork %d tasks to '" #FN "'\n", N); \
+ __run_parallel(N, FN, DATA)
+
+static void __run_parallel(int tasks, void (*fn)(int task, void *data),
+ void *data)
+{
+ pid_t pid[tasks];
+ int i;
+
+ fflush(stdout);
+
+ for (i = 0; i < tasks; i++) {
+ pid[i] = fork();
+ if (pid[i] == 0) {
+ fn(i, data);
+ exit(0);
+ } else if (pid[i] == -1) {
+ printf("Couldn't spawn #%d process!\n", i);
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < tasks; i++) {
+ int status;
+
+ assert(waitpid(pid[i], &status, 0) == pid[i]);
+ assert(status == 0);
+ }
+}
+
+static void test_map_stress(void)
+{
+ run_parallel(100, test_hashmap, NULL);
+ run_parallel(100, test_hashmap_percpu, NULL);
+ run_parallel(100, test_hashmap_sizes, NULL);
+ run_parallel(100, test_hashmap_walk, NULL);
+
+ run_parallel(100, test_arraymap, NULL);
+ run_parallel(100, test_arraymap_percpu, NULL);
+}
+
+#define TASKS 1024
+
+#define DO_UPDATE 1
+#define DO_DELETE 0
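+/* test_update_delete() expects data[0] = map fd and data[1] = DO_UPDATE
+ * or DO_DELETE.
+ */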
+
+static void test_update_delete(int fn, void *data)
+{
+ int do_update = ((int *)data)[1];
+ int fd = ((int *)data)[0];
+ int i, key, value;
+
+ for (i = fn; i < MAP_SIZE; i += TASKS) {
+ key = value = i;
+
+ if (do_update) {
+ assert(bpf_map_update_elem(fd, &key, &value,
+ BPF_NOEXIST) == 0);
+ assert(bpf_map_update_elem(fd, &key, &value,
+ BPF_EXIST) == 0);
+ } else {
+ assert(bpf_map_delete_elem(fd, &key) == 0);
+ }
+ }
+}
+
+static void test_map_parallel(void)
+{
+ int i, fd, key = 0, value = 0;
+ int data[2];
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags);
+ if (fd < 0) {
+ printf("Failed to create map for parallel test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ /* Use the same fd in children to add elements to this map:
+ * child_0 adds key=0, key=1024, key=2048, ...
+ * child_1 adds key=1, key=1025, key=2049, ...
+ * child_1023 adds key=1023, ...
+ */
+ data[0] = fd;
+ data[1] = DO_UPDATE;
+ run_parallel(TASKS, test_update_delete, data);
+
+ /* Check that key=0 is already there. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ errno == EEXIST);
+
+ /* Check that all elements were inserted. */
+ assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
+ key = -1;
+ for (i = 0; i < MAP_SIZE; i++)
+ assert(bpf_map_get_next_key(fd, &key, &key) == 0);
+ assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+
+ /* Another check for all elements */
+ for (i = 0; i < MAP_SIZE; i++) {
+ key = MAP_SIZE - i - 1;
+
+ assert(bpf_map_lookup_elem(fd, &key, &value) == 0 &&
+ value == key);
+ }
+
+ /* Now let's delete all elements in parallel. */
+ data[1] = DO_DELETE;
+ run_parallel(TASKS, test_update_delete, data);
+
+ /* Nothing should be left. */
+ key = -1;
+ assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+}
+
+static void test_map_rdonly(void)
+{
+ int fd, key = 0, value = 0;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags | BPF_F_RDONLY);
+ if (fd < 0) {
+ printf("Failed to create map for read only test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ errno == EPERM);
+
+ /* Check that key=1 is not found (the update above failed). */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+}
+
+static void test_map_wronly(void)
+{
+ int fd, key = 0, value = 0;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
+ MAP_SIZE, map_flags | BPF_F_WRONLY);
+ if (fd < 0) {
+ printf("Failed to create map for read only test '%s'!\n",
+ strerror(errno));
+ exit(1);
+ }
+
+ key = 1;
+ value = 1234;
+ /* Insert key=1 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Lookups and key iteration must fail with EPERM on a write-only map. */
+ assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+}
+
+static void prepare_reuseport_grp(int type, int map_fd,
+ __s64 *fds64, __u64 *sk_cookies,
+ unsigned int n)
+{
+ socklen_t optlen, addrlen;
+ struct sockaddr_in6 s6;
+ const __u32 index0 = 0;
+ const int optval = 1;
+ unsigned int i;
+ u64 sk_cookie;
+ __s64 fd64;
+ int err;
+
+ s6.sin6_family = AF_INET6;
+ s6.sin6_addr = in6addr_any;
+ s6.sin6_port = 0;
+ addrlen = sizeof(s6);
+ optlen = sizeof(sk_cookie);
+
+ for (i = 0; i < n; i++) {
+ fd64 = socket(AF_INET6, type, 0);
+ CHECK(fd64 == -1, "socket()",
+ "sock_type:%d fd64:%lld errno:%d\n",
+ type, fd64, errno);
+
+ err = setsockopt(fd64, SOL_SOCKET, SO_REUSEPORT,
+ &optval, sizeof(optval));
+ CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+ "err:%d errno:%d\n", err, errno);
+
+ /* reuseport_array does not allow unbound sk */
+ err = bpf_map_update_elem(map_fd, &index0, &fd64,
+ BPF_ANY);
+ CHECK(err != -1 || errno != EINVAL,
+ "reuseport array update unbound sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ err = bind(fd64, (struct sockaddr *)&s6, sizeof(s6));
+ CHECK(err == -1, "bind()",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ if (i == 0) {
+ err = getsockname(fd64, (struct sockaddr *)&s6,
+ &addrlen);
+ CHECK(err == -1, "getsockname()",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ err = getsockopt(fd64, SOL_SOCKET, SO_COOKIE, &sk_cookie,
+ &optlen);
+ CHECK(err == -1, "getsockopt(SO_COOKIE)",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ if (type == SOCK_STREAM) {
+ /*
+ * reuseport_array does not allow
+ * non-listening tcp sk.
+ */
+ err = bpf_map_update_elem(map_fd, &index0, &fd64,
+ BPF_ANY);
+ CHECK(err != -1 || errno != EINVAL,
+ "reuseport array update non-listening sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ err = listen(fd64, 0);
+ CHECK(err == -1, "listen()",
+ "sock_type:%d, err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ fds64[i] = fd64;
+ sk_cookies[i] = sk_cookie;
+ }
+}
+
+static void test_reuseport_array(void)
+{
+#define REUSEPORT_FD_IDX(err, last) ({ (err) ? last : !last; })
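+/* Keep the same fd index after an expected failure (err != 0); switch to
+ * the other prepared fd after a success.
+ */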
+
+ const __u32 array_size = 4, index0 = 0, index3 = 3;
+ int types[2] = { SOCK_STREAM, SOCK_DGRAM }, type;
+ __u64 grpa_cookies[2], sk_cookie, map_cookie;
+ __s64 grpa_fds64[2] = { -1, -1 }, fd64 = -1;
+ const __u32 bad_index = array_size;
+ int map_fd, err, t, f;
+ __u32 fds_idx = 0;
+ int fd;
+
+ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(__u32), sizeof(__u64), array_size, 0);
+ CHECK(map_fd == -1, "reuseport array create",
+ "map_fd:%d, errno:%d\n", map_fd, errno);
+
+ /* Test lookup/update/delete with invalid index */
+ err = bpf_map_delete_elem(map_fd, &bad_index);
+ CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != E2BIG,
+ "reuseport array update >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array update >=max_entries",
+ "err:%d errno:%d\n", err, errno);
+
+ /* Test lookup/delete of a non-existent elem */
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array lookup not-exist elem",
+ "err:%d errno:%d\n", err, errno);
+ err = bpf_map_delete_elem(map_fd, &index3);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array del not-exist elem",
+ "err:%d errno:%d\n", err, errno);
+
+ for (t = 0; t < ARRAY_SIZE(types); t++) {
+ type = types[t];
+
+ prepare_reuseport_grp(type, map_fd, grpa_fds64,
+ grpa_cookies, ARRAY_SIZE(grpa_fds64));
+
+ /* Test BPF_* update flags */
+ /* BPF_EXIST failure case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_EXIST);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array update empty elem BPF_EXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_NOEXIST success case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_NOEXIST);
+ CHECK(err == -1,
+ "reuseport array update empty elem BPF_NOEXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_EXIST success case. */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_EXIST);
+ CHECK(err == -1,
+ "reuseport array update same elem BPF_EXIST",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_NOEXIST failure case */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_NOEXIST);
+ CHECK(err != -1 || errno != EEXIST,
+ "reuseport array update non-empty elem BPF_NOEXIST",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
+
+ /* BPF_ANY case (always succeed) */
+ err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
+ BPF_ANY);
+ CHECK(err == -1,
+ "reuseport array update same sk with BPF_ANY",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ fd64 = grpa_fds64[fds_idx];
+ sk_cookie = grpa_cookies[fds_idx];
+
+ /* The same sk cannot be added to reuseport_array twice */
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != EBUSY,
+ "reuseport array update same sk with same index",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
+ CHECK(err != -1 || errno != EBUSY,
+ "reuseport array update same sk with different index",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ /* Test delete elem */
+ err = bpf_map_delete_elem(map_fd, &index3);
+ CHECK(err == -1, "reuseport array delete sk",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+
+ /* Add it back with BPF_NOEXIST */
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+ CHECK(err == -1,
+ "reuseport array re-add with BPF_NOEXIST after del",
+ "sock_type:%d err:%d errno:%d\n", type, err, errno);
+
+ /* Test cookie */
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err == -1 || sk_cookie != map_cookie,
+ "reuseport array lookup re-added sk",
+ "sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
+ type, err, errno, sk_cookie, map_cookie);
+
+ /* Test elem removed by close() */
+ for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
+ close(grpa_fds64[f]);
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOENT,
+ "reuseport array lookup after close()",
+ "sock_type:%d err:%d errno:%d\n",
+ type, err, errno);
+ }
+
+ /* Test SOCK_RAW */
+ fd64 = socket(AF_INET6, SOCK_RAW, IPPROTO_UDP);
+ CHECK(fd64 == -1, "socket(SOCK_RAW)", "fd64:%lld errno:%d\n",
+ fd64, errno);
+ err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
+ CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ "err:%d errno:%d\n", err, errno);
+ close(fd64);
+
+ /* Close the 64 bit value map */
+ close(map_fd);
+
+ /* Test 32 bit fd */
+ map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(__u32), sizeof(__u32), array_size, 0);
+ CHECK(map_fd == -1, "reuseport array create",
+ "map_fd:%d, errno:%d\n", map_fd, errno);
+ prepare_reuseport_grp(SOCK_STREAM, map_fd, &fd64, &sk_cookie, 1);
+ fd = fd64;
+ err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
+ CHECK(err == -1, "reuseport array update 32 bit fd",
+ "err:%d errno:%d\n", err, errno);
+ err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
+ CHECK(err != -1 || errno != ENOSPC,
+ "reuseport array lookup 32 bit fd",
+ "err:%d errno:%d\n", err, errno);
+ close(fd);
+ close(map_fd);
+}
+
+static void run_all_tests(void)
+{
+ test_hashmap(0, NULL);
+ test_hashmap_percpu(0, NULL);
+ test_hashmap_walk(0, NULL);
+
+ test_arraymap(0, NULL);
+ test_arraymap_percpu(0, NULL);
+
+ test_arraymap_percpu_many_keys();
+
+ test_devmap(0, NULL);
+ test_sockmap(0, NULL);
+
+ test_map_large();
+ test_map_parallel();
+ test_map_stress();
+
+ test_map_rdonly();
+ test_map_wronly();
+
+ test_reuseport_array();
+}
+
+int main(void)
+{
+ map_flags = 0;
+ run_all_tests();
+
+ map_flags = BPF_F_NO_PREALLOC;
+ run_all_tests();
+
+ printf("test_maps: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_obj_id.c b/tools/testing/selftests/bpf/test_obj_id.c
new file mode 100644
index 000000000..880d2963b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_obj_id.c
@@ -0,0 +1,35 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+/* This is a trivial BPF program that must load without
+ * issue, since testing the verifier is not the focus
+ * here.
+ */
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") test_map_id = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 1,
+};
+
+SEC("test_obj_id_dummy")
+int test_obj_id(struct __sk_buff *skb)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&test_map_id, &key);
+
+ return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
new file mode 100755
index 000000000..6b46db61c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -0,0 +1,1288 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+#
+# This software is licensed under the GNU General Public License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import json
+import os
+import pprint
+import random
+import string
+import struct
+import subprocess
+import time
+
+logfile = None
+log_level = 1
+skip_extack = False
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+netns = [] # net namespaces to be removed
+
+def log_get_sec(level=0):
+ return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+ global log_level
+ log_level += add
+
+def log_level_dec(sub=1):
+ global log_level
+ log_level -= sub
+
+def log_level_set(level):
+ global log_level
+ log_level = level
+
+def log(header, data, level=None):
+ """
+ Output to an optional log.
+ """
+ if logfile is None:
+ return
+ if level is not None:
+ log_level_set(level)
+
+ if not isinstance(data, str):
+ data = pp.pformat(data)
+
+ if len(header):
+ logfile.write("\n" + log_get_sec() + " ")
+ logfile.write(header)
+ if len(header) and len(data.strip()):
+ logfile.write("\n")
+ logfile.write(data)
+
+def skip(cond, msg):
+ if not cond:
+ return
+ print("SKIP: " + msg)
+ log("SKIP: " + msg, "", level=1)
+ os.sys.exit(0)
+
+def fail(cond, msg):
+ if not cond:
+ return
+ print("FAIL: " + msg)
+ log("FAIL: " + msg, "", level=1)
+ os.sys.exit(1)
+
+def start_test(msg):
+ log(msg, "", level=1)
+ log_level_inc()
+ print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+ """
+ Run a command in subprocess and return tuple of (retval, stdout);
+ optionally return stderr as the third value.
+ """
+ proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if background:
+ msg = "%s START: %s" % (log_get_sec(1),
+ datetime.now().strftime("%H:%M:%S.%f"))
+ log("BKG " + proc.args, msg)
+ return proc
+
+ return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+ stdout, stderr = proc.communicate()
+ stdout = stdout.decode("utf-8")
+ stderr = stderr.decode("utf-8")
+ proc.stdout.close()
+ proc.stderr.close()
+
+ stderr = "\n" + stderr
+ if stderr[-1] == "\n":
+ stderr = stderr[:-1]
+
+ sec = log_get_sec(1)
+ log("CMD " + proc.args,
+ "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+ (proc.returncode, sec, stdout, sec, stderr,
+ sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+ if proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+ if include_stderr:
+ return proc.returncode, stdout, stderr
+ else:
+ return proc.returncode, stdout
+
+def rm(f):
+ cmd("rm -f %s" % (f))
+ if f in files:
+ files.remove(f)
+
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
+ params = ""
+ if JSON:
+ params += "%s " % (flags["json"])
+
+ if ns != "":
+ ns = "ip netns exec %s " % (ns)
+
+ if include_stderr:
+ ret, stdout, stderr = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=True)
+ else:
+ ret, stdout = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=False)
+
+ if JSON and len(stdout.strip()) != 0:
+ out = json.loads(stdout)
+ else:
+ out = stdout
+
+ if include_stderr:
+ return ret, out, stderr
+ else:
+ return ret, out
+
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def bpftool_prog_list(expected=None, ns=""):
+ _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+ # Remove the base progs
+ for p in base_progs:
+ if p in progs:
+ progs.remove(p)
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs loaded, expected %d" %
+ (len(progs), expected))
+ return progs
+
+def bpftool_map_list(expected=None, ns=""):
+ _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+ # Remove the base maps
+ for m in base_maps:
+ if m in maps:
+ maps.remove(m)
+ if expected is not None:
+ if len(maps) != expected:
+ fail(True, "%d BPF maps loaded, expected %d" %
+ (len(maps), expected))
+ return maps
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nprogs = len(bpftool_prog_list())
+ if nprogs == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def bpftool_map_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nmaps = len(bpftool_map_list())
+ if nmaps == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+ fail=True, include_stderr=False):
+ args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+ if prog_type is not None:
+ args += " type " + prog_type
+ if dev is not None:
+ args += " dev " + dev
+ if len(maps):
+ args += " map " + " map ".join(maps)
+
+ res = bpftool(args, fail=fail, include_stderr=include_stderr)
+ if res[0] == 0:
+ files.append(file_name)
+ return res
+
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
+ if force:
+ args = "-force " + args
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def ethtool(dev, opt, args, fail=True):
+ return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+ return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+ return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+ return "bytecode \"%s\"" % (bytecode)
+
+def mknetns(n_retry=10):
+ for i in range(n_retry):
+ name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+ ret, _ = ip("netns add %s" % (name), fail=False)
+ if ret == 0:
+ netns.append(name)
+ return name
+ return None
+
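+# int2str/str2int convert between integers and bpftool's space-separated
+# byte strings ("I" packs a u32, "Q" packs a u64).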
+def int2str(fmt, val):
+ ret = []
+ for b in struct.pack(fmt, val):
+ ret.append(int(b))
+ return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+ inttab = []
+ for i in strtab:
+ inttab.append(int(i, 16))
+ ba = bytearray(inttab)
+ if len(strtab) == 4:
+ fmt = "I"
+ elif len(strtab) == 8:
+ fmt = "Q"
+ else:
+ raise Exception("String array of len %d can't be unpacked to an int" %
+ (len(strtab)))
+ return struct.unpack(fmt, ba)[0]
+
+class DebugfsDir:
+ """
+ Class for accessing DebugFS directories as a dictionary.
+ """
+
+ def __init__(self, path):
+ self.path = path
+ self._dict = self._debugfs_dir_read(path)
+
+ def __len__(self):
+ return len(self._dict.keys())
+
+ def __getitem__(self, key):
+ if type(key) is int:
+ key = list(self._dict.keys())[key]
+ return self._dict[key]
+
+ def __setitem__(self, key, value):
+ log("DebugFS set %s = %s" % (key, value), "")
+ log_level_inc()
+
+ cmd("echo '%s' > %s/%s" % (value, self.path, key))
+ log_level_dec()
+
+ _, out = cmd('cat %s/%s' % (self.path, key))
+ self._dict[key] = out.strip()
+
+ def _debugfs_dir_read(self, path):
+ dfs = {}
+
+ log("DebugFS state for %s" % (path), "")
+ log_level_inc(add=2)
+
+ _, out = cmd('ls ' + path)
+ for f in out.split():
+ p = os.path.join(path, f)
+ if os.path.isfile(p):
+ _, out = cmd('cat %s/%s' % (path, f))
+ dfs[f] = out.strip()
+ elif os.path.isdir(p):
+ dfs[f] = DebugfsDir(p)
+ else:
+ raise Exception("%s is neither file nor directory" % (p))
+
+ log_level_dec()
+ log("DebugFS state", dfs)
+ log_level_dec()
+
+ return dfs
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, link=None):
+ self.link = link
+
+ self.dev = self._netdevsim_create()
+ devs.append(self)
+
+ self.ns = ""
+
+ self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname'])
+ self.sdev_dir = self.dfs_dir + '/sdev/'
+ self.dfs_refresh()
+
+ def __getitem__(self, key):
+ return self.dev[key]
+
+ def _netdevsim_create(self):
+ link = "" if self.link is None else "link " + self.link.dev['ifname']
+ _, old = ip("link show")
+ ip("link add sim%d {link} type netdevsim".format(link=link))
+ _, new = ip("link show")
+
+ for dev in new:
+ f = filter(lambda x: x["ifname"] == dev["ifname"], old)
+ if len(list(f)) == 0:
+ return dev
+
+ raise Exception("failed to create netdevsim device")
+
+ def remove(self):
+ devs.remove(self)
+ ip("link del dev %s" % (self.dev["ifname"]), ns=self.ns)
+
+ def dfs_refresh(self):
+ self.dfs = DebugfsDir(self.dfs_dir)
+ return self.dfs
+
+ def dfs_read(self, f):
+ path = os.path.join(self.dfs_dir, f)
+ _, data = cmd('cat %s' % (path))
+ return data.strip()
+
+ def dfs_num_bound_progs(self):
+ path = os.path.join(self.sdev_dir, "bpf_bound_progs")
+ _, progs = cmd('ls %s' % (path))
+ return len(progs.split())
+
+ def dfs_get_bound_progs(self, expected):
+ progs = DebugfsDir(os.path.join(self.sdev_dir, "bpf_bound_progs"))
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs bound, expected %d" %
+ (len(progs), expected))
+ return progs
+
+ def wait_for_flush(self, bound=0, total=0, n_retry=20):
+ for i in range(n_retry):
+ nbound = self.dfs_num_bound_progs()
+ nprogs = len(bpftool_prog_list())
+ if nbound == bound and nprogs == total:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+ def set_ns(self, ns):
+ name = "1" if ns == "" else ns
+ ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+ self.ns = ns
+
+ def set_mtu(self, mtu, fail=True):
+ return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+ fail=fail)
+
+ def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+ fail=True, include_stderr=False):
+ if verbose:
+ bpf += " verbose"
+ return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def unset_xdp(self, mode, force=False, JSON=True,
+ fail=True, include_stderr=False):
+ return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def ip_link_show(self, xdp):
+ _, link = ip("link show dev %s" % (self['ifname']))
+ if len(link) > 1:
+ raise Exception("Multiple objects on ip link show")
+ if len(link) < 1:
+ return {}
+ fail(xdp != "xdp" in link,
+ "XDP program not reporting in iplink (reported %s, expected %s)" %
+ ("xdp" in link, xdp))
+ return link[0]
+
+ def tc_add_ingress(self):
+ tc("qdisc add dev %s ingress" % (self['ifname']))
+
+ def tc_del_ingress(self):
+ tc("qdisc del dev %s ingress" % (self['ifname']))
+
+ def tc_flush_filters(self, bound=0, total=0):
+ self.tc_del_ingress()
+ self.tc_add_ingress()
+ self.wait_for_flush(bound=bound, total=total)
+
+ def tc_show_ingress(self, expected=None):
+ # No JSON support, oh well...
+ flags = ["skip_sw", "skip_hw", "in_hw"]
+ named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+ args = "-s filter show dev %s ingress" % (self['ifname'])
+ _, out = tc(args, JSON=False)
+
+ filters = []
+ lines = out.split('\n')
+ for line in lines:
+ words = line.split()
+ if "handle" not in words:
+ continue
+ fltr = {}
+ for flag in flags:
+ fltr[flag] = flag in words
+ for name in named:
+ try:
+ idx = words.index(name)
+ fltr[name] = words[idx + 1]
+ except ValueError:
+ pass
+ filters.append(fltr)
+
+ if expected is not None:
+ fail(len(filters) != expected,
+ "%d ingress filters loaded, expected %d" %
+ (len(filters), expected))
+ return filters
+
+ def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+ chain=None, cls="", params="",
+ fail=True, include_stderr=False):
+ spec = ""
+ if prio is not None:
+ spec += " prio %d" % (prio)
+ if handle:
+ spec += " handle %s" % (handle)
+ if chain is not None:
+ spec += " chain %d" % (chain)
+
+ return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+ cls=cls, params=params),
+ fail=fail, include_stderr=include_stderr)
+
+ def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+ chain=None, da=False, verbose=False,
+ skip_sw=False, skip_hw=False,
+ fail=True, include_stderr=False):
+ cls = "bpf " + bpf
+
+ params = ""
+ if da:
+ params += " da"
+ if verbose:
+ params += " verbose"
+ if skip_sw:
+ params += " skip_sw"
+ if skip_hw:
+ params += " skip_hw"
+
+ return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+ chain=chain, params=params,
+ fail=fail, include_stderr=include_stderr)
+
+ def set_ethtool_tc_offloads(self, enable, fail=True):
+ args = "hw-tc-offload %s" % ("on" if enable else "off")
+ return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+ global files, netns, devs
+
+ for dev in devs:
+ dev.remove()
+ for f in files:
+ cmd("rm -f %s" % (f))
+ for ns in netns:
+ cmd("ip netns delete %s" % (ns))
+ files = []
+ netns = []
+
+def pin_prog(file_name, idx=0):
+ progs = bpftool_prog_list(expected=(idx + 1))
+ prog = progs[idx]
+ bpftool("prog pin id %d %s" % (prog["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def pin_map(file_name, idx=0, expected=1):
+ maps = bpftool_map_list(expected=expected)
+ m = maps[idx]
+ bpftool("map pin id %d %s" % (m["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+ bpftool_prog_list(expected=0)
+ ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+ fail(ret == 0, "Showing prog with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing prog with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+ bpftool_map_list(expected=0)
+ ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+ fail(ret == 0, "Showing map with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing map with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+ progs = bpftool_prog_list(expected=1, ns=ns)
+ prog = progs[0]
+
+ fail("dev" not in prog.keys(), "Device parameters not reported")
+ dev = prog["dev"]
+ fail("ifindex" not in dev.keys(), "Device parameters not reported")
+ fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+ fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+ if not other_ns:
+ fail("ifname" not in dev.keys(), "Ifname not reported")
+ fail(dev["ifname"] != sim["ifname"],
+ "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+ else:
+ fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+
+ maps = bpftool_map_list(expected=2, ns=ns)
+ for m in maps:
+ fail("dev" not in m.keys(), "Device parameters not reported")
+ fail(dev != m["dev"], "Map's device different than program's")
+
+def check_extack(output, reference, args):
+ if skip_extack:
+ return
+ lines = output.split("\n")
+ comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
+ fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+ check_extack(output, "netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+ fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+ "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+ lines = output.split("\n")
+ for l in reversed(lines):
+ if l == reference:
+ return
+ fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def test_spurious_extack(sim, obj, skip_hw, needle):
+ res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+ include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_hw=skip_hw, include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+ include_stderr=True)
+ check_no_extack(res, needle)
+
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+ logfile = open(args.log, 'w+')
+ logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+base_progs = progs
+_, base_maps = bpftool("map")
+
+# Check netdevsim
+ret, out = cmd("modprobe netdevsim", fail=False)
+skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+ cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.o", "sample_map_ret0.o"]
+for s in samples:
+ ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+ skip(ret != 0, "sample %s/%s not found, please compile it" %
+ (bpf_test_dir, s))
+
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+ fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+ print("Warning: no extack message in iproute2 output, libmnl missing?")
+ log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+ skip_extack = True
+
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
+try:
+ obj = bpf_obj("sample_ret0.o")
+ bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+ start_test("Test destruction of generic XDP...")
+ sim = NetdevSim()
+ sim.set_xdp(obj, "generic")
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.tc_add_ingress()
+
+ start_test("Test TC non-offloaded...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+
+ start_test("Test TC non-offloaded isn't getting bound...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads are off by default...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ check_extack(err, "TC offload is disabled on net device.", args)
+ sim.wait_for_flush()
+
+ sim.set_ethtool_tc_offloads(True)
+ sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+ start_test("Test TC offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC cBPF bytcode tries offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ sim.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+ sim.tc_flush_filters()
+ sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+ start_test("Test TC cBPF unbound bytecode doesn't offload...")
+ ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC bytecode loaded for offload")
+ check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+ args)
+ sim.wait_for_flush()
+
+ start_test("Test non-0 chain offload...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+ skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Offloaded a filter to chain other than 0")
+ check_extack(err, "Driver supports only offload of chain 0.", args)
+ sim.tc_flush_filters()
+
+ start_test("Test TC replace...")
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test TC replace bad flags...")
+ for i in range(3):
+ for j in range(3):
+ ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_sw=(j == 1), skip_hw=(j == 2),
+ fail=False)
+ fail(bool(ret) != bool(j),
+ "Software TC incorrect load in replace test, iteration %d" %
+ (j))
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test spurious extack from the driver...")
+ test_spurios_extack(sim, obj, False, "netdevsim")
+ test_spurios_extack(sim, obj, True, "netdevsim")
+
+ sim.set_ethtool_tc_offloads(False)
+
+ test_spurios_extack(sim, obj, False, "TC offload is disabled")
+ test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads work...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret != 0, "TC filter did not load with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+
+ start_test("Test TC offload basics...")
+ dfs = sim.dfs_get_bound_progs(expected=1)
+ progs = bpftool_prog_list(expected=1)
+ ingress = sim.tc_show_ingress(expected=1)
+
+ dprog = dfs[0]
+ prog = progs[0]
+ fltr = ingress[0]
+ fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+ fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+ fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+ start_test("Test TC offload is device-bound...")
+ fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+ fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+ fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test disabling TC offloads is rejected while filters installed...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test qdisc removal frees things...")
+ sim.tc_flush_filters()
+ sim.tc_show_ingress(expected=0)
+
+ start_test("Test disabling TC offloads is OK without filters...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret != 0,
+ "Driver refused to disable TC offloads without filters installed...")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of TC filters...")
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of XDP...")
+ sim.set_xdp(obj, "offload")
+ sim.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test XDP prog reporting...")
+ sim.set_xdp(obj, "drv")
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+
+ start_test("Test XDP prog replace without force...")
+ ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ fail(ret == 0, "Replaced XDP program without -force")
+ sim.wait_for_flush(total=1)
+
+ start_test("Test XDP prog replace with force...")
+ ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+ fail(ret != 0, "Could not replace XDP program with -force")
+ bpftool_prog_list_wait(expected=1)
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+ fail("dev" in progs[0].keys(),
+ "Device parameters reported for non-offloaded program")
+
+ start_test("Test XDP prog replace with bad flags...")
+ ret, _, err = sim.set_xdp(obj, "generic", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ fail(err.count("File exists") != 1, "Replaced driver XDP with generic")
+ ret, _, err = sim.set_xdp(obj, "", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ check_extack(err, "program loaded with different flags.", args)
+
+ start_test("Test XDP prog remove with bad flags...")
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program with a bad mode")
+ check_extack(err, "program loaded with different flags.", args)
+
+ start_test("Test MTU restrictions...")
+ ret, _ = sim.set_mtu(9000, fail=False)
+ fail(ret == 0,
+ "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+ sim.unset_xdp("drv")
+ bpftool_prog_list_wait(expected=0)
+ sim.set_mtu(9000)
+ ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
+ sim.set_mtu(1500)
+
+ sim.wait_for_flush()
+ start_test("Test non-offload XDP attaching to HW...")
+ bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+ nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+ ret, _, err = sim.set_xdp(nooffload, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "attached non-offloaded XDP program to HW")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ rm("/sys/fs/bpf/nooffload")
+
+ start_test("Test offload XDP attaching to drv...")
+ bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'])
+ offload = bpf_pinned("/sys/fs/bpf/offload")
+ ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "attached offloaded XDP program to drv")
+ check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+ rm("/sys/fs/bpf/offload")
+ sim.wait_for_flush()
+
+ start_test("Test XDP offload...")
+ _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
+ ipl = sim.ip_link_show(xdp=True)
+ link_xdp = ipl["xdp"]["prog"]
+ progs = bpftool_prog_list(expected=1)
+ prog = progs[0]
+ fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+
+ start_test("Test XDP offload is device bound...")
+ dfs = sim.dfs_get_bound_progs(expected=1)
+ dprog = dfs[0]
+
+ fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+ fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+ fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test removing XDP program many times...")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("")
+ sim.unset_xdp("")
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test attempt to use a program for a wrong device...")
+ sim2 = NetdevSim()
+ sim2.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a different device accepted")
+ check_extack_nsim(err, "program bound to different dev.", args)
+ sim2.remove()
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ rm(pin_file)
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test multi-attachment XDP - attach...")
+ sim.set_xdp(obj, "offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+ fail("prog" not in xdp, "Base program not reported in single program mode")
+ fail(len(ipl["xdp"]["attached"]) != 1,
+ "Wrong attached program count with one program")
+
+ sim.set_xdp(obj, "")
+ two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded2 = sim.dfs_read("bpf_offloaded_id")
+
+ fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+ fail("prog" in two_xdps, "Base program reported in multi program mode")
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after driver activated")
+ fail(len(two_xdps["attached"]) != 2,
+ "Wrong attached program count with two programs")
+ fail(two_xdps["attached"][0]["prog"]["id"] ==
+ two_xdps["attached"][1]["prog"]["id"],
+ "offloaded and drv programs have the same id")
+ fail(offloaded != offloaded2,
+ "offload ID changed after loading driver program")
+
+ start_test("Test multi-attachment XDP - replace...")
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(err.count("busy") != 1, "Replaced one of programs without -force")
+
+ start_test("Test multi-attachment XDP - detach...")
+ ret, _, err = sim.unset_xdp("drv", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program with a bad mode")
+ check_extack(err, "program loaded with different flags.", args)
+
+ sim.unset_xdp("offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+
+ fail(xdp["mode"] != 1, "Bad mode reported after multiple programs")
+ fail("prog" not in xdp,
+ "Base program not reported after multi program mode")
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after driver activated")
+ fail(len(ipl["xdp"]["attached"]) != 1,
+ "Wrong attached program count with remaining programs")
+ fail(offloaded != "0", "offload ID reported with only driver program left")
+
+ start_test("Test multi-attachment XDP - device remove...")
+ sim.set_xdp(obj, "offload")
+ sim.remove()
+
+ sim = NetdevSim()
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test mixing of TC and XDP...")
+ sim.tc_add_ingress()
+ sim.set_xdp(obj, "offload")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Loading TC when XDP active should fail")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+ sim.unset_xdp("offload")
+ sim.wait_for_flush()
+
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(ret == 0, "Loading XDP when TC active should fail")
+ check_extack_nsim(err, "TC program is already loaded.", args)
+
+ start_test("Test binding TC from pinned...")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+ sim.tc_flush_filters(bound=1, total=1)
+ sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+ sim.tc_flush_filters(bound=1, total=1)
+
+ start_test("Test binding XDP from pinned...")
+ sim.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+
+ start_test("Test offload of wrong type fails...")
+ ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+ fail(ret == 0, "Managed to attach XDP program to TC")
+
+ start_test("Test asking for TC offload of two filters...")
+ sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+ ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Managed to offload two TC filters at the same time")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+
+ sim.tc_flush_filters(bound=2, total=2)
+
+ start_test("Test if netdev removal waits for translation...")
+ delay_msec = 500
+ sim.dfs["bpf_bind_verifier_delay"] = delay_msec
+ start = time.time()
+ cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+ (sim['ifname'], obj)
+ tc_proc = cmd(cmd_line, background=True, fail=False)
+ # Wait for the verifier to start
+ while sim.dfs_num_bound_progs() <= 2:
+ pass
+ sim.remove()
+ end = time.time()
+ ret, _ = cmd_result(tc_proc, fail=False)
+ time_diff = end - start
+ log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+ fail(ret == 0, "Managed to load TC filter on a unregistering device")
+ delay_sec = delay_msec * 0.001
+ fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+ (time_diff, delay_sec))
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+ map_obj = bpf_obj("sample_map_ret0.o")
+ start_test("Test loading program with maps...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+
+ start_test("Test bpftool bound info reporting (own ns)...")
+ check_dev_info(False, "")
+
+ start_test("Test bpftool bound info reporting (other ns)...")
+ ns = mknetns()
+ sim.set_ns(ns)
+ check_dev_info(True, "")
+
+ start_test("Test bpftool bound info reporting (remote ns)...")
+ check_dev_info(False, ns)
+
+ start_test("Test bpftool bound info reporting (back to own ns)...")
+ sim.set_ns("")
+ check_dev_info(False, "")
+
+ prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+ map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
+ sim.remove()
+
+ start_test("Test bpftool bound info reporting (removed dev)...")
+ check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ sim = NetdevSim()
+
+ start_test("Test map update (no flags)...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ maps = bpftool_map_list(expected=2)
+ array = maps[0] if maps[0]["type"] == "array" else maps[1]
+ htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, _ = bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "added too many entries")
+
+ start_test("Test map update (exists)...")
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, err = bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "updated non-existing key")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map update (noexist)...")
+ for m in maps:
+ for i in range(2):
+ ret, err = bpftool("map update id %d key %s value %s noexist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)),
+ fail=False)
+ fail(ret == 0, "updated existing key")
+ fail(err["error"].find("File exists") == -1,
+ "expected EEXIST, error is '%s'" % (err["error"]))
+
+ start_test("Test map dump...")
+ for m in maps:
+ _, entries = bpftool("map dump id %d" % (m["id"]))
+ for i in range(2):
+ key = str2int(entries[i]["key"])
+ fail(key != i, "expected key %d, got %d" % (key, i))
+ val = str2int(entries[i]["value"])
+ fail(val != i * 3, "expected value %d, got %d" % (val, i * 3))
+
+ start_test("Test map getnext...")
+ for m in maps:
+ _, entry = bpftool("map getnext id %d" % (m["id"]))
+ key = str2int(entry["next_key"])
+ fail(key != 0, "next key %d, expected %d" % (key, 0))
+ _, entry = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 0)))
+ key = str2int(entry["next_key"])
+ fail(key != 1, "next key %d, expected %d" % (key, 1))
+ ret, err = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 1)), fail=False)
+ fail(ret == 0, "got next key past the end of map")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map delete (htab)...")
+ for i in range(2):
+ bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+ start_test("Test map delete (array)...")
+ for i in range(2):
+ ret, err = bpftool("map delete id %d key %s" %
+ (htab["id"], int2str("I", i)), fail=False)
+ fail(ret == 0, "removed entry from an array")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map remove...")
+ sim.unset_xdp("offload")
+ bpftool_map_list_wait(expected=0)
+ sim.remove()
+
+ sim = NetdevSim()
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ sim.remove()
+ bpftool_map_list_wait(expected=0)
+
+ start_test("Test map creation fail path...")
+ sim = NetdevSim()
+ sim.dfs["bpf_map_accept"] = "N"
+ ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+ fail(ret == 0,
+ "netdevsim didn't refuse to create a map with offload disabled")
+
+ sim.remove()
+
+ start_test("Test multi-dev ASIC program reuse...")
+ simA = NetdevSim()
+ simB1 = NetdevSim()
+ simB2 = NetdevSim(link=simB1)
+ simB3 = NetdevSim(link=simB1)
+ sims = (simA, simB1, simB2, simB3)
+ simB = (simB1, simB2, simB3)
+
+ bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+ dev=simA['ifname'])
+ progA = bpf_pinned("/sys/fs/bpf/nsimA")
+ bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+ dev=simB1['ifname'])
+ progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+ simA.set_xdp(progA, "offload", JSON=False)
+ for d in simB:
+ d.set_xdp(progB, "offload", JSON=False)
+
+ start_test("Test multi-dev ASIC cross-dev replace...")
+ ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+ for d in simB:
+ ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+
+ start_test("Test multi-dev ASIC cross-dev install...")
+ for d in sims:
+ d.unset_xdp("offload")
+
+ ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack_nsim(err, "program bound to different dev.", args)
+ for d in simB:
+ ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack_nsim(err, "program bound to different dev.", args)
+
+ start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+ mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+ mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+ ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+ dev=simB3['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False)
+ fail(ret != 0, "couldn't reuse a map on the same ASIC")
+ rm("/sys/fs/bpf/nsimB_")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+ dev=simA['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+ dev=simB1['ifname'],
+ maps=["idx 0 id %d" % (mapA)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ start_test("Test multi-dev ASIC cross-dev destruction...")
+ bpftool_prog_list_wait(expected=2)
+
+ simA.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB1['ifname'], "program not bound to originial device")
+ simB1.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - move...")
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+ "program not bound to remaining devices")
+
+ simB2.remove()
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+ simB3.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+ ret, out = bpftool("prog show %s" % (progB), fail=False)
+ fail(ret == 0, "got information about orphaned program")
+ fail("error" not in out, "no error reported for get info on orphaned")
+ fail(out["error"] != "can't get prog info: No such device",
+ "wrong error for get info on orphaned")
+
+ print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+ log("Clean up...", "", level=1)
+ log_level_inc()
+ clean_up()
diff --git a/tools/testing/selftests/bpf/test_pkt_access.c b/tools/testing/selftests/bpf/test_pkt_access.c
new file mode 100644
index 000000000..6e11ba117
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_access.c
@@ -0,0 +1,65 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+int _version SEC("version") = 1;
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = (struct ethhdr *)(data);
+ struct tcphdr *tcp = NULL;
+ __u8 proto = 255;
+ __u64 ihl_len;
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(eth + 1);
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ tcp = (struct tcphdr *)((void *)(iph) + ihl_len);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(eth + 1);
+
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
+ }
+
+ if (tcp) {
+ if (((void *)(tcp) + 20) > data_end || proto != 6)
+ return TC_ACT_SHOT;
+ barrier(); /* to force ordering of checks */
+ if (((void *)(tcp) + 18) > data_end)
+ return TC_ACT_SHOT;
+ if (tcp->urg_ptr == 123)
+ return TC_ACT_OK;
+ }
+
+ return TC_ACT_UNSPEC;
+}
diff --git a/tools/testing/selftests/bpf/test_pkt_md_access.c b/tools/testing/selftests/bpf/test_pkt_md_access.c
new file mode 100644
index 000000000..7956302ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_pkt_md_access.c
@@ -0,0 +1,46 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define TEST_FIELD(TYPE, FIELD, MASK) \
+ { \
+ TYPE tmp = *(volatile TYPE *)&skb->FIELD; \
+ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
+ return TC_ACT_SHOT; \
+ }
+#else
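+/* On big-endian, a narrow load of the field sits at the most significant
+ * end of the 32-bit slot, so index forward by the element offset.
+ */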
+#define TEST_FIELD_OFFSET(a, b) ((sizeof(a) - sizeof(b)) / sizeof(b))
+#define TEST_FIELD(TYPE, FIELD, MASK) \
+ { \
+ TYPE tmp = *((volatile TYPE *)&skb->FIELD + \
+ TEST_FIELD_OFFSET(skb->FIELD, TYPE)); \
+ if (tmp != ((*(volatile __u32 *)&skb->FIELD) & MASK)) \
+ return TC_ACT_SHOT; \
+ }
+#endif
+
+SEC("test1")
+int process(struct __sk_buff *skb)
+{
+ TEST_FIELD(__u8, len, 0xFF);
+ TEST_FIELD(__u16, len, 0xFFFF);
+ TEST_FIELD(__u32, len, 0xFFFFFFFF);
+ TEST_FIELD(__u16, protocol, 0xFFFF);
+ TEST_FIELD(__u32, protocol, 0xFFFFFFFF);
+ TEST_FIELD(__u8, hash, 0xFF);
+ TEST_FIELD(__u16, hash, 0xFFFF);
+ TEST_FIELD(__u32, hash, 0xFFFFFFFF);
+
+ return TC_ACT_OK;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
new file mode 100644
index 000000000..0fcd38ffc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -0,0 +1,1756 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <arpa/inet.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/filter.h>
+#include <linux/perf_event.h>
+#include <linux/unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_iptunnel_common.h"
+#include "bpf_util.h"
+#include "bpf_endian.h"
+#include "bpf_rlimit.h"
+#include "trace_helpers.h"
+
+static int error_cnt, pass_cnt;
+static bool jit_enabled;
+
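+/* Length advertised in the test vectors' ip headers; the l4lb tests expect
+ * their byte counters to advance by exactly this much per packet.
+ */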
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+static struct {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct tcphdr tcp;
+} __packed pkt_v4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = 6,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+};
+
+/* ipv6 test vector */
+static struct {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct tcphdr tcp;
+} __packed pkt_v6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = 6,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+};
+
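+/* Note: CHECK() prints a __u32 "duration" that must be in scope at every
+ * call site.
+ */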
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ error_cnt++; \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ pass_cnt++; \
+ printf("%s:PASS:%s %d nsec\n", __func__, tag, duration);\
+ } \
+ __ret; \
+})
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ error_cnt++;
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+static void test_pkt_access(void)
+{
+ const char *file = "./test_pkt_access.o";
+ struct bpf_object *obj;
+ __u32 duration, retval;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || errno || retval, "ipv4",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || errno || retval, "ipv6",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+ bpf_object__close(obj);
+}
+
+static void test_xdp(void)
+{
+ struct vip key4 = {.protocol = 6, .family = AF_INET};
+ struct vip key6 = {.protocol = 6, .family = AF_INET6};
+ struct iptnl_info value4 = {.family = AF_INET};
+ struct iptnl_info value6 = {.family = AF_INET6};
+ const char *file = "./test_xdp.o";
+ struct bpf_object *obj;
+ char buf[128];
+ struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
+ struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ __u32 duration, retval, size;
+ int err, prog_fd, map_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip2tnl");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key4, &value4, 0);
+ bpf_map_update_elem(map_fd, &key6, &value6, 0);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+
+ CHECK(err || errno || retval != XDP_TX || size != 74 ||
+ iph->protocol != IPPROTO_IPIP, "ipv4",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != XDP_TX || size != 114 ||
+ iph6->nexthdr != IPPROTO_IPV6, "ipv6",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+out:
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_tail(void)
+{
+ const char *file = "./test_adjust_tail.o";
+ struct bpf_object *obj;
+ char buf[128];
+ __u32 duration, retval, size;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+
+ CHECK(err || errno || retval != XDP_DROP,
+ "ipv4", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != XDP_TX || size != 54,
+ "ipv6", "err %d errno %d retval %d size %d\n",
+ err, errno, retval, size);
+ bpf_object__close(obj);
+}
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+
+static void test_l4lb(const char *file)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct vip key = {.protocol = 6};
+ struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+ } value = {.vip_num = VIP_NUM};
+ __u32 stats_key = VIP_NUM;
+ struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+ } stats[nr_cpus];
+ struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+ } real_def = {.dst = MAGIC_VAL};
+ __u32 ch_key = 11, real_num = 3;
+ __u32 duration, retval, size;
+ int err, i, prog_fd, map_fd;
+ __u64 bytes = 0, pkts = 0;
+ struct bpf_object *obj;
+ char buf[128];
+ u32 *magic = (u32 *)buf;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip_map");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key, &value, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "ch_rings");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "reals");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
+ *magic != MAGIC_VAL, "ipv4",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
+ *magic != MAGIC_VAL, "ipv6",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ map_fd = bpf_find_map(__func__, obj, "stats");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ for (i = 0; i < nr_cpus; i++) {
+ bytes += stats[i].bytes;
+ pkts += stats[i].pkts;
+ }
+ if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+ error_cnt++;
+ printf("test_l4lb:FAIL:stats %lld %lld\n", bytes, pkts);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+static void test_l4lb_all(void)
+{
+ const char *file1 = "./test_l4lb.o";
+ const char *file2 = "./test_l4lb_noinline.o";
+
+ test_l4lb(file1);
+ test_l4lb(file2);
+}
+
+static void test_xdp_noinline(void)
+{
+ const char *file = "./test_xdp_noinline.o";
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct vip key = {.protocol = 6};
+ struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+ } value = {.vip_num = VIP_NUM};
+ __u32 stats_key = VIP_NUM;
+ struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+ } stats[nr_cpus];
+ struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+ } real_def = {.dst = MAGIC_VAL};
+ __u32 ch_key = 11, real_num = 3;
+ __u32 duration, retval, size;
+ int err, i, prog_fd, map_fd;
+ __u64 bytes = 0, pkts = 0;
+ struct bpf_object *obj;
+ char buf[128];
+ u32 *magic = (u32 *)buf;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "vip_map");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &key, &value, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "ch_rings");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+
+ map_fd = bpf_find_map(__func__, obj, "reals");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 1 || size != 54 ||
+ *magic != MAGIC_VAL, "ipv4",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+ buf, &size, &retval, &duration);
+ CHECK(err || errno || retval != 1 || size != 74 ||
+ *magic != MAGIC_VAL, "ipv6",
+ "err %d errno %d retval %d size %d magic %x\n",
+ err, errno, retval, size, *magic);
+
+ map_fd = bpf_find_map(__func__, obj, "stats");
+ if (map_fd < 0)
+ goto out;
+ bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ for (i = 0; i < nr_cpus; i++) {
+ bytes += stats[i].bytes;
+ pkts += stats[i].pkts;
+ }
+ if (bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2) {
+ error_cnt++;
+ printf("test_xdp_noinline:FAIL:stats %lld %lld\n", bytes, pkts);
+ }
+out:
+ bpf_object__close(obj);
+}
+
+static void test_tcp_estats(void)
+{
+ const char *file = "./test_tcp_estats.o";
+ int err, prog_fd;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ CHECK(err, "", "err %d errno %d\n", err, errno);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ bpf_object__close(obj);
+}
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static bool is_jit_enabled(void)
+{
+ const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ bool enabled = false;
+ int sysctl_fd;
+
+	sysctl_fd = open(jit_sysctl, O_RDONLY);
+ if (sysctl_fd != -1) {
+ char tmpc;
+
+ if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+ enabled = (tmpc != '0');
+ close(sysctl_fd);
+ }
+
+ return enabled;
+}
+
+static void test_bpf_obj_id(void)
+{
+ const __u64 array_magic_value = 0xfaceb00c;
+ const __u32 array_key = 0;
+ const int nr_iters = 2;
+ const char *file = "./test_obj_id.o";
+ const char *expected_prog_name = "test_obj_id";
+ const char *expected_map_name = "test_map_id";
+ const __u64 nsec_per_sec = 1000000000;
+
+ struct bpf_object *objs[nr_iters];
+ int prog_fds[nr_iters], map_fds[nr_iters];
+ /* +1 to test for the info_len returned by kernel */
+ struct bpf_prog_info prog_infos[nr_iters + 1];
+ struct bpf_map_info map_infos[nr_iters + 1];
+ /* Each prog only uses one map. +1 to test nr_map_ids
+ * returned by kernel.
+ */
+ __u32 map_ids[nr_iters + 1];
+ char jited_insns[128], xlated_insns[128], zeros[128];
+ __u32 i, next_id, info_len, nr_id_found, duration = 0;
+ struct timespec real_time_ts, boot_time_ts;
+ int err = 0;
+ __u64 array_value;
+ uid_t my_uid = getuid();
+ time_t now, load_time;
+
+ err = bpf_prog_get_fd_by_id(0);
+ CHECK(err >= 0 || errno != ENOENT,
+ "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+
+ err = bpf_map_get_fd_by_id(0);
+ CHECK(err >= 0 || errno != ENOENT,
+ "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+
+ for (i = 0; i < nr_iters; i++)
+ objs[i] = NULL;
+
+ /* Check bpf_obj_get_info_by_fd() */
+ bzero(zeros, sizeof(zeros));
+ for (i = 0; i < nr_iters; i++) {
+ now = time(NULL);
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+ &objs[i], &prog_fds[i]);
+		/* test_obj_id.o is a trivial prog. It should never fail
+		 * to load.
+		 */
+ if (err)
+ error_cnt++;
+ assert(!err);
+
+ /* Insert a magic value to the map */
+ map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
+ assert(map_fds[i] >= 0);
+ err = bpf_map_update_elem(map_fds[i], &array_key,
+ &array_magic_value, 0);
+ assert(!err);
+
+ /* Check getting map info */
+ info_len = sizeof(struct bpf_map_info) * 2;
+ bzero(&map_infos[i], info_len);
+ err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+ &info_len);
+ if (CHECK(err ||
+ map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
+ map_infos[i].key_size != sizeof(__u32) ||
+ map_infos[i].value_size != sizeof(__u64) ||
+ map_infos[i].max_entries != 1 ||
+ map_infos[i].map_flags != 0 ||
+ info_len != sizeof(struct bpf_map_info) ||
+ strcmp((char *)map_infos[i].name, expected_map_name),
+ "get-map-info(fd)",
+ "err %d errno %d type %d(%d) info_len %u(%Zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
+ err, errno,
+ map_infos[i].type, BPF_MAP_TYPE_ARRAY,
+ info_len, sizeof(struct bpf_map_info),
+ map_infos[i].key_size,
+ map_infos[i].value_size,
+ map_infos[i].max_entries,
+ map_infos[i].map_flags,
+ map_infos[i].name, expected_map_name))
+ goto done;
+
+ /* Check getting prog info */
+ info_len = sizeof(struct bpf_prog_info) * 2;
+ bzero(&prog_infos[i], info_len);
+ bzero(jited_insns, sizeof(jited_insns));
+ bzero(xlated_insns, sizeof(xlated_insns));
+ prog_infos[i].jited_prog_insns = ptr_to_u64(jited_insns);
+ prog_infos[i].jited_prog_len = sizeof(jited_insns);
+ prog_infos[i].xlated_prog_insns = ptr_to_u64(xlated_insns);
+ prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
+ prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
+ prog_infos[i].nr_map_ids = 2;
+ err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
+ assert(!err);
+ err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
+ assert(!err);
+ err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
+ &info_len);
+ load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ + (prog_infos[i].load_time / nsec_per_sec);
+ if (CHECK(err ||
+ prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+ info_len != sizeof(struct bpf_prog_info) ||
+ (jit_enabled && !prog_infos[i].jited_prog_len) ||
+ (jit_enabled &&
+ !memcmp(jited_insns, zeros, sizeof(zeros))) ||
+ !prog_infos[i].xlated_prog_len ||
+ !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
+ load_time < now - 60 || load_time > now + 60 ||
+ prog_infos[i].created_by_uid != my_uid ||
+ prog_infos[i].nr_map_ids != 1 ||
+ *(int *)prog_infos[i].map_ids != map_infos[i].id ||
+ strcmp((char *)prog_infos[i].name, expected_prog_name),
+ "get-prog-info(fd)",
+ "err %d errno %d i %d type %d(%d) info_len %u(%Zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+ err, errno, i,
+ prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
+ info_len, sizeof(struct bpf_prog_info),
+ jit_enabled,
+ prog_infos[i].jited_prog_len,
+ prog_infos[i].xlated_prog_len,
+ !!memcmp(jited_insns, zeros, sizeof(zeros)),
+ !!memcmp(xlated_insns, zeros, sizeof(zeros)),
+ load_time, now,
+ prog_infos[i].created_by_uid, my_uid,
+ prog_infos[i].nr_map_ids, 1,
+ *(int *)prog_infos[i].map_ids, map_infos[i].id,
+ prog_infos[i].name, expected_prog_name))
+ goto done;
+ }
+
+ /* Check bpf_prog_get_next_id() */
+ nr_id_found = 0;
+ next_id = 0;
+ while (!bpf_prog_get_next_id(next_id, &next_id)) {
+ struct bpf_prog_info prog_info = {};
+ __u32 saved_map_id;
+ int prog_fd;
+
+ info_len = sizeof(prog_info);
+
+ prog_fd = bpf_prog_get_fd_by_id(next_id);
+ if (prog_fd < 0 && errno == ENOENT)
+			/* The bpf_prog was destroyed between get_next_id()
+			 * and get_fd_by_id(); skip it.
+			 */
+ continue;
+ if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
+ "prog_fd %d next_id %d errno %d\n",
+ prog_fd, next_id, errno))
+ break;
+
+ for (i = 0; i < nr_iters; i++)
+ if (prog_infos[i].id == next_id)
+ break;
+
+ if (i == nr_iters)
+ continue;
+
+ nr_id_found++;
+
+ /* Negative test:
+ * prog_info.nr_map_ids = 1
+ * prog_info.map_ids = NULL
+ */
+ prog_info.nr_map_ids = 1;
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (CHECK(!err || errno != EFAULT,
+ "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
+ err, errno, EFAULT))
+ break;
+ bzero(&prog_info, sizeof(prog_info));
+ info_len = sizeof(prog_info);
+
+ saved_map_id = *(int *)(prog_infos[i].map_ids);
+ prog_info.map_ids = prog_infos[i].map_ids;
+ prog_info.nr_map_ids = 2;
+ err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ prog_infos[i].jited_prog_insns = 0;
+ prog_infos[i].xlated_prog_insns = 0;
+ CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
+ memcmp(&prog_info, &prog_infos[i], info_len) ||
+ *(int *)prog_info.map_ids != saved_map_id,
+ "get-prog-info(next_id->fd)",
+ "err %d errno %d info_len %u(%Zu) memcmp %d map_id %u(%u)\n",
+ err, errno, info_len, sizeof(struct bpf_prog_info),
+ memcmp(&prog_info, &prog_infos[i], info_len),
+ *(int *)prog_info.map_ids, saved_map_id);
+ close(prog_fd);
+ }
+ CHECK(nr_id_found != nr_iters,
+ "check total prog id found by get_next_id",
+ "nr_id_found %u(%u)\n",
+ nr_id_found, nr_iters);
+
+ /* Check bpf_map_get_next_id() */
+ nr_id_found = 0;
+ next_id = 0;
+ while (!bpf_map_get_next_id(next_id, &next_id)) {
+ struct bpf_map_info map_info = {};
+ int map_fd;
+
+ info_len = sizeof(map_info);
+
+ map_fd = bpf_map_get_fd_by_id(next_id);
+ if (map_fd < 0 && errno == ENOENT)
+			/* The bpf_map was destroyed between get_next_id()
+			 * and get_fd_by_id(); skip it.
+			 */
+ continue;
+ if (CHECK(map_fd < 0, "get-map-fd(next_id)",
+ "map_fd %d next_id %u errno %d\n",
+ map_fd, next_id, errno))
+ break;
+
+ for (i = 0; i < nr_iters; i++)
+ if (map_infos[i].id == next_id)
+ break;
+
+ if (i == nr_iters)
+ continue;
+
+ nr_id_found++;
+
+ err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
+ assert(!err);
+
+ err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ CHECK(err || info_len != sizeof(struct bpf_map_info) ||
+ memcmp(&map_info, &map_infos[i], info_len) ||
+ array_value != array_magic_value,
+ "check get-map-info(next_id->fd)",
+ "err %d errno %d info_len %u(%Zu) memcmp %d array_value %llu(%llu)\n",
+ err, errno, info_len, sizeof(struct bpf_map_info),
+ memcmp(&map_info, &map_infos[i], info_len),
+ array_value, array_magic_value);
+
+ close(map_fd);
+ }
+ CHECK(nr_id_found != nr_iters,
+ "check total map id found by get_next_id",
+ "nr_id_found %u(%u)\n",
+ nr_id_found, nr_iters);
+
+done:
+ for (i = 0; i < nr_iters; i++)
+ bpf_object__close(objs[i]);
+}
+
+static void test_pkt_md_access(void)
+{
+ const char *file = "./test_pkt_md_access.o";
+ struct bpf_object *obj;
+ __u32 duration, retval;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (err) {
+ error_cnt++;
+ return;
+ }
+
+ err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+
+ bpf_object__close(obj);
+}
+
+static void test_obj_name(void)
+{
+ struct {
+ const char *name;
+ int success;
+ int expected_errno;
+ } tests[] = {
+ { "", 1, 0 },
+ { "_123456789ABCDE", 1, 0 },
+ { "_123456789ABCDEF", 0, EINVAL },
+ { "_123456789ABCD\n", 0, EINVAL },
+ };
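+	/* prog/map names are limited to BPF_OBJ_NAME_LEN (16) bytes
+	 * including the NUL, so the 15-character names load while the
+	 * 16-character one is rejected, as is the embedded '\n'.
+	 */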
+ struct bpf_insn prog[] = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 duration = 0;
+ int i;
+
+ for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ size_t name_len = strlen(tests[i].name) + 1;
+ union bpf_attr attr;
+ size_t ncopy;
+ int fd;
+
+ /* test different attr.prog_name during BPF_PROG_LOAD */
+ ncopy = name_len < sizeof(attr.prog_name) ?
+ name_len : sizeof(attr.prog_name);
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SCHED_CLS;
+ attr.insn_cnt = 2;
+ attr.insns = ptr_to_u64(prog);
+ attr.license = ptr_to_u64("");
+ memcpy(attr.prog_name, tests[i].name, ncopy);
+
+ fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ CHECK((tests[i].success && fd < 0) ||
+ (!tests[i].success && fd != -1) ||
+ (!tests[i].success && errno != tests[i].expected_errno),
+ "check-bpf-prog-name",
+ "fd %d(%d) errno %d(%d)\n",
+ fd, tests[i].success, errno, tests[i].expected_errno);
+
+ if (fd != -1)
+ close(fd);
+
+ /* test different attr.map_name during BPF_MAP_CREATE */
+ ncopy = name_len < sizeof(attr.map_name) ?
+ name_len : sizeof(attr.map_name);
+ bzero(&attr, sizeof(attr));
+ attr.map_type = BPF_MAP_TYPE_ARRAY;
+ attr.key_size = 4;
+ attr.value_size = 4;
+ attr.max_entries = 1;
+ attr.map_flags = 0;
+ memcpy(attr.map_name, tests[i].name, ncopy);
+ fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+ CHECK((tests[i].success && fd < 0) ||
+ (!tests[i].success && fd != -1) ||
+ (!tests[i].success && errno != tests[i].expected_errno),
+ "check-bpf-map-name",
+ "fd %d(%d) errno %d(%d)\n",
+ fd, tests[i].success, errno, tests[i].expected_errno);
+
+ if (fd != -1)
+ close(fd);
+ }
+}
+
+static void test_tp_attach_query(void)
+{
+ const int num_progs = 3;
+ int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
+ __u32 duration = 0, info_len, saved_prog_ids[num_progs];
+ const char *file = "./test_tracepoint.o";
+ struct perf_event_query_bpf *query;
+ struct perf_event_attr attr = {};
+ struct bpf_object *obj[num_progs];
+ struct bpf_prog_info prog_info;
+ char buf[256];
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ return;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ return;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
+ for (i = 0; i < num_progs; i++) {
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ &prog_fd[i]);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto cleanup1;
+
+ bzero(&prog_info, sizeof(prog_info));
+ prog_info.jited_prog_len = 0;
+ prog_info.xlated_prog_len = 0;
+ prog_info.nr_map_ids = 0;
+ info_len = sizeof(prog_info);
+ err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
+ if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+ err, errno))
+ goto cleanup1;
+ saved_prog_ids[i] = prog_info.id;
+
+ pmu_fd[i] = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd[i] < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd[i], errno))
+ goto cleanup2;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto cleanup3;
+
+ if (i == 0) {
+ /* check NULL prog array query */
+ query->ids_len = num_progs;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != 0,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ }
+
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[i]);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto cleanup3;
+
+ if (i == 1) {
+ /* try to get # of programs only */
+ query->ids_len = 0;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != 2,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+
+ /* try a few negative tests */
+ /* invalid query pointer */
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF,
+ (struct perf_event_query_bpf *)0x1);
+ if (CHECK(!err || errno != EFAULT,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d\n", err, errno))
+ goto cleanup3;
+
+			/* not enough space */
+ query->ids_len = 1;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(!err || errno != ENOSPC || query->prog_cnt != 2,
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ }
+
+ query->ids_len = num_progs;
+ err = ioctl(pmu_fd[i], PERF_EVENT_IOC_QUERY_BPF, query);
+ if (CHECK(err || query->prog_cnt != (i + 1),
+ "perf_event_ioc_query_bpf",
+ "err %d errno %d query->prog_cnt %u\n",
+ err, errno, query->prog_cnt))
+ goto cleanup3;
+ for (j = 0; j < i + 1; j++)
+ if (CHECK(saved_prog_ids[j] != query->ids[j],
+ "perf_event_ioc_query_bpf",
+ "#%d saved_prog_id %x query prog_id %x\n",
+ j, saved_prog_ids[j], query->ids[j]))
+ goto cleanup3;
+ }
+
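+	/* error paths jump to the labels inside this loop so that everything
+	 * attached so far is torn down in reverse order
+	 */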
+ i = num_progs - 1;
+ for (; i >= 0; i--) {
+ cleanup3:
+ ioctl(pmu_fd[i], PERF_EVENT_IOC_DISABLE);
+ cleanup2:
+ close(pmu_fd[i]);
+ cleanup1:
+ bpf_object__close(obj[i]);
+ }
+ free(query);
+}
+
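+/* Require every key of map1 to also be present in map2; returns 0 only when
+ * the walk over map1 ends cleanly with ENOENT (no more keys).
+ */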
+static int compare_map_keys(int map1_fd, int map2_fd)
+{
+ __u32 key, next_key;
+ char val_buf[PERF_MAX_STACK_DEPTH *
+ sizeof(struct bpf_stack_build_id)];
+ int err;
+
+ err = bpf_map_get_next_key(map1_fd, NULL, &key);
+ if (err)
+ return err;
+ err = bpf_map_lookup_elem(map2_fd, &key, val_buf);
+ if (err)
+ return err;
+
+ while (bpf_map_get_next_key(map1_fd, &key, &next_key) == 0) {
+ err = bpf_map_lookup_elem(map2_fd, &next_key, val_buf);
+ if (err)
+ return err;
+
+ key = next_key;
+ }
+ if (errno != ENOENT)
+ return -1;
+
+ return 0;
+}
+
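+/* Byte-compare, key by key, the stack traces stored in the stack map and in
+ * its array mirror.
+ */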
+static int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len)
+{
+ __u32 key, next_key, *cur_key_p, *next_key_p;
+ char *val_buf1, *val_buf2;
+ int i, err = 0;
+
+ val_buf1 = malloc(stack_trace_len);
+ val_buf2 = malloc(stack_trace_len);
+ cur_key_p = NULL;
+ next_key_p = &key;
+ while (bpf_map_get_next_key(smap_fd, cur_key_p, next_key_p) == 0) {
+ err = bpf_map_lookup_elem(smap_fd, next_key_p, val_buf1);
+ if (err)
+ goto out;
+ err = bpf_map_lookup_elem(amap_fd, next_key_p, val_buf2);
+ if (err)
+ goto out;
+ for (i = 0; i < stack_trace_len; i++) {
+ if (val_buf1[i] != val_buf2[i]) {
+ err = -1;
+ goto out;
+ }
+ }
+ key = *next_key_p;
+ cur_key_p = &key;
+ next_key_p = &next_key;
+ }
+ if (errno != ENOENT)
+ err = -1;
+
+out:
+ free(val_buf1);
+ free(val_buf2);
+ return err;
+}
+
+static void test_stacktrace_map()
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_map.o";
+ int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
+ struct perf_event_attr attr = {};
+ __u32 key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ return;
+
+ /* Get the ID for the sched/sched_switch tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (bytes <= 0 || bytes >= sizeof(buf))
+ goto close_prog;
+
+	/* Open the perf event and attach the bpf program */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (err)
+ goto disable_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (err)
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (control_map_fd < 0)
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (stackid_hmap_fd < 0)
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (stackmap_fd < 0)
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (stack_amap_fd < 0)
+ goto disable_pmu;
+
+	/* give the bpf program some time to run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vice versa.
+	 */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu_noerr;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu_noerr;
+
+ stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu_noerr;
+
+ goto disable_pmu_noerr;
+disable_pmu:
+ error_cnt++;
+disable_pmu_noerr:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+ close(pmu_fd);
+close_prog:
+ bpf_object__close(obj);
+}
+
+static void test_stacktrace_map_raw_tp()
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd;
+ const char *file = "./test_stacktrace_map.o";
+ int efd, err, prog_fd;
+ __u32 key, val, duration = 0;
+ struct bpf_object *obj;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sched_switch", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (control_map_fd < 0)
+ goto close_prog;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (stackid_hmap_fd < 0)
+ goto close_prog;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (stackmap_fd < 0)
+ goto close_prog;
+
+	/* give the bpf program some time to run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vice versa.
+	 */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static int extract_build_id(char *build_id, size_t size)
+{
+ FILE *fp;
+ char *line = NULL;
+ size_t len = 0;
+
+ fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
+ if (fp == NULL)
+ return -1;
+
+ if (getline(&line, &len, fp) == -1)
+ goto err;
+ pclose(fp);
+
+	/* leave room for the terminating NUL below */
+	if (len > size - 1)
+		len = size - 1;
+ memcpy(build_id, line, len);
+ build_id[len] = '\0';
+ free(line);
+ return 0;
+err:
+ pclose(fp);
+ return -1;
+}
+
+static void test_stacktrace_build_id(void)
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int bytes, efd, err, pmu_fd, prog_fd, stack_trace_len;
+ struct perf_event_attr attr = {};
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
+ int retry = 1;
+
+retry:
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+	/* Get the ID for the random/urandom_read tracepoint */
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/random/urandom_read/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf),
+ "read", "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+	/* Open the perf event and attach the bpf program */
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("./urandom_read") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vice versa.
+	 */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ /* stack_map_get_build_id_offset() is racy and sometimes can return
+ * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
+ * try it one more time.
+ */
+ if (build_id_matches < 1 && retry--) {
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+ close(pmu_fd);
+ bpf_object__close(obj);
+ printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+ __func__);
+ goto retry;
+ }
+
+ if (CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map\n"))
+ goto disable_pmu;
+
+ stack_trace_len = PERF_MAX_STACK_DEPTH
+ * sizeof(struct bpf_stack_build_id);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
+ "err %d errno %d\n", err, errno);
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
+static void test_stacktrace_build_id_nmi(void)
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {
+ .sample_freq = 5000,
+ .freq = 1,
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ };
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
+ int retry = 1;
+
+retry:
+ err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ return;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open",
+ "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+	/* for every element in stackid_hmap, we can find a corresponding one
+	 * in stackmap, and vice versa.
+	 */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ /* stack_map_get_build_id_offset() is racy and sometimes can return
+ * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
+ * try it one more time.
+ */
+ if (build_id_matches < 1 && retry--) {
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+ close(pmu_fd);
+ bpf_object__close(obj);
+ printf("%s:WARN:Didn't find expected build ID from the map, retrying\n",
+ __func__);
+ goto retry;
+ }
+
+ if (CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map\n"))
+ goto disable_pmu;
+
+	/*
+	 * We intentionally skip compare_stack_ips(). This is because we
+	 * only support one in_nmi() ips-to-build_id translation per cpu
+	 * at any time, thus stack_amap here will always fall back to
+	 * BPF_STACK_BUILD_ID_IP.
+	 */
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+}
+
+#define MAX_CNT_RAWTP 10ull
+#define MAX_STACK_RAWTP 100
+struct get_stack_trace_t {
+ int pid;
+ int kern_stack_size;
+ int user_stack_size;
+ int user_stack_buildid_size;
+ __u64 kern_stack[MAX_STACK_RAWTP];
+ __u64 user_stack[MAX_STACK_RAWTP];
+ struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
+};
+
+static int get_stack_print_output(void *data, int size)
+{
+ bool good_kern_stack = false, good_user_stack = false;
+ const char *nonjit_func = "___bpf_prog_run";
+ struct get_stack_trace_t *e = data;
+ int i, num_stack;
+ static __u64 cnt;
+ struct ksym *ks;
+
+ cnt++;
+
+ if (size < sizeof(struct get_stack_trace_t)) {
+ __u64 *raw_data = data;
+ bool found = false;
+
+ num_stack = size / sizeof(__u64);
+ /* If jit is enabled, we do not have a good way to
+ * verify the sanity of the kernel stack. So we
+ * just assume it is good if the stack is not empty.
+ * This could be improved in the future.
+ */
+ if (jit_enabled) {
+ found = num_stack > 0;
+ } else {
+ for (i = 0; i < num_stack; i++) {
+ ks = ksym_search(raw_data[i]);
+ if (strcmp(ks->name, nonjit_func) == 0) {
+ found = true;
+ break;
+ }
+ }
+ }
+ if (found) {
+ good_kern_stack = true;
+ good_user_stack = true;
+ }
+ } else {
+ num_stack = e->kern_stack_size / sizeof(__u64);
+ if (jit_enabled) {
+ good_kern_stack = num_stack > 0;
+ } else {
+ for (i = 0; i < num_stack; i++) {
+ ks = ksym_search(e->kern_stack[i]);
+ if (strcmp(ks->name, nonjit_func) == 0) {
+ good_kern_stack = true;
+ break;
+ }
+ }
+ }
+ if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+ good_user_stack = true;
+ }
+ if (!good_kern_stack || !good_user_stack)
+ return LIBBPF_PERF_EVENT_ERROR;
+
+ if (cnt == MAX_CNT_RAWTP)
+ return LIBBPF_PERF_EVENT_DONE;
+
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void test_get_stack_raw_tp(void)
+{
+ const char *file = "./test_get_stack_rawtp.o";
+ int i, efd, err, prog_fd, pmu_fd, perfmap_fd;
+ struct perf_event_attr attr = {};
+ struct timespec tv = {0, 10};
+ __u32 key = 0, duration = 0;
+ struct bpf_object *obj;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ perfmap_fd = bpf_find_map(__func__, obj, "perfmap");
+ if (CHECK(perfmap_fd < 0, "bpf_find_map", "err %d errno %d\n",
+ perfmap_fd, errno))
+ goto close_prog;
+
+ err = load_kallsyms();
+ if (CHECK(err < 0, "load_kallsyms", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, getpid()/*pid*/, -1/*cpu*/,
+ -1/*group_fd*/, 0);
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n", pmu_fd,
+ errno))
+ goto close_prog;
+
+ err = bpf_map_update_elem(perfmap_fd, &key, &pmu_fd, BPF_ANY);
+ if (CHECK(err < 0, "bpf_map_update_elem", "err %d errno %d\n", err,
+ errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err < 0, "ioctl PERF_EVENT_IOC_ENABLE", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+
+ err = perf_event_mmap(pmu_fd);
+ if (CHECK(err < 0, "perf_event_mmap", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ /* trigger some syscall action */
+ for (i = 0; i < MAX_CNT_RAWTP; i++)
+ nanosleep(&tv, NULL);
+
+ err = perf_event_poller(pmu_fd, get_stack_print_output);
+ if (CHECK(err < 0, "perf_event_poller", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_rawtp(void)
+{
+ const char *file = "./test_get_stack_rawtp.o";
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ int efd, err, prog_fd;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
+ efd = bpf_raw_tracepoint_open("sys_enter", prog_fd);
+ if (CHECK(efd < 0, "raw_tp_open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+
+ /* query (getpid(), efd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_prog;
+
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ strcmp(buf, "sys_enter") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_prog;
+
+ /* test zero len */
+ len = 0;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (len = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test empty buffer */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), efd, 0, 0, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query (buf = 0)", "err %d errno %d\n",
+ err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter");
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ /* test smaller buffer */
+ len = 3;
+ err = bpf_task_fd_query(getpid(), efd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err >= 0 || errno != ENOSPC, "bpf_task_fd_query (len = 3)",
+ "err %d errno %d\n", err, errno))
+ goto close_prog;
+ err = fd_type == BPF_FD_TYPE_RAW_TRACEPOINT &&
+ len == strlen("sys_enter") &&
+ strcmp(buf, "sy") == 0;
+ if (CHECK(!err, "check_results", "fd_type %d len %u\n", fd_type, len))
+ goto close_prog;
+
+ goto close_prog_noerr;
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp_core(const char *probe_name,
+ const char *tp_name)
+{
+ const char *file = "./test_tracepoint.o";
+ int err, bytes, efd, prog_fd, pmu_fd;
+ struct perf_event_attr attr = {};
+ __u64 probe_offset, probe_addr;
+ __u32 len, prog_id, fd_type;
+ struct bpf_object *obj;
+ __u32 duration = 0;
+ char buf[256];
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ efd = open(buf, O_RDONLY, 0);
+ if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
+ goto close_prog;
+ bytes = read(efd, buf, sizeof(buf));
+ close(efd);
+ if (CHECK(bytes <= 0 || bytes >= sizeof(buf), "read",
+ "bytes %d errno %d\n", bytes, errno))
+ goto close_prog;
+
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(err, "perf_event_open", "err %d errno %d\n", err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ /* query (getpid(), pmu_fd) */
+ len = sizeof(buf);
+ err = bpf_task_fd_query(getpid(), pmu_fd, 0, buf, &len, &prog_id,
+ &fd_type, &probe_offset, &probe_addr);
+ if (CHECK(err < 0, "bpf_task_fd_query", "err %d errno %d\n", err,
+ errno))
+ goto close_pmu;
+
+ err = (fd_type == BPF_FD_TYPE_TRACEPOINT) && !strcmp(buf, tp_name);
+ if (CHECK(!err, "check_results", "fd_type %d tp_name %s\n",
+ fd_type, buf))
+ goto close_pmu;
+
+ close(pmu_fd);
+ goto close_prog_noerr;
+
+close_pmu:
+ close(pmu_fd);
+close_prog:
+ error_cnt++;
+close_prog_noerr:
+ bpf_object__close(obj);
+}
+
+static void test_task_fd_query_tp(void)
+{
+ test_task_fd_query_tp_core("sched/sched_switch",
+ "sched_switch");
+ test_task_fd_query_tp_core("syscalls/sys_enter_read",
+ "sys_enter_read");
+}
+
+int main(void)
+{
+ jit_enabled = is_jit_enabled();
+
+ test_pkt_access();
+ test_xdp();
+ test_xdp_adjust_tail();
+ test_l4lb_all();
+ test_xdp_noinline();
+ test_tcp_estats();
+ test_bpf_obj_id();
+ test_pkt_md_access();
+ test_obj_name();
+ test_tp_attach_query();
+ test_stacktrace_map();
+ test_stacktrace_build_id();
+ test_stacktrace_build_id_nmi();
+ test_stacktrace_map_raw_tp();
+ test_get_stack_raw_tp();
+ test_task_fd_query_rawtp();
+ test_task_fd_query_tp();
+
+ printf("Summary: %d PASSED, %d FAILED\n", pass_cnt, error_cnt);
+ return error_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/test_select_reuseport.c
new file mode 100644
index 000000000..b14d25bfa
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport.c
@@ -0,0 +1,700 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "test_select_reuseport_common.h"
+
+#define MIN_TCPHDR_LEN 20
+#define UDPHDR_LEN 8
+
+#define TCP_SYNCOOKIE_SYSCTL "/proc/sys/net/ipv4/tcp_syncookies"
+#define TCP_FO_SYSCTL "/proc/sys/net/ipv4/tcp_fastopen"
+#define REUSEPORT_ARRAY_SIZE 32
+
+static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
+static __u32 expected_results[NR_RESULTS];
+static int sk_fds[REUSEPORT_ARRAY_SIZE];
+static int reuseport_array, outer_map;
+static int select_by_skb_data_prog;
+static int saved_tcp_syncookie;
+static struct bpf_object *obj;
+static int saved_tcp_fo;
+static __u32 index_zero;
+static int epfd;
+
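+/* sa_family_t is the first member of both sockaddr_in and sockaddr_in6, so
+ * "family" below aliases the family field of either address.
+ */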
+static union sa46 {
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ sa_family_t family;
+} srv_sa;
+
+#define CHECK(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
+ printf(format); \
+ exit(-1); \
+ } \
+})
+
+static void create_maps(void)
+{
+ struct bpf_create_map_attr attr = {};
+
+ /* Creating reuseport_array */
+ attr.name = "reuseport_array";
+ attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+ attr.key_size = sizeof(__u32);
+ attr.value_size = sizeof(__u32);
+ attr.max_entries = REUSEPORT_ARRAY_SIZE;
+
+ reuseport_array = bpf_create_map_xattr(&attr);
+ CHECK(reuseport_array == -1, "creating reuseport_array",
+ "reuseport_array:%d errno:%d\n", reuseport_array, errno);
+
+ /* Creating outer_map */
+ attr.name = "outer_map";
+ attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
+ attr.key_size = sizeof(__u32);
+ attr.value_size = sizeof(__u32);
+ attr.max_entries = 1;
+ attr.inner_map_fd = reuseport_array;
+ outer_map = bpf_create_map_xattr(&attr);
+ CHECK(outer_map == -1, "creating outer_map",
+ "outer_map:%d errno:%d\n", outer_map, errno);
+}
+
+static void prepare_bpf_obj(void)
+{
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ int err;
+ struct bpf_object_open_attr attr = {
+ .file = "test_select_reuseport_kern.o",
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ };
+
+ obj = bpf_object__open_xattr(&attr);
+ CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
+ "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+
+ prog = bpf_program__next(NULL, obj);
+ CHECK(!prog, "get first bpf_program", "!prog\n");
+ bpf_program__set_type(prog, attr.prog_type);
+
+ map = bpf_object__find_map_by_name(obj, "outer_map");
+ CHECK(!map, "find outer_map", "!map\n");
+ err = bpf_map__reuse_fd(map, outer_map);
+ CHECK(err, "reuse outer_map", "err:%d\n", err);
+
+ err = bpf_object__load(obj);
+ CHECK(err, "load bpf_object", "err:%d\n", err);
+
+ select_by_skb_data_prog = bpf_program__fd(prog);
+ CHECK(select_by_skb_data_prog == -1, "get prog fd",
+ "select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
+
+ map = bpf_object__find_map_by_name(obj, "result_map");
+ CHECK(!map, "find result_map", "!map\n");
+ result_map = bpf_map__fd(map);
+ CHECK(result_map == -1, "get result_map fd",
+ "result_map:%d\n", result_map);
+
+ map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
+ CHECK(!map, "find tmp_index_ovr_map", "!map\n");
+ tmp_index_ovr_map = bpf_map__fd(map);
+ CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ "tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
+
+ map = bpf_object__find_map_by_name(obj, "linum_map");
+ CHECK(!map, "find linum_map", "!map\n");
+ linum_map = bpf_map__fd(map);
+ CHECK(linum_map == -1, "get linum_map fd",
+ "linum_map:%d\n", linum_map);
+
+ map = bpf_object__find_map_by_name(obj, "data_check_map");
+ CHECK(!map, "find data_check_map", "!map\n");
+ data_check_map = bpf_map__fd(map);
+ CHECK(data_check_map == -1, "get data_check_map fd",
+ "data_check_map:%d\n", data_check_map);
+}
+
+static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+{
+ memset(sa, 0, sizeof(*sa));
+ sa->family = family;
+ if (sa->family == AF_INET6)
+ sa->v6.sin6_addr = in6addr_loopback;
+ else
+ sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+}
+
+static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+{
+ memset(sa, 0, sizeof(*sa));
+ sa->family = family;
+ if (sa->family == AF_INET6)
+ sa->v6.sin6_addr = in6addr_any;
+ else
+ sa->v4.sin_addr.s_addr = INADDR_ANY;
+}
+
+static int read_int_sysctl(const char *sysctl)
+{
+ char buf[16];
+ int fd, ret;
+
+ fd = open(sysctl, 0);
+ CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+ sysctl, fd, errno);
+
+ ret = read(fd, buf, sizeof(buf));
+ CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n",
+ sysctl, ret, errno);
+ close(fd);
+
+ return atoi(buf);
+}
+
+static void write_int_sysctl(const char *sysctl, int v)
+{
+ int fd, ret, size;
+ char buf[16];
+
+ fd = open(sysctl, O_RDWR);
+ CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n",
+ sysctl, fd, errno);
+
+ size = snprintf(buf, sizeof(buf), "%d", v);
+ ret = write(fd, buf, size);
+ CHECK(ret != size, "write(sysctl)",
+ "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno);
+ close(fd);
+}
+
+static void restore_sysctls(void)
+{
+ write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo);
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie);
+}
+
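+/* tcp_fastopen bits: 0x1 enables client TFO, 0x2 server TFO, 0x4 lets the
+ * server accept data in SYN without a valid cookie.
+ */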
+static void enable_fastopen(void)
+{
+ int fo;
+
+ fo = read_int_sysctl(TCP_FO_SYSCTL);
+ write_int_sysctl(TCP_FO_SYSCTL, fo | 7);
+}
+
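+/* tcp_syncookies: 2 sends syncookies unconditionally, 0 disables them. */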
+static void enable_syncookie(void)
+{
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2);
+}
+
+static void disable_syncookie(void)
+{
+ write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0);
+}
+
+static __u32 get_linum(void)
+{
+ __u32 linum;
+ int err;
+
+ err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
+ CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ err, errno);
+
+ return linum;
+}
+
+static void check_data(int type, sa_family_t family, const struct cmd *cmd,
+ int cli_fd)
+{
+ struct data_check expected = {}, result;
+ union sa46 cli_sa;
+ socklen_t addrlen;
+ int err;
+
+ addrlen = sizeof(cli_sa);
+ err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
+ &addrlen);
+ CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno);
+
+ err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
+ CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ err, errno);
+
+ if (type == SOCK_STREAM) {
+ expected.len = MIN_TCPHDR_LEN;
+ expected.ip_protocol = IPPROTO_TCP;
+ } else {
+ expected.len = UDPHDR_LEN;
+ expected.ip_protocol = IPPROTO_UDP;
+ }
+
+ if (family == AF_INET6) {
+ expected.eth_protocol = htons(ETH_P_IPV6);
+ expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
+ !srv_sa.v6.sin6_addr.s6_addr32[2] &&
+ !srv_sa.v6.sin6_addr.s6_addr32[1] &&
+ !srv_sa.v6.sin6_addr.s6_addr32[0];
+
+ memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
+ sizeof(cli_sa.v6.sin6_addr));
+ memcpy(&expected.skb_addrs[4], &in6addr_loopback,
+ sizeof(in6addr_loopback));
+ expected.skb_ports[0] = cli_sa.v6.sin6_port;
+ expected.skb_ports[1] = srv_sa.v6.sin6_port;
+ } else {
+ expected.eth_protocol = htons(ETH_P_IP);
+ expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+
+ expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+ expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
+ expected.skb_ports[0] = cli_sa.v4.sin_port;
+ expected.skb_ports[1] = srv_sa.v4.sin_port;
+ }
+
+ if (memcmp(&result, &expected, offsetof(struct data_check,
+ equal_check_end))) {
+ printf("unexpected data_check\n");
+ printf(" result: (0x%x, %u, %u)\n",
+ result.eth_protocol, result.ip_protocol,
+ result.bind_inany);
+ printf("expected: (0x%x, %u, %u)\n",
+ expected.eth_protocol, expected.ip_protocol,
+ expected.bind_inany);
+ CHECK(1, "data_check result != expected",
+ "bpf_prog_linum:%u\n", get_linum());
+ }
+
+ CHECK(!result.hash, "data_check result.hash empty",
+ "result.hash:%u", result.hash);
+
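+ /*
+ * MIN_TCPHDR_LEN does not include TCP options, so for SOCK_STREAM
+ * expected.len is only a lower bound on result.len. The UDP header
+ * has a fixed size, hence the exact comparison below.
+ */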
+ expected.len += cmd ? sizeof(*cmd) : 0;
+ if (type == SOCK_STREAM)
+ CHECK(expected.len > result.len, "expected.len > result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+ expected.len, result.len, get_linum());
+ else
+ CHECK(expected.len != result.len, "expected.len != result.len",
+ "expected.len:%u result.len:%u bpf_prog_linum:%u\n",
+ expected.len, result.len, get_linum());
+}
+
+static void check_results(void)
+{
+ __u32 results[NR_RESULTS];
+ __u32 i, broken = 0;
+ int err;
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ err = bpf_map_lookup_elem(result_map, &i, &results[i]);
+ CHECK(err == -1, "lookup_elem(result_map)",
+ "i:%u err:%d errno:%d\n", i, err, errno);
+ }
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ if (results[i] != expected_results[i]) {
+ broken = i;
+ break;
+ }
+ }
+
+ if (i == NR_RESULTS)
+ return;
+
+ printf("unexpected result\n");
+ printf(" result: [");
+ printf("%u", results[0]);
+ for (i = 1; i < NR_RESULTS; i++)
+ printf(", %u", results[i]);
+ printf("]\n");
+
+ printf("expected: [");
+ printf("%u", expected_results[0]);
+ for (i = 1; i < NR_RESULTS; i++)
+ printf(", %u", expected_results[i]);
+ printf("]\n");
+
+ CHECK(expected_results[broken] != results[broken],
+ "unexpected result",
+ "expected_results[%u] != results[%u] bpf_prog_linum:%u\n",
+ broken, broken, get_linum());
+}
+
+static int send_data(int type, sa_family_t family, void *data, size_t len,
+ enum result expected)
+{
+ union sa46 cli_sa;
+ int fd, err;
+
+ fd = socket(family, type, 0);
+ CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
+
+ sa46_init_loopback(&cli_sa, family);
+ err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
+ CHECK(err == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
+
+ err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
+ sizeof(srv_sa));
+ CHECK(err != len && expected >= PASS,
+ "sendto()", "family:%u err:%d errno:%d expected:%d\n",
+ family, err, errno, expected);
+
+ return fd;
+}
+
+static void do_test(int type, sa_family_t family, struct cmd *cmd,
+ enum result expected)
+{
+ int nev, srv_fd, cli_fd;
+ struct epoll_event ev;
+ struct cmd rcv_cmd;
+ ssize_t nread;
+
+ cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0,
+ expected);
+ nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0);
+ CHECK((nev <= 0 && expected >= PASS) ||
+ (nev > 0 && expected < PASS),
+ "nev <> expected",
+ "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n",
+ nev, expected, type, family,
+ cmd ? cmd->reuseport_index : -1,
+ cmd ? cmd->pass_on_failure : -1);
+ check_results();
+ check_data(type, family, cmd, cli_fd);
+
+ if (expected < PASS)
+ return;
+
+ CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT &&
+ cmd->reuseport_index != ev.data.u32,
+ "check cmd->reuseport_index",
+ "cmd:(%u, %u) ev.data.u32:%u\n",
+ cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32);
+
+ srv_fd = sk_fds[ev.data.u32];
+ if (type == SOCK_STREAM) {
+ int new_fd = accept(srv_fd, NULL, 0);
+
+ CHECK(new_fd == -1, "accept(srv_fd)",
+ "ev.data.u32:%u new_fd:%d errno:%d\n",
+ ev.data.u32, new_fd, errno);
+
+ nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+ CHECK(nread != sizeof(rcv_cmd),
+ "recv(new_fd)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
+
+ close(new_fd);
+ } else {
+ nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT);
+ CHECK(nread != sizeof(rcv_cmd),
+ "recv(sk_fds)",
+ "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n",
+ ev.data.u32, nread, sizeof(rcv_cmd), errno);
+ }
+
+ close(cli_fd);
+}
+
+static void test_err_inner_map(int type, sa_family_t family)
+{
+ struct cmd cmd = {
+ .reuseport_index = 0,
+ .pass_on_failure = 0,
+ };
+
+ printf("%s: ", __func__);
+ expected_results[DROP_ERR_INNER_MAP]++;
+ do_test(type, family, &cmd, DROP_ERR_INNER_MAP);
+ printf("OK\n");
+}
+
+static void test_err_skb_data(int type, sa_family_t family)
+{
+ printf("%s: ", __func__);
+ expected_results[DROP_ERR_SKB_DATA]++;
+ do_test(type, family, NULL, DROP_ERR_SKB_DATA);
+ printf("OK\n");
+}
+
+static void test_err_sk_select_port(int type, sa_family_t family)
+{
+ struct cmd cmd = {
+ .reuseport_index = REUSEPORT_ARRAY_SIZE,
+ .pass_on_failure = 0,
+ };
+
+ printf("%s: ", __func__);
+ expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++;
+ do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT);
+ printf("OK\n");
+}
+
+static void test_pass(int type, sa_family_t family)
+{
+ struct cmd cmd;
+ int i;
+
+ printf("%s: ", __func__);
+ cmd.pass_on_failure = 0;
+ for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+ expected_results[PASS]++;
+ cmd.reuseport_index = i;
+ do_test(type, family, &cmd, PASS);
+ }
+ printf("OK\n");
+}
+
+static void test_syncookie(int type, sa_family_t family)
+{
+ int err, tmp_index = 1;
+ struct cmd cmd = {
+ .reuseport_index = 0,
+ .pass_on_failure = 0,
+ };
+
+ if (type != SOCK_STREAM)
+ return;
+
+ printf("%s: ", __func__);
+ /*
+ * +1 for TCP-SYN and
+ * +1 for the TCP-ACK (ack the syncookie)
+ */
+ expected_results[PASS] += 2;
+ enable_syncookie();
+ /*
+ * Simulate that the TCP-SYN and the TCP-ACK are handled by two
+ * different sk:
+ * TCP-SYN: select sk_fds[tmp_index = 1]; tmp_index comes from
+ * the tmp_index_ovr_map
+ * TCP-ACK: select sk_fds[reuseport_index = 0]; reuseport_index
+ * comes from the cmd.reuseport_index
+ */
+ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
+ &tmp_index, BPF_ANY);
+ CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ "err:%d errno:%d\n", err, errno);
+ do_test(type, family, &cmd, PASS);
+ err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
+ &tmp_index);
+ CHECK(err == -1 || tmp_index != -1,
+ "lookup_elem(tmp_index_ovr_map)",
+ "err:%d errno:%d tmp_index:%d\n",
+ err, errno, tmp_index);
+ disable_syncookie();
+ printf("OK\n");
+}
+
+static void test_pass_on_err(int type, sa_family_t family)
+{
+ struct cmd cmd = {
+ .reuseport_index = REUSEPORT_ARRAY_SIZE,
+ .pass_on_failure = 1,
+ };
+
+ printf("%s: ", __func__);
+ expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1;
+ do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT);
+ printf("OK\n");
+}
+
+static void prepare_sk_fds(int type, sa_family_t family, bool inany)
+{
+ const int first = REUSEPORT_ARRAY_SIZE - 1;
+ int i, err, optval = 1;
+ struct epoll_event ev;
+ socklen_t addrlen;
+
+ if (inany)
+ sa46_init_inany(&srv_sa, family);
+ else
+ sa46_init_loopback(&srv_sa, family);
+ addrlen = sizeof(srv_sa);
+
+ /*
+ * sk_fds[] is filled from the back so that its order is exactly
+ * opposite to the kernel's (struct sock_reuseport *)reuse->socks[].
+ */
+ for (i = first; i >= 0; i--) {
+ sk_fds[i] = socket(family, type, 0);
+ CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n",
+ i, sk_fds[i], errno);
+ err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT,
+ &optval, sizeof(optval));
+ CHECK(err == -1, "setsockopt(SO_REUSEPORT)",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
+
+ if (i == first) {
+ err = setsockopt(sk_fds[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &select_by_skb_data_prog,
+ sizeof(select_by_skb_data_prog));
+ CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ "err:%d errno:%d\n", err, errno);
+ }
+
+ err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
+ CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
+
+ if (type == SOCK_STREAM) {
+ err = listen(sk_fds[i], 10);
+ CHECK(err == -1, "listen()",
+ "sk_fds[%d] err:%d errno:%d\n",
+ i, err, errno);
+ }
+
+ err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
+ BPF_NOEXIST);
+ CHECK(err == -1, "update_elem(reuseport_array)",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+
+ if (i == first) {
+ socklen_t addrlen = sizeof(srv_sa);
+
+ err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa,
+ &addrlen);
+ CHECK(err == -1, "getsockname()",
+ "sk_fds[%d] err:%d errno:%d\n", i, err, errno);
+ }
+ }
+
+ epfd = epoll_create(1);
+ CHECK(epfd == -1, "epoll_create(1)",
+ "epfd:%d errno:%d\n", epfd, errno);
+
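+ /*
+ * Tag each epoll entry with its sk_fds[] index so that do_test()
+ * can map a ready event (ev.data.u32) back to the socket the
+ * bpf prog selected.
+ */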
+ ev.events = EPOLLIN;
+ for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) {
+ ev.data.u32 = i;
+ err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev);
+ CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i);
+ }
+}
+
+static void setup_per_test(int type, unsigned short family, bool inany)
+{
+ int ovr = -1, err;
+
+ prepare_sk_fds(type, family, inany);
+ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
+ BPF_ANY);
+ CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup_per_test(void)
+{
+ int i, err, zero = 0;
+
+ memset(expected_results, 0, sizeof(expected_results));
+
+ for (i = 0; i < NR_RESULTS; i++) {
+ err = bpf_map_update_elem(result_map, &i, &zero, BPF_ANY);
+ CHECK(err, "reset elem in result_map",
+ "i:%u err:%d errno:%d\n", i, err, errno);
+ }
+
+ err = bpf_map_update_elem(linum_map, &zero, &zero, BPF_ANY);
+ CHECK(err, "reset line number in linum_map", "err:%d errno:%d\n",
+ err, errno);
+
+ for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++)
+ close(sk_fds[i]);
+ close(epfd);
+
+ err = bpf_map_delete_elem(outer_map, &index_zero);
+ CHECK(err == -1, "delete_elem(outer_map)",
+ "err:%d errno:%d\n", err, errno);
+}
+
+static void cleanup(void)
+{
+ close(outer_map);
+ close(reuseport_array);
+ bpf_object__close(obj);
+}
+
+static void test_all(void)
+{
+ /* Extra SOCK_STREAM to test bind_inany==true */
+ const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM };
+ const char * const type_strings[] = { "TCP", "UDP", "TCP" };
+ const char * const family_strings[] = { "IPv6", "IPv4" };
+ const unsigned short families[] = { AF_INET6, AF_INET };
+ const bool bind_inany[] = { false, false, true };
+ int t, f, err;
+
+ for (f = 0; f < ARRAY_SIZE(families); f++) {
+ unsigned short family = families[f];
+
+ for (t = 0; t < ARRAY_SIZE(types); t++) {
+ bool inany = bind_inany[t];
+ int type = types[t];
+
+ printf("######## %s/%s %s ########\n",
+ family_strings[f], type_strings[t],
+ inany ? " INANY " : "LOOPBACK");
+
+ setup_per_test(type, family, inany);
+
+ test_err_inner_map(type, family);
+
+ /* Install reuseport_array to the outer_map */
+ err = bpf_map_update_elem(outer_map, &index_zero,
+ &reuseport_array, BPF_ANY);
+ CHECK(err == -1, "update_elem(outer_map)",
+ "err:%d errno:%d\n", err, errno);
+
+ test_err_skb_data(type, family);
+ test_err_sk_select_port(type, family);
+ test_pass(type, family);
+ test_syncookie(type, family);
+ test_pass_on_err(type, family);
+
+ cleanup_per_test();
+ printf("\n");
+ }
+ }
+}
+
+int main(int argc, const char **argv)
+{
+ create_maps();
+ prepare_bpf_obj();
+ saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
+ saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL);
+ enable_fastopen();
+ disable_syncookie();
+ atexit(restore_sysctls);
+
+ test_all();
+
+ cleanup();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_common.h b/tools/testing/selftests/bpf/test_select_reuseport_common.h
new file mode 100644
index 000000000..08eb2a9f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_common.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2018 Facebook */
+
+#ifndef __TEST_SELECT_REUSEPORT_COMMON_H
+#define __TEST_SELECT_REUSEPORT_COMMON_H
+
+#include <linux/types.h>
+
+enum result {
+ DROP_ERR_INNER_MAP,
+ DROP_ERR_SKB_DATA,
+ DROP_ERR_SK_SELECT_REUSEPORT,
+ DROP_MISC,
+ PASS,
+ PASS_ERR_SK_SELECT_REUSEPORT,
+ NR_RESULTS,
+};
+
+struct cmd {
+ __u32 reuseport_index;
+ __u32 pass_on_failure;
+};
+
+struct data_check {
+ __u32 ip_protocol;
+ __u32 skb_addrs[8];
+ __u16 skb_ports[2];
+ __u16 eth_protocol;
+ __u8 bind_inany;
+ __u8 equal_check_end[0];
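+ /* The user-space test memcmp()s everything above this zero-length
+ * marker, i.e. the first offsetof(struct data_check, equal_check_end)
+ * bytes; len and hash below are validated separately.
+ */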
+
+ __u32 len;
+ __u32 hash;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
new file mode 100644
index 000000000..5b54ec637
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_select_reuseport_kern.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook */
+
+#include <stdlib.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#include "bpf_endian.h"
+#include "bpf_helpers.h"
+#include "test_select_reuseport_common.h"
+
+int _version SEC("version") = 1;
+
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+#endif
+
+struct bpf_map_def SEC("maps") outer_map = {
+ .type = BPF_MAP_TYPE_ARRAY_OF_MAPS,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") result_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = NR_RESULTS,
+};
+
+struct bpf_map_def SEC("maps") tmp_index_ovr_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(int),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") linum_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") data_check_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct data_check),
+ .max_entries = 1,
+};
+
+#define GOTO_DONE(_result) ({ \
+ result = (_result); \
+ linum = __LINE__; \
+ goto done; \
+})
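+
+/*
+ * For illustration, GOTO_DONE(DROP_MISC) expands (as a statement
+ * expression) to roughly:
+ *
+ *	result = DROP_MISC;
+ *	linum = __LINE__;
+ *	goto done;
+ *
+ * recording both the verdict and the exact bpf prog line for the
+ * done: epilogue below.
+ */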
+
+SEC("select_by_skb_data")
+int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
+{
+ __u32 linum, index = 0, flags = 0, index_zero = 0;
+ __u32 *result_cnt, *linum_value;
+ struct data_check data_check = {};
+ struct cmd *cmd, cmd_copy;
+ void *data, *data_end;
+ void *reuseport_array;
+ enum result result;
+ int *index_ovr;
+ int err;
+
+ data = reuse_md->data;
+ data_end = reuse_md->data_end;
+ data_check.len = reuse_md->len;
+ data_check.eth_protocol = reuse_md->eth_protocol;
+ data_check.ip_protocol = reuse_md->ip_protocol;
+ data_check.hash = reuse_md->hash;
+ data_check.bind_inany = reuse_md->bind_inany;
+ if (data_check.eth_protocol == bpf_htons(ETH_P_IP)) {
+ if (bpf_skb_load_bytes_relative(reuse_md,
+ offsetof(struct iphdr, saddr),
+ data_check.skb_addrs, 8,
+ BPF_HDR_START_NET))
+ GOTO_DONE(DROP_MISC);
+ } else {
+ if (bpf_skb_load_bytes_relative(reuse_md,
+ offsetof(struct ipv6hdr, saddr),
+ data_check.skb_addrs, 32,
+ BPF_HDR_START_NET))
+ GOTO_DONE(DROP_MISC);
+ }
+
+ /*
+ * The ip_protocol could be a compile-time decision
+ * if the bpf_prog.o were dedicated to either TCP or
+ * UDP.
+ *
+ * Otherwise, reuse_md->ip_protocol or
+ * the protocol field in the iphdr can be used.
+ */
+ if (data_check.ip_protocol == IPPROTO_TCP) {
+ struct tcphdr *th = data;
+
+ if (th + 1 > data_end)
+ GOTO_DONE(DROP_MISC);
+
+ data_check.skb_ports[0] = th->source;
+ data_check.skb_ports[1] = th->dest;
+
+ if ((th->doff << 2) + sizeof(*cmd) > data_check.len)
+ GOTO_DONE(DROP_ERR_SKB_DATA);
+ if (bpf_skb_load_bytes(reuse_md, th->doff << 2, &cmd_copy,
+ sizeof(cmd_copy)))
+ GOTO_DONE(DROP_MISC);
+ cmd = &cmd_copy;
+ } else if (data_check.ip_protocol == IPPROTO_UDP) {
+ struct udphdr *uh = data;
+
+ if (uh + 1 > data_end)
+ GOTO_DONE(DROP_MISC);
+
+ data_check.skb_ports[0] = uh->source;
+ data_check.skb_ports[1] = uh->dest;
+
+ if (sizeof(struct udphdr) + sizeof(*cmd) > data_check.len)
+ GOTO_DONE(DROP_ERR_SKB_DATA);
+ if (data + sizeof(struct udphdr) + sizeof(*cmd) > data_end) {
+ if (bpf_skb_load_bytes(reuse_md, sizeof(struct udphdr),
+ &cmd_copy, sizeof(cmd_copy)))
+ GOTO_DONE(DROP_MISC);
+ cmd = &cmd_copy;
+ } else {
+ cmd = data + sizeof(struct udphdr);
+ }
+ } else {
+ GOTO_DONE(DROP_MISC);
+ }
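+
+ /*
+ * At this point cmd points either directly into the linear skb
+ * data (the UDP fast path above) or at cmd_copy, which was staged
+ * with bpf_skb_load_bytes() when the payload was not directly
+ * accessible.
+ */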
+
+ reuseport_array = bpf_map_lookup_elem(&outer_map, &index_zero);
+ if (!reuseport_array)
+ GOTO_DONE(DROP_ERR_INNER_MAP);
+
+ index = cmd->reuseport_index;
+ index_ovr = bpf_map_lookup_elem(&tmp_index_ovr_map, &index_zero);
+ if (!index_ovr)
+ GOTO_DONE(DROP_MISC);
+
+ if (*index_ovr != -1) {
+ index = *index_ovr;
+ *index_ovr = -1;
+ }
+ err = bpf_sk_select_reuseport(reuse_md, reuseport_array, &index,
+ flags);
+ if (!err)
+ GOTO_DONE(PASS);
+
+ if (cmd->pass_on_failure)
+ GOTO_DONE(PASS_ERR_SK_SELECT_REUSEPORT);
+ else
+ GOTO_DONE(DROP_ERR_SK_SELECT_REUSEPORT);
+
+done:
+ result_cnt = bpf_map_lookup_elem(&result_map, &result);
+ if (!result_cnt)
+ return SK_DROP;
+
+ bpf_map_update_elem(&linum_map, &index_zero, &linum, BPF_ANY);
+ bpf_map_update_elem(&data_check_map, &index_zero, &data_check, BPF_ANY);
+
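+ /* enum result orders every DROP_* verdict before PASS, so any
+ * result below PASS maps to SK_DROP in the return below.
+ */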
+ (*result_cnt)++;
+ return result < PASS ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
new file mode 100755
index 000000000..42544a969
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2018 Facebook
+
+set -eu
+
+wait_for_ip()
+{
+ local _i
+ echo -n "Wait for testing link-local IP to become available "
+ for _i in $(seq ${MAX_PING_TRIES}); do
+ echo -n "."
+ if ping -6 -q -c 1 -W 1 ff02::1%${TEST_IF} >/dev/null 2>&1; then
+ echo " OK"
+ return
+ fi
+ sleep 1
+ done
+ echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+ exit 1
+}
+
+setup()
+{
+ # Create dedicated testing interfaces so as not to interfere with
+ # the current environment.
+ ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+ ip link set ${TEST_IF} up
+ ip link set ${TEST_IF_PEER} up
+
+ wait_for_ip
+
+ tc qdisc add dev ${TEST_IF} clsact
+ tc filter add dev ${TEST_IF} egress bpf obj ${BPF_PROG_OBJ} \
+ sec ${BPF_PROG_SECTION} da
+
+ BPF_PROG_ID=$(tc filter show dev ${TEST_IF} egress | \
+ awk '/ id / {sub(/.* id /, "", $0); print($1)}')
+}
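+
+# Note: the awk in setup() strips everything up to " id " from the
+# `tc filter show` output, leaving just the BPF program id, which
+# test_skb_cgroup_id_user resolves back to an fd via
+# bpf_prog_get_fd_by_id().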
+
+cleanup()
+{
+ ip link del ${TEST_IF} 2>/dev/null || :
+ ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+ trap cleanup EXIT 2 3 6 15
+ setup
+ ${PROG} ${TEST_IF} ${BPF_PROG_ID}
+}
+
+DIR=$(dirname $0)
+TEST_IF="test_cgid_1"
+TEST_IF_PEER="test_cgid_2"
+MAX_PING_TRIES=5
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_SECTION="cgroup_id_logger"
+BPF_PROG_ID=0
+PROG="${DIR}/test_skb_cgroup_id_user"
+
+main
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
new file mode 100644
index 000000000..68cf9829f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_kern.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+#include <string.h>
+
+#include "bpf_helpers.h"
+
+#define NUM_CGROUP_LEVELS 4
+
+struct bpf_map_def SEC("maps") cgroup_ids = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = NUM_CGROUP_LEVELS,
+};
+
+static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
+{
+ __u64 id;
+
+ /* [1] &level passed to external function that may change it, it's
+ * incompatible with loop unroll.
+ */
+ id = bpf_skb_ancestor_cgroup_id(skb, level);
+ bpf_map_update_elem(&cgroup_ids, &level, &id, 0);
+}
+
+SEC("cgroup_id_logger")
+int log_cgroup_id(struct __sk_buff *skb)
+{
+ /* A loop can't be used here due to [1], so unroll manually.
+ * The number of calls must stay in sync with NUM_CGROUP_LEVELS.
+ */
+ log_nth_level(skb, 0);
+ log_nth_level(skb, 1);
+ log_nth_level(skb, 2);
+ log_nth_level(skb, 3);
+
+ return TC_ACT_OK;
+}
+
+int _version SEC("version") = 1;
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
new file mode 100644
index 000000000..c121cc59f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CGROUP_PATH "/skb_cgroup_test"
+#define NUM_CGROUP_LEVELS 4
+
+/* RFC 4291, Section 2.7.1 */
+#define LINKLOCAL_MULTICAST "ff02::1"
+
+static int mk_dst_addr(const char *ip, const char *iface,
+ struct sockaddr_in6 *dst)
+{
+ memset(dst, 0, sizeof(*dst));
+
+ dst->sin6_family = AF_INET6;
+ dst->sin6_port = htons(1025);
+
+ if (inet_pton(AF_INET6, ip, &dst->sin6_addr) != 1) {
+ log_err("Invalid IPv6: %s", ip);
+ return -1;
+ }
+
+ dst->sin6_scope_id = if_nametoindex(iface);
+ if (!dst->sin6_scope_id) {
+ log_err("Failed to get index of iface: %s", iface);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int send_packet(const char *iface)
+{
+ struct sockaddr_in6 dst;
+ char msg[] = "msg";
+ int err = 0;
+ int fd = -1;
+
+ if (mk_dst_addr(LINKLOCAL_MULTICAST, iface, &dst))
+ goto err;
+
+ fd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create UDP socket");
+ goto err;
+ }
+
+ if (sendto(fd, &msg, sizeof(msg), 0, (const struct sockaddr *)&dst,
+ sizeof(dst)) == -1) {
+ log_err("Failed to send datagram");
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ if (fd >= 0)
+ close(fd);
+ return err;
+}
+
+int get_map_fd_by_prog_id(int prog_id)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 map_ids[1];
+ int prog_fd = -1;
+ int map_fd = -1;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ log_err("Failed to get fd by prog id %d", prog_id);
+ goto err;
+ }
+
+ info.nr_map_ids = 1;
+ info.map_ids = (__u64) (unsigned long) map_ids;
+
+ if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+ log_err("Failed to get info by prog fd %d", prog_fd);
+ goto err;
+ }
+
+ if (!info.nr_map_ids) {
+ log_err("No maps found for prog fd %d", prog_fd);
+ goto err;
+ }
+
+ map_fd = bpf_map_get_fd_by_id(map_ids[0]);
+ if (map_fd < 0)
+ log_err("Failed to get fd by map id %d", map_ids[0]);
+err:
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return map_fd;
+}
+
+int check_ancestor_cgroup_ids(int prog_id)
+{
+ __u64 actual_ids[NUM_CGROUP_LEVELS], expected_ids[NUM_CGROUP_LEVELS];
+ __u32 level;
+ int err = 0;
+ int map_fd;
+
+ expected_ids[0] = 0x100000001; /* root cgroup */
+ expected_ids[1] = get_cgroup_id("");
+ expected_ids[2] = get_cgroup_id(CGROUP_PATH);
+ expected_ids[3] = 0; /* non-existent cgroup */
+
+ map_fd = get_map_fd_by_prog_id(prog_id);
+ if (map_fd < 0)
+ goto err;
+
+ for (level = 0; level < NUM_CGROUP_LEVELS; ++level) {
+ if (bpf_map_lookup_elem(map_fd, &level, &actual_ids[level])) {
+ log_err("Failed to lookup key %d", level);
+ goto err;
+ }
+ if (actual_ids[level] != expected_ids[level]) {
+ log_err("%llx (actual) != %llx (expected), level: %u\n",
+ actual_ids[level], expected_ids[level], level);
+ goto err;
+ }
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ if (map_fd >= 0)
+ close(map_fd);
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s iface prog_id\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CGROUP_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CGROUP_PATH))
+ goto err;
+
+ if (send_packet(argv[1]))
+ goto err;
+
+ if (check_ancestor_cgroup_ids(atoi(argv[2])))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
new file mode 100644
index 000000000..e95671220
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_endian.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#define CG_PATH "/foo"
+#define MAX_INSNS 512
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_test {
+ const char *descr;
+ /* BPF prog properties */
+ struct bpf_insn insns[MAX_INSNS];
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_attach_type attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* Endpoint to bind() to */
+ const char *ip;
+ unsigned short port;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ BIND_REJECT,
+ SUCCESS,
+ } result;
+};
+
+static struct sock_test tests[] = {
+ {
+ "bind4 load with invalid access: src_ip6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[0])),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "bind4 load with invalid access: mark",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, mark)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "bind6 load with invalid access: src_ip4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "sock_create load with invalid access: src_port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ 0,
+ 0,
+ NULL,
+ 0,
+ LOAD_REJECT,
+ },
+ {
+ "sock_create load w/o expected_attach_type (compat mode)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ 0,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 8097,
+ SUCCESS,
+ },
+ {
+ "sock_create load w/ expected_attach_type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET_SOCK_CREATE,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 8097,
+ SUCCESS,
+ },
+ {
+ "attach type mismatch bind4 vs bind6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch bind6 vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch default vs bind4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ 0,
+ BPF_CGROUP_INET4_POST_BIND,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "attach type mismatch bind6 vs sock_create",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET_SOCK_CREATE,
+ 0,
+ 0,
+ NULL,
+ 0,
+ ATTACH_REJECT,
+ },
+ {
+ "bind4 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "0.0.0.0",
+ 0,
+ BIND_REJECT,
+ },
+ {
+ "bind6 reject all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::",
+ 0,
+ BIND_REJECT,
+ },
+ {
+ "bind6 deny specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::1",
+ 8193,
+ BIND_REJECT,
+ },
+ {
+ "bind4 allow specific IP & port",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_A(1),
+
+ /* else return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "127.0.0.1",
+ 4098,
+ SUCCESS,
+ },
+ {
+ "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET4_POST_BIND,
+ BPF_CGROUP_INET4_POST_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ "0.0.0.0",
+ 0,
+ SUCCESS,
+ },
+ {
+ "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ BPF_CGROUP_INET6_POST_BIND,
+ BPF_CGROUP_INET6_POST_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ "::",
+ 0,
+ SUCCESS,
+ },
+};
+
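+/*
+ * probe_prog_length() scans insns[] backwards for the last non-zero
+ * instruction: a test case only fills in the prefix it needs, and the
+ * all-zero tail of the MAX_INSNS-sized array is never loaded.
+ */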
+static size_t probe_prog_length(const struct bpf_insn *fp)
+{
+ size_t len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
+
+static int load_sock_prog(const struct bpf_insn *prog,
+ enum bpf_attach_type attach_type)
+{
+ struct bpf_load_program_attr attr;
+
+ memset(&attr, 0, sizeof(struct bpf_load_program_attr));
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+ attr.expected_attach_type = attach_type;
+ attr.insns = prog;
+ attr.insns_cnt = probe_prog_length(attr.insns);
+ attr.license = "GPL";
+
+ return bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+}
+
+static int attach_sock_prog(int cgfd, int progfd,
+ enum bpf_attach_type attach_type)
+{
+ return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
+}
+
+static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+ int sockfd = -1;
+ socklen_t len;
+ int err = 0;
+
+ sockfd = socket(domain, type, 0);
+ if (sockfd < 0)
+ goto err;
+
+ memset(&addr, 0, sizeof(addr));
+
+ if (domain == AF_INET) {
+ len = sizeof(struct sockaddr_in);
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1)
+ goto err;
+ } else if (domain == AF_INET6) {
+ len = sizeof(struct sockaddr_in6);
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1)
+ goto err;
+ } else {
+ goto err;
+ }
+
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(sockfd);
+ return err;
+}
+
+static int run_test_case(int cgfd, const struct sock_test *test)
+{
+ int progfd = -1;
+ int err = 0;
+
+ printf("Test case: %s .. ", test->descr);
+ progfd = load_sock_prog(test->insns, test->expected_attach_type);
+ if (progfd < 0) {
+ if (test->result == LOAD_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+ if (test->result == ATTACH_REJECT)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
+ /* sys_bind() may fail for different reasons; errno has to be
+ * checked to confirm that the BPF program rejected it.
+ */
+ if (test->result == BIND_REJECT && errno == EPERM)
+ goto out;
+ else
+ goto err;
+ }
+
+ if (test->result != SUCCESS)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (progfd != -1)
+ bpf_prog_detach(cgfd, test->attach_type);
+ close(progfd);
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
+
+static int run_tests(int cgfd)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(cgfd, &tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_tests(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
new file mode 100644
index 000000000..e38f1cb70
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -0,0 +1,1441 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "cgroup_helpers.h"
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+
+#ifndef ENOTSUPP
+# define ENOTSUPP 524
+#endif
+
+#define CG_PATH "/foo"
+#define CONNECT4_PROG_PATH "./connect4_prog.o"
+#define CONNECT6_PROG_PATH "./connect6_prog.o"
+#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o"
+#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o"
+
+#define SERV4_IP "192.168.1.254"
+#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_IP "172.16.0.1"
+#define SRC4_REWRITE_IP "127.0.0.4"
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_PORT 4444
+
+#define SERV6_IP "face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP "::1"
+#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
+#define SRC6_IP "::1"
+#define SRC6_REWRITE_IP "::6"
+#define WILDCARD6_IP "::"
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_PORT 6666
+
+#define INET_NTOP_BUF 40
+
+struct sock_addr_test;
+
+typedef int (*load_fn)(const struct sock_addr_test *test);
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+char bpf_log_buf[BPF_LOG_BUF_SIZE];
+
+struct sock_addr_test {
+ const char *descr;
+ /* BPF prog properties */
+ load_fn loadfn;
+ enum bpf_attach_type expected_attach_type;
+ enum bpf_attach_type attach_type;
+ /* Socket properties */
+ int domain;
+ int type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_ip;
+ unsigned short requested_port;
+ const char *expected_ip;
+ unsigned short expected_port;
+ const char *expected_src_ip;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ SYSCALL_EPERM,
+ SYSCALL_ENOTSUPP,
+ SUCCESS,
+ } expected_result;
+};
+
+static int bind4_prog_load(const struct sock_addr_test *test);
+static int bind6_prog_load(const struct sock_addr_test *test);
+static int connect4_prog_load(const struct sock_addr_test *test);
+static int connect6_prog_load(const struct sock_addr_test *test);
+static int sendmsg_allow_prog_load(const struct sock_addr_test *test);
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
+static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
+
+static struct sock_addr_test tests[] = {
+ /* bind */
+ {
+ "bind4: load prog with wrong expected attach type",
+ bind4_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "bind4: attach prog with wrong attach type",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "bind4: rewrite IP & TCP port in",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind4: rewrite IP & UDP port in",
+ bind4_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind6: load prog with wrong expected attach type",
+ bind6_prog_load,
+ BPF_CGROUP_INET4_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "bind6: attach prog with wrong attach type",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET4_BIND,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "bind6: rewrite IP & TCP port in",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ "bind6: rewrite IP & UDP port in",
+ bind6_prog_load,
+ BPF_CGROUP_INET6_BIND,
+ BPF_CGROUP_INET6_BIND,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+
+ /* connect */
+ {
+ "connect4: load prog with wrong expected attach type",
+ connect4_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "connect4: attach prog with wrong attach type",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "connect4: rewrite IP & TCP port",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect4: rewrite IP & UDP port",
+ connect4_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect6: load prog with wrong expected attach type",
+ connect6_prog_load,
+ BPF_CGROUP_INET4_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "connect6: attach prog with wrong attach type",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET4_CONNECT,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "connect6: rewrite IP & TCP port",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "connect6: rewrite IP & UDP port",
+ connect6_prog_load,
+ BPF_CGROUP_INET6_CONNECT,
+ BPF_CGROUP_INET6_CONNECT,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+
+ /* sendmsg */
+ {
+ "sendmsg4: load prog with wrong expected attach type",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "sendmsg4: attach prog with wrong attach type",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "sendmsg4: rewrite IP & port (asm)",
+ sendmsg4_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg4: rewrite IP & port (C)",
+ sendmsg4_rw_c_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg4: deny call",
+ sendmsg_deny_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ "sendmsg6: load prog with wrong expected attach type",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP4_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ "sendmsg6: attach prog with wrong attach type",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP4_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ "sendmsg6: rewrite IP & port (asm)",
+ sendmsg6_rw_asm_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: rewrite IP & port (C)",
+ sendmsg6_rw_c_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: IPv4-mapped IPv6",
+ sendmsg6_rw_v4mapped_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_ENOTSUPP,
+ },
+ {
+ "sendmsg6: set dst IP = [::] (BSD'ism)",
+ sendmsg6_rw_wildcard_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: preserve dst IP = [::] (BSD'ism)",
+ sendmsg_allow_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ "sendmsg6: deny call",
+ sendmsg_deny_prog_load,
+ BPF_CGROUP_UDP6_SENDMSG,
+ BPF_CGROUP_UDP6_SENDMSG,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+};
+
+static int mk_sockaddr(int domain, const char *ip, unsigned short port,
+ struct sockaddr *addr, socklen_t addr_len)
+{
+ struct sockaddr_in6 *addr6;
+ struct sockaddr_in *addr4;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ return -1;
+ }
+
+ memset(addr, 0, addr_len);
+
+ if (domain == AF_INET) {
+ if (addr_len < sizeof(struct sockaddr_in))
+ return -1;
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = domain;
+ addr4->sin_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
+ log_err("Invalid IPv4: %s", ip);
+ return -1;
+ }
+ } else if (domain == AF_INET6) {
+ if (addr_len < sizeof(struct sockaddr_in6))
+ return -1;
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = domain;
+ addr6->sin6_port = htons(port);
+ if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
+ log_err("Invalid IPv6: %s", ip);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int load_insns(const struct sock_addr_test *test,
+ const struct bpf_insn *insns, size_t insns_cnt)
+{
+ struct bpf_load_program_attr load_attr;
+ int ret;
+
+ memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
+ load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ load_attr.expected_attach_type = test->expected_attach_type;
+ load_attr.insns = insns;
+ load_attr.insns_cnt = insns_cnt;
+ load_attr.license = "GPL";
+
+ ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ if (ret < 0 && test->expected_result != LOAD_REJECT) {
+ log_err(">>> Loading program error.\n"
+ ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
+ }
+
+ return ret;
+}
+
+/* [1] These testing programs try to read different context fields, including
+ * narrow loads of different sizes from user_ip4 and user_ip6, and write to
+ * those allowed to be overridden.
+ *
+ * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever a register has
+ * to be compared with an unsigned 32bit integer. BPF_JMP_IMM can't be
+ * used in such cases since it accepts only a _signed_ 32bit integer as
+ * its IMM argument. Also note that BPF_LD_IMM64 consists of 2
+ * instructions, which matters when counting jump offsets.
+ */
+
+static int bind4_prog_load(const struct sock_addr_test *test)
+{
+ union {
+ uint8_t u4_addr8[4];
+ uint16_t u4_addr16[2];
+ uint32_t u4_addr32;
+ } ip4;
+ struct sockaddr_in addr4_rw;
+
+ if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
+ log_err("Invalid IPv4: %s", SERV4_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+ (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
+ return -1;
+
+ /* See [1]. */
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 16),
+
+ /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
+ BPF_JMP_A(1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 12),
+
+ /* 1st_byte_of_user_ip4 == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 10),
+
+ /* 1st_half_of_user_ip4 == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 8),
+
+ /* whole_user_ip4 == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+ BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
+
+ /* user_ip4 = addr4_rw.sin_addr */
+ BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+
+ /* user_port = addr4_rw.sin_port */
+ BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int bind6_prog_load(const struct sock_addr_test *test)
+{
+ struct sockaddr_in6 addr6_rw;
+ struct in6_addr ip6;
+
+ if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
+ log_err("Invalid IPv6: %s", SERV6_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
+ (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
+ return -1;
+
+ /* See [1]. */
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET6 && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+ /* 5th_byte_of_user_ip6 == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[1])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
+
+ /* 3rd_half_of_user_ip6 == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[1])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
+
+ /* last_word_of_user_ip6 == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_ip6[3])),
+ BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
+
+#define STORE_IPV6_WORD(N) \
+ BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
+ offsetof(struct bpf_sock_addr, user_ip6[N]))
+
+ /* user_ip6 = addr6_rw.sin6_addr */
+ STORE_IPV6_WORD(0),
+ STORE_IPV6_WORD(1),
+ STORE_IPV6_WORD(2),
+ STORE_IPV6_WORD(3),
+
+ /* user_port = addr6_rw.sin6_port */
+ BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int load_path(const struct sock_addr_test *test, const char *path)
+{
+ struct bpf_prog_load_attr attr;
+ struct bpf_object *obj;
+ int prog_fd;
+
+ memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
+ attr.file = path;
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
+ attr.expected_attach_type = test->expected_attach_type;
+
+ if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ if (test->expected_result != LOAD_REJECT)
+ log_err(">>> Loading program (%s) error.\n", path);
+ return -1;
+ }
+
+ return prog_fd;
+}
+
+static int connect4_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, CONNECT4_PROG_PATH);
+}
+
+static int connect6_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, CONNECT6_PROG_PATH);
+}
+
+static int sendmsg_ret_only_prog_load(const struct sock_addr_test *test,
+ int32_t rc)
+{
+ struct bpf_insn insns[] = {
+ /* return rc */
+ BPF_MOV64_IMM(BPF_REG_0, rc),
+ BPF_EXIT_INSN(),
+ };
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg_ret_only_prog_load(test, /*rc*/ 1);
+}
+
+static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg_ret_only_prog_load(test, /*rc*/ 0);
+}
+
+static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+ struct sockaddr_in dst4_rw_addr;
+ struct in_addr src4_rw_ip;
+
+ if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
+ log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
+ (struct sockaddr *)&dst4_rw_addr,
+ sizeof(dst4_rw_addr)) == -1)
+ return -1;
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET && */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
+
+ /* sk.type == SOCK_DGRAM) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, type)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
+
+ /* msg_src_ip4 = src4_rw_ip */
+ BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, msg_src_ip4)),
+
+ /* user_ip4 = dst4_rw_addr.sin_addr */
+ BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_ip4)),
+
+ /* user_port = dst4_rw_addr.sin_port */
+ BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, SENDMSG4_PROG_PATH);
+}
+
+static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
+ const char *rw_dst_ip)
+{
+ struct sockaddr_in6 dst6_rw_addr;
+ struct in6_addr src6_rw_ip;
+
+ if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
+ log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
+ return -1;
+ }
+
+ if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
+ (struct sockaddr *)&dst6_rw_addr,
+ sizeof(dst6_rw_addr)) == -1)
+ return -1;
+
+ struct bpf_insn insns[] = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (sk.family == AF_INET6) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, family)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
+
+#define STORE_IPV6_WORD_N(DST, SRC, N) \
+ BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
+ offsetof(struct bpf_sock_addr, DST[N]))
+
+#define STORE_IPV6(DST, SRC) \
+ STORE_IPV6_WORD_N(DST, SRC, 0), \
+ STORE_IPV6_WORD_N(DST, SRC, 1), \
+ STORE_IPV6_WORD_N(DST, SRC, 2), \
+ STORE_IPV6_WORD_N(DST, SRC, 3)
+
+ STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
+ STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
+
+ /* user_port = dst6_rw_addr.sin6_port */
+ BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
+ BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
+ offsetof(struct bpf_sock_addr, user_port)),
+
+ /* } */
+
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+
+ return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+}
+
+static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
+}
+
+static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
+}
+
+static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test)
+{
+ return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP);
+}
+
+static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
+{
+ return load_path(test, SENDMSG6_PROG_PATH);
+}
+
+static int cmp_addr(const struct sockaddr_storage *addr1,
+ const struct sockaddr_storage *addr2, int cmp_port)
+{
+ const struct sockaddr_in *four1, *four2;
+ const struct sockaddr_in6 *six1, *six2;
+
+ if (addr1->ss_family != addr2->ss_family)
+ return -1;
+
+ if (addr1->ss_family == AF_INET) {
+ four1 = (const struct sockaddr_in *)addr1;
+ four2 = (const struct sockaddr_in *)addr2;
+ return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+ four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+ } else if (addr1->ss_family == AF_INET6) {
+ six1 = (const struct sockaddr_in6 *)addr1;
+ six2 = (const struct sockaddr_in6 *)addr2;
+ return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+ !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+ sizeof(struct in6_addr)));
+ }
+
+ return -1;
+}
+
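+/*
+ * cmp_addr() and the cmp_* helpers below follow a memcmp()-like
+ * convention: 0 means the addresses match, non-zero means a mismatch
+ * or an error.
+ */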
+static int cmp_sock_addr(info_fn fn, int sock1,
+ const struct sockaddr_storage *addr2, int cmp_port)
+{
+ struct sockaddr_storage addr1;
+ socklen_t len1 = sizeof(addr1);
+
+ memset(&addr1, 0, len1);
+ if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+ return -1;
+
+ return cmp_addr(&addr1, addr2, cmp_port);
+}
+
+static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
+{
+ return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
+}
+
+static int start_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int fd;
+
+ fd = socket(addr->ss_family, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create server socket");
+ goto out;
+ }
+
+ if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+ log_err("Failed to bind server socket");
+ goto close_out;
+ }
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 128) == -1) {
+ log_err("Failed to listen on server socket");
+ goto close_out;
+ }
+ }
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int connect_to_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int domain;
+ int fd = -1;
+
+ domain = addr->ss_family;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ goto err;
+ }
+
+ fd = socket(domain, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto err;
+ }
+
+ if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
+ log_err("Fail to connect to server");
+ goto err;
+ }
+
+ goto out;
+err:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+int init_pktinfo(int domain, struct cmsghdr *cmsg)
+{
+ struct in6_pktinfo *pktinfo6;
+ struct in_pktinfo *pktinfo4;
+
+ if (domain == AF_INET) {
+ cmsg->cmsg_level = SOL_IP;
+ cmsg->cmsg_type = IP_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
+ pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
+ memset(pktinfo4, 0, sizeof(struct in_pktinfo));
+ if (inet_pton(domain, SRC4_IP,
+ (void *)&pktinfo4->ipi_spec_dst) != 1)
+ return -1;
+ } else if (domain == AF_INET6) {
+ cmsg->cmsg_level = SOL_IPV6;
+ cmsg->cmsg_type = IPV6_PKTINFO;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
+ pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
+ memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
+ if (inet_pton(domain, SRC6_IP,
+ (void *)&pktinfo6->ipi6_addr) != 1)
+ return -1;
+ } else {
+ return -1;
+ }
+
+ return 0;
+}
+
+static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
+ socklen_t addr_len, int set_cmsg, int flags,
+ int *syscall_err)
+{
+ union {
+ char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
+ struct cmsghdr align;
+ } control6;
+ union {
+ char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
+ struct cmsghdr align;
+ } control4;
+ struct msghdr hdr;
+ struct iovec iov;
+ char data = 'a';
+ int domain;
+ int fd = -1;
+
+ domain = addr->ss_family;
+
+ if (domain != AF_INET && domain != AF_INET6) {
+ log_err("Unsupported address family");
+ goto err;
+ }
+
+ fd = socket(domain, type, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto err;
+ }
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)addr;
+ hdr.msg_namelen = addr_len;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ if (set_cmsg) {
+ if (domain == AF_INET) {
+ hdr.msg_control = &control4;
+ hdr.msg_controllen = sizeof(control4.buf);
+ } else if (domain == AF_INET6) {
+ hdr.msg_control = &control6;
+ hdr.msg_controllen = sizeof(control6.buf);
+ }
+ if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
+ log_err("Fail to init pktinfo");
+ goto err;
+ }
+ }
+
+ if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
+ log_err("Fail to send message to server");
+ *syscall_err = errno;
+ goto err;
+ }
+
+ goto out;
+err:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int fastconnect_to_server(const struct sockaddr_storage *addr,
+ socklen_t addr_len)
+{
+ int sendmsg_err;
+
+ return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
+ MSG_FASTOPEN, &sendmsg_err);
+}
+
+static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
+{
+ struct timeval tv;
+ struct msghdr hdr;
+ struct iovec iov;
+ char data[64];
+ fd_set rfds;
+
+ FD_ZERO(&rfds);
+ FD_SET(sockfd, &rfds);
+
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+
+ if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
+ !FD_ISSET(sockfd, &rfds))
+ return -1;
+
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = src_addr;
+ hdr.msg_namelen = sizeof(struct sockaddr_storage);
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ return recvmsg(sockfd, &hdr, 0);
+}
+
+static int init_addrs(const struct sock_addr_test *test,
+ struct sockaddr_storage *requested_addr,
+ struct sockaddr_storage *expected_addr,
+ struct sockaddr_storage *expected_src_addr)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
+ (struct sockaddr *)expected_addr, addr_len) == -1)
+ goto err;
+
+ if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
+ (struct sockaddr *)requested_addr, addr_len) == -1)
+ goto err;
+
+ if (test->expected_src_ip &&
+ mk_sockaddr(test->domain, test->expected_src_ip, 0,
+ (struct sockaddr *)expected_src_addr, addr_len) == -1)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int run_bind_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int err = 0;
+
+ if (init_addrs(test, &requested_addr, &expected_addr, NULL))
+ goto err;
+
+ servfd = start_server(test->type, &requested_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ if (cmp_local_addr(servfd, &expected_addr))
+ goto err;
+
+ /* Connect to the server as a sanity check */
+ clientfd = connect_to_server(test->type, &expected_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
+
+static int run_connect_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage expected_src_addr;
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int err = 0;
+
+ if (init_addrs(test, &requested_addr, &expected_addr,
+ &expected_src_addr))
+ goto err;
+
+ /* Prepare server to connect to */
+ servfd = start_server(test->type, &expected_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ clientfd = connect_to_server(test->type, &requested_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
+
+ /* Make sure src and dst addrs were overridden properly */
+ if (cmp_peer_addr(clientfd, &expected_addr))
+ goto err;
+
+ if (cmp_local_ip(clientfd, &expected_src_addr))
+ goto err;
+
+ if (test->type == SOCK_STREAM) {
+ /* Test TCP Fast Open scenario */
+ clientfd = fastconnect_to_server(&requested_addr, addr_len);
+ if (clientfd == -1)
+ goto err;
+
+ /* Make sure src and dst addrs were overridden properly */
+ if (cmp_peer_addr(clientfd, &expected_addr))
+ goto err;
+
+ if (cmp_local_ip(clientfd, &expected_src_addr))
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
+
+static int run_sendmsg_test_case(const struct sock_addr_test *test)
+{
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage expected_src_addr;
+ struct sockaddr_storage requested_addr;
+ struct sockaddr_storage expected_addr;
+ struct sockaddr_storage real_src_addr;
+ int clientfd = -1;
+ int servfd = -1;
+ int set_cmsg;
+ int err = 0;
+
+ if (test->type != SOCK_DGRAM)
+ goto err;
+
+ if (init_addrs(test, &requested_addr, &expected_addr,
+ &expected_src_addr))
+ goto err;
+
+ /* Prepare server to sendmsg to */
+ servfd = start_server(test->type, &expected_addr, addr_len);
+ if (servfd == -1)
+ goto err;
+
+ for (set_cmsg = 0; set_cmsg <= 1; ++set_cmsg) {
+ if (clientfd >= 0)
+ close(clientfd);
+
+ clientfd = sendmsg_to_server(test->type, &requested_addr,
+ addr_len, set_cmsg, /*flags*/0,
+ &err);
+ if (err)
+ goto out;
+ else if (clientfd == -1)
+ goto err;
+
+ /* Try to receive message on server instead of using
+ * getpeername(2) on client socket, to check that client's
+ * destination address was rewritten properly, since
+ * getpeername(2) doesn't work with unconnected datagram
+ * sockets.
+ *
+ * Get the source address from recvmsg(2) as well to make sure
+ * the source was rewritten properly: getsockname(2) can't be
+ * used since the socket is unconnected, and the source chosen
+ * for one specific packet may differ from the default one
+ * returned by getsockname(2).
+ */
+ if (recvmsg_from_client(servfd, &real_src_addr) == -1)
+ goto err;
+
+ if (cmp_addr(&real_src_addr, &expected_src_addr, /*cmp_port*/0))
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(clientfd);
+ close(servfd);
+ return err;
+}
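+
+/* A minimal sketch of the limitation noted above, with dst prepared as in
+ * init_addrs(): on an unconnected datagram socket getpeername(2) fails
+ * with ENOTCONN until connect(2) is called, so the rewritten destination
+ * can only be observed on the receiving side:
+ *
+ * int fd = socket(AF_INET, SOCK_DGRAM, 0);
+ * struct sockaddr_storage peer;
+ * socklen_t len = sizeof(peer);
+ *
+ * sendto(fd, "x", 1, 0, (struct sockaddr *)&dst, sizeof(dst));
+ * getpeername(fd, (struct sockaddr *)&peer, &len); // fails, ENOTCONN
+ */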
+
+static int run_test_case(int cgfd, const struct sock_addr_test *test)
+{
+ int progfd = -1;
+ int err = 0;
+
+ printf("Test case: %s .. ", test->descr);
+
+ progfd = test->loadfn(test);
+ if (test->expected_result == LOAD_REJECT && progfd < 0)
+ goto out;
+ else if (test->expected_result == LOAD_REJECT || progfd < 0)
+ goto err;
+
+ err = bpf_prog_attach(progfd, cgfd, test->attach_type,
+ BPF_F_ALLOW_OVERRIDE);
+ if (test->expected_result == ATTACH_REJECT && err) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ } else if (test->expected_result == ATTACH_REJECT || err) {
+ goto err;
+ }
+
+ switch (test->attach_type) {
+ case BPF_CGROUP_INET4_BIND:
+ case BPF_CGROUP_INET6_BIND:
+ err = run_bind_test_case(test);
+ break;
+ case BPF_CGROUP_INET4_CONNECT:
+ case BPF_CGROUP_INET6_CONNECT:
+ err = run_connect_test_case(test);
+ break;
+ case BPF_CGROUP_UDP4_SENDMSG:
+ case BPF_CGROUP_UDP6_SENDMSG:
+ err = run_sendmsg_test_case(test);
+ break;
+ default:
+ goto err;
+ }
+
+ if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ }
+
+ if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
+ err = 0; /* error was expected, reset it */
+ goto out;
+ }
+
+ if (err || test->expected_result != SUCCESS)
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ /* Detaching w/o checking return code: best effort attempt. */
+ if (progfd != -1)
+ bpf_prog_detach(cgfd, test->attach_type);
+ close(progfd);
+ printf("[%s]\n", err ? "FAIL" : "PASS");
+ return err;
+}
+
+static int run_tests(int cgfd)
+{
+ int passes = 0;
+ int fails = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (run_test_case(cgfd, &tests[i]))
+ ++fails;
+ else
+ ++passes;
+ }
+ printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
+ return fails ? -1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (argc < 2) {
+ fprintf(stderr,
+ "%s has to be run via %s.sh. Skip direct run.\n",
+ argv[0], argv[0]);
+ exit(err);
+ }
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_tests(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
new file mode 100755
index 000000000..9832a875a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sock_addr.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+
+set -eu
+
+ping_once()
+{
+ ping -${1} -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
+}
+
+wait_for_ip()
+{
+ local _i
+ echo -n "Wait for testing IPv4/IPv6 to become available "
+ for _i in $(seq ${MAX_PING_TRIES}); do
+ echo -n "."
+ if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
+ echo " OK"
+ return
+ fi
+ done
+ echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
+ exit 1
+}
+
+setup()
+{
+ # Create dedicated testing interfaces so they don't interfere with the current environment.
+ ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
+ ip link set ${TEST_IF} up
+ ip link set ${TEST_IF_PEER} up
+
+ ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
+ ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
+ wait_for_ip
+}
+
+cleanup()
+{
+ ip link del ${TEST_IF} 2>/dev/null || :
+ ip link del ${TEST_IF_PEER} 2>/dev/null || :
+}
+
+main()
+{
+ trap cleanup EXIT 2 3 6 15
+ setup
+ ./test_sock_addr setup_done
+}
+
+BASENAME=$(basename $0 .sh)
+TEST_IF="${BASENAME}1"
+TEST_IF_PEER="${BASENAME}2"
+TEST_IPv4="127.0.0.4/8"
+TEST_IPv6="::6/128"
+MAX_PING_TRIES=5
+
+main
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
new file mode 100644
index 000000000..68e108e46
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+#define CG_PATH "/foo"
+#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o"
+
+static int start_server(void)
+{
+ struct sockaddr_in6 addr;
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create server socket");
+ goto out;
+ }
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_loopback;
+ addr.sin6_port = 0;
+
+ if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
+ log_err("Failed to bind server socket");
+ goto close_out;
+ }
+
+ if (listen(fd, 128) == -1) {
+ log_err("Failed to listen on server socket");
+ goto close_out;
+ }
+
+ goto out;
+
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int connect_to_server(int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1) {
+ log_err("Failed to create client socket");
+ goto out;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ goto close_out;
+ }
+
+ if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+ log_err("Fail to connect to server");
+ goto close_out;
+ }
+
+ goto out;
+
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int validate_map(struct bpf_map *map, int client_fd)
+{
+ __u32 cookie_expected_value;
+ struct sockaddr_in6 addr;
+ socklen_t len = sizeof(addr);
+ __u32 cookie_value;
+ __u64 cookie_key;
+ int err = 0;
+ int map_fd;
+
+ if (!map) {
+ log_err("Map not found in BPF object");
+ goto err;
+ }
+
+ map_fd = bpf_map__fd(map);
+
+ err = bpf_map_get_next_key(map_fd, NULL, &cookie_key);
+ if (err) {
+ log_err("Can't get cookie key from map");
+ goto out;
+ }
+
+ err = bpf_map_lookup_elem(map_fd, &cookie_key, &cookie_value);
+ if (err) {
+ log_err("Can't get cookie value from map");
+ goto out;
+ }
+
+ err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
+ if (err) {
+ log_err("Can't get client local addr");
+ goto out;
+ }
+
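+ /* socket_cookie_prog.o is expected to store (local port << 8) | 0xFF
+ * into the map, keyed by the socket cookie; recompute that value here
+ * from the client's bound port and compare.
+ */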
+ cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+ if (cookie_value != cookie_expected_value) {
+ log_err("Unexpected value in map: %x != %x", cookie_value,
+ cookie_expected_value);
+ goto err;
+ }
+
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
+}
+
+static int run_test(int cgfd)
+{
+ enum bpf_attach_type attach_type;
+ struct bpf_prog_load_attr attr;
+ struct bpf_program *prog;
+ struct bpf_object *pobj;
+ const char *prog_name;
+ int server_fd = -1;
+ int client_fd = -1;
+ int prog_fd = -1;
+ int err = 0;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.file = SOCKET_COOKIE_PROG;
+ attr.prog_type = BPF_PROG_TYPE_UNSPEC;
+
+ err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
+ if (err) {
+ log_err("Failed to load %s", attr.file);
+ goto out;
+ }
+
+ bpf_object__for_each_program(prog, pobj) {
+ prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+
+ if (strcmp(prog_name, "cgroup/connect6") == 0) {
+ attach_type = BPF_CGROUP_INET6_CONNECT;
+ } else if (strcmp(prog_name, "sockops") == 0) {
+ attach_type = BPF_CGROUP_SOCK_OPS;
+ } else {
+ log_err("Unexpected prog: %s", prog_name);
+ goto err;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
+ BPF_F_ALLOW_OVERRIDE);
+ if (err) {
+ log_err("Failed to attach prog %s", prog_name);
+ goto out;
+ }
+ }
+
+ server_fd = start_server();
+ if (server_fd == -1)
+ goto err;
+
+ client_fd = connect_to_server(server_fd);
+ if (client_fd == -1)
+ goto err;
+
+ if (validate_map(bpf_map__next(NULL, pobj), client_fd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(client_fd);
+ close(server_fd);
+ bpf_object__close(pobj);
+ printf("%s\n", err ? "FAILED" : "PASSED");
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int cgfd = -1;
+ int err = 0;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cgfd = create_and_get_cgroup(CG_PATH);
+ if (!cgfd)
+ goto err;
+
+ if (join_cgroup(CG_PATH))
+ goto err;
+
+ if (run_test(cgfd))
+ goto err;
+
+ goto out;
+err:
+ err = -1;
+out:
+ close(cgfd);
+ cleanup_cgroup_environment();
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/test_sockhash_kern.c b/tools/testing/selftests/bpf/test_sockhash_kern.c
new file mode 100644
index 000000000..e67559164
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockhash_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#undef SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKHASH
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
new file mode 100644
index 000000000..a7fc91bb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -0,0 +1,1527 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <sched.h>
+
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+#include <linux/netlink.h>
+#include <linux/socket.h>
+#include <linux/sock_diag.h>
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <libgen.h>
+
+#include <getopt.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "bpf_rlimit.h"
+#include "cgroup_helpers.h"
+
+int running;
+static void running_handler(int a);
+
+/* randomly selected ports for testing on lo */
+#define S1_PORT 10000
+#define S2_PORT 10001
+
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define CG_PATH "/sockmap"
+
+/* global sockets */
+int s1, s2, c1, c2, p1, p2;
+int test_cnt;
+int passed;
+int failed;
+int map_fd[8];
+struct bpf_map *maps[8];
+int prog_fd[11];
+
+int txmsg_pass;
+int txmsg_noisy;
+int txmsg_redir;
+int txmsg_redir_noisy;
+int txmsg_drop;
+int txmsg_apply;
+int txmsg_cork;
+int txmsg_start;
+int txmsg_end;
+int txmsg_ingress;
+int txmsg_skb;
+
+static const struct option long_options[] = {
+ {"help", no_argument, NULL, 'h' },
+ {"cgroup", required_argument, NULL, 'c' },
+ {"rate", required_argument, NULL, 'r' },
+ {"verbose", no_argument, NULL, 'v' },
+ {"iov_count", required_argument, NULL, 'i' },
+ {"length", required_argument, NULL, 'l' },
+ {"test", required_argument, NULL, 't' },
+ {"data_test", no_argument, NULL, 'd' },
+ {"txmsg", no_argument, &txmsg_pass, 1 },
+ {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
+ {"txmsg_redir", no_argument, &txmsg_redir, 1 },
+ {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
+ {"txmsg_drop", no_argument, &txmsg_drop, 1 },
+ {"txmsg_apply", required_argument, NULL, 'a'},
+ {"txmsg_cork", required_argument, NULL, 'k'},
+ {"txmsg_start", required_argument, NULL, 's'},
+ {"txmsg_end", required_argument, NULL, 'e'},
+ {"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
+ {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {0, 0, NULL, 0 }
+};
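+
+/* Example standalone invocation (the cgroup path is illustrative and must
+ * point at an existing cgroup v2 directory):
+ *
+ * ./test_sockmap --cgroup /mnt/cgroup2/sockmap --test sendmsg \
+ *     --rate 10 --iov_count 4 --length 1024 --txmsg
+ */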
+
+static void usage(char *argv[])
+{
+ int i;
+
+ printf(" Usage: %s --cgroup <cgroup_path>\n", argv[0]);
+ printf(" options:\n");
+ for (i = 0; long_options[i].name != 0; i++) {
+ printf(" --%-12s", long_options[i].name);
+ if (long_options[i].flag != NULL)
+ printf(" flag (internal value:%d)\n",
+ *long_options[i].flag);
+ else
+ printf(" -%c\n", long_options[i].val);
+ }
+ printf("\n");
+}
+
+static int sockmap_init_sockets(int verbose)
+{
+ int i, err, one = 1;
+ struct sockaddr_in addr;
+ int *fds[4] = {&s1, &s2, &c1, &c2};
+
+ s1 = s2 = p1 = p2 = c1 = c2 = 0;
+
+ /* Init sockets */
+ for (i = 0; i < 4; i++) {
+ *fds[i] = socket(AF_INET, SOCK_STREAM, 0);
+ if (*fds[i] < 0) {
+ perror("socket s1 failed()");
+ return errno;
+ }
+ }
+
+ /* Allow reuse */
+ for (i = 0; i < 2; i++) {
+ err = setsockopt(*fds[i], SOL_SOCKET, SO_REUSEADDR,
+ (char *)&one, sizeof(one));
+ if (err) {
+ perror("setsockopt failed()");
+ return errno;
+ }
+ }
+
+ /* Non-blocking sockets */
+ for (i = 0; i < 2; i++) {
+ err = ioctl(*fds[i], FIONBIO, (char *)&one);
+ if (err < 0) {
+ perror("ioctl s1 failed()");
+ return errno;
+ }
+ }
+
+ /* Bind server sockets */
+ memset(&addr, 0, sizeof(struct sockaddr_in));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+ addr.sin_port = htons(S1_PORT);
+ err = bind(s1, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind s1 failed()\n");
+ return errno;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = bind(s2, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0) {
+ perror("bind s2 failed()\n");
+ return errno;
+ }
+
+ /* Listen server sockets */
+ addr.sin_port = htons(S1_PORT);
+ err = listen(s1, 32);
+ if (err < 0) {
+ perror("listen s1 failed()\n");
+ return errno;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = listen(s2, 32);
+ if (err < 0) {
+ perror("listen s1 failed()\n");
+ return errno;
+ }
+
+ /* Initiate Connect */
+ addr.sin_port = htons(S1_PORT);
+ err = connect(c1, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0 && errno != EINPROGRESS) {
+ perror("connect c1 failed()\n");
+ return errno;
+ }
+
+ addr.sin_port = htons(S2_PORT);
+ err = connect(c2, (struct sockaddr *)&addr, sizeof(addr));
+ if (err < 0 && errno != EINPROGRESS) {
+ perror("connect c2 failed()\n");
+ return errno;
+ } else if (err < 0) {
+ err = 0;
+ }
+
+ /* Accept connections */
+ p1 = accept(s1, NULL, NULL);
+ if (p1 < 0) {
+ perror("accept s1 failed()\n");
+ return errno;
+ }
+
+ p2 = accept(s2, NULL, NULL);
+ if (p2 < 0) {
+ perror("accept s1 failed()\n");
+ return errno;
+ }
+
+ if (verbose) {
+ printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
+ printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
+ c1, s1, c2, s2);
+ }
+ return 0;
+}
+
+struct msg_stats {
+ size_t bytes_sent;
+ size_t bytes_recvd;
+ struct timespec start;
+ struct timespec end;
+};
+
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+ int iov_count;
+ int iov_length;
+ int rate;
+};
+
+static int msg_loop_sendpage(int fd, int iov_length, int cnt,
+ struct msg_stats *s,
+ struct sockmap_options *opt)
+{
+ bool drop = opt->drop_expected;
+ unsigned char k = 0;
+ FILE *file;
+ int i, fp;
+
+ file = fopen(".sendpage_tst.tmp", "w+");
+ if (!file) {
+ perror("create file for sendpage");
+ return 1;
+ }
+ for (i = 0; i < iov_length * cnt; i++, k++)
+ fwrite(&k, sizeof(char), 1, file);
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+ fclose(file);
+
+ fp = open(".sendpage_tst.tmp", O_RDONLY);
+ if (fp < 0) {
+ perror("reopen file for sendpage");
+ return 1;
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendfile(fd, fp, NULL, iov_length);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ close(fp);
+ return sent;
+ } else if (drop && sent >= 0) {
+ printf("sendpage loop error expected: %i\n", sent);
+ close(fp);
+ return -EIO;
+ }
+
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ close(fp);
+ return 0;
+}
+
+static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
+ struct msg_stats *s, bool tx,
+ struct sockmap_options *opt)
+{
+ struct msghdr msg = {0};
+ int err, i, flags = MSG_NOSIGNAL;
+ struct iovec *iov;
+ unsigned char k;
+ bool data_test = opt->data_test;
+ bool drop = opt->drop_expected;
+
+ iov = calloc(iov_count, sizeof(struct iovec));
+ if (!iov)
+ return errno;
+
+ k = 0;
+ for (i = 0; i < iov_count; i++) {
+ unsigned char *d = calloc(iov_length, sizeof(char));
+
+ if (!d) {
+ fprintf(stderr, "iov_count %i/%i OOM\n", i, iov_count);
+ goto out_errno;
+ }
+ iov[i].iov_base = d;
+ iov[i].iov_len = iov_length;
+
+ if (data_test && tx) {
+ int j;
+
+ for (j = 0; j < iov_length; j++)
+ d[j] = k++;
+ }
+ }
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_count;
+ k = 0;
+
+ if (tx) {
+ clock_gettime(CLOCK_MONOTONIC, &s->start);
+ for (i = 0; i < cnt; i++) {
+ int sent = sendmsg(fd, &msg, flags);
+
+ if (!drop && sent < 0) {
+ perror("send loop error:");
+ goto out_errno;
+ } else if (drop && sent >= 0) {
+ printf("send loop error expected: %i\n", sent);
+ errno = -EIO;
+ goto out_errno;
+ }
+ if (sent > 0)
+ s->bytes_sent += sent;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ } else {
+ int slct, recv, max_fd = fd;
+ int fd_flags = O_NONBLOCK;
+ struct timeval timeout;
+ float total_bytes;
+ int bytes_cnt = 0;
+ int chunk_sz;
+ fd_set w;
+
+ if (opt->sendpage)
+ chunk_sz = iov_length * cnt;
+ else
+ chunk_sz = iov_length * iov_count;
+
+ fcntl(fd, F_SETFL, fcntl(fd, F_GETFL, 0) | fd_flags);
+ total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
+ err = clock_gettime(CLOCK_MONOTONIC, &s->start);
+ if (err < 0)
+ perror("recv start time: ");
+ while (s->bytes_recvd < total_bytes) {
+ if (txmsg_cork) {
+ timeout.tv_sec = 0;
+ timeout.tv_usec = 300000;
+ } else {
+ timeout.tv_sec = 1;
+ timeout.tv_usec = 0;
+ }
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+
+ slct = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (slct == -1) {
+ perror("select()");
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ } else if (!slct) {
+ if (opt->verbose)
+ fprintf(stderr, "unexpected timeout\n");
+ errno = EIO;
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ goto out_errno;
+ }
+
+ recv = recvmsg(fd, &msg, flags);
+ if (recv < 0) {
+ if (errno != EWOULDBLOCK) {
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ perror("recv failed()");
+ goto out_errno;
+ }
+ /* Spurious wakeup: retry instead of counting -1 bytes */
+ continue;
+ }
+
+ s->bytes_recvd += recv;
+
+ if (data_test) {
+ int j;
+
+ for (i = 0; i < msg.msg_iovlen; i++) {
+ unsigned char *d = iov[i].iov_base;
+
+ for (j = 0;
+ j < iov[i].iov_len && recv; j++) {
+ if (d[j] != k++) {
+ errno = EIO;
+ fprintf(stderr,
+ "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
+ i, j, d[j], k - 1, d[j+1], k);
+ goto out_errno;
+ }
+ bytes_cnt++;
+ if (bytes_cnt == chunk_sz) {
+ k = 0;
+ bytes_cnt = 0;
+ }
+ recv--;
+ }
+ }
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &s->end);
+ }
+
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return 0;
+out_errno:
+ for (i = 0; i < iov_count; i++)
+ free(iov[i].iov_base);
+ free(iov);
+ return errno;
+}
+
+static float giga = 1000000000;
+
+static inline float sentBps(struct msg_stats s)
+{
+ return s.bytes_sent / (s.end.tv_sec - s.start.tv_sec);
+}
+
+static inline float recvdBps(struct msg_stats s)
+{
+ return s.bytes_recvd / (s.end.tv_sec - s.start.tv_sec);
+}
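+
+/* Both helpers divide by whole seconds only, so they are meaningful just
+ * for runs of at least one second; callers guard against division by zero
+ * by checking (end.tv_sec - start.tv_sec) first.
+ */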
+
+static int sendmsg_test(struct sockmap_options *opt)
+{
+ float sent_Bps = 0, recvd_Bps = 0;
+ int rx_fd, txpid, rxpid, err = 0;
+ struct msg_stats s = {0};
+ int iov_count = opt->iov_count;
+ int iov_buf = opt->iov_length;
+ int rx_status, tx_status;
+ int cnt = opt->rate;
+
+ errno = 0;
+
+ if (opt->base)
+ rx_fd = p1;
+ else
+ rx_fd = p2;
+
+ rxpid = fork();
+ if (rxpid == 0) {
+ if (opt->drop_expected)
+ exit(0);
+
+ if (opt->sendpage)
+ iov_count = 1;
+ err = msg_loop(rx_fd, iov_count, iov_buf,
+ cnt, &s, false, opt);
+ if (err && opt->verbose)
+ fprintf(stderr,
+ "msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(p2, SHUT_RDWR);
+ shutdown(p1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ if (opt->verbose)
+ fprintf(stdout,
+ "rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ if (err && txmsg_cork)
+ err = 0;
+ exit(err ? 1 : 0);
+ } else if (rxpid == -1) {
+ perror("msg_loop_rx: ");
+ return errno;
+ }
+
+ txpid = fork();
+ if (txpid == 0) {
+ if (opt->sendpage)
+ err = msg_loop_sendpage(c1, iov_buf, cnt, &s, opt);
+ else
+ err = msg_loop(c1, iov_count, iov_buf,
+ cnt, &s, true, opt);
+
+ if (err)
+ fprintf(stderr,
+ "msg_loop_tx: iov_count %i iov_buf %i cnt %i err %i\n",
+ iov_count, iov_buf, cnt, err);
+ shutdown(c1, SHUT_RDWR);
+ if (s.end.tv_sec - s.start.tv_sec) {
+ sent_Bps = sentBps(s);
+ recvd_Bps = recvdBps(s);
+ }
+ if (opt->verbose)
+ fprintf(stdout,
+ "tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
+ s.bytes_sent, sent_Bps, sent_Bps/giga,
+ s.bytes_recvd, recvd_Bps, recvd_Bps/giga);
+ exit(err ? 1 : 0);
+ } else if (txpid == -1) {
+ perror("msg_loop_tx: ");
+ return errno;
+ }
+
+ assert(waitpid(rxpid, &rx_status, 0) == rxpid);
+ assert(waitpid(txpid, &tx_status, 0) == txpid);
+ if (WIFEXITED(rx_status)) {
+ err = WEXITSTATUS(rx_status);
+ if (err) {
+ fprintf(stderr, "rx thread exited with err %d. ", err);
+ goto out;
+ }
+ }
+ if (WIFEXITED(tx_status)) {
+ err = WEXITSTATUS(tx_status);
+ if (err)
+ fprintf(stderr, "tx thread exited with err %d. ", err);
+ }
+out:
+ return err;
+}
+
+static int forever_ping_pong(int rate, struct sockmap_options *opt)
+{
+ struct timeval timeout;
+ char buf[1024] = {0};
+ int sc;
+
+ timeout.tv_sec = 10;
+ timeout.tv_usec = 0;
+
+ /* Ping/Pong data from client to server */
+ sc = send(c1, buf, sizeof(buf), 0);
+ if (sc < 0) {
+ perror("send failed()\n");
+ return sc;
+ }
+
+ do {
+ int s, rc, i, max_fd = p2;
+ fd_set w;
+
+ /* FD sets */
+ FD_ZERO(&w);
+ FD_SET(c1, &w);
+ FD_SET(c2, &w);
+ FD_SET(p1, &w);
+ FD_SET(p2, &w);
+
+ s = select(max_fd + 1, &w, NULL, NULL, &timeout);
+ if (s == -1) {
+ perror("select()");
+ break;
+ } else if (!s) {
+ fprintf(stderr, "unexpected timeout\n");
+ break;
+ }
+
+ for (i = 0; i <= max_fd && s > 0; ++i) {
+ if (!FD_ISSET(i, &w))
+ continue;
+
+ s--;
+
+ rc = recv(i, buf, sizeof(buf), 0);
+ if (rc < 0) {
+ if (errno != EWOULDBLOCK) {
+ perror("recv failed()\n");
+ return rc;
+ }
+ }
+
+ if (rc == 0) {
+ close(i);
+ break;
+ }
+
+ sc = send(i, buf, rc, 0);
+ if (sc < 0) {
+ perror("send failed()\n");
+ return sc;
+ }
+ }
+
+ if (rate)
+ sleep(rate);
+
+ if (opt->verbose) {
+ printf(".");
+ fflush(stdout);
+ }
+ } while (running);
+
+ return 0;
+}
+
+enum {
+ PING_PONG,
+ SENDMSG,
+ BASE,
+ BASE_SENDPAGE,
+ SENDPAGE,
+};
+
+static int run_options(struct sockmap_options *options, int cg_fd, int test)
+{
+ int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+
+ /* If base test skip BPF setup */
+ if (test == BASE || test == BASE_SENDPAGE)
+ goto run;
+
+ /* Attach programs to sockmap */
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[0], err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[1], map_fd[0],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+ /* Attach to cgroups */
+ err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+
+run:
+ err = sockmap_init_sockets(options->verbose);
+ if (err) {
+ fprintf(stderr, "ERROR: test socket failed: %d\n", err);
+ goto out;
+ }
+
+ /* Attach txmsg program to sockmap */
+ if (txmsg_pass)
+ tx_prog_fd = prog_fd[3];
+ else if (txmsg_noisy)
+ tx_prog_fd = prog_fd[4];
+ else if (txmsg_redir)
+ tx_prog_fd = prog_fd[5];
+ else if (txmsg_redir_noisy)
+ tx_prog_fd = prog_fd[6];
+ else if (txmsg_drop)
+ tx_prog_fd = prog_fd[9];
+ /* apply and cork must be last */
+ else if (txmsg_apply)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_cork)
+ tx_prog_fd = prog_fd[8];
+ else
+ tx_prog_fd = 0;
+
+ if (tx_prog_fd) {
+ int redir_fd, i = 0;
+
+ err = bpf_prog_attach(tx_prog_fd,
+ map_fd[1], BPF_SK_MSG_VERDICT, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+
+ err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+
+ if (txmsg_redir || txmsg_redir_noisy)
+ redir_fd = c2;
+ else
+ redir_fd = c1;
+
+ err = bpf_map_update_elem(map_fd[2], &i, &redir_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+
+ if (txmsg_apply) {
+ err = bpf_map_update_elem(map_fd[3],
+ &i, &txmsg_apply, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (apply_bytes): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_cork) {
+ err = bpf_map_update_elem(map_fd[4],
+ &i, &txmsg_cork, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (cork_bytes): %d (%s\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_start) {
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_start, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_start): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_end) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[5],
+ &i, &txmsg_end, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_end): %d (%s)\n",
+ err, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (txmsg_ingress) {
+ int in = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ i = 1;
+ err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 2;
+ err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_skb) {
+ int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
+ p2 : p1;
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ i = 3;
+ err = bpf_map_update_elem(map_fd[0],
+ &i, &skb_fd, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+ }
+
+ if (txmsg_drop)
+ options->drop_expected = true;
+
+ if (test == PING_PONG)
+ err = forever_ping_pong(options->rate, options);
+ else if (test == SENDMSG) {
+ options->base = false;
+ options->sendpage = false;
+ err = sendmsg_test(options);
+ } else if (test == SENDPAGE) {
+ options->base = false;
+ options->sendpage = true;
+ err = sendmsg_test(options);
+ } else if (test == BASE) {
+ options->base = true;
+ options->sendpage = false;
+ err = sendmsg_test(options);
+ } else if (test == BASE_SENDPAGE) {
+ options->base = true;
+ options->sendpage = true;
+ err = sendmsg_test(options);
+ } else
+ fprintf(stderr, "unknown test\n");
+out:
+ /* Detach and zero all the maps */
+ bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ if (tx_prog_fd >= 0)
+ bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+
+ for (i = 0; i < 8; i++) {
+ key = next_key = 0;
+ bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+ while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
+ bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
+ key = next_key;
+ }
+ }
+
+ close(s1);
+ close(s2);
+ close(p1);
+ close(p2);
+ close(c1);
+ close(c2);
+ return err;
+}
+
+static char *test_to_str(int test)
+{
+ switch (test) {
+ case SENDMSG:
+ return "sendmsg";
+ case SENDPAGE:
+ return "sendpage";
+ }
+ return "unknown";
+}
+
+#define OPTSTRING 60
+static void test_options(char *options)
+{
+ char tstr[OPTSTRING];
+
+ memset(options, 0, OPTSTRING);
+
+ if (txmsg_pass)
+ strncat(options, "pass,", OPTSTRING);
+ if (txmsg_noisy)
+ strncat(options, "pass_noisy,", OPTSTRING);
+ if (txmsg_redir)
+ strncat(options, "redir,", OPTSTRING);
+ if (txmsg_redir_noisy)
+ strncat(options, "redir_noisy,", OPTSTRING);
+ if (txmsg_drop)
+ strncat(options, "drop,", OPTSTRING);
+ if (txmsg_apply) {
+ snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_cork) {
+ snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_start) {
+ snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_end) {
+ snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
+ strncat(options, tstr, OPTSTRING);
+ }
+ if (txmsg_ingress)
+ strncat(options, "ingress,", OPTSTRING);
+ if (txmsg_skb)
+ strncat(options, "skb,", OPTSTRING);
+}
+
+static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
+{
+ char *options = calloc(OPTSTRING, sizeof(char));
+ int err;
+
+ if (test == SENDPAGE)
+ opt->sendpage = true;
+ else
+ opt->sendpage = false;
+
+ if (txmsg_drop)
+ opt->drop_expected = true;
+ else
+ opt->drop_expected = false;
+
+ test_options(options);
+
+ fprintf(stdout,
+ "[TEST %i]: (%i, %i, %i, %s, %s): ",
+ test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+ test_to_str(test), options);
+ fflush(stdout);
+ err = run_options(opt, cgrp, test);
+ fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+ test_cnt++;
+ !err ? passed++ : failed++;
+ free(options);
+ return err;
+}
+
+static int test_exec(int cgrp, struct sockmap_options *opt)
+{
+ int err = __test_exec(cgrp, SENDMSG, opt);
+
+ if (err)
+ goto out;
+
+ err = __test_exec(cgrp, SENDPAGE, opt);
+out:
+ return err;
+}
+
+static int test_loop(int cgrp)
+{
+ struct sockmap_options opt;
+
+ int err, i, l, r;
+
+ opt.verbose = 0;
+ opt.base = false;
+ opt.sendpage = false;
+ opt.data_test = false;
+ opt.drop_expected = false;
+ opt.iov_count = 0;
+ opt.iov_length = 0;
+ opt.rate = 0;
+
+ r = 1;
+ for (i = 1; i < 100; i += 33) {
+ for (l = 1; l < 100; l += 33) {
+ opt.rate = r;
+ opt.iov_count = i;
+ opt.iov_length = l;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+ }
+ sched_yield();
+out:
+ return err;
+}
+
+static int test_txmsg(int cgrp)
+{
+ int err;
+
+ txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_ingress = txmsg_skb = 0;
+
+ txmsg_pass = 1;
+ err = test_loop(cgrp);
+ txmsg_pass = 0;
+ if (err)
+ goto out;
+
+ txmsg_redir = 1;
+ err = test_loop(cgrp);
+ txmsg_redir = 0;
+ if (err)
+ goto out;
+
+ txmsg_drop = 1;
+ err = test_loop(cgrp);
+ txmsg_drop = 0;
+ if (err)
+ goto out;
+
+ txmsg_redir = 1;
+ txmsg_ingress = 1;
+ err = test_loop(cgrp);
+ txmsg_redir = 0;
+ txmsg_ingress = 0;
+ if (err)
+ goto out;
+out:
+ txmsg_pass = 0;
+ txmsg_redir = 0;
+ txmsg_drop = 0;
+ return err;
+}
+
+static int test_send(struct sockmap_options *opt, int cgrp)
+{
+ int err;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1024;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1024;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 1;
+ opt->iov_count = 1;
+ opt->rate = 512;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->iov_length = 256;
+ opt->iov_count = 1024;
+ opt->rate = 2;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+
+ opt->rate = 100;
+ opt->iov_count = 1;
+ opt->iov_length = 5;
+ err = test_exec(cgrp, opt);
+ if (err)
+ goto out;
+out:
+ sched_yield();
+ return err;
+}
+
+static int test_mixed(int cgrp)
+{
+ struct sockmap_options opt = {0};
+ int err;
+
+ txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_start = txmsg_end = 0;
+ /* Test small and large iov_count values with pass/redir/apply/cork */
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_cork = 4096;
+ txmsg_apply = 4096;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 0;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 1024;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_cork = 4096;
+ txmsg_apply = 4096;
+ err = test_send(&opt, cgrp);
+ if (err)
+ goto out;
+out:
+ return err;
+}
+
+static int test_start_end(int cgrp)
+{
+ struct sockmap_options opt = {0};
+ int err, i;
+
+ /* Test basic start/end with lots of iov_count and iov_lengths */
+ txmsg_start = 1;
+ txmsg_end = 2;
+ err = test_txmsg(cgrp);
+ if (err)
+ goto out;
+
+ /* Test start/end with cork */
+ opt.rate = 16;
+ opt.iov_count = 1;
+ opt.iov_length = 100;
+ txmsg_cork = 1600;
+
+ for (i = 99; i <= 1600; i += 500) {
+ txmsg_start = 0;
+ txmsg_end = i;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+
+ /* Test start/end with cork but pull data in middle */
+ for (i = 199; i <= 1600; i += 500) {
+ txmsg_start = 100;
+ txmsg_end = i;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+ }
+
+ /* Test start/end with cork pulling last sg entry */
+ txmsg_start = 1500;
+ txmsg_end = 1600;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end pull of single byte in last page */
+ txmsg_start = 1111;
+ txmsg_end = 1112;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with end < start */
+ txmsg_start = 1111;
+ txmsg_end = 0;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with end > data */
+ txmsg_start = 0;
+ txmsg_end = 1601;
+ err = test_exec(cgrp, &opt);
+ if (err)
+ goto out;
+
+ /* Test start/end with start > data */
+ txmsg_start = 1601;
+ txmsg_end = 1600;
+ err = test_exec(cgrp, &opt);
+
+out:
+ txmsg_start = 0;
+ txmsg_end = 0;
+ sched_yield();
+ return err;
+}
+
+char *map_names[] = {
+ "sock_map",
+ "sock_map_txmsg",
+ "sock_map_redir",
+ "sock_apply_bytes",
+ "sock_cork_bytes",
+ "sock_pull_bytes",
+ "sock_redir_flags",
+ "sock_skb_opts",
+};
+
+int prog_attach_type[] = {
+ BPF_SK_SKB_STREAM_PARSER,
+ BPF_SK_SKB_STREAM_VERDICT,
+ BPF_CGROUP_SOCK_OPS,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+ BPF_SK_MSG_VERDICT,
+};
+
+int prog_type[] = {
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SOCK_OPS,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+ BPF_PROG_TYPE_SK_MSG,
+};
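+
+/* Both tables are indexed by program position in the ELF object:
+ * populate_progs() walks the object with bpf_object__for_each_program()
+ * and assigns types in order, so the entries here must match the section
+ * order in test_sockmap_kern.h.
+ */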
+
+static int populate_progs(char *bpf_file)
+{
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int i = 0;
+ long err;
+
+ obj = bpf_object__open(bpf_file);
+ err = libbpf_get_error(obj);
+ if (err) {
+ char err_buf[256];
+
+ libbpf_strerror(err, err_buf, sizeof(err_buf));
+ printf("Unable to load eBPF objects in file '%s' : %s\n",
+ bpf_file, err_buf);
+ return -1;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ bpf_program__set_type(prog, prog_type[i]);
+ bpf_program__set_expected_attach_type(prog,
+ prog_attach_type[i]);
+ i++;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("Unable to load eBPF object: %ld\n", err);
+ return -1;
+ }
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ prog_fd[i] = bpf_program__fd(prog);
+ i++;
+ }
+
+ for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+ maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
+ map_fd[i] = bpf_map__fd(maps[i]);
+ if (map_fd[i] < 0) {
+ fprintf(stderr, "load_bpf_file: (%i) %s\n",
+ map_fd[i], strerror(errno));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int __test_suite(char *bpf_file)
+{
+ int cg_fd, err;
+
+ err = populate_progs(bpf_file);
+ if (err < 0) {
+ fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
+ return err;
+ }
+
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "ERROR: cgroup env failed\n");
+ return -EINVAL;
+ }
+
+ cg_fd = create_and_get_cgroup(CG_PATH);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, CG_PATH);
+ return cg_fd;
+ }
+
+ if (join_cgroup(CG_PATH)) {
+ fprintf(stderr, "ERROR: failed to join cgroup\n");
+ return -EINVAL;
+ }
+
+ /* Tests basic commands and APIs with a range of iov values */
+ txmsg_start = txmsg_end = 0;
+ err = test_txmsg(cg_fd);
+ if (err)
+ goto out;
+
+ /* Tests interesting combinations of APIs used together */
+ err = test_mixed(cg_fd);
+ if (err)
+ goto out;
+
+ /* Tests pull_data API using start/end API */
+ err = test_start_end(cg_fd);
+ if (err)
+ goto out;
+
+out:
+ printf("Summary: %i PASSED %i FAILED\n", passed, failed);
+ cleanup_cgroup_environment();
+ close(cg_fd);
+ return err;
+}
+
+static int test_suite(void)
+{
+ int err;
+
+ err = __test_suite(BPF_SOCKMAP_FILENAME);
+ if (err)
+ goto out;
+ err = __test_suite(BPF_SOCKHASH_FILENAME);
+out:
+ return err;
+}
+
+int main(int argc, char **argv)
+{
+ int iov_count = 1, length = 1024, rate = 1;
+ struct sockmap_options options = {0};
+ int opt, longindex, err, cg_fd = 0;
+ char *bpf_file = BPF_SOCKMAP_FILENAME;
+ int test = PING_PONG;
+
+ if (argc < 2)
+ return test_suite();
+
+ while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:",
+ long_options, &longindex)) != -1) {
+ switch (opt) {
+ case 's':
+ txmsg_start = atoi(optarg);
+ break;
+ case 'e':
+ txmsg_end = atoi(optarg);
+ break;
+ case 'a':
+ txmsg_apply = atoi(optarg);
+ break;
+ case 'k':
+ txmsg_cork = atoi(optarg);
+ break;
+ case 'c':
+ cg_fd = open(optarg, O_RDONLY | O_DIRECTORY);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, optarg);
+ return cg_fd;
+ }
+ break;
+ case 'r':
+ rate = atoi(optarg);
+ break;
+ case 'v':
+ options.verbose = 1;
+ break;
+ case 'i':
+ iov_count = atoi(optarg);
+ break;
+ case 'l':
+ length = atoi(optarg);
+ break;
+ case 'd':
+ options.data_test = true;
+ break;
+ case 't':
+ if (strcmp(optarg, "ping") == 0) {
+ test = PING_PONG;
+ } else if (strcmp(optarg, "sendmsg") == 0) {
+ test = SENDMSG;
+ } else if (strcmp(optarg, "base") == 0) {
+ test = BASE;
+ } else if (strcmp(optarg, "base_sendpage") == 0) {
+ test = BASE_SENDPAGE;
+ } else if (strcmp(optarg, "sendpage") == 0) {
+ test = SENDPAGE;
+ } else {
+ usage(argv);
+ return -1;
+ }
+ break;
+ case 0:
+ break;
+ case 'h':
+ default:
+ usage(argv);
+ return -1;
+ }
+ }
+
+ if (!cg_fd) {
+ fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
+ argv[0]);
+ return -1;
+ }
+
+ err = populate_progs(bpf_file);
+ if (err) {
+ fprintf(stderr, "populate program: (%s) %s\n",
+ bpf_file, strerror(errno));
+ return 1;
+ }
+ running = 1;
+
+ /* catch SIGINT */
+ signal(SIGINT, running_handler);
+
+ options.iov_count = iov_count;
+ options.iov_length = length;
+ options.rate = rate;
+
+ err = run_options(&options, cg_fd, test);
+ close(cg_fd);
+ return err;
+}
+
+void running_handler(int a)
+{
+ running = 0;
+}
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.c b/tools/testing/selftests/bpf/test_sockmap_kern.c
new file mode 100644
index 000000000..677b2ed1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
+#define SOCKMAP
+#define TEST_MAP_TYPE BPF_MAP_TYPE_SOCKMAP
+#include "./test_sockmap_kern.h"
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
new file mode 100644
index 000000000..8e8e41780
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_sockmap_kern.h
@@ -0,0 +1,363 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ * client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard-code values here and bind 1:1. The
+ * hard-coded values are part of the setup in the sockmap.sh script
+ * associated with this BPF program.
+ *
+ * The bpf_printk macro is verbose and prints information as
+ * connections are established and verdicts are decided.
+ */
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
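+
+/* Example: bpf_printk("sk_skb2: len %d\n", len) copies the format string
+ * onto the BPF stack and hands it to bpf_trace_printk(); the output can
+ * be read from /sys/kernel/debug/tracing/trace_pipe.
+ */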
+
+struct bpf_map_def SEC("maps") sock_map = {
+ .type = TEST_MAP_TYPE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_txmsg = {
+ .type = TEST_MAP_TYPE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_map_redir = {
+ .type = TEST_MAP_TYPE,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 20,
+};
+
+struct bpf_map_def SEC("maps") sock_apply_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_cork_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_pull_bytes = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 2
+};
+
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
+
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .max_entries = 1
+};
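+
+/* These map names are looked up by test_sockmap.c (map_names[]), which
+ * exposes them to user space in this same order as map_fd[0]..map_fd[7].
+ */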
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+ __u32 lport = skb->local_port;
+ __u32 rport = skb->remote_port;
+ int len, *f, ret, zero = 0;
+ __u64 flags = 0;
+
+ if (lport == 10000)
+ ret = 10;
+ else
+ ret = 1;
+
+ len = (__u32)skb->data_end - (__u32)skb->data;
+ f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+ if (f && *f) {
+ ret = 3;
+ flags = *f;
+ }
+
+ bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+ len, flags);
+#ifdef SOCKMAP
+ return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+#else
+ return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
+#endif
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 lport, rport;
+ int op, err = 0, ret;
+
+ op = (int) skops->op;
+
+ switch (op) {
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ lport = skops->local_port;
+ rport = skops->remote_port;
+
+ if (lport == 10000) {
+ ret = 1;
+#ifdef SOCKMAP
+ err = bpf_sock_map_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#else
+ err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#endif
+ bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+ lport, bpf_ntohl(rport), err);
+ }
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ lport = skops->local_port;
+ rport = skops->remote_port;
+
+ if (bpf_ntohl(rport) == 10001) {
+ ret = 10;
+#ifdef SOCKMAP
+ err = bpf_sock_map_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#else
+ err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ BPF_NOEXIST);
+#endif
+ bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+ lport, bpf_ntohl(rport), err);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
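+
+/* Ports 10000 and 10001 above correspond to S1_PORT and S2_PORT in
+ * test_sockmap.c: established sockets land in sock_map at key 1 (passive
+ * side of port 10000) and key 10 (active side towards port 10001), the
+ * two indices bpf_prog2 redirects between.
+ */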
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+ int err1 = -1, err2 = -1, zero = 0, one = 1;
+ int *bytes, *start, *end, len1, len2;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end) {
+ int err;
+
+ bpf_printk("sk_msg2: pull(%i:%i)\n",
+ start ? *start : 0, end ? *end : 0);
+ err = bpf_msg_pull_data(msg, *start, *end, 0);
+ if (err)
+ bpf_printk("sk_msg2: pull_data err %i\n",
+ err);
+ len2 = (__u64)msg->data_end - (__u64)msg->data;
+ bpf_printk("sk_msg2: length update %i->%i\n",
+ len1, len2);
+ }
+ bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+ len1, err1, err2);
+ return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1, key = 0;
+ int *start, *end, *f;
+ __u64 flags = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+ f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+ if (f && *f) {
+ key = 2;
+ flags = *f;
+ }
+#ifdef SOCKMAP
+ return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+ return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+	int *f, *bytes, *start, *end, len1, len2;
+	__u64 flags = 0;
+	int err;
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ err1 = bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ err2 = bpf_msg_cork_bytes(msg, *bytes);
+ len1 = (__u64)msg->data_end - (__u64)msg->data;
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+	if (start && end) {
+		bpf_printk("sk_msg4: pull(%i:%i)\n", *start, *end);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg4: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length update %i->%i\n",
+			   len1, len2);
+	}
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg4: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+#ifdef SOCKMAP
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+	bpf_printk("sk_msg4: err %i\n", err);
+ return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes) {
+ ret = bpf_msg_apply_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+ void *data_end = (void *)(long) msg->data_end;
+ void *data = (void *)(long) msg->data;
+ int ret = 0, *bytes, zero = 0;
+
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes) {
+ if (((__u64)data_end - (__u64)data) >= *bytes)
+ return SK_PASS;
+ ret = bpf_msg_cork_bytes(msg, *bytes);
+ if (ret)
+ return SK_DROP;
+ }
+ return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+ int *bytes, zero = 0, one = 1;
+ int *start, *end;
+
+ bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+ if (bytes)
+ bpf_msg_apply_bytes(msg, *bytes);
+ bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+ if (bytes)
+ bpf_msg_cork_bytes(msg, *bytes);
+ start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
+ end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
+ if (start && end)
+ bpf_msg_pull_data(msg, *start, *end, 0);
+
+ return SK_DROP;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
new file mode 100644
index 000000000..d86c281e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_build_id.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 16384,
+};
+
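+/* With BPF_F_STACK_BUILD_ID, each stack trace slot stores
+ * struct bpf_stack_build_id entries (build id plus offset) instead of
+ * raw instruction pointers.
+ */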
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+ .map_flags = BPF_F_STACK_BUILD_ID,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH,
+ .max_entries = 128,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
+struct random_urandom_args {
+ unsigned long long pad;
+ int got_bits;
+ int pool_left;
+ int input_left;
+};
+
+SEC("tracepoint/random/urandom_read")
+int oncpu(struct random_urandom_args *args)
+{
+ __u32 max_len = sizeof(struct bpf_stack_build_id)
+ * PERF_MAX_STACK_DEPTH;
+ __u32 key = 0, val = 0, *value_p;
+ void *stack_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(args, &stackmap, BPF_F_USER_STACK);
+ if ((int)key >= 0) {
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p)
+ bpf_get_stack(args, stack_p, max_len,
+ BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_stacktrace_map.c b/tools/testing/selftests/bpf/test_stacktrace_map.c
new file mode 100644
index 000000000..af111af7c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_stacktrace_map.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+struct bpf_map_def SEC("maps") control_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 1,
+};
+
+struct bpf_map_def SEC("maps") stackid_hmap = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stackmap = {
+ .type = BPF_MAP_TYPE_STACK_TRACE,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16384,
+};
+
+struct bpf_map_def SEC("maps") stack_amap = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64) * PERF_MAX_STACK_DEPTH,
+ .max_entries = 16384,
+};
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ __u32 max_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
+ __u32 key = 0, val = 0, *value_p;
+ void *stack_p;
+
+ value_p = bpf_map_lookup_elem(&control_map, &key);
+ if (value_p && *value_p)
+ return 0; /* skip if non-zero *value_p */
+
+ /* The size of stackmap and stackid_hmap should be the same */
+ key = bpf_get_stackid(ctx, &stackmap, 0);
+ if ((int)key >= 0) {
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p)
+ bpf_get_stack(ctx, stack_p, max_len, 0);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
new file mode 100644
index 000000000..6272c784c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <time.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sched.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <sys/socket.h>
+
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_alg.h>
+
+#include <bpf/bpf.h>
+
+#include "../../../include/linux/filter.h"
+#include "bpf_rlimit.h"
+
+static struct bpf_insn prog[BPF_MAXINSNS];
+
+static void bpf_gen_imm_prog(unsigned int insns, int fd_map)
+{
+ int i;
+
+ srand(time(NULL));
+ for (i = 0; i < insns; i++)
+ prog[i] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, rand());
+ prog[i - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_gen_map_prog(unsigned int insns, int fd_map)
+{
+ int i, j = 0;
+
+ for (i = 0; i + 1 < insns; i += 2) {
+ struct bpf_insn tmp[] = {
+ BPF_LD_MAP_FD(j++ % BPF_REG_10, fd_map)
+ };
+
+ memcpy(&prog[i], tmp, sizeof(tmp));
+ }
+ if (insns % 2 == 0)
+ prog[insns - 2] = BPF_ALU64_IMM(BPF_MOV, i % BPF_REG_10, 42);
+ prog[insns - 1] = BPF_EXIT_INSN();
+}
+
+static int bpf_try_load_prog(int insns, int fd_map,
+ void (*bpf_filler)(unsigned int insns,
+ int fd_map))
+{
+ int fd_prog;
+
+ bpf_filler(insns, fd_map);
+ fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+ NULL, 0);
+ assert(fd_prog > 0);
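+	/* Regenerate the image with fd 0 so the af_alg digest computed in
+	 * tag_from_alg() matches the kernel's tag, which is calculated
+	 * with map references zeroed out.
+	 */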
+ if (fd_map > 0)
+ bpf_filler(insns, 0);
+ return fd_prog;
+}
+
+static int __hex2bin(char ch)
+{
+ if ((ch >= '0') && (ch <= '9'))
+ return ch - '0';
+ ch = tolower(ch);
+ if ((ch >= 'a') && (ch <= 'f'))
+ return ch - 'a' + 10;
+ return -1;
+}
+
+static int hex2bin(uint8_t *dst, const char *src, size_t count)
+{
+ while (count--) {
+ int hi = __hex2bin(*src++);
+ int lo = __hex2bin(*src++);
+
+ if ((hi < 0) || (lo < 0))
+ return -1;
+ *dst++ = (hi << 4) | lo;
+ }
+ return 0;
+}
+
+static void tag_from_fdinfo(int fd_prog, uint8_t *tag, uint32_t len)
+{
+ const int prefix_len = sizeof("prog_tag:\t") - 1;
+ char buff[256];
+ int ret = -1;
+ FILE *fp;
+
+ snprintf(buff, sizeof(buff), "/proc/%d/fdinfo/%d", getpid(),
+ fd_prog);
+ fp = fopen(buff, "r");
+ assert(fp);
+
+ while (fgets(buff, sizeof(buff), fp)) {
+ if (strncmp(buff, "prog_tag:\t", prefix_len))
+ continue;
+ ret = hex2bin(tag, buff + prefix_len, len);
+ break;
+ }
+
+ fclose(fp);
+ assert(!ret);
+}
+
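+/* The kernel derives prog_tag from a SHA-1 over the instruction image
+ * (with map references zeroed); recompute it here through the kernel
+ * crypto API's AF_ALG socket interface so it can be compared against
+ * the fdinfo value.
+ */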
+static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
+{
+ static const struct sockaddr_alg alg = {
+ .salg_family = AF_ALG,
+ .salg_type = "hash",
+ .salg_name = "sha1",
+ };
+ int fd_base, fd_alg, ret;
+ ssize_t size;
+
+ fd_base = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ assert(fd_base > 0);
+
+ ret = bind(fd_base, (struct sockaddr *)&alg, sizeof(alg));
+ assert(!ret);
+
+ fd_alg = accept(fd_base, NULL, 0);
+ assert(fd_alg > 0);
+
+ insns *= sizeof(struct bpf_insn);
+ size = write(fd_alg, prog, insns);
+ assert(size == insns);
+
+ size = read(fd_alg, tag, len);
+ assert(size == len);
+
+ close(fd_alg);
+ close(fd_base);
+}
+
+static void tag_dump(const char *prefix, uint8_t *tag, uint32_t len)
+{
+ int i;
+
+ printf("%s", prefix);
+ for (i = 0; i < len; i++)
+ printf("%02x", tag[i]);
+ printf("\n");
+}
+
+static void tag_exit_report(int insns, int fd_map, uint8_t *ftag,
+ uint8_t *atag, uint32_t len)
+{
+ printf("Program tag mismatch for %d insns%s!\n", insns,
+ fd_map < 0 ? "" : " with map");
+
+ tag_dump(" fdinfo result: ", ftag, len);
+ tag_dump(" af_alg result: ", atag, len);
+ exit(1);
+}
+
+static void do_test(uint32_t *tests, int start_insns, int fd_map,
+ void (*bpf_filler)(unsigned int insns, int fd))
+{
+ int i, fd_prog;
+
+ for (i = start_insns; i <= BPF_MAXINSNS; i++) {
+ uint8_t ftag[8], atag[sizeof(ftag)];
+
+ fd_prog = bpf_try_load_prog(i, fd_map, bpf_filler);
+ tag_from_fdinfo(fd_prog, ftag, sizeof(ftag));
+ tag_from_alg(i, atag, sizeof(atag));
+ if (memcmp(ftag, atag, sizeof(ftag)))
+ tag_exit_report(i, fd_map, ftag, atag, sizeof(ftag));
+
+ close(fd_prog);
+ sched_yield();
+ (*tests)++;
+ }
+}
+
+int main(void)
+{
+ uint32_t tests = 0;
+ int i, fd_map;
+
+ fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
+ sizeof(int), 1, BPF_F_NO_PREALLOC);
+ assert(fd_map > 0);
+
+ for (i = 0; i < 5; i++) {
+ do_test(&tests, 2, -1, bpf_gen_imm_prog);
+ do_test(&tests, 3, fd_map, bpf_gen_map_prog);
+ }
+
+ printf("test_tag: OK (%u tests)\n", tests);
+ close(fd_map);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_tcp_estats.c b/tools/testing/selftests/bpf/test_tcp_estats.c
new file mode 100644
index 000000000..bee3bbecc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_estats.c
@@ -0,0 +1,258 @@
+/* Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+/* This program shows that clang/llvm is able to generate code patterns
+ * like:
+ * _tcp_send_active_reset:
+ * 0: bf 16 00 00 00 00 00 00 r6 = r1
+ * ......
+ * 335: b7 01 00 00 0f 00 00 00 r1 = 15
+ * 336: 05 00 48 00 00 00 00 00 goto 72
+ *
+ * LBB0_3:
+ * 337: b7 01 00 00 01 00 00 00 r1 = 1
+ * 338: 63 1a d0 ff 00 00 00 00 *(u32 *)(r10 - 48) = r1
+ * 408: b7 01 00 00 03 00 00 00 r1 = 3
+ *
+ * LBB0_4:
+ * 409: 71 a2 fe ff 00 00 00 00 r2 = *(u8 *)(r10 - 2)
+ * 410: bf a7 00 00 00 00 00 00 r7 = r10
+ * 411: 07 07 00 00 b8 ff ff ff r7 += -72
+ * 412: bf 73 00 00 00 00 00 00 r3 = r7
+ * 413: 0f 13 00 00 00 00 00 00 r3 += r1
+ * 414: 73 23 2d 00 00 00 00 00 *(u8 *)(r3 + 45) = r2
+ *
+ * As the snippet above shows, the code generated by the compiler is
+ * reasonable: "r1" is assigned different values in the basic blocks
+ * "_tcp_send_active_reset" and "LBB0_3", and is then used in "LBB0_4".
+ * The verifier should be able to handle such code patterns.
+ */
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/version.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+
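+/* Read a possibly unaligned or kernel-space value with bpf_probe_read();
+ * evaluates to the value read, or 0 if the read faults.
+ */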
+#define _(P) ({typeof(P) val = 0; bpf_probe_read(&val, sizeof(val), &P); val;})
+#define TCP_ESTATS_MAGIC 0xBAADBEEF
+
+/* This test case needs the "sock" and "pt_regs" data structures.
+ * Recursively, "sock" needs "sock_common" and "inet_sock".
+ * However, this is a unit test case for verifier purposes only,
+ * without BPF program execution, so we can safely mock much simpler
+ * data structures, taking only the necessary fields from the kernel
+ * headers.
+ */
+typedef __u32 __bitwise __portpair;
+typedef __u64 __bitwise __addrpair;
+
+struct sock_common {
+ unsigned short skc_family;
+ union {
+ __addrpair skc_addrpair;
+ struct {
+ __be32 skc_daddr;
+ __be32 skc_rcv_saddr;
+ };
+ };
+ union {
+ __portpair skc_portpair;
+ struct {
+ __be16 skc_dport;
+ __u16 skc_num;
+ };
+ };
+ struct in6_addr skc_v6_daddr;
+ struct in6_addr skc_v6_rcv_saddr;
+};
+
+struct sock {
+ struct sock_common __sk_common;
+#define sk_family __sk_common.skc_family
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+};
+
+struct inet_sock {
+ struct sock sk;
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_dport sk.__sk_common.skc_dport
+ __be32 inet_saddr;
+ __be16 inet_sport;
+};
+
+struct pt_regs {
+ long di;
+};
+
+static inline struct inet_sock *inet_sk(const struct sock *sk)
+{
+ return (struct inet_sock *)sk;
+}
+
+/* Define various data structures for state recording.
+ * Some fields are not used due to test simplification.
+ */
+enum tcp_estats_addrtype {
+ TCP_ESTATS_ADDRTYPE_IPV4 = 1,
+ TCP_ESTATS_ADDRTYPE_IPV6 = 2
+};
+
+enum tcp_estats_event_type {
+ TCP_ESTATS_ESTABLISH,
+ TCP_ESTATS_PERIODIC,
+ TCP_ESTATS_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_TIMEOUT,
+ TCP_ESTATS_RETRANSMIT_OTHER,
+ TCP_ESTATS_SYN_RETRANSMIT,
+ TCP_ESTATS_SYNACK_RETRANSMIT,
+ TCP_ESTATS_TERM,
+ TCP_ESTATS_TX_RESET,
+ TCP_ESTATS_RX_RESET,
+ TCP_ESTATS_WRITE_TIMEOUT,
+ TCP_ESTATS_CONN_TIMEOUT,
+ TCP_ESTATS_ACK_LATENCY,
+ TCP_ESTATS_NEVENTS,
+};
+
+struct tcp_estats_event {
+ int pid;
+ int cpu;
+ unsigned long ts;
+ unsigned int magic;
+ enum tcp_estats_event_type event_type;
+};
+
+/* The data structure below is packed so that the llvm compiler
+ * generates the expected code.
+ */
+struct tcp_estats_conn_id {
+ unsigned int localaddressType;
+ struct {
+ unsigned char data[16];
+ } localaddress;
+ struct {
+ unsigned char data[16];
+ } remaddress;
+ unsigned short localport;
+ unsigned short remport;
+} __attribute__((__packed__));
+
+struct tcp_estats_basic_event {
+ struct tcp_estats_event event;
+ struct tcp_estats_conn_id conn_id;
+};
+
+struct bpf_map_def SEC("maps") ev_record_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcp_estats_basic_event),
+ .max_entries = 1024,
+};
+
+struct dummy_tracepoint_args {
+ unsigned long long pad;
+ struct sock *sock;
+};
+
+static __always_inline void tcp_estats_ev_init(struct tcp_estats_event *event,
+ enum tcp_estats_event_type type)
+{
+ event->magic = TCP_ESTATS_MAGIC;
+ event->ts = bpf_ktime_get_ns();
+ event->event_type = type;
+}
+
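+/* Copy a 32-bit value one byte at a time: each source byte is fetched
+ * through the _() probe-read wrapper and stored into the packed conn_id
+ * without unaligned accesses.
+ */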
+static __always_inline void unaligned_u32_set(unsigned char *to, __u8 *from)
+{
+ to[0] = _(from[0]);
+ to[1] = _(from[1]);
+ to[2] = _(from[2]);
+ to[3] = _(from[3]);
+}
+
+static __always_inline void conn_id_ipv4_init(struct tcp_estats_conn_id *conn_id,
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV4;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->remaddress.data, (__u8 *)daddr);
+}
+
+static __always_inline void conn_id_ipv6_init(struct tcp_estats_conn_id *conn_id,
+ __be32 *saddr, __be32 *daddr)
+{
+ conn_id->localaddressType = TCP_ESTATS_ADDRTYPE_IPV6;
+
+ unaligned_u32_set(conn_id->localaddress.data, (__u8 *)saddr);
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32),
+ (__u8 *)(saddr + 1));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(saddr + 2));
+ unaligned_u32_set(conn_id->localaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(saddr + 3));
+
+ unaligned_u32_set(conn_id->remaddress.data,
+ (__u8 *)(daddr));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32),
+ (__u8 *)(daddr + 1));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 2,
+ (__u8 *)(daddr + 2));
+ unaligned_u32_set(conn_id->remaddress.data + sizeof(__u32) * 3,
+ (__u8 *)(daddr + 3));
+}
+
+static __always_inline void tcp_estats_conn_id_init(struct tcp_estats_conn_id *conn_id,
+ struct sock *sk)
+{
+ conn_id->localport = _(inet_sk(sk)->inet_sport);
+ conn_id->remport = _(inet_sk(sk)->inet_dport);
+
+ if (_(sk->sk_family) == AF_INET6)
+ conn_id_ipv6_init(conn_id,
+ sk->sk_v6_rcv_saddr.s6_addr32,
+ sk->sk_v6_daddr.s6_addr32);
+ else
+ conn_id_ipv4_init(conn_id,
+ &inet_sk(sk)->inet_saddr,
+ &inet_sk(sk)->inet_daddr);
+}
+
+static __always_inline void tcp_estats_init(struct sock *sk,
+ struct tcp_estats_event *event,
+ struct tcp_estats_conn_id *conn_id,
+ enum tcp_estats_event_type type)
+{
+ tcp_estats_ev_init(event, type);
+ tcp_estats_conn_id_init(conn_id, sk);
+}
+
+static __always_inline void send_basic_event(struct sock *sk,
+ enum tcp_estats_event_type type)
+{
+ struct tcp_estats_basic_event ev;
+ __u32 key = bpf_get_prandom_u32();
+
+ memset(&ev, 0, sizeof(ev));
+ tcp_estats_init(sk, &ev.event, &ev.conn_id, type);
+ bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
+}
+
+SEC("dummy_tracepoint")
+int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+{
+ if (!arg->sock)
+ return 0;
+
+ send_basic_event(arg->sock, TCP_ESTATS_TX_RESET);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
new file mode 100644
index 000000000..7bcfa6207
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _TEST_TCPBPF_H
+#define _TEST_TCPBPF_H
+
+struct tcpbpf_globals {
+ __u32 event_map;
+ __u32 total_retrans;
+ __u32 data_segs_in;
+ __u32 data_segs_out;
+ __u32 bad_cb_test_rv;
+ __u32 good_cb_test_rv;
+ __u64 bytes_received;
+ __u64 bytes_acked;
+ __u32 num_listen;
+};
+#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
new file mode 100644
index 000000000..4b7fd540c
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_tcpbpf.h"
+
+struct bpf_map_def SEC("maps") global_map = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct tcpbpf_globals),
+ .max_entries = 2,
+};
+
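+/* Record that a sockops callback of type 'event' fired by setting the
+ * corresponding bit in the global event map.
+ */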
+static inline void update_event_map(int event)
+{
+	__u32 key = 0;
+	struct tcpbpf_globals g = {0}, *gp;
+
+	gp = bpf_map_lookup_elem(&global_map, &key);
+	if (gp)
+		g = *gp;
+	g.event_map |= (1 << event);
+	bpf_map_update_elem(&global_map, &key, &g, BPF_ANY);
+}
+
+int _version SEC("version") = 1;
+
+SEC("sockops")
+int bpf_testcb(struct bpf_sock_ops *skops)
+{
+ int rv = -1;
+ int bad_call_rv = 0;
+ int good_call_rv = 0;
+ int op;
+ int v = 0;
+
+ op = (int) skops->op;
+
+ update_event_map(op);
+
+ switch (op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ /* Test failure to set largest cb flag (assumes not defined) */
+ bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+ /* Set callback */
+ good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+ BPF_SOCK_OPS_STATE_CB_FLAG);
+ /* Update results */
+ {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ g.bad_cb_test_rv = bad_call_rv;
+ g.good_cb_test_rv = good_call_rv;
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ skops->sk_txhash = 0x12345f;
+ v = 0xff;
+ rv = bpf_setsockopt(skops, SOL_IPV6, IPV6_TCLASS, &v,
+ sizeof(v));
+ break;
+ case BPF_SOCK_OPS_RTO_CB:
+ break;
+ case BPF_SOCK_OPS_RETRANS_CB:
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE) {
+ __u32 key = 0;
+ struct tcpbpf_globals g, *gp;
+
+ gp = bpf_map_lookup_elem(&global_map, &key);
+ if (!gp)
+ break;
+ g = *gp;
+ if (skops->args[0] == BPF_TCP_LISTEN) {
+ g.num_listen++;
+ } else {
+ g.total_retrans = skops->total_retrans;
+ g.data_segs_in = skops->data_segs_in;
+ g.data_segs_out = skops->data_segs_out;
+ g.bytes_received = skops->bytes_received;
+ g.bytes_acked = skops->bytes_acked;
+ }
+ bpf_map_update_elem(&global_map, &key, &g,
+ BPF_ANY);
+ }
+ break;
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_sock_ops_cb_flags_set(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ break;
+ default:
+ rv = -1;
+ }
+ skops->reply = rv;
+ return 1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
new file mode 100644
index 000000000..a275c2971
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcpbpf_user.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_rlimit.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+
+#include "test_tcpbpf.h"
+
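+/* Minimal gtest-style assertion: on mismatch, print the actual and
+ * expected values and jump to the err label in the enclosing function.
+ */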
+#define EXPECT_EQ(expected, actual, fmt) \
+ do { \
+ if ((expected) != (actual)) { \
+ printf(" Value of: " #actual "\n" \
+ " Actual: %" fmt "\n" \
+ " Expected: %" fmt "\n", \
+ (actual), (expected)); \
+ goto err; \
+ } \
+ } while (0)
+
+int verify_result(const struct tcpbpf_globals *result)
+{
+ __u32 expected_events;
+
+ expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+ (1 << BPF_SOCK_OPS_RWND_INIT) |
+ (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+ (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+ (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+ (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+ (1 << BPF_SOCK_OPS_STATE_CB) |
+ (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+ EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
+ EXPECT_EQ(501ULL, result->bytes_received, "llu");
+ EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
+ EXPECT_EQ(1, result->data_segs_in, PRIu32);
+ EXPECT_EQ(1, result->data_segs_out, PRIu32);
+ EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
+ EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
+ EXPECT_EQ(1, result->num_listen, PRIu32);
+
+ return 0;
+err:
+ return -1;
+}
+
+static int bpf_find_map(const char *test, struct bpf_object *obj,
+ const char *name)
+{
+ struct bpf_map *map;
+
+ map = bpf_object__find_map_by_name(obj, name);
+ if (!map) {
+ printf("%s:FAIL:map '%s' not found\n", test, name);
+ return -1;
+ }
+ return bpf_map__fd(map);
+}
+
+int main(int argc, char **argv)
+{
+ const char *file = "test_tcpbpf_kern.o";
+ struct tcpbpf_globals g = {0};
+ const char *cg_path = "/foo";
+ int error = EXIT_FAILURE;
+ struct bpf_object *obj;
+ int prog_fd, map_fd;
+ int cg_fd = -1;
+ __u32 key = 0;
+ int rv;
+
+ if (setup_cgroup_environment())
+ goto err;
+
+ cg_fd = create_and_get_cgroup(cg_path);
+ if (!cg_fd)
+ goto err;
+
+ if (join_cgroup(cg_path))
+ goto err;
+
+ if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ printf("FAILED: load_bpf_file failed for: %s\n", file);
+ goto err;
+ }
+
+ rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (rv) {
+		printf("FAILED: bpf_prog_attach: %d (%s)\n",
+		       rv, strerror(errno));
+ goto err;
+ }
+
+ if (system("./tcp_server.py")) {
+ printf("FAILED: TCP server\n");
+ goto err;
+ }
+
+ map_fd = bpf_find_map(__func__, obj, "global_map");
+ if (map_fd < 0)
+ goto err;
+
+ rv = bpf_map_lookup_elem(map_fd, &key, &g);
+ if (rv != 0) {
+ printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
+ goto err;
+ }
+
+ if (verify_result(&g)) {
+ printf("FAILED: Wrong stats\n");
+ goto err;
+ }
+
+ printf("PASSED!\n");
+ error = 0;
+err:
+ bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
+ close(cg_fd);
+ cleanup_cgroup_environment();
+ return error;
+}
diff --git a/tools/testing/selftests/bpf/test_tracepoint.c b/tools/testing/selftests/bpf/test_tracepoint.c
new file mode 100644
index 000000000..04bf08451
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tracepoint.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+struct sched_switch_args {
+ unsigned long long pad;
+ char prev_comm[16];
+ int prev_pid;
+ int prev_prio;
+ long long prev_state;
+ char next_comm[16];
+ int next_pid;
+ int next_prio;
+};
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct sched_switch_args *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
+__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
new file mode 100755
index 000000000..546aee3e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -0,0 +1,731 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# End-to-end eBPF tunnel test suite
+# The script tests BPF network tunnel implementation.
+#
+# Topology:
+# ---------
+# root namespace | at_ns0 namespace
+# |
+# ----------- | -----------
+# | tnl dev | | | tnl dev | (overlay network)
+# ----------- | -----------
+# metadata-mode | native-mode
+# with bpf |
+# |
+# ---------- | ----------
+# | veth1 | --------- | veth0 | (underlay network)
+# ---------- peer ----------
+#
+#
+# Device Configuration
+# --------------------
+# Root namespace with metadata-mode tunnel + BPF
+# Device names and addresses:
+# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
+# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+#
+# Namespace at_ns0 with native tunnel
+# Device names and addresses:
+# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+#
+#
+# End-to-end ping packet flow
+# ---------------------------
+# Most of the tests start with namespace creation and device configuration,
+# then ping across the underlay and overlay networks. When doing
+# 'ping 10.1.1.100' from the root namespace, the following operations happen:
+# 1) Route lookup shows 10.1.1.100/24 belongs to the tnl dev; fwd to tnl dev.
+# 2) The tnl device's egress BPF program is triggered and sets the tunnel
+#    metadata, with remote_ip=172.16.1.100 among others.
+# 3) The outer tunnel header is prepended and the packet is routed to
+#    veth1's egress.
+# 4) veth0's ingress queue receives the tunneled packet in namespace at_ns0.
+# 5) The tunnel protocol handler, e.g. vxlan_rcv, decaps the packet.
+# 6) The packet is forwarded to the overlay tnl dev.
+
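+# For a single tunnel type, the steps above boil down to the following
+# sketch (mirroring test_gre below; it assumes test_tunnel_kern.o has
+# been built in the current directory):
+#   TYPE=gretap; DEV_NS=gretap00; DEV=gretap11
+#   config_device
+#   add_gre_tunnel
+#   attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+#   ping -c 3 10.1.1.100                        # overlay, root -> at_ns0
+#   ip netns exec at_ns0 ping -c 3 10.1.1.200   # overlay, at_ns0 -> root
+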
+PING_ARG="-c 3 -w 10 -q"
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+config_device()
+{
+ ip netns add at_ns0
+ ip link add veth0 type veth peer name veth1
+ ip link set veth0 netns at_ns0
+ ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip link set dev veth1 up mtu 1500
+ ip addr add dev veth1 172.16.1.200/24
+}
+
+add_gre_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE key 2 external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6gretap_tunnel()
+{
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
+ local ::11 remote ::22
+
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+	ip addr add dev $DEV fc80::200/96
+ ip link set dev $DEV up
+}
+
+add_erspan_tunnel()
+{
+ # at_ns0 namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 3
+ fi
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6erspan_tunnel()
+{
+ # assign ipv6 address
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ if [ "$1" == "v1" ]; then
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 1 erspan 123
+ else
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE seq key 2 \
+ local ::11 remote ::22 \
+ erspan_ver 2 erspan_dir egress erspan_hwid 7
+ fi
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_vxlan_tunnel()
+{
+	# Set static ARP entries here because iptables set-mark works on
+	# L3 packets only; it does not apply to ARP packets, which causes
+	# errors at get_tunnel_{key/opt}.
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ id 2 dstport 4789 gbp remote 172.16.1.200
+ ip netns exec at_ns0 \
+ ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+ ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external gbp dstport 4789
+ ip link set dev $DEV address 52:54:00:d9:02:00 up
+ ip addr add dev $DEV 10.1.1.200/24
+ arp -s 10.1.1.100 52:54:00:d9:01:00
+}
+
+add_ip6vxlan_tunnel()
+{
+ #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
+ ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ #ip -4 addr del 172.16.1.200 dev veth1
+ ip -6 addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
+ local ::11 remote ::22
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external dstport 4789
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_geneve_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ id 2 dstport 6081 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE dstport 6081 external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ip6geneve_tunnel()
+{
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE id 22 \
+ remote ::22 # geneve has no local option
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+add_ipip_tunnel()
+{
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ local 172.16.1.100 remote 172.16.1.200
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip link set dev $DEV up
+ ip addr add dev $DEV 10.1.1.200/24
+}
+
+add_ipip6tnl_tunnel()
+{
+ ip netns exec at_ns0 ip addr add ::11/96 dev veth0
+ ip netns exec at_ns0 ip link set dev veth0 up
+ ip addr add dev veth1 ::22/96
+ ip link set dev veth1 up
+
+ # at_ns0 namespace
+ ip netns exec at_ns0 \
+ ip link add dev $DEV_NS type $TYPE \
+ local ::11 remote ::22
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip link set dev $DEV_NS up
+
+ # root namespace
+ ip link add dev $DEV type $TYPE external
+ ip addr add dev $DEV 10.1.1.200/24
+ ip link set dev $DEV up
+}
+
+test_gre()
+{
+ TYPE=gretap
+ DEV_NS=gretap00
+ DEV=gretap11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_gre_tunnel
+ attach_bpf $DEV gre_set_tunnel gre_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gre()
+{
+ TYPE=ip6gre
+ DEV_NS=ip6gre00
+ DEV=ip6gre11
+ ret=0
+
+ check $TYPE
+ config_device
+ # reuse the ip6gretap function
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6gretap()
+{
+ TYPE=ip6gretap
+ DEV_NS=ip6gretap00
+ DEV=ip6gretap11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6gretap_tunnel
+ attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # overlay: ipv4 over ipv6
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ # overlay: ipv6 over ipv6
+ ip netns exec at_ns0 ping6 $PING_ARG fc80::200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_erspan()
+{
+ TYPE=erspan
+ DEV_NS=erspan00
+ DEV=erspan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_erspan_tunnel $1
+ attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6erspan()
+{
+ TYPE=ip6erspan
+ DEV_NS=ip6erspan00
+ DEV=ip6erspan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6erspan_tunnel $1
+ attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
+ ping6 $PING_ARG ::11
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_vxlan()
+{
+ TYPE=vxlan
+ DEV_NS=vxlan00
+ DEV=vxlan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_vxlan_tunnel
+ attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6vxlan()
+{
+ TYPE=vxlan
+ DEV_NS=ip6vxlan00
+ DEV=ip6vxlan11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6vxlan_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip4 over ip6
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_geneve()
+{
+ TYPE=geneve
+ DEV_NS=geneve00
+ DEV=geneve11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_geneve_tunnel
+ attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ip6geneve()
+{
+ TYPE=geneve
+ DEV_NS=ip6geneve00
+ DEV=ip6geneve11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ip6geneve_tunnel
+ attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
+}
+
+test_ipip()
+{
+ TYPE=ipip
+ DEV_NS=ipip00
+ DEV=ipip11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ipip_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+test_ipip6()
+{
+ TYPE=ip6tnl
+ DEV_NS=ipip6tnl00
+ DEV=ipip6tnl11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_ipip6tnl_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip4 over ip6
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
+setup_xfrm_tunnel()
+{
+ auth=0x$(printf '1%.0s' {1..40})
+ enc=0x$(printf '2%.0s' {1..32})
+ spi_in_to_out=0x1
+ spi_out_to_in=0x2
+ # at_ns0 namespace
+ # at_ns0 -> root
+ ip netns exec at_ns0 \
+ ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+ spi $spi_in_to_out reqid 1 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip netns exec at_ns0 \
+ ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
+ tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+ mode tunnel
+ # root -> at_ns0
+ ip netns exec at_ns0 \
+ ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+ spi $spi_out_to_in reqid 2 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip netns exec at_ns0 \
+ ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
+ tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+ mode tunnel
+ # address & route
+ ip netns exec at_ns0 \
+ ip addr add dev veth0 10.1.1.100/32
+ ip netns exec at_ns0 \
+ ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
+ src 10.1.1.100
+
+ # root namespace
+ # at_ns0 -> root
+ ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
+ spi $spi_in_to_out reqid 1 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
+ tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
+ mode tunnel
+ # root -> at_ns0
+ ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
+ spi $spi_out_to_in reqid 2 mode tunnel \
+ auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
+ ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
+ tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
+ mode tunnel
+ # address & route
+ ip addr add dev veth1 10.1.1.200/32
+ ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
+}
+
+test_xfrm_tunnel()
+{
+ config_device
+ > /sys/kernel/debug/tracing/trace
+ setup_xfrm_tunnel
+ tc qdisc add dev veth1 clsact
+ tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
+ sec xfrm_get_state
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ sleep 1
+ grep "reqid 1" /sys/kernel/debug/tracing/trace
+ check_err $?
+ grep "spi 0x1" /sys/kernel/debug/tracing/trace
+ check_err $?
+ grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+}
+
+attach_bpf()
+{
+ DEV=$1
+ SET=$2
+ GET=$3
+ tc qdisc add dev $DEV clsact
+ tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
+ tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+}
+
+cleanup()
+{
+ ip netns delete at_ns0 2> /dev/null
+ ip link del veth1 2> /dev/null
+ ip link del ipip11 2> /dev/null
+ ip link del ipip6tnl11 2> /dev/null
+ ip link del gretap11 2> /dev/null
+ ip link del ip6gre11 2> /dev/null
+ ip link del ip6gretap11 2> /dev/null
+ ip link del vxlan11 2> /dev/null
+ ip link del ip6vxlan11 2> /dev/null
+ ip link del geneve11 2> /dev/null
+ ip link del ip6geneve11 2> /dev/null
+ ip link del erspan11 2> /dev/null
+ ip link del ip6erspan11 2> /dev/null
+ ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
+ ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
+ ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
+ ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
+}
+
+cleanup_exit()
+{
+	echo "Caught signal, cleaning up and exiting"
+ cleanup
+ exit 0
+}
+
+check()
+{
+ ip link help 2>&1 | grep -q "\s$1\s"
+	if [ $? -ne 0 ]; then
+		echo "SKIP $1: not supported by iproute2"
+ cleanup
+ return 1
+ fi
+}
+
+enable_debug()
+{
+ echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
+ echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
+}
+
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
+bpf_tunnel_test()
+{
+ echo "Testing GRE tunnel..."
+ test_gre
+ echo "Testing IP6GRE tunnel..."
+ test_ip6gre
+ echo "Testing IP6GRETAP tunnel..."
+ test_ip6gretap
+ echo "Testing ERSPAN tunnel..."
+ test_erspan v2
+ echo "Testing IP6ERSPAN tunnel..."
+ test_ip6erspan v2
+ echo "Testing VXLAN tunnel..."
+ test_vxlan
+ echo "Testing IP6VXLAN tunnel..."
+ test_ip6vxlan
+ echo "Testing GENEVE tunnel..."
+ test_geneve
+ echo "Testing IP6GENEVE tunnel..."
+ test_ip6geneve
+ echo "Testing IPIP tunnel..."
+ test_ipip
+ echo "Testing IPIP6 tunnel..."
+ test_ipip6
+ echo "Testing IPSec tunnel..."
+ test_xfrm_tunnel
+}
+
+trap cleanup 0 3 6
+trap cleanup_exit 2 9
+
+cleanup
+bpf_tunnel_test
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_tunnel_kern.c b/tools/testing/selftests/bpf/test_tunnel_kern.c
new file mode 100644
index 000000000..504df69c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tunnel_kern.c
@@ -0,0 +1,713 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2016 VMware
+ * Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/pkt_cls.h>
+#include <linux/erspan.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
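+/* Log the failing source line and return code to the trace buffer. */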
+#define ERROR(ret) do {\
+ char fmt[] = "ERROR line:%d ret:%d\n";\
+ bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
+ } while (0)
+
+int _version SEC("version") = 1;
+
+struct geneve_opt {
+ __be16 opt_class;
+ __u8 type;
+ __u8 length:5;
+ __u8 r3:1;
+ __u8 r2:1;
+ __u8 r1:1;
+	__u8	opt_data[8]; /* hard-coded to 8 bytes */
+};
+
+struct vxlan_metadata {
+ __u32 gbp;
+};
+
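+/* Each <type>_set_tunnel program runs on tc egress of the collect_md
+ * ("external") device and fills in the tunnel metadata; the matching
+ * <type>_get_tunnel program runs on ingress and dumps the received
+ * metadata to the trace buffer for test_tunnel.sh to inspect.
+ */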
+SEC("gre_set_tunnel")
+int _gre_set_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("gre_get_tunnel")
+int _gre_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "key %d remote ip 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_set_tunnel")
+int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+ key.tunnel_label = 0xabcde;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
+ BPF_F_SEQ_NUMBER);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6gretap_get_tunnel")
+int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
+SEC("erspan_set_tunnel")
+int _erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&md, 0, sizeof(md));
+#ifdef ERSPAN_V1
+ md.version = 1;
+ md.u.index = bpf_htonl(123);
+#else
+ __u8 direction = 1;
+ __u8 hwid = 7;
+
+ md.version = 2;
+ md.u.md2.dir = direction;
+ md.u.md2.hwid = hwid & 0xf;
+ md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("erspan_get_tunnel")
+int _erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+	__u32 index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ md.u.md2.dir,
+ (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_set_tunnel")
+int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11);
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&md, 0, sizeof(md));
+
+#ifdef ERSPAN_V1
+ md.u.index = bpf_htonl(123);
+ md.version = 1;
+#else
+ __u8 direction = 0;
+ __u8 hwid = 17;
+
+ md.version = 2;
+ md.u.md2.dir = direction;
+ md.u.md2.hwid = hwid & 0xf;
+ md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+#endif
+
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip4ip6erspan_get_tunnel")
+int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
+ struct bpf_tunnel_key key;
+ struct erspan_metadata md;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+			key.tunnel_id, key.remote_ipv6[3], md.version);
+
+#ifdef ERSPAN_V1
+ char fmt2[] = "\tindex %x\n";
+
+	__u32 index = bpf_ntohl(md.u.index);
+ bpf_trace_printk(fmt2, sizeof(fmt2), index);
+#else
+ char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
+
+ bpf_trace_printk(fmt2, sizeof(fmt2),
+ md.u.md2.dir,
+ (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+ bpf_ntohl(md.u.md2.timestamp));
+#endif
+
+ return TC_ACT_OK;
+}
+
+SEC("vxlan_set_tunnel")
+int _vxlan_set_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ struct vxlan_metadata md;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("vxlan_get_tunnel")
+int _vxlan_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ struct vxlan_metadata md;
+ char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, md.gbp);
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_set_tunnel")
+int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6vxlan_get_tunnel")
+int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+{
+ char fmt[] = "key %d remote ip6 ::%x label %x\n";
+ struct bpf_tunnel_key key;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+
+ return TC_ACT_OK;
+}
+
+SEC("geneve_set_tunnel")
+int _geneve_set_tunnel(struct __sk_buff *skb)
+{
+ int ret, ret2;
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ __builtin_memset(&gopt, 0x0, sizeof(gopt));
+ gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt.type = 0x08;
+ gopt.r1 = 0;
+ gopt.r2 = 0;
+ gopt.r3 = 0;
+ gopt.length = 2; /* 4-byte multiple */
+ *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("geneve_get_tunnel")
+int _geneve_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+ return TC_ACT_OK;
+}
+
+SEC("ip6geneve_set_tunnel")
+int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ int ret;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&gopt, 0x0, sizeof(gopt));
+ gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt.type = 0x08;
+ gopt.r1 = 0;
+ gopt.r2 = 0;
+ gopt.r3 = 0;
+ gopt.length = 2; /* 4-byte multiple */
+ *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+
+ ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6geneve_get_tunnel")
+int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+{
+	char fmt[] = "key %d remote ip6 ::%x geneve class 0x%x\n";
+ struct bpf_tunnel_key key;
+ struct geneve_opt gopt;
+ int ret;
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 key.tunnel_id, key.remote_ipv6[3], gopt.opt_class);
+
+ return TC_ACT_OK;
+}
+
+SEC("ipip_set_tunnel")
+int _ipip_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ struct tcphdr *tcp = data + sizeof(*iph);
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
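+ /* tcp was derived as data + sizeof(*iph), so a single comparison
+  * against data_end proves both headers are in bounds.
+  */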
+ if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+ ERROR(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP) {
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ } else {
+ if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ if (tcp->dest == bpf_htons(5200))
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ else if (tcp->dest == bpf_htons(5201))
+ key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
+ else
+ return TC_ACT_SHOT;
+ }
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ipip_get_tunnel")
+int _ipip_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "remote ip 0x%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+ return TC_ACT_OK;
+}
+
+SEC("ipip6_set_tunnel")
+int _ipip6_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ struct tcphdr *tcp = data + sizeof(*iph);
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
+ if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+ ERROR(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ipip6_get_tunnel")
+int _ipip6_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "remote ip6 %x::%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
+ return TC_ACT_OK;
+}
+
+SEC("ip6ip6_set_tunnel")
+int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct ipv6hdr *iph = data;
+ struct tcphdr *tcp = data + sizeof(*iph);
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
+ if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
+ ERROR(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.remote_ipv6[0] = bpf_htonl(0x2401db00);
+ key.tunnel_ttl = 64;
+
+ if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
+ key.remote_ipv6[3] = bpf_htonl(1);
+ } else {
+ if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
+ ERROR(iph->nexthdr);
+ return TC_ACT_SHOT;
+ }
+
+ if (tcp->dest == bpf_htons(5200)) {
+ key.remote_ipv6[3] = bpf_htonl(1);
+ } else if (tcp->dest == bpf_htons(5201)) {
+ key.remote_ipv6[3] = bpf_htonl(2);
+ } else {
+ ERROR(tcp->dest);
+ return TC_ACT_SHOT;
+ }
+ }
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("ip6ip6_get_tunnel")
+int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ char fmt[] = "remote ip6 %x::%x\n";
+
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ ERROR(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
+ return TC_ACT_OK;
+}
+
+SEC("xfrm_get_state")
+int _xfrm_get_state(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_state x;
+ char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
+ int ret;
+
+ ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
+ if (ret < 0)
+ return TC_ACT_OK;
+
+ bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
+ bpf_ntohl(x.remote_ipv4));
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
new file mode 100644
index 000000000..858e55143
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -0,0 +1,13243 @@
+/*
+ * Testsuite for eBPF verifier
+ *
+ * Copyright (c) 2014 PLUMgrid, http://plumgrid.com
+ * Copyright (c) 2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <endian.h>
+#include <asm/types.h>
+#include <linux/types.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <limits.h>
+
+#include <sys/capability.h>
+
+#include <linux/unistd.h>
+#include <linux/filter.h>
+#include <linux/bpf_perf_event.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+
+#include <bpf/bpf.h>
+
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
+#include "bpf_rlimit.h"
+#include "bpf_rand.h"
+#include "bpf_util.h"
+#include "../../../include/linux/filter.h"
+
+#define MAX_INSNS BPF_MAXINSNS
+#define MAX_FIXUPS 8
+#define MAX_NR_MAPS 8
+#define POINTER_VALUE 0xcafe4all
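+/* parses as the hex constant 0xcafe4a with an "ll" (long long) suffix */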
+#define TEST_DATA_LEN 64
+
+#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
+#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+static bool unpriv_disabled = false;
+
+struct bpf_test {
+ const char *descr;
+ struct bpf_insn insns[MAX_INSNS];
+ int fixup_map1[MAX_FIXUPS];
+ int fixup_map2[MAX_FIXUPS];
+ int fixup_map3[MAX_FIXUPS];
+ int fixup_map4[MAX_FIXUPS];
+ int fixup_prog1[MAX_FIXUPS];
+ int fixup_prog2[MAX_FIXUPS];
+ int fixup_map_in_map[MAX_FIXUPS];
+ int fixup_cgroup_storage[MAX_FIXUPS];
+ const char *errstr;
+ const char *errstr_unpriv;
+ uint32_t retval, retval_unpriv;
+ enum {
+ UNDEF,
+ ACCEPT,
+ REJECT
+ } result, result_unpriv;
+ enum bpf_prog_type prog_type;
+ uint8_t flags;
+ __u8 data[TEST_DATA_LEN];
+ void (*fill_helper)(struct bpf_test *self);
+};
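+
+/* A rough sketch of how the fixup arrays are consumed (the runner
+ * later in this file follows the usual test_verifier pattern): each
+ * fixup_* entry is the index of a BPF_LD_MAP_FD insn whose imm field
+ * gets patched with a freshly created map fd before the program is
+ * loaded, roughly:
+ *
+ *	prog[fixup_map1[i]].imm = map_fd;
+ *
+ * so tests can reference maps without hard-coding fd numbers.
+ */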
+
+/* Note: we want this to be 64-bit aligned so that the end of our array
+ * is actually the end of the structure.
+ */
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+static void bpf_fill_ld_abs_vlan_push_pop(struct bpf_test *self)
+{
+ /* test: 5 x ({skb->data[0], vlan_push} x PUSH_CNT + {skb->data[0], vlan_pop} x PUSH_CNT) */
+#define PUSH_CNT 51
+ unsigned int len = BPF_MAXINSNS;
+ struct bpf_insn *insn = self->insns;
+ int i = 0, j, k = 0;
+
+ insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+loop:
+ for (j = 0; j < PUSH_CNT; j++) {
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+ i++;
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
+ insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push);
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+ i++;
+ }
+
+ for (j = 0; j < PUSH_CNT; j++) {
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x34, len - i - 2);
+ i++;
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_pop);
+ insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 2);
+ i++;
+ }
+ if (++k < 5)
+ goto loop;
+
+ for (; i < len - 1; i++)
+ insn[i] = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, 0xbef);
+ insn[len - 1] = BPF_EXIT_INSN();
+}
+
+static void bpf_fill_jump_around_ld_abs(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->insns;
+ unsigned int len = BPF_MAXINSNS;
+ int i = 0;
+
+ insn[i++] = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
+ insn[i++] = BPF_LD_ABS(BPF_B, 0);
+ insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 10, len - i - 2);
+ i++;
+ while (i < len - 1)
+ insn[i++] = BPF_LD_ABS(BPF_B, 1);
+ insn[i] = BPF_EXIT_INSN();
+}
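+
+/* In both fillers, a branch offset of (len - i - 2) targets
+ * insn[len - 1], the final BPF_EXIT_INSN(): jump offsets are relative
+ * to the next instruction, so the target is i + 1 + (len - i - 2).
+ */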
+
+static void bpf_fill_rand_ld_dw(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->insns;
+ uint64_t res = 0;
+ int i = 0;
+
+ insn[i++] = BPF_MOV32_IMM(BPF_REG_0, 0);
+ while (i < self->retval) {
+ uint64_t val = bpf_semi_rand_get();
+ struct bpf_insn tmp[2] = { BPF_LD_IMM64(BPF_REG_1, val) };
+
+ res ^= val;
+ insn[i++] = tmp[0];
+ insn[i++] = tmp[1];
+ insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+ }
+ insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ insn[i++] = BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32);
+ insn[i++] = BPF_ALU64_REG(BPF_XOR, BPF_REG_0, BPF_REG_1);
+ insn[i] = BPF_EXIT_INSN();
+ res ^= (res >> 32);
+ self->retval = (uint32_t)res;
+}
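+
+/* bpf_fill_rand_ld_dw() folds its 64-bit XOR accumulator into 32 bits
+ * (res ^= res >> 32) because test run results are compared as u32; the
+ * generated RSH/XOR pair performs the same fold in BPF, so the
+ * expected retval matches what the program computes.
+ */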
+
+static struct bpf_test tests[] = {
+ {
+ "add+sub+mul",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 3),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 3),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = -3,
+ },
+ {
+ "DIV32 by 0, zero check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV32 by 0, zero check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV64 by 0, zero check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "MOD32 by 0, zero check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "MOD32 by 0, zero check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "MOD64 by 0, zero check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "DIV32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_2, 16),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 8,
+ },
+ {
+ "DIV32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 by 0, zero check, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 by 0, zero check ok, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_MOV32_IMM(BPF_REG_1, 3),
+ BPF_MOV32_IMM(BPF_REG_2, 5),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD32 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD32 by 0, zero check 2, cls",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 by 0, zero check 1, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "MOD64 by 0, zero check 2, cls",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_0, -1),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = -1,
+ },
+ /* Just make sure that JITs used udiv/umod as otherwise we get
+ * an exception from INT_MIN/-1 overflow similarly as with div
+ * by zero.
+ */
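+ /* (On x86, for example, a signed divide of INT_MIN by -1 raises the
+  * same #DE exception as a divide by zero, hence the need for
+  * unsigned division at runtime.)
+  */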
+ {
+ "DIV32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "DIV64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_0, LLONG_MIN),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "MOD32 overflow, check 1",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_1, -1),
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD32 overflow, check 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
+ BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = INT_MIN,
+ },
+ {
+ "MOD64 overflow, check 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "MOD64 overflow, check 2",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "xor32 zero extend check",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_2, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 0xffff),
+ BPF_ALU32_REG(BPF_XOR, BPF_REG_2, BPF_REG_2),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "empty prog",
+ .insns = {
+ },
+ .errstr = "unknown opcode 00",
+ .result = REJECT,
+ },
+ {
+ "only exit insn",
+ .insns = {
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "unreachable",
+ .insns = {
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "unreachable2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unreachable",
+ .result = REJECT,
+ },
+ {
+ "out of range jump",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "out of range jump2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "test1 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "test2 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid BPF_LD_IMM insn",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "test3 ld_imm64",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test4 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test5 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test6 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "test7 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, 0, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "test8 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 1, 1),
+ BPF_RAW_INSN(0, 0, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "test9 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, 0, 0, 1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test10 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, BPF_REG_1, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test11 ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 1),
+ BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "test12 ld_imm64",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(0, 0, 0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "not pointing to valid bpf_map",
+ .result = REJECT,
+ },
+ {
+ "test13 ld_imm64",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, BPF_REG_1, 0, 1),
+ BPF_RAW_INSN(0, 0, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_ld_imm64 insn",
+ .result = REJECT,
+ },
+ {
+ "arsh32 on imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "unknown opcode c4",
+ },
+ {
+ "arsh32 on reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "unknown opcode cc",
+ },
+ {
+ "arsh64 on imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "arsh64 on reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 5),
+ BPF_ALU64_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "no bpf_exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_0, BPF_REG_2),
+ },
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "loop (back-edge)",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "loop2 (back-edge)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "conditional loop",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "read uninitialized register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "read invalid register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "program doesn't init R0 before exit in all branches",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "stack out of bounds",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL | BPF_X, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode 8d",
+ .result = REJECT,
+ },
+ {
+ "invalid call insn2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_CALL uses reserved",
+ .result = REJECT,
+ },
+ {
+ "invalid function call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, 1234567),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid func unknown#1234567",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 2 },
+ .errstr = "invalid indirect read from stack",
+ .result = REJECT,
+ },
+ {
+ "uninitialized stack2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid read from stack",
+ .result = REJECT,
+ },
+ {
+ "invalid fp arithmetic",
+ /* If this gets ever changed, make sure JITs can deal with it. */
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 subtraction from stack pointer",
+ .result = REJECT,
+ },
+ {
+ "non-invalid fp arithmetic",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "invalid argument register",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "non-invalid argument register",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_cgroup_classid),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "check valid spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* fill it back into R2 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+ /* should be able to access R0 = *(R2 + 8) */
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "check valid spill/fill, skb mark",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = ACCEPT,
+ },
+ {
+ "check corrupted spill/fill",
+ .insns = {
+ /* spill R1(ctx) into stack */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* mess up with R1 pointer on stack */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
+ /* fill back into R0 is fine for priv.
+ * R0 now becomes SCALAR_VALUE.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ /* Load from R0 should fail. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .errstr = "R0 invalid mem access 'inv",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check corrupted spill/fill, LSB",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "check corrupted spill/fill, MSB",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "invalid src register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, BPF_REG_10, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R15 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in STX",
+ .insns = {
+ BPF_STX_MEM(BPF_B, 14, BPF_REG_10, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in ST",
+ .insns = {
+ BPF_ST_MEM(BPF_B, 14, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R14 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid src register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, 12, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R12 is invalid",
+ .result = REJECT,
+ },
+ {
+ "invalid dst register in LDX",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, 11, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R11 is invalid",
+ .result = REJECT,
+ },
+ {
+ "junk insn",
+ .insns = {
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode 00",
+ .result = REJECT,
+ },
+ {
+ "junk insn2",
+ .insns = {
+ BPF_RAW_INSN(1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_LDX uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "junk insn3",
+ .insns = {
+ BPF_RAW_INSN(-1, 0, 0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode ff",
+ .result = REJECT,
+ },
+ {
+ "junk insn4",
+ .insns = {
+ BPF_RAW_INSN(-1, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode ff",
+ .result = REJECT,
+ },
+ {
+ "junk insn5",
+ .insns = {
+ BPF_RAW_INSN(0x7f, -1, -1, -1, -1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ALU uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "misaligned read from stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned stack access",
+ .result = REJECT,
+ },
+ {
+ "invalid map_fd for function call",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "fd 0 is not pointing to valid bpf_map",
+ .result = REJECT,
+ },
+ {
+ "don't check return value before access",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 invalid mem access 'map_value_or_null'",
+ .result = REJECT,
+ },
+ {
+ "access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "misaligned value access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "sometimes access memory with incorrect alignment",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R0 leaks addr",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "jump test 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 14),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 5),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -8, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 19),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -16, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 15),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 2, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -32, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 11),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -40, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -40),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 7),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -48, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -48),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 5, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, -56, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -56),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 24 },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = -ENOENT,
+ },
+ {
+ "jump test 4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 3),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "jump test 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_3, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, queue_mapping)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, protocol)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_present)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, vlan_tci)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, napi_id)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "access skb fields bad1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad3",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -12),
+ },
+ .fixup_map1 = { 6 },
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "access skb fields bad4",
+ .insns = {
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -13),
+ },
+ .fixup_map1 = { 7 },
+ .errstr = "different pointers",
+ .errstr_unpriv = "R1 pointer comparison",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff family",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff local_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff local_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "invalid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "valid access __sk_buff family",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, family)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff local_ip4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff local_ip6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access __sk_buff remote_port",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "invalid access of tc_classid for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "invalid access of skb->mark for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->mark is not writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->tc_index is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "check skb->priority is writeable by SK_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, priority)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet read for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "direct packet write for SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "overlapping checks for direct packet access SK_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ },
+ {
+ "valid access family in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, family)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_ip4 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_ip4 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip4)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_port in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_port in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_port)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access remote_ip6 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, remote_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "valid access local_ip6 in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_ip6[3])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "invalid 64B read of family in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, family)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid read past end of SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, local_port) + 4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "invalid read offset in SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, family) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet read for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "direct packet write for SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "overlapping checks for direct packet access SK_MSG",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
+ offsetof(struct sk_msg_md, data)),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
+ offsetof(struct sk_msg_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_MSG,
+ },
+ {
+ "check skb->mark is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check skb->tc_index is not writeable by sockets",
+ .insns = {
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "R1 leaks addr",
+ .result = REJECT,
+ },
+ {
+ "check cb access: byte",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 3),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 3),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "__sk_buff->hash, offset 0, byte store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, hash)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "__sk_buff->tc_index, offset 3, byte store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index) + 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
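+ /* Byte loads of the 32-bit hash field are permitted at all four
+  * offsets. Where an #if __BYTE_ORDER block appears, it selects the
+  * offset naming the same significance byte on little- and big-endian
+  * hosts, so the test is endian-independent.
+  */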
+ {
+ "check skb->hash byte load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash byte load permitted 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash byte load permitted 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash byte load permitted 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: byte, wrong type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
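+ /* The cb[] coverage above is repeated at half-word (BPF_H)
+  * granularity: every 2-byte-aligned offset in cb[0..4] must be
+  * loadable and storable, and the odd-offset variant that follows
+  * must fail under strict alignment.
+  */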
+ {
+ "check cb access: half",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3]) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: half, unaligned",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check __sk_buff->hash, offset 0, half store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, hash)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check __sk_buff->tc_index, offset 2, half store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
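+ /* Endian-aware half-word loads of hash: both aligned halves are
+  * readable, while the straddling offsets (+1/+3) are rejected as
+  * invalid context accesses.
+  */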
+ {
+ "check skb->hash half load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 2),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash half load permitted 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 2),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash)),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check skb->hash half load not permitted, unaligned 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 1),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check skb->hash half load not permitted, unaligned 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 3),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hash) + 1),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check cb access: half, wrong type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
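+ /* Word-sized cb[] accesses at the natural 4-byte offsets must always
+  * be accepted; the four unaligned variants that follow must trip the
+  * strict-alignment check at every offset within a word.
+  */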
+ {
+ "check cb access: word",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: word, unaligned 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0]) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: word, unaligned 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: word, unaligned 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: word, unaligned 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4]) + 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
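+ /* Double-word cb[] access: cb[] is five u32 slots, so only the
+  * cb[0]/cb[1] and cb[2]/cb[3] pairs form aligned 8-byte regions. A DW
+  * at cb[1] or cb[3] is misaligned, and a DW at cb[4] would spill past
+  * the end of the array into the next field, hence the "oob"
+  * rejections.
+  */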
+ {
+ "check cb access: double",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "check cb access: double, unaligned 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[1])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: double, unaligned 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "misaligned context access",
+ .result = REJECT,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "check cb access: double, oob 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check cb access: double, oob 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check __sk_buff->ifindex dw store not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, ifindex)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check __sk_buff->ifindex dw load not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, ifindex)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "check cb access: double, wrong type",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "check out of range skb->cb access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0]) + 256),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .errstr_unpriv = "",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ },
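+ /* Writability of skb fields depends on the program type. In the
+  * socket-filter test below, the value stored into cb[] is the ctx
+  * pointer in R1 itself: acceptable when privileged, but an address
+  * leak ("R1 leaks addr") for an unprivileged loader.
+  */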
+ {
+ "write skb fields from socket prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[4])),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[2])),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ },
+ {
+ "write skb fields from tc_cls_act prog",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, tc_index)),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[3])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
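+ /* PTR_TO_STACK tests: frame-pointer-relative accesses must stay
+  * inside the 512-byte stack frame and be naturally aligned for their
+  * size (stack pointers are alignment-checked without any
+  * strict-alignment flag, as the missing .flags below shows), whether
+  * the misalignment comes from the register value or from the
+  * instruction offset.
+  */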
+ {
+ "PTR_TO_STACK store/load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0xfaceb00c,
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - bad alignment on reg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds low",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=-79992 size=8",
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ },
+ {
+ "PTR_TO_STACK store/load - out of bounds high",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack off=0 size=8",
+ },
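+ /* "unpriv" tests: each program is checked both privileged and
+  * unprivileged. Unprivileged programs must not expose kernel
+  * addresses; returning a pointer, comparing, negating or partially
+  * copying one, corrupting a spilled pointer, or handing one to a
+  * helper that could leak it are all rejected, while the privileged
+  * run of the same program may still be accepted.
+  */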
+ {
+ "unpriv: return pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ .retval = POINTER_VALUE,
+ },
+ {
+ "unpriv: add const to pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: add pointer to pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 pointer += pointer",
+ },
+ {
+ "unpriv: neg pointer",
+ .insns = {
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer arithmetic",
+ },
+ {
+ "unpriv: cmp pointer with const",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 pointer comparison",
+ },
+ {
+ "unpriv: cmp pointer with pointer",
+ .insns = {
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 pointer comparison",
+ },
+ {
+ "unpriv: check that printk is disallowed",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_trace_printk),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "unknown func bpf_trace_printk#6",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to helper function",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr_unpriv = "R4 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: indirectly pass pointer on stack to helper function",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ },
+ {
+ "unpriv: mangle pointer on stack 1",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: mangle pointer on stack 2",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "attempt to corrupt spilled",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: read pointer from stack in small chunks",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid size",
+ .result = REJECT,
+ },
+ {
+ "unpriv: write pointer into ctx",
+ .insns = {
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 leaks addr",
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ },
+ {
+ "unpriv: spill/fill of ctx",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: spill/fill of ctx 2",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_hash_recalc),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of ctx 3",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_hash_recalc),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 type=fp expected=ctx",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of ctx 4",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10,
+ BPF_REG_0, -8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_hash_recalc),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 type=inv expected=ctx",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers stx",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 42),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
+ offsetof(struct __sk_buff, mark)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "unpriv: spill/fill of different pointers ldx",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ -(__s32)offsetof(struct bpf_perf_event_data,
+ sample_period) - 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data,
+ sample_period)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "same insn cannot be used with different pointers",
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "unpriv: write pointer into map elem value",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "alu32: mov u32 const",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_7, 0),
+ BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1),
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_7),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R7 invalid mem access 'inv'",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "unpriv: partial copy of pointer",
+ .insns = {
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 partial copy",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: pass pointer to tail_call",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .errstr_unpriv = "R3 leaks addr into helper",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp map pointer with zero",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .errstr_unpriv = "R1 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: write into frame pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: spill/fill frame pointer",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "frame pointer is read only",
+ .result = REJECT,
+ },
+ {
+ "unpriv: cmp of frame pointer",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: adding of fp, reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: adding of fp, imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "unpriv: cmp of stack pointer",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R2 pointer comparison",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
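+ /* runtime/jit tail_call tests: bpf_tail_call(ctx, &prog_array, index)
+  * does not return on success, so a successful call yields the target
+  * program's return value (42 and 41 below); an out-of-range index or
+  * an empty slot falls through to the next instruction, so this
+  * program's own return value is used. The index is consumed as a
+  * 32-bit value at runtime, which the "> 32bit index" test relies on:
+  * 0x100000000 wraps to 0 and hits the first slot.
+  */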
+ {
+ "runtime/jit: tail_call within bounds, prog once",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "runtime/jit: tail_call within bounds, prog loop",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 41,
+ },
+ {
+ "runtime/jit: tail_call within bounds, no prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "runtime/jit: tail_call out of bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 256),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass negative index to tail_call",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, -1),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 1 },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "runtime/jit: pass > 32bit index to tail_call",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 2 },
+ .result = ACCEPT,
+ .retval = 42,
+ /* Verifier rewrite for unpriv skips tail call here. */
+ .retval_unpriv = 2,
+ },
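+ /* In the test below, BPF_ST_MEM(0, ...) is a word store: size code 0
+  * is BPF_W, so both stores write 4 bytes through the derived stack
+  * pointers.
+  */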
+ {
+ "stack pointer arithmetic",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
+ BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_ST_MEM(0, BPF_REG_2, 4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
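+ /* raw_stack tests: bpf_skb_load_bytes(skb, offset, to, len) takes the
+  * destination buffer in R3 and the length in R4, and the verifier
+  * must prove [R3, R3 + R4) lies inside the 512-byte stack before the
+  * call. The destination need not be pre-initialized (the helper
+  * writes it), but reading the slot without any call, as in
+  * "no skb_load_bytes", is an invalid stack read.
+  */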
+ {
+ "raw_stack: no skb_load_bytes",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ /* Call to skb_load_bytes() omitted. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid read from stack off -8+0 size 8",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, negative len",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, negative len 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, ~0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, zero len",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, no init",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, init",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs around bounds",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+ offsetof(struct __sk_buff, priority)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs corruption",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs corruption 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+ offsetof(struct __sk_buff, priority)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
+ offsetof(struct __sk_buff, pkt_type)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R3 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "raw_stack: skb_load_bytes, spilled regs + data",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
+ offsetof(struct __sk_buff, priority)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3 off=-513 access_size=8",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3 off=-1 access_size=8",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, invalid access 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid stack type R3 off=-512 access_size=0",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "raw_stack: skb_load_bytes, large access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_4, 512),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
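+ /* Stores into the context must be register stores (BPF_STX);
+  * immediate stores (BPF_ST) and atomic adds (BPF_XADD) targeting ctx
+  * are refused outright, as the two tests below check.
+  */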
+ {
+ "context stores via ST",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_ST stores into R1 context is not allowed",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "context stores via XADD",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
+ BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
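+ /* Direct packet access tests. The pattern under test is the usual
+  * bounds check; roughly, as C (a sketch, not part of this file):
+  *
+  *	void *data = (void *)(long)skb->data;
+  *	void *data_end = (void *)(long)skb->data_end;
+  *
+  *	if (data + 8 > data_end)
+  *		return 0;
+  *	return *(u8 *)data;	(provably in bounds)
+  *
+  * Accesses are legal only on paths where a comparison against
+  * data_end proves them in range; the tests cover both operand orders,
+  * the <, <=, >= variants, branch pruning, and offsets derived from
+  * ALU arithmetic on spilled or masked scalars.
+  */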
+ {
+ "direct packet access: test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access off=76",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ },
+ {
+ "direct packet access: test4 (write)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test5 (pkt_end >= reg, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test6 (pkt_end >= reg, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test7 (pkt_end >= reg, both accesses)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test8 (double test, variant 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test9 (double test, variant 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test10 (write invalid)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test11 (shift, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 144),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test12 (and, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 144),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test13 (branches, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 14),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 24),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+ BPF_MOV64_IMM(BPF_REG_5, 12),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test15 (spill with xadd)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 4096),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 invalid mem access 'inv'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test16 (arith on data_end)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test17 (pruning, alignment)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_JMP_A(-6),
+ },
+ .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ },
+ {
+ "direct packet access: test18 (imm += pkt_ptr, 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test19 (imm += pkt_ptr, 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_MOV64_IMM(BPF_REG_4, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test20 (x += pkt_ptr, 1)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test21 (x += pkt_ptr, 2)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test22 (x += pkt_ptr, 3)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
+ BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test23 (x += pkt_ptr, 4)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 31),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test24 (x += pkt_ptr, 5)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_0, 64),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "direct packet access: test25 (marking on <, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test26 (marking on <, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "direct packet access: test27 (marking on <=, good access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 1,
+ },
+ {
+ "direct packet access: test28 (marking on <=, bad access)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -4),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
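+ /*
+ * Helper calls taking a memory argument may be passed a packet
+ * pointer only after a comparison against data_end has proven the
+ * full access range, and only from program types that allow packet
+ * access at all; the tests below cover both XDP and cls hooks.
+ */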
+ {
+ "helper access to packet: test1, valid packet_ptr range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .result_unpriv = ACCEPT,
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test2, unchecked packet_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test3, variable add",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test4, packet_ptr with bad range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 7 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test5, packet_ptr with too short range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 6 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "helper access to packet: test6, cls valid packet_ptr range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_update_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test7, cls unchecked packet_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test8, cls variable add",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test9, cls packet_ptr with bad range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 7 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test10, cls packet_ptr with too short range",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 6 },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test11, cls unsuitable helper 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 42),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_store_bytes),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "helper access to the packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test12, cls unsuitable helper 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "helper access to the packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test13, cls helper ok",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test14, cls helper ok sub",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test15, cls helper fail sub",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test16, cls helper fail range 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test17, cls helper fail range 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, -9),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R2 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test18, cls helper fail range 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, ~0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R2 min value is negative",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test19, cls helper range zero",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test20, pkt end as input",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R1 type=pkt_end expected=fp",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to packet: test21, wrong reg",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
+ BPF_MOV64_IMM(BPF_REG_2, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_diff),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
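+ /*
+ * map_lookup_elem() returns PTR_TO_MAP_VALUE_OR_NULL; after the
+ * NULL check, any constant or variable offset into the value must
+ * be proven to stay within value_size, which requires both a floor
+ * and a ceiling bound on variable indices.
+ */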
+ {
+ "valid map access into an array with a constant",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "valid map access into an array with a register",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "valid map access into an array with a variable",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "valid map access into an array with a signed variable",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a constant",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=48 size=8",
+ .result = REJECT,
+ },
+ {
+ "invalid map access into an array with a register",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 min value is outside of the array range",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a variable",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with no floor check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "R0 unbounded memory access",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a invalid max check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "invalid access to map value, value_size=48 off=44 size=8",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid map access into an array with a invalid max check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3, 11 },
+ .errstr = "R0 pointer += pointer",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
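+ /*
+ * bpf_get_local_storage() requires a cgroup storage map and zero
+ * flags; the returned pointer is bounded by the map's value_size.
+ */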
+ {
+ "valid cgroup storage access",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .result = REJECT,
+ .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "fd 1 is not pointing to valid bpf_map",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid per-cgroup storage access 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=256 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid cgroup storage access 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 7),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
+ {
+ "invalid cgroup storage access 6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_local_storage),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_cgroup_storage = { 1 },
+ .result = REJECT,
+ .errstr = "get_local_storage() doesn't support non-zero flags",
+ .errstr_unpriv = "R2 leaks addr into helper function",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ },
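+ /*
+ * A lookup result may be copied between registers, but no ALU op
+ * is allowed on a PTR_TO_MAP_VALUE_OR_NULL register before the
+ * NULL check, as the verifier could no longer relate the check to
+ * the pointer afterwards.
+ */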
+ {
+ "multiple registers share map_lookup_elem result",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "alu ops on ptr_to_map_value_or_null, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "alu ops on ptr_to_map_value_or_null, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "alu ops on ptr_to_map_value_or_null, 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "R4 pointer arithmetic on PTR_TO_MAP_VALUE_OR_NULL",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "invalid memory access with multiple map_lookup_elem calls",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .result = REJECT,
+ .errstr = "R4 !read_ok",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "valid indirect map_lookup_elem access with 2nd lookup in branch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_2, 10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS
+ },
+ {
+ "invalid map access from else condition",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES - 1, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT,
+ .errstr_unpriv = "R0 leaks addr",
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
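+ /*
+ * ALU ops on known constants must yield known constants, so that a
+ * size derived from them is checked against real stack boundaries.
+ */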
+ {
+ "constant register |= constant should keep constant type",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "constant register |= constant should not bypass stack boundary checks",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-48 access_size=58",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "constant register |= constant register should keep constant type",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_MOV64_IMM(BPF_REG_4, 13),
+ BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "constant register |= constant register should not bypass stack boundary checks",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_MOV64_IMM(BPF_REG_4, 24),
+ BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-48 access_size=58",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
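+ /*
+ * LWT_IN and LWT_OUT programs may read packet data directly but
+ * must not write it; only LWT_XMIT gets write access, and none of
+ * them may touch skb->tc_classid.
+ */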
+ {
+ "invalid direct packet write for LWT_IN",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "cannot write into packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "invalid direct packet write for LWT_OUT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "cannot write into packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_OUT,
+ },
+ {
+ "direct packet write for LWT_XMIT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "direct packet read for LWT_IN",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "direct packet read for LWT_OUT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_OUT,
+ },
+ {
+ "direct packet read for LWT_XMIT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "overlapping checks for direct packet access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "make headroom for LWT_XMIT",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_2, 34),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+ /* split for s390 to succeed */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 42),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_XMIT,
+ },
+ {
+ "invalid access of tc_classid for LWT_IN",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "invalid access of tc_classid for LWT_OUT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "invalid access of tc_classid for LWT_XMIT",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "leak pointer into ctx 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 2 },
+ .errstr_unpriv = "R2 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ },
+ {
+ "leak pointer into ctx 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R10 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R1 context is not allowed",
+ },
+ {
+ "leak pointer into ctx 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
+ offsetof(struct __sk_buff, cb[0])),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 1 },
+ .errstr_unpriv = "R2 leaks addr into ctx",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "leak pointer into map val",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr_unpriv = "R6 leaks addr into mem",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
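+ /*
+ * When a map value pointer is passed to a helper together with a
+ * size, the verifier checks the whole (ptr, size) range against
+ * value_size and rejects empty, negative or out-of-bound ranges.
+ */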
+ {
+ "helper access to map: full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=0 size=0",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: out-of-bound range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=0 size=56",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: negative range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=0",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): out-of-bound range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo) + 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=52",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): negative range (> adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const imm): negative range (< adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R1 min value is outside of the array range",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): out-of-bound range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo) + 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=52",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): negative range (> adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via const reg): negative range (< adjustment)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct test_val, foo)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
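+ /*
+ * The same range checks apply when the value pointer was advanced
+ * by a variable: the variable needs both min and max bounds before
+ * the helper access can be validated.
+ */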
+ {
+ "helper access to adjusted map (via variable): full range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): partial range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): empty range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_trace_printk),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R1 min value is outside of the array range",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): no max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R1 unbounded memory access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to adjusted map (via variable): wrong max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct test_val, foo), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2,
+ sizeof(struct test_val) -
+ offsetof(struct test_val, foo) + 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "invalid access to map value, value_size=48 off=4 size=45",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
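+	/*
+	 * Bounds checks via conditional jumps: the unsigned variants
+	 * (JLT/JLE) are enough to bound a variable map-value offset from
+	 * above, while the signed variants (JSLT/JSLE) must also exclude
+	 * negative values on the accessing path, otherwise the verifier
+	 * rejects with "R1 min value is negative".
+	 */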
+ {
+ "helper access to map: bounds check using <, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 unbounded memory access",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <=, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using <=, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 unbounded memory access",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, good access 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 min value is negative",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, good access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, good access 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to map: bounds check using s<=, bad access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = REJECT,
+ .errstr = "R1 min value is negative",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
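+	/*
+	 * A map value pointer may itself be passed to map_lookup_elem() or
+	 * map_update_elem() as the key/value argument; the verifier checks
+	 * it, including any constant or variable offset added beforehand,
+	 * against the target map's key and value sizes.
+	 */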
+ {
+ "map lookup helper access to map",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 8 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map update helper access to map",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map update helper access to map: wrong size",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .fixup_map3 = { 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=0 size=16",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ offsetof(struct other_val, bar)),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm): out-of-bound 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
+ sizeof(struct other_val) - 4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const imm): out-of-bound 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 9 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ offsetof(struct other_val, bar)),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg): out-of-bound 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3,
+ sizeof(struct other_val) - 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=12 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via const reg): out-of-bound 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, -4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct other_val, bar), 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 11 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable): no max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 10 },
+ .result = REJECT,
+ .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "map helper access to adjusted map (via variable): wrong max check",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3,
+ offsetof(struct other_val, bar) + 1, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map3 = { 3, 11 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=16 off=9 size=8",
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
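+	/*
+	 * Register spilling and alignment: a map value pointer (or the
+	 * map_value_or_null state before the NULL check) must survive a
+	 * spill/fill through the stack, registers clobbered by a helper
+	 * call must not be readable afterwards, and the unaligned
+	 * store/load tests only run where unaligned access is efficient
+	 * (F_NEEDS_EFFICIENT_UNALIGNED_ACCESS).
+	 */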
+ {
+ "map element value is preserved across register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value or null is marked on register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value store of cleared call register",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R1 !read_ok",
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "map element value with unaligned store",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
+ BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
+ BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value with unaligned load",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value illegal alu op, 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 bitwise operator &= on pointer",
+ .result = REJECT,
+ },
+ {
+ "map element value illegal alu op, 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 32-bit pointer arithmetic prohibited",
+ .result = REJECT,
+ },
+ {
+ "map element value illegal alu op, 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 pointer arithmetic with /= operator",
+ .result = REJECT,
+ },
+ {
+ "map element value illegal alu op, 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 pointer arithmetic prohibited",
+ .errstr = "invalid mem access 'inv'",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value illegal alu op, 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_IMM(BPF_REG_3, 4096),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 invalid mem access 'inv'",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "map element value is preserved across register spilling",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0,
+ offsetof(struct test_val, foo)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
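+	/*
+	 * Variable memory: helpers such as probe_read() take a buffer plus
+	 * a size register. The size must be bounded from both sides, e.g.
+	 * by AND with a constant or explicit jump checks, and the stack
+	 * area passed in must be initialized for the whole possible range.
+	 */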
+ {
+ "helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, bitwise AND, zero included",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack off -64+0 size 64",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, bitwise AND + JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-64 access_size=65",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP (signed), correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, bounds + offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-64 access_size=65",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid stack type R1 off=-64 access_size=65",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, no max check",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ /* because max wasn't checked, signed min is negative */
+ .errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP, no min check",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack off -64+0 size 64",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: stack, JMP (signed), no min check",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 min value is negative",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map, JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val), 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map, JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val) + 1, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 4 },
+ .errstr = "invalid access to map value, value_size=48 off=0 size=49",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map adjusted, JMP, correct bounds",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+ BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val) - 20, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: map adjusted, JMP, wrong max",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_2,
+ sizeof(struct test_val) - 19, 4),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 4 },
+ .errstr = "R1 min value is outside of the array range",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
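+	/*
+	 * csum_diff() arguments are ARG_PTR_TO_MEM_OR_NULL: a NULL pointer
+	 * is acceptable as long as the size is (possibly) zero. probe_read()
+	 * has no _OR_NULL buffer argument, so NULL is rejected there even
+	 * with size 0, while a valid stack or map pointer with size 0 is
+	 * fine for both.
+	 */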
+ {
+ "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 type=inv expected=fp",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_EMIT_CALL(BPF_FUNC_csum_diff),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 0 /* csum_diff of 64-byte packet */,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 type=inv expected=fp",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 type=inv expected=fp",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: 8 bytes leak",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid indirect read from stack off -64+32 size 64",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "helper access to variable memory: 8 bytes no leak (init memory)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
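+	/*
+	 * Range-tracking corner cases: AND with a negative immediate leaves
+	 * the upper bound unknown, and the arithmetic in "invalid range
+	 * check" can overflow past the value; both must be rejected via
+	 * "R0 max value is outside of the array range".
+	 */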
+ {
+ "invalid and of negative number",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "invalid range check",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+ BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
+ BPF_MOV32_IMM(BPF_REG_3, 1),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
+ BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
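+	/*
+	 * Map-in-map: the outer lookup yields the inner map pointer as
+	 * map_value_or_null, so it must be NULL-checked before being used
+	 * as R1, and pointer arithmetic on the resulting CONST_PTR_TO_MAP
+	 * is prohibited.
+	 */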
+ {
+ "map in map access",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "invalid inner map pointer",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .errstr = "R1 pointer arithmetic on CONST_PTR_TO_MAP prohibited",
+ .result = REJECT,
+ },
+ {
+ "forgot null checking on the inner map pointer",
+ .insns = {
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_ST_MEM(0, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_in_map = { 3 },
+ .errstr = "R1 type=map_value_or_null expected=map_ptr",
+ .result = REJECT,
+ },
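+	/*
+	 * BPF_LD_ABS/BPF_LD_IND take the skb implicitly from R6 and clobber
+	 * the caller-saved registers R1-R5, which must read as !read_ok
+	 * afterwards; callee-saved R7 survives. The "skb data reload" test
+	 * additionally restores R6 after skb_vlan_push() before issuing
+	 * further LD_ABS loads.
+	 */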
+ {
+ "ld_abs: check calling conv, r1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R5 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_abs: check calling conv, r7",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_7, 0),
+ BPF_LD_ABS(BPF_W, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ },
+ {
+ "ld_abs: tests on r6 and skb data reload helper",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 42 /* ultimate return value */,
+ },
+ {
+ "ld_ind: check calling conv, r1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R5 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "ld_ind: check calling conv, r7",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 1,
+ },
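+	/*
+	 * sample_period is a 64-bit field of struct bpf_perf_event_data;
+	 * narrower byte/half/word loads of it are permitted, with the
+	 * offset adjusted for byte order so the same bytes are read on
+	 * little- and big-endian hosts.
+	 */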
+ {
+ "check bpf_perf_event_data->sample_period byte load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period) + 7),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check bpf_perf_event_data->sample_period half load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period) + 6),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check bpf_perf_event_data->sample_period word load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+#else
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period) + 4),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
+ {
+ "check bpf_perf_event_data->sample_period dword load permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_perf_event_data, sample_period)),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_PERF_EVENT,
+ },
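+	/*
+	 * By contrast, narrow half-word loads of these __sk_buff fields are
+	 * not part of the allowed context accesses and must be rejected as
+	 * "invalid bpf_context access".
+	 */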
+ {
+ "check skb->data half load not permitted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data) + 2),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ },
+ {
+ "check skb->tc_classid half load not permitted for lwt prog",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid)),
+#else
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, tc_classid) + 2),
+#endif
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid bpf_context access",
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
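+	/*
+	 * Mixing signed and unsigned comparisons on the same register must
+	 * not be taken as a full bound: an unsigned check does not imply a
+	 * signed minimum, so these accesses fail with "unbounded min value".
+	 */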
+ {
+ "bounds checks mixing signed and unsigned, positive bounds",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 2),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
+ BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 4",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 5",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_6, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_load_bytes),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R4 min value is negative, either use unsigned",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 7",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 8",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 9",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 10",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 11",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ /* Dead branch. */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 12",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 13",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, 2),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_7, 1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 14",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_8, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -7),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ },
+ {
+ "bounds checks mixing signed and unsigned, variant 15",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
+ BPF_MOV64_IMM(BPF_REG_2, -6),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "unbounded min value",
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ },
+ {
+ "subtraction bounds (map value) variant 1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT,
+ },
+ {
+ "subtraction bounds (map value) variant 2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
+ .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
+ .result = REJECT,
+ },
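+ /* MOV32 zero-extends its 32-bit result, so after the 64-bit right
+ * shift by 32 below the register is provably zero and the pointer
+ * addition is a no-op. BPF_MOV64_IMM in the two following tests
+ * instead sign-extends the immediate, producing
+ * 0xffff'ffff'ffff'ffff.
+ */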
+ {
+ "bounds check based on zero-extended MOV",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* r2 = 0x0000'0000'ffff'ffff */
+ BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
+ /* r2 = 0 */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+ /* no-op */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ /* access at offset 0 */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT
+ },
+ {
+ "bounds check based on sign-extended MOV. test1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* r2 = 0xffff'ffff'ffff'ffff */
+ BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+ /* r2 = 0xffff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
+ /* r0 = <oob pointer> */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ /* access to OOB pointer */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "map_value pointer and 4294967295",
+ .result = REJECT
+ },
+ {
+ "bounds check based on sign-extended MOV. test2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ /* r2 = 0xffff'ffff'ffff'ffff */
+ BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
+ /* r2 = 0x0fff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
+ /* r0 = <oob pointer> */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ /* access to OOB pointer */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 min value is outside of the array range",
+ .result = REJECT
+ },
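+ /* When validating an access the verifier has to sum the pointer's
+ * fixed offset (reg_off), the tracked variable offset (var_off) and
+ * the displacement encoded in the load itself (insn_off). The two
+ * tests below push that sum to roughly 1 << 30 against an 8-byte map
+ * value and must be rejected.
+ */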
+ {
+ "bounds check based on reg_off + var_off + insn_off. test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "value_size=8 off=1073741825",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "bounds check based on reg_off + var_off + insn_off. test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 4 },
+ .errstr = "value 1073741823",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
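+ /* 32-bit ALU operations truncate their result. If the tracked 64-bit
+ * range does not cross a 32-bit boundary, precise bounds survive the
+ * truncation (first test, accepted); if it does, the value may or may
+ * not wrap, so the verifier has to widen the bounds and the following
+ * tests are rejected.
+ */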
+ {
+ "bounds check after truncation of non-boundary-crossing range",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ /* r2 = 0x10'0000'0000 */
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
+ /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ /* r1 = [0x00, 0xff] */
+ BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
+ /* r1 = 0 */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* no-op */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* access at offset 0 */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT
+ },
+ {
+ "bounds check after truncation of boundary-crossing range (1)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or
+ * [0x0000'0000, 0x0000'007f]
+ */
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0x00, 0xff] or
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = 0 or
+ * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* no-op or OOB pointer computation */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ /* not actually fully unbounded, but the bound is very high */
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT
+ },
+ {
+ "bounds check after truncation of boundary-crossing range (2)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or
+ * [0x0000'0000, 0x0000'007f]
+ * Difference from the previous test: truncation via MOV32
+ * instead of ALU32.
+ */
+ BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = [0x00, 0xff] or
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
+ /* r1 = 0 or
+ * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
+ */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* no-op or OOB pointer computation */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ /* not actually fully unbounded, but the bound is very high */
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT
+ },
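+ /* Here the 32-bit addition wraps to exactly zero
+ * (0xffff'fffe + 2 == 0 mod 2^32), so the subsequent pointer
+ * addition is a provable no-op and the access is safe.
+ */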
+ {
+ "bounds check after wrapping 32-bit addition",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
+ /* r1 = 0x7fff'ffff */
+ BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
+ /* r1 = 0xffff'fffe */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ /* r1 = 0 */
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
+ /* no-op */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* access at offset 0 */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT
+ },
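+ /* A 32-bit shift by a count of 32 is undefined and behaves
+ * differently across architectures, so the verifier must treat the
+ * result as unknown. Even after masking with 0xffff the maximum
+ * still exceeds the 8-byte map value, hence the rejection.
+ */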
+ {
+ "bounds check after shift with oversized count operand",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_2, 32),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ /* r1 = (u32)1 << (u32)32 = ? */
+ BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
+ /* r1 = [0x0000, 0xffff] */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
+ /* computes unknown pointer, potentially OOB */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 max value is outside of the array range",
+ .result = REJECT
+ },
+ {
+ "bounds check after right shift of maybe-negative number",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ /* r1 = [0x00, 0xff] */
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ /* r1 = [-0x01, 0xfe] */
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* r1 = 0 or 0xff'ffff'ffff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* r1 = 0 or 0xffff'ffff'ffff */
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
+ /* computes unknown pointer, potentially OOB */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* potentially OOB access */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ /* exit */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R0 unbounded memory access",
+ .result = REJECT
+ },
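+ /* The off+size overflow tests accumulate pointer offsets that each
+ * fit in 32 bits but whose total, plus the access size, overflows a
+ * signed 32-bit quantity. The verifier must track the full offset,
+ * as the errstr values below document.
+ */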
+ {
+ "bounds check map access with off+size signed 32bit overflow. test1",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "map_value pointer and 2147483646",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test2",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "pointer offset 1073741822",
+ .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test3",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "pointer offset -1073741822",
+ .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
+ .result = REJECT
+ },
+ {
+ "bounds check map access with off+size signed 32bit overflow. test4",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 1000000),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "map_value pointer and 1000000000000",
+ .result = REJECT
+ },
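+ /* State pruning must not consider a state where r0 holds a scalar
+ * equivalent to one where r0 holds a pointer. Both programs below are
+ * safe for privileged users (and return the pointer value), but
+ * unprivileged loads must be rejected as an address leak.
+ */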
+ {
+ "pointer/scalar confusion in state equality check (way 1)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_A(1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_JMP_A(0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr as return value"
+ },
+ {
+ "pointer/scalar confusion in state equality check (way 2)",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_JMP_A(1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr as return value"
+ },
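+ /* Variable-offset access tests: a ctx pointer may never be
+ * dereferenced at a variable offset, and in this kernel version the
+ * same holds for direct stack loads. Indirect (helper-mediated)
+ * stack access at a variable offset is allowed only when every slot
+ * the offset could select is initialized.
+ */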
+ {
+ "variable-offset ctx access",
+ .insns = {
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ /* Add it to skb. We now have either &skb->len or
+ * &skb->pkt_type, but we don't know which.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ /* Dereference it. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "variable ctx access var_off=(0x0; 0x4)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "variable-offset stack access",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+ /* Add it to fp. We now have either fp-4 or fp-8, but
+ * we don't know which.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, out of bound",
+ .insns = {
+ /* Fill the top 8 bytes of the stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
+ /* Add it to fp. We now have either fp-4 or fp-8, but
+ * we don't know which.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .errstr = "invalid stack type R2 var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, max_off+size > max_initialized",
+ .insns = {
+ /* Fill only the second-from-top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-12 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, min_off < min_initialized",
+ .insns = {
+ /* Fill only the top 8 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
+ * which. fp-16 size 8 is partially uninitialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 5 },
+ .errstr = "invalid indirect read from stack var_off",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "indirect variable-offset stack access, ok",
+ .insns = {
+ /* Fill the top 16 bytes of the stack. */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* Get an unknown value. */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* Make it small and 4-byte aligned. */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't
+ * know which; either way it points to initialized stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
+ /* Dereference it indirectly. */
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 6 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
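+ /* Direct stack access with 32-bit wraparound: two large immediate
+ * additions to fp would wrap a 32-bit offset. The verifier tracks
+ * offsets at full width and rejects them as out of range, as the
+ * fp-pointer errstr values below show.
+ */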
+ {
+ "direct stack access with 32-bit wraparound. test1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ },
+ .errstr = "fp pointer and 2147483647",
+ .result = REJECT
+ },
+ {
+ "direct stack access with 32-bit wraparound. test2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ },
+ .errstr = "fp pointer and 1073741823",
+ .result = REJECT
+ },
+ {
+ "direct stack access with 32-bit wraparound. test3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_EXIT_INSN()
+ },
+ .errstr = "fp pointer offset 1073741822",
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result = REJECT
+ },
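+ /* Liveness pruning: JGE against 0 is always true for an unsigned
+ * comparison, so both writes to r0 below are dead code and r0 is
+ * still unwritten at BPF_EXIT, triggering "R0 !read_ok".
+ */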
+ {
+ "liveness pruning and write screening",
+ .insns = {
+ /* Get an unknown value */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ /* branch conditions teach us nothing about R2 */
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_LWT_IN,
+ },
+ {
+ "varlen_map_value_access pruning",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_1, 0),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 3 },
+ .errstr_unpriv = "R0 leaks addr",
+ .errstr = "R0 unbounded memory access",
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
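+ /* Byte-swap (BPF_END) is only defined in the 32-bit BPF_ALU class;
+ * encoding it with BPF_ALU64 yields the unknown opcode 0xd7 and must
+ * be rejected.
+ */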
+ {
+ "invalid 64-bit BPF_END",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ {
+ .code = BPF_ALU64 | BPF_END | BPF_TO_LE,
+ .dst_reg = BPF_REG_0,
+ .src_reg = 0,
+ .off = 0,
+ .imm = 32,
+ },
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "unknown opcode d7",
+ .result = REJECT,
+ },
+ {
+ "XDP, using ifindex from netdev",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, ingress_ifindex)),
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .retval = 1,
+ },
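+ /* The meta access tests cover the XDP metadata area, which sits
+ * immediately before the packet data. Loads through data_meta must
+ * be bounded against data (not data_end), mirroring the usual
+ * packet-pointer range checks.
+ */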
+ {
+ "meta access, test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet, off=-8",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test5",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
+ BPF_MOV64_IMM(BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_xdp_adjust_meta),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R3 !read_ok",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test7",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test8",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test9",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test10",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_IMM(BPF_REG_5, 42),
+ BPF_MOV64_IMM(BPF_REG_6, 24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid access to packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test11",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_IMM(BPF_REG_5, 42),
+ BPF_MOV64_IMM(BPF_REG_6, 24),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "meta access, test12",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "arithmetic ops make PTR_TO_CTX unusable",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
+ offsetof(struct __sk_buff, data) -
+ offsetof(struct __sk_buff, mark)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "pkt_end - pkt_start is allowed",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
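+ /* Pointer arithmetic on PTR_TO_PACKET_END is forbidden (next two
+ * tests). The long series after that enumerates every comparison op
+ * (JGT/JLT/JGE/JLE) with pkt_data' and pkt_end in both operand
+ * orders, checking that only the branch which proves enough headroom
+ * permits the subsequent load.
+ */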
+ {
+ "XDP pkt read, pkt_end mangling, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "XDP pkt read, pkt_end mangling, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R3 pointer arithmetic on PTR_TO_PACKET_END",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "XDP pkt read, pkt_data' > pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' > pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' > pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end > pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end > pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end > pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' < pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' < pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' < pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end < pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end < pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end < pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' >= pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end >= pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end >= pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end >= pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' <= pkt_end, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' <= pkt_end, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data' <= pkt_end, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end <= pkt_data', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end <= pkt_data', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_end <= pkt_data', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' > pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' > pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' > pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data > pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data > pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data > pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' < pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data < pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data < pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data < pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' >= pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data >= pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data >= pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data >= pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' <= pkt_data, good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' <= pkt_data, bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_meta' <= pkt_data, bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', good access",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', bad access 1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "XDP pkt read, pkt_data <= pkt_meta', bad access 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data_meta)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R1 offset is outside of the packet",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
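+	/* The "deducing bounds from const" tests check that the verifier
+	 * tracks the scalar bounds implied by JSGE/JSLE comparisons
+	 * against constants, and still rejects pointer/scalar subtraction
+	 * and dereference of a modified ctx pointer where those bounds
+	 * do not make the access safe.
+	 */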
+ {
+ "check deducing bounds from const, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "check deducing bounds from const, 3",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R6 has pointer with unsupported alu operation",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+ {
+ "check deducing bounds from const, 5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 6",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, ~0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check deducing bounds from const, 8",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, ~0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "check deducing bounds from const, 9",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
+ },
+ {
+ "check deducing bounds from const, 10",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
+ /* Marks reg as unknown. */
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+ .result = REJECT,
+ },
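+	/* BPF_PROG_TYPE_CGROUP_SOCK programs must provably return 0 or 1:
+	 * the tests below show that a masked R0 (AND 1) is accepted while
+	 * wider or unknown values are rejected, with the (value; mask)
+	 * pairs visible in the errstrs.
+	 */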
+ {
+ "bpf_exit with invalid return code. test1",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x0; 0xffffffff)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x0; 0x3)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test4",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test5",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has value (0x2; 0x0)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test6",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 is not a known value (ctx)",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
+ {
+ "bpf_exit with invalid return code. test7",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "R0 has unknown scalar value",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ },
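+	/* The "calls:" tests exercise bpf-to-bpf calls, i.e. BPF_CALL
+	 * with src_reg set to BPF_PSEUDO_CALL: call-graph and jump-range
+	 * validation, rejection of recursion and back-edges, argument and
+	 * return-value tracking, and stack access across call frames.
+	 */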
+ {
+ "calls: basic sanity",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+		"calls: not on unprivileged",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: div by 0 in subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_2, 0),
+ BPF_MOV32_IMM(BPF_REG_3, 1),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: multiple ret types in subprog 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: multiple ret types in subprog 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 9),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6,
+ offsetof(struct __sk_buff, data)),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 16 },
+ .result = REJECT,
+ .errstr = "R0 min value is outside of the array range",
+ },
+ {
+ "calls: overlapping caller/callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn is not an exit or jmp",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong recursive calls",
+ .insns = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong src reg",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "BPF_CALL uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "calls: wrong off value",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, -1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "BPF_CALL uses reserved fields",
+ .result = REJECT,
+ },
+ {
+ "calls: jump back loop",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn 0 to 0",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 2",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: conditional call 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 4",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -5),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: conditional call 5",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -6),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: conditional call 6",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -2),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge from insn",
+ .result = REJECT,
+ },
+ {
+ "calls: using r0 returned by callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+ },
+ {
+ "calls: using uninit r0 from callee",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "!read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: callee is using r1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_ACT,
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN,
+ },
+ {
+ "calls: callee using args1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = POINTER_VALUE,
+ },
+ {
+ "calls: callee using wrong args2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "R2 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: callee using two args",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
+ offsetof(struct __sk_buff, len)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_6,
+ offsetof(struct __sk_buff, len)),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
+ },
+ {
+ "calls: callee changing pkt pointers",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_8, BPF_REG_7, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ /* clear_all_pkt_pointers() has to walk all frames
+ * to make sure that pkt pointers in the caller
+			 * are cleared when the callee calls a helper that
+			 * adjusts the packet size
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_xdp_adjust_head),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R6 invalid mem access 'inv'",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: two calls with args",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = TEST_DATA_LEN + TEST_DATA_LEN,
+ },
+ {
+ "calls: calls with stack arith",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -64),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "calls: calls with misaligned stack access",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -61),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -63),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
+ .errstr = "misaligned stack access",
+ .result = REJECT,
+ },
+ {
+ "calls: calls control flow, jump test",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 43),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 43,
+ },
+ {
+ "calls: calls control flow, jump test 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 43),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "jump out of range from insn 1 to 4",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with bad jump",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range from insn 11 to 9",
+ .result = REJECT,
+ },
+ {
+ "calls: recursive call. test1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "calls: recursive call. test2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "back-edge",
+ .result = REJECT,
+ },
+ {
+ "calls: unreachable code",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "unreachable insn 6",
+ .result = REJECT,
+ },
+ {
+ "calls: invalid call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "invalid destination",
+ .result = REJECT,
+ },
+ {
+ "calls: invalid call 2",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 0x7fffffff),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "invalid destination",
+ .result = REJECT,
+ },
+ {
+ "calls: jumping across function bodies. test1",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: jumping across function bodies. test2",
+ .insns = {
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "jump out of range",
+ .result = REJECT,
+ },
+ {
+ "calls: call without exit",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -2),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "calls: call into middle of ld_imm64",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn",
+ .result = REJECT,
+ },
+ {
+ "calls: call into middle of other call",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "last insn",
+ .result = REJECT,
+ },
+ {
+ "calls: ld_abs with changing ctx data in callee",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with bad fallthrough",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, len)),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .errstr = "not an exit",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with stack read",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls with stack write",
+ .insns = {
+ /* main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 7),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
+ /* write into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* read from stack frame of main prog */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: stack overflow using two frames (pre-call access)",
+ .insns = {
+ /* prog 1 */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* prog 2 */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "combined stack size",
+ .result = REJECT,
+ },
+ {
+ "calls: stack overflow using two frames (post-call access)",
+ .insns = {
+ /* prog 1 */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 2),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_EXIT_INSN(),
+
+ /* prog 2 */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "combined stack size",
+ .result = REJECT,
+ },
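+	/* The three-frame tests sum stack usage along the deepest call
+	 * chain; chains whose combined stack size stays under 512 bytes
+	 * are accepted, larger ones are rejected ("combined stack").
+	 * The per-test comments list the individual frame sizes.
+	 */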
+ {
+ "calls: stack depth check using three frames. test1",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ /* stack_main=32, stack_A=256, stack_B=64
+ * and max(main+A, main+A+B) < 512
+ */
+ .result = ACCEPT,
+ },
+ {
+ "calls: stack depth check using three frames. test2",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 5), /* call B */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -3), /* call A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ /* stack_main=32, stack_A=64, stack_B=256
+ * and max(main+A, main+A+B) < 512
+ */
+ .result = ACCEPT,
+ },
+ {
+ "calls: stack depth check using three frames. test3",
+ .insns = {
+ /* main */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 8), /* call B */
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_6, 0, 1),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -64, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 10, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -224, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -3),
+ /* B */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, -6), /* call A */
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -256, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ /* stack_main=64, stack_A=224, stack_B=256
+ * and max(main+A, main+A+B) > 512
+ */
+ .errstr = "combined stack",
+ .result = REJECT,
+ },
+ {
+ "calls: stack depth check using three frames. test4",
+ /* void main(void) {
+ * func1(0);
+ * func1(1);
+ * func2(1);
+ * }
+ * void func1(int alloc_or_recurse) {
+ * if (alloc_or_recurse) {
+ * frame_pointer[-300] = 1;
+ * } else {
+ * func2(alloc_or_recurse);
+ * }
+ * }
+ * void func2(int alloc_or_recurse) {
+ * if (alloc_or_recurse) {
+ * frame_pointer[-300] = 1;
+ * }
+ * }
+ */
+ .insns = {
+ /* main */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 6), /* call A */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4), /* call A */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 7), /* call B */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_EXIT_INSN(),
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_ST_MEM(BPF_B, BPF_REG_10, -300, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = REJECT,
+ .errstr = "combined stack",
+ },
+ {
+ "calls: stack depth check using three frames. test5",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call A */
+ BPF_EXIT_INSN(),
+ /* A */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call B */
+ BPF_EXIT_INSN(),
+ /* B */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call C */
+ BPF_EXIT_INSN(),
+ /* C */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call D */
+ BPF_EXIT_INSN(),
+ /* D */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call E */
+ BPF_EXIT_INSN(),
+ /* E */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call F */
+ BPF_EXIT_INSN(),
+ /* F */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call G */
+ BPF_EXIT_INSN(),
+ /* G */
+ BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 1), /* call H */
+ BPF_EXIT_INSN(),
+ /* H */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "call stack",
+ .result = REJECT,
+ },
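+	/* test5 above uses no stack at all; its main -> A -> ... -> H
+	 * chain is rejected purely on call depth, hence the "call stack"
+	 * errstr rather than "combined stack".
+	 */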
+ {
+ "calls: spill into caller stack frame",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "cannot spill",
+ .result = REJECT,
+ },
+ {
+ "calls: write into caller stack frame",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "calls: write into callee stack frame",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .errstr = "cannot return stack pointer",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls with stack write and void return",
+ .insns = {
+ /* main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* write into stack frame of main prog */
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_EXIT_INSN(), /* void return */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = ACCEPT,
+ },
+ {
+ "calls: ambiguous return value",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "allowed for root only",
+ .result_unpriv = REJECT,
+ .errstr = "R0 !read_ok",
+ .result = REJECT,
+ },
+ {
+ "calls: two calls that return map_value",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
+
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+			/* fetch second map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that return map_value with bool condition",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+			/* fetch second map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(), /* return 1 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that return map_value with incorrect bool check",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* call 3rd function twice */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* first time with fp-8 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ /* second time with fp-16 */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ /* fetch second map_value_ptr from the stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ /* lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(), /* return 0 */
+ /* write map_value_ptr into stack frame of main prog */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(), /* return 1 */
+ },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map1 = { 23 },
+ .result = REJECT,
+ .errstr = "invalid read from stack off -16+0 size 8",
+ },
+ {
+ "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: two calls that receive map_value via arg=ptr_stack_of_caller. test2",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), /* 20 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, /* 24 */
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), /* 34 */
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two jumps that receive map_value via arg=ptr_stack_of_jumper. test3",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -24),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0), /* 26 */
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* write map_value_ptr into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), /* 30 */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), /* 34 */
+ BPF_JMP_IMM(BPF_JA, 0, 0, -30),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -8),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "invalid access to map value, value_size=8 off=2 size=8",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: two calls that receive map_value_ptr_or_null via arg. test1",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 1 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = ACCEPT,
+ },
+ {
+ "calls: two calls that receive map_value_ptr_or_null via arg. test2",
+ .insns = {
+ /* main prog */
+ /* pass fp-16, fp-8 into a function */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
+ /* 1st lookup from map */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_8, 1),
+
+ /* 2nd lookup from map */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-16 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_9, 1),
+
+ /* call 3rd func with fp-8, 0|1, fp-16, 0|1 */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_9),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 2 */
+ /* if arg2 == 1 do *arg1 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 1, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+
+ /* if arg4 == 0 do *arg3 = 0 */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_4, 0, 2),
+ /* fetch map_value_ptr from the stack of this function */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ /* write into map value */
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map1 = { 12, 22 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'inv'",
+ },
+ {
+ "calls: pkt_ptr spill into caller stack",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = POINTER_VALUE,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ /* Marking is still kept, but not safe in all cases. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 3",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* Marking is still kept and safe here. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* now the pkt range is verified, read pkt_ptr from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ /* Check marking propagated. */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_ST_MEM(BPF_W, BPF_REG_4, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 5",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "same insn cannot be used with different",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 6",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R4 invalid mem access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 7",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "R4 invalid mem access",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 8",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
+ /* spill checked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: pkt_ptr spill into caller stack 9",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_4, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_IMM(BPF_REG_5, 0),
+ /* spill unchecked pkt_ptr into stack of caller */
+ BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_5, 1),
+ /* don't read back pkt_ptr from stack here */
+ /* write 4 bytes into packet */
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "invalid access to packet",
+ .result = REJECT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "calls: caller stack init to zero or map_value_or_null",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
+ /* fetch map_value_or_null or const_zero from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ /* store into map_value */
+ BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+
+ /* subprog 1 */
+ /* if (ctx == 0) return; */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8),
+ /* else bpf_map_lookup() and *(fp - 8) = r0 */
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ /* write map_value_ptr_or_null into stack frame of main prog at fp-8 */
+ BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 13 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "calls: stack init to zero and pruning",
+ .insns = {
+ /* first make allocated_stack 16 bytes */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ /* now fork the execution such that the false branch
+ * of the JGT insn will be verified second and it skips
+ * the zero init of the fp-8 stack slot. If stack liveness
+ * marking is missing live_read marks from the map_lookup
+ * call processing, then pruning will incorrectly assume
+ * that the fp-8 stack slot was unused in the fall-through
+ * branch and will accept the program incorrectly.
+ */
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 6 },
+ .errstr = "invalid indirect read from stack off -8+0 size 8",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_XDP,
+ },
+ {
+ "calls: two calls returning different map pointers for lookup (hash, array)",
+ .insns = {
+ /* main prog */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_CALL_REL(11),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_CALL_REL(12),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* subprog 1 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* subprog 2 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map2 = { 13 },
+ .fixup_map4 = { 16 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: two calls returning different map pointers for lookup (hash, map in map)",
+ .insns = {
+ /* main prog */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_CALL_REL(11),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_CALL_REL(12),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* subprog 1 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* subprog 2 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_in_map = { 16 },
+ .fixup_map4 = { 13 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'map_ptr'",
+ },
+ {
+ "cond: two branches returning different map pointers for lookup (tail, tail)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 2 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "cond: two branches returning same map pointers for lookup (tail, tail)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog2 = { 2, 5 },
+ .result_unpriv = ACCEPT,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "search pruning: all branches should be verified (nop operation)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_A(1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "R6 invalid mem access 'inv'",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "search pruning: all branches should be verified (invalid stack access)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
+ BPF_JMP_A(1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .errstr = "invalid read from stack off -16+0 size 8",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "jit: lsh, rsh, arsh by 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x7f, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_1, 0xfeffffffffffffffULL),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 32),
+ BPF_LD_IMM64(BPF_REG_2, 0xfeffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: mov32 for ldimm64, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_LD_IMM64(BPF_REG_1, 0x1ffffffffULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xffffffffULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "jit: various mul tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xefefefULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_3, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+ },
+ {
+ "xadd/w check unaligned stack",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "misaligned stack access off",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
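+ /* Note on the test above: the 4-byte BPF_W xadd targets fp-7,
+ * which is not 4-byte aligned, hence the expected misaligned
+ * stack access error.
+ */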
+ {
+ "xadd/w check unaligned map",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map1 = { 3 },
+ .result = REJECT,
+ .errstr = "misaligned value access off",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ },
+ {
+ "xadd/w check unaligned pkt",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+ offsetof(struct xdp_md, data)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+ offsetof(struct xdp_md, data_end)),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 99),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 6),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
+ BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
+ BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "BPF_XADD stores into R2 packet",
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ },
+ {
+ "xadd/w check whether src/dst got mangled, 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 3,
+ },
+ {
+ "xadd/w check whether src/dst got mangled, 2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .retval = 3,
+ },
+ {
+ "bpf_get_stack return R0 within range",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
+ BPF_MOV64_IMM(BPF_REG_4, 256),
+ BPF_EMIT_CALL(BPF_FUNC_get_stack),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
+ BPF_JMP_REG(BPF_JSLT, BPF_REG_8, BPF_REG_1, 16),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
+ BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_EMIT_CALL(BPF_FUNC_get_stack),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map2 = { 4 },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ },
+ {
+ "ld_abs: invalid op 1",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_DW, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "unknown opcode",
+ },
+ {
+ "ld_abs: invalid op 2",
+ .insns = {
+ BPF_MOV32_IMM(BPF_REG_0, 256),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_IND(BPF_DW, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "unknown opcode",
+ },
+ {
+ "ld_abs: nmap reduced",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_H, 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 28),
+ BPF_LD_ABS(BPF_H, 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 26),
+ BPF_MOV32_IMM(BPF_REG_0, 18),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -64),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -64),
+ BPF_LD_IND(BPF_W, BPF_REG_7, 14),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -60),
+ BPF_MOV32_IMM(BPF_REG_0, 280971478),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -60),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 15),
+ BPF_LD_ABS(BPF_H, 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0x806, 13),
+ BPF_MOV32_IMM(BPF_REG_0, 22),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -56),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -56),
+ BPF_LD_IND(BPF_H, BPF_REG_7, 14),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -52),
+ BPF_MOV32_IMM(BPF_REG_0, 17366),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -48),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_10, -48),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -52),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_7),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 256),
+ BPF_EXIT_INSN(),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 256,
+ },
+ {
+ "ld_abs: div + abs, test 1",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 3),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+ BPF_LD_ABS(BPF_B, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 10,
+ },
+ {
+ "ld_abs: div + abs, test 2",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 3),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_2, 2),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_2),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_8, BPF_REG_0),
+ BPF_LD_ABS(BPF_B, 128),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_0),
+ BPF_LD_IND(BPF_B, BPF_REG_8, -70),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "ld_abs: div + abs, test 3",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_LD_ABS(BPF_B, 3),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "ld_abs: div + abs, test 4",
+ .insns = {
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_LD_ABS(BPF_B, 256),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_7),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 10, 20, 30, 40, 50,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0,
+ },
+ {
+ "ld_abs: vlan + abs, test 1",
+ .insns = { },
+ .data = {
+ 0x34,
+ },
+ .fill_helper = bpf_fill_ld_abs_vlan_push_pop,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 0xbef,
+ },
+ {
+ "ld_abs: vlan + abs, test 2",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_skb_vlan_push),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_LD_ABS(BPF_B, 0),
+ BPF_LD_ABS(BPF_H, 0),
+ BPF_LD_ABS(BPF_W, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .data = {
+ 0x34,
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "ld_abs: jump around ld_abs",
+ .insns = { },
+ .data = {
+ 10, 11,
+ },
+ .fill_helper = bpf_fill_jump_around_ld_abs,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 10,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 1",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 4090,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 2",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2047,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 3",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 511,
+ },
+ {
+ "ld_dw: xor semi-random 64 bit imms, test 4",
+ .insns = { },
+ .data = { },
+ .fill_helper = bpf_fill_rand_ld_dw,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 5,
+ },
+ {
+ "pass unmodified ctx pointer to helper",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_update),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "pass modified ctx pointer to helper, 1",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_update),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "dereference of modified ctx ptr",
+ },
+ {
+ "pass modified ctx pointer to helper, 2",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_socket_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr_unpriv = "dereference of modified ctx ptr",
+ .errstr = "dereference of modified ctx ptr",
+ },
+ {
+ "pass modified ctx pointer to helper, 3",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_csum_update),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "variable ctx access var_off=(0x0; 0x4)",
+ },
+ {
+ "mov64 src == dst",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_2),
+ /* Check bounds are OK */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "mov64 src != dst",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
+ /* Check bounds are OK */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ },
+ {
+ "calls: ctx read at start of subprog",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_0, BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
+ .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+ },
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+ int len;
+
+ for (len = MAX_INSNS - 1; len > 0; --len)
+ if (fp[len].code != 0 || fp[len].imm != 0)
+ break;
+ return len + 1;
+}
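+/* Sketch of the scan above: each test's .insns array is implicitly
+ * zero-padded up to MAX_INSNS, so e.g.
+ *
+ *	{ BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() }
+ *
+ * followed by zeroed slots yields a length of 2, BPF_EXIT_INSN() being
+ * the last insn with a non-zero code.
+ */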
+
+static int create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem)
+{
+ int fd;
+
+ fd = bpf_create_map(type, size_key, size_value, max_elem,
+ type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
+ if (fd < 0)
+ printf("Failed to create hash map '%s'!\n", strerror(errno));
+
+ return fd;
+}
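+/* Minimal usage sketch (sizes are illustrative): the fixups below create
+ * 1-element maps this way, e.g. a u64 -> u64 hash:
+ *
+ *	int fd = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ *			    sizeof(long long), 1);
+ */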
+
+static int create_prog_dummy1(enum bpf_prog_type prog_type)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(prog_type, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_dummy2(enum bpf_prog_type prog_type, int mfd, int idx)
+{
+ struct bpf_insn prog[] = {
+ BPF_MOV64_IMM(BPF_REG_3, idx),
+ BPF_LD_MAP_FD(BPF_REG_2, mfd),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 41),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_load_program(prog_type, prog,
+ ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+}
+
+static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
+ int p1key)
+{
+ int p2key = 1;
+ int mfd, p1fd, p2fd;
+
+ mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
+ sizeof(int), max_elem, 0);
+ if (mfd < 0) {
+ printf("Failed to create prog array '%s'!\n", strerror(errno));
+ return -1;
+ }
+
+ p1fd = create_prog_dummy1(prog_type);
+ p2fd = create_prog_dummy2(prog_type, mfd, p2key);
+ if (p1fd < 0 || p2fd < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p1key, &p1fd, BPF_ANY) < 0)
+ goto out;
+ if (bpf_map_update_elem(mfd, &p2key, &p2fd, BPF_ANY) < 0)
+ goto out;
+ close(p2fd);
+ close(p1fd);
+
+ return mfd;
+out:
+ close(p2fd);
+ close(p1fd);
+ close(mfd);
+ return -1;
+}
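+/* Resulting prog array layout as built above: slot p1key holds dummy
+ * prog 1 (a plain "return 42"), slot 1 holds dummy prog 2, which
+ * tail-calls back into the array at p2key and falls through to
+ * "return 41" if the tail call fails.
+ */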
+
+static int create_map_in_map(void)
+{
+ int inner_map_fd, outer_map_fd;
+
+ inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(int), 1, 0);
+ if (inner_map_fd < 0) {
+ printf("Failed to create array '%s'!\n", strerror(errno));
+ return inner_map_fd;
+ }
+
+ outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+ sizeof(int), inner_map_fd, 1, 0);
+ if (outer_map_fd < 0)
+ printf("Failed to create array of maps '%s'!\n",
+ strerror(errno));
+
+ close(inner_map_fd);
+
+ return outer_map_fd;
+}
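+/* The outer map built above is a 1-slot BPF_MAP_TYPE_ARRAY_OF_MAPS; the
+ * inner int-array fd only serves as a template for the slot type, so it
+ * can be closed once the outer map exists.
+ */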
+
+static int create_cgroup_storage(void)
+{
+ int fd;
+
+ fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
+ sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, 0);
+ if (fd < 0)
+ printf("Failed to create array '%s'!\n", strerror(errno));
+
+ return fd;
+}
+
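+/* (UINT_MAX >> 8) bytes, i.e. ~16 MiB of verifier log buffer. */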
+static char bpf_vlog[UINT_MAX >> 8];
+
+static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
+ struct bpf_insn *prog, int *map_fds)
+{
+ int *fixup_map1 = test->fixup_map1;
+ int *fixup_map2 = test->fixup_map2;
+ int *fixup_map3 = test->fixup_map3;
+ int *fixup_map4 = test->fixup_map4;
+ int *fixup_prog1 = test->fixup_prog1;
+ int *fixup_prog2 = test->fixup_prog2;
+ int *fixup_map_in_map = test->fixup_map_in_map;
+ int *fixup_cgroup_storage = test->fixup_cgroup_storage;
+
+ if (test->fill_helper)
+ test->fill_helper(test);
+
+ /* Allocating HTs with 1 elem is fine here, since we only test
+ * the verifier and do not do a runtime lookup, so the only
+ * thing that really matters in this case is the value size.
+ */
+ if (*fixup_map1) {
+ map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(long long), 1);
+ do {
+ prog[*fixup_map1].imm = map_fds[0];
+ fixup_map1++;
+ } while (*fixup_map1);
+ }
+
+ if (*fixup_map2) {
+ map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(struct test_val), 1);
+ do {
+ prog[*fixup_map2].imm = map_fds[1];
+ fixup_map2++;
+ } while (*fixup_map2);
+ }
+
+ if (*fixup_map3) {
+ map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(struct other_val), 1);
+ do {
+ prog[*fixup_map3].imm = map_fds[2];
+ fixup_map3++;
+ } while (*fixup_map3);
+ }
+
+ if (*fixup_map4) {
+ map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1);
+ do {
+ prog[*fixup_map4].imm = map_fds[3];
+ fixup_map4++;
+ } while (*fixup_map4);
+ }
+
+ if (*fixup_prog1) {
+ map_fds[4] = create_prog_array(prog_type, 4, 0);
+ do {
+ prog[*fixup_prog1].imm = map_fds[4];
+ fixup_prog1++;
+ } while (*fixup_prog1);
+ }
+
+ if (*fixup_prog2) {
+ map_fds[5] = create_prog_array(prog_type, 8, 7);
+ do {
+ prog[*fixup_prog2].imm = map_fds[5];
+ fixup_prog2++;
+ } while (*fixup_prog2);
+ }
+
+ if (*fixup_map_in_map) {
+ map_fds[6] = create_map_in_map();
+ do {
+ prog[*fixup_map_in_map].imm = map_fds[6];
+ fixup_map_in_map++;
+ } while (*fixup_map_in_map);
+ }
+
+ if (*fixup_cgroup_storage) {
+ map_fds[7] = create_cgroup_storage();
+ do {
+ prog[*fixup_cgroup_storage].imm = map_fds[7];
+ fixup_cgroup_storage++;
+ } while (*fixup_cgroup_storage);
+ }
+}
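+/* Illustrative sketch of the fixup mechanism (the index is hypothetical):
+ * a test declaring
+ *
+ *	.insns = { ..., BPF_LD_MAP_FD(BPF_REG_1, 0), ... },
+ *	.fixup_map1 = { 3 },
+ *
+ * gets prog[3].imm patched with the fd of the freshly created hash map
+ * before the program is handed to the verifier.
+ */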
+
+static int set_admin(bool admin)
+{
+ cap_t caps;
+ const cap_value_t cap_val = CAP_SYS_ADMIN;
+ int ret = -1;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return -1;
+ }
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+ admin ? CAP_SET : CAP_CLEAR)) {
+ perror("cap_set_flag");
+ goto out;
+ }
+ if (cap_set_proc(caps)) {
+ perror("cap_set_proc");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (cap_free(caps))
+ perror("cap_free");
+ return ret;
+}
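+/* Note: only the effective set is toggled above; CAP_SYS_ADMIN stays in
+ * the permitted set, which is what allows set_admin(true) to re-raise it
+ * after an unprivileged test run.
+ */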
+
+static void do_test_single(struct bpf_test *test, bool unpriv,
+ int *passes, int *errors)
+{
+ int fd_prog, expected_ret, alignment_prevented_execution;
+ int prog_len, prog_type = test->prog_type;
+ struct bpf_insn *prog = test->insns;
+ int map_fds[MAX_NR_MAPS];
+ const char *expected_err;
+ uint32_t expected_val;
+ uint32_t retval;
+ __u32 pflags;
+ int i, err;
+
+ for (i = 0; i < MAX_NR_MAPS; i++)
+ map_fds[i] = -1;
+
+ if (!prog_type)
+ prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ do_test_fixup(test, prog_type, prog, map_fds);
+ prog_len = probe_filter_length(prog);
+
+ pflags = 0;
+ if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
+ pflags |= BPF_F_STRICT_ALIGNMENT;
+ if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+ pflags |= BPF_F_ANY_ALIGNMENT;
+ fd_prog = bpf_verify_program(prog_type, prog, prog_len, pflags,
+ "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 1);
+
+ expected_ret = unpriv && test->result_unpriv != UNDEF ?
+ test->result_unpriv : test->result;
+ expected_err = unpriv && test->errstr_unpriv ?
+ test->errstr_unpriv : test->errstr;
+ expected_val = unpriv && test->retval_unpriv ?
+ test->retval_unpriv : test->retval;
+
+ alignment_prevented_execution = 0;
+
+ if (expected_ret == ACCEPT) {
+ if (fd_prog < 0) {
+ printf("FAIL\nFailed to load prog '%s'!\n",
+ strerror(errno));
+ goto fail_log;
+ }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (fd_prog >= 0 &&
+ (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)) {
+ alignment_prevented_execution = 1;
+ goto test_ok;
+ }
+#endif
+ } else {
+ if (fd_prog >= 0) {
+ printf("FAIL\nUnexpected success to load!\n");
+ goto fail_log;
+ }
+ if (!strstr(bpf_vlog, expected_err)) {
+ printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
+ expected_err, bpf_vlog);
+ goto fail_log;
+ }
+ }
+
+ if (fd_prog >= 0) {
+ __u8 tmp[TEST_DATA_LEN << 2];
+ __u32 size_tmp = sizeof(tmp);
+
+ if (unpriv)
+ set_admin(true);
+ err = bpf_prog_test_run(fd_prog, 1, test->data,
+ sizeof(test->data), tmp, &size_tmp,
+ &retval, NULL);
+ if (unpriv)
+ set_admin(false);
+ if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
+ printf("Unexpected bpf_prog_test_run error\n");
+ goto fail_log;
+ }
+ if (!err && retval != expected_val &&
+ expected_val != POINTER_VALUE) {
+ printf("FAIL retval %d != %d\n", retval, expected_val);
+ goto fail_log;
+ }
+ }
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+test_ok:
+#endif
+ (*passes)++;
+ printf("OK%s\n", alignment_prevented_execution ?
+ " (NOTE: not executed due to unknown alignment)" : "");
+close_fds:
+ close(fd_prog);
+ for (i = 0; i < MAX_NR_MAPS; i++)
+ close(map_fds[i]);
+ sched_yield();
+ return;
+fail_log:
+ (*errors)++;
+ printf("%s", bpf_vlog);
+ goto close_fds;
+}
+
+static bool is_admin(void)
+{
+ cap_t caps;
+ cap_flag_value_t sysadmin = CAP_CLEAR;
+ const cap_value_t cap_val = CAP_SYS_ADMIN;
+
+#ifdef CAP_IS_SUPPORTED
+ if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
+ perror("cap_get_flag");
+ return false;
+ }
+#endif
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return false;
+ }
+ if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
+ perror("cap_get_flag");
+ if (cap_free(caps))
+ perror("cap_free");
+ return (sysadmin == CAP_SET);
+}
+
+static void get_unpriv_disabled(void)
+{
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
+ if (!fd) {
+ perror("fopen /proc/sys/"UNPRIV_SYSCTL);
+ unpriv_disabled = true;
+ return;
+ }
+ if (fgets(buf, 2, fd) == buf && atoi(buf))
+ unpriv_disabled = true;
+ fclose(fd);
+}
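+/* A non-zero value in the sysctl file means unprivileged BPF is
+ * administratively disabled; unpriv test passes are then skipped (or the
+ * whole run aborts, see main() below).
+ */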
+
+static bool test_as_unpriv(struct bpf_test *test)
+{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	/* Some architectures have strict alignment requirements. In
+	 * that case, the BPF verifier detects if a program has
+	 * unaligned accesses and rejects them. A user can pass
+	 * BPF_F_ANY_ALIGNMENT when loading a program to override this
+	 * check. That, however, will only work when a privileged user
+	 * loads the program. An unprivileged user loading a program
+	 * with this flag set will be rejected prior to entering the
+	 * verifier.
+	 */
+ if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+ return false;
+#endif
+ return !test->prog_type ||
+ test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
+ test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
+}
+
+static int do_test(bool unpriv, unsigned int from, unsigned int to)
+{
+ int i, passes = 0, errors = 0, skips = 0;
+
+ for (i = from; i < to; i++) {
+ struct bpf_test *test = &tests[i];
+
+		/* Program types that are not supported by non-root are
+		 * skipped right away; "/u" marks an unprivileged run,
+		 * "/p" a privileged one.
+		 */
+ if (test_as_unpriv(test) && unpriv_disabled) {
+ printf("#%d/u %s SKIP\n", i, test->descr);
+ skips++;
+ } else if (test_as_unpriv(test)) {
+ if (!unpriv)
+ set_admin(false);
+ printf("#%d/u %s ", i, test->descr);
+ do_test_single(test, true, &passes, &errors);
+ if (!unpriv)
+ set_admin(true);
+ }
+
+ if (unpriv) {
+ printf("#%d/p %s SKIP\n", i, test->descr);
+ skips++;
+ } else {
+ printf("#%d/p %s ", i, test->descr);
+ do_test_single(test, false, &passes, &errors);
+ }
+ }
+
+ printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
+ skips, errors);
+ return errors ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int from = 0, to = ARRAY_SIZE(tests);
+ bool unpriv = !is_admin();
+
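+	/* optional args: one test index, or an inclusive "from to" range */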
+ if (argc == 3) {
+ unsigned int l = atoi(argv[argc - 2]);
+ unsigned int u = atoi(argv[argc - 1]);
+
+ if (l < to && u < to) {
+ from = l;
+ to = u + 1;
+ }
+ } else if (argc == 2) {
+ unsigned int t = atoi(argv[argc - 1]);
+
+ if (t < to) {
+ from = t;
+ to = t + 1;
+ }
+ }
+
+ get_unpriv_disabled();
+ if (unpriv && unpriv_disabled) {
+ printf("Cannot run as unprivileged user with sysctl %s.\n",
+ UNPRIV_SYSCTL);
+ return EXIT_FAILURE;
+ }
+
+ bpf_semi_rand_init();
+ return do_test(unpriv, from, to);
+}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
new file mode 100644
index 000000000..8d6918c3b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -0,0 +1,174 @@
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+
+#include <bpf/bpf.h>
+
+#include "bpf_rlimit.h"
+
+#define LOG_SIZE (1 << 20)
+
+#define err(str...) printf("ERROR: " str)
+
+static const struct bpf_insn code_sample[] = {
+ /* We need a few instructions to pass the min log length */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+};
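+/* The helper call above is made without a valid map pointer in R1, so
+ * every load is rejected and the verifier produces a log for the tests
+ * below.
+ */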
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int load(char *log, size_t log_len, int log_level)
+{
+ union bpf_attr attr;
+
+ bzero(&attr, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
+ attr.insns = ptr_to_u64(code_sample);
+ attr.license = ptr_to_u64("GPL");
+ attr.log_buf = ptr_to_u64(log);
+ attr.log_size = log_len;
+ attr.log_level = log_level;
+
+ return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static void check_ret(int ret, int exp_errno)
+{
+ if (ret > 0) {
+ close(ret);
+ err("broken sample loaded successfully!?\n");
+ exit(1);
+ }
+
+ if (!ret || errno != exp_errno) {
+ err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
+ ret, errno, -1, exp_errno);
+ exit(1);
+ }
+}
+
+static void check_ones(const char *buf, size_t len, const char *msg)
+{
+ while (len--)
+ if (buf[len] != 1) {
+ err("%s", msg);
+ exit(1);
+ }
+}
+
+static void test_log_good(char *log, size_t buf_len, size_t log_len,
+ size_t exp_len, int exp_errno, const char *full_log)
+{
+ size_t len;
+ int ret;
+
+ memset(log, 1, buf_len);
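+	/* pre-fill with 0x01 so we can see exactly which bytes the verifier wrote */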
+
+ ret = load(log, log_len, 1);
+ check_ret(ret, exp_errno);
+
+ len = strnlen(log, buf_len);
+ if (len == buf_len) {
+ err("verifier did not NULL terminate the log\n");
+ exit(1);
+ }
+ if (exp_len && len != exp_len) {
+ err("incorrect log length expected:%zd have:%zd\n",
+ exp_len, len);
+ exit(1);
+ }
+
+ if (strchr(log, 1)) {
+ err("verifier leaked a byte through\n");
+ exit(1);
+ }
+
+ check_ones(log + len + 1, buf_len - len - 1,
+ "verifier wrote bytes past NULL termination\n");
+
+ if (memcmp(full_log, log, LOG_SIZE)) {
+ err("log did not match expected output\n");
+ exit(1);
+ }
+}
+
+static void test_log_bad(char *log, size_t log_len, int log_level)
+{
+ int ret;
+
+ ret = load(log, log_len, log_level);
+ check_ret(ret, EINVAL);
+ if (log)
+ check_ones(log, LOG_SIZE,
+ "verifier touched log with bad parameters\n");
+}
+
+int main(int argc, char **argv)
+{
+ char full_log[LOG_SIZE];
+ char log[LOG_SIZE];
+ size_t want_len;
+ int i;
+
+ memset(log, 1, LOG_SIZE);
+
+ /* Test incorrect attr */
+ printf("Test log_level 0...\n");
+ test_log_bad(log, LOG_SIZE, 0);
+
+ printf("Test log_size < 128...\n");
+ test_log_bad(log, 15, 1);
+
+ printf("Test log_buff = NULL...\n");
+ test_log_bad(NULL, LOG_SIZE, 1);
+
+ /* Test with log big enough */
+ printf("Test oversized buffer...\n");
+ test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
+
+ want_len = strlen(full_log);
+
+ printf("Test exact buffer...\n");
+ test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
+
+ printf("Test undersized buffers...\n");
+ for (i = 0; i < 64; i++) {
+ full_log[want_len - i + 1] = 1;
+ full_log[want_len - i] = 0;
+
+ test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
+ ENOSPC, full_log);
+ }
+
+ printf("test_verifier_log: OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_xdp.c b/tools/testing/selftests/bpf/test_xdp.c
new file mode 100644
index 000000000..5e7df8bb5
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp.c
@@ -0,0 +1,235 @@
+/* Copyright (c) 2016,2017 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+#include "test_iptunnel_common.h"
+
+int _version SEC("version") = 1;
+
+struct bpf_map_def SEC("maps") rxcnt = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u64),
+ .max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip),
+ .value_size = sizeof(struct iptnl_info),
+ .max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(__u32 protocol)
+{
+ __u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+ __u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ if (th + 1 > data_end)
+ return -1;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ if (uh + 1 > data_end)
+ return -1;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph = data + sizeof(struct ethhdr);
+ __u16 *next_iph;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ __u32 csum = 0;
+ int i;
+
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, data_end, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = bpf_ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ iph = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*iph);
+
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end ||
+ iph + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = bpf_htons(payload_len + sizeof(*iph));
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
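+	/* IPv4 header checksum: one's-complement sum of the 16-bit words,
+	 * folded and inverted
+	 */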
+ next_iph = (__u16 *)iph;
+#pragma clang loop unroll(full)
+ for (i = 0; i < sizeof(*iph) >> 1; i++)
+ csum += *next_iph++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+
+ new_eth = data;
+ ip6h = data + sizeof(*new_eth);
+ old_eth = data + sizeof(*ip6h);
+
+ if (new_eth + 1 > data_end || old_eth + 1 > data_end ||
+ ip6h + 1 > data_end)
+ return XDP_DROP;
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + sizeof(*ip6h));
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp_tx_iptunnel")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp);
+	else if (h_proto == bpf_htons(ETH_P_IPV6))
+		return handle_ipv6(xdp);
+ else
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.c b/tools/testing/selftests/bpf/test_xdp_meta.c
new file mode 100644
index 000000000..8d0182650
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.c
@@ -0,0 +1,53 @@
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include "bpf_helpers.h"
+
+#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
+#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
+#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
+
+SEC("t")
+int ing_cls(struct __sk_buff *ctx)
+{
+ __u8 *data, *data_meta, *data_end;
+ __u32 diff = 0;
+
+ data_meta = ctx_ptr(ctx, data_meta);
+ data_end = ctx_ptr(ctx, data_end);
+ data = ctx_ptr(ctx, data);
+
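+	/* the XDP program (section "x") stored the first ETH_ALEN packet
+	 * bytes in metadata; pass only if they still match the packet
+	 */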
+ if (data + ETH_ALEN > data_end ||
+ data_meta + round_up(ETH_ALEN, 4) > data)
+ return TC_ACT_SHOT;
+
+ diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
+ diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+
+ return diff ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("x")
+int ing_xdp(struct xdp_md *ctx)
+{
+ __u8 *data, *data_meta, *data_end;
+ int ret;
+
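+	/* reserve metadata space in front of the packet data;
+	 * bpf_xdp_adjust_meta() requires a multiple of 4 bytes
+	 */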
+ ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ if (ret < 0)
+ return XDP_DROP;
+
+ data_meta = ctx_ptr(ctx, data_meta);
+ data_end = ctx_ptr(ctx, data_end);
+ data = ctx_ptr(ctx, data);
+
+ if (data + ETH_ALEN > data_end ||
+ data_meta + round_up(ETH_ALEN, 4) > data)
+ return XDP_DROP;
+
+ __builtin_memcpy(data_meta, data, ETH_ALEN);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
new file mode 100755
index 000000000..637fcf4fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -0,0 +1,52 @@
+#!/bin/sh
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_xdp_meta [PASS]";
+ else
+ echo "selftests: test_xdp_meta [FAILED]";
+ fi
+
+ set +e
+ ip link del veth1 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdp off 2>/dev/null > /dev/null
+if [ $? -ne 0 ]; then
+	echo "selftests: [SKIP] Cannot run test without XDP support in the ip tool"
+ exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 type veth peer name veth2
+
+ip link set veth1 netns ns1
+ip link set veth2 netns ns2
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+
+ip netns exec ns1 tc qdisc add dev veth1 clsact
+ip netns exec ns2 tc qdisc add dev veth2 clsact
+
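+# Attach the tc/BPF comparator (section "t") on ingress, then the XDP
+# producer (section "x") which fills the metadata area.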
+ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+
+ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
+ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+
+ip netns exec ns1 ip link set dev veth1 up
+ip netns exec ns2 ip link set dev veth2 up
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_noinline.c b/tools/testing/selftests/bpf/test_xdp_noinline.c
new file mode 100644
index 000000000..5e4aac74f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_noinline.c
@@ -0,0 +1,833 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include "bpf_helpers.h"
+
+#define bpf_printk(fmt, ...) \
+({ \
+ char ____fmt[] = fmt; \
+ bpf_trace_printk(____fmt, sizeof(____fmt), \
+ ##__VA_ARGS__); \
+})
+
+static __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* Copy-paste of jhash from kernel sources, to make sure llvm
+ * can compile it into a valid sequence of BPF instructions.
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static __attribute__ ((noinline))
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
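+	/* all case labels below fall through on purpose */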
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static __attribute__ ((noinline))
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static __attribute__ ((noinline))
+u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+struct flow_key {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+};
+
+struct packet_description {
+ struct flow_key flow;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_definition {
+ union {
+ __be32 vip;
+ __be32 vipv6[4];
+ };
+ __u16 port;
+ __u16 family;
+ __u8 proto;
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_pos_lru {
+ __u32 pos;
+ __u64 atime;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct lb_stats {
+ __u64 v2;
+ __u64 v1;
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) vip_map = {
+ .type = BPF_MAP_TYPE_HASH,
+ .key_size = sizeof(struct vip_definition),
+ .value_size = sizeof(struct vip_meta),
+ .max_entries = 512,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) lru_cache = {
+ .type = BPF_MAP_TYPE_LRU_HASH,
+ .key_size = sizeof(struct flow_key),
+ .value_size = sizeof(struct real_pos_lru),
+ .max_entries = 300,
+ .map_flags = 1U << 1,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ch_rings = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(__u32),
+ .max_entries = 12 * 655,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) reals = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct real_definition),
+ .max_entries = 40,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) stats = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct lb_stats),
+ .max_entries = 515,
+ .map_flags = 0,
+};
+
+struct bpf_map_def __attribute__ ((section("maps"), used)) ctl_array = {
+ .type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(__u32),
+ .value_size = sizeof(struct ctl_value),
+ .max_entries = 16,
+ .map_flags = 0,
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[6];
+ unsigned char eth_source[6];
+ unsigned short eth_proto;
+};
+
+static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+{
+ __u64 off = sizeof(struct eth_hdr);
+ if (is_ipv6) {
+ off += sizeof(struct ipv6hdr);
+ if (is_icmp)
+ off += sizeof(struct icmp6hdr) + sizeof(struct ipv6hdr);
+ } else {
+ off += sizeof(struct iphdr);
+ if (is_icmp)
+ off += sizeof(struct icmphdr) + sizeof(struct iphdr);
+ }
+ return off;
+}
+
+static __attribute__ ((noinline))
+bool parse_udp(void *data, void *data_end,
+ bool is_ipv6, struct packet_description *pckt)
+{
+ bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+ __u64 off = calc_offset(is_ipv6, is_icmp);
+ struct udphdr *udp;
+ udp = data + off;
+
+ if (udp + 1 > data_end)
+ return 0;
+ if (!is_icmp) {
+ pckt->flow.port16[0] = udp->source;
+ pckt->flow.port16[1] = udp->dest;
+ } else {
+ pckt->flow.port16[0] = udp->dest;
+ pckt->flow.port16[1] = udp->source;
+ }
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool parse_tcp(void *data, void *data_end,
+ bool is_ipv6, struct packet_description *pckt)
+{
+ bool is_icmp = !((pckt->flags & (1 << 0)) == 0);
+ __u64 off = calc_offset(is_ipv6, is_icmp);
+ struct tcphdr *tcp;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return 0;
+ if (tcp->syn)
+ pckt->flags |= (1 << 1);
+ if (!is_icmp) {
+ pckt->flow.port16[0] = tcp->source;
+ pckt->flow.port16[1] = tcp->dest;
+ } else {
+ pckt->flow.port16[0] = tcp->dest;
+ pckt->flow.port16[1] = tcp->source;
+ }
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ struct ipv6hdr *ip6h;
+ __u32 ip_suffix;
+ void *data_end;
+ void *data;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+ return 0;
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ ip6h = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct ipv6hdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || ip6h + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 56710;
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip_suffix = pckt->flow.srcv6[3] ^ pckt->flow.port16[0];
+ ip6h->payload_len =
+ __builtin_bswap16(pkt_bytes + sizeof(struct ipv6hdr));
+ ip6h->hop_limit = 4;
+
+ ip6h->saddr.in6_u.u6_addr32[0] = 1;
+ ip6h->saddr.in6_u.u6_addr32[1] = 2;
+ ip6h->saddr.in6_u.u6_addr32[2] = 3;
+ ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
+ memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
+ struct packet_description *pckt,
+ struct real_definition *dst, __u32 pkt_bytes)
+{
+ __u32 ip_suffix = __builtin_bswap16(pckt->flow.port16[0]);
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+ __u16 *next_iph_u16;
+ struct iphdr *iph;
+ __u32 csum = 0;
+ void *data_end;
+ void *data;
+
+ ip_suffix <<= 15;
+ ip_suffix ^= pckt->flow.src;
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+ return 0;
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ new_eth = data;
+ iph = data + sizeof(struct eth_hdr);
+ old_eth = data + sizeof(struct iphdr);
+ if (new_eth + 1 > data_end ||
+ old_eth + 1 > data_end || iph + 1 > data_end)
+ return 0;
+ memcpy(new_eth->eth_dest, cval->mac, 6);
+ memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8;
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 1;
+ iph->tot_len = __builtin_bswap16(pkt_bytes + sizeof(struct iphdr));
+ /* don't update iph->daddr, since it will overwrite old eth_proto
+ * and multiple iterations of bpf_prog_run() will fail
+ */
+
+ iph->saddr = ((0xFFFF0000 & ip_suffix) | 4268) ^ dst->dst;
+ iph->ttl = 4;
+
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+ return 0;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+
+ old_eth = *data;
+ new_eth = *data + sizeof(struct ipv6hdr);
+ memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+ memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+ if (inner_v4)
+ new_eth->eth_proto = 8;
+ else
+ new_eth->eth_proto = 56710;
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
+ return 0;
+ *data = (void *)(long)xdp->data;
+ *data_end = (void *)(long)xdp->data_end;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
+{
+ struct eth_hdr *new_eth;
+ struct eth_hdr *old_eth;
+
+ old_eth = *data;
+ new_eth = *data + sizeof(struct iphdr);
+ memcpy(new_eth->eth_source, old_eth->eth_source, 6);
+ memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
+ new_eth->eth_proto = 8;
+ if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
+ return 0;
+ *data = (void *)(long)xdp->data;
+ *data_end = (void *)(long)xdp->data_end;
+ return 1;
+}
+
+static __attribute__ ((noinline))
+int swap_mac_and_send(void *data, void *data_end)
+{
+ unsigned char tmp_mac[6];
+ struct eth_hdr *eth;
+
+ eth = data;
+ memcpy(tmp_mac, eth->eth_source, 6);
+ memcpy(eth->eth_source, eth->eth_dest, 6);
+ memcpy(eth->eth_dest, tmp_mac, 6);
+ return XDP_TX;
+}
+
+static __attribute__ ((noinline))
+int send_icmp_reply(void *data, void *data_end)
+{
+ struct icmphdr *icmp_hdr;
+ __u16 *next_iph_u16;
+ __u32 tmp_addr = 0;
+ struct iphdr *iph;
+ __u32 csum1 = 0;
+ __u32 csum = 0;
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct iphdr) + sizeof(struct icmphdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ iph = data + off;
+ off += sizeof(struct iphdr);
+ icmp_hdr = data + off;
+ icmp_hdr->type = 0;
+ icmp_hdr->checksum += 0x0007;
+ iph->ttl = 4;
+ tmp_addr = iph->daddr;
+ iph->daddr = iph->saddr;
+ iph->saddr = tmp_addr;
+ iph->check = 0;
+ next_iph_u16 = (__u16 *) iph;
+#pragma clang loop unroll(full)
+ for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
+ csum += *next_iph_u16++;
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int send_icmp6_reply(void *data, void *data_end)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+ __be32 tmp_addr[4];
+ __u64 off = 0;
+
+ if (data + sizeof(struct eth_hdr)
+ + sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) > data_end)
+ return XDP_DROP;
+ off += sizeof(struct eth_hdr);
+ ip6h = data + off;
+ off += sizeof(struct ipv6hdr);
+ icmp_hdr = data + off;
+ icmp_hdr->icmp6_type = 129;
+ icmp_hdr->icmp6_cksum -= 0x0001;
+ ip6h->hop_limit = 4;
+ memcpy(tmp_addr, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->saddr.in6_u.u6_addr32, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(ip6h->daddr.in6_u.u6_addr32, tmp_addr, 16);
+ return swap_mac_and_send(data, data_end);
+}
+
+static __attribute__ ((noinline))
+int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->icmp6_type == 128)
+ return send_icmp6_reply(data, data_end);
+ if (icmp_hdr->icmp6_type != 3)
+ return XDP_PASS;
+ off += sizeof(struct icmp6hdr);
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ pckt->flow.proto = ip6h->nexthdr;
+ pckt->flags |= (1 << 0);
+ memcpy(pckt->flow.srcv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ return -1;
+}
+
+static __attribute__ ((noinline))
+int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
+{
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = data + off;
+ if (icmp_hdr + 1 > data_end)
+ return XDP_DROP;
+ if (icmp_hdr->type == 8)
+ return send_icmp_reply(data, data_end);
+ if ((icmp_hdr->type != 3) || (icmp_hdr->code != 4))
+ return XDP_PASS;
+ off += sizeof(struct icmphdr);
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5)
+ return XDP_DROP;
+ pckt->flow.proto = iph->protocol;
+ pckt->flags |= (1 << 0);
+ pckt->flow.src = iph->daddr;
+ pckt->flow.dst = iph->saddr;
+ return -1;
+}
+
+static __attribute__ ((noinline))
+__u32 get_packet_hash(struct packet_description *pckt,
+ bool hash_16bytes)
+{
+ if (hash_16bytes)
+ return jhash_2words(jhash(pckt->flow.srcv6, 16, 12),
+ pckt->flow.ports, 24);
+ else
+ return jhash_2words(pckt->flow.src, pckt->flow.ports,
+ 24);
+}
+
+__attribute__ ((noinline))
+static bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6, void *lru_map)
+{
+ struct real_pos_lru new_dst_lru = { };
+ bool hash_16bytes = is_ipv6;
+ __u32 *real_pos, hash, key;
+ __u64 cur_time;
+
+ if (vip_info->flags & (1 << 2))
+ hash_16bytes = 1;
+ if (vip_info->flags & (1 << 3)) {
+ pckt->flow.port16[0] = pckt->flow.port16[1];
+ memset(pckt->flow.srcv6, 0, 16);
+ }
+ hash = get_packet_hash(pckt, hash_16bytes);
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return 0;
+ key = 2 * vip_info->vip_num + hash % 2;
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return 0;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return 0;
+ if (!(vip_info->flags & (1 << 1))) {
+ __u32 conn_rate_key = 512 + 2;
+ struct lb_stats *conn_rate_stats =
+ bpf_map_lookup_elem(&stats, &conn_rate_key);
+
+ if (!conn_rate_stats)
+ return 1;
+ cur_time = bpf_ktime_get_ns();
+ if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
+ conn_rate_stats->v1 = 1;
+ conn_rate_stats->v2 = cur_time;
+ } else {
+ conn_rate_stats->v1 += 1;
+ if (conn_rate_stats->v1 >= 1)
+ return 1;
+ }
+ if (pckt->flow.proto == IPPROTO_UDP)
+ new_dst_lru.atime = cur_time;
+ new_dst_lru.pos = key;
+ bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
+ }
+ return 1;
+}
+
+__attribute__ ((noinline))
+static void connection_table_lookup(struct real_definition **real,
+ struct packet_description *pckt,
+ void *lru_map)
+{
+ struct real_pos_lru *dst_lru;
+ __u64 cur_time;
+ __u32 key;
+
+ dst_lru = bpf_map_lookup_elem(lru_map, &pckt->flow);
+ if (!dst_lru)
+ return;
+ if (pckt->flow.proto == IPPROTO_UDP) {
+ cur_time = bpf_ktime_get_ns();
+ if (cur_time - dst_lru->atime > 300000)
+ return;
+ dst_lru->atime = cur_time;
+ }
+ key = dst_lru->pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+}
+
+/* don't believe your eyes!
+ * The function below has 6 arguments, whereas BPF and LLVM allow a
+ * maximum of 5, but since it's _static_, LLVM can optimize one
+ * argument away.
+ */
+__attribute__ ((noinline))
+static int process_l3_headers_v6(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct ipv6hdr *ip6h;
+ __u64 iph_len;
+ int action;
+
+ ip6h = data + off;
+ if (ip6h + 1 > data_end)
+ return XDP_DROP;
+ iph_len = sizeof(struct ipv6hdr);
+ *protocol = ip6h->nexthdr;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(ip6h->payload_len);
+ off += iph_len;
+ if (*protocol == 45) {
+ return XDP_DROP;
+ } else if (*protocol == 59) {
+ action = parse_icmpv6(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ memcpy(pckt->flow.srcv6, ip6h->saddr.in6_u.u6_addr32, 16);
+ memcpy(pckt->flow.dstv6, ip6h->daddr.in6_u.u6_addr32, 16);
+ }
+ return -1;
+}
+
+__attribute__ ((noinline))
+static int process_l3_headers_v4(struct packet_description *pckt,
+ __u8 *protocol, __u64 off,
+ __u16 *pkt_bytes, void *data,
+ void *data_end)
+{
+ struct iphdr *iph;
+ __u64 iph_len;
+ int action;
+
+ iph = data + off;
+ if (iph + 1 > data_end)
+ return XDP_DROP;
+ if (iph->ihl != 5)
+ return XDP_DROP;
+ *protocol = iph->protocol;
+ pckt->flow.proto = *protocol;
+ *pkt_bytes = __builtin_bswap16(iph->tot_len);
+ off += 20;
+ if (iph->frag_off & 65343)
+ return XDP_DROP;
+ if (*protocol == IPPROTO_ICMP) {
+ action = parse_icmp(data, data_end, off, pckt);
+ if (action >= 0)
+ return action;
+ } else {
+ pckt->flow.src = iph->saddr;
+ pckt->flow.dst = iph->daddr;
+ }
+ return -1;
+}
+
+__attribute__ ((noinline))
+static int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct xdp_md *xdp)
+{
+ struct real_definition *dst = NULL;
+ struct packet_description pckt = { };
+ struct vip_definition vip = { };
+ struct lb_stats *data_stats;
+ void *lru_map = &lru_cache;
+ struct vip_meta *vip_info;
+ __u32 lru_stats_key = 513;
+ __u32 mac_addr_pos = 0;
+ __u32 stats_key = 512;
+ struct ctl_value *cval;
+ __u16 pkt_bytes;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ if (is_ipv6)
+ action = process_l3_headers_v6(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ else
+ action = process_l3_headers_v4(&pckt, &protocol, off,
+ &pkt_bytes, data, data_end);
+ if (action >= 0)
+ return action;
+ protocol = pckt.flow.proto;
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(data, data_end, is_ipv6, &pckt))
+ return XDP_DROP;
+ } else {
+ return XDP_TX;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.vipv6, pckt.flow.dstv6, 16);
+ else
+ vip.vip = pckt.flow.dst;
+ vip.port = pckt.flow.port16[1];
+ vip.proto = pckt.flow.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.port = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return XDP_PASS;
+ if (!(vip_info->flags & (1 << 4)))
+ pckt.flow.port16[1] = 0;
+ }
+ if (data_end - data > 1400)
+ return XDP_DROP;
+ data_stats = bpf_map_lookup_elem(&stats, &stats_key);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ if (!dst) {
+ if (vip_info->flags & (1 << 0))
+ pckt.flow.port16[0] = 0;
+ if (!(pckt.flags & (1 << 1)) && !(vip_info->flags & (1 << 1)))
+ connection_table_lookup(&dst, &pckt, lru_map);
+ if (dst)
+ goto out;
+ if (pckt.flow.proto == IPPROTO_TCP) {
+ struct lb_stats *lru_stats =
+ bpf_map_lookup_elem(&stats, &lru_stats_key);
+
+ if (!lru_stats)
+ return XDP_DROP;
+ if (pckt.flags & (1 << 1))
+ lru_stats->v1 += 1;
+ else
+ lru_stats->v2 += 1;
+ }
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6, lru_map))
+ return XDP_DROP;
+ data_stats->v2 += 1;
+ }
+out:
+ cval = bpf_map_lookup_elem(&ctl_array, &mac_addr_pos);
+ if (!cval)
+ return XDP_DROP;
+ if (dst->flags & (1 << 0)) {
+ if (!encap_v6(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ } else {
+ if (!encap_v4(xdp, cval, &pckt, dst, pkt_bytes))
+ return XDP_DROP;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return XDP_DROP;
+ data_stats->v1 += 1;
+ data_stats->v2 += pkt_bytes;
+
+ data = (void *)(long)xdp->data;
+ data_end = (void *)(long)xdp->data_end;
+ if (data + 4 > data_end)
+ return XDP_DROP;
+ *(u32 *)data = dst->dst;
+ return XDP_DROP;
+}
+
+__attribute__ ((section("xdp-test"), used))
+int balancer_ingress(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = eth->eth_proto;
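+	/* magic numbers: 8 == bpf_htons(ETH_P_IP) and 56710 ==
+	 * bpf_htons(ETH_P_IPV6) on little-endian targets
+	 */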
+ if (eth_proto == 8)
+ return process_packet(data, nh_off, data_end, 0, ctx);
+ else if (eth_proto == 56710)
+ return process_packet(data, nh_off, data_end, 1, ctx);
+ else
+ return XDP_DROP;
+}
+
+char _license[] __attribute__ ((section("license"), used)) = "GPL";
+int _version __attribute__ ((section("version"), used)) = 1;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.c b/tools/testing/selftests/bpf/test_xdp_redirect.c
new file mode 100644
index 000000000..ef9e704be
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.c
@@ -0,0 +1,28 @@
+/* Copyright (c) 2017 VMware
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+#include <linux/bpf.h>
+#include "bpf_helpers.h"
+
+int _version SEC("version") = 1;
+
+SEC("redirect_to_111")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+ return bpf_redirect(111, 0);
+}
+SEC("redirect_to_222")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+ return bpf_redirect(222, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
new file mode 100755
index 000000000..c4b17e08d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -0,0 +1,61 @@
+#!/bin/sh
+# Create 2 namespaces with two veth peers, and
+# forward packets in-between using generic XDP
+#
+#     NS1(veth11)          NS2(veth22)
+#          |                    |
+#          |                    |
+#       (veth1, ---------- (veth2,
+#       id:111)             id:222)
+#          |  xdp forwarding   |
+#          --------------------
+
+cleanup()
+{
+ if [ "$?" = "0" ]; then
+ echo "selftests: test_xdp_redirect [PASS]";
+ else
+ echo "selftests: test_xdp_redirect [FAILED]";
+ fi
+
+ set +e
+ ip link del veth1 2> /dev/null
+ ip link del veth2 2> /dev/null
+ ip netns del ns1 2> /dev/null
+ ip netns del ns2 2> /dev/null
+}
+
+ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
+if [ $? -ne 0 ]; then
+	echo "selftests: [SKIP] Cannot run test without generic XDP support in the ip tool"
+ exit 0
+fi
+set -e
+
+ip netns add ns1
+ip netns add ns2
+
+trap cleanup 0 2 3 6 9
+
+ip link add veth1 index 111 type veth peer name veth11
+ip link add veth2 index 222 type veth peer name veth22
+
+ip link set veth11 netns ns1
+ip link set veth22 netns ns2
+
+ip link set veth1 up
+ip link set veth2 up
+
+ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
+ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+
+ip netns exec ns1 ip link set dev veth11 up
+ip netns exec ns2 ip link set dev veth22 up
+
+ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
+ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+
+ip netns exec ns1 ping -c 1 10.1.1.22
+ip netns exec ns2 ping -c 1 10.1.1.11
+
+exit 0
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
new file mode 100644
index 000000000..82922f13d
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+#include <poll.h>
+#include <unistd.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include "trace_helpers.h"
+
+#define MAX_SYMS 300000
+static struct ksym syms[MAX_SYMS];
+static int sym_cnt;
+
+/* compare addresses without truncating the long difference to int */
+static int ksym_cmp(const void *p1, const void *p2)
+{
+	long a = ((const struct ksym *)p1)->addr;
+	long b = ((const struct ksym *)p2)->addr;
+
+	return a < b ? -1 : (a > b);
+}
+
+int load_kallsyms(void)
+{
+ FILE *f = fopen("/proc/kallsyms", "r");
+ char func[256], buf[256];
+ char symbol;
+ void *addr;
+ int i = 0;
+
+ if (!f)
+ return -ENOENT;
+
+ while (!feof(f)) {
+ if (!fgets(buf, sizeof(buf), f))
+ break;
+ if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
+ break;
+ if (!addr)
+ continue;
+ syms[i].addr = (long) addr;
+ syms[i].name = strdup(func);
+ i++;
+ }
+ fclose(f);
+ sym_cnt = i;
+ qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
+ return 0;
+}
+
+struct ksym *ksym_search(long key)
+{
+ int start = 0, end = sym_cnt;
+ int result;
+
+ /* kallsyms not loaded. return NULL */
+ if (sym_cnt <= 0)
+ return NULL;
+
+ while (start < end) {
+ size_t mid = start + (end - start) / 2;
+
+ result = key - syms[mid].addr;
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return &syms[mid];
+ }
+
+ if (start >= 1 && syms[start - 1].addr < key &&
+ key < syms[start].addr)
+ /* valid ksym */
+ return &syms[start - 1];
+
+ /* out of range. return _stext */
+ return &syms[0];
+}
+
+long ksym_get_addr(const char *name)
+{
+ int i;
+
+ for (i = 0; i < sym_cnt; i++) {
+ if (strcmp(syms[i].name, name) == 0)
+ return syms[i].addr;
+ }
+
+ return 0;
+}
+
+static int page_size;
+static int page_cnt = 8;
+static struct perf_event_mmap_page *header;
+
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header)
+{
+ void *base;
+ int mmap_size;
+
+ page_size = getpagesize();
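+	/* one extra page holds the perf_event_mmap_page metadata header */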
+ mmap_size = page_size * (page_cnt + 1);
+
+ base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (base == MAP_FAILED) {
+ printf("mmap err\n");
+ return -1;
+ }
+
+ *header = base;
+ return 0;
+}
+
+int perf_event_mmap(int fd)
+{
+ return perf_event_mmap_header(fd, &header);
+}
+
+static int perf_event_poll(int fd)
+{
+ struct pollfd pfd = { .fd = fd, .events = POLLIN };
+
+ return poll(&pfd, 1, 1000);
+}
+
+struct perf_event_sample {
+ struct perf_event_header header;
+ __u32 size;
+ char data[];
+};
+
+static enum bpf_perf_event_ret bpf_perf_event_print(void *event, void *priv)
+{
+ struct perf_event_sample *e = event;
+ perf_event_print_fn fn = priv;
+ int ret;
+
+ if (e->header.type == PERF_RECORD_SAMPLE) {
+ ret = fn(e->data, e->size);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ return ret;
+ } else if (e->header.type == PERF_RECORD_LOST) {
+ struct {
+ struct perf_event_header header;
+ __u64 id;
+ __u64 lost;
+ } *lost = (void *) e;
+ printf("lost %lld events\n", lost->lost);
+ } else {
+ printf("unknown event type=%d size=%d\n",
+ e->header.type, e->header.size);
+ }
+
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+int perf_event_poller(int fd, perf_event_print_fn output_fn)
+{
+ enum bpf_perf_event_ret ret;
+ void *buf = NULL;
+ size_t len = 0;
+
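+	/* wait for data, then drain every available sample from the ring buffer */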
+ for (;;) {
+ perf_event_poll(fd);
+ ret = bpf_perf_event_read_simple(header, page_cnt * page_size,
+ page_size, &buf, &len,
+ bpf_perf_event_print,
+ output_fn);
+ if (ret != LIBBPF_PERF_EVENT_CONT)
+ break;
+ }
+ free(buf);
+
+ return ret;
+}
+
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+ int num_fds, perf_event_print_fn output_fn)
+{
+ enum bpf_perf_event_ret ret;
+ struct pollfd *pfds;
+ void *buf = NULL;
+ size_t len = 0;
+ int i;
+
+ pfds = calloc(num_fds, sizeof(*pfds));
+ if (!pfds)
+ return LIBBPF_PERF_EVENT_ERROR;
+
+ for (i = 0; i < num_fds; i++) {
+ pfds[i].fd = fds[i];
+ pfds[i].events = POLLIN;
+ }
+
+ for (;;) {
+ poll(pfds, num_fds, 1000);
+ for (i = 0; i < num_fds; i++) {
+ if (!pfds[i].revents)
+ continue;
+
+ ret = bpf_perf_event_read_simple(headers[i],
+ page_cnt * page_size,
+ page_size, &buf, &len,
+ bpf_perf_event_print,
+ output_fn);
+			if (ret != LIBBPF_PERF_EVENT_CONT)
+				goto out;
+		}
+	}
+out:
+	free(buf);
+ free(pfds);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
new file mode 100644
index 000000000..18924f23d
--- /dev/null
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_HELPER_H
+#define __TRACE_HELPER_H
+
+#include <libbpf.h>
+#include <linux/perf_event.h>
+
+struct ksym {
+ long addr;
+ char *name;
+};
+
+int load_kallsyms(void);
+struct ksym *ksym_search(long key);
+long ksym_get_addr(const char *name);
+
+typedef enum bpf_perf_event_ret (*perf_event_print_fn)(void *data, int size);
+
+int perf_event_mmap(int fd);
+int perf_event_mmap_header(int fd, struct perf_event_mmap_page **header);
+/* return LIBBPF_PERF_EVENT_DONE or LIBBPF_PERF_EVENT_ERROR */
+int perf_event_poller(int fd, perf_event_print_fn output_fn);
+int perf_event_poller_multi(int *fds, struct perf_event_mmap_page **headers,
+ int num_fds, perf_event_print_fn output_fn);
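+
+/* A hypothetical usage sketch (the names above are the real API, the
+ * callback body is a placeholder):
+ *
+ *	static enum bpf_perf_event_ret print_fn(void *data, int size)
+ *	{
+ *		return LIBBPF_PERF_EVENT_CONT;
+ *	}
+ *
+ *	perf_event_mmap(fd);
+ *	perf_event_poller(fd, print_fn);
+ */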
+#endif
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
new file mode 100644
index 000000000..9de8b7cb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -0,0 +1,28 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+
+#define BUF_SIZE 256
+
+int main(int argc, char *argv[])
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+ int i;
+ char buf[BUF_SIZE];
+ int count = 4;
+
+ if (fd < 0)
+ return 1;
+
+ if (argc == 2)
+ count = atoi(argv[1]);
+
+ for (i = 0; i < count; ++i)
+ read(fd, buf, BUF_SIZE);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore
new file mode 100644
index 000000000..a23bb4a6f
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/.gitignore
@@ -0,0 +1,2 @@
+breakpoint_test
+step_after_suspend_test
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
new file mode 100644
index 000000000..9ec2c78de
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+# Taken from perf makefile
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := step_after_suspend_test
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += breakpoint_test
+endif
+ifneq (,$(filter $(ARCH),aarch64 arm64))
+TEST_GEN_PROGS += breakpoint_test_arm64
+endif
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
new file mode 100644
index 000000000..901b85ea6
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -0,0 +1,401 @@
+/*
+ * Copyright (C) 2011 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for breakpoints (and more generally the do_debug() path) in x86.
+ */
+
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+
+/* Breakpoint access modes */
+enum {
+ BP_X = 1,
+ BP_RW = 2,
+ BP_W = 4,
+};
+
+static pid_t child_pid;
+
+/*
+ * Ensures the child and parent are always "talking" about
+ * the same test sequence. (ie: that we haven't forgotten
+ * to call check_trapped() somewhere).
+ */
+static int nr_tests;
+
+static void set_breakpoint_addr(void *addr, int n)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_POKEUSER, child_pid,
+ offsetof(struct user, u_debugreg[n]), addr);
+ if (ret)
+ ksft_exit_fail_msg("Can't set breakpoint addr: %s\n",
+ strerror(errno));
+}
+
+static void toggle_breakpoint(int n, int type, int len,
+ int local, int global, int set)
+{
+ int ret;
+
+ int xtype, xlen;
+ unsigned long vdr7, dr7;
+
+ switch (type) {
+ case BP_X:
+ xtype = 0;
+ break;
+ case BP_W:
+ xtype = 1;
+ break;
+ case BP_RW:
+ xtype = 3;
+ break;
+ }
+
+ switch (len) {
+ case 1:
+ xlen = 0;
+ break;
+ case 2:
+ xlen = 4;
+ break;
+ case 4:
+ xlen = 0xc;
+ break;
+ case 8:
+ xlen = 8;
+ break;
+ }
+
+ dr7 = ptrace(PTRACE_PEEKUSER, child_pid,
+ offsetof(struct user, u_debugreg[7]), 0);
+
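+	/* DR7 layout: len/type of breakpoint n occupy bits 16 + 4 * n,
+	 * the local/global enable bits sit at 2 * n and 2 * n + 1
+	 */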
+ vdr7 = (xlen | xtype) << 16;
+ vdr7 <<= 4 * n;
+
+ if (local) {
+ vdr7 |= 1 << (2 * n);
+ vdr7 |= 1 << 8;
+ }
+ if (global) {
+ vdr7 |= 2 << (2 * n);
+ vdr7 |= 1 << 9;
+ }
+
+ if (set)
+ dr7 |= vdr7;
+ else
+ dr7 &= ~vdr7;
+
+ ret = ptrace(PTRACE_POKEUSER, child_pid,
+ offsetof(struct user, u_debugreg[7]), dr7);
+ if (ret) {
+ ksft_print_msg("Can't set dr7: %s\n", strerror(errno));
+ exit(-1);
+ }
+}
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long dummy_var[4];
+
+/* Dummy functions to test execution accesses */
+static void dummy_func(void) { }
+static void dummy_func1(void) { }
+static void dummy_func2(void) { }
+static void dummy_func3(void) { }
+
+static void (*dummy_funcs[])(void) = {
+ dummy_func,
+ dummy_func1,
+ dummy_func2,
+ dummy_func3,
+};
+
+static int trapped;
+
+static void check_trapped(void)
+{
+ /*
+ * If we haven't trapped, wake up the parent
+ * so that it notices the failure.
+ */
+ if (!trapped)
+ kill(getpid(), SIGUSR1);
+ trapped = 0;
+
+ nr_tests++;
+}
+
+static void write_var(int len)
+{
+ char *pcval; short *psval; int *pival; long long *plval;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ switch (len) {
+ case 1:
+ pcval = (char *)&dummy_var[i];
+ *pcval = 0xff;
+ break;
+ case 2:
+ psval = (short *)&dummy_var[i];
+ *psval = 0xffff;
+ break;
+ case 4:
+ pival = (int *)&dummy_var[i];
+ *pival = 0xffffffff;
+ break;
+ case 8:
+ plval = (long long *)&dummy_var[i];
+ *plval = 0xffffffffffffffffLL;
+ break;
+ }
+ check_trapped();
+ }
+}
+
+static void read_var(int len)
+{
+ char cval; short sval; int ival; long long lval;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ switch (len) {
+ case 1:
+ cval = *(char *)&dummy_var[i];
+ break;
+ case 2:
+ sval = *(short *)&dummy_var[i];
+ break;
+ case 4:
+ ival = *(int *)&dummy_var[i];
+ break;
+ case 8:
+ lval = *(long long *)&dummy_var[i];
+ break;
+ }
+ check_trapped();
+ }
+}
+
+/*
+ * Do the r/w/x accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+ int len, local, global, i;
+ char val;
+ int ret;
+
+ ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ if (ret) {
+ ksft_print_msg("Can't be traced? %s\n", strerror(errno));
+ return;
+ }
+
+ /* Wake up father so that it sets up the first test */
+ kill(getpid(), SIGUSR1);
+
+ /* Test instruction breakpoints */
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+
+ for (i = 0; i < 4; i++) {
+ dummy_funcs[i]();
+ check_trapped();
+ }
+ }
+ }
+
+ /* Test write watchpoints */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ write_var(len);
+ }
+ }
+ }
+
+ /* Test read/write watchpoints (on read accesses) */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ read_var(len);
+ }
+ }
+ }
+
+ /* Icebp trap */
+ asm(".byte 0xf1\n");
+ check_trapped();
+
+ /* Int 3 trap */
+ asm("int $3\n");
+ check_trapped();
+
+ kill(getpid(), SIGUSR1);
+}
+
+static void check_success(const char *msg)
+{
+ int child_nr_tests;
+ int status;
+ int ret;
+
+ /* Wait for the child to SIGTRAP */
+ wait(&status);
+
+ ret = 0;
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ child_nr_tests = ptrace(PTRACE_PEEKDATA, child_pid,
+ &nr_tests, 0);
+ if (child_nr_tests == nr_tests)
+ ret = 1;
+ if (ptrace(PTRACE_POKEDATA, child_pid, &trapped, 1))
+ ksft_exit_fail_msg("Can't poke: %s\n", strerror(errno));
+ }
+
+ nr_tests++;
+
+ if (ret)
+ ksft_test_result_pass(msg);
+ else
+ ksft_test_result_fail(msg);
+}
+
+static void launch_instruction_breakpoints(char *buf, int local, int global)
+{
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ set_breakpoint_addr(dummy_funcs[i], i);
+ toggle_breakpoint(i, BP_X, 1, local, global, 1);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test breakpoint %d with local: %d global: %d\n",
+ i, local, global);
+ check_success(buf);
+ toggle_breakpoint(i, BP_X, 1, local, global, 0);
+ }
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+ int local, int global)
+{
+ const char *mode_str;
+ int i;
+
+ if (mode == BP_W)
+ mode_str = "write";
+ else
+ mode_str = "read";
+
+ for (i = 0; i < 4; i++) {
+ set_breakpoint_addr(&dummy_var[i], i);
+ toggle_breakpoint(i, mode, len, local, global, 1);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf,
+ "Test %s watchpoint %d with len: %d local: %d global: %d\n",
+ mode_str, i, len, local, global);
+ check_success(buf);
+ toggle_breakpoint(i, mode, len, local, global, 0);
+ }
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static void launch_tests(void)
+{
+ char buf[1024];
+ int len, local, global, i;
+
+ /* Instruction breakpoints */
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_instruction_breakpoints(buf, local, global);
+ }
+ }
+
+ /* Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_watchpoints(buf, BP_W, len,
+ local, global);
+ }
+ }
+ }
+
+ /* Read-Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1) {
+ for (local = 0; local < 2; local++) {
+ for (global = 0; global < 2; global++) {
+ if (!local && !global)
+ continue;
+ launch_watchpoints(buf, BP_RW, len,
+ local, global);
+ }
+ }
+ }
+
+ /* Icebp traps */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success("Test icebp\n");
+
+ /* Int 3 traps */
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success("Test int 3 trap\n");
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+ int ret;
+
+ ksft_print_header();
+
+ pid = fork();
+ if (!pid) {
+ trigger_tests();
+ exit(0);
+ }
+
+ child_pid = pid;
+
+ wait(NULL);
+
+ launch_tests();
+
+ wait(NULL);
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
new file mode 100644
index 000000000..2d95e5add
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Original Code by Pavel Labath <labath@google.com>
+ *
+ * Code modified by Pratyush Anand <panand@redhat.com>
+ * for testing different byte select for each access size.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <asm/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/param.h>
+#include <sys/uio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <elf.h>
+#include <errno.h>
+#include <signal.h>
+
+#include "../kselftest.h"
+
+static volatile uint8_t var[96] __attribute__((__aligned__(32)));
+
+static void child(int size, int wr)
+{
+ volatile uint8_t *addr = &var[32 + wr];
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+ ksft_print_msg(
+ "ptrace(PTRACE_TRACEME) failed: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ if (raise(SIGSTOP) != 0) {
+ ksft_print_msg(
+ "raise(SIGSTOP) failed: %s\n", strerror(errno));
+ _exit(1);
+ }
+
+ if ((uintptr_t) addr % size) {
+ ksft_print_msg(
+ "Wrong address write for the given size: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ switch (size) {
+ case 1:
+ *addr = 47;
+ break;
+ case 2:
+ *(uint16_t *)addr = 47;
+ break;
+ case 4:
+ *(uint32_t *)addr = 47;
+ break;
+ case 8:
+ *(uint64_t *)addr = 47;
+ break;
+ case 16:
+ __asm__ volatile ("stp x29, x30, %0" : "=m" (addr[0]));
+ break;
+ case 32:
+ __asm__ volatile ("stp q29, q30, %0" : "=m" (addr[0]));
+ break;
+ }
+
+ _exit(0);
+}
+
+static bool set_watchpoint(pid_t pid, int size, int wp)
+{
+ const volatile uint8_t *addr = &var[32 + wp];
+ const int offset = (uintptr_t)addr % 8;
+ const unsigned int byte_mask = ((1 << size) - 1) << offset;
+ const unsigned int type = 2; /* Write */
+ const unsigned int enable = 1;
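+	/* DBGWCR layout: byte-select mask in bits 5-12, load/store type
+	 * in bits 3-4, enable in bit 0
+	 */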
+ const unsigned int control = byte_mask << 5 | type << 3 | enable;
+ struct user_hwdebug_state dreg_state;
+ struct iovec iov;
+
+ memset(&dreg_state, 0, sizeof(dreg_state));
+ dreg_state.dbg_regs[0].addr = (uintptr_t)(addr - offset);
+ dreg_state.dbg_regs[0].ctrl = control;
+ iov.iov_base = &dreg_state;
+ iov.iov_len = offsetof(struct user_hwdebug_state, dbg_regs) +
+ sizeof(dreg_state.dbg_regs[0]);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_ARM_HW_WATCH, &iov) == 0)
+ return true;
+
+ if (errno == EIO)
+ ksft_print_msg(
+ "ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) not supported on this hardware: %s\n",
+ strerror(errno));
+
+ ksft_print_msg(
+ "ptrace(PTRACE_SETREGSET, NT_ARM_HW_WATCH) failed: %s\n",
+ strerror(errno));
+ return false;
+}
+
+static bool run_test(int wr_size, int wp_size, int wr, int wp)
+{
+ int status;
+ siginfo_t siginfo;
+ pid_t pid = fork();
+ pid_t wpid;
+
+ if (pid < 0) {
+ ksft_test_result_fail(
+ "fork() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (pid == 0)
+ child(wr_size, wr);
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg(
+ "waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg(
+ "child did not stop: %s\n", strerror(errno));
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGSTOP) {
+ ksft_print_msg("child did not stop with SIGSTOP\n");
+ return false;
+ }
+
+ if (!set_watchpoint(pid, wp_size, wp))
+ return false;
+
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+		ksft_print_msg(
+			"ptrace(PTRACE_CONT) failed: %s\n",
+			strerror(errno));
+ return false;
+ }
+
+ alarm(3);
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg(
+ "waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ alarm(0);
+ if (WIFEXITED(status)) {
+		ksft_print_msg("child exited before the watchpoint fired\n");
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg("child did not stop\n");
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGTRAP) {
+ ksft_print_msg("child did not stop with SIGTRAP\n");
+ return false;
+ }
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &siginfo) != 0) {
+ ksft_print_msg(
+ "ptrace(PTRACE_GETSIGINFO): %s\n",
+ strerror(errno));
+ return false;
+ }
+ if (siginfo.si_code != TRAP_HWBKPT) {
+ ksft_print_msg(
+ "Unexpected si_code %d\n", siginfo.si_code);
+ return false;
+ }
+
+ kill(pid, SIGKILL);
+ wpid = waitpid(pid, &status, 0);
+ if (wpid != pid) {
+ ksft_print_msg(
+ "waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ return true;
+}
+
+static void sigalrm(int sig)
+{
+}
+
+int main(int argc, char **argv)
+{
+ bool succeeded = true;
+ struct sigaction act;
+ int wr, wp, size;
+ bool result;
+
+ ksft_print_header();
+
+ act.sa_handler = sigalrm;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ sigaction(SIGALRM, &act, NULL);
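+	/*
+	 * A watchpoint is expected to fire only when the write offset and
+	 * the watchpoint offset coincide, so a failing run_test() is the
+	 * passing outcome for every wr != wp combination.
+	 */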
+ for (size = 1; size <= 32; size = size*2) {
+ for (wr = 0; wr <= 32; wr = wr + size) {
+ for (wp = wr - size; wp <= wr + size; wp = wp + size) {
+ result = run_test(size, MIN(size, 8), wr, wp);
+ if ((result && wr == wp) ||
+ (!result && wr != wp))
+ ksft_test_result_pass(
+ "Test size = %d write offset = %d watchpoint offset = %d\n",
+ size, wr, wp);
+ else {
+ ksft_test_result_fail(
+ "Test size = %d write offset = %d watchpoint offset = %d\n",
+ size, wr, wp);
+ succeeded = false;
+ }
+ }
+ }
+ }
+
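+	/*
+	 * Negative offsets: the 8-byte watchpoint at offset -8 must catch
+	 * each size-byte write ending just below var[32].
+	 */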
+ for (size = 1; size <= 32; size = size*2) {
+ if (run_test(size, 8, -size, -8))
+ ksft_test_result_pass(
+ "Test size = %d write offset = %d watchpoint offset = -8\n",
+ size, -size);
+ else {
+ ksft_test_result_fail(
+ "Test size = %d write offset = %d watchpoint offset = -8\n",
+ size, -size);
+ succeeded = false;
+ }
+ }
+
+ if (succeeded)
+ ksft_exit_pass();
+ else
+ ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
new file mode 100644
index 000000000..f82dcc1f8
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+void child(int cpu)
+{
+ cpu_set_t set;
+
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ if (sched_setaffinity(0, sizeof(set), &set) != 0) {
+ ksft_print_msg("sched_setaffinity() failed: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+ ksft_print_msg("ptrace(PTRACE_TRACEME) failed: %s\n",
+ strerror(errno));
+ _exit(1);
+ }
+
+ if (raise(SIGSTOP) != 0) {
+ ksft_print_msg("raise(SIGSTOP) failed: %s\n", strerror(errno));
+ _exit(1);
+ }
+
+ _exit(0);
+}
+
+bool run_test(int cpu)
+{
+ int status;
+ pid_t pid = fork();
+ pid_t wpid;
+
+ if (pid < 0) {
+ ksft_print_msg("fork() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (pid == 0)
+ child(cpu);
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg("child did not stop: %s\n", strerror(errno));
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGSTOP) {
+ ksft_print_msg("child did not stop with SIGSTOP: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
+ if (errno == EIO) {
+ ksft_exit_skip(
+ "ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n",
+ strerror(errno));
+ }
+ ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+		ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (WIFEXITED(status)) {
+ ksft_print_msg("child did not single-step: %s\n",
+ strerror(errno));
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ ksft_print_msg("child did not stop: %s\n", strerror(errno));
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGTRAP) {
+ ksft_print_msg("child did not stop with SIGTRAP: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+ ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+ return false;
+ }
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("child did not exit after PTRACE_CONT: %s\n",
+ strerror(errno));
+ return false;
+ }
+
+ return true;
+}
+
+void suspend(void)
+{
+ int power_state_fd;
+ struct sigevent event = {};
+ int timerfd;
+ int err;
+ struct itimerspec spec = {};
+
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+ power_state_fd = open("/sys/power/state", O_RDWR);
+ if (power_state_fd < 0)
+ ksft_exit_fail_msg(
+			"open(\"/sys/power/state\") failed: %s\n",
+ strerror(errno));
+
+ timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
+ if (timerfd < 0)
+ ksft_exit_fail_msg("timerfd_create() failed\n");
+
+ spec.it_value.tv_sec = 5;
+ err = timerfd_settime(timerfd, 0, &spec, NULL);
+ if (err < 0)
+ ksft_exit_fail_msg("timerfd_settime() failed\n");
+
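+	/*
+	 * CLOCK_BOOTTIME_ALARM timers can wake the system from suspend, so
+	 * the write below enters suspend and the alarm set above resumes
+	 * the system about five seconds later.
+	 */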
+ if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem"))
+ ksft_exit_fail_msg("Failed to enter Suspend state\n");
+
+ close(timerfd);
+ close(power_state_fd);
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ bool do_suspend = true;
+ bool succeeded = true;
+ cpu_set_t available_cpus;
+ int err;
+ int cpu;
+
+ ksft_print_header();
+
+ while ((opt = getopt(argc, argv, "n")) != -1) {
+ switch (opt) {
+ case 'n':
+ do_suspend = false;
+ break;
+ default:
+ printf("Usage: %s [-n]\n", argv[0]);
+ printf(" -n: do not trigger a suspend/resume cycle before the test\n");
+ return -1;
+ }
+ }
+
+ if (do_suspend)
+ suspend();
+
+ err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+ if (err < 0)
+ ksft_exit_fail_msg("sched_getaffinity() failed\n");
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ bool test_success;
+
+ if (!CPU_ISSET(cpu, &available_cpus))
+ continue;
+
+ test_success = run_test(cpu);
+ if (test_success) {
+ ksft_test_result_pass("CPU %d\n", cpu);
+ } else {
+ ksft_test_result_fail("CPU %d\n", cpu);
+ succeeded = false;
+ }
+ }
+
+ if (succeeded)
+ ksft_exit_pass();
+ else
+ ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore
new file mode 100644
index 000000000..b732dd0d4
--- /dev/null
+++ b/tools/testing/selftests/capabilities/.gitignore
@@ -0,0 +1,2 @@
+test_execve
+validate_cap
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
new file mode 100644
index 000000000..6e9d98d45
--- /dev/null
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_FILES := validate_cap
+TEST_GEN_PROGS := test_execve
+
+CFLAGS += -O2 -g -std=gnu99 -Wall
+LDLIBS += -lcap-ng -lrt -ldl
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
new file mode 100644
index 000000000..3ab39a61b
--- /dev/null
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <cap-ng.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <limits.h>
+#include <libgen.h>
+#include <malloc.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include "../kselftest.h"
+
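+/*
+ * Fallback definitions so the test still builds against kernel headers
+ * that predate the ambient capability prctl interface.
+ */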
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+static int nerrs;
+static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+ char buf[4096];
+ int fd;
+ ssize_t written;
+ int buf_len;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0)
+ ksft_exit_fail_msg("vsnprintf failed - %s\n", strerror(errno));
+
+ if (buf_len >= sizeof(buf))
+ ksft_exit_fail_msg("vsnprintf output truncated\n");
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ ksft_exit_fail_msg("open of %s failed - %s\n",
+ filename, strerror(errno));
+ }
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ ksft_exit_fail_msg("short write to %s\n", filename);
+ } else {
+ ksft_exit_fail_msg("write to %s failed - %s\n",
+ filename, strerror(errno));
+ }
+ }
+ if (close(fd) != 0) {
+ ksft_exit_fail_msg("close of %s failed - %s\n",
+ filename, strerror(errno));
+ }
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+}
+
+static bool create_and_enter_ns(uid_t inner_uid)
+{
+ uid_t outer_uid;
+ gid_t outer_gid;
+ int i;
+ bool have_outer_privilege;
+
+ outer_uid = getuid();
+ outer_gid = getgid();
+
+ /*
+ * TODO: If we're already root, we could skip creating the userns.
+ */
+
+ if (unshare(CLONE_NEWNS) == 0) {
+ ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
+ if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
+ strerror(errno));
+ if (setresuid(inner_uid, inner_uid, -1) != 0)
+ ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
+
+ // Re-enable effective caps
+ capng_get_caps_process();
+ for (i = 0; i < CAP_LAST_CAP; i++)
+ if (capng_have_capability(CAPNG_PERMITTED, i))
+ capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg(
+ "capng_apply - %s\n", strerror(errno));
+
+ have_outer_privilege = true;
+ } else if (unshare(CLONE_NEWUSER | CLONE_NEWNS) == 0) {
+ ksft_print_msg("[NOTE]\tUsing a user namespace for tests\n");
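+		/*
+		 * An unprivileged process must write "deny" to setgroups
+		 * before it is allowed to write a gid_map.
+		 */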
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "%d %d 1", inner_uid, outer_uid);
+ write_file("/proc/self/gid_map", "0 %d 1", outer_gid);
+
+ have_outer_privilege = false;
+ } else {
+ ksft_exit_skip("must be root or be able to create a userns\n");
+ }
+
+ if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL) != 0)
+ ksft_exit_fail_msg("remount everything private - %s\n",
+ strerror(errno));
+
+ return have_outer_privilege;
+}
+
+static void chdir_to_tmpfs(void)
+{
+ char cwd[PATH_MAX];
+ if (getcwd(cwd, sizeof(cwd)) != cwd)
+ ksft_exit_fail_msg("getcwd - %s\n", strerror(errno));
+
+ if (mount("private_tmp", ".", "tmpfs", 0, "mode=0777") != 0)
+ ksft_exit_fail_msg("mount private tmpfs - %s\n",
+ strerror(errno));
+
+ if (chdir(cwd) != 0)
+ ksft_exit_fail_msg("chdir to private tmpfs - %s\n",
+ strerror(errno));
+}
+
+static void copy_fromat_to(int fromfd, const char *fromname, const char *toname)
+{
+ int from = openat(fromfd, fromname, O_RDONLY);
+ if (from == -1)
+ ksft_exit_fail_msg("open copy source - %s\n", strerror(errno));
+
+	int to = open(toname, O_CREAT | O_WRONLY | O_EXCL, 0700);
+	if (to == -1)
+		ksft_exit_fail_msg("open copy destination - %s\n",
+				   strerror(errno));
+
+ while (true) {
+ char buf[4096];
+ ssize_t sz = read(from, buf, sizeof(buf));
+ if (sz == 0)
+ break;
+ if (sz < 0)
+ ksft_exit_fail_msg("read - %s\n", strerror(errno));
+
+ if (write(to, buf, sz) != sz)
+ /* no short writes on tmpfs */
+ ksft_exit_fail_msg("write - %s\n", strerror(errno));
+ }
+
+ close(from);
+ close(to);
+}
+
+static bool fork_wait(void)
+{
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ return true;
+ } else if (child > 0) {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ ksft_print_msg("Child died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ ksft_print_msg("Child failed\n");
+ nerrs++;
+ } else {
+ /* don't print this message for mpid */
+ if (getpid() != mpid)
+ ksft_test_result_pass("Passed\n");
+ }
+ return false;
+ } else {
+ ksft_exit_fail_msg("fork - %s\n", strerror(errno));
+ return false;
+ }
+}
+
+static void exec_other_validate_cap(const char *name,
+ bool eff, bool perm, bool inh, bool ambient)
+{
+ execl(name, name, (eff ? "1" : "0"),
+ (perm ? "1" : "0"), (inh ? "1" : "0"), (ambient ? "1" : "0"),
+ NULL);
+ ksft_exit_fail_msg("execl - %s\n", strerror(errno));
+}
+
+static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
+{
+ exec_other_validate_cap("./validate_cap", eff, perm, inh, ambient);
+}
+
+static int do_tests(int uid, const char *our_path)
+{
+ bool have_outer_privilege = create_and_enter_ns(uid);
+
+ int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
+ if (ourpath_fd == -1)
+ ksft_exit_fail_msg("open '%s' - %s\n",
+ our_path, strerror(errno));
+
+ chdir_to_tmpfs();
+
+ copy_fromat_to(ourpath_fd, "validate_cap", "validate_cap");
+
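+	/*
+	 * With real privilege available, stage setuid/setgid variants of
+	 * the helper so the privilege-changing execve paths are covered.
+	 */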
+ if (have_outer_privilege) {
+ uid_t gid = getegid();
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidroot");
+ if (chown("validate_cap_suidroot", 0, -1) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_suidroot", S_ISUID | 0700) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_suidnonroot");
+ if (chown("validate_cap_suidnonroot", uid + 1, -1) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_suidnonroot", S_ISUID | 0700) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidroot");
+ if (chown("validate_cap_sgidroot", -1, 0) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_sgidroot", S_ISGID | 0710) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+
+ copy_fromat_to(ourpath_fd, "validate_cap",
+ "validate_cap_sgidnonroot");
+ if (chown("validate_cap_sgidnonroot", -1, gid + 1) != 0)
+ ksft_exit_fail_msg("chown - %s\n", strerror(errno));
+ if (chmod("validate_cap_sgidnonroot", S_ISGID | 0710) != 0)
+ ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
+ }
+
+ capng_get_caps_process();
+
+ /* Make sure that i starts out clear */
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+
+ if (uid == 0) {
+ ksft_print_msg("[RUN]\tRoot => ep\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, false, false);
+ } else {
+ ksft_print_msg("[RUN]\tNon-root => no caps\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, false, false);
+ }
+
+ ksft_print_msg("Check cap_ambient manipulation rules\n");
+
+ /* We should not be able to add ambient caps yet. */
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != -1 || errno != EPERM) {
+ if (errno == EINVAL)
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE isn't supported\n");
+ else
+ ksft_test_result_fail(
+				"PR_CAP_AMBIENT_RAISE should have failed with EPERM on a non-inheritable cap\n");
+ return 1;
+ }
+ ksft_test_result_pass(
+ "PR_CAP_AMBIENT_RAISE failed on non-inheritable cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_PERMITTED, CAP_NET_RAW);
+ capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, CAP_NET_RAW);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_RAW, 0, 0, 0) != -1 || errno != EPERM) {
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE should have failed on a non-permitted cap\n");
+ return 1;
+ }
+ ksft_test_result_pass(
+ "PR_CAP_AMBIENT_RAISE failed on non-permitted cap\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_RAISE should have succeeded\n");
+ return 1;
+ }
+ ksft_test_result_pass("PR_CAP_AMBIENT_RAISE worked\n");
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 1) {
+ ksft_test_result_fail("PR_CAP_AMBIENT_IS_SET is broken\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_CLEAR_ALL, 0, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_CAP_AMBIENT_CLEAR_ALL - %s\n",
+ strerror(errno));
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ ksft_test_result_fail(
+ "PR_CAP_AMBIENT_CLEAR_ALL didn't work\n");
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+ strerror(errno));
+
+ capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0) {
+ ksft_test_result_fail("Dropping I should have dropped A\n");
+ return 1;
+ }
+
+ ksft_test_result_pass("Basic manipulation appears to work\n");
+
+ capng_update(CAPNG_ADD, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
+ if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+ ksft_exit_fail_msg("capng_apply - %s\n", strerror(errno));
+ if (uid == 0) {
+ ksft_print_msg("[RUN]\tRoot +i => eip\n");
+ if (fork_wait())
+ exec_validate_cap(true, true, true, false);
+ } else {
+ ksft_print_msg("[RUN]\tNon-root +i => i\n");
+ if (fork_wait())
+ exec_validate_cap(false, false, true, false);
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, CAP_NET_BIND_SERVICE, 0, 0, 0) != 0)
+ ksft_exit_fail_msg("PR_CAP_AMBIENT_RAISE - %s\n",
+ strerror(errno));
+
+ ksft_print_msg("[RUN]\tUID %d +ia => eipa\n", uid);
+ if (fork_wait())
+ exec_validate_cap(true, true, true, true);
+
+ /* The remaining tests need real privilege */
+
+ if (!have_outer_privilege) {
+ ksft_test_result_skip("SUID/SGID tests (needs privilege)\n");
+ goto done;
+ }
+
+ if (uid == 0) {
+ ksft_print_msg("[RUN]\tRoot +ia, suidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidroot",
+ true, true, true, true);
+
+ ksft_print_msg("[RUN]\tRoot +ia, suidnonroot => ip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_suidnonroot",
+ false, true, true, false);
+
+ ksft_print_msg("[RUN]\tRoot +ia, sgidroot => eipa\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, true);
+
+ if (fork_wait()) {
+ ksft_print_msg(
+ "[RUN]\tRoot, gid != 0, +ia, sgidroot => eip\n");
+ if (setresgid(1, 1, 1) != 0)
+ ksft_exit_fail_msg("setresgid - %s\n",
+ strerror(errno));
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ true, true, true, false);
+ }
+
+ ksft_print_msg("[RUN]\tRoot +ia, sgidnonroot => eip\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ true, true, true, false);
+ } else {
+ ksft_print_msg("[RUN]\tNon-root +ia, sgidnonroot => i\n");
+ if (fork_wait())
+ exec_other_validate_cap("./validate_cap_sgidnonroot",
+ false, false, true, false);
+
+ if (fork_wait()) {
+ ksft_print_msg("[RUN]\tNon-root +ia, sgidroot => i\n");
+ if (setresgid(1, 1, 1) != 0)
+ ksft_exit_fail_msg("setresgid - %s\n",
+ strerror(errno));
+ exec_other_validate_cap("./validate_cap_sgidroot",
+ false, false, true, false);
+ }
+ }
+
+done:
+ ksft_print_cnts();
+ return nerrs ? 1 : 0;
+}
+
+int main(int argc, char **argv)
+{
+ char *tmp1, *tmp2, *our_path;
+
+ ksft_print_header();
+
+ /* Find our path */
+ tmp1 = strdup(argv[0]);
+ if (!tmp1)
+ ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
+ tmp2 = dirname(tmp1);
+ our_path = strdup(tmp2);
+ if (!our_path)
+ ksft_exit_fail_msg("strdup - %s\n", strerror(errno));
+ free(tmp1);
+
+ mpid = getpid();
+
+ if (fork_wait()) {
+ ksft_print_msg("[RUN]\t+++ Tests with uid == 0 +++\n");
+ return do_tests(0, our_path);
+ }
+
+ ksft_print_msg("==================================================\n");
+
+ if (fork_wait()) {
+ ksft_print_msg("[RUN]\t+++ Tests with uid != 0 +++\n");
+ return do_tests(1, our_path);
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
new file mode 100644
index 000000000..cdfc94268
--- /dev/null
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <cap-ng.h>
+#include <linux/capability.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <sys/auxv.h>
+
+#include "../kselftest.h"
+
+#ifndef PR_CAP_AMBIENT
+#define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+# define PR_CAP_AMBIENT_CLEAR_ALL 4
+#endif
+
+#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
+# define HAVE_GETAUXVAL
+#endif
+
+static bool bool_arg(char **argv, int i)
+{
+ if (!strcmp(argv[i], "0"))
+ return false;
+ else if (!strcmp(argv[i], "1"))
+ return true;
+ else {
+ ksft_exit_fail_msg("wrong argv[%d]\n", i);
+ return false;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const char *atsec = "";
+
+ /*
+ * Be careful just in case a setgid or setcapped copy of this
+ * helper gets out.
+ */
+
+ if (argc != 5)
+ ksft_exit_fail_msg("wrong argc\n");
+
+#ifdef HAVE_GETAUXVAL
+ if (getauxval(AT_SECURE))
+ atsec = " (AT_SECURE is set)";
+ else
+ atsec = " (AT_SECURE is not set)";
+#endif
+
+ capng_get_caps_process();
+
+ if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
+ ksft_print_msg("Wrong effective state%s\n", atsec);
+ return 1;
+ }
+
+ if (capng_have_capability(CAPNG_PERMITTED, CAP_NET_BIND_SERVICE) != bool_arg(argv, 2)) {
+ ksft_print_msg("Wrong permitted state%s\n", atsec);
+ return 1;
+ }
+
+ if (capng_have_capability(CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 3)) {
+ ksft_print_msg("Wrong inheritable state%s\n", atsec);
+ return 1;
+ }
+
+ if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_NET_BIND_SERVICE, 0, 0, 0) != bool_arg(argv, 4)) {
+ ksft_print_msg("Wrong ambient state%s\n", atsec);
+ return 1;
+ }
+
+	ksft_print_msg("%s: Capabilities after execve were correct\n",
+		       "validate_cap");
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
new file mode 100644
index 000000000..adacda50a
--- /dev/null
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -0,0 +1,2 @@
+test_memcontrol
+test_core
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
new file mode 100644
index 000000000..23fbaa4a9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall
+
+all:
+
+TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_core
+
+include ../lib.mk
+
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_core: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
new file mode 100644
index 000000000..a516d01f0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -0,0 +1,370 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "cgroup_util.h"
+
+static ssize_t read_text(const char *path, char *buf, size_t max_len)
+{
+ ssize_t len;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ len = read(fd, buf, max_len - 1);
+ if (len < 0)
+ goto out;
+
+ buf[len] = 0;
+out:
+ close(fd);
+ return len;
+}
+
+static ssize_t write_text(const char *path, char *buf, ssize_t len)
+{
+ int fd;
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = write(fd, buf, len);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+
+ return len;
+}
+
+char *cg_name(const char *root, const char *name)
+{
+ size_t len = strlen(root) + strlen(name) + 2;
+ char *ret = malloc(len);
+
+ snprintf(ret, len, "%s/%s", root, name);
+
+ return ret;
+}
+
+char *cg_name_indexed(const char *root, const char *name, int index)
+{
+ size_t len = strlen(root) + strlen(name) + 10;
+ char *ret = malloc(len);
+
+ snprintf(ret, len, "%s/%s_%d", root, name, index);
+
+ return ret;
+}
+
+int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
+{
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+ if (read_text(path, buf, len) >= 0)
+ return 0;
+
+ return -1;
+}
+
+int cg_read_strcmp(const char *cgroup, const char *control,
+ const char *expected)
+{
+ size_t size;
+ char *buf;
+ int ret;
+
+ /* Handle the case of comparing against empty string */
+ if (!expected)
+ return -1;
+ else
+ size = strlen(expected) + 1;
+
+ buf = malloc(size);
+ if (!buf)
+ return -1;
+
+ if (cg_read(cgroup, control, buf, size)) {
+ free(buf);
+ return -1;
+ }
+
+ ret = strcmp(expected, buf);
+ free(buf);
+ return ret;
+}
+
+int cg_read_strstr(const char *cgroup, const char *control, const char *needle)
+{
+ char buf[PAGE_SIZE];
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ return strstr(buf, needle) ? 0 : -1;
+}
+
+long cg_read_long(const char *cgroup, const char *control)
+{
+ char buf[128];
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ return atol(buf);
+}
+
+long cg_read_key_long(const char *cgroup, const char *control, const char *key)
+{
+ char buf[PAGE_SIZE];
+ char *ptr;
+
+ if (cg_read(cgroup, control, buf, sizeof(buf)))
+ return -1;
+
+ ptr = strstr(buf, key);
+ if (!ptr)
+ return -1;
+
+ return atol(ptr + strlen(key));
+}
+
+int cg_write(const char *cgroup, const char *control, char *buf)
+{
+ char path[PATH_MAX];
+ ssize_t len = strlen(buf);
+
+ snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+
+ if (write_text(path, buf, len) == len)
+ return 0;
+
+ return -1;
+}
+
+int cg_find_unified_root(char *root, size_t len)
+{
+ char buf[10 * PAGE_SIZE];
+ char *fs, *mount, *type;
+ const char delim[] = "\n\t ";
+
+ if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ /*
+ * Example:
+ * cgroup /sys/fs/cgroup cgroup2 rw,seclabel,noexec,relatime 0 0
+ */
+ for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
+ mount = strtok(NULL, delim);
+ type = strtok(NULL, delim);
+ strtok(NULL, delim);
+ strtok(NULL, delim);
+ strtok(NULL, delim);
+
+ if (strcmp(type, "cgroup2") == 0) {
+ strncpy(root, mount, len);
+ return 0;
+ }
+ }
+
+ return -1;
+}
+
+int cg_create(const char *cgroup)
+{
+ return mkdir(cgroup, 0755);
+}
+
+static int cg_killall(const char *cgroup)
+{
+ char buf[PAGE_SIZE];
+ char *ptr = buf;
+
+ if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
+ return -1;
+
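+	/* Walk the newline-separated pid list and SIGKILL each entry. */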
+ while (ptr < buf + sizeof(buf)) {
+ int pid = strtol(ptr, &ptr, 10);
+
+ if (pid == 0)
+ break;
+ if (*ptr)
+ ptr++;
+ else
+ break;
+ if (kill(pid, SIGKILL))
+ return -1;
+ }
+
+ return 0;
+}
+
+int cg_destroy(const char *cgroup)
+{
+ int ret;
+
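+	/* A populated cgroup can't be removed: kill its members and retry. */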
+retry:
+ ret = rmdir(cgroup);
+ if (ret && errno == EBUSY) {
+ ret = cg_killall(cgroup);
+ if (ret)
+ return ret;
+ usleep(100);
+ goto retry;
+ }
+
+ if (ret && errno == ENOENT)
+ ret = 0;
+
+ return ret;
+}
+
+int cg_enter_current(const char *cgroup)
+{
+ char pidbuf[64];
+
+ snprintf(pidbuf, sizeof(pidbuf), "%d", getpid());
+ return cg_write(cgroup, "cgroup.procs", pidbuf);
+}
+
+int cg_run(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int pid, retcode;
+
+ pid = fork();
+ if (pid < 0) {
+ return pid;
+ } else if (pid == 0) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cgroup, "cgroup.procs", buf))
+ exit(EXIT_FAILURE);
+ exit(fn(cgroup, arg));
+ } else {
+ waitpid(pid, &retcode, 0);
+ if (WIFEXITED(retcode))
+ return WEXITSTATUS(retcode);
+ else
+ return -1;
+ }
+}
+
+int cg_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == 0) {
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), "%d", getpid());
+ if (cg_write(cgroup, "cgroup.procs", buf))
+ exit(EXIT_FAILURE);
+ exit(fn(cgroup, arg));
+ }
+
+ return pid;
+}
+
+int get_temp_fd(void)
+{
+ return open(".", O_TMPFILE | O_RDWR | O_EXCL);
+}
+
+int alloc_pagecache(int fd, size_t size)
+{
+ char buf[PAGE_SIZE];
+ struct stat st;
+ int i;
+
+ if (fstat(fd, &st))
+ goto cleanup;
+
+ size += st.st_size;
+
+ if (ftruncate(fd, size))
+ goto cleanup;
+
+ for (i = 0; i < size; i += sizeof(buf))
+ read(fd, buf, sizeof(buf));
+
+ return 0;
+
+cleanup:
+ return -1;
+}
+
+int alloc_anon(const char *cgroup, void *arg)
+{
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
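+	/* Touch one byte per page so the memory is actually charged. */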
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ free(buf);
+ return 0;
+}
+
+int is_swap_enabled(void)
+{
+ char buf[PAGE_SIZE];
+ const char delim[] = "\n";
+ int cnt = 0;
+ char *line;
+
+ if (read_text("/proc/swaps", buf, sizeof(buf)) <= 0)
+ return -1;
+
+ for (line = strtok(buf, delim); line; line = strtok(NULL, delim))
+ cnt++;
+
+ return cnt > 1;
+}
+
+int set_oom_adj_score(int pid, int score)
+{
+ char path[PATH_MAX];
+ int fd, len;
+
+ sprintf(path, "/proc/%d/oom_score_adj", pid);
+
+ fd = open(path, O_WRONLY | O_APPEND);
+ if (fd < 0)
+ return fd;
+
+ len = dprintf(fd, "%d", score);
+ if (len < 0) {
+ close(fd);
+ return len;
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
new file mode 100644
index 000000000..9ac8b7958
--- /dev/null
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+
+#define PAGE_SIZE 4096
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define MB(x) (x << 20)
+
+/*
+ * Checks if two given values differ by less than err% of their sum.
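+ * Note the integer arithmetic: values_close(100, 110, 3) is false,
+ * since 10 > (100 + 110) / 100 * 3 == 6.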
+ */
+static inline int values_close(long a, long b, int err)
+{
+	return labs(a - b) <= (a + b) / 100 * err;
+}
+
+extern int cg_find_unified_root(char *root, size_t len);
+extern char *cg_name(const char *root, const char *name);
+extern char *cg_name_indexed(const char *root, const char *name, int index);
+extern int cg_create(const char *cgroup);
+extern int cg_destroy(const char *cgroup);
+extern int cg_read(const char *cgroup, const char *control,
+ char *buf, size_t len);
+extern int cg_read_strcmp(const char *cgroup, const char *control,
+ const char *expected);
+extern int cg_read_strstr(const char *cgroup, const char *control,
+ const char *needle);
+extern long cg_read_long(const char *cgroup, const char *control);
+long cg_read_key_long(const char *cgroup, const char *control, const char *key);
+extern int cg_write(const char *cgroup, const char *control, char *buf);
+extern int cg_run(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg);
+extern int cg_enter_current(const char *cgroup);
+extern int cg_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg);
+extern int get_temp_fd(void);
+extern int alloc_pagecache(int fd, size_t size);
+extern int alloc_anon(const char *cgroup, void *arg);
+extern int is_swap_enabled(void);
+extern int set_oom_adj_score(int pid, int score);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
new file mode 100644
index 000000000..599234c5e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -0,0 +1,567 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#include <linux/limits.h>
+#include <linux/sched.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * A(0) - B(0) - C(1)
+ * \ D(0)
+ *
+ * A, B and C's "populated" fields would be 1 while D's is 0.
+ * Test that after the one process in C is moved to root,
+ * A, B and C's "populated" fields flip to "0" and file
+ * modified events are generated on the "cgroup.events"
+ * files of all three cgroups.
+ */
+static int test_cgcore_populated(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_c = NULL, *cg_test_d = NULL;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
+ cg_test_c = cg_name(root, "cg_test_a/cg_test_b/cg_test_c");
+ cg_test_d = cg_name(root, "cg_test_a/cg_test_b/cg_test_d");
+
+ if (!cg_test_a || !cg_test_b || !cg_test_c || !cg_test_d)
+ goto cleanup;
+
+ if (cg_create(cg_test_a))
+ goto cleanup;
+
+ if (cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_create(cg_test_c))
+ goto cleanup;
+
+ if (cg_create(cg_test_d))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_c))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_enter_current(root))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_a, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_b, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_c, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (cg_test_d)
+ cg_destroy(cg_test_d);
+ if (cg_test_c)
+ cg_destroy(cg_test_c);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_d);
+ free(cg_test_c);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+/*
+ * A (domain threaded) - B (threaded) - C (domain)
+ *
+ * Test that C can't be used until it is turned into a
+ * threaded cgroup. The "cgroup.type" file will report "domain invalid" in
+ * these cases. Operations which fail due to invalid topology use
+ * EOPNOTSUPP as the errno.
+ */
+static int test_cgcore_invalid_domain(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *grandparent = NULL, *parent = NULL, *child = NULL;
+
+ grandparent = cg_name(root, "cg_test_grandparent");
+ parent = cg_name(root, "cg_test_grandparent/cg_test_parent");
+ child = cg_name(root, "cg_test_grandparent/cg_test_parent/cg_test_child");
+ if (!parent || !child || !grandparent)
+ goto cleanup;
+
+ if (cg_create(grandparent))
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_read_strcmp(child, "cgroup.type", "domain invalid\n"))
+ goto cleanup;
+
+ if (!cg_enter_current(child))
+ goto cleanup;
+
+ if (errno != EOPNOTSUPP)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ if (grandparent)
+ cg_destroy(grandparent);
+ free(child);
+ free(parent);
+ free(grandparent);
+ return ret;
+}
+
+/*
+ * Test that when a child becomes threaded
+ * the parent type becomes domain threaded.
+ */
+static int test_cgcore_parent_becomes_threaded(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(child, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_read_strcmp(parent, "cgroup.type", "domain threaded\n"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+
+}
+
+/*
+ * Test that there's no internal process constraint on threaded cgroups.
+ * You can add threads/processes on a parent with a controller enabled.
+ */
+static int test_cgcore_no_internal_process_constraint_on_threads(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ if (cg_read_strstr(root, "cgroup.controllers", "cpu") ||
+ cg_write(root, "cgroup.subtree_control", "+cpu")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_write(child, "cgroup.type", "threaded"))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ if (cg_enter_current(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+	cg_enter_current(root);
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * Test that you can't enable a controller on a child if it's not enabled
+ * on the parent.
+ */
+static int test_cgcore_top_down_constraint_enable(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (!cg_write(child, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * Test that you can't disable a controller on a parent
+ * if it's enabled in a child.
+ */
+static int test_cgcore_top_down_constraint_disable(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(child, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (!cg_write(parent, "cgroup.subtree_control", "-memory"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * Test internal process constraint.
+ * You can't add a pid to a domain parent if a controller is enabled.
+ */
+static int test_cgcore_internal_process_constraint(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent = NULL, *child = NULL;
+
+ parent = cg_name(root, "cg_test_parent");
+ child = cg_name(root, "cg_test_parent/cg_test_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (!cg_enter_current(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+ return ret;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ uid_t saved_uid;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
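+	/*
+	 * Open the target cgroup.procs while the effective uid is
+	 * unprivileged; the later write as root must still fail with
+	 * EACCES because permission was checked at open time.
+	 */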
+ saved_uid = geteuid();
+ if (seteuid(test_euid))
+ goto cleanup;
+
+ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+ if (seteuid(saved_uid))
+ goto cleanup;
+
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+ const char *path;
+ int fd;
+ int err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+ struct lesser_ns_open_thread_arg *targ = arg;
+
+ targ->fd = open(targ->path, O_RDWR);
+ targ->err = errno;
+ return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+ static char stack[65536];
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+ pid_t pid;
+ int status;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_b))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
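+	/*
+	 * Open cgroup.procs from a child running in a fresh cgroup
+	 * namespace; CLONE_FILES | CLONE_VM hands the resulting fd
+	 * back to the parent.
+	 */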
+ targ.path = cg_test_b_procs;
+ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+ &targ);
+ if (pid < 0)
+ goto cleanup;
+
+ if (waitpid(pid, &status, 0) < 0)
+ goto cleanup;
+
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ cg_test_b_procs_fd = targ.fd;
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct corecg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cgcore_internal_process_constraint),
+ T(test_cgcore_top_down_constraint_enable),
+ T(test_cgcore_top_down_constraint_disable),
+ T(test_cgcore_no_internal_process_constraint_on_threads),
+ T(test_cgcore_parent_becomes_threaded),
+ T(test_cgcore_invalid_domain),
+ T(test_cgcore_populated),
+ T(test_cgcore_lesser_euid_open),
+ T(test_cgcore_lesser_ns_open),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
new file mode 100644
index 000000000..c19a97dd0
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -0,0 +1,1228 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <linux/oom.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <netdb.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the memory controller.
+ */
+static int test_memcg_subtree_control(const char *root)
+{
+ char *parent, *child, *parent2 = NULL, *child2 = NULL;
+ int ret = KSFT_FAIL;
+ char buf[PAGE_SIZE];
+
+ /* Create two nested cgroups with the memory controller enabled */
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+ if (!parent || !child)
+ goto cleanup_free;
+
+ if (cg_create(parent))
+ goto cleanup_free;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup_parent;
+
+ if (cg_create(child))
+ goto cleanup_parent;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "memory"))
+ goto cleanup_child;
+
+ /* Create two nested cgroups without enabling memory controller */
+ parent2 = cg_name(root, "memcg_test_1");
+ child2 = cg_name(root, "memcg_test_1/memcg_test_1");
+ if (!parent2 || !child2)
+ goto cleanup_free2;
+
+ if (cg_create(parent2))
+ goto cleanup_free2;
+
+ if (cg_create(child2))
+ goto cleanup_parent2;
+
+ if (cg_read(child2, "cgroup.controllers", buf, sizeof(buf)))
+ goto cleanup_all;
+
+ if (!cg_read_strstr(child2, "cgroup.controllers", "memory"))
+ goto cleanup_all;
+
+ ret = KSFT_PASS;
+
+cleanup_all:
+ cg_destroy(child2);
+cleanup_parent2:
+ cg_destroy(parent2);
+cleanup_free2:
+ free(parent2);
+ free(child2);
+cleanup_child:
+ cg_destroy(child);
+cleanup_parent:
+ cg_destroy(parent);
+cleanup_free:
+ free(parent);
+ free(child);
+
+ return ret;
+}
+
+static int alloc_anon_50M_check(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ char *buf, *ptr;
+ long anon, current;
+ int ret = -1;
+
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current < size)
+ goto cleanup;
+
+ if (!values_close(size, current, 3))
+ goto cleanup;
+
+ anon = cg_read_key_long(cgroup, "memory.stat", "anon ");
+ if (anon < 0)
+ goto cleanup;
+
+ if (!values_close(anon, current, 3))
+ goto cleanup;
+
+ ret = 0;
+cleanup:
+ free(buf);
+ return ret;
+}
+
+static int alloc_pagecache_50M_check(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ int ret = -1;
+ long current, file;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ return -1;
+
+ if (alloc_pagecache(fd, size))
+ goto cleanup;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current < size)
+ goto cleanup;
+
+ file = cg_read_key_long(cgroup, "memory.stat", "file ");
+ if (file < 0)
+ goto cleanup;
+
+ if (!values_close(file, current, 10))
+ goto cleanup;
+
+ ret = 0;
+
+cleanup:
+ close(fd);
+ return ret;
+}
+
+/*
+ * This test create a memory cgroup, allocates
+ * some anonymous memory and some pagecache
+ * and check memory.current and some memory.stat values.
+ */
+static int test_memcg_current(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long current;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current != 0)
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon_50M_check, NULL))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_50M_check, NULL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+static int alloc_pagecache_50M(const char *cgroup, void *arg)
+{
+ int fd = (long)arg;
+
+ return alloc_pagecache(fd, MB(50));
+}
+
+static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
+{
+ int fd = (long)arg;
+ int ppid = getppid();
+
+ if (alloc_pagecache(fd, MB(50)))
+ return -1;
+
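+	/* Stay alive so the pagecache stays charged until the parent exits. */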
+ while (getppid() == ppid)
+ sleep(1);
+
+ return 0;
+}
+
+static int alloc_anon_noexit(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+
+ if (alloc_anon(cgroup, arg))
+ return -1;
+
+ while (getppid() == ppid)
+ sleep(1);
+
+ return 0;
+}
+
+/*
+ * Wait until processes are killed asynchronously by the OOM killer.
+ * If we exceed a timeout, fail.
+ */
+static int cg_test_proc_killed(const char *cgroup)
+{
+ int limit;
+
+ for (limit = 10; limit > 0; limit--) {
+ if (cg_read_strcmp(cgroup, "cgroup.procs", "") == 0)
+ return 0;
+
+ usleep(100000);
+ }
+ return -1;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A memory.min = 50M, memory.max = 200M
+ * A/B memory.min = 50M, memory.current = 50M
+ * A/B/C memory.min = 75M, memory.current = 50M
+ * A/B/D memory.min = 25M, memory.current = 50M
+ * A/B/E memory.min = 500M, memory.current = 0
+ * A/B/F memory.min = 0, memory.current = 50M
+ *
+ * Usages are pagecache, but the test keeps a running
+ * process in every leaf cgroup.
+ * Then it creates A/G and creates significant
+ * memory pressure in it.
+ *
+ * A/B memory.current ~= 50M
+ * A/B/C memory.current ~= 33M
+ * A/B/D memory.current ~= 17M
+ * A/B/E memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available, and checks that
+ * memory.min protects pagecache even in this case.
+ */
+static int test_memcg_min(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent[3] = {NULL};
+ char *children[4] = {NULL};
+ long c[4];
+ int i, attempts;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ parent[0] = cg_name(root, "memcg_test_0");
+ if (!parent[0])
+ goto cleanup;
+
+ parent[1] = cg_name(parent[0], "memcg_test_1");
+ if (!parent[1])
+ goto cleanup;
+
+ parent[2] = cg_name(parent[0], "memcg_test_2");
+ if (!parent[2])
+ goto cleanup;
+
+ if (cg_create(parent[0]))
+ goto cleanup;
+
+ if (cg_read_long(parent[0], "memory.min")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.max", "200M"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_create(parent[1]))
+ goto cleanup;
+
+ if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(parent[2]))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+ if (!children[i])
+ goto cleanup;
+
+ if (cg_create(children[i]))
+ goto cleanup;
+
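+		/* children[2] (E in the diagram) is left empty: usage 0. */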
+ if (i == 2)
+ continue;
+
+ cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
+ (void *)(long)fd);
+ }
+
+ if (cg_write(parent[0], "memory.min", "50M"))
+ goto cleanup;
+ if (cg_write(parent[1], "memory.min", "50M"))
+ goto cleanup;
+ if (cg_write(children[0], "memory.min", "75M"))
+ goto cleanup;
+ if (cg_write(children[1], "memory.min", "25M"))
+ goto cleanup;
+ if (cg_write(children[2], "memory.min", "500M"))
+ goto cleanup;
+ if (cg_write(children[3], "memory.min", "0"))
+ goto cleanup;
+
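+	/* Give the three 50M pagecache charges time to settle in A/B. */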
+ attempts = 0;
+ while (!values_close(cg_read_long(parent[1], "memory.current"),
+ MB(150), 3)) {
+ if (attempts++ > 5)
+ break;
+ sleep(1);
+ }
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ c[i] = cg_read_long(children[i], "memory.current");
+
+ if (!values_close(c[0], MB(33), 10))
+ goto cleanup;
+
+ if (!values_close(c[1], MB(17), 10))
+ goto cleanup;
+
+ if (!values_close(c[2], 0, 1))
+ goto cleanup;
+
+ if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+ if (!children[i])
+ continue;
+
+ cg_destroy(children[i]);
+ free(children[i]);
+ }
+
+ for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+ if (!parent[i])
+ continue;
+
+ cg_destroy(parent[i]);
+ free(parent[i]);
+ }
+ close(fd);
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A memory.low = 50M, memory.max = 200M
+ * A/B memory.low = 50M, memory.current = 50M
+ * A/B/C memory.low = 75M, memory.current = 50M
+ * A/B/D memory.low = 25M, memory.current = 50M
+ * A/B/E memory.low = 500M, memory.current = 0
+ * A/B/F memory.low = 0, memory.current = 50M
+ *
+ * Usages are pagecache.
+ * Then it creates A/G and creates significant
+ * memory pressure in it.
+ *
+ * Then it checks actual memory usages and expects that:
+ * A/B memory.current ~= 50M
+ * A/B/C memory.current ~= 33M
+ * A/B/D memory.current ~= 17M
+ * A/B/E memory.current ~= 0
+ *
+ * After that it tries to allocate more than there is
+ * unprotected memory in A available,
+ * and checks low and oom events in memory.events.
+ */
+static int test_memcg_low(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent[3] = {NULL};
+ char *children[4] = {NULL};
+ long low, oom;
+ long c[4];
+ int i;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ parent[0] = cg_name(root, "memcg_test_0");
+ if (!parent[0])
+ goto cleanup;
+
+ parent[1] = cg_name(parent[0], "memcg_test_1");
+ if (!parent[1])
+ goto cleanup;
+
+ parent[2] = cg_name(parent[0], "memcg_test_2");
+ if (!parent[2])
+ goto cleanup;
+
+ if (cg_create(parent[0]))
+ goto cleanup;
+
+ if (cg_read_long(parent[0], "memory.low"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.max", "200M"))
+ goto cleanup;
+
+ if (cg_write(parent[0], "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_create(parent[1]))
+ goto cleanup;
+
+ if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_create(parent[2]))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i] = cg_name_indexed(parent[1], "child_memcg", i);
+ if (!children[i])
+ goto cleanup;
+
+ if (cg_create(children[i]))
+ goto cleanup;
+
+ if (i == 2)
+ continue;
+
+ if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
+ goto cleanup;
+ }
+
+ if (cg_write(parent[0], "memory.low", "50M"))
+ goto cleanup;
+ if (cg_write(parent[1], "memory.low", "50M"))
+ goto cleanup;
+ if (cg_write(children[0], "memory.low", "75M"))
+ goto cleanup;
+ if (cg_write(children[1], "memory.low", "25M"))
+ goto cleanup;
+ if (cg_write(children[2], "memory.low", "500M"))
+ goto cleanup;
+ if (cg_write(children[3], "memory.low", "0"))
+ goto cleanup;
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
+ goto cleanup;
+
+ if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ c[i] = cg_read_long(children[i], "memory.current");
+
+ if (!values_close(c[0], MB(33), 10))
+ goto cleanup;
+
+ if (!values_close(c[1], MB(17), 10))
+ goto cleanup;
+
+ if (!values_close(c[2], 0, 1))
+ goto cleanup;
+
+ if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
+ fprintf(stderr,
+			"memory.low prevented the allocation of anon memory\n");
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ oom = cg_read_key_long(children[i], "memory.events", "oom ");
+ low = cg_read_key_long(children[i], "memory.events", "low ");
+
+ if (oom)
+ goto cleanup;
+ if (i < 2 && low <= 0)
+ goto cleanup;
+ if (i >= 2 && low)
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
+ if (!children[i])
+ continue;
+
+ cg_destroy(children[i]);
+ free(children[i]);
+ }
+
+ for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
+ if (!parent[i])
+ continue;
+
+ cg_destroy(parent[i]);
+ free(parent[i]);
+ }
+ close(fd);
+ return ret;
+}
+
+static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
+{
+ size_t size = MB(50);
+ int ret = -1;
+ long current;
+ int fd;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ return -1;
+
+ if (alloc_pagecache(fd, size))
+ goto cleanup;
+
+ current = cg_read_long(cgroup, "memory.current");
+ if (current <= MB(29) || current > MB(30))
+ goto cleanup;
+
+ ret = 0;
+
+cleanup:
+ close(fd);
+ return ret;
+}
+
+/*
+ * This test checks that memory.high limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_high(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long high;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.high", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+ goto cleanup;
+
+ high = cg_read_key_long(memcg, "memory.events", "high ");
+ if (high <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test checks that memory.max limits the amount of
+ * memory which can be consumed by either anonymous memory
+ * or pagecache.
+ */
+static int test_memcg_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long current, max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ /* Should be killed by OOM killer */
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_pagecache_max_30M, NULL))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current > MB(30) || !current)
+ goto cleanup;
+
+ max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (max <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
+{
+ long mem_max = (long)arg;
+ size_t size = MB(50);
+ char *buf, *ptr;
+ long mem_current, swap_current;
+ int ret = -1;
+
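+	/* Allocate 50M and touch one byte per page to actually commit it. */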
+	buf = malloc(size);
+	if (!buf)
+		return -1;
+
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 0;
+
+ mem_current = cg_read_long(cgroup, "memory.current");
+ if (!mem_current || !values_close(mem_current, mem_max, 3))
+ goto cleanup;
+
+ swap_current = cg_read_long(cgroup, "memory.swap.current");
+ if (!swap_current ||
+ !values_close(mem_current + swap_current, size, 3))
+ goto cleanup;
+
+ ret = 0;
+cleanup:
+ free(buf);
+ return ret;
+}
+
+/*
+ * This test checks that memory.swap.max limits the amount of
+ * anonymous memory which can be swapped out.
+ */
+static int test_memcg_swap_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ long max;
+
+ if (!is_swap_enabled())
+ return KSFT_SKIP;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.swap.current")) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (cg_read_strcmp(memcg, "memory.max", "max\n"))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "memory.swap.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ /* Should be killed by OOM killer */
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+ goto cleanup;
+
+ if (cg_run(memcg, alloc_anon_50M_check_swap, (void *)MB(30)))
+ goto cleanup;
+
+ max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (max <= 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM. Then it checks for oom and oom_kill events in
+ * memory.events.
+ */
+static int test_memcg_oom_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_strcmp(memcg, "cgroup.procs", ""))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom ") != 1)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 1)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+struct tcp_server_args {
+ unsigned short port;
+ int ctl[2];
+};
+
+static int tcp_server(const char *cgroup, void *arg)
+{
+ struct tcp_server_args *srv_args = arg;
+ struct sockaddr_in6 saddr = { 0 };
+ socklen_t slen = sizeof(saddr);
+ int sk, client_sk, ctl_fd, yes = 1, ret = -1;
+
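+	/* The parent keeps the read end; the bind status goes out via ctl[1]. */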
+ close(srv_args->ctl[0]);
+ ctl_fd = srv_args->ctl[1];
+
+ saddr.sin6_family = AF_INET6;
+ saddr.sin6_addr = in6addr_any;
+ saddr.sin6_port = htons(srv_args->port);
+
+ sk = socket(AF_INET6, SOCK_STREAM, 0);
+ if (sk < 0)
+ return ret;
+
+ if (setsockopt(sk, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0)
+ goto cleanup;
+
+ if (bind(sk, (struct sockaddr *)&saddr, slen)) {
+ write(ctl_fd, &errno, sizeof(errno));
+ goto cleanup;
+ }
+
+ if (listen(sk, 1))
+ goto cleanup;
+
+ ret = 0;
+ if (write(ctl_fd, &ret, sizeof(ret)) != sizeof(ret)) {
+ ret = -1;
+ goto cleanup;
+ }
+
+ client_sk = accept(sk, NULL, NULL);
+ if (client_sk < 0)
+ goto cleanup;
+
+ ret = -1;
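+	/* Stream data until the client hangs up; a reset counts as success. */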
+ for (;;) {
+ uint8_t buf[0x100000];
+
+ if (write(client_sk, buf, sizeof(buf)) <= 0) {
+ if (errno == ECONNRESET)
+ ret = 0;
+ break;
+ }
+ }
+
+ close(client_sk);
+
+cleanup:
+ close(sk);
+ return ret;
+}
+
+static int tcp_client(const char *cgroup, unsigned short port)
+{
+ const char server[] = "localhost";
+ struct addrinfo *ai;
+ char servport[6];
+ int retries = 0x10; /* nice round number */
+ int sk, ret;
+
+	snprintf(servport, sizeof(servport), "%hu", port);
+ ret = getaddrinfo(server, servport, NULL, &ai);
+ if (ret)
+ return ret;
+
+ sk = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (sk < 0)
+ goto free_ainfo;
+
+ ret = connect(sk, ai->ai_addr, ai->ai_addrlen);
+ if (ret < 0)
+ goto close_sk;
+
+ ret = KSFT_FAIL;
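+	/*
+	 * Interleave reads with accounting checks: memory.current may
+	 * never drop below the "sock" counter, and the two must converge.
+	 */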
+ while (retries--) {
+ uint8_t buf[0x100000];
+ long current, sock;
+
+ if (read(sk, buf, sizeof(buf)) <= 0)
+ goto close_sk;
+
+ current = cg_read_long(cgroup, "memory.current");
+ sock = cg_read_key_long(cgroup, "memory.stat", "sock ");
+
+ if (current < 0 || sock < 0)
+ goto close_sk;
+
+ if (current < sock)
+ goto close_sk;
+
+ if (values_close(current, sock, 10)) {
+ ret = KSFT_PASS;
+ break;
+ }
+ }
+
+close_sk:
+ close(sk);
+free_ainfo:
+ freeaddrinfo(ai);
+ return ret;
+}
+
+/*
+ * This test checks socket memory accounting.
+ * The test forks a TCP server that listens on a random port
+ * between 1000 and 60999. Once it gets a client connection, it
+ * starts writing to its socket.
+ * The TCP client interleaves reads from the socket with checks
+ * that memory.current and memory.stat.sock are similar.
+ */
+static int test_memcg_sock(const char *root)
+{
+ int bind_retries = 5, ret = KSFT_FAIL, pid, err;
+ unsigned short port;
+ char *memcg;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
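+	/* Retry with a fresh random port if the server loses the bind race. */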
+ while (bind_retries--) {
+ struct tcp_server_args args;
+
+ if (pipe(args.ctl))
+ goto cleanup;
+
+ port = args.port = 1000 + rand() % 60000;
+
+ pid = cg_run_nowait(memcg, tcp_server, &args);
+ if (pid < 0)
+ goto cleanup;
+
+ close(args.ctl[1]);
+ if (read(args.ctl[0], &err, sizeof(err)) != sizeof(err))
+ goto cleanup;
+ close(args.ctl[0]);
+
+ if (!err)
+ break;
+ if (err != EADDRINUSE)
+ goto cleanup;
+
+ waitpid(pid, NULL, 0);
+ }
+
+ if (err == EADDRINUSE) {
+ ret = KSFT_SKIP;
+ goto cleanup;
+ }
+
+ if (tcp_client(memcg, port) != KSFT_PASS)
+ goto cleanup;
+
+ waitpid(pid, &err, 0);
+ if (WEXITSTATUS(err))
+ goto cleanup;
+
+ if (cg_read_long(memcg, "memory.current") < 0)
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.stat", "sock "))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.oom.group set. Then it checks that all
+ * processes in the leaf (but not the parent) were killed.
+ */
+static int test_memcg_oom_group_leaf_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.max", "50M"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(child, "memory.oom.group", "1"))
+ goto cleanup;
+
+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ if (!cg_run(child, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_test_proc_killed(child))
+ goto cleanup;
+
+ if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
+ goto cleanup;
+
+ if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.oom.group set. Then it checks that all
+ * processes in the parent and leaf were killed.
+ */
+static int test_memcg_oom_group_parent_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+
+ parent = cg_name(root, "memcg_test_0");
+ child = cg_name(root, "memcg_test_0/memcg_test_1");
+
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.max", "80M"))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(parent, "memory.oom.group", "1"))
+ goto cleanup;
+
+ cg_run_nowait(parent, alloc_anon_noexit, (void *) MB(60));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+ cg_run_nowait(child, alloc_anon_noexit, (void *) MB(1));
+
+ if (!cg_run(child, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_test_proc_killed(child))
+ goto cleanup;
+ if (cg_test_proc_killed(parent))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (child)
+ cg_destroy(child);
+ if (parent)
+ cg_destroy(parent);
+ free(child);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test disables swapping and tries to allocate anonymous memory
+ * up to OOM with memory.oom.group set. Then it checks that all
+ * processes were killed except those set with OOM_SCORE_ADJ_MIN.
+ */
+static int test_memcg_oom_group_score_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *memcg;
+ int safe_pid;
+
+ memcg = cg_name(root, "memcg_test_0");
+
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "50M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.oom.group", "1"))
+ goto cleanup;
+
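+	/* Shield one process from the group kill via OOM_SCORE_ADJ_MIN. */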
+ safe_pid = cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+ if (set_oom_adj_score(safe_pid, OOM_SCORE_ADJ_MIN))
+ goto cleanup;
+
+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(1));
+ if (!cg_run(memcg, alloc_anon, (void *)MB(100)))
+ goto cleanup;
+
+ if (cg_read_key_long(memcg, "memory.events", "oom_kill ") != 3)
+ goto cleanup;
+
+ if (kill(safe_pid, SIGKILL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (memcg)
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
+
+#define T(x) { x, #x }
+struct memcg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_memcg_subtree_control),
+ T(test_memcg_current),
+ T(test_memcg_min),
+ T(test_memcg_low),
+ T(test_memcg_high),
+ T(test_memcg_max),
+ T(test_memcg_oom_events),
+ T(test_memcg_swap_max),
+ T(test_memcg_sock),
+ T(test_memcg_oom_group_leaf_events),
+ T(test_memcg_oom_group_parent_events),
+ T(test_memcg_oom_group_score_events),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
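+	/* Enable the memory controller in the root's subtree if needed. */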
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
new file mode 100644
index 000000000..d8be047ee
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := cpu-on-off-test.sh
+
+include ../lib.mk
+
+run_full_test:
+ @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+
+clean:
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
new file mode 100644
index 000000000..d4aca2ad5
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/config
@@ -0,0 +1 @@
+CONFIG_NOTIFIER_ERROR_INJECTION=y
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
new file mode 100755
index 000000000..0d26b5e3f
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+SYSFS=
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+prerequisite()
+{
+ msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ taskset -p 01 $$
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit $ksft_skip
+ fi
+
+ if ! ls $SYSFS/devices/system/cpu/cpu* > /dev/null 2>&1; then
+ echo $msg cpu hotplug is not supported >&2
+ exit $ksft_skip
+ fi
+
+ echo "CPU online/offline summary:"
+ online_cpus=`cat $SYSFS/devices/system/cpu/online`
+ online_max=${online_cpus##*-}
+
+ if [[ "$online_cpus" = "$online_max" ]]; then
+		echo "$msg since there is only one cpu: $online_cpus"
+ exit $ksft_skip
+ fi
+
+ present_cpus=`cat $SYSFS/devices/system/cpu/present`
+ present_max=${present_cpus##*-}
+ echo "present_cpus = $present_cpus present_max = $present_max"
+
+ echo -e "\t Cpus in online state: $online_cpus"
+
+ offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
+	if [[ -z "$offline_cpus" ]]; then
+ offline_cpus=0
+ else
+ offline_max=${offline_cpus##*-}
+ fi
+ echo -e "\t Cpus in offline state: $offline_cpus"
+}
+
+#
+# list all hot-pluggable CPUs
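+# $1: optional online state to filter on (0 or 1); default matches any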
+#
+hotpluggable_cpus()
+{
+ local state=${1:-.\*}
+
+ for cpu in $SYSFS/devices/system/cpu/cpu*; do
+ if [ -f $cpu/online ] && grep -q $state $cpu/online; then
+ echo ${cpu##/*/cpu}
+ fi
+ done
+}
+
+hotpluggable_offline_cpus()
+{
+ hotpluggable_cpus 0
+}
+
+hotpluggable_online_cpus()
+{
+ hotpluggable_cpus 1
+}
+
+cpu_is_online()
+{
+ grep -q 1 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+cpu_is_offline()
+{
+ grep -q 0 $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu()
+{
+ echo 1 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+offline_cpu()
+{
+ echo 0 > $SYSFS/devices/system/cpu/cpu$1/online
+}
+
+online_cpu_expect_success()
+{
+ local cpu=$1
+
+ if ! online_cpu $cpu; then
+ echo $FUNCNAME $cpu: unexpected fail >&2
+ exit 1
+ elif ! cpu_is_online $cpu; then
+ echo $FUNCNAME $cpu: unexpected offline >&2
+ exit 1
+ fi
+}
+
+online_cpu_expect_fail()
+{
+ local cpu=$1
+
+ if online_cpu $cpu 2> /dev/null; then
+ echo $FUNCNAME $cpu: unexpected success >&2
+ exit 1
+ elif ! cpu_is_offline $cpu; then
+ echo $FUNCNAME $cpu: unexpected online >&2
+ exit 1
+ fi
+}
+
+offline_cpu_expect_success()
+{
+ local cpu=$1
+
+ if ! offline_cpu $cpu; then
+ echo $FUNCNAME $cpu: unexpected fail >&2
+ exit 1
+ elif ! cpu_is_offline $cpu; then
+		echo $FUNCNAME $cpu: unexpected online >&2
+ exit 1
+ fi
+}
+
+offline_cpu_expect_fail()
+{
+ local cpu=$1
+
+ if offline_cpu $cpu 2> /dev/null; then
+ echo $FUNCNAME $cpu: unexpected success >&2
+ exit 1
+ elif ! cpu_is_online $cpu; then
+ echo $FUNCNAME $cpu: unexpected offline >&2
+ exit 1
+ fi
+}
+
+error=-12
+allcpus=0
+priority=0
+online_cpus=0
+online_max=0
+offline_cpus=0
+offline_max=0
+present_cpus=0
+present_max=0
+
+while getopts e:ahp: opt; do
+ case $opt in
+ e)
+ error=$OPTARG
+ ;;
+ a)
+ allcpus=1
+ ;;
+ h)
+		echo "Usage: $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+ echo -e "\t default offline one cpu"
+ echo -e "\t run with -a option to offline all cpus"
+ exit
+ ;;
+ p)
+ priority=$OPTARG
+ ;;
+ esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+ echo "error code must be -4095 <= errno < 0" >&2
+ exit 1
+fi
+
+prerequisite
+
+#
+# Safe test (default) - offline and online one cpu
+#
+if [ $allcpus -eq 0 ]; then
+ echo "Limited scope test: one hotplug cpu"
+ echo -e "\t (leaves cpu in the original state):"
+ echo -e "\t online to offline to online: cpu $online_max"
+ offline_cpu_expect_success $online_max
+ online_cpu_expect_success $online_max
+
+ if [[ $offline_cpus -gt 0 ]]; then
+ echo -e "\t offline to online to offline: cpu $present_max"
+ online_cpu_expect_success $present_max
+ offline_cpu_expect_success $present_max
+ online_cpu $present_max
+ fi
+ exit 0
+else
+ echo "Full scope test: all hotplug cpus"
+ echo -e "\t online all offline cpus"
+ echo -e "\t offline all online cpus"
+ echo -e "\t online all offline cpus"
+fi
+
+#
+# Online all hot-pluggable CPUs
+#
+for cpu in `hotpluggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+done
+
+#
+# Offline all hot-pluggable CPUs
+#
+for cpu in `hotpluggable_online_cpus`; do
+ offline_cpu_expect_success $cpu
+done
+
+#
+# Online all hot-pluggable CPUs again
+#
+for cpu in `hotpluggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+done
+
+#
+# Test with cpu notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+
+prerequisite_extra()
+{
+ msg="skip extra tests:"
+
+ /sbin/modprobe -q -r cpu-notifier-error-inject
+ /sbin/modprobe -q cpu-notifier-error-inject priority=$priority
+
+ if [ ! -d "$DEBUGFS" ]; then
+ echo $msg debugfs is not mounted >&2
+ exit $ksft_skip
+ fi
+
+ if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+ echo $msg cpu-notifier-error-inject module is not available >&2
+ exit $ksft_skip
+ fi
+}
+
+prerequisite_extra
+
+#
+# Offline all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+ offline_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotpluggable_offline_cpus`; do
+ online_cpu_expect_fail $cpu
+done
+
+#
+# Online all hot-pluggable CPUs
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
+for cpu in `hotpluggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+done
+
+#
+# Test CPU hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+for cpu in `hotpluggable_online_cpus`; do
+ offline_cpu_expect_fail $cpu
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
+/sbin/modprobe -q -r cpu-notifier-error-inject
diff --git a/tools/testing/selftests/cpufreq/Makefile b/tools/testing/selftests/cpufreq/Makefile
new file mode 100644
index 000000000..c86ca8342
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := main.sh
+TEST_FILES := cpu.sh cpufreq.sh governor.sh module.sh special-tests.sh
+
+include ../lib.mk
+
+clean:
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
new file mode 100644
index 000000000..27ff72ebd
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/config
@@ -0,0 +1,15 @@
+CONFIG_CPU_FREQ=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
+CONFIG_DEBUG_RT_MUTEXES=y
+CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_SPINLOCK=y
+CONFIG_DEBUG_MUTEXES=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_LOCKDEP=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/cpufreq/cpu.sh b/tools/testing/selftests/cpufreq/cpu.sh
new file mode 100755
index 000000000..39fdcdfb8
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpu.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# CPU helpers
+
+# protect against multiple inclusion
+if [ $FILE_CPU ]; then
+ return 0
+else
+ FILE_CPU=DONE
+fi
+
+source cpufreq.sh
+
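+# Run the given command once per cpu, appending the cpu name as the
+# last argument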
+for_each_cpu()
+{
+ cpus=$(ls $CPUROOT | grep "cpu[0-9].*")
+ for cpu in $cpus; do
+ $@ $cpu
+ done
+}
+
+for_each_non_boot_cpu()
+{
+ cpus=$(ls $CPUROOT | grep "cpu[1-9].*")
+ for cpu in $cpus; do
+ $@ $cpu
+ done
+}
+
+#$1: cpu
+offline_cpu()
+{
+ printf "Offline $1\n"
+ echo 0 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+online_cpu()
+{
+ printf "Online $1\n"
+ echo 1 > $CPUROOT/$1/online
+}
+
+#$1: cpu
+reboot_cpu()
+{
+ offline_cpu $1
+ online_cpu $1
+}
+
+# Reboot CPUs
+# param: number of times we want to run the loop
+reboot_cpus()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+ for i in `seq 1 $1`; do
+ for_each_non_boot_cpu offline_cpu
+ for_each_non_boot_cpu online_cpu
+ printf "\n"
+ done
+
+ printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Prints warning for all CPUs with missing cpufreq directory
+print_unmanaged_cpus()
+{
+ for_each_cpu cpu_should_have_cpufreq_directory
+}
+
+# Counts CPUs with cpufreq directories
+count_cpufreq_managed_cpus()
+{
+ count=0;
+
+ for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+ if [ -d $CPUROOT/$cpu/cpufreq ]; then
+ let count=count+1;
+ fi
+ done
+
+ echo $count;
+}
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
new file mode 100755
index 000000000..b583a2fb4
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -0,0 +1,242 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# protect against multiple inclusion
+if [ $FILE_CPUFREQ ]; then
+ return 0
+else
+ FILE_CPUFREQ=DONE
+fi
+
+source cpu.sh
+
+
+# $1: cpu
+cpu_should_have_cpufreq_directory()
+{
+ if [ ! -d $CPUROOT/$1/cpufreq ]; then
+ printf "Warning: No cpufreq directory present for $1\n"
+ fi
+}
+
+cpu_should_not_have_cpufreq_directory()
+{
+ if [ -d $CPUROOT/$1/cpufreq ]; then
+ printf "Warning: cpufreq directory present for $1\n"
+ fi
+}
+
+for_each_policy()
+{
+	policies=$(ls $CPUFREQROOT | grep "policy[0-9].*")
+ for policy in $policies; do
+ $@ $policy
+ done
+}
+
+for_each_policy_concurrent()
+{
+	policies=$(ls $CPUFREQROOT | grep "policy[0-9].*")
+ for policy in $policies; do
+ $@ $policy &
+ done
+}
+
+# $1: Path
+read_cpufreq_files_in_dir()
+{
+ local files=`ls $1`
+
+ printf "Printing directory: $1\n\n"
+
+ for file in $files; do
+ if [ -f $1/$file ]; then
+ printf "$file:"
+ cat $1/$file
+ else
+ printf "\n"
+ read_cpufreq_files_in_dir "$1/$file"
+ fi
+ done
+ printf "\n"
+}
+
+
+read_all_cpufreq_files()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ read_cpufreq_files_in_dir $CPUFREQROOT
+
+ printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# UPDATE CPUFREQ FILES
+
+# $1: directory path
+update_cpufreq_files_in_dir()
+{
+ local files=`ls $1`
+
+ printf "Updating directory: $1\n\n"
+
+ for file in $files; do
+ if [ -f $1/$file ]; then
+			# is the file writable?
+ local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }')
+
+ if [ ! -z $wfile ]; then
+ # scaling_setspeed is a special file and we
+ # should skip updating it
+ if [ $file != "scaling_setspeed" ]; then
+ local val=$(cat $1/$file)
+ printf "Writing $val to: $file\n"
+ echo $val > $1/$file
+ fi
+ fi
+ else
+ printf "\n"
+ update_cpufreq_files_in_dir "$1/$file"
+ fi
+ done
+
+ printf "\n"
+}
+
+# Update all writable files with their existing values
+update_all_cpufreq_files()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ update_cpufreq_files_in_dir $CPUFREQROOT
+
+ printf "%s\n\n" "------------------------------------------------"
+}
+
+
+# CHANGE CPU FREQUENCIES
+
+# $1: policy
+find_current_freq()
+{
+ cat $CPUFREQROOT/$1/scaling_cur_freq
+}
+
+# $1: policy
+# $2: frequency
+set_cpu_frequency()
+{
+ printf "Change frequency for $1 to $2\n"
+ echo $2 > $CPUFREQROOT/$1/scaling_setspeed
+}
+
+# $1: policy
+test_all_frequencies()
+{
+ local filepath="$CPUFREQROOT/$1"
+
+ backup_governor $1
+
+ local found=$(switch_governor $1 "userspace")
+ if [ $found = 1 ]; then
+ printf "${FUNCNAME[0]}: userspace governor not available for: $1\n"
+ return;
+ fi
+
+ printf "Switched governor for $1 to userspace\n\n"
+
+ local freqs=$(cat $filepath/scaling_available_frequencies)
+ printf "Available frequencies for $1: $freqs\n\n"
+
+ # Set all frequencies one-by-one
+ for freq in $freqs; do
+ set_cpu_frequency $1 $freq
+ done
+
+ printf "\n"
+
+ restore_governor $1
+}
+
+# $1: loop count
+shuffle_frequency_for_all_cpus()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+ for i in `seq 1 $1`; do
+ for_each_policy test_all_frequencies
+ done
+ printf "\n%s\n\n" "------------------------------------------------"
+}
+
+# Basic cpufreq tests
+cpufreq_basic_tests()
+{
+ printf "*** RUNNING CPUFREQ SANITY TESTS ***\n"
+ printf "====================================\n\n"
+
+ count=$(count_cpufreq_managed_cpus)
+ if [ $count = 0 ]; then
+ printf "No cpu is managed by cpufreq core, exiting\n"
+ exit;
+ else
+ printf "CPUFreq manages: $count CPUs\n\n"
+ fi
+
+ # Detect & print which CPUs are not managed by cpufreq
+ print_unmanaged_cpus
+
+ # read/update all cpufreq files
+ read_all_cpufreq_files
+ update_all_cpufreq_files
+
+ # hotplug cpus
+ reboot_cpus 5
+
+ # Test all frequencies
+ shuffle_frequency_for_all_cpus 2
+
+ # Test all governors
+ shuffle_governors_for_all_cpus 1
+}
+
+# Suspend/resume
+# $1: "suspend" or "hibernate", $2: loop count
+do_suspend()
+{
+ printf "** Test: Running ${FUNCNAME[0]}: Trying $1 for $2 loops **\n\n"
+
+ # Is the directory available
+ if [ ! -d $SYSFS/power/ -o ! -f $SYSFS/power/state ]; then
+ printf "$SYSFS/power/state not available\n"
+ return 1
+ fi
+
+ if [ $1 = "suspend" ]; then
+ filename="mem"
+ elif [ $1 = "hibernate" ]; then
+ filename="disk"
+ else
+ printf "$1 is not a valid option\n"
+ return 1
+ fi
+
+	if [ -n "$filename" ]; then
+ present=$(cat $SYSFS/power/state | grep $filename)
+
+ if [ -z "$present" ]; then
+ printf "Tried to $1 but $filename isn't present in $SYSFS/power/state\n"
+ return 1;
+ fi
+
+ for i in `seq 1 $2`; do
+ printf "Starting $1\n"
+ echo $filename > $SYSFS/power/state
+ printf "Came out of $1\n"
+
+ printf "Do basic tests after finishing $1 to verify cpufreq state\n\n"
+ cpufreq_basic_tests
+ done
+ fi
+}
diff --git a/tools/testing/selftests/cpufreq/governor.sh b/tools/testing/selftests/cpufreq/governor.sh
new file mode 100755
index 000000000..fe37df79c
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/governor.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test governors
+
+# protect against multiple inclusion
+if [ $FILE_GOVERNOR ]; then
+ return 0
+else
+ FILE_GOVERNOR=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+
+CUR_GOV=
+CUR_FREQ=
+
+# Find governor's directory path
+# $1: policy, $2: governor
+find_gov_directory()
+{
+ if [ -d $CPUFREQROOT/$2 ]; then
+ printf "$CPUFREQROOT/$2\n"
+ elif [ -d $CPUFREQROOT/$1/$2 ]; then
+ printf "$CPUFREQROOT/$1/$2\n"
+ else
+ printf "INVALID\n"
+ fi
+}
+
+# $1: policy
+find_current_governor()
+{
+ cat $CPUFREQROOT/$1/scaling_governor
+}
+
+# $1: policy
+backup_governor()
+{
+ CUR_GOV=$(find_current_governor $1)
+
+ printf "Governor backup done for $1: $CUR_GOV\n"
+
+ if [ $CUR_GOV == "userspace" ]; then
+ CUR_FREQ=$(find_current_freq $1)
+ printf "Governor frequency backup done for $1: $CUR_FREQ\n"
+ fi
+
+ printf "\n"
+}
+
+# $1: policy
+restore_governor()
+{
+ __switch_governor $1 $CUR_GOV
+
+ printf "Governor restored for $1 to $CUR_GOV\n"
+
+ if [ $CUR_GOV == "userspace" ]; then
+ set_cpu_frequency $1 $CUR_FREQ
+ printf "Governor frequency restored for $1: $CUR_FREQ\n"
+ fi
+
+ printf "\n"
+}
+
+# param:
+# $1: policy, $2: governor
+__switch_governor()
+{
+ echo $2 > $CPUFREQROOT/$1/scaling_governor
+}
+
+# param:
+# $1: cpu, $2: governor
+__switch_governor_for_cpu()
+{
+ echo $2 > $CPUROOT/$1/cpufreq/scaling_governor
+}
+
+# SWITCH GOVERNORS
+
+# $1: policy, $2: governor
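+# Echoes 1 if the governor isn't available for this policy, 0 on success;
+# callers capture the result via command substitution.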
+switch_governor()
+{
+ local filepath=$CPUFREQROOT/$1/scaling_available_governors
+
+ # check if governor is available
+ local found=$(cat $filepath | grep $2 | wc -l)
+ if [ $found = 0 ]; then
+ echo 1;
+ return
+ fi
+
+ __switch_governor $1 $2
+ echo 0;
+}
+
+# $1: policy, $2: governor
+switch_show_governor()
+{
+	cur_gov=$(find_current_governor $1)
+	if [ $cur_gov == "userspace" ]; then
+		cur_freq=$(find_current_freq $1)
+	fi
+
+ # switch governor
+ __switch_governor $1 $2
+
+ printf "\nSwitched governor for $1 to $2\n\n"
+
+ if [ $2 == "userspace" -o $2 == "powersave" -o $2 == "performance" ]; then
+ printf "No files to read for $2 governor\n\n"
+ return
+ fi
+
+ # show governor files
+ local govpath=$(find_gov_directory $1 $2)
+ read_cpufreq_files_in_dir $govpath
+}
+
+# $1: function to be called, $2: policy
+call_for_each_governor()
+{
+ local filepath=$CPUFREQROOT/$2/scaling_available_governors
+
+ # Exit if cpu isn't managed by cpufreq core
+ if [ ! -f $filepath ]; then
+ return;
+ fi
+
+ backup_governor $2
+
+ local governors=$(cat $filepath)
+ printf "Available governors for $2: $governors\n"
+
+ for governor in $governors; do
+ $1 $2 $governor
+ done
+
+ restore_governor $2
+}
+
+# $1: loop count
+shuffle_governors_for_all_cpus()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $1 loops **\n\n"
+
+ for i in `seq 1 $1`; do
+ for_each_policy call_for_each_governor switch_show_governor
+ done
+ printf "%s\n\n" "------------------------------------------------"
+}
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
new file mode 100755
index 000000000..31f8c9a76
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+source module.sh
+source special-tests.sh
+
+FUNC=basic # do basic tests by default
+OUTFILE=cpufreq_selftest
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+helpme()
+{
+ printf "Usage: $0 [-h] [-todg args]
+ [-h <help>]
+ [-o <output-file-for-dump>]
+ [-t <basic: Basic cpufreq testing
+ suspend: suspend/resume,
+ hibernate: hibernate/resume,
+ modtest: test driver or governor modules. Only to be used with -d or -g options,
+ sptest1: Simple governor switch to produce lockdep.
+ sptest2: Concurrent governor switch to produce lockdep.
+ sptest3: Governor races, shuffle between governors quickly.
+ sptest4: CPU hotplugs with updates to cpufreq files.>]
+	[-d <driver's module name: only with \"-t modtest\">]
+	[-g <governor's module name: only with \"-t modtest\">]
+ \n"
+ exit 2
+}
+
+prerequisite()
+{
+ msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ taskset -p 01 $$
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit 2
+ fi
+
+ CPUROOT=$SYSFS/devices/system/cpu
+ CPUFREQROOT="$CPUROOT/cpufreq"
+
+ if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+ echo $msg cpus not available in sysfs >&2
+ exit 2
+ fi
+
+ if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+ echo $msg cpufreq directory not available in sysfs >&2
+ exit 2
+ fi
+}
+
+parse_arguments()
+{
+ while getopts ht:o:d:g: arg
+ do
+ case $arg in
+ h) # --help
+ helpme
+ ;;
+
+ t) # --func_type (Function to perform: basic, suspend, hibernate, modtest, sptest1/2/3/4 (default: basic))
+ FUNC=$OPTARG
+ ;;
+
+ o) # --output-file (Output file to store dumps)
+ OUTFILE=$OPTARG
+ ;;
+
+ d) # --driver-mod-name (Name of the driver module)
+ DRIVER_MOD=$OPTARG
+ ;;
+
+ g) # --governor-mod-name (Name of the governor module)
+ GOVERNOR_MOD=$OPTARG
+ ;;
+
+ \?)
+ helpme
+ ;;
+ esac
+ done
+}
+
+do_test()
+{
+ # Check if CPUs are managed by cpufreq or not
+ count=$(count_cpufreq_managed_cpus)
+
+ if [ $count = 0 -a $FUNC != "modtest" ]; then
+ echo "No cpu is managed by cpufreq core, exiting"
+ exit 2;
+ fi
+
+ case "$FUNC" in
+ "basic")
+ cpufreq_basic_tests
+ ;;
+
+ "suspend")
+ do_suspend "suspend" 1
+ ;;
+
+ "hibernate")
+ do_suspend "hibernate" 1
+ ;;
+
+ "modtest")
+ # Do we have modules in place?
+ if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
+ echo "No driver or governor module passed with -d or -g"
+ exit 2;
+ fi
+
+ if [ $DRIVER_MOD ]; then
+ if [ $GOVERNOR_MOD ]; then
+ module_test $DRIVER_MOD $GOVERNOR_MOD
+ else
+ module_driver_test $DRIVER_MOD
+ fi
+ else
+ if [ $count = 0 ]; then
+ echo "No cpu is managed by cpufreq core, exiting"
+ exit 2;
+ fi
+
+ module_governor_test $GOVERNOR_MOD
+ fi
+ ;;
+
+ "sptest1")
+ simple_lockdep
+ ;;
+
+ "sptest2")
+ concurrent_lockdep
+ ;;
+
+ "sptest3")
+ governor_race
+ ;;
+
+ "sptest4")
+ hotplug_with_updates
+ ;;
+
+ *)
+		echo "Invalid [-t] function type"
+ helpme
+ ;;
+ esac
+}
+
+# clear dumps
+# $1: file name
+clear_dumps()
+{
+ echo "" > $1.txt
+ echo "" > $1.dmesg_cpufreq.txt
+ echo "" > $1.dmesg_full.txt
+}
+
+# $1: output file name
+dmesg_dumps()
+{
+ dmesg | grep cpufreq >> $1.dmesg_cpufreq.txt
+
+ # We may need the full logs as well
+ dmesg >> $1.dmesg_full.txt
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+clear_dumps $OUTFILE
+do_test >> $OUTFILE.txt
+dmesg_dumps $OUTFILE
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
new file mode 100755
index 000000000..22563cd12
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Modules specific tests cases
+
+# protect against multiple inclusion
+if [ $FILE_MODULE ]; then
+ return 0
+else
+ FILE_MODULE=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Check basic insmod/rmmod
+# $1: module
+test_basic_insmod_rmmod()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ printf "Inserting $1 module\n"
+ # insert module
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ exit;
+ fi
+
+ printf "Removing $1 module\n"
+ # remove module
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ exit;
+ fi
+
+ printf "\n"
+}
+
+# Insert cpufreq driver module and perform basic tests
+# $1: cpufreq-driver module to insert
+# $2: If we want to play with CPUs (1) or not (0)
+module_driver_test_single()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for driver $1 and cpus_hotplug=$2 **\n\n"
+
+ if [ $2 -eq 1 ]; then
+ # offline all non-boot CPUs
+ for_each_non_boot_cpu offline_cpu
+ printf "\n"
+ fi
+
+ # insert module
+ printf "Inserting $1 module\n\n"
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ return;
+ fi
+
+ if [ $2 -eq 1 ]; then
+ # online all non-boot CPUs
+ for_each_non_boot_cpu online_cpu
+ printf "\n"
+ fi
+
+ # run basic tests
+ cpufreq_basic_tests
+
+ # remove module
+ printf "Removing $1 module\n\n"
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ return;
+ fi
+
+ # There shouldn't be any cpufreq directories now.
+ for_each_cpu cpu_should_not_have_cpufreq_directory
+ printf "\n"
+}
+
+# $1: cpufreq-driver module to insert
+module_driver_test()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ # check if module is present or not
+ ls $1 > /dev/null
+ if [ $? != 0 ]; then
+ printf "$1: not present in `pwd` folder\n"
+ return;
+ fi
+
+ # test basic module tests
+ test_basic_insmod_rmmod $1
+
+ # Do simple module test
+ module_driver_test_single $1 0
+
+ # Remove CPUs before inserting module and then bring them back
+ module_driver_test_single $1 1
+ printf "\n"
+}
+
+# find governor name based on governor module name
+# $1: governor module name
+find_gov_name()
+{
+ if [ $1 = "cpufreq_ondemand.ko" ]; then
+ printf "ondemand"
+ elif [ $1 = "cpufreq_conservative.ko" ]; then
+ printf "conservative"
+ elif [ $1 = "cpufreq_userspace.ko" ]; then
+ printf "userspace"
+ elif [ $1 = "cpufreq_performance.ko" ]; then
+ printf "performance"
+ elif [ $1 = "cpufreq_powersave.ko" ]; then
+ printf "powersave"
+ elif [ $1 = "cpufreq_schedutil.ko" ]; then
+ printf "schedutil"
+ fi
+}
+
+# $1: governor string, $2: governor module, $3: policy
+# example: module_governor_test_single "ondemand" "cpufreq_ondemand.ko" 2
+module_governor_test_single()
+{
+ printf "** Test: Running ${FUNCNAME[0]} for $3 **\n\n"
+
+ backup_governor $3
+
+ # switch to new governor
+ printf "Switch from $CUR_GOV to $1\n"
+ switch_show_governor $3 $1
+
+ # try removing module, it should fail as governor is used
+ printf "Removing $2 module\n\n"
+ rmmod $2
+ if [ $? = 0 ]; then
+		printf "WARN: rmmod $2 succeeded even though the governor is in use\n"
+ insmod $2
+ else
+ printf "Pass: unable to remove $2 while it is being used\n\n"
+ fi
+
+ # switch back to old governor
+ printf "Switchback to $CUR_GOV from $1\n"
+ restore_governor $3
+ printf "\n"
+}
+
+# Insert cpufreq governor module and perform basic tests
+# $1: cpufreq-governor module to insert
+module_governor_test()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ # check if module is present or not
+ ls $1 > /dev/null
+ if [ $? != 0 ]; then
+ printf "$1: not present in `pwd` folder\n"
+ return;
+ fi
+
+ # test basic module tests
+ test_basic_insmod_rmmod $1
+
+ # insert module
+ printf "Inserting $1 module\n\n"
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ return;
+ fi
+
+ # switch to new governor for each cpu
+ for_each_policy module_governor_test_single $(find_gov_name $1) $1
+
+ # remove module
+ printf "Removing $1 module\n\n"
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ return;
+ fi
+ printf "\n"
+}
+
+# test modules: driver and governor
+# $1: driver module, $2: governor module
+module_test()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n\n"
+
+ # check if modules are present or not
+ ls $1 $2 > /dev/null
+ if [ $? != 0 ]; then
+ printf "$1 or $2: is not present in `pwd` folder\n"
+ return;
+ fi
+
+ # TEST1: Insert gov after driver
+ # insert driver module
+ printf "Inserting $1 module\n\n"
+ insmod $1
+ if [ $? != 0 ]; then
+ printf "Insmod $1 failed\n"
+ return;
+ fi
+
+ # run governor tests
+ module_governor_test $2
+
+ # remove driver module
+ printf "Removing $1 module\n\n"
+ rmmod $1
+ if [ $? != 0 ]; then
+ printf "rmmod $1 failed\n"
+ return;
+ fi
+
+ # TEST2: Insert driver after governor
+ # insert governor module
+ printf "Inserting $2 module\n\n"
+ insmod $2
+ if [ $? != 0 ]; then
+ printf "Insmod $2 failed\n"
+ return;
+ fi
+
+	# run driver tests
+ module_driver_test $1
+
+	# remove governor module
+ printf "Removing $2 module\n\n"
+ rmmod $2
+ if [ $? != 0 ]; then
+ printf "rmmod $2 failed\n"
+ return;
+ fi
+}
diff --git a/tools/testing/selftests/cpufreq/special-tests.sh b/tools/testing/selftests/cpufreq/special-tests.sh
new file mode 100755
index 000000000..8d40505dc
--- /dev/null
+++ b/tools/testing/selftests/cpufreq/special-tests.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Special test cases reported by people
+
+# Testcase 1: Reported here: http://marc.info/?l=linux-pm&m=140618592709858&w=2
+
+# protect against multiple inclusion
+if [ $FILE_SPECIAL ]; then
+ return 0
+else
+ FILE_SPECIAL=DONE
+fi
+
+source cpu.sh
+source cpufreq.sh
+source governor.sh
+
+# Test 1
+# $1: policy
+__simple_lockdep()
+{
+ # switch to ondemand
+ __switch_governor $1 "ondemand"
+
+ # cat ondemand files
+ local ondir=$(find_gov_directory $1 "ondemand")
+ if [ -z $ondir ]; then
+		printf "${FUNCNAME[0]}: ondemand directory not created, quitting\n"
+ return
+ fi
+
+ cat $ondir/*
+
+ # switch to conservative
+ __switch_governor $1 "conservative"
+}
+
+simple_lockdep()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+ for_each_policy __simple_lockdep
+}
+
+# Test 2
+# $1: policy
+__concurrent_lockdep()
+{
+ for i in `seq 0 100`; do
+ __simple_lockdep $1
+ done
+}
+
+concurrent_lockdep()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+ for_each_policy_concurrent __concurrent_lockdep
+}
+
+# Test 3
+quick_shuffle()
+{
+ # this is called concurrently from governor_race
+ for I in `seq 1000`
+ do
+ echo ondemand | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+ echo userspace | sudo tee $CPUFREQROOT/policy*/scaling_governor &
+ done
+}
+
+governor_race()
+{
+ printf "** Test: Running ${FUNCNAME[0]} **\n"
+
+ # run 8 concurrent instances
+ for I in `seq 8`
+ do
+ quick_shuffle &
+ done
+}
+
+# Test 4
+# $1: cpu
+hotplug_with_updates_cpu()
+{
+ local filepath="$CPUROOT/$1/cpufreq"
+
+ # switch to ondemand
+ __switch_governor_for_cpu $1 "ondemand"
+
+ for i in `seq 1 5000`
+ do
+ reboot_cpu $1
+ done &
+
+ local freqs=$(cat $filepath/scaling_available_frequencies)
+ local oldfreq=$(cat $filepath/scaling_min_freq)
+
+ for j in `seq 1 5000`
+ do
+ # Set all frequencies one-by-one
+ for freq in $freqs; do
+ echo $freq > $filepath/scaling_min_freq
+ done
+ done
+
+ # restore old freq
+ echo $oldfreq > $filepath/scaling_min_freq
+}
+
+hotplug_with_updates()
+{
+ for_each_non_boot_cpu hotplug_with_updates_cpu
+}
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
new file mode 100755
index 000000000..b789dc825
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs API tests for struct drm_mm (DRM range manager)
+
+if ! /sbin/modprobe -n -q test-drm_mm; then
+ echo "drivers/gpu/drm_mm: [skip]"
+ exit 77
+fi
+
+if /sbin/modprobe -q test-drm_mm; then
+ /sbin/modprobe -q -r test-drm_mm
+ echo "drivers/gpu/drm_mm: ok"
+else
+ echo "drivers/gpu/drm_mm: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/gpu/i915.sh b/tools/testing/selftests/drivers/gpu/i915.sh
new file mode 100755
index 000000000..d3895bc71
--- /dev/null
+++ b/tools/testing/selftests/drivers/gpu/i915.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs hardware independent tests for i915 (drivers/gpu/drm/i915)
+
+if ! /sbin/modprobe -q -r i915; then
+ echo "drivers/gpu/i915: [SKIP]"
+ exit 77
+fi
+
+if /sbin/modprobe -q i915 mock_selftests=-1; then
+ /sbin/modprobe -q -r i915
+ echo "drivers/gpu/i915: ok"
+else
+ echo "drivers/gpu/i915: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
new file mode 100755
index 000000000..76f1ab489
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -0,0 +1,217 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# ../../../net/forwarding/mirror_gre_topo_lib.sh for more details.
+#
+# Test various aspects of offloading gretap mirrors that are specific
+# to mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/mirror_lib.sh
+source $lib_dir/mirror_gre_lib.sh
+source $lib_dir/mirror_gre_topo_lib.sh
+
+setup_keyful()
+{
+ tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
+ ttl 100 tos inherit allow-localremote \
+ key 1234
+
+ tunnel_create h3-gt6-key ip6gretap 2001:db8:3::2 2001:db8:3::1 \
+ key 1234
+ ip link set h3-gt6-key vrf v$h3
+ matchall_sink_create h3-gt6-key
+
+ ip address add dev $swp3 2001:db8:3::1/64
+ ip address add dev $h3 2001:db8:3::2/64
+}
+
+cleanup_keyful()
+{
+ ip address del dev $h3 2001:db8:3::2/64
+ ip address del dev $swp3 2001:db8:3::1/64
+
+ tunnel_destroy h3-gt6-key
+ tunnel_destroy gt6-key
+}
+
+setup_soft()
+{
+ # Set up a topology for testing underlay routes that point at an
+ # unsupported soft device.
+
+ tunnel_create gt6-soft ip6gretap 2001:db8:4::1 2001:db8:4::2 \
+ ttl 100 tos inherit allow-localremote
+
+ tunnel_create h3-gt6-soft ip6gretap 2001:db8:4::2 2001:db8:4::1
+ ip link set h3-gt6-soft vrf v$h3
+ matchall_sink_create h3-gt6-soft
+
+ ip link add name v1 type veth peer name v2
+ ip link set dev v1 up
+ ip address add dev v1 2001:db8:4::1/64
+
+ ip link set dev v2 vrf v$h3
+ ip link set dev v2 up
+ ip address add dev v2 2001:db8:4::2/64
+}
+
+cleanup_soft()
+{
+ ip link del dev v1
+
+ tunnel_destroy h3-gt6-soft
+ tunnel_destroy gt6-soft
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ setup_keyful
+ setup_soft
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ cleanup_soft
+ cleanup_keyful
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_ttl_inherit()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type ttl inherit
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type ttl 100
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: no offload on TTL of inherit ($tcflags)"
+}
+
+test_span_gre_tos_fixed()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type tos 0x10
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type tos inherit
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: no offload on a fixed TOS ($tcflags)"
+}
+
+test_span_failable()
+{
+ local should_fail=$1; shift
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ if ((should_fail)); then
+ fail_test_span_gre_dir $tundev ingress
+ else
+ quick_test_span_gre_dir $tundev ingress
+ fi
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: should_fail=$should_fail ($tcflags)"
+}
+
+test_failable()
+{
+ local should_fail=$1; shift
+
+ test_span_failable $should_fail gt6-key "mirror to keyful gretap"
+ test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+}
+
+test_sw()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ test_failable 0
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+test_hw()
+{
+ test_failable 1
+
+ test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
+ test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+
+ test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
+ test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+if ! tc_offload_check; then
+ check_err 1 "Could not test offloaded functionality"
+ log_test "mlxsw-specific tests for mirror to gretap"
+ exit
+fi
+
+tcflags="skip_hw"
+test_sw
+
+tcflags="skip_sw"
+test_hw
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
new file mode 100644
index 000000000..6f3a70df6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -0,0 +1,197 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# Test offloading a number of mirrors-to-gretap. The test creates a number of
+# tunnels. Then it adds one flower mirror for each of the tunnels, matching a
+# given host IP. Then it generates traffic at each of the host IPs and checks
+# that the traffic has been mirrored at the appropriate tunnel.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 2001:db8:1:X::1/64 | | 2001:db8:1:X::2/64 | |
+# +-----|--------------------+ +--------------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirrors | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 + gt6-<X> (ip6gretap) |
+# | | 2001:db8:2:X::1/64 : loc=2001:db8:2:X::1 |
+# | | : rem=2001:db8:2:X::2 |
+# | | : ttl=100 |
+# | | : tos=inherit |
+# | | : |
+# +-----|--------------------------------:----------------------------------+
+# | :
+# +-----|--------------------------------:----------------------------------+
+# | H3 + $h3 + h3-gt6-<X> (ip6gretap) |
+# | 2001:db8:2:X::2/64 loc=2001:db8:2:X::2 |
+# | rem=2001:db8:2:X::1 |
+# | ttl=100 |
+# | tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+
+source ../../../../net/forwarding/mirror_lib.sh
+
+MIRROR_NUM_NETIFS=6
+
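+# Compose the 2001:db8:<net>:<num> address prefix (both fields in hex)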
+mirror_gre_ipv6_addr()
+{
+ local net=$1; shift
+ local num=$1; shift
+
+ printf "2001:db8:%x:%x" $net $num
+}
+
+mirror_gre_tunnels_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
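+	# Accumulate all flower mirror rules and install them with a
+	# single "tc -b" batch at the end.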
+ MIRROR_GRE_BATCH_FILE="$(mktemp)"
+ for ((i=0; i < count; ++i)); do
+ local match_dip=$(mirror_gre_ipv6_addr 1 $i)::2
+ local htun=h3-gt6-$i
+ local tun=gt6-$i
+
+ ((mirror_gre_tunnels++))
+
+ ip address add dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+ ip address add dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+
+ ip address add dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+ ip address add dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+
+ tunnel_create $tun ip6gretap \
+ $(mirror_gre_ipv6_addr 2 $i)::1 \
+ $(mirror_gre_ipv6_addr 2 $i)::2 \
+ ttl 100 tos inherit allow-localremote
+
+ tunnel_create $htun ip6gretap \
+ $(mirror_gre_ipv6_addr 2 $i)::2 \
+ $(mirror_gre_ipv6_addr 2 $i)::1
+ ip link set $htun vrf v$h3
+ matchall_sink_create $htun
+
+ cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
+ filter add dev $swp1 ingress pref 1000 \
+ protocol ipv6 \
+ flower $tcflags dst_ip $match_dip \
+ action mirred egress mirror dev $tun
+ EOF
+ done
+
+ tc -b $MIRROR_GRE_BATCH_FILE
+ check_err_fail $should_fail $? "Mirror rule insertion"
+}
+
+mirror_gre_tunnels_destroy()
+{
+ local count=$1; shift
+
+ for ((i=0; i < count; ++i)); do
+ local htun=h3-gt6-$i
+ local tun=gt6-$i
+
+ ip address del dev $h3 $(mirror_gre_ipv6_addr 2 $i)::2/64
+ ip address del dev $swp3 $(mirror_gre_ipv6_addr 2 $i)::1/64
+
+ ip address del dev $h2 $(mirror_gre_ipv6_addr 1 $i)::2/64
+ ip address del dev $h1 $(mirror_gre_ipv6_addr 1 $i)::1/64
+
+ tunnel_destroy $htun
+ tunnel_destroy $tun
+ done
+}
+
+__mirror_gre_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ mirror_gre_tunnels_create $count $should_fail
+ if ((should_fail)); then
+ return
+ fi
+
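+	# Give the freshly created tunnels and mirrors a moment to settle.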
+ sleep 5
+
+ for ((i = 0; i < count; ++i)); do
+ local dip=$(mirror_gre_ipv6_addr 1 $i)::2
+ local htun=h3-gt6-$i
+
+ icmp6_capture_install $htun
+ mirror_test v$h1 "" $dip $htun 100 10
+ icmp6_capture_uninstall $htun
+ done
+}
+
+mirror_gre_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ tcflags="skip_sw"
+ __mirror_gre_test $count $should_fail
+}
+
+mirror_gre_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ mirror_gre_tunnels=0
+
+ vrf_prepare
+
+ simple_if_init $h1
+ simple_if_init $h2
+ simple_if_init $h3
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ ip link set dev $swp3 up
+}
+
+mirror_gre_cleanup()
+{
+ mirror_gre_tunnels_destroy $mirror_gre_tunnels
+
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2 down
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+
+ ip link del dev br1
+
+ simple_if_fini $h3
+ simple_if_fini $h2
+ simple_if_fini $h1
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
new file mode 100755
index 000000000..1ca631d5a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -0,0 +1,189 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization and rewrite. Packets ingress $swp1 with a DSCP
+# tag and are prioritized according to the map at $swp1. They egress $swp2 and
+# the DSCP value is updated to match the map at that interface. The updated DSCP
+# tag is verified at $h2.
+#
+# ICMP responses are produced with the same DSCP tag that arrived at $h2. They
+# go through prioritization at $swp2 and DSCP retagging at $swp1. The tag is
+# verified at $h1; it should match the original tag.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | | APP=0,5,10 .. 7,5,17 APP=0,5,20 .. 7,5,27 | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_dscp
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ local dscp;
+
+ simple_if_init $h1 192.0.2.1/28
+ tc qdisc add dev $h1 clsact
+ dscp_capture_install $h1 10
+}
+
+h1_destroy()
+{
+ dscp_capture_uninstall $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+ tc qdisc add dev $h2 clsact
+ dscp_capture_install $h2 20
+}
+
+h2_destroy()
+{
+ dscp_capture_uninstall $h2 20
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+dscp_map()
+{
+ local base=$1; shift
+
+ for prio in {0..7}; do
+ echo app=$prio,5,$((base + prio))
+ done
+}
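+
+# For example, "dscp_map 10" emits "app=0,5,10" .. "app=7,5,17": priority
+# $prio, selector 5 (DSCP), DSCP value $((base + prio)), matching the APP
+# lines in the diagram above.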
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
+ lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
+ lldpad_app_wait_set $swp1
+ lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+ lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
+ lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
+ lldpad_app_wait_del
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+dscp_ping_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local prio=$1; shift
+ local dev_10=$1; shift
+ local dev_20=$1; shift
+
+ local dscp_10=$(((prio + 10) << 2))
+ local dscp_20=$(((prio + 20) << 2))
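+	# ping's -Q option sets the whole TOS byte and DSCP occupies its
+	# upper six bits, hence the shift by 2. E.g. for prio=3:
+	# dscp_10 = 13 << 2 = 52 and dscp_20 = 23 << 2 = 92.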
+
+ RET=0
+
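+	# dscp_fetch_stats (from the sourced forwarding library) prints
+	# "[key]=value" pairs, so eval-ing its output populates the
+	# associative arrays with per-DSCP packet counters.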
+ local -A t0s
+ eval "t0s=($(dscp_fetch_stats $dev_10 10)
+ $(dscp_fetch_stats $dev_20 20))"
+
+ ip vrf exec $vrf_name \
+ ${PING} -Q $dscp_10 ${sip:+-I $sip} $dip \
+ -c 10 -i 0.1 -w 2 &> /dev/null
+
+ local -A t1s
+ eval "t1s=($(dscp_fetch_stats $dev_10 10)
+ $(dscp_fetch_stats $dev_20 20))"
+
+ for key in ${!t0s[@]}; do
+ local expect
+ if ((key == prio+10 || key == prio+20)); then
+ expect=10
+ else
+ expect=0
+ fi
+
+ local delta=$((t1s[$key] - t0s[$key]))
+ ((expect == delta))
+ check_err $? "DSCP $key: Expected to capture $expect packets, got $delta."
+ done
+
+ log_test "DSCP rewrite: $dscp_10-(prio $prio)-$dscp_20"
+}
+
+test_dscp()
+{
+ for prio in {0..7}; do
+ dscp_ping_test v$h1 192.0.2.1 192.0.2.2 $prio $h1 $h2
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
new file mode 100755
index 000000000..281d90766
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for DSCP prioritization in the router.
+#
+# With ip_forward_update_priority disabled, the packets are expected to keep
+# their DSCP (which in this test only takes values 0..7) intact as they are
+# forwarded by the switch. That is verified at $h2. ICMP responses are formed
+# with the same DSCP as the requests, and likewise pass through the switch
+# intact, which is verified at $h1.
+#
+# With ip_forward_update_priority enabled, the router reprioritizes the
+# packets according to the table in reprioritize(). Thus, say, DSCP 7 maps to
+# priority 4, which on egress maps back to DSCP 4. The response packet is then
+# reprioritized to 6 and egresses with DSCP 6.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.18/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | + $swp1 $swp2 + |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | APP=0,5,0 .. 7,5,7 APP=0,5,0 .. 7,5,7 |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_update
+ test_no_update
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+
+reprioritize()
+{
+ local in=$1; shift
+
+ # This is based on rt_tos2priority in include/net/route.h. Assuming 1:1
+ # mapping between priorities and TOS, it yields a new priority for a
+ # packet with ingress priority of $in.
+ local -a reprio=(0 0 2 2 6 6 4 4)
+
+ echo ${reprio[$in]}
+}
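+
+# For example, reprioritize 7 yields 4 and reprioritize 4 yields 6, i.e. the
+# round trip described in the header comment above.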
+
+h1_create()
+{
+ local dscp;
+
+ simple_if_init $h1 192.0.2.1/28
+ tc qdisc add dev $h1 clsact
+ dscp_capture_install $h1 0
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ dscp_capture_uninstall $h1 0
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28
+ tc qdisc add dev $h2 clsact
+ dscp_capture_install $h2 0
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ dscp_capture_uninstall $h2 0
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.18/28
+}
+
+dscp_map()
+{
+ local base=$1; shift
+
+ for prio in {0..7}; do
+ echo app=$prio,5,$((base + prio))
+ done
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/28
+ __simple_if_init $swp2 v$swp1 192.0.2.17/28
+
+ lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
+ lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
+ lldpad_app_wait_set $swp1
+ lldpad_app_wait_set $swp2
+}
+
+switch_destroy()
+{
+ lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
+ lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
+ lldpad_app_wait_del
+
+ __simple_if_fini $swp2 192.0.2.17/28
+ simple_if_fini $swp1 192.0.2.2/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ sysctl_set net.ipv4.ip_forward_update_priority 1
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ sysctl_restore net.ipv4.ip_forward_update_priority
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+dscp_ping_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local prio=$1; shift
+ local reprio=$1; shift
+ local dev1=$1; shift
+ local dev2=$1; shift
+
+ local prio2=$($reprio $prio) # ICMP Request egress prio
+ local prio3=$($reprio $prio2) # ICMP Response egress prio
+
+ local dscp=$((prio << 2)) # ICMP Request ingress DSCP
+ local dscp2=$((prio2 << 2)) # ICMP Request egress DSCP
+ local dscp3=$((prio3 << 2)) # ICMP Response egress DSCP
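+	# Worked example: prio=7 gives prio2=4 and prio3=6, so the request
+	# ingresses with DSCP 28, egresses with DSCP 16, and the response
+	# egresses with DSCP 24.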
+
+ RET=0
+
+ eval "local -A dev1_t0s=($(dscp_fetch_stats $dev1 0))"
+ eval "local -A dev2_t0s=($(dscp_fetch_stats $dev2 0))"
+
+ ip vrf exec $vrf_name \
+ ${PING} -Q $dscp ${sip:+-I $sip} $dip \
+ -c 10 -i 0.1 -w 2 &> /dev/null
+
+ eval "local -A dev1_t1s=($(dscp_fetch_stats $dev1 0))"
+ eval "local -A dev2_t1s=($(dscp_fetch_stats $dev2 0))"
+
+ for i in {0..7}; do
+ local dscpi=$((i << 2))
+ local expect2=0
+ local expect3=0
+
+ if ((i == prio2)); then
+ expect2=10
+ fi
+ if ((i == prio3)); then
+ expect3=10
+ fi
+
+ local delta=$((dev2_t1s[$i] - dev2_t0s[$i]))
+ ((expect2 == delta))
+ check_err $? "DSCP $dscpi@$dev2: Expected to capture $expect2 packets, got $delta."
+
+ delta=$((dev1_t1s[$i] - dev1_t0s[$i]))
+ ((expect3 == delta))
+ check_err $? "DSCP $dscpi@$dev1: Expected to capture $expect3 packets, got $delta."
+ done
+
+ log_test "DSCP rewrite: $dscp-(prio $prio2)-$dscp2-(prio $prio3)-$dscp3"
+}
+
+__test_update()
+{
+ local update=$1; shift
+ local reprio=$1; shift
+
+ sysctl_restore net.ipv4.ip_forward_update_priority
+ sysctl_set net.ipv4.ip_forward_update_priority $update
+
+ for prio in {0..7}; do
+ dscp_ping_test v$h1 192.0.2.1 192.0.2.18 $prio $reprio $h1 $h2
+ done
+}
+
+test_update()
+{
+ __test_update 1 reprioritize
+}
+
+test_no_update()
+{
+ __test_update 0 echo
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
new file mode 100644
index 000000000..d231649b4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ROUTER_NUM_NETIFS=4
+
+router_h1_create()
+{
+ simple_if_init $h1 192.0.1.1/24
+ ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
+}
+
+router_h1_destroy()
+{
+ ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
+ simple_if_fini $h1 192.0.1.1/24
+}
+
+router_h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+ tc qdisc add dev $h2 handle ffff: ingress
+}
+
+router_h2_destroy()
+{
+ tc qdisc del dev $h2 handle ffff: ingress
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.1.2/24 dev $rp1
+ ip address add 192.0.2.2/24 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 192.0.2.2/24 dev $rp2
+ ip address del 192.0.1.2/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+router_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ rp1mac=$(mac_get $rp1)
+
+ vrf_prepare
+
+ router_h1_create
+ router_h2_create
+
+ router_create
+}
+
+router_offload_validate()
+{
+ local route_count=$1
+ local offloaded_count
+
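+	# Hardware-installed routes are flagged "offload" in "ip route"
+	# output, so counting the matches counts the offloaded routes.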
+ offloaded_count=$(ip route | grep -o 'offload' | wc -l)
+ [[ $offloaded_count -ge $route_count ]]
+}
+
+router_routes_create()
+{
+ local route_count=$1
+ local count=0
+
+ ROUTE_FILE="$(mktemp)"
+
+ for i in {0..255}
+ do
+ for j in {0..255}
+ do
+ for k in {0..255}
+ do
+ if [[ $count -eq $route_count ]]; then
+ break 3
+ fi
+
+ echo route add 193.${i}.${j}.${k}/32 via \
+ 192.0.2.1 dev $rp2 >> $ROUTE_FILE
+ ((count++))
+ done
+ done
+ done
+
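+	# "ip -b" replays the accumulated "route add" commands from the
+	# batch file in a single invocation.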
+ ip -b $ROUTE_FILE &> /dev/null
+}
+
+router_routes_destroy()
+{
+ if [[ -v ROUTE_FILE ]]; then
+ rm -f $ROUTE_FILE
+ fi
+}
+
+router_test()
+{
+ local route_count=$1
+ local should_fail=$2
+ local count=0
+
+ RET=0
+
+ router_routes_create $route_count
+
+ router_offload_validate $route_count
+ check_err_fail $should_fail $? "Offload of $route_count routes"
+ if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
+ return
+ fi
+
+ tc filter add dev $h2 ingress protocol ip pref 1 flower \
+ skip_sw dst_ip 193.0.0.0/8 action drop
+
+ for i in {0..255}
+ do
+ for j in {0..255}
+ do
+ for k in {0..255}
+ do
+ if [[ $count -eq $route_count ]]; then
+ break 3
+ fi
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
+ -A 192.0.1.1 -B 193.${i}.${j}.${k} \
+ -t ip -q
+ ((count++))
+ done
+ done
+ done
+
+ tc_check_packets "dev $h2 ingress" 1 $route_count
+ check_err $? "Offload mismatch"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 flower \
+ skip_sw dst_ip 193.0.0.0/8 action drop
+
+ router_routes_destroy
+}
+
+router_cleanup()
+{
+ pre_cleanup
+
+ router_routes_destroy
+ router_destroy
+
+ router_h2_destroy
+ router_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
new file mode 100755
index 000000000..3b75180f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the A-TCAM and C-TCAM operation in Spectrum-2.
+# It tries to exercise as many code paths in the eRP state machine as
+# possible.
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
+ multiple_masks_test ctcam_edge_cases_test"
+NUM_NETIFS=2
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+single_mask_test()
+{
+ # When only a single mask is required, the device uses the master
+ # mask and not the eRP table. Verify that under this mode the right
+ # filter is matched
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Single filter - did not match"
+
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 2
+ check_err $? "Two filters - did not match highest priority"
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Two filters - did not match lowest priority"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 198.51.100.1 -B 198.51.100.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_err $? "Single filter - did not match after delete"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "single mask test ($tcflags)"
+}
+
+identical_filters_test()
+{
+ # When two filters that only differ in their priority are used,
+ # one needs to be inserted into the C-TCAM. This test verifies
+ # that filters are correctly spilled to C-TCAM and that the right
+ # filter is matched
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match A-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match C-TCAM filter after A-TCAM delete"
+
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_err $? "Did not match C-TCAM filter after A-TCAM add"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match A-TCAM filter after C-TCAM delete"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "identical filters test ($tcflags)"
+}
+
+two_masks_test()
+{
+ # When more than one mask is required, the eRP table is used. This
+ # test verifies that the eRP table is correctly allocated and used
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.0.0/16 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Two filters - did not match highest priority"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Single filter - did not match"
+
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Two filters - did not match highest priority after add"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "two masks test ($tcflags)"
+}
+
+multiple_masks_test()
+{
+ # The number of masks in a region is limited. Once the maximum
+ # number of masks has been reached filters that require new
+ # masks are spilled to the C-TCAM. This test verifies that
+ # spillage is performed correctly and that the right filter is
+ # matched
+
+ local index
+
+ RET=0
+
+ NUM_MASKS=32
+ BASE_INDEX=100
+
+ for i in $(eval echo {1..$NUM_MASKS}); do
+ index=$((BASE_INDEX - i))
+
+ tc filter add dev $h2 ingress protocol ip pref $index \
+ handle $index \
+ flower $tcflags dst_ip 192.0.2.2/${i} src_ip 192.0.2.1 \
+ action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+ -B 192.0.2.2 -t ip -q
+
+ tc_check_packets "dev $h2 ingress" $index 1
+ check_err $? "$i filters - did not match highest priority (add)"
+ done
+
+ for i in $(eval echo {$NUM_MASKS..1}); do
+ index=$((BASE_INDEX - i))
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 \
+ -B 192.0.2.2 -t ip -q
+
+ tc_check_packets "dev $h2 ingress" $index 2
+ check_err $? "$i filters - did not match highest priority (del)"
+
+ tc filter del dev $h2 ingress protocol ip pref $index \
+ handle $index flower
+ done
+
+ log_test "multiple masks test ($tcflags)"
+}
+
+ctcam_two_atcam_masks_test()
+{
+ RET=0
+
+ # First case: C-TCAM is disabled when there are two A-TCAM masks.
+ # We push a filter into the C-TCAM by using two identical filters
+ # as in identical_filters_test()
+
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into C-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match A-TCAM filter"
+
+ # Delete both A-TCAM and C-TCAM filters and make sure the remaining
+ # A-TCAM filter still works
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match A-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "ctcam with two atcam masks test ($tcflags)"
+}
+
+ctcam_one_atcam_mask_test()
+{
+ RET=0
+
+ # Second case: C-TCAM is disabled when there is one A-TCAM mask.
+ # The test is similar to identical_filters_test()
+
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into C-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match C-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match A-TCAM filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "ctcam with one atcam mask test ($tcflags)"
+}
+
+ctcam_no_atcam_masks_test()
+{
+ RET=0
+
+ # Third case: C-TCAM is disabled when there are no A-TCAM masks
+ # This test exercises the code path that transitions the eRP table
+ # to its initial state after deleting the last C-TCAM mask
+
+ # Filter goes into A-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ # Filter goes into C-TCAM
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "ctcam with no atcam masks test ($tcflags)"
+}
+
+ctcam_edge_cases_test()
+{
+ # When the C-TCAM is disabled after deleting the last C-TCAM
+ # mask, we want to make sure the eRP state machine is put in
+ # the correct state
+
+ ctcam_two_atcam_masks_test
+ ctcam_one_atcam_mask_test
+ ctcam_no_atcam_masks_test
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+if ! tc_offload_check; then
+ check_err 1 "Could not test offloaded functionality"
+ log_test "mlxsw-specific tests for tc flower"
+ exit
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
new file mode 100644
index 000000000..73035e250
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -0,0 +1,119 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "../../../../net/forwarding/devlink_lib.sh"
+
+if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
+ echo "SKIP: test is tailored for Mellanox Spectrum"
+ exit 1
+fi
+
+# Needed for returning to default
+declare -A KVD_DEFAULTS
+
+KVD_CHILDREN="linear hash_single hash_double"
+KVDL_CHILDREN="singles chunks large_chunks"
+
+devlink_sp_resource_minimize()
+{
+ local size
+ local i
+
+ for i in $KVD_CHILDREN; do
+ size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+ devlink_resource_size_set "$size" kvd "$i"
+ done
+
+ for i in $KVDL_CHILDREN; do
+ size=$(devlink_resource_get kvd linear "$i" | \
+ jq '.["size_min"]')
+ devlink_resource_size_set "$size" kvd linear "$i"
+ done
+}
+
+devlink_sp_size_kvd_to_default()
+{
+ local need_reload=0
+ local i
+
+ for i in $KVD_CHILDREN; do
+ local size=$(echo "${KVD_DEFAULTS[kvd_$i]}" | jq '.["size"]')
+ current_size=$(devlink_resource_size_get kvd "$i")
+
+ if [ "$size" -ne "$current_size" ]; then
+ devlink_resource_size_set "$size" kvd "$i"
+ need_reload=1
+ fi
+ done
+
+ for i in $KVDL_CHILDREN; do
+ local size=$(echo "${KVD_DEFAULTS[kvd_linear_$i]}" | \
+ jq '.["size"]')
+ current_size=$(devlink_resource_size_get kvd linear "$i")
+
+ if [ "$size" -ne "$current_size" ]; then
+ devlink_resource_size_set "$size" kvd linear "$i"
+ need_reload=1
+ fi
+ done
+
+ if [ "$need_reload" -ne "0" ]; then
+ devlink_reload
+ fi
+}
+
+devlink_sp_read_kvd_defaults()
+{
+ local key
+ local i
+
+ KVD_DEFAULTS[kvd]=$(devlink_resource_get "kvd")
+ for i in $KVD_CHILDREN; do
+ key=kvd_$i
+ KVD_DEFAULTS[$key]=$(devlink_resource_get kvd "$i")
+ done
+
+ for i in $KVDL_CHILDREN; do
+ key=kvd_linear_$i
+ KVD_DEFAULTS[$key]=$(devlink_resource_get kvd linear "$i")
+ done
+}
+
+KVD_PROFILES="default scale ipv4_max"
+
+devlink_sp_resource_kvd_profile_set()
+{
+ local profile=$1
+
+ case "$profile" in
+ scale)
+ devlink_resource_size_set 64000 kvd linear
+ devlink_resource_size_set 15616 kvd linear singles
+ devlink_resource_size_set 32000 kvd linear chunks
+ devlink_resource_size_set 16384 kvd linear large_chunks
+ devlink_resource_size_set 128000 kvd hash_single
+ devlink_resource_size_set 48000 kvd hash_double
+ devlink_reload
+ ;;
+ ipv4_max)
+ devlink_resource_size_set 64000 kvd linear
+ devlink_resource_size_set 15616 kvd linear singles
+ devlink_resource_size_set 32000 kvd linear chunks
+ devlink_resource_size_set 16384 kvd linear large_chunks
+ devlink_resource_size_set 144000 kvd hash_single
+ devlink_resource_size_set 32768 kvd hash_double
+ devlink_reload
+ ;;
+ default)
+ devlink_resource_size_set 98304 kvd linear
+ devlink_resource_size_set 16384 kvd linear singles
+ devlink_resource_size_set 49152 kvd linear chunks
+ devlink_resource_size_set 32768 kvd linear large_chunks
+ devlink_resource_size_set 87040 kvd hash_single
+ devlink_resource_size_set 60416 kvd hash_double
+ devlink_reload
+ ;;
+ *)
+ check_err 1 "Unknown profile $profile"
+ esac
+}
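+
+# Usage sketch: "devlink_sp_resource_kvd_profile_set scale" repartitions the
+# KVD memory and triggers devlink_reload so the new sizes take effect.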
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
new file mode 100755
index 000000000..b1fe960e3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=1
+source devlink_lib_spectrum.sh
+
+setup_prepare()
+{
+ devlink_sp_read_kvd_defaults
+}
+
+cleanup()
+{
+ pre_cleanup
+ devlink_sp_size_kvd_to_default
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+profiles_test()
+{
+ local i
+
+ log_info "Running profile tests"
+
+ for i in $KVD_PROFILES; do
+ RET=0
+ devlink_sp_resource_kvd_profile_set $i
+ log_test "'$i' profile"
+ done
+
+ # Default is explicitly tested at end to ensure it's actually applied
+ RET=0
+ devlink_sp_resource_kvd_profile_set "default"
+ log_test "'default' profile"
+}
+
+resources_min_test()
+{
+ local size
+ local i
+ local j
+
+ log_info "Running KVD-minimum tests"
+
+ for i in $KVD_CHILDREN; do
+ RET=0
+ size=$(devlink_resource_get kvd "$i" | jq '.["size_min"]')
+ devlink_resource_size_set "$size" kvd "$i"
+
+ # In case of linear, need to minimize sub-resources as well
+ if [[ "$i" == "linear" ]]; then
+ for j in $KVDL_CHILDREN; do
+ devlink_resource_size_set 0 kvd linear "$j"
+ done
+ fi
+
+ devlink_reload
+ devlink_sp_size_kvd_to_default
+ log_test "'$i' minimize [$size]"
+ done
+}
+
+resources_max_test()
+{
+ local min_size
+ local size
+ local i
+ local j
+
+ log_info "Running KVD-maximum tests"
+ for i in $KVD_CHILDREN; do
+ RET=0
+ devlink_sp_resource_minimize
+
+ # Calculate the maximum possible size for the given partition
+ size=$(devlink_resource_size_get kvd)
+ for j in $KVD_CHILDREN; do
+ if [ "$i" != "$j" ]; then
+ min_size=$(devlink_resource_get kvd "$j" | \
+ jq '.["size_min"]')
+ size=$((size - min_size))
+ fi
+ done
+
+ # Test almost maximum size
+ devlink_resource_size_set "$((size - 128))" kvd "$i"
+ devlink_reload
+ log_test "'$i' almost maximize [$((size - 128))]"
+
+ # Test above maximum size
+ devlink resource set "$DEVLINK_DEV" \
+ path "kvd/$i" size $((size + 128)) &> /dev/null
+ check_fail $? "Set kvd/$i to size $((size + 128)) should fail"
+		log_test "'$i' overflow rejection [$((size + 128))]"
+
+ # Test maximum size
+ if [ "$i" == "hash_single" ] || [ "$i" == "hash_double" ]; then
+ echo "SKIP: Observed problem with exact max $i"
+ continue
+ fi
+
+ devlink_resource_size_set "$size" kvd "$i"
+ devlink_reload
+ log_test "'$i' maximize [$size]"
+
+ devlink_sp_size_kvd_to_default
+ done
+}
+
+profiles_test
+resources_min_test
+resources_max_test
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
new file mode 100644
index 000000000..8d2186c7c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/mirror_gre_scale.sh
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../mirror_gre_scale.sh
+
+mirror_gre_get_target()
+{
+ local should_fail=$1; shift
+
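+	# The device has a small fixed pool of mirroring (SPAN) agents;
+	# three gretap mirrors are expected to fit and a fourth to overflow.
+	# The exact limit is inferred from the 3/4 split below, not stated
+	# here.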
+ if ((! should_fail)); then
+ echo 3
+ else
+ echo 4
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
new file mode 100755
index 000000000..a0a80e1a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NUM_NETIFS=6
+source ../../../../net/forwarding/lib.sh
+source ../../../../net/forwarding/tc_common.sh
+source devlink_lib_spectrum.sh
+
+current_test=""
+
+cleanup()
+{
+ pre_cleanup
+	if [ -n "$current_test" ]; then
+ ${current_test}_cleanup
+ fi
+ devlink_sp_size_kvd_to_default
+}
+
+devlink_sp_read_kvd_defaults
+trap cleanup EXIT
+
+ALL_TESTS="router tc_flower mirror_gre"
+for current_test in ${TESTS:-$ALL_TESTS}; do
+ source ${current_test}_scale.sh
+
+ num_netifs_var=${current_test^^}_NUM_NETIFS
+ num_netifs=${!num_netifs_var:-$NUM_NETIFS}
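+	# Each *_scale.sh defines ${current_test}_get_target, _setup_prepare,
+	# _test and _cleanup, invoked below. ${current_test^^} upper-cases
+	# the test name, so e.g. "router" resolves ROUTER_NUM_NETIFS (set in
+	# router_scale.sh), falling back to $NUM_NETIFS when unset.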
+
+ for profile in $KVD_PROFILES; do
+ RET=0
+ devlink_sp_resource_kvd_profile_set $profile
+ if [[ $RET -gt 0 ]]; then
+ log_test "'$current_test' [$profile] setting"
+ continue
+ fi
+
+ for should_fail in 0 1; do
+ RET=0
+ target=$(${current_test}_get_target "$should_fail")
+ ${current_test}_setup_prepare
+ setup_wait $num_netifs
+ ${current_test}_test "$target" "$should_fail"
+ ${current_test}_cleanup
+ if [[ "$should_fail" -eq 0 ]]; then
+ log_test "'$current_test' [$profile] $target"
+ else
+ log_test "'$current_test' [$profile] overflow $target"
+ fi
+ done
+ done
+done
+current_test=""
+
+exit "$RET"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
new file mode 100644
index 000000000..21c4697d5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/router_scale.sh
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../router_scale.sh
+
+router_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get kvd hash_single)
+
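+	# 85% of the hash_single partition leaves headroom for a reliable
+	# pass, while one route more than the whole partition cannot be
+	# offloaded, so the overflow run is expected to fail.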
+ if [[ $should_fail -eq 0 ]]; then
+ target=$((target * 85 / 100))
+ else
+ target=$((target + 1))
+ fi
+
+ echo $target
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
new file mode 100644
index 000000000..f9bfd8937
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_flower_scale.sh
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_flower_scale.sh
+
+tc_flower_get_target()
+{
+ local should_fail=$1; shift
+
+ # 6144 (6x1024) is the theoretical maximum.
+ # One bank of 512 rules is taken by the 18-byte MC router rule.
+ # One rule is the ACL catch-all.
+ # 6144 - 512 - 1 = 5631
+ local target=5631
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
new file mode 100644
index 000000000..a6d733d2a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -0,0 +1,134 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for resource limit of offloaded flower rules. The test adds a given
+# number of flower matches for different IPv6 addresses, then generates traffic,
+# and ensures each was hit exactly once. This file contains functions to set up
+# a testing topology and run the test, and is meant to be sourced from a test
+# script that calls the testing routine with a given number of rules.
+
+TC_FLOWER_NUM_NETIFS=2
+
+tc_flower_h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+}
+
+tc_flower_h1_destroy()
+{
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+tc_flower_h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+}
+
+tc_flower_h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+tc_flower_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ tc_flower_h1_create
+ tc_flower_h2_create
+}
+
+tc_flower_cleanup()
+{
+ pre_cleanup
+
+ tc_flower_h2_destroy
+ tc_flower_h1_destroy
+
+ vrf_cleanup
+
+ if [[ -v TC_FLOWER_BATCH_FILE ]]; then
+ rm -f $TC_FLOWER_BATCH_FILE
+ fi
+}
+
+tc_flower_addr()
+{
+ local num=$1; shift
+
+ printf "2001:db8:1::%x" $num
+}
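+
+# E.g. "tc_flower_addr 255" prints "2001:db8:1::ff".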
+
+tc_flower_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ TC_FLOWER_BATCH_FILE="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $TC_FLOWER_BATCH_FILE <<-EOF
+ filter add dev $h2 ingress \
+ prot ipv6 \
+ pref 1000 \
+ flower $tcflags dst_ip $(tc_flower_addr $i) \
+ action drop
+ EOF
+ done
+
+ tc -b $TC_FLOWER_BATCH_FILE
+ check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_flower_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local last=$((count - 1))
+
+ tc_flower_rules_create $count $should_fail
+
+ for ((i = 0; i < count; ++i)); do
+ $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
+ -A 2001:db8:2::1 \
+ -B $(tc_flower_addr $i)
+ done
+
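+	# The jq program collects, for each flower rule, any packet counter
+	# that is not exactly 1 together with the rule's dst_ip, producing a
+	# readable list of mismatches (empty when every rule was hit once).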
+ MISMATCHES=$(
+ tc -j -s filter show dev $h2 ingress |
+ jq -r '[ .[] | select(.kind == "flower") | .options |
+ values as $rule | .actions[].stats.packets |
+ select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
+ join(", ")'
+ )
+
+ test -z "$MISMATCHES"
+ check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+}
+
+tc_flower_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+	# We use the lower 16 bits of the IPv6 address for the match, and
+	# there are only 16 bits of rule priority space anyway.
+ if ((count > 65536)); then
+ check_err 1 "Invalid count of $count. At most 65536 rules supported"
+ return
+ fi
+
+ if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ tcflags="skip_sw"
+ __tc_flower_test $count $should_fail
+}
diff --git a/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
new file mode 100755
index 000000000..128f0ab24
--- /dev/null
+++ b/tools/testing/selftests/drivers/usb/usbip/usbip_test.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage() { echo "usbip_test.sh -b <busid> -p <usbip tools path>"; exit 1; }
+
+while getopts "hb:p:" arg; do
+ case "${arg}" in
+ h)
+ usage
+ ;;
+ b)
+ busid=${OPTARG}
+ ;;
+ p)
+ tools_path=${OPTARG}
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+shift $((OPTIND-1))
+
+if [ -z "${busid}" ] || [ -z "${tools_path}" ]; then
+	usage
+fi
+
+echo "Running USB over IP Testing on $busid";
+
+test_end_msg="End of USB over IP Testing on $busid"
+
+if [ $UID != 0 ]; then
+ echo "Please run usbip_test as root [SKIP]"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+echo "Load usbip_host module"
+if ! /sbin/modprobe -q -n usbip_host; then
+ echo "usbip_test: module usbip_host is not found [SKIP]"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q usbip_host; then
+ echo "usbip_test: module usbip_host is loaded [OK]"
+else
+ echo "usbip_test: module usbip_host failed to load [FAIL]"
+ echo $test_end_msg
+ exit 1
+fi
+
+echo "Load vhci_hcd module"
+if /sbin/modprobe -q vhci_hcd; then
+ echo "usbip_test: module vhci_hcd is loaded [OK]"
+else
+ echo "usbip_test: module vhci_hcd failed to load [FAIL]"
+ echo $test_end_msg
+ exit 1
+fi
+echo "=============================================================="
+
+cd $tools_path;
+
+if [ ! -f src/usbip ]; then
+ echo "Please build usbip tools"
+ echo $test_end_msg
+ exit $ksft_skip
+fi
+
+echo "Expect to see exportable devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Run lsusb to see all usb devices"
+lsusb -t;
+echo "=============================================================="
+
+src/usbipd -D;
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be under usbip_host control"
+lsusb -t;
+echo "=============================================================="
+
+echo "bind devices - expect already bound messages"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "unbind devices";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "unbind devices - expect no devices bound message";
+src/usbip unbind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see none";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should fail with no devices"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "bind devices";
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Get exported devices from localhost - expect to see exported devices";
+src/usbip list -r localhost;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - should work"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+# Wait for sysfs file to be updated. Without this sleep, usbip port
+# shows no imported devices.
+sleep 3;
+
+echo "List imported devices - expect to see imported devices";
+src/usbip port;
+echo "=============================================================="
+
+echo "Import devices from localhost - expect already imported messages"
+src/usbip attach -r localhost -b $busid;
+echo "=============================================================="
+
+echo "Un-import devices";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "List imported devices - expect to see none";
+src/usbip port;
+echo "=============================================================="
+
+echo "Un-import devices - expect no devices to detach messages";
+src/usbip detach -p 00;
+src/usbip detach -p 01;
+echo "=============================================================="
+
+echo "Detach invalid port tests - expect invalid port error message";
+src/usbip detach -p 100;
+echo "=============================================================="
+
+echo "Expect to see exportable devices";
+src/usbip list -l;
+echo "=============================================================="
+
+echo "Remove usbip_host module";
+rmmod usbip_host;
+
+echo "Run lsusb - bound devices should be rebound to original drivers"
+lsusb -t;
+echo "=============================================================="
+
+echo "Run bind without usbip_host - expect fail"
+src/usbip bind -b $busid;
+echo "=============================================================="
+
+echo "Run lsusb - devices that failed to bind aren't bound to any driver"
+lsusb -t;
+echo "=============================================================="
+
+echo "modprobe usbip_host - expect the device to be skipped"
+/sbin/modprobe usbip_host
+echo "dmesg should show: <busid> is not in match_busid table... skip!"
+echo "=============================================================="
+dmesg | grep "is not in match_busid table"
+echo "=============================================================="
+
+echo $test_end_msg
diff --git a/tools/testing/selftests/efivarfs/.gitignore b/tools/testing/selftests/efivarfs/.gitignore
new file mode 100644
index 000000000..336184935
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/.gitignore
@@ -0,0 +1,2 @@
+create-read
+open-unlink
diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile
new file mode 100644
index 000000000..c49dcea69
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/Makefile
@@ -0,0 +1,7 @@
+CFLAGS = -Wall
+
+TEST_GEN_FILES := open-unlink create-read
+TEST_PROGS := efivarfs.sh
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/efivarfs/config b/tools/testing/selftests/efivarfs/config
new file mode 100644
index 000000000..4e151f100
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/config
@@ -0,0 +1 @@
+CONFIG_EFIVAR_FS=y
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
new file mode 100644
index 000000000..9674a1939
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+int main(int argc, char **argv)
+{
+ const char *path;
+ char buf[4];
+ int fd, rc;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <path>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ path = argv[1];
+
+ /* create a test variable */
+ fd = open(path, O_RDWR | O_CREAT, 0600);
+ if (fd < 0) {
+		perror("open(O_RDWR|O_CREAT)");
+ return EXIT_FAILURE;
+ }
+
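+	/*
+	 * A freshly created variable holds no data yet, so this first
+	 * read() must hit EOF and return 0.
+	 */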
+ rc = read(fd, buf, sizeof(buf));
+ if (rc != 0) {
+ fprintf(stderr, "Reading a new var should return EOF\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
new file mode 100755
index 000000000..a47029a79
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+efivarfs_mount=/sys/firmware/efi/efivars
+test_guid=210be57c-9849-4fc7-a635-e6382d1aec27
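+
+# The '\x07\x00\x00\x00' attribute prefix written by the tests below encodes
+# EFI_VARIABLE_NON_VOLATILE | EFI_VARIABLE_BOOTSERVICE_ACCESS |
+# EFI_VARIABLE_RUNTIME_ACCESS as a little-endian u32.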
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_prereqs()
+{
+ local msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ if ! grep -q "^\S\+ $efivarfs_mount efivarfs" /proc/mounts; then
+ echo $msg efivarfs is not mounted on $efivarfs_mount >&2
+ exit $ksft_skip
+ fi
+}
+
+run_test()
+{
+ local test="$1"
+
+ echo "--------------------"
+ echo "running $test"
+ echo "--------------------"
+
+ if [ "$(type -t $test)" = 'function' ]; then
+ ( $test )
+ else
+ ( ./$test )
+ fi
+
+ if [ $? -ne 0 ]; then
+ echo " [FAIL]"
+ rc=1
+ else
+ echo " [PASS]"
+ fi
+}
+
+test_create()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file couldn't be created" >&2
+ exit 1
+ fi
+
+ if [ $(stat -c %s $file) -ne 5 ]; then
+ echo "$file has invalid size" >&2
+ exit 1
+ fi
+}
+
+test_create_empty()
+{
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ : > $file
+
+ if [ ! -e $file ]; then
+		echo "$file could not be created without writing" >&2
+ exit 1
+ fi
+}
+
+test_create_read()
+{
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ ./create-read $file
+}
+
+test_delete()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file couldn't be created" >&2
+ exit 1
+ fi
+
+ rm $file 2>/dev/null
+ if [ $? -ne 0 ]; then
+ chattr -i $file
+ rm $file
+ fi
+
+ if [ -e $file ]; then
+ echo "$file couldn't be deleted" >&2
+ exit 1
+ fi
+}
+
+# test that we can remove a variable by issuing a write with only
+# attributes specified
+test_zero_size_delete()
+{
+ local attrs='\x07\x00\x00\x00'
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file does not exist" >&2
+ exit 1
+ fi
+
+ chattr -i $file
+ printf "$attrs" > $file
+
+ if [ -e $file ]; then
+ echo "$file should have been deleted" >&2
+ exit 1
+ fi
+}
+
+test_open_unlink()
+{
+ local file=$efivarfs_mount/$FUNCNAME-$test_guid
+ ./open-unlink $file
+}
+
+# test that we can create a range of filenames
+test_valid_filenames()
+{
+ local attrs='\x07\x00\x00\x00'
+ local ret=0
+
+ local file_list="abc dump-type0-11-1-1362436005 1234 -"
+ for f in $file_list; do
+ local file=$efivarfs_mount/$f-$test_guid
+
+ printf "$attrs\x00" > $file
+
+ if [ ! -e $file ]; then
+ echo "$file could not be created" >&2
+ ret=1
+ else
+ rm $file 2>/dev/null
+ if [ $? -ne 0 ]; then
+ chattr -i $file
+ rm $file
+ fi
+ fi
+ done
+
+ exit $ret
+}
+
+test_invalid_filenames()
+{
+ local attrs='\x07\x00\x00\x00'
+ local ret=0
+
+ local file_list="
+ -1234-1234-1234-123456789abc
+ foo
+ foo-bar
+ -foo-
+ foo-barbazba-foob-foob-foob-foobarbazfoo
+ foo-------------------------------------
+ -12345678-1234-1234-1234-123456789abc
+ a-12345678=1234-1234-1234-123456789abc
+ a-12345678-1234=1234-1234-123456789abc
+ a-12345678-1234-1234=1234-123456789abc
+ a-12345678-1234-1234-1234=123456789abc
+ 1112345678-1234-1234-1234-123456789abc"
+
+ for f in $file_list; do
+ local file=$efivarfs_mount/$f
+
+ printf "$attrs\x00" 2>/dev/null > $file
+
+ if [ -e $file ]; then
+ echo "Creating $file should have failed" >&2
+ rm $file 2>/dev/null
+ if [ $? -ne 0 ]; then
+ chattr -i $file
+ rm $file
+ fi
+ ret=1
+ fi
+ done
+
+ exit $ret
+}
+
+check_prereqs
+
+rc=0
+
+run_test test_create
+run_test test_create_empty
+run_test test_create_read
+run_test test_delete
+run_test test_zero_size_delete
+run_test test_open_unlink
+run_test test_valid_filenames
+run_test test_invalid_filenames
+
+exit $rc
diff --git a/tools/testing/selftests/efivarfs/open-unlink.c b/tools/testing/selftests/efivarfs/open-unlink.c
new file mode 100644
index 000000000..562742d44
--- /dev/null
+++ b/tools/testing/selftests/efivarfs/open-unlink.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+
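+/*
+ * efivarfs marks most variables immutable to protect them from accidental
+ * modification; the FS_IMMUTABLE_FL flag must be cleared before such a
+ * variable can be unlinked, which is what these two helpers manage.
+ */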
+static int set_immutable(const char *path, int immutable)
+{
+ unsigned int flags;
+ int fd;
+ int rc;
+ int error;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+ if (rc < 0) {
+ error = errno;
+ close(fd);
+ errno = error;
+ return rc;
+ }
+
+ if (immutable)
+ flags |= FS_IMMUTABLE_FL;
+ else
+ flags &= ~FS_IMMUTABLE_FL;
+
+ rc = ioctl(fd, FS_IOC_SETFLAGS, &flags);
+ error = errno;
+ close(fd);
+ errno = error;
+ return rc;
+}
+
+static int get_immutable(const char *path)
+{
+ unsigned int flags;
+ int fd;
+ int rc;
+ int error;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ rc = ioctl(fd, FS_IOC_GETFLAGS, &flags);
+ if (rc < 0) {
+ error = errno;
+ close(fd);
+ errno = error;
+ return rc;
+ }
+ close(fd);
+ if (flags & FS_IMMUTABLE_FL)
+ return 1;
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ const char *path;
+ char buf[5];
+ int fd, rc;
+
+ if (argc < 2) {
+ fprintf(stderr, "usage: %s <path>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ path = argv[1];
+
+ /* attributes: EFI_VARIABLE_NON_VOLATILE |
+ * EFI_VARIABLE_BOOTSERVICE_ACCESS |
+ * EFI_VARIABLE_RUNTIME_ACCESS
+ */
+ *(uint32_t *)buf = 0x7;
+ buf[4] = 0;
+
+ /* create a test variable */
+ fd = open(path, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0) {
+ perror("open(O_WRONLY)");
+ return EXIT_FAILURE;
+ }
+
+ rc = write(fd, buf, sizeof(buf));
+ if (rc != sizeof(buf)) {
+ perror("write");
+ return EXIT_FAILURE;
+ }
+
+ close(fd);
+
+ rc = get_immutable(path);
+ if (rc < 0) {
+ perror("ioctl(FS_IOC_GETFLAGS)");
+ return EXIT_FAILURE;
+ } else if (rc) {
+ rc = set_immutable(path, 0);
+ if (rc < 0) {
+ perror("ioctl(FS_IOC_SETFLAGS)");
+ return EXIT_FAILURE;
+ }
+ }
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (unlink(path) < 0) {
+ perror("unlink");
+ return EXIT_FAILURE;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc > 0) {
+ fprintf(stderr, "reading from an unlinked variable "
+ "shouldn't be possible\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
new file mode 100644
index 000000000..64073e050
--- /dev/null
+++ b/tools/testing/selftests/exec/.gitignore
@@ -0,0 +1,9 @@
+subdir*
+script*
+execveat
+execveat.symlink
+execveat.moved
+execveat.path.ephemeral
+execveat.ephemeral
+execveat.denatured
+xxxxxxxx* \ No newline at end of file
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
new file mode 100644
index 000000000..427c41ba5
--- /dev/null
+++ b/tools/testing/selftests/exec/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS = -Wall
+
+TEST_GEN_PROGS := execveat
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+# Makefile is a run-time dependency, since it's accessed by the execveat test
+TEST_FILES := Makefile
+
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+
+include ../lib.mk
+
+$(OUTPUT)/subdir:
+ mkdir -p $@
+$(OUTPUT)/script:
+ echo '#!/bin/sh' > $@
+ echo 'exit $$*' >> $@
+ chmod +x $@
+$(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
+ cd $(OUTPUT) && ln -s -f $(shell basename $<) $(shell basename $@)
+$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
+ cp $< $@
+ chmod -x $@
+
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
new file mode 100644
index 000000000..47cbf54d0
--- /dev/null
+++ b/tools/testing/selftests/exec/execveat.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 2014 Google, Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for execveat(2).
+ */
+
+#define _GNU_SOURCE /* to get O_PATH, AT_EMPTY_PATH */
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+static char longpath[2 * PATH_MAX] = "";
+static char *envp[] = { "IN_TEST=yes", NULL, NULL };
+static char *argv[] = { "execveat", "99", NULL };
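+
+/*
+ * "99" is passed as argv[1] so that re-invocations exit with status 99,
+ * which check_execveat() treats as success; the generated "script" helper
+ * (see the Makefile) simply runs "exit $*", and the re-executed binary is
+ * expected to behave the same way.
+ */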
+
+static int execveat_(int fd, const char *path, char **argv, char **envp,
+ int flags)
+{
+#ifdef __NR_execveat
+ return syscall(__NR_execveat, fd, path, argv, envp, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+#define check_execveat_fail(fd, path, flags, errno) \
+ _check_execveat_fail(fd, path, flags, errno, #errno)
+static int _check_execveat_fail(int fd, const char *path, int flags,
+ int expected_errno, const char *errno_str)
+{
+ int rc;
+
+ errno = 0;
+ printf("Check failure of execveat(%d, '%s', %d) with %s... ",
+ fd, path?:"(null)", flags, errno_str);
+ rc = execveat_(fd, path, argv, envp, flags);
+
+ if (rc > 0) {
+ printf("[FAIL] (unexpected success from execveat(2))\n");
+ return 1;
+ }
+ if (errno != expected_errno) {
+		printf("[FAIL] (expected errno %d (%s) not %d (%s))\n",
+ expected_errno, strerror(expected_errno),
+ errno, strerror(errno));
+ return 1;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+static int check_execveat_invoked_rc(int fd, const char *path, int flags,
+ int expected_rc, int expected_rc2)
+{
+ int status;
+ int rc;
+ pid_t child;
+ int pathlen = path ? strlen(path) : 0;
+
+ if (pathlen > 40)
+ printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
+ fd, path, (path + pathlen - 20), flags);
+ else
+ printf("Check success of execveat(%d, '%s', %d)... ",
+ fd, path?:"(null)", flags);
+ child = fork();
+ if (child < 0) {
+ printf("[FAIL] (fork() failed)\n");
+ return 1;
+ }
+ if (child == 0) {
+ /* Child: do execveat(). */
+ rc = execveat_(fd, path, argv, envp, flags);
+ printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
+ rc, errno, strerror(errno));
+ exit(1); /* should not reach here */
+ }
+ /* Parent: wait for & check child's exit status. */
+ rc = waitpid(child, &status, 0);
+ if (rc != child) {
+ printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+ return 1;
+ }
+ if (!WIFEXITED(status)) {
+ printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
+ child, status);
+ return 1;
+ }
+ if ((WEXITSTATUS(status) != expected_rc) &&
+ (WEXITSTATUS(status) != expected_rc2)) {
+ printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
+ child, WEXITSTATUS(status), expected_rc, expected_rc2);
+ return 1;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+static int check_execveat(int fd, const char *path, int flags)
+{
+ return check_execveat_invoked_rc(fd, path, flags, 99, 99);
+}
+
+static char *concat(const char *left, const char *right)
+{
+ char *result = malloc(strlen(left) + strlen(right) + 1);
+
+ strcpy(result, left);
+ strcat(result, right);
+ return result;
+}
+
+static int open_or_die(const char *filename, int flags)
+{
+ int fd = open(filename, flags);
+
+ if (fd < 0) {
+ printf("Failed to open '%s'; "
+ "check prerequisites are available\n", filename);
+ exit(1);
+ }
+ return fd;
+}
+
+static void exe_cp(const char *src, const char *dest)
+{
+ int in_fd = open_or_die(src, O_RDONLY);
+ int out_fd = open(dest, O_RDWR|O_CREAT|O_TRUNC, 0755);
+ struct stat info;
+
+ fstat(in_fd, &info);
+ sendfile(out_fd, in_fd, NULL, info.st_size);
+ close(in_fd);
+ close(out_fd);
+}
+
+#define XX_DIR_LEN 200
+static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
+{
+ int fail = 0;
+ int ii, count, len;
+ char longname[XX_DIR_LEN + 1];
+ int fd;
+
+ if (*longpath == '\0') {
+ /* Create a filename close to PATH_MAX in length */
+ char *cwd = getcwd(NULL, 0);
+
+ if (!cwd) {
+ printf("Failed to getcwd(), errno=%d (%s)\n",
+ errno, strerror(errno));
+ return 2;
+ }
+ strcpy(longpath, cwd);
+ strcat(longpath, "/");
+ memset(longname, 'x', XX_DIR_LEN - 1);
+ longname[XX_DIR_LEN - 1] = '/';
+ longname[XX_DIR_LEN] = '\0';
+ count = (PATH_MAX - 3 - strlen(cwd)) / XX_DIR_LEN;
+ for (ii = 0; ii < count; ii++) {
+ strcat(longpath, longname);
+ mkdir(longpath, 0755);
+ }
+ len = (PATH_MAX - 3 - strlen(cwd)) - (count * XX_DIR_LEN);
+ if (len <= 0)
+ len = 1;
+ memset(longname, 'y', len);
+ longname[len] = '\0';
+ strcat(longpath, longname);
+ free(cwd);
+ }
+ exe_cp(src, longpath);
+
+ /*
+ * Execute as a pre-opened file descriptor, which works whether this is
+ * a script or not (because the interpreter sees a filename like
+ * "/dev/fd/20").
+ */
+ fd = open(longpath, O_RDONLY);
+	if (fd >= 0) {
+ printf("Invoke copy of '%s' via filename of length %zu:\n",
+ src, strlen(longpath));
+ fail += check_execveat(fd, "", AT_EMPTY_PATH);
+ } else {
+ printf("Failed to open length %zu filename, errno=%d (%s)\n",
+ strlen(longpath), errno, strerror(errno));
+ fail++;
+ }
+
+ /*
+ * Execute as a long pathname relative to "/". If this is a script,
+ * the interpreter will launch but fail to open the script because its
+ * name ("/dev/fd/5/xxx....") is bigger than PATH_MAX.
+ *
+ * The failure code is usually 127 (POSIX: "If a command is not found,
+ * the exit status shall be 127."), but some systems give 126 (POSIX:
+ * "If the command name is found, but it is not an executable utility,
+ * the exit status shall be 126."), so allow either.
+ */
+ if (is_script)
+ fail += check_execveat_invoked_rc(root_dfd, longpath + 1, 0,
+ 127, 126);
+ else
+ fail += check_execveat(root_dfd, longpath + 1, 0);
+
+ return fail;
+}
+
+static int run_tests(void)
+{
+ int fail = 0;
+ char *fullname = realpath("execveat", NULL);
+ char *fullname_script = realpath("script", NULL);
+ char *fullname_symlink = concat(fullname, ".symlink");
+ int subdir_dfd = open_or_die("subdir", O_DIRECTORY|O_RDONLY);
+ int subdir_dfd_ephemeral = open_or_die("subdir.ephemeral",
+ O_DIRECTORY|O_RDONLY);
+ int dot_dfd = open_or_die(".", O_DIRECTORY|O_RDONLY);
+ int root_dfd = open_or_die("/", O_DIRECTORY|O_RDONLY);
+ int dot_dfd_path = open_or_die(".", O_DIRECTORY|O_RDONLY|O_PATH);
+ int dot_dfd_cloexec = open_or_die(".", O_DIRECTORY|O_RDONLY|O_CLOEXEC);
+ int fd = open_or_die("execveat", O_RDONLY);
+ int fd_path = open_or_die("execveat", O_RDONLY|O_PATH);
+ int fd_symlink = open_or_die("execveat.symlink", O_RDONLY);
+ int fd_denatured = open_or_die("execveat.denatured", O_RDONLY);
+ int fd_denatured_path = open_or_die("execveat.denatured",
+ O_RDONLY|O_PATH);
+ int fd_script = open_or_die("script", O_RDONLY);
+ int fd_ephemeral = open_or_die("execveat.ephemeral", O_RDONLY);
+ int fd_ephemeral_path = open_or_die("execveat.path.ephemeral",
+ O_RDONLY|O_PATH);
+ int fd_script_ephemeral = open_or_die("script.ephemeral", O_RDONLY);
+ int fd_cloexec = open_or_die("execveat", O_RDONLY|O_CLOEXEC);
+ int fd_script_cloexec = open_or_die("script", O_RDONLY|O_CLOEXEC);
+
+ /* Check if we have execveat at all, and bail early if not */
+ errno = 0;
+ execveat_(-1, NULL, NULL, NULL, 0);
+ if (errno == ENOSYS) {
+ ksft_exit_skip(
+ "ENOSYS calling execveat - no kernel support?\n");
+ }
+
+ /* Change file position to confirm it doesn't affect anything */
+ lseek(fd, 10, SEEK_SET);
+
+ /* Normal executable file: */
+ /* dfd + path */
+ fail += check_execveat(subdir_dfd, "../execveat", 0);
+ fail += check_execveat(dot_dfd, "execveat", 0);
+ fail += check_execveat(dot_dfd_path, "execveat", 0);
+ /* absolute path */
+ fail += check_execveat(AT_FDCWD, fullname, 0);
+ /* absolute path with nonsense dfd */
+ fail += check_execveat(99, fullname, 0);
+ /* fd + no path */
+ fail += check_execveat(fd, "", AT_EMPTY_PATH);
+ /* O_CLOEXEC fd + no path */
+ fail += check_execveat(fd_cloexec, "", AT_EMPTY_PATH);
+ /* O_PATH fd */
+ fail += check_execveat(fd_path, "", AT_EMPTY_PATH);
+
+ /* Mess with executable file that's already open: */
+ /* fd + no path to a file that's been renamed */
+ rename("execveat.ephemeral", "execveat.moved");
+ fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH);
+ /* fd + no path to a file that's been deleted */
+ unlink("execveat.moved"); /* remove the file now fd open */
+ fail += check_execveat(fd_ephemeral, "", AT_EMPTY_PATH);
+
+ /* Mess with executable file that's already open with O_PATH */
+ /* fd + no path to a file that's been deleted */
+ unlink("execveat.path.ephemeral");
+ fail += check_execveat(fd_ephemeral_path, "", AT_EMPTY_PATH);
+
+ /* Invalid argument failures */
+ fail += check_execveat_fail(fd, "", 0, ENOENT);
+ fail += check_execveat_fail(fd, NULL, AT_EMPTY_PATH, EFAULT);
+
+ /* Symlink to executable file: */
+ /* dfd + path */
+ fail += check_execveat(dot_dfd, "execveat.symlink", 0);
+ fail += check_execveat(dot_dfd_path, "execveat.symlink", 0);
+ /* absolute path */
+ fail += check_execveat(AT_FDCWD, fullname_symlink, 0);
+ /* fd + no path, even with AT_SYMLINK_NOFOLLOW (already followed) */
+ fail += check_execveat(fd_symlink, "", AT_EMPTY_PATH);
+ fail += check_execveat(fd_symlink, "",
+ AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW);
+
+ /* Symlink fails when AT_SYMLINK_NOFOLLOW set: */
+ /* dfd + path */
+ fail += check_execveat_fail(dot_dfd, "execveat.symlink",
+ AT_SYMLINK_NOFOLLOW, ELOOP);
+ fail += check_execveat_fail(dot_dfd_path, "execveat.symlink",
+ AT_SYMLINK_NOFOLLOW, ELOOP);
+ /* absolute path */
+ fail += check_execveat_fail(AT_FDCWD, fullname_symlink,
+ AT_SYMLINK_NOFOLLOW, ELOOP);
+
+ /* Shell script wrapping executable file: */
+ /* dfd + path */
+ fail += check_execveat(subdir_dfd, "../script", 0);
+ fail += check_execveat(dot_dfd, "script", 0);
+ fail += check_execveat(dot_dfd_path, "script", 0);
+ /* absolute path */
+ fail += check_execveat(AT_FDCWD, fullname_script, 0);
+ /* fd + no path */
+ fail += check_execveat(fd_script, "", AT_EMPTY_PATH);
+ fail += check_execveat(fd_script, "",
+ AT_EMPTY_PATH|AT_SYMLINK_NOFOLLOW);
+ /* O_CLOEXEC fd fails for a script (as script file inaccessible) */
+ fail += check_execveat_fail(fd_script_cloexec, "", AT_EMPTY_PATH,
+ ENOENT);
+ fail += check_execveat_fail(dot_dfd_cloexec, "script", 0, ENOENT);
+
+ /* Mess with script file that's already open: */
+ /* fd + no path to a file that's been renamed */
+ rename("script.ephemeral", "script.moved");
+ fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH);
+ /* fd + no path to a file that's been deleted */
+ unlink("script.moved"); /* remove the file while fd open */
+ fail += check_execveat(fd_script_ephemeral, "", AT_EMPTY_PATH);
+
+ /* Rename a subdirectory in the path: */
+ rename("subdir.ephemeral", "subdir.moved");
+ fail += check_execveat(subdir_dfd_ephemeral, "../script", 0);
+ fail += check_execveat(subdir_dfd_ephemeral, "script", 0);
+ /* Remove the subdir and its contents */
+ unlink("subdir.moved/script");
+ unlink("subdir.moved");
+ /* Shell loads via deleted subdir OK because name starts with .. */
+ fail += check_execveat(subdir_dfd_ephemeral, "../script", 0);
+ fail += check_execveat_fail(subdir_dfd_ephemeral, "script", 0, ENOENT);
+
+ /* Flag values other than AT_SYMLINK_NOFOLLOW => EINVAL */
+ fail += check_execveat_fail(dot_dfd, "execveat", 0xFFFF, EINVAL);
+ /* Invalid path => ENOENT */
+ fail += check_execveat_fail(dot_dfd, "no-such-file", 0, ENOENT);
+ fail += check_execveat_fail(dot_dfd_path, "no-such-file", 0, ENOENT);
+ fail += check_execveat_fail(AT_FDCWD, "no-such-file", 0, ENOENT);
+ /* Attempt to execute directory => EACCES */
+ fail += check_execveat_fail(dot_dfd, "", AT_EMPTY_PATH, EACCES);
+ /* Attempt to execute non-executable => EACCES */
+ fail += check_execveat_fail(dot_dfd, "Makefile", 0, EACCES);
+ fail += check_execveat_fail(fd_denatured, "", AT_EMPTY_PATH, EACCES);
+ fail += check_execveat_fail(fd_denatured_path, "", AT_EMPTY_PATH,
+ EACCES);
+ /* Attempt to execute nonsense FD => EBADF */
+ fail += check_execveat_fail(99, "", AT_EMPTY_PATH, EBADF);
+ fail += check_execveat_fail(99, "execveat", 0, EBADF);
+ /* Attempt to execute relative to non-directory => ENOTDIR */
+ fail += check_execveat_fail(fd, "execveat", 0, ENOTDIR);
+
+ fail += check_execveat_pathmax(root_dfd, "execveat", 0);
+ fail += check_execveat_pathmax(root_dfd, "script", 1);
+ return fail;
+}
+
+static void prerequisites(void)
+{
+ int fd;
+ const char *script = "#!/bin/sh\nexit $*\n";
+
+ /* Create ephemeral copies of files */
+ exe_cp("execveat", "execveat.ephemeral");
+ exe_cp("execveat", "execveat.path.ephemeral");
+ exe_cp("script", "script.ephemeral");
+ mkdir("subdir.ephemeral", 0755);
+
+ fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755);
+ write(fd, script, strlen(script));
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ int ii;
+ int rc;
+ const char *verbose = getenv("VERBOSE");
+
+ if (argc >= 2) {
+ /* If we are invoked with an argument, don't run tests. */
+ const char *in_test = getenv("IN_TEST");
+
+ if (verbose) {
+ printf(" invoked with:");
+ for (ii = 0; ii < argc; ii++)
+ printf(" [%d]='%s'", ii, argv[ii]);
+ printf("\n");
+ }
+
+ /* Check expected environment transferred. */
+ if (!in_test || strcmp(in_test, "yes") != 0) {
+ printf("[FAIL] (no IN_TEST=yes in env)\n");
+ return 1;
+ }
+
+ /* Use the final argument as an exit code. */
+ rc = atoi(argv[argc - 1]);
+ fflush(stdout);
+ } else {
+ prerequisites();
+ if (verbose)
+ envp[1] = "VERBOSE=1";
+ rc = run_tests();
+ if (rc > 0)
+ printf("%d tests failed\n", rc);
+ }
+ return rc;
+}
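
The execveat_() helper used throughout this test (defined earlier in the
file) has to cope with C libraries that ship no execveat() wrapper. A
minimal sketch of such a raw-syscall fallback, assuming <sys/syscall.h>
defines __NR_execveat for the target architecture (the name raw_execveat
is illustrative):

    #include <sys/syscall.h>
    #include <unistd.h>

    /* Fall back to syscall(2) when libc lacks an execveat() wrapper. */
    static int raw_execveat(int dfd, const char *path,
                            char **argv, char **envp, int flags)
    {
            return syscall(__NR_execveat, dfd, path, argv, envp, flags);
    }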
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
new file mode 100644
index 000000000..8449cf671
--- /dev/null
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -0,0 +1,2 @@
+dnotify_test
+devpts_pts
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
new file mode 100644
index 000000000..129880fb4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../usr/include/
+TEST_GEN_PROGS := devpts_pts
+TEST_GEN_PROGS_EXTENDED := dnotify_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/devpts_pts.c b/tools/testing/selftests/filesystems/devpts_pts.c
new file mode 100644
index 000000000..b1fc9b916
--- /dev/null
+++ b/tools/testing/selftests/filesystems/devpts_pts.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <asm/ioctls.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+static bool terminal_dup2(int duplicate, int original)
+{
+ int ret;
+
+ ret = dup2(duplicate, original);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+static int terminal_set_stdfds(int fd)
+{
+ int i;
+
+ if (fd < 0)
+ return 0;
+
+ for (i = 0; i < 3; i++)
+ if (!terminal_dup2(fd, (int[]){STDIN_FILENO, STDOUT_FILENO,
+ STDERR_FILENO}[i]))
+ return -1;
+
+ return 0;
+}
+
+static int login_pty(int fd)
+{
+ int ret;
+
+ setsid();
+
+ ret = ioctl(fd, TIOCSCTTY, NULL);
+ if (ret < 0)
+ return -1;
+
+ ret = terminal_set_stdfds(fd);
+ if (ret < 0)
+ return -1;
+
+ if (fd > STDERR_FILENO)
+ close(fd);
+
+ return 0;
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+ return -1;
+ }
+ if (ret != pid)
+ goto again;
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int resolve_procfd_symlink(int fd, char *buf, size_t buflen)
+{
+ int ret;
+ char procfd[4096];
+
+ ret = snprintf(procfd, 4096, "/proc/self/fd/%d", fd);
+ if (ret < 0 || ret >= 4096)
+ return -1;
+
+ ret = readlink(procfd, buf, buflen);
+ if (ret < 0 || (size_t)ret >= buflen)
+ return -1;
+
+ buf[ret] = '\0';
+
+ return 0;
+}
+
+static int do_tiocgptpeer(char *ptmx, char *expected_procfd_contents)
+{
+ int ret;
+ int master = -1, slave = -1, fret = -1;
+
+ master = open(ptmx, O_RDWR | O_NOCTTY | O_CLOEXEC);
+ if (master < 0) {
+ fprintf(stderr, "Failed to open \"%s\": %s\n", ptmx,
+ strerror(errno));
+ return -1;
+ }
+
+ /*
+ * grantpt() makes assumptions about /dev/pts/ so ignore it. It's also
+ * not really needed.
+ */
+ ret = unlockpt(master);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to unlock terminal\n");
+ goto do_cleanup;
+ }
+
+#ifdef TIOCGPTPEER
+ slave = ioctl(master, TIOCGPTPEER, O_RDWR | O_NOCTTY | O_CLOEXEC);
+#endif
+ if (slave < 0) {
+ if (errno == EINVAL) {
+ fprintf(stderr, "TIOCGPTPEER is not supported. "
+ "Skipping test.\n");
+ fret = KSFT_SKIP;
+ } else {
+ fprintf(stderr,
+ "Failed to perform TIOCGPTPEER ioctl\n");
+ fret = EXIT_FAILURE;
+ }
+ goto do_cleanup;
+ }
+
+ pid_t pid = fork();
+ if (pid < 0)
+ goto do_cleanup;
+
+ if (pid == 0) {
+ char buf[4096];
+
+ ret = login_pty(slave);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to setup terminal\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ ret = resolve_procfd_symlink(STDIN_FILENO, buf, sizeof(buf));
+ if (ret < 0) {
+ fprintf(stderr, "Failed to retrieve pathname of pts "
+ "slave file descriptor\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (strncmp(expected_procfd_contents, buf,
+ strlen(expected_procfd_contents)) != 0) {
+ fprintf(stderr, "Received invalid contents for "
+ "\"/proc/<pid>/fd/%d\" symlink: %s\n",
+ STDIN_FILENO, buf);
+ _exit(-1);
+ }
+
+ fprintf(stderr, "Contents of \"/proc/<pid>/fd/%d\" "
+ "symlink are valid: %s\n", STDIN_FILENO, buf);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ret = wait_for_pid(pid);
+ if (ret < 0)
+ goto do_cleanup;
+
+ fret = EXIT_SUCCESS;
+
+do_cleanup:
+ if (master >= 0)
+ close(master);
+ if (slave >= 0)
+ close(slave);
+
+ return fret;
+}
+
+static int verify_non_standard_devpts_mount(void)
+{
+ char *mntpoint;
+ int ret = -1;
+ char devpts[] = P_tmpdir "/devpts_fs_XXXXXX";
+ char ptmx[] = P_tmpdir "/devpts_fs_XXXXXX/ptmx";
+
+ ret = umount("/dev/pts");
+ if (ret < 0) {
+ fprintf(stderr, "Failed to unmount \"/dev/pts\": %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ (void)umount("/dev/ptmx");
+
+ mntpoint = mkdtemp(devpts);
+ if (!mntpoint) {
+ fprintf(stderr, "Failed to create temporary mountpoint: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ ret = mount("devpts", mntpoint, "devpts", MS_NOSUID | MS_NOEXEC,
+ "newinstance,ptmxmode=0666,mode=0620,gid=5");
+ if (ret < 0) {
+ fprintf(stderr, "Failed to mount devpts fs to \"%s\" in new "
+ "mount namespace: %s\n", mntpoint,
+ strerror(errno));
+		rmdir(mntpoint);
+ return -1;
+ }
+
+ ret = snprintf(ptmx, sizeof(ptmx), "%s/ptmx", devpts);
+ if (ret < 0 || (size_t)ret >= sizeof(ptmx)) {
+		(void)umount(mntpoint);
+		rmdir(mntpoint);
+ return -1;
+ }
+
+ ret = do_tiocgptpeer(ptmx, mntpoint);
+	(void)umount(mntpoint);
+	rmdir(mntpoint);
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int verify_ptmx_bind_mount(void)
+{
+ int ret;
+
+ ret = mount("/dev/pts/ptmx", "/dev/ptmx", NULL, MS_BIND, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to "
+ "\"/dev/ptmx\" mount namespace\n");
+ return -1;
+ }
+
+ ret = do_tiocgptpeer("/dev/ptmx", "/dev/pts/");
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int verify_invalid_ptmx_bind_mount(void)
+{
+ int ret;
+	int mntpoint_fd;
+ char ptmx[] = P_tmpdir "/devpts_ptmx_XXXXXX";
+
+ mntpoint_fd = mkstemp(ptmx);
+ if (mntpoint_fd < 0) {
+ fprintf(stderr, "Failed to create temporary directory: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ ret = mount("/dev/pts/ptmx", ptmx, NULL, MS_BIND, NULL);
+ close(mntpoint_fd);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to bind mount \"/dev/pts/ptmx\" to "
+ "\"%s\" mount namespace\n", ptmx);
+ return -1;
+ }
+
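+	/*
+	 * ptmx is now bound to a regular file outside any devpts mount, so
+	 * TIOCGPTPEER is expected to fail; a successful do_tiocgptpeer()
+	 * here is the error case.
+	 */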
+ ret = do_tiocgptpeer(ptmx, "/dev/pts/");
+ if (ret == 0)
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ if (!isatty(STDIN_FILENO)) {
+ fprintf(stderr, "Standard input file descriptor is not attached "
+ "to a terminal. Skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ ret = unshare(CLONE_NEWNS);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to unshare mount namespace\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to make \"/\" MS_PRIVATE in new mount "
+ "namespace\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = verify_ptmx_bind_mount();
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ ret = verify_invalid_ptmx_bind_mount();
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ ret = verify_non_standard_devpts_mount();
+ if (ret < 0)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/filesystems/dnotify_test.c b/tools/testing/selftests/filesystems/dnotify_test.c
new file mode 100644
index 000000000..c0a9b2d33
--- /dev/null
+++ b/tools/testing/selftests/filesystems/dnotify_test.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* needed to get the defines */
+#include <fcntl.h> /* in glibc 2.2 this has the needed
+ values defined */
+#include <signal.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static volatile int event_fd;
+
+static void handler(int sig, siginfo_t *si, void *data)
+{
+ event_fd = si->si_fd;
+}
+
+int main(void)
+{
+ struct sigaction act;
+ int fd;
+
+ act.sa_sigaction = handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ sigaction(SIGRTMIN + 1, &act, NULL);
+
+ fd = open(".", O_RDONLY);
+ fcntl(fd, F_SETSIG, SIGRTMIN + 1);
+ fcntl(fd, F_NOTIFY, DN_MODIFY|DN_CREATE|DN_MULTISHOT);
+ /* we will now be notified if any of the files
+ in "." is modified or new files are created */
+ while (1) {
+ pause();
+ printf("Got event on fd=%d\n", event_fd);
+ }
+}
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
new file mode 100644
index 000000000..261c81f08
--- /dev/null
+++ b/tools/testing/selftests/firmware/Makefile
@@ -0,0 +1,12 @@
+# Makefile for firmware loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := fw_run_tests.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
new file mode 100644
index 000000000..bf634dda0
--- /dev/null
+++ b/tools/testing/selftests/firmware/config
@@ -0,0 +1,5 @@
+CONFIG_TEST_FIRMWARE=y
+CONFIG_FW_LOADER=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
diff --git a/tools/testing/selftests/firmware/fw_fallback.sh b/tools/testing/selftests/firmware/fw_fallback.sh
new file mode 100755
index 000000000..70d18be46
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_fallback.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates that the kernel will fall back to the sysfs fallback
+# mechanism to load firmware it can't find on disk itself. We must request
+# a firmware that neither the kernel nor any installed helper (e.g. udev)
+# will find, so that we can perform the load ourselves manually.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="yes"
+TEST_REQS_FW_SET_CUSTOM_PATH="no"
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+setup_tmp_file
+
+trap "test_finish" EXIT
+
+load_fw()
+{
+ local name="$1"
+ local file="$2"
+
+ # This will block until our load (below) has finished.
+ echo -n "$name" >"$DIR"/trigger_request &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
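+	# Sysfs loading protocol: write 1 to "loading", stream the blob into
+	# "data", then write 0 to commit it (writing -1 aborts the load).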
+ echo 1 >"$DIR"/"$name"/loading
+ cat "$file" >"$DIR"/"$name"/data
+ echo 0 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
+load_fw_cancel()
+{
+ local name="$1"
+ local file="$2"
+
+ # This will block until our load (below) has finished.
+ echo -n "$name" >"$DIR"/trigger_request 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo -1 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+}
+
+load_fw_custom()
+{
+ if [ ! -e "$DIR"/trigger_custom_fallback ]; then
+ echo "$0: custom fallback trigger not present, ignoring test" >&2
+ exit $ksft_skip
+ fi
+
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo 1 >"$DIR"/"$name"/loading
+ cat "$file" >"$DIR"/"$name"/data
+ echo 0 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+ return 0
+}
+
+
+load_fw_custom_cancel()
+{
+ if [ ! -e "$DIR"/trigger_custom_fallback ]; then
+ echo "$0: canceling custom fallback trigger not present, ignoring test" >&2
+ exit $ksft_skip
+ fi
+
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" >"$DIR"/trigger_custom_fallback 2>/dev/null &
+
+ # Give kernel a chance to react.
+ local timeout=10
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "$0: firmware interface never appeared" >&2
+ exit 1
+ fi
+ done
+
+ echo -1 >"$DIR"/"$name"/loading
+
+ # Wait for request to finish.
+ wait
+ return 0
+}
+
+load_fw_fallback_with_child()
+{
+ local name="$1"
+ local file="$2"
+
+ # This is the value already set but we want to be explicit
+ echo 4 >/sys/class/firmware/timeout
+
+ sleep 1 &
+ SECONDS_BEFORE=$(date +%s)
+ echo -n "$name" >"$DIR"/trigger_request 2>/dev/null
+ SECONDS_AFTER=$(date +%s)
+ SECONDS_DELTA=$(($SECONDS_AFTER - $SECONDS_BEFORE))
+ if [ "$SECONDS_DELTA" -lt 4 ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ wait
+ return $RET
+}
+
+test_sysfs_timeout()
+{
+ DEVPATH="$DIR"/"nope-$NAME"/loading
+
+ # Test failure when doing nothing (timeout works).
+ echo -n 2 >/sys/class/firmware/timeout
+ echo -n "nope-$NAME" >"$DIR"/trigger_request 2>/dev/null &
+
+	# Give the kernel some time to create the loading file; this sleep
+	# must be shorter than the timeout set above.
+ sleep 1
+ if [ ! -f $DEVPATH ]; then
+ echo "$0: fallback mechanism immediately cancelled"
+ echo ""
+ echo "The file never appeared: $DEVPATH"
+ echo ""
+ echo "This might be a distribution udev rule setup by your distribution"
+ echo "to immediately cancel all fallback requests, this must be"
+ echo "removed before running these tests. To confirm look for"
+ echo "a firmware rule like /lib/udev/rules.d/50-firmware.rules"
+ echo "and see if you have something like this:"
+ echo ""
+ echo "SUBSYSTEM==\"firmware\", ACTION==\"add\", ATTR{loading}=\"-1\""
+ echo ""
+ echo "If you do remove this file or comment out this line before"
+ echo "proceeding with these tests."
+ exit 1
+ fi
+
+ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not expected to match" >&2
+ exit 1
+ else
+ echo "$0: timeout works"
+ fi
+}
+
+run_sysfs_main_tests()
+{
+	test_sysfs_timeout
+ # Put timeout high enough for us to do work but not so long that failures
+ # slow down this test too much.
+ echo 4 >/sys/class/firmware/timeout
+
+ # Load this script instead of the desired firmware.
+ load_fw "$NAME" "$0"
+ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not expected to match" >&2
+ exit 1
+ else
+ echo "$0: firmware comparison works"
+ fi
+
+ # Do a proper load, which should work correctly.
+ load_fw "$NAME" "$FW"
+ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+ else
+ echo "$0: fallback mechanism works"
+ fi
+
+ load_fw_cancel "nope-$NAME" "$FW"
+ if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was expected to be cancelled" >&2
+ exit 1
+ else
+ echo "$0: cancelling fallback mechanism works"
+ fi
+
+ set +e
+ load_fw_fallback_with_child "nope-signal-$NAME" "$FW"
+ if [ "$?" -eq 0 ]; then
+ echo "$0: SIGCHLD on sync ignored as expected" >&2
+ else
+ echo "$0: error - sync firmware request cancelled due to SIGCHLD" >&2
+ exit 1
+ fi
+ set -e
+}
+
+run_sysfs_custom_load_tests()
+{
+ RANDOM_FILE_PATH=$(setup_random_file)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then
+ if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+ else
+ echo "$0: custom fallback loading mechanism works"
+ fi
+ fi
+
+ RANDOM_FILE_PATH=$(setup_random_file)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ if load_fw_custom "$RANDOM_FILE" "$RANDOM_FILE_PATH" ; then
+ if ! diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+ else
+ echo "$0: custom fallback loading mechanism works"
+ fi
+ fi
+
+ RANDOM_FILE_REAL="$RANDOM_FILE_PATH"
+ FAKE_RANDOM_FILE_PATH=$(setup_random_file_fake)
+ FAKE_RANDOM_FILE="$(basename $FAKE_RANDOM_FILE_PATH)"
+
+ if load_fw_custom_cancel "$FAKE_RANDOM_FILE" "$RANDOM_FILE_REAL" ; then
+ if diff -q "$RANDOM_FILE_PATH" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was expected to be cancelled" >&2
+ exit 1
+ else
+ echo "$0: cancelling custom fallback mechanism works"
+ fi
+ fi
+}
+
+if [ "$HAS_FW_LOADER_USER_HELPER_FALLBACK" = "yes" ]; then
+ run_sysfs_main_tests
+fi
+
+run_sysfs_custom_load_tests
+
+exit 0
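
The fallback these tests drive by hand serves the same kernel API a driver
uses for direct loading; the test_firmware module wires it up to the
trigger files above. A hedged sketch of the driver-side call, assuming a
valid struct device *dev (the function name is illustrative):

    #include <linux/firmware.h>

    /* If the kernel cannot find the blob on disk, this call is what
     * falls into the sysfs loading interface exercised above. */
    static int load_blob(struct device *dev)
    {
            const struct firmware *fw;
            int err = request_firmware(&fw, "test-firmware.bin", dev);

            if (err)
                    return err;
            /* ... consume fw->data / fw->size ... */
            release_firmware(fw);
            return 0;
    }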
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
new file mode 100755
index 000000000..a4320c4b4
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -0,0 +1,332 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates that the kernel will load firmware out of its list of
+# firmware locations on disk. Since the user helper does similar work,
+# we reset the custom load directory to a location the user helper doesn't
+# know so we can be sure we're not accidentally testing the user helper.
+set -e
+
+TEST_REQS_FW_SYSFS_FALLBACK="no"
+TEST_REQS_FW_SET_CUSTOM_PATH="yes"
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+setup_tmp_file
+
+trap "test_finish" EXIT
+
+if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ # Turn down the timeout so failures don't take so long.
+ echo 1 >/sys/class/firmware/timeout
+fi
+
+if printf '\000' >"$DIR"/trigger_request 2> /dev/null; then
+ echo "$0: empty filename should not succeed" >&2
+ exit 1
+fi
+
+if [ ! -e "$DIR"/trigger_async_request ]; then
+ echo "$0: empty filename: async trigger not present, ignoring test" >&2
+ exit $ksft_skip
+else
+ if printf '\000' >"$DIR"/trigger_async_request 2> /dev/null; then
+ echo "$0: empty filename should not succeed (async)" >&2
+ exit 1
+ fi
+fi
+
+# Request a firmware that doesn't exist, it should fail.
+if echo -n "nope-$NAME" >"$DIR"/trigger_request 2> /dev/null; then
+ echo "$0: firmware shouldn't have loaded" >&2
+ exit 1
+fi
+if diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not expected to match" >&2
+ exit 1
+else
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$0: timeout works"
+ fi
+fi
+
+# This should succeed via kernel load, or will fail 1 second after being
+# handed over to the user helper, which won't find the fw either.
+if ! echo -n "$NAME" >"$DIR"/trigger_request ; then
+ echo "$0: could not trigger request" >&2
+ exit 1
+fi
+
+# Verify the contents are what we expect.
+if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded" >&2
+ exit 1
+else
+ echo "$0: filesystem loading works"
+fi
+
+# Try the asynchronous version too
+if [ ! -e "$DIR"/trigger_async_request ]; then
+ echo "$0: firmware loading: async trigger not present, ignoring test" >&2
+ exit $ksft_skip
+else
+ if ! echo -n "$NAME" >"$DIR"/trigger_async_request ; then
+ echo "$0: could not trigger async request" >&2
+ exit 1
+ fi
+
+ # Verify the contents are what we expect.
+ if ! diff -q "$FW" /dev/test_firmware >/dev/null ; then
+ echo "$0: firmware was not loaded (async)" >&2
+ exit 1
+ else
+ echo "$0: async filesystem loading works"
+ fi
+fi
+
+### Batched requests tests
+test_config_present()
+{
+ if [ ! -f $DIR/reset ]; then
+ echo "Configuration triggers not present, ignoring test"
+ exit $ksft_skip
+ fi
+}
+
+# Defaults:
+#
+# send_uevent: 1
+# sync_direct: 0
+# name: test-firmware.bin
+# num_requests: 4
+config_reset()
+{
+ echo 1 > $DIR/reset
+}
+
+release_all_firmware()
+{
+ echo 1 > $DIR/release_all_firmware
+}
+
+config_set_name()
+{
+ echo -n $1 > $DIR/config_name
+}
+
+config_set_sync_direct()
+{
+ echo 1 > $DIR/config_sync_direct
+}
+
+config_unset_sync_direct()
+{
+ echo 0 > $DIR/config_sync_direct
+}
+
+config_set_uevent()
+{
+ echo 1 > $DIR/config_send_uevent
+}
+
+config_unset_uevent()
+{
+ echo 0 > $DIR/config_send_uevent
+}
+
+config_trigger_sync()
+{
+ echo -n 1 > $DIR/trigger_batched_requests 2>/dev/null
+}
+
+config_trigger_async()
+{
+ echo -n 1 > $DIR/trigger_batched_requests_async 2> /dev/null
+}
+
+config_set_read_fw_idx()
+{
+ echo -n $1 > $DIR/config_read_fw_idx 2> /dev/null
+}
+
+read_firmwares()
+{
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+ # Verify the contents are what we expect.
+		# -Z is required for now -- check for yourself: md5sum on
+		# $FW and $DIR/read_firmware yields the same hash, and even
+		# cmp agrees, yet plain diff differs, so something is off.
+ if ! diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then
+ echo "request #$i: firmware was not loaded" >&2
+ exit 1
+ fi
+ done
+}
+
+read_firmwares_expect_nofile()
+{
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+ # Ensures contents differ
+ if diff -q -Z "$FW" $DIR/read_firmware 2>/dev/null ; then
+ echo "request $i: file was not expected to match" >&2
+ exit 1
+ fi
+ done
+}
+
+test_batched_request_firmware_nofile()
+{
+ echo -n "Batched request_firmware() nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
+test_batched_request_firmware_direct_nofile()
+{
+ echo -n "Batched request_firmware_direct() nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_sync_direct
+ config_trigger_sync
+ release_all_firmware
+ echo "OK"
+}
+
+test_request_firmware_nowait_uevent_nofile()
+{
+ echo -n "Batched request_firmware_nowait(uevent=true) nofile try #$1: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_trigger_async
+ release_all_firmware
+ echo "OK"
+}
+
+test_wait_and_cancel_custom_load()
+{
+ if [ "$HAS_FW_LOADER_USER_HELPER" != "yes" ]; then
+ return
+ fi
+ local timeout=10
+ name=$1
+ while [ ! -e "$DIR"/"$name"/loading ]; do
+ sleep 0.1
+ timeout=$(( $timeout - 1 ))
+ if [ "$timeout" -eq 0 ]; then
+ echo "firmware interface never appeared:" >&2
+ echo "$DIR/$name/loading" >&2
+ exit 1
+ fi
+ done
+ echo -1 >"$DIR"/"$name"/loading
+}
+
+test_request_firmware_nowait_custom_nofile()
+{
+ echo -n "Batched request_firmware_nowait(uevent=false) nofile try #$1: "
+ config_reset
+ config_unset_uevent
+ RANDOM_FILE_PATH=$(setup_random_file_fake)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ config_set_name $RANDOM_FILE
+ config_trigger_async &
+ test_wait_and_cancel_custom_load $RANDOM_FILE
+ wait
+ release_all_firmware
+ echo "OK"
+}
+
+test_batched_request_firmware()
+{
+ echo -n "Batched request_firmware() try #$1: "
+ config_reset
+ config_trigger_sync
+ read_firmwares
+ release_all_firmware
+ echo "OK"
+}
+
+test_batched_request_firmware_direct()
+{
+ echo -n "Batched request_firmware_direct() try #$1: "
+ config_reset
+ config_set_sync_direct
+ config_trigger_sync
+ release_all_firmware
+ echo "OK"
+}
+
+test_request_firmware_nowait_uevent()
+{
+ echo -n "Batched request_firmware_nowait(uevent=true) try #$1: "
+ config_reset
+ config_trigger_async
+ release_all_firmware
+ echo "OK"
+}
+
+test_request_firmware_nowait_custom()
+{
+ echo -n "Batched request_firmware_nowait(uevent=false) try #$1: "
+ config_reset
+ config_unset_uevent
+ RANDOM_FILE_PATH=$(setup_random_file)
+ RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
+ config_set_name $RANDOM_FILE
+ config_trigger_async
+ release_all_firmware
+ echo "OK"
+}
+
+# Only continue if batched request triggers are present on the
+# test-firmware driver
+test_config_present
+
+# test with the file present
+echo
+echo "Testing with the file present..."
+for i in $(seq 1 5); do
+ test_batched_request_firmware $i
+done
+
+for i in $(seq 1 5); do
+ test_batched_request_firmware_direct $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_uevent $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_custom $i
+done
+
+# Test with the file missing; errors are expected here. An actual
+# failure would be a hung task, which would require a hard reset.
+echo
+echo "Testing with the file missing..."
+for i in $(seq 1 5); do
+ test_batched_request_firmware_nofile $i
+done
+
+for i in $(seq 1 5); do
+ test_batched_request_firmware_direct_nofile $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_uevent_nofile $i
+done
+
+for i in $(seq 1 5); do
+ test_request_firmware_nowait_custom_nofile $i
+done
+
+exit 0
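
The batched async triggers above map onto the kernel's non-blocking
request API; the config_send_uevent knob corresponds to the uevent
argument. A sketch of the driver-side call (the callback and function
names are illustrative):

    #include <linux/firmware.h>
    #include <linux/module.h>

    static void blob_ready(const struct firmware *fw, void *context)
    {
            if (fw) {
                    /* ... consume fw->data / fw->size ... */
                    release_firmware(fw);
            }
    }

    static int load_blob_async(struct device *dev)
    {
            /* uevent=true matches "echo 1 > config_send_uevent" */
            return request_firmware_nowait(THIS_MODULE, true,
                                           "test-firmware.bin", dev,
                                           GFP_KERNEL, NULL, blob_ready);
    }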
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
new file mode 100755
index 000000000..8a853ace5
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Library of helpers for test scripts.
+set -e
+
+DIR=/sys/devices/virtual/misc/test_firmware
+
+PROC_CONFIG="/proc/config.gz"
+TEST_DIR=$(dirname $0)
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+print_reqs_exit()
+{
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ exit $ksft_skip
+}
+
+test_modprobe()
+{
+ if [ ! -d $DIR ]; then
+ print_reqs_exit
+ fi
+}
+
+check_mods()
+{
+ local uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "skip all tests: must be run as root" >&2
+ exit $ksft_skip
+ fi
+
+ trap "test_modprobe" EXIT
+ if [ ! -d $DIR ]; then
+ modprobe test_firmware
+ fi
+ if [ ! -f $PROC_CONFIG ]; then
+ if modprobe configs 2>/dev/null; then
+ echo "Loaded configs module"
+ if [ ! -f $PROC_CONFIG ]; then
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ echo "Resorting to old heuristics" >&2
+ fi
+ else
+ echo "Failed to load configs module, using old heuristics" >&2
+ fi
+ fi
+}
+
+check_setup()
+{
+ HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)"
+ HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
+ PROC_FW_IGNORE_SYSFS_FALLBACK="0"
+ PROC_FW_FORCE_SYSFS_FALLBACK="0"
+
+ if [ -z $PROC_SYS_DIR ]; then
+ PROC_SYS_DIR="/proc/sys/kernel"
+ fi
+
+ FW_PROC="${PROC_SYS_DIR}/firmware_config"
+ FW_FORCE_SYSFS_FALLBACK="$FW_PROC/force_sysfs_fallback"
+ FW_IGNORE_SYSFS_FALLBACK="$FW_PROC/ignore_sysfs_fallback"
+
+ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+ PROC_FW_FORCE_SYSFS_FALLBACK="$(cat $FW_FORCE_SYSFS_FALLBACK)"
+ fi
+
+ if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then
+ PROC_FW_IGNORE_SYSFS_FALLBACK="$(cat $FW_IGNORE_SYSFS_FALLBACK)"
+ fi
+
+ if [ "$PROC_FW_FORCE_SYSFS_FALLBACK" = "1" ]; then
+ HAS_FW_LOADER_USER_HELPER="yes"
+ HAS_FW_LOADER_USER_HELPER_FALLBACK="yes"
+ fi
+
+ if [ "$PROC_FW_IGNORE_SYSFS_FALLBACK" = "1" ]; then
+ HAS_FW_LOADER_USER_HELPER_FALLBACK="no"
+ HAS_FW_LOADER_USER_HELPER="no"
+ fi
+
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ OLD_TIMEOUT="$(cat /sys/class/firmware/timeout)"
+ fi
+
+ OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)"
+}
+
+verify_reqs()
+{
+ if [ "$TEST_REQS_FW_SYSFS_FALLBACK" = "yes" ]; then
+ if [ ! "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "usermode helper disabled so ignoring test"
+ exit 0
+ fi
+ fi
+}
+
+setup_tmp_file()
+{
+ FWPATH=$(mktemp -d)
+ FW="$FWPATH/test-firmware.bin"
+ echo "ABCD0123" >"$FW"
+ NAME=$(basename "$FW")
+ if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
+ echo -n "$FWPATH" >/sys/module/firmware_class/parameters/path
+ fi
+}
+
+__setup_random_file()
+{
+ RANDOM_FILE_PATH="$(mktemp -p $FWPATH)"
+	# mktemp warns that dry-run (-n) is unsafe, so create the file and
+	# remove it again to get a pathname that is known not to exist.
+ if [[ "$1" = "fake" ]]; then
+ rm -rf $RANDOM_FILE_PATH
+ sync
+ else
+ echo "ABCD0123" >"$RANDOM_FILE_PATH"
+ fi
+ echo $RANDOM_FILE_PATH
+}
+
+setup_random_file()
+{
+ echo $(__setup_random_file)
+}
+
+setup_random_file_fake()
+{
+ echo $(__setup_random_file fake)
+}
+
+proc_set_force_sysfs_fallback()
+{
+ if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+ echo -n $1 > $FW_FORCE_SYSFS_FALLBACK
+ check_setup
+ fi
+}
+
+proc_set_ignore_sysfs_fallback()
+{
+ if [ -f $FW_IGNORE_SYSFS_FALLBACK ]; then
+ echo -n $1 > $FW_IGNORE_SYSFS_FALLBACK
+ check_setup
+ fi
+}
+
+proc_restore_defaults()
+{
+ proc_set_force_sysfs_fallback 0
+ proc_set_ignore_sysfs_fallback 0
+}
+
+test_finish()
+{
+ if [ "$HAS_FW_LOADER_USER_HELPER" = "yes" ]; then
+ echo "$OLD_TIMEOUT" >/sys/class/firmware/timeout
+ fi
+ if [ "$TEST_REQS_FW_SET_CUSTOM_PATH" = "yes" ]; then
+ if [ "$OLD_FWPATH" = "" ]; then
+ # A zero-length write won't work; write a null byte
+ printf '\000' >/sys/module/firmware_class/parameters/path
+ else
+ echo -n "$OLD_FWPATH" >/sys/module/firmware_class/parameters/path
+ fi
+ fi
+ if [ -f $FW ]; then
+ rm -f "$FW"
+ fi
+ if [ -d $FWPATH ]; then
+ rm -rf "$FWPATH"
+ fi
+ proc_restore_defaults
+}
+
+kconfig_has()
+{
+ if [ -f $PROC_CONFIG ]; then
+ if zgrep -q $1 $PROC_CONFIG 2>/dev/null; then
+ echo "yes"
+ else
+ echo "no"
+ fi
+ else
+ # We currently don't have easy heuristics to infer this
+ # so best we can do is just try to use the kernel assuming
+ # you had enabled it. This matches the old behaviour.
+ if [ "$1" = "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y" ]; then
+ echo "yes"
+ elif [ "$1" = "CONFIG_FW_LOADER_USER_HELPER=y" ]; then
+ if [ -d /sys/class/firmware/ ]; then
+ echo yes
+ else
+ echo no
+ fi
+ fi
+ fi
+}
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
new file mode 100755
index 000000000..cffdd4eb0
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This runs all known tests across all kernel configurations we can
+# emulate in one run.
+
+set -e
+
+TEST_DIR=$(dirname $0)
+source $TEST_DIR/fw_lib.sh
+
+export HAS_FW_LOADER_USER_HELPER=""
+export HAS_FW_LOADER_USER_HELPER_FALLBACK=""
+
+run_tests()
+{
+ proc_set_force_sysfs_fallback $1
+ proc_set_ignore_sysfs_fallback $2
+ $TEST_DIR/fw_filesystem.sh
+
+ proc_set_force_sysfs_fallback $1
+ proc_set_ignore_sysfs_fallback $2
+ $TEST_DIR/fw_fallback.sh
+}
+
+run_test_config_0001()
+{
+ echo "-----------------------------------------------------"
+ echo "Running kernel configuration test 1 -- rare"
+ echo "Emulates:"
+ echo "CONFIG_FW_LOADER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER=n"
+ echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n"
+ run_tests 0 1
+}
+
+run_test_config_0002()
+{
+ echo "-----------------------------------------------------"
+ echo "Running kernel configuration test 2 -- distro"
+ echo "Emulates:"
+ echo "CONFIG_FW_LOADER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=n"
+ proc_set_ignore_sysfs_fallback 0
+ run_tests 0 0
+}
+
+run_test_config_0003()
+{
+ echo "-----------------------------------------------------"
+ echo "Running kernel configuration test 3 -- android"
+ echo "Emulates:"
+ echo "CONFIG_FW_LOADER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER=y"
+ echo "CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y"
+ run_tests 1 0
+}
+
+check_mods
+check_setup
+
+if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
+ run_test_config_0001
+ run_test_config_0002
+ run_test_config_0003
+else
+ echo "Running basic kernel configuration, working with your config"
+ run_tests
+fi
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
new file mode 100644
index 000000000..98d8a5a63
--- /dev/null
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -0,0 +1 @@
+logs
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
new file mode 100644
index 000000000..cd1f5b3a7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := ftracetest
+TEST_FILES := test.d
+EXTRA_CLEAN := $(OUTPUT)/logs/*
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ftrace/README b/tools/testing/selftests/ftrace/README
new file mode 100644
index 000000000..182e76fa4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/README
@@ -0,0 +1,82 @@
+Linux Ftrace Testcases
+
+This is a collection of testcases for the ftrace tracing feature in the
+Linux kernel. Since ftrace exports its interfaces via debugfs, we just
+need shell scripts for testing. Feel free to add new test cases.
+
+Running the ftrace testcases
+============================
+
+You need to be root to run the testcases.
+To run all testcases:
+
+ $ sudo ./ftracetest
+
+To run specific testcases:
+
+ # ./ftracetest test.d/basic3.tc
+
+You can also run all testcases under a given directory:
+
+ # ./ftracetest test.d/kprobe/
+
+Contributing new testcases
+==========================
+
+Copy test.d/template to your testcase (whose filename must have a *.tc
+extension) and rewrite the test description line.
+
+ * The working directory of the script is <debugfs>/tracing/.
+
+ * Take care with side effects as the tests are run with root privilege.
+
+ * The tests should not run for a long period of time (more than 1 min.);
+   these are meant to be unit tests.
+
+ * You can add a directory for your testcases under test.d/ if needed.
+
+ * The test cases should run on dash (or the busybox shell) so they
+   also work on minimal cross-build environments.
+
+ * Note that the tests are run with "set -e" (errexit) option. If any
+ command fails, the test will be terminated immediately.
+
+ * The tests can return some result codes instead of pass or fail by
+ using exit_unresolved, exit_untested, exit_unsupported and exit_xfail.
+
+Result code
+===========
+
+Ftracetest supports the following result codes.
+
+ * PASS: The test succeeded as expected. A test which exits with 0 is
+   counted as a passed test.
+
+ * FAIL: The test failed, but was expected to succeed. A test which exits
+   with a non-zero status is counted as a failed test.
+
+ * UNRESOLVED: The test produced unclear or intermediate results;
+   for example, the test was interrupted, the test depends on a
+   previous test which failed, or the test was set up incorrectly.
+   A test in this situation must call exit_unresolved.
+
+ * UNTESTED: The test was not run; it is currently just a placeholder.
+ In this case, the test must call exit_untested.
+
+ * UNSUPPORTED: The test failed because a required feature is not available.
+ In this case, the test must call exit_unsupported.
+
+ * XFAIL: The test failed, and was expected to fail.
+ To return XFAIL, call exit_xfail from the test.
+
+There are sample test scripts for each result code under samples/.
+You can also run samples as below:
+
+ # ./ftracetest samples/
+
+TODO
+====
+
+ * Fancy colored output :)
+
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
new file mode 100644
index 000000000..07db5ab09
--- /dev/null
+++ b/tools/testing/selftests/ftrace/config
@@ -0,0 +1,9 @@
+CONFIG_KPROBES=y
+CONFIG_FTRACE=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_STACK_TRACER=y
+CONFIG_HIST_TRIGGERS=y
+CONFIG_PREEMPT_TRACER=y
+CONFIG_IRQSOFF_TRACER=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
new file mode 100755
index 000000000..f9a9d424c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -0,0 +1,330 @@
+#!/bin/sh
+
+# ftracetest - Ftrace test shell scripts
+#
+# Copyright (C) Hitachi Ltd., 2014
+# Written by Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+usage() { # errno [message]
+[ ! -z "$2" ] && echo $2
+echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
+echo " Options:"
+echo " -h|--help Show help message"
+echo " -k|--keep Keep passed test logs"
+echo " -v|--verbose Increase verbosity of test messages"
+echo " -vv Alias of -v -v (Show all results in stdout)"
+echo " -vvv Alias of -v -v -v (Show all commands immediately)"
+echo " --fail-unsupported Treat UNSUPPORTED as a failure"
+echo " -d|--debug Debug mode (trace all shell commands)"
+echo " -l|--logdir <dir> Save logs on the <dir>"
+echo " If <dir> is -, all logs output in console only"
+exit $1
+}
+
+errexit() { # message
+ echo "Error: $1" 1>&2
+ exit 1
+}
+
+# Ensuring user privilege
+if [ `id -u` -ne 0 ]; then
+ errexit "this must be run by root user"
+fi
+
+# Utilities
+absdir() { # file_path
+ (cd `dirname $1`; pwd)
+}
+
+abspath() {
+ echo `absdir $1`/`basename $1`
+}
+
+find_testcases() { #directory
+ echo `find $1 -name \*.tc | sort`
+}
+
+parse_opts() { # opts
+ local OPT_TEST_CASES=
+ local OPT_TEST_DIR=
+
+ while [ ! -z "$1" ]; do
+ case "$1" in
+ --help|-h)
+ usage 0
+ ;;
+ --keep|-k)
+ KEEP_LOG=1
+ shift 1
+ ;;
+ --verbose|-v|-vv|-vvv)
+ VERBOSE=$((VERBOSE + 1))
+ [ $1 = '-vv' ] && VERBOSE=$((VERBOSE + 1))
+ [ $1 = '-vvv' ] && VERBOSE=$((VERBOSE + 2))
+ shift 1
+ ;;
+ --debug|-d)
+ DEBUG=1
+ shift 1
+ ;;
+ --fail-unsupported)
+ UNSUPPORTED_RESULT=1
+ shift 1
+ ;;
+ --logdir|-l)
+ LOG_DIR=$2
+ shift 2
+ ;;
+ *.tc)
+ if [ -f "$1" ]; then
+ OPT_TEST_CASES="$OPT_TEST_CASES `abspath $1`"
+ shift 1
+ else
+ usage 1 "$1 is not a testcase"
+ fi
+ ;;
+ *)
+ if [ -d "$1" ]; then
+ OPT_TEST_DIR=`abspath $1`
+ OPT_TEST_CASES="$OPT_TEST_CASES `find_testcases $OPT_TEST_DIR`"
+ shift 1
+ else
+ usage 1 "Invalid option ($1)"
+ fi
+ ;;
+ esac
+ done
+ if [ ! -z "$OPT_TEST_CASES" ]; then
+ TEST_CASES=$OPT_TEST_CASES
+ fi
+}
+
+# Parameters
+DEBUGFS_DIR=`grep debugfs /proc/mounts | cut -f2 -d' ' | head -1`
+if [ -z "$DEBUGFS_DIR" ]; then
+ TRACING_DIR=`grep tracefs /proc/mounts | cut -f2 -d' ' | head -1`
+else
+ TRACING_DIR=$DEBUGFS_DIR/tracing
+fi
+
+TOP_DIR=`absdir $0`
+TEST_DIR=$TOP_DIR/test.d
+TEST_CASES=`find_testcases $TEST_DIR`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
+KEEP_LOG=0
+DEBUG=0
+VERBOSE=0
+UNSUPPORTED_RESULT=0
+# Parse command-line options
+parse_opts $*
+
+[ $DEBUG -ne 0 ] && set -x
+
+# Verify parameters
+if [ -z "$TRACING_DIR" -o ! -d "$TRACING_DIR" ]; then
+ errexit "No ftrace directory found"
+fi
+
+# Preparing logs
+if [ "x$LOG_DIR" = "x-" ]; then
+ LOG_FILE=
+ date
+else
+ LOG_FILE=$LOG_DIR/ftracetest.log
+ mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+ date > $LOG_FILE
+fi
+
+prlog() { # messages
+ [ -z "$LOG_FILE" ] && echo "$@" || echo "$@" | tee -a $LOG_FILE
+}
+catlog() { #file
+ [ -z "$LOG_FILE" ] && cat $1 || cat $1 | tee -a $LOG_FILE
+}
+prlog "=== Ftrace unit tests ==="
+
+
+# Testcase management
+# Test result codes - Dejagnu extended code
+PASS=0 # The test succeeded.
+FAIL=1 # The test failed, but was expected to succeed.
+UNRESOLVED=2 # The test produced indeterminate results. (e.g. interrupted)
+UNTESTED=3 # The test was not run, currently just a placeholder.
+UNSUPPORTED=4 # The test failed because of lack of feature.
+XFAIL=5 # The test failed, and was expected to fail.
+
+# Accumulations
+PASSED_CASES=
+FAILED_CASES=
+UNRESOLVED_CASES=
+UNTESTED_CASES=
+UNSUPPORTED_CASES=
+XFAILED_CASES=
+UNDEFINED_CASES=
+TOTAL_RESULT=0
+
+INSTANCE=
+CASENO=0
+testcase() { # testfile
+ CASENO=$((CASENO+1))
+ desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
+ prlog -n "[$CASENO]$INSTANCE$desc"
+}
+
+test_on_instance() { # testfile
+ grep -q "^#[ \t]*flags:.*instance" $1
+}
+
+eval_result() { # sigval
+ case $1 in
+ $PASS)
+ prlog " [PASS]"
+ PASSED_CASES="$PASSED_CASES $CASENO"
+ return 0
+ ;;
+ $FAIL)
+ prlog " [FAIL]"
+ FAILED_CASES="$FAILED_CASES $CASENO"
+ return 1 # this is a bug.
+ ;;
+ $UNRESOLVED)
+ prlog " [UNRESOLVED]"
+ UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
+ return 1 # this is a kind of bug.. something happened.
+ ;;
+ $UNTESTED)
+ prlog " [UNTESTED]"
+ UNTESTED_CASES="$UNTESTED_CASES $CASENO"
+ return 0
+ ;;
+ $UNSUPPORTED)
+ prlog " [UNSUPPORTED]"
+ UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
+ return $UNSUPPORTED_RESULT # depends on use case
+ ;;
+ $XFAIL)
+ prlog " [XFAIL]"
+ XFAILED_CASES="$XFAILED_CASES $CASENO"
+ return 0
+ ;;
+ *)
+ prlog " [UNDEFINED]"
+ UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
+ return 1 # this must be a test bug
+ ;;
+ esac
+}
+
+# Signal handling for result codes
+SIG_RESULT=
+SIG_BASE=36 # Use realtime signals
+SIG_PID=$$
+
+exit_pass () {
+ exit 0
+}
+
+SIG_FAIL=$((SIG_BASE + FAIL))
+exit_fail () {
+ exit 1
+}
+trap 'SIG_RESULT=$FAIL' $SIG_FAIL
+
+SIG_UNRESOLVED=$((SIG_BASE + UNRESOLVED))
+exit_unresolved () {
+ kill -s $SIG_UNRESOLVED $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$UNRESOLVED' $SIG_UNRESOLVED
+
+SIG_UNTESTED=$((SIG_BASE + UNTESTED))
+exit_untested () {
+ kill -s $SIG_UNTESTED $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$UNTESTED' $SIG_UNTESTED
+
+SIG_UNSUPPORTED=$((SIG_BASE + UNSUPPORTED))
+exit_unsupported () {
+ kill -s $SIG_UNSUPPORTED $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$UNSUPPORTED' $SIG_UNSUPPORTED
+
+SIG_XFAIL=$((SIG_BASE + XFAIL))
+exit_xfail () {
+ kill -s $SIG_XFAIL $SIG_PID
+ exit 0
+}
+trap 'SIG_RESULT=$XFAIL' $SIG_XFAIL
+
+__run_test() { # testfile
+  # Set up PID for the testcase: $$ is not updated in subshells, so read
+  # the real PID from /proc/self/stat.
+ (cd $TRACING_DIR; read PID _ < /proc/self/stat; set -e; set -x; initialize_ftrace; . $1)
+ [ $? -ne 0 ] && kill -s $SIG_FAIL $SIG_PID
+}
+
+# Run one test case
+run_test() { # testfile
+ local testname=`basename $1`
+ if [ ! -z "$LOG_FILE" ] ; then
+ local testlog=`mktemp $LOG_DIR/${testname}-log.XXXXXX`
+ else
+ local testlog=/proc/self/fd/1
+ fi
+ export TMPDIR=`mktemp -d /tmp/ftracetest-dir.XXXXXX`
+ testcase $1
+ echo "execute$INSTANCE: "$1 > $testlog
+ SIG_RESULT=0
+ if [ -z "$LOG_FILE" ]; then
+ __run_test $1 2>&1
+ elif [ $VERBOSE -ge 3 ]; then
+ __run_test $1 | tee -a $testlog 2>&1
+ elif [ $VERBOSE -eq 2 ]; then
+ __run_test $1 2>> $testlog | tee -a $testlog
+ else
+ __run_test $1 >> $testlog 2>&1
+ fi
+ eval_result $SIG_RESULT
+ if [ $? -eq 0 ]; then
+ # Remove test log if the test was done as it was expected.
+ [ $KEEP_LOG -eq 0 -a ! -z "$LOG_FILE" ] && rm $testlog
+ else
+ [ $VERBOSE -eq 1 -o $VERBOSE -eq 2 ] && catlog $testlog
+ TOTAL_RESULT=1
+ fi
+ rm -rf $TMPDIR
+}
+
+# load in the helper functions
+. $TEST_DIR/functions
+
+# Main loop
+for t in $TEST_CASES; do
+ run_test $t
+done
+
+# Test on instance loop
+INSTANCE=" (instance) "
+for t in $TEST_CASES; do
+ test_on_instance $t || continue
+ SAVED_TRACING_DIR=$TRACING_DIR
+ export TRACING_DIR=`mktemp -d $TRACING_DIR/instances/ftracetest.XXXXXX`
+ run_test $t
+ rmdir $TRACING_DIR
+ TRACING_DIR=$SAVED_TRACING_DIR
+done
+
+prlog ""
+prlog "# of passed: " `echo $PASSED_CASES | wc -w`
+prlog "# of failed: " `echo $FAILED_CASES | wc -w`
+prlog "# of unresolved: " `echo $UNRESOLVED_CASES | wc -w`
+prlog "# of untested: " `echo $UNTESTED_CASES | wc -w`
+prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
+prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
+prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+
+# if no error, return 0
+exit $TOTAL_RESULT
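
The exit_* helpers above report results out of the testcase subshell by
sending the top-level shell a realtime signal offset by the result code
(SIG_BASE + code), which the matching trap records in SIG_RESULT. A
minimal C analogue of the same pattern, with names chosen for
illustration (note that ftracetest pins SIG_BASE to 36 rather than using
SIGRTMIN):

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    static volatile sig_atomic_t result;      /* analogue of SIG_RESULT */

    static void record(int sig)
    {
            result = sig - SIGRTMIN;          /* recover the result code */
    }

    int main(void)
    {
            struct sigaction sa = { .sa_handler = record };

            sigaction(SIGRTMIN + 4, &sa, NULL); /* 4 == UNSUPPORTED above */
            kill(getpid(), SIGRTMIN + 4);       /* as exit_unsupported does */
            printf("result code %d\n", (int)result);
            return 0;
    }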
diff --git a/tools/testing/selftests/ftrace/samples/fail.tc b/tools/testing/selftests/ftrace/samples/fail.tc
new file mode 100644
index 000000000..15e35b956
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/fail.tc
@@ -0,0 +1,4 @@
+#!/bin/sh
+# description: failure-case example
+cat non-exist-file
+echo "this is not executed"
diff --git a/tools/testing/selftests/ftrace/samples/pass.tc b/tools/testing/selftests/ftrace/samples/pass.tc
new file mode 100644
index 000000000..d01549370
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/pass.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: pass-case example
+return 0
diff --git a/tools/testing/selftests/ftrace/samples/unresolved.tc b/tools/testing/selftests/ftrace/samples/unresolved.tc
new file mode 100644
index 000000000..41e99d335
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/unresolved.tc
@@ -0,0 +1,4 @@
+#!/bin/sh
+# description: unresolved-case example
+trap exit_unresolved INT
+kill -INT $PID
diff --git a/tools/testing/selftests/ftrace/samples/unsupported.tc b/tools/testing/selftests/ftrace/samples/unsupported.tc
new file mode 100644
index 000000000..45910ff13
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/unsupported.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: unsupported-case example
+exit_unsupported
diff --git a/tools/testing/selftests/ftrace/samples/untested.tc b/tools/testing/selftests/ftrace/samples/untested.tc
new file mode 100644
index 000000000..35a45946e
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/untested.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: untested-case example
+exit_untested
diff --git a/tools/testing/selftests/ftrace/samples/xfail.tc b/tools/testing/selftests/ftrace/samples/xfail.tc
new file mode 100644
index 000000000..9dd395323
--- /dev/null
+++ b/tools/testing/selftests/ftrace/samples/xfail.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: xfail-case example
+cat non-exist-file || exit_xfail
diff --git a/tools/testing/selftests/ftrace/settings b/tools/testing/selftests/ftrace/settings
new file mode 100644
index 000000000..e7b941753
--- /dev/null
+++ b/tools/testing/selftests/ftrace/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
new file mode 100644
index 000000000..9980ff14a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic1.tc
@@ -0,0 +1,3 @@
+#!/bin/sh
+# description: Basic trace file check
+test -f README -a -f trace -a -f tracing_on -a -f trace_pipe
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
new file mode 100644
index 000000000..531e47236
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic2.tc
@@ -0,0 +1,9 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic test for tracers
+# flags: instance
+test -f available_tracers
+for t in `cat available_tracers`; do
+ echo $t > current_tracer
+done
+echo nop > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
new file mode 100644
index 000000000..58a2506f7
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic3.tc
@@ -0,0 +1,10 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic trace clock test
+# flags: instance
+test -f trace_clock
+for c in `cat trace_clock | tr -d \[\]`; do
+ echo $c > trace_clock
+ grep '\['$c'\]' trace_clock
+done
+echo local > trace_clock
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
new file mode 100644
index 000000000..0696098d6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc
@@ -0,0 +1,5 @@
+#!/bin/sh
+# description: Basic event tracing check
+test -f available_events -a -f set_event -a -d events
+# check scheduler events are available
+grep -q sched available_events && exit_pass || exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
new file mode 100644
index 000000000..3b1f45e13
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot.tc
@@ -0,0 +1,28 @@
+#!/bin/sh
+# description: Snapshot and tracing setting
+# flags: instance
+
+[ ! -f snapshot ] && exit_unsupported
+
+echo "Set tracing off"
+echo 0 > tracing_on
+
+echo "Allocate and take a snapshot"
+echo 1 > snapshot
+
+# Since the trace buffer is empty, the snapshot is also empty, but allocated
+grep -q "Snapshot is allocated" snapshot
+
+echo "Ensure keep tracing off"
+test `cat tracing_on` -eq 0
+
+echo "Set tracing on"
+echo 1 > tracing_on
+
+echo "Take a snapshot again"
+echo 1 > snapshot
+
+echo "Ensure keep tracing on"
+test `cat tracing_on` -eq 1
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
new file mode 100644
index 000000000..9daf03418
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with event level files
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 'sched:sched_switch' > set_event
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
+echo 0 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -ne 0 ]; then
+ fail "sched_switch events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
new file mode 100644
index 000000000..132478b30
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-pid.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - restricts events based on pid
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ echo > set_event_pid
+ echo 0 > options/event-fork
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f set_event_pid ]; then
+ echo "event pid filtering is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events are not recorded"
+fi
+
+do_reset
+
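+# the first field of /proc/self/stat is this shell's own pid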
+read mypid rest < /proc/self/stat
+
+echo $mypid > set_event_pid
+echo 'sched:sched_switch' > set_event
+
+yield
+
+count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l`
+if [ $count -ne 0 ]; then
+ fail "sched_switch events from other task are recorded"
+fi
+
+do_reset
+
+echo $mypid > set_event_pid
+echo 1 > options/event-fork
+echo 1 > events/sched/sched_switch/enable
+
+yield
+
+count=`cat trace | grep sched_switch | grep -v "pid=$mypid" | wc -l`
+if [ $count -eq 0 ]; then
+ fail "sched_switch events from other task are not recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
new file mode 100644
index 000000000..6a37a8642
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with subsystem level files
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo 'sched:*' > set_event
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -lt 3 ]; then
+ fail "at least fork, exec and exit events should be recorded"
+fi
+
+do_reset
+
+echo 1 > events/sched/enable
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -lt 3 ]; then
+ fail "at least fork, exec and exit events should be recorded"
+fi
+
+do_reset
+
+echo 0 > events/sched/enable
+
+yield
+
+count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l`
+if [ $count -ne 0 ]; then
+ fail "any of scheduler events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
new file mode 100644
index 000000000..4e9b6e2c0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc
@@ -0,0 +1,65 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - enable/disable with top level files
+
+do_reset() {
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+if [ ! -f available_events -o ! -f set_event -o ! -d events ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo '*:*' > set_event
+
+yield
+
+echo 0 > tracing_on
+
+count=`head -n 128 trace | grep -v ^# | wc -l`
+if [ $count -eq 0 ]; then
+ fail "none of events are recorded"
+fi
+
+do_reset
+
+echo 1 > events/enable
+echo 1 > tracing_on
+
+yield
+
+echo 0 > tracing_on
+count=`head -n 128 trace | grep -v ^# | wc -l`
+if [ $count -eq 0 ]; then
+ fail "none of events are recorded"
+fi
+
+do_reset
+
+echo 0 > events/enable
+
+yield
+
+count=`cat trace | grep -v ^# | wc -l`
+if [ $count -ne 0 ]; then
+ fail "any of events should not be recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
new file mode 100644
index 000000000..1aec99d10
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -0,0 +1,92 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters with stack tracer
+
+# Make sure that function graph filtering works, and is not
+# affected by other enabled tracers (like the stack tracer)
+
+if ! grep -q function_graph available_tracers; then
+ echo "no function graph tracer configured"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_tracer
+ if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
+ echo 0 > /proc/sys/kernel/stack_tracer_enabled
+ fi
+ enable_tracing
+ clear_trace
+ echo > set_ftrace_filter
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace;
+
+# filter something, schedule is always good
+if ! echo "schedule" > set_ftrace_filter; then
+ # test for powerpc 64
+ if ! echo ".schedule" > set_ftrace_filter; then
+ fail "can not enable schedule filter"
+ fi
+fi
+
+echo function_graph > current_tracer
+
+if [ ! -f stack_trace ]; then
+ echo "Stack tracer not configured"
+ do_reset
+ exit_unsupported;
+fi
+
+echo "Now testing with stack tracer"
+
+echo 1 > /proc/sys/kernel/stack_tracer_enabled
+
+disable_tracing
+clear_trace
+enable_tracing
+sleep 1
+
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+
+if [ $count -ne 0 ]; then
+ fail "Graph filtering not working with stack tracer?"
+fi
+
+# Make sure we did find something
+count=`cat trace | grep 'schedule()' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No schedule traces found?"
+fi
+
+echo 0 > /proc/sys/kernel/stack_tracer_enabled
+clear_trace
+sleep 1
+
+
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+
+if [ $count -ne 0 ]; then
+ fail "Graph filtering not working after stack tracer disabled?"
+fi
+
+count=`cat trace | grep 'schedule()' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No schedule traces found?"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
new file mode 100644
index 000000000..9f8d27ca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+
+# Make sure that function graph filtering works
+
+if ! grep -q function_graph available_tracers; then
+ echo "no function graph tracer configured"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_tracer
+ enable_tracing
+ clear_trace
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+# filter something, schedule is always good
+if ! echo "schedule" > set_ftrace_filter; then
+ # test for powerpc 64
+ if ! echo ".schedule" > set_ftrace_filter; then
+ fail "can not enable schedule filter"
+ fi
+fi
+
+echo function_graph > current_tracer
+enable_tracing
+sleep 1
+# search for functions (lines containing "()"), and make sure
+# that only the schedule function was found
+count=`cat trace | grep '()' | grep -v schedule | wc -l`
+if [ $count -ne 0 ]; then
+ fail "Graph filtering not working by itself?"
+fi
+
+# Make sure we did find something
+count=`cat trace | grep 'schedule()' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No schedule traces found?"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
new file mode 100644
index 000000000..f4e92afab
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function glob filters
+
+# Make sure that function glob matching filter works.
+
+if ! grep -q function available_tracers; then
+ echo "no function tracer configured"
+ exit_unsupported
+fi
+
+disable_tracing
+clear_trace
+
+ftrace_filter_check() { # glob grep
+ echo "$1" > set_ftrace_filter
+ cut -f1 -d" " set_ftrace_filter > $TMPDIR/actual
+ cut -f1 -d" " available_filter_functions | grep "$2" > $TMPDIR/expected
+ DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+ test -z "$DIFF"
+}
+
+# filter by *, front match
+ftrace_filter_check '*schedule' '^.*schedule$'
+
+# filter by *, middle match
+ftrace_filter_check '*schedule*' '^.*schedule.*$'
+
+# filter by *, end match
+ftrace_filter_check 'schedule*' '^schedule.*$'
+
+# filter by *mid*end
+ftrace_filter_check '*pin*lock' '.*pin.*lock$'
+
+# filter by start*mid*
+ftrace_filter_check 'mutex*try*' '^mutex.*try.*'
+
+# The advanced full-glob matching feature was added only recently.
+# Skip the tests if we are sure the kernel does not support it.
+if grep -q 'accepts: .* glob-matching-pattern' README ; then
+
+# filter by *, both side match
+ftrace_filter_check 'sch*ule' '^sch.*ule$'
+
+# filter by char class.
+ftrace_filter_check '[Ss]y[Ss]_*' '^[Ss]y[Ss]_.*$'
+
+# filter by ?, schedule is always good
+if ! echo "sch?dule" > set_ftrace_filter; then
+ # test for powerpc 64
+ if ! echo ".sch?dule" > set_ftrace_filter; then
+ fail "can not enable schedule filter"
+ fi
+ cat set_ftrace_filter | grep '^.schedule$'
+else
+ cat set_ftrace_filter | grep '^schedule$'
+fi
+
+fi
+
+echo > set_ftrace_filter
+enable_tracing
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
new file mode 100644
index 000000000..524ce24b3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -0,0 +1,118 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function pid filters
+
+# Make sure that function pid matching filter works.
+# Also test it on an instance directory
+
+if ! grep -q function available_tracers; then
+ echo "no function tracer configured"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_pid ]; then
+ echo "set_ftrace_pid not found? Is function tracer not set?"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is function tracer not set?"
+ exit_unsupported
+fi
+
+do_function_fork=1
+
+if [ ! -f options/function-fork ]; then
+ do_function_fork=0
+ echo "no option for function-fork found. Option will not be tested."
+fi
+
+read PID _ < /proc/self/stat
+
+if [ $do_function_fork -eq 1 ]; then
+ # default value of function-fork option
+ orig_value=`grep function-fork trace_options`
+fi
+
+do_reset() {
+ reset_tracer
+ clear_trace
+ enable_tracing
+ echo > set_ftrace_filter
+ echo > set_ftrace_pid
+
+ if [ $do_function_fork -eq 0 ]; then
+ return
+ fi
+
+ echo $orig_value > trace_options
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+yield() {
+ ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1
+}
+
+do_test() {
+ disable_tracing
+
+ echo do_execve* > set_ftrace_filter
+ echo *do_fork >> set_ftrace_filter
+
+ echo $PID > set_ftrace_pid
+ echo function > current_tracer
+
+ if [ $do_function_fork -eq 1 ]; then
+ # don't allow children to be traced
+ echo nofunction-fork > trace_options
+ fi
+
+ enable_tracing
+ yield
+
+ count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+ count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+ # count_other should be 0
+ if [ $count_pid -eq 0 -o $count_other -ne 0 ]; then
+ fail "PID filtering not working?"
+ fi
+
+ disable_tracing
+ clear_trace
+
+ if [ $do_function_fork -eq 0 ]; then
+ return
+ fi
+
+ # allow children to be traced
+ echo function-fork > trace_options
+
+ enable_tracing
+ yield
+
+ count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+ count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+ # count_other should NOT be 0
+ if [ $count_pid -eq 0 -o $count_other -eq 0 ]; then
+ fail "PID filtering not following fork?"
+ fi
+}
+
+do_test
+
+mkdir instances/foo
+cd instances/foo
+do_test
+cd ../../
+rmdir instances/foo
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
new file mode 100644
index 000000000..6fed4cf2d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -0,0 +1,123 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test for function event triggers
+# flags: instance
+#
+# Ftrace allows adding triggers to functions, such as enabling or disabling
+# tracing, enabling or disabling trace events, or recording a stack trace
+# within the ring buffer.
+#
+# This test is designed to test event triggers
+#
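+# As an illustration (this test drives it through variables below), a
+# trigger of the form
+#   echo 'schedule:enable_event:sched:sched_switch' > set_ftrace_filter
+# enables the sched_switch event each time schedule() is called.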
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_ftrace_filter
+ reset_tracer
+ disable_events
+ clear_trace
+ enable_tracing
+}
+
+fail() { # mesg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+SLEEP_TIME=".1"
+
+do_reset
+
+echo "Testing function probes with events:"
+
+EVENT="sched:sched_switch"
+EVENT_ENABLE="events/sched/sched_switch/enable"
+
+cnt_trace() {
+ grep -v '^#' trace | wc -l
+}
+
+test_event_enabled() {
+ val=$1
+
+ e=`cat $EVENT_ENABLE`
+ if [ "$e" != $val ]; then
+ fail "Expected $val but found $e"
+ fi
+}
+
+run_enable_disable() {
+ enable=$1 # enable
+ Enable=$2 # Enable
+ check_disable=$3 # 0
+ check_enable_star=$4 # 1*
+ check_disable_star=$5 # 0*
+
+ cnt=`cnt_trace`
+ if [ $cnt -ne 0 ]; then
+ fail "Found junk in trace file"
+ fi
+
+ echo "$Enable event all the time"
+
+ echo $check_disable > $EVENT_ENABLE
+ sleep $SLEEP_TIME
+
+ test_event_enabled $check_disable
+
+ echo "schedule:${enable}_event:$EVENT" > set_ftrace_filter
+ if [ -d ../../instances ]; then # Check instances
+ cur=`cat set_ftrace_filter`
+ top=`cat ../../set_ftrace_filter`
+ if [ "$cur" = "$top" ]; then
+ echo "This kernel is too old to support per instance filter"
+ reset_ftrace_filter
+ exit_unsupported
+ fi
+ fi
+
+ echo " make sure it works 5 times"
+
+ for i in `seq 5`; do
+ sleep $SLEEP_TIME
+ echo " test $i"
+ test_event_enabled $check_enable_star
+
+ echo $check_disable > $EVENT_ENABLE
+ done
+ sleep $SLEEP_TIME
+ echo " make sure it's still works"
+ test_event_enabled $check_enable_star
+
+ reset_ftrace_filter
+
+ echo " make sure it only works 3 times"
+
+ echo $check_disable > $EVENT_ENABLE
+ sleep $SLEEP_TIME
+
+ echo "schedule:${enable}_event:$EVENT:3" > set_ftrace_filter
+
+ for i in `seq 3`; do
+ sleep $SLEEP_TIME
+ echo " test $i"
+ test_event_enabled $check_enable_star
+
+ echo $check_disable > $EVENT_ENABLE
+ done
+
+ sleep $SLEEP_TIME
+ echo " make sure it stop working"
+ test_event_enabled $check_disable_star
+
+ do_reset
+}
+
+run_enable_disable enable Enable 0 "1*" "0*"
+run_enable_disable disable Disable 1 "0*" "1*"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
new file mode 100644
index 000000000..b2d5a8feb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -0,0 +1,81 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function profiler with function tracing
+
+# There was a bug after a rewrite of the ftrace infrastructure that
+# caused the function_profiler not to be able to run with the function
+# tracer, because the function_profiler used the function_graph tracer
+# and it was assumed the two could not run simultaneously.
+#
+# There was another related bug where the solution to the first bug
+# broke the way filtering of the function tracer worked.
+#
+# This test triggers those bugs on those kernels.
+#
+# We need function_graph and profiling to run this test
+if ! grep -q function_graph available_tracers; then
+ echo "no function graph tracer configured"
+ exit_unsupported;
+fi
+
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+if [ ! -f function_profile_enabled ]; then
+ echo "function_profile_enabled not found, function profiling enabled?"
+ exit_unsupported
+fi
+
+fail() { # mesg
+ reset_tracer
+ echo > set_ftrace_filter
+ echo $1
+ exit_fail
+}
+
+echo "Testing function tracer with profiler:"
+echo "enable function tracer"
+echo function > current_tracer
+echo "enable profiler"
+echo 1 > function_profile_enabled
+
+sleep 1
+
+echo "Now filter on just schedule"
+echo '*schedule' > set_ftrace_filter
+clear_trace
+
+echo "Now disable function profiler"
+echo 0 > function_profile_enabled
+
+sleep 1
+
+# make sure only schedule functions exist
+
+echo "testing if only schedule is being traced"
+if grep -v -e '^#' -e 'schedule' trace; then
+ fail "more than schedule was found"
+fi
+
+echo "Make sure schedule was traced"
+if ! grep -e 'schedule' trace > /dev/null; then
+ cat trace
+ fail "can not find schedule in trace"
+fi
+
+echo > set_ftrace_filter
+clear_trace
+
+sleep 1
+
+echo "make sure something other than scheduler is being traced"
+if ! grep -v -e '^#' -e 'schedule' trace > /dev/null; then
+ cat trace
+ fail "no other functions besides schedule was found"
+fi
+
+reset_tracer
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
new file mode 100644
index 000000000..68e7a48f5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -0,0 +1,170 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test reading of set_ftrace_filter
+#
+# The set_ftrace_filter file of ftrace is used to list functions as well as
+# triggers (probes) attached to functions. The code to read this file is not
+# straight forward and has had various bugs in the past. This test is designed
+# to add functions and triggers to that file in various ways and read that
+# file in various ways (cat vs dd).
+#
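+# For example, the filter content is read below in three ways that
+# exercise different read-side code paths:
+#   cat set_ftrace_filter
+#   dd if=set_ftrace_filter bs=1
+#   dd if=set_ftrace_filter bs=100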
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_tracer
+ reset_ftrace_filter
+ disable_events
+ clear_trace
+ enable_tracing
+}
+
+fail() { # mesg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+do_reset
+
+FILTER=set_ftrace_filter
+FUNC1="schedule"
+FUNC2="do_softirq"
+
+ALL_FUNCS="#### all functions enabled ####"
+
+test_func() {
+ if ! echo "$1" | grep -q "^$2\$"; then
+ return 0
+ fi
+ echo "$1" | grep -v "^$2\$"
+ return 1
+}
+
+check_set_ftrace_filter() {
+ cat=`cat $FILTER`
+ dd1=`dd if=$FILTER bs=1 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'`
+ dd100=`dd if=$FILTER bs=100 | grep -v -e 'records in' -e 'records out' -e 'bytes copied'`
+
+ echo "Testing '$@'"
+
+ while [ $# -gt 0 ]; do
+ echo "test $1"
+ if cat=`test_func "$cat" "$1"`; then
+ return 0
+ fi
+ if dd1=`test_func "$dd1" "$1"`; then
+ return 0
+ fi
+ if dd100=`test_func "$dd100" "$1"`; then
+ return 0
+ fi
+ shift
+ done
+
+ if [ -n "$cat" ]; then
+ return 0
+ fi
+ if [ -n "$dd1" ]; then
+ return 0
+ fi
+ if [ -n "$dd100" ]; then
+ return 0
+ fi
+ return 1;
+}
+
+if check_set_ftrace_filter "$ALL_FUNCS"; then
+ fail "Expected only $ALL_FUNCS"
+fi
+
+echo "$FUNC1:traceoff" > set_ftrace_filter
+if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited"; then
+ fail "Expected $ALL_FUNCS and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited"; then
+ fail "Expected $FUNC1 and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC2" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited"; then
+ fail "Expected $FUNC1 $FUNC2 and $FUNC1:traceoff:unlimited"
+fi
+
+echo "$FUNC2:traceoff" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+ fail "Expected $FUNC1 $FUNC2 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+ fail "Expected $FUNC1 $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+echo > set_ftrace_filter
+if check_set_ftrace_filter "$ALL_FUNCS" "$FUNC1:traceoff:unlimited" "$FUNC2:traceoff:unlimited"; then
+ fail "Expected $ALL_FUNCS $FUNC1:traceoff:unlimited and $FUNC2:traceoff:unlimited"
+fi
+
+reset_ftrace_filter
+
+if check_set_ftrace_filter "$ALL_FUNCS"; then
+ fail "Expected $ALL_FUNCS"
+fi
+
+echo "$FUNC1" > set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" ; then
+ fail "Expected $FUNC1"
+fi
+
+echo "$FUNC2" >> set_ftrace_filter
+if check_set_ftrace_filter "$FUNC1" "$FUNC2" ; then
+ fail "Expected $FUNC1 and $FUNC2"
+fi
+
+test_actual() { # Compares $TMPDIR/expected with set_ftrace_filter
+ cat set_ftrace_filter | grep -v '#' | cut -d' ' -f1 | cut -d':' -f1 | sort -u > $TMPDIR/actual
+ DIFF=`diff $TMPDIR/actual $TMPDIR/expected`
+ test -z "$DIFF"
+}
+
+# Set traceoff trigger for all functions with "lock" in their name
+cat available_filter_functions | cut -d' ' -f1 | grep 'lock' | sort -u > $TMPDIR/expected
+echo '*lock*:traceoff' > set_ftrace_filter
+test_actual
+
+# now remove all functions that contain 'try' and end with 'lock'
+grep -v 'try.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!*try*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "m" and end with "lock"
+grep -v '^m.*lock$' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!m*lock:traceoff' >> set_ftrace_filter
+test_actual
+
+# remove all that start with "c" and have "unlock"
+grep -v '^c.*unlock' $TMPDIR/expected > $TMPDIR/expected2
+mv $TMPDIR/expected2 $TMPDIR/expected
+echo '!c*unlock*:traceoff' >> set_ftrace_filter
+test_actual
+
+# clear all the rest
+> $TMPDIR/expected
+echo '!*:traceoff' >> set_ftrace_filter
+test_actual
+
+rm $TMPDIR/expected
+rm $TMPDIR/actual
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
new file mode 100644
index 000000000..f6d9ac732
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -0,0 +1,186 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - test for function traceon/off triggers
+# flags: instance
+#
+# Ftrace allows adding triggers to functions, such as enabling or disabling
+# tracing, enabling or disabling trace events, or recording a stack trace
+# within the ring buffer.
+#
+# This test is designed to test enabling and disabling tracing triggers
+#
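+# For example, a trigger of the form
+#   echo 'schedule:traceoff:3' > set_ftrace_filter
+# turns tracing off on the next three calls to schedule(), which is what
+# the final part of this test exercises.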
+
+# The triggers are set within the set_ftrace_filter file
+if [ ! -f set_ftrace_filter ]; then
+ echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
+ exit_unsupported
+fi
+
+do_reset() {
+ reset_ftrace_filter
+ reset_tracer
+ disable_events
+ clear_trace
+ enable_tracing
+}
+
+fail() { # mesg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+SLEEP_TIME=".1"
+
+do_reset
+
+echo "Testing function probes with enabling disabling tracing:"
+
+cnt_trace() {
+ grep -v '^#' trace | wc -l
+}
+
+echo '** DISABLE TRACING'
+disable_tracing
+clear_trace
+
+cnt=`cnt_trace`
+if [ $cnt -ne 0 ]; then
+ fail "Found junk in trace"
+fi
+
+
+echo '** ENABLE EVENTS'
+
+echo 1 > events/enable
+
+echo '** ENABLE TRACING'
+enable_tracing
+
+cnt=`cnt_trace`
+if [ $cnt -eq 0 ]; then
+ fail "Nothing found in trace"
+fi
+
+# powerpc uses .schedule
+func="schedule"
+available_file=available_filter_functions
+if [ -d ../../instances -a -f ../../available_filter_functions ]; then
+ available_file=../../available_filter_functions
+fi
+x=`grep '^\.schedule$' $available_file | wc -l`
+if [ "$x" -eq 1 ]; then
+ func=".schedule"
+fi
+
+echo '** SET TRACEOFF'
+
+echo "$func:traceoff" > set_ftrace_filter
+if [ -d ../../instances ]; then # Check instances
+ cur=`cat set_ftrace_filter`
+ top=`cat ../../set_ftrace_filter`
+ if [ "$cur" = "$top" ]; then
+ echo "This kernel is too old to support per instance filter"
+ reset_ftrace_filter
+ exit_unsupported
+ fi
+fi
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 1 ]; then
+ fail "Did not find traceoff trigger"
+fi
+
+cnt=`cnt_trace`
+sleep $SLEEP_TIME
+cnt2=`cnt_trace`
+
+if [ $cnt -ne $cnt2 ]; then
+ fail "Tracing is not stopped"
+fi
+
+on=`cat tracing_on`
+if [ $on != "0" ]; then
+ fail "Tracing is not off"
+fi
+
+csum1=`md5sum trace`
+sleep $SLEEP_TIME
+csum2=`md5sum trace`
+
+if [ "$csum1" != "$csum2" ]; then
+ fail "Tracing file is still changing"
+fi
+
+clear_trace
+
+cnt=`cnt_trace`
+if [ $cnt -ne 0 ]; then
+ fail "Tracing is still happeing"
+fi
+
+echo "!$func:traceoff" >> set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 0 ]; then
+ fail "traceoff trigger still exists"
+fi
+
+on=`cat tracing_on`
+if [ $on != "0" ]; then
+ fail "Tracing is started again"
+fi
+
+echo "$func:traceon" > set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 1 ]; then
+ fail "traceon trigger not found"
+fi
+
+cnt=`cnt_trace`
+if [ $cnt -eq 0 ]; then
+ fail "Tracing did not start"
+fi
+
+on=`cat tracing_on`
+if [ $on != "1" ]; then
+ fail "Tracing was not enabled"
+fi
+
+
+echo "!$func:traceon" >> set_ftrace_filter
+
+cnt=`grep schedule set_ftrace_filter | wc -l`
+if [ $cnt -ne 0 ]; then
+ fail "traceon trigger still exists"
+fi
+
+check_sleep() {
+ val=$1
+ sleep $SLEEP_TIME
+ cat set_ftrace_filter
+ on=`cat tracing_on`
+ if [ $on != "$val" ]; then
+ fail "Expected tracing_on to be $val, but it was $on"
+ fi
+}
+
+
+echo "$func:traceoff:3" > set_ftrace_filter
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "0"
+echo 1 > tracing_on
+check_sleep "1"
+echo "!$func:traceoff:0" > set_ftrace_filter
+
+if grep -e traceon -e traceoff set_ftrace_filter; then
+ fail "Tracing on and off triggers still exist"
+fi
+
+disable_events
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
new file mode 100644
index 000000000..e4645d5e3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -0,0 +1,100 @@
+
+clear_trace() { # reset trace output
+ echo > trace
+}
+
+disable_tracing() { # stop trace recording
+ echo 0 > tracing_on
+}
+
+enable_tracing() { # start trace recording
+ echo 1 > tracing_on
+}
+
+reset_tracer() { # reset the current tracer
+ echo nop > current_tracer
+}
+
+reset_trigger_file() {
+ # remove action triggers first
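+ # (action triggers such as ':on...()' may hold references to other
+ # events and variables, so they must go before the plain triggers)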
+ grep -H ':on[^:]*(' $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" >> $file
+ done
+ grep -Hv ^# $@ |
+ while read line; do
+ cmd=`echo $line | cut -f2- -d: | cut -f1 -d"["`
+ file=`echo $line | cut -f1 -d:`
+ echo "!$cmd" > $file
+ done
+}
+
+reset_trigger() { # reset all current setting triggers
+ if [ -d events/synthetic ]; then
+ reset_trigger_file events/synthetic/*/trigger
+ fi
+ reset_trigger_file events/*/*/trigger
+}
+
+reset_events_filter() { # reset all current setting filters
+ grep -v ^none events/*/*/filter |
+ while read line; do
+ echo 0 > `echo $line | cut -f1 -d:`
+ done
+}
+
+reset_ftrace_filter() { # reset all triggers in set_ftrace_filter
+ echo > set_ftrace_filter
+ grep -v '^#' set_ftrace_filter | while read t; do
+ tr=`echo $t | cut -d: -f2`
+ if [ "$tr" = "" ]; then
+ continue
+ fi
+ if ! grep -q "$t" set_ftrace_filter; then
+ continue;
+ fi
+ name=`echo $t | cut -d: -f1 | cut -d' ' -f1`
+ if [ $tr = "enable_event" -o $tr = "disable_event" ]; then
+ tr=`echo $t | cut -d: -f2-4`
+ limit=`echo $t | cut -d: -f5`
+ else
+ tr=`echo $t | cut -d: -f2`
+ limit=`echo $t | cut -d: -f3`
+ fi
+ if [ "$limit" != "unlimited" ]; then
+ tr="$tr:$limit"
+ fi
+ echo "!$name:$tr" > set_ftrace_filter
+ done
+}
+
+disable_events() {
+ echo 0 > events/enable
+}
+
+clear_synthetic_events() { # reset all current synthetic events
+ grep -v ^# synthetic_events |
+ while read line; do
+ echo "!$line" >> synthetic_events
+ done
+}
+
+initialize_ftrace() { # Reset ftrace to initial-state
+# As the initial state, ftrace will be set to nop tracer,
+# no events, no triggers, no filters, no function filters,
+# no probes, and tracing on.
+ disable_tracing
+ reset_tracer
+ reset_trigger
+ reset_events_filter
+ disable_events
+ echo > set_event_pid # event tracer is always on
+ [ -f set_ftrace_filter ] && echo | tee set_ftrace_*
+ [ -f set_graph_function ] && echo | tee set_graph_*
+ [ -f stack_trace_filter ] && echo > stack_trace_filter
+ [ -f kprobe_events ] && echo > kprobe_events
+ [ -f uprobe_events ] && echo > uprobe_events
+ enable_tracing
+}
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
new file mode 100644
index 000000000..4fa0f7914
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -0,0 +1,146 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test creation and deletion of trace instances while setting an event
+
+if [ ! -d instances ] ; then
+ echo "no instance directory with this kernel"
+ exit_unsupported;
+fi
+
+fail() { # mesg
+ rmdir foo 2>/dev/null
+ echo $1
+ set -e
+ exit_fail
+}
+
+cd instances
+
+# we don't want to fail on error
+set +e
+
+mkdir x
+rmdir x
+result=$?
+
+if [ $result -ne 0 ]; then
+ echo "instance rmdir not supported"
+ exit_unsupported
+fi
+
+instance_slam() {
+ while :; do
+ mkdir foo 2> /dev/null
+ rmdir foo 2> /dev/null
+ done
+}
+
+instance_read() {
+ while :; do
+ cat foo/trace 1> /dev/null 2>&1
+ done
+}
+
+instance_set() {
+ while :; do
+ echo 1 > foo/events/sched/sched_switch
+ done 2> /dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_set &
+p2=$!
+echo $p2
+
+instance_read &
+p3=$!
+echo $p3
+
+sleep 1
+
+kill -1 $p3
+kill -1 $p2
+kill -1 $p1
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3
+echo "all processes finished, wait for cleanup"
+sleep 1
+
+mkdir foo
+ls foo > /dev/null
+rmdir foo
+if [ -d foo ]; then
+ fail "foo still exists"
+fi
+
+mkdir foo
+echo "schedule:enable_event:sched:sched_switch" > foo/set_ftrace_filter
+rmdir foo
+if [ -d foo ]; then
+ fail "foo still exists"
+fi
+if grep -q "schedule:enable_event:sched:sched_switch" ../set_ftrace_filter; then
+ echo "Older kernel detected. Cleanup filter"
+ echo '!schedule:enable_event:sched:sched_switch' > ../set_ftrace_filter
+fi
+
+instance_slam() {
+ while :; do
+ mkdir x
+ mkdir y
+ mkdir z
+ rmdir x
+ rmdir y
+ rmdir z
+ done 2>/dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_slam &
+p2=$!
+echo $p2
+
+instance_slam &
+p3=$!
+echo $p3
+
+instance_slam &
+p4=$!
+echo $p4
+
+instance_slam &
+p5=$!
+echo $p5
+
+ls -lR >/dev/null
+sleep 1
+
+kill -1 $p1
+kill -1 $p2
+kill -1 $p3
+kill -1 $p4
+kill -1 $p5
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3 $p4 $p5
+echo "all processes finished, wait for cleanup"
+
+mkdir x y z
+ls x y z
+rmdir x y z
+for d in x y z; do
+ if [ -d $d ]; then
+ fail "instance $d still exists"
+ fi
+done
+
+set -e
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance.tc b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
new file mode 100644
index 000000000..b84651283
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance.tc
@@ -0,0 +1,86 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test creation and deletion of trace instances
+
+if [ ! -d instances ] ; then
+ echo "no instance directory with this kernel"
+ exit_unsupported;
+fi
+
+fail() { # mesg
+ rmdir x y z 2>/dev/null
+ echo $1
+ set -e
+ exit_fail
+}
+
+cd instances
+
+# we don't want to fail on error
+set +e
+
+mkdir x
+rmdir x
+result=$?
+
+if [ $result -ne 0 ]; then
+ echo "instance rmdir not supported"
+ exit_unsupported
+fi
+
+instance_slam() {
+ while :; do
+ mkdir x
+ mkdir y
+ mkdir z
+ rmdir x
+ rmdir y
+ rmdir z
+ done 2>/dev/null
+}
+
+instance_slam &
+p1=$!
+echo $p1
+
+instance_slam &
+p2=$!
+echo $p2
+
+instance_slam &
+p3=$!
+echo $p3
+
+instance_slam &
+p4=$!
+echo $p4
+
+instance_slam &
+p5=$!
+echo $p5
+
+ls -lR >/dev/null
+sleep 1
+
+kill -1 $p1
+kill -1 $p2
+kill -1 $p3
+kill -1 $p4
+kill -1 $p5
+
+echo "Wait for processes to finish"
+wait $p1 $p2 $p3 $p4 $p5
+echo "all processes finished, wait for cleanup"
+
+mkdir x y z
+ls x y z
+rmdir x y z
+for d in x y z; do
+ if [ -d $d ]; then
+ fail "instance $d still exists"
+ fi
+done
+
+set -e
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
new file mode 100644
index 000000000..4604d2103
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - adding and removing
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent _do_fork > kprobe_events
+grep myevent kprobe_events
+test -d events/kprobes/myevent
+echo > kprobe_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
new file mode 100644
index 000000000..bbc443a91
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - busy event check
+
+[ -f kprobe_events ] || exit_unsupported
+
+echo 0 > events/enable
+echo > kprobe_events
+echo p:myevent _do_fork > kprobe_events
+test -d events/kprobes/myevent
+echo 1 > events/kprobes/myevent/enable
+echo > kprobe_events && exit_fail # this must fail
+echo 0 > events/kprobes/myevent/enable
+echo > kprobe_events # this must succeed
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
new file mode 100644
index 000000000..8b43c6804
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+echo "-:testprobe" >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe && exit_fail || exit_pass
+
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
new file mode 100644
index 000000000..1ad70cdaf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event string type argument
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+
+case `uname -m` in
+x86_64)
+ ARG1=%di
+;;
+i[3456]86)
+ ARG1=%ax
+;;
+aarch64)
+ ARG1=%x0
+;;
+arm*)
+ ARG1=%r0
+;;
+ppc64*)
+ ARG1=%r3
+;;
+ppc*)
+ ARG1=%r3
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_untested
+esac
+
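+# tracefs_create_dir() is probed because adding the "test" event below
+# makes the kernel create events/kprobes/test, so the recorded string
+# argument should be "test"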
+: "Test get argument (1)"
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\"" trace
+
+echo 0 > events/kprobes/testprobe/enable
+: "Test get argument (2)"
+echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test _do_fork" >> kprobe_events
+grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
+
+echo 0 > events/enable
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
new file mode 100644
index 000000000..92ffb3bd3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -0,0 +1,107 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event argument syntax
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+
+PROBEFUNC="vfs_read"
+GOODREG=
+BADREG=
+GOODSYM="_sdata"
+if ! grep -qw ${GOODSYM} /proc/kallsyms ; then
+ GOODSYM=$PROBEFUNC
+fi
+BADSYM="deaqswdefr"
+SYMADDR=0x`grep -w ${GOODSYM} /proc/kallsyms | cut -f 1 -d " "`
+GOODTYPE="x16"
+BADTYPE="y16"
+
+case `uname -m` in
+x86_64|i[3456]86)
+ GOODREG=%ax
+ BADREG=%ex
+;;
+aarch64)
+ GOODREG=%x0
+ BADREG=%ax
+;;
+arm*)
+ GOODREG=%r0
+ BADREG=%ax
+;;
+ppc*)
+ GOODREG=%r3
+ BADREG=%msr
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_untested
+esac
+
+test_goodarg() # Good-args
+{
+ while [ "$1" ]; do
+ echo "p ${PROBEFUNC} $1" > kprobe_events
+ shift 1
+ done;
+}
+
+test_badarg() # Bad-args
+{
+ while [ "$1" ]; do
+ ! echo "p ${PROBEFUNC} $1" > kprobe_events
+ shift 1
+ done;
+}
+
+echo > kprobe_events
+
+: "Register access"
+test_goodarg ${GOODREG}
+test_badarg ${BADREG}
+
+: "Symbol access"
+test_goodarg "@${GOODSYM}" "@${SYMADDR}" "@${GOODSYM}+10" "@${GOODSYM}-10"
+test_badarg "@" "@${BADSYM}" "@${GOODSYM}*10" "@${GOODSYM}/10" \
+ "@${GOODSYM}%10" "@${GOODSYM}&10" "@${GOODSYM}|10"
+
+: "Stack access"
+test_goodarg "\$stack" "\$stack0" "\$stack1"
+test_badarg "\$stackp" "\$stack0+10" "\$stack1-10"
+
+: "Retval access"
+echo "r ${PROBEFUNC} \$retval" > kprobe_events
+! echo "p ${PROBEFUNC} \$retval" > kprobe_events
+
+# $comm was introduced in 4.8, older kernels reject it.
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+: "Comm access"
+test_goodarg "\$comm"
+fi
+
+: "Indirect memory access"
+test_goodarg "+0(${GOODREG})" "-0(${GOODREG})" "+10(\$stack)" \
+ "+0(\$stack1)" "+10(@${GOODSYM}-10)" "+0(+10(+20(\$stack)))"
+test_badarg "+(${GOODREG})" "(${GOODREG}+10)" "-(${GOODREG})" "(${GOODREG})" \
+ "+10(\$comm)" "+0(${GOODREG})+10"
+
+: "Name assignment"
+test_goodarg "varname=${GOODREG}"
+test_badarg "varname=varname2=${GOODREG}"
+
+: "Type syntax"
+test_goodarg "${GOODREG}:${GOODTYPE}"
+test_badarg "${GOODREG}::${GOODTYPE}" "${GOODREG}:${BADTYPE}" \
+ "${GOODTYPE}:${GOODREG}"
+
+: "Combination check"
+
+test_goodarg "\$comm:string" "+0(\$stack):string"
+test_badarg "\$comm:x64" "\$stack:string" "${GOODREG}:string"
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
new file mode 100644
index 000000000..2a1755bfc
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobes event arguments with types
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+grep "x8/16/32/64" README > /dev/null || exit_unsupported # version issue
+
+echo 0 > events/enable
+echo > kprobe_events
+enable_tracing
+
+echo 'p:testprobe _do_fork $stack0:s32 $stack0:u32 $stack0:x32 $stack0:b8@4/32' > kprobe_events
+grep testprobe kprobe_events
+test -d events/kprobes/testprobe
+
+echo 1 > events/kprobes/testprobe/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe/enable
+ARGS=`tail -n 1 trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+
+check_types() {
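+ # the s32, u32 and x32 readings of the same stack slot must agree,
+ # and the b8@4/32 bitfield must match the corresponding bits of the
+ # x32 value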
+ X1=`printf "%x" $1 | tail -c 8`
+ X2=`printf "%x" $2`
+ X3=`printf "%x" $3`
+ test $X1 = $X2
+ test $X2 = $X3
+ test 0x$X3 = $3
+
+ B4=`printf "%02x" $4`
+ B3=`echo -n $X3 | tail -c 3 | head -c 2`
+ test $B3 = $B4
+}
+check_types $ARGS
+
+echo "-:testprobe" >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe && exit_fail || exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
new file mode 100644
index 000000000..2724a1068
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -0,0 +1,37 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event auto/manual naming
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on function without name" ;:
+
+FUNC=`grep " [tT] .*vfs_read$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+PROBE_NAME=`echo $FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on function with new name" ;:
+
+echo "p:event1 $FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on function with new name and group" ;:
+
+echo "p:kprobes2/event2 $FUNC" > kprobe_events
+test -d events/kprobes2/event2 || exit_failure
+
+:;: "Add an event on dot function without name" ;:
+
+FUNC=`grep -m 10 " [tT] .*\.isra\..*$" /proc/kallsyms | tail -n 1 | cut -f 3 -d " "`
+[ "x" != "x$FUNC" ] || exit_unresolved
+echo "p $FUNC" > kprobe_events
+EVENT=`grep $FUNC kprobe_events | cut -f 1 -d " " | cut -f 2 -d:`
+[ "x" != "x$EVENT" ] || exit_failure
+test -d events/$EVENT || exit_failure
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
new file mode 100644
index 000000000..cc4cac0e6
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event with function tracer
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+grep function available_tracers || exit_unsupported # this is configurable
+
+# prepare
+echo nop > current_tracer
+echo _do_fork > set_ftrace_filter
+echo 0 > events/enable
+echo > kprobe_events
+echo 'p:testprobe _do_fork' > kprobe_events
+
+# kprobe on / ftrace off
+echo 1 > events/kprobes/testprobe/enable
+echo > trace
+( echo "forked")
+grep testprobe trace
+! grep '_do_fork <-' trace
+
+# kprobe on / ftrace on
+echo function > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe off / ftrace on
+echo 0 > events/kprobes/testprobe/enable
+echo > trace
+( echo "forked")
+! grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe on / ftrace on
+echo 1 > events/kprobes/testprobe/enable
+echo function > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+grep '_do_fork <-' trace
+
+# kprobe on / ftrace off
+echo nop > current_tracer
+echo > trace
+( echo "forked")
+grep testprobe trace
+! grep '_do_fork <-' trace
+
+# cleanup
+echo nop > current_tracer
+echo > set_ftrace_filter
+echo 0 > events/kprobes/testprobe/enable
+echo > kprobe_events
+echo > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
new file mode 100644
index 000000000..1e9f75f7a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_module.tc
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe dynamic event - probing module
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+disable_events
+echo > kprobe_events
+
+:;: "Add an event on a module function without specifying event name" ;:
+
+MOD=`lsmod | head -n 2 | tail -n 1 | cut -f1 -d" "`
+FUNC=`grep -m 1 ".* t .*\\[$MOD\\]" /proc/kallsyms | xargs | cut -f3 -d" "`
+[ "x" != "x$MOD" -a "y" != "y$FUNC" ] || exit_unresolved
+echo "p $MOD:$FUNC" > kprobe_events
+PROBE_NAME=`echo $MOD:$FUNC | tr ".:" "_"`
+test -d events/kprobes/p_${PROBE_NAME}_0 || exit_failure
+
+:;: "Add an event on a module function with new event name" ;:
+
+echo "p:event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes/event1 || exit_failure
+
+:;: "Add an event on a module function with new event and group name" ;:
+
+echo "p:kprobes1/event1 $MOD:$FUNC" > kprobe_events
+test -d events/kprobes1/event1 || exit_failure
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
new file mode 100644
index 000000000..321954683
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -0,0 +1,17 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe dynamic event with arguments
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+echo 0 > events/enable
+echo > kprobe_events
+echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+grep testprobe2 kprobe_events
+test -d events/kprobes/testprobe2
+echo 1 > events/kprobes/testprobe2/enable
+( echo "forked")
+echo 0 > events/kprobes/testprobe2/enable
+echo '-:testprobe2' >> kprobe_events
+clear_trace
+test -d events/kprobes/testprobe2 && exit_fail || exit_pass
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
new file mode 100644
index 000000000..7c0290684
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_maxactive.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe dynamic event with maxactive
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+grep -q 'r\[maxactive\]' README || exit_unsupported # this is older version
+
+echo > kprobe_events
+
+# Test if we successfully reject unknown messages
+if echo 'a:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test if we successfully reject too big maxactive
+if echo 'r1000000:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test if we successfully reject unparsable numbers for maxactive
+if echo 'r10fuzz:myprobeaccept inet_csk_accept' > kprobe_events; then false; else true; fi
+
+# Test for kretprobe with event name without maxactive
+echo 'r:myprobeaccept inet_csk_accept' > kprobe_events
+grep myprobeaccept kprobe_events
+test -d events/kprobes/myprobeaccept
+echo '-:myprobeaccept' >> kprobe_events
+
+# Test for kretprobe with event name with a small maxactive
+echo 'r10:myprobeaccept inet_csk_accept' > kprobe_events
+grep myprobeaccept kprobe_events
+test -d events/kprobes/myprobeaccept
+echo '-:myprobeaccept' >> kprobe_events
+
+# Test for kretprobe without event name without maxactive
+echo 'r inet_csk_accept' > kprobe_events
+grep inet_csk_accept kprobe_events
+echo > kprobe_events
+
+# Test for kretprobe without event name with a small maxactive
+echo 'r10 inet_csk_accept' > kprobe_events
+grep inet_csk_accept kprobe_events
+echo > kprobe_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
new file mode 100644
index 000000000..da298f191
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Register/unregister many kprobe events
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+# ftrace fentry skip size depends on the machine architecture.
+# Currently HAVE_KPROBES_ON_FTRACE is defined on x86 and powerpc64le
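+# Probing at symbol+OFFS skips over the compiler-generated fentry/mcount
+# call, e.g. "p vfs_read+5" on x86_64 (illustrative; the loop below does
+# this for every symbol).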
+case `uname -m` in
+ x86_64|i[3456]86) OFFS=5;;
+ ppc64le) OFFS=8;;
+ *) OFFS=0;;
+esac
+
+if [ -d events/kprobes ]; then
+ echo 0 > events/kprobes/enable
+ echo > kprobe_events
+fi
+
+N=0
+echo "Setup up kprobes on first available 256 text symbols"
+grep -i " t " /proc/kallsyms | cut -f3 -d" " | grep -v .*\\..* | \
+while read i; do
+ echo p ${i}+${OFFS} >> kprobe_events && N=$((N+1)) ||:
+ test $N -eq 256 && break
+done
+
+L=`cat kprobe_events | wc -l`
+if [ $L -ne 256 ]; then
+ echo "The number of kprobes events ($L) is not 256"
+ exit_fail
+fi
+
+echo 1 > events/kprobes/enable
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
new file mode 100644
index 000000000..519d2763f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -0,0 +1,43 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe events - probe points
+
+[ -f kprobe_events ] || exit_unsupported # this is configurable
+
+TARGET_FUNC=tracefs_create_dir
+
+dec_addr() { # hexaddr
+ printf "%d" "0x"`echo $1 | tail -c 8`
+}
+
+set_offs() { # prev target next
+ A1=`dec_addr $1`
+ A2=`dec_addr $2`
+ A3=`dec_addr $3`
+ TARGET="0x$2" # an address
+ PREV=`expr $A1 - $A2` # offset to previous symbol
+ NEXT=+`expr $A3 - $A2` # offset to next symbol
+ OVERFLOW=+`printf "0x%x" ${PREV}` # overflow offset to previous symbol
+}
+
+# We have to decode symbol addresses to get correct offsets.
+# If the offset is not an instruction boundary, it causes -EILSEQ.
+set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs`
+
+UINT_TEST=no
+# printf "%x" -1 returns (unsigned long)-1.
+if [ `printf "%x" -1 | wc -c` != 9 ]; then
+ UINT_TEST=yes
+fi
+
+echo 0 > events/enable
+echo > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}" > kprobe_events
+echo "p:testprobe ${TARGET}" > kprobe_events
+echo "p:testprobe ${TARGET_FUNC}${NEXT}" > kprobe_events
+! echo "p:testprobe ${TARGET_FUNC}${PREV}" > kprobe_events
+if [ "${UINT_TEST}" = yes ]; then
+! echo "p:testprobe ${TARGET_FUNC}${OVERFLOW}" > kprobe_events
+fi
+echo > kprobe_events
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
new file mode 100644
index 000000000..cbd174334
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -0,0 +1,73 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: test for the preemptirqsoff tracer
+
+MOD=preemptirq_delay_test
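+# On load, the module busy-waits for <delay> microseconds with either
+# preemption or irqs disabled (selected by test_mode), which the tracer
+# should report as the maximum latency.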
+
+fail() {
+ reset_tracer
+ rmmod $MOD || true
+ exit_fail
+}
+
+unsup() { #msg
+ reset_tracer
+ rmmod $MOD || true
+ echo $1
+ exit_unsupported
+}
+
+modprobe $MOD || unsup "$MOD module not available"
+rmmod $MOD
+
+grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
+grep -q "irqsoff" available_tracers || unsup "irqsoff tracer not enabled"
+
+reset_tracer
+
+# Simulate a preemptoff section of half a second, a couple of times
+echo preemptoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=preempt delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: preemptoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+
+# Simulate an irqsoff section of half a second, a couple of times
+echo irqsoff > current_tracer
+sleep 1
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+modprobe $MOD test_mode=irq delay=500000 || fail
+rmmod $MOD || fail
+
+cat trace
+
+# Confirm which tracer
+grep -q "tracer: irqsoff" trace || fail
+
+# Check the end of the section
+egrep -q "5.....us : <stack trace>" trace || fail
+
+# Check for 500ms of latency
+egrep -q "latency: 5..... us" trace || fail
+
+reset_tracer
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/template b/tools/testing/selftests/ftrace/test.d/template
new file mode 100644
index 000000000..5c39ceb18
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/template
@@ -0,0 +1,10 @@
+#!/bin/sh
+# description: %HERE DESCRIBE WHAT THIS DOES%
+# you have to add the ".tc" extension to your testcase file
+# Note that all tests are run with the "errexit" option.
+
+exit 0 # Return 0 if the test is passed, otherwise return !0
+# Or you can call exit_pass for passed test, and exit_fail for failed test.
+# If the test could not run because of lack of feature, call exit_unsupported
+# If the test returned unclear results, call exit_unresolved
+# If the test is a dummy, or a placeholder, call exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
new file mode 100644
index 000000000..2aabab363
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-extended-error-support.tc
@@ -0,0 +1,39 @@
+#!/bin/sh
+# description: event trigger - test extended error support
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test extended error support"
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+! echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger 2> /dev/null
+if ! grep -q "ERROR:" events/sched/sched_wakeup/hist; then
+ fail "Failed to generate extended error in histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
new file mode 100644
index 000000000..7fd5b4a8f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test field variable support
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test field variable support"
+
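+# The onmatch() action below generates a wakeup_latency synthetic event
+# whenever a sched_switch matches a prior sched_waking with the same key;
+# sched.sched_waking.prio is a "field variable" resolved from the matched
+# event.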
+echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
+echo 'hist:keys=comm:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=pid,prio,comm:vals=lat:sort=pid,prio' > events/synthetic/wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create inter-event histogram"
+fi
+
+if ! grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to create histogram with field variable"
+fi
+
+echo '!hist:keys=next_comm:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($wakeup_lat,next_pid,sched.sched_waking.prio,next_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+
+if grep -q "synthetic_prio=prio" events/sched/sched_waking/hist; then
+ fail "Failed to remove histogram with field variable"
+fi
+
+do_reset
+
+exit 0
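The three trigger writes above follow the general inter-event recipe. A generic sketch, with names mirroring the test and paths relative to the tracefs directory:

    # 0) define the synthetic event and its fields
    echo 'wakeup_latency u64 lat; pid_t pid; char comm[16]' > synthetic_events
    # 1) on the source event, save a timestamp into a named variable
    echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
    # 2) on the target event, compute a variable from it and fire the
    #    synthetic event with onmatch() when the keys correspond
    echo 'hist:keys=next_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).wakeup_latency($lat,next_pid,next_comm)' > events/sched/sched_switch/trigger
    # 3) histogram the synthetic event like any ordinary event
    echo 'hist:keys=pid,lat:sort=lat' > events/synthetic/wakeup_latency/trigger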
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
new file mode 100644
index 000000000..c93dbe38b
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -0,0 +1,58 @@
+#!/bin/sh
+# description: event trigger - test inter-event combined histogram trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+clear_synthetic_events
+
+echo "Test create synthetic event"
+
+echo 'waking_latency u64 lat pid_t pid' > synthetic_events
+if [ ! -d events/synthetic/waking_latency ]; then
+ fail "Failed to create waking_latency synthetic event"
+fi
+
+echo "Test combined histogram"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_waking/trigger
+echo 'hist:keys=pid:waking_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).waking_latency($waking_lat,pid) if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' > events/synthetic/waking_latency/trigger
+
+echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
+echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
+
+echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+
+ping localhost -c 3
+if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+ fail "Failed to create combined histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
new file mode 100644
index 000000000..c193dce61
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# description: event trigger - test multiple actions on hist trigger
+
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test multiple actions on hist trigger"
+echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+TRIGGER1=events/sched/sched_wakeup/trigger
+TRIGGER2=events/sched/sched_switch/trigger
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="cyclictest"' > $TRIGGER1
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0 if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,next_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid:onmatch(sched.sched_wakeup).wakeup_latency(sched.sched_switch.$wakeup_lat,prev_pid) if next_comm=="cyclictest"' >> $TRIGGER2
+echo 'hist:keys=next_pid if next_comm=="cyclictest"' >> $TRIGGER2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
new file mode 100644
index 000000000..e84e7d048
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm) if next_comm=="ping"' > events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist; then
+ fail "Failed to create onmatch action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
new file mode 100644
index 000000000..7907d8aac
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -0,0 +1,50 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmatch-onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event"
+echo "Test histogram variables,simple expression support and onmatch-onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' > events/sched/sched_wakeup/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid,next_comm):onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+echo 'hist:keys=comm,pid,lat:wakeup_lat=lat:sort=lat' > events/synthetic/wakeup_latency/trigger
+ping localhost -c 5
+if ! grep -q "ping" events/synthetic/wakeup_latency/hist || \
+   ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmatch-onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
new file mode 100644
index 000000000..38b7ed624
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -0,0 +1,48 @@
+#!/bin/sh
+# description: event trigger - test inter-event histogram trigger onmax action
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+echo "Test onmax action"
+
+echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_waking/trigger
+echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0:onmax($wakeup_lat).save(next_comm,prev_pid,prev_prio,prev_comm) if next_comm=="ping"' >> events/sched/sched_switch/trigger
+ping localhost -c 3
+if ! grep -q "max:" events/sched/sched_switch/hist; then
+ fail "Failed to create onmax action inter-event histogram"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
new file mode 100644
index 000000000..c604438df
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-createremove.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# description: event trigger - test synthetic event create remove
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+clear_synthetic_events
+reset_tracer
+do_reset
+
+echo "Test create synthetic event"
+
+echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
+if [ ! -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to create wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test remove synthetic event"
+echo '!wakeup_latency u64 lat pid_t pid char comm[16]' >> synthetic_events
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Failed to delete wakeup_latency synthetic event"
+fi
+
+reset_trigger
+
+echo "Test create synthetic event with an error"
+! echo 'wakeup_latency u64 lat pid_t pid char' > synthetic_events 2> /dev/null
+if [ -d events/synthetic/wakeup_latency ]; then
+ fail "Created wakeup_latency synthetic event with an invalid format"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
new file mode 100644
index 000000000..88e6c3f43
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic event is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test synthetic_events syntax parser"
+
+echo > synthetic_events
+
+# synthetic event must have a field
+! echo "myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
+# synthetic event must be found in synthetic_events
+grep "myevent[[:space:]]u64 var1" synthetic_events
+
+# it is not possible to add same name event
+! echo "myevent u64 var2" >> synthetic_events
+
+# Non-append open will cleanup all events and add new one
+echo "myevent u64 var2" > synthetic_events
+
+# multiple fields with different spaces
+echo "myevent u64 var1; u64 var2;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ; u64 var2 ;" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+echo "myevent u64 var1 ;u64 var2" > synthetic_events
+grep "myevent[[:space:]]u64 var1; u64 var2" synthetic_events
+
+# test field types
+echo "myevent u32 var" > synthetic_events
+echo "myevent u16 var" > synthetic_events
+echo "myevent u8 var" > synthetic_events
+echo "myevent s64 var" > synthetic_events
+echo "myevent s32 var" > synthetic_events
+echo "myevent s16 var" > synthetic_events
+echo "myevent s8 var" > synthetic_events
+
+echo "myevent char var" > synthetic_events
+echo "myevent int var" > synthetic_events
+echo "myevent long var" > synthetic_events
+echo "myevent pid_t var" > synthetic_events
+
+echo "myevent unsigned char var" > synthetic_events
+echo "myevent unsigned int var" > synthetic_events
+echo "myevent unsigned long var" > synthetic_events
+grep "myevent[[:space:]]unsigned long var" synthetic_events
+
+# test string type
+echo "myevent char var[10]" > synthetic_events
+grep "myevent[[:space:]]char\[10\] var" synthetic_events
+
+do_reset
+
+exit 0
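Assuming tracefs is mounted at /sys/kernel/tracing, the normalization these greps rely on can be checked interactively; this transcript is illustrative, with a tab between the event name and its field list:

    # cd /sys/kernel/tracing
    # echo 'myevent u64 a ; char b[8]' > synthetic_events
    # cat synthetic_events
    myevent	u64 a; char[8] b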
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
new file mode 100644
index 000000000..28cc355a3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-eventonoff.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test event enable/disable trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep enable_event events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "event enable/disable trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test enable_event trigger"
+echo 0 > events/sched/sched_switch/enable
+echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+( echo "forked")
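+# The enable file reads "1*" when the event is soft-enabled by a trigger,
+# as opposed to "1" for a direct enable.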
+if [ `cat events/sched/sched_switch/enable` != '1*' ]; then
+ fail "enable_event trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test disable_event trigger"
+echo 1 > events/sched/sched_switch/enable
+echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat events/sched/sched_switch/enable` != '0*' ]; then
+ fail "disable_event trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for event enable/disable trigger"
+! echo 'enable_event:nogroup:noevent' > events/sched/sched_process_fork/trigger
+! echo 'disable_event+1' > events/sched/sched_process_fork/trigger
+echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+! echo 'enable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+! echo 'disable_event:sched:sched_switch' > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
new file mode 100644
index 000000000..a48e23eb8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-filter.tc
@@ -0,0 +1,61 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test trigger filter
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test trigger filter"
+echo 1 > tracing_on
+echo 'traceoff if child_pid == 0' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 1 ]; then
+ fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for trigger filter"
+! echo 'traceoff if a' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid=0' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid==b' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid == 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+! echo 'traceoff if common_pid == child_pid' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid <= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if common_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if parent_pid >= 0 && child_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+echo 'traceoff if parent_pid >= 0 || child_pid >= 0' > events/sched/sched_process_fork/trigger
+echo '!traceoff' > events/sched/sched_process_fork/trigger
+
+
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
new file mode 100644
index 000000000..8da80efc4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -0,0 +1,76 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram modifiers
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test histogram with execname modifier"
+
+echo 'hist:keys=common_pid.execname' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+COMM=`cat /proc/$$/comm`
+grep "common_pid: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "execname modifier on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with hex modifier"
+
+echo 'hist:keys=parent_pid.hex' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+# Note that $$ is the parent pid. $PID is current PID.
+HEX=`printf %x $PID`
+grep "parent_pid: $HEX" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hex modifier on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with syscall modifier"
+
+echo 'hist:keys=id.syscall' > events/raw_syscalls/sys_exit/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \
+ fail "syscall modifier on raw_syscalls/sys_exit did not work"
+
+
+reset_trigger
+
+echo "Test histgram with log2 modifier"
+
+echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep 'bytes_req: ~ 2^[0-9]*' events/kmem/kmalloc/hist > /dev/null || \
+ fail "log2 modifier on kmem/kmalloc did not work"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
new file mode 100644
index 000000000..449fe9ff9
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -0,0 +1,84 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test histogram basic tigger"
+
+echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+grep child events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with compound keys"
+
+echo 'hist:keys=parent_pid,child_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '^{ parent_pid:.*, child_pid:.*}' events/sched/sched_process_fork/hist > /dev/null || \
+ fail "compound keys on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with string key"
+
+echo 'hist:keys=parent_comm' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+COMM=`cat /proc/$$/comm`
+grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "string key on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with sort key"
+
+echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
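+# check_inc succeeds only when its arguments are in non-decreasing order,
+# e.g. "check_inc 1 3 3 7" returns 0 while "check_inc 2 1" returns 1.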
+check_inc() {
+ while [ $# -gt 1 ]; do
+ [ $1 -gt $2 ] && return 1
+ shift 1
+ done
+ return 0
+}
+check_inc `grep -o "child_pid:[[:space:]]*[[:digit:]]*" \
+ events/sched/sched_process_fork/hist | cut -d: -f2 ` ||
+ fail "sort param on sched_process_fork did not work"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
new file mode 100644
index 000000000..c5ef8b9d0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test multiple histogram triggers
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+reset_trigger
+
+echo "Test histogram multiple tiggers"
+
+echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
+echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep parent_pid events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+grep child events/sched/sched_process_fork/hist > /dev/null || \
+ fail "hist trigger on sched_process_fork did not work"
+COMM=`cat /proc/$$/comm`
+grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
+ fail "string key on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test histogram with its name"
+
+echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_fork/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep test_hist events/sched/sched_process_fork/hist > /dev/null || \
+ fail "named event on sched_process_fork did not work"
+
+echo "Test same named histogram on different events"
+
+echo 'hist:name=test_hist:keys=common_pid' > events/sched/sched_process_exit/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep test_hist events/sched/sched_process_exit/hist > /dev/null || \
+ fail "named event on sched_process_fork did not work"
+
+diffs=`diff events/sched/sched_process_exit/hist events/sched/sched_process_fork/hist | wc -l`
+test $diffs -eq 0 || fail "Same name histograms are not same"
+
+reset_trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
new file mode 100644
index 000000000..ed38f0050
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-snapshot.tc
@@ -0,0 +1,62 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test snapshot-trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+ echo "snapshot is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep snapshot events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "snapshot trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test snapshot tigger"
+echo 0 > snapshot
+echo 1 > events/sched/sched_process_fork/enable
+( echo "forked")
+echo 'snapshot:1' > events/sched/sched_process_fork/trigger
+( echo "forked")
+grep sched_process_fork snapshot > /dev/null || \
+ fail "snapshot trigger on sched_process_fork did not work"
+
+reset_trigger
+echo 0 > snapshot
+echo 0 > events/sched/sched_process_fork/enable
+
+echo "Test snapshot semantic errors"
+
+! echo "snapshot+1" > events/sched/sched_process_fork/trigger
+echo "snapshot" > events/sched/sched_process_fork/trigger
+! echo "snapshot" > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
new file mode 100644
index 000000000..3121d795a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-stacktrace.tc
@@ -0,0 +1,54 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test stacktrace-trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+FEATURE=`grep stacktrace events/sched/sched_process_fork/trigger`
+if [ -z "$FEATURE" ]; then
+ echo "stacktrace trigger is not supported"
+ exit_unsupported
+fi
+
+echo "Test stacktrace tigger"
+echo 0 > trace
+echo 0 > options/stacktrace
+echo 'stacktrace' > events/sched/sched_process_fork/trigger
+( echo "forked")
+grep "<stack trace>" trace > /dev/null || \
+ fail "stacktrace trigger on sched_process_fork did not work"
+
+reset_trigger
+
+echo "Test stacktrace semantic errors"
+
+! echo "stacktrace:foo" > events/sched/sched_process_fork/trigger
+echo "stacktrace" > events/sched/sched_process_fork/trigger
+! echo "stacktrace" > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
new file mode 100644
index 000000000..2acbfe2c0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-hist.tc
@@ -0,0 +1,49 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker tigger"
+
+echo 'hist:keys=common_pid' > events/ftrace/print/trigger
+for i in `seq 1 10` ; do echo "hello" > trace_marker; done
+grep 'hitcount: *10$' events/ftrace/print/hist > /dev/null || \
+ fail "hist trigger did not trigger correct times on trace_marker"
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
new file mode 100644
index 000000000..6748e8cb4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-snapshot.tc
@@ -0,0 +1,74 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test snapshot trigger
+# flags: instance
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ echo 0 > snapshot
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f snapshot ]; then
+ echo "snapshot is not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+test_trace() {
+ file=$1
+ x=$2
+
+ cat $file | while read line; do
+ comment=`echo $line | sed -e 's/^#//'`
+ if [ "$line" != "$comment" ]; then
+ continue
+ fi
+ echo "testing $line for >$x<"
+ match=`echo $line | sed -e "s/>$x<//"`
+ if [ "$line" == "$match" ]; then
+ fail "$line does not have >$x< in it"
+ fi
+ let x=$x+2
+ done
+}
+
+do_reset
+
+echo "Test snapshot trace_marker tigger"
+
+echo 'snapshot' > events/ftrace/print/trigger
+
+# make sure the snapshot is allocated
+
+grep -q 'Snapshot is allocated' snapshot
+
+for i in `seq 1 10` ; do echo "hello >$i<" > trace_marker; done
+
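+# Each write below also fires the snapshot trigger (a buffer swap), so the
+# markers alternate between buffers: trace ends up with the odd-numbered
+# markers and snapshot with the even-numbered ones, as checked below.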
+test_trace trace 1
+test_trace snapshot 2
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
new file mode 100644
index 000000000..0a69c5d1c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic-kernel.tc
@@ -0,0 +1,68 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event against kernel event
+# flags:
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ echo > synthetic_events
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic events not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/sched/sched_waking ]; then
+ echo "event sched_waking is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram kernel event to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=pid:ts0=common_timestamp.usecs' > events/sched/sched_waking/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_waking).latency($lat)' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+sleep 1
+echo "hello" > trace_marker
+
+grep 'hitcount: *1$' events/ftrace/print/hist > /dev/null || \
+ fail "hist trigger did not trigger correct times on trace_marker"
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+ fail "hist trigger did not trigger "
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
new file mode 100644
index 000000000..3666dd6ab
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-trace-marker-synthetic.tc
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: trace_marker trigger - test histogram with synthetic event
+# flags:
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ echo > synthetic_events
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f synthetic_events ]; then
+ echo "synthetic events not supported"
+ exit_unsupported
+fi
+
+if [ ! -d events/ftrace/print ]; then
+ echo "event trace_marker is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/ftrace/print/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
+do_reset
+
+echo "Test histogram trace_marker to trace_marker latency histogram trigger"
+
+echo 'latency u64 lat' > synthetic_events
+echo 'hist:keys=common_pid:ts0=common_timestamp.usecs if buf == "start"' > events/ftrace/print/trigger
+echo 'hist:keys=common_pid:lat=common_timestamp.usecs-$ts0:onmatch(ftrace.print).latency($lat) if buf == "end"' >> events/ftrace/print/trigger
+echo 'hist:keys=common_pid,lat:sort=lat' > events/synthetic/latency/trigger
+echo -n "start" > trace_marker
+echo -n "end" > trace_marker
+
+cnt=`grep 'hitcount: *1$' events/ftrace/print/hist | wc -l`
+
+if [ $cnt -ne 2 ]; then
+ fail "hist trace_marker trigger did not trigger correctly"
+fi
+
+grep 'hitcount: *1$' events/synthetic/latency/hist > /dev/null || \
+ fail "hist trigger did not trigger "
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
new file mode 100644
index 000000000..c59d9eb54
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-traceonoff.tc
@@ -0,0 +1,59 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test traceon/off trigger
+
+do_reset() {
+ reset_trigger
+ echo > set_event
+ clear_trace
+}
+
+fail() { #msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f events/sched/sched_process_fork/trigger ]; then
+ echo "event trigger is not supported"
+ exit_unsupported
+fi
+
+reset_tracer
+do_reset
+
+echo "Test traceoff trigger"
+echo 1 > tracing_on
+echo 'traceoff' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 0 ]; then
+ fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test traceon trigger"
+echo 0 > tracing_on
+echo 'traceon' > events/sched/sched_process_fork/trigger
+( echo "forked")
+if [ `cat tracing_on` -ne 1 ]; then
+ fail "traceoff trigger on sched_process_fork did not work"
+fi
+
+reset_trigger
+
+echo "Test semantic error for traceoff/on trigger"
+! echo 'traceoff:badparam' > events/sched/sched_process_fork/trigger
+! echo 'traceoff+0' > events/sched/sched_process_fork/trigger
+echo 'traceon' > events/sched/sched_process_fork/trigger
+! echo 'traceon' > events/sched/sched_process_fork/trigger
+! echo 'traceoff' > events/sched/sched_process_fork/trigger
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
new file mode 100644
index 000000000..11e157d75
--- /dev/null
+++ b/tools/testing/selftests/futex/Makefile
@@ -0,0 +1,37 @@
+# SPDX-License-Identifier: GPL-2.0
+SUBDIRS := functional
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ if [ -e $$DIR/$(TEST_PROGS) ]; then \
+ rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
+ fi \
+ done
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+endef
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
new file mode 100644
index 000000000..f3926c33e
--- /dev/null
+++ b/tools/testing/selftests/futex/README
@@ -0,0 +1,62 @@
+Futex Test
+==========
+Futex Test is intended to thoroughly test the Linux kernel futex system call
+API.
+
+Functional tests shall test the documented behavior of the futex operation
+code under test. This includes checking for proper behavior under normal use,
+odd corner cases, regression tests, and abject abuse and misuse.
+
+Futextest will also provide example implementations of mutual exclusion
+primitives. These can be used as-is in user applications or can serve as
+examples for system libraries. These will likely be added to either a new lib/
+directory or purely as header files under include/, I'm leaning toward the
+latter.
+
+Quick Start
+-----------
+# make
+# ./run.sh
+
+Design and Implementation Goals
+-------------------------------
+o Tests should be as self-contained as is practical, to facilitate sharing
+ the individual tests on mailing list discussions and bug reports.
+o The build system shall remain as simple as possible, avoiding any archive or
+ shared object building and linking.
+o Where possible, any helper functions or other package-wide code shall be
+ implemented in header files, avoiding the need to compile intermediate object
+ files.
+o External dependencies shall remain as minimal as possible. Currently gcc
+ and glibc are the only dependencies.
+o Tests return 0 for success and < 0 for failure.
+
+Output Formatting
+-----------------
+Test output shall be easily parsable by both human and machine. Title and
+results are printed to stdout, while intermediate ERROR or FAIL messages are
+sent to stderr. Tests shall support the -c option to print PASS, FAIL, and
+ERROR strings in color for easy visual parsing. Output shall conform to the
+following format:
+
+test_name: Description of the test
+ Arguments: arg1=val1 #units specified for clarity where appropriate
+ ERROR: Description of unexpected error
+ FAIL: Reason for test failure
+ # FIXME: Perhaps an " INFO: informational message" option would be
+ # useful here. Using -v to toggle them on and off, as with -c.
+ # there may be multiple ERROR or FAIL messages
+Result: (PASS|FAIL|ERROR)
+
+Naming
+------
+o FIXME: decide on a sane test naming scheme. Currently the tests are named
+ based on the primary futex operation they test. Eventually this will become a
+ problem as we intend to write multiple tests which collide in this namespace.
+ Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the
+ detailed description in the test source and the output.
+
+Coding Style
+------------
+o The Futex Test project adheres to the coding standards set forth by Linux
+ kernel as defined in the Linux source Documentation/process/coding-style.rst.
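For a concrete starting point, the suite can be driven either through the top-level runner or per test; the options shown are the ones the functional tests document in their usage() output (illustrative invocation):

    # run the whole functional suite
    ./run.sh
    # or run a single test with colored output and a 500us (500000ns) timeout
    ./functional/futex_requeue_pi -c -b -t 500000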
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
new file mode 100644
index 000000000..a09f57061
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -0,0 +1,7 @@
+futex_requeue_pi
+futex_requeue_pi_mismatched_ops
+futex_requeue_pi_signal_restart
+futex_wait_private_mapped_file
+futex_wait_timeout
+futex_wait_uninitialized_heap
+futex_wait_wouldblock
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
new file mode 100644
index 000000000..30996306c
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDES := -I../include -I../../
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+LDFLAGS := $(LDFLAGS) -pthread -lrt
+
+HEADERS := \
+ ../include/futextest.h \
+ ../include/atomic.h \
+ ../include/logging.h
+TEST_GEN_FILES := \
+ futex_wait_timeout \
+ futex_wait_wouldblock \
+ futex_requeue_pi \
+ futex_requeue_pi_signal_restart \
+ futex_requeue_pi_mismatched_ops \
+ futex_wait_uninitialized_heap \
+ futex_wait_private_mapped_file
+
+TEST_PROGS := run.sh
+
+top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
+include ../../lib.mk
+
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
new file mode 100644
index 000000000..54cd5c414
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -0,0 +1,412 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test exercises the futex syscall op codes needed for requeuing
+ * priority inheritance aware POSIX condition variables and mutexes.
+ *
+ * AUTHORS
+ * Sripathi Kodi <sripathik@in.ibm.com>
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com>
+ * 2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi"
+#define MAX_WAKE_ITERS 1000
+#define THREAD_MAX 10
+#define SIGNAL_PERIOD_US 100
+
+atomic_t waiters_blocked = ATOMIC_INITIALIZER;
+atomic_t waiters_woken = ATOMIC_INITIALIZER;
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+futex_t wake_complete = FUTEX_INITIALIZER;
+
+/* Test option defaults */
+static long timeout_ns;
+static int broadcast;
+static int owner;
+static int locked;
+
+struct thread_arg {
+ long id;
+ struct timespec *timeout;
+ int lock;
+ int ret;
+};
+#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -b Broadcast wakeup (all waiters)\n");
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -l Lock the pi futex across requeue\n");
+ printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n");
+ printf(" -t N Timeout in nanoseconds (default: 0)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ int ret;
+ struct sched_param schedp;
+ pthread_attr_t attr;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+
+void *waiterfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ futex_t old_val;
+
+ info("Waiter %ld: running\n", args->id);
+ /*
+ * Each thread sleeps for a different amount of time to avoid
+ * races, because we don't lock the external mutex here.
+ */
+ usleep(1000 * (long)args->id);
+
+ old_val = f1;
+ atomic_inc(&waiters_blocked);
+ info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+ &f1, f1, &f2);
+ args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
+ FUTEX_PRIVATE_FLAG);
+
+ info("waiter %ld woke with %d %s\n", args->id, args->ret,
+ args->ret < 0 ? strerror(errno) : "");
+ atomic_inc(&waiters_woken);
+ if (args->ret < 0) {
+ if (args->timeout && errno == ETIMEDOUT)
+ args->ret = 0;
+ else {
+ args->ret = RET_ERROR;
+ error("futex_wait_requeue_pi\n", errno);
+ }
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *broadcast_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int nr_requeue = INT_MAX;
+ int task_count = 0;
+ futex_t old_val;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ info("Waker: Calling broadcast\n");
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ continue_requeue:
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0) {
+ args->ret = RET_ERROR;
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ } else if (++i < MAX_WAKE_ITERS) {
+ task_count += args->ret;
+ if (task_count < THREAD_MAX - waiters_woken.val)
+ goto continue_requeue;
+ } else {
+ error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+ 0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
+ args->ret = RET_ERROR;
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->lock)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret > 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ pthread_exit((void *)&args->ret);
+}
+
+void *signal_wakerfn(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ unsigned int old_val;
+ int nr_requeue = 0;
+ int task_count = 0;
+ int nr_wake = 1;
+ int i = 0;
+
+ info("Waker: waiting for waiters to block\n");
+ while (waiters_blocked.val < THREAD_MAX)
+ usleep(1000);
+ usleep(1000);
+
+ while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
+ info("task_count: %d, waiters_woken: %d\n",
+ task_count, waiters_woken.val);
+ if (args->lock) {
+ info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ }
+ info("Waker: Calling signal\n");
+ /* cond_signal */
+ old_val = f1;
+ args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
+ nr_wake, nr_requeue,
+ FUTEX_PRIVATE_FLAG);
+ if (args->ret < 0)
+ args->ret = -errno;
+ info("futex: %x\n", f2);
+ if (args->lock) {
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+ f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ }
+ info("futex: %x\n", f2);
+ if (args->ret < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ args->ret = RET_ERROR;
+ break;
+ }
+
+ task_count += args->ret;
+ usleep(SIGNAL_PERIOD_US);
+ i++;
+ /* we have to loop at least THREAD_MAX times */
+ if (i > MAX_WAKE_ITERS + THREAD_MAX) {
+ error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+ 0, MAX_WAKE_ITERS + THREAD_MAX);
+ args->ret = RET_ERROR;
+ break;
+ }
+ }
+
+ futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+ if (args->ret >= 0)
+ args->ret = task_count;
+
+ info("Waker: exiting with %d\n", args->ret);
+ info("Waker: waiters_woken: %d\n", waiters_woken.val);
+ pthread_exit((void *)&args->ret);
+}
+
+void *third_party_blocker(void *arg)
+{
+ struct thread_arg *args = (struct thread_arg *)arg;
+ int ret2 = 0;
+
+ args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+ if (args->ret)
+ goto out;
+ args->ret = futex_wait(&wake_complete, wake_complete, NULL,
+ FUTEX_PRIVATE_FLAG);
+ ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ out:
+ if (args->ret || ret2) {
+ error("third_party_blocker() futex error", 0);
+ args->ret = RET_ERROR;
+ }
+
+ pthread_exit((void *)&args->ret);
+}
+
+int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+{
+ void *(*wakerfn)(void *) = signal_wakerfn;
+ struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
+ struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
+ pthread_t waiter[THREAD_MAX], waker, blocker;
+ struct timespec ts, *tsp = NULL;
+ struct thread_arg args[THREAD_MAX];
+ int *waiter_ret;
+ int i, ret = RET_PASS;
+
+ if (timeout_ns) {
+ time_t secs;
+
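+ /*
+ * Convert the relative timeout_ns into an absolute CLOCK_MONOTONIC
+ * expiry, carrying nanosecond overflow into the seconds field.
+ */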
+ info("timeout_ns = %ld\n", timeout_ns);
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+ secs = (ts.tv_nsec + timeout_ns) / 1000000000;
+ ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
+ ts.tv_sec += secs;
+ info("ts.tv_sec = %ld\n", ts.tv_sec);
+ info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+ tsp = &ts;
+ }
+
+ if (broadcast)
+ wakerfn = broadcast_wakerfn;
+
+ if (third_party_owner) {
+ if (create_rt_thread(&blocker, third_party_blocker,
+ (void *)&blocker_arg, SCHED_FIFO, 1)) {
+ error("Creating third party blocker thread failed\n",
+ errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+
+ atomic_set(&waiters_woken, 0);
+ for (i = 0; i < THREAD_MAX; i++) {
+ args[i].id = i;
+ args[i].timeout = tsp;
+ info("Starting thread %d\n", i);
+ if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
+ SCHED_FIFO, 1)) {
+ error("Creating waiting thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ }
+ waker_arg.lock = lock;
+ if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
+ SCHED_FIFO, 1)) {
+ error("Creating waker thread failed\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ /* Wait for threads to finish */
+ /* Store the first error or failure encountered in waiter_ret */
+ waiter_ret = &args[0].ret;
+ for (i = 0; i < THREAD_MAX; i++)
+ pthread_join(waiter[i],
+ *waiter_ret ? NULL : (void **)&waiter_ret);
+
+ if (third_party_owner)
+ pthread_join(blocker, NULL);
+ pthread_join(waker, NULL);
+
+out:
+ if (!ret) {
+ if (*waiter_ret)
+ ret = *waiter_ret;
+ else if (waker_arg.ret < 0)
+ ret = waker_arg.ret;
+ else if (blocker_arg.ret)
+ ret = blocker_arg.ret;
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int c, ret;
+
+ while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
+ switch (c) {
+ case 'b':
+ broadcast = 1;
+ break;
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'l':
+ locked = 1;
+ break;
+ case 'o':
+ owner = 1;
+ locked = 0;
+ break;
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test requeue functionality\n", basename(argv[0]));
+ ksft_print_msg(
+ "\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+ broadcast, locked, owner, timeout_ns);
+
+ /*
+ * FIXME: unit_test is obsolete now that we parse options and the
+ * various style of runs are done by run.sh - simplify the code and move
+ * unit_test into main()
+ */
+ ret = unit_test(broadcast, locked, owner, timeout_ns);
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
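Stripped of error handling and iteration, the condition-variable pattern the waiter and waker threads above implement reduces to the following sketch; it uses the futextest.h wrappers exactly as the test does, and cond/mutex are hypothetical names standing in for f1/f2:

    futex_t cond = FUTEX_INITIALIZER;   /* outer condition futex (f1) */
    futex_t mutex = FUTEX_INITIALIZER;  /* PI mutex futex (f2) */

    void waiter(void)
    {
            futex_t old = cond;
            /* block on cond; a successful return means we hold mutex */
            futex_wait_requeue_pi(&cond, old, &mutex, NULL, FUTEX_PRIVATE_FLAG);
            /* ... critical section ... */
            futex_unlock_pi(&mutex, FUTEX_PRIVATE_FLAG);
    }

    void broadcaster(void)
    {
            futex_t old = cond;
            /* wake one waiter, requeue the rest from cond onto mutex */
            futex_cmp_requeue_pi(&cond, old, &mutex, 1, INT_MAX,
                                 FUTEX_PRIVATE_FLAG);
    }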
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
new file mode 100644
index 000000000..08187a165
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -0,0 +1,138 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * 1. Block a thread using FUTEX_WAIT
+ * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1.
+ * 3. The kernel must detect the mismatch and return -EINVAL.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi-mismatched-ops"
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+int child_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *blocking_child(void *arg)
+{
+ child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
+ if (child_ret < 0) {
+ child_ret = -errno;
+ error("futex_wait\n", errno);
+ }
+ return (void *)&child_ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = RET_PASS;
+ pthread_t child;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Detect mismatched requeue_pi operations\n",
+ basename(argv[0]));
+
+ if (pthread_create(&child, NULL, blocking_child, NULL)) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+ /* Allow the child to block in the kernel. */
+ sleep(1);
+
+ /*
+ * The kernel should detect the waiter did not setup the
+ * q->requeue_pi_key and return -EINVAL. If it does not,
+ * it likely gave the lock to the child, which is now hung
+ * in the kernel.
+ */
+ ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+ if (ret < 0) {
+ if (errno == EINVAL) {
+ /*
+ * The kernel correctly detected the mismatched
+ * requeue_pi target and aborted. Wake the child with
+ * FUTEX_WAKE.
+ */
+ ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
+ if (ret == 1) {
+ ret = RET_PASS;
+ } else if (ret < 0) {
+ error("futex_wake\n", errno);
+ ret = RET_ERROR;
+ } else {
+ error("futex_wake did not wake the child\n", 0);
+ ret = RET_ERROR;
+ }
+ } else {
+ error("futex_cmp_requeue_pi\n", errno);
+ ret = RET_ERROR;
+ }
+ } else if (ret > 0) {
+ fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
+ ret = RET_FAIL;
+ } else {
+ error("futex_cmp_requeue_pi found no waiters\n", 0);
+ ret = RET_ERROR;
+ }
+
+ pthread_join(child, NULL);
+
+ if (!ret)
+ ret = child_ret;
+
+ out:
+ /* If the kernel crashes, we shouldn't return at all. */
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
new file mode 100644
index 000000000..f0542a344
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -0,0 +1,225 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2006-2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * This test exercises the futex_wait_requeue_pi() signal handling both
+ * before and after the requeue. The first should be restarted by the
+ * kernel. The latter should return EWOULDBLOCK to the waiter.
+ *
+ * AUTHORS
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-requeue-pi-signal-restart"
+#define DELAY_US 100
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+atomic_t requeued = ATOMIC_INITIALIZER;
+
+int waiter_ret = 0;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+ int policy, int prio)
+{
+ struct sched_param schedp;
+ pthread_attr_t attr;
+ int ret;
+
+ pthread_attr_init(&attr);
+ memset(&schedp, 0, sizeof(schedp));
+
+ ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+ if (ret) {
+ error("pthread_attr_setinheritsched\n", ret);
+ return -1;
+ }
+
+ ret = pthread_attr_setschedpolicy(&attr, policy);
+ if (ret) {
+ error("pthread_attr_setschedpolicy\n", ret);
+ return -1;
+ }
+
+ schedp.sched_priority = prio;
+ ret = pthread_attr_setschedparam(&attr, &schedp);
+ if (ret) {
+ error("pthread_attr_setschedparam\n", ret);
+ return -1;
+ }
+
+ ret = pthread_create(pth, &attr, func, arg);
+ if (ret) {
+ error("pthread_create\n", ret);
+ return -1;
+ }
+ return 0;
+}
+
+void handle_signal(int signo)
+{
+ info("signal received %s requeue\n",
+ requeued.val ? "after" : "prior to");
+}
+
+void *waiterfn(void *arg)
+{
+ unsigned int old_val;
+ int res;
+
+ waiter_ret = RET_PASS;
+
+ info("Waiter running\n");
+	info("Calling FUTEX_WAIT_REQUEUE_PI on f1: %x @ %p\n", f1, &f1);
+ old_val = f1;
+ res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
+ FUTEX_PRIVATE_FLAG);
+ if (!requeued.val || errno != EWOULDBLOCK) {
+ fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+ res, strerror(errno));
+ info("w2:futex: %x\n", f2);
+ if (!res)
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ waiter_ret = RET_FAIL;
+ }
+
+ info("Waiter exiting with %d\n", waiter_ret);
+ pthread_exit(NULL);
+}
+
+
+int main(int argc, char *argv[])
+{
+ unsigned int old_val;
+ struct sigaction sa;
+ pthread_t waiter;
+ int c, res, ret = RET_PASS;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test signal handling during requeue_pi\n",
+ basename(argv[0]));
+ ksft_print_msg("\tArguments: <none>\n");
+
+ sa.sa_handler = handle_signal;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = 0;
+ if (sigaction(SIGUSR1, &sa, NULL)) {
+ error("sigaction\n", errno);
+ exit(1);
+ }
+
+ info("m1:f2: %x\n", f2);
+ info("Creating waiter\n");
+ res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
+ if (res) {
+		error("Creating waiting thread failed\n", res);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+ info("m2:f2: %x\n", f2);
+ futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
+ info("m3:f2: %x\n", f2);
+
+ while (1) {
+ /*
+		 * Signal the waiter before requeue; the waiter should
+		 * automatically restart futex_wait_requeue_pi() in the
+		 * kernel. Wait for the waiter to block on f1 again.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ usleep(DELAY_US);
+
+ info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+ old_val = f1;
+ res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
+ FUTEX_PRIVATE_FLAG);
+ /*
+ * If res is non-zero, we either requeued the waiter or hit an
+ * error, break out and handle it. If it is zero, then the
+		 * signal may have hit before the waiter was blocked on f1.
+ * Try again.
+ */
+ if (res > 0) {
+ atomic_set(&requeued, 1);
+ break;
+ } else if (res < 0) {
+ error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+ ret = RET_ERROR;
+ break;
+ }
+ }
+ info("m4:f2: %x\n", f2);
+
+ /*
+	 * Signal the waiter after requeue; the waiter should return from
+	 * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so
+	 * the futex_unlock_pi() can't happen before the signal wakeup is
+	 * detected in the kernel.
+ */
+ info("Issuing SIGUSR1 to waiter\n");
+ pthread_kill(waiter, SIGUSR1);
+ info("Waiting for waiter to return\n");
+ pthread_join(waiter, NULL);
+
+ info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+ futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ info("m5:f2: %x\n", f2);
+
+ out:
+ if (ret == RET_PASS && waiter_ret)
+ ret = waiter_ret;
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
new file mode 100644
index 000000000..6216de828
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -0,0 +1,128 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ *	Internally, futexes have two handling modes, anon and file. A private
+ *	file mapping is special: at first it behaves as file-backed, but once
+ *	anything is written to it, it behaves as anon. This test exercises
+ *	that case.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <signal.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait-private-mapped-file"
+#define PAGE_SZ 4096
+
+char pad[PAGE_SZ] = {1};
+futex_t val = 1;
+char pad2[PAGE_SZ] = {1};
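+/*
+ * Note: pad and pad2 presumably keep other data off the page holding val,
+ * so the first write to that private file-backed .data page is this test's
+ * own store to val, triggering the file-to-anon transition described above.
+ */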
+
+#define WAKE_WAIT_US 3000000
+struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *thr_futex_wait(void *arg)
+{
+ int ret;
+
+ info("futex wait\n");
+ ret = futex_wait(&val, 1, &wait_timeout, 0);
+ if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
+ error("futex error.\n", errno);
+ print_result(TEST_NAME, RET_ERROR);
+ exit(RET_ERROR);
+ }
+
+ if (ret && errno == ETIMEDOUT)
+		fail("waiter timed out\n");
+
+ info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ pthread_t thr;
+ int ret = RET_PASS;
+ int res;
+ int c;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg(
+ "%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+	if (ret) {
+ fprintf(stderr, "pthread_create error\n");
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("wait a while\n");
+ usleep(WAKE_WAIT_US);
+ val = 2;
+ res = futex_wake(&val, 1, 0);
+ info("futex_wake %d\n", res);
+ if (res != 1) {
+ fail("FUTEX_WAKE didn't find the waiting thread.\n");
+ ret = RET_FAIL;
+ }
+
+ info("join\n");
+ pthread_join(thr, NULL);
+
+ out:
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
new file mode 100644
index 000000000..bab3dfe17
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -0,0 +1,89 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Block on a futex and wait for timeout.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-timeout"
+
+static long timeout_ns = 100000; /* 100us default timeout */
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -t N Timeout in nanoseconds (default: 100,000)\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ futex_t f1 = FUTEX_INITIALIZER;
+ struct timespec to;
+ int res, ret = RET_PASS;
+ int c;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 't':
+ timeout_ns = atoi(optarg);
+ break;
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Block on a futex and wait for timeout\n",
+ basename(argv[0]));
+ ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
+
+ /* initialize timeout */
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
+ res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != ETIMEDOUT) {
+		fail("futex_wait returned %d\n", res < 0 ? errno : res);
+ ret = RET_FAIL;
+ }
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
new file mode 100644
index 000000000..269753225
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -0,0 +1,126 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ *	Wait on uninitialized heap. It should be zero and FUTEX_WAIT should
+ *	return immediately. This test exercises zero-page handling in futex.
+ *
+ * AUTHOR
+ * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <libgen.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait-uninitialized-heap"
+#define WAIT_US 5000000
+
+static int child_blocked = 1;
+static int child_ret;
+void *buf;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *wait_thread(void *arg)
+{
+ int res;
+
+ child_ret = RET_PASS;
+ res = futex_wait(buf, 1, NULL, 0);
+ child_blocked = 0;
+
+ if (res != 0 && errno != EWOULDBLOCK) {
+ error("futex failure\n", errno);
+ child_ret = RET_ERROR;
+ }
+ pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+ int c, ret = RET_PASS;
+ long page_size;
+ pthread_t thr;
+
+ while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+	buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
+		   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+	if (buf == MAP_FAILED) {
+ error("mmap\n", errno);
+ exit(1);
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+
+ ret = pthread_create(&thr, NULL, wait_thread, NULL);
+ if (ret) {
+ error("pthread_create\n", errno);
+ ret = RET_ERROR;
+ goto out;
+ }
+
+ info("waiting %dus for child to return\n", WAIT_US);
+ usleep(WAIT_US);
+
+ ret = child_ret;
+ if (child_blocked) {
+ fail("child blocked in kernel\n");
+ ret = RET_FAIL;
+ }
+
+ out:
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
new file mode 100644
index 000000000..da15a6326
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs
+ * from the expected one.
+ *
+ * AUTHOR
+ * Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ * HISTORY
+ * 2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait-wouldblock"
+#define timeout_ns 100000
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+ futex_t f1 = FUTEX_INITIALIZER;
+ int res, ret = RET_PASS;
+ int c;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+ basename(argv[0]));
+
+ info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+ if (!res || errno != EWOULDBLOCK) {
+ fail("futex_wait returned: %d %s\n",
+ res ? errno : res, res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ print_result(TEST_NAME, ret);
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
new file mode 100755
index 000000000..7ff002eed
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run tests in the current directory.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file
+# by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+#
+###############################################################################
+
+# Test for a color capable console
+if [ -z "$USE_COLOR" ]; then
+ tput setf 7 || tput setaf 7
+ if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+ fi
+fi
+if [ "$USE_COLOR" -eq 1 ]; then
+ COLOR="-c"
+fi
+
+
+echo
+# requeue pi testing
+# without timeouts
+./futex_requeue_pi $COLOR
+./futex_requeue_pi $COLOR -b
+./futex_requeue_pi $COLOR -b -l
+./futex_requeue_pi $COLOR -b -o
+./futex_requeue_pi $COLOR -l
+./futex_requeue_pi $COLOR -o
+# with timeouts
+./futex_requeue_pi $COLOR -b -l -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -l -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+./futex_requeue_pi $COLOR -b -t 5000
+./futex_requeue_pi $COLOR -t 5000
+./futex_requeue_pi $COLOR -b -t 500000
+./futex_requeue_pi $COLOR -t 500000
+./futex_requeue_pi $COLOR -b -o -t 5000
+./futex_requeue_pi $COLOR -o -t 5000
+./futex_requeue_pi $COLOR -b -o -t 500000
+./futex_requeue_pi $COLOR -o -t 500000
+# with long timeout
+./futex_requeue_pi $COLOR -b -l -t 2000000000
+./futex_requeue_pi $COLOR -l -t 2000000000
+
+
+echo
+./futex_requeue_pi_mismatched_ops $COLOR
+
+echo
+./futex_requeue_pi_signal_restart $COLOR
+
+echo
+./futex_wait_timeout $COLOR
+
+echo
+./futex_wait_wouldblock $COLOR
+
+echo
+./futex_wait_uninitialized_heap $COLOR
+./futex_wait_private_mapped_file $COLOR
diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h
new file mode 100644
index 000000000..f861da3e3
--- /dev/null
+++ b/tools/testing/selftests/futex/include/atomic.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * GCC atomic builtin wrappers
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+typedef struct {
+ volatile int val;
+} atomic_t;
+
+#define ATOMIC_INITIALIZER { 0 }
+
+/**
+ * atomic_cmpxchg() - Atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Return the old value of addr->val.
+ */
+static inline int
+atomic_cmpxchg(atomic_t *addr, int oldval, int newval)
+{
+ return __sync_val_compare_and_swap(&addr->val, oldval, newval);
+}
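+
+/*
+ * Example (illustrative sketch, not used by the tests): a spin-acquire
+ * built on atomic_cmpxchg(), assuming a lock word initialized to 0:
+ *
+ *	atomic_t lock = ATOMIC_INITIALIZER;
+ *
+ *	while (atomic_cmpxchg(&lock, 0, 1) != 0)
+ *		;	// loop until the old value seen was 0
+ */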
+
+/**
+ * atomic_inc() - Atomic increment
+ * @addr: Address of the variable to increment
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_inc(atomic_t *addr)
+{
+ return __sync_add_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_dec() - Atomic decrement
+ * @addr: Address of the variable to decrement
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_dec(atomic_t *addr)
+{
+ return __sync_sub_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_set() - Atomic set
+ * @addr: Address of the variable to set
+ * @newval: New value for the atomic_t
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_set(atomic_t *addr, int newval)
+{
+ addr->val = newval;
+ return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
new file mode 100644
index 000000000..b98c3aba7
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -0,0 +1,266 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _FUTEXTEST_H
+#define _FUTEXTEST_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+typedef volatile u_int32_t futex_t;
+#define FUTEX_INITIALIZER 0
+
+/* Define the newer op codes if the system header file is not up to date. */
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET 9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET 10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI 11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI 12
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI_PRIVATE
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \
+ FUTEX_PRIVATE_FLAG)
+#endif
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2:	address of second futex for some ops
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of a static inline function
+ * because some of the types are overloaded (timeout is used for nr_requeue,
+ * for example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
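+
+/*
+ * Example (illustrative): with the futex_wait() wrapper below,
+ *
+ *	futex_wait(&f, f, NULL, FUTEX_PRIVATE_FLAG);
+ *
+ * expands to
+ *
+ *	syscall(SYS_futex, &f, FUTEX_WAIT | FUTEX_PRIVATE_FLAG, f, NULL,
+ *		NULL, 0);
+ */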
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(futex_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wait_bitset() - block on uaddr with bitset
+ * @bitset: bitset to be used with futex_wake_bitset
+ */
+static inline int
+futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout,
+ u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset
+ * @bitset: bitset to compare with that used in futex_wait_bitset
+ */
+static inline int
+futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset,
+ opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect: whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(futex_t *uaddr, int opflags)
+{
+ return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake_op() - atomically operate on *uaddr2, wake waiters on uaddr,
+ * and conditionally wake waiters on uaddr2
+ */
+static inline int
+futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2,
+ int wake_op, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op,
+ opflags);
+}
+
+/**
+ * futex_requeue() - requeue without expected value comparison, deprecated
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ *
+ * Due to its inherently racy implementation, futex_requeue() is deprecated in
+ * favor of futex_cmp_requeue().
+ */
+static inline int
+futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue,
+ int opflags)
+{
+ return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2,
+ struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+ opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware)
+ * @uaddr: non-PI futex source
+ * @uaddr2: PI futex target
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
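+
+/*
+ * Example (illustrative): the typical requeue_pi pairing. A waiter does
+ *
+ *	futex_wait_requeue_pi(&f1, f1, &f2, NULL, FUTEX_PRIVATE_FLAG);
+ *
+ * while the owner of f2 requeues it and then releases the PI mutex:
+ *
+ *	futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+ *	futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+ */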
+
+/**
+ * futex_cmpxchg() - atomic compare and exchange
+ * @uaddr: The address of the futex to be modified
+ * @oldval: The expected value of the futex
+ * @newval: The new value to try and assign the futex
+ *
+ * Implement cmpxchg using gcc atomic builtins.
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * Return the old futex value.
+ */
+static inline u_int32_t
+futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval)
+{
+ return __sync_val_compare_and_swap(uaddr, oldval, newval);
+}
+
+/**
+ * futex_dec() - atomic decrement of the futex value
+ * @uaddr: The address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_dec(futex_t *uaddr)
+{
+ return __sync_sub_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_inc() - atomic increment of the futex value
+ * @uaddr: the address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_inc(futex_t *uaddr)
+{
+ return __sync_add_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_set() - set the futex value
+ * @uaddr:	the address of the futex to be modified
+ * @newval:	the new value for the futex
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_set(futex_t *uaddr, u_int32_t newval)
+{
+ *uaddr = newval;
+ return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
new file mode 100644
index 000000000..01989644e
--- /dev/null
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -0,0 +1,152 @@
+/******************************************************************************
+ *
+ * Copyright © International Business Machines Corp., 2009
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * DESCRIPTION
+ * Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ * Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _LOGGING_H
+#define _LOGGING_H
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include "kselftest.h"
+
+/*
+ * Define PASS, ERROR, and FAIL strings with and without color escape
+ * sequences, default to no color.
+ */
+#define ESC 0x1B, '['
+#define BRIGHT '1'
+#define GREEN '3', '2'
+#define YELLOW '3', '3'
+#define RED '3', '1'
+#define ESCEND 'm'
+#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
+#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
+#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
+#define RESET_COLOR ESC, '0', 'm'
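+/* e.g. BRIGHT_GREEN expands to the bytes of the escape sequence "\x1B[1;32m" */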
+static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
+ RESET_COLOR, 0};
+static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
+ RESET_COLOR, 0};
+static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
+ RESET_COLOR, 0};
+static const char INFO_NORMAL[] = " INFO";
+static const char PASS_NORMAL[] = " PASS";
+static const char ERROR_NORMAL[] = "ERROR";
+static const char FAIL_NORMAL[] = " FAIL";
+const char *INFO = INFO_NORMAL;
+const char *PASS = PASS_NORMAL;
+const char *ERROR = ERROR_NORMAL;
+const char *FAIL = FAIL_NORMAL;
+
+/* Verbosity setting for INFO messages */
+#define VQUIET 0
+#define VCRITICAL 1
+#define VINFO 2
+#define VMAX VINFO
+int _verbose = VCRITICAL;
+
+/* Functional test return codes */
+#define RET_PASS 0
+#define RET_ERROR -1
+#define RET_FAIL -2
+
+/**
+ * log_color() - Use colored output for PASS, ERROR, and FAIL strings
+ * @use_color: use color (1) or not (0)
+ */
+void log_color(int use_color)
+{
+ if (use_color) {
+ PASS = PASS_COLOR;
+ ERROR = ERROR_COLOR;
+ FAIL = FAIL_COLOR;
+ } else {
+ PASS = PASS_NORMAL;
+ ERROR = ERROR_NORMAL;
+ FAIL = FAIL_NORMAL;
+ }
+}
+
+/**
+ * log_verbosity() - Set verbosity of test output
+ * @level: verbosity level: VQUIET, VCRITICAL, or VINFO (clamped to this range)
+ *
+ * VINFO enables INFO messages, VCRITICAL and above enable FAIL and ERROR
+ * messages, and VQUIET suppresses all of them.
+ */
+void log_verbosity(int level)
+{
+ if (level > VMAX)
+ level = VMAX;
+ else if (level < 0)
+ level = 0;
+ _verbose = level;
+}
+
+/**
+ * print_result() - Print standard PASS | ERROR | FAIL results
+ * @test_name:	the name of the test, printed with the result
+ * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL
+ *
+ * print_result() is primarily intended for functional tests.
+ */
+void print_result(const char *test_name, int ret)
+{
+ switch (ret) {
+ case RET_PASS:
+ ksft_test_result_pass("%s\n", test_name);
+ ksft_print_cnts();
+ return;
+ case RET_ERROR:
+ ksft_test_result_error("%s\n", test_name);
+ ksft_print_cnts();
+ return;
+ case RET_FAIL:
+ ksft_test_result_fail("%s\n", test_name);
+ ksft_print_cnts();
+ return;
+ }
+}
+
+/* log level macros */
+#define info(message, vargs...) \
+do { \
+ if (_verbose >= VINFO) \
+ fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
+} while (0)
+
+#define error(message, err, args...) \
+do { \
+ if (_verbose >= VCRITICAL) {\
+ if (err) \
+ fprintf(stderr, "\t%s: %s: "message, \
+ ERROR, strerror(err), ##args); \
+ else \
+ fprintf(stderr, "\t%s: "message, ERROR, ##args); \
+ } \
+} while (0)
+
+#define fail(message, args...) \
+do { \
+ if (_verbose >= VCRITICAL) \
+ fprintf(stderr, "\t%s: "message, FAIL, ##args); \
+} while (0)
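+
+/*
+ * Example (illustrative):
+ *
+ *	info("waiting on f1: %x\n", f1);	// printed at VINFO
+ *	error("pthread_create\n", errno);	// prefixed with strerror(errno)
+ *	fail("unexpected wakeup\n");		// printed at VCRITICAL and above
+ */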
+
+#endif
diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh
new file mode 100755
index 000000000..88bcb1767
--- /dev/null
+++ b/tools/testing/selftests/futex/run.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+###############################################################################
+#
+# Copyright © International Business Machines Corp., 2009
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# DESCRIPTION
+# Run all tests under the functional, performance, and stress directories.
+# Format and summarize the results.
+#
+# AUTHOR
+# Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#
+###############################################################################
+
+# Test for a color capable shell and pass the result to the subdir scripts
+USE_COLOR=0
+tput setf 7 || tput setaf 7
+if [ $? -eq 0 ]; then
+ USE_COLOR=1
+ tput sgr0
+fi
+export USE_COLOR
+
+(cd functional; ./run.sh)
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
new file mode 100755
index 000000000..a27e2eec3
--- /dev/null
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+#
+# SPDX-License-Identifier: GPL-2.0
+# gen_kselftest_tar
+# Generate kselftest tarball
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
+
+# main
+main()
+{
+ if [ "$#" -eq 0 ]; then
+ echo "$0: Generating default compression gzip"
+ copts="cvzf"
+ ext=".tar.gz"
+ else
+ case "$1" in
+ tar)
+ copts="cvf"
+ ext=".tar"
+ ;;
+ targz)
+ copts="cvzf"
+ ext=".tar.gz"
+ ;;
+ tarbz2)
+ copts="cvjf"
+ ext=".tar.bz2"
+ ;;
+ tarxz)
+ copts="cvJf"
+ ext=".tar.xz"
+ ;;
+ *)
+ echo "Unknown tarball format $1"
+ exit 1
+ ;;
+ esac
+ fi
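+
+	# Example (illustrative): "./gen_kselftest_tar.sh tarxz" selects
+	# copts="cvJf" and ext=".tar.xz", producing kselftest.tar.xz.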
+
+ install_dir=./kselftest
+
+	# Run install using INSTALL_KSFT_PATH override to generate the
+	# install directory
+	./kselftest_install.sh
+	tar $copts kselftest${ext} $install_dir
+	echo "Kselftest archive kselftest${ext} created!"
+
+	# clean up install directory
+	rm -rf kselftest
+}
+
+main "$@"
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
new file mode 100644
index 000000000..7d14f743d
--- /dev/null
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -0,0 +1 @@
+gpio-mockup-chardev
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
new file mode 100644
index 000000000..59ea4c461
--- /dev/null
+++ b/tools/testing/selftests/gpio/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS := gpio-mockup.sh
+BINARIES := gpio-mockup-chardev
+TEST_FILES := gpio-mockup-sysfs.sh $(BINARIES)
+EXTRA_PROGS := ../gpiogpio-event-mon ../gpiogpio-hammer ../gpiolsgpio
+EXTRA_DIRS := ../gpioinclude/
+EXTRA_OBJS := ../gpiogpio-event-mon-in.o ../gpiogpio-event-mon.o
+EXTRA_OBJS += ../gpiogpio-hammer-in.o ../gpiogpio-utils.o ../gpiolsgpio-in.o
+EXTRA_OBJS += ../gpiolsgpio.o
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+all: $(BINARIES)
+
+override define CLEAN
+ $(RM) $(BINARIES) $(EXTRA_PROGS) $(EXTRA_OBJS)
+ $(RM) -r $(EXTRA_DIRS)
+endef
+
+CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/
+LDLIBS += -lmount -I/usr/include/libmount
+
+$(BINARIES):| khdr
+$(BINARIES): ../../../gpio/gpio-utils.o
+
+../../../gpio/gpio-utils.o:
+ make ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) -C ../../../gpio
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
new file mode 100644
index 000000000..abaa6902b
--- /dev/null
+++ b/tools/testing/selftests/gpio/config
@@ -0,0 +1,2 @@
+CONFIG_GPIOLIB=y
+CONFIG_GPIO_MOCKUP=m
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
new file mode 100644
index 000000000..aaa1e9f08
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
@@ -0,0 +1,327 @@
+/*
+ * GPIO chardev test helper
+ *
+ * Copyright (C) 2016 Bamvor Jian Zhang
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <libmount.h>
+#include <err.h>
+#include <dirent.h>
+#include <linux/gpio.h>
+#include "../../../gpio/gpio-utils.h"
+
+#define CONSUMER "gpio-selftest"
+#define GC_NUM 10
+enum direction {
+ OUT,
+ IN
+};
+
+static int get_debugfs(char **path)
+{
+ struct libmnt_context *cxt;
+ struct libmnt_table *tb;
+ struct libmnt_iter *itr = NULL;
+ struct libmnt_fs *fs;
+ int found = 0, ret;
+
+ cxt = mnt_new_context();
+ if (!cxt)
+ err(EXIT_FAILURE, "libmount context allocation failed");
+
+ itr = mnt_new_iter(MNT_ITER_FORWARD);
+ if (!itr)
+ err(EXIT_FAILURE, "failed to initialize libmount iterator");
+
+ if (mnt_context_get_mtab(cxt, &tb))
+ err(EXIT_FAILURE, "failed to read mtab");
+
+ while (mnt_table_next_fs(tb, itr, &fs) == 0) {
+ const char *type = mnt_fs_get_fstype(fs);
+
+ if (!strcmp(type, "debugfs")) {
+ found = 1;
+ break;
+ }
+ }
+ if (found) {
+ ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs));
+ if (ret < 0)
+ err(EXIT_FAILURE, "failed to format string");
+ }
+
+ mnt_free_iter(itr);
+ mnt_free_context(cxt);
+
+ if (!found)
+ return -1;
+
+ return 0;
+}
+
+static int gpio_debugfs_get(const char *consumer, int *dir, int *value)
+{
+ char *debugfs;
+ FILE *f;
+ char *line = NULL;
+ size_t len = 0;
+ char *cur;
+ int found = 0;
+
+ if (get_debugfs(&debugfs) != 0)
+ err(EXIT_FAILURE, "debugfs is not mounted");
+
+ f = fopen(debugfs, "r");
+ if (!f)
+ err(EXIT_FAILURE, "read from gpio debugfs failed");
+
+ /*
+ * gpio-2 ( |gpio-selftest ) in lo
+ */
+ while (getline(&line, &len, f) != -1) {
+ cur = strstr(line, consumer);
+ if (cur == NULL)
+ continue;
+
+ cur = strchr(line, ')');
+ if (!cur)
+ continue;
+
+ cur += 2;
+ if (!strncmp(cur, "out", 3)) {
+ *dir = OUT;
+ cur += 4;
+ } else if (!strncmp(cur, "in", 2)) {
+ *dir = IN;
+ cur += 4;
+ }
+
+ if (!strncmp(cur, "hi", 2))
+ *value = 1;
+ else if (!strncmp(cur, "lo", 2))
+ *value = 0;
+
+ found = 1;
+ break;
+ }
+ free(debugfs);
+ fclose(f);
+ free(line);
+
+ if (!found)
+ return -1;
+
+ return 0;
+}
+
+static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret)
+{
+ struct gpiochip_info *cinfo;
+ struct gpiochip_info *current;
+ const struct dirent *ent;
+ DIR *dp;
+ char *chrdev_name;
+ int fd;
+ int i = 0;
+
+	cinfo = calloc(GC_NUM + 1, sizeof(struct gpiochip_info) * 4);
+ if (!cinfo)
+ err(EXIT_FAILURE, "gpiochip_info allocation failed");
+
+ current = cinfo;
+ dp = opendir("/dev");
+ if (!dp) {
+ *ret = -errno;
+ goto error_out;
+ } else {
+ *ret = 0;
+ }
+
+ while (ent = readdir(dp), ent) {
+ if (check_prefix(ent->d_name, "gpiochip")) {
+ *ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name);
+ if (*ret < 0)
+ goto error_out;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ *ret = -errno;
+ fprintf(stderr, "Failed to open %s\n",
+ chrdev_name);
+ goto error_close_dir;
+ }
+ *ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current);
+ if (*ret == -1) {
+				perror("Failed to issue CHIPINFO IOCTL");
+ goto error_close_dir;
+ }
+ close(fd);
+ if (strcmp(current->label, gpiochip_name) == 0
+ || check_prefix(current->label, gpiochip_name)) {
+ *ret = 0;
+ current++;
+ i++;
+ }
+ }
+ }
+
+ if ((!*ret && i == 0) || *ret < 0) {
+ free(cinfo);
+ cinfo = NULL;
+ }
+ if (!*ret && i > 0) {
+ cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i);
+ *ret = i;
+ }
+
+error_close_dir:
+ closedir(dp);
+error_out:
+ if (*ret < 0)
+ err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret));
+
+ return cinfo;
+}
+
+int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
+{
+ struct gpiohandle_data data;
+ unsigned int lines[] = {line};
+ int fd;
+ int debugfs_dir = IN;
+ int debugfs_value = 0;
+ int ret;
+
+ data.values[0] = value;
+ ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data,
+ CONSUMER);
+ if (ret < 0)
+ goto fail_out;
+ else
+ fd = ret;
+
+ ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value);
+ if (ret) {
+ ret = -EINVAL;
+ goto fail_out;
+ }
+ if (flag & GPIOHANDLE_REQUEST_INPUT) {
+ if (debugfs_dir != IN) {
+			errno = EINVAL;
+ ret = -errno;
+ }
+ } else if (flag & GPIOHANDLE_REQUEST_OUTPUT) {
+ if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
+ debugfs_value = !debugfs_value;
+
+ if (!(debugfs_dir == OUT && value == debugfs_value)) {
+			errno = EINVAL;
+ ret = -errno;
+ }
+ }
+ gpiotools_release_linehandle(fd);
+
+fail_out:
+ if (ret)
+ err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>",
+ cinfo->name, line, flag, value);
+
+ return ret;
+}
+
+void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line)
+{
+ printf("line<%d>", line);
+ gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0);
+ printf(".");
+ gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1);
+ printf(".");
+ gpio_pin_test(cinfo, line,
+ GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
+ 0);
+ printf(".");
+ gpio_pin_test(cinfo, line,
+ GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
+ 1);
+ printf(".");
+ gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0);
+ printf(".");
+}
+
+/*
+ * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip
+ * Return 0 if successful or exit with EXIT_FAILURE if test failed.
+ * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g.
+ * gpio-mockup
+ * is_valid_gpio_chip: Whether the gpio_chip is valid. 1 means valid,
+ * 0 means invalid which could not be found by
+ * list_gpiochip.
+ */
+int main(int argc, char *argv[])
+{
+ char *prefix;
+ int valid;
+ struct gpiochip_info *cinfo;
+ struct gpiochip_info *current;
+ int i;
+ int ret;
+
+ if (argc < 3) {
+		printf("Usage: %s prefix is_valid\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ prefix = argv[1];
+ valid = strcmp(argv[2], "true") == 0 ? 1 : 0;
+
+ printf("Test gpiochip %s: ", prefix);
+ cinfo = list_gpiochip(prefix, &ret);
+ if (!cinfo) {
+ if (!valid && ret == 0) {
+ printf("Invalid test successful\n");
+ ret = 0;
+ goto out;
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else if (cinfo && !valid) {
+ ret = -EINVAL;
+ goto out;
+ }
+ current = cinfo;
+ for (i = 0; i < ret; i++) {
+ gpio_pin_tests(current, 0);
+ gpio_pin_tests(current, current->lines - 1);
+ gpio_pin_tests(current, random() % current->lines);
+ current++;
+ }
+ ret = 0;
+ printf("successful\n");
+
+out:
+ if (ret)
+ fprintf(stderr, "gpio<%s> test failed\n", prefix);
+
+ if (cinfo)
+ free(cinfo);
+
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
new file mode 100755
index 000000000..dd269d877
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
@@ -0,0 +1,135 @@
+
+# SPDX-License-Identifier: GPL-2.0
+is_consistent()
+{
+ val=
+
+ active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low`
+ val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value`
+ dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction`
+
+ gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"`
+ dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'`
+ val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'`
+ if [ $val_debugfs = "lo" ]; then
+ val=0
+ elif [ $val_debugfs = "hi" ]; then
+ val=1
+ fi
+
+ if [ $active_low_sysfs = "1" ]; then
+ if [ $val = "0" ]; then
+ val="1"
+ else
+ val="0"
+ fi
+ fi
+
+ if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then
+ echo -n "."
+ else
+ echo "test fail, exit"
+ die
+ fi
+}
+
+test_pin_logic()
+{
+ nr=$1
+ direction=$2
+ active_low=$3
+ value=$4
+
+ echo $direction > $GPIO_SYSFS/gpio$nr/direction
+ echo $active_low > $GPIO_SYSFS/gpio$nr/active_low
+ if [ $direction = "out" ]; then
+ echo $value > $GPIO_SYSFS/gpio$nr/value
+ fi
+ is_consistent $nr
+}
+
+test_one_pin()
+{
+ nr=$1
+
+ echo -n "test pin<$nr>"
+
+ echo $nr > $GPIO_SYSFS/export 2>/dev/null
+
+ if [ X$? != X0 ]; then
+ echo "test GPIO pin $nr failed"
+ die
+ fi
+
+ #"Checking if the sysfs is consistent with debugfs: "
+ is_consistent $nr
+
+ #"Checking the logic of active_low: "
+ test_pin_logic $nr out 1 1
+ test_pin_logic $nr out 1 0
+ test_pin_logic $nr out 0 1
+ test_pin_logic $nr out 0 0
+
+ #"Checking the logic of direction: "
+ test_pin_logic $nr in 1 1
+ test_pin_logic $nr out 1 0
+ test_pin_logic $nr low 0 1
+ test_pin_logic $nr high 0 0
+
+ echo $nr > $GPIO_SYSFS/unexport
+
+ echo "successful"
+}
+
+test_one_pin_fail()
+{
+ nr=$1
+
+ echo $nr > $GPIO_SYSFS/export 2>/dev/null
+
+ if [ X$? != X0 ]; then
+ echo "test invalid pin $nr successful"
+ else
+ echo "test invalid pin $nr failed"
+ echo $nr > $GPIO_SYSFS/unexport 2>/dev/null
+ die
+ fi
+}
+
+list_chip()
+{
+ echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null`
+}
+
+test_chip()
+{
+ chip=$1
+ name=`basename $chip`
+ base=`cat $chip/base`
+ ngpio=`cat $chip/ngpio`
+ printf "%-10s %-5s %-5s\n" $name $base $ngpio
+ if [ $ngpio = "0" ]; then
+		echo "a gpiochip with zero gpios is not allowed."
+ fi
+ test_one_pin $base
+ test_one_pin $(($base + $ngpio - 1))
+ test_one_pin $((( RANDOM % $ngpio ) + $base ))
+}
+
+test_chips_sysfs()
+{
+ gpiochip=`list_chip $module`
+ if [ X"$gpiochip" = X ]; then
+ if [ X"$valid" = Xfalse ]; then
+ echo "successful"
+ else
+ echo "fail"
+ die
+ fi
+ else
+ for chip in $gpiochip; do
+ test_chip $chip
+ done
+ fi
+}
+
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
new file mode 100755
index 000000000..7f35b9880
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#exit status
+#1: Internal error
+#2: sysfs/debugfs not mounted
+#3: module insertion failed when gpio-mockup is a module.
+#4: test skipped, e.g. when run as a non-root user.
+#5: other reason.
+
+SYSFS=
+GPIO_SYSFS=
+GPIO_DRV_SYSFS=
+DEBUGFS=
+GPIO_DEBUGFS=
+dev_type=
+module=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+usage()
+{
+ echo "Usage:"
+ echo "$0 [-f] [-m name] [-t type]"
+	echo "-f: full test. It may conflict with existing gpio devices."
+	echo "-m: module name, default is gpio-mockup. Other gpio devices"
+	echo "    can also be tested."
+	echo "-t: interface type: chardev (char device) or sysfs (being"
+	echo "    deprecated). The default is chardev."
+ echo ""
+ echo "$0 -h"
+ echo "This usage"
+}
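+
+# Example (illustrative): run the full test of the gpio-mockup module over
+# the default chardev interface:
+#	./gpio-mockup.sh -f -m gpio-mockup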
+
+prerequisite()
+{
+ msg="skip all tests:"
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit 2
+ fi
+ GPIO_SYSFS=`echo $SYSFS/class/gpio`
+ GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio`
+ DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+ if [ ! -d "$DEBUGFS" ]; then
+ echo $msg debugfs is not mounted >&2
+ exit 2
+ fi
+ GPIO_DEBUGFS=`echo $DEBUGFS/gpio`
+ source gpio-mockup-sysfs.sh
+}
+
+try_insert_module()
+{
+ if [ -d "$GPIO_DRV_SYSFS" ]; then
+		echo "$GPIO_DRV_SYSFS exists. Skipping module insertion."
+ else
+ modprobe -q $module $1
+ if [ X$? != X0 ]; then
+ echo $msg insmod $module failed >&2
+ exit 3
+ fi
+ fi
+}
+
+remove_module()
+{
+ modprobe -r -q $module
+}
+
+die()
+{
+ remove_module
+ exit 5
+}
+
+test_chips()
+{
+ if [ X$dev_type = Xsysfs ]; then
+		echo "WARNING: the sysfs ABI of gpio is going to be deprecated."
+ test_chips_sysfs $*
+ else
+ $BASE/gpio-mockup-chardev $*
+ fi
+}
+
+gpio_test()
+{
+ param=$1
+ valid=$2
+
+ if [ X"$param" = X ]; then
+ die
+ fi
+ try_insert_module "gpio_mockup_ranges=$param"
+ echo -n "GPIO $module test with ranges: <"
+ echo "$param>: "
+ printf "%-10s %s\n" $param
+ test_chips $module $valid
+ remove_module
+}
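+
+# Note (illustrative, based on how gpio-mockup parses gpio_mockup_ranges):
+# the ranges are "<base>,<end>" pairs, one per mockup gpiochip; a base of -1
+# requests dynamic allocation, with the second number then taken as the line
+# count. For example,
+#	gpio_test "0,32,-1,32" true
+# creates a 32-line chip at base 0 plus a dynamically placed 32-line chip.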
+
+BASE=`dirname $0`
+
+dev_type=
+TEMP=`getopt -o fhm:t: -n '$0' -- "$@"`
+
+if [ "$?" != "0" ]; then
+	echo "Parameter processing failed, terminating..." >&2
+ exit 1
+fi
+
+# Note the quotes around `$TEMP': they are essential!
+eval set -- "$TEMP"
+
+while true; do
+ case $1 in
+ -f)
+ full_test=true
+ shift
+ ;;
+ -h)
+ usage
+ exit
+ ;;
+ -m)
+ module=$2
+ shift 2
+ ;;
+ -t)
+ dev_type=$2
+ shift 2
+ ;;
+ --)
+ shift
+ break
+ ;;
+ *)
+ echo "Internal error!"
+ exit 1
+ ;;
+ esac
+done
+
+if [ X"$module" = X ]; then
+ module="gpio-mockup"
+fi
+
+if [ X$dev_type != Xsysfs ]; then
+ dev_type="chardev"
+fi
+
+prerequisite
+
+echo "1. Test dynamic allocation of gpio successful means insert gpiochip and"
+echo " manipulate gpio pin successful"
+gpio_test "-1,32" true
+gpio_test "-1,32,-1,32" true
+gpio_test "-1,32,-1,32,-1,32" true
+if [ X$full_test = Xtrue ]; then
+ gpio_test "-1,32,32,64" true
+ gpio_test "-1,32,40,64,-1,5" true
+ gpio_test "-1,32,32,64,-1,32" true
+ gpio_test "0,32,32,64,-1,32,-1,32" true
+ gpio_test "-1,32,-1,32,0,32,32,64" true
+	echo "2. Do basic test: success means the gpiochip was inserted and"
+	echo "   its gpio pins could be manipulated."
+ gpio_test "0,32" true
+ gpio_test "0,32,32,64" true
+ gpio_test "0,32,40,64,64,96" true
+fi
+echo "3. Error test: successful means insert gpiochip failed"
+echo "3.1 Test number of gpio overflow"
+#Currently the max number of gpios (1024) is defined by the arm architecture.
+gpio_test "-1,32,-1,1024" false
+if [ X$full_test = Xtrue ]; then
+ echo "3.2 Test zero line of gpio"
+ gpio_test "0,0" false
+ echo "3.3 Test range overlap"
+ echo "3.3.1 Test corner case"
+ gpio_test "0,32,0,1" false
+ gpio_test "0,32,32,64,32,40" false
+ gpio_test "0,32,35,64,35,45" false
+ gpio_test "0,32,31,32" false
+ gpio_test "0,32,32,64,36,37" false
+ gpio_test "0,32,35,64,34,36" false
+ echo "3.3.2 Test inserting invalid second gpiochip"
+ gpio_test "0,32,30,35" false
+ gpio_test "0,32,1,5" false
+ gpio_test "10,32,9,14" false
+ gpio_test "10,32,30,35" false
+ echo "3.3.3 Test others"
+ gpio_test "0,32,40,56,39,45" false
+ gpio_test "0,32,40,56,30,33" false
+ gpio_test "0,32,40,56,30,41" false
+ gpio_test "0,32,40,56,20,21" false
+fi
+
+echo GPIO test PASS
+
diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore
new file mode 100644
index 000000000..ab806edc8
--- /dev/null
+++ b/tools/testing/selftests/ia64/.gitignore
@@ -0,0 +1 @@
+aliasing-test
diff --git a/tools/testing/selftests/ia64/Makefile b/tools/testing/selftests/ia64/Makefile
new file mode 100644
index 000000000..4bce1a84b
--- /dev/null
+++ b/tools/testing/selftests/ia64/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := aliasing-test
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ia64/aliasing-test.c b/tools/testing/selftests/ia64/aliasing-test.c
new file mode 100644
index 000000000..62a190d45
--- /dev/null
+++ b/tools/testing/selftests/ia64/aliasing-test.c
@@ -0,0 +1,263 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/pci.h>
+
+int sum;
+
+static int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+ int fd, rc;
+ void *addr;
+ int *c;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
+ rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
+ if (rc == -1)
+ perror("PCIIOC_MMAP_IS_MEM ioctl");
+ }
+
+ addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+ if (addr == MAP_FAILED)
+ return 1;
+
+ if (touch) {
+ c = (int *) addr;
+ while (c < (int *) (addr + length))
+ sum += *c++;
+ }
+
+ rc = munmap(addr, length);
+ if (rc == -1) {
+ perror("munmap");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = map_mem(path2, offset, length, touch);
+ if (rc == 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+ else {
+ fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_tree(path2, file, offset, length, touch);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+char buf[1024];
+
+static int read_rom(char *path)
+{
+ int fd, rc;
+ size_t size = 0;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+	rc = write(fd, "1", 2);
+	if (rc <= 0) {
+		perror("write");
+		close(fd);
+		return -1;
+	}
+
+ do {
+ rc = read(fd, buf, sizeof(buf));
+ if (rc > 0)
+ size += rc;
+ } while (rc > 0);
+
+ close(fd);
+ return size;
+}
+
+static int scan_rom(char *path, char *file)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = read_rom(path2);
+
+ /*
+ * It's OK if the ROM is unreadable. Maybe there
+ * is no ROM, or some other error occurred. The
+ * important thing is that no MCA happened.
+ */
+ if (rc > 0)
+ fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
+ else {
+ fprintf(stderr, "PASS: %s not readable\n", path2);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_rom(path2, file);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+int main(void)
+{
+ int rc;
+
+ if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+
+ /*
+ * It's not safe to blindly read the VGA frame buffer. If you know
+ * how to poke the card the right way, it should respond, but it's
+ * not safe in general. Many machines, e.g., Intel chipsets, cover
+ * up a non-responding card by just returning -1, but others will
+ * report the failure as a machine check.
+ */
+ if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+
+ if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+
+ /*
+ * Often you can map all the individual pieces above (0-0xA0000,
+ * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+ * thing at once. This is because the individual pieces use different
+ * attributes, and there's no single attribute supported over the
+ * whole region.
+ */
+ rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+ if (rc == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
+
+ scan_rom("/sys/devices", "rom");
+
+ scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
+ scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/intel_pstate/.gitignore b/tools/testing/selftests/intel_pstate/.gitignore
new file mode 100644
index 000000000..3bfcbae5f
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/.gitignore
@@ -0,0 +1,2 @@
+aperf
+msr
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
new file mode 100644
index 000000000..7340fd6a9
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
+LDLIBS := $(LDLIBS) -lm
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
+TEST_GEN_FILES := msr aperf
+endif
+
+TEST_PROGS := run.sh
+
+include ../lib.mk
+
+$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
new file mode 100644
index 000000000..f6cd03a87
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/timeb.h>
+#include <sched.h>
+#include <errno.h>
+#include <string.h>
+#include "../kselftest.h"
+
+void usage(char *name) {
+ printf ("Usage: %s cpunum\n", name);
+}
+
+int main(int argc, char **argv) {
+ unsigned int i, cpu, fd;
+ char msr_file_name[64];
+ long long tsc, old_tsc, new_tsc;
+ long long aperf, old_aperf, new_aperf;
+ long long mperf, old_mperf, new_mperf;
+ struct timeb before, after;
+ long long int start, finish, total;
+ cpu_set_t cpuset;
+
+ if (argc != 2) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ errno = 0;
+ cpu = strtol(argv[1], (char **) NULL, 10);
+
+ if (errno) {
+ usage(argv[0]);
+ return 1;
+ }
+
+ sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_file_name, O_RDONLY);
+
+ if (fd == -1) {
+ printf("/dev/cpu/%d/msr: %s\n", cpu, strerror(errno));
+ return KSFT_SKIP;
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpu_set_t), &cpuset)) {
+ perror("Failed to set cpu affinity");
+ return 1;
+ }
+
+ ftime(&before);
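+	/*
+	 * MSR offsets below (Intel SDM): 0x10 is the TSC, 0xe7 is
+	 * IA32_MPERF and 0xe8 is IA32_APERF; the effective frequency is
+	 * the TSC rate scaled by APERF/MPERF.
+	 */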
+	pread(fd, &old_tsc, sizeof(old_tsc), 0x10);
+	pread(fd, &old_aperf, sizeof(old_aperf), 0xe8);
+	pread(fd, &old_mperf, sizeof(old_mperf), 0xe7);
+
+ for (i=0; i<0x8fffffff; i++) {
+ sqrt(i);
+ }
+
+ ftime(&after);
+	pread(fd, &new_tsc, sizeof(new_tsc), 0x10);
+	pread(fd, &new_aperf, sizeof(new_aperf), 0xe8);
+	pread(fd, &new_mperf, sizeof(new_mperf), 0xe7);
+
+ tsc = new_tsc-old_tsc;
+ aperf = new_aperf-old_aperf;
+ mperf = new_mperf-old_mperf;
+
+ start = before.time*1000 + before.millitm;
+ finish = after.time*1000 + after.millitm;
+ total = finish - start;
+
+ printf("runTime: %4.2f\n", 1.0*total/1000);
+ printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
+ return 0;
+}
diff --git a/tools/testing/selftests/intel_pstate/msr.c b/tools/testing/selftests/intel_pstate/msr.c
new file mode 100644
index 000000000..88fdd2a4b
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/msr.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <math.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/timeb.h>
+#include <sched.h>
+#include <errno.h>
+
+
+int main(int argc, char **argv) {
+ int cpu, fd;
+ long long msr;
+ char msr_file_name[64];
+
+ if (argc != 2)
+ return 1;
+
+ errno = 0;
+ cpu = strtol(argv[1], (char **) NULL, 10);
+
+ if (errno)
+ return 1;
+
+ sprintf(msr_file_name, "/dev/cpu/%d/msr", cpu);
+ fd = open(msr_file_name, O_RDONLY);
+
+ if (fd == -1) {
+ perror("Failed to open");
+ return 1;
+ }
+
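+	/* MSR 0x199 is IA32_PERF_CTL, the requested performance state. */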
+ pread(fd, &msr, sizeof(msr), 0x199);
+
+ printf("msr 0x199: 0x%llx\n", msr);
+ return 0;
+}
diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh
new file mode 100755
index 000000000..e7008f614
--- /dev/null
+++ b/tools/testing/selftests/intel_pstate/run.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test runs on Intel x86 based hardware that supports the intel_pstate
+# driver. The test checks the frequency settings from the maximum turbo
+# state to the minimum supported frequency, in decrements of 100MHz. The
+# test runs the aperf.c program to put load on each processor.
+#
+# The results are displayed in a table which indicates the "Target" state,
+# i.e. the requested frequency in MHz, the Actual frequency, as read from
+# /proc/cpuinfo, the difference between the Target and Actual frequencies,
+# the value of MSR 0x199 (MSR_IA32_PERF_CTL), which indicates what
+# pstate the cpu is in, and the value of
+# /sys/devices/system/cpu/intel_pstate/max_perf_pct X the maximum turbo state.
+#
+# Notes: In some cases several frequency values may be placed in the
+# /tmp/result.X files. This is done on purpose in order to catch cases
+# where the pstate driver may not be working at all. For example, several
+# "similar" frequencies may appear in the file:
+#
+#/tmp/result.3100:1:cpu MHz : 2899.980
+#/tmp/result.3100:2:cpu MHz : 2900.000
+#/tmp/result.3100:3:msr 0x199: 0x1e00
+#/tmp/result.3100:4:max_perf_pct 94
+#
+# and the test will error out in those cases. The result.X file can be checked
+# for consistency and modified to remove the extra MHz values. The result.X
+# files can be re-evaluated by setting EVALUATE_ONLY to 1 below.
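+#
+# Example (illustrative): run "sudo ./run.sh" for a full sweep. To
+# re-evaluate previously collected /tmp/result.X files without running
+# the load again, set EVALUATE_ONLY to 1 below and re-run the script.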
+
+EVALUATE_ONLY=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/ | grep -q x86; then
+ echo "$0 # Skipped: Test can only run on x86 architectures."
+ exit $ksft_skip
+fi
+
+msg="skip all tests:"
+if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+fi
+
+max_cpus=$(($(nproc)-1))
+
+function run_test () {
+
+ file_ext=$1
+ for cpu in `seq 0 $max_cpus`
+ do
+ echo "launching aperf load on $cpu"
+ ./aperf $cpu &
+ done
+
+ echo "sleeping for 5 seconds"
+ sleep 5
+ grep MHz /proc/cpuinfo | sort -u > /tmp/result.freqs
+ num_freqs=$(wc -l /tmp/result.freqs | awk ' { print $1 } ')
+ if [ $num_freqs -ge 2 ]; then
+ tail -n 1 /tmp/result.freqs > /tmp/result.$1
+ else
+ cp /tmp/result.freqs /tmp/result.$1
+ fi
+ ./msr 0 >> /tmp/result.$1
+
+ max_perf_pct=$(cat /sys/devices/system/cpu/intel_pstate/max_perf_pct)
+ echo "max_perf_pct $max_perf_pct" >> /tmp/result.$1
+
+ for job in `jobs -p`
+ do
+ echo "waiting for job id $job"
+ wait $job
+ done
+}
+
+#
+# MAIN (ALL UNITS IN MHZ)
+#
+
+# Get the marketing frequency
+_mkt_freq=$(cat /proc/cpuinfo | grep -m 1 "model name" | awk '{print $NF}')
+_mkt_freq=$(echo $_mkt_freq | tr -d '[:alpha:][:punct:]')
+mkt_freq=${_mkt_freq}0
+
+# Get the ranges from cpupower
+_min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ')
+min_freq=$(($_min_freq / 1000))
+_max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ')
+max_freq=$(($_max_freq / 1000))
+
+
+[ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq`
+do
+ echo "Setting maximum frequency to $freq"
+ cpupower frequency-set -g powersave --max=${freq}MHz >& /dev/null
+ run_test $freq
+done
+
+[ $EVALUATE_ONLY -eq 0 ] && cpupower frequency-set -g powersave --max=${max_freq}MHz >& /dev/null
+
+echo "========================================================================"
+echo "The marketing frequency of the cpu is $mkt_freq MHz"
+echo "The maximum frequency of the cpu is $max_freq MHz"
+echo "The minimum frequency of the cpu is $min_freq MHz"
+
+# make a pretty table
+echo "Target Actual Difference MSR(0x199) max_perf_pct" | tr " " "\n" > /tmp/result.tab
+for freq in `seq $max_freq -100 $min_freq`
+do
+ result_freq=$(cat /tmp/result.${freq} | grep "cpu MHz" | awk ' { print $4 } ' | awk -F "." ' { print $1 } ')
+ msr=$(cat /tmp/result.${freq} | grep "msr" | awk ' { print $3 } ')
+ max_perf_pct=$(cat /tmp/result.${freq} | grep "max_perf_pct" | awk ' { print $2 } ' )
+ cat >> /tmp/result.tab << EOF
+$freq
+$result_freq
+$((result_freq - freq))
+$msr
+$((max_perf_pct * max_freq))
+EOF
+done
+
+# print the table
+pr -aTt -5 < /tmp/result.tab
+
+exit 0
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
new file mode 100644
index 000000000..9af04c935
--- /dev/null
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -0,0 +1,2 @@
+msgque_test
+msgque
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
new file mode 100644
index 000000000..1c4448a84
--- /dev/null
+++ b/tools/testing/selftests/ipc/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+ ARCH := x86
+ CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+ ARCH := x86
+ CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := msgque
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/ipc/config b/tools/testing/selftests/ipc/config
new file mode 100644
index 000000000..070244710
--- /dev/null
+++ b/tools/testing/selftests/ipc/config
@@ -0,0 +1,2 @@
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
new file mode 100644
index 000000000..5ec4d9e18
--- /dev/null
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/msg.h>
+#include <fcntl.h>
+
+#include "../kselftest.h"
+
+#define MAX_MSG_SIZE 32
+
+struct msg1 {
+ int msize;
+ long mtype;
+ char mtext[MAX_MSG_SIZE];
+};
+
+#define TEST_STRING "Test sysv5 msg"
+#define MSG_TYPE 1
+
+#define ANOTHER_TEST_STRING "Yet another test sysv5 msg"
+#define ANOTHER_MSG_TYPE 26538
+
+struct msgque_data {
+ key_t key;
+ int msq_id;
+ int qbytes;
+ int qnum;
+ int mode;
+ struct msg1 *messages;
+};
+
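+/*
+ * Recreate the queue with its original id by seeding
+ * /proc/sys/kernel/msg_next_id (available with
+ * CONFIG_CHECKPOINT_RESTORE), then re-inject the saved messages.
+ */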
+int restore_queue(struct msgque_data *msgque)
+{
+ int fd, ret, id, i;
+ char buf[32];
+
+ fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY);
+ if (fd == -1) {
+ printf("Failed to open /proc/sys/kernel/msg_next_id\n");
+ return -errno;
+ }
+ sprintf(buf, "%d", msgque->msq_id);
+
+ ret = write(fd, buf, strlen(buf));
+ if (ret != strlen(buf)) {
+ printf("Failed to write to /proc/sys/kernel/msg_next_id\n");
+ return -errno;
+ }
+
+ id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL);
+ if (id == -1) {
+ printf("Failed to create queue\n");
+ return -errno;
+ }
+
+ if (id != msgque->msq_id) {
+ printf("Restored queue has wrong id (%d instead of %d)\n",
+ id, msgque->msq_id);
+ ret = -EFAULT;
+ goto destroy;
+ }
+
+ for (i = 0; i < msgque->qnum; i++) {
+ if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype,
+ msgque->messages[i].msize, IPC_NOWAIT) != 0) {
+ printf("msgsnd failed (%m)\n");
+ ret = -errno;
+ goto destroy;
+		}
+ }
+ return 0;
+
+destroy:
+ if (msgctl(id, IPC_RMID, NULL))
+ printf("Failed to destroy queue: %d\n", -errno);
+ return ret;
+}
+
+int check_and_destroy_queue(struct msgque_data *msgque)
+{
+ struct msg1 message;
+ int cnt = 0, ret;
+
+ while (1) {
+ ret = msgrcv(msgque->msq_id, &message.mtype, MAX_MSG_SIZE,
+ 0, IPC_NOWAIT);
+ if (ret < 0) {
+ if (errno == ENOMSG)
+ break;
+ printf("Failed to read IPC message: %m\n");
+ ret = -errno;
+ goto err;
+ }
+ if (ret != msgque->messages[cnt].msize) {
+ printf("Wrong message size: %d (expected %d)\n", ret,
+ msgque->messages[cnt].msize);
+ ret = -EINVAL;
+ goto err;
+ }
+ if (message.mtype != msgque->messages[cnt].mtype) {
+ printf("Wrong message type\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) {
+ printf("Wrong message content\n");
+ ret = -EINVAL;
+ goto err;
+ }
+ cnt++;
+ }
+
+ if (cnt != msgque->qnum) {
+ printf("Wrong message number\n");
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (msgctl(msgque->msq_id, IPC_RMID, NULL)) {
+ printf("Failed to destroy queue: %d\n", -errno);
+ return -errno;
+ }
+ return ret;
+}
+
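+/*
+ * Snapshot the queue: find its kernel index via MSG_STAT, then copy
+ * each message out non-destructively with MSG_COPY.
+ */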
+int dump_queue(struct msgque_data *msgque)
+{
+ struct msqid_ds ds;
+ int kern_id;
+ int i, ret;
+
+ for (kern_id = 0; kern_id < 256; kern_id++) {
+ ret = msgctl(kern_id, MSG_STAT, &ds);
+ if (ret < 0) {
+ if (errno == EINVAL)
+ continue;
+ printf("Failed to get stats for IPC queue with id %d\n",
+ kern_id);
+ return -errno;
+ }
+
+ if (ret == msgque->msq_id)
+ break;
+ }
+
+ msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum);
+ if (msgque->messages == NULL) {
+ printf("Failed to get stats for IPC queue\n");
+ return -ENOMEM;
+ }
+
+ msgque->qnum = ds.msg_qnum;
+ msgque->mode = ds.msg_perm.mode;
+ msgque->qbytes = ds.msg_qbytes;
+
+ for (i = 0; i < msgque->qnum; i++) {
+ ret = msgrcv(msgque->msq_id, &msgque->messages[i].mtype,
+ MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY);
+ if (ret < 0) {
+ printf("Failed to copy IPC message: %m (%d)\n", errno);
+ return -errno;
+ }
+ msgque->messages[i].msize = ret;
+ }
+ return 0;
+}
+
+int fill_msgque(struct msgque_data *msgque)
+{
+ struct msg1 msgbuf;
+
+ msgbuf.mtype = MSG_TYPE;
+ memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING));
+ if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING),
+ IPC_NOWAIT) != 0) {
+ printf("First message send failed (%m)\n");
+ return -errno;
+	}
+
+ msgbuf.mtype = ANOTHER_MSG_TYPE;
+ memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
+ if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING),
+ IPC_NOWAIT) != 0) {
+ printf("Second message send failed (%m)\n");
+ return -errno;
+	}
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int msg, pid, err;
+ struct msgque_data msgque;
+
+ if (getuid() != 0)
+ return ksft_exit_skip(
+ "Please run the test as root - Exiting.\n");
+
+ msgque.key = ftok(argv[0], 822155650);
+ if (msgque.key == -1) {
+ printf("Can't make key: %d\n", -errno);
+ return ksft_exit_fail();
+ }
+
+ msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
+ if (msgque.msq_id == -1) {
+ err = -errno;
+ printf("Can't create queue: %d\n", err);
+ goto err_out;
+ }
+
+ err = fill_msgque(&msgque);
+ if (err) {
+ printf("Failed to fill queue: %d\n", err);
+ goto err_destroy;
+ }
+
+ err = dump_queue(&msgque);
+ if (err) {
+ printf("Failed to dump queue: %d\n", err);
+ goto err_destroy;
+ }
+
+ err = check_and_destroy_queue(&msgque);
+ if (err) {
+ printf("Failed to check and destroy queue: %d\n", err);
+ goto err_out;
+ }
+
+ err = restore_queue(&msgque);
+ if (err) {
+ printf("Failed to restore queue: %d\n", err);
+ goto err_destroy;
+ }
+
+ err = check_and_destroy_queue(&msgque);
+ if (err) {
+ printf("Failed to test queue: %d\n", err);
+ goto err_out;
+ }
+ return ksft_exit_pass();
+
+err_destroy:
+ if (msgctl(msgque.msq_id, IPC_RMID, NULL)) {
+ printf("Failed to destroy queue: %d\n", -errno);
+ return ksft_exit_fail();
+ }
+err_out:
+ return ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore
new file mode 100644
index 000000000..5a9b3732b
--- /dev/null
+++ b/tools/testing/selftests/kcmp/.gitignore
@@ -0,0 +1,2 @@
+kcmp_test
+kcmp-test-file
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
new file mode 100644
index 000000000..47aa9887f
--- /dev/null
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := kcmp_test
+
+EXTRA_CLEAN := $(OUTPUT)/kcmp-test-file
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
new file mode 100644
index 000000000..6ea7b9f37
--- /dev/null
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/epoll.h>
+
+#include "../kselftest.h"
+
+static long sys_kcmp(int pid1, int pid2, int type, unsigned long fd1, unsigned long fd2)
+{
+ return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
+}
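+
+/*
+ * On success kcmp() returns 0 when both processes share the resource,
+ * or 1, 2 or 3 to order distinct resources; -1 with errno set indicates
+ * an error, which the KCMP_TYPES + 1 call below relies on.
+ */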
+
+static const unsigned int duped_num = 64;
+
+int main(int argc, char **argv)
+{
+ const char kpath[] = "kcmp-test-file";
+ struct kcmp_epoll_slot epoll_slot;
+ struct epoll_event ev;
+ int pid1, pid2;
+ int pipefd[2];
+ int fd1, fd2;
+ int epollfd;
+ int status;
+ int fddup;
+
+ fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
+ pid1 = getpid();
+
+ if (fd1 < 0) {
+ perror("Can't create file");
+ ksft_exit_fail();
+ }
+
+ if (pipe(pipefd)) {
+ perror("Can't create pipe");
+ ksft_exit_fail();
+ }
+
+ epollfd = epoll_create1(0);
+ if (epollfd < 0) {
+ perror("epoll_create1 failed");
+ ksft_exit_fail();
+ }
+
+ memset(&ev, 0xff, sizeof(ev));
+ ev.events = EPOLLIN | EPOLLOUT;
+
+ if (epoll_ctl(epollfd, EPOLL_CTL_ADD, pipefd[0], &ev)) {
+ perror("epoll_ctl failed");
+ ksft_exit_fail();
+ }
+
+ fddup = dup2(pipefd[1], duped_num);
+ if (fddup < 0) {
+ perror("dup2 failed");
+ ksft_exit_fail();
+ }
+
+ if (epoll_ctl(epollfd, EPOLL_CTL_ADD, fddup, &ev)) {
+ perror("epoll_ctl failed");
+ ksft_exit_fail();
+ }
+ close(fddup);
+
+ pid2 = fork();
+ if (pid2 < 0) {
+ perror("fork failed");
+ ksft_exit_fail();
+ }
+
+ if (!pid2) {
+ int pid2 = getpid();
+ int ret;
+
+ fd2 = open(kpath, O_RDWR, 0644);
+ if (fd2 < 0) {
+ perror("Can't open file");
+ ksft_exit_fail();
+ }
+
+ /* An example of output and arguments */
+ printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
+ "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
+ "INV: %2ld\n",
+ pid1, pid2,
+ sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd2),
+ sys_kcmp(pid1, pid2, KCMP_FILES, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_VM, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_FS, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_SIGHAND, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_IO, 0, 0),
+ sys_kcmp(pid1, pid2, KCMP_SYSVSEM, 0, 0),
+
+ /* This one should fail */
+ sys_kcmp(pid1, pid2, KCMP_TYPES + 1, 0, 0));
+
+ /* This one should return same fd */
+ ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1);
+ if (ret) {
+ printf("FAIL: 0 expected but %d returned (%s)\n",
+ ret, strerror(errno));
+ ksft_inc_fail_cnt();
+ ret = -1;
+ } else {
+ printf("PASS: 0 returned as expected\n");
+ ksft_inc_pass_cnt();
+ }
+
+ /* Compare with self */
+ ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
+ if (ret) {
+ printf("FAIL: 0 expected but %d returned (%s)\n",
+ ret, strerror(errno));
+ ksft_inc_fail_cnt();
+ ret = -1;
+ } else {
+ printf("PASS: 0 returned as expected\n");
+ ksft_inc_pass_cnt();
+ }
+
+ /* Compare epoll target */
+ epoll_slot = (struct kcmp_epoll_slot) {
+ .efd = epollfd,
+ .tfd = duped_num,
+ .toff = 0,
+ };
+ ret = sys_kcmp(pid1, pid1, KCMP_EPOLL_TFD, pipefd[1],
+ (unsigned long)(void *)&epoll_slot);
+ if (ret) {
+ printf("FAIL: 0 expected but %d returned (%s)\n",
+ ret, strerror(errno));
+ ksft_inc_fail_cnt();
+ ret = -1;
+ } else {
+ printf("PASS: 0 returned as expected\n");
+ ksft_inc_pass_cnt();
+ }
+
+ ksft_print_cnts();
+
+ if (ret)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+ }
+
+	waitpid(pid2, &status, 0);
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/kmod/Makefile b/tools/testing/selftests/kmod/Makefile
new file mode 100644
index 000000000..fa2ccc5fb
--- /dev/null
+++ b/tools/testing/selftests/kmod/Makefile
@@ -0,0 +1,11 @@
+# Makefile for kmod loading selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := kmod.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/kmod/config b/tools/testing/selftests/kmod/config
new file mode 100644
index 000000000..259f4fd6b
--- /dev/null
+++ b/tools/testing/selftests/kmod/config
@@ -0,0 +1,7 @@
+CONFIG_TEST_KMOD=m
+CONFIG_TEST_LKM=m
+CONFIG_XFS_FS=m
+
+# The following are used for the force_init_test module parameter
+CONFIG_TUN=m
+CONFIG_BTRFS_FS=m
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
new file mode 100755
index 000000000..1f118916a
--- /dev/null
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -0,0 +1,623 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This is a stress test script for kmod, the kernel module loader. It uses
+# test_kmod which exposes a series of knobs for the API for us so we can
+# tweak each test in userspace rather than in kernelspace.
+#
+# The way kmod works is it uses the kernel's usermode helper API to eventually
+# call /sbin/modprobe. It has a limit of the number of concurrent calls
+# possible. The kernel interface to load modules is request_module(), however
+# mount uses get_fs_type(). Both behave slightly differently, but the
+# differences are important enough to test each call separately. For this
+# reason test_kmod starts by providing tests for both calls.
+#
+# The test driver test_kmod assumes a series of defaults which you can
+# override by exporting to your environment prior running this script.
+# For instance this script assumes you do not have xfs loaded upon boot.
+# If this is false, and the filesystem module you don't have loaded upon
+# bootup is ext4 instead, export DEFAULT_KMOD_FS="ext4" prior to running
+# this script. Refer to allow_user_defaults() for a list of user
+# override variables possible.
+#
+# You'll want at least 4 GiB of RAM to expect to run these tests
+# without running out of memory on them. For other requirements refer
+# to test_reqs()
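+#
+# Example (illustrative):
+#
+#	export DEFAULT_KMOD_FS="ext4"
+#	./kmod.sh -t 0008
+#
+# runs test 0008 its recommended number of times, with ext4 as the
+# filesystem module assumed not to be loaded at boot.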
+
+set -e
+
+TEST_NAME="kmod"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
+ALL_TESTS="0001:3:1"
+ALL_TESTS="$ALL_TESTS 0002:3:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:10:1"
+ALL_TESTS="$ALL_TESTS 0006:10:1"
+ALL_TESTS="$ALL_TESTS 0007:5:1"
+ALL_TESTS="$ALL_TESTS 0008:150:1"
+ALL_TESTS="$ALL_TESTS 0009:150:1"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+test_modprobe()
+{
+ if [ ! -d $DIR ]; then
+ echo "$0: $DIR not present" >&2
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ exit $ksft_skip
+ fi
+}
+
+function allow_user_defaults()
+{
+ if [ -z $DEFAULT_KMOD_DRIVER ]; then
+ DEFAULT_KMOD_DRIVER="test_module"
+ fi
+
+ if [ -z $DEFAULT_KMOD_FS ]; then
+ DEFAULT_KMOD_FS="xfs"
+ fi
+
+ if [ -z $PROC_DIR ]; then
+ PROC_DIR="/proc/sys/kernel/"
+ fi
+
+ if [ -z $MODPROBE_LIMIT ]; then
+ MODPROBE_LIMIT=50
+ fi
+
+ if [ -z $DIR ]; then
+ DIR="/sys/devices/virtual/misc/${TEST_DRIVER}0/"
+ fi
+
+ if [ -z $DEFAULT_NUM_TESTS ]; then
+ DEFAULT_NUM_TESTS=150
+ fi
+
+ MODPROBE_LIMIT_FILE="${PROC_DIR}/kmod-limit"
+}
+
+test_reqs()
+{
+ if ! which modprobe 2> /dev/null > /dev/null; then
+ echo "$0: You need modprobe installed" >&2
+ exit $ksft_skip
+ fi
+
+ if ! which kmod 2> /dev/null > /dev/null; then
+ echo "$0: You need kmod installed" >&2
+ exit $ksft_skip
+ fi
+
+ # kmod 19 has a bad bug where it returns 0 when modprobe
+ # gets called *even* if the module was not loaded due to
+ # some bad heuristics. For details see:
+ #
+	# A workaround is possible in-kernel but it's rather
+	# complex.
+ KMOD_VERSION=$(kmod --version | awk '{print $3}')
+ if [[ $KMOD_VERSION -le 19 ]]; then
+ echo "$0: You need at least kmod 20" >&2
+ echo "kmod <= 19 is buggy, for details see:" >&2
+ echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+ exit $ksft_skip
+ fi
+
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
+function load_req_mod()
+{
+ trap "test_modprobe" EXIT
+
+ if [ ! -d $DIR ]; then
+ # Alanis: "Oh isn't it ironic?"
+ modprobe $TEST_DRIVER
+ fi
+}
+
+test_finish()
+{
+ echo "Test completed"
+}
+
+errno_name_to_val()
+{
+ case "$1" in
+	# kmod calls modprobe and, when a module is not found,
+	# modprobe returns just 1... However in the kernel we
+	# *sometimes* see 256...
+ MODULE_NOT_FOUND)
+ echo 256;;
+ SUCCESS)
+ echo 0;;
+ -EPERM)
+ echo -1;;
+ -ENOENT)
+ echo -2;;
+ -EINVAL)
+ echo -22;;
+ -ERR_ANY)
+ echo -123456;;
+ *)
+ echo invalid;;
+ esac
+}
+
+errno_val_to_name()
+{
+ case "$1" in
+ 256)
+ echo MODULE_NOT_FOUND;;
+ 0)
+ echo SUCCESS;;
+ -1)
+ echo -EPERM;;
+ -2)
+ echo -ENOENT;;
+ -22)
+ echo -EINVAL;;
+ -123456)
+ echo -ERR_ANY;;
+ *)
+ echo invalid;;
+	esac
+}
+
+config_set_test_case_driver()
+{
+ if ! echo -n 1 >$DIR/config_test_case; then
+		echo "$0: Unable to set test case to driver" >&2
+ exit 1
+ fi
+}
+
+config_set_test_case_fs()
+{
+ if ! echo -n 2 >$DIR/config_test_case; then
+		echo "$0: Unable to set test case to fs" >&2
+ exit 1
+ fi
+}
+
+config_num_threads()
+{
+ if ! echo -n $1 >$DIR/config_num_threads; then
+		echo "$0: Unable to set number of threads" >&2
+ exit 1
+ fi
+}
+
+config_get_modprobe_limit()
+{
+ if [[ -f ${MODPROBE_LIMIT_FILE} ]] ; then
+ MODPROBE_LIMIT=$(cat $MODPROBE_LIMIT_FILE)
+ fi
+ echo $MODPROBE_LIMIT
+}
+
+config_num_thread_limit_extra()
+{
+ MODPROBE_LIMIT=$(config_get_modprobe_limit)
+ let EXTRA_LIMIT=$MODPROBE_LIMIT+$1
+ config_num_threads $EXTRA_LIMIT
+}
+
+# For special characters use printf directly,
+# refer to kmod_test_0001
+config_set_driver()
+{
+ if ! echo -n $1 >$DIR/config_test_driver; then
+ echo "$0: Unable to set driver" >&2
+ exit 1
+ fi
+}
+
+config_set_fs()
+{
+ if ! echo -n $1 >$DIR/config_test_fs; then
+		echo "$0: Unable to set fs" >&2
+ exit 1
+ fi
+}
+
+config_get_driver()
+{
+ cat $DIR/config_test_driver
+}
+
+config_get_test_result()
+{
+ cat $DIR/test_result
+}
+
+config_reset()
+{
+ if ! echo -n "1" >"$DIR"/reset; then
+		echo "$0: reset should have worked" >&2
+ exit 1
+ fi
+}
+
+config_show_config()
+{
+ echo "----------------------------------------------------"
+ cat "$DIR"/config
+ echo "----------------------------------------------------"
+}
+
+config_trigger()
+{
+ if ! echo -n "1" >"$DIR"/trigger_config 2>/dev/null; then
+ echo "$1: FAIL - loading should have worked"
+ config_show_config
+ exit 1
+ fi
+ echo "$1: OK! - loading kmod test"
+}
+
+config_trigger_want_fail()
+{
+ if echo "1" > $DIR/trigger_config 2>/dev/null; then
+ echo "$1: FAIL - test case was expected to fail"
+ config_show_config
+ exit 1
+ fi
+ echo "$1: OK! - kmod test case failed as expected"
+}
+
+config_expect_result()
+{
+ RC=$(config_get_test_result)
+ RC_NAME=$(errno_val_to_name $RC)
+
+ ERRNO_NAME=$2
+ ERRNO=$(errno_name_to_val $ERRNO_NAME)
+
+ if [[ $ERRNO_NAME = "-ERR_ANY" ]]; then
+ if [[ $RC -ge 0 ]]; then
+ echo "$1: FAIL, test expects $ERRNO_NAME - got $RC_NAME ($RC)" >&2
+ config_show_config
+ exit 1
+ fi
+ elif [[ $RC != $ERRNO ]]; then
+ echo "$1: FAIL, test expects $ERRNO_NAME ($ERRNO) - got $RC_NAME ($RC)" >&2
+ config_show_config
+ exit 1
+ fi
+ echo "$1: OK! - Return value: $RC ($RC_NAME), expected $ERRNO_NAME"
+}
+
+kmod_defaults_driver()
+{
+ config_reset
+ modprobe -r $DEFAULT_KMOD_DRIVER
+ config_set_driver $DEFAULT_KMOD_DRIVER
+}
+
+kmod_defaults_fs()
+{
+ config_reset
+ modprobe -r $DEFAULT_KMOD_FS
+ config_set_fs $DEFAULT_KMOD_FS
+ config_set_test_case_fs
+}
+
+kmod_test_0001_driver()
+{
+ NAME='\000'
+
+ kmod_defaults_driver
+ config_num_threads 1
+ printf '\000' >"$DIR"/config_test_driver
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0001_fs()
+{
+ NAME='\000'
+
+ kmod_defaults_fs
+ config_num_threads 1
+ printf '\000' >"$DIR"/config_test_fs
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0001()
+{
+ kmod_test_0001_driver
+ kmod_test_0001_fs
+}
+
+kmod_test_0002_driver()
+{
+ NAME="nope-$DEFAULT_KMOD_DRIVER"
+
+ kmod_defaults_driver
+ config_set_driver $NAME
+ config_num_threads 1
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
+}
+
+kmod_test_0002_fs()
+{
+ NAME="nope-$DEFAULT_KMOD_FS"
+
+ kmod_defaults_fs
+ config_set_fs $NAME
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} -EINVAL
+}
+
+kmod_test_0002()
+{
+ kmod_test_0002_driver
+ kmod_test_0002_fs
+}
+
+kmod_test_0003()
+{
+ kmod_defaults_fs
+ config_num_threads 1
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0004()
+{
+ kmod_defaults_fs
+ config_num_threads 2
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0005()
+{
+ kmod_defaults_driver
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0006()
+{
+ kmod_defaults_fs
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0007()
+{
+ kmod_test_0005
+ kmod_test_0006
+}
+
+kmod_test_0008()
+{
+ kmod_defaults_driver
+ MODPROBE_LIMIT=$(config_get_modprobe_limit)
+ let EXTRA=$MODPROBE_LIMIT/6
+ config_num_thread_limit_extra $EXTRA
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+kmod_test_0009()
+{
+ kmod_defaults_fs
+ MODPROBE_LIMIT=$(config_get_modprobe_limit)
+ let EXTRA=$MODPROBE_LIMIT/4
+ config_num_thread_limit_extra $EXTRA
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} SUCCESS
+}
+
+list_tests()
+{
+ echo "Test ID list:"
+ echo
+	echo "TEST_ID x NUM_TESTS"
+ echo "TEST_ID: Test ID"
+ echo "NUM_TESTS: Number of recommended times to run the test"
+ echo
+ echo "0001 x $(get_test_count 0001) - Simple test - 1 thread for empty string"
+ echo "0002 x $(get_test_count 0002) - Simple test - 1 thread for modules/filesystems that do not exist"
+ echo "0003 x $(get_test_count 0003) - Simple test - 1 thread for get_fs_type() only"
+ echo "0004 x $(get_test_count 0004) - Simple test - 2 threads for get_fs_type() only"
+ echo "0005 x $(get_test_count 0005) - multithreaded tests with default setup - request_module() only"
+ echo "0006 x $(get_test_count 0006) - multithreaded tests with default setup - get_fs_type() only"
+ echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()"
+ echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()"
+ echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
+}
+
+usage()
+{
+ NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+ let NUM_TESTS=$NUM_TESTS+1
+ MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+ echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+	echo "       [ -s <4-number-digit> ] | [ -c <4-number-digit> <test-count> ]"
+ echo " [ all ] [ -h | --help ] [ -l ]"
+ echo ""
+ echo "Valid tests: 0001-$MAX_TEST"
+ echo ""
+ echo " all Runs all tests (default)"
+	echo "    -t		Run test ID the recommended number of times"
+ echo " -w Watch test ID run until it runs into an error"
+ echo " -s Run test ID once"
+ echo " -c Run test ID x test-count number of times"
+	echo "    -l		List all test IDs"
+ echo " -h|--help Help"
+ echo
+	echo "If an error ever occurs, execution will immediately terminate."
+ echo "If you are adding a new test try using -w <test-ID> first to"
+ echo "make sure the test passes a series of tests."
+ echo
+ echo Example uses:
+ echo
+ echo "${TEST_NAME}.sh -- executes all tests"
+	echo "${TEST_NAME}.sh -t 0008 -- Executes test ID 0008 the recommended number of times"
+ echo "${TEST_NAME}.sh -w 0008 -- Watch test ID 0008 run until an error occurs"
+ echo "${TEST_NAME}.sh -s 0008 -- Run test ID 0008 once"
+ echo "${TEST_NAME}.sh -c 0008 3 -- Run test ID 0008 three times"
+ echo
+ list_tests
+ exit 1
+}
+
+function test_num()
+{
+ re='^[0-9]+$'
+ if ! [[ $1 =~ $re ]]; then
+ usage
+ fi
+}
+
+function get_test_data()
+{
+ test_num $1
+ local field_num=$(echo $1 | sed 's/^0*//')
+ echo $ALL_TESTS | awk '{print $'$field_num'}'
+}
+
+function get_test_count()
+{
+ TEST_DATA=$(get_test_data $1)
+ LAST_TWO=${TEST_DATA#*:*}
+ echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+ TEST_DATA=$(get_test_data $1)
+ echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+ for i in $ALL_TESTS ; do
+ TEST_ID=${i%:*:*}
+ ENABLED=$(get_test_enabled $TEST_ID)
+ TEST_COUNT=$(get_test_count $TEST_ID)
+ if [[ $ENABLED -eq "1" ]]; then
+ test_case $TEST_ID $TEST_COUNT
+ fi
+ done
+}
+
+function watch_log()
+{
+ if [ $# -ne 3 ]; then
+ clear
+ fi
+ date
+ echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+ i=0
+ while [ 1 ]; do
+
+ if [ $# -eq 1 ]; then
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1
+ ${TEST_NAME}_test_$1
+ else
+ watch_log $i all
+ run_all_tests
+ fi
+ let i=$i+1
+ done
+}
+
+function test_case()
+{
+ NUM_TESTS=$DEFAULT_NUM_TESTS
+ if [ $# -eq 2 ]; then
+ NUM_TESTS=$2
+ fi
+
+ i=0
+ while [ $i -lt $NUM_TESTS ]; do
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1 noclear
+ RUN_TEST=${TEST_NAME}_test_$1
+ $RUN_TEST
+ let i=$i+1
+ done
+}
+
+function parse_args()
+{
+ if [ $# -eq 0 ]; then
+ run_all_tests
+ else
+ if [[ "$1" = "all" ]]; then
+ run_all_tests
+ elif [[ "$1" = "-w" ]]; then
+ shift
+ watch_case $@
+ elif [[ "$1" = "-t" ]]; then
+ shift
+ test_num $1
+ test_case $1 $(get_test_count $1)
+ elif [[ "$1" = "-c" ]]; then
+ shift
+ test_num $1
+ test_num $2
+ test_case $1 $2
+ elif [[ "$1" = "-s" ]]; then
+ shift
+ test_case $1 1
+ elif [[ "$1" = "-l" ]]; then
+ list_tests
+ elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+ usage
+ else
+ usage
+ fi
+ fi
+}
+
+test_reqs
+allow_user_defaults
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
new file mode 100644
index 000000000..a3edb2c8e
--- /dev/null
+++ b/tools/testing/selftests/kselftest.h
@@ -0,0 +1,183 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * kselftest.h: kselftest framework return codes to include from
+ * selftests.
+ *
+ * Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ *
+ */
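+/*
+ * Typical flow (a minimal sketch): print the TAP header with
+ * ksft_print_header(), report each result via ksft_test_result_pass(),
+ * ksft_test_result_fail() or ksft_test_result_skip(), and finish with
+ * ksft_exit_pass(), ksft_exit_fail() or ksft_exit_skip().
+ */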
+#ifndef __KSELFTEST_H
+#define __KSELFTEST_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+/* define kselftest exit codes */
+#define KSFT_PASS 0
+#define KSFT_FAIL 1
+#define KSFT_XFAIL 2
+#define KSFT_XPASS 3
+#define KSFT_SKIP 4
+
+/* counters */
+struct ksft_count {
+ unsigned int ksft_pass;
+ unsigned int ksft_fail;
+ unsigned int ksft_xfail;
+ unsigned int ksft_xpass;
+ unsigned int ksft_xskip;
+ unsigned int ksft_error;
+};
+
+static struct ksft_count ksft_cnt;
+
+static inline int ksft_test_num(void)
+{
+ return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
+ ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
+ ksft_cnt.ksft_xskip + ksft_cnt.ksft_error;
+}
+
+static inline void ksft_inc_pass_cnt(void) { ksft_cnt.ksft_pass++; }
+static inline void ksft_inc_fail_cnt(void) { ksft_cnt.ksft_fail++; }
+static inline void ksft_inc_xfail_cnt(void) { ksft_cnt.ksft_xfail++; }
+static inline void ksft_inc_xpass_cnt(void) { ksft_cnt.ksft_xpass++; }
+static inline void ksft_inc_xskip_cnt(void) { ksft_cnt.ksft_xskip++; }
+static inline void ksft_inc_error_cnt(void) { ksft_cnt.ksft_error++; }
+
+static inline int ksft_get_pass_cnt(void) { return ksft_cnt.ksft_pass; }
+static inline int ksft_get_fail_cnt(void) { return ksft_cnt.ksft_fail; }
+static inline int ksft_get_xfail_cnt(void) { return ksft_cnt.ksft_xfail; }
+static inline int ksft_get_xpass_cnt(void) { return ksft_cnt.ksft_xpass; }
+static inline int ksft_get_xskip_cnt(void) { return ksft_cnt.ksft_xskip; }
+static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
+
+static inline void ksft_print_header(void)
+{
+ if (!(getenv("KSFT_TAP_LEVEL")))
+ printf("TAP version 13\n");
+}
+
+static inline void ksft_print_cnts(void)
+{
+ printf("Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+ ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
+ ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
+ ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
+ printf("1..%d\n", ksft_test_num());
+}
+
+static inline void ksft_print_msg(const char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ printf("# ");
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_pass(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_pass++;
+
+ va_start(args, msg);
+ printf("ok %d ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_fail(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_fail++;
+
+ va_start(args, msg);
+ printf("not ok %d ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_skip(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_xskip++;
+
+ va_start(args, msg);
+ printf("ok %d # skip ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline void ksft_test_result_error(const char *msg, ...)
+{
+ va_list args;
+
+ ksft_cnt.ksft_error++;
+
+ va_start(args, msg);
+ printf("not ok %d # error ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline int ksft_exit_pass(void)
+{
+ ksft_print_cnts();
+ exit(KSFT_PASS);
+}
+
+static inline int ksft_exit_fail(void)
+{
+ printf("Bail out!\n");
+ ksft_print_cnts();
+ exit(KSFT_FAIL);
+}
+
+static inline int ksft_exit_fail_msg(const char *msg, ...)
+{
+ va_list args;
+
+ va_start(args, msg);
+ printf("Bail out! ");
+ vprintf(msg, args);
+ va_end(args);
+
+ ksft_print_cnts();
+ exit(KSFT_FAIL);
+}
+
+static inline int ksft_exit_xfail(void)
+{
+ ksft_print_cnts();
+ exit(KSFT_XFAIL);
+}
+
+static inline int ksft_exit_xpass(void)
+{
+ ksft_print_cnts();
+ exit(KSFT_XPASS);
+}
+
+static inline int ksft_exit_skip(const char *msg, ...)
+{
+ if (msg) {
+ va_list args;
+
+ va_start(args, msg);
+ printf("1..%d # Skipped: ", ksft_test_num());
+ vprintf(msg, args);
+ va_end(args);
+ } else {
+ ksft_print_cnts();
+ }
+ exit(KSFT_SKIP);
+}
+
+#endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
new file mode 100644
index 000000000..76d654ef3
--- /dev/null
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -0,0 +1,779 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * kselftest_harness.h: simple C unit test helper.
+ *
+ * See documentation in Documentation/dev-tools/kselftest.rst
+ *
+ * API inspired by code.google.com/p/googletest
+ */
+
+/**
+ * DOC: example
+ *
+ * .. code-block:: c
+ *
+ * #include "../kselftest_harness.h"
+ *
+ * TEST(standalone_test) {
+ * do_some_stuff;
+ * EXPECT_GT(10, stuff) {
+ * stuff_state_t state;
+ * enumerate_stuff_state(&state);
+ * TH_LOG("expectation failed with state: %s", state.msg);
+ * }
+ * more_stuff;
+ * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!");
+ * last_stuff;
+ * EXPECT_EQ(0, last_stuff);
+ * }
+ *
+ * FIXTURE(my_fixture) {
+ * mytype_t *data;
+ * int awesomeness_level;
+ * };
+ * FIXTURE_SETUP(my_fixture) {
+ * self->data = mytype_new();
+ * ASSERT_NE(NULL, self->data);
+ * }
+ * FIXTURE_TEARDOWN(my_fixture) {
+ * mytype_free(self->data);
+ * }
+ * TEST_F(my_fixture, data_is_good) {
+ * EXPECT_EQ(1, is_my_data_good(self->data));
+ * }
+ *
+ * TEST_HARNESS_MAIN
+ */
+
+#ifndef __KSELFTEST_HARNESS_H
+#define __KSELFTEST_HARNESS_H
+
+#define _GNU_SOURCE
+#include <asm/types.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+
+/* Utilities exposed to the test definitions */
+#ifndef TH_LOG_STREAM
+# define TH_LOG_STREAM stderr
+#endif
+
+#ifndef TH_LOG_ENABLED
+# define TH_LOG_ENABLED 1
+#endif
+
+/**
+ * TH_LOG(fmt, ...)
+ *
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * .. code-block:: c
+ *
+ * TH_LOG(format, ...)
+ *
+ * Optional debug logging function available for use in tests.
+ * Logging may be enabled or disabled by defining TH_LOG_ENABLED.
+ * E.g., #define TH_LOG_ENABLED 1
+ *
+ * If no definition is provided, logging is enabled by default.
+ *
+ * If there is no way to print an error message for the process running the
+ * test (e.g. not allowed to write to stderr), it is still possible to get the
+ * ASSERT_* number for which the test failed. This behavior can be enabled by
+ * writing `_metadata->no_print = true;` before the check sequence that is
+ * unable to print. When an error occurs, instead of printing an error message
+ * and calling `abort(3)`, the test process calls `_exit(2)` with the assert
+ * number as argument, which is then printed by the parent process.
+ */
+#define TH_LOG(fmt, ...) do { \
+ if (TH_LOG_ENABLED) \
+ __TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Unconditional logger for internal use. */
+#define __TH_LOG(fmt, ...) \
+ fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
+
+/**
+ * XFAIL(statement, fmt, ...)
+ *
+ * @statement: statement to run after reporting XFAIL
+ * @fmt: format string
+ * @...: optional arguments
+ *
+ * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * and runs "statement", which is usually "return" or "goto skip".
+ */
+#define XFAIL(statement, fmt, ...) do { \
+ if (TH_LOG_ENABLED) { \
+ fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
+ ##__VA_ARGS__); \
+ } \
+ /* TODO: find a way to pass xfail to test runner process. */ \
+ _metadata->passed = 1; \
+ _metadata->trigger = 0; \
+ statement; \
+} while (0)
+
+/**
+ * TEST(test_name) - Defines the test function and creates the registration
+ * stub
+ *
+ * @test_name: test name
+ *
+ * .. code-block:: c
+ *
+ * TEST(name) { implementation }
+ *
+ * Defines a test by name.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST(test_name) __TEST_IMPL(test_name, -1)
+
+/**
+ * TEST_SIGNAL(test_name, signal)
+ *
+ * @test_name: test name
+ * @signal: signal number
+ *
+ * .. code-block:: c
+ *
+ * TEST_SIGNAL(name, signal) { implementation }
+ *
+ * Defines a test by name and the expected term signal.
+ * Names must be unique and tests must not be run in parallel. The
+ * implementation containing block is a function and scoping should be treated
+ * as such. Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal)
+
+#define __TEST_IMPL(test_name, _signal) \
+ static void test_name(struct __test_metadata *_metadata); \
+ static struct __test_metadata _##test_name##_object = \
+ { name: "global." #test_name, \
+ fn: &test_name, termsig: _signal }; \
+ static void __attribute__((constructor)) _register_##test_name(void) \
+ { \
+ __register_test(&_##test_name##_object); \
+ } \
+ static void test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata)
+
+/**
+ * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less
+ * argument to pass around
+ *
+ * @datatype_name: datatype name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_DATA(datatype name)
+ *
+ * This call may be used when the type of the fixture data
+ * is needed. In general, this should not be needed unless
+ * the *self* is being passed to a helper directly.
+ */
+#define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name
+
+/**
+ * FIXTURE(fixture_name) - Called once per fixture to setup the data and
+ * register
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE(datatype name) {
+ * type property1;
+ * ...
+ * };
+ *
+ * Defines the data provided to TEST_F()-defined tests as *self*. It should be
+ * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
+ */
+#define FIXTURE(fixture_name) \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_data(void) \
+ { \
+ __fixture_count++; \
+ } \
+ FIXTURE_DATA(fixture_name)
+
+/**
+ * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_SETUP(fixture name) { implementation }
+ *
+ * Populates the required "setup" function for a fixture. An instance of the
+ * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
+ * implementation.
+ *
+ * ASSERT_* are valid for use in this context and will preempt the execution
+ * of any dependent fixture tests.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_SETUP(fixture_name) \
+ void fixture_name##_setup( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+/**
+ * FIXTURE_TEARDOWN(fixture_name)
+ * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_TEARDOWN(fixture name) { implementation }
+ *
+ * Populates the required "teardown" function for a fixture. An instance of the
+ * datatype defined with FIXTURE_DATA() will be exposed as *self* for the
+ * implementation to clean up.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_TEARDOWN(fixture_name) \
+ void fixture_name##_teardown( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/**
+ * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
+ * fixture-based test cases
+ *
+ * @fixture_name: fixture name
+ * @test_name: test name
+ *
+ * .. code-block:: c
+ *
+ * TEST_F(fixture, name) { implementation }
+ *
+ * Defines a test that depends on a fixture (e.g., is part of a test case).
+ * Very similar to TEST() except that *self* is the setup instance of fixture's
+ * datatype exposed for use by the implementation.
+ *
+ * Warning: use of ASSERT_* here will skip TEARDOWN.
+ */
+/* TODO(wad) register fixtures on dedicated test lists. */
+#define TEST_F(fixture_name, test_name) \
+ __TEST_F_IMPL(fixture_name, test_name, -1)
+
+#define TEST_F_SIGNAL(fixture_name, test_name, signal) \
+ __TEST_F_IMPL(fixture_name, test_name, signal)
+
+#define __TEST_F_IMPL(fixture_name, test_name, signal) \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata *_metadata, \
+ FIXTURE_DATA(fixture_name) *self); \
+ static inline void wrapper_##fixture_name##_##test_name( \
+ struct __test_metadata *_metadata) \
+ { \
+		/* fixture data is allocated, set up, and torn down per call. */ \
+ FIXTURE_DATA(fixture_name) self; \
+ memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
+ fixture_name##_setup(_metadata, &self); \
+ /* Let setup failure terminate early. */ \
+ if (!_metadata->passed) \
+ return; \
+ fixture_name##_##test_name(_metadata, &self); \
+ fixture_name##_teardown(_metadata, &self); \
+ } \
+ static struct __test_metadata \
+ _##fixture_name##_##test_name##_object = { \
+ name: #fixture_name "." #test_name, \
+ fn: &wrapper_##fixture_name##_##test_name, \
+ termsig: signal, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##test_name(void) \
+ { \
+ __register_test(&_##fixture_name##_##test_name##_object); \
+ } \
+ static void fixture_name##_##test_name( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/**
+ * TEST_HARNESS_MAIN - Simple wrapper to run the test harness
+ *
+ * .. code-block:: c
+ *
+ * TEST_HARNESS_MAIN
+ *
+ * Use once to append a main() to the test file.
+ */
+#define TEST_HARNESS_MAIN \
+ static void __attribute__((constructor)) \
+ __constructor_order_last(void) \
+ { \
+ if (!__constructor_order) \
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
+ } \
+ int main(int argc, char **argv) { \
+ return test_harness_run(argc, argv); \
+ }
+
+/**
+ * DOC: operators
+ *
+ * Operators for use in TEST() and TEST_F().
+ * ASSERT_* calls will stop test execution immediately.
+ * EXPECT_* calls will emit a failure warning, note it, and continue.
+ */
+
+/**
+ * ASSERT_EQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_EQ(expected, measured): expected == measured
+ */
+#define ASSERT_EQ(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, ==, 1)
+
+/**
+ * ASSERT_NE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_NE(expected, measured): expected != measured
+ */
+#define ASSERT_NE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, !=, 1)
+
+/**
+ * ASSERT_LT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_LT(expected, measured): expected < measured
+ */
+#define ASSERT_LT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <, 1)
+
+/**
+ * ASSERT_LE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_LE(expected, measured): expected <= measured
+ */
+#define ASSERT_LE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <=, 1)
+
+/**
+ * ASSERT_GT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_GT(expected, measured): expected > measured
+ */
+#define ASSERT_GT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >, 1)
+
+/**
+ * ASSERT_GE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_GE(expected, measured): expected >= measured
+ */
+#define ASSERT_GE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >=, 1)
+
+/**
+ * ASSERT_NULL(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_NULL(measured): NULL == measured
+ */
+#define ASSERT_NULL(seen) \
+ __EXPECT(NULL, "NULL", seen, #seen, ==, 1)
+
+/**
+ * ASSERT_TRUE(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_TRUE(measured): measured != 0
+ */
+#define ASSERT_TRUE(seen) \
+ __EXPECT(0, "0", seen, #seen, !=, 1)
+
+/**
+ * ASSERT_FALSE(seen)
+ *
+ * @seen: measured value
+ *
+ * ASSERT_FALSE(measured): measured == 0
+ */
+#define ASSERT_FALSE(seen) \
+ __EXPECT(0, "0", seen, #seen, ==, 1)
+
+/**
+ * ASSERT_STREQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_STREQ(expected, measured): !strcmp(expected, measured)
+ */
+#define ASSERT_STREQ(expected, seen) \
+ __EXPECT_STR(expected, seen, ==, 1)
+
+/**
+ * ASSERT_STRNE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * ASSERT_STRNE(expected, measured): strcmp(expected, measured)
+ */
+#define ASSERT_STRNE(expected, seen) \
+ __EXPECT_STR(expected, seen, !=, 1)
+
+/**
+ * EXPECT_EQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_EQ(expected, measured): expected == measured
+ */
+#define EXPECT_EQ(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, ==, 0)
+
+/**
+ * EXPECT_NE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_NE(expected, measured): expected != measured
+ */
+#define EXPECT_NE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, !=, 0)
+
+/**
+ * EXPECT_LT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_LT(expected, measured): expected < measured
+ */
+#define EXPECT_LT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <, 0)
+
+/**
+ * EXPECT_LE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_LE(expected, measured): expected <= measured
+ */
+#define EXPECT_LE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, <=, 0)
+
+/**
+ * EXPECT_GT(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_GT(expected, measured): expected > measured
+ */
+#define EXPECT_GT(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >, 0)
+
+/**
+ * EXPECT_GE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_GE(expected, measured): expected >= measured
+ */
+#define EXPECT_GE(expected, seen) \
+ __EXPECT(expected, #expected, seen, #seen, >=, 0)
+
+/**
+ * EXPECT_NULL(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_NULL(measured): NULL == measured
+ */
+#define EXPECT_NULL(seen) \
+ __EXPECT(NULL, "NULL", seen, #seen, ==, 0)
+
+/**
+ * EXPECT_TRUE(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_TRUE(measured): measured != 0
+ */
+#define EXPECT_TRUE(seen) \
+ __EXPECT(0, "0", seen, #seen, !=, 0)
+
+/**
+ * EXPECT_FALSE(seen)
+ *
+ * @seen: measured value
+ *
+ * EXPECT_FALSE(measured): measured == 0
+ */
+#define EXPECT_FALSE(seen) \
+ __EXPECT(0, "0", seen, #seen, ==, 0)
+
+/**
+ * EXPECT_STREQ(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_STREQ(expected, measured): !strcmp(expected, measured)
+ */
+#define EXPECT_STREQ(expected, seen) \
+ __EXPECT_STR(expected, seen, ==, 0)
+
+/**
+ * EXPECT_STRNE(expected, seen)
+ *
+ * @expected: expected value
+ * @seen: measured value
+ *
+ * EXPECT_STRNE(expected, measured): strcmp(expected, measured)
+ */
+#define EXPECT_STRNE(expected, seen) \
+ __EXPECT_STR(expected, seen, !=, 0)
+
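+/*
+ * Usage note (illustrative sketch, not part of this patch): ASSERT_*
+ * aborts the running test on failure, while EXPECT_* records the
+ * failure and lets the test continue. The names fd, len and nread
+ * below are assumed examples:
+ *
+ *	ASSERT_NE(-1, fd);
+ *	EXPECT_EQ(len, nread);
+ */
+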
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+/* Support an optional handler after an ASSERT_* or EXPECT_*. The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+ *
+ * Using __bail(), which optionally abort()s, is the easiest way to early
+ * return while still providing an optional block to the API consumer.
+ */
+#define OPTIONAL_HANDLER(_assert) \
+ for (; _metadata->trigger; _metadata->trigger = \
+ __bail(_assert, _metadata->no_print, _metadata->step))
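+
+/*
+ * Illustrative trailing-handler sketch (fd is an assumed example
+ * variable; TH_LOG() is assumed from earlier in this header):
+ *
+ *	ASSERT_EQ(0, close(fd)) {
+ *		TH_LOG("close failed, cleaning up");
+ *	}
+ *
+ * The block runs only when the assertion fails, just before __bail().
+ */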
+
+#define __INC_STEP(_metadata) \
+ if (_metadata->passed && _metadata->step < 255) \
+ _metadata->step++;
+
+#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
+	/* Avoid evaluating the arguments multiple times */ \
+ __typeof__(_expected) __exp = (_expected); \
+ __typeof__(_seen) __seen = (_seen); \
+ if (_assert) __INC_STEP(_metadata); \
+ if (!(__exp _t __seen)) { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ _metadata->passed = 0; \
+ /* Ensure the optional handler is triggered */ \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
+ const char *__exp = (_expected); \
+ const char *__seen = (_seen); \
+ if (_assert) __INC_STEP(_metadata); \
+ if (!(strcmp(__exp, __seen) _t 0)) { \
+ __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
+ _metadata->passed = 0; \
+ _metadata->trigger = 1; \
+ } \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+/* Contains all the information for test execution and status checking. */
+struct __test_metadata {
+ const char *name;
+ void (*fn)(struct __test_metadata *);
+ int termsig;
+ int passed;
+ int trigger; /* extra handler after the evaluation */
+ __u8 step;
+ bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ struct __test_metadata *prev, *next;
+};
+
+/* Storage for the (global) tests to be run. */
+static struct __test_metadata *__test_list;
+static unsigned int __test_count;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD 1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+/*
+ * Since constructors are called in reverse order, reverse the test
+ * list so tests are run in source declaration order.
+ * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
+ * However, not all toolchains do this reliably, so use
+ * __constructor_order to detect which direction runs first and
+ * adjust the list-building logic so tests run in the right
+ * direction.
+ */
+static inline void __register_test(struct __test_metadata *t)
+{
+ __test_count++;
+ /* Circular linked list where only prev is circular. */
+ if (__test_list == NULL) {
+ __test_list = t;
+ t->next = NULL;
+ t->prev = t;
+ return;
+ }
+ if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
+ t->next = NULL;
+ t->prev = __test_list->prev;
+ t->prev->next = t;
+ __test_list->prev = t;
+ } else {
+ t->next = __test_list;
+ t->next->prev = t;
+ t->prev = t;
+ __test_list = t;
+ }
+}
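+
+/*
+ * Example (illustrative): with tests A, B and C declared in that
+ * order and a toolchain that runs constructors last-to-first,
+ * registration sees C, then B, then A, and the prepend branch above
+ * rebuilds the list as A -> B -> C.
+ */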
+
+static inline int __bail(int for_realz, bool no_print, __u8 step)
+{
+ if (for_realz) {
+ if (no_print)
+ _exit(step);
+ abort();
+ }
+ return 0;
+}
+
+void __run_test(struct __test_metadata *t)
+{
+ pid_t child_pid;
+ int status;
+
+ t->passed = 1;
+ t->trigger = 0;
+ printf("[ RUN ] %s\n", t->name);
+ child_pid = fork();
+ if (child_pid < 0) {
+ printf("ERROR SPAWNING TEST CHILD\n");
+ t->passed = 0;
+ } else if (child_pid == 0) {
+ t->fn(t);
+ /* return the step that failed or 0 */
+ _exit(t->passed ? 0 : t->step);
+ } else {
+ /* TODO(wad) add timeout support. */
+ waitpid(child_pid, &status, 0);
+ if (WIFEXITED(status)) {
+ t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
+ if (t->termsig != -1) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test exited normally "
+ "instead of by signal (code: %d)\n",
+ t->name,
+ WEXITSTATUS(status));
+ } else if (!t->passed) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test failed at step #%d\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ t->passed = 0;
+ if (WTERMSIG(status) == SIGABRT) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated by assertion\n",
+ t->name);
+ } else if (WTERMSIG(status) == t->termsig) {
+ t->passed = 1;
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated unexpectedly "
+ "by signal %d\n",
+ t->name,
+ WTERMSIG(status));
+ }
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test ended in some other way [%u]\n",
+ t->name,
+ status);
+ }
+ }
+ printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+}
+
+static int test_harness_run(int __attribute__((unused)) argc,
+ char __attribute__((unused)) **argv)
+{
+ struct __test_metadata *t;
+ int ret = 0;
+ unsigned int count = 0;
+ unsigned int pass_count = 0;
+
+ /* TODO(wad) add optional arguments similar to gtest. */
+ printf("[==========] Running %u tests from %u test cases.\n",
+ __test_count, __fixture_count + 1);
+ for (t = __test_list; t; t = t->next) {
+ count++;
+ __run_test(t);
+ if (t->passed)
+ pass_count++;
+ else
+ ret = 1;
+ }
+ printf("[==========] %u / %u tests passed.\n", pass_count, count);
+ printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
+ return ret;
+}
+
+static void __attribute__((constructor)) __constructor_order_first(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+}
+
+#endif /* __KSELFTEST_HARNESS_H */
diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh
new file mode 100755
index 000000000..ec3044638
--- /dev/null
+++ b/tools/testing/selftests/kselftest_install.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Kselftest Install
+# Install kselftest tests
+# Author: Shuah Khan <shuahkh@osg.samsung.com>
+# Copyright (C) 2015 Samsung Electronics Co., Ltd.
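+#
+# Usage (illustrative): run from the selftests directory, optionally
+# passing a destination directory:
+#   ./kselftest_install.sh [install_location]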
+
+install_loc=$(pwd)
+
+main()
+{
+	if [ "$(basename "$install_loc")" != "selftests" ]; then
+		echo "$0: Please run this script from the selftests directory ..."
+		exit 1;
+ fi
+ if [ "$#" -eq 0 ]; then
+ echo "$0: Installing in default location - $install_loc ..."
+ elif [ ! -d "$1" ]; then
+ echo "$0: $1 doesn't exist!!"
+ exit 1;
+ else
+		install_loc="$1"
+ echo "$0: Installing in specified location - $install_loc ..."
+ fi
+
+	install_dir="$install_loc/kselftest"
+
+# Create install directory
+	mkdir -p "$install_dir"
+# Build tests
+	INSTALL_PATH="$install_dir" make install
+}
+
+main "$@"
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
new file mode 100644
index 000000000..5c34752e1
--- /dev/null
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -0,0 +1,6 @@
+cr4_cpuid_sync_test
+platform_info_test
+set_sregs_test
+sync_regs_test
+vmx_tsc_adjust_test
+state_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
new file mode 100644
index 000000000..cc83e2fd3
--- /dev/null
+++ b/tools/testing/selftests/kvm/Makefile
@@ -0,0 +1,43 @@
+all:
+
+top_srcdir = ../../../../
+KSFT_KHDR_INSTALL := 1
+UNAME_M := $(shell uname -m)
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+LIBKVM_x86_64 = lib/x86.c lib/vmx.c
+
+TEST_GEN_PROGS_x86_64 = platform_info_test
+TEST_GEN_PROGS_x86_64 += set_sregs_test
+TEST_GEN_PROGS_x86_64 += sync_regs_test
+TEST_GEN_PROGS_x86_64 += vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += state_test
+TEST_GEN_PROGS_x86_64 += dirty_log_test
+
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
+LIBKVM += $(LIBKVM_$(UNAME_M))
+
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+LINUX_TOOL_INCLUDE = $(top_srcdir)tools/include
+CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -I..
+LDFLAGS += -pthread
+
+# After inclusion, $(OUTPUT) is defined and
+# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
+include ../lib.mk
+
+STATIC_LIBS := $(OUTPUT)/libkvm.a
+LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
+EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS)
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
+$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+ $(AR) crs $@ $^
+
+all: $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(STATIC_LIBS)
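+
+# Note (illustrative): running `make` here builds libkvm.a and the
+# per-arch tests listed in TEST_GEN_PROGS via the shared ../lib.mk
+# rules.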
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
new file mode 100644
index 000000000..63ed533f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/config
@@ -0,0 +1,3 @@
+CONFIG_KVM=y
+CONFIG_KVM_INTEL=y
+CONFIG_KVM_AMD=y
diff --git a/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
new file mode 100644
index 000000000..11ec358bf
--- /dev/null
+++ b/tools/testing/selftests/kvm/cr4_cpuid_sync_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CR4 and CPUID sync test
+ *
+ * Copyright 2018, Red Hat, Inc. and/or its affiliates.
+ *
+ * Author:
+ * Wei Huang <wei@redhat.com>
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define X86_FEATURE_XSAVE (1<<26)
+#define X86_FEATURE_OSXSAVE (1<<27)
+#define VCPU_ID 1
+
+static inline bool cr4_cpuid_is_sync(void)
+{
+ int func, subfunc;
+ uint32_t eax, ebx, ecx, edx;
+ uint64_t cr4;
+
+ func = 0x1;
+ subfunc = 0x0;
+ __asm__ __volatile__("cpuid"
+ : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
+ : "a"(func), "c"(subfunc));
+
+ cr4 = get_cr4();
+
+ return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+}
+
+static void guest_code(void)
+{
+ uint64_t cr4;
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+
+ /* verify CR4.OSXSAVE == CPUID.OSXSAVE */
+ GUEST_ASSERT(cr4_cpuid_is_sync());
+
+ /* notify hypervisor to change CR4 */
+ GUEST_SYNC(0);
+
+ /* check again */
+ GUEST_ASSERT(cr4_cpuid_is_sync());
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct kvm_sregs sregs;
+ struct kvm_cpuid_entry2 *entry;
+ int rc;
+
+ entry = kvm_get_supported_cpuid_entry(1);
+ if (!(entry->ecx & X86_FEATURE_XSAVE)) {
+ printf("XSAVE feature not supported, skipping test\n");
+ return 0;
+ }
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (1) {
+ rc = _vcpu_run(vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ switch (run->io.port) {
+ case GUEST_PORT_SYNC:
+ /* emulate hypervisor clearing CR4.OSXSAVE */
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.cr4 &= ~X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ break;
+ case GUEST_PORT_ABORT:
+ TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ break;
+ case GUEST_PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.",
+ run->io.port);
+ }
+ }
+ }
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
new file mode 100644
index 000000000..a9c4b5e21
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging test
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define DEBUG printf
+
+#define VCPU_ID 1
+/* The memory slot index to track dirty pages */
+#define TEST_MEM_SLOT_INDEX 1
+/*
+ * GPA offset of the testing memory slot. Must lie above the default
+ * vm mem slot, which spans DEFAULT_GUEST_PHY_PAGES pages from GPA 0.
+ */
+#define TEST_MEM_OFFSET (1ULL << 30) /* 1G */
+/* Size of the testing memory slot */
+#define TEST_MEM_PAGES (1ULL << 18) /* 1G for 4K pages */
+/* How many pages to dirty for each guest loop */
+#define TEST_PAGES_PER_LOOP 1024
+/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
+#define TEST_HOST_LOOP_N 32UL
+/* Interval for each host loop (ms) */
+#define TEST_HOST_LOOP_INTERVAL 10UL
+
+/*
+ * Guest variables. We use these variables to share data between host
+ * and guest. There are two copies of the variables, one in host memory
+ * (which is unused) and one in guest memory. When the host wants to
+ * access these variables, it needs to call addr_gva2hva() to access the
+ * guest copy.
+ */
+uint64_t guest_random_array[TEST_PAGES_PER_LOOP];
+uint64_t guest_iteration;
+uint64_t guest_page_size;
+
+/*
+ * Continuously writes to the first 8 bytes of a random page within the
+ * testing memory region.
+ */
+void guest_code(void)
+{
+ int i = 0;
+ uint64_t volatile *array = guest_random_array;
+ uint64_t volatile *guest_addr;
+
+ while (true) {
+ for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
+ /*
+ * Write to the first 8 bytes of a random page
+ * on the testing memory region.
+ */
+ guest_addr = (uint64_t *)
+ (TEST_MEM_OFFSET +
+ (array[i] % TEST_MEM_PAGES) * guest_page_size);
+ *guest_addr = guest_iteration;
+ }
+ /* Tell the host that we need more random numbers */
+ GUEST_SYNC(1);
+ }
+}
+
+/*
+ * Host variables. These variables should be used only by the host,
+ * never by the guest.
+ */
+bool host_quit;
+
+/* Points to the test VM memory region on which we track dirty logs */
+void *host_test_mem;
+
+/* For statistics only */
+uint64_t host_dirty_count;
+uint64_t host_clear_count;
+uint64_t host_track_next_count;
+
+/*
+ * We use this bitmap to track pages that should have their dirty bit
+ * set in the _next_ iteration. For example, if we detect that a page's
+ * value changed to the current iteration number while its bit is
+ * cleared in the latest bitmap, then the system must report that
+ * write in the next get dirty log call.
+ */
+unsigned long *host_bmap_track;
+
+void generate_random_array(uint64_t *guest_array, uint64_t size)
+{
+ uint64_t i;
+
+ for (i = 0; i < size; i++) {
+ guest_array[i] = random();
+ }
+}
+
+void *vcpu_worker(void *data)
+{
+ int ret;
+ uint64_t loops, *guest_array, pages_count = 0;
+ struct kvm_vm *vm = data;
+ struct kvm_run *run;
+ struct guest_args args;
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Retrieve the guest random array pointer and cache it */
+ guest_array = addr_gva2hva(vm, (vm_vaddr_t)guest_random_array);
+
+ DEBUG("VCPU starts\n");
+
+ generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+
+ while (!READ_ONCE(host_quit)) {
+		/* Let the guest dirty these random pages */
+ ret = _vcpu_run(vm, VCPU_ID);
+ guest_args_read(vm, VCPU_ID, &args);
+ if (run->exit_reason == KVM_EXIT_IO &&
+ args.port == GUEST_PORT_SYNC) {
+ pages_count += TEST_PAGES_PER_LOOP;
+ generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+ } else {
+ TEST_ASSERT(false,
+ "Invalid guest sync status: "
+ "exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+ }
+
+ DEBUG("VCPU exits, dirtied %"PRIu64" pages\n", pages_count);
+
+ return NULL;
+}
+
+void vm_dirty_log_verify(unsigned long *bmap, uint64_t iteration)
+{
+ uint64_t page;
+ uint64_t volatile *value_ptr;
+
+ for (page = 0; page < TEST_MEM_PAGES; page++) {
+ value_ptr = host_test_mem + page * getpagesize();
+
+ /* If this is a special page that we were tracking... */
+ if (test_and_clear_bit(page, host_bmap_track)) {
+ host_track_next_count++;
+ TEST_ASSERT(test_bit(page, bmap),
+ "Page %"PRIu64" should have its dirty bit "
+ "set in this iteration but it is missing",
+ page);
+ }
+
+ if (test_bit(page, bmap)) {
+ host_dirty_count++;
+ /*
+ * If the bit is set, the value written onto
+ * the corresponding page should be either the
+ * previous iteration number or the current one.
+ */
+ TEST_ASSERT(*value_ptr == iteration ||
+ *value_ptr == iteration - 1,
+ "Set page %"PRIu64" value %"PRIu64
+ " incorrect (iteration=%"PRIu64")",
+ page, *value_ptr, iteration);
+ } else {
+ host_clear_count++;
+ /*
+ * If cleared, the value written can be any
+			 * value smaller than or equal to the iteration
+			 * number. Note that the value can be exactly
+			 * (iteration-1) if the write happens
+ * like this:
+ *
+ * (1) increase loop count to "iteration-1"
+ * (2) write to page P happens (with value
+ * "iteration-1")
+ * (3) get dirty log for "iteration-1"; we'll
+ * see that page P bit is set (dirtied),
+ * and not set the bit in host_bmap_track
+ * (4) increase loop count to "iteration"
+ * (which is current iteration)
+ * (5) get dirty log for current iteration,
+ * we'll see that page P is cleared, with
+ * value "iteration-1".
+ */
+ TEST_ASSERT(*value_ptr <= iteration,
+ "Clear page %"PRIu64" value %"PRIu64
+ " incorrect (iteration=%"PRIu64")",
+ page, *value_ptr, iteration);
+ if (*value_ptr == iteration) {
+ /*
+ * This page is _just_ modified; it
+				 * This page was _just_ modified; it
+				 * should report its dirtiness in the
+ */
+ set_bit(page, host_bmap_track);
+ }
+ }
+ }
+}
+
+void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-i iterations] [-I interval] [-h]\n", name);
+ puts("");
+ printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+ TEST_HOST_LOOP_N);
+ printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
+ TEST_HOST_LOOP_INTERVAL);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t vcpu_thread;
+ struct kvm_vm *vm;
+ uint64_t volatile *psize, *iteration;
+ unsigned long *bmap, iterations = TEST_HOST_LOOP_N,
+ interval = TEST_HOST_LOOP_INTERVAL;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hi:I:")) != -1) {
+ switch (opt) {
+ case 'i':
+ iterations = strtol(optarg, NULL, 10);
+ break;
+ case 'I':
+ interval = strtol(optarg, NULL, 10);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+	TEST_ASSERT(iterations > 2, "Iterations must be greater than two\n");
+ TEST_ASSERT(interval > 0, "Interval must be bigger than zero");
+
+ DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+ iterations, interval);
+
+ srandom(time(0));
+
+ bmap = bitmap_alloc(TEST_MEM_PAGES);
+ host_bmap_track = bitmap_alloc(TEST_MEM_PAGES);
+
+ vm = vm_create_default(VCPU_ID, TEST_MEM_PAGES, guest_code);
+
+ /* Add an extra memory slot for testing dirty logging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ TEST_MEM_OFFSET,
+ TEST_MEM_SLOT_INDEX,
+ TEST_MEM_PAGES,
+ KVM_MEM_LOG_DIRTY_PAGES);
+ /* Cache the HVA pointer of the region */
+ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)TEST_MEM_OFFSET);
+
+ /* Do 1:1 mapping for the dirty track memory slot */
+ virt_map(vm, TEST_MEM_OFFSET, TEST_MEM_OFFSET,
+ TEST_MEM_PAGES * getpagesize(), 0);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /* Tell the guest about the page size on the system */
+ psize = addr_gva2hva(vm, (vm_vaddr_t)&guest_page_size);
+ *psize = getpagesize();
+
+ /* Start the iterations */
+ iteration = addr_gva2hva(vm, (vm_vaddr_t)&guest_iteration);
+ *iteration = 1;
+
+ /* Start dirtying pages */
+ pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+
+ while (*iteration < iterations) {
+ /* Give the vcpu thread some time to dirty some pages */
+ usleep(interval * 1000);
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+ vm_dirty_log_verify(bmap, *iteration);
+ (*iteration)++;
+ }
+
+ /* Tell the vcpu thread to quit */
+ host_quit = true;
+ pthread_join(vcpu_thread, NULL);
+
+ DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+ "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+ host_track_next_count);
+
+ free(bmap);
+ free(host_bmap_track);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
new file mode 100644
index 000000000..3acf9a917
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -0,0 +1,194 @@
+/*
+ * tools/testing/selftests/kvm/include/kvm_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+#ifndef SELFTEST_KVM_UTIL_H
+#define SELFTEST_KVM_UTIL_H 1
+
+#include "test_util.h"
+
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include <sys/ioctl.h>
+
+#include "sparsebit.h"
+
+/*
+ * Memslots can't cover the gfn starting at this gpa otherwise vCPUs can't be
+ * created. Only applies to VMs using EPT.
+ */
+#define KVM_DEFAULT_IDENTITY_MAP_ADDRESS 0xfffbc000ul
+
+
+/* Callers of kvm_util only have an incomplete/opaque description of the
+ * structure kvm_util is using to maintain the state of a VM.
+ */
+struct kvm_vm;
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+
+#define DEFAULT_GUEST_PHY_PAGES 512
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_FLAT48PG,
+};
+
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+};
+
+int kvm_check_cap(long cap);
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp, int perm);
+void kvm_vm_release(struct kvm_vm *vmp);
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
+
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, const vm_vaddr_t gva, size_t len);
+
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent);
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long ioctl, void *arg);
+void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot);
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ size_t size, uint32_t pgd_memslot);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state);
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs);
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs);
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events);
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot);
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot);
+
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+void vcpu_set_cpuid(
+ struct kvm_vm *vm, uint32_t vcpuid, struct kvm_cpuid2 *cpuid);
+
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+
+static inline struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_entry(uint32_t function)
+{
+ return kvm_get_supported_cpuid_index(function, 0);
+}
+
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+ void *guest_code);
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
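+
+/*
+ * Minimal host-side flow (sketch only; VCPU_ID and guest_code are
+ * assumed to be supplied by the test):
+ *
+ *	struct kvm_vm *vm = vm_create_default(VCPU_ID, 0, guest_code);
+ *
+ *	vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ *	vcpu_run(vm, VCPU_ID);
+ *	kvm_vm_free(vm);
+ */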
+
+typedef void (*vmx_guest_code_t)(vm_vaddr_t vmxon_vaddr,
+ vm_paddr_t vmxon_paddr,
+ vm_vaddr_t vmcs_vaddr,
+ vm_paddr_t vmcs_paddr);
+
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end);
+
+struct kvm_dirty_log *
+allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
+
+int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
+
+#define GUEST_PORT_SYNC 0x1000
+#define GUEST_PORT_ABORT 0x1001
+#define GUEST_PORT_DONE 0x1002
+
+static inline void __exit_to_l0(uint16_t port, uint64_t arg0, uint64_t arg1)
+{
+ __asm__ __volatile__("in %[port], %%al"
+ :
+ : [port]"d"(port), "D"(arg0), "S"(arg1)
+ : "rax");
+}
+
+/*
+ * Allows passing three arguments to the host: port is 16 bits wide,
+ * arg0 and arg1 are 64 bits wide.
+ */
+#define GUEST_SYNC_ARGS(_port, _arg0, _arg1) \
+ __exit_to_l0(_port, (uint64_t) (_arg0), (uint64_t) (_arg1))
+
+#define GUEST_ASSERT(_condition) do { \
+ if (!(_condition)) \
+ GUEST_SYNC_ARGS(GUEST_PORT_ABORT, \
+ "Failed guest assert: " \
+ #_condition, __LINE__); \
+ } while (0)
+
+#define GUEST_SYNC(stage) GUEST_SYNC_ARGS(GUEST_PORT_SYNC, "hello", stage)
+
+#define GUEST_DONE() GUEST_SYNC_ARGS(GUEST_PORT_DONE, 0, 0)
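+
+/*
+ * Illustrative guest skeleton (sketch; the asserted condition is an
+ * assumed example): each GUEST_* macro exits to the host via an I/O
+ * port, so the host observes KVM_EXIT_IO with the matching
+ * GUEST_PORT_* value:
+ *
+ *	static void guest_code(void)
+ *	{
+ *		GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
+ *		GUEST_SYNC(1);
+ *		GUEST_DONE();
+ *	}
+ */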
+
+struct guest_args {
+ uint64_t arg0;
+ uint64_t arg1;
+ uint16_t port;
+} __attribute__ ((packed));
+
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct guest_args *args);
+
+#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
new file mode 100644
index 000000000..54cfeb656
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -0,0 +1,75 @@
+/*
+ * tools/testing/selftests/kvm/include/sparsebit.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * Header file that describes the API to the sparsebit library.
+ * This library provides a memory-efficient means of storing
+ * the settings of bits indexed via a uint64_t. Memory usage
+ * is reasonable, significantly less than (2^64 / 8) bytes, as
+ * long as runs of set or cleared bits are clustered together.
+ * The library stays memory-efficient even when most bits are
+ * set.
+ */
+
+#ifndef _TEST_SPARSEBIT_H_
+#define _TEST_SPARSEBIT_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct sparsebit;
+typedef uint64_t sparsebit_idx_t;
+typedef uint64_t sparsebit_num_t;
+
+struct sparsebit *sparsebit_alloc(void);
+void sparsebit_free(struct sparsebit **sbitp);
+void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+
+bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t idx, sparsebit_num_t num);
+sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
+bool sparsebit_any_set(struct sparsebit *sbit);
+bool sparsebit_any_clear(struct sparsebit *sbit);
+bool sparsebit_all_set(struct sparsebit *sbit);
+bool sparsebit_all_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+ sparsebit_idx_t start, sparsebit_num_t num);
+
+void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_set_num(struct sparsebit *sbitp, sparsebit_idx_t start,
+ sparsebit_num_t num);
+void sparsebit_set_all(struct sparsebit *sbitp);
+
+void sparsebit_clear(struct sparsebit *sbitp, sparsebit_idx_t idx);
+void sparsebit_clear_num(struct sparsebit *sbitp,
+ sparsebit_idx_t start, sparsebit_num_t num);
+void sparsebit_clear_all(struct sparsebit *sbitp);
+
+void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+ unsigned int indent);
+void sparsebit_validate_internal(struct sparsebit *sbit);
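+
+/*
+ * Minimal usage sketch (illustrative only):
+ *
+ *	struct sparsebit *sb = sparsebit_alloc();
+ *
+ *	sparsebit_set_num(sb, 0x1000, 16);
+ *	if (sparsebit_is_set(sb, 0x100f))
+ *		sparsebit_clear(sb, 0x100f);
+ *	sparsebit_free(&sb);
+ */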
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _TEST_SPARSEBIT_H_ */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
new file mode 100644
index 000000000..73c393343
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -0,0 +1,44 @@
+/*
+ * tools/testing/selftests/kvm/include/test_util.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef TEST_UTIL_H
+#define TEST_UTIL_H 1
+
+#include <stdlib.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include "kselftest.h"
+
+ssize_t test_write(int fd, const void *buf, size_t count);
+ssize_t test_read(int fd, void *buf, size_t count);
+int test_seq_read(const char *path, char **bufp, size_t *sizep);
+
+void test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...);
+
+#define TEST_ASSERT(e, fmt, ...) \
+ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+#define ASSERT_EQ(a, b) do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ TEST_ASSERT(__a == __b, \
+ "ASSERT_EQ(%s, %s) failed.\n" \
+ "\t%s is %#lx\n" \
+ "\t%s is %#lx", \
+ #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+} while (0)
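+
+/*
+ * Example (illustrative; ret, nr_pages and expected_pages are assumed
+ * names): TEST_ASSERT takes a printf-style message, while ASSERT_EQ
+ * compares two scalars directly:
+ *
+ *	TEST_ASSERT(ret == 0, "ioctl failed, rc: %i errno: %i",
+ *		    ret, errno);
+ *	ASSERT_EQ(nr_pages, expected_pages);
+ */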
+
+#endif /* TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/vmx.h b/tools/testing/selftests/kvm/include/vmx.h
new file mode 100644
index 000000000..b9ffe1024
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/vmx.h
@@ -0,0 +1,552 @@
+/*
+ * tools/testing/selftests/kvm/include/vmx.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_VMX_H
+#define SELFTEST_KVM_VMX_H
+
+#include <stdint.h>
+#include "x86.h"
+
+#define CPUID_VMX_BIT 5
+
+#define CPUID_VMX (1 << 5)
+
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
+#define CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004
+#define CPU_BASED_USE_TSC_OFFSETING 0x00000008
+#define CPU_BASED_HLT_EXITING 0x00000080
+#define CPU_BASED_INVLPG_EXITING 0x00000200
+#define CPU_BASED_MWAIT_EXITING 0x00000400
+#define CPU_BASED_RDPMC_EXITING 0x00000800
+#define CPU_BASED_RDTSC_EXITING 0x00001000
+#define CPU_BASED_CR3_LOAD_EXITING 0x00008000
+#define CPU_BASED_CR3_STORE_EXITING 0x00010000
+#define CPU_BASED_CR8_LOAD_EXITING 0x00080000
+#define CPU_BASED_CR8_STORE_EXITING 0x00100000
+#define CPU_BASED_TPR_SHADOW 0x00200000
+#define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000
+#define CPU_BASED_MOV_DR_EXITING 0x00800000
+#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
+#define CPU_BASED_USE_IO_BITMAPS 0x02000000
+#define CPU_BASED_MONITOR_TRAP 0x08000000
+#define CPU_BASED_USE_MSR_BITMAPS 0x10000000
+#define CPU_BASED_MONITOR_EXITING 0x20000000
+#define CPU_BASED_PAUSE_EXITING 0x40000000
+#define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000
+
+#define CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x0401e172
+
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
+#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
+#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
+#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
+#define SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080
+#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
+#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
+#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND_EXITING 0x00000800
+#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
+#define SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000
+#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED_EXITING 0x00010000
+#define SECONDARY_EXEC_ENABLE_PML 0x00020000
+#define SECONDARY_EPT_VE 0x00040000
+#define SECONDARY_ENABLE_XSAV_RESTORE 0x00100000
+#define SECONDARY_EXEC_TSC_SCALING 0x02000000
+
+#define PIN_BASED_EXT_INTR_MASK 0x00000001
+#define PIN_BASED_NMI_EXITING 0x00000008
+#define PIN_BASED_VIRTUAL_NMIS 0x00000020
+#define PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040
+#define PIN_BASED_POSTED_INTR 0x00000080
+
+#define PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR 0x00000016
+
+#define VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004
+#define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200
+#define VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000
+#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_IA32_PAT 0x00040000
+#define VM_EXIT_LOAD_IA32_PAT 0x00080000
+#define VM_EXIT_SAVE_IA32_EFER 0x00100000
+#define VM_EXIT_LOAD_IA32_EFER 0x00200000
+#define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000
+
+#define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff
+
+#define VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004
+#define VM_ENTRY_IA32E_MODE 0x00000200
+#define VM_ENTRY_SMM 0x00000400
+#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000
+#define VM_ENTRY_LOAD_IA32_PAT 0x00004000
+#define VM_ENTRY_LOAD_IA32_EFER 0x00008000
+
+#define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff
+
+#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
+#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+
+#define EXIT_REASON_FAILED_VMENTRY 0x80000000
+#define EXIT_REASON_EXCEPTION_NMI 0
+#define EXIT_REASON_EXTERNAL_INTERRUPT 1
+#define EXIT_REASON_TRIPLE_FAULT 2
+#define EXIT_REASON_PENDING_INTERRUPT 7
+#define EXIT_REASON_NMI_WINDOW 8
+#define EXIT_REASON_TASK_SWITCH 9
+#define EXIT_REASON_CPUID 10
+#define EXIT_REASON_HLT 12
+#define EXIT_REASON_INVD 13
+#define EXIT_REASON_INVLPG 14
+#define EXIT_REASON_RDPMC 15
+#define EXIT_REASON_RDTSC 16
+#define EXIT_REASON_VMCALL 18
+#define EXIT_REASON_VMCLEAR 19
+#define EXIT_REASON_VMLAUNCH 20
+#define EXIT_REASON_VMPTRLD 21
+#define EXIT_REASON_VMPTRST 22
+#define EXIT_REASON_VMREAD 23
+#define EXIT_REASON_VMRESUME 24
+#define EXIT_REASON_VMWRITE 25
+#define EXIT_REASON_VMOFF 26
+#define EXIT_REASON_VMON 27
+#define EXIT_REASON_CR_ACCESS 28
+#define EXIT_REASON_DR_ACCESS 29
+#define EXIT_REASON_IO_INSTRUCTION 30
+#define EXIT_REASON_MSR_READ 31
+#define EXIT_REASON_MSR_WRITE 32
+#define EXIT_REASON_INVALID_STATE 33
+#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION 40
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_EPT_VIOLATION 48
+#define EXIT_REASON_EPT_MISCONFIG 49
+#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
+#define EXIT_REASON_PREEMPTION_TIMER 52
+#define EXIT_REASON_INVVPID 53
+#define EXIT_REASON_WBINVD 54
+#define EXIT_REASON_XSETBV 55
+#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_PML_FULL 62
+#define EXIT_REASON_XSAVES 63
+#define EXIT_REASON_XRSTORS 64
+#define LAST_EXIT_REASON 64
+
+enum vmcs_field {
+ VIRTUAL_PROCESSOR_ID = 0x00000000,
+ POSTED_INTR_NV = 0x00000002,
+ GUEST_ES_SELECTOR = 0x00000800,
+ GUEST_CS_SELECTOR = 0x00000802,
+ GUEST_SS_SELECTOR = 0x00000804,
+ GUEST_DS_SELECTOR = 0x00000806,
+ GUEST_FS_SELECTOR = 0x00000808,
+ GUEST_GS_SELECTOR = 0x0000080a,
+ GUEST_LDTR_SELECTOR = 0x0000080c,
+ GUEST_TR_SELECTOR = 0x0000080e,
+ GUEST_INTR_STATUS = 0x00000810,
+ GUEST_PML_INDEX = 0x00000812,
+ HOST_ES_SELECTOR = 0x00000c00,
+ HOST_CS_SELECTOR = 0x00000c02,
+ HOST_SS_SELECTOR = 0x00000c04,
+ HOST_DS_SELECTOR = 0x00000c06,
+ HOST_FS_SELECTOR = 0x00000c08,
+ HOST_GS_SELECTOR = 0x00000c0a,
+ HOST_TR_SELECTOR = 0x00000c0c,
+ IO_BITMAP_A = 0x00002000,
+ IO_BITMAP_A_HIGH = 0x00002001,
+ IO_BITMAP_B = 0x00002002,
+ IO_BITMAP_B_HIGH = 0x00002003,
+ MSR_BITMAP = 0x00002004,
+ MSR_BITMAP_HIGH = 0x00002005,
+ VM_EXIT_MSR_STORE_ADDR = 0x00002006,
+ VM_EXIT_MSR_STORE_ADDR_HIGH = 0x00002007,
+ VM_EXIT_MSR_LOAD_ADDR = 0x00002008,
+ VM_EXIT_MSR_LOAD_ADDR_HIGH = 0x00002009,
+ VM_ENTRY_MSR_LOAD_ADDR = 0x0000200a,
+ VM_ENTRY_MSR_LOAD_ADDR_HIGH = 0x0000200b,
+ PML_ADDRESS = 0x0000200e,
+ PML_ADDRESS_HIGH = 0x0000200f,
+ TSC_OFFSET = 0x00002010,
+ TSC_OFFSET_HIGH = 0x00002011,
+ VIRTUAL_APIC_PAGE_ADDR = 0x00002012,
+ VIRTUAL_APIC_PAGE_ADDR_HIGH = 0x00002013,
+ APIC_ACCESS_ADDR = 0x00002014,
+ APIC_ACCESS_ADDR_HIGH = 0x00002015,
+ POSTED_INTR_DESC_ADDR = 0x00002016,
+ POSTED_INTR_DESC_ADDR_HIGH = 0x00002017,
+ EPT_POINTER = 0x0000201a,
+ EPT_POINTER_HIGH = 0x0000201b,
+ EOI_EXIT_BITMAP0 = 0x0000201c,
+ EOI_EXIT_BITMAP0_HIGH = 0x0000201d,
+ EOI_EXIT_BITMAP1 = 0x0000201e,
+ EOI_EXIT_BITMAP1_HIGH = 0x0000201f,
+ EOI_EXIT_BITMAP2 = 0x00002020,
+ EOI_EXIT_BITMAP2_HIGH = 0x00002021,
+ EOI_EXIT_BITMAP3 = 0x00002022,
+ EOI_EXIT_BITMAP3_HIGH = 0x00002023,
+ VMREAD_BITMAP = 0x00002026,
+ VMREAD_BITMAP_HIGH = 0x00002027,
+ VMWRITE_BITMAP = 0x00002028,
+ VMWRITE_BITMAP_HIGH = 0x00002029,
+ XSS_EXIT_BITMAP = 0x0000202C,
+ XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ TSC_MULTIPLIER = 0x00002032,
+ TSC_MULTIPLIER_HIGH = 0x00002033,
+ GUEST_PHYSICAL_ADDRESS = 0x00002400,
+ GUEST_PHYSICAL_ADDRESS_HIGH = 0x00002401,
+ VMCS_LINK_POINTER = 0x00002800,
+ VMCS_LINK_POINTER_HIGH = 0x00002801,
+ GUEST_IA32_DEBUGCTL = 0x00002802,
+ GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_IA32_PAT = 0x00002804,
+ GUEST_IA32_PAT_HIGH = 0x00002805,
+ GUEST_IA32_EFER = 0x00002806,
+ GUEST_IA32_EFER_HIGH = 0x00002807,
+ GUEST_IA32_PERF_GLOBAL_CTRL = 0x00002808,
+ GUEST_IA32_PERF_GLOBAL_CTRL_HIGH= 0x00002809,
+ GUEST_PDPTR0 = 0x0000280a,
+ GUEST_PDPTR0_HIGH = 0x0000280b,
+ GUEST_PDPTR1 = 0x0000280c,
+ GUEST_PDPTR1_HIGH = 0x0000280d,
+ GUEST_PDPTR2 = 0x0000280e,
+ GUEST_PDPTR2_HIGH = 0x0000280f,
+ GUEST_PDPTR3 = 0x00002810,
+ GUEST_PDPTR3_HIGH = 0x00002811,
+ GUEST_BNDCFGS = 0x00002812,
+ GUEST_BNDCFGS_HIGH = 0x00002813,
+ HOST_IA32_PAT = 0x00002c00,
+ HOST_IA32_PAT_HIGH = 0x00002c01,
+ HOST_IA32_EFER = 0x00002c02,
+ HOST_IA32_EFER_HIGH = 0x00002c03,
+ HOST_IA32_PERF_GLOBAL_CTRL = 0x00002c04,
+ HOST_IA32_PERF_GLOBAL_CTRL_HIGH = 0x00002c05,
+ PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
+ CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
+ EXCEPTION_BITMAP = 0x00004004,
+ PAGE_FAULT_ERROR_CODE_MASK = 0x00004006,
+ PAGE_FAULT_ERROR_CODE_MATCH = 0x00004008,
+ CR3_TARGET_COUNT = 0x0000400a,
+ VM_EXIT_CONTROLS = 0x0000400c,
+ VM_EXIT_MSR_STORE_COUNT = 0x0000400e,
+ VM_EXIT_MSR_LOAD_COUNT = 0x00004010,
+ VM_ENTRY_CONTROLS = 0x00004012,
+ VM_ENTRY_MSR_LOAD_COUNT = 0x00004014,
+ VM_ENTRY_INTR_INFO_FIELD = 0x00004016,
+ VM_ENTRY_EXCEPTION_ERROR_CODE = 0x00004018,
+ VM_ENTRY_INSTRUCTION_LEN = 0x0000401a,
+ TPR_THRESHOLD = 0x0000401c,
+ SECONDARY_VM_EXEC_CONTROL = 0x0000401e,
+ PLE_GAP = 0x00004020,
+ PLE_WINDOW = 0x00004022,
+ VM_INSTRUCTION_ERROR = 0x00004400,
+ VM_EXIT_REASON = 0x00004402,
+ VM_EXIT_INTR_INFO = 0x00004404,
+ VM_EXIT_INTR_ERROR_CODE = 0x00004406,
+ IDT_VECTORING_INFO_FIELD = 0x00004408,
+ IDT_VECTORING_ERROR_CODE = 0x0000440a,
+ VM_EXIT_INSTRUCTION_LEN = 0x0000440c,
+ VMX_INSTRUCTION_INFO = 0x0000440e,
+ GUEST_ES_LIMIT = 0x00004800,
+ GUEST_CS_LIMIT = 0x00004802,
+ GUEST_SS_LIMIT = 0x00004804,
+ GUEST_DS_LIMIT = 0x00004806,
+ GUEST_FS_LIMIT = 0x00004808,
+ GUEST_GS_LIMIT = 0x0000480a,
+ GUEST_LDTR_LIMIT = 0x0000480c,
+ GUEST_TR_LIMIT = 0x0000480e,
+ GUEST_GDTR_LIMIT = 0x00004810,
+ GUEST_IDTR_LIMIT = 0x00004812,
+ GUEST_ES_AR_BYTES = 0x00004814,
+ GUEST_CS_AR_BYTES = 0x00004816,
+ GUEST_SS_AR_BYTES = 0x00004818,
+ GUEST_DS_AR_BYTES = 0x0000481a,
+ GUEST_FS_AR_BYTES = 0x0000481c,
+ GUEST_GS_AR_BYTES = 0x0000481e,
+ GUEST_LDTR_AR_BYTES = 0x00004820,
+ GUEST_TR_AR_BYTES = 0x00004822,
+ GUEST_INTERRUPTIBILITY_INFO = 0x00004824,
+ GUEST_ACTIVITY_STATE = 0X00004826,
+ GUEST_SYSENTER_CS = 0x0000482A,
+ VMX_PREEMPTION_TIMER_VALUE = 0x0000482E,
+ HOST_IA32_SYSENTER_CS = 0x00004c00,
+ CR0_GUEST_HOST_MASK = 0x00006000,
+ CR4_GUEST_HOST_MASK = 0x00006002,
+ CR0_READ_SHADOW = 0x00006004,
+ CR4_READ_SHADOW = 0x00006006,
+ CR3_TARGET_VALUE0 = 0x00006008,
+ CR3_TARGET_VALUE1 = 0x0000600a,
+ CR3_TARGET_VALUE2 = 0x0000600c,
+ CR3_TARGET_VALUE3 = 0x0000600e,
+ EXIT_QUALIFICATION = 0x00006400,
+ GUEST_LINEAR_ADDRESS = 0x0000640a,
+ GUEST_CR0 = 0x00006800,
+ GUEST_CR3 = 0x00006802,
+ GUEST_CR4 = 0x00006804,
+ GUEST_ES_BASE = 0x00006806,
+ GUEST_CS_BASE = 0x00006808,
+ GUEST_SS_BASE = 0x0000680a,
+ GUEST_DS_BASE = 0x0000680c,
+ GUEST_FS_BASE = 0x0000680e,
+ GUEST_GS_BASE = 0x00006810,
+ GUEST_LDTR_BASE = 0x00006812,
+ GUEST_TR_BASE = 0x00006814,
+ GUEST_GDTR_BASE = 0x00006816,
+ GUEST_IDTR_BASE = 0x00006818,
+ GUEST_DR7 = 0x0000681a,
+ GUEST_RSP = 0x0000681c,
+ GUEST_RIP = 0x0000681e,
+ GUEST_RFLAGS = 0x00006820,
+ GUEST_PENDING_DBG_EXCEPTIONS = 0x00006822,
+ GUEST_SYSENTER_ESP = 0x00006824,
+ GUEST_SYSENTER_EIP = 0x00006826,
+ HOST_CR0 = 0x00006c00,
+ HOST_CR3 = 0x00006c02,
+ HOST_CR4 = 0x00006c04,
+ HOST_FS_BASE = 0x00006c06,
+ HOST_GS_BASE = 0x00006c08,
+ HOST_TR_BASE = 0x00006c0a,
+ HOST_GDTR_BASE = 0x00006c0c,
+ HOST_IDTR_BASE = 0x00006c0e,
+ HOST_IA32_SYSENTER_ESP = 0x00006c10,
+ HOST_IA32_SYSENTER_EIP = 0x00006c12,
+ HOST_RSP = 0x00006c14,
+ HOST_RIP = 0x00006c16,
+};
+
+struct vmx_msr_entry {
+ uint32_t index;
+ uint32_t reserved;
+ uint64_t value;
+} __attribute__ ((aligned(16)));
+
+static inline int vmxon(uint64_t phys)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmxon %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(phys)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline void vmxoff(void)
+{
+ __asm__ __volatile__("vmxoff");
+}
+
+static inline int vmclear(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmclear %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrld(uint64_t vmcs_pa)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmptrld %[pa]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [pa]"m"(vmcs_pa)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline int vmptrst(uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmptrst %[value]; setna %[ret]"
+ : [value]"=m"(tmp), [ret]"=rm"(ret)
+ : : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmptrst that ignores errors and returns zero if the
+ * vmptrst instruction fails.
+ */
+static inline uint64_t vmptrstz(void)
+{
+ uint64_t value = 0;
+ vmptrst(&value);
+ return value;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmlaunch.
+ */
+static inline int vmlaunch(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmlaunch;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+/*
+ * No guest state (e.g. GPRs) is established by this vmresume.
+ */
+static inline int vmresume(void)
+{
+ int ret;
+
+ __asm__ __volatile__("push %%rbp;"
+ "push %%rcx;"
+ "push %%rdx;"
+ "push %%rsi;"
+ "push %%rdi;"
+ "push $0;"
+ "vmwrite %%rsp, %[host_rsp];"
+ "lea 1f(%%rip), %%rax;"
+ "vmwrite %%rax, %[host_rip];"
+ "vmresume;"
+ "incq (%%rsp);"
+ "1: pop %%rax;"
+ "pop %%rdi;"
+ "pop %%rsi;"
+ "pop %%rdx;"
+ "pop %%rcx;"
+ "pop %%rbp;"
+ : [ret]"=&a"(ret)
+ : [host_rsp]"r"((uint64_t)HOST_RSP),
+ [host_rip]"r"((uint64_t)HOST_RIP)
+ : "memory", "cc", "rbx", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15");
+ return ret;
+}
+
+static inline void vmcall(void)
+{
+ /* Currently, L1 destroys our GPRs during vmexits. */
+ __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
+ "rax", "rbx", "rcx", "rdx",
+ "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
+ "r13", "r14", "r15");
+}
+
+static inline int vmread(uint64_t encoding, uint64_t *value)
+{
+ uint64_t tmp;
+ uint8_t ret;
+
+ __asm__ __volatile__("vmread %[encoding], %[value]; setna %[ret]"
+ : [value]"=rm"(tmp), [ret]"=rm"(ret)
+ : [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ *value = tmp;
+ return ret;
+}
+
+/*
+ * A wrapper around vmread that ignores errors and returns zero if the
+ * vmread instruction fails.
+ */
+static inline uint64_t vmreadz(uint64_t encoding)
+{
+ uint64_t value = 0;
+ vmread(encoding, &value);
+ return value;
+}
+
+static inline int vmwrite(uint64_t encoding, uint64_t value)
+{
+ uint8_t ret;
+
+ __asm__ __volatile__ ("vmwrite %[value], %[encoding]; setna %[ret]"
+ : [ret]"=rm"(ret)
+ : [value]"rm"(value), [encoding]"r"(encoding)
+ : "cc", "memory");
+
+ return ret;
+}
+
+static inline uint32_t vmcs_revision(void)
+{
+ return rdmsr(MSR_IA32_VMX_BASIC);
+}
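+
+/*
+ * Typical bring-up order (sketch; error handling omitted and the
+ * physical addresses are assumed to come from vcpu_alloc_vmx()
+ * below):
+ *
+ *	if (!vmxon(vmx->vmxon_gpa) &&
+ *	    !vmclear(vmx->vmcs_gpa) &&
+ *	    !vmptrld(vmx->vmcs_gpa))
+ *		vmwrite(VMCS_LINK_POINTER, -1ull);
+ */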
+
+struct vmx_pages {
+ void *vmxon_hva;
+ uint64_t vmxon_gpa;
+ void *vmxon;
+
+ void *vmcs_hva;
+ uint64_t vmcs_gpa;
+ void *vmcs;
+
+ void *msr_hva;
+ uint64_t msr_gpa;
+ void *msr;
+
+ void *shadow_vmcs_hva;
+ uint64_t shadow_vmcs_gpa;
+ void *shadow_vmcs;
+
+ void *vmread_hva;
+ uint64_t vmread_gpa;
+ void *vmread;
+
+ void *vmwrite_hva;
+ uint64_t vmwrite_gpa;
+ void *vmwrite;
+};
+
+struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
+bool prepare_for_vmx_operation(struct vmx_pages *vmx);
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
+
+#endif /* !SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/include/x86.h b/tools/testing/selftests/kvm/include/x86.h
new file mode 100644
index 000000000..a7667a613
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86.h
@@ -0,0 +1,1047 @@
+/*
+ * tools/testing/selftests/kvm/include/x86.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ */
+
+#ifndef SELFTEST_KVM_X86_H
+#define SELFTEST_KVM_X86_H
+
+#include <assert.h>
+#include <stdint.h>
+
+#define X86_EFLAGS_FIXED (1u << 1)
+
+#define X86_CR4_VME (1ul << 0)
+#define X86_CR4_PVI (1ul << 1)
+#define X86_CR4_TSD (1ul << 2)
+#define X86_CR4_DE (1ul << 3)
+#define X86_CR4_PSE (1ul << 4)
+#define X86_CR4_PAE (1ul << 5)
+#define X86_CR4_MCE (1ul << 6)
+#define X86_CR4_PGE (1ul << 7)
+#define X86_CR4_PCE (1ul << 8)
+#define X86_CR4_OSFXSR (1ul << 9)
+#define X86_CR4_OSXMMEXCPT (1ul << 10)
+#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_VMXE (1ul << 13)
+#define X86_CR4_SMXE (1ul << 14)
+#define X86_CR4_FSGSBASE (1ul << 16)
+#define X86_CR4_PCIDE (1ul << 17)
+#define X86_CR4_OSXSAVE (1ul << 18)
+#define X86_CR4_SMEP (1ul << 20)
+#define X86_CR4_SMAP (1ul << 21)
+#define X86_CR4_PKE (1ul << 22)
+
+/* The enum values match the instruction encoding of each register */
+enum x86_register {
+ RAX = 0,
+ RCX,
+ RDX,
+ RBX,
+ RSP,
+ RBP,
+ RSI,
+ RDI,
+ R8,
+ R9,
+ R10,
+ R11,
+ R12,
+ R13,
+ R14,
+ R15,
+};
+
+struct desc64 {
+ uint16_t limit0;
+ uint16_t base0;
+ unsigned base1:8, type:4, s:1, dpl:2, p:1;
+ unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
+ uint32_t base3;
+ uint32_t zero1;
+} __attribute__((packed));
+
+struct desc_ptr {
+ uint16_t size;
+ uint64_t address;
+} __attribute__((packed));
+
+static inline uint64_t get_desc64_base(const struct desc64 *desc)
+{
+ return ((uint64_t)desc->base3 << 32) |
+ (desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline uint64_t rdtsc(void)
+{
+ uint32_t eax, edx;
+
+ /*
+ * The lfence is to wait (on Intel CPUs) until all previous
+ * instructions have been executed.
+ */
+ __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdtscp(uint32_t *aux)
+{
+ uint32_t eax, edx;
+
+ __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
+ return ((uint64_t)edx) << 32 | eax;
+}
+
+static inline uint64_t rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+static inline void wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
+}
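+
+/*
+ * Illustrative round trip (the MSR and flag are examples, both defined
+ * later in this header):
+ *
+ *	uint64_t efer = rdmsr(MSR_EFER);
+ *
+ *	wrmsr(MSR_EFER, efer | EFER_NX);
+ */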
+
+
+static inline uint16_t inw(uint16_t port)
+{
+ uint16_t tmp;
+
+ __asm__ __volatile__("in %%dx, %%ax"
+ : /* output */ "=a" (tmp)
+ : /* input */ "d" (port));
+
+ return tmp;
+}
+
+static inline uint16_t get_es(void)
+{
+ uint16_t es;
+
+ __asm__ __volatile__("mov %%es, %[es]"
+ : /* output */ [es]"=rm"(es));
+ return es;
+}
+
+static inline uint16_t get_cs(void)
+{
+ uint16_t cs;
+
+ __asm__ __volatile__("mov %%cs, %[cs]"
+ : /* output */ [cs]"=rm"(cs));
+ return cs;
+}
+
+static inline uint16_t get_ss(void)
+{
+ uint16_t ss;
+
+ __asm__ __volatile__("mov %%ss, %[ss]"
+ : /* output */ [ss]"=rm"(ss));
+ return ss;
+}
+
+static inline uint16_t get_ds(void)
+{
+ uint16_t ds;
+
+ __asm__ __volatile__("mov %%ds, %[ds]"
+ : /* output */ [ds]"=rm"(ds));
+ return ds;
+}
+
+static inline uint16_t get_fs(void)
+{
+ uint16_t fs;
+
+ __asm__ __volatile__("mov %%fs, %[fs]"
+ : /* output */ [fs]"=rm"(fs));
+ return fs;
+}
+
+static inline uint16_t get_gs(void)
+{
+ uint16_t gs;
+
+ __asm__ __volatile__("mov %%gs, %[gs]"
+ : /* output */ [gs]"=rm"(gs));
+ return gs;
+}
+
+static inline uint16_t get_tr(void)
+{
+ uint16_t tr;
+
+ __asm__ __volatile__("str %[tr]"
+ : /* output */ [tr]"=rm"(tr));
+ return tr;
+}
+
+static inline uint64_t get_cr0(void)
+{
+ uint64_t cr0;
+
+ __asm__ __volatile__("mov %%cr0, %[cr0]"
+ : /* output */ [cr0]"=r"(cr0));
+ return cr0;
+}
+
+static inline uint64_t get_cr3(void)
+{
+ uint64_t cr3;
+
+ __asm__ __volatile__("mov %%cr3, %[cr3]"
+ : /* output */ [cr3]"=r"(cr3));
+ return cr3;
+}
+
+static inline uint64_t get_cr4(void)
+{
+ uint64_t cr4;
+
+ __asm__ __volatile__("mov %%cr4, %[cr4]"
+ : /* output */ [cr4]"=r"(cr4));
+ return cr4;
+}
+
+static inline void set_cr4(uint64_t val)
+{
+ __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
+}
+
+static inline uint64_t get_gdt_base(void)
+{
+ struct desc_ptr gdt;
+ __asm__ __volatile__("sgdt %[gdt]"
+ : /* output */ [gdt]"=m"(gdt));
+ return gdt.address;
+}
+
+static inline uint64_t get_idt_base(void)
+{
+ struct desc_ptr idt;
+ __asm__ __volatile__("sidt %[idt]"
+ : /* output */ [idt]"=m"(idt));
+ return idt.address;
+}
+
+#define SET_XMM(__var, __xmm) \
+ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+
+static inline void set_xmm(int n, unsigned long val)
+{
+ switch (n) {
+ case 0:
+ SET_XMM(val, xmm0);
+ break;
+ case 1:
+ SET_XMM(val, xmm1);
+ break;
+ case 2:
+ SET_XMM(val, xmm2);
+ break;
+ case 3:
+ SET_XMM(val, xmm3);
+ break;
+ case 4:
+ SET_XMM(val, xmm4);
+ break;
+ case 5:
+ SET_XMM(val, xmm5);
+ break;
+ case 6:
+ SET_XMM(val, xmm6);
+ break;
+ case 7:
+ SET_XMM(val, xmm7);
+ break;
+ }
+}
+
+typedef unsigned long v1di __attribute__ ((vector_size (8)));
+static inline unsigned long get_xmm(int n)
+{
+ assert(n >= 0 && n <= 7);
+
+ register v1di xmm0 __asm__("%xmm0");
+ register v1di xmm1 __asm__("%xmm1");
+ register v1di xmm2 __asm__("%xmm2");
+ register v1di xmm3 __asm__("%xmm3");
+ register v1di xmm4 __asm__("%xmm4");
+ register v1di xmm5 __asm__("%xmm5");
+ register v1di xmm6 __asm__("%xmm6");
+ register v1di xmm7 __asm__("%xmm7");
+ switch (n) {
+ case 0:
+ return (unsigned long)xmm0;
+ case 1:
+ return (unsigned long)xmm1;
+ case 2:
+ return (unsigned long)xmm2;
+ case 3:
+ return (unsigned long)xmm3;
+ case 4:
+ return (unsigned long)xmm4;
+ case 5:
+ return (unsigned long)xmm5;
+ case 6:
+ return (unsigned long)xmm6;
+ case 7:
+ return (unsigned long)xmm7;
+ }
+ return 0;
+}
+
+struct kvm_x86_state;
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state);
+
+/*
+ * Basic CPU control in CR0
+ */
+#define X86_CR0_PE (1UL<<0) /* Protection Enable */
+#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
+#define X86_CR0_EM (1UL<<2) /* Emulation */
+#define X86_CR0_TS (1UL<<3) /* Task Switched */
+#define X86_CR0_ET (1UL<<4) /* Extension Type */
+#define X86_CR0_NE (1UL<<5) /* Numeric Error */
+#define X86_CR0_WP (1UL<<16) /* Write Protect */
+#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
+#define X86_CR0_NW (1UL<<29) /* Not Write-through */
+#define X86_CR0_CD (1UL<<30) /* Cache Disable */
+#define X86_CR0_PG (1UL<<31) /* Paging */
+
+/*
+ * CPU model specific register (MSR) numbers.
+ */
+
+/* x86-64 specific MSRs */
+#define MSR_EFER 0xc0000080 /* extended feature register */
+#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */
+#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */
+#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */
+#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */
+#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */
+#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */
+#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */
+#define MSR_TSC_AUX 0xc0000103 /* Auxiliary TSC */
+
+/* EFER bits: */
+#define EFER_SCE (1<<0) /* SYSCALL/SYSRET */
+#define EFER_LME (1<<8) /* Long mode enable */
+#define EFER_LMA (1<<10) /* Long mode active (read-only) */
+#define EFER_NX (1<<11) /* No execute enable */
+#define EFER_SVME (1<<12) /* Enable virtualization */
+#define EFER_LMSLE (1<<13) /* Long Mode Segment Limit Enable */
+#define EFER_FFXSR (1<<14) /* Enable Fast FXSAVE/FXRSTOR */
+
+/* Intel MSRs. Some also available on other CPUs */
+
+#define MSR_PPIN_CTL 0x0000004e
+#define MSR_PPIN 0x0000004f
+
+#define MSR_IA32_PERFCTR0 0x000000c1
+#define MSR_IA32_PERFCTR1 0x000000c2
+#define MSR_FSB_FREQ 0x000000cd
+#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
+
+#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
+#define NHM_C3_AUTO_DEMOTE (1UL << 25)
+#define NHM_C1_AUTO_DEMOTE (1UL << 26)
+#define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25)
+#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
+#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
+
+#define MSR_MTRRcap 0x000000fe
+#define MSR_IA32_BBL_CR_CTL 0x00000119
+#define MSR_IA32_BBL_CR_CTL3 0x0000011e
+
+#define MSR_IA32_SYSENTER_CS 0x00000174
+#define MSR_IA32_SYSENTER_ESP 0x00000175
+#define MSR_IA32_SYSENTER_EIP 0x00000176
+
+#define MSR_IA32_MCG_CAP 0x00000179
+#define MSR_IA32_MCG_STATUS 0x0000017a
+#define MSR_IA32_MCG_CTL 0x0000017b
+#define MSR_IA32_MCG_EXT_CTL 0x000004d0
+
+#define MSR_OFFCORE_RSP_0 0x000001a6
+#define MSR_OFFCORE_RSP_1 0x000001a7
+#define MSR_TURBO_RATIO_LIMIT 0x000001ad
+#define MSR_TURBO_RATIO_LIMIT1 0x000001ae
+#define MSR_TURBO_RATIO_LIMIT2 0x000001af
+
+#define MSR_LBR_SELECT 0x000001c8
+#define MSR_LBR_TOS 0x000001c9
+#define MSR_LBR_NHM_FROM 0x00000680
+#define MSR_LBR_NHM_TO 0x000006c0
+#define MSR_LBR_CORE_FROM 0x00000040
+#define MSR_LBR_CORE_TO 0x00000060
+
+#define MSR_LBR_INFO_0 0x00000dc0 /* ... 0xddf for _31 */
+#define LBR_INFO_MISPRED BIT_ULL(63)
+#define LBR_INFO_IN_TX BIT_ULL(62)
+#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYCLES 0xffff
+
+#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_IA32_DS_AREA 0x00000600
+#define MSR_IA32_PERF_CAPABILITIES 0x00000345
+#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
+
+#define MSR_IA32_RTIT_CTL 0x00000570
+#define MSR_IA32_RTIT_STATUS 0x00000571
+#define MSR_IA32_RTIT_ADDR0_A 0x00000580
+#define MSR_IA32_RTIT_ADDR0_B 0x00000581
+#define MSR_IA32_RTIT_ADDR1_A 0x00000582
+#define MSR_IA32_RTIT_ADDR1_B 0x00000583
+#define MSR_IA32_RTIT_ADDR2_A 0x00000584
+#define MSR_IA32_RTIT_ADDR2_B 0x00000585
+#define MSR_IA32_RTIT_ADDR3_A 0x00000586
+#define MSR_IA32_RTIT_ADDR3_B 0x00000587
+#define MSR_IA32_RTIT_CR3_MATCH 0x00000572
+#define MSR_IA32_RTIT_OUTPUT_BASE 0x00000560
+#define MSR_IA32_RTIT_OUTPUT_MASK 0x00000561
+
+#define MSR_MTRRfix64K_00000 0x00000250
+#define MSR_MTRRfix16K_80000 0x00000258
+#define MSR_MTRRfix16K_A0000 0x00000259
+#define MSR_MTRRfix4K_C0000 0x00000268
+#define MSR_MTRRfix4K_C8000 0x00000269
+#define MSR_MTRRfix4K_D0000 0x0000026a
+#define MSR_MTRRfix4K_D8000 0x0000026b
+#define MSR_MTRRfix4K_E0000 0x0000026c
+#define MSR_MTRRfix4K_E8000 0x0000026d
+#define MSR_MTRRfix4K_F0000 0x0000026e
+#define MSR_MTRRfix4K_F8000 0x0000026f
+#define MSR_MTRRdefType 0x000002ff
+
+#define MSR_IA32_CR_PAT 0x00000277
+
+#define MSR_IA32_DEBUGCTLMSR 0x000001d9
+#define MSR_IA32_LASTBRANCHFROMIP 0x000001db
+#define MSR_IA32_LASTBRANCHTOIP 0x000001dc
+#define MSR_IA32_LASTINTFROMIP 0x000001dd
+#define MSR_IA32_LASTINTTOIP 0x000001de
+
+/* DEBUGCTLMSR bits (others vary by model): */
+#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
+#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
+#define DEBUGCTLMSR_TR (1UL << 6)
+#define DEBUGCTLMSR_BTS (1UL << 7)
+#define DEBUGCTLMSR_BTINT (1UL << 8)
+#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9)
+#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
+#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
+#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
+#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+
+#define MSR_PEBS_FRONTEND 0x000003f7
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+
+#define MSR_IA32_MC0_CTL 0x00000400
+#define MSR_IA32_MC0_STATUS 0x00000401
+#define MSR_IA32_MC0_ADDR 0x00000402
+#define MSR_IA32_MC0_MISC 0x00000403
+
+/* C-state Residency Counters */
+#define MSR_PKG_C3_RESIDENCY 0x000003f8
+#define MSR_PKG_C6_RESIDENCY 0x000003f9
+#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa
+#define MSR_PKG_C7_RESIDENCY 0x000003fa
+#define MSR_CORE_C3_RESIDENCY 0x000003fc
+#define MSR_CORE_C6_RESIDENCY 0x000003fd
+#define MSR_CORE_C7_RESIDENCY 0x000003fe
+#define MSR_KNL_CORE_C6_RESIDENCY 0x000003ff
+#define MSR_PKG_C2_RESIDENCY 0x0000060d
+#define MSR_PKG_C8_RESIDENCY 0x00000630
+#define MSR_PKG_C9_RESIDENCY 0x00000631
+#define MSR_PKG_C10_RESIDENCY 0x00000632
+
+/* Interrupt Response Limit */
+#define MSR_PKGC3_IRTL 0x0000060a
+#define MSR_PKGC6_IRTL 0x0000060b
+#define MSR_PKGC7_IRTL 0x0000060c
+#define MSR_PKGC8_IRTL 0x00000633
+#define MSR_PKGC9_IRTL 0x00000634
+#define MSR_PKGC10_IRTL 0x00000635
+
+/* Run Time Average Power Limiting (RAPL) Interface */
+
+#define MSR_RAPL_POWER_UNIT 0x00000606
+
+#define MSR_PKG_POWER_LIMIT 0x00000610
+#define MSR_PKG_ENERGY_STATUS 0x00000611
+#define MSR_PKG_PERF_STATUS 0x00000613
+#define MSR_PKG_POWER_INFO 0x00000614
+
+#define MSR_DRAM_POWER_LIMIT 0x00000618
+#define MSR_DRAM_ENERGY_STATUS 0x00000619
+#define MSR_DRAM_PERF_STATUS 0x0000061b
+#define MSR_DRAM_POWER_INFO 0x0000061c
+
+#define MSR_PP0_POWER_LIMIT 0x00000638
+#define MSR_PP0_ENERGY_STATUS 0x00000639
+#define MSR_PP0_POLICY 0x0000063a
+#define MSR_PP0_PERF_STATUS 0x0000063b
+
+#define MSR_PP1_POWER_LIMIT 0x00000640
+#define MSR_PP1_ENERGY_STATUS 0x00000641
+#define MSR_PP1_POLICY 0x00000642
+
+/* Config TDP MSRs */
+#define MSR_CONFIG_TDP_NOMINAL 0x00000648
+#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
+#define MSR_CONFIG_TDP_LEVEL_2 0x0000064A
+#define MSR_CONFIG_TDP_CONTROL 0x0000064B
+#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C
+
+#define MSR_PLATFORM_ENERGY_STATUS 0x0000064D
+
+#define MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658
+#define MSR_PKG_ANY_CORE_C0_RES 0x00000659
+#define MSR_PKG_ANY_GFXE_C0_RES 0x0000065A
+#define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B
+
+#define MSR_CORE_C1_RES 0x00000660
+#define MSR_MODULE_C6_RES_MS 0x00000664
+
+#define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669
+
+#define MSR_ATOM_CORE_RATIOS 0x0000066a
+#define MSR_ATOM_CORE_VIDS 0x0000066b
+#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c
+#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d
+
+
+#define MSR_CORE_PERF_LIMIT_REASONS 0x00000690
+#define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0
+#define MSR_RING_PERF_LIMIT_REASONS 0x000006B1
+
+/* Hardware P state interface */
+#define MSR_PPERF 0x0000064e
+#define MSR_PERF_LIMIT_REASONS 0x0000064f
+#define MSR_PM_ENABLE 0x00000770
+#define MSR_HWP_CAPABILITIES 0x00000771
+#define MSR_HWP_REQUEST_PKG 0x00000772
+#define MSR_HWP_INTERRUPT 0x00000773
+#define MSR_HWP_REQUEST 0x00000774
+#define MSR_HWP_STATUS 0x00000777
+
+/* CPUID.6.EAX */
+#define HWP_BASE_BIT (1<<7)
+#define HWP_NOTIFICATIONS_BIT (1<<8)
+#define HWP_ACTIVITY_WINDOW_BIT (1<<9)
+#define HWP_ENERGY_PERF_PREFERENCE_BIT (1<<10)
+#define HWP_PACKAGE_LEVEL_REQUEST_BIT (1<<11)
+
+/* IA32_HWP_CAPABILITIES */
+#define HWP_HIGHEST_PERF(x) (((x) >> 0) & 0xff)
+#define HWP_GUARANTEED_PERF(x) (((x) >> 8) & 0xff)
+#define HWP_MOSTEFFICIENT_PERF(x) (((x) >> 16) & 0xff)
+#define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff)
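
The accessors above simply extract byte fields from the 64-bit MSR value; decoding a reading might look like this sketch, where caps stands for a raw IA32_HWP_CAPABILITIES value obtained elsewhere:

    uint64_t caps = /* raw IA32_HWP_CAPABILITIES value */ 0;
    uint8_t highest    = HWP_HIGHEST_PERF(caps);     /* bits 7:0 */
    uint8_t guaranteed = HWP_GUARANTEED_PERF(caps);  /* bits 15:8 */
    uint8_t lowest     = HWP_LOWEST_PERF(caps);      /* bits 31:24 */
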
+
+/* IA32_HWP_REQUEST */
+#define HWP_MIN_PERF(x) (x & 0xff)
+#define HWP_MAX_PERF(x) ((x & 0xff) << 8)
+#define HWP_DESIRED_PERF(x) ((x & 0xff) << 16)
+#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24)
+#define HWP_EPP_PERFORMANCE 0x00
+#define HWP_EPP_BALANCE_PERFORMANCE 0x80
+#define HWP_EPP_BALANCE_POWERSAVE 0xC0
+#define HWP_EPP_POWERSAVE 0xFF
+#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32)
+#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42)
+
+/* IA32_HWP_STATUS */
+#define HWP_GUARANTEED_CHANGE(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM(x) (x & 0x4)
+
+/* IA32_HWP_INTERRUPT */
+#define HWP_CHANGE_TO_GUARANTEED_INT(x) (x & 0x1)
+#define HWP_EXCURSION_TO_MINIMUM_INT(x) (x & 0x2)
+
+#define MSR_AMD64_MC0_MASK 0xc0010044
+
+#define MSR_IA32_MCx_CTL(x) (MSR_IA32_MC0_CTL + 4*(x))
+#define MSR_IA32_MCx_STATUS(x) (MSR_IA32_MC0_STATUS + 4*(x))
+#define MSR_IA32_MCx_ADDR(x) (MSR_IA32_MC0_ADDR + 4*(x))
+#define MSR_IA32_MCx_MISC(x) (MSR_IA32_MC0_MISC + 4*(x))
+
+#define MSR_AMD64_MCx_MASK(x) (MSR_AMD64_MC0_MASK + (x))
+
+/* These are consecutive and not in the normal block of four MCE bank registers */
+#define MSR_IA32_MC0_CTL2 0x00000280
+#define MSR_IA32_MCx_CTL2(x) (MSR_IA32_MC0_CTL2 + (x))
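
The MCx helpers stride from bank 0, so per-bank register numbers follow directly from the macros; for example, for bank 3:

    uint32_t ctl    = MSR_IA32_MCx_CTL(3);     /* 0x400 + 4*3 == 0x40c */
    uint32_t status = MSR_IA32_MCx_STATUS(3);  /* 0x401 + 4*3 == 0x40d */
    uint32_t ctl2   = MSR_IA32_MCx_CTL2(3);    /* 0x280 +   3 == 0x283 */
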
+
+#define MSR_P6_PERFCTR0 0x000000c1
+#define MSR_P6_PERFCTR1 0x000000c2
+#define MSR_P6_EVNTSEL0 0x00000186
+#define MSR_P6_EVNTSEL1 0x00000187
+
+#define MSR_KNC_PERFCTR0 0x00000020
+#define MSR_KNC_PERFCTR1 0x00000021
+#define MSR_KNC_EVNTSEL0 0x00000028
+#define MSR_KNC_EVNTSEL1 0x00000029
+
+/* Alternative perfctr range with full access. */
+#define MSR_IA32_PMC0 0x000004c1
+
+/* AMD64 MSRs. Not complete. See the architecture manual for a more
+ complete list. */
+
+#define MSR_AMD64_PATCH_LEVEL 0x0000008b
+#define MSR_AMD64_TSC_RATIO 0xc0000104
+#define MSR_AMD64_NB_CFG 0xc001001f
+#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
+#define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_LS_CFG 0xc0011020
+#define MSR_AMD64_DC_CFG 0xc0011022
+#define MSR_AMD64_BU_CFG2 0xc001102a
+#define MSR_AMD64_IBSFETCHCTL 0xc0011030
+#define MSR_AMD64_IBSFETCHLINAD 0xc0011031
+#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032
+#define MSR_AMD64_IBSFETCH_REG_COUNT 3
+#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1)
+#define MSR_AMD64_IBSOPCTL 0xc0011033
+#define MSR_AMD64_IBSOPRIP 0xc0011034
+#define MSR_AMD64_IBSOPDATA 0xc0011035
+#define MSR_AMD64_IBSOPDATA2 0xc0011036
+#define MSR_AMD64_IBSOPDATA3 0xc0011037
+#define MSR_AMD64_IBSDCLINAD 0xc0011038
+#define MSR_AMD64_IBSDCPHYSAD 0xc0011039
+#define MSR_AMD64_IBSOP_REG_COUNT 7
+#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
+#define MSR_AMD64_IBSCTL 0xc001103a
+#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_IBSOPDATA4 0xc001103d
+#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV 0xc0010131
+#define MSR_AMD64_SEV_ENABLED_BIT 0
+#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+
+/* Fam 17h MSRs */
+#define MSR_F17H_IRPERF 0xc00000e9
+
+/* Fam 16h MSRs */
+#define MSR_F16H_L2I_PERF_CTL 0xc0010230
+#define MSR_F16H_L2I_PERF_CTR 0xc0010231
+#define MSR_F16H_DR1_ADDR_MASK 0xc0011019
+#define MSR_F16H_DR2_ADDR_MASK 0xc001101a
+#define MSR_F16H_DR3_ADDR_MASK 0xc001101b
+#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
+
+/* Fam 15h MSRs */
+#define MSR_F15H_PERF_CTL 0xc0010200
+#define MSR_F15H_PERF_CTR 0xc0010201
+#define MSR_F15H_NB_PERF_CTL 0xc0010240
+#define MSR_F15H_NB_PERF_CTR 0xc0010241
+#define MSR_F15H_PTSC 0xc0010280
+#define MSR_F15H_IC_CFG 0xc0011021
+
+/* Fam 10h MSRs */
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#define FAM10H_MMIO_CONF_ENABLE (1<<0)
+#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf
+#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2
+#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffffULL
+#define FAM10H_MMIO_CONF_BASE_SHIFT 20
+#define MSR_FAM10H_NODE_ID 0xc001100c
+#define MSR_F10H_DECFG 0xc0011029
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT 1
+#define MSR_F10H_DECFG_LFENCE_SERIALIZE BIT_ULL(MSR_F10H_DECFG_LFENCE_SERIALIZE_BIT)
+
+/* K8 MSRs */
+#define MSR_K8_TOP_MEM1 0xc001001a
+#define MSR_K8_TOP_MEM2 0xc001001d
+#define MSR_K8_SYSCFG 0xc0010010
+#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT 23
+#define MSR_K8_SYSCFG_MEM_ENCRYPT BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_K8_INT_PENDING_MSG 0xc0010055
+/* C1E active bits in int pending message */
+#define K8_INTP_C1E_ACTIVE_MASK 0x18000000
+#define MSR_K8_TSEG_ADDR 0xc0010112
+#define MSR_K8_TSEG_MASK 0xc0010113
+#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */
+#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */
+#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */
+
+/* K7 MSRs */
+#define MSR_K7_EVNTSEL0 0xc0010000
+#define MSR_K7_PERFCTR0 0xc0010004
+#define MSR_K7_EVNTSEL1 0xc0010001
+#define MSR_K7_PERFCTR1 0xc0010005
+#define MSR_K7_EVNTSEL2 0xc0010002
+#define MSR_K7_PERFCTR2 0xc0010006
+#define MSR_K7_EVNTSEL3 0xc0010003
+#define MSR_K7_PERFCTR3 0xc0010007
+#define MSR_K7_CLK_CTL 0xc001001b
+#define MSR_K7_HWCR 0xc0010015
+#define MSR_K7_HWCR_SMMLOCK_BIT 0
+#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
+#define MSR_K7_FID_VID_CTL 0xc0010041
+#define MSR_K7_FID_VID_STATUS 0xc0010042
+
+/* K6 MSRs */
+#define MSR_K6_WHCR 0xc0000082
+#define MSR_K6_UWCCR 0xc0000085
+#define MSR_K6_EPMR 0xc0000086
+#define MSR_K6_PSOR 0xc0000087
+#define MSR_K6_PFIR 0xc0000088
+
+/* Centaur-Hauls/IDT defined MSRs. */
+#define MSR_IDT_FCR1 0x00000107
+#define MSR_IDT_FCR2 0x00000108
+#define MSR_IDT_FCR3 0x00000109
+#define MSR_IDT_FCR4 0x0000010a
+
+#define MSR_IDT_MCR0 0x00000110
+#define MSR_IDT_MCR1 0x00000111
+#define MSR_IDT_MCR2 0x00000112
+#define MSR_IDT_MCR3 0x00000113
+#define MSR_IDT_MCR4 0x00000114
+#define MSR_IDT_MCR5 0x00000115
+#define MSR_IDT_MCR6 0x00000116
+#define MSR_IDT_MCR7 0x00000117
+#define MSR_IDT_MCR_CTRL 0x00000120
+
+/* VIA Cyrix defined MSRs */
+#define MSR_VIA_FCR 0x00001107
+#define MSR_VIA_LONGHAUL 0x0000110a
+#define MSR_VIA_RNG 0x0000110b
+#define MSR_VIA_BCR2 0x00001147
+
+/* Transmeta defined MSRs */
+#define MSR_TMTA_LONGRUN_CTRL 0x80868010
+#define MSR_TMTA_LONGRUN_FLAGS 0x80868011
+#define MSR_TMTA_LRTI_READOUT 0x80868018
+#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a
+
+/* Intel defined MSRs. */
+#define MSR_IA32_P5_MC_ADDR 0x00000000
+#define MSR_IA32_P5_MC_TYPE 0x00000001
+#define MSR_IA32_TSC 0x00000010
+#define MSR_IA32_PLATFORM_ID 0x00000017
+#define MSR_IA32_EBL_CR_POWERON 0x0000002a
+#define MSR_EBC_FREQUENCY_ID 0x0000002c
+#define MSR_SMI_COUNT 0x00000034
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+#define MSR_IA32_TSC_ADJUST 0x0000003b
+#define MSR_IA32_BNDCFGS 0x00000d90
+
+#define MSR_IA32_BNDCFGS_RSVD 0x00000ffc
+
+#define MSR_IA32_XSS 0x00000da0
+
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1)
+#define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2)
+#define FEATURE_CONTROL_LMCE (1<<20)
+
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+
+#define MSR_IA32_TSCDEADLINE 0x000006e0
+
+#define MSR_IA32_UCODE_WRITE 0x00000079
+#define MSR_IA32_UCODE_REV 0x0000008b
+
+#define MSR_IA32_SMM_MONITOR_CTL 0x0000009b
+#define MSR_IA32_SMBASE 0x0000009e
+
+#define MSR_IA32_PERF_STATUS 0x00000198
+#define MSR_IA32_PERF_CTL 0x00000199
+#define INTEL_PERF_CTL_MASK 0xffff
+#define MSR_AMD_PSTATE_DEF_BASE 0xc0010064
+#define MSR_AMD_PERF_STATUS 0xc0010063
+#define MSR_AMD_PERF_CTL 0xc0010062
+
+#define MSR_IA32_MPERF 0x000000e7
+#define MSR_IA32_APERF 0x000000e8
+
+#define MSR_IA32_THERM_CONTROL 0x0000019a
+#define MSR_IA32_THERM_INTERRUPT 0x0000019b
+
+#define THERM_INT_HIGH_ENABLE (1 << 0)
+#define THERM_INT_LOW_ENABLE (1 << 1)
+#define THERM_INT_PLN_ENABLE (1 << 24)
+
+#define MSR_IA32_THERM_STATUS 0x0000019c
+
+#define THERM_STATUS_PROCHOT (1 << 0)
+#define THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_THERM2_CTL 0x0000019d
+
+#define MSR_THERM2_CTL_TM_SELECT (1ULL << 16)
+
+#define MSR_IA32_MISC_ENABLE 0x000001a0
+
+#define MSR_IA32_TEMPERATURE_TARGET 0x000001a2
+
+#define MSR_MISC_FEATURE_CONTROL 0x000001a4
+#define MSR_MISC_PWR_MGMT 0x000001aa
+
+#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
+#define ENERGY_PERF_BIAS_PERFORMANCE 0
+#define ENERGY_PERF_BIAS_BALANCE_PERFORMANCE 4
+#define ENERGY_PERF_BIAS_NORMAL 6
+#define ENERGY_PERF_BIAS_BALANCE_POWERSAVE 8
+#define ENERGY_PERF_BIAS_POWERSAVE 15
+
+#define MSR_IA32_PACKAGE_THERM_STATUS 0x000001b1
+
+#define PACKAGE_THERM_STATUS_PROCHOT (1 << 0)
+#define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10)
+
+#define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2
+
+#define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0)
+#define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1)
+#define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24)
+
+/* Thermal Thresholds Support */
+#define THERM_INT_THRESHOLD0_ENABLE (1 << 15)
+#define THERM_SHIFT_THRESHOLD0 8
+#define THERM_MASK_THRESHOLD0 (0x7f << THERM_SHIFT_THRESHOLD0)
+#define THERM_INT_THRESHOLD1_ENABLE (1 << 23)
+#define THERM_SHIFT_THRESHOLD1 16
+#define THERM_MASK_THRESHOLD1 (0x7f << THERM_SHIFT_THRESHOLD1)
+#define THERM_STATUS_THRESHOLD0 (1 << 6)
+#define THERM_LOG_THRESHOLD0 (1 << 7)
+#define THERM_STATUS_THRESHOLD1 (1 << 8)
+#define THERM_LOG_THRESHOLD1 (1 << 9)
+
+/* MISC_ENABLE bits: architectural */
+#define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0
+#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT)
+#define MSR_IA32_MISC_ENABLE_TCC_BIT 1
+#define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT)
+#define MSR_IA32_MISC_ENABLE_EMON_BIT 7
+#define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT)
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11
+#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12
+#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT)
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16
+#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT)
+#define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18
+#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT)
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22
+#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT)
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23
+#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34
+#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT)
+
+/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2
+#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT)
+#define MSR_IA32_MISC_ENABLE_TM1_BIT 3
+#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT)
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4
+#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6
+#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8
+#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9
+#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT)
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10
+#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT)
+#define MSR_IA32_MISC_ENABLE_TM2_BIT 13
+#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT)
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19
+#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20
+#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT)
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24
+#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT)
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37
+#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38
+#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT)
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
+#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
+
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
+
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
+
+#define MSR_IA32_TSC_DEADLINE 0x000006E0
+
+/* P4/Xeon+ specific */
+#define MSR_IA32_MCG_EAX 0x00000180
+#define MSR_IA32_MCG_EBX 0x00000181
+#define MSR_IA32_MCG_ECX 0x00000182
+#define MSR_IA32_MCG_EDX 0x00000183
+#define MSR_IA32_MCG_ESI 0x00000184
+#define MSR_IA32_MCG_EDI 0x00000185
+#define MSR_IA32_MCG_EBP 0x00000186
+#define MSR_IA32_MCG_ESP 0x00000187
+#define MSR_IA32_MCG_EFLAGS 0x00000188
+#define MSR_IA32_MCG_EIP 0x00000189
+#define MSR_IA32_MCG_RESERVED 0x0000018a
+
+/* Pentium IV performance counter MSRs */
+#define MSR_P4_BPU_PERFCTR0 0x00000300
+#define MSR_P4_BPU_PERFCTR1 0x00000301
+#define MSR_P4_BPU_PERFCTR2 0x00000302
+#define MSR_P4_BPU_PERFCTR3 0x00000303
+#define MSR_P4_MS_PERFCTR0 0x00000304
+#define MSR_P4_MS_PERFCTR1 0x00000305
+#define MSR_P4_MS_PERFCTR2 0x00000306
+#define MSR_P4_MS_PERFCTR3 0x00000307
+#define MSR_P4_FLAME_PERFCTR0 0x00000308
+#define MSR_P4_FLAME_PERFCTR1 0x00000309
+#define MSR_P4_FLAME_PERFCTR2 0x0000030a
+#define MSR_P4_FLAME_PERFCTR3 0x0000030b
+#define MSR_P4_IQ_PERFCTR0 0x0000030c
+#define MSR_P4_IQ_PERFCTR1 0x0000030d
+#define MSR_P4_IQ_PERFCTR2 0x0000030e
+#define MSR_P4_IQ_PERFCTR3 0x0000030f
+#define MSR_P4_IQ_PERFCTR4 0x00000310
+#define MSR_P4_IQ_PERFCTR5 0x00000311
+#define MSR_P4_BPU_CCCR0 0x00000360
+#define MSR_P4_BPU_CCCR1 0x00000361
+#define MSR_P4_BPU_CCCR2 0x00000362
+#define MSR_P4_BPU_CCCR3 0x00000363
+#define MSR_P4_MS_CCCR0 0x00000364
+#define MSR_P4_MS_CCCR1 0x00000365
+#define MSR_P4_MS_CCCR2 0x00000366
+#define MSR_P4_MS_CCCR3 0x00000367
+#define MSR_P4_FLAME_CCCR0 0x00000368
+#define MSR_P4_FLAME_CCCR1 0x00000369
+#define MSR_P4_FLAME_CCCR2 0x0000036a
+#define MSR_P4_FLAME_CCCR3 0x0000036b
+#define MSR_P4_IQ_CCCR0 0x0000036c
+#define MSR_P4_IQ_CCCR1 0x0000036d
+#define MSR_P4_IQ_CCCR2 0x0000036e
+#define MSR_P4_IQ_CCCR3 0x0000036f
+#define MSR_P4_IQ_CCCR4 0x00000370
+#define MSR_P4_IQ_CCCR5 0x00000371
+#define MSR_P4_ALF_ESCR0 0x000003ca
+#define MSR_P4_ALF_ESCR1 0x000003cb
+#define MSR_P4_BPU_ESCR0 0x000003b2
+#define MSR_P4_BPU_ESCR1 0x000003b3
+#define MSR_P4_BSU_ESCR0 0x000003a0
+#define MSR_P4_BSU_ESCR1 0x000003a1
+#define MSR_P4_CRU_ESCR0 0x000003b8
+#define MSR_P4_CRU_ESCR1 0x000003b9
+#define MSR_P4_CRU_ESCR2 0x000003cc
+#define MSR_P4_CRU_ESCR3 0x000003cd
+#define MSR_P4_CRU_ESCR4 0x000003e0
+#define MSR_P4_CRU_ESCR5 0x000003e1
+#define MSR_P4_DAC_ESCR0 0x000003a8
+#define MSR_P4_DAC_ESCR1 0x000003a9
+#define MSR_P4_FIRM_ESCR0 0x000003a4
+#define MSR_P4_FIRM_ESCR1 0x000003a5
+#define MSR_P4_FLAME_ESCR0 0x000003a6
+#define MSR_P4_FLAME_ESCR1 0x000003a7
+#define MSR_P4_FSB_ESCR0 0x000003a2
+#define MSR_P4_FSB_ESCR1 0x000003a3
+#define MSR_P4_IQ_ESCR0 0x000003ba
+#define MSR_P4_IQ_ESCR1 0x000003bb
+#define MSR_P4_IS_ESCR0 0x000003b4
+#define MSR_P4_IS_ESCR1 0x000003b5
+#define MSR_P4_ITLB_ESCR0 0x000003b6
+#define MSR_P4_ITLB_ESCR1 0x000003b7
+#define MSR_P4_IX_ESCR0 0x000003c8
+#define MSR_P4_IX_ESCR1 0x000003c9
+#define MSR_P4_MOB_ESCR0 0x000003aa
+#define MSR_P4_MOB_ESCR1 0x000003ab
+#define MSR_P4_MS_ESCR0 0x000003c0
+#define MSR_P4_MS_ESCR1 0x000003c1
+#define MSR_P4_PMH_ESCR0 0x000003ac
+#define MSR_P4_PMH_ESCR1 0x000003ad
+#define MSR_P4_RAT_ESCR0 0x000003bc
+#define MSR_P4_RAT_ESCR1 0x000003bd
+#define MSR_P4_SAAT_ESCR0 0x000003ae
+#define MSR_P4_SAAT_ESCR1 0x000003af
+#define MSR_P4_SSU_ESCR0 0x000003be
+#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */
+
+#define MSR_P4_TBPU_ESCR0 0x000003c2
+#define MSR_P4_TBPU_ESCR1 0x000003c3
+#define MSR_P4_TC_ESCR0 0x000003c4
+#define MSR_P4_TC_ESCR1 0x000003c5
+#define MSR_P4_U2L_ESCR0 0x000003b0
+#define MSR_P4_U2L_ESCR1 0x000003b1
+
+#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
+
+/* Intel Core-based CPU performance counters */
+#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
+#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
+#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
+#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
+#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
+#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+
+/* Geode defined MSRs */
+#define MSR_GEODE_BUSCONT_CONF0 0x00001900
+
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC 0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
+#define MSR_IA32_VMX_MISC 0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d
+#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
+#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f
+#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490
+#define MSR_IA32_VMX_VMFUNC 0x00000491
+
+/* VMX_BASIC bits and bitmasks */
+#define VMX_BASIC_VMCS_SIZE_SHIFT 32
+#define VMX_BASIC_TRUE_CTLS (1ULL << 55)
+#define VMX_BASIC_64 0x0001000000000000LLU
+#define VMX_BASIC_MEM_TYPE_SHIFT 50
+#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU
+#define VMX_BASIC_MEM_TYPE_WB 6LLU
+#define VMX_BASIC_INOUT 0x0040000000000000LLU
+
+/* MSR_IA32_VMX_MISC bits */
+#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29)
+#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F
+/* AMD-V MSRs */
+
+#define MSR_VM_CR 0xc0010114
+#define MSR_VM_IGNNE 0xc0010115
+#define MSR_VM_HSAVE_PA 0xc0010117
+
+#endif /* !SELFTEST_KVM_X86_H */
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
new file mode 100644
index 000000000..d30667706
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -0,0 +1,92 @@
+/*
+ * tools/testing/selftests/kvm/lib/assert.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for getline(3) and strchrnul(3) */
+
+#include "test_util.h"
+
+#include <execinfo.h>
+#include <sys/syscall.h>
+
+#include "../../kselftest.h"
+
+/* Dumps the current stack trace to stderr. */
+static void __attribute__((noinline)) test_dump_stack(void);
+static void test_dump_stack(void)
+{
+ /*
+ * Build and run this command:
+ *
+ * addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
+ * grep -v test_dump_stack | cat -n 1>&2
+ *
+ * Note that the spacing is different and there's no newline.
+ */
+ size_t i;
+ size_t n = 20;
+ void *stack[n];
+ const char *addr2line = "addr2line -s -e /proc/$PPID/exe -fpai";
+ const char *pipeline = "|cat -n 1>&2";
+ char cmd[strlen(addr2line) + strlen(pipeline) +
+ /* N bytes per addr * 2 digits per byte + 1 space per addr: */
+ n * (((sizeof(void *)) * 2) + 1) +
+ /* Null terminator: */
+ 1];
+ char *c;
+
+ n = backtrace(stack, n);
+ c = &cmd[0];
+ c += sprintf(c, "%s", addr2line);
+ /*
+ * Skip the first two entries: test_dump_stack and test_assert.
+ * backtrace() itself does not appear in the array, and the other two
+ * are declared noinline so they keep their own stack frames.
+ */
+ for (i = 2; i < n; i++)
+ c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+ c += sprintf(c, "%s", pipeline);
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-result"
+ system(cmd);
+#pragma GCC diagnostic pop
+}
+
+static pid_t _gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+void __attribute__((noinline))
+test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line, const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!(exp)) {
+ va_start(ap, fmt);
+
+ fprintf(stderr, "==== Test Assertion Failure ====\n"
+ " %s:%u: %s\n"
+ " pid=%d tid=%d - %s\n",
+ file, line, exp_str, getpid(), _gettid(),
+ strerror(errno));
+ test_dump_stack();
+ if (fmt) {
+ fputs(" ", stderr);
+ vfprintf(stderr, fmt, ap);
+ fputs("\n", stderr);
+ }
+ va_end(ap);
+
+ if (errno == EACCES)
+ ksft_exit_skip("Access denied - Exiting.\n");
+ exit(254);
+ }
+
+ return;
+}
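
Call sites reach test_assert() through a TEST_ASSERT wrapper macro; test_util.h is not part of this hunk, but the wrapper is presumably shaped along these lines, stringifying the expression and capturing the call site:

    #define TEST_ASSERT(e, fmt, ...) \
            test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
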
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
new file mode 100644
index 000000000..5eb857584
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -0,0 +1,197 @@
+/*
+ * tools/testing/selftests/kvm/lib/elf.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+#include <bits/endian.h>
+#include <linux/elf.h>
+
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
+{
+ off_t offset_rv;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in and validate ELF Identification Record.
+ * The ELF Identification record is the first 16 (EI_NIDENT) bytes
+ * of the ELF header, which is at the beginning of the ELF file.
+ * For now it is only safe to read the first EI_NIDENT bytes. Once
+ * read and validated, the value of e_ehsize can be used to determine
+ * the real size of the ELF header.
+ */
+ unsigned char ident[EI_NIDENT];
+ test_read(fd, ident, sizeof(ident));
+ TEST_ASSERT((ident[EI_MAG0] == ELFMAG0) && (ident[EI_MAG1] == ELFMAG1)
+ && (ident[EI_MAG2] == ELFMAG2) && (ident[EI_MAG3] == ELFMAG3),
+ "ELF MAGIC Mismatch,\n"
+ " filename: %s\n"
+ " ident[EI_MAG0 - EI_MAG3]: %02x %02x %02x %02x\n"
+ " Expected: %02x %02x %02x %02x",
+ filename,
+ ident[EI_MAG0], ident[EI_MAG1], ident[EI_MAG2], ident[EI_MAG3],
+ ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3);
+ TEST_ASSERT(ident[EI_CLASS] == ELFCLASS64,
+ "Current implementation only able to handle ELFCLASS64,\n"
+ " filename: %s\n"
+ " ident[EI_CLASS]: %02x\n"
+ " expected: %02x",
+ filename,
+ ident[EI_CLASS], ELFCLASS64);
+ TEST_ASSERT(((BYTE_ORDER == LITTLE_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2LSB))
+ || ((BYTE_ORDER == BIG_ENDIAN)
+ && (ident[EI_DATA] == ELFDATA2MSB)), "Current "
+ "implementation only able to handle\n"
+ "cases where the host and ELF file endianness\n"
+ "is the same:\n"
+ " host BYTE_ORDER: %u\n"
+ " host LITTLE_ENDIAN: %u\n"
+ " host BIG_ENDIAN: %u\n"
+ " ident[EI_DATA]: %u\n"
+ " ELFDATA2LSB: %u\n"
+ " ELFDATA2MSB: %u",
+ BYTE_ORDER, LITTLE_ENDIAN, BIG_ENDIAN,
+ ident[EI_DATA], ELFDATA2LSB, ELFDATA2MSB);
+ TEST_ASSERT(ident[EI_VERSION] == EV_CURRENT,
+ "Current implementation only able to handle current "
+ "ELF version,\n"
+ " filename: %s\n"
+ " ident[EI_VERSION]: %02x\n"
+ " expected: %02x",
+ filename, ident[EI_VERSION], EV_CURRENT);
+
+ /* Read in the ELF header.
+ * With the ELF Identification portion of the ELF header
+ * validated, especially that the value at EI_VERSION is
+ * as expected, it is now safe to read the entire ELF header.
+ */
+ offset_rv = lseek(fd, 0, SEEK_SET);
+ TEST_ASSERT(offset_rv == 0, "Seek to ELF header failed,\n"
+ " rv: %zi expected: %i", offset_rv, 0);
+ test_read(fd, hdrp, sizeof(*hdrp));
+ TEST_ASSERT(hdrp->e_phentsize == sizeof(Elf64_Phdr),
+ "Unexpected physical header size,\n"
+ " hdrp->e_phentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_phentsize, sizeof(Elf64_Phdr));
+ TEST_ASSERT(hdrp->e_shentsize == sizeof(Elf64_Shdr),
+ "Unexpected section header size,\n"
+ " hdrp->e_shentsize: %x\n"
+ " expected: %zx",
+ hdrp->e_shentsize, sizeof(Elf64_Shdr));
+}
+
+/* VM ELF Load
+ *
+ * Input Args:
+ * filename - Path to ELF file
+ *
+ * Output Args: None
+ *
+ * Input/Output Args:
+ * vm - Pointer to opaque type that describes the VM.
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Loads the program image of the ELF file specified by filename
+ * into the virtual address space of the VM pointed to by vm. On entry,
+ * the VM must not be using any of the virtual address space used by
+ * the image, and it must have sufficient available physical pages to
+ * back the virtual pages used to load the image.
+ */
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ off_t offset, offset_rv;
+ Elf64_Ehdr hdr;
+
+ /* Open the ELF file. */
+ int fd;
+ fd = open(filename, O_RDONLY);
+ TEST_ASSERT(fd >= 0, "Failed to open ELF file,\n"
+ " filename: %s\n"
+ " rv: %i errno: %i", filename, fd, errno);
+
+ /* Read in the ELF header. */
+ elfhdr_get(filename, &hdr);
+
+ /* For each program header.
+ * The following ELF header members specify the location
+ * and size of the program headers:
+ *
+ * e_phoff - File offset to start of program headers
+ * e_phentsize - Size of each program header
+ * e_phnum - Number of program header entries
+ */
+ for (unsigned int n1 = 0; n1 < hdr.e_phnum; n1++) {
+ /* Seek to the beginning of the program header. */
+ offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
+ offset_rv = lseek(fd, offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == offset,
+ "Failed to seek to begining of program header %u,\n"
+ " filename: %s\n"
+ " rv: %jd errno: %i",
+ n1, filename, (intmax_t) offset_rv, errno);
+
+ /* Read in the program header. */
+ Elf64_Phdr phdr;
+ test_read(fd, &phdr, sizeof(phdr));
+
+ /* Skip if this header doesn't describe a loadable segment. */
+ if (phdr.p_type != PT_LOAD)
+ continue;
+
+ /* Allocate memory for this segment within the VM. */
+ TEST_ASSERT(phdr.p_memsz > 0, "Unexpected loadable segment "
+ "memsize of 0,\n"
+ " phdr index: %u p_memsz: 0x%" PRIx64,
+ n1, (uint64_t) phdr.p_memsz);
+ vm_vaddr_t seg_vstart = phdr.p_vaddr;
+ seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
+ seg_vend |= vm->page_size - 1;
+ size_t seg_size = seg_vend - seg_vstart + 1;
+
+ vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ data_memslot, pgd_memslot);
+ TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
+ "virtual memory for segment at requested min addr,\n"
+ " segment idx: %u\n"
+ " seg_vstart: 0x%lx\n"
+ " vaddr: 0x%lx",
+ n1, seg_vstart, vaddr);
+ memset(addr_gva2hva(vm, vaddr), 0, seg_size);
+ /* TODO(lhuemill): Set permissions of each memory segment
+ * based on the least-significant 3 bits of phdr.p_flags.
+ */
+
+ /* Load portion of initial state that is contained within
+ * the ELF file.
+ */
+ if (phdr.p_filesz) {
+ offset_rv = lseek(fd, phdr.p_offset, SEEK_SET);
+ TEST_ASSERT(offset_rv == phdr.p_offset,
+ "Seek to program segment offset failed,\n"
+ " program header idx: %u errno: %i\n"
+ " offset_rv: 0x%jx\n"
+ " expected: 0x%jx\n",
+ n1, errno, (intmax_t) offset_rv,
+ (intmax_t) phdr.p_offset);
+ test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
+ phdr.p_filesz);
+ }
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
new file mode 100644
index 000000000..cff869ffe
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -0,0 +1,158 @@
+/*
+ * tools/testing/selftests/kvm/lib/io.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+
+/* Test Write
+ *
+ * A wrapper for write(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Write of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional write is performed to automatically
+ * continue writing the requested data.
+ * There are also many cases where write(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure.
+ *
+ * Note, for function signature compatibility with write(2), this function
+ * returns the number of bytes written, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * write(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ *  fd - Opened file descriptor to file to be written.
+ *  buf - Starting address of data to be written.
+ *  count - Number of bytes to write.
+ *
+ * Return:
+ * On success, number of bytes written.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_write(int fd, const void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_written = 0;
+ size_t num_left = count;
+ const char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "RETURN VALUE" portion of
+ * write(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = write(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected write failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ continue;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_written: %zi num_left: %zu",
+ rc, num_written, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc >= 0, "Unexpected ret from write,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_written += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_written < count);
+
+ return num_written;
+}
+
+/* Test Read
+ *
+ * A wrapper for read(2), that automatically handles the following
+ * special conditions:
+ *
+ * + Interrupted system call (EINTR)
+ * + Read of less than requested amount
+ * + Non-block return (EAGAIN)
+ *
+ * For each of the above, an additional read is performed to automatically
+ * continue reading the requested data.
+ * There are also many cases where read(2) can return an unexpected
+ * error (e.g. EIO). Such errors cause a TEST_ASSERT failure. Note,
+ * it is expected that the file opened by fd at the current file position
+ * contains at least the number of requested bytes to be read. A TEST_ASSERT
+ * failure is produced if an End-Of-File condition occurs, before all the
+ * data is read. It is the caller's responsibility to ensure that
+ * sufficient data exists.
+ *
+ * Note, for function signature compatibility with read(2), this function
+ * returns the number of bytes read, but that value will always be equal
+ * to the number of requested bytes. All other conditions in this and
+ * future enhancements to this function either automatically issue another
+ * read(2) or cause a TEST_ASSERT failure.
+ *
+ * Args:
+ * fd - Opened file descriptor to file to be read.
+ * count - Number of bytes to read.
+ *
+ * Output:
+ * buf - Starting address of where to write the bytes read.
+ *
+ * Return:
+ * On success, number of bytes read.
+ * On failure, a TEST_ASSERT failure is caused.
+ */
+ssize_t test_read(int fd, void *buf, size_t count)
+{
+ ssize_t rc;
+ ssize_t num_read = 0;
+ size_t num_left = count;
+ char *ptr = buf;
+
+ /* Note: Count of zero is allowed (see "If count is zero" portion of
+ * read(2) manpage for details.
+ */
+ TEST_ASSERT(count >= 0, "Unexpected count, count: %li", count);
+
+ do {
+ rc = read(fd, ptr, num_left);
+
+ switch (rc) {
+ case -1:
+ TEST_ASSERT(errno == EAGAIN || errno == EINTR,
+ "Unexpected read failure,\n"
+ " rc: %zi errno: %i", rc, errno);
+ break;
+
+ case 0:
+ TEST_ASSERT(false, "Unexpected EOF,\n"
+ " rc: %zi num_read: %zi num_left: %zu",
+ rc, num_read, num_left);
+ break;
+
+ default:
+ TEST_ASSERT(rc > 0, "Unexpected ret from read,\n"
+ " rc: %zi errno: %i", rc, errno);
+ num_read += rc;
+ num_left -= rc;
+ ptr += rc;
+ break;
+ }
+ } while (num_read < count);
+
+ return num_read;
+}
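
Both wrappers are used wherever a short read or write must not be silently accepted; a typical call site looks like this sketch (in_fd, out_fd and the buffer are assumed to be set up by the caller):

    char buf[64];
    test_read(in_fd, buf, sizeof(buf));    /* asserts on error or premature EOF */
    test_write(out_fd, buf, sizeof(buf));  /* asserts unless every byte is written */
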
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
new file mode 100644
index 000000000..b138fd5e4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -0,0 +1,1680 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <linux/kernel.h>
+
+#define KVM_DEV_PATH "/dev/kvm"
+
+#define KVM_UTIL_PGS_PER_HUGEPG 512
+#define KVM_UTIL_MIN_PADDR 0x2000
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static void *align(void *x, size_t size)
+{
+ size_t mask = size - 1;
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return (void *) (((size_t) x + mask) & ~mask);
+}
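
For example, rounding an address up to a 2 MiB huge-page boundary, as the THP path further down does (the addresses are illustrative):

    void *p = (void *)0x7f0000201000UL;
    void *q = align(p, 0x200000);  /* q == (void *)0x7f0000400000, the next 2 MiB boundary */
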
+
+/* Capability
+ *
+ * Input Args:
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return:
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
+ *
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+int kvm_check_cap(long cap)
+{
+ int ret;
+ int kvm_fd;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
+ TEST_ASSERT(ret >= 0, "KVM_CHECK_EXTENSION IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ close(kvm_fd);
+
+ return ret;
+}
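
A typical use is gating a test on a capability before doing any setup; a sketch, where KVM_CAP_SYNC_MMU is just an example capability from <linux/kvm.h>:

    if (!kvm_check_cap(KVM_CAP_SYNC_MMU)) {
            fprintf(stderr, "KVM_CAP_SYNC_MMU not available, skipping\n");
            exit(KSFT_SKIP);
    }
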
+
+/* VM Enable Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VM.
+ */
+int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
+ TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
+ " rc: %i errno: %i", ret, errno);
+
+ return ret;
+}
+
+static void vm_open(struct kvm_vm *vm, int perm)
+{
+ vm->kvm_fd = open(KVM_DEV_PATH, perm);
+ if (vm->kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ /* Create VM. */
+ vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, NULL);
+ TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
+ "rc: %i errno: %i", vm->fd, errno);
+}
+
+/* VM Create
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_FLAT48PG)
+ * phy_pages - Physical memory pages
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_FLAT48PG).
+ * When phy_pages is non-zero, a memory region of phy_pages physical pages
+ * is created and mapped starting at guest physical address 0. The file
+ * descriptor to control the created VM is created with the permissions
+ * given by perm (e.g. O_RDWR).
+ */
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+{
+ struct kvm_vm *vm;
+ int kvm_fd;
+
+ /* Allocate memory. */
+ vm = calloc(1, sizeof(*vm));
+ TEST_ASSERT(vm != NULL, "Insufficent Memory");
+
+ vm->mode = mode;
+ vm_open(vm, perm);
+
+ /* Setup mode specific traits. */
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ vm->page_size = 0x1000;
+ vm->page_shift = 12;
+
+ /* Limit to 48-bit canonical virtual addresses. */
+ vm->vpages_valid = sparsebit_alloc();
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (48 - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (48 - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (48 - 1)) >> vm->page_shift);
+
+ /* Limit physical addresses to 52-bits. */
+ vm->max_gfn = ((1ULL << 52) >> vm->page_shift) - 1;
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+ }
+
+ /* Allocate and setup memory for guest. */
+ vm->vpages_mapped = sparsebit_alloc();
+ if (phy_pages != 0)
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ 0, 0, phy_pages, 0);
+
+ return vm;
+}
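
Putting it together, a test would typically create a small VM like this sketch (512 pages is about 2 MiB of guest memory, mapped starting at guest physical address 0):

    struct kvm_vm *vm = vm_create(VM_MODE_FLAT48PG, 512, O_RDWR);
    /* ... add vcpus and run the test ... */
    kvm_vm_free(vm);
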
+
+/* VM Restart
+ *
+ * Input Args:
+ * vm - VM that has been released before
+ * perm - permission
+ *
+ * Output Args: None
+ *
+ * Reopens the file descriptors associated with the VM and reinstates the
+ * global state, such as the irqchip and the memory regions that are mapped
+ * into the guest.
+ */
+void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+{
+ struct userspace_mem_region *region;
+
+ vm_open(vmp, perm);
+ if (vmp->has_irqchip)
+ vm_create_irqchip(vmp);
+
+ for (region = vmp->userspace_mem_region_head; region;
+ region = region->next) {
+ int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i\n"
+ " slot: %u flags: 0x%x\n"
+ " guest_phys_addr: 0x%lx size: 0x%lx",
+ ret, errno, region->region.slot, region->region.flags,
+ region->region.guest_phys_addr,
+ region->region.memory_size);
+ }
+}
+
+void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
+ TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
+ strerror(-ret));
+}
+
+/* Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Searches for a region with any physical memory that overlaps with
+ * any portion of the guest physical addresses from start to end
+ * inclusive. If multiple overlapping regions exist, a pointer to any
+ * of the regions is returned. Null is returned only when no overlapping
+ * region exists.
+ */
+static struct userspace_mem_region *userspace_mem_region_find(
+ struct kvm_vm *vm, uint64_t start, uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ uint64_t existing_start = region->region.guest_phys_addr;
+ uint64_t existing_end = region->region.guest_phys_addr
+ + region->region.memory_size - 1;
+ if (start <= existing_end && end >= existing_start)
+ return region;
+ }
+
+ return NULL;
+}
+
+/* KVM Userspace Memory Region Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * start - Starting VM physical address
+ * end - Ending VM physical address, inclusive.
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to overlapping region, NULL if no such region.
+ *
+ * Public interface to userspace_mem_region_find. Allows tests to look up
+ * the memslot datastructure for a given range of guest physical memory.
+ */
+struct kvm_userspace_memory_region *
+kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
+ uint64_t end)
+{
+ struct userspace_mem_region *region;
+
+ region = userspace_mem_region_find(vm, start, end);
+ if (!region)
+ return NULL;
+
+ return &region->region;
+}
+
+/* VCPU Find
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to VCPU structure
+ *
+ * Locates a vcpu structure that describes the VCPU specified by vcpuid and
+ * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
+ * for the specified vcpuid.
+ */
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid)
+{
+ struct vcpu *vcpup;
+
+ for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
+ if (vcpup->id == vcpuid)
+ return vcpup;
+ }
+
+ return NULL;
+}
+
+/* VM VCPU Remove
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None, TEST_ASSERT failures for all error conditions
+ *
+ * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ */
+static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ ret = munmap(vcpu->state, sizeof(*vcpu->state));
+ TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
+ "errno: %i", ret, errno);
+ close(vcpu->fd);
+ TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
+ "errno: %i", ret, errno);
+
+ if (vcpu->next)
+ vcpu->next->prev = vcpu->prev;
+ if (vcpu->prev)
+ vcpu->prev->next = vcpu->next;
+ else
+ vm->vcpu_head = vcpu->next;
+ free(vcpu);
+}
+
+void kvm_vm_release(struct kvm_vm *vmp)
+{
+ int ret;
+
+ /* Free VCPUs. */
+ while (vmp->vcpu_head)
+ vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+
+ /* Close file descriptor for the VM. */
+ ret = close(vmp->fd);
+ TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
+ " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+
+ ret = close(vmp->kvm_fd);
+ TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
+ " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+}
+
+/* Destroys and frees the VM pointed to by vmp.
+ */
+void kvm_vm_free(struct kvm_vm *vmp)
+{
+ int ret;
+
+ if (vmp == NULL)
+ return;
+
+ /* Free userspace_mem_regions. */
+ while (vmp->userspace_mem_region_head) {
+ struct userspace_mem_region *region
+ = vmp->userspace_mem_region_head;
+
+ region->region.memory_size = 0;
+ ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
+ &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ vmp->userspace_mem_region_head = region->next;
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
+ ret, errno);
+
+ free(region);
+ }
+
+ /* Free sparsebit arrays. */
+ sparsebit_free(&vmp->vpages_valid);
+ sparsebit_free(&vmp->vpages_mapped);
+
+ kvm_vm_release(vmp);
+
+ /* Free the structure describing the VM. */
+ free(vmp);
+}
+
+/* Memory Compare, host virtual to guest virtual
+ *
+ * Input Args:
+ * hva - Starting host virtual address
+ * vm - Virtual Machine
+ * gva - Starting guest virtual address
+ * len - number of bytes to compare
+ *
+ * Output Args: None
+ *
+ * Input/Output Args: None
+ *
+ * Return:
+ * Returns 0 if the bytes starting at hva for a length of len
+ * are equal to the guest virtual bytes starting at gva. Returns
+ * a value < 0, if bytes at hva are less than those at gva.
+ * Otherwise a value > 0 is returned.
+ *
+ * Compares the bytes starting at the host virtual address hva, for
+ * a length of len, to the guest bytes starting at the guest virtual
+ * address given by gva.
+ */
+int kvm_memcmp_hva_gva(void *hva,
+ struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
+{
+ size_t amt;
+
+ /* Compare a batch of bytes until either a mismatch is found
+ * or all the bytes have been compared.
+ */
+ for (uintptr_t offset = 0; offset < len; offset += amt) {
+ uintptr_t ptr1 = (uintptr_t)hva + offset;
+
+ /* Determine host address for guest virtual address
+ * at offset.
+ */
+ uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset);
+
+ /* Determine amount to compare on this pass.
+ * Don't allow the comparison to cross a page boundary.
+ */
+ amt = len - offset;
+ if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr1 % vm->page_size);
+ if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift))
+ amt = vm->page_size - (ptr2 % vm->page_size);
+
+ assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift));
+ assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift));
+
+ /* Perform the comparison. If there is a difference
+ * return that result to the caller, otherwise need
+ * to continue on looking for a mismatch.
+ */
+ int ret = memcmp((void *)ptr1, (void *)ptr2, amt);
+ if (ret != 0)
+ return ret;
+ }
+
+ /* No mismatch found. Let the caller know the two memory
+ * areas are equal.
+ */
+ return 0;
+}
+
+/* Allocate an instance of struct kvm_cpuid2
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the allocated struct. The caller is responsible
+ * for freeing this struct.
+ *
+ * Since kvm_cpuid2 uses a 0-length array to allow the size of the
+ * array to be decided at allocation time, allocation is slightly
+ * complicated. This function uses a reasonable default length for
+ * the array and performs the appropriate allocation.
+ */
+static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
+{
+ struct kvm_cpuid2 *cpuid;
+ int nent = 100;
+ size_t size;
+
+ size = sizeof(*cpuid);
+ size += nent * sizeof(struct kvm_cpuid_entry2);
+ cpuid = malloc(size);
+ if (!cpuid) {
+ perror("malloc");
+ abort();
+ }
+
+ cpuid->nent = nent;
+
+ return cpuid;
+}
+
+/* KVM Supported CPUID Get
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return: The supported KVM CPUID
+ *
+ * Get the guest CPUID supported by KVM.
+ */
+struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int ret;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2();
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+ TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
+ ret, errno);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+/* Locate a cpuid entry.
+ *
+ * Input Args:
+ * cpuid: The cpuid.
+ * function: The function of the cpuid entry to find.
+ *
+ * Output Args: None
+ *
+ * Return: A pointer to the cpuid entry. Never returns NULL.
+ */
+struct kvm_cpuid_entry2 *
+kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+{
+ struct kvm_cpuid2 *cpuid;
+ struct kvm_cpuid_entry2 *entry = NULL;
+ int i;
+
+ cpuid = kvm_get_supported_cpuid();
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index) {
+ entry = &cpuid->entries[i];
+ break;
+ }
+ }
+
+ TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
+ function, index);
+ return entry;
+}
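
For example, a test probing a CPUID feature bit could do the following (the leaf and bit are illustrative; ECX bit 21 of CPUID.01H is the x2APIC feature flag):

    struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_index(1, 0);

    if (entry->ecx & (1u << 21))
            printf("KVM exposes x2APIC to guests\n");
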
+
+/* VM Userspace Memory Region Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *  src_type - Storage source for this region
+ *             (e.g. VM_MEM_SRC_ANONYMOUS)
+ * guest_paddr - Starting guest physical address
+ * slot - KVM region slot
+ * npages - Number of physical pages
+ * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Allocates a memory area of the number of pages specified by npages
+ * and maps it to the VM specified by vm, at a starting physical address
+ * given by guest_paddr. The region is created with a KVM region slot
+ * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
+ * region is created with the flags given by flags.
+ */
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags)
+{
+ int ret;
+ unsigned long pmem_size = 0;
+ struct userspace_mem_region *region;
+ size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+
+ TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
+ "address not on a page boundary.\n"
+ " guest_paddr: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, vm->page_size);
+ TEST_ASSERT((((guest_paddr >> vm->page_shift) + npages) - 1)
+ <= vm->max_gfn, "Physical range beyond maximum "
+ "supported physical address,\n"
+ " guest_paddr: 0x%lx npages: 0x%lx\n"
+ " vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ guest_paddr, npages, vm->max_gfn, vm->page_size);
+
+ /* Confirm a mem region with an overlapping address doesn't
+ * already exist.
+ */
+ region = (struct userspace_mem_region *) userspace_mem_region_find(
+ vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
+ if (region != NULL)
+ TEST_ASSERT(false, "overlapping userspace_mem_region already "
+ "exists\n"
+ " requested guest_paddr: 0x%lx npages: 0x%lx "
+ "page_size: 0x%x\n"
+ " existing guest_paddr: 0x%lx size: 0x%lx",
+ guest_paddr, npages, vm->page_size,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Confirm no region with the requested slot already exists. */
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == slot)
+ break;
+ }
+ if (region != NULL)
+ TEST_ASSERT(false, "A mem region with the requested slot "
+ "already exists.\n"
+ " requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
+ " existing slot: %u paddr: 0x%lx size: 0x%lx",
+ slot, guest_paddr, npages,
+ region->region.slot,
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size);
+
+ /* Allocate and initialize new mem region structure. */
+ region = calloc(1, sizeof(*region));
+ TEST_ASSERT(region != NULL, "Insufficient Memory");
+ region->mmap_size = npages * vm->page_size;
+
+ /* Enough memory to align up to a huge page. */
+ if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
+ region->mmap_size += huge_page_size;
+ region->mmap_start = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS
+ | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+ -1, 0);
+ TEST_ASSERT(region->mmap_start != MAP_FAILED,
+ "test_malloc failed, mmap_start: %p errno: %i",
+ region->mmap_start, errno);
+
+ /* Align THP allocation up to start of a huge page. */
+ region->host_mem = align(region->mmap_start,
+ src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
+
+ /* As needed, perform madvise to control THP use. */
+ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
+ ret = madvise(region->host_mem, npages * vm->page_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed,\n"
+ " addr: %p\n"
+ " length: 0x%lx\n"
+ " src_type: %x",
+ region->host_mem, npages * vm->page_size, src_type);
+ }
+
+ region->unused_phy_pages = sparsebit_alloc();
+ sparsebit_set_num(region->unused_phy_pages,
+ guest_paddr >> vm->page_shift, npages);
+ region->region.slot = slot;
+ region->region.flags = flags;
+ region->region.guest_phys_addr = guest_paddr;
+ region->region.memory_size = npages * vm->page_size;
+ region->region.userspace_addr = (uintptr_t) region->host_mem;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i\n"
+ " slot: %u flags: 0x%x\n"
+ " guest_phys_addr: 0x%lx size: 0x%lx",
+ ret, errno, slot, flags,
+ guest_paddr, (uint64_t) region->region.memory_size);
+
+ /* Add to linked-list of memory regions. */
+ if (vm->userspace_mem_region_head)
+ vm->userspace_mem_region_head->prev = region;
+ region->next = vm->userspace_mem_region_head;
+ vm->userspace_mem_region_head = region;
+}
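+
+/* A typical vm_userspace_mem_region_add() call backs a range of guest
+ * physical memory with anonymous host memory. An illustrative sketch,
+ * assuming 4K guest pages (the slot and size are arbitrary example
+ * values):
+ *
+ * vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ * 0x100000, 1, 64, 0);
+ *
+ * This backs guest physical 0x100000..0x13ffff (64 pages) in KVM
+ * slot 1 with no special flags.
+ */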
+
+/* Memslot to region
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * memslot - KVM memory slot ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to the memory region structure that describes the memory
+ * region using the KVM memory slot ID given by memslot. TEST_ASSERT failure
+ * on error (e.g. currently no memory region using memslot as a KVM
+ * memory slot ID).
+ */
+static struct userspace_mem_region *memslot2region(struct kvm_vm *vm,
+ uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if (region->region.slot == memslot)
+ break;
+ }
+ if (region == NULL) {
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_ASSERT(false, "Mem region not found");
+ }
+
+ return region;
+}
+
+/* VM Memory Region Flags Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * slot - Slot of the memory region to modify
+ * flags - Flags to set for the memory region
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the flags of the memory region specified by the value of slot
+ * to the values given by flags.
+ */
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
+{
+ int ret;
+ struct userspace_mem_region *region;
+
+ /* Locate memory region. */
+ region = memslot2region(vm, slot);
+
+ region->region.flags = flags;
+
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ " rc: %i errno: %i slot: %u flags: 0x%x",
+ ret, errno, slot, flags);
+}
+
+/* VCPU mmap Size
+ *
+ * Input Args: None
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Size of VCPU state
+ *
+ * Returns the size of the structure pointed to by the return value
+ * of vcpu_state().
+ */
+static int vcpu_mmap_sz(void)
+{
+ int dev_fd, ret;
+
+ dev_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (dev_fd < 0)
+ exit(KSFT_SKIP);
+
+ ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
+ TEST_ASSERT(ret >= sizeof(struct kvm_run),
+ "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
+ __func__, ret, errno);
+
+ close(dev_fd);
+
+ return ret;
+}
+
+/* VM VCPU Add
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates and adds to the VM specified by vm a virtual CPU with
+ * the ID given by vcpuid.
+ */
+void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot, int gdt_memslot)
+{
+ struct vcpu *vcpu;
+
+ /* Confirm a vcpu with the specified id doesn't already exist. */
+ vcpu = vcpu_find(vm, vcpuid);
+ if (vcpu != NULL)
+ TEST_ASSERT(false, "vcpu with the specified id "
+ "already exists,\n"
+ " requested vcpuid: %u\n"
+ " existing vcpuid: %u state: %p",
+ vcpuid, vcpu->id, vcpu->state);
+
+ /* Allocate and initialize new vcpu structure. */
+ vcpu = calloc(1, sizeof(*vcpu));
+ TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
+ vcpu->id = vcpuid;
+ vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
+ TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
+ vcpu->fd, errno);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
+ vcpu_mmap_sz(), sizeof(*vcpu->state));
+ vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
+ TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
+ "vcpu id: %u errno: %i", vcpuid, errno);
+
+ /* Add to linked-list of VCPUs. */
+ if (vm->vcpu_head)
+ vm->vcpu_head->prev = vcpu;
+ vcpu->next = vm->vcpu_head;
+ vm->vcpu_head = vcpu;
+
+ vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
+}
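+
+/* For instance, a single-VCPU test might call (sketch; memslot 0 is
+ * an assumed choice for both the page tables and the GDT):
+ *
+ * vm_vcpu_add(vm, 0, 0, 0);
+ */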
+
+/* VM Virtual Address Unused Gap
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size (bytes)
+ * vaddr_min - Minimum Virtual Address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Lowest virtual address at or above vaddr_min, with at least
+ * sz unused bytes. TEST_ASSERT failure if no area of at least
+ * size sz is available.
+ *
+ * Within the VM specified by vm, locates the lowest starting virtual
+ * address >= vaddr_min, that has at least sz unallocated bytes. A
+ * TEST_ASSERT failure occurs for invalid input or if no area of at
+ * least sz unallocated bytes >= vaddr_min is available.
+ */
+static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
+{
+ uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
+
+ /* Determine lowest permitted virtual page index. */
+ uint64_t pgidx_start = (vaddr_min + vm->page_size - 1) >> vm->page_shift;
+ if ((pgidx_start * vm->page_size) < vaddr_min)
+ goto no_va_found;
+
+ /* Loop over section with enough valid virtual page indexes. */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages))
+ pgidx_start = sparsebit_next_set_num(vm->vpages_valid,
+ pgidx_start, pages);
+ do {
+ /*
+ * Are there enough unused virtual pages available at
+ * the currently proposed starting virtual page index.
+ * If not, adjust proposed starting index to next
+ * possible.
+ */
+ if (sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages))
+ goto va_found;
+ pgidx_start = sparsebit_next_clear_num(vm->vpages_mapped,
+ pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+
+ /*
+ * If needed, adjust proposed starting virtual address,
+ * to next range of valid virtual addresses.
+ */
+ if (!sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages)) {
+ pgidx_start = sparsebit_next_set_num(
+ vm->vpages_valid, pgidx_start, pages);
+ if (pgidx_start == 0)
+ goto no_va_found;
+ }
+ } while (pgidx_start != 0);
+
+no_va_found:
+ TEST_ASSERT(false, "No vaddr of specified pages available, "
+ "pages: 0x%lx", pages);
+
+ /* NOT REACHED */
+ return -1;
+
+va_found:
+ TEST_ASSERT(sparsebit_is_set_num(vm->vpages_valid,
+ pgidx_start, pages),
+ "Unexpected, invalid virtual page index range,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+ TEST_ASSERT(sparsebit_is_clear_num(vm->vpages_mapped,
+ pgidx_start, pages),
+ "Unexpected, pages already mapped,\n"
+ " pgidx_start: 0x%lx\n"
+ " pages: 0x%lx",
+ pgidx_start, pages);
+
+ return pgidx_start * vm->page_size;
+}
+
+/* VM Virtual Address Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * sz - Size in bytes
+ * vaddr_min - Minimum starting virtual address
+ * data_memslot - Memory region slot for data pages
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm. The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min. Note that each allocation uses
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ uint32_t data_memslot, uint32_t pgd_memslot)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm, pgd_memslot);
+
+ /* Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size) {
+ vm_paddr_t paddr;
+
+ paddr = vm_phy_page_alloc(vm, KVM_UTIL_MIN_PADDR, data_memslot);
+
+ virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+
+ sparsebit_set(vm->vpages_mapped,
+ vaddr >> vm->page_shift);
+ }
+
+ return vaddr_start;
+}
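+
+/* For example, to allocate a page of guest memory and zero it from
+ * the host side (sketch; assumes 4K guest pages and memslot 0 for
+ * both data and page tables):
+ *
+ * vm_vaddr_t gva = vm_vaddr_alloc(vm, 4096, 0x10000, 0, 0);
+ * memset(addr_gva2hva(vm, gva), 0, 4096);
+ */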
+
+/*
+ * Map a range of VM virtual addresses to the VM's physical addresses
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - Virtual address to map
+ * paddr - VM Physical Address
+ * size - The size of the range to map
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the
+ * page range starting at vaddr to the page range starting at paddr.
+ */
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ size_t size, uint32_t pgd_memslot)
+{
+ size_t page_size = vm->page_size;
+ size_t npages = size / page_size;
+
+ TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
+ TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
+
+ while (npages--) {
+ virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ vaddr += page_size;
+ paddr += page_size;
+ }
+}
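+
+/* For example, identity-mapping four 4K pages of guest memory
+ * (sketch; pgd memslot 0 is an assumed choice):
+ *
+ * virt_map(vm, 0x10000, 0x10000, 0x4000, 0);
+ */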
+
+/* Address VM Physical to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gpa - VM physical address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ *
+ * Locates the memory region containing the VM physical address given
+ * by gpa, within the VM given by vm. When found, the host virtual
+ * address providing the memory to the vm physical address is returned.
+ * A TEST_ASSERT failure occurs if no region containing gpa exists.
+ */
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((gpa >= region->region.guest_phys_addr)
+ && (gpa <= (region->region.guest_phys_addr
+ + region->region.memory_size - 1)))
+ return (void *) ((uintptr_t) region->host_mem
+ + (gpa - region->region.guest_phys_addr));
+ }
+
+ TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+ return NULL;
+}
+
+/* Address Host Virtual to VM Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * hva - Host virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Locates the memory region containing the host virtual address given
+ * by hva, within the VM given by vm. When found, the equivalent
+ * VM physical address is returned. A TEST_ASSERT failure occurs if no
+ * region containing hva exists.
+ */
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+{
+ struct userspace_mem_region *region;
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ if ((hva >= region->host_mem)
+ && (hva <= (region->host_mem
+ + region->region.memory_size - 1)))
+ return (vm_paddr_t) ((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t) region->host_mem));
+ }
+
+ TEST_ASSERT(false, "No mapping to a guest physical address, "
+ "hva: %p", hva);
+ return -1;
+}
+
+/* VM Create IRQ Chip
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Creates an interrupt controller chip for the VM specified by vm.
+ */
+void vm_create_irqchip(struct kvm_vm *vm)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
+ TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ vm->has_irqchip = true;
+}
+
+/* VM VCPU State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to structure that describes the state of the VCPU.
+ *
+ * Locates and returns a pointer to a structure that describes the
+ * state of the VCPU with the given vcpuid.
+ */
+struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ return vcpu->state;
+}
+
+/* VM VCPU Run
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Switch to executing the code for the VCPU given by vcpuid, within the VM
+ * given by vm.
+ */
+void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ int ret = _vcpu_run(vm, vcpuid);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ do {
+ rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ } while (rc == -1 && errno == EINTR);
+ return rc;
+}
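+
+/* A test's main loop typically alternates vcpu_run() with inspection
+ * of the run structure, for example (illustrative sketch):
+ *
+ * struct kvm_run *run = vcpu_state(vm, 0);
+ *
+ * for (;;) {
+ * vcpu_run(vm, 0);
+ * if (run->exit_reason != KVM_EXIT_IO)
+ * break;
+ * ... handle the port I/O ...
+ * }
+ * fprintf(stderr, "exit: %s\n", exit_reason_str(run->exit_reason));
+ */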
+
+/* VM VCPU Set MP State
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * mp_state - mp_state to be set
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the MP state of the VCPU given by vcpuid, to the state given
+ * by mp_state.
+ */
+void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_mp_state *mp_state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
+ TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+/* VM VCPU Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * regs - current state of VCPU regs
+ *
+ * Return: None
+ *
+ * Obtains the current register state for the VCPU specified by vcpuid
+ * and stores it at the location given by regs.
+ */
+void vcpu_regs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * regs - Values to set VCPU regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the regs of the VCPU specified by vcpuid to the values
+ * given by regs.
+ */
+void vcpu_regs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_regs *regs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the regs. */
+ ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
+ TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the events. */
+ ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_vcpu_events *events)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the events. */
+ ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
+ TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VCPU Get MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
+ *
+ * Get value of MSR for VCPU.
+ */
+uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+
+ return buffer.entry.data;
+}
+
+/* VCPU Set MSR
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * msr_index - Index of MSR
+ * msr_value - New value of MSR
+ *
+ * Output Args: None
+ *
+ * Return: On success, nothing. On failure a TEST_ASSERT is produced.
+ *
+ * Set value of MSR for VCPU.
+ */
+void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
+ uint64_t msr_value)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ buffer.entry.data = msr_value;
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
+ " rc: %i errno: %i", r, errno);
+}
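+
+/* For example, a read-modify-write of an MSR (sketch; 0x10 is the
+ * architectural index of IA32_TIME_STAMP_COUNTER):
+ *
+ * uint64_t tsc = vcpu_get_msr(vm, 0, 0x10);
+ * vcpu_set_msr(vm, 0, 0x10, tsc + 1000);
+ */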
+
+/* VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first num function input arguments to the values
+ * given as variable args. Each of the variable args is expected to
+ * be of type uint64_t.
+ */
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+
+ TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
+ " num: %u\n",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vm, vcpuid, &regs);
+
+ if (num >= 1)
+ regs.rdi = va_arg(ap, uint64_t);
+
+ if (num >= 2)
+ regs.rsi = va_arg(ap, uint64_t);
+
+ if (num >= 3)
+ regs.rdx = va_arg(ap, uint64_t);
+
+ if (num >= 4)
+ regs.rcx = va_arg(ap, uint64_t);
+
+ if (num >= 5)
+ regs.r8 = va_arg(ap, uint64_t);
+
+ if (num >= 6)
+ regs.r9 = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vm, vcpuid, &regs);
+ va_end(ap);
+}
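+
+/* The arguments land in the x86_64 SysV argument registers (RDI, RSI,
+ * RDX, RCX, R8, R9, in that order), so the guest entry point receives
+ * them as ordinary C parameters. For example (sketch):
+ *
+ * vcpu_args_set(vm, 0, 2, gva, (uint64_t) 42);
+ *
+ * pairs with a guest function declared as
+ *
+ * void guest_code(uint64_t gva, uint64_t val);
+ */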
+
+/* VM VCPU System Regs Get
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * sregs - current state of VCPU system regs
+ *
+ * Return: None
+ *
+ * Obtains the current system register state for the VCPU specified by
+ * vcpuid and stores it at the location given by sregs.
+ */
+void vcpu_sregs_get(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Get the regs. */
+ ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
+ TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
+ ret, errno);
+}
+
+/* VM VCPU System Regs Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * sregs - Values to set VCPU system regs to
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the system regs of the VCPU specified by vcpuid to the values
+ * given by sregs.
+ */
+void vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
+ TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+}
+
+int _vcpu_sregs_set(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_sregs *sregs)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ /* Set the sregs. */
+ return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+}
+
+/* VCPU Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VCPU fd.
+ */
+void vcpu_ioctl(struct kvm_vm *vm,
+ uint32_t vcpuid, unsigned long cmd, void *arg)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ ret = ioctl(vcpu->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Ioctl
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * cmd - Ioctl number
+ * arg - Argument to pass to the ioctl
+ *
+ * Return: None
+ *
+ * Issues an arbitrary ioctl on a VM fd.
+ */
+void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+ int ret;
+
+ ret = ioctl(vm->fd, cmd, arg);
+ TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
+ cmd, ret, errno, strerror(errno));
+}
+
+/* VM Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VM given by vm, to the FILE stream
+ * given by stream.
+ */
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct userspace_mem_region *region;
+ struct vcpu *vcpu;
+
+ fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
+ fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
+ fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
+ fprintf(stream, "%*sMem Regions:\n", indent, "");
+ for (region = vm->userspace_mem_region_head; region;
+ region = region->next) {
+ fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
+ "host_virt: %p\n", indent + 2, "",
+ (uint64_t) region->region.guest_phys_addr,
+ (uint64_t) region->region.memory_size,
+ region->host_mem);
+ fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->unused_phy_pages, 0);
+ }
+ fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
+ sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
+ fprintf(stream, "%*spgd_created: %u\n", indent, "",
+ vm->pgd_created);
+ if (vm->pgd_created) {
+ fprintf(stream, "%*sVirtual Translation Tables:\n",
+ indent + 2, "");
+ virt_dump(stream, vm, indent + 4);
+ }
+ fprintf(stream, "%*sVCPUs:\n", indent, "");
+ for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ vcpu_dump(stream, vm, vcpu->id, indent + 2);
+}
+
+/* VM VCPU Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by vcpuid, within the VM
+ * given by vm, to the FILE stream given by stream.
+ */
+void vcpu_dump(FILE *stream, struct kvm_vm *vm,
+ uint32_t vcpuid, uint8_t indent)
+{
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+
+ fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+
+ fprintf(stream, "%*sregs:\n", indent + 2, "");
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs_dump(stream, &regs, indent + 4);
+
+ fprintf(stream, "%*ssregs:\n", indent + 2, "");
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs_dump(stream, &sregs, indent + 4);
+}
+
+/* Known KVM exit reasons */
+static struct exit_reason {
+ unsigned int reason;
+ const char *name;
+} exit_reasons_known[] = {
+ {KVM_EXIT_UNKNOWN, "UNKNOWN"},
+ {KVM_EXIT_EXCEPTION, "EXCEPTION"},
+ {KVM_EXIT_IO, "IO"},
+ {KVM_EXIT_HYPERCALL, "HYPERCALL"},
+ {KVM_EXIT_DEBUG, "DEBUG"},
+ {KVM_EXIT_HLT, "HLT"},
+ {KVM_EXIT_MMIO, "MMIO"},
+ {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
+ {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
+ {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
+ {KVM_EXIT_INTR, "INTR"},
+ {KVM_EXIT_SET_TPR, "SET_TPR"},
+ {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
+ {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
+ {KVM_EXIT_S390_RESET, "S390_RESET"},
+ {KVM_EXIT_DCR, "DCR"},
+ {KVM_EXIT_NMI, "NMI"},
+ {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
+ {KVM_EXIT_OSI, "OSI"},
+ {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
+ {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+#endif
+};
+
+/* Exit Reason String
+ *
+ * Input Args:
+ * exit_reason - Exit reason
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Constant string pointer describing the exit reason.
+ *
+ * Locates and returns a constant string that describes the KVM exit
+ * reason given by exit_reason. If no such string is found, a constant
+ * string of "Unknown" is returned.
+ */
+const char *exit_reason_str(unsigned int exit_reason)
+{
+ unsigned int n1;
+
+ for (n1 = 0; n1 < ARRAY_SIZE(exit_reasons_known); n1++) {
+ if (exit_reason == exit_reasons_known[n1].reason)
+ return exit_reasons_known[n1].name;
+ }
+
+ return "Unknown";
+}
+
+/* Physical Page Allocate
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * paddr_min - Physical address minimum
+ * memslot - Memory region to allocate page from
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting physical address
+ *
+ * Within the VM specified by vm, locates an available physical page
+ * at or above paddr_min. If found, the page is marked as in use
+ * and its address is returned. If no page is available at or
+ * above paddr_min, the VM state is dumped and the test aborts.
+ */
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ struct userspace_mem_region *region;
+ sparsebit_idx_t pg;
+
+ TEST_ASSERT((paddr_min % vm->page_size) == 0, "Min physical address "
+ "not divisible by page size.\n"
+ " paddr_min: 0x%lx page_size: 0x%x",
+ paddr_min, vm->page_size);
+
+ /* Locate memory region. */
+ region = memslot2region(vm, memslot);
+
+ /* Locate next available physical page at or above paddr_min. */
+ pg = paddr_min >> vm->page_shift;
+
+ if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
+ pg = sparsebit_next_set(region->unused_phy_pages, pg);
+ if (pg == 0) {
+ fprintf(stderr, "No guest physical page available, "
+ "paddr_min: 0x%lx page_size: 0x%x memslot: %u",
+ paddr_min, vm->page_size, memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ abort();
+ }
+ }
+
+ /* Specify page as in use and return its address. */
+ sparsebit_clear(region->unused_phy_pages, pg);
+
+ return pg * vm->page_size;
+}
+
+/* Address Guest Virtual to Host Virtual
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent host virtual address
+ */
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
+}
+
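+/* Reads the current guest arguments for the VCPU given by vcpu_id:
+ * the I/O port from the run structure and arg0/arg1 from the guest's
+ * RDI/RSI.
+ */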
+void guest_args_read(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct guest_args *args)
+{
+ struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ struct kvm_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+ vcpu_regs_get(vm, vcpu_id, &regs);
+
+ args->port = run->io.port;
+ args->arg0 = regs.rdi;
+ args->arg1 = regs.rsi;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
new file mode 100644
index 000000000..542ed606b
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -0,0 +1,72 @@
+/*
+ * tools/testing/selftests/kvm/lib/kvm_util_internal.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#ifndef KVM_UTIL_INTERNAL_H
+#define KVM_UTIL_INTERNAL_H 1
+
+#include "sparsebit.h"
+
+#ifndef BITS_PER_BYTE
+#define BITS_PER_BYTE 8
+#endif
+
+#ifndef BITS_PER_LONG
+#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
+#endif
+
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
+
+/* Concrete definition of struct kvm_vm. */
+struct userspace_mem_region {
+ struct userspace_mem_region *next, *prev;
+ struct kvm_userspace_memory_region region;
+ struct sparsebit *unused_phy_pages;
+ int fd;
+ off_t offset;
+ void *host_mem;
+ void *mmap_start;
+ size_t mmap_size;
+};
+
+struct vcpu {
+ struct vcpu *next, *prev;
+ uint32_t id;
+ int fd;
+ struct kvm_run *state;
+};
+
+struct kvm_vm {
+ int mode;
+ int kvm_fd;
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+ uint64_t max_gfn;
+ struct vcpu *vcpu_head;
+ struct userspace_mem_region *userspace_mem_region_head;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t pgd;
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+};
+
+struct vcpu *vcpu_find(struct kvm_vm *vm,
+ uint32_t vcpuid);
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot);
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent);
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent);
+
+#endif
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
new file mode 100644
index 000000000..b132bc95d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -0,0 +1,2087 @@
+/*
+ * Sparse bit array
+ *
+ * Copyright (C) 2018, Google LLC.
+ * Copyright (C) 2018, Red Hat, Inc. (code style cleanup and fuzzing driver)
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This library provides functions to support a memory efficient bit array,
+ * with an index size of 2^64. A sparsebit array is allocated through
+ * the use sparsebit_alloc() and free'd via sparsebit_free(),
+ * such as in the following:
+ *
+ * struct sparsebit *s;
+ * s = sparsebit_alloc();
+ * sparsebit_free(&s);
+ *
+ * Note that sparsebit_free() takes a pointer to the caller's
+ * struct sparsebit pointer. This is so that sparsebit_free() is
+ * able to poison that pointer (e.g. set it to NULL) before
+ * returning to the caller.
+ *
+ * Between the return of sparsebit_alloc() and the call of
+ * sparsebit_free(), there are multiple query and modifying operations
+ * that can be performed on the allocated sparsebit array. All of
+ * these operations take as a parameter the value returned from
+ * sparsebit_alloc() and most also take a bit index. Frequently
+ * used routines include:
+ *
+ * ---- Query Operations
+ * sparsebit_is_set(s, idx)
+ * sparsebit_is_clear(s, idx)
+ * sparsebit_any_set(s)
+ * sparsebit_first_set(s)
+ * sparsebit_next_set(s, prev_idx)
+ *
+ * ---- Modifying Operations
+ * sparsebit_set(s, idx)
+ * sparsebit_clear(s, idx)
+ * sparsebit_set_num(s, idx, num);
+ * sparsebit_clear_num(s, idx, num);
+ *
+ * A common operation is to iterate over all the bits set in a
+ * sparsebit array. This can be done via code with the following structure:
+ *
+ * sparsebit_idx_t idx;
+ * if (sparsebit_any_set(s)) {
+ * idx = sparsebit_first_set(s);
+ * do {
+ * ...
+ * idx = sparsebit_next_set(s, idx);
+ * } while (idx != 0);
+ * }
+ *
+ * The index of the first set bit needs to be obtained via
+ * sparsebit_first_set(), because sparsebit_next_set() needs
+ * the index of the previously set bit. The sparsebit_idx_t type is
+ * unsigned, so there is no previous index before 0 available.
+ * Also, the call to sparsebit_first_set() is not made unless there
+ * is at least 1 bit set in the array, because sparsebit_first_set()
+ * aborts when called with no bits set. It is the caller's
+ * responsibility to assure that the sparsebit array has at least
+ * a single bit set before calling sparsebit_first_set().
+ *
+ * ==== Implementation Overview ====
+ * For the most part the internal implementation of sparsebit is
+ * opaque to the caller. One important implementation detail that the
+ * caller may need to be aware of is the spatial complexity of the
+ * implementation. This implementation of a sparsebit array is not
+ * only sparse, in that it uses memory proportional to the number of
+ * contiguous ranges of set bits, it is also efficient in memory usage
+ * when most of the bits are set.
+ *
+ * At a high-level the state of the bit settings are maintained through
+ * the use of a binary-search tree, where each node contains at least
+ * the following members:
+ *
+ * typedef uint64_t sparsebit_idx_t;
+ * typedef uint64_t sparsebit_num_t;
+ *
+ * sparsebit_idx_t idx;
+ * uint32_t mask;
+ * sparsebit_num_t num_after;
+ *
+ * The idx member contains the bit index of the first bit described by this
+ * node, while the mask member stores the setting of the first 32-bits.
+ * The setting of the bit at idx + n, where 0 <= n < 32, is located in the
+ * mask member at 1 << n.
+ *
+ * Nodes are sorted by idx and the bits described by two nodes will never
+ * overlap. The idx member is always aligned to the mask size, i.e. a
+ * multiple of 32.
+ *
+ * Beyond a typical implementation, the nodes in this implementation
+ * also contain a member named num_after. The num_after member holds the
+ * number of bits immediately after the mask bits that are contiguously set.
+ * The use of the num_after member allows this implementation to efficiently
+ * represent cases where most bits are set. For example, the case of all
+ * but the last two bits set, is represented by the following two nodes:
+ *
+ * node 0 - idx: 0x0 mask: 0xffffffff num_after: 0xffffffffffffffc0
+ * node 1 - idx: 0xffffffffffffffe0 mask: 0x3fffffff num_after: 0
+ *
+ * ==== Invariants ====
+ * This implementation uses the following invariants:
+ *
+ * + Nodes are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + Sum of bits set in all the nodes is equal to the value of
+ * the struct sparsebit num_set member.
+ *
+ * + The setting of at least one bit is always described in a node's
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this node's
+ * starting index - 1. All such occurrences of this condition are
+ * avoided by moving the setting of the node's mask bits into
+ * the previous node's num_after setting.
+ *
+ * + Node starting index is evenly divisible by the number of bits
+ * within a node's mask member.
+ *
+ * + Nodes never represent a range of bits that wrap around the
+ * highest supported index.
+ *
+ * (idx + MASK_BITS + num_after - 1) <= ((sparsebit_idx_t) 0) - 1
+ *
+ * As a consequence of the above, the num_after member of a node
+ * will always be <=:
+ *
+ * maximum_index - nodes_starting_index - number_of_mask_bits
+ *
+ * + Nodes within the binary search tree are sorted based on each
+ * node's starting index.
+ *
+ * + The range of bits described by any two nodes do not overlap. The
+ * range of bits described by a single node is:
+ *
+ * start: node->idx
+ * end (inclusive): node->idx + MASK_BITS + node->num_after - 1;
+ *
+ * Note, at times these invariants are temporarily violated for a
+ * specific portion of the code. For example, when setting a mask
+ * bit, there is a small delay between when the mask bit is set and the
+ * value in the struct sparsebit num_set member is updated. Other
+ * temporary violations occur when node_split() is called with a specified
+ * index and assures that a node where its mask represents the bit
+ * at the specified index exists. At times to do this node_split()
+ * must split an existing node into two nodes or create a node that
+ * has no bits set. Such temporary violations must be corrected before
+ * returning to the caller. These corrections are typically performed
+ * by the local function node_reduce().
+ */
+
+#include "test_util.h"
+#include "sparsebit.h"
+#include <limits.h>
+#include <assert.h>
+
+#define DUMP_LINE_MAX 100 /* Does not include indent amount */
+
+typedef uint32_t mask_t;
+#define MASK_BITS (sizeof(mask_t) * CHAR_BIT)
+
+struct node {
+ struct node *parent;
+ struct node *left;
+ struct node *right;
+ sparsebit_idx_t idx; /* index of least-significant bit in mask */
+ sparsebit_num_t num_after; /* num contiguously set after mask */
+ mask_t mask;
+};
+
+struct sparsebit {
+ /*
+ * Points to root node of the binary search
+ * tree. Equal to NULL when no bits are set in
+ * the entire sparsebit array.
+ */
+ struct node *root;
+
+ /*
+ * A redundant count of the total number of bits set. Used for
+ * diagnostic purposes and to change the time complexity of
+ * sparsebit_num_set() from O(n) to O(1).
+ * Note: Due to overflow, a value of 0 means none or all set.
+ */
+ sparsebit_num_t num_set;
+};
+
+/* Returns the number of set bits described by the settings
+ * of the node pointed to by nodep.
+ */
+static sparsebit_num_t node_num_set(struct node *nodep)
+{
+ return nodep->num_after + __builtin_popcount(nodep->mask);
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index.
+ */
+static struct node *node_first(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ for (nodep = s->root; nodep && nodep->left; nodep = nodep->left)
+ ;
+
+ return nodep;
+}
+
+/* Returns a pointer to the node that describes the
+ * lowest bit index > the index of the node pointed to by np.
+ * Returns NULL if no node with a higher index exists.
+ */
+static struct node *node_next(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a right child, next node is the left-most
+ * of the right child.
+ */
+ if (nodep->right) {
+ for (nodep = nodep->right; nodep->left; nodep = nodep->left)
+ ;
+ return nodep;
+ }
+
+ /*
+ * No right child. Go up until node is left child of a parent.
+ * That parent is then the next node.
+ */
+ while (nodep->parent && nodep == nodep->parent->right)
+ nodep = nodep->parent;
+
+ return nodep->parent;
+}
+
+/* Searches for and returns a pointer to the node that describes the
+ * highest index < the index of the node pointed to by np.
+ * Returns NULL if no node with a lower index exists.
+ */
+static struct node *node_prev(struct sparsebit *s, struct node *np)
+{
+ struct node *nodep = np;
+
+ /*
+ * If current node has a left child, previous node is the right-most
+ * of the left child.
+ */
+ if (nodep->left) {
+ for (nodep = nodep->left; nodep->right; nodep = nodep->right)
+ ;
+ return nodep;
+ }
+
+ /*
+ * No left child. Go up until node is right child of a parent.
+ * That parent is then the previous node.
+ */
+ while (nodep->parent && nodep == nodep->parent->left)
+ nodep = nodep->parent;
+
+ return nodep->parent;
+}
+
+/* Allocates space to hold a copy of the node sub-tree pointed to by
+ * subtree and duplicates the bit settings to the newly allocated nodes.
+ * Returns the newly allocated copy of subtree.
+ */
+static struct node *node_copy_subtree(struct node *subtree)
+{
+ struct node *root;
+
+ /* Duplicate the node at the root of the subtree */
+ root = calloc(1, sizeof(*root));
+ if (!root) {
+ perror("calloc");
+ abort();
+ }
+
+ root->idx = subtree->idx;
+ root->mask = subtree->mask;
+ root->num_after = subtree->num_after;
+
+ /* As needed, recursively duplicate the left and right subtrees */
+ if (subtree->left) {
+ root->left = node_copy_subtree(subtree->left);
+ root->left->parent = root;
+ }
+
+ if (subtree->right) {
+ root->right = node_copy_subtree(subtree->right);
+ root->right->parent = root;
+ }
+
+ return root;
+}
+
+/* Searches for and returns a pointer to the node that describes the setting
+ * of the bit given by idx. A node describes the setting of a bit if its
+ * index is within the bits described by the mask bits or the number of
+ * contiguous bits set after the mask. Returns NULL if there is no such node.
+ */
+static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right) {
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ break;
+ }
+
+ return nodep;
+}
+
+/* Entry Requirements:
+ * + A node that describes the setting of idx is not already present.
+ *
+ * Adds a new node to describe the setting of the bit at the index given
+ * by idx. Returns a pointer to the newly added node.
+ *
+ * TODO(lhuemill): Degenerate cases cause the tree to get unbalanced.
+ */
+static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep, *parentp, *prev;
+
+ /* Allocate and initialize the new node. */
+ nodep = calloc(1, sizeof(*nodep));
+ if (!nodep) {
+ perror("calloc");
+ abort();
+ }
+
+ nodep->idx = idx & -MASK_BITS;
+
+ /* If no nodes, set it up as the root node. */
+ if (!s->root) {
+ s->root = nodep;
+ return nodep;
+ }
+
+ /*
+ * Find the parent where the new node should be attached
+ * and add the node there.
+ */
+ parentp = s->root;
+ while (true) {
+ if (idx < parentp->idx) {
+ if (!parentp->left) {
+ parentp->left = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->left;
+ } else {
+ assert(idx > parentp->idx + MASK_BITS + parentp->num_after - 1);
+ if (!parentp->right) {
+ parentp->right = nodep;
+ nodep->parent = parentp;
+ break;
+ }
+ parentp = parentp->right;
+ }
+ }
+
+ /*
+ * Do the num_after bits of the previous node overlap with the
+ * mask of the new node? If so, set the bits in the new node's
+ * mask and reduce the previous node's num_after.
+ */
+ prev = node_prev(s, nodep);
+ while (prev && prev->idx + MASK_BITS + prev->num_after - 1 >= nodep->idx) {
+ unsigned int n1 = (prev->idx + MASK_BITS + prev->num_after - 1)
+ - nodep->idx;
+ assert(prev->num_after > 0);
+ assert(n1 < MASK_BITS);
+ assert(!(nodep->mask & (1 << n1)));
+ nodep->mask |= (1 << n1);
+ prev->num_after--;
+ }
+
+ return nodep;
+}
+
+/* Returns whether all the bits in the sparsebit array are set. */
+bool sparsebit_all_set(struct sparsebit *s)
+{
+ /*
+ * If any nodes are present, there must be at least one bit set.
+ * The only case where a bit is set and the total number set is 0
+ * (due to overflow) is when all bits are set.
+ */
+ return s->root && s->num_set == 0;
+}
+
+/* Clears all bits described by the node pointed to by nodep, then
+ * removes the node.
+ */
+static void node_rm(struct sparsebit *s, struct node *nodep)
+{
+ struct node *tmp;
+ sparsebit_num_t num_set;
+
+ num_set = node_num_set(nodep);
+ assert(s->num_set >= num_set || sparsebit_all_set(s));
+ s->num_set -= node_num_set(nodep);
+
+ /* Have both left and right child */
+ if (nodep->left && nodep->right) {
+ /*
+ * Move left children to the leftmost leaf node
+ * of the right child.
+ */
+ for (tmp = nodep->right; tmp->left; tmp = tmp->left)
+ ;
+ tmp->left = nodep->left;
+ nodep->left = NULL;
+ tmp->left->parent = tmp;
+ }
+
+ /* Left only child */
+ if (nodep->left) {
+ if (!nodep->parent) {
+ s->root = nodep->left;
+ nodep->left->parent = NULL;
+ } else {
+ nodep->left->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->left;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->left;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+ /* Right only child */
+ if (nodep->right) {
+ if (!nodep->parent) {
+ s->root = nodep->right;
+ nodep->right->parent = NULL;
+ } else {
+ nodep->right->parent = nodep->parent;
+ if (nodep == nodep->parent->left)
+ nodep->parent->left = nodep->right;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = nodep->right;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+
+ return;
+ }
+
+ /* Leaf Node */
+ if (!nodep->parent) {
+ s->root = NULL;
+ } else {
+ if (nodep->parent->left == nodep)
+ nodep->parent->left = NULL;
+ else {
+ assert(nodep == nodep->parent->right);
+ nodep->parent->right = NULL;
+ }
+ }
+
+ nodep->parent = nodep->left = nodep->right = NULL;
+ free(nodep);
+}
+
+/* Splits the node containing the bit at idx so that there is a node
+ * that starts at the specified index. If no such node exists, a new
+ * node at the specified index is created. Returns the new node.
+ *
+ * idx must be aligned to a mask boundary (a multiple of MASK_BITS).
+ */
+static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep1, *nodep2;
+ sparsebit_idx_t offset;
+ sparsebit_num_t orig_num_after;
+
+ assert(!(idx % MASK_BITS));
+
+ /*
+ * Is there a node that describes the setting of idx?
+ * If not, add it.
+ */
+ nodep1 = node_find(s, idx);
+ if (!nodep1)
+ return node_add(s, idx);
+
+ /*
+ * All done if the starting index of the node is where the
+ * split should occur.
+ */
+ if (nodep1->idx == idx)
+ return nodep1;
+
+ /*
+ * Split point not at start of mask, so it must be part of
+ * bits described by num_after.
+ */
+
+ /*
+ * Calculate offset within num_after for where the split is
+ * to occur.
+ */
+ offset = idx - (nodep1->idx + MASK_BITS);
+ orig_num_after = nodep1->num_after;
+
+ /*
+ * Add a new node to describe the bits starting at
+ * the split point.
+ */
+ nodep1->num_after = offset;
+ nodep2 = node_add(s, idx);
+
+ /* Move bits after the split point into the new node */
+ nodep2->num_after = orig_num_after - offset;
+ if (nodep2->num_after >= MASK_BITS) {
+ nodep2->mask = ~(mask_t) 0;
+ nodep2->num_after -= MASK_BITS;
+ } else {
+ nodep2->mask = (1 << nodep2->num_after) - 1;
+ nodep2->num_after = 0;
+ }
+
+ return nodep2;
+}
+
+/* Iteratively reduces the node pointed to by nodep and its adjacent
+ * nodes into a more compact form. For example, a node with a mask with
+ * all bits set adjacent to a previous node, will get combined into a
+ * single node with an increased num_after setting.
+ *
+ * After each reduction, a further check is made to see if additional
+ * reductions are possible with the new previous and next nodes. Note,
+ * a search for a reduction is only done across the nodes nearest nodep
+ * and those that became part of a reduction. Reductions beyond nodep
+ * and the adjacent nodes that are reduced are not discovered. It is the
+ * responsibility of the caller to pass a nodep that is within one node
+ * of each possible reduction.
+ *
+ * This function does not fix all temporary violations of the invariants.
+ * For example it does not fix the case where the bit settings described
+ * by two or more nodes overlap. Such a violation introduces the potential
+ * complication of a bit setting for a specific index having different settings
+ * in different nodes. This would then introduce the further complication
+ * of which node has the correct setting of the bit and thus such conditions
+ * are not allowed.
+ *
+ * This function is designed to fix invariant violations that are introduced
+ * by node_split() and by changes to a node's mask or num_after members.
+ * For example, when setting a bit within a node's mask, the function that
+ * sets the bit doesn't have to worry about whether the setting of that
+ * bit caused the mask to have leading only or trailing only bits set.
+ * Instead, the function can call node_reduce(), with nodep equal to the
+ * node address that it set a mask bit in, and node_reduce() will notice
+ * the cases of leading or trailing only bits and that there is an
+ * adjacent node that the bit settings could be merged into.
+ *
+ * This implementation specifically detects and corrects violation of the
+ * following invariants:
+ *
+ * + Nodes are only used to represent bits that are set.
+ * Nodes with a mask of 0 and num_after of 0 are not allowed.
+ *
+ * + The setting of at least one bit is always described in a node's
+ * mask (mask >= 1).
+ *
+ * + A node with all mask bits set only occurs when the last bit
+ * described by the previous node is not equal to this node's
+ * starting index - 1. All such occurrences of this condition are
+ * avoided by moving the setting of the node's mask bits into
+ * the previous node's num_after setting.
+ */
+static void node_reduce(struct sparsebit *s, struct node *nodep)
+{
+ bool reduction_performed;
+
+ do {
+ reduction_performed = false;
+ struct node *prev, *next, *tmp;
+
+ /* 1) Potential reductions within the current node. */
+
+ /* Nodes with all bits cleared may be removed. */
+ if (nodep->mask == 0 && nodep->num_after == 0) {
+ /*
+ * About to remove the node pointed to by
+ * nodep, which normally would cause a problem
+ * for the next pass through the reduction loop,
+ * because the node at the starting point no longer
+ * exists. This potential problem is handled
+ * by first remembering the location of the next
+ * or previous nodes. Doesn't matter which, because
+ * once the node at nodep is removed, there will be
+ * no other nodes between prev and next.
+ *
+ * Note, the checks performed on nodep against both
+ * prev and next check for an adjacent
+ * node that can be reduced into a single node. As
+ * such, after removing the node at nodep, doesn't
+ * matter whether the nodep for the next pass
+ * through the loop is equal to the previous pass
+ * prev or next node. Either way, on the next pass
+ * the one not selected will become either the
+ * prev or next node.
+ */
+ tmp = node_next(s, nodep);
+ if (!tmp)
+ tmp = node_prev(s, nodep);
+
+ node_rm(s, nodep);
+
+ nodep = tmp;
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * When the mask is 0, the amount of num_after bits can be
+ * reduced by moving the initial num_after bits into the mask.
+ */
+ if (nodep->mask == 0) {
+ assert(nodep->num_after != 0);
+ assert(nodep->idx + MASK_BITS > nodep->idx);
+
+ nodep->idx += MASK_BITS;
+
+ if (nodep->num_after >= MASK_BITS) {
+ nodep->mask = ~0;
+ nodep->num_after -= MASK_BITS;
+ } else {
+ nodep->mask = (1u << nodep->num_after) - 1;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * 2) Potential reductions between the current and
+ * previous nodes.
+ */
+ prev = node_prev(s, nodep);
+ if (prev) {
+ sparsebit_idx_t prev_highest_bit;
+
+ /* Nodes with no bits set can be removed. */
+ if (prev->mask == 0 && prev->num_after == 0) {
+ node_rm(s, prev);
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * All mask bits set and previous node has
+ * adjacent index.
+ */
+ if (nodep->mask + 1 == 0 &&
+ prev->idx + MASK_BITS == nodep->idx) {
+ prev->num_after += MASK_BITS + nodep->num_after;
+ nodep->mask = 0;
+ nodep->num_after = 0;
+
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is node adjacent to previous node and the node
+ * contains a single contiguous range of bits
+ * starting from the beginning of the mask?
+ */
+ prev_highest_bit = prev->idx + MASK_BITS - 1 + prev->num_after;
+ if (prev_highest_bit + 1 == nodep->idx &&
+ (nodep->mask | (nodep->mask >> 1)) == nodep->mask) {
+ /*
+ * How many contiguous bits are there?
+ * Is equal to the total number of set
+ * bits, due to an earlier check that
+ * there is a single contiguous range of
+ * set bits.
+ */
+ unsigned int num_contiguous
+ = __builtin_popcount(nodep->mask);
+ assert((num_contiguous > 0) &&
+ ((1ULL << num_contiguous) - 1) == nodep->mask);
+
+ prev->num_after += num_contiguous;
+ nodep->mask = 0;
+
+ /*
+ * For predictable performance, handle special
+ * case where all mask bits are set and there
+ * is a non-zero num_after setting. This code
+ * is functionally correct without the following
+ * conditionalized statements, but without them
+ * the value of num_after is only reduced by
+ * the number of mask bits per pass. There are
+ * cases where num_after can be close to 2^64.
+ * Without this code it could take nearly
+ * (2^64) / 32 passes to perform the full
+ * reduction.
+ */
+ if (num_contiguous == MASK_BITS) {
+ prev->num_after += nodep->num_after;
+ nodep->num_after = 0;
+ }
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+
+ /*
+ * 3) Potential reductions between the current and
+ * next nodes.
+ */
+ next = node_next(s, nodep);
+ if (next) {
+ /* Nodes with no bits set can be removed. */
+ if (next->mask == 0 && next->num_after == 0) {
+ node_rm(s, next);
+ reduction_performed = true;
+ continue;
+ }
+
+ /*
+ * Is next node index adjacent to current node
+ * and has a mask with all bits set?
+ */
+ if (next->idx == nodep->idx + MASK_BITS + nodep->num_after &&
+ next->mask == ~(mask_t) 0) {
+ nodep->num_after += MASK_BITS;
+ next->mask = 0;
+ nodep->num_after += next->num_after;
+ next->num_after = 0;
+
+ node_rm(s, next);
+ next = NULL;
+
+ reduction_performed = true;
+ continue;
+ }
+ }
+ } while (nodep && reduction_performed);
+}
+
+/* Returns whether the bit at the index given by idx within the
+ * sparsebit array is set or not.
+ */
+bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Find the node that describes the setting of the bit at idx */
+ for (nodep = s->root; nodep;
+ nodep = nodep->idx > idx ? nodep->left : nodep->right)
+ if (idx >= nodep->idx &&
+ idx <= nodep->idx + MASK_BITS + nodep->num_after - 1)
+ goto have_node;
+
+ return false;
+
+have_node:
+ /* Bit is set if it is any of the bits described by num_after */
+ if (nodep->num_after && idx >= nodep->idx + MASK_BITS)
+ return true;
+
+ /* Is the corresponding mask bit set */
+ assert(idx >= nodep->idx && idx - nodep->idx < MASK_BITS);
+ return !!(nodep->mask & (1 << (idx - nodep->idx)));
+}
+
+/* Within the sparsebit array pointed to by s, sets the bit
+ * at the index given by idx.
+ */
+static void bit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already set */
+ if (sparsebit_is_set(s, idx))
+ return;
+
+ /*
+ * Get a node where the bit at idx is described by the mask.
+ * The node_split() call will also create a node, if there isn't
+ * already a node that describes the setting of the bit.
+ */
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /* Set the bit within the nodes mask */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(!(nodep->mask & (1 << (idx - nodep->idx))));
+ nodep->mask |= 1 << (idx - nodep->idx);
+ s->num_set++;
+
+ node_reduce(s, nodep);
+}
+
+/* Within the sparsebit array pointed to by s, clears the bit
+ * at the index given by idx.
+ */
+static void bit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ struct node *nodep;
+
+ /* Skip bits that are already cleared */
+ if (!sparsebit_is_set(s, idx))
+ return;
+
+ /* Is there a node that describes the setting of this bit? */
+ nodep = node_find(s, idx);
+ if (!nodep)
+ return;
+
+ /*
+ * If a num_after bit, split the node, so that the bit is
+ * part of a node mask.
+ */
+ if (idx >= nodep->idx + MASK_BITS)
+ nodep = node_split(s, idx & -MASK_BITS);
+
+ /*
+ * After the node_split above, the bit at idx should be within the mask.
+ * Clear that bit.
+ */
+ assert(idx >= nodep->idx && idx <= nodep->idx + MASK_BITS - 1);
+ assert(nodep->mask & (1 << (idx - nodep->idx)));
+ nodep->mask &= ~(1 << (idx - nodep->idx));
+ assert(s->num_set > 0 || sparsebit_all_set(s));
+ s->num_set--;
+
+ node_reduce(s, nodep);
+}
+
+/* Recursively dumps to the FILE stream given by stream the contents
+ * of the sub-tree of nodes pointed to by nodep. Each line of output
+ * is prefixed by the number of spaces given by indent. On each
+ * recursion, the indent amount is increased by 2. This causes nodes
+ * at each level deeper into the binary search tree to be displayed
+ * with a greater indent.
+ */
+static void dump_nodes(FILE *stream, struct node *nodep,
+ unsigned int indent)
+{
+ char *node_type;
+
+ /* Dump contents of node */
+ if (!nodep->parent)
+ node_type = "root";
+ else if (nodep == nodep->parent->left)
+ node_type = "left";
+ else {
+ assert(nodep == nodep->parent->right);
+ node_type = "right";
+ }
+ fprintf(stream, "%*s---- %s nodep: %p\n", indent, "", node_type, nodep);
+ fprintf(stream, "%*s parent: %p left: %p right: %p\n", indent, "",
+ nodep->parent, nodep->left, nodep->right);
+ fprintf(stream, "%*s idx: 0x%lx mask: 0x%x num_after: 0x%lx\n",
+ indent, "", nodep->idx, nodep->mask, nodep->num_after);
+
+ /* If present, dump contents of left child nodes */
+ if (nodep->left)
+ dump_nodes(stream, nodep->left, indent + 2);
+
+ /* If present, dump contents of right child nodes */
+ if (nodep->right)
+ dump_nodes(stream, nodep->right, indent + 2);
+}
+
+static inline sparsebit_idx_t node_first_set(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
+{
+ mask_t leading = (mask_t)1 << start;
+ int n1 = __builtin_ctz(~nodep->mask & -leading);
+
+ return nodep->idx + n1;
+}
+
+/* Dumps to the FILE stream specified by stream, the implementation-dependent
+ * internal state of s. Each line of output is prefixed with the number
+ * of spaces given by indent. The output is completely implementation
+ * dependent and subject to change. Output from this function should only
+ * be used for diagnostic purposes. For example, this function can be
+ * used by test cases after they detect an unexpected condition, as a means
+ * to capture diagnostic information.
+ */
+static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ /* Dump the contents of s */
+ fprintf(stream, "%*sroot: %p\n", indent, "", s->root);
+ fprintf(stream, "%*snum_set: 0x%lx\n", indent, "", s->num_set);
+
+ if (s->root)
+ dump_nodes(stream, s->root, indent);
+}
+
+/* Allocates and returns a new sparsebit array. The initial state
+ * of the newly allocated sparsebit array has all bits cleared.
+ */
+struct sparsebit *sparsebit_alloc(void)
+{
+ struct sparsebit *s;
+
+ /* Allocate top level structure. */
+ s = calloc(1, sizeof(*s));
+ if (!s) {
+ perror("calloc");
+ abort();
+ }
+
+ return s;
+}
+
+/* Frees the implementation dependent data for the sparsebit array
+ * pointed to by s and poisons the pointer to that data.
+ */
+void sparsebit_free(struct sparsebit **sbitp)
+{
+ struct sparsebit *s = *sbitp;
+
+ if (!s)
+ return;
+
+ sparsebit_clear_all(s);
+ free(s);
+ *sbitp = NULL;
+}
+
+/* Makes a copy of the sparsebit array given by s, to the sparsebit
+ * array given by d. Note, d must have already been allocated via
+ * sparsebit_alloc(). It may already have bits set; any existing
+ * state in d is cleared before the copy.
+ */
+void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+{
+ /* First clear any bits already set in the destination */
+ sparsebit_clear_all(d);
+
+ if (s->root) {
+ d->root = node_copy_subtree(s->root);
+ d->num_set = s->num_set;
+ }
+}
+
+/* Returns whether num consecutive bits starting at idx are all set. */
+bool sparsebit_is_set_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_cleared;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be set. */
+ if (!sparsebit_is_set(s, idx))
+ return false;
+
+ /* Find the next cleared bit */
+ next_cleared = sparsebit_next_clear(s, idx);
+
+ /*
+ * If no cleared bits beyond idx, then there are at least num
+ * set bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough set bits between idx and the next cleared bit.
+ */
+ return next_cleared == 0 || next_cleared - idx >= num;
+}
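+
+/* Illustrative usage only: with bits 0x10 through 0x17 set, the range
+ * queries below hold. The values are arbitrary.
+ *
+ *	sparsebit_set_num(s, 0x10, 8);
+ *	assert(sparsebit_is_set_num(s, 0x10, 8));
+ *	assert(!sparsebit_is_set_num(s, 0x10, 9));
+ */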
+
+/* Returns whether the bit at the index given by idx is cleared. */
+bool sparsebit_is_clear(struct sparsebit *s,
+ sparsebit_idx_t idx)
+{
+ return !sparsebit_is_set(s, idx);
+}
+
+/* Returns whether num consecutive bits starting at idx are all cleared. */
+bool sparsebit_is_clear_num(struct sparsebit *s,
+ sparsebit_idx_t idx, sparsebit_num_t num)
+{
+ sparsebit_idx_t next_set;
+
+ assert(num > 0);
+ assert(idx + num - 1 >= idx);
+
+ /* With num > 0, the first bit must be cleared. */
+ if (!sparsebit_is_clear(s, idx))
+ return false;
+
+ /* Find the next set bit */
+ next_set = sparsebit_next_set(s, idx);
+
+ /*
+ * If no set bits beyond idx, then there are at least num
+ * cleared bits. idx + num doesn't wrap. Otherwise check if
+ * there are enough cleared bits between idx and the next set bit.
+ */
+ return next_set == 0 || next_set - idx >= num;
+}
+
+/* Returns the total number of bits set. Note: 0 is also returned for
+ * the case of all bits set. This is because with all bits set, there
+ * is 1 additional bit set beyond what can be represented in the return
+ * value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
+ * to determine if the sparsebit array has any bits set.
+ */
+sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+{
+ return s->num_set;
+}
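+
+/* Illustrative note: because the all-bits-set case wraps the counter to
+ * 0, sparsebit_any_set() is the reliable emptiness test, e.g.:
+ *
+ *	sparsebit_set_all(s);
+ *	assert(sparsebit_num_set(s) == 0);
+ *	assert(sparsebit_any_set(s));
+ */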
+
+/* Returns whether any bit is set in the sparsebit array. */
+bool sparsebit_any_set(struct sparsebit *s)
+{
+ /*
+ * Nodes only describe set bits. If any nodes then there
+ * is at least 1 bit set.
+ */
+ if (!s->root)
+ return false;
+
+ /*
+ * Every node should have a non-zero mask. For now, we just
+ * check that the root node has a non-zero mask,
+ * which is a quick check that at least 1 bit is set.
+ */
+ assert(s->root->mask != 0);
+ assert(s->num_set > 0 ||
+ (s->root->num_after == ((sparsebit_num_t) 0) - MASK_BITS &&
+ s->root->mask == ~(mask_t) 0));
+
+ return true;
+}
+
+/* Returns whether all the bits in the sparsebit array are cleared. */
+bool sparsebit_all_clear(struct sparsebit *s)
+{
+ return !sparsebit_any_set(s);
+}
+
+/* Returns whether any bit in the sparsebit array is cleared. */
+bool sparsebit_any_clear(struct sparsebit *s)
+{
+ return !sparsebit_all_set(s);
+}
+
+/* Returns the index of the first set bit. Aborts if no bits are set.
+ */
+sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+{
+ struct node *nodep;
+
+ /* Validate at least 1 bit is set */
+ assert(sparsebit_any_set(s));
+
+ nodep = node_first(s);
+ return node_first_set(nodep, 0);
+}
+
+/* Returns the index of the first cleared bit. Aborts if
+ * no bits are cleared.
+ */
+sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+{
+ struct node *nodep1, *nodep2;
+
+ /* Validate at least 1 bit is cleared. */
+ assert(sparsebit_any_clear(s));
+
+ /* If no nodes or first node index > 0 then lowest cleared is 0 */
+ nodep1 = node_first(s);
+ if (!nodep1 || nodep1->idx > 0)
+ return 0;
+
+ /* Does the mask in the first node contain any cleared bits? */
+ if (nodep1->mask != ~(mask_t) 0)
+ return node_first_clear(nodep1, 0);
+
+ /*
+ * All mask bits set in first node. If there isn't a second node
+ * then the first cleared bit is the first bit after the bits
+ * described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2) {
+ /*
+ * No second node. First cleared bit is first bit beyond
+ * bits described by first node.
+ */
+ assert(nodep1->mask == ~(mask_t) 0);
+ assert(nodep1->idx + MASK_BITS + nodep1->num_after != (sparsebit_idx_t) 0);
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+ }
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the first cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
+
+/* Returns index of next bit set within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are set.
+ */
+sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t start;
+ struct node *nodep;
+
+ /* A bit after the highest index can't be set. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Find the leftmost 'candidate' overlapping or to the right
+ * of lowest_possible.
+ */
+ struct node *candidate = NULL;
+
+ /* True iff lowest_possible is within candidate */
+ bool contains = false;
+
+ /*
+ * Find node that describes setting of bit at lowest_possible.
+ * If such a node doesn't exist, find the node with the lowest
+ * starting index that is > lowest_possible.
+ */
+ for (nodep = s->root; nodep;) {
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1)
+ >= lowest_possible) {
+ candidate = nodep;
+ if (candidate->idx <= lowest_possible) {
+ contains = true;
+ break;
+ }
+ nodep = nodep->left;
+ } else {
+ nodep = nodep->right;
+ }
+ }
+ if (!candidate)
+ return 0;
+
+ assert(candidate->mask != 0);
+
+ /* Does the candidate node describe the setting of lowest_possible? */
+ if (!contains) {
+ /*
+ * Candidate doesn't describe setting of bit at lowest_possible.
+ * Candidate points to the first node with a starting index
+ * > lowest_possible.
+ */
+ assert(candidate->idx > lowest_possible);
+
+ return node_first_set(candidate, 0);
+ }
+
+ /*
+ * Candidate describes setting of bit at lowest_possible.
+ * Note: although the node describes the setting of the bit
+ * at lowest_possible, it's possible that its setting and the
+ * settings of all later bits described by this node are 0.
+ * For now, just handle the cases where this node describes
+ * a bit at or after an index of lowest_possible that is set.
+ */
+ start = lowest_possible - candidate->idx;
+
+ if (start < MASK_BITS && candidate->mask >= (1 << start))
+ return node_first_set(candidate, start);
+
+ if (candidate->num_after) {
+ sparsebit_idx_t first_num_after_idx = candidate->idx + MASK_BITS;
+
+ return lowest_possible < first_num_after_idx
+ ? first_num_after_idx : lowest_possible;
+ }
+
+ /*
+ * Although the candidate node describes the setting of the bit
+ * at the index of lowest_possible, all bits at that index and
+ * later that are described by candidate are cleared. With
+ * this, the next bit is the first bit in the next node, if
+ * such a node exists. If a next node doesn't exist, then
+ * there is no next set bit.
+ */
+ candidate = node_next(s, candidate);
+ if (!candidate)
+ return 0;
+
+ return node_first_set(candidate, 0);
+}
+
+/* Returns index of next bit cleared within s after the index given by prev.
+ * Returns 0 if there are no bits after prev that are cleared.
+ */
+sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+ sparsebit_idx_t prev)
+{
+ sparsebit_idx_t lowest_possible = prev + 1;
+ sparsebit_idx_t idx;
+ struct node *nodep1, *nodep2;
+
+ /* There are no bits after the highest index. */
+ if (lowest_possible == 0)
+ return 0;
+
+ /*
+ * Does a node describing the setting of lowest_possible exist?
+ * If not, the bit at lowest_possible is cleared.
+ */
+ nodep1 = node_find(s, lowest_possible);
+ if (!nodep1)
+ return lowest_possible;
+
+ /* Does a mask bit in node 1 describe the next cleared bit? */
+ for (idx = lowest_possible - nodep1->idx; idx < MASK_BITS; idx++)
+ if (!(nodep1->mask & (1 << idx)))
+ return nodep1->idx + idx;
+
+ /*
+ * Next cleared bit is not described by node 1. If there
+ * isn't a next node, then next cleared bit is described
+ * by bit after the bits described by the first node.
+ */
+ nodep2 = node_next(s, nodep1);
+ if (!nodep2)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * There is a second node.
+ * If it is not adjacent to the first node, then there is a gap
+ * of cleared bits between the nodes, and the next cleared bit
+ * is the first bit within the gap.
+ */
+ if (nodep1->idx + MASK_BITS + nodep1->num_after != nodep2->idx)
+ return nodep1->idx + MASK_BITS + nodep1->num_after;
+
+ /*
+ * Second node is adjacent to the first node.
+ * Because it is adjacent, its mask should be non-zero. If all
+ * its mask bits are set, then with it being adjacent, it should
+ * have had the mask bits moved into the num_after setting of the
+ * previous node.
+ */
+ return node_first_clear(nodep2, 0);
+}
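+
+/* Illustrative usage only: a sketch of walking the set ranges by
+ * alternating the next_set/next_clear queries. Assumes index 0 is clear;
+ * lo and hi are sparsebit_idx_t, and process_range() is a caller-supplied
+ * hypothetical helper.
+ *
+ *	for (lo = sparsebit_next_set(s, 0); lo;
+ *	     lo = sparsebit_next_set(s, hi)) {
+ *		hi = sparsebit_next_clear(s, lo);
+ *		if (!hi)
+ *			break;
+ *		process_range(lo, hi - 1);
+ *	}
+ *
+ * A break means the final set range runs to the highest index.
+ */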
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively set
+ * bits. Returns a value of 0 if no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_set(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_set(s, idx)) {
+ assert(sparsebit_is_set(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num set bits?
+ */
+ if (sparsebit_is_set_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of set bits at idx isn't large enough.
+ * Skip this entire sequence of set bits.
+ */
+ idx = sparsebit_next_clear(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Starting with the index 1 greater than the index given by start, finds
+ * and returns the index of the first sequence of num consecutively cleared
+ * bits. Returns a value of 0 if no such sequence exists.
+ */
+sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ sparsebit_idx_t idx;
+
+ assert(num >= 1);
+
+ for (idx = sparsebit_next_clear(s, start);
+ idx != 0 && idx + num - 1 >= idx;
+ idx = sparsebit_next_clear(s, idx)) {
+ assert(sparsebit_is_clear(s, idx));
+
+ /*
+ * Does the sequence of bits starting at idx consist of
+ * num cleared bits?
+ */
+ if (sparsebit_is_clear_num(s, idx, num))
+ return idx;
+
+ /*
+ * Sequence of cleared bits at idx isn't large enough.
+ * Skip this entire sequence of cleared bits.
+ */
+ idx = sparsebit_next_set(s, idx);
+ if (idx == 0)
+ return 0;
+ }
+
+ return 0;
+}
+
+/* Sets the bits in the inclusive range start through start + num - 1. */
+void sparsebit_set_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /*
+ * Leading - bits before first mask boundary.
+ *
+ * TODO(lhuemill): With some effort it may be possible to
+ * replace the following loop with a straight-line sequence
+ * of statements. High level sequence would be:
+ *
+ * 1. Use node_split() to force node that describes setting
+ * of idx to be within the mask portion of a node.
+ * 2. Form mask of bits to be set.
+ * 3. Determine number of mask bits already set in the node
+ * and store in a local variable named num_already_set.
+ * 4. Set the appropriate mask bits within the node.
+ * 5. Increment struct sparsebit_pvt num_set member
+ * by the number of bits that were actually set.
+ * Exclude from the counts bits that were already set.
+ * 6. Before returning to the caller, use node_reduce() to
+ * handle the multiple corner cases that this method
+ * introduces.
+ */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_set(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed set each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (!(nodep->mask & (1 << n1))) {
+ nodep->mask |= 1 << n1;
+ s->num_set++;
+ }
+ }
+
+ s->num_set -= nodep->num_after;
+ nodep->num_after = middle_end - middle_start + 1 - MASK_BITS;
+ s->num_set += nodep->num_after;
+
+ node_reduce(s, nodep);
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_set(s, idx);
+}
+
+/* Clears the bits in the inclusive range start through start + num - 1. */
+void sparsebit_clear_num(struct sparsebit *s,
+ sparsebit_idx_t start, sparsebit_num_t num)
+{
+ struct node *nodep, *next;
+ unsigned int n1;
+ sparsebit_idx_t idx;
+ sparsebit_num_t n;
+ sparsebit_idx_t middle_start, middle_end;
+
+ assert(num > 0);
+ assert(start + num - 1 >= start);
+
+ /* Leading - bits before first mask boundary */
+ for (idx = start, n = num; n > 0 && idx % MASK_BITS != 0; idx++, n--)
+ bit_clear(s, idx);
+
+ /* Middle - bits spanning one or more entire mask */
+ middle_start = idx;
+ middle_end = middle_start + (n & -MASK_BITS) - 1;
+ if (n >= MASK_BITS) {
+ nodep = node_split(s, middle_start);
+
+ /*
+ * As needed, split just after end of middle bits.
+ * No split needed if end of middle bits is at highest
+ * supported bit index.
+ */
+ if (middle_end + 1 > middle_end)
+ (void) node_split(s, middle_end + 1);
+
+ /* Delete nodes that only describe bits within the middle. */
+ for (next = node_next(s, nodep);
+ next && (next->idx < middle_end);
+ next = node_next(s, nodep)) {
+ assert(next->idx + MASK_BITS + next->num_after - 1 <= middle_end);
+ node_rm(s, next);
+ next = NULL;
+ }
+
+ /* As needed clear each of the mask bits */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ nodep->mask &= ~(1 << n1);
+ s->num_set--;
+ }
+ }
+
+ /* Clear any bits described by num_after */
+ s->num_set -= nodep->num_after;
+ nodep->num_after = 0;
+
+ /*
+ * Delete the node that describes the beginning of
+ * the middle bits and perform any allowed reductions
+ * with the nodes prev or next of nodep.
+ */
+ node_reduce(s, nodep);
+ nodep = NULL;
+ }
+ idx = middle_end + 1;
+ n -= middle_end - middle_start + 1;
+
+ /* Trailing - bits at and beyond last mask boundary */
+ assert(n < MASK_BITS);
+ for (; n > 0; idx++, n--)
+ bit_clear(s, idx);
+}
+
+/* Sets the bit at the index given by idx. */
+void sparsebit_set(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_set_num(s, idx, 1);
+}
+
+/* Clears the bit at the index given by idx. */
+void sparsebit_clear(struct sparsebit *s, sparsebit_idx_t idx)
+{
+ sparsebit_clear_num(s, idx, 1);
+}
+
+/* Sets the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_set_all(struct sparsebit *s)
+{
+ sparsebit_set(s, 0);
+ sparsebit_set_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(sparsebit_all_set(s));
+}
+
+/* Clears the bits in the entire addressable range of the sparsebit array. */
+void sparsebit_clear_all(struct sparsebit *s)
+{
+ sparsebit_clear(s, 0);
+ sparsebit_clear_num(s, 1, ~(sparsebit_idx_t) 0);
+ assert(!sparsebit_any_set(s));
+}
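+
+/* Illustrative note on the two-step pattern above: a single num argument
+ * cannot represent all 2^64 bits, so bit 0 is handled on its own and the
+ * remaining ~(sparsebit_idx_t) 0 bits are handled as one range.
+ */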
+
+static size_t display_range(FILE *stream, sparsebit_idx_t low,
+ sparsebit_idx_t high, bool prepend_comma_space)
+{
+ char *fmt_str;
+ size_t sz;
+
+ /* Determine the printf format string */
+ if (low == high)
+ fmt_str = prepend_comma_space ? ", 0x%lx" : "0x%lx";
+ else
+ fmt_str = prepend_comma_space ? ", 0x%lx:0x%lx" : "0x%lx:0x%lx";
+
+ /*
+ * When stream is NULL, just determine the size of what would
+ * have been printed, else print the range.
+ */
+ if (!stream)
+ sz = snprintf(NULL, 0, fmt_str, low, high);
+ else
+ sz = fprintf(stream, fmt_str, low, high);
+
+ return sz;
+}
+
+
+/* Dumps to the FILE stream given by stream, the bit settings
+ * of s. Each line of output is prefixed with the number of
+ * spaces given by indent. The length of each line is implementation
+ * dependent and does not depend on the indent amount. The following
+ * is an example output of a sparsebit array that has bits:
+ *
+ * 0x5, 0x8, 0xa:0xe, 0x12
+ *
+ * This corresponds to a sparsebit whose bits 5, 8, 10, 11, 12, 13, 14, 18
+ * are set. Note that a ':', instead of a '-', is used to specify a range of
+ * contiguous bits. This is done because '-' is used to specify command-line
+ * options, and sometimes ranges are specified as command-line arguments.
+ */
+void sparsebit_dump(FILE *stream, struct sparsebit *s,
+ unsigned int indent)
+{
+ size_t current_line_len = 0;
+ size_t sz;
+ struct node *nodep;
+
+ if (!sparsebit_any_set(s))
+ return;
+
+ /* Display initial indent */
+ fprintf(stream, "%*s", indent, "");
+
+ /* For each node */
+ for (nodep = node_first(s); nodep; nodep = node_next(s, nodep)) {
+ unsigned int n1;
+ sparsebit_idx_t low, high;
+
+ /* For each group of bits in the mask */
+ for (n1 = 0; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1)) {
+ low = high = nodep->idx + n1;
+
+ for (; n1 < MASK_BITS; n1++) {
+ if (nodep->mask & (1 << n1))
+ high = nodep->idx + n1;
+ else
+ break;
+ }
+
+ if ((n1 == MASK_BITS) && nodep->num_after)
+ high += nodep->num_after;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+
+ /*
+ * If num_after is non-zero and the most-significant bit of
+ * the mask is not set, then we still need to display a range
+ * for the bits described by num_after.
+ */
+ if (!(nodep->mask & (1 << (MASK_BITS - 1))) && nodep->num_after) {
+ low = nodep->idx + MASK_BITS;
+ high = nodep->idx + MASK_BITS + nodep->num_after - 1;
+
+ /*
+ * How much room will it take to display
+ * this range.
+ */
+ sz = display_range(NULL, low, high,
+ current_line_len != 0);
+
+ /*
+ * If there is not enough room, display
+ * a newline plus the indent of the next
+ * line.
+ */
+ if (current_line_len + sz > DUMP_LINE_MAX) {
+ fputs("\n", stream);
+ fprintf(stream, "%*s", indent, "");
+ current_line_len = 0;
+ }
+
+ /* Display the range */
+ sz = display_range(stream, low, high,
+ current_line_len != 0);
+ current_line_len += sz;
+ }
+ }
+ fputs("\n", stream);
+}
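+
+/* Illustrative usage only: tests typically dump to stderr with a small
+ * indent after detecting an unexpected condition, e.g.:
+ *
+ *	sparsebit_dump(stderr, s, 4);
+ */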
+
+/* Validates the internal state of the sparsebit array given by
+ * s. On error, diagnostic information is printed to stderr and
+ * abort is called.
+ */
+void sparsebit_validate_internal(struct sparsebit *s)
+{
+ bool error_detected = false;
+ struct node *nodep, *prev = NULL;
+ sparsebit_num_t total_bits_set = 0;
+ unsigned int n1;
+
+ /* For each node */
+ for (nodep = node_first(s); nodep;
+ prev = nodep, nodep = node_next(s, nodep)) {
+
+ /*
+ * Increase total bits set by the number of bits set
+ * in this node.
+ */
+ for (n1 = 0; n1 < MASK_BITS; n1++)
+ if (nodep->mask & (1 << n1))
+ total_bits_set++;
+
+ total_bits_set += nodep->num_after;
+
+ /*
+ * For diagnostic purposes it is beneficial to have only one
+ * valid means to represent a set of bits. To support this,
+ * the arbitrary choice has been made to not allow a mask
+ * of zero.
+ */
+ if (nodep->mask == 0) {
+ fprintf(stderr, "Node mask of zero, "
+ "nodep: %p nodep->mask: 0x%x",
+ nodep, nodep->mask);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate num_after is not greater than the max index
+ * - the number of mask bits. The num_after member
+ * uses 0-based indexing and thus has no value that
+ * represents all bits set. This limitation is handled
+ * by requiring a non-zero mask. With a non-zero mask,
+ * MASK_BITS worth of bits are described by the mask,
+ * which makes the largest needed num_after equal to:
+ *
+ * (~(sparsebit_num_t) 0) - MASK_BITS + 1
+ */
+ if (nodep->num_after
+ > (~(sparsebit_num_t) 0) - MASK_BITS + 1) {
+ fprintf(stderr, "num_after too large, "
+ "nodep: %p nodep->num_after: 0x%lx",
+ nodep, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Validate node index is divisible by the mask size */
+ if (nodep->idx % MASK_BITS) {
+ fprintf(stderr, "Node index not divisible by "
+ "mask size,\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "MASK_BITS: %lu\n",
+ nodep, nodep->idx, MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Validate bits described by node don't wrap beyond the
+ * highest supported index.
+ */
+ if ((nodep->idx + MASK_BITS + nodep->num_after - 1) < nodep->idx) {
+ fprintf(stderr, "Bits described by node wrap "
+ "beyond highest supported index,\n"
+ " nodep: %p nodep->idx: 0x%lx\n"
+ " MASK_BITS: %lu nodep->num_after: 0x%lx",
+ nodep, nodep->idx, MASK_BITS, nodep->num_after);
+ error_detected = true;
+ break;
+ }
+
+ /* Check parent pointers. */
+ if (nodep->left) {
+ if (nodep->left->parent != nodep) {
+ fprintf(stderr, "Left child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->left: %p "
+ "nodep->left->parent: %p",
+ nodep, nodep->left,
+ nodep->left->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (nodep->right) {
+ if (nodep->right->parent != nodep) {
+ fprintf(stderr, "Right child parent pointer "
+ "doesn't point to this node,\n"
+ " nodep: %p nodep->right: %p "
+ "nodep->right->parent: %p",
+ nodep, nodep->right,
+ nodep->right->parent);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (!nodep->parent) {
+ if (s->root != nodep) {
+ fprintf(stderr, "Unexpected root node, "
+ "s->root: %p nodep: %p",
+ s->root, nodep);
+ error_detected = true;
+ break;
+ }
+ }
+
+ if (prev) {
+ /*
+ * Is index of previous node before index of
+ * current node?
+ */
+ if (prev->idx >= nodep->idx) {
+ fprintf(stderr, "Previous node index "
+ ">= current node index,\n"
+ " prev: %p prev->idx: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx",
+ prev, prev->idx, nodep, nodep->idx);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * Nodes occur in ascending order, based on each
+ * node's starting index.
+ */
+ if ((prev->idx + MASK_BITS + prev->num_after - 1)
+ >= nodep->idx) {
+ fprintf(stderr, "Previous node bit range "
+ "overlap with current node bit range,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+ error_detected = true;
+ break;
+ }
+
+ /*
+ * When the node has all mask bits set, it shouldn't
+ * be adjacent to the last bit described by the
+ * previous node.
+ */
+ if (nodep->mask == ~(mask_t) 0 &&
+ prev->idx + MASK_BITS + prev->num_after == nodep->idx) {
+ fprintf(stderr, "Current node has mask with "
+ "all bits set and is adjacent to the "
+ "previous node,\n"
+ " prev: %p prev->idx: 0x%lx "
+ "prev->num_after: 0x%lx\n"
+ " nodep: %p nodep->idx: 0x%lx "
+ "nodep->num_after: 0x%lx\n"
+ " MASK_BITS: %lu",
+ prev, prev->idx, prev->num_after,
+ nodep, nodep->idx, nodep->num_after,
+ MASK_BITS);
+
+ error_detected = true;
+ break;
+ }
+ }
+ }
+
+ if (!error_detected) {
+ /*
+ * Is the sum of bits set in each node equal to the count
+ * of total bits set?
+ */
+ if (s->num_set != total_bits_set) {
+ fprintf(stderr, "Number of bits set missmatch,\n"
+ " s->num_set: 0x%lx total_bits_set: 0x%lx",
+ s->num_set, total_bits_set);
+
+ error_detected = true;
+ }
+ }
+
+ if (error_detected) {
+ fputs(" dump_internal:\n", stderr);
+ sparsebit_dump_internal(stderr, s, 4);
+ abort();
+ }
+}
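+
+/* Illustrative usage only: a test can interleave mutations with
+ * validation to catch corruption close to its cause, e.g.:
+ *
+ *	sparsebit_set_num(s, first, num);
+ *	sparsebit_validate_internal(s);
+ */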
+
+
+#ifdef FUZZ
+/* A simple but effective fuzzing driver. Look for bugs with the help
+ * of some invariants and of a trivial representation of sparsebit.
+ * Just use 512 bytes of /dev/zero and /dev/urandom as inputs, and let
+ * afl-fuzz do the magic. :)
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+
+struct range {
+ sparsebit_idx_t first, last;
+ bool set;
+};
+
+struct sparsebit *s;
+struct range ranges[1000];
+int num_ranges;
+
+static bool get_value(sparsebit_idx_t idx)
+{
+ int i;
+
+ for (i = num_ranges; --i >= 0; )
+ if (ranges[i].first <= idx && idx <= ranges[i].last)
+ return ranges[i].set;
+
+ return false;
+}
+
+static void operate(int code, sparsebit_idx_t first, sparsebit_idx_t last)
+{
+ sparsebit_num_t num;
+ sparsebit_idx_t next;
+
+ if (first < last) {
+ num = last - first + 1;
+ } else {
+ num = first - last + 1;
+ first = last;
+ last = first + num - 1;
+ }
+
+ switch (code) {
+ case 0:
+ sparsebit_set(s, first);
+ assert(sparsebit_is_set(s, first));
+ assert(!sparsebit_is_clear(s, first));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = true };
+ break;
+ case 1:
+ sparsebit_clear(s, first);
+ assert(!sparsebit_is_set(s, first));
+ assert(sparsebit_is_clear(s, first));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (!get_value(first))
+ return;
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = first, .set = false };
+ break;
+ case 2:
+ assert(sparsebit_is_set(s, first) == get_value(first));
+ assert(sparsebit_is_clear(s, first) == !get_value(first));
+ break;
+ case 3:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_set_all(s);
+ assert(!sparsebit_any_clear(s));
+ assert(sparsebit_all_set(s));
+ num_ranges = 0;
+ ranges[num_ranges++] = (struct range)
+ { .first = 0, .last = ~(sparsebit_idx_t)0, .set = true };
+ break;
+ case 4:
+ if (sparsebit_any_set(s))
+ assert(get_value(sparsebit_first_set(s)));
+ if (sparsebit_any_clear(s))
+ assert(!get_value(sparsebit_first_clear(s)));
+ sparsebit_clear_all(s);
+ assert(!sparsebit_any_set(s));
+ assert(sparsebit_all_clear(s));
+ num_ranges = 0;
+ break;
+ case 5:
+ next = sparsebit_next_set(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || get_value(next));
+ break;
+ case 6:
+ next = sparsebit_next_clear(s, first);
+ assert(next == 0 || next > first);
+ assert(next == 0 || !get_value(next));
+ break;
+ case 7:
+ next = sparsebit_next_clear(s, first);
+ if (sparsebit_is_set_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_set(s, first - 1);
+ else if (sparsebit_any_set(s))
+ next = sparsebit_first_set(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_clear(s, first) || next <= last);
+ }
+ break;
+ case 8:
+ next = sparsebit_next_set(s, first);
+ if (sparsebit_is_clear_num(s, first, num)) {
+ assert(next == 0 || next > last);
+ if (first)
+ next = sparsebit_next_clear(s, first - 1);
+ else if (sparsebit_any_clear(s))
+ next = sparsebit_first_clear(s);
+ else
+ return;
+ assert(next == first);
+ } else {
+ assert(sparsebit_is_set(s, first) || next <= last);
+ }
+ break;
+ case 9:
+ sparsebit_set_num(s, first, num);
+ assert(sparsebit_is_set_num(s, first, num));
+ assert(!sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_set(s));
+ assert(!sparsebit_all_clear(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = true };
+ break;
+ case 10:
+ sparsebit_clear_num(s, first, num);
+ assert(!sparsebit_is_set_num(s, first, num));
+ assert(sparsebit_is_clear_num(s, first, num));
+ assert(sparsebit_any_clear(s));
+ assert(!sparsebit_all_set(s));
+ if (num_ranges == 1000)
+ exit(0);
+ ranges[num_ranges++] = (struct range)
+ { .first = first, .last = last, .set = false };
+ break;
+ case 11:
+ sparsebit_validate_internal(s);
+ break;
+ default:
+ break;
+ }
+}
+
+unsigned char get8(void)
+{
+ int ch;
+
+ ch = getchar();
+ if (ch == EOF)
+ exit(0);
+ return ch;
+}
+
+uint64_t get64(void)
+{
+ uint64_t x;
+
+ x = get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ x = (x << 8) | get8();
+ return (x << 8) | get8();
+}
+
+int main(void)
+{
+ s = sparsebit_alloc();
+ for (;;) {
+ uint8_t op = get8() & 0xf;
+ uint64_t first = get64();
+ uint64_t last = get64();
+
+ operate(op, first, last);
+ }
+}
+#endif
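+
+/* Illustrative note: the driver above consumes one op byte plus two
+ * big-endian 64-bit values per iteration from stdin, so it can also be
+ * exercised outside afl-fuzz by compiling this file with -DFUZZ and
+ * piping in random bytes.
+ */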
diff --git a/tools/testing/selftests/kvm/lib/vmx.c b/tools/testing/selftests/kvm/lib/vmx.c
new file mode 100644
index 000000000..b987c3c97
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/vmx.c
@@ -0,0 +1,283 @@
+/*
+ * tools/testing/selftests/kvm/lib/vmx.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+/* Allocate memory regions for nested VMX tests.
+ *
+ * Input Args:
+ * vm - The VM to allocate guest-virtual addresses in.
+ *
+ * Output Args:
+ * p_vmx_gva - The guest virtual address for the struct vmx_pages.
+ *
+ * Return:
+ * Pointer to structure with the addresses of the VMX areas.
+ */
+struct vmx_pages *
+vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
+{
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
+
+ /* Setup of a region of guest memory for the vmxon region. */
+ vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
+ vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
+
+ /* Setup of a region of guest memory for a vmcs. */
+ vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
+ vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
+
+ /* Setup of a region of guest memory for the MSR bitmap. */
+ vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
+ vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
+ memset(vmx->msr_hva, 0, getpagesize());
+
+ /* Setup of a region of guest memory for the shadow VMCS. */
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
+ vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
+
+ /* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
+ vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
+ vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
+ memset(vmx->vmread_hva, 0, getpagesize());
+
+ vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
+ vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
+ memset(vmx->vmwrite_hva, 0, getpagesize());
+
+ *p_vmx_gva = vmx_gva;
+ return vmx;
+}
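+
+/* Illustrative usage only, a sketch rather than part of this patch: a
+ * nested VMX test typically allocates the pages in the host and hands the
+ * guest virtual address to the guest via vcpu_args_set(). VCPU_ID is a
+ * test-defined vCPU id.
+ *
+ *	vm_vaddr_t vmx_pages_gva;
+ *
+ *	vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ *	vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ */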
+
+bool prepare_for_vmx_operation(struct vmx_pages *vmx)
+{
+ uint64_t feature_control;
+ uint64_t required;
+ unsigned long cr0;
+ unsigned long cr4;
+
+ /*
+ * Ensure bits in CR0 and CR4 are valid in VMX operation:
+ * - Bit X is 1 in _FIXED0: bit X is fixed to 1 in CRx.
+ * - Bit X is 0 in _FIXED1: bit X is fixed to 0 in CRx.
+ */
+ __asm__ __volatile__("mov %%cr0, %0" : "=r"(cr0) : : "memory");
+ cr0 &= rdmsr(MSR_IA32_VMX_CR0_FIXED1);
+ cr0 |= rdmsr(MSR_IA32_VMX_CR0_FIXED0);
+ __asm__ __volatile__("mov %0, %%cr0" : : "r"(cr0) : "memory");
+
+ __asm__ __volatile__("mov %%cr4, %0" : "=r"(cr4) : : "memory");
+ cr4 &= rdmsr(MSR_IA32_VMX_CR4_FIXED1);
+ cr4 |= rdmsr(MSR_IA32_VMX_CR4_FIXED0);
+ /* Enable VMX operation */
+ cr4 |= X86_CR4_VMXE;
+ __asm__ __volatile__("mov %0, %%cr4" : : "r"(cr4) : "memory");
+
+ /*
+ * Configure IA32_FEATURE_CONTROL MSR to allow VMXON:
+ * Bit 0: Lock bit. If clear, VMXON causes a #GP.
+ * Bit 2: Enables VMXON outside of SMX operation. If clear, VMXON
+ * outside of SMX causes a #GP.
+ */
+ required = FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
+ required |= FEATURE_CONTROL_LOCKED;
+ feature_control = rdmsr(MSR_IA32_FEATURE_CONTROL);
+ if ((feature_control & required) != required)
+ wrmsr(MSR_IA32_FEATURE_CONTROL, feature_control | required);
+
+ /* Enter VMX root operation. */
+ *(uint32_t *)(vmx->vmxon) = vmcs_revision();
+ if (vmxon(vmx->vmxon_gpa))
+ return false;
+
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
+
+ return true;
+}
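+
+/* Illustrative usage only: inside the guest, the usual sequence is to
+ * enter VMX root operation, populate the VMCS, then launch L2. Here vmx
+ * is the guest pointer to its struct vmx_pages, and l2_guest_code and
+ * l2_stack_top are test-supplied placeholders.
+ *
+ *	GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ *	prepare_vmcs(vmx, l2_guest_code, l2_stack_top);
+ *	GUEST_ASSERT(!vmlaunch());
+ */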
+
+/*
+ * Initialize the control fields to the most basic settings possible.
+ */
+static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
+{
+ vmwrite(VIRTUAL_PROCESSOR_ID, 0);
+ vmwrite(POSTED_INTR_NV, 0);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PINBASED_CTLS));
+ if (!vmwrite(SECONDARY_VM_EXEC_CONTROL, 0))
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL,
+ rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
+ else
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, rdmsr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS));
+ vmwrite(EXCEPTION_BITMAP, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
+ vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, -1); /* Never match */
+ vmwrite(CR3_TARGET_COUNT, 0);
+ vmwrite(VM_EXIT_CONTROLS, rdmsr(MSR_IA32_VMX_EXIT_CTLS) |
+ VM_EXIT_HOST_ADDR_SPACE_SIZE); /* 64-bit host */
+ vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_CONTROLS, rdmsr(MSR_IA32_VMX_ENTRY_CTLS) |
+ VM_ENTRY_IA32E_MODE); /* 64-bit guest */
+ vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
+ vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
+ vmwrite(TPR_THRESHOLD, 0);
+
+ vmwrite(CR0_GUEST_HOST_MASK, 0);
+ vmwrite(CR4_GUEST_HOST_MASK, 0);
+ vmwrite(CR0_READ_SHADOW, get_cr0());
+ vmwrite(CR4_READ_SHADOW, get_cr4());
+
+ vmwrite(MSR_BITMAP, vmx->msr_gpa);
+ vmwrite(VMREAD_BITMAP, vmx->vmread_gpa);
+ vmwrite(VMWRITE_BITMAP, vmx->vmwrite_gpa);
+}
+
+/*
+ * Initialize the host state fields based on the current host state, with
+ * the exception of HOST_RSP and HOST_RIP, which should be set by vmlaunch
+ * or vmresume.
+ */
+static inline void init_vmcs_host_state(void)
+{
+ uint32_t exit_controls = vmreadz(VM_EXIT_CONTROLS);
+
+ vmwrite(HOST_ES_SELECTOR, get_es());
+ vmwrite(HOST_CS_SELECTOR, get_cs());
+ vmwrite(HOST_SS_SELECTOR, get_ss());
+ vmwrite(HOST_DS_SELECTOR, get_ds());
+ vmwrite(HOST_FS_SELECTOR, get_fs());
+ vmwrite(HOST_GS_SELECTOR, get_gs());
+ vmwrite(HOST_TR_SELECTOR, get_tr());
+
+ if (exit_controls & VM_EXIT_LOAD_IA32_PAT)
+ vmwrite(HOST_IA32_PAT, rdmsr(MSR_IA32_CR_PAT));
+ if (exit_controls & VM_EXIT_LOAD_IA32_EFER)
+ vmwrite(HOST_IA32_EFER, rdmsr(MSR_EFER));
+ if (exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
+ vmwrite(HOST_IA32_PERF_GLOBAL_CTRL,
+ rdmsr(MSR_CORE_PERF_GLOBAL_CTRL));
+
+ vmwrite(HOST_IA32_SYSENTER_CS, rdmsr(MSR_IA32_SYSENTER_CS));
+
+ vmwrite(HOST_CR0, get_cr0());
+ vmwrite(HOST_CR3, get_cr3());
+ vmwrite(HOST_CR4, get_cr4());
+ vmwrite(HOST_FS_BASE, rdmsr(MSR_FS_BASE));
+ vmwrite(HOST_GS_BASE, rdmsr(MSR_GS_BASE));
+ vmwrite(HOST_TR_BASE,
+ get_desc64_base((struct desc64 *)(get_gdt_base() + get_tr())));
+ vmwrite(HOST_GDTR_BASE, get_gdt_base());
+ vmwrite(HOST_IDTR_BASE, get_idt_base());
+ vmwrite(HOST_IA32_SYSENTER_ESP, rdmsr(MSR_IA32_SYSENTER_ESP));
+ vmwrite(HOST_IA32_SYSENTER_EIP, rdmsr(MSR_IA32_SYSENTER_EIP));
+}
+
+/*
+ * Initialize the guest state fields essentially as a clone of
+ * the host state fields. Some host state fields have fixed
+ * values, and we set the corresponding guest state fields accordingly.
+ */
+static inline void init_vmcs_guest_state(void *rip, void *rsp)
+{
+ vmwrite(GUEST_ES_SELECTOR, vmreadz(HOST_ES_SELECTOR));
+ vmwrite(GUEST_CS_SELECTOR, vmreadz(HOST_CS_SELECTOR));
+ vmwrite(GUEST_SS_SELECTOR, vmreadz(HOST_SS_SELECTOR));
+ vmwrite(GUEST_DS_SELECTOR, vmreadz(HOST_DS_SELECTOR));
+ vmwrite(GUEST_FS_SELECTOR, vmreadz(HOST_FS_SELECTOR));
+ vmwrite(GUEST_GS_SELECTOR, vmreadz(HOST_GS_SELECTOR));
+ vmwrite(GUEST_LDTR_SELECTOR, 0);
+ vmwrite(GUEST_TR_SELECTOR, vmreadz(HOST_TR_SELECTOR));
+ vmwrite(GUEST_INTR_STATUS, 0);
+ vmwrite(GUEST_PML_INDEX, 0);
+
+ vmwrite(VMCS_LINK_POINTER, -1ll);
+ vmwrite(GUEST_IA32_DEBUGCTL, 0);
+ vmwrite(GUEST_IA32_PAT, vmreadz(HOST_IA32_PAT));
+ vmwrite(GUEST_IA32_EFER, vmreadz(HOST_IA32_EFER));
+ vmwrite(GUEST_IA32_PERF_GLOBAL_CTRL,
+ vmreadz(HOST_IA32_PERF_GLOBAL_CTRL));
+
+ vmwrite(GUEST_ES_LIMIT, -1);
+ vmwrite(GUEST_CS_LIMIT, -1);
+ vmwrite(GUEST_SS_LIMIT, -1);
+ vmwrite(GUEST_DS_LIMIT, -1);
+ vmwrite(GUEST_FS_LIMIT, -1);
+ vmwrite(GUEST_GS_LIMIT, -1);
+ vmwrite(GUEST_LDTR_LIMIT, -1);
+ vmwrite(GUEST_TR_LIMIT, 0x67);
+ vmwrite(GUEST_GDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_IDTR_LIMIT, 0xffff);
+ vmwrite(GUEST_ES_AR_BYTES,
+ vmreadz(GUEST_ES_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_CS_AR_BYTES, 0xa09b);
+ vmwrite(GUEST_SS_AR_BYTES, 0xc093);
+ vmwrite(GUEST_DS_AR_BYTES,
+ vmreadz(GUEST_DS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_FS_AR_BYTES,
+ vmreadz(GUEST_FS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_GS_AR_BYTES,
+ vmreadz(GUEST_GS_SELECTOR) == 0 ? 0x10000 : 0xc093);
+ vmwrite(GUEST_LDTR_AR_BYTES, 0x10000);
+ vmwrite(GUEST_TR_AR_BYTES, 0x8b);
+ vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
+ vmwrite(GUEST_ACTIVITY_STATE, 0);
+ vmwrite(GUEST_SYSENTER_CS, vmreadz(HOST_IA32_SYSENTER_CS));
+ vmwrite(VMX_PREEMPTION_TIMER_VALUE, 0);
+
+ vmwrite(GUEST_CR0, vmreadz(HOST_CR0));
+ vmwrite(GUEST_CR3, vmreadz(HOST_CR3));
+ vmwrite(GUEST_CR4, vmreadz(HOST_CR4));
+ vmwrite(GUEST_ES_BASE, 0);
+ vmwrite(GUEST_CS_BASE, 0);
+ vmwrite(GUEST_SS_BASE, 0);
+ vmwrite(GUEST_DS_BASE, 0);
+ vmwrite(GUEST_FS_BASE, vmreadz(HOST_FS_BASE));
+ vmwrite(GUEST_GS_BASE, vmreadz(HOST_GS_BASE));
+ vmwrite(GUEST_LDTR_BASE, 0);
+ vmwrite(GUEST_TR_BASE, vmreadz(HOST_TR_BASE));
+ vmwrite(GUEST_GDTR_BASE, vmreadz(HOST_GDTR_BASE));
+ vmwrite(GUEST_IDTR_BASE, vmreadz(HOST_IDTR_BASE));
+ vmwrite(GUEST_DR7, 0x400);
+ vmwrite(GUEST_RSP, (uint64_t)rsp);
+ vmwrite(GUEST_RIP, (uint64_t)rip);
+ vmwrite(GUEST_RFLAGS, 2);
+ vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, 0);
+ vmwrite(GUEST_SYSENTER_ESP, vmreadz(HOST_IA32_SYSENTER_ESP));
+ vmwrite(GUEST_SYSENTER_EIP, vmreadz(HOST_IA32_SYSENTER_EIP));
+}
+
+void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
+{
+ init_vmcs_control_fields(vmx);
+ init_vmcs_host_state();
+ init_vmcs_guest_state(guest_rip, guest_rsp);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86.c b/tools/testing/selftests/kvm/lib/x86.c
new file mode 100644
index 000000000..800fe3606
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86.c
@@ -0,0 +1,893 @@
+/*
+ * tools/testing/selftests/kvm/lib/x86.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kvm_util_internal.h"
+#include "x86.h"
+
+/* Minimum physical address used for virtual translation tables. */
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+/* Virtual translation table structure declarations */
+struct pageMapL4Entry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryPointerEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageDirectoryEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t ignored_06:1;
+ uint64_t page_size:1;
+ uint64_t ignored_11_08:4;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+struct pageTableEntry {
+ uint64_t present:1;
+ uint64_t writable:1;
+ uint64_t user:1;
+ uint64_t write_through:1;
+ uint64_t cache_disable:1;
+ uint64_t accessed:1;
+ uint64_t dirty:1;
+ uint64_t reserved_07:1;
+ uint64_t global:1;
+ uint64_t ignored_11_09:3;
+ uint64_t address:40;
+ uint64_t ignored_62_52:11;
+ uint64_t execute_disable:1;
+};
+
+/* Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * regs - Registers
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by regs, to the FILE stream
+ * given by stream.
+ */
+void regs_dump(FILE *stream, struct kvm_regs *regs,
+ uint8_t indent)
+{
+ fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
+ "rcx: 0x%.16llx rdx: 0x%.16llx\n",
+ indent, "",
+ regs->rax, regs->rbx, regs->rcx, regs->rdx);
+ fprintf(stream, "%*srsi: 0x%.16llx rdi: 0x%.16llx "
+ "rsp: 0x%.16llx rbp: 0x%.16llx\n",
+ indent, "",
+ regs->rsi, regs->rdi, regs->rsp, regs->rbp);
+ fprintf(stream, "%*sr8: 0x%.16llx r9: 0x%.16llx "
+ "r10: 0x%.16llx r11: 0x%.16llx\n",
+ indent, "",
+ regs->r8, regs->r9, regs->r10, regs->r11);
+ fprintf(stream, "%*sr12: 0x%.16llx r13: 0x%.16llx "
+ "r14: 0x%.16llx r15: 0x%.16llx\n",
+ indent, "",
+ regs->r12, regs->r13, regs->r14, regs->r15);
+ fprintf(stream, "%*srip: 0x%.16llx rfl: 0x%.16llx\n",
+ indent, "",
+ regs->rip, regs->rflags);
+}
+
+/* Segment Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * segment - KVM segment
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM segment given by segment, to the FILE stream
+ * given by stream.
+ */
+static void segment_dump(FILE *stream, struct kvm_segment *segment,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.8x "
+ "selector: 0x%.4x type: 0x%.2x\n",
+ indent, "", segment->base, segment->limit,
+ segment->selector, segment->type);
+ fprintf(stream, "%*spresent: 0x%.2x dpl: 0x%.2x "
+ "db: 0x%.2x s: 0x%.2x l: 0x%.2x\n",
+ indent, "", segment->present, segment->dpl,
+ segment->db, segment->s, segment->l);
+ fprintf(stream, "%*sg: 0x%.2x avl: 0x%.2x "
+ "unusable: 0x%.2x padding: 0x%.2x\n",
+ indent, "", segment->g, segment->avl,
+ segment->unusable, segment->padding);
+}
+
+/* dtable Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * dtable - KVM dtable
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the KVM dtable given by dtable, to the FILE stream
+ * given by stream.
+ */
+static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
+ uint8_t indent)
+{
+ fprintf(stream, "%*sbase: 0x%.16llx limit: 0x%.4x "
+ "padding: 0x%.4x 0x%.4x 0x%.4x\n",
+ indent, "", dtable->base, dtable->limit,
+ dtable->padding[0], dtable->padding[1], dtable->padding[2]);
+}
+
+/* System Register Dump
+ *
+ * Input Args:
+ * indent - Left margin indent amount
+ * sregs - System registers
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by sregs, to the FILE stream
+ * given by stream.
+ */
+void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
+ uint8_t indent)
+{
+ unsigned int i;
+
+ fprintf(stream, "%*scs:\n", indent, "");
+ segment_dump(stream, &sregs->cs, indent + 2);
+ fprintf(stream, "%*sds:\n", indent, "");
+ segment_dump(stream, &sregs->ds, indent + 2);
+ fprintf(stream, "%*ses:\n", indent, "");
+ segment_dump(stream, &sregs->es, indent + 2);
+ fprintf(stream, "%*sfs:\n", indent, "");
+ segment_dump(stream, &sregs->fs, indent + 2);
+ fprintf(stream, "%*sgs:\n", indent, "");
+ segment_dump(stream, &sregs->gs, indent + 2);
+ fprintf(stream, "%*sss:\n", indent, "");
+ segment_dump(stream, &sregs->ss, indent + 2);
+ fprintf(stream, "%*str:\n", indent, "");
+ segment_dump(stream, &sregs->tr, indent + 2);
+ fprintf(stream, "%*sldt:\n", indent, "");
+ segment_dump(stream, &sregs->ldt, indent + 2);
+
+ fprintf(stream, "%*sgdt:\n", indent, "");
+ dtable_dump(stream, &sregs->gdt, indent + 2);
+ fprintf(stream, "%*sidt:\n", indent, "");
+ dtable_dump(stream, &sregs->idt, indent + 2);
+
+ fprintf(stream, "%*scr0: 0x%.16llx cr2: 0x%.16llx "
+ "cr3: 0x%.16llx cr4: 0x%.16llx\n",
+ indent, "",
+ sregs->cr0, sregs->cr2, sregs->cr3, sregs->cr4);
+ fprintf(stream, "%*scr8: 0x%.16llx efer: 0x%.16llx "
+ "apic_base: 0x%.16llx\n",
+ indent, "",
+ sregs->cr8, sregs->efer, sregs->apic_base);
+
+ fprintf(stream, "%*sinterrupt_bitmap:\n", indent, "");
+ for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) {
+ fprintf(stream, "%*s%.16llx\n", indent + 2, "",
+ sregs->interrupt_bitmap[i]);
+ }
+}
+
+void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+{
+ int rc;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ /* If needed, create page map l4 table. */
+ if (!vm->pgd_created) {
+ vm_paddr_t paddr = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
+ vm->pgd = paddr;
+ vm->pgd_created = true;
+ }
+}
+
+/* VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * pgd_memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within the VM given by vm, creates a virtual translation for the page
+ * starting at vaddr to the page starting at paddr.
+ */
+void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint32_t pgd_memslot)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x",
+ vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx",
+ vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ index[0] = (vaddr >> 12) & 0x1ffu;
+ index[1] = (vaddr >> 21) & 0x1ffu;
+ index[2] = (vaddr >> 30) & 0x1ffu;
+ index[3] = (vaddr >> 39) & 0x1ffu;
+
+ /* Allocate page directory pointer table if not present. */
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present) {
+ pml4e[index[3]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pml4e[index[3]].writable = true;
+ pml4e[index[3]].present = true;
+ }
+
+ /* Allocate page directory table if not present. */
+ struct pageDirectoryPointerEntry *pdpe;
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present) {
+ pdpe[index[2]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pdpe[index[2]].writable = true;
+ pdpe[index[2]].present = true;
+ }
+
+ /* Allocate page table if not present. */
+ struct pageDirectoryEntry *pde;
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present) {
+ pde[index[1]].address = vm_phy_page_alloc(vm,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
+ >> vm->page_shift;
+ pde[index[1]].writable = true;
+ pde[index[1]].present = true;
+ }
+
+ /* Fill in page table entry. */
+ struct pageTableEntry *pte;
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ pte[index[0]].address = paddr >> vm->page_shift;
+ pte[index[0]].writable = true;
+ pte[index[0]].present = 1;
+}
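+
+/* Illustrative note: the index[] decomposition above is the standard
+ * 4-level, 4 KiB page walk. For example, for vaddr = 0x7fff12345000:
+ *
+ *	index[3] = (vaddr >> 39) & 0x1ffu;	page map level 4
+ *	index[2] = (vaddr >> 30) & 0x1ffu;	page directory pointer
+ *	index[1] = (vaddr >> 21) & 0x1ffu;	page directory
+ *	index[0] = (vaddr >> 12) & 0x1ffu;	page table
+ */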
+
+/* Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args:
+ * stream - Output FILE stream
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by stream, the contents of all the
+ * virtual translation tables for the VM given by vm.
+ */
+void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ struct pageMapL4Entry *pml4e, *pml4e_start;
+ struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
+ struct pageDirectoryEntry *pde, *pde_start;
+ struct pageTableEntry *pte, *pte_start;
+
+ if (!vm->pgd_created)
+ return;
+
+ fprintf(stream, "%*s "
+ " no\n", indent, "");
+ fprintf(stream, "%*s index hvaddr gpaddr "
+ "addr w exec dirty\n",
+ indent, "");
+ pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
+ vm->pgd);
+ for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
+ pml4e = &pml4e_start[n1];
+ if (!pml4e->present)
+ continue;
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ " %u\n",
+ indent, "",
+ pml4e - pml4e_start, pml4e,
+ addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
+ pml4e->writable, pml4e->execute_disable);
+
+ pdpe_start = addr_gpa2hva(vm, pml4e->address
+ * vm->page_size);
+ for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
+ pdpe = &pdpe_start[n2];
+ if (!pdpe->present)
+ continue;
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ "%u %u\n",
+ indent, "",
+ pdpe - pdpe_start, pdpe,
+ addr_hva2gpa(vm, pdpe),
+ (uint64_t) pdpe->address, pdpe->writable,
+ pdpe->execute_disable);
+
+ pde_start = addr_gpa2hva(vm,
+ pdpe->address * vm->page_size);
+ for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
+ pde = &pde_start[n3];
+ if (!pde->present)
+ continue;
+ fprintf(stream, "%*spde 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u\n",
+ indent, "", pde - pde_start, pde,
+ addr_hva2gpa(vm, pde),
+ (uint64_t) pde->address, pde->writable,
+ pde->execute_disable);
+
+ pte_start = addr_gpa2hva(vm,
+ pde->address * vm->page_size);
+ for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
+ pte = &pte_start[n4];
+ if (!pte->present)
+ continue;
+ fprintf(stream, "%*spte 0x%-3zx %p "
+ "0x%-12lx 0x%-10lx %u %u "
+ " %u 0x%-10lx\n",
+ indent, "",
+ pte - pte_start, pte,
+ addr_hva2gpa(vm, pte),
+ (uint64_t) pte->address,
+ pte->writable,
+ pte->execute_disable,
+ pte->dirty,
+ ((uint64_t) n1 << 27)
+ | ((uint64_t) n2 << 18)
+ | ((uint64_t) n3 << 9)
+ | ((uint64_t) n4));
+ }
+ }
+ }
+ }
+}
+
+/* Set Unusable Segment
+ *
+ * Input Args: None
+ *
+ * Output Args:
+ * segp - Pointer to segment register
+ *
+ * Return: None
+ *
+ * Sets the segment register pointed to by segp to an unusable state.
+ */
+static void kvm_seg_set_unusable(struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->unusable = true;
+}
+
+static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
+{
+ void *gdt = addr_gva2hva(vm, vm->gdt);
+ struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
+
+ desc->limit0 = segp->limit & 0xFFFF;
+ desc->base0 = segp->base & 0xFFFF;
+ desc->base1 = segp->base >> 16;
+ desc->type = segp->type;
+ desc->s = segp->s;
+ desc->dpl = segp->dpl;
+ desc->p = segp->present;
+ desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
+ desc->l = segp->l;
+ desc->db = segp->db;
+ desc->g = segp->g;
+ desc->base2 = segp->base >> 24;
+ if (!segp->s)
+ desc->base3 = segp->base >> 32;
+}
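+
+/* Illustrative note: base3 is only written for system segments (s == 0)
+ * because in long mode system descriptors such as the TSS are 16 bytes
+ * and carry bits 63:32 of the base, while code/data descriptors remain
+ * 8 bytes.
+ */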
+
+
+/* Set Long Mode Flat Kernel Code Segment
+ *
+ * Input Args:
+ * vm - VM whose GDT is being filled, or NULL to only write segp
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a code segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
+ * | kFlagCodeReadable
+ */
+ segp->g = true;
+ segp->l = true;
+ segp->present = 1;
+ if (vm)
+ kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/* Set Long Mode Flat Kernel Data Segment
+ *
+ * Input Args:
+ * vm - VM whose GDT is being filled, or NULL to only write segp
+ * selector - selector value
+ *
+ * Output Args:
+ * segp - Pointer to KVM segment
+ *
+ * Return: None
+ *
+ * Sets up the KVM segment pointed to by segp, to be a data segment
+ * with the selector value given by selector.
+ */
+static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
+ struct kvm_segment *segp)
+{
+ memset(segp, 0, sizeof(*segp));
+ segp->selector = selector;
+ segp->limit = 0xFFFFFFFFu;
+ segp->s = 0x1; /* kTypeCodeData */
+ segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
+ * | kFlagDataWritable
+ */
+ segp->g = true;
+ segp->present = true;
+ if (vm)
+ kvm_seg_fill_gdt_64bit(vm, segp);
+}
+
+/* Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Equivalent VM physical address
+ *
+ * Translates the VM virtual address given by gva to a VM physical
+ * address, by walking the page tables of the VM given by vm.
+ * A TEST_ASSERT failure occurs if no mapping for the VM virtual
+ * address exists.
+ */
+vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint16_t index[4];
+ struct pageMapL4Entry *pml4e;
+ struct pageDirectoryPointerEntry *pdpe;
+ struct pageDirectoryEntry *pde;
+ struct pageTableEntry *pte;
+ void *hva;
+
+ TEST_ASSERT(vm->mode == VM_MODE_FLAT48PG, "Attempt to use "
+ "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ index[0] = (gva >> 12) & 0x1ffu;
+ index[1] = (gva >> 21) & 0x1ffu;
+ index[2] = (gva >> 30) & 0x1ffu;
+ index[3] = (gva >> 39) & 0x1ffu;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+ pml4e = addr_gpa2hva(vm, vm->pgd);
+ if (!pml4e[index[3]].present)
+ goto unmapped_gva;
+
+ pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
+ if (!pdpe[index[2]].present)
+ goto unmapped_gva;
+
+ pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
+ if (!pde[index[1]].present)
+ goto unmapped_gva;
+
+ pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
+ if (!pte[index[0]].present)
+ goto unmapped_gva;
+
+ return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
+
+unmapped_gva:
+ TEST_ASSERT(false, "No mapping for vm virtual address, "
+ "gva: 0x%lx", gva);
+}
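+
+/* Worked example (illustrative): for gva 0x40201000 the walk above uses
+ * index[3] = (gva >> 39) & 0x1ff = 0, index[2] = (gva >> 30) & 0x1ff = 1,
+ * index[1] = (gva >> 21) & 0x1ff = 1, index[0] = (gva >> 12) & 0x1ff = 1,
+ * and the low 12 bits (0x000 here) carry over as the page offset.
+ */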
+
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
+ int pgd_memslot)
+{
+ if (!vm->gdt)
+ vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
+ KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+ dt->base = vm->gdt;
+ dt->limit = getpagesize();
+}
+
+static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
+ int selector, int gdt_memslot,
+ int pgd_memslot)
+{
+ if (!vm->tss)
+ vm->tss = vm_vaddr_alloc(vm, getpagesize(),
+ KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+
+ memset(segp, 0, sizeof(*segp));
+ segp->base = vm->tss;
+ segp->limit = 0x67;
+ segp->selector = selector;
+ segp->type = 0xb;
+ segp->present = 1;
+ kvm_seg_fill_gdt_64bit(vm, segp);
+}
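+
+/* Note: limit 0x67 above is the minimum legal 64-bit TSS limit (the
+ * 104-byte TSS minus one) and type 0xb marks a busy 64-bit TSS, which
+ * is what KVM expects for an active TR.
+ */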
+
+void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+{
+ struct kvm_sregs sregs;
+
+ /* Set mode specific system register values. */
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+
+ sregs.idt.limit = 0;
+
+ kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+
+ switch (vm->mode) {
+ case VM_MODE_FLAT48PG:
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ break;
+
+ default:
+ TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+}
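+
+/* The VM_MODE_FLAT48PG case above is the minimal long-mode recipe:
+ * CR0.PE+PG with CR4.PAE and EFER.LME/LMA enable 4-level paging, and
+ * the flat code/data segments plus a TSS satisfy the guest-state
+ * consistency checks performed at VM entry.
+ */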
+
+/* Adds a vCPU with reasonable defaults (i.e., a stack)
+ *
+ * Input Args:
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
+void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+{
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ vm_vaddr_t stack_vaddr;
+ stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+
+ /* Create VCPU */
+ vm_vcpu_add(vm, vcpuid, 0, 0);
+
+ /* Setup guest general purpose registers */
+ vcpu_regs_get(vm, vcpuid, &regs);
+ regs.rflags = regs.rflags | 0x2;
+ regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vm, vcpuid, &regs);
+
+ /* Setup the MP state */
+ mp_state.mp_state = 0;
+ vcpu_set_mp_state(vm, vcpuid, &mp_state);
+}
+
+/* VM VCPU CPUID Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU id
+ * cpuid - The CPUID values to set.
+ *
+ * Output Args: None
+ *
+ * Return: void
+ *
+ * Set the VCPU's CPUID.
+ */
+void vcpu_set_cpuid(struct kvm_vm *vm,
+ uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int rc;
+
+ TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+
+ rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
+ TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
+ rc, errno);
+}
+
+/* Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ *   extra_mem_pages - The number of extra memory pages to add (this
+ *                     determines how much extra space is needed to set
+ *                     up the page tables using memory slot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ struct kvm_vm *vm;
+ /*
+ * For x86 the maximum page table size for a memory region
+ * will be when only 4K pages are used. In that case the
+ * total extra size for page tables (for extra N pages) will
+ * be: N/512+N/512^2+N/512^3+... which is definitely smaller
+ * than N/512*2.
+ */
+ uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+
+ /* Create VM */
+ vm = vm_create(VM_MODE_FLAT48PG,
+ DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
+ O_RDWR);
+
+ /* Setup guest code */
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+
+ /* Setup IRQ Chip */
+ vm_create_irqchip(vm);
+
+ /* Add the first vCPU. */
+ vm_vcpu_add_default(vm, vcpuid, guest_code);
+
+ return vm;
+}
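+
+/* Example usage (illustrative sketch; guest_main stands in for any guest
+ * entry point linked into the test binary):
+ *
+ *	struct kvm_vm *vm = vm_create_default(0, 0, guest_main);
+ *	vcpu_run(vm, 0);
+ *	kvm_vm_free(vm);
+ */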
+
+struct kvm_x86_state {
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xsave xsave;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
+static int kvm_get_num_msrs(struct kvm_vm *vm)
+{
+ struct kvm_msr_list nmsrs;
+ int r;
+
+ nmsrs.nmsrs = 0;
+ r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+ r);
+
+ return nmsrs.nmsrs;
+}
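+
+/* The probe above relies on KVM_GET_MSR_INDEX_LIST failing with E2BIG
+ * when nmsrs is too small while still writing back the required count;
+ * a second call with a properly sized list (see vcpu_save_state())
+ * then succeeds.
+ */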
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_msr_list *list;
+ struct kvm_x86_state *state;
+ int nmsrs, r, i;
+ static int nested_size = -1;
+
+ if (nested_size == -1) {
+ nested_size = kvm_check_cap(KVM_CAP_NESTED_STATE);
+ TEST_ASSERT(nested_size <= sizeof(state->nested_),
+ "Nested state size too big, %i > %zi",
+ nested_size, sizeof(state->nested_));
+ }
+
+ nmsrs = kvm_get_num_msrs(vm);
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+ r);
+
+ state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
+ r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
+ r);
+
+ if (kvm_check_cap(KVM_CAP_XCRS)) {
+ r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
+ r);
+ }
+
+ r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
+ r);
+
+ if (nested_size) {
+ state->nested.size = sizeof(state->nested_);
+ r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
+ r);
+ TEST_ASSERT(state->nested.size <= nested_size,
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
+ } else
+ state->nested.size = 0;
+
+ state->msrs.nmsrs = nmsrs;
+ for (i = 0; i < nmsrs; i++)
+ state->msrs.entries[i].index = list->indices[i];
+ r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
+ TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed at %x)",
+ r, r == nmsrs ? -1 : list->indices[r]);
+
+ r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
+ r);
+
+ free(list);
+ return state;
+}
+
+void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int r;
+
+ if (state->nested.size) {
+ r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
+ r);
+ }
+
+ r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
+ r);
+
+ if (kvm_check_cap(KVM_CAP_XCRS)) {
+ r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
+ r);
+ }
+
+ r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
+ TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
+ r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
+
+ r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
+ r);
+
+ r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
+ TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
+ r);
+}
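+
+/* Example round trip (illustrative; mirrors state_test.c): save a vCPU,
+ * recreate the VM, and restore the state into a fresh vCPU of the same id:
+ *
+ *	struct kvm_x86_state *state = vcpu_save_state(vm, 0);
+ *	kvm_vm_release(vm);
+ *	kvm_vm_restart(vm, O_RDWR);
+ *	vm_vcpu_add(vm, 0, 0, 0);
+ *	vcpu_load_state(vm, 0, state);
+ *	free(state);
+ */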
diff --git a/tools/testing/selftests/kvm/platform_info_test.c b/tools/testing/selftests/kvm/platform_info_test.c
new file mode 100644
index 000000000..65db510dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/platform_info_test.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_CAP_MSR_PLATFORM_INFO
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of controlling guest access to
+ * MSR_PLATFORM_INFO.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 0
+#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
+
+static void guest_code(void)
+{
+ uint64_t msr_platform_info;
+
+ for (;;) {
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_SYNC(msr_platform_info);
+ asm volatile ("inc %r11");
+ }
+}
+
+static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+{
+ struct kvm_enable_cap cap = {};
+
+ cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
+ cap.flags = 0;
+ cap.args[0] = (int)enable;
+ vm_enable_cap(vm, &cap);
+}
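+
+/* KVM_CAP_MSR_PLATFORM_INFO is a per-VM capability: with it enabled a
+ * guest RDMSR of MSR_PLATFORM_INFO returns the host-provided value;
+ * with it disabled the access faults, which this test observes as
+ * KVM_EXIT_SHUTDOWN below.
+ */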
+
+static void test_msr_platform_info_enabled(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct guest_args args;
+
+ set_msr_platform_info_enabled(vm, true);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ guest_args_read(vm, VCPU_ID, &args);
+ TEST_ASSERT(args.port == GUEST_PORT_SYNC,
+		    "Received IO from port other than GUEST_PORT_SYNC: %u\n",
+		    args.port);
+ TEST_ASSERT((args.arg1 & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+}
+
+static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+{
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+
+ set_msr_platform_info_enabled(vm, false);
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int rv;
+ uint64_t msr_platform_info;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
+ if (!rv) {
+ fprintf(stderr,
+ "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+ test_msr_platform_info_enabled(vm);
+ test_msr_platform_info_disabled(vm);
+ vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/set_sregs_test.c b/tools/testing/selftests/kvm/set_sregs_test.c
new file mode 100644
index 000000000..881419d57
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_sregs_test.c
@@ -0,0 +1,54 @@
+/*
+ * KVM_SET_SREGS tests
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * This is a regression test for the bug fixed by the following commit:
+ * d3802286fa0f ("kvm: x86: Disallow illegal IA32_APIC_BASE MSR values")
+ *
+ * That bug allowed a user-mode program that called the KVM_SET_SREGS
+ * ioctl to put a VCPU's local APIC into an invalid state.
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+int main(int argc, char *argv[])
+{
+ struct kvm_sregs sregs;
+ struct kvm_vm *vm;
+ int rc;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, NULL);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ sregs.apic_base = 1 << 10;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
+ sregs.apic_base);
+ sregs.apic_base = 1 << 11;
+ rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
+ sregs.apic_base);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/state_test.c b/tools/testing/selftests/kvm/state_test.c
new file mode 100644
index 000000000..900e3e9df
--- /dev/null
+++ b/tools/testing/selftests/kvm/state_test.c
@@ -0,0 +1,196 @@
+/*
+ * KVM_GET/SET_* tests
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Tests for vCPU state save/restore, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#define VCPU_ID 5
+
+static bool have_nested_state;
+
+void l2_guest_code(void)
+{
+ GUEST_SYNC(5);
+
+ /* Exit to L1 */
+ vmcall();
+
+ /* L1 has now set up a shadow VMCS for us. */
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+ GUEST_SYNC(9);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
+ GUEST_SYNC(10);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
+ GUEST_SYNC(11);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ GUEST_SYNC(3);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* Check that the launched state is preserved. */
+ GUEST_ASSERT(vmlaunch());
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_SYNC(6);
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);
+
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
+ vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+ GUEST_ASSERT(vmlaunch());
+ GUEST_SYNC(7);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+
+ vmwrite(GUEST_RIP, 0xc0ffee);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+ GUEST_SYNC(12);
+ GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(vmresume());
+}
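+
+/* Stages 7-12 above exercise shadow VMCS semantics: a shadow VMCS can be
+ * loaded with vmptrld and accessed with vmread/vmwrite, but vmlaunch and
+ * vmresume on it must fail; with VMCS_LINK_POINTER set, L2's own
+ * vmread/vmwrite in l2_guest_code are redirected to the same shadow.
+ * These values are what survive the save/restore cycle driven by main().
+ */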
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages = NULL;
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ int stage;
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ printf("will skip nested state checks\n");
+ vcpu_args_set(vm, VCPU_ID, 1, 0);
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+ switch (run->io.port) {
+ case GUEST_PORT_ABORT:
+			TEST_ASSERT(false, "%s at %s:%ld", (const char *) regs1.rdi,
+				    __FILE__, (long) regs1.rsi);
+ /* NOT REACHED */
+ case GUEST_PORT_SYNC:
+ break;
+ case GUEST_PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", run->io.port);
+ }
+
+ /* PORT_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)regs1.rdi, "hello") &&
+ regs1.rsi == stage, "Unexpected register values vmexit #%lx, got %lx",
+			    (ulong) stage, (ulong) regs1.rsi);
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID, 0, 0);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c
new file mode 100644
index 000000000..213343e5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/sync_regs_test.c
@@ -0,0 +1,237 @@
+/*
+ * Test for x86 KVM_CAP_SYNC_REGS
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies expected behavior of x86 KVM_CAP_SYNC_REGS functionality,
+ * including requesting an invalid register set, and updates to/from
+ * values in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are
+ * toggled.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+
+#define VCPU_ID 5
+
+void guest_code(void)
+{
+ for (;;) {
+ GUEST_SYNC(0);
+ asm volatile ("inc %r11");
+ }
+}
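+
+/* The guest bumps r11 on every resume; the assertions in main() use that
+ * increment to tell whether a value was produced by the guest or came
+ * from kvm_run.s.regs.
+ */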
+
+static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
+{
+#define REG_COMPARE(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%llx, 0x%llx\n", \
+ left->reg, right->reg)
+ REG_COMPARE(rax);
+ REG_COMPARE(rbx);
+ REG_COMPARE(rcx);
+ REG_COMPARE(rdx);
+ REG_COMPARE(rsi);
+ REG_COMPARE(rdi);
+ REG_COMPARE(rsp);
+ REG_COMPARE(rbp);
+ REG_COMPARE(r8);
+ REG_COMPARE(r9);
+ REG_COMPARE(r10);
+ REG_COMPARE(r11);
+ REG_COMPARE(r12);
+ REG_COMPARE(r13);
+ REG_COMPARE(r14);
+ REG_COMPARE(r15);
+ REG_COMPARE(rip);
+ REG_COMPARE(rflags);
+#undef REG_COMPARE
+}
+
+static void compare_sregs(struct kvm_sregs *left, struct kvm_sregs *right)
+{
+}
+
+static void compare_vcpu_events(struct kvm_vcpu_events *left,
+ struct kvm_vcpu_events *right)
+{
+}
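+
+/* compare_sregs() and compare_vcpu_events() are intentionally empty
+ * placeholders (see the TODOs in main); only the general purpose
+ * registers are compared field by field so far.
+ */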
+
+#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
+#define INVALID_SYNC_FIELD 0x80000000
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_regs regs;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu_events events;
+ int rv, cap;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
+ fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+ if ((cap & INVALID_SYNC_FIELD) != 0) {
+ fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Request reading invalid register set from VCPU. */
+ run->kvm_valid_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+
+ /* Request setting invalid register set into VCPU. */
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(rv < 0 && errno == EINVAL,
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ rv);
+ vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+
+ /* Request and verify all valid register sets. */
+ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Set and verify various register values. */
+ run->s.regs.regs.r11 = 0xBAD1DEA;
+ run->s.regs.sregs.apic_base = 1 << 11;
+ /* TODO run->s.regs.events.XYZ = ABC; */
+
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBAD1DEA + 1,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ TEST_ASSERT(run->s.regs.sregs.apic_base == 1 << 11,
+ "apic_base sync regs value incorrect 0x%llx.",
+ run->s.regs.sregs.apic_base);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ compare_regs(&regs, &run->s.regs.regs);
+
+ vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ compare_sregs(&sregs, &run->s.regs.sregs);
+
+ vcpu_events_get(vm, VCPU_ID, &events);
+ compare_vcpu_events(&events, &run->s.regs.events);
+
+ /* Clear kvm_dirty_regs bits, verify new s.regs values are
+ * overwritten with existing guest values.
+ */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xDEADBEEF;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 != 0xDEADBEEF,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+
+ /* Clear kvm_valid_regs bits and kvm_dirty_bits.
+ * Verify s.regs values are not overwritten with existing guest values
+ * and that guest values are not overwritten with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = 0;
+ run->s.regs.regs.r11 = 0xAAAA;
+ regs.r11 = 0xBAC0;
+ vcpu_regs_set(vm, VCPU_ID, &regs);
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xAAAA,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBAC0 + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ /* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
+ * with existing guest values but that guest values are overwritten
+ * with kvm_sync_regs values.
+ */
+ run->kvm_valid_regs = 0;
+ run->kvm_dirty_regs = TEST_SYNC_FIELDS;
+ run->s.regs.regs.r11 = 0xBBBB;
+ rv = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->s.regs.regs.r11 == 0xBBBB,
+ "r11 sync regs value incorrect 0x%llx.",
+ run->s.regs.regs.r11);
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ TEST_ASSERT(regs.r11 == 0xBBBB + 1,
+ "r11 guest value incorrect 0x%llx.",
+ regs.r11);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
new file mode 100644
index 000000000..49bcc68b0
--- /dev/null
+++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c
@@ -0,0 +1,175 @@
+/*
+ * gtests/tests/vmx_tsc_adjust_test.c
+ *
+ * Copyright (C) 2018, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ *
+ * IA32_TSC_ADJUST test
+ *
+ * According to the SDM, "if an execution of WRMSR to the
+ * IA32_TIME_STAMP_COUNTER MSR adds (or subtracts) value X from the TSC,
+ * the logical processor also adds (or subtracts) value X from the
+ * IA32_TSC_ADJUST MSR."
+ *
+ * Note that when L1 doesn't intercept writes to IA32_TSC, a
+ * WRMSR(IA32_TSC) from L2 sets L1's TSC value, not L2's perceived TSC
+ * value.
+ *
+ * This test verifies that this unusual case is handled correctly.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "x86.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "../kselftest.h"
+
+#ifndef MSR_IA32_TSC_ADJUST
+#define MSR_IA32_TSC_ADJUST 0x3b
+#endif
+
+#define PAGE_SIZE 4096
+#define VCPU_ID 5
+
+#define TSC_ADJUST_VALUE (1ll << 32)
+#define TSC_OFFSET_VALUE -(1ll << 48)
+
+enum {
+ PORT_ABORT = 0x1000,
+ PORT_REPORT,
+ PORT_DONE,
+};
+
+enum {
+ VMXON_PAGE = 0,
+ VMCS_PAGE,
+ MSR_BITMAP_PAGE,
+
+ NUM_VMX_PAGES,
+};
+
+struct kvm_single_msr {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+} __attribute__((packed));
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void check_ia32_tsc_adjust(int64_t max)
+{
+ int64_t adjust;
+
+ adjust = rdmsr(MSR_IA32_TSC_ADJUST);
+ GUEST_SYNC(adjust);
+ GUEST_ASSERT(adjust <= max);
+}
+
+static void l2_guest_code(void)
+{
+ uint64_t l1_tsc = rdtsc() - TSC_OFFSET_VALUE;
+
+ wrmsr(MSR_IA32_TSC, l1_tsc - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
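+
+/* Arithmetic behind the check above (illustrative): l1_tsc reconstructs
+ * L1's TSC by undoing TSC_OFFSET_VALUE, so the WRMSR subtracts roughly
+ * TSC_ADJUST_VALUE from L1's TSC. Per the SDM rule quoted in the header,
+ * IA32_TSC_ADJUST drops by the same amount, from -TSC_ADJUST_VALUE to
+ * about -2 * TSC_ADJUST_VALUE.
+ */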
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+ uintptr_t save_cr3;
+
+ GUEST_ASSERT(rdtsc() < TSC_ADJUST_VALUE);
+ wrmsr(MSR_IA32_TSC, rdtsc() - TSC_ADJUST_VALUE);
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ vmwrite(TSC_OFFSET, TSC_OFFSET_VALUE);
+
+ /* Jump into L2. First, test failure to load guest CR3. */
+ save_cr3 = vmreadz(GUEST_CR3);
+ vmwrite(GUEST_CR3, -1ull);
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) ==
+ (EXIT_REASON_FAILED_VMENTRY | EXIT_REASON_INVALID_STATE));
+ check_ia32_tsc_adjust(-1 * TSC_ADJUST_VALUE);
+ vmwrite(GUEST_CR3, save_cr3);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ check_ia32_tsc_adjust(-2 * TSC_ADJUST_VALUE);
+
+ GUEST_DONE();
+}
+
+void report(int64_t val)
+{
+ printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+}
+
+int main(int argc, char *argv[])
+{
+ struct vmx_pages *vmx_pages;
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
+
+ if (!(entry->ecx & CPUID_VMX)) {
+ fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vmx_pages = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct guest_args args;
+
+ vcpu_run(vm, VCPU_ID);
+ guest_args_read(vm, VCPU_ID, &args);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (args.port) {
+ case GUEST_PORT_ABORT:
+ TEST_ASSERT(false, "%s", (const char *) args.arg0);
+ /* NOT REACHED */
+ case GUEST_PORT_SYNC:
+ report(args.arg1);
+ break;
+ case GUEST_PORT_DONE:
+ goto done;
+ default:
+ TEST_ASSERT(false, "Unknown port 0x%x.", args.port);
+ }
+ }
+
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
new file mode 100644
index 000000000..9700281be
--- /dev/null
+++ b/tools/testing/selftests/lib.mk
@@ -0,0 +1,169 @@
+# This mimics the top-level Makefile. We do it explicitly here so that this
+# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
+CC := $(CROSS_COMPILE)gcc
+endif
+
+ifeq (0,$(MAKELEVEL))
+OUTPUT := $(shell pwd)
+endif
+
+# The following are built by lib.mk common compile rules.
+# TEST_CUSTOM_PROGS should be used by tests that require
+# custom build rule and prevent common build rule use.
+# TEST_PROGS are for test shell scripts.
+# TEST_CUSTOM_PROGS and TEST_PROGS will be run by common run_tests
+# and install targets. Common clean doesn't touch them.
+TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
+TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
+TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
+
+ifdef KSFT_KHDR_INSTALL
+top_srcdir ?= ../../../..
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+.PHONY: khdr
+.NOTPARALLEL:
+khdr:
+ make ARCH=$(ARCH) -C $(top_srcdir) headers_install
+
+all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+else
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
+endif
+
+.ONESHELL:
+define RUN_TEST_PRINT_RESULT
+ TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
+ echo $$TEST_HDR_MSG; \
+ echo "========================================"; \
+ if [ ! -x $$TEST ]; then \
+ echo "$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.";\
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ else \
+ cd `dirname $$TEST` > /dev/null; \
+ if [ "X$(summary)" != "X" ]; then \
+ (./$$BASENAME_TEST > /tmp/$$BASENAME_TEST 2>&1 && \
+ echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+ (if [ $$? -eq $$skip ]; then \
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+ else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ fi;) \
+ else \
+ (./$$BASENAME_TEST && \
+ echo "ok 1..$$test_num $$TEST_HDR_MSG [PASS]") || \
+ (if [ $$? -eq $$skip ]; then \
+ echo "not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]"; \
+ else echo "not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]"; \
+ fi;) \
+ fi; \
+ cd - > /dev/null; \
+ fi;
+endef
+
+define RUN_TESTS
+ @export KSFT_TAP_LEVEL=`echo 1`; \
+ test_num=`echo 0`; \
+ skip=`echo 4`; \
+ echo "TAP version 13"; \
+ for TEST in $(1); do \
+ BASENAME_TEST=`basename $$TEST`; \
+ test_num=`echo $$test_num+1 | bc`; \
+ $(call RUN_TEST_PRINT_RESULT,$(TEST),$(BASENAME_TEST),$(test_num),$(skip)) \
+ done;
+endef
+
+run_tests: all
+ifneq ($(KBUILD_SRC),)
+ @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
+ @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+ fi
+ @if [ "X$(TEST_PROGS)" != "X" ]; then
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
+ else
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+ fi
+else
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+endif
+
+define INSTALL_SINGLE_RULE
+ $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
+ $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
+define INSTALL_RULE
+ $(eval INSTALL_LIST = $(TEST_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+endef
+
+install: all
+ifdef INSTALL_PATH
+ $(INSTALL_RULE)
+else
+ $(error Error: set INSTALL_PATH to use install)
+endif
+
+define EMIT_TESTS
+ @test_num=`echo 0`; \
+ for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
+ BASENAME_TEST=`basename $$TEST`; \
+ test_num=`echo $$test_num+1 | bc`; \
+ TEST_HDR_MSG="selftests: "`basename $$PWD`:" $$BASENAME_TEST"; \
+ echo "echo $$TEST_HDR_MSG"; \
+ if [ ! -x $$TEST ]; then \
+ echo "echo \"$$TEST_HDR_MSG: Warning: file $$BASENAME_TEST is not executable, correct this.\""; \
+ echo "echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\""; \
+ else
+ echo "(./$$BASENAME_TEST >> \$$OUTPUT 2>&1 && echo \"ok 1..$$test_num $$TEST_HDR_MSG [PASS]\") || (if [ \$$? -eq \$$skip ]; then echo \"not ok 1..$$test_num $$TEST_HDR_MSG [SKIP]\"; else echo \"not ok 1..$$test_num $$TEST_HDR_MSG [FAIL]\"; fi;)"; \
+ fi; \
+ done;
+endef
+
+emit_tests:
+ $(EMIT_TESTS)
+
+# Define RM if it isn't already. It is undefined in the make O= case.
+ifeq ($(RM),)
+RM := rm -f
+endif
+
+define CLEAN
+ $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+endef
+
+clean:
+ $(CLEAN)
+
+# When running make O= with the kselftest target from the top level,
+# the following aren't defined.
+#
+ifneq ($(KBUILD_SRC),)
+LINK.c = $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+COMPILE.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c
+LINK.S = $(CC) $(ASFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH)
+endif
+
+# Selftest makefiles can override these targets by setting
+# OVERRIDE_TARGETS = 1.
+ifeq ($(OVERRIDE_TARGETS),)
+$(OUTPUT)/%:%.c
+ $(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%.o:%.S
+ $(COMPILE.S) $^ -o $@
+
+$(OUTPUT)/%:%.S
+ $(LINK.S) $^ $(LDLIBS) -o $@
+endif
+
+.PHONY: run_tests all clean install emit_tests
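+
+# Example consumer (illustrative; "my_test" is a placeholder): a minimal
+# selftest Makefile only needs to name its binaries and include this file:
+#
+#	TEST_GEN_PROGS := my_test
+#	include ../lib.mk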
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
new file mode 100644
index 000000000..70d5711e3
--- /dev/null
+++ b/tools/testing/selftests/lib/Makefile
@@ -0,0 +1,8 @@
+# Makefile for lib/ function selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
new file mode 100755
index 000000000..5a90006d1
--- /dev/null
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Runs bitmap infrastructure tests using test_bitmap kernel module
+if ! /sbin/modprobe -q -n test_bitmap; then
+ echo "bitmap: module test_bitmap is not found [SKIP]"
+ exit $ksft_skip
+fi
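+
+# Note: "modprobe -q -n" is a quiet dry run, so the check above only
+# verifies that the module exists; the modprobe below actually loads
+# test_bitmap, whose module init runs the tests, and -r unloads it again.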
+
+if /sbin/modprobe -q test_bitmap; then
+ /sbin/modprobe -q -r test_bitmap
+ echo "bitmap: ok"
+else
+ echo "bitmap: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
new file mode 100644
index 000000000..126933bcc
--- /dev/null
+++ b/tools/testing/selftests/lib/config
@@ -0,0 +1,3 @@
+CONFIG_TEST_PRINTF=m
+CONFIG_TEST_BITMAP=m
+CONFIG_PRIME_NUMBERS=m
diff --git a/tools/testing/selftests/lib/prime_numbers.sh b/tools/testing/selftests/lib/prime_numbers.sh
new file mode 100755
index 000000000..78e7483c8
--- /dev/null
+++ b/tools/testing/selftests/lib/prime_numbers.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Checks fast/slow prime_number generation for inconsistencies
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n prime_numbers; then
+ echo "prime_numbers: module prime_numbers is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q prime_numbers selftest=65536; then
+ /sbin/modprobe -q -r prime_numbers
+ echo "prime_numbers: ok"
+else
+ echo "prime_numbers: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/lib/printf.sh b/tools/testing/selftests/lib/printf.sh
new file mode 100755
index 000000000..45a23e2d6
--- /dev/null
+++ b/tools/testing/selftests/lib/printf.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs printf infrastructure using test_printf kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_printf; then
+ echo "printf: module test_printf is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_printf; then
+ /sbin/modprobe -q -r test_printf
+ echo "printf: ok"
+else
+ echo "printf: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/locking/Makefile b/tools/testing/selftests/locking/Makefile
new file mode 100644
index 000000000..6e7761ab3
--- /dev/null
+++ b/tools/testing/selftests/locking/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for locking/ww_mutex selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := ww_mutex.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/locking/ww_mutex.sh b/tools/testing/selftests/locking/ww_mutex.sh
new file mode 100755
index 000000000..91e4ac756
--- /dev/null
+++ b/tools/testing/selftests/locking/ww_mutex.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Runs API tests for struct ww_mutex (Wait/Wound mutexes)
+if ! /sbin/modprobe -q -n test-ww_mutex; then
+ echo "ww_mutex: module test-ww_mutex is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test-ww_mutex; then
+ /sbin/modprobe -q -r test-ww_mutex
+ echo "locking/ww_mutex: ok"
+else
+ echo "locking/ww_mutex: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore
new file mode 100644
index 000000000..8745eba39
--- /dev/null
+++ b/tools/testing/selftests/media_tests/.gitignore
@@ -0,0 +1,3 @@
+media_device_test
+media_device_open
+video_device_test
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
new file mode 100644
index 000000000..60826d7d3
--- /dev/null
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+CFLAGS += -I../ -I../../../../usr/include/
+TEST_GEN_PROGS := media_device_test media_device_open video_device_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/bind_unbind_sample.sh b/tools/testing/selftests/media_tests/bind_unbind_sample.sh
new file mode 100755
index 000000000..0101c1ec4
--- /dev/null
+++ b/tools/testing/selftests/media_tests/bind_unbind_sample.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Find the device number in /sys/bus/usb/drivers/drivername
+# Edit this file to update the driver number and name
+# Example test for the uvcvideo driver:
+#i=0
+#while :; do
+#	i=$((i+1))
+#	echo 1-5:1.0 > /sys/bus/usb/drivers/uvcvideo/unbind;
+#	echo 1-5:1.0 > /sys/bus/usb/drivers/uvcvideo/bind;
+#	clear
+#	echo $i
+#done
diff --git a/tools/testing/selftests/media_tests/media_device_open.c b/tools/testing/selftests/media_tests/media_device_open.c
new file mode 100644
index 000000000..93183a37b
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_open.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * media_device_open.c - Media Controller Device Open Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and a driver that make use of the
+ * Media Controller API are present in the system.
+ *
+ * This test opens user specified Media Device and calls
+ * MEDIA_IOC_DEVICE_INFO ioctl, closes the file, and exits.
+ *
+ * Usage:
+ * sudo ./media_device_open -d /dev/mediaX
+ *
+ * Run this test in a loop and run bind/unbind on the driver.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/media.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char media_device[256];
+ int count = 0;
+ struct media_device_info mdi;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(media_device, optarg, sizeof(media_device) - 1);
+ media_device[sizeof(media_device)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+ /* Open Media device and keep it open */
+ fd = open(media_device, O_RDWR);
+ if (fd == -1) {
+ printf("Media Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+ if (ret < 0)
+ printf("Media Device Info errno %s\n", strerror(errno));
+ else
+ printf("Media device model %s driver %s\n",
+ mdi.model, mdi.driver);
+}
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
new file mode 100644
index 000000000..4b9953359
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * media_device_test.c - Media Controller Device ioctl loop Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and a driver that make use of the
+ * Media Controller API are present in the system.
+ *
+ * This test opens user specified Media Device and calls
+ * MEDIA_IOC_DEVICE_INFO ioctl in a loop once every 10
+ * seconds.
+ *
+ * Usage:
+ * sudo ./media_device_test -d /dev/mediaX
+ *
+ * While test is running, remove the device and
+ * ensure there are no use after free errors and
+ * other Oops in the dmesg. Enable KaSan kernel
+ * config option for use-after-free error detection.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <linux/media.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char media_device[256];
+ int count;
+ struct media_device_info mdi;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(media_device, optarg, sizeof(media_device) - 1);
+ media_device[sizeof(media_device)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
+
+	/* Generate a random number of iterations */
+ srand((unsigned int) time(NULL));
+ count = rand();
+
+ /* Open Media device and keep it open */
+ fd = open(media_device, O_RDWR);
+ if (fd == -1) {
+ printf("Media Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ printf("\nNote:\n"
+ "While test is running, remove the device and\n"
+ "ensure there are no use after free errors and\n"
+ "other Oops in the dmesg. Enable KaSan kernel\n"
+ "config option for use-after-free error detection.\n\n");
+
+ printf("Running test for %d iterations\n", count);
+
+ while (count > 0) {
+ ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+ if (ret < 0)
+ printf("Media Device Info errno %s\n", strerror(errno));
+ else
+ printf("Media device model %s driver %s - count %d\n",
+ mdi.model, mdi.driver, count);
+ sleep(10);
+ count--;
+ }
+}
diff --git a/tools/testing/selftests/media_tests/open_loop_test.sh b/tools/testing/selftests/media_tests/open_loop_test.sh
new file mode 100755
index 000000000..d4c0179bb
--- /dev/null
+++ b/tools/testing/selftests/media_tests/open_loop_test.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+i=0
+file=/dev/media$1
+while :; do
+	echo $file
+	i=$((i+1))
+	R=$(./media_device_open -d $file)
+	# clear
+	echo -e "Loop $i\n$R"
+done
diff --git a/tools/testing/selftests/media_tests/regression_test.txt b/tools/testing/selftests/media_tests/regression_test.txt
new file mode 100644
index 000000000..262736768
--- /dev/null
+++ b/tools/testing/selftests/media_tests/regression_test.txt
@@ -0,0 +1,43 @@
+Testing for regressions in Media Controller API register, ioctl, syscall,
+and unregister paths. There have been a few problems that result in
+use-after-free on media_device, media_devnode, and cdev pointers when the
+driver is unbound while an ioctl is in progress.
+
+Test Procedure:
+
+Run a bind/unbind loop while ioctls are in progress.
+Run rmmod and modprobe.
+Disconnect the device.
+
+Setup:
+
+Build media_device_test
+cd tools/testing/selftests/media_tests
+make
+
+Regression test for the cdev use-after-free error on /dev/mediaX when the
+driver is unbound:
+
+Start media_device_test to regression test the media devnode dynamic
+allocation and cdev use-after-free fixes. This opens media device files and
+sits in a loop running the MEDIA_IOC_DEVICE_INFO ioctl once every 10
+seconds. The idea is that when the device file goes away, the media devnode
+and cdev should stick around until this test exits.
+
+The test runs for a random number of iterations, or until the user kills
+it, with a 10 second sleep between the ioctl calls.
+
+sudo ./media_device_test -d /dev/mediaX
+
+Regression test for the media_devnode unregister race with the ioctl
+syscall:
+
+Start 6 open_loop_test.sh tests with different /dev/mediaX files. When a
+device file goes away after unbind, the device file name changes. Start the
+test with all possible device names. If we start with /dev/media0 for
+example, after unbind /dev/media1 or /dev/media2 could get created. The
+idea is to keep ioctls going while bind/unbind runs.
+
+Copy bind_unbind_sample.sh and edit it to specify the driver name and
+device number to bind and unbind. Then start bind_unbind.sh.
+
+Run dmesg looking for any use-after-free errors or mutex lock errors.
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
new file mode 100644
index 000000000..0f6aef2e2
--- /dev/null
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * video_device_test - Video Device Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ */
+
+/*
+ * This file adds a test for Video Device. This test should not be included
+ * in the Kselftest run. This test should be run when hardware and a driver
+ * that make use of the V4L2 API are present.
+ *
+ * This test opens user specified Video Device and calls video ioctls in a
+ * loop once every 10 seconds.
+ *
+ * Usage:
+ * sudo ./video_device_test -d /dev/videoX
+ *
+ * While test is running, remove the device or unbind the driver and
+ * ensure there are no use after free errors and other Oops in the
+ * dmesg.
+ * When possible, enable KaSan kernel config option for use-after-free
+ * error detection.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <time.h>
+#include <linux/videodev2.h>
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char video_dev[256];
+ int count;
+ struct v4l2_tuner vtuner;
+ struct v4l2_capability vcap;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(video_dev, optarg, sizeof(video_dev) - 1);
+ video_dev[sizeof(video_dev)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+	/* Generate a random number of iterations */
+ srand((unsigned int) time(NULL));
+ count = rand();
+
+ /* Open Video device and keep it open */
+ fd = open(video_dev, O_RDWR);
+ if (fd == -1) {
+ printf("Video Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ printf("\nNote:\n"
+ "While test is running, remove the device or unbind\n"
+ "driver and ensure there are no use after free errors\n"
+ "and other Oops in the dmesg. When possible, enable KaSan\n"
+ "kernel config option for use-after-free error detection.\n\n");
+
+ while (count > 0) {
+ ret = ioctl(fd, VIDIOC_QUERYCAP, &vcap);
+ if (ret < 0)
+ printf("VIDIOC_QUERYCAP errno %s\n", strerror(errno));
+ else
+ printf("Video device driver %s\n", vcap.driver);
+
+ ret = ioctl(fd, VIDIOC_G_TUNER, &vtuner);
+ if (ret < 0)
+ printf("VIDIOC_G_TUNER, errno %s\n", strerror(errno));
+ else
+ printf("type %d rangelow %d rangehigh %d\n",
+ vtuner.type, vtuner.rangelow, vtuner.rangehigh);
+ sleep(10);
+ count--;
+ }
+}
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
new file mode 100644
index 000000000..020c44f49
--- /dev/null
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -0,0 +1 @@
+membarrier_test
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
new file mode 100644
index 000000000..02845532b
--- /dev/null
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -0,0 +1,6 @@
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := membarrier_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/membarrier/membarrier_test.c b/tools/testing/selftests/membarrier/membarrier_test.c
new file mode 100644
index 000000000..6793f8ecc
--- /dev/null
+++ b/tools/testing/selftests/membarrier/membarrier_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/membarrier.h>
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+static int sys_membarrier(int cmd, int flags)
+{
+ return syscall(__NR_membarrier, cmd, flags);
+}
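+
+/* Example (illustrative): the canonical query-then-use pattern is
+ *
+ *	int mask = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+ *	if (mask & MEMBARRIER_CMD_GLOBAL)
+ *		sys_membarrier(MEMBARRIER_CMD_GLOBAL, 0);
+ */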
+
+static int test_membarrier_cmd_fail(void)
+{
+ int cmd = -1, flags = 0;
+ const char *test_name = "sys membarrier invalid command";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: command = %d, flags = %d. Should fail, but passed\n",
+ test_name, cmd, flags);
+ }
+ if (errno != EINVAL) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EINVAL, strerror(EINVAL),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: command = %d, flags = %d, errno = %d. Failed as expected\n",
+ test_name, cmd, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_flags_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_QUERY, flags = 1;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_QUERY invalid flags";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should fail, but passed\n",
+ test_name, flags);
+ }
+ if (errno != EINVAL) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EINVAL, strerror(EINVAL),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d, errno = %d. Failed as expected\n",
+ test_name, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_global_success(void)
+{
+ int cmd = MEMBARRIER_CMD_GLOBAL, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n", test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED not registered failure";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should fail, but passed\n",
+ test_name, flags);
+ }
+ if (errno != EPERM) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EPERM, strerror(EPERM),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_register_private_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_fail(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE not registered failure";
+
+ if (sys_membarrier(cmd, flags) != -1) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should fail, but passed\n",
+ test_name, flags);
+ }
+ if (errno != EPERM) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d. Should return (%d: \"%s\"), but returned (%d: \"%s\").\n",
+ test_name, flags, EPERM, strerror(EPERM),
+ errno, strerror(errno));
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ return 0;
+}
+
+static int test_membarrier_register_private_expedited_sync_core_success(void)
+{
+ int cmd = MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_private_expedited_sync_core_success(void)
+{
+ int cmd = MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_register_global_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier_global_expedited_success(void)
+{
+ int cmd = MEMBARRIER_CMD_GLOBAL_EXPEDITED, flags = 0;
+ const char *test_name = "sys membarrier MEMBARRIER_CMD_GLOBAL_EXPEDITED";
+
+ if (sys_membarrier(cmd, flags) != 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ }
+
+ ksft_test_result_pass(
+ "%s test: flags = %d\n",
+ test_name, flags);
+ return 0;
+}
+
+static int test_membarrier(void)
+{
+ int status;
+
+ status = test_membarrier_cmd_fail();
+ if (status)
+ return status;
+ status = test_membarrier_flags_fail();
+ if (status)
+ return status;
+ status = test_membarrier_global_success();
+ if (status)
+ return status;
+ status = test_membarrier_private_expedited_fail();
+ if (status)
+ return status;
+ status = test_membarrier_register_private_expedited_success();
+ if (status)
+ return status;
+ status = test_membarrier_private_expedited_success();
+ if (status)
+ return status;
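+ /* CMD_QUERY returns a bitmask of the supported membarrier commands. */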
+ status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
+ if (status < 0) {
+ ksft_test_result_fail("sys_membarrier() failed\n");
+ return status;
+ }
+ if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
+ status = test_membarrier_private_expedited_sync_core_fail();
+ if (status)
+ return status;
+ status = test_membarrier_register_private_expedited_sync_core_success();
+ if (status)
+ return status;
+ status = test_membarrier_private_expedited_sync_core_success();
+ if (status)
+ return status;
+ }
+ /*
+ * It is valid to send a global membarrier from a non-registered
+ * process.
+ */
+ status = test_membarrier_global_expedited_success();
+ if (status)
+ return status;
+ status = test_membarrier_register_global_expedited_success();
+ if (status)
+ return status;
+ status = test_membarrier_global_expedited_success();
+ if (status)
+ return status;
+ return 0;
+}
+
+static int test_membarrier_query(void)
+{
+ int flags = 0, ret;
+
+ ret = sys_membarrier(MEMBARRIER_CMD_QUERY, flags);
+ if (ret < 0) {
+ if (errno == ENOSYS) {
+ /*
+ * It is valid to build a kernel with
+ * CONFIG_MEMBARRIER=n; in that case, skip the tests.
+ */
+ ksft_exit_skip(
+ "sys membarrier (CONFIG_MEMBARRIER) is disabled.\n");
+ }
+ ksft_exit_fail_msg("sys_membarrier() failed\n");
+ }
+ if (!(ret & MEMBARRIER_CMD_GLOBAL))
+ ksft_exit_skip(
+ "sys_membarrier unsupported: CMD_GLOBAL not found.\n");
+
+ ksft_test_result_pass("sys_membarrier available\n");
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+
+ test_membarrier_query();
+ test_membarrier();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
new file mode 100644
index 000000000..afe87c40a
--- /dev/null
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -0,0 +1,4 @@
+fuse_mnt
+fuse_test
+memfd_test
+memfd-test-file
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
new file mode 100644
index 000000000..53a848109
--- /dev/null
+++ b/tools/testing/selftests/memfd/Makefile
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -D_FILE_OFFSET_BITS=64
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+
+TEST_GEN_PROGS := memfd_test
+TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
+TEST_GEN_FILES := fuse_mnt fuse_test
+
+fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
+
+include ../lib.mk
+
+$(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs)
+
+$(OUTPUT)/memfd_test: memfd_test.c common.o
+$(OUTPUT)/fuse_test: fuse_test.c common.o
+
+EXTRA_CLEAN = common.o
diff --git a/tools/testing/selftests/memfd/common.c b/tools/testing/selftests/memfd/common.c
new file mode 100644
index 000000000..8eb3d75f6
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "common.h"
+
+int hugetlbfs_test = 0;
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
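+ /* Hugepagesize is reported in kB; shift left by 10 to get bytes. */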
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
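+/* Use the raw syscall: a libc wrapper for memfd_create(2) may be absent. */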
+int sys_memfd_create(const char *name, unsigned int flags)
+{
+ if (hugetlbfs_test)
+ flags |= MFD_HUGETLB;
+
+ return syscall(__NR_memfd_create, name, flags);
+}
diff --git a/tools/testing/selftests/memfd/common.h b/tools/testing/selftests/memfd/common.h
new file mode 100644
index 000000000..522d2c630
--- /dev/null
+++ b/tools/testing/selftests/memfd/common.h
@@ -0,0 +1,9 @@
+#ifndef COMMON_H_
+#define COMMON_H_
+
+extern int hugetlbfs_test;
+
+unsigned long default_huge_page_size(void);
+int sys_memfd_create(const char *name, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/memfd/config b/tools/testing/selftests/memfd/config
new file mode 100644
index 000000000..835c7f4da
--- /dev/null
+++ b/tools/testing/selftests/memfd/config
@@ -0,0 +1 @@
+CONFIG_FUSE_FS=m
diff --git a/tools/testing/selftests/memfd/fuse_mnt.c b/tools/testing/selftests/memfd/fuse_mnt.c
new file mode 100644
index 000000000..6936f2a00
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memfd test file-system
+ * This file uses FUSE to create a dummy file-system with only one file /memfd.
+ * This file is read-only and takes 1s per read.
+ *
+ * This file-system is used by the memfd test-cases to force the kernel to pin
+ * pages during read(). Due to the 1s delay of this file-system, this is a
+ * nice way to test race-conditions against get_user_pages() in the kernel.
+ *
+ * We use direct_io==1 to force the kernel to use direct-IO for this
+ * file-system.
+ */
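+
+/*
+ * Typical use (see run_fuse_test.sh): mount with "./fuse_mnt ./mnt", after
+ * which every read() of ./mnt/memfd blocks for roughly one second before
+ * returning the file content.
+ */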
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static const char memfd_content[] = "memfd-example-content";
+static const char memfd_path[] = "/memfd";
+
+static int memfd_getattr(const char *path, struct stat *st)
+{
+ memset(st, 0, sizeof(*st));
+
+ if (!strcmp(path, "/")) {
+ st->st_mode = S_IFDIR | 0755;
+ st->st_nlink = 2;
+ } else if (!strcmp(path, memfd_path)) {
+ st->st_mode = S_IFREG | 0444;
+ st->st_nlink = 1;
+ st->st_size = strlen(memfd_content);
+ } else {
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int memfd_readdir(const char *path,
+ void *buf,
+ fuse_fill_dir_t filler,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ if (strcmp(path, "/"))
+ return -ENOENT;
+
+ filler(buf, ".", NULL, 0);
+ filler(buf, "..", NULL, 0);
+ filler(buf, memfd_path + 1, NULL, 0);
+
+ return 0;
+}
+
+static int memfd_open(const char *path, struct fuse_file_info *fi)
+{
+ if (strcmp(path, memfd_path))
+ return -ENOENT;
+
+ if ((fi->flags & 3) != O_RDONLY)
+ return -EACCES;
+
+ /* force direct-IO */
+ fi->direct_io = 1;
+
+ return 0;
+}
+
+static int memfd_read(const char *path,
+ char *buf,
+ size_t size,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ size_t len;
+
+ if (strcmp(path, memfd_path) != 0)
+ return -ENOENT;
+
+ sleep(1);
+
+ len = strlen(memfd_content);
+ if (offset < len) {
+ if (offset + size > len)
+ size = len - offset;
+
+ memcpy(buf, memfd_content + offset, size);
+ } else {
+ size = 0;
+ }
+
+ return size;
+}
+
+static struct fuse_operations memfd_ops = {
+ .getattr = memfd_getattr,
+ .readdir = memfd_readdir,
+ .open = memfd_open,
+ .read = memfd_read,
+};
+
+int main(int argc, char *argv[])
+{
+ return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
new file mode 100644
index 000000000..b018e8357
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * memfd GUP test-case
+ * This tests memfd interactions with get_user_pages(). We require the
+ * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
+ * file-system delays _all_ reads by 1s and forces direct-IO. This means any
+ * read() on files in that file-system will pin the receive-buffer pages for at
+ * least 1s via get_user_pages().
+ *
+ * We use this trick to race ADD_SEALS against a write on a memfd object. The
+ * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
+ * the read() syscall with our memory-mapped memfd object as receive buffer to
+ * force the kernel to write into our memfd object.
+ */
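+
+/*
+ * Rough timeline of the race: at t=0 the parent starts a read() that keeps
+ * the mapped memfd pages pinned for about 1s; at t=200ms the sealing thread
+ * attempts F_SEAL_WRITE and is expected to fail with EBUSY; once the read()
+ * has finished, a second sealing attempt must succeed.
+ */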
+
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65536
+
+static size_t mfd_def_size = MFD_DEF_SIZE;
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+ int r, fd;
+
+ fd = sys_memfd_create(name, flags);
+ if (fd < 0) {
+ printf("memfd_create(\"%s\", %u) failed: %m\n",
+ name, flags);
+ abort();
+ }
+
+ r = ftruncate(fd, sz);
+ if (r < 0) {
+ printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+ abort();
+ }
+
+ return fd;
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+ long r;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0) {
+ printf("GET_SEALS(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ if (s != seals) {
+ printf("%llu != %llu = GET_SEALS(%d)\n",
+ (unsigned long long)seals, (unsigned long long)s, fd);
+ abort();
+ }
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0) {
+ printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+}
+
+static int mfd_busy_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ s = 0;
+ else
+ s = r;
+
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0 && errno != EBUSY) {
+ printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+
+ return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static int global_mfd = -1;
+static void *global_p = NULL;
+
+static int sealing_thread_fn(void *arg)
+{
+ int r;
+
+ /*
+ * This thread first waits 200ms so any pending operation in the parent
+ * is correctly started. After that, it tries to seal @global_mfd as
+ * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
+ * that memory mapped object still ongoing.
+ * We then wait one more second and try sealing again. This time it
+ * must succeed as there shouldn't be anyone else pinning the pages.
+ */
+
+ /* wait 200ms for FUSE-request to be active */
+ usleep(200000);
+
+ /* unmap the mapping before sealing to avoid i_mmap_writable failures */
+ munmap(global_p, mfd_def_size);
+
+ /* Try sealing the global file; expect EBUSY or success. Current
+ * kernels will never succeed, but in the future, kernels might
+ * implement page-replacements or other fancy ways to avoid racing
+ * writes. */
+ r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
+ if (r >= 0) {
+ printf("HURRAY! This kernel fixed GUP races!\n");
+ } else {
+ /* wait 1s more so the FUSE-request is done */
+ sleep(1);
+
+ /* try sealing the global file again */
+ mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
+ }
+
+ return 0;
+}
+
+static pid_t spawn_sealing_thread(void)
+{
+ uint8_t *stack;
+ pid_t pid;
+
+ stack = malloc(STACK_SIZE);
+ if (!stack) {
+ printf("malloc(STACK_SIZE) failed: %m\n");
+ abort();
+ }
+
+ pid = clone(sealing_thread_fn,
+ stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
+ NULL);
+ if (pid < 0) {
+ printf("clone() failed: %m\n");
+ abort();
+ }
+
+ return pid;
+}
+
+static void join_sealing_thread(pid_t pid)
+{
+ waitpid(pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+ char *zero;
+ int fd, mfd, r;
+ void *p;
+ int was_sealed;
+ pid_t pid;
+
+ if (argc < 2) {
+ printf("error: please pass path to file in fuse_mnt mount-point\n");
+ abort();
+ }
+
+ if (argc >= 3) {
+ if (!strcmp(argv[2], "hugetlbfs")) {
+ unsigned long hpage_size = default_huge_page_size();
+
+ if (!hpage_size) {
+ printf("Unable to determine huge page size\n");
+ abort();
+ }
+
+ hugetlbfs_test = 1;
+ mfd_def_size = hpage_size * 2;
+ } else {
+ printf("Unknown option: %s\n", argv[2]);
+ abort();
+ }
+ }
+
+ zero = calloc(mfd_def_size, sizeof(*zero));
+ if (!zero) {
+ printf("calloc(%zu) failed: %m\n", mfd_def_size);
+ abort();
+ }
+
+ /* open FUSE memfd file for GUP testing */
+ printf("opening: %s\n", argv[1]);
+ fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ printf("cannot open(\"%s\"): %m\n", argv[1]);
+ abort();
+ }
+
+ /* create new memfd-object */
+ mfd = mfd_assert_new("kern_memfd_fuse",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* mmap memfd-object for writing */
+ p = mfd_assert_mmap_shared(mfd);
+
+ /* pass mfd+mapping to a separate sealing-thread which tries to seal
+ * the memfd objects with SEAL_WRITE while we write into it */
+ global_mfd = mfd;
+ global_p = p;
+ pid = spawn_sealing_thread();
+
+ /* Use read() on the FUSE file to read into our memory-mapped memfd
+ * object. This races the other thread which tries to seal the
+ * memfd-object.
+ * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
+ * This guarantees that the receive-buffer is pinned for 1s until the
+ * data is written into it. The racing ADD_SEALS should thus fail as
+ * the pages are still pinned. */
+ r = read(fd, p, mfd_def_size);
+ if (r < 0) {
+ printf("read() failed: %m\n");
+ abort();
+ } else if (!r) {
+ printf("unexpected EOF on read()\n");
+ abort();
+ }
+
+ was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
+
+ /* Wait for sealing-thread to finish and verify that it
+ * successfully sealed the file after the second try. */
+ join_sealing_thread(pid);
+ mfd_assert_has_seals(mfd, F_SEAL_WRITE);
+
+ /* *IF* the memfd-object was sealed at the time our read() returned,
+ * then the kernel did a page-replacement or canceled the read() (or
+ * whatever magic it did..). In that case, the memfd object is still
+ * all zero.
+ * In case the memfd-object was *not* sealed, the read() was successful
+ * and the memfd object must *not* be all zero.
+ * Note that in real scenarios, there might be a mixture of both, but
+ * in this test-cases, we have explicit 200ms delays which should be
+ * enough to avoid any in-flight writes. */
+
+ p = mfd_assert_mmap_private(mfd);
+ if (was_sealed && memcmp(p, zero, mfd_def_size)) {
+ printf("memfd sealed during read() but data not discarded\n");
+ abort();
+ } else if (!was_sealed && !memcmp(p, zero, mfd_def_size)) {
+ printf("memfd sealed after read() but data discarded\n");
+ abort();
+ }
+
+ close(mfd);
+ close(fd);
+
+ printf("fuse: DONE\n");
+ free(zero);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
new file mode 100644
index 000000000..a4e520b94
--- /dev/null
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -0,0 +1,970 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#define MEMFD_STR "memfd:"
+#define MEMFD_HUGE_STR "memfd-hugetlb:"
+#define SHARED_FT_STR "(shared file-table)"
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65536
+
+/*
+ * Default is not to test hugetlbfs
+ */
+static size_t mfd_def_size = MFD_DEF_SIZE;
+static const char *memfd_str = MEMFD_STR;
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+ int r, fd;
+
+ fd = sys_memfd_create(name, flags);
+ if (fd < 0) {
+ printf("memfd_create(\"%s\", %u) failed: %m\n",
+ name, flags);
+ abort();
+ }
+
+ r = ftruncate(fd, sz);
+ if (r < 0) {
+ printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+ abort();
+ }
+
+ return fd;
+}
+
+static void mfd_fail_new(const char *name, unsigned int flags)
+{
+ int r;
+
+ r = sys_memfd_create(name, flags);
+ if (r >= 0) {
+ printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
+ name, flags);
+ close(r);
+ abort();
+ }
+}
+
+static unsigned int mfd_assert_get_seals(int fd)
+{
+ int r;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0) {
+ printf("GET_SEALS(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return (unsigned int)r;
+}
+
+static void mfd_assert_has_seals(int fd, unsigned int seals)
+{
+ unsigned int s;
+
+ s = mfd_assert_get_seals(fd);
+ if (s != seals) {
+ printf("%u != %u = GET_SEALS(%d)\n", seals, s, fd);
+ abort();
+ }
+}
+
+static void mfd_assert_add_seals(int fd, unsigned int seals)
+{
+ int r;
+ unsigned int s;
+
+ s = mfd_assert_get_seals(fd);
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0) {
+ printf("ADD_SEALS(%d, %u -> %u) failed: %m\n", fd, s, seals);
+ abort();
+ }
+}
+
+static void mfd_fail_add_seals(int fd, unsigned int seals)
+{
+ int r;
+ unsigned int s;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ s = 0;
+ else
+ s = (unsigned int)r;
+
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r >= 0) {
+ printf("ADD_SEALS(%d, %u -> %u) didn't fail as expected\n",
+ fd, s, seals);
+ abort();
+ }
+}
+
+static void mfd_assert_size(int fd, size_t size)
+{
+ struct stat st;
+ int r;
+
+ r = fstat(fd, &st);
+ if (r < 0) {
+ printf("fstat(%d) failed: %m\n", fd);
+ abort();
+ } else if (st.st_size != size) {
+ printf("wrong file size %lld, but expected %lld\n",
+ (long long)st.st_size, (long long)size);
+ abort();
+ }
+}
+
+static int mfd_assert_dup(int fd)
+{
+ int r;
+
+ r = dup(fd);
+ if (r < 0) {
+ printf("dup(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static int mfd_assert_open(int fd, int flags, mode_t mode)
+{
+ char buf[512];
+ int r;
+
+ sprintf(buf, "/proc/self/fd/%d", fd);
+ r = open(buf, flags, mode);
+ if (r < 0) {
+ printf("open(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_fail_open(int fd, int flags, mode_t mode)
+{
+ char buf[512];
+ int r;
+
+ sprintf(buf, "/proc/self/fd/%d", fd);
+ r = open(buf, flags, mode);
+ if (r >= 0) {
+ printf("open(%s) didn't fail as expected\n", buf);
+ abort();
+ }
+}
+
+static void mfd_assert_read(int fd)
+{
+ char buf[16];
+ void *p;
+ ssize_t l;
+
+ l = read(fd, buf, sizeof(buf));
+ if (l != sizeof(buf)) {
+ printf("read() failed: %m\n");
+ abort();
+ }
+
+ /* verify PROT_READ *is* allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ munmap(p, mfd_def_size);
+
+ /* verify MAP_PRIVATE is *always* allowed (even writable) */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ munmap(p, mfd_def_size);
+}
+
+static void mfd_assert_write(int fd)
+{
+ ssize_t l;
+ void *p;
+ int r;
+
+ /*
+ * hugetlbfs does not support write, but we want to
+ * verify everything else here.
+ */
+ if (!hugetlbfs_test) {
+ /* verify write() succeeds */
+ l = write(fd, "\0\0\0\0", 4);
+ if (l != 4) {
+ printf("write() failed: %m\n");
+ abort();
+ }
+ }
+
+ /* verify PROT_READ | PROT_WRITE is allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ *(char *)p = 0;
+ munmap(p, mfd_def_size);
+
+ /* verify PROT_WRITE is allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ *(char *)p = 0;
+ munmap(p, mfd_def_size);
+
+ /* verify PROT_READ with MAP_SHARED is allowed and a following
+ * mprotect(PROT_WRITE) allows writing */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
+ if (r < 0) {
+ printf("mprotect() failed: %m\n");
+ abort();
+ }
+
+ *(char *)p = 0;
+ munmap(p, mfd_def_size);
+
+ /* verify PUNCH_HOLE works */
+ r = fallocate(fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0,
+ mfd_def_size);
+ if (r < 0) {
+ printf("fallocate(PUNCH_HOLE) failed: %m\n");
+ abort();
+ }
+}
+
+static void mfd_fail_write(int fd)
+{
+ ssize_t l;
+ void *p;
+ int r;
+
+ /* verify write() fails */
+ l = write(fd, "data", 4);
+ if (l >= 0 || errno != EPERM) {
+ printf("expected EPERM on write(), but got %d: %m\n", (int)l);
+ abort();
+ }
+
+ /* verify PROT_READ | PROT_WRITE is not allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ printf("mmap() didn't fail as expected\n");
+ abort();
+ }
+
+ /* verify PROT_WRITE is not allowed */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ printf("mmap() didn't fail as expected\n");
+ abort();
+ }
+
+ /* Verify PROT_READ with MAP_SHARED with a following mprotect is not
+ * allowed. Note that for r/w the kernel already prevents the mmap. */
+ p = mmap(NULL,
+ mfd_def_size,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ r = mprotect(p, mfd_def_size, PROT_READ | PROT_WRITE);
+ if (r >= 0) {
+ printf("mmap()+mprotect() didn't fail as expected\n");
+ abort();
+ }
+ munmap(p, mfd_def_size);
+ }
+
+ /* verify PUNCH_HOLE fails */
+ r = fallocate(fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0,
+ mfd_def_size);
+ if (r >= 0) {
+ printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_shrink(int fd)
+{
+ int r, fd2;
+
+ r = ftruncate(fd, mfd_def_size / 2);
+ if (r < 0) {
+ printf("ftruncate(SHRINK) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size / 2);
+
+ fd2 = mfd_assert_open(fd,
+ O_RDWR | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR);
+ close(fd2);
+
+ mfd_assert_size(fd, 0);
+}
+
+static void mfd_fail_shrink(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, mfd_def_size / 2);
+ if (r >= 0) {
+ printf("ftruncate(SHRINK) didn't fail as expected\n");
+ abort();
+ }
+
+ mfd_fail_open(fd,
+ O_RDWR | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR);
+}
+
+static void mfd_assert_grow(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, mfd_def_size * 2);
+ if (r < 0) {
+ printf("ftruncate(GROW) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size * 2);
+
+ r = fallocate(fd,
+ 0,
+ 0,
+ mfd_def_size * 4);
+ if (r < 0) {
+ printf("fallocate(ALLOC) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size * 4);
+}
+
+static void mfd_fail_grow(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, mfd_def_size * 2);
+ if (r >= 0) {
+ printf("ftruncate(GROW) didn't fail as expected\n");
+ abort();
+ }
+
+ r = fallocate(fd,
+ 0,
+ 0,
+ mfd_def_size * 4);
+ if (r >= 0) {
+ printf("fallocate(ALLOC) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_grow_write(int fd)
+{
+ char *buf;
+ ssize_t l;
+
+ /* hugetlbfs does not support write */
+ if (hugetlbfs_test)
+ return;
+
+ buf = malloc(mfd_def_size * 8);
+ if (!buf) {
+ printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+ abort();
+ }
+
+ l = pwrite(fd, buf, mfd_def_size * 8, 0);
+ if (l != (mfd_def_size * 8)) {
+ printf("pwrite() failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, mfd_def_size * 8);
+ free(buf);
+}
+
+static void mfd_fail_grow_write(int fd)
+{
+ char *buf;
+ ssize_t l;
+
+ /* hugetlbfs does not support write */
+ if (hugetlbfs_test)
+ return;
+
+ buf = malloc(mfd_def_size * 8);
+ if (!buf) {
+ printf("malloc(%zu) failed: %m\n", mfd_def_size * 8);
+ abort();
+ }
+
+ l = pwrite(fd, buf, mfd_def_size * 8, 0);
+ if (l == (mfd_def_size * 8)) {
+ printf("pwrite() didn't fail as expected\n");
+ abort();
+ }
+ free(buf);
+}
+
+static int idle_thread_fn(void *arg)
+{
+ sigset_t set;
+ int sig;
+
+ /* dummy waiter; SIGTERM terminates us anyway */
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ sigwait(&set, &sig);
+
+ return 0;
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+ uint8_t *stack;
+ pid_t pid;
+
+ stack = malloc(STACK_SIZE);
+ if (!stack) {
+ printf("malloc(STACK_SIZE) failed: %m\n");
+ abort();
+ }
+
+ pid = clone(idle_thread_fn,
+ stack + STACK_SIZE,
+ SIGCHLD | flags,
+ NULL);
+ if (pid < 0) {
+ printf("clone() failed: %m\n");
+ abort();
+ }
+
+ return pid;
+}
+
+static void join_idle_thread(pid_t pid)
+{
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+/*
+ * Test memfd_create() syscall
+ * Verify syscall-argument validation, including name checks, flag validation
+ * and more.
+ */
+static void test_create(void)
+{
+ char buf[2048];
+ int fd;
+
+ printf("%s CREATE\n", memfd_str);
+
+ /* test NULL name */
+ mfd_fail_new(NULL, 0);
+
+ /* test over-long name (not zero-terminated) */
+ memset(buf, 0xff, sizeof(buf));
+ mfd_fail_new(buf, 0);
+
+ /* test over-long zero-terminated name */
+ memset(buf, 0xff, sizeof(buf));
+ buf[sizeof(buf) - 1] = 0;
+ mfd_fail_new(buf, 0);
+
+ /* verify "" is a valid name */
+ fd = mfd_assert_new("", 0, 0);
+ close(fd);
+
+ /* verify invalid O_* open flags */
+ mfd_fail_new("", 0x0100);
+ mfd_fail_new("", ~MFD_CLOEXEC);
+ mfd_fail_new("", ~MFD_ALLOW_SEALING);
+ mfd_fail_new("", ~0);
+ mfd_fail_new("", 0x80000000U);
+
+ /* verify MFD_CLOEXEC is allowed */
+ fd = mfd_assert_new("", 0, MFD_CLOEXEC);
+ close(fd);
+
+ /* verify MFD_ALLOW_SEALING is allowed */
+ fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+ close(fd);
+
+ /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+ fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ close(fd);
+}
+
+/*
+ * Test basic sealing
+ * A very basic sealing test to see whether setting/retrieving seals works.
+ */
+static void test_basic(void)
+{
+ int fd;
+
+ printf("%s BASIC\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_basic",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* add basic seals */
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+
+ /* add them again */
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+
+ /* add more seals and seal against sealing */
+ mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_WRITE |
+ F_SEAL_SEAL);
+
+ /* verify that sealing no longer works */
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ mfd_fail_add_seals(fd, 0);
+
+ close(fd);
+
+ /* verify sealing does not work without MFD_ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_basic",
+ mfd_def_size,
+ MFD_CLOEXEC);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ mfd_fail_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ close(fd);
+}
+
+/*
+ * Test SEAL_WRITE
+ * Test whether SEAL_WRITE actually prevents modifications.
+ */
+static void test_seal_write(void)
+{
+ int fd;
+
+ printf("%s SEAL-WRITE\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_write",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+ mfd_assert_read(fd);
+ mfd_fail_write(fd);
+ mfd_assert_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK
+ * Test whether SEAL_SHRINK actually prevents shrinking
+ */
+static void test_seal_shrink(void)
+{
+ int fd;
+
+ printf("%s SEAL-SHRINK\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_shrink",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_fail_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_assert_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_GROW
+ * Test whether SEAL_GROW actually prevents growing
+ */
+static void test_seal_grow(void)
+{
+ int fd;
+
+ printf("%s SEAL-GROW\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_grow",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_assert_shrink(fd);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK | SEAL_GROW
+ * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
+ */
+static void test_seal_resize(void)
+{
+ int fd;
+
+ printf("%s SEAL-RESIZE\n", memfd_str);
+
+ fd = mfd_assert_new("kern_memfd_seal_resize",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_fail_shrink(fd);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test sharing via dup()
+ * Test that seals are shared between dupped FDs and they're all equal.
+ */
+static void test_share_dup(char *banner, char *b_suffix)
+{
+ int fd, fd2;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_dup",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ fd2 = mfd_assert_dup(fd);
+ mfd_assert_has_seals(fd2, 0);
+
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ mfd_assert_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ mfd_fail_add_seals(fd2, F_SEAL_GROW);
+ mfd_fail_add_seals(fd, F_SEAL_SEAL);
+ mfd_fail_add_seals(fd2, F_SEAL_SEAL);
+
+ close(fd2);
+
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ close(fd);
+}
+
+/*
+ * Test sealing with active mmap()s
+ * Modifying seals is only allowed if no other mmap() refs exist.
+ */
+static void test_share_mmap(char *banner, char *b_suffix)
+{
+ int fd;
+ void *p;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_mmap",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ /* shared/writable ref prevents sealing WRITE, but allows others */
+ p = mfd_assert_mmap_shared(fd);
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+ munmap(p, mfd_def_size);
+
+ /* readable ref allows sealing */
+ p = mfd_assert_mmap_private(fd);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ munmap(p, mfd_def_size);
+
+ close(fd);
+}
+
+/*
+ * Test sealing with open(/proc/self/fd/%d)
+ * Via /proc we can get access to a separate file-context for the same memfd.
+ * This is *not* like dup(), but like a real separate open(). Make sure the
+ * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
+ */
+static void test_share_open(char *banner, char *b_suffix)
+{
+ int fd, fd2;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_open",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ fd2 = mfd_assert_open(fd, O_RDWR, 0);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ close(fd);
+ fd = mfd_assert_open(fd2, O_RDONLY, 0);
+
+ mfd_fail_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ close(fd2);
+ fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+ close(fd2);
+ close(fd);
+}
+
+/*
+ * Test sharing via fork()
+ * Test whether seal-modifications work as expected with forked children.
+ */
+static void test_share_fork(char *banner, char *b_suffix)
+{
+ int fd;
+ pid_t pid;
+
+ printf("%s %s %s\n", memfd_str, banner, b_suffix);
+
+ fd = mfd_assert_new("kern_memfd_share_fork",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ pid = spawn_idle_thread(0);
+ mfd_assert_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ join_idle_thread(pid);
+
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+
+ if (argc == 2) {
+ if (!strcmp(argv[1], "hugetlbfs")) {
+ unsigned long hpage_size = default_huge_page_size();
+
+ if (!hpage_size) {
+ printf("Unable to determine huge page size\n");
+ abort();
+ }
+
+ hugetlbfs_test = 1;
+ memfd_str = MEMFD_HUGE_STR;
+ mfd_def_size = hpage_size * 2;
+ } else {
+ printf("Unknown option: %s\n", argv[1]);
+ abort();
+ }
+ }
+
+ test_create();
+ test_basic();
+
+ test_seal_write();
+ test_seal_shrink();
+ test_seal_grow();
+ test_seal_resize();
+
+ test_share_dup("SHARE-DUP", "");
+ test_share_mmap("SHARE-MMAP", "");
+ test_share_open("SHARE-OPEN", "");
+ test_share_fork("SHARE-FORK", "");
+
+ /* Run test-suite in a multi-threaded environment with a shared
+ * file-table. */
+ pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
+ test_share_dup("SHARE-DUP", SHARED_FT_STR);
+ test_share_mmap("SHARE-MMAP", SHARED_FT_STR);
+ test_share_open("SHARE-OPEN", SHARED_FT_STR);
+ test_share_fork("SHARE-FORK", SHARED_FT_STR);
+ join_idle_thread(pid);
+
+ printf("memfd: DONE\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
new file mode 100755
index 000000000..22e572e2d
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
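+# Clean up a stale mount left behind by a previous, aborted run.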
+if test -d "./mnt" ; then
+ fusermount -u ./mnt
+ rmdir ./mnt
+fi
+
+set -e
+
+mkdir mnt
+./fuse_mnt ./mnt
+./fuse_test ./mnt/memfd "$@"
+fusermount -u ./mnt
+rmdir ./mnt
diff --git a/tools/testing/selftests/memfd/run_hugetlbfs_test.sh b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
new file mode 100755
index 000000000..fb633eeb0
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_hugetlbfs_test.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+#
+# To test memfd_create with hugetlbfs, there needs to be hpages_test
+# huge pages free. Attempt to allocate enough pages to test.
+#
+hpages_test=8
+
+#
+# Get count of free huge pages from /proc/meminfo
+#
+while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+done < /proc/meminfo
+
+#
+# If not enough free huge pages for test, attempt to increase
+#
+if [ -n "$freepgs" ] && [ $freepgs -lt $hpages_test ]; then
+ nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+ hpages_needed=`expr $hpages_test - $freepgs`
+
+ if [ $UID != 0 ]; then
+ echo "Please run memfd with hugetlbfs test as root"
+ exit $ksft_skip
+ fi
+
+ echo 3 > /proc/sys/vm/drop_caches
+ echo $(( $hpages_needed + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+ while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+fi
+
+#
+# If still not enough huge pages available, exit. But, give back any huge
+# pages potentially allocated above.
+#
+if [ -z "$freepgs" ] || [ "$freepgs" -lt "$hpages_test" ]; then
+ # nr_hugepgs non-zero only if we attempted to increase
+ if [ -n "$nr_hugepgs" ]; then
+ echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+ fi
+ printf "Not enough huge pages available (%d < %d)\n" \
+ $freepgs $hpages_test
+ exit $ksft_skip
+fi
+
+#
+# Run the hugetlbfs test
+#
+./memfd_test hugetlbfs
+./run_fuse_test.sh hugetlbfs
+
+#
+# Give back any huge pages allocated for the test
+#
+if [ -n "$nr_hugepgs" ]; then
+ echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+fi
diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile
new file mode 100644
index 000000000..e0a625e34
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+TEST_PROGS := mem-on-off-test.sh
+
+run_full_test:
+ @/bin/bash ./mem-on-off-test.sh -r 10 && echo "memory-hotplug selftests: [PASS]" || echo "memory-hotplug selftests: [FAIL]"
+
+clean:
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
new file mode 100644
index 000000000..a7e8cd5bb
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -0,0 +1,5 @@
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_SPARSE=y
+CONFIG_NOTIFIER_ERROR_INJECTION=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
+CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
new file mode 100755
index 000000000..b37585e6a
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -0,0 +1,291 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+SYSFS=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+prerequisite()
+{
+ msg="skip all tests:"
+
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit $ksft_skip
+ fi
+
+ if ! ls $SYSFS/devices/system/memory/memory* > /dev/null 2>&1; then
+ echo $msg memory hotplug is not supported >&2
+ exit $ksft_skip
+ fi
+
+ if ! grep -q 1 $SYSFS/devices/system/memory/memory*/removable; then
+ echo $msg no hot-pluggable memory >&2
+ exit $ksft_skip
+ fi
+}
+
+#
+# list all hot-pluggable memory
+#
+hotpluggable_memory()
+{
+ local state=${1:-.\*}
+
+ for memory in $SYSFS/devices/system/memory/memory*; do
+ if grep -q 1 $memory/removable &&
+ grep -q $state $memory/state; then
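+ # print only the numeric block id, e.g. ".../memory32" -> "32"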
+ echo ${memory##/*/memory}
+ fi
+ done
+}
+
+hotpluggable_offline_memory()
+{
+ hotpluggable_memory offline
+}
+
+hotpluggable_online_memory()
+{
+ hotpluggable_memory online
+}
+
+memory_is_online()
+{
+ grep -q online $SYSFS/devices/system/memory/memory$1/state
+}
+
+memory_is_offline()
+{
+ grep -q offline $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory()
+{
+ echo online > $SYSFS/devices/system/memory/memory$1/state
+}
+
+offline_memory()
+{
+ echo offline > $SYSFS/devices/system/memory/memory$1/state
+}
+
+online_memory_expect_success()
+{
+ local memory=$1
+
+ if ! online_memory $memory; then
+ echo $FUNCNAME $memory: unexpected fail >&2
+ return 1
+ elif ! memory_is_online $memory; then
+ echo $FUNCNAME $memory: unexpected offline >&2
+ return 1
+ fi
+ return 0
+}
+
+online_memory_expect_fail()
+{
+ local memory=$1
+
+ if online_memory $memory 2> /dev/null; then
+ echo $FUNCNAME $memory: unexpected success >&2
+ return 1
+ elif ! memory_is_offline $memory; then
+ echo $FUNCNAME $memory: unexpected online >&2
+ return 1
+ fi
+ return 0
+}
+
+offline_memory_expect_success()
+{
+ local memory=$1
+
+ if ! offline_memory $memory; then
+ echo $FUNCNAME $memory: unexpected fail >&2
+ return 1
+ elif ! memory_is_offline $memory; then
+ echo $FUNCNAME $memory: unexpected online >&2
+ return 1
+ fi
+ return 0
+}
+
+offline_memory_expect_fail()
+{
+ local memory=$1
+
+ if offline_memory $memory 2> /dev/null; then
+ echo $FUNCNAME $memory: unexpected success >&2
+ return 1
+ elif ! memory_is_online $memory; then
+ echo $FUNCNAME $memory: unexpected offline >&2
+ return 1
+ fi
+ return 0
+}
+
+error=-12
+priority=0
+# Run with default of ratio=2 for Kselftest run
+ratio=2
+retval=0
+
+while getopts e:hp:r: opt; do
+ case $opt in
+ e)
+ error=$OPTARG
+ ;;
+ h)
+ echo "Usage $0 [ -e errno ] [ -p notifier-priority ] [ -r percent-of-memory-to-offline ]"
+ exit
+ ;;
+ p)
+ priority=$OPTARG
+ ;;
+ r)
+ ratio=$OPTARG
+ if [ "$ratio" -gt 100 ] || [ "$ratio" -lt 0 ]; then
+ echo "The percentage should be an integer within 0~100 range"
+ exit 1
+ fi
+ ;;
+ esac
+done
+
+if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
+ echo "error code must be -4095 <= errno < 0" >&2
+ exit 1
+fi
+
+prerequisite
+
+echo "Test scope: $ratio% hotplug memory"
+
+#
+# Online all hot-pluggable memory
+#
+hotpluggable_num=`hotpluggable_offline_memory | wc -l`
+echo -e "\t online all hot-pluggable memory in offline state:"
+if [ "$hotpluggable_num" -gt 0 ]; then
+ for memory in `hotpluggable_offline_memory`; do
+ echo "offline->online memory$memory"
+ if ! online_memory_expect_success $memory; then
+ retval=1
+ fi
+ done
+else
+ echo -e "\t\t SKIPPED - no hot-pluggable memory in offline state"
+fi
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+hotpluggable_num=`hotpluggable_online_memory | wc -l`
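+# Round up: offline ceil(hotpluggable_num * ratio / 100) memory blocks.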
+target=`echo "a=$hotpluggable_num*$ratio; if ( a%100 ) a/100+1 else a/100" | bc`
+echo -e "\t offline $ratio% hot-pluggable memory in online state"
+echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):"
+for memory in `hotpluggable_online_memory`; do
+ if [ "$target" -gt 0 ]; then
+ echo "online->offline memory$memory"
+ if offline_memory_expect_success $memory; then
+ target=$(($target - 1))
+ fi
+ fi
+done
+if [ "$target" -gt 0 ]; then
+ retval=1
+ echo -e "\t\t FAILED - unable to offline some memory blocks, device busy?"
+fi
+
+#
+# Online all hot-pluggable memory again
+#
+hotpluggable_num=`hotpluggable_offline_memory | wc -l`
+echo -e "\t online all hot-pluggable memory in offline state:"
+if [ "$hotpluggable_num" -gt 0 ]; then
+ for memory in `hotpluggable_offline_memory`; do
+ echo "offline->online memory$memory"
+ if ! online_memory_expect_success $memory; then
+ retval=1
+ fi
+ done
+else
+ echo -e "\t\t SKIPPED - no hot-pluggable memory in offline state"
+fi
+
+#
+# Test with memory notifier error injection
+#
+
+DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
+NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/memory
+
+prerequisite_extra()
+{
+ msg="skip extra tests:"
+
+ /sbin/modprobe -q -r memory-notifier-error-inject
+ /sbin/modprobe -q memory-notifier-error-inject priority=$priority
+
+ if [ ! -d "$DEBUGFS" ]; then
+ echo $msg debugfs is not mounted >&2
+ exit $retval
+ fi
+
+ if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
+ echo $msg memory-notifier-error-inject module is not available >&2
+ exit $retval
+ fi
+}
+
+echo -e "\t Test with memory notifier error injection"
+prerequisite_extra
+
+#
+# Offline $ratio percent of hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+ if [ $((RANDOM % 100)) -lt $ratio ]; then
+ offline_memory_expect_success $memory
+ fi
+done
+
+#
+# Test memory hot-add error handling (offline => online)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+ online_memory_expect_fail $memory
+done
+
+#
+# Online all hot-pluggable memory
+#
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
+for memory in `hotpluggable_offline_memory`; do
+ online_memory_expect_success $memory
+done
+
+#
+# Test memory hot-remove error handling (online => offline)
+#
+echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+for memory in `hotpluggable_online_memory`; do
+ offline_memory_expect_fail $memory
+done
+
+echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
+/sbin/modprobe -q -r memory-notifier-error-inject
+
+exit $retval
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
new file mode 100644
index 000000000..856ad4107
--- /dev/null
+++ b/tools/testing/selftests/mount/.gitignore
@@ -0,0 +1 @@
+unprivileged-remount-test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
new file mode 100644
index 000000000..026890744
--- /dev/null
+++ b/tools/testing/selftests/mount/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -Wall -O2
+
+TEST_PROGS := run_tests.sh
+TEST_GEN_FILES := unprivileged-remount-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount/config b/tools/testing/selftests/mount/config
new file mode 100644
index 000000000..416bd53ce
--- /dev/null
+++ b/tools/testing/selftests/mount/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_tests.sh
new file mode 100755
index 000000000..4ab8f507d
--- /dev/null
+++ b/tools/testing/selftests/mount/run_tests.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Run mount selftests
+if [ -f /proc/self/uid_map ] ; then
+ ./unprivileged-remount-test
+else
+ echo "WARN: /proc/self/uid_map does not exist, test skipped."
+ exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
new file mode 100644
index 000000000..584dc6bc3
--- /dev/null
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+#ifndef CLONE_NEWUTS
+# define CLONE_NEWUTS 0x04000000
+#endif
+#ifndef CLONE_NEWIPC
+# define CLONE_NEWIPC 0x08000000
+#endif
+#ifndef CLONE_NEWNET
+# define CLONE_NEWNET 0x40000000
+#endif
+#ifndef CLONE_NEWUSER
+# define CLONE_NEWUSER 0x10000000
+#endif
+#ifndef CLONE_NEWPID
+# define CLONE_NEWPID 0x20000000
+#endif
+
+#ifndef MS_REC
+# define MS_REC 16384
+#endif
+#ifndef MS_RELATIME
+# define MS_RELATIME (1 << 21)
+#endif
+#ifndef MS_STRICTATIME
+# define MS_STRICTATIME (1 << 24)
+#endif
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, va_list ap)
+{
+ char buf[4096];
+ int fd;
+ ssize_t written;
+ int buf_len;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0) {
+ die("vsnprintf failed: %s\n",
+ strerror(errno));
+ }
+ if (buf_len >= sizeof(buf)) {
+ die("vsnprintf output truncated\n");
+ }
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ die("open of %s failed: %s\n",
+ filename, strerror(errno));
+ }
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ die("short write to %s\n", filename);
+ } else {
+ die("write to %s failed: %s\n",
+ filename, strerror(errno));
+ }
+ }
+ if (close(fd) != 0) {
+ die("close of %s failed: %s\n",
+ filename, strerror(errno));
+ }
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+
+}
+
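+/*
+ * statvfs() reports the mount state as ST_* bits in f_flag; translate them
+ * back into the MS_* constants that mount(2) expects so the same flags can
+ * be replayed on a later MS_REMOUNT.
+ */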
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0) {
+ die("statvfs of %s failed: %s\n",
+ path, strerror(errno));
+ }
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | \
+ ST_NOEXEC | ST_NOATIME | ST_NODIRATIME | ST_RELATIME | \
+ ST_SYNCHRONOUS | ST_MANDLOCK)) {
+ die("Unrecognized mount flags\n");
+ }
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= MS_MANDLOCK;
+
+ return mnt_flags;
+}
+
+static void create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER) != 0) {
+ die("unshare(CLONE_NEWUSER) failed: %s\n",
+ strerror(errno));
+ }
+
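+ /*
+ * /proc/self/setgroups appeared in Linux 3.19; where present, it must
+ * be set to "deny" before an unprivileged process may write gid_map.
+ */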
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "0 %d 1", uid);
+ write_file("/proc/self/gid_map", "0 %d 1", gid);
+
+ if (setgid(0) != 0) {
+ die ("setgid(0) failed %s\n",
+ strerror(errno));
+ }
+ if (setuid(0) != 0) {
+ die("setuid(0) failed %s\n",
+ strerror(errno));
+ }
+}
+
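+/*
+ * Fork a child that mounts @fstype in a fresh user+mount namespace, then
+ * re-enters a second user namespace and verifies that a remount with
+ * @remount_flags succeeds while a remount with @invalid_flags is rejected.
+ */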
+static
+bool test_unpriv_remount(const char *fstype, const char *mount_options,
+ int mount_flags, int remount_flags, int invalid_flags)
+{
+ pid_t child;
+
+ child = fork();
+ if (child == -1) {
+ die("fork failed: %s\n",
+ strerror(errno));
+ }
+ if (child != 0) { /* parent */
+ pid_t pid;
+ int status;
+ pid = waitpid(child, &status, 0);
+ if (pid == -1) {
+ die("waitpid failed: %s\n",
+ strerror(errno));
+ }
+ if (pid != child) {
+ die("waited for %d got %d\n",
+ child, pid);
+ }
+ if (!WIFEXITED(status)) {
+ die("child did not terminate cleanly\n");
+ }
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
+ }
+
+ create_and_enter_userns();
+ if (unshare(CLONE_NEWNS) != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ if (mount("testing", "/tmp", fstype, mount_flags, mount_options) != 0) {
+ die("mount of %s with options '%s' on /tmp failed: %s\n",
+ fstype,
+ mount_options? mount_options : "",
+ strerror(errno));
+ }
+
+ create_and_enter_userns();
+
+ if (unshare(CLONE_NEWNS) != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ if (mount("/tmp", "/tmp", "none",
+ MS_REMOUNT | MS_BIND | remount_flags, NULL) != 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp failed: %s\n",
+ strerror(errno));
+ }
+
+ if (mount("/tmp", "/tmp", "none",
+ MS_REMOUNT | MS_BIND | invalid_flags, NULL) == 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp with invalid flags "
+ "succeeded unexpectedly\n");
+ }
+ exit(EXIT_SUCCESS);
+}
+
+static bool test_unpriv_remount_simple(int mount_flags)
+{
+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags, 0);
+}
+
+static bool test_unpriv_remount_atime(int mount_flags, int invalid_flags)
+{
+ return test_unpriv_remount("ramfs", NULL, mount_flags, mount_flags,
+ invalid_flags);
+}
+
+static bool test_priv_mount_unpriv_remount(void)
+{
+ pid_t child;
+ int ret;
+ const char *orig_path = "/dev";
+ const char *dest_path = "/tmp";
+ int orig_mnt_flags, remount_mnt_flags;
+
+ child = fork();
+ if (child == -1) {
+ die("fork failed: %s\n",
+ strerror(errno));
+ }
+ if (child != 0) { /* parent */
+ pid_t pid;
+ int status;
+ pid = waitpid(child, &status, 0);
+ if (pid == -1) {
+ die("waitpid failed: %s\n",
+ strerror(errno));
+ }
+ if (pid != child) {
+ die("waited for %d got %d\n",
+ child, pid);
+ }
+ if (!WIFEXITED(status)) {
+ die("child did not terminate cleanly\n");
+ }
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
+ }
+
+ orig_mnt_flags = read_mnt_flags(orig_path);
+
+ create_and_enter_userns();
+ ret = unshare(CLONE_NEWNS);
+ if (ret != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+
+ ret = mount(orig_path, dest_path, "bind", MS_BIND | MS_REC, NULL);
+ if (ret != 0) {
+ die("recursive bind mount of %s onto %s failed: %s\n",
+ orig_path, dest_path, strerror(errno));
+ }
+
+ ret = mount(dest_path, dest_path, "none",
+ MS_REMOUNT | MS_BIND | orig_mnt_flags, NULL);
+ if (ret != 0) {
+ /* system("cat /proc/self/mounts"); */
+ die("remount of /tmp failed: %s\n",
+ strerror(errno));
+ }
+
+ remount_mnt_flags = read_mnt_flags(dest_path);
+ if (orig_mnt_flags != remount_mnt_flags) {
+ die("Mount flags unexpectedly changed during remount of %s originally mounted on %s\n",
+ dest_path, orig_path);
+ }
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)
+{
+ if (!test_unpriv_remount_simple(MS_RDONLY)) {
+ die("MS_RDONLY malfunctions\n");
+ }
+ if (!test_unpriv_remount("devpts", "newinstance", MS_NODEV, MS_NODEV, 0)) {
+ die("MS_NODEV malfunctions\n");
+ }
+ if (!test_unpriv_remount_simple(MS_NOSUID)) {
+ die("MS_NOSUID malfunctions\n");
+ }
+ if (!test_unpriv_remount_simple(MS_NOEXEC)) {
+ die("MS_NOEXEC malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_RELATIME,
+ MS_NOATIME))
+ {
+ die("MS_RELATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_STRICTATIME,
+ MS_NOATIME))
+ {
+ die("MS_STRICTATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_NOATIME,
+ MS_STRICTATIME))
+ {
+ die("MS_NOATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_RELATIME|MS_NODIRATIME,
+ MS_NOATIME))
+ {
+ die("MS_RELATIME|MS_NODIRATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_STRICTATIME|MS_NODIRATIME,
+ MS_NOATIME))
+ {
+ die("MS_STRICTATIME|MS_NODIRATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount_atime(MS_NOATIME|MS_NODIRATIME,
+ MS_STRICTATIME))
+ {
+ die("MS_NOATIME|MS_DIRATIME malfunctions\n");
+ }
+ if (!test_unpriv_remount("ramfs", NULL, MS_STRICTATIME, 0, MS_NOATIME))
+ {
+ die("Default atime malfunctions\n");
+ }
+ if (!test_priv_mount_unpriv_remount()) {
+ die("Mount flags unexpectedly changed after remount\n");
+ }
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore
new file mode 100644
index 000000000..d8d423772
--- /dev/null
+++ b/tools/testing/selftests/mqueue/.gitignore
@@ -0,0 +1,2 @@
+mq_open_tests
+mq_perf_tests
diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile
new file mode 100644
index 000000000..8a58055fc
--- /dev/null
+++ b/tools/testing/selftests/mqueue/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2
+LDLIBS = -lrt -lpthread -lpopt
+
+TEST_GEN_PROGS := mq_open_tests mq_perf_tests
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mqueue/mq_open_tests.c b/tools/testing/selftests/mqueue/mq_open_tests.c
new file mode 100644
index 000000000..9403ac01b
--- /dev/null
+++ b/tools/testing/selftests/mqueue/mq_open_tests.c
@@ -0,0 +1,502 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ * Doug Ledford <dledford@redhat.com>
+ *
+ * mq_open_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_open_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_open_tests.c
+ * Tests the various situations that should either succeed or fail to
+ * open a posix message queue and then reports whether or not they
+ * did as they were supposed to.
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <error.h>
+
+#include "../kselftest.h"
+
+static char *usage =
+"Usage:\n"
+" %s path\n"
+"\n"
+" path Path name of the message queue to create\n"
+"\n"
+" Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
+char *DEF_MSGS = "/proc/sys/fs/mqueue/msg_default";
+char *DEF_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_default";
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+int default_settings;
+struct rlimit saved_limits, cur_limits;
+int saved_def_msgs, saved_def_msgsize, saved_max_msgs, saved_max_msgsize;
+int cur_def_msgs, cur_def_msgsize, cur_max_msgs, cur_max_msgsize;
+FILE *def_msgs, *def_msgsize, *max_msgs, *max_msgsize;
+char *queue_path;
+char *default_queue_path = "/test1";
+mqd_t queue = -1;
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+void validate_current_settings();
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result);
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result);
+
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ perror(err_msg);
+}
+
+
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+ static int in_shutdown = 0;
+
+ /* In case we get called recursively by a set() call below */
+ if (in_shutdown++)
+ return;
+
+ if (seteuid(0) == -1)
+ perror("seteuid() failed");
+
+ if (queue != -1)
+ if (mq_close(queue))
+ perror("mq_close() during shutdown");
+ if (queue_path)
+ /*
+ * Be silent if this fails, if we cleaned up already it's
+ * expected to fail
+ */
+ mq_unlink(queue_path);
+ if (default_settings) {
+ if (saved_def_msgs)
+ __set(def_msgs, saved_def_msgs,
+ "failed to restore saved_def_msgs");
+ if (saved_def_msgsize)
+ __set(def_msgsize, saved_def_msgsize,
+ "failed to restore saved_def_msgsize");
+ }
+ if (saved_max_msgs)
+ __set(max_msgs, saved_max_msgs,
+ "failed to restore saved_max_msgs");
+ if (saved_max_msgsize)
+ __set(max_msgsize, saved_max_msgsize,
+ "failed to restore saved_max_msgsize");
+ if (exit_val)
+ error(exit_val, errno, "%s at %d", err_cause, line_no);
+ exit(0);
+}
+
+static inline int get(FILE *stream)
+{
+ int value;
+ rewind(stream);
+ if (fscanf(stream, "%d", &value) != 1)
+ shutdown(4, "Error reading /proc entry", __LINE__ - 1);
+ return value;
+}
+
+static inline void set(FILE *stream, int value)
+{
+ int new_value;
+
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ return shutdown(5, "Failed writing to /proc file",
+ __LINE__ - 1);
+ new_value = get(stream);
+ if (new_value != value)
+ return shutdown(5, "We didn't get what we wrote to /proc back",
+ __LINE__ - 1);
+}
+
+static inline void getr(int type, struct rlimit *rlim)
+{
+ if (getrlimit(type, rlim))
+ shutdown(6, "getrlimit()", __LINE__ - 1);
+}
+
+static inline void setr(int type, struct rlimit *rlim)
+{
+ if (setrlimit(type, rlim))
+ shutdown(7, "setrlimit()", __LINE__ - 1);
+}
+
+void validate_current_settings()
+{
+ int rlim_needed;
+
+ if (cur_limits.rlim_cur < 4096) {
+ printf("Current rlimit value for POSIX message queue bytes is "
+ "unreasonably low,\nincreasing.\n\n");
+ cur_limits.rlim_cur = 8192;
+ cur_limits.rlim_max = 16384;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ }
+
+ if (default_settings) {
+ rlim_needed = (cur_def_msgs + 1) * (cur_def_msgsize + 1 +
+ 2 * sizeof(void *));
+ if (rlim_needed > cur_limits.rlim_cur) {
+ printf("Temporarily lowering default queue parameters "
+ "to something that will work\n"
+ "with the current rlimit values.\n\n");
+ set(def_msgs, 10);
+ cur_def_msgs = 10;
+ set(def_msgsize, 128);
+ cur_def_msgsize = 128;
+ }
+ } else {
+ rlim_needed = (cur_max_msgs + 1) * (cur_max_msgsize + 1 +
+ 2 * sizeof(void *));
+ if (rlim_needed > cur_limits.rlim_cur) {
+ printf("Temporarily lowering maximum queue parameters "
+ "to something that will work\n"
+ "with the current rlimit values in case this is "
+ "a kernel that ties the default\n"
+ "queue parameters to the maximum queue "
+ "parameters.\n\n");
+ set(max_msgs, 10);
+ cur_max_msgs = 10;
+ set(max_msgsize, 128);
+ cur_max_msgsize = 128;
+ }
+ }
+}
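+
+/*
+ * Worked example of the rlim_needed estimate above: with the lowered
+ * values of 10 messages of 128 bytes on a 64-bit machine,
+ * (10 + 1) * (128 + 1 + 2 * 8) = 11 * 145 = 1595 bytes, comfortably
+ * below even the 8192-byte soft limit this function installs when
+ * the configured limit is unreasonably low.
+ */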
+
+/*
+ * test_queue - Test opening a queue, shutdown if we fail. This should
+ * only be called in situations that should never fail. We clean up
+ * after ourselves and return the queue attributes in *result.
+ */
+static inline void test_queue(struct mq_attr *attr, struct mq_attr *result)
+{
+ int flags = O_RDWR | O_EXCL | O_CREAT;
+ int perms = DEFFILEMODE;
+
+ if ((queue = mq_open(queue_path, flags, perms, attr)) == -1)
+ shutdown(1, "mq_open()", __LINE__);
+ if (mq_getattr(queue, result))
+ shutdown(1, "mq_getattr()", __LINE__);
+ if (mq_close(queue))
+ shutdown(1, "mq_close()", __LINE__);
+ queue = -1;
+ if (mq_unlink(queue_path))
+ shutdown(1, "mq_unlink()", __LINE__);
+}
+
+/*
+ * Same as test_queue above, but failure is not fatal.
+ * Returns:
+ * 0 - Failed to create a queue
+ * 1 - Created a queue, attributes in *result
+ */
+static inline int test_queue_fail(struct mq_attr *attr, struct mq_attr *result)
+{
+ int flags = O_RDWR | O_EXCL | O_CREAT;
+ int perms = DEFFILEMODE;
+
+ if ((queue = mq_open(queue_path, flags, perms, attr)) == -1)
+ return 0;
+ if (mq_getattr(queue, result))
+ shutdown(1, "mq_getattr()", __LINE__);
+ if (mq_close(queue))
+ shutdown(1, "mq_close()", __LINE__);
+ queue = -1;
+ if (mq_unlink(queue_path))
+ shutdown(1, "mq_unlink()", __LINE__);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct mq_attr attr, result;
+
+ if (argc != 2) {
+ printf("Using Default queue path - %s\n", default_queue_path);
+ queue_path = default_queue_path;
+ } else {
+
+ /*
+ * Although we can create a msg queue with a non-absolute path name,
+ * unlink will fail. So, if the name doesn't start with a /, add one
+ * when we save it.
+ */
+ if (*argv[1] == '/')
+ queue_path = strdup(argv[1]);
+ else {
+ queue_path = malloc(strlen(argv[1]) + 2);
+ if (!queue_path) {
+ perror("malloc()");
+ exit(1);
+ }
+ queue_path[0] = '/';
+ queue_path[1] = 0;
+ strcat(queue_path, argv[1]);
+ }
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ /* Find out what files there are for us to make tweaks in */
+ def_msgs = fopen(DEF_MSGS, "r+");
+ def_msgsize = fopen(DEF_MSGSIZE, "r+");
+ max_msgs = fopen(MAX_MSGS, "r+");
+ max_msgsize = fopen(MAX_MSGSIZE, "r+");
+
+ if (!max_msgs)
+ shutdown(2, "Failed to open msg_max", __LINE__);
+ if (!max_msgsize)
+ shutdown(2, "Failed to open msgsize_max", __LINE__);
+ if (def_msgs || def_msgsize)
+ default_settings = 1;
+
+ /* Load up the current system values for everything we can */
+ getr(RLIMIT_MSGQUEUE, &saved_limits);
+ cur_limits = saved_limits;
+ if (default_settings) {
+ saved_def_msgs = cur_def_msgs = get(def_msgs);
+ saved_def_msgsize = cur_def_msgsize = get(def_msgsize);
+ }
+ saved_max_msgs = cur_max_msgs = get(max_msgs);
+ saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+
+ /* Tell the user our initial state */
+ printf("\nInitial system state:\n");
+ printf("\tUsing queue path:\t\t%s\n", queue_path);
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n",
+ (long) saved_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n",
+ (long) saved_limits.rlim_max);
+ printf("\tMaximum Message Size:\t\t%d\n", saved_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t%d\n", saved_max_msgs);
+ if (default_settings) {
+ printf("\tDefault Message Size:\t\t%d\n", saved_def_msgsize);
+ printf("\tDefault Queue Size:\t\t%d\n", saved_def_msgs);
+ } else {
+ printf("\tDefault Message Size:\t\tNot Supported\n");
+ printf("\tDefault Queue Size:\t\tNot Supported\n");
+ }
+ printf("\n");
+
+ validate_current_settings();
+
+ printf("Adjusted system state for testing:\n");
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t%ld\n", (long) cur_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t%ld\n", (long) cur_limits.rlim_max);
+ printf("\tMaximum Message Size:\t\t%d\n", cur_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t%d\n", cur_max_msgs);
+ if (default_settings) {
+ printf("\tDefault Message Size:\t\t%d\n", cur_def_msgsize);
+ printf("\tDefault Queue Size:\t\t%d\n", cur_def_msgs);
+ }
+
+ printf("\n\nTest series 1, behavior when no attr struct "
+ "passed to mq_open:\n");
+ if (!default_settings) {
+ test_queue(NULL, &result);
+ printf("Given sane system settings, mq_open without an attr "
+ "struct succeeds:\tPASS\n");
+ if (result.mq_maxmsg != cur_max_msgs ||
+ result.mq_msgsize != cur_max_msgsize) {
+ printf("Kernel does not support setting the default "
+ "mq attributes,\nbut also doesn't tie the "
+ "defaults to the maximums:\t\t\tPASS\n");
+ } else {
+ set(max_msgs, ++cur_max_msgs);
+ set(max_msgsize, ++cur_max_msgsize);
+ test_queue(NULL, &result);
+ if (result.mq_maxmsg == cur_max_msgs &&
+ result.mq_msgsize == cur_max_msgsize)
+ printf("Kernel does not support setting the "
+ "default mq attributes and\n"
+ "also ties system wide defaults to "
+ "the system wide maximums:\t\t"
+ "FAIL\n");
+ else
+ printf("Kernel does not support setting the "
+ "default mq attributes,\n"
+ "but also doesn't tie the defaults to "
+ "the maximums:\t\t\tPASS\n");
+ }
+ } else {
+ printf("Kernel supports setting defaults separately from "
+ "maximums:\t\tPASS\n");
+ /*
+ * While we are here, go ahead and test that the kernel
+ * properly follows the default settings
+ */
+ test_queue(NULL, &result);
+ printf("Given sane values, mq_open without an attr struct "
+ "succeeds:\t\tPASS\n");
+ if (result.mq_maxmsg != cur_def_msgs ||
+ result.mq_msgsize != cur_def_msgsize)
+ printf("Kernel supports setting defaults, but does "
+ "not actually honor them:\tFAIL\n\n");
+ else {
+ set(def_msgs, ++cur_def_msgs);
+ set(def_msgsize, ++cur_def_msgsize);
+ /* In case max was the same as the default */
+ set(max_msgs, ++cur_max_msgs);
+ set(max_msgsize, ++cur_max_msgsize);
+ test_queue(NULL, &result);
+ if (result.mq_maxmsg != cur_def_msgs ||
+ result.mq_msgsize != cur_def_msgsize)
+ printf("Kernel supports setting defaults, but "
+ "does not actually honor them:\t"
+ "FAIL\n");
+ else
+ printf("Kernel properly honors default setting "
+ "knobs:\t\t\t\tPASS\n");
+ }
+ set(def_msgs, cur_max_msgs + 1);
+ cur_def_msgs = cur_max_msgs + 1;
+ set(def_msgsize, cur_max_msgsize + 1);
+ cur_def_msgsize = cur_max_msgsize + 1;
+ if (cur_def_msgs * (cur_def_msgsize + 2 * sizeof(void *)) >=
+ cur_limits.rlim_cur) {
+ cur_limits.rlim_cur = (cur_def_msgs + 2) *
+ (cur_def_msgsize + 2 * sizeof(void *));
+ cur_limits.rlim_max = 2 * cur_limits.rlim_cur;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ }
+ if (test_queue_fail(NULL, &result)) {
+ if (result.mq_maxmsg == cur_max_msgs &&
+ result.mq_msgsize == cur_max_msgsize)
+ printf("Kernel properly limits default values "
+ "to lesser of default/max:\t\tPASS\n");
+ else
+ printf("Kernel does not properly set default "
+ "queue parameters when\ndefaults > "
+ "max:\t\t\t\t\t\t\t\tFAIL\n");
+ } else
+ printf("Kernel fails to open mq because defaults are "
+ "greater than maximums:\tFAIL\n");
+ set(def_msgs, --cur_def_msgs);
+ set(def_msgsize, --cur_def_msgsize);
+ cur_limits.rlim_cur = cur_limits.rlim_max = cur_def_msgs *
+ cur_def_msgsize;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ if (test_queue_fail(NULL, &result))
+ printf("Kernel creates queue even though defaults "
+ "would exceed\nrlimit setting:"
+ "\t\t\t\t\t\t\t\tFAIL\n");
+ else
+ printf("Kernel properly fails to create queue when "
+ "defaults would\nexceed rlimit:"
+ "\t\t\t\t\t\t\t\tPASS\n");
+ }
+
+ /*
+ * Test #2 - open with an attr struct that exceeds rlimit
+ */
+ printf("\n\nTest series 2, behavior when attr struct is "
+ "passed to mq_open:\n");
+ cur_max_msgs = 32;
+ cur_max_msgsize = cur_limits.rlim_max >> 4;
+ set(max_msgs, cur_max_msgs);
+ set(max_msgsize, cur_max_msgsize);
+ attr.mq_maxmsg = cur_max_msgs;
+ attr.mq_msgsize = cur_max_msgsize;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open in excess of rlimit max when euid = 0 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open in excess of rlimit max when euid = 0 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = cur_max_msgs + 1;
+ attr.mq_msgsize = 10;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_maxmsg > limit when euid = 0 "
+ "succeeded:\t\tPASS\n");
+ else
+ printf("Queue open with mq_maxmsg > limit when euid = 0 "
+ "failed:\t\tFAIL\n");
+ attr.mq_maxmsg = 1;
+ attr.mq_msgsize = cur_max_msgsize + 1;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_msgsize > limit when euid = 0 "
+ "succeeded:\t\tPASS\n");
+ else
+ printf("Queue open with mq_msgsize > limit when euid = 0 "
+ "failed:\t\tFAIL\n");
+ attr.mq_maxmsg = 65536;
+ attr.mq_msgsize = 65536;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with total size > 2GB when euid = 0 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with total size > 2GB when euid = 0 "
+ "failed:\t\t\tPASS\n");
+
+ if (seteuid(99) == -1) {
+ perror("seteuid() failed");
+ exit(1);
+ }
+
+ attr.mq_maxmsg = cur_max_msgs;
+ attr.mq_msgsize = cur_max_msgsize;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open in excess of rlimit max when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open in excess of rlimit max when euid = 99 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = cur_max_msgs + 1;
+ attr.mq_msgsize = 10;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_maxmsg > limit when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with mq_maxmsg > limit when euid = 99 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = 1;
+ attr.mq_msgsize = cur_max_msgsize + 1;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with mq_msgsize > limit when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with mq_msgsize > limit when euid = 99 "
+ "failed:\t\tPASS\n");
+ attr.mq_maxmsg = 65536;
+ attr.mq_msgsize = 65536;
+ if (test_queue_fail(&attr, &result))
+ printf("Queue open with total size > 2GB when euid = 99 "
+ "succeeded:\t\tFAIL\n");
+ else
+ printf("Queue open with total size > 2GB when euid = 99 "
+ "failed:\t\t\tPASS\n");
+
+ shutdown(0,"",0);
+}
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
new file mode 100644
index 000000000..84fda3b49
--- /dev/null
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -0,0 +1,752 @@
+/*
+ * This application is Copyright 2012 Red Hat, Inc.
+ * Doug Ledford <dledford@redhat.com>
+ *
+ * mq_perf_tests is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * mq_perf_tests is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * For the full text of the license, see <http://www.gnu.org/licenses/>.
+ *
+ * mq_perf_tests.c
+ * Tests various types of message queue workloads, concentrating on those
+ * situations that involve large message sizes, large message queue depths,
+ * or both, and reports back useful metrics about kernel message queue
+ * performance.
+ *
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <mqueue.h>
+#include <popt.h>
+#include <error.h>
+
+#include "../kselftest.h"
+
+static char *usage =
+"Usage:\n"
+" %s [-c #[,#..] -f] path\n"
+"\n"
+" -c # Skip most tests and go straight to a high queue depth test\n"
+" and then run that test continuously (useful for running at\n"
+" the same time as some other workload to see how much the\n"
+" cache thrashing caused by adding messages to a very deep\n"
+" queue impacts the performance of other programs). The number\n"
+" indicates which CPU core we should bind the process to during\n"
+" the run. If you have more than one physical CPU, then you\n"
+" will need one copy per physical CPU package, and you should\n"
+" specify the CPU cores to pin ourself to via a comma separated\n"
+" list of CPU values.\n"
+" -f Only usable with continuous mode. Pin ourself to the CPUs\n"
+" as requested, then instead of looping doing a high mq\n"
+" workload, just busy loop. This will allow us to lock up a\n"
+" single CPU just like we normally would, but without actually\n"
+" thrashing the CPU cache. This is to make it easier to get\n"
+" comparable numbers from some other workload running on the\n"
+" other CPUs. One set of numbers with # CPUs locked up running\n"
+" an mq workload, and another set of numbers with those same\n"
+" CPUs locked away from the test workload, but not doing\n"
+" anything to trash the cache like the mq workload might.\n"
+" path Path name of the message queue to create\n"
+"\n"
+" Note: this program must be run as root in order to enable all tests\n"
+"\n";
+
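+/*
+ * Example invocations (CPU numbers hypothetical):
+ *
+ * ./mq_perf_tests # one-shot perf run on the last online CPU
+ * ./mq_perf_tests -c 2,3 # continuous mode pinned to CPUs 2 and 3
+ * ./mq_perf_tests -c 2,3 -f # same CPUs, busy-loop only (fake mode)
+ */
+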
+char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
+char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define MAX_CPUS 64
+char *cpu_option_string;
+int cpus_to_pin[MAX_CPUS];
+int num_cpus_to_pin;
+pthread_t cpu_threads[MAX_CPUS];
+pthread_t main_thread;
+cpu_set_t *cpu_set;
+int cpu_set_size;
+int cpus_online;
+
+#define MSG_SIZE 16
+#define TEST1_LOOPS 10000000
+#define TEST2_LOOPS 100000
+int continuous_mode;
+int continuous_mode_fake;
+
+struct rlimit saved_limits, cur_limits;
+int saved_max_msgs, saved_max_msgsize;
+int cur_max_msgs, cur_max_msgsize;
+FILE *max_msgs, *max_msgsize;
+int cur_nice;
+char *queue_path = "/mq_perf_tests";
+mqd_t queue = -1;
+struct mq_attr result;
+int mq_prio_max;
+
+const struct poptOption options[] = {
+ {
+ .longName = "continuous",
+ .shortName = 'c',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &cpu_option_string,
+ .val = 'c',
+ .descrip = "Run continuous tests at a high queue depth in "
+ "order to test the effects of cache thrashing on "
+ "other tasks on the system. This test is intended "
+ "to be run on one core of each physical CPU while "
+ "some other CPU intensive task is run on all the other "
+ "cores of that same physical CPU and the other task "
+ "is timed. It is assumed that the process of adding "
+ "messages to the message queue in a tight loop will "
+ "impact that other task to some degree. Once the "
+ "tests are performed in this way, you should then "
+ "re-run the tests using fake mode in order to check "
+ "the difference in time required to perform the CPU "
+ "intensive task",
+ .argDescrip = "cpu[,cpu]",
+ },
+ {
+ .longName = "fake",
+ .shortName = 'f',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &continuous_mode_fake,
+ .val = 0,
+ .descrip = "Tie up the CPUs that we would normally tie up in"
+ "continuous mode, but don't actually do any mq stuff, "
+ "just keep the CPU busy so it can't be used to process "
+ "system level tasks as this would free up resources on "
+ "the other CPU cores and skew the comparison between "
+ "the no-mqueue work and mqueue work tests",
+ .argDescrip = NULL,
+ },
+ {
+ .longName = "path",
+ .shortName = 'p',
+ .argInfo = POPT_ARG_STRING | POPT_ARGFLAG_SHOW_DEFAULT,
+ .arg = &queue_path,
+ .val = 'p',
+ .descrip = "The name of the path to use in the mqueue "
+ "filesystem for our tests",
+ .argDescrip = "pathname",
+ },
+ POPT_AUTOHELP
+ POPT_TABLEEND
+};
+
+static inline void __set(FILE *stream, int value, char *err_msg);
+void shutdown(int exit_val, char *err_cause, int line_no);
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context);
+void sig_action(int signum, siginfo_t *info, void *context);
+static inline int get(FILE *stream);
+static inline void set(FILE *stream, int value);
+static inline int try_set(FILE *stream, int value);
+static inline void getr(int type, struct rlimit *rlim);
+static inline void setr(int type, struct rlimit *rlim);
+static inline void open_queue(struct mq_attr *attr);
+void increase_limits(void);
+
+static inline void __set(FILE *stream, int value, char *err_msg)
+{
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ perror(err_msg);
+}
+
+
+void shutdown(int exit_val, char *err_cause, int line_no)
+{
+ static int in_shutdown = 0;
+ int errno_at_shutdown = errno;
+ int i;
+
+ /* In case we get called by multiple threads or from an sighandler */
+ if (in_shutdown++)
+ return;
+
+ /* Free the cpu_set allocated using CPU_ALLOC in main function */
+ CPU_FREE(cpu_set);
+
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i]) {
+ pthread_kill(cpu_threads[i], SIGUSR1);
+ pthread_join(cpu_threads[i], NULL);
+ }
+
+ if (queue != -1)
+ if (mq_close(queue))
+ perror("mq_close() during shutdown");
+ if (queue_path)
+ /*
+ * Be silent if this fails, if we cleaned up already it's
+ * expected to fail
+ */
+ mq_unlink(queue_path);
+ if (saved_max_msgs)
+ __set(max_msgs, saved_max_msgs,
+ "failed to restore saved_max_msgs");
+ if (saved_max_msgsize)
+ __set(max_msgsize, saved_max_msgsize,
+ "failed to restore saved_max_msgsize");
+ if (exit_val)
+ error(exit_val, errno_at_shutdown, "%s at %d",
+ err_cause, line_no);
+ exit(0);
+}
+
+void sig_action_SIGUSR1(int signum, siginfo_t *info, void *context)
+{
+ if (pthread_self() != main_thread)
+ pthread_exit(0);
+ else {
+ fprintf(stderr, "Caught signal %d in SIGUSR1 handler, "
+ "exiting\n", signum);
+ shutdown(0, "", 0);
+ fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+ exit(0);
+ }
+}
+
+void sig_action(int signum, siginfo_t *info, void *context)
+{
+ if (pthread_self() != main_thread)
+ pthread_kill(main_thread, signum);
+ else {
+ fprintf(stderr, "Caught signal %d, exiting\n", signum);
+ shutdown(0, "", 0);
+ fprintf(stderr, "\n\nReturned from shutdown?!?!\n\n");
+ exit(0);
+ }
+}
+
+static inline int get(FILE *stream)
+{
+ int value;
+ rewind(stream);
+ if (fscanf(stream, "%d", &value) != 1)
+ shutdown(4, "Error reading /proc entry", __LINE__);
+ return value;
+}
+
+static inline void set(FILE *stream, int value)
+{
+ int new_value;
+
+ rewind(stream);
+ if (fprintf(stream, "%d", value) < 0)
+ return shutdown(5, "Failed writing to /proc file", __LINE__);
+ new_value = get(stream);
+ if (new_value != value)
+ return shutdown(5, "We didn't get what we wrote to /proc back",
+ __LINE__);
+}
+
+static inline int try_set(FILE *stream, int value)
+{
+ int new_value;
+
+ rewind(stream);
+ fprintf(stream, "%d", value);
+ new_value = get(stream);
+ return new_value == value;
+}
+
+static inline void getr(int type, struct rlimit *rlim)
+{
+ if (getrlimit(type, rlim))
+ shutdown(6, "getrlimit()", __LINE__);
+}
+
+static inline void setr(int type, struct rlimit *rlim)
+{
+ if (setrlimit(type, rlim))
+ shutdown(7, "setrlimit()", __LINE__);
+}
+
+/**
+ * open_queue - open the global queue for testing
+ * @attr - An attr struct specifying the desired queue traits
+ *
+ * This open is not allowed to fail; failure will result in an orderly
+ * shutdown of the program. The global queue_path is used to set what
+ * queue to open, the queue descriptor is saved in the global queue
+ * variable, and the actual traits of the opened queue are saved in
+ * the global result struct.
+ */
+static inline void open_queue(struct mq_attr *attr)
+{
+ int flags = O_RDWR | O_EXCL | O_CREAT | O_NONBLOCK;
+ int perms = DEFFILEMODE;
+
+ queue = mq_open(queue_path, flags, perms, attr);
+ if (queue == -1)
+ shutdown(1, "mq_open()", __LINE__);
+ if (mq_getattr(queue, &result))
+ shutdown(1, "mq_getattr()", __LINE__);
+ printf("\n\tQueue %s created:\n", queue_path);
+ printf("\t\tmq_flags:\t\t\t%s\n", result.mq_flags & O_NONBLOCK ?
+ "O_NONBLOCK" : "(null)");
+ printf("\t\tmq_maxmsg:\t\t\t%lu\n", result.mq_maxmsg);
+ printf("\t\tmq_msgsize:\t\t\t%lu\n", result.mq_msgsize);
+ printf("\t\tmq_curmsgs:\t\t\t%lu\n", result.mq_curmsgs);
+}
+
+void *fake_cont_thread(void *arg)
+{
+ int i;
+
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i] == pthread_self())
+ break;
+ printf("\tStarted fake continuous mode thread %d on CPU %d\n", i,
+ cpus_to_pin[i]);
+ while (1)
+ ;
+}
+
+void *cont_thread(void *arg)
+{
+ char buff[MSG_SIZE];
+ int i, priority;
+
+ for (i = 0; i < num_cpus_to_pin; i++)
+ if (cpu_threads[i] == pthread_self())
+ break;
+ printf("\tStarted continuous mode thread %d on CPU %d\n", i,
+ cpus_to_pin[i]);
+ while (1) {
+ while (mq_send(queue, buff, sizeof(buff), 0) == 0)
+ ;
+ mq_receive(queue, buff, sizeof(buff), &priority);
+ }
+}
+
+#define drain_queue() \
+ while (mq_receive(queue, buff, MSG_SIZE, &prio_in) == MSG_SIZE)
+
+#define do_untimed_send() \
+ do { \
+ if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+ shutdown(3, "Test send failure", __LINE__); \
+ } while (0)
+
+#define do_send_recv() \
+ do { \
+ clock_gettime(clock, &start); \
+ if (mq_send(queue, buff, MSG_SIZE, prio_out)) \
+ shutdown(3, "Test send failure", __LINE__); \
+ clock_gettime(clock, &middle); \
+ if (mq_receive(queue, buff, MSG_SIZE, &prio_in) != MSG_SIZE) \
+ shutdown(3, "Test receive failure", __LINE__); \
+ clock_gettime(clock, &end); \
+ nsec = ((middle.tv_sec - start.tv_sec) * 1000000000) + \
+ (middle.tv_nsec - start.tv_nsec); \
+ send_total.tv_nsec += nsec; \
+ if (send_total.tv_nsec >= 1000000000) { \
+ send_total.tv_sec++; \
+ send_total.tv_nsec -= 1000000000; \
+ } \
+ nsec = ((end.tv_sec - middle.tv_sec) * 1000000000) + \
+ (end.tv_nsec - middle.tv_nsec); \
+ recv_total.tv_nsec += nsec; \
+ if (recv_total.tv_nsec >= 1000000000) { \
+ recv_total.tv_sec++; \
+ recv_total.tv_nsec -= 1000000000; \
+ } \
+ } while (0)
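+
+/*
+ * Note on do_send_recv(): three timestamps bracket one send/receive
+ * pair, so (middle - start) is charged to mq_send() and
+ * (end - middle) to mq_receive(). Each delta is accumulated into
+ * send_total/recv_total with a manual carry from the nanosecond
+ * field into the second field once it reaches 10^9.
+ */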
+
+struct test {
+ char *desc;
+ void (*func)(int *);
+};
+
+void const_prio(int *prio)
+{
+ return;
+}
+
+void inc_prio(int *prio)
+{
+ if (++*prio == mq_prio_max)
+ *prio = 0;
+}
+
+void dec_prio(int *prio)
+{
+ if (--*prio < 0)
+ *prio = mq_prio_max - 1;
+}
+
+void random_prio(int *prio)
+{
+ *prio = random() % mq_prio_max;
+}
+
+struct test test2[] = {
+ {"\n\tTest #2a: Time send/recv message, queue full, constant prio\n",
+ const_prio},
+ {"\n\tTest #2b: Time send/recv message, queue full, increasing prio\n",
+ inc_prio},
+ {"\n\tTest #2c: Time send/recv message, queue full, decreasing prio\n",
+ dec_prio},
+ {"\n\tTest #2d: Time send/recv message, queue full, random prio\n",
+ random_prio},
+ {NULL, NULL}
+};
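+
+/*
+ * test2[] is a sentinel-terminated table: perf_test_thread() below
+ * walks it until the NULL desc entry, running the same timed
+ * send/recv body with a different priority-stepping function on each
+ * pass.
+ */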
+
+/**
+ * Tests to perform (all done with MSG_SIZE messages):
+ *
+ * 1) Time to add/remove message with 0 messages on queue
+ * 1a) with constant prio
+ * 2) Time to add/remove message when queue close to capacity:
+ * 2a) with constant prio
+ * 2b) with increasing prio
+ * 2c) with decreasing prio
+ * 2d) with random prio
+ * 3) Test limits of priorities honored (double check _SC_MQ_PRIO_MAX)
+ */
+void *perf_test_thread(void *arg)
+{
+ char buff[MSG_SIZE];
+ int prio_out, prio_in;
+ int i;
+ clockid_t clock;
+ pthread_t *t;
+ struct timespec res, start, middle, end, send_total, recv_total;
+ unsigned long long nsec;
+ struct test *cur_test;
+
+ t = &cpu_threads[0];
+ printf("\n\tStarted mqueue performance test thread on CPU %d\n",
+ cpus_to_pin[0]);
+ mq_prio_max = sysconf(_SC_MQ_PRIO_MAX);
+ if (mq_prio_max == -1)
+ shutdown(2, "sysconf(_SC_MQ_PRIO_MAX)", __LINE__);
+ if (pthread_getcpuclockid(cpu_threads[0], &clock) != 0)
+ shutdown(2, "pthread_getcpuclockid", __LINE__);
+
+ if (clock_getres(clock, &res))
+ shutdown(2, "clock_getres()", __LINE__);
+
+ printf("\t\tMax priorities:\t\t\t%d\n", mq_prio_max);
+ printf("\t\tClock resolution:\t\t%lu nsec%s\n", res.tv_nsec,
+ res.tv_nsec > 1 ? "s" : "");
+
+ printf("\n\tTest #1: Time send/recv message, queue empty\n");
+ printf("\t\t(%d iterations)\n", TEST1_LOOPS);
+ prio_out = 0;
+ send_total.tv_sec = 0;
+ send_total.tv_nsec = 0;
+ recv_total.tv_sec = 0;
+ recv_total.tv_nsec = 0;
+ for (i = 0; i < TEST1_LOOPS; i++)
+ do_send_recv();
+ printf("\t\tSend msg:\t\t\t%ld.%lus total time\n",
+ send_total.tv_sec, send_total.tv_nsec);
+ nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
+ send_total.tv_nsec) / TEST1_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+ printf("\t\tRecv msg:\t\t\t%ld.%lus total time\n",
+ recv_total.tv_sec, recv_total.tv_nsec);
+ nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
+ recv_total.tv_nsec) / TEST1_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+
+
+ for (cur_test = test2; cur_test->desc != NULL; cur_test++) {
+ printf("%s:\n", cur_test->desc);
+ printf("\t\t(%d iterations)\n", TEST2_LOOPS);
+ prio_out = 0;
+ send_total.tv_sec = 0;
+ send_total.tv_nsec = 0;
+ recv_total.tv_sec = 0;
+ recv_total.tv_nsec = 0;
+ printf("\t\tFilling queue...");
+ fflush(stdout);
+ clock_gettime(clock, &start);
+ for (i = 0; i < result.mq_maxmsg - 1; i++) {
+ do_untimed_send();
+ cur_test->func(&prio_out);
+ }
+ clock_gettime(clock, &end);
+ nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
+ 1000000000) + (end.tv_nsec - start.tv_nsec);
+ printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
+ nsec % 1000000000);
+ printf("\t\tTesting...");
+ fflush(stdout);
+ for (i = 0; i < TEST2_LOOPS; i++) {
+ do_send_recv();
+ cur_test->func(&prio_out);
+ }
+ printf("done.\n");
+ printf("\t\tSend msg:\t\t\t%ld.%lus total time\n",
+ send_total.tv_sec, send_total.tv_nsec);
+ nsec = ((unsigned long long)send_total.tv_sec * 1000000000 +
+ send_total.tv_nsec) / TEST2_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+ printf("\t\tRecv msg:\t\t\t%ld.%lus total time\n",
+ recv_total.tv_sec, recv_total.tv_nsec);
+ nsec = ((unsigned long long)recv_total.tv_sec * 1000000000 +
+ recv_total.tv_nsec) / TEST2_LOOPS;
+ printf("\t\t\t\t\t\t%lld nsec/msg\n", nsec);
+ printf("\t\tDraining queue...");
+ fflush(stdout);
+ clock_gettime(clock, &start);
+ drain_queue();
+ clock_gettime(clock, &end);
+ nsec = ((unsigned long long)(end.tv_sec - start.tv_sec) *
+ 1000000000) + (end.tv_nsec - start.tv_nsec);
+ printf("done.\t\t%lld.%llds\n", nsec / 1000000000,
+ nsec % 1000000000);
+ }
+ return 0;
+}
+
+void increase_limits(void)
+{
+ cur_limits.rlim_cur = RLIM_INFINITY;
+ cur_limits.rlim_max = RLIM_INFINITY;
+ setr(RLIMIT_MSGQUEUE, &cur_limits);
+ while (try_set(max_msgs, cur_max_msgs += 10))
+ ;
+ cur_max_msgs = get(max_msgs);
+ while (try_set(max_msgsize, cur_max_msgsize += 1024))
+ ;
+ cur_max_msgsize = get(max_msgsize);
+ if (setpriority(PRIO_PROCESS, 0, -20) != 0)
+ shutdown(2, "setpriority()", __LINE__);
+ cur_nice = -20;
+}
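+
+/*
+ * Note: increase_limits() probes rather than computes the ceiling:
+ * it raises msg_max in steps of 10 and msgsize_max in steps of 1024
+ * until try_set() reports that the kernel refused the write, then
+ * reads back the last value the kernel actually accepted.
+ */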
+
+int main(int argc, char *argv[])
+{
+ struct mq_attr attr;
+ char *option, *next_option;
+ int i, cpu, rc;
+ struct sigaction sa;
+ poptContext popt_context;
+ void *retval;
+
+ main_thread = pthread_self();
+ num_cpus_to_pin = 0;
+
+ if (sysconf(_SC_NPROCESSORS_ONLN) == -1) {
+ perror("sysconf(_SC_NPROCESSORS_ONLN)");
+ exit(1);
+ }
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+ cpu_set = CPU_ALLOC(cpus_online);
+ if (cpu_set == NULL) {
+ perror("CPU_ALLOC()");
+ exit(1);
+ }
+ cpu_set_size = CPU_ALLOC_SIZE(cpus_online);
+ CPU_ZERO_S(cpu_set_size, cpu_set);
+
+ popt_context = poptGetContext(NULL, argc, (const char **)argv,
+ options, 0);
+
+ while ((rc = poptGetNextOpt(popt_context)) > 0) {
+ switch (rc) {
+ case 'c':
+ continuous_mode = 1;
+ option = cpu_option_string;
+ do {
+ next_option = strchr(option, ',');
+ if (next_option)
+ *next_option = '\0';
+ cpu = atoi(option);
+ if (cpu >= cpus_online)
+ fprintf(stderr, "CPU %d exceeds "
+ "cpus online, ignoring.\n",
+ cpu);
+ else
+ cpus_to_pin[num_cpus_to_pin++] = cpu;
+ if (next_option)
+ option = ++next_option;
+ } while (next_option && num_cpus_to_pin < MAX_CPUS);
+ /* Double check that they didn't give us the same CPU
+ * more than once */
+ for (cpu = 0; cpu < num_cpus_to_pin; cpu++) {
+ if (CPU_ISSET_S(cpus_to_pin[cpu], cpu_set_size,
+ cpu_set)) {
+ fprintf(stderr, "Any given CPU may "
+ "only be given once.\n");
+ goto err_code;
+ } else
+ CPU_SET_S(cpus_to_pin[cpu],
+ cpu_set_size, cpu_set);
+ }
+ break;
+ case 'p':
+ /*
+ * Although we can create a msg queue with a
+ * non-absolute path name, unlink will fail. So,
+ * if the name doesn't start with a /, add one
+ * when we save it.
+ */
+ option = queue_path;
+ if (*option != '/') {
+ queue_path = malloc(strlen(option) + 2);
+ if (!queue_path) {
+ perror("malloc()");
+ goto err_code;
+ }
+ queue_path[0] = '/';
+ queue_path[1] = 0;
+ strcat(queue_path, option);
+ free(option);
+ }
+ break;
+ }
+ }
+
+ if (continuous_mode && num_cpus_to_pin == 0) {
+ fprintf(stderr, "Must pass at least one CPU to continuous "
+ "mode.\n");
+ poptPrintUsage(popt_context, stderr, 0);
+ goto err_code;
+ } else if (!continuous_mode) {
+ num_cpus_to_pin = 1;
+ cpus_to_pin[0] = cpus_online - 1;
+ }
+
+ max_msgs = fopen(MAX_MSGS, "r+");
+ max_msgsize = fopen(MAX_MSGSIZE, "r+");
+ if (!max_msgs)
+ shutdown(2, "Failed to open msg_max", __LINE__);
+ if (!max_msgsize)
+ shutdown(2, "Failed to open msgsize_max", __LINE__);
+
+ /* Load up the current system values for everything we can */
+ getr(RLIMIT_MSGQUEUE, &saved_limits);
+ cur_limits = saved_limits;
+ saved_max_msgs = cur_max_msgs = get(max_msgs);
+ saved_max_msgsize = cur_max_msgsize = get(max_msgsize);
+ errno = 0;
+ cur_nice = getpriority(PRIO_PROCESS, 0);
+ if (errno)
+ shutdown(2, "getpriority()", __LINE__);
+
+ /* Tell the user our initial state */
+ printf("\nInitial system state:\n");
+ printf("\tUsing queue path:\t\t\t%s\n", queue_path);
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+ (long) saved_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+ (long) saved_limits.rlim_max);
+ printf("\tMaximum Message Size:\t\t\t%d\n", saved_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t\t%d\n", saved_max_msgs);
+ printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+ printf("\n");
+
+ increase_limits();
+
+ printf("Adjusted system state for testing:\n");
+ if (cur_limits.rlim_cur == RLIM_INFINITY) {
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t(unlimited)\n");
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t(unlimited)\n");
+ } else {
+ printf("\tRLIMIT_MSGQUEUE(soft):\t\t\t%ld\n",
+ (long) cur_limits.rlim_cur);
+ printf("\tRLIMIT_MSGQUEUE(hard):\t\t\t%ld\n",
+ (long) cur_limits.rlim_max);
+ }
+ printf("\tMaximum Message Size:\t\t\t%d\n", cur_max_msgsize);
+ printf("\tMaximum Queue Size:\t\t\t%d\n", cur_max_msgs);
+ printf("\tNice value:\t\t\t\t%d\n", cur_nice);
+ printf("\tContinuous mode:\t\t\t(%s)\n", continuous_mode ?
+ (continuous_mode_fake ? "fake mode" : "enabled") :
+ "disabled");
+ printf("\tCPUs to pin:\t\t\t\t%d", cpus_to_pin[0]);
+ for (cpu = 1; cpu < num_cpus_to_pin; cpu++)
+ printf(",%d", cpus_to_pin[cpu]);
+ printf("\n");
+
+ sa.sa_sigaction = sig_action_SIGUSR1;
+ sigemptyset(&sa.sa_mask);
+ sigaddset(&sa.sa_mask, SIGHUP);
+ sigaddset(&sa.sa_mask, SIGINT);
+ sigaddset(&sa.sa_mask, SIGQUIT);
+ sigaddset(&sa.sa_mask, SIGTERM);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGUSR1)", __LINE__);
+ sa.sa_sigaction = sig_action;
+ if (sigaction(SIGHUP, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGHUP)", __LINE__);
+ if (sigaction(SIGINT, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGINT)", __LINE__);
+ if (sigaction(SIGQUIT, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGQUIT)", __LINE__);
+ if (sigaction(SIGTERM, &sa, NULL) == -1)
+ shutdown(1, "sigaction(SIGTERM)", __LINE__);
+
+ if (!continuous_mode_fake) {
+ attr.mq_flags = O_NONBLOCK;
+ attr.mq_maxmsg = cur_max_msgs;
+ attr.mq_msgsize = MSG_SIZE;
+ open_queue(&attr);
+ }
+ for (i = 0; i < num_cpus_to_pin; i++) {
+ pthread_attr_t thread_attr;
+ void *thread_func;
+
+ if (continuous_mode_fake)
+ thread_func = &fake_cont_thread;
+ else if (continuous_mode)
+ thread_func = &cont_thread;
+ else
+ thread_func = &perf_test_thread;
+
+ CPU_ZERO_S(cpu_set_size, cpu_set);
+ CPU_SET_S(cpus_to_pin[i], cpu_set_size, cpu_set);
+ pthread_attr_init(&thread_attr);
+ pthread_attr_setaffinity_np(&thread_attr, cpu_set_size,
+ cpu_set);
+ if (pthread_create(&cpu_threads[i], &thread_attr, thread_func,
+ NULL))
+ shutdown(1, "pthread_create()", __LINE__);
+ pthread_attr_destroy(&thread_attr);
+ }
+
+ if (!continuous_mode) {
+ pthread_join(cpu_threads[0], &retval);
+ shutdown((long)retval, "perf_test_thread()", __LINE__);
+ } else {
+ while (1)
+ sleep(1);
+ }
+ shutdown(0, "", 0);
+
+err_code:
+ CPU_FREE(cpu_set);
+ exit(1);
+}
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
new file mode 100644
index 000000000..78b24cf76
--- /dev/null
+++ b/tools/testing/selftests/net/.gitignore
@@ -0,0 +1,16 @@
+msg_zerocopy
+socket
+psock_fanout
+psock_snd
+psock_tpacket
+reuseport_bpf
+reuseport_bpf_cpu
+reuseport_bpf_numa
+reuseport_dualstack
+reuseaddr_conflict
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
+tcp_inq
+tls
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
new file mode 100644
index 000000000..9a3764a10
--- /dev/null
+++ b/tools/testing/selftests/net/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../usr/include/
+
+TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
+TEST_PROGS_EXTENDED := in_netns.sh
+TEST_GEN_FILES = socket
+TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
+TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
+$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
new file mode 100644
index 000000000..cd3a2f154
--- /dev/null
+++ b/tools/testing/selftests/net/config
@@ -0,0 +1,16 @@
+CONFIG_USER_NS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_TEST_BPF=m
+CONFIG_NUMA=y
+CONFIG_NET_VRF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_VETH=y
+CONFIG_INET_XFRM_MODE_TUNNEL=y
+CONFIG_NET_IPVTI=y
+CONFIG_INET6_XFRM_MODE_TUNNEL=y
+CONFIG_IPV6_VTI=y
+CONFIG_DUMMY=y
+CONFIG_BRIDGE=y
+CONFIG_VLAN_8021Q=y
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
new file mode 100755
index 000000000..864f865ee
--- /dev/null
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -0,0 +1,467 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# IPv4 and IPv6 onlink tests
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# Network interfaces
+# - odd in current namespace; even in peer ns
+declare -A NETIFS
+# default VRF
+NETIFS[p1]=veth1
+NETIFS[p2]=veth2
+NETIFS[p3]=veth3
+NETIFS[p4]=veth4
+# VRF
+NETIFS[p5]=veth5
+NETIFS[p6]=veth6
+NETIFS[p7]=veth7
+NETIFS[p8]=veth8
+
+# /24 network
+declare -A V4ADDRS
+V4ADDRS[p1]=169.254.1.1
+V4ADDRS[p2]=169.254.1.2
+V4ADDRS[p3]=169.254.3.1
+V4ADDRS[p4]=169.254.3.2
+V4ADDRS[p5]=169.254.5.1
+V4ADDRS[p6]=169.254.5.2
+V4ADDRS[p7]=169.254.7.1
+V4ADDRS[p8]=169.254.7.2
+
+# /64 network
+declare -A V6ADDRS
+V6ADDRS[p1]=2001:db8:101::1
+V6ADDRS[p2]=2001:db8:101::2
+V6ADDRS[p3]=2001:db8:301::1
+V6ADDRS[p4]=2001:db8:301::2
+V6ADDRS[p5]=2001:db8:501::1
+V6ADDRS[p6]=2001:db8:501::2
+V6ADDRS[p7]=2001:db8:701::1
+V6ADDRS[p8]=2001:db8:701::2
+
+# Test networks:
+# [1] = default table
+# [2] = VRF
+#
+# /32 host routes
+declare -A TEST_NET4
+TEST_NET4[1]=169.254.101
+TEST_NET4[2]=169.254.102
+# /128 host routes
+declare -A TEST_NET6
+TEST_NET6[1]=2001:db8:101
+TEST_NET6[2]=2001:db8:102
+
+# connected gateway
+CONGW[1]=169.254.1.254
+CONGW[2]=169.254.3.254
+CONGW[3]=169.254.5.254
+
+# recursive gateway
+RECGW4[1]=169.254.11.254
+RECGW4[2]=169.254.12.254
+RECGW6[1]=2001:db8:11::64
+RECGW6[2]=2001:db8:12::64
+
+# for v4 mapped to v6
+declare -A TEST_NET4IN6IN6
+TEST_NET4IN6[1]=10.1.1.254
+TEST_NET4IN6[2]=10.2.1.254
+
+# mcast address
+MCAST6=ff02::1
+
+
+PEER_NS=bart
+PEER_CMD="ip netns exec ${PEER_NS}"
+VRF=lisa
+VRF_TABLE=1101
+PBR_TABLE=101
+
+################################################################################
+# utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+log_subsection()
+{
+ echo
+ echo "#########################################"
+ echo "TEST SUBSECTION: $*"
+}
+
+run_cmd()
+{
+ echo
+ echo "COMMAND: $*"
+ eval $*
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local pfx
+ local addr
+
+ addr=$(${pfx} ip -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
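+
+# Example: given "ip -6 -br addr show dev veth1" output such as
+# "veth1 UP 2001:db8:101::1/64 fe80::a8bb:ccff:fe00:1/64"
+# (addresses hypothetical), get_linklocal prints
+# "fe80::a8bb:ccff:fe00:1" with the prefix length stripped.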
+
+################################################################################
+#
+
+setup()
+{
+ echo
+ echo "########################################"
+ echo "Configuring interfaces"
+
+ set -e
+
+ # create namespace
+ ip netns add ${PEER_NS}
+ ip -netns ${PEER_NS} li set lo up
+
+ # add vrf table
+ ip li add ${VRF} type vrf table ${VRF_TABLE}
+ ip li set ${VRF} up
+ ip ro add table ${VRF_TABLE} unreachable default metric 8192
+ ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
+
+ # create test interfaces
+ ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
+ ip li add ${NETIFS[p3]} type veth peer name ${NETIFS[p4]}
+ ip li add ${NETIFS[p5]} type veth peer name ${NETIFS[p6]}
+ ip li add ${NETIFS[p7]} type veth peer name ${NETIFS[p8]}
+
+ # enslave vrf interfaces
+ for n in 5 7; do
+ ip li set ${NETIFS[p${n}]} vrf ${VRF}
+ done
+
+ # add addresses
+ for n in 1 3 5 7; do
+ ip li set ${NETIFS[p${n}]} up
+ ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+ done
+
+ # move peer interfaces to namespace and add addresses
+ for n in 2 4 6 8; do
+ ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
+ ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
+ done
+
+ ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+ ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
+
+ set +e
+}
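+
+# Resulting topology: veth1/veth3 stay in the default VRF of the
+# current namespace, veth5/veth7 are enslaved to the lisa VRF device,
+# and each even-numbered peer (veth2/4/6/8) lives in the bart
+# namespace with the matching /24 and /64 addresses from the tables
+# above.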
+
+cleanup()
+{
+ # make sure we start from a clean slate
+ ip netns del ${PEER_NS} 2>/dev/null
+ for n in 1 3 5 7; do
+ ip link del ${NETIFS[p${n}]} 2>/dev/null
+ done
+ ip link del ${VRF} 2>/dev/null
+ ip ro flush table ${VRF_TABLE}
+ ip -6 ro flush table ${VRF_TABLE}
+}
+
+################################################################################
+# IPv4 tests
+#
+
+run_ip()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
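+
+# Example (from valid_onlink_ipv4 below):
+# run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+# adds a /32 onlink route to the main table (254) and expects rc 0.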
+
+run_ip_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local nh1="$3"
+ local nh2="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ run_cmd ip ro add table "${table}" "${prefix}"/32 \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv4()
+{
+ # - unicast connected, unicast recursive
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip 254 ${TEST_NET4[1]}.1 ${CONGW[1]} ${NETIFS[p1]} 0 "unicast connected"
+ run_ip 254 ${TEST_NET4[1]}.2 ${RECGW4[1]} ${NETIFS[p1]} 0 "unicast recursive"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.1 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.2 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.3 ${CONGW[3]} ${NETIFS[p5]} 0 "unicast connected"
+ run_ip ${PBR_TABLE} ${TEST_NET4[2]}.4 ${RECGW4[2]} ${NETIFS[p5]} 0 "unicast recursive"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.5 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.6 \
+ "${RECGW4[1]} dev ${NETIFS[p1]} onlink" \
+ "${RECGW4[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast recursive - multipath"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.7 \
+ "${CONGW[1]} dev ${NETIFS[p1]}" \
+ "${CONGW[2]} dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip_mpath 254 ${TEST_NET4[1]}.8 \
+ "${CONGW[1]} dev ${NETIFS[p1]} onlink" \
+ "${CONGW[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv4()
+{
+ run_ip 254 ${TEST_NET4[1]}.11 ${V4ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.11 ${V4ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+
+ run_ip 254 ${TEST_NET4[1]}.101 ${V4ADDRS[p1]} "" 2 "No nexthop device given"
+
+ run_ip 254 ${TEST_NET4[1]}.102 ${V4ADDRS[p3]} ${NETIFS[p1]} 2 \
+ "Gateway resolves to wrong nexthop device"
+
+ run_ip ${VRF_TABLE} ${TEST_NET4[2]}.103 ${V4ADDRS[p7]} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+################################################################################
+# IPv6 tests
+#
+
+run_ip6()
+{
+ local table="$1"
+ local prefix="$2"
+ local gw="$3"
+ local dev="$4"
+ local exp_rc="$5"
+ local desc="$6"
+
+ # dev arg may be empty
+ [ -n "${dev}" ] && dev="dev ${dev}"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 via "${gw}" "${dev}" onlink
+ log_test $? ${exp_rc} "${desc}"
+}
+
+run_ip6_mpath()
+{
+ local table="$1"
+ local prefix="$2"
+ local opts="$3"
+ local nh1="$4"
+ local nh2="$5"
+ local exp_rc="$6"
+ local desc="$7"
+
+ run_cmd ip -6 ro add table "${table}" "${prefix}"/128 "${opts}" \
+ nexthop via ${nh1} nexthop via ${nh2}
+ log_test $? ${exp_rc} "${desc}"
+}
+
+valid_onlink_ipv6()
+{
+ # - unicast connected, unicast recursive, v4-mapped
+ #
+ log_subsection "default VRF - main table"
+
+ run_ip6 254 ${TEST_NET6[1]}::1 ${V6ADDRS[p1]/::*}::64 ${NETIFS[p1]} 0 "unicast connected"
+ run_ip6 254 ${TEST_NET6[1]}::2 ${RECGW6[1]} ${NETIFS[p1]} 0 "unicast recursive"
+ run_ip6 254 ${TEST_NET6[1]}::3 ::ffff:${TEST_NET4IN6[1]} ${NETIFS[p1]} 0 "v4-mapped"
+
+ log_subsection "VRF ${VRF}"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::1 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::2 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::3 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ log_subsection "VRF device, PBR table"
+
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::4 ${V6ADDRS[p5]/::*}::64 ${NETIFS[p5]} 0 "unicast connected"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::5 ${RECGW6[2]} ${NETIFS[p5]} 0 "unicast recursive"
+ run_ip6 ${PBR_TABLE} ${TEST_NET6[2]}::6 ::ffff:${TEST_NET4IN6[2]} ${NETIFS[p5]} 0 "v4-mapped"
+
+ # multipath version
+ #
+ log_subsection "default VRF - main table - multipath"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::4 "onlink" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::5 "onlink" \
+ "${RECGW6[1]} dev ${NETIFS[p1]}" \
+ "${RECGW6[2]} dev ${NETIFS[p3]}" \
+ 0 "unicast recursive - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::6 "onlink" \
+ "::ffff:${TEST_NET4IN6[1]} dev ${NETIFS[p1]}" \
+ "::ffff:${TEST_NET4IN6[2]} dev ${NETIFS[p3]}" \
+ 0 "v4-mapped - multipath onlink"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::7 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink both nexthops"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::8 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]} onlink" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]}" \
+ 0 "unicast connected - multipath onlink first only"
+
+ run_ip6_mpath 254 ${TEST_NET6[1]}::9 "" \
+ "${V6ADDRS[p1]/::*}::64 dev ${NETIFS[p1]}" \
+ "${V6ADDRS[p3]/::*}::64 dev ${NETIFS[p3]} onlink" \
+ 0 "unicast connected - multipath onlink second only"
+}
+
+invalid_onlink_ipv6()
+{
+ local lladdr
+
+ lladdr=$(get_linklocal ${NETIFS[p1]}) || return 1
+
+ run_ip6 254 ${TEST_NET6[1]}::11 ${V6ADDRS[p1]} ${NETIFS[p1]} 2 \
+ "Invalid gw - local unicast address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${lladdr} ${NETIFS[p1]} 2 \
+ "Invalid gw - local linklocal address"
+ run_ip6 254 ${TEST_NET6[1]}::12 ${MCAST6} ${NETIFS[p1]} 2 \
+ "Invalid gw - multicast address"
+
+ lladdr=$(get_linklocal ${NETIFS[p5]}) || return 1
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::11 ${V6ADDRS[p5]} ${NETIFS[p5]} 2 \
+ "Invalid gw - local unicast address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${lladdr} ${NETIFS[p5]} 2 \
+ "Invalid gw - local linklocal address, VRF"
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::12 ${MCAST6} ${NETIFS[p5]} 2 \
+ "Invalid gw - multicast address, VRF"
+
+ run_ip6 254 ${TEST_NET6[1]}::101 ${V6ADDRS[p1]} "" 2 \
+ "No nexthop device given"
+
+ # default VRF validation is done against LOCAL table
+ # run_ip6 254 ${TEST_NET6[1]}::102 ${V6ADDRS[p3]/::[0-9]/::64} ${NETIFS[p1]} 2 \
+ # "Gateway resolves to wrong nexthop device"
+
+ run_ip6 ${VRF_TABLE} ${TEST_NET6[2]}::103 ${V6ADDRS[p7]/::[0-9]/::64} ${NETIFS[p5]} 2 \
+ "Gateway resolves to wrong nexthop device - VRF"
+}
+
+run_onlink_tests()
+{
+ log_section "IPv4 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv4
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv4
+
+ log_section "IPv6 onlink"
+ log_subsection "Valid onlink commands"
+ valid_onlink_ipv6
+ log_subsection "Invalid onlink commands"
+ invalid_onlink_ipv6
+}
+
+################################################################################
+# main
+
+nsuccess=0
+nfail=0
+
+cleanup
+setup
+run_onlink_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 000000000..ba2d9fab2
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV_ADDR6=2001:db8:1::1
+DEV=dummy0
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "######################################################################"
+ echo "TEST SECTION: $*"
+ echo "######################################################################"
+}
+
+setup()
+{
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add $DEV_ADDR/24 dev dummy0
+ $IP -6 address add $DEV_ADDR6/64 dev dummy0
+
+ set +e
+}
+
+cleanup()
+{
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+ ip rule help 2>&1 | grep -q $1
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing $1 match"
+ return 1
+ fi
+
+ ip route get help 2>&1 | grep -q $2
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 get route too old, missing $2 match"
+ return 1
+ fi
+
+ return 0
+}
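+
+# Illustrative usage (mirroring the callers below; the match keywords are
+# examples): run a match test only when the installed iproute2 supports it:
+#   fib_check_iproute_support "uidrange" "uid"
+#   [ $? -eq 0 ] && fib_rule6_test_match_n_redirect "uidrange 100-100" \
+#	"uid 100" "uid redirect to table"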
+
+fib_rule6_del()
+{
+ $IP -6 rule del $1
+ log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+	pref=$($IP -6 rule show | grep "$1 lookup $RTABLE" | cut -d ":" -f 1)
+ $IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+	local description="$3"
+
+	$IP -6 rule add $match table $RTABLE
+	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule6 check: $description"
+
+	fib_rule6_del_by_pref "$match"
+	log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+ # setup the fib rule redirect route
+ $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP6 iif $DEV"
+ fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto ipv6-icmp"
+ fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+ fi
+}
+
+fib_rule4_del()
+{
+ $IP rule del $1
+	log_test $? 0 "rule4 del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+	pref=$($IP rule show | grep "$1 lookup $RTABLE" | cut -d ":" -f 1)
+ $IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+	local description="$3"
+
+	$IP rule add $match table $RTABLE
+	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule4 check: $description"
+
+	fib_rule4_del_by_pref "$match"
+	log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+ # setup the fib rule redirect route
+ $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+ match="oif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+ match="from $SRC_IP iif $DEV"
+ fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+ match="tos 0x10"
+ fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+ match="fwmark 0x64"
+ getmatch="mark 0x64"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+ fib_check_iproute_support "uidrange" "uid"
+ if [ $? -eq 0 ]; then
+ match="uidrange 100-100"
+ getmatch="uid 100"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ fi
+
+ fib_check_iproute_support "sport" "sport"
+ if [ $? -eq 0 ]; then
+ match="sport 666 dport 777"
+ fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ fi
+
+ fib_check_iproute_support "ipproto" "ipproto"
+ if [ $? -eq 0 ]; then
+ match="ipproto icmp"
+ fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ fi
+}
+
+run_fibrule_tests()
+{
+ log_section "IPv4 fib rule"
+ fib_rule4_test
+ log_section "IPv6 fib rule"
+ fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
new file mode 100755
index 000000000..a5ba14976
--- /dev/null
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -0,0 +1,1439 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB behavior in response to
+# different events.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+IP="ip -netns testns"
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+ ip netns add testns
+ $IP link set dev lo up
+
+ $IP link add dummy0 type dummy
+ $IP link set dev dummy0 up
+ $IP address add 198.51.100.1/24 dev dummy0
+ $IP -6 address add 2001:db8:1::1/64 dev dummy0
+	set +e
+}
+
+cleanup()
+{
+ $IP link del dev dummy0 &> /dev/null
+ ip netns del testns
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local addr
+
+ addr=$($IP -6 -br addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
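+
+# Illustrative usage (the printed address is an example, not a fixed value):
+#   lladdr=$(get_linklocal dummy0) || return 1	# e.g. fe80::200:ff:fe00:1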
+
+fib_unreg_unicast_test()
+{
+ echo
+ echo "Single path route test"
+
+ setup
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link del dev dummy0
+ set +e
+
+ echo " Nexthop device deleted"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch - no route"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch - no route"
+
+ cleanup
+}
+
+fib_unreg_multipath_test()
+{
+ echo
+ echo "Multipath route test"
+
+ setup
+
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+
+ $IP route add 203.0.113.0/24 \
+ nexthop via 198.51.100.2 dev dummy0 \
+ nexthop via 192.0.2.2 dev dummy1
+ $IP -6 route add 2001:db8:3::/64 \
+ nexthop via 2001:db8:1::2 dev dummy0 \
+ nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link del dev dummy0
+ set +e
+
+ echo " One nexthop device deleted"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 - multipath route removed on delete"
+
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ # In IPv6 we do not flush the entire multipath route.
+ log_test $? 0 "IPv6 - multipath down to single path"
+
+ set -e
+ $IP link del dev dummy1
+ set +e
+
+ echo " Second nexthop device deleted"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 - no route"
+
+ cleanup
+}
+
+fib_unreg_test()
+{
+ fib_unreg_unicast_test
+ fib_unreg_multipath_test
+}
+
+fib_down_unicast_test()
+{
+ echo
+ echo "Single path, admin down"
+
+ setup
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+
+ echo " Route deleted on down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
+
+ cleanup
+}
+
+fib_down_multipath_test_do()
+{
+ local down_dev=$1
+ local up_dev=$2
+
+ $IP route get fibmatch 203.0.113.1 \
+ oif $down_dev &> /dev/null
+ log_test $? 2 "IPv4 fibmatch on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
+ oif $down_dev &> /dev/null
+ log_test $? 2 "IPv6 fibmatch on down device"
+
+ $IP route get fibmatch 203.0.113.1 \
+ oif $up_dev &> /dev/null
+ log_test $? 0 "IPv4 fibmatch on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 \
+ oif $up_dev &> /dev/null
+ log_test $? 0 "IPv6 fibmatch on up device"
+
+ $IP route get fibmatch 203.0.113.1 | \
+ grep $down_dev | grep -q "dead linkdown"
+ log_test $? 0 "IPv4 flags on down device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
+ grep $down_dev | grep -q "dead linkdown"
+ log_test $? 0 "IPv6 flags on down device"
+
+ $IP route get fibmatch 203.0.113.1 | \
+ grep $up_dev | grep -q "dead linkdown"
+ log_test $? 1 "IPv4 flags on up device"
+ $IP -6 route get fibmatch 2001:db8:3::1 | \
+ grep $up_dev | grep -q "dead linkdown"
+ log_test $? 1 "IPv6 flags on up device"
+}
+
+fib_down_multipath_test()
+{
+ echo
+ echo "Admin down multipath"
+
+ setup
+
+ set -e
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 up
+
+ $IP address add 192.0.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+
+ $IP route add 203.0.113.0/24 \
+ nexthop via 198.51.100.2 dev dummy0 \
+ nexthop via 192.0.2.2 dev dummy1
+ $IP -6 route add 2001:db8:3::/64 \
+ nexthop via 2001:db8:1::2 dev dummy0 \
+ nexthop via 2001:db8:2::2 dev dummy1
+ set +e
+
+ echo " Verify start point"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+
+ echo " One device down, one up"
+ fib_down_multipath_test_do "dummy0" "dummy1"
+
+ set -e
+ $IP link set dev dummy0 up
+ $IP link set dev dummy1 down
+ set +e
+
+ echo " Other device down and up"
+ fib_down_multipath_test_do "dummy1" "dummy0"
+
+ set -e
+ $IP link set dev dummy0 down
+ set +e
+
+ echo " Both devices down"
+ $IP route get fibmatch 203.0.113.1 &> /dev/null
+ log_test $? 2 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:3::1 &> /dev/null
+ log_test $? 2 "IPv6 fibmatch"
+
+ $IP link del dev dummy1
+ cleanup
+}
+
+fib_down_test()
+{
+ fib_down_unicast_test
+ fib_down_multipath_test
+}
+
+# Local routes should not be affected when carrier changes.
+fib_carrier_local_test()
+{
+ echo
+ echo "Local carrier tests - single path"
+
+ setup
+
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv4 - no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::1 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv6 - no linkdown flag"
+
+ set -e
+ $IP link set dev dummy0 carrier off
+ sleep 1
+ set +e
+
+ echo " Carrier off on nexthop"
+ $IP route get fibmatch 198.51.100.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 198.51.100.1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv4 - no linkdown flag"
+	$IP -6 route get fibmatch 2001:db8:1::1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv6 - no linkdown flag"
+
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
+
+ echo " Route to local address with carrier down"
+ $IP route get fibmatch 192.0.2.1 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::1 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+	$IP route get fibmatch 192.0.2.1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv4 - no linkdown flag"
+	$IP -6 route get fibmatch 2001:db8:2::1 | \
+		grep -q "linkdown"
+	log_test $? 1 "IPv6 - no linkdown flag"
+
+ cleanup
+}
+
+fib_carrier_unicast_test()
+{
+ ret=0
+
+ echo
+ echo "Single path route carrier test"
+
+ setup
+
+ set -e
+ $IP link set dev dummy0 carrier on
+ set +e
+
+ echo " Start point"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.2 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv4 no linkdown flag"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
+ grep -q "linkdown"
+ log_test $? 1 "IPv6 no linkdown flag"
+
+ set -e
+ $IP link set dev dummy0 carrier off
+ sleep 1
+ set +e
+
+ echo " Carrier down"
+ $IP route get fibmatch 198.51.100.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:1::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 198.51.100.2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:1::2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv6 linkdown flag set"
+
+ set -e
+ $IP address add 192.0.2.1/24 dev dummy0
+ $IP -6 address add 2001:db8:2::1/64 dev dummy0
+ set +e
+
+ echo " Second address added with carrier down"
+ $IP route get fibmatch 192.0.2.2 &> /dev/null
+ log_test $? 0 "IPv4 fibmatch"
+ $IP -6 route get fibmatch 2001:db8:2::2 &> /dev/null
+ log_test $? 0 "IPv6 fibmatch"
+
+ $IP route get fibmatch 192.0.2.2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv4 linkdown flag set"
+ $IP -6 route get fibmatch 2001:db8:2::2 | \
+ grep -q "linkdown"
+ log_test $? 0 "IPv6 linkdown flag set"
+
+ cleanup
+}
+
+fib_carrier_test()
+{
+ fib_carrier_local_test
+ fib_carrier_unicast_test
+}
+
+################################################################################
+# Tests on nexthop spec
+
+# run 'ip route add' with given spec
+add_rt()
+{
+ local desc="$1"
+ local erc=$2
+ local vrf=$3
+ local pfx=$4
+ local gw=$5
+ local dev=$6
+ local cmd out rc
+
+ [ "$vrf" = "-" ] && vrf="default"
+ [ -n "$gw" ] && gw="via $gw"
+ [ -n "$dev" ] && dev="dev $dev"
+
+ cmd="$IP route add vrf $vrf $pfx $gw $dev"
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\n COMMAND: $cmd\n"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+ log_test $rc $erc "$desc"
+}
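+
+# Illustrative usage (prefix and gateway are example values): add a route in
+# the default VRF via a connected nexthop, expecting success (rc 0):
+#   add_rt "Example connected nexthop" 0 - 2001:db8:200::/64 2001:db8:1::2 "dummy0"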
+
+fib4_nexthop()
+{
+ echo
+ echo "IPv4 nexthop tests"
+
+ echo "<<< write me >>>"
+}
+
+fib6_nexthop()
+{
+ local lldummy=$(get_linklocal dummy0)
+	local llv1=$(get_linklocal veth1)
+
+ if [ -z "$lldummy" ]; then
+ echo "Failed to get linklocal address for dummy0"
+ return 1
+ fi
+ if [ -z "$llv1" ]; then
+ echo "Failed to get linklocal address for veth1"
+ return 1
+ fi
+
+ echo
+ echo "IPv6 nexthop tests"
+
+ add_rt "Directly connected nexthop, unicast address" 0 \
+ - 2001:db8:101::/64 2001:db8:1::2
+ add_rt "Directly connected nexthop, unicast address with device" 0 \
+ - 2001:db8:102::/64 2001:db8:1::2 "dummy0"
+ add_rt "Gateway is linklocal address" 0 \
+ - 2001:db8:103::1/64 $llv1 "veth0"
+
+ # fails because LL address requires a device
+ add_rt "Gateway is linklocal address, no device" 2 \
+ - 2001:db8:104::1/64 $llv1
+
+ # local address can not be a gateway
+ add_rt "Gateway can not be local unicast address" 2 \
+ - 2001:db8:105::/64 2001:db8:1::1
+ add_rt "Gateway can not be local unicast address, with device" 2 \
+ - 2001:db8:106::/64 2001:db8:1::1 "dummy0"
+ add_rt "Gateway can not be a local linklocal address" 2 \
+ - 2001:db8:107::1/64 $lldummy "dummy0"
+
+ # VRF tests
+ add_rt "Gateway can be local address in a VRF" 0 \
+ - 2001:db8:108::/64 2001:db8:51::2
+ add_rt "Gateway can be local address in a VRF, with device" 0 \
+ - 2001:db8:109::/64 2001:db8:51::2 "veth0"
+ add_rt "Gateway can be local linklocal address in a VRF" 0 \
+ - 2001:db8:110::1/64 $llv1 "veth0"
+
+ add_rt "Redirect to VRF lookup" 0 \
+ - 2001:db8:111::/64 "" "red"
+
+ add_rt "VRF route, gateway can be local address in default VRF" 0 \
+ red 2001:db8:112::/64 2001:db8:51::1
+
+ # local address in same VRF fails
+ add_rt "VRF route, gateway can not be a local address" 2 \
+ red 2001:db8:113::1/64 2001:db8:2::1
+ add_rt "VRF route, gateway can not be a local addr with device" 2 \
+ red 2001:db8:114::1/64 2001:db8:2::1 "dummy1"
+}
+
+# Default VRF:
+# dummy0 - 198.51.100.1/24 2001:db8:1::1/64
+# veth0 - 192.0.2.1/24 2001:db8:51::1/64
+#
+# VRF red:
+# dummy1 - 192.168.2.1/24 2001:db8:2::1/64
+# veth1 - 192.0.2.2/24 2001:db8:51::2/64
+#
+# [ dummy0 veth0 ]--[ veth1 dummy1 ]
+
+fib_nexthop_test()
+{
+ setup
+
+ set -e
+
+ $IP -4 rule add pref 32765 table local
+ $IP -4 rule del pref 0
+ $IP -6 rule add pref 32765 table local
+ $IP -6 rule del pref 0
+
+ $IP link add red type vrf table 1
+ $IP link set red up
+ $IP -4 route add vrf red unreachable default metric 4278198272
+ $IP -6 route add vrf red unreachable default metric 4278198272
+
+ $IP link add veth0 type veth peer name veth1
+ $IP link set dev veth0 up
+ $IP address add 192.0.2.1/24 dev veth0
+ $IP -6 address add 2001:db8:51::1/64 dev veth0
+
+ $IP link set dev veth1 vrf red up
+ $IP address add 192.0.2.2/24 dev veth1
+ $IP -6 address add 2001:db8:51::2/64 dev veth1
+
+ $IP link add dummy1 type dummy
+ $IP link set dev dummy1 vrf red up
+ $IP address add 192.168.2.1/24 dev dummy1
+ $IP -6 address add 2001:db8:2::1/64 dev dummy1
+ set +e
+
+ sleep 1
+ fib4_nexthop
+ fib6_nexthop
+
+ (
+ $IP link del dev dummy1
+ $IP link del veth0
+ $IP link del red
+ ) 2>/dev/null
+ cleanup
+}
+
+################################################################################
+# Tests on route add and replace
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+check_expected()
+{
+ local out="$1"
+ local expected="$2"
+ local rc=0
+
+ [ "${out}" = "${expected}" ] && return 0
+
+ if [ -z "${out}" ]; then
+ if [ "$VERBOSE" = "1" ]; then
+ printf "\nNo route entry found\n"
+ printf "Expected:\n"
+ printf " ${expected}\n"
+ fi
+ return 1
+ fi
+
+	# squash the output to a single line, dropping ip's trailing '\'
+	# line continuations along with any extra whitespace
+ out=$(echo ${out})
+ if [ "${out}" != "${expected}" ]; then
+ rc=1
+ if [ "${VERBOSE}" = "1" ]; then
+ printf " Unexpected route entry. Have:\n"
+ printf " ${out}\n"
+ printf " Expected:\n"
+ printf " ${expected}\n\n"
+ fi
+ fi
+
+ return $rc
+}
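+
+# Illustrative usage (the expected string is an example):
+#   out=$($IP ro ls match 172.16.104.0/24)
+#   check_expected "${out}" "172.16.104.0/24 via 172.16.101.2 dev veth1"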
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+ local pfx="$1"
+ local nh="$2"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo " ##################################################"
+ echo
+ fi
+
+ run_cmd "$IP -6 ro flush ${pfx}"
+ [ $? -ne 0 ] && exit 1
+
+ out=$($IP -6 ro ls match ${pfx})
+ if [ -n "$out" ]; then
+ echo "Failed to flush routes for prefix used for tests."
+ exit 1
+ fi
+
+ run_cmd "$IP -6 ro add ${pfx} ${nh}"
+ if [ $? -ne 0 ]; then
+ echo "Failed to add initial route for test."
+ exit 1
+ fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+ add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+ local pfx="2001:db8:104::/64"
+ local expected="$1"
+ local out
+ local rc=0
+
+ out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+ check_expected "${out}" "${expected}"
+}
+
+route_cleanup()
+{
+ $IP li del red 2>/dev/null
+ $IP li del dummy1 2>/dev/null
+ $IP li del veth1 2>/dev/null
+ $IP li del veth3 2>/dev/null
+
+ cleanup &> /dev/null
+}
+
+route_setup()
+{
+ route_cleanup
+ setup
+
+ [ "${VERBOSE}" = "1" ] && set -x
+ set -e
+
+ $IP li add red up type vrf table 101
+ $IP li add veth1 type veth peer name veth2
+ $IP li add veth3 type veth peer name veth4
+
+ $IP li set veth1 up
+ $IP li set veth3 up
+ $IP li set veth2 vrf red up
+ $IP li set veth4 vrf red up
+ $IP li add dummy1 type dummy
+ $IP li set dummy1 vrf red up
+
+ $IP -6 addr add 2001:db8:101::1/64 dev veth1
+ $IP -6 addr add 2001:db8:101::2/64 dev veth2
+ $IP -6 addr add 2001:db8:103::1/64 dev veth3
+ $IP -6 addr add 2001:db8:103::2/64 dev veth4
+ $IP -6 addr add 2001:db8:104::1/64 dev dummy1
+
+ $IP addr add 172.16.101.1/24 dev veth1
+ $IP addr add 172.16.101.2/24 dev veth2
+ $IP addr add 172.16.103.1/24 dev veth3
+ $IP addr add 172.16.103.2/24 dev veth4
+ $IP addr add 172.16.104.1/24 dev dummy1
+
+ set +e
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+ local rc
+
+ echo
+ echo "IPv6 route add / append tests"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+ log_test $? 2 "Attempt to add duplicate route - gw"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+ log_test $? 2 "Attempt to add duplicate route - dev only"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+ log_test $? 2 "Attempt to add duplicate route - reject route"
+
+ # route append with same prefix adds a new route
+ # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Append nexthop to existing route - gw"
+
+ # insert mpath directly
+ add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Add multipath route"
+
+ add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ log_test $? 2 "Attempt to add duplicate multipath route"
+
+ # insert of a second route without append but different metric
+ add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route add with different metrics"
+
+ run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+ rc=$?
+ fi
+ log_test $rc 0 "Route delete with metric"
+}
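+
+# Note on the iproute2-to-netlink flag mapping exercised above and below,
+# per the comments in this script (believed accurate for iproute2 of this
+# era): 'add' sets NLM_F_CREATE|NLM_F_EXCL, 'append' sets
+# NLM_F_CREATE|NLM_F_APPEND, 'prepend' sets NLM_F_CREATE only, 'replace'
+# sets NLM_F_CREATE|NLM_F_REPLACE, and 'change' sets NLM_F_REPLACE only.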
+
+ipv6_rt_replace_single()
+{
+ # single path with single path
+ #
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Single path with single path"
+
+ # single path with multipath
+ #
+ add_initial_route6 "nexthop via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Single path with multipath"
+
+ # single path with single path using MULTIPATH attribute
+ #
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+ check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+ log_test $? 0 "Single path with single path via multipath attribute"
+
+ # route replace fails - invalid nexthop
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+ if [ $? -eq 0 ]; then
+ # previous command is expected to fail so if it returns 0
+ # that means the test failed.
+ log_test 0 1 "Invalid nexthop"
+ else
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+ log_test $? 0 "Invalid nexthop"
+ fi
+
+ # replace non-existent route
+ # - note use of change versus replace since ip adds NLM_F_CREATE
+ # for replace
+ add_initial_route6 "via 2001:db8:101::2"
+ run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+ log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+ # multipath with multipath
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+ log_test $? 0 "Multipath with multipath"
+
+ # multipath with single
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with single path"
+
+ # multipath with single
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+ check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with single path via multipath attribute"
+
+ # multipath with dev-only
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 dev veth1"
+ check_route6 "2001:db8:104::/64 dev veth1 metric 1024"
+ log_test $? 0 "Multipath with dev-only"
+
+ # route replace fails - invalid nexthop 1
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid first nexthop"
+
+ # route replace fails - invalid nexthop 2
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+ check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid second nexthop"
+
+ # multipath non-existent route
+ add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+ run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+ log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+ echo
+ echo "IPv6 route replace tests"
+
+ ipv6_rt_replace_single
+ ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+ route_setup
+
+ ipv6_rt_add
+ ipv6_rt_replace
+
+ route_cleanup
+}
+
+ip_addr_metric_check()
+{
+ ip addr help 2>&1 | grep -q metric
+ if [ $? -ne 0 ]; then
+ echo "iproute2 command does not support metric for addresses. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv6_addr_metric_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 prefix route tests"
+
+ ip_addr_metric_check || return 1
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li add dummy2 type dummy
+ $IP li set dummy1 up
+ $IP li set dummy2 up
+
+ # default entry is metric 256
+ run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+ log_test $? 0 "Default metric"
+
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy1"
+ run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+ log_test $? 0 "User specified metric on first device"
+
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+ set +e
+
+ check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+ log_test $? 0 "User specified metric on second device"
+
+ run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+ rc=$?
+ fi
+ log_test $rc 0 "Delete of address on first device"
+
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address"
+
+ # verify prefix route removed on down
+ run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "$IP li set dev dummy2 down"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ out=$($IP -6 ro ls match 2001:db8:104::/64)
+ check_expected "${out}" ""
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route removed on link down"
+
+ # verify prefix route re-inserted with assigned metric
+ run_cmd "$IP li set dev dummy2 up"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
+ # verify peer metric added correctly
+ set -e
+ run_cmd "$IP -6 addr flush dev dummy2"
+ run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::1 peer 2001:db8:104::2 metric 260"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on local side"
+ check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
+ log_test $? 0 "Set metric with peer route on peer side"
+
+ set -e
+ run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::1 peer 2001:db8:104::3 metric 261"
+ set +e
+
+ check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on local side"
+ check_route6 "2001:db8:104::3 dev dummy2 proto kernel metric 261"
+ log_test $? 0 "Modify metric and peer address on peer side"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+ local pfx="$1"
+ local nh="$2"
+ local out
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo
+ echo " ##################################################"
+ echo
+ fi
+
+ run_cmd "$IP ro flush ${pfx}"
+ [ $? -ne 0 ] && exit 1
+
+ out=$($IP ro ls match ${pfx})
+ if [ -n "$out" ]; then
+ echo "Failed to flush routes for prefix used for tests."
+ exit 1
+ fi
+
+ run_cmd "$IP ro add ${pfx} ${nh}"
+ if [ $? -ne 0 ]; then
+ echo "Failed to add initial route for test."
+ exit 1
+ fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route()
+{
+ add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+ local pfx="172.16.104.0/24"
+ local expected="$1"
+ local out
+
+ out=$($IP ro ls match ${pfx})
+ check_expected "${out}" "${expected}"
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+ local rc
+
+ echo
+ echo "IPv4 route add / append tests"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+ log_test $? 2 "Attempt to add duplicate route - gw"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+ log_test $? 2 "Attempt to add duplicate route - dev only"
+
+ # route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ log_test $? 2 "Attempt to add duplicate route - reject route"
+
+ # iproute2 prepend only sets NLM_F_CREATE
+ # - adds a new route; does NOT convert existing route to ECMP
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+ log_test $? 0 "Add new nexthop for existing prefix"
+
+ # route append with same prefix adds a new route
+ # - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Append nexthop to existing route - gw"
+
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+ log_test $? 0 "Append nexthop to existing route - dev only"
+
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro append unreachable 172.16.104.0/24"
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+ log_test $? 0 "Append nexthop to existing route - reject route"
+
+ run_cmd "$IP ro flush 172.16.104.0/24"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+ check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Append nexthop to existing reject route - gw"
+
+ run_cmd "$IP ro flush 172.16.104.0/24"
+ run_cmd "$IP ro add unreachable 172.16.104.0/24"
+ run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+ check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+ log_test $? 0 "Append nexthop to existing reject route - dev only"
+
+ # insert mpath directly
+ add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Add multipath route"
+
+ add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ log_test $? 2 "Attempt to add duplicate multipath route"
+
+ # insert of a second route without append but different metric
+ add_route "172.16.104.0/24" "via 172.16.101.2"
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route add with different metrics"
+
+ run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+ rc=$?
+ fi
+ log_test $rc 0 "Route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+ # single path with single path
+ #
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Single path with single path"
+
+ # single path with multipath
+ #
+ add_initial_route "nexthop via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Single path with multipath"
+
+ # single path with reject
+ #
+ add_initial_route "nexthop via 172.16.101.2"
+ run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+ check_route "unreachable 172.16.104.0/24"
+ log_test $? 0 "Single path with reject route"
+
+ # single path with single path using MULTIPATH attribute
+ #
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+ check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+ log_test $? 0 "Single path with single path via multipath attribute"
+
+ # route replace fails - invalid nexthop
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+ if [ $? -eq 0 ]; then
+ # previous command is expected to fail so if it returns 0
+ # that means the test failed.
+ log_test 0 1 "Invalid nexthop"
+ else
+ check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+ log_test $? 0 "Invalid nexthop"
+ fi
+
+ # replace non-existent route
+ # - note use of change versus replace since ip adds NLM_F_CREATE
+ # for replace
+ add_initial_route "via 172.16.101.2"
+ run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+ log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+ # multipath with multipath
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+ log_test $? 0 "Multipath with multipath"
+
+ # multipath with single
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+ check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+ log_test $? 0 "Multipath with single path"
+
+ # multipath with single
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+ check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+ log_test $? 0 "Multipath with single path via multipath attribute"
+
+ # multipath with reject
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+ check_route "unreachable 172.16.104.0/24"
+ log_test $? 0 "Multipath with reject route"
+
+ # route replace fails - invalid nexthop 1
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid first nexthop"
+
+ # route replace fails - invalid nexthop 2
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+ check_route "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+ log_test $? 0 "Multipath - invalid second nexthop"
+
+ # multipath non-existent route
+ add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+ run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+ log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+ echo
+ echo "IPv4 route replace tests"
+
+ ipv4_rt_replace_single
+ ipv4_rt_replace_mpath
+}
+
+ipv4_route_test()
+{
+ route_setup
+
+ ipv4_rt_add
+ ipv4_rt_replace
+
+ route_cleanup
+}
+
+ipv4_addr_metric_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 prefix route tests"
+
+ ip_addr_metric_check || return 1
+
+ setup
+
+ set -e
+ $IP li add dummy1 type dummy
+ $IP li add dummy2 type dummy
+ $IP li set dummy1 up
+ $IP li set dummy2 up
+
+ # default entry is metric 256
+ run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+ run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+ log_test $? 0 "Default metric"
+
+ set -e
+ run_cmd "$IP addr flush dev dummy1"
+ run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+ log_test $? 0 "User specified metric on first device"
+
+ set -e
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+ set +e
+
+ check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+ log_test $? 0 "User specified metric on second device"
+
+ run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+ rc=$?
+ fi
+ log_test $rc 0 "Delete of address on first device"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of address"
+
+ # verify prefix route removed on down
+ run_cmd "$IP li set dev dummy2 down"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ out=$($IP ro ls match 172.16.104.0/24)
+ check_expected "${out}" ""
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route removed on link down"
+
+ # verify prefix route re-inserted with assigned metric
+ run_cmd "$IP li set dev dummy2 up"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+ rc=$?
+ fi
+ log_test $rc 0 "Prefix route with metric on link up"
+
+ # explicitly check for metric changes on edge scenarios
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.0/24 metric 259"
+ run_cmd "$IP addr change dev dummy2 172.16.104.0/24 metric 260"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.0 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric of .0/24 address"
+
+ run_cmd "$IP addr flush dev dummy2"
+ run_cmd "$IP addr add dev dummy2 172.16.104.1/32 peer 172.16.104.2 metric 260"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.2 dev dummy2 proto kernel scope link src 172.16.104.1 metric 260"
+ rc=$?
+ fi
+ log_test $rc 0 "Set metric of address with peer route"
+
+ run_cmd "$IP addr change dev dummy2 172.16.104.1/32 peer 172.16.104.3 metric 261"
+ rc=$?
+ if [ $rc -eq 0 ]; then
+ check_route "172.16.104.3 dev dummy2 proto kernel scope link src 172.16.104.1 metric 261"
+ rc=$?
+ fi
+ log_test $rc 0 "Modify metric and peer address for peer route"
+
+ $IP li del dummy1
+ $IP li del dummy2
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip route help 2>&1 | grep -q fibmatch
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing fibmatch"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ fib_unreg_test|unregister) fib_unreg_test;;
+ fib_down_test|down) fib_down_test;;
+ fib_carrier_test|carrier) fib_carrier_test;;
+ fib_nexthop_test|nexthop) fib_nexthop_test;;
+ ipv6_route_test|ipv6_rt) ipv6_route_test;;
+ ipv4_route_test|ipv4_rt) ipv4_route_test;;
+ ipv6_addr_metric) ipv6_addr_metric_test;;
+ ipv4_addr_metric) ipv4_addr_metric_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
new file mode 100644
index 000000000..a793eef5b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -0,0 +1 @@
+forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
new file mode 100644
index 000000000..b8a2af8fc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/README
@@ -0,0 +1,58 @@
+Motivation
+==========
+
+One of the nice things about network namespaces is that they allow one
+to easily create and test complex environments.
+
+Unfortunately, these namespaces cannot be used with actual switching
+ASICs, as their ports cannot be migrated to other network namespaces
+(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+L1-separation provided by namespaces.
+
+However, a similar kind of flexibility can be achieved by using VRFs and
+by looping the switch ports together. For example:
+
+ br0
+ +
+ vrf-h1 | vrf-h2
+ + +---+----+ +
+ | | | |
+ 192.0.2.1/24 + + + + 192.0.2.2/24
+ swp1 swp2 swp3 swp4
+ + + + +
+ | | | |
+ +--------+ +--------+
+
+The VRFs act as lightweight namespaces representing hosts connected to
+the switch.
+
+This approach for testing switch ASICs has several advantages over the
+traditional method that requires multiple physical machines, to name a
+few:
+
+1. Only the device under test (DUT) is being tested, without noise from
+other systems.
+
+2. Ability to easily provision complex topologies. Testing bridging
+between 4-port LAGs or 8-way ECMP requires many physical links that are
+not always available. With the VRF-based approach one merely needs to
+loop back more ports.
+
+These tests are written with switch ASICs in mind, but they can be run
+on any Linux box using veth pairs to emulate physical loopbacks.
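+
+For example, a single physical loopback between two "switch ports" can be
+emulated with a veth pair (a minimal sketch; the interface names are
+illustrative and should match those in forwarding.config):
+
+    ip link add veth0 type veth peer name veth1
+    ip link set dev veth0 up
+    ip link set dev veth1 up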
+
+Guidelines for Writing Tests
+============================
+
+o Where possible, reuse an existing topology for different tests instead
+ of recreating the same topology.
+o Tests that use anything but the most trivial topologies should include
+  ASCII art showing the topology.
+o Where possible, IPv6 and IPv4 addresses shall conform to RFC 3849 and
+ RFC 5737, respectively.
+o Where possible, tests shall be written so that they can be reused by
+ multiple topologies and added to lib.sh.
+o Checks shall be added to lib.sh for any external dependencies.
+o Code shall be checked using ShellCheck [1] prior to submission.
+
+1. https://www.shellcheck.net/
diff --git a/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
new file mode 100755
index 000000000..a43b4645c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_port_isolation.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 flooding"
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+h3_destroy()
+{
+ simple_if_fini $h3 192.0.2.3/24 2001:db8:1::3/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+ ip link set dev $swp3 master br0
+
+ ip link set dev $swp1 type bridge_slave isolated on
+ check_err $? "Can't set isolation on port $swp1"
+ ip link set dev $swp2 type bridge_slave isolated on
+ check_err $? "Can't set isolation on port $swp2"
+ ip link set dev $swp3 type bridge_slave isolated off
+ check_err $? "Can't disable isolation on port $swp3"
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+ ip link set dev $swp3 up
+}
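+
+# With isolation on, $swp1 and $swp2 may not forward traffic to each other,
+# but either may still reach the non-isolated $swp3; the ping and flooding
+# tests below rely on exactly this behavior.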
+
+switch_destroy()
+{
+ ip link set dev $swp3 down
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ RET=0
+ ping_do $h1 192.0.2.2
+ check_fail $? "Ping worked when it should not have"
+
+ RET=0
+ ping_do $h3 192.0.2.2
+ check_err $? "Ping didn't work when it should have"
+
+ log_test "Isolated port ping"
+}
+
+ping_ipv6()
+{
+ RET=0
+ ping6_do $h1 2001:db8:1::2
+ check_fail $? "Ping6 worked when it should not have"
+
+ RET=0
+ ping6_do $h3 2001:db8:1::2
+ check_err $? "Ping6 didn't work when it should have"
+
+ log_test "Isolated port ping6"
+}
+
+flooding()
+{
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+ flood_test_do false $mac $ip $h1 $h2
+ check_err $? "Packet was flooded when it should not have been"
+
+ RET=0
+ flood_test_do true $mac $ip $h3 $h2
+ check_err $? "Packet was not flooded when it should have been"
+
+ log_test "Isolated port flooding"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
new file mode 100755
index 000000000..b90dff8d3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn"
+NUM_NETIFS=4
+CHECK_TC="yes"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 seconds ageing time (the ageing_time argument is in centiseconds).
+ ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
+vlan_deletion()
+{
+ # Test that the deletion of a VLAN on a bridge port does not affect
+ # the PVID VLAN
+ log_info "Add and delete a VLAN on bridge port $swp1"
+
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan del vid 10 dev $swp1
+
+ ping_ipv4
+ ping_ipv6
+}
+
+extern_learn()
+{
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ # Test that externally learned FDB entries can roam, but not age out
+ RET=0
+
+	bridge fdb add $mac dev $swp1 master extern_learn vlan 1
+
+	bridge fdb show brport $swp1 | grep -q $mac
+	check_err $? "Did not find FDB entry when it was expected"
+
+	# Wait for 10 seconds after the ageing time to make sure the FDB entry
+	# was not aged out
+	ageing_time=$(bridge_ageing_time_get br0)
+	sleep $((ageing_time + 10))
+
+	bridge fdb show brport $swp1 | grep -q $mac
+	check_err $? "FDB entry was aged out when it should not have been"
+
+	$MZ $h2 -c 1 -p 64 -a $mac -t ip -q
+
+	bridge fdb show brport $swp2 | grep -q $mac
+	check_err $? "FDB entry did not roam when it should have"
+
+	log_test "Externally learned FDB entry - ageing & roaming"
+
+	bridge fdb del $mac dev $swp2 master vlan 1 &> /dev/null
+	bridge fdb del $mac dev $swp1 master vlan 1 &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
new file mode 100755
index 000000000..c15c6c85c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+	# 10 seconds ageing time (the ageing_time argument is in centiseconds).
+ ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+ learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+ flood_test $swp2 $h1 $h2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
new file mode 100644
index 000000000..5cd2aed97
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/config
@@ -0,0 +1,12 @@
+CONFIG_BRIDGE=m
+CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_VRF=m
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
new file mode 100644
index 000000000..5ab1e5f43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Source library
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+ relative_path="."
+fi
+
+source "$relative_path/lib.sh"
+
+##############################################################################
+# Defines
+
+DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \
+ grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \
+ rev | cut -d"/" -f2- | rev)
+if [ -z "$DEVLINK_DEV" ]; then
+ echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
+ exit 1
+fi
+if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
+ echo "SKIP: devlink device's bus is not PCI"
+ exit 1
+fi
+
+DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
+ -n | cut -d" " -f3)
+
+##############################################################################
+# Sanity checks
+
+devlink -j resource show "$DEVLINK_DEV" &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink resource support"
+ exit 1
+fi
+
+##############################################################################
+# Devlink helpers
+
+devlink_resource_names_to_path()
+{
+ local resource
+ local path=""
+
+ for resource in "${@}"; do
+ if [ "$path" == "" ]; then
+ path="$resource"
+ else
+ path="${path}/$resource"
+ fi
+ done
+
+ echo "$path"
+}
+
+devlink_resource_get()
+{
+ local name=$1
+ local resource_name=.[][\"$DEVLINK_DEV\"]
+
+ resource_name="$resource_name | .[] | select (.name == \"$name\")"
+
+ shift
+ for resource in "${@}"; do
+ resource_name="${resource_name} | .[\"resources\"][] | \
+ select (.name == \"$resource\")"
+ done
+
+ devlink -j resource show "$DEVLINK_DEV" | jq "$resource_name"
+}
+
+devlink_resource_size_get()
+{
+ local size=$(devlink_resource_get "$@" | jq '.["size_new"]')
+
+ if [ "$size" == "null" ]; then
+ devlink_resource_get "$@" | jq '.["size"]'
+ else
+ echo "$size"
+ fi
+}
+
+devlink_resource_size_set()
+{
+ local new_size=$1
+ local path
+
+ shift
+ path=$(devlink_resource_names_to_path "$@")
+ devlink resource set "$DEVLINK_DEV" path "$path" size "$new_size"
+	check_err $? "Failed setting path $path to size $new_size"
+}
+
+devlink_reload()
+{
+ local still_pending
+
+ devlink dev reload "$DEVLINK_DEV" &> /dev/null
+ check_err $? "Failed reload"
+
+ still_pending=$(devlink resource show "$DEVLINK_DEV" | \
+ grep -c "size_new")
+ check_err $still_pending "Failed reload - There are still unset sizes"
+}
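+
+# Illustrative usage sketch (not executed; resource names are driver-specific,
+# "kvd" and "hash_single" below are examples from mlxsw):
+#
+#	size=$(devlink_resource_size_get kvd hash_single)
+#	devlink_resource_size_set $((size + 1000)) kvd hash_single
+#	devlink_reload
+#
+# devlink_resource_size_set only records the new size ("size_new" in the
+# resource dump); it takes effect once devlink_reload re-instantiates the
+# device.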
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
new file mode 100644
index 000000000..e819d049d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+declare -A NETIFS
+
+NETIFS[p1]=veth0
+NETIFS[p2]=veth1
+NETIFS[p3]=veth2
+NETIFS[p4]=veth3
+NETIFS[p5]=veth4
+NETIFS[p6]=veth5
+NETIFS[p7]=veth6
+NETIFS[p8]=veth7
+
+##############################################################################
+# Defines
+
+# IPv4 ping utility name
+PING=ping
+# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
+PING6=ping6
+# Packet generator. Some distributions use 'mz'.
+MZ=mausezahn
+# Time to wait after interfaces participating in the test are all UP
+WAIT_TIME=5
+# Whether to pause on failure or not.
+PAUSE_ON_FAIL=no
+# Whether to pause on cleanup or not.
+PAUSE_ON_CLEANUP=no
+# Type of network interface to create
+NETIF_TYPE=veth
+# Whether to create virtual interfaces (veth) or not
+NETIF_CREATE=yes
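+
+# To use this file, copy it to "forwarding.config" in the same directory;
+# lib.sh sources that file automatically when it exists, and the values set
+# there then override the library defaults.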
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
new file mode 100755
index 000000000..a8d8e8b3d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -0,0 +1,257 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a weighted ECMP (wECMP) route forwards
+# traffic to two GRE tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ multipath_ipv4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+	simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip route add vrf v$ol1 192.0.2.16/28 \
+ nexthop dev g1a \
+ nexthop dev g1b
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip route add vrf v$ol2 192.0.2.0/28 \
+ nexthop dev g2a \
+ nexthop dev g2b
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
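+	# Hash on L4 headers (policy 1) so that the varying UDP ports
+	# generated by $MZ below spread flows across both nexthops.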
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip route replace vrf v$ol1 192.0.2.16/28 \
+ nexthop dev g1a weight $weight1 \
+ nexthop dev g1b weight $weight2
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip route replace vrf v$ol1 192.0.2.16/28 \
+ nexthop dev g1a \
+ nexthop dev g1b
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
new file mode 100644
index 000000000..08bac6cf1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -0,0 +1,964 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+# Can be overridden by the configuration file.
+PING=${PING:=ping}
+PING6=${PING6:=ping6}
+MZ=${MZ:=mausezahn}
+ARPING=${ARPING:=arping}
+TEAMD=${TEAMD:=teamd}
+WAIT_TIME=${WAIT_TIME:=5}
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
+NETIF_TYPE=${NETIF_TYPE:=veth}
+NETIF_CREATE=${NETIF_CREATE:=yes}
+
+relative_path="${BASH_SOURCE%/*}"
+if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
+ relative_path="."
+fi
+
+if [[ -f $relative_path/forwarding.config ]]; then
+ source "$relative_path/forwarding.config"
+fi
+
+##############################################################################
+# Sanity checks
+
+check_tc_version()
+{
+ tc -j &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing JSON support"
+ exit 1
+ fi
+}
+
+check_tc_shblock_support()
+{
+ tc filter help 2>&1 | grep block &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing shared block support"
+ exit 1
+ fi
+}
+
+check_tc_chain_support()
+{
+	tc help 2>&1 | grep chain &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing chain support"
+ exit 1
+ fi
+}
+
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit 0
+fi
+
+if [[ "$CHECK_TC" = "yes" ]]; then
+ check_tc_version
+fi
+
+require_command()
+{
+ local cmd=$1; shift
+
+ if [[ ! -x "$(command -v "$cmd")" ]]; then
+ echo "SKIP: $cmd not installed"
+ exit 1
+ fi
+}
+
+require_command jq
+require_command $MZ
+
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit 1
+fi
+
+##############################################################################
+# Command line options handling
+
+count=0
+
+while [[ $# -gt 0 ]]; do
+ if [[ "$count" -eq "0" ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ fi
+ count=$((count + 1))
+ NETIFS[p$count]="$1"
+ shift
+done
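+
+# E.g. "./bridge_vlan_unaware.sh eth0 eth1 eth2 eth3" maps eth0..eth3 to
+# p1..p4, overriding any NETIFS set by the configuration file.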
+
+##############################################################################
+# Network interfaces configuration
+
+create_netif_veth()
+{
+ local i
+
+	for ((i = 1; i <= NUM_NETIFS; i += 2)); do
+		local j=$((i + 1))
+
+		ip link show dev ${NETIFS[p$i]} &> /dev/null
+		if [[ $? -ne 0 ]]; then
+			ip link add ${NETIFS[p$i]} type veth \
+				peer name ${NETIFS[p$j]}
+			if [[ $? -ne 0 ]]; then
+				echo "Failed to create netif"
+				exit 1
+			fi
+		fi
+	done
+}
+
+create_netif()
+{
+ case "$NETIF_TYPE" in
+ veth) create_netif_veth
+ ;;
+	*) echo "Cannot create interfaces of type '$NETIF_TYPE'"
+ exit 1
+ ;;
+ esac
+}
+
+if [[ "$NETIF_CREATE" = "yes" ]]; then
+ create_netif
+fi
+
+for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ ip link show dev ${NETIFS[p$i]} &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: could not find all required interfaces"
+ exit 1
+ fi
+done
+
+##############################################################################
+# Helpers
+
+# Exit status to return at the end. Set in case one of the tests fails.
+EXIT_STATUS=0
+# Per-test return value. Clear at the beginning of each test.
+RET=0
+
+check_err()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -ne 0 ]]; then
+ RET=$err
+ retmsg=$msg
+ fi
+}
+
+check_fail()
+{
+ local err=$1
+ local msg=$2
+
+ if [[ $RET -eq 0 && $err -eq 0 ]]; then
+ RET=1
+ retmsg=$msg
+ fi
+}
+
+check_err_fail()
+{
+ local should_fail=$1; shift
+ local err=$1; shift
+ local what=$1; shift
+
+ if ((should_fail)); then
+ check_fail $err "$what succeeded, but should have failed"
+ else
+ check_err $err "$what failed"
+ fi
+}
+
+log_test()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ if [[ $# -eq 2 ]]; then
+ opt_str="($opt_str)"
+ fi
+
+ if [[ $RET -ne 0 ]]; then
+ EXIT_STATUS=1
+ printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
+ if [[ ! -z "$retmsg" ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ return 1
+ fi
+
+ printf "TEST: %-60s [PASS]\n" "$test_name $opt_str"
+ return 0
+}
+
+log_info()
+{
+ local msg=$1
+
+ echo "INFO: $msg"
+}
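+
+# A minimal sketch of the intended test-authoring pattern ("my_test" is a
+# hypothetical example, not part of this library):
+#
+#	my_test()
+#	{
+#		RET=0
+#		some_command
+#		check_err $? "some_command failed"
+#		log_test "my test"
+#	}
+#
+# RET is cleared at the start; check_err/check_fail latch the first failure
+# and its message; log_test then prints PASS/FAIL and updates EXIT_STATUS.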
+
+setup_wait_dev()
+{
+ local dev=$1; shift
+
+ while true; do
+ ip link show dev $dev up \
+ | grep 'state UP' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+}
+
+setup_wait()
+{
+ local num_netifs=${1:-$NUM_NETIFS}
+
+ for ((i = 1; i <= num_netifs; ++i)); do
+ setup_wait_dev ${NETIFS[p$i]}
+ done
+
+ # Make sure links are ready.
+ sleep $WAIT_TIME
+}
+
+lldpad_app_wait_set()
+{
+ local dev=$1; shift
+
+ while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
+ echo "$dev: waiting for lldpad to push pending APP updates"
+ sleep 5
+ done
+}
+
+lldpad_app_wait_del()
+{
+	# Give lldpad a chance to push down the changes. If the device is
+	# downed too soon, the updates will be left pending. However, they
+	# will have already been struck off lldpad's DB, so we won't be able
+	# to tell they are pending. On the next test iteration this causes
+	# weirdness, as newly-added APP rules conflict with the old ones,
+	# sometimes getting stuck in an "unknown" state.
+ sleep 5
+}
+
+pre_cleanup()
+{
+ if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
+ echo "Pausing before cleanup, hit any key to continue"
+ read
+ fi
+}
+
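+# Move the local table lookup below the VRF (l3mdev) rules, so that while the
+# tests run, per-VRF routes are consulted before the global local table.
+# vrf_cleanup restores the original rule order.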
+vrf_prepare()
+{
+ ip -4 rule add pref 32765 table local
+ ip -4 rule del pref 0
+ ip -6 rule add pref 32765 table local
+ ip -6 rule del pref 0
+}
+
+vrf_cleanup()
+{
+ ip -6 rule add pref 0 table local
+ ip -6 rule del pref 32765
+ ip -4 rule add pref 0 table local
+ ip -4 rule del pref 32765
+}
+
+__last_tb_id=0
+declare -A __TB_IDS
+
+__vrf_tb_id_assign()
+{
+ local vrf_name=$1
+
+ __last_tb_id=$((__last_tb_id + 1))
+ __TB_IDS[$vrf_name]=$__last_tb_id
+ return $__last_tb_id
+}
+
+__vrf_tb_id_lookup()
+{
+ local vrf_name=$1
+
+ return ${__TB_IDS[$vrf_name]}
+}
+
+vrf_create()
+{
+ local vrf_name=$1
+ local tb_id
+
+	__vrf_tb_id_assign $vrf_name
+ tb_id=$?
+
+ ip link add dev $vrf_name type vrf table $tb_id
+ ip -4 route add table $tb_id unreachable default metric 4278198272
+ ip -6 route add table $tb_id unreachable default metric 4278198272
+}
+
+vrf_destroy()
+{
+ local vrf_name=$1
+ local tb_id
+
+	__vrf_tb_id_lookup $vrf_name
+ tb_id=$?
+
+ ip -6 route del table $tb_id unreachable default metric 4278198272
+ ip -4 route del table $tb_id unreachable default metric 4278198272
+ ip link del dev $vrf_name
+}
+
+__addr_add_del()
+{
+ local if_name=$1
+ local add_del=$2
+ local array
+
+ shift
+ shift
+ array=("${@}")
+
+ for addrstr in "${array[@]}"; do
+ ip address $add_del $addrstr dev $if_name
+ done
+}
+
+__simple_if_init()
+{
+ local if_name=$1; shift
+ local vrf_name=$1; shift
+ local addrs=("${@}")
+
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $if_name up
+
+ __addr_add_del $if_name add "${addrs[@]}"
+}
+
+__simple_if_fini()
+{
+ local if_name=$1; shift
+ local addrs=("${@}")
+
+ __addr_add_del $if_name del "${addrs[@]}"
+
+ ip link set dev $if_name down
+ ip link set dev $if_name nomaster
+}
+
+simple_if_init()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $vrf_name up
+ __simple_if_init $if_name $vrf_name "${array[@]}"
+}
+
+simple_if_fini()
+{
+ local if_name=$1
+ local vrf_name
+ local array
+
+ shift
+ vrf_name=v$if_name
+ array=("${@}")
+
+ __simple_if_fini $if_name "${array[@]}"
+ vrf_destroy $vrf_name
+}
+
+tunnel_create()
+{
+ local name=$1; shift
+ local type=$1; shift
+ local local=$1; shift
+ local remote=$1; shift
+
+ ip link add name $name type $type \
+ local $local remote $remote "$@"
+ ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+ local name=$1; shift
+
+ ip link del dev $name
+}
+
+vlan_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf=$1; shift
+ local ips=("${@}")
+ local name=$if_name.$vid
+
+ ip link add name $name link $if_name type vlan id $vid
+ if [ "$vrf" != "" ]; then
+ ip link set dev $name master $vrf
+ fi
+ ip link set dev $name up
+ __addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local name=$if_name.$vid
+
+ ip link del dev $name
+}
+
+team_create()
+{
+ local if_name=$1; shift
+ local mode=$1; shift
+
+ require_command $TEAMD
+ $TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
+ for slave in "$@"; do
+ ip link set dev $slave down
+ ip link set dev $slave master $if_name
+ ip link set dev $slave up
+ done
+ ip link set dev $if_name up
+}
+
+team_destroy()
+{
+ local if_name=$1; shift
+
+ $TEAMD -t $if_name -k
+}
+
+master_name_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["master"]'
+}
+
+link_stats_tx_packets_get()
+{
+ local if_name=$1
+
+ ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
+}
+
+tc_rule_stats_get()
+{
+ local dev=$1; shift
+ local pref=$1; shift
+ local dir=$1; shift
+
+ tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
+ | jq '.[1].options.actions[].stats.packets'
+}
+
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
+bridge_ageing_time_get()
+{
+ local bridge=$1
+ local ageing_time
+
+	# The kernel reports the ageing time in centiseconds; divide by 100
+	# to convert to seconds.
+ ageing_time=$(ip -j -d link show dev $bridge \
+ | jq '.[]["linkinfo"]["info_data"]["ageing_time"]')
+ echo $((ageing_time / 100))
+}
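+
+# E.g. for a bridge created with "ageing_time 1000" (centiseconds),
+# bridge_ageing_time_get echoes 10.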
+
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+ local key=$1; shift
+ local value=$1; shift
+
+ SYSCTL_ORIG[$key]=$(sysctl -n $key)
+ sysctl -qw $key=$value
+}
+
+sysctl_restore()
+{
+ local key=$1; shift
+
+ sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+ sysctl_set net.ipv4.conf.all.forwarding 1
+ sysctl_set net.ipv6.conf.all.forwarding 1
+}
+
+forwarding_restore()
+{
+ sysctl_restore net.ipv6.conf.all.forwarding
+ sysctl_restore net.ipv4.conf.all.forwarding
+}
+
+tc_offload_check()
+{
+ local num_netifs=${1:-$NUM_NETIFS}
+
+ for ((i = 1; i <= num_netifs; ++i)); do
+ ethtool -k ${NETIFS[p$i]} \
+ | grep "hw-tc-offload: on" &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+trap_install()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ # Some devices may not support or need in-hardware trapping of traffic
+ # (e.g. the veth pairs that this library creates for non-existent
+ # loopbacks). Use continue instead, so that there is a filter in there
+ # (some tests check counters), and so that other filters are still
+ # processed.
+ tc filter add dev $dev $direction pref 1 \
+ flower skip_sw action trap 2>/dev/null \
+ || tc filter add dev $dev $direction pref 1 \
+ flower action continue
+}
+
+trap_uninstall()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+
+ tc filter del dev $dev $direction pref 1 flower
+}
+
+slow_path_trap_install()
+{
+	# For slow-path testing, install a trap so that packets which would
+	# otherwise be switched in HW are sent to the slow path instead.
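+	# "${tcflags/skip_hw}" differs from "$tcflags" iff $tcflags
+	# contains "skip_hw".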
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_install "$@"
+ fi
+}
+
+slow_path_trap_uninstall()
+{
+ if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
+ trap_uninstall "$@"
+ fi
+}
+
+__icmp_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local vsuf=$1; shift
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$tundev" ingress \
+ proto ip$vsuf pref $pref \
+ flower ip_proto icmp$vsuf $filter \
+ action pass
+}
+
+icmp_capture_install()
+{
+ __icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+ __icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+ __icmp_capture_add_del del 100 v6 "$@"
+}
+
+__vlan_capture_add_del()
+{
+ local add_del=$1; shift
+ local pref=$1; shift
+ local dev=$1; shift
+ local filter=$1; shift
+
+ tc filter $add_del dev "$dev" ingress \
+ proto 802.1q pref $pref \
+ flower $filter \
+ action pass
+}
+
+vlan_capture_install()
+{
+ __vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+ __vlan_capture_add_del del 100 "$@"
+}
+
+__dscp_capture_add_del()
+{
+ local add_del=$1; shift
+ local dev=$1; shift
+ local base=$1; shift
+	local dscp
+
+ for prio in {0..7}; do
+ dscp=$((base + prio))
+ __icmp_capture_add_del $add_del $((dscp + 100)) "" $dev \
+ "skip_hw ip_tos $((dscp << 2))"
+ done
+}
+
+dscp_capture_install()
+{
+ local dev=$1; shift
+ local base=$1; shift
+
+ __dscp_capture_add_del add $dev $base
+}
+
+dscp_capture_uninstall()
+{
+ local dev=$1; shift
+ local base=$1; shift
+
+ __dscp_capture_add_del del $dev $base
+}
+
+dscp_fetch_stats()
+{
+ local dev=$1; shift
+ local base=$1; shift
+
+ for prio in {0..7}; do
+ local dscp=$((base + prio))
+ local t=$(tc_rule_stats_get $dev $((dscp + 100)))
+ echo "[$dscp]=$t "
+ done
+}
+
+matchall_sink_create()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev $dev clsact
+ tc filter add dev $dev ingress \
+ pref 10000 \
+ matchall \
+ action drop
+}
+
+tests_run()
+{
+ local current_test
+
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
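+
+# E.g. TESTS="ping_ipv4" ./bridge_vlan_unaware.sh runs only that test case,
+# assuming it is listed in the script's ALL_TESTS.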
+
+multipath_eval()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local packets_rp12=$4
+ local packets_rp13=$5
+ local weights_ratio packets_ratio diff
+
+ RET=0
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ weights_ratio=$(echo "scale=2; $weight_rp12 / $weight_rp13" \
+ | bc -l)
+ else
+ weights_ratio=$(echo "scale=2; $weight_rp13 / $weight_rp12" \
+ | bc -l)
+ fi
+
+ if [[ "$packets_rp12" -eq "0" || "$packets_rp13" -eq "0" ]]; then
+		check_err 1 "Packet count is zero on at least one path"
+ log_test "Multipath"
+ log_info "Expected ratio $weights_ratio"
+ return
+ fi
+
+ if [[ "$weight_rp12" -gt "$weight_rp13" ]]; then
+ packets_ratio=$(echo "scale=2; $packets_rp12 / $packets_rp13" \
+ | bc -l)
+ else
+ packets_ratio=$(echo "scale=2; $packets_rp13 / $packets_rp12" \
+ | bc -l)
+ fi
+
+ diff=$(echo $weights_ratio - $packets_ratio | bc -l)
+ diff=${diff#-}
+
+ test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
+ check_err $? "Too large discrepancy between expected and measured ratios"
+ log_test "$desc"
+ log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
+}
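+
+# Worked example: with weights 2:1 and measured packet counts 660:330,
+# weights_ratio = 2.00, packets_ratio = 2.00, diff = 0, and the relative
+# discrepancy 0 <= 0.15, so the test passes. Counts of 660:450 would give
+# packets_ratio = 1.46, a relative discrepancy of ~0.27 > 0.15, and a
+# failure.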
+
+##############################################################################
+# Tests
+
+ping_do()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping_test()
+{
+ RET=0
+
+ ping_do $1 $2
+ check_err $?
+ log_test "ping"
+}
+
+ping6_do()
+{
+ local if_name=$1
+ local dip=$2
+ local vrf_name
+
+ vrf_name=$(master_name_get $if_name)
+ ip vrf exec $vrf_name $PING6 $dip -c 10 -i 0.1 -w 2 &> /dev/null
+}
+
+ping6_test()
+{
+ RET=0
+
+ ping6_do $1 $2
+ check_err $?
+ log_test "ping6"
+}
+
+learning_test()
+{
+ local bridge=$1
+ local br_port1=$2 # Connected to `host1_if`.
+ local host1_if=$3
+ local host2_if=$4
+ local mac=de:ad:be:ef:13:37
+ local ageing_time
+
+ RET=0
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when none was expected"
+
+ # Disable unknown unicast flooding on `br_port1` to make sure
+ # packets are only forwarded through the port after a matching
+ # FDB entry was installed.
+ bridge link set dev $br_port1 flood off
+
+ tc qdisc add dev $host1_if ingress
+ tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_fail $? "Packet reached second host when it should not have"
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_err $? "Did not find FDB record when one was expected"
+
+ $MZ $host2_if -c 1 -p 64 -b $mac -t ip -q
+ sleep 1
+
+ tc -j -s filter show dev $host1_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	check_err $? "Packet did not reach second host when it should have"
+
+	# Wait for 10 seconds after the ageing time to make sure the FDB
+	# record was aged out.
+ ageing_time=$(bridge_ageing_time_get $bridge)
+ sleep $((ageing_time + 10))
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when none was expected"
+
+ bridge link set dev $br_port1 learning off
+
+ $MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
+ sleep 1
+
+ bridge -j fdb show br $bridge brport $br_port1 \
+ | jq -e ".[] | select(.mac == \"$mac\")" &> /dev/null
+	check_fail $? "Found FDB record when none was expected"
+
+ bridge link set dev $br_port1 learning on
+
+ tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host1_if ingress
+
+ bridge link set dev $br_port1 flood on
+
+ log_test "FDB learning"
+}
+
+flood_test_do()
+{
+ local should_flood=$1
+ local mac=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local err=0
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was flooded to it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t ip -q
+ sleep 1
+
+	tc -j -s filter show dev $host2_if ingress \
+		| jq -e ".[] | select(.options.handle == 101) \
+		| select(.options.actions[0].stats.packets == 1)" &> /dev/null
+	local matched=$?
+
+	if [[ $matched -ne 0 && $should_flood == "true" || \
+	      $matched -eq 0 && $should_flood == "false" ]]; then
+		err=1
+	fi
+
+ tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $err
+}
+
+flood_unicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=de:ad:be:ef:13:37
+ local ip=192.0.2.100
+
+ RET=0
+
+ bridge link set dev $br_port flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+	check_err $? "Packet flooded when it should not have been"
+
+ bridge link set dev $br_port flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+	check_err $? "Packet was not flooded when it should have been"
+
+ log_test "Unknown unicast flood"
+}
+
+flood_multicast_test()
+{
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+ local mac=01:00:5e:00:00:01
+ local ip=239.0.0.1
+
+ RET=0
+
+ bridge link set dev $br_port mcast_flood off
+
+ flood_test_do false $mac $ip $host1_if $host2_if
+	check_err $? "Packet flooded when it should not have been"
+
+ bridge link set dev $br_port mcast_flood on
+
+ flood_test_do true $mac $ip $host1_if $host2_if
+	check_err $? "Packet was not flooded when it should have been"
+
+ log_test "Unregistered multicast flood"
+}
+
+flood_test()
+{
+ # `br_port` is connected to `host2_if`
+ local br_port=$1
+ local host1_if=$2
+ local host2_if=$3
+
+ flood_unicast_test $br_port $host1_if $host2_if
+ flood_multicast_test $br_port $host1_if $host2_if
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000..026644360
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_mac
+ test_ip6gretap_mac
+ test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local what=$1; shift
+
+ case "$direction" in
+ ingress) local src_mac=$(mac_get $h1); local dst_mac=$(mac_get $h2)
+ ;;
+ egress) local src_mac=$(mac_get $h2); local dst_mac=$(mac_get $h1)
+ ;;
+ esac
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
+
+ icmp_capture_uninstall h3-${tundev}
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 egress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ fail_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt6 egress
+
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_uninstall $swp1 egress
+ quick_test_span_gre_dir gt4 ingress
+ fail_test_span_gre_dir gt6 egress
+
+ mirror_uninstall $swp1 ingress
+ log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+ test_span_gre_mac gt4 ingress "mirror to gretap"
+ test_span_gre_mac gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+ test_span_gre_mac gt6 ingress "mirror to ip6gretap"
+ test_span_gre_mac gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
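+# Run the tests twice: first forcing the SW datapath (skip_hw), then, if all
+# interfaces report hw-tc-offload, forcing the HW datapath (skip_sw).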
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000..360ca133b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | +---------------------------------------------------------------------+ |
+# | | OL + gt6 (ip6gretap) + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 | |
+# | | : ttl=100 : ttl=100 | |
+# | | : tos=inherit : tos=inherit | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | : | : | |
+# | +-------------------------:--|-------------------:--|-----------------+ |
+# | | UL : |,---------------------' | |
+# | | + $swp3 : || : | |
+# | | | 192.0.2.129/28 : vv : | |
+# | | | 2001:db8:2::1/64 : + ul (dummy) : | |
+# | +---|---------------------:----------------------:--------------------+ |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
+# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks the underlay VRF
+# in which the encapsulated packet should be routed.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+ # Bridge between H1 and H2.
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+
+ # Underlay.
+
+ simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link add name ul type dummy
+ ip link set dev ul master v$swp3
+ ip link set dev ul up
+
+ # Overlay.
+
+ vrf_create vrf-ol
+ ip link set dev vrf-ol up
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit dev ul
+ ip link set dev gt4 master vrf-ol
+ ip link set dev gt4 up
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit dev ul allow-localremote
+ ip link set dev gt6 master vrf-ol
+ ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+ vrf_destroy vrf-ol
+
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+ ip link del dev ul
+
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
new file mode 100755
index 000000000..c5095da7f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d).
+#
+# This test uses the standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o---> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 + br1 (802.1q bridge) $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | +---------------------------------------------------------------------+ |
+# | | + br2 (802.1d bridge) | |
+# | | 192.0.2.129/28 | |
+# | | + $swp3 2001:db8:2::1/64 | |
+# | +---|-----------------------------------------------------------------+ |
+# | | ^ ^ |
+# | | + gt6 (ip6gretap) | + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 | : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+ |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6(ip6gretap) + h3-gt4 (gretap) |
+# | 192.0.2.130/28 loc=2001:db8:2::2 loc=192.0.2.130 |
+# | 2001:db8:2::2/64 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# +-------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ ip link set dev $swp3 master br2
+ ip route add 192.0.2.130/32 dev br2
+ ip -6 route add 2001:db8:2::2/128 dev br2
+
+ ip address add dev br2 192.0.2.129/28
+ ip address add dev br2 2001:db8:2::1/64
+
+ ip address add dev $h3 192.0.2.130/28
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $h3 192.0.2.130/28
+ ip link del dev br2
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000..f8cda822c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN netdevice.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_stp
+ test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ vlan_create $swp3 555
+
+ ip link set dev $swp3.555 master br2
+ ip route add 192.0.2.130/32 dev br2
+ ip -6 route add 2001:db8:2::2/128 dev br2
+
+ ip address add dev br2 192.0.2.129/32
+ ip address add dev br2 2001:db8:2::1/128
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h3 555
+ ip link del dev br2
+ vlan_destroy $swp3 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+	# Sometimes after mirror installation, the neighbor's state is not
+	# valid. The reason is that there is no SW datapath activity related
+	# to the neighbor for the remote GRE address. Whether the
+	# corresponding neighbor will be valid is therefore a matter of luck,
+	# and the test is thus racy. Set the neighbor's state to permanent so
+	# that it is always valid.
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br2
+ full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br2
+ full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
new file mode 100755
index 000000000..9ff22f280
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q).
+#
+# This test uses the standard topology for testing mirror-to-gretap. See
+# mirror_gre_topo_lib.sh for more details. The full topology is as follows:
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|---------------------------------------------------------------|-----+
+# | SW o---> mirror | |
+# | +---|---------------------------------------------------------------|---+ |
+# | | + $swp1 + br1 (802.1q bridge) $swp2 + | |
+# | | 192.0.2.129/28 | |
+# | | + $swp3 2001:db8:2::1/64 | |
+# | | | vid555 vid555[pvid,untagged] | |
+# | +---|-------------------------------------------------------------------+ |
+# | | ^ ^ |
+# | | + gt6 (ip6gretap) | + gt4 (gretap) | |
+# | | : loc=2001:db8:2::1 | : loc=192.0.2.129 | |
+# | | : rem=2001:db8:2::2 -+ : rem=192.0.2.130 -+ |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# +-----|---------------------:------------------------:----------------------+
+# | : :
+# +-----|---------------------:------------------------:----------------------+
+# | H3 + $h3 + h3-gt6(ip6gretap) + h3-gt4 (gretap) |
+# | | loc=2001:db8:2::2 loc=192.0.2.130 |
+# | + $h3.555 rem=2001:db8:2::1 rem=192.0.2.129 |
+# | 192.0.2.130/28 ttl=100 ttl=100 |
+# | 2001:db8:2::2/64 tos=inherit tos=inherit |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+	# Avoid changing br1's PVID while it is operational as an L3 interface.
+ ip link set dev br1 down
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev br1 vid 555 pvid untagged self
+ ip link set dev br1 up
+ ip address add dev br1 192.0.2.129/28
+ ip address add dev br1 2001:db8:2::1/64
+
+ ip -4 route add 192.0.2.130/32 dev br1
+ ip -6 route add 2001:db8:2::2/128 dev br1
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+ bridge vlan add dev $swp3 vid 555
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp3 nomaster
+ vlan_destroy $h3 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+ full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+tests()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+tests
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
new file mode 100755
index 000000000..61844caf6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device with vlan filtering (802.1q), and the egress device is a team
+# device.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1.333 | | $h1.555 + |
+# | | 192.0.2.1/28 | | 192.0.2.18/28 | |
+# +-----|----------------+ +----------------|-----+
+# | $h1 |
+# +--------------------------------+------------------------------+
+# |
+# +--------------------------------------|------------------------------------+
+# | SW o---> mirror |
+# | | |
+# | +--------------------------------+------------------------------+ |
+# | | $swp1 | |
+# | + $swp1.333 $swp1.555 + |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | |
+# | +-----------------------------------------------------------------------+ |
+# | | BR1 (802.1q) | |
+# | | + lag (team) 192.0.2.129/28 | |
+# | | / \ 2001:db8:2::1/64 | |
+# | +---/---\---------------------------------------------------------------+ |
+# | / \ ^ |
+# | | \ + gt4 (gretap) | |
+# | | \ loc=192.0.2.129 | |
+# | | \ rem=192.0.2.130 -+ |
+# | | \ ttl=100 |
+# | | \ tos=inherit |
+# | | \ |
+# | | \_________________________________ |
+# | | \ |
+# | + $swp3 + $swp4 |
+# +---|------------------------------------------------|----------------------+
+# | |
+# +---|----------------------+ +---|----------------------+
+# | + $h3 H3 | | + $h4 H4 |
+# | 192.0.2.130/28 | | 192.0.2.130/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:2::2/64 |
+# +--------------------------+ +--------------------------+
+
+ALL_TESTS="
+ test_mirror_gretap_first
+ test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+ local ips=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $vrf_name up
+ vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+
+ vlan_destroy $if_name $vid
+ ip link set dev $vrf_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+ ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del 192.0.2.16/28 vrf vrf-h1
+ vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+ vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+ ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip -4 route del 192.0.2.0/28 vrf vrf-h2
+ vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.130/28
+ tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+ simple_if_fini $h3 192.0.2.130/28
+}
+
+h4_create()
+{
+ simple_if_init $h4 192.0.2.130/28
+ tc qdisc add dev $h4 clsact
+}
+
+h4_destroy()
+{
+ tc qdisc del dev $h4 clsact
+ simple_if_fini $h4 192.0.2.130/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+ vlan_create $swp1 333 "" 192.0.2.2/28
+ vlan_create $swp1 555 "" 192.0.2.17/28
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ ip link set dev $swp3 up
+ ip link set dev $swp4 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev br1
+
+ team_create lag loadbalance $swp3 $swp4
+ ip link set dev lag master br1
+}
+
+switch_destroy()
+{
+ ip link set dev lag nomaster
+ team_destroy lag
+
+ ip -4 route del 192.0.2.130/32 dev br1
+ __addr_add_del br1 del 192.0.2.129/32
+ ip link set dev br1 down
+ ip link del dev br1
+
+ ip link set dev $swp4 down
+ ip link set dev $swp3 down
+
+ tunnel_destroy gt4
+
+ vlan_destroy $swp1 555
+ vlan_destroy $swp1 333
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp3=${NETIFS[p3]}
+ h3=${NETIFS[p4]}
+
+ swp4=${NETIFS[p5]}
+ h4=${NETIFS[p6]}
+
+ vrf_prepare
+
+ ip link set dev $h1 up
+ h1_create
+ h2_create
+ h3_create
+ h4_create
+ switch_create
+
+ trap_install $h3 ingress
+ trap_install $h4 ingress
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ trap_uninstall $h4 ingress
+ trap_uninstall $h3 ingress
+
+ switch_destroy
+ h4_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+ ip link set dev $h1 down
+
+ vrf_cleanup
+}
+
+test_lag_slave()
+{
+ local host_dev=$1; shift
+ local up_dev=$1; shift
+ local down_dev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress gt4 \
+ "proto 802.1q flower vlan_id 333 $tcflags"
+
+ # Test connectivity through $up_dev when $down_dev is set down.
+ ip link set dev $down_dev down
+ setup_wait_dev $up_dev
+ setup_wait_dev $host_dev
+ $ARPING -I br1 192.0.2.130 -qfc 1
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+
+ # Test lack of connectivity when both slaves are down.
+ ip link set dev $up_dev down
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+
+ ip link set dev $up_dev up
+ ip link set dev $down_dev up
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what ($tcflags)"
+}
+
+test_mirror_gretap_first()
+{
+ test_lag_slave $h3 $swp3 $swp4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+ test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000..135902aa8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+ test_ttl
+ test_tun_up
+ test_egress_up
+ test_remote_ip
+ test_tun_del
+ test_route_del
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ # This test downs $swp3, which deletes the configured IPv6 address
+ # unless this sysctl is set.
+ sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local prot=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ tc filter add dev $h3 ingress pref 77 prot $prot \
+ flower ip_ttl 50 action pass
+
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+
+ ip link set dev $tundev type $type ttl 50
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+ ip link set dev $tundev type $type ttl 100
+ tc filter del dev $h3 ingress pref 77
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: TTL change ($tcflags)"
+}
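+
+# The TTL reconfiguration exercised above can be reproduced by hand
+# (illustrative):
+#
+#	ip link set dev gt4 type gretap ttl 50
+#	ip -d link show dev gt4    # tunnel details now report "ttl 50"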
+
+test_span_gre_tun_up()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev up
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+ local tundev=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $swp3 down
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+	# After setting the device up, wait for the neighbor to be resolved,
+	# so that we can expect mirroring to work.
+ ip link set dev $swp3 up
+ setup_wait_dev $swp3
+ ping -c 1 -I $swp3 $remote_ip &>/dev/null
+
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: egress down/up ($tcflags)"
+}
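+
+# setup_wait_dev comes from lib.sh; in essence it polls until the device
+# reports an UP state, roughly (sketch, not the exact lib.sh code):
+#
+#	setup_wait_dev()
+#	{
+#		local dev=$1; shift
+#
+#		while ! ip link show dev $dev up | grep -q 'state UP'; do
+#			sleep 1
+#		done
+#	}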
+
+test_span_gre_remote_ip()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local correct_ip=$1; shift
+ local wrong_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip link set dev $tundev type $type remote $wrong_ip
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+
+ ip link set dev $tundev type $type remote $correct_ip
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: remote address change ($tcflags)"
+}
+
+test_span_gre_tun_del()
+{
+ local tundev=$1; shift
+ local type=$1; shift
+ local flags=$1; shift
+ local local_ip=$1; shift
+ local remote_ip=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ ip link del dev $tundev
+ fail_test_span_gre_dir $tundev ingress
+
+ tunnel_create $tundev $type $local_ip $remote_ip \
+ ttl 100 tos inherit $flags
+
+ # Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+ # and verify it works for the follow-up tests.
+ mirror_uninstall $swp1 ingress
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: tunnel deleted ($tcflags)"
+}
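+
+# tunnel_create and tunnel_destroy are lib.sh wrappers; a minimal sketch of
+# what they do (assumed, the real helpers may do more):
+#
+#	tunnel_create()
+#	{
+#		local name=$1; shift
+#		local type=$1; shift
+#		local local_addr=$1; shift
+#		local remote_addr=$1; shift
+#
+#		ip link add name $name type $type \
+#			local $local_addr remote $remote_addr "$@"
+#		ip link set dev $name up
+#	}
+#
+#	tunnel_destroy()
+#	{
+#		ip link del dev $1
+#	}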
+
+test_span_gre_route_del()
+{
+ local tundev=$1; shift
+ local edev=$1; shift
+ local route=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ ip route del $route dev $edev
+ fail_test_span_gre_dir $tundev ingress
+
+ ip route add $route dev $edev
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: underlay route removal ($tcflags)"
+}
+
+test_ttl()
+{
+ test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+ test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+ test_span_gre_tun_up gt4 "mirror to gretap"
+ test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+ test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+ test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+ test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+ test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_tun_del()
+{
+ test_span_gre_tun_del gt4 gretap "" \
+ 192.0.2.129 192.0.2.130 "mirror to gretap"
+ test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+ 2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+ test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+ test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000..12914f406
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices.
+# The interfaces on H1 and H2 have two addresses each. A flower match on one
+# of the addresses is configured with a mirror action. When pinging that
+# address, mirroring is expected to take place; when pinging the other one,
+# it is not.
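+
+# Concretely, the mirror rule that mirror_install (mirror_lib.sh) sets up for
+# the ingress/gt4 case below expands to roughly (with $tcflags being skip_hw
+# in the first run):
+#
+#	tc filter add dev $swp1 ingress pref 1000 \
+#		protocol ip flower skip_hw dst_ip 192.0.2.4 \
+#		action mirred egress mirror dev gt4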
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+
+ ip address add dev $h1 192.0.2.3/28
+ ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h2 192.0.2.4/28
+ ip address del dev $h1 192.0.2.3/28
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+fail_test_span_gre_dir_acl()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local match_dip=$1; shift
+ local what=$1; shift
+
+ mirror_install $swp1 $direction $tundev \
+ "protocol ip flower $tcflags dst_ip $match_dip"
+ fail_test_span_gre_dir $tundev $direction
+ test_span_gre_dir_acl "$tundev" "$direction" \
+ "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+	# Test lack of mirroring after the ACL mirror is uninstalled.
+ fail_test_span_gre_dir_acl "$tundev" "$direction"
+
+ log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+ full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
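+
+# The 8/0 arguments are ICMP types: the ingress direction mirrors echo
+# requests (type 8), the egress direction echo replies (type 0). They feed
+# the "type $forward_type" / "type $backward_type" captures in mirror_lib.sh.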
+
+test_ip6gretap()
+{
+ full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+ full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+ RET=0
+
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
new file mode 100755
index 000000000..9edf4cb10
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -0,0 +1,285 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# team device.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1.333 | | $h1.555 + |
+# | | 192.0.2.1/28 | | 192.0.2.18/28 | |
+# +----|-----------------+ +----------------|-----+
+# | $h1 |
+# +---------------------------------+------------------------------+
+# |
+# +--------------------------------------|------------------------------------+
+# | SW o---> mirror |
+# | | |
+# | +----------------------------------+------------------------------+ |
+# | | $swp1 | |
+# | + $swp1.333 $swp1.555 + |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | |
+# | |
+# | + gt4 (gretap) ,-> + lag1 (team) |
+# | loc=192.0.2.129 | | 192.0.2.129/28 |
+# | rem=192.0.2.130 --' | |
+# | ttl=100 | |
+# | tos=inherit | |
+# | _____________________|______________________ |
+# | / \ |
+# | / \ |
+# | + $swp3 + $swp4 |
+# +---|------------------------------------------------|----------------------+
+# | |
+# +---|------------------------------------------------|----------------------+
+# | + $h3 + $h4 H3 |
+# | \ / |
+# | \____________________________________________/ |
+# | | |
+# | + lag2 (team) |
+# | 192.0.2.130/28 |
+# | |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_mirror_gretap_first
+ test_mirror_gretap_second
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+require_command $ARPING
+
+vlan_host_create()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+ local ips=("${@}")
+
+ vrf_create $vrf_name
+ ip link set dev $vrf_name up
+ vlan_create $if_name $vid $vrf_name "${ips[@]}"
+}
+
+vlan_host_destroy()
+{
+ local if_name=$1; shift
+ local vid=$1; shift
+ local vrf_name=$1; shift
+
+ vlan_destroy $if_name $vid
+ ip link set dev $vrf_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ vlan_host_create $h1 333 vrf-h1 192.0.2.1/28
+ ip -4 route add 192.0.2.16/28 vrf vrf-h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del 192.0.2.16/28 vrf vrf-h1
+ vlan_host_destroy $h1 333 vrf-h1
+}
+
+h2_create()
+{
+ vlan_host_create $h1 555 vrf-h2 192.0.2.18/28
+ ip -4 route add 192.0.2.0/28 vrf vrf-h2 nexthop via 192.0.2.17
+}
+
+h2_destroy()
+{
+ ip -4 route del 192.0.2.0/28 vrf vrf-h2
+ vlan_host_destroy $h1 555 vrf-h2
+}
+
+h3_create_team()
+{
+ team_create lag2 lacp $h3 $h4
+ __simple_if_init lag2 vrf-h3 192.0.2.130/32
+ ip -4 route add vrf vrf-h3 192.0.2.129/32 dev lag2
+}
+
+h3_destroy_team()
+{
+ ip -4 route del vrf vrf-h3 192.0.2.129/32 dev lag2
+ __simple_if_fini lag2 192.0.2.130/32
+ team_destroy lag2
+
+ ip link set dev $h3 down
+ ip link set dev $h4 down
+}
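+
+# team_create and team_destroy are lib.sh wrappers around teamd; roughly
+# (sketch, assuming the lacp runner used here):
+#
+#	team_create()
+#	{
+#		local if_name=$1; shift
+#		local mode=$1; shift
+#
+#		require_command $TEAMD
+#		$TEAMD -t $if_name -d -c '{"runner": {"name": "'$mode'"}}'
+#		for slave in "$@"; do
+#			ip link set dev $slave down
+#			ip link set dev $slave master $if_name
+#			ip link set dev $slave up
+#		done
+#		ip link set dev $if_name up
+#	}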
+
+h3_create()
+{
+ vrf_create vrf-h3
+ ip link set dev vrf-h3 up
+ tc qdisc add dev $h3 clsact
+ tc qdisc add dev $h4 clsact
+ h3_create_team
+}
+
+h3_destroy()
+{
+ h3_destroy_team
+ tc qdisc del dev $h4 clsact
+ tc qdisc del dev $h3 clsact
+ ip link set dev vrf-h3 down
+ vrf_destroy vrf-h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+ vlan_create $swp1 333 "" 192.0.2.2/28
+ vlan_create $swp1 555 "" 192.0.2.17/28
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ ip link set dev $swp3 up
+ ip link set dev $swp4 up
+ team_create lag1 lacp $swp3 $swp4
+ __addr_add_del lag1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev lag1
+}
+
+switch_destroy()
+{
+ ip -4 route del 192.0.2.130/32 dev lag1
+ __addr_add_del lag1 del 192.0.2.129/32
+ team_destroy lag1
+
+ ip link set dev $swp4 down
+ ip link set dev $swp3 down
+
+ tunnel_destroy gt4
+
+ vlan_destroy $swp1 555
+ vlan_destroy $swp1 333
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp3=${NETIFS[p3]}
+ h3=${NETIFS[p4]}
+
+ swp4=${NETIFS[p5]}
+ h4=${NETIFS[p6]}
+
+ vrf_prepare
+
+ ip link set dev $h1 up
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+
+ trap_install $h3 ingress
+ trap_install $h4 ingress
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ trap_uninstall $h4 ingress
+ trap_uninstall $h3 ingress
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+ ip link set dev $h1 down
+
+ vrf_cleanup
+}
+
+test_lag_slave()
+{
+ local up_dev=$1; shift
+ local down_dev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress gt4 \
+ "proto 802.1q flower vlan_id 333 $tcflags"
+
+	# Move $down_dev away from the team. That will prompt a change in the
+	# txability of the connected device, without changing its
+	# administrative state. The driver should notice the txability change
+	# and move the traffic to the other slave.
+ ip link set dev $down_dev nomaster
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+
+ # Test lack of connectivity when neither slave is txable.
+ ip link set dev $up_dev nomaster
+ sleep 2
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+ mirror_uninstall $swp1 ingress
+
+	# Recreate H3's team device, because mlxsw, which this test is
+	# predominantly meant to test, requires a bottom-up construction and
+	# doesn't allow enslavement to a device that already has an upper.
+ h3_destroy_team
+ h3_create_team
+ # Wait for ${h,swp}{3,4}.
+ setup_wait
+
+ log_test "$what ($tcflags)"
+}
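+
+# While debugging, which slave the LACP runner currently considers txable
+# can be inspected by hand (illustrative):
+#
+#	teamdctl lag1 state view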
+
+test_mirror_gretap_first()
+{
+ test_lag_slave $h3 $h4 "mirror to gretap: LAG first slave"
+}
+
+test_mirror_gretap_second()
+{
+ test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000..fac486178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+
+source "$relative_path/mirror_lib.sh"
+
+quick_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 10 h3-$tundev "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ do_test_span_dir_ips 0 h3-$tundev "$@"
+}
+
+test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+
+ test_span_dir_ips h3-$tundev "$@"
+}
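+
+# The 10/0 expectations wired in above match the 10 ICMP requests that
+# mirror_test (mirror_lib.sh) sends per call, e.g. a passing check is:
+#
+#	do_test_span_dir_ips 10 h3-gt4 ingress 192.0.2.1 192.0.2.2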
+
+full_test_span_gre_dir_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+full_test_span_gre_dir_vlan_ips()
+{
+ local tundev=$1; shift
+ local direction=$1; shift
+ local vlan_match=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+ test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
+
+ tc filter add dev $h3 ingress pref 77 prot 802.1q \
+ flower $vlan_match \
+ action pass
+ mirror_test v$h1 $ip1 $ip2 $h3 77 10
+ tc filter del dev $h3 ingress pref 77
+
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+ quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+ fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+ test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+ full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir_vlan()
+{
+ full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_stp_ips()
+{
+ local tundev=$1; shift
+ local nbpdev=$1; shift
+ local what=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ bridge link set dev $nbpdev state disabled
+ sleep 1
+ fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ bridge link set dev $nbpdev state forwarding
+ sleep 1
+ quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: STP state ($tcflags)"
+}
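+
+# The per-port STP state toggled above can be observed with (illustrative):
+#
+#	bridge link show dev $nbpdev    # reports "state disabled" or
+#	                                # "state forwarding"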
+
+full_test_span_gre_stp()
+{
+ full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000..fc0508e40
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test mirroring to gretap and ip6gretap when the neighbor entry for the
+# tunnel remote address holds an invalid address at the time the mirroring is
+# set up. Later on, the neighbor is deleted; it is expected to be
+# reinitialized through the usual ARP process, and the mirroring offload to
+# be updated accordingly.
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip address add dev $swp3 192.0.2.129/28
+ ip address add dev $h3 192.0.2.130/28
+
+ ip address add dev $swp3 2001:db8:2::1/64
+ ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip address del dev $h3 2001:db8:2::2/64
+ ip address del dev $swp3 2001:db8:2::1/64
+
+ ip address del dev $h3 192.0.2.130/28
+ ip address del dev $swp3 192.0.2.129/28
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+ local addr=$1; shift
+ local tundev=$1; shift
+ local direction=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
+ mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ fail_test_span_gre_dir $tundev ingress
+ ip neigh del dev $swp3 $addr
+ quick_test_span_gre_dir $tundev ingress
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction $what: neighbor change ($tcflags)"
+}
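+
+# The bogus entry planted by "ip neigh replace" can be observed by hand
+# (illustrative):
+#
+#	ip neigh show dev $swp3    # lists 00:11:22:33:44:55 until the entry
+#	                           # is deleted and re-resolved through ARP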
+
+test_gretap()
+{
+ test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+ test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000..6f9ef1820
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to a directly-attached
+# one).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ sysctl_set net.ipv4.conf.v$h3.rp_filter 0
+
+ ip address add dev $swp3 192.0.2.161/28
+ ip address add dev $h3 192.0.2.162/28
+ ip address add dev gt4 192.0.2.129/32
+ ip address add dev h3-gt4 192.0.2.130/32
+
+	# The IPv6 route can't be added after the address: such routes are
+	# rejected once the gateway address has been configured on the local
+	# system. It does work the other way around, though.
+ ip address add dev $swp3 2001:db8:4::1/64
+ ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address add dev $h3 2001:db8:4::2/64
+ ip address add dev gt6 2001:db8:2::1
+ ip address add dev h3-gt6 2001:db8:2::2
+}
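+
+# The ordering constraint in isolation (hypothetical standalone example;
+# the second command is rejected once the address exists):
+#
+#	ip address add dev $h3 2001:db8:4::2/64
+#	ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2
+#	# ^ fails, roughly: "Gateway can not be a local address"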
+
+cleanup()
+{
+ pre_cleanup
+
+ ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+ ip address del dev $h3 2001:db8:4::2/64
+ ip address del dev $swp3 2001:db8:4::1/64
+
+ ip address del dev $h3 192.0.2.162/28
+ ip address del dev $swp3 192.0.2.161/28
+
+ sysctl_restore net.ipv4.conf.v$h3.rp_filter 0
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.conf.$h3.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()
+{
+ RET=0
+ mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+	# For IPv4, test that there's no mirroring without the route directing
+	# the traffic to the tunnel remote address. Then add it and test that
+	# mirroring starts. For IPv6 this can't be tested, due to the
+	# limitation that routes for locally-specified IPv6 addresses can't be
+	# added.
+ fail_test_span_gre_dir gt4 ingress
+
+ ip route add 192.0.2.130/32 via 192.0.2.162
+ quick_test_span_gre_dir gt4 ingress
+ ip route del 192.0.2.130/32 via 192.0.2.162
+
+ mirror_uninstall $swp1 ingress
+ log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()
+{
+ RET=0
+
+ mirror_install $swp1 ingress gt6 "matchall $tcflags"
+ quick_test_span_gre_dir gt6 ingress
+ mirror_uninstall $swp1 ingress
+
+ log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000..39c03e286
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 + gt6 (ip6gretap) + gt4 (gretap) |
+# | | : loc=2001:db8:2::1 : loc=192.0.2.129 |
+# | | : rem=2001:db8:2::2 : rem=192.0.2.130 |
+# | | : ttl=100 : ttl=100 |
+# | | : tos=inherit : tos=inherit |
+# | | : : |
+# +-----|---------------------:----------------------:----------------------+
+# | : :
+# +-----|---------------------:----------------------:----------------------+
+# | H3 + $h3 + h3-gt6 (ip6gretap) + h3-gt4 (gretap) |
+# | loc=2001:db8:2::2 loc=192.0.2.130 |
+# | rem=2001:db8:2::1 rem=192.0.2.129 |
+# | ttl=100 ttl=100 |
+# | tos=inherit tos=inherit |
+# | |
+# +-------------------------------------------------------------------------+
+
+source "$relative_path/mirror_topo_lib.sh"
+
+mirror_gre_topo_h3_create()
+{
+ mirror_topo_h3_create
+
+ tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+ ip link set h3-gt4 vrf v$h3
+ matchall_sink_create h3-gt4
+
+ tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+ ip link set h3-gt6 vrf v$h3
+ matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()
+{
+ tunnel_destroy h3-gt6
+ tunnel_destroy h3-gt4
+
+ mirror_topo_h3_destroy
+}
+
+mirror_gre_topo_switch_create()
+{
+ mirror_topo_switch_create
+
+ tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+ ttl 100 tos inherit
+
+ tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+ ttl 100 tos inherit allow-localremote
+}
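+
+# gt6 needs allow-localremote because its remote address, 2001:db8:2::2, is
+# also configured locally (on $h3 of the same machine). The flag can be
+# verified with (illustrative):
+#
+#	ip -d link show dev gt6    # details include "allow-localremote"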
+
+mirror_gre_topo_switch_destroy()
+{
+ tunnel_destroy gt6
+ tunnel_destroy gt4
+
+ mirror_topo_switch_destroy
+}
+
+mirror_gre_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_gre_topo_h3_create
+
+ mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()
+{
+ mirror_gre_topo_switch_destroy
+
+ mirror_gre_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000..88cecdb9a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+ test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ ip link add name $swp3.555 link $swp3 type vlan id 555
+ ip address add dev $swp3.555 192.0.2.129/32
+ ip address add dev $swp3.555 2001:db8:2::1/128
+ ip link set dev $swp3.555 up
+
+ ip route add 192.0.2.130/32 dev $swp3.555
+ ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+ ip link add name $h3.555 link $h3 type vlan id 555
+ ip link set dev $h3.555 master v$h3
+ ip address add dev $h3.555 192.0.2.130/28
+ ip address add dev $h3.555 2001:db8:2::2/64
+ ip link set dev $h3.555 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link del dev $h3.555
+ ip link del dev $swp3.555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+}
+
+test_gretap()
+{
+ full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+ full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000..204b25f13
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+ test_gretap
+ test_ip6gretap
+ test_gretap_forbidden_cpu
+ test_ip6gretap_forbidden_cpu
+ test_gretap_forbidden_egress
+ test_ip6gretap_forbidden_egress
+ test_gretap_untagged_egress
+ test_ip6gretap_untagged_egress
+ test_gretap_fdb_roaming
+ test_ip6gretap_fdb_roaming
+ test_gretap_stp
+ test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+require_command $ARPING
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+	# gt4's remote address is at $h3.555, not $h3. Thus packets arriving
+	# directly at $h3 for test_gretap_untagged_egress() are rejected by
+	# rp_filter and the test spuriously fails.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+ vrf_prepare
+ mirror_gre_topo_create
+
+ vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128
+ bridge vlan add dev br1 vid 555 self
+ ip route rep 192.0.2.130/32 dev br1.555
+ ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+ vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+
+ ip link set dev $swp3 master br1
+ bridge vlan add dev $swp3 vid 555
+ bridge vlan add dev $swp2 vid 555
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp3 nomaster
+ vlan_destroy $h3 555
+ vlan_destroy br1 555
+
+ mirror_gre_topo_destroy
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.conf.$h3.rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_vlan_match()
+{
+ local tundev=$1; shift
+ local vlan_match=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+ full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+ test_vlan_match gt4 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+ test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_cpu()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+	# Run the pass-test first, to prime the neighbor table.
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ # Now forbid the VLAN at the bridge and see it fail.
+ bridge vlan del dev br1 vid 555 self
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge vlan add dev br1 vid 555 self
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden_cpu()
+{
+ test_span_gre_forbidden_cpu gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_cpu()
+{
+ test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_egress()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ bridge vlan del dev $swp3 vid 555
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge vlan add dev $swp3 vid 555
+ # Re-prime FDB
+ $ARPING -I br1.555 192.0.2.130 -fqc 1
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+}
+
+test_gretap_forbidden_egress()
+{
+ test_span_gre_forbidden_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_egress()
+{
+ test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_untagged_egress()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+
+ quick_test_span_gre_dir $tundev ingress
+ quick_test_span_vlan_dir $h3 555 ingress
+
+ bridge vlan add dev $swp3 vid 555 pvid untagged
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+ fail_test_span_vlan_dir $h3 555 ingress
+
+ bridge vlan add dev $swp3 vid 555
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+ quick_test_span_vlan_dir $h3 555 ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+}
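+
+# The pvid/untagged flags toggled above can be inspected by hand; the output
+# looks roughly like (illustrative):
+#
+#	bridge vlan show dev $swp3
+#	# port    vlan ids
+#	# $swp3   1 PVID Egress Untagged
+#	#         555 PVID Egress Untagged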
+
+test_gretap_untagged_egress()
+{
+ test_span_gre_untagged_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_untagged_egress()
+{
+ test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_fdb_roaming()
+{
+ local tundev=$1; shift
+ local what=$1; shift
+ local h3mac=$(mac_get $h3)
+
+ RET=0
+
+ mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ quick_test_span_gre_dir $tundev ingress
+
+ bridge fdb del dev $swp3 $h3mac vlan 555 master
+ bridge fdb add dev $swp2 $h3mac vlan 555 master
+ sleep 1
+ fail_test_span_gre_dir $tundev ingress
+
+ bridge fdb del dev $swp2 $h3mac vlan 555 master
+ # Re-prime FDB
+ $ARPING -I br1.555 192.0.2.130 -fqc 1
+ sleep 1
+ quick_test_span_gre_dir $tundev ingress
+
+ mirror_uninstall $swp1 ingress
+
+ log_test "$what: MAC roaming ($tcflags)"
+}
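+
+# The roamed FDB entry can be observed while debugging (illustrative):
+#
+#	bridge fdb show br br1 vlan 555 | grep $h3mac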
+
+test_gretap_fdb_roaming()
+{
+ test_span_gre_fdb_roaming gt4 "mirror to gretap"
+}
+
+test_ip6gretap_fdb_roaming()
+{
+ test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+ full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+ full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+
+ tests_run
+
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000..07991e102
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+ local to_dev=$1; shift
+ local filter=$1; shift
+
+ tc filter add dev $from_dev $direction \
+ pref 1000 $filter \
+ action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()
+{
+ local from_dev=$1; shift
+ local direction=$1; shift
+
+	tc filter del dev $from_dev $direction pref 1000
+}
+
+mirror_test()
+{
+ local vrf_name=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dev=$1; shift
+ local pref=$1; shift
+ local expect=$1; shift
+
+ local t0=$(tc_rule_stats_get $dev $pref)
+ ip vrf exec $vrf_name \
+ ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+ local t1=$(tc_rule_stats_get $dev $pref)
+ local delta=$((t1 - t0))
+	# Tolerate a couple of stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "Expected to capture $expect packets, got $delta."
+}
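+
+# Example invocation: expect all 10 pings (-c 10 above) to be captured by
+# the pref-100 rule on h3-gt4 (illustrative arguments):
+#
+#	mirror_test v$h1 192.0.2.1 192.0.2.2 h3-gt4 100 10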
+
+do_test_span_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ icmp_capture_install $dev
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()
+{
+ do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()
+{
+ do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()
+{
+ local dev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+ icmp_capture_install $dev "type $forward_type"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 10
+ icmp_capture_uninstall $dev
+
+ icmp_capture_install $dev "type $backward_type"
+ mirror_test v$h2 $ip2 $ip1 $dev 100 10
+ icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()
+{
+ fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_dir()
+{
+ test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+do_test_span_vlan_dir_ips()
+{
+ local expect=$1; shift
+ local dev=$1; shift
+ local vid=$1; shift
+ local direction=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+	# Install the capture as skip_hw to avoid double-counting of packets.
+	# The traffic is meant for the local box anyway, so it will be trapped
+	# to the kernel.
+ vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
+ mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ vlan_capture_uninstall $dev
+}
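+
+# vlan_capture_install and vlan_capture_uninstall come from lib.sh; a rough
+# sketch of the install side:
+#
+#	vlan_capture_install()
+#	{
+#		local dev=$1; shift
+#		local filter=$1; shift
+#
+#		tc filter add dev $dev ingress pref 100 prot 802.1q \
+#			flower $filter action pass
+#	}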
+
+quick_test_span_vlan_dir_ips()
+{
+ do_test_span_vlan_dir_ips 10 "$@"
+}
+
+fail_test_span_vlan_dir_ips()
+{
+ do_test_span_vlan_dir_ips 0 "$@"
+}
+
+quick_test_span_vlan_dir()
+{
+ quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_vlan_dir()
+{
+ fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000..04979e596
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically by adding more devices to the
+# topology.
+#
+# +---------------------+ +---------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +-----|---------------+ +---------------|-----+
+# | |
+# +-----|-------------------------------------------------------------|-----+
+# | SW o--> mirror | |
+# | +---|-------------------------------------------------------------|---+ |
+# | | + $swp1 BR $swp2 + | |
+# | +---------------------------------------------------------------------+ |
+# | |
+# | + $swp3 |
+# +-----|-------------------------------------------------------------------+
+# |
+# +-----|-------------------------------------------------------------------+
+# | H3 + $h3 |
+# | |
+# +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()
+{
+ simple_if_init $h3
+ tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+ simple_if_fini $h3
+}
+
+mirror_topo_switch_create()
+{
+ ip link set dev $swp3 up
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+}
+
+mirror_topo_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br1
+
+ ip link set dev $swp3 down
+}
+
+mirror_topo_create()
+{
+ mirror_topo_h1_create
+ mirror_topo_h2_create
+ mirror_topo_h3_create
+
+ mirror_topo_switch_create
+}
+
+mirror_topo_destroy()
+{
+ mirror_topo_switch_destroy
+
+ mirror_topo_h3_destroy
+ mirror_topo_h2_destroy
+ mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000..9ab2ce77b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses the standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+ test_vlan
+ test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ mirror_topo_create
+
+ vlan_create $swp3 555
+
+ vlan_create $h3 555 v$h3
+ matchall_sink_create $h3.555
+
+ vlan_create $h1 111 v$h1 192.0.2.17/28
+ bridge vlan add dev $swp1 vid 111
+
+ vlan_create $h2 111 v$h2 192.0.2.18/28
+ bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vlan_destroy $h2 111
+ vlan_destroy $h1 111
+ vlan_destroy $h3 555
+ vlan_destroy $swp3 555
+
+ mirror_topo_destroy
+ vrf_cleanup
+}
+
+test_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()
+{
+ test_vlan_dir ingress 8 0
+ test_vlan_dir egress 0 8
+}
+
+test_tagged_vlan_dir()
+{
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ RET=0
+
+ mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+ do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+ 192.0.2.17 192.0.2.18
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \
+ 192.0.2.17 192.0.2.18
+ mirror_uninstall $swp1 $direction
+
+ log_test "$direction mirror tagged to vlan ($tcflags)"
+}
+
+test_tagged_vlan()
+{
+ test_tagged_vlan_dir ingress 8 0
+ test_tagged_vlan_dir egress 0 8
+}
+
+test_all()
+{
+ slow_path_trap_install $swp1 ingress
+ slow_path_trap_install $swp1 egress
+ trap_install $h3 ingress
+
+ tests_run
+
+ trap_uninstall $h3 ingress
+ slow_path_trap_uninstall $swp1 egress
+ slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+ echo "WARN: Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
new file mode 100755
index 000000000..a75cb51cc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
new file mode 100755
index 000000000..ebc596a27
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
new file mode 100755
index 000000000..fa6a88c50
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ vlan
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 555
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ bridge vlan add dev br1 vid 555 self pvid untagged
+ bridge vlan add dev $swp1 vid 555
+
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+vlan()
+{
+ RET=0
+
+ bridge vlan add dev br1 vid 333 self
+ check_err $? "Can't add a non-PVID VLAN"
+ bridge vlan del dev br1 vid 333 self
+ check_err $? "Can't remove a non-PVID VLAN"
+
+ log_test "vlan"
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_broadcast.sh b/tools/testing/selftests/net/forwarding/router_broadcast.sh
new file mode 100755
index 000000000..7bd2ebb6e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_broadcast.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 198.51.200.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+}
+
+h1_destroy()
+{
+ ip route del 198.51.200.0/24 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 198.51.200.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+}
+
+h2_destroy()
+{
+ ip route del 198.51.200.0/24 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+h3_create()
+{
+ vrf_create "vrf-h3"
+ ip link set dev $h3 master vrf-h3
+
+ ip link set dev vrf-h3 up
+ ip link set dev $h3 up
+
+ ip address add 198.51.200.2/24 dev $h3
+
+ ip route add 192.0.2.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+ ip route add 198.51.100.0/24 vrf vrf-h3 nexthop via 198.51.200.1
+}
+
+h3_destroy()
+{
+ ip route del 198.51.100.0/24 vrf vrf-h3
+ ip route del 192.0.2.0/24 vrf vrf-h3
+
+ ip address del 198.51.200.2/24 dev $h3
+
+ ip link set dev $h3 down
+ vrf_destroy "vrf-h3"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+ ip link set dev $rp3 up
+
+ ip address add 192.0.2.1/24 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 198.51.200.1/24 dev $rp3
+}
+
+router_destroy()
+{
+ ip address del 198.51.200.1/24 dev $rp3
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 192.0.2.1/24 dev $rp1
+
+ ip link set dev $rp3 down
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+bc_forwarding_disable()
+{
+ sysctl_set net.ipv4.conf.all.bc_forwarding 0
+ sysctl_set net.ipv4.conf.$rp1.bc_forwarding 0
+}
+
+bc_forwarding_enable()
+{
+ sysctl_set net.ipv4.conf.all.bc_forwarding 1
+ sysctl_set net.ipv4.conf.$rp1.bc_forwarding 1
+}
+
+bc_forwarding_restore()
+{
+ sysctl_restore net.ipv4.conf.$rp1.bc_forwarding
+ sysctl_restore net.ipv4.conf.all.bc_forwarding
+}
+
+ping_test_from()
+{
+ local oif=$1
+ local dip=$2
+ local from=$3
+ local fail=${4:-0}
+
+ RET=0
+
+ log_info "ping $dip, expected reply from $from"
+ ip vrf exec $(master_name_get $oif) \
+ $PING -I $oif $dip -c 10 -i 0.1 -w 2 -b 2>&1 | grep $from &> /dev/null
+ check_err_fail $fail $?
+}
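+
+# The grep above keys on the replier's address in ping's per-reply output,
+# which looks roughly like:
+#
+#	64 bytes from 192.0.2.1: icmp_seq=1 ttl=64 time=0.055 ms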
+
+ping_ipv4()
+{
+ sysctl_set net.ipv4.icmp_echo_ignore_broadcasts 0
+
+ bc_forwarding_disable
+ log_info "bc_forwarding disabled on r1 =>"
+ ping_test_from $h1 198.51.100.255 192.0.2.1
+ log_test "h1 -> net2: reply from r1 (not forwarding)"
+ ping_test_from $h1 198.51.200.255 192.0.2.1
+ log_test "h1 -> net3: reply from r1 (not forwarding)"
+ ping_test_from $h1 192.0.2.255 192.0.2.1
+ log_test "h1 -> net1: reply from r1 (not dropping)"
+ ping_test_from $h1 255.255.255.255 192.0.2.1
+ log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+ ping_test_from $h2 192.0.2.255 198.51.100.1
+ log_test "h2 -> net1: reply from r1 (not forwarding)"
+ ping_test_from $h2 198.51.200.255 198.51.100.1
+ log_test "h2 -> net3: reply from r1 (not forwarding)"
+ ping_test_from $h2 198.51.100.255 198.51.100.1
+ log_test "h2 -> net2: reply from r1 (not dropping)"
+ ping_test_from $h2 255.255.255.255 198.51.100.1
+ log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+ bc_forwarding_restore
+
+ bc_forwarding_enable
+ log_info "bc_forwarding enabled on r1 =>"
+ ping_test_from $h1 198.51.100.255 198.51.100.2
+ log_test "h1 -> net2: reply from h2 (forwarding)"
+ ping_test_from $h1 198.51.200.255 198.51.200.2
+ log_test "h1 -> net3: reply from h3 (forwarding)"
+ ping_test_from $h1 192.0.2.255 192.0.2.1 1
+ log_test "h1 -> net1: no reply (dropping)"
+ ping_test_from $h1 255.255.255.255 192.0.2.1
+ log_test "h1 -> 255.255.255.255: reply from r1 (not forwarding)"
+
+ ping_test_from $h2 192.0.2.255 192.0.2.2
+ log_test "h2 -> net1: reply from h1 (forwarding)"
+ ping_test_from $h2 198.51.200.255 198.51.200.2
+ log_test "h2 -> net3: reply from h3 (forwarding)"
+ ping_test_from $h2 198.51.100.255 198.51.100.1 1
+ log_test "h2 -> net2: no reply (dropping)"
+ ping_test_from $h2 255.255.255.255 198.51.100.1
+ log_test "h2 -> 255.255.255.255: reply from r1 (not forwarding)"
+ bc_forwarding_restore
+
+ sysctl_restore net.ipv4.icmp_echo_ignore_broadcasts
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
new file mode 100755
index 000000000..79a209927
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+
+ ip route add 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ ip route add 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+
+ ip route add 192.0.2.0/24 vrf vrf-r2 \
+ nexthop via 169.254.2.12 dev $rp22 \
+ nexthop via 169.254.3.13 dev $rp23
+ ip route add 2001:db8:1::/64 vrf vrf-r2 \
+ nexthop via fe80:2::12 dev $rp22 \
+ nexthop via fe80:3::13 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
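+	# Hash policy 1 selects L4 (5-tuple) hashing, so the varying UDP
+	# ports generated below map flows onto different next hops.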
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
+ nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ ip route replace 198.51.100.0/24 vrf vrf-r1 \
+ nexthop via 169.254.2.22 dev $rp12 \
+ nexthop via 169.254.3.23 dev $rp13
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the configured weights.
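+	# Hash policy 1 selects L4 (5-tuple) hashing for IPv6 as well.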
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
+ nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ # Generate 16384 echo requests, each with a random flow label.
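+	# The default IPv6 multipath hash includes the flow label, so
+	# randomizing it spreads the requests across the next hops.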
+ for _ in $(seq 1 16384); do
+ ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 \
+ nexthop via fe80:2::22 dev $rp12 \
+ nexthop via fe80:3::23 dev $rp13
+}
+
+multipath_test()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
new file mode 100755
index 000000000..813d02d19
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+ mirred_egress_mirror_test gact_trap_test"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+mirred_egress_test()
+{
+ local action=$1
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched without redirect rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress $action \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match incoming $action packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "mirred egress $action ($tcflags)"
+}
+
+gact_drop_and_ok_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not dropped"
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action ok
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see passed packet"
+
+ tc_check_packets "dev $swp1 ingress" 102 2
+ check_fail $? "Packet was dropped and it should not reach here"
+
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "gact drop and ok ($tcflags)"
+}
+
+gact_trap_test()
+{
+ RET=0
+
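+	# The trap action is only meaningful for rules offloaded to hardware,
+	# so run this test only in the skip_sw pass.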
+ if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0
+ fi
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_hw dst_ip 192.0.2.2 action drop
+ tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.2 action mirred egress redirect \
+ dev $swp2
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_fail $? "Saw packet without trap rule inserted"
+
+ tc filter add dev $swp1 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action trap
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "Packet was not trapped"
+
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Did not see trapped packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $swp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "trap ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
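+	# Swap in the hosts' MAC addresses so frames from h1 terminate at
+	# $swp1 and, once redirected or mirrored out $swp2, are still
+	# accepted by h2.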
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+mirred_egress_redirect_test()
+{
+ mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()
+{
+ mirred_egress_test "mirror"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
new file mode 100755
index 000000000..2934fb5ed
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -0,0 +1,205 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test create_destroy_chain \
+ template_filter_fits"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+unreachable_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_fail $? "matched on filter in unreachable chain"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "unreachable chain ($tcflags)"
+}
+
+gact_goto_chain_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $h2mac action goto chain 1
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter with goto chain action"
+
+ tc_check_packets "dev $h2 ingress" 1101 1
+ check_err $? "Did not match on correct filter in chain 1"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower
+
+ log_test "gact goto chain ($tcflags)"
+}
+
+create_destroy_chain()
+{
+ RET=0
+
+ tc chain add dev $h2 ingress
+ check_err $? "Failed to create default chain"
+
+ output="$(tc -j chain get dev $h2 ingress)"
+ check_err $? "Failed to get default chain"
+
+ echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+ check_err $? "Unexpected output for default chain"
+
+ tc chain add dev $h2 ingress chain 1
+ check_err $? "Failed to create chain 1"
+
+ output="$(tc -j chain get dev $h2 ingress chain 1)"
+ check_err $? "Failed to get chain 1"
+
+ echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+ check_err $? "Unexpected output for chain 1"
+
+ output="$(tc -j chain show dev $h2 ingress)"
+ check_err $? "Failed to dump chains"
+
+ echo $output | jq -e ".[] | select(.chain == 0)" &> /dev/null
+ check_err $? "Can't find default chain in dump"
+
+ echo $output | jq -e ".[] | select(.chain == 1)" &> /dev/null
+ check_err $? "Can't find chain 1 in dump"
+
+ tc chain del dev $h2 ingress
+ check_err $? "Failed to destroy default chain"
+
+ tc chain del dev $h2 ingress chain 1
+ check_err $? "Failed to destroy chain 1"
+
+ log_test "create destroy chain"
+}
+
+template_filter_fits()
+{
+ RET=0
+
+ tc chain add dev $h2 ingress protocol ip \
+ flower dst_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+ tc chain add dev $h2 ingress chain 1 protocol ip \
+ flower src_mac 00:00:00:00:00:00/FF:FF:FF:FF:FF:FF &> /dev/null
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 1101 \
+ flower dst_mac $h2mac action drop
+ check_err $? "Failed to insert filter which fits template"
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
+ flower src_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeeded to insert filter which does not fit template"
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower src_mac $h2mac action drop
+ check_err $? "Failed to insert filter which fits template"
+
+ tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+ flower dst_mac $h2mac action drop &> /dev/null
+	check_fail $? "Incorrectly succeeded to insert filter which does not fit template"
+
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
+ flower &> /dev/null
+ tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
+ flower &> /dev/null
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 1102 \
+ flower &> /dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 1101 \
+ flower &> /dev/null
+
+ tc chain del dev $h2 ingress chain 1
+ tc chain del dev $h2 ingress
+
+ log_test "template filter fits"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+check_tc_chain_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
new file mode 100644
index 000000000..9d3b64a2a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+CHECK_TC="yes"
+
+tc_check_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+ local ret
+
+ output="$(tc -j -s filter show $id)"
+	# work around the jq bug which causes jq to return 0 when the input is ""
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
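+	# jq -e exits non-zero when the query selects nothing, so a missing
+	# handle or a packet-count mismatch is reported as a failure.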
+ echo $output | \
+ jq -e ".[] \
+ | select(.options.handle == $handle) \
+ | select(.options.actions[0].stats.packets == $count)" \
+ &> /dev/null
+ return $?
+}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
new file mode 100755
index 000000000..20d1077e5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -0,0 +1,260 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+ match_src_ip_test match_ip_flags_test"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 198.51.100.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 198.51.100.2/24
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/24 198.51.100.2/24
+}
+
+match_dst_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_mac $h2mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "dst_mac match ($tcflags)"
+}
+
+match_src_mac_test()
+{
+ local dummy_mac=de:ad:be:ef:aa:aa
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_mac $dummy_mac action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_mac $h1mac action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ log_test "src_mac match ($tcflags)"
+}
+
+match_dst_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 198.51.100.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags dst_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "dst_ip match ($tcflags)"
+}
+
+match_src_ip_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags src_ip 198.51.100.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags src_ip 192.0.2.1 action drop
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags src_ip 192.0.2.0/24 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct filter with mask"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+
+ log_test "src_ip match ($tcflags)"
+}
+
+match_ip_flags_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags ip_flags frag action continue
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags ip_flags firstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+ $tcflags ip_flags nofirstfrag action continue
+ tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+ $tcflags ip_flags nofrag action drop
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on wrong frag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+ check_err $? "Did not match on nofrag filter (nofrag)"
+
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=0,mf" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on frag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on firstfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on wrong nofrag filter (1stfrag)"
+
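+	# frag=256 sets a non-zero fragment offset, i.e. a non-first
+	# fragment, first with and then without the MF bit.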
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256,mf" -q
+ $MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "frag=256" -q
+
+ tc_check_packets "dev $h2 ingress" 101 3
+ check_err $? "Did not match on frag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 103 3
+ check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+ tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on wrong nofrag filter (no1stfrag)"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+ log_test "ip_flags match ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
new file mode 100755
index 000000000..9826a446e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="shared_block_test"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.1/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.1/24
+}
+
+switch_create()
+{
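+	# Attach both ports to shared ingress block 22 and egress block 23,
+	# so a filter added once to a block is evaluated on both devices.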
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 ingress_block 22 egress_block 23 clsact
+
+ simple_if_init $swp2 192.0.2.2/24
+ tc qdisc add dev $swp2 ingress_block 22 egress_block 23 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ simple_if_fini $swp2 192.0.2.2/24
+
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+shared_block_test()
+{
+ RET=0
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 1
+ check_err $? "Did not match first incoming packet on a block"
+
+ $MZ $h2 -c 1 -p 64 -a $h2mac -b $swmac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "block 22" 101 2
+ check_err $? "Did not match second incoming packet on a block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ log_test "shared block ($tcflags)"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp2 address $swmac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/in_netns.sh b/tools/testing/selftests/net/in_netns.sh
new file mode 100755
index 000000000..88795b510
--- /dev/null
+++ b/tools/testing/selftests/net/in_netns.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Execute a subprocess in a network namespace
+
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+ip netns exec "${NETNS}" "$@"
+exit "$?"
diff --git a/tools/testing/selftests/net/ip6_gre_headroom.sh b/tools/testing/selftests/net/ip6_gre_headroom.sh
new file mode 100755
index 000000000..5b41e8bb6
--- /dev/null
+++ b/tools/testing/selftests/net/ip6_gre_headroom.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that enough headroom is reserved for the first packet passing through an
+# IPv6 GRE-like netdevice.
+
+setup_prepare()
+{
+ ip link add h1 type veth peer name swp1
+ ip link add h3 type veth peer name swp3
+
+ ip link set dev h1 up
+ ip address add 192.0.2.1/28 dev h1
+
+ ip link add dev vh3 type vrf table 20
+ ip link set dev h3 master vh3
+ ip link set dev vh3 up
+ ip link set dev h3 up
+
+ ip link set dev swp3 up
+ ip address add dev swp3 2001:db8:2::1/64
+ ip address add dev swp3 2001:db8:2::3/64
+
+ ip link set dev swp1 up
+ tc qdisc add dev swp1 clsact
+
+ ip link add name er6 type ip6erspan \
+ local 2001:db8:2::1 remote 2001:db8:2::2 oseq okey 123
+ ip link set dev er6 up
+
+ ip link add name gt6 type ip6gretap \
+ local 2001:db8:2::3 remote 2001:db8:2::4
+ ip link set dev gt6 up
+
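+	# Give the freshly configured links and addresses a moment to settle.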
+ sleep 1
+}
+
+cleanup()
+{
+ ip link del dev gt6
+ ip link del dev er6
+ ip link del dev swp1
+ ip link del dev swp3
+ ip link del dev vh3
+}
+
+test_headroom()
+{
+ local type=$1; shift
+ local tundev=$1; shift
+
+ tc filter add dev swp1 ingress pref 1000 matchall skip_hw \
+ action mirred egress mirror dev $tundev
+ ping -I h1 192.0.2.2 -c 1 -w 2 &> /dev/null
+ tc filter del dev swp1 ingress pref 1000
+
+ # If it doesn't panic, it passes.
+ printf "TEST: %-60s [PASS]\n" "$type headroom"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+test_headroom ip6gretap gt6
+test_headroom ip6erspan er6
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
new file mode 100644
index 000000000..c53959193
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -0,0 +1,810 @@
+/* Evaluate MSG_ZEROCOPY
+ *
+ * Send traffic between two processes over one of the supported
+ * protocols and modes:
+ *
+ * PF_INET/PF_INET6
+ * - SOCK_STREAM
+ * - SOCK_DGRAM
+ * - SOCK_DGRAM with UDP_CORK
+ * - SOCK_RAW
+ * - SOCK_RAW with IP_HDRINCL
+ *
+ * PF_PACKET
+ * - SOCK_DGRAM
+ * - SOCK_RAW
+ *
+ * PF_RDS
+ * - SOCK_SEQPACKET
+ *
+ * Start this program on two connected hosts, one in send mode and
+ * the other with option '-r' to put it in receiver mode.
+ *
+ * If zerocopy mode ('-z') is enabled, the sender will verify that
+ * the kernel queues completions on the error queue for all zerocopy
+ * transfers.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <linux/rds.h>
+
+#ifndef SO_EE_ORIGIN_ZEROCOPY
+#define SO_EE_ORIGIN_ZEROCOPY 5
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef SO_EE_CODE_ZEROCOPY_COPIED
+#define SO_EE_CODE_ZEROCOPY_COPIED 1
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+static int cfg_cork;
+static bool cfg_cork_mixed;
+static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_family = PF_UNSPEC;
+static int cfg_ifindex = 1;
+static int cfg_payload_len;
+static int cfg_port = 8000;
+static bool cfg_rx;
+static int cfg_runtime_ms = 4200;
+static int cfg_verbose;
+static int cfg_waittime_ms = 500;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+
+static char payload[IP_MAXPACKET];
+static long packets, bytes, completions, expected_completions;
+static int zerocopied = -1;
+static uint32_t next_completion;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
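+/* RFC 1071 checksum: one's complement sum of 16-bit words, with carries
+ * folded back in and the result inverted.
+ */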
+static uint16_t get_ip_csum(const uint16_t *start, int num_words)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_words; i++)
+ sum += start[i];
+
+ while (sum >> 16)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ return ~sum;
+}
+
+static int do_setcpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+ else if (cfg_verbose)
+ fprintf(stderr, "cpu: %u\n", cpu);
+
+ return 0;
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+ if (setsockopt(fd, level, optname, &val, sizeof(val)))
+ error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_poll(int fd, int events)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = events;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ ret = poll(&pfd, 1, cfg_waittime_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ return ret && (pfd.revents & events);
+}
+
+static int do_accept(int fd)
+{
+ int fda = fd;
+
+ fd = accept(fda, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(fda))
+ error(1, errno, "close listen sock");
+
+ return fd;
+}
+
+static void add_zcopy_cookie(struct msghdr *msg, uint32_t cookie)
+{
+ struct cmsghdr *cm;
+
+ if (!msg->msg_control)
+ error(1, errno, "NULL cookie");
+ cm = (void *)msg->msg_control;
+ cm->cmsg_len = CMSG_LEN(sizeof(cookie));
+ cm->cmsg_level = SOL_RDS;
+ cm->cmsg_type = RDS_CMSG_ZCOPY_COOKIE;
+ memcpy(CMSG_DATA(cm), &cookie, sizeof(cookie));
+}
+
+static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
+{
+ int ret, len, i, flags;
+ static uint32_t cookie;
+ char ckbuf[CMSG_SPACE(sizeof(cookie))];
+
+ len = 0;
+ for (i = 0; i < msg->msg_iovlen; i++)
+ len += msg->msg_iov[i].iov_len;
+
+ flags = MSG_DONTWAIT;
+ if (do_zerocopy) {
+ flags |= MSG_ZEROCOPY;
+ if (domain == PF_RDS) {
+ memset(&msg->msg_control, 0, sizeof(msg->msg_control));
+ msg->msg_controllen = CMSG_SPACE(sizeof(cookie));
+ msg->msg_control = (struct cmsghdr *)ckbuf;
+ add_zcopy_cookie(msg, ++cookie);
+ }
+ }
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 && errno == EAGAIN)
+ return false;
+ if (ret == -1)
+ error(1, errno, "send");
+ if (cfg_verbose && ret != len)
+ fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+
+ if (len) {
+ packets++;
+ bytes += ret;
+ if (do_zerocopy && ret)
+ expected_completions++;
+ }
+ if (do_zerocopy && domain == PF_RDS) {
+ msg->msg_control = NULL;
+ msg->msg_controllen = 0;
+ }
+
+ return true;
+}
+
+static void do_sendmsg_corked(int fd, struct msghdr *msg)
+{
+ bool do_zerocopy = cfg_zerocopy;
+ int i, payload_len, extra_len;
+
+	/* split the payload into cfg_cork buffers; make the first buffer
+	 * longer when the length does not divide evenly
+	 */
+ payload_len = cfg_payload_len / cfg_cork;
+ extra_len = cfg_payload_len - (cfg_cork * payload_len);
+
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+ for (i = 0; i < cfg_cork; i++) {
+
+ /* in mixed-frags mode, alternate zerocopy and copy frags
+ * start with non-zerocopy, to ensure attach later works
+ */
+ if (cfg_cork_mixed)
+ do_zerocopy = (i & 1);
+
+ msg->msg_iov[0].iov_len = payload_len + extra_len;
+ extra_len = 0;
+
+ do_sendmsg(fd, msg, do_zerocopy,
+ (cfg_dst_addr.ss_family == AF_INET ?
+ PF_INET : PF_INET6));
+ }
+
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+}
+
+static int setup_iph(struct iphdr *iph, uint16_t payload_len)
+{
+ struct sockaddr_in *daddr = (void *) &cfg_dst_addr;
+ struct sockaddr_in *saddr = (void *) &cfg_src_addr;
+
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->tos = 0;
+ iph->ihl = 5;
+ iph->ttl = 2;
+ iph->saddr = saddr->sin_addr.s_addr;
+ iph->daddr = daddr->sin_addr.s_addr;
+ iph->protocol = IPPROTO_EGP;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
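+	/* iph->ihl counts 32-bit words; get_ip_csum() takes 16-bit words */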
+ iph->check = get_ip_csum((void *) iph, iph->ihl << 1);
+
+ return sizeof(*iph);
+}
+
+static int setup_ip6h(struct ipv6hdr *ip6h, uint16_t payload_len)
+{
+ struct sockaddr_in6 *daddr = (void *) &cfg_dst_addr;
+ struct sockaddr_in6 *saddr = (void *) &cfg_src_addr;
+
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = IPPROTO_EGP;
+ ip6h->hop_limit = 2;
+ ip6h->saddr = saddr->sin6_addr;
+ ip6h->daddr = daddr->sin6_addr;
+
+ return sizeof(*ip6h);
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+ int fd;
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+ if (cfg_zerocopy)
+ do_setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, 1);
+
+ if (domain != PF_PACKET && domain != PF_RDS)
+ if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (domain == PF_RDS) {
+ if (bind(fd, (void *) &cfg_src_addr, cfg_alen))
+ error(1, errno, "bind");
+ }
+
+ return fd;
+}
+
+static uint32_t do_process_zerocopy_cookies(struct rds_zcopy_cookies *ck)
+{
+ int i;
+
+ if (ck->num > RDS_MAX_ZCOOKIES)
+ error(1, 0, "Returned %d cookies, max expected %d\n",
+ ck->num, RDS_MAX_ZCOOKIES);
+ for (i = 0; i < ck->num; i++)
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "%d\n", ck->cookies[i]);
+ return ck->num;
+}
+
+static bool do_recvmsg_completion(int fd)
+{
+ char cmsgbuf[CMSG_SPACE(sizeof(struct rds_zcopy_cookies))];
+ struct rds_zcopy_cookies *ck;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ bool ret = false;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+
+ if (recvmsg(fd, &msg, MSG_DONTWAIT))
+ return ret;
+
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_RDS &&
+ cmsg->cmsg_type == RDS_CMSG_ZCOPY_COMPLETION) {
+
+ ck = (struct rds_zcopy_cookies *)CMSG_DATA(cmsg);
+ completions += do_process_zerocopy_cookies(ck);
+ ret = true;
+ break;
+ }
+ error(0, 0, "ignoring cmsg at level %d type %d\n",
+ cmsg->cmsg_level, cmsg->cmsg_type);
+ }
+ return ret;
+}
+
+static bool do_recv_completion(int fd, int domain)
+{
+ struct sock_extended_err *serr;
+ struct msghdr msg = {};
+ struct cmsghdr *cm;
+ uint32_t hi, lo, range;
+ int ret, zerocopy;
+ char control[100];
+
+ if (domain == PF_RDS)
+ return do_recvmsg_completion(fd);
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ return false;
+ if (ret == -1)
+ error(1, errno, "recvmsg notification");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, errno, "recvmsg notification: truncated");
+
+ cm = CMSG_FIRSTHDR(&msg);
+ if (!cm)
+ error(1, 0, "cmsg: no cmsg");
+ if (!((cm->cmsg_level == SOL_IP && cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 && cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET && cm->cmsg_type == PACKET_TX_TIMESTAMP)))
+ error(1, 0, "serr: wrong type: %d.%d",
+ cm->cmsg_level, cm->cmsg_type);
+
+ serr = (void *) CMSG_DATA(cm);
+
+ if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
+ error(1, 0, "serr: wrong origin: %u", serr->ee_origin);
+ if (serr->ee_errno != 0)
+ error(1, 0, "serr: wrong error code: %u", serr->ee_errno);
+
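+	/* ee_info..ee_data is the inclusive range of completed send calls */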
+ hi = serr->ee_data;
+ lo = serr->ee_info;
+ range = hi - lo + 1;
+
+ /* Detect notification gaps. These should not happen often, if at all.
+ * Gaps can occur due to drops, reordering and retransmissions.
+ */
+ if (lo != next_completion)
+ fprintf(stderr, "gap: %u..%u does not append to %u\n",
+ lo, hi, next_completion);
+ next_completion = hi + 1;
+
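+	/* the COPIED code means the kernel fell back to copying the data */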
+ zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
+ if (zerocopied == -1)
+ zerocopied = zerocopy;
+ else if (zerocopied != zerocopy) {
+ fprintf(stderr, "serr: inconsistent\n");
+ zerocopied = zerocopy;
+ }
+
+ if (cfg_verbose >= 2)
+ fprintf(stderr, "completed: %u (h=%u l=%u)\n",
+ range, hi, lo);
+
+ completions += range;
+ return true;
+}
+
+/* Read all outstanding messages on the errqueue */
+static void do_recv_completions(int fd, int domain)
+{
+ while (do_recv_completion(fd, domain)) {}
+}
+
+/* Wait for all remaining completions on the errqueue */
+static void do_recv_remaining_completions(int fd, int domain)
+{
+ int64_t tstop = gettimeofday_ms() + cfg_waittime_ms;
+
+ while (completions < expected_completions &&
+ gettimeofday_ms() < tstop) {
+ if (do_poll(fd, domain == PF_RDS ? POLLIN : POLLERR))
+ do_recv_completions(fd, domain);
+ }
+
+ if (completions < expected_completions)
+ fprintf(stderr, "missing notifications: %lu < %lu\n",
+ completions, expected_completions);
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+ struct iovec iov[3] = { {0} };
+ struct sockaddr_ll laddr;
+ struct msghdr msg = {0};
+ struct ethhdr eth;
+ union {
+ struct ipv6hdr ip6h;
+ struct iphdr iph;
+ } nh;
+ uint64_t tstop;
+ int fd;
+
+ fd = do_setup_tx(domain, type, protocol);
+
+ if (domain == PF_PACKET) {
+ uint16_t proto = cfg_family == PF_INET ? ETH_P_IP : ETH_P_IPV6;
+
+ /* sock_raw passes ll header as data */
+ if (type == SOCK_RAW) {
+ memset(eth.h_dest, 0x06, ETH_ALEN);
+ memset(eth.h_source, 0x02, ETH_ALEN);
+ eth.h_proto = htons(proto);
+ iov[0].iov_base = &eth;
+ iov[0].iov_len = sizeof(eth);
+ msg.msg_iovlen++;
+ }
+
+ /* both sock_raw and sock_dgram expect name */
+ memset(&laddr, 0, sizeof(laddr));
+ laddr.sll_family = AF_PACKET;
+ laddr.sll_ifindex = cfg_ifindex;
+ laddr.sll_protocol = htons(proto);
+ laddr.sll_halen = ETH_ALEN;
+
+ memset(laddr.sll_addr, 0x06, ETH_ALEN);
+
+ msg.msg_name = &laddr;
+ msg.msg_namelen = sizeof(laddr);
+ }
+
+ /* packet and raw sockets with hdrincl must pass network header */
+ if (domain == PF_PACKET || protocol == IPPROTO_RAW) {
+ if (cfg_family == PF_INET)
+ iov[1].iov_len = setup_iph(&nh.iph, cfg_payload_len);
+ else
+ iov[1].iov_len = setup_ip6h(&nh.ip6h, cfg_payload_len);
+
+ iov[1].iov_base = (void *) &nh;
+ msg.msg_iovlen++;
+ }
+
+ if (domain == PF_RDS) {
+ msg.msg_name = &cfg_dst_addr;
+ msg.msg_namelen = (cfg_dst_addr.ss_family == AF_INET ?
+ sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6));
+ }
+
+ iov[2].iov_base = payload;
+ iov[2].iov_len = cfg_payload_len;
+ msg.msg_iovlen++;
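+	/* iov[] is filled from the back (payload last); point msg_iov at
+	 * the first slot actually in use
+	 */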
+ msg.msg_iov = &iov[3 - msg.msg_iovlen];
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (cfg_cork)
+ do_sendmsg_corked(fd, &msg);
+ else
+ do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+
+ while (!do_poll(fd, POLLOUT)) {
+ if (cfg_zerocopy)
+ do_recv_completions(fd, domain);
+ }
+
+ } while (gettimeofday_ms() < tstop);
+
+ if (cfg_zerocopy)
+ do_recv_remaining_completions(fd, domain);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
+ packets, bytes >> 20, completions,
+ zerocopied == 1 ? 'y' : 'n');
+}
+
+static int do_setup_rx(int domain, int type, int protocol)
+{
+ int fd;
+
+	/* When tx is over PF_PACKET, rx must use PF_INET(6)/SOCK_RAW
+	 * to receive the only copy of the packet, not a clone
+	 */
+ if (domain == PF_PACKET)
+ error(1, 0, "Use PF_INET/SOCK_RAW to read");
+
+ if (type == SOCK_RAW && protocol == IPPROTO_RAW)
+ error(1, 0, "IPPROTO_RAW: not supported on Rx");
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_RCVBUF, 1 << 21);
+ do_setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT, 1 << 16);
+ do_setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, 1);
+
+ if (bind(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "bind");
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 1))
+ error(1, errno, "listen");
+ fd = do_accept(fd);
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (!ret)
+ return;
+
+ packets++;
+ bytes += ret;
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_datagram(int fd, int type)
+{
+ int ret, off = 0;
+ char buf[64];
+
+ /* MSG_TRUNC will return full datagram length */
+ ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT | MSG_TRUNC);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+
+	/* raw ipv4 returns with the header, raw ipv6 without */
+ if (cfg_family == PF_INET && type == SOCK_RAW) {
+ off += sizeof(struct iphdr);
+ ret -= sizeof(struct iphdr);
+ }
+
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (ret != cfg_payload_len)
+ error(1, 0, "recv: ret=%u != %u", ret, cfg_payload_len);
+ if (ret > sizeof(buf) - off)
+ ret = sizeof(buf) - off;
+ if (memcmp(buf + off, payload, ret))
+ error(1, 0, "recv: data mismatch");
+
+ packets++;
+ bytes += cfg_payload_len;
+}
+
+static void do_rx(int domain, int type, int protocol)
+{
+ uint64_t tstop;
+ int fd;
+
+ fd = do_setup_rx(domain, type, protocol);
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (type == SOCK_STREAM)
+ do_flush_tcp(fd);
+ else
+ do_flush_datagram(fd, type);
+
+ do_poll(fd, POLLIN);
+
+ } while (gettimeofday_ms() < tstop);
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ fprintf(stderr, "rx=%lu (%lu MB)\n", packets, bytes >> 20);
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+ int i;
+
+ if (cfg_cork && (domain == PF_PACKET || type != SOCK_DGRAM))
+ error(1, 0, "can only cork udp sockets");
+
+ do_setcpu(cfg_cpu);
+
+ for (i = 0; i < IP_MAXPACKET; i++)
+ payload[i] = 'a' + (i % 26);
+
+ if (cfg_rx)
+ do_rx(domain, type, protocol);
+ else
+ do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [options] <test>", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ int c;
+ char *daddr = NULL, *saddr = NULL;
+ char *cfg_test;
+
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cork = strtol(optarg, NULL, 0);
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'i':
+ cfg_ifindex = if_nametoindex(optarg);
+ if (cfg_ifindex == 0)
+ error(1, errno, "invalid iface: %s", optarg);
+ break;
+ case 'm':
+ cfg_cork_mixed = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'r':
+ cfg_rx = true;
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 't':
+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'v':
+ cfg_verbose++;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ cfg_test = argv[argc - 1];
+ if (strcmp(cfg_test, "rds") == 0) {
+ if (!daddr)
+ error(1, 0, "-D <server addr> required for PF_RDS\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required for PF_RDS\n");
+ }
+ setup_sockaddr(cfg_family, daddr, &cfg_dst_addr);
+ setup_sockaddr(cfg_family, saddr, &cfg_src_addr);
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+ if (cfg_cork_mixed && (!cfg_zerocopy || !cfg_cork))
+ error(1, 0, "-m: cork_mixed requires corking and zerocopy");
+
+ if (optind != argc - 1)
+ usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test;
+
+ parse_opts(argc, argv);
+
+ cfg_test = argv[argc - 1];
+
+ if (!strcmp(cfg_test, "packet"))
+ do_test(PF_PACKET, SOCK_RAW, 0);
+ else if (!strcmp(cfg_test, "packet_dgram"))
+ do_test(PF_PACKET, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "raw"))
+ do_test(cfg_family, SOCK_RAW, IPPROTO_EGP);
+ else if (!strcmp(cfg_test, "raw_hdrincl"))
+ do_test(cfg_family, SOCK_RAW, IPPROTO_RAW);
+ else if (!strcmp(cfg_test, "tcp"))
+ do_test(cfg_family, SOCK_STREAM, 0);
+ else if (!strcmp(cfg_test, "udp"))
+ do_test(cfg_family, SOCK_DGRAM, 0);
+ else if (!strcmp(cfg_test, "rds"))
+ do_test(PF_RDS, SOCK_SEQPACKET, 0);
+ else
+ error(1, 0, "unknown cfg_test %s", cfg_test);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
new file mode 100755
index 000000000..c43c6debd
--- /dev/null
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+ $0 4 tcp -t 1
+ $0 6 tcp -t 1
+ echo "OK. All tests passed"
+ exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+ exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+ readonly SADDR="${SADDR4}"
+ readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+ readonly SADDR="${SADDR6}"
+ readonly DADDR="${DADDR6}"
+else
+ echo "Invalid IP version ${IP}"
+ exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet: use raw recv, because packet receives skb clones
+# - raw_hdrincl: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+ RXMODE='raw'
+ ;;
+*)
+ RXMODE="${TXMODE}"
+ ;;
+esac
+
+# Start of state changes: install cleanup handler
+save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+ sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
+}
+
+trap cleanup EXIT
+
+# Configure system settings
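+# Zerocopy completion notifications are charged against optmem, so raise it.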
+sysctl -w -q "${path_sysctl_mem}=1000000"
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+	local -r ARGS="$1"
+
+ echo "ipv${IP} ${TXMODE} ${ARGS}"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ sleep 0.2
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ wait
+}
+
+do_test "${EXTRA_ARGS}"
+do_test "-z ${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
new file mode 100755
index 000000000..e3afcb424
--- /dev/null
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -0,0 +1,205 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test checks network interfaces.
+# For the moment it tests only Ethernet interfaces (but wifi could easily be added).
+#
+# We assume that all network drivers are loaded;
+# if not, they probably failed earlier in the boot process and their logged
+# errors will be caught by another test.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# this function tries to bring the interface up;
+# if it is already up, nothing is done
+# arg1: network interface name
+kci_net_start()
+{
+ netdev=$1
+
+ ip link show "$netdev" |grep -q UP
+ if [ $? -eq 0 ];then
+ echo "SKIP: $netdev: interface already up"
+ return $ksft_skip
+ fi
+
+ ip link set "$netdev" up
+ if [ $? -ne 0 ];then
+		echo "FAIL: $netdev: Failed to bring interface up"
+ return 1
+ else
+ echo "PASS: $netdev: set interface up"
+ NETDEV_STARTED=1
+ fi
+ return 0
+}
+
+# this function tries to set up an IP and MAC address on a network interface;
+# it does nothing if the interface was already up
+# arg1: network interface name
+kci_net_setup()
+{
+ netdev=$1
+
+ # do nothing if the interface was already up
+ if [ $NETDEV_STARTED -eq 0 ];then
+ return 0
+ fi
+
+ MACADDR='02:03:04:05:06:07'
+ ip link set dev $netdev address "$MACADDR"
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: Cannot set MAC address"
+ else
+ ip link show $netdev |grep -q "$MACADDR"
+ if [ $? -eq 0 ];then
+ echo "PASS: $netdev: set MAC address"
+ else
+ echo "FAIL: $netdev: Cannot set MAC address"
+ fi
+ fi
+
+	#check that the interface does not already have an IP
+ ip address show "$netdev" |grep '^[[:space:]]*inet'
+ if [ $? -eq 0 ];then
+ echo "SKIP: $netdev: already have an IP"
+ return $ksft_skip
+ fi
+
+	# TODO: which IP address to set? DHCP?
+ echo "SKIP: $netdev: set IP address"
+ return $ksft_skip
+}
+
+# test an ethtool command
+# arg1: return code for "not supported" (see the ethtool source code)
+# arg2: summary of the command
+# arg3: command to execute
+kci_netdev_ethtool_test()
+{
+ if [ $# -le 2 ];then
+ echo "SKIP: $netdev: ethtool: invalid number of arguments"
+ return 1
+ fi
+ $3 >/dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ if [ $ret -eq "$1" ];then
+ echo "SKIP: $netdev: ethtool $2 not supported"
+ return $ksft_skip
+ else
+ echo "FAIL: $netdev: ethtool $2"
+ return 1
+ fi
+ else
+ echo "PASS: $netdev: ethtool $2"
+ fi
+ return 0
+}
+
+# test ethtool commands
+# arg1: network interface name
+kci_netdev_ethtool()
+{
+ netdev=$1
+
+ #check presence of ethtool
+ ethtool --version 2>/dev/null >/dev/null
+ if [ $? -ne 0 ];then
+ echo "SKIP: ethtool not present"
+ return $ksft_skip
+ fi
+
+ TMP_ETHTOOL_FEATURES="$(mktemp)"
+ if [ ! -e "$TMP_ETHTOOL_FEATURES" ];then
+ echo "SKIP: Cannot create a tmp file"
+ return 1
+ fi
+
+ ethtool -k "$netdev" > "$TMP_ETHTOOL_FEATURES"
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: ethtool list features"
+ rm "$TMP_ETHTOOL_FEATURES"
+ return 1
+ fi
+ echo "PASS: $netdev: ethtool list features"
+ #TODO for each non fixed features, try to turn them on/off
+ rm "$TMP_ETHTOOL_FEATURES"
+
+ kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
+ kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+ return 0
+}
+
+# stop a netdev
+# arg1: network interface name
+kci_netdev_stop()
+{
+ netdev=$1
+
+ if [ $NETDEV_STARTED -eq 0 ];then
+ echo "SKIP: $netdev: interface kept up"
+ return 0
+ fi
+
+ ip link set "$netdev" down
+ if [ $? -ne 0 ];then
+ echo "FAIL: $netdev: stop interface"
+ return 1
+ fi
+ echo "PASS: $netdev: stop interface"
+ return 0
+}
+
+# run all tests on a netdev
+# arg1: network interface name
+kci_test_netdev()
+{
+ NETDEV_STARTED=0
+ IFACE_TO_UPDOWN="$1"
+ IFACE_TO_TEST="$1"
+ #check for VLAN interface
+ MASTER_IFACE="$(echo $1 | cut -d@ -f2)"
+ if [ ! -z "$MASTER_IFACE" ];then
+ IFACE_TO_UPDOWN="$MASTER_IFACE"
+ IFACE_TO_TEST="$(echo $1 | cut -d@ -f1)"
+ fi
+
+ kci_net_start "$IFACE_TO_UPDOWN"
+
+ kci_net_setup "$IFACE_TO_TEST"
+
+ kci_netdev_ethtool "$IFACE_TO_TEST"
+
+ kci_netdev_stop "$IFACE_TO_UPDOWN"
+ return 0
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+ip link show 2>/dev/null >/dev/null
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without the ip tool"
+ exit $ksft_skip
+fi
+
+TMP_LIST_NETDEV="$(mktemp)"
+if [ ! -e "$TMP_LIST_NETDEV" ];then
+ echo "FAIL: Cannot create a tmp file"
+ exit 1
+fi
+
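+#find ethernet-like interfaces (ethN, enpXsY) to test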
+ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+while read netdev
+do
+ kci_test_netdev "$netdev"
+done < "$TMP_LIST_NETDEV"
+
+rm "$TMP_LIST_NETDEV"
+exit 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
new file mode 100755
index 000000000..0ab9423d0
--- /dev/null
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -0,0 +1,477 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that route PMTU values match expectations, and that initial device MTU
+# values are assigned correctly
+#
+# Tests currently implemented:
+#
+# - pmtu_vti4_exception
+# Set up vti tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is not
+# created if link layer MTU is not exceeded, then exceed it and check that
+# exception is created with the expected PMTU. The approach described
+# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
+# changes alone won't affect PMTU
+#
+# - pmtu_vti6_exception
+# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
+# namespaces with matching endpoints. Check that route exception is
+# created by exceeding link layer MTU with ping to other endpoint. Then
+# decrease and increase MTU of tunnel, checking that route exception PMTU
+# changes accordingly
+#
+# - pmtu_vti4_default_mtu
+# Set up vti4 tunnel on top of veth, in two namespaces with matching
+# endpoints. Check that MTU assigned to vti interface is the MTU of the
+# lower layer (veth) minus additional lower layer headers (zero, for veth)
+# minus IPv4 header length
+#
+# - pmtu_vti6_default_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti4_link_add_mtu
+# Set up vti4 interface passing MTU value at link creation, check MTU is
+# configured, and that link is not created with invalid MTU values
+#
+# - pmtu_vti6_link_add_mtu
+# Same as above, for IPv6
+#
+# - pmtu_vti6_link_change_mtu
+# Set up two dummy interfaces with different MTUs, create a vti6 tunnel
+# and check that configured MTU is used on link creation and changes, and
+# that MTU is properly calculated instead when MTU is not configured from
+# userspace
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+tests="
+ pmtu_vti6_exception vti6: PMTU exceptions
+ pmtu_vti4_exception vti4: PMTU exceptions
+ pmtu_vti4_default_mtu vti4: default MTU assignment
+ pmtu_vti6_default_mtu vti6: default MTU assignment
+ pmtu_vti4_link_add_mtu vti4: MTU setting on link creation
+ pmtu_vti6_link_add_mtu vti6: MTU setting on link creation
+ pmtu_vti6_link_change_mtu vti6: MTU changes on link changes"
+
+NS_A="ns-$(mktemp -u XXXXXX)"
+NS_B="ns-$(mktemp -u XXXXXX)"
+ns_a="ip netns exec ${NS_A}"
+ns_b="ip netns exec ${NS_B}"
+
+veth4_a_addr="192.168.1.1"
+veth4_b_addr="192.168.1.2"
+veth4_mask="24"
+veth6_a_addr="fd00:1::a"
+veth6_b_addr="fd00:1::b"
+veth6_mask="64"
+
+vti4_a_addr="192.168.2.1"
+vti4_b_addr="192.168.2.2"
+vti4_mask="24"
+vti6_a_addr="fd00:2::a"
+vti6_b_addr="fd00:2::b"
+vti6_mask="64"
+
+dummy6_0_addr="fc00:1000::0"
+dummy6_1_addr="fc00:1001::0"
+dummy6_mask="64"
+
+cleanup_done=1
+err_buf=
+
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+err_flush() {
+ echo -n "${err_buf}"
+ err_buf=
+}
+
+setup_namespaces() {
+ ip netns add ${NS_A} || return 1
+ ip netns add ${NS_B}
+}
+
+setup_veth() {
+ ${ns_a} ip link add veth_a type veth peer name veth_b || return 1
+ ${ns_a} ip link set veth_b netns ${NS_B}
+
+ ${ns_a} ip addr add ${veth4_a_addr}/${veth4_mask} dev veth_a
+ ${ns_b} ip addr add ${veth4_b_addr}/${veth4_mask} dev veth_b
+
+ ${ns_a} ip addr add ${veth6_a_addr}/${veth6_mask} dev veth_a
+ ${ns_b} ip addr add ${veth6_b_addr}/${veth6_mask} dev veth_b
+
+ ${ns_a} ip link set veth_a up
+ ${ns_b} ip link set veth_b up
+}
+
+setup_vti() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+ vti_a_addr="${4}"
+ vti_b_addr="${5}"
+ vti_mask=${6}
+
+ [ ${proto} -eq 6 ] && vti_type="vti6" || vti_type="vti"
+
+ ${ns_a} ip link add vti${proto}_a type ${vti_type} local ${veth_a_addr} remote ${veth_b_addr} key 10 || return 1
+ ${ns_b} ip link add vti${proto}_b type ${vti_type} local ${veth_b_addr} remote ${veth_a_addr} key 10
+
+ ${ns_a} ip addr add ${vti_a_addr}/${vti_mask} dev vti${proto}_a
+ ${ns_b} ip addr add ${vti_b_addr}/${vti_mask} dev vti${proto}_b
+
+ ${ns_a} ip link set vti${proto}_a up
+ ${ns_b} ip link set vti${proto}_b up
+
+ sleep 1
+}
+
+setup_vti4() {
+ setup_vti 4 ${veth4_a_addr} ${veth4_b_addr} ${vti4_a_addr} ${vti4_b_addr} ${vti4_mask}
+}
+
+setup_vti6() {
+ setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${vti6_a_addr} ${vti6_b_addr} ${vti6_mask}
+}
+
+setup_xfrm() {
+ proto=${1}
+ veth_a_addr="${2}"
+ veth_b_addr="${3}"
+
+ ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
+ ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+ ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+
+ ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead "rfc4106(gcm(aes))" 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
+ ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
+}
+
+setup_xfrm4() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
+}
+
+setup_xfrm6() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
+}
+
+setup() {
+ [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
+
+ cleanup_done=0
+ for arg do
+ eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
+ done
+}
+
+cleanup() {
+ [ ${cleanup_done} -eq 1 ] && return
+ ip netns del ${NS_A} 2> /dev/null
+ ip netns del ${NS_B} 2> /dev/null
+ cleanup_done=1
+}
+
+mtu() {
+ ns_cmd="${1}"
+ dev="${2}"
+ mtu="${3}"
+
+ ${ns_cmd} ip link set dev ${dev} mtu ${mtu}
+}
+
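+# print the value that follows the "mtu" token in iproute2 output; used
+# to read MTU values back from "ip link show" and "ip route get"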
+mtu_parse() {
+ input="${1}"
+
+ next=0
+ for i in ${input}; do
+ [ ${next} -eq 1 ] && echo "${i}" && return
+ [ "${i}" = "mtu" ] && next=1
+ done
+}
+
+link_get() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ ${ns_cmd} ip link show dev "${name}"
+}
+
+link_get_mtu() {
+ ns_cmd="${1}"
+ name="${2}"
+
+ mtu_parse "$(link_get "${ns_cmd}" ${name})"
+}
+
+route_get_dst_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ ${ns_cmd} ip route get "${dst}"
+}
+
+route_get_dst_pmtu_from_exception() {
+ ns_cmd="${1}"
+ dst="${2}"
+
+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+}
+
+test_pmtu_vti4_exception() {
+ setup namespaces veth vti4 xfrm4 || return 2
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s ${ping_payload} ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" != "" ]; then
+ err " unexpected exception created with PMTU ${pmtu} for IP payload length ${esp_payload_rfc4106}"
+ return 1
+ fi
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ ${ns_a} ping -q -M want -i 0.1 -w 2 -s $((ping_payload + 1)) ${vti4_b_addr} > /dev/null
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti4_b_addr})"
+ if [ "${pmtu}" = "" ]; then
+ err " exception not created for IP payload length $((esp_payload_rfc4106 + 1))"
+ return 1
+ fi
+
+ # ...with the right PMTU value
+ if [ ${pmtu} -ne ${esp_payload_rfc4106} ]; then
+ err " wrong PMTU ${pmtu} in exception, expected: ${esp_payload_rfc4106}"
+ return 1
+ fi
+}
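+
+# Size arithmetic used in test_pmtu_vti4_exception, spelled out: the vti
+# device consumes the outer IPv4 header (20 bytes); ESP with
+# rfc4106(gcm(aes)) adds SPI (4) + sequence number (4) + IV (8) + ICV (16)
+# + pad length (1) + next header (1) = 34 bytes; the inner IPv4 and ICMP
+# headers take another 20 + 8 = 28 bytes:
+#
+#	vti_mtu             = veth_mtu - 20             = 1480
+#	esp_payload_rfc4106 = vti_mtu - 34              = 1446
+#	ping_payload        = esp_payload_rfc4106 - 28  = 1418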
+
+test_pmtu_vti6_exception() {
+ setup namespaces veth vti6 xfrm6 || return 2
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ ${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+
+ # Check that exception was created
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then
+ err " tunnel exceeding link layer MTU didn't create route exception"
+ return 1
+ fi
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 3000 ]; then
+ err " decreasing tunnel MTU didn't decrease route exception PMTU"
+ fail=1
+ fi
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" -ne 9000 ]; then
+ err " increasing tunnel MTU didn't increase route exception PMTU"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+test_pmtu_vti4_default_mtu() {
+ setup namespaces veth vti4 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv4 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti4_mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ $((veth_mtu - vti4_mtu)) -ne 20 ]; then
+ err " vti MTU ${vti4_mtu} is not veth MTU ${veth_mtu} minus IPv4 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti6_default_mtu() {
+ setup namespaces veth vti6 || return 2
+
+ # Check that MTU of vti device is MTU of veth minus IPv6 header length
+ veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
+ vti6_mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ $((veth_mtu - vti6_mtu)) -ne 40 ]; then
+ err " vti MTU ${vti6_mtu} is not veth MTU ${veth_mtu} minus IPv6 header length"
+ return 1
+ fi
+}
+
+test_pmtu_vti4_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ [ $? -ne 0 ] && err " vti not supported" && return 2
+ ${ns_a} ip link del vti4_a
+
+ fail=0
+
+ min=68
+ max=$((65535 - 20))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti tunnel created with invalid MTU ${mtu}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti4_a
+ done
+
+ # Now check valid values
+ for v in ${min} 1300 ${max}; do
+ ${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti4_a)"
+ ${ns_a} ip link del vti4_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_add_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ [ $? -ne 0 ] && err " vti6 not supported" && return 2
+ ${ns_a} ip link del vti6_a
+
+ fail=0
+
+ min=68 # vti6 can carry IPv4 packets too
+ max=$((65535 - 40))
+ # Check invalid values first
+ for v in $((min - 1)) $((max + 1)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 2>/dev/null
+ # This can fail, or MTU can be adjusted to a proper value
+ [ $? -ne 0 ] && continue
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -lt ${min} -o ${mtu} -gt ${max} ]; then
+ err " vti6 tunnel created with invalid MTU ${v}"
+ fail=1
+ fi
+ ${ns_a} ip link del vti6_a
+ done
+
+ # Now check valid values
+ for v in 68 1280 1300 $((65535 - 40)); do
+ ${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ ${ns_a} ip link del vti6_a
+ if [ "${mtu}" != "${v}" ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value ${v}"
+ fail=1
+ fi
+ done
+
+ return ${fail}
+}
+
+test_pmtu_vti6_link_change_mtu() {
+ setup namespaces || return 2
+
+ ${ns_a} ip link add dummy0 mtu 1500 type dummy
+ [ $? -ne 0 ] && err " dummy not supported" && return 2
+ ${ns_a} ip link add dummy1 mtu 3000 type dummy
+ ${ns_a} ip link set dummy0 up
+ ${ns_a} ip link set dummy1 up
+
+ ${ns_a} ip addr add ${dummy6_0_addr}/${dummy6_mask} dev dummy0
+ ${ns_a} ip addr add ${dummy6_1_addr}/${dummy6_mask} dev dummy1
+
+ fail=0
+
+ # Create vti6 interface bound to device, passing MTU, check it
+ ${ns_a} ip link add vti6_a mtu 1300 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1300 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1300"
+ fail=1
+ fi
+
+ # Move to another device with different MTU, without passing MTU, check
+ # MTU is adjusted
+ ${ns_a} ip link set vti6_a type vti6 remote ${dummy6_1_addr} local ${dummy6_1_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne $((3000 - 40)) ]; then
+ err " vti MTU ${mtu} is not dummy MTU 3000 minus IPv6 header length"
+ fail=1
+ fi
+
+ # Move it back, passing MTU, check MTU is not overridden
+ ${ns_a} ip link set vti6_a mtu 1280 type vti6 remote ${dummy6_0_addr} local ${dummy6_0_addr}
+ mtu="$(link_get_mtu "${ns_a}" vti6_a)"
+ if [ ${mtu} -ne 1280 ]; then
+ err " vti6 MTU ${mtu} doesn't match configured value 1280"
+ fail=1
+ fi
+
+ return ${fail}
+}
+
+trap cleanup EXIT
+
+exitcode=0
+desc=0
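+# walk the tests list above, reading the test name field first and its
+# description second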
+IFS="
+"
+for t in ${tests}; do
+ [ $desc -eq 0 ] && name="${t}" && desc=1 && continue || desc=0
+
+ (
+ unset IFS
+ eval test_${name}
+ ret=$?
+ cleanup
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${t}"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${t}"
+ err_flush
+ exit 1
+ elif [ $ret -eq 2 ]; then
+ printf "TEST: %-60s [SKIP]\n" "${t}"
+ err_flush
+ fi
+ )
+ [ $? -ne 0 ] && exitcode=1
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
new file mode 100644
index 000000000..f496ba3b1
--- /dev/null
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -0,0 +1,486 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn (willemb@google.com)
+ *
+ * A basic test of packet socket fanout behavior.
+ *
+ * Control:
+ * - create fanout fails as expected with illegal flag combinations
+ * - join fanout fails as expected with diverging types or flags
+ *
+ * Datapath:
+ * Open a pair of packet sockets and a pair of INET sockets, send a known
+ * number of packets across the two INET sockets and count the number of
+ * packets enqueued onto the two packet sockets.
+ *
+ * The test currently runs for
+ * - PACKET_FANOUT_HASH
+ * - PACKET_FANOUT_HASH with PACKET_FANOUT_FLAG_ROLLOVER
+ * - PACKET_FANOUT_LB
+ * - PACKET_FANOUT_CPU
+ * - PACKET_FANOUT_ROLLOVER
+ * - PACKET_FANOUT_CBPF
+ * - PACKET_FANOUT_EBPF
+ *
+ * Todo:
+ * - functionality: PACKET_FANOUT_FLAG_DEFRAG
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE /* for sched_setaffinity */
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/unistd.h> /* for __NR_bpf */
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+#define RING_NUM_FRAMES 20
+
+/* Open a socket in a given fanout mode.
+ * @return -1 if mode is bad, a valid socket otherwise */
+static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
+{
+ struct sockaddr_ll addr = {0};
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd < 0) {
+ perror("socket packet");
+ exit(1);
+ }
+
+ pair_udp_setfilter(fd);
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_protocol = htons(ETH_P_IP);
+ addr.sll_ifindex = if_nametoindex("lo");
+ if (addr.sll_ifindex == 0) {
+ perror("if_nametoindex");
+ exit(1);
+ }
+ if (bind(fd, (void *) &addr, sizeof(addr))) {
+ perror("bind packet");
+ exit(1);
+ }
+
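+ /* PACKET_FANOUT option word: fanout type and flags in the high 16
+ * bits, group id in the low 16 bits; sock_fanout_getopts() below
+ * reverses this packing. */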
+ val = (((int) typeflags) << 16) | group_id;
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+ if (close(fd)) {
+ perror("close packet");
+ exit(1);
+ }
+ return -1;
+ }
+
+ return fd;
+}
+
+static void sock_fanout_set_cbpf(int fd)
+{
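+ /* With PACKET_FANOUT_CBPF the program's return value selects the
+ * fanout socket: return the byte at offset 80, which lies inside the
+ * UDP payload (see pair_udp_setfilter() in psock_lib.h). */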
+ struct sock_filter bpf_filter[] = {
+ BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ };
+ struct sock_fprog bpf_prog;
+
+ bpf_prog.filter = bpf_filter;
+ bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
+ sizeof(bpf_prog))) {
+ perror("fanout data cbpf");
+ exit(1);
+ }
+}
+
+static void sock_fanout_getopts(int fd, uint16_t *typeflags, uint16_t *group_id)
+{
+ int sockopt;
+ socklen_t sockopt_len = sizeof(sockopt);
+
+ if (getsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &sockopt, &sockopt_len)) {
+ perror("failed to getsockopt");
+ exit(1);
+ }
+ *typeflags = sockopt >> 16;
+ *group_id = sockopt & 0xffff;
+}
+
+static void sock_fanout_set_ebpf(int fd)
+{
+ static char log_buf[65536];
+
+ const int len_off = __builtin_offsetof(struct __sk_buff, len);
+ struct bpf_insn prog[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_X, 6, 1, 0, 0 },      /* r6 = r1 (skb) */
+ { BPF_LDX | BPF_W | BPF_MEM, 0, 6, len_off, 0 },  /* r0 = skb->len */
+ { BPF_JMP | BPF_JGE | BPF_K, 0, 0, 1, DATA_LEN }, /* if r0 >= DATA_LEN, skip next */
+ { BPF_JMP | BPF_JA | BPF_K, 0, 0, 4, 0 },         /* short packet: goto exit */
+ { BPF_LD | BPF_B | BPF_ABS, 0, 0, 0, 0x50 },      /* r0 = byte at offset 80 */
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 2, DATA_CHAR },   /* if r0 == 'a', goto exit */
+ { BPF_JMP | BPF_JEQ | BPF_K, 0, 0, 1, DATA_CHAR_1 }, /* if r0 == 'b', goto exit */
+ { BPF_ALU | BPF_MOV | BPF_K, 0, 0, 0, 0 },        /* r0 = 0 */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }                /* exit: return r0 */
+ };
+ union bpf_attr attr;
+ int pfd;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = (unsigned long) prog;
+ attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.license = (unsigned long) "GPL";
+ attr.log_buf = (unsigned long) log_buf;
+ attr.log_size = sizeof(log_buf);
+ attr.log_level = 1;
+
+ pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (pfd < 0) {
+ perror("bpf");
+ fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
+ exit(1);
+ }
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+ perror("fanout data ebpf");
+ exit(1);
+ }
+
+ if (close(pfd)) {
+ perror("close ebpf");
+ exit(1);
+ }
+}
+
+static char *sock_fanout_open_ring(int fd)
+{
+ struct tpacket_req req = {
+ .tp_block_size = getpagesize(),
+ .tp_frame_size = getpagesize(),
+ .tp_block_nr = RING_NUM_FRAMES,
+ .tp_frame_nr = RING_NUM_FRAMES,
+ };
+ char *ring;
+ int val = TPACKET_V2;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, (void *) &val,
+ sizeof(val))) {
+ perror("packetsock ring setsockopt version");
+ exit(1);
+ }
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req,
+ sizeof(req))) {
+ perror("packetsock ring setsockopt");
+ exit(1);
+ }
+
+ ring = mmap(0, req.tp_block_size * req.tp_block_nr,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (ring == MAP_FAILED) {
+ perror("packetsock ring mmap");
+ exit(1);
+ }
+
+ return ring;
+}
+
+static int sock_fanout_read_ring(int fd, void *ring)
+{
+ struct tpacket2_hdr *header = ring;
+ int count = 0;
+
+ while (count < RING_NUM_FRAMES && header->tp_status & TP_STATUS_USER) {
+ count++;
+ header = ring + (count * getpagesize());
+ }
+
+ return count;
+}
+
+static int sock_fanout_read(int fds[], char *rings[], const int expect[])
+{
+ int ret[2];
+
+ ret[0] = sock_fanout_read_ring(fds[0], rings[0]);
+ ret[1] = sock_fanout_read_ring(fds[1], rings[1]);
+
+ fprintf(stderr, "info: count=%d,%d, expect=%d,%d\n",
+ ret[0], ret[1], expect[0], expect[1]);
+
+ if ((!(ret[0] == expect[0] && ret[1] == expect[1])) &&
+ (!(ret[0] == expect[1] && ret[1] == expect[0]))) {
+ fprintf(stderr, "warning: incorrect queue lengths\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Test illegal mode + flag combination */
+static void test_control_single(void)
+{
+ fprintf(stderr, "test: control single socket\n");
+
+ if (sock_fanout_open(PACKET_FANOUT_ROLLOVER |
+ PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+ fprintf(stderr, "ERROR: opened socket with dual rollover\n");
+ exit(1);
+ }
+}
+
+/* Test illegal group with different modes or flags */
+static void test_control_group(void)
+{
+ int fds[2];
+
+ fprintf(stderr, "test: control multiple sockets\n");
+
+ fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[0] == -1) {
+ fprintf(stderr, "ERROR: failed to open HASH socket\n");
+ exit(1);
+ }
+ if (sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong flag defrag\n");
+ exit(1);
+ }
+ if (sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_ROLLOVER, 0) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong flag ro\n");
+ exit(1);
+ }
+ if (sock_fanout_open(PACKET_FANOUT_CPU, 0) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong mode\n");
+ exit(1);
+ }
+ fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[1] == -1) {
+ fprintf(stderr, "ERROR: failed to join group\n");
+ exit(1);
+ }
+ if (close(fds[1]) || close(fds[0])) {
+ fprintf(stderr, "ERROR: closing sockets\n");
+ exit(1);
+ }
+}
+
+/* Test creating unique fanout group ids */
+static void test_unique_fanout_group_ids(void)
+{
+ int fds[3];
+ uint16_t typeflags, first_group_id, second_group_id;
+
+ fprintf(stderr, "test: unique ids\n");
+
+ fds[0] = sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_UNIQUEID, 0);
+ if (fds[0] == -1) {
+ fprintf(stderr, "ERROR: failed to create a unique id group.\n");
+ exit(1);
+ }
+
+ sock_fanout_getopts(fds[0], &typeflags, &first_group_id);
+ if (typeflags != PACKET_FANOUT_HASH) {
+ fprintf(stderr, "ERROR: unexpected typeflags %x\n", typeflags);
+ exit(1);
+ }
+
+ if (sock_fanout_open(PACKET_FANOUT_CPU, first_group_id) != -1) {
+ fprintf(stderr, "ERROR: joined group with wrong type.\n");
+ exit(1);
+ }
+
+ fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, first_group_id);
+ if (fds[1] == -1) {
+ fprintf(stderr,
+ "ERROR: failed to join previously created group.\n");
+ exit(1);
+ }
+
+ fds[2] = sock_fanout_open(PACKET_FANOUT_HASH |
+ PACKET_FANOUT_FLAG_UNIQUEID, 0);
+ if (fds[2] == -1) {
+ fprintf(stderr,
+ "ERROR: failed to create a second unique id group.\n");
+ exit(1);
+ }
+
+ sock_fanout_getopts(fds[2], &typeflags, &second_group_id);
+ if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_UNIQUEID,
+ second_group_id) != -1) {
+ fprintf(stderr,
+ "ERROR: specified a group id when requesting unique id\n");
+ exit(1);
+ }
+
+ if (close(fds[0]) || close(fds[1]) || close(fds[2])) {
+ fprintf(stderr, "ERROR: closing sockets\n");
+ exit(1);
+ }
+}
+
+static int test_datapath(uint16_t typeflags, int port_off,
+ const int expect1[], const int expect2[])
+{
+ const int expect0[] = { 0, 0 };
+ char *rings[2];
+ uint8_t type = typeflags & 0xFF;
+ int fds[2], fds_udp[2][2], ret;
+
+ fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
+ typeflags, (uint16_t)PORT_BASE,
+ (uint16_t)(PORT_BASE + port_off));
+
+ fds[0] = sock_fanout_open(typeflags, 0);
+ fds[1] = sock_fanout_open(typeflags, 0);
+ if (fds[0] == -1 || fds[1] == -1) {
+ fprintf(stderr, "ERROR: failed open\n");
+ exit(1);
+ }
+ if (type == PACKET_FANOUT_CBPF)
+ sock_fanout_set_cbpf(fds[0]);
+ else if (type == PACKET_FANOUT_EBPF)
+ sock_fanout_set_ebpf(fds[0]);
+
+ rings[0] = sock_fanout_open_ring(fds[0]);
+ rings[1] = sock_fanout_open_ring(fds[1]);
+ pair_udp_open(fds_udp[0], PORT_BASE);
+ pair_udp_open(fds_udp[1], PORT_BASE + port_off);
+ sock_fanout_read(fds, rings, expect0);
+
+ /* Send data, but not enough to overflow a queue */
+ pair_udp_send(fds_udp[0], 15);
+ pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
+ ret = sock_fanout_read(fds, rings, expect1);
+
+ /* Send more data, overflow the queue */
+ pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
+ /* TODO: ensure consistent order between expect1 and expect2 */
+ ret |= sock_fanout_read(fds, rings, expect2);
+
+ if (munmap(rings[1], RING_NUM_FRAMES * getpagesize()) ||
+ munmap(rings[0], RING_NUM_FRAMES * getpagesize())) {
+ fprintf(stderr, "close rings\n");
+ exit(1);
+ }
+ if (close(fds_udp[1][1]) || close(fds_udp[1][0]) ||
+ close(fds_udp[0][1]) || close(fds_udp[0][0]) ||
+ close(fds[1]) || close(fds[0])) {
+ fprintf(stderr, "close datapath\n");
+ exit(1);
+ }
+
+ return ret;
+}
+
+static int set_cpuaffinity(int cpuid)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpuid, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask)) {
+ if (errno != EINVAL) {
+ fprintf(stderr, "setaffinity %d\n", cpuid);
+ exit(1);
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
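+ /* Expected {socket0, socket1} receive counts after the first and
+ * second send burst of each test; sock_fanout_read() accepts the
+ * counts in either order. */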
+ const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } };
+ const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } };
+ const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } };
+ const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } };
+ const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } };
+ const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } };
+ const int expect_bpf[2][2] = { { 15, 5 }, { 15, 20 } };
+ const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } };
+ int port_off = 2, tries = 20, ret;
+
+ test_control_single();
+ test_control_group();
+ test_unique_fanout_group_ids();
+
+ /* find a set of ports that do not collide on the same socket */
+ ret = test_datapath(PACKET_FANOUT_HASH, port_off,
+ expect_hash[0], expect_hash[1]);
+ while (ret) {
+ fprintf(stderr, "info: trying alternate ports (%d)\n", tries);
+ ret = test_datapath(PACKET_FANOUT_HASH, ++port_off,
+ expect_hash[0], expect_hash[1]);
+ if (!--tries) {
+ fprintf(stderr, "too many collisions\n");
+ return 1;
+ }
+ }
+
+ ret |= test_datapath(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_ROLLOVER,
+ port_off, expect_hash_rb[0], expect_hash_rb[1]);
+ ret |= test_datapath(PACKET_FANOUT_LB,
+ port_off, expect_lb[0], expect_lb[1]);
+ ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
+ port_off, expect_rb[0], expect_rb[1]);
+
+ ret |= test_datapath(PACKET_FANOUT_CBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+ ret |= test_datapath(PACKET_FANOUT_EBPF,
+ port_off, expect_bpf[0], expect_bpf[1]);
+
+ set_cpuaffinity(0);
+ ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+ expect_cpu0[0], expect_cpu0[1]);
+ if (!set_cpuaffinity(1))
+ /* TODO: test that choice alternates with previous */
+ ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
+ expect_cpu1[0], expect_cpu1[1]);
+
+ ret |= test_datapath(PACKET_FANOUT_FLAG_UNIQUEID, port_off,
+ expect_uniqueid[0], expect_uniqueid[1]);
+
+ if (ret)
+ return 1;
+
+ printf("OK. All tests passed\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
new file mode 100644
index 000000000..7d990d6c8
--- /dev/null
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright 2013 Google Inc.
+ * Author: Willem de Bruijn <willemb@google.com>
+ * Daniel Borkmann <dborkman@redhat.com>
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef PSOCK_LIB_H
+#define PSOCK_LIB_H
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <string.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+
+#define DATA_LEN 100
+#define DATA_CHAR 'a'
+#define DATA_CHAR_1 'b'
+
+#define PORT_BASE 8000
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__ ((__unused__))
+#endif
+
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+ /* the filter below checks for all of the following conditions that
+ * are based on the contents of create_payload()
+ * ether type 0x800 and
+ * ip proto udp and
+ * skb->len == DATA_LEN and
+ * udp[38] == 'a' or udp[38] == 'b'
+ * It can be generated from the following bpf_asm input:
+ * ldh [12]
+ * jne #0x800, drop ; ETH_P_IP
+ * ldb [23]
+ * jneq #17, drop ; IPPROTO_UDP
+ * ld len ; ld skb->len
+ * jlt #100, drop ; DATA_LEN
+ * ldb [80]
+ * jeq #97, pass ; DATA_CHAR
+ * jne #98, drop ; DATA_CHAR_1
+ * pass:
+ * ret #-1
+ * drop:
+ * ret #0
+ */
+ struct sock_filter bpf_filter[] = {
+ { 0x28, 0, 0, 0x0000000c },
+ { 0x15, 0, 8, 0x00000800 },
+ { 0x30, 0, 0, 0x00000017 },
+ { 0x15, 0, 6, 0x00000011 },
+ { 0x80, 0, 0, 0000000000 },
+ { 0x35, 0, 4, 0x00000064 },
+ { 0x30, 0, 0, 0x00000050 },
+ { 0x15, 1, 0, 0x00000061 },
+ { 0x15, 0, 1, 0x00000062 },
+ { 0x06, 0, 0, 0xffffffff },
+ { 0x06, 0, 0, 0000000000 },
+ };
+ struct sock_fprog bpf_prog;
+
+ bpf_prog.filter = bpf_filter;
+ bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+ sizeof(bpf_prog))) {
+ perror("setsockopt SO_ATTACH_FILTER");
+ exit(1);
+ }
+}
+
+static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
+{
+ struct sockaddr_in saddr, daddr;
+
+ fds[0] = socket(PF_INET, SOCK_DGRAM, 0);
+ fds[1] = socket(PF_INET, SOCK_DGRAM, 0);
+ if (fds[0] == -1 || fds[1] == -1) {
+ fprintf(stderr, "ERROR: socket dgram\n");
+ exit(1);
+ }
+
+ memset(&saddr, 0, sizeof(saddr));
+ saddr.sin_family = AF_INET;
+ saddr.sin_port = htons(port);
+ saddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sin_family = AF_INET;
+ daddr.sin_port = htons(port + 1);
+ daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+
+ /* must bind both to get consistent hash result */
+ if (bind(fds[1], (void *) &daddr, sizeof(daddr))) {
+ perror("bind");
+ exit(1);
+ }
+ if (bind(fds[0], (void *) &saddr, sizeof(saddr))) {
+ perror("bind");
+ exit(1);
+ }
+ if (connect(fds[0], (void *) &daddr, sizeof(daddr))) {
+ perror("connect");
+ exit(1);
+ }
+}
+
+static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload)
+{
+ char buf[DATA_LEN], rbuf[DATA_LEN];
+
+ memset(buf, payload, sizeof(buf));
+ while (num--) {
+ /* Should really handle EINTR and EAGAIN */
+ if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
+ fprintf(stderr, "ERROR: send failed left=%d\n", num);
+ exit(1);
+ }
+ if (read(fds[1], rbuf, sizeof(rbuf)) != sizeof(rbuf)) {
+ fprintf(stderr, "ERROR: recv failed left=%d\n", num);
+ exit(1);
+ }
+ if (memcmp(buf, rbuf, sizeof(buf))) {
+ fprintf(stderr, "ERROR: data failed left=%d\n", num);
+ exit(1);
+ }
+ }
+}
+
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+ return pair_udp_send_char(fds, num, DATA_CHAR);
+}
+
+static __maybe_unused void pair_udp_close(int fds[])
+{
+ close(fds[0]);
+ close(fds[1]);
+}
+
+#endif /* PSOCK_LIB_H */
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000..7d15e10a9
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool cfg_use_bind;
+static bool cfg_use_csum_off;
+static bool cfg_use_csum_off_bad;
+static bool cfg_use_dgram;
+static bool cfg_use_gso;
+static bool cfg_use_qdisc_bypass;
+static bool cfg_use_vlan;
+static bool cfg_use_vnet;
+
+static char *cfg_ifname = "lo";
+static int cfg_mtu = 1500;
+static int cfg_payload_len = DATA_LEN;
+static int cfg_truncate_len = INT_MAX;
+static uint16_t cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+ unsigned long sum = 0;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += start[i];
+
+ return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+ unsigned long sum)
+{
+ sum += add_csum_hword(start, num_u16);
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ return ~sum;
+}
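+
+/* Illustrative sketch, not used by the test below: a header checksummed
+ * with build_ip_csum() sums to 0xffff, so running build_ip_csum() over it
+ * again (check field included) returns zero.
+ */
+static int __maybe_unused csum_is_valid(const uint16_t *hdr, int num_u16)
+{
+ return build_ip_csum(hdr, num_u16, 0) == 0;
+}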
+
+static int build_vnet_header(void *header)
+{
+ struct virtio_net_hdr *vh = header;
+
+ vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+ if (cfg_use_csum_off) {
+ vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+ vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+ /* position check field exactly one byte beyond end of packet */
+ if (cfg_use_csum_off_bad)
+ vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+ vh->csum_offset - 1;
+ }
+
+ if (cfg_use_gso) {
+ vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+ }
+
+ return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+ struct ethhdr *eth = header;
+
+ if (cfg_use_vlan) {
+ uint16_t *tag = header + ETH_HLEN;
+
+ eth->h_proto = htons(ETH_P_8021Q);
+ tag[1] = htons(ETH_P_IP);
+ return ETH_HLEN + 4;
+ }
+
+ eth->h_proto = htons(ETH_P_IP);
+ return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+ iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+ iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+ return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+ const int alen = sizeof(uint32_t);
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(9);
+ udph->dest = htons(cfg_port);
+ udph->len = htons(len);
+
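+ /* Seed check with the complement of the folded pseudo-header sum:
+ * the source and destination addresses sit just before this header,
+ * plus protocol and length; the stack or device then completes the
+ * checksum over the UDP header and payload. */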
+ if (cfg_use_csum_off)
+ udph->check = build_ip_csum(header - (2 * alen), alen,
+ htons(IPPROTO_UDP) + udph->len);
+ else
+ udph->check = 0;
+
+ return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+ int off = 0;
+
+ off += build_vnet_header(tbuf);
+ off += build_eth_header(tbuf + off);
+ off += build_ipv4_header(tbuf + off, payload_len);
+ off += build_udp_header(tbuf + off, payload_len);
+
+ if (off + payload_len > sizeof(tbuf))
+ error(1, 0, "payload length exceeds max");
+
+ memset(tbuf + off, DATA_CHAR, payload_len);
+
+ return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_family = AF_PACKET;
+ laddr.sll_protocol = htons(ETH_P_IP);
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&laddr, sizeof(laddr)))
+ error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+ int ret;
+
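+ /* tbuf always starts with vnet and ethernet headers (see
+ * build_packet()); strip the vnet header unless the socket expects
+ * one, and the ethernet header for SOCK_DGRAM, where the kernel
+ * fills in the link layer. */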
+ if (!cfg_use_vnet) {
+ buf += sizeof(struct virtio_net_hdr);
+ len -= sizeof(struct virtio_net_hdr);
+ }
+ if (cfg_use_dgram) {
+ buf += ETH_HLEN;
+ len -= ETH_HLEN;
+ }
+
+ if (cfg_use_bind) {
+ ret = write(fd, buf, len);
+ } else {
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_protocol = htons(ETH_P_IP);
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, errno, "if_nametoindex");
+
+ ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+ }
+
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, 0, "write: %u %u", ret, len);
+
+ fprintf(stderr, "tx: %u\n", ret);
+}
+
+static int do_tx(void)
+{
+ const int one = 1;
+ int fd, len;
+
+ fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ if (cfg_use_bind)
+ do_bind(fd);
+
+ if (cfg_use_qdisc_bypass &&
+ setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+ error(1, errno, "setsockopt qdisc bypass");
+
+ if (cfg_use_vnet &&
+ setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+ error(1, errno, "setsockopt vnet");
+
+ len = build_packet(cfg_payload_len);
+
+ if (cfg_truncate_len < len)
+ len = cfg_truncate_len;
+
+ do_send(fd, tbuf, len);
+
+ if (close(fd))
+ error(1, errno, "close t");
+
+ return len;
+}
+
+static int setup_rx(void)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ struct sockaddr_in raddr = {0};
+ int fd;
+
+ fd = socket(PF_INET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ raddr.sin_family = AF_INET;
+ raddr.sin_port = htons(cfg_port);
+ raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ if (bind(fd, (void *)&raddr, sizeof(raddr)))
+ error(1, errno, "bind r");
+
+ return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+ int ret;
+
+ ret = recv(fd, rbuf, sizeof(rbuf), 0);
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (ret != expected_len)
+ error(1, 0, "recv: %u != %u", ret, expected_len);
+
+ if (memcmp(rbuf, expected, ret))
+ error(1, 0, "recv: data mismatch");
+
+ fprintf(stderr, "rx: %u\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fd;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket p");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ pair_udp_setfilter(fd);
+ do_bind(fd);
+
+ return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+ switch (c) {
+ case 'b':
+ cfg_use_bind = true;
+ break;
+ case 'c':
+ cfg_use_csum_off = true;
+ break;
+ case 'C':
+ cfg_use_csum_off_bad = true;
+ break;
+ case 'd':
+ cfg_use_dgram = true;
+ break;
+ case 'g':
+ cfg_use_gso = true;
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'q':
+ cfg_use_qdisc_bypass = true;
+ break;
+ case 't':
+ cfg_truncate_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'v':
+ cfg_use_vnet = true;
+ break;
+ case 'V':
+ cfg_use_vlan = true;
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+
+ if (cfg_use_vlan && cfg_use_dgram)
+ error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+ if (cfg_use_csum_off && !cfg_use_vnet)
+ error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+ if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+ error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+ if (cfg_use_gso && !cfg_use_csum_off)
+ error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+ int fdr, fds, total_len;
+
+ fdr = setup_rx();
+ fds = setup_sniffer();
+
+ total_len = do_tx();
+
+ /* BPF filter accepts only this length, vlan changes MAC */
+ if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+ do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+ tbuf + sizeof(struct virtio_net_hdr));
+
+ do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+ if (close(fds))
+ error(1, errno, "close s");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (system("ip link set dev lo mtu 1500"))
+ error(1, errno, "ip link set mtu");
+ if (system("ip addr add dev lo 172.17.0.1/24"))
+ error(1, errno, "ip addr add");
+
+ run_test();
+
+ fprintf(stderr, "OK\n\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000..6331d91b8
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
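+
+# with the defaults above: mss = 1500 - 20 - 8 = 1472 and
+# max_mss = 65535 - 20 - 8 = 65507, the largest UDP payloads that fit in
+# a single IPv4 packet at link MTU and at the maximum IP datagram size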
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: these always pass -l, because with gso the send length must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
new file mode 100644
index 000000000..7ec4fa4d5
--- /dev/null
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -0,0 +1,864 @@
+/*
+ * Copyright 2013 Red Hat, Inc.
+ * Author: Daniel Borkmann <dborkman@redhat.com>
+ * Chetan Loke <loke.chetan@gmail.com> (TPACKET_V3 usage example)
+ *
+ * A basic test of packet socket's TPACKET_V1/TPACKET_V2/TPACKET_V3 behavior.
+ *
+ * Control:
+ * Test the setup of the TPACKET socket with different patterns that are
+ * known to fail (TODO) or succeed (OK).
+ *
+ * Datapath:
+ * Open a pair of packet sockets, send and receive an a priori known
+ * packet pattern across the sockets, and check that it was sent and
+ * received correctly. Fanout in combination with RX_RING is currently not
+ * tested here.
+ *
+ * The test currently runs for
+ * - TPACKET_V1: RX_RING, TX_RING
+ * - TPACKET_V2: RX_RING, TX_RING
+ * - TPACKET_V3: RX_RING
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <linux/if_packet.h>
+#include <linux/filter.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <bits/wordsize.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <arpa/inet.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+#include <net/if.h>
+#include <inttypes.h>
+#include <poll.h>
+
+#include "psock_lib.h"
+
+#include "../kselftest.h"
+
+#ifndef bug_on
+# define bug_on(cond) assert(!(cond))
+#endif
+
+#ifndef __aligned_tpacket
+# define __aligned_tpacket __attribute__((aligned(TPACKET_ALIGNMENT)))
+#endif
+
+#ifndef __align_tpacket
+# define __align_tpacket(x) __attribute__((aligned(TPACKET_ALIGN(x))))
+#endif
+
+#define NUM_PACKETS 100
+#define ALIGN_8(x) (((x) + 8 - 1) & ~(8 - 1))
+
+struct ring {
+ struct iovec *rd;
+ uint8_t *mm_space;
+ size_t mm_len, rd_len;
+ struct sockaddr_ll ll;
+ void (*walk)(int sock, struct ring *ring);
+ int type, rd_num, flen, version;
+ union {
+ struct tpacket_req req;
+ struct tpacket_req3 req3;
+ };
+};
+
+struct block_desc {
+ uint32_t version;
+ uint32_t offset_to_priv;
+ struct tpacket_hdr_v1 h1;
+};
+
+union frame_map {
+ struct {
+ struct tpacket_hdr tp_h __aligned_tpacket;
+ struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket_hdr));
+ } *v1;
+ struct {
+ struct tpacket2_hdr tp_h __aligned_tpacket;
+ struct sockaddr_ll s_ll __align_tpacket(sizeof(struct tpacket2_hdr));
+ } *v2;
+ void *raw;
+};
+
+static unsigned int total_packets, total_bytes;
+
+static int pfsocket(int ver)
+{
+ int ret, sock = socket(PF_PACKET, SOCK_RAW, 0);
+ if (sock == -1) {
+ perror("socket");
+ exit(1);
+ }
+
+ ret = setsockopt(sock, SOL_PACKET, PACKET_VERSION, &ver, sizeof(ver));
+ if (ret == -1) {
+ perror("setsockopt");
+ exit(1);
+ }
+
+ return sock;
+}
+
+static void status_bar_update(void)
+{
+ if (total_packets % 10 == 0) {
+ fprintf(stderr, ".");
+ fflush(stderr);
+ }
+}
+
+static void test_payload(void *pay, size_t len)
+{
+ struct ethhdr *eth = pay;
+
+ if (len < sizeof(struct ethhdr)) {
+ fprintf(stderr, "test_payload: packet too "
+ "small: %zu bytes!\n", len);
+ exit(1);
+ }
+
+ if (eth->h_proto != htons(ETH_P_IP)) {
+ fprintf(stderr, "test_payload: wrong ethernet "
+ "type: 0x%x!\n", ntohs(eth->h_proto));
+ exit(1);
+ }
+}
+
+static void create_payload(void *pay, size_t *len)
+{
+ int i;
+ struct ethhdr *eth = pay;
+ struct iphdr *ip = pay + sizeof(*eth);
+
+ /* Let's create some broken crap that still passes
+ * our BPF filter.
+ */
+
+ *len = DATA_LEN + 42;
+
+ memset(pay, 0xff, ETH_ALEN * 2);
+ eth->h_proto = htons(ETH_P_IP);
+
+ for (i = 0; i < sizeof(*ip); ++i)
+ ((uint8_t *) pay)[i + sizeof(*eth)] = (uint8_t) rand();
+
+ ip->ihl = 5;
+ ip->version = 4;
+ ip->protocol = 0x11;
+ ip->frag_off = 0;
+ ip->ttl = 64;
+ ip->tot_len = htons((uint16_t) *len - sizeof(*eth));
+
+ ip->saddr = htonl(INADDR_LOOPBACK);
+ ip->daddr = htonl(INADDR_LOOPBACK);
+
+ memset(pay + sizeof(*eth) + sizeof(*ip),
+ DATA_CHAR, DATA_LEN);
+}
+
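+/* RX ring handshake: the kernel hands a frame to user space by setting
+ * TP_STATUS_USER in tp_status; user space releases it back by writing
+ * TP_STATUS_KERNEL, with a full memory barrier around the status flip.
+ */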
+static inline int __v1_rx_kernel_ready(struct tpacket_hdr *hdr)
+{
+ return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+static inline void __v1_rx_user_ready(struct tpacket_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_KERNEL;
+ __sync_synchronize();
+}
+
+static inline int __v2_rx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+ return ((hdr->tp_status & TP_STATUS_USER) == TP_STATUS_USER);
+}
+
+static inline void __v2_rx_user_ready(struct tpacket2_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_KERNEL;
+ __sync_synchronize();
+}
+
+static inline int __v1_v2_rx_kernel_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ return __v1_rx_kernel_ready(base);
+ case TPACKET_V2:
+ return __v2_rx_kernel_ready(base);
+ default:
+ bug_on(1);
+ return 0;
+ }
+}
+
+static inline void __v1_v2_rx_user_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ __v1_rx_user_ready(base);
+ break;
+ case TPACKET_V2:
+ __v2_rx_user_ready(base);
+ break;
+ }
+}
+
+static void walk_v1_v2_rx(int sock, struct ring *ring)
+{
+ struct pollfd pfd;
+ int udp_sock[2];
+ union frame_map ppd;
+ unsigned int frame_num = 0;
+
+ bug_on(ring->type != PACKET_RX_RING);
+
+ pair_udp_open(udp_sock, PORT_BASE);
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = sock;
+ pfd.events = POLLIN | POLLERR;
+ pfd.revents = 0;
+
+ pair_udp_send(udp_sock, NUM_PACKETS);
+
+ while (total_packets < NUM_PACKETS * 2) {
+ while (__v1_v2_rx_kernel_ready(ring->rd[frame_num].iov_base,
+ ring->version)) {
+ ppd.raw = ring->rd[frame_num].iov_base;
+
+ switch (ring->version) {
+ case TPACKET_V1:
+ test_payload((uint8_t *) ppd.raw + ppd.v1->tp_h.tp_mac,
+ ppd.v1->tp_h.tp_snaplen);
+ total_bytes += ppd.v1->tp_h.tp_snaplen;
+ break;
+
+ case TPACKET_V2:
+ test_payload((uint8_t *) ppd.raw + ppd.v2->tp_h.tp_mac,
+ ppd.v2->tp_h.tp_snaplen);
+ total_bytes += ppd.v2->tp_h.tp_snaplen;
+ break;
+ }
+
+ status_bar_update();
+ total_packets++;
+
+ __v1_v2_rx_user_ready(ppd.raw, ring->version);
+
+ frame_num = (frame_num + 1) % ring->rd_num;
+ }
+
+ poll(&pfd, 1, 1);
+ }
+
+ pair_udp_close(udp_sock);
+
+ if (total_packets != 2 * NUM_PACKETS) {
+ fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+ ring->version, total_packets, NUM_PACKETS * 2);
+ exit(1);
+ }
+
+ fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+static inline int __v1_tx_kernel_ready(struct tpacket_hdr *hdr)
+{
+ return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v1_tx_user_ready(struct tpacket_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+ __sync_synchronize();
+}
+
+static inline int __v2_tx_kernel_ready(struct tpacket2_hdr *hdr)
+{
+ return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v2_tx_user_ready(struct tpacket2_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+ __sync_synchronize();
+}
+
+static inline int __v3_tx_kernel_ready(struct tpacket3_hdr *hdr)
+{
+ return !(hdr->tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING));
+}
+
+static inline void __v3_tx_user_ready(struct tpacket3_hdr *hdr)
+{
+ hdr->tp_status = TP_STATUS_SEND_REQUEST;
+ __sync_synchronize();
+}
+
+static inline int __tx_kernel_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ return __v1_tx_kernel_ready(base);
+ case TPACKET_V2:
+ return __v2_tx_kernel_ready(base);
+ case TPACKET_V3:
+ return __v3_tx_kernel_ready(base);
+ default:
+ bug_on(1);
+ return 0;
+ }
+}
+
+static inline void __tx_user_ready(void *base, int version)
+{
+ switch (version) {
+ case TPACKET_V1:
+ __v1_tx_user_ready(base);
+ break;
+ case TPACKET_V2:
+ __v2_tx_user_ready(base);
+ break;
+ case TPACKET_V3:
+ __v3_tx_user_ready(base);
+ break;
+ }
+}
+
+static void __v1_v2_set_packet_loss_discard(int sock)
+{
+ int ret, discard = 1;
+
+ ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard,
+ sizeof(discard));
+ if (ret == -1) {
+ perror("setsockopt");
+ exit(1);
+ }
+}
+
+static inline void *get_next_frame(struct ring *ring, int n)
+{
+ uint8_t *f0 = ring->rd[0].iov_base;
+
+ switch (ring->version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ return ring->rd[n].iov_base;
+ case TPACKET_V3:
+ return f0 + (n * ring->req3.tp_frame_size);
+ default:
+ bug_on(1);
+ }
+}
+
+static void walk_tx(int sock, struct ring *ring)
+{
+ struct pollfd pfd;
+ int rcv_sock, ret;
+ size_t packet_len;
+ union frame_map ppd;
+ char packet[1024];
+ unsigned int frame_num = 0, got = 0;
+ struct sockaddr_ll ll = {
+ .sll_family = PF_PACKET,
+ .sll_halen = ETH_ALEN,
+ };
+ int nframes;
+
+ /* TPACKET_V{1,2} sets up the ring->rd* related variables based
+ * on frames (e.g., rd_num is tp_frame_nr) whereas V3 sets these
+ * up based on blocks (e.g., rd_num is tp_block_nr)
+ */
+ if (ring->version <= TPACKET_V2)
+ nframes = ring->rd_num;
+ else
+ nframes = ring->req3.tp_frame_nr;
+
+ bug_on(ring->type != PACKET_TX_RING);
+ bug_on(nframes < NUM_PACKETS);
+
+ rcv_sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+ if (rcv_sock == -1) {
+ perror("socket");
+ exit(1);
+ }
+
+ pair_udp_setfilter(rcv_sock);
+
+ ll.sll_ifindex = if_nametoindex("lo");
+ ret = bind(rcv_sock, (struct sockaddr *) &ll, sizeof(ll));
+ if (ret == -1) {
+ perror("bind");
+ exit(1);
+ }
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = sock;
+ pfd.events = POLLOUT | POLLERR;
+ pfd.revents = 0;
+
+ total_packets = NUM_PACKETS;
+ create_payload(packet, &packet_len);
+
+ while (total_packets > 0) {
+ void *next = get_next_frame(ring, frame_num);
+
+ while (__tx_kernel_ready(next, ring->version) &&
+ total_packets > 0) {
+ ppd.raw = next;
+
+ switch (ring->version) {
+ case TPACKET_V1:
+ ppd.v1->tp_h.tp_snaplen = packet_len;
+ ppd.v1->tp_h.tp_len = packet_len;
+
+ memcpy((uint8_t *) ppd.raw + TPACKET_HDRLEN -
+ sizeof(struct sockaddr_ll), packet,
+ packet_len);
+ total_bytes += ppd.v1->tp_h.tp_snaplen;
+ break;
+
+ case TPACKET_V2:
+ ppd.v2->tp_h.tp_snaplen = packet_len;
+ ppd.v2->tp_h.tp_len = packet_len;
+
+ memcpy((uint8_t *) ppd.raw + TPACKET2_HDRLEN -
+ sizeof(struct sockaddr_ll), packet,
+ packet_len);
+ total_bytes += ppd.v2->tp_h.tp_snaplen;
+ break;
+ case TPACKET_V3: {
+ struct tpacket3_hdr *tx = next;
+
+ tx->tp_snaplen = packet_len;
+ tx->tp_len = packet_len;
+ tx->tp_next_offset = 0;
+
+ memcpy((uint8_t *)tx + TPACKET3_HDRLEN -
+ sizeof(struct sockaddr_ll), packet,
+ packet_len);
+ total_bytes += tx->tp_snaplen;
+ break;
+ }
+ }
+
+ status_bar_update();
+ total_packets--;
+
+ __tx_user_ready(next, ring->version);
+
+ frame_num = (frame_num + 1) % nframes;
+ }
+
+ poll(&pfd, 1, 1);
+ }
+
+ bug_on(total_packets != 0);
+
+ ret = sendto(sock, NULL, 0, 0, NULL, 0);
+ if (ret == -1) {
+ perror("sendto");
+ exit(1);
+ }
+
+ while ((ret = recvfrom(rcv_sock, packet, sizeof(packet),
+ 0, NULL, NULL)) > 0 &&
+ total_packets < NUM_PACKETS) {
+ got += ret;
+ test_payload(packet, ret);
+
+ status_bar_update();
+ total_packets++;
+ }
+
+ close(rcv_sock);
+
+ if (total_packets != NUM_PACKETS) {
+ fprintf(stderr, "walk_v%d_rx: received %u out of %u pkts\n",
+ ring->version, total_packets, NUM_PACKETS);
+ exit(1);
+ }
+
+ fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, got);
+}
+
+static void walk_v1_v2(int sock, struct ring *ring)
+{
+ if (ring->type == PACKET_RX_RING)
+ walk_v1_v2_rx(sock, ring);
+ else
+ walk_tx(sock, ring);
+}
+
+static uint64_t __v3_prev_block_seq_num = 0;
+
+static void __v3_test_block_seq_num(struct block_desc *pbd)
+{
+ if (__v3_prev_block_seq_num + 1 != pbd->h1.seq_num) {
+ fprintf(stderr, "\nprev_block_seq_num:%"PRIu64", expected "
+ "seq:%"PRIu64" != actual seq:%"PRIu64"\n",
+ __v3_prev_block_seq_num, __v3_prev_block_seq_num + 1,
+ (uint64_t) pbd->h1.seq_num);
+ exit(1);
+ }
+
+ __v3_prev_block_seq_num = pbd->h1.seq_num;
+}
+
+static void __v3_test_block_len(struct block_desc *pbd, uint32_t bytes, int block_num)
+{
+ if (pbd->h1.num_pkts && bytes != pbd->h1.blk_len) {
+ fprintf(stderr, "\nblock:%u with %upackets, expected "
+ "len:%u != actual len:%u\n", block_num,
+ pbd->h1.num_pkts, bytes, pbd->h1.blk_len);
+ exit(1);
+ }
+}
+
+static void __v3_test_block_header(struct block_desc *pbd, const int block_num)
+{
+ if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
+ fprintf(stderr, "\nblock %u: not in TP_STATUS_USER\n", block_num);
+ exit(1);
+ }
+
+ __v3_test_block_seq_num(pbd);
+}
+
+static void __v3_walk_block(struct block_desc *pbd, const int block_num)
+{
+ int num_pkts = pbd->h1.num_pkts, i;
+ unsigned long bytes = 0, bytes_with_padding = ALIGN_8(sizeof(*pbd));
+ struct tpacket3_hdr *ppd;
+
+ __v3_test_block_header(pbd, block_num);
+
+ ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
+ pbd->h1.offset_to_first_pkt);
+
+ for (i = 0; i < num_pkts; ++i) {
+ bytes += ppd->tp_snaplen;
+
+ if (ppd->tp_next_offset)
+ bytes_with_padding += ppd->tp_next_offset;
+ else
+ bytes_with_padding += ALIGN_8(ppd->tp_snaplen + ppd->tp_mac);
+
+ test_payload((uint8_t *) ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+ status_bar_update();
+ total_packets++;
+
+ ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd + ppd->tp_next_offset);
+ __sync_synchronize();
+ }
+
+ __v3_test_block_len(pbd, bytes_with_padding, block_num);
+ total_bytes += bytes;
+}
+
+static void __v3_flush_block(struct block_desc *pbd)
+{
+ pbd->h1.block_status = TP_STATUS_KERNEL;
+ __sync_synchronize();
+}
+
+static void walk_v3_rx(int sock, struct ring *ring)
+{
+ unsigned int block_num = 0;
+ struct pollfd pfd;
+ struct block_desc *pbd;
+ int udp_sock[2];
+
+ bug_on(ring->type != PACKET_RX_RING);
+
+ pair_udp_open(udp_sock, PORT_BASE);
+
+ memset(&pfd, 0, sizeof(pfd));
+ pfd.fd = sock;
+ pfd.events = POLLIN | POLLERR;
+ pfd.revents = 0;
+
+ pair_udp_send(udp_sock, NUM_PACKETS);
+
+ while (total_packets < NUM_PACKETS * 2) {
+ pbd = (struct block_desc *) ring->rd[block_num].iov_base;
+
+ while ((pbd->h1.block_status & TP_STATUS_USER) == 0)
+ poll(&pfd, 1, 1);
+
+ __v3_walk_block(pbd, block_num);
+ __v3_flush_block(pbd);
+
+ block_num = (block_num + 1) % ring->rd_num;
+ }
+
+ pair_udp_close(udp_sock);
+
+ if (total_packets != 2 * NUM_PACKETS) {
+ fprintf(stderr, "walk_v3_rx: received %u out of %u pkts\n",
+ total_packets, 2 * NUM_PACKETS);
+ exit(1);
+ }
+
+ fprintf(stderr, " %u pkts (%u bytes)", NUM_PACKETS, total_bytes >> 1);
+}
+
+static void walk_v3(int sock, struct ring *ring)
+{
+ if (ring->type == PACKET_RX_RING)
+ walk_v3_rx(sock, ring);
+ else
+ walk_tx(sock, ring);
+}
+
+static void __v1_v2_fill(struct ring *ring, unsigned int blocks)
+{
+ ring->req.tp_block_size = getpagesize() << 2;
+ ring->req.tp_frame_size = TPACKET_ALIGNMENT << 7;
+ ring->req.tp_block_nr = blocks;
+
+ ring->req.tp_frame_nr = ring->req.tp_block_size /
+ ring->req.tp_frame_size *
+ ring->req.tp_block_nr;
+
+ ring->mm_len = ring->req.tp_block_size * ring->req.tp_block_nr;
+ ring->walk = walk_v1_v2;
+ ring->rd_num = ring->req.tp_frame_nr;
+ ring->flen = ring->req.tp_frame_size;
+}
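+
+/*
+ * With the usual 4 KiB page size the sizing above works out to 16 KiB
+ * blocks and 2 KiB frames (TPACKET_ALIGNMENT is 16, so 16 << 7 == 2048),
+ * i.e. 8 frames per block and 2048 frames across the 256 blocks
+ * requested in setup_ring().
+ */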
+
+static void __v3_fill(struct ring *ring, unsigned int blocks, int type)
+{
+ if (type == PACKET_RX_RING) {
+ ring->req3.tp_retire_blk_tov = 64;
+ ring->req3.tp_sizeof_priv = 0;
+ ring->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+ }
+ ring->req3.tp_block_size = getpagesize() << 2;
+ ring->req3.tp_frame_size = TPACKET_ALIGNMENT << 7;
+ ring->req3.tp_block_nr = blocks;
+
+ ring->req3.tp_frame_nr = ring->req3.tp_block_size /
+ ring->req3.tp_frame_size *
+ ring->req3.tp_block_nr;
+
+ ring->mm_len = ring->req3.tp_block_size * ring->req3.tp_block_nr;
+ ring->walk = walk_v3;
+ ring->rd_num = ring->req3.tp_block_nr;
+ ring->flen = ring->req3.tp_block_size;
+}
+
+static void setup_ring(int sock, struct ring *ring, int version, int type)
+{
+ int ret = 0;
+ unsigned int blocks = 256;
+
+ ring->type = type;
+ ring->version = version;
+
+ switch (version) {
+ case TPACKET_V1:
+ case TPACKET_V2:
+ if (type == PACKET_TX_RING)
+ __v1_v2_set_packet_loss_discard(sock);
+ __v1_v2_fill(ring, blocks);
+ ret = setsockopt(sock, SOL_PACKET, type, &ring->req,
+ sizeof(ring->req));
+ break;
+
+ case TPACKET_V3:
+ __v3_fill(ring, blocks, type);
+ ret = setsockopt(sock, SOL_PACKET, type, &ring->req3,
+ sizeof(ring->req3));
+ break;
+ }
+
+ if (ret == -1) {
+ perror("setsockopt");
+ exit(1);
+ }
+
+ ring->rd_len = ring->rd_num * sizeof(*ring->rd);
+ ring->rd = malloc(ring->rd_len);
+ if (ring->rd == NULL) {
+ perror("malloc");
+ exit(1);
+ }
+
+ total_packets = 0;
+ total_bytes = 0;
+}
+
+static void mmap_ring(int sock, struct ring *ring)
+{
+ int i;
+
+ ring->mm_space = mmap(0, ring->mm_len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0);
+ if (ring->mm_space == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memset(ring->rd, 0, ring->rd_len);
+ for (i = 0; i < ring->rd_num; ++i) {
+ ring->rd[i].iov_base = ring->mm_space + (i * ring->flen);
+ ring->rd[i].iov_len = ring->flen;
+ }
+}
+
+static void bind_ring(int sock, struct ring *ring)
+{
+ int ret;
+
+ pair_udp_setfilter(sock);
+
+ ring->ll.sll_family = PF_PACKET;
+ ring->ll.sll_protocol = htons(ETH_P_ALL);
+ ring->ll.sll_ifindex = if_nametoindex("lo");
+ ring->ll.sll_hatype = 0;
+ ring->ll.sll_pkttype = 0;
+ ring->ll.sll_halen = 0;
+
+ ret = bind(sock, (struct sockaddr *) &ring->ll, sizeof(ring->ll));
+ if (ret == -1) {
+ perror("bind");
+ exit(1);
+ }
+}
+
+static void walk_ring(int sock, struct ring *ring)
+{
+ ring->walk(sock, ring);
+}
+
+static void unmap_ring(int sock, struct ring *ring)
+{
+ munmap(ring->mm_space, ring->mm_len);
+ free(ring->rd);
+}
+
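+/*
+ * Heuristic: the first field of each /proc/kallsyms line is a symbol
+ * address printed in hex, so the number of hex digits before the first
+ * space, multiplied by 4, gives the kernel's pointer width in bits.
+ */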
+static int test_kernel_bit_width(void)
+{
+ char in[512], *ptr;
+ int num = 0, fd;
+ ssize_t ret;
+
+ fd = open("/proc/kallsyms", O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
+
+ ret = read(fd, in, sizeof(in));
+ if (ret <= 0) {
+ perror("read");
+ exit(1);
+ }
+
+ close(fd);
+
+ ptr = in;
+ while (!isspace(*ptr)) {
+ num++;
+ ptr++;
+ }
+
+ return num * 4;
+}
+
+static int test_user_bit_width(void)
+{
+ return __WORDSIZE;
+}
+
+static const char *tpacket_str[] = {
+ [TPACKET_V1] = "TPACKET_V1",
+ [TPACKET_V2] = "TPACKET_V2",
+ [TPACKET_V3] = "TPACKET_V3",
+};
+
+static const char *type_str[] = {
+ [PACKET_RX_RING] = "PACKET_RX_RING",
+ [PACKET_TX_RING] = "PACKET_TX_RING",
+};
+
+static int test_tpacket(int version, int type)
+{
+ int sock;
+ struct ring ring;
+
+ fprintf(stderr, "test: %s with %s ", tpacket_str[version],
+ type_str[type]);
+ fflush(stderr);
+
+ if (version == TPACKET_V1 &&
+ test_kernel_bit_width() != test_user_bit_width()) {
+ fprintf(stderr, "test: skip %s %s since user and kernel "
+ "space have different bit width\n",
+ tpacket_str[version], type_str[type]);
+ return KSFT_SKIP;
+ }
+
+ sock = pfsocket(version);
+ memset(&ring, 0, sizeof(ring));
+ setup_ring(sock, &ring, version, type);
+ mmap_ring(sock, &ring);
+ bind_ring(sock, &ring);
+ walk_ring(sock, &ring);
+ unmap_ring(sock, &ring);
+ close(sock);
+
+ fprintf(stderr, "\n");
+ return 0;
+}
+
+int main(void)
+{
+ int ret = 0;
+
+ ret |= test_tpacket(TPACKET_V1, PACKET_RX_RING);
+ ret |= test_tpacket(TPACKET_V1, PACKET_TX_RING);
+
+ ret |= test_tpacket(TPACKET_V2, PACKET_RX_RING);
+ ret |= test_tpacket(TPACKET_V2, PACKET_TX_RING);
+
+ ret |= test_tpacket(TPACKET_V3, PACKET_RX_RING);
+ ret |= test_tpacket(TPACKET_V3, PACKET_TX_RING);
+
+ if (ret)
+ return 1;
+
+ printf("OK. All tests passed\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
new file mode 100644
index 000000000..7c5b12664
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -0,0 +1,114 @@
+/*
+ * Test for the regression introduced by
+ *
+ * b9470c27607b ("inet: kill smallest_size and smallest_port")
+ *
+ * If we open an ipv4 socket on a port with reuseaddr we shouldn't reset the
+ * bind bucket ("tb", struct inet_bind_bucket) when we open the ipv6
+ * counterpart, which is what was happening previously.
+ */
+#include <errno.h>
+#include <error.h>
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#define PORT 9999
+
+int open_port(int ipv6, int any)
+{
+ int fd = -1;
+ int reuseaddr = 1;
+ int v6only = 1;
+ socklen_t addrlen;
+ int ret = -1;
+ struct sockaddr *addr;
+ int family = ipv6 ? AF_INET6 : AF_INET;
+
+ struct sockaddr_in6 addr6 = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(PORT),
+ .sin6_addr = in6addr_any
+ };
+ struct sockaddr_in addr4 = {
+ .sin_family = AF_INET,
+ .sin_port = htons(PORT),
+ .sin_addr.s_addr = any ? htonl(INADDR_ANY) : inet_addr("127.0.0.1"),
+ };
+
+ if (ipv6) {
+ addr = (struct sockaddr*)&addr6;
+ addrlen = sizeof(addr6);
+ } else {
+ addr = (struct sockaddr*)&addr4;
+ addrlen = sizeof(addr4);
+ }
+
+ if ((fd = socket(family, SOCK_STREAM, IPPROTO_TCP)) < 0) {
+ perror("socket");
+ goto out;
+ }
+
+ if (ipv6 && setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&v6only,
+ sizeof(v6only)) < 0) {
+ perror("setsockopt IPV6_V6ONLY");
+ goto out;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr,
+ sizeof(reuseaddr)) < 0) {
+ perror("setsockopt SO_REUSEADDR");
+ goto out;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
+ perror("bind");
+ goto out;
+ }
+
+ if (any)
+ return fd;
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ goto out;
+ }
+ return fd;
+out:
+ close(fd);
+ return ret;
+}
+
+int main(void)
+{
+ int listenfd;
+ int fd1, fd2;
+
+ fprintf(stderr, "Opening 127.0.0.1:%d\n", PORT);
+ listenfd = open_port(0, 0);
+ if (listenfd < 0)
+ error(1, errno, "Couldn't open listen socket");
+ fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+ fprintf(stderr, "Opening in6addr_any:%d\n", PORT);
+ fd1 = open_port(1, 1);
+ if (fd1 < 0)
+ error(1, errno, "Couldn't open ipv6 reuseport");
+ fprintf(stderr, "Opening INADDR_ANY:%d\n", PORT);
+ fd2 = open_port(0, 1);
+ if (fd2 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on a already bound non-reuseport socket");
+ close(fd1);
+ fprintf(stderr, "Opening INADDR_ANY:%d after closing ipv6 socket\n", PORT);
+ fd1 = open_port(0, 1);
+ if (fd1 >= 0)
+ error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
+ fprintf(stderr, "Success");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
new file mode 100644
index 000000000..b5277106d
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -0,0 +1,641 @@
+/*
+ * Test functionality of BPF filters for SO_REUSEPORT. The tests below will use
+ * a BPF program (both classic and extended) to read the first word from an
+ * incoming packet (expected to be in network byte-order), calculate a modulus
+ * of that number, and then dispatch the packet to the Nth socket using the
+ * result. These tests are run for each supported address family and protocol.
+ * Additionally, a few edge cases in the implementation are tested.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+struct test_params {
+ int recv_family;
+ int send_family;
+ int protocol;
+ size_t recv_socks;
+ uint16_t recv_port;
+ uint16_t send_port_min;
+};
+
+static size_t sockaddr_size(void)
+{
+ return sizeof(struct sockaddr_storage);
+}
+
+static struct sockaddr *new_any_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr_storage *addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ addr = malloc(sizeof(struct sockaddr_storage));
+ memset(addr, 0, sizeof(struct sockaddr_storage));
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(port);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return (struct sockaddr *)addr;
+}
+
+static struct sockaddr *new_loopback_sockaddr(int family, uint16_t port)
+{
+ struct sockaddr *addr = new_any_sockaddr(family, port);
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ addr6->sin6_addr = in6addr_loopback;
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+ return addr;
+}
+
+static void attach_ebpf(int fd, uint16_t mod)
+{
+ static char bpf_log_buf[65536];
+ static const char bpf_license[] = "GPL";
+
+ int bpf_fd;
+ const struct bpf_insn prog[] = {
+ /* BPF_MOV64_REG(BPF_REG_6, BPF_REG_1) */
+ { BPF_ALU64 | BPF_MOV | BPF_X, BPF_REG_6, BPF_REG_1, 0, 0 },
+ /* BPF_LD_ABS(BPF_W, 0) R0 = (uint32_t)skb[0] */
+ { BPF_LD | BPF_ABS | BPF_W, 0, 0, 0, 0 },
+ /* BPF_ALU64_IMM(BPF_MOD, BPF_REG_0, mod) */
+ { BPF_ALU64 | BPF_MOD | BPF_K, BPF_REG_0, 0, 0, mod },
+ /* BPF_EXIT_INSN() */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = ARRAY_SIZE(prog);
+ attr.insns = (unsigned long) &prog;
+ attr.license = (unsigned long) &bpf_license;
+ attr.log_buf = (unsigned long) &bpf_log_buf;
+ attr.log_size = sizeof(bpf_log_buf);
+ attr.log_level = 1;
+ attr.kern_version = 0;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
+}
+
+static void attach_cbpf(int fd, uint16_t mod)
+{
+ struct sock_filter code[] = {
+ /* A = (uint32_t)skb[0] */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, 0 },
+ /* A = A % mod */
+ { BPF_ALU | BPF_MOD, 0, 0, mod },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = ARRAY_SIZE(code),
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
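+/*
+ * For both program flavours the reuseport layer uses the program's
+ * return value directly as an index into the socket group (an
+ * out-of-range value falls back to the default hash selection), so the
+ * programs above steer a packet whose first word is N to socket
+ * N % mod. BPF_LD | BPF_ABS word loads interpret packet bytes as
+ * big-endian, which is why the senders transmit htonl()ed counters.
+ */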
+static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
+ void (*attach_bpf)(int, uint16_t))
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int i, opt;
+
+ for (i = 0; i < p.recv_socks; ++i) {
+ fd[i] = socket(p.recv_family, p.protocol, 0);
+ if (fd[i] < 0)
+ error(1, errno, "failed to create recv %d", i);
+
+ opt = 1;
+ if (setsockopt(fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on %d", i);
+
+ if (i == 0)
+ attach_bpf(fd[i], mod);
+
+ if (bind(fd[i], addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket %d", i);
+
+ if (p.protocol == SOCK_STREAM) {
+ opt = 4;
+ if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
+ sizeof(opt)))
+ error(1, errno,
+ "failed to set TCP_FASTOPEN on %d", i);
+ if (listen(fd[i], p.recv_socks * 10))
+ error(1, errno, "failed to listen on socket");
+ }
+ }
+ free(addr);
+}
+
+static void send_from(struct test_params p, uint16_t sport, char *buf,
+ size_t len)
+{
+ struct sockaddr * const saddr = new_any_sockaddr(p.send_family, sport);
+ struct sockaddr * const daddr =
+ new_loopback_sockaddr(p.send_family, p.recv_port);
+ const int fd = socket(p.send_family, p.protocol, 0), one = 1;
+
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)))
+ error(1, errno, "failed to set reuseaddr");
+
+ if (bind(fd, saddr, sockaddr_size()))
+ error(1, errno, "failed to bind send socket");
+
+ if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+ free(saddr);
+ free(daddr);
+}
+
+static void test_recv_order(const struct test_params p, int fd[], int mod)
+{
+ char recv_buf[8], send_buf[8];
+ struct msghdr msg;
+ struct iovec recv_io = { recv_buf, 8 };
+ struct epoll_event ev;
+ int epfd, conn, i, sport, expected;
+ uint32_t data, ndata;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (i = 0; i < p.recv_socks; ++i) {
+ ev.events = EPOLLIN;
+ ev.data.fd = fd[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd[i], &ev))
+ error(1, errno, "failed to register sock %d epoll", i);
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &recv_io;
+ msg.msg_iovlen = 1;
+
+ for (data = 0; data < p.recv_socks * 2; ++data) {
+ sport = p.send_port_min + data;
+ ndata = htonl(data);
+ memcpy(send_buf, &ndata, sizeof(ndata));
+ send_from(p, sport, send_buf, sizeof(ndata));
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll wait failed");
+
+ if (p.protocol == SOCK_STREAM) {
+ conn = accept(ev.data.fd, NULL, NULL);
+ if (conn < 0)
+ error(1, errno, "error accepting");
+ i = recvmsg(conn, &msg, 0);
+ close(conn);
+ } else {
+ i = recvmsg(ev.data.fd, &msg, 0);
+ }
+ if (i < 0)
+ error(1, errno, "recvmsg error");
+ if (i != sizeof(ndata))
+ error(1, 0, "expected size %zd got %d",
+ sizeof(ndata), i);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ if (ev.data.fd == fd[i])
+ break;
+ memcpy(&ndata, recv_buf, sizeof(ndata));
+ fprintf(stderr, "Socket %d: %d\n", i, ntohl(ndata));
+
+ expected = (sport % mod);
+ if (i != expected)
+ error(1, 0, "expected socket %d", expected);
+ }
+}
+
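+/*
+ * Note: comparing against sport % mod only works because every
+ * send_port_min used in main() is a multiple of the mods exercised
+ * here, so sport % mod equals data % mod, which is what the attached
+ * program computes.
+ */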
+static void test_reuseport_ebpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing EBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_ebpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+ fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_ebpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_reuseport_cbpf(struct test_params p)
+{
+ int i, fd[p.recv_socks];
+
+ fprintf(stderr, "Testing CBPF mod %zd...\n", p.recv_socks);
+ build_recv_group(p, fd, p.recv_socks, attach_cbpf);
+ test_recv_order(p, fd, p.recv_socks);
+
+ p.send_port_min += p.recv_socks * 2;
+ fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
+ attach_cbpf(fd[0], p.recv_socks / 2);
+ test_recv_order(p, fd, p.recv_socks / 2);
+
+ for (i = 0; i < p.recv_socks; ++i)
+ close(fd[i]);
+}
+
+static void test_extra_filter(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ int fd1, fd2, opt;
+
+ fprintf(stderr, "Testing too many filters...\n");
+ fd1 = socket(p.recv_family, p.protocol, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(p.recv_family, p.protocol, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+
+ opt = 1;
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_ebpf(fd2, 10);
+
+ if (bind(fd1, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ errno = 0;
+ if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
+ error(1, errno, "bind socket 2 should fail with EADDRINUSE");
+
+ free(addr);
+}
+
+static void test_filter_no_reuseport(const struct test_params p)
+{
+ struct sockaddr * const addr =
+ new_any_sockaddr(p.recv_family, p.recv_port);
+ const char bpf_license[] = "GPL";
+ struct bpf_insn ecode[] = {
+ { BPF_ALU64 | BPF_MOV | BPF_K, BPF_REG_0, 0, 0, 10 },
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ struct sock_filter ccode[] = {{ BPF_RET | BPF_A, 0, 0, 0 }};
+ union bpf_attr eprog;
+ struct sock_fprog cprog;
+ int fd, bpf_fd;
+
+ fprintf(stderr, "Testing filters on non-SO_REUSEPORT socket...\n");
+
+ memset(&eprog, 0, sizeof(eprog));
+ eprog.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ eprog.insn_cnt = ARRAY_SIZE(ecode);
+ eprog.insns = (unsigned long) &ecode;
+ eprog.license = (unsigned long) &bpf_license;
+ eprog.kern_version = 0;
+
+ memset(&cprog, 0, sizeof(cprog));
+ cprog.len = ARRAY_SIZE(ccode);
+ cprog.filter = ccode;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &eprog, sizeof(eprog));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error");
+ fd = socket(p.recv_family, p.protocol, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create socket 1");
+
+ if (bind(fd, addr, sockaddr_size()))
+ error(1, errno, "failed to bind recv socket 1");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ errno = 0;
+ if (!setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &cprog,
+ sizeof(cprog)) || errno != EINVAL)
+ error(1, errno, "setsockopt should have returned EINVAL");
+
+ free(addr);
+}
+
+static void test_filter_without_bind(void)
+{
+ int fd1, fd2, opt = 1;
+
+ fprintf(stderr, "Testing filter add without bind...\n");
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd1 < 0)
+ error(1, errno, "failed to create socket 1");
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd2 < 0)
+ error(1, errno, "failed to create socket 2");
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
+
+ attach_ebpf(fd1, 10);
+ attach_cbpf(fd2, 10);
+
+ close(fd1);
+ close(fd2);
+}
+
+void enable_fastopen(void)
+{
+ int fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDONLY);
+ int rw_mask = 3; /* bit 0 (0x1): client side; bit 1 (0x2): server side */
+ int val, size;
+ char buf[16];
+
+ if (fd < 0)
+ error(1, errno, "Unable to open tcp_fastopen sysctl");
+ if (read(fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read tcp_fastopen sysctl");
+ val = atoi(buf);
+ close(fd);
+
+ if ((val & rw_mask) != rw_mask) {
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (fd < 0)
+ error(1, errno,
+ "Unable to open tcp_fastopen sysctl for writing");
+ val |= rw_mask;
+ size = snprintf(buf, 16, "%d", val);
+ if (write(fd, buf, size) <= 0)
+ error(1, errno, "Unable to write tcp_fastopen sysctl");
+ close(fd);
+ }
+}
+
+static struct rlimit rlim_old;
+
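+/*
+ * Loaded BPF programs are charged against RLIMIT_MEMLOCK on kernels of
+ * this vintage, so bump the limit by 1 MiB for the duration of the test
+ * and restore the old value on exit.
+ */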
+static __attribute__((constructor)) void main_ctor(void)
+{
+ getrlimit(RLIMIT_MEMLOCK, &rlim_old);
+
+ if (rlim_old.rlim_cur != RLIM_INFINITY) {
+ struct rlimit rlim_new;
+
+ rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
+ rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
+ setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ }
+}
+
+static __attribute__((destructor)) void main_dtor(void)
+{
+ setrlimit(RLIMIT_MEMLOCK, &rlim_old);
+}
+
+int main(void)
+{
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group. Run the bpf test through both paths.
+ */
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8000,
+ .send_port_min = 9000});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8001,
+ .send_port_min = 9020});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8002});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8008});
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8003,
+ .send_port_min = 9040});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8004,
+ .send_port_min = 9060});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8005});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_DGRAM,
+ .recv_port = 8009});
+
+ fprintf(stderr, "---- IPv6 UDP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8006,
+ .send_port_min = 9080});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 10,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_DGRAM,
+ .recv_socks = 20,
+ .recv_port = 8007,
+ .send_port_min = 9100});
+
+ /* TCP fastopen is required for the TCP tests */
+ enable_fastopen();
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8008,
+ .send_port_min = 9120});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8009,
+ .send_port_min = 9160});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8010});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8011});
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8012,
+ .send_port_min = 9200});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8013,
+ .send_port_min = 9240});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8014});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8015});
+
+ fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8016,
+ .send_port_min = 9320});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8017,
+ .send_port_min = 9360});
+
+ test_filter_without_bind();
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
new file mode 100644
index 000000000..2d6461747
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. This program creates
+ * an SO_REUSEPORT receiver group containing one socket per CPU core. It then
+ * creates a BPF program that will select a socket from this group based
+ * on the core id that receives the packet. The sending code artificially
+ * moves itself to run on different core ids and sends one message from
+ * each core. Since these packets are delivered over loopback, they should
+ * arrive on the same core that sent them. The receiving code then ensures
+ * that the packet was received on the socket for the corresponding core id.
+ * This entire process is done for several different core id permutations
+ * and for each IPv4/IPv6 and TCP/UDP combination.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ struct sock_filter code[] = {
+ /* A = raw_smp_processor_id() */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = 2,
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
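+/*
+ * Loading from the magic offset SKF_AD_OFF + SKF_AD_CPU is the classic
+ * BPF ancillary-data mechanism: instead of reading packet bytes, the
+ * load returns the id of the CPU the filter is running on, which then
+ * becomes the index of the receiving socket.
+ */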
+static void send_from_cpu(int cpu_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ cpu_set_t cpu_set;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ memset(&cpu_set, 0, sizeof(cpu_set));
+ CPU_SET(cpu_id, &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0)
+ error(1, errno, "failed to pin to cpu");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i);
+ if (cpu_id != i)
+ error(1, 0, "cpu id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, cpu;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (cpu = 0; cpu < len; ++cpu) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[cpu];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (cpu = 0; cpu < len; ++cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Reverse iterate */
+ for (cpu = len - 1; cpu >= 0; --cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Even cores */
+ for (cpu = 0; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Odd cores */
+ for (cpu = 1; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ close(epfd);
+ for (cpu = 0; cpu < len; ++cpu)
+ close(rcv_fd[cpu]);
+}
+
+int main(void)
+{
+ int *rcv_fd, cpus;
+
+ cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (cpus <= 0)
+ error(1, errno, "failed counting cpus");
+
+ rcv_fd = calloc(cpus, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
new file mode 100644
index 000000000..c9f478b40
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. Same test as
+ * in reuseport_bpf_cpu, only with one socket per NUMA node.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <numa.h>
+
+#include "../kselftest.h"
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ static char bpf_log_buf[65536];
+ static const char bpf_license[] = "";
+
+ int bpf_fd;
+ const struct bpf_insn prog[] = {
+ /* R0 = bpf_get_numa_node_id() */
+ { BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_numa_node_id },
+ /* return R0 */
+ { BPF_JMP | BPF_EXIT, 0, 0, 0, 0 }
+ };
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.insns = (unsigned long) &prog;
+ attr.license = (unsigned long) &bpf_license;
+ attr.log_buf = (unsigned long) &bpf_log_buf;
+ attr.log_size = sizeof(bpf_log_buf);
+ attr.log_level = 1;
+
+ bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+ if (bpf_fd < 0)
+ error(1, errno, "ebpf error. log:\n%s\n", bpf_log_buf);
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &bpf_fd,
+ sizeof(bpf_fd)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_EBPF");
+
+ close(bpf_fd);
+}
+
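+/*
+ * BPF_JMP | BPF_CALL with BPF_FUNC_get_numa_node_id invokes the eBPF
+ * helper of that name, so the program returns the NUMA node handling
+ * the packet, which reuseport then uses as the socket index.
+ */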
+static void send_from_node(int node_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ if (numa_run_on_node(node_id) < 0)
+ error(1, errno, "failed to pin to node");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_node(int *rcv_fd, int len, int epfd, int node_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send node %d, receive socket %d\n", node_id, i);
+ if (node_id != i)
+ error(1, 0, "node id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, node;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (node = 0; node < len; ++node) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[node];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[node], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (node = 0; node < len; ++node) {
+ send_from_node(node, family, proto);
+ receive_on_node(rcv_fd, len, epfd, node, proto);
+ }
+
+ /* Reverse iterate */
+ for (node = len - 1; node >= 0; --node) {
+ send_from_node(node, family, proto);
+ receive_on_node(rcv_fd, len, epfd, node, proto);
+ }
+
+ close(epfd);
+ for (node = 0; node < len; ++node)
+ close(rcv_fd[node]);
+}
+
+int main(void)
+{
+ int *rcv_fd, nodes;
+
+ if (numa_available() < 0)
+ ksft_exit_skip("no numa api support\n");
+
+ nodes = numa_max_node() + 1;
+
+ rcv_fd = calloc(nodes, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, nodes, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, nodes, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, nodes, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, nodes, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
new file mode 100644
index 000000000..fb7a59ed7
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * It is possible to use SO_REUSEPORT to open multiple sockets bound to
+ * equivalent local addresses using AF_INET and AF_INET6 at the same time. If
+ * the AF_INET6 socket has IPV6_V6ONLY set, it's clear which socket should
+ * receive a given incoming packet. However, when it is not set, incoming v4
+ * packets should prefer the AF_INET socket(s). This behavior was defined with
+ * the original SO_REUSEPORT implementation, but broke with
+ * e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+ * This test creates these mixed AF_INET/AF_INET6 sockets and asserts the
+ * AF_INET preference for v4 packets.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ int opt, i;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < count; ++i) {
+ rcv_fds[i] = socket(family, proto, 0);
+ if (rcv_fds[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fds[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void send_from_v4(int proto)
+{
+ struct sockaddr_in saddr, daddr;
+ int fd;
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr.sin_port = 0;
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr.sin_port = htons(PORT);
+
+ fd = socket(AF_INET, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static int receive_once(int epfd, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ return ev.data.fd;
+}
+
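+/*
+ * Send one v4 packet at the mixed group and use SO_DOMAIN to verify
+ * that the socket which received it is an AF_INET one, i.e. that v4
+ * traffic did not land on a dual-stack AF_INET6 socket.
+ */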
+static void test(int *rcv_fds, int count, int proto)
+{
+ struct epoll_event ev;
+ int epfd, i, test_fd;
+ int test_family;
+ socklen_t len;
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+
+ ev.events = EPOLLIN;
+ for (i = 0; i < count; ++i) {
+ ev.data.fd = rcv_fds[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ send_from_v4(proto);
+
+ test_fd = receive_once(epfd, proto);
+ len = sizeof(test_family);
+ if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+ error(1, errno, "failed to read socket domain");
+ if (test_family != AF_INET)
+ error(1, 0, "expected to receive on v4 socket but got v6 (%d)",
+ test_family);
+
+ close(epfd);
+}
+
+int main(void)
+{
+ int rcv_fds[32], i;
+
+ fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_DGRAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 created before IPv4 ----\n");
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_DGRAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group.
+ */
+ fprintf(stderr, "---- UDP IPv4 created before IPv6 (large) ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 16);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[16]), 16);
+ test(rcv_fds, 32, SOCK_DGRAM);
+ for (i = 0; i < 32; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 created before IPv4 (large) ----\n");
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 16);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[16]), 16);
+ test(rcv_fds, 32, SOCK_DGRAM);
+ for (i = 0; i < 32; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv4 created before IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_STREAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv6 created before IPv4 ----\n");
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET, SOCK_STREAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_STREAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
new file mode 100755
index 000000000..ff665de78
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -0,0 +1,1023 @@
+#!/bin/bash
+#
+# This test checks rtnetlink callpaths and tries to get as much coverage as possible.
+#
+# set -e
+
+devdummy="test-dummy0"
+ret=0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# set the global exit status, but never reset a nonzero one.
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
+# same but inverted -- used when a command must fail for the test to pass
+check_fail()
+{
+ if [ $1 -eq 0 ]; then
+ ret=1
+ fi
+}
+
+kci_add_dummy()
+{
+ ip link add name "$devdummy" type dummy
+ check_err $?
+ ip link set "$devdummy" up
+ check_err $?
+}
+
+kci_del_dummy()
+{
+ ip link del dev "$devdummy"
+ check_err $?
+}
+
+kci_test_netconf()
+{
+ dev="$1"
+ r=$ret
+
+ ip netconf show dev "$dev" > /dev/null
+ check_err $?
+
+ for f in 4 6; do
+ ip -$f netconf show dev "$dev" > /dev/null
+ check_err $?
+ done
+
+ if [ $ret -ne 0 ] ;then
+ echo "FAIL: ip netconf show $dev"
+ test $r -eq 0 && ret=0
+ return 1
+ fi
+}
+
+# add a bridge with vlans on top
+kci_test_bridge()
+{
+ devbr="test-br0"
+ vlandev="testbr-vlan1"
+
+ ret=0
+ ip link add name "$devbr" type bridge
+ check_err $?
+
+ ip link set dev "$devdummy" master "$devbr"
+ check_err $?
+
+ ip link set "$devbr" up
+ check_err $?
+
+ ip link add link "$devbr" name "$vlandev" type vlan id 1
+ check_err $?
+ ip addr add dev "$vlandev" 10.200.7.23/30
+ check_err $?
+ ip -6 addr add dev "$vlandev" dead:42::1234/64
+ check_err $?
+ ip -d link > /dev/null
+ check_err $?
+ ip r s t all > /dev/null
+ check_err $?
+
+ for name in "$devbr" "$vlandev" "$devdummy" ; do
+ kci_test_netconf "$name"
+ done
+
+ ip -6 addr del dev "$vlandev" dead:42::1234/64
+ check_err $?
+
+ ip link del dev "$vlandev"
+ check_err $?
+ ip link del dev "$devbr"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: bridge setup"
+ return 1
+ fi
+ echo "PASS: bridge setup"
+
+}
+
+kci_test_gre()
+{
+ gredev=neta
+ rem=10.42.42.1
+ loc=10.0.0.1
+
+ ret=0
+ ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+ check_err $?
+ ip link set $gredev up
+ check_err $?
+ ip addr add 10.23.7.10 dev $gredev
+ check_err $?
+ ip route add 10.23.8.0/30 dev $gredev
+ check_err $?
+ ip addr add dev "$devdummy" 10.23.7.11/24
+ check_err $?
+ ip link > /dev/null
+ check_err $?
+ ip addr > /dev/null
+ check_err $?
+
+ kci_test_netconf "$gredev"
+
+ ip addr del dev "$devdummy" 10.23.7.11/24
+ check_err $?
+
+ ip link del $gredev
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: gre tunnel endpoint"
+ return 1
+ fi
+ echo "PASS: gre tunnel endpoint"
+}
+
+# tc uses rtnetlink too, for full tc testing
+# please see tools/testing/selftests/tc-testing.
+kci_test_tc()
+{
+ dev=lo
+ ret=0
+
+ tc qdisc add dev "$dev" root handle 1: htb
+ check_err $?
+ tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+ check_err $?
+ tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+ check_err $?
+ tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+ check_err $?
+ tc filter show dev "$dev" parent 1:0 > /dev/null
+ check_err $?
+ tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+ check_err $?
+ tc filter show dev "$dev" parent 1:0 > /dev/null
+ check_err $?
+ tc qdisc del dev "$dev" root handle 1: htb
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: tc htb hierarchy"
+ return 1
+ fi
+ echo "PASS: tc htb hierarchy"
+
+}
+
+kci_test_polrouting()
+{
+ ret=0
+ ip rule add fwmark 1 lookup 100
+ check_err $?
+ ip route add local 0.0.0.0/0 dev lo table 100
+ check_err $?
+ ip r s t all > /dev/null
+ check_err $?
+ ip rule del fwmark 1 lookup 100
+ check_err $?
+ ip route del local 0.0.0.0/0 dev lo table 100
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: policy route test"
+ return 1
+ fi
+ echo "PASS: policy routing"
+}
+
+kci_test_route_get()
+{
+ ret=0
+
+ ip route get 127.0.0.1 > /dev/null
+ check_err $?
+ ip route get 127.0.0.1 dev "$devdummy" > /dev/null
+ check_err $?
+ ip route get ::1 > /dev/null
+ check_err $?
+ ip route get fe80::1 dev "$devdummy" > /dev/null
+ check_err $?
+ ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
+ check_err $?
+ ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
+ check_err $?
+ ip addr add dev "$devdummy" 10.23.7.11/24
+ check_err $?
+ ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
+ check_err $?
+ ip addr del dev "$devdummy" 10.23.7.11/24
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: route get"
+ return 1
+ fi
+
+ echo "PASS: route get"
+}
+
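+# Addresses are added with a valid_lft of at most 4 seconds, so after
+# sleeping 5 seconds the kernel should have expired and removed them all.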
+kci_test_addrlft()
+{
+ for i in $(seq 10 100) ;do
+ lft=$(((RANDOM%3) + 1))
+ ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
+ check_err $?
+ done
+
+ sleep 5
+
+ ip addr show dev "$devdummy" | grep "10.23.11."
+ if [ $? -eq 0 ]; then
+ echo "FAIL: preferred_lft addresses remaining"
+ check_err 1
+ return
+ fi
+
+ echo "PASS: preferred_lft addresses have expired"
+}
+
+kci_test_addrlabel()
+{
+ ret=0
+
+ ip addrlabel add prefix dead::/64 dev lo label 1
+ check_err $?
+
+ ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
+ check_err $?
+
+ ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
+ check_err $?
+
+ ip addrlabel add prefix dead::/64 label 1 2> /dev/null
+ check_err $?
+
+ ip addrlabel del prefix dead::/64 label 1 2> /dev/null
+ check_err $?
+
+ # concurrent add/delete
+ for i in $(seq 1 1000); do
+ ip addrlabel add prefix 1c3::/64 label 12345 2>/dev/null
+ done &
+
+ for i in $(seq 1 1000); do
+ ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+ done
+
+ wait
+
+ ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: ipv6 addrlabel"
+ return 1
+ fi
+
+ echo "PASS: ipv6 addrlabel"
+}
+
+kci_test_ifalias()
+{
+ ret=0
+ namewant=$(uuidgen)
+ syspathname="/sys/class/net/$devdummy/ifalias"
+
+ ip link set dev "$devdummy" alias "$namewant"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+ return 1
+ fi
+
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_err $?
+
+ if [ -r "$syspathname" ] ; then
+ read namehave < "$syspathname"
+ if [ "$namewant" != "$namehave" ]; then
+ echo "FAIL: did set ifalias $namewant but got $namehave"
+ return 1
+ fi
+
+ namewant=$(uuidgen)
+ echo "$namewant" > "$syspathname"
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_err $?
+
+	# the sysfs interface also allows deleting the alias again
+ echo "" > "$syspathname"
+
+ ip link show "$devdummy" | grep -q "alias $namewant"
+ check_fail $?
+
+ for i in $(seq 1 100); do
+ uuidgen > "$syspathname" &
+ done
+
+ wait
+
+	# re-add the alias -- the kernel should free the memory when the dummy device is removed
+ ip link set dev "$devdummy" alias "$namewant"
+ check_err $?
+ fi
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: set interface alias $devdummy to $namewant"
+ return 1
+ fi
+
+ echo "PASS: set ifalias $namewant for $devdummy"
+}
+
+kci_test_vrf()
+{
+ vrfname="test-vrf"
+ ret=0
+
+ ip link show type vrf 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: vrf: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip link add "$vrfname" type vrf table 10
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: can't add vrf interface, skipping test"
+ return 0
+ fi
+
+ ip -br link show type vrf | grep -q "$vrfname"
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: created vrf device not found"
+ return 1
+ fi
+
+ ip link set dev "$vrfname" up
+ check_err $?
+
+ ip link set dev "$devdummy" master "$vrfname"
+ check_err $?
+ ip link del dev "$vrfname"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: vrf"
+ return 1
+ fi
+
+ echo "PASS: vrf"
+}
+
+kci_test_encap_vxlan()
+{
+ ret=0
+ vxlan="test-vxlan0"
+ vlan="test-vlan0"
+ testns="$1"
+
+ ip netns exec "$testns" ip link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+ dev "$devdummy" dstport 4789 2>/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: can't add vxlan interface, skipping test"
+ return 0
+ fi
+ check_err $?
+
+ ip netns exec "$testns" ip addr add 10.2.11.49/24 dev "$vxlan"
+ check_err $?
+
+ ip netns exec "$testns" ip link set up dev "$vxlan"
+ check_err $?
+
+ ip netns exec "$testns" ip link add link "$vxlan" name "$vlan" type vlan id 1
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$vxlan"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: vxlan"
+ return 1
+ fi
+ echo "PASS: vxlan"
+}
+
+kci_test_encap_fou()
+{
+ ret=0
+ name="test-fou"
+ testns="$1"
+
+	ip fou help 2>&1 | grep -q 'Usage: ip fou'
+ if [ $? -ne 0 ];then
+ echo "SKIP: fou: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$testns" ip fou add port 7777 ipproto 47 2>/dev/null
+ if [ $? -ne 0 ];then
+ echo "FAIL: can't add fou port 7777, skipping test"
+ return 1
+ fi
+
+ ip netns exec "$testns" ip fou add port 8888 ipproto 4
+ check_err $?
+
+ ip netns exec "$testns" ip fou del port 9999 2>/dev/null
+ check_fail $?
+
+ ip netns exec "$testns" ip fou del port 7777
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: fou"
+ return 1
+ fi
+
+ echo "PASS: fou"
+}
+
+# test various encap methods, use netns to avoid unwanted interference
+kci_test_encap()
+{
+ testns="testns"
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP encap tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$testns" ip link set lo up
+ check_err $?
+
+ ip netns exec "$testns" ip link add name "$devdummy" type dummy
+ check_err $?
+ ip netns exec "$testns" ip link set "$devdummy" up
+ check_err $?
+
+ kci_test_encap_vxlan "$testns"
+ kci_test_encap_fou "$testns"
+
+ ip netns del "$testns"
+}
+
+kci_test_macsec()
+{
+ msname="test_macsec0"
+ ret=0
+
+ ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ if [ $? -ne 0 ]; then
+ echo "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ check_err $?
+ if [ $ret -ne 0 ];then
+ echo "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ check_err $?
+
+ ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+ check_err $?
+
+ ip macsec show > /dev/null
+ check_err $?
+
+ ip link del dev "$msname"
+ check_err $?
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: macsec"
+ return 1
+ fi
+
+ echo "PASS: macsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 14.0.0.52/24 dst 14.0.0.70/24
+# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+# ip x s update
+# ip x s allocspi
+# ip x s deleteall
+# ip x p update
+# ip x p deleteall
+# ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+ ret=0
+ algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+ srcip=192.168.123.1
+ dstip=192.168.123.2
+ spi=7
+
+ ip addr add $srcip dev $devdummy
+
+ # flush to be sure there's nothing configured
+ ip x s flush ; ip x p flush
+ check_err $?
+
+ # start the monitor in the background
+ tmpfile=`mktemp /var/run/ipsectestXXX`
+ mpid=`(ip x m > $tmpfile & echo $!) 2>/dev/null`
+ sleep 0.2
+
+ ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+ ip x s add $ipsecid \
+ mode transport reqid 0x07 replay-window 32 \
+ $algo sel src $srcip/24 dst $dstip/24
+ check_err $?
+
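+	# both the SA header line and its selector line contain src and
+	# dst, hence the expected count of 2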
+ lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x s count | grep -q "SAD count 1"
+ check_err $?
+
+ lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x s delete $ipsecid
+ check_err $?
+
+ lines=`ip x s list | wc -l`
+ test $lines -eq 0
+ check_err $?
+
+ ipsecsel="dir out src $srcip/24 dst $dstip/24"
+ ip x p add $ipsecsel \
+ tmpl proto esp src $srcip dst $dstip \
+ spi 0x07 mode transport reqid 0x07
+ check_err $?
+
+ lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
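+	# exactly one outbound policy should now be installed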
+ ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0"
+ check_err $?
+
+ lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+ test $lines -eq 2
+ check_err $?
+
+ ip x p delete $ipsecsel
+ check_err $?
+
+ lines=`ip x p list | wc -l`
+ test $lines -eq 0
+ check_err $?
+
+ # check the monitor results
+ kill $mpid
+ lines=`wc -l $tmpfile | cut "-d " -f1`
+ test $lines -eq 20
+ check_err $?
+ rm -rf $tmpfile
+
+ # clean up any leftovers
+ ip x s flush
+ check_err $?
+ ip x p flush
+ check_err $?
+ ip addr del $srcip/32 dev $devdummy
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec"
+ return 1
+ fi
+ echo "PASS: ipsec"
+}
+
+#-------------------------------------------------------------------
+# Example commands
+# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07 replay-window 32 \
+# aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+# sel src 14.0.0.52/24 dst 14.0.0.70/24
+# offload dev sim1 dir out
+# ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+# tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+# spi 0x07 mode transport reqid 0x07
+#
+#-------------------------------------------------------------------
+kci_test_ipsec_offload()
+{
+ ret=0
+ algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+ srcip=192.168.123.3
+ dstip=192.168.123.4
+ dev=simx1
+ sysfsd=/sys/kernel/debug/netdevsim/$dev
+ sysfsf=$sysfsd/ipsec
+
+ # setup netdevsim since dummydev doesn't have offload support
+ modprobe netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec_offload can't load netdevsim"
+ return 1
+ fi
+
+ ip link add $dev type netdevsim
+ ip addr add $srcip dev $dev
+ ip link set $dev up
+ if [ ! -d $sysfsd ] ; then
+ echo "FAIL: ipsec_offload can't create device $dev"
+ return 1
+ fi
+ if [ ! -f $sysfsf ] ; then
+ echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+ return 1
+ fi
+
+ # flush to be sure there's nothing configured
+ ip x s flush ; ip x p flush
+
+ # create offloaded SAs, both in and out
+ ip x p add dir out src $srcip/24 dst $dstip/24 \
+ tmpl proto esp src $srcip dst $dstip spi 9 \
+ mode transport reqid 42
+ check_err $?
+ ip x p add dir out src $dstip/24 dst $srcip/24 \
+ tmpl proto esp src $dstip dst $srcip spi 9 \
+ mode transport reqid 42
+ check_err $?
+
+ ip x s add proto esp src $srcip dst $dstip spi 9 \
+ mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
+ offload dev $dev dir out
+ check_err $?
+ ip x s add proto esp src $dstip dst $srcip spi 9 \
+ mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
+ offload dev $dev dir in
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec_offload can't create SA"
+ return 1
+ fi
+
+ # does offload show up in ip output
+ lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
+ if [ $lines -ne 2 ] ; then
+ echo "FAIL: ipsec_offload SA offload missing from list output"
+ check_err 1
+ fi
+
+ # use ping to exercise the Tx path
+ ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
+
+ # does driver have correct offload info
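+	# (expect both SAs plus a tx count matching the three pings above)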
+ diff $sysfsf - << EOF
+SA count=2 tx=3
+sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
+sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[0] key=0x34333231 38373635 32313039 36353433
+sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0
+sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
+sa[1] key=0x34333231 38373635 32313039 36353433
+EOF
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: ipsec_offload incorrect driver data"
+ check_err 1
+ fi
+
+ # does offload get removed from driver
+ ip x s flush
+ ip x p flush
+ lines=`grep -c "SA count=0" $sysfsf`
+ if [ $lines -ne 1 ] ; then
+ echo "FAIL: ipsec_offload SA not removed from driver"
+ check_err 1
+ fi
+
+ # clean up any leftovers
+ ip link del $dev
+ rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ipsec_offload"
+ return 1
+ fi
+ echo "PASS: ipsec_offload"
+}
+
+kci_test_gretap()
+{
+ testns="testns"
+ DEV_NS=gretap00
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP gretap tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ ip link help gretap 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: gretap: iproute2 too old"
+ ip netns del "$testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap seq \
+ key 102 local 172.16.1.100 remote 172.16.1.200
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type gretap external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: gretap"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: gretap"
+
+ ip netns del "$testns"
+}
+
+kci_test_ip6gretap()
+{
+ testns="testns"
+ DEV_NS=ip6gretap00
+ ret=0
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: ip6gretap: iproute2 too old"
+ ip netns del "$testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap seq \
+ key 102 local fc00:100::1 remote fc00:100::2
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" fc00:200::1/96
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6gretap external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ip6gretap"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: ip6gretap"
+
+ ip netns del "$testns"
+}
+
+kci_test_erspan()
+{
+ testns="testns"
+ DEV_NS=erspan00
+ ret=0
+
+ ip link help erspan 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: erspan: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP erspan tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel erspan v1
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+ key 102 local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 1 erspan 488
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test native tunnel erspan v2
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan seq \
+ key 102 local 172.16.1.100 remote 172.16.1.200 \
+ erspan_ver 2 erspan_dir ingress erspan_hwid 7
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type erspan external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: erspan"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: erspan"
+
+ ip netns del "$testns"
+}
+
+kci_test_ip6erspan()
+{
+ testns="testns"
+ DEV_NS=ip6erspan00
+ ret=0
+
+ ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+ if [ $? -ne 0 ];then
+ echo "SKIP: ip6erspan: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ ip netns add "$testns"
+ if [ $? -ne 0 ]; then
+ echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ # test native tunnel ip6erspan v1
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+ key 102 local fc00:100::1 remote fc00:100::2 \
+ erspan_ver 1 erspan 488
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test native tunnel ip6erspan v2
+ ip netns exec "$testns" ip link add dev "$DEV_NS" type ip6erspan seq \
+ key 102 local fc00:100::1 remote fc00:100::2 \
+ erspan_ver 2 erspan_dir ingress erspan_hwid 7
+ check_err $?
+
+ ip netns exec "$testns" ip addr add dev "$DEV_NS" 10.1.1.100/24
+ check_err $?
+
+ ip netns exec "$testns" ip link set dev $DEV_NS up
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ # test external mode
+ ip netns exec "$testns" ip link add dev "$DEV_NS" \
+ type ip6erspan external
+ check_err $?
+
+ ip netns exec "$testns" ip link del "$DEV_NS"
+ check_err $?
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: ip6erspan"
+ ip netns del "$testns"
+ return 1
+ fi
+ echo "PASS: ip6erspan"
+
+ ip netns del "$testns"
+}
+
+kci_test_rtnl()
+{
+ kci_add_dummy
+ if [ $ret -ne 0 ];then
+ echo "FAIL: cannot add dummy interface"
+ return 1
+ fi
+
+ kci_test_polrouting
+ kci_test_route_get
+ kci_test_addrlft
+ kci_test_tc
+ kci_test_gre
+ kci_test_gretap
+ kci_test_ip6gretap
+ kci_test_erspan
+ kci_test_ip6erspan
+ kci_test_bridge
+ kci_test_addrlabel
+ kci_test_ifalias
+ kci_test_vrf
+ kci_test_encap
+ kci_test_macsec
+ kci_test_ipsec
+ kci_test_ipsec_offload
+
+ kci_del_dummy
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+for x in ip tc;do
+ $x -Version 2>/dev/null >/dev/null
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without the $x tool"
+ exit $ksft_skip
+ fi
+done
+
+kci_test_rtnl
+
+exit $ret
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
new file mode 100755
index 000000000..bea079edc
--- /dev/null
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ $(id -u) != 0 ]; then
+	echo "must be run as root" >&2
+ exit 0
+fi
+
+echo "--------------------"
+echo "running psock_fanout test"
+echo "--------------------"
+./in_netns.sh ./psock_fanout
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running psock_tpacket test"
+echo "--------------------"
+./in_netns.sh ./psock_tpacket
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+else
+ echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests
new file mode 100755
index 000000000..14e41faf2
--- /dev/null
+++ b/tools/testing/selftests/net/run_netsocktests
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+echo "--------------------"
+echo "running socket test"
+echo "--------------------"
+./socket
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exit 1
+else
+ echo "[PASS]"
+fi
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
new file mode 100644
index 000000000..afca1ead6
--- /dev/null
+++ b/tools/testing/selftests/net/socket.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+struct socket_testcase {
+ int domain;
+ int type;
+ int protocol;
+
+ /* 0 = valid file descriptor
+ * -foo = error foo
+ */
+ int expect;
+
+ /* If non-zero, accept EAFNOSUPPORT to handle the case
+ * of the protocol not being configured into the kernel.
+ */
+ int nosupport_ok;
+};
+
+static struct socket_testcase tests[] = {
+ { AF_MAX, 0, 0, -EAFNOSUPPORT, 0 },
+ { AF_INET, SOCK_STREAM, IPPROTO_TCP, 0, 1 },
+ { AF_INET, SOCK_DGRAM, IPPROTO_TCP, -EPROTONOSUPPORT, 1 },
+ { AF_INET, SOCK_DGRAM, IPPROTO_UDP, 0, 1 },
+ { AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 },
+};
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define ERR_STRING_SZ 64
+
+static int run_tests(void)
+{
+ char err_string1[ERR_STRING_SZ];
+ char err_string2[ERR_STRING_SZ];
+ int i, err;
+
+ err = 0;
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct socket_testcase *s = &tests[i];
+ int fd;
+
+ fd = socket(s->domain, s->type, s->protocol);
+ if (fd < 0) {
+ if (s->nosupport_ok &&
+ errno == EAFNOSUPPORT)
+ continue;
+
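+			/* s->expect stores the expected error negated */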
+ if (s->expect < 0 &&
+ errno == -s->expect)
+ continue;
+
+ strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+ strerror_r(errno, err_string2, ERR_STRING_SZ);
+
+ fprintf(stderr, "socket(%d, %d, %d) expected "
+ "err (%s) got (%s)\n",
+ s->domain, s->type, s->protocol,
+ err_string1, err_string2);
+
+ err = -1;
+ break;
+ } else {
+ close(fd);
+
+ if (s->expect < 0) {
+ strerror_r(errno, err_string1, ERR_STRING_SZ);
+
+ fprintf(stderr, "socket(%d, %d, %d) expected "
+ "success got err (%s)\n",
+ s->domain, s->type, s->protocol,
+ err_string1);
+
+ err = -1;
+ break;
+ }
+ }
+ }
+
+ return err;
+}
+
+int main(void)
+{
+ int err = run_tests();
+
+ return err;
+}
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000..d044b29dd
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr4->sin_port = htons(port);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_loopback;
+ addr6->sin6_port = htons(port);
+ break;
+ default:
+ error(1, 0, "illegal family");
+ }
+}
+
+void *start_server(void *arg)
+{
+ int server_fd = (int)(unsigned long)arg;
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ char *buf;
+ int fd;
+ int r;
+
+ buf = malloc(BUF_SIZE);
+
+ for (;;) {
+ fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ break;
+ }
+ do {
+ r = send(fd, buf, BUF_SIZE, 0);
+ } while (r < 0 && errno == EINTR);
+ if (r < 0)
+ perror("send");
+ if (r != BUF_SIZE)
+ fprintf(stderr, "can only send %d bytes\n", r);
+		/* TCP_INQ can overestimate the in-queue amount by one byte
+		 * once we have sent the FIN packet. Sleep for 1 second so
+		 * that the client has likely invoked recvmsg() before we
+		 * close.
+		 */
+ sleep(1);
+ close(fd);
+ }
+
+ free(buf);
+ close(server_fd);
+ pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listen_addr, addr;
+ int c, one = 1, inq = -1;
+ pthread_t server_thread;
+ char cmsgbuf[CMSG_SIZE];
+ struct iovec iov[1];
+ struct cmsghdr *cm;
+ struct msghdr msg;
+ int server_fd, fd;
+ char *buf;
+
+ while ((c = getopt(argc, argv, "46p:")) != -1) {
+ switch (c) {
+ case '4':
+ family = PF_INET;
+ addr_len = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ family = PF_INET6;
+ addr_len = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ port = atoi(optarg);
+ break;
+ }
+ }
+
+ server_fd = socket(family, SOCK_STREAM, 0);
+ if (server_fd < 0)
+ error(1, errno, "server socket");
+ setup_loopback_addr(family, &listen_addr);
+ if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+ &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+ if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+ addr_len) == -1)
+ error(1, errno, "bind");
+ if (listen(server_fd, 128) == -1)
+ error(1, errno, "listen");
+ if (pthread_create(&server_thread, NULL, start_server,
+ (void *)(unsigned long)server_fd) != 0)
+ error(1, errno, "pthread_create");
+
+ fd = socket(family, SOCK_STREAM, 0);
+ if (fd < 0)
+ error(1, errno, "client socket");
+ setup_loopback_addr(family, &addr);
+ if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+ error(1, errno, "connect");
+ if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+ error(1, errno, "setsockopt(TCP_INQ)");
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsgbuf;
+ msg.msg_controllen = sizeof(cmsgbuf);
+ msg.msg_flags = 0;
+
+ buf = malloc(BUF_SIZE);
+ iov[0].iov_base = buf;
+ iov[0].iov_len = BUF_SIZE / 2;
+
+ if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+ error(1, errno, "recvmsg");
+ if (msg.msg_flags & MSG_CTRUNC)
+ error(1, 0, "control message is truncated");
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+ if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+ inq = *((int *) CMSG_DATA(cm));
+
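+	/* we consumed BUF_SIZE / 2 of the BUF_SIZE bytes the server sent,
+	 * so the kernel should report the other half as still queued
+	 */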
+ if (inq != BUF_SIZE - iov[0].iov_len) {
+ fprintf(stderr, "unexpected inq: %d\n", inq);
+ exit(1);
+ }
+
+ printf("PASSED\n");
+ free(buf);
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000..e8c5dff44
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note: a NIC with header split is needed to use mmap() on TCP:
+ * each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on the loopback interface:
+ *
+ * ifconfig lo mtu 61512 # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ * tcp_mmap -s -z &
+ * tcp_mmap -H ::1 -z
+ *
+ * Or leave the default lo mtu, but use the -M option to set TCP_MAXSEG to (4096 + 12)
+ * (4096 : page size on x86, 12: TCP TS option length)
+ * tcp_mmap -s -z -M $((4096+12)) &
+ * tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: the -z option on the sender uses MSG_ZEROCOPY, which forces a copy when packets go through the loopback interface.
+ * We could use sendfile() instead, but this test program is really about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s & # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ * cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ * cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ * cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ * cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1 # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z & # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ * cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ * cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ * cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ * cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
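+/* each run transfers 1UL << 35 bytes, i.e. 32 GiB, per connection */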
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning. Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option */
+static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver) */
+static int xflg; /* hash received data (simple xor) (-h option) */
+static int keepflag; /* -k option: receiver shall keep all received data in memory (no munmap() calls) */
+
+static int chunk_size = 512*1024;
+
+unsigned long htotal;
+
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+ asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
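+/* xor every received byte into htotal, eight longs at a time; used by
+ * the -x option to force each received byte to actually be touched
+ */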
+void hash_zone(void *zone, unsigned int length)
+{
+ unsigned long temp = htotal;
+
+ while (length >= 8*sizeof(long)) {
+ prefetch(zone + 384);
+ temp ^= *(unsigned long *)zone;
+ temp ^= *(unsigned long *)(zone + sizeof(long));
+ temp ^= *(unsigned long *)(zone + 2*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 3*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 4*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 5*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 6*sizeof(long));
+ temp ^= *(unsigned long *)(zone + 7*sizeof(long));
+ zone += 8*sizeof(long);
+ length -= 8*sizeof(long);
+ }
+ while (length >= 1) {
+ temp ^= *(unsigned char *)zone;
+ zone += 1;
+ length--;
+ }
+ htotal = temp;
+}
+
+void *child_thread(void *arg)
+{
+ unsigned long total_mmap = 0, total = 0;
+ struct tcp_zerocopy_receive zc;
+ unsigned long delta_usec;
+ int flags = MAP_SHARED;
+ struct timeval t0, t1;
+ char *buffer = NULL;
+ void *addr = NULL;
+ double throughput;
+ struct rusage ru;
+ int lu, fd;
+
+ fd = (int)(unsigned long)arg;
+
+ gettimeofday(&t0, NULL);
+
+ fcntl(fd, F_SETFL, O_NDELAY);
+ buffer = malloc(chunk_size);
+ if (!buffer) {
+ perror("malloc");
+ goto error;
+ }
+ if (zflg) {
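+		/* reserve address space for the kernel to map received
+		 * pages into
+		 */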
+ addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+ if (addr == (void *)-1)
+ zflg = 0;
+ }
+ while (1) {
+ struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+ int sub;
+
+ poll(&pfd, 1, 10000);
+ if (zflg) {
+ socklen_t zc_len = sizeof(zc);
+ int res;
+
+ zc.address = (__u64)addr;
+ zc.length = chunk_size;
+ zc.recv_skip_hint = 0;
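+			/* TCP_ZEROCOPY_RECEIVE maps up to zc.length bytes
+			 * of payload at zc.address; recv_skip_hint returns
+			 * bytes that must be consumed with a regular read()
+			 * instead
+			 */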
+ res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+ &zc, &zc_len);
+ if (res == -1)
+ break;
+
+ if (zc.length) {
+ assert(zc.length <= chunk_size);
+ total_mmap += zc.length;
+ if (xflg)
+ hash_zone(addr, zc.length);
+ total += zc.length;
+ }
+ if (zc.recv_skip_hint) {
+ assert(zc.recv_skip_hint <= chunk_size);
+ lu = read(fd, buffer, zc.recv_skip_hint);
+ if (lu > 0) {
+ if (xflg)
+ hash_zone(buffer, lu);
+ total += lu;
+ }
+ }
+ continue;
+ }
+ sub = 0;
+ while (sub < chunk_size) {
+ lu = read(fd, buffer + sub, chunk_size - sub);
+ if (lu == 0)
+ goto end;
+ if (lu < 0)
+ break;
+ if (xflg)
+ hash_zone(buffer + sub, lu);
+ total += lu;
+ sub += lu;
+ }
+ }
+end:
+ gettimeofday(&t1, NULL);
+ delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+ throughput = 0;
+ if (delta_usec)
+ throughput = total * 8.0 / (double)delta_usec / 1000.0;
+ getrusage(RUSAGE_THREAD, &ru);
+ if (total > 1024*1024) {
+ unsigned long total_usec;
+ unsigned long mb = total >> 20;
+ total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+ 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+ printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+ " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+ total / (1024.0 * 1024.0),
+ 100.0*total_mmap/total,
+ (double)delta_usec / 1000000.0,
+ throughput,
+ (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+ (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+ (double)total_usec/mb,
+ ru.ru_nvcsw);
+ }
+error:
+ free(buffer);
+ close(fd);
+ if (zflg)
+ munmap(addr, chunk_size);
+ pthread_exit(0);
+}
+
+static void apply_rcvsnd_buf(int fd)
+{
+ if (rcvbuf && setsockopt(fd, SOL_SOCKET,
+ SO_RCVBUF, &rcvbuf, sizeof(rcvbuf)) == -1) {
+ perror("setsockopt SO_RCVBUF");
+ }
+
+ if (sndbuf && setsockopt(fd, SOL_SOCKET,
+ SO_SNDBUF, &sndbuf, sizeof(sndbuf)) == -1) {
+ perror("setsockopt SO_SNDBUF");
+ }
+}
+
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void do_accept(int fdlisten)
+{
+ if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+ &chunk_size, sizeof(chunk_size)) == -1) {
+ perror("setsockopt SO_RCVLOWAT");
+ }
+
+ apply_rcvsnd_buf(fdlisten);
+
+ while (1) {
+ struct sockaddr_in addr;
+ socklen_t addrlen = sizeof(addr);
+ pthread_t th;
+ int fd, res;
+
+ fd = accept(fdlisten, (struct sockaddr *)&addr, &addrlen);
+ if (fd == -1) {
+ perror("accept");
+ continue;
+ }
+ res = pthread_create(&th, NULL, child_thread,
+ (void *)(unsigned long)fd);
+ if (res) {
+ errno = res;
+ perror("pthread_create");
+ close(fd);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage listenaddr, addr;
+ unsigned int max_pacing_rate = 0;
+ unsigned long total = 0;
+ char *host = NULL;
+ int fd, c, on = 1;
+ char *buffer;
+ int sflg = 0;
+ int mss = 0;
+
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'p':
+ cfg_port = atoi(optarg);
+ break;
+ case 'H':
+ host = optarg;
+ break;
+ case 's': /* server : listen for incoming connections */
+ sflg++;
+ break;
+ case 'r':
+ rcvbuf = atoi(optarg);
+ break;
+ case 'w':
+ sndbuf = atoi(optarg);
+ break;
+ case 'z':
+ zflg = 1;
+ break;
+ case 'M':
+ mss = atoi(optarg);
+ break;
+ case 'x':
+ xflg = 1;
+ break;
+ case 'k':
+ keepflag = 1;
+ break;
+ case 'P':
+			max_pacing_rate = atoi(optarg);
+ break;
+ default:
+ exit(1);
+ }
+ }
+ if (sflg) {
+ int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+ if (fdlisten == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fdlisten);
+ setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+ setup_sockaddr(cfg_family, host, &listenaddr);
+
+ if (mss &&
+ setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+ &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+ perror("bind");
+ exit(1);
+ }
+ if (listen(fdlisten, 128) == -1) {
+ perror("listen");
+ exit(1);
+ }
+ do_accept(fdlisten);
+ }
+ buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (buffer == (char *)-1) {
+ perror("mmap");
+ exit(1);
+ }
+
+ fd = socket(cfg_family, SOCK_STREAM, 0);
+ if (fd == -1) {
+ perror("socket");
+ exit(1);
+ }
+ apply_rcvsnd_buf(fd);
+
+ setup_sockaddr(cfg_family, host, &addr);
+
+ if (mss &&
+ setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+ perror("setsockopt TCP_MAXSEG");
+ exit(1);
+ }
+ if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+ perror("connect");
+ exit(1);
+ }
+ if (max_pacing_rate &&
+ setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+ &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+ perror("setsockopt SO_MAX_PACING_RATE");
+
+ if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+ &on, sizeof(on)) == -1) {
+ perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+ zflg = 0;
+ }
+ while (total < FILE_SZ) {
+ long wr = FILE_SZ - total;
+
+ if (wr > chunk_size)
+ wr = chunk_size;
+ /* Note : we just want to fill the pipe with 0 bytes */
+ wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+ if (wr <= 0)
+ break;
+ total += wr;
+ }
+ close(fd);
+ munmap(buffer, chunk_size);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/test_bpf.sh b/tools/testing/selftests/net/test_bpf.sh
new file mode 100755
index 000000000..65677909c
--- /dev/null
+++ b/tools/testing/selftests/net/test_bpf.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs bpf test using test_bpf kernel module
+
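+# test_bpf runs its test suite at module load time and fails to load
+# if any test fails, so a successful modprobe means the tests passed.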
+if /sbin/modprobe -q test_bpf ; then
+ /sbin/modprobe -q -r test_bpf;
+ echo "test_bpf: ok";
+else
+ echo "test_bpf: [FAIL]";
+ exit 1;
+fi
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
new file mode 100644
index 000000000..7549d39cc
--- /dev/null
+++ b/tools/testing/selftests/net/tls.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/tls.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+
+#include <sys/types.h>
+#include <sys/sendfile.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include "../kselftest_harness.h"
+
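+/* 16384 bytes is the maximum TLS record payload size (2^14) */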
+#define TLS_PAYLOAD_MAX_LEN 16384
+#define SOL_TLS 282
+
+FIXTURE(tls)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_SETUP(tls)
+{
+ struct tls12_crypto_info_aes_gcm_128 tls12;
+ struct sockaddr_in addr;
+ socklen_t len;
+ int sfd, ret;
+
+ self->notls = false;
+ len = sizeof(addr);
+
+ memset(&tls12, 0, sizeof(tls12));
+ tls12.info.version = TLS_1_2_VERSION;
+ tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+
+ self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+ ret = bind(sfd, &addr, sizeof(addr));
+ ASSERT_EQ(ret, 0);
+ ret = listen(sfd, 10);
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockname(sfd, &addr, &len);
+ ASSERT_EQ(ret, 0);
+
+ ret = connect(self->fd, &addr, sizeof(addr));
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (ret != 0) {
+ self->notls = true;
+ printf("Failure setting TCP_ULP, testing without tls\n");
+ }
+
+ if (!self->notls) {
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
+ sizeof(tls12));
+ ASSERT_EQ(ret, 0);
+ }
+
+ self->cfd = accept(sfd, &addr, &len);
+ ASSERT_GE(self->cfd, 0);
+
+ if (!self->notls) {
+ ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
+ sizeof("tls"));
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
+ sizeof(tls12));
+ ASSERT_EQ(ret, 0);
+ }
+
+ close(sfd);
+}
+
+FIXTURE_TEARDOWN(tls)
+{
+ close(self->fd);
+ close(self->cfd);
+}
+
+TEST_F(tls, sendfile)
+{
+ int filefd = open("/proc/self/exe", O_RDONLY);
+ struct stat st;
+
+ EXPECT_GE(filefd, 0);
+ fstat(filefd, &st);
+ EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+}
+
+TEST_F(tls, send_then_sendfile)
+{
+ int filefd = open("/proc/self/exe", O_RDONLY);
+ char const *test_str = "test_send";
+ int to_send = strlen(test_str) + 1;
+ char recv_buf[10];
+ struct stat st;
+ char *buf;
+
+ EXPECT_GE(filefd, 0);
+ fstat(filefd, &st);
+ buf = (char *)malloc(st.st_size);
+
+ EXPECT_EQ(send(self->fd, test_str, to_send, 0), to_send);
+ EXPECT_EQ(recv(self->cfd, recv_buf, to_send, 0), to_send);
+ EXPECT_EQ(memcmp(test_str, recv_buf, to_send), 0);
+
+ EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+ EXPECT_EQ(recv(self->cfd, buf, st.st_size, 0), st.st_size);
+}
+
+TEST_F(tls, recv_max)
+{
+ unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+ char recv_mem[TLS_PAYLOAD_MAX_LEN];
+ char buf[TLS_PAYLOAD_MAX_LEN];
+
+ EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
+ EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recv_small)
+{
+ char const *test_str = "test_read";
+ int send_len = 10;
+ char buf[10];
+
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, msg_more)
+{
+ char const *test_str = "test_read";
+ int send_len = 10;
+ char buf[10 * 2];
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, MSG_DONTWAIT),
+ send_len * 2);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_single)
+{
+ struct msghdr msg;
+
+ char const *test_str = "test_sendmsg";
+ size_t send_len = 13;
+ struct iovec vec;
+ char buf[13];
+
+ vec.iov_base = (char *)test_str;
+ vec.iov_len = send_len;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, sendmsg_large)
+{
+ void *mem = malloc(16384);
+ size_t send_len = 16384;
+ size_t sends = 128;
+ struct msghdr msg;
+ size_t recvs = 0;
+ size_t sent = 0;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ while (sent++ < sends) {
+ struct iovec vec = { (void *)mem, send_len };
+
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+ }
+
+ while (recvs++ < sends)
+ EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+
+ free(mem);
+}
+
+TEST_F(tls, sendmsg_multiple)
+{
+ char const *test_str = "test_sendmsg_multiple";
+ struct iovec vec[5];
+ char *test_strs[5];
+ struct msghdr msg;
+ int total_len = 0;
+ int len_cmp = 0;
+ int iov_len = 5;
+ char *buf;
+ int i;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ for (i = 0; i < iov_len; i++) {
+ test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+ snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+ vec[i].iov_base = (void *)test_strs[i];
+ vec[i].iov_len = strlen(test_strs[i]) + 1;
+ total_len += vec[i].iov_len;
+ }
+ msg.msg_iov = vec;
+ msg.msg_iovlen = iov_len;
+
+ EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+ buf = malloc(total_len);
+ EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+ for (i = 0; i < iov_len; i++) {
+ EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
+ strlen(test_strs[i])),
+ 0);
+ len_cmp += strlen(buf + len_cmp) + 1;
+ }
+ for (i = 0; i < iov_len; i++)
+ free(test_strs[i]);
+ free(buf);
+}
+
+TEST_F(tls, sendmsg_multiple_stress)
+{
+ char const *test_str = "abcdefghijklmno";
+ struct iovec vec[1024];
+ char *test_strs[1024];
+ int iov_len = 1024;
+ int total_len = 0;
+ char buf[1 << 14];
+ struct msghdr msg;
+ int len_cmp = 0;
+ int i;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+ for (i = 0; i < iov_len; i++) {
+ test_strs[i] = (char *)malloc(strlen(test_str) + 1);
+ snprintf(test_strs[i], strlen(test_str) + 1, "%s", test_str);
+ vec[i].iov_base = (void *)test_strs[i];
+ vec[i].iov_len = strlen(test_strs[i]) + 1;
+ total_len += vec[i].iov_len;
+ }
+ msg.msg_iov = vec;
+ msg.msg_iovlen = iov_len;
+
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
+ EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
+
+ for (i = 0; i < iov_len; i++)
+ len_cmp += strlen(buf + len_cmp) + 1;
+
+ for (i = 0; i < iov_len; i++)
+ free(test_strs[i]);
+}
+
+TEST_F(tls, splice_from_pipe)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_GE(write(p[1], mem_send, send_len), 0);
+ EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), 0);
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_from_pipe2)
+{
+ int send_len = 16000;
+ char mem_send[16000];
+ char mem_recv[16000];
+ int p2[2];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ ASSERT_GE(pipe(p2), 0);
+ EXPECT_GE(write(p[1], mem_send, 8000), 0);
+ EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
+ EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
+ EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+ EXPECT_GE(recv(self->cfd, mem_recv, send_len, 0), 0);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, send_and_splice)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ char const *test_str = "test_read";
+ int send_len2 = 10;
+ char buf[10];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, test_str, send_len2, 0), send_len2);
+ EXPECT_EQ(recv(self->cfd, buf, send_len2, MSG_WAITALL), send_len2);
+ EXPECT_EQ(memcmp(test_str, buf, send_len2), 0);
+
+ EXPECT_GE(write(p[1], mem_send, send_len), send_len);
+ EXPECT_GE(splice(p[0], NULL, self->fd, NULL, send_len, 0), send_len);
+
+ EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_to_pipe)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
+ EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
+ EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single)
+{
+ char const *test_str = "test_recvmsg_single";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ struct msghdr hdr;
+ struct iovec vec;
+
+ memset(&hdr, 0, sizeof(hdr));
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ vec.iov_base = (char *)buf;
+ vec.iov_len = send_len;
+ hdr.msg_iovlen = 1;
+ hdr.msg_iov = &vec;
+ EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_single_max)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char send_mem[TLS_PAYLOAD_MAX_LEN];
+ char recv_mem[TLS_PAYLOAD_MAX_LEN];
+ struct iovec vec;
+ struct msghdr hdr;
+
+ EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
+ vec.iov_base = (char *)recv_mem;
+ vec.iov_len = TLS_PAYLOAD_MAX_LEN;
+
+ hdr.msg_iovlen = 1;
+ hdr.msg_iov = &vec;
+ EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+ EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+}
+
+TEST_F(tls, recvmsg_multiple)
+{
+ unsigned int msg_iovlen = 1024;
+ unsigned int len_compared = 0;
+ struct iovec vec[1024];
+ char *iov_base[1024];
+ unsigned int iov_len = 16;
+ int send_len = 1 << 14;
+ char buf[1 << 14];
+ struct msghdr hdr;
+ int i;
+
+ EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
+ for (i = 0; i < msg_iovlen; i++) {
+ iov_base[i] = (char *)malloc(iov_len);
+ vec[i].iov_base = iov_base[i];
+ vec[i].iov_len = iov_len;
+ }
+
+ hdr.msg_iovlen = msg_iovlen;
+ hdr.msg_iov = vec;
+ EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
+ for (i = 0; i < msg_iovlen; i++)
+ len_compared += iov_len;
+
+ for (i = 0; i < msg_iovlen; i++)
+ free(iov_base[i]);
+}
+
+TEST_F(tls, single_send_multiple_recv)
+{
+ unsigned int total_len = TLS_PAYLOAD_MAX_LEN * 2;
+ unsigned int send_len = TLS_PAYLOAD_MAX_LEN;
+ char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
+ char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+
+ EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
+ memset(recv_mem, 0, total_len);
+
+ EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
+ EXPECT_NE(recv(self->cfd, recv_mem + send_len, send_len, 0), -1);
+ EXPECT_EQ(memcmp(send_mem, recv_mem, total_len), 0);
+}
+
+TEST_F(tls, multiple_send_single_recv)
+{
+ unsigned int total_len = 2 * 10;
+ unsigned int send_len = 10;
+ char recv_mem[2 * 10];
+ char send_mem[10];
+
+ EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+ EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
+ memset(recv_mem, 0, total_len);
+ EXPECT_EQ(recv(self->cfd, recv_mem, total_len, 0), total_len);
+
+ EXPECT_EQ(memcmp(send_mem, recv_mem, send_len), 0);
+ EXPECT_EQ(memcmp(send_mem, recv_mem + send_len, send_len), 0);
+}
+
+TEST_F(tls, recv_partial)
+{
+ char const *test_str = "test_read_partial";
+ char const *test_str_first = "test_read";
+ char const *test_str_second = "_partial";
+ int send_len = strlen(test_str) + 1;
+ char recv_mem[18];
+
+ memset(recv_mem, 0, sizeof(recv_mem));
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first), 0), -1);
+ EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
+ memset(recv_mem, 0, sizeof(recv_mem));
+ EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second), 0), -1);
+ EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
+ 0);
+}
+
+TEST_F(tls, recv_nonblock)
+{
+ char buf[4096];
+ bool err;
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ err = (errno == EAGAIN || errno == EWOULDBLOCK);
+ EXPECT_EQ(err, true);
+}
+
+TEST_F(tls, recv_peek)
+{
+ char const *test_str = "test_read_peek";
+ int send_len = strlen(test_str) + 1;
+ char buf[15];
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+ memset(buf, 0, sizeof(buf));
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple)
+{
+ char const *test_str = "test_read_peek";
+ int send_len = strlen(test_str) + 1;
+ unsigned int num_peeks = 100;
+ char buf[15];
+ int i;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ for (i = 0; i < num_peeks; i++) {
+ EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+ memset(buf, 0, sizeof(buf));
+ }
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_peek_multiple_records)
+{
+ char const *test_str = "test_read_peek_mult_recs";
+ char const *test_str_first = "test_read_peek";
+ char const *test_str_second = "_mult_recs";
+ int len;
+ char buf[64];
+
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, 0), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+ len = strlen(test_str_first);
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ /* MSG_PEEK can only peek into the current record. */
+ len = strlen(test_str_first);
+ EXPECT_EQ(memcmp(test_str_first, buf, len), 0);
+
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_WAITALL), len);
+
+ /* Non-MSG_PEEK will advance strparser (and therefore record)
+ * however.
+ */
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+
+ /* MSG_MORE will hold current record open, so later MSG_PEEK
+ * will see everything.
+ */
+ len = strlen(test_str_first);
+ EXPECT_EQ(send(self->fd, test_str_first, len, MSG_MORE), len);
+
+ len = strlen(test_str_second) + 1;
+ EXPECT_EQ(send(self->fd, test_str_second, len, 0), len);
+
+ len = strlen(test_str) + 1;
+ memset(buf, 0, len);
+ EXPECT_EQ(recv(self->cfd, buf, len, MSG_PEEK | MSG_WAITALL), len);
+
+ len = strlen(test_str) + 1;
+ EXPECT_EQ(memcmp(test_str, buf, len), 0);
+}
+
+TEST_F(tls, pollin)
+{
+ char const *test_str = "test_poll";
+ struct pollfd fd = { 0, 0, 0 };
+ char buf[10];
+ int send_len = 10;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ fd.fd = self->cfd;
+ fd.events = POLLIN;
+
+ EXPECT_EQ(poll(&fd, 1, 20), 1);
+ EXPECT_EQ(fd.revents & POLLIN, 1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ /* Test timing out */
+ EXPECT_EQ(poll(&fd, 1, 20), 0);
+}
+
+TEST_F(tls, poll_wait)
+{
+ char const *test_str = "test_poll_wait";
+ int send_len = strlen(test_str) + 1;
+ struct pollfd fd = { 0, 0, 0 };
+ char recv_mem[15];
+
+ fd.fd = self->cfd;
+ fd.events = POLLIN;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ /* Set timeout to inf. secs */
+ EXPECT_EQ(poll(&fd, 1, -1), 1);
+ EXPECT_EQ(fd.revents & POLLIN, 1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, send_len, 0), send_len);
+}
+
+TEST_F(tls, blocking)
+{
+ size_t data = 100000;
+ int res = fork();
+
+ EXPECT_NE(res, -1);
+
+ if (res) {
+ /* parent */
+ size_t left = data;
+ char buf[16384];
+ int status;
+ int pid2;
+
+ while (left) {
+ int res = send(self->fd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+
+ pid2 = wait(&status);
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(res, pid2);
+ } else {
+ /* child */
+ size_t left = data;
+ char buf[16384];
+
+ while (left) {
+ int res = recv(self->cfd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ }
+}
+
+TEST_F(tls, nonblocking)
+{
+ size_t data = 100000;
+ int sendbuf = 100;
+ int flags;
+ int res;
+
+ flags = fcntl(self->fd, F_GETFL, 0);
+ fcntl(self->fd, F_SETFL, flags | O_NONBLOCK);
+ fcntl(self->cfd, F_SETFL, flags | O_NONBLOCK);
+
+ /* Ensure nonblocking behavior by imposing a small send
+ * buffer.
+ */
+ EXPECT_EQ(setsockopt(self->fd, SOL_SOCKET, SO_SNDBUF,
+ &sendbuf, sizeof(sendbuf)), 0);
+
+ res = fork();
+ EXPECT_NE(res, -1);
+
+ if (res) {
+ /* parent */
+ bool eagain = false;
+ size_t left = data;
+ char buf[16384];
+ int status;
+ int pid2;
+
+ while (left) {
+ int res = send(self->fd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ if (res == -1 && errno == EAGAIN) {
+ eagain = true;
+ usleep(10000);
+ continue;
+ }
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+
+ EXPECT_TRUE(eagain);
+ pid2 = wait(&status);
+
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(res, pid2);
+ } else {
+ /* child */
+ bool eagain = false;
+ size_t left = data;
+ char buf[16384];
+
+ while (left) {
+ int res = recv(self->cfd, buf,
+ left > 16384 ? 16384 : left, 0);
+
+ if (res == -1 && errno == EAGAIN) {
+ eagain = true;
+ usleep(10000);
+ continue;
+ }
+ EXPECT_GE(res, 0);
+ left -= res;
+ }
+ EXPECT_TRUE(eagain);
+ }
+}
+
+TEST_F(tls, control_msg)
+{
+ if (self->notls)
+ return;
+
+ char cbuf[CMSG_SPACE(sizeof(char))];
+ char const *test_str = "test_read";
+ int cmsg_len = sizeof(char);
+ char record_type = 100;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ int send_len = 10;
+ struct iovec vec;
+ char buf[10];
+
+ vec.iov_base = (char *)test_str;
+ vec.iov_len = 10;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ /* test sending non-record types. */
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+ *CMSG_DATA(cmsg) = record_type;
+ msg.msg_controllen = cmsg->cmsg_len;
+
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+ /* Should fail because we didn't provide a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ vec.iov_base = buf;
+ EXPECT_EQ(recvmsg(self->cfd, &msg, 0), send_len);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ EXPECT_NE(cmsg, NULL);
+ EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+ EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+ record_type = *((unsigned char *)CMSG_DATA(cmsg));
+ EXPECT_EQ(record_type, 100);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000..23177b643
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,685 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
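+/* the kernel caps UDP GSO at 64 segments per send call */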
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS (1 << 6UL)
+#endif
+
+#define CONST_MTU_TEST 1500
+
+#define CONST_HDRLEN_V4 (sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6 (sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4 (CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6 (CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4 (ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6 (ETH_MAX_MTU / CONST_MSS_V6)
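With the 1500B test MTU these constants work out as follows (a worked sketch, assuming the usual 20B IPv4/40B IPv6 plus 8B UDP headers):

    CONST_MSS_V4      = 1500 - 28    = 1472
    CONST_MAX_SEGS_V4 = 65535 / 1472 = 44
    CONST_MSS_V6      = 1500 - 48    = 1452
    CONST_MAX_SEGS_V6 = 65535 / 1452 = 45

The "send MAX bytes" v4 case below therefore expects a 65507B payload (65535 - 28) delivered as 44 full datagrams plus a 739B trailer (65507 - 44 * 1472).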
+
+static bool cfg_do_ipv4;
+static bool cfg_do_ipv6;
+static bool cfg_do_connected;
+static bool cfg_do_connectionless;
+static bool cfg_do_msgmore;
+static bool cfg_do_setsockopt;
+static int cfg_specific_test_id = -1;
+
+static const char cfg_ifname[] = "lo";
+static unsigned short cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+struct testcase {
+ int tlen; /* send() buffer size, may exceed mss */
+ bool tfail; /* send() call is expected to fail */
+ int gso_len; /* mss after applying gso */
+ int r_num_mss; /* recv(): number of calls of full mss */
+ int r_len_last; /* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fall back to no GSO */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V4 * 2,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V4 * 2) + 1,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+ .gso_len = CONST_MSS_V4,
+ .r_num_mss = CONST_MAX_SEGS_V4,
+ .r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+ (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+ .gso_len = CONST_MSS_V4,
+ .tfail = true,
+ },
+ {
+ /* send a single 1B MSS: will fall back to no GSO */
+ .tlen = 1,
+ .gso_len = 1,
+ .r_num_mss = 1,
+ },
+ {
+ /* send 2 1B segments */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2B + 2B + 1B segments */
+ .tlen = 5,
+ .gso_len = 2,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send max number of min sized segments */
+ .tlen = UDP_MAX_SEGMENTS,
+ .gso_len = 1,
+ .r_num_mss = UDP_MAX_SEGMENTS,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+ .tlen = UDP_MAX_SEGMENTS + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
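(The zeroed sentinel entry terminates each table; run_all() further down iterates while test->tlen is non-zero.)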
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU (ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+ {
+ /* no GSO: send a single byte */
+ .tlen = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* no GSO: send a single MSS */
+ .tlen = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
+ {
+ /* no GSO: send a single MSS + 1B: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .tfail = true,
+ },
+ {
+ /* send a single MSS: will fall back to no GSO */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ },
+ {
+ /* send a single MSS + 1B */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 1,
+ .r_len_last = 1,
+ },
+ {
+ /* send exactly 2 MSS */
+ .tlen = CONST_MSS_V6 * 2,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2 MSS + 1B */
+ .tlen = (CONST_MSS_V6 * 2) + 1,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send MAX segs */
+ .tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+ },
+
+ {
+ /* send MAX bytes */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+ .gso_len = CONST_MSS_V6,
+ .r_num_mss = CONST_MAX_SEGS_V6,
+ .r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+ (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+ },
+ {
+ /* send MAX + 1: fail */
+ .tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+ .gso_len = CONST_MSS_V6,
+ .tfail = true,
+ },
+ {
+ /* send a single 1B MSS: will fall back to no GSO */
+ .tlen = 1,
+ .gso_len = 1,
+ .r_num_mss = 1,
+ },
+ {
+ /* send 2 1B segments */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ },
+ {
+ /* send 2B + 2B + 1B segments */
+ .tlen = 5,
+ .gso_len = 2,
+ .r_num_mss = 2,
+ .r_len_last = 1,
+ },
+ {
+ /* send max number of min sized segments */
+ .tlen = UDP_MAX_SEGMENTS,
+ .gso_len = 1,
+ .r_num_mss = UDP_MAX_SEGMENTS,
+ },
+ {
+ /* send max number + 1 of min sized segments: fail */
+ .tlen = UDP_MAX_SEGMENTS + 1,
+ .gso_len = 1,
+ .tfail = true,
+ },
+ {
+ /* EOL */
+ }
+};
+
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFMTU, &ifr))
+ error(1, errno, "ioctl get mtu");
+
+ return ifr.ifr_mtu;
+}
+
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_mtu = mtu;
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCSIFMTU, &ifr))
+ error(1, errno, "ioctl set mtu");
+}
+
+static void set_device_mtu(int fd, int mtu)
+{
+ int val;
+
+ val = get_device_mtu(fd, cfg_ifname);
+ fprintf(stderr, "device mtu (orig): %u\n", val);
+
+ __set_device_mtu(fd, cfg_ifname, mtu);
+ val = get_device_mtu(fd, cfg_ifname);
+ if (val != mtu)
+ error(1, 0, "unable to set device mtu to %u\n", val);
+
+ fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+ socklen_t vallen;
+ unsigned int mtu;
+ int ret;
+
+ vallen = sizeof(mtu);
+ if (is_ipv4)
+ ret = getsockopt(fd, SOL_IP, IP_MTU, &mtu, &vallen);
+ else
+ ret = getsockopt(fd, SOL_IPV6, IPV6_MTU, &mtu, &vallen);
+
+ if (ret)
+ error(1, errno, "getsockopt mtu");
+
+ fprintf(stderr, "path mtu (read): %u\n", mtu);
+ return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ struct rtmsg *rt;
+ char data[NLMSG_ALIGN(sizeof(*nh)) +
+ NLMSG_ALIGN(sizeof(*rt)) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+ NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+ NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+ int fd, ret, alen, off = 0;
+
+ alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ memset(data, 0, sizeof(data));
+
+ nh = (void *)data;
+ nh->nlmsg_type = RTM_NEWROUTE;
+ nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ rt = (void *)(data + off);
+ rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+ rt->rtm_table = RT_TABLE_MAIN;
+ rt->rtm_dst_len = alen << 3;
+ rt->rtm_protocol = RTPROT_BOOT;
+ rt->rtm_scope = RT_SCOPE_UNIVERSE;
+ rt->rtm_type = RTN_UNICAST;
+ off += NLMSG_ALIGN(sizeof(*rt));
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_DST;
+ rta->rta_len = RTA_LENGTH(alen);
+ if (is_ipv4)
+ memcpy(RTA_DATA(rta), &addr4, alen);
+ else
+ memcpy(RTA_DATA(rta), &addr6, alen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_OIF;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = 1;	/* if_nametoindex("lo") */
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* MTU is a subtype in a metrics type */
+ rta = (void *)(data + off);
+ rta->rta_type = RTA_METRICS;
+ rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* now fill MTU subtype. Note that it fits within above rta_len */
+ rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+ rta->rta_type = RTAX_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(int));
+ *((int *)(RTA_DATA(rta))) = mtu;
+
+ nh->nlmsg_len = off;
+
+ ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != off)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+ if (close(fd))
+ error(1, errno, "close netlink");
+
+ fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+ int ret;
+
+ ret = sendmsg(fd, msg, flags);
+ if (ret == -1 &&
+ (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL))
+ return false;
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != msg->msg_iov->iov_len)
+ error(1, 0, "sendto: %d != %lu", ret, msg->msg_iov->iov_len);
+ if (msg->msg_flags)
+ error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+ return true;
+}
+
+static bool send_one(int fd, int len, int gso_len,
+ struct sockaddr *addr, socklen_t alen)
+{
+ char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ struct cmsghdr *cm;
+
+ iov.iov_base = buf;
+ iov.iov_len = len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_name = addr;
+ msg.msg_namelen = alen;
+
+ if (gso_len && !cfg_do_setsockopt) {
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ cm = CMSG_FIRSTHDR(&msg);
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+ *((uint16_t *) CMSG_DATA(cm)) = gso_len;
+ }
+
+ /* If MSG_MORE, send 1 byte followed by remainder */
+ if (cfg_do_msgmore && len > 1) {
+ iov.iov_len = 1;
+ if (!__send_one(fd, &msg, MSG_MORE))
+ error(1, 0, "send 1B failed");
+
+ iov.iov_base++;
+ iov.iov_len = len - 1;
+ }
+
+ return __send_one(fd, &msg, 0);
+}
+
+static int recv_one(int fd, int flags)
+{
+ int ret;
+
+ ret = recv(fd, buf, sizeof(buf), flags);
+ if (ret == -1 && errno == EAGAIN && (flags & MSG_DONTWAIT))
+ return 0;
+ if (ret == -1)
+ error(1, errno, "recv");
+
+ return ret;
+}
+
+static void run_one(struct testcase *test, int fdt, int fdr,
+ struct sockaddr *addr, socklen_t alen)
+{
+ int i, ret, val, mss;
+ bool sent;
+
+ fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+ addr->sa_family == AF_INET ? 4 : 6,
+ test->tlen, test->gso_len,
+ test->tfail ? "(fail)" : "");
+
+ val = test->gso_len;
+ if (cfg_do_setsockopt) {
+ if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
+ error(1, errno, "setsockopt udp segment");
+ }
+
+ sent = send_one(fdt, test->tlen, test->gso_len, addr, alen);
+ if (sent && test->tfail)
+ error(1, 0, "send succeeded while expecting failure");
+ if (!sent && !test->tfail)
+ error(1, 0, "send failed while expecting success");
+ if (!sent)
+ return;
+
+ if (test->gso_len)
+ mss = test->gso_len;
+ else
+ mss = addr->sa_family == AF_INET ? CONST_MSS_V4 : CONST_MSS_V6;
+
+ /* Recv all full MSS datagrams */
+ for (i = 0; i < test->r_num_mss; i++) {
+ ret = recv_one(fdr, 0);
+ if (ret != mss)
+ error(1, 0, "recv.%d: %d != %d", i, ret, mss);
+ }
+
+ /* Recv the non-full last datagram, if tlen was not a multiple of mss */
+ if (test->r_len_last) {
+ ret = recv_one(fdr, 0);
+ if (ret != test->r_len_last)
+ error(1, 0, "recv.%d: %d != %d (last)",
+ i, ret, test->r_len_last);
+ }
+
+ /* Verify received all data */
+ ret = recv_one(fdr, MSG_DONTWAIT);
+ if (ret)
+ error(1, 0, "recv: unexpected datagram");
+}
+
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+ struct testcase *tests, *test;
+
+ tests = addr->sa_family == AF_INET ? testcases_v4 : testcases_v6;
+
+ for (test = tests; test->tlen; test++) {
+ /* if a specific test is given, then skip all others */
+ if (cfg_specific_test_id == -1 ||
+ cfg_specific_test_id == test - tests)
+ run_one(test, fdt, fdr, addr, alen);
+ }
+}
+
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ int fdr, fdt, val;
+
+ fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdr == -1)
+ error(1, errno, "socket r");
+
+ if (bind(fdr, addr, alen))
+ error(1, errno, "bind");
+
+ /* Have tests fail quickly instead of hang */
+ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcv timeout");
+
+ fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+ if (fdt == -1)
+ error(1, errno, "socket t");
+
+ /* Do not fragment these datagrams: only succeed if GSO works */
+ set_pmtu_discover(fdt, addr->sa_family == AF_INET);
+
+ if (cfg_do_connectionless) {
+ set_device_mtu(fdt, CONST_MTU_TEST);
+ run_all(fdt, fdr, addr, alen);
+ }
+
+ if (cfg_do_connected) {
+ set_device_mtu(fdt, CONST_MTU_TEST + 100);
+ set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
+
+ if (connect(fdt, addr, alen))
+ error(1, errno, "connect");
+
+ val = get_path_mtu(fdt, addr->sa_family == AF_INET);
+ if (val != CONST_MTU_TEST)
+ error(1, 0, "bad path mtu %u\n", val);
+
+ run_all(fdt, fdr, addr, 0 /* use connected addr */);
+ }
+
+ if (close(fdt))
+ error(1, errno, "close t");
+ if (close(fdr))
+ error(1, errno, "close r");
+}
+
+static void run_test_v4(void)
+{
+ struct sockaddr_in addr = {0};
+
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(cfg_port);
+ addr.sin_addr = addr4;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void run_test_v6(void)
+{
+ struct sockaddr_in6 addr = {0};
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = addr6;
+
+ run_test((void *)&addr, sizeof(addr));
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_do_ipv4 = true;
+ break;
+ case '6':
+ cfg_do_ipv6 = true;
+ break;
+ case 'c':
+ cfg_do_connected = true;
+ break;
+ case 'C':
+ cfg_do_connectionless = true;
+ break;
+ case 'm':
+ cfg_do_msgmore = true;
+ break;
+ case 's':
+ cfg_do_setsockopt = true;
+ break;
+ case 't':
+ cfg_specific_test_id = strtoul(optarg, NULL, 0);
+ break;
+ default:
+ error(1, 0, "%s: parse error", argv[0]);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_do_ipv4)
+ run_test_v4();
+ if (cfg_do_ipv6)
+ run_test_v6();
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000..fec24f584
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000..99e537ab5
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+
+wake_children() {
+ local -r jobs="$(jobs -p)"
+
+ if [[ "${jobs}" != "" ]]; then
+ kill -1 ${jobs} 2>/dev/null
+ fi
+}
+trap wake_children EXIT
+
+run_one() {
+ local -r args=$@
+
+ ./udpgso_bench_rx &
+ ./udpgso_bench_rx -t &
+
+ ./udpgso_bench_tx ${args}
+}
+
+run_in_netns() {
+ local -r args=$@
+
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp"
+ run_in_netns ${args}
+
+ echo "udp gso"
+ run_in_netns ${args} -S
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp"
+ run_in_netns ${args} -t
+
+ echo "tcp zerocopy"
+ run_in_netns ${args} -t -z
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+ local -r ipv6_args="${core_args} -6 -D ::1"
+
+ echo "ipv4"
+ run_tcp "${ipv4_args}"
+ run_udp "${ipv4_args}"
+
+ echo "ipv6"
+	run_tcp "${ipv6_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000..727cf67a3
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static int cfg_port = 8000;
+static bool cfg_tcp;
+static bool cfg_verify;
+
+static bool interrupted;
+static unsigned long packets, bytes;
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_poll(int fd)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+ pfd.fd = fd;
+
+ do {
+ ret = poll(&pfd, 1, 10);
+ if (ret == -1)
+ error(1, errno, "poll");
+ if (ret == 0)
+ continue;
+ if (pfd.revents != POLLIN)
+ error(1, errno, "poll: 0x%x expected 0x%x\n",
+ pfd.revents, POLLIN);
+ } while (!ret && !interrupted);
+}
+
+static int do_socket(bool do_tcp)
+{
+ struct sockaddr_in6 addr = {0};
+ int fd, val;
+
+	fd = socket(PF_INET6, do_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ val = 1 << 21;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+ error(1, errno, "setsockopt reuseport");
+
+ addr.sin6_family = PF_INET6;
+ addr.sin6_port = htons(cfg_port);
+ addr.sin6_addr = in6addr_any;
+ if (bind(fd, (void *) &addr, sizeof(addr)))
+ error(1, errno, "bind");
+
+ if (do_tcp) {
+ int accept_fd = fd;
+
+ if (listen(accept_fd, 1))
+ error(1, errno, "listen");
+
+ do_poll(accept_fd);
+
+ fd = accept(accept_fd, NULL, NULL);
+ if (fd == -1)
+ error(1, errno, "accept");
+ if (close(accept_fd))
+ error(1, errno, "close accept fd");
+ }
+
+ return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+static void do_flush_tcp(int fd)
+{
+ int ret;
+
+ while (true) {
+ /* MSG_TRUNC flushes up to len bytes */
+ ret = recv(fd, NULL, 1 << 21, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "flush");
+ if (ret == 0) {
+ /* client detached */
+ exit(0);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+}
+
+static char sanitized_char(char val)
+{
+ return (val >= 'a' && val <= 'z') ? val : '.';
+}
+
+static void do_verify_udp(const char *data, int len)
+{
+ char cur = data[0];
+ int i;
+
+ /* verify contents */
+ if (cur < 'a' || cur > 'z')
+ error(1, 0, "data initial byte out of range");
+
+ for (i = 1; i < len; i++) {
+ if (cur == 'z')
+ cur = 'a';
+ else
+ cur++;
+
+ if (data[i] != cur)
+ error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+ i, len,
+ sanitized_char(data[i]), data[i],
+ sanitized_char(cur), cur);
+ }
+}
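The verifier walks the same repeating 'a'..'z' pattern that the sender writes into its buffers; a minimal sketch of the matching generator, as in udpgso_bench_tx.c's main():

    /* fill the payload with a repeating 'a'..'z' pattern */
    for (i = 0; i < sizeof(buf[0]); i++)
        buf[0][i] = 'a' + (i % 26);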
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+static void do_flush_udp(int fd)
+{
+ static char rbuf[ETH_DATA_LEN];
+ int ret, len, budget = 256;
+
+ len = cfg_verify ? sizeof(rbuf) : 0;
+ while (budget--) {
+ /* MSG_TRUNC will make return value full datagram length */
+ ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ return;
+ if (ret == -1)
+ error(1, errno, "recv");
+ if (len) {
+ if (ret == 0)
+ error(1, errno, "recv: 0 byte datagram\n");
+
+ do_verify_udp(rbuf, ret);
+ }
+
+ packets++;
+ bytes += ret;
+ }
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-tv] [-p port]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+	while ((c = getopt(argc, argv, "p:tv")) != -1) {
+ switch (c) {
+ case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'v':
+ cfg_verify = true;
+ break;
+ }
+ }
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_tcp && cfg_verify)
+ error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+static void do_recv(void)
+{
+ unsigned long tnow, treport;
+ int fd;
+
+ fd = do_socket(cfg_tcp);
+
+ treport = gettimeofday_ms() + 1000;
+ do {
+ do_poll(fd);
+
+ if (cfg_tcp)
+ do_flush_tcp(fd);
+ else
+ do_flush_udp(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ if (packets)
+ fprintf(stderr,
+ "%s rx: %6lu MB/s %8lu calls/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ bytes >> 20, packets);
+ bytes = packets = 0;
+ treport = tnow + 1000;
+ }
+
+ } while (!interrupted);
+
+ if (close(fd))
+ error(1, errno, "close");
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ signal(SIGINT, sigint_handler);
+
+ do_recv();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000..463a2cbd0
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT 103
+#endif
+
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY 60
+#endif
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
+
+#define NUM_PKT 100
+
+static bool cfg_cache_trash;
+static int cfg_cpu = -1;
+static int cfg_connected = true;
+static int cfg_family = PF_UNSPEC;
+static uint16_t cfg_mss;
+static int cfg_payload_len = (1472 * 42);
+static int cfg_port = 8000;
+static int cfg_runtime_ms = -1;
+static bool cfg_segment;
+static bool cfg_sendmmsg;
+static bool cfg_tcp;
+static bool cfg_zerocopy;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static bool interrupted;
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+static void sigint_handler(int signum)
+{
+ if (signum == SIGINT)
+ interrupted = true;
+}
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static int set_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ error(1, 0, "setaffinity %d", cpu);
+
+ return 0;
+}
+
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+}
+
+static void flush_zerocopy(int fd)
+{
+ struct msghdr msg = {0}; /* flush */
+ int ret;
+
+ while (1) {
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "errqueue");
+ if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+ error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+ msg.msg_flags = 0;
+ }
+}
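flush_zerocopy() only drains the notifications; a receiver that also wants the completion range would pass a control buffer and inspect the attached sock_extended_err. A hedged sketch, assuming <linux/errqueue.h> is included and msg_control points at a real buffer:

    /* sketch: report which zerocopy sends have completed */
    struct sock_extended_err *serr;
    struct cmsghdr *cm = CMSG_FIRSTHDR(&msg);

    if (cm) {	/* level is SOL_IP or SOL_IPV6, type IP(V6)_RECVERR */
        serr = (struct sock_extended_err *)CMSG_DATA(cm);
        if (serr->ee_origin == SO_EE_ORIGIN_ZEROCOPY)
            fprintf(stderr, "acked sends %u..%u\n",
                    serr->ee_info, serr->ee_data);
    }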
+
+static int send_tcp(int fd, char *data)
+{
+ int ret, done = 0, count = 0;
+
+ while (done < cfg_payload_len) {
+ ret = send(fd, data + done, cfg_payload_len - done,
+ cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "write");
+
+ done += ret;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp(int fd, char *data)
+{
+ int ret, total_len, len, count = 0;
+
+ total_len = cfg_payload_len;
+
+ while (total_len) {
+ len = total_len < cfg_mss ? total_len : cfg_mss;
+
+ ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+ cfg_connected ? NULL : (void *)&cfg_dst_addr,
+ cfg_connected ? 0 : cfg_alen);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, errno, "write: %uB != %uB\n", ret, len);
+
+ total_len -= len;
+ count++;
+ }
+
+ return count;
+}
+
+static int send_udp_sendmmsg(int fd, char *data)
+{
+ const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+ struct mmsghdr mmsgs[max_nr_msg];
+ struct iovec iov[max_nr_msg];
+ unsigned int off = 0, left;
+ int i = 0, ret;
+
+ memset(mmsgs, 0, sizeof(mmsgs));
+
+ left = cfg_payload_len;
+ while (left) {
+ if (i == max_nr_msg)
+ error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+ iov[i].iov_base = data + off;
+ iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+ mmsgs[i].msg_hdr.msg_iov = iov + i;
+ mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+ off += iov[i].iov_len;
+ left -= iov[i].iov_len;
+ i++;
+ }
+
+ ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmmsg");
+
+ return ret;
+}
+
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+ uint16_t *valp;
+
+ cm->cmsg_level = SOL_UDP;
+ cm->cmsg_type = UDP_SEGMENT;
+ cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+ valp = (void *)CMSG_DATA(cm);
+ *valp = cfg_mss;
+}
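The segment size can instead be set once per socket; a minimal sketch of the equivalent setsockopt path, the form udpgso.c exercises when run with -s:

    int val = cfg_mss;

    /* per-socket alternative to the per-call UDP_SEGMENT cmsg */
    if (setsockopt(fd, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
        error(1, errno, "setsockopt udp segment");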
+
+static int send_udp_segment(int fd, char *data)
+{
+ char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+ struct msghdr msg = {0};
+ struct iovec iov = {0};
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = cfg_payload_len;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+ send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+ msg.msg_name = (void *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
+
+ ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg");
+ if (ret != iov.iov_len)
+ error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+ return 1;
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+ filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const char *bind_addr = NULL;
+ int max_len, hdrlen;
+ int c;
+
+ while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'c':
+ cfg_cache_trash = true;
+ break;
+ case 'C':
+ cfg_cpu = strtol(optarg, NULL, 0);
+ break;
+ case 'D':
+ bind_addr = optarg;
+ break;
+ case 'l':
+ cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'm':
+ cfg_sendmmsg = true;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 'S':
+ cfg_segment = true;
+ break;
+ case 't':
+ cfg_tcp = true;
+ break;
+ case 'u':
+ cfg_connected = false;
+ break;
+ case 'z':
+ cfg_zerocopy = true;
+ break;
+ }
+ }
+
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
+ if (optind != argc)
+ usage(argv[0]);
+
+ if (cfg_family == PF_UNSPEC)
+ error(1, 0, "must pass one of -4 or -6");
+ if (cfg_tcp && !cfg_connected)
+ error(1, 0, "connectionless tcp makes no sense");
+ if (cfg_segment && cfg_sendmmsg)
+ error(1, 0, "cannot combine segment offload and sendmmsg");
+
+ if (cfg_family == PF_INET)
+ hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+ else
+ hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+ cfg_mss = ETH_DATA_LEN - hdrlen;
+ max_len = ETH_MAX_MTU - hdrlen;
+
+ if (cfg_payload_len > max_len)
+ error(1, 0, "payload length %u exceeds max %u",
+ cfg_payload_len, max_len);
+}
+
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+ int level, name, val;
+
+ if (is_ipv4) {
+ level = SOL_IP;
+ name = IP_MTU_DISCOVER;
+ val = IP_PMTUDISC_DO;
+ } else {
+ level = SOL_IPV6;
+ name = IPV6_MTU_DISCOVER;
+ val = IPV6_PMTUDISC_DO;
+ }
+
+ if (setsockopt(fd, level, name, &val, sizeof(val)))
+ error(1, errno, "setsockopt path mtu");
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long num_msgs, num_sends;
+ unsigned long tnow, treport, tstop;
+ int fd, i, val;
+
+ parse_opts(argc, argv);
+
+ if (cfg_cpu > 0)
+ set_cpu(cfg_cpu);
+
+ for (i = 0; i < sizeof(buf[0]); i++)
+ buf[0][i] = 'a' + (i % 26);
+ for (i = 1; i < NUM_PKT; i++)
+ memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+ signal(SIGINT, sigint_handler);
+
+ fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket");
+
+ if (cfg_zerocopy) {
+ val = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+ error(1, errno, "setsockopt zerocopy");
+ }
+
+ if (cfg_connected &&
+ connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+
+ if (cfg_segment)
+ set_pmtu_discover(fd, cfg_family == PF_INET);
+
+ num_msgs = num_sends = 0;
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_runtime_ms;
+ treport = tnow + 1000;
+
+ i = 0;
+ do {
+ if (cfg_tcp)
+ num_sends += send_tcp(fd, buf[i]);
+ else if (cfg_segment)
+ num_sends += send_udp_segment(fd, buf[i]);
+ else if (cfg_sendmmsg)
+ num_sends += send_udp_sendmmsg(fd, buf[i]);
+ else
+ num_sends += send_udp(fd, buf[i]);
+ num_msgs++;
+
+ if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+ flush_zerocopy(fd);
+
+ tnow = gettimeofday_ms();
+ if (tnow > treport) {
+ fprintf(stderr,
+ "%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+ cfg_tcp ? "tcp" : "udp",
+ (num_msgs * cfg_payload_len) >> 20,
+ num_sends, num_msgs);
+ num_msgs = num_sends = 0;
+ treport = tnow + 1000;
+ }
+
+ /* cold cache when writing buffer */
+ if (cfg_cache_trash)
+		i = (i + 1) < NUM_PKT ? i + 1 : 0;
+
+ } while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
new file mode 100644
index 000000000..a37cb1192
--- /dev/null
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for netfilter selftests
+
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh conntrack_icmp_related.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
new file mode 100644
index 000000000..59caa8f71
--- /dev/null
+++ b/tools/testing/selftests/netfilter/config
@@ -0,0 +1,2 @@
+CONFIG_NET_NS=y
+CONFIG_NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 000000000..b48e1833b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig errors are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ for i in 1 2;do ip netns del nsclient$i;done
+ for i in 1 2;do ip netns del nsrouter$i;done
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+ipv6() {
+ echo -n dead:$1::2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_unknown()
+{
+ expect="packets 0 bytes 0"
+ for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ check_counter $n "unknown" "$expect"
+ if [ $? -ne 0 ] ;then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
+ ip netns add $n
+ ip -net $n link set lo up
+done
+
+DEV=veth0
+ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
+DEV=veth0
+ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+
+DEV=veth0
+ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+
+DEV=veth0
+for i in 1 2; do
+ ip -net nsclient$i link set $DEV up
+ ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
+ ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
+done
+
+ip -net nsrouter1 link set eth1 up
+ip -net nsrouter1 link set veth0 up
+
+ip -net nsrouter2 link set eth1 up
+ip -net nsrouter2 link set eth2 up
+
+ip -net nsclient1 route add default via 192.168.1.1
+ip -net nsclient1 -6 route add default via dead:1::1
+
+ip -net nsclient2 route add default via 192.168.2.1
+ip -net nsclient2 -6 route add default via dead:2::1
+
+i=3
+ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
+ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
+ip -net nsrouter1 addr add dead:1::1/64 dev eth1
+ip -net nsrouter1 addr add dead:3::1/64 dev veth0
+ip -net nsrouter1 route add default via 192.168.3.10
+ip -net nsrouter1 -6 route add default via dead:3::10
+
+ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
+ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
+ip -net nsrouter2 addr add dead:2::1/64 dev eth1
+ip -net nsrouter2 addr add dead:3::10/64 dev eth2
+ip -net nsrouter2 route add default via 192.168.3.1
+ip -net nsrouter2 -6 route add default via dead:3::1
+
+sleep 2
+for i in 4 6; do
+ ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in nsrouter1 nsrouter2; do
+ip netns exec $netns nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec nsclient1 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec nsclient2 nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+# make sure NAT core rewrites address of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec nsrouter1 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+ip netns exec nsrouter2 ip link set eth1 mtu 1280
+ip netns exec nsclient2 ip link set veth0 mtu 1280
+sleep 1
+
+ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
+if [ $? -ne 0 ]; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ cleanup
+ exit 1
+fi
+
+check_unknown
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in nsrouter1 nsrouter2 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+check_counter nsclient2 "new" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
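(2076 bytes is the sum of the two echo requests counted so far: 1000B payload + 8B ICMP + 20B IPv4 header = 1028B, plus 1000B payload + 8B ICMPv6 + 40B IPv6 header = 1048B.)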
+
+ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (it's in forward).
+expect="packets 0 bytes 0"
+check_counter "nsrouter2" "related" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
+
+ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
+if [ $? -eq 0 ]; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in nsrouter1 nsclient1;do
+ check_counter "$netns" "related" "$expect"
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+done
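(The byte counts follow from how the kernel sizes ICMP errors: the IPv4 'fragmentation needed' reply is capped at 576B, so the earlier check expects 576; the ICMPv6 packet-too-big reply is bounded by the 1280B IPv6 minimum MTU, giving a cumulative 576 + 1280 = 1856B here.)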
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+cleanup
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
new file mode 100755
index 000000000..f25f72a75
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -0,0 +1,766 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1 > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 addr add 10.0.1.1/24 dev veth0
+ip -net ns0 addr add dead:1::1/64 dev veth0
+
+ip -net ns0 link set veth1 up
+ip -net ns0 addr add 10.0.2.1/24 dev veth1
+ip -net ns0 addr add dead:2::1/64 dev veth1
+
+for i in 1 2; do
+ ip -net ns$i link set lo up
+ ip -net ns$i link set eth0 up
+ ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+ ip -net ns$i route add default via 10.0.$i.1
+ ip -net ns$i addr add dead:$i::99/64 dev eth0
+ ip -net ns$i route add default via dead:$i::1
+done
+
+bad_counter()
+{
+ local ns=$1
+ local counter=$2
+ local expect=$3
+
+ echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns nft list counter inet filter $counter 1>&2
+}
+
+check_counters()
+{
+ ns=$1
+ local lret=0
+
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0in "packets 1 bytes 84"
+ lret=1
+ fi
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0out "packets 1 bytes 84"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0in6 "$expect"
+ lret=1
+ fi
+ cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter $ns ns0out6 "$expect"
+ lret=1
+ fi
+
+ return $lret
+}
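(84 and 104 bytes correspond to a single default-size ping: 56B payload + 8B ICMP header + 20B IPv4 header = 84B, or with the 40B IPv6 header, 104B.)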
+
+check_ns0_counters()
+{
+ local ns=$1
+ local lret=0
+
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0in "packets 0 bytes 0"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0in6 "packets 0 bytes 0"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0out "packets 0 bytes 0"
+ lret=1
+ fi
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns0out6 "packets 0 bytes 0"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ expect="packets 1 bytes 84"
+ cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 $ns$dir "$expect"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns0 ${ns}${dir}6 "$expect"
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+reset_counters()
+{
+ for i in 0 1 2;do
+ ip netns exec ns$i nft reset counters inet > /dev/null
+ done
+}
+
+test_local_dnat6()
+{
+ local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip6 daddr dead:1::99 dnat to dead:2::99
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add ip6 dnat hook"
+ return $ksft_skip
+ fi
+
+ # ping netns1, expect rewrite to netns2
+ ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+ if [ $? -ne 0 ]; then
+ lret=1
+ echo "ERROR: ping6 failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+ ip netns exec ns0 nft flush chain ip6 nat output
+
+ return $lret
+}
+
+test_local_dnat()
+{
+ local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip daddr 10.0.1.99 dnat to 10.0.2.99
+ }
+}
+EOF
+ # ping netns1, expect rewrite to netns2
+ ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ if [ $? -ne 0 ]; then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+
+ ip netns exec ns0 nft flush chain ip nat output
+
+ reset_counters
+ ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+ if [ $? -ne 0 ]; then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns0 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 1 count in ns1
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # expect 0 packet in ns2
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns2 ns0$dir "$expect"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+
+ return $lret
+}
+
+
+test_masquerade6()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+		return 1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft flush chain ip6 nat postrouting
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+
+ return $lret
+}
+
+test_masquerade()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns0$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft flush chain ip nat postrouting
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not flush nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+
+ return $lret
+}
+
+test_redirect6()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+	# ns0 should have seen packets from ns2, due to redirect
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft delete table ip6 nat
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete ip6 nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+
+ return $lret
+}
+
+test_redirect()
+{
+ local lret=0
+
+ ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter ns2 ns1$dir "$expect"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+ }
+}
+EOF
+ ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+
+ cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+	# ns0 should have seen packets from ns2, due to redirect
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+ lret=1
+ fi
+ done
+
+ ip netns exec ns0 nft delete table ip nat
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+
+ return $lret
+}
+
+
+# ip netns exec ns0 ping -c 1 -q 10.0.$i.99
+for i in 0 1 2; do
+ip netns exec ns$i nft -f - <<EOF
+table inet filter {
+ counter ns0in {}
+ counter ns1in {}
+ counter ns2in {}
+
+ counter ns0out {}
+ counter ns1out {}
+ counter ns2out {}
+
+ counter ns0in6 {}
+ counter ns1in6 {}
+ counter ns2in6 {}
+
+ counter ns0out6 {}
+ counter ns1out6 {}
+ counter ns2out6 {}
+
+ map nsincounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0in",
+ 10.0.2.1 : "ns0in",
+ 10.0.1.99 : "ns1in",
+ 10.0.2.99 : "ns2in" }
+ }
+
+ map nsincounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0in6",
+ dead:2::1 : "ns0in6",
+ dead:1::99 : "ns1in6",
+ dead:2::99 : "ns2in6" }
+ }
+
+ map nsoutcounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0out",
+ 10.0.2.1 : "ns0out",
+ 10.0.1.99 : "ns1out",
+ 10.0.2.99 : "ns2out" }
+ }
+
+ map nsoutcounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0out6",
+ dead:2::1 : "ns0out6",
+ dead:1::99 : "ns1out6",
+ dead:2::99 : "ns2out6" }
+ }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ counter name ip saddr map @nsincounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ counter name ip daddr map @nsoutcounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+ }
+}
+EOF
+done
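[Editor's note] Each namespace gets one named counter per peer and direction, and the `counter name ... map @...` expressions in the input/output chains select the right counter at packet time by looking up the source or destination address in the map. The helpers then only need to list a counter by name; a sketch of what that looks like once the setup above is in place:

	ip netns exec ns1 ping -q -c 1 10.0.2.99 > /dev/null
	ip netns exec ns1 nft list counter inet filter ns2out
	# expected to show something like: counter ns2out { packets 1 bytes 84 }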
+
+sleep 3
+# test basic connectivity
+for i in 1 2; do
+ ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+ if [ $? -ne 0 ];then
+ echo "ERROR: Could not reach other namespace(s)" 1>&2
+ ret=1
+ fi
+
+ ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+ if [ $? -ne 0 ];then
+ echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+ ret=1
+ fi
+ check_counters ns$i
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+
+ check_ns0_counters ns$i
+ if [ $? -ne 0 ]; then
+ ret=1
+ fi
+ reset_counters
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+fi
+
+reset_counters
+test_local_dnat
+test_local_dnat6
+
+reset_counters
+test_masquerade
+test_masquerade6
+
+reset_counters
+test_redirect
+test_redirect6
+
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
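[Editor's note] The script ends by deleting all three namespaces and propagating $ret as the exit status. It is normally driven by the kselftest harness; a plausible invocation from the top of a kernel source tree (the TARGETS name is inferred from the directory layout, not stated in this hunk) would be:

	make -C tools/testing/selftests TARGETS=netfilter run_tests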
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
new file mode 100755
index 000000000..f1affd12c
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+#
+# This test is for stress-testing the nf_tables config plane path vs.
+# packet path processing: Make sure we never release rules that are
+# still visible to other cpus.
+#
+# set -e
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+testns=testns1
+tables="foo bar baz quux"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+tmp=$(mktemp)
+
+for table in $tables; do
+ echo add table inet "$table" >> "$tmp"
+ echo flush table inet "$table" >> "$tmp"
+
+ echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
+ echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
+ for c in $(seq 1 400); do
+ chain=$(printf "chain%03u" "$c")
+ echo "add chain inet $table $chain" >> "$tmp"
+ done
+
+ for c in $(seq 1 400); do
+ chain=$(printf "chain%03u" "$c")
+ for BASE in INPUT OUTPUT; do
+ echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
+ done
+ echo "add rule inet $table $chain counter return" >> "$tmp"
+ done
+done
+
+ip netns add "$testns"
+ip -netns "$testns" link set lo up
+
+lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
+cpunum=$((cpunum-1))
+for i in $(seq 0 $cpunum);do
+ mask=$(printf 0x%x $((1<<$i)))
+ ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
+ ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
+done)
+
+sleep 1
+
+for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
+
+for table in $tables;do
+ randsleep=$((RANDOM%10))
+ sleep $randsleep
+ ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+done
+
+randsleep=$((RANDOM%10))
+sleep $randsleep
+
+pkill -9 ping
+
+wait
+
+rm -f "$tmp"
+ip netns del "$testns"
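[Editor's note] The lscpu pipeline above starts one IPv4 and one IPv6 flood ping per CPU, each pinned with a single-bit taskset affinity mask, so every CPU keeps traversing the rulesets while ten concurrent nft instances reload them and tables are deleted after random delays. A condensed restatement of the pinning idiom, using nproc instead of parsing lscpu (a sketch, not part of the test):

	ncpus=$(nproc)
	for i in $(seq 0 $((ncpus - 1))); do
		mask=$(printf 0x%x $((1 << i)))   # one bit set: CPU i only
		taskset $mask ping -fq 127.0.0.1 > /dev/null &
	done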
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
new file mode 100644
index 000000000..d9355035e
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/.gitignore
@@ -0,0 +1,4 @@
+timestamping
+rxtimestamp
+txtimestamp
+hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
new file mode 100644
index 000000000..c46c0eefa
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../../usr/include
+
+TEST_PROGS := hwtstamp_config rxtimestamp timestamping txtimestamp
+
+all: $(TEST_PROGS)
+
+top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
+include ../../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
new file mode 100644
index 000000000..e1fdee841
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test program for SIOC{G,S}HWTSTAMP
+ * Copyright 2013 Solarflare Communications
+ * Author: Ben Hutchings
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+
+#include <linux/if.h>
+#include <linux/net_tstamp.h>
+#include <linux/sockios.h>
+
+static int
+lookup_value(const char **names, int size, const char *name)
+{
+ int value;
+
+ for (value = 0; value < size; value++)
+ if (names[value] && strcasecmp(names[value], name) == 0)
+ return value;
+
+ return -1;
+}
+
+static const char *
+lookup_name(const char **names, int size, int value)
+{
+ return (value >= 0 && value < size) ? names[value] : NULL;
+}
+
+static void list_names(FILE *f, const char **names, int size)
+{
+ int value;
+
+ for (value = 0; value < size; value++)
+ if (names[value])
+ fprintf(f, " %s\n", names[value]);
+}
+
+static const char *tx_types[] = {
+#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name
+ TX_TYPE(OFF),
+ TX_TYPE(ON),
+ TX_TYPE(ONESTEP_SYNC)
+#undef TX_TYPE
+};
+#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+
+static const char *rx_filters[] = {
+#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
+ RX_FILTER(NONE),
+ RX_FILTER(ALL),
+ RX_FILTER(SOME),
+ RX_FILTER(PTP_V1_L4_EVENT),
+ RX_FILTER(PTP_V1_L4_SYNC),
+ RX_FILTER(PTP_V1_L4_DELAY_REQ),
+ RX_FILTER(PTP_V2_L4_EVENT),
+ RX_FILTER(PTP_V2_L4_SYNC),
+ RX_FILTER(PTP_V2_L4_DELAY_REQ),
+ RX_FILTER(PTP_V2_L2_EVENT),
+ RX_FILTER(PTP_V2_L2_SYNC),
+ RX_FILTER(PTP_V2_L2_DELAY_REQ),
+ RX_FILTER(PTP_V2_EVENT),
+ RX_FILTER(PTP_V2_SYNC),
+ RX_FILTER(PTP_V2_DELAY_REQ),
+#undef RX_FILTER
+};
+#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+
+static void usage(void)
+{
+ fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n"
+ "tx_type is any of (case-insensitive):\n",
+ stderr);
+ list_names(stderr, tx_types, N_TX_TYPES);
+ fputs("rx_filter is any of (case-insensitive):\n", stderr);
+ list_names(stderr, rx_filters, N_RX_FILTERS);
+}
+
+int main(int argc, char **argv)
+{
+ struct ifreq ifr;
+ struct hwtstamp_config config;
+ const char *name;
+ int sock;
+
+ if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) {
+ usage();
+ return 2;
+ }
+
+ if (argc == 4) {
+ config.flags = 0;
+ config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]);
+ config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]);
+ if (config.tx_type < 0 || config.rx_filter < 0) {
+ usage();
+ return 2;
+ }
+ }
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ strcpy(ifr.ifr_name, argv[1]);
+ ifr.ifr_data = (caddr_t)&config;
+
+ if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) {
+ perror("ioctl");
+ return 1;
+ }
+
+ printf("flags = %#x\n", config.flags);
+ name = lookup_name(tx_types, N_TX_TYPES, config.tx_type);
+ if (name)
+ printf("tx_type = %s\n", name);
+ else
+ printf("tx_type = %d\n", config.tx_type);
+ name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter);
+ if (name)
+ printf("rx_filter = %s\n", name);
+ else
+ printf("rx_filter = %d\n", config.rx_filter);
+
+ return 0;
+}
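[Editor's note] With one argument the program takes the SIOCGHWTSTAMP branch and prints the current configuration; with three it takes SIOCSHWTSTAMP and applies the named tx_type/rx_filter, matched case-insensitively against the tables above. Hypothetical invocations ("eth0" is a placeholder interface):

	./hwtstamp_config eth0           # SIOCGHWTSTAMP: print current settings
	./hwtstamp_config eth0 ON ALL    # SIOCSHWTSTAMP: TX stamping on, stamp all RX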
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
new file mode 100644
index 000000000..c6428f1ac
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/rxtimestamp.c
@@ -0,0 +1,389 @@
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct options {
+ int so_timestamp;
+ int so_timestampns;
+ int so_timestamping;
+};
+
+struct tstamps {
+ bool tstamp;
+ bool tstampns;
+ bool swtstamp;
+ bool hwtstamp;
+};
+
+struct socket_type {
+ char *friendly_name;
+ int type;
+ int protocol;
+ bool enabled;
+};
+
+struct test_case {
+ struct options sockopt;
+ struct tstamps expected;
+ bool enabled;
+};
+
+struct sof_flag {
+ int mask;
+ char *name;
+};
+
+static struct sof_flag sof_flags[] = {
+#define SOF_FLAG(f) { f, #f }
+ SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+};
+
+static struct socket_type socket_types[] = {
+ { "ip", SOCK_RAW, IPPROTO_EGP },
+ { "udp", SOCK_DGRAM, IPPROTO_UDP },
+ { "tcp", SOCK_STREAM, IPPROTO_TCP },
+};
+
+static struct test_case test_cases[] = {
+ { {}, {} },
+ {
+ { so_timestamp: 1 },
+ { tstamp: true }
+ },
+ {
+ { so_timestampns: 1 },
+ { tstampns: true }
+ },
+ {
+ { so_timestamp: 1, so_timestampns: 1 },
+ { tstampns: true }
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ {}
+ },
+ {
+ /* Loopback device does not support hw timestamps. */
+ { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ | SOF_TIMESTAMPING_RX_HARDWARE },
+ {}
+ },
+ {
+ { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE },
+ { swtstamp: true }
+ },
+ {
+ { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE },
+ { tstamp: true, swtstamp: true }
+ },
+};
+
+static struct option long_options[] = {
+ { "list_tests", no_argument, 0, 'l' },
+ { "test_num", required_argument, 0, 'n' },
+ { "op_size", required_argument, 0, 's' },
+ { "tcp", no_argument, 0, 't' },
+ { "udp", no_argument, 0, 'u' },
+ { "ip", no_argument, 0, 'i' },
+ { NULL, 0, NULL, 0 },
+};
+
+static int next_port = 19999;
+static int op_size = 10 * 1024;
+
+void print_test_case(struct test_case *t)
+{
+ int f = 0;
+
+ printf("sockopts {");
+ if (t->sockopt.so_timestamp)
+ printf(" SO_TIMESTAMP ");
+ if (t->sockopt.so_timestampns)
+ printf(" SO_TIMESTAMPNS ");
+ if (t->sockopt.so_timestamping) {
+ printf(" SO_TIMESTAMPING: {");
+ for (f = 0; f < ARRAY_SIZE(sof_flags); f++)
+ if (t->sockopt.so_timestamping & sof_flags[f].mask)
+ printf(" %s |", sof_flags[f].name);
+ printf("}");
+ }
+ printf("} expected cmsgs: {");
+ if (t->expected.tstamp)
+ printf(" SCM_TIMESTAMP ");
+ if (t->expected.tstampns)
+ printf(" SCM_TIMESTAMPNS ");
+ if (t->expected.swtstamp || t->expected.hwtstamp) {
+ printf(" SCM_TIMESTAMPING {");
+ if (t->expected.swtstamp)
+ printf("0");
+ if (t->expected.swtstamp && t->expected.hwtstamp)
+ printf(",");
+ if (t->expected.hwtstamp)
+ printf("2");
+ printf("}");
+ }
+ printf("}\n");
+}
+
+void do_send(int src)
+{
+ int r;
+ char *buf = malloc(op_size);
+
+ memset(buf, 'z', op_size);
+ r = write(src, buf, op_size);
+ if (r < 0)
+ error(1, errno, "Failed to sendmsg");
+
+ free(buf);
+}
+
+bool do_recv(int rcv, int read_size, struct tstamps expected)
+{
+ const int CMSG_SIZE = 1024;
+
+ struct scm_timestamping *ts;
+ struct tstamps actual = {};
+ char cmsg_buf[CMSG_SIZE];
+ struct iovec recv_iov;
+ struct cmsghdr *cmsg;
+ bool failed = false;
+ struct msghdr hdr;
+ int flags = 0;
+ int r;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_iov = &recv_iov;
+ hdr.msg_iovlen = 1;
+ recv_iov.iov_base = malloc(read_size);
+ recv_iov.iov_len = read_size;
+
+ hdr.msg_control = cmsg_buf;
+ hdr.msg_controllen = sizeof(cmsg_buf);
+
+ r = recvmsg(rcv, &hdr, flags);
+ if (r < 0)
+ error(1, errno, "Failed to recvmsg");
+ if (r != read_size)
+ error(1, 0, "Only received %d bytes of payload.", r);
+
+ if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
+ error(1, 0, "Message was truncated.");
+
+ for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ error(1, 0, "Unexpected cmsg_level %d",
+ cmsg->cmsg_level);
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMP:
+ actual.tstamp = true;
+ break;
+ case SCM_TIMESTAMPNS:
+ actual.tstampns = true;
+ break;
+ case SCM_TIMESTAMPING:
+ ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+ actual.swtstamp = !!ts->ts[0].tv_sec;
+ if (ts->ts[1].tv_sec != 0)
+ error(0, 0, "ts[1] should not be set.");
+ actual.hwtstamp = !!ts->ts[2].tv_sec;
+ break;
+ default:
+ error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type);
+ }
+ }
+
+#define VALIDATE(field) \
+ do { \
+ if (expected.field != actual.field) { \
+ if (expected.field) \
+ error(0, 0, "Expected " #field " to be set."); \
+ else \
+ error(0, 0, \
+ "Expected " #field " to not be set."); \
+ failed = true; \
+ } \
+ } while (0)
+
+ VALIDATE(tstamp);
+ VALIDATE(tstampns);
+ VALIDATE(swtstamp);
+ VALIDATE(hwtstamp);
+#undef VALIDATE
+
+ free(recv_iov.iov_base);
+
+ return failed;
+}
+
+void config_so_flags(int rcv, struct options o)
+{
+ int on = 1;
+
+ if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0)
+ error(1, errno, "Failed to enable SO_REUSEADDR");
+
+ if (o.so_timestamp &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP,
+ &o.so_timestamp, sizeof(o.so_timestamp)) < 0)
+ error(1, errno, "Failed to enable SO_TIMESTAMP");
+
+ if (o.so_timestampns &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS,
+ &o.so_timestampns, sizeof(o.so_timestampns)) < 0)
+ error(1, errno, "Failed to enable SO_TIMESTAMPNS");
+
+ if (o.so_timestamping &&
+ setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING,
+ &o.so_timestamping, sizeof(o.so_timestamping)) < 0)
+ error(1, errno, "Failed to set SO_TIMESTAMPING");
+}
+
+bool run_test_case(struct socket_type s, struct test_case t)
+{
+ int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ int read_size = op_size;
+ struct sockaddr_in addr;
+ bool failed = false;
+ int src, dst, rcv;
+
+ src = socket(AF_INET, s.type, s.protocol);
+ if (src < 0)
+ error(1, errno, "Failed to open src socket");
+
+ dst = socket(AF_INET, s.type, s.protocol);
+ if (dst < 0)
+ error(1, errno, "Failed to open dst socket");
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.sin_port = htons(port);
+
+ if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(1, errno, "Failed to bind to port %d", port);
+
+ if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ error(1, errno, "Failed to listen");
+
+ if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ error(1, errno, "Failed to connect");
+
+ if (s.type == SOCK_STREAM) {
+ rcv = accept(dst, NULL, NULL);
+ if (rcv < 0)
+ error(1, errno, "Failed to accept");
+ close(dst);
+ } else {
+ rcv = dst;
+ }
+
+ config_so_flags(rcv, t.sockopt);
+ usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
+ do_send(src);
+
+ if (s.type == SOCK_RAW)
+ read_size += 20; /* for IP header */
+ failed = do_recv(rcv, read_size, t.expected);
+
+ close(rcv);
+ close(src);
+
+ return failed;
+}
+
+int main(int argc, char **argv)
+{
+ bool all_protocols = true;
+ bool all_tests = true;
+ int arg_index = 0;
+ int failures = 0;
+ int s, t, opt;
+
+ while ((opt = getopt_long(argc, argv, "", long_options,
+ &arg_index)) != -1) {
+ switch (opt) {
+ case 'l':
+ for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+ printf("%d\t", t);
+ print_test_case(&test_cases[t]);
+ }
+ return 0;
+ case 'n':
+ t = atoi(optarg);
+ if (t >= ARRAY_SIZE(test_cases))
+ error(1, 0, "Invalid test case: %d", t);
+ all_tests = false;
+ test_cases[t].enabled = true;
+ break;
+ case 's':
+ op_size = atoi(optarg);
+ break;
+ case 't':
+ all_protocols = false;
+ socket_types[2].enabled = true;
+ break;
+ case 'u':
+ all_protocols = false;
+ socket_types[1].enabled = true;
+ break;
+ case 'i':
+ all_protocols = false;
+ socket_types[0].enabled = true;
+ break;
+ default:
+ error(1, 0, "Failed to parse parameters.");
+ }
+ }
+
+ for (s = 0; s < ARRAY_SIZE(socket_types); s++) {
+ if (!all_protocols && !socket_types[s].enabled)
+ continue;
+
+ printf("Testing %s...\n", socket_types[s].friendly_name);
+ for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
+ if (!all_tests && !test_cases[t].enabled)
+ continue;
+
+ printf("Starting testcase %d...\n", t);
+ if (run_test_case(socket_types[s], test_cases[t])) {
+ failures++;
+ printf("FAILURE in test case ");
+ print_test_case(&test_cases[t]);
+ }
+ }
+ }
+ if (!failures)
+ printf("PASSED.\n");
+ return failures;
+}
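[Editor's note] Typical invocations, using the long options defined above (the test-case index is the 0-based position in the test_cases array, so 2 is the SO_TIMESTAMPNS case):

	./rxtimestamp --list_tests          # print index and description of each case
	./rxtimestamp --test_num 2 --udp    # run only case 2, UDP sockets only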
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/networking/timestamping/timestamping.c
new file mode 100644
index 000000000..900ed4b47
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/timestamping.c
@@ -0,0 +1,534 @@
+/*
+ * This program demonstrates how the various time stamping features in
+ * the Linux kernel work. It emulates the behavior of a PTP
+ * implementation in stand-alone master mode by sending PTPv1 Sync
+ * multicasts once every second. It looks for similar packets, but
+ * beyond that doesn't actually implement PTP.
+ *
+ * Outgoing packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support.
+ *
+ * Incoming packets are time stamped with SO_TIMESTAMPING with or
+ * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and
+ * SO_TIMESTAMP[NS].
+ *
+ * Copyright (C) 2009 Intel Corporation.
+ * Author: Patrick Ohly <patrick.ohly@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/ioctl.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <asm/types.h>
+#include <linux/net_tstamp.h>
+#include <linux/errqueue.h>
+
+#ifndef SO_TIMESTAMPING
+# define SO_TIMESTAMPING 37
+# define SCM_TIMESTAMPING SO_TIMESTAMPING
+#endif
+
+#ifndef SO_TIMESTAMPNS
+# define SO_TIMESTAMPNS 35
+#endif
+
+#ifndef SIOCGSTAMPNS
+# define SIOCGSTAMPNS 0x8907
+#endif
+
+#ifndef SIOCSHWTSTAMP
+# define SIOCSHWTSTAMP 0x89b0
+#endif
+
+static void usage(const char *error)
+{
+ if (error)
+ printf("invalid option: %s\n", error);
+ printf("timestamping interface option*\n\n"
+ "Options:\n"
+ " IP_MULTICAST_LOOP - looping outgoing multicasts\n"
+ " SO_TIMESTAMP - normal software time stamping, ms resolution\n"
+ " SO_TIMESTAMPNS - more accurate software time stamping\n"
+ " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n"
+ " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n"
+ " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n"
+ " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
+ " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
+ " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+ " SIOCGSTAMP - check last socket time stamp\n"
+ " SIOCGSTAMPNS - more accurate socket time stamp\n");
+ exit(1);
+}
+
+static void bail(const char *error)
+{
+ printf("%s: %s\n", error, strerror(errno));
+ exit(1);
+}
+
+static const unsigned char sync[] = {
+ 0x00, 0x01, 0x00, 0x01,
+ 0x5f, 0x44, 0x46, 0x4c,
+ 0x54, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x01, 0x00, 0x37,
+ 0x00, 0x00, 0x00, 0x08,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x49, 0x05, 0xcd, 0x01,
+ 0x29, 0xb1, 0x8d, 0xb0,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x00, 0x00, 0x37,
+ 0x00, 0x00, 0x00, 0x04,
+ 0x44, 0x46, 0x4c, 0x54,
+ 0x00, 0x00, 0xf0, 0x60,
+ 0x00, 0x01, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x01,
+ 0x00, 0x00, 0xf0, 0x60,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x04,
+ 0x44, 0x46, 0x4c, 0x54,
+ 0x00, 0x01,
+
+ /* fake uuid */
+ 0x00, 0x01,
+ 0x02, 0x03, 0x04, 0x05,
+
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+{
+ struct timeval now;
+ int res;
+
+ res = sendto(sock, sync, sizeof(sync), 0,
+ addr, addr_len);
+ gettimeofday(&now, 0);
+ if (res < 0)
+ printf("%s: %s\n", "send", strerror(errno));
+ else
+ printf("%ld.%06ld: sent %d bytes\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ res);
+}
+
+static void printpacket(struct msghdr *msg, int res,
+ char *data,
+ int sock, int recvmsg_flags,
+ int siocgstamp, int siocgstampns)
+{
+ struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+ struct cmsghdr *cmsg;
+ struct timeval tv;
+ struct timespec ts;
+ struct timeval now;
+
+ gettimeofday(&now, 0);
+
+ printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+ res,
+ inet_ntoa(from_addr->sin_addr),
+ msg->msg_controllen);
+ for (cmsg = CMSG_FIRSTHDR(msg);
+ cmsg;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ printf(" cmsg len %zu: ", cmsg->cmsg_len);
+ switch (cmsg->cmsg_level) {
+ case SOL_SOCKET:
+ printf("SOL_SOCKET ");
+ switch (cmsg->cmsg_type) {
+ case SO_TIMESTAMP: {
+ struct timeval *stamp =
+ (struct timeval *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMP %ld.%06ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_usec);
+ break;
+ }
+ case SO_TIMESTAMPNS: {
+ struct timespec *stamp =
+ (struct timespec *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMPNS %ld.%09ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ break;
+ }
+ case SO_TIMESTAMPING: {
+ struct timespec *stamp =
+ (struct timespec *)CMSG_DATA(cmsg);
+ printf("SO_TIMESTAMPING ");
+ printf("SW %ld.%09ld ",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ stamp++;
+ /* skip deprecated HW transformed */
+ stamp++;
+ printf("HW raw %ld.%09ld",
+ (long)stamp->tv_sec,
+ (long)stamp->tv_nsec);
+ break;
+ }
+ default:
+ printf("type %d", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ case IPPROTO_IP:
+ printf("IPPROTO_IP ");
+ switch (cmsg->cmsg_type) {
+ case IP_RECVERR: {
+ struct sock_extended_err *err =
+ (struct sock_extended_err *)CMSG_DATA(cmsg);
+ printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s",
+ strerror(err->ee_errno),
+ err->ee_origin,
+#ifdef SO_EE_ORIGIN_TIMESTAMPING
+ err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ?
+ "bounced packet" : "unexpected origin"
+#else
+ "probably SO_EE_ORIGIN_TIMESTAMPING"
+#endif
+ );
+ if (res < sizeof(sync))
+ printf(" => truncated data?!");
+ else if (!memcmp(sync, data + res - sizeof(sync),
+ sizeof(sync)))
+ printf(" => GOT OUR DATA BACK (HURRAY!)");
+ break;
+ }
+ case IP_PKTINFO: {
+ struct in_pktinfo *pktinfo =
+ (struct in_pktinfo *)CMSG_DATA(cmsg);
+ printf("IP_PKTINFO interface index %u",
+ pktinfo->ipi_ifindex);
+ break;
+ }
+ default:
+ printf("type %d", cmsg->cmsg_type);
+ break;
+ }
+ break;
+ default:
+ printf("level %d type %d",
+ cmsg->cmsg_level,
+ cmsg->cmsg_type);
+ break;
+ }
+ printf("\n");
+ }
+
+ if (siocgstamp) {
+ if (ioctl(sock, SIOCGSTAMP, &tv))
+ printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno));
+ else
+ printf("SIOCGSTAMP %ld.%06ld\n",
+ (long)tv.tv_sec,
+ (long)tv.tv_usec);
+ }
+ if (siocgstampns) {
+ if (ioctl(sock, SIOCGSTAMPNS, &ts))
+ printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno));
+ else
+ printf("SIOCGSTAMPNS %ld.%09ld\n",
+ (long)ts.tv_sec,
+ (long)ts.tv_nsec);
+ }
+}
+
+static void recvpacket(int sock, int recvmsg_flags,
+ int siocgstamp, int siocgstampns)
+{
+ char data[256];
+ struct msghdr msg;
+ struct iovec entry;
+ struct sockaddr_in from_addr;
+ struct {
+ struct cmsghdr cm;
+ char control[512];
+ } control;
+ int res;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ entry.iov_base = data;
+ entry.iov_len = sizeof(data);
+ msg.msg_name = (caddr_t)&from_addr;
+ msg.msg_namelen = sizeof(from_addr);
+ msg.msg_control = &control;
+ msg.msg_controllen = sizeof(control);
+
+ res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT);
+ if (res < 0) {
+ printf("%s %s: %s\n",
+ "recvmsg",
+ (recvmsg_flags & MSG_ERRQUEUE) ? "error" : "regular",
+ strerror(errno));
+ } else {
+ printpacket(&msg, res, data,
+ sock, recvmsg_flags,
+ siocgstamp, siocgstampns);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int so_timestamping_flags = 0;
+ int so_timestamp = 0;
+ int so_timestampns = 0;
+ int siocgstamp = 0;
+ int siocgstampns = 0;
+ int ip_multicast_loop = 0;
+ char *interface;
+ int i;
+ int enabled = 1;
+ int sock;
+ struct ifreq device;
+ struct ifreq hwtstamp;
+ struct hwtstamp_config hwconfig, hwconfig_requested;
+ struct sockaddr_in addr;
+ struct ip_mreq imr;
+ struct in_addr iaddr;
+ int val;
+ socklen_t len;
+ struct timeval next;
+ size_t if_len;
+
+ if (argc < 2)
+ usage(0);
+ interface = argv[1];
+ if_len = strlen(interface);
+ if (if_len >= IFNAMSIZ) {
+ printf("interface name exceeds IFNAMSIZ\n");
+ exit(1);
+ }
+
+ for (i = 2; i < argc; i++) {
+ if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
+ so_timestamp = 1;
+ else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
+ so_timestampns = 1;
+ else if (!strcasecmp(argv[i], "SIOCGSTAMP"))
+ siocgstamp = 1;
+ else if (!strcasecmp(argv[i], "SIOCGSTAMPNS"))
+ siocgstampns = 1;
+ else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
+ ip_multicast_loop = 1;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
+ so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ else
+ usage(argv[i]);
+ }
+
+ sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (sock < 0)
+ bail("socket");
+
+ memset(&device, 0, sizeof(device));
+ memcpy(device.ifr_name, interface, if_len + 1);
+ if (ioctl(sock, SIOCGIFADDR, &device) < 0)
+ bail("getting interface IP address");
+
+ memset(&hwtstamp, 0, sizeof(hwtstamp));
+ memcpy(hwtstamp.ifr_name, interface, if_len + 1);
+ hwtstamp.ifr_data = (void *)&hwconfig;
+ memset(&hwconfig, 0, sizeof(hwconfig));
+ hwconfig.tx_type =
+ (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+ HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+ hwconfig.rx_filter =
+ (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
+ hwconfig_requested = hwconfig;
+ if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
+ if ((errno == EINVAL || errno == ENOTSUP) &&
+ hwconfig_requested.tx_type == HWTSTAMP_TX_OFF &&
+ hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE)
+ printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n");
+ else
+ bail("SIOCSHWTSTAMP");
+ }
+ printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n",
+ hwconfig_requested.tx_type, hwconfig.tx_type,
+ hwconfig_requested.rx_filter, hwconfig.rx_filter);
+
+ /* bind to PTP port */
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = htons(319 /* PTP event port */);
+ if (bind(sock,
+ (struct sockaddr *)&addr,
+ sizeof(struct sockaddr_in)) < 0)
+ bail("bind");
+
+ /* set multicast group for outgoing packets */
+ inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
+ addr.sin_addr = iaddr;
+ imr.imr_multiaddr.s_addr = iaddr.s_addr;
+ imr.imr_interface.s_addr =
+ ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr;
+ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF,
+ &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0)
+ bail("set multicast");
+
+ /* join multicast group, loop our own packet */
+ if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP,
+ &imr, sizeof(struct ip_mreq)) < 0)
+ bail("join multicast group");
+
+ if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP,
+ &ip_multicast_loop, sizeof(ip_multicast_loop)) < 0) {
+ bail("loop multicast");
+ }
+
+ /* set socket options for time stamping */
+ if (so_timestamp &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP,
+ &enabled, sizeof(enabled)) < 0)
+ bail("setsockopt SO_TIMESTAMP");
+
+ if (so_timestampns &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS,
+ &enabled, sizeof(enabled)) < 0)
+ bail("setsockopt SO_TIMESTAMPNS");
+
+ if (so_timestamping_flags &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
+ &so_timestamping_flags,
+ sizeof(so_timestamping_flags)) < 0)
+ bail("setsockopt SO_TIMESTAMPING");
+
+ /* request IP_PKTINFO for debugging purposes */
+ if (setsockopt(sock, SOL_IP, IP_PKTINFO,
+ &enabled, sizeof(enabled)) < 0)
+ printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno));
+
+ /* verify socket options */
+ len = sizeof(val);
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0)
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno));
+ else
+ printf("SO_TIMESTAMP %d\n", val);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0)
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS",
+ strerror(errno));
+ else
+ printf("SO_TIMESTAMPNS %d\n", val);
+
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+ printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
+ strerror(errno));
+ } else {
+ printf("SO_TIMESTAMPING %d\n", val);
+ if (val != so_timestamping_flags)
+ printf(" not the expected value %d\n",
+ so_timestamping_flags);
+ }
+
+ /* send packets forever every five seconds */
+ gettimeofday(&next, 0);
+ next.tv_sec = (next.tv_sec + 1) / 5 * 5;
+ next.tv_usec = 0;
+ while (1) {
+ struct timeval now;
+ struct timeval delta;
+ long delta_us;
+ int res;
+ fd_set readfs, errorfs;
+
+ gettimeofday(&now, 0);
+ delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 +
+ (long)(next.tv_usec - now.tv_usec);
+ if (delta_us > 0) {
+ /* continue waiting for timeout or data */
+ delta.tv_sec = delta_us / 1000000;
+ delta.tv_usec = delta_us % 1000000;
+
+ FD_ZERO(&readfs);
+ FD_ZERO(&errorfs);
+ FD_SET(sock, &readfs);
+ FD_SET(sock, &errorfs);
+ printf("%ld.%06ld: select %ldus\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ delta_us);
+ res = select(sock + 1, &readfs, 0, &errorfs, &delta);
+ gettimeofday(&now, 0);
+ printf("%ld.%06ld: select returned: %d, %s\n",
+ (long)now.tv_sec, (long)now.tv_usec,
+ res,
+ res < 0 ? strerror(errno) : "success");
+ if (res > 0) {
+ if (FD_ISSET(sock, &readfs))
+ printf("ready for reading\n");
+ if (FD_ISSET(sock, &errorfs))
+ printf("has error\n");
+ recvpacket(sock, 0,
+ siocgstamp,
+ siocgstampns);
+ recvpacket(sock, MSG_ERRQUEUE,
+ siocgstamp,
+ siocgstampns);
+ }
+ } else {
+ /* write one packet */
+ sendpacket(sock,
+ (struct sockaddr *)&addr,
+ sizeof(addr));
+ next.tv_sec += 5;
+ continue;
+ }
+ }
+
+ return 0;
+}
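[Editor's note] A software-only run needs no NIC timestamping support; the option keywords are exactly those listed in usage() above ("eth0" is a placeholder interface):

	./timestamping eth0 SO_TIMESTAMP SO_TIMESTAMPNS \
		SOF_TIMESTAMPING_TX_SOFTWARE SOF_TIMESTAMPING_RX_SOFTWARE \
		SOF_TIMESTAMPING_SOFTWARE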
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/networking/timestamping/txtimestamp.c
new file mode 100644
index 000000000..81a98a240
--- /dev/null
+++ b/tools/testing/selftests/networking/timestamping/txtimestamp.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright 2014 Google Inc.
+ * Author: willemb@google.com (Willem de Bruijn)
+ *
+ * Test software tx timestamping, including
+ *
+ * - SCHED, SND and ACK timestamps
+ * - RAW, UDP and TCP
+ * - IPv4 and IPv6
+ * - various packet sizes (to test GSO and TSO)
+ *
+ * Consult the command line arguments for help on running
+ * the various testcases.
+ *
+ * This test requires a dummy TCP server.
+ * A simple `nc6 [-u] -l -p $DESTPORT` will do
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. * See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/errqueue.h>
+#include <linux/if_ether.h>
+#include <linux/net_tstamp.h>
+#include <netdb.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <netinet/tcp.h>
+#include <netpacket/packet.h>
+#include <poll.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+/* command line parameters */
+static int cfg_proto = SOCK_STREAM;
+static int cfg_ipproto = IPPROTO_TCP;
+static int cfg_num_pkts = 4;
+static int do_ipv4 = 1;
+static int do_ipv6 = 1;
+static int cfg_payload_len = 10;
+static int cfg_poll_timeout = 100;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
+static bool cfg_loop_nodata;
+static bool cfg_no_delay;
+static uint16_t dest_port = 9000;
+
+static struct sockaddr_in daddr;
+static struct sockaddr_in6 daddr6;
+static struct timespec ts_prev;
+
+static void __print_timestamp(const char *name, struct timespec *cur,
+ uint32_t key, int payload_len)
+{
+ if (!(cur->tv_sec | cur->tv_nsec))
+ return;
+
+ fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec / 1000,
+ key, payload_len);
+
+ if ((ts_prev.tv_sec | ts_prev.tv_nsec)) {
+ int64_t cur_us, prev_us;
+
+ cur_us = (int64_t) cur->tv_sec * 1000 * 1000;
+ cur_us += cur->tv_nsec / 1000;
+
+ prev_us = (int64_t) ts_prev.tv_sec * 1000 * 1000;
+ prev_us += ts_prev.tv_nsec / 1000;
+
+ fprintf(stderr, " (%+" PRId64 " us)", cur_us - prev_us);
+ }
+
+ ts_prev = *cur;
+ fprintf(stderr, "\n");
+}
+
+static void print_timestamp_usr(void)
+{
+ struct timespec ts;
+ struct timeval tv; /* avoid dependency on -lrt */
+
+ gettimeofday(&tv, NULL);
+ ts.tv_sec = tv.tv_sec;
+ ts.tv_nsec = tv.tv_usec * 1000;
+
+ __print_timestamp(" USR", &ts, 0, 0);
+}
+
+static void print_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey, int payload_len)
+{
+ const char *tsname;
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ tsname = " ENQ";
+ break;
+ case SCM_TSTAMP_SND:
+ tsname = " SND";
+ break;
+ case SCM_TSTAMP_ACK:
+ tsname = " ACK";
+ break;
+ default:
+ error(1, 0, "unknown timestamp type: %u",
+ tstype);
+ }
+ __print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
+}
+
+/* TODO: convert to check_and_print payload once API is stable */
+static void print_payload(char *data, int len)
+{
+ int i;
+
+ if (!len)
+ return;
+
+ if (len > 70)
+ len = 70;
+
+ fprintf(stderr, "payload: ");
+ for (i = 0; i < len; i++)
+ fprintf(stderr, "%02hhx ", data[i]);
+ fprintf(stderr, "\n");
+}
+
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+ char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+
+ fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n",
+ ifindex,
+ saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown",
+ daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
+}
+
+static void __poll(int fd)
+{
+ struct pollfd pollfd;
+ int ret;
+
+ memset(&pollfd, 0, sizeof(pollfd));
+ pollfd.fd = fd;
+ ret = poll(&pollfd, 1, cfg_poll_timeout);
+ if (ret != 1)
+ error(1, errno, "poll");
+}
+
+static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+ int batch = 0;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *) CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR)) {
+ serr = (void *) CMSG_DATA(cm);
+ if (serr->ee_errno != ENOMSG ||
+ serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
+ fprintf(stderr, "unknown ip error %d %d\n",
+ serr->ee_errno,
+ serr->ee_origin);
+ serr = NULL;
+ }
+ } else if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_PKTINFO) {
+ struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET, info->ipi_ifindex,
+ &info->ipi_spec_dst, &info->ipi_addr);
+ } else if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+ NULL, &info6->ipi6_addr);
+ } else
+ fprintf(stderr, "unknown cmsg %d,%d\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ if (serr && tss) {
+ print_timestamp(tss, serr->ee_info, serr->ee_data,
+ payload_len);
+ serr = NULL;
+ tss = NULL;
+ batch++;
+ }
+ }
+
+ if (batch > 1)
+ fprintf(stderr, "batched %d timestamps\n", batch);
+}
+
+static int recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision*/];
+ static struct msghdr msg;
+ struct iovec entry;
+ static char *data;
+ int ret = 0;
+
+ data = malloc(cfg_payload_len);
+ if (!data)
+ error(1, 0, "malloc");
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (ret == -1 && errno != EAGAIN)
+ error(1, errno, "recvmsg");
+
+ if (ret >= 0) {
+ __recv_errmsg_cmsg(&msg, ret);
+ if (cfg_show_payload)
+ print_payload(data, cfg_payload_len);
+ }
+
+ free(data);
+ return ret == -1;
+}
+
+static void do_test(int family, unsigned int opt)
+{
+ char *buf;
+ int fd, i, val = 1, total_len;
+
+ if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
+ /* due to lack of checksum generation code */
+ fprintf(stderr, "test: skipping datagram over IPv6\n");
+ return;
+ }
+
+ total_len = cfg_payload_len;
+ if (cfg_proto == SOCK_RAW) {
+ total_len += sizeof(struct udphdr);
+ if (cfg_ipproto == IPPROTO_RAW)
+ total_len += sizeof(struct iphdr);
+ }
+
+ buf = malloc(total_len);
+ if (!buf)
+ error(1, 0, "malloc");
+
+ fd = socket(family, cfg_proto, cfg_ipproto);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ if (cfg_proto == SOCK_STREAM) {
+ if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
+ (char*) &val, sizeof(val)))
+ error(1, 0, "setsockopt no nagle");
+
+ if (family == PF_INET) {
+ if (connect(fd, (void *) &daddr, sizeof(daddr)))
+ error(1, errno, "connect ipv4");
+ } else {
+ if (connect(fd, (void *) &daddr6, sizeof(daddr6)))
+ error(1, errno, "connect ipv6");
+ }
+ }
+
+ if (cfg_do_pktinfo) {
+ if (family == AF_INET6) {
+ if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv6");
+ } else {
+ if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv4");
+ }
+ }
+
+ opt |= SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_CMSG |
+ SOF_TIMESTAMPING_OPT_ID;
+ if (cfg_loop_nodata)
+ opt |= SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &opt, sizeof(opt)))
+ error(1, 0, "setsockopt timestamping");
+
+ for (i = 0; i < cfg_num_pkts; i++) {
+ memset(&ts_prev, 0, sizeof(ts_prev));
+ memset(buf, 'a' + i, total_len);
+
+ if (cfg_proto == SOCK_RAW) {
+ struct udphdr *udph;
+ int off = 0;
+
+ if (cfg_ipproto == IPPROTO_RAW) {
+ struct iphdr *iph = (void *) buf;
+
+ memset(iph, 0, sizeof(*iph));
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 2;
+ iph->daddr = daddr.sin_addr.s_addr;
+ iph->protocol = IPPROTO_UDP;
+ /* kernel writes saddr, csum, len */
+
+ off = sizeof(*iph);
+ }
+
+ udph = (void *) buf + off;
+ udph->source = ntohs(9000); /* random spoof */
+ udph->dest = ntohs(dest_port);
+ udph->len = ntohs(sizeof(*udph) + cfg_payload_len);
+ udph->check = 0; /* not allowed for IPv6 */
+ }
+
+ print_timestamp_usr();
+ if (cfg_proto != SOCK_STREAM) {
+ if (family == PF_INET)
+ val = sendto(fd, buf, total_len, 0, (void *) &daddr, sizeof(daddr));
+ else
+ val = sendto(fd, buf, total_len, 0, (void *) &daddr6, sizeof(daddr6));
+ } else {
+ val = send(fd, buf, cfg_payload_len, 0);
+ }
+ if (val != total_len)
+ error(1, errno, "send");
+
+ /* wait for all errors to be queued, else ACKs arrive OOO */
+ if (!cfg_no_delay)
+ usleep(50 * 1000);
+
+ __poll(fd);
+
+ while (!recv_errmsg(fd)) {}
+ }
+
+ if (close(fd))
+ error(1, errno, "close");
+
+ free(buf);
+ usleep(400 * 1000);
+}
+
+static void __attribute__((noreturn)) usage(const char *filepath)
+{
+ fprintf(stderr, "\nUsage: %s [options] hostname\n"
+ "\nwhere options are:\n"
+ " -4: only IPv4\n"
+ " -6: only IPv6\n"
+ " -h: show this message\n"
+ " -c N: number of packets for each test\n"
+ " -D: no delay between packets\n"
+ " -F: poll() waits forever for an event\n"
+ " -I: request PKTINFO\n"
+ " -l N: send N bytes at a time\n"
+ " -n: set no-payload option\n"
+ " -r: use raw\n"
+ " -R: use raw (IP_HDRINCL)\n"
+ " -p N: connect to port N\n"
+ " -u: use udp\n"
+ " -x: show payload (up to 70 bytes)\n",
+ filepath);
+ exit(1);
+}
+
+static void parse_opt(int argc, char **argv)
+{
+ int proto_count = 0;
+ int c; /* getopt() returns int */
+
+ while ((c = getopt(argc, argv, "46c:DFhIl:np:rRux")) != -1) {
+ switch (c) {
+ case '4':
+ do_ipv6 = 0;
+ break;
+ case '6':
+ do_ipv4 = 0;
+ break;
+ case 'c':
+ cfg_num_pkts = strtoul(optarg, NULL, 10);
+ break;
+ case 'D':
+ cfg_no_delay = true;
+ break;
+ case 'F':
+ cfg_poll_timeout = -1;
+ break;
+ case 'I':
+ cfg_do_pktinfo = true;
+ break;
+ case 'n':
+ cfg_loop_nodata = true;
+ break;
+ case 'r':
+ proto_count++;
+ cfg_proto = SOCK_RAW;
+ cfg_ipproto = IPPROTO_UDP;
+ break;
+ case 'R':
+ proto_count++;
+ cfg_proto = SOCK_RAW;
+ cfg_ipproto = IPPROTO_RAW;
+ break;
+ case 'u':
+ proto_count++;
+ cfg_proto = SOCK_DGRAM;
+ cfg_ipproto = IPPROTO_UDP;
+ break;
+ case 'l':
+ cfg_payload_len = strtoul(optarg, NULL, 10);
+ break;
+ case 'p':
+ dest_port = strtoul(optarg, NULL, 10);
+ break;
+ case 'x':
+ cfg_show_payload = true;
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ if (!cfg_payload_len)
+ error(1, 0, "payload may not be nonzero");
+ if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472)
+ error(1, 0, "udp packet might exceed expected MTU");
+ if (!do_ipv4 && !do_ipv6)
+ error(1, 0, "pass -4 or -6, not both");
+ if (proto_count > 1)
+ error(1, 0, "pass -r, -R or -u, not multiple");
+
+ if (optind != argc - 1)
+ error(1, 0, "missing required hostname argument");
+}
+
+static void resolve_hostname(const char *hostname)
+{
+ struct addrinfo *addrs, *cur;
+ int have_ipv4 = 0, have_ipv6 = 0;
+
+ if (getaddrinfo(hostname, NULL, NULL, &addrs))
+ error(1, errno, "getaddrinfo");
+
+ cur = addrs;
+ while (cur && (!have_ipv4 || !have_ipv6)) {
+ if (!have_ipv4 && cur->ai_family == AF_INET) {
+ memcpy(&daddr, cur->ai_addr, sizeof(daddr));
+ daddr.sin_port = htons(dest_port);
+ have_ipv4 = 1;
+ }
+ else if (!have_ipv6 && cur->ai_family == AF_INET6) {
+ memcpy(&daddr6, cur->ai_addr, sizeof(daddr6));
+ daddr6.sin6_port = htons(dest_port);
+ have_ipv6 = 1;
+ }
+ cur = cur->ai_next;
+ }
+ if (addrs)
+ freeaddrinfo(addrs);
+
+ do_ipv4 &= have_ipv4;
+ do_ipv6 &= have_ipv6;
+}
+
+static void do_main(int family)
+{
+ fprintf(stderr, "family: %s\n",
+ family == PF_INET ? "INET" : "INET6");
+
+ fprintf(stderr, "test SND\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE);
+
+ fprintf(stderr, "test ENQ\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED);
+
+ fprintf(stderr, "test ENQ + SND\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE);
+
+ if (cfg_proto == SOCK_STREAM) {
+ fprintf(stderr, "\ntest ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_ACK);
+
+ fprintf(stderr, "\ntest SND + ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK);
+
+ fprintf(stderr, "\ntest ENQ + SND + ACK\n");
+ do_test(family, SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK);
+ }
+}
+
+const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" };
+
+int main(int argc, char **argv)
+{
+ if (argc == 1)
+ usage(argv[0]);
+
+ parse_opt(argc, argv);
+ resolve_hostname(argv[argc - 1]);
+
+ fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]);
+ fprintf(stderr, "payload: %u\n", cfg_payload_len);
+ fprintf(stderr, "server port: %u\n", dest_port);
+ fprintf(stderr, "\n");
+
+ if (do_ipv4)
+ do_main(PF_INET);
+ if (do_ipv6)
+ do_main(PF_INET6);
+
+ return 0;
+}
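[Editor's note] Per the header comment, a dummy listener must be running on the destination port (9000 by default). A plausible session sending two UDP packets over IPv4, which prints USR, ENQ and SND timestamp lines (ACK applies to TCP only):

	nc6 -u -l -p 9000 > /dev/null &     # dummy server, as suggested in the header
	./txtimestamp -u -4 -c 2 localhost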
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/nsfs/.gitignore
new file mode 100644
index 000000000..2ab2c824c
--- /dev/null
+++ b/tools/testing/selftests/nsfs/.gitignore
@@ -0,0 +1,2 @@
+owner
+pidns
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/nsfs/Makefile
new file mode 100644
index 000000000..9ff7c7f80
--- /dev/null
+++ b/tools/testing/selftests/nsfs/Makefile
@@ -0,0 +1,5 @@
+TEST_GEN_PROGS := owner pidns
+
+CFLAGS := -Wall -Werror
+
+include ../lib.mk
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/nsfs/config
new file mode 100644
index 000000000..598d0a225
--- /dev/null
+++ b/tools/testing/selftests/nsfs/config
@@ -0,0 +1,3 @@
+CONFIG_USER_NS=y
+CONFIG_UTS_NS=y
+CONFIG_PID_NS=y
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/nsfs/owner.c
new file mode 100644
index 000000000..96a976c74
--- /dev/null
+++ b/tools/testing/selftests/nsfs/owner.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define NSIO 0xb7
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ int pfd[2], ns, uns, init_uns;
+ struct stat st1, st2;
+ char path[128];
+ pid_t pid;
+ char c;
+
+ if (pipe(pfd))
+ return 1;
+
+ pid = fork();
+ if (pid < 0)
+ return pr_err("fork");
+ if (pid == 0) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (unshare(CLONE_NEWUTS | CLONE_NEWUSER))
+ return pr_err("unshare");
+ close(pfd[0]);
+ close(pfd[1]);
+ while (1)
+ sleep(1);
+ return 0;
+ }
+ close(pfd[1]);
+ if (read(pfd[0], &c, 1) != 0)
+ return pr_err("Unable to read from pipe");
+ close(pfd[0]);
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/uts", pid);
+ ns = open(path, O_RDONLY);
+ if (ns < 0)
+ return pr_err("Unable to open %s", path);
+
+ uns = ioctl(ns, NS_GET_USERNS);
+ if (uns < 0)
+ return pr_err("Unable to get an owning user namespace");
+
+ if (fstat(uns, &st1))
+ return pr_err("fstat");
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ if (stat(path, &st2))
+ return pr_err("stat");
+
+ if (st1.st_ino != st2.st_ino)
+ return pr_err("NS_GET_USERNS returned a wrong namespace");
+
+ init_uns = ioctl(uns, NS_GET_USERNS);
+ if (init_uns < 0)
+ return pr_err("Unable to get an owning user namespace");
+
+ if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+
+ if (unshare(CLONE_NEWUSER))
+ return pr_err("unshare");
+
+ if (ioctl(ns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+ if (ioctl(init_uns, NS_GET_USERNS) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+
+ kill(pid, SIGKILL);
+ wait(NULL);
+ return 0;
+}
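[Editor's note] The ioctl chain above can be cross-checked from a shell: owner.c compares the inode of the fd returned by NS_GET_USERNS against /proc/<pid>/ns/user, and those inode numbers are also visible via readlink (a sketch; the numbers differ per boot):

	readlink /proc/$$/ns/uts     # e.g. uts:[4026531838]
	readlink /proc/$$/ns/user    # the namespace NS_GET_USERNS should resolve to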
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
new file mode 100644
index 000000000..e0d86e166
--- /dev/null
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+#define NSIO 0xb7
+#define NS_GET_USERNS _IO(NSIO, 0x1)
+#define NS_GET_PARENT _IO(NSIO, 0x2)
+
+#define __stack_aligned__ __attribute__((aligned(16)))
+struct cr_clone_arg {
+ char stack[128] __stack_aligned__;
+ char stack_ptr[0];
+};
+
+static int child(void *args)
+{
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ while (1)
+ sleep(1);
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ char *ns_strs[] = {"pid", "user"};
+ char path[] = "/proc/0123456789/ns/user"; /* sized for the longest name below */
+ struct cr_clone_arg ca;
+ struct stat st1, st2;
+ int ns, pns, i;
+ pid_t pid;
+
+ pid = clone(child, ca.stack_ptr, CLONE_NEWUSER | CLONE_NEWPID | SIGCHLD, NULL);
+ if (pid < 0)
+ return pr_err("clone");
+
+ for (i = 0; i < 2; i++) {
+ snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns_strs[i]);
+ ns = open(path, O_RDONLY);
+ if (ns < 0)
+ return pr_err("Unable to open %s", path);
+
+ pns = ioctl(ns, NS_GET_PARENT);
+ if (pns < 0)
+ return pr_err("Unable to get a parent pidns");
+
+ snprintf(path, sizeof(path), "/proc/self/ns/%s", ns_strs[i]);
+ if (stat(path, &st2))
+ return pr_err("Unable to stat %s", path);
+ if (fstat(pns, &st1))
+ return pr_err("Unable to stat the parent pidns");
+ if (st1.st_ino != st2.st_ino)
+ return pr_err("NS_GET_PARENT returned a wrong namespace");
+
+ if (ioctl(pns, NS_GET_PARENT) >= 0 || errno != EPERM)
+ return pr_err("Don't get EPERM");
+ }
+
+ kill(pid, SIGKILL);
+ wait(NULL);
+ return 0;
+}
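[Editor's note] A rough shell analogue of the child setup in pidns.c, assuming util-linux unshare(1) is available (the C test uses clone() directly so it can then walk NS_GET_PARENT, which has no shell equivalent):

	unshare --user --pid --fork readlink /proc/self/ns/pid /proc/self/ns/user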
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
new file mode 100755
index 000000000..17ca36403
--- /dev/null
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -0,0 +1,590 @@
+#!/bin/bash
+# Copyright (c) 2016 Microsemi. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Logan Gunthorpe <logang@deltatee.com>
+
+REMOTE_HOST=
+LIST_DEVS=FALSE
+
+DEBUGFS=${DEBUGFS-/sys/kernel/debug}
+
+PERF_RUN_ORDER=32
+MAX_MW_SIZE=0
+RUN_DMA_TESTS=
+DONT_CLEANUP=
+MW_SIZE=65536
+
+function show_help()
+{
+ echo "Usage: $0 [OPTIONS] LOCAL_DEV REMOTE_DEV"
+ echo "Run tests on a pair of NTB endpoints."
+ echo
+ echo "If the NTB device loops back to the same host then,"
+ echo "just specifying the two PCI ids on the command line is"
+ echo "sufficient. Otherwise, if the NTB link spans two hosts"
+ echo "use the -r option to specify the hostname for the remote"
+ echo "device. SSH will then be used to test the remote side."
+ echo "An SSH key between the root users of the host would then"
+ echo "be highly recommended."
+ echo
+ echo "Options:"
+ echo " -C don't cleanup ntb modules on exit"
+ echo " -h show this help message"
+ echo " -l list available local and remote PCI ids"
+ echo " -r REMOTE_HOST specify the remote's hostname to connect"
+ echo " to for the test (using ssh)"
+ echo " -m MW_SIZE memory window size for ntb_tool"
+ echo " (default: $MW_SIZE)"
+ echo " -d run dma tests for ntb_perf"
+ echo " -p ORDER total data order for ntb_perf"
+ echo " (default: $PERF_RUN_ORDER)"
+ echo " -w MAX_MW_SIZE maxmium memory window size for ntb_perf"
+ echo
+}
+
+function parse_args()
+{
+ OPTIND=0
+ while getopts "b:Cdhlm:r:p:w:" opt; do
+ case "$opt" in
+ C) DONT_CLEANUP=1 ;;
+ d) RUN_DMA_TESTS=1 ;;
+ h) show_help; exit 0 ;;
+ l) LIST_DEVS=TRUE ;;
+ m) MW_SIZE=${OPTARG} ;;
+ r) REMOTE_HOST=${OPTARG} ;;
+ p) PERF_RUN_ORDER=${OPTARG} ;;
+ w) MAX_MW_SIZE=${OPTARG} ;;
+ \?)
+ echo "Invalid option: -$OPTARG" >&2
+ exit 1
+ ;;
+ esac
+ done
+}
+
+parse_args "$@"
+shift $((OPTIND-1))
+LOCAL_DEV=$1
+shift
+parse_args "$@"
+shift $((OPTIND-1))
+REMOTE_DEV=$1
+shift
+parse_args "$@"
+
+set -e
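[Editor's note] Calling parse_args three times with shifts in between lets options appear before, between, or after the two positional device ids. An illustrative invocation (the PCI ids and hostname are placeholders):

	./ntb_test.sh -m 131072 0000:03:00.1 -r otherhost 0000:81:00.1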
+
+function _modprobe()
+{
+ modprobe "$@"
+
+ if [[ "$REMOTE_HOST" != "" ]]; then
+ ssh "$REMOTE_HOST" modprobe "$@"
+ fi
+}
+
+function split_remote()
+{
+ VPATH=$1
+ REMOTE=
+
+ if [[ "$VPATH" == *":/"* ]]; then
+ REMOTE=${VPATH%%:*}
+ VPATH=${VPATH#*:}
+ fi
+}
+
+function read_file()
+{
+ split_remote $1
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" cat "$VPATH"
+ else
+ cat "$VPATH"
+ fi
+}
+
+function write_file()
+{
+ split_remote $2
+ VALUE=$1
+
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" "echo \"$VALUE\" > \"$VPATH\""
+ else
+ echo "$VALUE" > "$VPATH"
+ fi
+}
+
+function check_file()
+{
+ split_remote $1
+
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" "[[ -e ${VPATH} ]]"
+ else
+ [[ -e ${VPATH} ]]
+ fi
+}
+
+function subdirname()
+{
+ echo $(basename $(dirname $1)) 2> /dev/null
+}
+
+function find_pidx()
+{
+ PORT=$1
+ PPATH=$2
+
+ for ((i = 0; i < 64; i++)); do
+ PEER_DIR="$PPATH/peer$i"
+
+ check_file ${PEER_DIR} || break
+
+ PEER_PORT=$(read_file "${PEER_DIR}/port")
+ if [[ ${PORT} -eq $PEER_PORT ]]; then
+ echo $i
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+function port_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running port tests on: $(basename $LOC) / $(basename $REM)"
+
+ LOCAL_PORT=$(read_file "$LOC/port")
+ REMOTE_PORT=$(read_file "$REM/port")
+
+ LOCAL_PIDX=$(find_pidx ${REMOTE_PORT} "$LOC")
+ REMOTE_PIDX=$(find_pidx ${LOCAL_PORT} "$REM")
+
+ echo "Local port ${LOCAL_PORT} with index ${REMOTE_PIDX} on remote host"
+ echo "Peer port ${REMOTE_PORT} with index ${LOCAL_PIDX} on local host"
+
+ echo " Passed"
+}
+
+function link_test()
+{
+ LOC=$1
+ REM=$2
+ EXP=0
+
+ echo "Running link tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ if ! write_file "N" "$LOC/../link" 2> /dev/null; then
+ echo " Unsupported"
+ return
+ fi
+
+ write_file "N" "$LOC/link_event"
+
+ if [[ $(read_file "$REM/link") != "N" ]]; then
+ echo "Expected link to be down in $REM/link" >&2
+ exit -1
+ fi
+
+ write_file "Y" "$LOC/../link"
+
+ echo " Passed"
+}
+
+function doorbell_test()
+{
+ LOC=$1
+ REM=$2
+ EXP=0
+
+ echo "Running db tests on: $(basename $LOC) / $(basename $REM)"
+
+ DB_VALID_MASK=$(read_file "$LOC/db_valid_mask")
+
+ write_file "c $DB_VALID_MASK" "$REM/db"
+
+ for ((i = 0; i < 64; i++)); do
+ DB=$(read_file "$REM/db")
+ if [[ "$DB" -ne "$EXP" ]]; then
+ echo "Doorbell doesn't match expected value $EXP " \
+ "in $REM/db" >&2
+			exit 1
+ fi
+
+ let "MASK = (1 << $i) & $DB_VALID_MASK" || true
+ let "EXP = $EXP | $MASK" || true
+
+ write_file "s $MASK" "$LOC/peer_db"
+ done
+
+ write_file "c $DB_VALID_MASK" "$REM/db_mask"
+ write_file $DB_VALID_MASK "$REM/db_event"
+ write_file "s $DB_VALID_MASK" "$REM/db_mask"
+
+ write_file "c $DB_VALID_MASK" "$REM/db"
+
+ echo " Passed"
+}
+
+function get_files_count()
+{
+ NAME=$1
+ LOC=$2
+
+ split_remote $LOC
+
+ if [[ "$REMOTE" == "" ]]; then
+ echo $(ls -1 "$VPATH"/${NAME}* 2>/dev/null | wc -l)
+ else
+ echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
+ wc -l" 2> /dev/null)
+ fi
+}
+
+function scratchpad_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running spad tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ CNT=$(get_files_count "spad" "$LOC")
+
+ if [[ $CNT -eq 0 ]]; then
+ echo " Unsupported"
+ return
+ fi
+
+ for ((i = 0; i < $CNT; i++)); do
+ VAL=$RANDOM
+ write_file "$VAL" "$LOC/spad$i"
+ RVAL=$(read_file "$REM/../spad$i")
+
+ if [[ "$VAL" -ne "$RVAL" ]]; then
+ echo "Scratchpad $i value $RVAL doesn't match $VAL" >&2
+			exit 1
+ fi
+ done
+
+ echo " Passed"
+}
+
+function message_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running msg tests on: $(subdirname $LOC) / $(subdirname $REM)"
+
+ CNT=$(get_files_count "msg" "$LOC")
+
+ if [[ $CNT -eq 0 ]]; then
+ echo " Unsupported"
+ return
+ fi
+
+ MSG_OUTBITS_MASK=$(read_file "$LOC/../msg_inbits")
+ MSG_INBITS_MASK=$(read_file "$REM/../msg_inbits")
+
+ write_file "c $MSG_OUTBITS_MASK" "$LOC/../msg_sts"
+ write_file "c $MSG_INBITS_MASK" "$REM/../msg_sts"
+
+ for ((i = 0; i < $CNT; i++)); do
+ VAL=$RANDOM
+ write_file "$VAL" "$LOC/msg$i"
+ RVAL=$(read_file "$REM/../msg$i")
+
+ if [[ "$VAL" -ne "${RVAL%%<-*}" ]]; then
+ echo "Message $i value $RVAL doesn't match $VAL" >&2
+			exit 1
+ fi
+ done
+
+ echo " Passed"
+}
+
+function get_number()
+{
+ KEY=$1
+
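+	# Extract the hex value from a line like "Window Size   0x100000[p]"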
+ sed -n "s/^\(${KEY}\)[ \t]*\(0x[0-9a-fA-F]*\)\(\[p\]\)\?$/\2/p"
+}
+
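+# Memory window flow: mw_alloc() requests an MW_SIZE inbound window, reads
+# back the (possibly larger) aligned size and DMA address the driver chose,
+# and points the peer's outbound translation at it; mw_check() fills the
+# local window with random data and compares it byte-for-byte with the
+# remote peer_mw mapping; mw_free() tears both sides back down.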
+function mw_alloc()
+{
+ IDX=$1
+ LOC=$2
+ REM=$3
+
+ write_file $MW_SIZE "$LOC/mw_trans$IDX"
+
+ INB_MW=$(read_file "$LOC/mw_trans$IDX")
+ MW_ALIGNED_SIZE=$(echo "$INB_MW" | get_number "Window Size")
+ MW_DMA_ADDR=$(echo "$INB_MW" | get_number "DMA Address")
+
+ write_file "$MW_DMA_ADDR:$(($MW_ALIGNED_SIZE))" "$REM/peer_mw_trans$IDX"
+
+ if [[ $MW_SIZE -ne $MW_ALIGNED_SIZE ]]; then
+ echo "MW $IDX size aligned to $MW_ALIGNED_SIZE"
+ fi
+}
+
+function write_mw()
+{
+ split_remote $2
+
+ if [[ "$REMOTE" != "" ]]; then
+ ssh "$REMOTE" \
+ dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
+ else
+ dd if=/dev/urandom "of=$VPATH" 2> /dev/null || true
+ fi
+}
+
+function mw_check()
+{
+ IDX=$1
+ LOC=$2
+ REM=$3
+
+ write_mw "$LOC/mw$IDX"
+
+ split_remote "$LOC/mw$IDX"
+ if [[ "$REMOTE" == "" ]]; then
+ A=$VPATH
+ else
+ A=/tmp/ntb_test.$$.A
+ ssh "$REMOTE" cat "$VPATH" > "$A"
+ fi
+
+ split_remote "$REM/peer_mw$IDX"
+ if [[ "$REMOTE" == "" ]]; then
+ B=$VPATH
+ else
+ B=/tmp/ntb_test.$$.B
+ ssh "$REMOTE" cat "$VPATH" > "$B"
+ fi
+
+	if ! cmp -n $MW_ALIGNED_SIZE "$A" "$B"; then
+		echo "Memory window $IDX did not match!" >&2
+	fi
+
+	if [[ "$A" == /tmp/* ]]; then
+ rm "$A"
+ fi
+
+	if [[ "$B" == /tmp/* ]]; then
+ rm "$B"
+ fi
+}
+
+function mw_free()
+{
+ IDX=$1
+ LOC=$2
+ REM=$3
+
+ write_file "$MW_DMA_ADDR:0" "$REM/peer_mw_trans$IDX"
+
+ write_file 0 "$LOC/mw_trans$IDX"
+}
+
+function mw_test()
+{
+ LOC=$1
+ REM=$2
+
+ CNT=$(get_files_count "mw_trans" "$LOC")
+
+ for ((i = 0; i < $CNT; i++)); do
+ echo "Running mw$i tests on: $(subdirname $LOC) / " \
+ "$(subdirname $REM)"
+
+ mw_alloc $i $LOC $REM
+
+ mw_check $i $LOC $REM
+
+ mw_free $i $LOC $REM
+
+ echo " Passed"
+ done
+
+}
+
+function pingpong_test()
+{
+ LOC=$1
+ REM=$2
+
+ echo "Running ping pong tests on: $(basename $LOC) / $(basename $REM)"
+
+ LOC_START=$(read_file "$LOC/count")
+ REM_START=$(read_file "$REM/count")
+
+ sleep 7
+
+ LOC_END=$(read_file "$LOC/count")
+ REM_END=$(read_file "$REM/count")
+
+ if [[ $LOC_START == $LOC_END ]] || [[ $REM_START == $REM_END ]]; then
+ echo "Ping pong counter not incrementing!" >&2
+ exit 1
+ fi
+
+ echo " Passed"
+}
+
+function perf_test()
+{
+ USE_DMA=$1
+
+ if [[ $USE_DMA == "1" ]]; then
+ WITH="with"
+ else
+ WITH="without"
+ fi
+
+ _modprobe ntb_perf total_order=$PERF_RUN_ORDER \
+ max_mw_size=$MAX_MW_SIZE use_dma=$USE_DMA
+
+ echo "Running local perf test $WITH DMA"
+ write_file "$LOCAL_PIDX" "$LOCAL_PERF/run"
+ echo -n " "
+ read_file "$LOCAL_PERF/run"
+ echo " Passed"
+
+ echo "Running remote perf test $WITH DMA"
+ write_file "$REMOTE_PIDX" "$REMOTE_PERF/run"
+ echo -n " "
+ read_file "$REMOTE_PERF/run"
+ echo " Passed"
+
+ _modprobe -r ntb_perf
+}
+
+function ntb_tool_tests()
+{
+ LOCAL_TOOL="$DEBUGFS/ntb_tool/$LOCAL_DEV"
+ REMOTE_TOOL="$REMOTE_HOST:$DEBUGFS/ntb_tool/$REMOTE_DEV"
+
+ echo "Starting ntb_tool tests..."
+
+ _modprobe ntb_tool
+
+ port_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+
+ LOCAL_PEER_TOOL="$LOCAL_TOOL/peer$LOCAL_PIDX"
+ REMOTE_PEER_TOOL="$REMOTE_TOOL/peer$REMOTE_PIDX"
+
+ link_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ link_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+	# Ensure the link is up on both sides before continuing
+ write_file "Y" "$LOCAL_PEER_TOOL/link_event"
+ write_file "Y" "$REMOTE_PEER_TOOL/link_event"
+
+ doorbell_test "$LOCAL_TOOL" "$REMOTE_TOOL"
+ doorbell_test "$REMOTE_TOOL" "$LOCAL_TOOL"
+
+ scratchpad_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ scratchpad_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ message_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ message_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ mw_test "$LOCAL_PEER_TOOL" "$REMOTE_PEER_TOOL"
+ mw_test "$REMOTE_PEER_TOOL" "$LOCAL_PEER_TOOL"
+
+ _modprobe -r ntb_tool
+}
+
+function ntb_pingpong_tests()
+{
+ LOCAL_PP="$DEBUGFS/ntb_pingpong/$LOCAL_DEV"
+ REMOTE_PP="$REMOTE_HOST:$DEBUGFS/ntb_pingpong/$REMOTE_DEV"
+
+ echo "Starting ntb_pingpong tests..."
+
+ _modprobe ntb_pingpong
+
+ pingpong_test $LOCAL_PP $REMOTE_PP
+
+ _modprobe -r ntb_pingpong
+}
+
+function ntb_perf_tests()
+{
+ LOCAL_PERF="$DEBUGFS/ntb_perf/$LOCAL_DEV"
+ REMOTE_PERF="$REMOTE_HOST:$DEBUGFS/ntb_perf/$REMOTE_DEV"
+
+ echo "Starting ntb_perf tests..."
+
+ perf_test 0
+
+ if [[ $RUN_DMA_TESTS ]]; then
+ perf_test 1
+ fi
+}
+
+function cleanup()
+{
+ set +e
+ _modprobe -r ntb_tool 2> /dev/null
+ _modprobe -r ntb_perf 2> /dev/null
+ _modprobe -r ntb_pingpong 2> /dev/null
+ _modprobe -r ntb_transport 2> /dev/null
+ set -e
+}
+
+cleanup
+
+if ! [[ $DONT_CLEANUP ]]; then
+ trap cleanup EXIT
+fi
+
+if [ "$(id -u)" != "0" ]; then
+ echo "This script must be run as root" 1>&2
+ exit 1
+fi
+
+if [[ "$LIST_DEVS" == TRUE ]]; then
+ echo "Local Devices:"
+ ls -1 /sys/bus/ntb/devices
+ echo
+
+ if [[ "$REMOTE_HOST" != "" ]]; then
+ echo "Remote Devices:"
+ ssh $REMOTE_HOST ls -1 /sys/bus/ntb/devices
+ fi
+
+ exit 0
+fi
+
+if [[ "$LOCAL_DEV" == $"" ]] || [[ "$REMOTE_DEV" == $"" ]]; then
+ show_help
+ exit 1
+fi
+
+ntb_tool_tests
+echo
+ntb_pingpong_tests
+echo
+ntb_perf_tests
+echo
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
new file mode 100644
index 000000000..201b59855
--- /dev/null
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -0,0 +1,74 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for powerpc selftests
+
+# ARCH can be overridden by the user for cross compiling
+ARCH ?= $(shell uname -m)
+ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
+
+ifeq ($(ARCH),powerpc)
+
+GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
+
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS)
+
+export CFLAGS
+
+SUB_DIRS = alignment \
+ benchmarks \
+ cache_shape \
+ copyloops \
+ dscr \
+ mm \
+ pmu \
+ signal \
+ primitives \
+ stringloops \
+ switch_endian \
+ syscalls \
+ tm \
+ vphn \
+ math \
+ ptrace
+
+endif
+
+all: $(SUB_DIRS)
+
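+# Each subdirectory builds into its own OUTPUT subtree; -k keeps going so
+# one failing subdirectory does not abort the rest of the build.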
+$(SUB_DIRS):
+ BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+
+include ../lib.mk
+
+override define RUN_TESTS
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ done;
+endef
+
+override define INSTALL_RULE
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
+ done;
+endef
+
+override define EMIT_TESTS
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
+ done;
+endef
+
+override define CLEAN
+ @for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
+ done;
+ rm -f tags
+endef
+
+tags:
+ find . -name '*.c' -o -name '*.h' | xargs ctags
+
+.PHONY: tags $(SUB_DIRS)
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
new file mode 100644
index 000000000..6d4fd0145
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -0,0 +1,2 @@
+copy_first_unaligned
+alignment_handler
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
new file mode 100644
index 000000000..d056486f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -0,0 +1,6 @@
+TEST_GEN_PROGS := copy_first_unaligned alignment_handler
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
new file mode 100644
index 000000000..4f8335e0c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -0,0 +1,575 @@
+/*
+ * Test the powerpc alignment handler on POWER8/POWER9
+ *
+ * Copyright (C) 2017 IBM Corporation (Michael Neuling, Andrew Donnellan)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/*
+ * This selftest exercises the powerpc alignment fault handler.
+ *
+ * We create two sets of source and destination buffers, one in regular memory,
+ * the other cache-inhibited (we use /dev/fb0 for this).
+ *
+ * We initialise the source buffers, then use whichever set of load/store
+ * instructions is under test to copy bytes from the source buffers to the
+ * destination buffers. For the regular buffers, these instructions will
+ * execute normally. For the cache-inhibited buffers, these instructions
+ * will trap and cause an alignment fault, and the alignment fault handler
+ * will emulate the particular instruction under test. We then compare the
+ * destination buffers to ensure that the native and emulated cases give the
+ * same result.
+ *
+ * TODO:
+ * - Any FIXMEs below
+ * - Test VSX regs < 32 and > 32
+ * - Test all loads and stores
+ * - Check update forms do update register
+ * - Test alignment faults over page boundary
+ *
+ * Some old binutils may not support all the instructions.
+ */
+
+
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <getopt.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+int bufsize;
+int debug;
+int testing;
+volatile int gotsig;
+
+void sighandler(int sig, siginfo_t *info, void *ctx)
+{
+ ucontext_t *ucp = ctx;
+
+ if (!testing) {
+ signal(sig, SIG_DFL);
+ kill(0, sig);
+ }
+ gotsig = sig;
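+	/* Step past the faulting 4-byte instruction so execution can continue */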
+#ifdef __powerpc64__
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+#else
+ ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
+#endif
+}
+
+#define XFORM(reg, n) " " #reg " ,%"#n",%2 ;"
+#define DFORM(reg, n) " " #reg " ,0(%"#n") ;"
+
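+/*
+ * Generate one test function per instruction: load 16 bytes from the
+ * source buffer with ld_op and store them to the destination with st_op,
+ * using X-form (indexed) or D-form (displacement) addressing, then
+ * register the function with the harness via do_test().
+ */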
+#define TEST(name, ld_op, st_op, form, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ #ld_op form(ld_reg, 0) \
+ #st_op form(st_reg, 1) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
+#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
+#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
+#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
+#define STORE_VSX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 32)
+#define LOAD_VMX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 0, 32)
+#define STORE_VMX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 0)
+#define LOAD_VMX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 0, 32)
+#define STORE_VMX_DFORM_TEST(op) TEST(op, lxv, op, DFORM, 32, 0)
+
+#define LOAD_XFORM_TEST(op) TEST(op, op, stdx, XFORM, 31, 31)
+#define STORE_XFORM_TEST(op) TEST(op, ldx, op, XFORM, 31, 31)
+#define LOAD_DFORM_TEST(op) TEST(op, op, std, DFORM, 31, 31)
+#define STORE_DFORM_TEST(op) TEST(op, ld, op, DFORM, 31, 31)
+
+#define LOAD_FLOAT_DFORM_TEST(op) TEST(op, op, stfd, DFORM, 0, 0)
+#define STORE_FLOAT_DFORM_TEST(op) TEST(op, lfd, op, DFORM, 0, 0)
+#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
+#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+
+
+/* FIXME: Unimplemented tests: */
+// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
+// STORE_DFORM_TEST(stswi) /* FIXME: string instruction */
+
+// STORE_XFORM_TEST(stwat) /* AMO can't emulate or run on CI */
+// STORE_XFORM_TEST(stdat) /* ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ */
+
+
+/* preload byte by byte */
+void preload_data(void *dst, int offset, int width)
+{
+ char *c = dst;
+ int i;
+
+ c += offset;
+
+ for (i = 0 ; i < width ; i++)
+ c[i] = i;
+}
+
+int test_memcpy(void *dst, void *src, int size, int offset,
+ void (*test_func)(char *, char *))
+{
+ char *s, *d;
+
+ s = src;
+ s += offset;
+ d = dst;
+ d += offset;
+
+ assert(size == 16);
+ gotsig = 0;
+ testing = 1;
+
+ test_func(s, d); /* run the actual test */
+
+ testing = 0;
+ if (gotsig) {
+ if (debug)
+ printf(" Got signal %i\n", gotsig);
+ return 1;
+ }
+ return 0;
+}
+
+void dumpdata(char *s1, char *s2, int n, char *test_name)
+{
+ int i;
+
+ printf(" %s: unexpected result:\n", test_name);
+ printf(" mem:");
+ for (i = 0; i < n; i++)
+ printf(" %02x", s1[i]);
+ printf("\n");
+ printf(" ci: ");
+ for (i = 0; i < n; i++)
+ printf(" %02x", s2[i]);
+ printf("\n");
+}
+
+int test_memcmp(void *s1, void *s2, int n, int offset, char *test_name)
+{
+ char *s1c, *s2c;
+
+ s1c = s1;
+ s1c += offset;
+ s2c = s2;
+ s2c += offset;
+
+ if (memcmp(s1c, s2c, n)) {
+ if (debug) {
+ printf("\n Compare failed. Offset:%i length:%i\n",
+ offset, n);
+ dumpdata(s1c, s2c, n, test_name);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * Do two memcpy tests using the same instructions: one on cachable
+ * memory, the other on cache-inhibited memory.
+ */
+int do_test(char *test_name, void (*test_func)(char *, char *))
+{
+ int offset, width, fd, rc, r;
+ void *mem0, *mem1, *ci0, *ci1;
+
+ printf("\tDoing %s:\t", test_name);
+
+ fd = open("/dev/fb0", O_RDWR);
+ if (fd < 0) {
+ printf("\n");
+		perror("Can't open /dev/fb0");
+ return 1;
+ }
+
+ ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+ fd, 0x0);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
+ fd, bufsize);
+ if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
+ printf("\n");
+ perror("mmap failed");
+ SKIP_IF(1);
+ }
+
+ rc = posix_memalign(&mem0, bufsize, bufsize);
+ if (rc) {
+ printf("\n");
+ return rc;
+ }
+
+ rc = posix_memalign(&mem1, bufsize, bufsize);
+ if (rc) {
+ printf("\n");
+ free(mem0);
+ return rc;
+ }
+
+ rc = 0;
+	/* offset 0 raises no alignment fault, so skip it */
+ for (offset = 1; offset < 16; offset++) {
+ width = 16; /* vsx == 16 bytes */
+ r = 0;
+
+ /* load pattern into memory byte by byte */
+ preload_data(ci0, offset, width);
+ preload_data(mem0, offset, width); // FIXME: remove??
+ memcpy(ci0, mem0, bufsize);
+ memcpy(ci1, mem1, bufsize); /* initialise output to the same */
+
+ /* sanity check */
+ test_memcmp(mem0, ci0, width, offset, test_name);
+
+ r |= test_memcpy(ci1, ci0, width, offset, test_func);
+ r |= test_memcpy(mem1, mem0, width, offset, test_func);
+ if (r && !debug) {
+ printf("FAILED: Got signal");
+ rc = 1;
+ break;
+ }
+
+ r |= test_memcmp(mem1, ci1, width, offset, test_name);
+ if (r && !debug) {
+ printf("FAILED: Wrong Data");
+ rc = 1;
+ break;
+ }
+ }
+
+ if (rc == 0)
+ printf("PASSED");
+
+ printf("\n");
+
+ munmap(ci0, bufsize);
+ munmap(ci1, bufsize);
+ free(mem0);
+ free(mem1);
+ close(fd);
+
+ return rc;
+}
+
+static bool can_open_fb0(void)
+{
+ int fd;
+
+ fd = open("/dev/fb0", O_RDWR);
+ if (fd < 0)
+ return false;
+
+ close(fd);
+ return true;
+}
+
+int test_alignment_handler_vsx_206(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ printf("VSX: 2.06B\n");
+ LOAD_VSX_XFORM_TEST(lxvd2x);
+ LOAD_VSX_XFORM_TEST(lxvw4x);
+ LOAD_VSX_XFORM_TEST(lxsdx);
+ LOAD_VSX_XFORM_TEST(lxvdsx);
+ STORE_VSX_XFORM_TEST(stxvd2x);
+ STORE_VSX_XFORM_TEST(stxvw4x);
+ STORE_VSX_XFORM_TEST(stxsdx);
+ return rc;
+}
+
+int test_alignment_handler_vsx_207(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ printf("VSX: 2.07B\n");
+ LOAD_VSX_XFORM_TEST(lxsspx);
+ LOAD_VSX_XFORM_TEST(lxsiwax);
+ LOAD_VSX_XFORM_TEST(lxsiwzx);
+ STORE_VSX_XFORM_TEST(stxsspx);
+ STORE_VSX_XFORM_TEST(stxsiwx);
+ return rc;
+}
+
+int test_alignment_handler_vsx_300(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+ printf("VSX: 3.00B\n");
+ LOAD_VMX_DFORM_TEST(lxsd);
+ LOAD_VSX_XFORM_TEST(lxsibzx);
+ LOAD_VSX_XFORM_TEST(lxsihzx);
+ LOAD_VMX_DFORM_TEST(lxssp);
+ LOAD_VSX_DFORM_TEST(lxv);
+ LOAD_VSX_XFORM_TEST(lxvb16x);
+ LOAD_VSX_XFORM_TEST(lxvh8x);
+ LOAD_VSX_XFORM_TEST(lxvx);
+ LOAD_VSX_XFORM_TEST(lxvwsx);
+ LOAD_VSX_XFORM_TEST(lxvl);
+ LOAD_VSX_XFORM_TEST(lxvll);
+ STORE_VMX_DFORM_TEST(stxsd);
+ STORE_VSX_XFORM_TEST(stxsibx);
+ STORE_VSX_XFORM_TEST(stxsihx);
+ STORE_VMX_DFORM_TEST(stxssp);
+ STORE_VSX_DFORM_TEST(stxv);
+ STORE_VSX_XFORM_TEST(stxvb16x);
+ STORE_VSX_XFORM_TEST(stxvh8x);
+ STORE_VSX_XFORM_TEST(stxvx);
+ STORE_VSX_XFORM_TEST(stxvl);
+ STORE_VSX_XFORM_TEST(stxvll);
+ return rc;
+}
+
+int test_alignment_handler_integer(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+
+ printf("Integer\n");
+ LOAD_DFORM_TEST(lbz);
+ LOAD_DFORM_TEST(lbzu);
+ LOAD_XFORM_TEST(lbzx);
+ LOAD_XFORM_TEST(lbzux);
+ LOAD_DFORM_TEST(lhz);
+ LOAD_DFORM_TEST(lhzu);
+ LOAD_XFORM_TEST(lhzx);
+ LOAD_XFORM_TEST(lhzux);
+ LOAD_DFORM_TEST(lha);
+ LOAD_DFORM_TEST(lhau);
+ LOAD_XFORM_TEST(lhax);
+ LOAD_XFORM_TEST(lhaux);
+ LOAD_XFORM_TEST(lhbrx);
+ LOAD_DFORM_TEST(lwz);
+ LOAD_DFORM_TEST(lwzu);
+ LOAD_XFORM_TEST(lwzx);
+ LOAD_XFORM_TEST(lwzux);
+ LOAD_DFORM_TEST(lwa);
+ LOAD_XFORM_TEST(lwax);
+ LOAD_XFORM_TEST(lwaux);
+ LOAD_XFORM_TEST(lwbrx);
+ LOAD_DFORM_TEST(ld);
+ LOAD_DFORM_TEST(ldu);
+ LOAD_XFORM_TEST(ldx);
+ LOAD_XFORM_TEST(ldux);
+ STORE_DFORM_TEST(stb);
+ STORE_XFORM_TEST(stbx);
+ STORE_DFORM_TEST(stbu);
+ STORE_XFORM_TEST(stbux);
+ STORE_DFORM_TEST(sth);
+ STORE_XFORM_TEST(sthx);
+ STORE_DFORM_TEST(sthu);
+ STORE_XFORM_TEST(sthux);
+ STORE_XFORM_TEST(sthbrx);
+ STORE_DFORM_TEST(stw);
+ STORE_XFORM_TEST(stwx);
+ STORE_DFORM_TEST(stwu);
+ STORE_XFORM_TEST(stwux);
+ STORE_XFORM_TEST(stwbrx);
+ STORE_DFORM_TEST(std);
+ STORE_XFORM_TEST(stdx);
+ STORE_DFORM_TEST(stdu);
+ STORE_XFORM_TEST(stdux);
+
+#ifdef __BIG_ENDIAN__
+ LOAD_DFORM_TEST(lmw);
+ STORE_DFORM_TEST(stmw);
+#endif
+
+ return rc;
+}
+
+int test_alignment_handler_integer_206(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ printf("Integer: 2.06\n");
+
+ LOAD_XFORM_TEST(ldbrx);
+ STORE_XFORM_TEST(stdbrx);
+
+ return rc;
+}
+
+int test_alignment_handler_vmx(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
+
+ printf("VMX\n");
+ LOAD_VMX_XFORM_TEST(lvx);
+
+ /*
+ * FIXME: These loads only load part of the register, so our
+	 * testing method doesn't work. They also don't take alignment
+	 * faults, so testing them here is of limited value anyway.
+ *
+ LOAD_VMX_XFORM_TEST(lvebx)
+ LOAD_VMX_XFORM_TEST(lvehx)
+ LOAD_VMX_XFORM_TEST(lvewx)
+ LOAD_VMX_XFORM_TEST(lvxl)
+ */
+ STORE_VMX_XFORM_TEST(stvx);
+ STORE_VMX_XFORM_TEST(stvebx);
+ STORE_VMX_XFORM_TEST(stvehx);
+ STORE_VMX_XFORM_TEST(stvewx);
+ STORE_VMX_XFORM_TEST(stvxl);
+ return rc;
+}
+
+int test_alignment_handler_fp(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+
+ printf("Floating point\n");
+ LOAD_FLOAT_DFORM_TEST(lfd);
+ LOAD_FLOAT_XFORM_TEST(lfdx);
+ LOAD_FLOAT_DFORM_TEST(lfdu);
+ LOAD_FLOAT_XFORM_TEST(lfdux);
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_XFORM_TEST(lfsx);
+ LOAD_FLOAT_DFORM_TEST(lfsu);
+ LOAD_FLOAT_XFORM_TEST(lfsux);
+ STORE_FLOAT_DFORM_TEST(stfd);
+ STORE_FLOAT_XFORM_TEST(stfdx);
+ STORE_FLOAT_DFORM_TEST(stfdu);
+ STORE_FLOAT_XFORM_TEST(stfdux);
+ STORE_FLOAT_DFORM_TEST(stfs);
+ STORE_FLOAT_XFORM_TEST(stfsx);
+ STORE_FLOAT_DFORM_TEST(stfsu);
+ STORE_FLOAT_XFORM_TEST(stfsux);
+ STORE_FLOAT_XFORM_TEST(stfiwx);
+
+ return rc;
+}
+
+int test_alignment_handler_fp_205(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
+
+ printf("Floating point: 2.05\n");
+
+ LOAD_FLOAT_DFORM_TEST(lfdp);
+ LOAD_FLOAT_XFORM_TEST(lfdpx);
+ LOAD_FLOAT_XFORM_TEST(lfiwax);
+ STORE_FLOAT_DFORM_TEST(stfdp);
+ STORE_FLOAT_XFORM_TEST(stfdpx);
+
+ return rc;
+}
+
+int test_alignment_handler_fp_206(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_fb0());
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ printf("Floating point: 2.06\n");
+
+ LOAD_FLOAT_XFORM_TEST(lfiwzx);
+
+ return rc;
+}
+
+void usage(char *prog)
+{
+ printf("Usage: %s [options]\n", prog);
+ printf(" -d Enable debug error output\n");
+ printf("\n");
+ printf("This test requires a POWER8 or POWER9 CPU and a usable ");
+ printf("framebuffer at /dev/fb0.\n");
+}
+
+int main(int argc, char *argv[])
+{
+
+ struct sigaction sa;
+ int rc = 0;
+ int option = 0;
+
+ while ((option = getopt(argc, argv, "d")) != -1) {
+ switch (option) {
+ case 'd':
+ debug++;
+ break;
+ default:
+ usage(argv[0]);
+ exit(1);
+ }
+ }
+
+ bufsize = getpagesize();
+
+ sa.sa_sigaction = sighandler;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL) == -1
+ || sigaction(SIGBUS, &sa, NULL) == -1
+ || sigaction(SIGILL, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ rc |= test_harness(test_alignment_handler_vsx_206,
+ "test_alignment_handler_vsx_206");
+ rc |= test_harness(test_alignment_handler_vsx_207,
+ "test_alignment_handler_vsx_207");
+ rc |= test_harness(test_alignment_handler_vsx_300,
+ "test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_integer,
+ "test_alignment_handler_integer");
+ rc |= test_harness(test_alignment_handler_integer_206,
+ "test_alignment_handler_integer_206");
+ rc |= test_harness(test_alignment_handler_vmx,
+ "test_alignment_handler_vmx");
+ rc |= test_harness(test_alignment_handler_fp,
+ "test_alignment_handler_fp");
+ rc |= test_harness(test_alignment_handler_fp_205,
+ "test_alignment_handler_fp_205");
+ rc |= test_harness(test_alignment_handler_fp_206,
+ "test_alignment_handler_fp_206");
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
new file mode 100644
index 000000000..5a9589987
--- /dev/null
+++ b/tools/testing/selftests/powerpc/alignment/copy_first_unaligned.c
@@ -0,0 +1,72 @@
+/*
+ * Copyright 2016, Chris Smart, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Calls to copy_first which are not 128-byte aligned should be
+ * caught and sent a SIGBUS.
+ *
+ */
+
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+#include "utils.h"
+#include "instructions.h"
+
+unsigned int expected_instruction = PPC_INST_COPY_FIRST;
+unsigned int instruction_mask = 0xfc2007fe;
+
+void signal_action_handler(int signal_num, siginfo_t *info, void *ptr)
+{
+ ucontext_t *ctx = ptr;
+#ifdef __powerpc64__
+ unsigned int *pc = (unsigned int *)ctx->uc_mcontext.gp_regs[PT_NIP];
+#else
+ unsigned int *pc = (unsigned int *)ctx->uc_mcontext.uc_regs->gregs[PT_NIP];
+#endif
+
+ /*
+ * Check that the signal was on the correct instruction, using a
+	 * mask because the compiler chooses which register is used for RB.
+ */
+ if ((*pc & instruction_mask) == expected_instruction)
+ _exit(0); /* We hit the right instruction */
+
+ _exit(1);
+}
+
+void setup_signal_handler(void)
+{
+ struct sigaction signal_action;
+
+ memset(&signal_action, 0, sizeof(signal_action));
+ signal_action.sa_sigaction = signal_action_handler;
+ signal_action.sa_flags = SA_SIGINFO;
+ sigaction(SIGBUS, &signal_action, NULL);
+}
+
+char cacheline_buf[128] __cacheline_aligned;
+
+int test_copy_first_unaligned(void)
+{
+ /* Only run this test on a P9 or later */
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+
+ /* Register our signal handler with SIGBUS */
+ setup_signal_handler();
+
+ /* +1 makes buf unaligned */
+ copy_first(cacheline_buf+1);
+
+ /* We should not get here */
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_copy_first_unaligned, "test_copy_first_unaligned");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
new file mode 100644
index 000000000..9161679b1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -0,0 +1,7 @@
+gettimeofday
+context_switch
+fork
+exec_target
+mmap_bench
+futex_bench
+null_syscall
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
new file mode 100644
index 000000000..d40300a65
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
+TEST_GEN_FILES := exec_target
+
+CFLAGS += -O2
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/context_switch: ../utils.c
+$(OUTPUT)/context_switch: CFLAGS += -maltivec -mvsx -mabi=altivec
+$(OUTPUT)/context_switch: LDLIBS += -lpthread
+
+$(OUTPUT)/fork: LDLIBS += -lpthread
+
+$(OUTPUT)/exec_target: CFLAGS += -static -nostartfiles
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
new file mode 100644
index 000000000..9ec767469
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -0,0 +1,506 @@
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <signal.h>
+#include <assert.h>
+#include <pthread.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <linux/futex.h>
+#ifdef __powerpc__
+#include <altivec.h>
+#endif
+#include "utils.h"
+
+static unsigned int timeout = 30;
+
+static int touch_vdso;
+struct timeval tv;
+
+static int touch_fp = 1;
+double fp;
+
+static int touch_vector = 1;
+vector int a, b, c;
+
+#ifdef __powerpc__
+static int touch_altivec = 1;
+
+/*
+ * Note: LTO (Link Time Optimisation) doesn't play well with this function
+ * attribute. Be very careful enabling LTO for this test.
+ */
+static void __attribute__((__target__("no-vsx"))) altivec_touch_fn(void)
+{
+ c = a + b;
+}
+#endif
+
+static void touch(void)
+{
+ if (touch_vdso)
+ gettimeofday(&tv, NULL);
+
+ if (touch_fp)
+ fp += 0.1;
+
+#ifdef __powerpc__
+ if (touch_altivec)
+ altivec_touch_fn();
+#endif
+
+ if (touch_vector)
+ c = a + b;
+
+ asm volatile("# %0 %1 %2": : "r"(&tv), "r"(&fp), "r"(&c));
+}
+
+static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
+{
+ int rc;
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+
+ rc = pthread_create(&tid, &attr, fn, arg);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
+{
+ int pid, ncpus;
+ cpu_set_t *cpuset;
+ size_t size;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid)
+ return;
+
+ ncpus = get_nprocs();
+ size = CPU_ALLOC_SIZE(ncpus);
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset) {
+ perror("malloc");
+ exit(1);
+ }
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(cpu, size, cpuset);
+
+ if (sched_setaffinity(0, size, cpuset)) {
+ perror("sched_setaffinity");
+ CPU_FREE(cpuset);
+ exit(1);
+ }
+
+ CPU_FREE(cpuset);
+ fn(arg);
+
+ exit(0);
+}
+
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void sigalrm_handler(int junk)
+{
+ unsigned long i = iterations;
+
+ printf("%ld\n", i - iterations_prev);
+ iterations_prev = i;
+
+ if (--timeout == 0)
+ kill(0, SIGUSR1);
+
+ alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+struct actions {
+ void (*setup)(int, int);
+ void *(*thread1)(void *);
+ void *(*thread2)(void *);
+};
+
+#define READ 0
+#define WRITE 1
+
+static int pipe_fd1[2];
+static int pipe_fd2[2];
+
+static void pipe_setup(int cpu1, int cpu2)
+{
+ if (pipe(pipe_fd1) || pipe(pipe_fd2))
+ exit(1);
+}
+
+static void *pipe_thread1(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ while (1) {
+ assert(read(pipe_fd1[READ], &c, 1) == 1);
+ touch();
+
+ assert(write(pipe_fd2[WRITE], &c, 1) == 1);
+ touch();
+
+ iterations += 2;
+ }
+
+ return NULL;
+}
+
+static void *pipe_thread2(void *arg)
+{
+ while (1) {
+ assert(write(pipe_fd1[WRITE], &c, 1) == 1);
+ touch();
+
+ assert(read(pipe_fd2[READ], &c, 1) == 1);
+ touch();
+ }
+
+ return NULL;
+}
+
+static struct actions pipe_actions = {
+ .setup = pipe_setup,
+ .thread1 = pipe_thread1,
+ .thread2 = pipe_thread2,
+};
+
+static void yield_setup(int cpu1, int cpu2)
+{
+ if (cpu1 != cpu2) {
+ fprintf(stderr, "Both threads must be on the same CPU for yield test\n");
+ exit(1);
+ }
+}
+
+static void *yield_thread1(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ while (1) {
+ sched_yield();
+ touch();
+
+ iterations += 2;
+ }
+
+ return NULL;
+}
+
+static void *yield_thread2(void *arg)
+{
+ while (1) {
+ sched_yield();
+ touch();
+ }
+
+ return NULL;
+}
+
+static struct actions yield_actions = {
+ .setup = yield_setup,
+ .thread1 = yield_thread1,
+ .thread2 = yield_thread2,
+};
+
+static long sys_futex(void *addr1, int op, int val1, struct timespec *timeout,
+ void *addr2, int val3)
+{
+ return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
+}
+
+static unsigned long cmpxchg(unsigned long *p, unsigned long expected,
+ unsigned long desired)
+{
+ unsigned long exp = expected;
+
+ __atomic_compare_exchange_n(p, &exp, desired, 0,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
+ return exp;
+}
+
+static unsigned long xchg(unsigned long *p, unsigned long val)
+{
+ return __atomic_exchange_n(p, val, __ATOMIC_SEQ_CST);
+}
+
+static int processes;
+
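+/*
+ * Minimal three-state futex mutex (0 = unlocked, 1 = locked, 2 = locked
+ * with possible waiters), in the style of Ulrich Drepper's "Futexes Are
+ * Tricky": unlock only issues FUTEX_WAKE when the lock word was 2, so
+ * the uncontended paths stay entirely in userspace.
+ */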
+static int mutex_lock(unsigned long *m)
+{
+ int c;
+ int flags = FUTEX_WAIT;
+ if (!processes)
+ flags |= FUTEX_PRIVATE_FLAG;
+
+ c = cmpxchg(m, 0, 1);
+ if (!c)
+ return 0;
+
+ if (c == 1)
+ c = xchg(m, 2);
+
+ while (c) {
+ sys_futex(m, flags, 2, NULL, NULL, 0);
+ c = xchg(m, 2);
+ }
+
+ return 0;
+}
+
+static int mutex_unlock(unsigned long *m)
+{
+ int flags = FUTEX_WAKE;
+ if (!processes)
+ flags |= FUTEX_PRIVATE_FLAG;
+
+ if (*m == 2)
+ *m = 0;
+ else if (xchg(m, 0) == 1)
+ return 0;
+
+ sys_futex(m, flags, 1, NULL, NULL, 0);
+
+ return 0;
+}
+
+static unsigned long *m1, *m2;
+
+static void futex_setup(int cpu1, int cpu2)
+{
+ if (!processes) {
+ static unsigned long _m1, _m2;
+ m1 = &_m1;
+ m2 = &_m2;
+ } else {
+ int shmid;
+ void *shmaddr;
+
+ shmid = shmget(IPC_PRIVATE, getpagesize(), SHM_R | SHM_W);
+ if (shmid < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ shmaddr = shmat(shmid, NULL, 0);
+ if (shmaddr == (char *)-1) {
+ perror("shmat");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(1);
+ }
+
+ shmctl(shmid, IPC_RMID, NULL);
+
+ m1 = shmaddr;
+ m2 = shmaddr + sizeof(*m1);
+ }
+
+ *m1 = 0;
+ *m2 = 0;
+
+ mutex_lock(m1);
+ mutex_lock(m2);
+}
+
+static void *futex_thread1(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ while (1) {
+ mutex_lock(m2);
+ mutex_unlock(m1);
+
+ iterations += 2;
+ }
+
+ return NULL;
+}
+
+static void *futex_thread2(void *arg)
+{
+ while (1) {
+ mutex_unlock(m2);
+ mutex_lock(m1);
+ }
+
+ return NULL;
+}
+
+static struct actions futex_actions = {
+ .setup = futex_setup,
+ .thread1 = futex_thread1,
+ .thread2 = futex_thread2,
+};
+
+static struct option options[] = {
+ { "test", required_argument, 0, 't' },
+ { "process", no_argument, &processes, 1 },
+ { "timeout", required_argument, 0, 's' },
+ { "vdso", no_argument, &touch_vdso, 1 },
+ { "no-fp", no_argument, &touch_fp, 0 },
+#ifdef __powerpc__
+ { "no-altivec", no_argument, &touch_altivec, 0 },
+#endif
+ { "no-vector", no_argument, &touch_vector, 0 },
+ { 0, },
+};
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: context_switch <options> CPU1 CPU2\n\n");
+ fprintf(stderr, "\t\t--test=X\tpipe, futex or yield (default)\n");
+ fprintf(stderr, "\t\t--process\tUse processes (default threads)\n");
+ fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+ fprintf(stderr, "\t\t--vdso\t\ttouch VDSO\n");
+ fprintf(stderr, "\t\t--no-fp\t\tDon't touch FP\n");
+#ifdef __powerpc__
+ fprintf(stderr, "\t\t--no-altivec\tDon't touch altivec\n");
+#endif
+ fprintf(stderr, "\t\t--no-vector\tDon't touch vector\n");
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+ struct actions *actions = &yield_actions;
+ int cpu1;
+ int cpu2;
+ static void (*start_fn)(void *(*fn)(void *), void *arg, unsigned long cpu);
+
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+
+ case 't':
+ if (!strcmp(optarg, "pipe")) {
+ actions = &pipe_actions;
+ } else if (!strcmp(optarg, "yield")) {
+ actions = &yield_actions;
+ } else if (!strcmp(optarg, "futex")) {
+ actions = &futex_actions;
+ } else {
+ usage();
+ exit(1);
+ }
+ break;
+
+ case 's':
+ timeout = atoi(optarg);
+ break;
+
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (processes)
+ start_fn = start_process_on;
+ else
+ start_fn = start_thread_on;
+
+ if (((argc - optind) != 2)) {
+ cpu1 = cpu2 = pick_online_cpu();
+ } else {
+ cpu1 = atoi(argv[optind++]);
+ cpu2 = atoi(argv[optind++]);
+ }
+
+ printf("Using %s with ", processes ? "processes" : "threads");
+
+ if (actions == &pipe_actions)
+ printf("pipe");
+ else if (actions == &yield_actions)
+ printf("yield");
+ else
+ printf("futex");
+
+ printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
+ cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no",
+ touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
+
+ /* Create a new process group so we can signal everyone for exit */
+ setpgid(getpid(), getpid());
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ actions->setup(cpu1, cpu2);
+
+ start_fn(actions->thread1, NULL, cpu1);
+ start_fn(actions->thread2, NULL, cpu2);
+
+ while (1)
+ sleep(3600);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/exec_target.c b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
new file mode 100644
index 000000000..c14b0fc1e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/exec_target.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Part of fork context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
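+/*
+ * Built with -static -nostartfiles (see the Makefile), so _start is the
+ * raw entry point: exit immediately via the exit syscall, keeping the
+ * exec()ed child as cheap as possible for the fork+exec benchmark.
+ */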
+void _start(void)
+{
+ syscall(SYS_exit, 0);
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/fork.c b/tools/testing/selftests/powerpc/benchmarks/fork.c
new file mode 100644
index 000000000..d312e638c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/fork.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Context switch microbenchmark.
+ *
+ * Copyright 2018, Anton Blanchard, IBM Corp.
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+static unsigned int timeout = 30;
+
+static void set_cpu(int cpu)
+{
+ cpu_set_t cpuset;
+
+ if (cpu == -1)
+ return;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ perror("sched_setaffinity");
+ exit(1);
+ }
+}
+
+static void start_process_on(void *(*fn)(void *), void *arg, int cpu)
+{
+ int pid;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+
+ if (pid)
+ return;
+
+ set_cpu(cpu);
+
+ fn(arg);
+
+ exit(0);
+}
+
+static int cpu;
+static int do_fork = 0;
+static int do_vfork = 0;
+static int do_exec = 0;
+static char *exec_file;
+static int exec_target = 0;
+static unsigned long iterations;
+static unsigned long iterations_prev;
+
+static void run_exec(void)
+{
+ char *const argv[] = { "./exec_target", NULL };
+
+ if (execve("./exec_target", argv, NULL) == -1) {
+ perror("execve");
+ exit(1);
+ }
+}
+
+static void bench_fork(void)
+{
+ while (1) {
+ pid_t pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void bench_vfork(void)
+{
+ while (1) {
+ pid_t pid = vfork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ if (do_exec)
+ run_exec();
+ _exit(0);
+ }
+ pid = waitpid(pid, NULL, 0);
+ if (pid == -1) {
+ perror("waitpid");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void *null_fn(void *arg)
+{
+ pthread_exit(NULL);
+}
+
+static void bench_thread(void)
+{
+ pthread_t tid;
+ cpu_set_t cpuset;
+ pthread_attr_t attr;
+ int rc;
+
+ rc = pthread_attr_init(&attr);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_init");
+ exit(1);
+ }
+
+ if (cpu != -1) {
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc) {
+ errno = rc;
+ perror("pthread_attr_setaffinity_np");
+ exit(1);
+ }
+ }
+
+ while (1) {
+ rc = pthread_create(&tid, &attr, null_fn, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_create");
+ exit(1);
+ }
+ rc = pthread_join(tid, NULL);
+ if (rc) {
+ errno = rc;
+ perror("pthread_join");
+ exit(1);
+ }
+ iterations++;
+ }
+}
+
+static void sigalrm_handler(int junk)
+{
+ unsigned long i = iterations;
+
+ printf("%ld\n", i - iterations_prev);
+ iterations_prev = i;
+
+ if (--timeout == 0)
+ kill(0, SIGUSR1);
+
+ alarm(1);
+}
+
+static void sigusr1_handler(int junk)
+{
+ exit(0);
+}
+
+static void *bench_proc(void *arg)
+{
+ signal(SIGALRM, sigalrm_handler);
+ alarm(1);
+
+ if (do_fork)
+ bench_fork();
+ else if (do_vfork)
+ bench_vfork();
+ else
+ bench_thread();
+
+ return NULL;
+}
+
+static struct option options[] = {
+ { "fork", no_argument, &do_fork, 1 },
+ { "vfork", no_argument, &do_vfork, 1 },
+ { "exec", no_argument, &do_exec, 1 },
+ { "timeout", required_argument, 0, 's' },
+ { "exec-target", no_argument, &exec_target, 1 },
+ { NULL },
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: fork <options> CPU\n\n");
+ fprintf(stderr, "\t\t--fork\tUse fork() (default threads)\n");
+ fprintf(stderr, "\t\t--vfork\tUse vfork() (default threads)\n");
+ fprintf(stderr, "\t\t--exec\tAlso exec() (default no exec)\n");
+ fprintf(stderr, "\t\t--timeout=X\tDuration in seconds to run (default 30)\n");
+ fprintf(stderr, "\t\t--exec-target\tInternal option for exec workload\n");
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+
+ case 's':
+ timeout = atoi(optarg);
+ break;
+
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (do_fork && do_vfork) {
+ usage();
+ exit(1);
+ }
+ if (do_exec && !do_fork && !do_vfork) {
+ usage();
+ exit(1);
+ }
+
+ if (do_exec) {
+ char *dirname = strdup(argv[0]);
+ int i;
+ i = strlen(dirname) - 1;
+ while (i) {
+ if (dirname[i] == '/') {
+ dirname[i] = '\0';
+ if (chdir(dirname) == -1) {
+ perror("chdir");
+ exit(1);
+ }
+ break;
+ }
+ i--;
+ }
+ }
+
+ if (exec_target) {
+ exit(0);
+ }
+
+ if (((argc - optind) != 1)) {
+ cpu = -1;
+ } else {
+ cpu = atoi(argv[optind++]);
+ }
+
+ if (do_exec)
+ exec_file = argv[0];
+
+ set_cpu(cpu);
+
+ printf("Using ");
+ if (do_fork)
+ printf("fork");
+ else if (do_vfork)
+ printf("vfork");
+ else
+ printf("clone");
+
+ if (do_exec)
+ printf(" + exec");
+
+ printf(" on cpu %d\n", cpu);
+
+ /* Create a new process group so we can signal everyone for exit */
+ setpgid(getpid(), getpid());
+
+ signal(SIGUSR1, sigusr1_handler);
+
+ start_process_on(bench_proc, NULL, cpu);
+
+ while (1)
+ sleep(3600);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/futex_bench.c b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
new file mode 100644
index 000000000..d58e4dc50
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/futex_bench.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/futex.h>
+
+#include "utils.h"
+
+#define ITERATIONS 100000000
+
+#define futex(A, B, C, D, E, F) syscall(__NR_futex, A, B, C, D, E, F)
+
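+/*
+ * FUTEX_WAKE on a futex with no waiters returns immediately, so this
+ * loop measures little more than syscall entry/exit plus the futex
+ * hash lookup.
+ */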
+int test_futex(void)
+{
+ struct timespec ts_start, ts_end;
+ unsigned long i = ITERATIONS;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ while (i--) {
+ unsigned int addr = 0;
+ futex(&addr, FUTEX_WAKE, 1, NULL, NULL, 0);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+ printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(test_futex, "futex_bench");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
new file mode 100644
index 000000000..3af3c21e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright 2015, Anton Blanchard, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+
+#include "utils.h"
+
+static int test_gettimeofday(void)
+{
+ int i;
+
+ struct timeval tv_start, tv_end;
+
+ gettimeofday(&tv_start, NULL);
+
+ for(i = 0; i < 100000000; i++) {
+ gettimeofday(&tv_end, NULL);
+ }
+
+ printf("time = %.6f\n", tv_end.tv_sec - tv_start.tv_sec + (tv_end.tv_usec - tv_start.tv_usec) * 1e-6);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_gettimeofday, "gettimeofday");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
new file mode 100644
index 000000000..033de0560
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/mmap_bench.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2016, Anton Blanchard, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <getopt.h>
+
+#include "utils.h"
+
+#define ITERATIONS 5000000
+
+#define MEMSIZE (1UL << 27)
+#define PAGE_SIZE (1UL << 16)
+#define CHUNK_COUNT (MEMSIZE/PAGE_SIZE)
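+
+/* Touching c[count << 16] writes one byte per PAGE_SIZE (64kB) chunk,
+   faulting every page in when --pgfault is given */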
+
+static int pg_fault;
+static int iterations = ITERATIONS;
+
+static struct option options[] = {
+ { "pgfault", no_argument, &pg_fault, 1 },
+ { "iterations", required_argument, 0, 'i' },
+ { 0, },
+};
+
+static void usage(void)
+{
+	printf("mmap_bench [--pgfault] [--iterations count]\n");
+}
+
+int test_mmap(void)
+{
+ struct timespec ts_start, ts_end;
+ unsigned long i = iterations;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ while (i--) {
+ char *c = mmap(NULL, MEMSIZE, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(c == MAP_FAILED);
+ if (pg_fault) {
+ int count;
+ for (count = 0; count < CHUNK_COUNT; count++)
+ c[count << 16] = 'c';
+ }
+ munmap(c, MEMSIZE);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+ printf("time = %.6f\n", ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ signed char c;
+ while (1) {
+ int option_index = 0;
+
+ c = getopt_long(argc, argv, "", options, &option_index);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 0:
+ if (options[option_index].flag != 0)
+ break;
+
+ usage();
+ exit(1);
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ break;
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ test_harness_set_timeout(300);
+ return test_harness(test_mmap, "mmap_bench");
+}
diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
new file mode 100644
index 000000000..ecc14d68e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
@@ -0,0 +1,157 @@
+/*
+ * Test null syscall performance
+ *
+ * Copyright (C) 2009-2015 Anton Blanchard, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#define NR_LOOPS 10000000
+
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <signal.h>
+
+static volatile int soak_done;
+unsigned long long clock_frequency;
+unsigned long long timebase_frequency;
+double timebase_multiplier;
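+/* Converts timebase ticks to cycles: clock_frequency / timebase_frequency */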
+
+static inline unsigned long long mftb(void)
+{
+ unsigned long low;
+
+ asm volatile("mftb %0" : "=r" (low));
+
+ return low;
+}
+
+static void sigalrm_handler(int unused)
+{
+ soak_done = 1;
+}
+
+/*
+ * Use a timer instead of busy looping on clock_gettime() so we don't
+ * pollute profiles with glibc and VDSO hits.
+ */
+static void cpu_soak_usecs(unsigned long usecs)
+{
+ struct itimerval val;
+
+ memset(&val, 0, sizeof(val));
+ val.it_value.tv_usec = usecs;
+
+ signal(SIGALRM, sigalrm_handler);
+ setitimer(ITIMER_REAL, &val, NULL);
+
+ while (1) {
+ if (soak_done)
+ break;
+ }
+
+ signal(SIGALRM, SIG_DFL);
+}
+
+/*
+ * This only works with recent kernels where cpufreq modifies
+ * /proc/cpuinfo dynamically.
+ */
+static void get_proc_frequency(void)
+{
+ FILE *f;
+ char line[128];
+ char *p, *end;
+ unsigned long v;
+ double d;
+ char *override;
+
+ /* Try to get out of low power/low frequency mode */
+ cpu_soak_usecs(0.25 * 1000000);
+
+ f = fopen("/proc/cpuinfo", "r");
+ if (f == NULL)
+ return;
+
+ timebase_frequency = 0;
+
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (strncmp(line, "timebase", 8) == 0) {
+ p = strchr(line, ':');
+ if (p != NULL) {
+ v = strtoull(p + 1, &end, 0);
+ if (end != p + 1)
+ timebase_frequency = v;
+ }
+ }
+
+ if (((strncmp(line, "clock", 5) == 0) ||
+ (strncmp(line, "cpu MHz", 7) == 0))) {
+ p = strchr(line, ':');
+ if (p != NULL) {
+ d = strtod(p + 1, &end);
+ if (end != p + 1) {
+ /* Find fastest clock frequency */
+ if ((d * 1000000ULL) > clock_frequency)
+ clock_frequency = d * 1000000ULL;
+ }
+ }
+ }
+ }
+
+ fclose(f);
+
+ override = getenv("FREQUENCY");
+ if (override)
+ clock_frequency = strtoull(override, NULL, 10);
+
+ if (timebase_frequency)
+ timebase_multiplier = (double)clock_frequency
+ / timebase_frequency;
+ else
+ timebase_multiplier = 1;
+}
+
+static void do_null_syscall(unsigned long nr)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr; i++)
+ getppid();
+}
+
+#define TIME(A, STR)
+
+int main(void)
+{
+ unsigned long tb_start, tb_now;
+ struct timespec tv_start, tv_now;
+ unsigned long long elapsed_ns, elapsed_tb;
+
+ get_proc_frequency();
+
+ clock_gettime(CLOCK_MONOTONIC, &tv_start);
+ tb_start = mftb();
+
+ do_null_syscall(NR_LOOPS);
+
+ clock_gettime(CLOCK_MONOTONIC, &tv_now);
+ tb_now = mftb();
+
+ elapsed_ns = (tv_now.tv_sec - tv_start.tv_sec) * 1000000000ULL +
+ (tv_now.tv_nsec - tv_start.tv_nsec);
+ elapsed_tb = tb_now - tb_start;
+
+ printf("%10.2f ns %10.2f cycles\n", (float)elapsed_ns / NR_LOOPS,
+ (float)elapsed_tb * timebase_multiplier / NR_LOOPS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore
new file mode 100644
index 000000000..ec1848434
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore
@@ -0,0 +1 @@
+cache_shape
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
new file mode 100644
index 000000000..689f6c8eb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := cache_shape
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/cache_shape/cache_shape.c b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c
new file mode 100644
index 000000000..29ec07eba
--- /dev/null
+++ b/tools/testing/selftests/powerpc/cache_shape/cache_shape.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2017, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#ifndef AT_L1I_CACHESIZE
+#define AT_L1I_CACHESIZE 40
+#define AT_L1I_CACHEGEOMETRY 41
+#define AT_L1D_CACHESIZE 42
+#define AT_L1D_CACHEGEOMETRY 43
+#define AT_L2_CACHESIZE 44
+#define AT_L2_CACHEGEOMETRY 45
+#define AT_L3_CACHESIZE 46
+#define AT_L3_CACHEGEOMETRY 47
+#endif
+
+static void print_size(const char *label, uint32_t val)
+{
+ printf("%s cache size: %#10x %10dB %10dK\n", label, val, val, val / 1024);
+}
+
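+/*
+ * Cache geometry auxv values pack the line size into the low 16 bits and
+ * the associativity into the high 16 bits; 0 means fully associative.
+ */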
+static void print_geo(const char *label, uint32_t val)
+{
+ uint16_t assoc;
+
+ printf("%s line size: %#10x ", label, val & 0xFFFF);
+
+ assoc = val >> 16;
+ if (assoc)
+ printf("%u-way", assoc);
+ else
+ printf("fully");
+
+ printf(" associative\n");
+}
+
+static int test_cache_shape()
+{
+ static char buffer[4096];
+ ElfW(auxv_t) *p;
+ int found;
+
+ FAIL_IF(read_auxv(buffer, sizeof(buffer)));
+
+ found = 0;
+
+ p = find_auxv_entry(AT_L1I_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L1I ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L1I_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L1I ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L1D_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L1D ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L1D_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L1D ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L2_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L2 ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L2_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L2 ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L3_CACHESIZE, buffer);
+ if (p) {
+ found++;
+ print_size("L3 ", (uint32_t)p->a_un.a_val);
+ }
+
+ p = find_auxv_entry(AT_L3_CACHEGEOMETRY, buffer);
+ if (p) {
+ found++;
+ print_geo("L3 ", (uint32_t)p->a_un.a_val);
+ }
+
+ /* If we found none we're probably on a system where they don't exist */
+ SKIP_IF(found == 0);
+
+ /* But if we found any, we expect to find them all */
+ FAIL_IF(found != 8);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_cache_shape, "cache_shape");
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
new file mode 100644
index 000000000..ce12cd0e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -0,0 +1,13 @@
+copyuser_64_t0
+copyuser_64_t1
+copyuser_64_t2
+copyuser_power7_t0
+copyuser_power7_t1
+memcpy_64_t0
+memcpy_64_t1
+memcpy_64_t2
+memcpy_power7_t0
+memcpy_power7_t1
+copyuser_64_exc_t0
+copyuser_64_exc_t1
+copyuser_64_exc_t2
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
new file mode 100644
index 000000000..44574f381
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# The loops are all 64-bit code
+CFLAGS += -m64
+CFLAGS += -I$(CURDIR)
+CFLAGS += -D SELFTEST
+CFLAGS += -maltivec
+CFLAGS += -mcpu=power4
+
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
+
+TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
+ copyuser_p7_t0 copyuser_p7_t1 \
+ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
+ memcpy_p7_t0 memcpy_p7_t1 \
+ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
+
+EXTRA_SOURCES := validate.c ../harness.c stubs.S
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_base \
+ -D SELFTEST_CASE=$(subst copyuser_64_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/copyuser_p7_t%: copyuser_power7.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_power7 \
+ -D SELFTEST_CASE=$(subst copyuser_p7_t,,$(notdir $@)) \
+ -o $@ $^
+
+# Strictly speaking, we only need the memcpy_64 test cases for big-endian
+$(OUTPUT)/memcpy_64_t%: memcpy_64.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test_memcpy \
+ -D SELFTEST_CASE=$(subst memcpy_64_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test_memcpy_power7 \
+ -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
+ -o $@ $^
+
+$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
+ copy_tofrom_user_reference.S stubs.S
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D COPY_LOOP=test___copy_tofrom_user_base \
+ -D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \
+ -o $@ $^
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/asm-compat.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/asm/export.h
new file mode 100644
index 000000000..0bab35f67
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/export.h
@@ -0,0 +1,2 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define EXPORT_SYMBOL(x)
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/feature-fixups.h
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
new file mode 100644
index 000000000..0605df807
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SELFTESTS_POWERPC_PPC_ASM_H
+#define __SELFTESTS_POWERPC_PPC_ASM_H
+#include <ppc-asm.h>
+
+#define CONFIG_ALTIVEC
+
+#define r1 1
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE 256
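+/* Save slot for non-volatile GPR i: r14 lands at offset 112, then 8 bytes per register */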
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) _GLOBAL(A)
+
+#define PPC_MTOCRF(A, B) mtocrf A, B
+
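+/*
+ * Emit an (instruction address, fixup address) pair; the selftest SEGV
+ * handler in exc_validate.c walks these pairs to recover from faults.
+ */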
+#define EX_TABLE(x, y) \
+ .section __ex_table,"a"; \
+ .8byte x, y; \
+ .previous
+
+#define BEGIN_FTR_SECTION .if test_feature
+#define FTR_SECTION_ELSE .else
+#define ALT_FTR_SECTION_END_IFCLR(x) .endif
+#define ALT_FTR_SECTION_END_IFSET(x) .endif
+#define ALT_FTR_SECTION_END(x, y) .endif
+#define END_FTR_SECTION_IFCLR(x) .endif
+#define END_FTR_SECTION_IFSET(x) .endif
+
+/* Default to taking the first of any alternative feature sections */
+test_feature = 1
+
+#endif /* __SELFTESTS_POWERPC_PPC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/processor.h b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/asm/processor.h
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
new file mode 100644
index 000000000..3363b8640
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_tofrom_user_reference.S
@@ -0,0 +1,24 @@
+#include <asm/ppc_asm.h>
+
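+/*
+ * Byte-at-a-time reference copy. Like the kernel routines, it returns the
+ * number of bytes NOT copied: 0 on success, the remaining count on fault.
+ */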
+_GLOBAL(copy_tofrom_user_reference)
+ cmpdi r5,0
+ beq 4f
+
+ mtctr r5
+
+1: lbz r6,0(r4)
+2: stb r6,0(r3)
+ addi r3,r3,1
+ addi r4,r4,1
+ bdnz 1b
+
+3: mfctr r3
+ blr
+
+4: mr r3,r5
+ blr
+
+.section __ex_table,"a"
+ .llong 1b,3b
+ .llong 2b,3b
+.text
diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_64.S b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
new file mode 120000
index 000000000..f1c418a25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
new file mode 120000
index 000000000..478689598
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copyuser_power7.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copyuser_power7.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/exc_validate.c b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
new file mode 100644
index 000000000..c896ea9a7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/exc_validate.c
@@ -0,0 +1,124 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "utils.h"
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = (ucontext_t *)ptr;
+ unsigned long addr = (unsigned long)info->si_addr;
+ unsigned long *ip = &UCONTEXT_NIA(uc);
+ unsigned long *ex_p = (unsigned long *)__start___ex_table;
+
+ while (ex_p < (unsigned long *)__stop___ex_table) {
+ unsigned long insn, fixup;
+
+ insn = *ex_p++;
+ fixup = *ex_p++;
+
+ if (insn == *ip) {
+ *ip = fixup;
+ return;
+ }
+ }
+
+ printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+ abort();
+}
+
+static void setup_segv_handler(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = segv_handler;
+ action.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &action, NULL);
+}
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+unsigned long test_copy_tofrom_user_reference(void *to, const void *from, unsigned long size);
+
+static int total_passed;
+static int total_failed;
+
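+/*
+ * Run both the loop under test and the reference copy over the same
+ * window; the two must agree on how many bytes were left uncopied.
+ */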
+static void do_one_test(char *dstp, char *srcp, unsigned long len)
+{
+ unsigned long got, expected;
+
+ got = COPY_LOOP(dstp, srcp, len);
+ expected = test_copy_tofrom_user_reference(dstp, srcp, len);
+
+ if (got != expected) {
+ total_failed++;
+ printf("FAIL from=%p to=%p len=%ld returned %ld, expected %ld\n",
+ srcp, dstp, len, got, expected);
+ //abort();
+ } else
+ total_passed++;
+}
+
+//#define MAX_LEN 512
+#define MAX_LEN 16
+
+int test_copy_exception(void)
+{
+ int page_size;
+ static char *p, *q;
+ unsigned long src, dst, len;
+
+ page_size = getpagesize();
+ p = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memset(p, 0, page_size);
+
+ setup_segv_handler();
+
+ if (mprotect(p + page_size, page_size, PROT_NONE)) {
+ perror("mprotect");
+ exit(1);
+ }
+
+ q = p + page_size - MAX_LEN;
+
+ for (src = 0; src < MAX_LEN; src++) {
+ for (dst = 0; dst < MAX_LEN; dst++) {
+ for (len = 0; len < MAX_LEN+1; len++) {
+ // printf("from=%p to=%p len=%ld\n", q+dst, q+src, len);
+ do_one_test(q+dst, q+src, len);
+ }
+ }
+ }
+
+ printf("Totals:\n");
+ printf(" Pass: %d\n", total_passed);
+ printf(" Fail: %d\n", total_failed);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_copy_exception, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
new file mode 120000
index 000000000..cce33fb6f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
new file mode 120000
index 000000000..0d6fbfaf3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_power7.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcpy_power7.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/stubs.S b/tools/testing/selftests/powerpc/copyloops/stubs.S
new file mode 100644
index 000000000..ec8bcf2bf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/stubs.S
@@ -0,0 +1,19 @@
+#include <asm/ppc_asm.h>
+
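+/*
+ * Minimal userspace stand-ins for the kernel's VMX enter/exit helpers,
+ * so the copy loops link and run here; returning 1 reports VMX usable.
+ */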
+FUNC_START(enter_vmx_usercopy)
+ li r3,1
+ blr
+
+FUNC_START(exit_vmx_usercopy)
+ li r3,0
+ blr
+
+FUNC_START(enter_vmx_ops)
+ li r3,1
+ blr
+
+FUNC_START(exit_vmx_ops)
+ blr
+
+FUNC_START(__copy_tofrom_user_base)
+ blr
diff --git a/tools/testing/selftests/powerpc/copyloops/validate.c b/tools/testing/selftests/powerpc/copyloops/validate.c
new file mode 100644
index 000000000..0f6873618
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/validate.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "utils.h"
+
+#define MAX_LEN 8192
+#define MAX_OFFSET 16
+#define MIN_REDZONE 128
+#define BUFLEN (MAX_LEN+MAX_OFFSET+2*MIN_REDZONE)
+#define POISON 0xa5
+
+unsigned long COPY_LOOP(void *to, const void *from, unsigned long size);
+
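+/*
+ * Copy len bytes between the given offsets, verify the data, and check
+ * that the POISON redzones either side of the destination are intact.
+ */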
+static void do_one(char *src, char *dst, unsigned long src_off,
+ unsigned long dst_off, unsigned long len, void *redzone,
+ void *fill)
+{
+ char *srcp, *dstp;
+ unsigned long ret;
+ unsigned long i;
+
+ srcp = src + MIN_REDZONE + src_off;
+ dstp = dst + MIN_REDZONE + dst_off;
+
+ memset(src, POISON, BUFLEN);
+ memset(dst, POISON, BUFLEN);
+ memcpy(srcp, fill, len);
+
+ ret = COPY_LOOP(dstp, srcp, len);
+ if (ret && ret != (unsigned long)dstp) {
+ printf("(%p,%p,%ld) returned %ld\n", dstp, srcp, len, ret);
+ abort();
+ }
+
+ if (memcmp(dstp, srcp, len)) {
+ printf("(%p,%p,%ld) miscompare\n", dstp, srcp, len);
+ printf("src: ");
+ for (i = 0; i < len; i++)
+ printf("%02x ", srcp[i]);
+ printf("\ndst: ");
+ for (i = 0; i < len; i++)
+ printf("%02x ", dstp[i]);
+ printf("\n");
+ abort();
+ }
+
+ if (memcmp(dst, redzone, dstp - dst)) {
+ printf("(%p,%p,%ld) redzone before corrupted\n",
+ dstp, srcp, len);
+ abort();
+ }
+
+ if (memcmp(dstp+len, redzone, dst+BUFLEN-(dstp+len))) {
+ printf("(%p,%p,%ld) redzone after corrupted\n",
+ dstp, srcp, len);
+ abort();
+ }
+}
+
+int test_copy_loop(void)
+{
+ char *src, *dst, *redzone, *fill;
+ unsigned long len, src_off, dst_off;
+ unsigned long i;
+
+ src = memalign(BUFLEN, BUFLEN);
+ dst = memalign(BUFLEN, BUFLEN);
+ redzone = malloc(BUFLEN);
+ fill = malloc(BUFLEN);
+
+ if (!src || !dst || !redzone || !fill) {
+ fprintf(stderr, "malloc failed\n");
+ exit(1);
+ }
+
+ memset(redzone, POISON, BUFLEN);
+
+ /* Fill with sequential bytes */
+ for (i = 0; i < BUFLEN; i++)
+ fill[i] = i & 0xff;
+
+ for (len = 1; len < MAX_LEN; len++) {
+ for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
+ for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
+ do_one(src, dst, src_off, dst_off, len,
+ redzone, fill);
+ }
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_copy_loop, str(COPY_LOOP));
+}
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
new file mode 100644
index 000000000..b585c6c15
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -0,0 +1,7 @@
+dscr_default_test
+dscr_explicit_test
+dscr_inherit_exec_test
+dscr_inherit_test
+dscr_sysfs_test
+dscr_sysfs_thread_test
+dscr_user_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
new file mode 100644
index 000000000..5df476364
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
+ dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
+ dscr_sysfs_thread_test
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
new file mode 100644
index 000000000..cdb840bc5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -0,0 +1,125 @@
+/*
+ * POWER Data Stream Control Register (DSCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DSCR related test cases.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H
+#define _SELFTESTS_POWERPC_DSCR_DSCR_H
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <pthread.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+#define THREADS 100 /* Max threads */
+#define COUNT 100 /* Max iterations */
+#define DSCR_MAX 16 /* Max DSCR value */
+#define LEN_MAX 100 /* Max name length */
+
+#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default"
+#define CPU_PATH "/sys/devices/system/cpu/"
+
+#define rmb() asm volatile("lwsync":::"memory")
+#define wmb() asm volatile("lwsync":::"memory")
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+/* Privilege state DSCR access */
+inline unsigned long get_dscr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV));
+
+ return ret;
+}
+
+inline void set_dscr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV));
+}
+
+/* Problem state DSCR access */
+inline unsigned long get_dscr_usr(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR));
+
+ return ret;
+}
+
+inline void set_dscr_usr(unsigned long val)
+{
+ asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
+/* Default DSCR access */
+unsigned long get_default_dscr(void)
+{
+ int fd = -1, ret;
+ char buf[16];
+ unsigned long val;
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDONLY);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ memset(buf, 0, sizeof(buf));
+ lseek(fd, 0, SEEK_SET);
+ ret = read(fd, buf, sizeof(buf));
+ if (ret == -1) {
+ perror("read() failed");
+ exit(1);
+ }
+ sscanf(buf, "%lx", &val);
+ close(fd);
+ return val;
+}
+
+void set_default_dscr(unsigned long val)
+{
+ int fd = -1, ret;
+ char buf[16];
+
+ if (fd == -1) {
+ fd = open(DSCR_DEFAULT, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ exit(1);
+ }
+ }
+ sprintf(buf, "%lx\n", val);
+ ret = write(fd, buf, strlen(buf));
+ if (ret == -1) {
+ perror("write() failed");
+ exit(1);
+ }
+ close(fd);
+}
+
+double uniform_deviate(int seed)
+{
+ return seed * (1.0 / (RAND_MAX + 1.0));
+}
+#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
new file mode 100644
index 000000000..9e1a37e93
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -0,0 +1,127 @@
+/*
+ * POWER Data Stream Control Register (DSCR) default test
+ *
+ * This test modifies the system wide default DSCR through
+ * its sysfs interface and then verifies that all threads
+ * see the correct changed DSCR value immediately.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static unsigned long dscr; /* System DSCR default */
+static unsigned long sequence;
+static unsigned long result[THREADS];
+
+static void *do_test(void *in)
+{
+ unsigned long thread = (unsigned long)in;
+ unsigned long i;
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long d, cur_dscr, cur_dscr_usr;
+ unsigned long s1, s2;
+
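+		/*
+		 * Seqlock-style read: an odd sequence means the main thread
+		 * is mid-update, and s1 != s2 below means an update raced
+		 * with our samples, so they are discarded either way.
+		 */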
+ s1 = READ_ONCE(sequence);
+ if (s1 & 1)
+ continue;
+ rmb();
+
+ d = dscr;
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ rmb();
+ s2 = sequence;
+
+ if (s1 != s2)
+ continue;
+
+ if (cur_dscr != d) {
+ fprintf(stderr, "thread %ld kernel DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+
+ if (cur_dscr_usr != d) {
+ fprintf(stderr, "thread %ld user DSCR should be %ld "
+ "but is %ld\n", thread, d, cur_dscr_usr);
+ result[thread] = 1;
+ pthread_exit(&result[thread]);
+ }
+ }
+ result[thread] = 0;
+ pthread_exit(&result[thread]);
+}
+
+int dscr_default(void)
+{
+ pthread_t threads[THREADS];
+ unsigned long i, *status[THREADS];
+ unsigned long orig_dscr_default;
+
+ orig_dscr_default = get_default_dscr();
+
+ /* Initial DSCR default */
+ dscr = 1;
+ set_default_dscr(dscr);
+
+ /* Spawn all testing threads */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
+ perror("pthread_create() failed");
+ goto fail;
+ }
+ }
+
+ srand(getpid());
+
+ /* Keep changing the DSCR default */
+ for (i = 0; i < COUNT; i++) {
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.0001) {
+ sequence++;
+ wmb();
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_default_dscr(dscr);
+
+ wmb();
+ sequence++;
+ }
+ }
+
+ /* Individual testing thread exit status */
+ for (i = 0; i < THREADS; i++) {
+ if (pthread_join(threads[i], (void **)&(status[i]))) {
+ perror("pthread_join() failed");
+ goto fail;
+ }
+
+ if (*status[i]) {
+			printf("thread %ld failed to join with status %ld\n",
+ i, *status[i]);
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_default, "dscr_default_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
new file mode 100644
index 000000000..ad9c3ec26
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -0,0 +1,71 @@
+/*
+ * POWER Data Stream Control Register (DSCR) explicit test
+ *
+ * This test modifies the DSCR value using mtspr instruction and
+ * verifies the change with mfspr instruction. It uses both the
+ * privilege state SPR and the problem state SPR for this purpose.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are priviledged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be exuecuted
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_explicit(void)
+{
+ unsigned long i, dscr = 0;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+ double ret = uniform_deviate(rand());
+
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr(dscr);
+ }
+
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr);
+ return 1;
+ }
+
+ ret = uniform_deviate(rand());
+ if (ret < 0.001) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ set_dscr_usr(dscr);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld but "
+ "is %ld\n", dscr, cur_dscr_usr);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_explicit, "dscr_explicit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
new file mode 100644
index 000000000..c8c240acc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -0,0 +1,109 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork exec test
+ *
+ * This testcase modifies the DSCR using mtspr, forks & execs and
+ * verifies that the child is using the changed DSCR using mfspr.
+ *
+ * When using the privilege state SPR, the instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static char *prog;
+
+static void do_exec(unsigned long parent_dscr)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (cur_dscr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (kernel value)\n", parent_dscr);
+ exit(1);
+ }
+
+ if (cur_dscr_usr != parent_dscr) {
+ fprintf(stderr, "Parent DSCR %ld was not inherited "
+ "over exec (user value)\n", parent_dscr);
+ exit(1);
+ }
+ exit(0);
+}
+
+int dscr_inherit_exec(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ for (i = 0; i < COUNT; i++) {
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (dscr == get_default_dscr())
+ continue;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+				fprintf(stderr, "Child exited with non-zero status\n");
+ return 1;
+ }
+ } else {
+ char dscr_str[16];
+
+ sprintf(dscr_str, "%ld", dscr);
+ execlp(prog, prog, "exec", dscr_str, NULL);
+ exit(1);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc == 3 && !strcmp(argv[1], "exec")) {
+ unsigned long parent_dscr;
+
+ parent_dscr = atoi(argv[2]);
+ do_exec(parent_dscr);
+ } else if (argc != 1) {
+ fprintf(stderr, "Usage: %s\n", argv[0]);
+ exit(1);
+ }
+
+ prog = argv[0];
+ return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
new file mode 100644
index 000000000..3e5a6d195
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -0,0 +1,87 @@
+/*
+ * POWER Data Stream Control Register (DSCR) fork test
+ *
+ * This testcase modifies the DSCR using mtspr, forks and then
+ * verifies that the child process has the correct changed DSCR
+ * value using mfspr.
+ *
+ * When using the privilege state SPR, instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using the problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2012, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+int dscr_inherit(void)
+{
+ unsigned long i, dscr = 0;
+ pid_t pid;
+
+ srand(getpid());
+ set_dscr(dscr);
+
+ for (i = 0; i < COUNT; i++) {
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ dscr++;
+ if (dscr > DSCR_MAX)
+ dscr = 0;
+
+ if (i % 2 == 0)
+ set_dscr_usr(dscr);
+ else
+ set_dscr(dscr);
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork() failed");
+ exit(1);
+ } else if (pid) {
+ int status;
+
+ if (waitpid(pid, &status, 0) == -1) {
+ perror("waitpid() failed");
+ exit(1);
+ }
+
+ if (!WIFEXITED(status)) {
+ fprintf(stderr, "Child didn't exit cleanly\n");
+ exit(1);
+ }
+
+ if (WEXITSTATUS(status) != 0) {
+				fprintf(stderr, "Child exited with non-zero status\n");
+ return 1;
+ }
+ } else {
+ cur_dscr = get_dscr();
+ if (cur_dscr != dscr) {
+ fprintf(stderr, "Kernel DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr);
+ exit(1);
+ }
+
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr_usr != dscr) {
+ fprintf(stderr, "User DSCR should be %ld "
+ "but is %ld\n", dscr, cur_dscr_usr);
+ exit(1);
+ }
+ exit(0);
+ }
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_inherit, "dscr_inherit_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
new file mode 100644
index 000000000..1899bd851
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -0,0 +1,101 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs interface test
+ *
+ * This test updates the system wide DSCR default through the sysfs interface
+ * and then verifies that all the CPU specific DSCR defaults are updated as
+ * well, as seen through their per-CPU sysfs interfaces.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_cpu_dscr_default(char *file, unsigned long val)
+{
+ char buf[10];
+ int fd, rc;
+
+ fd = open(file, O_RDWR);
+ if (fd == -1) {
+ perror("open() failed");
+ return 1;
+ }
+
+ rc = read(fd, buf, sizeof(buf));
+ if (rc == -1) {
+ perror("read() failed");
+ return 1;
+ }
+ close(fd);
+
+ buf[rc] = '\0';
+ if (strtol(buf, NULL, 16) != val) {
+ printf("DSCR match failed: %ld (system) %ld (cpu)\n",
+ val, strtol(buf, NULL, 16));
+ return 1;
+ }
+ return 0;
+}
+
+static int check_all_cpu_dscr_defaults(unsigned long val)
+{
+ DIR *sysfs;
+ struct dirent *dp;
+ char file[LEN_MAX];
+
+ sysfs = opendir(CPU_PATH);
+ if (!sysfs) {
+ perror("opendir() failed");
+ return 1;
+ }
+
+ while ((dp = readdir(sysfs))) {
+ int len;
+
+ if (!(dp->d_type & DT_DIR))
+ continue;
+ if (!strcmp(dp->d_name, "cpuidle"))
+ continue;
+ if (!strstr(dp->d_name, "cpu"))
+ continue;
+
+ len = snprintf(file, LEN_MAX, "%s%s/dscr", CPU_PATH, dp->d_name);
+ if (len >= LEN_MAX)
+ continue;
+ if (access(file, F_OK))
+ continue;
+
+ if (check_cpu_dscr_default(file, val))
+ return 1;
+ }
+ closedir(sysfs);
+ return 0;
+}
+
+int dscr_sysfs(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_all_cpu_dscr_defaults(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs, "dscr_sysfs_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
new file mode 100644
index 000000000..ad97b592e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -0,0 +1,80 @@
+/*
+ * POWER Data Stream Control Register (DSCR) sysfs thread test
+ *
+ * This test updates the system wide DSCR default value through
+ * the sysfs interface, which should then update all the CPU specific
+ * DSCR default values; these must in turn be visible to threads
+ * executing on individual CPUs of the system.
+ *
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#define _GNU_SOURCE
+#include "dscr.h"
+
+static int test_thread_dscr(unsigned long val)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+
+ if (val != cur_dscr) {
+ printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr);
+ return 1;
+ }
+
+ if (val != cur_dscr_usr) {
+ printf("[cpu %d] User DSCR should be %ld but is %ld\n",
+ sched_getcpu(), val, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+static int check_cpu_dscr_thread(unsigned long val)
+{
+ cpu_set_t mask;
+ int cpu;
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+ if (sched_setaffinity(0, sizeof(mask), &mask))
+ continue;
+
+ if (test_thread_dscr(val))
+ return 1;
+ }
+ return 0;
+
+}
+
+int dscr_sysfs_thread(void)
+{
+ unsigned long orig_dscr_default;
+ int i, j;
+
+ orig_dscr_default = get_default_dscr();
+ for (i = 0; i < COUNT; i++) {
+ for (j = 0; j < DSCR_MAX; j++) {
+ set_default_dscr(j);
+ if (check_cpu_dscr_thread(j))
+ goto fail;
+ }
+ }
+ set_default_dscr(orig_dscr_default);
+ return 0;
+fail:
+ set_default_dscr(orig_dscr_default);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test");
+}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
new file mode 100644
index 000000000..77d16b5e7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -0,0 +1,61 @@
+/*
+ * POWER Data Stream Control Register (DSCR) SPR test
+ *
+ * This test modifies the DSCR value using the mtspr instruction
+ * with both SPR numbers and then makes sure that the same value is
+ * reflected through the mfspr instruction using either of the SPR
+ * numbers.
+ *
+ * When using the privilege state SPR, instructions such as
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using the problem state SPR can be executed
+ * directly without any emulation if the HW supports them. Else
+ * they also get emulated by the kernel.
+ *
+ * Copyright 2013, Anton Blanchard, IBM Corporation.
+ * Copyright 2015, Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ */
+#include "dscr.h"
+
+static int check_dscr(char *str)
+{
+ unsigned long cur_dscr, cur_dscr_usr;
+
+ cur_dscr = get_dscr();
+ cur_dscr_usr = get_dscr_usr();
+ if (cur_dscr != cur_dscr_usr) {
+ printf("%s set, kernel get %lx != user get %lx\n",
+ str, cur_dscr, cur_dscr_usr);
+ return 1;
+ }
+ return 0;
+}
+
+int dscr_user(void)
+{
+ int i;
+
+ check_dscr("");
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr(i);
+ if (check_dscr("kernel"))
+ return 1;
+ }
+
+ for (i = 0; i < COUNT; i++) {
+ set_dscr_usr(i);
+ if (check_dscr("user"))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(dscr_user, "dscr_user_test");
+}
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
new file mode 100644
index 000000000..9d7166dfa
--- /dev/null
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <elf.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sys/stat.h>
+
+#include "subunit.h"
+#include "utils.h"
+
+#define KILL_TIMEOUT 5
+
+static uint64_t timeout = 120;
+
+int run_test(int (test_function)(void), char *name)
+{
+ bool terminated;
+ int rc, status;
+ pid_t pid;
+
+ /* Make sure output is flushed before forking */
+ fflush(stdout);
+
+ pid = fork();
+ if (pid == 0) {
+ setpgid(0, 0);
+ exit(test_function());
+ } else if (pid == -1) {
+ perror("fork");
+ return 1;
+ }
+
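+	/* Set the pgid from the parent too, so kill(-pid) can't race the child */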
+ setpgid(pid, pid);
+
+ /* Wake us up in timeout seconds */
+ alarm(timeout);
+ terminated = false;
+
+wait:
+ rc = waitpid(pid, &status, 0);
+ if (rc == -1) {
+ if (errno != EINTR) {
+ printf("unknown error from waitpid\n");
+ return 1;
+ }
+
+ if (terminated) {
+ printf("!! force killing %s\n", name);
+ kill(-pid, SIGKILL);
+ return 1;
+ } else {
+ printf("!! killing %s\n", name);
+ kill(-pid, SIGTERM);
+ terminated = true;
+ alarm(KILL_TIMEOUT);
+ goto wait;
+ }
+ }
+
+ /* Kill anything else in the process group that is still running */
+ kill(-pid, SIGTERM);
+
+ if (WIFEXITED(status))
+ status = WEXITSTATUS(status);
+ else {
+ if (WIFSIGNALED(status))
+ printf("!! child died by signal %d\n", WTERMSIG(status));
+ else
+ printf("!! child died by unknown cause\n");
+
+ status = 1; /* Signal or other */
+ }
+
+ return status;
+}
+
+static void sig_handler(int signum)
+{
+ /* Just wake us up from waitpid */
+}
+
+static struct sigaction sig_action = {
+ .sa_handler = sig_handler,
+};
+
+void test_harness_set_timeout(uint64_t time)
+{
+ timeout = time;
+}
+
+int test_harness(int (test_function)(void), char *name)
+{
+ int rc;
+
+ test_start(name);
+ test_set_git_version(GIT_VERSION);
+
+ if (sigaction(SIGINT, &sig_action, NULL)) {
+ perror("sigaction (sigint)");
+ test_error(name);
+ return 1;
+ }
+
+ if (sigaction(SIGALRM, &sig_action, NULL)) {
+ perror("sigaction (sigalrm)");
+ test_error(name);
+ return 1;
+ }
+
+ rc = run_test(test_function, name);
+
+ if (rc == MAGIC_SKIP_RETURN_VALUE) {
+ test_skip(name);
+		/* so that a skipped test is not marked as failed */
+ rc = 0;
+ } else
+ test_finish(name, rc);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h
new file mode 100644
index 000000000..886dc026f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/basic_asm.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H
+#define _SELFTESTS_POWERPC_BASIC_ASM_H
+
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
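+/*
+ * Build a 64-bit immediate 16 bits at a time: assemble the upper 32 bits,
+ * shift them into place, then OR in the low two 16-bit chunks.
+ */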
+#define LOAD_REG_IMMEDIATE(reg, expr) \
+ lis reg, (expr)@highest; \
+ ori reg, reg, (expr)@higher; \
+ rldicr reg, reg, 32, 31; \
+ oris reg, reg, (expr)@high; \
+ ori reg, reg, (expr)@l;
+
+/*
+ * Note: These macros assume that variables being stored on the stack are
+ * doublewords; while this is usually the case, it may not always hold
+ * for every use case.
+ */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_FRAME_TOC_POS 24
+#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8))
+#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
+ ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
+#else
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_FRAME_TOC_POS 40
+#define __STACK_FRAME_PARAM(i) (48 + ((i)*8))
+
+/*
+ * Caveat: if a function is passed more than 8 doublewords, the caller will have
+ * made more space... which would render the 112 incorrect.
+ */
+#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
+ (112 + ((_var_num)*8))
+#endif
+
+/* Parameter x saved to the stack */
+#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var)
+
+/* Local variable x saved to the stack after x parameters */
+#define STACK_FRAME_LOCAL(num_params, var) \
+ __STACK_FRAME_LOCAL(num_params, var)
+#define STACK_FRAME_LR_POS 16
+#define STACK_FRAME_CR_POS 8
+
+/*
+ * It is very important to note here that _extra is the extra amount of
+ * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
+ * or STACK_FRAME_LOCAL() macros.
+ *
+ * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
+ * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
+ * %r1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
+ * preprocessed incorrectly, hence r0.
+ */
+#define PUSH_BASIC_STACK(_extra) \
+ mflr r0; \
+ std r0, STACK_FRAME_LR_POS(%r1); \
+ stdu %r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
+ mfcr r0; \
+ stw r0, STACK_FRAME_CR_POS(%r1); \
+ std %r2, STACK_FRAME_TOC_POS(%r1);
+
+#define POP_BASIC_STACK(_extra) \
+ ld %r2, STACK_FRAME_TOC_POS(%r1); \
+ lwz r0, STACK_FRAME_CR_POS(%r1); \
+ mtcr r0; \
+ addi %r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \
+ ld r0, STACK_FRAME_LR_POS(%r1); \
+ mtlr r0;
+
+#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/fpu_asm.h b/tools/testing/selftests/powerpc/include/fpu_asm.h
new file mode 100644
index 000000000..6a387d255
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/fpu_asm.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SELFTESTS_POWERPC_FPU_ASM_H
+#define _SELFTESTS_POWERPC_FPU_ASM_H
+#include "basic_asm.h"
+
+#define PUSH_FPU(stack_size) \
+ stfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+ stfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+ stfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+ stfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+ stfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+ stfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+ stfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+ stfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+ stfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+ stfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+ stfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+ stfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+ stfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+ stfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+ stfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+ stfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+ stfd f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+ stfd f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+#define POP_FPU(stack_size) \
+ lfd f31,(stack_size + STACK_FRAME_MIN_SIZE)(%r1); \
+ lfd f30,(stack_size + STACK_FRAME_MIN_SIZE - 8)(%r1); \
+ lfd f29,(stack_size + STACK_FRAME_MIN_SIZE - 16)(%r1); \
+ lfd f28,(stack_size + STACK_FRAME_MIN_SIZE - 24)(%r1); \
+ lfd f27,(stack_size + STACK_FRAME_MIN_SIZE - 32)(%r1); \
+ lfd f26,(stack_size + STACK_FRAME_MIN_SIZE - 40)(%r1); \
+ lfd f25,(stack_size + STACK_FRAME_MIN_SIZE - 48)(%r1); \
+ lfd f24,(stack_size + STACK_FRAME_MIN_SIZE - 56)(%r1); \
+ lfd f23,(stack_size + STACK_FRAME_MIN_SIZE - 64)(%r1); \
+ lfd f22,(stack_size + STACK_FRAME_MIN_SIZE - 72)(%r1); \
+ lfd f21,(stack_size + STACK_FRAME_MIN_SIZE - 80)(%r1); \
+ lfd f20,(stack_size + STACK_FRAME_MIN_SIZE - 88)(%r1); \
+ lfd f19,(stack_size + STACK_FRAME_MIN_SIZE - 96)(%r1); \
+ lfd f18,(stack_size + STACK_FRAME_MIN_SIZE - 104)(%r1); \
+ lfd f17,(stack_size + STACK_FRAME_MIN_SIZE - 112)(%r1); \
+ lfd f16,(stack_size + STACK_FRAME_MIN_SIZE - 120)(%r1); \
+ lfd f15,(stack_size + STACK_FRAME_MIN_SIZE - 128)(%r1); \
+ lfd f14,(stack_size + STACK_FRAME_MIN_SIZE - 136)(%r1);
+
+/*
+ * Careful calling this, it will 'clobber' the FPU (by design).
+ * Don't call this from C.
+ */
+FUNC_START(load_fpu)
+ lfd f14,0(r3)
+ lfd f15,8(r3)
+ lfd f16,16(r3)
+ lfd f17,24(r3)
+ lfd f18,32(r3)
+ lfd f19,40(r3)
+ lfd f20,48(r3)
+ lfd f21,56(r3)
+ lfd f22,64(r3)
+ lfd f23,72(r3)
+ lfd f24,80(r3)
+ lfd f25,88(r3)
+ lfd f26,96(r3)
+ lfd f27,104(r3)
+ lfd f28,112(r3)
+ lfd f29,120(r3)
+ lfd f30,128(r3)
+ lfd f31,136(r3)
+ blr
+FUNC_END(load_fpu)
+
+#endif /* _SELFTESTS_POWERPC_FPU_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/gpr_asm.h b/tools/testing/selftests/powerpc/include/gpr_asm.h
new file mode 100644
index 000000000..f6f38852d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/gpr_asm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _SELFTESTS_POWERPC_GPR_ASM_H
+#define _SELFTESTS_POWERPC_GPR_ASM_H
+
+#include "basic_asm.h"
+
+#define __PUSH_NVREGS(top_pos); \
+ std r31,(top_pos)(%r1); \
+ std r30,(top_pos - 8)(%r1); \
+ std r29,(top_pos - 16)(%r1); \
+ std r28,(top_pos - 24)(%r1); \
+ std r27,(top_pos - 32)(%r1); \
+ std r26,(top_pos - 40)(%r1); \
+ std r25,(top_pos - 48)(%r1); \
+ std r24,(top_pos - 56)(%r1); \
+ std r23,(top_pos - 64)(%r1); \
+ std r22,(top_pos - 72)(%r1); \
+ std r21,(top_pos - 80)(%r1); \
+ std r20,(top_pos - 88)(%r1); \
+ std r19,(top_pos - 96)(%r1); \
+ std r18,(top_pos - 104)(%r1); \
+ std r17,(top_pos - 112)(%r1); \
+ std r16,(top_pos - 120)(%r1); \
+ std r15,(top_pos - 128)(%r1); \
+ std r14,(top_pos - 136)(%r1)
+
+#define __POP_NVREGS(top_pos); \
+ ld r31,(top_pos)(%r1); \
+ ld r30,(top_pos - 8)(%r1); \
+ ld r29,(top_pos - 16)(%r1); \
+ ld r28,(top_pos - 24)(%r1); \
+ ld r27,(top_pos - 32)(%r1); \
+ ld r26,(top_pos - 40)(%r1); \
+ ld r25,(top_pos - 48)(%r1); \
+ ld r24,(top_pos - 56)(%r1); \
+ ld r23,(top_pos - 64)(%r1); \
+ ld r22,(top_pos - 72)(%r1); \
+ ld r21,(top_pos - 80)(%r1); \
+ ld r20,(top_pos - 88)(%r1); \
+ ld r19,(top_pos - 96)(%r1); \
+ ld r18,(top_pos - 104)(%r1); \
+ ld r17,(top_pos - 112)(%r1); \
+ ld r16,(top_pos - 120)(%r1); \
+ ld r15,(top_pos - 128)(%r1); \
+ ld r14,(top_pos - 136)(%r1)
+
+#define PUSH_NVREGS(stack_size) \
+ __PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE)
+
+/* 18 NV FPU REGS */
+#define PUSH_NVREGS_BELOW_FPU(stack_size) \
+ __PUSH_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8))
+
+#define POP_NVREGS(stack_size) \
+ __POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE)
+
+/* 18 NV FPU REGS */
+#define POP_NVREGS_BELOW_FPU(stack_size) \
+ __POP_NVREGS(stack_size + STACK_FRAME_MIN_SIZE - (18 * 8))
+
+/*
+ * Careful calling this, it will 'clobber' NVGPRs (by design).
+ * Don't call this from C.
+ */
+FUNC_START(load_gpr)
+ ld r14,0(r3)
+ ld r15,8(r3)
+ ld r16,16(r3)
+ ld r17,24(r3)
+ ld r18,32(r3)
+ ld r19,40(r3)
+ ld r20,48(r3)
+ ld r21,56(r3)
+ ld r22,64(r3)
+ ld r23,72(r3)
+ ld r24,80(r3)
+ ld r25,88(r3)
+ ld r26,96(r3)
+ ld r27,104(r3)
+ ld r28,112(r3)
+ ld r29,120(r3)
+ ld r30,128(r3)
+ ld r31,136(r3)
+ blr
+FUNC_END(load_gpr)
+
+
+#endif /* _SELFTESTS_POWERPC_GPR_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
new file mode 100644
index 000000000..f36061eb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _SELFTESTS_POWERPC_INSTRUCTIONS_H
+#define _SELFTESTS_POWERPC_INSTRUCTIONS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+/* This defines the "copy" instruction from Power ISA 3.0 Book II, section 4.4. */
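+/* The (31 - n) shifts convert the ISA's MSB-0 field positions to shift amounts. */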
+#define __COPY(RA, RB, L) \
+ (0x7c00060c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10))
+#define COPY(RA, RB, L) \
+ .long __COPY((RA), (RB), (L))
+
+static inline void copy(void *i)
+{
+ asm volatile(str(COPY(0, %0, 0))";"
+ :
+ : "b" (i)
+ : "memory"
+ );
+}
+
+static inline void copy_first(void *i)
+{
+ asm volatile(str(COPY(0, %0, 1))";"
+ :
+ : "b" (i)
+ : "memory"
+ );
+}
+
+/* This defines the "paste" instruction from Power ISA 3.0 Book II, section 4.4. */
+#define __PASTE(RA, RB, L, RC) \
+ (0x7c00070c | (RA) << (31-15) | (RB) << (31-20) | (L) << (31-10) | (RC) << (31-31))
+#define PASTE(RA, RB, L, RC) \
+ .long __PASTE((RA), (RB), (L), (RC))
+
+static inline int paste(void *i)
+{
+ int cr;
+
+ asm volatile(str(PASTE(0, %1, 0, 0))";"
+ "mfcr %0;"
+ : "=r" (cr)
+ : "b" (i)
+ : "memory"
+ );
+ return cr;
+}
+
+static inline int paste_last(void *i)
+{
+ int cr;
+
+ asm volatile(str(PASTE(0, %1, 1, 1))";"
+ "mfcr %0;"
+ : "=r" (cr)
+ : "b" (i)
+ : "memory"
+ );
+ return cr;
+}
+
+#define PPC_INST_COPY __COPY(0, 0, 0)
+#define PPC_INST_COPY_FIRST __COPY(0, 0, 1)
+#define PPC_INST_PASTE __PASTE(0, 0, 0, 0)
+#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
+
+#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
new file mode 100644
index 000000000..7f348c059
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_REG_H
+#define _SELFTESTS_POWERPC_REG_H
+
+#define __stringify_1(x) #x
+#define __stringify(x) __stringify_1(x)
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," _str(rn) \
+ : "=r" (rval)); rval; })
+#define mtspr(rn, v) asm volatile("mtspr " _str(rn) ",%0" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
+#define mb() asm volatile("sync" : : : "memory");
+
+#define SPRN_MMCR2 769
+#define SPRN_MMCRA 770
+#define SPRN_MMCR0 779
+#define MMCR0_PMAO 0x00000080
+#define MMCR0_PMAE 0x04000000
+#define MMCR0_FC 0x80000000
+#define SPRN_EBBHR 804
+#define SPRN_EBBRR 805
+#define SPRN_BESCR 806 /* Branch event status & control register */
+#define SPRN_BESCRS 800 /* Branch event status & control set (1 bits set to 1) */
+#define SPRN_BESCRSU 801 /* Branch event status & control set upper */
+#define SPRN_BESCRR 802 /* Branch event status & control REset (1 bits set to 0) */
+#define SPRN_BESCRRU 803 /* Branch event status & control REset upper */
+
+#define BESCR_PMEO 0x1 /* PMU Event-based exception Occurred */
+#define BESCR_PME (0x1ul << 32) /* PMU Event-based exception Enable */
+
+#define SPRN_PMC1 771
+#define SPRN_PMC2 772
+#define SPRN_PMC3 773
+#define SPRN_PMC4 774
+#define SPRN_PMC5 775
+#define SPRN_PMC6 776
+
+#define SPRN_SIAR 780
+#define SPRN_SDAR 781
+#define SPRN_SIER 768
+
+#define SPRN_TEXASR 0x82 /* Transaction Exception and Status Register */
+#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
+#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+#define SPRN_TAR 0x32f /* Target Address Register */
+
+#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */
+#define SPRN_DSCR 0x03 /* Data Stream Control Register */
+#define SPRN_PPR 896 /* Program Priority Register */
+#define SPRN_AMR 13 /* Authority Mask Register - problem state */
+
+/* TEXASR register bits */
+#define TEXASR_FC 0xFE00000000000000
+#define TEXASR_FP 0x0100000000000000
+#define TEXASR_DA 0x0080000000000000
+#define TEXASR_NO 0x0040000000000000
+#define TEXASR_FO 0x0020000000000000
+#define TEXASR_SIC 0x0010000000000000
+#define TEXASR_NTC 0x0008000000000000
+#define TEXASR_TC 0x0004000000000000
+#define TEXASR_TIC 0x0002000000000000
+#define TEXASR_IC 0x0001000000000000
+#define TEXASR_IFC 0x0000800000000000
+#define TEXASR_ABT 0x0000000100000000
+#define TEXASR_SPD 0x0000000080000000
+#define TEXASR_HV 0x0000000020000000
+#define TEXASR_PR 0x0000000010000000
+#define TEXASR_FS 0x0000000008000000
+#define TEXASR_TE 0x0000000004000000
+#define TEXASR_ROT 0x0000000002000000
+
+/* Vector Instructions */
+#define VSX_XX1(xs, ra, rb) (((xs) & 0x1f) << 21 | ((ra) << 16) | \
+ ((rb) << 11) | (((xs) >> 5)))
+#define STXVD2X(xs, ra, rb) .long (0x7c000798 | VSX_XX1((xs), (ra), (rb)))
+#define LXVD2X(xs, ra, rb) .long (0x7c000698 | VSX_XX1((xs), (ra), (rb)))
+
+#define ASM_LOAD_GPR_IMMED(_asm_symbol_name_immed) \
+ "li 14, %[" #_asm_symbol_name_immed "];" \
+ "li 15, %[" #_asm_symbol_name_immed "];" \
+ "li 16, %[" #_asm_symbol_name_immed "];" \
+ "li 17, %[" #_asm_symbol_name_immed "];" \
+ "li 18, %[" #_asm_symbol_name_immed "];" \
+ "li 19, %[" #_asm_symbol_name_immed "];" \
+ "li 20, %[" #_asm_symbol_name_immed "];" \
+ "li 21, %[" #_asm_symbol_name_immed "];" \
+ "li 22, %[" #_asm_symbol_name_immed "];" \
+ "li 23, %[" #_asm_symbol_name_immed "];" \
+ "li 24, %[" #_asm_symbol_name_immed "];" \
+ "li 25, %[" #_asm_symbol_name_immed "];" \
+ "li 26, %[" #_asm_symbol_name_immed "];" \
+ "li 27, %[" #_asm_symbol_name_immed "];" \
+ "li 28, %[" #_asm_symbol_name_immed "];" \
+ "li 29, %[" #_asm_symbol_name_immed "];" \
+ "li 30, %[" #_asm_symbol_name_immed "];" \
+ "li 31, %[" #_asm_symbol_name_immed "];"
+
+#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \
+ "lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 3, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfs 31, 0(%[" #_asm_symbol_name_addr "]);"
+
+#ifndef __ASSEMBLER__
+void store_gpr(unsigned long *addr);
+void load_gpr(unsigned long *addr);
+void load_fpr_single_precision(float *addr);
+void store_fpr_single_precision(float *addr);
+#endif /* end of __ASSEMBLER__ */
+
+#endif /* _SELFTESTS_POWERPC_REG_H */
diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h
new file mode 100644
index 000000000..9c6c4e901
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/subunit.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SUBUNIT_H
+#define _SELFTESTS_POWERPC_SUBUNIT_H
+
+static inline void test_start(char *name)
+{
+ printf("test: %s\n", name);
+}
+
+static inline void test_failure_detail(char *name, char *detail)
+{
+ printf("failure: %s [%s]\n", name, detail);
+}
+
+static inline void test_failure(char *name)
+{
+ printf("failure: %s\n", name);
+}
+
+static inline void test_error(char *name)
+{
+ printf("error: %s\n", name);
+}
+
+static inline void test_skip(char *name)
+{
+ printf("skip: %s\n", name);
+}
+
+static inline void test_success(char *name)
+{
+ printf("success: %s\n", name);
+}
+
+static inline void test_finish(char *name, int status)
+{
+ if (status)
+ test_failure(name);
+ else
+ test_success(name);
+}
+
+static inline void test_set_git_version(char *value)
+{
+ printf("tags: git_version:%s\n", value);
+}
+
+#endif /* _SELFTESTS_POWERPC_SUBUNIT_H */
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
new file mode 100644
index 000000000..c58c37082
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_UTILS_H
+#define _SELFTESTS_POWERPC_UTILS_H
+
+#define __cacheline_aligned __attribute__((aligned(128)))
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/auxvec.h>
+#include "reg.h"
+
+/* Avoid headaches with PRI?64 - just use %ll? always */
+typedef unsigned long long u64;
+typedef signed long long s64;
+
+/* Just for familiarity */
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+
+void test_harness_set_timeout(uint64_t time);
+int test_harness(int (test_function)(void), char *name);
+
+int read_auxv(char *buf, ssize_t buf_size);
+void *find_auxv_entry(int type, char *auxv);
+void *get_auxv_entry(int type);
+
+int pick_online_cpu(void);
+
+static inline bool have_hwcap(unsigned long ftr)
+{
+ return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
+}
+
+#ifdef AT_HWCAP2
+static inline bool have_hwcap2(unsigned long ftr2)
+{
+ return ((unsigned long)get_auxv_entry(AT_HWCAP2) & ftr2) == ftr2;
+}
+#else
+static inline bool have_hwcap2(unsigned long ftr2)
+{
+ return false;
+}
+#endif
+
+bool is_ppc64le(void);
+
+/* Yes, this is evil */
+#define FAIL_IF(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ return 1; \
+ } \
+} while (0)
+
+/* The test harness uses this, yes it's gross */
+#define MAGIC_SKIP_RETURN_VALUE 99
+
+#define SKIP_IF(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[SKIP] Test skipped on line %d\n", __LINE__); \
+ return MAGIC_SKIP_RETURN_VALUE; \
+ } \
+} while (0)
+
+#define _str(s) #s
+#define str(s) _str(s)
+
+/* POWER9 feature */
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00 0x00800000
+#endif
+
+#endif /* _SELFTESTS_POWERPC_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/include/vmx_asm.h b/tools/testing/selftests/powerpc/include/vmx_asm.h
new file mode 100644
index 000000000..2eaaeca9c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/vmx_asm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/* POS MUST BE 16-BYTE ALIGNED! (lvx/stvx ignore the low address bits) */
+#define PUSH_VMX(pos,reg) \
+ li reg,pos; \
+ stvx v20,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v21,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v22,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v23,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v24,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v25,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v26,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v27,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v28,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v29,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v30,reg,%r1; \
+ addi reg,reg,16; \
+ stvx v31,reg,%r1;
+
+/* POS MUST BE 16-BYTE ALIGNED! (lvx/stvx ignore the low address bits) */
+#define POP_VMX(pos,reg) \
+ li reg,pos; \
+ lvx v20,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v21,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v22,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v23,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v24,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v25,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v26,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v27,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v28,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v29,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v30,reg,%r1; \
+ addi reg,reg,16; \
+ lvx v31,reg,%r1;
+
+/*
+ * Careful, this will 'clobber' VMX (by design).
+ * Don't call this from C.
+ */
+FUNC_START(load_vmx)
+ li r5,0
+ lvx v20,r5,r3
+ addi r5,r5,16
+ lvx v21,r5,r3
+ addi r5,r5,16
+ lvx v22,r5,r3
+ addi r5,r5,16
+ lvx v23,r5,r3
+ addi r5,r5,16
+ lvx v24,r5,r3
+ addi r5,r5,16
+ lvx v25,r5,r3
+ addi r5,r5,16
+ lvx v26,r5,r3
+ addi r5,r5,16
+ lvx v27,r5,r3
+ addi r5,r5,16
+ lvx v28,r5,r3
+ addi r5,r5,16
+ lvx v29,r5,r3
+ addi r5,r5,16
+ lvx v30,r5,r3
+ addi r5,r5,16
+ lvx v31,r5,r3
+ blr
+FUNC_END(load_vmx)
diff --git a/tools/testing/selftests/powerpc/include/vsx_asm.h b/tools/testing/selftests/powerpc/include/vsx_asm.h
new file mode 100644
index 000000000..54064ced9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/vsx_asm.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/*
+ * Careful, this will 'clobber' VSX (by design). VSX registers are
+ * always volatile though, so unlike VMX this isn't so much of an
+ * issue. Still, avoid calling this from C.
+ */
+FUNC_START(load_vsx)
+ li r5,0
+ lxvd2x vs20,r5,r3
+ addi r5,r5,16
+ lxvd2x vs21,r5,r3
+ addi r5,r5,16
+ lxvd2x vs22,r5,r3
+ addi r5,r5,16
+ lxvd2x vs23,r5,r3
+ addi r5,r5,16
+ lxvd2x vs24,r5,r3
+ addi r5,r5,16
+ lxvd2x vs25,r5,r3
+ addi r5,r5,16
+ lxvd2x vs26,r5,r3
+ addi r5,r5,16
+ lxvd2x vs27,r5,r3
+ addi r5,r5,16
+ lxvd2x vs28,r5,r3
+ addi r5,r5,16
+ lxvd2x vs29,r5,r3
+ addi r5,r5,16
+ lxvd2x vs30,r5,r3
+ addi r5,r5,16
+ lxvd2x vs31,r5,r3
+ blr
+FUNC_END(load_vsx)
+
+FUNC_START(store_vsx)
+ li r5,0
+ stxvd2x vs20,r5,r3
+ addi r5,r5,16
+ stxvd2x vs21,r5,r3
+ addi r5,r5,16
+ stxvd2x vs22,r5,r3
+ addi r5,r5,16
+ stxvd2x vs23,r5,r3
+ addi r5,r5,16
+ stxvd2x vs24,r5,r3
+ addi r5,r5,16
+ stxvd2x vs25,r5,r3
+ addi r5,r5,16
+ stxvd2x vs26,r5,r3
+ addi r5,r5,16
+ stxvd2x vs27,r5,r3
+ addi r5,r5,16
+ stxvd2x vs28,r5,r3
+ addi r5,r5,16
+ stxvd2x vs29,r5,r3
+ addi r5,r5,16
+ stxvd2x vs30,r5,r3
+ addi r5,r5,16
+ stxvd2x vs31,r5,r3
+ blr
+FUNC_END(store_vsx)
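
The two functions above move vs20-vs31 through a buffer of twelve 16-byte
slots, vs(20 + i) at offset 16 * i. A sketch of that buffer shape as a C type
(illustrative; note that lxvd2x/stxvd2x transfer two doublewords, so byte
order within each slot depends on endianness):

    #include <stdint.h>

    /* Buffer shape consumed by load_vsx()/store_vsx(): slot i holds the
     * 128-bit image of vs(20 + i). Must be at least 12 * 16 bytes. */
    struct vsx_block {
    	uint8_t slot[12][16];
    };
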
diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S
new file mode 100644
index 000000000..0dc44f0da
--- /dev/null
+++ b/tools/testing/selftests/powerpc/lib/reg.S
@@ -0,0 +1,397 @@
+/*
+ * test helper assembly functions
+ *
+ * Copyright (C) 2016 Simon Guo, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* Non volatile GPR - unsigned long buf[18] */
+FUNC_START(load_gpr)
+ ld 14, 0*8(3)
+ ld 15, 1*8(3)
+ ld 16, 2*8(3)
+ ld 17, 3*8(3)
+ ld 18, 4*8(3)
+ ld 19, 5*8(3)
+ ld 20, 6*8(3)
+ ld 21, 7*8(3)
+ ld 22, 8*8(3)
+ ld 23, 9*8(3)
+ ld 24, 10*8(3)
+ ld 25, 11*8(3)
+ ld 26, 12*8(3)
+ ld 27, 13*8(3)
+ ld 28, 14*8(3)
+ ld 29, 15*8(3)
+ ld 30, 16*8(3)
+ ld 31, 17*8(3)
+ blr
+FUNC_END(load_gpr)
+
+FUNC_START(store_gpr)
+ std 14, 0*8(3)
+ std 15, 1*8(3)
+ std 16, 2*8(3)
+ std 17, 3*8(3)
+ std 18, 4*8(3)
+ std 19, 5*8(3)
+ std 20, 6*8(3)
+ std 21, 7*8(3)
+ std 22, 8*8(3)
+ std 23, 9*8(3)
+ std 24, 10*8(3)
+ std 25, 11*8(3)
+ std 26, 12*8(3)
+ std 27, 13*8(3)
+ std 28, 14*8(3)
+ std 29, 15*8(3)
+ std 30, 16*8(3)
+ std 31, 17*8(3)
+ blr
+FUNC_END(store_gpr)
+
+/* Single Precision Float - float buf[32] */
+FUNC_START(load_fpr_single_precision)
+ lfs 0, 0*4(3)
+ lfs 1, 1*4(3)
+ lfs 2, 2*4(3)
+ lfs 3, 3*4(3)
+ lfs 4, 4*4(3)
+ lfs 5, 5*4(3)
+ lfs 6, 6*4(3)
+ lfs 7, 7*4(3)
+ lfs 8, 8*4(3)
+ lfs 9, 9*4(3)
+ lfs 10, 10*4(3)
+ lfs 11, 11*4(3)
+ lfs 12, 12*4(3)
+ lfs 13, 13*4(3)
+ lfs 14, 14*4(3)
+ lfs 15, 15*4(3)
+ lfs 16, 16*4(3)
+ lfs 17, 17*4(3)
+ lfs 18, 18*4(3)
+ lfs 19, 19*4(3)
+ lfs 20, 20*4(3)
+ lfs 21, 21*4(3)
+ lfs 22, 22*4(3)
+ lfs 23, 23*4(3)
+ lfs 24, 24*4(3)
+ lfs 25, 25*4(3)
+ lfs 26, 26*4(3)
+ lfs 27, 27*4(3)
+ lfs 28, 28*4(3)
+ lfs 29, 29*4(3)
+ lfs 30, 30*4(3)
+ lfs 31, 31*4(3)
+ blr
+FUNC_END(load_fpr_single_precision)
+
+/* Single Precision Float - float buf[32] */
+FUNC_START(store_fpr_single_precision)
+ stfs 0, 0*4(3)
+ stfs 1, 1*4(3)
+ stfs 2, 2*4(3)
+ stfs 3, 3*4(3)
+ stfs 4, 4*4(3)
+ stfs 5, 5*4(3)
+ stfs 6, 6*4(3)
+ stfs 7, 7*4(3)
+ stfs 8, 8*4(3)
+ stfs 9, 9*4(3)
+ stfs 10, 10*4(3)
+ stfs 11, 11*4(3)
+ stfs 12, 12*4(3)
+ stfs 13, 13*4(3)
+ stfs 14, 14*4(3)
+ stfs 15, 15*4(3)
+ stfs 16, 16*4(3)
+ stfs 17, 17*4(3)
+ stfs 18, 18*4(3)
+ stfs 19, 19*4(3)
+ stfs 20, 20*4(3)
+ stfs 21, 21*4(3)
+ stfs 22, 22*4(3)
+ stfs 23, 23*4(3)
+ stfs 24, 24*4(3)
+ stfs 25, 25*4(3)
+ stfs 26, 26*4(3)
+ stfs 27, 27*4(3)
+ stfs 28, 28*4(3)
+ stfs 29, 29*4(3)
+ stfs 30, 30*4(3)
+ stfs 31, 31*4(3)
+ blr
+FUNC_END(store_fpr_single_precision)
+
+/* VMX/VSX registers - unsigned long buf[128] */
+FUNC_START(loadvsx)
+ lis 4, 0
+ LXVD2X (0,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (1,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (2,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (3,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (4,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (5,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (6,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (7,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (8,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (9,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (10,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (11,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (12,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (13,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (14,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (15,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (16,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (17,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (18,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (19,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (20,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (21,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (22,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (23,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (24,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (25,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (26,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (27,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (28,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (29,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (30,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (31,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (32,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (33,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (34,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (35,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (36,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (37,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (38,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (39,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (40,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (41,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (42,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (43,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (44,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (45,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (46,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (47,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (48,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (49,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (50,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (51,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (52,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (53,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (54,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (55,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (56,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (57,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (58,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (59,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (60,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (61,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (62,(4),(3))
+ addi 4, 4, 16
+ LXVD2X (63,(4),(3))
+ blr
+FUNC_END(loadvsx)
+
+FUNC_START(storevsx)
+ lis 4, 0
+ STXVD2X (0,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (1,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (2,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (3,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (4,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (5,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (6,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (7,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (8,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (9,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (10,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (11,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (12,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (13,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (14,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (15,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (16,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (17,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (18,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (19,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (20,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (21,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (22,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (23,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (24,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (25,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (26,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (27,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (28,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (29,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (30,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (31,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (32,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (33,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (34,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (35,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (36,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (37,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (38,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (39,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (40,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (41,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (42,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (43,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (44,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (45,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (46,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (47,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (48,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (49,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (50,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (51,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (52,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (53,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (54,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (55,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (56,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (57,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (58,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (59,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (60,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (61,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (62,(4),(3))
+ addi 4, 4, 16
+ STXVD2X (63,(4),(3))
+ blr
+FUNC_END(storevsx)
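
reg.S gives the C tests symmetric save/restore helpers over fixed-layout
buffers: buf[i] holds r(14 + i) for the GPR pair, buf[i] holds f(i) for the
FPR pairs, and so on. A sketch of a snapshot from C (hypothetical caller;
store_gpr only stores, so it is safe where load_gpr, which clobbers the
compiler's non-volatiles, is not):

    /* From reg.S: writes r14..r31 into buf[0..17]. */
    extern void store_gpr(unsigned long *buf);

    static void snapshot_nonvolatile_gprs(unsigned long buf[18])
    {
    	store_gpr(buf);   /* buf[i] now holds r(14 + i) */
    }
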
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000000000..50ded63e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,7 @@
+fpu_syscall
+vmx_syscall
+fpu_preempt
+vmx_preempt
+fpu_signal
+vmx_signal
+vsx_preempt
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000000000..11a10d7a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+
+$(OUTPUT)/fpu_syscall: fpu_asm.S
+$(OUTPUT)/fpu_preempt: fpu_asm.S
+$(OUTPUT)/fpu_signal: fpu_asm.S
+
+$(OUTPUT)/vmx_syscall: vmx_asm.S
+$(OUTPUT)/vmx_preempt: vmx_asm.S
+$(OUTPUT)/vmx_signal: vmx_asm.S
+
+$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
+$(OUTPUT)/vsx_preempt: vsx_asm.S
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000000000..8a04bb117
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "fpu_asm.h"
+
+FUNC_START(check_fpu)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ lfd f0,0(r4)
+ fcmpu cr1,f0,f14
+ bne cr1,1f
+ lfd f0,8(r4)
+ fcmpu cr1,f0,f15
+ bne cr1,1f
+ lfd f0,16(r4)
+ fcmpu cr1,f0,f16
+ bne cr1,1f
+ lfd f0,24(r4)
+ fcmpu cr1,f0,f17
+ bne cr1,1f
+ lfd f0,32(r4)
+ fcmpu cr1,f0,f18
+ bne cr1,1f
+ lfd f0,40(r4)
+ fcmpu cr1,f0,f19
+ bne cr1,1f
+ lfd f0,48(r4)
+ fcmpu cr1,f0,f20
+ bne cr1,1f
+ lfd f0,56(r4)
+ fcmpu cr1,f0,f21
+ bne cr1,1f
+ lfd f0,64(r4)
+ fcmpu cr1,f0,f22
+ bne cr1,1f
+ lfd f0,72(r4)
+ fcmpu cr1,f0,f23
+ bne cr1,1f
+ lfd f0,80(r4)
+ fcmpu cr1,f0,f24
+ bne cr1,1f
+ lfd f0,88(r4)
+ fcmpu cr1,f0,f25
+ bne cr1,1f
+ lfd f0,96(r4)
+ fcmpu cr1,f0,f26
+ bne cr1,1f
+ lfd f0,104(r4)
+ fcmpu cr1,f0,f27
+ bne cr1,1f
+ lfd f0,112(r4)
+ fcmpu cr1,f0,f28
+ bne cr1,1f
+ lfd f0,120(r4)
+ fcmpu cr1,f0,f29
+ bne cr1,1f
+ lfd f0,128(r4)
+ fcmpu cr1,f0,f30
+ bne cr1,1f
+ lfd f0,136(r4)
+ fcmpu cr1,f0,f31
+ bne cr1,1f
+ li r3,0 # Success!!!
+1: blr
+FUNC_END(check_fpu)
+
+FUNC_START(test_fpu)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # f14-f31 are non volatiles
+ PUSH_BASIC_STACK(256)
+ PUSH_FPU(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
+
+ bl load_fpu
+ nop
+ li r0,__NR_fork
+ sc
+
+ # pass the result of the fork to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+
+ POP_FPU(256)
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(test_fpu)
+
+# int preempt_fpu(double *darray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the FPU has been loaded with darray. Will proceed to check the validity of
+# the FPU registers while running is not zero.
+FUNC_START(preempt_fpu)
+ PUSH_BASIC_STACK(256)
+ PUSH_FPU(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+ bl load_fpu
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_FPU(256)
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(preempt_fpu)
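
The 'Atomic DEC' in preempt_fpu is a classic load-reserve/store-conditional
loop: lwarx takes a reservation, stwcx. stores only if the reservation still
holds, and bne- retries on failure. A C equivalent using a GCC atomic builtin
(a sketch; the tests keep it in asm so no compiler-generated code touches the
register state under test):

    /* Equivalent of 'sync; 1: lwarx; addi -1; stwcx.; bne- 1b':
     * an atomic decrement, with release ordering from the sync. */
    static inline void signal_thread_started(int *threads_starting)
    {
    	__atomic_fetch_sub(threads_starting, 1, __ATOMIC_RELEASE);
    }
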
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000000000..0f85b79d8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across preemption.
+ * Two things should be noted here: a) the check_fpu function in asm only
+ * checks the non-volatile registers, as it is reused from the syscall test;
+ * b) there is no way to be sure preemption happened, so this test just uses
+ * many threads and a long wait. As such, a successful test doesn't mean
+ * much, but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int threads_starting;
+int running;
+
+extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void *preempt_fpu_c(void *p)
+{
+ int i;
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ /* Test fails if it ever returns */
+ preempt_fpu(darray, &threads_starting, &running);
+
+ return p;
+}
+
+int test_preempt_fpu(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+ while(threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Workers are checking this value every loop. In preempt_fpu: 'cmpwi r5,0;
+ * bne 2b'. r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_fpu
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_fpu, "fpu_preempt");
+}
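
The busy-wait in test_preempt_fpu() relies on the empty asm statement as a
compiler barrier: threads_starting is not volatile, so without the "memory"
clobber the compiler could hoist the load out of the loop and spin on a stale
value forever. The same idiom in isolation (illustrative):

    extern int threads_starting;   /* decremented by each worker */

    static void wait_for_workers(void)
    {
    	while (threads_starting)
    		asm volatile("" : : : "memory");  /* force a fresh load */
    }
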
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000000000..888aa51b4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers are correctly reported in a
+ * signal context. Each worker just spins checking its FPU registers; at some
+ * point a signal will interrupt it, and C code will check that the signal
+ * context reports the same values.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+bool bad_context;
+int threads_starting;
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+ /* Only the non volatiles were loaded up */
+ for (i = 14; i < 32; i++) {
+ if (mc->fp_regs[i] != darray[i - 14]) {
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_fpu_c(void *p)
+{
+ int i;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_fpu_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ rc = preempt_fpu(darray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for all workers to start...");
+ while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_fpu
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
new file mode 100644
index 000000000..949e67212
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across a syscall (fork).
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+extern int test_fpu(double *darray, pid_t *pid);
+
+double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int syscall_fpu(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_fpu will fork() */
+ ret = test_fpu(darray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_syscall_fpu(void)
+{
+ /*
+ * Setup an environment with much context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ /* Can't FAIL_IF(pid2 == -1); because already forked once */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure test is a fail
+ */
+ child_ret = ret = 1;
+ } else {
+ ret = syscall_fpu();
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_syscall_fpu, "syscall_fpu");
+}
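
test_syscall_fpu() forks twice before calling syscall_fpu(), so four
processes (the parent, two children, and a grandchild) each run the
1000-iteration fork/compare loop concurrently; that is the 'environment with
much context switching'. A stripped-down sketch of the fan-out (illustrative):

    #include <unistd.h>

    static void fanout(void)
    {
    	fork();   /* 1 process becomes 2 */
    	fork();   /* 2 processes become 4 */
    	/* all 4 now run syscall_fpu(), each forking 1000 more times */
    }
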
diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S
new file mode 100644
index 000000000..cb1e5ae1b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_asm.S
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "vmx_asm.h"
+
+# Should be safe from C, only touches r0, r3-r5 and v0,v1,v2
+FUNC_START(check_vmx)
+ PUSH_BASIC_STACK(32)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ li r5,0
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v20
+ vmr v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v21
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v22
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v23
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v24
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v25
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v26
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v27
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v28
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v29
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v30
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v31
+ vand v2,v2,v1
+
+ li r5,STACK_FRAME_LOCAL(0,0)
+ stvx v2,r5,sp
+ ldx r0,r5,sp
+ cmpdi r0,0xffffffffffffffff
+ bne 1f
+ li r3,0
+1: POP_BASIC_STACK(32)
+ blr
+FUNC_END(check_vmx)
+
+# Safe from C
+FUNC_START(test_vmx)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # v20-v31 are non-volatile
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # address of pid
+ PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)
+
+ bl load_vmx
+ nop
+
+ li r0,__NR_fork
+ sc
+ # Pass the result of fork back to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+
+ POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(test_vmx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the VMX registers have been loaded with varray. Will proceed to check the validity of
+# the VMX registers while running is not zero.
+FUNC_START(preempt_vmx)
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+ # VMX needs to write to 16-byte aligned addresses, so skip STACK_FRAME_LOCAL(3,0)
+ PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)
+
+ bl load_vmx
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(preempt_vmx)
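
check_vmx avoids a branch per register: each vcmpequd. yields an all-ones
mask per matching doubleword, the masks are AND-accumulated in v2, and a
doubleword of the result is compared against -1 at the end. A scalar model of
that reduction (a sketch; note the asm inspects only the first doubleword of
the accumulated mask):

    #include <stdint.h>
    #include <stdbool.h>

    /* Scalar model of the vcmpequd/vand reduction in check_vmx. */
    static bool all_match(const uint64_t *expect, const uint64_t *live, int n)
    {
    	uint64_t acc = ~0ULL;
    	int i;

    	for (i = 0; i < n; i++)
    		acc &= (expect[i] == live[i]) ? ~0ULL : 0;  /* vcmpequd + vand */
    	return acc == ~0ULL;  /* the 'cmpdi r0,0xff..ff' at the end */
    }
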
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
new file mode 100644
index 000000000..9ef376c55
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across preemption.
+ * Two things should be noted here: a) the check_vmx function in asm only
+ * checks the non-volatile registers, as it is reused from the syscall test;
+ * b) there is no way to be sure preemption happened, so this test just uses
+ * many threads and a long wait. As such, a successful test doesn't mean
+ * much, but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+int threads_starting;
+int running;
+
+extern void preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void *preempt_vmx_c(void *p)
+{
+ int i, j;
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ /* Test fails if it ever returns */
+ preempt_vmx(varray, &threads_starting, &running);
+ return p;
+}
+
+int test_preempt_vmx(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+ while(threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Workers are checking this value every loop. In preempt_vmx: 'cmpwi r5,0;
+ * bne 2b'. r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_vmx
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_vmx, "vmx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000000000..671d7533a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers are correctly reported in a
+ * signal context. Each worker just spins checking its VMX registers; at some
+ * point a signal will interrupt it, and C code will check that the signal
+ * context reports the same values.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <altivec.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+bool bad_context;
+int running;
+int threads_starting;
+
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void signal_vmx_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+ /* Only the non volatiles were loaded up */
+ for (i = 20; i < 32; i++) {
+ if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
+ int j;
+ /*
+ * We shouldn't printf() in a signal handler; however, this is a
+ * test and we've detected a failure. Understanding what failed
+ * is paramount. All that happens after this is the test exiting
+ * with failure.
+ */
+ printf("VMX mismatch at reg %d!\n", i);
+ printf("Reg | Actual | Expected\n");
+ for (j = 20; j < 32; j++) {
+ printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0],
+ mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
+ varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]);
+ }
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_vmx_c(void *p)
+{
+ int i, j;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_vmx_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ rc = preempt_vmx(varray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_vmx(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
+ while (threads_starting) {
+ asm volatile("": : :"memory");
+ usleep(1000);
+ printf(", %d", threads_starting);
+ }
+ printf(" ...done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tKilling workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_vmx
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_vmx, "vmx_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
new file mode 100644
index 000000000..a017918ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across a syscall (fork).
+ */
+
+#include <altivec.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "utils.h"
+
+vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+extern int test_vmx(vector int *varray, pid_t *pid);
+
+int vmx_syscall(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_vmx will fork() */
+ ret = test_vmx(varray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_vmx_syscall(void)
+{
+ /*
+ * Setup an environment with much context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ ret = vmx_syscall();
+ /* Can't FAIL_IF(pid2 == -1); because we've already forked */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure child_ret is set and is a fail
+ */
+ ret = child_ret = 1;
+ } else {
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_vmx_syscall, "vmx_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/math/vsx_asm.S b/tools/testing/selftests/powerpc/math/vsx_asm.S
new file mode 100644
index 000000000..8f431f6ab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_asm.S
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "vsx_asm.h"
+
+# long check_vsx(vector int *r3);
+# This function wraps storing the VSX regs to the end of an array and a
+# call to a comparison function in C which boils down to a memcmp()
+FUNC_START(check_vsx)
+ PUSH_BASIC_STACK(32)
+ std r3,STACK_FRAME_PARAM(0)(sp)
+ addi r3, r3, 16 * 12 #Second half of array
+ bl store_vsx
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl vsx_memcmp
+ POP_BASIC_STACK(32)
+ blr
+FUNC_END(check_vsx)
+
+# int preempt_vsx(vector int *varray, int *threads_starting,
+# int *running);
+# On starting will (atomically) decrement threads_starting as a signal
+# that the VSX registers have been loaded with varray. Will proceed to
+# check the validity of the VSX registers while running is not zero.
+FUNC_START(preempt_vsx)
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+
+ bl load_vsx
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vsx
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_BASIC_STACK(512)
+ blr
+FUNC_END(preempt_vsx)
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
new file mode 100644
index 000000000..6387f03a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VSX registers change across preemption.
+ * There is no way to be sure preemption happened so this test just
+ * uses many threads and a long wait. As such, a successful test
+ * doesn't mean much but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+/*
+ * Ensure the array holds twice the number of non-volatile VSX regs!
+ * check_vsx() is going to use the other half as space to put the live
+ * registers before calling vsx_memcmp()
+ */
+__thread vector int varray[24] = {
+ {1, 2, 3, 4 }, {5, 6, 7, 8 }, {9, 10,11,12},
+ {13,14,15,16}, {17,18,19,20}, {21,22,23,24},
+ {25,26,27,28}, {29,30,31,32}, {33,34,35,36},
+ {37,38,39,40}, {41,42,43,44}, {45,46,47,48}
+};
+
+int threads_starting;
+int running;
+
+extern long preempt_vsx(vector int *varray, int *threads_starting, int *running);
+
+long vsx_memcmp(vector int *a)
+{
+ vector int zero = {0, 0, 0, 0};
+ int i;
+
+ FAIL_IF(a != varray);
+
+ for(i = 0; i < 12; i++) {
+ if (memcmp(&a[i + 12], &zero, sizeof(vector int)) == 0) {
+ fprintf(stderr, "Detected zero from the VSX reg %d\n", i + 12);
+ return 2;
+ }
+ }
+
+ if (memcmp(a, &a[12], 12 * sizeof(vector int))) {
+ long *p = (long *)a;
+ fprintf(stderr, "VSX mismatch\n");
+ for (i = 0; i < 24; i=i+2)
+ fprintf(stderr, "%d: 0x%08lx%08lx | 0x%08lx%08lx\n",
+ i/2 + i%2 + 20, p[i], p[i + 1], p[i + 24], p[i + 25]);
+ return 1;
+ }
+ return 0;
+}
+
+void *preempt_vsx_c(void *p)
+{
+ int i, j;
+ long rc;
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++) {
+ varray[i][j] = rand();
+ /* Don't want zero because it hides kernel problems */
+ if (varray[i][j] == 0)
+ j--;
+ }
+ rc = preempt_vsx(varray, &threads_starting, &running);
+ if (rc == 2)
+ fprintf(stderr, "Caught zeros in VSX compares\n");
+ return (void *)rc;
+}
+
+int test_preempt_vsx(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_vsx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for %d workers to start...", threads_starting);
+ while(threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Workers are checking this value every loop. In preempt_vsx: 'cmpwi r5,0;
+ * bne 2b'. r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_vsx
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_vsx, "vsx_preempt");
+}
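
The 24-entry varray ties the pieces together: preempt_vsx loads vs20-vs31
from the first 12 vectors, check_vsx stores the live registers into the last
12, and vsx_memcmp compares the halves. The layout as a sketch (with an
illustrative stand-in for 'vector int'):

    #include <string.h>

    typedef struct { int w[4]; } vec128;   /* stand-in for 'vector int' */

    /* v[0..11]: expected values; v[12..23]: live vs20..vs31 as stored
     * by check_vsx. A zero vector means the comparison itself is suspect. */
    static int halves_differ(const vec128 v[24])
    {
    	return memcmp(&v[0], &v[12], 12 * sizeof(vec128)) != 0;
    }
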
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
new file mode 100644
index 000000000..7d7c42ed6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -0,0 +1,5 @@
+hugetlb_vs_thp_test
+subpage_prot
+tempfile
+prot_sao
+segv_errors \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
new file mode 100644
index 000000000..869628234
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS_EXTENDED := tlbie_test
+TEST_GEN_FILES := tempfile
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
+$(OUTPUT)/prot_sao: ../utils.c
+
+$(OUTPUT)/tempfile:
+ dd if=/dev/zero of=$@ bs=64k count=1
+
+$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
new file mode 100644
index 000000000..9932359ce
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+/* This must match the huge page & THP size */
+#define SIZE (16 * 1024 * 1024)
+
+static int test_body(void)
+{
+ void *addr;
+ char *p;
+
+ addr = (void *)0xa0000000;
+
+ p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+ MAP_HUGETLB | MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p != MAP_FAILED) {
+ /*
+ * Typically the mmap will fail because no huge pages are
+ * allocated on the system. But if there are huge pages
+ * allocated the mmap will succeed. That's fine too, we just
+ * munmap here before continuing. munmap() length of
+ * MAP_HUGETLB memory must be hugepage aligned.
+ */
+ if (munmap(addr, SIZE)) {
+ perror("munmap");
+ return 1;
+ }
+ }
+
+ p = mmap(addr, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p == MAP_FAILED) {
+ printf("Mapping failed @ %p\n", addr);
+ perror("mmap");
+ return 1;
+ }
+
+ /*
+ * Either a user or kernel access is sufficient to trigger the bug.
+ * A kernel access is easier to spot & debug, as it will trigger the
+ * softlockup or RCU stall detectors, and when the system is kicked
+ * into xmon we get a backtrace in the kernel.
+ *
+ * A good option is:
+ * getcwd(p, SIZE);
+ *
+ * For the purposes of this testcase it's preferable to spin in
+ * userspace, so the harness can kill us if we get stuck. That way we
+ * see a test failure rather than a dead system.
+ */
+ *p = 0xf;
+
+ munmap(addr, SIZE);
+
+ return 0;
+}
+
+static int test_main(void)
+{
+ int i;
+
+ /* 10,000 because it's a "bunch", and completes reasonably quickly */
+ for (i = 0; i < 10000; i++)
+ if (test_body())
+ return 1;
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_main, "hugetlb_vs_thp");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
new file mode 100644
index 000000000..611530d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2016, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+#define SIZE (64 * 1024)
+
+int test_prot_sao(void)
+{
+ char *p;
+
+ /* 2.06 or later should support SAO */
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ /*
+ * Ensure we can ask for PROT_SAO.
+ * We can't really verify that it does the right thing, but at least we
+ * confirm the kernel will accept it.
+ */
+ p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ /* Write to the mapping, to at least cause a fault */
+ memset(p, 0xaa, SIZE);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_prot_sao, "prot-sao");
+}
diff --git a/tools/testing/selftests/powerpc/mm/segv_errors.c b/tools/testing/selftests/powerpc/mm/segv_errors.c
new file mode 100644
index 000000000..06ae76ee3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/segv_errors.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2017 John Sperbeck
+ *
+ * Test that an access to a mapped but inaccessible area causes a SEGV and
+ * reports si_code == SEGV_ACCERR.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <assert.h>
+#include <ucontext.h>
+
+#include "utils.h"
+
+static bool faulted;
+static int si_code;
+
+static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ faulted = true;
+ si_code = info->si_code;
+ regs->nip += 4;
+}
+
+int test_segv_errors(void)
+{
+ struct sigaction act = {
+ .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ char c, *p = NULL;
+
+ p = mmap(NULL, getpagesize(), 0, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ faulted = false;
+ si_code = 0;
+
+ /*
+ * We just need a compiler barrier, but mb() works and has the nice
+ * property of being easy to spot in the disassembly.
+ */
+ mb();
+ c = *p;
+ mb();
+
+ FAIL_IF(!faulted);
+ FAIL_IF(si_code != SEGV_ACCERR);
+
+ faulted = false;
+ si_code = 0;
+
+ mb();
+ *p = c;
+ mb();
+
+ FAIL_IF(!faulted);
+ FAIL_IF(si_code != SEGV_ACCERR);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_segv_errors, "segv_errors");
+}
diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c
new file mode 100644
index 000000000..3ae77ba93
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c
@@ -0,0 +1,236 @@
+/*
+ * Copyright IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+char *file_name;
+
+int in_test;
+volatile int faulted;
+volatile void *dar;
+int errors;
+
+static void segv(int signum, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ if (!in_test) {
+ fprintf(stderr, "Segfault outside of test !\n");
+ exit(1);
+ }
+
+ faulted = 1;
+ dar = (void *)regs->dar;
+ regs->nip += 4;
+}
+
+static inline void do_read(const volatile void *addr)
+{
+ int ret;
+
+ asm volatile("lwz %0,0(%1); twi 0,%0,0; isync;\n"
+ : "=r" (ret) : "r" (addr) : "memory");
+}
+
+static inline void do_write(const volatile void *addr)
+{
+ int val = 0x1234567;
+
+ asm volatile("stw %0,0(%1); sync; \n"
+ : : "r" (val), "r" (addr) : "memory");
+}
+
+static inline void check_faulted(void *addr, long page, long subpage, int write)
+{
+ int want_fault = (subpage == ((page + 3) % 16));
+
+ if (write)
+ want_fault |= (subpage == ((page + 1) % 16));
+
+ if (faulted != want_fault) {
+ printf("Failed at %p (p=%ld,sp=%ld,w=%d), want=%s, got=%s !\n",
+ addr, page, subpage, write,
+ want_fault ? "fault" : "pass",
+ faulted ? "fault" : "pass");
+ ++errors;
+ }
+
+ if (faulted) {
+ if (dar != addr) {
+ printf("Fault expected at %p and happened at %p !\n",
+ addr, dar);
+ }
+ faulted = 0;
+ asm volatile("sync" : : : "memory");
+ }
+}
+
+static int run_test(void *addr, unsigned long size)
+{
+ unsigned int *map;
+ long i, j, pages, err;
+
+ pages = size / 0x10000;
+ map = malloc(pages * 4);
+ assert(map);
+
+ /*
+ * for each page, mark subpage (i + 1) % 16 read-only and subpage
+ * (i + 3) % 16 inaccessible
+ */
+ for (i = 0; i < pages; i++) {
+ map[i] = (0x40000000 >> (((i + 1) * 2) % 32)) |
+ (0xc0000000 >> (((i + 3) * 2) % 32));
+ }
+
+ err = syscall(__NR_subpage_prot, addr, size, map);
+ if (err) {
+ perror("subpage_perm");
+ return 1;
+ }
+ free(map);
+
+ in_test = 1;
+ errors = 0;
+ for (i = 0; i < pages; i++) {
+ for (j = 0; j < 16; j++, addr += 0x1000) {
+ do_read(addr);
+ check_faulted(addr, i, j, 0);
+ do_write(addr);
+ check_faulted(addr, i, j, 1);
+ }
+ }
+
+ in_test = 0;
+ if (errors) {
+ printf("%d errors detected\n", errors);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int syscall_available(void)
+{
+ int rc;
+
+ errno = 0;
+ rc = syscall(__NR_subpage_prot, 0, 0, 0);
+
+ return rc == 0 || (errno != ENOENT && errno != ENOSYS);
+}
+
+int test_anon(void)
+{
+ unsigned long align;
+ struct sigaction act = {
+ .sa_sigaction = segv,
+ .sa_flags = SA_SIGINFO
+ };
+ void *mallocblock;
+ unsigned long mallocsize;
+
+ SKIP_IF(!syscall_available());
+
+ if (getpagesize() != 0x10000) {
+ fprintf(stderr, "Kernel page size must be 64K!\n");
+ return 1;
+ }
+
+ sigaction(SIGSEGV, &act, NULL);
+
+ mallocsize = 4 * 16 * 1024 * 1024;
+
+ FAIL_IF(posix_memalign(&mallocblock, 64 * 1024, mallocsize));
+
+ align = (unsigned long)mallocblock;
+ if (align & 0xffff)
+ align = (align | 0xffff) + 1;
+
+ mallocblock = (void *)align;
+
+ printf("allocated malloc block of 0x%lx bytes at %p\n",
+ mallocsize, mallocblock);
+
+ printf("testing malloc block...\n");
+
+ return run_test(mallocblock, mallocsize);
+}
+
+int test_file(void)
+{
+ struct sigaction act = {
+ .sa_sigaction = segv,
+ .sa_flags = SA_SIGINFO
+ };
+ void *fileblock;
+ off_t filesize;
+ int fd;
+
+ SKIP_IF(!syscall_available());
+
+ fd = open(file_name, O_RDWR);
+ if (fd == -1) {
+ perror("failed to open file");
+ return 1;
+ }
+ sigaction(SIGSEGV, &act, NULL);
+
+ filesize = lseek(fd, 0, SEEK_END);
+ if (filesize & 0xffff)
+ filesize &= ~0xfffful;
+
+ fileblock = mmap(NULL, filesize, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (fileblock == MAP_FAILED) {
+ perror("failed to map file");
+ return 1;
+ }
+ printf("allocated %s for 0x%lx bytes at %p\n",
+ file_name, filesize, fileblock);
+
+ printf("testing file map...\n");
+
+ return run_test(fileblock, filesize);
+}
+
+int main(int argc, char *argv[])
+{
+ int rc;
+
+ rc = test_harness(test_anon, "subpage_prot_anon");
+ if (rc)
+ return rc;
+
+ if (argc > 1)
+ file_name = argv[1];
+ else
+ file_name = "tempfile";
+
+ return test_harness(test_file, "subpage_prot_file");
+}
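
Each 32-bit word of the protection map covers the 16 subpages of one 64K
page, two bits per subpage with subpage 0 in the most significant bits
(0b01 = read-only, 0b11 = no access). A worked check of the encoding built in
run_test() (illustrative):

    #include <assert.h>

    /* Extract the 2-bit protection field for a subpage (0..15). */
    static unsigned int subpage_bits(unsigned int map_word, int subpage)
    {
    	return (map_word >> (30 - 2 * subpage)) & 0x3;
    }

    static void demo(void)
    {
    	/* Page i = 0: map[0] = (0x40000000 >> 2) | (0xc0000000 >> 6) */
    	unsigned int w = (0x40000000 >> 2) | (0xc0000000 >> 6);

    	assert(subpage_bits(w, 1) == 0x1);  /* (i+1)%16 is read-only */
    	assert(subpage_bits(w, 3) == 0x3);  /* (i+3)%16 is no-access */
    }
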
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
new file mode 100644
index 000000000..f85a0938a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2019, Nick Piggin, Gautham R. Shenoy, Aneesh Kumar K.V, IBM Corp.
+ */
+
+/*
+ * Test the tlbie/mtpidr race. We have 4 threads doing a
+ * flush/load/compare/store sequence in a loop. The same threads also run a
+ * context-switch task that does sched_yield() in a loop.
+ *
+ * The snapshot thread marks the mmap area PROT_READ in between, makes a copy
+ * and copies it back to the original area. This helps us detect if any
+ * store continued to happen after we marked the memory PROT_READ.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/futex.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <time.h>
+#include <stdarg.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/prctl.h>
+
+static inline void dcbf(volatile unsigned int *addr)
+{
+ __asm__ __volatile__ ("dcbf %y0; sync" : : "Z"(*(unsigned char *)addr) : "memory");
+}
+
+static void err_msg(char *msg)
+{
+ time_t now;
+ time(&now);
+ printf("=================================\n");
+ printf(" Error: %s\n", msg);
+ printf(" %s", ctime(&now));
+ printf("=================================\n");
+ exit(1);
+}
+
+static char *map1;
+static char *map2;
+static pid_t rim_process_pid;
+
+/*
+ * A "rim-sequence" is defined to be the sequence of the following
+ * operations performed on a memory word:
+ * 1) FLUSH the contents of that word.
+ * 2) LOAD the contents of that word.
+ * 3) COMPARE the contents of that word with the content that was
+ * previously stored at that word
+ * 4) STORE new content into that word.
+ *
+ * The threads in this test that perform the rim-sequence are termed
+ * as rim_threads.
+ */
+
+/*
+ * A "corruption" is defined to be the failed COMPARE operation in a
+ * rim-sequence.
+ *
+ * A rim_thread that detects a corruption informs about it to all the
+ * other rim_threads, and the mem_snapshot thread.
+ */
+static volatile unsigned int corruption_found;
+
+/*
+ * This defines the maximum number of rim_threads in this test.
+ *
+ * The THREAD_ID_BITS denote the number of bits required
+ * to represent the thread_ids [0..MAX_THREADS - 1].
+ * We are being a bit paranoid here and set it to 8 bits,
+ * though 6 bits suffice.
+ *
+ */
+#define MAX_THREADS 64
+#define THREAD_ID_BITS 8
+#define THREAD_ID_MASK ((1 << THREAD_ID_BITS) - 1)
+static unsigned int rim_thread_ids[MAX_THREADS];
+static pthread_t rim_threads[MAX_THREADS];
+
+
+/*
+ * Each rim_thread works on an exclusive "chunk" of size
+ * RIM_CHUNK_SIZE.
+ *
+ * The ith rim_thread works on the ith chunk.
+ *
+ * The ith chunk begins at
+ * map1 + (i * RIM_CHUNK_SIZE)
+ */
+#define RIM_CHUNK_SIZE 1024
+#define BITS_PER_BYTE 8
+#define WORD_SIZE (sizeof(unsigned int))
+#define WORD_BITS (WORD_SIZE * BITS_PER_BYTE)
+#define WORDS_PER_CHUNK (RIM_CHUNK_SIZE/WORD_SIZE)
+
+static inline char *compute_chunk_start_addr(unsigned int thread_id)
+{
+ char *chunk_start;
+
+ chunk_start = (char *)((unsigned long)map1 +
+ (thread_id * RIM_CHUNK_SIZE));
+
+ return chunk_start;
+}
+
+/*
+ * The "word-offset" of a word-aligned address inside a chunk, is
+ * defined to be the number of words that precede the address in that
+ * chunk.
+ *
+ * WORD_OFFSET_BITS denote the number of bits required to represent
+ * the word-offsets of all the word-aligned addresses of a chunk.
+ */
+#define WORD_OFFSET_BITS (__builtin_ctz(WORDS_PER_CHUNK))
+#define WORD_OFFSET_MASK ((1 << WORD_OFFSET_BITS) - 1)
+
+static inline unsigned int compute_word_offset(char *start, unsigned int *addr)
+{
+ unsigned int delta_bytes, ret;
+ delta_bytes = (unsigned long)addr - (unsigned long)start;
+
+ ret = delta_bytes/WORD_SIZE;
+
+ return ret;
+}
+
+/*
+ * A "sweep" is defined to be the sequential execution of the
+ * rim-sequence by a rim_thread on its chunk one word at a time,
+ * starting from the first word of its chunk and ending with the last
+ * word of its chunk.
+ *
+ * Each sweep of a rim_thread is uniquely identified by a sweep_id.
+ * SWEEP_ID_BITS denote the number of bits required to represent
+ * the sweep_ids of rim_threads.
+ *
+ * As to why SWEEP_ID_BITS are computed as a function of THREAD_ID_BITS,
+ * WORD_OFFSET_BITS, and WORD_BITS, see the "store-pattern" below.
+ */
+#define SWEEP_ID_BITS (WORD_BITS - (THREAD_ID_BITS + WORD_OFFSET_BITS))
+#define SWEEP_ID_MASK ((1 << SWEEP_ID_BITS) - 1)
+
+/*
+ * A "store-pattern" is the word-pattern that is stored into a word
+ * location in the 4)STORE step of the rim-sequence.
+ *
+ * In the store-pattern, we shall encode:
+ *
+ * - The thread-id of the rim_thread performing the store
+ * (The most significant THREAD_ID_BITS)
+ *
+ * - The word-offset of the address into which the store is being
+ * performed (The next WORD_OFFSET_BITS)
+ *
+ * - The sweep_id of the current sweep in which the store is
+ * being performed. (The lower SWEEP_ID_BITS)
+ *
+ * Store Pattern: 32 bits
+ * |------------------|--------------------|---------------------------------|
+ * | Thread id | Word offset | sweep_id |
+ * |------------------|--------------------|---------------------------------|
+ * THREAD_ID_BITS WORD_OFFSET_BITS SWEEP_ID_BITS
+ *
+ * In the store pattern, the (Thread-id + Word-offset) uniquely identify the
+ * address to which the store is being performed i.e,
+ * address == map1 +
+ * (Thread-id * RIM_CHUNK_SIZE) + (Word-offset * WORD_SIZE)
+ *
+ * And the sweep_id in the store pattern identifies the time when the
+ * store was performed by the rim_thread.
+ *
+ * We shall use this property in the 3)COMPARE step of the
+ * rim-sequence.
+ */
+#define SWEEP_ID_SHIFT 0
+#define WORD_OFFSET_SHIFT (SWEEP_ID_BITS)
+#define THREAD_ID_SHIFT (WORD_OFFSET_BITS + SWEEP_ID_BITS)
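+
+/*
+ * Worked example (illustrative only, derived from the defaults above):
+ * WORDS_PER_CHUNK = 1024/4 = 256, so WORD_OFFSET_BITS = 8 and
+ * SWEEP_ID_BITS = 32 - (8 + 8) = 16. Thread 3 storing at word-offset 5
+ * in sweep 9 therefore writes the pattern
+ * (3 << 24) | (5 << 16) | 9 == 0x03050009.
+ */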
+
+/*
+ * Compute the store pattern for a given thread with id @tid, at
+ * location @addr in the sweep identified by @sweep_id
+ */
+static inline unsigned int compute_store_pattern(unsigned int tid,
+ unsigned int *addr,
+ unsigned int sweep_id)
+{
+ unsigned int ret = 0;
+ char *start = compute_chunk_start_addr(tid);
+ unsigned int word_offset = compute_word_offset(start, addr);
+
+ ret += (tid & THREAD_ID_MASK) << THREAD_ID_SHIFT;
+ ret += (word_offset & WORD_OFFSET_MASK) << WORD_OFFSET_SHIFT;
+ ret += (sweep_id & SWEEP_ID_MASK) << SWEEP_ID_SHIFT;
+ return ret;
+}
+
+/* Extract the thread-id from the given store-pattern */
+static inline unsigned int extract_tid(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> THREAD_ID_SHIFT) & THREAD_ID_MASK;
+ return ret;
+}
+
+/* Extract the word-offset from the given store-pattern */
+static inline unsigned int extract_word_offset(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> WORD_OFFSET_SHIFT) & WORD_OFFSET_MASK;
+
+ return ret;
+}
+
+/* Extract the sweep-id from the given store-pattern */
+static inline unsigned int extract_sweep_id(unsigned int pattern)
+{
+ unsigned int ret;
+
+ ret = (pattern >> SWEEP_ID_SHIFT) & SWEEP_ID_MASK;
+
+ return ret;
+}
+
+/************************************************************
+ * *
+ * Logging the output of the verification *
+ * *
+ ************************************************************/
+#define LOGDIR_NAME_SIZE 100
+static char logdir[LOGDIR_NAME_SIZE];
+
+static FILE *fp[MAX_THREADS];
+static const char logfilename[] = "Thread-%02d-Chunk";
+
+static inline void start_verification_log(unsigned int tid,
+ unsigned int *addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ FILE *f;
+ char logfile[30];
+ char path[LOGDIR_NAME_SIZE + 30];
+ char separator[2] = "/";
+ char *chunk_start = compute_chunk_start_addr(tid);
+ unsigned int size = RIM_CHUNK_SIZE;
+
+ sprintf(logfile, logfilename, tid);
+ strcpy(path, logdir);
+ strcat(path, separator);
+ strcat(path, logfile);
+ f = fopen(path, "w");
+
+ if (!f) {
+ err_msg("Unable to create logfile\n");
+ }
+
+ fp[tid] = f;
+
+ fprintf(f, "----------------------------------------------------------\n");
+ fprintf(f, "PID = %d\n", rim_process_pid);
+ fprintf(f, "Thread id = %02d\n", tid);
+ fprintf(f, "Chunk Start Addr = 0x%016lx\n", (unsigned long)chunk_start);
+ fprintf(f, "Chunk Size = %d\n", size);
+ fprintf(f, "Next Store Addr = 0x%016lx\n", (unsigned long)addr);
+ fprintf(f, "Current sweep-id = 0x%08x\n", cur_sweep_id);
+ fprintf(f, "Previous sweep-id = 0x%08x\n", prev_sweep_id);
+ fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void log_anomaly(unsigned int tid, unsigned int *addr,
+ unsigned int expected, unsigned int observed)
+{
+ FILE *f = fp[tid];
+
+ fprintf(f, "Thread %02d: Addr 0x%lx: Expected 0x%x, Observed 0x%x\n",
+ tid, (unsigned long)addr, expected, observed);
+ fprintf(f, "Thread %02d: Expected Thread id = %02d\n", tid, extract_tid(expected));
+ fprintf(f, "Thread %02d: Observed Thread id = %02d\n", tid, extract_tid(observed));
+ fprintf(f, "Thread %02d: Expected Word offset = %03d\n", tid, extract_word_offset(expected));
+ fprintf(f, "Thread %02d: Observed Word offset = %03d\n", tid, extract_word_offset(observed));
+ fprintf(f, "Thread %02d: Expected sweep-id = 0x%x\n", tid, extract_sweep_id(expected));
+ fprintf(f, "Thread %02d: Observed sweep-id = 0x%x\n", tid, extract_sweep_id(observed));
+ fprintf(f, "----------------------------------------------------------\n");
+}
+
+static inline void end_verification_log(unsigned int tid, unsigned int nr_anomalies)
+{
+	FILE *f = fp[tid];
+	char logfile[30];
+	char path[LOGDIR_NAME_SIZE + 30];
+	char separator[] = "/";
+
+	fclose(f);
+
+	sprintf(logfile, logfilename, tid);
+	strcpy(path, logdir);
+	strcat(path, separator);
+	strcat(path, logfile);
+
+	if (nr_anomalies == 0) {
+		/* Nothing to report: drop the (empty) logfile */
+		remove(path);
+		return;
+	}
+
+	printf("Thread %02d chunk has %d corrupted words. For details check %s\n",
+		tid, nr_anomalies, path);
+}
+
+/*
+ * When a COMPARE step of a rim-sequence fails, the rim_thread informs
+ * everyone else via the shared variable corruption_found. On seeing
+ * this, every thread verifies the content of its chunk as follows.
+ *
+ * Suppose a thread identified with @tid was about to store (but not
+ * yet stored) to @next_store_addr in its current sweep, identified
+ * by @cur_sweep_id. Let @prev_sweep_id denote the previous sweep_id.
+ *
+ * This implies that for all the addresses @addr < @next_store_addr,
+ * Thread @tid has already performed a store as part of its current
+ * sweep. Hence we expect the content of such @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | cur_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * Since Thread @tid is yet to perform stores on address
+ * @next_store_addr and above, we expect the content of such an
+ * address @addr to be:
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | prev_sweep_id |
+ * |-------------------------------------------------|
+ *
+ * The verifier function @verify_chunk does this verification and logs
+ * any anomalies that it finds.
+ */
+static void verify_chunk(unsigned int tid, unsigned int *next_store_addr,
+ unsigned int cur_sweep_id,
+ unsigned int prev_sweep_id)
+{
+ unsigned int *iter_ptr;
+ unsigned int size = RIM_CHUNK_SIZE;
+ unsigned int expected;
+ unsigned int observed;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+	int nr_anomalies = 0;
+
+ start_verification_log(tid, next_store_addr,
+ cur_sweep_id, prev_sweep_id);
+
+ for (iter_ptr = (unsigned int *)chunk_start;
+ (unsigned long)iter_ptr < (unsigned long)chunk_start + size;
+ iter_ptr++) {
+ unsigned int expected_sweep_id;
+
+ if (iter_ptr < next_store_addr) {
+ expected_sweep_id = cur_sweep_id;
+ } else {
+ expected_sweep_id = prev_sweep_id;
+ }
+
+ expected = compute_store_pattern(tid, iter_ptr, expected_sweep_id);
+
+		dcbf((volatile unsigned int *)iter_ptr); /* Flush before reading */
+ observed = *iter_ptr;
+
+ if (observed != expected) {
+			nr_anomalies++;
+			log_anomaly(tid, iter_ptr, expected, observed);
+ }
+ }
+
+	end_verification_log(tid, nr_anomalies);
+}
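+
+/*
+ * Illustrative expected values (not part of the test): continuing the
+ * worked example above, if thread 3 was about to store at word-offset 5
+ * in sweep 9, offsets 0..4 of its chunk should read
+ * 0x03000009 | (offset << 16) and offsets 5..255 should read
+ * 0x03000008 | (offset << 16).
+ */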
+
+static void set_pthread_cpu(pthread_t th, int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ pthread_setaffinity_np(th, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+		/*
+		 * Disabled: the bug has not been reproduced with SCHED_FIFO,
+		 * which kills random preemption and may be a factor.
+		 */
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static void set_mycpu(int cpu)
+{
+ cpu_set_t run_cpu_mask;
+ struct sched_param param;
+
+ CPU_ZERO(&run_cpu_mask);
+ CPU_SET(cpu, &run_cpu_mask);
+ sched_setaffinity(0, sizeof(cpu_set_t), &run_cpu_mask);
+
+ param.sched_priority = 1;
+ if (0 && sched_setscheduler(0, SCHED_FIFO, &param) == -1) {
+ fprintf(stderr, "could not set SCHED_FIFO, run as root?\n");
+ }
+}
+
+static volatile int segv_wait;
+
+static void segv_handler(int signo, siginfo_t *info, void *extra)
+{
+ while (segv_wait) {
+ sched_yield();
+ }
+}
+
+static void set_segv_handler(void)
+{
+ struct sigaction sa;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = segv_handler;
+
+ if (sigaction(SIGSEGV, &sa, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+}
+
+int timeout = 0;
+/*
+ * This function is executed by every rim_thread.
+ *
+ * This function performs sweeps over the exclusive chunks of the
+ * rim_threads executing the rim-sequence one word at a time.
+ */
+static void *rim_fn(void *arg)
+{
+ unsigned int tid = *((unsigned int *)arg);
+
+ int size = RIM_CHUNK_SIZE;
+ char *chunk_start = compute_chunk_start_addr(tid);
+
+ unsigned int prev_sweep_id;
+ unsigned int cur_sweep_id = 0;
+
+ /* word access */
+ unsigned int pattern = cur_sweep_id;
+ unsigned int *pattern_ptr = &pattern;
+ unsigned int *w_ptr, read_data;
+
+ set_segv_handler();
+
+ /*
+ * Let us initialize the chunk:
+ *
+ * Each word-aligned address addr in the chunk,
+ * is initialized to :
+ * |-------------------------------------------------|
+ * | tid | word_offset(addr) | 0 |
+ * |-------------------------------------------------|
+ */
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+ *w_ptr = *pattern_ptr;
+ }
+
+ while (!corruption_found && !timeout) {
+ prev_sweep_id = cur_sweep_id;
+ cur_sweep_id = cur_sweep_id + 1;
+
+ for (w_ptr = (unsigned int *)chunk_start;
+ (unsigned long)w_ptr < (unsigned long)(chunk_start) + size;
+ w_ptr++) {
+ unsigned int old_pattern;
+
+ /*
+ * Compute the pattern that we would have
+ * stored at this location in the previous
+ * sweep.
+ */
+ old_pattern = compute_store_pattern(tid, w_ptr, prev_sweep_id);
+
+ /*
+			 * FLUSH: Ensure that we flush the contents of
+			 * the cache before loading
+			 */
+			dcbf((volatile unsigned int *)w_ptr); /* Flush */
+
+ /* LOAD: Read the value */
+			read_data = *w_ptr; /* Load */
+
+ /*
+ * COMPARE: Is it the same as what we had stored
+			 * in the previous sweep? It had better be!
+ */
+ if (read_data != old_pattern) {
+ /* No it isn't! Tell everyone */
+ corruption_found = 1;
+ }
+
+ /*
+ * Before performing a store, let us check if
+ * any rim_thread has found a corruption.
+ */
+ if (corruption_found || timeout) {
+ /*
+ * Yes. Someone (including us!) has found
+ * a corruption :(
+ *
+ * Let us verify that our chunk is
+ * correct.
+ */
+ /* But first, let us allow the dust to settle down! */
+ verify_chunk(tid, w_ptr, cur_sweep_id, prev_sweep_id);
+
+ return 0;
+ }
+
+ /*
+ * Compute the new pattern that we are going
+ * to write to this location
+ */
+ *pattern_ptr = compute_store_pattern(tid, w_ptr, cur_sweep_id);
+
+ /*
+ * STORE: Now let us write this pattern into
+ * the location
+ */
+ *w_ptr = *pattern_ptr;
+ }
+ }
+
+ return NULL;
+}
+
+
+static unsigned long start_cpu = 0;
+static unsigned long nrthreads = 4;
+
+static pthread_t mem_snapshot_thread;
+
+static void *mem_snapshot_fn(void *arg)
+{
+ int page_size = getpagesize();
+ size_t size = page_size;
+ void *tmp = malloc(size);
+
+ while (!corruption_found && !timeout) {
+ /* Stop memory migration once corruption is found */
+ segv_wait = 1;
+
+ mprotect(map1, size, PROT_READ);
+
+ /*
+ * Load from the working alias (map1). Loading from map2
+ * also fails.
+ */
+ memcpy(tmp, map1, size);
+
+ /*
+ * Stores must go via map2 which has write permissions, but
+ * the corrupted data tends to be seen in the snapshot buffer,
+ * so corruption does not appear to be introduced at the
+ * copy-back via map2 alias here.
+ */
+ memcpy(map2, tmp, size);
+ /*
+		 * Before releasing the other threads, ensure that the
+		 * copy back via map2 above has completed and is visible.
+		 */
+ asm volatile("sync" ::: "memory");
+ mprotect(map1, size, PROT_READ|PROT_WRITE);
+ asm volatile("sync" ::: "memory");
+ segv_wait = 0;
+
+ usleep(1); /* This value makes a big difference */
+ }
+
+ return 0;
+}
+
+void alrm_sighandler(int sig)
+{
+ timeout = 1;
+}
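+
+/*
+ * Example invocation (a sketch; the binary name depends on how this
+ * file is built and is an assumption here):
+ *
+ *	./tlbie_test -r 0 -n 4 -l /tmp/mylogs -t 600
+ *
+ * runs 4 rim_threads pinned starting at CPU 0, logs to /tmp/mylogs and
+ * times out after 10 minutes.
+ */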
+
+int main(int argc, char *argv[])
+{
+ int c;
+ int page_size = getpagesize();
+ time_t now;
+ int i, dir_error;
+ pthread_attr_t attr;
+ key_t shm_key = (key_t) getpid();
+ int shmid, run_time = 20 * 60;
+ struct sigaction sa_alrm;
+
+ snprintf(logdir, LOGDIR_NAME_SIZE,
+ "/tmp/logdir-%u", (unsigned int)getpid());
+ while ((c = getopt(argc, argv, "r:hn:l:t:")) != -1) {
+ switch(c) {
+ case 'r':
+ start_cpu = strtoul(optarg, NULL, 10);
+ break;
+ case 'h':
+ printf("%s [-r <start_cpu>] [-n <nrthreads>] [-l <logdir>] [-t <timeout>]\n", argv[0]);
+ exit(0);
+ break;
+ case 'n':
+ nrthreads = strtoul(optarg, NULL, 10);
+ break;
+ case 'l':
+ strncpy(logdir, optarg, LOGDIR_NAME_SIZE - 1);
+ break;
+ case 't':
+ run_time = strtoul(optarg, NULL, 10);
+ break;
+ default:
+ printf("invalid option\n");
+ exit(0);
+ break;
+ }
+ }
+
+ if (nrthreads > MAX_THREADS)
+ nrthreads = MAX_THREADS;
+
+ shmid = shmget(shm_key, page_size, IPC_CREAT|0666);
+ if (shmid < 0) {
+ err_msg("Failed shmget\n");
+ }
+
+ map1 = shmat(shmid, NULL, 0);
+ if (map1 == (void *) -1) {
+ err_msg("Failed shmat");
+ }
+
+ map2 = shmat(shmid, NULL, 0);
+ if (map2 == (void *) -1) {
+ err_msg("Failed shmat");
+ }
+
+ dir_error = mkdir(logdir, 0755);
+
+ if (dir_error) {
+ err_msg("Failed mkdir");
+ }
+
+ printf("start_cpu list:%lu\n", start_cpu);
+ printf("number of worker threads:%lu + 1 snapshot thread\n", nrthreads);
+ printf("Allocated address:0x%016lx + secondary map:0x%016lx\n", (unsigned long)map1, (unsigned long)map2);
+ printf("logdir at : %s\n", logdir);
+ printf("Timeout: %d seconds\n", run_time);
+
+ time(&now);
+ printf("=================================\n");
+ printf(" Starting Test\n");
+ printf(" %s", ctime(&now));
+ printf("=================================\n");
+
+ for (i = 0; i < nrthreads; i++) {
+		if (!fork()) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL);
+ set_mycpu(start_cpu + i);
+ for (;;)
+ sched_yield();
+ exit(0);
+ }
+ }
+
+
+ sa_alrm.sa_handler = &alrm_sighandler;
+ sigemptyset(&sa_alrm.sa_mask);
+ sa_alrm.sa_flags = 0;
+
+ if (sigaction(SIGALRM, &sa_alrm, 0) == -1) {
+ err_msg("Failed signal handler registration\n");
+ }
+
+ alarm(run_time);
+
+ pthread_attr_init(&attr);
+ for (i = 0; i < nrthreads; i++) {
+ rim_thread_ids[i] = i;
+ pthread_create(&rim_threads[i], &attr, rim_fn, &rim_thread_ids[i]);
+ set_pthread_cpu(rim_threads[i], start_cpu + i);
+ }
+
+ pthread_create(&mem_snapshot_thread, &attr, mem_snapshot_fn, map1);
+ set_pthread_cpu(mem_snapshot_thread, start_cpu + i);
+
+
+ pthread_join(mem_snapshot_thread, NULL);
+ for (i = 0; i < nrthreads; i++) {
+ pthread_join(rim_threads[i], NULL);
+ }
+
+ if (!timeout) {
+ time(&now);
+ printf("=================================\n");
+ printf(" Data Corruption Detected\n");
+ printf(" %s", ctime(&now));
+ printf(" See logfiles in %s\n", logdir);
+ printf("=================================\n");
+ return 1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
new file mode 100644
index 000000000..e748f336e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -0,0 +1,3 @@
+count_instructions
+l3_bank_test
+per_event_excludes
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
new file mode 100644
index 000000000..19046db99
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+all: $(TEST_GEN_PROGS) ebb
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
+
+# loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
+ $(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/per_event_excludes: ../utils.c
+
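+# The three overrides below wrap the default lib.mk rules so that the
+# ebb subdirectory's tests are also run, emitted and installed after
+# the top-level PMU tests.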
+DEFAULT_RUN_TESTS := $(RUN_TESTS)
+override define RUN_TESTS
+ $(DEFAULT_RUN_TESTS)
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+endef
+
+DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
+override define EMIT_TESTS
+ $(DEFAULT_EMIT_TESTS)
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+endef
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+ $(DEFAULT_INSTALL_RULE)
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+endef
+
+clean:
+ $(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
+ TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+
+ebb:
+ TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
+
+.PHONY: all run_tests clean ebb
diff --git a/tools/testing/selftests/powerpc/pmu/count_instructions.c b/tools/testing/selftests/powerpc/pmu/count_instructions.c
new file mode 100644
index 000000000..4622117b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_instructions.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop(u64 loops);
+
+static void setup_event(struct event *e, u64 config, char *name)
+{
+ event_init_opts(e, config, PERF_TYPE_HARDWARE, name);
+
+ e->attr.disabled = 1;
+ e->attr.exclude_kernel = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_idle = 1;
+}
+
+static int do_count_loop(struct event *events, u64 instructions,
+ u64 overhead, bool report)
+{
+ s64 difference, expected;
+ double percentage;
+
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+	/* The loop body is 32 instructions, so run (instructions >> 5) iterations */
+ thirty_two_instruction_loop(instructions >> 5);
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ event_read(&events[0]);
+ event_read(&events[1]);
+
+ expected = instructions + overhead;
+ difference = events[0].result.value - expected;
+ percentage = (double)difference / events[0].result.value * 100;
+
+ if (report) {
+ event_report(&events[0]);
+ event_report(&events[1]);
+
+ printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+ printf("Expected %llu\n", expected);
+ printf("Actual %llu\n", events[0].result.value);
+ printf("Delta %lld, %f%%\n", difference, percentage);
+ }
+
+ event_reset(&events[0]);
+ event_reset(&events[1]);
+
+ if (difference < 0)
+ difference = -difference;
+
+ /* Tolerate a difference below 0.0001 % */
+ difference *= 10000 * 100;
+ if (difference / events[0].result.value)
+ return -1;
+
+ return 0;
+}
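+
+/*
+ * Illustrative tolerance arithmetic (not part of the test): after the
+ * scaling above the check is difference * 1,000,000 / count. For a
+ * measured count of 1,000,000,000 the quotient first becomes non-zero
+ * (a failure) at |difference| == 1,000, i.e. exactly 0.0001 %.
+ */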
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+ u64 current, overhead;
+ int i;
+
+ do_count_loop(events, 0, 0, false);
+ overhead = events[0].result.value;
+
+ for (i = 0; i < 100; i++) {
+ do_count_loop(events, 0, 0, false);
+ current = events[0].result.value;
+ if (current < overhead) {
+ printf("Replacing overhead %llu with %llu\n", overhead, current);
+ overhead = current;
+ }
+ }
+
+ return overhead;
+}
+
+static int test_body(void)
+{
+ struct event events[2];
+ u64 overhead;
+
+ setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, "instructions");
+ setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, "cycles");
+
+ if (event_open(&events[0])) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ if (event_open_with_group(&events[1], events[0].fd)) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ overhead = determine_overhead(events);
+ printf("Overhead of null loop: %llu instructions\n", overhead);
+
+ /* Run for 1Mi instructions */
+ FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+ /* Run for 10Mi instructions */
+ FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+ /* Run for 100Mi instructions */
+ FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+ /* Run for 1Bi instructions */
+ FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+ /* Run for 16Bi instructions */
+ FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+ /* Run for 64Bi instructions */
+ FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+ event_close(&events[0]);
+ event_close(&events[1]);
+
+ return 0;
+}
+
+static int count_instructions(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ return test_harness(count_instructions, "count_instructions");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
new file mode 100644
index 000000000..42bddbed8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -0,0 +1,22 @@
+reg_access_test
+event_attributes_test
+cycles_test
+cycles_with_freeze_test
+pmc56_overflow_test
+ebb_vs_cpu_event_test
+cpu_event_vs_ebb_test
+cpu_event_pinned_vs_ebb_test
+task_event_vs_ebb_test
+task_event_pinned_vs_ebb_test
+multi_ebb_procs_test
+multi_counter_test
+pmae_handling_test
+close_clears_pmcc_test
+instruction_count_test
+fork_cleanup_test
+ebb_on_child_test
+ebb_on_willing_child_test
+back_to_back_ebbs_test
+lost_exception_test
+no_handler_test
+cycles_with_mmcr2_test
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
new file mode 100644
index 000000000..bd5dfa509
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../../
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
+ cycles_with_freeze_test pmc56_overflow_test \
+ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
+ cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \
+ task_event_pinned_vs_ebb_test multi_ebb_procs_test \
+ multi_counter_test pmae_handling_test \
+ close_clears_pmcc_test instruction_count_test \
+ fork_cleanup_test ebb_on_child_test \
+ ebb_on_willing_child_test back_to_back_ebbs_test \
+ lost_exception_test no_handler_test \
+ cycles_with_mmcr2_test
+
+top_srcdir = ../../../../../..
+include ../../../lib.mk
+
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
+ ebb.c ebb_handler.S trace.c busy_loop.S
+
+$(OUTPUT)/instruction_count_test: ../loop.S
+
+$(OUTPUT)/lost_exception_test: ../lib.c
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
new file mode 100644
index 000000000..031baa436
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+#define NUMBER_OF_EBBS 50
+
+/*
+ * Test that if we overflow the counter while in the EBB handler, we take
+ * another EBB on exiting from the handler.
+ *
+ * We do this by counting with a stupidly low sample period, causing us to
+ * overflow the PMU while we're still in the EBB handler, leading to another
+ * EBB.
+ *
+ * We get out of what would otherwise be an infinite loop by leaving the
+ * counter frozen once we've taken enough EBBs.
+ */
+
+static void ebb_callee(void)
+{
+ uint64_t siar, val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+ /* Resets the PMC */
+ count_pmc(1, sample_period);
+
+out:
+ if (ebb_state.stats.ebb_count == NUMBER_OF_EBBS)
+ /* Reset but leave counters frozen */
+ reset_ebb_with_clear_mask(MMCR0_PMAO);
+ else
+ /* Unfreezes */
+ reset_ebb();
+
+ /* Do some stuff to chew some cycles and pop the counter */
+ siar = mfspr(SPRN_SIAR);
+ trace_log_reg(ebb_state.trace, SPRN_SIAR, siar);
+
+ val = mfspr(SPRN_PMC1);
+ trace_log_reg(ebb_state.trace, SPRN_PMC1, val);
+
+ val = mfspr(SPRN_MMCR0);
+ trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+}
+
+int back_to_back_ebbs(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(ebb_callee);
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ sample_period = 5;
+
+ ebb_freeze_pmcs();
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+ ebb_global_enable();
+ ebb_unfreeze_pmcs();
+
+ while (ebb_state.stats.ebb_count < NUMBER_OF_EBBS)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count != NUMBER_OF_EBBS);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(back_to_back_ebbs, "back_to_back_ebbs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
new file mode 100644
index 000000000..c7e4093f1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/busy_loop.S
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
+FUNC_START(core_busy_loop)
+ stdu %r1, -168(%r1)
+ std r14, 160(%r1)
+ std r15, 152(%r1)
+ std r16, 144(%r1)
+ std r17, 136(%r1)
+ std r18, 128(%r1)
+ std r19, 120(%r1)
+ std r20, 112(%r1)
+ std r21, 104(%r1)
+ std r22, 96(%r1)
+ std r23, 88(%r1)
+ std r24, 80(%r1)
+ std r25, 72(%r1)
+ std r26, 64(%r1)
+ std r27, 56(%r1)
+ std r28, 48(%r1)
+ std r29, 40(%r1)
+ std r30, 32(%r1)
+ std r31, 24(%r1)
+
+ li r3, 0x3030
+ std r3, -96(%r1)
+ li r4, 0x4040
+ std r4, -104(%r1)
+ li r5, 0x5050
+ std r5, -112(%r1)
+ li r6, 0x6060
+ std r6, -120(%r1)
+ li r7, 0x7070
+ std r7, -128(%r1)
+ li r8, 0x0808
+ std r8, -136(%r1)
+ li r9, 0x0909
+ std r9, -144(%r1)
+ li r10, 0x1010
+ std r10, -152(%r1)
+ li r11, 0x1111
+ std r11, -160(%r1)
+ li r14, 0x1414
+ std r14, -168(%r1)
+ li r15, 0x1515
+ std r15, -176(%r1)
+ li r16, 0x1616
+ std r16, -184(%r1)
+ li r17, 0x1717
+ std r17, -192(%r1)
+ li r18, 0x1818
+ std r18, -200(%r1)
+ li r19, 0x1919
+ std r19, -208(%r1)
+ li r20, 0x2020
+ std r20, -216(%r1)
+ li r21, 0x2121
+ std r21, -224(%r1)
+ li r22, 0x2222
+ std r22, -232(%r1)
+ li r23, 0x2323
+ std r23, -240(%r1)
+ li r24, 0x2424
+ std r24, -248(%r1)
+ li r25, 0x2525
+ std r25, -256(%r1)
+ li r26, 0x2626
+ std r26, -264(%r1)
+ li r27, 0x2727
+ std r27, -272(%r1)
+ li r28, 0x2828
+ std r28, -280(%r1)
+ li r29, 0x2929
+ std r29, -288(%r1)
+ li r30, 0x3030
+ li r31, 0x3131
+
+ li r3, 0
+0: addi r3, r3, 1
+ cmpwi r3, 100
+ blt 0b
+
+ /* Return 1 (fail) unless we get through all the checks */
+ li r3, 1
+
+ /* Check none of our registers have been corrupted */
+ cmpwi r4, 0x4040
+ bne 1f
+ cmpwi r5, 0x5050
+ bne 1f
+ cmpwi r6, 0x6060
+ bne 1f
+ cmpwi r7, 0x7070
+ bne 1f
+ cmpwi r8, 0x0808
+ bne 1f
+ cmpwi r9, 0x0909
+ bne 1f
+ cmpwi r10, 0x1010
+ bne 1f
+ cmpwi r11, 0x1111
+ bne 1f
+ cmpwi r14, 0x1414
+ bne 1f
+ cmpwi r15, 0x1515
+ bne 1f
+ cmpwi r16, 0x1616
+ bne 1f
+ cmpwi r17, 0x1717
+ bne 1f
+ cmpwi r18, 0x1818
+ bne 1f
+ cmpwi r19, 0x1919
+ bne 1f
+ cmpwi r20, 0x2020
+ bne 1f
+ cmpwi r21, 0x2121
+ bne 1f
+ cmpwi r22, 0x2222
+ bne 1f
+ cmpwi r23, 0x2323
+ bne 1f
+ cmpwi r24, 0x2424
+ bne 1f
+ cmpwi r25, 0x2525
+ bne 1f
+ cmpwi r26, 0x2626
+ bne 1f
+ cmpwi r27, 0x2727
+ bne 1f
+ cmpwi r28, 0x2828
+ bne 1f
+ cmpwi r29, 0x2929
+ bne 1f
+ cmpwi r30, 0x3030
+ bne 1f
+ cmpwi r31, 0x3131
+ bne 1f
+
+ /* Load junk into all our registers before we reload them from the stack. */
+ li r3, 0xde
+ li r4, 0xad
+ li r5, 0xbe
+ li r6, 0xef
+ li r7, 0xde
+ li r8, 0xad
+ li r9, 0xbe
+ li r10, 0xef
+ li r11, 0xde
+ li r14, 0xad
+ li r15, 0xbe
+ li r16, 0xef
+ li r17, 0xde
+ li r18, 0xad
+ li r19, 0xbe
+ li r20, 0xef
+ li r21, 0xde
+ li r22, 0xad
+ li r23, 0xbe
+ li r24, 0xef
+ li r25, 0xde
+ li r26, 0xad
+ li r27, 0xbe
+ li r28, 0xef
+ li r29, 0xdd
+
+ ld r3, -96(%r1)
+ cmpwi r3, 0x3030
+ bne 1f
+ ld r4, -104(%r1)
+ cmpwi r4, 0x4040
+ bne 1f
+ ld r5, -112(%r1)
+ cmpwi r5, 0x5050
+ bne 1f
+ ld r6, -120(%r1)
+ cmpwi r6, 0x6060
+ bne 1f
+ ld r7, -128(%r1)
+ cmpwi r7, 0x7070
+ bne 1f
+ ld r8, -136(%r1)
+ cmpwi r8, 0x0808
+ bne 1f
+ ld r9, -144(%r1)
+ cmpwi r9, 0x0909
+ bne 1f
+ ld r10, -152(%r1)
+ cmpwi r10, 0x1010
+ bne 1f
+ ld r11, -160(%r1)
+ cmpwi r11, 0x1111
+ bne 1f
+ ld r14, -168(%r1)
+ cmpwi r14, 0x1414
+ bne 1f
+ ld r15, -176(%r1)
+ cmpwi r15, 0x1515
+ bne 1f
+ ld r16, -184(%r1)
+ cmpwi r16, 0x1616
+ bne 1f
+ ld r17, -192(%r1)
+ cmpwi r17, 0x1717
+ bne 1f
+ ld r18, -200(%r1)
+ cmpwi r18, 0x1818
+ bne 1f
+ ld r19, -208(%r1)
+ cmpwi r19, 0x1919
+ bne 1f
+ ld r20, -216(%r1)
+ cmpwi r20, 0x2020
+ bne 1f
+ ld r21, -224(%r1)
+ cmpwi r21, 0x2121
+ bne 1f
+ ld r22, -232(%r1)
+ cmpwi r22, 0x2222
+ bne 1f
+ ld r23, -240(%r1)
+ cmpwi r23, 0x2323
+ bne 1f
+ ld r24, -248(%r1)
+ cmpwi r24, 0x2424
+ bne 1f
+ ld r25, -256(%r1)
+ cmpwi r25, 0x2525
+ bne 1f
+ ld r26, -264(%r1)
+ cmpwi r26, 0x2626
+ bne 1f
+ ld r27, -272(%r1)
+ cmpwi r27, 0x2727
+ bne 1f
+ ld r28, -280(%r1)
+ cmpwi r28, 0x2828
+ bne 1f
+ ld r29, -288(%r1)
+ cmpwi r29, 0x2929
+ bne 1f
+
+ /* Load 0 (success) to return */
+ li r3, 0
+
+1: ld r14, 160(%r1)
+ ld r15, 152(%r1)
+ ld r16, 144(%r1)
+ ld r17, 136(%r1)
+ ld r18, 128(%r1)
+ ld r19, 120(%r1)
+ ld r20, 112(%r1)
+ ld r21, 104(%r1)
+ ld r22, 96(%r1)
+ ld r23, 88(%r1)
+ ld r24, 80(%r1)
+ ld r25, 72(%r1)
+ ld r26, 64(%r1)
+ ld r27, 56(%r1)
+ ld r28, 48(%r1)
+ ld r29, 40(%r1)
+ ld r30, 32(%r1)
+ ld r31, 24(%r1)
+ addi %r1, %r1, 168
+ blr
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
new file mode 100644
index 000000000..ac18cf617
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC, preventing further access
+ * by userspace to the PMU hardware.
+ */
+
+int close_clears_pmcc(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+	/*
+	 * The real test is here: do we take a SIGILL when writing PMU
+	 * regs now that we have closed the event? We expect that we will.
+	 */
+
+ FAIL_IF(catch_sigill(write_pmc1));
+
+ /* We should still be able to read EBB regs though */
+ mfspr(SPRN_EBBHR);
+ mfspr(SPRN_EBBRR);
+ mfspr(SPRN_BESCR);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(close_clears_pmcc, "close_clears_pmcc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
new file mode 100644
index 000000000..f0632e7fd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -0,0 +1,95 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned cpu event vs an EBB - in that order. The pinned cpu event
+ * should remain and the EBB event should fail to enable.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.pinned = 1;
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(event, cpu));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int cpu_event_pinned_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ int cpu, rc;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the cpu event first */
+ rc = setup_cpu_event(&event, cpu);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* We expect it to fail to read the event */
+ FAIL_IF(wait_for_child(pid) != 2);
+
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The cpu event should have run */
+ FAIL_IF(event.result.value == 0);
+ FAIL_IF(event.result.enabled != event.result.running);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cpu_event_pinned_vs_ebb, "cpu_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
new file mode 100644
index 000000000..33e56a234
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a cpu event vs an EBB - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(event, cpu));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int cpu_event_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ int cpu, rc;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the cpu event first */
+ rc = setup_cpu_event(&event, cpu);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* We expect the child to succeed */
+ FAIL_IF(wait_for_child(pid));
+
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The cpu event may have run */
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cpu_event_vs_ebb, "cpu_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
new file mode 100644
index 000000000..361e0be9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Basic test that counts user cycles and takes EBBs.
+ */
+int cycles(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 10) {
+ FAIL_IF(core_busy_loop());
+ FAIL_IF(ebb_check_mmcr0());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+ FAIL_IF(!ebb_check_count(1, sample_period, 100));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cycles, "cycles");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
new file mode 100644
index 000000000..fe7d0dc2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while using MMCR0_FC (freeze counters) to only count
+ * parts of the code. This is complicated by the fact that FC is set by the
+ * hardware when the event overflows. We may take the EBB after we have set FC,
+ * so we have to be careful about whether we clear FC at the end of the EBB
+ * handler or not.
+ */
+
+static bool counters_frozen = false;
+static int ebbs_while_frozen = 0;
+
+static void ebb_callee(void)
+{
+ uint64_t mask, val;
+
+ mask = MMCR0_PMAO | MMCR0_FC;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+ val = mfspr(SPRN_MMCR0);
+ trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+ if (counters_frozen) {
+ trace_log_string(ebb_state.trace, "frozen");
+ ebbs_while_frozen++;
+ mask &= ~MMCR0_FC;
+ }
+
+ count_pmc(1, sample_period);
+out:
+ reset_ebb_with_clear_mask(mask);
+}
+
+int cycles_with_freeze(void)
+{
+ struct event event;
+ uint64_t val;
+ bool fc_cleared;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ fc_cleared = false;
+
+ /* Make sure we loop until we take at least one EBB */
+ while ((ebb_state.stats.ebb_count < 20 && !fc_cleared) ||
+ ebb_state.stats.ebb_count < 1)
+ {
+ counters_frozen = false;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
+ FAIL_IF(core_busy_loop());
+
+ counters_frozen = true;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+ val = mfspr(SPRN_MMCR0);
+		if (!(val & MMCR0_FC)) {
+ printf("Outside of loop, FC NOT set MMCR0 0x%lx\n", val);
+ fc_cleared = true;
+ }
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ printf("EBBs while frozen %d\n", ebbs_while_frozen);
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+ FAIL_IF(fc_cleared);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cycles_with_freeze, "cycles_with_freeze");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
new file mode 100644
index 000000000..b9b30f974
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test of counting cycles while manipulating the user accessible bits in MMCR2.
+ */
+
+/* We use two values because the first one freezes PMC1, and with it alone we would get no EBBs */
+#define MMCR2_EXPECTED_1 0x4020100804020000UL /* (FC1P|FC2P|FC3P|FC4P|FC5P|FC6P) */
+#define MMCR2_EXPECTED_2 0x0020100804020000UL /* ( FC2P|FC3P|FC4P|FC5P|FC6P) */
+
+
+int cycles_with_mmcr2(void)
+{
+ struct event event;
+ uint64_t val, expected[2], actual;
+ int i;
+ bool bad_mmcr2;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+	/* XXX Setting MMCR2 must happen after the event is enabled */
+ expected[0] = MMCR2_EXPECTED_1;
+ expected[1] = MMCR2_EXPECTED_2;
+ i = 0;
+ bad_mmcr2 = false;
+
+ /* Make sure we loop until we take at least one EBB */
+ while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
+ ebb_state.stats.ebb_count < 1)
+ {
+ mtspr(SPRN_MMCR2, expected[i % 2]);
+
+ FAIL_IF(core_busy_loop());
+
+ val = mfspr(SPRN_MMCR2);
+ if (val != expected[i % 2]) {
+ bad_mmcr2 = true;
+ actual = val;
+ }
+
+ i++;
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ if (bad_mmcr2)
+ printf("Bad MMCR2 value seen is 0x%lx\n", actual);
+
+ FAIL_IF(bad_mmcr2);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(cycles_with_mmcr2, "cycles_with_mmcr2");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
new file mode 100644
index 000000000..2694ae161
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE /* For CPU_ZERO etc. */
+
+#include <sched.h>
+#include <sys/wait.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "trace.h"
+#include "ebb.h"
+
+
+void (*ebb_user_func)(void);
+
+void ebb_hook(void)
+{
+ if (ebb_user_func)
+ ebb_user_func();
+}
+
+struct ebb_state ebb_state;
+
+u64 sample_period = 0x40000000ull;
+
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask)
+{
+ u64 val;
+
+ /* 2) clear MMCR0[PMAO] - docs say BESCR[PMEO] should do this */
+ /* 3) set MMCR0[PMAE] - docs say BESCR[PME] should do this */
+ val = mfspr(SPRN_MMCR0);
+ mtspr(SPRN_MMCR0, (val & ~mmcr0_clear_mask) | MMCR0_PMAE);
+
+ /* 4) clear BESCR[PMEO] */
+ mtspr(SPRN_BESCRR, BESCR_PMEO);
+
+ /* 5) set BESCR[PME] */
+ mtspr(SPRN_BESCRS, BESCR_PME);
+
+ /* 6) rfebb 1 - done in our caller */
+}
+
+void reset_ebb(void)
+{
+ reset_ebb_with_clear_mask(MMCR0_PMAO | MMCR0_FC);
+}
+
+/* Called outside of the EBB handler to check MMCR0 is sane */
+int ebb_check_mmcr0(void)
+{
+ u64 val;
+
+ val = mfspr(SPRN_MMCR0);
+ if ((val & (MMCR0_FC | MMCR0_PMAO)) == MMCR0_FC) {
+ /* It's OK if we see FC & PMAO, but not FC by itself */
+ printf("Outside of loop, only FC set 0x%llx\n", val);
+ return 1;
+ }
+
+ return 0;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge)
+{
+ u64 count, upper, lower;
+
+ count = ebb_state.stats.pmc_count[PMC_INDEX(pmc)];
+
+ lower = ebb_state.stats.ebb_count * (sample_period - fudge);
+
+ if (count < lower) {
+ printf("PMC%d count (0x%llx) below lower limit 0x%llx (-0x%llx)\n",
+ pmc, count, lower, lower - count);
+ return false;
+ }
+
+ upper = ebb_state.stats.ebb_count * (sample_period + fudge);
+
+ if (count > upper) {
+ printf("PMC%d count (0x%llx) above upper limit 0x%llx (+0x%llx)\n",
+ pmc, count, upper, count - upper);
+ return false;
+ }
+
+ printf("PMC%d count (0x%llx) is between 0x%llx and 0x%llx delta +0x%llx/-0x%llx\n",
+ pmc, count, lower, upper, count - lower, upper - count);
+
+ return true;
+}
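+
+/*
+ * Illustrative bound (not part of the test): with the default
+ * sample_period of 0x40000000 and a fudge of 100, after 10 EBBs any
+ * count between 10 * (0x40000000 - 100) and 10 * (0x40000000 + 100)
+ * passes.
+ */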
+
+void standard_ebb_callee(void)
+{
+ int found, i;
+ u64 val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ trace_log_counter(ebb_state.trace, ebb_state.stats.ebb_count);
+
+ val = mfspr(SPRN_MMCR0);
+ trace_log_reg(ebb_state.trace, SPRN_MMCR0, val);
+
+ found = 0;
+ for (i = 1; i <= 6; i++) {
+ if (ebb_state.pmc_enable[PMC_INDEX(i)])
+ found += count_pmc(i, sample_period);
+ }
+
+ if (!found)
+ ebb_state.stats.no_overflow++;
+
+out:
+ reset_ebb();
+}
+
+extern void ebb_handler(void);
+
+void setup_ebb_handler(void (*callee)(void))
+{
+ u64 entry;
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+ entry = (u64)ebb_handler;
+#else
+ struct opd
+ {
+ u64 entry;
+ u64 toc;
+ } *opd;
+
+ opd = (struct opd *)ebb_handler;
+ entry = opd->entry;
+#endif
+ printf("EBB Handler is at %#llx\n", entry);
+
+ ebb_user_func = callee;
+
+ /* Ensure ebb_user_func is set before we set the handler */
+ mb();
+ mtspr(SPRN_EBBHR, entry);
+
+ /* Make sure the handler is set before we return */
+ mb();
+}
+
+void clear_ebb_stats(void)
+{
+ memset(&ebb_state.stats, 0, sizeof(ebb_state.stats));
+}
+
+void dump_summary_ebb_state(void)
+{
+ printf("ebb_state:\n" \
+ " ebb_count = %d\n" \
+ " spurious = %d\n" \
+ " negative = %d\n" \
+ " no_overflow = %d\n" \
+ " pmc[1] count = 0x%llx\n" \
+ " pmc[2] count = 0x%llx\n" \
+ " pmc[3] count = 0x%llx\n" \
+ " pmc[4] count = 0x%llx\n" \
+ " pmc[5] count = 0x%llx\n" \
+ " pmc[6] count = 0x%llx\n",
+ ebb_state.stats.ebb_count, ebb_state.stats.spurious,
+ ebb_state.stats.negative, ebb_state.stats.no_overflow,
+ ebb_state.stats.pmc_count[0], ebb_state.stats.pmc_count[1],
+ ebb_state.stats.pmc_count[2], ebb_state.stats.pmc_count[3],
+ ebb_state.stats.pmc_count[4], ebb_state.stats.pmc_count[5]);
+}
+
+static char *decode_mmcr0(u32 value)
+{
+ static char buf[16];
+
+ buf[0] = '\0';
+
+ if (value & (1 << 31))
+ strcat(buf, "FC ");
+ if (value & (1 << 26))
+ strcat(buf, "PMAE ");
+ if (value & (1 << 7))
+ strcat(buf, "PMAO ");
+
+ return buf;
+}
+
+static char *decode_bescr(u64 value)
+{
+ static char buf[16];
+
+ buf[0] = '\0';
+
+ if (value & (1ull << 63))
+ strcat(buf, "GE ");
+ if (value & (1ull << 32))
+ strcat(buf, "PMAE ");
+ if (value & 1)
+ strcat(buf, "PMAO ");
+
+ return buf;
+}
+
+void dump_ebb_hw_state(void)
+{
+ u64 bescr;
+ u32 mmcr0;
+
+ mmcr0 = mfspr(SPRN_MMCR0);
+ bescr = mfspr(SPRN_BESCR);
+
+ printf("HW state:\n" \
+ "MMCR0 0x%016x %s\n" \
+ "MMCR2 0x%016lx\n" \
+ "EBBHR 0x%016lx\n" \
+ "BESCR 0x%016llx %s\n" \
+ "PMC1 0x%016lx\n" \
+ "PMC2 0x%016lx\n" \
+ "PMC3 0x%016lx\n" \
+ "PMC4 0x%016lx\n" \
+ "PMC5 0x%016lx\n" \
+ "PMC6 0x%016lx\n" \
+ "SIAR 0x%016lx\n",
+ mmcr0, decode_mmcr0(mmcr0), mfspr(SPRN_MMCR2),
+ mfspr(SPRN_EBBHR), bescr, decode_bescr(bescr),
+ mfspr(SPRN_PMC1), mfspr(SPRN_PMC2), mfspr(SPRN_PMC3),
+ mfspr(SPRN_PMC4), mfspr(SPRN_PMC5), mfspr(SPRN_PMC6),
+ mfspr(SPRN_SIAR));
+}
+
+void dump_ebb_state(void)
+{
+ dump_summary_ebb_state();
+
+ dump_ebb_hw_state();
+
+ trace_buffer_print(ebb_state.trace);
+}
+
+int count_pmc(int pmc, uint32_t sample_period)
+{
+ uint32_t start_value;
+ u64 val;
+
+ /* 0) Read PMC */
+ start_value = pmc_sample_period(sample_period);
+
+ val = read_pmc(pmc);
+ if (val < start_value)
+ ebb_state.stats.negative++;
+ else
+ ebb_state.stats.pmc_count[PMC_INDEX(pmc)] += val - start_value;
+
+ trace_log_reg(ebb_state.trace, SPRN_PMC1 + pmc - 1, val);
+
+ /* 1) Reset PMC */
+ write_pmc(pmc, start_value);
+
+ /* Report if we overflowed */
+ return val >= COUNTER_OVERFLOW;
+}
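+
+/*
+ * Illustrative numbers (not part of the test): with a sample_period of
+ * 5, start_value is 0x80000000 - 5 = 0x7ffffffb. After an overflow EBB
+ * the PMC reads just past 0x80000000, so val - start_value is roughly 5
+ * and the function reports an overflow.
+ */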
+
+int ebb_event_enable(struct event *e)
+{
+ int rc;
+
+ /* Ensure any SPR writes are ordered vs us */
+ mb();
+
+ rc = ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+ if (rc)
+ return rc;
+
+ rc = event_read(e);
+
+ /* Ditto */
+ mb();
+
+ return rc;
+}
+
+void ebb_freeze_pmcs(void)
+{
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+ mb();
+}
+
+void ebb_unfreeze_pmcs(void)
+{
+ /* Unfreeze counters */
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+ mb();
+}
+
+void ebb_global_enable(void)
+{
+ /* Enable EBBs globally and PMU EBBs */
+ mtspr(SPRN_BESCR, 0x8000000100000000ull);
+ mb();
+}
+
+void ebb_global_disable(void)
+{
+ /* Disable EBBs & freeze counters, events are still scheduled */
+ mtspr(SPRN_BESCRR, BESCR_PME);
+ mb();
+}
+
+bool ebb_is_supported(void)
+{
+#ifdef PPC_FEATURE2_EBB
+ /* EBB requires at least POWER8 */
+ return have_hwcap2(PPC_FEATURE2_EBB);
+#else
+ return false;
+#endif
+}
+
+void event_ebb_init(struct event *e)
+{
+ e->attr.config |= (1ull << 63);
+}
+
+void event_bhrb_init(struct event *e, unsigned ifm)
+{
+ e->attr.config |= (1ull << 62) | ((u64)ifm << 60);
+}
+
+void event_leader_ebb_init(struct event *e)
+{
+ event_ebb_init(e);
+
+ e->attr.exclusive = 1;
+ e->attr.pinned = 1;
+}
+
+int ebb_child(union pipe read_pipe, union pipe write_pipe)
+{
+ struct event event;
+ uint64_t val;
+
+ FAIL_IF(wait_for_parent(read_pipe));
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(event_enable(&event));
+
+ if (event_read(&event)) {
+ /*
+ * Some tests expect to fail here, so don't report an error on
+		 * this line, and return a distinguishable error code. Tell the
+ * parent an error happened.
+ */
+ notify_parent_of_error(write_pipe);
+ return 2;
+ }
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ FAIL_IF(notify_parent(write_pipe));
+ FAIL_IF(wait_for_parent(read_pipe));
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (ebb_state.stats.ebb_count < 20) {
+ FAIL_IF(core_busy_loop());
+
+ /* To try and hit SIGILL case */
+ val = mfspr(SPRN_MMCRA);
+ val |= mfspr(SPRN_MMCR2);
+ val |= mfspr(SPRN_MMCR0);
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+static jmp_buf setjmp_env;
+
+static void sigill_handler(int signal)
+{
+ printf("Took sigill\n");
+ longjmp(setjmp_env, 1);
+}
+
+static struct sigaction sigill_action = {
+ .sa_handler = sigill_handler,
+};
+
+int catch_sigill(void (*func)(void))
+{
+ if (sigaction(SIGILL, &sigill_action, NULL)) {
+ perror("sigaction");
+ return 1;
+ }
+
+ if (setjmp(setjmp_env) == 0) {
+ func();
+ return 1;
+ }
+
+ return 0;
+}
+
+void write_pmc1(void)
+{
+ mtspr(SPRN_PMC1, 0);
+}
+
+void write_pmc(int pmc, u64 value)
+{
+ switch (pmc) {
+ case 1: mtspr(SPRN_PMC1, value); break;
+ case 2: mtspr(SPRN_PMC2, value); break;
+ case 3: mtspr(SPRN_PMC3, value); break;
+ case 4: mtspr(SPRN_PMC4, value); break;
+ case 5: mtspr(SPRN_PMC5, value); break;
+ case 6: mtspr(SPRN_PMC6, value); break;
+ }
+}
+
+u64 read_pmc(int pmc)
+{
+ switch (pmc) {
+ case 1: return mfspr(SPRN_PMC1);
+ case 2: return mfspr(SPRN_PMC2);
+ case 3: return mfspr(SPRN_PMC3);
+ case 4: return mfspr(SPRN_PMC4);
+ case 5: return mfspr(SPRN_PMC5);
+ case 6: return mfspr(SPRN_PMC6);
+ }
+
+ return 0;
+}
+
+static void term_handler(int signal)
+{
+ dump_summary_ebb_state();
+ dump_ebb_hw_state();
+ abort();
+}
+
+struct sigaction term_action = {
+ .sa_handler = term_handler,
+};
+
+static void __attribute__((constructor)) ebb_init(void)
+{
+ clear_ebb_stats();
+
+ if (sigaction(SIGTERM, &term_action, NULL))
+ perror("sigaction");
+
+ ebb_state.trace = trace_buffer_allocate(1 * 1024 * 1024);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
new file mode 100644
index 000000000..f87e761f8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+#define _SELFTESTS_POWERPC_PMU_EBB_EBB_H
+
+#include "../event.h"
+#include "../lib.h"
+#include "trace.h"
+#include "reg.h"
+
+#define PMC_INDEX(pmc) ((pmc)-1)
+
+#define NUM_PMC_VALUES 128
+
+struct ebb_state
+{
+ struct {
+ u64 pmc_count[6];
+ volatile int ebb_count;
+ int spurious;
+ int negative;
+ int no_overflow;
+ } stats;
+
+ bool pmc_enable[6];
+ struct trace_buffer *trace;
+};
+
+extern struct ebb_state ebb_state;
+
+#define COUNTER_OVERFLOW 0x80000000ull
+
+static inline uint32_t pmc_sample_period(uint32_t value)
+{
+ return COUNTER_OVERFLOW - value;
+}
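+
+/*
+ * For example, pmc_sample_period(5) returns 0x7ffffffb; the PMC then
+ * crosses COUNTER_OVERFLOW after 5 counted events.
+ */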
+
+static inline void ebb_enable_pmc_counting(int pmc)
+{
+ ebb_state.pmc_enable[PMC_INDEX(pmc)] = true;
+}
+
+bool ebb_check_count(int pmc, u64 sample_period, int fudge);
+void event_leader_ebb_init(struct event *e);
+void event_ebb_init(struct event *e);
+void event_bhrb_init(struct event *e, unsigned ifm);
+void setup_ebb_handler(void (*callee)(void));
+void standard_ebb_callee(void);
+int ebb_event_enable(struct event *e);
+void ebb_global_enable(void);
+void ebb_global_disable(void);
+bool ebb_is_supported(void);
+void ebb_freeze_pmcs(void);
+void ebb_unfreeze_pmcs(void);
+void event_ebb_init(struct event *e);
+void event_leader_ebb_init(struct event *e);
+int count_pmc(int pmc, uint32_t sample_period);
+void dump_ebb_state(void);
+void dump_summary_ebb_state(void);
+void dump_ebb_hw_state(void);
+void clear_ebb_stats(void);
+void write_pmc(int pmc, u64 value);
+u64 read_pmc(int pmc);
+void reset_ebb_with_clear_mask(unsigned long mmcr0_clear_mask);
+void reset_ebb(void);
+int ebb_check_mmcr0(void);
+
+extern u64 sample_period;
+
+int core_busy_loop(void);
+int ebb_child(union pipe read_pipe, union pipe write_pipe);
+int catch_sigill(void (*func)(void));
+void write_pmc1(void);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_EBB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
new file mode 100644
index 000000000..14274ea20
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S
@@ -0,0 +1,365 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+#include "reg.h"
+
+
+/* ppc-asm.h defines most of the reg aliases, but not r1/r2. */
+#define r1 1
+#define r2 2
+
+#define RFEBB .long 0x4c000924
+
+/* Stack layout:
+ *
+ * ^
+ * User stack |
+ * Back chain ------+ <- r1 <-------+
+ * ... |
+ * Red zone / ABI Gap |
+ * ... |
+ * vr63 <+ |
+ * vr0 | |
+ * VSCR | |
+ * FSCR | |
+ * r31 | Save area |
+ * r0 | |
+ * XER | |
+ * CTR | |
+ * LR | |
+ * CCR <+ |
+ * ... <+ |
+ * LR | Caller frame |
+ * CCR | |
+ * Back chain <+ <- updated r1 --------+
+ *
+ */
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ABIGAP 512
+#else
+#define ABIGAP 288
+#endif
+
+#define NR_GPR 32
+#define NR_SPR 6
+#define NR_VSR 64
+
+#define SAVE_AREA ((NR_GPR + NR_SPR) * 8 + (NR_VSR * 16))
+#define CALLER_FRAME 112
+
+#define STACK_FRAME (ABIGAP + SAVE_AREA + CALLER_FRAME)
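+
+/*
+ * For reference (illustrative arithmetic): SAVE_AREA = (32 + 6) * 8 +
+ * 64 * 16 = 1328 bytes, so STACK_FRAME is 512 + 1328 + 112 = 1952
+ * bytes on ELFv2 and 288 + 1328 + 112 = 1728 bytes on ELFv1.
+ */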
+
+#define CCR_SAVE (CALLER_FRAME)
+#define LR_SAVE (CCR_SAVE + 8)
+#define CTR_SAVE (LR_SAVE + 8)
+#define XER_SAVE (CTR_SAVE + 8)
+#define GPR_SAVE(n) (XER_SAVE + 8 + (8 * n))
+#define FSCR_SAVE (GPR_SAVE(31) + 8)
+#define VSCR_SAVE (FSCR_SAVE + 8)
+#define VSR_SAVE(n) (VSCR_SAVE + 8 + (16 * n))
+
+#define SAVE_GPR(n) std n,GPR_SAVE(n)(r1)
+#define REST_GPR(n) ld n,GPR_SAVE(n)(r1)
+#define TRASH_GPR(n) lis n,0xaaaa
+
+#define SAVE_VSR(n, b) li b, VSR_SAVE(n); stxvd2x n,b,r1
+#define LOAD_VSR(n, b) li b, VSR_SAVE(n); lxvd2x n,b,r1
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+ lis reg,(expr)@highest; \
+ ori reg,reg,(expr)@higher; \
+ rldicr reg,reg,32,31; \
+ oris reg,reg,(expr)@h; \
+ ori reg,reg,(expr)@l;
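+
+/*
+ * Worked expansion (illustrative only): loading 0x123456789abcdef0
+ * builds the value 16 bits at a time:
+ *
+ *	lis	reg, 0x1234		# reg = 0x0000000012340000
+ *	ori	reg, reg, 0x5678	# reg = 0x0000000012345678
+ *	rldicr	reg, reg, 32, 31	# reg = 0x1234567800000000
+ *	oris	reg, reg, 0x9abc	# reg = 0x123456789abc0000
+ *	ori	reg, reg, 0xdef0	# reg = 0x123456789abcdef0
+ */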
+
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define ENTRY_POINT(name) \
+ .type FUNC_NAME(name),@function; \
+ .globl FUNC_NAME(name); \
+ FUNC_NAME(name):
+
+#define RESTORE_TOC(name) \
+ /* Restore our TOC pointer using our entry point */ \
+ LOAD_REG_IMMEDIATE(r12, name) \
+0: addis r2,r12,(.TOC.-0b)@ha; \
+ addi r2,r2,(.TOC.-0b)@l;
+
+#else
+#define ENTRY_POINT(name) FUNC_START(name)
+#define RESTORE_TOC(name) \
+ /* Restore our TOC pointer via our opd entry */ \
+ LOAD_REG_IMMEDIATE(r2, name) \
+ ld r2,8(r2);
+#endif
+
+ .text
+
+ENTRY_POINT(ebb_handler)
+ stdu r1,-STACK_FRAME(r1)
+ SAVE_GPR(0)
+ mflr r0
+ std r0,LR_SAVE(r1)
+ mfcr r0
+ std r0,CCR_SAVE(r1)
+ mfctr r0
+ std r0,CTR_SAVE(r1)
+ mfxer r0
+ std r0,XER_SAVE(r1)
+ SAVE_GPR(2)
+ SAVE_GPR(3)
+ SAVE_GPR(4)
+ SAVE_GPR(5)
+ SAVE_GPR(6)
+ SAVE_GPR(7)
+ SAVE_GPR(8)
+ SAVE_GPR(9)
+ SAVE_GPR(10)
+ SAVE_GPR(11)
+ SAVE_GPR(12)
+ SAVE_GPR(13)
+ SAVE_GPR(14)
+ SAVE_GPR(15)
+ SAVE_GPR(16)
+ SAVE_GPR(17)
+ SAVE_GPR(18)
+ SAVE_GPR(19)
+ SAVE_GPR(20)
+ SAVE_GPR(21)
+ SAVE_GPR(22)
+ SAVE_GPR(23)
+ SAVE_GPR(24)
+ SAVE_GPR(25)
+ SAVE_GPR(26)
+ SAVE_GPR(27)
+ SAVE_GPR(28)
+ SAVE_GPR(29)
+ SAVE_GPR(30)
+ SAVE_GPR(31)
+ SAVE_VSR(0, r3)
+ mffs f0
+ stfd f0, FSCR_SAVE(r1)
+ mfvscr f0
+ stfd f0, VSCR_SAVE(r1)
+ SAVE_VSR(1, r3)
+ SAVE_VSR(2, r3)
+ SAVE_VSR(3, r3)
+ SAVE_VSR(4, r3)
+ SAVE_VSR(5, r3)
+ SAVE_VSR(6, r3)
+ SAVE_VSR(7, r3)
+ SAVE_VSR(8, r3)
+ SAVE_VSR(9, r3)
+ SAVE_VSR(10, r3)
+ SAVE_VSR(11, r3)
+ SAVE_VSR(12, r3)
+ SAVE_VSR(13, r3)
+ SAVE_VSR(14, r3)
+ SAVE_VSR(15, r3)
+ SAVE_VSR(16, r3)
+ SAVE_VSR(17, r3)
+ SAVE_VSR(18, r3)
+ SAVE_VSR(19, r3)
+ SAVE_VSR(20, r3)
+ SAVE_VSR(21, r3)
+ SAVE_VSR(22, r3)
+ SAVE_VSR(23, r3)
+ SAVE_VSR(24, r3)
+ SAVE_VSR(25, r3)
+ SAVE_VSR(26, r3)
+ SAVE_VSR(27, r3)
+ SAVE_VSR(28, r3)
+ SAVE_VSR(29, r3)
+ SAVE_VSR(30, r3)
+ SAVE_VSR(31, r3)
+ SAVE_VSR(32, r3)
+ SAVE_VSR(33, r3)
+ SAVE_VSR(34, r3)
+ SAVE_VSR(35, r3)
+ SAVE_VSR(36, r3)
+ SAVE_VSR(37, r3)
+ SAVE_VSR(38, r3)
+ SAVE_VSR(39, r3)
+ SAVE_VSR(40, r3)
+ SAVE_VSR(41, r3)
+ SAVE_VSR(42, r3)
+ SAVE_VSR(43, r3)
+ SAVE_VSR(44, r3)
+ SAVE_VSR(45, r3)
+ SAVE_VSR(46, r3)
+ SAVE_VSR(47, r3)
+ SAVE_VSR(48, r3)
+ SAVE_VSR(49, r3)
+ SAVE_VSR(50, r3)
+ SAVE_VSR(51, r3)
+ SAVE_VSR(52, r3)
+ SAVE_VSR(53, r3)
+ SAVE_VSR(54, r3)
+ SAVE_VSR(55, r3)
+ SAVE_VSR(56, r3)
+ SAVE_VSR(57, r3)
+ SAVE_VSR(58, r3)
+ SAVE_VSR(59, r3)
+ SAVE_VSR(60, r3)
+ SAVE_VSR(61, r3)
+ SAVE_VSR(62, r3)
+ SAVE_VSR(63, r3)
+
+ TRASH_GPR(2)
+ TRASH_GPR(3)
+ TRASH_GPR(4)
+ TRASH_GPR(5)
+ TRASH_GPR(6)
+ TRASH_GPR(7)
+ TRASH_GPR(8)
+ TRASH_GPR(9)
+ TRASH_GPR(10)
+ TRASH_GPR(11)
+ TRASH_GPR(12)
+ TRASH_GPR(14)
+ TRASH_GPR(15)
+ TRASH_GPR(16)
+ TRASH_GPR(17)
+ TRASH_GPR(18)
+ TRASH_GPR(19)
+ TRASH_GPR(20)
+ TRASH_GPR(21)
+ TRASH_GPR(22)
+ TRASH_GPR(23)
+ TRASH_GPR(24)
+ TRASH_GPR(25)
+ TRASH_GPR(26)
+ TRASH_GPR(27)
+ TRASH_GPR(28)
+ TRASH_GPR(29)
+ TRASH_GPR(30)
+ TRASH_GPR(31)
+
+ RESTORE_TOC(ebb_handler)
+
+ /*
+ * r13 is our TLS pointer. We leave whatever value was in there when the
+ * EBB fired. That seems to be OK because once set the TLS pointer is not
+ * changed - but presumably that could change in future.
+ */
+
+ bl ebb_hook
+ nop
+
+ /* r2 may be changed here but we don't care */
+
+ lfd f0, FSCR_SAVE(r1)
+ mtfsf 0xff,f0
+ lfd f0, VSCR_SAVE(r1)
+ mtvscr f0
+ LOAD_VSR(0, r3)
+ LOAD_VSR(1, r3)
+ LOAD_VSR(2, r3)
+ LOAD_VSR(3, r3)
+ LOAD_VSR(4, r3)
+ LOAD_VSR(5, r3)
+ LOAD_VSR(6, r3)
+ LOAD_VSR(7, r3)
+ LOAD_VSR(8, r3)
+ LOAD_VSR(9, r3)
+ LOAD_VSR(10, r3)
+ LOAD_VSR(11, r3)
+ LOAD_VSR(12, r3)
+ LOAD_VSR(13, r3)
+ LOAD_VSR(14, r3)
+ LOAD_VSR(15, r3)
+ LOAD_VSR(16, r3)
+ LOAD_VSR(17, r3)
+ LOAD_VSR(18, r3)
+ LOAD_VSR(19, r3)
+ LOAD_VSR(20, r3)
+ LOAD_VSR(21, r3)
+ LOAD_VSR(22, r3)
+ LOAD_VSR(23, r3)
+ LOAD_VSR(24, r3)
+ LOAD_VSR(25, r3)
+ LOAD_VSR(26, r3)
+ LOAD_VSR(27, r3)
+ LOAD_VSR(28, r3)
+ LOAD_VSR(29, r3)
+ LOAD_VSR(30, r3)
+ LOAD_VSR(31, r3)
+ LOAD_VSR(32, r3)
+ LOAD_VSR(33, r3)
+ LOAD_VSR(34, r3)
+ LOAD_VSR(35, r3)
+ LOAD_VSR(36, r3)
+ LOAD_VSR(37, r3)
+ LOAD_VSR(38, r3)
+ LOAD_VSR(39, r3)
+ LOAD_VSR(40, r3)
+ LOAD_VSR(41, r3)
+ LOAD_VSR(42, r3)
+ LOAD_VSR(43, r3)
+ LOAD_VSR(44, r3)
+ LOAD_VSR(45, r3)
+ LOAD_VSR(46, r3)
+ LOAD_VSR(47, r3)
+ LOAD_VSR(48, r3)
+ LOAD_VSR(49, r3)
+ LOAD_VSR(50, r3)
+ LOAD_VSR(51, r3)
+ LOAD_VSR(52, r3)
+ LOAD_VSR(53, r3)
+ LOAD_VSR(54, r3)
+ LOAD_VSR(55, r3)
+ LOAD_VSR(56, r3)
+ LOAD_VSR(57, r3)
+ LOAD_VSR(58, r3)
+ LOAD_VSR(59, r3)
+ LOAD_VSR(60, r3)
+ LOAD_VSR(61, r3)
+ LOAD_VSR(62, r3)
+ LOAD_VSR(63, r3)
+
+ ld r0,XER_SAVE(r1)
+ mtxer r0
+ ld r0,CTR_SAVE(r1)
+ mtctr r0
+ ld r0,LR_SAVE(r1)
+ mtlr r0
+ ld r0,CCR_SAVE(r1)
+ mtcr r0
+ REST_GPR(0)
+ REST_GPR(2)
+ REST_GPR(3)
+ REST_GPR(4)
+ REST_GPR(5)
+ REST_GPR(6)
+ REST_GPR(7)
+ REST_GPR(8)
+ REST_GPR(9)
+ REST_GPR(10)
+ REST_GPR(11)
+ REST_GPR(12)
+ REST_GPR(13)
+ REST_GPR(14)
+ REST_GPR(15)
+ REST_GPR(16)
+ REST_GPR(17)
+ REST_GPR(18)
+ REST_GPR(19)
+ REST_GPR(20)
+ REST_GPR(21)
+ REST_GPR(22)
+ REST_GPR(23)
+ REST_GPR(24)
+ REST_GPR(25)
+ REST_GPR(26)
+ REST_GPR(27)
+ REST_GPR(28)
+ REST_GPR(29)
+ REST_GPR(30)
+ REST_GPR(31)
+ addi r1,r1,STACK_FRAME
+ RFEBB
+FUNC_END(ebb_handler)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
new file mode 100644
index 000000000..1e7b7fe23
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests that we can set up an EBB on our child. Nothing interesting happens,
+ * because even though the event is enabled and running, the child hasn't
+ * enabled the actual delivery of the EBBs.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+ int i;
+
+ FAIL_IF(wait_for_parent(read_pipe));
+ FAIL_IF(notify_parent(write_pipe));
+
+ /* Parent creates EBB event */
+
+ FAIL_IF(wait_for_parent(read_pipe));
+ FAIL_IF(notify_parent(write_pipe));
+
+ /* Check the EBB is enabled by writing PMC1 */
+ write_pmc1();
+
+ /* EBB event is enabled here */
+ for (i = 0; i < 1000000; i++) ;
+
+ return 0;
+}
+
+int ebb_on_child(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(victim_child(write_pipe, read_pipe));
+ }
+
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Child is running now */
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(&event, pid));
+ FAIL_IF(ebb_event_enable(&event));
+
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Child should just exit happily */
+ FAIL_IF(wait_for_child(pid));
+
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ebb_on_child, "ebb_on_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
new file mode 100644
index 000000000..174e4f4da
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests that we can set up an EBB on our child. The child expects this and
+ * enables EBBs, which are then delivered to the child, even though the event
+ * is created by the parent.
+ */
+
+static int victim_child(union pipe read_pipe, union pipe write_pipe)
+{
+ FAIL_IF(wait_for_parent(read_pipe));
+
+ /* Setup our EBB handler, before the EBB event is created */
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (ebb_state.stats.ebb_count < 20) {
+ FAIL_IF(core_busy_loop());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+/* Tests that we can set up an EBB on our child - if it's expecting it */
+int ebb_on_willing_child(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(victim_child(write_pipe, read_pipe));
+ }
+
+ /* Signal the child to setup its EBB handler */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Child is running now */
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(&event, pid));
+ FAIL_IF(ebb_event_enable(&event));
+
+	/* Child should now take EBBs and then exit */
+ FAIL_IF(wait_for_child(pid));
+
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ebb_on_willing_child, "ebb_on_willing_child");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
new file mode 100644
index 000000000..af20a2b36
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests an EBB vs a cpu event - in that order. The EBB should force the cpu
+ * event off the PMU.
+ */
+
+static int setup_cpu_event(struct event *event, int cpu)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(event, cpu));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int ebb_vs_cpu_event(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ int cpu, rc;
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* Now try to install our CPU event */
+ rc = setup_cpu_event(&event, cpu);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ /* .. and wait for it to complete */
+ FAIL_IF(wait_for_child(pid));
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ /* The cpu event may have run, but we don't expect 100% */
+ FAIL_IF(event.result.enabled >= event.result.running);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ebb_vs_cpu_event, "ebb_vs_cpu_event");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
new file mode 100644
index 000000000..7762ab26e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that various attributes of the EBB event are enforced.
+ */
+int event_attributes(void)
+{
+ struct event event, leader;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ /* Expected to succeed */
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+
+ event_init(&event, 0x001e); /* CYCLES - no PMC specified */
+ event_leader_ebb_init(&event);
+ /* Expected to fail, no PMC specified */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x2001e);
+ event_leader_ebb_init(&event);
+ event.attr.exclusive = 0;
+ /* Expected to fail, not exclusive */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x3001e);
+ event_leader_ebb_init(&event);
+ event.attr.freq = 1;
+ /* Expected to fail, sets freq */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x4001e);
+ event_leader_ebb_init(&event);
+ event.attr.sample_period = 1;
+ /* Expected to fail, sets sample_period */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ event.attr.enable_on_exec = 1;
+ /* Expected to fail, sets enable_on_exec */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ event.attr.inherit = 1;
+ /* Expected to fail, sets inherit */
+ FAIL_IF(event_open(&event) == 0);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x20002);
+ event_ebb_init(&event);
+
+ /* Expected to succeed */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+ event_close(&leader);
+ event_close(&event);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x20002);
+
+ /* Expected to fail, event doesn't request EBB, leader does */
+ FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+ event_close(&leader);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ /* Clear the EBB flag */
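+	/* (On powerpc the EBB request is bit 63 of the raw event config) */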
+ leader.attr.config &= ~(1ull << 63);
+
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x20002);
+ event_ebb_init(&event);
+
+ /* Expected to fail, leader doesn't request EBB */
+ FAIL_IF(event_open_with_group(&event, leader.fd) == 0);
+ event_close(&leader);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ leader.attr.exclusive = 0;
+ /* Expected to fail, leader isn't exclusive */
+ FAIL_IF(event_open(&leader) == 0);
+
+
+ event_init(&leader, 0x1001e);
+ event_leader_ebb_init(&leader);
+ leader.attr.pinned = 0;
+ /* Expected to fail, leader isn't pinned */
+ FAIL_IF(event_open(&leader) == 0);
+
+ event_init(&event, 0x1001e);
+ event_leader_ebb_init(&event);
+ /* Expected to fail, not a task event */
+ SKIP_IF(require_paranoia_below(1));
+ FAIL_IF(event_open_with_cpu(&event, 0) == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(event_attributes, "event_attributes");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
new file mode 100644
index 000000000..b866a0581
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
+FUNC_START(thirty_two_instruction_loop)
+ cmpwi r3,0
+ beqlr
+ addi r4,r3,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1 # 28 addi's
+ subi r3,r3,1
+ b FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
new file mode 100644
index 000000000..167135bd9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that a fork clears the PMU state of the child. eg. BESCR/EBBHR/EBBRR
+ * are cleared, and MMCR0_PMCC is reset, preventing the child from accessing
+ * the PMU.
+ */
+
+static struct event event;
+
+static int child(void)
+{
+ /* Even though we have EBE=0 we can still see the EBB regs */
+ FAIL_IF(mfspr(SPRN_BESCR) != 0);
+ FAIL_IF(mfspr(SPRN_EBBHR) != 0);
+ FAIL_IF(mfspr(SPRN_EBBRR) != 0);
+
+ FAIL_IF(catch_sigill(write_pmc1));
+
+ /* We can still read from the event, though it is on our parent */
+ FAIL_IF(event_read(&event));
+
+ return 0;
+}
+
+/* Tests that fork clears EBB state */
+int fork_cleanup(void)
+{
+ pid_t pid;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_MMCR0, MMCR0_FC);
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ /* Don't need to actually take any EBBs */
+
+ pid = fork();
+ if (pid == 0)
+ exit(child());
+
+ /* Child does the actual testing */
+ FAIL_IF(wait_for_child(pid));
+
+ /* After fork */
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(fork_cleanup, "fork_cleanup");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
new file mode 100644
index 000000000..35a3426e3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Run a calibrated instruction loop and count instructions executed using
+ * EBBs. Make sure the counts look right.
+ */
+
+extern void thirty_two_instruction_loop(uint64_t loops);
+
+static bool counters_frozen = true;
+
+static int do_count_loop(struct event *event, uint64_t instructions,
+ uint64_t overhead, bool report)
+{
+ int64_t difference, expected;
+ double percentage;
+
+ clear_ebb_stats();
+
+ counters_frozen = false;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+
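+	/* The loop body executes 32 instructions per iteration, hence the >> 5 */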
+ thirty_two_instruction_loop(instructions >> 5);
+
+ counters_frozen = true;
+ mb();
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC);
+
+ count_pmc(4, sample_period);
+
+ event->result.value = ebb_state.stats.pmc_count[4-1];
+ expected = instructions + overhead;
+ difference = event->result.value - expected;
+ percentage = (double)difference / event->result.value * 100;
+
+ if (report) {
+ printf("Looped for %lu instructions, overhead %lu\n", instructions, overhead);
+		printf("Expected %ld\n", expected);
+ printf("Actual %llu\n", event->result.value);
+ printf("Delta %ld, %f%%\n", difference, percentage);
+ printf("Took %d EBBs\n", ebb_state.stats.ebb_count);
+ }
+
+ if (difference < 0)
+ difference = -difference;
+
+ /* Tolerate a difference of up to 0.0001 % */
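+	/* (scaled by 1e6 so the integer divide is non-zero iff |delta| / actual >= 1e-6) */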
+ difference *= 10000 * 100;
+ if (difference / event->result.value)
+ return -1;
+
+ return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static uint64_t determine_overhead(struct event *event)
+{
+ uint64_t current, overhead;
+ int i;
+
+ do_count_loop(event, 0, 0, false);
+ overhead = event->result.value;
+
+ for (i = 0; i < 100; i++) {
+ do_count_loop(event, 0, 0, false);
+ current = event->result.value;
+ if (current < overhead) {
+ printf("Replacing overhead %lu with %lu\n", overhead, current);
+ overhead = current;
+ }
+ }
+
+ return overhead;
+}
+
+static void pmc4_ebb_callee(void)
+{
+ uint64_t val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ count_pmc(4, sample_period);
+out:
+ if (counters_frozen)
+ reset_ebb_with_clear_mask(MMCR0_PMAO);
+ else
+ reset_ebb();
+}
+
+int instruction_count(void)
+{
+ struct event event;
+ uint64_t overhead;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x400FA, "PM_RUN_INST_CMPL");
+ event_leader_ebb_init(&event);
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+ FAIL_IF(ebb_event_enable(&event));
+
+ sample_period = COUNTER_OVERFLOW;
+
+ setup_ebb_handler(pmc4_ebb_callee);
+ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_FC);
+ ebb_global_enable();
+
+ overhead = determine_overhead(&event);
+ printf("Overhead of null loop: %lu instructions\n", overhead);
+
+ /* Run for 1M instructions */
+ FAIL_IF(do_count_loop(&event, 0x100000, overhead, true));
+
+ /* Run for 10M instructions */
+ FAIL_IF(do_count_loop(&event, 0xa00000, overhead, true));
+
+ /* Run for 100M instructions */
+ FAIL_IF(do_count_loop(&event, 0x6400000, overhead, true));
+
+ /* Run for 1G instructions */
+ FAIL_IF(do_count_loop(&event, 0x40000000, overhead, true));
+
+ /* Run for 16G instructions */
+ FAIL_IF(do_count_loop(&event, 0x400000000, overhead, true));
+
+ /* Run for 64G instructions */
+ FAIL_IF(do_count_loop(&event, 0x1000000000, overhead, true));
+
+ /* Run for 128G instructions */
+ FAIL_IF(do_count_loop(&event, 0x2000000000, overhead, true));
+
+ ebb_global_disable();
+ event_close(&event);
+
+ printf("Finished OK\n");
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(instruction_count, "instruction_count");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
new file mode 100644
index 000000000..dddb95938
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that tries to trigger CPU_FTR_PMAO_BUG, a hardware defect where an
+ * exception triggers but we context switch before it is delivered, and so
+ * lose the exception.
+ */
+
+static int test_body(void)
+{
+ int i, orig_period, max_period;
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ /* We use PMC4 to make sure the kernel switches all counters correctly */
+ event_init_named(&event, 0x40002, "instructions");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(4);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ /*
+ * We want a low sample period, but we also want to get out of the EBB
+ * handler without tripping up again.
+ *
+	 * This value was picked after much experimentation.
+ */
+ orig_period = max_period = sample_period = 400;
+
+ mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1000000) {
+ /*
+ * We are trying to get the EBB exception to race exactly with
+ * us entering the kernel to do the syscall. We then need the
+ * kernel to decide our timeslice is up and context switch to
+ * the other thread. When we come back our EBB will have been
+ * lost and we'll spin in this while loop forever.
+ */
+
+ for (i = 0; i < 100000; i++)
+ sched_yield();
+
+ /* Change the sample period slightly to try and hit the race */
+ if (sample_period >= (orig_period + 200))
+ sample_period = orig_period;
+ else
+ sample_period++;
+
+ if (sample_period > max_period)
+ max_period = sample_period;
+ }
+
+ ebb_freeze_pmcs();
+ ebb_global_disable();
+
+ mtspr(SPRN_PMC4, 0xdead);
+
+ dump_summary_ebb_state();
+ dump_ebb_hw_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ /* We vary our sample period so we need extra fudge here */
+ FAIL_IF(!ebb_check_count(4, orig_period, 2 * (max_period - orig_period)));
+
+ return 0;
+}
+
+static int lost_exception(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(lost_exception, "lost_exception");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
new file mode 100644
index 000000000..035c02273
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -0,0 +1,86 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test counting multiple events using EBBs.
+ */
+int multi_counter(void)
+{
+ struct event events[6];
+ int i, group_fd;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&events[0], 0x1001C, "PM_CMPLU_STALL_THRD");
+ event_init_named(&events[1], 0x2D016, "PM_CMPLU_STALL_FXU");
+ event_init_named(&events[2], 0x30006, "PM_CMPLU_STALL_OTHER_CMPL");
+ event_init_named(&events[3], 0x4000A, "PM_CMPLU_STALL");
+ event_init_named(&events[4], 0x600f4, "PM_RUN_CYC");
+ event_init_named(&events[5], 0x500fa, "PM_RUN_INST_CMPL");
+
+ event_leader_ebb_init(&events[0]);
+ for (i = 1; i < 6; i++)
+ event_ebb_init(&events[i]);
+
+ group_fd = -1;
+ for (i = 0; i < 6; i++) {
+ events[i].attr.exclude_kernel = 1;
+ events[i].attr.exclude_hv = 1;
+ events[i].attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_group(&events[i], group_fd));
+ if (group_fd == -1)
+ group_fd = events[0].fd;
+ }
+
+ ebb_enable_pmc_counting(1);
+ ebb_enable_pmc_counting(2);
+ ebb_enable_pmc_counting(3);
+ ebb_enable_pmc_counting(4);
+ ebb_enable_pmc_counting(5);
+ ebb_enable_pmc_counting(6);
+ setup_ebb_handler(standard_ebb_callee);
+
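+	/* PERF_IOC_FLAG_GROUP applies the enable to the leader and all group members */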
+ FAIL_IF(ioctl(events[0].fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP));
+ FAIL_IF(event_read(&events[0]));
+
+ ebb_global_enable();
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC3, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC4, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC5, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC6, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 50) {
+ FAIL_IF(core_busy_loop());
+ FAIL_IF(ebb_check_mmcr0());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ for (i = 0; i < 6; i++)
+ event_close(&events[i]);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(multi_counter, "multi_counter");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
new file mode 100644
index 000000000..3e9d4ac96
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -0,0 +1,109 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test running multiple EBB-using processes at once on a single CPU. They
+ * should all run happily without interfering with each other.
+ */
+
+static bool child_should_exit;
+
+static void sigint_handler(int signal)
+{
+ child_should_exit = true;
+}
+
+struct sigaction sigint_action = {
+ .sa_handler = sigint_handler,
+};
+
+static int cycles_child(void)
+{
+ struct event event;
+
+ if (sigaction(SIGINT, &sigint_action, NULL)) {
+ perror("sigaction");
+ return 1;
+ }
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (!child_should_exit) {
+ FAIL_IF(core_busy_loop());
+ FAIL_IF(ebb_check_mmcr0());
+ }
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_summary_ebb_state();
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ return 0;
+}
+
+#define NR_CHILDREN 4
+
+int multi_ebb_procs(void)
+{
+ pid_t pids[NR_CHILDREN];
+ int cpu, rc, i;
+
+ SKIP_IF(!ebb_is_supported());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ for (i = 0; i < NR_CHILDREN; i++) {
+ pids[i] = fork();
+ if (pids[i] == 0)
+ exit(cycles_child());
+ }
+
+ /* Have them all run for "a while" */
+ sleep(10);
+
+ rc = 0;
+ for (i = 0; i < NR_CHILDREN; i++) {
+ /* Tell them to stop */
+ kill(pids[i], SIGINT);
+ /* And wait */
+ rc |= wait_for_child(pids[i]);
+ }
+
+ return rc;
+}
+
+int main(void)
+{
+ return test_harness(multi_ebb_procs, "multi_ebb_procs");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
new file mode 100644
index 000000000..87630d44f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/* Test that things work sanely if we have no handler */
+
+static int no_handler_test(void)
+{
+ struct event event;
+ u64 val;
+ int i;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+ FAIL_IF(ebb_event_enable(&event));
+
+ val = mfspr(SPRN_EBBHR);
+ FAIL_IF(val != 0);
+
+ /* Make sure it overflows quickly */
+ sample_period = 1000;
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ /* Spin to make sure the event has time to overflow */
+ for (i = 0; i < 1000; i++)
+ mb();
+
+ dump_ebb_state();
+
+ /* We expect to see the PMU frozen & PMAO set */
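+	/* ie. MMCR0_FC (0x80000000) | MMCR0_PMAO (0x80) */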
+ val = mfspr(SPRN_MMCR0);
+ FAIL_IF(val != 0x0000000080000080);
+
+ event_close(&event);
+
+ /* The real test is that we never took an EBB at 0x0 */
+
+ return 0;
+}
+
+int main(void)
+{
+	return test_harness(no_handler_test, "no_handler_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
new file mode 100644
index 000000000..d90891fe9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that the kernel properly handles PMAE across context switches.
+ *
+ * We test this by calling into the kernel inside our EBB handler, where PMAE
+ * is clear. A cpu eater companion thread is running on the same CPU as us to
+ * encourage the scheduler to switch us.
+ *
+ * The kernel must make sure that when it context switches us back in, it
+ * honours the fact that we had PMAE clear.
+ *
+ * Observed to hit the failing case on the first EBB with a broken kernel.
+ */
+
+static bool mmcr0_mismatch;
+static uint64_t before, after;
+
+static void syscall_ebb_callee(void)
+{
+ uint64_t val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ count_pmc(1, sample_period);
+
+ before = mfspr(SPRN_MMCR0);
+
+ /* Try and get ourselves scheduled, to force a PMU context switch */
+ sched_yield();
+
+ after = mfspr(SPRN_MMCR0);
+ if (before != after)
+ mmcr0_mismatch = true;
+
+out:
+ reset_ebb();
+}
+
+static int test_body(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(syscall_ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 20 && !mmcr0_mismatch)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ if (mmcr0_mismatch)
+ printf("Saw MMCR0 before 0x%lx after 0x%lx\n", before, after);
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+ FAIL_IF(mmcr0_mismatch);
+
+ return 0;
+}
+
+int pmae_handling(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ return test_harness(pmae_handling, "pmae_handling");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
new file mode 100644
index 000000000..8ca92b9ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that PMC5 & 6 are frozen (ie. don't overflow) when they are not being
+ * used. Tests the MMCR0_FC56 logic in the kernel.
+ */
+
+static int pmc56_overflowed;
+
+static void ebb_callee(void)
+{
+ uint64_t val;
+
+ val = mfspr(SPRN_BESCR);
+ if (!(val & BESCR_PMEO)) {
+ ebb_state.stats.spurious++;
+ goto out;
+ }
+
+ ebb_state.stats.ebb_count++;
+ count_pmc(2, sample_period);
+
+ val = mfspr(SPRN_PMC5);
+ if (val >= COUNTER_OVERFLOW)
+ pmc56_overflowed++;
+
+ count_pmc(5, COUNTER_OVERFLOW);
+
+ val = mfspr(SPRN_PMC6);
+ if (val >= COUNTER_OVERFLOW)
+ pmc56_overflowed++;
+
+ count_pmc(6, COUNTER_OVERFLOW);
+
+out:
+ reset_ebb();
+}
+
+int pmc56_overflow(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ /* Use PMC2 so we set PMCjCE, which enables PMC5/6 */
+ event_init(&event, 0x2001e);
+ event_leader_ebb_init(&event);
+
+ event.attr.exclude_kernel = 1;
+ event.attr.exclude_hv = 1;
+ event.attr.exclude_idle = 1;
+
+ FAIL_IF(event_open(&event));
+
+ setup_ebb_handler(ebb_callee);
+ ebb_global_enable();
+
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC2, pmc_sample_period(sample_period));
+ mtspr(SPRN_PMC5, 0);
+ mtspr(SPRN_PMC6, 0);
+
+ while (ebb_state.stats.ebb_count < 10)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ ebb_freeze_pmcs();
+
+ dump_ebb_state();
+
+ printf("PMC5/6 overflow %d\n", pmc56_overflowed);
+
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0 || pmc56_overflowed != 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(pmc56_overflow, "pmc56_overflow");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
new file mode 100644
index 000000000..f923228bc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test basic access to the EBB regs; they should be user accessible with no
+ * kernel interaction required.
+ */
+int reg_access(void)
+{
+ uint64_t val, expected;
+
+ SKIP_IF(!ebb_is_supported());
+
+ expected = 0x8000000100000000ull;
+ mtspr(SPRN_BESCR, expected);
+ val = mfspr(SPRN_BESCR);
+
+ FAIL_IF(val != expected);
+
+ expected = 0x0000000001000000ull;
+ mtspr(SPRN_EBBHR, expected);
+ val = mfspr(SPRN_EBBHR);
+
+ FAIL_IF(val != expected);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(reg_access, "reg_access");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
new file mode 100644
index 000000000..1846f4e84
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a pinned per-task event vs an EBB - in that order. The pinned per-task
+ * event should prevent the EBB event from being enabled.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.pinned = 1;
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(event, child_pid));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int task_event_pinned_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+ int rc;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the task event first */
+ rc = setup_child_event(&event, pid);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+ /* We expect it to fail to read the event */
+ FAIL_IF(wait_for_child(pid) != 2);
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+ FAIL_IF(event.result.value == 0);
+ /*
+ * For reasons I don't understand enabled is usually just slightly
+ * lower than running. Would be good to confirm why.
+ */
+ FAIL_IF(event.result.enabled == 0);
+ FAIL_IF(event.result.running == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(task_event_pinned_vs_ebb, "task_event_pinned_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
new file mode 100644
index 000000000..e3bc6e92a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "ebb.h"
+
+
+/*
+ * Tests a per-task event vs an EBB - in that order. The EBB should push the
+ * per-task event off the PMU.
+ */
+
+static int setup_child_event(struct event *event, pid_t child_pid)
+{
+ event_init_named(event, 0x400FA, "PM_RUN_INST_CMPL");
+
+ event->attr.exclude_kernel = 1;
+ event->attr.exclude_hv = 1;
+ event->attr.exclude_idle = 1;
+
+ FAIL_IF(event_open_with_pid(event, child_pid));
+ FAIL_IF(event_enable(event));
+
+ return 0;
+}
+
+int task_event_vs_ebb(void)
+{
+ union pipe read_pipe, write_pipe;
+ struct event event;
+ pid_t pid;
+ int rc;
+
+ SKIP_IF(!ebb_is_supported());
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ /* NB order of pipes looks reversed */
+ exit(ebb_child(write_pipe, read_pipe));
+ }
+
+ /* We setup the task event first */
+ rc = setup_child_event(&event, pid);
+ if (rc) {
+ kill_child_and_wait(pid);
+ return rc;
+ }
+
+ /* Signal the child to install its EBB event and wait */
+ if (sync_with_child(read_pipe, write_pipe))
+ /* If it fails, wait for it to exit */
+ goto wait;
+
+ /* Signal the child to run */
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+wait:
+	/* The EBB event should push the task event off, so the child should succeed */
+ FAIL_IF(wait_for_child(pid));
+ FAIL_IF(event_disable(&event));
+ FAIL_IF(event_read(&event));
+
+ event_report(&event);
+
+	/* The task event may have run or not, so we can't assert anything about it */
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(task_event_vs_ebb, "task_event_vs_ebb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.c b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
new file mode 100644
index 000000000..251e66ab2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.c
@@ -0,0 +1,300 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#include "trace.h"
+
+
+struct trace_buffer *trace_buffer_allocate(u64 size)
+{
+ struct trace_buffer *tb;
+
+ if (size < sizeof(*tb)) {
+ fprintf(stderr, "Error: trace buffer too small\n");
+ return NULL;
+ }
+
+ tb = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (tb == MAP_FAILED) {
+ perror("mmap");
+ return NULL;
+ }
+
+ tb->size = size;
+ tb->tail = tb->data;
+ tb->overflow = false;
+
+ return tb;
+}
+
+static bool trace_check_bounds(struct trace_buffer *tb, void *p)
+{
+ return p < ((void *)tb + tb->size);
+}
+
+static bool trace_check_alloc(struct trace_buffer *tb, void *p)
+{
+ /*
+ * If we ever overflowed don't allow any more input. This prevents us
+ * from dropping a large item and then later logging a small one. The
+ * buffer should just stop when overflow happened, not be patchy. If
+ * you're overflowing, make your buffer bigger.
+ */
+ if (tb->overflow)
+ return false;
+
+ if (!trace_check_bounds(tb, p)) {
+ tb->overflow = true;
+ return false;
+ }
+
+ return true;
+}
+
+static void *trace_alloc(struct trace_buffer *tb, int bytes)
+{
+ void *p, *newtail;
+
+ p = tb->tail;
+ newtail = tb->tail + bytes;
+ if (!trace_check_alloc(tb, newtail))
+ return NULL;
+
+ tb->tail = newtail;
+
+ return p;
+}
+
+static struct trace_entry *trace_alloc_entry(struct trace_buffer *tb, int payload_size)
+{
+ struct trace_entry *e;
+
+ e = trace_alloc(tb, sizeof(*e) + payload_size);
+ if (e)
+ e->length = payload_size;
+
+ return e;
+}
+
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value)
+{
+ struct trace_entry *e;
+ u64 *p;
+
+ e = trace_alloc_entry(tb, sizeof(reg) + sizeof(value));
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_REG;
+ p = (u64 *)e->data;
+ *p++ = reg;
+ *p++ = value;
+
+ return 0;
+}
+
+int trace_log_counter(struct trace_buffer *tb, u64 value)
+{
+ struct trace_entry *e;
+ u64 *p;
+
+ e = trace_alloc_entry(tb, sizeof(value));
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_COUNTER;
+ p = (u64 *)e->data;
+ *p++ = value;
+
+ return 0;
+}
+
+int trace_log_string(struct trace_buffer *tb, char *str)
+{
+ struct trace_entry *e;
+ char *p;
+ int len;
+
+ len = strlen(str);
+
+ /* We NULL terminate to make printing easier */
+ e = trace_alloc_entry(tb, len + 1);
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_STRING;
+ p = (char *)e->data;
+ memcpy(p, str, len);
+ p += len;
+ *p = '\0';
+
+ return 0;
+}
+
+int trace_log_indent(struct trace_buffer *tb)
+{
+ struct trace_entry *e;
+
+ e = trace_alloc_entry(tb, 0);
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_INDENT;
+
+ return 0;
+}
+
+int trace_log_outdent(struct trace_buffer *tb)
+{
+ struct trace_entry *e;
+
+ e = trace_alloc_entry(tb, 0);
+ if (!e)
+ return -ENOSPC;
+
+ e->type = TRACE_TYPE_OUTDENT;
+
+ return 0;
+}
+
+static void trace_print_header(int seq, int prefix)
+{
+ printf("%*s[%d]: ", prefix, "", seq);
+}
+
+static char *trace_decode_reg(int reg)
+{
+ switch (reg) {
+ case 769: return "SPRN_MMCR2"; break;
+ case 770: return "SPRN_MMCRA"; break;
+ case 779: return "SPRN_MMCR0"; break;
+ case 804: return "SPRN_EBBHR"; break;
+ case 805: return "SPRN_EBBRR"; break;
+ case 806: return "SPRN_BESCR"; break;
+ case 800: return "SPRN_BESCRS"; break;
+ case 801: return "SPRN_BESCRSU"; break;
+ case 802: return "SPRN_BESCRR"; break;
+ case 803: return "SPRN_BESCRRU"; break;
+ case 771: return "SPRN_PMC1"; break;
+ case 772: return "SPRN_PMC2"; break;
+ case 773: return "SPRN_PMC3"; break;
+ case 774: return "SPRN_PMC4"; break;
+ case 775: return "SPRN_PMC5"; break;
+ case 776: return "SPRN_PMC6"; break;
+ case 780: return "SPRN_SIAR"; break;
+ case 781: return "SPRN_SDAR"; break;
+ case 768: return "SPRN_SIER"; break;
+ }
+
+ return NULL;
+}
+
+static void trace_print_reg(struct trace_entry *e)
+{
+ u64 *p, *reg, *value;
+ char *name;
+
+ p = (u64 *)e->data;
+ reg = p++;
+ value = p;
+
+ name = trace_decode_reg(*reg);
+ if (name)
+ printf("register %-10s = 0x%016llx\n", name, *value);
+ else
+ printf("register %lld = 0x%016llx\n", *reg, *value);
+}
+
+static void trace_print_counter(struct trace_entry *e)
+{
+ u64 *value;
+
+ value = (u64 *)e->data;
+ printf("counter = %lld\n", *value);
+}
+
+static void trace_print_string(struct trace_entry *e)
+{
+ char *str;
+
+ str = (char *)e->data;
+ puts(str);
+}
+
+#define BASE_PREFIX 2
+#define PREFIX_DELTA 8
+
+static void trace_print_entry(struct trace_entry *e, int seq, int *prefix)
+{
+ switch (e->type) {
+ case TRACE_TYPE_REG:
+ trace_print_header(seq, *prefix);
+ trace_print_reg(e);
+ break;
+ case TRACE_TYPE_COUNTER:
+ trace_print_header(seq, *prefix);
+ trace_print_counter(e);
+ break;
+ case TRACE_TYPE_STRING:
+ trace_print_header(seq, *prefix);
+ trace_print_string(e);
+ break;
+ case TRACE_TYPE_INDENT:
+ trace_print_header(seq, *prefix);
+ puts("{");
+ *prefix += PREFIX_DELTA;
+ break;
+ case TRACE_TYPE_OUTDENT:
+ *prefix -= PREFIX_DELTA;
+ if (*prefix < BASE_PREFIX)
+ *prefix = BASE_PREFIX;
+ trace_print_header(seq, *prefix);
+ puts("}");
+ break;
+ default:
+ trace_print_header(seq, *prefix);
+ printf("entry @ %p type %d\n", e, e->type);
+ break;
+ }
+}
+
+void trace_buffer_print(struct trace_buffer *tb)
+{
+ struct trace_entry *e;
+ int i, prefix;
+ void *p;
+
+ printf("Trace buffer dump:\n");
+	printf("  address  %p\n", tb);
+ printf(" tail %p\n", tb->tail);
+ printf(" size %llu\n", tb->size);
+ printf(" overflow %s\n", tb->overflow ? "TRUE" : "false");
+ printf(" Content:\n");
+
+ p = tb->data;
+
+ i = 0;
+ prefix = BASE_PREFIX;
+
+ while (trace_check_bounds(tb, p) && p < tb->tail) {
+ e = p;
+
+ trace_print_entry(e, i, &prefix);
+
+ i++;
+ p = (void *)e + sizeof(*e) + e->length;
+ }
+}
+
+void trace_print_location(struct trace_buffer *tb)
+{
+ printf("Trace buffer 0x%llx bytes @ %p\n", tb->size, tb);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
new file mode 100644
index 000000000..926458e28
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+#define _SELFTESTS_POWERPC_PMU_EBB_TRACE_H
+
+#include "utils.h"
+
+#define TRACE_TYPE_REG 1
+#define TRACE_TYPE_COUNTER 2
+#define TRACE_TYPE_STRING 3
+#define TRACE_TYPE_INDENT 4
+#define TRACE_TYPE_OUTDENT 5
+
+struct trace_entry
+{
+ u8 type;
+ u8 length;
+ u8 data[0];
+};
+
+struct trace_buffer
+{
+ u64 size;
+ bool overflow;
+ void *tail;
+ u8 data[0];
+};
+
+struct trace_buffer *trace_buffer_allocate(u64 size);
+int trace_log_reg(struct trace_buffer *tb, u64 reg, u64 value);
+int trace_log_counter(struct trace_buffer *tb, u64 value);
+int trace_log_string(struct trace_buffer *tb, char *str);
+int trace_log_indent(struct trace_buffer *tb);
+int trace_log_outdent(struct trace_buffer *tb);
+void trace_buffer_print(struct trace_buffer *tb);
+void trace_print_location(struct trace_buffer *tb);
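+
+/*
+ * Illustrative usage (a sketch, not code the tests call verbatim):
+ *
+ *	struct trace_buffer *tb = trace_buffer_allocate(1024 * 1024);
+ *	trace_log_string(tb, "in ebb handler");
+ *	trace_log_counter(tb, 42);
+ *	trace_buffer_print(tb);
+ */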
+
+#endif /* _SELFTESTS_POWERPC_PMU_EBB_TRACE_H */
diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
new file mode 100644
index 000000000..184b36807
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+
+#include "event.h"
+
+
+int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+}
+
+void event_init_opts(struct event *e, u64 config, int type, char *name)
+{
+ memset(e, 0, sizeof(*e));
+
+ e->name = name;
+
+ e->attr.type = type;
+ e->attr.config = config;
+ e->attr.size = sizeof(e->attr);
+ /* This has to match the structure layout in the header */
+ e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
+}
+
+void event_init_named(struct event *e, u64 config, char *name)
+{
+ event_init_opts(e, config, PERF_TYPE_RAW, name);
+}
+
+void event_init(struct event *e, u64 config)
+{
+ event_init_opts(e, config, PERF_TYPE_RAW, "event");
+}
+
+#define PERF_CURRENT_PID 0
+#define PERF_NO_PID -1
+#define PERF_NO_CPU -1
+#define PERF_NO_GROUP -1
+
+int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd)
+{
+ e->fd = perf_event_open(&e->attr, pid, cpu, group_fd, 0);
+ if (e->fd == -1) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ return 0;
+}
+
+int event_open_with_group(struct event *e, int group_fd)
+{
+ return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, group_fd);
+}
+
+int event_open_with_pid(struct event *e, pid_t pid)
+{
+ return event_open_with_options(e, pid, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+int event_open_with_cpu(struct event *e, int cpu)
+{
+ return event_open_with_options(e, PERF_NO_PID, cpu, PERF_NO_GROUP);
+}
+
+int event_open(struct event *e)
+{
+ return event_open_with_options(e, PERF_CURRENT_PID, PERF_NO_CPU, PERF_NO_GROUP);
+}
+
+void event_close(struct event *e)
+{
+ close(e->fd);
+}
+
+int event_enable(struct event *e)
+{
+ return ioctl(e->fd, PERF_EVENT_IOC_ENABLE);
+}
+
+int event_disable(struct event *e)
+{
+ return ioctl(e->fd, PERF_EVENT_IOC_DISABLE);
+}
+
+int event_reset(struct event *e)
+{
+ return ioctl(e->fd, PERF_EVENT_IOC_RESET);
+}
+
+int event_read(struct event *e)
+{
+ int rc;
+
+ rc = read(e->fd, &e->result, sizeof(e->result));
+ if (rc != sizeof(e->result)) {
+ fprintf(stderr, "read error on event %p!\n", e);
+ return -1;
+ }
+
+ return 0;
+}
+
+void event_report_justified(struct event *e, int name_width, int result_width)
+{
+ printf("%*s: result %*llu ", name_width, e->name, result_width,
+ e->result.value);
+
+ if (e->result.running == e->result.enabled)
+ printf("running/enabled %llu\n", e->result.running);
+ else
+ printf("running %llu enabled %llu\n", e->result.running,
+ e->result.enabled);
+}
+
+void event_report(struct event *e)
+{
+ event_report_justified(e, 0, 0);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
new file mode 100644
index 000000000..a0ea6b1ee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PMU_EVENT_H
+#define _SELFTESTS_POWERPC_PMU_EVENT_H
+
+#include <unistd.h>
+#include <linux/perf_event.h>
+
+#include "utils.h"
+
+
+struct event {
+ struct perf_event_attr attr;
+ char *name;
+ int fd;
+ /* This must match the read_format we use */
+ struct {
+ u64 value;
+ u64 running;
+ u64 enabled;
+ } result;
+};
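+
+/*
+ * Note: for the read_format set in event.c, perf_event_open(2) documents the
+ * read layout as { value, time_enabled, time_running }, which suggests
+ * 'running' and 'enabled' above end up holding each other's values. The
+ * tests mostly compare the two against each other or against zero, so the
+ * naming doesn't affect the results.
+ */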
+
+void event_init(struct event *e, u64 config);
+void event_init_named(struct event *e, u64 config, char *name);
+void event_init_opts(struct event *e, u64 config, int type, char *name);
+int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
+int event_open_with_group(struct event *e, int group_fd);
+int event_open_with_pid(struct event *e, pid_t pid);
+int event_open_with_cpu(struct event *e, int cpu);
+int event_open(struct event *e);
+void event_close(struct event *e);
+int event_enable(struct event *e);
+int event_disable(struct event *e);
+int event_reset(struct event *e);
+int event_read(struct event *e);
+void event_report_justified(struct event *e, int name_width, int result_width);
+void event_report(struct event *e);
+
+#endif /* _SELFTESTS_POWERPC_PMU_EVENT_H */
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
new file mode 100644
index 000000000..77472f314
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "event.h"
+#include "utils.h"
+
+#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */
+
+/*
+ * Tests that the L3 bank handling is correct. We fixed it in commit e9aaac1.
+ */
+static int l3_bank_test(void)
+{
+ struct event event;
+ char *p;
+ int i;
+
+ p = malloc(MALLOC_SIZE);
+ FAIL_IF(!p);
+
+ event_init(&event, 0x84918F);
+
+ FAIL_IF(event_open(&event));
+
+ for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+ p[i] = i;
+
+ event_read(&event);
+ event_report(&event);
+
+ FAIL_IF(event.result.running == 0);
+ FAIL_IF(event.result.enabled == 0);
+
+ event_close(&event);
+ free(p);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(l3_bank_test, "l3_bank_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
new file mode 100644
index 000000000..5bf5dd408
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -0,0 +1,227 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE /* For CPU_ZERO etc. */
+
+#include <errno.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+#include "lib.h"
+
+
+int bind_to_cpu(int cpu)
+{
+ cpu_set_t mask;
+
+ printf("Binding to cpu %d\n", cpu);
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+
+ return sched_setaffinity(0, sizeof(mask), &mask);
+}
+
+#define PARENT_TOKEN 0xAA
+#define CHILD_TOKEN 0x55
+
+int sync_with_child(union pipe read_pipe, union pipe write_pipe)
+{
+ char c = PARENT_TOKEN;
+
+ FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+ FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+ if (c != CHILD_TOKEN) /* sometimes expected */
+ return 1;
+
+ return 0;
+}
+
+int wait_for_parent(union pipe read_pipe)
+{
+ char c;
+
+ FAIL_IF(read(read_pipe.read_fd, &c, 1) != 1);
+ FAIL_IF(c != PARENT_TOKEN);
+
+ return 0;
+}
+
+int notify_parent(union pipe write_pipe)
+{
+ char c = CHILD_TOKEN;
+
+ FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+ return 0;
+}
+
+int notify_parent_of_error(union pipe write_pipe)
+{
+ char c = ~CHILD_TOKEN;
+
+ FAIL_IF(write(write_pipe.write_fd, &c, 1) != 1);
+
+ return 0;
+}
+
+int wait_for_child(pid_t child_pid)
+{
+ int rc;
+
+ if (waitpid(child_pid, &rc, 0) == -1) {
+ perror("waitpid");
+ return 1;
+ }
+
+ if (WIFEXITED(rc))
+ rc = WEXITSTATUS(rc);
+ else
+ rc = 1; /* Signal or other */
+
+ return rc;
+}
+
+int kill_child_and_wait(pid_t child_pid)
+{
+ kill(child_pid, SIGTERM);
+
+ return wait_for_child(child_pid);
+}
+
+static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe)
+{
+ volatile int i = 0;
+
+ /*
+ * We are just here to eat cpu and die. So make sure we can be killed,
+ * and also don't do any custom SIGTERM handling.
+ */
+ signal(SIGTERM, SIG_DFL);
+
+ notify_parent(write_pipe);
+ wait_for_parent(read_pipe);
+
+ /* Soak up cpu forever */
+ while (1) i++;
+
+ return 0;
+}
+
+pid_t eat_cpu(int (test_function)(void))
+{
+ union pipe read_pipe, write_pipe;
+ int cpu, rc;
+ pid_t pid;
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+ FAIL_IF(bind_to_cpu(cpu));
+
+ if (pipe(read_pipe.fds) == -1)
+ return -1;
+
+ if (pipe(write_pipe.fds) == -1)
+ return -1;
+
+ pid = fork();
+ if (pid == 0)
+ exit(eat_cpu_child(write_pipe, read_pipe));
+
+ if (sync_with_child(read_pipe, write_pipe)) {
+ rc = -1;
+ goto out;
+ }
+
+ printf("main test running as pid %d\n", getpid());
+
+ rc = test_function();
+out:
+ kill(pid, SIGKILL);
+
+ return rc;
+}
+
+struct addr_range libc, vdso;
+
+int parse_proc_maps(void)
+{
+ unsigned long start, end;
+ char execute, name[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ do {
+ /* This skips line with no executable which is what we want */
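+		/* A typical line (illustrative): "3fffb7e10000-3fffb7fa0000 r-xp 00000000 fd:00 49154 /lib64/libc-2.19.so" */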
+ rc = fscanf(f, "%lx-%lx %*c%*c%c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, &execute, name);
+ if (rc <= 0)
+ break;
+
+ if (execute != 'x')
+ continue;
+
+ if (strstr(name, "libc")) {
+ libc.first = start;
+ libc.last = end - 1;
+ } else if (strstr(name, "[vdso]")) {
+ vdso.first = start;
+ vdso.last = end - 1;
+ }
+	} while (1);
+
+ fclose(f);
+
+ return 0;
+}
+
+#define PARANOID_PATH "/proc/sys/kernel/perf_event_paranoid"
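+
+/*
+ * Roughly: for unprivileged users, paranoid >= 2 disallows kernel profiling,
+ * >= 1 disallows CPU-wide events, and -1 allows everything. So eg.
+ * require_paranoia_below(1) checks that CPU-wide events are permitted.
+ */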
+
+bool require_paranoia_below(int level)
+{
+ long current;
+ char *end, buf[16];
+ FILE *f;
+ bool rc;
+
+ rc = false;
+
+ f = fopen(PARANOID_PATH, "r");
+ if (!f) {
+ perror("fopen");
+ goto out;
+ }
+
+ if (!fgets(buf, sizeof(buf), f)) {
+ printf("Couldn't read " PARANOID_PATH "?\n");
+ goto out_close;
+ }
+
+ current = strtol(buf, &end, 10);
+
+ if (end == buf) {
+ printf("Couldn't parse " PARANOID_PATH "?\n");
+ goto out_close;
+ }
+
+ if (current >= level)
+ goto out_close;
+
+ rc = true;
+out_close:
+ fclose(f);
+out:
+ return rc;
+}
+
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
new file mode 100644
index 000000000..0213af4ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
+#define __SELFTESTS_POWERPC_PMU_LIB_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
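+/*
+ * Overlays the fds[2] array that pipe(2) fills in with named members,
+ * so callers can refer to p.read_fd and p.write_fd directly.
+ */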
+union pipe {
+ struct {
+ int read_fd;
+ int write_fd;
+ };
+ int fds[2];
+};
+
+extern int bind_to_cpu(int cpu);
+extern int kill_child_and_wait(pid_t child_pid);
+extern int wait_for_child(pid_t child_pid);
+extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
+extern int wait_for_parent(union pipe read_pipe);
+extern int notify_parent(union pipe write_pipe);
+extern int notify_parent_of_error(union pipe write_pipe);
+extern pid_t eat_cpu(int (test_function)(void));
+extern bool require_paranoia_below(int level);
+
+struct addr_range {
+ uint64_t first, last;
+};
+
+extern struct addr_range libc, vdso;
+
+int parse_proc_maps(void);
+
+#endif /* __SELFTESTS_POWERPC_PMU_LIB_H */
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
new file mode 100644
index 000000000..20c1f0876
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
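+/*
+ * Decrement r3 to zero, executing exactly 32 instructions per pass
+ * (cmpdi, beqlr, 28 addi, subi, b), so the total instruction count is
+ * a precise multiple of the iteration count.
+ */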
+FUNC_START(thirty_two_instruction_loop)
+ cmpdi r3,0
+ beqlr
+ addi r4,r3,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1
+ addi r4,r4,1 # 28 addi's
+ subi r3,r3,1
+ b FUNC_NAME(thirty_two_instruction_loop)
+FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
new file mode 100644
index 000000000..fddbbc9ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2014, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <elf.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "lib.h"
+#include "utils.h"
+
+/*
+ * Test that per-event excludes work.
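+ *
+ * Per-event excludes allow each event in a group to carry its own
+ * exclude_user/exclude_kernel/exclude_hv settings rather than forcing
+ * the whole group to share one set.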
+ */
+
+static int per_event_excludes(void)
+{
+ struct event *e, events[4];
+ char *platform;
+ int i;
+
+ platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
+ FAIL_IF(!platform);
+ SKIP_IF(strcmp(platform, "power8") != 0);
+
+ /*
+ * We need to create the events disabled, otherwise the running/enabled
+ * counts don't match up.
+ */
+ e = &events[0];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions");
+ e->attr.disabled = 1;
+
+ e = &events[1];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions(k)");
+ e->attr.disabled = 1;
+ e->attr.exclude_user = 1;
+ e->attr.exclude_hv = 1;
+
+ e = &events[2];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions(h)");
+ e->attr.disabled = 1;
+ e->attr.exclude_user = 1;
+ e->attr.exclude_kernel = 1;
+
+ e = &events[3];
+ event_init_opts(e, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "instructions(u)");
+ e->attr.disabled = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&events[0]));
+
+ /*
+ * The open here will fail if we don't have per event exclude support,
+ * because the second event has an incompatible set of exclude settings
+ * and we're asking for the events to be in a group.
+ */
+ for (i = 1; i < 4; i++)
+ FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+ /*
+ * Even though the above will fail without per-event excludes we keep
+ * testing in order to be thorough.
+ */
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+ /* Spin for a while */
+ for (i = 0; i < INT_MAX; i++)
+ asm volatile("" : : : "memory");
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ for (i = 0; i < 4; i++) {
+ FAIL_IF(event_read(&events[i]));
+ event_report(&events[i]);
+ }
+
+ /*
+ * We should see that all events have enabled == running. That
+ * shows that they were all on the PMU at once.
+ */
+ for (i = 0; i < 4; i++)
+ FAIL_IF(events[i].result.running != events[i].result.enabled);
+
+ /*
+ * We can also check that the result for instructions is >= all the
+ * other counts. That's because it is counting all instructions while
+ * the others are counting a subset.
+ */
+ for (i = 1; i < 4; i++)
+ FAIL_IF(events[0].result.value < events[i].result.value);
+
+ for (i = 0; i < 4; i++)
+ event_close(&events[i]);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(per_event_excludes, "per_event_excludes");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore
new file mode 100644
index 000000000..4cc4e31be
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/.gitignore
@@ -0,0 +1 @@
+load_unaligned_zeropad
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
new file mode 100644
index 000000000..ea2b7bd09
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -0,0 +1,8 @@
+CFLAGS += -I$(CURDIR)
+
+TEST_GEN_PROGS := load_unaligned_zeropad
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h
new file mode 120000
index 000000000..b14255e15
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-compat.h
@@ -0,0 +1 @@
+../.././../../../../arch/powerpc/include/asm/asm-compat.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/asm-const.h b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
new file mode 120000
index 000000000..18d8be13e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/asm-const.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/asm-const.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
new file mode 120000
index 000000000..8dc6d4d46
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/feature-fixups.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/feature-fixups.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/firmware.h b/tools/testing/selftests/powerpc/primitives/asm/firmware.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/firmware.h
diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/ppc-opcode.h
diff --git a/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h
new file mode 120000
index 000000000..66c819322
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/ppc_asm.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/ppc_asm.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/primitives/asm/processor.h b/tools/testing/selftests/powerpc/primitives/asm/processor.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/processor.h
diff --git a/tools/testing/selftests/powerpc/primitives/linux/stringify.h b/tools/testing/selftests/powerpc/primitives/linux/stringify.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/linux/stringify.h
diff --git a/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
new file mode 100644
index 000000000..ed3239bbf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/load_unaligned_zeropad.c
@@ -0,0 +1,170 @@
+/*
+ * Userspace test harness for load_unaligned_zeropad. Creates two
+ * pages and uses mprotect to prevent access to the second page and
+ * a SEGV handler that walks the exception tables and runs the fixup
+ * routine.
+ *
+ * The results are compared against a normal load that is performed
+ * while access to the second page is enabled via mprotect.
+ *
+ * Copyright (C) 2014 Anton Blanchard <anton@au.ibm.com>, IBM
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#define FIXUP_SECTION ".ex_fixup"
+
+static inline unsigned long __fls(unsigned long x);
+
+#include "word-at-a-time.h"
+
+#include "utils.h"
+
+static inline unsigned long __fls(unsigned long x)
+{
+ int lz;
+
+ asm (PPC_CNTLZL "%0,%1" : "=r" (lz) : "r" (x));
+ return sizeof(unsigned long) - 1 - lz;
+}
+
+static int page_size;
+static char *mem_region;
+
+static int protect_region(void)
+{
+ if (mprotect(mem_region + page_size, page_size, PROT_NONE)) {
+ perror("mprotect");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int unprotect_region(void)
+{
+ if (mprotect(mem_region + page_size, page_size, PROT_READ|PROT_WRITE)) {
+ perror("mprotect");
+ return 1;
+ }
+
+ return 0;
+}
+
+extern char __start___ex_table[];
+extern char __stop___ex_table[];
+
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
+struct extbl_entry {
+ int insn;
+ int fixup;
+};
+
+static void segv_handler(int signr, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = (ucontext_t *)ptr;
+ unsigned long addr = (unsigned long)info->si_addr;
+ unsigned long *ip = &UCONTEXT_NIA(uc);
+ struct extbl_entry *entry = (struct extbl_entry *)__start___ex_table;
+
+ while (entry < (struct extbl_entry *)__stop___ex_table) {
+ unsigned long insn, fixup;
+
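+ /*
+ * Entries hold 32-bit offsets relative to their own address,
+ * mirroring the kernel's relative exception table format;
+ * convert them back to absolute addresses.
+ */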
+ insn = (unsigned long)&entry->insn + entry->insn;
+ fixup = (unsigned long)&entry->fixup + entry->fixup;
+
+ if (insn == *ip) {
+ *ip = fixup;
+ return;
+ }
+ entry++;
+ }
+
+ printf("No exception table match for NIA %lx ADDR %lx\n", *ip, addr);
+ abort();
+}
+
+static void setup_segv_handler(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = segv_handler;
+ action.sa_flags = SA_SIGINFO;
+ sigaction(SIGSEGV, &action, NULL);
+}
+
+static int do_one_test(char *p, int page_offset)
+{
+ unsigned long should;
+ unsigned long got;
+
+ FAIL_IF(unprotect_region());
+ should = *(unsigned long *)p;
+ FAIL_IF(protect_region());
+
+ got = load_unaligned_zeropad(p);
+
+ if (should != got) {
+ printf("offset %u load_unaligned_zeropad returned 0x%lx, should be 0x%lx\n", page_offset, got, should);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_body(void)
+{
+ unsigned long i;
+
+ page_size = getpagesize();
+ mem_region = mmap(NULL, page_size * 2, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+
+ FAIL_IF(mem_region == MAP_FAILED);
+
+ for (i = 0; i < page_size; i++)
+ mem_region[i] = i;
+
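+ /*
+ * Zero-fill the second page so the reference load (done while the
+ * page is accessible) matches load_unaligned_zeropad()'s zero
+ * padding of the bytes past the protected boundary.
+ */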
+ memset(mem_region+page_size, 0, page_size);
+
+ setup_segv_handler();
+
+ for (i = 0; i < page_size; i++)
+ FAIL_IF(do_one_test(mem_region+i, i));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_body, "load_unaligned_zeropad");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/word-at-a-time.h b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h
new file mode 120000
index 000000000..eb74401b5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/word-at-a-time.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/asm/word-at-a-time.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
new file mode 100644
index 000000000..07ec449a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -0,0 +1,12 @@
+ptrace-gpr
+ptrace-tm-gpr
+ptrace-tm-spd-gpr
+ptrace-tar
+ptrace-tm-tar
+ptrace-tm-spd-tar
+ptrace-vsx
+ptrace-tm-vsx
+ptrace-tm-spd-vsx
+ptrace-tm-spr
+ptrace-hwbreak
+perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
new file mode 100644
index 000000000..9f9423430
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
+ ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
+ ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
+ perf-hwbreak
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
+
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
new file mode 100644
index 000000000..d7275b7b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Helper functions to sync execution between parent and child processes.
+ *
+ * Copyright 2018, Thiago Jung Bauermann, IBM Corporation.
+ */
+#include <stdio.h>
+#include <stdbool.h>
+#include <semaphore.h>
+
+/*
+ * Information in a shared memory location for synchronization between child and
+ * parent.
+ */
+struct child_sync {
+ /* The parent waits on this semaphore. */
+ sem_t sem_parent;
+
+ /* If true, the child should give up as well. */
+ bool parent_gave_up;
+
+ /* The child waits on this semaphore. */
+ sem_t sem_child;
+
+ /* If true, the parent should give up as well. */
+ bool child_gave_up;
+};
+
+#define CHILD_FAIL_IF(x, sync) \
+ do { \
+ if (x) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ (sync)->child_gave_up = true; \
+ prod_parent(sync); \
+ return 1; \
+ } \
+ } while (0)
+
+#define PARENT_FAIL_IF(x, sync) \
+ do { \
+ if (x) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ (sync)->parent_gave_up = true; \
+ prod_child(sync); \
+ return 1; \
+ } \
+ } while (0)
+
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \
+ do { \
+ if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
+ (sync)->parent_gave_up = true; \
+ prod_child(sync); \
+ SKIP_IF(1); \
+ } \
+ } while (0)
+
+int init_child_sync(struct child_sync *sync)
+{
+ int ret;
+
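+ /* pshared = 1: the semaphores live in shared memory and work across fork() */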
+ ret = sem_init(&sync->sem_parent, 1, 0);
+ if (ret) {
+ perror("Semaphore initialization failed");
+ return 1;
+ }
+
+ ret = sem_init(&sync->sem_child, 1, 0);
+ if (ret) {
+ perror("Semaphore initialization failed");
+ return 1;
+ }
+
+ return 0;
+}
+
+void destroy_child_sync(struct child_sync *sync)
+{
+ sem_destroy(&sync->sem_parent);
+ sem_destroy(&sync->sem_child);
+}
+
+int wait_child(struct child_sync *sync)
+{
+ int ret;
+
+ /* Wait until the child prods us. */
+ ret = sem_wait(&sync->sem_parent);
+ if (ret) {
+ perror("Error waiting for child");
+ return 1;
+ }
+
+ return sync->child_gave_up;
+}
+
+int prod_child(struct child_sync *sync)
+{
+ int ret;
+
+ /* Unblock the child now. */
+ ret = sem_post(&sync->sem_child);
+ if (ret) {
+ perror("Error prodding child");
+ return 1;
+ }
+
+ return 0;
+}
+
+int wait_parent(struct child_sync *sync)
+{
+ int ret;
+
+ /* Wait until the parent prods us. */
+ ret = sem_wait(&sync->sem_child);
+ if (ret) {
+ perror("Error waiting for parent");
+ return 1;
+ }
+
+ return sync->parent_gave_up;
+}
+
+int prod_parent(struct child_sync *sync)
+{
+ int ret;
+
+ /* Unblock the parent now. */
+ ret = sem_post(&sync->sem_parent);
+ if (ret) {
+ perror("Error prodding parent");
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
new file mode 100644
index 000000000..d5c64fee0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -0,0 +1,468 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include <limits.h>
+#include <linux/kernel.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc 384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free 385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+#endif
+
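+/*
+ * Each pkey controls AMR_BITS_PER_PKEY adjacent bits in the AMR, IAMR
+ * and UAMOR, allocated from the most-significant end; pkeyshift() maps
+ * a key number to the bit position of its 2-bit field.
+ */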
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */
+
+static const char core_pattern_file[] = "/proc/sys/kernel/core_pattern";
+
+static const char user_write[] = "[User Write (Running)]";
+static const char core_read_running[] = "[Core Read (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+ struct child_sync child_sync;
+
+ /* AMR value the parent expects to read in the core file. */
+ unsigned long amr;
+
+ /* IAMR value the parent expects to read in the core file. */
+ unsigned long iamr;
+
+ /* UAMOR value the parent expects to read in the core file. */
+ unsigned long uamor;
+
+ /* When the child crashed. */
+ time_t core_time;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+ return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+static int increase_core_file_limit(void)
+{
+ struct rlimit rlim;
+ int ret;
+
+ ret = getrlimit(RLIMIT_CORE, &rlim);
+ FAIL_IF(ret);
+
+ if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+ rlim.rlim_cur = CORE_FILE_LIMIT;
+
+ if (rlim.rlim_max != RLIM_INFINITY &&
+ rlim.rlim_max < CORE_FILE_LIMIT)
+ rlim.rlim_max = CORE_FILE_LIMIT;
+
+ ret = setrlimit(RLIMIT_CORE, &rlim);
+ FAIL_IF(ret);
+ }
+
+ ret = getrlimit(RLIMIT_FSIZE, &rlim);
+ FAIL_IF(ret);
+
+ if (rlim.rlim_cur != RLIM_INFINITY && rlim.rlim_cur < CORE_FILE_LIMIT) {
+ rlim.rlim_cur = CORE_FILE_LIMIT;
+
+ if (rlim.rlim_max != RLIM_INFINITY &&
+ rlim.rlim_max < CORE_FILE_LIMIT)
+ rlim.rlim_max = CORE_FILE_LIMIT;
+
+ ret = setrlimit(RLIMIT_FSIZE, &rlim);
+ FAIL_IF(ret);
+ }
+
+ return TEST_PASS;
+}
+
+static int child(struct shared_info *info)
+{
+ bool disable_execute = true;
+ int pkey1, pkey2, pkey3;
+ int *ptr, ret;
+
+ /* Wait until parent fills out the initial register values. */
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ ret = increase_core_file_limit();
+ FAIL_IF(ret);
+
+ /* Get some pkeys so that we can change their bits in the AMR. */
+ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+ if (pkey1 < 0) {
+ pkey1 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey1 < 0);
+
+ disable_execute = false;
+ }
+
+ pkey2 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey2 < 0);
+
+ pkey3 = sys_pkey_alloc(0, 0);
+ FAIL_IF(pkey3 < 0);
+
+ info->amr |= 3ul << pkeyshift(pkey1) | 2ul << pkeyshift(pkey2);
+
+ if (disable_execute)
+ info->iamr |= 1ul << pkeyshift(pkey1);
+ else
+ info->iamr &= ~(1ul << pkeyshift(pkey1));
+
+ info->iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
+
+ info->uamor |= 3ul << pkeyshift(pkey1) | 3ul << pkeyshift(pkey2);
+
+ printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+ user_write, info->amr, pkey1, pkey2, pkey3);
+
+ mtspr(SPRN_AMR, info->amr);
+
+ /*
+ * We won't use pkey3. This tests whether the kernel restores the UAMOR
+ * permissions after a key is freed.
+ */
+ sys_pkey_free(pkey3);
+
+ info->core_time = time(NULL);
+
+ /* Crash. */
+ ptr = 0;
+ *ptr = 1;
+
+ /* Shouldn't get here. */
+ FAIL_IF(true);
+
+ return TEST_FAIL;
+}
+
+/* Return the file size if the file exists and passes sanity checks, or TEST_FAIL if not. */
+static off_t try_core_file(const char *filename, struct shared_info *info,
+ pid_t pid)
+{
+ struct stat buf;
+ int ret;
+
+ ret = stat(filename, &buf);
+ if (ret == -1)
+ return TEST_FAIL;
+
+ /* Make sure we're not using a stale core file. */
+ return buf.st_mtime >= info->core_time ? buf.st_size : TEST_FAIL;
+}
+
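+/* ELF note name and desc fields are each padded to 4-byte alignment. */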
+static Elf64_Nhdr *next_note(Elf64_Nhdr *nhdr)
+{
+ return (void *) nhdr + sizeof(*nhdr) +
+ __ALIGN_KERNEL(nhdr->n_namesz, 4) +
+ __ALIGN_KERNEL(nhdr->n_descsz, 4);
+}
+
+static int check_core_file(struct shared_info *info, Elf64_Ehdr *ehdr,
+ off_t core_size)
+{
+ unsigned long *regs;
+ Elf64_Phdr *phdr;
+ Elf64_Nhdr *nhdr;
+ size_t phdr_size;
+ void *p = ehdr, *note;
+ int ret;
+
+ ret = memcmp(ehdr->e_ident, ELFMAG, SELFMAG);
+ FAIL_IF(ret);
+
+ FAIL_IF(ehdr->e_type != ET_CORE);
+ FAIL_IF(ehdr->e_machine != EM_PPC64);
+ FAIL_IF(ehdr->e_phoff == 0 || ehdr->e_phnum == 0);
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(*phdr) * ehdr->e_phnum;
+
+ /* Sanity check the program header table location. */
+ FAIL_IF(ehdr->e_phoff + phdr_size < ehdr->e_phoff);
+ FAIL_IF(ehdr->e_phoff + phdr_size > core_size);
+
+ /* Find the PT_NOTE segment. */
+ for (phdr = p + ehdr->e_phoff;
+ (void *) phdr < p + ehdr->e_phoff + phdr_size;
+ phdr += ehdr->e_phentsize)
+ if (phdr->p_type == PT_NOTE)
+ break;
+
+ FAIL_IF((void *) phdr >= p + ehdr->e_phoff + phdr_size);
+
+ /* Find the NT_PPC_PKEY note. */
+ for (nhdr = p + phdr->p_offset;
+ (void *) nhdr < p + phdr->p_offset + phdr->p_filesz;
+ nhdr = next_note(nhdr))
+ if (nhdr->n_type == NT_PPC_PKEY)
+ break;
+
+ FAIL_IF((void *) nhdr >= p + phdr->p_offset + phdr->p_filesz);
+ FAIL_IF(nhdr->n_descsz == 0);
+
+ p = nhdr;
+ note = p + sizeof(*nhdr) + __ALIGN_KERNEL(nhdr->n_namesz, 4);
+
+ regs = (unsigned long *) note;
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ core_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(regs[0] != info->amr);
+ FAIL_IF(regs[1] != info->iamr);
+ FAIL_IF(regs[2] != info->uamor);
+
+ return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+ char *filenames, *filename[3];
+ int fd, i, ret, status;
+ unsigned long regs[3];
+ off_t core_size;
+ void *core;
+
+ /*
+ * Get the initial values for AMR, IAMR and UAMOR and communicate them
+ * to the child.
+ */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ info->amr = regs[0];
+ info->iamr = regs[1];
+ info->uamor = regs[2];
+
+ /* Wake up child so that it can set itself up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait(&status);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ } else if (!WIFSIGNALED(status) || !WCOREDUMP(status)) {
+ printf("Child didn't dump core\n");
+ return TEST_FAIL;
+ }
+
+ /* Construct array of core file names to try. */
+
+ filename[0] = filenames = malloc(PATH_MAX);
+ if (!filenames) {
+ perror("Error allocating memory");
+ return TEST_FAIL;
+ }
+
+ ret = snprintf(filename[0], PATH_MAX, "core-pkey.%d", pid);
+ if (ret < 0 || ret >= PATH_MAX) {
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ filename[1] = filename[0] + ret + 1;
+ ret = snprintf(filename[1], PATH_MAX - ret - 1, "core.%d", pid);
+ if (ret < 0 || ret >= PATH_MAX - ret - 1) {
+ ret = TEST_FAIL;
+ goto out;
+ }
+ filename[2] = "core";
+
+ for (i = 0; i < 3; i++) {
+ core_size = try_core_file(filename[i], info, pid);
+ if (core_size != TEST_FAIL)
+ break;
+ }
+
+ if (i == 3) {
+ printf("Couldn't find core file\n");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ fd = open(filename[i], O_RDONLY);
+ if (fd == -1) {
+ perror("Error opening core file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (core == MAP_FAILED) {
+ perror("Error mmaping core file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ ret = check_core_file(info, core, core_size);
+
+ munmap(core, core_size);
+ close(fd);
+ unlink(filename[i]);
+
+ out:
+ free(filenames);
+
+ return ret;
+}
+
+static int write_core_pattern(const char *core_pattern)
+{
+ size_t len = strlen(core_pattern), ret;
+ FILE *f;
+
+ f = fopen(core_pattern_file, "w");
+ SKIP_IF_MSG(!f, "Try with root privileges");
+
+ ret = fwrite(core_pattern, 1, len, f);
+ fclose(f);
+ if (ret != len) {
+ perror("Error writing to core_pattern file");
+ return TEST_FAIL;
+ }
+
+ return TEST_PASS;
+}
+
+static int setup_core_pattern(char **core_pattern_, bool *changed_)
+{
+ FILE *f;
+ char *core_pattern;
+ int ret;
+
+ core_pattern = malloc(PATH_MAX);
+ if (!core_pattern) {
+ perror("Error allocating memory");
+ return TEST_FAIL;
+ }
+
+ f = fopen(core_pattern_file, "r");
+ if (!f) {
+ perror("Error opening core_pattern file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ ret = fread(core_pattern, 1, PATH_MAX, f);
+ fclose(f);
+ if (!ret) {
+ perror("Error reading core_pattern file");
+ ret = TEST_FAIL;
+ goto out;
+ }
+
+ /* Check whether we can predict the name of the core file. */
+ if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
+ *changed_ = false;
+ else {
+ ret = write_core_pattern("core-pkey.%p");
+ if (ret)
+ goto out;
+
+ *changed_ = true;
+ }
+
+ *core_pattern_ = core_pattern;
+ ret = TEST_PASS;
+
+ out:
+ if (ret)
+ free(core_pattern);
+
+ return ret;
+}
+
+static int core_pkey(void)
+{
+ char *core_pattern;
+ bool changed_core_pattern;
+ struct shared_info *info;
+ int shm_id;
+ int ret;
+ pid_t pid;
+
+ ret = setup_core_pattern(&core_pattern, &changed_core_pattern);
+ if (ret)
+ return ret;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+ info = shmat(shm_id, NULL, 0);
+
+ ret = init_child_sync(&info->child_sync);
+ if (ret)
+ return ret;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ ret = TEST_FAIL;
+ } else if (pid == 0)
+ ret = child(info);
+ else
+ ret = parent(info, pid);
+
+ shmdt(info);
+
+ if (pid) {
+ destroy_child_sync(&info->child_sync);
+ shmctl(shm_id, IPC_RMID, NULL);
+
+ if (changed_core_pattern)
+ write_core_pattern(core_pattern);
+ }
+
+ free(core_pattern);
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(core_pkey, "core_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
new file mode 100644
index 000000000..60df0b5e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -0,0 +1,196 @@
+/*
+ * perf events self profiling example test case for hw breakpoints.
+ *
+ * This tests perf PERF_TYPE_BREAKPOINT parameters:
+ * 1) all variants of the break-on-read/write flags
+ * 2) exclude_user == 0 and 1
+ * 3) array accesses (if DAWR is supported)
+ * 4) different numbers of breakpoint hits
+ *
+ * Each test configures a breakpoint, then reads and writes the data a
+ * number of times, and checks that the count perf reports is as expected.
+ *
+ * Based on:
+ * http://ozlabs.org/~anton/junkcode/perf_events_example1.c
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <unistd.h>
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <sys/ioctl.h>
+#include <elf.h>
+#include <pthread.h>
+#include <sys/syscall.h>
+#include <linux/perf_event.h>
+#include <linux/hw_breakpoint.h>
+#include "utils.h"
+
+#define MAX_LOOPS 10000
+
+#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
+
+static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+ int cpu, int group_fd,
+ unsigned long flags)
+{
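+ /* attr->size lets the kernel validate the perf_event_attr ABI version */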
+ attr->size = sizeof(*attr);
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static inline bool breakpoint_test(int len)
+{
+ struct perf_event_attr attr;
+ int fd;
+
+ /* setup counters */
+ memset(&attr, 0, sizeof(attr));
+ attr.disabled = 1;
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.bp_type = HW_BREAKPOINT_R;
+ /* bp_addr can point anywhere, but it needs to be aligned; mask it down to a 2KB boundary */
+ attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
+ attr.bp_len = len;
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+ return true;
+}
+
+static inline bool perf_breakpoint_supported(void)
+{
+ return breakpoint_test(4);
+}
+
+static inline bool dawr_supported(void)
+{
+ return breakpoint_test(DAWR_LENGTH_MAX);
+}
+
+static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
+{
+ int i, j;
+ struct perf_event_attr attr;
+ size_t res;
+ unsigned long long breaks, needed;
+ int readint;
+ int readintarraybig[2*DAWR_LENGTH_MAX/sizeof(int)];
+ int *readintalign;
+ volatile int *ptr;
+ int break_fd;
+ int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
+ volatile int *k;
+
+ /* round the buffer up to a 0x800 boundary (DAWR requires an aligned address) */
+ readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
+ 0xfffffffffffff800);
+
+ ptr = &readint;
+ if (arraytest)
+ ptr = &readintalign[0];
+
+ /* setup counters */
+ memset(&attr, 0, sizeof(attr));
+ attr.disabled = 1;
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.bp_type = readwriteflag;
+ attr.bp_addr = (__u64)ptr;
+ attr.bp_len = sizeof(int);
+ if (arraytest)
+ attr.bp_len = DAWR_LENGTH_MAX;
+ attr.exclude_user = exclude_user;
+ break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (break_fd < 0) {
+ perror("sys_perf_event_open");
+ exit(1);
+ }
+
+ /* start counters */
+ ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+
+ /* Test a bunch of reads and writes */
+ k = &readint;
+ for (i = 0; i < loop_num; i++) {
+ if (arraytest)
+ k = &(readintalign[i % (DAWR_LENGTH_MAX/sizeof(int))]);
+
+ j = *k;
+ *k = j;
+ }
+
+ /* stop counters */
+ ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+
+ /* read and check counters */
+ res = read(break_fd, &breaks, sizeof(unsigned long long));
+ assert(res == sizeof(unsigned long long));
+ /* we read and write each loop, so subtract the ones we are counting */
+ needed = 0;
+ if (readwriteflag & HW_BREAKPOINT_R)
+ needed += loop_num;
+ if (readwriteflag & HW_BREAKPOINT_W)
+ needed += loop_num;
+ needed = needed * (1 - exclude_user);
+ printf("TESTED: addr:0x%lx brks:% 8lld loops:% 8i rw:%i !user:%i array:%i\n",
+ (unsigned long int)ptr, breaks, loop_num, readwriteflag, exclude_user, arraytest);
+ if (breaks != needed) {
+ printf("FAILED: 0x%lx brks:%lld needed:%lli %i %i %i\n\n",
+ (unsigned long int)ptr, breaks, needed, loop_num, readwriteflag, exclude_user);
+ return 1;
+ }
+ close(break_fd);
+
+ return 0;
+}
+
+static int runtest(void)
+{
+ int rwflag;
+ int exclude_user;
+ int ret;
+
+ /*
+ * perf defines rwflag as two bits read and write and at least
+ * one must be set. So range 1-3.
+ */
+ for (rwflag = 1 ; rwflag < 4; rwflag++) {
+ for (exclude_user = 0 ; exclude_user < 2; exclude_user++) {
+ ret = runtestsingle(rwflag, exclude_user, 0);
+ if (ret)
+ return ret;
+
+ /* if we have the dawr, we can do an array test */
+ if (!dawr_supported())
+ continue;
+ ret = runtestsingle(rwflag, exclude_user, 1);
+ if (ret)
+ return ret;
+ }
+ }
+ return 0;
+}
+
+static int perf_hwbreak(void)
+{
+ srand(time(NULL));
+
+ SKIP_IF(!perf_breakpoint_supported());
+
+ return runtest();
+}
+
+int main(int argc, char *argv[], char **envp)
+{
+ return test_harness(perf_hwbreak, "perf_hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
new file mode 100644
index 000000000..ca29fafee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
@@ -0,0 +1,123 @@
+/*
+ * Ptrace test for GPR/FPR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "reg.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+
+void gpr(void)
+{
+ unsigned long gpr_buf[18];
+ float fpr_buf[32];
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+ asm __volatile__(
+ ASM_LOAD_GPR_IMMED(gpr_1)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+ :
+ : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a)
+ : "memory", "r6", "r7", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15", "r16", "r17",
+ "r18", "r19", "r20", "r21", "r22", "r23", "r24",
+ "r25", "r26", "r27", "r28", "r29", "r30", "r31"
+ );
+
+ cptr[1] = 1;
+
+ while (!cptr[0])
+ asm volatile("" : : : "memory");
+
+ shmdt((void *)cptr);
+ store_gpr(gpr_buf);
+ store_fpr_single_precision(fpr_buf);
+
+ if (validate_gpr(gpr_buf, GPR_3))
+ exit(1);
+
+ if (validate_fpr_float(fpr_buf, c))
+ exit(1);
+
+ exit(0);
+}
+
+int trace_gpr(pid_t child)
+{
+ unsigned long gpr[18];
+ unsigned long fpr[32];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(show_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+ FAIL_IF(write_gpr(child, GPR_3));
+ FAIL_IF(write_fpr(child, FPR_3_REP));
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_gpr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+ if (pid == 0)
+ gpr();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ ret = trace_gpr(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ pptr[0] = 1;
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_gpr, "ptrace_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
new file mode 100644
index 000000000..e30fef638
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define GPR_1 1
+#define GPR_2 2
+#define GPR_3 3
+#define GPR_4 4
+
+#define FPR_1 0.001
+#define FPR_2 0.002
+#define FPR_3 0.003
+#define FPR_4 0.004
+
+#define FPR_1_REP 0x3f50624de0000000
+#define FPR_2_REP 0x3f60624de0000000
+#define FPR_3_REP 0x3f689374c0000000
+#define FPR_4_REP 0x3f70624de0000000
+
+/* Buffer must have 18 elements */
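+/* The 18 entries correspond to the non-volatile GPRs r14-r31. */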
+int validate_gpr(unsigned long *gpr, unsigned long val)
+{
+ int i, found = 1;
+
+ for (i = 0; i < 18; i++) {
+ if (gpr[i] != val) {
+ printf("GPR[%d]: %lx Expected: %lx\n",
+ i+14, gpr[i], val);
+ found = 0;
+ }
+ }
+
+ if (!found)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
+
+/* Buffer must have 32 elements */
+int validate_fpr(unsigned long *fpr, unsigned long val)
+{
+ int i, found = 1;
+
+ for (i = 0; i < 32; i++) {
+ if (fpr[i] != val) {
+ printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val);
+ found = 0;
+ }
+ }
+
+ if (!found)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
+
+/* Buffer must have 32 elements */
+int validate_fpr_float(float *fpr, float val)
+{
+ int i, found = 1;
+
+ for (i = 0; i < 32; i++) {
+ if (fpr[i] != val) {
+ printf("FPR[%d]: %f Expected: %f\n", i, fpr[i], val);
+ found = 0;
+ }
+ }
+
+ if (!found)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
new file mode 100644
index 000000000..3066d310f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Ptrace test for hw breakpoints
+ *
+ * Based on tools/testing/selftests/breakpoints/breakpoint_test.c
+ *
+ * This test forks and the parent then traces the child doing various
+ * types of ptrace enabled breakpoints
+ *
+ * Copyright (C) 2018 Michael Neuling, IBM Corporation.
+ */
+
+#include <sys/ptrace.h>
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/user.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "ptrace.h"
+
+/* Breakpoint access modes */
+enum {
+ BP_X = 1,
+ BP_RW = 2,
+ BP_W = 4,
+};
+
+static pid_t child_pid;
+static struct ppc_debug_info dbginfo;
+
+static void get_dbginfo(void)
+{
+ int ret;
+
+ ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+ if (ret) {
+ perror("Can't get breakpoint info\n");
+ exit(-1);
+ }
+}
+
+static bool hwbreak_present(void)
+{
+ return (dbginfo.num_data_bps != 0);
+}
+
+static bool dawr_present(void)
+{
+ return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
+}
+
+static void set_breakpoint_addr(void *addr)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
+ if (ret) {
+ perror("Can't set breakpoint addr\n");
+ exit(-1);
+ }
+}
+
+static int set_hwbreakpoint_addr(void *addr, int range)
+{
+ int ret;
+
+ struct ppc_hw_breakpoint info;
+
+ info.version = 1;
+ info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+ info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ if (range > 0)
+ info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ info.addr = (__u64)addr;
+ info.addr2 = (__u64)addr + range;
+ info.condition_value = 0;
+
+ ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
+ if (ret < 0) {
+ perror("Can't set breakpoint\n");
+ exit(-1);
+ }
+ return ret;
+}
+
+static int del_hwbreakpoint_addr(int watchpoint_handle)
+{
+ int ret;
+
+ ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
+ if (ret < 0) {
+ perror("Can't delete hw breakpoint\n");
+ exit(-1);
+ }
+ return ret;
+}
+
+#define DAWR_LENGTH_MAX 512
+
+/* Dummy variables to test read/write accesses */
+static unsigned long long
+ dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
+ __attribute__((aligned(512)));
+static unsigned long long *dummy_var = dummy_array;
+
+static void write_var(int len)
+{
+ long long *plval;
+ char *pcval;
+ short *psval;
+ int *pival;
+
+ switch (len) {
+ case 1:
+ pcval = (char *)dummy_var;
+ *pcval = 0xff;
+ break;
+ case 2:
+ psval = (short *)dummy_var;
+ *psval = 0xffff;
+ break;
+ case 4:
+ pival = (int *)dummy_var;
+ *pival = 0xffffffff;
+ break;
+ case 8:
+ plval = (long long *)dummy_var;
+ *plval = 0xffffffffffffffffLL;
+ break;
+ }
+}
+
+static void read_var(int len)
+{
+ char cval __attribute__((unused));
+ short sval __attribute__((unused));
+ int ival __attribute__((unused));
+ long long lval __attribute__((unused));
+
+ switch (len) {
+ case 1:
+ cval = *(char *)dummy_var;
+ break;
+ case 2:
+ sval = *(short *)dummy_var;
+ break;
+ case 4:
+ ival = *(int *)dummy_var;
+ break;
+ case 8:
+ lval = *(long long *)dummy_var;
+ break;
+ }
+}
+
+/*
+ * Do the r/w accesses to trigger the breakpoints. And run
+ * the usual traps.
+ */
+static void trigger_tests(void)
+{
+ int len, ret;
+
+ ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ if (ret) {
+ perror("Can't be traced?\n");
+ return;
+ }
+
+ /* Wake up the parent so that it sets up the first test */
+ kill(getpid(), SIGUSR1);
+
+ /* Test write watchpoints */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ write_var(len);
+
+ /* Test read/write watchpoints (on read accesses) */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ read_var(len);
+
+ /* Test when breakpoint is unset */
+
+ /* Test write watchpoints */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ write_var(len);
+
+ /* Test read/write watchpoints (on read accesses) */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ read_var(len);
+}
+
+static void check_success(const char *msg)
+{
+ const char *msg2;
+ int status;
+
+ /* Wait for the child to SIGTRAP */
+ wait(&status);
+
+ msg2 = "Failed";
+
+ if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+ msg2 = "Child process hit the breakpoint";
+ }
+
+ printf("%s Result: [%s]\n", msg, msg2);
+}
+
+static void launch_watchpoints(char *buf, int mode, int len,
+ struct ppc_debug_info *dbginfo, bool dawr)
+{
+ const char *mode_str;
+ unsigned long data = (unsigned long)(dummy_var);
+ int wh, range;
+
+ data &= ~0x7UL;
+
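+ /*
+ * The low three bits of the PTRACE_SET_DEBUGREG value are the DABR
+ * control bits: bit 0 enables break on read, bit 1 break on write,
+ * and bit 2 is the DABR_TRANSLATION bit set below.
+ */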
+ if (mode == BP_W) {
+ data |= (1UL << 1);
+ mode_str = "write";
+ } else {
+ data |= (1UL << 0);
+ data |= (1UL << 1);
+ mode_str = "read";
+ }
+
+ /* Set DABR_TRANSLATION bit */
+ data |= (1UL << 2);
+
+ /* use PTRACE_SET_DEBUGREG breakpoints */
+ set_breakpoint_addr((void *)data);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+ check_success(buf);
+ /* Unregister hw brkpoint */
+ set_breakpoint_addr(NULL);
+
+ data = (data & ~7); /* remove dabr control bits */
+
+ /* use PPC_PTRACE_SETHWDEBUG breakpoint */
+ if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+ return; /* not supported */
+ wh = set_hwbreakpoint_addr((void *)data, 0);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+ check_success(buf);
+ /* Unregister hw brkpoint */
+ del_hwbreakpoint_addr(wh);
+
+ /* try a wider range */
+ range = 8;
+ if (dawr)
+ range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
+ wh = set_hwbreakpoint_addr((void *)data, range);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
+ check_success(buf);
+ /* Unregister hw brkpoint */
+ del_hwbreakpoint_addr(wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static int launch_tests(bool dawr)
+{
+ char buf[1024];
+ int len, i, status;
+
+ struct ppc_debug_info dbginfo;
+
+ i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
+ if (i) {
+ perror("Can't set breakpoint info\n");
+ exit(-1);
+ }
+ if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
+ printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
+
+ /* Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+
+ /* Read-Write watchpoint */
+ for (len = 1; len <= sizeof(long); len <<= 1)
+ launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+
+ /*
+ * Now we have unregistered the breakpoint, access by child
+ * should not cause SIGTRAP.
+ */
+
+ wait(&status);
+
+ if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
+ printf("FAIL: Child process hit the breakpoint, which is not expected\n");
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ return TEST_FAIL;
+ }
+
+ if (WIFEXITED(status))
+ printf("Child exited normally\n");
+
+ return TEST_PASS;
+}
+
+static int ptrace_hwbreak(void)
+{
+ pid_t pid;
+ int ret;
+ bool dawr;
+
+ pid = fork();
+ if (!pid) {
+ trigger_tests();
+ return 0;
+ }
+
+ wait(NULL);
+
+ child_pid = pid;
+
+ get_dbginfo();
+ SKIP_IF(!hwbreak_present());
+ dawr = dawr_present();
+
+ ret = launch_tests(dawr);
+
+ wait(NULL);
+
+ return ret;
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
new file mode 100644
index 000000000..3694613f4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Ptrace test for Memory Protection Key registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ * Copyright (C) 2018 IBM Corporation.
+ */
+#include "ptrace.h"
+#include "child.h"
+
+#ifndef __NR_pkey_alloc
+#define __NR_pkey_alloc 384
+#endif
+
+#ifndef __NR_pkey_free
+#define __NR_pkey_free 385
+#endif
+
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
+#ifndef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+#endif
+
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
+static const char user_read[] = "[User Read (Running)]";
+static const char user_write[] = "[User Write (Running)]";
+static const char ptrace_read_running[] = "[Ptrace Read (Running)]";
+static const char ptrace_write_running[] = "[Ptrace Write (Running)]";
+
+/* Information shared between the parent and the child. */
+struct shared_info {
+ struct child_sync child_sync;
+
+ /* AMR value the parent expects to read from the child. */
+ unsigned long amr1;
+
+ /* AMR value the parent is expected to write to the child. */
+ unsigned long amr2;
+
+ /* AMR value that ptrace should refuse to write to the child. */
+ unsigned long invalid_amr;
+
+ /* IAMR value the parent expects to read from the child. */
+ unsigned long expected_iamr;
+
+ /* UAMOR value the parent expects to read from the child. */
+ unsigned long expected_uamor;
+
+ /*
+ * IAMR and UAMOR values that ptrace should refuse to write to the child
+ * (even though they're valid ones) because userspace doesn't have
+ * access to those registers.
+ */
+ unsigned long invalid_iamr;
+ unsigned long invalid_uamor;
+};
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
+{
+ return syscall(__NR_pkey_alloc, flags, init_access_rights);
+}
+
+static int child(struct shared_info *info)
+{
+ unsigned long reg;
+ bool disable_execute = true;
+ int pkey1, pkey2, pkey3;
+ int ret;
+
+ /* Wait until parent fills out the initial register values. */
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Get some pkeys so that we can change their bits in the AMR. */
+ pkey1 = sys_pkey_alloc(0, PKEY_DISABLE_EXECUTE);
+ if (pkey1 < 0) {
+ pkey1 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey1 < 0, &info->child_sync);
+
+ disable_execute = false;
+ }
+
+ pkey2 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey2 < 0, &info->child_sync);
+
+ pkey3 = sys_pkey_alloc(0, 0);
+ CHILD_FAIL_IF(pkey3 < 0, &info->child_sync);
+
+ info->amr1 |= 3ul << pkeyshift(pkey1);
+ info->amr2 |= 3ul << pkeyshift(pkey2);
+ /*
+ * Invalid AMR value: try to force-write bits which the UAMOR setting
+ * denies us access to.
+ */
+ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+
+ /*
+ * If PKEY_DISABLE_EXECUTE succeeded, we should update the expected_iamr.
+ */
+ if (disable_execute)
+ info->expected_iamr |= 1ul << pkeyshift(pkey1);
+ else
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
+
+ /*
+ * We allocated pkey2 and pkey3 above. Clear their IAMR bits.
+ */
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
+
+ /*
+ * Create IAMR and UAMOR values different from the expected ones.
+ * The kernel should reject any ptrace write to IAMR or UAMOR.
+ */
+ info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+ info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
+
+ printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
+ user_write, info->amr1, pkey1, pkey2, pkey3);
+
+ mtspr(SPRN_AMR, info->amr1);
+
+ /* Wait for parent to read our AMR value and write a new one. */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /*
+ * Wait for parent to try to write an invalid AMR value.
+ */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /*
+ * Wait for parent to try to write an IAMR and a UAMOR value. We can't
+ * verify them, but we can verify that the AMR didn't change.
+ */
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_parent(&info->child_sync);
+ if (ret)
+ return ret;
+
+ reg = mfspr(SPRN_AMR);
+
+ printf("%-30s AMR: %016lx\n", user_read, reg);
+
+ CHILD_FAIL_IF(reg != info->amr2, &info->child_sync);
+
+ /* Now let the parent know that we are finished. */
+
+ ret = prod_parent(&info->child_sync);
+ CHILD_FAIL_IF(ret, &info->child_sync);
+
+ return TEST_PASS;
+}
+
+static int parent(struct shared_info *info, pid_t pid)
+{
+ unsigned long regs[3];
+ int ret, status;
+
+ /*
+ * Get the initial values for AMR, IAMR and UAMOR and communicate them
+ * to the child.
+ */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ info->amr1 = info->amr2 = regs[0];
+ info->expected_iamr = regs[1];
+ info->expected_uamor = regs[2];
+
+ /* Wake up child so that it can set itself up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Verify that we can read the pkey registers from the child. */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ PARENT_FAIL_IF(regs[0] != info->amr1, &info->child_sync);
+ PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+ PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+ /* Write valid AMR value in child. */
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr2, 1);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr2);
+
+ /* Wake up child so that it can verify it changed. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Write invalid AMR value in child. */
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
+
+ /* Wake up child so that it can verify it didn't change. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait_child(&info->child_sync);
+ if (ret)
+ return ret;
+
+ /* Try to write to IAMR. */
+ regs[0] = info->amr1;
+ regs[1] = info->invalid_iamr;
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
+ PARENT_FAIL_IF(!ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx\n",
+ ptrace_write_running, regs[0], regs[1]);
+
+ /* Try to write to IAMR and UAMOR. */
+ regs[2] = info->invalid_uamor;
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(!ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_write_running, regs[0], regs[1], regs[2]);
+
+ /* Verify that all registers still have their expected values. */
+ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ printf("%-30s AMR: %016lx IAMR: %016lx UAMOR: %016lx\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ PARENT_FAIL_IF(regs[0] != info->amr2, &info->child_sync);
+ PARENT_FAIL_IF(regs[1] != info->expected_iamr, &info->child_sync);
+ PARENT_FAIL_IF(regs[2] != info->expected_uamor, &info->child_sync);
+
+ /* Wake up child so that it can verify AMR didn't change and wrap up. */
+ ret = prod_child(&info->child_sync);
+ PARENT_FAIL_IF(ret, &info->child_sync);
+
+ ret = wait(&status);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ ret = TEST_PASS;
+ } else if (!WIFEXITED(status)) {
+ printf("Child exited abnormally\n");
+ ret = TEST_FAIL;
+ } else
+ ret = WEXITSTATUS(status) ? TEST_FAIL : TEST_PASS;
+
+ return ret;
+}
+
+static int ptrace_pkey(void)
+{
+ struct shared_info *info;
+ int shm_id;
+ int ret;
+ pid_t pid;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(*info), 0777 | IPC_CREAT);
+ info = shmat(shm_id, NULL, 0);
+
+ ret = init_child_sync(&info->child_sync);
+ if (ret)
+ return ret;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ ret = TEST_FAIL;
+ } else if (pid == 0)
+ ret = child(info);
+ else
+ ret = parent(info, pid);
+
+ shmdt(info);
+
+ if (pid) {
+ destroy_child_sync(&info->child_sync);
+ shmctl(shm_id, IPC_RMID, NULL);
+ }
+
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_pkey, "ptrace_pkey");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
new file mode 100644
index 000000000..f9b5069db
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -0,0 +1,135 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-tar.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr;
+int *pptr;
+
+void tar(void)
+{
+ unsigned long reg[3];
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ user_write, TAR_1, PPR_1, DSCR_1);
+
+ mtspr(SPRN_TAR, TAR_1);
+ mtspr(SPRN_PPR, PPR_1);
+ mtspr(SPRN_DSCR, DSCR_1);
+
+ cptr[2] = 1;
+
+ /* Wait on parent */
+ while (!cptr[0])
+ asm volatile("" : : : "memory");
+
+ reg[0] = mfspr(SPRN_TAR);
+ reg[1] = mfspr(SPRN_PPR);
+ reg[2] = mfspr(SPRN_DSCR);
+
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ user_read, reg[0], reg[1], reg[2]);
+
+ /* Unblock the parent now */
+ cptr[1] = 1;
+ shmdt((int *)cptr);
+
+ ret = validate_tar_registers(reg, TAR_2, PPR_2, DSCR_2);
+ if (ret)
+ exit(1);
+ exit(0);
+}
+
+int trace_tar(pid_t child)
+{
+ unsigned long reg[3];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tar_registers(child, reg));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_running, reg[0], reg[1], reg[2]);
+
+ FAIL_IF(validate_tar_registers(reg, TAR_1, PPR_1, DSCR_1));
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int trace_tar_write(pid_t child)
+{
+ FAIL_IF(start_trace(child));
+ FAIL_IF(write_tar_registers(child, TAR_2, PPR_2, DSCR_2));
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ ptrace_write_running, TAR_2, PPR_2, DSCR_2);
+
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tar(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tar();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+ pptr[1] = 0;
+
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+ ret = trace_tar(pid);
+ if (ret)
+ return ret;
+
+ ret = trace_tar_write(pid);
+ if (ret)
+ return ret;
+
+ /* Unblock the child now */
+ pptr[0] = 1;
+
+ /* Wait on child */
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ shmdt((int *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_PASS;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tar, "ptrace_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h
new file mode 100644
index 000000000..aed0aac71
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define TAR_1 10
+#define TAR_2 20
+#define TAR_3 30
+#define TAR_4 40
+#define TAR_5 50
+
+#define DSCR_1 100
+#define DSCR_2 200
+#define DSCR_3 300
+#define DSCR_4 400
+#define DSCR_5 500
+
+#define PPR_1 0x4000000000000 /* or 31,31,31 */
+#define PPR_2 0x8000000000000 /* or 1,1,1 */
+#define PPR_3 0xc000000000000 /* or 6,6,6 */
+#define PPR_4 0x10000000000000 /* or 2,2,2 */
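+
+/*
+ * The PPR constants hold thread-priority encodings; the "or Rx,Rx,Rx"
+ * forms noted alongside are the no-op priority hints that request them
+ * (e.g. "or 31,31,31" requests very low priority).
+ */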
+
+char *user_read = "[User Read (Running)]";
+char *user_write = "[User Write (Running)]";
+char *ptrace_read_running = "[Ptrace Read (Running)]";
+char *ptrace_write_running = "[Ptrace Write (Running)]";
+char *ptrace_read_ckpt = "[Ptrace Read (Checkpointed)]";
+char *ptrace_write_ckpt = "[Ptrace Write (Checkpointed)]";
+
+int validate_tar_registers(unsigned long *reg, unsigned long tar,
+ unsigned long ppr, unsigned long dscr)
+{
+ int match = 1;
+
+ if (reg[0] != tar)
+ match = 0;
+
+ if (reg[1] != ppr)
+ match = 0;
+
+ if (reg[2] != dscr)
+ match = 0;
+
+ if (!match)
+ return TEST_FAIL;
+ return TEST_PASS;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
new file mode 100644
index 000000000..a08a91594
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -0,0 +1,158 @@
+/*
+ * Ptrace test for GPR/FPR registers in TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "tm.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+unsigned long *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+
+void tm_gpr(void)
+{
+ unsigned long gpr_buf[18];
+ unsigned long result, texasr;
+ float fpr_buf[32];
+
+ printf("Starting the child\n");
+ cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
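+/*
+ * The transaction below parks itself suspended ("tsuspend.; ...; b .")
+ * so the tracer can inspect both the running and the checkpointed
+ * register images. Once the tracer attaches and rewrites the
+ * checkpointed state the transaction is doomed; the abort path at
+ * label 2 then lets the child verify that the values written via
+ * ptrace became the rolled-back state.
+ */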
+trans:
+ cptr[1] = 0;
+ asm __volatile__(
+ ASM_LOAD_GPR_IMMED(gpr_1)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+ ASM_LOAD_GPR_IMMED(gpr_2)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_2)
+ "tsuspend.;"
+ "li 7, 1;"
+ "stw 7, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
+ [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+ [flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
+ : "memory", "r7", "r8", "r9", "r10",
+ "r11", "r12", "r13", "r14", "r15", "r16",
+ "r17", "r18", "r19", "r20", "r21", "r22",
+ "r23", "r24", "r25", "r26", "r27", "r28",
+ "r29", "r30", "r31"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ shmdt((void *)cptr);
+ store_gpr(gpr_buf);
+ store_fpr_single_precision(fpr_buf);
+
+ if (validate_gpr(gpr_buf, GPR_3))
+ exit(1);
+
+ if (validate_fpr_float(fpr_buf, c))
+ exit(1);
+
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_gpr(pid_t child)
+{
+ unsigned long gpr[18];
+ unsigned long fpr[32];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_2));
+ FAIL_IF(show_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_2_REP));
+ FAIL_IF(show_ckpt_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+ FAIL_IF(show_ckpt_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(write_ckpt_gpr(child, GPR_3));
+ FAIL_IF(write_ckpt_fpr(child, FPR_3_REP));
+
+ pptr[0] = 1;
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_tm_gpr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+ if (pid == 0)
+ tm_gpr();
+
+ if (pid) {
+ pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_gpr(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_gpr, "ptrace_tm_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
new file mode 100644
index 000000000..dbdffa2e2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -0,0 +1,169 @@
+/*
+ * Ptrace test for GPR/FPR registers in TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-gpr.h"
+#include "tm.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+float a = FPR_1;
+float b = FPR_2;
+float c = FPR_3;
+float d = FPR_4;
+
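+/* Reached only from the inline asm below via "bl wait_parent;" */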
+__attribute__((used)) void wait_parent(void)
+{
+ cptr[2] = 1;
+ while (!cptr[1])
+ asm volatile("" : : : "memory");
+}
+
+void tm_spd_gpr(void)
+{
+ unsigned long gpr_buf[18];
+ unsigned long result, texasr;
+ float fpr_buf[32];
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[2] = 0;
+ asm __volatile__(
+ ASM_LOAD_GPR_IMMED(gpr_1)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ ASM_LOAD_GPR_IMMED(gpr_2)
+ "tsuspend.;"
+ ASM_LOAD_GPR_IMMED(gpr_4)
+ ASM_LOAD_FPR_SINGLE_PRECISION(flt_4)
+
+ "bl wait_parent;"
+ "tresume.;"
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
+ [sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
+ [flt_4] "b" (&d)
+ : "memory", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ shmdt((void *)cptr);
+ store_gpr(gpr_buf);
+ store_fpr_single_precision(fpr_buf);
+
+ if (validate_gpr(gpr_buf, GPR_3))
+ exit(1);
+
+ if (validate_fpr_float(fpr_buf, c))
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_spd_gpr(pid_t child)
+{
+ unsigned long gpr[18];
+ unsigned long fpr[32];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_4));
+ FAIL_IF(show_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_4_REP));
+ FAIL_IF(show_ckpt_fpr(child, fpr));
+ FAIL_IF(validate_fpr(fpr, FPR_1_REP));
+ FAIL_IF(show_ckpt_gpr(child, gpr));
+ FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(write_ckpt_gpr(child, GPR_3));
+ FAIL_IF(write_ckpt_fpr(child, FPR_3_REP));
+
+ pptr[0] = 1;
+ pptr[1] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_spd_gpr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_spd_gpr();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+ pptr[1] = 0;
+
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_spd_gpr(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spd_gpr, "ptrace_tm_spd_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
new file mode 100644
index 000000000..f47174746
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -0,0 +1,174 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers in the TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-tar.h"
+
+int shm_id;
+int *cptr, *pptr;
+
+__attribute__((used)) void wait_parent(void)
+{
+ cptr[2] = 1;
+ while (!cptr[1])
+ asm volatile("" : : : "memory");
+}
+
+void tm_spd_tar(void)
+{
+ unsigned long result, texasr;
+ unsigned long regs[3];
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[2] = 0;
+ asm __volatile__(
+ "li 4, %[tar_1];"
+ "mtspr %[sprn_tar], 4;" /* TAR_1 */
+ "li 4, %[dscr_1];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_1 */
+	"or 31,31,31;" /* PPR_1 */
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "li 4, %[tar_2];"
+ "mtspr %[sprn_tar], 4;" /* TAR_2 */
+ "li 4, %[dscr_2];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_2 */
+ "or 1,1,1;" /* PPR_2 */
+
+ "tsuspend.;"
+ "li 4, %[tar_3];"
+ "mtspr %[sprn_tar], 4;" /* TAR_3 */
+ "li 4, %[dscr_3];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_3 */
+ "or 6,6,6;" /* PPR_3 */
+ "bl wait_parent;"
+ "tresume.;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_dscr]"i"(SPRN_DSCR),
+ [sprn_tar]"i"(SPRN_TAR), [sprn_ppr]"i"(SPRN_PPR),
+ [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1),
+ [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2),
+ [tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3)
+ : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+ );
+
+ /* TM failed, analyse */
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ regs[0] = mfspr(SPRN_TAR);
+ regs[1] = mfspr(SPRN_PPR);
+ regs[2] = mfspr(SPRN_DSCR);
+
+		shmdt((void *)cptr);
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ user_read, regs[0], regs[1], regs[2]);
+
+ ret = validate_tar_registers(regs, TAR_4, PPR_4, DSCR_4);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+	shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_spd_tar(pid_t child)
+{
+ unsigned long regs[3];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tar_registers(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_3, PPR_3, DSCR_3));
+ FAIL_IF(show_tm_checkpointed_state(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_ckpt, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_1, PPR_1, DSCR_1));
+ FAIL_IF(write_ckpt_tar_registers(child, TAR_4, PPR_4, DSCR_4));
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ ptrace_write_ckpt, TAR_4, PPR_4, DSCR_4);
+
+ pptr[0] = 1;
+ pptr[1] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_spd_tar(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+	if (pid == 0)
+		tm_spd_tar();
+
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+ pptr[1] = 0;
+
+ if (pid) {
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_spd_tar(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+			shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+		shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spd_tar, "ptrace_tm_spd_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
new file mode 100644
index 000000000..18a685bf6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -0,0 +1,184 @@
+/*
+ * Ptrace test for VMX/VSX registers in the TM Suspend context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-vsx.h"
+
+int shm_id;
+int *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_load_new[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+unsigned long fp_load_ckpt[VEC_MAX];
+unsigned long fp_load_ckpt_new[VEC_MAX];
+
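+/* These helpers are reached only via "bl" from the inline asm below. */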
+__attribute__((used)) void load_vsx(void)
+{
+ loadvsx(fp_load, 0);
+}
+
+__attribute__((used)) void load_vsx_new(void)
+{
+ loadvsx(fp_load_new, 0);
+}
+
+__attribute__((used)) void load_vsx_ckpt(void)
+{
+ loadvsx(fp_load_ckpt, 0);
+}
+
+__attribute__((used)) void wait_parent(void)
+{
+ cptr[2] = 1;
+ while (!cptr[1])
+ asm volatile("" : : : "memory");
+}
+
+void tm_spd_vsx(void)
+{
+ unsigned long result, texasr;
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[2] = 0;
+ asm __volatile__(
+ "bl load_vsx_ckpt;"
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "bl load_vsx_new;"
+ "tsuspend.;"
+ "bl load_vsx;"
+ "bl wait_parent;"
+ "tresume.;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_texasr] "i" (SPRN_TEXASR)
+ : "memory", "r0", "r1", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+ shmdt((void *)cptr);
+
+ storevsx(fp_store, 0);
+ ret = compare_vsx_vmx(fp_store, fp_load_ckpt_new);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_spd_vsx(pid_t child)
+{
+ unsigned long vsx[VSX_MAX];
+ unsigned long vmx[VMX_MAX + 2][2];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_vsx(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load));
+ FAIL_IF(show_vmx(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load));
+ FAIL_IF(show_vsx_ckpt(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load_ckpt));
+ FAIL_IF(show_vmx_ckpt(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load_ckpt));
+
+ memset(vsx, 0, sizeof(vsx));
+ memset(vmx, 0, sizeof(vmx));
+
+ load_vsx_vmx(fp_load_ckpt_new, vsx, vmx);
+
+ FAIL_IF(write_vsx_ckpt(child, vsx));
+ FAIL_IF(write_vmx_ckpt(child, vmx));
+
+ pptr[0] = 1;
+ pptr[1] = 1;
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_tm_spd_vsx(void)
+{
+ pid_t pid;
+ int ret, status, i;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
+
+ for (i = 0; i < 128; i++) {
+ fp_load[i] = 1 + rand();
+ fp_load_new[i] = 1 + 2 * rand();
+ fp_load_ckpt[i] = 1 + 3 * rand();
+ fp_load_ckpt_new[i] = 1 + 4 * rand();
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_spd_vsx();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ while (!pptr[2])
+ asm volatile("" : : : "memory");
+
+ ret = trace_tm_spd_vsx(pid);
+ if (ret) {
+ kill(pid, SIGKILL);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spd_vsx, "ptrace_tm_spd_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
new file mode 100644
index 000000000..ba0499925
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -0,0 +1,167 @@
+/*
+ * Ptrace test TM SPR registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+
+/* Tracee and tracer shared data */
+struct shared {
+ int flag;
+ struct tm_spr_regs regs;
+};
+unsigned long tfhar;
+
+int shm_id;
+struct shared *cptr, *pptr;
+
+int shm_id1;
+int *cptr1, *pptr1;
+
+#define TM_KVM_SCHED 0xe0000001ac000001
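+/*
+ * TEXASR failure code seen when the transaction is pre-empted (e.g. by
+ * a KVM reschedule); TFIAR should read as 0 in that case.
+ */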
+int validate_tm_spr(struct tm_spr_regs *regs)
+{
+ FAIL_IF(regs->tm_tfhar != tfhar);
+ FAIL_IF((regs->tm_texasr == TM_KVM_SCHED) && (regs->tm_tfiar != 0));
+
+ return TEST_PASS;
+}
+
+void tm_spr(void)
+{
+ unsigned long result, texasr;
+ int ret;
+
+ cptr = (struct shared *)shmat(shm_id, NULL, 0);
+ cptr1 = (int *)shmat(shm_id1, NULL, 0);
+
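+/*
+ * The mflr/bl/mflr sequence below captures the address of the code
+ * stream so that, adjusted by a fixed offset, it can be compared with
+ * the TFHAR the kernel reports after the transaction fails.
+ */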
+trans:
+ cptr1[0] = 0;
+ asm __volatile__(
+ "1: ;"
+	/* The TM failure handler has to follow "tbegin.;" */
+ "mflr 31;"
+ "bl 4f;" /* $ = TFHAR - 12 */
+ "4: ;"
+ "mflr %[tfhar];"
+ "mtlr 31;"
+
+ "tbegin.;"
+ "beq 2f;"
+
+ "tsuspend.;"
+ "li 8, 1;"
+ "sth 8, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ "2: ;"
+
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [tfhar] "=r" (tfhar), [res] "=r" (result),
+ [texasr] "=r" (texasr), [cptr1] "=b" (cptr1)
+ : [sprn_texasr] "i" (SPRN_TEXASR)
+ : "memory", "r0", "r8", "r31"
+ );
+
+ /* There are 2 32bit instructions before tbegin. */
+ tfhar += 12;
+
+ if (result) {
+ if (!cptr->flag)
+ goto trans;
+
+ ret = validate_tm_spr((struct tm_spr_regs *)&cptr->regs);
+ shmdt((void *)cptr);
+ shmdt((void *)cptr1);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ shmdt((void *)cptr1);
+ exit(1);
+}
+
+int trace_tm_spr(pid_t child)
+{
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tm_spr(child, (struct tm_spr_regs *)&pptr->regs));
+
+ printf("TFHAR: %lx TEXASR: %lx TFIAR: %lx\n", pptr->regs.tm_tfhar,
+ pptr->regs.tm_texasr, pptr->regs.tm_tfiar);
+
+ pptr->flag = 1;
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_tm_spr(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
+ shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_spr();
+
+ if (pid) {
+ pptr = (struct shared *)shmat(shm_id, NULL, 0);
+ pptr1 = (int *)shmat(shm_id1, NULL, 0);
+
+ while (!pptr1[0])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_spr(pid);
+ if (ret) {
+ kill(pid, SIGKILL);
+ shmdt((void *)pptr);
+ shmdt((void *)pptr1);
+ shmctl(shm_id, IPC_RMID, NULL);
+ shmctl(shm_id1, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+ shmdt((void *)pptr1);
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ shmctl(shm_id1, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_spr, "ptrace_tm_spr");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
new file mode 100644
index 000000000..f70023b25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -0,0 +1,160 @@
+/*
+ * Ptrace test for TAR, PPR, DSCR registers in the TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-tar.h"
+
+int shm_id;
+unsigned long *cptr, *pptr;
+
+
+void tm_tar(void)
+{
+ unsigned long result, texasr;
+ unsigned long regs[3];
+ int ret;
+
+ cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[1] = 0;
+ asm __volatile__(
+ "li 4, %[tar_1];"
+ "mtspr %[sprn_tar], 4;" /* TAR_1 */
+ "li 4, %[dscr_1];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_1 */
+	"or 31,31,31;" /* PPR_1 */
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "li 4, %[tar_2];"
+ "mtspr %[sprn_tar], 4;" /* TAR_2 */
+ "li 4, %[dscr_2];"
+ "mtspr %[sprn_dscr], 4;" /* DSCR_2 */
+ "or 1,1,1;" /* PPR_2 */
+ "tsuspend.;"
+ "li 0, 1;"
+ "stw 0, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ /* Transaction abort handler */
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_dscr]"i"(SPRN_DSCR), [sprn_tar]"i"(SPRN_TAR),
+ [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR),
+ [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2),
+ [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1])
+ : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+ );
+
+ /* TM failed, analyse */
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ regs[0] = mfspr(SPRN_TAR);
+ regs[1] = mfspr(SPRN_PPR);
+ regs[2] = mfspr(SPRN_DSCR);
+
+		shmdt((void *)cptr);
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ user_read, regs[0], regs[1], regs[2]);
+
+ ret = validate_tar_registers(regs, TAR_4, PPR_4, DSCR_4);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+	shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_tar(pid_t child)
+{
+ unsigned long regs[3];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_tar_registers(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_running, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_2, PPR_2, DSCR_2));
+ FAIL_IF(show_tm_checkpointed_state(child, regs));
+ printf("%-30s TAR: %lu PPR: %lx DSCR: %lu\n",
+ ptrace_read_ckpt, regs[0], regs[1], regs[2]);
+
+ FAIL_IF(validate_tar_registers(regs, TAR_1, PPR_1, DSCR_1));
+ FAIL_IF(write_ckpt_tar_registers(child, TAR_4, PPR_4, DSCR_4));
+ printf("%-30s TAR: %u PPR: %lx DSCR: %u\n",
+ ptrace_write_ckpt, TAR_4, PPR_4, DSCR_4);
+
+ pptr[0] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_tar(void)
+{
+ pid_t pid;
+ int ret, status;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+	pid = fork();
+	if (pid < 0) {
+		perror("fork() failed");
+		return TEST_FAIL;
+	}
+	if (pid == 0)
+		tm_tar();
+
+ pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+ pptr[0] = 0;
+
+ if (pid) {
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+ ret = trace_tm_tar(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+			shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+		shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_tar, "ptrace_tm_tar");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
new file mode 100644
index 000000000..dfba80058
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -0,0 +1,167 @@
+/*
+ * Ptrace test for VMX/VSX registers in the TM context
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "tm.h"
+#include "ptrace-vsx.h"
+
+int shm_id;
+unsigned long *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+unsigned long fp_load_ckpt[VEC_MAX];
+unsigned long fp_load_ckpt_new[VEC_MAX];
+
+__attribute__((used)) void load_vsx(void)
+{
+ loadvsx(fp_load, 0);
+}
+
+__attribute__((used)) void load_vsx_ckpt(void)
+{
+ loadvsx(fp_load_ckpt, 0);
+}
+
+void tm_vsx(void)
+{
+ unsigned long result, texasr;
+ int ret;
+
+ cptr = (unsigned long *)shmat(shm_id, NULL, 0);
+
+trans:
+ cptr[1] = 0;
+ asm __volatile__(
+ "bl load_vsx_ckpt;"
+
+ "1: ;"
+ "tbegin.;"
+ "beq 2f;"
+
+ "bl load_vsx;"
+ "tsuspend.;"
+ "li 7, 1;"
+ "stw 7, 0(%[cptr1]);"
+ "tresume.;"
+ "b .;"
+
+ "tend.;"
+ "li 0, 0;"
+ "ori %[res], 0, 0;"
+ "b 3f;"
+
+ "2: ;"
+ "li 0, 1;"
+ "ori %[res], 0, 0;"
+ "mfspr %[texasr], %[sprn_texasr];"
+
+ "3: ;"
+ : [res] "=r" (result), [texasr] "=r" (texasr)
+ : [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1])
+ : "memory", "r0", "r1", "r3", "r4",
+ "r7", "r8", "r9", "r10", "r11"
+ );
+
+ if (result) {
+ if (!cptr[0])
+ goto trans;
+
+ shmdt((void *)cptr);
+ storevsx(fp_store, 0);
+ ret = compare_vsx_vmx(fp_store, fp_load_ckpt_new);
+ if (ret)
+ exit(1);
+ exit(0);
+ }
+ shmdt((void *)cptr);
+ exit(1);
+}
+
+int trace_tm_vsx(pid_t child)
+{
+ unsigned long vsx[VSX_MAX];
+ unsigned long vmx[VMX_MAX + 2][2];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_vsx(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load));
+ FAIL_IF(show_vmx(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load));
+ FAIL_IF(show_vsx_ckpt(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load_ckpt));
+ FAIL_IF(show_vmx_ckpt(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load_ckpt));
+ memset(vsx, 0, sizeof(vsx));
+ memset(vmx, 0, sizeof(vmx));
+
+ load_vsx_vmx(fp_load_ckpt_new, vsx, vmx);
+
+ FAIL_IF(write_vsx_ckpt(child, vsx));
+ FAIL_IF(write_vmx_ckpt(child, vmx));
+ pptr[0] = 1;
+ FAIL_IF(stop_trace(child));
+ return TEST_PASS;
+}
+
+int ptrace_tm_vsx(void)
+{
+ pid_t pid;
+ int ret, status, i;
+
+ SKIP_IF(!have_htm());
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+
+ for (i = 0; i < 128; i++) {
+ fp_load[i] = 1 + rand();
+ fp_load_ckpt[i] = 1 + 2 * rand();
+ fp_load_ckpt_new[i] = 1 + 3 * rand();
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ tm_vsx();
+
+ if (pid) {
+ pptr = (unsigned long *)shmat(shm_id, NULL, 0);
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ ret = trace_tm_vsx(pid);
+ if (ret) {
+ kill(pid, SIGKILL);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ shmdt((void *)pptr);
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_tm_vsx, "ptrace_tm_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
new file mode 100644
index 000000000..04084ee7d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -0,0 +1,117 @@
+/*
+ * Ptrace test for VMX/VSX registers
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include "ptrace.h"
+#include "ptrace-vsx.h"
+
+/* Tracer and Tracee Shared Data */
+int shm_id;
+int *cptr, *pptr;
+
+unsigned long fp_load[VEC_MAX];
+unsigned long fp_load_new[VEC_MAX];
+unsigned long fp_store[VEC_MAX];
+
+void vsx(void)
+{
+ int ret;
+
+ cptr = (int *)shmat(shm_id, NULL, 0);
+ loadvsx(fp_load, 0);
+ cptr[1] = 1;
+
+ while (!cptr[0])
+ asm volatile("" : : : "memory");
+ shmdt((void *) cptr);
+
+ storevsx(fp_store, 0);
+ ret = compare_vsx_vmx(fp_store, fp_load_new);
+ if (ret)
+ exit(1);
+ exit(0);
+}
+
+int trace_vsx(pid_t child)
+{
+ unsigned long vsx[VSX_MAX];
+ unsigned long vmx[VMX_MAX + 2][2];
+
+ FAIL_IF(start_trace(child));
+ FAIL_IF(show_vsx(child, vsx));
+ FAIL_IF(validate_vsx(vsx, fp_load));
+ FAIL_IF(show_vmx(child, vmx));
+ FAIL_IF(validate_vmx(vmx, fp_load));
+
+ memset(vsx, 0, sizeof(vsx));
+ memset(vmx, 0, sizeof(vmx));
+ load_vsx_vmx(fp_load_new, vsx, vmx);
+
+ FAIL_IF(write_vsx(child, vsx));
+ FAIL_IF(write_vmx(child, vmx));
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+int ptrace_vsx(void)
+{
+ pid_t pid;
+ int ret, status, i;
+
+ shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
+
+ for (i = 0; i < VEC_MAX; i++)
+ fp_load[i] = i + rand();
+
+ for (i = 0; i < VEC_MAX; i++)
+ fp_load_new[i] = i + 2 * rand();
+
+ pid = fork();
+ if (pid < 0) {
+ perror("fork() failed");
+ return TEST_FAIL;
+ }
+
+ if (pid == 0)
+ vsx();
+
+ if (pid) {
+ pptr = (int *)shmat(shm_id, NULL, 0);
+ while (!pptr[1])
+ asm volatile("" : : : "memory");
+
+ ret = trace_vsx(pid);
+ if (ret) {
+ kill(pid, SIGTERM);
+ shmdt((void *)pptr);
+ shmctl(shm_id, IPC_RMID, NULL);
+ return TEST_FAIL;
+ }
+
+ pptr[0] = 1;
+ shmdt((void *)pptr);
+
+ ret = wait(&status);
+ shmctl(shm_id, IPC_RMID, NULL);
+ if (ret != pid) {
+ printf("Child's exit status not captured\n");
+ return TEST_FAIL;
+ }
+
+ return (WIFEXITED(status) && WEXITSTATUS(status)) ? TEST_FAIL :
+ TEST_PASS;
+ }
+ return TEST_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(ptrace_vsx, "ptrace_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h
new file mode 100644
index 000000000..f4e4b427c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#define VEC_MAX 128
+#define VSX_MAX 32
+#define VMX_MAX 32
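+
+/*
+ * The 128-entry load/store images are laid out as: slots 0-63 hold the
+ * 32 VSX registers as doubleword pairs (the odd slot of each pair is
+ * the half checked against the VSR dump), slots 64-127 hold the 32 VMX
+ * registers as 128-bit pairs.
+ */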
+
+/*
+ * unsigned long vsx[32]
+ * unsigned long load[128]
+ */
+int validate_vsx(unsigned long *vsx, unsigned long *load)
+{
+ int i;
+
+ for (i = 0; i < VSX_MAX; i++) {
+ if (vsx[i] != load[2 * i + 1]) {
+ printf("vsx[%d]: %lx load[%d] %lx\n",
+ i, vsx[i], 2 * i + 1, load[2 * i + 1]);
+ return TEST_FAIL;
+ }
+ }
+ return TEST_PASS;
+}
+
+/*
+ * unsigned long vmx[32][2]
+ * unsigned long load[128]
+ */
+int validate_vmx(unsigned long vmx[][2], unsigned long *load)
+{
+ int i;
+
+ for (i = 0; i < VMX_MAX; i++) {
+ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ if ((vmx[i][0] != load[64 + 2 * i]) ||
+ (vmx[i][1] != load[65 + 2 * i])) {
+ printf("vmx[%d][0]: %lx load[%d] %lx\n",
+ i, vmx[i][0], 64 + 2 * i,
+ load[64 + 2 * i]);
+ printf("vmx[%d][1]: %lx load[%d] %lx\n",
+ i, vmx[i][1], 65 + 2 * i,
+ load[65 + 2 * i]);
+ return TEST_FAIL;
+ }
+	#else
+	/*
+	 * In LE the two 64-bit halves of each
+	 * value pair are stored swapped.
+	 */
+ if ((vmx[i][0] != load[65 + 2 * i]) ||
+ (vmx[i][1] != load[64 + 2 * i])) {
+ printf("vmx[%d][0]: %lx load[%d] %lx\n",
+ i, vmx[i][0], 65 + 2 * i,
+ load[65 + 2 * i]);
+ printf("vmx[%d][1]: %lx load[%d] %lx\n",
+ i, vmx[i][1], 64 + 2 * i,
+ load[64 + 2 * i]);
+ return TEST_FAIL;
+ }
+ #endif
+ }
+ return TEST_PASS;
+}
+
+/*
+ * unsigned long store[128]
+ * unsigned long load[128]
+ */
+int compare_vsx_vmx(unsigned long *store, unsigned long *load)
+{
+ int i;
+
+ for (i = 0; i < VSX_MAX; i++) {
+ if (store[1 + 2 * i] != load[1 + 2 * i]) {
+			printf("store[%d]: %lx load[%d] %lx\n",
+				1 + 2 * i, store[1 + 2 * i],
+				1 + 2 * i, load[1 + 2 * i]);
+ return TEST_FAIL;
+ }
+ }
+
+ #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ for (i = 64; i < VEC_MAX; i++) {
+ if (store[i] != load[i]) {
+ printf("store[%d]: %lx load[%d] %lx\n",
+ i, store[i], i, load[i]);
+ return TEST_FAIL;
+ }
+ }
+	#else /* In LE the two halves of each value pair are stored swapped */
+ for (i = 64; i < VEC_MAX; i++) {
+ if (!(i % 2) && (store[i] != load[i+1])) {
+ printf("store[%d]: %lx load[%d] %lx\n",
+ i, store[i], i+1, load[i+1]);
+ return TEST_FAIL;
+ }
+ if ((i % 2) && (store[i] != load[i-1])) {
+			printf("store[%d]: %lx load[%d] %lx\n",
+ i, store[i], i-1, load[i-1]);
+ return TEST_FAIL;
+ }
+ }
+ #endif
+ return TEST_PASS;
+}
+
+void load_vsx_vmx(unsigned long *load, unsigned long *vsx,
+ unsigned long vmx[][2])
+{
+ int i;
+
+ for (i = 0; i < VSX_MAX; i++)
+ vsx[i] = load[1 + 2 * i];
+
+ for (i = 0; i < VMX_MAX; i++) {
+ vmx[i][0] = load[64 + 2 * i];
+ vmx[i][1] = load[65 + 2 * i];
+ }
+}
+
+void loadvsx(void *p, int tmp);
+void storevsx(void *p, int tmp);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
new file mode 100644
index 000000000..34201cfa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -0,0 +1,749 @@
+/*
+ * Ptrace interface test helper functions
+ *
+ * Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/ptrace.h>
+#include <sys/ioctl.h>
+#include <sys/uio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/signal.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/user.h>
+#include <linux/elf.h>
+#include <linux/types.h>
+#include <linux/auxvec.h>
+#include "reg.h"
+#include "utils.h"
+
+#define TEST_PASS 0
+#define TEST_FAIL 1
+
+struct fpr_regs {
+ unsigned long fpr[32];
+ unsigned long fpscr;
+};
+
+struct tm_spr_regs {
+ unsigned long tm_tfhar;
+ unsigned long tm_texasr;
+ unsigned long tm_tfiar;
+};
+
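+/*
+ * Regset note numbers matching the uapi <linux/elf.h>, carried here as
+ * a fallback for builds against older system headers.
+ */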
+#ifndef NT_PPC_TAR
+#define NT_PPC_TAR 0x103
+#define NT_PPC_PPR 0x104
+#define NT_PPC_DSCR 0x105
+#define NT_PPC_EBB 0x106
+#define NT_PPC_PMU 0x107
+#define NT_PPC_TM_CGPR 0x108
+#define NT_PPC_TM_CFPR 0x109
+#define NT_PPC_TM_CVMX 0x10a
+#define NT_PPC_TM_CVSX 0x10b
+#define NT_PPC_TM_SPR 0x10c
+#define NT_PPC_TM_CTAR 0x10d
+#define NT_PPC_TM_CPPR 0x10e
+#define NT_PPC_TM_CDSCR 0x10f
+#endif
+
+/* Basic ptrace operations */
+int start_trace(pid_t child)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_ATTACH, child, NULL, NULL);
+ if (ret) {
+ perror("ptrace(PTRACE_ATTACH) failed");
+ return TEST_FAIL;
+ }
+ ret = waitpid(child, NULL, 0);
+ if (ret != child) {
+ perror("waitpid() failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int stop_trace(pid_t child)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_DETACH, child, NULL, NULL);
+ if (ret) {
+ perror("ptrace(PTRACE_DETACH) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int cont_trace(pid_t child)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_CONT, child, NULL, NULL);
+ if (ret) {
+ perror("ptrace(PTRACE_CONT) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int ptrace_read_regs(pid_t child, unsigned long type, unsigned long regs[],
+ int n)
+{
+ struct iovec iov;
+ long ret;
+
+ FAIL_IF(start_trace(child));
+
+ iov.iov_base = regs;
+ iov.iov_len = n * sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+ if (ret)
+ return ret;
+
+ FAIL_IF(stop_trace(child));
+
+ return TEST_PASS;
+}
+
+long ptrace_write_regs(pid_t child, unsigned long type, unsigned long regs[],
+ int n)
+{
+ struct iovec iov;
+ long ret;
+
+ FAIL_IF(start_trace(child));
+
+ iov.iov_base = regs;
+ iov.iov_len = n * sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_SETREGSET, child, type, &iov);
+
+ FAIL_IF(stop_trace(child));
+
+ return ret;
+}
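+
+/*
+ * A typical use from a test (a sketch; the regset type and word count
+ * are whatever the caller needs; NT_PPC_TM_SPR carries the three words
+ * of struct tm_spr_regs):
+ *
+ *	unsigned long sprs[3];
+ *
+ *	FAIL_IF(ptrace_read_regs(child, NT_PPC_TM_SPR, sprs, 3));
+ */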
+
+/* TAR, PPR, DSCR */
+int show_tar_registers(pid_t child, unsigned long *out)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[0] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_PPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[1] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_DSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[2] = *reg;
+
+ free(reg);
+ return TEST_PASS;
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+int write_tar_registers(pid_t child, unsigned long tar,
+ unsigned long ppr, unsigned long dscr)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ *reg = tar;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = ppr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_PPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = dscr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_DSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ free(reg);
+ return TEST_PASS;
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+int show_tm_checkpointed_state(pid_t child, unsigned long *out)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CTAR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[0] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CPPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[1] = *reg;
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CDSCR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ goto fail;
+ }
+ if (out)
+ out[2] = *reg;
+
+ free(reg);
+ return TEST_PASS;
+
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+int write_ckpt_tar_registers(pid_t child, unsigned long tar,
+ unsigned long ppr, unsigned long dscr)
+{
+ struct iovec iov;
+ unsigned long *reg;
+ int ret;
+
+ reg = malloc(sizeof(unsigned long));
+ if (!reg) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) reg;
+ iov.iov_len = sizeof(unsigned long);
+
+ *reg = tar;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CTAR, &iov);
+ if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = ppr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CPPR, &iov);
+ if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ *reg = dscr;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CDSCR, &iov);
+ if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+ goto fail;
+ }
+
+ free(reg);
+ return TEST_PASS;
+fail:
+ free(reg);
+ return TEST_FAIL;
+}
+
+/* FPR */
+int show_fpr(pid_t child, unsigned long *fpr)
+{
+ struct fpr_regs *regs;
+ int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+	ret = ptrace(PTRACE_GETFPREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETFPREGS) failed");
+ return TEST_FAIL;
+ }
+
+ if (fpr) {
+ for (i = 0; i < 32; i++)
+ fpr[i] = regs->fpr[i];
+ }
+ return TEST_PASS;
+}
+
+int write_fpr(pid_t child, unsigned long val)
+{
+ struct fpr_regs *regs;
+ int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+	ret = ptrace(PTRACE_GETFPREGS, child, NULL, regs);
+	if (ret) {
+		perror("ptrace(PTRACE_GETFPREGS) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; i < 32; i++)
+ regs->fpr[i] = val;
+
+ ret = ptrace(PTRACE_SETFPREGS, child, NULL, regs);
+ if (ret) {
+		perror("ptrace(PTRACE_SETFPREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_ckpt_fpr(pid_t child, unsigned long *fpr)
+{
+ struct fpr_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+ iov.iov_base = regs;
+ iov.iov_len = sizeof(struct fpr_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CFPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (fpr) {
+ for (i = 0; i < 32; i++)
+ fpr[i] = regs->fpr[i];
+ }
+
+ return TEST_PASS;
+}
+
+int write_ckpt_fpr(pid_t child, unsigned long val)
+{
+ struct fpr_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+	regs = (struct fpr_regs *) malloc(sizeof(struct fpr_regs));
+	if (!regs) {
+		perror("malloc() failed");
+		return TEST_FAIL;
+	}
+ iov.iov_base = regs;
+ iov.iov_len = sizeof(struct fpr_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CFPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 0; i < 32; i++)
+ regs->fpr[i] = val;
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CFPR, &iov);
+ if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* GPR */
+int show_gpr(pid_t child, unsigned long *gpr)
+{
+ struct pt_regs *regs;
+ int ret, i;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ ret = ptrace(PTRACE_GETREGS, child, NULL, regs);
+ if (ret) {
+		perror("ptrace(PTRACE_GETREGS) failed");
+ return TEST_FAIL;
+ }
+
+ if (gpr) {
+ for (i = 14; i < 32; i++)
+ gpr[i-14] = regs->gpr[i];
+ }
+
+ return TEST_PASS;
+}
+
+int write_gpr(pid_t child, unsigned long val)
+{
+ struct pt_regs *regs;
+ int i, ret;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ ret = ptrace(PTRACE_GETREGS, child, NULL, regs);
+ if (ret) {
+		perror("ptrace(PTRACE_GETREGS) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 14; i < 32; i++)
+ regs->gpr[i] = val;
+
+ ret = ptrace(PTRACE_SETREGS, child, NULL, regs);
+ if (ret) {
+		perror("ptrace(PTRACE_SETREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_ckpt_gpr(pid_t child, unsigned long *gpr)
+{
+ struct pt_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(struct pt_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CGPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (gpr) {
+ for (i = 14; i < 32; i++)
+ gpr[i-14] = regs->gpr[i];
+ }
+
+ return TEST_PASS;
+}
+
+int write_ckpt_gpr(pid_t child, unsigned long val)
+{
+ struct pt_regs *regs;
+ struct iovec iov;
+ int ret, i;
+
+ regs = (struct pt_regs *) malloc(sizeof(struct pt_regs));
+ if (!regs) {
+		perror("malloc() failed");
+ return TEST_FAIL;
+ }
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(struct pt_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CGPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ for (i = 14; i < 32; i++)
+ regs->gpr[i] = val;
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CGPR, &iov);
+ if (ret) {
+		perror("ptrace(PTRACE_SETREGSET) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* VMX */
+int show_vmx(pid_t child, unsigned long vmx[][2])
+{
+ int ret;
+
+ ret = ptrace(PTRACE_GETVRREGS, child, 0, vmx);
+ if (ret) {
+ perror("ptrace(PTRACE_GETVRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_vmx_ckpt(pid_t child, unsigned long vmx[][2])
+{
+ unsigned long regs[34][2];
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CVMX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET, NT_PPC_TM_CVMX) failed");
+ return TEST_FAIL;
+ }
+ memcpy(vmx, regs, sizeof(regs));
+ return TEST_PASS;
+}
+
+
+int write_vmx(pid_t child, unsigned long vmx[][2])
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SETVRREGS, child, 0, vmx);
+ if (ret) {
+ perror("ptrace(PTRACE_SETVRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int write_vmx_ckpt(pid_t child, unsigned long vmx[][2])
+{
+ unsigned long regs[34][2];
+ struct iovec iov;
+ int ret;
+
+ memcpy(regs, vmx, sizeof(regs));
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CVMX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET, NT_PPC_TM_CVMX) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* VSX */
+int show_vsx(pid_t child, unsigned long *vsx)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_GETVSRREGS, child, 0, vsx);
+ if (ret) {
+ perror("ptrace(PTRACE_GETVSRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int show_vsx_ckpt(pid_t child, unsigned long *vsx)
+{
+ unsigned long regs[32];
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_CVSX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET, NT_PPC_TM_CVSX) failed");
+ return TEST_FAIL;
+ }
+ memcpy(vsx, regs, sizeof(regs));
+ return TEST_PASS;
+}
+
+int write_vsx(pid_t child, unsigned long *vsx)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SETVSRREGS, child, 0, vsx);
+ if (ret) {
+ perror("ptrace(PTRACE_SETVSRREGS) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+int write_vsx_ckpt(pid_t child, unsigned long *vsx)
+{
+ unsigned long regs[32];
+ struct iovec iov;
+ int ret;
+
+ memcpy(regs, vsx, sizeof(regs));
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PPC_TM_CVSX, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_SETREGSET, NT_PPC_TM_CVSX) failed");
+ return TEST_FAIL;
+ }
+ return TEST_PASS;
+}
+
+/* TM SPR */
+int show_tm_spr(pid_t child, struct tm_spr_regs *out)
+{
+ struct tm_spr_regs *regs;
+ struct iovec iov;
+ int ret;
+
+ regs = (struct tm_spr_regs *) malloc(sizeof(struct tm_spr_regs));
+ if (!regs) {
+ perror("malloc() failed");
+ return TEST_FAIL;
+ }
+
+ iov.iov_base = (u64 *) regs;
+ iov.iov_len = sizeof(struct tm_spr_regs);
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PPC_TM_SPR, &iov);
+ if (ret) {
+ perror("ptrace(PTRACE_GETREGSET) failed");
+ return TEST_FAIL;
+ }
+
+ if (out)
+ memcpy(out, regs, sizeof(struct tm_spr_regs));
+
+ return TEST_PASS;
+}
+
+
+
+/* Analyse TEXASR after TM failure */
+inline unsigned long get_tfiar(void)
+{
+ unsigned long ret;
+
+ asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR));
+ return ret;
+}
+
+void analyse_texasr(unsigned long texasr)
+{
+ printf("TEXASR: %16lx\t", texasr);
+
+ if (texasr & TEXASR_FP)
+ printf("TEXASR_FP ");
+
+ if (texasr & TEXASR_DA)
+ printf("TEXASR_DA ");
+
+ if (texasr & TEXASR_NO)
+ printf("TEXASR_NO ");
+
+ if (texasr & TEXASR_FO)
+ printf("TEXASR_FO ");
+
+ if (texasr & TEXASR_SIC)
+ printf("TEXASR_SIC ");
+
+ if (texasr & TEXASR_NTC)
+ printf("TEXASR_NTC ");
+
+ if (texasr & TEXASR_TC)
+ printf("TEXASR_TC ");
+
+ if (texasr & TEXASR_TIC)
+ printf("TEXASR_TIC ");
+
+ if (texasr & TEXASR_IC)
+ printf("TEXASR_IC ");
+
+ if (texasr & TEXASR_IFC)
+ printf("TEXASR_IFC ");
+
+ if (texasr & TEXASR_ABT)
+ printf("TEXASR_ABT ");
+
+ if (texasr & TEXASR_SPD)
+ printf("TEXASR_SPD ");
+
+ if (texasr & TEXASR_HV)
+ printf("TEXASR_HV ");
+
+ if (texasr & TEXASR_PR)
+ printf("TEXASR_PR ");
+
+ if (texasr & TEXASR_FS)
+ printf("TEXASR_FS ");
+
+ if (texasr & TEXASR_TE)
+ printf("TEXASR_TE ");
+
+ if (texasr & TEXASR_ROT)
+ printf("TEXASR_ROT ");
+
+ printf("TFIAR :%lx\n", get_tfiar());
+}
+
+void store_gpr(unsigned long *addr);
+void store_fpr(float *addr);
diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh
new file mode 100755
index 000000000..83fb253ae
--- /dev/null
+++ b/tools/testing/selftests/powerpc/scripts/hmi.sh
@@ -0,0 +1,89 @@
+#!/bin/sh
+#
+# Copyright 2015, Daniel Axtens, IBM Corporation
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
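+# Inject recoverable HMIs (Hypervisor Maintenance Interrupts) on each
+# chip/core reported by OPAL and check that Linux logs the expected
+# number of recoveries. Needs xscom access, so run as root on the host.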
+
+# do we have ./getscom, ./putscom?
+if [ -x ./getscom ] && [ -x ./putscom ]; then
+ GETSCOM=./getscom
+ PUTSCOM=./putscom
+elif which getscom > /dev/null; then
+ GETSCOM=$(which getscom)
+ PUTSCOM=$(which putscom)
+else
+ cat <<EOF
+Can't find getscom/putscom in . or \$PATH.
+See https://github.com/open-power/skiboot.
+The tool is in external/xscom-utils
+EOF
+ exit 1
+fi
+
+# We will get 8 HMI events per injection
+# todo: deal with things being offline
+expected_hmis=8
+COUNT_HMIS() {
+ dmesg | grep -c 'Harmless Hypervisor Maintenance interrupt'
+}
+
+# massively expand snooze delay, allowing injection on all cores
+ppc64_cpu --smt-snooze-delay=1000000000
+
+# when we exit, restore it
+trap "ppc64_cpu --smt-snooze-delay=100" 0 1
+
+# for each chip+core combination
+# todo - less fragile parsing
+grep -Eo 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
+while read chipcore; do
+ chip=$(echo "$chipcore"|awk '{print $3}')
+ core=$(echo "$chipcore"|awk '{print $5}')
+ fir="0x1${core}013100"
+
+ # verify that Core FIR is zero as expected
+ if [ "$($GETSCOM -c 0x${chip} $fir)" != 0 ]; then
+ echo "FIR was not zero before injection for chip $chip, core $core. Aborting!"
+ echo "Result of $GETSCOM -c 0x${chip} $fir:"
+ $GETSCOM -c 0x${chip} $fir
+ echo "If you get a -5 error, the core may be in idle state. Try stress-ng."
+ echo "Otherwise, try $PUTSCOM -c 0x${chip} $fir 0"
+ exit 1
+ fi
+
+ # keep track of the number of HMIs handled
+ old_hmis=$(COUNT_HMIS)
+
+ # do injection, adding a marker to dmesg for clarity
+ echo "Injecting HMI on core $core, chip $chip" | tee /dev/kmsg
+ # inject a RegFile recoverable error
+ if ! $PUTSCOM -c 0x${chip} $fir 2000000000000000 > /dev/null; then
+ echo "Error injecting. Aborting!"
+ exit 1
+ fi
+
+ # now we want to wait for all the HMIs to be processed
+ # we expect one per thread on the core
+ i=0;
+ new_hmis=$(COUNT_HMIS)
+ while [ $new_hmis -lt $((old_hmis + expected_hmis)) ] && [ $i -lt 12 ]; do
+ echo "Seen $((new_hmis - old_hmis)) HMI(s) out of $expected_hmis expected, sleeping"
+ sleep 5;
+ i=$((i + 1))
+ new_hmis=$(COUNT_HMIS)
+ done
+ if [ $i = 12 ]; then
+ echo "Haven't seen expected $expected_hmis recoveries after 1 min. Aborting."
+ exit 1
+ fi
+ echo "Processed $expected_hmis events; presumed success. Check dmesg."
+ echo ""
+done
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
new file mode 100644
index 000000000..1b89224a8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -0,0 +1,2 @@
+signal
+signal_tm
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
new file mode 100644
index 000000000..209a958dc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := signal signal_tm
+
+CFLAGS += -maltivec
+$(OUTPUT)/signal_tm: CFLAGS += -mhtm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/signal/signal.S b/tools/testing/selftests/powerpc/signal/signal.S
new file mode 100644
index 000000000..322f2f1fc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal.S
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+
+/* long signal_self(pid_t pid, int sig); */
+FUNC_START(signal_self)
+ li r0,37 /* sys_kill */
+ /* r3 already has our pid in it */
+ /* r4 already has signal type in it */
+ sc
+ bc 4,3,1f
+ subfze r3,r3
+1: blr
+FUNC_END(signal_self)
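+
+/*
+ * In signal_self above, "bc 4,3,1f" branches when cr0.SO is clear,
+ * i.e. the syscall succeeded; otherwise subfze turns the positive
+ * error value in r3 negative for the caller to test.
+ */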
+
+/* long tm_signal_self(pid_t pid, int sig, long *ret); */
+FUNC_START(tm_signal_self)
+ PUSH_BASIC_STACK(8)
+ std r5,STACK_FRAME_PARAM(0)(sp) /* ret */
+ tbegin.
+ beq 1f
+ tsuspend.
+ li r0,37 /* sys_kill */
+ /* r3 already has our pid in it */
+ /* r4 already has signal type in it */
+ sc
+ ld r5,STACK_FRAME_PARAM(0)(sp) /* ret */
+ bc 4,3,2f
+ subfze r3,r3
+2: std r3,0(r5)
+ tabort. 0
+	tresume. /* Let the doomed transaction roll back to tbegin, then to 1: */
+	/*
+	 * The transaction should be properly doomed and we should never
+	 * get here
+	 */
+ li r3,1
+ POP_BASIC_STACK(8)
+ blr
+1: li r3,0
+ POP_BASIC_STACK(8)
+ blr
+FUNC_END(tm_signal_self)
diff --git a/tools/testing/selftests/powerpc/signal/signal.c b/tools/testing/selftests/powerpc/signal/signal.c
new file mode 100644
index 000000000..e7dedd28b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * A signal sent to oneself should always be delivered.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 5
+
+extern long signal_self(pid_t pid, int sig);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGUSR1)
+ signaled = 1;
+ else
+ fail = 1;
+}
+
+static int test_signal()
+{
+ int i;
+ struct sigaction act;
+ pid_t ppid = getpid();
+ pid_t pid;
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction SIGUSR1");
+ exit(1);
+ }
+ if (sigaction(SIGALRM, &act, NULL) < 0) {
+ perror("sigaction SIGALRM");
+ exit(1);
+ }
+
+ /* Don't do this for MAX_ATTEMPT, it's simply too long */
+ for (i = 0; i < 1000; i++) {
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(1);
+ }
+ if (pid == 0) {
+ signal_self(ppid, SIGUSR1);
+ exit(1);
+ } else {
+ alarm(0); /* Disable any pending */
+ alarm(2);
+ while (!signaled && !fail)
+ asm volatile("": : :"memory");
+ if (!signaled) {
+ fprintf(stderr, "Didn't get signal from child\n");
+ FAIL_IF(1); /* For the line number */
+ }
+ /* Otherwise we'll loop too fast and fork() will eventually fail */
+ waitpid(pid, NULL, 0);
+ }
+ }
+
+ for (i = 0; i < MAX_ATTEMPT; i++) {
+ long rc;
+
+ alarm(0); /* Disable any pending */
+ signaled = 0;
+ alarm(TIMEOUT);
+ rc = signal_self(ppid, SIGUSR1);
+ if (rc) {
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx\n",
+ i, fail, rc);
+ FAIL_IF(1); /* For the line number */
+ }
+ while (!signaled && !fail)
+ asm volatile("": : :"memory");
+ if (!signaled) {
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx\n",
+ i, fail, rc);
+ FAIL_IF(1); /* For the line number */
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(test_signal, "signal");
+}
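The empty asm volatile("": : :"memory") in the wait loops above is a compiler barrier: it forces signaled and fail to be re-read on every iteration rather than hoisted out of the loop. A self-contained sketch (illustration only) of the same idea, using volatile to get the equivalent effect:

    #include <signal.h>
    #include <stdio.h>
    #include <unistd.h>

    /* volatile: the flag must be re-read on every loop iteration. */
    static volatile sig_atomic_t got_signal;

    static void handler(int sig)
    {
        (void)sig;
        got_signal = 1;
    }

    int main(void)
    {
        signal(SIGALRM, handler);
        alarm(1);

        /*
         * Without volatile (or a barrier such as
         * asm volatile("" ::: "memory") in the body), the compiler
         * could load got_signal once and spin forever.
         */
        while (!got_signal)
            ;

        puts("signal observed");
        return 0;
    }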
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
new file mode 100644
index 000000000..2e7451a37
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * A signal sent to oneself should always be delivered.
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "../tm/tm.h"
+
+#define MAX_ATTEMPT 500000
+#define TIMEOUT 10
+
+extern long tm_signal_self(pid_t pid, int sig, long *ret);
+
+static sig_atomic_t signaled;
+static sig_atomic_t fail;
+
+static void signal_handler(int sig)
+{
+ if (tcheck_active()) {
+ fail = 2;
+ return;
+ }
+
+ if (sig == SIGUSR1)
+ signaled = 1;
+ else
+ fail = 1;
+}
+
+static int test_signal_tm()
+{
+ int i;
+ struct sigaction act;
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction SIGUSR1");
+ exit(1);
+ }
+ if (sigaction(SIGALRM, &act, NULL) < 0) {
+ perror("sigaction SIGALRM");
+ exit(1);
+ }
+
+ SKIP_IF(!have_htm());
+
+ for (i = 0; i < MAX_ATTEMPT; i++) {
+ /*
+ * If anything bad happens in the asm and we fail to set ret
+ * because of *handwave* TM, this will cause a failure
+ */
+ long ret = 0xdead;
+ long rc = 0xbeef;
+
+ alarm(0); /* Disable any pending */
+ signaled = 0;
+ alarm(TIMEOUT);
+ FAIL_IF(tcheck_transactional());
+ rc = tm_signal_self(getpid(), SIGUSR1, &ret);
+ if (ret == 0xdead)
+ /*
+ * This basically means the transaction aborted before we
+ * even got to the suspend... this is crazy but it
+ * happens.
+ * Yes this also means we might never make forward
+ * progress... the alarm() will trip eventually...
+ */
+ continue;
+
+ if (rc || ret) {
+ /* Ret is actually an errno */
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n",
+ i, fail, rc, ret);
+ FAIL_IF(ret);
+ }
+ while (!signaled && !fail)
+ asm volatile("": : :"memory");
+ if (!signaled) {
+ fprintf(stderr, "(%d) Fail reason: %d rc=0x%lx ret=0x%lx\n",
+ i, fail, rc, ret);
+ FAIL_IF(fail); /* For the line number */
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_signal_tm, "signal_tm");
+}
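The TEXASR/TFIAR dump above uses GCC's HTM builtins, which require -mhtm and a CPU with transactional memory. A hedged standalone sketch of the same diagnostic idiom; as I understand GCC's powerpc HTM builtins, __builtin_tbegin() returns nonzero when the transaction actually starts, so the abort path falls through to the dump:

    /* Build with: gcc -mhtm tm_dump.c (powerpc64 with HTM only). */
    #include <stdio.h>

    static void dump_tm_failure(void)
    {
        /* TEXASR records why the last transaction failed; TFIAR records where. */
        printf("TEXASR 0x%016lx, TFIAR 0x%016lx\n",
               (unsigned long)__builtin_get_texasr(),
               (unsigned long)__builtin_get_tfiar());
    }

    int main(void)
    {
        if (__builtin_tbegin(0))
            __builtin_tabort(0);    /* force a failure to populate the SPRs */

        dump_tm_failure();          /* reached via the tbegin failure path */
        return 0;
    }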
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
new file mode 100644
index 000000000..0b43da74e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -0,0 +1 @@
+memcmp
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
new file mode 100644
index 000000000..7fc0623d8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0
+# The loops are all 64-bit code
+CFLAGS += -I$(CURDIR)
+
+EXTRA_SOURCES := ../harness.c
+
+build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi)
+
+TEST_GEN_PROGS := memcmp_64 strlen
+
+$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/memcmp_32: memcmp.c
+$(OUTPUT)/memcmp_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += memcmp_32
+endif
+
+$(OUTPUT)/strlen: strlen.c string.c
+
+ifeq ($(build_32bit),1)
+$(OUTPUT)/strlen_32: strlen.c
+$(OUTPUT)/strlen_32: CFLAGS += -m32
+
+TEST_GEN_PROGS += strlen_32
+endif
+
+ASFLAGS = $(CFLAGS)
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/cache.h b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
new file mode 100644
index 000000000..8a2840831
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/cache.h
@@ -0,0 +1 @@
+#define IFETCH_ALIGN_BYTES 4
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/asm/export.h
new file mode 100644
index 000000000..2d14a9b42
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/export.h
@@ -0,0 +1 @@
+#define EXPORT_SYMBOL(x)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
new file mode 100644
index 000000000..9de413c0c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc-opcode.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright 2009 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * provides masks and opcode images for use by code generation, emulation
+ * and for instructions that older assemblers might not know about
+ */
+#ifndef _ASM_POWERPC_PPC_OPCODE_H
+#define _ASM_POWERPC_PPC_OPCODE_H
+
+
+# define stringify_in_c(...) __VA_ARGS__
+# define ASM_CONST(x) x
+
+
+#define PPC_INST_VCMPEQUD_RC 0x100000c7
+#define PPC_INST_VCMPEQUB_RC 0x10000006
+
+#define __PPC_RC21 (0x1 << 10)
+
+/* macros to insert fields into opcodes */
+#define ___PPC_RA(a) (((a) & 0x1f) << 16)
+#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+#define ___PPC_RS(s) (((s) & 0x1f) << 21)
+#define ___PPC_RT(t) ___PPC_RS(t)
+
+#define VCMPEQUD_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUD_RC | \
+ ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+ ___PPC_RB(vrb) | __PPC_RC21)
+
+#define VCMPEQUB_RC(vrt, vra, vrb) stringify_in_c(.long PPC_INST_VCMPEQUB_RC | \
+ ___PPC_RT(vrt) | ___PPC_RA(vra) | \
+ ___PPC_RB(vrb) | __PPC_RC21)
+
+#endif /* _ASM_POWERPC_PPC_OPCODE_H */
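To see what these macros expand to, a small host-side sketch (illustration only) that assembles the same instruction word; for vcmpequd. v0,v1,v2 the fields OR together to 0x100114c7:

    #include <stdint.h>
    #include <stdio.h>

    #define PPC_INST_VCMPEQUD_RC 0x100000c7
    #define __PPC_RC21     (0x1 << 10)
    #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
    #define ___PPC_RB(b)   (((b) & 0x1f) << 11)
    #define ___PPC_RS(s)   (((s) & 0x1f) << 21)
    #define ___PPC_RT(t)   ___PPC_RS(t)

    int main(void)
    {
        /* Opcode image for vcmpequd. v0,v1,v2: VRT=0, VRA=1, VRB=2, Rc=1. */
        uint32_t insn = PPC_INST_VCMPEQUD_RC | ___PPC_RT(0) |
                        ___PPC_RA(1) | ___PPC_RB(2) | __PPC_RC21;

        printf(".long 0x%08x\n", insn);    /* prints .long 0x100114c7 */
        return 0;
    }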
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
new file mode 100644
index 000000000..d2c0a911f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PPC_ASM_H
+#define _PPC_ASM_H
+#include <ppc-asm.h>
+
+#ifndef r1
+#define r1 sp
+#endif
+
+#define _GLOBAL(A) FUNC_START(test_ ## A)
+#define _GLOBAL_TOC(A) FUNC_START(test_ ## A)
+
+#define CONFIG_ALTIVEC
+
+#define R14 r14
+#define R15 r15
+#define R16 r16
+#define R17 r17
+#define R18 r18
+#define R19 r19
+#define R20 r20
+#define R21 r21
+#define R22 r22
+#define R29 r29
+#define R30 r30
+#define R31 r31
+
+#define STACKFRAMESIZE 256
+#define STK_REG(i) (112 + ((i)-14)*8)
+
+#define BEGIN_FTR_SECTION
+#define END_FTR_SECTION_IFSET(val)
+#endif
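STK_REG() maps a non-volatile GPR number to its save slot in this frame; a tiny sketch (illustration only) of the arithmetic:

    #include <stdio.h>

    #define STACKFRAMESIZE 256
    #define STK_REG(i) (112 + ((i) - 14) * 8)

    int main(void)
    {
        /* r14 is the first saved non-volatile GPR; each slot is 8 bytes. */
        for (int r = 14; r <= 31; r++)
            printf("r%d -> sp + %d\n", r, STK_REG(r));

        /* r14 -> sp + 112 ... r31 -> sp + 248, within STACKFRAMESIZE (256). */
        return 0;
    }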
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
new file mode 100644
index 000000000..b1fa75469
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 10000
+
+#define LARGE_SIZE (5 * 1024)
+#define LARGE_ITERATIONS 1000
+#define LARGE_MAX_OFFSET 32
+#define LARGE_SIZE_START 4096
+
+#define MAX_OFFSET_DIFF_S1_S2 48
+
+int vmx_count;
+int enter_vmx_ops(void)
+{
+ vmx_count++;
+ return 1;
+}
+
+void exit_vmx_ops(void)
+{
+ vmx_count--;
+}
+int test_memcmp(const void *s1, const void *s2, size_t n);
+
+/* test all offsets and lengths */
+static void test_one(char *s1, char *s2, unsigned long max_offset,
+ unsigned long size_start, unsigned long max_size)
+{
+ unsigned long offset, size;
+
+ for (offset = 0; offset < max_offset; offset++) {
+ for (size = size_start; size < (max_size - offset); size++) {
+ int x, y;
+ unsigned long i;
+
+ y = memcmp(s1+offset, s2+offset, size);
+ x = test_memcmp(s1+offset, s2+offset, size);
+
+ if (((x ^ y) < 0) && /* Trick to compare sign */
+ ((x | y) != 0)) { /* check for zero */
+ printf("memcmp returned %d, should have returned %d (offset %ld size %ld)\n", x, y, offset, size);
+
+ for (i = offset; i < offset+size; i++)
+ printf("%02x ", s1[i]);
+ printf("\n");
+
+ for (i = offset; i < offset+size; i++)
+ printf("%02x ", s2[i]);
+ printf("\n");
+ abort();
+ }
+
+ if (vmx_count != 0) {
+ printf("vmx enter/exit not paired.(offset:%ld size:%ld s1:%p s2:%p vc:%d\n",
+ offset, size, s1, s2, vmx_count);
+ printf("\n");
+ abort();
+ }
+ }
+ }
+}
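The condition above only flags a mismatch when the two memcmp() results disagree in sign, since callers may rely on the sign (and on zero meaning equality) but not on the magnitude. A standalone sketch (illustration only) of the predicate:

    #include <stdio.h>

    /*
     * (x ^ y) < 0 is true exactly when the sign bits of x and y differ,
     * i.e. one result is negative and the other is not. The
     * (x | y) != 0 clause is the "check for zero" in the comment above:
     * it spells out that two zero results never count as a mismatch.
     */
    static int sign_mismatch(int x, int y)
    {
        return ((x ^ y) < 0) && ((x | y) != 0);
    }

    int main(void)
    {
        printf("%d\n", sign_mismatch(-3, 7));  /* 1: signs differ      */
        printf("%d\n", sign_mismatch(2, 9));   /* 0: both positive     */
        printf("%d\n", sign_mismatch(0, 0));   /* 0: both zero (equal) */
        return 0;
    }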
+
+static int testcase(bool islarge)
+{
+ char *s1;
+ char *s2;
+ unsigned long i;
+
+ unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
+ unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+ int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+ s1 = memalign(128, alloc_size);
+ if (!s1) {
+ perror("memalign");
+ exit(1);
+ }
+
+ s2 = memalign(128, alloc_size);
+ if (!s2) {
+ perror("memalign");
+ exit(1);
+ }
+
+ srandom(time(0));
+
+ for (i = 0; i < iterations; i++) {
+ unsigned long j;
+ unsigned long change;
+ char *rand_s1 = s1;
+ char *rand_s2 = s2;
+
+ for (j = 0; j < alloc_size; j++)
+ s1[j] = random();
+
+ rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+ rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+ memcpy(rand_s2, rand_s1, comp_size);
+
+ /* change one byte */
+ change = random() % comp_size;
+ rand_s2[change] = random() & 0xff;
+
+ if (islarge)
+ test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+ LARGE_SIZE_START, comp_size);
+ else
+ test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
+ }
+
+ srandom(time(0));
+
+ for (i = 0; i < iterations; i++) {
+ unsigned long j;
+ unsigned long change;
+ char *rand_s1 = s1;
+ char *rand_s2 = s2;
+
+ for (j = 0; j < alloc_size; j++)
+ s1[j] = random();
+
+ rand_s1 += random() % MAX_OFFSET_DIFF_S1_S2;
+ rand_s2 += random() % MAX_OFFSET_DIFF_S1_S2;
+ memcpy(rand_s2, rand_s1, comp_size);
+
+ /* change multiple bytes, 1/8 of total */
+ for (j = 0; j < comp_size / 8; j++) {
+ change = random() % comp_size;
+ s2[change] = random() & 0xff;
+ }
+
+ if (islarge)
+ test_one(rand_s1, rand_s2, LARGE_MAX_OFFSET,
+ LARGE_SIZE_START, comp_size);
+ else
+ test_one(rand_s1, rand_s2, SIZE, 0, comp_size);
+ }
+
+ return 0;
+}
+
+static int testcases(void)
+{
+ testcase(0);
+ testcase(1);
+ return 0;
+}
+
+int main(void)
+{
+ test_harness_set_timeout(300);
+ return test_harness(testcases, "memcmp");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_32.S b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
new file mode 120000
index 000000000..056f2b3af
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_32.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp_64.S b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S
new file mode 120000
index 000000000..9bc87e438
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/memcmp_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/stringloops/string.c b/tools/testing/selftests/powerpc/stringloops/string.c
new file mode 100644
index 000000000..45e777541
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/string.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copied from linux/lib/string.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ */
+
+#include <stddef.h>
+
+/**
+ * strlen - Find the length of a string
+ * @s: The string to be sized
+ */
+size_t test_strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen.c b/tools/testing/selftests/powerpc/stringloops/strlen.c
new file mode 100644
index 000000000..9055ebc48
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "utils.h"
+
+#define SIZE 256
+#define ITERATIONS 1000
+#define ITERATIONS_BENCH 100000
+
+int test_strlen(const void *s);
+
+/* test all offsets and lengths */
+static void test_one(char *s)
+{
+ unsigned long offset;
+
+ for (offset = 0; offset < SIZE; offset++) {
+ int x, y;
+ unsigned long i;
+
+ y = strlen(s + offset);
+ x = test_strlen(s + offset);
+
+ if (x != y) {
+ printf("strlen() returned %d, should have returned %d (%p offset %ld)\n", x, y, s, offset);
+
+ for (i = offset; i < SIZE; i++)
+ printf("%02x ", s[i]);
+ printf("\n");
+ }
+ }
+}
+
+static void bench_test(char *s)
+{
+ struct timespec ts_start, ts_end;
+ int i;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ for (i = 0; i < ITERATIONS_BENCH; i++)
+ test_strlen(s);
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_end);
+
+ printf("len %3.3d : time = %.6f\n", test_strlen(s), ts_end.tv_sec - ts_start.tv_sec + (ts_end.tv_nsec - ts_start.tv_nsec) / 1e9);
+}
+
+static int testcase(void)
+{
+ char *s;
+ unsigned long i;
+
+ s = memalign(128, SIZE);
+ if (!s) {
+ perror("memalign");
+ exit(1);
+ }
+
+ srandom(1);
+
+ memset(s, 0, SIZE);
+ for (i = 0; i < SIZE; i++) {
+ char c;
+
+ do {
+ c = random() & 0x7f;
+ } while (!c);
+ s[i] = c;
+ test_one(s);
+ }
+
+ for (i = 0; i < ITERATIONS; i++) {
+ unsigned long j;
+
+ for (j = 0; j < SIZE; j++) {
+ char c;
+
+ do {
+ c = random() & 0x7f;
+ } while (!c);
+ s[j] = c;
+ }
+ for (j = 0; j < sizeof(long); j++) {
+ s[SIZE - 1 - j] = 0;
+ test_one(s);
+ }
+ }
+
+ for (i = 0; i < SIZE; i++) {
+ char c;
+
+ do {
+ c = random() & 0x7f;
+ } while (!c);
+ s[i] = c;
+ }
+
+ bench_test(s);
+
+ s[16] = 0;
+ bench_test(s);
+
+ s[8] = 0;
+ bench_test(s);
+
+ s[4] = 0;
+ bench_test(s);
+
+ s[3] = 0;
+ bench_test(s);
+
+ s[2] = 0;
+ bench_test(s);
+
+ s[1] = 0;
+ bench_test(s);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(testcase, "strlen");
+}
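bench_test() folds the two CLOCK_MONOTONIC samples into seconds inline; the same arithmetic, pulled out as a helper (illustration only):

    #include <stdio.h>
    #include <time.h>

    /* Seconds elapsed between two CLOCK_MONOTONIC samples. */
    static double elapsed_sec(const struct timespec *start,
                              const struct timespec *end)
    {
        return (end->tv_sec - start->tv_sec) +
               (end->tv_nsec - start->tv_nsec) / 1e9;
    }

    int main(void)
    {
        struct timespec a, b;

        clock_gettime(CLOCK_MONOTONIC, &a);
        for (volatile int i = 0; i < 1000000; i++)
            ;    /* some work to time */
        clock_gettime(CLOCK_MONOTONIC, &b);

        printf("time = %.6f\n", elapsed_sec(&a, &b));
        return 0;
    }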
diff --git a/tools/testing/selftests/powerpc/stringloops/strlen_32.S b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
new file mode 120000
index 000000000..72b13731b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/stringloops/strlen_32.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/strlen_32.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore
new file mode 100644
index 000000000..89e762eab
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore
@@ -0,0 +1,2 @@
+switch_endian_test
+check-reversed.S
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
new file mode 100644
index 000000000..bdc081afe
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := switch_endian_test
+
+ASFLAGS += -O2 -Wall -g -nostdlib -m64
+
+EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
+$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
+
+$(OUTPUT)/check-reversed.o: $(OUTPUT)/check.o
+ $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O binary $< $@
+
+$(OUTPUT)/check-reversed.S: $(OUTPUT)/check-reversed.o
+ hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@
diff --git a/tools/testing/selftests/powerpc/switch_endian/check.S b/tools/testing/selftests/powerpc/switch_endian/check.S
new file mode 100644
index 000000000..927a5c675
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/check.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "common.h"
+
+/*
+ * Checks that registers contain what we expect, ie. they were not clobbered by
+ * the syscall.
+ *
+ * r15: pattern to check registers against.
+ *
+ * At the end r3 == 0 if everything's OK.
+ */
+ nop # guaranteed to be illegal in reverse-endian
+ mr r9,r15
+ cmpd r9,r3 # check r3
+ bne 1f
+ addi r9,r15,4 # check r4
+ cmpd r9,r4
+ bne 1f
+ lis r9,0x00FF # check CR
+ ori r9,r9,0xF000
+ mfcr r10
+ and r10,r10,r9
+ cmpw r9,r10
+ addi r9,r15,34 # set failure marker before bne (addi doesn't touch CR)
+ bne 1f
+ addi r9,r15,32 # check LR
+ mflr r10
+ cmpd r9,r10
+ bne 1f
+ addi r9,r15,5 # check r5
+ cmpd r9,r5
+ bne 1f
+ addi r9,r15,6 # check r6
+ cmpd r9,r6
+ bne 1f
+ addi r9,r15,7 # check r7
+ cmpd r9,r7
+ bne 1f
+ addi r9,r15,8 # check r8
+ cmpd r9,r8
+ bne 1f
+ addi r9,r15,13 # check r13
+ cmpd r9,r13
+ bne 1f
+ addi r9,r15,14 # check r14
+ cmpd r9,r14
+ bne 1f
+ addi r9,r15,16 # check r16
+ cmpd r9,r16
+ bne 1f
+ addi r9,r15,17 # check r17
+ cmpd r9,r17
+ bne 1f
+ addi r9,r15,18 # check r18
+ cmpd r9,r18
+ bne 1f
+ addi r9,r15,19 # check r19
+ cmpd r9,r19
+ bne 1f
+ addi r9,r15,20 # check r20
+ cmpd r9,r20
+ bne 1f
+ addi r9,r15,21 # check r21
+ cmpd r9,r21
+ bne 1f
+ addi r9,r15,22 # check r22
+ cmpd r9,r22
+ bne 1f
+ addi r9,r15,23 # check r23
+ cmpd r9,r23
+ bne 1f
+ addi r9,r15,24 # check r24
+ cmpd r9,r24
+ bne 1f
+ addi r9,r15,25 # check r25
+ cmpd r9,r25
+ bne 1f
+ addi r9,r15,26 # check r26
+ cmpd r9,r26
+ bne 1f
+ addi r9,r15,27 # check r27
+ cmpd r9,r27
+ bne 1f
+ addi r9,r15,28 # check r28
+ cmpd r9,r28
+ bne 1f
+ addi r9,r15,29 # check r29
+ cmpd r9,r29
+ bne 1f
+ addi r9,r15,30 # check r30
+ cmpd r9,r30
+ bne 1f
+ addi r9,r15,31 # check r31
+ cmpd r9,r31
+ bne 1f
+ b 2f
+1: mr r3, r9
+ li r0, __NR_exit
+ sc
+2: li r0, __NR_switch_endian
+ nop
diff --git a/tools/testing/selftests/powerpc/switch_endian/common.h b/tools/testing/selftests/powerpc/switch_endian/common.h
new file mode 100644
index 000000000..1434cbc2a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/common.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#ifndef __NR_switch_endian
+#define __NR_switch_endian 363
+#endif
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
new file mode 100644
index 000000000..cc4930467
--- /dev/null
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include "common.h"
+
+ .data
+ .balign 8
+message:
+ .ascii "success: switch_endian_test\n\0"
+
+ .section ".toc"
+ .balign 8
+pattern:
+ .8byte 0x5555AAAA5555AAAA
+
+ .text
+FUNC_START(_start)
+ /* Load the pattern */
+ ld r15, pattern@TOC(%r2)
+
+ /* Setup CR, only CR2-CR4 are maintained */
+ lis r3, 0x00FF
+ ori r3, r3, 0xF000
+ mtcr r3
+
+ /* Load the pattern slightly modified into the registers */
+ mr r3, r15
+ addi r4, r15, 4
+
+ addi r5, r15, 32
+ mtlr r5
+
+ addi r5, r15, 5
+ addi r6, r15, 6
+ addi r7, r15, 7
+ addi r8, r15, 8
+
+ /* r9 - r12 are clobbered */
+
+ addi r13, r15, 13
+ addi r14, r15, 14
+
+ /* Skip r15 we're using it */
+
+ addi r16, r15, 16
+ addi r17, r15, 17
+ addi r18, r15, 18
+ addi r19, r15, 19
+ addi r20, r15, 20
+ addi r21, r15, 21
+ addi r22, r15, 22
+ addi r23, r15, 23
+ addi r24, r15, 24
+ addi r25, r15, 25
+ addi r26, r15, 26
+ addi r27, r15, 27
+ addi r28, r15, 28
+ addi r29, r15, 29
+ addi r30, r15, 30
+ addi r31, r15, 31
+
+ /*
+ * Call the syscall to switch endian.
+ * It clobbers r9-r12, XER, CTR and CR0-1,5-7.
+ */
+ li r0, __NR_switch_endian
+ sc
+
+#include "check-reversed.S"
+
+ /* Flip back, r0 already has the switch syscall number */
+ .long 0x02000044 /* sc */
+
+#include "check.S"
+
+ li r0, __NR_write
+ li r3, 1 /* stdout */
+ ld r4, message@got(%r2)
+ li r5, 28 /* strlen(message) */
+ sc
+ li r0, __NR_exit
+ li r3, 0
+ sc
+ b .
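The magic .long 0x02000044 above is just the sc instruction (0x44000002) with its four bytes reversed, which is also what the Makefile's objcopy --reverse-bytes=4 does to all of check.S. A quick host-side sketch (illustration only):

    #include <stdint.h>
    #include <stdio.h>

    /* Reverse the four bytes of one instruction word, as
     * 'objcopy --reverse-bytes=4' does for a whole section. */
    static uint32_t reverse32(uint32_t x)
    {
        return (x >> 24) | ((x >> 8) & 0x0000ff00) |
               ((x << 8) & 0x00ff0000) | (x << 24);
    }

    int main(void)
    {
        /* 'sc' as seen by a CPU running in the opposite endianness. */
        printf("0x%08x\n", reverse32(0x44000002));    /* 0x02000044 */
        return 0;
    }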
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
new file mode 100644
index 000000000..f0f3fcc9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -0,0 +1 @@
+ipc_unmuxed
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
new file mode 100644
index 000000000..161b88463
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -0,0 +1,8 @@
+TEST_GEN_PROGS := ipc_unmuxed
+
+CFLAGS += -I../../../../../usr/include
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/syscalls/ipc.h b/tools/testing/selftests/powerpc/syscalls/ipc.h
new file mode 100644
index 000000000..26a20682c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/ipc.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifdef __NR_semop
+DO_TEST(semop, __NR_semop)
+#endif
+
+#ifdef __NR_semget
+DO_TEST(semget, __NR_semget)
+#endif
+
+#ifdef __NR_semctl
+DO_TEST(semctl, __NR_semctl)
+#endif
+
+#ifdef __NR_semtimedop
+DO_TEST(semtimedop, __NR_semtimedop)
+#endif
+
+#ifdef __NR_msgsnd
+DO_TEST(msgsnd, __NR_msgsnd)
+#endif
+
+#ifdef __NR_msgrcv
+DO_TEST(msgrcv, __NR_msgrcv)
+#endif
+
+#ifdef __NR_msgget
+DO_TEST(msgget, __NR_msgget)
+#endif
+
+#ifdef __NR_msgctl
+DO_TEST(msgctl, __NR_msgctl)
+#endif
+
+#ifdef __NR_shmat
+DO_TEST(shmat, __NR_shmat)
+#endif
+
+#ifdef __NR_shmdt
+DO_TEST(shmdt, __NR_shmdt)
+#endif
+
+#ifdef __NR_shmget
+DO_TEST(shmget, __NR_shmget)
+#endif
+
+#ifdef __NR_shmctl
+DO_TEST(shmctl, __NR_shmctl)
+#endif
diff --git a/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c b/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c
new file mode 100644
index 000000000..2ac02706f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/ipc_unmuxed.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test simply tests that certain syscalls are implemented. It doesn't
+ * actually exercise their logic in any way.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "utils.h"
+
+
+#define DO_TEST(_name, _num) \
+static int test_##_name(void) \
+{ \
+ int rc; \
+ printf("Testing " #_name); \
+ errno = 0; \
+ rc = syscall(_num, -1, 0, 0, 0, 0, 0); \
+ printf("\treturned %d, errno %d\n", rc, errno); \
+ return errno == ENOSYS; \
+}
+
+#include "ipc.h"
+#undef DO_TEST
+
+static int ipc_unmuxed(void)
+{
+ int tests_done = 0;
+
+#define DO_TEST(_name, _num) \
+ FAIL_IF(test_##_name()); \
+ tests_done++;
+
+#include "ipc.h"
+#undef DO_TEST
+
+ /*
+ * If we ran no tests then it means none of the syscall numbers were
+ * defined, possibly because we were built against old headers. But it
+ * means we didn't really test anything, so instead of passing mark it
+ * as a skip to give the user a clue.
+ */
+ SKIP_IF(tests_done == 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(ipc_unmuxed, "ipc_unmuxed");
+}
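ipc.h together with the two DO_TEST() definitions is a classic X-macro: one syscall list is expanded once into test functions and once into calls. A condensed, self-contained sketch of the pattern (illustration only; the real code re-includes ipc.h instead of using a list macro):

    #include <stdio.h>

    /* The list lives in one place (in the test it lives in ipc.h). */
    #define TEST_LIST \
        DO_TEST(alpha) \
        DO_TEST(beta)

    /* First expansion: generate one function per entry. */
    #define DO_TEST(_name) \
        static int test_##_name(void) \
        { \
            printf("testing " #_name "\n"); \
            return 0; \
        }
    TEST_LIST
    #undef DO_TEST

    int main(void)
    {
        /* Second expansion: generate one call per entry. */
    #define DO_TEST(_name) test_##_name();
        TEST_LIST
    #undef DO_TEST
        return 0;
    }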
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
new file mode 100644
index 000000000..c3ee8393d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -0,0 +1,17 @@
+tm-resched-dscr
+tm-syscall
+tm-signal-msr-resv
+tm-signal-stack
+tm-vmxcopy
+tm-fork
+tm-tar
+tm-tmspr
+tm-exec
+tm-signal-context-chk-fpu
+tm-signal-context-chk-gpr
+tm-signal-context-chk-vmx
+tm-signal-context-chk-vsx
+tm-vmx-unavail
+tm-unavailable
+tm-trap
+tm-sigreturn
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
new file mode 100644
index 000000000..9fc2cf6fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu \
+ tm-signal-context-chk-vmx tm-signal-context-chk-vsx
+
+TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
+ tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
+ $(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+CFLAGS += -mhtm
+
+$(OUTPUT)/tm-syscall: tm-syscall-asm.S
+$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
+$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
+$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
+$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
+$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
+
+SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
+$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
+$(SIGNAL_CONTEXT_CHK_TESTS): CFLAGS += -mhtm -m64 -mvsx
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
new file mode 100644
index 000000000..3d27fa0ec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Syscalls can be performed provided the transactions are suspended.
+ * The exec() class of syscall is unique as a new process is loaded.
+ *
+ * It makes little sense for the previously suspended transaction to
+ * still exist after an exec() call.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <inttypes.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+static char *path;
+
+static int test_exec(void)
+{
+ SKIP_IF(!have_htm());
+
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f; "
+ "tsuspend.;"
+ "1: ;"
+ : : : "memory");
+
+ execl(path, "tm-exec", "--child", NULL);
+
+ /* Shouldn't get here */
+ perror("execl() failed");
+ return 1;
+}
+
+static int after_exec(void)
+{
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f;"
+ "tsuspend.;"
+ "1: ;"
+ : : : "memory");
+
+ FAIL_IF(failure_is_nesting());
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ path = argv[0];
+
+ if (argc > 1 && strcmp(argv[1], "--child") == 0)
+ return after_exec();
+
+ return test_harness(test_exec, "tm_exec");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
new file mode 100644
index 000000000..8d48579b7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Edited: Rashmica Gupta, Nov 2015
+ *
+ * This test does a fork syscall inside a transaction. Basic sniff test
+ * to see if we can enter the kernel during a transaction.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int test_fork(void)
+{
+ SKIP_IF(!have_htm());
+
+ asm __volatile__(
+ "tbegin.;"
+ "blt 1f; "
+ "li 0, 2;" /* fork syscall */
+ "sc ;"
+ "tend.;"
+ "1: ;"
+ : : : "memory", "r0");
+ /* If we reach here, we've passed. Otherwise we've probably crashed
+ * the kernel */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_fork, "tm_fork");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
new file mode 100644
index 000000000..4cdb83964
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test context switching to see if the DSCR SPR is correctly preserved
+ * when within a transaction.
+ *
+ * Note: We assume that the DSCR has been left at the default value (0)
+ * for all CPUs.
+ *
+ * Method:
+ *
+ * Set a value into the DSCR.
+ *
+ * Start a transaction, and suspend it (*).
+ *
+ * Hard loop checking to see if the transaction has become doomed.
+ *
+ * Now that we *may* have been preempted, record the DSCR and TEXASR SPRs.
+ *
+ * If the abort was because of a context switch, check the DSCR value.
+ * Otherwise, try again.
+ *
+ * (*) If the transaction is not suspended we can't see the problem because
+ * the transaction abort handler will restore the DSCR to its checkpointed
+ * value before we regain control.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <asm/tm.h>
+
+#include "utils.h"
+#include "tm.h"
+#include "../pmu/lib.h"
+
+#define SPRN_DSCR 0x03
+
+int test_body(void)
+{
+ uint64_t rv, dscr1 = 1, dscr2, texasr;
+
+ SKIP_IF(!have_htm());
+
+ printf("Check DSCR TM context switch: ");
+ fflush(stdout);
+ for (;;) {
+ asm __volatile__ (
+ /* set a known value into the DSCR */
+ "ld 3, %[dscr1];"
+ "mtspr %[sprn_dscr], 3;"
+
+ "li %[rv], 1;"
+ /* start and suspend a transaction */
+ "tbegin.;"
+ "beq 1f;"
+ "tsuspend.;"
+
+ /* hard loop until the transaction becomes doomed */
+ "2: ;"
+ "tcheck 0;"
+ "bc 4, 0, 2b;"
+
+ /* record DSCR and TEXASR */
+ "mfspr 3, %[sprn_dscr];"
+ "std 3, %[dscr2];"
+ "mfspr 3, %[sprn_texasr];"
+ "std 3, %[texasr];"
+
+ "tresume.;"
+ "tend.;"
+ "li %[rv], 0;"
+ "1: ;"
+ : [rv]"=r"(rv), [dscr2]"=m"(dscr2), [texasr]"=m"(texasr)
+ : [dscr1]"m"(dscr1)
+ , [sprn_dscr]"i"(SPRN_DSCR), [sprn_texasr]"i"(SPRN_TEXASR)
+ : "memory", "r3"
+ );
+ assert(rv); /* make sure the transaction aborted */
+ if ((texasr >> 56) != TM_CAUSE_RESCHED) {
+ continue;
+ }
+ if (dscr2 != dscr1) {
+ printf(" FAIL\n");
+ return 1;
+ } else {
+ printf(" OK\n");
+ return 0;
+ }
+ }
+}
+
+static int tm_resched_dscr(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(int argc, const char *argv[])
+{
+ return test_harness(tm_resched_dscr, "tm_resched_dscr");
+}
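The (texasr >> 56) test reads the failure cause from the top byte of the TEXASR image and compares it against TM_CAUSE_RESCHED from asm/tm.h. The same decode, as a hedged standalone sketch (illustration only; asm/tm.h is the powerpc uapi header the test itself includes):

    #include <stdint.h>
    #include <stdio.h>
    #include <asm/tm.h>    /* TM_CAUSE_RESCHED and friends (powerpc only) */

    /* The kernel-supplied failure cause lives in the top byte of TEXASR. */
    static int aborted_by_resched(uint64_t texasr)
    {
        return (texasr >> 56) == TM_CAUSE_RESCHED;
    }

    int main(void)
    {
        /* Hypothetical saved TEXASR whose cause byte says "rescheduled". */
        uint64_t texasr = (uint64_t)TM_CAUSE_RESCHED << 56;

        printf("resched abort: %s\n", aborted_by_resched(texasr) ? "yes" : "no");
        return 0;
    }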
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
new file mode 100644
index 000000000..c760debbd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_FPU_REGS 18
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+/* Be sure there are 2x as many as there are NV FPU regs (2x18) */
+static double fps[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18
+};
+
+static sig_atomic_t fail;
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ for (i = 0; i < NV_FPU_REGS && !fail; i++) {
+ fail = (ucp->uc_mcontext.fp_regs[i + 14] != fps[i]);
+ fail |= (tm_ucp->uc_mcontext.fp_regs[i + 14] != fps[i + NV_FPU_REGS]);
+ if (fail)
+ printf("Failed on %d FP %g or %g\n", i, ucp->uc_mcontext.fp_regs[i + 14], tm_ucp->uc_mcontext.fp_regs[i + 14]);
+ }
+}
+
+static int tm_signal_context_chk_fpu()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, NULL, fps, NULL, NULL);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk_fpu, "tm_signal_context_chk_fpu");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
new file mode 100644
index 000000000..df91330a0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_GPR_REGS 18
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+static long gps[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
+ -1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ for (i = 0; i < NV_GPR_REGS && !fail; i++) {
+ fail = (ucp->uc_mcontext.gp_regs[i + 14] != gps[i]);
+ fail |= (tm_ucp->uc_mcontext.gp_regs[i + 14] != gps[i + NV_GPR_REGS]);
+ if (fail)
+ printf("Failed on %d GPR %lu or %lu\n", i,
+ ucp->uc_mcontext.gp_regs[i + 14], tm_ucp->uc_mcontext.gp_regs[i + 14]);
+ }
+}
+
+static int tm_signal_context_chk_gpr()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, gps, NULL, NULL, NULL);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk_gpr, "tm_signal_context_chk_gpr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
new file mode 100644
index 000000000..f0ee55fd5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_VMX_REGS 12
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+vector int vms[] = {
+ {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48},
+ {-1, -2, -3, -4}, {-5, -6, -7, -8}, {-9, -10,-11,-12},
+ {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
+ {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
+ {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
+};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ for (i = 0; i < NV_VMX_REGS && !fail; i++) {
+ fail = memcmp(ucp->uc_mcontext.v_regs->vrregs[i + 20],
+ &vms[i], sizeof(vector int));
+ fail |= memcmp(tm_ucp->uc_mcontext.v_regs->vrregs[i + 20],
+ &vms[i + NV_VMX_REGS], sizeof (vector int));
+
+ if (fail) {
+ int j;
+
+ fprintf(stderr, "Failed on %d vmx 0x", i);
+ for (j = 0; j < 4; j++)
+ fprintf(stderr, "%04x", ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
+ fprintf(stderr, " vs 0x");
+ for (j = 0 ; j < 4; j++)
+ fprintf(stderr, "%04x", tm_ucp->uc_mcontext.v_regs->vrregs[i + 20][j]);
+ fprintf(stderr, "\n");
+ }
+ }
+}
+
+static int tm_signal_context_chk()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, NULL, NULL, vms, NULL);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vmx");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
new file mode 100644
index 000000000..b99c3d835
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2016, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ *
+ * Test the kernel's signal frame code.
+ *
+ * The kernel sets up two sets of ucontexts if the signal was to be
+ * delivered while the thread was in a transaction.
+ * Expected behaviour is that the checkpointed state is in the user
+ * context passed to the signal handler. The speculated state can be
+ * accessed with the uc_link pointer.
+ *
+ * The rationale for this is that if TM unaware code (which linked
+ * against TM libs) installs a signal handler it will not know of the
+ * speculative nature of the 'live' registers and may infer the wrong
+ * thing.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include <altivec.h>
+
+#include "utils.h"
+#include "tm.h"
+
+#define MAX_ATTEMPT 500000
+
+#define NV_VSX_REGS 12
+
+long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector int *vms, vector int *vss);
+
+static sig_atomic_t fail;
+
+vector int vss[] = {
+ {1, 2, 3, 4 },{5, 6, 7, 8 },{9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48},
+ {-1, -2, -3, -4 },{-5, -6, -7, -8 },{-9, -10,-11,-12},
+ {-13,-14,-15,-16},{-17,-18,-19,-20},{-21,-22,-23,-24},
+ {-25,-26,-27,-28},{-29,-30,-31,-32},{-33,-34,-35,-36},
+ {-37,-38,-39,-40},{-41,-42,-43,-44},{-45,-46,-47,-48}
+};
+
+static void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ int i;
+ uint8_t vsc[sizeof(vector int)];
+ uint8_t vst[sizeof(vector int)];
+ ucontext_t *ucp = uc;
+ ucontext_t *tm_ucp = ucp->uc_link;
+
+ /*
+ * The other half of the VSX regs will be after v_regs.
+ *
+ * In short, the vmx_reserve array holds everything. v_regs is a
+ * 16-byte aligned pointer into vmx_reserve (which itself may or
+ * may not be 16-byte aligned) where the v_regs structure lives.
+ * (Half of) the VSX registers sit directly after v_regs, so that
+ * is the easiest way to find them, as done below.
+ */
+ long *vsx_ptr = (long *)(ucp->uc_mcontext.v_regs + 1);
+ long *tm_vsx_ptr = (long *)(tm_ucp->uc_mcontext.v_regs + 1);
+ for (i = 0; i < NV_VSX_REGS && !fail; i++) {
+ memcpy(vsc, &ucp->uc_mcontext.fp_regs[i + 20], 8);
+ memcpy(vsc + 8, &vsx_ptr[20 + i], 8);
+ fail = memcmp(vsc, &vss[i], sizeof(vector int));
+ memcpy(vst, &tm_ucp->uc_mcontext.fp_regs[i + 20], 8);
+ memcpy(vst + 8, &tm_vsx_ptr[20 + i], 8);
+ fail |= memcmp(vst, &vss[i + NV_VSX_REGS], sizeof(vector int));
+
+ if (fail) {
+ int j;
+
+ fprintf(stderr, "Failed on %d vsx 0x", i);
+ for (j = 0; j < 16; j++)
+ fprintf(stderr, "%02x", vsc[j]);
+ fprintf(stderr, " vs 0x");
+ for (j = 0; j < 16; j++)
+ fprintf(stderr, "%02x", vst[j]);
+ fprintf(stderr, "\n");
+ }
+ }
+}
+
+static int tm_signal_context_chk()
+{
+ struct sigaction act;
+ int i;
+ long rc;
+ pid_t pid = getpid();
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+
+ i = 0;
+ while (i < MAX_ATTEMPT && !fail) {
+ rc = tm_signal_self_context_load(pid, NULL, NULL, NULL, vss);
+ FAIL_IF(rc != pid);
+ i++;
+ }
+
+ return fail;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_context_chk, "tm_signal_context_chk_vsx");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
new file mode 100644
index 000000000..8c54d18b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's signal return code to ensure that it doesn't
+ * crash when both the transactional and suspend MSR bits are set in
+ * the signal context.
+ *
+ * For this test, we send ourselves a SIGUSR1. In the SIGUSR1 handler
+ * we modify the signal context to set both MSR TM S and T bits (which
+ * is "reserved" by the PowerISA). When we return from the signal
+ * handler (implicit sigreturn), the kernel should detect the reserved
+ * MSR value and send us a SIGSEGV.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int segv_expected = 0;
+
+void signal_segv(int signum)
+{
+ if (segv_expected && (signum == SIGSEGV))
+ _exit(0);
+ _exit(1);
+}
+
+void signal_usr1(int signum, siginfo_t *info, void *uc)
+{
+ ucontext_t *ucp = uc;
+
+ /* Link tm checkpointed context to normal context */
+ ucp->uc_link = ucp;
+ /* Set all TM bits so that the context is now invalid */
+#ifdef __powerpc64__
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32);
+#else
+ ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL);
+#endif
+ /* Should segv on return because of the invalid context */
+ segv_expected = 1;
+}
+
+int tm_signal_msr_resv()
+{
+ struct sigaction act;
+
+ SKIP_IF(!have_htm());
+
+ act.sa_sigaction = signal_usr1;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGUSR1, &act, NULL) < 0) {
+ perror("sigaction sigusr1");
+ exit(1);
+ }
+ if (signal(SIGSEGV, signal_segv) == SIG_ERR)
+ exit(1);
+
+ raise(SIGUSR1);
+
+ /* We shouldn't get here as we exit in the segv handler */
+ return 1;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_msr_resv, "tm_signal_msr_resv");
+}
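The 7ULL << 32 above sets three adjacent MSR bits at once. Under the usual ppc64 layout (my reading of the kernel's MSR_TM_LG/MSR_TS_*_LG definitions, worth double-checking) those are the TM facility bit plus both transaction-state bits, and TS = 0b11 is precisely the reserved encoding the kernel must reject:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed ppc64 MSR bit positions (mirroring the kernel's MSR_*_LG). */
    #define MSR_TM   (1ULL << 32)    /* TM facility available        */
    #define MSR_TS_S (1ULL << 33)    /* transaction state: suspended */
    #define MSR_TS_T (1ULL << 34)    /* transaction state: active    */

    int main(void)
    {
        uint64_t bits = MSR_TM | MSR_TS_S | MSR_TS_T;

        /* Same value as the 7ULL << 32 in the test; TS = 0b11 is reserved. */
        printf("0x%016llx%s\n", (unsigned long long)bits,
               bits == (7ULL << 32) ? " (== 7ULL << 32)" : "");
        return 0;
    }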
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
new file mode 100644
index 000000000..1f0eb5674
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's signal delivery code to ensure that we don't
+ * treclaim twice in the kernel signal delivery code. This can happen
+ * if we trigger a signal when in a transaction and the stack pointer
+ * is bogus.
+ *
+ * This test case registers a SEGV handler, sets the stack pointer
+ * (r1) to NULL, starts a transaction and then generates a SEGV. The
+ * SEGV should be handled but we exit here as the stack pointer is
+ * invalid and hence we can't sigreturn. We only need to check that
+ * this flow doesn't crash the kernel.
+ */
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include "utils.h"
+#include "tm.h"
+
+void signal_segv(int signum)
+{
+ /* This should never actually run since stack is foobar */
+ exit(1);
+}
+
+int tm_signal_stack()
+{
+ int pid;
+
+ SKIP_IF(!have_htm());
+
+ pid = fork();
+ if (pid < 0)
+ exit(1);
+
+ if (pid) { /* Parent */
+ /*
+ * It's likely the whole machine will crash here so if
+ * the child ever exits, we are good.
+ */
+ wait(NULL);
+ return 0;
+ }
+
+ /*
+ * The flow here is:
+ * 1) register a signal handler (so signal delivery occurs)
+ * 2) make stack pointer (r1) = NULL
+ * 3) start transaction
+ * 4) cause segv
+ */
+ if (signal(SIGSEGV, signal_segv) == SIG_ERR)
+ exit(1);
+ asm volatile("li 1, 0 ;" /* stack ptr == NULL */
+ "1:"
+ "tbegin.;"
+ "beq 1b ;" /* retry forever */
+ "tsuspend.;"
+ "ld 2, 0(1) ;" /* trigger segv" */
+ : : : "memory");
+
+ /* This should never get here due to above segv */
+ return 1;
+}
+
+int main(void)
+{
+ return test_harness(tm_signal_stack, "tm_signal_stack");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal.S b/tools/testing/selftests/powerpc/tm/tm-signal.S
new file mode 100644
index 000000000..506a4ebaf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal.S
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "basic_asm.h"
+#include "gpr_asm.h"
+#include "fpu_asm.h"
+#include "vmx_asm.h"
+#include "vsx_asm.h"
+
+/*
+ * Large caveat here being that the caller cannot expect the
+ * signal to always be sent! The hardware can (AND WILL!) abort
+ * the transaction between the tbegin and the tsuspend (however
+ * unlikely it seems or infrequently it actually happens).
+ * You have been warned.
+ */
+/* long tm_signal_self_context_load(pid_t pid, long *gprs, double *fps, vector *vms, vector *vss); */
+FUNC_START(tm_signal_self_context_load)
+ PUSH_BASIC_STACK(512)
+ /*
+ * Don't strictly need to save and restore as it depends on if
+ * we're going to use them, however this reduces messy logic
+ */
+ PUSH_VMX(STACK_FRAME_LOCAL(5,0),r8)
+ PUSH_FPU(512)
+ PUSH_NVREGS_BELOW_FPU(512)
+ std r3, STACK_FRAME_PARAM(0)(sp) /* pid */
+ std r4, STACK_FRAME_PARAM(1)(sp) /* gps */
+ std r5, STACK_FRAME_PARAM(2)(sp) /* fps */
+ std r6, STACK_FRAME_PARAM(3)(sp) /* vms */
+ std r7, STACK_FRAME_PARAM(4)(sp) /* vss */
+
+ ld r3, STACK_FRAME_PARAM(1)(sp)
+ cmpdi r3, 0
+ beq skip_gpr_lc
+ bl load_gpr
+skip_gpr_lc:
+ ld r3, STACK_FRAME_PARAM(2)(sp)
+ cmpdi r3, 0
+ beq skip_fpu_lc
+ bl load_fpu
+skip_fpu_lc:
+ ld r3, STACK_FRAME_PARAM(3)(sp)
+ cmpdi r3, 0
+ beq skip_vmx_lc
+ bl load_vmx
+skip_vmx_lc:
+ ld r3, STACK_FRAME_PARAM(4)(sp)
+ cmpdi r3, 0
+ beq skip_vsx_lc
+ bl load_vsx
+skip_vsx_lc:
+ /*
+ * Set r3 (return value) before tbegin. Use the pid as a known
+ * 'all good' return value; zero is used to indicate a non-doomed
+ * transaction.
+ */
+ ld r3, STACK_FRAME_PARAM(0)(sp)
+ tbegin.
+ beq 1f
+ tsuspend. /* Can't enter a syscall transactionally */
+ ld r3, STACK_FRAME_PARAM(1)(sp)
+ cmpdi r3, 0
+ beq skip_gpr_lt
+ /* Get the second half of the array */
+ addi r3, r3, 8 * 18
+ bl load_gpr
+skip_gpr_lt:
+ ld r3, STACK_FRAME_PARAM(2)(sp)
+ cmpdi r3, 0
+ beq skip_fpu_lt
+ /* Get the second half of the array */
+ addi r3, r3, 8 * 18
+ bl load_fpu
+skip_fpu_lt:
+ ld r3, STACK_FRAME_PARAM(3)(sp)
+ cmpdi r3, 0
+ beq skip_vmx_lt
+ /* Get the second half of the array */
+ addi r3, r3, 16 * 12
+ bl load_vmx
+skip_vmx_lt:
+ ld r3, STACK_FRAME_PARAM(4)(sp)
+ cmpdi r3, 0
+ beq skip_vsx_lt
+ /* Get the second half of the array */
+ addi r3, r3, 16 * 12
+ bl load_vsx
+skip_vsx_lt:
+ li r0, 37 /* sys_kill */
+ ld r3, STACK_FRAME_PARAM(0)(sp) /* pid */
+ li r4, 10 /* SIGUSR1 */
+ sc /* Taking the signal will doom the transaction */
+ tabort. 0
+ tresume. /* Be super sure we abort */
+ /*
+ * This will cause us to resume the doomed transaction and cause
+ * the hardware to clean up; we'll end up at 1: and anything between
+ * tresume. and 1: shouldn't ever run.
+ */
+ li r3, 0
+ 1:
+ POP_VMX(STACK_FRAME_LOCAL(5,0),r4)
+ POP_FPU(512)
+ POP_NVREGS_BELOW_FPU(512)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(tm_signal_self_context_load)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
new file mode 100644
index 000000000..9a6017a1d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2015, Laurent Dufour, IBM Corp.
+ *
+ * Test the kernel's signal returning code to check that reclaim is done if
+ * sigreturn() is called while in a transaction (suspended, since the active
+ * state is already dropped through the system call path).
+ *
+ * The kernel must discard the transaction when entering sigreturn, since
+ * restoring the potential TM SPRs from the signal frame requires not being
+ * in a transaction.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "tm.h"
+#include "utils.h"
+
+
+void handler(int sig)
+{
+ uint64_t ret;
+
+ asm __volatile__(
+ "li 3,1 ;"
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "tsuspend. ;"
+ "1: ;"
+ "std%X[ret] 3, %[ret] ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret)
+ exit(1);
+
+ /*
+ * We return from the signal handler while in a suspended transaction
+ */
+}
+
+
+int tm_sigreturn(void)
+{
+ struct sigaction sa;
+ uint64_t ret = 0;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!is_ppc64le());
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = handler;
+ sigemptyset(&sa.sa_mask);
+
+ if (sigaction(SIGSEGV, &sa, NULL))
+ exit(1);
+
+ asm __volatile__(
+ "tbegin. ;"
+ "beq 1f ;"
+ "li 3,0 ;"
+ "std 3,0(3) ;" /* trigger SEGV */
+ "li 3,1 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "tend. ;"
+ "b 2f ;"
+ "1: ;"
+ "li 3,2 ;"
+ "std%X[ret] 3,%[ret] ;"
+ "2: ;"
+ : [ret] "=m"(ret)
+ :
+ : "memory", "3", "cr0");
+
+ if (ret != 2)
+ exit(1);
+
+ exit(0);
+}
+
+int main(void)
+{
+ return test_harness(tm_sigreturn, "tm_sigreturn");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
new file mode 100644
index 000000000..bd1ca25fe
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+ .text
+FUNC_START(getppid_tm_active)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ sc
+ tend.
+ blr
+1:
+ li r3, -1
+ blr
+
+FUNC_START(getppid_tm_suspended)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ tsuspend.
+ sc
+ tresume.
+ tend.
+ blr
+1:
+ li r3, -1
+ blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
new file mode 100644
index 000000000..454b965a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -0,0 +1,106 @@
+/*
+ * Copyright 2015, Sam Bobroff, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Test the kernel's system call code to ensure that a system call
+ * made from within an active HTM transaction is aborted with the
+ * correct failure code.
+ * Conversely, ensure that a system call made from within a
+ * suspended transaction can succeed.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/tm.h>
+#include <sys/time.h>
+#include <stdlib.h>
+
+#include "utils.h"
+#include "tm.h"
+
+extern int getppid_tm_active(void);
+extern int getppid_tm_suspended(void);
+
+unsigned retries = 0;
+
+#define TEST_DURATION 10 /* seconds */
+#define TM_RETRIES 100
+
+pid_t getppid_tm(bool suspend)
+{
+ int i;
+ pid_t pid;
+
+ for (i = 0; i < TM_RETRIES; i++) {
+ if (suspend)
+ pid = getppid_tm_suspended();
+ else
+ pid = getppid_tm_active();
+
+ if (pid >= 0)
+ return pid;
+
+ if (failure_is_persistent()) {
+ if (failure_is_syscall())
+ return -1;
+
+ printf("Unexpected persistent transaction failure.\n");
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+ exit(-1);
+ }
+
+ retries++;
+ }
+
+ printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES);
+ printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n",
+ __builtin_get_texasr(), __builtin_get_tfiar());
+
+ exit(-1);
+}
+
+int tm_syscall(void)
+{
+ unsigned count = 0;
+ struct timeval end, now;
+
+ SKIP_IF(!have_htm_nosc());
+
+ setbuf(stdout, NULL);
+
+ printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION);
+
+ gettimeofday(&end, NULL);
+ now.tv_sec = TEST_DURATION;
+ now.tv_usec = 0;
+ timeradd(&end, &now, &end);
+
+ for (count = 0; timercmp(&now, &end, <); count++) {
+ /*
+ * Test a syscall within a suspended transaction and verify
+ * that it succeeds.
+ */
+ FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+
+ /*
+ * Test a syscall within an active transaction and verify that
+ * it fails with the correct failure code.
+ */
+ FAIL_IF(getppid_tm(false) != -1); /* Should fail... */
+ FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+ gettimeofday(&now, 0);
+ }
+
+ printf("%d active and suspended transactions behaved correctly.\n", count);
+ printf("(There were %d transaction retries.)\n", retries);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(tm_syscall, "tm_syscall");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
new file mode 100644
index 000000000..f31fe5a28
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Michael Neuling 19/7/2013
+ * Edited: Rashmica Gupta 01/12/2015
+ *
+ * Do some transactions, see if the TAR is corrupted.
+ * If the transaction is aborted, the TAR should be rolled back to the
+ * checkpointed value before the transaction began. The value written to
+ * TAR in suspended mode should only remain in TAR if the transaction
+ * completes.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int num_loops = 10000;
+
+int test_tar(void)
+{
+ int i;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!is_ppc64le());
+
+ for (i = 0; i < num_loops; i++)
+ {
+ uint64_t result = 0;
+ asm __volatile__(
+ "li 7, 1;"
+ "mtspr %[tar], 7;" /* tar = 1 */
+ "tbegin.;"
+ "beq 3f;"
+ "li 4, 0x7000;" /* Loop lots, to use time */
+ "2:;" /* Start loop */
+ "li 7, 2;"
+ "mtspr %[tar], 7;" /* tar = 2 */
+ "tsuspend.;"
+ "li 7, 3;"
+ "mtspr %[tar], 7;" /* tar = 3 */
+ "tresume.;"
+ "subi 4, 4, 1;"
+ "cmpdi 4, 0;"
+ "bne 2b;"
+ "tend.;"
+
+ /* Transaction success! TAR should be 3 */
+ "mfspr 7, %[tar];"
+ "ori %[res], 7, 4;" // res = 3|4 = 7
+ "b 4f;"
+
+ /* Abort handler. TAR should be rolled back to 1 */
+ "3:;"
+ "mfspr 7, %[tar];"
+ "ori %[res], 7, 8;" // res = 1|8 = 9
+ "4:;"
+
+ : [res]"=r"(result)
+ : [tar]"i"(SPRN_TAR)
+ : "memory", "r0", "r4", "r7");
+
+ /* If result is anything other than 7 or 9, the TAR value
+ * must have been corrupted. */
+ if ((result != 7) && (result != 9))
+ return 1;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ /* A low number of iterations (e.g. 100) can cause a false pass */
+ if (argc > 1) {
+ if (strcmp(argv[1], "-h") == 0) {
+ printf("Syntax:\n\t%s [<num loops>]\n",
+ argv[0]);
+ return 1;
+ } else {
+ num_loops = atoi(argv[1]);
+ }
+ }
+
+ printf("Starting, %d loops\n", num_loops);
+
+ return test_harness(test_tar, "tm_tar");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
new file mode 100644
index 000000000..df1d7d4b1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -0,0 +1,150 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 3/4/2014
+ * Modified: Rashmica Gupta 8/12/2015
+ *
+ * Check if any of the Transactional Memory SPRs get corrupted.
+ * - TFIAR - stores address of location of transaction failure
+ * - TFHAR - stores address of software failure handler (if transaction
+ * fails)
+ * - TEXASR - lots of info about the transaction(s)
+ *
+ * (1) create more threads than cpus
+ * (2) in each thread:
+ * (a) set TFIAR and TFHAR to a unique value
+ * (b) loop for a while, continually checking to see if
+ * either register has been corrupted.
+ *
+ * (3) Loop:
+ * (a) begin transaction
+ * (b) abort transaction
+ * (c) check TEXASR to see if FS has been corrupted
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tm.h"
+
+int num_loops = 10000;
+int passed = 1;
+
+void tfiar_tfhar(void *in)
+{
+ int i, cpu;
+ unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ cpu = (unsigned long)in >> 1;
+ CPU_SET(cpu, &cpuset);
+ sched_setaffinity(0, sizeof(cpuset), &cpuset);
+
+ /* TFIAR: Last bit has to be high so userspace can read register */
+ tfiar = ((unsigned long)in) + 1;
+ tfiar += 2;
+ mtspr(SPRN_TFIAR, tfiar);
+
+ /* TFHAR: Last two bits are reserved */
+ tfhar = ((unsigned long)in);
+ tfhar &= ~0x3UL;
+ tfhar += 4;
+ mtspr(SPRN_TFHAR, tfhar);
+
+ for (i = 0; i < num_loops; i++) {
+ tfhar_rd = mfspr(SPRN_TFHAR);
+ tfiar_rd = mfspr(SPRN_TFIAR);
+ if ((tfhar != tfhar_rd) || (tfiar != tfiar_rd)) {
+ passed = 0;
+ return;
+ }
+ }
+ return;
+}
+
+void texasr(void *in)
+{
+ unsigned long i;
+ uint64_t result = 0;
+
+ for (i = 0; i < num_loops; i++) {
+ asm __volatile__(
+ "tbegin.;"
+ "beq 3f ;"
+ "tabort. 0 ;"
+ "tend.;"
+
+ /* Abort handler */
+ "3: ;"
+ ::: "memory");
+
+ /* Check the TEXASR */
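+ /* tabort. sets the Failure Summary (FS) bit in TEXASR, so a
+ * clear FS here means the register was corrupted. */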
+ result = mfspr(SPRN_TEXASR);
+ if ((result & TEXASR_FS) == 0) {
+ passed = 0;
+ return;
+ }
+ }
+ return;
+}
+
+int test_tmspr()
+{
+ pthread_t *thread;
+ int thread_num;
+ unsigned long i;
+
+ SKIP_IF(!have_htm());
+
+ /* To cause some context switching */
+ thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
+
+ thread = malloc(thread_num * sizeof(pthread_t));
+ if (thread == NULL)
+ return EXIT_FAILURE;
+
+ /* Test TFIAR and TFHAR */
+ for (i = 0; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+ (void *)i))
+ return EXIT_FAILURE;
+ }
+ /* Test TEXASR */
+ for (i = 1; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0; i < thread_num; i++) {
+ if (pthread_join(thread[i], NULL) != 0)
+ return EXIT_FAILURE;
+ }
+
+ free(thread);
+
+ if (passed)
+ return 0;
+ else
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc > 1) {
+ if (strcmp(argv[1], "-h") == 0) {
+ printf("Syntax:\t [<num loops>]\n");
+ return 0;
+ } else {
+ num_loops = atoi(argv[1]);
+ }
+ }
+ return test_harness(test_tmspr, "tm_tmspr");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
new file mode 100644
index 000000000..179d592f0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2017, Gustavo Romero, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Check if thread endianness is flipped inadvertently to BE on trap
+ * caught in TM whilst MSR.FP and MSR.VEC are zero (i.e. just after
+ * load_fp and load_vec overflowed).
+ *
+ * The issue can be checked on LE machines simply by zeroing load_fp
+ * and load_vec and then causing a trap in TM. Since the endianness
+ * changes to BE on return from the signal handler, 'nop' is
+ * treated as an illegal instruction in the following sequence:
+ * tbegin.
+ * beq 1f
+ * trap
+ * tend.
+ * 1: nop
+ *
+ * However, although the issue is also present on BE machines, it's a
+ * bit trickier to check there because the MSR.LE bit is set to zero,
+ * selecting BE endianness, which is the native endianness on BE
+ * machines, so nothing notably critical happens, i.e. no illegal
+ * instruction is observed immediately after returning from the signal
+ * handler (as happens on LE machines). Thus to test it on BE machines,
+ * LE endianness is forced after a first trap, and the endianness is
+ * then verified on subsequent traps to determine whether it "flipped
+ * back" to the native endianness (BE).
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <htmintrin.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include "tm.h"
+#include "utils.h"
+
+#define pr_error(error_code, format, ...) \
+ error_at_line(1, error_code, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#define MSR_LE 1UL
+#define LE 1UL
+
+pthread_t t0_ping;
+pthread_t t1_pong;
+
+int exit_from_pong;
+
+int trap_event;
+int le;
+
+bool success;
+
+void trap_signal_handler(int signo, siginfo_t *si, void *uc)
+{
+ ucontext_t *ucp = uc;
+ uint64_t thread_endianness;
+
+ /* Get thread endianness: extract bit LE from MSR */
+ thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
+
+ /***
+ * Little-Endian Machine
+ */
+
+ if (le) {
+ /* First trap event */
+ if (trap_event == 0) {
+ /* Do nothing. It is on return from this trap
+ * event that endianness is flipped by the bug, so just
+ * let the process return from the signal handler and
+ * check on the second trap event whether endianness
+ * was flipped or not.
+ */
+ }
+ /* Second trap event */
+ else if (trap_event == 1) {
+ /*
+ * Since the trap was caught in TM on the first trap
+ * event, if endianness was still LE (not flipped
+ * inadvertently) after returning from the signal
+ * handler, instruction (1) is executed (basically a
+ * 'nop'), as it's located at the address of tbegin. +4
+ * (the rollback addr). As (1) on LE endianness does in
+ * effect nothing, instruction (2) is then executed
+ * again as 'trap', generating a second trap event
+ * (note that in that case 'trap' is caught not in
+ * transactional mode). On the other hand, if after the
+ * return from the signal handler the endianness in-
+ * advertently flipped, instruction (1) is treated as a
+ * branch instruction, i.e. b .+8, hence instructions
+ * (3) and (4) are executed (tbegin.; trap;) and we end
+ * up in the trap signal handler again, but now in TM mode.
+ * Either way, it's now possible to check the MSR LE bit
+ * once in the trap handler to verify if endianness was
+ * flipped or not after the return from the second trap
+ * event. If endianness is flipped, the bug is present.
+ * Finally, getting a trap in TM mode or not is just
+ * worth noting because it affects the math to determine
+ * the offset added to the NIP on return: the NIP for a
+ * trap caught in TM is the rollback address, i.e. the
+ * next instruction after 'tbegin.', whilst the NIP for
+ * a trap caught in non-transactional mode is the very
+ * same address of the 'trap' instruction that generated
+ * the trap event.
+ */
+
+ if (thread_endianness == LE) {
+ /* Go to 'success', i.e. instruction (6) */
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 16;
+ } else {
+ /*
+ * Thread endianness is BE, so it flipped
+ * inadvertently. Thus we flip back to LE and
+ * set NIP to go to 'failure', instruction (5).
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
+ }
+ }
+
+ /***
+ * Big-Endian Machine
+ */
+
+ else {
+ /* First trap event */
+ if (trap_event == 0) {
+ /*
+ * Force thread endianness to be LE. Instructions (1),
+ * (3), and (4) will be executed, generating a second
+ * trap in TM mode.
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] |= 1UL;
+ }
+ /* Second trap event */
+ else if (trap_event == 1) {
+ /*
+ * Do nothing. If the bug is present, on return from this
+ * second trap event endianness will flip back "automatically"
+ * to BE; otherwise thread endianness will continue to be LE,
+ * just as it was set above.
+ */
+ }
+ /* A third trap event */
+ else {
+ /*
+ * Once here it means that after returning from the sec-
+ * ond trap event instruction (4) (trap) was executed
+ * as LE, generating a third trap event. In that case
+ * endianness is still LE as set on return from the
+ * first trap event, hence no bug. Otherwise, endianness
+ * flipped back to BE on return from the second trap
+ * event and instruction (4) was executed as 'tdi' (so
+ * basically a 'nop') and branch to 'failure' in
+ * instruction (5) was taken to indicate failure and we
+ * never get here.
+ */
+
+ /*
+ * Flip back to BE and go to instruction (6), i.e. go to
+ * 'success'.
+ */
+ ucp->uc_mcontext.gp_regs[PT_MSR] &= ~1UL;
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 8;
+ }
+ }
+
+ trap_event++;
+}
+
+void usr1_signal_handler(int signo, siginfo_t *si, void *not_used)
+{
+ /* Got a USR1 signal from ping(), so just tell pong() to exit */
+ exit_from_pong = 1;
+}
+
+void *ping(void *not_used)
+{
+ uint64_t i;
+
+ trap_event = 0;
+
+ /*
+ * Wait for a number of context switches so that load_fp and load_vec
+ * overflow and MSR.FP, MSR.VEC, and MSR.VSX become 0.
+ */
+ for (i = 0; i < 1024*1024*512; i++)
+ ;
+
+ asm goto(
+ /*
+ * [NA] means "Native Endianness", i.e. it tells how a
+ * instruction is executed on machine's native endianness (in
+ * other words, native endianness matches kernel endianness).
+ * [OP] means "Opposite Endianness", i.e. on a BE machine, it
+ * tells how a instruction is executed as a LE instruction; con-
+ * versely, on a LE machine, it tells how a instruction is
+ * executed as a BE instruction. When [NA] is omitted, it means
+ * that the native interpretation of a given instruction is not
+ * relevant for the test. Likewise when [OP] is omitted.
+ */
+
+ " tbegin. ;" /* (0) tbegin. [NA] */
+ " tdi 0, 0, 0x48;" /* (1) nop [NA]; b (3) [OP] */
+ " trap ;" /* (2) trap [NA] */
+ ".long 0x1D05007C;" /* (3) tbegin. [OP] */
+ ".long 0x0800E07F;" /* (4) trap [OP]; nop [NA] */
+ " b %l[failure] ;" /* (5) b [NA]; MSR.LE flipped (bug) */
+ " b %l[success] ;" /* (6) b [NA]; MSR.LE did not flip (ok)*/
+
+ : : : : failure, success);
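+
+ /*
+ * The raw .long values above are the byte-reversed encodings of
+ * 'tbegin.' (0x7C00051D) and 'trap', i.e. 'tw 31,0,0' (0x7FE00008),
+ * which is why they decode as those instructions only when the CPU
+ * runs with the opposite endianness.
+ */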
+
+failure:
+ success = false;
+ goto exit_from_ping;
+
+success:
+ success = true;
+
+exit_from_ping:
+ /* Tell pong() to exit before leaving */
+ pthread_kill(t1_pong, SIGUSR1);
+ return NULL;
+}
+
+void *pong(void *not_used)
+{
+ while (!exit_from_pong)
+ /*
+ * Induce context switches on the ping() thread
+ * until ping() finishes its job and signals
+ * us to exit from this loop.
+ */
+ sched_yield();
+
+ return NULL;
+}
+
+int tm_trap_test(void)
+{
+ uint16_t k = 1;
+
+ int rc;
+
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ struct sigaction trap_sa;
+
+ SKIP_IF(!have_htm());
+
+ trap_sa.sa_flags = SA_SIGINFO;
+ trap_sa.sa_sigaction = trap_signal_handler;
+ sigaction(SIGTRAP, &trap_sa, NULL);
+
+ struct sigaction usr1_sa;
+
+ usr1_sa.sa_flags = SA_SIGINFO;
+ usr1_sa.sa_sigaction = usr1_signal_handler;
+ sigaction(SIGUSR1, &usr1_sa, NULL);
+
+ /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ /* Init pthread attribute */
+ rc = pthread_attr_init(&attr);
+ if (rc)
+ pr_error(rc, "pthread_attr_init()");
+
+ /*
+ * Bind threads ping() and pong() both to CPU 0 so they ping-pong and
+ * speed up context switches on ping() thread, speeding up the load_fp
+ * and load_vec overflow.
+ */
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc)
+ pr_error(rc, "pthread_attr_setaffinity()");
+
+ /* Figure out the machine endianness */
+ le = (int) *(uint8_t *)&k;
+
+ printf("%s machine detected. Checking if endianness flips %s",
+ le ? "Little-Endian" : "Big-Endian",
+ "inadvertently on trap in TM... ");
+
+ rc = fflush(0);
+ if (rc)
+ pr_error(rc, "fflush()");
+
+ /* Launch ping() */
+ rc = pthread_create(&t0_ping, &attr, ping, NULL);
+ if (rc)
+ pr_error(rc, "pthread_create()");
+
+ exit_from_pong = 0;
+
+ /* Launch pong() */
+ rc = pthread_create(&t1_pong, &attr, pong, NULL);
+ if (rc)
+ pr_error(rc, "pthread_create()");
+
+ rc = pthread_join(t0_ping, NULL);
+ if (rc)
+ pr_error(rc, "pthread_join()");
+
+ rc = pthread_join(t1_pong, NULL);
+ if (rc)
+ pr_error(rc, "pthread_join()");
+
+ if (success) {
+ printf("no.\n"); /* no, endianness did not flip inadvertently */
+ return EXIT_SUCCESS;
+ }
+
+ printf("yes!\n"); /* yes, endianness did flip inadvertently */
+ return EXIT_FAILURE;
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness(tm_trap_test, "tm_trap_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
new file mode 100644
index 000000000..09894f4ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright 2017, Gustavo Romero, Breno Leitao, Cyril Bur, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Force FP, VEC and VSX unavailable exception during transaction in all
+ * possible scenarios regarding the MSR.FP and MSR.VEC state, e.g. when FP
+ * is enabled and VEC is disabled, when FP is disabled and VEC is enabled,
+ * and so on. Then we check if the FP and VEC registers are correctly
+ * restored to the state we set just before entering TM, i.e. we check
+ * whether the recheckpointed FP and VEC/Altivec registers are somehow
+ * corrupted on abort due to an unavailable exception in TM.
+ * N.B. In this test we do not test all the FP/Altivec/VSX registers for
+ * corruption, but only registers vs0 and vs32, which are representative
+ * of the FP and VEC/Altivec register sets, respectively.
+ */
+
+#define _GNU_SOURCE
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+
+#include "tm.h"
+
+#define DEBUG 0
+
+/* Unavailable exceptions to test in HTM */
+#define FP_UNA_EXCEPTION 0
+#define VEC_UNA_EXCEPTION 1
+#define VSX_UNA_EXCEPTION 2
+
+#define NUM_EXCEPTIONS 3
+#define err_at_line(status, errnum, format, ...) \
+ error_at_line(status, errnum, __FILE__, __LINE__, format, ##__VA_ARGS__)
+
+#define pr_warn(code, format, ...) err_at_line(0, code, format, ##__VA_ARGS__)
+#define pr_err(code, format, ...) err_at_line(1, code, format, ##__VA_ARGS__)
+
+struct Flags {
+ int touch_fp;
+ int touch_vec;
+ int result;
+ int exception;
+} flags;
+
+bool expecting_failure(void)
+{
+ if (flags.touch_fp && flags.exception == FP_UNA_EXCEPTION)
+ return false;
+
+ if (flags.touch_vec && flags.exception == VEC_UNA_EXCEPTION)
+ return false;
+
+ /*
+ * If both FP and VEC are touched it does not mean that touching VSX
+ * won't raise an exception. However since FP and VEC state are already
+ * correctly loaded, the transaction is not aborted (i.e.
+ * treclaimed/trecheckpointed) and MSR.VSX is just set as 1, so a TM
+ * failure is not expected also in this case.
+ */
+ if ((flags.touch_fp && flags.touch_vec) &&
+ flags.exception == VSX_UNA_EXCEPTION)
+ return false;
+
+ return true;
+}
+
+/* Check if failure occurred whilst in transaction. */
+bool is_failure(uint64_t condition_reg)
+{
+ /*
+ * When failure handling occurs, CR0 is set to 0b1010 (0xa). Otherwise
+ * the transaction completes without failure and hence reaches 'tend.',
+ * which sets CR0 to 0b0100 (0x4).
+ */
+ return ((condition_reg >> 28) & 0xa) == 0xa;
+}
+
+void *tm_una_ping(void *input)
+{
+
+ /*
+ * Expected values for vs0 and vs32 after a TM failure. They must never
+ * change; if they do, they were corrupted.
+ */
+ uint64_t high_vs0 = 0x5555555555555555;
+ uint64_t low_vs0 = 0xffffffffffffffff;
+ uint64_t high_vs32 = 0x5555555555555555;
+ uint64_t low_vs32 = 0xffffffffffffffff;
+
+ /* Counter for busy wait */
+ uint64_t counter = 0x1ff000000;
+
+ /*
+ * Variable to keep a copy of CR register content taken just after we
+ * leave the transactional state.
+ */
+ uint64_t cr_ = 0;
+
+ /*
+ * Wait a bit so the thread can get its name "ping". This is not important
+ * to reproduce the issue but it's nice to have for systemtap debugging.
+ */
+ if (DEBUG)
+ sleep(1);
+
+ printf("If MSR.FP=%d MSR.VEC=%d: ", flags.touch_fp, flags.touch_vec);
+
+ if (flags.exception != FP_UNA_EXCEPTION &&
+ flags.exception != VEC_UNA_EXCEPTION &&
+ flags.exception != VSX_UNA_EXCEPTION) {
+ printf("No valid exception specified to test.\n");
+ return NULL;
+ }
+
+ asm (
+ /* Prepare to merge low and high. */
+ " mtvsrd 33, %[high_vs0] ;"
+ " mtvsrd 34, %[low_vs0] ;"
+
+ /*
+ * Adjust VS0 expected value after a TM failure,
+ * i.e. vs0 = 0x5555555555555555FFFFFFFFFFFFFFFF
+ */
+ " xxmrghd 0, 33, 34 ;"
+
+ /*
+ * Adjust VS32 expected value after a TM failure,
+ * i.e. vs32 = 0x5555555555555555FFFFFFFFFFFFFFFF
+ */
+ " xxmrghd 32, 33, 34 ;"
+
+ /*
+ * Wait an amount of context switches so load_fp and load_vec
+ * overflow and MSR.FP, MSR.VEC, and MSR.VSX become zero (off).
+ */
+ " mtctr %[counter] ;"
+
+ /* Decrement CTR; branch if CTR is non-zero. */
+ "1: bdnz 1b ;"
+
+ /*
+ * Check if we want to touch FP prior to the test in order
+ * to set MSR.FP = 1 before provoking an unavailable
+ * exception in TM.
+ */
+ " cmpldi %[touch_fp], 0 ;"
+ " beq no_fp ;"
+ " fadd 10, 10, 10 ;"
+ "no_fp: ;"
+
+ /*
+ * Check if we want to touch VEC prior to the test in order
+ * to set MSR.VEC = 1 before provoking an unavailable
+ * exception in TM.
+ */
+ " cmpldi %[touch_vec], 0 ;"
+ " beq no_vec ;"
+ " vaddcuw 10, 10, 10 ;"
+ "no_vec: ;"
+
+ /*
+ * Perhaps it would be a better idea to do the
+ * compares outside transactional context and simply
+ * duplicate code.
+ */
+ " tbegin. ;"
+ " beq trans_fail ;"
+
+ /* Do we do FP Unavailable? */
+ " cmpldi %[exception], %[ex_fp] ;"
+ " bne 1f ;"
+ " fadd 10, 10, 10 ;"
+ " b done ;"
+
+ /* Do we do VEC Unavailable? */
+ "1: cmpldi %[exception], %[ex_vec] ;"
+ " bne 2f ;"
+ " vaddcuw 10, 10, 10 ;"
+ " b done ;"
+
+ /*
+ * Not FP or VEC, therefore VSX. Ensure this
+ * instruction always generates a VSX Unavailable.
+ * ISA 3.0 is tricky here.
+ * (xxmrghd does on both ISA 2.07 and ISA 3.0.)
+ */
+ "2: xxmrghd 10, 10, 10 ;"
+
+ "done: tend. ;"
+
+ "trans_fail: ;"
+
+ /* Give values back to C. */
+ " mfvsrd %[high_vs0], 0 ;"
+ " xxsldwi 3, 0, 0, 2 ;"
+ " mfvsrd %[low_vs0], 3 ;"
+ " mfvsrd %[high_vs32], 32 ;"
+ " xxsldwi 3, 32, 32, 2 ;"
+ " mfvsrd %[low_vs32], 3 ;"
+
+ /* Give CR back to C so that it can check what happened. */
+ " mfcr %[cr_] ;"
+
+ : [high_vs0] "+r" (high_vs0),
+ [low_vs0] "+r" (low_vs0),
+ [high_vs32] "=r" (high_vs32),
+ [low_vs32] "=r" (low_vs32),
+ [cr_] "+r" (cr_)
+ : [touch_fp] "r" (flags.touch_fp),
+ [touch_vec] "r" (flags.touch_vec),
+ [exception] "r" (flags.exception),
+ [ex_fp] "i" (FP_UNA_EXCEPTION),
+ [ex_vec] "i" (VEC_UNA_EXCEPTION),
+ [ex_vsx] "i" (VSX_UNA_EXCEPTION),
+ [counter] "r" (counter)
+
+ : "cr0", "ctr", "v10", "vs0", "vs10", "vs3", "vs32", "vs33",
+ "vs34", "fr10"
+
+ );
+
+ /*
+ * Check if we were expecting a failure and it did not occur by checking
+ * CR0 state just after we leave the transaction. Either way we check if
+ * vs0 or vs32 got corrupted.
+ */
+ if (expecting_failure() && !is_failure(cr_)) {
+ printf("\n\tExpecting the transaction to fail, %s",
+ "but it didn't\n\t");
+ flags.result++;
+ }
+
+ /* Check if we were not expecting a failure and it occurred. */
+ if (!expecting_failure() && is_failure(cr_) &&
+ !failure_is_reschedule()) {
+ printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
+ failure_code());
+ return (void *) -1;
+ }
+
+ /*
+ * Check if TM failed due to the cause we were expecting. 0xda is the
+ * TM_CAUSE_FAC_UNAV cause; anything else is an unexpected cause, unless
+ * it was caused by a reschedule.
+ */
+ if (is_failure(cr_) && !failure_is_unavailable() &&
+ !failure_is_reschedule()) {
+ printf("\n\tUnexpected failure cause 0x%02lx\n\t",
+ failure_code());
+ return (void *) -1;
+ }
+
+ /* 0x4 is a success and 0xa is a fail. See comment in is_failure(). */
+ if (DEBUG)
+ printf("CR0: 0x%1lx ", cr_ >> 28);
+
+ /* Check FP (vs0) for the expected value. */
+ if (high_vs0 != 0x5555555555555555 || low_vs0 != 0xFFFFFFFFFFFFFFFF) {
+ printf("FP corrupted!");
+ printf(" high = %#16" PRIx64 " low = %#16" PRIx64 " ",
+ high_vs0, low_vs0);
+ flags.result++;
+ } else
+ printf("FP ok ");
+
+ /* Check VEC (vs32) for the expected value. */
+ if (high_vs32 != 0x5555555555555555 || low_vs32 != 0xFFFFFFFFFFFFFFFF) {
+ printf("VEC corrupted!");
+ printf(" high = %#16" PRIx64 " low = %#16" PRIx64,
+ high_vs32, low_vs32);
+ flags.result++;
+ } else
+ printf("VEC ok");
+
+ putchar('\n');
+
+ return NULL;
+}
+
+/* Thread to force context switch */
+void *tm_una_pong(void *not_used)
+{
+ /* Wait for the thread to get its name "pong". */
+ if (DEBUG)
+ sleep(1);
+
+ /* Classed as an interactive-like thread. */
+ while (1)
+ sched_yield();
+}
+
+/* Function that creates a thread and launches the "ping" task. */
+void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
+{
+ int retries = 2;
+ void *ret_value;
+ pthread_t t0;
+
+ flags.touch_fp = fp;
+ flags.touch_vec = vec;
+
+ /*
+ * With bad luck it's possible that the transaction is aborted not by
+ * the unavailable exception caught in the middle, as we expect, but,
+ * for instance, by a context switch or a KVM reschedule (if it's
+ * running on a VM). Thus we try a few times before giving up, checking
+ * whether the failure cause is the one we expect.
+ */
+ do {
+ int rc;
+
+ /* Bind to CPU 0, as specified in 'attr'. */
+ rc = pthread_create(&t0, attr, tm_una_ping, (void *) &flags);
+ if (rc)
+ pr_err(rc, "pthread_create()");
+ rc = pthread_setname_np(t0, "tm_una_ping");
+ if (rc)
+ pr_warn(rc, "pthread_setname_np");
+ rc = pthread_join(t0, &ret_value);
+ if (rc)
+ pr_err(rc, "pthread_join");
+
+ retries--;
+ } while (ret_value != NULL && retries);
+
+ if (!retries) {
+ flags.result = 1;
+ if (DEBUG)
+ printf("All transactions failed unexpectedly\n");
+
+ }
+}
+
+int tm_unavailable_test(void)
+{
+ int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+ pthread_t t1;
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ SKIP_IF(!have_htm());
+
+ /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ /* Init pthread attribute. */
+ rc = pthread_attr_init(&attr);
+ if (rc)
+ pr_err(rc, "pthread_attr_init()");
+
+ /* Set CPU 0 mask into the pthread attribute. */
+ rc = pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ if (rc)
+ pr_err(rc, "pthread_attr_setaffinity_np()");
+
+ rc = pthread_create(&t1, &attr /* Bind to CPU 0 */, tm_una_pong, NULL);
+ if (rc)
+ pr_err(rc, "pthread_create()");
+
+ /* Name it for systemtap convenience */
+ rc = pthread_setname_np(t1, "tm_una_pong");
+ if (rc)
+ pr_warn(rc, "pthread_create()");
+
+ flags.result = 0;
+
+ for (exception = 0; exception < NUM_EXCEPTIONS; exception++) {
+ printf("Checking if FP/VEC registers are sane after");
+
+ if (exception == FP_UNA_EXCEPTION)
+ printf(" a FP unavailable exception...\n");
+
+ else if (exception == VEC_UNA_EXCEPTION)
+ printf(" a VEC unavailable exception...\n");
+
+ else
+ printf(" a VSX unavailable exception...\n");
+
+ flags.exception = exception;
+
+ test_fp_vec(0, 0, &attr);
+ test_fp_vec(1, 0, &attr);
+ test_fp_vec(0, 1, &attr);
+ test_fp_vec(1, 1, &attr);
+
+ }
+
+ if (flags.result > 0) {
+ printf("result: failed!\n");
+ exit(1);
+ } else {
+ printf("result: success\n");
+ exit(0);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ test_harness_set_timeout(220);
+ return test_harness(tm_unavailable_test, "tm_unavailable_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
new file mode 100644
index 000000000..137185ba4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -0,0 +1,118 @@
+/*
+ * Copyright 2017, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ * Original: Breno Leitao <brenohl@br.ibm.com> &
+ * Gustavo Bueno Romero <gromero@br.ibm.com>
+ * Edited: Michael Neuling
+ *
+ * Force VMX unavailable during a transaction and see if it corrupts
+ * the checkpointed VMX register state after the abort.
+ */
+
+#include <inttypes.h>
+#include <htmintrin.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int passed;
+
+void *worker(void *unused)
+{
+ __int128 vmx0;
+ uint64_t texasr;
+
+ asm goto (
+ "li 3, 1;" /* Stick non-zero value in VMX0 */
+ "std 3, 0(%[vmx0_ptr]);"
+ "lvx 0, 0, %[vmx0_ptr];"
+
+ /* Wait here a bit so we get scheduled out 255 times */
+ "lis 3, 0x3fff;"
+ "1: ;"
+ "addi 3, 3, -1;"
+ "cmpdi 3, 0;"
+ "bne 1b;"
+
+ /* Kernel will hopefully turn VMX off now */
+
+ "tbegin. ;"
+ "beq failure;"
+
+ /* Cause VMX unavail. Any VMX instruction */
+ "vaddcuw 0,0,0;"
+
+ "tend. ;"
+ "b %l[success];"
+
+ /* Check VMX0 sanity after abort */
+ "failure: ;"
+ "lvx 1, 0, %[vmx0_ptr];"
+ "vcmpequb. 2, 0, 1;"
+ "bc 4, 24, %l[value_mismatch];"
+ "b %l[value_match];"
+ :
+ : [vmx0_ptr] "r"(&vmx0)
+ : "r3"
+ : success, value_match, value_mismatch
+ );
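+
+ /*
+ * Note on the compare above: vcmpequb. records its result in CR field 6,
+ * and "bc 4, 24" branches when CR bit 24 (the "all elements equal" bit
+ * of CR6) is clear, i.e. when the checkpointed VMX0 no longer matches
+ * the value stored before the transaction.
+ */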
+
+ /* HTM aborted and VMX0 is corrupted */
+value_mismatch:
+ texasr = __builtin_get_texasr();
+
+ printf("\n\n==============\n\n");
+ printf("Failure with error: %lx\n", _TEXASR_FAILURE_CODE(texasr));
+ printf("Summary error : %lx\n", _TEXASR_FAILURE_SUMMARY(texasr));
+ printf("TFIAR exact : %lx\n\n", _TEXASR_TFIAR_EXACT(texasr));
+
+ passed = 0;
+ return NULL;
+
+ /* HTM aborted but VMX0 is correct */
+value_match:
+// printf("!");
+ return NULL;
+
+success:
+// printf(".");
+ return NULL;
+}
+
+int tm_vmx_unavail_test()
+{
+ int threads;
+ pthread_t *thread;
+
+ SKIP_IF(!have_htm());
+
+ passed = 1;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * 4;
+ thread = malloc(sizeof(pthread_t)*threads);
+ if (!thread)
+ return EXIT_FAILURE;
+
+ for (uint64_t i = 0; i < threads; i++)
+ pthread_create(&thread[i], NULL, &worker, NULL);
+
+ for (uint64_t i = 0; i < threads; i++)
+ pthread_join(thread[i], NULL);
+
+ free(thread);
+
+ return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+
+int main(int argc, char **argv)
+{
+ return test_harness(tm_vmx_unavail_test, "tm_vmx_unavail_test");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
new file mode 100644
index 000000000..fe5281158
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2015, Michael Neuling, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Original: Michael Neuling 4/12/2013
+ * Edited: Rashmica Gupta 4/12/2015
+ *
+ * See if the altivec state is leaked out of an aborted transaction due to
+ * kernel vmx copy loops.
+ *
+ * When the transaction aborts, VSR values should roll back to the values
+ * they held before the transaction commenced. Using VSRs while transaction
+ * is suspended should not affect the checkpointed values.
+ *
+ * (1) write A to a VSR
+ * (2) start transaction
+ * (3) suspend transaction
+ * (4) change the VSR to B
+ * (5) trigger kernel vmx copy loop
+ * (6) abort transaction
+ * (7) check that the VSR value is A
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <assert.h>
+
+#include "tm.h"
+#include "utils.h"
+
+int test_vmxcopy()
+{
+ long double vecin = 1.3;
+ long double vecout;
+ unsigned long pgsize = getpagesize();
+ int i;
+ int fd;
+ int size = pgsize*16;
+ char tmpfile[] = "/tmp/page_faultXXXXXX";
+ char buf[pgsize];
+ char *a;
+ uint64_t aborted = 0;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!is_ppc64le());
+
+ fd = mkstemp(tmpfile);
+ assert(fd >= 0);
+
+ memset(buf, 0, pgsize);
+ for (i = 0; i < size; i += pgsize)
+ assert(write(fd, buf, pgsize) == pgsize);
+
+ unlink(tmpfile);
+
+ a = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0);
+ assert(a != MAP_FAILED);
+
+ asm __volatile__(
+ "lxvd2x 40,0,%[vecinptr];" /* set 40 to initial value*/
+ "tbegin.;"
+ "beq 3f;"
+ "tsuspend.;"
+ "xxlxor 40,40,40;" /* set 40 to 0 */
+ "std 5, 0(%[map]);" /* cause kernel vmx copy page */
+ "tabort. 0;"
+ "tresume.;"
+ "tend.;"
+ "li %[res], 0;"
+ "b 5f;"
+
+ /* Abort handler */
+ "3:;"
+ "li %[res], 1;"
+
+ "5:;"
+ "stxvd2x 40,0,%[vecoutptr];"
+ : [res]"=r"(aborted)
+ : [vecinptr]"r"(&vecin),
+ [vecoutptr]"r"(&vecout),
+ [map]"r"(a)
+ : "memory", "r0", "r3", "r4", "r5", "r6", "r7");
+
+ if (aborted && (vecin != vecout)) {
+ printf("FAILED: vector state leaked on abort %f != %f\n",
+ (double)vecin, (double)vecout);
+ return 1;
+ }
+
+ munmap(a, size);
+
+ close(fd);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_vmxcopy, "tm_vmxcopy");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
new file mode 100644
index 000000000..5518b1d4e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright 2015, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#ifndef _SELFTESTS_POWERPC_TM_TM_H
+#define _SELFTESTS_POWERPC_TM_TM_H
+
+#include <asm/tm.h>
+#include <asm/cputable.h>
+#include <stdbool.h>
+
+#include "utils.h"
+
+static inline bool have_htm(void)
+{
+#ifdef PPC_FEATURE2_HTM
+ return have_hwcap2(PPC_FEATURE2_HTM);
+#else
+ printf("PPC_FEATURE2_HTM not defined, can't check AT_HWCAP2\n");
+ return false;
+#endif
+}
+
+static inline bool have_htm_nosc(void)
+{
+#ifdef PPC_FEATURE2_HTM_NOSC
+ return have_hwcap2(PPC_FEATURE2_HTM_NOSC);
+#else
+ printf("PPC_FEATURE2_HTM_NOSC not defined, can't check AT_HWCAP2\n");
+ return false;
+#endif
+}
+
+static inline long failure_code(void)
+{
+ return __builtin_get_texasru() >> 24;
+}
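+
+/*
+ * The failure code lives in the top byte of TEXASR (bits 0:7 of TEXASRU),
+ * hence the shift by 24 above; the TM_CAUSE_* masks tested below come from
+ * asm/tm.h.
+ */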
+
+static inline bool failure_is_persistent(void)
+{
+ return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT;
+}
+
+static inline bool failure_is_syscall(void)
+{
+ return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL;
+}
+
+static inline bool failure_is_unavailable(void)
+{
+ return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
+}
+
+static inline bool failure_is_reschedule(void)
+{
+ if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+ (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+ return true;
+
+ return false;
+}
+
+static inline bool failure_is_nesting(void)
+{
+ return (__builtin_get_texasru() & 0x400000);
+}
+
+static inline int tcheck(void)
+{
+ long cr;
+
+ /* tcheck sets CR field 0; copy the CR to a GPR so it can be read. */
+ asm volatile ("tcheck 0; mfcr %0" : "=r"(cr) : : "cr0");
+ return (cr >> 28) & 0xf;
+}
+
+static inline bool tcheck_doomed(void)
+{
+ return tcheck() & 8;
+}
+
+static inline bool tcheck_active(void)
+{
+ return tcheck() & 4;
+}
+
+static inline bool tcheck_suspended(void)
+{
+ return tcheck() & 2;
+}
+
+static inline bool tcheck_transactional(void)
+{
+ return tcheck() & 6;
+}
+
+#endif /* _SELFTESTS_POWERPC_TM_TM_H */
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
new file mode 100644
index 000000000..ba0959d45
--- /dev/null
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright 2013-2015, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE /* For CPU_ZERO etc. */
+
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <link.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+static char auxv[4096];
+
+int read_auxv(char *buf, ssize_t buf_size)
+{
+ ssize_t num;
+ int rc, fd;
+
+ fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd == -1) {
+ perror("open");
+ return -errno;
+ }
+
+ num = read(fd, buf, buf_size);
+ if (num < 0) {
+ perror("read");
+ rc = -EIO;
+ goto out;
+ }
+
+ if (num > buf_size) {
+ printf("overflowed auxv buffer\n");
+ rc = -EOVERFLOW;
+ goto out;
+ }
+
+ rc = 0;
+out:
+ close(fd);
+ return rc;
+}
+
+void *find_auxv_entry(int type, char *auxv)
+{
+ ElfW(auxv_t) *p;
+
+ p = (ElfW(auxv_t) *)auxv;
+
+ while (p->a_type != AT_NULL) {
+ if (p->a_type == type)
+ return p;
+
+ p++;
+ }
+
+ return NULL;
+}
+
+void *get_auxv_entry(int type)
+{
+ ElfW(auxv_t) *p;
+
+ if (read_auxv(auxv, sizeof(auxv)))
+ return NULL;
+
+ p = find_auxv_entry(type, auxv);
+ if (p)
+ return (void *)p->a_un.a_val;
+
+ return NULL;
+}
+
+int pick_online_cpu(void)
+{
+ int ncpus, cpu = -1;
+ cpu_set_t *mask;
+ size_t size;
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
+
+ CPU_ZERO_S(size, mask);
+
+ if (sched_getaffinity(0, size, mask)) {
+ perror("sched_getaffinity");
+ goto done;
+ }
+
+ /* We prefer a primary thread, but skip 0 */
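+ /* (A stride of 8 assumes SMT8 cores, where every eighth CPU id is
+ * the first thread of a core.) */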
+ for (cpu = 8; cpu < ncpus; cpu += 8)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
+
+ /* Search for anything, but in reverse */
+ for (cpu = ncpus - 1; cpu >= 0; cpu--)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
+
+ printf("No cpus in affinity mask?!\n");
+
+done:
+ CPU_FREE(mask);
+ return cpu;
+}
+
+bool is_ppc64le(void)
+{
+ struct utsname uts;
+ int rc;
+
+ errno = 0;
+ rc = uname(&uts);
+ if (rc) {
+ perror("uname");
+ return false;
+ }
+
+ return strcmp(uts.machine, "ppc64le") == 0;
+}
diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore
new file mode 100644
index 000000000..7c0439501
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/.gitignore
@@ -0,0 +1 @@
+test-vphn
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
new file mode 100644
index 000000000..fb82068c9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -0,0 +1,9 @@
+TEST_GEN_PROGS := test-vphn
+
+CFLAGS += -m64
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
+
diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c
new file mode 100644
index 000000000..81d3069ff
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c
@@ -0,0 +1,411 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <byteswap.h>
+#include "utils.h"
+#include "subunit.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#define be16_to_cpup(x) bswap_16(*x)
+#define cpu_to_be64(x) bswap_64(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#define be16_to_cpup(x) (*x)
+#define cpu_to_be64(x) (x)
+#endif
+
+#include "vphn.c"
+
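+/*
+ * Packed format exercised below, as implemented by vphn_unpack_associativity()
+ * in the included vphn.c: the six 64-bit registers are consumed 16 bits at a
+ * time, most significant first; a 16-bit field with its top bit set encodes
+ * one value in its low 15 bits, a field with the top bit clear starts a full
+ * 32-bit value, and 0xffff terminates the list. The output buffer receives
+ * the element count followed by the unpacked values, all big-endian.
+ */
+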
+static struct test {
+ char *descr;
+ long input[VPHN_REGISTER_COUNT];
+ u32 expected[VPHN_ASSOC_BUFSIZE];
+} all_tests[] = {
+ {
+ "vphn: no data",
+ {
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000000
+ }
+ },
+ {
+ "vphn: 1 x 16-bit value",
+ {
+ 0x8001ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000001,
+ 0x00000001
+ }
+ },
+ {
+ "vphn: 2 x 16-bit values",
+ {
+ 0x80018002ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ "vphn: 3 x 16-bit values",
+ {
+ 0x800180028003ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ "vphn: 4 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000004,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004
+ }
+ },
+ {
+ /* Parsing the next 16-bit value out of the next 64-bit input
+ * value.
+ */
+ "vphn: 5 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0x8005ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ },
+ {
+ 0x00000005,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005
+ }
+ },
+ {
+ /* Parse at most 6 x 64-bit input values */
+ "vphn: 24 x 16-bit values",
+ {
+ 0x8001800280038004,
+ 0x8005800680078008,
+ 0x8009800a800b800c,
+ 0x800d800e800f8010,
+ 0x8011801280138014,
+ 0x8015801680178018
+ },
+ {
+ 0x00000018,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c,
+ 0x0000000d,
+ 0x0000000e,
+ 0x0000000f,
+ 0x00000010,
+ 0x00000011,
+ 0x00000012,
+ 0x00000013,
+ 0x00000014,
+ 0x00000015,
+ 0x00000016,
+ 0x00000017,
+ 0x00000018
+ }
+ },
+ {
+ "vphn: 1 x 32-bit value",
+ {
+ 0x00000001ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000001,
+ 0x00000001
+ }
+ },
+ {
+ "vphn: 2 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ /* Parsing the next 32-bit value out of the next 64-bit input
+ * value.
+ */
+ "vphn: 3 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0x00000003ffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* Parse at most 6 x 64-bit input values */
+ "vphn: 12 x 32-bit values",
+ {
+ 0x0000000100000002,
+ 0x0000000300000004,
+ 0x0000000500000006,
+ 0x0000000700000008,
+ 0x000000090000000a,
+ 0x0000000b0000000c
+ },
+ {
+ 0x0000000c,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c
+ }
+ },
+ {
+ "vphn: 16-bit value followed by 32-bit value",
+ {
+ 0x800100000002ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ "vphn: 32-bit value followed by 16-bit value",
+ {
+ 0x000000018002ffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000002,
+ 0x00000001,
+ 0x00000002
+ }
+ },
+ {
+ /* Parse a 32-bit value split across two consecutive 64-bit
+ * input values.
+ */
+ "vphn: 16-bit value followed by 2 x 32-bit values",
+ {
+ 0x8001000000020000,
+ 0x0003ffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* The lower bits in 0x0001ffff don't get mixed up with the
+ * 0xffff terminator.
+ */
+ "vphn: 32-bit value has all ones in 16 lower bits",
+ {
+ 0x0001ffff80028003,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff,
+ 0xffffffffffffffff
+ },
+ {
+ 0x00000003,
+ 0x0001ffff,
+ 0x00000002,
+ 0x00000003
+ }
+ },
+ {
+ /* The following input doesn't follow the specification.
+ */
+ "vphn: last 32-bit value is truncated",
+ {
+ 0x0000000100000002,
+ 0x0000000300000004,
+ 0x0000000500000006,
+ 0x0000000700000008,
+ 0x000000090000000a,
+ 0x0000000b800c2bad
+ },
+ {
+ 0x0000000c,
+ 0x00000001,
+ 0x00000002,
+ 0x00000003,
+ 0x00000004,
+ 0x00000005,
+ 0x00000006,
+ 0x00000007,
+ 0x00000008,
+ 0x00000009,
+ 0x0000000a,
+ 0x0000000b,
+ 0x0000000c
+ }
+ },
+ {
+ "vphn: garbage after terminator",
+ {
+ 0xffff2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad,
+ 0x2bad2bad2bad2bad
+ },
+ {
+ 0x00000000
+ }
+ },
+ {
+ NULL
+ }
+};
+
+static int test_one(struct test *test)
+{
+ __be32 output[VPHN_ASSOC_BUFSIZE] = { 0 };
+ int i, len;
+
+ vphn_unpack_associativity(test->input, output);
+
+ len = be32_to_cpu(output[0]);
+ if (len != test->expected[0]) {
+ printf("expected %d elements, got %d\n", test->expected[0],
+ len);
+ return 1;
+ }
+
+ /* output[1..len] holds the unpacked values, so include the last one. */
+ for (i = 1; i <= len; i++) {
+ u32 val = be32_to_cpu(output[i]);
+ if (val != test->expected[i]) {
+ printf("element #%d is 0x%x, should be 0x%x\n", i, val,
+ test->expected[i]);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int test_vphn(void)
+{
+ static struct test *test;
+
+ for (test = all_tests; test->descr; test++) {
+ int ret;
+
+ ret = test_one(test);
+ test_finish(test->descr, ret);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness(test_vphn, "test-vphn");
+}
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c
new file mode 120000
index 000000000..186b906e6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.c
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.c \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h
new file mode 120000
index 000000000..7131efe38
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.h \ No newline at end of file
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
new file mode 100644
index 000000000..0b5c27447
--- /dev/null
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -0,0 +1,3 @@
+disable-tsc-ctxt-sw-stress-test
+disable-tsc-on-off-stress-test
+disable-tsc-test
diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
new file mode 100644
index 000000000..c7923b205
--- /dev/null
+++ b/tools/testing/selftests/prctl/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \
+ disable-tsc-test
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
+endif
+endif
diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
new file mode 100644
index 000000000..62a93cc61
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * at context switches
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+static uint64_t rdtsc(void)
+{
+ uint32_t lo, hi;
+
+ /* We cannot use "=A", since this would use %rax on x86_64 */
+ __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+ return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_expect(int sig)
+{
+ /* Expected; rdtsc() raised SIGSEGV while the TSC was disabled. */
+}
+
+static void segvtask(void)
+{
+ if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ signal(SIGSEGV, sigsegv_expect);
+ alarm(10);
+ rdtsc();
+ fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+ exit(0);
+}
+
+
+static void sigsegv_fail(int sig)
+{
+ fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+ exit(0);
+}
+
+static void rdtsctask(void)
+{
+ if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ signal(SIGSEGV, sigsegv_fail);
+ alarm(10);
+ for(;;) rdtsc();
+}
+
+
+int main(void)
+{
+ int n_tasks = 100, i;
+
+ fprintf(stderr, "[No further output means we're allright]\n");
+
+ for (i=0; i<n_tasks; i++)
+ if (fork() == 0)
+ {
+ if (i & 1)
+ segvtask();
+ else
+ rdtsctask();
+ }
+
+ for (i=0; i<n_tasks; i++)
+ wait(NULL);
+
+ exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
new file mode 100644
index 000000000..79950f9a2
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Tests if the control register is updated correctly
+ * when set with prctl()
+ *
+ * Warning: this test will cause a very high load for a few seconds
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+#include <wait.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+/* snippet from wikipedia :-) */
+
+static uint64_t rdtsc(void)
+{
+ uint32_t lo, hi;
+
+ /* We cannot use "=A", since this would use %rax on x86_64 */
+ __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+ return (uint64_t)hi << 32 | lo;
+}
+
+int should_segv = 0;
+
+static void sigsegv_cb(int sig)
+{
+ if (!should_segv)
+ {
+ fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n");
+ exit(0);
+ }
+ if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ should_segv = 0;
+
+ rdtsc();
+}
+
+static void task(void)
+{
+ signal(SIGSEGV, sigsegv_cb);
+ alarm(10);
+ for(;;)
+ {
+ rdtsc();
+ if (should_segv)
+ {
+ fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n");
+ exit(0);
+ }
+ if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0)
+ {
+ perror("prctl");
+ exit(0);
+ }
+ should_segv = 1;
+ }
+}
+
+
+int main(void)
+{
+ int n_tasks = 100, i;
+
+ fprintf(stderr, "[No further output means we're allright]\n");
+
+ for (i=0; i<n_tasks; i++)
+ if (fork() == 0)
+ task();
+
+ for (i=0; i<n_tasks; i++)
+ wait(NULL);
+
+ exit(0);
+}
+
diff --git a/tools/testing/selftests/prctl/disable-tsc-test.c b/tools/testing/selftests/prctl/disable-tsc-test.c
new file mode 100644
index 000000000..f84d4ee11
--- /dev/null
+++ b/tools/testing/selftests/prctl/disable-tsc-test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...)
+ *
+ * Basic test to test behaviour of PR_GET_TSC and PR_SET_TSC
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <inttypes.h>
+
+
+#include <sys/prctl.h>
+#include <linux/prctl.h>
+
+/* Get/set the process' ability to use the timestamp counter instruction */
+#ifndef PR_GET_TSC
+#define PR_GET_TSC 25
+#define PR_SET_TSC 26
+# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */
+# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */
+#endif
+
+const char *tsc_names[] =
+{
+ [0] = "[not set]",
+ [PR_TSC_ENABLE] = "PR_TSC_ENABLE",
+ [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV",
+};
+
+static uint64_t rdtsc(void)
+{
+ uint32_t lo, hi;
+
+ /* We cannot use "=A", since this would use %rax on x86_64 */
+ __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+ return (uint64_t)hi << 32 | lo;
+}
+
+static void sigsegv_cb(int sig)
+{
+ int tsc_val = 0;
+
+ printf("[ SIG_SEGV ]\n");
+ printf("prctl(PR_GET_TSC, &tsc_val); ");
+ fflush(stdout);
+
+ if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+ perror("prctl");
+
+ printf("tsc_val == %s\n", tsc_names[tsc_val]);
+ printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+ fflush(stdout);
+ if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == ");
+}
+
+int main(void)
+{
+ int tsc_val = 0;
+
+ signal(SIGSEGV, sigsegv_cb);
+
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_GET_TSC, &tsc_val); ");
+ fflush(stdout);
+
+ if ( prctl(PR_GET_TSC, &tsc_val) == -1)
+ perror("prctl");
+
+ printf("tsc_val == %s\n", tsc_names[tsc_val]);
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n");
+ fflush(stdout);
+
+ if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == %llu\n", (unsigned long long)rdtsc());
+ printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n");
+ fflush(stdout);
+
+ if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1)
+ perror("prctl");
+
+ printf("rdtsc() == ");
+ fflush(stdout);
+ printf("%llu\n", (unsigned long long)rdtsc());
+ fflush(stdout);
+
+ exit(EXIT_SUCCESS);
+}
+
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
new file mode 100644
index 000000000..29bac5ef9
--- /dev/null
+++ b/tools/testing/selftests/proc/.gitignore
@@ -0,0 +1,14 @@
+/fd-001-lookup
+/fd-002-posix-eq
+/fd-003-kthread
+/proc-loadavg-001
+/proc-self-map-files-001
+/proc-self-map-files-002
+/proc-self-syscall
+/proc-self-wchan
+/proc-uptime-001
+/proc-uptime-002
+/read
+/self
+/setns-dcache
+/thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
new file mode 100644
index 000000000..434d033ee
--- /dev/null
+++ b/tools/testing/selftests/proc/Makefile
@@ -0,0 +1,20 @@
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -D_GNU_SOURCE
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += fd-001-lookup
+TEST_GEN_PROGS += fd-002-posix-eq
+TEST_GEN_PROGS += fd-003-kthread
+TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-self-map-files-001
+TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-syscall
+TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-uptime-001
+TEST_GEN_PROGS += proc-uptime-002
+TEST_GEN_PROGS += read
+TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
+TEST_GEN_PROGS += thread-self
+
+include ../lib.mk
diff --git a/tools/testing/selftests/proc/config b/tools/testing/selftests/proc/config
new file mode 100644
index 000000000..68fbd2b35
--- /dev/null
+++ b/tools/testing/selftests/proc/config
@@ -0,0 +1 @@
+CONFIG_PROC_FS=y
diff --git a/tools/testing/selftests/proc/fd-001-lookup.c b/tools/testing/selftests/proc/fd-001-lookup.c
new file mode 100644
index 000000000..60d7948e7
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-001-lookup.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test /proc/*/fd lookup.
+
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc.h"
+
+/* lstat(2) has more "coverage" in case non-symlink pops up somehow. */
+static void test_lookup_pass(const char *pathname)
+{
+ struct stat st;
+ ssize_t rv;
+
+ memset(&st, 0, sizeof(struct stat));
+ rv = lstat(pathname, &st);
+ assert(rv == 0);
+ assert(S_ISLNK(st.st_mode));
+}
+
+static void test_lookup_fail(const char *pathname)
+{
+ struct stat st;
+ ssize_t rv;
+
+ rv = lstat(pathname, &st);
+ assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(unsigned int fd)
+{
+ char buf[64];
+ unsigned int c;
+ unsigned int u;
+ int i;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd);
+ test_lookup_pass(buf);
+
+ /* leading junk */
+ for (c = 1; c <= 255; c++) {
+ if (c == '/')
+ continue;
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%c%u", c, fd);
+ test_lookup_fail(buf);
+ }
+
+ /* trailing junk */
+ for (c = 1; c <= 255; c++) {
+ if (c == '/')
+ continue;
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u%c", fd, c);
+ test_lookup_fail(buf);
+ }
+
+ for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+ test_lookup_fail(buf);
+ }
+ for (i = -1024; i < 0; i++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", i);
+ test_lookup_fail(buf);
+ }
+ for (u = INT_MAX - 1024; u <= (unsigned int)INT_MAX + 1024; u++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+ test_lookup_fail(buf);
+ }
+ for (u = UINT_MAX - 1024; u != 0; u++) {
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", u);
+ test_lookup_fail(buf);
+ }
+
+
+}
+
+int main(void)
+{
+ struct dirent *de;
+ unsigned int fd, target_fd;
+
+ if (unshare(CLONE_FILES) == -1)
+ return 1;
+
+ /* Wipe fdtable. */
+ do {
+ DIR *d;
+
+ d = opendir("/proc/self/fd");
+ if (!d)
+ return 1;
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, "."));
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, ".."));
+next:
+ de = xreaddir(d);
+ if (de) {
+ unsigned long long fd_ull;
+ unsigned int fd;
+ char *end;
+
+ assert(de->d_type == DT_LNK);
+
+ fd_ull = xstrtoull(de->d_name, &end);
+ assert(*end == '\0');
+ assert(fd_ull == (unsigned int)fd_ull);
+
+ fd = fd_ull;
+ if (fd == dirfd(d))
+ goto next;
+ close(fd);
+ }
+
+ closedir(d);
+ } while (de);
+
+ /* Now fdtable is clean. */
+
+ fd = open("/", O_PATH|O_DIRECTORY);
+ assert(fd == 0);
+ test_lookup(fd);
+ close(fd);
+
+ /* Clean again! */
+
+ fd = open("/", O_PATH|O_DIRECTORY);
+ assert(fd == 0);
+	/* The highest fd under the default RLIMIT_NOFILE of 1024. */
+ target_fd = 1023;
+ while (target_fd > 0) {
+ if (dup2(fd, target_fd) == target_fd)
+ break;
+ target_fd /= 2;
+ }
+ assert(target_fd > 0);
+ close(fd);
+ test_lookup(target_fd);
+ close(target_fd);
+
+ return 0;
+}
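+
+/*
+ * Illustrative companion sketch (not used by the test): entries under
+ * /proc/self/fd are "magic" symlinks, so readlink(2) shows what a
+ * descriptor points at. The helper name is made up for the example.
+ */
+static int show_fd_target(int fd)
+{
+	char name[64], target[4096];
+	ssize_t n;
+
+	snprintf(name, sizeof(name), "/proc/self/fd/%d", fd);
+	n = readlink(name, target, sizeof(target) - 1);
+	if (n < 0)
+		return -1;
+	target[n] = '\0';
+	/* e.g. "fd 0 -> /dev/pts/0" on a typical terminal */
+	printf("fd %d -> %s\n", fd, target);
+	return 0;
+}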
diff --git a/tools/testing/selftests/proc/fd-002-posix-eq.c b/tools/testing/selftests/proc/fd-002-posix-eq.c
new file mode 100644
index 000000000..417322ca9
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-002-posix-eq.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that open(/proc/*/fd/*) opens the same file.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd0, fd1, fd2;
+ struct stat st0, st1, st2;
+ char buf[64];
+ int rv;
+
+ fd0 = open("/", O_DIRECTORY|O_RDONLY);
+ assert(fd0 >= 0);
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%u", fd0);
+ fd1 = open(buf, O_RDONLY);
+ assert(fd1 >= 0);
+
+ snprintf(buf, sizeof(buf), "/proc/thread-self/fd/%u", fd0);
+ fd2 = open(buf, O_RDONLY);
+ assert(fd2 >= 0);
+
+ rv = fstat(fd0, &st0);
+ assert(rv == 0);
+ rv = fstat(fd1, &st1);
+ assert(rv == 0);
+ rv = fstat(fd2, &st2);
+ assert(rv == 0);
+
+ assert(st0.st_dev == st1.st_dev);
+ assert(st0.st_ino == st1.st_ino);
+
+ assert(st0.st_dev == st2.st_dev);
+ assert(st0.st_ino == st2.st_ino);
+
+ return 0;
+}
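+
+/*
+ * Hedged sketch of why this identity matters in practice: opening
+ * /proc/self/fd/N creates a new open file description for the same
+ * inode, possibly with different flags. The helper name is
+ * illustrative only.
+ */
+static int reopen_readonly(int fd)
+{
+	char path[64];
+
+	snprintf(path, sizeof(path), "/proc/self/fd/%d", fd);
+	/* the result compares equal on st_dev/st_ino, as asserted above */
+	return open(path, O_RDONLY);
+}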
diff --git a/tools/testing/selftests/proc/fd-003-kthread.c b/tools/testing/selftests/proc/fd-003-kthread.c
new file mode 100644
index 000000000..dc591f97b
--- /dev/null
+++ b/tools/testing/selftests/proc/fd-003-kthread.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/$KERNEL_THREAD/fd/ is empty.
+
+#undef NDEBUG
+#include <sys/syscall.h>
+#include <assert.h>
+#include <dirent.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+#define PF_KTHREAD 0x00200000
+
+/*
+ * Test for kernel threadness atomically with openat().
+ *
+ * Return a /proc/$PID/fd descriptor if the process is a kernel thread,
+ * or -1 if it is a userspace process.
+ */
+static int kernel_thread_fd(unsigned int pid)
+{
+ unsigned int flags = 0;
+ char buf[4096];
+ int dir_fd, fd;
+ ssize_t rv;
+
+ snprintf(buf, sizeof(buf), "/proc/%u", pid);
+ dir_fd = open(buf, O_RDONLY|O_DIRECTORY);
+ if (dir_fd == -1)
+ return -1;
+
+ /*
+ * Believe it or not, struct task_struct::flags is directly exposed
+ * to userspace!
+ */
+ fd = openat(dir_fd, "stat", O_RDONLY);
+ if (fd == -1) {
+ close(dir_fd);
+ return -1;
+ }
+ rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (0 < rv && rv <= sizeof(buf)) {
+ unsigned long long flags_ull;
+ char *p, *end;
+ int i;
+
+ assert(buf[rv - 1] == '\n');
+ buf[rv - 1] = '\0';
+
+		/*
+		 * Search backwards: ->comm can contain whitespace and ')'.
+		 * "flags" is field 9 of the 52 in /proc/$PID/stat, so strip
+		 * the 43 trailing fields to leave it last.
+		 */
+ for (i = 0; i < 43; i++) {
+ p = strrchr(buf, ' ');
+ assert(p);
+ *p = '\0';
+ }
+
+ p = strrchr(buf, ' ');
+ assert(p);
+
+ flags_ull = xstrtoull(p + 1, &end);
+ assert(*end == '\0');
+ assert(flags_ull == (unsigned int)flags_ull);
+
+ flags = flags_ull;
+ }
+
+ fd = -1;
+	if (flags & PF_KTHREAD) {
+ fd = openat(dir_fd, "fd", O_RDONLY|O_DIRECTORY);
+ }
+ close(dir_fd);
+ return fd;
+}
+
+static void test_readdir(int fd)
+{
+ DIR *d;
+ struct dirent *de;
+
+ d = fdopendir(fd);
+ assert(d);
+
+ de = xreaddir(d);
+ assert(streq(de->d_name, "."));
+ assert(de->d_type == DT_DIR);
+
+ de = xreaddir(d);
+ assert(streq(de->d_name, ".."));
+ assert(de->d_type == DT_DIR);
+
+ de = xreaddir(d);
+ assert(!de);
+}
+
+static inline int sys_statx(int dirfd, const char *pathname, int flags,
+ unsigned int mask, void *stx)
+{
+ return syscall(SYS_statx, dirfd, pathname, flags, mask, stx);
+}
+
+static void test_lookup_fail(int fd, const char *pathname)
+{
+ char stx[256] __attribute__((aligned(8)));
+ int rv;
+
+ rv = sys_statx(fd, pathname, AT_SYMLINK_NOFOLLOW, 0, (void *)stx);
+ assert(rv == -1 && errno == ENOENT);
+}
+
+static void test_lookup(int fd)
+{
+ char buf[64];
+ unsigned int u;
+ int i;
+
+ for (i = INT_MIN; i < INT_MIN + 1024; i++) {
+ snprintf(buf, sizeof(buf), "%d", i);
+ test_lookup_fail(fd, buf);
+ }
+ for (i = -1024; i < 1024; i++) {
+ snprintf(buf, sizeof(buf), "%d", i);
+ test_lookup_fail(fd, buf);
+ }
+ for (u = INT_MAX - 1024; u < (unsigned int)INT_MAX + 1024; u++) {
+ snprintf(buf, sizeof(buf), "%u", u);
+ test_lookup_fail(fd, buf);
+ }
+ for (u = UINT_MAX - 1024; u != 0; u++) {
+ snprintf(buf, sizeof(buf), "%u", u);
+ test_lookup_fail(fd, buf);
+ }
+}
+
+int main(void)
+{
+ unsigned int pid;
+ int fd;
+
+ /*
+ * In theory this will loop indefinitely if kernel threads are exiled
+ * from /proc.
+ *
+ * Start with kthreadd.
+ */
+ pid = 2;
+ while ((fd = kernel_thread_fd(pid)) == -1 && pid < 1024) {
+ pid++;
+ }
+ /* EACCES if run as non-root. */
+ if (pid >= 1024)
+ return 1;
+
+ test_readdir(fd);
+ test_lookup(fd);
+
+ return 0;
+}
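+
+/*
+ * Alternative hedged sketch for extracting "flags": it is field 9 of
+ * /proc/$PID/stat, i.e. the 7th token after the ')' that closes ->comm,
+ * so a forward scan works too. Illustrative helper assuming the field
+ * layout above.
+ */
+static unsigned long stat_flags(const char *stat_line)
+{
+	const char *p = strrchr(stat_line, ')');
+	unsigned long fl = 0;
+
+	if (!p)
+		return 0;
+	/* after ')': state ppid pgrp session tty_nr tpgid flags ... */
+	sscanf(p + 2, "%*c %*d %*d %*d %*d %*d %lu", &fl);
+	return fl;
+}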
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
new file mode 100644
index 000000000..8edaafc2b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
+#include <errno.h>
+#include <sched.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+int main(void)
+{
+ pid_t pid;
+ int wstatus;
+
+ if (unshare(CLONE_NEWPID) == -1) {
+ if (errno == ENOSYS || errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0) {
+ char buf[128], *p;
+ int fd;
+ ssize_t rv;
+
+		fd = open("/proc/loadavg", O_RDONLY);
+ if (fd == -1)
+ return 1;
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 1 */
+ if (!(p[-3] == ' ' && p[-2] == '1' && p[-1] == '\n'))
+ return 1;
+
+ pid = fork();
+ if (pid == -1)
+ return 1;
+ if (pid == 0)
+ return 0;
+ if (waitpid(pid, NULL, 0) == -1)
+ return 1;
+
+ lseek(fd, 0, SEEK_SET);
+ rv = read(fd, buf, sizeof(buf));
+ if (rv < 3)
+ return 1;
+ p = buf + rv;
+
+ /* pid 2 */
+ if (!(p[-3] == ' ' && p[-2] == '2' && p[-1] == '\n'))
+ return 1;
+
+ return 0;
+ }
+
+ if (waitpid(pid, &wstatus, 0) == -1)
+ return 1;
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0)
+ return 0;
+ return 1;
+}
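+
+/*
+ * Hedged sketch of the /proc/loadavg layout the checks above rely on:
+ *
+ *	0.00 0.01 0.05 1/80 1234
+ *
+ * (1, 5 and 15 minute load, runnable/total tasks, last allocated PID --
+ * the last field restarts per pid namespace, hence "1" and "2" above).
+ * Illustrative helper; strrchr() needs <string.h>, which the test
+ * itself does not include.
+ */
+static int read_last_pid(void)
+{
+	char buf[128], *p;
+	int fd, last_pid = 0;
+	ssize_t rv;
+
+	fd = open("/proc/loadavg", O_RDONLY);
+	if (fd == -1)
+		return -1;
+	rv = read(fd, buf, sizeof(buf) - 1);
+	close(fd);
+	if (rv < 2 || buf[rv - 1] != '\n')
+		return -1;
+	buf[rv - 1] = '\0';
+	p = strrchr(buf, ' ');
+	if (!p)
+		return -1;
+	for (p++; '0' <= *p && *p <= '9'; p++)
+		last_pid = last_pid * 10 + (*p - '0');
+	return last_pid;
+}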
diff --git a/tools/testing/selftests/proc/proc-self-map-files-001.c b/tools/testing/selftests/proc/proc-self-map-files-001.c
new file mode 100644
index 000000000..4209c6428
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-001.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap(NULL, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE, fd, 0);
+ if (p == MAP_FAILED)
+ return 1;
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
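+
+/*
+ * Hedged companion sketch: every entry under /proc/self/map_files is
+ * named "<start>-<end>" in bare lowercase hex (hence the variants
+ * rejected above) and is a symlink to the mapped file. Illustrative
+ * helper assuming <dirent.h> in addition to the includes above.
+ */
+static void list_map_files(void)
+{
+	DIR *d = opendir("/proc/self/map_files");
+	struct dirent *de;
+
+	if (!d)
+		return;
+	while ((de = readdir(d)) != NULL) {
+		char path[128], target[4096];
+		ssize_t n;
+
+		if (de->d_name[0] == '.')
+			continue;
+		snprintf(path, sizeof(path), "/proc/self/map_files/%s",
+			 de->d_name);
+		n = readlink(path, target, sizeof(target) - 1);
+		if (n < 0)
+			continue;
+		target[n] = '\0';
+		printf("%s -> %s\n", de->d_name, target);
+	}
+	closedir(d);
+}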
diff --git a/tools/testing/selftests/proc/proc-self-map-files-002.c b/tools/testing/selftests/proc/proc-self-map-files-002.c
new file mode 100644
index 000000000..85744425b
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-map-files-002.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test readlink /proc/self/map_files/... with minimum address. */
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+
+static void pass(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1)
+ exit(1);
+}
+
+static void fail(const char *fmt, unsigned long a, unsigned long b)
+{
+ char name[64];
+ char buf[64];
+
+ snprintf(name, sizeof(name), fmt, a, b);
+ if (readlink(name, buf, sizeof(buf)) == -1 && errno == ENOENT)
+ return;
+ exit(1);
+}
+
+int main(void)
+{
+ const unsigned int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+#ifdef __arm__
+ unsigned long va = 2 * PAGE_SIZE;
+#else
+ unsigned long va = 0;
+#endif
+ void *p;
+ int fd;
+ unsigned long a, b;
+
+ fd = open("/dev/zero", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ p = mmap((void *)va, PAGE_SIZE, PROT_NONE, MAP_PRIVATE|MAP_FILE|MAP_FIXED, fd, 0);
+ if (p == MAP_FAILED) {
+ if (errno == EPERM)
+ return 2;
+ return 1;
+ }
+
+ a = (unsigned long)p;
+ b = (unsigned long)p + PAGE_SIZE;
+
+ pass("/proc/self/map_files/%lx-%lx", a, b);
+ fail("/proc/self/map_files/ %lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx -%lx", a, b);
+ fail("/proc/self/map_files/%lx- %lx", a, b);
+ fail("/proc/self/map_files/%lx-%lx ", a, b);
+ fail("/proc/self/map_files/0%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-0%lx", a, b);
+ if (sizeof(long) == 4) {
+ fail("/proc/self/map_files/100000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-100000000%lx", a, b);
+ } else if (sizeof(long) == 8) {
+ fail("/proc/self/map_files/10000000000000000%lx-%lx", a, b);
+ fail("/proc/self/map_files/%lx-10000000000000000%lx", a, b);
+ } else
+ return 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
new file mode 100644
index 000000000..7b9018fad
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <stdio.h>
+
+static inline ssize_t sys_read(int fd, void *buf, size_t len)
+{
+ return syscall(SYS_read, fd, buf, len);
+}
+
+int main(void)
+{
+ char buf1[64];
+ char buf2[64];
+ int fd;
+ ssize_t rv;
+
+ fd = open("/proc/self/syscall", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ /* Do direct system call as libc can wrap anything. */
+ snprintf(buf1, sizeof(buf1), "%ld 0x%lx 0x%lx 0x%lx",
+ (long)SYS_read, (long)fd, (long)buf2, (long)sizeof(buf2));
+
+ memset(buf2, 0, sizeof(buf2));
+ rv = sys_read(fd, buf2, sizeof(buf2));
+ if (rv < 0)
+ return 1;
+ if (rv < strlen(buf1))
+ return 1;
+ if (strncmp(buf1, buf2, strlen(buf1)) != 0)
+ return 1;
+
+ return 0;
+}
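+
+/*
+ * Hedged note on the format matched above: per proc(5), a task blocked
+ * inside a system call reads back as
+ *
+ *	<nr> <arg1> .. <arg6> <sp> <pc>
+ *
+ * with 0x-prefixed hex arguments; a running task reads "running" and a
+ * task blocked outside a syscall reads "-1 <sp> <pc>". Only the prefix
+ * (nr plus the first three read(2) arguments) is compared because the
+ * remaining values are unpredictable.
+ */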
diff --git a/tools/testing/selftests/proc/proc-self-wchan.c b/tools/testing/selftests/proc/proc-self-wchan.c
new file mode 100644
index 000000000..a38b2fbaa
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-wchan.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ char buf[64];
+ int fd;
+
+ fd = open("/proc/self/wchan", O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT)
+ return 2;
+ return 1;
+ }
+
+ buf[0] = '\0';
+ if (read(fd, buf, sizeof(buf)) != 1)
+ return 1;
+ if (buf[0] != '0')
+ return 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
new file mode 100644
index 000000000..781f7a50f
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically.
+#undef NDEBUG
+#include <assert.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+int main(void)
+{
+ uint64_t start, u0, u1, i0, i1;
+ int fd;
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ start = u0;
+ do {
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ } while (u1 - start < 100);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
new file mode 100644
index 000000000..e7ceabed7
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that values in /proc/uptime increment monotonically
+// while shifting across CPUs.
+#undef NDEBUG
+#include <assert.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include "proc-uptime.h"
+
+static inline int sys_sched_getaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_getaffinity, pid, len, m);
+}
+
+static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned long *m)
+{
+ return syscall(SYS_sched_setaffinity, pid, len, m);
+}
+
+int main(void)
+{
+ unsigned int len;
+ unsigned long *m;
+ unsigned int cpu;
+ uint64_t u0, u1, i0, i1;
+ int fd;
+
+ /* find out "nr_cpu_ids" */
+ m = NULL;
+ len = 0;
+ do {
+ len += sizeof(unsigned long);
+ free(m);
+ m = malloc(len);
+	} while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL);
+
+ fd = open("/proc/uptime", O_RDONLY);
+ assert(fd >= 0);
+
+ proc_uptime(fd, &u0, &i0);
+ for (cpu = 0; cpu < len * 8; cpu++) {
+ memset(m, 0, len);
+ m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
+
+ /* CPU might not exist, ignore error */
+ sys_sched_setaffinity(0, len, m);
+
+ proc_uptime(fd, &u1, &i1);
+ assert(u1 >= u0);
+ assert(i1 >= i0);
+ u0 = u1;
+ i0 = i1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
new file mode 100644
index 000000000..dc6a42b1d
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+{
+ uint64_t val1, val2;
+ char buf[64], *p;
+ ssize_t rv;
+
+	/* Zero-fill so the parsing below can skip explicit "p < end" checks. */
+ memset(buf, 0, sizeof(buf));
+ rv = pread(fd, buf, sizeof(buf), 0);
+ assert(0 <= rv && rv <= sizeof(buf));
+ buf[sizeof(buf) - 1] = '\0';
+
+ p = buf;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == ' ');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *uptime = val1 * 100 + val2;
+
+ p += 4;
+
+ val1 = xstrtoull(p, &p);
+ assert(p[0] == '.');
+ assert('0' <= p[1] && p[1] <= '9');
+ assert('0' <= p[2] && p[2] <= '9');
+ assert(p[3] == '\n');
+
+ val2 = (p[1] - '0') * 10 + p[2] - '0';
+ *idle = val1 * 100 + val2;
+
+ assert(p + 4 == buf + rv);
+}
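+
+/*
+ * Hedged usage note: both /proc/uptime values are formatted as
+ * "seconds.centiseconds", so proc_uptime() returns them scaled to
+ * centiseconds, e.g. "123.45 678.90\n" yields uptime=12345 and
+ * idle=67890.
+ */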
diff --git a/tools/testing/selftests/proc/proc.h b/tools/testing/selftests/proc/proc.h
new file mode 100644
index 000000000..b7d57ea40
--- /dev/null
+++ b/tools/testing/selftests/proc/proc.h
@@ -0,0 +1,51 @@
+#pragma once
+#undef NDEBUG
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t sys_getpid(void)
+{
+ return syscall(SYS_getpid);
+}
+
+static inline pid_t sys_gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+
+static inline bool streq(const char *s1, const char *s2)
+{
+ return strcmp(s1, s2) == 0;
+}
+
+static unsigned long long xstrtoull(const char *p, char **end)
+{
+ if (*p == '0') {
+ *end = (char *)p + 1;
+ return 0;
+ } else if ('1' <= *p && *p <= '9') {
+ unsigned long long val;
+
+ errno = 0;
+ val = strtoull(p, end, 10);
+ assert(errno == 0);
+ return val;
+ } else
+ assert(0);
+}
+
+static struct dirent *xreaddir(DIR *d)
+{
+ struct dirent *de;
+
+ errno = 0;
+ de = readdir(d);
+ assert(de || errno == 0);
+ return de;
+}
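+
+/*
+ * Hedged examples of what the strict parser above accepts and rejects:
+ *
+ *	xstrtoull("0", &end)   -> 0,  end points past the '0'
+ *	xstrtoull("42", &end)  -> 42, end points at the terminator
+ *	xstrtoull("007", &end) -> 0,  end stops at the second '0'
+ *	                          (callers assert *end == '\0' and fail)
+ *	xstrtoull("x", &end)   -> assert(0) fires
+ *
+ * Deliberately stricter than strtoull(3), which would also accept
+ * leading whitespace, signs and zeros.
+ */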
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
new file mode 100644
index 000000000..563e752e6
--- /dev/null
+++ b/tools/testing/selftests/proc/read.c
@@ -0,0 +1,132 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test
+// 1) read of every file in /proc
+// 2) readlink of every symlink in /proc
+// 3) recursively (1) + (2) for every directory in /proc
+// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
+// 5) write to /proc/sysrq-trigger
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+static void f_reg(DIR *d, const char *filename)
+{
+ char buf[4096];
+ int fd;
+ ssize_t rv;
+
+ /* read from /proc/kmsg can block */
+ fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
+ if (fd == -1)
+ return;
+ rv = read(fd, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+ close(fd);
+}
+
+static void f_reg_write(DIR *d, const char *filename, const char *buf, size_t len)
+{
+ int fd;
+ ssize_t rv;
+
+ fd = openat(dirfd(d), filename, O_WRONLY);
+ if (fd == -1)
+ return;
+ rv = write(fd, buf, len);
+ assert((0 <= rv && rv <= len) || rv == -1);
+ close(fd);
+}
+
+static void f_lnk(DIR *d, const char *filename)
+{
+ char buf[4096];
+ ssize_t rv;
+
+ rv = readlinkat(dirfd(d), filename, buf, sizeof(buf));
+ assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
+}
+
+static void f(DIR *d, unsigned int level)
+{
+ struct dirent *de;
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, "."));
+
+ de = xreaddir(d);
+ assert(de->d_type == DT_DIR);
+ assert(streq(de->d_name, ".."));
+
+ while ((de = xreaddir(d))) {
+ assert(!streq(de->d_name, "."));
+ assert(!streq(de->d_name, ".."));
+
+ switch (de->d_type) {
+ DIR *dd;
+ int fd;
+
+ case DT_REG:
+ if (level == 0 && streq(de->d_name, "sysrq-trigger")) {
+ f_reg_write(d, de->d_name, "h", 1);
+ } else if (level == 1 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else if (level == 3 && streq(de->d_name, "clear_refs")) {
+ f_reg_write(d, de->d_name, "1", 1);
+ } else {
+ f_reg(d, de->d_name);
+ }
+ break;
+ case DT_DIR:
+ fd = openat(dirfd(d), de->d_name, O_DIRECTORY|O_RDONLY);
+ if (fd == -1)
+ continue;
+ dd = fdopendir(fd);
+ if (!dd)
+ continue;
+ f(dd, level + 1);
+ closedir(dd);
+ break;
+ case DT_LNK:
+ f_lnk(d, de->d_name);
+ break;
+ default:
+ assert(0);
+ }
+ }
+}
+
+int main(void)
+{
+ DIR *d;
+
+ d = opendir("/proc");
+ if (!d)
+ return 2;
+ f(d, 0);
+ return 0;
+}
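+
+/*
+ * Hedged note on why the two write cases above are harmless: writing
+ * 'h' to /proc/sysrq-trigger only logs the sysrq help text, and
+ * writing '1' to clear_refs (level 1: /proc/*/clear_refs, level 3:
+ * /proc/*/task/*/clear_refs, with /proc itself at level 0) merely
+ * clears the referenced bits on the target's pages.
+ */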
diff --git a/tools/testing/selftests/proc/self.c b/tools/testing/selftests/proc/self.c
new file mode 100644
index 000000000..21c15a1ff
--- /dev/null
+++ b/tools/testing/selftests/proc/self.c
@@ -0,0 +1,39 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/self gives correct TGID.
+#undef NDEBUG
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include "proc.h"
+
+int main(void)
+{
+ char buf1[64], buf2[64];
+ pid_t pid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ snprintf(buf1, sizeof(buf1), "%u", pid);
+
+ rv = readlink("/proc/self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c
new file mode 100644
index 000000000..60ab197a7
--- /dev/null
+++ b/tools/testing/selftests/proc/setns-dcache.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that setns(CLONE_NEWNET) points to new /proc/net content even
+ * if old one is in dcache.
+ *
+ * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+
+static pid_t pid = -1;
+
+static void f(void)
+{
+ if (pid > 0) {
+ kill(pid, SIGTERM);
+ }
+}
+
+int main(void)
+{
+ int fd[2];
+ char _ = 0;
+ int nsfd;
+
+ atexit(f);
+
+	/* Check for privileges and syscall availability straight away. */
+ if (unshare(CLONE_NEWNET) == -1) {
+ if (errno == ENOSYS || errno == EPERM) {
+ return 4;
+ }
+ return 1;
+ }
+ /* Distinguisher between two otherwise empty net namespaces. */
+ if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
+ return 1;
+ }
+
+ if (pipe(fd) == -1) {
+ return 1;
+ }
+
+ pid = fork();
+ if (pid == -1) {
+ return 1;
+ }
+
+ if (pid == 0) {
+ if (unshare(CLONE_NEWNET) == -1) {
+ return 1;
+ }
+
+ if (write(fd[1], &_, 1) != 1) {
+ return 1;
+ }
+
+ pause();
+
+ return 0;
+ }
+
+ if (read(fd[0], &_, 1) != 1) {
+ return 1;
+ }
+
+ {
+ char buf[64];
+ snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
+ nsfd = open(buf, O_RDONLY);
+ if (nsfd == -1) {
+ return 1;
+ }
+ }
+
+ /* Reliably pin dentry into dcache. */
+ (void)open("/proc/net/unix", O_RDONLY);
+
+ if (setns(nsfd, CLONE_NEWNET) == -1) {
+ return 1;
+ }
+
+ kill(pid, SIGTERM);
+ pid = 0;
+
+ {
+ char buf[4096];
+ ssize_t rv;
+ int fd;
+
+ fd = open("/proc/net/unix", O_RDONLY);
+ if (fd == -1) {
+ return 1;
+ }
+
+#define S "Num RefCount Protocol Flags Type St Inode Path\n"
+ rv = read(fd, buf, sizeof(buf));
+
+ assert(rv == strlen(S));
+ assert(memcmp(buf, S, strlen(S)) == 0);
+ }
+
+ return 0;
+}
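+
+/*
+ * Hedged note on the mechanism exercised here: the open() above pins
+ * the old namespace's /proc/net/unix dentry in the dcache; after
+ * setns(CLONE_NEWNET) a fresh lookup must still resolve to the new
+ * namespace's empty table, which is why exactly one header line is
+ * expected.
+ */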
diff --git a/tools/testing/selftests/proc/thread-self.c b/tools/testing/selftests/proc/thread-self.c
new file mode 100644
index 000000000..4b23b39b7
--- /dev/null
+++ b/tools/testing/selftests/proc/thread-self.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2018 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/thread-self gives correct TGID/PID.
+#undef NDEBUG
+#include <assert.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "proc.h"
+
+int f(void *arg)
+{
+ char buf1[64], buf2[64];
+ pid_t pid, tid;
+ ssize_t rv;
+
+ pid = sys_getpid();
+ tid = sys_gettid();
+ snprintf(buf1, sizeof(buf1), "%u/task/%u", pid, tid);
+
+ rv = readlink("/proc/thread-self", buf2, sizeof(buf2));
+ assert(rv == strlen(buf1));
+ buf2[rv] = '\0';
+ assert(streq(buf1, buf2));
+
+ if (arg)
+ exit(0);
+ return 0;
+}
+
+int main(void)
+{
+ const int PAGE_SIZE = sysconf(_SC_PAGESIZE);
+ pid_t pid;
+ void *stack;
+
+ /* main thread */
+ f((void *)0);
+
+ stack = mmap(NULL, 2 * PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(stack != MAP_FAILED);
+ /* side thread */
+ pid = clone(f, stack + PAGE_SIZE, CLONE_THREAD|CLONE_SIGHAND|CLONE_VM, (void *)1);
+ assert(pid > 0);
+ pause();
+
+ return 0;
+}
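+
+/*
+ * Hedged note: clone(2) expects the *top* of the child's stack on
+ * stack-grows-down architectures, hence "stack + PAGE_SIZE"; of the
+ * two mapped pages, the child's stack grows down into the first.
+ */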
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
new file mode 100644
index 000000000..5a4a26e54
--- /dev/null
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -0,0 +1,2 @@
+logs
+*uuid
diff --git a/tools/testing/selftests/pstore/Makefile b/tools/testing/selftests/pstore/Makefile
new file mode 100644
index 000000000..5ef57855a
--- /dev/null
+++ b/tools/testing/selftests/pstore/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for pstore selftests.
+# Expects pstore backend is registered.
+
+all:
+
+TEST_PROGS := pstore_tests pstore_post_reboot_tests
+TEST_FILES := common_tests pstore_crash_test
+EXTRA_CLEAN := logs/* *uuid
+
+include ../lib.mk
+
+run_crash:
+ @sh pstore_crash_test || { echo "pstore_crash_test: [FAIL]"; exit 1; }
diff --git a/tools/testing/selftests/pstore/common_tests b/tools/testing/selftests/pstore/common_tests
new file mode 100755
index 000000000..3ea64d7cf
--- /dev/null
+++ b/tools/testing/selftests/pstore/common_tests
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+# common_tests - Shell script commonly used by pstore test scripts
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Utilities
+errexit() { # message
+ echo "Error: $1" 1>&2
+ exit 1
+}
+
+absdir() { # file_path
+ (cd `dirname $1`; pwd)
+}
+
+show_result() { # result_value
+ if [ $1 -eq 0 ]; then
+ prlog "ok"
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+}
+
+check_files_exist() { # type of pstorefs file
+ if [ -e ${1}-${backend}-0 ]; then
+ prlog "ok"
+ for f in `ls ${1}-${backend}-*`; do
+ prlog -e "\t${f}"
+ done
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+}
+
+operate_files() { # tested value, files, operation
+ if [ $1 -eq 0 ]; then
+ prlog
+ for f in $2; do
+ prlog -ne "\t${f} ... "
+ # execute operation
+ $3 $f
+ show_result $?
+ done
+ else
+ prlog " ... FAIL"
+ rc=1
+ fi
+}
+
+# Parameters
+TEST_STRING_PATTERN="Testing pstore: uuid="
+UUID=`cat /proc/sys/kernel/random/uuid`
+TOP_DIR=`absdir $0`
+LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`_${UUID}/
+REBOOT_FLAG=$TOP_DIR/reboot_flag
+
+# Preparing logs
+LOG_FILE=$LOG_DIR/`basename $0`.log
+mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
+date > $LOG_FILE
+prlog() { # messages
+ /bin/echo "$@" | tee -a $LOG_FILE
+}
+
+# Starting tests
+rc=0
+prlog "=== Pstore unit tests (`basename $0`) ==="
+prlog "UUID="$UUID
+
+prlog -n "Checking pstore backend is registered ... "
+backend=`cat /sys/module/pstore/parameters/backend`
+show_result $?
+prlog -e "\tbackend=${backend}"
+prlog -e "\tcmdline=`cat /proc/cmdline`"
+if [ $rc -ne 0 ]; then
+ exit 1
+fi
diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
new file mode 100644
index 000000000..d148f9f89
--- /dev/null
+++ b/tools/testing/selftests/pstore/config
@@ -0,0 +1,5 @@
+CONFIG_MISC_FILESYSTEMS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_PMSG=y
+CONFIG_PSTORE_CONSOLE=y
+CONFIG_PSTORE_RAM=m
diff --git a/tools/testing/selftests/pstore/pstore_crash_test b/tools/testing/selftests/pstore/pstore_crash_test
new file mode 100755
index 000000000..1a4afe5c1
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_crash_test
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_crash_test - Pstore test shell script which causes crash and reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# exit if pstore backend is not registered
+. ./common_tests
+
+prlog "Causing kernel crash ..."
+
+# enable all functions triggered by sysrq
+echo 1 > /proc/sys/kernel/sysrq
+# setting to reboot in 3 seconds after panic
+echo 3 > /proc/sys/kernel/panic
+
+# save uuid file by different name because next test execution will replace it.
+mv $TOP_DIR/uuid $TOP_DIR/prev_uuid
+
+# create a file as reboot flag
+touch $REBOOT_FLAG
+sync
+
+# cause crash
+# Note: If you use kdump and want to see kmesg-* files after reboot, you should
+# specify 'crash_kexec_post_notifiers' in 1st kernel's cmdline.
+echo c > /proc/sysrq-trigger
diff --git a/tools/testing/selftests/pstore/pstore_post_reboot_tests b/tools/testing/selftests/pstore/pstore_post_reboot_tests
new file mode 100755
index 000000000..22f8df1ad
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_post_reboot_tests
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+# pstore_post_reboot_tests - Check pstore's behavior after crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+. ./common_tests
+
+if [ -e $REBOOT_FLAG ]; then
+ rm $REBOOT_FLAG
+else
+ prlog "pstore_crash_test has not been executed yet. we skip further tests."
+ exit $ksft_skip
+fi
+
+prlog -n "Mounting pstore filesystem ... "
+mount_info=`grep pstore /proc/mounts`
+if [ $? -eq 0 ]; then
+ mount_point=`echo ${mount_info} | cut -d' ' -f2 | head -n1`
+ prlog "ok"
+else
+ mount none /sys/fs/pstore -t pstore
+ if [ $? -eq 0 ]; then
+ mount_point=`grep pstore /proc/mounts | cut -d' ' -f2 | head -n1`
+ prlog "ok"
+ else
+ prlog "FAIL"
+ exit 1
+ fi
+fi
+
+cd ${mount_point}
+
+prlog -n "Checking dmesg files exist in pstore filesystem ... "
+check_files_exist dmesg
+
+prlog -n "Checking console files exist in pstore filesystem ... "
+check_files_exist console
+
+prlog -n "Checking pmsg files exist in pstore filesystem ... "
+check_files_exist pmsg
+
+prlog -n "Checking dmesg files contain oops end marker"
+grep_end_trace() {
+ grep -q "\---\[ end trace" $1
+}
+files=`ls dmesg-${backend}-*`
+operate_files $? "$files" grep_end_trace
+
+prlog -n "Checking console file contains oops end marker ... "
+grep -q "\---\[ end trace" console-${backend}-0
+show_result $?
+
+prlog -n "Checking pmsg file properly keeps the content written before crash ... "
+prev_uuid=`cat $TOP_DIR/prev_uuid`
+if [ $? -eq 0 ]; then
+ nr_matched=`grep -c "$TEST_STRING_PATTERN" pmsg-${backend}-0`
+ if [ $nr_matched -eq 1 ]; then
+ grep -q "$TEST_STRING_PATTERN"$prev_uuid pmsg-${backend}-0
+ show_result $?
+ else
+ prlog "FAIL"
+ rc=1
+ fi
+else
+ prlog "FAIL"
+ rc=1
+fi
+
+prlog -n "Removing all files in pstore filesystem "
+files=`ls *-${backend}-*`
+operate_files $? "$files" rm
+
+exit $rc
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
new file mode 100755
index 000000000..f25d2a349
--- /dev/null
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -0,0 +1,30 @@
+#!/bin/sh
+
+# pstore_tests - Check pstore's behavior before crash/reboot
+#
+# Copyright (C) Hitachi Ltd., 2015
+# Written by Hiraku Toyooka <hiraku.toyooka.gu@hitachi.com>
+#
+# Released under the terms of the GPL v2.
+
+. ./common_tests
+
+prlog -n "Checking pstore console is registered ... "
+dmesg | grep -q "console \[pstore"
+show_result $?
+
+prlog -n "Checking /dev/pmsg0 exists ... "
+test -e /dev/pmsg0
+show_result $?
+
+prlog -n "Writing unique string to /dev/pmsg0 ... "
+if [ -e "/dev/pmsg0" ]; then
+ echo "${TEST_STRING_PATTERN}""$UUID" > /dev/pmsg0
+ show_result $?
+ echo "$UUID" > $TOP_DIR/uuid
+else
+ prlog "FAIL"
+ rc=1
+fi
+
+exit $rc
diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore
new file mode 100644
index 000000000..f562e49d6
--- /dev/null
+++ b/tools/testing/selftests/ptp/.gitignore
@@ -0,0 +1 @@
+testptp
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
new file mode 100644
index 000000000..ef06de089
--- /dev/null
+++ b/tools/testing/selftests/ptp/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../usr/include/
+TEST_PROGS := testptp
+LDLIBS += -lrt
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
new file mode 100644
index 000000000..a5d8f0ab0
--- /dev/null
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -0,0 +1,521 @@
+/*
+ * PTP 1588 clock support - User space test program
+ *
+ * Copyright (C) 2010 OMICRON electronics GmbH
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
+#include <errno.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <math.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/ptp_clock.h>
+
+#define DEVICE "/dev/ptp0"
+
+#ifndef ADJ_SETOFFSET
+#define ADJ_SETOFFSET 0x0100
+#endif
+
+#ifndef CLOCK_INVALID
+#define CLOCK_INVALID -1
+#endif
+
+/* clock_adjtime is not available in GLIBC < 2.14 */
+#if !__GLIBC_PREREQ(2, 14)
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+ return syscall(__NR_clock_adjtime, id, tx);
+}
+#endif
+
+static clockid_t get_clockid(int fd)
+{
+#define CLOCKFD 3
+ return (((unsigned int) ~fd) << 3) | CLOCKFD;
+}
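+
+/*
+ * Hedged note: this is the documented fd-to-clockid encoding for
+ * dynamic POSIX clocks -- the bitwise-negated fd shifted left by
+ * three bits with CLOCKFD in the low bits, which always yields a
+ * negative clockid_t and therefore never collides with the static
+ * clock ids.
+ */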
+
+static void handle_alarm(int s)
+{
+ printf("received signal %d\n", s);
+}
+
+static int install_handler(int signum, void (*handler)(int))
+{
+ struct sigaction action;
+ sigset_t mask;
+
+ /* Unblock the signal. */
+ sigemptyset(&mask);
+ sigaddset(&mask, signum);
+ sigprocmask(SIG_UNBLOCK, &mask, NULL);
+
+ /* Install the signal handler. */
+ action.sa_handler = handler;
+ action.sa_flags = 0;
+ sigemptyset(&action.sa_mask);
+ sigaction(signum, &action, NULL);
+
+ return 0;
+}
+
+static long ppb_to_scaled_ppm(int ppb)
+{
+ /*
+ * The 'freq' field in the 'struct timex' is in parts per
+ * million, but with a 16 bit binary fractional field.
+ * Instead of calculating either one of
+ *
+ * scaled_ppm = (ppb / 1000) << 16 [1]
+ * scaled_ppm = (ppb << 16) / 1000 [2]
+ *
+ * we simply use double precision math, in order to avoid the
+ * truncation in [1] and the possible overflow in [2].
+ */
+ return (long) (ppb * 65.536);
+}
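+
+/*
+ * Hedged worked example: 1 ppm equals 1000 ppb, and the 16-bit binary
+ * fraction makes 1 ppm == 65536 scaled ppm, so ppb_to_scaled_ppm(1000)
+ * evaluates to 65536.
+ */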
+
+static int64_t pctns(struct ptp_clock_time *t)
+{
+ return t->sec * 1000000000LL + t->nsec;
+}
+
+static void usage(char *progname)
+{
+ fprintf(stderr,
+ "usage: %s [options]\n"
+ " -a val request a one-shot alarm after 'val' seconds\n"
+ " -A val request a periodic alarm every 'val' seconds\n"
+ " -c query the ptp clock's capabilities\n"
+ " -d name device to open\n"
+ " -e val read 'val' external time stamp events\n"
+ " -f val adjust the ptp clock frequency by 'val' ppb\n"
+ " -g get the ptp clock time\n"
+ " -h prints this message\n"
+ " -i val index for event/trigger\n"
+ " -k val measure the time offset between system and phc clock\n"
+ " for 'val' times (Maximum 25)\n"
+ " -l list the current pin configuration\n"
+ " -L pin,val configure pin index 'pin' with function 'val'\n"
+ " the channel index is taken from the '-i' option\n"
+ " 'val' specifies the auxiliary function:\n"
+ " 0 - none\n"
+ " 1 - external time stamp\n"
+ " 2 - periodic output\n"
+ " -p val enable output with a period of 'val' nanoseconds\n"
+ " -P val enable or disable (val=1|0) the system clock PPS\n"
+ " -s set the ptp clock time from the system time\n"
+ " -S set the system time from the ptp clock time\n"
+ " -t val shift the ptp clock time by 'val' seconds\n"
+ " -T val set the ptp clock time to 'val' seconds\n",
+ progname);
+}
+
+int main(int argc, char *argv[])
+{
+ struct ptp_clock_caps caps;
+ struct ptp_extts_event event;
+ struct ptp_extts_request extts_request;
+ struct ptp_perout_request perout_request;
+ struct ptp_pin_desc desc;
+ struct timespec ts;
+ struct timex tx;
+
+ static timer_t timerid;
+ struct itimerspec timeout;
+ struct sigevent sigevent;
+
+ struct ptp_clock_time *pct;
+ struct ptp_sys_offset *sysoff;
+
+ char *progname;
+ unsigned int i;
+ int c, cnt, fd;
+
+ char *device = DEVICE;
+ clockid_t clkid;
+ int adjfreq = 0x7fffffff;
+ int adjtime = 0;
+ int capabilities = 0;
+ int extts = 0;
+ int gettime = 0;
+ int index = 0;
+ int list_pins = 0;
+ int oneshot = 0;
+ int pct_offset = 0;
+ int n_samples = 0;
+ int periodic = 0;
+ int perout = -1;
+ int pin_index = -1, pin_func;
+ int pps = -1;
+ int seconds = 0;
+ int settime = 0;
+
+ int64_t t1, t2, tp;
+ int64_t interval, offset;
+
+ progname = strrchr(argv[0], '/');
+ progname = progname ? 1+progname : argv[0];
+	while (EOF != (c = getopt(argc, argv, "a:A:cd:e:f:ghi:k:lL:p:P:sSt:T:"))) {
+ switch (c) {
+ case 'a':
+ oneshot = atoi(optarg);
+ break;
+ case 'A':
+ periodic = atoi(optarg);
+ break;
+ case 'c':
+ capabilities = 1;
+ break;
+ case 'd':
+ device = optarg;
+ break;
+ case 'e':
+ extts = atoi(optarg);
+ break;
+ case 'f':
+ adjfreq = atoi(optarg);
+ break;
+ case 'g':
+ gettime = 1;
+ break;
+ case 'i':
+ index = atoi(optarg);
+ break;
+ case 'k':
+ pct_offset = 1;
+ n_samples = atoi(optarg);
+ break;
+ case 'l':
+ list_pins = 1;
+ break;
+ case 'L':
+ cnt = sscanf(optarg, "%d,%d", &pin_index, &pin_func);
+ if (cnt != 2) {
+ usage(progname);
+ return -1;
+ }
+ break;
+ case 'p':
+ perout = atoi(optarg);
+ break;
+ case 'P':
+ pps = atoi(optarg);
+ break;
+ case 's':
+ settime = 1;
+ break;
+ case 'S':
+ settime = 2;
+ break;
+ case 't':
+ adjtime = atoi(optarg);
+ break;
+ case 'T':
+ settime = 3;
+ seconds = atoi(optarg);
+ break;
+ case 'h':
+ usage(progname);
+ return 0;
+ case '?':
+ default:
+ usage(progname);
+ return -1;
+ }
+ }
+
+ fd = open(device, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "opening %s: %s\n", device, strerror(errno));
+ return -1;
+ }
+
+ clkid = get_clockid(fd);
+ if (CLOCK_INVALID == clkid) {
+ fprintf(stderr, "failed to read clock id\n");
+ return -1;
+ }
+
+ if (capabilities) {
+ if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+ perror("PTP_CLOCK_GETCAPS");
+ } else {
+ printf("capabilities:\n"
+ " %d maximum frequency adjustment (ppb)\n"
+ " %d programmable alarms\n"
+ " %d external time stamp channels\n"
+ " %d programmable periodic signals\n"
+ " %d pulse per second\n"
+ " %d programmable pins\n"
+ " %d cross timestamping\n",
+ caps.max_adj,
+ caps.n_alarm,
+ caps.n_ext_ts,
+ caps.n_per_out,
+ caps.pps,
+ caps.n_pins,
+ caps.cross_timestamping);
+ }
+ }
+
+ if (0x7fffffff != adjfreq) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppb_to_scaled_ppm(adjfreq);
+ if (clock_adjtime(clkid, &tx)) {
+ perror("clock_adjtime");
+ } else {
+ puts("frequency adjustment okay");
+ }
+ }
+
+ if (adjtime) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_SETOFFSET;
+ tx.time.tv_sec = adjtime;
+ tx.time.tv_usec = 0;
+ if (clock_adjtime(clkid, &tx) < 0) {
+ perror("clock_adjtime");
+ } else {
+ puts("time shift okay");
+ }
+ }
+
+ if (gettime) {
+ if (clock_gettime(clkid, &ts)) {
+ perror("clock_gettime");
+ } else {
+ printf("clock time: %ld.%09ld or %s",
+ ts.tv_sec, ts.tv_nsec, ctime(&ts.tv_sec));
+ }
+ }
+
+ if (settime == 1) {
+ clock_gettime(CLOCK_REALTIME, &ts);
+ if (clock_settime(clkid, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (settime == 2) {
+ clock_gettime(clkid, &ts);
+ if (clock_settime(CLOCK_REALTIME, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (settime == 3) {
+ ts.tv_sec = seconds;
+ ts.tv_nsec = 0;
+ if (clock_settime(clkid, &ts)) {
+ perror("clock_settime");
+ } else {
+ puts("set time okay");
+ }
+ }
+
+ if (extts) {
+ memset(&extts_request, 0, sizeof(extts_request));
+ extts_request.index = index;
+ extts_request.flags = PTP_ENABLE_FEATURE;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ extts = 0;
+ } else {
+ puts("external time stamp request okay");
+ }
+ for (; extts; extts--) {
+ cnt = read(fd, &event, sizeof(event));
+ if (cnt != sizeof(event)) {
+ perror("read");
+ break;
+ }
+ printf("event index %u at %lld.%09u\n", event.index,
+ event.t.sec, event.t.nsec);
+ fflush(stdout);
+ }
+ /* Disable the feature again. */
+ extts_request.flags = 0;
+ if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) {
+ perror("PTP_EXTTS_REQUEST");
+ }
+ }
+
+ if (list_pins) {
+ int n_pins = 0;
+ if (ioctl(fd, PTP_CLOCK_GETCAPS, &caps)) {
+ perror("PTP_CLOCK_GETCAPS");
+ } else {
+ n_pins = caps.n_pins;
+ }
+ for (i = 0; i < n_pins; i++) {
+ desc.index = i;
+ if (ioctl(fd, PTP_PIN_GETFUNC, &desc)) {
+ perror("PTP_PIN_GETFUNC");
+ break;
+ }
+ printf("name %s index %u func %u chan %u\n",
+ desc.name, desc.index, desc.func, desc.chan);
+ }
+ }
+
+ if (oneshot) {
+ install_handler(SIGALRM, handle_alarm);
+ /* Create a timer. */
+ sigevent.sigev_notify = SIGEV_SIGNAL;
+ sigevent.sigev_signo = SIGALRM;
+ if (timer_create(clkid, &sigevent, &timerid)) {
+ perror("timer_create");
+ return -1;
+ }
+ /* Start the timer. */
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.it_value.tv_sec = oneshot;
+ if (timer_settime(timerid, 0, &timeout, NULL)) {
+ perror("timer_settime");
+ return -1;
+ }
+ pause();
+ timer_delete(timerid);
+ }
+
+ if (periodic) {
+ install_handler(SIGALRM, handle_alarm);
+ /* Create a timer. */
+ sigevent.sigev_notify = SIGEV_SIGNAL;
+ sigevent.sigev_signo = SIGALRM;
+ if (timer_create(clkid, &sigevent, &timerid)) {
+ perror("timer_create");
+ return -1;
+ }
+ /* Start the timer. */
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.it_interval.tv_sec = periodic;
+ timeout.it_value.tv_sec = periodic;
+ if (timer_settime(timerid, 0, &timeout, NULL)) {
+ perror("timer_settime");
+ return -1;
+ }
+ while (1) {
+ pause();
+ }
+ timer_delete(timerid);
+ }
+
+ if (perout >= 0) {
+ if (clock_gettime(clkid, &ts)) {
+ perror("clock_gettime");
+ return -1;
+ }
+ memset(&perout_request, 0, sizeof(perout_request));
+ perout_request.index = index;
+ perout_request.start.sec = ts.tv_sec + 2;
+ perout_request.start.nsec = 0;
+ perout_request.period.sec = 0;
+ perout_request.period.nsec = perout;
+ if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
+ perror("PTP_PEROUT_REQUEST");
+ } else {
+ puts("periodic output request okay");
+ }
+ }
+
+ if (pin_index >= 0) {
+ memset(&desc, 0, sizeof(desc));
+ desc.index = pin_index;
+ desc.func = pin_func;
+ desc.chan = index;
+ if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+ perror("PTP_PIN_SETFUNC");
+ } else {
+ puts("set pin function okay");
+ }
+ }
+
+ if (pps != -1) {
+ int enable = pps ? 1 : 0;
+ if (ioctl(fd, PTP_ENABLE_PPS, enable)) {
+ perror("PTP_ENABLE_PPS");
+ } else {
+ puts("pps for system time request okay");
+ }
+ }
+
+ if (pct_offset) {
+ if (n_samples <= 0 || n_samples > 25) {
+ puts("n_samples should be between 1 and 25");
+ usage(progname);
+ return -1;
+ }
+
+ sysoff = calloc(1, sizeof(*sysoff));
+ if (!sysoff) {
+ perror("calloc");
+ return -1;
+ }
+ sysoff->n_samples = n_samples;
+
+ if (ioctl(fd, PTP_SYS_OFFSET, sysoff))
+ perror("PTP_SYS_OFFSET");
+ else
+ puts("system and phc clock time offset request okay");
+
+ pct = &sysoff->ts[0];
+ for (i = 0; i < sysoff->n_samples; i++) {
+ t1 = pctns(pct+2*i);
+ tp = pctns(pct+2*i+1);
+ t2 = pctns(pct+2*i+2);
+ interval = t2 - t1;
+ offset = (t2 + t1) / 2 - tp;
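+			/*
+			 * Hedged note: t1 and t2 are system clock reads
+			 * bracketing the PHC read tp, so assuming a
+			 * symmetric round trip, system time at the PHC
+			 * sample is (t1 + t2) / 2; "offset" is system
+			 * minus PHC and "interval" is the round trip.
+			 */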
+
+ printf("system time: %lld.%u\n",
+ (pct+2*i)->sec, (pct+2*i)->nsec);
+ printf("phc time: %lld.%u\n",
+ (pct+2*i+1)->sec, (pct+2*i+1)->nsec);
+ printf("system time: %lld.%u\n",
+ (pct+2*i+2)->sec, (pct+2*i+2)->nsec);
+ printf("system/phc clock time offset is %" PRId64 " ns\n"
+ "system clock time delay is %" PRId64 " ns\n",
+ offset, interval);
+ }
+
+ free(sysoff);
+ }
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/ptp/testptp.mk b/tools/testing/selftests/ptp/testptp.mk
new file mode 100644
index 000000000..4ef2d9755
--- /dev/null
+++ b/tools/testing/selftests/ptp/testptp.mk
@@ -0,0 +1,33 @@
+# PTP 1588 clock support - User space test program
+#
+# Copyright (C) 2010 OMICRON electronics GmbH
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CC = $(CROSS_COMPILE)gcc
+INC = -I$(KBUILD_OUTPUT)/usr/include
+CFLAGS = -Wall $(INC)
+LDLIBS = -lrt
+PROGS = testptp
+
+all: $(PROGS)
+
+testptp: testptp.o
+
+clean:
+ rm -f testptp.o
+
+distclean: clean
+ rm -f $(PROGS)
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
new file mode 100644
index 000000000..b3e59d41f
--- /dev/null
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -0,0 +1 @@
+peeksiginfo
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
new file mode 100644
index 000000000..8a2bc5562
--- /dev/null
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -0,0 +1,5 @@
+CFLAGS += -iquote../../../../include/uapi -Wall
+
+TEST_GEN_PROGS := peeksiginfo
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
new file mode 100644
index 000000000..54900657e
--- /dev/null
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+
+#include "linux/ptrace.h"
+
+static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
+{
+ return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
+}
+
+static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid,
+ int sig, siginfo_t *uinfo)
+{
+ return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo);
+}
+
+static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
+{
+ return syscall(SYS_ptrace, request, pid, addr, data);
+}
+
+#define SIGNR 10
+#define TEST_SICODE_PRIV -1
+#define TEST_SICODE_SHARE -2
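+
+/*
+ * Test plan: the parent queues SIGNR realtime signals into the child's
+ * shared (process-wide) and per-thread queues, tagging each one's
+ * si_int with its sequence number, then uses PTRACE_PEEKSIGINFO to
+ * read the queues back and verify ordering, si_code, and si_int.
+ */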
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#endif
+
+#define err(fmt, ...) \
+ fprintf(stderr, \
+ "Error (%s:%d): " fmt, \
+ __FILE__, __LINE__, ##__VA_ARGS__)
+
+static int check_error_paths(pid_t child)
+{
+ struct ptrace_peeksiginfo_args arg;
+ int ret, exit_code = -1;
+ void *addr_rw, *addr_ro;
+
+ /*
+ * Allocate two contiguous pages. The first one is for read-write,
+ * another is for read-only.
+ */
+ addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr_rw == MAP_FAILED) {
+ err("mmap() failed: %m\n");
+ return 1;
+ }
+
+ addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (addr_ro == MAP_FAILED) {
+ err("mmap() failed: %m\n");
+ goto out;
+ }
+
+ arg.nr = SIGNR;
+ arg.off = 0;
+
+ /* Unsupported flags */
+ arg.flags = ~0;
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw);
+ if (ret != -1 || errno != EINVAL) {
+ err("sys_ptrace() returns %d (expected -1),"
+ " errno %d (expected %d): %m\n",
+ ret, errno, EINVAL);
+ goto out;
+ }
+ arg.flags = 0;
+
+ /* A part of the buffer is read-only */
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg,
+ addr_ro - sizeof(siginfo_t) * 2);
+ if (ret != 2) {
+ err("sys_ptrace() returns %d (expected 2): %m\n", ret);
+ goto out;
+ }
+
+ /* Read-only buffer */
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro);
+ if (ret != -1 && errno != EFAULT) {
+ err("sys_ptrace() returns %d (expected -1),"
+ " errno %d (expected %d): %m\n",
+ ret, errno, EFAULT);
+ goto out;
+ }
+
+ exit_code = 0;
+out:
+ munmap(addr_rw, 2 * PAGE_SIZE);
+ return exit_code;
+}
+
+int check_direct_path(pid_t child, int shared, int nr)
+{
+ struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0};
+ int i, j, ret, exit_code = -1;
+ siginfo_t siginfo[SIGNR];
+ int si_code;
+
+ if (shared == 1) {
+ arg.flags = PTRACE_PEEKSIGINFO_SHARED;
+ si_code = TEST_SICODE_SHARE;
+ } else {
+ arg.flags = 0;
+ si_code = TEST_SICODE_PRIV;
+ }
+
+ for (i = 0; i < SIGNR; ) {
+ arg.off = i;
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo);
+ if (ret == -1) {
+ err("ptrace() failed: %m\n");
+ goto out;
+ }
+
+ if (ret == 0)
+ break;
+
+ for (j = 0; j < ret; j++, i++) {
+ if (siginfo[j].si_code == si_code &&
+ siginfo[j].si_int == i)
+ continue;
+
+ err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n",
+ shared, i, siginfo[j].si_code, siginfo[j].si_int);
+ goto out;
+ }
+ }
+
+ if (i != SIGNR) {
+ err("Only %d signals were read\n", i);
+ goto out;
+ }
+
+ exit_code = 0;
+out:
+ return exit_code;
+}
+
+int main(int argc, char *argv[])
+{
+ siginfo_t siginfo[SIGNR];
+ int i, exit_code = 1;
+ sigset_t blockmask;
+ pid_t child;
+
+ sigemptyset(&blockmask);
+ sigaddset(&blockmask, SIGRTMIN);
+ sigprocmask(SIG_BLOCK, &blockmask, NULL);
+
+ child = fork();
+ if (child == -1) {
+ err("fork() failed: %m");
+ return 1;
+ } else if (child == 0) {
+ pid_t ppid = getppid();
+ while (1) {
+ if (ppid != getppid())
+ break;
+ sleep(1);
+ }
+ return 1;
+ }
+
+ /* Send signals in process-wide and per-thread queues */
+ for (i = 0; i < SIGNR; i++) {
+ siginfo->si_code = TEST_SICODE_SHARE;
+ siginfo->si_int = i;
+ sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo);
+
+ siginfo->si_code = TEST_SICODE_PRIV;
+ siginfo->si_int = i;
+ sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo);
+ }
+
+ if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1)
+ return 1;
+
+ waitpid(child, NULL, 0);
+
+	/* Dump signals one by one */
+ if (check_direct_path(child, 0, 1))
+ goto out;
+ /* Dump all signals for one call */
+ if (check_direct_path(child, 0, SIGNR))
+ goto out;
+
+ /*
+	 * Dump signals from the process-wide queue.
+	 * The number of signals is not a multiple of the buffer size.
+ */
+ if (check_direct_path(child, 1, 3))
+ goto out;
+
+ if (check_error_paths(child))
+ goto out;
+
+ printf("PASS\n");
+ exit_code = 0;
+out:
+ if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1)
+ return 1;
+
+ waitpid(child, NULL, 0);
+
+ return exit_code;
+}
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
new file mode 100644
index 000000000..ccc240275
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -0,0 +1,4 @@
+initrd
+b[0-9]*
+res
+*.swp
diff --git a/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
new file mode 100755
index 000000000..43540f182
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configNR_CPUS.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Extract the number of CPUs expected from the specified Kconfig-file
+# fragment by checking CONFIG_SMP and CONFIG_NR_CPUS. If the specified
+# file gives no clue, base the number on the number of idle CPUs on
+# the system.
+#
+# Usage: configNR_CPUS.sh config-frag
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
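+#
+# For example, a fragment containing "CONFIG_NR_CPUS=8" makes this script
+# print 8, while one containing "CONFIG_SMP=n" makes it print 1.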
+
+cf=$1
+if test ! -r $cf
+then
+ echo Unreadable config fragment $cf 1>&2
+ exit -1
+fi
+if grep -q '^CONFIG_SMP=n$' $cf
+then
+ echo 1
+ exit 0
+fi
+if grep -q '^CONFIG_NR_CPUS=' $cf
+then
+ grep '^CONFIG_NR_CPUS=' $cf |
+ sed -e 's/^CONFIG_NR_CPUS=\([0-9]*\).*$/\1/'
+ exit 0
+fi
+cpus2use.sh
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
new file mode 100755
index 000000000..ef7fcbac3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# config_override.sh base override
+#
+# Combines base and override, removing any Kconfig options from base
+# that conflict with any in override, concatenating what remains and
+# sending the result to standard output.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2017
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+base=$1
+if test -r $base
+then
+ :
+else
+ echo Base file $base unreadable!!!
+ exit 1
+fi
+
+override=$2
+if test -r $override
+then
+ :
+else
+ echo Override file $override unreadable!!!
+ exit 1
+fi
+
+T=${TMPDIR-/tmp}/config_override.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
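+# Turn each override line "CONFIG_FOO=..." into 'grep -v "CONFIG_FOO="',
+# chain those greps into a single pipeline, and run base through it so
+# that only non-conflicting base lines survive before the override is
+# appended.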
+sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
+ awk '
+ {
+ if (last)
+ print last " |";
+ last = $0;
+ }
+ END {
+ if (last)
+ print last;
+ }' > $T/script
+sh $T/script < $base
+cat $override
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
new file mode 100755
index 000000000..197deece7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# Usage: configcheck.sh .config .config-template
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cat $1 > $T/.config
+
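+# Normalize the template: "CONFIG_FOO=n" becomes "# CONFIG_FOO is not set"
+# (the .config spelling) and any "#CHECK#" prefix is stripped. The awk
+# script below then emits one grep-based check per template line and
+# pipes the generated script to sh, reporting improperly set options.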
+cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
+awk '
+{
+ print "if grep -q \"" $0 "\" < '"$T/.config"'";
+ print "then";
+ print "\t:";
+ print "else";
+ if ($1 == "#") {
+ print "\tif grep -q \"" $2 "\" < '"$T/.config"'";
+ print "\tthen";
+ print "\t\tif test \"$firsttime\" = \"\""
+ print "\t\tthen"
+ print "\t\t\tfirsttime=1"
+ print "\t\tfi"
+ print "\t\techo \":" $2 ": improperly set\"";
+ print "\telse";
+ print "\t\t:";
+ print "\tfi";
+ } else {
+ print "\tif test \"$firsttime\" = \"\""
+ print "\tthen"
+ print "\t\tfirsttime=1"
+ print "\tfi"
+ print "\techo \":" $0 ": improperly set\"";
+ }
+ print "fi";
+ }' | sh
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
new file mode 100755
index 000000000..65541c21a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Usage: configinit.sh config-spec-file build-output-dir results-dir
+#
+# Create a .config file from the spec file. Run from the kernel source tree.
+# Exits with 0 if all went well, with 1 if all went well but the config
+# did not match, and some other number for other failures.
+#
+# The first argument is the .config specification file, which contains
+# desired settings, for example, "CONFIG_NO_HZ=y". For best results,
+# this should be a full pathname.
+#
+# The second argument is the build output directory specification, for
+# example, "O=/tmp/foo". The .config file is generated in the named
+# directory, which is created if it does not already exist.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/configinit.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+# Capture config spec file.
+
+c=$1
+buildloc=$2
+resdir=$3
+builddir=
+if echo $buildloc | grep -q '^O='
+then
+ builddir=`echo $buildloc | sed -e 's/^O=//'`
+ if test ! -d $builddir
+ then
+ mkdir $builddir
+ fi
+else
+ echo Bad build directory: \"$buildloc\"
+ exit 2
+fi
+
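+# Build $T/upd.sh, a filter that strips from an existing .config every
+# option named in the spec file (both the "CONFIG_FOO=..." and the
+# "# CONFIG_FOO" forms) and then appends the spec itself.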
+sed -e 's/^\(CONFIG[0-9A-Z_]*\)=.*$/grep -v "^# \1" |/' < $c > $T/u.sh
+sed -e 's/^\(CONFIG[0-9A-Z_]*=\).*$/grep -v \1 |/' < $c >> $T/u.sh
+grep '^grep' < $T/u.sh > $T/upd.sh
+echo "cat - $c" >> $T/upd.sh
+make mrproper
+make $buildloc distclean > $resdir/Make.distclean 2>&1
+make $buildloc $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+mv $builddir/.config $builddir/.config.sav
+sh $T/upd.sh < $builddir/.config.sav > $builddir/.config
+cp $builddir/.config $builddir/.config.new
+yes '' | make $buildloc oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+
+# verify new config matches specification.
+configcheck.sh $builddir/.config $c
+
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
new file mode 100755
index 000000000..bb99cde3f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# Get an estimate of how CPU-hoggy to be.
+#
+# Usage: cpus2use.sh
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
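+# The last line of mpstat output gives system-wide utilization; the awk
+# expression below assumes the final field is %idle and field 7 is
+# %iowait, though column positions can vary across sysstat versions.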
+ncpus=`grep '^processor' /proc/cpuinfo | wc -l`
+idlecpus=`mpstat | tail -1 | \
+ awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
+awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
+BEGIN {
+ cpus2use = idlecpus;
+ if (cpus2use < 1)
+ cpus2use = 1;
+ if (cpus2use < ncpus / 10)
+ cpus2use = ncpus / 10;
+ if (cpus2use == int(cpus2use))
+ cpus2use = int(cpus2use)
+ else
+ cpus2use = int(cpus2use) + 1
+ print cpus2use;
+}'
+
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
new file mode 100644
index 000000000..65f665502
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -0,0 +1,279 @@
+#!/bin/bash
+#
+# Shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# bootparam_hotplug_cpu bootparam-string
+#
+# Returns 1 if the specified boot-parameter string tells rcutorture to
+# test CPU-hotplug operations.
+bootparam_hotplug_cpu () {
+ echo "$1" | grep -q "rcutorture\.onoff_"
+}
+
+# checkarg --argname argtype $# arg mustmatch cannotmatch
+#
+# Checks the specified argument "arg" against the mustmatch and cannotmatch
+# patterns.
+checkarg () {
+ if test $3 -le 1
+ then
+ echo $1 needs argument $2 matching \"$5\"
+ usage
+ fi
+ if echo "$4" | grep -q -e "$5"
+ then
+ :
+ else
+ echo $1 $2 \"$4\" must match \"$5\"
+ usage
+ fi
+ if echo "$4" | grep -q -e "$6"
+ then
+ echo $1 $2 \"$4\" must not match \"$6\"
+ usage
+ fi
+}
+
+# configfrag_boot_params bootparam-string config-fragment-file
+#
+# Adds boot parameters from the .boot file, if any.
+configfrag_boot_params () {
+ if test -r "$2.boot"
+ then
+ echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '`
+ else
+ echo $1
+ fi
+}
+
+# configfrag_boot_cpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any nr_cpus= boot parameters specified.
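+# For example, "nr_cpus=4" among the boot parameters caps a
+# config-specified count of 8 at 4; absent such a parameter, the
+# config-specified count is echoed unchanged.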
+configfrag_boot_cpus () {
+ local bootargs="`configfrag_boot_params "$1" "$2"`"
+ local nr_cpus
+ if echo "${bootargs}" | grep -q 'nr_cpus=[0-9]'
+ then
+ nr_cpus="`echo "${bootargs}" | sed -e 's/^.*nr_cpus=\([0-9]*\).*$/\1/'`"
+ if test "$3" -gt "$nr_cpus"
+ then
+ echo $nr_cpus
+ else
+ echo $3
+ fi
+ else
+ echo $3
+ fi
+}
+
+# configfrag_boot_maxcpus bootparam-string config-fragment-file config-cpus
+#
+# Decreases number of CPUs based on any maxcpus= boot parameters specified.
+# This allows tests where additional CPUs come online later during the
+# test run. However, the torture parameters will be set based on the
+# number of CPUs initially present, so the scripting should schedule
+# test runs based on the maxcpus= boot parameter controlling the initial
+# number of CPUs instead of on the ultimate number of CPUs.
+configfrag_boot_maxcpus () {
+ local bootargs="`configfrag_boot_params "$1" "$2"`"
+ local maxcpus
+ if echo "${bootargs}" | grep -q 'maxcpus=[0-9]'
+ then
+ maxcpus="`echo "${bootargs}" | sed -e 's/^.*maxcpus=\([0-9]*\).*$/\1/'`"
+ if test "$3" -gt "$maxcpus"
+ then
+ echo $maxcpus
+ else
+ echo $3
+ fi
+ else
+ echo $3
+ fi
+}
+
+# configfrag_hotplug_cpu config-fragment-file
+#
+# Returns 1 if the config fragment specifies hotplug CPU.
+configfrag_hotplug_cpu () {
+ if test ! -r "$1"
+ then
+ echo Unreadable config fragment "$1" 1>&2
+ exit -1
+ fi
+ grep -q '^CONFIG_HOTPLUG_CPU=y$' "$1"
+}
+
+# identify_boot_image qemu-cmd
+#
+# Returns the relative path to the kernel build image. This will be
+# arch/<arch>/boot/bzImage or vmlinux if bzImage is not a target for the
+# architecture, unless overridden with the TORTURE_BOOT_IMAGE environment
+# variable.
+identify_boot_image () {
+ if test -n "$TORTURE_BOOT_IMAGE"
+ then
+ echo $TORTURE_BOOT_IMAGE
+ else
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386)
+ echo arch/x86/boot/bzImage
+ ;;
+ qemu-system-aarch64)
+ echo arch/arm64/boot/Image
+ ;;
+ *)
+ echo vmlinux
+ ;;
+ esac
+ fi
+}
+
+# identify_qemu builddir
+#
+# Returns our best guess as to which qemu command is appropriate for
+# the kernel at hand. Override with the TORTURE_QEMU_CMD environment variable.
+identify_qemu () {
+ local u="`file "$1"`"
+ if test -n "$TORTURE_QEMU_CMD"
+ then
+ echo $TORTURE_QEMU_CMD
+ elif echo $u | grep -q x86-64
+ then
+ echo qemu-system-x86_64
+ elif echo $u | grep -q "Intel 80386"
+ then
+ echo qemu-system-i386
+ elif echo $u | grep -q aarch64
+ then
+ echo qemu-system-aarch64
+ elif uname -a | grep -q ppc64
+ then
+ echo qemu-system-ppc64
+ else
+ echo Cannot figure out what qemu command to use! 1>&2
+ echo file $1 output: $u
+ # Usually this will be one of /usr/bin/qemu-system-*
+ # Use TORTURE_QEMU_CMD environment variable or appropriate
+ # argument to top-level script.
+ exit 1
+ fi
+}
+
+# identify_qemu_append qemu-cmd
+#
+# Output arguments for the qemu "-append" string based on CPU type
+# and the TORTURE_QEMU_INTERACTIVE environment variable.
+identify_qemu_append () {
+ local console=ttyS0
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386)
+ echo noapic selinux=0 initcall_debug debug
+ ;;
+ qemu-system-aarch64)
+ console=ttyAMA0
+ ;;
+ esac
+ if test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo root=/dev/sda
+ else
+ echo console=$console
+ fi
+}
+
+# identify_qemu_args qemu-cmd serial-file
+#
+# Output arguments for qemu arguments based on the TORTURE_QEMU_MAC
+# and TORTURE_QEMU_INTERACTIVE environment variables.
+identify_qemu_args () {
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386)
+ ;;
+ qemu-system-aarch64)
+ echo -machine virt,gic-version=host -cpu host
+ ;;
+ qemu-system-ppc64)
+ echo -enable-kvm -M pseries -nodefaults
+ echo -device spapr-vscsi
+ if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
+ then
+ echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC
+ echo -netdev bridge,br=br0,id=net0
+ elif test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo -net nic -net user
+ fi
+ ;;
+ esac
+ if test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo -monitor stdio -serial pty -S
+ else
+ echo -serial file:$2
+ fi
+}
+
+# identify_qemu_vcpus
+#
+# Returns the number of virtual CPUs available to the aggregate of the
+# guest OSes.
+identify_qemu_vcpus () {
+ lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://'
+}
+
+# print_bug
+#
+# Prints "BUG: " in red followed by remaining arguments
+print_bug () {
+ printf '\033[031mBUG: \033[m'
+ echo $*
+}
+
+# print_warning
+#
+# Prints "WARNING: " in yellow followed by remaining arguments
+print_warning () {
+ printf '\033[033mWARNING: \033[m'
+ echo $*
+}
+
+# specify_qemu_cpus qemu-cmd qemu-args #cpus
+#
+# Appends a string containing "-smp XXX" to qemu-args, unless the incoming
+# qemu-args already contains "-smp".
+specify_qemu_cpus () {
+ local nt;
+
+ if echo $2 | grep -q -e -smp
+ then
+ echo $2
+ else
+ case "$1" in
+ qemu-system-x86_64|qemu-system-i386|qemu-system-aarch64)
+ echo $2 -smp $3
+ ;;
+ qemu-system-ppc64)
+ nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
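+		# Round up so that cores*threads covers the requested CPU count.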
+ echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
+ ;;
+ esac
+ fi
+}
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
new file mode 100755
index 000000000..363382837
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#
+# Alternate sleeping and spinning on randomly selected CPUs. The purpose
+# of this script is to inflict random OS jitter on a concurrently running
+# test.
+#
+# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+#
+# me: Random-number-generator seed salt.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+me=$(($1 * 1000))
+duration=$2
+sleepmax=${3-1000000}
+spinmax=${4-1000}
+
+n=1
+
+starttime=`awk 'BEGIN { print systime(); }' < /dev/null`
+
+while :
+do
+ # Check for done.
+ t=`awk -v s=$starttime 'BEGIN { print systime() - s; }' < /dev/null`
+ if test "$t" -gt "$duration"
+ then
+ exit 0;
+ fi
+
+ # Set affinity to randomly selected CPU
+ cpus=`ls /sys/devices/system/cpu/*/online |
+ sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//' |
+ grep -v '^0*$'`
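+	# Build a one-bit taskset affinity mask for the randomly chosen
+	# online CPU, for example CPU 3 yields mask 0x8; a zero or
+	# overflowed mask falls back to CPU 0 (mask 1).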
+ cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
+ srand(n + me + systime());
+ ncpus = split(cpus, ca);
+ curcpu = ca[int(rand() * ncpus + 1)];
+ mask = lshift(1, curcpu);
+ if (mask + 0 <= 0)
+ mask = 1;
+ printf("%#x\n", mask);
+ }' < /dev/null`
+ n=$(($n+1))
+ if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ then
+ echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ exit 1
+ fi
+
+ # Sleep a random duration
+ sleeptime=`awk -v me=$me -v n=$n -v sleepmax=$sleepmax 'BEGIN {
+ srand(n + me + systime());
+ printf("%06d", int(rand() * sleepmax));
+ }' < /dev/null`
+ n=$(($n+1))
+ sleep .$sleeptime
+
+ # Spin a random duration
+ limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN {
+ srand(n + me + systime());
+ printf("%06d", int(rand() * spinmax));
+ }' < /dev/null`
+ n=$(($n+1))
+	for i in `seq $limit`
+ do
+ echo > /dev/null
+ done
+done
+
+exit 1
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
new file mode 100755
index 000000000..9115fcdb5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+#
+# Build a kvm-ready Linux kernel from the tree in the current directory.
+#
+# Usage: kvm-build.sh config-template build-dir resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+config_template=${1}
+if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
+then
+ echo "kvm-build.sh :$config_template: Not a readable file"
+ exit 1
+fi
+builddir=${2}
+resdir=${3}
+
+T=${TMPDIR-/tmp}/test-linux.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
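+# Append the options an rcutorture run requires: point the initramfs at
+# $TORTURE_INITRD and enable the virtio devices that qemu provides for
+# console I/O.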
+cp ${config_template} $T/config
+cat << ___EOF___ >> $T/config
+CONFIG_INITRAMFS_SOURCE="$TORTURE_INITRD"
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_CONSOLE=y
+___EOF___
+
+configinit.sh $T/config O=$builddir $resdir
+retval=$?
+if test $retval -gt 1
+then
+ exit 2
+fi
+ncpus=`cpus2use.sh`
+make O=$builddir -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+retval=$?
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
+then
+ echo Kernel build error
+ egrep "Stop|Error|error:|warning:" < $resdir/Make.out
+ echo Run aborted.
+ exit 3
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
new file mode 100755
index 000000000..98f650c9b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+#
+# Invoke a text editor on all console.log files for all runs with diagnostics,
+# that is, on all such files having a console.log.diags counterpart.
+# Note that both console.log.diags and console.log are passed to the
+# editor (currently defaulting to "vi"), allowing the user to get an
+# idea of what to search for in the console.log file.
+#
+# Usage: kvm-find-errors.sh directory
+#
+# The "directory" above should end with the date/time directory, for example,
+# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
+
+rundir="${1}"
+if test -z "$rundir" -o ! -d "$rundir"
+then
+	echo Usage: $0 directory
+	exit 1
+fi
+editor=${EDITOR-vi}
+
+# Find builds with errors
+files=
+for i in ${rundir}/*/Make.out
+do
+ if egrep -q "error:|warning:" < $i
+ then
+ egrep "error:|warning:" < $i > $i.diags
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No build errors.
+fi
+if grep -q -e "--buildonly" < ${rundir}/log
+then
+ echo Build-only run, no console logs to check.
+fi
+
+# Find console logs with errors
+files=
+for i in ${rundir}/*/console.log
+do
+ if test -r $i.diags
+ then
+ files="$files $i.diags $i"
+ fi
+done
+if test -n "$files"
+then
+ $editor $files
+else
+ echo No errors in console logs.
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
new file mode 100755
index 000000000..2de92f43e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Analyze a given results directory for locktorture progress.
+#
+# Usage: kvm-recheck-lock.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+ncs=`grep "Writes: Total:" $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* Total: //' -e 's/ .*$//'`
+if test -z "$ncs"
+then
+ echo "$configfile -------"
+else
+ title="$configfile ------- $ncs acquisitions/releases"
+ dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ if test -z "$dur"
+ then
+ :
+ else
+ ncsps=`awk -v ncs=$ncs -v dur=$dur '
+ BEGIN { print ncs / dur }' < /dev/null`
+ title="$title ($ncsps per second)"
+ fi
+ echo $title
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
new file mode 100755
index 000000000..0fa8a61cc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcutorture progress.
+#
+# Usage: kvm-recheck-rcu.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
+stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
+ tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
+ awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
+ tr -d '\012\015'`"
+if test -z "$ngps"
+then
+ echo "$configfile ------- " $stopstate
+else
+ title="$configfile ------- $ngps GPs"
+ dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ if test -z "$dur"
+ then
+ :
+ else
+ ngpsps=`awk -v ngps=$ngps -v dur=$dur '
+ BEGIN { print ngps / dur }' < /dev/null`
+ title="$title ($ngpsps/s)"
+ fi
+ echo $title $stopstate
+ nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+ if test -z "$nclosecalls"
+ then
+ exit 0
+ fi
+ if test "$nclosecalls" -eq 0
+ then
+ exit 0
+ fi
+ # Compute number of close calls per tenth of an hour
+ nclosecalls10=`awk -v nclosecalls=$nclosecalls -v dur=$dur 'BEGIN { print int(nclosecalls * 36000 / dur) }' < /dev/null`
+ if test $nclosecalls10 -gt 5 -a $nclosecalls -gt 1
+ then
+ print_bug $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
+ else
+ print_warning $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i
+ fi
+ echo $nclosecalls "Reader Batch close calls in" $(($dur/60)) minute run: $i > $i/console.log.rcu.diags
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
new file mode 100755
index 000000000..8948f7926
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements,
+# looking for ftrace data. Exits with 0 if data was found, analyzed, and
+# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
+# argument checking.
+#
+# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+. functions.sh
+
+if test "`grep -c 'rcu_exp_grace_period.*start' < $i/console.log`" -lt 100
+then
+ exit 10
+fi
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+grep 'us : rcu_exp_grace_period' |
+sed -e 's/us : / : /' |
+tr -d '\015' |
+awk '
+$8 == "start" {
+ if (startseq != "")
+ nlost++;
+ starttask = $1;
+ starttime = $3;
+ startseq = $7;
+ seqtask[startseq] = starttask;
+}
+
+$8 == "end" {
+ if (startseq == $7) {
+ curgpdur = $3 - starttime;
+ gptimes[++n] = curgpdur;
+ gptaskcnt[starttask]++;
+ sum += curgpdur;
+ if (curgpdur > 1000)
+ print "Long GP " starttime "us to " $3 "us (" curgpdur "us)";
+ startseq = "";
+ } else {
+ # Lost a message or some such, reset.
+ startseq = "";
+ nlost++;
+ }
+}
+
+$8 == "done" && seqtask[$7] != $1 {
+ piggybackcnt[$1]++;
+}
+
+END {
+ newNR = asort(gptimes);
+ if (newNR <= 0) {
+ print "No ftrace records found???"
+ exit 10;
+ }
+ pct50 = int(newNR * 50 / 100);
+ if (pct50 < 1)
+ pct50 = 1;
+ pct90 = int(newNR * 90 / 100);
+ if (pct90 < 1)
+ pct90 = 1;
+ pct99 = int(newNR * 99 / 100);
+ if (pct99 < 1)
+ pct99 = 1;
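+	# Bucket size: round log10 of the 90th-percentile duration to
+	# the nearest power of ten, then take one hundredth of that.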
+ div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+ print "Histogram bucket size: " div;
+ last = gptimes[1] - 10;
+ count = 0;
+ for (i = 1; i <= newNR; i++) {
+ current = div * int(gptimes[i] / div);
+ if (last == current) {
+ count++;
+ } else {
+ if (count > 0)
+ print last, count;
+ count = 1;
+ last = current;
+ }
+ }
+ if (count > 0)
+ print last, count;
+ print "Distribution of grace periods across tasks:";
+ for (i in gptaskcnt) {
+ print "\t" i, gptaskcnt[i];
+ nbatches += gptaskcnt[i];
+ }
+ ngps = nbatches;
+ print "Distribution of piggybacking across tasks:";
+ for (i in piggybackcnt) {
+ print "\t" i, piggybackcnt[i];
+ ngps += piggybackcnt[i];
+ }
+ print "Average grace-period duration: " sum / newNR " microseconds";
+ print "Minimum grace-period duration: " gptimes[1];
+ print "50th percentile grace-period duration: " gptimes[pct50];
+ print "90th percentile grace-period duration: " gptimes[pct90];
+ print "99th percentile grace-period duration: " gptimes[pct99];
+ print "Maximum grace-period duration: " gptimes[newNR];
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches " Lost: " nlost + 0;
+ print "Computed from ftrace data.";
+}'
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
new file mode 100755
index 000000000..ccebf772f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+#
+# Analyze a given results directory for rcuperf performance measurements.
+#
+# Usage: kvm-recheck-rcuperf.sh resdir
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+
+if kvm-recheck-rcuperf-ftrace.sh $i
+then
+ # ftrace data was successfully analyzed, call it good!
+ exit 0
+fi
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log |
+awk '
+/-perf: .* gps: .* batches:/ {
+ ngps = $9;
+ nbatches = $11;
+}
+
+/-perf: .*writer-duration/ {
+ gptimes[++n] = $5 / 1000.;
+ sum += $5 / 1000.;
+}
+
+END {
+ newNR = asort(gptimes);
+ if (newNR <= 0) {
+ print "No rcuperf records found???"
+ exit;
+ }
+ pct50 = int(newNR * 50 / 100);
+ if (pct50 < 1)
+ pct50 = 1;
+ pct90 = int(newNR * 90 / 100);
+ if (pct90 < 1)
+ pct90 = 1;
+ pct99 = int(newNR * 99 / 100);
+ if (pct99 < 1)
+ pct99 = 1;
+ div = 10 ** int(log(gptimes[pct90]) / log(10) + .5) / 100;
+ print "Histogram bucket size: " div;
+ last = gptimes[1] - 10;
+ count = 0;
+ for (i = 1; i <= newNR; i++) {
+ current = div * int(gptimes[i] / div);
+ if (last == current) {
+ count++;
+ } else {
+ if (count > 0)
+ print last, count;
+ count = 1;
+ last = current;
+ }
+ }
+ if (count > 0)
+ print last, count;
+ print "Average grace-period duration: " sum / newNR " microseconds";
+ print "Minimum grace-period duration: " gptimes[1];
+ print "50th percentile grace-period duration: " gptimes[pct50];
+ print "90th percentile grace-period duration: " gptimes[pct90];
+ print "99th percentile grace-period duration: " gptimes[pct99];
+ print "Maximum grace-period duration: " gptimes[newNR];
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
+ print "Computed from rcuperf printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
new file mode 100755
index 000000000..c9bab57a7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Given the results directories for previous KVM-based torture runs,
+# check the build and console output for errors. Given a directory
+# containing results directories, this recursively checks them all.
+#
+# Usage: kvm-recheck.sh resdir ...
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+for rd in "$@"
+do
+ firsttime=1
+ dirs=`find $rd -name Make.defconfig.out -print | sort | sed -e 's,/[^/]*$,,' | sort -u`
+ for i in $dirs
+ do
+ if test -n "$firsttime"
+ then
+ firsttime=""
+ resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
+ head -1 $resdir/log
+ fi
+ TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+ rm -f $i/console.log.*.diags
+ kvm-recheck-${TORTURE_SUITE}.sh $i
+ if test -f "$i/console.log"
+ then
+ configcheck.sh $i/.config $i/ConfigFragment
+ if test -r $i/Make.oldconfig.err
+ then
+ cat $i/Make.oldconfig.err
+ fi
+ parse-build.sh $i/Make.out $configfile
+ parse-console.sh $i/console.log $configfile
+ if test -r $i/Warnings
+ then
+ cat $i/Warnings
+ fi
+ else
+ if test -f "$i/qemu-cmd"
+ then
+ print_bug qemu failed
+ echo " $i"
+ elif test -f "$i/buildonly"
+ then
+ echo Build-only run, no boot/test
+ configcheck.sh $i/.config $i/ConfigFragment
+ parse-build.sh $i/Make.out $configfile
+ else
+ print_bug Build failed
+ echo " $i"
+ fi
+ fi
+ done
+done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
new file mode 100755
index 000000000..f7247ee00
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+#
+# Run a kvm-based test of the specified tree on the specified configs.
+# Fully automated run and error checking, no graphics console.
+#
+# Execute this in the source tree. Do not run it as a background task
+# because qemu does not seem to like that much.
+#
+# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
+#
+# qemu-args defaults to "-enable-kvm -nographic", along with arguments
+# specifying the number of CPUs and other options
+# generated from the underlying CPU architecture.
+# boot_args defaults to value returned by the per_version_boot_params
+# shell function.
+#
+# Anything you specify for either qemu-args or boot_args is appended to
+# the default values. The "-smp" value is deduced from the contents of
+# the config fragment.
+#
+# More sophisticated argument parsing is clearly needed.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+. functions.sh
+. $CONFIGFRAG/ver_functions.sh
+
+config_template=${1}
+config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
+title=`echo $config_template | sed -e 's/^.*\///'`
+builddir=${2}
+if test -z "$builddir" -o ! -d "$builddir" -o ! -w "$builddir"
+then
+ echo "kvm-test-1-run.sh :$builddir: Not a writable directory, cannot build into it"
+ exit 1
+fi
+resdir=${3}
+if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
+then
+ echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
+ exit 1
+fi
+echo ' ---' `date`: Starting build
+echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
+touch $resdir/ConfigFragment.input $resdir/ConfigFragment
+if test -r "$config_dir/CFcommon"
+then
+ echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
+ cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
+ config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
+ grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
+else
+ cp $config_template $T/Kc1
+fi
+echo " --- $config_template" >> $resdir/ConfigFragment.input
+cat $config_template >> $resdir/ConfigFragment.input
+grep '#CHECK#' $config_template >> $resdir/ConfigFragment
+if test -n "$TORTURE_KCONFIG_ARG"
+then
+ echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
+ echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
+ cat $T/cmdline >> $resdir/ConfigFragment.input
+ config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
+ # Note that "#CHECK#" is not permitted on commandline.
+else
+ cp $T/Kc1 $T/Kc2
+fi
+cat $T/Kc2 >> $resdir/ConfigFragment
+
+base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
+then
+ # Rerunning previous test, so use that test's kernel.
+ QEMU="`identify_qemu $base_resdir/vmlinux`"
+ BOOT_IMAGE="`identify_boot_image $QEMU`"
+ KERNEL=$base_resdir/${BOOT_IMAGE##*/} # use the last component of ${BOOT_IMAGE}
+ ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
+ ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
+ # Arch-independent indicator
+ touch $resdir/builtkernel
+elif kvm-build.sh $T/Kc2 $builddir $resdir
+then
+ # Had to build a kernel for this test.
+ QEMU="`identify_qemu $builddir/vmlinux`"
+ BOOT_IMAGE="`identify_boot_image $QEMU`"
+ cp $builddir/vmlinux $resdir
+ cp $builddir/.config $resdir
+ cp $builddir/Module.symvers $resdir > /dev/null || :
+ cp $builddir/System.map $resdir > /dev/null || :
+ if test -n "$BOOT_IMAGE"
+ then
+ cp $builddir/$BOOT_IMAGE $resdir
+ KERNEL=$resdir/${BOOT_IMAGE##*/}
+ # Arch-independent indicator
+ touch $resdir/builtkernel
+ else
+ echo No identifiable boot image, not running KVM, see $resdir.
+ echo Do the torture scripts know about your architecture?
+ fi
+ parse-build.sh $resdir/Make.out $title
+else
+ # Build failed.
+ cp $builddir/Make*.out $resdir
+ cp $builddir/.config $resdir || :
+ echo Build failed, not running KVM, see $resdir.
+ if test -f $builddir.wait
+ then
+ mv $builddir.wait $builddir.ready
+ fi
+ exit 1
+fi
+if test -f $builddir.wait
+then
+ mv $builddir.wait $builddir.ready
+fi
+while test -f $builddir.ready
+do
+ sleep 1
+done
+seconds=$4
+qemu_args=$5
+boot_args=$6
+
+cd $KVM
+kstarttime=`awk 'BEGIN { print systime() }' < /dev/null`
+if test -z "$TORTURE_BUILDONLY"
+then
+ echo ' ---' `date`: Starting kernel
+fi
+
+# Generate -smp qemu argument.
+qemu_args="-enable-kvm -nographic $qemu_args"
+cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
+cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
+vcpus=`identify_qemu_vcpus`
+if test $cpu_count -gt $vcpus
+then
+ echo CPU count limited from $cpu_count to $vcpus | tee -a $resdir/Warnings
+ cpu_count=$vcpus
+fi
+qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
+
+# Generate architecture-specific and interaction-specific qemu arguments
+qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
+
+# Generate qemu -append arguments
+qemu_append="`identify_qemu_append "$QEMU"`"
+
+# Pull in Kconfig-fragment boot parameters
+boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
+# Generate kernel-version-specific boot parameters
+boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
+
+if test -n "$TORTURE_BUILDONLY"
+then
+ echo Build-only run specified, boot/test omitted.
+ touch $resdir/buildonly
+ exit 0
+fi
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
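+# Launch qemu in the background, recording its pid and exit status so
+# that the monitoring loop below can detect completion or a hang.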
+( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args"& echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+commandcompleted=0
+sleep 10 # Give qemu's pid a chance to reach the file
+if test -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ echo Monitoring qemu job at pid $qemu_pid
+else
+ qemu_pid=""
+	echo Monitoring qemu job at as-yet-unknown pid
+fi
+while :
+do
+ if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ fi
+ kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
+ then
+ if test $kruntime -ge $seconds
+ then
+ break;
+ fi
+ sleep 1
+ else
+ commandcompleted=1
+ if test $kruntime -lt $seconds
+ then
+ echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
+ grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
+ killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
+ if test -n "$killpid"
+ then
+ echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
+ ps -fp $killpid >> $resdir/Warnings 2>&1
+ fi
+ else
+ echo ' ---' `date`: "Kernel done"
+ fi
+ break
+ fi
+done
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+then
+ echo Grace period for qemu job at pid $qemu_pid
+ oldline="`tail $resdir/console.log`"
+ while :
+ do
+ kruntime=`awk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if kill -0 $qemu_pid > /dev/null 2>&1
+ then
+ :
+ else
+ break
+ fi
+ must_continue=no
+ newline="`tail $resdir/console.log`"
+ if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
+ then
+ must_continue=yes
+ fi
+ last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
+	if test -z "$last_ts"
+ then
+ last_ts=0
+ fi
+ if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ must_continue=yes
+ fi
+ if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
+ oldline=$newline
+ sleep 10
+ done
+elif test -z "$qemu_pid"
+then
+ echo Unknown PID, cannot kill qemu command
+fi
+
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
new file mode 100755
index 000000000..5a7a62d76
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -0,0 +1,471 @@
+#!/bin/bash
+#
+# Run a series of tests under KVM. By default, this series is specified
+# by the relevant CFLIST file, but can be overridden by the --configs
+# command-line argument.
+#
+# Usage: kvm.sh [ options ]
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
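+# An illustrative invocation from the top of the kernel source tree:
+#
+#	tools/testing/selftests/rcutorture/bin/kvm.sh --cpus 8 \
+#		--duration 30 --configs "3*TINY01"
+#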
+scriptname=$0
+args="$*"
+
+T=${TMPDIR-/tmp}/kvm.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+cd `dirname $scriptname`/../../../../../
+
+dur=$((30*60))
+dryrun=""
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+TORTURE_DEFCONFIG=defconfig
+TORTURE_BOOT_IMAGE=""
+TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG=""
+TORTURE_KMAKE_ARG=""
+TORTURE_QEMU_MEM=512
+TORTURE_SHUTDOWN_GRACE=180
+TORTURE_SUITE=rcu
+resdir=""
+configs=""
+cpus=0
+ds=`date +%Y.%m.%d-%H:%M:%S`
+jitter="-1"
+
+. functions.sh
+
+usage () {
+ echo "Usage: $scriptname optional arguments:"
+ echo " --bootargs kernel-boot-arguments"
+ echo " --bootimage relative-path-to-kernel-boot-image"
+ echo " --buildonly"
+ echo " --configs \"config-file list w/ repeat factor (3*TINY01)\""
+ echo " --cpus N"
+ echo " --datestamp string"
+ echo " --defconfig string"
+ echo " --dryrun sched|script"
+ echo " --duration minutes"
+ echo " --interactive"
+ echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
+ echo " --kconfig Kconfig-options"
+ echo " --kmake-arg kernel-make-arguments"
+ echo " --mac nn:nn:nn:nn:nn:nn"
+ echo " --memory megabytes | nnnG"
+ echo " --no-initrd"
+ echo " --qemu-args qemu-arguments"
+ echo " --qemu-cmd qemu-system-..."
+ echo " --results absolute-pathname"
+ echo " --torture rcu"
+ exit 1
+}
+
+while test $# -gt 0
+do
+ case "$1" in
+ --bootargs|--bootarg)
+ checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
+ TORTURE_BOOTARGS="$2"
+ shift
+ ;;
+ --bootimage)
+ checkarg --bootimage "(relative path to kernel boot image)" "$#" "$2" '[a-zA-Z0-9][a-zA-Z0-9_]*' '^--'
+ TORTURE_BOOT_IMAGE="$2"
+ shift
+ ;;
+ --buildonly)
+ TORTURE_BUILDONLY=1
+ ;;
+ --configs|--config)
+ checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--'
+ configs="$2"
+ shift
+ ;;
+ --cpus)
+ checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
+ cpus=$2
+ shift
+ ;;
+ --datestamp)
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[^/]*$' '^--'
+ ds=$2
+ shift
+ ;;
+ --defconfig)
+ checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--'
+ TORTURE_DEFCONFIG=$2
+ shift
+ ;;
+ --dryrun)
+ checkarg --dryrun "sched|script" $# "$2" 'sched\|script' '^--'
+ dryrun=$2
+ shift
+ ;;
+ --duration)
+ checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
+ dur=$(($2*60))
+ shift
+ ;;
+ --interactive)
+ TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
+ ;;
+ --jitter)
+ checkarg --jitter "(# threads [ sleep [ spin ] ])" $# "$2" '^-\{,1\}[0-9]\+\( \+[0-9]\+\)\{,2\} *$' '^error$'
+ jitter="$2"
+ shift
+ ;;
+ --kconfig)
+ checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
+ TORTURE_KCONFIG_ARG="$2"
+ shift
+ ;;
+ --kmake-arg)
+ checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
+ TORTURE_KMAKE_ARG="$2"
+ shift
+ ;;
+ --mac)
+ checkarg --mac "(MAC address)" $# "$2" '^\([0-9a-fA-F]\{2\}:\)\{5\}[0-9a-fA-F]\{2\}$' error
+ TORTURE_QEMU_MAC=$2
+ shift
+ ;;
+ --memory)
+ checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
+ TORTURE_QEMU_MEM=$2
+ shift
+ ;;
+ --no-initrd)
+ TORTURE_INITRD=""; export TORTURE_INITRD
+ ;;
+ --qemu-args|--qemu-arg)
+ checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error'
+ TORTURE_QEMU_ARG="$2"
+ shift
+ ;;
+ --qemu-cmd)
+ checkarg --qemu-cmd "(qemu-system-...)" $# "$2" 'qemu-system-' '^--'
+ TORTURE_QEMU_CMD="$2"
+ shift
+ ;;
+ --results)
+ checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
+ resdir=$2
+ shift
+ ;;
+ --shutdown-grace)
+ checkarg --shutdown-grace "(seconds)" "$#" "$2" '^[0-9]*$' '^error'
+ TORTURE_SHUTDOWN_GRACE=$2
+ shift
+ ;;
+ --torture)
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
+ TORTURE_SUITE=$2
+ shift
+ if test "$TORTURE_SUITE" = rcuperf
+ then
+ # If you really want jitter for rcuperf, specify
+ # it after specifying rcuperf. (But why?)
+ jitter=0
+ fi
+ ;;
+ *)
+ echo Unknown argument $1
+ usage
+ ;;
+ esac
+ shift
+done
+
+CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+
+if test -z "$configs"
+then
+ configs="`cat $CONFIGFRAG/CFLIST`"
+fi
+
+if test -z "$resdir"
+then
+ resdir=$KVM/res
+fi
+
+# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
+touch $T/cfgcpu
+for CF in $configs
+do
+ case $CF in
+ [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+ config_reps=`echo $CF | sed -e 's/\*.*$//'`
+ CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
+ ;;
+ *)
+ config_reps=1
+ CF1=$CF
+ ;;
+ esac
+ if test -f "$CONFIGFRAG/$CF1"
+ then
+ cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+ cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+ cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
+ for ((cur_rep=0;cur_rep<$config_reps;cur_rep++))
+ do
+ echo $CF1 $cpu_count >> $T/cfgcpu
+ done
+ else
+ echo "The --configs file $CF1 does not exist, terminating."
+ exit 1
+ fi
+done
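+
+# At this point $T/cfgcpu holds one "test-name cpu-count" pair per line,
+# for example (hypothetical):
+#
+#	TINY01 1
+#	TINY01 1
+#	TINY01 1
+#	TREE01 8
+#
+# with repeat factors such as "3*TINY01" already expanded into one line
+# per repetition by the loop above.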
+sort -k2nr $T/cfgcpu -T "$T" > $T/cfgcpu.sort
+
+# Use a greedy bin-packing algorithm, ordering the tests into batches.
+awk < $T/cfgcpu.sort > $T/cfgcpu.pack -v ncpus=$cpus '
+BEGIN {
+ njobs = 0;
+}
+
+{
+ # Read file of tests and corresponding required numbers of CPUs.
+ cf[njobs] = $1;
+ cpus[njobs] = $2;
+ njobs++;
+}
+
+END {
+ batch = 0;
+ nc = -1;
+
+	# Each pass through the following loop creates one test batch
+	# that can be executed concurrently given ncpus. Note that a
+	# given test that requires more than the available CPUs will run
+	# in its own batch. Such tests just have to make do with what
+	# is available.
+ while (nc != ncpus) {
+ batch++;
+ nc = ncpus;
+
+ # Each pass through the following loop considers one
+ # test for inclusion in the current batch.
+ for (i = 0; i < njobs; i++) {
+ if (done[i])
+ continue; # Already part of a batch.
+ if (nc >= cpus[i] || nc == ncpus) {
+
+ # This test fits into the current batch.
+ done[i] = batch;
+ nc -= cpus[i];
+ if (nc <= 0)
+ break; # Too-big test in its own batch.
+ }
+ }
+ }
+
+ # Dump out the tests in batch order.
+ for (b = 1; b <= batch; b++)
+ for (i = 0; i < njobs; i++)
+ if (done[i] == b)
+ print cf[i], cpus[i];
+}'
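+
+# Worked example (hypothetical): given --cpus 8 and tests needing 8, 4,
+# 4, and 2 CPUs, the first pass above batches the 8-CPU test alone, the
+# second pass batches the two 4-CPU tests together, and the third pass
+# batches the 2-CPU test. A 16-CPU test would still land in its own
+# batch and simply make do with the eight available CPUs.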
+
+# Generate a script to execute the tests in appropriate batches.
+cat << ___EOF___ > $T/script
+CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
+KVM="$KVM"; export KVM
+PATH="$PATH"; export PATH
+TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
+TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
+TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
+TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
+TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
+TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
+TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
+TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
+TORTURE_QEMU_MEM="$TORTURE_QEMU_MEM"; export TORTURE_QEMU_MEM
+TORTURE_SHUTDOWN_GRACE="$TORTURE_SHUTDOWN_GRACE"; export TORTURE_SHUTDOWN_GRACE
+TORTURE_SUITE="$TORTURE_SUITE"; export TORTURE_SUITE
+if ! test -e $resdir
+then
+ mkdir -p "$resdir" || :
+fi
+mkdir $resdir/$ds
+echo Results directory: $resdir/$ds
+echo $scriptname $args
+touch $resdir/$ds/log
+echo $scriptname $args >> $resdir/$ds/log
+echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE
+pwd > $resdir/$ds/testid.txt
+if test -d .git
+then
+ git status >> $resdir/$ds/testid.txt
+ git rev-parse HEAD >> $resdir/$ds/testid.txt
+ git diff HEAD >> $resdir/$ds/testid.txt
+fi
+___EOF___
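+
+# The header just emitted freezes the relevant environment variables so
+# that the generated commands in $T/script see the same settings as this
+# invocation of kvm.sh, even if the script is run later or elsewhere.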
+awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+ -v CONFIGDIR="$CONFIGFRAG/" \
+ -v KVM="$KVM" \
+ -v ncpus=$cpus \
+ -v jitter="$jitter" \
+ -v rd=$resdir/$ds/ \
+ -v dur=$dur \
+ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+ -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+'BEGIN {
+ i = 0;
+}
+
+{
+ cf[i] = $1;
+ cpus[i] = $2;
+ i++;
+}
+
+# Dump out the scripting required to run one test batch.
+function dump(first, pastlast, batchnum)
+{
+ print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
+ print "needqemurun="
+ jn=1
+ for (j = first; j < pastlast; j++) {
+ builddir=KVM "/b1"
+ cpusr[jn] = cpus[j];
+ if (cfrep[cf[j]] == "") {
+ cfr[jn] = cf[j];
+ cfrep[cf[j]] = 1;
+ } else {
+ cfrep[cf[j]]++;
+ cfr[jn] = cf[j] "." cfrep[cf[j]];
+ }
+ if (cpusr[jn] > ncpus && ncpus != 0)
+ ovf = "-ovf";
+ else
+ ovf = "";
+ print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
+ print "rm -f " builddir ".*";
+ print "touch " builddir ".wait";
+ print "mkdir " builddir " > /dev/null 2>&1 || :";
+ print "mkdir " rd cfr[jn] " || :";
+ print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
+ print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
+ print "while test -f " builddir ".wait"
+ print "do"
+ print "\tsleep 1"
+ print "done"
+ print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log";
+ jn++;
+ }
+ for (j = 1; j < jn; j++) {
+ builddir=KVM "/b" j
+ print "rm -f " builddir ".ready"
+ print "if test -f \"" rd cfr[j] "/builtkernel\""
+ print "then"
+ print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log";
+ print "\tneedqemurun=1"
+ print "fi"
+ }
+ njitter = 0;
+ split(jitter, ja);
+ if (ja[1] == -1 && ncpus == 0)
+ njitter = 1;
+ else if (ja[1] == -1)
+ njitter = ncpus;
+ else
+ njitter = ja[1];
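+	# Hypothetical examples: --jitter "-1 1000 1000" starts one
+	# jitter.sh instance per CPU (just one when ncpus is 0), while
+	# --jitter "4" starts exactly four, passing any given sleep and
+	# spin bounds through to jitter.sh unchanged.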
+ if (TORTURE_BUILDONLY && njitter != 0) {
+ njitter = 0;
+ print "echo Build-only run, so suppressing jitter | tee -a " rd "log"
+ }
+ if (TORTURE_BUILDONLY) {
+ print "needqemurun="
+ }
+ print "if test -n \"$needqemurun\""
+ print "then"
+ print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
+ for (j = 0; j < njitter; j++)
+ print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
+ print "\twait"
+ print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
+ print "else"
+ print "\twait"
+ print "\techo ---- No kernel runs. `date` | tee -a " rd "log";
+ print "fi"
+ for (j = 1; j < jn; j++) {
+ builddir=KVM "/b" j
+ print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log";
+ print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log";
+ }
+}
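+
+# Each dump() call thus appends one self-contained batch to the script:
+# background builds via kvm-test-1-run.sh gated by per-build .wait files,
+# optional jitter.sh instances alongside the qemu runs, and a final copy
+# of each test's kvm-test-1-run.sh.out into the log.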
+
+END {
+ njobs = i;
+ nc = ncpus;
+ first = 0;
+ batchnum = 1;
+
+ # Each pass through the following loop considers one test.
+ for (i = 0; i < njobs; i++) {
+ if (ncpus == 0) {
+ # Sequential test specified, each test its own batch.
+ dump(i, i + 1, batchnum);
+ first = i;
+ batchnum++;
+ } else if (nc < cpus[i] && i != 0) {
+ # Out of CPUs, dump out a batch.
+ dump(first, i, batchnum);
+ first = i;
+ nc = ncpus;
+ batchnum++;
+ }
+ # Account for the CPUs needed by the current test.
+ nc -= cpus[i];
+ }
+ # Dump the last batch.
+ if (ncpus != 0)
+ dump(first, i, batchnum);
+}' >> $T/script
+
+cat << ___EOF___ >> $T/script
+echo
+echo
+echo " --- `date` Test summary:"
+echo Results directory: $resdir/$ds
+kvm-recheck.sh $resdir/$ds
+___EOF___
+
+if test "$dryrun" = script
+then
+ cat $T/script
+ exit 0
+elif test "$dryrun" = sched
+then
+ # Extract the test run schedule from the script.
+ egrep 'Start batch|Starting build\.' $T/script |
+ grep -v ">>" |
+ sed -e 's/:.*$//' -e 's/^echo //'
+ exit 0
+else
+ # Not a dryrun, so run the script.
+ sh $T/script
+fi
+
+# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
new file mode 100755
index 000000000..24fe5f822
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+#
+# Check the build output from an rcutorture run for goodness.
+# The "file" is a pathname on the local system, and "title" is
+# a text string for error-message purposes.
+#
+# The file must contain kernel build output.
+#
+# Usage: parse-build.sh file title
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+F=$1
+title=$2
+T=${TMPDIR-/tmp}/parse-build.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+. functions.sh
+
+if grep -q CC < $F
+then
+ :
+else
+ print_bug $title no build
+ exit 1
+fi
+
+if grep -q "error:" < $F
+then
+ print_bug $title build errors:
+ grep "error:" < $F
+ exit 2
+fi
+
+grep warning: < $F > $T/warnings
+grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings
+grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings
+cat $T/hwarnings $T/cwarnings > $T/rcuwarnings
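+# $T/rcuwarnings now holds only warnings against RCU source files, e.g.
+# (hypothetically) "kernel/rcu/tree.c:123: warning: ...".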
+if test -s $T/rcuwarnings
+then
+	print_warning $title build warnings:
+ cat $T/rcuwarnings
+ exit 2
+fi
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
new file mode 100755
index 000000000..84933f6ae
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+#
+# Check the console output from an rcutorture run for oopses.
+# The "file" is a pathname on the local system, and "title" is
+# a text string for error-message purposes.
+#
+# Usage: parse-console.sh file title
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2011
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+T=${TMPDIR-/tmp}/parse-console.sh.$$
+file="$1"
+title="$2"
+
+trap 'rm -f $T.seq $T.diags' 0
+
+. functions.sh
+
+# Check for presence and readability of console output file
+if test -f "$file" -a -r "$file"
+then
+ :
+else
+ echo $title unreadable console output file: $file
+ exit 1
+fi
+if grep -Pq '\x00' < $file
+then
+ print_warning Console output contains nul bytes, old qemu still running?
+fi
+cat /dev/null > $file.diags
+
+# Check for proper termination, except that rcuperf runs don't indicate this.
+if test "$TORTURE_SUITE" != rcuperf
+then
+ # check for abject failure
+
+ if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
+ then
+ nerrs=`grep --binary-files=text '!!!' $file |
+ tail -1 |
+ awk '
+ {
+ for (i=NF-8;i<=NF;i++)
+ sum+=$i;
+ }
+ END { print sum }'`
+ print_bug $title FAILURE, $nerrs instances
+ exit
+ fi
+
+ grep --binary-files=text 'torture:.*ver:' $file |
+ egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+ sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ awk '
+ BEGIN {
+ ver = 0;
+ badseq = 0;
+ }
+
+ {
+ if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ badseqno1 = ver;
+ badseqno2 = $5;
+ badseqnr = NR;
+ badseq = 1;
+ }
+ ver = $5
+ }
+
+ END {
+ if (badseq) {
+ if (badseqno1 == badseqno2 && badseqno2 == ver)
+ print "GP HANG at " ver " torture stat " badseqnr;
+ else
+ print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
+ }
+ }' > $T.seq
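+	# $T.seq is now empty on a well-ordered run, or holds one line
+	# such as "BAD SEQ 17:15 last:42 version 23" (hypothetical values)
+	# naming the first out-of-sequence torture version number, or a
+	# "GP HANG" line if the version number never advanced.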
+
+ if grep -q SUCCESS $file
+ then
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ echo " " $file
+ exit 2
+ fi
+ else
+ if grep -q "_HOTPLUG:" $file
+ then
+ print_warning HOTPLUG FAILURES $title `cat $T.seq`
+ echo " " $file
+ exit 3
+ fi
+ echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
+ if test -s $T.seq
+ then
+ print_warning $title `cat $T.seq`
+ fi
+ exit 2
+ fi
+fi | tee -a $file.diags
+
+egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
+grep -v 'ODEBUG: ' |
+grep -v 'Warning: unable to open an initial console' > $T.diags
+if test -s $T.diags
+then
+ print_warning "Assertion failure in $file $title"
+ # cat $T.diags
+ summary=""
+ n_badness=`grep -c Badness $file`
+ if test "$n_badness" -ne 0
+ then
+ summary="$summary Badness: $n_badness"
+ fi
+ n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
+ if test "$n_warn" -ne 0
+ then
+ summary="$summary Warnings: $n_warn"
+ fi
+ n_bugs=`egrep -c 'BUG|Oops:' $file`
+ if test "$n_bugs" -ne 0
+ then
+ summary="$summary Bugs: $n_bugs"
+ fi
+ n_calltrace=`grep -c 'Call Trace:' $file`
+ if test "$n_calltrace" -ne 0
+ then
+ summary="$summary Call Traces: $n_calltrace"
+ fi
+	n_lockdep=`grep -c =========== $file`
+	if test "$n_lockdep" -ne 0
+	then
+		summary="$summary lockdep: $n_lockdep"
+	fi
+ n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
+ if test "$n_stalls" -ne 0
+ then
+ summary="$summary Stalls: $n_stalls"
+ fi
+ n_starves=`grep -c 'rcu_.*kthread starved for' $file`
+ if test "$n_starves" -ne 0
+ then
+ summary="$summary Starves: $n_starves"
+ fi
+ print_warning Summary: $summary
+ cat $T.diags >> $file.diags
+fi
+for i in $file.*.diags
+do
+ if test -f "$i"
+ then
+ cat $i >> $file.diags
+ fi
+done
+if ! test -s $file.diags
+then
+ rm -f $file.diags
+fi
diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED b/tools/testing/selftests/rcutorture/configs/lock/BUSTED
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot
new file mode 100644
index 000000000..6386c15e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot
@@ -0,0 +1 @@
+locktorture.torture_type=lock_busted
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
new file mode 100644
index 000000000..41bae5824
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -0,0 +1,7 @@
+LOCK01
+LOCK02
+LOCK03
+LOCK04
+LOCK05
+LOCK06
+LOCK07
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFcommon b/tools/testing/selftests/rcutorture/configs/lock/CFcommon
new file mode 100644
index 000000000..e372dc269
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_LOCK_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK01 b/tools/testing/selftests/rcutorture/configs/lock/LOCK01
new file mode 100644
index 000000000..a9625e3d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK01
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02 b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
new file mode 100644
index 000000000..5aa44b4f1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK02.boot
@@ -0,0 +1 @@
+locktorture.torture_type=mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03 b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
new file mode 100644
index 000000000..a67bbe024
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK03.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK04 b/tools/testing/selftests/rcutorture/configs/lock/LOCK04
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK04
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot
new file mode 100644
index 000000000..48c04fe47
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK04.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rw_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05 b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
new file mode 100644
index 000000000..8ac37307c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK05.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06 b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK06
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
new file mode 100644
index 000000000..f92219cd4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK06.boot
@@ -0,0 +1 @@
+locktorture.torture_type=percpu_rwsem_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07 b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
new file mode 100644
index 000000000..1d1da1477
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
new file mode 100644
index 000000000..97dadd1a9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK07.boot
@@ -0,0 +1 @@
+locktorture.torture_type=ww_mutex_lock
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
new file mode 100644
index 000000000..80eb646e1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+#
+# Kernel-version-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2014
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# locktorture_param_onoff bootparam-string config-file
+#
+# Adds onoff locktorture module parameters to kernels having them.
+locktorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding locktorture onoff. 1>&2
+ echo locktorture.onoff_interval=3 locktorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `locktorture_param_onoff "$1" "$2"` \
+ locktorture.stat_interval=15 \
+ locktorture.shutdown_secs=$3 \
+ locktorture.verbose=1
+}
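+
+# For example (hypothetical), per_version_boot_params "foo=1" LOCK01 1800
+# on a CPU-hotplug kernel would emit "foo=1" followed by the locktorture
+# onoff, stat_interval, shutdown_secs, and verbose module parameters.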
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
new file mode 100644
index 000000000..48d8a245c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED
@@ -0,0 +1,7 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
new file mode 100644
index 000000000..be7728db4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=busted
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
new file mode 100644
index 000000000..6a0b9f69f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -0,0 +1,18 @@
+TREE01
+TREE02
+TREE03
+TREE04
+TREE05
+TREE06
+TREE07
+TREE08
+TREE09
+SRCU-N
+SRCU-P
+SRCU-t
+SRCU-u
+TINY01
+TINY02
+TASKS01
+TASKS02
+TASKS03
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
new file mode 100644
index 000000000..d2d2a8613
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
new file mode 100644
index 000000000..2da8b4958
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -0,0 +1,8 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
new file mode 100644
index 000000000..238bfe3bd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
new file mode 100644
index 000000000..ab7ccd382
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P
@@ -0,0 +1,12 @@
+CONFIG_RCU_TRACE=n
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
new file mode 100644
index 000000000..84a7d51b7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
new file mode 100644
index 000000000..6c78022c8
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
new file mode 100644
index 000000000..238bfe3bd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcu
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
new file mode 100644
index 000000000..c15ada821
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
@@ -0,0 +1,10 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_SRCU=y
+CONFIG_RCU_TRACE=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
new file mode 100644
index 000000000..84a7d51b7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=srcud
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
new file mode 100644
index 000000000..bafe94cbd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
new file mode 100644
index 000000000..cd2a188ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
new file mode 100644
index 000000000..ad2be91e5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -0,0 +1,4 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
new file mode 100644
index 000000000..cd2a188ee
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
new file mode 100644
index 000000000..28568b72a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -0,0 +1,12 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
new file mode 100644
index 000000000..838297c58
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
new file mode 100644
index 000000000..6db705e55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -0,0 +1,13 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=n
+#CHECK#CONFIG_RCU_STALL_COMMON=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
new file mode 100644
index 000000000..d86742643
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -0,0 +1,14 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
new file mode 100644
index 000000000..6c1a292a6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot
@@ -0,0 +1,3 @@
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_bh=1
+rcutorture.torture_type=rcu_bh
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
new file mode 100644
index 000000000..b5b53973c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_MAXSMP=y
+CONFIG_CPUMASK_OFFSTACK=y
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
new file mode 100644
index 000000000..9f3a4d28e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=rcu_bh maxcpus=8 nr_cpus=43
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcu_nocbs=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
new file mode 100644
index 000000000..35e639e39
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=3
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_RCU_EXPERT=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
new file mode 100644
index 000000000..2dc31b16e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
new file mode 100644
index 000000000..5c3213cc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -0,0 +1,5 @@
+rcutorture.onoff_interval=200 rcutorture.onoff_holdoff=30
+rcutree.gp_preinit_delay=12
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
new file mode 100644
index 000000000..24c9f6012
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_RCU_FAST_NO_HZ=y
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=4
+CONFIG_RCU_FANOUT_LEAF=3
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
new file mode 100644
index 000000000..e6071bb96
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=rcu_bh rcutree.rcu_fanout_leaf=4 nohz_full=1-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
new file mode 100644
index 000000000..2dde0d996
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=6
+CONFIG_RCU_FANOUT_LEAF=6
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
new file mode 100644
index 000000000..c7fd050df
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=sched
+rcupdate.rcu_self_test_sched=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
new file mode 100644
index 000000000..05a4eec3f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=6
+CONFIG_RCU_FANOUT_LEAF=6
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_DEBUG_OBJECTS=y
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
new file mode 100644
index 000000000..ad18b52a2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot
@@ -0,0 +1,7 @@
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_bh=1
+rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
+rcutree.gp_preinit_delay=3
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
new file mode 100644
index 000000000..d7afb271a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -0,0 +1,17 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
new file mode 100644
index 000000000..d44609937
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -0,0 +1 @@
+nohz_full=2-9
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
new file mode 100644
index 000000000..fb1c763c1
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -0,0 +1,23 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=8
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
new file mode 100644
index 000000000..1bd8efc41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -0,0 +1,5 @@
+rcutorture.torture_type=sched
+rcupdate.rcu_self_test=1
+rcupdate.rcu_self_test_sched=1
+rcutree.rcu_fanout_exact=1
+rcu_nocbs=0-7
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
new file mode 100644
index 000000000..6710e749d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -0,0 +1,18 @@
+CONFIG_SMP=n
+CONFIG_NR_CPUS=1
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+#CHECK#CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
new file mode 100644
index 000000000..7bab82463
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+#
+# Kernel-version-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2013
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# rcutorture_param_n_barrier_cbs bootparam-string
+#
+# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+rcutorture_param_n_barrier_cbs () {
+ if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
+ then
+ :
+ else
+ echo rcutorture.n_barrier_cbs=4
+ fi
+}
+
+# rcutorture_param_onoff bootparam-string config-file
+#
+# Adds onoff rcutorture module parameters to kernels having them.
+rcutorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding rcutorture onoff. 1>&2
+ echo rcutorture.onoff_interval=1000 rcutorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 `rcutorture_param_onoff "$1" "$2"` \
+ `rcutorture_param_n_barrier_cbs "$1"` \
+ rcutorture.stat_interval=15 \
+ rcutorture.shutdown_secs=$3 \
+ rcutorture.test_no_idle_hz=1 \
+ rcutorture.verbose=1
+}
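+
+# As with the locktorture version, a hypothetical call such as
+# per_version_boot_params "nohz_full=2-9" TREE07 1800 echoes the given
+# string followed by any applicable onoff and n_barrier_cbs parameters
+# plus the fixed stat_interval, shutdown_secs, test_no_idle_hz, and
+# verbose settings.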
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
new file mode 100644
index 000000000..c9f56cf20
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
@@ -0,0 +1 @@
+TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
new file mode 100644
index 000000000..a09816b8c
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_PERF_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
new file mode 100644
index 000000000..fb05ef527
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
@@ -0,0 +1,16 @@
+CONFIG_SMP=n
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TINY_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
new file mode 100644
index 000000000..721cfda76
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
@@ -0,0 +1,19 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
new file mode 100644
index 000000000..7629f5dd7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
@@ -0,0 +1,22 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=54
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_HOTPLUG_CPU=n
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_FANOUT=3
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
new file mode 100644
index 000000000..d36b8fd6f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, you can access it online at
+# http://www.gnu.org/licenses/gpl-2.0.html.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo $1 rcuperf.shutdown=1 \
+ rcuperf.verbose=1
+}
diff --git a/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
new file mode 100644
index 000000000..a75b16991
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/TINY_RCU.txt
@@ -0,0 +1,38 @@
+This document gives a brief rationale for the TINY_RCU test cases.
+
+
+Kconfig Parameters:
+
+CONFIG_DEBUG_LOCK_ALLOC -- Do all three and none of the three.
+CONFIG_PREEMPT_COUNT
+CONFIG_RCU_TRACE
+
+The theory here is that randconfig testing will hit the other six possible
+combinations of these parameters.
+
+
+Kconfig Parameters Ignored:
+
+CONFIG_DEBUG_OBJECTS_RCU_HEAD
+CONFIG_PROVE_RCU
+
+ In common code tested by TREE_RCU test cases.
+
+CONFIG_RCU_NOCB_CPU
+
+ Meaningless for TINY_RCU.
+
+CONFIG_RCU_STALL_COMMON
+CONFIG_RCU_TORTURE_TEST
+
+ Redundant with CONFIG_RCU_TRACE.
+
+CONFIG_HOTPLUG_CPU
+CONFIG_PREEMPT
+CONFIG_PREEMPT_RCU
+CONFIG_SMP
+CONFIG_TINY_RCU
+CONFIG_TREE_RCU
+
+ All forced by CONFIG_TINY_RCU.
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
new file mode 100644
index 000000000..af6fca036
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -0,0 +1,81 @@
+This document gives a brief rationale for the TREE_RCU-related test
+cases, a group that includes PREEMPT_RCU.
+
+
+Kconfig Parameters:
+
+CONFIG_DEBUG_LOCK_ALLOC -- Do three, covering CONFIG_PROVE_LOCKING & not.
+CONFIG_DEBUG_OBJECTS_RCU_HEAD -- Do one.
+CONFIG_HOTPLUG_CPU -- Do half. (Every second.)
+CONFIG_HZ_PERIODIC -- Do one.
+CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.)
+CONFIG_NO_HZ_FULL -- Do two, one with partial CPU enablement.
+CONFIG_PREEMPT -- Do half. (First three and #8.)
+CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not.
+CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
+CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
+CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
+CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
+CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
+CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
+ rcu_nocbs=0, and one with all rcu_nocbs CPUs.
+CONFIG_RCU_TRACE -- Do half.
+CONFIG_SMP -- Need one !SMP for PREEMPT_RCU.
+CONFIG_RCU_EXPERT=n -- Do a few, but these have to be vanilla configurations.
+CONFIG_RCU_EQS_DEBUG -- Do at least one for CONFIG_NO_HZ_FULL and not.
+
+RCU-bh: Do one with PREEMPT and one with !PREEMPT.
+RCU-sched: Do one with PREEMPT but not BOOST.
+
+
+Boot parameters:
+
+nohz_full - do at least one.
+maxcpus -- do at least one.
+rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not.
+rcupdate.rcu_self_test -- Do at least one each, offloaded and not.
+rcutree.rcu_fanout_exact -- Do at least one.
+
+
+Kconfig Parameters Ignored:
+
+CONFIG_64BIT
+
+ Used only to check CONFIG_RCU_FANOUT value, inspection suffices.
+
+CONFIG_PREEMPT_COUNT
+CONFIG_PREEMPT_RCU
+
+ Redundant with CONFIG_PREEMPT, ignore.
+
+CONFIG_RCU_BOOST_DELAY
+
+ Inspection suffices, ignore.
+
+CONFIG_RCU_CPU_STALL_TIMEOUT
+
+ Inspection suffices, ignore.
+
+CONFIG_RCU_STALL_COMMON
+
+ Implied by TREE_RCU and PREEMPT_RCU.
+
+CONFIG_RCU_TORTURE_TEST
+CONFIG_RCU_TORTURE_TEST_RUNNABLE
+
+ Always used in KVM testing.
+
+CONFIG_PREEMPT_RCU
+CONFIG_TREE_RCU
+CONFIG_TINY_RCU
+CONFIG_TASKS_RCU
+
+ These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
+
+CONFIG_SRCU
+
+ Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
+
+
+boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
new file mode 100644
index 000000000..833f826d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -0,0 +1,113 @@
+This document describes one way to create the initrd directory hierarchy
+in order to allow an initrd to be built into your kernel. The trick
+here is to steal the initrd file used on your Linux laptop, Ubuntu in
+this case. There are probably much better ways of doing this.
+
+That said, here are the commands:
+
+------------------------------------------------------------------------
+cd tools/testing/selftests/rcutorture
+zcat /initrd.img > /tmp/initrd.img.zcat
+mkdir initrd
+cd initrd
+cpio -id < /tmp/initrd.img.zcat
+------------------------------------------------------------------------
+
+Another way to create an initramfs image is to use "dracut"[1], which is
+available on many distros. However, the initramfs that dracut generates is
+a cpio archive with another cpio archive inside it, so an extra step is
+needed to create the initrd directory hierarchy.
+
+Here are the commands to create an initrd directory for rcutorture using
+dracut:
+
+------------------------------------------------------------------------
+dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img
+cd tools/testing/selftests/rcutorture
+mkdir initrd
+cd initrd
+/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id
+------------------------------------------------------------------------
+
+Interestingly enough, if you are running rcutorture, you don't really
+need userspace in many cases. Running without userspace has the
+advantage of allowing you to test your kernel independently of the
+distro in place, the root-filesystem layout, and so on. To make this
+happen, put the following script in the initrd's tree's "/init" file,
+with 0755 mode.
+
+------------------------------------------------------------------------
+#!/bin/sh
+
+[ -d /dev ] || mkdir -m 0755 /dev
+[ -d /root ] || mkdir -m 0700 /root
+[ -d /sys ] || mkdir /sys
+[ -d /proc ] || mkdir /proc
+[ -d /tmp ] || mkdir /tmp
+mkdir -p /var/lock
+mount -t sysfs -o nodev,noexec,nosuid sysfs /sys
+mount -t proc -o nodev,noexec,nosuid proc /proc
+# Some things don't work properly without /etc/mtab.
+ln -sf /proc/mounts /etc/mtab
+
+# Note that this only becomes /dev on the real filesystem if udev's scripts
+# are used, which they will be, but it's worth pointing out.
+if ! mount -t devtmpfs -o mode=0755 udev /dev; then
+ echo "W: devtmpfs not available, falling back to tmpfs for /dev"
+ mount -t tmpfs -o mode=0755 udev /dev
+ [ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1
+ [ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11
+ [ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3
+fi
+
+mkdir /dev/pts
+mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true
+mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run
+mkdir /run/initramfs
+# compatibility symlink for the pre-oneiric locations
+ln -s /run/initramfs /dev/.initramfs
+
+# Export relevant variables
+export ROOT=
+export ROOTDELAY=
+export ROOTFLAGS=
+export ROOTFSTYPE=
+export IP=
+export BOOT=
+export BOOTIF=
+export UBIMTD=
+export break=
+export init=/sbin/init
+export quiet=n
+export readonly=y
+export rootmnt=/root
+export debug=
+export panic=
+export blacklist=
+export resume=
+export resume_offset=
+export recovery=
+
+for i in /sys/devices/system/cpu/cpu*/online
+do
+ case $i in
+ '/sys/devices/system/cpu/cpu0/online')
+ ;;
+ '/sys/devices/system/cpu/cpu*/online')
+ ;;
+ *)
+ echo 1 > $i
+ ;;
+ esac
+done
+
+while :
+do
+ sleep 10
+done
+------------------------------------------------------------------------
+
+References:
+[1]: https://dracut.wiki.kernel.org/index.php/Main_Page
+[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html
+[3]: https://www.centos.org/forums/viewtopic.php?t=51621
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
new file mode 100644
index 000000000..449cf579d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -0,0 +1,42 @@
+This document describes one way to create the rcu-test-image file
+that contains the filesystem used by the guest-OS kernel. There are
+probably much better ways of doing this, and this filesystem could no
+doubt be smaller. It is probably also possible to simply download
+an appropriate image from any number of places.
+
+That said, here are the commands:
+
+------------------------------------------------------------------------
+dd if=/dev/zero of=rcu-test-image bs=400M count=1
+mkfs.ext3 ./rcu-test-image
+sudo mount -o loop ./rcu-test-image /mnt
+
+# Replace "precise" below with your favorite Ubuntu release.
+# Empirical evidence says this image will work for 64-bit, but...
+# Note that debootstrap does take a few minutes to run. Or longer.
+sudo debootstrap --verbose --arch i386 precise /mnt http://archive.ubuntu.com/ubuntu
+cat << '___EOF___' | sudo dd of=/mnt/etc/fstab
+# UNCONFIGURED FSTAB FOR BASE SYSTEM
+#
+/dev/vda / ext3 defaults 1 1
+dev /dev tmpfs rw 0 0
+tmpfs /dev/shm tmpfs defaults 0 0
+devpts /dev/pts devpts gid=5,mode=620 0 0
+sysfs /sys sysfs defaults 0 0
+proc /proc proc defaults 0 0
+___EOF___
+sudo umount /mnt
+------------------------------------------------------------------------
+
+
+References:
+
+ http://sripathikodi.blogspot.com/2010/02/creating-kvm-bootable-fedora-system.html
+ https://help.ubuntu.com/community/KVM/CreateGuests
+ https://help.ubuntu.com/community/JeOSVMBuilder
+ http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
+ http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
+ http://www.landley.net/writing/rootfs-howto.html
+ http://en.wikipedia.org/wiki/Initrd
+ http://en.wikipedia.org/wiki/Cpio
+ http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
new file mode 100644
index 000000000..712a3d41a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
@@ -0,0 +1 @@
+srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
new file mode 100644
index 000000000..4bed0b678
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all: srcu.c store_buffering
+
+LINUX_SOURCE = ../../../../../..
+
+modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
+ $(LINUX_SOURCE)/kernel/rcu/srcu.c
+
+modified_srcu_output = include/linux/srcu.h srcu.c
+
+include/linux/srcu.h: srcu.c
+
+srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
+ awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
+
+store_buffering:
+	@$(MAKE) -C tests/store_buffering
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
new file mode 100644
index 000000000..1d016e669
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
@@ -0,0 +1 @@
+srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
new file mode 100644
index 000000000..f2860dd1b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
@@ -0,0 +1 @@
+#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
new file mode 100644
index 000000000..891ad13e9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
@@ -0,0 +1,156 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header has been modified to remove definitions of types that
+ * are defined in standard userspace headers or are problematic for some
+ * other reason.
+ */
+
+#ifndef _LINUX_TYPES_H
+#define _LINUX_TYPES_H
+
+#define __EXPORTED_HEADERS__
+#include <uapi/linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#define DECLARE_BITMAP(name, bits) \
+ unsigned long name[BITS_TO_LONGS(bits)]
+
+typedef __u32 __kernel_dev_t;
+
+/* bsd */
+typedef unsigned char u_char;
+typedef unsigned short u_short;
+typedef unsigned int u_int;
+typedef unsigned long u_long;
+
+/* sysv */
+typedef unsigned char unchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+#ifndef __BIT_TYPES_DEFINED__
+#define __BIT_TYPES_DEFINED__
+
+typedef __u8 u_int8_t;
+typedef __s8 int8_t;
+typedef __u16 u_int16_t;
+typedef __s16 int16_t;
+typedef __u32 u_int32_t;
+typedef __s32 int32_t;
+
+#endif /* !(__BIT_TYPES_DEFINED__) */
+
+typedef __u8 uint8_t;
+typedef __u16 uint16_t;
+typedef __u32 uint32_t;
+
+/* this is a special 64bit data type that is 8-byte aligned */
+#define aligned_u64 __u64 __attribute__((aligned(8)))
+#define aligned_be64 __be64 __attribute__((aligned(8)))
+#define aligned_le64 __le64 __attribute__((aligned(8)))
+
+/**
+ * The type used for indexing onto a disc or disc partition.
+ *
+ * Linux always considers sectors to be 512 bytes long, independent
+ * of the device's real block size.
+ *
+ * blkcnt_t is the type of the inode's block count.
+ */
+#ifdef CONFIG_LBDAF
+typedef u64 sector_t;
+#else
+typedef unsigned long sector_t;
+#endif
+
+/*
+ * The type of an index into the pagecache.
+ */
+#define pgoff_t unsigned long
+
+/*
+ * A dma_addr_t can hold any valid DMA address, i.e., any address returned
+ * by the DMA API.
+ *
+ * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
+ * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
+ * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
+ * so they don't care about the size of the actual bus addresses.
+ */
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+typedef u64 dma_addr_t;
+#else
+typedef u32 dma_addr_t;
+#endif
+
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+typedef u64 phys_addr_t;
+#else
+typedef u32 phys_addr_t;
+#endif
+
+typedef phys_addr_t resource_size_t;
+
+/*
+ * This type is the placeholder for a hardware interrupt number. It has to be
+ * big enough to enclose whatever representation is used by a given platform.
+ */
+typedef unsigned long irq_hw_number_t;
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+#ifdef CONFIG_64BIT
+typedef struct {
+ long counter;
+} atomic64_t;
+#endif
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+/**
+ * struct callback_head - callback structure for use with RCU and task_work
+ * @next: next update requests in a list
+ * @func: actual update function to call after the grace period.
+ *
+ * The struct is aligned to the size of a pointer. On most architectures this
+ * happens naturally due to ABI requirements, but some architectures (like
+ * CRIS) have a weird ABI and we need to ask for it explicitly.
+ *
+ * The alignment is required to guarantee that bits 0 and 1 of @next will be
+ * clear under normal conditions -- as long as we use call_rcu(),
+ * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback.
+ *
+ * This guarantee is important for a few reasons:
+ *  - future call_rcu_lazy() will make use of lower bits in the pointer;
+ *  - the structure shares storage space in struct page with @compound_head,
+ *    which encodes PageTail() in bit 0. The guarantee is needed to avoid
+ *    false-positive PageTail().
+ */
+struct callback_head {
+ struct callback_head *next;
+ void (*func)(struct callback_head *head);
+} __attribute__((aligned(sizeof(void *))));
+#define rcu_head callback_head
+
+typedef void (*rcu_callback_t)(struct rcu_head *head);
+typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
+
+/* clocksource cycle base type */
+typedef u64 cycle_t;
+
+#endif /* __ASSEMBLY__ */
+#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
new file mode 100755
index 000000000..e05182d3e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
@@ -0,0 +1,376 @@
+#!/usr/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+
+# Modify SRCU for formal verification. The first argument should be srcu.h and
+# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
+# current directory.
+
+BEGIN {
+ if (ARGC != 5) {
+		print "Usage: input.h input.c output.h output.c" > "/dev/stderr";
+ exit 1;
+ }
+ h_output = ARGV[3];
+ c_output = ARGV[4];
+ ARGC = 3;
+
+ # Tokenize using FS and not RS as FS supports regular expressions. Each
+ # record is one line of source, except that backslashed lines are
+ # combined. Comments are treated as field separators, as are quotes.
+ quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
+ comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
+ FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
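+	# For example, a line like "int x = 0; /* init */" yields the fields
+	# "int", "x" and "0", with the comment absorbed into the separators.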
+
+ inside_srcu_struct = 0;
+ inside_srcu_init_def = 0;
+ srcu_init_param_name = "";
+ in_macro = 0;
+ brace_nesting = 0;
+ paren_nesting = 0;
+
+	# Allow the manipulation of the last field separator after it has
+	# been seen.
+ last_fs = "";
+ # Whether the last field separator was intended to be output.
+ last_fs_print = 0;
+
+ # rcu_batches stores the initialization for each instance of struct
+ # rcu_batch
+
+ in_comment = 0;
+
+ outputfile = "";
+}
+
+{
+ prev_outputfile = outputfile;
+ if (FILENAME ~ /\.h$/) {
+ outputfile = h_output;
+ if (FNR != NR) {
+ print "Incorrect file order" > "/dev/stderr";
+ exit 1;
+ }
+ }
+ else
+ outputfile = c_output;
+
+ if (prev_outputfile && outputfile != prev_outputfile) {
+ new_outputfile = outputfile;
+ outputfile = prev_outputfile;
+ update_fieldsep("", 0);
+ outputfile = new_outputfile;
+ }
+}
+
+# Combine the next line into $0.
+function combine_line() {
+ ret = getline next_line;
+ if (ret == 0) {
+ # Don't allow two consecutive getlines at the end of the file
+ if (eof_found) {
+ print "Error: expected more input." > "/dev/stderr";
+ exit 1;
+ } else {
+ eof_found = 1;
+ }
+ } else if (ret == -1) {
+		print "Error reading next line of file " FILENAME > "/dev/stderr";
+ exit 1;
+ }
+ $0 = $0 "\n" next_line;
+}
+
+# Combine backslashed lines and multiline comments.
+function combine_backslashes() {
+ while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
+ combine_line();
+ }
+}
+
+function read_line() {
+ combine_line();
+ combine_backslashes();
+}
+
+# Print out field separators and update variables that depend on them. Only
+# print if p is true. Call with sep="" and p=0 to print out the last field
+# separator.
+function update_fieldsep(sep, p) {
+ # Count braces
+ sep_tmp = sep;
+ gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
+ while (1)
+ {
+ if (sub("[^{}()]*\\{", "", sep_tmp)) {
+ brace_nesting++;
+ continue;
+ }
+ if (sub("[^{}()]*\\}", "", sep_tmp)) {
+ brace_nesting--;
+ if (brace_nesting < 0) {
+ print "Unbalanced braces!" > "/dev/stderr";
+ exit 1;
+ }
+ continue;
+ }
+ if (sub("[^{}()]*\\(", "", sep_tmp)) {
+ paren_nesting++;
+ continue;
+ }
+ if (sub("[^{}()]*\\)", "", sep_tmp)) {
+ paren_nesting--;
+ if (paren_nesting < 0) {
+				print "Unbalanced parentheses!" > "/dev/stderr";
+ exit 1;
+ }
+ continue;
+ }
+
+ break;
+ }
+
+ if (last_fs_print)
+ printf("%s", last_fs) > outputfile;
+ last_fs = sep;
+ last_fs_print = p;
+}
+
+# Shifts the fields down by n positions. Calls next if there are no more. If p
+# is true then print out field separators.
+function shift_fields(n, p) {
+ do {
+ if (match($0, FS) > 0) {
+ update_fieldsep(substr($0, RSTART, RLENGTH), p);
+ if (RSTART + RLENGTH <= length())
+ $0 = substr($0, RSTART + RLENGTH);
+ else
+ $0 = "";
+ } else {
+ update_fieldsep("", 0);
+ print "" > outputfile;
+ next;
+ }
+ } while (--n > 0);
+}
+
+# Shifts and prints the first n fields.
+function print_fields(n) {
+ do {
+ update_fieldsep("", 0);
+ printf("%s", $1) > outputfile;
+ shift_fields(1, 1);
+ } while (--n > 0);
+}
+
+{
+ combine_backslashes();
+}
+
+# Print leading FS
+{
+ if (match($0, "^(" FS ")+") > 0) {
+ update_fieldsep(substr($0, RSTART, RLENGTH), 1);
+ if (RSTART + RLENGTH <= length())
+ $0 = substr($0, RSTART + RLENGTH);
+ else
+ $0 = "";
+ }
+}
+
+# Parse the line.
+{
+ while (NF > 0) {
+ if ($1 == "struct" && NF < 3) {
+ read_line();
+ continue;
+ }
+
+ if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
+ brace_nesting == 0 && paren_nesting == 0 &&
+ $1 == "struct" && $2 == "srcu_struct" &&
+ $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
+ inside_srcu_struct = 1;
+ print_fields(2);
+ continue;
+ }
+ if (inside_srcu_struct && brace_nesting == 0 &&
+ paren_nesting == 0) {
+ inside_srcu_struct = 0;
+ update_fieldsep("", 0);
+ for (name in rcu_batches)
+ print "extern struct rcu_batch " name ";" > outputfile;
+ }
+
+ if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
+ # Move rcu_batches outside of the struct.
+ rcu_batches[$3] = "";
+ shift_fields(3, 1);
+ sub(/;[[:space:]]*$/, "", last_fs);
+ continue;
+ }
+
+ if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
+ $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
+ inside_srcu_init_def = 1;
+ srcu_init_param_name = $3;
+ in_macro = 1;
+ print_fields(3);
+ continue;
+ }
+ if (inside_srcu_init_def && brace_nesting == 0 &&
+ paren_nesting == 0) {
+ inside_srcu_init_def = 0;
+ in_macro = 0;
+ continue;
+ }
+
+ if (inside_srcu_init_def && brace_nesting == 1 &&
+ paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
+ $1 ~ /^[[:alnum:]_]+$/) {
+ name = $1;
+ if (name in rcu_batches) {
+ # Remove the dot.
+ sub(/\.[[:space:]]*$/, "", last_fs);
+
+ old_record = $0;
+ do
+ shift_fields(1, 0);
+ while (last_fs !~ /,/ || paren_nesting > 0);
+ end_loc = length(old_record) - length($0);
+ end_loc += index(last_fs, ",") - length(last_fs);
+
+ last_fs = substr(last_fs, index(last_fs, ",") + 1);
+ last_fs_print = 1;
+
+ match(old_record, "^"name"("FS")+=");
+ start_loc = RSTART + RLENGTH;
+
+ len = end_loc - start_loc;
+ initializer = substr(old_record, start_loc, len);
+ gsub(srcu_init_param_name "\\.", "", initializer);
+ rcu_batches[name] = initializer;
+ continue;
+ }
+ }
+
+ # Don't include a nonexistent file
+ if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
+ update_fieldsep("", 0);
+ next;
+ }
+
+ # Ignore most preprocessor stuff.
+ if (!in_macro && $1 ~ /#/) {
+ break;
+ }
+
+ if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
+ read_line();
+ continue;
+ }
+ if (brace_nesting > 0 &&
+ $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
+ $2 in rcu_batches) {
+ # Make uses of rcu_batches global. Somewhat unreliable.
+ shift_fields(1, 0);
+ print_fields(1);
+ continue;
+ }
+
+ if ($1 == "static" && NF < 3) {
+ read_line();
+ continue;
+ }
+ if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
+ $2 == "void" && $3 == "srcu_flip")) {
+ shift_fields(1, 1);
+ print_fields(2);
+ continue;
+ }
+
+ # Distinguish between read-side and write-side memory barriers.
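+	# srcu.c annotates each barrier with a letter comment, e.g.
+	# "smp_mb(); /* A */". Letters A and D become sync_smp_mb() (update
+	# side), B and C become rs_smp_mb() (read side).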
+ if ($1 == "smp_mb" && NF < 2) {
+ read_line();
+ continue;
+ }
+ if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
+ barrier_letter = substr($0, RLENGTH, 1);
+ if (barrier_letter ~ /A|D/)
+ new_barrier_name = "sync_smp_mb";
+ else if (barrier_letter ~ /B|C/)
+ new_barrier_name = "rs_smp_mb";
+ else {
+			print "Unrecognized memory barrier." > "/dev/stderr";
+ exit 1;
+ }
+
+ shift_fields(1, 1);
+ printf("%s", new_barrier_name) > outputfile;
+ continue;
+ }
+
+ # Skip definition of rcu_synchronize, since it is already
+ # defined in misc.h. Only present in old versions of srcu.
+ if (brace_nesting == 0 && paren_nesting == 0 &&
+ $1 == "struct" && $2 == "rcu_synchronize" &&
+ $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
+ shift_fields(2, 0);
+ while (brace_nesting) {
+ if (NF < 2)
+ read_line();
+ shift_fields(1, 0);
+ }
+ }
+
+ # Skip definition of wakeme_after_rcu for the same reason
+ if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
+ $3 == "wakeme_after_rcu") {
+ while (NF < 5)
+ read_line();
+ shift_fields(3, 0);
+ do {
+ while (NF < 3)
+ read_line();
+ shift_fields(1, 0);
+ } while (paren_nesting || brace_nesting);
+ }
+
+ if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
+ read_line();
+ continue;
+ }
+
+ # Give srcu_batches_completed the correct type for old SRCU.
+ if (brace_nesting == 0 && $1 == "long" &&
+ $2 == "srcu_batches_completed") {
+ update_fieldsep("", 0);
+ printf("unsigned ") > outputfile;
+ print_fields(2);
+ continue;
+ }
+ if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
+ $3 == "srcu_batches_completed") {
+ print_fields(3);
+ continue;
+ }
+
+ # Just print out the input code by default.
+ print_fields(1);
+ }
+ update_fieldsep("", 0);
+ print > outputfile;
+ next;
+}
+
+END {
+ update_fieldsep("", 0);
+
+ if (brace_nesting != 0) {
+ print "Unbalanced braces!" > "/dev/stderr";
+ exit 1;
+ }
+
+ # Define the rcu_batches
+ for (name in rcu_batches)
+ print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
new file mode 100644
index 000000000..570a49d9d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASSUME_H
+#define ASSUME_H
+
+/* Provide an assumption macro that can be disabled for gcc. */
+#ifdef RUN
+#define assume(x) \
+ do { \
+ /* Evaluate x to suppress warnings. */ \
+ (void) (x); \
+ } while (0)
+
+#else
+#define assume(x) __CPROVER_assume(x)
+#endif
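+
+/*
+ * Typical use (illustrative, mirroring preempt.c): constrain a
+ * nondeterministic value, e.g.
+ *
+ *	cpu = nondet_int();
+ *	assume(cpu >= 0 && cpu < NR_CPUS);
+ */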
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
new file mode 100644
index 000000000..3f95a768a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BARRIERS_H
+#define BARRIERS_H
+
+#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#ifdef RUN
+#define smp_mb() __sync_synchronize()
+#define smp_mb__after_unlock_lock() __sync_synchronize()
+#else
+/*
+ * Copied from CBMC's implementation of __sync_synchronize(), which
+ * seems to be disabled by default.
+ */
+#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+ "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
+ "WWcumul", "RRcumul", "RWcumul", "WRcumul")
+#endif
+
+/*
+ * Allow memory barriers to be disabled in either the read or write side
+ * of SRCU individually.
+ */
+
+#ifndef NO_SYNC_SMP_MB
+#define sync_smp_mb() smp_mb()
+#else
+#define sync_smp_mb() do {} while (0)
+#endif
+
+#ifndef NO_READ_SIDE_SMP_MB
+#define rs_smp_mb() smp_mb()
+#else
+#define rs_smp_mb() do {} while (0)
+#endif
+
+#define READ_ONCE(x) (*(volatile typeof(x) *) &(x))
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
new file mode 100644
index 000000000..5e7912c6a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUG_ON_H
+#define BUG_ON_H
+
+#include <assert.h>
+
+#define BUG() assert(0)
+#define BUG_ON(x) assert(!(x))
+
+/* Does it make sense to treat warnings as errors? */
+#define WARN() BUG()
+#define WARN_ON(x) (BUG_ON(x), false)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
new file mode 100644
index 000000000..e67ee5b3d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+/* Include all source files. */
+
+#include "include_srcu.c"
+
+#include "preempt.c"
+#include "misc.c"
+
+/* Used by test.c files */
+#include <pthread.h>
+#include <stdlib.h>
+#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
new file mode 100644
index 000000000..283d71033
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* "Cheater" definitions based on restricted Kconfig choices. */
+
+#undef CONFIG_TINY_RCU
+#undef __CHECKER__
+#undef CONFIG_DEBUG_LOCK_ALLOC
+#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
+#undef CONFIG_HOTPLUG_CPU
+#undef CONFIG_MODULES
+#undef CONFIG_NO_HZ_FULL_SYSIDLE
+#undef CONFIG_PREEMPT_COUNT
+#undef CONFIG_PREEMPT_RCU
+#undef CONFIG_PROVE_RCU
+#undef CONFIG_RCU_NOCB_CPU
+#undef CONFIG_RCU_NOCB_CPU_ALL
+#undef CONFIG_RCU_STALL_COMMON
+#undef CONFIG_RCU_TRACE
+#undef CONFIG_RCU_USER_QS
+#undef CONFIG_TASKS_RCU
+#define CONFIG_TREE_RCU
+
+#define CONFIG_GENERIC_ATOMIC64
+
+#if NR_CPUS > 1
+#define CONFIG_SMP
+#else
+#undef CONFIG_SMP
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
new file mode 100644
index 000000000..e5202d4cf
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
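+/*
+ * When the simple implementation is selected, rename the kernel's
+ * synchronize_srcu() out of the way while srcu.c is compiled, so that
+ * simple_sync_srcu.c can supply the replacement below.
+ */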
+#ifdef USE_SIMPLE_SYNC_SRCU
+#define synchronize_srcu(sp) synchronize_srcu_original(sp)
+#endif
+
+#include <srcu.c>
+
+#ifdef USE_SIMPLE_SYNC_SRCU
+#undef synchronize_srcu
+
+#include "simple_sync_srcu.c"
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
new file mode 100644
index 000000000..0dd27aa51
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef INT_TYPEDEFS_H
+#define INT_TYPEDEFS_H
+
+#include <inttypes.h>
+
+typedef int8_t s8;
+typedef uint8_t u8;
+typedef int16_t s16;
+typedef uint16_t u16;
+typedef int32_t s32;
+typedef uint32_t u32;
+typedef int64_t s64;
+typedef uint64_t u64;
+
+typedef int8_t __s8;
+typedef uint8_t __u8;
+typedef int16_t __s16;
+typedef uint16_t __u16;
+typedef int32_t __s32;
+typedef uint32_t __u32;
+typedef int64_t __s64;
+typedef uint64_t __u64;
+
+#define S8_C(x) INT8_C(x)
+#define U8_C(x) UINT8_C(x)
+#define S16_C(x) INT16_C(x)
+#define U16_C(x) UINT16_C(x)
+#define S32_C(x) INT32_C(x)
+#define U32_C(x) UINT32_C(x)
+#define S64_C(x) INT64_C(x)
+#define U64_C(x) UINT64_C(x)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
new file mode 100644
index 000000000..cf6938d67
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
@@ -0,0 +1,221 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LOCKS_H
+#define LOCKS_H
+
+#include <limits.h>
+#include <pthread.h>
+#include <stdbool.h>
+
+#include "assume.h"
+#include "bug_on.h"
+#include "preempt.h"
+
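+/* CBMC treats this undefined function as returning an arbitrary int. */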
+int nondet_int(void);
+
+#define __acquire(x)
+#define __acquires(x)
+#define __release(x)
+#define __releases(x)
+
+/* Only use one lock mechanism. Select which one. */
+#ifdef PTHREAD_LOCK
+struct lock_impl {
+ pthread_mutex_t mutex;
+};
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+ BUG_ON(pthread_mutex_lock(&lock->mutex));
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+ BUG_ON(pthread_mutex_unlock(&lock->mutex));
+}
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+ int err = pthread_mutex_trylock(&lock->mutex);
+
+ if (!err)
+ return true;
+ else if (err == EBUSY)
+ return false;
+ BUG();
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+ pthread_mutex_init(&lock->mutex, NULL);
+}
+
+#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
+
+#else /* !defined(PTHREAD_LOCK) */
+/* Spinlock that assumes that it always gets the lock immediately. */
+
+struct lock_impl {
+ bool locked;
+};
+
+static inline bool lock_impl_trylock(struct lock_impl *lock)
+{
+#ifdef RUN
+ /* TODO: Should this be a test and set? */
+ return __sync_bool_compare_and_swap(&lock->locked, false, true);
+#else
+ __CPROVER_atomic_begin();
+ bool old_locked = lock->locked;
+ lock->locked = true;
+ __CPROVER_atomic_end();
+
+ /* Minimal barrier to prevent accesses leaking out of lock. */
+ __CPROVER_fence("RRfence", "RWfence");
+
+ return !old_locked;
+#endif
+}
+
+static inline void lock_impl_lock(struct lock_impl *lock)
+{
+ /*
+ * CBMC doesn't support busy waiting, so just assume that the
+ * lock is available.
+ */
+ assume(lock_impl_trylock(lock));
+
+ /*
+ * If the lock was already held by this thread then the assumption
+ * is unsatisfiable (deadlock).
+ */
+}
+
+static inline void lock_impl_unlock(struct lock_impl *lock)
+{
+#ifdef RUN
+ BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
+#else
+ /* Minimal barrier to prevent accesses leaking out of lock. */
+ __CPROVER_fence("RWfence", "WWfence");
+
+ __CPROVER_atomic_begin();
+ bool old_locked = lock->locked;
+ lock->locked = false;
+ __CPROVER_atomic_end();
+
+ BUG_ON(!old_locked);
+#endif
+}
+
+static inline void lock_impl_init(struct lock_impl *lock)
+{
+ lock->locked = false;
+}
+
+#define LOCK_IMPL_INITIALIZER {.locked = false}
+
+#endif /* !defined(PTHREAD_LOCK) */
+
+/*
+ * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
+ * locks of different types.
+ */
+typedef struct {
+ struct lock_impl internal_lock;
+} spinlock_t;
+
+#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
+#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
+#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
+
+static inline void spin_lock_init(spinlock_t *lock)
+{
+ lock_impl_init(&lock->internal_lock);
+}
+
+static inline void spin_lock(spinlock_t *lock)
+{
+ /*
+ * Spin locks also need to be removed in order to eliminate all
+ * memory barriers. They are only used by the write side anyway.
+ */
+#ifndef NO_SYNC_SMP_MB
+ preempt_disable();
+ lock_impl_lock(&lock->internal_lock);
+#endif
+}
+
+static inline void spin_unlock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+ lock_impl_unlock(&lock->internal_lock);
+ preempt_enable();
+#endif
+}
+
+/* Don't bother with interrupts */
+#define spin_lock_irq(lock) spin_lock(lock)
+#define spin_unlock_irq(lock) spin_unlock(lock)
+#define spin_lock_irqsave(lock, flags) spin_lock(lock)
+#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
+
+/*
+ * This is supposed to return an int, but I think that a bool should work as
+ * well.
+ */
+static inline bool spin_trylock(spinlock_t *lock)
+{
+#ifndef NO_SYNC_SMP_MB
+ preempt_disable();
+ return lock_impl_trylock(&lock->internal_lock);
+#else
+ return true;
+#endif
+}
+
+struct completion {
+	/* Hopefully this won't overflow. */
+ unsigned int count;
+};
+
+#define COMPLETION_INITIALIZER(x) {.count = 0}
+#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
+#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
+
+static inline void init_completion(struct completion *c)
+{
+ c->count = 0;
+}
+
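+/*
+ * CBMC cannot model blocking, so waiting is expressed as an assumption:
+ * executions in which the completion was not already counted up are
+ * simply discarded.
+ */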
+static inline void wait_for_completion(struct completion *c)
+{
+ unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
+
+ assume(prev_count);
+}
+
+static inline void complete(struct completion *c)
+{
+ unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
+
+ BUG_ON(prev_count == UINT_MAX);
+}
+
+/* This function probably isn't very useful for CBMC. */
+static inline bool try_wait_for_completion(struct completion *c)
+{
+ BUG();
+}
+
+static inline bool completion_done(struct completion *c)
+{
+ return c->count;
+}
+
+/* TODO: Implement complete_all */
+static inline void complete_all(struct completion *c)
+{
+ BUG();
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
new file mode 100644
index 000000000..9440cc39e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include "misc.h"
+#include "bug_on.h"
+
+struct rcu_head;
+
+void wakeme_after_rcu(struct rcu_head *head)
+{
+ BUG();
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
new file mode 100644
index 000000000..aca50030f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
@@ -0,0 +1,58 @@
+#ifndef MISC_H
+#define MISC_H
+
+#include "assume.h"
+#include "int_typedefs.h"
+#include "locks.h"
+
+#include <linux/types.h>
+
+/* Probably won't need to deal with bottom halves. */
+static inline void local_bh_disable(void) {}
+static inline void local_bh_enable(void) {}
+
+#define MODULE_ALIAS(X)
+#define module_param(...)
+#define EXPORT_SYMBOL_GPL(x)
+
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) *__mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); \
+})
+
+#ifndef USE_SIMPLE_SYNC_SRCU
+/* Abuse udelay to make sure that busy loops terminate. */
+#define udelay(x) assume(0)
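+/*
+ * Any path that reaches udelay() becomes infeasible, so busy-wait loops
+ * that poll with udelay() are pruned rather than explored forever.
+ */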
+
+#else
+
+/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
+#define udelay(x) do { } while (0)
+#endif
+
+#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
+ do { } while (0)
+
+#define notrace
+
+/* Avoid including rcupdate.h */
+struct rcu_synchronize {
+ struct rcu_head head;
+ struct completion completion;
+};
+
+void wakeme_after_rcu(struct rcu_head *head);
+
+#define rcu_lock_acquire(a) do { } while (0)
+#define rcu_lock_release(a) do { } while (0)
+#define rcu_lockdep_assert(c, s) do { } while (0)
+#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
+
+/*
+ * Let CBMC non-deterministically choose between normal and expedited
+ * grace periods: these functions are declared but never defined, so
+ * CBMC treats their return values as nondeterministic.
+ */
+bool rcu_gp_is_normal(void);
+bool rcu_gp_is_expedited(void);
+
+/* Do the same for old versions of rcu. */
+#define rcu_expedited (rcu_gp_is_expedited())
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
new file mode 100644
index 000000000..27e67a3f2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PERCPU_H
+#define PERCPU_H
+
+#include <stddef.h>
+#include "bug_on.h"
+#include "preempt.h"
+
+#define __percpu
+
+/* Maximum size of any percpu data. */
+#define PERCPU_OFFSET (4 * sizeof(long))
+
+/* Ignore alignment, as CBMC doesn't care about false sharing. */
+#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
+
+static inline void *__alloc_percpu(size_t size, size_t align)
+{
+ BUG();
+ return NULL;
+}
+
+static inline void free_percpu(void *ptr)
+{
+ BUG();
+}
+
+#define per_cpu_ptr(ptr, cpu) \
+ ((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
+
+#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
+#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
+#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
+
+/* Make CBMC use atomics to work around a bug. */
+#ifdef RUN
+#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
+#else
+/*
+ * Split the atomic into a read and a write so that it has the least
+ * possible ordering.
+ */
+#define THIS_CPU_ADD_HELPER(ptr, x) \
+ do { \
+ typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
+ typeof(ptr) this_cpu_add_helper_x = (x); \
+ typeof(*ptr) this_cpu_add_helper_temp; \
+ __CPROVER_atomic_begin(); \
+ this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
+ __CPROVER_atomic_end(); \
+ this_cpu_add_helper_temp += this_cpu_add_helper_x; \
+ __CPROVER_atomic_begin(); \
+ *(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
+ __CPROVER_atomic_end(); \
+ } while (0)
+#endif
+
+/*
+ * For some reason CBMC needs an atomic operation even though this is percpu
+ * data.
+ */
+#define __this_cpu_add(pcp, n) \
+ do { \
+ BUG_ON(preemptible()); \
+ THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
+ (typeof(pcp)) (n)); \
+ } while (0)
+
+#define this_cpu_add(pcp, n) \
+ do { \
+ int this_cpu_add_impl_cpu = get_cpu(); \
+ THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
+ (typeof(pcp)) (n)); \
+ put_cpu(); \
+ } while (0)
+
+/*
+ * This will cause a compiler warning because of the cast from char[][] to
+ * type*. This will cause a compile time error if type is too big.
+ */
+#define DEFINE_PER_CPU(type, name) \
+ char name[NR_CPUS][PERCPU_OFFSET]; \
+ typedef char percpu_too_big_##name \
+ [sizeof(type) > PERCPU_OFFSET ? -1 : 1]
+
+#define for_each_possible_cpu(cpu) \
+ for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
new file mode 100644
index 000000000..b4083ae34
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include "preempt.h"
+
+#include "assume.h"
+#include "locks.h"
+
+/* Support NR_CPUS of at most 64 */
+#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
+#define CPU_PREEMPTION_LOCKS_INIT1 \
+ CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
+#define CPU_PREEMPTION_LOCKS_INIT2 \
+ CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
+#define CPU_PREEMPTION_LOCKS_INIT3 \
+ CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
+#define CPU_PREEMPTION_LOCKS_INIT4 \
+ CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
+#define CPU_PREEMPTION_LOCKS_INIT5 \
+ CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
+
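+/*
+ * Each CPU_PREEMPTION_LOCKS_INITn macro expands to 2^n initializers, so
+ * the bit tests on (NR_CPUS - 1) below select a combination of blocks
+ * that yields exactly NR_CPUS array entries.
+ */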
+/*
+ * Simulate disabling preemption by locking a particular cpu. NR_CPUS
+ * should be the actual number of cpus, not just the maximum.
+ */
+struct lock_impl cpu_preemption_locks[NR_CPUS] = {
+ CPU_PREEMPTION_LOCKS_INIT0
+#if (NR_CPUS - 1) & 1
+ , CPU_PREEMPTION_LOCKS_INIT0
+#endif
+#if (NR_CPUS - 1) & 2
+ , CPU_PREEMPTION_LOCKS_INIT1
+#endif
+#if (NR_CPUS - 1) & 4
+ , CPU_PREEMPTION_LOCKS_INIT2
+#endif
+#if (NR_CPUS - 1) & 8
+ , CPU_PREEMPTION_LOCKS_INIT3
+#endif
+#if (NR_CPUS - 1) & 16
+ , CPU_PREEMPTION_LOCKS_INIT4
+#endif
+#if (NR_CPUS - 1) & 32
+ , CPU_PREEMPTION_LOCKS_INIT5
+#endif
+};
+
+#undef CPU_PREEMPTION_LOCKS_INIT0
+#undef CPU_PREEMPTION_LOCKS_INIT1
+#undef CPU_PREEMPTION_LOCKS_INIT2
+#undef CPU_PREEMPTION_LOCKS_INIT3
+#undef CPU_PREEMPTION_LOCKS_INIT4
+#undef CPU_PREEMPTION_LOCKS_INIT5
+
+__thread int thread_cpu_id;
+__thread int preempt_disable_count;
+
+void preempt_disable(void)
+{
+ BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
+
+ if (preempt_disable_count++)
+ return;
+
+ thread_cpu_id = nondet_int();
+ assume(thread_cpu_id >= 0);
+ assume(thread_cpu_id < NR_CPUS);
+ lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
+}
+
+void preempt_enable(void)
+{
+ BUG_ON(preempt_disable_count < 1);
+
+ if (--preempt_disable_count)
+ return;
+
+ lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
new file mode 100644
index 000000000..f8b762cd2
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef PREEMPT_H
+#define PREEMPT_H
+
+#include <stdbool.h>
+
+#include "bug_on.h"
+
+/* This variable contains garbage if preempt_disable_count is 0. */
+extern __thread int thread_cpu_id;
+
+/* Support recursive preemption disabling. */
+extern __thread int preempt_disable_count;
+
+void preempt_disable(void);
+void preempt_enable(void);
+
+static inline void preempt_disable_notrace(void)
+{
+ preempt_disable();
+}
+
+static inline void preempt_enable_no_resched(void)
+{
+ preempt_enable();
+}
+
+static inline void preempt_enable_notrace(void)
+{
+ preempt_enable();
+}
+
+static inline int preempt_count(void)
+{
+ return preempt_disable_count;
+}
+
+static inline bool preemptible(void)
+{
+ return !preempt_count();
+}
+
+static inline int get_cpu(void)
+{
+ preempt_disable();
+ return thread_cpu_id;
+}
+
+static inline void put_cpu(void)
+{
+ preempt_enable();
+}
+
+static inline void might_sleep(void)
+{
+ BUG_ON(preempt_disable_count);
+}
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
new file mode 100644
index 000000000..97f592048
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <config.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include "int_typedefs.h"
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "locks.h"
+#include "misc.h"
+#include "preempt.h"
+#include "percpu.h"
+#include "workqueues.h"
+
+#include <linux/srcu.h>
+
+/* Functions needed from modify_srcu.c */
+bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
+void srcu_flip(struct srcu_struct *sp);
+
+/* Simpler implementation of synchronize_srcu that ignores batching. */
+void synchronize_srcu(struct srcu_struct *sp)
+{
+ int idx;
+ /*
+ * This code assumes that try_check_zero will succeed anyway,
+ * so there is no point in multiple tries.
+ */
+ const int trycount = 1;
+
+ might_sleep();
+
+ /* Ignore the lock, as multiple writers aren't working yet anyway. */
+
+ idx = 1 ^ (sp->completed & 1);
+
+ /* For comments see srcu_advance_batches. */
+
+ assume(try_check_zero(sp, idx, trycount));
+
+ srcu_flip(sp);
+
+ assume(try_check_zero(sp, idx^1, trycount));
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
new file mode 100644
index 000000000..28b960300
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef WORKQUEUES_H
+#define WORKQUEUES_H
+
+#include <stdbool.h>
+
+#include "barriers.h"
+#include "bug_on.h"
+#include "int_typedefs.h"
+
+#include <linux/types.h>
+
+/* Stub workqueue implementation. */
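+/*
+ * The queueing functions below simply BUG(): the verified SRCU code is
+ * not expected to reach them, and doing so fails the proof.
+ */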
+
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(unsigned long __data);
+
+struct work_struct {
+/* atomic_long_t data; */
+ unsigned long data;
+
+ struct list_head entry;
+ work_func_t func;
+#ifdef CONFIG_LOCKDEP
+ struct lockdep_map lockdep_map;
+#endif
+};
+
+struct timer_list {
+ struct hlist_node entry;
+ unsigned long expires;
+ void (*function)(unsigned long);
+ unsigned long data;
+ u32 flags;
+ int slack;
+};
+
+struct delayed_work {
+ struct work_struct work;
+ struct timer_list timer;
+
+ /* target workqueue and CPU ->timer uses to queue ->work */
+ struct workqueue_struct *wq;
+ int cpu;
+};
+
+
+static inline bool schedule_work(struct work_struct *work)
+{
+ BUG();
+ return true;
+}
+
+static inline bool schedule_work_on(int cpu, struct work_struct *work)
+{
+ BUG();
+ return true;
+}
+
+static inline bool queue_work(struct workqueue_struct *wq,
+ struct work_struct *work)
+{
+ BUG();
+ return true;
+}
+
+static inline bool queue_delayed_work(struct workqueue_struct *wq,
+ struct delayed_work *dwork,
+ unsigned long delay)
+{
+ BUG();
+ return true;
+}
+
+#define INIT_WORK(w, f) \
+ do { \
+ (w)->data = 0; \
+ (w)->func = (f); \
+ } while (0)
+
+#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
+
+#define __WORK_INITIALIZER(n, f) { \
+ .data = 0, \
+ .entry = { &(n).entry, &(n).entry }, \
+ .func = f \
+ }
+
+/* Don't bother initializing timer. */
+#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
+ .work = __WORK_INITIALIZER((n).work, (f)), \
+ }
+
+#define DECLARE_WORK(n, f) \
+	struct work_struct n = __WORK_INITIALIZER(n, f)
+
+#define DECLARE_DELAYED_WORK(n, f) \
+ struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
+
+#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
new file mode 100644
index 000000000..f47cb2045
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
@@ -0,0 +1 @@
+*.out
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
new file mode 100644
index 000000000..ad21b925f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
+
+all:
+ for i in ./*.pass; do \
+ echo $$i ; \
+ CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
+ done
+ for i in ./*.fail; do \
+ echo $$i ; \
+ CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
+ done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
new file mode 100644
index 000000000..40c807591
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
new file mode 100644
index 000000000..ada5baf0b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
new file mode 100644
index 000000000..8fe00c8db
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
new file mode 100644
index 000000000..612ed6772
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
@@ -0,0 +1 @@
+test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
new file mode 100644
index 000000000..2ce2016f7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <src/combined_source.c>
+
+int x;
+int y;
+
+int __unbuffered_tpr_x;
+int __unbuffered_tpr_y;
+
+DEFINE_SRCU(ss);
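+
+/*
+ * Store-buffering litmus test: rcu_reader() reads y and writes x inside
+ * an SRCU read-side critical section, while the updater writes y and
+ * then reads x on the far side of synchronize_srcu(). SRCU forbids the
+ * outcome in which both threads read 0, which main() asserts below; the
+ * FORCE_FAILURE variants deliberately break the pattern.
+ */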
+
+void rcu_reader(void)
+{
+ int idx;
+
+#ifndef FORCE_FAILURE_3
+ idx = srcu_read_lock(&ss);
+#endif
+ might_sleep();
+
+ __unbuffered_tpr_y = READ_ONCE(y);
+#ifdef FORCE_FAILURE
+ srcu_read_unlock(&ss, idx);
+ idx = srcu_read_lock(&ss);
+#endif
+ WRITE_ONCE(x, 1);
+
+#ifndef FORCE_FAILURE_3
+ srcu_read_unlock(&ss, idx);
+#endif
+ might_sleep();
+}
+
+void *thread_update(void *arg)
+{
+ WRITE_ONCE(y, 1);
+#ifndef FORCE_FAILURE_2
+ synchronize_srcu(&ss);
+#endif
+ might_sleep();
+ __unbuffered_tpr_x = READ_ONCE(x);
+
+ return NULL;
+}
+
+void *thread_process_reader(void *arg)
+{
+ rcu_reader();
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t tu;
+ pthread_t tpr;
+
+ if (pthread_create(&tu, NULL, thread_update, NULL))
+ abort();
+ if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
+ abort();
+ if (pthread_join(tu, NULL))
+ abort();
+ if (pthread_join(tpr, NULL))
+ abort();
+ assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
+
+#ifdef ASSERT_END
+ assert(0);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
new file mode 100755
index 000000000..2fe1f0339
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
@@ -0,0 +1,103 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# This script expects a mode (either --should-pass or --should-fail) followed by
+# an input file. The script uses the following environment variables. The test C
+# source file is expected to be named test.c in the directory containing the
+# input file.
+#
+# CBMC: The command to run CBMC. Default: cbmc
+# CBMC_FLAGS: Additional flags to pass to CBMC
+# NR_CPUS: Number of cpus to run tests with. Default specified by the test
+# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
+# kernel: Version included in the linux kernel source.
+# simple: Use try_check_zero directly.
+#
+# The input file is a script that is sourced by this file. It can define any of
+# the following variables to configure the test.
+#
+# test_cbmc_options: Extra options to pass to CBMC.
+# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
+# The test is expected to pass if it is run with fewer. (Only
+# useful for .fail files)
+# default_cpus: Quantity of CPUs to use for the test, if not specified on the
+#		 command line. Default: the larger of 2 and min_cpus_fail.
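+#
+# Example invocation (hypothetical paths), pinning the CPU count:
+#	NR_CPUS=3 sh tests/test_script.sh --should-fail tests/store_buffering/force.fail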
+
+set -e
+
+if test "$#" -ne 2; then
+ echo "Expected one option followed by an input file" 1>&2
+ exit 99
+fi
+
+if test "x$1" = "x--should-pass"; then
+ should_pass="yes"
+elif test "x$1" = "x--should-fail"; then
+ should_pass="no"
+else
+ echo "Unrecognized argument '$1'" 1>&2
+
+ # Exit code 99 indicates a hard error.
+ exit 99
+fi
+
+CBMC=${CBMC:-cbmc}
+
+SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
+
+case ${SYNC_SRCU_MODE} in
+kernel) sync_srcu_mode_flags="" ;;
+simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
+
+*)
+ echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
+ exit 99
+ ;;
+esac
+
+min_cpus_fail=1
+
+c_file=`dirname "$2"`/test.c
+
+# Source the input file.
+. "$2"
+
+if test ${min_cpus_fail} -gt 2; then
+ default_default_cpus=${min_cpus_fail}
+else
+ default_default_cpus=2
+fi
+default_cpus=${default_cpus:-${default_default_cpus}}
+cpus=${NR_CPUS:-${default_cpus}}
+
+# Check if there are too few cpus to make the test fail.
+if test $cpus -lt ${min_cpus_fail:-0}; then
+ should_pass="yes"
+fi
+
+cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
+
+echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
+if ${CBMC} ${cbmc_opts} "${c_file}"; then
+ # Verification successful. Make sure that it was supposed to verify.
+ test "x${should_pass}" = xyes
+else
+ cbmc_exit_status=$?
+
+ # An exit status of 10 indicates a failed verification.
+ # (see cbmc_parse_optionst::do_bmc in the CBMC source code)
+ if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
+ :
+ else
+ echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
+
+ # Parse errors have exit status 6. Any other type of error
+ # should be considered a hard error.
+ if test ${cbmc_exit_status} -ne 6 && \
+ test ${cbmc_exit_status} -ne 10; then
+ exit 99
+ else
+ exit 1
+ fi
+ fi
+fi
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
new file mode 100644
index 000000000..cc610da7e
--- /dev/null
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -0,0 +1,6 @@
+basic_percpu_ops_test
+basic_test
+basic_rseq_op_test
+param_test
+param_test_benchmark
+param_test_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
new file mode 100644
index 000000000..c30c52e1d
--- /dev/null
+++ b/tools/testing/selftests/rseq/Makefile
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L./ -Wl,-rpath=./
+LDLIBS += -lpthread
+
+# Own dependencies because we only want to build against 1st prerequisite, but
+# still track changes to header files and depend on shared object.
+OVERRIDE_TARGETS = 1
+
+TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
+ param_test_benchmark param_test_compare_twice
+
+TEST_GEN_PROGS_EXTENDED = librseq.so
+
+TEST_PROGS = run_param_test.sh
+
+include ../lib.mk
+
+$(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
+
+$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
new file mode 100644
index 000000000..eb3f6db36
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+struct percpu_lock_entry {
+ intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+ struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+ intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+ struct percpu_lock lock;
+ struct test_data_entry c[CPU_SETSIZE];
+ int reps;
+};
+
+struct percpu_list_node {
+ intptr_t data;
+ struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+ struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+ struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock. Returns the cpu lock was acquired on. */
+int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+ int cpu;
+
+ for (;;) {
+ int ret;
+
+ cpu = rseq_cpu_start();
+ ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+ 0, 1, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ /*
+ * Acquire semantic when taking lock after control dependency.
+ * Matches rseq_smp_store_release().
+ */
+ rseq_smp_acquire__after_ctrl_dep();
+ return cpu;
+}
+
+void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+ assert(lock->c[cpu].v == 1);
+ /*
+ * Release lock, with release semantic. Matches
+ * rseq_smp_acquire__after_ctrl_dep().
+ */
+ rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+ struct spinlock_test_data *data = arg;
+ int i, cpu;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ for (i = 0; i < data->reps; i++) {
+ cpu = rseq_this_cpu_lock(&data->lock);
+ data->c[cpu].count++;
+ rseq_percpu_unlock(&data->lock, cpu);
+ }
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock. Obviously real applications might prefer to simply use a
+ * per-cpu increment; however, this is reasonable for a test and the
+ * lock can be extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+ const int num_threads = 200;
+ int i;
+ uint64_t sum;
+ pthread_t test_threads[num_threads];
+ struct spinlock_test_data data;
+
+ memset(&data, 0, sizeof(data));
+ data.reps = 5000;
+
+ for (i = 0; i < num_threads; i++)
+ pthread_create(&test_threads[i], NULL,
+ test_percpu_spinlock_thread, &data);
+
+ for (i = 0; i < num_threads; i++)
+ pthread_join(test_threads[i], NULL);
+
+ sum = 0;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ sum += data.c[i].count;
+
+ assert(sum == (uint64_t)data.reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+ struct percpu_list_node *node,
+ int *_cpu)
+{
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr, newval, expect;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load list->c[cpu].head with single-copy atomicity. */
+ expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+ newval = (intptr_t)node;
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ node->next = (struct percpu_list_node *)expect;
+ ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+}
+
+/*
+ * Unlike with a traditional lock-less linked list, the availability of an
+ * rseq primitive allows us to implement pop without concerns over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+ int *_cpu)
+{
+ for (;;) {
+ struct percpu_list_node *head;
+ intptr_t *targetptr, expectnot, *load;
+ off_t offset;
+ int ret, cpu;
+
+ cpu = rseq_cpu_start();
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ expectnot = (intptr_t)NULL;
+ offset = offsetof(struct percpu_list_node, next);
+ load = (intptr_t *)&head;
+ ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+ offset, load, cpu);
+ if (rseq_likely(!ret)) {
+ if (_cpu)
+ *_cpu = cpu;
+ return head;
+ }
+ if (ret > 0)
+ return NULL;
+ /* Retry if rseq aborts. */
+ }
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+ struct percpu_list_node *node;
+
+ node = list->c[cpu].head;
+ if (!node)
+ return NULL;
+ list->c[cpu].head = node->next;
+ return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+ int i;
+ struct percpu_list *list = (struct percpu_list *)arg;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ for (i = 0; i < 100000; i++) {
+ struct percpu_list_node *node;
+
+ node = this_cpu_list_pop(list, NULL);
+ sched_yield(); /* encourage shuffling */
+ if (node)
+ this_cpu_list_push(list, node, NULL);
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads. */
+void test_percpu_list(void)
+{
+ int i, j;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_list list;
+ pthread_t test_threads[200];
+ cpu_set_t allowed_cpus;
+
+ memset(&list, 0, sizeof(list));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+ for (j = 1; j <= 100; j++) {
+ struct percpu_list_node *node;
+
+ expected_sum += j;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = j;
+ node->next = list.c[i].head;
+ list.c[i].head = node;
+ }
+ }
+
+ for (i = 0; i < 200; i++)
+ pthread_create(&test_threads[i], NULL,
+ test_percpu_list_thread, &list);
+
+ for (i = 0; i < 200; i++)
+ pthread_join(test_threads[i], NULL);
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while ((node = __percpu_list_pop(&list, i))) {
+ sum += node->data;
+ free(node);
+ }
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+int main(int argc, char **argv)
+{
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ printf("spinlock\n");
+ test_percpu_spinlock();
+ printf("percpu_list\n");
+ test_percpu_list();
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto error;
+ }
+ return 0;
+
+error:
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
new file mode 100644
index 000000000..d8efbfb89
--- /dev/null
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Basic test coverage for critical regions and rseq_current_cpu().
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+
+#include "rseq.h"
+
+void test_cpu_pointer(void)
+{
+ cpu_set_t affinity, test_affinity;
+ int i;
+
+ sched_getaffinity(0, sizeof(affinity), &affinity);
+ CPU_ZERO(&test_affinity);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, &affinity)) {
+ CPU_SET(i, &test_affinity);
+ sched_setaffinity(0, sizeof(test_affinity),
+ &test_affinity);
+ assert(sched_getcpu() == i);
+ assert(rseq_current_cpu() == i);
+ assert(rseq_current_cpu_raw() == i);
+ assert(rseq_cpu_start() == i);
+ CPU_CLR(i, &test_affinity);
+ }
+ }
+ sched_setaffinity(0, sizeof(affinity), &affinity);
+}
+
+int main(int argc, char **argv)
+{
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto init_thread_error;
+ }
+ printf("testing current cpu\n");
+ test_cpu_pointer();
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ goto init_thread_error;
+ }
+ return 0;
+
+init_thread_error:
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
new file mode 100644
index 000000000..e8a657a5f
--- /dev/null
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -0,0 +1,1331 @@
+// SPDX-License-Identifier: LGPL-2.1
+#define _GNU_SOURCE
+#include <assert.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <poll.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <stddef.h>
+
+static inline pid_t rseq_gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+
+#define NR_INJECT 9
+static int loop_cnt[NR_INJECT + 1];
+
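+/*
+ * Mirrors of loop_cnt[1..6] under asm-visible names, so the
+ * RSEQ_INJECT_ASM() delay loops below can address them directly;
+ * main() copies loop_cnt[] into them before running the tests.
+ */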
+static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
+static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
+static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
+static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
+static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
+static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
+
+static int opt_modulo, verbose;
+
+static int opt_yield, opt_signal, opt_sleep,
+ opt_disable_rseq, opt_threads = 200,
+ opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+
+#ifndef RSEQ_SKIP_FASTPATH
+static long long opt_reps = 5000;
+#else
+static long long opt_reps = 100;
+#endif
+
+static __thread __attribute__((tls_model("initial-exec")))
+unsigned int signals_delivered;
+
+#ifndef BENCHMARK
+
+static __thread __attribute__((tls_model("initial-exec"), unused))
+unsigned int yield_mod_cnt, nr_abort;
+
+#define printf_verbose(fmt, ...) \
+ do { \
+ if (verbose) \
+ printf(fmt, ## __VA_ARGS__); \
+ } while (0)
+
+#ifdef __i386__
+
+#define INJECT_ASM_REG "eax"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
+ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "dec %%" INJECT_ASM_REG "\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__x86_64__)
+
+#define INJECT_ASM_REG_P "rax"
+#define INJECT_ASM_REG "eax"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG_P \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
+ "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
+ "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "dec %%" INJECT_ASM_REG "\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__s390__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "r12"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
+ "je 333f\n\t" \
+ "222:\n\t" \
+ "ahi %%" INJECT_ASM_REG ", -1\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__ARMEL__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "r4"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "cmp " INJECT_ASM_REG ", #0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "subs " INJECT_ASM_REG ", #1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__AARCH64EL__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1] "Qo" (loop_cnt[1]) \
+ , [loop_cnt_2] "Qo" (loop_cnt[2]) \
+ , [loop_cnt_3] "Qo" (loop_cnt[3]) \
+ , [loop_cnt_4] "Qo" (loop_cnt[4]) \
+ , [loop_cnt_5] "Qo" (loop_cnt[5]) \
+ , [loop_cnt_6] "Qo" (loop_cnt[6])
+
+#define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
+
+#define RSEQ_INJECT_ASM(n) \
+ " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
+ " cbz " INJECT_ASM_REG ", 333f\n" \
+ "222:\n" \
+ " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
+ " cbnz " INJECT_ASM_REG ", 222b\n" \
+ "333:\n"
+
+#elif __PPC__
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "r18"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "$5"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "beqz " INJECT_ASM_REG ", 333f\n\t" \
+ "222:\n\t" \
+ "addiu " INJECT_ASM_REG ", -1\n\t" \
+ "bnez " INJECT_ASM_REG ", 222b\n\t" \
+ "333:\n\t"
+
+#else
+#error unsupported target
+#endif
+
+#define RSEQ_INJECT_FAILED \
+ nr_abort++;
+
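+/*
+ * C-level injection point: spins for loop_cnt[n] iterations; when
+ * loop_cnt[n] is -1 and -m is set, it instead periodically sleeps
+ * (-s), yields (-y) or raises SIGUSR1 (-k) every opt_modulo passes.
+ */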
+#define RSEQ_INJECT_C(n) \
+{ \
+ int loc_i, loc_nr_loops = loop_cnt[n]; \
+ \
+ for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
+ rseq_barrier(); \
+ } \
+ if (loc_nr_loops == -1 && opt_modulo) { \
+ if (yield_mod_cnt == opt_modulo - 1) { \
+ if (opt_sleep > 0) \
+ poll(NULL, 0, opt_sleep); \
+ if (opt_yield) \
+ sched_yield(); \
+ if (opt_signal) \
+ raise(SIGUSR1); \
+ yield_mod_cnt = 0; \
+ } else { \
+ yield_mod_cnt++; \
+ } \
+ } \
+}
+
+#else
+
+#define printf_verbose(fmt, ...)
+
+#endif /* BENCHMARK */
+
+#include "rseq.h"
+
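+/*
+ * Per-cpu entries below are aligned to 128 bytes so each cpu's slot
+ * occupies its own cache lines, avoiding false sharing between cpus.
+ */
+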
+struct percpu_lock_entry {
+ intptr_t v;
+} __attribute__((aligned(128)));
+
+struct percpu_lock {
+ struct percpu_lock_entry c[CPU_SETSIZE];
+};
+
+struct test_data_entry {
+ intptr_t count;
+} __attribute__((aligned(128)));
+
+struct spinlock_test_data {
+ struct percpu_lock lock;
+ struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct spinlock_thread_test_data {
+ struct spinlock_test_data *data;
+ long long reps;
+ int reg;
+};
+
+struct inc_test_data {
+ struct test_data_entry c[CPU_SETSIZE];
+};
+
+struct inc_thread_test_data {
+ struct inc_test_data *data;
+ long long reps;
+ int reg;
+};
+
+struct percpu_list_node {
+ intptr_t data;
+ struct percpu_list_node *next;
+};
+
+struct percpu_list_entry {
+ struct percpu_list_node *head;
+} __attribute__((aligned(128)));
+
+struct percpu_list {
+ struct percpu_list_entry c[CPU_SETSIZE];
+};
+
+#define BUFFER_ITEM_PER_CPU 100
+
+struct percpu_buffer_node {
+ intptr_t data;
+};
+
+struct percpu_buffer_entry {
+ intptr_t offset;
+ intptr_t buflen;
+ struct percpu_buffer_node **array;
+} __attribute__((aligned(128)));
+
+struct percpu_buffer {
+ struct percpu_buffer_entry c[CPU_SETSIZE];
+};
+
+#define MEMCPY_BUFFER_ITEM_PER_CPU 100
+
+struct percpu_memcpy_buffer_node {
+ intptr_t data1;
+ uint64_t data2;
+};
+
+struct percpu_memcpy_buffer_entry {
+ intptr_t offset;
+ intptr_t buflen;
+ struct percpu_memcpy_buffer_node *array;
+} __attribute__((aligned(128)));
+
+struct percpu_memcpy_buffer {
+ struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
+};
+
+/* A simple percpu spinlock. Grabs lock on current cpu. */
+static int rseq_this_cpu_lock(struct percpu_lock *lock)
+{
+ int cpu;
+
+ for (;;) {
+ int ret;
+
+ cpu = rseq_cpu_start();
+ ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+ 0, 1, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ /*
+ * Acquire semantic when taking lock after control dependency.
+ * Matches rseq_smp_store_release().
+ */
+ rseq_smp_acquire__after_ctrl_dep();
+ return cpu;
+}
+
+static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
+{
+ assert(lock->c[cpu].v == 1);
+ /*
+ * Release lock, with release semantic. Matches
+ * rseq_smp_acquire__after_ctrl_dep().
+ */
+ rseq_smp_store_release(&lock->c[cpu].v, 0);
+}
+
+void *test_percpu_spinlock_thread(void *arg)
+{
+ struct spinlock_thread_test_data *thread_data = arg;
+ struct spinlock_test_data *data = thread_data->data;
+ long long i, reps;
+
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_register_current_thread())
+ abort();
+ reps = thread_data->reps;
+ for (i = 0; i < reps; i++) {
+		int cpu;
+
+		cpu = rseq_this_cpu_lock(&data->lock);
+ data->c[cpu].count++;
+ rseq_percpu_unlock(&data->lock, cpu);
+#ifndef BENCHMARK
+		if (i != 0 && reps >= 10 && !(i % (reps / 10)))
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
+#endif
+ }
+	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_unregister_current_thread())
+ abort();
+ return NULL;
+}
+
+/*
+ * A simple test which implements a sharded counter using a per-cpu
+ * lock. Real applications would likely just use a per-cpu increment;
+ * however, this is reasonable for a test, and the lock can be
+ * extended to synchronize more complicated operations.
+ */
+void test_percpu_spinlock(void)
+{
+ const int num_threads = opt_threads;
+ int i, ret;
+ uint64_t sum;
+ pthread_t test_threads[num_threads];
+ struct spinlock_test_data data;
+ struct spinlock_thread_test_data thread_data[num_threads];
+
+ memset(&data, 0, sizeof(data));
+ for (i = 0; i < num_threads; i++) {
+ thread_data[i].reps = opt_reps;
+ if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+ thread_data[i].reg = 1;
+ else
+ thread_data[i].reg = 0;
+ thread_data[i].data = &data;
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_spinlock_thread,
+ &thread_data[i]);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ sum = 0;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ sum += data.c[i].count;
+
+ assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void *test_percpu_inc_thread(void *arg)
+{
+ struct inc_thread_test_data *thread_data = arg;
+ struct inc_test_data *data = thread_data->data;
+ long long i, reps;
+
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_register_current_thread())
+ abort();
+ reps = thread_data->reps;
+ for (i = 0; i < reps; i++) {
+ int ret;
+
+ do {
+ int cpu;
+
+ cpu = rseq_cpu_start();
+ ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+ } while (rseq_unlikely(ret));
+#ifndef BENCHMARK
+		if (i != 0 && reps >= 10 && !(i % (reps / 10)))
+ printf_verbose("tid %d: count %lld\n",
+ (int) rseq_gettid(), i);
+#endif
+ }
+	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && thread_data->reg &&
+ rseq_unregister_current_thread())
+ abort();
+ return NULL;
+}
+
+void test_percpu_inc(void)
+{
+ const int num_threads = opt_threads;
+ int i, ret;
+ uint64_t sum;
+ pthread_t test_threads[num_threads];
+ struct inc_test_data data;
+ struct inc_thread_test_data thread_data[num_threads];
+
+ memset(&data, 0, sizeof(data));
+ for (i = 0; i < num_threads; i++) {
+ thread_data[i].reps = opt_reps;
+ if (opt_disable_mod <= 0 || (i % opt_disable_mod))
+ thread_data[i].reg = 1;
+ else
+ thread_data[i].reg = 0;
+ thread_data[i].data = &data;
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_inc_thread,
+ &thread_data[i]);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ sum = 0;
+ for (i = 0; i < CPU_SETSIZE; i++)
+ sum += data.c[i].count;
+
+ assert(sum == (uint64_t)opt_reps * num_threads);
+}
+
+void this_cpu_list_push(struct percpu_list *list,
+ struct percpu_list_node *node,
+ int *_cpu)
+{
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr, newval, expect;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load list->c[cpu].head with single-copy atomicity. */
+ expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
+ newval = (intptr_t)node;
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ node->next = (struct percpu_list_node *)expect;
+ ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+}
+
+/*
+ * Unlike a traditional lock-less linked list, the availability of an
+ * rseq primitive allows us to implement pop without concern over
+ * ABA-type races.
+ */
+struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
+ int *_cpu)
+{
+ struct percpu_list_node *node = NULL;
+ int cpu;
+
+ for (;;) {
+ struct percpu_list_node *head;
+ intptr_t *targetptr, expectnot, *load;
+ off_t offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ targetptr = (intptr_t *)&list->c[cpu].head;
+ expectnot = (intptr_t)NULL;
+ offset = offsetof(struct percpu_list_node, next);
+ load = (intptr_t *)&head;
+ ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+ offset, load, cpu);
+ if (rseq_likely(!ret)) {
+ node = head;
+ break;
+ }
+ if (ret > 0)
+ break;
+ /* Retry if rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return node;
+}
+
+/*
+ * __percpu_list_pop is not safe against concurrent accesses. Should
+ * only be used on lists that are not concurrently modified.
+ */
+struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
+{
+ struct percpu_list_node *node;
+
+ node = list->c[cpu].head;
+ if (!node)
+ return NULL;
+ list->c[cpu].head = node->next;
+ return node;
+}
+
+void *test_percpu_list_thread(void *arg)
+{
+ long long i, reps;
+ struct percpu_list *list = (struct percpu_list *)arg;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ abort();
+
+ reps = opt_reps;
+ for (i = 0; i < reps; i++) {
+ struct percpu_list_node *node;
+
+ node = this_cpu_list_pop(list, NULL);
+ if (opt_yield)
+ sched_yield(); /* encourage shuffling */
+ if (node)
+ this_cpu_list_push(list, node, NULL);
+ }
+
+	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu linked list from many threads. */
+void test_percpu_list(void)
+{
+ const int num_threads = opt_threads;
+ int i, j, ret;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_list list;
+ pthread_t test_threads[num_threads];
+ cpu_set_t allowed_cpus;
+
+ memset(&list, 0, sizeof(list));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+ for (j = 1; j <= 100; j++) {
+ struct percpu_list_node *node;
+
+ expected_sum += j;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = j;
+ node->next = list.c[i].head;
+ list.c[i].head = node;
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_list_thread, &list);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while ((node = __percpu_list_pop(&list, i))) {
+ sum += node->data;
+ free(node);
+ }
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+bool this_cpu_buffer_push(struct percpu_buffer *buffer,
+ struct percpu_buffer_node *node,
+ int *_cpu)
+{
+ bool result = false;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr_spec, newval_spec;
+ intptr_t *targetptr_final, newval_final;
+ intptr_t offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
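+		/* Load offset with single-copy atomicity. */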
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == buffer->c[cpu].buflen)
+ break;
+ newval_spec = (intptr_t)node;
+ targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
+ newval_final = offset + 1;
+ targetptr_final = &buffer->c[cpu].offset;
+ if (opt_mb)
+ ret = rseq_cmpeqv_trystorev_storev_release(
+ targetptr_final, offset, targetptr_spec,
+ newval_spec, newval_final, cpu);
+ else
+ ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
+ offset, targetptr_spec, newval_spec,
+ newval_final, cpu);
+ if (rseq_likely(!ret)) {
+ result = true;
+ break;
+ }
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return result;
+}
+
+struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
+ int *_cpu)
+{
+ struct percpu_buffer_node *head;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr, newval;
+ intptr_t offset;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load offset with single-copy atomicity. */
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == 0) {
+ head = NULL;
+ break;
+ }
+ head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
+ newval = offset - 1;
+ targetptr = (intptr_t *)&buffer->c[cpu].offset;
+ ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
+ (intptr_t *)&buffer->c[cpu].array[offset - 1],
+ (intptr_t)head, newval, cpu);
+ if (rseq_likely(!ret))
+ break;
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return head;
+}
+
+/*
+ * __percpu_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
+ int cpu)
+{
+ struct percpu_buffer_node *head;
+ intptr_t offset;
+
+ offset = buffer->c[cpu].offset;
+ if (offset == 0)
+ return NULL;
+ head = buffer->c[cpu].array[offset - 1];
+ buffer->c[cpu].offset = offset - 1;
+ return head;
+}
+
+void *test_percpu_buffer_thread(void *arg)
+{
+ long long i, reps;
+ struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ abort();
+
+ reps = opt_reps;
+ for (i = 0; i < reps; i++) {
+ struct percpu_buffer_node *node;
+
+ node = this_cpu_buffer_pop(buffer, NULL);
+ if (opt_yield)
+ sched_yield(); /* encourage shuffling */
+ if (node) {
+ if (!this_cpu_buffer_push(buffer, node, NULL)) {
+ /* Should increase buffer size. */
+ abort();
+ }
+ }
+ }
+
+	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads. */
+void test_percpu_buffer(void)
+{
+ const int num_threads = opt_threads;
+ int i, j, ret;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_buffer buffer;
+ pthread_t test_threads[num_threads];
+ cpu_set_t allowed_cpus;
+
+ memset(&buffer, 0, sizeof(buffer));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+		/* Worst case is every item on the same CPU. */
+ buffer.c[i].array =
+ malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+ BUFFER_ITEM_PER_CPU);
+ assert(buffer.c[i].array);
+ buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
+ for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
+ struct percpu_buffer_node *node;
+
+ expected_sum += j;
+
+ /*
+ * We could theoretically put the word-sized
+ * "data" directly in the buffer. However, we
+ * want to model objects that would not fit
+ * within a single word, so allocate an object
+ * for each node.
+ */
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = j;
+ buffer.c[i].array[j - 1] = node;
+ buffer.c[i].offset++;
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_buffer_thread, &buffer);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_buffer_node *node;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while ((node = __percpu_buffer_pop(&buffer, i))) {
+ sum += node->data;
+ free(node);
+ }
+ free(buffer.c[i].array);
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
+ struct percpu_memcpy_buffer_node item,
+ int *_cpu)
+{
+ bool result = false;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr_final, newval_final, offset;
+ char *destptr, *srcptr;
+ size_t copylen;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load offset with single-copy atomicity. */
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == buffer->c[cpu].buflen)
+ break;
+ destptr = (char *)&buffer->c[cpu].array[offset];
+ srcptr = (char *)&item;
+ /* copylen must be <= 4kB. */
+ copylen = sizeof(item);
+ newval_final = offset + 1;
+ targetptr_final = &buffer->c[cpu].offset;
+ if (opt_mb)
+ ret = rseq_cmpeqv_trymemcpy_storev_release(
+ targetptr_final, offset,
+ destptr, srcptr, copylen,
+ newval_final, cpu);
+ else
+ ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+ offset, destptr, srcptr, copylen,
+ newval_final, cpu);
+ if (rseq_likely(!ret)) {
+ result = true;
+ break;
+ }
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return result;
+}
+
+bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+ struct percpu_memcpy_buffer_node *item,
+ int *_cpu)
+{
+ bool result = false;
+ int cpu;
+
+ for (;;) {
+ intptr_t *targetptr_final, newval_final, offset;
+ char *destptr, *srcptr;
+ size_t copylen;
+ int ret;
+
+ cpu = rseq_cpu_start();
+ /* Load offset with single-copy atomicity. */
+ offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
+ if (offset == 0)
+ break;
+ destptr = (char *)item;
+ srcptr = (char *)&buffer->c[cpu].array[offset - 1];
+ /* copylen must be <= 4kB. */
+ copylen = sizeof(*item);
+ newval_final = offset - 1;
+ targetptr_final = &buffer->c[cpu].offset;
+ ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
+ offset, destptr, srcptr, copylen,
+ newval_final, cpu);
+ if (rseq_likely(!ret)) {
+ result = true;
+ break;
+ }
+ /* Retry if comparison fails or rseq aborts. */
+ }
+ if (_cpu)
+ *_cpu = cpu;
+ return result;
+}
+
+/*
+ * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
+ * only be used on buffers that are not concurrently modified.
+ */
+bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
+ struct percpu_memcpy_buffer_node *item,
+ int cpu)
+{
+ intptr_t offset;
+
+ offset = buffer->c[cpu].offset;
+ if (offset == 0)
+ return false;
+ memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
+ buffer->c[cpu].offset = offset - 1;
+ return true;
+}
+
+void *test_percpu_memcpy_buffer_thread(void *arg)
+{
+ long long i, reps;
+ struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ abort();
+
+ reps = opt_reps;
+ for (i = 0; i < reps; i++) {
+ struct percpu_memcpy_buffer_node item;
+ bool result;
+
+ result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
+ if (opt_yield)
+ sched_yield(); /* encourage shuffling */
+ if (result) {
+ if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
+ /* Should increase buffer size. */
+ abort();
+ }
+ }
+ }
+
+	printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
+ (int) rseq_gettid(), nr_abort, signals_delivered);
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+
+ return NULL;
+}
+
+/* Simultaneous modification to a per-cpu buffer from many threads. */
+void test_percpu_memcpy_buffer(void)
+{
+ const int num_threads = opt_threads;
+ int i, j, ret;
+ uint64_t sum = 0, expected_sum = 0;
+ struct percpu_memcpy_buffer buffer;
+ pthread_t test_threads[num_threads];
+ cpu_set_t allowed_cpus;
+
+ memset(&buffer, 0, sizeof(buffer));
+
+ /* Generate list entries for every usable cpu. */
+ sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+		/* Worst case is every item on the same CPU. */
+ buffer.c[i].array =
+ malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
+ MEMCPY_BUFFER_ITEM_PER_CPU);
+ assert(buffer.c[i].array);
+ buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
+ for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
+ expected_sum += 2 * j + 1;
+
+			/*
+			 * We could theoretically put the word-sized
+			 * "data" directly in the buffer. However, we
+			 * want to model objects that would not fit
+			 * within a single word, so each node carries
+			 * two fields that are copied into the buffer
+			 * by value.
+			 */
+ buffer.c[i].array[j - 1].data1 = j;
+ buffer.c[i].array[j - 1].data2 = j + 1;
+ buffer.c[i].offset++;
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&test_threads[i], NULL,
+ test_percpu_memcpy_buffer_thread,
+ &buffer);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(test_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_memcpy_buffer_node item;
+
+ if (!CPU_ISSET(i, &allowed_cpus))
+ continue;
+
+ while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
+ sum += item.data1;
+ sum += item.data2;
+ }
+ free(buffer.c[i].array);
+ }
+
+ /*
+ * All entries should now be accounted for (unless some external
+ * actor is interfering with our allowed affinity while this
+ * test is running).
+ */
+ assert(sum == expected_sum);
+}
+
+static void test_signal_interrupt_handler(int signo)
+{
+ signals_delivered++;
+}
+
+static int set_signal_handler(void)
+{
+ int ret = 0;
+ struct sigaction sa;
+ sigset_t sigset;
+
+ ret = sigemptyset(&sigset);
+ if (ret < 0) {
+ perror("sigemptyset");
+ return ret;
+ }
+
+ sa.sa_handler = test_signal_interrupt_handler;
+ sa.sa_mask = sigset;
+ sa.sa_flags = 0;
+ ret = sigaction(SIGUSR1, &sa, NULL);
+ if (ret < 0) {
+ perror("sigaction");
+ return ret;
+ }
+
+ printf_verbose("Signal handler set for SIGUSR1\n");
+
+ return ret;
+}
+
+static void show_usage(int argc, char **argv)
+{
+	printf("Usage: %s <OPTIONS>\n", argv[0]);
+ printf("OPTIONS:\n");
+ printf(" [-1 loops] Number of loops for delay injection 1\n");
+ printf(" [-2 loops] Number of loops for delay injection 2\n");
+ printf(" [-3 loops] Number of loops for delay injection 3\n");
+ printf(" [-4 loops] Number of loops for delay injection 4\n");
+ printf(" [-5 loops] Number of loops for delay injection 5\n");
+ printf(" [-6 loops] Number of loops for delay injection 6\n");
+ printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
+ printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
+ printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
+ printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
+ printf(" [-y] Yield\n");
+ printf(" [-k] Kill thread with signal\n");
+ printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
+ printf(" [-t N] Number of threads (default 200)\n");
+ printf(" [-r N] Number of repetitions per thread (default 5000)\n");
+ printf(" [-d] Disable rseq system call (no initialization)\n");
+ printf(" [-D M] Disable rseq for each M threads\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
+ printf(" [-v] Verbose output.\n");
+ printf(" [-h] Show this help.\n");
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ if (argv[i][0] != '-')
+ continue;
+ switch (argv[i][1]) {
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
+ i++;
+ break;
+ case 'm':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_modulo = atol(argv[i + 1]);
+ if (opt_modulo < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 's':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_sleep = atol(argv[i + 1]);
+ if (opt_sleep < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'y':
+ opt_yield = 1;
+ break;
+ case 'k':
+ opt_signal = 1;
+ break;
+ case 'd':
+ opt_disable_rseq = 1;
+ break;
+ case 'D':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_disable_mod = atol(argv[i + 1]);
+ if (opt_disable_mod < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 't':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_threads = atol(argv[i + 1]);
+ if (opt_threads < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'r':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_reps = atoll(argv[i + 1]);
+ if (opt_reps < 0) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'h':
+ show_usage(argc, argv);
+ goto end;
+ case 'T':
+ if (argc < i + 2) {
+ show_usage(argc, argv);
+ goto error;
+ }
+ opt_test = *argv[i + 1];
+ switch (opt_test) {
+ case 's':
+ case 'l':
+ case 'i':
+ case 'b':
+ case 'm':
+ break;
+ default:
+ show_usage(argc, argv);
+ goto error;
+ }
+ i++;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'M':
+ opt_mb = 1;
+ break;
+ default:
+ show_usage(argc, argv);
+ goto error;
+ }
+ }
+
+ loop_cnt_1 = loop_cnt[1];
+ loop_cnt_2 = loop_cnt[2];
+ loop_cnt_3 = loop_cnt[3];
+ loop_cnt_4 = loop_cnt[4];
+ loop_cnt_5 = loop_cnt[5];
+ loop_cnt_6 = loop_cnt[6];
+
+ if (set_signal_handler())
+ goto error;
+
+ if (!opt_disable_rseq && rseq_register_current_thread())
+ goto error;
+ switch (opt_test) {
+ case 's':
+ printf_verbose("spinlock\n");
+ test_percpu_spinlock();
+ break;
+ case 'l':
+ printf_verbose("linked list\n");
+ test_percpu_list();
+ break;
+ case 'b':
+ printf_verbose("buffer\n");
+ test_percpu_buffer();
+ break;
+ case 'm':
+ printf_verbose("memcpy buffer\n");
+ test_percpu_memcpy_buffer();
+ break;
+ case 'i':
+ printf_verbose("counter increment\n");
+ test_percpu_inc();
+ break;
+ }
+ if (!opt_disable_rseq && rseq_unregister_current_thread())
+ abort();
+end:
+ return 0;
+
+error:
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
new file mode 100644
index 000000000..3cea19877
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -0,0 +1,716 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG 0x53053053
+
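+/*
+ * The kernel verifies that this signature immediately precedes the
+ * abort handler before branching to it; RSEQ_ASM_DEFINE_ABORT() below
+ * emits it as a .word right before the abort label.
+ */
+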
+#define rseq_smp_mb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_rmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
+#define rseq_smp_wmb() __asm__ __volatile__ ("dmb" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
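+/*
+ * Illustrative pairing (a sketch, not used by this header): a writer
+ * publishes a flag with rseq_smp_store_release() so that a reader
+ * performing rseq_smp_load_acquire() on the same flag also observes
+ * every store the writer made before the release. The per-cpu lock in
+ * the selftests relies on this acquire/release pairing.
+ */
+static inline void rseq_example_set_flag(intptr_t *flag)
+{
+	rseq_smp_store_release(flag, 1);
+}
+
+static inline int rseq_example_flag_is_set(intptr_t *flag)
+{
+	return rseq_smp_load_acquire(flag) == 1;
+}
+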
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
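+/*
+ * Note: the struct rseq_cs ABI fields are 64-bit, so on 32-bit ARM each
+ * address and offset in the table above is emitted as a 32-bit word
+ * followed by a zero padding word (little-endian).
+ */
+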
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "adr r0, " __rseq_str(cs_label) "\n\t" \
+ "str r0, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "ldr r0, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmp %[" __rseq_str(cpu_id) "], r0\n\t" \
+ "bne " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".balign 32\n\t" \
+ __rseq_str(table_label) ":\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".word " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".word " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+ start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expectnot], r0\n\t"
+ "beq %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expectnot], r0\n\t"
+ "beq %l[error2]\n\t"
+#endif
+ "str r0, %[load]\n\t"
+ "add r0, %[voffp]\n\t"
+ "ldr r0, [r0]\n\t"
+ /* final store */
+ "str r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "Ir" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ "ldr r0, %[v]\n\t"
+ "add r0, %[count]\n\t"
+ /* final store */
+ "str r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* try store */
+ "str %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* try store */
+ "str %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ "dmb\n\t" /* full mb provides store-release */
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "ldr r0, %[v2]\n\t"
+ "cmp %[expect2], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+ "ldr r0, %[v2]\n\t"
+ "cmp %[expect2], r0\n\t"
+ "bne %l[error3]\n\t"
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ "str %[src], %[rseq_scratch0]\n\t"
+ "str %[dst], %[rseq_scratch1]\n\t"
+ "str %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 7f\n\t"
+#endif
+ /* try memcpy */
+		"cmp %[len], #0\n\t"
+		"beq 333f\n\t"
+		"222:\n\t"
+		"ldrb %%r0, [%[src]]\n\t"
+		"strb %%r0, [%[dst]]\n\t"
+		"adds %[src], #1\n\t"
+		"adds %[dst], #1\n\t"
+		"subs %[len], #1\n\t"
+		"bne 222b\n\t"
+		"333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ "str %[src], %[rseq_scratch0]\n\t"
+ "str %[dst], %[rseq_scratch1]\n\t"
+ "str %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 7f\n\t"
+#endif
+ /* try memcpy */
+		"cmp %[len], #0\n\t"
+		"beq 333f\n\t"
+		"222:\n\t"
+		"ldrb %%r0, [%[src]]\n\t"
+		"strb %%r0, [%[dst]]\n\t"
+		"adds %[src], #1\n\t"
+		"adds %[dst], #1\n\t"
+		"subs %[len], #1\n\t"
+		"bne 222b\n\t"
+		"333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "dmb\n\t" /* full mb provides store-release */
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
new file mode 100644
index 000000000..954f34671
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -0,0 +1,594 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm64.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
+ */
+
+#define RSEQ_SIG 0xd428bc00 /* BRK #0x45E0 */
+
+#define rseq_smp_mb() __asm__ __volatile__ ("dmb ish" ::: "memory")
+#define rseq_smp_rmb() __asm__ __volatile__ ("dmb ishld" ::: "memory")
+#define rseq_smp_wmb() __asm__ __volatile__ ("dmb ishst" ::: "memory")
+
+#define rseq_smp_load_acquire(p)					\
+__extension__ ({							\
+	__typeof(*p) ____p1;						\
+	switch (sizeof(*p)) {						\
+	case 1:								\
+		asm volatile ("ldarb %w0, %1"				\
+			: "=r" (*(__u8 *)&____p1)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 2:								\
+		asm volatile ("ldarh %w0, %1"				\
+			: "=r" (*(__u16 *)&____p1)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 4:								\
+		asm volatile ("ldar %w0, %1"				\
+			: "=r" (*(__u32 *)&____p1)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	case 8:								\
+		asm volatile ("ldar %0, %1"				\
+			: "=r" (*(__u64 *)&____p1)			\
+			: "Q" (*p) : "memory");				\
+		break;							\
+	}								\
+	____p1;								\
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ switch (sizeof(*p)) { \
+ case 1: \
+ asm volatile ("stlrb %w1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u8)v) \
+ : "memory"); \
+ break; \
+ case 2: \
+ asm volatile ("stlrh %w1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u16)v) \
+ : "memory"); \
+ break; \
+ case 4: \
+ asm volatile ("stlr %w1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u32)v) \
+ : "memory"); \
+ break; \
+ case 8: \
+ asm volatile ("stlr %1, %0" \
+ : "=Q" (*p) \
+ : "r" ((__u64)v) \
+ : "memory"); \
+ break; \
+ } \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define RSEQ_ASM_TMP_REG32 "w15"
+#define RSEQ_ASM_TMP_REG "x15"
+#define RSEQ_ASM_TMP_REG_2 "x14"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ " .pushsection __rseq_table, \"aw\"\n" \
+ " .balign 32\n" \
+ __rseq_str(label) ":\n" \
+ " .long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
+ " .quad " __rseq_str(start_ip) ", " \
+ __rseq_str(post_commit_offset) ", " \
+ __rseq_str(abort_ip) "\n" \
+ " .popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ " adrp " RSEQ_ASM_TMP_REG ", " __rseq_str(cs_label) "\n" \
+ " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", :lo12:" __rseq_str(cs_label) "\n" \
+ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(rseq_cs) "]\n" \
+ __rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ " b 222f\n" \
+ " .inst " __rseq_str(RSEQ_SIG) "\n" \
+ __rseq_str(label) ":\n" \
+ " b %l[" __rseq_str(abort_label) "]\n" \
+ "222:\n"
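+/*
+ * The .inst word above places RSEQ_SIG immediately before the abort
+ * handler: the kernel verifies this signature word just before the
+ * abort IP before branching there, so a corrupted rseq_cs descriptor
+ * cannot redirect execution to an arbitrary address.
+ */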
+
+#define RSEQ_ASM_OP_STORE(value, var) \
+ " str %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_STORE_RELEASE(value, var) \
+ " stlr %[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+ RSEQ_ASM_OP_STORE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \
+ RSEQ_ASM_OP_STORE_RELEASE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \
+ " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(expect) "]\n" \
+ " cbnz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
+ " ldr " RSEQ_ASM_TMP_REG32 ", %[" __rseq_str(var) "]\n" \
+ " sub " RSEQ_ASM_TMP_REG32 ", " RSEQ_ASM_TMP_REG32 \
+ ", %w[" __rseq_str(expect) "]\n" \
+ " cbnz " RSEQ_ASM_TMP_REG32 ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
+ " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \
+ " sub " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(expect) "]\n" \
+ " cbz " RSEQ_ASM_TMP_REG ", " __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ " ldr " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var) \
+ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
+ " ldr " RSEQ_ASM_TMP_REG ", [" RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(offset) "]]\n"
+
+#define RSEQ_ASM_OP_R_ADD(count) \
+ " add " RSEQ_ASM_TMP_REG ", " RSEQ_ASM_TMP_REG \
+ ", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ " str " RSEQ_ASM_TMP_REG ", %[" __rseq_str(var) "]\n" \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \
+ " cbz %[" __rseq_str(len) "], 333f\n" \
+ " mov " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(len) "]\n" \
+ "222: sub " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", #1\n" \
+ " ldrb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(src) "]" \
+ ", " RSEQ_ASM_TMP_REG_2 "]\n" \
+ " strb " RSEQ_ASM_TMP_REG32 ", [%[" __rseq_str(dst) "]" \
+ ", " RSEQ_ASM_TMP_REG_2 "]\n" \
+ " cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \
+ "333:\n"
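+/*
+ * The copy loop above walks backwards: RSEQ_ASM_TMP_REG_2 starts at
+ * len and is decremented before each byte load/store, so the last
+ * iteration copies byte 0. The byte-at-a-time "bad" memcpy trades
+ * speed for simplicity inside the restartable critical section.
+ */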
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
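+/*
+ * Illustrative usage sketch (not part of this header): a per-CPU
+ * counter increment retried until it commits. rseq_cpu_start() and
+ * RSEQ_READ_ONCE() are assumed to come from rseq.h in this series.
+ *
+ *	static void percpu_inc(intptr_t *counters)
+ *	{
+ *		for (;;) {
+ *			int cpu = rseq_cpu_start();
+ *			intptr_t old = RSEQ_READ_ONCE(counters[cpu]);
+ *
+ *			// 0: committed; -1: abort; 1: compare failed
+ *			if (!rseq_cmpeqv_storev(&counters[cpu], old,
+ *						old + 1, cpu))
+ *				return;
+ *		}
+ *	}
+ */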
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_STORE(load)
+ RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "Qo" (*load),
+ [voffp] "r" (voffp)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_ADD(count)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
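+/*
+ * Illustrative sketch: rseq_addv() folds the load/add/store of the
+ * counter example above into the critical section itself, so only the
+ * abort case (-1) needs a retry:
+ *
+ *	for (;;) {
+ *		int cpu = rseq_cpu_start();
+ *
+ *		if (!rseq_addv(&counters[cpu], 1, cpu))
+ *			break;
+ *	}
+ */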
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [v2] "Qo" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
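+/*
+ * Illustrative sketch: the try store followed by the final store maps
+ * onto a per-CPU list push, with node->next written by the try store
+ * and the head update acting as the commit point. struct node and
+ * list are illustrative names, not definitions from this header.
+ *
+ *	struct node { struct node *next; };
+ *
+ *	intptr_t head = (intptr_t)list->head;
+ *	ret = rseq_cmpeqv_trystorev_storev((intptr_t *)&list->head, head,
+ *					   (intptr_t *)&node->next, head,
+ *					   (intptr_t)node, cpu);
+ */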
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [v2] "Qo" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [v2] "Qo" (*v2),
+ [expect2] "r" (expect2),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644
index 000000000..7f48ecf46
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG ".dword"
+# define LONG_LA "dla"
+# define LONG_L "ld"
+# define LONG_S "sd"
+# define LONG_ADDI "daddiu"
+# define U32_U64_PAD(x) x
+#elif _MIPS_SZLONG == 32
+# define LONG ".word"
+# define LONG_LA "la"
+# define LONG_L "lw"
+# define LONG_S "sw"
+# define LONG_ADDI "addiu"
+# ifdef __BIG_ENDIAN
+# define U32_U64_PAD(x) "0x0, " x
+# else
+# define U32_U64_PAD(x) x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
+ LONG_S " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "lw $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".balign 32\n\t" \
+ __rseq_str(table_label) ":\n\t" \
+ ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+ LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+ ".word " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+ start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+ abort_label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
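+/*
+ * Presumably (from the name) an empty asm that nudges gcc's size
+ * estimate of the surrounding code: without it, gcc can underestimate
+ * the size of the asm goto blocks below and emit branches whose
+ * offsets end up out of range.
+ */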
+#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "beq $4, %[expectnot], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+ LONG_S " $4, %[load]\n\t"
+ LONG_ADDI " $4, %[voffp]\n\t"
+ LONG_L " $4, 0($4)\n\t"
+ /* final store */
+ LONG_S " $4, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "Ir" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ LONG_L " $4, %[v]\n\t"
+ LONG_ADDI " $4, %[count]\n\t"
+ /* final store */
+ LONG_S " $4, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ "sync\n\t" /* full sync provides store-release */
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ LONG_L " $4, %[v2]\n\t"
+ "bne $4, %[expect2], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+ LONG_L " $4, %[v2]\n\t"
+ "bne $4, %[expect2], %l[error3]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uintptr_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 7f\n\t"
+#endif
+ /* try memcpy */
+		"beqz %[len], 333f\n\t"
+		"222:\n\t"
+		"lb $4, 0(%[src])\n\t"
+		"sb $4, 0(%[dst])\n\t"
+		LONG_ADDI " %[src], 1\n\t"
+		LONG_ADDI " %[dst], 1\n\t"
+		LONG_ADDI " %[len], -1\n\t"
+		"bnez %[len], 222b\n\t"
+		"333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uintptr_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ rseq_workaround_gcc_asm_size_guess();
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 7f\n\t"
+#endif
+ /* try memcpy */
+		"beqz %[len], 333f\n\t"
+		"222:\n\t"
+		"lb $4, 0(%[src])\n\t"
+		"sb $4, 0(%[dst])\n\t"
+		LONG_ADDI " %[src], 1\n\t"
+		LONG_ADDI " %[dst], 1\n\t"
+		LONG_ADDI " %[len], -1\n\t"
+		"bnez %[len], 222b\n\t"
+		"333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "sync\n\t" /* full sync provides store-release */
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_workaround_gcc_asm_size_guess();
+ return 0;
+abort:
+ rseq_workaround_gcc_asm_size_guess();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_workaround_gcc_asm_size_guess();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_workaround_gcc_asm_size_guess();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
new file mode 100644
index 000000000..52630c9f4
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -0,0 +1,671 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-ppc.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
+ */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("sync" ::: "memory", "cc")
+#define rseq_smp_lwsync() __asm__ __volatile__ ("lwsync" ::: "memory", "cc")
+#define rseq_smp_rmb() rseq_smp_lwsync()
+#define rseq_smp_wmb() rseq_smp_lwsync()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_lwsync(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_lwsync()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_lwsync(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * The __rseq_table section can be used by debuggers to better handle
+ * single-stepping through the restartable critical sections.
+ */
+
+#ifdef __PPC64__
+
+#define STORE_WORD "std "
+#define LOAD_WORD "ld "
+#define LOADX_WORD "ldx "
+#define CMP_WORD "cmpd "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "lis %%r17, (" __rseq_str(cs_label) ")@highest\n\t" \
+ "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@higher\n\t" \
+ "rldicr %%r17, %%r17, 32, 31\n\t" \
+ "oris %%r17, %%r17, (" __rseq_str(cs_label) ")@high\n\t" \
+ "ori %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
+ "std %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#else /* #ifdef __PPC64__ */
+
+#define STORE_WORD "stw "
+#define LOAD_WORD "lwz "
+#define LOADX_WORD "lwzx "
+#define CMP_WORD "cmpw "
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ /* 32-bit only supported on BE */ \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
+ "addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
+ "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#endif /* #ifdef __PPC64__ */
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \
+ "bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ "b %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+/*
+ * RSEQ_ASM_OPs: asm operations for rseq
+ * RSEQ_ASM_OP_R_*: has hard-coded registers in it
+ * RSEQ_ASM_OP_* (else): doesn't have hard-coded registers (unless cr7)
+ */
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
+ "bne- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \
+ LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
+ "beq- cr7, " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_OP_STORE(value, var) \
+ STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+
+/* Load @var to r17 */
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Store r17 to @var */
+#define RSEQ_ASM_OP_R_STORE(var) \
+ STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+
+/* Add @count to r17 */
+#define RSEQ_ASM_OP_R_ADD(count) \
+ "add %%r17, %[" __rseq_str(count) "], %%r17\n\t"
+
+/* Load (r17 + voffp) to r17 */
+#define RSEQ_ASM_OP_R_LOADX(voffp) \
+ LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+
+/* TODO: implement a faster memcpy. */
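+/*
+ * Byte-at-a-time copy: r19 holds the remaining length, while r20/r21
+ * are pre-decremented so the update forms of lbzu/stbu step through
+ * src/dst one byte per iteration; r18 carries each byte. Callers
+ * declare r18-r21 as clobbered.
+ */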
+#define RSEQ_ASM_OP_R_MEMCPY() \
+ "cmpdi %%r19, 0\n\t" \
+ "beq 333f\n\t" \
+ "addi %%r20, %%r20, -1\n\t" \
+ "addi %%r21, %%r21, -1\n\t" \
+ "222:\n\t" \
+ "lbzu %%r18, 1(%%r20)\n\t" \
+ "stbu %%r18, 1(%%r21)\n\t" \
+ "addi %%r19, %%r19, -1\n\t" \
+ "cmpdi %%r19, 0\n\t" \
+ "bne 222b\n\t" \
+		"333:\n\t"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ __rseq_str(post_commit_label) ":\n\t"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+ STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+ __rseq_str(post_commit_label) ":\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v not equal to @expectnot */
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v not equal to @expectnot */
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+ /* load the value of @v */
+ RSEQ_ASM_OP_R_LOAD(v)
+ /* store it in @load */
+ RSEQ_ASM_OP_R_STORE(load)
+ /* dereference voffp(v) */
+ RSEQ_ASM_OP_R_LOADX(voffp)
+		/* final store of the value loaded from voffp(v) into @v */
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "b" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* load the value of @v */
+ RSEQ_ASM_OP_R_LOAD(v)
+ /* add @count to it */
+ RSEQ_ASM_OP_R_ADD(count)
+ /* final store */
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try store */
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try store */
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+ /* for 'release' */
+ "lwsync\n\t"
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+		/* cmp @v2 equal to @expect2 */
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+		/* cmp @v2 equal to @expect2 */
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* setup for memcpy */
+ "mr %%r19, %[len]\n\t"
+ "mr %%r20, %[src]\n\t"
+ "mr %%r21, %[dst]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try memcpy */
+ RSEQ_ASM_OP_R_MEMCPY()
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
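+/*
+ * Illustrative sketch: the trymemcpy variant suits a per-CPU buffer
+ * push, copying the payload before committing the new offset. buf,
+ * item and item_len are illustrative names, not part of this header.
+ *
+ *	intptr_t off = buf->offset;
+ *	ret = rseq_cmpeqv_trymemcpy_storev(&buf->offset, off,
+ *					   buf->data + off, item, item_len,
+ *					   off + item_len, cpu);
+ */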
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+		/* setup for memcpy */
+ "mr %%r19, %[len]\n\t"
+ "mr %%r20, %[src]\n\t"
+ "mr %%r21, %[dst]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try memcpy */
+ RSEQ_ASM_OP_R_MEMCPY()
+ RSEQ_INJECT_ASM(5)
+ /* for 'release' */
+ "lwsync\n\t"
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#undef STORE_WORD
+#undef LOAD_WORD
+#undef LOADX_WORD
+#undef CMP_WORD
+
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
new file mode 100644
index 000000000..1069e8525
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -0,0 +1,513 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#define RSEQ_SIG 0x53053053
+
+#define rseq_smp_mb() __asm__ __volatile__ ("bcr 15,0" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_barrier(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_barrier(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#ifdef __s390x__
+
+#define LONG_L "lg"
+#define LONG_S "stg"
+#define LONG_LT_R "ltgr"
+#define LONG_CMP "cg"
+#define LONG_CMP_R "cgr"
+#define LONG_ADDI "aghi"
+#define LONG_ADD_R "agr"
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#elif __s390__
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".long 0x0, " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define LONG_L "l"
+#define LONG_S "st"
+#define LONG_LT_R "ltr"
+#define LONG_CMP "c"
+#define LONG_CMP_R "cr"
+#define LONG_ADDI "ahi"
+#define LONG_ADD_R "ar"
+
+#endif
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "larl %%r0, " __rseq_str(cs_label) "\n\t" \
+ LONG_S " %%r0, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "c %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "j %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "j %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ ".popsection\n\t"
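+/*
+ * Abort and compare-fail trampolines are emitted into the separate
+ * __rseq_failure section, keeping them out of line with the fast path
+ * and outside the [start_ip, post_commit_ip) range of the critical
+ * section.
+ */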
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When the values do _not_ match, store
+ * the current content of @v into @load, then store the value found at
+ * (that content + @voffp) into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " %%r1, %[v]\n\t"
+ LONG_CMP_R " %%r1, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " %%r1, %[v]\n\t"
+ LONG_CMP_R " %%r1, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ LONG_S " %%r1, %[load]\n\t"
+ LONG_ADD_R " %%r1, %[voffp]\n\t"
+ LONG_L " %%r1, 0(%%r1)\n\t"
+ /* final store */
+ LONG_S " %%r1, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "r" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0", "r1"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
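+
+/*
+ * Usage sketch (illustrative only, not part of this header):
+ * rseq_cmpnev_storeoffp_load() is typically used to pop the head of a
+ * per-cpu linked list, with @voffp set to the offset of the "next"
+ * pointer inside a node. struct example_node and example_list_pop are
+ * hypothetical names.
+ */
+struct example_node { struct example_node *next; };
+
+static inline struct example_node *
+example_list_pop(struct example_node **phead, int cpu)
+{
+ intptr_t head;
+
+ /* Returns 1 when the list is empty (head == NULL), -1 on abort. */
+ if (rseq_cmpnev_storeoffp_load((intptr_t *)phead, (intptr_t)0,
+ __builtin_offsetof(struct example_node, next),
+ &head, cpu))
+ return (struct example_node *)0;
+ return (struct example_node *)head;
+}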
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ LONG_L " %%r0, %[v]\n\t"
+ LONG_ADD_R " %%r0, %[count]\n\t"
+ /* final store */
+ LONG_S " %%r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ LONG_CMP " %[expect2], %[v2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+ LONG_CMP " %[expect2], %[v2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint64_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ LONG_LT_R " %[len], %[len]\n\t"
+ "jz 333f\n\t"
+ "222:\n\t"
+ "ic %%r0,0(%[src])\n\t"
+ "stc %%r0,0(%[dst])\n\t"
+ LONG_ADDI " %[src], 1\n\t"
+ LONG_ADDI " %[dst], 1\n\t"
+ LONG_ADDI " %[len], -1\n\t"
+ "jnz 222b\n\t"
+ "333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+ newv, cpu);
+}
+#endif /* !RSEQ_SKIP_FASTPATH */
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
new file mode 100644
index 000000000..72750b590
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-skip.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-skip.h
+ *
+ * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return -1;
+}
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
new file mode 100644
index 000000000..089410a31
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -0,0 +1,1132 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-x86.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <stdint.h>
+
+#define RSEQ_SIG 0x53053053
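+
+/*
+ * The signature is placed immediately before the abort handler (see
+ * RSEQ_ASM_DEFINE_ABORT below): on abort, the kernel checks that the
+ * four bytes preceding abort_ip match the signature passed at
+ * registration time, so a corrupted rseq_cs cannot redirect execution
+ * to arbitrary code.
+ */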
+
+#ifdef __x86_64__
+
+#define rseq_smp_mb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
+#define rseq_smp_rmb() rseq_barrier()
+#define rseq_smp_wmb() rseq_barrier()
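+
+/*
+ * x86-64 is TSO: loads are not reordered against loads, and stores are
+ * not reordered against stores, so rseq_smp_rmb() and rseq_smp_wmb()
+ * only need a compiler barrier. Only the full barrier needs a real
+ * fence; "lock; addl $0" on a dummy stack slot is used rather than
+ * mfence, and adding zero leaves memory unchanged, so no live data is
+ * clobbered.
+ */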
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_barrier(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_barrier(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(post_commit_offset) ", " __rseq_str(abort_ip) "\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "leaq " __rseq_str(cs_label) "(%%rip), %%rax\n\t" \
+ "movq %%rax, %[" __rseq_str(rseq_cs) "]\n\t" \
+ __rseq_str(label) ":\n\t"
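+
+/*
+ * Storing the table entry address into __rseq_abi.rseq_cs arms the
+ * critical section: if the thread is preempted, migrated or
+ * interrupted by a signal while its instruction pointer is within
+ * [start_ip, start_ip + post_commit_offset), the kernel moves it to
+ * abort_ip.
+ */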
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ /* Disassembler-friendly signature: nopl <sig>(%rip). */\
+ ".byte 0x0f, 0x1f, 0x05\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ ".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
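+
+/*
+ * Usage sketch (illustrative only, not part of this header): a per-CPU
+ * trylock built on rseq_cmpeqv_storev(). Returns 0 when @lock (assumed
+ * to belong to @cpu) was acquired, > 0 when it was already held, < 0
+ * when the sequence aborted (e.g. migration) and the caller should
+ * retry with a fresh CPU number. example_percpu_trylock is a
+ * hypothetical name.
+ */
+static inline int example_percpu_trylock(intptr_t *lock, int cpu)
+{
+ return rseq_cmpeqv_storev(lock, 0, 1, cpu);
+}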
+
+/*
+ * Compare @v against @expectnot. When the values do _not_ match, store
+ * the current content of @v into @load, then store the value found at
+ * (that content + @voffp) into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ "movq %%rbx, %[load]\n\t"
+ "addq %[voffp], %%rbx\n\t"
+ "movq (%%rbx), %%rbx\n\t"
+ /* final store */
+ "movq %%rbx, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "er" (voffp),
+ [load] "m" (*load)
+ : "memory", "cc", "rax", "rbx"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* final store */
+ "addq %[count], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "er" (count)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movq %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
+}
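+
+/*
+ * Usage sketch (illustrative only, not part of this header): pushing
+ * into a per-CPU array with rseq_cmpeqv_trystorev_storev(); the "try
+ * store" publishes the item and the final store commits the new
+ * offset. struct example_buf and example_buf_push are hypothetical
+ * names; callers retry on a nonzero return.
+ */
+struct example_buf { intptr_t offset; intptr_t capacity; intptr_t *array; };
+
+static inline int example_buf_push(struct example_buf *b, intptr_t item, int cpu)
+{
+ intptr_t offset = RSEQ_READ_ONCE(b->offset);
+
+ if (offset == b->capacity)
+ return 1; /* full: treat like a compare failure */
+ return rseq_cmpeqv_trystorev_storev(&b->offset, offset,
+ &b->array[offset], item,
+ offset + 1, cpu);
+}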
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "cmpq %[v2], %[expect2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpq %[v2], %[expect2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint64_t rseq_scratch[3];
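+ /*
+ * asm goto does not allow output operands, yet the copy loop below
+ * modifies the registers holding src, dst and len. They are
+ * therefore spilled into this scratch area and restored on every
+ * exit path (commit, abort and compare-fail).
+ */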
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ "movq %[src], %[rseq_scratch0]\n\t"
+ "movq %[dst], %[rseq_scratch1]\n\t"
+ "movq %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* x86-64 is TSO. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
+ newv, cpu);
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#elif __i386__
+
+#define rseq_smp_mb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_rmb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+#define rseq_smp_wmb() \
+ __asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_smp_mb(); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ rseq_smp_mb(); \
+ RSEQ_WRITE_ONCE(*p, v); \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+/*
+ * Use eax as a scratch register and take memory operands as input to
+ * lessen register pressure, which is especially needed when compiling
+ * at -O0.
+ */
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
+ start_ip, post_commit_offset, abort_ip) \
+ ".pushsection __rseq_table, \"aw\"\n\t" \
+ ".balign 32\n\t" \
+ __rseq_str(label) ":\n\t" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+ ".long " __rseq_str(start_ip) ", 0x0, " __rseq_str(post_commit_offset) ", 0x0, " __rseq_str(abort_ip) ", 0x0\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "movl $" __rseq_str(cs_label) ", %[rseq_cs]\n\t" \
+ __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ "cmpl %[" __rseq_str(cpu_id) "], %[" __rseq_str(current_cpu_id) "]\n\t" \
+ "jnz " __rseq_str(label) "\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, teardown, abort_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ /* Disassembler-friendly signature: nopl <sig>. */ \
+ ".byte 0x0f, 0x1f, 0x05\n\t" \
+ ".long " __rseq_str(RSEQ_SIG) "\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(abort_label) "]\n\t" \
+ ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+ ".pushsection __rseq_failure, \"ax\"\n\t" \
+ __rseq_str(label) ":\n\t" \
+ teardown \
+ "jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \
+ ".popsection\n\t"
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When the values do _not_ match, store
+ * the current content of @v into @load, then store the value found at
+ * (that content + @voffp) into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ "movl %%ebx, %[load]\n\t"
+ "addl %[voffp], %%ebx\n\t"
+ "movl (%%ebx), %%ebx\n\t"
+ /* final store */
+ "movl %%ebx, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "ir" (voffp),
+ [load] "m" (*load)
+ : "memory", "cc", "eax", "ebx"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* final store */
+ "addl %[count], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "ir" (count)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movl %[newv2], %%eax\n\t"
+ "movl %%eax, %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "m" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movl %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ "lock; addl $0,-128(%%esp)\n\t"
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "cmpl %[expect2], %[v2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpl %[expect2], %[v2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "m" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ "movl %[src], %[rseq_scratch0]\n\t"
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "m" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ "movl %[src], %[rseq_scratch0]\n\t"
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ "lock; addl $0,-128(%%esp)\n\t"
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (__rseq_abi.cpu_id),
+ [rseq_cs] "m" (__rseq_abi.rseq_cs),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "m" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
new file mode 100644
index 000000000..4847e97ed
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * rseq.c
+ *
+ * Copyright (C) 2016 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; only
+ * version 2.1 of the License.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <assert.h>
+#include <signal.h>
+
+#include "rseq.h"
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+__attribute__((tls_model("initial-exec"))) __thread
+volatile struct rseq __rseq_abi = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+static __attribute__((tls_model("initial-exec"))) __thread
+volatile int refcount;
+
+static void signal_off_save(sigset_t *oldset)
+{
+ sigset_t set;
+ int ret;
+
+ sigfillset(&set);
+ ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
+ if (ret)
+ abort();
+}
+
+static void signal_restore(sigset_t oldset)
+{
+ int ret;
+
+ ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ if (ret)
+ abort();
+}
+
+static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
+{
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
+
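+/*
+ * Registration is reference-counted per thread, so nested users (e.g.
+ * a library and the application itself) may each register. Signals are
+ * blocked while the refcount and registration state are manipulated,
+ * so a signal handler performing its own registration cannot observe
+ * or create a half-updated state.
+ */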
+int rseq_register_current_thread(void)
+{
+ int rc, ret = 0;
+ sigset_t oldset;
+
+ signal_off_save(&oldset);
+ if (refcount++)
+ goto end;
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
+ if (!rc) {
+ assert(rseq_current_cpu_raw() >= 0);
+ goto end;
+ }
+ if (errno != EBUSY)
+ __rseq_abi.cpu_id = -2;
+ ret = -1;
+ refcount--;
+end:
+ signal_restore(oldset);
+ return ret;
+}
+
+int rseq_unregister_current_thread(void)
+{
+ int rc, ret = 0;
+ sigset_t oldset;
+
+ signal_off_save(&oldset);
+ if (--refcount)
+ goto end;
+ rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
+ RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
+ if (!rc)
+ goto end;
+ ret = -1;
+end:
+ signal_restore(oldset);
+ return ret;
+}
+
+int32_t rseq_fallback_current_cpu(void)
+{
+ int32_t cpu;
+
+ cpu = sched_getcpu();
+ if (cpu < 0) {
+ perror("sched_getcpu()");
+ abort();
+ }
+ return cpu;
+}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
new file mode 100644
index 000000000..c72eb70f9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_H
+#define RSEQ_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sched.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/rseq.h>
+
+/*
+ * Empty code injection macros, to be overridden when testing.
+ * Note that the ASM injection macros need to be fully reentrant
+ * (e.g. they must not modify the stack).
+ */
+#ifndef RSEQ_INJECT_ASM
+#define RSEQ_INJECT_ASM(n)
+#endif
+
+#ifndef RSEQ_INJECT_C
+#define RSEQ_INJECT_C(n)
+#endif
+
+#ifndef RSEQ_INJECT_INPUT
+#define RSEQ_INJECT_INPUT
+#endif
+
+#ifndef RSEQ_INJECT_CLOBBER
+#define RSEQ_INJECT_CLOBBER
+#endif
+
+#ifndef RSEQ_INJECT_FAILED
+#define RSEQ_INJECT_FAILED
+#endif
+
+extern __thread volatile struct rseq __rseq_abi;
+
+#define rseq_likely(x) __builtin_expect(!!(x), 1)
+#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
+#define rseq_barrier() __asm__ __volatile__("" : : : "memory")
+
+#define RSEQ_ACCESS_ONCE(x) (*(__volatile__ __typeof__(x) *)&(x))
+#define RSEQ_WRITE_ONCE(x, v) __extension__ ({ RSEQ_ACCESS_ONCE(x) = (v); })
+#define RSEQ_READ_ONCE(x) RSEQ_ACCESS_ONCE(x)
+
+#define __rseq_str_1(x) #x
+#define __rseq_str(x) __rseq_str_1(x)
+
+#define rseq_log(fmt, args...) \
+ fprintf(stderr, fmt " (in %s() at " __FILE__ ":" __rseq_str(__LINE__) ")\n", \
+ ## args, __func__)
+
+#define rseq_bug(fmt, args...) \
+ do { \
+ rseq_log(fmt, ##args); \
+ abort(); \
+ } while (0)
+
+#if defined(__x86_64__) || defined(__i386__)
+#include <rseq-x86.h>
+#elif defined(__ARMEL__)
+#include <rseq-arm.h>
+#elif defined (__AARCH64EL__)
+#include <rseq-arm64.h>
+#elif defined(__PPC__)
+#include <rseq-ppc.h>
+#elif defined(__mips__)
+#include <rseq-mips.h>
+#elif defined(__s390__)
+#include <rseq-s390.h>
+#else
+#error unsupported target
+#endif
+
+/*
+ * Register rseq for the current thread. Each thread which uses
+ * restartable sequences must call this once before it starts using
+ * them: a restartable sequence executed from a non-registered thread
+ * will always fail.
+ */
+int rseq_register_current_thread(void);
+
+/*
+ * Unregister rseq for current thread.
+ */
+int rseq_unregister_current_thread(void);
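+
+/*
+ * Usage sketch (illustrative only, not part of this header): a typical
+ * thread function brackets its rseq usage with registration and
+ * unregistration; example_thread_fn is a hypothetical name.
+ */
+static inline void *example_thread_fn(void *arg)
+{
+ if (rseq_register_current_thread())
+ abort();
+ /* ... rseq critical sections may be used here ... */
+ if (rseq_unregister_current_thread())
+ abort();
+ return arg;
+}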
+
+/*
+ * Restartable sequence fallback for reading the current CPU number.
+ */
+int32_t rseq_fallback_current_cpu(void);
+
+/*
+ * Values returned can be either the current CPU number, -1 (rseq is
+ * uninitialized), or -2 (rseq initialization has failed).
+ */
+static inline int32_t rseq_current_cpu_raw(void)
+{
+ return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+}
+
+/*
+ * Returns a possible CPU number, which is typically the current CPU.
+ * The returned CPU number can be used to prepare for an rseq critical
+ * section, which will confirm whether the CPU number is indeed the
+ * current one, and whether rseq is initialized.
+ *
+ * The CPU number returned by rseq_cpu_start should always be validated
+ * by passing it to an rseq asm sequence, or by comparing it to the
+ * return value of rseq_current_cpu_raw() if the rseq asm sequence
+ * does not need to be invoked.
+ */
+static inline uint32_t rseq_cpu_start(void)
+{
+ return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+}
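+
+/*
+ * Usage sketch (illustrative only, not part of this header): validate
+ * the CPU number returned by rseq_cpu_start() by passing it to an rseq
+ * asm sequence, retrying on abort. Assumes @percpu_v is an array with
+ * one slot per possible CPU; example_percpu_addv is a hypothetical
+ * name.
+ */
+static inline int example_percpu_addv(intptr_t *percpu_v, intptr_t count)
+{
+ int cpu, ret;
+
+ do {
+ cpu = rseq_cpu_start();
+ ret = rseq_addv(&percpu_v[cpu], count, cpu);
+ /* Nonzero means the sequence aborted (e.g. migration): retry. */
+ } while (rseq_unlikely(ret));
+ return cpu;
+}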
+
+static inline uint32_t rseq_current_cpu(void)
+{
+ int32_t cpu;
+
+ cpu = rseq_current_cpu_raw();
+ if (rseq_unlikely(cpu < 0))
+ cpu = rseq_fallback_current_cpu();
+ return cpu;
+}
+
+static inline void rseq_clear_rseq_cs(void)
+{
+#ifdef __LP64__
+ __rseq_abi.rseq_cs.ptr = 0;
+#else
+ __rseq_abi.rseq_cs.ptr.ptr32 = 0;
+#endif
+}
+
+/*
+ * rseq_prepare_unload() should be invoked at least once by each thread
+ * executing an rseq critical section, between that thread's last
+ * critical section and the unloading of the library which defines the
+ * rseq critical section (struct rseq_cs). This also applies to rseq
+ * use in code generated by a JIT: rseq_prepare_unload() must be
+ * invoked by each such thread before the memory holding the
+ * struct rseq_cs is reclaimed.
+ */
+static inline void rseq_prepare_unload(void)
+{
+ rseq_clear_rseq_cs();
+}
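+
+/*
+ * Usage sketch (illustrative only): before dlclose()ing a library
+ * whose code contains rseq critical sections, each thread which
+ * executed them should call, e.g.:
+ *
+ * rseq_prepare_unload();
+ * dlclose(handle);
+ */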
+
+#endif /* RSEQ_H */
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
new file mode 100755
index 000000000..3acd6d75f
--- /dev/null
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+ or MIT
+
+EXTRA_ARGS=${@}
+
+OLDIFS="$IFS"
+IFS=$'\n'
+TEST_LIST=(
+ "-T s"
+ "-T l"
+ "-T b"
+ "-T b -M"
+ "-T m"
+ "-T m -M"
+ "-T i"
+)
+
+TEST_NAME=(
+ "spinlock"
+ "list"
+ "buffer"
+ "buffer with barrier"
+ "memcpy"
+ "memcpy with barrier"
+ "increment"
+)
+IFS="$OLDIFS"
+
+REPS=1000
+SLOW_REPS=100
+
+function do_tests()
+{
+ local i=0
+ while [ "$i" -lt "${#TEST_LIST[@]}" ]; do
+ echo "Running test ${TEST_NAME[$i]}"
+ ./param_test ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+ echo "Running compare-twice test ${TEST_NAME[$i]}"
+ ./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} ${@} ${EXTRA_ARGS} || exit 1
+ let "i++"
+ done
+}
+
+echo "Default parameters"
+do_tests
+
+echo "Loop injection: 10000 loops"
+
+OLDIFS="$IFS"
+IFS=$'\n'
+INJECT_LIST=(
+ "1"
+ "2"
+ "3"
+ "4"
+ "5"
+ "6"
+ "7"
+ "8"
+ "9"
+)
+IFS="$OLDIFS"
+
+NR_LOOPS=10000
+
+i=0
+while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+ echo "Injecting at <${INJECT_LIST[$i]}>"
+ do_tests -${INJECT_LIST[i]} ${NR_LOOPS}
+ let "i++"
+done
+NR_LOOPS=
+
+function inject_blocking()
+{
+ OLDIFS="$IFS"
+ IFS=$'\n'
+ INJECT_LIST=(
+ "7"
+ "8"
+ "9"
+ )
+ IFS="$OLDIFS"
+
+ NR_LOOPS=-1
+
+ i=0
+ while [ "$i" -lt "${#INJECT_LIST[@]}" ]; do
+ echo "Injecting at <${INJECT_LIST[$i]}>"
+ do_tests -${INJECT_LIST[i]} -1 ${@}
+ let "i++"
+ done
+ NR_LOOPS=
+}
+
+echo "Yield injection (25%)"
+inject_blocking -m 4 -y
+
+echo "Yield injection (50%)"
+inject_blocking -m 2 -y
+
+echo "Yield injection (100%)"
+inject_blocking -m 1 -y
+
+echo "Kill injection (25%)"
+inject_blocking -m 4 -k
+
+echo "Kill injection (50%)"
+inject_blocking -m 2 -k
+
+echo "Kill injection (100%)"
+inject_blocking -m 1 -k
+
+echo "Sleep injection (1ms, 25%)"
+inject_blocking -m 4 -s 1
+
+echo "Sleep injection (1ms, 50%)"
+inject_blocking -m 2 -s 1
+
+echo "Sleep injection (1ms, 100%)"
+inject_blocking -m 1 -s 1
diff --git a/tools/testing/selftests/rseq/settings b/tools/testing/selftests/rseq/settings
new file mode 100644
index 000000000..e7b941753
--- /dev/null
+++ b/tools/testing/selftests/rseq/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
new file mode 100644
index 000000000..d0ad44f62
--- /dev/null
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -0,0 +1,2 @@
+rtctest
+setdate
diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile
new file mode 100644
index 000000000..de9c85666
--- /dev/null
+++ b/tools/testing/selftests/rtc/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDFLAGS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = rtctest
+
+TEST_GEN_PROGS_EXTENDED = setdate
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
new file mode 100644
index 000000000..b2065536d
--- /dev/null
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Driver Test Program
+ *
+ * Copyright (c) 2018 Alexandre Belloni <alexandre.belloni@bootlin.com>
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/rtc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_UIE 3
+#define ALARM_DELTA 3
+
+static char *rtc_file = "/dev/rtc0";
+
+FIXTURE(rtc) {
+ int fd;
+};
+
+FIXTURE_SETUP(rtc) {
+ self->fd = open(rtc_file, O_RDONLY);
+ ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(rtc) {
+ close(self->fd);
+}
+
+TEST_F(rtc, date_read) {
+ int rc;
+ struct rtc_time rtc_tm;
+
+ /* Read the RTC time/date */
+ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Current RTC date/time is %02d/%02d/%02d %02d:%02d:%02d.",
+ rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900,
+ rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
+}
+
+TEST_F(rtc, uie_read) {
+ int i, rc, irq = 0;
+ unsigned long data;
+
+ /* Turn on update interrupts */
+ rc = ioctl(self->fd, RTC_UIE_ON, 0);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip update IRQs not supported.");
+ return;
+ }
+
+ for (i = 0; i < NUM_UIE; i++) {
+ /* This read will block */
+ rc = read(self->fd, &data, sizeof(data));
+ ASSERT_NE(-1, rc);
+ irq++;
+ }
+
+ EXPECT_EQ(NUM_UIE, irq);
+
+ rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, uie_select) {
+ int i, rc, irq = 0;
+ unsigned long data;
+
+ /* Turn on update interrupts */
+ rc = ioctl(self->fd, RTC_UIE_ON, 0);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip update IRQs not supported.");
+ return;
+ }
+
+ for (i = 0; i < NUM_UIE; i++) {
+ struct timeval tv = { .tv_sec = 2 };
+ fd_set readfds;
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+ /* The select will wait until an RTC interrupt happens. */
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* This read won't block */
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ irq++;
+ }
+
+ EXPECT_EQ(NUM_UIE, irq);
+
+ rc = ioctl(self->fd, RTC_UIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+}
+
+TEST_F(rtc, alarm_alm_set) {
+ struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+ unsigned long data;
+ struct rtc_time tm;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&tm) + ALARM_DELTA;
+ gmtime_r(&secs, (struct tm *)&tm);
+
+ rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+ /* Enable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_ON, 0);
+ ASSERT_NE(-1, rc);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* Disable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ TH_LOG("data: %lx", data);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set) {
+ struct timeval tv = { .tv_sec = ALARM_DELTA + 2 };
+ struct rtc_wkalrm alarm = { 0 };
+ struct rtc_time tm;
+ unsigned long data;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&alarm.time) + ALARM_DELTA;
+ gmtime_r(&secs, (struct tm *)&alarm.time);
+
+ alarm.enabled = 1;
+
+ rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+ TH_LOG("skip alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+ alarm.time.tm_mday, alarm.time.tm_mon + 1,
+ alarm.time.tm_year + 1900, alarm.time.tm_hour,
+ alarm.time.tm_min, alarm.time.tm_sec);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_alm_set_minute) {
+ struct timeval tv = { .tv_sec = 62 };
+ unsigned long data;
+ struct rtc_time tm;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&tm) + 60 - tm.tm_sec;
+ gmtime_r(&secs, (struct tm *)&tm);
+
+ rc = ioctl(self->fd, RTC_ALM_SET, &tm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip: alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_ALM_READ, &tm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d:%02d:%02d.",
+ tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+ /* Enable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_ON, 0);
+ ASSERT_NE(-1, rc);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ /* Disable alarm interrupts */
+ rc = ioctl(self->fd, RTC_AIE_OFF, 0);
+ ASSERT_NE(-1, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+ TH_LOG("data: %lx", data);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+TEST_F(rtc, alarm_wkalm_set_minute) {
+ struct timeval tv = { .tv_sec = 62 };
+ struct rtc_wkalrm alarm = { 0 };
+ struct rtc_time tm;
+ unsigned long data;
+ fd_set readfds;
+ time_t secs, new;
+ int rc;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
+ ASSERT_NE(-1, rc);
+
+ secs = timegm((struct tm *)&alarm.time) + 60 - alarm.time.tm_sec;
+ gmtime_r(&secs, (struct tm *)&alarm.time);
+
+ alarm.enabled = 1;
+
+ rc = ioctl(self->fd, RTC_WKALM_SET, &alarm);
+ if (rc == -1) {
+ ASSERT_EQ(EINVAL, errno);
+		TH_LOG("skip: alarms are not supported.");
+ return;
+ }
+
+ rc = ioctl(self->fd, RTC_WKALM_RD, &alarm);
+ ASSERT_NE(-1, rc);
+
+ TH_LOG("Alarm time now set to %02d/%02d/%02d %02d:%02d:%02d.",
+ alarm.time.tm_mday, alarm.time.tm_mon + 1,
+ alarm.time.tm_year + 1900, alarm.time.tm_hour,
+ alarm.time.tm_min, alarm.time.tm_sec);
+
+ FD_ZERO(&readfds);
+ FD_SET(self->fd, &readfds);
+
+ rc = select(self->fd + 1, &readfds, NULL, NULL, &tv);
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(0, rc);
+
+ rc = read(self->fd, &data, sizeof(unsigned long));
+ ASSERT_NE(-1, rc);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &tm);
+ ASSERT_NE(-1, rc);
+
+ new = timegm((struct tm *)&tm);
+ ASSERT_EQ(new, secs);
+}
+
+static void __attribute__((constructor))
+__constructor_order_last(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
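+/* Example invocation (device path illustrative): ./rtctest /dev/rtc1 */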
+int main(int argc, char **argv)
+{
+ switch (argc) {
+ case 2:
+ rtc_file = argv[1];
+ /* FALLTHROUGH */
+ case 1:
+ break;
+ default:
+ fprintf(stderr, "usage: %s [rtcdev]\n", argv[0]);
+ return 1;
+ }
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/rtc/setdate.c b/tools/testing/selftests/rtc/setdate.c
new file mode 100644
index 000000000..2cb78489e
--- /dev/null
+++ b/tools/testing/selftests/rtc/setdate.c
@@ -0,0 +1,86 @@
+/* Real Time Clock Driver Test
+ * by: Benjamin Gaignard (benjamin.gaignard@linaro.org)
+ *
+ * To build:
+ *    gcc setdate.c -o setdate
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+static const char default_time[] = "00:00:00";
+
+int main(int argc, char **argv)
+{
+ int fd, retval;
+ struct rtc_time new, current;
+ const char *rtc, *date;
+ const char *time = default_time;
+
+ switch (argc) {
+ case 4:
+ time = argv[3];
+ /* FALLTHROUGH */
+ case 3:
+ date = argv[2];
+ rtc = argv[1];
+ break;
+ default:
+		fprintf(stderr, "usage: %s <rtcdev> <DD-MM-YYYY> [HH:MM:SS]\n", argv[0]);
+ return 1;
+ }
+
+ fd = open(rtc, O_RDONLY);
+ if (fd == -1) {
+ perror(rtc);
+ exit(errno);
+ }
+
+ sscanf(date, "%d-%d-%d", &new.tm_mday, &new.tm_mon, &new.tm_year);
+ new.tm_mon -= 1;
+ new.tm_year -= 1900;
+ sscanf(time, "%d:%d:%d", &new.tm_hour, &new.tm_min, &new.tm_sec);
+
+ fprintf(stderr, "Test will set RTC date/time to %d-%d-%d, %02d:%02d:%02d.\n",
+ new.tm_mday, new.tm_mon + 1, new.tm_year + 1900,
+ new.tm_hour, new.tm_min, new.tm_sec);
+
+ /* Write the new date in RTC */
+ retval = ioctl(fd, RTC_SET_TIME, &new);
+ if (retval == -1) {
+ perror("RTC_SET_TIME ioctl");
+ close(fd);
+ exit(errno);
+ }
+
+ /* Read back */
+ retval = ioctl(fd, RTC_RD_TIME, &current);
+ if (retval == -1) {
+ perror("RTC_RD_TIME ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n",
+ current.tm_mday, current.tm_mon + 1, current.tm_year + 1900,
+ current.tm_hour, current.tm_min, current.tm_sec);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
new file mode 100644
index 000000000..5af29d3a1
--- /dev/null
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -0,0 +1,2 @@
+seccomp_bpf
+seccomp_benchmark
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
new file mode 100644
index 000000000..1760b3e39
--- /dev/null
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := seccomp_bpf seccomp_benchmark
+CFLAGS += -Wl,-no-as-needed -Wall
+
+seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
+ $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
new file mode 100644
index 000000000..db1e11b08
--- /dev/null
+++ b/tools/testing/selftests/seccomp/config
@@ -0,0 +1,2 @@
+CONFIG_SECCOMP=y
+CONFIG_SECCOMP_FILTER=y
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
new file mode 100644
index 000000000..5838c8697
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -0,0 +1,99 @@
+/*
+ * Strictly speaking, this is not a test. But it can report during test
+ * runs so relative performance can be measured.
+ */
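+/* Usage: seccomp_benchmark [nr_samples]; with no argument, a sample
+ * count is calibrated automatically.
+ */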
+#define _GNU_SOURCE
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+
+unsigned long long timing(clockid_t clk_id, unsigned long long samples)
+{
+ pid_t pid, ret;
+ unsigned long long i;
+ struct timespec start, finish;
+
+ pid = getpid();
+ assert(clock_gettime(clk_id, &start) == 0);
+ for (i = 0; i < samples; i++) {
+ ret = syscall(__NR_getpid);
+ assert(pid == ret);
+ }
+ assert(clock_gettime(clk_id, &finish) == 0);
+
+ i = finish.tv_sec - start.tv_sec;
+ i *= 1000000000;
+ i += finish.tv_nsec - start.tv_nsec;
+
+ printf("%lu.%09lu - %lu.%09lu = %llu\n",
+ finish.tv_sec, finish.tv_nsec,
+ start.tv_sec, start.tv_nsec,
+ i);
+
+ return i;
+}
+
+unsigned long long calibrate(void)
+{
+ unsigned long long i;
+
+ printf("Calibrating reasonable sample size...\n");
+
+ for (i = 5; ; i++) {
+ unsigned long long samples = 1 << i;
+
+ /* Find something that takes more than 5 seconds to run. */
+ if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
+ return samples;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ unsigned long long samples;
+ unsigned long long native, filtered;
+
+ if (argc > 1)
+ samples = strtoull(argv[1], NULL, 0);
+ else
+ samples = calibrate();
+
+ printf("Benchmarking %llu samples...\n", samples);
+
+ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid native: %llu ns\n", native);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ assert(ret == 0);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ assert(ret == 0);
+
+ filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ printf("getpid RET_ALLOW: %llu ns\n", filtered);
+
+ printf("Estimated seccomp overhead per syscall: %llu ns\n",
+ filtered - native);
+
+ if (filtered == native)
+		printf("Try running again with more samples.\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
new file mode 100644
index 000000000..14cad657b
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -0,0 +1,2997 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * Test code for seccomp bpf.
+ */
+
+#include <sys/types.h>
+
+/*
+ * glibc 2.26 and later have SIGSYS in siginfo_t. Before that,
+ * we need to use the kernel's siginfo.h file and trick glibc
+ * into accepting it.
+ */
+#if !__GLIBC_PREREQ(2, 26)
+# include <asm/siginfo.h>
+# define __have_siginfo_t 1
+# define __have_sigval_t 1
+# define __have_sigevent_t 1
+#endif
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
+#include <linux/seccomp.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+#include <sys/utsname.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <sys/times.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef PR_SET_PTRACER
+# define PR_SET_PTRACER 0x59616d61
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_SECCOMP_EXT
+#define PR_SECCOMP_EXT 43
+#endif
+
+#ifndef SECCOMP_EXT_ACT
+#define SECCOMP_EXT_ACT 1
+#endif
+
+#ifndef SECCOMP_EXT_ACT_TSYNC
+#define SECCOMP_EXT_ACT_TSYNC 1
+#endif
+
+#ifndef SECCOMP_MODE_STRICT
+#define SECCOMP_MODE_STRICT 1
+#endif
+
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2
+#endif
+
+#ifndef SECCOMP_RET_ALLOW
+struct seccomp_data {
+ int nr;
+ __u32 arch;
+ __u64 instruction_pointer;
+ __u64 args[6];
+};
+#endif
+
+#ifndef SECCOMP_RET_KILL_PROCESS
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD 0x00000000U /* kill the thread */
+#endif
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG 0x7ffc0000U /* allow after logging */
+#endif
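+
+/*
+ * Only the SECCOMP_RET_ACTION bits of these return values select the
+ * behavior; the low 16 SECCOMP_RET_DATA bits carry auxiliary data such
+ * as an errno value or a SECCOMP_RET_TRACE cookie.
+ */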
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+# define __NR_seccomp 354
+# elif defined(__x86_64__)
+# define __NR_seccomp 317
+# elif defined(__arm__)
+# define __NR_seccomp 383
+# elif defined(__aarch64__)
+# define __NR_seccomp 277
+# elif defined(__hppa__)
+# define __NR_seccomp 338
+# elif defined(__powerpc__)
+# define __NR_seccomp 358
+# elif defined(__s390__)
+# define __NR_seccomp 348
+# else
+# warning "seccomp syscall number unknown for this architecture"
+# define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG (1UL << 1)
+#endif
+
+#ifndef SECCOMP_FILTER_FLAG_SPEC_ALLOW
+#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2)
+#endif
+
+#ifndef PTRACE_SECCOMP_GET_METADATA
+#define PTRACE_SECCOMP_GET_METADATA 0x420d
+
+struct seccomp_metadata {
+ __u64 filter_off; /* Input: which filter */
+ __u64 flags; /* Output: filter's flags */
+};
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+ errno = 0;
+ return syscall(__NR_seccomp, op, flags, args);
+}
+#endif
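+/* Typical use below: seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog). */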
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
+#else
+#error "wut? Unknown __BYTE_ORDER?!"
+#endif
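+
+/*
+ * Illustrative sketch only (never installed by these tests; the name
+ * example_arg0_filter is ours): a filter using syscall_arg() to match
+ * the low 32 bits of write(2)'s first argument against fd 1, failing
+ * such writes with EPERM and allowing everything else.
+ */
+static struct sock_filter example_arg0_filter[] __attribute__((unused)) = {
+	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)),
+	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 3),
+	BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 1),
+	BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+	BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+};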
+
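+/* Distinctive sentinel exit codes used by the thread-kill tests below. */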
+#define SIBLING_EXIT_UNKILLED 0xbadbeef
+#define SIBLING_EXIT_FAILURE 0xbadface
+#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+
+TEST(mode_strict_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_exit, 0);
+}
+
+TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
+{
+ long ret;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP");
+ }
+ syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ NULL, NULL, NULL);
+ EXPECT_FALSE(true) {
+ TH_LOG("Unreachable!");
+ }
+}
+
+/* Note! This doesn't test no new privs behavior */
+TEST(no_new_privs_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+}
+
+/* Tests kernel support by checking for a copy_from_user() fault on NULL. */
+TEST(mode_filter_support)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
+ }
+}
+
+TEST(mode_filter_without_nnp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
+ ASSERT_LE(0, ret) {
+ TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
+ }
+ errno = 0;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ /* Succeeds with CAP_SYS_ADMIN, fails without */
+ /* TODO(wad) check caps not euid */
+ if (geteuid()) {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EACCES, errno);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+}
+
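+/*
+ * Mirrors the kernel's per-task cap on total attached filter
+ * instructions; each installed filter is also charged a small fixed
+ * penalty against this limit (see filter_chain_limits below).
+ */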
+#define MAX_INSNS_PER_PATH 32768
+
+TEST(filter_size_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS + 1;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = count;
+
+ /* Too many filter instructions in a single filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_NE(0, ret) {
+ TH_LOG("Installing %d insn filter was allowed", prog.len);
+ }
+
+ /* One less is okay, though. */
+ prog.len -= 1;
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
+ }
+}
+
+TEST(filter_chain_limits)
+{
+ int i;
+ int count = BPF_MAXINSNS;
+ struct sock_filter allow[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter *filter;
+ struct sock_fprog prog = { };
+ long ret;
+
+ filter = calloc(count, sizeof(*filter));
+ ASSERT_NE(NULL, filter);
+
+ for (i = 0; i < count; i++)
+ filter[i] = allow[0];
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.filter = filter;
+ prog.len = 1;
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ prog.len = count;
+
+ /* Too many total filter instructions. */
+ for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ if (ret != 0)
+ break;
+ }
+ ASSERT_NE(0, ret) {
+ TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
+ i, count, i * (count + 4));
+ }
+}
+
+TEST(mode_filter_cannot_move_to_strict)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(mode_filter_get_seccomp)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ EXPECT_EQ(2, ret);
+}
+
+TEST(ALLOW_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST(empty_prog)
+{
+ struct sock_filter filter[] = {
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(log_all)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ /* getppid() should succeed and be logged (no check for logging) */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
+TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+/* Return codes >= 0x80000000 are unused. */
+TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+ EXPECT_EQ(0, syscall(__NR_getpid)) {
+ TH_LOG("getpid() shouldn't ever return");
+ }
+}
+
+TEST_SIGNAL(KILL_all, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+}
+
+TEST_SIGNAL(KILL_one, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
+{
+ void *fatal_address;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_times, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		/* Only bother with the lower 32 bits for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K,
+ (unsigned long)&fatal_address, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ struct tms timebuf;
+ clock_t clock = times(&timebuf);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_LE(clock, syscall(__NR_times, &timebuf));
+ /* times() should never return. */
+ EXPECT_EQ(0, syscall(__NR_times, &fatal_address));
+}
+
+TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
+{
+#ifndef __NR_mmap2
+ int sysno = __NR_mmap;
+#else
+ int sysno = __NR_mmap2;
+#endif
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, sysno, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		/* Only bother with the lower 32 bits for now. */
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+ int fd;
+ void *map1, *map2;
+ int page_size = sysconf(_SC_PAGESIZE);
+
+ ASSERT_LT(0, page_size);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ ASSERT_EQ(0, ret);
+
+ fd = open("/dev/zero", O_RDONLY);
+ ASSERT_NE(-1, fd);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ map1 = (void *)syscall(sysno,
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, page_size);
+ EXPECT_NE(MAP_FAILED, map1);
+ /* mmap2() should never return. */
+ map2 = (void *)syscall(sysno,
+ NULL, page_size, PROT_READ, MAP_PRIVATE, fd, 0x0C0FFEE);
+ EXPECT_EQ(MAP_FAILED, map2);
+
+ /* The test failed, so clean up the resources. */
+ munmap(map1, page_size);
+ munmap(map2, page_size);
+ close(fd);
+}
+
+/* Thread function that dies via a seccomp filter violation when asked to. */
+void *kill_thread(void *data)
+{
+ bool die = (bool)data;
+
+ if (die) {
+ prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+/* Prepare a thread that will kill itself or both of us. */
+void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+{
+ pthread_t thread;
+ void *status;
+ /* Kill only when calling __NR_prctl. */
+ struct sock_filter filter_thread[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog_thread = {
+ .len = (unsigned short)ARRAY_SIZE(filter_thread),
+ .filter = filter_thread,
+ };
+ struct sock_filter filter_process[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog_process = {
+ .len = (unsigned short)ARRAY_SIZE(filter_process),
+ .filter = filter_process,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
+ kill_process ? &prog_process : &prog_thread));
+
+ /*
+ * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
+ * flag cannot be downgraded by a new filter.
+ */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+
+ /* Start a thread that will exit immediately. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
+ ASSERT_EQ(0, pthread_join(thread, &status));
+ ASSERT_EQ(SIBLING_EXIT_UNKILLED, (unsigned long)status);
+
+ /* Start a thread that will die immediately. */
+ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)true));
+ ASSERT_EQ(0, pthread_join(thread, &status));
+ ASSERT_NE(SIBLING_EXIT_FAILURE, (unsigned long)status);
+
+ /*
+ * If we get here, only the spawned thread died. Let the parent know
+ * the whole process didn't die (i.e. this thread, the spawner,
+ * stayed running).
+ */
+ exit(42);
+}
+
+TEST(KILL_thread)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, false);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If only the thread was killed, we'll see exit 42. */
+ ASSERT_TRUE(WIFEXITED(status));
+ ASSERT_EQ(42, WEXITSTATUS(status));
+}
+
+TEST(KILL_process)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, true);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ ASSERT_TRUE(WIFSIGNALED(status));
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
+/* TODO(wad) add 64-bit versus 32-bit arg tests. */
+TEST(arg_out_of_range)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno);
+}
+
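+/*
+ * Declares prog_##name: a filter that fails read(2) with the given
+ * errno and allows every other syscall; used by the ERRNO_* tests.
+ */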
+#define ERRNO_FILTER(name, errno) \
+ struct sock_filter _read_filter_##name[] = { \
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, \
+ offsetof(struct seccomp_data, nr)), \
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), \
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | errno), \
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), \
+ }; \
+ struct sock_fprog prog_##name = { \
+ .len = (unsigned short)ARRAY_SIZE(_read_filter_##name), \
+ .filter = _read_filter_##name, \
+ }
+
+/* Make sure basic errno values are correctly passed through a filter. */
+TEST(ERRNO_valid)
+{
+ ERRNO_FILTER(valid, E2BIG);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_valid);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(E2BIG, errno);
+}
+
+/* Make sure an errno of zero is correctly handled by the arch code. */
+TEST(ERRNO_zero)
+{
+ ERRNO_FILTER(zero, 0);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_zero);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* "errno" of 0 is ok. */
+ EXPECT_EQ(0, read(0, NULL, 0));
+}
+
+/*
+ * The SECCOMP_RET_DATA mask is 16 bits wide, but errno is smaller.
+ * This tests that the errno value gets capped correctly, fixed by
+ * 580c57f10768 ("seccomp: cap SECCOMP_RET_ERRNO data to MAX_ERRNO").
+ */
+TEST(ERRNO_capped)
+{
+ ERRNO_FILTER(capped, 4096);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_capped);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(4095, errno);
+}
+
+/*
+ * Filters are processed in reverse order: last applied is executed first.
+ * Since only the SECCOMP_RET_ACTION mask is tested for return values, the
+ * SECCOMP_RET_DATA mask results will follow the most recently applied
+ * matching filter return (and not the lowest or highest value).
+ */
+TEST(ERRNO_order)
+{
+ ERRNO_FILTER(first, 11);
+ ERRNO_FILTER(second, 13);
+ ERRNO_FILTER(third, 12);
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_first);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_second);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_third);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(12, errno);
+}
+
+FIXTURE_DATA(TRAP) {
+ struct sock_fprog prog;
+};
+
+FIXTURE_SETUP(TRAP)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+}
+
+FIXTURE_TEARDOWN(TRAP)
+{
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+/* Ensure that SIGSYS overrides SIG_IGN */
+TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ signal(SIGSYS, SIG_IGN);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ syscall(__NR_getpid);
+}
+
+static siginfo_t TRAP_info;
+static volatile int TRAP_nr;
+static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+{
+ memcpy(&TRAP_info, info, sizeof(TRAP_info));
+ TRAP_nr = nr;
+}
+
+TEST_F(TRAP, handler)
+{
+ int ret, test;
+ struct sigaction act;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGSYS);
+
+ act.sa_sigaction = &TRAP_action;
+ act.sa_flags = SA_SIGINFO;
+ ret = sigaction(SIGSYS, &act, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigaction failed");
+ }
+ ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("sigprocmask failed");
+ }
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+ ASSERT_EQ(0, ret);
+ TRAP_nr = 0;
+ memset(&TRAP_info, 0, sizeof(TRAP_info));
+	/*
+	 * Expect the registers to be rolled back. (nr = error) may vary
+	 * based on arch.
+	 */
+ ret = syscall(__NR_getpid);
+ /* Silence gcc warning about volatile. */
+ test = TRAP_nr;
+ EXPECT_EQ(SIGSYS, test);
+ struct local_sigsys {
+ void *_call_addr; /* calling user insn */
+ int _syscall; /* triggering system call number */
+ unsigned int _arch; /* AUDIT_ARCH_* of syscall */
+ } *sigsys = (struct local_sigsys *)
+#ifdef si_syscall
+ &(TRAP_info.si_call_addr);
+#else
+ &TRAP_info.si_pid;
+#endif
+ EXPECT_EQ(__NR_getpid, sigsys->_syscall);
+ /* Make sure arch is non-zero. */
+ EXPECT_NE(0, sigsys->_arch);
+ EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
+}
+
+FIXTURE_DATA(precedence) {
+ struct sock_fprog allow;
+ struct sock_fprog log;
+ struct sock_fprog trace;
+ struct sock_fprog error;
+ struct sock_fprog trap;
+ struct sock_fprog kill;
+};
+
+FIXTURE_SETUP(precedence)
+{
+ struct sock_filter allow_insns[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter log_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+ };
+ struct sock_filter trace_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
+ };
+ struct sock_filter error_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
+ };
+ struct sock_filter trap_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+ };
+ struct sock_filter kill_insns[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ };
+
+ memset(self, 0, sizeof(*self));
+#define FILTER_ALLOC(_x) \
+ self->_x.filter = malloc(sizeof(_x##_insns)); \
+ ASSERT_NE(NULL, self->_x.filter); \
+ memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
+ self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
+ FILTER_ALLOC(allow);
+ FILTER_ALLOC(log);
+ FILTER_ALLOC(trace);
+ FILTER_ALLOC(error);
+ FILTER_ALLOC(trap);
+ FILTER_ALLOC(kill);
+}
+
+FIXTURE_TEARDOWN(precedence)
+{
+#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
+ FILTER_FREE(allow);
+ FILTER_FREE(log);
+ FILTER_FREE(trace);
+ FILTER_FREE(error);
+ FILTER_FREE(trap);
+ FILTER_FREE(kill);
+}
+
+TEST_F(precedence, allow_ok)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
+{
+ pid_t parent, res = 0;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ res = syscall(__NR_getppid);
+ EXPECT_EQ(parent, res);
+ /* getpid() should never return. */
+ res = syscall(__NR_getpid);
+ EXPECT_EQ(0, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth_in_any_order)
+{
+ pid_t parent;
+ long ret;
+
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* No ptracer */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth)
+{
+ pid_t mypid, parent;
+ long ret;
+
+ mypid = getpid();
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* Should also work just fine */
+ EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+ pid_t mypid, parent;
+ long ret;
+
+ mypid = getpid();
+ parent = getppid();
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+ ASSERT_EQ(0, ret);
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+ ASSERT_EQ(0, ret);
+ /* Should work just fine. */
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* Should also work just fine */
+ EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+#ifndef PTRACE_O_TRACESECCOMP
+#define PTRACE_O_TRACESECCOMP 0x00000080
+#endif
+
+/* Catch the Ubuntu 12.04 value error. */
+#if PTRACE_EVENT_SECCOMP != 7
+#undef PTRACE_EVENT_SECCOMP
+#endif
+
+#ifndef PTRACE_EVENT_SECCOMP
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
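+/* ptrace(2) reports events in the upper 16 bits of the wait status. */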
+#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+bool tracer_running;
+void tracer_stop(int sig)
+{
+ tracer_running = false;
+}
+
+typedef void tracer_func_t(struct __test_metadata *_metadata,
+ pid_t tracee, int status, void *args);
+
+void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+ tracer_func_t tracer_func, void *args, bool ptrace_syscall)
+{
+ int ret = -1;
+ struct sigaction action = {
+ .sa_handler = tracer_stop,
+ };
+
+ /* Allow external shutdown. */
+ tracer_running = true;
+ ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
+
+ errno = 0;
+ while (ret == -1 && errno != EINVAL)
+ ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
+ ASSERT_EQ(0, ret) {
+ kill(tracee, SIGKILL);
+ }
+ /* Wait for attach stop */
+ wait(NULL);
+
+ ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, ptrace_syscall ?
+ PTRACE_O_TRACESYSGOOD :
+ PTRACE_O_TRACESECCOMP);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
+ kill(tracee, SIGKILL);
+ }
+ ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+ tracee, NULL, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Unblock the tracee */
+ ASSERT_EQ(1, write(fd, "A", 1));
+ ASSERT_EQ(0, close(fd));
+
+ /* Run until we're shut down. Must assert to stop execution. */
+ while (tracer_running) {
+ int status;
+
+ if (wait(&status) != tracee)
+ continue;
+ if (WIFSIGNALED(status) || WIFEXITED(status))
+ /* Child is dead. Time to go. */
+ return;
+
+ /* Check if this is a seccomp event. */
+ ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
+
+ tracer_func(_metadata, tracee, status, args);
+
+ ret = ptrace(ptrace_syscall ? PTRACE_SYSCALL : PTRACE_CONT,
+ tracee, NULL, 0);
+ ASSERT_EQ(0, ret);
+ }
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/* Common tracer setup/teardown functions. */
+void cont_handler(int num)
+{ }
+pid_t setup_trace_fixture(struct __test_metadata *_metadata,
+ tracer_func_t func, void *args, bool ptrace_syscall)
+{
+ char sync;
+ int pipefd[2];
+ pid_t tracer_pid;
+ pid_t tracee = getpid();
+
+ /* Setup a pipe for clean synchronization. */
+ ASSERT_EQ(0, pipe(pipefd));
+
+ /* Fork a child which we'll promote to tracer */
+ tracer_pid = fork();
+ ASSERT_LE(0, tracer_pid);
+ signal(SIGALRM, cont_handler);
+ if (tracer_pid == 0) {
+ close(pipefd[0]);
+ start_tracer(_metadata, pipefd[1], tracee, func, args,
+ ptrace_syscall);
+ syscall(__NR_exit, 0);
+ }
+ close(pipefd[1]);
+ prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
+ read(pipefd[0], &sync, 1);
+ close(pipefd[0]);
+
+ return tracer_pid;
+}
+void teardown_trace_fixture(struct __test_metadata *_metadata,
+ pid_t tracer)
+{
+ if (tracer) {
+ int status;
+ /*
+ * Extract the exit code from the other process and
+ * adopt it for ourselves in case its asserts failed.
+ */
+ ASSERT_EQ(0, kill(tracer, SIGUSR1));
+ ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
+ if (WEXITSTATUS(status))
+ _metadata->passed = 0;
+ }
+}
+
+/* "poke" tracer arguments and function. */
+struct tracer_args_poke_t {
+ unsigned long poke_addr;
+};
+
+void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
+ void *args)
+{
+ int ret;
+ unsigned long msg;
+ struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
+
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+ /* If this fails, don't try to recover. */
+ ASSERT_EQ(0x1001, msg) {
+ kill(tracee, SIGKILL);
+ }
+ /*
+ * Poke in the message.
+ * Registers are not touched to try to keep this relatively arch
+ * agnostic.
+ */
+ ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
+ EXPECT_EQ(0, ret);
+}
+
+FIXTURE_DATA(TRACE_poke) {
+ struct sock_fprog prog;
+ pid_t tracer;
+ long poked;
+ struct tracer_args_poke_t tracer_args;
+};
+
+FIXTURE_SETUP(TRACE_poke)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ self->poked = 0;
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Set up tracer args. */
+ self->tracer_args.poke_addr = (unsigned long)&self->poked;
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_poke,
+ &self->tracer_args, false);
+}
+
+FIXTURE_TEARDOWN(TRACE_poke)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_poke, read_has_side_effects)
+{
+ ssize_t ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ ret = read(-1, NULL, 0);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(0x1001, self->poked);
+}
+
+TEST_F(TRACE_poke, getpid_runs_normally)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ EXPECT_EQ(0, self->poked);
+ EXPECT_NE(0, syscall(__NR_getpid));
+ EXPECT_EQ(0, self->poked);
+}
+
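+/*
+ * Per-architecture register-set type and the fields that hold the
+ * syscall number and return value when fetched via ptrace.
+ */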
+#if defined(__x86_64__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_rax
+# define SYSCALL_RET rax
+#elif defined(__i386__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM orig_eax
+# define SYSCALL_RET eax
+#elif defined(__arm__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM ARM_r7
+# define SYSCALL_RET ARM_r0
+#elif defined(__aarch64__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM regs[8]
+# define SYSCALL_RET regs[0]
+#elif defined(__hppa__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM gr[20]
+# define SYSCALL_RET gr[28]
+#elif defined(__powerpc__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM gpr[0]
+# define SYSCALL_RET gpr[3]
+#elif defined(__s390__)
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM gprs[2]
+# define SYSCALL_RET gprs[2]
+#elif defined(__mips__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM regs[2]
+# define SYSCALL_SYSCALL_NUM regs[4]
+# define SYSCALL_RET regs[2]
+# define SYSCALL_NUM_RET_SHARE_REG
+#else
+# error "Do not know how to find your architecture's registers and syscalls"
+#endif
+
+/* When the syscall return can't be changed, stub out the tests for it. */
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
+#else
+# define EXPECT_SYSCALL_RETURN(val, action) \
+ do { \
+ errno = 0; \
+ if (val < 0) { \
+ EXPECT_EQ(-1, action); \
+ EXPECT_EQ(-(val), errno); \
+ } else { \
+ EXPECT_EQ(val, action); \
+ } \
+ } while (0)
+#endif
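+/*
+ * e.g. EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)) checks both
+ * the -1 return and the resulting errno.
+ */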
+
+/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+ * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
+ */
+#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
+#define HAVE_GETREGS
+#endif
+
+/* Architecture-specific syscall fetching routine. */
+int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
+{
+ ARCH_REGS regs;
+#ifdef HAVE_GETREGS
+ EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
+ TH_LOG("PTRACE_GETREGS failed");
+ return -1;
+ }
+#else
+ struct iovec iov;
+
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
+ TH_LOG("PTRACE_GETREGSET failed");
+ return -1;
+ }
+#endif
+
+#if defined(__mips__)
+ if (regs.SYSCALL_NUM == __NR_O32_Linux)
+ return regs.SYSCALL_SYSCALL_NUM;
+#endif
+ return regs.SYSCALL_NUM;
+}
+
+/* Architecture-specific syscall changing routine. */
+void change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, int syscall, int result)
+{
+ int ret;
+ ARCH_REGS regs;
+#ifdef HAVE_GETREGS
+ ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
+#else
+ struct iovec iov;
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
+#endif
+ EXPECT_EQ(0, ret) {}
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
+ defined(__s390__) || defined(__hppa__)
+ {
+ regs.SYSCALL_NUM = syscall;
+ }
+#elif defined(__mips__)
+ {
+ if (regs.SYSCALL_NUM == __NR_O32_Linux)
+ regs.SYSCALL_SYSCALL_NUM = syscall;
+ else
+ regs.SYSCALL_NUM = syscall;
+ }
+
+#elif defined(__arm__)
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+ {
+ ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
+ EXPECT_EQ(0, ret);
+ }
+
+#elif defined(__aarch64__)
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+ {
+ iov.iov_base = &syscall;
+ iov.iov_len = sizeof(syscall);
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
+ &iov);
+ EXPECT_EQ(0, ret);
+ }
+
+#else
+ ASSERT_EQ(1, 0) {
+ TH_LOG("How is the syscall changed on this architecture?");
+ }
+#endif
+
+ /* If syscall is skipped, change return value. */
+ if (syscall == -1)
+#ifdef SYSCALL_NUM_RET_SHARE_REG
+ TH_LOG("Can't modify syscall return on this architecture");
+#else
+ regs.SYSCALL_RET = result;
+#endif
+
+#ifdef HAVE_GETREGS
+ ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
+#else
+ iov.iov_base = &regs;
+ iov.iov_len = sizeof(regs);
+ ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
+#endif
+ EXPECT_EQ(0, ret);
+}
+
+void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+ int status, void *args)
+{
+ int ret;
+ unsigned long msg;
+
+ /* Make sure we got the right message. */
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+
+ /* Validate and take action on expected syscalls. */
+ switch (msg) {
+ case 0x1002:
+ /* change getpid to getppid. */
+ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, __NR_getppid, 0);
+ break;
+ case 0x1003:
+ /* skip gettid with valid return code. */
+ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, -1, 45000);
+ break;
+ case 0x1004:
+ /* skip openat with error. */
+ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
+ change_syscall(_metadata, tracee, -1, -ESRCH);
+ break;
+ case 0x1005:
+ /* do nothing (allow getppid) */
+ EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
+ break;
+ default:
+ EXPECT_EQ(0, msg) {
+ TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
+ kill(tracee, SIGKILL);
+ }
+ }
+}
+
+void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
+ int status, void *args)
+{
+ int ret, nr;
+ unsigned long msg;
+ static bool entry;
+
+ /* Make sure we got an empty message. */
+ ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, msg);
+
+ /* The only way to tell PTRACE_SYSCALL entry/exit is by counting. */
+ entry = !entry;
+ if (!entry)
+ return;
+
+ nr = get_syscall(_metadata, tracee);
+
+ if (nr == __NR_getpid)
+ change_syscall(_metadata, tracee, __NR_getppid, 0);
+ if (nr == __NR_gettid)
+ change_syscall(_metadata, tracee, -1, 45000);
+ if (nr == __NR_openat)
+ change_syscall(_metadata, tracee, -1, -ESRCH);
+}
+
+FIXTURE_DATA(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+};
+
+FIXTURE_SETUP(TRACE_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->prog, 0, sizeof(self->prog));
+ self->prog.filter = malloc(sizeof(filter));
+ ASSERT_NE(NULL, self->prog.filter);
+ memcpy(self->prog.filter, filter, sizeof(filter));
+ self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+ /* Prepare some testable syscall results. */
+ self->mytid = syscall(__NR_gettid);
+ ASSERT_GT(self->mytid, 0);
+ ASSERT_NE(self->mytid, 1) {
+ TH_LOG("Running this test as init is not supported. :)");
+ }
+
+ self->mypid = getpid();
+ ASSERT_GT(self->mypid, 0);
+ ASSERT_EQ(self->mytid, self->mypid);
+
+ self->parent = getppid();
+ ASSERT_GT(self->parent, 0);
+ ASSERT_NE(self->parent, self->mypid);
+
+ /* Launch tracer. */
+ self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
+ false);
+}
+
+FIXTURE_TEARDOWN(TRACE_syscall)
+{
+ teardown_trace_fixture(_metadata, self->tracer);
+ if (self->prog.filter)
+ free(self->prog.filter);
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_redirected)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ /* Tracer will redirect getpid to getppid. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_errno)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+	/* Tracer should skip the openat syscall, resulting in ESRCH. */
+ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_faked)
+{
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+	/* Tracer should skip the gettid syscall, resulting in a fake tid. */
+ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+}
+
+TEST_F(TRACE_syscall, syscall_allowed)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getppid works as expected (no changes). */
+ EXPECT_EQ(self->parent, syscall(__NR_getppid));
+ EXPECT_NE(self->mypid, syscall(__NR_getppid));
+}
+
+TEST_F(TRACE_syscall, syscall_redirected)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* getpid has been redirected to getppid as expected. */
+ EXPECT_EQ(self->parent, syscall(__NR_getpid));
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, syscall_errno)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+	/* openat has been skipped and an errno returned. */
+ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, syscall_faked)
+{
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* gettid has been skipped and an altered return value stored. */
+ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+}
+
+TEST_F(TRACE_syscall, skip_after_RET_TRACE)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install fixture filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "errno on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should see EPERM. */
+ errno = 0;
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+ EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install fixture filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "death on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should die. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, skip_after_ptrace)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "errno on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should see EPERM. */
+ EXPECT_EQ(-1, syscall(__NR_getpid));
+ EXPECT_EQ(EPERM, errno);
+}
+
+TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+ teardown_trace_fixture(_metadata, self->tracer);
+ self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+ true);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Install "death on getppid" filter. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Tracer will redirect getpid to getppid, and we should die. */
+ EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST(seccomp_syscall)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Reject insane operation. */
+ ret = seccomp(-1, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy op value!");
+ }
+
+ /* Reject strict with flags or pointer. */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject mode strict with uargs!");
+ }
+
+ /* Reject insane args for filter. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Did not reject crazy filter flags!");
+ }
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Did not reject NULL filter!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ EXPECT_EQ(0, errno) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
+ strerror(errno));
+ }
+}
+
+TEST(seccomp_syscall_mode_lock)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install filter!");
+ }
+
+ /* Make sure neither entry point will switch to strict. */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Switched to mode strict!");
+ }
+}
+
+/*
+ * Test detection of known and unknown filter flags. Userspace needs to be able
+ * to check if a filter flag is supported by the current kernel and a good way
+ * of doing that is by attempting to enter filter mode, with the flag bit in
+ * question set, and a NULL pointer for the _args_ parameter. EFAULT indicates
+ * that the flag is valid and EINVAL indicates that the flag is invalid.
+ */
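+/*
+ * A minimal sketch of that probe pattern (illustrative only; any single
+ * known flag bit can stand in for TSYNC):
+ *
+ *	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, NULL);
+ *	if (ret == -1 && errno == EFAULT)
+ *		;	(the flag is supported)
+ */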
+TEST(detect_seccomp_filter_flags)
+{
+ unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+ SECCOMP_FILTER_FLAG_LOG,
+ SECCOMP_FILTER_FLAG_SPEC_ALLOW };
+ unsigned int flag, all_flags;
+ int i;
+ long ret;
+
+ /* Test detection of known-good filter flags */
+ for (i = 0, all_flags = 0; i < ARRAY_SIZE(flags); i++) {
+ int bits = 0;
+
+ flag = flags[i];
+ /* Make sure the flag is a single bit! */
+ while (flag) {
+ if (flag & 0x1)
+				bits++;
+ flag >>= 1;
+ }
+ ASSERT_EQ(1, bits);
+ flag = flags[i];
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that a known-good filter flag (0x%X) is supported!",
+ flag);
+ }
+
+ all_flags |= flag;
+ }
+
+ /* Test detection of all known-good filter flags */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, all_flags, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EFAULT, errno) {
+ TH_LOG("Failed to detect that all known-good filter flags (0x%X) are supported!",
+ all_flags);
+ }
+
+ /* Test detection of an unknown filter flag */
+ flag = -1;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported!",
+ flag);
+ }
+
+ /*
+ * Test detection of an unknown filter flag that may simply need to be
+ * added to this test
+ */
+ flag = flags[ARRAY_SIZE(flags) - 1] << 1;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flag, NULL);
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Failed to detect that an unknown filter flag (0x%X) is unsupported! Does a new flag need to be added to this test?",
+ flag);
+ }
+}
+
+TEST(TSYNC_first)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Could not install initial filter with TSYNC!");
+ }
+}
+
+#define TSYNC_SIBLINGS 2
+struct tsync_sibling {
+ pthread_t tid;
+ pid_t system_tid;
+ sem_t *started;
+ pthread_cond_t *cond;
+ pthread_mutex_t *mutex;
+ int diverge;
+ int num_waits;
+ struct sock_fprog *prog;
+ struct __test_metadata *metadata;
+};
+
+/*
+ * To avoid joining joined threads (which is not allowed by Bionic),
+ * make sure we both successfully join and clear the tid to skip a
+ * later join attempt during fixture teardown. Any remaining threads
+ * will be directly killed during teardown.
+ */
+#define PTHREAD_JOIN(tid, status) \
+ do { \
+ int _rc = pthread_join(tid, status); \
+ if (_rc) { \
+ TH_LOG("pthread_join of tid %u failed: %d\n", \
+ (unsigned int)tid, _rc); \
+ } else { \
+ tid = 0; \
+ } \
+ } while (0)
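+/* Typical use below: PTHREAD_JOIN(self->sibling[0].tid, &status); */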
+
+FIXTURE_DATA(TSYNC) {
+ struct sock_fprog root_prog, apply_prog;
+ struct tsync_sibling sibling[TSYNC_SIBLINGS];
+ sem_t started;
+ pthread_cond_t cond;
+ pthread_mutex_t mutex;
+ int sibling_count;
+};
+
+FIXTURE_SETUP(TSYNC)
+{
+ struct sock_filter root_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter apply_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ memset(&self->root_prog, 0, sizeof(self->root_prog));
+ memset(&self->apply_prog, 0, sizeof(self->apply_prog));
+ memset(&self->sibling, 0, sizeof(self->sibling));
+ self->root_prog.filter = malloc(sizeof(root_filter));
+ ASSERT_NE(NULL, self->root_prog.filter);
+ memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
+ self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
+
+ self->apply_prog.filter = malloc(sizeof(apply_filter));
+ ASSERT_NE(NULL, self->apply_prog.filter);
+ memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
+ self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
+
+ self->sibling_count = 0;
+ pthread_mutex_init(&self->mutex, NULL);
+ pthread_cond_init(&self->cond, NULL);
+ sem_init(&self->started, 0, 0);
+ self->sibling[0].tid = 0;
+ self->sibling[0].cond = &self->cond;
+ self->sibling[0].started = &self->started;
+ self->sibling[0].mutex = &self->mutex;
+ self->sibling[0].diverge = 0;
+ self->sibling[0].num_waits = 1;
+ self->sibling[0].prog = &self->root_prog;
+ self->sibling[0].metadata = _metadata;
+ self->sibling[1].tid = 0;
+ self->sibling[1].cond = &self->cond;
+ self->sibling[1].started = &self->started;
+ self->sibling[1].mutex = &self->mutex;
+ self->sibling[1].diverge = 0;
+ self->sibling[1].prog = &self->root_prog;
+ self->sibling[1].num_waits = 1;
+ self->sibling[1].metadata = _metadata;
+}
+
+FIXTURE_TEARDOWN(TSYNC)
+{
+ int sib = 0;
+
+ if (self->root_prog.filter)
+ free(self->root_prog.filter);
+ if (self->apply_prog.filter)
+ free(self->apply_prog.filter);
+
+ for ( ; sib < self->sibling_count; ++sib) {
+ struct tsync_sibling *s = &self->sibling[sib];
+
+ if (!s->tid)
+ continue;
+ /*
+ * If a thread is still running, it may be stuck, so hit
+ * it over the head really hard.
+ */
+		pthread_kill(s->tid, SIGKILL);
+ }
+ pthread_mutex_destroy(&self->mutex);
+ pthread_cond_destroy(&self->cond);
+ sem_destroy(&self->started);
+}
+
+void *tsync_sibling(void *data)
+{
+ long ret = 0;
+ struct tsync_sibling *me = data;
+
+ me->system_tid = syscall(__NR_gettid);
+
+ pthread_mutex_lock(me->mutex);
+ if (me->diverge) {
+		/* Just re-apply the root prog to diverge this thread's filter tree */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+ me->prog, 0, 0);
+ }
+ sem_post(me->started);
+	/* Return only after posting "started" so the parent notices failures. */
+ if (ret) {
+ pthread_mutex_unlock(me->mutex);
+ return (void *)SIBLING_EXIT_FAILURE;
+ }
+ do {
+ pthread_cond_wait(me->cond, me->mutex);
+ me->num_waits = me->num_waits - 1;
+ } while (me->num_waits);
+ pthread_mutex_unlock(me->mutex);
+
+ ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+ if (!ret)
+ return (void *)SIBLING_EXIT_NEWPRIVS;
+ read(0, NULL, 0);
+ return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+void tsync_start_sibling(struct tsync_sibling *sibling)
+{
+ pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
+}
+
+TEST_F(TSYNC, siblings_fail_prctl)
+{
+ long ret;
+ void *status;
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check prctl failure detection by requesting sib 0 diverge. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("setting filter failed");
+ }
+
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+	/* Signal the threads to clean up */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure diverging sibling failed to call prctl. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_ancestor)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ /* Ensure they are both killed and don't exit cleanly. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_sibling_want_nnp)
+{
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ /* Tell the siblings to test no policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both upset about lacking nnp. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_no_filter)
+{
+ long ret;
+ void *status;
+
+ /* start siblings before any prctl() operations */
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Could install filter on all threads!");
+ }
+
+ /* Tell the siblings to test the policy */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both killed and don't exit cleanly. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_one_divergence)
+{
+ long ret;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(self->sibling[0].system_tid, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_not_under_filter)
+{
+ long ret, sib;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /*
+ * Sibling 0 will have its own seccomp policy
+ * and Sibling 1 will not be under seccomp at
+ * all. Sibling 1 will enter seccomp and 0
+ * will cause failure.
+ */
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(ret, self->sibling[0].system_tid) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+ sib = 1;
+ if (ret == self->sibling[0].system_tid)
+ sib = 0;
+
+ pthread_mutex_lock(&self->mutex);
+
+	/* Increment the other sibling's num_waits so we can clean up
+	 * the one we just saw.
+	 */
+ self->sibling[!sib].num_waits += 1;
+
+	/* Signal the thread to clean up */
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ PTHREAD_JOIN(self->sibling[sib].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+		usleep(100000);	/* 100ms; sleep() only takes whole seconds */
+ /* Switch to the remaining sibling */
+ sib = !sib;
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Expected the remaining sibling to sync");
+ };
+
+ pthread_mutex_lock(&self->mutex);
+
+ /* If remaining sibling didn't have a chance to wake up during
+ * the first broadcast, manually reduce the num_waits now.
+ */
+ if (self->sibling[sib].num_waits > 1)
+ self->sibling[sib].num_waits = 1;
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+ PTHREAD_JOIN(self->sibling[sib].tid, &status);
+ EXPECT_EQ(0, (long)status);
+ /* Poll for actual task death. pthread_join doesn't guarantee it. */
+ while (!kill(self->sibling[sib].system_tid, 0))
+		usleep(100000);	/* 100ms; sleep() only takes whole seconds */
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
+ &self->apply_prog);
+ ASSERT_EQ(0, ret); /* just us chickens */
+}
+
+/* Make sure restarted syscalls are seen directly as "restart_syscall". */
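+/*
+ * When a signal interrupts nanosleep(), the kernel resumes it by
+ * re-entering through restart_syscall(2); the filter below tags
+ * nanosleep with TRACE|0x100 and restart_syscall with TRACE|0x200 so
+ * both phases are visible to the tracer.
+ */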
+TEST(syscall_restart)
+{
+ long ret;
+ unsigned long msg;
+ pid_t child_pid;
+ int pipefd[2];
+ int status;
+ siginfo_t info = { };
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+
+#ifdef __NR_sigreturn
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+#endif
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
+
+ /* Allow __NR_write for easy logging. */
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ /* The nanosleep jump target. */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100),
+ /* The restart_syscall jump target. */
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+#if defined(__arm__)
+ struct utsname utsbuf;
+#endif
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ /* Child uses EXPECT not ASSERT to deliver status correctly. */
+ char buf = ' ';
+ struct timespec timeout = { };
+
+ /* Attach parent as tracer and stop. */
+ EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
+ EXPECT_EQ(0, raise(SIGSTOP));
+
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Failed to install filter!");
+ }
+
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed to read() sync from parent");
+ }
+ EXPECT_EQ('.', buf) {
+ TH_LOG("Failed to get sync data from read()");
+ }
+
+ /* Start nanosleep to be interrupted. */
+ timeout.tv_sec = 1;
+ errno = 0;
+ EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
+ TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+ }
+
+ /* Read final sync from parent. */
+ EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+ TH_LOG("Failed final read() from parent");
+ }
+ EXPECT_EQ('!', buf) {
+ TH_LOG("Failed to get final data from read()");
+ }
+
+ /* Directly report the status of our test harness results. */
+ syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
+ : EXIT_FAILURE);
+ }
+ EXPECT_EQ(0, close(pipefd[0]));
+
+ /* Attach to child, setup options, and release. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
+ PTRACE_O_TRACESECCOMP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], ".", 1));
+
+ /* Wait for nanosleep() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+ ASSERT_EQ(0x100, msg);
+ EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
+
+ /* Might as well check siginfo for sanity while we're here. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ ASSERT_EQ(SIGTRAP, info.si_signo);
+ ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
+ EXPECT_EQ(0, info.si_errno);
+ EXPECT_EQ(getuid(), info.si_uid);
+ /* Verify signal delivery came from child (seccomp-triggered). */
+ EXPECT_EQ(child_pid, info.si_pid);
+
+ /* Interrupt nanosleep with SIGSTOP (which we'll need to handle). */
+ ASSERT_EQ(0, kill(child_pid, SIGSTOP));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
+ /* Verify signal delivery came from parent now. */
+ ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+ EXPECT_EQ(getpid(), info.si_pid);
+
+ /* Restart nanosleep with SIGCONT, which triggers restart_syscall. */
+ ASSERT_EQ(0, kill(child_pid, SIGCONT));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGCONT, WSTOPSIG(status));
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+
+ /* Wait for restart_syscall() to start. */
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ ASSERT_EQ(true, WIFSTOPPED(status));
+ ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+ ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+ ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+
+ ASSERT_EQ(0x200, msg);
+ ret = get_syscall(_metadata, child_pid);
+#if defined(__arm__)
+ /*
+ * FIXME:
+ * - native ARM registers do NOT expose true syscall.
+ * - compat ARM registers on ARM64 DO expose true syscall.
+ */
+ ASSERT_EQ(0, uname(&utsbuf));
+ if (strncmp(utsbuf.machine, "arm", 3) == 0) {
+ EXPECT_EQ(__NR_nanosleep, ret);
+ } else
+#endif
+ {
+ EXPECT_EQ(__NR_restart_syscall, ret);
+ }
+
+ /* Write again to end test. */
+ ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+ ASSERT_EQ(1, write(pipefd[1], "!", 1));
+ EXPECT_EQ(0, close(pipefd[1]));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+ if (WIFSIGNALED(status) || WEXITSTATUS(status))
+ _metadata->passed = 0;
+}
+
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_filter kill_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct sock_fprog kill_prog = {
+ .len = (unsigned short)ARRAY_SIZE(kill_filter),
+ .filter = kill_filter,
+ };
+ long ret;
+ pid_t parent = getppid();
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
+
+ /* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+ ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+ &allow_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ EXPECT_NE(0, ret) {
+ TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+ }
+ EXPECT_EQ(EINVAL, errno) {
+ TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+ }
+
+ /* Verify that a simple, permissive filter can be added with no flags */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+ EXPECT_EQ(0, ret);
+
+ /* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+ &allow_prog);
+ ASSERT_NE(EINVAL, errno) {
+ TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+ }
+ EXPECT_EQ(0, ret);
+
+ /* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+ &kill_prog);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(parent, syscall(__NR_getppid));
+ /* getpid() should never return. */
+ EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST(get_action_avail)
+{
+ __u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
+ SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+ SECCOMP_RET_LOG, SECCOMP_RET_ALLOW };
+ __u32 unknown_action = 0x10000000U;
+ int i;
+ long ret;
+
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_NE(EINVAL, errno) {
+ TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+ }
+ EXPECT_EQ(ret, 0);
+
+ for (i = 0; i < ARRAY_SIZE(actions); i++) {
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("Expected action (0x%X) not available!",
+ actions[i]);
+ }
+ }
+
+ /* Check that an unknown action is handled properly (EOPNOTSUPP) */
+ ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
+TEST(get_metadata)
+{
+ pid_t pid;
+ int pipefd[2];
+ char buf;
+ struct seccomp_metadata md;
+ long ret;
+
+ /* Only real root can get metadata. */
+ if (geteuid()) {
+		XFAIL(return, "get_metadata requires real root");
+ }
+
+ ASSERT_EQ(0, pipe(pipefd));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ /* one with log, one without */
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_LOG, &prog));
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog));
+
+ ASSERT_EQ(0, close(pipefd[0]));
+ ASSERT_EQ(1, write(pipefd[1], "1", 1));
+ ASSERT_EQ(0, close(pipefd[1]));
+
+ while (1)
+ sleep(100);
+ }
+
+ ASSERT_EQ(0, close(pipefd[1]));
+ ASSERT_EQ(1, read(pipefd[0], &buf, 1));
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, pid));
+ ASSERT_EQ(pid, waitpid(pid, NULL, 0));
+
+	/*
+	 * Past here, do not use ASSERT: on a failed ASSERT the child
+	 * process would never be killed.
+	 */
+
+ md.filter_off = 0;
+ errno = 0;
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret) {
+ if (errno == EINVAL)
+ XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+ }
+
+ EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
+ EXPECT_EQ(md.filter_off, 0);
+
+ md.filter_off = 1;
+ ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
+ EXPECT_EQ(sizeof(md), ret);
+ EXPECT_EQ(md.flags, 0);
+ EXPECT_EQ(md.filter_off, 1);
+
+skip:
+ ASSERT_EQ(0, kill(pid, SIGKILL));
+}
+
+/*
+ * TODO:
+ * - add microbenchmarks
+ * - expand NNP testing
+ * - better arch-specific TRACE and TRAP handlers.
+ * - endianness checking when appropriate
+ * - 64-bit arg prodding
+ * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
+ * - ...
+ */
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/sigaltstack/.gitignore
new file mode 100644
index 000000000..35897b0a3
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/.gitignore
@@ -0,0 +1 @@
+sas
diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/sigaltstack/Makefile
new file mode 100644
index 000000000..f68fbf80d
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/Makefile
@@ -0,0 +1,5 @@
+CFLAGS = -Wall
+TEST_GEN_PROGS = sas
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
new file mode 100644
index 000000000..228c2ae47
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stas Sergeev <stsp@users.sourceforge.net>
+ *
+ * test sigaltstack(SS_ONSTACK | SS_AUTODISARM)
+ * If that succeeds, then swapcontext() can be used inside sighandler safely.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <ucontext.h>
+#include <alloca.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+#ifndef SS_AUTODISARM
+#define SS_AUTODISARM (1U << 31)
+#endif
+
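+/*
+ * SS_AUTODISARM disarms the alternate signal stack while a handler is
+ * running on it, so a context that swapcontext()s away from the handler
+ * cannot have that stack clobbered by a nested signal.
+ */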
+static void *sstack, *ustack;
+static ucontext_t uc, sc;
+static const char *msg = "[OK]\tStack preserved";
+static const char *msg2 = "[FAIL]\tStack corrupted";
+struct stk_data {
+ char msg[128];
+ int flag;
+};
+
+void my_usr1(int sig, siginfo_t *si, void *u)
+{
+ char *aa;
+ int err;
+ stack_t stk;
+ struct stk_data *p;
+
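+	/*
+	 * Bind a local variable to the stack-pointer register so we can
+	 * verify the handler really runs on the alternate stack.
+	 */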
+#ifdef __s390x__
+ register unsigned long sp asm("%15");
+#else
+ register unsigned long sp asm("sp");
+#endif
+
+ if (sp < (unsigned long)sstack ||
+ sp >= (unsigned long)sstack + SIGSTKSZ) {
+ ksft_exit_fail_msg("SP is not on sigaltstack\n");
+ }
+ /* put some data on stack. other sighandler will try to overwrite it */
+ aa = alloca(1024);
+ assert(aa);
+ p = (struct stk_data *)(aa + 512);
+ strcpy(p->msg, msg);
+ p->flag = 1;
+ ksft_print_msg("[RUN]\tsignal USR1\n");
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags != SS_DISABLE)
+ ksft_test_result_fail("tss_flags=%x, should be SS_DISABLE\n",
+ stk.ss_flags);
+ else
+ ksft_test_result_pass(
+ "sigaltstack is disabled in sighandler\n");
+ swapcontext(&sc, &uc);
+ ksft_print_msg("%s\n", p->msg);
+ if (!p->flag) {
+ ksft_exit_skip("[RUN]\tAborting\n");
+ exit(EXIT_FAILURE);
+ }
+}
+
+void my_usr2(int sig, siginfo_t *si, void *u)
+{
+ char *aa;
+ struct stk_data *p;
+
+ ksft_print_msg("[RUN]\tsignal USR2\n");
+ aa = alloca(1024);
+	/* don't run valgrind on this */
+ /* try to find the data stored by previous sighandler */
+ p = memmem(aa, 1024, msg, strlen(msg));
+ if (p) {
+ ksft_test_result_fail("sigaltstack re-used\n");
+ /* corrupt the data */
+ strcpy(p->msg, msg2);
+ /* tell other sighandler that his data is corrupted */
+ p->flag = 0;
+ }
+}
+
+static void switch_fn(void)
+{
+ ksft_print_msg("[RUN]\tswitched to user ctx\n");
+ raise(SIGUSR2);
+ setcontext(&sc);
+}
+
+int main(void)
+{
+ struct sigaction act;
+ stack_t stk;
+ int err;
+
+ ksft_print_header();
+
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+ act.sa_sigaction = my_usr1;
+ sigaction(SIGUSR1, &act, NULL);
+ act.sa_sigaction = my_usr2;
+ sigaction(SIGUSR2, &act, NULL);
+ sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (sstack == MAP_FAILED) {
+ ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags == SS_DISABLE) {
+ ksft_test_result_pass(
+ "Initial sigaltstack state was SS_DISABLE\n");
+ } else {
+ ksft_exit_fail_msg("Initial sigaltstack state was %x; "
+ "should have been SS_DISABLE\n", stk.ss_flags);
+ return EXIT_FAILURE;
+ }
+
+ stk.ss_sp = sstack;
+ stk.ss_size = SIGSTKSZ;
+ stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
+ err = sigaltstack(&stk, NULL);
+ if (err) {
+ if (errno == EINVAL) {
+ ksft_exit_skip(
+ "[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
+ /*
+ * If test cases for the !SS_AUTODISARM variant were
+ * added, we could still run them. We don't have any
+ * test cases like that yet, so just exit and report
+ * success.
+ */
+ return 0;
+ } else {
+ ksft_exit_fail_msg(
+ "sigaltstack(SS_ONSTACK | SS_AUTODISARM) %s\n",
+ strerror(errno));
+ return EXIT_FAILURE;
+ }
+ }
+
+ ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (ustack == MAP_FAILED) {
+ ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
+ return EXIT_FAILURE;
+ }
+ getcontext(&uc);
+ uc.uc_link = NULL;
+ uc.uc_stack.ss_sp = ustack;
+ uc.uc_stack.ss_size = SIGSTKSZ;
+ makecontext(&uc, switch_fn, 0);
+ raise(SIGUSR1);
+
+ err = sigaltstack(NULL, &stk);
+ if (err) {
+ ksft_exit_fail_msg("sigaltstack() - %s\n", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ if (stk.ss_flags != SS_AUTODISARM) {
+ ksft_exit_fail_msg("ss_flags=%x, should be SS_AUTODISARM\n",
+ stk.ss_flags);
+ exit(EXIT_FAILURE);
+ }
+ ksft_test_result_pass(
+ "sigaltstack is still SS_AUTODISARM after signal\n");
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/size/.gitignore b/tools/testing/selftests/size/.gitignore
new file mode 100644
index 000000000..189b7818d
--- /dev/null
+++ b/tools/testing/selftests/size/.gitignore
@@ -0,0 +1 @@
+get_size
diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile
new file mode 100644
index 000000000..4685b3e42
--- /dev/null
+++ b/tools/testing/selftests/size/Makefile
@@ -0,0 +1,5 @@
+CFLAGS := -static -ffreestanding -nostartfiles -s
+
+TEST_GEN_PROGS := get_size
+
+include ../lib.mk
diff --git a/tools/testing/selftests/size/get_size.c b/tools/testing/selftests/size/get_size.c
new file mode 100644
index 000000000..f55943b6d
--- /dev/null
+++ b/tools/testing/selftests/size/get_size.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright 2014 Sony Mobile Communications Inc.
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftest for runtime system size
+ *
+ * Prints the amount of RAM that the currently running system is using.
+ *
+ * This program tries to be as small as possible itself, to
+ * avoid perturbing the system memory utilization with its
+ * own execution. It also attempts to have as few dependencies
+ * on kernel features as possible.
+ *
+ * It should be statically linked, with startup libs avoided. It uses
+ * no library calls except the syscall() function for the following 3
+ * syscalls:
+ * sysinfo(), write(), and _exit()
+ *
+ * For output, it avoids printf (which in some C libraries
+ * has large external dependencies) by implementing it's own
+ * number output and print routines, and using __builtin_strlen()
+ *
+ * The test may crash if any of the above syscalls fails because in some
+ * libc implementations (e.g. the GNU C Library) errno is saved in
+ * thread-local storage, which does not get initialized due to avoiding
+ * startup libs.
+ */
+
+#include <sys/sysinfo.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#define STDOUT_FILENO 1
+
+static int print(const char *s)
+{
+ size_t len = 0;
+
+ while (s[len] != '\0')
+ len++;
+
+ return syscall(SYS_write, STDOUT_FILENO, s, len);
+}
+
+static inline char *num_to_str(unsigned long num, char *buf, int len)
+{
+ unsigned int digit;
+
+ /* put digits in buffer from back to front */
+ buf += len - 1;
+ *buf = 0;
+ do {
+ digit = num % 10;
+ *(--buf) = digit + '0';
+ num /= 10;
+ } while (num > 0);
+
+ return buf;
+}
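+/*
+ * For example, num_to_str(1024, buf, sizeof(buf)) returns a pointer to
+ * the string "1024" inside buf.
+ */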
+
+static int print_num(unsigned long num)
+{
+ char num_buf[30];
+
+ return print(num_to_str(num, num_buf, sizeof(num_buf)));
+}
+
+static int print_k_value(const char *s, unsigned long num, unsigned long units)
+{
+ unsigned long long temp;
+ int ccode;
+
+ print(s);
+
+ temp = num;
+ temp = (temp * units)/1024;
+ num = temp;
+ ccode = print_num(num);
+ print("\n");
+ return ccode;
+}
+
+/* this program has no main(), as startup libraries are not used */
+void _start(void)
+{
+ int ccode;
+ struct sysinfo info;
+ unsigned long used;
+ static const char *test_name = " get runtime memory use\n";
+
+ print("TAP version 13\n");
+ print("# Testing system size.\n");
+
+ ccode = syscall(SYS_sysinfo, &info);
+ if (ccode < 0) {
+ print("not ok 1");
+ print(test_name);
+ print(" ---\n reason: \"could not get sysinfo\"\n ...\n");
+ syscall(SYS_exit, ccode);
+ }
+ print("ok 1");
+ print(test_name);
+
+ /* ignore cache complexities for now */
+ used = info.totalram - info.freeram - info.bufferram;
+ print("# System runtime memory report (units in Kilobytes):\n");
+ print(" ---\n");
+ print_k_value(" Total: ", info.totalram, info.mem_unit);
+ print_k_value(" Free: ", info.freeram, info.mem_unit);
+ print_k_value(" Buffer: ", info.bufferram, info.mem_unit);
+ print_k_value(" In use: ", used, info.mem_unit);
+ print(" ...\n");
+ print("1..1\n");
+
+ syscall(SYS_exit, 0);
+}
diff --git a/tools/testing/selftests/sparc64/Makefile b/tools/testing/selftests/sparc64/Makefile
new file mode 100644
index 000000000..a19531dba
--- /dev/null
+++ b/tools/testing/selftests/sparc64/Makefile
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
+
+ifneq ($(ARCH),sparc64)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+SUBDIRS := drivers
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+
+include ../lib.mk
+
+all:
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
+ TEST=$$DIR"_test.sh"; \
+ if [ -e $$DIR/$$TEST ]; then \
+ rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
+ fi \
+ done
+
+override define INSTALL_RULE
+ mkdir -p $(INSTALL_PATH)
+ install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+ @for SUBDIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
+ mkdir $$BUILD_TARGET -p; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+ done;
+endef
+
+override define CLEAN
+ @for DIR in $(SUBDIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir $$BUILD_TARGET -p; \
+ make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ done
+endef
+endif
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
new file mode 100644
index 000000000..90e835ed7
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -0,0 +1 @@
+adi-test
diff --git a/tools/testing/selftests/sparc64/drivers/Makefile b/tools/testing/selftests/sparc64/drivers/Makefile
new file mode 100644
index 000000000..deb0df415
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+INCLUDEDIR := -I.
+CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
+
+TEST_GEN_FILES := adi-test
+
+all: $(TEST_GEN_FILES)
+
+$(TEST_GEN_FILES): adi-test.c
+
+TEST_PROGS := drivers_test.sh
+
+include ../../lib.mk
+
+$(OUTPUT)/adi-test: adi-test.c
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
new file mode 100644
index 000000000..95d93c6a8
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -0,0 +1,721 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * selftest for sparc64's privileged ADI driver
+ *
+ * Author: Tom Hromatka <tom.hromatka@oracle.com>
+ */
+#include <linux/kernel.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../../kselftest.h"
+
+#define DEBUG_LEVEL_1_BIT (0x0001)
+#define DEBUG_LEVEL_2_BIT (0x0002)
+#define DEBUG_LEVEL_3_BIT (0x0004)
+#define DEBUG_LEVEL_4_BIT (0x0008)
+#define DEBUG_TIMING_BIT (0x1000)
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+/* bit mask of enabled bits to print */
+#define DEBUG 0x0001
+
+#define DEBUG_PRINT_L1(...) debug_print(DEBUG_LEVEL_1_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L2(...) debug_print(DEBUG_LEVEL_2_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L3(...) debug_print(DEBUG_LEVEL_3_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_L4(...) debug_print(DEBUG_LEVEL_4_BIT, __VA_ARGS__)
+#define DEBUG_PRINT_T(...) debug_print(DEBUG_TIMING_BIT, __VA_ARGS__)
+
+static void debug_print(int level, const char *s, ...)
+{
+ va_list args;
+
+ va_start(args, s);
+
+ if (DEBUG & level)
+ vfprintf(stdout, s, args);
+ va_end(args);
+}
+
+#ifndef min
+#define min(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
+#define RETURN_FROM_TEST(_ret) \
+ do { \
+ DEBUG_PRINT_L1( \
+ "\tTest %s returned %d\n", __func__, _ret); \
+ return _ret; \
+ } while (0)
+
+#define ADI_BLKSZ 64
+#define ADI_MAX_VERSION 15
+
+#define TEST_STEP_FAILURE(_ret) \
+ do { \
+ fprintf(stderr, "\tTest step failure: %d at %s:%d\n", \
+ _ret, __func__, __LINE__); \
+ goto out; \
+ } while (0)
+
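+/* Read the SPARC %tick cycle counter, used here as a cheap timestamp. */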
+#define RDTICK(_x) \
+ asm volatile(" rd %%tick, %0\n" : "=r" (_x))
+
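+/* Derive a pseudo-random ADI version tag in [0, ADI_MAX_VERSION] from %tick. */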
+static int random_version(void)
+{
+ long tick;
+
+ RDTICK(tick);
+
+ return tick % (ADI_MAX_VERSION + 1);
+}
+
+#define MAX_RANGES_SUPPORTED 5
+static const char system_ram_str[] = "System RAM\n";
+static int range_count;
+static unsigned long long int start_addr[MAX_RANGES_SUPPORTED];
+static unsigned long long int end_addr[MAX_RANGES_SUPPORTED];
+
+struct stats {
+ char name[16];
+ unsigned long total;
+ unsigned long count;
+ unsigned long bytes;
+};
+
+static struct stats read_stats = {
+ .name = "read", .total = 0, .count = 0, .bytes = 0};
+static struct stats pread_stats = {
+ .name = "pread", .total = 0, .count = 0, .bytes = 0};
+static struct stats write_stats = {
+ .name = "write", .total = 0, .count = 0, .bytes = 0};
+static struct stats pwrite_stats = {
+ .name = "pwrite", .total = 0, .count = 0, .bytes = 0};
+static struct stats seek_stats = {
+ .name = "seek", .total = 0, .count = 0, .bytes = 0};
+
+static void update_stats(struct stats * const ustats,
+ unsigned long measurement, unsigned long bytes)
+{
+ ustats->total += measurement;
+ ustats->bytes += bytes;
+ ustats->count++;
+}
+
+static void print_ustats(const struct stats * const ustats)
+{
+ DEBUG_PRINT_L1("%s\t%7d\t%7.0f\t%7.0f\n",
+ ustats->name, ustats->count,
+ (float)ustats->total / (float)ustats->count,
+ (float)ustats->bytes / (float)ustats->count);
+}
+
+static void print_stats(void)
+{
+ DEBUG_PRINT_L1("\nSyscall\tCall\tAvgTime\tAvgSize\n"
+ "\tCount\t(ticks)\t(bytes)\n"
+ "-------------------------------\n");
+
+ print_ustats(&read_stats);
+ print_ustats(&pread_stats);
+ print_ustats(&write_stats);
+ print_ustats(&pwrite_stats);
+ print_ustats(&seek_stats);
+}
+
+static int build_memory_map(void)
+{
+ char line[256];
+ FILE *fp;
+ int i;
+
+ range_count = 0;
+
+ fp = fopen("/proc/iomem", "r");
+ if (!fp) {
+ fprintf(stderr, "/proc/iomem: error %d: %s\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+
+ while (fgets(line, sizeof(line), fp) != 0) {
+ if (strstr(line, system_ram_str)) {
+ char *dash, *end_ptr;
+
+ /* Given a line like this:
+ * d0400000-10ffaffff : System RAM
+ * replace the "-" with a space
+ */
+ dash = strstr(line, "-");
+			dash[0] = ' ';
+
+ start_addr[range_count] = strtoull(line, &end_ptr, 16);
+ end_addr[range_count] = strtoull(end_ptr, NULL, 16);
+ range_count++;
+ }
+ }
+
+ fclose(fp);
+
+ DEBUG_PRINT_L1("RAM Ranges\n");
+ for (i = 0; i < range_count; i++)
+ DEBUG_PRINT_L1("\trange %d: 0x%llx\t- 0x%llx\n",
+ i, start_addr[i], end_addr[i]);
+
+ if (range_count == 0) {
+ fprintf(stderr, "No valid address ranges found. Error.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_adi(int fd, unsigned char *buf, int buf_sz)
+{
+ int ret, bytes_read = 0;
+ long start, end, elapsed_time = 0;
+
+ do {
+ RDTICK(start);
+ ret = read(fd, buf + bytes_read, buf_sz - bytes_read);
+ RDTICK(end);
+ if (ret < 0)
+ return -errno;
+
+ elapsed_time += end - start;
+ update_stats(&read_stats, elapsed_time, buf_sz);
+ bytes_read += ret;
+
+ } while (bytes_read < buf_sz);
+
+ DEBUG_PRINT_T("\tread elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tRead %d bytes\n", bytes_read);
+
+ return bytes_read;
+}
+
+static int pread_adi(int fd, unsigned char *buf,
+ int buf_sz, unsigned long offset)
+{
+ int ret, i, bytes_read = 0;
+ unsigned long cur_offset;
+ long start, end, elapsed_time = 0;
+
+ cur_offset = offset;
+ do {
+ RDTICK(start);
+ ret = pread(fd, buf + bytes_read, buf_sz - bytes_read,
+ cur_offset);
+ RDTICK(end);
+ if (ret < 0)
+ return -errno;
+
+ elapsed_time += end - start;
+ update_stats(&pread_stats, elapsed_time, buf_sz);
+ bytes_read += ret;
+ cur_offset += ret;
+
+ } while (bytes_read < buf_sz);
+
+ DEBUG_PRINT_T("\tpread elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tRead %d bytes starting at offset 0x%lx\n",
+ bytes_read, offset);
+ for (i = 0; i < bytes_read; i++)
+ DEBUG_PRINT_L4("\t\t0x%lx\t%d\n", offset + i, buf[i]);
+
+ return bytes_read;
+}
+
+static int write_adi(int fd, const unsigned char * const buf, int buf_sz)
+{
+ int ret, bytes_written = 0;
+ long start, end, elapsed_time = 0;
+
+ do {
+ RDTICK(start);
+ ret = write(fd, buf + bytes_written, buf_sz - bytes_written);
+ RDTICK(end);
+ if (ret < 0)
+ return -errno;
+
+ elapsed_time += (end - start);
+ update_stats(&write_stats, elapsed_time, buf_sz);
+ bytes_written += ret;
+ } while (bytes_written < buf_sz);
+
+ DEBUG_PRINT_T("\twrite elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tWrote %d of %d bytes\n", bytes_written, buf_sz);
+
+ return bytes_written;
+}
+
+static int pwrite_adi(int fd, const unsigned char * const buf,
+ int buf_sz, unsigned long offset)
+{
+ int ret, bytes_written = 0;
+ unsigned long cur_offset;
+ long start, end, elapsed_time = 0;
+
+ cur_offset = offset;
+
+ do {
+ RDTICK(start);
+ ret = pwrite(fd, buf + bytes_written,
+ buf_sz - bytes_written, cur_offset);
+ RDTICK(end);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(): error %d: %s\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+
+ elapsed_time += (end - start);
+ update_stats(&pwrite_stats, elapsed_time, buf_sz);
+ bytes_written += ret;
+ cur_offset += ret;
+
+ } while (bytes_written < buf_sz);
+
+ DEBUG_PRINT_T("\tpwrite elapsed timed = %ld\n", elapsed_time);
+ DEBUG_PRINT_L3("\tWrote %d of %d bytes starting at address 0x%lx\n",
+ bytes_written, buf_sz, offset);
+
+ return bytes_written;
+}
+
+static off_t seek_adi(int fd, off_t offset, int whence)
+{
+ long start, end;
+ off_t ret;
+
+ RDTICK(start);
+ ret = lseek(fd, offset, whence);
+ RDTICK(end);
+ DEBUG_PRINT_L2("\tlseek ret = 0x%llx\n", ret);
+ if (ret < 0)
+ goto out;
+
+ DEBUG_PRINT_T("\tlseek elapsed timed = %ld\n", end - start);
+ update_stats(&seek_stats, end - start, 0);
+
+out:
+ (void)lseek(fd, 0, SEEK_END);
+ return ret;
+}
+
+static int test0_prpw_aligned_1byte(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0x1000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[1], expected_version;
+ loff_t offset;
+ int ret;
+
+ version[0] = random_version();
+ expected_version = version[0];
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ if (expected_version != version[0]) {
+ DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+ expected_version, version[0]);
+ TEST_STEP_FAILURE(-expected_version);
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST1_VERSION_SZ 4096
+static int test1_prpw_aligned_4096bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0x6000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[TEST1_VERSION_SZ],
+ expected_version[TEST1_VERSION_SZ];
+ loff_t offset;
+ int ret, i;
+
+ for (i = 0; i < TEST1_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST1_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST2_VERSION_SZ 10327
+static int test2_prpw_aligned_10327bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (start_addr[0] + 0x6000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[TEST2_VERSION_SZ],
+ expected_version[TEST2_VERSION_SZ];
+ loff_t offset;
+ int ret, i;
+
+ for (i = 0; i < TEST2_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST2_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST3_VERSION_SZ 12541
+static int test3_prpw_unaligned_12541bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ ((start_addr[0] + 0xC000) & ~(ADI_BLKSZ - 1)) + 17;
+ unsigned char version[TEST3_VERSION_SZ],
+ expected_version[TEST3_VERSION_SZ];
+ loff_t offset;
+ int ret, i;
+
+ for (i = 0; i < TEST3_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ offset = paddr / ADI_BLKSZ;
+
+ ret = pwrite_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ ret = pread_adi(fd, version, sizeof(version), offset);
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST3_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+			DEBUG_PRINT_L2(
+				"\tExpected version %d but read version %d\n",
+				expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+static int test4_lseek(int fd)
+{
+#define OFFSET_ADD (0x100)
+#define OFFSET_SUBTRACT (0xFFFFFFF000000000)
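+/* As a signed off_t, OFFSET_SUBTRACT is negative: a large backwards seek. */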
+
+ off_t offset_out, offset_in;
+ int ret;
+
+ offset_in = 0x123456789abcdef0;
+ offset_out = seek_adi(fd, offset_in, SEEK_SET);
+ if (offset_out != offset_in) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+	/* Seeking to the current offset should return EINVAL */
+	offset_out = seek_adi(fd, offset_in, SEEK_SET);
+	if (offset_out < 0 && errno == EINVAL) {
+		DEBUG_PRINT_L2(
+			"\tSEEK_SET failed as designed. Not an error\n");
+	} else {
+		ret = -2;
+		TEST_STEP_FAILURE(ret);
+	}
+
+ offset_out = seek_adi(fd, 0, SEEK_CUR);
+ if (offset_out != offset_in) {
+ ret = -3;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ offset_out = seek_adi(fd, OFFSET_ADD, SEEK_CUR);
+ if (offset_out != (offset_in + OFFSET_ADD)) {
+ ret = -4;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ offset_out = seek_adi(fd, OFFSET_SUBTRACT, SEEK_CUR);
+ if (offset_out != (offset_in + OFFSET_ADD + OFFSET_SUBTRACT)) {
+ ret = -5;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+static int test5_rw_aligned_1byte(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0xF000) & ~(ADI_BLKSZ - 1);
+ unsigned char version, expected_version;
+ loff_t offset;
+ off_t oret;
+ int ret;
+
+ offset = paddr / ADI_BLKSZ;
+ version = expected_version = random_version();
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = write_adi(fd, &version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = read_adi(fd, &version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ if (expected_version != version) {
+ DEBUG_PRINT_L2("\tExpected version %d but read version %d\n",
+ expected_version, version);
+ TEST_STEP_FAILURE(-expected_version);
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST6_VERSION_SZ 9434
+static int test6_rw_aligned_9434bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ (end_addr[range_count - 1] - 0x5F000) & ~(ADI_BLKSZ - 1);
+ unsigned char version[TEST6_VERSION_SZ],
+ expected_version[TEST6_VERSION_SZ];
+ loff_t offset;
+ off_t oret;
+ int ret, i;
+
+ offset = paddr / ADI_BLKSZ;
+ for (i = 0; i < TEST6_VERSION_SZ; i++)
+ version[i] = expected_version[i] = random_version();
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = write_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ memset(version, 0, TEST6_VERSION_SZ);
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = read_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST6_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
+#define TEST7_VERSION_SZ 14963
+static int test7_rw_unaligned_14963bytes(int fd)
+{
+ /* somewhat arbitrarily chosen address */
+ unsigned long paddr =
+ ((start_addr[range_count - 1] + 0xF000) & ~(ADI_BLKSZ - 1)) + 39;
+ unsigned char version[TEST7_VERSION_SZ],
+ expected_version[TEST7_VERSION_SZ];
+ loff_t offset;
+ off_t oret;
+ int ret, i;
+
+ offset = paddr / ADI_BLKSZ;
+ for (i = 0; i < TEST7_VERSION_SZ; i++) {
+ version[i] = random_version();
+ expected_version[i] = version[i];
+ }
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = write_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ memset(version, 0, TEST7_VERSION_SZ);
+
+ oret = seek_adi(fd, offset, SEEK_SET);
+ if (oret != offset) {
+ ret = -1;
+ TEST_STEP_FAILURE(ret);
+ }
+
+ ret = read_adi(fd, version, sizeof(version));
+ if (ret != sizeof(version))
+ TEST_STEP_FAILURE(ret);
+
+ for (i = 0; i < TEST7_VERSION_SZ; i++) {
+ if (expected_version[i] != version[i]) {
+ DEBUG_PRINT_L2(
+ "\tExpected version %d but read version %d\n",
+ expected_version[i], version[i]);
+ TEST_STEP_FAILURE(-expected_version[i]);
+ }
+
+ paddr += ADI_BLKSZ;
+ }
+
+ ret = 0;
+out:
+ RETURN_FROM_TEST(ret);
+}
+
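+/* Each test returns 0 on success or a negative step code on failure */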
+static int (*tests[])(int fd) = {
+ test0_prpw_aligned_1byte,
+ test1_prpw_aligned_4096bytes,
+ test2_prpw_aligned_10327bytes,
+ test3_prpw_unaligned_12541bytes,
+ test4_lseek,
+ test5_rw_aligned_1byte,
+ test6_rw_aligned_9434bytes,
+	test7_rw_unaligned_14963bytes,
+};
+#define TEST_COUNT ARRAY_SIZE(tests)
+
+int main(int argc, char *argv[])
+{
+ int fd, ret, test;
+
+ ret = build_memory_map();
+ if (ret < 0)
+ return ret;
+
+ fd = open("/dev/adi", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open: error %d: %s\n",
+ errno, strerror(errno));
+ return -errno;
+ }
+
+ for (test = 0; test < TEST_COUNT; test++) {
+ DEBUG_PRINT_L1("Running test #%d\n", test);
+
+ ret = (*tests[test])(fd);
+ if (ret != 0)
+ ksft_test_result_fail("Test #%d failed: error %d\n",
+ test, ret);
+ else
+ ksft_test_result_pass("Test #%d passed\n", test);
+ }
+
+ print_stats();
+ close(fd);
+
+ if (ksft_get_fail_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+
+	/*
+	 * It's impossible to get here, but the compiler warns about
+	 * control reaching the end of a non-void function.
+	 */
+ return 0;
+}
diff --git a/tools/testing/selftests/sparc64/drivers/drivers_test.sh b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
new file mode 100755
index 000000000..6d08273b7
--- /dev/null
+++ b/tools/testing/selftests/sparc64/drivers/drivers_test.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+SRC_TREE=../../../../
+
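+# Prefer an in-tree adi.ko when one was built; otherwise fall back to
+# modprobe, skipping if the module is not available at all.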
+test_run()
+{
+ if [ -f ${SRC_TREE}/drivers/char/adi.ko ]; then
+ insmod ${SRC_TREE}/drivers/char/adi.ko 2> /dev/null
+ if [ $? -ne 0 ]; then
+ rc=1
+ fi
+ else
+ # Use modprobe dry run to check for missing adi module
+ if ! /sbin/modprobe -q -n adi; then
+ echo "adi: [SKIP]"
+ elif /sbin/modprobe -q adi; then
+ echo "adi: ok"
+ else
+ echo "adi: [FAIL]"
+ rc=1
+ fi
+ fi
+ ./adi-test
+ rmmod adi 2> /dev/null
+}
+
+rc=0
+test_run
+exit $rc
diff --git a/tools/testing/selftests/sparc64/run.sh b/tools/testing/selftests/sparc64/run.sh
new file mode 100755
index 000000000..38ad61f93
--- /dev/null
+++ b/tools/testing/selftests/sparc64/run.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+(cd drivers; ./drivers_test.sh)
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
new file mode 100644
index 000000000..1e23fefd6
--- /dev/null
+++ b/tools/testing/selftests/splice/.gitignore
@@ -0,0 +1 @@
+default_file_splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
new file mode 100644
index 000000000..e519b159b
--- /dev/null
+++ b/tools/testing/selftests/splice/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS := default_file_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+
+include ../lib.mk
diff --git a/tools/testing/selftests/splice/default_file_splice_read.c b/tools/testing/selftests/splice/default_file_splice_read.c
new file mode 100644
index 000000000..a3c6e5672
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <fcntl.h>
+
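+/*
+ * Splice up to 1<<30 bytes from stdin to stdout. With stdin redirected
+ * from /dev/null this must move 0 bytes; any output observed by the
+ * wrapper script means the default_file_splice_read() path leaked data.
+ */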
+int main(int argc, char **argv)
+{
+ splice(0, 0, 1, 0, 1<<30, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/splice/default_file_splice_read.sh b/tools/testing/selftests/splice/default_file_splice_read.sh
new file mode 100755
index 000000000..490db5a2e
--- /dev/null
+++ b/tools/testing/selftests/splice/default_file_splice_read.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+n=`./default_file_splice_read </dev/null | wc -c`
+
+test "$n" = 0 && exit 0
+
+echo "default_file_splice_read broken: leaked $n"
+exit 1
diff --git a/tools/testing/selftests/static_keys/Makefile b/tools/testing/selftests/static_keys/Makefile
new file mode 100644
index 000000000..9cdadf37f
--- /dev/null
+++ b/tools/testing/selftests/static_keys/Makefile
@@ -0,0 +1,8 @@
+# Makefile for static keys selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_static_keys.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/static_keys/config b/tools/testing/selftests/static_keys/config
new file mode 100644
index 000000000..d538fb774
--- /dev/null
+++ b/tools/testing/selftests/static_keys/config
@@ -0,0 +1 @@
+CONFIG_TEST_STATIC_KEYS=m
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
new file mode 100755
index 000000000..fc9f8cde7
--- /dev/null
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -0,0 +1,30 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs static keys kernel module tests
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+ echo "static_key: module test_static_key_base is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+ echo "static_key: module test_static_keys is not found [SKIP]"
+ exit $ksft_skip
+fi
+
+if /sbin/modprobe -q test_static_key_base; then
+ if /sbin/modprobe -q test_static_keys; then
+ echo "static_key: ok"
+ /sbin/modprobe -q -r test_static_keys
+ /sbin/modprobe -q -r test_static_key_base
+	else
+		echo "static_key: [FAIL]"
+		/sbin/modprobe -q -r test_static_key_base
+		exit 1
+	fi
+else
+ echo "static_key: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/sync/.gitignore b/tools/testing/selftests/sync/.gitignore
new file mode 100644
index 000000000..f5091e779
--- /dev/null
+++ b/tools/testing/selftests/sync/.gitignore
@@ -0,0 +1 @@
+sync_test
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
new file mode 100644
index 000000000..d0121a8a3
--- /dev/null
+++ b/tools/testing/selftests/sync/Makefile
@@ -0,0 +1,38 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
+CFLAGS += -I../../../../usr/include/
+LDFLAGS += -pthread
+
+.PHONY: all clean
+
+include ../lib.mk
+
+# lib.mk TEST_CUSTOM_PROGS var is for custom tests that need special
+# build rules. lib.mk will run and install them.
+
+TEST_CUSTOM_PROGS := $(OUTPUT)/sync_test
+all: $(TEST_CUSTOM_PROGS)
+
+OBJS = sync_test.o sync.o
+
+TESTS += sync_alloc.o
+TESTS += sync_fence.o
+TESTS += sync_merge.o
+TESTS += sync_wait.o
+TESTS += sync_stress_parallelism.o
+TESTS += sync_stress_consumer.o
+TESTS += sync_stress_merge.o
+
+OBJS := $(patsubst %,$(OUTPUT)/%,$(OBJS))
+TESTS := $(patsubst %,$(OUTPUT)/%,$(TESTS))
+
+$(TEST_CUSTOM_PROGS): $(TESTS) $(OBJS)
+ $(CC) -o $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS) $(CFLAGS) $(LDFLAGS)
+
+$(OBJS): $(OUTPUT)/%.o: %.c
+ $(CC) -c $^ -o $@ $(CFLAGS)
+
+$(TESTS): $(OUTPUT)/%.o: %.c
+	$(CC) -c $^ -o $@ $(CFLAGS)
+
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(OBJS) $(TESTS)
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644
index 000000000..1ab7e8130
--- /dev/null
+++ b/tools/testing/selftests/sync/config
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/sync/sw_sync.h b/tools/testing/selftests/sync/sw_sync.h
new file mode 100644
index 000000000..e2cfc6bad
--- /dev/null
+++ b/tools/testing/selftests/sync/sw_sync.h
@@ -0,0 +1,46 @@
+/*
+ * sw_sync abstraction
+ *
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2013 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SW_SYNC_H
+#define SELFTESTS_SW_SYNC_H
+
+/*
+ * sw_sync is mainly intended for testing and should not be compiled into
+ * production kernels
+ */
+
+int sw_sync_timeline_create(void);
+int sw_sync_timeline_is_valid(int fd);
+int sw_sync_timeline_inc(int fd, unsigned int count);
+void sw_sync_timeline_destroy(int fd);
+
+int sw_sync_fence_create(int fd, const char *name, unsigned int value);
+int sw_sync_fence_is_valid(int fd);
+void sw_sync_fence_destroy(int fd);
+
+#endif
diff --git a/tools/testing/selftests/sync/sync.c b/tools/testing/selftests/sync/sync.c
new file mode 100644
index 000000000..f3d599f24
--- /dev/null
+++ b/tools/testing/selftests/sync/sync.c
@@ -0,0 +1,221 @@
+/*
+ * sync / sw_sync abstraction
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <malloc.h>
+#include <poll.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+
+#include <linux/sync_file.h>
+
+
+/* SW_SYNC ioctls */
+struct sw_sync_create_fence_data {
+ __u32 value;
+ char name[32];
+ __s32 fence;
+};
+
+#define SW_SYNC_IOC_MAGIC 'W'
+#define SW_SYNC_IOC_CREATE_FENCE _IOWR(SW_SYNC_IOC_MAGIC, 0,\
+ struct sw_sync_create_fence_data)
+#define SW_SYNC_IOC_INC _IOW(SW_SYNC_IOC_MAGIC, 1, __u32)
+
+
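+/*
+ * Wait with poll() semantics: > 0 when the fence has signaled, 0 on
+ * timeout, < 0 on error. A timeout of -1 blocks indefinitely.
+ */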
+int sync_wait(int fd, int timeout)
+{
+ struct pollfd fds;
+
+ fds.fd = fd;
+ fds.events = POLLIN | POLLERR;
+
+ return poll(&fds, 1, timeout);
+}
+
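+/*
+ * Merge fd1 and fd2 via SYNC_IOC_MERGE into a new fence that signals
+ * only once both inputs have signaled. The returned fd is owned by the
+ * caller and must be closed independently of fd1 and fd2, roughly:
+ *
+ *	merged = sync_merge("merged", fd1, fd2);
+ *	...
+ *	sw_sync_fence_destroy(merged);
+ */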
+int sync_merge(const char *name, int fd1, int fd2)
+{
+ struct sync_merge_data data = {};
+ int err;
+
+ data.fd2 = fd2;
+ strncpy(data.name, name, sizeof(data.name) - 1);
+ data.name[sizeof(data.name) - 1] = '\0';
+
+ err = ioctl(fd1, SYNC_IOC_MERGE, &data);
+ if (err < 0)
+ return err;
+
+ return data.fence;
+}
+
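+/*
+ * SYNC_IOC_FILE_INFO is a two-pass ioctl: with num_fences == 0 it only
+ * reports the fence count, so we call it once to size the fence_info
+ * array and a second time to fetch the per-fence entries.
+ */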
+static struct sync_file_info *sync_file_info(int fd)
+{
+ struct sync_file_info *info;
+ struct sync_fence_info *fence_info;
+ int err, num_fences;
+
+ info = calloc(1, sizeof(*info));
+ if (info == NULL)
+ return NULL;
+
+ err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
+ if (err < 0) {
+ free(info);
+ return NULL;
+ }
+
+ num_fences = info->num_fences;
+
+ if (num_fences) {
+ info->flags = 0;
+ info->num_fences = num_fences;
+
+ fence_info = calloc(num_fences, sizeof(*fence_info));
+ if (!fence_info) {
+ free(info);
+ return NULL;
+ }
+
+ info->sync_fence_info = (uint64_t)fence_info;
+
+ err = ioctl(fd, SYNC_IOC_FILE_INFO, info);
+ if (err < 0) {
+ free(fence_info);
+ free(info);
+ return NULL;
+ }
+ }
+
+ return info;
+}
+
+static void sync_file_info_free(struct sync_file_info *info)
+{
+ free((void *)info->sync_fence_info);
+ free(info);
+}
+
+int sync_fence_size(int fd)
+{
+ int count;
+ struct sync_file_info *info = sync_file_info(fd);
+
+ if (!info)
+ return 0;
+
+ count = info->num_fences;
+
+ sync_file_info_free(info);
+
+ return count;
+}
+
+int sync_fence_count_with_status(int fd, int status)
+{
+ unsigned int i, count = 0;
+ struct sync_fence_info *fence_info = NULL;
+ struct sync_file_info *info = sync_file_info(fd);
+
+ if (!info)
+ return -1;
+
+ fence_info = (struct sync_fence_info *)info->sync_fence_info;
+	for (i = 0; i < info->num_fences; i++) {
+ if (fence_info[i].status == status)
+ count++;
+ }
+
+ sync_file_info_free(info);
+
+ return count;
+}
+
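+/*
+ * Each open of the sw_sync debugfs node creates a fresh timeline; the
+ * returned fd is the handle, and closing it destroys the timeline.
+ */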
+int sw_sync_timeline_create(void)
+{
+ return open("/sys/kernel/debug/sync/sw_sync", O_RDWR);
+}
+
+int sw_sync_timeline_inc(int fd, unsigned int count)
+{
+ __u32 arg = count;
+
+ return ioctl(fd, SW_SYNC_IOC_INC, &arg);
+}
+
+int sw_sync_timeline_is_valid(int fd)
+{
+ int status;
+
+ if (fd == -1)
+ return 0;
+
+ status = fcntl(fd, F_GETFD, 0);
+ return (status >= 0);
+}
+
+void sw_sync_timeline_destroy(int fd)
+{
+ if (sw_sync_timeline_is_valid(fd))
+ close(fd);
+}
+
+int sw_sync_fence_create(int fd, const char *name, unsigned int value)
+{
+ struct sw_sync_create_fence_data data = {};
+ int err;
+
+ data.value = value;
+ strncpy(data.name, name, sizeof(data.name) - 1);
+ data.name[sizeof(data.name) - 1] = '\0';
+
+ err = ioctl(fd, SW_SYNC_IOC_CREATE_FENCE, &data);
+ if (err < 0)
+ return err;
+
+ return data.fence;
+}
+
+int sw_sync_fence_is_valid(int fd)
+{
+	/* Same validity check as for a timeline fd */
+ return sw_sync_timeline_is_valid(fd);
+}
+
+void sw_sync_fence_destroy(int fd)
+{
+ if (sw_sync_fence_is_valid(fd))
+ close(fd);
+}
diff --git a/tools/testing/selftests/sync/sync.h b/tools/testing/selftests/sync/sync.h
new file mode 100644
index 000000000..fb7156148
--- /dev/null
+++ b/tools/testing/selftests/sync/sync.h
@@ -0,0 +1,40 @@
+/*
+ * sync abstraction
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SYNC_H
+#define SELFTESTS_SYNC_H
+
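+/* Mirror of sync_fence_info.status: < 0 error, 0 active, 1 signaled */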
+#define FENCE_STATUS_ERROR (-1)
+#define FENCE_STATUS_ACTIVE (0)
+#define FENCE_STATUS_SIGNALED (1)
+
+int sync_wait(int fd, int timeout);
+int sync_merge(const char *name, int fd1, int fd2);
+int sync_fence_size(int fd);
+int sync_fence_count_with_status(int fd, int status);
+
+#endif
diff --git a/tools/testing/selftests/sync/sync_alloc.c b/tools/testing/selftests/sync/sync_alloc.c
new file mode 100644
index 000000000..66a28afc0
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_alloc.c
@@ -0,0 +1,74 @@
+/*
+ * sync allocation tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_alloc_timeline(void)
+{
+ int timeline, valid;
+
+ timeline = sw_sync_timeline_create();
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
+
+int test_alloc_fence(void)
+{
+ int timeline, fence, valid;
+
+ timeline = sw_sync_timeline_create();
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(timeline, "allocFence", 1);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
+
+int test_alloc_fence_negative(void)
+{
+ int fence, timeline;
+
+ timeline = sw_sync_timeline_create();
+ ASSERT(timeline > 0, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(-1, "fence", 1);
+ ASSERT(fence < 0, "Success allocating negative fence\n");
+
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_fence.c b/tools/testing/selftests/sync/sync_fence.c
new file mode 100644
index 000000000..13f175287
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_fence.c
@@ -0,0 +1,132 @@
+/*
+ * sync fence tests with one timeline
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_one_timeline_wait(void)
+{
+ int fence, valid, ret;
+ int timeline = sw_sync_timeline_create();
+
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(timeline, "allocFence", 5);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ /* Wait on fence until timeout */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret == 0, "Failure waiting on fence until timeout\n");
+
+ /* Advance timeline from 0 -> 1 */
+ ret = sw_sync_timeline_inc(timeline, 1);
+ ASSERT(ret == 0, "Failure advancing timeline\n");
+
+ /* Wait on fence until timeout */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret == 0, "Failure waiting on fence until timeout\n");
+
+ /* Signal the fence */
+ ret = sw_sync_timeline_inc(timeline, 4);
+ ASSERT(ret == 0, "Failure signaling the fence\n");
+
+ /* Wait successfully */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret > 0, "Failure waiting on fence\n");
+
+ /* Go even further, and confirm wait still succeeds */
+ ret = sw_sync_timeline_inc(timeline, 10);
+ ASSERT(ret == 0, "Failure going further\n");
+ ret = sync_wait(fence, 0);
+ ASSERT(ret > 0, "Failure waiting ahead\n");
+
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+
+ return 0;
+}
+
+int test_fence_one_timeline_merge(void)
+{
+ int a, b, c, d, valid;
+ int timeline = sw_sync_timeline_create();
+
+ /* create fence a,b,c and then merge them all into fence d */
+ a = sw_sync_fence_create(timeline, "allocFence", 1);
+ b = sw_sync_fence_create(timeline, "allocFence", 2);
+ c = sw_sync_fence_create(timeline, "allocFence", 3);
+
+ valid = sw_sync_fence_is_valid(a) &&
+ sw_sync_fence_is_valid(b) &&
+ sw_sync_fence_is_valid(c);
+ ASSERT(valid, "Failure allocating fences\n");
+
+ d = sync_merge("mergeFence", b, a);
+ d = sync_merge("mergeFence", c, d);
+ valid = sw_sync_fence_is_valid(d);
+ ASSERT(valid, "Failure merging fences\n");
+
+ /* confirm all fences have one active point (even d) */
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_ACTIVE) == 1,
+ "a has too many active fences!\n");
+	ASSERT(sync_fence_count_with_status(b, FENCE_STATUS_ACTIVE) == 1,
+	       "b has too many active fences!\n");
+	ASSERT(sync_fence_count_with_status(c, FENCE_STATUS_ACTIVE) == 1,
+	       "c has too many active fences!\n");
+	ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+	       "d has too many active fences!\n");
+
+ /* confirm that d is not signaled until the max of a,b,c */
+ sw_sync_timeline_inc(timeline, 1);
+ ASSERT(sync_fence_count_with_status(a, FENCE_STATUS_SIGNALED) == 1,
+ "a did not signal!\n");
+ ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+ "d signaled too early!\n");
+
+ sw_sync_timeline_inc(timeline, 1);
+ ASSERT(sync_fence_count_with_status(b, FENCE_STATUS_SIGNALED) == 1,
+ "b did not signal!\n");
+ ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 1,
+ "d signaled too early!\n");
+
+ sw_sync_timeline_inc(timeline, 1);
+ ASSERT(sync_fence_count_with_status(c, FENCE_STATUS_SIGNALED) == 1,
+ "c did not signal!\n");
+ ASSERT(sync_fence_count_with_status(d, FENCE_STATUS_ACTIVE) == 0 &&
+ sync_fence_count_with_status(d, FENCE_STATUS_SIGNALED) == 1,
+ "d did not signal!\n");
+
+ sw_sync_fence_destroy(d);
+ sw_sync_fence_destroy(c);
+ sw_sync_fence_destroy(b);
+ sw_sync_fence_destroy(a);
+ sw_sync_timeline_destroy(timeline);
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_merge.c b/tools/testing/selftests/sync/sync_merge.c
new file mode 100644
index 000000000..8914d4339
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_merge.c
@@ -0,0 +1,60 @@
+/*
+ * sync fence merge tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_merge_same_fence(void)
+{
+ int fence, valid, merged;
+ int timeline = sw_sync_timeline_create();
+
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ fence = sw_sync_fence_create(timeline, "allocFence", 5);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ merged = sync_merge("mergeFence", fence, fence);
+	valid = sw_sync_fence_is_valid(merged);
+ ASSERT(valid, "Failure merging fence\n");
+
+ ASSERT(sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED) == 0,
+ "fence signaled too early!\n");
+
+ sw_sync_timeline_inc(timeline, 5);
+ ASSERT(sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED) == 1,
+ "fence did not signal!\n");
+
+ sw_sync_fence_destroy(merged);
+ sw_sync_fence_destroy(fence);
+ sw_sync_timeline_destroy(timeline);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_consumer.c b/tools/testing/selftests/sync/sync_stress_consumer.c
new file mode 100644
index 000000000..d9eff8d52
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_consumer.c
@@ -0,0 +1,185 @@
+/*
+ * sync stress test: producer/consumer
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <pthread.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+/* IMPORTANT NOTE: if you see this test failing on your system, it may be
+ * due to a shortage of file descriptors. Please ensure your system has
+ * a sensible limit for this test to finish correctly.
+ */
+
+/* Returns 1 on error, 0 on success */
+static int busy_wait_on_fence(int fence)
+{
+ int error, active;
+
+ do {
+ error = sync_fence_count_with_status(fence, FENCE_STATUS_ERROR);
+ ASSERT(error == 0, "Error occurred on fence\n");
+ active = sync_fence_count_with_status(fence,
+ FENCE_STATUS_ACTIVE);
+ } while (active);
+
+ return 0;
+}
+
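+/*
+ * Shared MPSC state: producers wait on the consumer timeline, bump the
+ * counter under the lock, then advance their own timeline; the consumer
+ * merges one fence per producer, waits, checks the counter, and releases
+ * the producers by advancing the consumer timeline.
+ */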
+static struct {
+ int iterations;
+ int threads;
+ int counter;
+ int consumer_timeline;
+ int *producer_timelines;
+ pthread_mutex_t lock;
+} test_data_mpsc;
+
+static int mpsc_producer_thread(void *d)
+{
+ int id = (long)d;
+ int fence, valid, i;
+ int *producer_timelines = test_data_mpsc.producer_timelines;
+ int consumer_timeline = test_data_mpsc.consumer_timeline;
+ int iterations = test_data_mpsc.iterations;
+
+ for (i = 0; i < iterations; i++) {
+ fence = sw_sync_fence_create(consumer_timeline, "fence", i);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure creating fence\n");
+
+ /*
+ * Wait for the consumer to finish. Use alternate
+ * means of waiting on the fence
+ */
+
+ if ((iterations + id) % 8 != 0) {
+ ASSERT(sync_wait(fence, -1) > 0,
+ "Failure waiting on fence\n");
+ } else {
+ ASSERT(busy_wait_on_fence(fence) == 0,
+ "Failure waiting on fence\n");
+ }
+
+ /*
+ * Every producer increments the counter, the consumer
+ * checks and erases it
+ */
+ pthread_mutex_lock(&test_data_mpsc.lock);
+ test_data_mpsc.counter++;
+ pthread_mutex_unlock(&test_data_mpsc.lock);
+
+ ASSERT(sw_sync_timeline_inc(producer_timelines[id], 1) == 0,
+ "Error advancing producer timeline\n");
+
+ sw_sync_fence_destroy(fence);
+ }
+
+ return 0;
+}
+
+static int mpcs_consumer_thread(void)
+{
+ int fence, merged, tmp, valid, it, i;
+ int *producer_timelines = test_data_mpsc.producer_timelines;
+ int consumer_timeline = test_data_mpsc.consumer_timeline;
+ int iterations = test_data_mpsc.iterations;
+ int n = test_data_mpsc.threads;
+
+ for (it = 1; it <= iterations; it++) {
+ fence = sw_sync_fence_create(producer_timelines[0], "name", it);
+ for (i = 1; i < n; i++) {
+ tmp = sw_sync_fence_create(producer_timelines[i],
+ "name", it);
+ merged = sync_merge("name", tmp, fence);
+ sw_sync_fence_destroy(tmp);
+ sw_sync_fence_destroy(fence);
+ fence = merged;
+ }
+
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure merging fences\n");
+
+ /*
+ * Make sure we see an increment from every producer thread.
+ * Vary the means by which we wait.
+ */
+ if (iterations % 8 != 0) {
+ ASSERT(sync_wait(fence, -1) > 0,
+ "Producers did not increment as expected\n");
+ } else {
+ ASSERT(busy_wait_on_fence(fence) == 0,
+ "Producers did not increment as expected\n");
+ }
+
+ ASSERT(test_data_mpsc.counter == n * it,
+ "Counter value mismatch!\n");
+
+ /* Release the producer threads */
+ ASSERT(sw_sync_timeline_inc(consumer_timeline, 1) == 0,
+ "Failure releasing producer threads\n");
+
+ sw_sync_fence_destroy(fence);
+ }
+
+ return 0;
+}
+
+int test_consumer_stress_multi_producer_single_consumer(void)
+{
+ int iterations = 1 << 12;
+ int n = 5;
+ long i, ret;
+ int producer_timelines[n];
+ int consumer_timeline;
+ pthread_t threads[n];
+
+ consumer_timeline = sw_sync_timeline_create();
+ for (i = 0; i < n; i++)
+ producer_timelines[i] = sw_sync_timeline_create();
+
+ test_data_mpsc.producer_timelines = producer_timelines;
+ test_data_mpsc.consumer_timeline = consumer_timeline;
+ test_data_mpsc.iterations = iterations;
+ test_data_mpsc.threads = n;
+ test_data_mpsc.counter = 0;
+ pthread_mutex_init(&test_data_mpsc.lock, NULL);
+
+ for (i = 0; i < n; i++) {
+ pthread_create(&threads[i], NULL, (void * (*)(void *))
+ mpsc_producer_thread, (void *)i);
+ }
+
+ /* Consumer thread runs here */
+ ret = mpcs_consumer_thread();
+
+ for (i = 0; i < n; i++)
+ pthread_join(threads[i], NULL);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_merge.c b/tools/testing/selftests/sync/sync_stress_merge.c
new file mode 100644
index 000000000..99e83ef45
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_merge.c
@@ -0,0 +1,115 @@
+/*
+ * sync stress test: merging
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_merge_stress_random_merge(void)
+{
+ int i, size, ret;
+ int timeline_count = 32;
+ int merge_count = 1024 * 32;
+ int timelines[timeline_count];
+ int fence_map[timeline_count];
+ int fence, tmpfence, merged, valid;
+ int timeline, timeline_offset, sync_point;
+
+ srand(time(NULL));
+
+ for (i = 0; i < timeline_count; i++)
+ timelines[i] = sw_sync_timeline_create();
+
+ fence = sw_sync_fence_create(timelines[0], "fence", 0);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure creating fence\n");
+
+ memset(fence_map, -1, sizeof(fence_map));
+ fence_map[0] = 0;
+
+ /*
+ * Randomly create sync_points out of a fixed set of timelines,
+ * and merge them together
+ */
+ for (i = 0; i < merge_count; i++) {
+ /* Generate sync_point. */
+ timeline_offset = rand() % timeline_count;
+ timeline = timelines[timeline_offset];
+ sync_point = rand();
+
+ /* Keep track of the latest sync_point in each timeline. */
+ if (fence_map[timeline_offset] == -1)
+ fence_map[timeline_offset] = sync_point;
+ else if (fence_map[timeline_offset] < sync_point)
+ fence_map[timeline_offset] = sync_point;
+
+ /* Merge */
+ tmpfence = sw_sync_fence_create(timeline, "fence", sync_point);
+ merged = sync_merge("merge", tmpfence, fence);
+ sw_sync_fence_destroy(tmpfence);
+ sw_sync_fence_destroy(fence);
+ fence = merged;
+
+ valid = sw_sync_fence_is_valid(merged);
+		ASSERT(valid, "Failure merging fences\n");
+ }
+
+ size = 0;
+ for (i = 0; i < timeline_count; i++)
+ if (fence_map[i] != -1)
+ size++;
+
+ /* Confirm our map matches the fence. */
+ ASSERT(sync_fence_size(fence) == size,
+ "Quantity of elements not matching\n");
+
+ /* Trigger the merged fence */
+ for (i = 0; i < timeline_count; i++) {
+ if (fence_map[i] != -1) {
+ ret = sync_wait(fence, 0);
+ ASSERT(ret == 0,
+ "Failure waiting on fence until timeout\n");
+ /* Increment the timeline to the last sync_point */
+ sw_sync_timeline_inc(timelines[i], fence_map[i]);
+ }
+ }
+
+ /* Check that the fence is triggered. */
+ ret = sync_wait(fence, 0);
+ ASSERT(ret > 0, "Failure triggering fence\n");
+
+ sw_sync_fence_destroy(fence);
+
+ for (i = 0; i < timeline_count; i++)
+ sw_sync_timeline_destroy(timelines[i]);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_stress_parallelism.c b/tools/testing/selftests/sync/sync_stress_parallelism.c
new file mode 100644
index 000000000..e6c9be671
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_stress_parallelism.c
@@ -0,0 +1,111 @@
+/*
+ * sync stress test: parallelism
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <pthread.h>
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+static struct {
+ int iterations;
+ int timeline;
+ int counter;
+} test_data_two_threads;
+
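+/*
+ * Ping-pong: thread 0 waits on sync points 0, 2, 4, ... and thread 1 on
+ * 1, 3, 5, ..., so each timeline increment hands the counter to the
+ * other thread in strict alternation.
+ */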
+static int test_stress_two_threads_shared_timeline_thread(void *d)
+{
+ int thread_id = (long)d;
+ int timeline = test_data_two_threads.timeline;
+ int iterations = test_data_two_threads.iterations;
+ int fence, valid, ret, i;
+
+ for (i = 0; i < iterations; i++) {
+ fence = sw_sync_fence_create(timeline, "fence",
+ i * 2 + thread_id);
+ valid = sw_sync_fence_is_valid(fence);
+ ASSERT(valid, "Failure allocating fence\n");
+
+ /* Wait on the prior thread to complete */
+ ret = sync_wait(fence, -1);
+ ASSERT(ret > 0, "Problem occurred on prior thread\n");
+
+ /*
+ * Confirm the previous thread's writes are visible
+ * and then increment
+ */
+ ASSERT(test_data_two_threads.counter == i * 2 + thread_id,
+ "Counter got damaged!\n");
+ test_data_two_threads.counter++;
+
+ /* Kick off the other thread */
+ ret = sw_sync_timeline_inc(timeline, 1);
+ ASSERT(ret == 0, "Advancing timeline failed\n");
+
+ sw_sync_fence_destroy(fence);
+ }
+
+ return 0;
+}
+
+int test_stress_two_threads_shared_timeline(void)
+{
+ pthread_t a, b;
+ int valid;
+ int timeline = sw_sync_timeline_create();
+
+ valid = sw_sync_timeline_is_valid(timeline);
+ ASSERT(valid, "Failure allocating timeline\n");
+
+ test_data_two_threads.iterations = 1 << 16;
+ test_data_two_threads.counter = 0;
+ test_data_two_threads.timeline = timeline;
+
+ /*
+ * Use a single timeline to synchronize two threads
+	 * hammering on the same counter.
+ */
+
+ pthread_create(&a, NULL, (void *(*)(void *))
+ test_stress_two_threads_shared_timeline_thread,
+ (void *)0);
+ pthread_create(&b, NULL, (void *(*)(void *))
+ test_stress_two_threads_shared_timeline_thread,
+ (void *)1);
+
+ pthread_join(a, NULL);
+ pthread_join(b, NULL);
+
+ /* make sure the threads did not trample on one another */
+ ASSERT(test_data_two_threads.counter ==
+ test_data_two_threads.iterations * 2,
+ "Counter has unexpected value\n");
+
+ sw_sync_timeline_destroy(timeline);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
new file mode 100644
index 000000000..7f7938263
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -0,0 +1,113 @@
+/*
+ * sync test runner
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "synctest.h"
+
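+/*
+ * Run each test in a forked child so a failing ASSERT (which returns
+ * nonzero) cannot disturb the parent's state; the child's exit status
+ * is what gets reported as pass/fail.
+ */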
+static int run_test(int (*test)(void), char *name)
+{
+ int result;
+ pid_t childpid;
+ int ret;
+
+ fflush(stdout);
+	childpid = fork();
+	if (childpid < 0)
+		return 1;
+
+	if (childpid) {
+ waitpid(childpid, &result, 0);
+ if (WIFEXITED(result)) {
+ ret = WEXITSTATUS(result);
+ if (!ret)
+ ksft_test_result_pass("[RUN]\t%s\n", name);
+ else
+ ksft_test_result_fail("[RUN]\t%s\n", name);
+ return ret;
+ }
+ return 1;
+ }
+
+ exit(test());
+}
+
+static void sync_api_supported(void)
+{
+ struct stat sbuf;
+ int ret;
+
+ ret = stat("/sys/kernel/debug/sync/sw_sync", &sbuf);
+ if (!ret)
+ return;
+
+ if (errno == ENOENT)
+ ksft_exit_skip("Sync framework not supported by kernel\n");
+
+ if (errno == EACCES)
+ ksft_exit_skip("Run Sync test as root.\n");
+
+ ksft_exit_fail_msg("stat failed on /sys/kernel/debug/sync/sw_sync: %s",
+ strerror(errno));
+}
+
+int main(void)
+{
+ int err;
+
+ ksft_print_header();
+
+ sync_api_supported();
+
+ ksft_print_msg("[RUN]\tTesting sync framework\n");
+
+ RUN_TEST(test_alloc_timeline);
+ RUN_TEST(test_alloc_fence);
+ RUN_TEST(test_alloc_fence_negative);
+
+ RUN_TEST(test_fence_one_timeline_wait);
+ RUN_TEST(test_fence_one_timeline_merge);
+ RUN_TEST(test_fence_merge_same_fence);
+ RUN_TEST(test_fence_multi_timeline_wait);
+ RUN_TEST(test_stress_two_threads_shared_timeline);
+ RUN_TEST(test_consumer_stress_multi_producer_single_consumer);
+ RUN_TEST(test_merge_stress_random_merge);
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d sync tests failed\n",
+ err, ksft_test_num());
+
+ /* need this return to keep gcc happy */
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/sync/sync_wait.c b/tools/testing/selftests/sync/sync_wait.c
new file mode 100644
index 000000000..d69b752f6
--- /dev/null
+++ b/tools/testing/selftests/sync/sync_wait.c
@@ -0,0 +1,91 @@
+/*
+ * sync fence wait tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "sync.h"
+#include "sw_sync.h"
+#include "synctest.h"
+
+int test_fence_multi_timeline_wait(void)
+{
+ int timelineA, timelineB, timelineC;
+ int fenceA, fenceB, fenceC, merged;
+ int valid, active, signaled, ret;
+
+ timelineA = sw_sync_timeline_create();
+ timelineB = sw_sync_timeline_create();
+ timelineC = sw_sync_timeline_create();
+
+ fenceA = sw_sync_fence_create(timelineA, "fenceA", 5);
+ fenceB = sw_sync_fence_create(timelineB, "fenceB", 5);
+ fenceC = sw_sync_fence_create(timelineC, "fenceC", 5);
+
+ merged = sync_merge("mergeFence", fenceB, fenceA);
+ merged = sync_merge("mergeFence", fenceC, merged);
+
+ valid = sw_sync_fence_is_valid(merged);
+ ASSERT(valid, "Failure merging fence from various timelines\n");
+
+ /* Confirm fence isn't signaled */
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ ASSERT(active == 3, "Fence signaled too early!\n");
+
+ ret = sync_wait(merged, 0);
+ ASSERT(ret == 0,
+ "Failure waiting on fence until timeout\n");
+
+ ret = sw_sync_timeline_inc(timelineA, 5);
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+ ASSERT(active == 2 && signaled == 1,
+ "Fence did not signal properly!\n");
+
+ ret = sw_sync_timeline_inc(timelineB, 5);
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+ ASSERT(active == 1 && signaled == 2,
+ "Fence did not signal properly!\n");
+
+ ret = sw_sync_timeline_inc(timelineC, 5);
+ active = sync_fence_count_with_status(merged, FENCE_STATUS_ACTIVE);
+ signaled = sync_fence_count_with_status(merged, FENCE_STATUS_SIGNALED);
+ ASSERT(active == 0 && signaled == 3,
+ "Fence did not signal properly!\n");
+
+ /* confirm you can successfully wait */
+ ret = sync_wait(merged, 100);
+ ASSERT(ret > 0, "Failure waiting on signaled fence\n");
+
+ sw_sync_fence_destroy(merged);
+ sw_sync_fence_destroy(fenceC);
+ sw_sync_fence_destroy(fenceB);
+ sw_sync_fence_destroy(fenceA);
+ sw_sync_timeline_destroy(timelineC);
+ sw_sync_timeline_destroy(timelineB);
+ sw_sync_timeline_destroy(timelineA);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/sync/synctest.h b/tools/testing/selftests/sync/synctest.h
new file mode 100644
index 000000000..90a8e5369
--- /dev/null
+++ b/tools/testing/selftests/sync/synctest.h
@@ -0,0 +1,67 @@
+/*
+ * sync tests
+ * Copyright 2015-2016 Collabora Ltd.
+ *
+ * Based on the implementation from the Android Open Source Project,
+ *
+ * Copyright 2012 Google, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef SELFTESTS_SYNCTEST_H
+#define SELFTESTS_SYNCTEST_H
+
+#include <stdio.h>
+#include "../kselftest.h"
+
+#define ASSERT(cond, msg) do { \
+ if (!(cond)) { \
+ ksft_print_msg("[ERROR]\t%s", (msg)); \
+ return 1; \
+ } \
+} while (0)
+
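+/* Expands to run_test(fn, "fn") so each test reports under its own name */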
+#define RUN_TEST(x) run_test((x), #x)
+
+/* Allocation tests */
+int test_alloc_timeline(void);
+int test_alloc_fence(void);
+int test_alloc_fence_negative(void);
+
+/* Fence tests with one timeline */
+int test_fence_one_timeline_wait(void);
+int test_fence_one_timeline_merge(void);
+
+/* Fence merge tests */
+int test_fence_merge_same_fence(void);
+
+/* Fence wait tests */
+int test_fence_multi_timeline_wait(void);
+
+/* Stress test - parallelism */
+int test_stress_two_threads_shared_timeline(void);
+
+/* Stress test - consumer */
+int test_consumer_stress_multi_producer_single_consumer(void);
+
+/* Stress test - merging */
+int test_merge_stress_random_merge(void);
+
+#endif
diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile
new file mode 100644
index 000000000..95c320b35
--- /dev/null
+++ b/tools/testing/selftests/sysctl/Makefile
@@ -0,0 +1,12 @@
+# Makefile for sysctl selftests.
+# Expects kernel.sysctl_writes_strict=1.
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests".
+all:
+
+TEST_PROGS := sysctl.sh
+
+include ../lib.mk
+
+# Nothing to clean up.
+clean:
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
new file mode 100644
index 000000000..6ca14800d
--- /dev/null
+++ b/tools/testing/selftests/sysctl/config
@@ -0,0 +1 @@
+CONFIG_TEST_SYSCTL=y
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
new file mode 100755
index 000000000..584eb8ea7
--- /dev/null
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -0,0 +1,780 @@
+#!/bin/bash
+# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or at your option any
+# later version; or, when distributed separately from the Linux kernel or
+# when incorporated into other software packages, subject to the following
+# license:
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of copyleft-next (version 0.3.1 or later) as published
+# at http://copyleft-next.org/.
+
+# This performs a series of tests against the proc sysctl interface.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TEST_NAME="sysctl"
+TEST_DRIVER="test_${TEST_NAME}"
+TEST_DIR=$(dirname $0)
+TEST_FILE=$(mktemp)
+
+# This represents
+#
+# TEST_ID:TEST_COUNT:ENABLED
+#
+# TEST_ID: is the test id number
+# TEST_COUNT: number of times we should run the test
+# ENABLED: 1 if enabled, 0 otherwise
+#
+# Once these are enabled please leave them as-is. Write your own test,
+# we have tons of space.
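+# For example, "0005:3:1" runs test 0005 three times.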
+ALL_TESTS="0001:1:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1"
+
+test_modprobe()
+{
+ if [ ! -d $DIR ]; then
+ echo "$0: $DIR not present" >&2
+ echo "You must have the following enabled in your kernel:" >&2
+ cat $TEST_DIR/config >&2
+ exit $ksft_skip
+ fi
+}
+
+function allow_user_defaults()
+{
+ if [ -z $DIR ]; then
+ DIR="/sys/module/test_sysctl/"
+ fi
+ if [ -z $DEFAULT_NUM_TESTS ]; then
+ DEFAULT_NUM_TESTS=50
+ fi
+ if [ -z $SYSCTL ]; then
+ SYSCTL="/proc/sys/debug/test_sysctl"
+ fi
+ if [ -z $PROD_SYSCTL ]; then
+ PROD_SYSCTL="/proc/sys"
+ fi
+ if [ -z $WRITES_STRICT ]; then
+ WRITES_STRICT="${PROD_SYSCTL}/kernel/sysctl_writes_strict"
+ fi
+}
+
+function check_production_sysctl_writes_strict()
+{
+ echo -n "Checking production write strict setting ... "
+ if [ ! -e ${WRITES_STRICT} ]; then
+		echo "FAIL, but skipping in case of an old kernel" >&2
+ else
+ old_strict=$(cat ${WRITES_STRICT})
+ if [ "$old_strict" = "1" ]; then
+ echo "ok"
+ else
+			echo "FAIL, strict value is 0; forcing to 1 to continue" >&2
+ echo "1" > ${WRITES_STRICT}
+ fi
+ fi
+
+ if [ -z $PAGE_SIZE ]; then
+ PAGE_SIZE=$(getconf PAGESIZE)
+ fi
+ if [ -z $MAX_DIGITS ]; then
+ MAX_DIGITS=$(($PAGE_SIZE/8))
+ fi
+ if [ -z $INT_MAX ]; then
+ INT_MAX=$(getconf INT_MAX)
+ fi
+ if [ -z $UINT_MAX ]; then
+ UINT_MAX=$(getconf UINT_MAX)
+ fi
+}
+
+test_reqs()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+		echo "$0: must be run as root" >&2
+ exit $ksft_skip
+ fi
+
+ if ! which perl 2> /dev/null > /dev/null; then
+ echo "$0: You need perl installed"
+ exit $ksft_skip
+ fi
+ if ! which getconf 2> /dev/null > /dev/null; then
+ echo "$0: You need getconf installed"
+ exit $ksft_skip
+ fi
+ if ! which diff 2> /dev/null > /dev/null; then
+ echo "$0: You need diff installed"
+ exit $ksft_skip
+ fi
+}
+
+function load_req_mod()
+{
+ if [ ! -d $DIR ]; then
+ if ! modprobe -q -n $TEST_DRIVER; then
+ echo "$0: module $TEST_DRIVER not found [SKIP]"
+ exit $ksft_skip
+ fi
+ modprobe $TEST_DRIVER
+ if [ $? -ne 0 ]; then
+ exit
+ fi
+ fi
+}
+
+reset_vals()
+{
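+	# Restore the target sysctl to its default value (the values below
+	# are assumed to match the test_sysctl module defaults).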
+ VAL=""
+ TRIGGER=$(basename ${TARGET})
+ case "$TRIGGER" in
+ int_0001)
+ VAL="60"
+ ;;
+ int_0002)
+ VAL="1"
+ ;;
+ uint_0001)
+ VAL="314"
+ ;;
+ string_0001)
+ VAL="(none)"
+ ;;
+ *)
+ ;;
+ esac
+ echo -n $VAL > $TARGET
+}
+
+set_orig()
+{
+ if [ ! -z $TARGET ]; then
+ echo "${ORIG}" > "${TARGET}"
+ fi
+}
+
+set_test()
+{
+ echo "${TEST_STR}" > "${TARGET}"
+}
+
+verify()
+{
+ local seen
+ seen=$(cat "$1")
+ if [ "${seen}" != "${TEST_STR}" ]; then
+ return 1
+ fi
+ return 0
+}
+
+verify_diff_w()
+{
+ echo "$TEST_STR" | diff -q -w -u - $1
+ return $?
+}
+
+test_rc()
+{
+ if [[ $rc != 0 ]]; then
+ echo "Failed test, return value: $rc" >&2
+ exit $rc
+ fi
+}
+
+test_finish()
+{
+ set_orig
+ rm -f "${TEST_FILE}"
+
+ if [ ! -z ${old_strict} ]; then
+ echo ${old_strict} > ${WRITES_STRICT}
+ fi
+ exit $rc
+}
+
+run_numerictests()
+{
+ echo "== Testing sysctl behavior against ${TARGET} =="
+
+ rc=0
+
+ echo -n "Writing test file ... "
+ echo "${TEST_STR}" > "${TEST_FILE}"
+ if ! verify "${TEST_FILE}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl is not set to test value ... "
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing sysctl from shell ... "
+ set_test
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Resetting sysctl to original value ... "
+ set_orig
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ exit 1
+ else
+ echo "ok"
+ fi
+
+ # Now that we've validated the sanity of "set_test" and "set_orig",
+ # we can use those functions to set starting states before running
+ # specific behavioral tests.
+
+ echo -n "Writing entire sysctl in single write ... "
+ set_orig
+ dd if="${TEST_FILE}" of="${TARGET}" bs=4096 2>/dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing middle of sysctl after synchronized seek ... "
+ set_test
+ dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 skip=1 2>/dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing beyond end of sysctl ... "
+ set_orig
+ dd if="${TEST_FILE}" of="${TARGET}" bs=20 seek=2 2>/dev/null
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing sysctl with multiple long writes ... "
+ set_orig
+ (perl -e 'print "A" x 50;'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" bs=50 2>/dev/null
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# The sysctl under test must accept the values 3 and 4 for this to be usable.
+run_limit_digit()
+{
+ echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..."
+ reset_vals
+
+ LIMIT=$((MAX_DIGITS -1))
+ TEST_STR="3"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Checking passing PAGE_SIZE of spaces fails on write ..."
+ reset_vals
+
+ LIMIT=$((MAX_DIGITS))
+ TEST_STR="4"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# You are using an int
+run_limit_digit_int()
+{
+ echo -n "Testing INT_MAX works ..."
+ reset_vals
+ TEST_STR="$INT_MAX"
+ echo -n $TEST_STR > $TARGET
+
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing INT_MAX + 1 will fail as expected..."
+ reset_vals
+ let TEST_STR=$INT_MAX+1
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing negative values will work as expected..."
+ reset_vals
+ TEST_STR="-3"
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# You used an int array
+run_limit_digit_int_array()
+{
+ echo -n "Testing array works as expected ... "
+ TEST_STR="4 3 2 1"
+ echo -n $TEST_STR > $TARGET
+
+ if ! verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing skipping trailing array elements works ... "
+ # Do not reset_vals, carry on the values from the last test.
+	# If we only echo in two values, the last two are left intact.
+ TEST_STR="100 101"
+ echo -n $TEST_STR > $TARGET
+	# After writing, set TEST_STR to the expected result so the
+	# diff check can verify it.
+ TEST_STR="100 101 2 1"
+
+ if ! verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing PAGE_SIZE limit on array works ... "
+ # Do not reset_vals, carry on the values from the last test.
+	# Even if you use an int array, you are still restricted to
+	# MAX_DIGITS; this is a known limitation. Test that the limit works.
+ LIMIT=$((MAX_DIGITS -1))
+ TEST_STR="9"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ TEST_STR="9 101 2 1"
+ if ! verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing exceeding PAGE_SIZE limit fails as expected ... "
+ # Do not reset_vals, carry on the values from the last test.
+ # Now go over limit.
+ LIMIT=$((MAX_DIGITS))
+ TEST_STR="7"
+ (perl -e 'print " " x '$LIMIT';'; echo "${TEST_STR}") | \
+ dd of="${TARGET}" 2>/dev/null
+
+ TEST_STR="7 101 2 1"
+ if verify_diff_w "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+# You are using an unsigned int
+run_limit_digit_uint()
+{
+ echo -n "Testing UINT_MAX works ..."
+ reset_vals
+ TEST_STR="$UINT_MAX"
+ echo -n $TEST_STR > $TARGET
+
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing UINT_MAX + 1 will fail as expected..."
+ reset_vals
+ TEST_STR=$(($UINT_MAX+1))
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+
+ echo -n "Testing negative values will not work as expected ..."
+ reset_vals
+ TEST_STR="-3"
+ echo -n $TEST_STR > $TARGET 2> /dev/null
+
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+ test_rc
+}
+
+run_stringtests()
+{
+ echo -n "Writing entire sysctl in short writes ... "
+ set_orig
+ dd if="${TEST_FILE}" of="${TARGET}" bs=1 2>/dev/null
+ if ! verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Writing middle of sysctl after unsynchronized seek ... "
+ set_test
+ dd if="${TEST_FILE}" of="${TARGET}" bs=1 seek=1 2>/dev/null
+ if verify "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-2), "B";' | \
+ dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+ if ! grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl keeps original string on overflow append ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+ dd of="${TARGET}" bs=$(( MAXLEN - 1 )) 2>/dev/null
+ if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl stays NULL terminated on write ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-1), "B";' | \
+ dd of="${TARGET}" bs="${MAXLEN}" 2>/dev/null
+ if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ echo -n "Checking sysctl stays NULL terminated on overwrite ... "
+ set_orig
+ perl -e 'print "A" x ('"${MAXLEN}"'-1), "BB";' | \
+ dd of="${TARGET}" bs=$(( $MAXLEN + 1 )) 2>/dev/null
+ if grep -q B "${TARGET}"; then
+ echo "FAIL" >&2
+ rc=1
+ else
+ echo "ok"
+ fi
+
+ test_rc
+}
+
+sysctl_test_0001()
+{
+ TARGET="${SYSCTL}/int_0001"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ run_numerictests
+ run_limit_digit
+}
+
+sysctl_test_0002()
+{
+ TARGET="${SYSCTL}/string_0001"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR="Testing sysctl"
+ # Only string sysctls support seeking/appending.
+ MAXLEN=65
+
+ run_numerictests
+ run_stringtests
+}
+
+sysctl_test_0003()
+{
+ TARGET="${SYSCTL}/int_0002"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ run_numerictests
+ run_limit_digit
+ run_limit_digit_int
+}
+
+sysctl_test_0004()
+{
+ TARGET="${SYSCTL}/uint_0001"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+ TEST_STR=$(( $ORIG + 1 ))
+
+ run_numerictests
+ run_limit_digit
+ run_limit_digit_uint
+}
+
+sysctl_test_0005()
+{
+ TARGET="${SYSCTL}/int_0003"
+ reset_vals
+ ORIG=$(cat "${TARGET}")
+
+ run_limit_digit_int_array
+}
+
+list_tests()
+{
+ echo "Test ID list:"
+ echo
+ echo "TEST_ID x NUM_TEST"
+ echo "TEST_ID: Test ID"
+ echo "NUM_TESTS: Number of recommended times to run the test"
+ echo
+ echo "0001 x $(get_test_count 0001) - tests proc_dointvec_minmax()"
+ echo "0002 x $(get_test_count 0002) - tests proc_dostring()"
+ echo "0003 x $(get_test_count 0003) - tests proc_dointvec()"
+ echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
+ echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
+}
+
+test_reqs
+
+usage()
+{
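+	# ALL_TESTS entries are space separated; count the separators and
+	# add one to get the number of tests.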
+ NUM_TESTS=$(grep -o ' ' <<<"$ALL_TESTS" | grep -c .)
+ let NUM_TESTS=$NUM_TESTS+1
+ MAX_TEST=$(printf "%04d\n" $NUM_TESTS)
+ echo "Usage: $0 [ -t <4-number-digit> ] | [ -w <4-number-digit> ] |"
+ echo " [ -s <4-number-digit> ] | [ -c <4-number-digit> <test- count>"
+ echo " [ all ] [ -h | --help ] [ -l ]"
+ echo ""
+ echo "Valid tests: 0001-$MAX_TEST"
+ echo ""
+ echo " all Runs all tests (default)"
+ echo " -t Run test ID the number amount of times is recommended"
+ echo " -w Watch test ID run until it runs into an error"
+ echo " -c Run test ID once"
+ echo " -s Run test ID x test-count number of times"
+ echo " -l List all test ID list"
+ echo " -h|--help Help"
+ echo
+ echo "If an error every occurs execution will immediately terminate."
+ echo "If you are adding a new test try using -w <test-ID> first to"
+ echo "make sure the test passes a series of tests."
+ echo
+ echo Example uses:
+ echo
+ echo "$TEST_NAME.sh -- executes all tests"
+ echo "$TEST_NAME.sh -t 0002 -- Executes test ID 0002 number of times is recomended"
+ echo "$TEST_NAME.sh -w 0002 -- Watch test ID 0002 run until an error occurs"
+ echo "$TEST_NAME.sh -s 0002 -- Run test ID 0002 once"
+ echo "$TEST_NAME.sh -c 0002 3 -- Run test ID 0002 three times"
+ echo
+ list_tests
+ exit 1
+}
+
+function test_num()
+{
+ re='^[0-9]+$'
+ if ! [[ $1 =~ $re ]]; then
+ usage
+ fi
+}
+
+function get_test_count()
+{
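+	# ALL_TESTS entries look like "ID:COUNT:ENABLED"; select the entry
+	# by position, then strip the ID and ENABLED fields to get COUNT.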
+ test_num $1
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ LAST_TWO=${TEST_DATA#*:*}
+ echo ${LAST_TWO%:*}
+}
+
+function get_test_enabled()
+{
+ test_num $1
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ echo ${TEST_DATA#*:*:}
+}
+
+function run_all_tests()
+{
+ for i in $ALL_TESTS ; do
+ TEST_ID=${i%:*:*}
+ ENABLED=$(get_test_enabled $TEST_ID)
+ TEST_COUNT=$(get_test_count $TEST_ID)
+ if [[ $ENABLED -eq "1" ]]; then
+ test_case $TEST_ID $TEST_COUNT
+ fi
+ done
+}
+
+function watch_log()
+{
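+	# Clear the screen unless a third argument (eg "noclear") was given.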
+ if [ $# -ne 3 ]; then
+ clear
+ fi
+ date
+ echo "Running test: $2 - run #$1"
+}
+
+function watch_case()
+{
+ i=0
+ while [ 1 ]; do
+
+ if [ $# -eq 1 ]; then
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1
+ ${TEST_NAME}_test_$1
+ else
+ watch_log $i all
+ run_all_tests
+ fi
+ let i=$i+1
+ done
+}
+
+function test_case()
+{
+ NUM_TESTS=$DEFAULT_NUM_TESTS
+ if [ $# -eq 2 ]; then
+ NUM_TESTS=$2
+ fi
+
+ i=0
+ while [ $i -lt $NUM_TESTS ]; do
+ test_num $1
+ watch_log $i ${TEST_NAME}_test_$1 noclear
+ RUN_TEST=${TEST_NAME}_test_$1
+ $RUN_TEST
+ let i=$i+1
+ done
+}
+
+function parse_args()
+{
+ if [ $# -eq 0 ]; then
+ run_all_tests
+ else
+ if [[ "$1" = "all" ]]; then
+ run_all_tests
+ elif [[ "$1" = "-w" ]]; then
+ shift
+ watch_case $@
+ elif [[ "$1" = "-t" ]]; then
+ shift
+ test_num $1
+ test_case $1 $(get_test_count $1)
+ elif [[ "$1" = "-c" ]]; then
+ shift
+ test_num $1
+ test_num $2
+ test_case $1 $2
+ elif [[ "$1" = "-s" ]]; then
+ shift
+ test_case $1 1
+ elif [[ "$1" = "-l" ]]; then
+ list_tests
+ elif [[ "$1" = "-h" || "$1" = "--help" ]]; then
+ usage
+ else
+ usage
+ fi
+ fi
+}
+
+test_reqs
+allow_user_defaults
+check_production_sysctl_writes_strict
+test_modprobe
+load_req_mod
+
+trap "test_finish" EXIT
+
+parse_args $@
+
+exit 0
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
new file mode 100644
index 000000000..7a60b85e1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+*.pyc
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
new file mode 100644
index 000000000..49a6f8c3f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/README
@@ -0,0 +1,247 @@
+tdc - Linux Traffic Control (tc) unit testing suite
+
+Author: Lucas Bates - lucasb@mojatatu.com
+
+tdc is a Python script to load tc unit tests from a separate JSON file and
+execute them inside a network namespace dedicated to the task.
+
+
+REQUIREMENTS
+------------
+
+* Minimum Python version of 3.4. Earlier 3.X versions may work but are not
+ guaranteed.
+
+* The kernel must have network namespace support
+
+* The kernel must have veth support available, as a veth pair is created
+ prior to running the tests.
+
+* The kernel must have the appropriate infrastructure enabled to run all tdc
+  unit tests. See the config file in this directory for the minimum required
+  features. As new tests are added, the config options list will be updated.
+
+* All tc-related features being tested must be built in or available as
+  modules. To check what is required in the current setup, run:
+ ./tdc.py -c
+
+   Note:
+   In the current release, a tdc run will abort on any failure in the setup
+   or teardown commands - which includes not being able to run a test simply
+   because the kernel does not support a specific feature. (This will be
+   handled in a future version - the current workaround is to run only the
+   test categories that your kernel supports.)
+
+
+BEFORE YOU RUN
+--------------
+
+The path to the tc executable that will be most commonly tested can be defined
+in the tdc_config.py file. Find the 'TC' entry in the NAMES dictionary and
+define the path.
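+
+For example, in tdc_config.py (the path value shown is illustrative):
+
+    NAMES = {
+        'TC': '/sbin/tc',
+        # ... other substitution values ...
+    }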
+
+If you need to test a different tc executable on the fly, you can do so by
+using the -p option when running tdc:
+ ./tdc.py -p /path/to/tc
+
+
+RUNNING TDC
+-----------
+
+To use tdc, root privileges are required. This is because the
+commands being tested must be run as root. The code that enforces
+execution by root uid has been moved into a plugin (see PLUGIN
+ARCHITECTURE, below).
+
+If nsPlugin is linked, all tests are executed inside a network
+namespace to prevent conflicts within the host.
+
+Running tdc without any arguments will run all tests. Refer to the section
+on command line arguments for more information, or run:
+ ./tdc.py -h
+
+tdc will list the test names as they are being run, and print a summary in
+TAP (Test Anything Protocol) format when they are done. If tests fail,
+output captured from the failing test will be printed immediately following
+the failed test in the TAP output.
+
+
+OVERVIEW OF TDC EXECUTION
+-------------------------
+
+One run of tests is considered a "test suite" (this will be refined in the
+future). A test suite has one or more test cases in it.
+
+A test case has four stages:
+
+ - setup
+ - execute
+ - verify
+ - teardown
+
+The setup and teardown stages can run zero or more commands. The setup
+stage does some setup if the test needs it. The teardown stage undoes
+the setup and returns the system to a "neutral" state so any other test
+can be run next. These two stages require any commands run to return
+success, but do not otherwise verify the results.
+
+The execute and verify stages each run one command. The execute stage
+tests the return code against one or more acceptable values. The
+verify stage checks the return code for success, and also compares
+the stdout with a regular expression.
+
+Each of the commands in any stage will run in a shell instance.
+
+
+USER-DEFINED CONSTANTS
+----------------------
+
+The tdc_config.py file contains multiple values that can be altered to suit
+your needs. Any value in the NAMES dictionary can be altered without affecting
+the tests to be run. These values are used in the tc commands that will be
+executed as part of the test. More will be added as test cases require.
+
+Example:
+ $TC qdisc add dev $DEV1 ingress
+
+The NAMES values are used to substitute into the commands in the test cases.
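+
+A minimal sketch of the substitution (the NAMES values shown here are
+illustrative, not the real defaults from tdc_config.py):
+
+    from string import Template
+
+    NAMES = {'TC': '/sbin/tc', 'DEV1': 'v0p1'}
+    cmd = Template('$TC qdisc add dev $DEV1 ingress').safe_substitute(NAMES)
+    # cmd is now '/sbin/tc qdisc add dev v0p1 ingress'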
+
+
+COMMAND LINE ARGUMENTS
+----------------------
+
+Run tdc.py -h to see the full list of available arguments.
+
+usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
+ [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] [-N]
+ [-d DEVICE] [-P] [-n] [-V]
+
+Linux TC unit tests
+
+optional arguments:
+ -h, --help show this help message and exit
+ -p PATH, --path PATH The full path to the tc executable to use
+ -v, --verbose Show the commands that are being run
+ -N, --notap Suppress tap results for command under test
+ -d DEVICE, --device DEVICE
+ Execute the test case in flower category
+ -P, --pause Pause execution just before post-suite stage
+
+selection:
+ select which test cases: files plus directories; filtered by categories
+ plus testids
+
+ -D DIR [DIR ...], --directory DIR [DIR ...]
+ Collect tests from the specified directory(ies)
+ (default [tc-tests])
+ -f FILE [FILE ...], --file FILE [FILE ...]
+ Run tests from the specified file(s)
+ -c [CATG [CATG ...]], --category [CATG [CATG ...]]
+ Run tests only from the specified category/ies, or if
+ no category/ies is/are specified, list known
+ categories.
+ -e ID [ID ...], --execute ID [ID ...]
+ Execute the specified test cases with specified IDs
+
+action:
+ select action to perform on selected test cases
+
+ -l, --list List all test cases, or those only within the
+ specified category
+ -s, --show Display the selected test cases
+ -i, --id Generate ID numbers for new test cases
+
+netns:
+ options for nsPlugin (run commands in net namespace)
+
+ -n, --namespace
+ Run commands in namespace as specified in tdc_config.py
+
+valgrind:
+ options for valgrindPlugin (run command under test under Valgrind)
+
+ -V, --valgrind Run commands under valgrind
+
+
+PLUGIN ARCHITECTURE
+-------------------
+
+There is now a plugin architecture, and some of the functionality that
+was in the tdc.py script has been moved into the plugins.
+
+The plugins are in the directory plugin-lib. They are executed from
+the directory plugins. Put symbolic links from plugins to plugin-lib,
+and name them according to the order you want them to run.
+
+Example:
+
+bjb@bee:~/work/tc-testing$ ls -l plugins
+total 4
+lrwxrwxrwx 1 bjb bjb 27 Oct 4 16:12 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+lrwxrwxrwx 1 bjb bjb 25 Oct 12 17:55 20-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+-rwxr-xr-x 1 bjb bjb 0 Sep 29 15:56 __init__.py
+
+Each plugin is a subclass of TdcPlugin, defined in TdcPlugin.py, and
+must be called "SubPlugin" so tdc can find it. Plugins are
+distinguished from each other in the python program by their module
+name.
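+
+A minimal skeleton (a hedged example; the sub_class string and the
+hook body are illustrative):
+
+    from TdcPlugin import TdcPlugin
+
+    class SubPlugin(TdcPlugin):
+        def __init__(self):
+            self.sub_class = 'example/SubPlugin'
+            super().__init__()
+
+        def pre_case(self, test_ordinal, testid):
+            super().pre_case(test_ordinal, testid)
+            # extra per-test work would go here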
+
+This base class supplies "hooks" to run extra functions. These hooks are as follows:
+
+pre- and post-suite
+pre- and post-case
+pre- and post-execute stage
+adjust-command (runs in all stages and receives the stage name)
+
+The pre-suite hook receives the number of tests and an array of test ids.
+This allows you to dump out the list of skipped tests in the event of a
+failure during setup or teardown stage.
+
+The pre-case hook receives the ordinal number and test id of the current test.
+
+The adjust-command hook receives the stage id (see list below) and the
+full command to be executed. This allows for last-minute adjustment
+of the command.
+
+The stages are identified by the following strings:
+
+ - pre (pre-suite)
+ - setup
+ - command
+ - verify
+ - teardown
+ - post (post-suite)
+
+
+To write a plugin, you need to inherit from TdcPlugin in
+TdcPlugin.py. To use the plugin, you have to put the
+implementation file in plugin-lib, and add a symbolic link to it from
+plugins. It will be detected at run time and invoked at the
+appropriate times. There are a few examples in the plugin-lib
+directory:
+
+ - rootPlugin.py:
+ implements the enforcement of running as root
+ - nsPlugin.py:
+ sets up a network namespace and runs all commands in that namespace
+ - valgrindPlugin.py
+ runs each command in the execute stage under valgrind,
+ and checks for leaks.
+ This plugin will output an extra test for each test in the test file,
+ one is the existing output as to whether the test passed or failed,
+ and the other is a test whether the command leaked memory or not.
+ (This one is a preliminary version, it may not work quite right yet,
+ but the overall template is there and it should only need tweaks.)
+
+
+ACKNOWLEDGEMENTS
+----------------
+
+Thanks to:
+
+Jamal Hadi Salim, for providing valuable test cases
+Keara Leibovitz, who wrote the CLI test driver that I used as a base for the
+ first version of the tc testing suite. This work was presented at
+ Netdev 1.2 Tokyo in October 2016.
+Samir Hussain, for providing help while I dove into Python for the first time
+ and being a second eye for this code.
diff --git a/tools/testing/selftests/tc-testing/TODO.txt b/tools/testing/selftests/tc-testing/TODO.txt
new file mode 100644
index 000000000..c40698557
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TODO.txt
@@ -0,0 +1,31 @@
+tc Testing Suite To-Do list:
+
+- Determine what tc features are supported in the kernel. If features are not
+ present, prevent the related categories from running.
+
+- Add support for multiple versions of tc to run successively
+
+- Improve error messages when tdc aborts its run. Partially done - still
+ need to better handle problems in pre- and post-suite.
+
+- Use python logger module for debug/verbose output
+
+- Allow tdc to write its results to file.
+ Maybe use python logger module for this too.
+
+- A better implementation of the "hooks". Currently, every plugin
+ will attempt to run a function at every hook point. Could be
+ changed so that plugin __init__ methods will register functions to
+ be run in the various predefined times. Then if a plugin does not
+ require action at a specific point, no penalty will be paid for
+ trying to run a function that will do nothing.
+
+- Proper exception handling - make an exception class and use it
+
+- a TestCase class, for easier testcase handling, searching, comparison
+
+- a TestSuite class
+ and a way to configure a test suite,
+ to automate running multiple "test suites" with different requirements
+
+- super simple test case example using ls, touch, etc
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
new file mode 100644
index 000000000..3ee9a6dac
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+class TdcPlugin:
+ def __init__(self):
+ super().__init__()
+ print(' -- {}.__init__'.format(self.sub_class))
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ self.testcount = testcount
+ self.testidlist = testidlist
+ if self.args.verbose > 1:
+ print(' -- {}.pre_suite'.format(self.sub_class))
+
+ def post_suite(self, index):
+ '''run commands after test_runner completes the test loop
+        index is the last ordinal number of the test that was attempted'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_suite'.format(self.sub_class))
+
+ def pre_case(self, test_ordinal, testid):
+ '''run commands before test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_case'.format(self.sub_class))
+ self.args.testid = testid
+ self.args.test_ordinal = test_ordinal
+
+ def post_case(self):
+ '''run commands after test_runner does one test'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_case'.format(self.sub_class))
+
+ def pre_execute(self):
+ '''run command before test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.pre_execute'.format(self.sub_class))
+
+ def post_execute(self):
+ '''run command after test-runner does the execute step'''
+ if self.args.verbose > 1:
+ print(' -- {}.post_execute'.format(self.sub_class))
+
+ def adjust_command(self, stage, command):
+ '''adjust the command'''
+ if self.args.verbose > 1:
+ print(' -- {}.adjust_command {}'.format(self.sub_class, stage))
+
+ # if stage == 'pre':
+ # pass
+ # elif stage == 'setup':
+ # pass
+ # elif stage == 'execute':
+ # pass
+ # elif stage == 'verify':
+ # pass
+ # elif stage == 'teardown':
+ # pass
+ # elif stage == 'post':
+ # pass
+ # else:
+ # pass
+
+ return command
+
+ def add_args(self, parser):
+ '''Get the plugin args from the command line'''
+ self.argparser = parser
+ return self.argparser
+
+ def check_args(self, args, remaining):
+ '''Check that the args are set correctly'''
+ self.args = args
+ if self.args.verbose > 1:
+ print(' -- {}.check_args'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
new file mode 100644
index 000000000..dc92eb271
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+
+APIDIR := ../../../../include/uapi
+TEST_GEN_FILES = action.o
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+CLANG ?= clang
+LLC ?= llc
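+
+# Probe whether llc supports -mcpu=probe: an empty PROBE result means the
+# probe invocation produced no diagnostics, so it can be used; otherwise
+# fall back to the generic BPF CPU below.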
+PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
+
+ifeq ($(PROBE),)
+ CPU ?= probe
+else
+ CPU ?= generic
+endif
+
+CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+
+CLANG_FLAGS = -I. -I$(APIDIR) \
+ $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+$(OUTPUT)/%.o: %.c
+ $(CLANG) $(CLANG_FLAGS) \
+ -O2 -target bpf -emit-llvm -c $< -o - | \
+ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/bpf/action.c
new file mode 100644
index 000000000..c32b99b80
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/bpf/action.c
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright (c) 2018 Davide Caratti, Red Hat inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+
+__attribute__((section("action-ok"),used)) int action_ok(struct __sk_buff *s)
+{
+ return TC_ACT_OK;
+}
+
+__attribute__((section("action-ko"),used)) int action_ko(struct __sk_buff *s)
+{
+ s->data = 0x0;
+ return TC_ACT_OK;
+}
+
+char _license[] __attribute__((section("license"),used)) = "GPL";
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
new file mode 100644
index 000000000..203302065
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/config
@@ -0,0 +1,48 @@
+CONFIG_NET_SCHED=y
+
+#
+# Queueing/Scheduling
+#
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_INGRESS=m
+
+#
+# Classification
+#
+CONFIG_NET_CLS=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_CLS_U32=m
+CONFIG_CLS_U32_PERF=y
+CONFIG_CLS_U32_MARK=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_STACK=32
+CONFIG_NET_EMATCH_CMP=m
+CONFIG_NET_EMATCH_NBYTE=m
+CONFIG_NET_EMATCH_U32=m
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_EMATCH_TEXT=m
+CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_IPT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_GACT_PROB=y
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_IPT=m
+CONFIG_NET_ACT_NAT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_SIMP=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_ACT_BPF=m
+CONFIG_NET_ACT_CONNMARK=m
+CONFIG_NET_ACT_SKBMOD=m
+CONFIG_NET_ACT_IFE=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_IFE_SKBMARK=m
+CONFIG_NET_IFE_SKBPRIO=m
+CONFIG_NET_IFE_SKBTCINDEX=m
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_SCH_FIFO=y
diff --git a/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
new file mode 100644
index 000000000..c18f88d09
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-plugins/AddingPlugins.txt
@@ -0,0 +1,104 @@
+tdc - Adding plugins for tdc
+
+Author: Brenda J. Butler - bjb@mojatatu.com
+
+ADDING PLUGINS
+--------------
+
+A new plugin should be written in python as a class that inherits from TdcPlugin.
+There are some examples in plugin-lib.
+
+The plugin can be used to add functionality to the test framework,
+such as:
+
+- adding commands to be run before and/or after the test suite
+- adding commands to be run before and/or after the test cases
+- adding commands to be run before and/or after the execute phase of the test cases
+- ability to alter the command to be run in any phase:
+ pre (the pre-suite stage)
+      setup
+      command (the execute stage)
+ verify
+ teardown
+ post (the post-suite stage)
+- ability to add to the command line args, and use them at run time
+
+
+The functions in the class should follow the following interfaces:
+
+ def __init__(self)
+ def pre_suite(self, testcount, testidlist) # see "PRE_SUITE" below
+ def post_suite(self, ordinal) # see "SKIPPING" below
+ def pre_case(self, test_ordinal, testid) # see "PRE_CASE" below
+ def post_case(self)
+ def pre_execute(self)
+ def post_execute(self)
+ def adjust_command(self, stage, command) # see "ADJUST" below
+ def add_args(self, parser) # see "ADD_ARGS" below
+ def check_args(self, args, remaining) # see "CHECK_ARGS" below
+
+
+PRE_SUITE
+
+This method takes a testcount (number of tests to be run) and
+testidlist (array of test ids for tests that will be run). This is
+useful for various things, including when an exception occurs and the
+rest of the tests must be skipped. The info is stored in the object,
+and the post_suite method can refer to it when dumping the "skipped"
+TAP output. The tdc.py script will do that for the test suite as
+defined in the test case, but if the plugin is being used to run extra
+tests on each test (eg, check for memory leaks on associated
+co-processes) then that other tap output can be generated in the
+post-suite method using this info passed in to the pre_suite method.
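+
+A hedged sketch of the idea (the TAP line format here is simplified):
+
+    def post_suite(self, index):
+        super().post_suite(index)
+        # self.testidlist was stored by TdcPlugin.pre_suite()
+        for tid in self.testidlist[index:]:
+            print('ok - {} # SKIP'.format(tid))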
+
+
+SKIPPING
+
+The post_suite method will receive the ordinal number of the last
+test to be attempted. It can use this info when outputting
+the TAP output for the extra test cases.
+
+
+PRE_CASE
+
+The pre_case method will receive the ordinal number of the test
+and the test id. This is useful for outputting the extra test results.
+
+
+ADJUST
+
+The adjust_command method receives a string representing
+the execution stage and a string which is the actual command to be
+executed. The plugin can adjust the command, based on the stage of
+execution.
+
+The stages are represented by the following strings:
+
+ 'pre'
+ 'setup'
+ 'command'
+ 'verify'
+ 'teardown'
+ 'post'
+
+The adjust_command method must return the adjusted command so tdc
+can use it.
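+
+A hedged sketch (the 'strace -f' prefix is purely illustrative, and the
+string/list handling is simplified compared to the real plugins):
+
+    def adjust_command(self, stage, command):
+        super().adjust_command(stage, command)
+        if stage == 'command' and not isinstance(command, list):
+            command = 'strace -f ' + command
+        return command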
+
+
+ADD_ARGS
+
+The add_args method receives the argparser object and can add
+arguments to it. Care should be taken that the new arguments do not
+conflict with any from tdc.py or from other plugins that will be used
+concurrently.
+
+The add_args method should return the argparser object.
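+
+For example (mirroring the style of the plugins in plugin-lib; the
+option name is illustrative):
+
+    def add_args(self, parser):
+        super().add_args(parser)
+        self.argparser_group = self.argparser.add_argument_group(
+            'example', 'options for the example plugin')
+        self.argparser_group.add_argument(
+            '-X', '--example', action='store_true',
+            help='enable the example behaviour')
+        return self.argparser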
+
+
+CHECK_ARGS
+
+The check_args method lets the plugin validate the args, if
+needed. If there is a problem, an Exception should be raised,
+with a string that explains the problem.
+
+eg: raise Exception('plugin xxx, arg -y is wrong, fix it')
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
new file mode 100644
index 000000000..17b267ded
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -0,0 +1,100 @@
+tdc - Adding test cases for tdc
+
+Author: Lucas Bates - lucasb@mojatatu.com
+
+ADDING TEST CASES
+-----------------
+
+User-defined tests should be added by defining a separate JSON file. This
+will help prevent conflicts when updating the repository. Refer to
+template.json for the required JSON format for test cases.
+
+Include the 'id' field, but do not assign a value. Running tdc with the -i
+option will generate a unique ID for that test case.
+
+tdc will recursively search the 'tc-tests' subdirectory (or the
+directories named with the -D option) for .json files. Any test case
+files you create in these directories will automatically be included.
+If you wish to store your custom test cases elsewhere, be sure to run
+tdc with the -f argument and the path to your file, or the -D argument
+and the path to your directory(ies).
+
+Be aware of required escape characters in the JSON data - particularly
+when defining the match pattern. Refer to the supplied json test files
+for examples when in doubt. The match pattern is a python regular
+expression, but because it is stored in JSON it must also be escaped
+according to JSON syntax.
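+
+For instance, a pattern like the one used by the bpf tests must escape
+a literal '[' once for the regex and once for JSON:
+
+    "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\]"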
+
+
+TEST CASE STRUCTURE
+-------------------
+
+Each test case has required data:
+
+id: A unique alphanumeric value to identify a particular test case
+name: Descriptive name that explains the command under test
+category: A list of single-word descriptions covering what the command
+ under test is testing. Example: filter, actions, u32, gact, etc.
+setup: The list of commands required to ensure the command under test
+ succeeds. For example: if testing a filter, the command to create
+ the qdisc would appear here.
+ This list can be empty.
+ Each command can be a string to be executed, or a list consisting
+ of a string which is a command to be executed, followed by 1 or
+ more acceptable exit codes for this command.
+ If only a string is given for the command, then an exit code of 0
+ will be expected.
+cmdUnderTest: The tc command being tested itself.
+expExitCode: The code returned by the command under test upon its termination.
+ tdc will compare this value against the actual returned value.
+verifyCmd: The tc command to be run to verify successful execution.
+ For example: if the command under test creates a gact action,
+ verifyCmd should be "$TC actions show action gact"
+matchPattern: A regular expression to be applied against the output of the
+ verifyCmd to prove the command under test succeeded. This pattern
+ should be as specific as possible so that a false positive is not
+ matched.
+matchCount: How many times the regex in matchPattern should match. A value
+ of 0 is acceptable.
+teardown: The list of commands to clean up after the test is completed.
+ The environment should be returned to the same state as when
+ this test was started: qdiscs deleted, actions flushed, etc.
+ This list can be empty.
+ Each command can be a string to be executed, or a list consisting
+ of a string which is a command to be executed, followed by 1 or
+ more acceptable exit codes for this command.
+ If only a string is given for the command, then an exit code of 0
+ will be expected.
+
+
+SETUP/TEARDOWN ERRORS
+---------------------
+
+If an error is detected during the setup/teardown process, execution of the
+tests will immediately stop with an error message and the namespace in which
+the tests are run will be destroyed. This is to prevent inaccurate results
+in the test cases. tdc will output a series of TAP results for the skipped
+tests.
+
+Repeated failures of the setup/teardown may indicate a problem with the test
+case, or possibly even a bug in one of the commands that are not being tested.
+
+It's possible to include acceptable exit codes with the setup/teardown command
+so that it doesn't halt the script for an error that doesn't matter. Turn the
+individual command into a list, with the command being first, followed by all
+acceptable exit codes for the command.
+
+Example:
+
+A pair of setup commands. The first can have exit code 0, 1 or 255, the
+second must have exit code 0.
+
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/example.json b/tools/testing/selftests/tc-testing/creating-testcases/example.json
new file mode 100644
index 000000000..5ec501200
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/example.json
@@ -0,0 +1,55 @@
+[
+ {
+ "id": "1f",
+ "name": "simple test to test framework",
+ "category": [
+ "example"
+ ],
+ "setup": [
+ "mkdir mytest"
+ ],
+ "cmdUnderTest": "touch mytest/blorfl",
+ "expExitCode": "0",
+ "verifyCmd": "ls mytest/* | grep '[b]lorfl'",
+ "matchPattern": "orfl",
+ "matchCount": "1",
+ "teardown": [
+ "rm -rf mytest"
+ ]
+ },
+ {
+ "id": "2f",
+ "name": "simple test, no need for verify",
+ "category": [
+ "example"
+ ],
+ "setup": [
+ "mkdir mytest",
+ "touch mytest/blorfl"
+ ],
+ "cmdUnderTest": "ls mytest/blorfl",
+ "expExitCode": "0",
+ "verifyCmd": "/bin/true",
+ "matchPattern": " ",
+ "matchCount": "0",
+ "teardown": [
+ "rm -rf mytest"
+ ]
+ },
+ {
+ "id": "3f",
+ "name": "simple test, no need for setup or teardown (or verify)",
+ "category": [
+ "example"
+ ],
+ "setup": [
+ ],
+ "cmdUnderTest": "ip l l lo",
+ "expExitCode": "0",
+ "verifyCmd": "/bin/true",
+ "matchPattern": " ",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/template.json b/tools/testing/selftests/tc-testing/creating-testcases/template.json
new file mode 100644
index 000000000..8b99b86d6
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/creating-testcases/template.json
@@ -0,0 +1,51 @@
+[
+ {
+ "id": "",
+ "name": "",
+ "category": [
+ "",
+ ""
+ ],
+ "setup": [
+ ""
+ ],
+ "cmdUnderTest": "",
+ "expExitCode": "",
+ "verifyCmd": "",
+ "matchPattern": "",
+ "matchCount": "",
+ "teardown": [
+ ""
+ ]
+ },
+ {
+ "id": "",
+ "name": "",
+ "category": [
+ "",
+ ""
+ ],
+ "setup": [
+ "",
+ [
+ "",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "",
+ "expExitCode": "",
+ "verifyCmd": "",
+ "matchPattern": "",
+ "matchCount": "",
+ "teardown": [
+ "",
+ [
+ "",
+ 0,
+ 255
+ ]
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
new file mode 100644
index 000000000..aa8a26697
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/README-PLUGINS
@@ -0,0 +1,27 @@
+tdc.py will look for plugins in a directory plugins off the cwd.
+Make a set of numbered symbolic links from there to the actual plugins.
+Eg:
+
+tdc.py
+plugin-lib/
+plugins/
+ __init__.py
+ 10-rootPlugin.py -> ../plugin-lib/rootPlugin.py
+ 20-valgrindPlugin.py -> ../plugin-lib/valgrindPlugin.py
+ 30-nsPlugin.py -> ../plugin-lib/nsPlugin.py
+
+
+tdc.py will find them and use them.
+
+
+rootPlugin
+ Check if the uid is root. If not, bail out.
+
+valgrindPlugin
+ Run the command under test with valgrind, and produce an extra set of TAP results for the memory tests.
+ This plugin will write files to the cwd, called vgnd-xxx.log. These will contain
+ the valgrind output for test xxx. Any file matching the glob 'vgnd-*.log' will be
+ deleted at the end of the run.
+
+nsPlugin
+ Run all the commands in a network namespace.
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
new file mode 100644
index 000000000..a194b1af2
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -0,0 +1,141 @@
+import os
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'ns/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+
+ if self.args.namespace:
+ self._ns_create()
+
+ def post_suite(self, index):
+        '''run commands after test_runner completes the test loop'''
+ super().post_suite(index)
+ if self.args.verbose:
+ print('{}.post_suite'.format(self.sub_class))
+
+ if self.args.namespace:
+ self._ns_destroy()
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'netns',
+ 'options for nsPlugin(run commands in net namespace)')
+ self.argparser_group.add_argument(
+ '-n', '--namespace', action='store_true',
+ help='Run commands in namespace')
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.namespace:
+ return command
+
+ if self.args.verbose:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+ if stage == 'setup' or stage == 'execute' or stage == 'verify' or stage == 'teardown':
+ if self.args.verbose:
+ print('adjust_command: stage is {}; inserting netns stuff in command [{}] list [{}]'.format(stage, command, cmdlist))
+ cmdlist.insert(0, self.args.NAMES['NS'])
+ cmdlist.insert(0, 'exec')
+ cmdlist.insert(0, 'netns')
+ cmdlist.insert(0, 'ip')
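+            # The insert(0, ...) calls above build the prefix right to
+            # left, so the final command is:
+            # ip netns exec <NS> <original command>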
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def _ns_create(self):
+ '''
+ Create the network namespace in which the tests will be run and set up
+ the required network devices for it.
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns add {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link add $DEV0 type veth peer name $DEV1'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip link set $DEV0 up'
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ if self.args.device:
+ cmd = 'ip link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+ cmd = 'ip -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
+ self._exec_cmd('pre', cmd)
+
+ def _ns_destroy(self):
+ '''
+ Destroy the network namespace for testing (and any associated network
+ devices as well)
+ '''
+ if self.args.namespace:
+ cmd = 'ip netns delete {}'.format(self.args.NAMES['NS'])
+ self._exec_cmd('post', cmd)
+
+ def _exec_cmd(self, stage, command):
+ '''
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ '''
+ if '$' in command:
+ command = self._replace_keywords(command)
+
+        command = self.adjust_command(stage, command)
+ if self.args.verbose:
+ print('_exec_cmd: command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8")
+ else:
+ foutput = rawout.decode("utf-8")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+ def _replace_keywords(self, cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(self.args.NAMES)
+ return subcmd
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
new file mode 100644
index 000000000..e36775bd4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -0,0 +1,19 @@
+import os
+import sys
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'root/SubPlugin'
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ # run commands before test_runner goes into a test loop
+ super().pre_suite(testcount, testidlist)
+
+ if os.geteuid():
+ print('This script must be run with root privileges', file=sys.stderr)
+ exit(1)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
new file mode 100644
index 000000000..477a7bd7d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -0,0 +1,142 @@
+'''
+run the command under test, under valgrind and collect memory leak info
+as a separate test.
+'''
+
+
+import os
+import re
+import signal
+from string import Template
+import subprocess
+import time
+from TdcPlugin import TdcPlugin
+
+from tdc_config import *
+
+def vp_extract_num_from_string(num_as_string_maybe_with_commas):
+ return int(num_as_string_maybe_with_commas.replace(',',''))
+
+class SubPlugin(TdcPlugin):
+ def __init__(self):
+ self.sub_class = 'valgrind/SubPlugin'
+ self.tap = ''
+ super().__init__()
+
+ def pre_suite(self, testcount, testidlist):
+ '''run commands before test_runner goes into a test loop'''
+ super().pre_suite(testcount, testidlist)
+ if self.args.verbose > 1:
+ print('{}.pre_suite'.format(self.sub_class))
+ if self.args.valgrind:
+ self._add_to_tap('1..{}\n'.format(self.testcount))
+
+ def post_suite(self, index):
+        '''run commands after test_runner completes the test loop'''
+ super().post_suite(index)
+ self._add_to_tap('\n|---\n')
+ if self.args.verbose > 1:
+ print('{}.post_suite'.format(self.sub_class))
+ print('{}'.format(self.tap))
+ if self.args.verbose < 4:
+ subprocess.check_output('rm -f vgnd-*.log', shell=True)
+
+ def add_args(self, parser):
+ super().add_args(parser)
+ self.argparser_group = self.argparser.add_argument_group(
+ 'valgrind',
+ 'options for valgrindPlugin (run command under test under Valgrind)')
+
+ self.argparser_group.add_argument(
+ '-V', '--valgrind', action='store_true',
+ help='Run commands under valgrind')
+
+ return self.argparser
+
+ def adjust_command(self, stage, command):
+ super().adjust_command(stage, command)
+ cmdform = 'list'
+ cmdlist = list()
+
+ if not self.args.valgrind:
+ return command
+
+ if self.args.verbose > 1:
+ print('{}.adjust_command'.format(self.sub_class))
+
+ if not isinstance(command, list):
+ cmdform = 'str'
+ cmdlist = command.split()
+ else:
+ cmdlist = command
+
+ if stage == 'execute':
+ if self.args.verbose > 1:
+ print('adjust_command: stage is {}; inserting valgrind stuff in command [{}] list [{}]'.
+ format(stage, command, cmdlist))
+ cmdlist.insert(0, '--track-origins=yes')
+ cmdlist.insert(0, '--show-leak-kinds=definite,indirect')
+ cmdlist.insert(0, '--leak-check=full')
+ cmdlist.insert(0, '--log-file=vgnd-{}.log'.format(self.args.testid))
+ cmdlist.insert(0, '-v') # ask for summary of non-leak errors
+ cmdlist.insert(0, ENVIR['VALGRIND_BIN'])
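+            # The insert(0, ...) calls above build the prefix right to
+            # left: $VALGRIND_BIN -v --log-file=vgnd-<testid>.log
+            # --leak-check=full ... <original command>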
+ else:
+ pass
+
+ if cmdform == 'str':
+ command = ' '.join(cmdlist)
+ else:
+ command = cmdlist
+
+ if self.args.verbose > 1:
+ print('adjust_command: return command [{}]'.format(command))
+ return command
+
+ def post_execute(self):
+ if not self.args.valgrind:
+ return
+
+ self.definitely_lost_re = re.compile(
+ r'definitely lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\sblocks', re.MULTILINE | re.DOTALL)
+ self.indirectly_lost_re = re.compile(
+ r'indirectly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.possibly_lost_re = re.compile(
+            r'possibly lost:\s+([,0-9]+)\s+bytes in\s+([,0-9]+)\s+blocks', re.MULTILINE | re.DOTALL)
+ self.non_leak_error_re = re.compile(
+ r'ERROR SUMMARY:\s+([,0-9]+) errors from\s+([,0-9]+)\s+contexts', re.MULTILINE | re.DOTALL)
+
+ def_num = 0
+ ind_num = 0
+ pos_num = 0
+ nle_num = 0
+
+ # what about concurrent test runs? Maybe force them to be in different directories?
+ with open('vgnd-{}.log'.format(self.args.testid)) as vfd:
+ content = vfd.read()
+ def_mo = self.definitely_lost_re.search(content)
+ ind_mo = self.indirectly_lost_re.search(content)
+ pos_mo = self.possibly_lost_re.search(content)
+ nle_mo = self.non_leak_error_re.search(content)
+
+ if def_mo:
+ def_num = int(def_mo.group(2))
+ if ind_mo:
+ ind_num = int(ind_mo.group(2))
+ if pos_mo:
+ pos_num = int(pos_mo.group(2))
+ if nle_mo:
+ nle_num = int(nle_mo.group(1))
+
+ mem_results = ''
+ if (def_num > 0) or (ind_num > 0) or (pos_num > 0) or (nle_num > 0):
+ mem_results += 'not '
+
+ mem_results += 'ok {} - {}-mem # {}\n'.format(
+ self.args.test_ordinal, self.args.testid, 'memory leak check')
+ self._add_to_tap(mem_results)
+ if mem_results.startswith('not '):
+ print('{}'.format(content))
+ self._add_to_tap(content)
+
+ def _add_to_tap(self, more_tap_output):
+ self.tap += more_tap_output
diff --git a/tools/testing/selftests/tc-testing/plugins/__init__.py b/tools/testing/selftests/tc-testing/plugins/__init__.py
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/plugins/__init__.py
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
new file mode 100644
index 000000000..1a9b282dd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -0,0 +1,294 @@
+[
+ {
+ "id": "d959",
+ "name": "Add cBPF action with valid bytecode",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 100",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "f84a",
+ "name": "Add cBPF action with invalid bytecode",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' index 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 100",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "e939",
+ "name": "Add eBPF action with valid object-file",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ "make -C bpf",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 667",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf",
+ "make -C bpf clean"
+ ]
+ },
+ {
+ "id": "282d",
+ "name": "Add eBPF action with invalid object-file",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ "make -C bpf",
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 667",
+ "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "make -C bpf clean"
+ ]
+ },
+ {
+ "id": "d819",
+ "name": "Replace cBPF bytecode and action control",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 555",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action replace action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 555",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 555",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' default-action drop.*index 555 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "6ae3",
+ "name": "Delete cBPF action ",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 444",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action bpf index 444",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 444",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 444 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "3e0d",
+ "name": "List cBPF actions",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC action flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+ ],
+ "cmdUnderTest": "$TC action list action bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "55ce",
+ "name": "Flush BPF actions",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' ok index 101",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' drop index 102",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0' continue index 103"
+ ],
+ "cmdUnderTest": "$TC action flush action bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action bpf"
+ ]
+ },
+ {
+ "id": "ccc3",
+ "name": "Add cBPF action with duplicate index",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 4294967295"
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967295",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action bpf index 4294967295",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "89c7",
+ "name": "Add cBPF action with invalid index",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' index 4294967296 cookie 12345",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action ls action bpf",
+ "matchPattern": "action order [0-9]*: bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' default-action pipe.*cookie 12345",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ },
+ {
+ "id": "7ab9",
+ "name": "Add cBPF action with cookie",
+ "category": [
+ "actions",
+ "bpf"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action bpf",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0' cookie d0d0d0d0d0d0d0d0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action bpf",
+ "matchPattern": "action order [0-9]*: bpf.*cookie d0d0d0d0d0d0d0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action bpf"
+ ]
+ }
+]
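
The objects above all follow the schema used throughout these files: "setup" commands (either a bare string expected to exit 0, or a [command, allowed-exit-code...] list, as in the flush entries that tolerate 0, 1 and 255), a "cmdUnderTest" checked against "expExitCode", a "verifyCmd" whose output must contain "matchPattern" exactly "matchCount" times, and "teardown" commands. A minimal sketch of that lifecycle in Python, assuming a hypothetical run() helper that expands the $TC placeholder; the real harness may differ in details:

import re
import subprocess

def run(cmd, tc="/sbin/tc"):
    # Hypothetical helper: substitute the $TC placeholder and execute the
    # command through a shell, returning (exit status, combined output).
    proc = subprocess.run(cmd.replace("$TC", tc), shell=True,
                          capture_output=True, text=True)
    return proc.returncode, proc.stdout + proc.stderr

def run_case(case):
    # Setup entries are either plain strings (must exit 0) or a list of
    # [command, allowed exit codes...], as in the flush entries above.
    for step in case["setup"]:
        cmd, allowed = (step, [0]) if isinstance(step, str) else (step[0], step[1:])
        status, _ = run(cmd)
        assert status in allowed, f"setup failed: {cmd}"
    status, _ = run(case["cmdUnderTest"])
    assert status == int(case["expExitCode"])
    _, output = run(case["verifyCmd"])
    assert len(re.findall(case["matchPattern"], output)) == int(case["matchCount"])
    for cmd in case["teardown"]:
        run(cmd)
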
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
new file mode 100644
index 000000000..13147a1f5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -0,0 +1,291 @@
+[
+ {
+ "id": "2002",
+ "name": "Add valid connmark action with defaults",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "56a5",
+ "name": "Add valid connmark action with control pass",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 1",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "7c66",
+ "name": "Add valid connmark action with control drop",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark drop index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 100",
+ "matchPattern": "action order [0-9]+: connmark zone 0 drop.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "a913",
+ "name": "Add valid connmark action with control pipe",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark pipe index 455",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 455",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pipe.*index 455 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "bdd8",
+ "name": "Add valid connmark action with control reclassify",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark reclassify index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 reclassify.*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "b8be",
+ "name": "Add valid connmark action with control continue",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark continue index 17",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 continue.*index 17 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "d8a6",
+ "name": "Add valid connmark action with control jump",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark jump 10 index 17",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action connmark",
+ "matchPattern": "action order [0-9]+: connmark zone 0 jump 10.*index 17 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "aae8",
+ "name": "Add valid connmark action with zone argument",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 100 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 1",
+ "matchPattern": "action order [0-9]+: connmark zone 100 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "2f0b",
+ "name": "Add valid connmark action with invalid zone argument",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 65536 reclassify index 21",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action connmark index 1",
+ "matchPattern": "action order [0-9]+: connmark zone 65536 reclassify.*index 21 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "9305",
+ "name": "Add connmark action with unsupported argument",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 655 unsupp_arg pass index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action connmark index 2",
+ "matchPattern": "action order [0-9]+: connmark zone 655 unsupp_arg pass.*index 2 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "71ca",
+ "name": "Add valid connmark action and replace it",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action connmark zone 777 pass index 555"
+ ],
+ "cmdUnderTest": "$TC actions replace action connmark zone 555 reclassify index 555",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 555",
+ "matchPattern": "action order [0-9]+: connmark zone 555 reclassify.*index 555 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "5f8f",
+ "name": "Add valid connmark action with cookie",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action connmark zone 555 pipe index 5 cookie aabbccddeeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 5",
+ "matchPattern": "action order [0-9]+: connmark zone 555 pipe.*index 5 ref.*cookie aabbccddeeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ }
+]
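
Since each of these files is a plain JSON array and every case carries a "category" list (e.g. ["actions", "connmark"]), selecting the cases for one action type is a straightforward filter. A small sketch, independent of any particular harness:

import json

def load_cases(path, category=None):
    # Each tc-tests file is a JSON array of case objects; "category" is a
    # list of tags, so filtering for e.g. "connmark" is a membership test.
    with open(path) as f:
        cases = json.load(f)
    if category is None:
        return cases
    return [case for case in cases if category in case["category"]]

# e.g. load_cases("tc-tests/actions/connmark.json", "connmark")
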
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
new file mode 100644
index 000000000..a022792d3
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -0,0 +1,504 @@
+[
+ {
+ "id": "6d84",
+ "name": "Add csum iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum iph index 800",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 800",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 800 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "1862",
+ "name": "Add csum ip4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip4h index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "15c6",
+ "name": "Add csum ipv4h action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h index 1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1122",
+ "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 1122 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bf47",
+ "name": "Add csum icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 1",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "cc1d",
+ "name": "Add csum igmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum igmp index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 999",
+ "matchPattern": "action order [0-9]*: csum \\(igmp\\) action pass.*index 999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bccc",
+ "name": "Add csum foobar action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum foobar index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(foobar\\) action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "3bb4",
+ "name": "Add csum tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum tcp index 9999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 9999",
+ "matchPattern": "action order [0-9]*: csum \\(tcp\\) action pass.*index 9999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "759c",
+ "name": "Add csum udp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp index 334455",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 334455",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pass.*index 334455 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "bdb6",
+ "name": "Add csum udp xor iph action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp xor iph index 3",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "action order [0-9]*: csum \\(udp xor iph\\) action pass.*index 3 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "c220",
+ "name": "Add csum udplite action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udplite continue index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 3",
+ "matchPattern": "action order [0-9]*: csum \\(udplite\\) action continue.*index 3 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "8993",
+ "name": "Add csum sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum sctp index 777",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 777",
+ "matchPattern": "action order [0-9]*: csum \\(sctp\\) action pass.*index 777 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b138",
+ "name": "Add csum ip & icmp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ip and icmp pipe index 123",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 123",
+ "matchPattern": "action order [0-9]*: csum \\(iph, icmp\\) action pipe.*index 123 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "eeda",
+ "name": "Add csum ip & sctp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum ipv4h sctp continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 2",
+ "matchPattern": "action order [0-9]*: csum \\(iph, sctp\\) action continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "0017",
+ "name": "Add csum udp or tcp action",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp or tcp continue index 27",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 27",
+ "matchPattern": "action order [0-9]*: csum \\(tcp, udp\\) action continue.*index 27 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b10b",
+ "name": "Add all 7 csum actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp ip4h sctp igmp udplite udp tcp index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(iph, icmp, igmp, tcp, udp, udplite, sctp\\).*index 7 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "ce92",
+ "name": "Add csum udp action with cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum udp pipe index 7 cookie 12345678",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 7",
+ "matchPattern": "action order [0-9]*: csum \\(udp\\) action pipe.*index 7.*cookie 12345678",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "912f",
+ "name": "Add csum icmp action with large cookie",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action csum icmp pipe index 17 cookie aabbccddeeff1122",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action csum index 17",
+ "matchPattern": "action order [0-9]*: csum \\(icmp\\) action pipe.*index 17.*cookie aabbccddeeff1122",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "879b",
+ "name": "Add batch of 32 csum tcp actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "b4e9",
+ "name": "Delete batch of 32 csum actions",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "0015",
+ "name": "Add batch of 32 csum tcp actions with large cookies",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action csum",
+ "matchPattern": "^[ \t]+index [0-9]* ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action csum"
+ ]
+ },
+ {
+ "id": "989e",
+ "name": "Delete batch of 32 csum actions with large cookies",
+ "category": [
+ "actions",
+ "csum"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action csum",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum tcp continue index \\$i cookie aaabbbcccdddeee \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action csum index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action csum",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ }
+]
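
The batch cases above (879b, b4e9, 0015, 989e) build one long "actions add" or "actions del" command line in a bash loop, so 32 actions are created or deleted in a single tc invocation. An equivalent sketch of that command construction, keeping the $TC placeholder unexpanded:

def batch_csum_cmd(verb="add", count=32, cookie=None):
    # Mirrors the bash loops above: one tc invocation carrying `count`
    # csum actions with consecutive indices; the cookie is optional, as
    # in test 0015. Deletion only needs the action kind and index.
    parts = []
    for i in range(1, count + 1):
        if verb == "add":
            part = f"action csum tcp continue index {i}"
            if cookie:
                part += f" cookie {cookie}"
        else:
            part = f"action csum index {i}"
        parts.append(part)
    return f"$TC actions {verb} " + " ".join(parts)
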
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
new file mode 100644
index 000000000..68c91023c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -0,0 +1,540 @@
+[
+ {
+ "id": "e89a",
+ "name": "Add valid pass action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pass index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "a02c",
+ "name": "Add valid pipe action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pipe index 6",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pipe.*index 6 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "feef",
+ "name": "Add valid reclassify action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action reclassify index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify.*index 5 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8a7a",
+ "name": "Add valid drop action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 30",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 30 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "9a52",
+ "name": "Add valid continue action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action continue index 432",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action continue.*index 432 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "d700",
+ "name": "Add invalid action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pump index 386",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action.*index 386 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "9215",
+ "name": "Add action with duplicate index",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pipe index 15"
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 15",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 15 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "798e",
+ "name": "Add action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 4294967296",
+ "expExitCode": "255",
+ "verifyCmd": "actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 4294967296 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "22be",
+ "name": "Add action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action drop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "ac2a",
+ "name": "List actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 101",
+ "$TC actions add action reclassify index 102",
+ "$TC actions add action reclassify index 103",
+ "$TC actions add action reclassify index 104",
+ "$TC actions add action reclassify index 105"
+ ],
+ "cmdUnderTest": "$TC actions list action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify",
+ "matchCount": "5",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "3edf",
+ "name": "Flush gact actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ "$TC actions add action reclassify index 101",
+ "$TC actions add action reclassify index 102",
+ "$TC actions add action reclassify index 103",
+ "$TC actions add action reclassify index 104",
+ "$TC actions add action reclassify index 105"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "63ec",
+ "name": "Delete pass action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pass index 1"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "46be",
+ "name": "Delete pipe action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action pipe index 9"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action pipe.*index 9 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "2e08",
+ "name": "Delete reclassify action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 65536"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 65536",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action reclassify.*index 65536 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "99c4",
+ "name": "Delete drop action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 16"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action drop.*index 16 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "fb6b",
+ "name": "Delete continue action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action continue index 32"
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 32",
+ "expExitCode": "0",
+ "verifyCmd": "actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action continue.*index 32 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "0eb3",
+ "name": "Delete non-existent action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions del action gact index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order [0-9]*: gact action",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "f02c",
+ "name": "Replace gact action",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 10",
+ "$TC actions add action drop index 12"
+ ],
+ "cmdUnderTest": "$TC actions replace action ok index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "action order [0-9]*: gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "525f",
+ "name": "Get gact action by index",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action drop index 3900800700"
+ ],
+ "cmdUnderTest": "$TC actions get action gact index 3900800700",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action gact index 3900800700",
+ "matchPattern": "index 3900800700",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "1021",
+ "name": "Add batch of 32 gact pass actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action pass index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "da7a",
+ "name": "Add batch of 32 gact continue actions with cookie",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i cookie aabbccddeeff112233445566778800a1 \\\"; args=\"\\$args\\$cmd\"; done && $TC actions add \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "32",
+ "teardown": [
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "8aa3",
+ "name": "Delete batch of 32 gact continue actions",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action continue index \\$i \\\"; args=\\\"\\$args\\$cmd\\\"; done && $TC actions add \\$args\""
+ ],
+ "cmdUnderTest": "bash -c \"for i in \\`seq 1 32\\`; do cmd=\\\"action gact index \\$i \\\"; args=\"\\$args\\$cmd\"; done && $TC actions del \\$args\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "^[ \t]+index [0-9]+ ref",
+ "matchCount": "0",
+ "teardown": []
+ }
+]
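
The batch verifications above count occurrences of a per-line pattern such as "^[ \t]+index [0-9]+ ref" across the whole listing. A sketch of that counting step; the MULTILINE flag is an assumption of this sketch (needed for "^" to anchor at each line start), not a documented property of the harness:

import re

def count_matches(output: str, pattern: str) -> int:
    # Count pattern occurrences in the verify output; re.MULTILINE makes
    # "^" match at every line start, which the "^[ \t]+index ..." patterns
    # above rely on when expecting 32 hits.
    return len(re.findall(pattern, output, re.MULTILINE))

# A passing batch-add check would then satisfy:
# count_matches(listing, "^[ \t]+index [0-9]+ ref") == 32
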
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
new file mode 100644
index 000000000..0da3545ca
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -0,0 +1,1064 @@
+[
+ {
+ "id": "7682",
+ "name": "Create valid ife encode action with mark and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ef47",
+ "name": "Create valid ife encode action with mark and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 10 pipe index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "df43",
+ "name": "Create valid ife encode action with mark and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*allow mark.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "e4cf",
+ "name": "Create valid ife encode action with mark and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 789 drop index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*use mark 789.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ccba",
+ "name": "Create valid ife encode action with mark and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 656768 reclassify index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 656768.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "a1cf",
+ "name": "Create valid ife encode action with mark and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 65 jump 1 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 2",
+ "matchPattern": "action order [0-9]*: ife encode action jump 1.*type 0[xX]ED3E.*use mark 65.*index 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "cb3d",
+ "name": "Create valid ife encode action with mark value at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295 reclassify index 90",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use mark 4294967295.*index 90",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "1efb",
+ "name": "Create ife encode action with mark value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 4294967295999 pipe index 90",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 90",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use mark 4294967295999.*index 90",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "95ed",
+ "name": "Create valid ife encode action with prio and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio pass index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow prio.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "aa17",
+ "name": "Create valid ife encode action with prio and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 7 pipe index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 7.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "74c7",
+ "name": "Create valid ife encode action with prio and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 3 continue index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use prio 3.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7a97",
+ "name": "Create valid ife encode action with prio and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio drop index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow prio.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "f66b",
+ "name": "Create valid ife encode action with prio and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 reclassify index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 998877.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "3056",
+ "name": "Create valid ife encode action with prio and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 998877 jump 10 index 9",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 9",
+ "matchPattern": "action order [0-9]*: ife encode action jump 10.*type 0[xX]ED3E.*use prio 998877.*index 9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7dd3",
+ "name": "Create valid ife encode action with prio value at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967295 reclassify index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 99",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 4294967295.*index 99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "2ca1",
+ "name": "Create ife encode action with prio value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 4294967298 pipe index 99",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 99",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use prio 4294967298.*index 99",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "05bb",
+ "name": "Create valid ife encode action with tcindex and pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "ce65",
+ "name": "Create valid ife encode action with tcindex and pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 111 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 111.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "09cd",
+ "name": "Create valid ife encode action with tcindex and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "8eb5",
+ "name": "Create valid ife encode action with tcindex and continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 1 continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action continue.*type 0[xX]ED3E.*use tcindex 1.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "451a",
+ "name": "Create valid ife encode action with tcindex and drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex drop index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action drop.*type 0[xX]ED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d76c",
+ "name": "Create valid ife encode action with tcindex and reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex reclassify index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "e731",
+ "name": "Create valid ife encode action with tcindex and jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex jump 999 index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 77",
+ "matchPattern": "action order [0-9]*: ife encode action jump 999.*type 0[xX]ED3E.*allow tcindex.*index 77",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "b7b8",
+ "name": "Create valid ife encode action with tcindex value at 16-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65535 pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*use tcindex 65535.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d0d8",
+ "name": "Create ife encode action with tcindex value exceeding 16-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 65539 pipe index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*use tcindex 65539.*index 1",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2a9c",
+ "name": "Create valid ife encode action with mac src parameter",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark src 00:11:22:33:44:55 pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow mark src 00:11:22:33:44:55.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "cf5c",
+ "name": "Create valid ife encode action with mac dst parameter",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 9876 dst 00:11:22:33:44:55 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ED3E.*use prio 9876 dst 00:11:22:33:44:55.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "2353",
+ "name": "Create valid ife encode action with mac src and mac dst parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow tcindex src 00:aa:bb:cc:dd:ee dst 00:11:22:33:44:55 pass index 11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 11",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow tcindex dst 00:11:22:33:44:55 src 00:aa:bb:cc:dd:ee .*index 11",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "552c",
+ "name": "Create valid ife encode action with mark and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use mark 7 type 0xfefe pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]FEFE.*use mark 7.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "0421",
+ "name": "Create valid ife encode action with prio and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use prio 444 type 0xabba pipe index 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 21",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ABBA.*use prio 444.*index 21",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "4017",
+ "name": "Create valid ife encode action with tcindex and type parameters",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode use tcindex 5000 type 0xabcd reclassify index 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 21",
+ "matchPattern": "action order [0-9]*: ife encode action reclassify.*type 0[xX]ABCD.*use tcindex 5000.*index 21",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "fac3",
+ "name": "Create valid ife encode action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 4294967295",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7c25",
+ "name": "Create valid ife decode action with pass control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action pass.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "dccb",
+ "name": "Create valid ife decode action with pipe control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action pipe.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "7bb9",
+ "name": "Create valid ife decode action with continue control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action continue.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "d9ad",
+ "name": "Create valid ife decode action with drop control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action drop.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "219f",
+ "name": "Create valid ife decode action with reclassify control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action reclassify.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "8f44",
+ "name": "Create valid ife decode action with jump control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife decode jump 10 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 1",
+ "matchPattern": "action order [0-9]*: ife decode action jump 10.*type 0(x0)?.*allow mark allow tcindex allow prio.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "56cf",
+ "name": "Create ife encode action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark pass index 4294967295999",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4294967295999",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 4294967295999",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ee94",
+ "name": "Create ife encode action with invalid control",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow mark kuka index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action kuka.*type 0[xX]ED3E.*allow mark.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "b330",
+ "name": "Create ife encode action with cookie",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio pipe index 4 cookie aabbccddeeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow prio.*index 4.*cookie aabbccddeeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "bbc0",
+ "name": "Create ife encode action with invalid argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow foo pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]ED3E.*allow foo.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "d54a",
+ "name": "Create ife encode action with invalid type argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio type 70000 pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*type 0[xX]11170.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "7ee0",
+ "name": "Create ife encode action with invalid mac src argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio src 00:11:22:33:44:pp pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "0a7d",
+ "name": "Create ife encode action with invalid mac dst argument",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action ife encode allow prio dst 00.111-22:33:44:aa pipe index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action ife index 4",
+ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4",
+ "matchCount": "0",
+ "teardown": []
+ }
+]
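
The ife boundary cases encode an implicit field-width contract: tcindex 65535 is accepted (b7b8) while 65539 is rejected (d0d8), and type 70000 is rejected (d54a), which suggests 16-bit fields, whereas mark, prio and index accept 4294967295 but nothing larger, suggesting 32-bit fields. A tiny, purely illustrative sketch of the inferred checks:

def fits_u16(value: int) -> bool:
    # Inferred from b7b8/d0d8/d54a: tcindex and type behave like 16-bit fields.
    return 0 <= value <= 0xFFFF

def fits_u32(value: int) -> bool:
    # Inferred from cb3d/1efb, 7dd3/2ca1 and fac3/56cf: mark, prio and
    # index accept values up to 4294967295 only.
    return 0 <= value <= 0xFFFFFFFF
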
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
new file mode 100644
index 000000000..db49fd0f8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -0,0 +1,438 @@
+[
+ {
+ "id": "5124",
+ "name": "Add mirred mirror to egress action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 dev lo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "6fb4",
+ "name": "Add mirred redirect to egress action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress redirect index 2 dev lo action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred",
+ "$TC actions flush action gact"
+ ]
+ },
+ {
+ "id": "ba38",
+ "name": "Get mirred actions",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress mirror index 1 dev lo",
+ "$TC actions add action mirred egress redirect index 2 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions show action mirred",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "[Mirror|Redirect] to device lo",
+ "matchCount": "2",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "d7c0",
+ "name": "Add invalid mirred direction",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred inbound mirror index 20 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e213",
+ "name": "Add invalid mirred action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress remirror index 20 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress.*to device lo\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2d89",
+ "name": "Add mirred action with invalid device",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 20 dev eltoh",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device eltoh\\).*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "300b",
+ "name": "Add mirred action with duplicate index",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress redirect index 15 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 15 dev lo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(.*to device lo\\).*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "8917",
+ "name": "Add mirred mirror action with control pass",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 1",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "1054",
+ "name": "Add mirred mirror action with control pipe",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 15",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 15",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "9887",
+ "name": "Add mirred mirror action with control continue",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo continue index 15",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 15",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) continue.*index 15 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e4aa",
+ "name": "Add mirred mirror action with control reclassify",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify index 150",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 150",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*index 150 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "ece9",
+ "name": "Add mirred mirror action with control drop",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo drop index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 99",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 99 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "0031",
+ "name": "Add mirred mirror action with control jump",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo jump 10 index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 99",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) jump 10.*index 99 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "407c",
+ "name": "Add mirred mirror action with cookie",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo reclassify cookie aa11bb22cc33dd44ee55",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) reclassify.*cookie aa11bb22cc33dd44ee55",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "8b69",
+ "name": "Add mirred mirror action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action mirred index 4294967295",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3f66",
+ "name": "Add mirred mirror action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror dev lo pipe index 429496729555",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action mirred index 429496729555",
+ "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) pipe.*index 429496729555",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a70e",
+ "name": "Delete mirred mirror action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress mirror index 5 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions del action mirred index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Mirror to device lo\\).*index 5 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3fb3",
+ "name": "Delete mirred redirect action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action mirred egress redirect index 5 dev lo"
+ ],
+ "cmdUnderTest": "$TC actions del action mirred index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action mirred",
+ "matchPattern": "action order [0-9]*: mirred \\(Egress Redirect to device lo\\).*index 5 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action mirred"
+ ]
+ }
+]
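
matchPattern is a regular expression applied to the output of verifyCmd, which is why literal parentheses in the mirred dumps appear double-escaped, once for the regex and once for JSON: "mirred \\(Egress Mirror to device lo\\)". Below is a rough Python sketch of the check these three fields drive -- not tdc.py's actual code, and assuming $TC has already been expanded to a real tc binary:

import re
import subprocess

def verify(verify_cmd, match_pattern, match_count):
    # Run the verify command, count regex matches in its output and
    # compare with matchCount; tdc's real implementation differs in
    # detail but performs the same comparison.
    out = subprocess.run(verify_cmd, shell=True,
                         capture_output=True, text=True).stdout
    return len(re.findall(match_pattern, out)) == int(match_count)

# e.g. for test 5124 above:
# verify("tc actions list action mirred",
#        r"action order [0-9]*: mirred \(Egress Mirror to device lo\).*index 1 ref",
#        "1")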
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
new file mode 100644
index 000000000..0080dc2fd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -0,0 +1,593 @@
+[
+ {
+ "id": "7565",
+ "name": "Add nat action on ingress with default control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 200.200.200.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.1.1/32 200.200.200.1 pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "fd79",
+ "name": "Add nat action on ingress with pipe control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 2.2.2.1 pipe index 77",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 77",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 2.2.2.1 pipe.*index 77 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "eab9",
+ "name": "Add nat action on ingress with continue control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 continue index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 1000",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.10.10/32 192.168.20.20 continue.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "c53a",
+ "name": "Add nat action on ingress with reclassify control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.10.10 192.168.20.20 reclassify index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 1000",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.10.10/32 192.168.20.20 reclassify.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "76c9",
+ "name": "Add nat action on ingress with jump control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 12.18.10.10 12.18.20.20 jump 10 index 22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 22",
+ "matchPattern": "action order [0-9]+: nat ingress 12.18.10.10/32 12.18.20.20 jump 10.*index 22 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "24c6",
+ "name": "Add nat action on ingress with drop control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 722",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 722",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 722 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "2120",
+ "name": "Add nat action on ingress with maximum index value",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 4294967295",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "3e9d",
+ "name": "Add nat action on ingress with invalid index value",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 index 4294967295555",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 4294967295555",
+ "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 pass.*index 4294967295555 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "f6c9",
+ "name": "Add nat action on ingress with invalid IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.1888.2.2 index 7",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 7",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 1.1888.2.2 pass.*index 7 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "be25",
+ "name": "Add nat action on ingress with invalid argument",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 1.1.1.1 1.18.2.2 another_arg index 12",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 1.18.2.2 pass.*another_arg.*index 12 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "a7bd",
+ "name": "Add nat action on ingress with DEFAULT IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress default 10.10.10.1 index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "ee1e",
+ "name": "Add nat action on ingress with ANY IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress any 10.10.10.1 index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "1de8",
+ "name": "Add nat action on ingress with ALL IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress all 10.10.10.1 index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 12",
+ "matchPattern": "action order [0-9]+: nat ingress 0.0.0.0/32 10.10.10.1 pass.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "8dba",
+ "name": "Add nat action on egress with default control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "19a7",
+ "name": "Add nat action on egress with pipe control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "f1d9",
+ "name": "Add nat action on egress with continue control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "6d4a",
+ "name": "Add nat action on egress with reclassify control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "b313",
+ "name": "Add nat action on egress with jump control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 jump 777",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 jump 777",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "d9fc",
+ "name": "Add nat action on egress with drop control action",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress 10.10.10.1 20.20.20.1 drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat",
+ "matchPattern": "action order [0-9]+: nat egress 10.10.10.1/32 20.20.20.1 drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "a895",
+ "name": "Add nat action on egress with DEFAULT IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress default 20.20.20.1 pipe index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "2572",
+ "name": "Add nat action on egress with ANY IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress any 20.20.20.1 pipe index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "37f3",
+ "name": "Add nat action on egress with ALL IP address",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "6054",
+ "name": "Add nat action on egress with cookie",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat egress all 20.20.20.1 pipe index 10 cookie aa1bc2d3eeff112233445566778800a1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 10",
+ "matchPattern": "action order [0-9]+: nat egress 0.0.0.0/32 20.20.20.1 pipe.*index 10 ref.*cookie aa1bc2d3eeff112233445566778800a1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "79d6",
+ "name": "Add nat action on ingress with cookie",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action nat ingress 192.168.1.1 10.10.10.1 reclassify index 1 cookie 112233445566778899aabbccddeeff11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action nat index 1",
+ "matchPattern": "action order [0-9]+: nat ingress 192.168.1.1/32 10.10.10.1 reclassify.*index 1 ref.*cookie 112233445566778899aabbccddeeff11",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ }
+]
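
The nat file shows the other shape a teardown may take: tests 3e9d, f6c9 and be25 expect the command under test to fail, so nothing is left to clean up and their teardown reuses the ["$TC actions flush action nat", 0, 1, 255] list form to tolerate a nonzero exit from the flush. A sketch of a runner that normalizes both shapes -- an illustration of the convention, not tdc's own handling:

import subprocess

def run_step(step):
    # A step is either a bare command string (exit code 0 required)
    # or a list of [command, allowed exit code, ...].
    if isinstance(step, list):
        cmd, allowed = step[0], step[1:]
    else:
        cmd, allowed = step, [0]
    rc = subprocess.run(cmd, shell=True).returncode
    if rc not in allowed:
        raise RuntimeError(f"step {cmd!r} exited {rc}, allowed {allowed}")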
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
new file mode 100644
index 000000000..30f9b54bd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -0,0 +1,719 @@
+[
+ {
+ "id": "49aa",
+ "name": "Add valid basic police action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 1Kbit burst 10Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "3abe",
+ "name": "Add police action with duplicate index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 4Mbit burst 120k index 9"
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 8kbit burst 24k index 9",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x9",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "49fa",
+ "name": "Add valid police action with mtu",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 1k index 98",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 98",
+ "matchPattern": "action order [0-9]*: police 0x62 rate 90Kbit burst 10Kb mtu 1Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7943",
+ "name": "Add valid police action with peakrate",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100kbit index 3",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x3 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "055e",
+ "name": "Add police action with peakrate and no mtu",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 5kbit burst 6kb peakrate 10kbit index 9",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x9 rate 5Kb burst 10Kb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f057",
+ "name": "Add police action with valid overhead",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1mbit burst 100k overhead 64 index 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 64",
+ "matchPattern": "action order [0-9]*: police 0x40 rate 1Mbit burst 100Kb mtu 2Kb action reclassify overhead 64b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7ffb",
+ "name": "Add police action with ethernet linklayer type",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer ethernet index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions show action police",
+ "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "3dda",
+ "name": "Add police action with atm linklayer type",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 2mbit burst 200k linklayer atm index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions show action police",
+ "matchPattern": "action order [0-9]*: police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "551b",
+ "name": "Add police actions with conform-exceed control continue/drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed continue/drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action continue/drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "0c70",
+ "name": "Add police actions with conform-exceed control pass/reclassify",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/reclassify index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x4 rate 3Mbit burst 250Kb mtu 2Kb action pass/reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "d946",
+ "name": "Add police actions with conform-exceed control pass/pipe",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed pass/pipe index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "ddd6",
+ "name": "Add police action with invalid rate value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3tb burst 250k conform-exceed pass/pipe index 5",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Tb burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f61c",
+ "name": "Add police action with invalid burst value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3kbit burst 250P conform-exceed pass/pipe index 5",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x5 rate 3Kbit burst 250Pb mtu 2Kb action pass/pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "6aaf",
+ "name": "Add police actions with conform-exceed control pass/pipe [with numeric values]",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 0/3 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "29b1",
+ "name": "Add police actions with conform-exceed control <invalid>/drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 10/drop index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action ",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "c26f",
+ "name": "Add police action with invalid peakrate value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 90kbit burst 10k mtu 2kb peakrate 100T index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 90Kbit burst 10Kb mtu 2Kb peakrate 100Tbit",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "db04",
+ "name": "Add police action with invalid mtu value",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10kbit burst 10k mtu 2Pbit index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 10Kbit burst 1Kb mtu 2Pb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f3c9",
+ "name": "Add police action with cookie",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 1 cookie a1b1c1d1e1f12233bb",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 10Mbit burst 10Kb mtu 2Kb.*cookie a1b1c1d1e1f12233bb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "d190",
+ "name": "Add police action with maximum index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 10mbit burst 10k index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 4294967295",
+ "matchPattern": "action order [0-9]*: police 0xffffffff rate 10Mbit burst 10Kb mtu 2Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "336e",
+ "name": "Delete police action",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 5mbit burst 2m index 12"
+ ],
+ "cmdUnderTest": "$TC actions delete action police index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0xc rate 5Mb burst 2Mb",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "77fa",
+ "name": "Get single police action from many actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions get action police index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 4",
+ "matchPattern": "action order [0-9]*: police 0x4 rate 4Mbit burst 400Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "aa43",
+ "name": "Get single police action without specifying index",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1"
+ ],
+ "cmdUnderTest": "$TC actions get action police",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action police",
+ "matchPattern": "action order [0-9]*: police",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "858b",
+ "name": "List police actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions list action police",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x[1-8] rate [1-8]Mbit burst [1-8]00Kb",
+ "matchCount": "8",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "1c3a",
+ "name": "Flush police actions",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ "$TC actions add action police rate 1mbit burst 100k index 1",
+ "$TC actions add action police rate 2mbit burst 200k index 2",
+ "$TC actions add action police rate 3mbit burst 300k index 3",
+ "$TC actions add action police rate 4mbit burst 400k index 4",
+ "$TC actions add action police rate 5mbit burst 500k index 5",
+ "$TC actions add action police rate 6mbit burst 600k index 6",
+ "$TC actions add action police rate 7mbit burst 700k index 7",
+ "$TC actions add action police rate 8mbit burst 800k index 8"
+ ],
+ "cmdUnderTest": "$TC actions flush action police",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police",
+ "matchCount": "0",
+ "teardown": [
+ ""
+ ]
+ },
+ {
+ "id": "7326",
+ "name": "Add police action with control continue",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "34fa",
+ "name": "Add police action with control drop",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m drop index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "8dd5",
+ "name": "Add police action with control ok",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m ok index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "b9d1",
+ "name": "Add police action with control reclassify",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action police index 1",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "c534",
+ "name": "Add police action with control pipe",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 7mbit burst 1m pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 7Mbit burst 1024Kb mtu 2Kb action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ }
+]
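
Note that tc prints the police action index in hex, so each matchPattern here has to track the decimal index given to cmdUnderTest: index 98 becomes "police 0x62", index 12 "police 0xc", and the 32-bit maximum 4294967295 becomes "police 0xffffffff". A short consistency check of that convention, with a hypothetical helper that is not part of these tests:

assert hex(98) == "0x62"                # test 49fa
assert hex(4294967295) == "0xffffffff"  # test d190

def police_match(index):
    # Compose the hex-index prefix the police dumps use.
    return f"action order [0-9]*: police {hex(index)}"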
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
new file mode 100644
index 000000000..618def9bd
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -0,0 +1,612 @@
+[
+ {
+ "id": "9784",
+ "name": "Add valid sample action with mandatory arguments",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 10 group 1 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "5c91",
+ "name": "Add valid sample action with mandatory arguments and continue control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 700 group 2 continue index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/700 group 2 continue.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "334b",
+ "name": "Add valid sample action with mandatory arguments and drop control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 10000 group 11 drop index 22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/10000 group 11 drop.*index 22 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "da69",
+ "name": "Add valid sample action with mandatory arguments and reclassify control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 20000 group 72 reclassify index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/20000 group 72 reclassify.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "13ce",
+ "name": "Add valid sample action with mandatory arguments and pipe control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 20 group 2 pipe index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/20 group 2 pipe.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "1886",
+ "name": "Add valid sample action with mandatory arguments and jump control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 700 group 25 jump 4 index 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 200",
+ "matchPattern": "action order [0-9]+: sample rate 1/700 group 25 jump 4.*index 200 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "7571",
+ "name": "Add sample action with invalid rate",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 0 group 1 index 2",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 2",
+ "matchPattern": "action order [0-9]+: sample rate 1/0 group 1.*index 2 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "b6d4",
+ "name": "Add sample action with mandatory arguments and invalid control action",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 200000 group 52 foo index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/200000 group 52 foo.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a874",
+ "name": "Add invalid sample action without mandatory arguments",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ac01",
+ "name": "Add invalid sample action without mandatory argument rate",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample group 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample.*group 10.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "4203",
+ "name": "Add invalid sample action without mandatory argument group",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "14a7",
+ "name": "Add invalid sample action without mandatory argument group",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 100 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/100.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "8f2e",
+ "name": "Add valid sample action with trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 1024 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 1024 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "45f8",
+ "name": "Add sample action with maximum rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294967295 group 4 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294967295 group 4 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "ad0c",
+ "name": "Add sample action with maximum trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 16000 group 4 trunc 4294967295 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/16000 group 4 trunc_size 4294967295 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "83a9",
+ "name": "Add sample action with maximum group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294 group 4294967295 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 1",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294 group 4294967295 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "ed27",
+ "name": "Add sample action with invalid rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4294967296 group 4 index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 10",
+ "matchPattern": "action order [0-9]+: sample rate 1/4294967296 group 4 pipe.*index 10 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2eae",
+ "name": "Add sample action with invalid group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 4098 group 5294967299 continue index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 1",
+ "matchPattern": "action order [0-9]+: sample rate 1/4098 group 5294967299 continue.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "6ff3",
+ "name": "Add sample action with invalid trunc size",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 trunc 112233445566 index 11",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 11",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 trunc_size 112233445566.*index 11 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2b2a",
+ "name": "Add sample action with invalid index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 5294967299",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 5294967299",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 5294967299 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "dee2",
+ "name": "Add sample action with maximum allowed index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 4294967295",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "560e",
+ "name": "Add sample action with cookie",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action sample rate 1024 group 4 index 45 cookie aabbccdd",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 45",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pipe.*index 45.*cookie aabbccdd",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "704a",
+ "name": "Replace existing sample action with new rate argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 2048 group 4 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/2048 group 4 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "60eb",
+ "name": "Replace existing sample action with new group argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "2cce",
+ "name": "Replace existing sample action with new trunc argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 trunc 48 index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 trunc 64 index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 trunc_size 64 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "59d1",
+ "name": "Replace existing sample action with new control argument",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 1024 group 4 reclassify index 4"
+ ],
+ "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 pipe index 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action sample",
+ "matchPattern": "action order [0-9]+: sample rate 1/1024 group 7 pipe.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ }
+]
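
Several sample tests probe the same 32-bit boundary seen in the mirred and nat files: rate, group and index values of 4294967295 (2^32 - 1) are accepted, while 4294967296, 5294967299 and 112233445566 must be rejected with exit code 255, presumably because the corresponding netlink attributes are 32-bit. A sketch of the boundary as the tests exercise it:

U32_MAX = 2**32 - 1   # 4294967295, the largest accepted value

def fits_u32(value):
    # The split the tests above rely on: values at the boundary pass,
    # anything beyond it is expected to fail.
    return 0 <= value <= U32_MAX

assert fits_u32(4294967295)        # tests 45f8, dee2: accepted
assert not fits_u32(4294967296)    # test ed27: rejected
assert not fits_u32(112233445566)  # test 6ff3: rejected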
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
new file mode 100644
index 000000000..e89a7aa40
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -0,0 +1,130 @@
+[
+ {
+ "id": "b078",
+ "name": "Add simple action",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"A triumph\" index 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <A triumph>.*index 60 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "6d4c",
+ "name": "Add simple action with duplicate index",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Aruba\" index 4"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"Jamaica\" index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Jamaica>.*ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "2542",
+ "name": "List simple actions",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Rock\"",
+ "$TC actions add action simple sdata \"Paper\"",
+ "$TC actions add action simple sdata \"Scissors\" index 98"
+ ],
+ "cmdUnderTest": "$TC actions list action simple",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "ea67",
+ "name": "Delete simple action",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Blinkenlights\" index 1"
+ ],
+ "cmdUnderTest": "$TC actions delete action simple index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Blinkenlights>.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "8ff1",
+ "name": "Flush simple actions",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"Kirk\"",
+ "$TC actions add action simple sdata \"Spock\" index 50",
+ "$TC actions add action simple sdata \"McCoy\" index 9"
+ ],
+ "cmdUnderTest": "$TC actions flush action simple",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <[A-Z][a-z]*>",
+ "matchCount": "0",
+ "teardown": [
+ ""
+ ]
+ }
+]
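
The simple tests embed shell-quoted sdata strings, so the inner double quotes are escaped (\") inside the JSON command strings. Building such a command in code and serializing it with json.dumps reproduces exactly that escaping -- a hypothetical helper for composing new cases, not part of tdc:

import json

def simple_add_cmd(sdata, index):
    # Quote sdata for the shell; json.dumps then escapes those quotes
    # for the JSON file.
    return f'$TC actions add action simple sdata "{sdata}" index {index}'

print(json.dumps(simple_add_cmd("A triumph", 60)))
# -> "$TC actions add action simple sdata \"A triumph\" index 60"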
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
new file mode 100644
index 000000000..5aaf593b9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -0,0 +1,488 @@
+[
+ {
+ "id": "6236",
+ "name": "Add skbedit action with valid mark",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "407b",
+ "name": "Add skbedit action with invalid mark",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 666777888999",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "081d",
+ "name": "Add skbedit action with priority",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit prio 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit priority :99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cc37",
+ "name": "Add skbedit action with invalid priority",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit prio foo",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit priority",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "3c95",
+ "name": "Add skbedit action with queue_mapping",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 909",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 909",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "985c",
+ "name": "Add skbedit action with invalid queue_mapping",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 67000",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "224f",
+ "name": "Add skbedit action with ptype host",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype host",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype host",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "d1a3",
+ "name": "Add skbedit action with ptype otherhost",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype otherhost",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype otherhost",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "b9c6",
+ "name": "Add skbedit action with invalid ptype",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype openair",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit ptype openair",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "464a",
+ "name": "Add skbedit action with control pipe",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit ptype host pipe index 11",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 11",
+ "matchPattern": "action order [0-9]*: skbedit ptype host pipe.*index 11 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "212f",
+ "name": "Add skbedit action with control reclassify",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 56789 reclassify index 90",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 90",
+ "matchPattern": "action order [0-9]*: skbedit mark 56789 reclassify.*index 90 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "0651",
+ "name": "Add skbedit action with control pass",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 pass index 271",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 271",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 pass.*index 271 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "cc53",
+ "name": "Add skbedit action with control drop",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit queue_mapping 3 drop index 271",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 271",
+ "matchPattern": "action order [0-9]*: skbedit queue_mapping 3 drop.*index 271 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ec16",
+ "name": "Add skbedit action with control jump",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 8 jump 9 index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 2",
+ "matchPattern": "action order [0-9]*: skbedit priority :8 jump 9.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "db54",
+ "name": "Add skbedit action with control continue",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 32",
+ "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "1055",
+ "name": "Add skbedit action with cookie",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit priority 16 continue index 32 cookie deadbeef",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbedit index 32",
+ "matchPattern": "action order [0-9]*: skbedit priority :16 continue.*index 32 ref.*cookie deadbeef",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "5172",
+ "name": "List skbedit actions",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit ptype otherhost",
+ "$TC actions add action skbedit ptype broadcast",
+ "$TC actions add action skbedit mark 59",
+ "$TC actions add action skbedit mark 409"
+ ],
+ "cmdUnderTest": "$TC actions list action skbedit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit",
+ "matchCount": "4",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "a6d6",
+ "name": "Add skbedit action with index",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbedit mark 808 index 4040404040",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "index 4040404040",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "38f3",
+ "name": "Delete skbedit action",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbedit",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbedit mark 42 index 9009"
+ ],
+ "cmdUnderTest": "$TC actions del action skbedit index 9009",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit mark 42",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ },
+ {
+ "id": "ce97",
+ "name": "Flush skbedit actions",
+ "category": [
+ "actions",
+ "skbedit"
+ ],
+ "setup": [
+ "$TC actions add action skbedit mark 500",
+ "$TC actions add action skbedit mark 501",
+ "$TC actions add action skbedit mark 502",
+ "$TC actions add action skbedit mark 503",
+ "$TC actions add action skbedit mark 504",
+ "$TC actions add action skbedit mark 505",
+ "$TC actions add action skbedit mark 506"
+ ],
+ "cmdUnderTest": "$TC actions flush action skbedit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action skbedit",
+ "matchPattern": "action order [0-9]*: skbedit",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbedit"
+ ]
+ }
+]
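
[Editor's note] The invalid-value skbedit cases above lean on field widths: mark 666777888999 overflows a 32-bit value (test 407b) and queue_mapping 67000 overflows a 16-bit one (test 985c), so both must be rejected with exit code 255. A small sketch of the range checks these tests imply — the limits are inferred from the test cases themselves, not lifted from the kernel source:

    U16_MAX = 0xFFFF        # queue_mapping is carried in a 16-bit field
    U32_MAX = 0xFFFFFFFF    # mark is carried in a 32-bit field

    def skbedit_args_valid(mark=None, queue_mapping=None):
        # Mirror the accept/reject split exercised by tests 6236/407b and 3c95/985c.
        if mark is not None and not 0 <= mark <= U32_MAX:
            return False
        if queue_mapping is not None and not 0 <= queue_mapping <= U16_MAX:
            return False
        return True

    assert skbedit_args_valid(mark=1)
    assert not skbedit_args_valid(mark=666777888999)
    assert skbedit_args_valid(queue_mapping=909)
    assert not skbedit_args_valid(queue_mapping=67000)
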
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
new file mode 100644
index 000000000..fe3326e93
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -0,0 +1,396 @@
+[
+ {
+ "id": "7d50",
+ "name": "Add skbmod action to set destination mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set dmac 11:22:33:44:55:66\\s+index 5",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "9b29",
+ "name": "Add skbmod action to set source mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 77:88:99:AA:BB:CC index 7",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 7",
+ "matchPattern": "action order [0-9]*: skbmod pipe set smac 77:88:99:aa:bb:cc\\s+index 7",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "1724",
+ "name": "Add skbmod action with invalid mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:44:55:44:55",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set smac 00:44:55:44:55",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "3cf1",
+ "name": "Add skbmod action with valid etype",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFE",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "a749",
+ "name": "Add skbmod action with invalid etype",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xfefef",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0xFEFEF",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "bfe6",
+ "name": "Add skbmod action to swap mac",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod swap mac",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "839b",
+ "name": "Add skbmod action with control pipe",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod swap mac pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe swap mac",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "c167",
+ "name": "Add skbmod action with control reclassify",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0xbeef reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod reclassify set etype 0xBEEF",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "0c2f",
+ "name": "Add skbmod action with control drop",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0x0001 drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod drop set etype 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "d113",
+ "name": "Add skbmod action with control continue",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set etype 0x1 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod continue set etype 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "7242",
+ "name": "Add skbmod action with control pass",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:00:00:00:00:01 pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pass set smac 00:00:00:00:00:01",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "6046",
+ "name": "Add skbmod action with control reclassify and cookie",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod set smac 00:01:02:03:04:01 reclassify index 1 cookie ddeeffaabb11cc22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod reclassify set smac 00:01:02:03:04:01.*index 1 ref.*cookie ddeeffaabb11cc22",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "58cb",
+ "name": "List skbmod actions",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions ls action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod",
+ "matchCount": "5",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "9aa8",
+ "name": "Get a single skbmod action from a list",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions ls action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 4",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x31",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "e93a",
+ "name": "Delete an skbmod action",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action skbmod set etype 0x1111 index 909"
+ ],
+ "cmdUnderTest": "$TC actions del action skbmod index 909",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod pipe set etype 0x1111\\s+index 909",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ },
+ {
+ "id": "40c2",
+ "name": "Flush skbmod actions",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "setup": [
+ "$TC actions add action skbmod set etype 0x0001",
+ "$TC actions add action skbmod set etype 0x0011",
+ "$TC actions add action skbmod set etype 0x0021",
+ "$TC actions add action skbmod set etype 0x0031",
+ "$TC actions add action skbmod set etype 0x0041"
+ ],
+ "cmdUnderTest": "$TC actions flush action skbmod",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action skbmod",
+ "matchPattern": "action order [0-9]*: skbmod",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
+ }
+]
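
[Editor's note] Two details are worth noting in the skbmod patterns: the ethertype is echoed back in uppercase hex (0xfefe on the command line becomes 0xFEFE in the dump, test 3cf1) and 0xfefef is rejected because an ethertype must fit in 16 bits (test a749). A sketch of a matching parse-and-format round trip, assuming only those two observed behaviors — parse_etype() and format_etype() are names of my own:

    def parse_etype(text):
        # Accept a hex ethertype; require it to fit in 16 bits,
        # as test a749 expects for the rejected value 0xfefef.
        value = int(text, 16)
        if value > 0xFFFF:
            raise ValueError(f"ethertype {text} does not fit in 16 bits")
        return value

    def format_etype(value):
        # The action dump prints uppercase hex, e.g. "set etype 0xFEFE".
        return f"0x{value:X}"

    assert format_etype(parse_etype("0xfefe")) == "0xFEFE"
    assert format_etype(parse_etype("0x0001")) == "0x1"  # leading zeros dropped, per test 0c2f
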
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
new file mode 100644
index 000000000..e7e15a733
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -0,0 +1,888 @@
+[
+ {
+ "id": "2b11",
+ "name": "Add tunnel_key set action with mandatory parameters",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "dc6b",
+ "name": "Add tunnel_key set action with missing mandatory src_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set dst_ip 20.20.20.2 id 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*dst_ip 20.20.20.2.*key_id 100",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "7f25",
+ "name": "Add tunnel_key set action with missing mandatory dst_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 id 100",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 10.10.10.1.*key_id 100",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "a5e0",
+ "name": "Add tunnel_key set action with invalid src_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 300.168.100.1 dst_ip 192.168.200.1 id 7 index 1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 300.168.100.1.*dst_ip 192.168.200.1.*key_id 7.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "eaa8",
+ "name": "Add tunnel_key set action with invalid dst_ip parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.800.1 id 10 index 11",
+ "expExitCode": "1",
+ "verifyCmd": "$TC actions get action tunnel_key index 11",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 192.168.100.1.*dst_ip 192.168.800.1.*key_id 10.*index 11 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "3b09",
+ "name": "Add tunnel_key set action with invalid id parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 112233445566778899 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 112233445566778899.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "9625",
+ "name": "Add tunnel_key set action with invalid dst_port parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 dst_port 998877 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 11.*dst_port 998877.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "05af",
+ "name": "Add tunnel_key set action with optional dst_port parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.100.1 dst_ip 192.168.200.1 id 789 dst_port 4000 index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 10",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.100.1.*dst_ip 192.168.200.1.*key_id 789.*dst_port 4000.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "da80",
+ "name": "Add tunnel_key set action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*id 11.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "d407",
+ "name": "Add tunnel_key set action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 11 index 4294967295678",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 4294967295678",
+ "matchPattern": "action order [0-9]+: tunnel_key set.*index 4294967295678 ref",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "5cba",
+ "name": "Add tunnel_key set action with id value at 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 4294967295 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 4294967295.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "e84a",
+ "name": "Add tunnel_key set action with id value exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42949672955 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 4294967295",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42949672955.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "9c19",
+ "name": "Add tunnel_key set action with dst_port value at 16-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "3bd9",
+ "name": "Add tunnel_key set action with dst_port value exceeding 16-bit maximum",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 429 dst_port 65535789 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 429.*dst_port 65535789.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "68e2",
+ "name": "Add tunnel_key unset action",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key unset index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*unset.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "6192",
+ "name": "Add tunnel_key unset continue action",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key unset continue index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*unset continue.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "061d",
+ "name": "Add tunnel_key set continue action with cookie",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "8acb",
+ "name": "Add tunnel_key set continue action with invalid cookie",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 192.168.10.1 dst_ip 192.168.20.2 id 123 continue index 1 cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 192.168.10.1.*dst_ip 192.168.20.2.*key_id 123.*csum continue.*index 1.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "a07e",
+ "name": "Add tunnel_key action with no set/unset command specified",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "b227",
+ "name": "Add tunnel_key action with csum option",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 csum index 99",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 99",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*csum pipe.*index 99",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "58a7",
+ "name": "Add tunnel_key action with nocsum option",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7823 nocsum index 234",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 234",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7823.*nocsum pipe.*index 234",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "2575",
+ "name": "Add tunnel_key action with not-supported parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 foobar 999 index 4",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 4",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*foobar 999.*index 4",
+ "matchCount": "0",
+ "teardown": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ]
+ },
+ {
+ "id": "7a88",
+ "name": "Add tunnel_key action with cookie parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 7 index 4 cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 4",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 7.*csum pipe.*index 4 ref.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "4f20",
+ "name": "Add tunnel_key action with a single geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "e33d",
+ "name": "Add tunnel_key action with multiple geneve options parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:0040007611223344,0111:02:1020304011223344.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "0778",
+ "name": "Add tunnel_key action with invalid class geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 824212:80:00880022 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 824212:80:00880022.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "4ae8",
+ "name": "Add tunnel_key action with invalid type geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:4224:00880022 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:4224:00880022.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "4039",
+ "name": "Add tunnel_key action with short data length geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "26a6",
+ "name": "Add tunnel_key action with non-multiple of 4 data length geneve option parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:4288428822 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:4288428822.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "f44d",
+ "name": "Add tunnel_key action with incomplete geneve options parameter",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 id 42 dst_port 6081 geneve_opts 0102:80:00880022,0408:42: index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*key_id 42.*dst_port 6081.*geneve_opt 0102:80:00880022,0408:42:.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "7afc",
+ "name": "Replace tunnel_key set action with all parameters",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 csum id 1 index 1"
+ ],
+ "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 nocsum id 11 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*nocsum pipe.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "364d",
+ "name": "Replace tunnel_key set action with all parameters and cookie",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 index 1 cookie aabbccddeeff112233445566778800a"
+ ],
+ "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 11.11.11.1 dst_ip 21.21.21.2 dst_port 3129 id 11 csum reclassify index 1 cookie a1b1c1d1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 1",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 11.11.11.1.*dst_ip 21.21.21.2.*key_id 11.*dst_port 3129.*csum reclassify.*index 1.*cookie a1b1c1d1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "937c",
+ "name": "Fetch all existing tunnel_key actions",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+ "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 jump 10 index 2",
+ "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+ "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+ ],
+ "cmdUnderTest": "$TC actions list action tunnel_key",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*nocsum pipe.*index 1.*set.*src_ip 11.10.10.1.*dst_ip 21.20.20.2.*key_id 2.*dst_port 3129.*csum jump 10.*index 2.*set.*src_ip 12.10.10.1.*dst_ip 22.20.20.2.*key_id 3.*dst_port 3130.*csum pass.*index 3.*set.*src_ip 13.10.10.1.*dst_ip 23.20.20.2.*key_id 4.*dst_port 3131.*nocsum continue.*index 4",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "6783",
+ "name": "Flush all existing tunnel_key actions",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pipe index 1",
+ "$TC actions add action tunnel_key set src_ip 11.10.10.1 dst_ip 21.20.20.2 dst_port 3129 csum id 2 reclassify index 2",
+ "$TC actions add action tunnel_key set src_ip 12.10.10.1 dst_ip 22.20.20.2 dst_port 3130 csum id 3 pass index 3",
+ "$TC actions add action tunnel_key set src_ip 13.10.10.1 dst_ip 23.20.20.2 dst_port 3131 nocsum id 4 continue index 4"
+ ],
+ "cmdUnderTest": "$TC actions flush action tunnel_key",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+:.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ }
+]
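
[Editor's note] The geneve_opts cases pin down the expected option syntax: comma-separated class:type:data triplets, where class is 16-bit hex, type is 8-bit hex, and data is hex whose length is a multiple of 4 bytes (8 hex digits). The validator below is a sketch reverse-engineered from tests 4f20 through f44d, not the parser tc actually uses:

    def geneve_opts_valid(spec):
        # e.g. "0102:80:00880022,0408:42:0040007611223344"
        for opt in spec.split(","):
            fields = opt.split(":")
            if len(fields) != 3:
                return False
            cls, typ, data = fields
            if not 1 <= len(cls) <= 4:    # class: 16-bit hex (test 0778)
                return False
            if not 1 <= len(typ) <= 2:    # type: 8-bit hex (test 4ae8)
                return False
            # data: non-empty, whole 4-byte words (tests 4039/26a6/f44d)
            if len(data) == 0 or len(data) % 8 != 0:
                return False
            try:
                int(cls, 16)
                int(typ, 16)
                int(data, 16)
            except ValueError:
                return False
        return True

    assert geneve_opts_valid("0102:80:00880022")
    assert not geneve_opts_valid("824212:80:00880022")          # class too wide
    assert not geneve_opts_valid("0102:4224:00880022")          # type too wide
    assert not geneve_opts_valid("0102:80:4288")                # data too short
    assert not geneve_opts_valid("0102:80:00880022,0408:42:")   # incomplete triplet
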
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
new file mode 100644
index 000000000..69ea09eef
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -0,0 +1,692 @@
+[
+ {
+ "id": "6f5a",
+ "name": "Add vlan pop action with pipe opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop pipe index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*pipe.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "df35",
+ "name": "Add vlan pop action with pass opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop pass index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*pass.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "b0d4",
+ "name": "Add vlan pop action with drop opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop drop index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*drop.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "95ee",
+ "name": "Add vlan pop action with reclassify opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop reclassify index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*reclassify.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "0283",
+ "name": "Add vlan pop action with continue opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop continue index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 8",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*continue.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "b6b9",
+ "name": "Add vlan pop action with jump opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop jump 10 index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*jump 10.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "87c3",
+ "name": "Add vlan pop action with trap opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop trap index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop trap.*index 8 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "a178",
+ "name": "Add vlan pop action with invalid opcode",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop foo index 8",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*foo.*index 8 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ee6f",
+ "name": "Add vlan pop action with index at 32-bit maximum",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop index 4294967295",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 4294967295 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "0dfa",
+ "name": "Add vlan pop action with index exceeding 32-bit maximum",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan pop reclassify index 429496729599",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action vlan index 429496729599",
+ "matchPattern": "action order [0-9]+: vlan.*pop.reclassify.*index 429496729599",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "2b91",
+ "name": "Add vlan invalid action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan bad_mode",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*bad_mode",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "57fc",
+ "name": "Add vlan push action with invalid protocol type",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push protocol ABCD",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "3989",
+ "name": "Add vlan push action with default protocol and priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 123 index 18",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 18",
+ "matchPattern": "action order [0-9]+: vlan.*push id 123 protocol 802.1Q priority 0 pipe.*index 18 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "79dc",
+ "name": "Add vlan push action with protocol 802.1Q and priority 3",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 77 protocol 802.1Q priority 3 continue index 734",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 734",
+ "matchPattern": "action order [0-9]+: vlan.*push id 77 protocol 802.1Q priority 3 continue.*index 734 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "4d73",
+ "name": "Add vlan push action with protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 1024 protocol 802.1AD pass index 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 10000",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1024 protocol 802.1ad priority 0 pass.*index 10000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "1f4b",
+ "name": "Add vlan push action with maximum 12-bit vlan ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4094 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4094.*protocol 802.1Q.*priority 0.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "1f7b",
+ "name": "Add vlan push action with invalid vlan ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5678 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5678.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "fe40",
+ "name": "Add vlan push action with maximum 3-bit IEEE 802.1p priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4 priority 7 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4.*protocol 802.1Q.*priority 7.*reclassify.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5d02",
+ "name": "Add vlan push action with invalid IEEE 802.1p priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 5 priority 10 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 5.*index 1 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "6812",
+ "name": "Add vlan modify action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "5a31",
+ "name": "Add vlan modify action for protocol 802.1AD",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 12",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "3deb",
+ "name": "Replace existing vlan push action with new ID",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 500 pipe index 12"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 700 pipe index 12",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 12",
+ "matchPattern": "action order [0-9]+: vlan.*push id 700 protocol 802.1Q priority 0 pipe.*index 12 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "9e76",
+ "name": "Replace existing vlan push action with new protocol",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 1 protocol 802.1Q pipe index 1"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 1 protocol 802.1ad pipe index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1ad priority 0 pipe.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "ede4",
+ "name": "Replace existing vlan push action with new priority",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 1 protocol 802.1Q priority 3 reclassify index 1"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan push id 1 priority 4 reclassify index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*push id 1 protocol 802.1Q priority 4 reclassify.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "d413",
+ "name": "Replace existing vlan pop action with new cookie",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan pop continue index 1 cookie 22334455"
+ ],
+ "cmdUnderTest": "$TC actions replace action vlan pop continue index 1 cookie a1b1c2d1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 1",
+ "matchPattern": "action order [0-9]+: vlan.*pop continue.*index 1 ref.*cookie a1b1c2d1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
+ "id": "83a4",
+ "name": "Delete vlan pop action",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan pop index 44"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 44",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*pop.*index 44 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "ed1e",
+ "name": "Delete vlan push action for protocol 802.1Q",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4094 protocol 802.1Q index 999"
+ ],
+ "cmdUnderTest": "$TC actions del action vlan index 999",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4094 protocol 802.1Q priority 0 pipe.*index 999 ref",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "a2a3",
+ "name": "Flush vlan actions",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 10",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 11",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 12",
+ "$TC actions add action vlan push id 4 protocol 802.1ad index 13"
+ ],
+ "cmdUnderTest": "$TC actions flush action vlan",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4 protocol 802.1ad",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "1d78",
+ "name": "Add vlan push action with cookie",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan push id 4 cookie a0a0a0a0a0a0a0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action vlan",
+ "matchPattern": "action order [0-9]+: vlan.*push id 4.*cookie a0a0a0a0a0a0a0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ }
+]
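
Every case in these JSON files follows one schema, consumed by the tdc.py driver added later in this patch. A minimal sketch of a case, written here as the Python dict that json.load() produces; the ID, commands and pattern below are illustrative, not one of the cases above:

    case = {
        "id": "abcd",                        # unique 4-hex-digit ID
        "name": "Add vlan pop action",
        "category": ["actions", "vlan"],     # drives -c/--category selection
        "setup": [                           # commands run before the test; a
            ["$TC actions flush action vlan",  # list item's tail holds its
             0, 1, 255]                        # accepted exit codes
        ],
        "cmdUnderTest": "$TC actions add action vlan pop index 8",
        "expExitCode": "0",                  # expected exit code of cmdUnderTest
        "verifyCmd": "$TC actions get action vlan index 8",
        "matchPattern": "vlan.*pop.*index 8 ref",  # regex run on verifyCmd output
        "matchCount": "1",                   # required number of regex matches
        "teardown": ["$TC actions flush action vlan"]
    }
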
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
new file mode 100644
index 000000000..3b97cfd7e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -0,0 +1,1049 @@
+[
+ {
+ "id": "901f",
+        "name": "Add fw filter with prio at 16-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65535 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65535 protocol all fw",
+ "matchPattern": "pref 65535 fw.*handle 0x1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "51e2",
+        "name": "Add fw filter with prio exceeding 16-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 65536 fw action ok",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 65536 protocol all fw",
+ "matchPattern": "pref 65536 fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d987",
+ "name": "Add fw filter with action ok",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "affe",
+ "name": "Add fw filter with action continue",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "28bc",
+ "name": "Add fw filter with action pipe",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8da2",
+ "name": "Add fw filter with action drop",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol all prio 1 fw",
+ "matchPattern": "handle 0x1.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9436",
+ "name": "Add fw filter with action reclassify",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "95bb",
+ "name": "Add fw filter with action jump 10",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action jump 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action jump 10",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3d74",
+ "name": "Add fw filter with action goto chain 5",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action goto chain 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action goto chain 5",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "eb8f",
+ "name": "Add fw filter with invalid action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action pump",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "handle 0x1.*gact action pump",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6a79",
+ "name": "Add fw filter with missing mandatory action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "filter protocol all pref [0-9]+ fw.*handle 0x1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8298",
+ "name": "Add fw filter with cookie",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a88c",
+ "name": "Add fw filter with invalid cookie",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action continue cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 2 protocol all fw",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action continue.*cookie aa11bb22cc33dd44ee55ff66aa11b1b2777888",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "10f6",
+ "name": "Add fw filter with handle in hex",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2ff prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa1b2ff.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9d51",
+ "name": "Add fw filter with handle at 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967295 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xffffffff.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d939",
+ "name": "Add fw filter with handle exceeding 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296 prio 1 fw action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4294967296 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "658c",
+ "name": "Add fw filter with mask in hex",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/0xa1b2f prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa/0xa1b2f",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "86be",
+ "name": "Add fw filter with mask at 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967295 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa[^/]",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e635",
+ "name": "Add fw filter with mask exceeding 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10/4294967296 prio 1 fw action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6cab",
+ "name": "Add fw filter with handle/mask in hex",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xa1b2cdff/0x1a2bffdc prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xa1b2cdff prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xa1b2cdff/0x1a2bffdc",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8700",
+ "name": "Add fw filter with handle/mask at 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967295/4294967295 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 0xffffffff prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0xffffffff[^/]",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7d62",
+ "name": "Add fw filter with handle/mask exceeding 32-bit maximum",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 4294967296/4294967296 prio 1 fw action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7b69",
+ "name": "Add fw filter with missing mandatory handle",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 1 fw action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol all.*fw.*handle.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d68b",
+ "name": "Add fw filter with invalid parent",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent aa11b1b2: handle 1 prio 1 fw action ok",
+ "expExitCode": "255",
+        "verifyCmd": "$TC filter get dev $DEV1 parent aa11b1b2: handle 1 prio 1 protocol all fw",
+ "matchPattern": "filter protocol all pref 1 fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "66e0",
+ "name": "Add fw filter with missing mandatory parent id",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 handle 1 prio 1 fw action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "pref [0-9]+ fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0ff3",
+ "name": "Add fw filter with classid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 3 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x1 classid :3.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9849",
+ "name": "Add fw filter with classid at root",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid ffff:ffff action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "pref 1 fw.*handle 0x1 classid root.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b7ff",
+ "name": "Add fw filter with classid - keeps last 8 (hex) digits",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 98765fedcb action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x1 classid 765f:edcb.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2b18",
+ "name": "Add fw filter with invalid classid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw classid 6789defg action ok",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol all fw",
+ "matchPattern": "fw.*handle 0x1 classid 6789:defg.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "fade",
+ "name": "Add fw filter with flowid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 10 prio 1 fw flowid 1:10 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 1 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xa classid 1:10.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "33af",
+ "name": "Add fw filter with flowid then classid (same arg, takes second)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw flowid 10 classid 4 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :4.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8a8c",
+ "name": "Add fw filter with classid then flowid (same arg, takes second)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 11 prio 1 fw classid 4 flowid 10 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 11 prio 1 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 1 fw.*handle 0xb classid :10.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b50d",
+ "name": "Add fw filter with handle val/mask and flowid 10:1000",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 10/0xff fw flowid 10:1000 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 10 prio 3 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0xa/0xff classid 10:1000.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7207",
+ "name": "Add fw filter with protocol ip",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 handle 3 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 1 protocol ip fw",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 fw.*handle 0x3.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "306d",
+ "name": "Add fw filter with protocol ipv6",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ipv6 prio 2 handle 4 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol ipv6 fw",
+ "matchPattern": "filter parent ffff: protocol ipv6 pref 2 fw.*handle 0x4.*gact action pass.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9a78",
+ "name": "Add fw filter with protocol arp",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol arp prio 5 handle 7 fw action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 7 prio 5 protocol arp fw",
+ "matchPattern": "filter parent ffff: protocol arp pref 5 fw.*handle 0x7.*gact action drop.*index [0-9]+ ref [0-9]+ bind [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1821",
+ "name": "Add fw filter with protocol 802_3",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol 802_3 handle 1 prio 1 fw action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol 802_3 fw",
+ "matchPattern": "filter parent ffff: protocol 802_3 pref 1 fw.*handle 0x1.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2260",
+ "name": "Add fw filter with invalid protocol",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol igmp handle 1 prio 1 fw action ok",
+ "expExitCode": "255",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol igmp fw",
+ "matchPattern": "filter parent ffff: protocol igmp pref 1 fw.*handle 0x1.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "09d7",
+ "name": "Add fw filters protocol 802_3 and ip with conflicting priorities",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: protocol 802_3 prio 3 handle 7 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 3 handle 8 fw action ok",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 8 prio 3 protocol ip fw",
+ "matchPattern": "filter parent ffff: protocol ip pref 3 fw.*handle 0x8",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6973",
+ "name": "Add fw filters with same index, same action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: prio 6 handle 2 fw action continue index 5"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 8 handle 4 fw action continue index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 8 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 8 fw.*handle 0x4.*gact action continue.*index 5 ref 2 bind 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "fc06",
+ "name": "Add fw filters with action police",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 1kbit burst 10k index 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x5 rate 1Kbit burst 10Kb mtu 2Kb action reclassify overhead 0b.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "aac7",
+ "name": "Add fw filters with action police linklayer atm",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: prio 3 handle 4 fw action police rate 2mbit burst 200k linklayer atm index 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 3 protocol all fw",
+ "matchPattern": "filter parent ffff: protocol all pref 3 fw.*handle 0x4.*police 0x8 rate 2Mbit burst 200Kb mtu 2Kb action reclassify overhead 0b linklayer atm.*ref 1 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5339",
+ "name": "Del entire fw filter",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "protocol all pref.*handle.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0e99",
+ "name": "Del single fw filter x1",
+ "__comment__": "First of two tests to check that one filter is there and the other isn't",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "protocol all pref 7.*handle 0x5.*gact action pass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f54c",
+ "name": "Del single fw filter x2",
+ "__comment__": "Second of two tests to check that one filter is there and the other isn't",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 5 prio 7 fw action pass",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 9 fw action pass"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 prio 9 fw action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "protocol all pref 9.*handle 0x3.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "ba94",
+ "name": "Del fw filter by prio",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 4 fw action ok",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: prio 4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 4 fw.*gact action pass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4acb",
+ "name": "Del fw filter by chain",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 chain 13 fw action pipe",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 5 chain 13 fw action pipe"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: chain 13",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "fw chain 13 handle.*gact action pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3424",
+ "name": "Del fw filter by action (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 prio 4 fw action drop"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 2 prio 4 protocol all fw",
+ "matchPattern": "handle 0x2.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "da89",
+ "name": "Del fw filter by handle (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 3 prio 4 fw action continue"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 3 fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 3 prio 4 protocol all fw",
+ "matchPattern": "handle 0x3.*gact action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4d95",
+ "name": "Del fw filter by protocol (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw action pipe"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol arp fw",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 4 prio 2 protocol arp fw",
+ "matchPattern": "filter parent ffff: protocol arp.*handle 0x4.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4736",
+ "name": "Del fw filter by flowid (invalid)",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 4 prio 2 fw action pipe flowid 45"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: fw flowid 45",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "handle 0x4.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3dcb",
+ "name": "Replace fw filter action",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "eb4d",
+ "name": "Replace fw filter classid",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action pipe classid 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 2 fw.*handle 0x1 classid :2.*gact action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "67ec",
+ "name": "Replace fw filter index",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 3"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 2 fw action ok index 16",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "pref 2 fw.*handle 0x1.*gact action pass.*index 16",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
new file mode 100644
index 000000000..99a5ffca1
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -0,0 +1,42 @@
+[
+ {
+ "id": "e9a3",
+ "name": "Add u32 with source match",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 u32 match ip src 127.0.0.1/32 flowid 1:1 action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "match 7f000001/ffffffff at 12",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d052",
+ "name": "Add 1M filters with the same action",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress",
+ "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
+ ],
+ "cmdUnderTest": "$TC -b $BATCH_FILE",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress",
+ "/bin/rm $BATCH_FILE"
+ ]
+ }
+]
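
Note that the "d052" flower case depends on $DEV2 and $BATCH_FILE, both of which come from NAMES in tdc_config.py (added below); DEV2 defaults to the empty string, and tdc.py skips flower-category tests unless a device is supplied with -d/--device, e.g. ./tdc.py -d eth1 (the device name is illustrative).
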
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
new file mode 100755
index 000000000..7607ba3e3
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -0,0 +1,663 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+tdc.py - Linux tc (Traffic Control) unit test driver
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+import re
+import os
+import sys
+import argparse
+import importlib
+import json
+import subprocess
+import time
+import traceback
+from collections import OrderedDict
+from string import Template
+
+from tdc_config import *
+from tdc_helper import *
+
+import TdcPlugin
+
+
+class PluginMgrTestFail(Exception):
+ def __init__(self, stage, output, message):
+ self.stage = stage
+ self.output = output
+ self.message = message
+
+class PluginMgr:
+ def __init__(self, argparser):
+ super().__init__()
+ self.plugins = {}
+ self.plugin_instances = []
+ self.args = []
+ self.argparser = argparser
+
+ # TODO, put plugins in order
+ plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
+ for dirpath, dirnames, filenames in os.walk(plugindir):
+ for fn in filenames:
+ if (fn.endswith('.py') and
+ not fn == '__init__.py' and
+ not fn.startswith('#') and
+ not fn.startswith('.#')):
+ mn = fn[0:-3]
+ foo = importlib.import_module('plugins.' + mn)
+ self.plugins[mn] = foo
+ self.plugin_instances.append(foo.SubPlugin())
+
+ def call_pre_suite(self, testcount, testidlist):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_suite(testcount, testidlist)
+
+ def call_post_suite(self, index):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_suite(index)
+
+ def call_pre_case(self, test_ordinal, testid):
+ for pgn_inst in self.plugin_instances:
+ try:
+ pgn_inst.pre_case(test_ordinal, testid)
+ except Exception as ee:
+ print('exception {} in call to pre_case for {} plugin'.
+ format(ee, pgn_inst.__class__))
+ print('test_ordinal is {}'.format(test_ordinal))
+ print('testid is {}'.format(testid))
+ raise
+
+ def call_post_case(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_case()
+
+ def call_pre_execute(self):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.pre_execute()
+
+ def call_post_execute(self):
+ for pgn_inst in reversed(self.plugin_instances):
+ pgn_inst.post_execute()
+
+ def call_add_args(self, parser):
+ for pgn_inst in self.plugin_instances:
+ parser = pgn_inst.add_args(parser)
+ return parser
+
+ def call_check_args(self, args, remaining):
+ for pgn_inst in self.plugin_instances:
+ pgn_inst.check_args(args, remaining)
+
+ def call_adjust_command(self, stage, command):
+ for pgn_inst in self.plugin_instances:
+ command = pgn_inst.adjust_command(stage, command)
+ return command
+
+    @staticmethod
+    def _make_argparser(args):
+        # a staticmethod has no self to hang the parser on; build and
+        # return it instead (currently unused helper)
+        return argparse.ArgumentParser(
+            description='Linux TC unit tests')
+
+
+def replace_keywords(cmd):
+ """
+ For a given executable command, substitute any known
+ variables contained within NAMES with the correct values
+ """
+ tcmd = Template(cmd)
+ subcmd = tcmd.safe_substitute(NAMES)
+ return subcmd
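
A quick sketch of what this substitution does, assuming the default NAMES from tdc_config.py (added later in this patch):

    from string import Template

    NAMES = {'TC': '/sbin/tc', 'DEV1': 'v0p1'}   # subset of the real dict
    cmd = "$TC qdisc add dev $DEV1 ingress"
    print(Template(cmd).safe_substitute(NAMES))
    # -> /sbin/tc qdisc add dev v0p1 ingress
    # safe_substitute() leaves unknown $PLACEHOLDERS intact instead of
    # raising KeyError, so a partially populated NAMES still works
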
+
+
+def exec_cmd(args, pm, stage, command):
+ """
+ Perform any required modifications on an executable command, then run
+ it in a subprocess and return the results.
+ """
+ if len(command.strip()) == 0:
+ return None, None
+ if '$' in command:
+ command = replace_keywords(command)
+
+ command = pm.call_adjust_command(stage, command)
+ if args.verbose > 0:
+ print('command "{}"'.format(command))
+ proc = subprocess.Popen(command,
+ shell=True,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ env=ENVIR)
+ (rawout, serr) = proc.communicate()
+
+ if proc.returncode != 0 and len(serr) > 0:
+ foutput = serr.decode("utf-8", errors="ignore")
+ else:
+ foutput = rawout.decode("utf-8", errors="ignore")
+
+ proc.stdout.close()
+ proc.stderr.close()
+ return proc, foutput
+
+
+def prepare_env(args, pm, stage, prefix, cmdlist, output=None):
+ """
+ Execute the setup/teardown commands for a test case.
+ Optionally terminate test execution if the command fails.
+ """
+ if args.verbose > 0:
+ print('{}'.format(prefix))
+ for cmdinfo in cmdlist:
+ if isinstance(cmdinfo, list):
+ exit_codes = cmdinfo[1:]
+ cmd = cmdinfo[0]
+ else:
+ exit_codes = [0]
+ cmd = cmdinfo
+
+ if not cmd:
+ continue
+
+ (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+
+ if proc and (proc.returncode not in exit_codes):
+ print('', file=sys.stderr)
+ print("{} *** Could not execute: \"{}\"".format(prefix, cmd),
+ file=sys.stderr)
+ print("\n{} *** Error message: \"{}\"".format(prefix, foutput),
+ file=sys.stderr)
+ print("returncode {}; expected {}".format(proc.returncode,
+ exit_codes))
+ print("\n{} *** Aborting test run.".format(prefix), file=sys.stderr)
+ print("\n\n{} *** stdout ***".format(proc.stdout), file=sys.stderr)
+ print("\n\n{} *** stderr ***".format(proc.stderr), file=sys.stderr)
+ raise PluginMgrTestFail(
+ stage, output,
+ '"{}" did not complete successfully'.format(prefix))
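
The cmdlist argument accepts the two shapes used in the JSON "setup"/"teardown" arrays: a bare string, which must exit 0, or a list whose first element is the command and whose remaining elements are the accepted exit codes. An illustrative setup list:

    setup = [
        "$TC qdisc add dev $DEV1 ingress",             # must exit 0
        ["$TC actions flush action vlan", 0, 1, 255],  # 0, 1 or 255 all pass
    ]
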
+
+def run_one_test(pm, args, index, tidx):
+ global NAMES
+ result = True
+ tresult = ""
+ tap = ""
+ if args.verbose > 0:
+ print("\t====================\n=====> ", end="")
+ print("Test " + tidx["id"] + ": " + tidx["name"])
+
+ # populate NAMES with TESTID for this test
+ NAMES['TESTID'] = tidx['id']
+
+ pm.call_pre_case(index, tidx['id'])
+ prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+
+ if (args.verbose > 0):
+ print('-----> execute stage')
+ pm.call_pre_execute()
+ (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+ if p:
+ exit_code = p.returncode
+ else:
+ exit_code = None
+
+ pm.call_post_execute()
+
+ if (exit_code is None or exit_code != int(tidx["expExitCode"])):
+ result = False
+ print("exit: {!r}".format(exit_code))
+ print("exit: {}".format(int(tidx["expExitCode"])))
+ #print("exit: {!r} {}".format(exit_code, int(tidx["expExitCode"])))
+ print(procout)
+ else:
+ if args.verbose > 0:
+ print('-----> verify stage')
+ match_pattern = re.compile(
+ str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+ (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+ if procout:
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ result = False
+ elif int(tidx["matchCount"]) != 0:
+ result = False
+
+ if not result:
+ tresult += 'not '
+ tresult += 'ok {} - {} # {}\n'.format(str(index), tidx['id'], tidx['name'])
+ tap += tresult
+
+    if not result:
+ if procout:
+ tap += procout
+ else:
+ tap += 'No output!\n'
+
+ prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+ pm.call_post_case()
+
+ index += 1
+
+ # remove TESTID from NAMES
+ del(NAMES['TESTID'])
+ return tap
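
The returned fragment is one TAP line per test, followed by the captured command output when the test fails. Illustrative output for two cases, using IDs from the fw tests above:

    ok 1 - 901f # Add fw filter with prio at 16-bit maximum
    not ok 2 - d987 # Add fw filter with action ok
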
+
+def test_runner(pm, args, filtered_tests):
+ """
+ Driver function for the unit tests.
+
+ Prints information about the tests being run, executes the setup and
+ teardown commands and the command under test itself. Also determines
+ success/failure based on the information in the test case and generates
+ TAP output accordingly.
+ """
+ testlist = filtered_tests
+ tcount = len(testlist)
+ index = 1
+ tap = ''
+ badtest = None
+ stage = None
+ emergency_exit = False
+ emergency_exit_message = ''
+
+ if args.notap:
+ if args.verbose:
+ tap = 'notap requested: omitting test plan\n'
+ else:
+ tap = str(index) + ".." + str(tcount) + "\n"
+ try:
+ pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ # when the extra print statements are uncommented,
+ # the traceback does not appear between them
+ # (it appears way earlier in the tdc.py output)
+ # so don't bother ...
+ # print('--------------------(')
+ # print('traceback')
+ traceback.print_tb(ex_tb)
+ # print('--------------------)')
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+ stage = 'pre-SUITE'
+
+ if emergency_exit:
+ pm.call_post_suite(index)
+ return emergency_exit_message
+ if args.verbose > 1:
+ print('give test rig 2 seconds to stabilize')
+ time.sleep(2)
+ for tidx in testlist:
+        if "flower" in tidx["category"] and args.device is None:
+ if args.verbose > 1:
+ print('Not executing test {} {} because DEV2 not defined'.
+ format(tidx['id'], tidx['name']))
+ continue
+ try:
+ badtest = tidx # in case it goes bad
+ tap += run_one_test(pm, args, index, tidx)
+ except PluginMgrTestFail as pmtf:
+ ex_type, ex, ex_tb = sys.exc_info()
+ stage = pmtf.stage
+ message = pmtf.message
+ output = pmtf.output
+ print(message)
+ print('Exception {} {} (caught in test_runner, running test {} {} {} stage {})'.
+ format(ex_type, ex, index, tidx['id'], tidx['name'], stage))
+ print('---------------')
+ print('traceback')
+ traceback.print_tb(ex_tb)
+ print('---------------')
+ if stage == 'teardown':
+ print('accumulated output for this test:')
+ if pmtf.output:
+ print(pmtf.output)
+ print('---------------')
+ break
+ index += 1
+
+ # if we failed in setup or teardown,
+ # fill in the remaining tests with ok-skipped
+ count = index
+ if not args.notap:
+ tap += 'about to flush the tap output if tests need to be skipped\n'
+ if tcount + 1 != index:
+ for tidx in testlist[index - 1:]:
+ msg = 'skipped - previous {} failed'.format(stage)
+ tap += 'ok {} - {} # {} {} {}\n'.format(
+ count, tidx['id'], msg, index, badtest.get('id', '--Unknown--'))
+ count += 1
+
+ tap += 'done flushing skipped test tap output\n'
+
+ if args.pause:
+ print('Want to pause\nPress enter to continue ...')
+ if input(sys.stdin):
+ print('got something on stdin')
+
+ pm.call_post_suite(index)
+
+ return tap
+
+def has_blank_ids(idlist):
+ """
+ Search the list for empty ID fields and return true/false accordingly.
+ """
+    return not all(idlist)
+
+
+def load_from_file(filename):
+ """
+ Open the JSON file containing the test cases and return them
+ as list of ordered dictionary objects.
+ """
+ try:
+ with open(filename) as test_data:
+ testlist = json.load(test_data, object_pairs_hook=OrderedDict)
+ except json.JSONDecodeError as jde:
+ print('IGNORING test case file {}\n\tBECAUSE: {}'.format(filename, jde))
+ testlist = list()
+ else:
+ idlist = get_id_list(testlist)
+ if (has_blank_ids(idlist)):
+ for k in testlist:
+ k['filename'] = filename
+ return testlist
+
+
+def args_parse():
+ """
+ Create the argument parser.
+ """
+ parser = argparse.ArgumentParser(description='Linux TC unit tests')
+ return parser
+
+
+def set_args(parser):
+ """
+ Set the command line arguments for tdc.
+ """
+ parser.add_argument(
+ '-p', '--path', type=str,
+ help='The full path to the tc executable to use')
+ sg = parser.add_argument_group(
+ 'selection', 'select which test cases: ' +
+ 'files plus directories; filtered by categories plus testids')
+ ag = parser.add_argument_group(
+ 'action', 'select action to perform on selected test cases')
+
+ sg.add_argument(
+ '-D', '--directory', nargs='+', metavar='DIR',
+ help='Collect tests from the specified directory(ies) ' +
+ '(default [tc-tests])')
+ sg.add_argument(
+ '-f', '--file', nargs='+', metavar='FILE',
+ help='Run tests from the specified file(s)')
+ sg.add_argument(
+ '-c', '--category', nargs='*', metavar='CATG', default=['+c'],
+ help='Run tests only from the specified category/ies, ' +
+ 'or if no category/ies is/are specified, list known categories.')
+ sg.add_argument(
+ '-e', '--execute', nargs='+', metavar='ID',
+ help='Execute the specified test cases with specified IDs')
+ ag.add_argument(
+ '-l', '--list', action='store_true',
+ help='List all test cases, or those only within the specified category')
+ ag.add_argument(
+ '-s', '--show', action='store_true', dest='showID',
+ help='Display the selected test cases')
+ ag.add_argument(
+ '-i', '--id', action='store_true', dest='gen_id',
+ help='Generate ID numbers for new test cases')
+ parser.add_argument(
+ '-v', '--verbose', action='count', default=0,
+ help='Show the commands that are being run')
+ parser.add_argument(
+ '-N', '--notap', action='store_true',
+ help='Suppress tap results for command under test')
+ parser.add_argument('-d', '--device',
+ help='Execute the test case in flower category')
+ parser.add_argument(
+ '-P', '--pause', action='store_true',
+ help='Pause execution just before post-suite stage')
+ return parser
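
Some representative invocations built from these flags (file and device names are illustrative):

    ./tdc.py -l                    # list every test case found under tc-tests/
    ./tdc.py -c                    # no category given: list known categories
    ./tdc.py -c fw -v              # run the fw filter tests, showing commands
    ./tdc.py -f my-tests.json -i   # generate IDs for new cases in one file
    ./tdc.py -d eth1 -c flower     # supply DEV2 so flower tests can run
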
+
+
+def check_default_settings(args, remaining, pm):
+ """
+ Process any arguments overriding the default settings,
+ and ensure the settings are correct.
+ """
+ # Allow for overriding specific settings
+ global NAMES
+
+    if args.path is not None:
+        NAMES['TC'] = args.path
+    if args.device is not None:
+        NAMES['DEV2'] = args.device
+ if not os.path.isfile(NAMES['TC']):
+ print("The specified tc path " + NAMES['TC'] + " does not exist.")
+ exit(1)
+
+ pm.call_check_args(args, remaining)
+
+
+def get_id_list(alltests):
+ """
+ Generate a list of all IDs in the test cases.
+ """
+ return [x["id"] for x in alltests]
+
+
+def check_case_id(alltests):
+ """
+ Check for duplicate test case IDs.
+ """
+ idl = get_id_list(alltests)
+ return [x for x in idl if idl.count(x) > 1]
+
+
+def does_id_exist(alltests, newid):
+ """
+ Check if a given ID already exists in the list of test cases.
+ """
+ idl = get_id_list(alltests)
+ return (any(newid == x for x in idl))
+
+
+def generate_case_ids(alltests):
+ """
+ If a test case has a blank ID field, generate a random hex ID for it
+ and then write the test cases back to disk.
+ """
+ import random
+ for c in alltests:
+ if (c["id"] == ""):
+ while True:
+ newid = str('{:04x}'.format(random.randrange(16**4)))
+ if (does_id_exist(alltests, newid)):
+ continue
+ else:
+ c['id'] = newid
+ break
+
+ ufilename = []
+ for c in alltests:
+ if ('filename' in c):
+ ufilename.append(c['filename'])
+ ufilename = get_unique_item(ufilename)
+ for f in ufilename:
+ testlist = []
+ for t in alltests:
+ if 'filename' in t:
+ if t['filename'] == f:
+ del t['filename']
+ testlist.append(t)
+ outfile = open(f, "w")
+ json.dump(testlist, outfile, indent=4)
+ outfile.write("\n")
+        outfile.close()
+    # set_operation_mode() reassigns alltests from this call's return
+    # value, so hand the updated list back instead of returning None
+    return alltests
+
+def filter_tests_by_id(args, testlist):
+ '''
+ Remove tests from testlist that are not in the named id list.
+ If id list is empty, return empty list.
+ '''
+ newlist = list()
+ if testlist and args.execute:
+ target_ids = args.execute
+
+ if isinstance(target_ids, list) and (len(target_ids) > 0):
+ newlist = list(filter(lambda x: x['id'] in target_ids, testlist))
+ return newlist
+
+def filter_tests_by_category(args, testlist):
+ '''
+ Remove tests from testlist that are not in a named category.
+ '''
+ answer = list()
+ if args.category and testlist:
+ test_ids = list()
+ for catg in set(args.category):
+ if catg == '+c':
+ continue
+ print('considering category {}'.format(catg))
+ for tc in testlist:
+ if catg in tc['category'] and tc['id'] not in test_ids:
+ answer.append(tc)
+ test_ids.append(tc['id'])
+
+ return answer
+
+def get_test_cases(args):
+ """
+ If a test case file is specified, retrieve tests from that file.
+ Otherwise, glob for all json files in subdirectories and load from
+ each one.
+ Also, if requested, filter by category, and add tests matching
+ certain ids.
+ """
+ import fnmatch
+
+ flist = []
+ testdirs = ['tc-tests']
+
+ if args.file:
+ # at least one file was specified - remove the default directory
+ testdirs = []
+
+ for ff in args.file:
+ if not os.path.isfile(ff):
+            print("IGNORING file " + ff + "\n\tBECAUSE it does not exist.")
+ else:
+ flist.append(os.path.abspath(ff))
+
+ if args.directory:
+ testdirs = args.directory
+
+ for testdir in testdirs:
+ for root, dirnames, filenames in os.walk(testdir):
+ for filename in fnmatch.filter(filenames, '*.json'):
+ candidate = os.path.abspath(os.path.join(root, filename))
+ if candidate not in testdirs:
+ flist.append(candidate)
+
+ alltestcases = list()
+ for casefile in flist:
+ alltestcases = alltestcases + (load_from_file(casefile))
+
+ allcatlist = get_test_categories(alltestcases)
+ allidlist = get_id_list(alltestcases)
+
+ testcases_by_cats = get_categorized_testlist(alltestcases, allcatlist)
+ idtestcases = filter_tests_by_id(args, alltestcases)
+ cattestcases = filter_tests_by_category(args, alltestcases)
+
+ cat_ids = [x['id'] for x in cattestcases]
+ if args.execute:
+ if args.category:
+ alltestcases = cattestcases + [x for x in idtestcases if x['id'] not in cat_ids]
+ else:
+ alltestcases = idtestcases
+ else:
+ if cat_ids:
+ alltestcases = cattestcases
+ else:
+ # just accept the existing value of alltestcases,
+ # which has been filtered by file/directory
+ pass
+
+ return allcatlist, allidlist, testcases_by_cats, alltestcases
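
The selection logic above composes as follows, as a worked example (the ID and category are illustrative): with -c fw -e 5a31 the run is every "fw" test plus the test with ID 5a31 even if it is outside that category; -e 5a31 alone selects only that ID; -c fw alone selects only the category; with neither, everything found in the chosen files or directories runs.
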
+
+
+def set_operation_mode(pm, args):
+ """
+ Load the test case data and process remaining arguments to determine
+ what the script should do for this run, and call the appropriate
+ function.
+ """
+ ucat, idlist, testcases, alltests = get_test_cases(args)
+
+ if args.gen_id:
+ if (has_blank_ids(idlist)):
+ alltests = generate_case_ids(alltests)
+ else:
+ print("No empty ID fields found in test files.")
+ exit(0)
+
+ duplicate_ids = check_case_id(alltests)
+ if (len(duplicate_ids) > 0):
+ print("The following test case IDs are not unique:")
+ print(str(set(duplicate_ids)))
+ print("Please correct them before continuing.")
+ exit(1)
+
+ if args.showID:
+ for atest in alltests:
+ print_test_case(atest)
+ exit(0)
+
+ if isinstance(args.category, list) and (len(args.category) == 0):
+ print("Available categories:")
+ print_sll(ucat)
+ exit(0)
+
+    if args.list:
+        list_test_cases(alltests)
+        exit(0)
+
+ if len(alltests):
+ catresults = test_runner(pm, args, alltests)
+ else:
+ catresults = 'No tests found\n'
+ if args.notap:
+ print('Tap output suppression requested\n')
+ else:
+ print('All test results: \n\n{}'.format(catresults))
+
+def main():
+ """
+ Start of execution; set up argument parser and get the arguments,
+ and start operations.
+ """
+ parser = args_parse()
+ parser = set_args(parser)
+ pm = PluginMgr(parser)
+ parser = pm.call_add_args(parser)
+ (args, remaining) = parser.parse_known_args()
+ args.NAMES = NAMES
+ check_default_settings(args, remaining, pm)
+ if args.verbose > 2:
+ print('args is {}'.format(args))
+
+ set_operation_mode(pm, args)
+
+ exit(0)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
new file mode 100755
index 000000000..3d8350d6b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+"""
+tdc_batch.py - a script to generate a TC batch file
+
+Copyright (C) 2017 Chris Mi <chrism@mellanox.com>
+"""
+
+import argparse
+
+parser = argparse.ArgumentParser(description='TC batch file generator')
+parser.add_argument("device", help="device name")
+parser.add_argument("file", help="batch file name")
+parser.add_argument("-n", "--number", type=int,
+ help="how many lines in batch file")
+parser.add_argument("-o", "--skip_sw",
+ help="skip_sw (offload), by default skip_hw",
+ action="store_true")
+parser.add_argument("-s", "--share_action",
+ help="all filters share the same action",
+ action="store_true")
+parser.add_argument("-p", "--prio",
+ help="all filters have different prio",
+ action="store_true")
+args = parser.parse_args()
+
+device = args.device
+file = open(args.file, 'w')
+
+number = 1
+if args.number:
+ number = args.number
+
+skip = "skip_hw"
+if args.skip_sw:
+ skip = "skip_sw"
+
+share_action = ""
+if args.share_action:
+ share_action = "index 1"
+
+prio = "prio 1"
+if args.prio:
+ prio = ""
+ if number > 0x4000:
+ number = 0x4000
+
+index = 0
+for i in range(0x100):
+ for j in range(0x100):
+ for k in range(0x100):
+ mac = ("{:02x}:{:02x}:{:02x}".format(i, j, k))
+ src_mac = "e4:11:00:" + mac
+ dst_mac = "e4:12:00:" + mac
+ cmd = ("filter add dev {} {} protocol ip parent ffff: flower {} "
+ "src_mac {} dst_mac {} action drop {}".format
+ (device, prio, skip, src_mac, dst_mac, share_action))
+ file.write("{}\n".format(cmd))
+ index += 1
+ if index >= number:
+ file.close()
+ exit(0)
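
Each line written instantiates the format string above. With device eth0 (an assumed name), default options and --share_action, the first generated line is:

    filter add dev eth0 prio 1 protocol ip parent ffff: flower skip_hw src_mac e4:11:00:00:00:00 dst_mac e4:12:00:00:00:00 action drop index 1

which is what the "d052" test case feeds to tc -b via $BATCH_FILE.
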
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
new file mode 100644
index 000000000..d651bc150
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -0,0 +1,36 @@
+"""
+# SPDX-License-Identifier: GPL-2.0
+tdc_config.py - tdc user-specified values
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+# Dictionary containing all values that can be substituted in executable
+# commands.
+NAMES = {
+ # Substitute your own tc path here
+ 'TC': '/sbin/tc',
+ # Name of veth devices to be created for the namespace
+ 'DEV0': 'v0p0',
+ 'DEV1': 'v0p1',
+ 'DEV2': '',
+ 'BATCH_FILE': './batch.txt',
+ # Name of the namespace to use
+ 'NS': 'tcut',
+ # Directory containing eBPF test programs
+ 'EBPFDIR': './bpf'
+ }
+
+
+ENVIR = { }
+
+# put customizations in tdc_config_local.py
+try:
+ from tdc_config_local import *
+except ImportError as ie:
+ pass
+
+try:
+ NAMES.update(EXTRA_NAMES)
+except NameError as ne:
+ pass
diff --git a/tools/testing/selftests/tc-testing/tdc_config_local_template.py b/tools/testing/selftests/tc-testing/tdc_config_local_template.py
new file mode 100644
index 000000000..d48fc732a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_config_local_template.py
@@ -0,0 +1,23 @@
+"""
+tdc_config_local.py - tdc plugin-writer-specified values
+
+Copyright (C) 2017 bjb@mojatatu.com
+"""
+
+import os
+
+ENVIR = os.environ.copy()
+
+ENV_LD_LIBRARY_PATH = os.getenv('LD_LIBRARY_PATH', '')
+ENV_OTHER_LIB = os.getenv('OTHER_LIB', '')
+
+
+# example adding value to NAMES, without editing tdc_config.py
+EXTRA_NAMES = dict()
+EXTRA_NAMES['SOME_BIN'] = os.path.join(os.getenv('OTHER_BIN', ''), 'some_bin')
+
+
+# example adding values to ENVIR, without editing tdc_config.py
+ENVIR['VALGRIND_LIB'] = '/usr/lib/valgrind'
+ENVIR['VALGRIND_BIN'] = '/usr/bin/valgrind'
+ENVIR['VGDB_BIN'] = '/usr/bin/vgdb'
diff --git a/tools/testing/selftests/tc-testing/tdc_helper.py b/tools/testing/selftests/tc-testing/tdc_helper.py
new file mode 100644
index 000000000..9f35c96c8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc_helper.py
@@ -0,0 +1,67 @@
+"""
+# SPDX-License-Identifier: GPL-2.0
+tdc_helper.py - tdc helper functions
+
+Copyright (C) 2017 Lucas Bates <lucasb@mojatatu.com>
+"""
+
+def get_categorized_testlist(alltests, ucat):
+ """ Sort the master test list into categories. """
+ testcases = dict()
+
+ for category in ucat:
+ testcases[category] = list(filter(lambda x: category in x['category'], alltests))
+
+    return testcases
+
+
+def get_unique_item(lst):
+ """ For a list, return a list of the unique items in the list. """
+ return list(set(lst))
+
+
+def get_test_categories(alltests):
+ """ Discover all unique test categories present in the test case file. """
+ ucat = []
+ for t in alltests:
+ ucat.extend(get_unique_item(t['category']))
+ ucat = get_unique_item(ucat)
+ return ucat
+
+def list_test_cases(testlist):
+ """ Print IDs and names of all test cases. """
+ for curcase in testlist:
+ print(curcase['id'] + ': (' + ', '.join(curcase['category']) + ") " + curcase['name'])
+
+
+def list_categories(testlist):
+ """ Show all categories that are present in a test case file. """
+ # each test's 'category' is itself a list, so flatten before deduplicating
+ categories = set()
+ for tcase in testlist:
+     categories.update(tcase['category'])
+ print("Available categories:")
+ print(", ".join(str(s) for s in categories))
+ print("")
+
+
+def print_list(cmdlist):
+ """ Print a list of strings prepended with a tab. """
+ for l in cmdlist:
+ if isinstance(l, list):
+ print("\t" + str(l[0]))
+ else:
+ print("\t" + str(l))
+
+
+def print_sll(items):
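+ """ Print a list of items, one per line. """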
+ print("\n".join(str(s) for s in items))
+
+
+def print_test_case(tcase):
+ """ Pretty-printing of a given test case. """
+ print('\n==============\nTest {}\t{}\n'.format(tcase['id'], tcase['name']))
+ for k in tcase.keys():
+ if (isinstance(tcase[k], list)):
+ print(k + ":")
+ print_list(tcase[k])
+ else:
+ if not ((k == 'id') or (k == 'name')):
+ print(k + ": " + str(tcase[k]))
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
new file mode 100644
index 000000000..32a9eadb2
--- /dev/null
+++ b/tools/testing/selftests/timers/.gitignore
@@ -0,0 +1,21 @@
+alarmtimer-suspend
+change_skew
+clocksource-switch
+inconsistency-check
+leap-a-day
+leapcrash
+mqueue-lat
+nanosleep
+nsleep-lat
+posix_timers
+raw_skew
+rtcpie
+set-2038
+set-tai
+set-timer-lat
+skew_consistency
+threadtest
+valid-adjtimex
+adjtick
+set-tz
+freq-step
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
new file mode 100644
index 000000000..7656c7ce7
--- /dev/null
+++ b/tools/testing/selftests/timers/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -O3 -Wl,-no-as-needed -Wall
+LDLIBS += -lrt -lpthread -lm
+
+# these are all "safe" tests that don't modify
+# system time or require escalated privileges
+TEST_GEN_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \
+ inconsistency-check raw_skew threadtest rtcpie
+
+DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
+ skew_consistency clocksource-switch freq-step leap-a-day \
+ leapcrash set-tai set-2038 set-tz
+
+TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+
+
+include ../lib.mk
+
+# these tests require escalated privileges
+# and may modify the system time or trigger
+# other behavior like suspend
+run_destructive_tests: run_tests
+ $(call RUN_TESTS, $(DESTRUCTIVE_TESTS))
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
new file mode 100644
index 000000000..54d8d87f3
--- /dev/null
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -0,0 +1,211 @@
+/* adjtimex() tick adjustment test
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc adjtick.c -o adjtick -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+
+#include "../kselftest.h"
+
+#define CLOCK_MONOTONIC_RAW 4
+
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000
+
+#define MILLION 1000000
+
+long systick;
+
+long long llabs(long long val)
+{
+ if (val < 0)
+ val = -val;
+ return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+ struct timespec ts;
+
+ ts.tv_sec = ns/NSEC_PER_SEC;
+ ts.tv_nsec = ns%NSEC_PER_SEC;
+
+ return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+ long long start_ns, end_ns;
+
+ start_ns = ts_to_nsec(start);
+ end_ns = ts_to_nsec(end);
+
+ return end_ns - start_ns;
+}
+
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+ struct timespec start, mid, end;
+ long long diff = 0, tmp;
+ int i;
+
+ clock_gettime(CLOCK_MONOTONIC, mon);
+ clock_gettime(CLOCK_MONOTONIC_RAW, raw);
+
+ /* Try to get a more tightly bound pairing */
+ for (i = 0; i < 3; i++) {
+ long long newdiff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ newdiff = diff_timespec(start, end);
+ if (diff == 0 || newdiff < diff) {
+ diff = newdiff;
+ *raw = mid;
+ tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+ *mon = nsec_to_ts(tmp);
+ }
+ }
+}
+
+long long get_ppm_drift(void)
+{
+ struct timespec mon_start, raw_start, mon_end, raw_end;
+ long long delta1, delta2, eppm;
+
+ get_monotonic_and_raw(&mon_start, &raw_start);
+
+ sleep(15);
+
+ get_monotonic_and_raw(&mon_end, &raw_end);
+
+ delta1 = diff_timespec(mon_start, mon_end);
+ delta2 = diff_timespec(raw_start, raw_end);
+
+ eppm = (delta1*MILLION)/delta2 - MILLION;
+
+ return eppm;
+}
+
+int check_tick_adj(long tickval)
+{
+ long long eppm, ppm;
+ struct timex tx1;
+
+ tx1.modes = ADJ_TICK;
+ tx1.modes |= ADJ_OFFSET;
+ tx1.modes |= ADJ_FREQUENCY;
+ tx1.modes |= ADJ_STATUS;
+
+ tx1.status = STA_PLL;
+ tx1.offset = 0;
+ tx1.freq = 0;
+ tx1.tick = tickval;
+
+ adjtimex(&tx1);
+
+ sleep(1);
+
+ ppm = ((long long)tickval * MILLION)/systick - MILLION;
+ printf("Estimating tick (act: %ld usec, %lld ppm): ", tickval, ppm);
+
+ eppm = get_ppm_drift();
+ printf("%lld usec, %lld ppm", systick + (systick * eppm / MILLION), eppm);
+ fflush(stdout);
+
+ tx1.modes = 0;
+ adjtimex(&tx1);
+
+ if (tx1.offset || tx1.freq || tx1.tick != tickval) {
+ printf(" [ERROR]\n");
+ printf("\tUnexpected adjtimex return values, make sure ntpd is not running.\n");
+ return -1;
+ }
+
+ /*
+ * Here we use 100ppm difference as an error bound.
+ * We likely should see better, but some coarse clocksources
+ * cannot match the HZ tick size accurately, so we have a
+ * internal correction factor that doesn't scale exactly
+ * with the adjustment, resulting in > 10ppm error during
+ * a 10% adjustment. 100ppm also gives us more breathing
+ * room for interruptions during the measurement.
+ */
+ if (llabs(eppm - ppm) > 100) {
+ printf(" [FAILED]\n");
+ return -1;
+ }
+ printf(" [OK]\n");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct timespec raw;
+ long tick, max, interval, err;
+ struct timex tx1;
+
+ err = 0;
+ setbuf(stdout, NULL);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+ printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+ return -1;
+ }
+
+ printf("Each iteration takes about 15 seconds\n");
+
+ systick = USEC_PER_SEC/sysconf(_SC_CLK_TCK); /* tick length in usec */
+ max = systick/10; /* +/- 10% */
+ interval = max/4; /* in 4 steps each side */
+
+ for (tick = (systick - max); tick < (systick + max); tick += interval) {
+ if (check_tick_adj(tick)) {
+ err = 1;
+ break;
+ }
+ }
+
+ /* Reset things to zero */
+ tx1.modes = ADJ_TICK;
+ tx1.modes |= ADJ_OFFSET;
+ tx1.modes |= ADJ_FREQUENCY;
+
+ tx1.offset = 0;
+ tx1.freq = 0;
+ tx1.tick = systick;
+
+ adjtimex(&tx1);
+
+ if (err)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
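
The pass/fail math above hinges on the tick-to-ppm conversion: with HZ=100 the
nominal tick is 10000 usec, so shifting the tick by 1 usec skews the clock by
100 usec per second, i.e. 100 ppm. A minimal standalone sketch of just that
conversion (not part of the patch; the sweep bounds mirror adjtick's +/-10%
range):

	#include <stdio.h>
	#include <unistd.h>

	#define MILLION 1000000

	int main(void)
	{
		/* nominal tick length in usec, e.g. 10000 for HZ=100 */
		long systick = MILLION / sysconf(_SC_CLK_TCK);
		long tick;

		/* sweep +/-10% around the nominal tick, as adjtick does */
		for (tick = systick - systick / 10;
		     tick <= systick + systick / 10;
		     tick += systick / 40) {
			long long ppm;

			ppm = ((long long)tick * MILLION) / systick - MILLION;
			printf("tick %5ld usec -> %7lld ppm\n", tick, ppm);
		}
		return 0;
	}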
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
new file mode 100644
index 000000000..4da09dbf8
--- /dev/null
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -0,0 +1,178 @@
+/* alarmtimer suspend test
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * This test makes sure the alarmtimer & RTC wakeup code is
+ * functioning.
+ *
+ * To build:
+ * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
+
+#define SUSPEND_SECS 15
+int alarmcount;
+int alarm_clock_id;
+struct timespec start_time;
+
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+
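+/* Returns b - a in nanoseconds, i.e. the second argument minus the first */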
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+int final_ret = 0;
+
+void sigalarm(int signo)
+{
+ long long delta_ns;
+ struct timespec ts;
+
+ clock_gettime(alarm_clock_id, &ts);
+ alarmcount++;
+
+ delta_ns = timespec_sub(start_time, ts);
+ delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount;
+
+ printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec,
+ ts.tv_nsec, delta_ns);
+
+ if (delta_ns > UNREASONABLE_LAT) {
+ printf("[FAIL]\n");
+ final_ret = -1;
+ } else
+ printf("[OK]\n");
+
+}
+
+int main(void)
+{
+ timer_t tm1;
+ struct itimerspec its1, its2;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
+
+ /* Set up signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ /* Set up timer: */
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+
+ for (alarm_clock_id = CLOCK_REALTIME_ALARM;
+ alarm_clock_id <= CLOCK_BOOTTIME_ALARM;
+ alarm_clock_id++) {
+
+ alarmcount = 0;
+ if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+ printf("timer_create failed, %s unsupported?\n",
+ clockstring(alarm_clock_id));
+ break;
+ }
+
+ clock_gettime(alarm_clock_id, &start_time);
+ printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
+ start_time.tv_sec, start_time.tv_nsec);
+ printf("Setting alarm for every %i seconds\n", SUSPEND_SECS);
+ its1.it_value = start_time;
+ its1.it_value.tv_sec += SUSPEND_SECS;
+ its1.it_interval.tv_sec = SUSPEND_SECS;
+ its1.it_interval.tv_nsec = 0;
+
+ timer_settime(tm1, TIMER_ABSTIME, &its1, &its2);
+
+ while (alarmcount < 5)
+ sleep(1); /* First 5 alarms, do nothing */
+
+ printf("Starting suspend loops\n");
+ while (alarmcount < 10) {
+ int ret;
+
+ sleep(3);
+ ret = system("echo mem > /sys/power/state");
+ if (ret)
+ break;
+ }
+ timer_delete(tm1);
+ }
+ if (final_ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
new file mode 100644
index 000000000..c4eab7124
--- /dev/null
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -0,0 +1,96 @@
+/* ADJ_FREQ Skew change test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and
+ * then uses other tests to detect problems. Thus this test requires
+ * that the raw_skew, inconsistency-check and nanosleep tests be
+ * present in the same directory it is run from.
+ *
+ * To build:
+ * $ gcc change_skew.c -o change_skew -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+
+int change_skew_test(int ppm)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppm << 16;
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("Error adjusting freq\n");
+ return ret;
+ }
+
+ ret = system("./raw_skew");
+ ret |= system("./inconsistency-check");
+ ret |= system("./nanosleep");
+
+ return ret;
+}
+
+
+int main(int argc, char **argv)
+{
+ struct timex tx;
+ int i, ret;
+
+ int ppm[5] = {0, 250, 500, -250, -500};
+
+ /* Kill ntpd */
+ ret = system("killall -9 ntpd");
+
+ /* Make sure there's no offset adjustment going on */
+ tx.modes = ADJ_OFFSET;
+ tx.offset = 0;
+ ret = adjtimex(&tx);
+
+ if (ret < 0) {
+ printf("Maybe you're not running as root?\n");
+ return -1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ printf("Using %i ppm adjustment\n", ppm[i]);
+ ret = change_skew_test(ppm[i]);
+ if (ret)
+ break;
+ }
+
+ /* Set things back */
+ tx.modes = ADJ_FREQUENCY;
+ tx.offset = 0;
+ adjtimex(&tx);
+
+ if (ret) {
+ printf("[FAIL]");
+ return ksft_exit_fail();
+ }
+ printf("[OK]");
+ return ksft_exit_pass();
+}
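
The `ppm << 16' above is needed because adjtimex() expects ADJ_FREQUENCY
values as 16.16 fixed-point "scaled ppm", i.e. units of 2^-16 ppm. A small
standalone sketch of the conversion in both directions (not part of the
patch):

	#include <stdio.h>

	/* struct timex.freq is 16.16 fixed-point "scaled ppm" */
	static long ppm_to_scaled(long ppm)
	{
		/* same as (ppm << 16), written as a multiply to stay
		 * well-defined for negative ppm */
		return ppm * 65536;
	}

	static double scaled_to_ppm(long freq)
	{
		return (double)freq / 65536.0;
	}

	int main(void)
	{
		long ppm = -250;
		long freq = ppm_to_scaled(ppm);

		printf("%ld ppm -> freq %ld -> %.3f ppm\n",
		       ppm, freq, scaled_to_ppm(freq));
		return 0;
	}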
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
new file mode 100644
index 000000000..bfc974b45
--- /dev/null
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -0,0 +1,168 @@
+/* Clocksource change test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which quickly changes the clocksource and
+ * then uses other tests to detect problems. Thus this test requires
+ * that the inconsistency-check and nanosleep tests be present in the
+ * same directory it is run from.
+ *
+ * To build:
+ * $ gcc clocksource-switch.c -o clocksource-switch -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+
+int get_clocksources(char list[][30])
+{
+ int fd, i;
+ size_t size;
+ char buf[512];
+ char *head, *tmp;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY);
+
+ size = read(fd, buf, 512);
+
+ close(fd);
+
+ for (i = 0; i < 10; i++)
+ list[i][0] = '\0';
+
+ head = buf;
+ i = 0;
+ while (head - buf < size) {
+ /* Find the next space */
+ for (tmp = head; *tmp != ' '; tmp++) {
+ if (*tmp == '\n')
+ break;
+ if (*tmp == '\0')
+ break;
+ }
+ *tmp = '\0';
+ strcpy(list[i], head);
+ head = tmp + 1;
+ i++;
+ }
+
+ return i-1;
+}
+
+int get_cur_clocksource(char *buf, size_t size)
+{
+ int fd;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY);
+
+ size = read(fd, buf, size);
+
+ return 0;
+}
+
+int change_clocksource(char *clocksource)
+{
+ int fd;
+ ssize_t size;
+
+ fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY);
+
+ if (fd < 0)
+ return -1;
+
+ size = write(fd, clocksource, strlen(clocksource));
+
+ if (size < 0)
+ return -1;
+
+ close(fd);
+ return 0;
+}
+
+
+int run_tests(int secs)
+{
+ int ret;
+ char buf[255];
+
+ sprintf(buf, "./inconsistency-check -t %i", secs);
+ ret = system(buf);
+ if (ret)
+ return ret;
+ ret = system("./nanosleep");
+ return ret;
+}
+
+
+char clocksource_list[10][30];
+
+int main(int argc, char **argv)
+{
+ char orig_clk[512];
+ int count, i, status;
+ pid_t pid;
+
+ get_cur_clocksource(orig_clk, 512);
+
+ count = get_clocksources(clocksource_list);
+
+ if (change_clocksource(clocksource_list[0])) {
+ printf("Error: You probably need to run this as root\n");
+ return -1;
+ }
+
+ /* Check everything is sane before we start switching asynchronously */
+ for (i = 0; i < count; i++) {
+ printf("Validating clocksource %s\n", clocksource_list[i]);
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+ if (run_tests(5)) {
+ status = -1;
+ goto out;
+ }
+ }
+
+
+ printf("Running Asynchronous Switching Tests...\n");
+ pid = fork();
+ if (!pid)
+ return run_tests(60);
+
+ while (pid != waitpid(pid, &status, WNOHANG))
+ for (i = 0; i < count; i++)
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+out:
+ change_clocksource(orig_clk);
+
+ if (status)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
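
get_clocksources() above tokenizes the space/newline separated sysfs string by
hand. For reference, an equivalent standalone sketch using strtok_r (not from
the patch; the buffer contents are made up):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		/* example available_clocksource contents */
		char buf[] = "tsc hpet acpi_pm\n";
		char *tok, *save;
		int i = 0;

		for (tok = strtok_r(buf, " \n", &save); tok;
		     tok = strtok_r(NULL, " \n", &save))
			printf("clocksource %d: %s\n", i++, tok);
		return 0;
	}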
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
new file mode 100644
index 000000000..14a2b77fd
--- /dev/null
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -0,0 +1,271 @@
+/*
+ * This test checks the response of the system clock to frequency
+ * steps made with adjtimex(). The frequency error and stability of
+ * the CLOCK_MONOTONIC clock relative to the CLOCK_MONOTONIC_RAW clock
+ * is measured in two intervals following the step. The test fails if
+ * values from the second interval exceed specified limits.
+ *
+ * Copyright (C) Miroslav Lichvar <mlichvar@redhat.com> 2017
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <math.h>
+#include <stdio.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define SAMPLES 100
+#define SAMPLE_READINGS 10
+#define MEAN_SAMPLE_INTERVAL 0.1
+#define STEP_INTERVAL 1.0
+#define MAX_PRECISION 100e-9
+#define MAX_FREQ_ERROR 10e-6
+#define MAX_STDDEV 1000e-9
+
+#ifndef ADJ_SETOFFSET
+ #define ADJ_SETOFFSET 0x0100
+#endif
+
+struct sample {
+ double offset;
+ double time;
+};
+
+static time_t mono_raw_base;
+static time_t mono_base;
+static long user_hz;
+static double precision;
+static double mono_freq_offset;
+
+static double diff_timespec(struct timespec *ts1, struct timespec *ts2)
+{
+ return ts1->tv_sec - ts2->tv_sec + (ts1->tv_nsec - ts2->tv_nsec) / 1e9;
+}
+
+static double get_sample(struct sample *sample)
+{
+ double delay, mindelay = 0.0;
+ struct timespec ts1, ts2, ts3;
+ int i;
+
+ for (i = 0; i < SAMPLE_READINGS; i++) {
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts1);
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &ts3);
+
+ ts1.tv_sec -= mono_raw_base;
+ ts2.tv_sec -= mono_base;
+ ts3.tv_sec -= mono_raw_base;
+
+ delay = diff_timespec(&ts3, &ts1);
+ if (delay <= 1e-9) {
+ i--;
+ continue;
+ }
+
+ if (!i || delay < mindelay) {
+ sample->offset = diff_timespec(&ts2, &ts1);
+ sample->offset -= delay / 2.0;
+ sample->time = ts1.tv_sec + ts1.tv_nsec / 1e9;
+ mindelay = delay;
+ }
+ }
+
+ return mindelay;
+}
+
+static void reset_ntp_error(void)
+{
+ struct timex txc;
+
+ txc.modes = ADJ_SETOFFSET;
+ txc.time.tv_sec = 0;
+ txc.time.tv_usec = 0;
+
+ if (adjtimex(&txc) < 0) {
+ perror("[FAIL] adjtimex");
+ ksft_exit_fail();
+ }
+}
+
+static void set_frequency(double freq)
+{
+ struct timex txc;
+ int tick_offset;
+
+ tick_offset = 1e6 * freq / user_hz;
+
+ txc.modes = ADJ_TICK | ADJ_FREQUENCY;
+ txc.tick = 1000000 / user_hz + tick_offset;
+ txc.freq = (1e6 * freq - user_hz * tick_offset) * (1 << 16);
+
+ if (adjtimex(&txc) < 0) {
+ perror("[FAIL] adjtimex");
+ ksft_exit_fail();
+ }
+}
+
+static void regress(struct sample *samples, int n, double *intercept,
+ double *slope, double *r_stddev, double *r_max)
+{
+ double x, y, r, x_sum, y_sum, xy_sum, x2_sum, r2_sum;
+ int i;
+
+ x_sum = 0.0, y_sum = 0.0, xy_sum = 0.0, x2_sum = 0.0;
+
+ for (i = 0; i < n; i++) {
+ x = samples[i].time;
+ y = samples[i].offset;
+
+ x_sum += x;
+ y_sum += y;
+ xy_sum += x * y;
+ x2_sum += x * x;
+ }
+
+ *slope = (xy_sum - x_sum * y_sum / n) / (x2_sum - x_sum * x_sum / n);
+ *intercept = (y_sum - *slope * x_sum) / n;
+
+ *r_max = 0.0, r2_sum = 0.0;
+
+ for (i = 0; i < n; i++) {
+ x = samples[i].time;
+ y = samples[i].offset;
+ r = fabs(x * *slope + *intercept - y);
+ if (*r_max < r)
+ *r_max = r;
+ r2_sum += r * r;
+ }
+
+ *r_stddev = sqrt(r2_sum / n);
+}
+
+static int run_test(int calibration, double freq_base, double freq_step)
+{
+ struct sample samples[SAMPLES];
+ double intercept, slope, stddev1, max1, stddev2, max2;
+ double freq_error1, freq_error2;
+ int i;
+
+ set_frequency(freq_base);
+
+ for (i = 0; i < 10; i++)
+ usleep(1e6 * MEAN_SAMPLE_INTERVAL / 10);
+
+ reset_ntp_error();
+
+ set_frequency(freq_base + freq_step);
+
+ for (i = 0; i < 10; i++)
+ usleep(rand() % 2000000 * STEP_INTERVAL / 10);
+
+ set_frequency(freq_base);
+
+ for (i = 0; i < SAMPLES; i++) {
+ usleep(rand() % 2000000 * MEAN_SAMPLE_INTERVAL);
+ get_sample(&samples[i]);
+ }
+
+ if (calibration) {
+ regress(samples, SAMPLES, &intercept, &slope, &stddev1, &max1);
+ mono_freq_offset = slope;
+ printf("CLOCK_MONOTONIC_RAW frequency offset: %11.3f ppm\n",
+ 1e6 * mono_freq_offset);
+ return 0;
+ }
+
+ regress(samples, SAMPLES / 2, &intercept, &slope, &stddev1, &max1);
+ freq_error1 = slope * (1.0 - mono_freq_offset) - mono_freq_offset -
+ freq_base;
+
+ regress(samples + SAMPLES / 2, SAMPLES / 2, &intercept, &slope,
+ &stddev2, &max2);
+ freq_error2 = slope * (1.0 - mono_freq_offset) - mono_freq_offset -
+ freq_base;
+
+ printf("%6.0f %+10.3f %6.0f %7.0f %+10.3f %6.0f %7.0f\t",
+ 1e6 * freq_step,
+ 1e6 * freq_error1, 1e9 * stddev1, 1e9 * max1,
+ 1e6 * freq_error2, 1e9 * stddev2, 1e9 * max2);
+
+ if (fabs(freq_error2) > MAX_FREQ_ERROR || stddev2 > MAX_STDDEV) {
+ printf("[FAIL]\n");
+ return 1;
+ }
+
+ printf("[OK]\n");
+ return 0;
+}
+
+static void init_test(void)
+{
+ struct timespec ts;
+ struct sample sample;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts)) {
+ perror("[FAIL] clock_gettime(CLOCK_MONOTONIC_RAW)");
+ ksft_exit_fail();
+ }
+
+ mono_raw_base = ts.tv_sec;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts)) {
+ perror("[FAIL] clock_gettime(CLOCK_MONOTONIC)");
+ ksft_exit_fail();
+ }
+
+ mono_base = ts.tv_sec;
+
+ user_hz = sysconf(_SC_CLK_TCK);
+
+ precision = get_sample(&sample) / 2.0;
+ printf("CLOCK_MONOTONIC_RAW+CLOCK_MONOTONIC precision: %.0f ns\t\t",
+ 1e9 * precision);
+
+ if (precision > MAX_PRECISION)
+ ksft_exit_skip("precision: %.0f ns > MAX_PRECISION: %.0f ns\n",
+ 1e9 * precision, 1e9 * MAX_PRECISION);
+
+ printf("[OK]\n");
+ srand(ts.tv_sec ^ ts.tv_nsec);
+
+ run_test(1, 0.0, 0.0);
+}
+
+int main(int argc, char **argv)
+{
+ double freq_base, freq_step;
+ int i, j, fails = 0;
+
+ init_test();
+
+ printf("Checking response to frequency step:\n");
+ printf(" Step 1st interval 2nd interval\n");
+ printf(" Freq Dev Max Freq Dev Max\n");
+
+ for (i = 2; i >= 0; i--) {
+ for (j = 0; j < 5; j++) {
+ freq_base = (rand() % (1 << 24) - (1 << 23)) / 65536e6;
+ freq_step = 10e-6 * (1 << (6 * i));
+ fails += run_test(0, freq_base, freq_step);
+ }
+ }
+
+ set_frequency(0.0);
+
+ if (fails)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
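
regress() above is ordinary least squares over the (time, offset) samples. In
the usual notation it computes

\[
\hat\beta = \frac{\sum_i x_i y_i - \tfrac{1}{n}\sum_i x_i \sum_i y_i}
                 {\sum_i x_i^2 - \tfrac{1}{n}\left(\sum_i x_i\right)^2},
\qquad
\hat\alpha = \frac{1}{n}\left(\sum_i y_i - \hat\beta \sum_i x_i\right),
\]

and reports the RMS residual \(\sqrt{\tfrac{1}{n}\sum_i r_i^2}\) and the
maximum residual \(\max_i r_i\), where \(r_i = |\hat\beta x_i + \hat\alpha - y_i|\).
The slope \(\hat\beta\) is the measured frequency offset that run_test()
compares against the applied step.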
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
new file mode 100644
index 000000000..022d3ffe3
--- /dev/null
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -0,0 +1,193 @@
+/* Time inconsistency check test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2003, 2004, 2005, 2012
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc inconsistency-check.c -o inconsistency-check -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define CALLS_PER_LOOP 64
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ /* use unsigned to avoid false positives on 2038 rollover */
+ if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec)
+ return 1;
+ if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+
+
+int consistency_test(int clock_type, unsigned long seconds)
+{
+ struct timespec list[CALLS_PER_LOOP];
+ int i, inconsistent;
+ long now, then;
+ time_t t;
+ char *start_str;
+
+ clock_gettime(clock_type, &list[0]);
+ now = then = list[0].tv_sec;
+
+ /* timestamp start of test */
+ t = time(0);
+ start_str = ctime(&t);
+
+ while (seconds == -1 || now - then < seconds) {
+ inconsistent = -1;
+
+ /* Fill list */
+ for (i = 0; i < CALLS_PER_LOOP; i++)
+ clock_gettime(clock_type, &list[i]);
+
+ /* Check for inconsistencies */
+ for (i = 0; i < CALLS_PER_LOOP - 1; i++)
+ if (!in_order(list[i], list[i+1]))
+ inconsistent = i;
+
+ /* display inconsistency */
+ if (inconsistent >= 0) {
+ unsigned long long delta;
+
+ printf("\%s\n", start_str);
+ for (i = 0; i < CALLS_PER_LOOP; i++) {
+ if (i == inconsistent)
+ printf("--------------------\n");
+ printf("%lu:%lu\n", list[i].tv_sec,
+ list[i].tv_nsec);
+ if (i == inconsistent + 1)
+ printf("--------------------\n");
+ }
+ delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
+ delta += list[inconsistent].tv_nsec;
+ delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
+ delta -= list[inconsistent+1].tv_nsec;
+ printf("Delta: %llu ns\n", delta);
+ fflush(0);
+ /* timestamp inconsistency*/
+ t = time(0);
+ printf("%s\n", ctime(&t));
+ printf("[FAILED]\n");
+ return -1;
+ }
+ now = list[0].tv_sec;
+ }
+ printf("[OK]\n");
+ return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+ int clockid, opt;
+ int userclock = CLOCK_REALTIME;
+ int maxclocks = NR_CLOCKIDS;
+ int runtime = 10;
+ struct timespec ts;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "t:c:")) != -1) {
+ switch (opt) {
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ case 'c':
+ userclock = atoi(optarg);
+ maxclocks = userclock + 1;
+ break;
+ default:
+ printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]);
+ printf(" -t: Number of seconds to run\n");
+ printf(" -c: clockid to use (default, all clockids)\n");
+ exit(-1);
+ }
+ }
+
+ setbuf(stdout, NULL);
+
+ for (clockid = userclock; clockid < maxclocks; clockid++) {
+
+ if (clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ if (!clock_gettime(clockid, &ts)) {
+ printf("Consistent %-30s ", clockstring(clockid));
+ if (consistency_test(clockid, runtime))
+ return ksft_exit_fail();
+ }
+ }
+ return ksft_exit_pass();
+}
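
The unsigned comparison in in_order() matters at the 2038 rollover of a 32-bit
tv_sec, where the signed value jumps from INT32_MAX to INT32_MIN while the
unsigned bit pattern keeps increasing. A tiny standalone demonstration (not
part of the patch):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* 32-bit tv_sec wrapping at 2038: 0x7fffffff -> 0x80000000 */
		int32_t before = INT32_MAX;
		int32_t after = INT32_MIN;

		printf("signed:   before <= after is %d\n", before <= after);
		printf("unsigned: before <= after is %d\n",
		       (uint32_t)before <= (uint32_t)after);
		return 0;
	}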
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
new file mode 100644
index 000000000..19e46ed5d
--- /dev/null
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -0,0 +1,378 @@
+/* Leap second stress test
+ * by: John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright 2013, 2015 Linaro Limited
+ * Licensed under the GPLv2
+ *
+ * This test signals the kernel to insert a leap second
+ * every day at midnight GMT. This allows for stressing the
+ * kernel's leap-second behavior, as well as how well applications
+ * handle the leap-second discontinuity.
+ *
+ * Usage: leap-a-day [-s] [-i <num>]
+ *
+ * Options:
+ * -s: Each iteration, set the date to 10 seconds before midnight GMT.
+ * This speeds up the number of leapsecond transitions tested,
+ * but because it calls settimeofday frequently, advancing the
+ * time by 24 hours every ~16 seconds, it may cause application
+ * disruption.
+ *
+ * -i: Number of iterations to run (default: infinite)
+ *
+ * Other notes: Disabling NTP prior to running this is advised, as the two
+ * may conflict in their commands to the kernel.
+ *
+ * To build:
+ * $ gcc leap-a-day.c -o leap-a-day -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <sys/errno.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+#define CLOCK_TAI 11
+
+time_t next_leap;
+int error_found;
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ if (a.tv_sec < b.tv_sec)
+ return 1;
+ if (a.tv_sec > b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+char *time_state_str(int state)
+{
+ switch (state) {
+ case TIME_OK: return "TIME_OK";
+ case TIME_INS: return "TIME_INS";
+ case TIME_DEL: return "TIME_DEL";
+ case TIME_OOP: return "TIME_OOP";
+ case TIME_WAIT: return "TIME_WAIT";
+ case TIME_BAD: return "TIME_BAD";
+ }
+ return "ERROR";
+}
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ /*
+ * We have to call adjtimex twice here, as kernels
+ * prior to 6b1859dba01c7 (included in 3.5 and
+ * -stable), had an issue with the state machine
+ * and wouldn't clear the STA_INS/DEL flag directly.
+ */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_PLL;
+ ret = adjtimex(&tx);
+
+ /* Clear maxerror, as it can cause UNSYNC to be set */
+ tx.modes = ADJ_MAXERROR;
+ tx.maxerror = 0;
+ ret = adjtimex(&tx);
+
+ /* Clear the status */
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+
+ return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+ clear_time_state();
+ exit(0);
+}
+
+void sigalarm(int signo)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tx.time.tv_sec < next_leap) {
+ printf("Error: Early timer expiration! (Should be %ld)\n", next_leap);
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ if (ret != TIME_WAIT) {
+ printf("Error: Timer seeing incorrect NTP state? (Should be TIME_WAIT)\n");
+ error_found = 1;
+ printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n",
+ tx.time.tv_sec,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+}
+
+
+/* Test for known hrtimer failure */
+void test_hrtimer_failure(void)
+{
+ struct timespec now, target;
+
+ clock_gettime(CLOCK_REALTIME, &now);
+ target = timespec_add(now, NSEC_PER_SEC/2);
+ clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL);
+ clock_gettime(CLOCK_REALTIME, &now);
+
+ if (!in_order(target, now)) {
+ printf("ERROR: hrtimer early expiration failure observed.\n");
+ error_found = 1;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ timer_t tm1;
+ struct itimerspec its1;
+ struct sigevent se;
+ struct sigaction act;
+ int signum = SIGRTMAX;
+ int settime = 1;
+ int tai_time = 0;
+ int insert = 1;
+ int iterations = 10;
+ int opt;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "sti:")) != -1) {
+ switch (opt) {
+ case 'w':
+ printf("Only setting leap-flag, not changing time. It could take up to a day for leap to trigger.\n");
+ settime = 0;
+ break;
+ case 'i':
+ iterations = atoi(optarg);
+ break;
+ case 't':
+ tai_time = 1;
+ break;
+ default:
+ printf("Usage: %s [-w] [-i <iterations>]\n", argv[0]);
+ printf(" -w: Set flag and wait for leap second each iteration");
+ printf(" (default sets time to right before leapsecond)\n");
+ printf(" -i: Number of iterations (-1 = infinite, default is 10)\n");
+ printf(" -t: Print TAI time\n");
+ exit(-1);
+ }
+ }
+
+ /* Make sure TAI support is present if -t was used */
+ if (tai_time) {
+ struct timespec ts;
+
+ if (clock_gettime(CLOCK_TAI, &ts)) {
+ printf("System doesn't support CLOCK_TAI\n");
+ ksft_exit_fail();
+ }
+ }
+
+ signal(SIGINT, handler);
+ signal(SIGKILL, handler);
+
+ /* Set up timer signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+ if (iterations < 0)
+ printf("This runs continuously. Press ctrl-c to stop\n");
+ else
+ printf("Running for %i iterations. Press ctrl-c to stop\n", iterations);
+
+ printf("\n");
+ while (1) {
+ int ret;
+ struct timespec ts;
+ struct timex tx;
+ time_t now;
+
+ /* Get the current time */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Calculate the next possible leap second 23:59:60 GMT */
+ next_leap = ts.tv_sec;
+ next_leap += 86400 - (next_leap % 86400);
+
+ if (settime) {
+ struct timeval tv;
+
+ tv.tv_sec = next_leap - 10;
+ tv.tv_usec = 0;
+ settimeofday(&tv, NULL);
+ printf("Setting time to %s", ctime(&tv.tv_sec));
+ }
+
+ /* Reset NTP time state */
+ clear_time_state();
+
+ /* Set the leap second insert flag */
+ tx.modes = ADJ_STATUS;
+ if (insert)
+ tx.status = STA_INS;
+ else
+ tx.status = STA_DEL;
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
+ time_state_str(ret));
+ return ksft_exit_fail();
+ }
+
+ /* Validate STA_INS was set */
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.status != STA_INS && tx.status != STA_DEL) {
+ printf("Error: STA_INS/STA_DEL not set!: %s\n",
+ time_state_str(ret));
+ return ksft_exit_fail();
+ }
+
+ if (tai_time) {
+ printf("Using TAI time,"
+ " no inconsistencies should be seen!\n");
+ }
+
+ printf("Scheduling leap second for %s", ctime(&next_leap));
+
+ /* Set up timer */
+ printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap));
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = signum;
+ se.sigev_value.sival_int = 0;
+ if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
+ printf("Error: timer_create failed\n");
+ return ksft_exit_fail();
+ }
+ its1.it_value.tv_sec = next_leap;
+ its1.it_value.tv_nsec = 0;
+ its1.it_interval.tv_sec = 0;
+ its1.it_interval.tv_nsec = 0;
+ timer_settime(tm1, TIMER_ABSTIME, &its1, NULL);
+
+ /* Wake up 3 seconds before leap */
+ ts.tv_sec = next_leap - 3;
+ ts.tv_nsec = 0;
+
+
+ while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL))
+ printf("Something woke us up, returning to sleep\n");
+
+ /* Validate STA_INS is still set */
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.status != STA_INS && tx.status != STA_DEL) {
+ printf("Something cleared STA_INS/STA_DEL, setting it again.\n");
+ tx.modes = ADJ_STATUS;
+ if (insert)
+ tx.status = STA_INS;
+ else
+ tx.status = STA_DEL;
+ ret = adjtimex(&tx);
+ }
+
+ /* Check adjtimex output every half second */
+ now = tx.time.tv_sec;
+ while (now < next_leap + 2) {
+ char buf[26];
+ struct timespec tai;
+ int ret;
+
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+
+ if (tai_time) {
+ clock_gettime(CLOCK_TAI, &tai);
+ printf("%ld sec, %9ld ns\t%s\n",
+ tai.tv_sec,
+ tai.tv_nsec,
+ time_state_str(ret));
+ } else {
+ ctime_r(&tx.time.tv_sec, buf);
+ buf[strlen(buf)-1] = 0; /* remove trailing \n */
+
+ printf("%s + %6ld us (%i)\t%s\n",
+ buf,
+ tx.time.tv_usec,
+ tx.tai,
+ time_state_str(ret));
+ }
+ now = tx.time.tv_sec;
+ /* Sleep for another half second */
+ ts.tv_sec = 0;
+ ts.tv_nsec = NSEC_PER_SEC / 2;
+ clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL);
+ }
+ /* Switch to using other mode */
+ insert = !insert;
+
+ /* Note if kernel has known hrtimer failure */
+ test_hrtimer_failure();
+
+ printf("Leap complete\n");
+ if (error_found) {
+ printf("Errors observed\n");
+ clear_time_state();
+ return ksft_exit_fail();
+ }
+ printf("\n");
+ if ((iterations != -1) && !(--iterations))
+ break;
+ }
+
+ clear_time_state();
+ return ksft_exit_pass();
+}
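
The next_leap arithmetic above works because POSIX time_t pretends every day
has exactly 86400 seconds, so `now % 86400' is always the offset from midnight
UTC. A standalone sketch of the same computation (not part of the patch):

	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		char buf[64];
		time_t now = time(NULL);
		/* the next 00:00:00 UTC boundary, the only place a leap
		 * second can be scheduled */
		time_t next = now + 86400 - (now % 86400);

		strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M:%S UTC",
			 gmtime(&next));
		printf("next possible leap edge: %s\n", buf);
		return 0;
	}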
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
new file mode 100644
index 000000000..dc80728ed
--- /dev/null
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -0,0 +1,108 @@
+/* Demo leapsecond deadlock
+ * by: John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright 2013, 2015 Linaro Limited
+ * Licensed under the GPL
+ *
+ * This test demonstrates leapsecond deadlock that is possible
+ * on kernels from 2.6.26 to 3.3.
+ *
+ * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * RUN AT YOUR OWN RISK!
+ * To build:
+ * $ gcc leapcrash.c -o leapcrash -lrt
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ /*
+ * We have to call adjtimex twice here, as kernels
+ * prior to 6b1859dba01c7 (included in 3.5 and
+ * -stable), had an issue with the state machine
+ * and wouldn't clear the STA_INS/DEL flag directly.
+ */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_PLL;
+ ret = adjtimex(&tx);
+
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+
+ return ret;
+}
+
+/* Make sure we cleanup on ctrl-c */
+void handler(int unused)
+{
+ clear_time_state();
+ exit(0);
+}
+
+
+int main(void)
+{
+ struct timex tx;
+ struct timespec ts;
+ time_t next_leap;
+ int count = 0;
+
+ setbuf(stdout, NULL);
+
+ signal(SIGINT, handler);
+ signal(SIGKILL, handler);
+ printf("This runs for a few minutes. Press ctrl-c to stop\n");
+
+ clear_time_state();
+
+
+ /* Get the current time */
+ clock_gettime(CLOCK_REALTIME, &ts);
+
+ /* Calculate the next possible leap second 23:59:60 GMT */
+ next_leap = ts.tv_sec;
+ next_leap += 86400 - (next_leap % 86400);
+
+ for (count = 0; count < 20; count++) {
+ struct timeval tv;
+
+
+ /* set the time to 2 seconds before the leap */
+ tv.tv_sec = next_leap - 2;
+ tv.tv_usec = 0;
+ if (settimeofday(&tv, NULL)) {
+ printf("Error: You're likely not running with proper (ie: root) permissions\n");
+ return ksft_exit_fail();
+ }
+ tx.modes = 0;
+ adjtimex(&tx);
+
+ /* hammer on adjtime w/ STA_INS */
+ while (tx.time.tv_sec < next_leap + 1) {
+ /* Set the leap second insert flag */
+ tx.modes = ADJ_STATUS;
+ tx.status = STA_INS;
+ adjtimex(&tx);
+ }
+ clear_time_state();
+ printf(".");
+ fflush(stdout);
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
new file mode 100644
index 000000000..7916cf5cc
--- /dev/null
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -0,0 +1,114 @@
+/* Measure mqueue timeout latency
+ * by: john stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ *
+ * Inspired with permission from example test by:
+ * Romain Francoise <romain@orebokech.com>
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc mqueue-lat.c -o mqueue-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <errno.h>
+#include <mqueue.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */
+#define UNREASONABLE_LATENCY 40000000 /* 40ms in nanoseconds */
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+int mqueue_lat_test(void)
+{
+
+ mqd_t q;
+ struct mq_attr attr;
+ struct timespec start, end, now, target;
+ int i, count, ret;
+
+ q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL);
+ if (q < 0) {
+ perror("mq_open");
+ return -1;
+ }
+ mq_getattr(q, &attr);
+
+
+ count = 100;
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < count; i++) {
+ char buf[attr.mq_msgsize];
+
+ clock_gettime(CLOCK_REALTIME, &now);
+ target = now;
+ target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */
+
+ ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target);
+ if (ret < 0 && errno != ETIMEDOUT) {
+ perror("mq_timedreceive");
+ return -1;
+ }
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ mq_close(q);
+
+ if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNREASONABLE_LATENCY)
+ return -1;
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ printf("Mqueue latency : ");
+ fflush(stdout);
+
+ ret = mqueue_lat_test();
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
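
mq_timedreceive() takes an absolute CLOCK_REALTIME deadline, which is why the
loop above rebuilds "target" from a fresh CLOCK_REALTIME reading on every
iteration. A minimal standalone sketch (not from the patch; the queue name
/lat-demo is made up; link with -lrt):

	#include <fcntl.h>
	#include <mqueue.h>
	#include <stdio.h>
	#include <time.h>
	#include <errno.h>

	int main(void)
	{
		struct timespec deadline;
		struct mq_attr attr;
		mqd_t q = mq_open("/lat-demo", O_CREAT | O_RDONLY, 0600, NULL);

		mq_getattr(q, &attr);
		char buf[attr.mq_msgsize];

		/* deadline is absolute CLOCK_REALTIME, one second from now */
		clock_gettime(CLOCK_REALTIME, &deadline);
		deadline.tv_sec += 1;

		if (mq_timedreceive(q, buf, sizeof(buf), NULL, &deadline) < 0 &&
		    errno == ETIMEDOUT)
			printf("timed out as expected\n");

		mq_close(q);
		mq_unlink("/lat-demo");
		return 0;
	}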
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
new file mode 100644
index 000000000..71b5441c2
--- /dev/null
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -0,0 +1,165 @@
+/* Make sure timers don't return early
+ * by: john stultz (johnstul@us.ibm.com)
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright IBM 2012
+ * (C) Copyright Linaro 2013 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc nanosleep.c -o nanosleep -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+/* returns 1 if a <= b, 0 otherwise */
+static inline int in_order(struct timespec a, struct timespec b)
+{
+ if (a.tv_sec < b.tv_sec)
+ return 1;
+ if (a.tv_sec > b.tv_sec)
+ return 0;
+ if (a.tv_nsec > b.tv_nsec)
+ return 0;
+ return 1;
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+int nanosleep_test(int clockid, long long ns)
+{
+ struct timespec now, target, rel;
+
+ /* First check abs time */
+ if (clock_gettime(clockid, &now))
+ return UNSUPPORTED;
+ target = timespec_add(now, ns);
+
+ if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL))
+ return UNSUPPORTED;
+ clock_gettime(clockid, &now);
+
+ if (!in_order(target, now))
+ return -1;
+
+ /* Second check reltime */
+ clock_gettime(clockid, &now);
+ rel.tv_sec = 0;
+ rel.tv_nsec = 0;
+ rel = timespec_add(rel, ns);
+ target = timespec_add(now, ns);
+ clock_nanosleep(clockid, 0, &rel, NULL);
+ clock_gettime(clockid, &now);
+
+ if (!in_order(target, now))
+ return -1;
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ long long length;
+ int clockid, ret;
+
+ for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+ /* Skip cputime clockids since nanosleep won't increment cputime */
+ if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+ clockid == CLOCK_THREAD_CPUTIME_ID ||
+ clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ printf("Nanosleep %-31s ", clockstring(clockid));
+ fflush(stdout);
+
+ length = 10;
+ while (length <= (NSEC_PER_SEC * 10)) {
+ ret = nanosleep_test(clockid, length);
+ if (ret == UNSUPPORTED) {
+ printf("[UNSUPPORTED]\n");
+ goto next;
+ }
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ length *= 100;
+ }
+ printf("[OK]\n");
+next:
+ ret = 0;
+ }
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
new file mode 100644
index 000000000..eb3e79ed7
--- /dev/null
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -0,0 +1,180 @@
+/* Measure nanosleep timer latency
+ * by: john stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc nsleep-lat.c -o nsleep-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+#define UNREASONABLE_LATENCY 40000000 /* 40ms in nanoseconds */
+
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+#define UNSUPPORTED 0xf00f
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+ };
+ return "UNKNOWN_CLOCKID";
+}
+
+struct timespec timespec_add(struct timespec ts, unsigned long long ns)
+{
+ ts.tv_nsec += ns;
+ while (ts.tv_nsec >= NSEC_PER_SEC) {
+ ts.tv_nsec -= NSEC_PER_SEC;
+ ts.tv_sec++;
+ }
+ return ts;
+}
+
+
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+int nanosleep_lat_test(int clockid, long long ns)
+{
+ struct timespec start, end, target;
+ long long latency = 0;
+ int i, count;
+
+ target.tv_sec = ns/NSEC_PER_SEC;
+ target.tv_nsec = ns%NSEC_PER_SEC;
+
+ if (clock_gettime(clockid, &start))
+ return UNSUPPORTED;
+ if (clock_nanosleep(clockid, 0, &target, NULL))
+ return UNSUPPORTED;
+
+ count = 10;
+
+ /* First check relative latency */
+ clock_gettime(clockid, &start);
+ for (i = 0; i < count; i++)
+ clock_nanosleep(clockid, 0, &target, NULL);
+ clock_gettime(clockid, &end);
+
+ if (((timespec_sub(start, end)/count)-ns) > UNREASONABLE_LATENCY) {
+ printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+ return -1;
+ }
+
+ /* Next check absolute latency */
+ for (i = 0; i < count; i++) {
+ clock_gettime(clockid, &start);
+ target = timespec_add(start, ns);
+ clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL);
+ clock_gettime(clockid, &end);
+ latency += timespec_sub(target, end);
+ }
+
+ if (latency/count > UNREASONABLE_LATENCY) {
+ printf("Large abs latency: %lld ns :", latency/count);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+
+int main(int argc, char **argv)
+{
+ long long length;
+ int clockid, ret;
+
+ for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
+
+ /* Skip cputime clockids since nanosleep won't increment cputime */
+ if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
+ clockid == CLOCK_THREAD_CPUTIME_ID ||
+ clockid == CLOCK_HWSPECIFIC)
+ continue;
+
+ printf("nsleep latency %-26s ", clockstring(clockid));
+ fflush(stdout);
+
+ length = 10;
+ while (length <= (NSEC_PER_SEC * 10)) {
+ ret = nanosleep_lat_test(clockid, length);
+ if (ret)
+ break;
+ length *= 100;
+
+ }
+
+ if (ret == UNSUPPORTED) {
+ printf("[UNSUPPORTED]\n");
+ continue;
+ }
+ if (ret < 0) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ }
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
new file mode 100644
index 000000000..15cf56d32
--- /dev/null
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2013 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for a few posix timers interface.
+ *
+ * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+
+#define DELAY 2
+#define USECS_PER_SEC 1000000
+
+static volatile int done;
+
+/* Busy loop in userspace to elapse ITIMER_VIRTUAL */
+static void user_loop(void)
+{
+ while (!done);
+}
+
+/*
+ * Try to spend as much time as possible in kernelspace
+ * to elapse ITIMER_PROF.
+ */
+static void kernel_loop(void)
+{
+ void *addr = sbrk(0);
+ int err = 0;
+
+ while (!done && !err) {
+ err = brk(addr + 4096);
+ err |= brk(addr);
+ }
+}
+
+/*
+ * Sleep until ITIMER_REAL expiration.
+ */
+static void idle_loop(void)
+{
+ pause();
+}
+
+static void sig_handler(int nr)
+{
+ done = 1;
+}
+
+/*
+ * Check the expected timer expiration matches the GTOD elapsed delta since
+ * we armed the timer. Keep a 0.5 sec error margin due to various jitter.
+ */
+static int check_diff(struct timeval start, struct timeval end)
+{
+ long long diff;
+
+ diff = end.tv_usec - start.tv_usec;
+ diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
+
+ if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ printf("Diff too high: %lld..", diff);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_itimer(int which)
+{
+ int err;
+ struct timeval start, end;
+ struct itimerval val = {
+ .it_value.tv_sec = DELAY,
+ };
+
+ printf("Check itimer ");
+
+ if (which == ITIMER_VIRTUAL)
+ printf("virtual... ");
+ else if (which == ITIMER_PROF)
+ printf("prof... ");
+ else if (which == ITIMER_REAL)
+ printf("real... ");
+
+ fflush(stdout);
+
+ done = 0;
+
+ if (which == ITIMER_VIRTUAL)
+ signal(SIGVTALRM, sig_handler);
+ else if (which == ITIMER_PROF)
+ signal(SIGPROF, sig_handler);
+ else if (which == ITIMER_REAL)
+ signal(SIGALRM, sig_handler);
+
+ err = gettimeofday(&start, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ err = setitimer(which, &val, NULL);
+ if (err < 0) {
+ perror("Can't set timer\n");
+ return -1;
+ }
+
+ if (which == ITIMER_VIRTUAL)
+ user_loop();
+ else if (which == ITIMER_PROF)
+ kernel_loop();
+ else if (which == ITIMER_REAL)
+ idle_loop();
+
+ err = gettimeofday(&end, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ if (!check_diff(start, end))
+ printf("[OK]\n");
+ else
+ printf("[FAIL]\n");
+
+ return 0;
+}
+
+static int check_timer_create(int which)
+{
+ int err;
+ timer_t id;
+ struct timeval start, end;
+ struct itimerspec val = {
+ .it_value.tv_sec = DELAY,
+ };
+
+ printf("Check timer_create() ");
+ if (which == CLOCK_THREAD_CPUTIME_ID) {
+ printf("per thread... ");
+ } else if (which == CLOCK_PROCESS_CPUTIME_ID) {
+ printf("per process... ");
+ }
+ fflush(stdout);
+
+ done = 0;
+ err = timer_create(which, NULL, &id);
+ if (err < 0) {
+		perror("Can't create timer");
+ return -1;
+ }
+ signal(SIGALRM, sig_handler);
+
+ err = gettimeofday(&start, NULL);
+ if (err < 0) {
+		perror("Can't call gettimeofday()");
+ return -1;
+ }
+
+ err = timer_settime(id, 0, &val, NULL);
+ if (err < 0) {
+		perror("Can't set timer");
+ return -1;
+ }
+
+ user_loop();
+
+ err = gettimeofday(&end, NULL);
+ if (err < 0) {
+		perror("Can't call gettimeofday()");
+ return -1;
+ }
+
+ if (!check_diff(start, end))
+ printf("[OK]\n");
+ else
+ printf("[FAIL]\n");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+	printf("Testing posix timers. False negatives may happen on CPU-execution-\n");
+	printf("based timers if other threads run on the CPU...\n");
+
+ if (check_itimer(ITIMER_VIRTUAL) < 0)
+ return ksft_exit_fail();
+
+ if (check_itimer(ITIMER_PROF) < 0)
+ return ksft_exit_fail();
+
+ if (check_itimer(ITIMER_REAL) < 0)
+ return ksft_exit_fail();
+
+ if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
+ return ksft_exit_fail();
+
+ /*
+ * It's unfortunately hard to reliably test a timer expiration
+ * on parallel multithread cputime. We could arm it to expire
+ * on DELAY * nr_threads, with nr_threads busy looping, then wait
+ * the normal DELAY since the time is elapsing nr_threads faster.
+ * But for that we need to ensure we have real physical free CPUs
+ * to ensure true parallelism. So test only one thread until we
+ * find a better solution.
+ */
+ if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
new file mode 100644
index 000000000..b41d8dd0c
--- /dev/null
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -0,0 +1,148 @@
+/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test
+ * by: john stultz (johnstul@us.ibm.com)
+ * John Stultz <john.stultz@linaro.org>
+ * (C) Copyright IBM 2012
+ * (C) Copyright Linaro Limited 2015
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc raw_skew.c -o raw_skew -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include "../kselftest.h"
+
+#define CLOCK_MONOTONIC_RAW 4
+#define NSEC_PER_SEC 1000000000LL
+
+#define shift_right(x, s) ({ \
+ __typeof__(x) __x = (x); \
+ __typeof__(s) __s = (s); \
+ __x < 0 ? -(-__x >> __s) : __x >> __s; \
+})
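+
+/*
+ * shift_right() divides by 2^s rounding toward zero for negative values
+ * (shift_right(-1, 1) is 0, while an arithmetic -1 >> 1 is -1) and avoids
+ * right-shifting negative integers, which C leaves implementation-defined.
+ */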
+
+long long llabs(long long val)
+{
+ if (val < 0)
+ val = -val;
+ return val;
+}
+
+unsigned long long ts_to_nsec(struct timespec ts)
+{
+ return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec;
+}
+
+struct timespec nsec_to_ts(long long ns)
+{
+ struct timespec ts;
+
+ ts.tv_sec = ns/NSEC_PER_SEC;
+ ts.tv_nsec = ns%NSEC_PER_SEC;
+ return ts;
+}
+
+long long diff_timespec(struct timespec start, struct timespec end)
+{
+ long long start_ns, end_ns;
+
+ start_ns = ts_to_nsec(start);
+ end_ns = ts_to_nsec(end);
+ return end_ns - start_ns;
+}
+
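+/*
+ * Sample MONOTONIC, RAW, MONOTONIC back to back, three times over, and keep
+ * the attempt with the narrowest MONOTONIC window; the RAW sample is then
+ * paired with the midpoint of the two surrounding MONOTONIC reads.
+ */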
+void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
+{
+ struct timespec start, mid, end;
+ long long diff = 0, tmp;
+ int i;
+
+ for (i = 0; i < 3; i++) {
+ long long newdiff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ clock_gettime(CLOCK_MONOTONIC_RAW, &mid);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ newdiff = diff_timespec(start, end);
+ if (diff == 0 || newdiff < diff) {
+ diff = newdiff;
+ *raw = mid;
+ tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2;
+ *mon = nsec_to_ts(tmp);
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ struct timespec mon, raw, start, end;
+ long long delta1, delta2, interval, eppm, ppm;
+ struct timex tx1, tx2;
+
+ setbuf(stdout, NULL);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) {
+ printf("ERR: NO CLOCK_MONOTONIC_RAW\n");
+ return -1;
+ }
+
+ tx1.modes = 0;
+ adjtimex(&tx1);
+ get_monotonic_and_raw(&mon, &raw);
+ start = mon;
+ delta1 = diff_timespec(mon, raw);
+
+ if (tx1.offset)
+ printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n");
+
+ printf("Estimating clock drift: ");
+ fflush(stdout);
+ sleep(120);
+
+ get_monotonic_and_raw(&mon, &raw);
+ end = mon;
+ tx2.modes = 0;
+ adjtimex(&tx2);
+ delta2 = diff_timespec(mon, raw);
+
+ interval = diff_timespec(start, end);
+
+ /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */
+ eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval;
+ eppm = -eppm;
+ printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
+
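+	/*
+	 * tx.freq is scaled ppm with a 16-bit fractional part (ppm << 16),
+	 * so multiplying by 1000 and shifting right by 16 below yields
+	 * milli-ppm, matching the eppm estimate printed above.
+	 */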
+ /* Avg the two actual freq samples adjtimex gave us */
+	ppm = (tx1.freq + tx2.freq) * 1000 / 2;
+ ppm = shift_right(ppm, 16);
+ printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
+
+ if (llabs(eppm - ppm) > 1000) {
+ if (tx1.offset || tx2.offset ||
+ tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
+ printf(" [SKIP]\n");
+ return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+ }
+ printf(" [FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf(" [OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
new file mode 100644
index 000000000..4ef2184f1
--- /dev/null
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Real Time Clock Periodic Interrupt test program
+ *
+ * Since commit 6610e0893b8bc ("RTC: Rework RTC code to use timerqueue for
+ * events"), PIE are completely handled using hrtimers, without actually using
+ * any underlying hardware RTC.
+ *
+ */
+
+#include <stdio.h>
+#include <linux/rtc.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+/*
+ * This expects the new RTC class driver framework, working with
+ * clocks that will often not be clones of what the PC-AT had.
+ * Use the command line to specify another RTC if you need one.
+ */
+static const char default_rtc[] = "/dev/rtc0";
+
+int main(int argc, char **argv)
+{
+ int i, fd, retval, irqcount = 0;
+ unsigned long tmp, data, old_pie_rate;
+ const char *rtc = default_rtc;
+ struct timeval start, end, diff;
+
+ switch (argc) {
+ case 2:
+ rtc = argv[1];
+ break;
+ case 1:
+ fd = open(default_rtc, O_RDONLY);
+ if (fd == -1) {
+ printf("Default RTC %s does not exist. Test Skipped!\n", default_rtc);
+ exit(KSFT_SKIP);
+ }
+ close(fd);
+ break;
+ default:
+		fprintf(stderr, "usage: rtcpie [rtcdev]\n");
+ return 1;
+ }
+
+ fd = open(rtc, O_RDONLY);
+
+ if (fd == -1) {
+ perror(rtc);
+ exit(errno);
+ }
+
+ /* Read periodic IRQ rate */
+ retval = ioctl(fd, RTC_IRQP_READ, &old_pie_rate);
+ if (retval == -1) {
+ /* not all RTCs support periodic IRQs */
+ if (errno == EINVAL) {
+ fprintf(stderr, "\nNo periodic IRQ support\n");
+ goto done;
+ }
+ perror("RTC_IRQP_READ ioctl");
+ exit(errno);
+ }
+ fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", old_pie_rate);
+
+ fprintf(stderr, "Counting 20 interrupts at:");
+ fflush(stderr);
+
+ /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */
+	for (tmp = 2; tmp <= 64; tmp *= 2) {
+
+ retval = ioctl(fd, RTC_IRQP_SET, tmp);
+ if (retval == -1) {
+ /* not all RTCs can change their periodic IRQ rate */
+ if (errno == EINVAL) {
+ fprintf(stderr,
+ "\n...Periodic IRQ rate is fixed\n");
+ goto done;
+ }
+ perror("RTC_IRQP_SET ioctl");
+ exit(errno);
+ }
+
+ fprintf(stderr, "\n%ldHz:\t", tmp);
+ fflush(stderr);
+
+ /* Enable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_ON, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_ON ioctl");
+ exit(errno);
+ }
+
+		for (i = 1; i < 21; i++) {
+ gettimeofday(&start, NULL);
+ /* This blocks */
+ retval = read(fd, &data, sizeof(unsigned long));
+ if (retval == -1) {
+ perror("read");
+ exit(errno);
+ }
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
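+			/* Allow 10% slack over the nominal 1/tmp second period */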
+ if (diff.tv_sec > 0 ||
+ diff.tv_usec > ((1000000L / tmp) * 1.10)) {
+ fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n",
+ diff.tv_sec, diff.tv_usec,
+ (1000000L / tmp));
+ fflush(stdout);
+ exit(-1);
+ }
+
+			fprintf(stderr, " %d", i);
+ fflush(stderr);
+ irqcount++;
+ }
+
+ /* Disable periodic interrupts */
+ retval = ioctl(fd, RTC_PIE_OFF, 0);
+ if (retval == -1) {
+ perror("RTC_PIE_OFF ioctl");
+ exit(errno);
+ }
+ }
+
+done:
+ ioctl(fd, RTC_IRQP_SET, old_pie_rate);
+
+ fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n");
+
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
new file mode 100644
index 000000000..688cfd81b
--- /dev/null
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -0,0 +1,133 @@
+/* Time bounds setting test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which sets the time to edge cases then
+ * uses other tests to detect problems. Thus this test requires that
+ * the inconsistency-check and nanosleep tests be present in the same
+ * directory it is run from.
+ *
+ * To build:
+ * $ gcc set-2038.c -o set-2038 -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <sys/time.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+#define KTIME_MAX ((long long)~((unsigned long long)1 << 63))
+#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+
+#define YEAR_1901 (-0x7fffffffL)
+#define YEAR_1970 1
+#define YEAR_2038 0x7fffffffL /* overflows 32bit time_t */
+#define YEAR_2262 KTIME_SEC_MAX /* overflows 64bit ktime_t */
+#define YEAR_MAX ((long long)((1ULL<<63)-1)) /* overflows 64bit time_t */
+
+int is32bits(void)
+{
+ return (sizeof(long) == 4);
+}
+
+int settime(long long time)
+{
+ struct timeval now;
+ int ret;
+
+ now.tv_sec = (time_t)time;
+ now.tv_usec = 0;
+
+ ret = settimeofday(&now, NULL);
+
+	printf("Setting time to 0x%llx: %d\n", time, ret);
+ return ret;
+}
+
+int do_tests(void)
+{
+ int ret;
+
+ ret = system("date");
+ ret = system("./inconsistency-check -c 0 -t 20");
+ ret |= system("./nanosleep");
+ ret |= system("./nsleep-lat");
+ return ret;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+ int opt, dangerous = 0;
+ time_t start;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d")) != -1) {
+ switch (opt) {
+ case 'd':
+			dangerous = 1;
+			break;
+ }
+ }
+
+ start = time(0);
+
+ /* First test that crazy values don't work */
+ if (!settime(YEAR_1901)) {
+ ret = -1;
+ goto out;
+ }
+ if (!settime(YEAR_MAX)) {
+ ret = -1;
+ goto out;
+ }
+ if (!is32bits() && !settime(YEAR_2262)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Now test behavior near edges */
+ settime(YEAR_1970);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ settime(YEAR_2038 - 600);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ /* The rest of the tests can blowup on 32bit systems */
+ if (is32bits() && !dangerous)
+ goto out;
+ /* Test rollover behavior 32bit edge */
+ settime(YEAR_2038 - 10);
+ ret = do_tests();
+ if (ret)
+ goto out;
+
+ settime(YEAR_2262 - 600);
+ ret = do_tests();
+
+out:
+ /* restore clock */
+ settime(start);
+ if (ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
new file mode 100644
index 000000000..8c4179ee2
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -0,0 +1,69 @@
+/* Set tai offset
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2013
+ * Licensed under the GPLv2
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+int set_tai(int offset)
+{
+ struct timex tx;
+
+ memset(&tx, 0, sizeof(tx));
+
+ tx.modes = ADJ_TAI;
+ tx.constant = offset;
+
+ return adjtimex(&tx);
+}
+
+int get_tai(void)
+{
+ struct timex tx;
+
+ memset(&tx, 0, sizeof(tx));
+
+ adjtimex(&tx);
+ return tx.tai;
+}
+
+int main(int argc, char **argv)
+{
+ int i, ret;
+
+ ret = get_tai();
+ printf("tai offset started at %i\n", ret);
+
+ printf("Checking tai offsets can be properly set: ");
+ fflush(stdout);
+ for (i = 1; i <= 60; i++) {
+ ret = set_tai(i);
+ ret = get_tai();
+ if (ret != i) {
+ printf("[FAILED] expected: %i got %i\n", i, ret);
+ return ksft_exit_fail();
+ }
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
new file mode 100644
index 000000000..50da45437
--- /dev/null
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -0,0 +1,283 @@
+/* set_timer latency test
+ * John Stultz (john.stultz@linaro.org)
+ * (C) Copyright Linaro 2014
+ * Licensed under the GPLv2
+ *
+ * This test makes sure the set_timer api is correct
+ *
+ * To build:
+ * $ gcc set-timer-lat.c -o set-timer-lat -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <string.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+#define CLOCK_REALTIME 0
+#define CLOCK_MONOTONIC 1
+#define CLOCK_PROCESS_CPUTIME_ID 2
+#define CLOCK_THREAD_CPUTIME_ID 3
+#define CLOCK_MONOTONIC_RAW 4
+#define CLOCK_REALTIME_COARSE 5
+#define CLOCK_MONOTONIC_COARSE 6
+#define CLOCK_BOOTTIME 7
+#define CLOCK_REALTIME_ALARM 8
+#define CLOCK_BOOTTIME_ALARM 9
+#define CLOCK_HWSPECIFIC 10
+#define CLOCK_TAI 11
+#define NR_CLOCKIDS 12
+
+
+#define NSEC_PER_SEC 1000000000ULL
+#define UNREASONABLE_LATENCY 40000000 /* 40ms in nanosecs */
+
+#define TIMER_SECS 1
+int alarmcount;
+int clock_id;
+struct timespec start_time;
+long long max_latency_ns;
+int timer_fired_early;
+
+char *clockstring(int clockid)
+{
+ switch (clockid) {
+ case CLOCK_REALTIME:
+ return "CLOCK_REALTIME";
+ case CLOCK_MONOTONIC:
+ return "CLOCK_MONOTONIC";
+ case CLOCK_PROCESS_CPUTIME_ID:
+ return "CLOCK_PROCESS_CPUTIME_ID";
+ case CLOCK_THREAD_CPUTIME_ID:
+ return "CLOCK_THREAD_CPUTIME_ID";
+ case CLOCK_MONOTONIC_RAW:
+ return "CLOCK_MONOTONIC_RAW";
+ case CLOCK_REALTIME_COARSE:
+ return "CLOCK_REALTIME_COARSE";
+ case CLOCK_MONOTONIC_COARSE:
+ return "CLOCK_MONOTONIC_COARSE";
+ case CLOCK_BOOTTIME:
+ return "CLOCK_BOOTTIME";
+ case CLOCK_REALTIME_ALARM:
+ return "CLOCK_REALTIME_ALARM";
+ case CLOCK_BOOTTIME_ALARM:
+ return "CLOCK_BOOTTIME_ALARM";
+ case CLOCK_TAI:
+ return "CLOCK_TAI";
+	}
+ return "UNKNOWN_CLOCKID";
+}
+
+
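+/* Returns (b - a) in nanoseconds */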
+long long timespec_sub(struct timespec a, struct timespec b)
+{
+ long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec;
+
+ ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec;
+ return ret;
+}
+
+
+void sigalarm(int signo)
+{
+ long long delta_ns;
+ struct timespec ts;
+
+ clock_gettime(clock_id, &ts);
+ alarmcount++;
+
+ delta_ns = timespec_sub(start_time, ts);
+ delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount;
+
+ if (delta_ns < 0)
+ timer_fired_early = 1;
+
+ if (delta_ns > max_latency_ns)
+ max_latency_ns = delta_ns;
+}
+
+void describe_timer(int flags, int interval)
+{
+ printf("%-22s %s %s ",
+ clockstring(clock_id),
+ flags ? "ABSTIME":"RELTIME",
+ interval ? "PERIODIC":"ONE-SHOT");
+}
+
+int setup_timer(int clock_id, int flags, int interval, timer_t *tm1)
+{
+ struct sigevent se;
+ struct itimerspec its1, its2;
+ int err;
+
+ /* Set up timer: */
+ memset(&se, 0, sizeof(se));
+ se.sigev_notify = SIGEV_SIGNAL;
+ se.sigev_signo = SIGRTMAX;
+ se.sigev_value.sival_int = 0;
+
+ max_latency_ns = 0;
+ alarmcount = 0;
+ timer_fired_early = 0;
+
+ err = timer_create(clock_id, &se, tm1);
+ if (err) {
+ if ((clock_id == CLOCK_REALTIME_ALARM) ||
+ (clock_id == CLOCK_BOOTTIME_ALARM)) {
+ printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n",
+ clockstring(clock_id),
+ flags ? "ABSTIME":"RELTIME");
+ /* Indicate timer isn't set, so caller doesn't wait */
+ return 1;
+ }
+ printf("%s - timer_create() failed\n", clockstring(clock_id));
+ return -1;
+ }
+
+ clock_gettime(clock_id, &start_time);
+ if (flags) {
+ its1.it_value = start_time;
+ its1.it_value.tv_sec += TIMER_SECS;
+ } else {
+ its1.it_value.tv_sec = TIMER_SECS;
+ its1.it_value.tv_nsec = 0;
+ }
+ its1.it_interval.tv_sec = interval;
+ its1.it_interval.tv_nsec = 0;
+
+ err = timer_settime(*tm1, flags, &its1, &its2);
+ if (err) {
+ printf("%s - timer_settime() failed\n", clockstring(clock_id));
+ return -1;
+ }
+
+ return 0;
+}
+
+int check_timer_latency(int flags, int interval)
+{
+ int err = 0;
+
+ describe_timer(flags, interval);
+ printf("timer fired early: %7d : ", timer_fired_early);
+ if (!timer_fired_early) {
+ printf("[OK]\n");
+ } else {
+ printf("[FAILED]\n");
+ err = -1;
+ }
+
+ describe_timer(flags, interval);
+ printf("max latency: %10lld ns : ", max_latency_ns);
+
+	if (max_latency_ns < UNREASONABLE_LATENCY) {
+ printf("[OK]\n");
+ } else {
+ printf("[FAILED]\n");
+ err = -1;
+ }
+ return err;
+}
+
+int check_alarmcount(int flags, int interval)
+{
+ describe_timer(flags, interval);
+ printf("count: %19d : ", alarmcount);
+ if (alarmcount == 1) {
+ printf("[OK]\n");
+ return 0;
+ }
+ printf("[FAILED]\n");
+ return -1;
+}
+
+int do_timer(int clock_id, int flags)
+{
+ timer_t tm1;
+ const int interval = TIMER_SECS;
+ int err;
+
+ err = setup_timer(clock_id, flags, interval, &tm1);
+ /* Unsupported case - return 0 to not fail the test */
+ if (err)
+ return err == 1 ? 0 : err;
+
+ while (alarmcount < 5)
+ sleep(1);
+
+ timer_delete(tm1);
+ return check_timer_latency(flags, interval);
+}
+
+int do_timer_oneshot(int clock_id, int flags)
+{
+ timer_t tm1;
+ const int interval = 0;
+ struct timeval timeout;
+ int err;
+
+ err = setup_timer(clock_id, flags, interval, &tm1);
+ /* Unsupported case - return 0 to not fail the test */
+ if (err)
+ return err == 1 ? 0 : err;
+
+ memset(&timeout, 0, sizeof(timeout));
+ timeout.tv_sec = 5;
+ do {
+ err = select(0, NULL, NULL, NULL, &timeout);
+ } while (err == -1 && errno == EINTR);
+
+ timer_delete(tm1);
+ err = check_timer_latency(flags, interval);
+ err |= check_alarmcount(flags, interval);
+ return err;
+}
+
+int main(void)
+{
+ struct sigaction act;
+ int signum = SIGRTMAX;
+ int ret = 0;
+
+ /* Set up signal handler: */
+ sigfillset(&act.sa_mask);
+ act.sa_flags = 0;
+ act.sa_handler = sigalarm;
+ sigaction(signum, &act, NULL);
+
+	printf("Setting timers to expire every %i seconds\n", TIMER_SECS);
+ for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) {
+
+ if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) ||
+ (clock_id == CLOCK_THREAD_CPUTIME_ID) ||
+ (clock_id == CLOCK_MONOTONIC_RAW) ||
+ (clock_id == CLOCK_REALTIME_COARSE) ||
+ (clock_id == CLOCK_MONOTONIC_COARSE) ||
+ (clock_id == CLOCK_HWSPECIFIC))
+ continue;
+
+ ret |= do_timer(clock_id, TIMER_ABSTIME);
+ ret |= do_timer(clock_id, 0);
+ ret |= do_timer_oneshot(clock_id, TIMER_ABSTIME);
+ ret |= do_timer_oneshot(clock_id, 0);
+ }
+ if (ret)
+ return ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
new file mode 100644
index 000000000..62bd33eb1
--- /dev/null
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -0,0 +1,110 @@
+/* Set tz value
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2016
+ * Licensed under the GPLv2
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+int set_tz(int min, int dst)
+{
+ struct timezone tz;
+
+ tz.tz_minuteswest = min;
+ tz.tz_dsttime = dst;
+
+ return settimeofday(0, &tz);
+}
+
+int get_tz_min(void)
+{
+ struct timezone tz;
+ struct timeval tv;
+
+ memset(&tz, 0, sizeof(tz));
+ gettimeofday(&tv, &tz);
+ return tz.tz_minuteswest;
+}
+
+int get_tz_dst(void)
+{
+ struct timezone tz;
+ struct timeval tv;
+
+ memset(&tz, 0, sizeof(tz));
+ gettimeofday(&tv, &tz);
+ return tz.tz_dsttime;
+}
+
+int main(int argc, char **argv)
+{
+ int i, ret;
+ int min, dst;
+
+ min = get_tz_min();
+ dst = get_tz_dst();
+ printf("tz_minuteswest started at %i, dst at %i\n", min, dst);
+
+ printf("Checking tz_minuteswest can be properly set: ");
+ fflush(stdout);
+ for (i = -15*60; i < 15*60; i += 30) {
+ ret = set_tz(i, dst);
+ ret = get_tz_min();
+ if (ret != i) {
+ printf("[FAILED] expected: %i got %i\n", i, ret);
+ goto err;
+ }
+ }
+ printf("[OK]\n");
+
+ printf("Checking invalid tz_minuteswest values are caught: ");
+ fflush(stdout);
+
+ if (!set_tz(-15*60-1, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", -15*60-1);
+ goto err;
+ }
+
+ if (!set_tz(15*60+1, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", 15*60+1);
+ goto err;
+ }
+
+ if (!set_tz(-24*60, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", -24*60);
+ goto err;
+ }
+
+ if (!set_tz(24*60, dst)) {
+ printf("[FAILED] %i didn't return failure!\n", 24*60);
+ goto err;
+ }
+
+ printf("[OK]\n");
+
+ set_tz(min, dst);
+ return ksft_exit_pass();
+
+err:
+ set_tz(min, dst);
+ return ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
new file mode 100644
index 000000000..022b711c7
--- /dev/null
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -0,0 +1,78 @@
+/* ADJ_FREQ Skew consistency test
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2012
+ * Licensed under the GPLv2
+ *
+ * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back
+ * and forth and watches for consistency problems. Thus this test requires
+ * that the inconsistency-check tests be present in the same directory it
+ * is run from.
+ *
+ * To build:
+ * $ gcc skew_consistency.c -o skew_consistency -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <time.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/wait.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+
+int main(int argc, char **argv)
+{
+ struct timex tx;
+ int ret, ppm;
+ pid_t pid;
+
+ printf("Running Asynchronous Frequency Changing Tests...\n");
+
+ pid = fork();
+ if (!pid)
+ return system("./inconsistency-check -c 1 -t 600");
+
+ ppm = 500;
+ ret = 0;
+
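+	/* Flip the frequency offset between +/-500 ppm every half second */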
+ while (pid != waitpid(pid, &ret, WNOHANG)) {
+ ppm = -ppm;
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = ppm << 16;
+ adjtimex(&tx);
+ usleep(500000);
+ }
+
+ /* Set things back */
+ tx.modes = ADJ_FREQUENCY;
+	tx.freq = 0;
+ adjtimex(&tx);
+
+ if (ret) {
+ printf("[FAILED]\n");
+ return ksft_exit_fail();
+ }
+ printf("[OK]\n");
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
new file mode 100644
index 000000000..cf3e48919
--- /dev/null
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -0,0 +1,193 @@
+/* threadtest.c
+ * by: john stultz (johnstul@us.ibm.com)
+ * (C) Copyright IBM 2004, 2005, 2006, 2012
+ * Licensed under the GPLv2
+ *
+ * To build:
+ * $ gcc threadtest.c -o threadtest -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include "../kselftest.h"
+
+/* serializes shared list access */
+pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
+/* serializes console output */
+pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+
+#define MAX_THREADS 128
+#define LISTSIZE 128
+
+int done = 0;
+
+struct timespec global_list[LISTSIZE];
+int listcount = 0;
+
+
+void checklist(struct timespec *list, int size)
+{
+ int i, j;
+ struct timespec *a, *b;
+
+ /* scan the list */
+ for (i = 0; i < size-1; i++) {
+ a = &list[i];
+ b = &list[i+1];
+
+ /* look for any time inconsistencies */
+		if ((b->tv_sec < a->tv_sec) ||
+		    ((b->tv_sec == a->tv_sec) &&
+		     (b->tv_nsec < a->tv_nsec))) {
+
+ /* flag other threads */
+ done = 1;
+
+			/* serialize printing to avoid junky output */
+ pthread_mutex_lock(&print_lock);
+
+ /* dump the list */
+ printf("\n");
+ for (j = 0; j < size; j++) {
+ if (j == i)
+ printf("---------------\n");
+ printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec);
+ if (j == i+1)
+ printf("---------------\n");
+ }
+ printf("[FAILED]\n");
+
+ pthread_mutex_unlock(&print_lock);
+ }
+ }
+}
+
+/* Shared threads all fill a single global list,
+ * each appending entries while holding the lock.
+ * This stresses clock synchronization across CPUs.
+ */
+void *shared_thread(void *arg)
+{
+ while (!done) {
+ /* protect the list */
+ pthread_mutex_lock(&list_lock);
+
+ /* see if we're ready to check the list */
+ if (listcount >= LISTSIZE) {
+ checklist(global_list, LISTSIZE);
+ listcount = 0;
+ }
+ clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]);
+
+ pthread_mutex_unlock(&list_lock);
+ }
+ return NULL;
+}
+
+
+/* Each independent thread fills in its own
+ * list. This stresses clock_gettime() lock contention.
+ */
+void *independent_thread(void *arg)
+{
+ struct timespec my_list[LISTSIZE];
+ int count;
+
+ while (!done) {
+ /* fill the list */
+ for (count = 0; count < LISTSIZE; count++)
+ clock_gettime(CLOCK_MONOTONIC, &my_list[count]);
+ checklist(my_list, LISTSIZE);
+ }
+ return NULL;
+}
+
+#define DEFAULT_THREAD_COUNT 8
+#define DEFAULT_RUNTIME 30
+
+int main(int argc, char **argv)
+{
+ int thread_count, i;
+ time_t start, now, runtime;
+ char buf[255];
+ pthread_t pth[MAX_THREADS];
+ int opt;
+ void *tret;
+ int ret = 0;
+ void *(*thread)(void *) = shared_thread;
+
+ thread_count = DEFAULT_THREAD_COUNT;
+ runtime = DEFAULT_RUNTIME;
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "t:n:i")) != -1) {
+ switch (opt) {
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ case 'n':
+ thread_count = atoi(optarg);
+ break;
+ case 'i':
+ thread = independent_thread;
+ printf("using independent threads\n");
+ break;
+ default:
+ printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]);
+ printf(" -t: time to run\n");
+ printf(" -n: number of threads\n");
+ printf(" -i: use independent threads\n");
+ return -1;
+ }
+ }
+
+ if (thread_count > MAX_THREADS)
+ thread_count = MAX_THREADS;
+
+
+ setbuf(stdout, NULL);
+
+ start = time(0);
+ strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start));
+ printf("%s\n", buf);
+ printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime);
+ fflush(stdout);
+
+ /* spawn */
+ for (i = 0; i < thread_count; i++)
+ pthread_create(&pth[i], 0, thread, 0);
+
+ while (time(&now) < start + runtime) {
+ sleep(1);
+ if (done) {
+ ret = 1;
+ strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now));
+ printf("%s\n", buf);
+ goto out;
+ }
+ }
+ printf("[OK]\n");
+ done = 1;
+
+out:
+ /* wait */
+ for (i = 0; i < thread_count; i++)
+ pthread_join(pth[i], &tret);
+
+ /* die */
+ if (ret)
+ ksft_exit_fail();
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
new file mode 100644
index 000000000..5397de708
--- /dev/null
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -0,0 +1,330 @@
+/* valid adjtimex test
+ * by: John Stultz <john.stultz@linaro.org>
+ * (C) Copyright Linaro 2015
+ * Licensed under the GPLv2
+ *
+ * This test validates adjtimex interface with valid
+ * and invalid test data.
+ *
+ * Usage: valid-adjtimex
+ *
+ * To build:
+ * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/timex.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include "../kselftest.h"
+
+#define NSEC_PER_SEC 1000000000LL
+#define USEC_PER_SEC 1000000LL
+
+#define ADJ_SETOFFSET 0x0100
+
+#include <sys/syscall.h>
+static int clock_adjtime(clockid_t id, struct timex *tx)
+{
+ return syscall(__NR_clock_adjtime, id, tx);
+}
+
+
+/* clear NTP time_status & time_state */
+int clear_time_state(void)
+{
+ struct timex tx;
+ int ret;
+
+ tx.modes = ADJ_STATUS;
+ tx.status = 0;
+ ret = adjtimex(&tx);
+ return ret;
+}
+
+#define NUM_FREQ_VALID 32
+#define NUM_FREQ_OUTOFRANGE 4
+#define NUM_FREQ_INVALID 2
+
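+/* adjtimex frequency values are scaled ppm (ppm << 16), so 1 ppm == 65536 */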
+long valid_freq[NUM_FREQ_VALID] = {
+ -499<<16,
+ -450<<16,
+ -400<<16,
+ -350<<16,
+ -300<<16,
+ -250<<16,
+ -200<<16,
+ -150<<16,
+ -100<<16,
+ -75<<16,
+ -50<<16,
+ -25<<16,
+ -10<<16,
+ -5<<16,
+ -1<<16,
+ -1000,
+ 1<<16,
+ 5<<16,
+ 10<<16,
+ 25<<16,
+ 50<<16,
+ 75<<16,
+ 100<<16,
+ 150<<16,
+ 200<<16,
+ 250<<16,
+ 300<<16,
+ 350<<16,
+ 400<<16,
+ 450<<16,
+ 499<<16,
+};
+
+long outofrange_freq[NUM_FREQ_OUTOFRANGE] = {
+ -1000<<16,
+ -550<<16,
+ 550<<16,
+ 1000<<16,
+};
+
+#ifndef LONG_MAX
+#define LONG_MAX (~0UL>>1)
+#define LONG_MIN (-LONG_MAX - 1)
+#endif
+
+long invalid_freq[NUM_FREQ_INVALID] = {
+ LONG_MAX,
+ LONG_MIN,
+};
+
+int validate_freq(void)
+{
+ struct timex tx;
+ int ret, pass = 0;
+ int i;
+
+ clear_time_state();
+
+ memset(&tx, 0, sizeof(struct timex));
+
+ printf("Testing ADJ_FREQ... ");
+ fflush(stdout);
+ for (i = 0; i < NUM_FREQ_VALID; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = valid_freq[i];
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("[FAIL]\n");
+			printf("Error: adjtimex(ADJ_FREQ, %ld) - %ld ppm\n",
+ valid_freq[i], valid_freq[i]>>16);
+ pass = -1;
+ goto out;
+ }
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.freq != valid_freq[i]) {
+			printf("Warning: freq value %ld is not what we set it to (%ld)!\n",
+ tx.freq, valid_freq[i]);
+ }
+ }
+ for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = outofrange_freq[i];
+
+ ret = adjtimex(&tx);
+ if (ret < 0) {
+ printf("[FAIL]\n");
+			printf("Error: adjtimex(ADJ_FREQ, %ld) - %ld ppm\n",
+ outofrange_freq[i], outofrange_freq[i]>>16);
+ pass = -1;
+ goto out;
+ }
+ tx.modes = 0;
+ ret = adjtimex(&tx);
+ if (tx.freq == outofrange_freq[i]) {
+ printf("[FAIL]\n");
+ printf("ERROR: out of range value %ld actually set!\n",
+ tx.freq);
+ pass = -1;
+ goto out;
+ }
+ }
+
+
+ if (sizeof(long) == 8) { /* this case only applies to 64bit systems */
+ for (i = 0; i < NUM_FREQ_INVALID; i++) {
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = invalid_freq[i];
+ ret = adjtimex(&tx);
+ if (ret >= 0) {
+ printf("[FAIL]\n");
+ printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n",
+ invalid_freq[i]);
+ pass = -1;
+ goto out;
+ }
+ }
+ }
+
+ printf("[OK]\n");
+out:
+ /* reset freq to zero */
+ tx.modes = ADJ_FREQUENCY;
+ tx.freq = 0;
+ ret = adjtimex(&tx);
+
+ return pass;
+}
+
+
+int set_offset(long long offset, int use_nano)
+{
+ struct timex tmx = {};
+ int ret;
+
+ tmx.modes = ADJ_SETOFFSET;
+ if (use_nano) {
+ tmx.modes |= ADJ_NANO;
+
+ tmx.time.tv_sec = offset / NSEC_PER_SEC;
+ tmx.time.tv_usec = offset % NSEC_PER_SEC;
+
+ if (offset < 0 && tmx.time.tv_usec) {
+ tmx.time.tv_sec -= 1;
+ tmx.time.tv_usec += NSEC_PER_SEC;
+ }
+ } else {
+ tmx.time.tv_sec = offset / USEC_PER_SEC;
+ tmx.time.tv_usec = offset % USEC_PER_SEC;
+
+ if (offset < 0 && tmx.time.tv_usec) {
+ tmx.time.tv_sec -= 1;
+ tmx.time.tv_usec += USEC_PER_SEC;
+ }
+ }
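+	/*
+	 * ADJ_SETOFFSET requires a normalized value: the sub-second field
+	 * must be non-negative, which is why the branches above borrow one
+	 * second for negative offsets.
+	 */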
+
+ ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+ if (ret < 0) {
+ printf("(sec: %ld usec: %ld) ", tmx.time.tv_sec, tmx.time.tv_usec);
+ printf("[FAIL]\n");
+ return -1;
+ }
+ return 0;
+}
+
+int set_bad_offset(long sec, long usec, int use_nano)
+{
+ struct timex tmx = {};
+ int ret;
+
+ tmx.modes = ADJ_SETOFFSET;
+ if (use_nano)
+ tmx.modes |= ADJ_NANO;
+
+ tmx.time.tv_sec = sec;
+ tmx.time.tv_usec = usec;
+ ret = clock_adjtime(CLOCK_REALTIME, &tmx);
+ if (ret >= 0) {
+ printf("Invalid (sec: %ld usec: %ld) did not fail! ", tmx.time.tv_sec, tmx.time.tv_usec);
+ printf("[FAIL]\n");
+ return -1;
+ }
+ return 0;
+}
+
+int validate_set_offset(void)
+{
+ printf("Testing ADJ_SETOFFSET... ");
+ fflush(stdout);
+
+ /* Test valid values */
+ if (set_offset(NSEC_PER_SEC - 1, 1))
+ return -1;
+
+ if (set_offset(-NSEC_PER_SEC + 1, 1))
+ return -1;
+
+ if (set_offset(-NSEC_PER_SEC - 1, 1))
+ return -1;
+
+ if (set_offset(5 * NSEC_PER_SEC, 1))
+ return -1;
+
+ if (set_offset(-5 * NSEC_PER_SEC, 1))
+ return -1;
+
+ if (set_offset(5 * NSEC_PER_SEC + NSEC_PER_SEC / 2, 1))
+ return -1;
+
+ if (set_offset(-5 * NSEC_PER_SEC - NSEC_PER_SEC / 2, 1))
+ return -1;
+
+ if (set_offset(USEC_PER_SEC - 1, 0))
+ return -1;
+
+ if (set_offset(-USEC_PER_SEC + 1, 0))
+ return -1;
+
+ if (set_offset(-USEC_PER_SEC - 1, 0))
+ return -1;
+
+ if (set_offset(5 * USEC_PER_SEC, 0))
+ return -1;
+
+ if (set_offset(-5 * USEC_PER_SEC, 0))
+ return -1;
+
+ if (set_offset(5 * USEC_PER_SEC + USEC_PER_SEC / 2, 0))
+ return -1;
+
+ if (set_offset(-5 * USEC_PER_SEC - USEC_PER_SEC / 2, 0))
+ return -1;
+
+ /* Test invalid values */
+ if (set_bad_offset(0, -1, 1))
+ return -1;
+ if (set_bad_offset(0, -1, 0))
+ return -1;
+ if (set_bad_offset(0, 2 * NSEC_PER_SEC, 1))
+ return -1;
+ if (set_bad_offset(0, 2 * USEC_PER_SEC, 0))
+ return -1;
+ if (set_bad_offset(0, NSEC_PER_SEC, 1))
+ return -1;
+ if (set_bad_offset(0, USEC_PER_SEC, 0))
+ return -1;
+ if (set_bad_offset(0, -NSEC_PER_SEC, 1))
+ return -1;
+ if (set_bad_offset(0, -USEC_PER_SEC, 0))
+ return -1;
+
+ printf("[OK]\n");
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ if (validate_freq())
+ return ksft_exit_fail();
+
+ if (validate_set_offset())
+ return ksft_exit_fail();
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/uevent/Makefile b/tools/testing/selftests/uevent/Makefile
new file mode 100644
index 000000000..f7baa9aa2
--- /dev/null
+++ b/tools/testing/selftests/uevent/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all clean
+
+BINARIES := uevent_filtering
+CFLAGS += -Wl,-no-as-needed -Wall
+
+uevent_filtering: uevent_filtering.c ../kselftest.h ../kselftest_harness.h
+ $(CC) $(CFLAGS) $< -o $@
+
+TEST_PROGS += $(BINARIES)
+EXTRA_CLEAN := $(BINARIES)
+
+all: $(BINARIES)
diff --git a/tools/testing/selftests/uevent/config b/tools/testing/selftests/uevent/config
new file mode 100644
index 000000000..1038f4515
--- /dev/null
+++ b/tools/testing/selftests/uevent/config
@@ -0,0 +1,2 @@
+CONFIG_USER_NS=y
+CONFIG_NET=y
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
new file mode 100644
index 000000000..f83391aa4
--- /dev/null
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/netlink.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <sys/eventfd.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
+#define __UEVENT_BUFFER_SIZE (2048 * 2)
+#define __UEVENT_HEADER "add@/devices/virtual/mem/full"
+#define __UEVENT_HEADER_LEN sizeof("add@/devices/virtual/mem/full")
+#define __UEVENT_LISTEN_ALL -1
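+
+/*
+ * Kernel uevents arrive as "<action>@<devpath>\0KEY=value\0..." netlink
+ * datagrams; __UEVENT_HEADER_LEN uses sizeof(), so comparisons against
+ * __UEVENT_HEADER also match the header's terminating NUL.
+ */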
+
+ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+again:
+ ret = read(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+again:
+ ret = write(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (ret != pid)
+ goto again;
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
+ return -1;
+
+ return 0;
+}
+
+static int uevent_listener(unsigned long post_flags, bool expect_uevent,
+ int sync_fd)
+{
+ int sk_fd, ret;
+ socklen_t sk_addr_len;
+ int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+ uint64_t sync_add = 1;
+ struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
+ char buf[__UEVENT_BUFFER_SIZE] = { 0 };
+ struct iovec iov = { buf, __UEVENT_BUFFER_SIZE };
+ char control[CMSG_SPACE(sizeof(struct ucred))];
+ struct msghdr hdr = {
+ &rcv_addr, sizeof(rcv_addr), &iov, 1,
+ control, sizeof(control), 0,
+ };
+
+ sk_fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC,
+ NETLINK_KOBJECT_UEVENT);
+ if (sk_fd < 0) {
+ fprintf(stderr, "%s - Failed to open uevent socket\n", strerror(errno));
+ return -1;
+ }
+
+ ret = setsockopt(sk_fd, SOL_SOCKET, SO_RCVBUF, &rcv_buf_sz,
+ sizeof(rcv_buf_sz));
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to set socket options\n", strerror(errno));
+ goto on_error;
+ }
+
+ sk_addr.nl_family = AF_NETLINK;
+ sk_addr.nl_groups = __UEVENT_LISTEN_ALL;
+
+ sk_addr_len = sizeof(sk_addr);
+ ret = bind(sk_fd, (struct sockaddr *)&sk_addr, sk_addr_len);
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to bind socket\n", strerror(errno));
+ goto on_error;
+ }
+
+ ret = getsockname(sk_fd, (struct sockaddr *)&sk_addr, &sk_addr_len);
+ if (ret < 0) {
+ fprintf(stderr, "%s - Failed to retrieve socket name\n", strerror(errno));
+ goto on_error;
+ }
+
+ if ((size_t)sk_addr_len != sizeof(sk_addr)) {
+ fprintf(stderr, "Invalid socket address size\n");
+ goto on_error;
+ }
+
+ if (post_flags & CLONE_NEWUSER) {
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare user namespace\n",
+ strerror(errno));
+ goto on_error;
+ }
+ }
+
+ if (post_flags & CLONE_NEWNET) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare network namespace\n",
+ strerror(errno));
+ goto on_error;
+ }
+ }
+
+ ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
+ close(sync_fd);
+ if (ret != sizeof(sync_add)) {
+ fprintf(stderr, "Failed to synchronize with parent process\n");
+ goto on_error;
+ }
+
+ fret = 0;
+ for (;;) {
+ ssize_t r;
+
+ r = recvmsg(sk_fd, &hdr, 0);
+ if (r <= 0) {
+ fprintf(stderr, "%s - Failed to receive uevent\n", strerror(errno));
+			fret = -1;
+ break;
+ }
+
+ /* ignore libudev messages */
+ if (memcmp(buf, "libudev", 8) == 0)
+ continue;
+
+ /* ignore uevents we didn't trigger */
+ if (memcmp(buf, __UEVENT_HEADER, __UEVENT_HEADER_LEN) != 0)
+ continue;
+
+ if (!expect_uevent) {
+ fprintf(stderr, "Received unexpected uevent:\n");
+			fret = -1;
+ }
+
+ if (TH_LOG_ENABLED) {
+ /* If logging is enabled dump the received uevent. */
+ (void)write_nointr(STDERR_FILENO, buf, r);
+ (void)write_nointr(STDERR_FILENO, "\n", 1);
+ }
+
+ break;
+ }
+
+on_error:
+ close(sk_fd);
+
+ return fret;
+}
+
+int trigger_uevent(unsigned int times)
+{
+ int fd, ret;
+ unsigned int i;
+
+ fd = open(__DEV_FULL, O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return -EINVAL;
+
+ return -1;
+ }
+
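+	/*
+	 * Writing an action name to a device's sysfs uevent file makes the
+	 * kernel synthesize that uevent for the device.
+	 */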
+ for (i = 0; i < times; i++) {
+ ret = write_nointr(fd, "add\n", sizeof("add\n") - 1);
+ if (ret < 0) {
+ fprintf(stderr, "Failed to trigger uevent\n");
+ break;
+ }
+ }
+ close(fd);
+
+ return ret;
+}
+
+int set_death_signal(void)
+{
+ int ret;
+ pid_t ppid;
+
+ ret = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
+
+ /* Check whether we have been orphaned. */
+ ppid = getppid();
+ if (ppid == 1) {
+ pid_t self;
+
+ self = getpid();
+ ret = kill(self, SIGKILL);
+ }
+
+ if (ret < 0)
+ return -1;
+
+ return 0;
+}
+
+static int do_test(unsigned long pre_flags, unsigned long post_flags,
+ bool expect_uevent, int sync_fd)
+{
+ int ret;
+ uint64_t wait_val;
+ pid_t pid;
+ sigset_t mask;
+ sigset_t orig_mask;
+ struct timespec timeout;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, SIGCHLD);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, &orig_mask);
+ if (ret < 0) {
+		fprintf(stderr, "%s - Failed to block SIGCHLD\n", strerror(errno));
+ return -1;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fprintf(stderr, "%s - Failed to fork() new process\n", strerror(errno));
+ return -1;
+ }
+
+ if (pid == 0) {
+ /* Make sure that we go away when our parent dies. */
+ ret = set_death_signal();
+ if (ret < 0) {
+ fprintf(stderr, "Failed to set PR_SET_PDEATHSIG to SIGKILL\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ if (pre_flags & CLONE_NEWUSER) {
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare user namespace\n",
+ strerror(errno));
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (pre_flags & CLONE_NEWNET) {
+ ret = unshare(CLONE_NEWNET);
+ if (ret < 0) {
+ fprintf(stderr,
+ "%s - Failed to unshare network namespace\n",
+ strerror(errno));
+ _exit(EXIT_FAILURE);
+ }
+ }
+
+ if (uevent_listener(post_flags, expect_uevent, sync_fd) < 0)
+ _exit(EXIT_FAILURE);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ ret = read_nointr(sync_fd, &wait_val, sizeof(wait_val));
+ if (ret != sizeof(wait_val)) {
+ fprintf(stderr, "Failed to synchronize with child process\n");
+ _exit(EXIT_FAILURE);
+ }
+
+ /* Trigger 10 uevents to account for the case where the kernel might
+ * drop some.
+ */
+ ret = trigger_uevent(10);
+ if (ret < 0)
+ fprintf(stderr, "Failed triggering uevents\n");
+
+ /* Wait for 2 seconds before considering this failed. This should be
+ * plenty of time for the kernel to deliver the uevent even under heavy
+ * load.
+ */
+ timeout.tv_sec = 2;
+ timeout.tv_nsec = 0;
+
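+	/*
+	 * On timeout the listener is still blocked in recvmsg(). When no
+	 * uevent was expected that is the success case, so terminate the
+	 * child with SIGTERM (its handler _exits successfully); otherwise
+	 * send SIGUSR1, which is not handled, so wait_for_pid() reports
+	 * the failure.
+	 */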
+again:
+ ret = sigtimedwait(&mask, NULL, &timeout);
+ if (ret < 0) {
+ if (errno == EINTR)
+ goto again;
+
+ if (!expect_uevent)
+ ret = kill(pid, SIGTERM); /* success */
+ else
+ ret = kill(pid, SIGUSR1); /* error */
+ if (ret < 0)
+ return -1;
+ }
+
+ ret = wait_for_pid(pid);
+ if (ret < 0)
+ return -1;
+
+ return ret;
+}
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGTERM)
+ _exit(EXIT_SUCCESS);
+
+ _exit(EXIT_FAILURE);
+}
+
+TEST(uevent_filtering)
+{
+ int ret, sync_fd;
+ struct sigaction act;
+
+ if (geteuid()) {
+ TH_LOG("Uevent filtering tests require root privileges. Skipping test");
+ _exit(KSFT_SKIP);
+ }
+
+ ret = access(__DEV_FULL, F_OK);
+ EXPECT_EQ(0, ret) {
+ if (errno == ENOENT) {
+ TH_LOG(__DEV_FULL " does not exist. Skipping test");
+ _exit(KSFT_SKIP);
+ }
+
+ _exit(KSFT_FAIL);
+ }
+
+ act.sa_handler = signal_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+
+ ret = sigaction(SIGTERM, &act, NULL);
+ ASSERT_EQ(0, ret);
+
+ sync_fd = eventfd(0, EFD_CLOEXEC);
+ ASSERT_GE(sync_fd, 0);
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace owned by
+ * initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in non-initial network namespace
+ * owned by initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(CLONE_NEWNET, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - unshare user namespace
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(CLONE_NEWUSER, 0, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in non-initial network namespace
+ * owned by non-initial user namespace.
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives no uevent
+ */
+ ret = do_test(CLONE_NEWUSER | CLONE_NEWNET, 0, false, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare network namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWNET, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare user namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWUSER, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+ /*
+ * Setup:
+ * - Open uevent listening socket in initial network namespace
+ * owned by initial user namespace.
+ * - unshare user namespace
+ * - unshare network namespace
+ * - Trigger uevent in initial network namespace owned by initial user
+ * namespace.
+ * Expected Result:
+ * - uevent listening socket receives uevent
+ */
+ ret = do_test(0, CLONE_NEWUSER | CLONE_NEWNET, true, sync_fd);
+ ASSERT_EQ(0, ret) {
+ goto do_cleanup;
+ }
+
+do_cleanup:
+ close(sync_fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile
new file mode 100644
index 000000000..d401b63c5
--- /dev/null
+++ b/tools/testing/selftests/user/Makefile
@@ -0,0 +1,8 @@
+# Makefile for user memory selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+TEST_PROGS := test_user_copy.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config
new file mode 100644
index 000000000..784ed8416
--- /dev/null
+++ b/tools/testing/selftests/user/config
@@ -0,0 +1 @@
+CONFIG_TEST_USER_COPY=m
diff --git a/tools/testing/selftests/user/test_user_copy.sh b/tools/testing/selftests/user/test_user_copy.sh
new file mode 100755
index 000000000..f9b31a574
--- /dev/null
+++ b/tools/testing/selftests/user/test_user_copy.sh
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs copy_to/from_user infrastructure using test_user_copy kernel module
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+	echo "user_copy: module test_user_copy is not found [SKIP]"
+ exit $ksft_skip
+fi
+if /sbin/modprobe -q test_user_copy; then
+ /sbin/modprobe -q -r test_user_copy
+ echo "user_copy: ok"
+else
+ echo "user_copy: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
new file mode 100644
index 000000000..133bf9ee9
--- /dev/null
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -0,0 +1,2 @@
+vdso_test
+vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
new file mode 100644
index 000000000..9e03d61f5
--- /dev/null
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../lib.mk
+
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+endif
+
+ifndef CROSS_COMPILE
+CFLAGS := -std=gnu99
+CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+ifeq ($(CONFIG_X86_32),y)
+LDLIBS += -lgcc_s
+endif
+
+all: $(TEST_GEN_PROGS)
+$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
+ $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
+ vdso_standalone_test_x86.c parse_vdso.c \
+ -o $@
+
+endif
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
new file mode 100644
index 000000000..1dbb4b872
--- /dev/null
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -0,0 +1,269 @@
+/*
+ * parse_vdso.c: Linux reference vDSO parser
+ * Written by Andrew Lutomirski, 2011-2014.
+ *
+ * This code is meant to be linked in to various programs that run on Linux.
+ * As such, it is available with as few restrictions as possible. This file
+ * is licensed under the Creative Commons Zero License, version 1.0,
+ * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
+ *
+ * The vDSO is a regular ELF DSO that the kernel maps into user space when
+ * it starts a program. It works equally well in statically and dynamically
+ * linked binaries.
+ *
+ * This code is tested on x86. In principle it should work on any
+ * architecture that has a vDSO.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <elf.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want. For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called. vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+extern void vdso_init_from_auxv(void *auxv);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void *vdso_sym(const char *version, const char *name);
+
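+/*
+ * A minimal usage sketch (assuming getauxval() from <sys/auxv.h> is
+ * available; otherwise pass the auxv saved at process startup to
+ * vdso_init_from_auxv()):
+ *
+ *	typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ *	struct timeval tv;
+ *	gtod_t gtod;
+ *
+ *	vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+ *	gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *	if (gtod)
+ *		gtod(&tv, NULL);
+ */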
+
+/* And here's the code. */
+#ifndef ELF_BITS
+# if ULONG_MAX > 0xffffffffUL
+# define ELF_BITS 64
+# else
+# define ELF_BITS 32
+# endif
+#endif
+
+#define ELF_BITS_XFORM2(bits, x) Elf##bits##_##x
+#define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x)
+#define ELF(x) ELF_BITS_XFORM(ELF_BITS, x)
+
+static struct vdso_info
+{
+ bool valid;
+
+ /* Load information */
+ uintptr_t load_addr;
+ uintptr_t load_offset; /* load_addr - recorded vaddr */
+
+ /* Symbol table */
+ ELF(Sym) *symtab;
+ const char *symstrings;
+ ELF(Word) *bucket, *chain;
+ ELF(Word) nbucket, nchain;
+
+ /* Version table */
+ ELF(Versym) *versym;
+ ELF(Verdef) *verdef;
+} vdso_info;
+
+/* Straight from the ELF specification. */
+static unsigned long elf_hash(const unsigned char *name)
+{
+ unsigned long h = 0, g;
+ while (*name)
+ {
+ h = (h << 4) + *name++;
+		if ((g = h & 0xf0000000))
+ h ^= g >> 24;
+ h &= ~g;
+ }
+ return h;
+}
+
+void vdso_init_from_sysinfo_ehdr(uintptr_t base)
+{
+ size_t i;
+ bool found_vaddr = false;
+
+ vdso_info.valid = false;
+
+ vdso_info.load_addr = base;
+
+ ELF(Ehdr) *hdr = (ELF(Ehdr)*)base;
+ if (hdr->e_ident[EI_CLASS] !=
+ (ELF_BITS == 32 ? ELFCLASS32 : ELFCLASS64)) {
+ return; /* Wrong ELF class -- check ELF_BITS */
+ }
+
+ ELF(Phdr) *pt = (ELF(Phdr)*)(vdso_info.load_addr + hdr->e_phoff);
+ ELF(Dyn) *dyn = 0;
+
+ /*
+ * We need two things from the segment table: the load offset
+ * and the dynamic table.
+ */
+ for (i = 0; i < hdr->e_phnum; i++)
+ {
+ if (pt[i].p_type == PT_LOAD && !found_vaddr) {
+ found_vaddr = true;
+ vdso_info.load_offset = base
+ + (uintptr_t)pt[i].p_offset
+ - (uintptr_t)pt[i].p_vaddr;
+ } else if (pt[i].p_type == PT_DYNAMIC) {
+ dyn = (ELF(Dyn)*)(base + pt[i].p_offset);
+ }
+ }
+
+ if (!found_vaddr || !dyn)
+ return; /* Failed */
+
+ /*
+ * Fish out the useful bits of the dynamic table.
+ */
+ ELF(Word) *hash = 0;
+ vdso_info.symstrings = 0;
+ vdso_info.symtab = 0;
+ vdso_info.versym = 0;
+ vdso_info.verdef = 0;
+ for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
+ switch (dyn[i].d_tag) {
+ case DT_STRTAB:
+ vdso_info.symstrings = (const char *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_SYMTAB:
+ vdso_info.symtab = (ELF(Sym) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_HASH:
+ hash = (ELF(Word) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_VERSYM:
+ vdso_info.versym = (ELF(Versym) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ case DT_VERDEF:
+ vdso_info.verdef = (ELF(Verdef) *)
+ ((uintptr_t)dyn[i].d_un.d_ptr
+ + vdso_info.load_offset);
+ break;
+ }
+ }
+ if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
+ return; /* Failed */
+
+ if (!vdso_info.verdef)
+ vdso_info.versym = 0;
+
+ /* Parse the hash table header. */
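+	/*
+	 * SysV hash layout: [nbucket][nchain][bucket[nbucket]][chain[nchain]];
+	 * a lookup starts at bucket[hash % nbucket] and follows chain[] until
+	 * STN_UNDEF, as vdso_sym() does below.
+	 */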
+ vdso_info.nbucket = hash[0];
+ vdso_info.nchain = hash[1];
+ vdso_info.bucket = &hash[2];
+ vdso_info.chain = &hash[vdso_info.nbucket + 2];
+
+ /* That's all we need. */
+ vdso_info.valid = true;
+}
+
+static bool vdso_match_version(ELF(Versym) ver,
+ const char *name, ELF(Word) hash)
+{
+ /*
+ * This is a helper function to check if the version indexed by
+ * ver matches name (which hashes to hash).
+ *
+ * The version definition table is a mess, and I don't know how
+ * to do this in better than linear time without allocating memory
+ * to build an index. I also don't know why the table has
+ * variable size entries in the first place.
+ *
+ * For added fun, I can't find a comprehensible specification of how
+ * to parse all the weird flags in the table.
+ *
+ * So I just parse the whole table every time.
+ */
+
+ /* First step: find the version definition */
+ ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
+ ELF(Verdef) *def = vdso_info.verdef;
+ while(true) {
+ if ((def->vd_flags & VER_FLG_BASE) == 0
+ && (def->vd_ndx & 0x7fff) == ver)
+ break;
+
+ if (def->vd_next == 0)
+ return false; /* No definition. */
+
+ def = (ELF(Verdef) *)((char *)def + def->vd_next);
+ }
+
+ /* Now figure out whether it matches. */
+ ELF(Verdaux) *aux = (ELF(Verdaux)*)((char *)def + def->vd_aux);
+ return def->vd_hash == hash
+ && !strcmp(name, vdso_info.symstrings + aux->vda_name);
+}
+
+void *vdso_sym(const char *version, const char *name)
+{
+ unsigned long ver_hash;
+ if (!vdso_info.valid)
+ return 0;
+
+ ver_hash = elf_hash(version);
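+ /* SysV ELF hash lookup: hash picks a bucket; walk that bucket's chain. */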
+ ELF(Word) chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
+
+ for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
+ ELF(Sym) *sym = &vdso_info.symtab[chain];
+
+ /* Check for a defined global or weak function w/ right name. */
+ if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
+ continue;
+ if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
+ ELF64_ST_BIND(sym->st_info) != STB_WEAK)
+ continue;
+ if (sym->st_shndx == SHN_UNDEF)
+ continue;
+ if (strcmp(name, vdso_info.symstrings + sym->st_name))
+ continue;
+
+ /* Check symbol version. */
+ if (vdso_info.versym
+ && !vdso_match_version(vdso_info.versym[chain],
+ version, ver_hash))
+ continue;
+
+ return (void *)(vdso_info.load_offset + sym->st_value);
+ }
+
+ return 0;
+}
+
+void vdso_init_from_auxv(void *auxv)
+{
+ ELF(auxv_t) *elf_auxv = auxv;
+ for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
+ {
+ if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
+ vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
+ return;
+ }
+ }
+
+ vdso_info.valid = false;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
new file mode 100644
index 000000000..93b0ebf8c
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -0,0 +1,128 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c on x86
+ * Copyright (c) 2011-2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * You can amuse yourself by compiling with:
+ * gcc -std=gnu99 -nostdlib
+ * -Os -fno-asynchronous-unwind-tables -flto -lgcc_s
+ * vdso_standalone_test_x86.c parse_vdso.c
+ * to generate a small binary. On x86_64, you can omit -lgcc_s
+ * if you want the binary to be completely standalone.
+ */
+
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <stdint.h>
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/* We need a few libc functions... */
+int strcmp(const char *a, const char *b)
+{
+ /* This implementation is buggy: it never returns -1. */
+ while (*a || *b) {
+ if (*a != *b)
+ return 1;
+ if (*a == 0 || *b == 0)
+ return 1;
+ a++;
+ b++;
+ }
+
+ return 0;
+}
+
+/* ...and two syscalls. This is x86-specific. */
+static inline long x86_syscall3(long nr, long a0, long a1, long a2)
+{
+ long ret;
+#ifdef __x86_64__
+ asm volatile ("syscall" : "=a" (ret) : "a" (nr),
+ "D" (a0), "S" (a1), "d" (a2) :
+ "cc", "memory", "rcx",
+ "r8", "r9", "r10", "r11" );
+#else
+ asm volatile ("int $0x80" : "=a" (ret) : "a" (nr),
+ "b" (a0), "c" (a1), "d" (a2) :
+ "cc", "memory" );
+#endif
+ return ret;
+}
+
+static inline long linux_write(int fd, const void *data, size_t len)
+{
+ return x86_syscall3(__NR_write, fd, (long)data, (long)len);
+}
+
+static inline void linux_exit(int code)
+{
+ x86_syscall3(__NR_exit, code, 0, 0);
+}
+
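+/*
+ * Fills in decimal digits right-to-left, ending at *lastdig; e.g.
+ * to_base10(buf + 31, 42) writes '4' to buf[30] and '2' to buf[31],
+ * leaving the rest of the (pre-padded) buffer untouched.
+ */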
+void to_base10(char *lastdig, time_t n)
+{
+ while (n) {
+ *lastdig = (n % 10) + '0';
+ n /= 10;
+ lastdig--;
+ }
+}
+
+__attribute__((externally_visible)) void c_main(void **stack)
+{
+ /* Parse the stack */
+ long argc = (long)*stack;
+ stack += argc + 2;
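+ /* (stack[0] held argc, stack[1..argc] argv, stack[argc+1] argv's NULL.) */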
+
+ /* Now we're pointing at the environment. Skip it. */
+ while(*stack)
+ stack++;
+ stack++;
+
+ /* Now we're pointing at auxv. Initialize the vDSO parser. */
+ vdso_init_from_auxv((void *)stack);
+
+ /* Find gettimeofday. */
+ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ gtod_t gtod = (gtod_t)vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+
+ if (!gtod)
+ linux_exit(1);
+
+ struct timeval tv;
+ long ret = gtod(&tv, 0);
+
+ if (ret == 0) {
+ char buf[] = "The time is                     .000000\n";
+ to_base10(buf + 31, tv.tv_sec);
+ to_base10(buf + 38, tv.tv_usec);
+ linux_write(1, buf, sizeof(buf) - 1);
+ } else {
+ linux_exit(ret);
+ }
+
+ linux_exit(0);
+}
+
+/*
+ * This is the real entry point. It passes the initial stack into
+ * the C entry point.
+ */
+asm (
+ ".text\n"
+ ".global _start\n"
+ ".type _start,@function\n"
+ "_start:\n\t"
+#ifdef __x86_64__
+ "mov %rsp,%rdi\n\t"
+ "jmp c_main"
+#else
+ "push %esp\n\t"
+ "call c_main\n\t"
+ "int $3"
+#endif
+ );
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test.c
new file mode 100644
index 000000000..eda53f833
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test.c
@@ -0,0 +1,68 @@
+/*
+ * vdso_test.c: Sample code to test parse_vdso.c
+ * Copyright (c) 2014 Andy Lutomirski
+ * Subject to the GNU General Public License, version 2
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ *
+ * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+#include "../kselftest.h"
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+/*
+ * ARM64's vDSO exports its gettimeofday() implementation with a different
+ * name and version from other architectures, so we need to handle it as
+ * a special case.
+ */
+#if defined(__aarch64__)
+const char *version = "LINUX_2.6.39";
+const char *name = "__kernel_gettimeofday";
+#else
+const char *version = "LINUX_2.6";
+const char *name = "__vdso_gettimeofday";
+#endif
+
+int main(int argc, char **argv)
+{
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ return KSFT_SKIP;
+ }
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+ /* Find gettimeofday. */
+ typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+ gtod_t gtod = (gtod_t)vdso_sym(version, name);
+
+ if (!gtod) {
+ printf("Could not find %s\n", name);
+ return KSFT_SKIP;
+ }
+
+ struct timeval tv;
+ long ret = gtod(&tv, 0);
+
+ if (ret == 0) {
+ printf("The time is %lld.%06lld\n",
+ (long long)tv.tv_sec, (long long)tv.tv_usec);
+ } else {
+ printf("%s failed\n", name);
+ return KSFT_FAIL;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
new file mode 100644
index 000000000..af5ff83f6
--- /dev/null
+++ b/tools/testing/selftests/vm/.gitignore
@@ -0,0 +1,15 @@
+hugepage-mmap
+hugepage-shm
+map_hugetlb
+map_populate
+thuge-gen
+compaction_test
+mlock2-tests
+on-fault-limit
+transhuge-stress
+userfaultfd
+mlock-intersect-test
+mlock-random-test
+virtual_address_range
+gup_benchmark
+va_128TBswitch
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
new file mode 100644
index 000000000..2cf3dc49b
--- /dev/null
+++ b/tools/testing/selftests/vm/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for vm selftests
+
+ifndef OUTPUT
+ OUTPUT := $(shell pwd)
+endif
+
+CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
+LDLIBS = -lrt
+TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += va_128TBswitch
+TEST_GEN_FILES += virtual_address_range
+
+TEST_PROGS := run_vmtests
+
+TEST_FILES := test_vmalloc.sh
+
+KSFT_KHDR_INSTALL := 1
+include ../lib.mk
+
+$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
+
+$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
new file mode 100644
index 000000000..bcec71250
--- /dev/null
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+#define MAP_SIZE 1048576
+
+struct map_list {
+ void *map;
+ struct map_list *next;
+};
+
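+/*
+ * Reads MemFree and Hugepagesize (both reported in kB) from
+ * /proc/meminfo, using grep to isolate the numeric field of each line.
+ */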
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+ char buffer[256] = {0};
+ char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+
+ *memfree = atoll(buffer);
+ cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+ cmdfile = popen(cmd, "r");
+
+ if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+ perror("Failed to read meminfo\n");
+ return -1;
+ }
+
+ pclose(cmdfile);
+ *hugepagesize = atoll(buffer);
+
+ return 0;
+}
+
+int prereq(void)
+{
+ char allowed;
+ int fd;
+
+ fd = open("/proc/sys/vm/compact_unevictable_allowed",
+ O_RDONLY | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ return -1;
+ }
+
+ if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+ perror("Failed to read from\n"
+ "/proc/sys/vm/compact_unevictable_allowed\n");
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+ if (allowed == '1')
+ return 0;
+
+ return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+{
+ int fd;
+ int compaction_index = 0;
+ char initial_nr_hugepages[10] = {0};
+ char nr_hugepages[10] = {0};
+
+ /* We want to test with 80% of available memory. Otherwise, the OOM
+    killer comes into play */
+ mem_free = mem_free * 0.8;
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ perror("Failed to open /proc/sys/vm/nr_hugepages");
+ return -1;
+ }
+
+ if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+ perror("Failed to read from /proc/sys/vm/nr_hugepages");
+ goto close_fd;
+ }
+
+ /* Start with the initial condition of 0 huge pages*/
+ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+ perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ /* Request a large number of huge pages. The kernel will allocate
+    as many as it can */
+ if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+ perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ /* We should have been able to request at least 1/3rd of the memory in
+    huge pages */
+ compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+
+ if (compaction_index > 3) {
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+ fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ goto close_fd;
+ }
+
+ printf("No of huge pages allocated = %d\n",
+ (atoi(nr_hugepages)));
+
+ lseek(fd, 0, SEEK_SET);
+
+ if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
+ != strlen(initial_nr_hugepages)) {
+ perror("Failed to write value to /proc/sys/vm/nr_hugepages\n");
+ goto close_fd;
+ }
+
+ close(fd);
+ return 0;
+
+ close_fd:
+ close(fd);
+ printf("Not OK. Compaction test failed.");
+ return -1;
+}
+
+
+int main(int argc, char **argv)
+{
+ struct rlimit lim;
+ struct map_list *list, *entry;
+ size_t page_size, i;
+ void *map = NULL;
+ unsigned long mem_free = 0;
+ unsigned long hugepage_size = 0;
+ unsigned long mem_fragmentable = 0;
+
+ if (prereq() != 0) {
+ printf("Either the sysctl compact_unevictable_allowed is not\n"
+ "set to 1 or couldn't read the proc file.\n"
+ "Skipping the test\n");
+ return KSFT_SKIP;
+ }
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
+ perror("Failed to set rlimit:\n");
+ return -1;
+ }
+
+ page_size = getpagesize();
+
+ list = NULL;
+
+ if (read_memory_info(&mem_free, &hugepage_size) != 0) {
+ printf("ERROR: Cannot read meminfo\n");
+ return -1;
+ }
+
+ mem_fragmentable = mem_free * 0.8 / 1024;
+
+ while (mem_fragmentable > 0) {
+ map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+ if (map == MAP_FAILED)
+ break;
+
+ entry = malloc(sizeof(struct map_list));
+ if (!entry) {
+ munmap(map, MAP_SIZE);
+ break;
+ }
+ entry->map = map;
+ entry->next = list;
+ list = entry;
+
+ /* Write something (in this case the address of the map) to
+ * ensure that KSM can't merge the mapped pages
+ */
+ for (i = 0; i < MAP_SIZE; i += page_size)
+ *(unsigned long *)(map + i) = (unsigned long)map + i;
+
+ mem_fragmentable--;
+ }
+
+ for (entry = list; entry != NULL; entry = entry->next)
+ munmap(entry->map, MAP_SIZE);
+
+ if (check_compaction(mem_free, hugepage_size) == 0)
+ return 0;
+
+ return -1;
+}
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
new file mode 100644
index 000000000..1c0d76cb5
--- /dev/null
+++ b/tools/testing/selftests/vm/config
@@ -0,0 +1,2 @@
+CONFIG_SYSVIPC=y
+CONFIG_USERFAULTFD=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
new file mode 100644
index 000000000..17da711f2
--- /dev/null
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -0,0 +1,93 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <linux/types.h>
+
+#define MB (1UL << 20)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
+
+struct gup_benchmark {
+ __u64 delta_usec;
+ __u64 addr;
+ __u64 size;
+ __u32 nr_pages_per_call;
+ __u32 flags;
+ __u64 expansion[10]; /* For future use */
+};
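+
+/*
+ * Argument block for the GUP_FAST_BENCHMARK ioctl: userspace fills in
+ * addr, size, nr_pages_per_call and flags; the kernel reports the
+ * elapsed time in delta_usec and may shrink size if it stopped early
+ * (see the "truncated" message in main() below).
+ */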
+
+int main(int argc, char **argv)
+{
+ struct gup_benchmark gup;
+ unsigned long size = 128 * MB;
+ int i, fd, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+ char *p;
+
+ while ((opt = getopt(argc, argv, "m:r:n:wtT")) != -1) {
+ switch (opt) {
+ case 'm':
+ size = atoi(optarg) * MB;
+ break;
+ case 'r':
+ repeats = atoi(optarg);
+ break;
+ case 'n':
+ nr_pages = atoi(optarg);
+ break;
+ case 't':
+ thp = 1;
+ break;
+ case 'T':
+ thp = 0;
+ break;
+ case 'w':
+ write = 1;
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ gup.nr_pages_per_call = nr_pages;
+ gup.flags = write;
+
+ fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
+ if (fd == -1)
+ perror("open"), exit(1);
+
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p == MAP_FAILED)
+ perror("mmap"), exit(1);
+ gup.addr = (unsigned long)p;
+
+ if (thp == 1)
+ madvise(p, size, MADV_HUGEPAGE);
+ else if (thp == 0)
+ madvise(p, size, MADV_NOHUGEPAGE);
+
+ for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
+ p[0] = 0;
+
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ if (ioctl(fd, GUP_FAST_BENCHMARK, &gup))
+ perror("ioctl"), exit(1);
+
+ printf("Time: %lld us", gup.delta_usec);
+ if (gup.size != size)
+ printf(", truncated (size: %lld)", gup.size);
+ printf("\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c
new file mode 100644
index 000000000..93f9e7b81
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-mmap.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-mmap:
+ *
+ * Example of using huge page memory in a user application using the mmap
+ * system call. Before running this application, make sure that the
+ * administrator has mounted the hugetlbfs filesystem (on some directory
+ * like /mnt) using the command mount -t hugetlbfs nodev /mnt. In this
+ * example, the app is requesting memory of size 256MB that is backed by
+ * huge pages.
+ *
+ * For the ia64 architecture, the Linux kernel reserves Region number 4 for
+ * huge pages. That means that if one requires a fixed address, a huge page
+ * aligned address starting with 0x800000... will be required. If a fixed
+ * address is not required, the kernel will select an address in the proper
+ * range.
+ * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define FILE_NAME "huge/hugepagefile"
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_SHARED | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_SHARED)
+#endif
+
+static void check_bytes(char *addr)
+{
+ printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < LENGTH; i++)
+ if (*(addr + i) != (char)i) {
+ printf("Mismatch at %lu\n", i);
+ return 1;
+ }
+ return 0;
+}
+
+int main(void)
+{
+ void *addr;
+ int fd, ret;
+
+ fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+ if (fd < 0) {
+ perror("Open failed");
+ exit(1);
+ }
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ unlink(FILE_NAME);
+ exit(1);
+ }
+
+ printf("Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr);
+ ret = read_bytes(addr);
+
+ munmap(addr, LENGTH);
+ close(fd);
+ unlink(FILE_NAME);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/vm/hugepage-shm.c
new file mode 100644
index 000000000..e2527f320
--- /dev/null
+++ b/tools/testing/selftests/vm/hugepage-shm.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-shm:
+ *
+ * Example of using huge page memory in a user application using Sys V shared
+ * memory system calls. In this example the app is requesting 256MB of
+ * memory that is backed by huge pages. The application uses the flag
+ * SHM_HUGETLB in the shmget system call to inform the kernel that it is
+ * requesting huge pages.
+ *
+ * For the ia64 architecture, the Linux kernel reserves Region number 4 for
+ * huge pages. That means that if one requires a fixed address, a huge page
+ * aligned address starting with 0x800000... will be required. If a fixed
+ * address is not required, the kernel will select an address in the proper
+ * range.
+ * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
+ *
+ * Note: The default shared memory limit is quite low on many kernels,
+ * you may need to increase it via:
+ *
+ * echo 268435456 > /proc/sys/kernel/shmmax
+ *
+ * This will increase the maximum size per shared memory segment to 256MB.
+ * The other limit that you will hit eventually is shmall which is the
+ * total amount of shared memory in pages. To set it to 16GB on a system
+ * with a 4kB pagesize do:
+ *
+ * echo 4194304 > /proc/sys/kernel/shmall
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+
+#ifndef SHM_HUGETLB
+#define SHM_HUGETLB 04000
+#endif
+
+#define LENGTH (256UL*1024*1024)
+
+#define dprintf(x) printf(x)
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define SHMAT_FLAGS (SHM_RND)
+#else
+#define ADDR (void *)(0x0UL)
+#define SHMAT_FLAGS (0)
+#endif
+
+int main(void)
+{
+ int shmid;
+ unsigned long i;
+ char *shmaddr;
+
+ shmid = shmget(2, LENGTH, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (shmid < 0) {
+ perror("shmget");
+ exit(1);
+ }
+ printf("shmid: 0x%x\n", shmid);
+
+ shmaddr = shmat(shmid, ADDR, SHMAT_FLAGS);
+ if (shmaddr == (char *)-1) {
+ perror("Shared memory attach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(2);
+ }
+ printf("shmaddr: %p\n", shmaddr);
+
+ dprintf("Starting the writes:\n");
+ for (i = 0; i < LENGTH; i++) {
+ shmaddr[i] = (char)(i);
+ if (!(i % (1024 * 1024)))
+ dprintf(".");
+ }
+ dprintf("\n");
+
+ dprintf("Starting the Check...");
+ for (i = 0; i < LENGTH; i++)
+ if (shmaddr[i] != (char)i) {
+ printf("\nIndex %lu mismatched\n", i);
+ exit(3);
+ }
+ dprintf("Done.\n");
+
+ if (shmdt((const void *)shmaddr) != 0) {
+ perror("Detach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(4);
+ }
+
+ shmctl(shmid, IPC_RMID, NULL);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
new file mode 100644
index 000000000..9b777fa95
--- /dev/null
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Example of using hugepage memory in a user application using the mmap
+ * system call with MAP_HUGETLB flag. Before running this program make
+ * sure the administrator has allocated enough default sized huge pages
+ * to cover the 256 MB allocation.
+ *
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified. Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#define LENGTH (256UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+#ifndef MAP_HUGETLB
+#define MAP_HUGETLB 0x40000 /* arch specific */
+#endif
+
+/* Only ia64 requires this */
+#ifdef __ia64__
+#define ADDR (void *)(0x8000000000000000UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define ADDR (void *)(0x0UL)
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static void check_bytes(char *addr)
+{
+ printf("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < LENGTH; i++)
+ if (*(addr + i) != (char)i) {
+ printf("Mismatch at %lu\n", i);
+ return 1;
+ }
+ return 0;
+}
+
+int main(void)
+{
+ void *addr;
+ int ret;
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ printf("Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr);
+ ret = read_bytes(addr);
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, LENGTH)) {
+ perror("munmap");
+ exit(1);
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/vm/map_populate.c
new file mode 100644
index 000000000..6b8aeaa0b
--- /dev/null
+++ b/tools/testing/selftests/vm/map_populate.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Dmitry Safonov, Arista Networks
+ *
+ * MAP_POPULATE | MAP_PRIVATE should COW VMA pages.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#ifndef MMAP_SZ
+#define MMAP_SZ 4096
+#endif
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
+ __LINE__, (description), strerror(errno)); \
+ exit(1); \
+ } \
+ } while (0)
+
+static int parent_f(int sock, unsigned long *smap, int child)
+{
+ int status, ret;
+
+ ret = read(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ *smap = 0x22222BAD;
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = write(sock, &status, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ waitpid(child, &status, 0);
+ BUG_ON(!WIFEXITED(status), "child in unexpected state");
+
+ return WEXITSTATUS(status);
+}
+
+static int child_f(int sock, unsigned long *smap, int fd)
+{
+ int ret, buf = 0;
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_POPULATE, fd, 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ BUG_ON(*smap != 0xdeadbabe, "MAP_PRIVATE | MAP_POPULATE changed file");
+
+ ret = write(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "write(sock)");
+
+ ret = read(sock, &buf, sizeof(int));
+ BUG_ON(ret <= 0, "read(sock)");
+
+ BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
+ BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int sock[2], child, ret;
+ FILE *ftmp;
+ unsigned long *smap;
+
+ ftmp = tmpfile();
+ BUG_ON(ftmp == 0, "tmpfile()");
+
+ ret = ftruncate(fileno(ftmp), MMAP_SZ);
+ BUG_ON(ret, "ftruncate()");
+
+ smap = mmap(0, MMAP_SZ, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fileno(ftmp), 0);
+ BUG_ON(smap == MAP_FAILED, "mmap()");
+
+ *smap = 0xdeadbabe;
+ /* Probably unnecessary, but let it be. */
+ ret = msync(smap, MMAP_SZ, MS_SYNC);
+ BUG_ON(ret, "msync()");
+
+ ret = socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sock);
+ BUG_ON(ret, "socketpair()");
+
+ child = fork();
+ BUG_ON(child == -1, "fork()");
+
+ if (child) {
+ ret = close(sock[0]);
+ BUG_ON(ret, "close()");
+
+ return parent_f(sock[1], smap, child);
+ }
+
+ ret = close(sock[1]);
+ BUG_ON(ret, "close()");
+
+ return child_f(sock[0], smap, fileno(ftmp));
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/vm/mlock-random-test.c
new file mode 100644
index 000000000..ff4d72eb7
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock-random-test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests mlock/mlock2() when they are invoked
+ * on randomly chosen memory regions.
+ */
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <time.h>
+#include "mlock2.h"
+
+#define CHUNK_UNIT (128 * 1024)
+#define MLOCK_RLIMIT_SIZE (CHUNK_UNIT * 2)
+#define MLOCK_WITHIN_LIMIT_SIZE CHUNK_UNIT
+#define MLOCK_OUTOF_LIMIT_SIZE (CHUNK_UNIT * 3)
+
+#define TEST_LOOP 100
+#define PAGE_ALIGN(size, ps) (((size) + ((ps) - 1)) & ~((ps) - 1))
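+/* e.g. PAGE_ALIGN(5000, 4096) == 8192: rounds size up to a multiple of ps */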
+
+int set_cap_limits(rlim_t max)
+{
+ struct rlimit new;
+ cap_t cap = cap_init();
+
+ new.rlim_cur = max;
+ new.rlim_max = max;
+ if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+ perror("setrlimit() returns error\n");
+ return -1;
+ }
+
+ /* drop capabilities including CAP_IPC_LOCK */
+ if (cap_set_proc(cap)) {
+ perror("cap_set_proc() returns error\n");
+ return -2;
+ }
+
+ return 0;
+}
+
+int get_proc_locked_vm_size(void)
+{
+ FILE *f;
+ int ret = -1;
+ char line[1024] = {0};
+ unsigned long lock_size = 0;
+
+ f = fopen("/proc/self/status", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(line, 1024, f)) {
+ if (strstr(line, "VmLck")) {
+ ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
+ if (ret <= 0) {
+ printf("sscanf() on VmLck error: %s: %d\n",
+ line, ret);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return (int)(lock_size << 10);
+ }
+ }
+
+ perror("cann't parse VmLck in /proc/self/status\n");
+ fclose(f);
+ return -1;
+}
+
+/*
+ * Get the MMUPageSize of the memory region including input
+ * address from proc file.
+ *
+ * return value: on error case, 0 will be returned.
+ * Otherwise the page size(in bytes) is returned.
+ */
+int get_proc_page_size(unsigned long addr)
+{
+ FILE *smaps;
+ char *line = NULL;
+ unsigned long mmupage_size = 0;
+ size_t size = 0;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ return 0;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, "MMUPageSize")) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ /* found the MMUPageSize of this section */
+ if (sscanf(line, "MMUPageSize: %8lu kB",
+ &mmupage_size) < 1) {
+ printf("Unable to parse smaps entry for Size:%s\n",
+ line);
+ break;
+ }
+
+ }
+ free(line);
+ if (smaps)
+ fclose(smaps);
+ return mmupage_size << 10;
+}
+
+/*
+ * Test mlock/mlock2() on provided memory chunk.
+ * It expects the mlock/mlock2() to be successful (within rlimit)
+ *
+ * With allocated memory chunk [p, p + alloc_size), this
+ * test will choose start/len randomly to perform mlock/mlock2
+ * [start, start + len] memory range. The range is within range
+ * of the allocated chunk.
+ *
+ * The memory region size alloc_size is within the rlimit.
+ * So we always expect a success of mlock/mlock2.
+ *
+ * VmLck is assumed to be 0 before this test.
+ *
+ * return value: 0 - success
+ * else: failure
+ */
+int test_mlock_within_limit(char *p, int alloc_size)
+{
+ int i;
+ int ret = 0;
+ int locked_vm_size = 0;
+ struct rlimit cur;
+ int page_size = 0;
+
+ getrlimit(RLIMIT_MEMLOCK, &cur);
+ if (cur.rlim_cur < alloc_size) {
+ printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
+ return -1;
+ }
+
+ srand(time(NULL));
+ for (i = 0; i < TEST_LOOP; i++) {
+ /*
+ * - choose mlock/mlock2 randomly
+ * - choose lock_size randomly but lock_size < alloc_size
+ * - choose start_offset randomly but p+start_offset+lock_size
+ * < p+alloc_size
+ */
+ int is_mlock = !!(rand() % 2);
+ int lock_size = rand() % alloc_size;
+ int start_offset = rand() % (alloc_size - lock_size);
+
+ if (is_mlock)
+ ret = mlock(p + start_offset, lock_size);
+ else
+ ret = mlock2_(p + start_offset, lock_size,
+ MLOCK_ONFAULT);
+
+ if (ret) {
+ printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size,
+ p + start_offset, lock_size);
+ return ret;
+ }
+ }
+
+ /*
+ * Check VmLck left by the tests.
+ */
+ locked_vm_size = get_proc_locked_vm_size();
+ page_size = get_proc_page_size((unsigned long)p);
+ if (page_size == 0) {
+ printf("cannot get proc MMUPageSize\n");
+ return -1;
+ }
+
+ if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
+ printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
+ locked_vm_size, alloc_size);
+ return -1;
+ }
+
+ return 0;
+}
+
+
+/*
+ * We expect mlock/mlock2() to fail (beyond the rlimit)
+ *
+ * With allocated memory chunk [p, p + alloc_size), this
+ * test will randomly choose start/len and perform mlock/mlock2
+ * on [start, start+len] range.
+ *
+ * The memory region size alloc_size is above the rlimit,
+ * and the length to be locked is higher than the rlimit,
+ * so we always expect mlock/mlock2 to fail.
+ * The number of locked pages must not increase as a side effect.
+ *
+ * return value: 0 - success
+ * else: failure
+ */
+int test_mlock_outof_limit(char *p, int alloc_size)
+{
+ int i;
+ int ret = 0;
+ int locked_vm_size = 0, old_locked_vm_size = 0;
+ struct rlimit cur;
+
+ getrlimit(RLIMIT_MEMLOCK, &cur);
+ if (cur.rlim_cur >= alloc_size) {
+ printf("alloc_size[%d] >%u rlimit, violates test condition\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
+ return -1;
+ }
+
+ old_locked_vm_size = get_proc_locked_vm_size();
+ srand(time(NULL));
+ for (i = 0; i < TEST_LOOP; i++) {
+ int is_mlock = !!(rand() % 2);
+ int lock_size = (rand() % (alloc_size - cur.rlim_cur))
+ + cur.rlim_cur;
+ int start_offset = rand() % (alloc_size - lock_size);
+
+ if (is_mlock)
+ ret = mlock(p + start_offset, lock_size);
+ else
+ ret = mlock2_(p + start_offset, lock_size,
+ MLOCK_ONFAULT);
+ if (ret == 0) {
+ printf("%s() succeeds? on %p(%d) mlock%p(%d)\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size,
+ p + start_offset, lock_size);
+ return -1;
+ }
+ }
+
+ locked_vm_size = get_proc_locked_vm_size();
+ if (locked_vm_size != old_locked_vm_size) {
+ printf("tests leads to new mlocked page: old[%d], new[%d]\n",
+ old_locked_vm_size,
+ locked_vm_size);
+ return -1;
+ }
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ char *p = NULL;
+ int ret = 0;
+
+ if (set_cap_limits(MLOCK_RLIMIT_SIZE))
+ return -1;
+
+ p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
+ if (p == NULL) {
+ perror("malloc() failure\n");
+ return -1;
+ }
+ ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
+ if (ret)
+ return ret;
+ munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
+ free(p);
+
+
+ p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
+ if (p == NULL) {
+ perror("malloc() failure\n");
+ return -1;
+ }
+ ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
+ if (ret)
+ return ret;
+ munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
+ free(p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
new file mode 100644
index 000000000..11b2301f3
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "mlock2.h"
+
+#include "../kselftest.h"
+
+struct vm_boundaries {
+ unsigned long start;
+ unsigned long end;
+};
+
+static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
+{
+ FILE *file;
+ int ret = 1;
+ char line[1024] = {0};
+ char *end_addr;
+ char *stop;
+ unsigned long start;
+ unsigned long end;
+
+ if (!area)
+ return ret;
+
+ file = fopen("/proc/self/maps", "r");
+ if (!file) {
+ perror("fopen");
+ return ret;
+ }
+
+ memset(area, 0, sizeof(struct vm_boundaries));
+
+ while(fgets(line, 1024, file)) {
+ end_addr = strchr(line, '-');
+ if (!end_addr) {
+ printf("cannot parse /proc/self/maps\n");
+ goto out;
+ }
+ *end_addr = '\0';
+ end_addr++;
+ stop = strchr(end_addr, ' ');
+ if (!stop) {
+ printf("cannot parse /proc/self/maps\n");
+ goto out;
+ }
+ *stop = '\0';
+
+ sscanf(line, "%lx", &start);
+ sscanf(end_addr, "%lx", &end);
+
+ if (start <= addr && end > addr) {
+ area->start = start;
+ area->end = end;
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ fclose(file);
+ return ret;
+}
+
+#define VMFLAGS "VmFlags:"
+
+static bool is_vmflag_set(unsigned long addr, const char *vmflag)
+{
+ char *line = NULL;
+ char *flags;
+ size_t size = 0;
+ bool ret = false;
+ FILE *smaps;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ goto out;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, VMFLAGS)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ flags = line + strlen(VMFLAGS);
+ ret = (strstr(flags, vmflag) != NULL);
+ goto out;
+ }
+
+out:
+ free(line);
+ fclose(smaps);
+ return ret;
+}
+
+#define SIZE "Size:"
+#define RSS "Rss:"
+#define LOCKED "lo"
+
+static unsigned long get_value_for_name(unsigned long addr, const char *name)
+{
+ char *line = NULL;
+ size_t size = 0;
+ char *value_ptr;
+ FILE *smaps = NULL;
+ unsigned long value = -1UL;
+
+ smaps = seek_to_smaps_entry(addr);
+ if (!smaps) {
+ printf("Unable to parse /proc/self/smaps\n");
+ goto out;
+ }
+
+ while (getline(&line, &size, smaps) > 0) {
+ if (!strstr(line, name)) {
+ free(line);
+ line = NULL;
+ size = 0;
+ continue;
+ }
+
+ value_ptr = line + strlen(name);
+ if (sscanf(value_ptr, "%lu kB", &value) < 1) {
+ printf("Unable to parse smaps entry for Size\n");
+ goto out;
+ }
+ break;
+ }
+
+out:
+ if (smaps)
+ fclose(smaps);
+ free(line);
+ return value;
+}
+
+static bool is_vma_lock_on_fault(unsigned long addr)
+{
+ bool locked;
+ unsigned long vma_size, vma_rss;
+
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ return false;
+
+ vma_size = get_value_for_name(addr, SIZE);
+ vma_rss = get_value_for_name(addr, RSS);
+
+ /* only one page is faulted in */
+ return (vma_rss < vma_size);
+}
+
+#define PRESENT_BIT 0x8000000000000000ULL
+#define PFN_MASK 0x007FFFFFFFFFFFFFULL
+#define UNEVICTABLE_BIT (1UL << 18)
+
+static int lock_check(unsigned long addr)
+{
+ bool locked;
+ unsigned long vma_size, vma_rss;
+
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ return false;
+
+ vma_size = get_value_for_name(addr, SIZE);
+ vma_rss = get_value_for_name(addr, RSS);
+
+ return (vma_rss == vma_size);
+}
+
+static int unlock_lock_check(char *map)
+{
+ if (is_vmflag_set((unsigned long)map, LOCKED)) {
+ printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_mlock_lock()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ if (mlock2_(map, 2 * page_size, 0)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock2(0)");
+ goto unmap;
+ }
+
+ if (!lock_check((unsigned long)map))
+ goto unmap;
+
+ /* Now unlock and recheck attributes */
+ if (munlock(map, 2 * page_size)) {
+ perror("munlock()");
+ goto unmap;
+ }
+
+ ret = unlock_lock_check(map);
+
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int onfault_check(char *map)
+{
+ *map = 'a';
+ if (!is_vma_lock_on_fault((unsigned long)map)) {
+ printf("VMA is not marked for lock on fault\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int unlock_onfault_check(char *map)
+{
+ unsigned long page_size = getpagesize();
+
+ if (is_vma_lock_on_fault((unsigned long)map) ||
+ is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA is still lock on fault after unlock\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int test_mlock_onfault()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock2(MLOCK_ONFAULT)");
+ goto unmap;
+ }
+
+ if (onfault_check(map))
+ goto unmap;
+
+ /* Now unlock and recheck attributes */
+ if (munlock(map, 2 * page_size)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("munlock()");
+ goto unmap;
+ }
+
+ ret = unlock_onfault_check(map);
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int test_lock_onfault_of_present()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("test_mlock_locked mmap");
+ goto out;
+ }
+
+ *map = 'a';
+
+ if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock2(MLOCK_ONFAULT)");
+ goto unmap;
+ }
+
+ if (!is_vma_lock_on_fault((unsigned long)map) ||
+ !is_vma_lock_on_fault((unsigned long)map + page_size)) {
+ printf("VMA with present pages is not marked lock on fault\n");
+ goto unmap;
+ }
+ ret = 0;
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ return ret;
+}
+
+static int test_munlockall()
+{
+ char *map;
+ int ret = 1;
+ unsigned long page_size = getpagesize();
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ if (map == MAP_FAILED) {
+ perror("test_munlockall mmap");
+ goto out;
+ }
+
+ if (mlockall(MCL_CURRENT)) {
+ perror("mlockall(MCL_CURRENT)");
+ goto out;
+ }
+
+ if (!lock_check((unsigned long)map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ if (unlock_lock_check(map))
+ goto unmap;
+
+ munmap(map, 2 * page_size);
+
+ map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ if (map == MAP_FAILED) {
+ perror("test_munlockall second mmap");
+ goto out;
+ }
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
+ perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
+ goto unmap;
+ }
+
+ if (onfault_check(map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ if (unlock_onfault_check(map))
+ goto unmap;
+
+ if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
+ perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
+ goto out;
+ }
+
+ if (!lock_check((unsigned long)map))
+ goto unmap;
+
+ if (munlockall()) {
+ perror("munlockall()");
+ goto unmap;
+ }
+
+ ret = unlock_lock_check(map);
+
+unmap:
+ munmap(map, 2 * page_size);
+out:
+ munlockall();
+ return ret;
+}
+
+static int test_vma_management(bool call_mlock)
+{
+ int ret = 1;
+ void *map;
+ unsigned long page_size = getpagesize();
+ struct vm_boundaries page1;
+ struct vm_boundaries page2;
+ struct vm_boundaries page3;
+
+ map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED) {
+ perror("mmap()");
+ return ret;
+ }
+
+ if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
+ if (errno == ENOSYS) {
+ printf("Cannot call new mlock family, skipping test\n");
+ _exit(KSFT_SKIP);
+ }
+ perror("mlock(ONFAULT)\n");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /*
+ * Before we unlock a portion, we need to make sure that all three
+ * pages are in the same VMA. If they are not, we abort this test
+ * (note that this is not a failure).
+ */
+ if (page1.start != page2.start || page2.start != page3.start) {
+ printf("VMAs are not merged to start, aborting test\n");
+ ret = 0;
+ goto out;
+ }
+
+ if (munlock(map + page_size, page_size)) {
+ perror("munlock()");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /* All three VMAs should be different */
+ if (page1.start == page2.start || page2.start == page3.start) {
+ printf("failed to split VMA for munlock\n");
+ goto out;
+ }
+
+ /* Now unlock the first and third page and check the VMAs again */
+ if (munlock(map, page_size * 3)) {
+ perror("munlock()");
+ goto out;
+ }
+
+ if (get_vm_area((unsigned long)map, &page1) ||
+ get_vm_area((unsigned long)map + page_size, &page2) ||
+ get_vm_area((unsigned long)map + page_size * 2, &page3)) {
+ printf("couldn't find mapping in /proc/self/maps\n");
+ goto out;
+ }
+
+ /* Now all three VMAs should be the same */
+ if (page1.start != page2.start || page2.start != page3.start) {
+ printf("failed to merge VMAs after munlock\n");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ munmap(map, 3 * page_size);
+ return ret;
+}
+
+static int test_mlockall(int (test_function)(bool call_mlock))
+{
+ int ret = 1;
+
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
+ perror("mlockall");
+ return ret;
+ }
+
+ ret = test_function(false);
+ munlockall();
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+ ret += test_mlock_lock();
+ ret += test_mlock_onfault();
+ ret += test_munlockall();
+ ret += test_lock_onfault_of_present();
+ ret += test_vma_management(true);
+ ret += test_mlockall(test_vma_management);
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/vm/mlock2.h
new file mode 100644
index 000000000..2a6e76c22
--- /dev/null
+++ b/tools/testing/selftests/vm/mlock2.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef MLOCK_ONFAULT
+#define MLOCK_ONFAULT 1
+#endif
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int mlock2_(void *start, size_t len, int flags)
+{
+#ifdef __NR_mlock2
+ return syscall(__NR_mlock2, start, len, flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+static FILE *seek_to_smaps_entry(unsigned long addr)
+{
+ FILE *file;
+ char *line = NULL;
+ size_t size = 0;
+ unsigned long start, end;
+ char perms[5];
+ unsigned long offset;
+ char dev[32];
+ unsigned long inode;
+ char path[BUFSIZ];
+
+ file = fopen("/proc/self/smaps", "r");
+ if (!file) {
+ perror("fopen smaps");
+ _exit(1);
+ }
+
+ while (getline(&line, &size, file) > 0) {
+ if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
+ &start, &end, perms, &offset, dev, &inode, path) < 6)
+ goto next;
+
+ if (start <= addr && addr < end)
+ goto out;
+
+next:
+ free(line);
+ line = NULL;
+ size = 0;
+ }
+
+ fclose(file);
+ file = NULL;
+
+out:
+ free(line);
+ return file;
+}
diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c
new file mode 100644
index 000000000..634d87dfb
--- /dev/null
+++ b/tools/testing/selftests/vm/on-fault-limit.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifndef MCL_ONFAULT
+#define MCL_ONFAULT (MCL_FUTURE << 1)
+#endif
+
+static int test_limit(void)
+{
+ int ret = 1;
+ struct rlimit lims;
+ void *map;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &lims)) {
+ perror("getrlimit");
+ return ret;
+ }
+
+ if (mlockall(MCL_ONFAULT | MCL_FUTURE)) {
+ perror("mlockall");
+ return ret;
+ }
+
+ map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+ if (map != MAP_FAILED) {
+ printf("mmap should have failed, but didn't\n");
+ munmap(map, 2 * lims.rlim_max);
+ } else {
+ ret = 0;
+ }
+
+ munlockall();
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+
+ ret += test_limit();
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
new file mode 100755
index 000000000..584a91ae4
--- /dev/null
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+mnt=./huge
+exitcode=0
+
+#get huge pagesize and freepages from /proc/meminfo
+while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ if [ "$name" = "Hugepagesize:" ]; then
+ hpgsize_KB=$size
+ fi
+done < /proc/meminfo
+
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size. The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
+# both of these requirements into account and attempt to increase
+# number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
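+# Example: with 8 CPUs and a 2MB huge page size, 8 * 2 = 16MB rounds up
+# to half_ufd_size_MB=128, so needmem_KB = 128 * 2 * 1024 = 262144 KB,
+# which also covers the 256MB hugetlbfs minimum above.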
+
+#set proper nr_hugepages
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
+ nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
+ needpgs=$((needmem_KB / hpgsize_KB))
+ tries=2
+ while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
+ lackpgs=$(( $needpgs - $freepgs ))
+ echo 3 > /proc/sys/vm/drop_caches
+ echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
+ if [ $? -ne 0 ]; then
+ echo "Please run this test as root"
+ exit $ksft_skip
+ fi
+ while read name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+ tries=$((tries - 1))
+ done
+ if [ $freepgs -lt $needpgs ]; then
+ printf "Not enough huge pages available (%d < %d)\n" \
+ $freepgs $needpgs
+ exit 1
+ fi
+else
+ echo "no hugetlbfs support in kernel?"
+ exit 1
+fi
+
+mkdir $mnt
+mount -t hugetlbfs none $mnt
+
+echo "---------------------"
+echo "running hugepage-mmap"
+echo "---------------------"
+./hugepage-mmap
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+shmmax=`cat /proc/sys/kernel/shmmax`
+shmall=`cat /proc/sys/kernel/shmall`
+echo 268435456 > /proc/sys/kernel/shmmax
+echo 4194304 > /proc/sys/kernel/shmall
+echo "--------------------"
+echo "running hugepage-shm"
+echo "--------------------"
+./hugepage-shm
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+echo $shmmax > /proc/sys/kernel/shmmax
+echo $shmall > /proc/sys/kernel/shmall
+
+echo "-------------------"
+echo "running map_hugetlb"
+echo "-------------------"
+./map_hugetlb
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
+echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
+echo " hugetlb regression testing."
+
+echo "-------------------"
+echo "running userfaultfd"
+echo "-------------------"
+./userfaultfd anon 128 32
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "---------------------------"
+echo "running userfaultfd_hugetlb"
+echo "---------------------------"
+# Test requires source and destination huge pages. Size of source
+# (half_ufd_size_MB) is passed as argument to test.
+./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+rm -f $mnt/ufd_test_file
+
+echo "-------------------------"
+echo "running userfaultfd_shmem"
+echo "-------------------------"
+./userfaultfd shmem 128 32
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+#cleanup
+umount $mnt
+rm -rf $mnt
+echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+
+echo "-----------------------"
+echo "running compaction_test"
+echo "-----------------------"
+./compaction_test
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "----------------------"
+echo "running on-fault-limit"
+echo "----------------------"
+sudo -u nobody ./on-fault-limit
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running map_populate"
+echo "--------------------"
+./map_populate
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "--------------------"
+echo "running mlock2-tests"
+echo "--------------------"
+./mlock2-tests
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "-----------------------------"
+echo "running virtual_address_range"
+echo "-----------------------------"
+./virtual_address_range
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "-----------------------------"
+echo "running virtual address 128TB switch test"
+echo "-----------------------------"
+./va_128TBswitch
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+exit $exitcode
diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/vm/thuge-gen.c
new file mode 100644
index 000000000..361ef7192
--- /dev/null
+++ b/tools/testing/selftests/vm/thuge-gen.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test selecting other page sizes for mmap/shmget.
+
+ Before running this test, huge pages for each huge page size must have
+ been reserved.
+ For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+ Also shmmax must be increased.
+ And you need to run as root to work around some weird permissions in shm.
+ And nothing using huge pages should run in parallel.
+ When the program aborts you may need to clean up the shm segments with
+ ipcrm -m by hand, like this
+ sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
+ (warning this will remove all if someone else uses them) */
+
+#define _GNU_SOURCE 1
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <assert.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <string.h>
+
+#define err(x) perror(x), exit(1)
+
+#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
+#define MAP_HUGE_SHIFT 26
+#define MAP_HUGE_MASK 0x3f
+#if !defined(MAP_HUGETLB)
+#define MAP_HUGETLB 0x40000
+#endif
+
+#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
+#define SHM_HUGE_SHIFT 26
+#define SHM_HUGE_MASK 0x3f
+#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
+#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
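+
+/*
+ * The huge page size is passed to mmap()/shmget() as log2(size) in the
+ * high flag bits, e.g. 2MB pages: log2(2MB) = 21, giving MAP_HUGE_2MB ==
+ * (21 << MAP_HUGE_SHIFT); main() below builds the same encoding at run
+ * time as ilog2(ps) << MAP_HUGE_SHIFT.
+ */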
+
+#define NUM_PAGESIZES 5
+
+#define NUM_PAGES 4
+
+#define Dprintf(fmt...) // printf(fmt)
+
+unsigned long page_sizes[NUM_PAGESIZES];
+int num_page_sizes;
+
+int ilog2(unsigned long v)
+{
+ int l = 0;
+ while ((1UL << l) < v)
+ l++;
+ return l;
+}
+
+void find_pagesizes(void)
+{
+ glob_t g;
+ int i;
+ glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+ assert(g.gl_pathc <= NUM_PAGESIZES);
+ for (i = 0; i < g.gl_pathc; i++) {
+ sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+ &page_sizes[i]);
+ page_sizes[i] <<= 10;
+ printf("Found %luMB\n", page_sizes[i] >> 20);
+ }
+ num_page_sizes = g.gl_pathc;
+ globfree(&g);
+}
+
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+ free(line);
+ return hps;
+}
+
+void show(unsigned long ps)
+{
+ char buf[100];
+ if (ps == getpagesize())
+ return;
+ printf("%luMB: ", ps >> 20);
+ fflush(stdout);
+ snprintf(buf, sizeof buf,
+ "cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+ system(buf);
+}
+
+unsigned long read_sysfs(int warn, char *fmt, ...)
+{
+ char *line = NULL;
+ size_t linelen = 0;
+ char buf[100];
+ FILE *f;
+ va_list ap;
+ unsigned long val = 0;
+
+ va_start(ap, fmt);
+ vsnprintf(buf, sizeof buf, fmt, ap);
+ va_end(ap);
+
+ f = fopen(buf, "r");
+ if (!f) {
+ if (warn)
+ printf("missing %s\n", buf);
+ return 0;
+ }
+ if (getline(&line, &linelen, f) > 0) {
+ sscanf(line, "%lu", &val);
+ }
+ fclose(f);
+ free(line);
+ return val;
+}
+
+unsigned long read_free(unsigned long ps)
+{
+ return read_sysfs(ps != getpagesize(),
+ "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
+}
+
+void test_mmap(unsigned long size, unsigned flags)
+{
+ char *map;
+ unsigned long before, after;
+ int err;
+
+ before = read_free(size);
+ map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
+
+ if (map == (char *)-1) err("mmap");
+ memset(map, 0xff, size*NUM_PAGES);
+ after = read_free(size);
+ Dprintf("before %lu after %lu diff %ld size %lu\n",
+ before, after, before - after, size);
+ assert(size == getpagesize() || (before - after) == NUM_PAGES);
+ show(size);
+ err = munmap(map, size * NUM_PAGES);
+ assert(!err);
+}
+
+void test_shmget(unsigned long size, unsigned flags)
+{
+ int id;
+ unsigned long before, after;
+ int err;
+
+ before = read_free(size);
+ id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
+ if (id < 0) err("shmget");
+
+ struct shm_info i;
+ if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
+ Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+
+
+ Dprintf("id %d\n", id);
+ char *map = shmat(id, NULL, 0600);
+ if (map == (void *)-1) err("shmat");
+
+ shmctl(id, IPC_RMID, NULL);
+
+ memset(map, 0xff, size*NUM_PAGES);
+ after = read_free(size);
+
+ Dprintf("before %lu after %lu diff %ld size %lu\n",
+ before, after, before - after, size);
+ assert(size == getpagesize() || (before - after) == NUM_PAGES);
+ show(size);
+ err = shmdt(map);
+ assert(!err);
+}
+
+void sanity_checks(void)
+{
+ int i;
+ unsigned long largest = getpagesize();
+
+ for (i = 0; i < num_page_sizes; i++) {
+ if (page_sizes[i] > largest)
+ largest = page_sizes[i];
+
+ if (read_free(page_sizes[i]) < NUM_PAGES) {
+ printf("Not enough huge pages for page size %lu MB, need %u\n",
+ page_sizes[i] >> 20,
+ NUM_PAGES);
+ exit(0);
+ }
+ }
+
+ if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
+ printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
+ exit(0);
+ }
+
+#if defined(__x86_64__)
+ if (largest != 1U<<30) {
+ printf("No GB pages available on x86-64\n"
+ "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
+ exit(0);
+ }
+#endif
+}
+
+int main(void)
+{
+ int i;
+ unsigned long default_hps = default_huge_page_size();
+
+ find_pagesizes();
+
+ sanity_checks();
+
+ for (i = 0; i < num_page_sizes; i++) {
+ unsigned long ps = page_sizes[i];
+ int arg = ilog2(ps) << MAP_HUGE_SHIFT;
+ printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+ test_mmap(ps, MAP_HUGETLB | arg);
+ }
+ printf("Testing default huge mmap\n");
+ test_mmap(default_hps, MAP_HUGETLB);
+
+ puts("Testing non-huge shmget");
+ test_shmget(getpagesize(), 0);
+
+ for (i = 0; i < num_page_sizes; i++) {
+ unsigned long ps = page_sizes[i];
+ int arg = ilog2(ps) << SHM_HUGE_SHIFT;
+ printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+ test_shmget(ps, SHM_HUGETLB | arg);
+ }
+ puts("default huge shmget");
+ test_shmget(default_hps, SHM_HUGETLB);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
new file mode 100644
index 000000000..fd7f1b4a9
--- /dev/null
+++ b/tools/testing/selftests/vm/transhuge-stress.c
@@ -0,0 +1,144 @@
+/*
+ * Stress test for transparent huge pages, memory compaction and migration.
+ *
+ * Authors: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <err.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+
+#define PAGE_SHIFT 12
+#define HPAGE_SHIFT 21
+
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
+
+int pagemap_fd;
+
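+/*
+ * Sketch, for illustration (assumed pagemap layout, not defined by this
+ * test): each /proc/self/pagemap entry is 64 bits, with bit 63 = page
+ * present and bits 0-54 = PFN, which is what the macros above decode.
+ * A single page's entry can be fetched like this.
+ */
+static inline uint64_t read_pagemap_entry(void *addr)
+{
+ uint64_t ent;
+
+ if (pread(pagemap_fd, &ent, sizeof(ent),
+ ((uintptr_t)addr >> PAGE_SHIFT) * sizeof(ent)) != sizeof(ent))
+ err(2, "read pagemap");
+ return ent;
+}
+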
+int64_t allocate_transhuge(void *ptr)
+{
+ uint64_t ent[2];
+
+ /* drop pmd */
+ if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANONYMOUS |
+ MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+ errx(2, "mmap transhuge");
+
+ if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ /* allocate transparent huge page */
+ *(volatile void **)ptr = ptr;
+
+ if (pread(pagemap_fd, ent, sizeof(ent),
+ (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
+ err(2, "read pagemap");
+
+ if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+ PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+ !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
+ return PAGEMAP_PFN(ent[0]);
+
+ return -1;
+}
+
+int main(int argc, char **argv)
+{
+ size_t ram, len;
+ void *ptr, *p;
+ struct timespec a, b;
+ double s;
+ uint8_t *map;
+ size_t map_len;
+
+ ram = sysconf(_SC_PHYS_PAGES);
+ if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
+ ram = SIZE_MAX / 4;
+ else
+ ram *= sysconf(_SC_PAGESIZE);
+
+ if (argc == 1)
+ len = ram;
+ else if (!strcmp(argv[1], "-h"))
+ errx(1, "usage: %s [size in MiB]", argv[0]);
+ else
+ len = atoll(argv[1]) << 20;
+
+ warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
+ " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
+ len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1));
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ err(2, "open pagemap");
+
+ len -= len % HPAGE_SIZE;
+ ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+ if (ptr == MAP_FAILED)
+ err(2, "initial mmap");
+ ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
+
+ if (madvise(ptr, len, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ map_len = ram >> (HPAGE_SHIFT - 1);
+ map = malloc(map_len);
+ if (!map)
+ errx(2, "map malloc");
+
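+ /*
+ * A sketch of the loop below, not a normative description: each pass
+ * re-faults every huge page region, records which PFNs backed them,
+ * then splits each huge page again so the next pass re-exercises THP
+ * allocation, compaction and migration.
+ */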
+ while (1) {
+ int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
+
+ memset(map, 0, map_len);
+
+ clock_gettime(CLOCK_MONOTONIC, &a);
+ for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
+ int64_t pfn;
+
+ pfn = allocate_transhuge(p);
+
+ if (pfn < 0) {
+ nr_failed++;
+ } else {
+ size_t idx = pfn >> (HPAGE_SHIFT - PAGE_SHIFT);
+
+ nr_succeed++;
+ if (idx >= map_len) {
+ map = realloc(map, idx + 1);
+ if (!map)
+ errx(2, "map realloc");
+ memset(map + map_len, 0, idx + 1 - map_len);
+ map_len = idx + 1;
+ }
+ if (!map[idx])
+ nr_pages++;
+ map[idx] = 1;
+ }
+
+ /* split transhuge page, keep last page */
+ if (madvise(p, HPAGE_SIZE - PAGE_SIZE, MADV_DONTNEED))
+ err(2, "MADV_DONTNEED");
+ }
+ clock_gettime(CLOCK_MONOTONIC, &b);
+ s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
+
+ warnx("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
+ "%4d succeed, %4d failed, %4d different pages",
+ s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
+ nr_succeed, nr_failed, nr_pages);
+ }
+}
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
new file mode 100644
index 000000000..b2c7043c0
--- /dev/null
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -0,0 +1,1333 @@
+/*
+ * Stress userfaultfd syscall.
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ * page of the area_dst (while the physical page may still be in
+ * area_src), and increments a per-page counter in the same page,
+ * and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ * thread 1 above. userfaultfd blocking reads or poll() modes are
+ * exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ * at maximum bandwidth (if not already transferred by thread
+ * 2). Each CPU thread takes care of transferring a portion of the
+ * area.
+ *
+ * When all threads of type 3 have completed the transfer, one bounce
+ * is complete. area_src and area_dst are then swapped. All threads
+ * are respawned and the bounce immediately restarts in the opposite
+ * direction.
+ *
+ * The per-CPU threads of type 1, by triggering userfaults inside
+ * pthread_mutex_lock, also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ *
+ * The program takes two parameters: the amounts of physical memory in
+ * megabytes (MiB) of the area and the number of bounces to execute.
+ *
+ * # 100MiB 99999 bounces
+ * ./userfaultfd 100 99999
+ *
+ * # 1GiB 99 bounces
+ * ./userfaultfd 1000 99
+ *
+ * # 10MiB-~6GiB 999 bounces, continue forever unless an error triggers
+ * while ./userfaultfd $[RANDOM % 6000 + 10] 999; do true; done
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#ifdef __NR_userfaultfd
+
+static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+
+#define BOUNCE_RANDOM (1<<0)
+#define BOUNCE_RACINGFAULTS (1<<1)
+#define BOUNCE_VERIFY (1<<2)
+#define BOUNCE_POLL (1<<3)
+static int bounces;
+
+#define TEST_ANON 1
+#define TEST_HUGETLB 2
+#define TEST_SHMEM 3
+static int test_type;
+
+/* re-arm the test_uffdio_*_eexist checks every ALARM_INTERVAL_SECS seconds */
+#define ALARM_INTERVAL_SECS 10
+static volatile bool test_uffdio_copy_eexist = true;
+static volatile bool test_uffdio_zeropage_eexist = true;
+
+static bool map_shared;
+static int huge_fd;
+static char *huge_fd_off0;
+static unsigned long long *count_verify;
+static int uffd, uffd_flags, finished, *pipefd;
+static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
+static char *zeropage;
+pthread_attr_t attr;
+
+/* pthread_mutex_t starts at page offset 0 */
+#define area_mutex(___area, ___nr) \
+ ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
+/*
+ * The count is placed in the page right after the pthread_mutex_t,
+ * naturally aligned to avoid misalignment faults on non-x86 archs.
+ */
+#define area_count(___area, ___nr) \
+ ((volatile unsigned long long *) ((unsigned long) \
+ ((___area) + (___nr)*page_size + \
+ sizeof(pthread_mutex_t) + \
+ sizeof(unsigned long long) - 1) & \
+ ~(unsigned long)(sizeof(unsigned long long) \
+ - 1)))
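+
+/*
+ * Resulting per-page layout implied by the two macros above (a sketch,
+ * not an authoritative map):
+ *
+ * [ pthread_mutex_t | pad to 8-byte alignment | count | placeholder ]
+ *
+ * so each page carries its own mutex and verification counter.
+ */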
+
+static int anon_release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void anon_allocate_area(void **alloc_area)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "mmap of anonymous memory failed");
+ *alloc_area = NULL;
+ }
+}
+
+static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+}
+
+/* HugeTLB memory */
+static int hugetlb_release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ rel_area == huge_fd_off0 ? 0 :
+ nr_pages * page_size,
+ nr_pages * page_size)) {
+ perror("fallocate");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+
+static void hugetlb_allocate_area(void **alloc_area)
+{
+ void *area_alias = NULL;
+ char **alloc_area_alias;
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+ MAP_HUGETLB,
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "mmap of hugetlbfs file failed\n");
+ *alloc_area = NULL;
+ }
+
+ if (map_shared) {
+ area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_HUGETLB,
+ huge_fd, *alloc_area == area_src ? 0 :
+ nr_pages * page_size);
+ if (area_alias == MAP_FAILED) {
+ if (munmap(*alloc_area, nr_pages * page_size) < 0)
+ perror("hugetlb munmap"), exit(1);
+ *alloc_area = NULL;
+ return;
+ }
+ }
+ if (*alloc_area == area_src) {
+ huge_fd_off0 = *alloc_area;
+ alloc_area_alias = &area_src_alias;
+ } else {
+ alloc_area_alias = &area_dst_alias;
+ }
+ if (area_alias)
+ *alloc_area_alias = area_alias;
+}
+
+static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+ if (!map_shared)
+ return;
+ /*
+ * We can't zap just the pagetable with hugetlbfs because
+ * MADV_DONTNEED won't work. So exercise -EEXIST on an alias
+ * mapping where the pagetables are not established initially;
+ * this way we'll exercise the -EEXIST path at the fs level.
+ */
+ *start = (unsigned long) area_dst_alias + offset;
+}
+
+/* Shared memory */
+static int shmem_release_pages(char *rel_area)
+{
+ int ret = 0;
+
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
+ perror("madvise");
+ ret = 1;
+ }
+
+ return ret;
+}
+
+static void shmem_allocate_area(void **alloc_area)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ fprintf(stderr, "shared memory mmap failed\n");
+ *alloc_area = NULL;
+ }
+}
+
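+/*
+ * Per-test-type operations: each memory type (anon, shmem, hugetlb)
+ * provides its own way to allocate the test areas, release their
+ * pages between bounces, and redirect ioctls at the alias mapping.
+ */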
+struct uffd_test_ops {
+ unsigned long expected_ioctls;
+ void (*allocate_area)(void **alloc_area);
+ int (*release_pages)(char *rel_area);
+ void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
+};
+
+#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
+ (1 << _UFFDIO_COPY) | \
+ (1 << _UFFDIO_ZEROPAGE))
+
+static struct uffd_test_ops anon_uffd_test_ops = {
+ .expected_ioctls = ANON_EXPECTED_IOCTLS,
+ .allocate_area = anon_allocate_area,
+ .release_pages = anon_release_pages,
+ .alias_mapping = noop_alias_mapping,
+};
+
+static struct uffd_test_ops shmem_uffd_test_ops = {
+ .expected_ioctls = ANON_EXPECTED_IOCTLS,
+ .allocate_area = shmem_allocate_area,
+ .release_pages = shmem_release_pages,
+ .alias_mapping = noop_alias_mapping,
+};
+
+static struct uffd_test_ops hugetlb_uffd_test_ops = {
+ .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
+ .allocate_area = hugetlb_allocate_area,
+ .release_pages = hugetlb_release_pages,
+ .alias_mapping = hugetlb_alias_mapping,
+};
+
+static struct uffd_test_ops *uffd_test_ops;
+
+static int my_bcmp(char *str1, char *str2, size_t n)
+{
+ unsigned long i;
+ for (i = 0; i < n; i++)
+ if (str1[i] != str2[i])
+ return 1;
+ return 0;
+}
+
+static void *locking_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct random_data rand;
+ unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
+ int32_t rand_nr;
+ unsigned long long count;
+ char randstate[64];
+ unsigned int seed;
+ time_t start;
+
+ if (bounces & BOUNCE_RANDOM) {
+ seed = (unsigned int) time(NULL) - bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ seed += cpu;
+ bzero(&rand, sizeof(rand));
+ bzero(&randstate, sizeof(randstate));
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand))
+ fprintf(stderr, "srandom_r error\n"), exit(1);
+ } else {
+ page_nr = -bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ page_nr += cpu * nr_pages_per_cpu;
+ }
+
+ while (!finished) {
+ if (bounces & BOUNCE_RANDOM) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 1 error\n"), exit(1);
+ page_nr = rand_nr;
+ if (sizeof(page_nr) > sizeof(rand_nr)) {
+ if (random_r(&rand, &rand_nr))
+ fprintf(stderr, "random_r 2 error\n"), exit(1);
+ page_nr |= (((unsigned long) rand_nr) << 16) <<
+ 16;
+ }
+ } else
+ page_nr += 1;
+ page_nr %= nr_pages;
+
+ start = time(NULL);
+ if (bounces & BOUNCE_VERIFY) {
+ count = *area_count(area_dst, page_nr);
+ if (!count)
+ fprintf(stderr,
+ "page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+
+
+ /*
+ * We can't use bcmp (or memcmp) because that
+ * returns 0 erroneously if the memory is
+ * changing under it (even if the end of the
+ * page is never changing and always
+ * different).
+ */
+#if 1
+ if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size))
+ fprintf(stderr,
+ "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+#else
+ unsigned long loops;
+
+ loops = 0;
+ /* uncomment the below line to test with mutex */
+ /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
+ while (!bcmp(area_dst + page_nr * page_size, zeropage,
+ page_size)) {
+ loops += 1;
+ if (loops > 10)
+ break;
+ }
+ /* uncomment below line to test with mutex */
+ /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
+ if (loops) {
+ fprintf(stderr,
+ "page_nr %lu all zero thread %lu %p %lu\n",
+ page_nr, cpu, area_dst + page_nr * page_size,
+ loops);
+ if (loops > 10)
+ exit(1);
+ }
+#endif
+ }
+
+ pthread_mutex_lock(area_mutex(area_dst, page_nr));
+ count = *area_count(area_dst, page_nr);
+ if (count != count_verify[page_nr]) {
+ fprintf(stderr,
+ "page_nr %lu memory corruption %Lu %Lu\n",
+ page_nr, count,
+ count_verify[page_nr]), exit(1);
+ }
+ count++;
+ *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
+ pthread_mutex_unlock(area_mutex(area_dst, page_nr));
+
+ if (time(NULL) - start > 1)
+ fprintf(stderr,
+ "userfault too slow %ld "
+ "possible false positive with overcommit\n",
+ time(NULL) - start);
+ }
+
+ return NULL;
+}
+
+static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
+ unsigned long offset)
+{
+ uffd_test_ops->alias_mapping(&uffdio_copy->dst,
+ uffdio_copy->len,
+ offset);
+ if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy->copy != -EEXIST)
+ fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
+ uffdio_copy->copy), exit(1);
+ } else {
+ fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
+ uffdio_copy->copy), exit(1);
+ }
+}
+
+static int __copy_page(int ufd, unsigned long offset, bool retry)
+{
+ struct uffdio_copy uffdio_copy;
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_copy.dst = (unsigned long) area_dst + offset;
+ uffdio_copy.src = (unsigned long) area_src + offset;
+ uffdio_copy.len = page_size;
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy.copy != -EEXIST)
+ fprintf(stderr, "UFFDIO_COPY error %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else if (uffdio_copy.copy != page_size) {
+ fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
+ uffdio_copy.copy), exit(1);
+ } else {
+ if (test_uffdio_copy_eexist && retry) {
+ test_uffdio_copy_eexist = false;
+ retry_copy_page(ufd, &uffdio_copy, offset);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int copy_page_retry(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, true);
+}
+
+static int copy_page(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, false);
+}
+
+static void *uffd_poll_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ struct pollfd pollfd[2];
+ struct uffd_msg msg;
+ struct uffdio_register uffd_reg;
+ int ret;
+ unsigned long offset;
+ char tmp_chr;
+ unsigned long userfaults = 0;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd[cpu*2];
+ pollfd[1].events = POLLIN;
+
+ for (;;) {
+ ret = poll(pollfd, 2, -1);
+ if (!ret)
+ fprintf(stderr, "poll error %d\n", ret), exit(1);
+ if (ret < 0)
+ perror("poll"), exit(1);
+ if (pollfd[1].revents & POLLIN) {
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ fprintf(stderr, "read pipefd error\n"),
+ exit(1);
+ break;
+ }
+ if (!(pollfd[0].revents & POLLIN))
+ fprintf(stderr, "pollfd[0].revents %d\n",
+ pollfd[0].revents), exit(1);
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
+ perror("nonblocking read error"), exit(1);
+ }
+ switch (msg.event) {
+ default:
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ break;
+ case UFFD_EVENT_PAGEFAULT:
+ if (msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(uffd, offset))
+ userfaults++;
+ break;
+ case UFFD_EVENT_FORK:
+ close(uffd);
+ uffd = msg.arg.fork.ufd;
+ pollfd[0].fd = uffd;
+ break;
+ case UFFD_EVENT_REMOVE:
+ uffd_reg.range.start = msg.arg.remove.start;
+ uffd_reg.range.len = msg.arg.remove.end -
+ msg.arg.remove.start;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ fprintf(stderr, "remove failure\n"), exit(1);
+ break;
+ case UFFD_EVENT_REMAP:
+ area_dst = (char *)(unsigned long)msg.arg.remap.to;
+ break;
+ }
+ }
+ return (void *)userfaults;
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+ unsigned long *this_cpu_userfaults;
+ struct uffd_msg msg;
+ unsigned long offset;
+ int ret;
+
+ this_cpu_userfaults = (unsigned long *) arg;
+ *this_cpu_userfaults = 0;
+
+ pthread_mutex_unlock(&uffd_read_mutex);
+ /* from here cancellation is ok */
+
+ for (;;) {
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret != sizeof(msg)) {
+ if (ret < 0)
+ perror("blocking read error"), exit(1);
+ else
+ fprintf(stderr, "short read\n"), exit(1);
+ }
+ if (msg.event != UFFD_EVENT_PAGEFAULT)
+ fprintf(stderr, "unexpected msg event %u\n",
+ msg.event), exit(1);
+ if (bounces & BOUNCE_VERIFY &&
+ msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
+ offset = (char *)(unsigned long)msg.arg.pagefault.address -
+ area_dst;
+ offset &= ~(page_size-1);
+ if (copy_page(uffd, offset))
+ (*this_cpu_userfaults)++;
+ }
+ return (void *)NULL;
+}
+
+static void *background_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ unsigned long page_nr;
+
+ for (page_nr = cpu * nr_pages_per_cpu;
+ page_nr < (cpu+1) * nr_pages_per_cpu;
+ page_nr++)
+ copy_page_retry(uffd, page_nr * page_size);
+
+ return NULL;
+}
+
+static int stress(unsigned long *userfaults)
+{
+ unsigned long cpu;
+ pthread_t locking_threads[nr_cpus];
+ pthread_t uffd_threads[nr_cpus];
+ pthread_t background_threads[nr_cpus];
+ void **_userfaults = (void **) userfaults;
+
+ finished = 0;
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pthread_create(&locking_threads[cpu], &attr,
+ locking_thread, (void *)cpu))
+ return 1;
+ if (bounces & BOUNCE_POLL) {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_poll_thread, (void *)cpu))
+ return 1;
+ } else {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_read_thread,
+ &_userfaults[cpu]))
+ return 1;
+ pthread_mutex_lock(&uffd_read_mutex);
+ }
+ if (pthread_create(&background_threads[cpu], &attr,
+ background_thread, (void *)cpu))
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(background_threads[cpu], NULL))
+ return 1;
+
+ /*
+ * Be strict and immediately zap area_src: the whole area has
+ * already been transferred by the background threads. The
+ * area_src could then be faulted in, in a racy way, by the
+ * still-running uffdio_threads reading zeropages after we
+ * zapped area_src (but they're guaranteed to get -EEXIST from
+ * UFFDIO_COPY without writing zero pages into area_dst
+ * because the background threads already completed).
+ */
+ if (uffd_test_ops->release_pages(area_src))
+ return 1;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ char c;
+ if (bounces & BOUNCE_POLL) {
+ if (write(pipefd[cpu*2+1], &c, 1) != 1) {
+ fprintf(stderr, "pipefd write error\n");
+ return 1;
+ }
+ if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
+ return 1;
+ } else {
+ if (pthread_cancel(uffd_threads[cpu]))
+ return 1;
+ if (pthread_join(uffd_threads[cpu], NULL))
+ return 1;
+ }
+ }
+
+ finished = 1;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
+ return 0;
+}
+
+static int userfaultfd_open(int features)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ fprintf(stderr,
+ "userfaultfd syscall not available in this kernel\n");
+ return 1;
+ }
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = features;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+ fprintf(stderr, "UFFDIO_API\n");
+ return 1;
+ }
+ if (uffdio_api.api != UFFD_API) {
+ fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
+ return 1;
+ }
+
+ return 0;
+}
+
+sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+ if (sig == SIGBUS) {
+ if (sigbuf)
+ siglongjmp(*sigbuf, 1);
+ abort();
+ }
+}
+
+/*
+ * For the non-cooperative userfaultfd test we fork() a process that
+ * generates pagefaults, mremaps the area monitored by the
+ * userfaultfd and finally releases the monitored area.
+ * For anonymous and shared memory the area is divided into two
+ * parts: the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates an event only for
+ * shmem and anonymous memory (UFFD_EVENT_REMOVE), hence it is not
+ * checked for hugetlb.
+ * For the signal test (UFFD_FEATURE_SIGBUS), signal_test = 1: we
+ * register the monitored area, generate pagefaults and test that the
+ * signal is delivered; UFFDIO_COPY is used to allocate the missing
+ * page and retry. signal_test = 2 tests the robustness use case: we
+ * release the monitored area and fork a process that generates
+ * pagefaults, verifying the signal is generated. This also tests the
+ * UFFD_FEATURE_EVENT_FORK event along with the signal feature; using
+ * a monitor thread, we verify no userfault events are generated.
+ */
+static int faulting_process(int signal_test)
+{
+ unsigned long nr;
+ unsigned long long count;
+ unsigned long split_nr_pages;
+ unsigned long lastnr;
+ struct sigaction act;
+ unsigned long signalled = 0;
+
+ if (test_type != TEST_HUGETLB)
+ split_nr_pages = (nr_pages + 1) / 2;
+ else
+ split_nr_pages = nr_pages;
+
+ if (signal_test) {
+ sigbuf = &jbuf;
+ memset(&act, 0, sizeof(act));
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0)) {
+ perror("sigaction");
+ return 1;
+ }
+ lastnr = (unsigned long)-1;
+ }
+
+ for (nr = 0; nr < split_nr_pages; nr++) {
+ if (signal_test) {
+ if (sigsetjmp(*sigbuf, 1) != 0) {
+ if (nr == lastnr) {
+ fprintf(stderr, "Signal repeated\n");
+ return 1;
+ }
+
+ lastnr = nr;
+ if (signal_test == 1) {
+ if (copy_page(uffd, nr * page_size))
+ signalled++;
+ } else {
+ signalled++;
+ continue;
+ }
+ }
+ }
+
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ fprintf(stderr,
+ "nr %lu memory corruption %Lu %Lu\n",
+ nr, count,
+ count_verify[nr]), exit(1);
+ }
+ }
+
+ if (signal_test)
+ return signalled != split_nr_pages;
+
+ if (test_type == TEST_HUGETLB)
+ return 0;
+
+ area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
+ if (area_dst == MAP_FAILED)
+ perror("mremap"), exit(1);
+
+ for (; nr < nr_pages; nr++) {
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ fprintf(stderr,
+ "nr %lu memory corruption %Lu %Lu\n",
+ nr, count,
+ count_verify[nr]), exit(1);
+ }
+ }
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
+ fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+ }
+
+ return 0;
+}
+
+static void retry_uffdio_zeropage(int ufd,
+ struct uffdio_zeropage *uffdio_zeropage,
+ unsigned long offset)
+{
+ uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
+ uffdio_zeropage->range.len,
+ offset);
+ if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
+ if (uffdio_zeropage->zeropage != -EEXIST)
+ fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
+ uffdio_zeropage->zeropage), exit(1);
+ } else {
+ fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
+ uffdio_zeropage->zeropage), exit(1);
+ }
+}
+
+static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
+{
+ struct uffdio_zeropage uffdio_zeropage;
+ int ret;
+ unsigned long has_zeropage;
+
+ has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
+
+ if (offset >= nr_pages * page_size)
+ fprintf(stderr, "unexpected offset %lu\n",
+ offset), exit(1);
+ uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
+ uffdio_zeropage.range.len = page_size;
+ uffdio_zeropage.mode = 0;
+ ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+ if (ret) {
+ /* real retval in ufdio_zeropage.zeropage */
+ if (has_zeropage) {
+ if (uffdio_zeropage.zeropage == -EEXIST)
+ fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
+ exit(1);
+ else
+ fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ } else {
+ if (uffdio_zeropage.zeropage != -EINVAL)
+ fprintf(stderr,
+ "UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ }
+ } else if (has_zeropage) {
+ if (uffdio_zeropage.zeropage != page_size) {
+ fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ } else {
+ if (test_uffdio_zeropage_eexist && retry) {
+ test_uffdio_zeropage_eexist = false;
+ retry_uffdio_zeropage(ufd, &uffdio_zeropage,
+ offset);
+ }
+ return 1;
+ }
+ } else {
+ fprintf(stderr,
+ "UFFDIO_ZEROPAGE succeeded %Ld\n",
+ uffdio_zeropage.zeropage), exit(1);
+ }
+
+ return 0;
+}
+
+static int uffdio_zeropage(int ufd, unsigned long offset)
+{
+ return __uffdio_zeropage(ufd, offset, false);
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static int userfaultfd_zeropage_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+
+ printf("testing UFFDIO_ZEROPAGE: ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ if (userfaultfd_open(0) < 0)
+ return 1;
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (uffdio_zeropage(uffd, 0)) {
+ if (my_bcmp(area_dst, zeropage, page_size))
+ fprintf(stderr, "zeropage is not zero\n"), exit(1);
+ }
+
+ close(uffd);
+ printf("done.\n");
+ return 0;
+}
+
+static int userfaultfd_events_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ int err, features;
+ pid_t pid;
+ char c;
+
+ printf("testing events (fork, remap, remove): ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
+ UFFD_FEATURE_EVENT_REMOVE;
+ if (userfaultfd_open(features) < 0)
+ return 1;
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ perror("uffd_poll_thread create"), exit(1);
+
+ pid = fork();
+ if (pid < 0)
+ perror("fork"), exit(1);
+
+ if (!pid)
+ return faulting_process(0);
+
+ waitpid(pid, &err, 0);
+ if (err)
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ perror("pipe write"), exit(1);
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ return 1;
+
+ close(uffd);
+ printf("userfaults: %ld\n", userfaults);
+
+ return userfaults != nr_pages;
+}
+
+static int userfaultfd_sig_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ int err, features;
+ pid_t pid;
+ char c;
+
+ printf("testing signal delivery: ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
+ if (userfaultfd_open(features) < 0)
+ return 1;
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ fprintf(stderr, "register failure\n"), exit(1);
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls)
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n"),
+ exit(1);
+
+ if (faulting_process(1))
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ perror("uffd_poll_thread create"), exit(1);
+
+ pid = fork();
+ if (pid < 0)
+ perror("fork"), exit(1);
+
+ if (!pid)
+ exit(faulting_process(2));
+
+ waitpid(pid, &err, 0);
+ if (err)
+ fprintf(stderr, "faulting process failed\n"), exit(1);
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ perror("pipe write"), exit(1);
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ return 1;
+
+ printf("done.\n");
+ if (userfaults)
+ fprintf(stderr, "Signal test failed, userfaults: %ld\n",
+ userfaults);
+ close(uffd);
+ return userfaults != 0;
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ char *tmp_area;
+ unsigned long nr;
+ struct uffdio_register uffdio_register;
+ unsigned long cpu;
+ int err;
+ unsigned long userfaults[nr_cpus];
+
+ uffd_test_ops->allocate_area((void **)&area_src);
+ if (!area_src)
+ return 1;
+ uffd_test_ops->allocate_area((void **)&area_dst);
+ if (!area_dst)
+ return 1;
+
+ if (userfaultfd_open(0) < 0)
+ return 1;
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify) {
+ perror("count_verify");
+ return 1;
+ }
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) = (pthread_mutex_t)
+ PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd) {
+ perror("pipefd");
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
+ perror("pipe");
+ return 1;
+ }
+ }
+
+ if (posix_memalign(&area, page_size, page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+ zeropage = area;
+ bzero(zeropage, page_size);
+
+ pthread_mutex_lock(&uffd_read_mutex);
+
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+ err = 0;
+ while (bounces--) {
+ unsigned long expected_ioctls;
+
+ printf("bounces: %d, mode:", bounces);
+ if (bounces & BOUNCE_RANDOM)
+ printf(" rnd");
+ if (bounces & BOUNCE_RACINGFAULTS)
+ printf(" racing");
+ if (bounces & BOUNCE_VERIFY)
+ printf(" ver");
+ if (bounces & BOUNCE_POLL)
+ printf(" poll");
+ printf(", ");
+ fflush(stdout);
+
+ if (bounces & BOUNCE_POLL)
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+ else
+ fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
+
+ /* register */
+ uffdio_register.range.start = (unsigned long) area_dst;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ return 1;
+ }
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ if ((uffdio_register.ioctls & expected_ioctls) !=
+ expected_ioctls) {
+ fprintf(stderr,
+ "unexpected missing ioctl for anon memory\n");
+ return 1;
+ }
+
+ if (area_dst_alias) {
+ uffdio_register.range.start = (unsigned long)
+ area_dst_alias;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure alias\n");
+ return 1;
+ }
+ }
+
+ /*
+ * The madvise done previously isn't enough: some
+ * uffd_thread could have read userfaults (one of
+ * those already resolved by the background thread)
+ * and it may be in the process of calling
+ * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+ * area_src and would map a zero page in it (of
+ * course such a UFFDIO_COPY is perfectly safe as it'd
+ * return -EEXIST). The problem comes at the next
+ * bounce though: that racing UFFDIO_COPY would
+ * generate zeropages in the area_src, invalidating
+ * the previous MADV_DONTNEED. Without this additional
+ * MADV_DONTNEED those zeropage leftovers in the
+ * area_src would lead to -EEXIST failures during the
+ * next bounce, effectively leaving a zeropage in the
+ * area_dst.
+ *
+ * Try commenting out this madvise to see the memory
+ * corruption being caught pretty quickly.
+ *
+ * khugepaged is also only inhibited from collapsing THP
+ * over a MADV_DONTNEED range once UFFDIO_REGISTER has
+ * run, so the MADV_DONTNEED has to be done here, after
+ * registration.
+ */
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ /* bounce pass */
+ if (stress(userfaults))
+ return 1;
+
+ /* unregister */
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
+ fprintf(stderr, "unregister failure\n");
+ return 1;
+ }
+ if (area_dst_alias) {
+ uffdio_register.range.start = (unsigned long) area_dst;
+ if (ioctl(uffd, UFFDIO_UNREGISTER,
+ &uffdio_register.range)) {
+ fprintf(stderr, "unregister failure alias\n");
+ return 1;
+ }
+ }
+
+ /* verification */
+ if (bounces & BOUNCE_VERIFY) {
+ for (nr = 0; nr < nr_pages; nr++) {
+ if (*area_count(area_dst, nr) != count_verify[nr]) {
+ fprintf(stderr,
+ "error area_count %Lu %Lu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr],
+ nr);
+ err = 1;
+ bounces = 0;
+ }
+ }
+ }
+
+ /* prepare next bounce */
+ tmp_area = area_src;
+ area_src = area_dst;
+ area_dst = tmp_area;
+
+ tmp_area = area_src_alias;
+ area_src_alias = area_dst_alias;
+ area_dst_alias = tmp_area;
+
+ printf("userfaults:");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ printf(" %lu", userfaults[cpu]);
+ printf("\n");
+ }
+
+ if (err)
+ return err;
+
+ close(uffd);
+ return userfaultfd_zeropage_test() || userfaultfd_sig_test()
+ || userfaultfd_events_test();
+}
+
+/*
+ * Copied from mlock2-tests.c
+ */
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+static void set_test_type(const char *type)
+{
+ if (!strcmp(type, "anon")) {
+ test_type = TEST_ANON;
+ uffd_test_ops = &anon_uffd_test_ops;
+ } else if (!strcmp(type, "hugetlb")) {
+ test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ } else if (!strcmp(type, "hugetlb_shared")) {
+ map_shared = true;
+ test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ } else if (!strcmp(type, "shmem")) {
+ map_shared = true;
+ test_type = TEST_SHMEM;
+ uffd_test_ops = &shmem_uffd_test_ops;
+ } else {
+ fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+ }
+
+ if (test_type == TEST_HUGETLB)
+ page_size = default_huge_page_size();
+ else
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ if (!page_size)
+ fprintf(stderr, "Unable to determine page size\n"),
+ exit(2);
+ if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+ > page_size)
+ fprintf(stderr, "Impossible to run this test\n"), exit(2);
+}
+
+static void sigalrm(int sig)
+{
+ if (sig != SIGALRM)
+ abort();
+ test_uffdio_copy_eexist = true;
+ test_uffdio_zeropage_eexist = true;
+ alarm(ALARM_INTERVAL_SECS);
+}
+
+int main(int argc, char **argv)
+{
+ if (argc < 4)
+ fprintf(stderr, "Usage: <test type> <MiB> <bounces> [hugetlbfs_file]\n"),
+ exit(1);
+
+ if (signal(SIGALRM, sigalrm) == SIG_ERR)
+ fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+ alarm(ALARM_INTERVAL_SECS);
+
+ set_test_type(argv[1]);
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
+ nr_cpus;
+ if (!nr_pages_per_cpu) {
+ fprintf(stderr, "invalid MiB\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+
+ bounces = atoi(argv[3]);
+ if (bounces <= 0) {
+ fprintf(stderr, "invalid bounces\n");
+ fprintf(stderr, "Usage: <MiB> <bounces>\n"), exit(1);
+ }
+ nr_pages = nr_pages_per_cpu * nr_cpus;
+
+ if (test_type == TEST_HUGETLB) {
+ if (argc < 5)
+ fprintf(stderr, "Usage: hugetlb <MiB> <bounces> <hugetlbfs_file>\n"),
+ exit(1);
+ huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
+ if (huge_fd < 0) {
+ fprintf(stderr, "Open of %s failed", argv[3]);
+ perror("open");
+ exit(1);
+ }
+ if (ftruncate(huge_fd, 0)) {
+ fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
+ perror("ftruncate");
+ exit(1);
+ }
+ }
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ nr_pages, nr_pages_per_cpu);
+ return userfaultfd_stress();
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/vm/va_128TBswitch.c
new file mode 100644
index 000000000..e7fe734c3
--- /dev/null
+++ b/tools/testing/selftests/vm/va_128TBswitch.c
@@ -0,0 +1,297 @@
+/*
+ * Authors: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+ *          Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <stdio.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#ifdef __powerpc64__
+#define PAGE_SIZE (64 << 10)
+/*
+ * This works with both 16M and 2M hugepage sizes.
+ */
+#define HUGETLB_SIZE (16 << 20)
+#else
+#define PAGE_SIZE (4 << 10)
+#define HUGETLB_SIZE (2 << 20)
+#endif
+
+/*
+ * A hint address >= 128TB (1UL << 47) is the value we use to
+ * select the large address space.
+ */
+#define ADDR_SWITCH_HINT (1UL << 47)
+#define LOW_ADDR ((void *) (1UL << 30))
+#define HIGH_ADDR ((void *) (1UL << 48))
+
+struct testcase {
+ void *addr;
+ unsigned long size;
+ unsigned long flags;
+ const char *msg;
+ unsigned int low_addr_required:1;
+ unsigned int keep_mapped:1;
+};
+
+static struct testcase testcases[] = {
+ {
+ /*
+ * If stack is moved, we could possibly allocate
+ * this at the requested address.
+ */
+ .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * We should never allocate at the requested address or above it.
+ * The length crosses the 128TB boundary; without MAP_FIXED we
+ * will always search in the lower address space.
+ */
+ .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * Exact mapping at 128TB: the area is free, so we should get it
+ * even without MAP_FIXED.
+ */
+ .addr = ((void *)(ADDR_SWITCH_HINT)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+ },
+ {
+ .addr = NULL,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = LOW_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(LOW_ADDR)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(HIGH_ADDR, MAP_FIXED)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1) again",
+ },
+ {
+ .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = ((void *)(ADDR_SWITCH_HINT)),
+ .size = PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT),
+ .size = 2 * PAGE_SIZE,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
+ },
+};
+
+static struct testcase hugetlb_testcases[] = {
+ {
+ .addr = NULL,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = LOW_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = HIGH_ADDR,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB) again",
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
+ .size = 2 * HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(ADDR_SWITCH_HINT),
+ .size = 2 * HUGETLB_SIZE,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)",
+ },
+};
+
+static int run_test(struct testcase *test, int count)
+{
+ void *p;
+ int i, ret = 0;
+
+ for (i = 0; i < count; i++) {
+ struct testcase *t = test + i;
+
+ p = mmap(t->addr, t->size, PROT_READ | PROT_WRITE, t->flags, -1, 0);
+
+ printf("%s: %p - ", t->msg, p);
+
+ if (p == MAP_FAILED) {
+ printf("FAILED\n");
+ ret = 1;
+ continue;
+ }
+
+ if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
+ printf("FAILED\n");
+ ret = 1;
+ } else {
+ /*
+ * Dereference the returned address so that we catch bugs
+ * in page fault handling.
+ */
+ memset(p, 0, t->size);
+ printf("OK\n");
+ }
+ if (!t->keep_mapped)
+ munmap(p, t->size);
+ }
+
+ return ret;
+}
+
+static int supported_arch(void)
+{
+#if defined(__powerpc64__)
+ return 1;
+#elif defined(__x86_64__)
+ return 1;
+#else
+ return 0;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+
+ if (!supported_arch())
+ return 0;
+
+ ret = run_test(testcases, ARRAY_SIZE(testcases));
+ if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
+ ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases));
+ return ret;
+}
diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/vm/virtual_address_range.c
new file mode 100644
index 000000000..1830d66a6
--- /dev/null
+++ b/tools/testing/selftests/vm/virtual_address_range.c
@@ -0,0 +1,139 @@
+/*
+ * Copyright 2017, Anshuman Khandual, IBM Corp.
+ * Licensed under GPLv2.
+ *
+ * Works on architectures which support 128TB virtual
+ * address range and beyond.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+
+/*
+ * The maximum address range mapped with a single mmap() call is a
+ * little more than 16GB. Hence 16GB is chosen as the single chunk
+ * size for address space mapping.
+ */
+#define MAP_CHUNK_SIZE (1UL << 34) /* 16GB */
+
+/*
+ * The address space up to 128TB is mapped without any hint and is
+ * enabled by default. The address space beyond 128TB, up to 512TB,
+ * is obtained by passing a hint address as the first argument to
+ * the mmap() system call.
+ *
+ * The process address space is thus divided into two areas: one
+ * below 128TB and one from 128TB up to 512TB, i.e. one of size
+ * 128TB and the other of size 384TB.
+ *
+ * On arm64 the address space is 256TB and no high mappings are
+ * supported so far.
+ */
+
+#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */
+#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL)
+#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL)
+
+#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */
+#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */
+
+#ifdef __aarch64__
+#define HIGH_ADDR_MARK ADDR_MARK_256TB
+#define HIGH_ADDR_SHIFT 49
+#define NR_CHUNKS_LOW NR_CHUNKS_256TB
+#define NR_CHUNKS_HIGH 0
+#else
+#define HIGH_ADDR_MARK ADDR_MARK_128TB
+#define HIGH_ADDR_SHIFT 48
+#define NR_CHUNKS_LOW NR_CHUNKS_128TB
+#define NR_CHUNKS_HIGH NR_CHUNKS_384TB
+#endif
+
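+/*
+ * Pick a random high hint: a power-of-two address between
+ * 1UL << HIGH_ADDR_SHIFT and 1UL << 62, which asks the kernel for
+ * a mapping in the high (above the switch boundary) range.
+ */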
+static char *hint_addr(void)
+{
+ int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT);
+
+ return (char *) (1UL << bits);
+}
+
+static int validate_addr(char *ptr, int high_addr)
+{
+ unsigned long addr = (unsigned long) ptr;
+
+ if (high_addr) {
+ if (addr < HIGH_ADDR_MARK) {
+ printf("Bad address %lx\n", addr);
+ return 1;
+ }
+ return 0;
+ }
+
+ if (addr > HIGH_ADDR_MARK) {
+ printf("Bad address %lx\n", addr);
+ return 1;
+ }
+ return 0;
+}
+
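+/*
+ * Note the inverted sense: returns 1 (test failure) when a mapping
+ * hinted below 128TB unexpectedly succeeds after the unhinted loop in
+ * main() has already failed, and 0 when the hinted mmap also fails.
+ */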
+static int validate_lower_address_hint(void)
+{
+ char *ptr;
+
+ ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
+ PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr == MAP_FAILED)
+ return 0;
+
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ char *ptr[NR_CHUNKS_LOW];
+ char *hptr[NR_CHUNKS_HIGH];
+ char *hint;
+ unsigned long i, lchunks, hchunks;
+
+ for (i = 0; i < NR_CHUNKS_LOW; i++) {
+ ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (ptr[i] == MAP_FAILED) {
+ if (validate_lower_address_hint())
+ return 1;
+ break;
+ }
+
+ if (validate_addr(ptr[i], 0))
+ return 1;
+ }
+ lchunks = i;
+
+ for (i = 0; i < NR_CHUNKS_HIGH; i++) {
+ hint = hint_addr();
+ hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (hptr[i] == MAP_FAILED)
+ break;
+
+ if (validate_addr(hptr[i], 1))
+ return 1;
+ }
+ hchunks = i;
+
+ for (i = 0; i < lchunks; i++)
+ munmap(ptr[i], MAP_CHUNK_SIZE);
+
+ for (i = 0; i < hchunks; i++)
+ munmap(hptr[i], MAP_CHUNK_SIZE);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore
new file mode 100644
index 000000000..5aac51575
--- /dev/null
+++ b/tools/testing/selftests/watchdog/.gitignore
@@ -0,0 +1 @@
+watchdog-test
diff --git a/tools/testing/selftests/watchdog/Makefile b/tools/testing/selftests/watchdog/Makefile
new file mode 100644
index 000000000..6b5598b55
--- /dev/null
+++ b/tools/testing/selftests/watchdog/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := watchdog-test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
new file mode 100644
index 000000000..f1c6e025c
--- /dev/null
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Watchdog Driver Test Program
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+#define DEFAULT_PING_RATE 1
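+/* seconds between keep-alive pings unless overridden with --pingrate */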
+
+int fd;
+const char v = 'V';
+static const char sopts[] = "bdehp:t:";
+static const struct option lopts[] = {
+ {"bootstatus", no_argument, NULL, 'b'},
+ {"disable", no_argument, NULL, 'd'},
+ {"enable", no_argument, NULL, 'e'},
+ {"help", no_argument, NULL, 'h'},
+ {"pingrate", required_argument, NULL, 'p'},
+ {"timeout", required_argument, NULL, 't'},
+ {NULL, no_argument, NULL, 0x0}
+};
+
+/*
+ * This function simply sends an IOCTL to the driver, which in turn ticks
+ * the PC Watchdog card to reset its internal timer so it doesn't trigger
+ * a computer reset.
+ */
+static void keep_alive(void)
+{
+ int dummy;
+ int ret;
+
+ ret = ioctl(fd, WDIOC_KEEPALIVE, &dummy);
+ if (!ret)
+ printf(".");
+}
+
+/*
+ * The main program. Run the program with "-d" to disable the card,
+ * or "-e" to enable the card.
+ */
+
+static void term(int sig)
+{
+ int ret = write(fd, &v, 1);
+
+ close(fd);
+ if (ret < 0)
+ printf("\nStopping watchdog ticks failed (%d)...\n", errno);
+ else
+ printf("\nStopping watchdog ticks...\n");
+ exit(0);
+}
+
+static void usage(char *progname)
+{
+ printf("Usage: %s [options]\n", progname);
+ printf(" -b, --bootstatus Get last boot status (Watchdog/POR)\n");
+ printf(" -d, --disable Turn off the watchdog timer\n");
+ printf(" -e, --enable Turn on the watchdog timer\n");
+ printf(" -h, --help Print the help message\n");
+ printf(" -p, --pingrate=P Set ping rate to P seconds (default %d)\n", DEFAULT_PING_RATE);
+ printf(" -t, --timeout=T Set timeout to T seconds\n");
+ printf("\n");
+ printf("Parameters are parsed left-to-right in real-time.\n");
+ printf("Example: %s -d -t 10 -p 5 -e\n", progname);
+}
+
+int main(int argc, char *argv[])
+{
+ int flags;
+ unsigned int ping_rate = DEFAULT_PING_RATE;
+ int ret;
+ int c;
+ int oneshot = 0;
+
+ setbuf(stdout, NULL);
+
+ fd = open("/dev/watchdog", O_WRONLY);
+
+ if (fd == -1) {
+ if (errno == ENOENT)
+ printf("Watchdog device not enabled.\n");
+ else if (errno == EACCES)
+ printf("Run watchdog as root.\n");
+ else
+ printf("Watchdog device open failed %s\n",
+ strerror(errno));
+ exit(-1);
+ }
+
+ while ((c = getopt_long(argc, argv, sopts, lopts, NULL)) != -1) {
+ switch (c) {
+ case 'b':
+ flags = 0;
+ oneshot = 1;
+ ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
+ if (!ret)
+ printf("Last boot is caused by: %s.\n", (flags != 0) ?
+ "Watchdog" : "Power-On-Reset");
+ else
+ printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno));
+ break;
+ case 'd':
+ flags = WDIOS_DISABLECARD;
+ ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+ if (!ret)
+ printf("Watchdog card disabled.\n");
+ else
+ printf("WDIOS_DISABLECARD error '%s'\n", strerror(errno));
+ break;
+ case 'e':
+ flags = WDIOS_ENABLECARD;
+ ret = ioctl(fd, WDIOC_SETOPTIONS, &flags);
+ if (!ret)
+ printf("Watchdog card enabled.\n");
+ else
+ printf("WDIOS_ENABLECARD error '%s'\n", strerror(errno));
+ break;
+ case 'p':
+ ping_rate = strtoul(optarg, NULL, 0);
+ if (!ping_rate)
+ ping_rate = DEFAULT_PING_RATE;
+ printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
+ break;
+ case 't':
+ flags = strtoul(optarg, NULL, 0);
+ ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
+ if (!ret)
+ printf("Watchdog timeout set to %u seconds.\n", flags);
+ else
+ printf("WDIOC_SETTIMEOUT error '%s'\n", strerror(errno));
+ break;
+ default:
+ usage(argv[0]);
+ goto end;
+ }
+ }
+
+ if (oneshot)
+ goto end;
+
+ printf("Watchdog Ticking Away!\n");
+
+ signal(SIGINT, term);
+
+ while (1) {
+ keep_alive();
+ sleep(ping_rate);
+ }
+end:
+ ret = write(fd, &v, 1);
+ if (ret < 0)
+ printf("Stopping watchdog ticks failed (%d)...\n", errno);
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
new file mode 100644
index 000000000..7757f73ff
--- /dev/null
+++ b/tools/testing/selftests/x86/.gitignore
@@ -0,0 +1,15 @@
+*_32
+*_64
+single_step_syscall
+sysret_ss_attrs
+syscall_nt
+ptrace_syscall
+test_mremap_vdso
+check_initial_reg_state
+sigreturn
+ldt_gdt
+iopl
+mpx-mini-test
+ioperm
+protection_keys
+test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
new file mode 100644
index 000000000..186520198
--- /dev/null
+++ b/tools/testing/selftests/x86/Makefile
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+include ../lib.mk
+
+.PHONY: all all_32 all_64 warn_32bit_failure clean
+
+UNAME_M := $(shell uname -m)
+CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
+ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \
+ protection_keys test_vdso test_vsyscall mov_ss_trap
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
+ test_FCMOV test_FCOMI test_FISTTP \
+ vdso_restorer
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip
+# Some selftests require 32bit support enabled also on 64bit systems
+TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
+
+TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) $(TARGETS_C_32BIT_NEEDED)
+TARGETS_C_64BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_64BIT_ONLY)
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),11)
+TARGETS_C_64BIT_ALL += $(TARGETS_C_32BIT_NEEDED)
+endif
+
+BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32)
+BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
+
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+
+# call32_from_64 in thunks.S uses absolute addresses.
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
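+
+# These templates add per-test aliases: with a 64-bit toolchain,
+# "make fsgsbase" and "make fsgsbase_64" both build $(OUTPUT)/fsgsbase_64.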
+
+ifeq ($(CAN_BUILD_I386),1)
+all: all_32
+TEST_PROGS += $(BINARIES_32)
+EXTRA_CFLAGS += -DCAN_BUILD_32
+$(foreach t,$(TARGETS_C_32BIT_ALL),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+all: all_64
+TEST_PROGS += $(BINARIES_64)
+EXTRA_CFLAGS += -DCAN_BUILD_64
+$(foreach t,$(TARGETS_C_64BIT_ALL),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+all_32: $(BINARIES_32)
+
+all_64: $(BINARIES_64)
+
+EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
+
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
+ $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
+
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
+ $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+	@echo "Warning: you seem to have a broken 32-bit build"; \
+	echo "environment. This will reduce test coverage of 64-bit"; \
+	echo "kernels. If you are using a Debian-like distribution,"; \
+	echo "try:"; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+
+# Some tests have additional dependencies.
+$(OUTPUT)/sysret_ss_attrs_64: thunks.S
+$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S
+$(OUTPUT)/test_syscall_vdso_32: thunks_32.S
+
+# check_initial_reg_state is special: it needs a custom entry, and it
+# needs to be static so that its interpreter doesn't destroy its initial
+# state.
+$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
+$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
new file mode 100755
index 000000000..356689c56
--- /dev/null
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+# check_cc.sh - Helper to test userspace compilation support
+# Copyright (c) 2015 Andrew Lutomirski
+# GPL v2
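+#
+# Prints "1" if $1 (the compiler) can build $2 (a test program) with any
+# remaining arguments, "0" otherwise, e.g.:
+#   ./check_cc.sh gcc trivial_32bit_program.c -m32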
+
+CC="$1"
+TESTPROG="$2"
+shift 2
+
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+ echo 1
+else
+ echo 0
+fi
+
+exit 0
diff --git a/tools/testing/selftests/x86/check_initial_reg_state.c b/tools/testing/selftests/x86/check_initial_reg_state.c
new file mode 100644
index 000000000..6aaed9b85
--- /dev/null
+++ b/tools/testing/selftests/x86/check_initial_reg_state.c
@@ -0,0 +1,109 @@
+/*
+ * check_initial_reg_state.c - check that execve sets the correct state
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+
+unsigned long ax, bx, cx, dx, si, di, bp, sp, flags;
+unsigned long r8, r9, r10, r11, r12, r13, r14, r15;
+
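+/*
+ * real_start runs as the ELF entry point (see the -Wl,-ereal_start and
+ * -static flags in the Makefile): it snapshots every GPR plus FLAGS
+ * into the globals above before falling through to the normal _start.
+ */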
+asm (
+ ".pushsection .text\n\t"
+ ".type real_start, @function\n\t"
+ ".global real_start\n\t"
+ "real_start:\n\t"
+#ifdef __x86_64__
+ "mov %rax, ax\n\t"
+ "mov %rbx, bx\n\t"
+ "mov %rcx, cx\n\t"
+ "mov %rdx, dx\n\t"
+ "mov %rsi, si\n\t"
+ "mov %rdi, di\n\t"
+ "mov %rbp, bp\n\t"
+ "mov %rsp, sp\n\t"
+ "mov %r8, r8\n\t"
+ "mov %r9, r9\n\t"
+ "mov %r10, r10\n\t"
+ "mov %r11, r11\n\t"
+ "mov %r12, r12\n\t"
+ "mov %r13, r13\n\t"
+ "mov %r14, r14\n\t"
+ "mov %r15, r15\n\t"
+ "pushfq\n\t"
+ "popq flags\n\t"
+#else
+ "mov %eax, ax\n\t"
+ "mov %ebx, bx\n\t"
+ "mov %ecx, cx\n\t"
+ "mov %edx, dx\n\t"
+ "mov %esi, si\n\t"
+ "mov %edi, di\n\t"
+ "mov %ebp, bp\n\t"
+ "mov %esp, sp\n\t"
+ "pushfl\n\t"
+ "popl flags\n\t"
+#endif
+ "jmp _start\n\t"
+ ".size real_start, . - real_start\n\t"
+ ".popsection");
+
+int main()
+{
+ int nerrs = 0;
+
+ if (sp == 0) {
+ printf("[FAIL]\tTest was built incorrectly\n");
+ return 1;
+ }
+
+ if (ax || bx || cx || dx || si || di || bp
+#ifdef __x86_64__
+ || r8 || r9 || r10 || r11 || r12 || r13 || r14 || r15
+#endif
+ ) {
+ printf("[FAIL]\tAll GPRs except SP should be 0\n");
+#define SHOW(x) printf("\t" #x " = 0x%lx\n", x);
+ SHOW(ax);
+ SHOW(bx);
+ SHOW(cx);
+ SHOW(dx);
+ SHOW(si);
+ SHOW(di);
+ SHOW(bp);
+ SHOW(sp);
+#ifdef __x86_64__
+ SHOW(r8);
+ SHOW(r9);
+ SHOW(r10);
+ SHOW(r11);
+ SHOW(r12);
+ SHOW(r13);
+ SHOW(r14);
+ SHOW(r15);
+#endif
+ nerrs++;
+ } else {
+ printf("[OK]\tAll GPRs except SP are 0\n");
+ }
+
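+	/*
+	 * 0x202 is the expected pristine EFLAGS value: bit 1 is
+	 * architecturally always set and IF (bit 9) is set, with every
+	 * other flag clear.
+	 */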
+ if (flags != 0x202) {
+ printf("[FAIL]\tFLAGS is 0x%lx, but it should be 0x202\n", flags);
+ nerrs++;
+ } else {
+ printf("[OK]\tFLAGS is 0x202\n");
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
new file mode 100644
index 000000000..ade443a88
--- /dev/null
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -0,0 +1,349 @@
+/*
+ * entry_from_vm86.c - tests kernel entries from vm86 mode
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This exercises a few paths that need to special-case vm86 mode.
+ *
+ * GPL v2.
+ */
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <sys/vm86.h>
+
+static unsigned long load_addr = 0x10000;
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+ (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+ printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+ nerrs++;
+ }
+
+ const char *signame;
+ if (sig == SIGSEGV)
+ signame = "SIGSEGV";
+ else if (sig == SIGILL)
+ signame = "SIGILL";
+ else
+ signame = "unexpected signal";
+
+ printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+ (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+ got_signal = 1;
+}
+
+asm (
+ ".pushsection .rodata\n\t"
+ ".type vmcode_bound, @object\n\t"
+ "vmcode:\n\t"
+ "vmcode_bound:\n\t"
+ ".code16\n\t"
+ "bound %ax, (2048)\n\t"
+ "int3\n\t"
+ "vmcode_sysenter:\n\t"
+ "sysenter\n\t"
+ "vmcode_syscall:\n\t"
+ "syscall\n\t"
+ "vmcode_sti:\n\t"
+ "sti\n\t"
+ "vmcode_int3:\n\t"
+ "int3\n\t"
+ "vmcode_int80:\n\t"
+ "int $0x80\n\t"
+ "vmcode_popf_hlt:\n\t"
+ "push %ax\n\t"
+ "popf\n\t"
+ "hlt\n\t"
+ "vmcode_umip:\n\t"
+ /* addressing via displacements */
+ "smsw (2052)\n\t"
+ "sidt (2054)\n\t"
+ "sgdt (2060)\n\t"
+ /* addressing via registers */
+ "mov $2066, %bx\n\t"
+ "smsw (%bx)\n\t"
+ "mov $2068, %bx\n\t"
+ "sidt (%bx)\n\t"
+ "mov $2074, %bx\n\t"
+ "sgdt (%bx)\n\t"
+ /* register operands, only for smsw */
+ "smsw %ax\n\t"
+ "mov %ax, (2080)\n\t"
+ "int3\n\t"
+ "vmcode_umip_str:\n\t"
+ "str %eax\n\t"
+ "vmcode_umip_sldt:\n\t"
+ "sldt %eax\n\t"
+ "int3\n\t"
+ ".size vmcode, . - vmcode\n\t"
+ "end_vmcode:\n\t"
+ ".code32\n\t"
+ ".popsection"
+ );
+
+extern unsigned char vmcode[], end_vmcode[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+ vmcode_sti[], vmcode_int3[], vmcode_int80[], vmcode_popf_hlt[],
+ vmcode_umip[], vmcode_umip_str[], vmcode_umip_sldt[];
+
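+/*
+ * main() copies the payload above to a page at load_addr; offsets 2048+
+ * within that page hold the BOUND limits and the scratch slots that
+ * do_umip_tests() reads the SMSW/SIDT/SGDT results back from.
+ */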
+/* Returns false if the test was skipped. */
+static bool do_test(struct vm86plus_struct *v86, unsigned long eip,
+ unsigned int rettype, unsigned int retarg,
+ const char *text)
+{
+ long ret;
+
+ printf("[RUN]\t%s from vm86 mode\n", text);
+ v86->regs.eip = eip;
+ ret = vm86(VM86_ENTER, v86);
+
+ if (ret == -1 && (errno == ENOSYS || errno == EPERM)) {
+ printf("[SKIP]\tvm86 %s\n",
+ errno == ENOSYS ? "not supported" : "not allowed");
+ return false;
+ }
+
+ if (VM86_TYPE(ret) == VM86_INTx) {
+ char trapname[32];
+ int trapno = VM86_ARG(ret);
+ if (trapno == 13)
+ strcpy(trapname, "GP");
+ else if (trapno == 5)
+ strcpy(trapname, "BR");
+ else if (trapno == 14)
+ strcpy(trapname, "PF");
+ else
+ sprintf(trapname, "%d", trapno);
+
+ printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
+ } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
+ printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+ } else if (VM86_TYPE(ret) == VM86_TRAP) {
+ printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+ VM86_ARG(ret));
+ } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+ printf("[INFO]\tExited vm86 mode due to a signal\n");
+ } else if (VM86_TYPE(ret) == VM86_STI) {
+ printf("[INFO]\tExited vm86 mode due to STI\n");
+ } else {
+ printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
+ VM86_TYPE(ret), VM86_ARG(ret));
+ }
+
+ if (rettype == -1 ||
+ (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+ printf("[OK]\tReturned correctly\n");
+ } else {
+ printf("[FAIL]\tIncorrect return reason (started at eip = 0x%lx, ended at eip = 0x%lx)\n", eip, v86->regs.eip);
+ nerrs++;
+ }
+
+ return true;
+}
+
+void do_umip_tests(struct vm86plus_struct *vm86, unsigned char *test_mem)
+{
+ struct table_desc {
+ unsigned short limit;
+ unsigned long base;
+ } __attribute__((packed));
+
+ /* Initialize variables with arbitrary values */
+ struct table_desc gdt1 = { .base = 0x3c3c3c3c, .limit = 0x9999 };
+ struct table_desc gdt2 = { .base = 0x1a1a1a1a, .limit = 0xaeae };
+ struct table_desc idt1 = { .base = 0x7b7b7b7b, .limit = 0xf1f1 };
+ struct table_desc idt2 = { .base = 0x89898989, .limit = 0x1313 };
+	unsigned short msw1 = 0x1414, msw2 = 0x2525, msw3 = 0x3737;
+
+	/* UMIP -- if the kernel emulates the #GP-faulting instructions, the payload exits via INT3 */
+ do_test(vm86, vmcode_umip - vmcode, VM86_TRAP, 3, "UMIP tests");
+
+ /* Results from displacement-only addressing */
+ msw1 = *(unsigned short *)(test_mem + 2052);
+ memcpy(&idt1, test_mem + 2054, sizeof(idt1));
+ memcpy(&gdt1, test_mem + 2060, sizeof(gdt1));
+
+ /* Results from register-indirect addressing */
+ msw2 = *(unsigned short *)(test_mem + 2066);
+ memcpy(&idt2, test_mem + 2068, sizeof(idt2));
+ memcpy(&gdt2, test_mem + 2074, sizeof(gdt2));
+
+ /* Results when using register operands */
+ msw3 = *(unsigned short *)(test_mem + 2080);
+
+ printf("[INFO]\tResult from SMSW:[0x%04x]\n", msw1);
+ printf("[INFO]\tResult from SIDT: limit[0x%04x]base[0x%08lx]\n",
+ idt1.limit, idt1.base);
+ printf("[INFO]\tResult from SGDT: limit[0x%04x]base[0x%08lx]\n",
+ gdt1.limit, gdt1.base);
+
+ if (msw1 != msw2 || msw1 != msw3)
+ printf("[FAIL]\tAll the results of SMSW should be the same.\n");
+ else
+ printf("[PASS]\tAll the results from SMSW are identical.\n");
+
+ if (memcmp(&gdt1, &gdt2, sizeof(gdt1)))
+ printf("[FAIL]\tAll the results of SGDT should be the same.\n");
+ else
+ printf("[PASS]\tAll the results from SGDT are identical.\n");
+
+ if (memcmp(&idt1, &idt2, sizeof(idt1)))
+ printf("[FAIL]\tAll the results of SIDT should be the same.\n");
+ else
+ printf("[PASS]\tAll the results from SIDT are identical.\n");
+
+ sethandler(SIGILL, sighandler, 0);
+ do_test(vm86, vmcode_umip_str - vmcode, VM86_SIGNAL, 0,
+ "STR instruction");
+ clearhandler(SIGILL);
+
+ sethandler(SIGILL, sighandler, 0);
+ do_test(vm86, vmcode_umip_sldt - vmcode, VM86_SIGNAL, 0,
+ "SLDT instruction");
+ clearhandler(SIGILL);
+}
+
+int main(void)
+{
+ struct vm86plus_struct v86;
+ unsigned char *addr = mmap((void *)load_addr, 4096,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+				   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (addr != (unsigned char *)load_addr)
+ err(1, "mmap");
+
+ memcpy(addr, vmcode, end_vmcode - vmcode);
+ addr[2048] = 2;
+ addr[2050] = 3;
+
+ memset(&v86, 0, sizeof(v86));
+
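+	/*
+	 * Real-mode segment bases are 16-byte paragraphs, so load_addr / 16
+	 * points all four segment registers at the page populated above.
+	 */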
+ v86.regs.cs = load_addr / 16;
+ v86.regs.ss = load_addr / 16;
+ v86.regs.ds = load_addr / 16;
+ v86.regs.es = load_addr / 16;
+
+ /* Use the end of the page as our stack. */
+ v86.regs.esp = 4096;
+
+ assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
+
+	/* #BR -- should exit vm86 with #BR, i.e. VM86_INTx 5 */
+ do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+ /*
+ * SYSENTER -- should cause #GP or #UD depending on CPU.
+ * Expected return type -1 means that we shouldn't validate
+ * the vm86 return value. This will avoid problems on non-SEP
+ * CPUs.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+ clearhandler(SIGILL);
+
+ /*
+ * SYSCALL would be a disaster in VM86 mode. Fortunately,
+ * there is no kernel that both enables SYSCALL and sets
+ * EFER.SCE, so it's #UD on all systems. But vm86 is
+ * buggy (or has a "feature"), so the SIGILL will actually
+ * be delivered.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+ clearhandler(SIGILL);
+
+ /* STI with VIP set */
+ v86.regs.eflags |= X86_EFLAGS_VIP;
+ v86.regs.eflags &= ~X86_EFLAGS_IF;
+ do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+ /* POPF with VIP set but IF clear: should not trap */
+ v86.regs.eflags = X86_EFLAGS_VIP;
+ v86.regs.eax = 0;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP set and IF clear");
+
+ /* POPF with VIP set and IF set: should trap */
+ v86.regs.eflags = X86_EFLAGS_VIP;
+ v86.regs.eax = X86_EFLAGS_IF;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_STI, 0, "POPF with VIP and IF set");
+
+ /* POPF with VIP clear and IF set: should not trap */
+ v86.regs.eflags = 0;
+ v86.regs.eax = X86_EFLAGS_IF;
+ do_test(&v86, vmcode_popf_hlt - vmcode, VM86_UNKNOWN, 0, "POPF with VIP clear and IF set");
+
+ v86.regs.eflags = 0;
+
+ /* INT3 -- should cause #BP */
+ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+ /* INT80 -- should exit with "INTx 0x80" */
+ v86.regs.eax = (unsigned int)-1;
+ do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+	/* UMIP -- the kernel should emulate each instruction; see do_umip_tests() */
+ do_umip_tests(&v86, addr);
+
+ /* Execute a null pointer */
+ v86.regs.cs = 0;
+ v86.regs.ss = 0;
+ sethandler(SIGSEGV, sighandler, 0);
+ got_signal = 0;
+ if (do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer") &&
+ !got_signal) {
+ printf("[FAIL]\tDid not receive SIGSEGV\n");
+ nerrs++;
+ }
+ clearhandler(SIGSEGV);
+
+ /* Make sure nothing explodes if we fork. */
+ if (fork() == 0)
+ return 0;
+
+ return (nerrs == 0 ? 0 : 1);
+}
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
new file mode 100644
index 000000000..f249e042b
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -0,0 +1,427 @@
+/*
+ * fsgsbase.c, an fsgsbase test
+ * Copyright (c) 2014-2016 Andy Lutomirski
+ * GPL v2
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/ucontext.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+static volatile sig_atomic_t want_segv;
+static volatile unsigned long segv_addr;
+
+static int nerrs;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (!want_segv) {
+ clearhandler(SIGSEGV);
+ return; /* Crash cleanly. */
+ }
+
+ want_segv = false;
+ segv_addr = (unsigned long)si->si_addr;
+
+ ctx->uc_mcontext.gregs[REG_RIP] += 4; /* Skip the faulting mov */
+
+}
+
+enum which_base { FS, GS };
+
+static unsigned long read_base(enum which_base which)
+{
+ unsigned long offset;
+ /*
+ * Unless we have FSGSBASE, there's no direct way to do this from
+ * user mode. We can get at it indirectly using signals, though.
+ */
+
+ want_segv = true;
+
+ offset = 0;
+ if (which == FS) {
+ /* Use a constant-length instruction here. */
+ asm volatile ("mov %%fs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax" : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ /*
+ * If that didn't segfault, try the other end of the address space.
+ * Unless we get really unlucky and run into the vsyscall page, this
+ * is guaranteed to segfault.
+ */
+
+ offset = (ULONG_MAX >> 1) + 1;
+ if (which == FS) {
+ asm volatile ("mov %%fs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ } else {
+ asm volatile ("mov %%gs:(%%rcx), %%rax"
+ : : "c" (offset) : "rax");
+ }
+ if (!want_segv)
+ return segv_addr + offset;
+
+ abort();
+}
+
+static void check_gs_value(unsigned long value)
+{
+ unsigned long base;
+ unsigned short sel;
+
+ printf("[RUN]\tARCH_SET_GS to 0x%lx\n", value);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, value) != 0)
+ err(1, "ARCH_SET_GS");
+
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ base = read_base(GS);
+ if (base == value) {
+ printf("[OK]\tGSBASE was set as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == value) {
+ printf("[OK]\tARCH_GET_GS worked as expected (selector 0x%hx)\n",
+ sel);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tARCH_GET_GS was not as expected: got 0x%lx (selector 0x%hx)\n",
+ base, sel);
+ }
+}
+
+static void mov_0_gs(unsigned long initial_base, bool schedule)
+{
+ unsigned long base, arch_base;
+
+	printf("[RUN]\tARCH_SET_GS to 0x%lx then mov 0 to %%gs%s\n", initial_base, schedule ? " and schedule" : "");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, initial_base) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (schedule)
+ usleep(10);
+
+ asm volatile ("mov %0, %%gs" : : "rm" (0));
+ base = read_base(GS);
+ if (syscall(SYS_arch_prctl, ARCH_GET_GS, &arch_base) != 0)
+ err(1, "ARCH_GET_GS");
+ if (base == arch_base) {
+ printf("[OK]\tGSBASE is 0x%lx\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx but kernel reports 0x%lx\n", base, arch_base);
+ }
+}
+
+static volatile unsigned long remote_base;
+static volatile bool remote_hard_zero;
+static volatile unsigned int ftx;
+
+/*
+ * ARCH_SET_FS/GS(0) may or may not program a selector of zero. HARD_ZERO
+ * means to force the selector to zero to improve test coverage.
+ */
+#define HARD_ZERO 0xa1fa5f343cb85fa4
+
+static void do_remote_base()
+{
+ unsigned long to_set = remote_base;
+ bool hard_zero = false;
+ if (to_set == HARD_ZERO) {
+ to_set = 0;
+ hard_zero = true;
+ }
+
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, to_set) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ unsigned short sel;
+ asm volatile ("mov %%gs, %0" : "=rm" (sel));
+ printf("\tother thread: ARCH_SET_GS(0x%lx)%s -- sel is 0x%hx\n",
+ to_set, hard_zero ? " and clear gs" : "", sel);
+}
+
+void do_unexpected_base(void)
+{
+ /*
+	 * The goal here is to try to arrange for GS == 0, GSBASE !=
+	 * 0, and for the kernel to think that GSBASE == 0.
+	 *
+	 * To make the test as reliable as possible, this uses
+	 * explicit descriptors. (This is not the only way. This
+ * could use ARCH_SET_GS with a low, nonzero base, but the
+ * relevant side effect of ARCH_SET_GS could change.)
+ */
+
+ /* Step 1: tell the kernel that we have GSBASE == 0. */
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+
+ /* Step 2: change GSBASE without telling the kernel. */
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0xBAADF00D,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tother thread: using LDT slot 0\n");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
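+		/*
+		 * Fall back to the 32-bit set_thread_area entry point
+		 * (int $0x80, nr 243), which truncates pointers to 32
+		 * bits -- hence the MAP_32BIT copy of the descriptor.
+		 */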
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+		if (low_desc == MAP_FAILED)
+			err(1, "mmap");
+		memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "r8", "r9", "r10", "r11");
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- test won't do anything\n");
+ return;
+ }
+ printf("\tother thread: using GDT slot %d\n", desc.entry_number);
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)((desc.entry_number << 3) | 0x3)));
+ }
+
+ /*
+ * Step 3: set the selector back to zero. On AMD chips, this will
+ * preserve GSBASE.
+ */
+
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+}
+
+static void *threadproc(void *ctx)
+{
+ while (1) {
+ while (ftx == 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ if (ftx == 3)
+ return NULL;
+
+ if (ftx == 1)
+ do_remote_base();
+ else if (ftx == 2)
+ do_unexpected_base();
+ else
+ errx(1, "helper thread got bad command");
+
+ ftx = 0;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ }
+}
+
+static void set_gs_and_switch_to(unsigned long local,
+ unsigned short force_sel,
+ unsigned long remote)
+{
+ unsigned long base;
+ unsigned short sel_pre_sched, sel_post_sched;
+
+ bool hard_zero = false;
+ if (local == HARD_ZERO) {
+ hard_zero = true;
+ local = 0;
+ }
+
+ printf("[RUN]\tARCH_SET_GS(0x%lx)%s, then schedule to 0x%lx\n",
+ local, hard_zero ? " and clear gs" : "", remote);
+ if (force_sel)
+ printf("\tBefore schedule, set selector to 0x%hx\n", force_sel);
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, local) != 0)
+ err(1, "ARCH_SET_GS");
+ if (hard_zero)
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ if (read_base(GS) != local) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE wasn't set as expected\n");
+ }
+
+ if (force_sel) {
+ asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+ sel_pre_sched = force_sel;
+ local = read_base(GS);
+
+ /*
+ * Signal delivery seems to mess up weird selectors. Put it
+ * back.
+ */
+ asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
+ } else {
+ asm volatile ("mov %%gs, %0" : "=rm" (sel_pre_sched));
+ }
+
+ remote_base = remote;
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ asm volatile ("mov %%gs, %0" : "=rm" (sel_post_sched));
+ base = read_base(GS);
+ if (base == local && sel_pre_sched == sel_post_sched) {
+ printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
+ sel_pre_sched, local);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
+ sel_pre_sched, local, sel_post_sched, base);
+ }
+}
+
+static void test_unexpected_base(void)
+{
+ unsigned long base;
+
+ printf("[RUN]\tARCH_SET_GS(0), clear gs, then manipulate GSBASE in a different thread\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 0) != 0)
+ err(1, "ARCH_SET_GS");
+ asm volatile ("mov %0, %%gs" : : "rm" ((unsigned short)0));
+
+ ftx = 2;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+ while (ftx != 0)
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 1, NULL, NULL, 0);
+
+ base = read_base(GS);
+ if (base == 0) {
+ printf("[OK]\tGSBASE remained 0\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to 0x%lx\n", base);
+ }
+}
+
+int main()
+{
+ pthread_t thread;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ check_gs_value(0);
+ check_gs_value(1);
+ check_gs_value(0x200000000);
+ check_gs_value(0);
+ check_gs_value(0x200000000);
+ check_gs_value(1);
+
+ for (int sched = 0; sched < 2; sched++) {
+ mov_0_gs(0, !!sched);
+ mov_0_gs(1, !!sched);
+ mov_0_gs(0x200000000, !!sched);
+ }
+
+ /* Set up for multithreading. */
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0"); /* should never fail */
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+ static unsigned long bases_with_hard_zero[] = {
+ 0, HARD_ZERO, 1, 0x200000000,
+ };
+
+ for (int local = 0; local < 4; local++) {
+ for (int remote = 0; remote < 4; remote++) {
+ for (unsigned short s = 0; s < 5; s++) {
+ unsigned short sel = s;
+ if (s == 4)
+ asm ("mov %%ss, %0" : "=rm" (sel));
+ set_gs_and_switch_to(
+ bases_with_hard_zero[local],
+ sel,
+ bases_with_hard_zero[remote]);
+ }
+ }
+ }
+
+ test_unexpected_base();
+
+ ftx = 3; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/ioperm.c b/tools/testing/selftests/x86/ioperm.c
new file mode 100644
index 000000000..01de41c1b
--- /dev/null
+++ b/tools/testing/selftests/x86/ioperm.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ioperm.c - Test case for ioperm(2)
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static bool try_outb(unsigned short port)
+{
+	bool ret;
+
+	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
+	if (sigsetjmp(jmpbuf, 1) != 0) {
+		ret = false;
+	} else {
+		asm volatile ("outb %%al, %w[port]"
+			      : : [port] "Nd" (port), "a" (0));
+		ret = true;
+	}
+	clearhandler(SIGSEGV);
+	return ret;
+}
+
+static void expect_ok(unsigned short port)
+{
+ if (!try_outb(port)) {
+ printf("[FAIL]\toutb to 0x%02hx failed\n", port);
+ exit(1);
+ }
+
+ printf("[OK]\toutb to 0x%02hx worked\n", port);
+}
+
+static void expect_gp(unsigned short port)
+{
+ if (try_outb(port)) {
+ printf("[FAIL]\toutb to 0x%02hx worked\n", port);
+ exit(1);
+ }
+
+ printf("[OK]\toutb to 0x%02hx failed\n", port);
+}
+
+int main(void)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0");
+
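+	/*
+	 * Ports 0x80 and 0xed are the classic POST/IO-delay ports, so a
+	 * zero-byte write is harmless whenever access is actually granted.
+	 */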
+ expect_gp(0x80);
+ expect_gp(0xed);
+
+ /*
+ * Probe for ioperm support. Note that clearing ioperm bits
+ * works even as nonroot.
+ */
+ printf("[RUN]\tenable 0x80\n");
+ if (ioperm(0x80, 1, 1) != 0) {
+ printf("[OK]\tioperm(0x80, 1, 1) failed (%d) -- try running as root\n",
+ errno);
+ return 0;
+ }
+ expect_ok(0x80);
+ expect_gp(0xed);
+
+ printf("[RUN]\tdisable 0x80\n");
+ if (ioperm(0x80, 1, 0) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)\n", errno);
+ return 1;
+ }
+ expect_gp(0x80);
+ expect_gp(0xed);
+
+ /* Make sure that fork() preserves ioperm. */
+ if (ioperm(0x80, 1, 1) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 1) failed (%d)\n", errno);
+ return 1;
+ }
+
+ pid_t child = fork();
+ if (child == -1)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tchild: check that we inherited permissions\n");
+ expect_ok(0x80);
+ expect_gp(0xed);
+ return 0;
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ /* Test the capability checks. */
+
+ printf("\tDrop privileges\n");
+ if (setresuid(1, 1, 1) != 0) {
+ printf("[WARN]\tDropping privileges failed\n");
+ return 0;
+ }
+
+ printf("[RUN]\tdisable 0x80\n");
+ if (ioperm(0x80, 1, 0) != 0) {
+		printf("[FAIL]\tioperm(0x80, 1, 0) failed (%d)\n", errno);
+ return 1;
+ }
+ printf("[OK]\tit worked\n");
+
+ printf("[RUN]\tenable 0x80 again\n");
+ if (ioperm(0x80, 1, 1) == 0) {
+ printf("[FAIL]\tit succeeded but should have failed.\n");
+ return 1;
+ }
+ printf("[OK]\tit failed\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
new file mode 100644
index 000000000..6aa27f346
--- /dev/null
+++ b/tools/testing/selftests/x86/iopl.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * iopl.c - Test case for a Linux on Xen 64-bit bug
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+int main(void)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0");
+
+ /* Probe for iopl support. Note that iopl(0) works even as nonroot. */
+ if (iopl(3) != 0) {
+ printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
+ errno);
+ return 0;
+ }
+
+ /* Restore our original state prior to starting the test. */
+ if (iopl(0) != 0)
+ err(1, "iopl(0)");
+
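+	/*
+	 * IOPL is per-task state that fork() duplicates: the child may
+	 * raise it to 3 independently while the parent stays at 0, which
+	 * the faulting outb in the parent below demonstrates.
+	 */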
+ pid_t child = fork();
+ if (child == -1)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("\tchild: set IOPL to 3\n");
+ if (iopl(3) != 0)
+ err(1, "iopl");
+
+ printf("[RUN]\tchild: write to 0x80\n");
+ asm volatile ("outb %%al, $0x80" : : "a" (0));
+
+ return 0;
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tparent: write to 0x80 (should fail)\n");
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ printf("[OK]\twrite was denied\n");
+ } else {
+ asm volatile ("outb %%al, $0x80" : : "a" (0));
+ printf("[FAIL]\twrite was allowed\n");
+ nerrs++;
+ }
+
+ /* Test the capability checks. */
+ printf("\tiopl(3)\n");
+ if (iopl(3) != 0)
+ err(1, "iopl(3)");
+
+ printf("\tDrop privileges\n");
+ if (setresuid(1, 1, 1) != 0) {
+ printf("[WARN]\tDropping privileges failed\n");
+ goto done;
+ }
+
+ printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n");
+ if (iopl(3) != 0) {
+ printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n");
+ nerrs++;
+ }
+
+ printf("[RUN]\tiopl(0) unprivileged\n");
+ if (iopl(0) != 0) {
+ printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n");
+ nerrs++;
+ }
+
+ printf("[RUN]\tiopl(3) unprivileged\n");
+ if (iopl(3) == 0) {
+		printf("[FAIL]\tiopl(3) should fail when unprivileged if iopl==0\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tFailed as expected\n");
+ }
+
+done:
+ return nerrs ? 1 : 0;
+}
+
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
new file mode 100644
index 000000000..1aef72df2
--- /dev/null
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -0,0 +1,927 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ldt_gdt.c - Test cases for LDT and GDT access
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <asm/ldt.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sched.h>
+#include <linux/futex.h>
+#include <sys/mman.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+
+#define AR_ACCESSED (1<<8)
+
+#define AR_TYPE_RODATA (0 * (1<<9))
+#define AR_TYPE_RWDATA (1 * (1<<9))
+#define AR_TYPE_RODATA_EXPDOWN (2 * (1<<9))
+#define AR_TYPE_RWDATA_EXPDOWN (3 * (1<<9))
+#define AR_TYPE_XOCODE (4 * (1<<9))
+#define AR_TYPE_XRCODE (5 * (1<<9))
+#define AR_TYPE_XOCODE_CONF (6 * (1<<9))
+#define AR_TYPE_XRCODE_CONF (7 * (1<<9))
+
+#define AR_DPL3 (3 * (1<<13))
+
+#define AR_S (1 << 12)
+#define AR_P (1 << 15)
+#define AR_AVL (1 << 20)
+#define AR_L (1 << 21)
+#define AR_DB (1 << 22)
+#define AR_G (1 << 23)
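+
+/*
+ * These AR_* values mirror the access-rights dword that LAR returns:
+ * the descriptor type starts at bit 8, S at bit 12, DPL at bits 13-14,
+ * P at bit 15, and AVL/L/D-B/G occupy bits 20-23.
+ */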
+
+#ifdef __x86_64__
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define INT80_CLOBBERS
+#endif
+
+static int nerrs;
+
+/* Points to an array of 1024 ints, each holding its own index. */
+static const unsigned int *counter_page;
+static struct user_desc *low_user_desc;
+static struct user_desc *low_user_desc_clear; /* Use to delete GDT entry */
+static int gdt_entry_num;
+
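+/*
+ * LSL and LAR set ZF only when the selector is valid and accessible,
+ * so the "jnz 1f" sequences below leave has_limit/has_ar at zero
+ * exactly when the load failed.
+ */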
+static void check_invalid_segment(uint16_t index, int ldt)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (has_limit || has_ar) {
+ printf("[FAIL]\t%s entry %hu is valid but should be invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s entry %hu is invalid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ }
+}
+
+static void check_valid_segment(uint16_t index, int ldt,
+ uint32_t expected_ar, uint32_t expected_limit,
+ bool verbose)
+{
+ uint32_t has_limit = 0, has_ar = 0, limit, ar;
+ uint32_t selector = (index << 3) | (ldt << 2) | 3;
+
+ asm ("lsl %[selector], %[limit]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_limit]\n\t"
+ "1:"
+ : [limit] "=r" (limit), [has_limit] "+rm" (has_limit)
+ : [selector] "r" (selector));
+ asm ("larl %[selector], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "movl $1, %[has_ar]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [has_ar] "+rm" (has_ar)
+ : [selector] "r" (selector));
+
+ if (!has_limit || !has_ar) {
+ printf("[FAIL]\t%s entry %hu is invalid but should be valid\n",
+ (ldt ? "LDT" : "GDT"), index);
+ nerrs++;
+ return;
+ }
+
+ /* The SDM says "bits 19:16 are undefined". Thanks. */
+ ar &= ~0xF0000;
+
+ /*
+ * NB: Different Linux versions do different things with the
+ * accessed bit in set_thread_area().
+ */
+ if (ar != expected_ar && ar != (expected_ar | AR_ACCESSED)) {
+ printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
+ nerrs++;
+ } else if (limit != expected_limit) {
+ printf("[FAIL]\t%s entry %hu has limit 0x%08X but expected 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, limit, expected_limit);
+ nerrs++;
+ } else if (verbose) {
+ printf("[OK]\t%s entry %hu has AR 0x%08X and limit 0x%08X\n",
+ (ldt ? "LDT" : "GDT"), index, ar, limit);
+ }
+}
+
+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+ bool oldmode, bool ldt)
+{
+ struct user_desc desc = *d;
+ int ret;
+
+ if (!ldt) {
+#ifndef __i386__
+ /* No point testing set_thread_area in a 64-bit build */
+ return false;
+#endif
+ if (!gdt_entry_num)
+ return false;
+ desc.entry_number = gdt_entry_num;
+
+ ret = syscall(SYS_set_thread_area, &desc);
+ } else {
+ ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ &desc, sizeof(desc));
+
+ if (ret < -1)
+ errno = -ret;
+
+ if (ret != 0 && errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ return false;
+ }
+ }
+
+ if (ret == 0) {
+ uint32_t limit = desc.limit;
+ if (desc.limit_in_pages)
+ limit = (limit << 12) + 4095;
+ check_valid_segment(desc.entry_number, ldt, ar, limit, true);
+ return true;
+ } else {
+ if (desc.seg_32bit) {
+ printf("[FAIL]\tUnexpected %s failure %d\n",
+ ldt ? "modify_ldt" : "set_thread_area",
+ errno);
+ nerrs++;
+ return false;
+ } else {
+ printf("[OK]\t%s rejected 16 bit segment\n",
+ ldt ? "modify_ldt" : "set_thread_area");
+ return false;
+ }
+ }
+}
+
+static bool install_valid(const struct user_desc *desc, uint32_t ar)
+{
+ bool ret = install_valid_mode(desc, ar, false, true);
+
+ if (desc->contents <= 1 && desc->seg_32bit &&
+ !desc->seg_not_present) {
+ /* Should work in the GDT, too. */
+ install_valid_mode(desc, ar, false, false);
+ }
+
+ return ret;
+}
+
+static void install_invalid(const struct user_desc *desc, bool oldmode)
+{
+ int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+ desc, sizeof(*desc));
+ if (ret < -1)
+ errno = -ret;
+ if (ret == 0) {
+ check_invalid_segment(desc->entry_number, 1);
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ if (desc->seg_32bit) {
+ printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+ errno);
+ nerrs++;
+ } else {
+ printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+ }
+ }
+}
+
+static int safe_modify_ldt(int func, struct user_desc *ptr,
+ unsigned long bytecount)
+{
+	int ret = syscall(SYS_modify_ldt, func, ptr, bytecount);
+ if (ret < -1)
+ errno = -ret;
+ return ret;
+}
+
+static void fail_install(struct user_desc *desc)
+{
+ if (safe_modify_ldt(0x11, desc, sizeof(*desc)) == 0) {
+ printf("[FAIL]\tmodify_ldt accepted a bad descriptor\n");
+ nerrs++;
+ } else if (errno == ENOSYS) {
+ printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+ } else {
+ printf("[OK]\tmodify_ldt failure %d\n", errno);
+ }
+}
+
+static void do_simple_tests(void)
+{
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 10,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
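+
+	/*
+	 * From here on, each tweak to a single user_desc field should flip
+	 * exactly the matching AR bits (or rescale the limit via G) when
+	 * check_valid_segment() reads the entry back with LAR/LSL.
+	 */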
+
+ desc.limit_in_pages = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.entry_number = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ check_invalid_segment(1, 1);
+
+ desc.base_addr = 0xf0000000;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G);
+
+ desc.useable = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_P | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.seg_32bit = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_G | AR_AVL);
+
+ desc.seg_32bit = 1;
+ desc.contents = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.contents = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN |
+ AR_S | AR_DB | AR_G | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.limit_in_pages = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.contents = 3;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE_CONF |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 0;
+ desc.contents = 2;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ desc.read_exec_only = 1;
+
+#ifdef __x86_64__
+ desc.lm = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+ desc.lm = 0;
+#endif
+
+ bool entry1_okay = install_valid(&desc, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL);
+
+ if (entry1_okay) {
+ printf("[RUN]\tTest fork\n");
+ pid_t child = fork();
+ if (child == 0) {
+ nerrs = 0;
+ check_valid_segment(desc.entry_number, 1,
+ AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, desc.limit,
+ true);
+ check_invalid_segment(1, 1);
+ exit(nerrs ? 1 : 0);
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tTest size\n");
+ int i;
+ for (i = 0; i < 8192; i++) {
+ desc.entry_number = i;
+ desc.limit = i;
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ printf("[FAIL]\tFailed to install entry %d\n", i);
+ nerrs++;
+ break;
+ }
+ }
+ for (int j = 0; j < i; j++) {
+ check_valid_segment(j, 1, AR_DPL3 | AR_TYPE_XOCODE |
+ AR_S | AR_DB | AR_AVL, j, false);
+ }
+ printf("[DONE]\tSize test\n");
+ } else {
+ printf("[SKIP]\tSkipping fork and size tests because we have no LDT\n");
+ }
+
+ /* Test entry_number too high. */
+ desc.entry_number = 8192;
+ fail_install(&desc);
+
+	/* Test deletion and actions mistakable for deletion. */
+ memset(&desc, 0, sizeof(desc));
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P);
+
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.seg_not_present = 0;
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P);
+
+ desc.read_exec_only = 0;
+ desc.seg_not_present = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S);
+
+ desc.read_exec_only = 1;
+ desc.limit = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.limit = 0;
+ desc.base_addr = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S);
+
+ desc.base_addr = 0;
+ install_invalid(&desc, false);
+
+ desc.seg_not_present = 0;
+ desc.seg_32bit = 1;
+ desc.read_exec_only = 0;
+ desc.limit = 0xfffff;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);
+
+ desc.limit_in_pages = 1;
+
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+ desc.contents = 1;
+ desc.read_exec_only = 0;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+ desc.read_exec_only = 1;
+ install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+ desc.limit = 0;
+ install_invalid(&desc, true);
+}
+
+/*
+ * 0: thread is idle
+ * 1: thread armed
+ * 2: thread should clear LDT entry 0
+ * 3: thread should exit
+ */
+static volatile unsigned int ftx;
+
+static void *threadproc(void *ctx)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 1"); /* should never fail */
+
+ while (1) {
+ syscall(SYS_futex, &ftx, FUTEX_WAIT, 0, NULL, NULL, 0);
+ while (ftx != 2) {
+ if (ftx >= 3)
+ return NULL;
+ }
+
+ /* clear LDT entry 0 */
+ const struct user_desc desc = {};
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) != 0)
+ err(1, "modify_ldt");
+
+ /* If ftx == 2, set it to zero. If ftx == 100, quit. */
+ unsigned int x = -2;
+ asm volatile ("lock xaddl %[x], %[ftx]" :
+ [x] "+r" (x), [ftx] "+m" (ftx));
+ if (x != 2)
+ return NULL;
+ }
+}
+
+#ifdef __i386__
+
+#ifndef SA_RESTORER
+#define SA_RESTORER 0x04000000
+#endif
+
+/*
+ * The UAPI header calls this 'struct sigaction', which conflicts with
+ * glibc. Sigh.
+ */
+struct fake_ksigaction {
+ void *handler; /* the real type is nasty */
+ unsigned long sa_flags;
+ void (*sa_restorer)(void);
+ unsigned char sigset[8];
+};
+
+static void fix_sa_restorer(int sig)
+{
+ struct fake_ksigaction ksa;
+
+ if (syscall(SYS_rt_sigaction, sig, NULL, &ksa, 8) == 0) {
+ /*
+ * glibc has a nasty bug: it sometimes writes garbage to
+ * sa_restorer. This interacts quite badly with anything
+ * that fiddles with SS because it can trigger legacy
+ * stack switching. Patch it up. See:
+ *
+ * https://sourceware.org/bugzilla/show_bug.cgi?id=21269
+ */
+ if (!(ksa.sa_flags & SA_RESTORER) && ksa.sa_restorer) {
+ ksa.sa_restorer = NULL;
+ if (syscall(SYS_rt_sigaction, sig, &ksa, NULL,
+ sizeof(ksa.sigset)) != 0)
+ err(1, "rt_sigaction");
+ }
+ }
+}
+#else
+static void fix_sa_restorer(int sig)
+{
+ /* 64-bit glibc works fine. */
+}
+#endif
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+ fix_sa_restorer(sig);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static void do_multicpu_tests(void)
+{
+ cpu_set_t cpuset;
+ pthread_t thread;
+ int failures = 0, iters = 5, i;
+ unsigned short orig_ss;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 1\n");
+ return;
+ }
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tCannot set affinity to CPU 0\n");
+ return;
+ }
+
+ sethandler(SIGSEGV, sigsegv, 0);
+#ifdef __i386__
+ /* True 32-bit kernels send SIGILL instead of SIGSEGV on IRET faults. */
+ sethandler(SIGILL, sigsegv, 0);
+#endif
+
+ printf("[RUN]\tCross-CPU LDT invalidation\n");
+
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
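+	/*
+	 * Each iteration loads SS from LDT entry 0 on CPU 0 while the
+	 * helper thread deletes that entry from CPU 1. A correct kernel
+	 * propagates the change, so the return to user mode faults and we
+	 * escape through siglongjmp; reaching failures++ means the stale
+	 * descriptor survived on this CPU.
+	 */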
+ asm volatile ("mov %%ss, %0" : "=rm" (orig_ss));
+
+ for (i = 0; i < 5; i++) {
+ if (sigsetjmp(jmpbuf, 1) != 0)
+ continue;
+
+ /* Make sure the thread is ready after the last test. */
+ while (ftx != 0)
+ ;
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 0xfffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data */
+ .read_exec_only = 0,
+ .limit_in_pages = 1,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (safe_modify_ldt(0x11, &desc, sizeof(desc)) != 0) {
+ if (errno != ENOSYS)
+ err(1, "modify_ldt");
+ printf("[SKIP]\tmodify_ldt unavailable\n");
+ break;
+ }
+
+ /* Arm the thread. */
+ ftx = 1;
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ asm volatile ("mov %0, %%ss" : : "r" (0x7));
+
+ /* Go! */
+ ftx = 2;
+
+ while (ftx != 0)
+ ;
+
+ /*
+ * On success, modify_ldt will segfault us synchronously,
+ * and we'll escape via siglongjmp.
+ */
+
+ failures++;
+ asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
+	}
+
+ ftx = 100; /* Kill the thread. */
+ syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
+
+ if (pthread_join(thread, NULL) != 0)
+ err(1, "pthread_join");
+
+ if (failures) {
+ printf("[FAIL]\t%d of %d iterations failed\n", failures, iters);
+ nerrs++;
+ } else {
+ printf("[OK]\tAll %d iterations succeeded\n", iters);
+ }
+}
+
+static int finish_exec_test(void)
+{
+ /*
+ * Older kernel versions did inherit the LDT on exec() which is
+ * wrong because exec() starts from a clean state.
+ */
+ check_invalid_segment(0, 1);
+
+ return nerrs ? 1 : 0;
+}
+
+static void do_exec_test(void)
+{
+ printf("[RUN]\tTest exec\n");
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = 0,
+ .limit = 42,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ install_valid(&desc, AR_DPL3 | AR_TYPE_XRCODE | AR_S | AR_P | AR_DB);
+
+ pid_t child = fork();
+ if (child == 0) {
+ execl("/proc/self/exe", "ldt_gdt_test_exec", NULL);
+ printf("[FAIL]\tCould not exec self\n");
+ exit(1); /* exec failed */
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+}
+
+static void setup_counter_page(void)
+{
+ unsigned int *page = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (page == MAP_FAILED)
+ err(1, "mmap");
+
+ for (int i = 0; i < 1024; i++)
+ page[i] = i;
+ counter_page = page;
+}
+
+static int invoke_set_thread_area(void)
+{
+ int ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (low_user_desc) :
+ "a" (243), "b" (low_user_desc)
+ : INT80_CLOBBERS);
+ return ret;
+}
+
+static void setup_low_user_desc(void)
+{
+ low_user_desc = mmap(NULL, 2 * sizeof(struct user_desc),
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT, -1, 0);
+ if (low_user_desc == MAP_FAILED)
+ err(1, "mmap");
+
+ low_user_desc->entry_number = -1;
+ low_user_desc->base_addr = (unsigned long)&counter_page[1];
+ low_user_desc->limit = 0xfffff;
+ low_user_desc->seg_32bit = 1;
+ low_user_desc->contents = 0; /* Data, grow-up*/
+ low_user_desc->read_exec_only = 0;
+ low_user_desc->limit_in_pages = 1;
+ low_user_desc->seg_not_present = 0;
+ low_user_desc->useable = 0;
+
+ if (invoke_set_thread_area() == 0) {
+ gdt_entry_num = low_user_desc->entry_number;
+ printf("[NOTE]\tset_thread_area is available; will use GDT index %d\n", gdt_entry_num);
+ } else {
+ printf("[NOTE]\tset_thread_area is unavailable\n");
+ }
+
+ low_user_desc_clear = low_user_desc + 1;
+ low_user_desc_clear->entry_number = gdt_entry_num;
+ low_user_desc_clear->read_exec_only = 1;
+ low_user_desc_clear->seg_not_present = 1;
+}
+
+static void test_gdt_invalidation(void)
+{
+ if (!gdt_entry_num)
+ return; /* 64-bit only system -- we can't use set_thread_area */
+
+ unsigned short prev_sel;
+ unsigned short sel;
+ unsigned int eax;
+ const char *result;
+#ifdef __x86_64__
+ unsigned long saved_base;
+ unsigned long new_base;
+#endif
+
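+	/*
+	 * low_user_desc_clear marks the GDT slot not-present. After each
+	 * int $0x80 set_thread_area call below, any segment register that
+	 * still referenced the slot must read back as 0 rather than as a
+	 * stale, usable selector.
+	 */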
+ /* Test DS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%ds, %[prev_sel]\n\t"
+ "movw %[sel], %%ds\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate ds */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%ds, %[sel]\n\t"
+ "movw %[prev_sel], %%ds"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate DS with set_thread_area: new DS = 0x%hx\n",
+ result, sel);
+
+ /* Test ES */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+ asm volatile ("movw %%es, %[prev_sel]\n\t"
+ "movw %[sel], %%es\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate es */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%es, %[sel]\n\t"
+ "movw %[prev_sel], %%es"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate ES with set_thread_area: new ES = 0x%hx\n",
+ result, sel);
+
+ /* Test FS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &saved_base);
+#endif
+ asm volatile ("movw %%fs, %[prev_sel]\n\t"
+ "movw %[sel], %%fs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate fs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%fs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_FS, &new_base);
+#endif
+
+ /* Restore FS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%fs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_FS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate FS with set_thread_area: new FS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew FSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew FSBASE was zero\n");
+ }
+#endif
+
+ /* Test GS */
+ invoke_set_thread_area();
+ eax = 243;
+ sel = (gdt_entry_num << 3) | 3;
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &saved_base);
+#endif
+ asm volatile ("movw %%gs, %[prev_sel]\n\t"
+ "movw %[sel], %%gs\n\t"
+#ifdef __i386__
+ "pushl %%ebx\n\t"
+#endif
+ "movl %[arg1], %%ebx\n\t"
+ "int $0x80\n\t" /* Should invalidate gs */
+#ifdef __i386__
+ "popl %%ebx\n\t"
+#endif
+ "movw %%gs, %[sel]\n\t"
+ : [prev_sel] "=&r" (prev_sel), [sel] "+r" (sel),
+ "+a" (eax)
+ : "m" (low_user_desc_clear),
+ [arg1] "r" ((unsigned int)(unsigned long)low_user_desc_clear)
+ : INT80_CLOBBERS);
+
+#ifdef __x86_64__
+ syscall(SYS_arch_prctl, ARCH_GET_GS, &new_base);
+#endif
+
+ /* Restore GS/BASE for glibc */
+ asm volatile ("movw %[prev_sel], %%gs" : : [prev_sel] "rm" (prev_sel));
+#ifdef __x86_64__
+ if (saved_base)
+ syscall(SYS_arch_prctl, ARCH_SET_GS, saved_base);
+#endif
+
+ if (sel != 0) {
+ result = "FAIL";
+ nerrs++;
+ } else {
+ result = "OK";
+ }
+ printf("[%s]\tInvalidate GS with set_thread_area: new GS = 0x%hx\n",
+ result, sel);
+
+#ifdef __x86_64__
+ if (sel == 0 && new_base != 0) {
+ nerrs++;
+ printf("[FAIL]\tNew GSBASE was 0x%lx\n", new_base);
+ } else {
+ printf("[OK]\tNew GSBASE was zero\n");
+ }
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 1 && !strcmp(argv[0], "ldt_gdt_test_exec"))
+ return finish_exec_test();
+
+ setup_counter_page();
+ setup_low_user_desc();
+
+ do_simple_tests();
+
+ do_multicpu_tests();
+
+ do_exec_test();
+
+ test_gdt_invalidation();
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
new file mode 100644
index 000000000..3c3a02265
--- /dev/null
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -0,0 +1,285 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS
+ *
+ * This does MOV SS from a watchpointed address followed by various
+ * types of kernel entries. A MOV SS that hits a watchpoint will queue
+ * up a #DB trap but will not actually deliver that trap. The trap
+ * will be delivered after the next instruction instead. The CPU's logic
+ * seems to be:
+ *
+ * - Any fault: drop the pending #DB trap.
+ * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then
+ * deliver #DB.
+ * - ICEBP: enter the kernel but do not deliver the watchpoint trap
+ * - breakpoint: only one #DB is delivered (phew!)
+ *
+ * There are plenty of ways for a kernel to handle this incorrectly. This
+ * test tries to exercise all the cases.
+ *
+ * This should mostly cover CVE-2018-1087 and CVE-2018-8897.
+ */
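+
+/*
+ * Illustrative sketch of the shared core pattern (the [RUN] cases below each
+ * swap in a different kernel entry for the second instruction):
+ *
+ *	mov	ss, %ss		// hits the data watchpoint; #DB becomes pending
+ *	int3			// pending #DB is delivered on this kernel entry
+ */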
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/user.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <setjmp.h>
+#include <sys/prctl.h>
+
+#define X86_EFLAGS_RF (1UL << 16)
+
+#if __x86_64__
+# define REG_IP REG_RIP
+#else
+# define REG_IP REG_EIP
+#endif
+
+unsigned short ss;
+extern unsigned char breakpoint_insn[];
+sigjmp_buf jmpbuf;
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void enable_watchpoint(void)
+{
+ pid_t parent = getpid();
+ int status;
+
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+
+ if (child) {
+ if (waitpid(child, &status, 0) != child)
+ err(1, "waitpid for child");
+ } else {
+ unsigned long dr0, dr1, dr7;
+
+ dr0 = (unsigned long)&ss;
+ dr1 = (unsigned long)breakpoint_insn;
+ dr7 = ((1UL << 1) | /* G0 */
+ (3UL << 16) | /* RW0 = read or write */
+ (1UL << 18) | /* LEN0 = 2 bytes */
+ (1UL << 3)); /* G1, RW1 = insn */
+
+ if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_ATTACH");
+
+ if (waitpid(parent, &status, 0) != parent)
+ err(1, "waitpid for child");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0)
+ err(1, "PTRACE_POKEUSER DR0");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0)
+ err(1, "PTRACE_POKEUSER DR1");
+
+ if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0)
+ err(1, "PTRACE_POKEUSER DR7");
+
+ printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7);
+
+ if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0)
+ err(1, "PTRACE_DETACH");
+
+ exit(0);
+ }
+}
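+
+/*
+ * Illustrative decode of the DR7 value built above, following the
+ * architectural DR7 layout (bits 0-7 are L0,G0..L3,G3; each breakpoint n
+ * has RWn at bit 16+4n and LENn at bit 18+4n):
+ *
+ *	1UL << 1	G0 = 1, enable breakpoint 0 globally
+ *	3UL << 16	RW0 = 11b, break on data read or write
+ *	1UL << 18	LEN0 = 01b, 2 bytes wide (matches unsigned short ss)
+ *	1UL << 3	G1 = 1 with RW1 = 00b, instruction breakpoint
+ *
+ * which works out to dr7 == 0x7000a.
+ */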
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static char const * const signames[] = {
+ [SIGSEGV] = "SIGSEGV",
+ [SIGBUS] = "SIBGUS",
+ [SIGTRAP] = "SIGTRAP",
+ [SIGILL] = "SIGILL",
+};
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n",
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP],
+ !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF));
+}
+
+static void handle_and_return(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+}
+
+static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = ctx_void;
+
+ printf("\tGot %s with RIP=%lx\n", signames[sig],
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ unsigned long nr;
+
+ asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss));
+ printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss);
+
+ if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0)
+ printf("\tPR_SET_PTRACER_ANY succeeded\n");
+
+ printf("\tSet up a watchpoint\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ enable_watchpoint();
+
+ printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n");
+ asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT3\n");
+ asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CS CS INT3\n");
+ asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; CSx14 INT3\n");
+ asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss));
+
+ printf("[RUN]\tMOV SS; INT 4\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss));
+
+#ifdef __i386__
+ printf("[RUN]\tMOV SS; INTO\n");
+ sethandler(SIGSEGV, handle_and_return, SA_RESETHAND);
+ nr = -1;
+ asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into"
+ : [tmp] "+r" (nr) : [ss] "m" (ss));
+#endif
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; ICEBP\n");
+
+ /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+
+ asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; CLI\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss));
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; #PF\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]"
+ : [tmp] "=r" (nr) : [ss] "m" (ss));
+ }
+
+ /*
+ * INT $1: if #DB has DPL=3 and there isn't special handling,
+ * then the kernel will die.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT 1\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss));
+ }
+
+#ifdef __x86_64__
+ /*
+ * In principle, we should test 32-bit SYSCALL as well, but
+ * the calling convention is so unpredictable that it's
+ * not obviously worth the effort.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSCALL\n");
+ sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND);
+ nr = SYS_getpid;
+ /*
+ * Toggle the high bit of RSP to make it noncanonical to
+ * strengthen this test on non-SMAP systems.
+ */
+ asm volatile ("btc $63, %%rsp\n\t"
+ "mov %[ss], %%ss; syscall\n\t"
+ "btc $63, %%rsp"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+ }
+#endif
+
+ printf("[RUN]\tMOV SS; breakpointed NOP\n");
+ asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss));
+
+ /*
+ * Invoking SYSENTER directly breaks all the rules. Just handle
+ * the SIGSEGV.
+ */
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; SYSENTER\n");
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
+ nr = SYS_getpid;
+ asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
+ : [ss] "m" (ss) : "flags", "rcx"
+#ifdef __x86_64__
+ , "r11"
+#endif
+ );
+
+ /* We're unreachable here. SYSENTER forgets RIP. */
+ }
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ printf("[RUN]\tMOV SS; INT $0x80\n");
+ sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND);
+ nr = 20; /* compat getpid */
+ asm volatile ("mov %[ss], %%ss; int $0x80"
+ : "+a" (nr) : [ss] "m" (ss)
+ : "flags"
+#ifdef __x86_64__
+ , "r8", "r9", "r10", "r11"
+#endif
+ );
+ }
+
+ printf("[OK]\tI aten't dead\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h
new file mode 100644
index 000000000..7546eba7f
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-debug.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_DEBUG_H
+#define _MPX_DEBUG_H
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+#define dprintf5(args...) dprintf_level(5, args)
+
+#endif /* _MPX_DEBUG_H */
diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c
new file mode 100644
index 000000000..c13607ef5
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-dig.c
@@ -0,0 +1,499 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Written by Dave Hansen <dave.hansen@intel.com>
+ */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+#include "mpx-hw.h"
+
+unsigned long bounds_dir_global;
+
+#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)
+static inline void __mpx_dig_abort(const char *file, const char *func, int line)
+{
+ fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func);
+ printf("MPX dig abort @ %s::%d in %s()\n", file, line, func);
+ abort();
+}
+
+/*
+ * run like this (BDIR finds the probable bounds directory):
+ *
+ * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
+ * | head -1 | awk -F- '{print $1}')";
+ * ./mpx-dig $pid 0x$BDIR
+ *
+ * NOTE:
+ * assumes that the only 2097152-kb VMA is the bounds dir
+ */
+
+long nr_incore(void *ptr, unsigned long size_bytes)
+{
+ int i;
+ long ret = 0;
+ long vec_len = size_bytes / PAGE_SIZE;
+ unsigned char *vec = malloc(vec_len);
+ int incore_ret;
+
+ if (!vec)
+ mpx_dig_abort();
+
+ incore_ret = mincore(ptr, size_bytes, vec);
+ if (incore_ret) {
+ printf("mincore ret: %d\n", incore_ret);
+ perror("mincore");
+ mpx_dig_abort();
+ }
+ for (i = 0; i < vec_len; i++)
+ ret += vec[i];
+ free(vec);
+ return ret;
+}
+
+int open_proc(int pid, char *file)
+{
+ static char buf[100];
+ int fd;
+
+ snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file);
+ fd = open(&buf[0], O_RDONLY);
+ if (fd < 0)
+ perror(buf);
+
+ return fd;
+}
+
+struct vaddr_range {
+ unsigned long start;
+ unsigned long end;
+};
+struct vaddr_range *ranges;
+int nr_ranges_allocated;
+int nr_ranges_populated;
+int last_range = -1;
+
+int __pid_load_vaddrs(int pid)
+{
+ int ret = 0;
+ int proc_maps_fd = open_proc(pid, "maps");
+ char linebuf[10000];
+ unsigned long start;
+ unsigned long end;
+ char rest[1000];
+ FILE *f = fdopen(proc_maps_fd, "r");
+
+ if (!f)
+ mpx_dig_abort();
+ nr_ranges_populated = 0;
+ while (!feof(f)) {
+ char *readret = fgets(linebuf, sizeof(linebuf), f);
+ int parsed;
+
+ if (readret == NULL) {
+ if (feof(f))
+ break;
+ mpx_dig_abort();
+ }
+
+ parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest);
+ if (parsed != 3)
+ mpx_dig_abort();
+
+ dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
+ if (nr_ranges_populated >= nr_ranges_allocated) {
+ ret = -E2BIG;
+ break;
+ }
+ ranges[nr_ranges_populated].start = start;
+ ranges[nr_ranges_populated].end = end;
+ nr_ranges_populated++;
+ }
+ last_range = -1;
+ fclose(f);
+ close(proc_maps_fd);
+ return ret;
+}
+
+int pid_load_vaddrs(int pid)
+{
+ int ret;
+
+ dprintf2("%s(%d)\n", __func__, pid);
+ if (!ranges) {
+ nr_ranges_allocated = 4;
+ ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
+ dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
+ nr_ranges_allocated, ranges);
+ assert(ranges != NULL);
+ }
+ do {
+ ret = __pid_load_vaddrs(pid);
+ if (!ret)
+ break;
+ if (ret == -E2BIG) {
+ dprintf2("%s(%d) need to realloc\n", __func__, pid);
+ nr_ranges_allocated *= 2;
+ ranges = realloc(ranges,
+ nr_ranges_allocated * sizeof(ranges[0]));
+ dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
+ pid, nr_ranges_allocated, ranges);
+ assert(ranges != NULL);
+ dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
+ }
+ } while (1);
+
+ dprintf2("%s(%d) done\n", __func__, pid);
+
+ return ret;
+}
+
+static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
+{
+ if (vaddr < r->start)
+ return 0;
+ if (vaddr >= r->end)
+ return 0;
+ return 1;
+}
+
+static inline int vaddr_mapped_by_range(unsigned long vaddr)
+{
+ int i;
+
+ if (last_range >= 0 && vaddr_in_range(vaddr, &ranges[last_range]))
+ return 1;
+
+ for (i = 0; i < nr_ranges_populated; i++) {
+ struct vaddr_range *r = &ranges[i];
+
+ if (!vaddr_in_range(vaddr, r))
+ continue;
+ last_range = i;
+ return 1;
+ }
+ return 0;
+}
+
+const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
+
+void *read_bounds_table_into_buf(unsigned long table_vaddr)
+{
+#ifdef MPX_DIG_STANDALONE
+ static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
+ off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
+ if (seek_ret != table_vaddr)
+ mpx_dig_abort();
+
+ int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
+ if (read_ret != sizeof(bt_buf))
+ mpx_dig_abort();
+ return &bt_buf;
+#else
+ return (void *)table_vaddr;
+#endif
+}
+
+int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
+ unsigned long bde_vaddr)
+{
+ unsigned long offset_inside_bt;
+ int nr_entries = 0;
+ int do_abort = 0;
+ char *bt_buf;
+
+ dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
+ __func__, base_controlled_vaddr, bde_vaddr);
+
+ bt_buf = read_bounds_table_into_buf(table_vaddr);
+
+ dprintf4("%s() read done\n", __func__);
+
+ for (offset_inside_bt = 0;
+ offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
+ offset_inside_bt += bt_entry_size_bytes) {
+ unsigned long bt_entry_index;
+ unsigned long bt_entry_controls;
+ unsigned long this_bt_entry_for_vaddr;
+ unsigned long *bt_entry_buf;
+ int i;
+
+ dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
+ offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
+ bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
+ if (!bt_buf) {
+ printf("null bt_buf\n");
+ mpx_dig_abort();
+ }
+ if (!bt_entry_buf) {
+ printf("null bt_entry_buf\n");
+ mpx_dig_abort();
+ }
+ dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
+ bt_entry_buf);
+ if (!bt_entry_buf[0] &&
+ !bt_entry_buf[1] &&
+ !bt_entry_buf[2] &&
+ !bt_entry_buf[3])
+ continue;
+
+ nr_entries++;
+
+ bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
+ bt_entry_controls = sizeof(void *);
+ this_bt_entry_for_vaddr =
+ base_controlled_vaddr + bt_entry_index*bt_entry_controls;
+ /*
+ * We sign extend vaddr bits 48->63 which effectively
+ * creates a hole in the virtual address space.
+ * This calculation corrects for the hole.
+ */
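+ /*
+ * Worked example: the lowest non-canonical result,
+ * 0x0000800000000000, is ORed up to 0xffff800000000000,
+ * the bottom of the upper (kernel) canonical half.
+ */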
+ if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
+ this_bt_entry_for_vaddr |= 0xffff800000000000;
+
+ if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
+ printf("bt_entry_buf: %p\n", bt_entry_buf);
+ printf("there is a bte for %lx but no mapping\n",
+ this_bt_entry_for_vaddr);
+ printf(" bde vaddr: %016lx\n", bde_vaddr);
+ printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
+ printf(" table_vaddr: %016lx\n", table_vaddr);
+ printf(" entry vaddr: %016lx @ offset %lx\n",
+ table_vaddr + offset_inside_bt, offset_inside_bt);
+ do_abort = 1;
+ mpx_dig_abort();
+ }
+ if (DEBUG_LEVEL < 4)
+ continue;
+
+ printf("table entry[%lx]: ", offset_inside_bt);
+ for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
+ printf("0x%016lx ", bt_entry_buf[i]);
+ printf("\n");
+ }
+ if (do_abort)
+ mpx_dig_abort();
+ dprintf4("%s() done\n", __func__);
+ return nr_entries;
+}
+
+int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
+ int *nr_populated_bdes)
+{
+ unsigned long i;
+ int total_entries = 0;
+
+ dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
+ len_bytes, bd_offset_bytes, buf + len_bytes);
+
+ for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
+ unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
+ unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
+ unsigned long bounds_dir_entry;
+ unsigned long bd_for_vaddr;
+ unsigned long bt_start;
+ unsigned long bt_tail;
+ int nr_entries;
+
+ dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
+ bounds_dir_entry_ptr);
+
+ bounds_dir_entry = *bounds_dir_entry_ptr;
+ if (!bounds_dir_entry) {
+ dprintf4("no bounds dir at index 0x%lx / 0x%lx "
+ "start at offset:%lx %lx\n", bd_index, bd_index,
+ bd_offset_bytes, i);
+ continue;
+ }
+ dprintf3("found bounds_dir_entry: 0x%lx @ "
+ "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
+ &buf[i]);
+ /* mask off the enable bit: */
+ bounds_dir_entry &= ~0x1;
+ (*nr_populated_bdes)++;
+ dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
+ dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
+
+ bt_start = bounds_dir_entry;
+ bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
+ if (!vaddr_mapped_by_range(bt_start)) {
+ printf("bounds directory 0x%lx points to nowhere\n",
+ bounds_dir_entry);
+ mpx_dig_abort();
+ }
+ if (!vaddr_mapped_by_range(bt_tail)) {
+ printf("bounds directory end 0x%lx points to nowhere\n",
+ bt_tail);
+ mpx_dig_abort();
+ }
+ /*
+ * Each bounds directory entry controls 1MB of virtual address
+ * space. This variable is the virtual address in the process
+ * of the beginning of the area controlled by this bounds_dir.
+ */
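+ /*
+ * Illustrative numbers: bd_index 5 gives bd_for_vaddr ==
+ * 5 * (1UL << 20) == 0x500000, i.e. that entry covers
+ * vaddrs 0x500000 through 0x5fffff.
+ */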
+ bd_for_vaddr = bd_index * (1UL<<20);
+
+ nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
+ bounds_dir_global+bd_offset_bytes+i);
+ total_entries += nr_entries;
+ dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
+ "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
+ bd_index, buf+i,
+ bounds_dir_entry, nr_entries, total_entries,
+ bd_for_vaddr, bd_for_vaddr + (1UL<<20));
+ }
+ dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
+ bd_offset_bytes);
+ return total_entries;
+}
+
+int proc_pid_mem_fd = -1;
+
+void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
+ long buffer_size_bytes, void *buffer)
+{
+ unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir;
+ int read_ret;
+ off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET);
+
+ if (seek_ret != seekto)
+ mpx_dig_abort();
+
+ read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
+ /* short reads of /proc/$pid/mem are not expected in practice */
+ if (read_ret != buffer_size_bytes)
+ mpx_dig_abort();
+
+ return buffer;
+}
+void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
+ long buffer_size_bytes, void *buffer)
+
+{
+ unsigned char vec[buffer_size_bytes / PAGE_SIZE];
+ char *dig_bounds_dir_ptr =
+ (void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
+ /*
+ * use mincore() to quickly find the areas of the bounds directory
+ * that have memory and thus will be worth scanning.
+ */
+ int incore_ret;
+
+ int incore = 0;
+ int i;
+
+ dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);
+
+ incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]);
+ if (incore_ret) {
+ printf("mincore ret: %d\n", incore_ret);
+ perror("mincore");
+ mpx_dig_abort();
+ }
+ for (i = 0; i < sizeof(vec); i++)
+ incore += vec[i];
+ dprintf4("%s() total incore: %d\n", __func__, incore);
+ if (!incore)
+ return NULL;
+ dprintf3("%s() total incore: %d\n", __func__, incore);
+ return dig_bounds_dir_ptr;
+}
+
+int inspect_pid(int pid)
+{
+ static int dig_nr;
+ long offset_inside_bounds_dir;
+ char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
+ char *dig_bounds_dir_ptr;
+ int total_entries = 0;
+ int nr_populated_bdes = 0;
+ int inspect_self;
+
+ if (getpid() == pid) {
+ dprintf4("inspecting self\n");
+ inspect_self = 1;
+ } else {
+ dprintf4("inspecting pid %d\n", pid);
+ mpx_dig_abort();
+ }
+
+ for (offset_inside_bounds_dir = 0;
+ offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
+ offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
+ static int bufs_skipped;
+ int this_entries;
+
+ if (inspect_self) {
+ dig_bounds_dir_ptr =
+ fill_bounds_dir_buf_self(offset_inside_bounds_dir,
+ sizeof(bounds_dir_buf),
+ &bounds_dir_buf[0]);
+ } else {
+ dig_bounds_dir_ptr =
+ fill_bounds_dir_buf_other(offset_inside_bounds_dir,
+ sizeof(bounds_dir_buf),
+ &bounds_dir_buf[0]);
+ }
+ if (!dig_bounds_dir_ptr) {
+ bufs_skipped++;
+ continue;
+ }
+ this_entries = search_bd_buf(dig_bounds_dir_ptr,
+ sizeof(bounds_dir_buf),
+ offset_inside_bounds_dir,
+ &nr_populated_bdes);
+ total_entries += this_entries;
+ }
+ printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
+ total_entries, nr_populated_bdes);
+ return total_entries + nr_populated_bdes;
+}
+
+#ifdef MPX_DIG_REMOTE
+int main(int argc, char **argv)
+{
+ int err;
+ char *c;
+ unsigned long bounds_dir_entry;
+ int pid;
+
+ printf("mpx-dig starting...\n");
+ err = sscanf(argv[1], "%d", &pid);
+ printf("parsing: '%s', err: %d\n", argv[1], err);
+ if (err != 1)
+ mpx_dig_abort();
+
+ err = sscanf(argv[2], "%lx", &bounds_dir_global);
+ printf("parsing: '%s': %d\n", argv[2], err);
+ if (err != 1)
+ mpx_dig_abort();
+
+ proc_pid_mem_fd = open_proc(pid, "mem");
+ if (proc_pid_mem_fd < 0)
+ mpx_dig_abort();
+
+ inspect_pid(pid);
+ return 0;
+}
+#endif
+
+long inspect_me(struct mpx_bounds_dir *bounds_dir)
+{
+ int pid = getpid();
+
+ pid_load_vaddrs(pid);
+ bounds_dir_global = (unsigned long)bounds_dir;
+ dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);
+ return inspect_pid(pid);
+}
diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h
new file mode 100644
index 000000000..d1b61ab87
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-hw.h
@@ -0,0 +1,124 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_HW_H
+#define _MPX_HW_H
+
+#include <assert.h>
+
+/* Describe the MPX Hardware Layout in here */
+
+#define NR_MPX_BOUNDS_REGISTERS 4
+
+#ifdef __i386__
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
+#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
+#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
+#define MPX_BOUNDS_TABLE_TOP_BIT 11
+#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
+#define MPX_BOUNDS_DIR_TOP_BIT 31
+
+#else
+
+/*
+ * Linear Address of "pointer" (LAp)
+ * 0 -> 2: ignored
+ * 3 -> 19: index into the bounds table
+ * 20 -> 47: index into the bounds directory
+ * 48 -> 63: ignored
+ */
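+
+/*
+ * Illustrative mapping of the layout above onto GET_BITS() (defined below):
+ *
+ *	bounds table index == GET_BITS(LAp, 3, 19) == (LAp >> 3) & 0x1ffff
+ *	bounds dir index == GET_BITS(LAp, 20, 47) == (LAp >> 20) & 0xfffffff
+ *
+ * i.e. 17 and 28 bits respectively, matching the *_NR_ENTRIES sizes below.
+ */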
+
+#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
+#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
+#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
+#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
+
+#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
+#define MPX_BOUNDS_TABLE_TOP_BIT 19
+#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
+#define MPX_BOUNDS_DIR_TOP_BIT 47
+
+#endif
+
+#define MPX_BOUNDS_DIR_NR_ENTRIES \
+ (MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
+#define MPX_BOUNDS_TABLE_NR_ENTRIES \
+ (MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
+
+#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
+
+struct mpx_bd_entry {
+ union {
+ char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
+ void *contents[0];
+ };
+} __attribute__((packed));
+
+struct mpx_bt_entry {
+ union {
+ char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
+ unsigned long contents[0];
+ };
+} __attribute__((packed));
+
+struct mpx_bounds_dir {
+ struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
+} __attribute__((packed));
+
+struct mpx_bounds_table {
+ struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
+} __attribute__((packed));
+
+static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
+{
+ /* bottombit and topbit are both inclusive, per the layout comment above */
+ int total_nr_bits = topbit - bottombit + 1;
+ unsigned long mask = (1UL << total_nr_bits) - 1;
+ return (val >> bottombit) & mask;
+}
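+
+/*
+ * With the inclusive convention above (total bits == topbit - bottombit + 1),
+ * GET_BITS(lap, 20, 47) is a 28-bit value, which matches
+ * MPX_BOUNDS_DIR_NR_ENTRIES == (1ULL << 31) / 8 == 1 << 28 on 64-bit:
+ * every possible directory index names a valid directory entry.
+ */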
+
+static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
+{
+ return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
+ MPX_BOUNDS_TABLE_TOP_BIT);
+}
+
+static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
+{
+ return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
+ MPX_BOUNDS_DIR_TOP_BIT);
+}
+
+static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
+ struct mpx_bounds_dir *bounds_dir)
+{
+ unsigned long index = __vaddr_bounds_directory_index(vaddr);
+ return &bounds_dir->entries[index];
+}
+
+static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
+{
+ unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+ return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+}
+
+static inline struct mpx_bounds_table *
+__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
+{
+ unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents;
+ assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+ __bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
+ return (struct mpx_bounds_table *)__bd_entry;
+}
+
+static inline struct mpx_bt_entry *
+mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
+{
+ struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
+ struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde);
+ unsigned long index = __vaddr_bounds_table_index(vaddr);
+ return &bt->entries[index];
+}
+
+#endif /* _MPX_HW_H */
diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c
new file mode 100644
index 000000000..50f7e9272
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mini-test.c
@@ -0,0 +1,1616 @@
+/*
+ * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtensions)
+ *
+ * Written by:
+ * "Ren, Qiaowei" <qiaowei.ren@intel.com>
+ * "Wei, Gang" <gang.wei@intel.com>
+ * "Hansen, Dave" <dave.hansen@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2.
+ */
+
+/*
+ * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
+ * it works on 32-bit.
+ */
+
+int inspect_every_this_many_mallocs = 100;
+int zap_all_every_this_many_mallocs = 1000;
+
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "mpx-hw.h"
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef TEST_DURATION_SECS
+#define TEST_DURATION_SECS 3
+#endif
+
+void write_int_to(char *prefix, char *file, int int_to_write)
+{
+ char buf[100];
+ int fd = open(file, O_RDWR);
+ int len;
+ int ret;
+
+ assert(fd >= 0);
+ len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
+ assert(len >= 0);
+ assert(len < sizeof(buf));
+ ret = write(fd, buf, len);
+ assert(ret == len);
+ ret = close(fd);
+ assert(!ret);
+}
+
+void write_pid_to(char *prefix, char *file)
+{
+ write_int_to(prefix, file, getpid());
+}
+
+void trace_me(void)
+{
+/* tracing events dir */
+#define TED "/sys/kernel/debug/tracing/events/"
+/*
+ write_pid_to("common_pid=", TED "signal/filter");
+ write_pid_to("common_pid=", TED "exceptions/filter");
+ write_int_to("", TED "signal/enable", 1);
+ write_int_to("", TED "exceptions/enable", 1);
+*/
+ write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
+ write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
+}
+
+#define test_failed() __test_failed(__FILE__, __LINE__)
+static void __test_failed(char *f, int l)
+{
+ fprintf(stderr, "abort @ %s::%d\n", f, l);
+ abort();
+}
+
+/* Error Printf */
+#define eprintf(args...) fprintf(stderr, args)
+
+#ifdef __i386__
+
+/* i386 directory size is 4MB */
+#define REG_IP_IDX REG_EIP
+#define REX_PREFIX
+
+#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate)
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "push %%ebx;"
+ "cpuid;"
+ "mov %%ebx, %1;"
+ "pop %%ebx"
+ : "=a" (*eax),
+ "=g" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+#else /* __i386__ */
+
+#define REG_IP_IDX REG_RIP
+#define REX_PREFIX "0x48, "
+
+#define XSAVE_OFFSET_IN_FPMEM 0
+
+/*
+ * __cpuid() is from the Linux Kernel:
+ */
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+#endif /* !__i386__ */
+
+struct xsave_hdr_struct {
+ uint64_t xstate_bv;
+ uint64_t reserved1[2];
+ uint64_t reserved2[5];
+} __attribute__((packed));
+
+struct bndregs_struct {
+ uint64_t bndregs[8];
+} __attribute__((packed));
+
+struct bndcsr_struct {
+ uint64_t cfg_reg_u;
+ uint64_t status_reg;
+} __attribute__((packed));
+
+struct xsave_struct {
+ uint8_t fpu_sse[512];
+ struct xsave_hdr_struct xsave_hdr;
+ uint8_t ymm[256];
+ uint8_t lwp[128];
+ struct bndregs_struct bndregs;
+ struct bndcsr_struct bndcsr;
+} __attribute__((packed));
+
+uint8_t __attribute__((__aligned__(64))) buffer[4096];
+struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
+
+uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
+struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
+
+uint64_t num_bnd_chk;
+
+static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
+{
+ uint32_t lmask = mask;
+ uint32_t hmask = mask >> 32;
+
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+}
+
+static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
+{
+ uint32_t lmask = mask;
+ uint32_t hmask = mask >> 32;
+ unsigned char *fx = _fx;
+
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+}
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+
+ asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((uint64_t)edx << 32);
+}
+
+static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
+{
+ memset(buffer, 0, sizeof(buffer));
+ memcpy(buffer,
+ (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM,
+ sizeof(struct xsave_struct));
+
+ return xsave_buf->bndcsr.status_reg;
+}
+
+#include <pthread.h>
+
+static uint8_t *get_next_inst_ip(uint8_t *addr)
+{
+ uint8_t *ip = addr;
+ uint8_t sib;
+ uint8_t rm;
+ uint8_t mod;
+ uint8_t base;
+ uint8_t modrm;
+
+ /* determine the prefix. */
+ switch(*ip) {
+ case 0xf2:
+ case 0xf3:
+ case 0x66:
+ ip++;
+ break;
+ }
+
+ /* look for rex prefix */
+ if ((*ip & 0x40) == 0x40)
+ ip++;
+
+ /* Make sure we have an MPX instruction. */
+ if (*ip++ != 0x0f)
+ return addr;
+
+ /* Skip the op code byte. */
+ ip++;
+
+ /* Get the modrm byte. */
+ modrm = *ip++;
+
+ /* Break it down into parts. */
+ rm = modrm & 7;
+ mod = (modrm >> 6);
+
+ /* Init the parts of the address mode. */
+ base = 8;
+
+ /* Is it a mem mode? */
+ if (mod != 3) {
+ /* look for scaled indexed addressing */
+ if (rm == 4) {
+ /* SIB addressing */
+ sib = *ip++;
+ base = sib & 7;
+ switch (mod) {
+ case 0:
+ if (base == 5)
+ ip += 4;
+ break;
+
+ case 1:
+ ip++;
+ break;
+
+ case 2:
+ ip += 4;
+ break;
+ }
+
+ } else {
+ /* MODRM addressing */
+ switch (mod) {
+ case 0:
+ /* DISP32 addressing, no base */
+ if (rm == 5)
+ ip += 4;
+ break;
+
+ case 1:
+ ip++;
+ break;
+
+ case 2:
+ ip += 4;
+ break;
+ }
+ }
+ }
+ return ip;
+}
+
+#ifdef si_lower
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+ return si->si_lower;
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+ return si->si_upper;
+}
+#else
+
+/*
+ * This deals with old version of _sigfault in some distros:
+ *
+
+old _sigfault:
+ struct {
+ void *si_addr;
+ } _sigfault;
+
+new _sigfault:
+ struct {
+ void __user *_addr;
+ int _trapno;
+ short _addr_lsb;
+ union {
+ struct {
+ void __user *_lower;
+ void __user *_upper;
+ } _addr_bnd;
+ __u32 _pkey;
+ };
+ } _sigfault;
+ *
+ */
+
+static inline void **__si_bounds_hack(siginfo_t *si)
+{
+ void *sigfault = &si->_sifields._sigfault;
+ void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
+ int *trapno = (int*)end_sigfault;
+ /* skip _trapno and _addr_lsb */
+ void **__si_lower = (void**)(trapno + 2);
+
+ return __si_lower;
+}
+
+static inline void *__si_bounds_lower(siginfo_t *si)
+{
+ return *__si_bounds_hack(si);
+}
+
+static inline void *__si_bounds_upper(siginfo_t *si)
+{
+ return *(__si_bounds_hack(si) + 1);
+}
+#endif
+
+static int br_count;
+static int expected_bnd_index = -1;
+uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
+unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+/*
+ * The kernel is supposed to provide some information about the bounds
+ * exception in the siginfo. It should match what we have in the bounds
+ * registers that we are checking against. Just check against the shadow copy
+ * since it is easily available, and we also check that *it* matches the real
+ * registers.
+ */
+void check_siginfo_vs_shadow(siginfo_t* si)
+{
+ int siginfo_ok = 1;
+ void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
+ void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];
+
+ if ((expected_bnd_index < 0) ||
+ (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
+ fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
+ expected_bnd_index);
+ exit(6);
+ }
+ if (__si_bounds_lower(si) != shadow_lower)
+ siginfo_ok = 0;
+ if (__si_bounds_upper(si) != shadow_upper)
+ siginfo_ok = 0;
+
+ if (!siginfo_ok) {
+ fprintf(stderr, "ERROR: siginfo bounds do not match "
+ "shadow bounds for register %d\n", expected_bnd_index);
+ exit(7);
+ }
+}
+
+void handler(int signum, siginfo_t *si, void *vucontext)
+{
+ int i;
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+
+ dprintf1("entered signal handler\n");
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+
+ if (trapno == 5) {
+ typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
+ uint64_t status = read_mpx_status_sig(uctxt);
+ uint64_t br_reason = status & 0x3;
+
+ br_count++;
+ dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
+
+ dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
+ status, ip, br_reason);
+ dprintf2("si_signo: %d\n", si->si_signo);
+ dprintf2(" signum: %d\n", signum);
+ dprintf2("info->si_code == SEGV_BNDERR: %d\n",
+ (si->si_code == SEGV_BNDERR));
+ dprintf2("info->si_code: %d\n", si->si_code);
+ dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
+ dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
+
+ for (i = 0; i < 8; i++)
+ dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
+ switch (br_reason) {
+ case 0: /* traditional BR */
+ fprintf(stderr,
+ "Undefined status with bound exception:%jx\n",
+ status);
+ exit(5);
+ case 1: /* #BR MPX bounds exception */
+ /* these are normal and we expect to see them */
+
+ check_siginfo_vs_shadow(si);
+
+ dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
+ status, (void *)ip, si->si_addr);
+ num_bnd_chk++;
+ uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+ (greg_t)get_next_inst_ip((uint8_t *)ip);
+ break;
+ case 2:
+ fprintf(stderr, "#BR status == 2, missing bounds table,"
+ "kernel should have handled!!\n");
+ exit(4);
+ break;
+ default:
+ fprintf(stderr, "bound check error: status 0x%jx at %p\n",
+ status, (void *)ip);
+ num_bnd_chk++;
+ uctxt->uc_mcontext.gregs[REG_IP_IDX] =
+ (greg_t)get_next_inst_ip((uint8_t *)ip);
+ fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
+ exit(3);
+ }
+ } else if (trapno == 14) {
+ eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
+ trapno, ip);
+ eprintf("si_addr %p\n", si->si_addr);
+ eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+ test_failed();
+ } else {
+ eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
+ eprintf("si_addr %p\n", si->si_addr);
+ eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
+ test_failed();
+ }
+}
+
+static inline void cpuid_count(unsigned int op, int count,
+ unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ *eax = op;
+ *ecx = count;
+ __cpuid(eax, ebx, ecx, edx);
+}
+
+#define XSTATE_CPUID 0x0000000d
+
+/*
+ * List of XSAVE features Linux knows about:
+ */
+enum xfeature_bit {
+ XSTATE_BIT_FP,
+ XSTATE_BIT_SSE,
+ XSTATE_BIT_YMM,
+ XSTATE_BIT_BNDREGS,
+ XSTATE_BIT_BNDCSR,
+ XSTATE_BIT_OPMASK,
+ XSTATE_BIT_ZMM_Hi256,
+ XSTATE_BIT_Hi16_ZMM,
+
+ XFEATURES_NR_MAX,
+};
+
+#define XSTATE_FP (1 << XSTATE_BIT_FP)
+#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
+#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
+#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
+#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
+#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
+#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
+#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
+
+#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
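+
+/*
+ * Illustrative arithmetic: XSTATE_BNDREGS == 1 << 3 == 0x8 and
+ * XSTATE_BNDCSR == 1 << 4 == 0x10, so MPX_XSTATES == 0x18 -- the same
+ * literal mask passed to xrstor_state() and xsave_state_1() below.
+ */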
+
+bool one_bit(unsigned int x, int bit)
+{
+ return !!(x & (1<<bit));
+}
+
+void print_state_component(int state_bit_nr, char *name)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned int state_component_size;
+ unsigned int state_component_supervisor;
+ unsigned int state_component_user;
+ unsigned int state_component_aligned;
+
+ /* See SDM Section 13.2 */
+ cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
+ assert(eax || ebx || ecx);
+ state_component_size = eax;
+ state_component_supervisor = ((!ebx) && one_bit(ecx, 0));
+ state_component_user = !one_bit(ecx, 0);
+ state_component_aligned = one_bit(ecx, 1);
+ printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
+ name,
+ state_component_size, state_component_user,
+ state_component_supervisor, state_component_aligned);
+
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
+#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */
+
+bool check_mpx_support(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);
+
+ /* We can't do much without XSAVE, so just bail out if it is missing */
+ if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
+ fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
+ exit(0);
+ }
+
+ if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
+ fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
+ exit(0);
+ }
+
+ /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
+ /* Is this redundant with the feature bit checks? */
+ cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
+ if (eax < XSTATE_CPUID) {
+ fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
+ " can not run MPX tests\n");
+ exit(0);
+ }
+
+ printf("XSAVE is supported by HW & OS\n");
+
+ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
+
+ printf("XSAVE processor supported state mask: 0x%x\n", eax);
+ printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));
+
+ /* Make sure that the processor supports the MPX states */
+ if ((eax & MPX_XSTATES) != MPX_XSTATES) {
+ fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
+ exit(0);
+ }
+
+ /* Make sure the MPX states are enabled in XCR0 */
+ if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
+ fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, "
+ "can not run MPX tests\n");
+ exit(0);
+ }
+
+ print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
+ print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR");
+
+ return true;
+}
+
+void enable_mpx(void *l1base)
+{
+ /* enable point lookup */
+ memset(buffer, 0, sizeof(buffer));
+ xrstor_state(xsave_buf, 0x18);
+
+ xsave_buf->xsave_hdr.xstate_bv = 0x10;
+ xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
+ xsave_buf->bndcsr.status_reg = 0;
+
+ dprintf2("bf xrstor\n");
+ dprintf2("xsave cndcsr: status %jx, configu %jx\n",
+ xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+ xrstor_state(xsave_buf, 0x18);
+ dprintf2("after xrstor\n");
+
+ xsave_state_1(xsave_buf, 0x18);
+
+ dprintf1("xsave bndcsr: status %jx, configu %jx\n",
+ xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
+}
+
+#include <sys/prctl.h>
+
+struct mpx_bounds_dir *bounds_dir_ptr;
+
+unsigned long __bd_incore(const char *func, int line)
+{
+ unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
+ return ret;
+}
+#define bd_incore() __bd_incore(__func__, __LINE__)
+
+void check_clear(void *ptr, unsigned long sz)
+{
+ unsigned long *i;
+
+ for (i = ptr; (void *)i < ptr + sz; i++) {
+ if (*i) {
+ dprintf1("%p is NOT clear at %p\n", ptr, i);
+ assert(0);
+ }
+ }
+ dprintf1("%p is clear for %lx\n", ptr, sz);
+}
+
+void check_clear_bd(void)
+{
+ check_clear(bounds_dir_ptr, 2UL << 30);
+}
+
+#define USE_MALLOC_FOR_BOUNDS_DIR 1
+bool process_specific_init(void)
+{
+ unsigned long size;
+ unsigned long *dir;
+ /* Guarantee we have the space to align it, add padding: */
+ unsigned long pad = getpagesize();
+
+ size = 2UL << 30; /* 2GB */
+ if (sizeof(unsigned long) == 4)
+ size = 4UL << 20; /* 4MB */
+ dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));
+
+ if (USE_MALLOC_FOR_BOUNDS_DIR) {
+ unsigned long _dir;
+
+ dir = malloc(size + pad);
+ assert(dir);
+ _dir = (unsigned long)dir;
+ _dir += 0xfffUL;
+ _dir &= ~0xfffUL;
+ dir = (void *)_dir;
+ } else {
+ /*
+ * This makes debugging easier because the address
+ * calculations are simpler:
+ */
+ dir = mmap((void *)0x200000000000, size + pad,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (dir == (void *)-1) {
+ perror("unable to allocate bounds directory");
+ abort();
+ }
+ check_clear(dir, size);
+ }
+ bounds_dir_ptr = (void *)dir;
+ madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
+ bd_incore();
+ dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
+ (char *)bounds_dir_ptr + size);
+ check_clear(dir, size);
+ enable_mpx(dir);
+ check_clear(dir, size);
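+ /* 43 == PR_MPX_ENABLE_MANAGEMENT; the 44 below is PR_MPX_DISABLE_MANAGEMENT */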
+ if (prctl(43, 0, 0, 0, 0)) {
+ printf("no MPX support\n");
+ abort();
+ return false;
+ }
+ return true;
+}
+
+bool process_specific_finish(void)
+{
+ if (prctl(44)) {
+ printf("no MPX support\n");
+ return false;
+ }
+ return true;
+}
+
+void setup_handler()
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #BR is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0; /* void(*)(int)*/
+ newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */
+
+ /*sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ assert(r == 0);
+}
+
+void mpx_prepare(void)
+{
+ dprintf2("%s()\n", __func__);
+ setup_handler();
+ process_specific_init();
+}
+
+void mpx_cleanup(void)
+{
+ printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);
+ process_specific_finish();
+}
+
+/*-------------- the following is test case ---------------*/
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+uint64_t num_lower_brs;
+uint64_t num_upper_brs;
+
+#define MPX_CONFIG_OFFSET 1024
+#define MPX_BOUNDS_OFFSET 960
+#define MPX_HEADER_OFFSET 512
+#define MAX_ADDR_TESTED (1<<28)
+#define TEST_ROUNDS 100
+
+/*
+ 0F 1A /r BNDLDX-Load
+ 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
+ 66 0F 1A /r BNDMOV bnd1, bnd2/m128
+ 66 0F 1B /r BNDMOV bnd1/m128, bnd2
+ F2 0F 1A /r BNDCU bnd, r/m64
+ F2 0F 1B /r BNDCN bnd, r/m64
+ F3 0F 1A /r BNDCL bnd, r/m64
+ F3 0F 1B /r BNDMK bnd, m64
+*/
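+
+/*
+ * Illustrative decode of one of the hand-assembled sequences used by the
+ * helpers below: f3 0f 1b 04 11 is "bndmk (%rcx,%rdx,1),%bnd0".  0x04 is
+ * the ModRM byte (mod=00, reg=000 -> %bnd0, rm=100 -> SIB byte follows)
+ * and 0x11 is the SIB byte (scale=00, index=010 -> %rdx, base=001 -> %rcx).
+ * The instructions are emitted as raw bytes, presumably because assemblers
+ * of the time did not all know the MPX mnemonics.
+ */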
+
+static __always_inline void xsave_state(void *_fx, uint64_t mask)
+{
+ uint32_t lmask = mask;
+ uint32_t hmask = mask >> 32;
+ unsigned char *fx = _fx;
+
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
+ : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ : "memory");
+}
+
+static __always_inline void mpx_clear_bnd0(void)
+{
+ long size = 0;
+ void *ptr = NULL;
+ /* F3 0F 1B /r BNDMK bnd, m64 */
+ /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
+ asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+ : : "c" (ptr), "d" (size-1)
+ : "memory");
+}
+
+static __always_inline void mpx_make_bound_helper(unsigned long ptr,
+ unsigned long size)
+{
+ /* F3 0F 1B /r BNDMK bnd, m64 */
+ /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
+ asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
+ : : "c" (ptr), "d" (size-1)
+ : "memory");
+}
+
+static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
+{
+ /* F3 0F 1A /r BNDCL bnd, r/m64 */
+ /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */
+ asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
+ : : "c" (ptr)
+ : "memory");
+}
+
+static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
+{
+ /* F2 0F 1A /r BNDCU bnd, r/m64 */
+ /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */
+ asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
+ : : "c" (ptr)
+ : "memory");
+}
+
+static __always_inline void mpx_movbndreg_helper()
+{
+ /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
+ /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */
+
+ asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
+}
+
+static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
+{
+ /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
+ /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */
+ asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
+ : : "c" (mem)
+ : "memory");
+}
+
+static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
+{
+ /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */
+ /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */
+ asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
+ : : "c" (mem)
+ : "memory");
+}
+
+static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
+ unsigned long ptr_val)
+{
+ /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */
+ /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */
+ asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
+ : : "c" (ptr_addr), "d" (ptr_val)
+ : "memory");
+}
+
+static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
+ unsigned long ptr_val)
+{
+ /* 0F 1A /r BNDLDX-Load */
+ /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */
+ asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
+ : : "c" (ptr_addr), "d" (ptr_val)
+ : "memory");
+}
+
+void __print_context(void *__print_xsave_buffer, int line)
+{
+ uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
+ uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
+
+ int i;
+ eprintf("%s()::%d\n", "print_context", line);
+ for (i = 0; i < 4; i++) {
+ eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i,
+ (unsigned long)bounds[i*2],
+ ~(unsigned long)bounds[i*2+1],
+ (unsigned long)bounds[i*2+1]);
+ }
+
+ eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]);
+}
+#define print_context(x) __print_context(x, __LINE__)
+#ifdef DEBUG
+#define dprint_context(x) print_context(x)
+#else
+#define dprint_context(x) do{}while(0)
+#endif
+
+void init()
+{
+ int i;
+
+ srand((unsigned int)time(NULL));
+
+ for (i = 0; i < 4; i++) {
+ shadow_plb[i][0] = 0;
+ shadow_plb[i][1] = ~(unsigned long)0;
+ }
+}
+
+long int __mpx_random(int line)
+{
+#ifdef NOT_SO_RANDOM
+ static long fake = 722122311;
+ fake += 563792075;
+ return fake;
+#else
+ return random();
+#endif
+}
+#define mpx_random() __mpx_random(__LINE__)
+
+uint8_t *get_random_addr()
+{
+ uint8_t *addr = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
+ return (addr - (unsigned long)addr % sizeof(uint8_t *));
+}
+
+static inline bool compare_context(void *__xsave_buffer)
+{
+ uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
+
+ int i;
+ for (i = 0; i < 4; i++) {
+ dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
+ i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+ i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]);
+ if ((shadow_plb[i][0] != bounds[i*2]) ||
+ (shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
+ eprintf("ERROR comparing shadow to real bound register %d\n", i);
+ eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
+ (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
+ (unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+void mkbnd_shadow(uint8_t *ptr, int index, long offset)
+{
+ uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+ uint64_t *upper = (uint64_t *)&(shadow_plb[index][1]);
+ *lower = (unsigned long)ptr;
+ *upper = (unsigned long)ptr + offset - 1;
+}
+
+void check_lowerbound_shadow(uint8_t *ptr, int index)
+{
+ uint64_t *lower = (uint64_t *)&(shadow_plb[index][0]);
+ if (*lower > (uint64_t)(unsigned long)ptr)
+ num_lower_brs++;
+ else
+ dprintf1("LowerBoundChk passed:%p\n", ptr);
+}
+
+void check_upperbound_shadow(uint8_t *ptr, int index)
+{
+ uint64_t upper = *(uint64_t *)&(shadow_plb[index][1]);
+ if (upper < (uint64_t)(unsigned long)ptr)
+ num_upper_brs++;
+ else
+ dprintf1("UpperBoundChk passed:%p\n", ptr);
+}
+
+__always_inline void movbndreg_shadow(int src, int dest)
+{
+ shadow_plb[dest][0] = shadow_plb[src][0];
+ shadow_plb[dest][1] = shadow_plb[src][1];
+}
+
+__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
+{
+ unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]);
+ unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]);
+ *dest = *lower;
+ *(dest+1) = *upper;
+}
+
+__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
+{
+ unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]);
+ unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]);
+ *lower = *src;
+ *upper = *(src+1);
+}
+
+__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+ shadow_map[0] = (unsigned long)shadow_plb[index][0];
+ shadow_map[1] = (unsigned long)shadow_plb[index][1];
+ shadow_map[2] = (unsigned long)ptr_val;
+ dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
+ index, ptr, ptr_val, ptr_val);
+ /* ptr ignored */
+}
+
+void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
+{
+ uint64_t lower = shadow_map[0];
+ uint64_t upper = shadow_map[1];
+ uint8_t *value = (uint8_t *)shadow_map[2];
+
+ if (value != ptr_val) {
+ dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
+ "because %p != %p\n", __func__, index, ptr,
+ ptr_val, index, value, ptr_val);
+ shadow_plb[index][0] = 0;
+ shadow_plb[index][1] = ~(unsigned long)0;
+ } else {
+ shadow_plb[index][0] = lower;
+ shadow_plb[index][1] = upper;
+ }
+ /* ptr ignored */
+}
+
+static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+}
+
+static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ mkbnd_shadow(ptr, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
+{
+ /* these are hard-coded to check bnd0 */
+ expected_bnd_index = 0;
+ mpx_check_lowerbound_helper((unsigned long)(ptr-1));
+ mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
+ /* reset this since we do not expect any more bounds exceptions */
+ expected_bnd_index = -1;
+}
+
+static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ check_lowerbound_shadow(ptr-1, 0);
+ check_upperbound_shadow(ptr+0x1800, 0);
+}
+
+static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_make_bound_helper((unsigned long)ptr, 0x1800);
+ mpx_movbndreg_helper();
+ mpx_movbnd2mem_helper(buf);
+ mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ mkbnd_shadow(ptr, 0, 0x1800);
+ movbndreg_shadow(0, 2);
+ movbnd2mem_shadow(0, (unsigned long *)buf);
+ mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_movbnd_from_mem_helper(buf);
+}
+
+static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ movbnd_from_mem_shadow((unsigned long *)buf, 0);
+}
+
+static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+ mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
+}
+
+static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ stdsc_shadow(0, buf, ptr);
+ mkbnd_shadow(ptr+0x12, 0, 0x1800);
+}
+
+static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
+{
+ mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
+}
+
+static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
+{
+ lddsc_shadow(0, buf, ptr);
+}
+
+#define NR_MPX_TEST_FUNCTIONS 6
+
+/*
+ * For compatibility reasons, MPX will clear the bounds registers
+ * when you make function calls (among other things). We have to
+ * preserve the registers in between calls to the "helpers" since
+ * they build on each other.
+ *
+ * Be very careful not to make any function calls inside the
+ * helpers, or anywhere else between the xrstor and xsave.
+ */
+#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \
+ xrstor_state(xsave_test_buf, flags); \
+ mpx_test_helper##helper_nr(buf, ptr); \
+ xsave_state(xsave_test_buf, flags); \
+ mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \
+} while (0)
+
+static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
+{
+ uint64_t flags = 0x18;
+
+ dprint_context(xsave_test_buf);
+ switch (nr) {
+ case 0:
+ run_helper(0, buf, buf_shadow, ptr);
+ break;
+ case 1:
+ run_helper(1, buf, buf_shadow, ptr);
+ break;
+ case 2:
+ run_helper(2, buf, buf_shadow, ptr);
+ break;
+ case 3:
+ run_helper(3, buf, buf_shadow, ptr);
+ break;
+ case 4:
+ run_helper(4, buf, buf_shadow, ptr);
+ break;
+ case 5:
+ run_helper(5, buf, buf_shadow, ptr);
+ break;
+ default:
+ test_failed();
+ break;
+ }
+ dprint_context(xsave_test_buf);
+}
+
+unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
+extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
+
+long cover_buf_with_bt_entries(void *buf, long buf_len)
+{
+ int i;
+ long nr_to_fill;
+ int ratio = 1000;
+ unsigned long buf_len_in_ptrs;
+
+ /* Fill about 1/1000 of the space with bt entries (see ratio above) */
+ nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
+
+ if (!nr_to_fill)
+ dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
+
+ /* Align the buffer to pointer size */
+ while (((unsigned long)buf) % sizeof(void *)) {
+ buf++;
+ buf_len--;
+ }
+ /* We are storing pointers, so convert the length to pointer-sized units */
+ buf_len_in_ptrs = buf_len / sizeof(void *);
+
+ for (i = 0; i < nr_to_fill; i++) {
+ long index = (mpx_random() % buf_len_in_ptrs);
+ void *ptr = buf + index * sizeof(unsigned long);
+ unsigned long ptr_addr = (unsigned long)ptr;
+
+ /* ptr and size can be anything */
+ mpx_make_bound_helper((unsigned long)ptr, 8);
+
+ /*
+ * Take bnd0 and put it into the bounds tables. "buf + index" is
+ * an address inside the buffer where we are pretending that we
+ * are going to put a pointer. We never actually do, because we
+ * will never load entries back from the table, so it doesn't matter.
+ */
+ mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
+ dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
+ ptr_addr, buf);
+ }
+ return nr_to_fill;
+}
+
+unsigned long align_down(unsigned long alignme, unsigned long align_to)
+{
+ return alignme & ~(align_to-1);
+}
+
+unsigned long align_up(unsigned long alignme, unsigned long align_to)
+{
+ return (alignme + align_to - 1) & ~(align_to-1);
+}
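+
+/*
+ * Worked example (illustrative): both helpers assume align_to is a
+ * power of two, since they rely on the ~(align_to-1) mask trick:
+ *
+ *	align_down(0x1234, 0x1000) == 0x1000
+ *	align_up(0x1234, 0x1000)   == 0x2000
+ *	align_up(0x1000, 0x1000)   == 0x1000  (already aligned)
+ */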
+
+/*
+ * Using 1MB alignment guarantees that no allocation will
+ * overlap with another's bounds tables.
+ *
+ * We have to cook our own allocator here. malloc() can
+ * mix other allocation with ours which means that even
+ * if we free all of our allocations, there might still
+ * be bounds tables for the *areas* since there is other
+ * valid memory there.
+ *
+ * We also can't use malloc() because a free() of an area
+ * might not free it back to the kernel. We want it
+ * completely unmapped, and malloc() does not guarantee
+ * that.
+ */
+#ifdef __i386__
+long alignment = 4096;
+long sz_alignment = 4096;
+#else
+long alignment = 1 * MB;
+long sz_alignment = 1 * MB;
+#endif
+void *mpx_mini_alloc(unsigned long sz)
+{
+ unsigned long long tries = 0;
+ static void *last;
+ void *ptr;
+ void *try_at;
+
+ sz = align_up(sz, sz_alignment);
+
+ try_at = last + alignment;
+ while (1) {
+ ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (ptr == (void *)-1)
+ return NULL;
+ if (ptr == try_at)
+ break;
+
+ munmap(ptr, sz);
+ try_at += alignment;
+#ifdef __i386__
+ /*
+ * This isn't quite correct for 32-bit binaries
+ * on 64-bit kernels since they can use the
+ * entire 32-bit address space, but it's close
+ * enough.
+ */
+ if (try_at > (void *)0xC0000000)
+#else
+ if (try_at > (void *)0x0000800000000000)
+#endif
+ try_at = (void *)0x0;
+ if (!(++tries % 10000))
+ dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
+ continue;
+ }
+ last = ptr;
+ dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
+ return ptr;
+}
+void mpx_mini_free(void *ptr, long sz)
+{
+ dprintf2("%s() ptr: %p\n", __func__, ptr);
+ if ((unsigned long)ptr > 0x100000000000) {
+ dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
+ test_failed();
+ }
+ sz = align_up(sz, sz_alignment);
+ dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
+ munmap(ptr, sz);
+ dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
+}
+
+#define NR_MALLOCS 100
+struct one_malloc {
+ char *ptr;
+ int nr_filled_btes;
+ unsigned long size;
+};
+struct one_malloc mallocs[NR_MALLOCS];
+
+void free_one_malloc(int index)
+{
+ unsigned long free_ptr;
+ unsigned long mask;
+
+ if (!mallocs[index].ptr)
+ return;
+
+ mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
+ dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr);
+
+ free_ptr = (unsigned long)mallocs[index].ptr;
+ mask = alignment-1;
+ dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
+ (free_ptr & mask), mask);
+ assert((free_ptr & mask) == 0);
+
+ mallocs[index].ptr = NULL;
+}
+
+#ifdef __i386__
+#define MPX_BOUNDS_TABLE_COVERS 4096
+#else
+#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
+#endif
+void zap_everything(void)
+{
+ long after_zap;
+ long before_zap;
+ int i;
+
+ before_zap = inspect_me(bounds_dir_ptr);
+ dprintf1("zapping everything start: %ld\n", before_zap);
+ for (i = 0; i < NR_MALLOCS; i++)
+ free_one_malloc(i);
+
+ after_zap = inspect_me(bounds_dir_ptr);
+ dprintf1("zapping everything done: %ld\n", after_zap);
+ /*
+ * We only guarantee to empty the thing out if our allocations are
+ * exactly aligned on the boundaries of a bounds table.
+ */
+ if ((alignment >= MPX_BOUNDS_TABLE_COVERS) &&
+ (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) {
+ if (after_zap != 0)
+ test_failed();
+
+ assert(after_zap == 0);
+ }
+}
+
+void do_one_malloc(void)
+{
+ static int malloc_counter;
+ long sz;
+ int rand_index = (mpx_random() % NR_MALLOCS);
+ void *ptr = mallocs[rand_index].ptr;
+
+ dprintf3("%s() enter\n", __func__);
+
+ if (ptr) {
+ dprintf3("freeing one malloc at index: %d\n", rand_index);
+ free_one_malloc(rand_index);
+ if (mpx_random() % (NR_MALLOCS*3) == 3) {
+ int i;
+ dprintf3("zapping some more\n");
+ for (i = rand_index; i < NR_MALLOCS; i++)
+ free_one_malloc(i);
+ }
+ if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
+ zap_everything();
+ }
+
+ /* 1->~1M */
+ sz = (1 + mpx_random() % 1000) * 1000;
+ ptr = mpx_mini_alloc(sz);
+ if (!ptr) {
+ /*
+ * If we are failing allocations, just assume we
+ * are out of memory and zap everything.
+ */
+ dprintf3("zapping everything because out of memory\n");
+ zap_everything();
+ goto out;
+ }
+
+ dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
+ mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
+ mallocs[rand_index].ptr = ptr;
+ mallocs[rand_index].size = sz;
+out:
+ if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
+ inspect_me(bounds_dir_ptr);
+}
+
+void run_timed_test(void (*test_func)(void))
+{
+ int done = 0;
+ long iteration = 0;
+ static time_t last_print;
+ time_t now;
+ time_t start;
+
+ time(&start);
+ while (!done) {
+ time(&now);
+ if ((now - start) > TEST_DURATION_SECS)
+ done = 1;
+
+ test_func();
+ iteration++;
+
+ if ((now - last_print > 1) || done) {
+ printf("iteration %ld complete, OK so far\n", iteration);
+ last_print = now;
+ }
+ }
+}
+
+void check_bounds_table_frees(void)
+{
+ printf("executing unmaptest\n");
+ inspect_me(bounds_dir_ptr);
+ run_timed_test(&do_one_malloc);
+ printf("done with malloc() fun\n");
+}
+
+void insn_test_failed(int test_nr, int test_round, void *buf,
+ void *buf_shadow, void *ptr)
+{
+ print_context(xsave_test_buf);
+ eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);
+ while (test_nr == 5) {
+ struct mpx_bt_entry *bte;
+ struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr;
+ struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd);
+
+ printf(" bd: %p\n", bd);
+ printf("&bde: %p\n", bde);
+ printf("*bde: %lx\n", *(unsigned long *)bde);
+ if (!bd_entry_valid(bde))
+ break;
+
+ bte = mpx_vaddr_to_bt_entry(buf, bd);
+ printf(" te: %p\n", bte);
+ printf("bte[0]: %lx\n", bte->contents[0]);
+ printf("bte[1]: %lx\n", bte->contents[1]);
+ printf("bte[2]: %lx\n", bte->contents[2]);
+ printf("bte[3]: %lx\n", bte->contents[3]);
+ break;
+ }
+ test_failed();
+}
+
+void check_mpx_insns_and_tables(void)
+{
+ int successes = 0;
+ int failures = 0;
+ int buf_size = (1024*1024);
+ unsigned long *buf = malloc(buf_size);
+ const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
+ int i, j;
+
+ if (!buf)
+ test_failed();
+ memset(buf, 0, buf_size);
+ memset(buf_shadow, 0, sizeof(buf_shadow));
+
+ for (i = 0; i < TEST_ROUNDS; i++) {
+ uint8_t *ptr = get_random_addr() + 8;
+
+ for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
+ if (0 && j != 5) {
+ successes++;
+ continue;
+ }
+ dprintf2("starting test %d round %d\n", j, i);
+ dprint_context(xsave_test_buf);
+ /*
+ * test5 loads an address from the bounds tables.
+ * The load will only complete if 'ptr' matches
+ * the load and the store, so with random addrs,
+ * the odds of this are very small. Make it
+ * higher by only moving 'ptr' 1/10 times.
+ */
+ if (random() % 10 <= 0)
+ ptr = get_random_addr() + 8;
+ dprintf3("random ptr{%p}\n", ptr);
+ dprint_context(xsave_test_buf);
+ run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
+ dprint_context(xsave_test_buf);
+ if (!compare_context(xsave_test_buf)) {
+ insn_test_failed(j, i, buf, buf_shadow, ptr);
+ failures++;
+ goto exit;
+ }
+ successes++;
+ dprint_context(xsave_test_buf);
+ dprintf2("finished test %d round %d\n", j, i);
+ dprintf3("\n");
+ dprint_context(xsave_test_buf);
+ }
+ }
+
+exit:
+ dprintf2("\nabout to free:\n");
+ free(buf);
+ dprintf1("successes: %d\n", successes);
+ dprintf1(" failures: %d\n", failures);
+ dprintf1(" tests: %d\n", total_nr_tests);
+ dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+ dprintf1(" saw: %d #BRs\n", br_count);
+ if (failures) {
+ eprintf("ERROR: non-zero number of failures\n");
+ exit(20);
+ }
+ if (successes != total_nr_tests) {
+ eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n",
+ successes, total_nr_tests);
+ exit(21);
+ }
+ if (num_upper_brs + num_lower_brs != br_count) {
+ eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
+ num_upper_brs, num_lower_brs, br_count);
+ eprintf("successes: %d\n", successes);
+ eprintf(" failures: %d\n", failures);
+ eprintf(" tests: %d\n", total_nr_tests);
+ eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
+ eprintf(" saw: %d #BRs\n", br_count);
+ exit(22);
+ }
+}
+
+/*
+ * This is supposed to SIGSEGV nicely once the kernel
+ * can no longer allocate vaddr space.
+ */
+void exhaust_vaddr_space(void)
+{
+ unsigned long ptr;
+ /* Try to make sure there is no room for a bounds table anywhere */
+ unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
+#ifdef __i386__
+ unsigned long max_vaddr = 0xf7788000UL;
+#else
+ unsigned long max_vaddr = 0x800000000000UL;
+#endif
+
+ dprintf1("%s() start\n", __func__);
+ /* do not start at 0, we aren't allowed to map there */
+ for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+ void *ptr_ret;
+ int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL);
+
+ if (!ret) {
+ dprintf1("madvise() %lx ret: %d\n", ptr, ret);
+ continue;
+ }
+ ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (ptr_ret != (void *)ptr) {
+ perror("mmap");
+ dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+ break;
+ }
+ if (!(ptr & 0xffffff))
+ dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret);
+ }
+ for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) {
+ dprintf2("covering 0x%lx with bounds table entries\n", ptr);
+ cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE);
+ }
+ dprintf1("%s() end\n", __func__);
+ printf("done with vaddr space fun\n");
+}
+
+void mpx_table_test(void)
+{
+ printf("starting mpx bounds table test\n");
+ run_timed_test(check_mpx_insns_and_tables);
+ printf("done with mpx bounds table test\n");
+}
+
+int main(int argc, char **argv)
+{
+ int unmaptest = 0;
+ int vaddrexhaust = 0;
+ int tabletest = 0;
+ int i;
+
+ check_mpx_support();
+ mpx_prepare();
+ srandom(11179);
+
+ bd_incore();
+ init();
+ bd_incore();
+
+ trace_me();
+
+ xsave_state((void *)xsave_test_buf, 0x1f);
+ if (!compare_context(xsave_test_buf))
+ printf("Init failed\n");
+
+ for (i = 1; i < argc; i++) {
+ if (!strcmp(argv[i], "unmaptest"))
+ unmaptest = 1;
+ if (!strcmp(argv[i], "vaddrexhaust"))
+ vaddrexhaust = 1;
+ if (!strcmp(argv[i], "tabletest"))
+ tabletest = 1;
+ }
+ if (!(unmaptest || vaddrexhaust || tabletest)) {
+ unmaptest = 1;
+ /* vaddrexhaust = 1; */
+ tabletest = 1;
+ }
+ if (unmaptest)
+ check_bounds_table_frees();
+ if (tabletest)
+ mpx_table_test();
+ if (vaddrexhaust)
+ exhaust_vaddr_space();
+ printf("%s completed successfully\n", argv[0]);
+ exit(0);
+}
+
+#include "mpx-dig.c"
diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h
new file mode 100644
index 000000000..6dbdd66b8
--- /dev/null
+++ b/tools/testing/selftests/x86/mpx-mm.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MPX_MM_H
+#define _MPX_MM_H
+
+#define PAGE_SIZE 4096
+#define MB (1UL<<20)
+
+extern long nr_incore(void *ptr, unsigned long size_bytes);
+
+#endif /* _MPX_MM_H */
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
new file mode 100644
index 000000000..254e5436b
--- /dev/null
+++ b/tools/testing/selftests/x86/pkey-helpers.h
@@ -0,0 +1,219 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#define NR_PKEYS 16
+#define PKRU_BITS_PER_PKEY 2
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ if (!dprint_in_signal) {
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+ } else {
+ int ret;
+ /*
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
+ */
+ ret = write(1, format, strlen(format));
+ if (ret < 0)
+ exit(1);
+ }
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern unsigned int shadow_pkru;
+static inline unsigned int __rdpkru(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned int pkru;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkru = eax;
+ return pkru;
+}
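+
+/*
+ * Background note: ".byte 0x0f,0x01,0xee" above is the RDPKRU
+ * instruction encoding, and ".byte 0x0f,0x01,0xef" below is WRPKRU;
+ * the raw bytes let this build with assemblers that predate the
+ * mnemonics.
+ */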
+
+static inline unsigned int _rdpkru(int line)
+{
+ unsigned int pkru = __rdpkru();
+
+ dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
+ line, pkru, shadow_pkru);
+ assert(pkru == shadow_pkru);
+
+ return pkru;
+}
+
+#define rdpkru() _rdpkru(__LINE__)
+
+static inline void __wrpkru(unsigned int pkru)
+{
+ unsigned int eax = pkru;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkru == __rdpkru());
+}
+
+static inline void wrpkru(unsigned int pkru)
+{
+ dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
+ /* will do the shadow check for us: */
+ rdpkru();
+ __wrpkru(pkru);
+ shadow_pkru = pkru;
+ dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
+}
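+
+/*
+ * PKRU layout (background, for reference): each pkey owns two
+ * adjacent bits, access-disable (AD) at bit 2*pkey and write-disable
+ * (WD) at bit 2*pkey+1. For example, PKRU == 0x10 sets only bit 4,
+ * i.e. access-disable for pkey 2.
+ */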
+
+/*
+ * These are technically racy, since something could
+ * change PKRU between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ unsigned int pkru = rdpkru();
+ int bit = pkey * 2;
+
+ if (do_allow)
+ pkru &= ~(1<<bit); /* clear the disable bit to allow access */
+ else
+ pkru |= (1<<bit);
+
+ wrpkru(pkru);
+ dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ long pkru = rdpkru();
+ int bit = pkey * 2 + 1;
+
+ if (do_allow_write)
+ pkru &= (1<<bit);
+ else
+ pkru |= (1<<bit);
+
+ wrpkru(pkru);
+ dprintf4("pkru now: %08x\n", rdpkru());
+}
+
+#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
+#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
+#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
+#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
+
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pku(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+#define XSTATE_PKRU_BIT (9)
+#define XSTATE_PKRU 0x200
+
+int pkru_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset = 0;
+ int xstate_size = 0;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKRU is set in XCR0 */
+ leaf = XSTATE_PKRU_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKRU_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKRU in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
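+
+/*
+ * Background (illustrative): CPUID leaf 0xd, sub-leaf 9 describes the
+ * PKRU xstate component: EAX holds its size in bytes (8: the 4-byte
+ * PKRU register plus 4 bytes of padding) and EBX its offset into the
+ * non-compacted XSAVE area (often 2688 in practice, but it must be
+ * queried rather than assumed, which is the point of this function).
+ */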
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c
new file mode 100644
index 000000000..27661302a
--- /dev/null
+++ b/tools/testing/selftests/x86/protection_keys.c
@@ -0,0 +1,1508 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests x86 Memory Protection Keys (see Documentation/x86/protection-keys.txt)
+ *
+ * There are examples in here of:
+ * * how to set protection keys on memory
+ * * how to set/clear bits in PKRU (the rights register)
+ * * how to handle SEGV_PKUERR signals and extract pkey-relevant
+ * information from the siginfo
+ *
+ * Things to add:
+ * make sure KSM and KSM COW breaking works
+ * prefault pages in at malloc, or not
+ * protect MPX bounds tables with protection keys?
+ * make sure VMA splitting/merging is working correctly
+ * OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
+ * look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
+ * do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
+ *
+ * Compile like this:
+ * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/futex.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <setjmp.h>
+
+#include "pkey-helpers.h"
+
+int iteration_nr = 1;
+int test_nr;
+
+unsigned int shadow_pkru;
+
+#define HPAGE_SIZE (1UL<<21)
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+int dprint_in_signal;
+char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
+void cat_into_file(char *str, char *file)
+{
+ int fd = open(file, O_RDWR);
+ int ret;
+
+ dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
+ /*
+ * these need to be raw because they are called under
+ * pkey_assert()
+ */
+ if (fd < 0) {
+ fprintf(stderr, "error opening '%s'\n", str);
+ perror("error: ");
+ exit(__LINE__);
+ }
+
+ ret = write(fd, str, strlen(str));
+ if (ret != strlen(str)) {
+ perror("write to file failed");
+ fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
+ exit(__LINE__);
+ }
+ close(fd);
+}
+
+#if CONTROL_TRACING > 0
+static int warned_tracing;
+int tracing_root_ok(void)
+{
+ if (geteuid() != 0) {
+ if (!warned_tracing)
+ fprintf(stderr, "WARNING: not run as root, "
+ "can not do tracing control\n");
+ warned_tracing = 1;
+ return 0;
+ }
+ return 1;
+}
+#endif
+
+void tracing_on(void)
+{
+#if CONTROL_TRACING > 0
+#define TRACEDIR "/sys/kernel/debug/tracing"
+ char pidstr[32];
+
+ if (!tracing_root_ok())
+ return;
+
+ sprintf(pidstr, "%d", getpid());
+ cat_into_file("0", TRACEDIR "/tracing_on");
+ cat_into_file("\n", TRACEDIR "/trace");
+ if (1) {
+ cat_into_file("function_graph", TRACEDIR "/current_tracer");
+ cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
+ } else {
+ cat_into_file("nop", TRACEDIR "/current_tracer");
+ }
+ cat_into_file(pidstr, TRACEDIR "/set_ftrace_pid");
+ cat_into_file("1", TRACEDIR "/tracing_on");
+ dprintf1("enabled tracing\n");
+#endif
+}
+
+void tracing_off(void)
+{
+#if CONTROL_TRACING > 0
+ if (!tracing_root_ok())
+ return;
+ cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+#endif
+}
+
+void abort_hooks(void)
+{
+ fprintf(stderr, "running %s()...\n", __func__);
+ tracing_off();
+#ifdef SLEEP_ON_ABORT
+ sleep(SLEEP_ON_ABORT);
+#endif
+}
+
+static inline void __page_o_noops(void)
+{
+ /* 8 bytes of instruction * 512 repetitions = 4096 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+/*
+ * This attempts to have roughly a page of instructions followed by a few
+ * instructions that do a write, and another page of instructions. That
+ * way, we are pretty sure that the write is in the second page of
+ * instructions and has at least a page of padding behind it.
+ *
+ * *That* lets us be sure to madvise() away the write instruction, which
+ * will then fault, which makes sure that the fault code handles
+ * execute-only memory properly.
+ */
+__attribute__((__aligned__(PAGE_SIZE)))
+void lots_o_noops_around_write(int *write_to_me)
+{
+ dprintf3("running %s()\n", __func__);
+ __page_o_noops();
+ /* Assume this happens in the second page of instructions: */
+ *write_to_me = __LINE__;
+ /* pad out by another page: */
+ __page_o_noops();
+ dprintf3("%s() done\n", __func__);
+}
+
+/* Define some kernel-like types */
+#define u8 uint8_t
+#define u16 uint16_t
+#define u32 uint32_t
+#define u64 uint64_t
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
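+
+/*
+ * Note (an assumption, not stated in the original): si_pkey_offset is
+ * the byte offset of the pkey field inside siginfo_t (the
+ * _sigfault._pkey member), hard-coded here presumably because older
+ * libc headers do not yet expose that field; signal_handler() below
+ * uses it to dig the faulting pkey out of a SEGV_PKUERR siginfo by
+ * hand.
+ */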
+
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
+
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
+ }
+}
+
+/* Failed address bound checks: */
+#ifndef SEGV_BNDERR
+# define SEGV_BNDERR 3
+#endif
+
+#ifndef SEGV_PKUERR
+# define SEGV_PKUERR 4
+#endif
+
+static char *si_code_str(int si_code)
+{
+ if (si_code == SEGV_MAPERR)
+ return "SEGV_MAPERR";
+ if (si_code == SEGV_ACCERR)
+ return "SEGV_ACCERR";
+ if (si_code == SEGV_BNDERR)
+ return "SEGV_BNDERR";
+ if (si_code == SEGV_PKUERR)
+ return "SEGV_PKUERR";
+ return "UNKNOWN";
+}
+
+int pkru_faults;
+int last_si_pkey = -1;
+void signal_handler(int signum, siginfo_t *si, void *vucontext)
+{
+ ucontext_t *uctxt = vucontext;
+ int trapno;
+ unsigned long ip;
+ char *fpregs;
+ u32 *pkru_ptr;
+ u64 siginfo_pkey;
+ u32 *si_pkey_ptr;
+ int pkru_offset;
+ fpregset_t fpregset;
+
+ dprint_in_signal = 1;
+ dprintf1(">>>>===============SIGSEGV============================\n");
+ dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
+ __rdpkru(), shadow_pkru);
+
+ trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
+ ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ fpregset = uctxt->uc_mcontext.fpregs;
+ fpregs = (void *)fpregset;
+
+ dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
+ trapno, ip, si_code_str(si->si_code), si->si_code);
+#ifdef __i386__
+ /*
+ * 32-bit has some extra padding so that userspace can tell whether
+ * the XSTATE header is present in addition to the "legacy" FPU
+ * state. We just assume that it is here.
+ */
+ fpregs += 0x70;
+#endif
+ pkru_offset = pkru_xstate_offset();
+ pkru_ptr = (void *)(&fpregs[pkru_offset]);
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
+ /*
+ * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * here.
+ */
+ dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ if (DEBUG_LEVEL > 4)
+ dump_mem(pkru_ptr - 128, 256);
+ pkey_assert(*pkru_ptr);
+
+ if ((si->si_code == SEGV_MAPERR) ||
+ (si->si_code == SEGV_ACCERR) ||
+ (si->si_code == SEGV_BNDERR)) {
+ printf("non-PK si_code, exiting...\n");
+ exit(4);
+ }
+
+ si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
+ dump_mem((u8 *)si_pkey_ptr - 8, 24);
+ siginfo_pkey = *si_pkey_ptr;
+ pkey_assert(siginfo_pkey < NR_PKEYS);
+ last_si_pkey = siginfo_pkey;
+
+ dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
+ /* need __rdpkru() version so we do not do shadow_pkru checking */
+ dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
+ dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
+ *(u64 *)pkru_ptr = 0x00000000;
+ dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
+ pkru_faults++;
+ dprintf1("<<<<==================================================\n");
+ dprint_in_signal = 0;
+}
+
+int wait_all_children(void)
+{
+ int status;
+ return waitpid(-1, &status, 0);
+}
+
+void sig_chld(int x)
+{
+ dprint_in_signal = 1;
+ dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
+ dprint_in_signal = 0;
+}
+
+void setup_sigsegv_handler(void)
+{
+ int r, rs;
+ struct sigaction newact;
+ struct sigaction oldact;
+
+ /* #PF is mapped to sigsegv */
+ int signum = SIGSEGV;
+
+ newact.sa_handler = 0;
+ newact.sa_sigaction = signal_handler;
+
+ /* sigset_t - signals to block while in the handler */
+ /* get the old signal mask. */
+ rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
+ pkey_assert(rs == 0);
+
+ /* call sa_sigaction, not sa_handler*/
+ newact.sa_flags = SA_SIGINFO;
+
+ newact.sa_restorer = 0; /* void(*)(), obsolete */
+ r = sigaction(signum, &newact, &oldact);
+ pkey_assert(r == 0);
+ r = sigaction(SIGALRM, &newact, &oldact);
+ pkey_assert(r == 0);
+}
+
+void setup_handlers(void)
+{
+ signal(SIGCHLD, &sig_chld);
+ setup_sigsegv_handler();
+}
+
+pid_t fork_lazy_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ while (1) {
+ dprintf1("child sleeping...\n");
+ sleep(30);
+ }
+ }
+ return forkret;
+}
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 pkru = __rdpkru();
+ u32 shifted_pkru;
+ u32 masked_pkru;
+
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkru: %x\n", __func__, pkru);
+
+ shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
+ dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
+ masked_pkru = shifted_pkru & mask;
+ dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other high bits.
+ */
+ return masked_pkru;
+}
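+
+/*
+ * Worked example (illustrative): with pkru == 0x24 (binary 100100)
+ * and pkey == 2, shifted_pkru == 0x24 >> 4 == 0x2; masking with
+ * (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE) == 0x3 leaves 0x2 ==
+ * PKEY_DISABLE_WRITE, i.e. pkey 2 is write-disabled but readable.
+ */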
+
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u32 old_pkru = __rdpkru();
+ u32 new_pkru;
+
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
+
+ /* copy old pkru */
+ new_pkru = old_pkru;
+ /* mask out bits from pkey in old value: */
+ new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
+ /* OR in new bits for pkey: */
+ new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
+
+ __wrpkru(new_pkru);
+
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
+ __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
+ return 0;
+}
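+
+/*
+ * Worked example (illustrative): hw_pkey_set(1, PKEY_DISABLE_ACCESS, 0)
+ * starting from old_pkru == 0 clears bits 2-3 (a no-op here) and ORs
+ * in 0x1 << 2, so PKRU ends up as 0x4.
+ */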
+
+void pkey_disable_set(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u32 orig_pkru = rdpkru();
+
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /* pkru and flags have the same format */
+ shadow_pkru |= flags << (pkey * 2);
+ dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ pkey_assert(rdpkru() > orig_pkru);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ u32 orig_pkru = rdpkru();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
+ /* pkru and flags have the same format */
+ shadow_pkru &= ~(flags << (pkey * 2));
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
+ if (flags)
+ assert(rdpkru() > orig_pkru);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
+}
+
+int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ dprintf2("%s(0x%p, %zx, prot=%lx, pkey=%lx)\n", __func__,
+ ptr, size, orig_prot, pkey);
+
+ errno = 0;
+ sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ if (errno) {
+ dprintf2("SYS_mprotect_key sret: %d\n", sret);
+ dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
+ dprintf2("SYS_mprotect_key failed, errno: %d\n", errno);
+ if (DEBUG_LEVEL >= 2)
+ perror("SYS_mprotect_pkey");
+ }
+ return sret;
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(SYS_pkey_alloc, flags, init_val);
+ dprintf1("%s(flags=%lx, init_val=%lx) syscall ret: %d errno: %d\n",
+ __func__, flags, init_val, ret, errno);
+ return ret;
+}
+
+int alloc_pkey(void)
+{
+ int ret;
+ unsigned long init_val = 0x0;
+
+ dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
+ __LINE__, __rdpkru(), shadow_pkru);
+ ret = sys_pkey_alloc(0, init_val);
+ /*
+ * pkey_alloc() sets PKRU, so we need to reflect it in
+ * shadow_pkru:
+ */
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ if (ret) {
+ /* clear both the bits: */
+ shadow_pkru &= ~(0x3 << (ret * 2));
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ /*
+ * move the new state in from init_val
+ * (remember, we cheated and init_val == pkru format)
+ */
+ shadow_pkru |= (init_val << (ret * 2));
+ }
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ /* for shadow checking: */
+ rdpkru();
+ dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int sys_pkey_free(unsigned long pkey)
+{
+ int ret = syscall(SYS_pkey_free, pkey);
+ dprintf1("%s(pkey=%ld) syscall ret: %d\n", __func__, pkey, ret);
+ return ret;
+}
+
+/*
+ * I had a bug where pkey bits could be set by mprotect() but
+ * not cleared. This ensures we get lots of random bit sets
+ * and clears on the vma and pte pkey bits.
+ */
+int alloc_random_pkey(void)
+{
+ int max_nr_pkey_allocs;
+ int ret;
+ int i;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ int random_index;
+ memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+
+ /* allocate every possible key and make a note of which ones we got */
+ max_nr_pkey_allocs = NR_PKEYS;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+
+ pkey_assert(nr_alloced > 0);
+ /* select a random one out of the allocated ones */
+ random_index = rand() % nr_alloced;
+ ret = alloced_pkeys[random_index];
+ /* now zero it out so we don't free it next */
+ alloced_pkeys[random_index] = 0;
+
+ /* go through the allocated ones that we did not want and free them */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int nr_iterations = random() % 100;
+ int ret;
+
+ while (0) {
+ int rpkey = alloc_random_pkey();
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ if (nr_iterations-- < 0)
+ break;
+
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ sys_pkey_free(rpkey);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ }
+ pkey_assert(pkey < NR_PKEYS);
+
+ ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
+ dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
+ ptr, size, orig_prot, pkey, ret);
+ pkey_assert(!ret);
+ dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, ret, __rdpkru(), shadow_pkru);
+ return ret;
+}
+
+struct pkey_malloc_record {
+ void *ptr;
+ long size;
+ int prot;
+};
+struct pkey_malloc_record *pkey_malloc_records;
+struct pkey_malloc_record *pkey_last_malloc_record;
+long nr_pkey_malloc_records;
+void record_pkey_malloc(void *ptr, long size, int prot)
+{
+ long i;
+ struct pkey_malloc_record *rec = NULL;
+
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ rec = &pkey_malloc_records[i];
+ /* find a free record */
+ if (rec)
+ break;
+ }
+ if (!rec) {
+ /* every record is full */
+ size_t old_nr_records = nr_pkey_malloc_records;
+ size_t new_nr_records = (nr_pkey_malloc_records * 2 + 1);
+ size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
+ dprintf2("new_nr_records: %zd\n", new_nr_records);
+ dprintf2("new_size: %zd\n", new_size);
+ pkey_malloc_records = realloc(pkey_malloc_records, new_size);
+ pkey_assert(pkey_malloc_records != NULL);
+ rec = &pkey_malloc_records[nr_pkey_malloc_records];
+ /*
+ * realloc() does not initialize memory, so zero it from
+ * the first new record all the way to the end.
+ */
+ for (i = 0; i < new_nr_records - old_nr_records; i++)
+ memset(rec + i, 0, sizeof(*rec));
+ }
+ dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
+ (int)(rec - pkey_malloc_records), rec, ptr, size);
+ rec->ptr = ptr;
+ rec->size = size;
+ rec->prot = prot;
+ pkey_last_malloc_record = rec;
+ nr_pkey_malloc_records++;
+}
+
+void free_pkey_malloc(void *ptr)
+{
+ long i;
+ int ret;
+ dprintf3("%s(%p)\n", __func__, ptr);
+ for (i = 0; i < nr_pkey_malloc_records; i++) {
+ struct pkey_malloc_record *rec = &pkey_malloc_records[i];
+ dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ if ((ptr < rec->ptr) ||
+ (ptr >= rec->ptr + rec->size))
+ continue;
+
+ dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
+ ptr, i, rec, rec->ptr, rec->size);
+ nr_pkey_malloc_records--;
+ ret = munmap(rec->ptr, rec->size);
+ dprintf3("munmap ret: %d\n", ret);
+ pkey_assert(!ret);
+ dprintf3("clearing rec->ptr, rec: %p\n", rec);
+ rec->ptr = NULL;
+ dprintf3("done clearing rec->ptr, rec: %p\n", rec);
+ return;
+ }
+ pkey_assert(false);
+}
+
+
+void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ rdpkru();
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size, prot);
+ rdpkru();
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
+{
+ int ret;
+ void *ptr;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ /*
+ * Guarantee we can fit at least one huge page in the resulting
+ * allocation by allocating space for 2:
+ */
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ record_pkey_malloc(ptr, size, prot);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ dprintf1("unaligned ptr: %p\n", ptr);
+ ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
+ dprintf1(" aligned ptr: %p\n", ptr);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
+ dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
+ ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
+ dprintf1("MADV_WILLNEED ret: %d\n", ret);
+ memset(ptr, 0, HPAGE_SIZE);
+
+ dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+int hugetlb_setup_ok;
+#define GET_NR_HUGE_PAGES 10
+void setup_hugetlbfs(void)
+{
+ int err;
+ int fd;
+ char buf[] = "123";
+
+ if (geteuid() != 0) {
+ fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
+ return;
+ }
+
+ cat_into_file(__stringify(GET_NR_HUGE_PAGES), "/proc/sys/vm/nr_hugepages");
+
+ /*
+ * Now go make sure that we got the pages and that they
+ * are 2M pages. Someone might have made 1G the default.
+ */
+ fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ if (fd < 0) {
+ perror("opening sysfs 2M hugetlb config");
+ return;
+ }
+
+ /* -1 to guarantee leaving the trailing \0 */
+ err = read(fd, buf, sizeof(buf)-1);
+ close(fd);
+ if (err <= 0) {
+ perror("reading sysfs 2M hugetlb config");
+ return;
+ }
+
+ if (atoi(buf) != GET_NR_HUGE_PAGES) {
+ fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
+ buf, GET_NR_HUGE_PAGES);
+ return;
+ }
+
+ hugetlb_setup_ok = 1;
+}
+
+void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
+
+ if (!hugetlb_setup_ok)
+ return PTR_ERR_ENOTSUP;
+
+ dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
+ size = ALIGN_UP(size, HPAGE_SIZE * 2);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
+ return ptr;
+}
+
+void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int fd;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ fd = open("/dax/foo", O_RDWR);
+ pkey_assert(fd >= 0);
+
+ ptr = mmap(0, size, prot, MAP_SHARED, fd, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ mprotect_pkey(ptr, size, prot, pkey);
+
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr);
+ close(fd);
+ return ptr;
+}
+
+void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
+
+ malloc_pkey_with_mprotect,
+ malloc_pkey_anon_huge,
+ malloc_pkey_hugetlb
+/* these can not be done directly with the pkey_mprotect() API:
+ malloc_pkey_mmap_direct,
+ malloc_pkey_mmap_dax,
+*/
+};
+
+void *malloc_pkey(long size, int prot, u16 pkey)
+{
+ void *ret;
+ static int malloc_type;
+ int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
+
+ pkey_assert(pkey < NR_PKEYS);
+
+ while (1) {
+ pkey_assert(malloc_type < nr_malloc_types);
+
+ ret = pkey_malloc[malloc_type](size, prot, pkey);
+ pkey_assert(ret != (void *)-1);
+
+ malloc_type++;
+ if (malloc_type >= nr_malloc_types)
+ malloc_type = (random()%nr_malloc_types);
+
+ /* try again if the malloc_type we tried is unsupported */
+ if (ret == PTR_ERR_ENOTSUP)
+ continue;
+
+ break;
+ }
+
+ dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
+ size, prot, pkey, ret);
+ return ret;
+}
+
+int last_pkru_faults;
+#define UNKNOWN_PKEY -2
+void expected_pk_fault(int pkey)
+{
+ dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
+ __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
+ pkey_assert(last_pkru_faults + 1 == pkru_faults);
+
+ /*
+ * For exec-only memory, we do not know the pkey in
+ * advance, so skip this check.
+ */
+ if (pkey != UNKNOWN_PKEY)
+ pkey_assert(last_si_pkey == pkey);
+
+ /*
+ * The signal handler should have cleared out PKRU to let the
+ * test program continue. We now have to restore it.
+ */
+ if (__rdpkru() != 0)
+ pkey_assert(0);
+
+ __wrpkru(shadow_pkru);
+ dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
+ __func__, shadow_pkru);
+ last_pkru_faults = pkru_faults;
+ last_si_pkey = -1;
+}
+
+#define do_not_expect_pk_fault(msg) do { \
+ if (last_pkru_faults != pkru_faults) \
+ dprintf0("unexpected PK fault: %s\n", msg); \
+ pkey_assert(last_pkru_faults == pkru_faults); \
+} while (0)
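+
+/*
+ * Typical pairing (illustrative): a test that provokes a fault does
+ *
+ *	*ptr = __LINE__;
+ *	expected_pk_fault(pkey);
+ *
+ * while one that must not fault follows the access with
+ * do_not_expect_pk_fault("some description").
+ */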
+
+int test_fds[10] = { -1 };
+int nr_test_fds;
+void __save_test_fd(int fd)
+{
+ pkey_assert(fd >= 0);
+ pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));
+ test_fds[nr_test_fds] = fd;
+ nr_test_fds++;
+}
+
+int get_test_read_fd(void)
+{
+ int test_fd = open("/etc/passwd", O_RDONLY);
+ __save_test_fd(test_fd);
+ return test_fd;
+}
+
+void close_test_fds(void)
+{
+ int i;
+
+ for (i = 0; i < nr_test_fds; i++) {
+ if (test_fds[i] < 0)
+ continue;
+ close(test_fds[i]);
+ test_fds[i] = -1;
+ }
+ nr_test_fds = 0;
+}
+
+#define barrier() __asm__ __volatile__("": : :"memory")
+__attribute__((noinline)) int read_ptr(int *ptr)
+{
+ /*
+ * Keep GCC from optimizing this away somehow
+ */
+ barrier();
+ return *ptr;
+}
+
+void test_read_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling write access to PKEY[1], doing read\n");
+ pkey_write_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ dprintf1("\n");
+}
+void test_read_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
+ rdpkru();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pk_fault(pkey);
+}
+void test_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pk_fault(pkey);
+}
+void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel read() to buffer\n", pkey);
+ pkey_access_deny(pkey);
+ ret = read(test_fd, ptr, 1);
+ dprintf1("read ret: %d\n", ret);
+ pkey_assert(ret);
+}
+void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ret;
+ int test_fd = get_test_read_fd();
+
+ pkey_write_deny(pkey);
+ ret = read(test_fd, ptr, 100);
+ dprintf1("read ret: %d\n", ret);
+ if (ret < 0 && (DEBUG_LEVEL > 0))
+ perror("verbose read result (OK for this to be bad)");
+ pkey_assert(ret);
+}
+
+void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
+{
+ int pipe_ret, vmsplice_ret;
+ struct iovec iov;
+ int pipe_fds[2];
+
+ pipe_ret = pipe(pipe_fds);
+
+ pkey_assert(pipe_ret == 0);
+ dprintf1("disabling access to PKEY[%02d], "
+ "having kernel vmsplice from buffer\n", pkey);
+ pkey_access_deny(pkey);
+ iov.iov_base = ptr;
+ iov.iov_len = PAGE_SIZE;
+ vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
+ dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
+ pkey_assert(vmsplice_ret == -1);
+
+ close(pipe_fds[0]);
+ close(pipe_fds[1]);
+}
+
+void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
+{
+ int ignored = 0xdada;
+ int futex_ret;
+ int some_int = __LINE__;
+
+ dprintf1("disabling write to PKEY[%02d], "
+ "doing futex gunk in buffer\n", pkey);
+ *ptr = some_int;
+ pkey_write_deny(pkey);
+ futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
+ &ignored, ignored);
+ if (DEBUG_LEVEL > 0)
+ perror("futex");
+ dprintf1("futex() ret: %d\n", futex_ret);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
+{
+ int err;
+ int i;
+
+ /* Note: 0 is the default pkey, so don't mess with it */
+ for (i = 1; i < NR_PKEYS; i++) {
+ if (pkey == i)
+ continue;
+
+ dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_pkey_free(i);
+ pkey_assert(err);
+
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
+ pkey_assert(err);
+ }
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
+{
+ int err;
+ int bad_pkey = NR_PKEYS+99;
+
+ /* pass a known-invalid pkey in: */
+ err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
+ pkey_assert(err);
+}
+
+void become_child(void)
+{
+ pid_t forkret;
+
+ forkret = fork();
+ pkey_assert(forkret >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);
+
+ if (!forkret) {
+ /* in the child */
+ return;
+ }
+ exit(0);
+}
+
+/* Assumes that all pkeys other than 'pkey' are unallocated */
+void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS*3; i++) {
+ int new_pkey;
+ dprintf1("%s() alloc loop: %d\n", __func__, i);
+ new_pkey = alloc_pkey();
+ dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
+ __LINE__, err, __rdpkru(), shadow_pkru);
+ rdpkru(); /* for shadow checking */
+ dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
+ if ((new_pkey == -1) && (errno == ENOSPC)) {
+ dprintf2("%s() failed to allocate pkey after %d tries\n",
+ __func__, nr_allocated_pkeys);
+ } else {
+ /*
+ * Ensure the number of successes never
+ * exceeds the number of keys supported
+ * in the hardware.
+ */
+ pkey_assert(nr_allocated_pkeys < NR_PKEYS);
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ /*
+ * Make sure that allocation state is properly
+ * preserved across fork().
+ */
+ if (i == NR_PKEYS*2)
+ become_child();
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ /*
+ * There are 16 pkeys supported in hardware. Three are
+ * allocated by the time we get here:
+ * 1. The default key (0)
+ * 2. One possibly consumed by an execute-only mapping.
+ * 3. One allocated by the test code and passed in via
+ * 'pkey' to this function.
+ * Ensure that we can allocate at least another 13 (16-3).
+ */
+ pkey_assert(i >= NR_PKEYS-3);
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ rdpkru(); /* for shadow checking */
+ }
+}
+
+/*
+ * pkey 0 is special. It is allocated by default, so you do not
+ * have to call pkey_alloc() to use it first. Make sure that it
+ * is usable.
+ */
+void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
+{
+ long size;
+ int prot;
+
+ assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+ prot = pkey_last_malloc_record->prot;
+
+ /* Use pkey 0 */
+ mprotect_pkey(ptr, size, prot, 0);
+
+ /* Make sure that we can set it back to the original pkey. */
+ mprotect_pkey(ptr, size, prot, pkey);
+}
+
+void test_ptrace_of_child(int *ptr, u16 pkey)
+{
+ __attribute__((__unused__)) int peek_result;
+ pid_t child_pid;
+ void *ignored = 0;
+ long ret;
+ int status;
+ /*
+ * This is the "control" for our little expermient. Make sure
+ * we can always access it when ptracing.
+ */
+ int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
+ int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
+
+ /*
+ * Fork a child which is an exact copy of this process, of course.
+ * That means we can do all of our tests via ptrace() and then plain
+ * memory access and ensure they work differently.
+ */
+ child_pid = fork_lazy_child();
+ dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
+
+ ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
+ if (ret)
+ perror("attach");
+ dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
+ pkey_assert(ret != -1);
+ ret = waitpid(child_pid, &status, WUNTRACED);
+ if ((ret != child_pid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitpid result %ld stat %x\n",
+ ret, status);
+ pkey_assert(0);
+ }
+ dprintf2("waitpid ret: %ld\n", ret);
+ dprintf2("waitpid status: %d\n", status);
+
+ pkey_access_deny(pkey);
+ pkey_write_deny(pkey);
+
+ /* Write access, untested for now:
+ ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
+ pkey_assert(ret != -1);
+ dprintf1("poke at %p: %ld\n", peek_at, ret);
+ */
+
+ /*
+ * Try to access the pkey-protected "ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect an exception: */
+ peek_result = read_ptr(ptr);
+ expected_pk_fault(pkey);
+
+ /*
+ * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
+ */
+ ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
+ /* expect it to work, without an error: */
+ pkey_assert(ret != -1);
+ /* Now access from the current task, and expect NO exception: */
+ peek_result = read_ptr(plain_ptr);
+ do_not_expect_pk_fault("read plain pointer after ptrace");
+
+ ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
+ pkey_assert(ret != -1);
+
+ ret = kill(child_pid, SIGKILL);
+ pkey_assert(ret != -1);
+
+ wait(&status);
+
+ free(plain_ptr_unaligned);
+}
+
+void *get_pointer_to_instructions(void)
+{
+ void *p1;
+
+ p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
+ dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
+ /* lots_o_noops_around_write should be page-aligned already */
+ assert(p1 == &lots_o_noops_around_write);
+
+ /* Point 'p1' at the *second* page of the function: */
+ p1 += PAGE_SIZE;
+
+ /*
+ * Try to ensure we fault this in on next touch to ensure
+ * we get an instruction fault as opposed to a data one
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+
+ return p1;
+}
+
+void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
+ pkey_assert(!ret);
+ pkey_access_deny(pkey);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /*
+ * Make sure this is an *instruction* fault
+ */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(pkey);
+}
+
+void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
+{
+ void *p1;
+ int scratch;
+ int ptr_contents;
+ int ret;
+
+ dprintf1("%s() start\n", __func__);
+
+ p1 = get_pointer_to_instructions();
+ lots_o_noops_around_write(&scratch);
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+
+ /* Use a *normal* mprotect(), not mprotect_pkey(): */
+ ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
+ pkey_assert(!ret);
+
+ dprintf2("pkru: %x\n", rdpkru());
+
+ /* Make sure this is an *instruction* fault */
+ madvise(p1, PAGE_SIZE, MADV_DONTNEED);
+ lots_o_noops_around_write(&scratch);
+ do_not_expect_pk_fault("executing on PROT_EXEC memory");
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pk_fault(UNKNOWN_PKEY);
+
+ /*
+ * Put the memory back to non-PROT_EXEC. Should clear the
+ * exec-only pkey off the VMA and allow it to be readable
+ * again. Go to PROT_NONE first to check for a kernel bug
+ * that did not clear the pkey when doing PROT_NONE.
+ */
+ ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
+ pkey_assert(!ret);
+
+ ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
+ pkey_assert(!ret);
+ ptr_contents = read_ptr(p1);
+ do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+}
+
+void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
+{
+ int size = PAGE_SIZE;
+ int sret;
+
+ if (cpu_has_pku()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return;
+ }
+
+ sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ pkey_assert(sret < 0);
+}
+
+void (*pkey_tests[])(int *ptr, u16 pkey) = {
+ test_read_of_write_disabled_region,
+ test_read_of_access_disabled_region,
+ test_write_of_write_disabled_region,
+ test_write_of_access_disabled_region,
+ test_kernel_write_of_access_disabled_region,
+ test_kernel_write_of_write_disabled_region,
+ test_kernel_gup_of_access_disabled_region,
+ test_kernel_gup_write_to_write_disabled_region,
+ test_executing_on_unreadable_memory,
+ test_implicit_mprotect_exec_only_memory,
+ test_mprotect_with_pkey_0,
+ test_ptrace_of_child,
+ test_pkey_syscalls_on_non_allocated_pkey,
+ test_pkey_syscalls_bad_args,
+ test_pkey_alloc_exhaust,
+};
+
+void run_tests_once(void)
+{
+ int *ptr;
+ int prot = PROT_READ|PROT_WRITE;
+
+ for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
+ int pkey;
+ int orig_pkru_faults = pkru_faults;
+
+ dprintf1("======================\n");
+ dprintf1("test %d preparing...\n", test_nr);
+
+ tracing_on();
+ pkey = alloc_random_pkey();
+ dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
+ ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
+ dprintf1("test %d starting...\n", test_nr);
+ pkey_tests[test_nr](ptr, pkey);
+ dprintf1("freeing test memory: %p\n", ptr);
+ free_pkey_malloc(ptr);
+ sys_pkey_free(pkey);
+
+ dprintf1("pkru_faults: %d\n", pkru_faults);
+ dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+
+ tracing_off();
+ close_test_fds();
+
+ printf("test %2d PASSED (iteration %d)\n", test_nr, iteration_nr);
+ dprintf1("======================\n\n");
+ }
+ iteration_nr++;
+}
+
+void pkey_setup_shadow(void)
+{
+ shadow_pkru = __rdpkru();
+}
+
+int main(void)
+{
+ int nr_iterations = 22;
+
+ srand((unsigned int)time(NULL));
+
+ setup_handlers();
+
+ printf("has pku: %d\n", cpu_has_pku());
+
+ if (!cpu_has_pku()) {
+ int size = PAGE_SIZE;
+ int *ptr;
+
+ printf("running PKEY tests for unsupported CPU/OS\n");
+
+ ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ assert(ptr != (void *)-1);
+ test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
+ exit(0);
+ }
+
+ pkey_setup_shadow();
+ printf("startup pkru: %x\n", rdpkru());
+ setup_hugetlbfs();
+
+ while (nr_iterations-- > 0)
+ run_tests_once();
+
+ printf("done (all tests OK)\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
new file mode 100644
index 000000000..12aaa0631
--- /dev/null
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -0,0 +1,430 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <asm/ptrace-abi.h>
+#include <sys/auxv.h>
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#ifdef __x86_64__
+# define user_syscall_nr orig_rax
+# define user_arg0 rdi
+# define user_arg1 rsi
+# define user_arg2 rdx
+# define user_arg3 r10
+# define user_arg4 r8
+# define user_arg5 r9
+# define user_ip rip
+# define user_ax rax
+#else
+# define user_syscall_nr orig_eax
+# define user_arg0 ebx
+# define user_arg1 ecx
+# define user_arg2 edx
+# define user_arg3 esi
+# define user_arg4 edi
+# define user_arg5 ebp
+# define user_ip eip
+# define user_ax eax
+#endif
+
+static int nerrs = 0;
+
+struct syscall_args32 {
+ uint32_t nr, arg0, arg1, arg2, arg3, arg4, arg5;
+};
+
+#ifdef __i386__
+extern void sys32_helper(struct syscall_args32 *, void *);
+extern void int80_and_ret(void);
+#endif
+
+/*
+ * Helper to invoke int80 with controlled regs and capture the final regs.
+ */
+static void do_full_int80(struct syscall_args32 *args)
+{
+#ifdef __x86_64__
+ register unsigned long bp asm("bp") = args->arg5;
+ asm volatile ("int $0x80"
+ : "+a" (args->nr),
+ "+b" (args->arg0), "+c" (args->arg1), "+d" (args->arg2),
+ "+S" (args->arg3), "+D" (args->arg4), "+r" (bp)
+ : : "r8", "r9", "r10", "r11");
+ args->arg5 = bp;
+#else
+ sys32_helper(args, int80_and_ret);
+#endif
+}
+
+#ifdef __i386__
+static void (*vsyscall32)(void);
+
+/*
+ * Nasty helper to invoke AT_SYSINFO (i.e. __kernel_vsyscall) with
+ * controlled regs and capture the final regs. This is so nasty that it
+ * crashes my copy of gdb :)
+ */
+static void do_full_vsyscall32(struct syscall_args32 *args)
+{
+ sys32_helper(args, vsyscall32);
+}
+#endif
+
+static siginfo_t wait_trap(pid_t chld)
+{
+ siginfo_t si;
+ if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+ err(1, "waitid");
+ if (si.si_pid != chld)
+ errx(1, "got unexpected pid in event\n");
+ if (si.si_code != CLD_TRAPPED)
+ errx(1, "got unexpected event type %d\n", si.si_code);
+ return si;
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void setsigign(int sig, int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = (void *)SIG_IGN;
+ sa.sa_flags = flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define REG_BP REG_RBP
+#else
+# define REG_BP REG_EBP
+#endif
+
+static void empty_handler(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void test_sys32_regs(void (*do_syscall)(struct syscall_args32 *))
+{
+ struct syscall_args32 args = {
+ .nr = 224, /* gettid */
+ .arg0 = 10, .arg1 = 11, .arg2 = 12,
+ .arg3 = 13, .arg4 = 14, .arg5 = 15,
+ };
+
+ do_syscall(&args);
+
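+ /* This process is single-threaded, so gettid() should match getpid(). */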
+ if (args.nr != getpid() ||
+ args.arg0 != 10 || args.arg1 != 11 || args.arg2 != 12 ||
+ args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+ printf("[FAIL]\tgetpid() failed to preserve regs\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tgetpid() preserves regs\n");
+ }
+
+ sethandler(SIGUSR1, empty_handler, 0);
+
+ args.nr = 37; /* kill */
+ args.arg0 = getpid();
+ args.arg1 = SIGUSR1;
+ do_syscall(&args);
+ if (args.nr != 0 ||
+ args.arg0 != getpid() || args.arg1 != SIGUSR1 || args.arg2 != 12 ||
+ args.arg3 != 13 || args.arg4 != 14 || args.arg5 != 15) {
+ printf("[FAIL]\tkill(getpid(), SIGUSR1) failed to preserve regs\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tkill(getpid(), SIGUSR1) preserves regs\n");
+ }
+ clearhandler(SIGUSR1);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+ printf("[RUN]\tptrace-induced syscall restart\n");
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tChild will make one syscall\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+ _exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ printf("[RUN]\tSYSEMU\n");
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n");
+ }
+
+ printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * This does exactly what it appears to do if syscall is int80 or
+ * SYSCALL64. For SYSCALL32 or SYSENTER, though, this is highly
+ * magical. It needs to work so that ptrace and syscall restart
+ * work as expected.
+ */
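+ /*
+ * int $0x80, SYSCALL, and SYSENTER are all two-byte instructions,
+ * so IP - 2 points back at the syscall instruction itself.
+ */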
+ regs.user_ax = regs.user_syscall_nr;
+ regs.user_ip -= 2;
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tRestarted nr and args are correct\n");
+ }
+
+ printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ regs.user_ax = SYS_getpid;
+ regs.user_arg0 = 20;
+ regs.user_arg1 = 21;
+ regs.user_arg2 = 22;
+ regs.user_arg3 = 23;
+ regs.user_arg4 = 24;
+ regs.user_arg5 = 25;
+ regs.user_ip -= 2;
+
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_getpid ||
+ regs.user_arg0 != 20 || regs.user_arg1 != 21 || regs.user_arg2 != 22 ||
+ regs.user_arg3 != 23 || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tReplacement nr and args are correct\n");
+ }
+
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild exited cleanly\n");
+ }
+}
+
+static void test_restart_under_ptrace(void)
+{
+ printf("[RUN]\tkernel syscall restart under ptrace\n");
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tChild will take a nap until signaled\n");
+ setsigign(SIGUSR1, SA_RESTART);
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ syscall(SYS_pause, 0, 0, 0, 0, 0, 0);
+ _exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ printf("[RUN]\tSYSCALL\n");
+ if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ /* We should be stopped at pause(2) entry. */
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_pause ||
+ regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+ regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+ regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n");
+ }
+
+ /* Interrupt it. */
+ kill(chld, SIGUSR1);
+
+ /* Advance. We should be stopped at exit. */
+ printf("[RUN]\tSYSCALL\n");
+ if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_pause ||
+ regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+ regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+ regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+ printf("[FAIL]\tArgs after SIGUSR1 are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tArgs after SIGUSR1 are correct (ax = %ld)\n",
+ (long)regs.user_ax);
+ }
+
+ /* Poke the regs back in. This must not break anything. */
+ if (ptrace(PTRACE_SETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ /* Catch the (ignored) SIGUSR1. */
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+ if (!WIFSTOPPED(status)) {
+ printf("[FAIL]\tChild was stopped for SIGUSR1 (status = 0x%x)\n", status);
+ nerrs++;
+ } else {
+ printf("[OK]\tChild got SIGUSR1\n");
+ }
+
+ /* The next event should be pause(2) again. */
+ printf("[RUN]\tStep again\n");
+ if (ptrace(PTRACE_SYSCALL, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSCALL");
+ wait_trap(chld);
+
+ /* We should be stopped at pause(2) entry. */
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_pause ||
+ regs.user_arg0 != 0 || regs.user_arg1 != 0 ||
+ regs.user_arg2 != 0 || regs.user_arg3 != 0 ||
+ regs.user_arg4 != 0 || regs.user_arg5 != 0) {
+ printf("[FAIL]\tpause did not restart (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n", (unsigned long)regs.user_syscall_nr, (unsigned long)regs.user_arg0, (unsigned long)regs.user_arg1, (unsigned long)regs.user_arg2, (unsigned long)regs.user_arg3, (unsigned long)regs.user_arg4, (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tpause(2) restarted correctly\n");
+ }
+
+ /* Kill it. */
+ kill(chld, SIGKILL);
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+}
+
+int main()
+{
+ printf("[RUN]\tCheck int80 return regs\n");
+ test_sys32_regs(do_full_int80);
+
+#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
+ vsyscall32 = (void *)getauxval(AT_SYSINFO);
+ if (vsyscall32) {
+ printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+ test_sys32_regs(do_full_vsyscall32);
+ } else {
+ printf("[SKIP]\tAT_SYSINFO is not available\n");
+ }
+#endif
+
+ test_ptrace_syscall_restart();
+
+ test_restart_under_ptrace();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
new file mode 100644
index 000000000..94410fa2b
--- /dev/null
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.global sys32_helper
+sys32_helper:
+ /* Args: syscall_args_32*, function pointer */
+ pushl %ebp
+ pushl %ebx
+ pushl %esi
+ pushl %edi
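+
+ /*
+ * Four saved registers plus the return address occupy 5*4 bytes,
+ * so the args-struct pointer sits at 5*4(%esp) and the function
+ * pointer at 6*4(%esp).
+ */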
+ movl 5*4(%esp), %eax /* pointer to args struct */
+
+ movl 1*4(%eax), %ebx
+ movl 2*4(%eax), %ecx
+ movl 3*4(%eax), %edx
+ movl 4*4(%eax), %esi
+ movl 5*4(%eax), %edi
+ movl 6*4(%eax), %ebp
+ movl 0*4(%eax), %eax
+
+ call *(6*4)(%esp) /* Do the syscall */
+
+ /* Now we need to recover without losing any reg values */
+ pushl %eax
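+ /* The push above moved the args pointer from 5*4 to 6*4(%esp). */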
+ movl 6*4(%esp), %eax
+ popl 0*4(%eax)
+ movl %ebx, 1*4(%eax)
+ movl %ecx, 2*4(%eax)
+ movl %edx, 3*4(%eax)
+ movl %esi, 4*4(%eax)
+ movl %edi, 5*4(%eax)
+ movl %ebp, 6*4(%eax)
+
+ popl %edi
+ popl %esi
+ popl %ebx
+ popl %ebp
+ ret
+
+ .type sys32_helper, @function
+ .size sys32_helper, .-sys32_helper
+
+.global int80_and_ret
+int80_and_ret:
+ int $0x80
+ ret
+
+ .type int80_and_ret, @function
+ .size int80_and_ret, .-int80_and_ret
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
new file mode 100644
index 000000000..4d9dc3f2f
--- /dev/null
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -0,0 +1,871 @@
+/*
+ * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a series of tests that exercises the sigreturn(2) syscall and
+ * the IRET / SYSRET paths in the kernel.
+ *
+ * For now, this focuses on the effects of unusual CS and SS values,
+ * and it has a bunch of tests to make sure that ESP/RSP is restored
+ * properly.
+ *
+ * The basic idea behind these tests is to raise(SIGUSR1) to create a
+ * sigcontext frame, plug in the values to be tested, and then return,
+ * which implicitly invokes sigreturn(2) and programs the user context
+ * as desired.
+ *
+ * For tests for which we expect sigreturn and the subsequent return to
+ * user mode to succeed, we return to a short trampoline that generates
+ * SIGTRAP so that the meat of the tests can be ordinary C code in a
+ * SIGTRAP handler.
+ *
+ * The inner workings of each test are documented below.
+ *
+ * Do not run this on outdated, unpatched kernels: you risk nasty crashes.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+/* Pull in AR_xyz defines. */
+typedef unsigned int u32;
+typedef unsigned short u16;
+#include "../../../../arch/x86/include/asm/desc_defs.h"
+
+/*
+ * Copied from asm/ucontext.h, as asm/ucontext.h conflicts badly with the glibc
+ * headers.
+ */
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext. All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ * saved CS is not 64-bit)
+ * new SS = saved SS (will fail IRET and signal if invalid)
+ * else
+ * new SS = a flat 32-bit data segment
+ */
+#define UC_SIGCONTEXT_SS 0x2
+#define UC_STRICT_RESTORE_SS 0x4
+#endif
+
+/*
+ * In principle, this test can run on Linux emulation layers (e.g.
+ * Illumos "LX branded zones"). Solaris-based kernels reserve LDT
+ * entries 0-5 for their own internal purposes, so start our LDT
+ * allocations above that reservation. (The tests don't pass on LX
+ * branded zones, but at least this lets them run.)
+ */
+#define LDT_OFFSET 6
+
+/* An aligned stack accessible through some of our segments. */
+static unsigned char stack16[65536] __attribute__((aligned(4096)));
+
+/*
+ * An aligned int3 instruction used as a trampoline. Some of the tests
+ * want to fish out their ss values, so this trampoline copies ss to eax
+ * before the int3.
+ */
+asm (".pushsection .text\n\t"
+ ".type int3, @function\n\t"
+ ".align 4096\n\t"
+ "int3:\n\t"
+ "mov %ss,%ecx\n\t"
+ "int3\n\t"
+ ".size int3, . - int3\n\t"
+ ".align 4096, 0xcc\n\t"
+ ".popsection");
+extern char int3[4096];
+
+/*
+ * At startup, we prepare:
+ *
+ * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero
+ * descriptor or out of bounds).
+ * - code16_sel: A 16-bit LDT code segment pointing to int3.
+ * - data16_sel: A 16-bit LDT data segment pointing to stack16.
+ * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3.
+ * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16.
+ * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16.
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to
+ * stack16.
+ *
+ * For no particularly good reason, xyz_sel is a selector value with the
+ * RPL and LDT bits filled in, whereas xyz_idx is just an index into the
+ * descriptor table. These variables will be zero if their respective
+ * segments could not be allocated.
+ */
+static unsigned short ldt_nonexistent_sel;
+static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel;
+
+static unsigned short gdt_data16_idx, gdt_npdata32_idx;
+
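+/*
+ * An x86 selector is (index << 3) | TI | RPL: GDT3() builds a GDT
+ * selector with RPL 3, and LDT3() additionally sets TI (bit 2) to
+ * select the LDT.
+ */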
+static unsigned short GDT3(int idx)
+{
+ return (idx << 3) | 3;
+}
+
+static unsigned short LDT3(int idx)
+{
+ return (idx << 3) | 7;
+}
+
+/* Our sigaltstack scratch space. */
+static char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void add_ldt(const struct user_desc *desc, unsigned short *var,
+ const char *name)
+{
+ if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) {
+ *var = LDT3(desc->entry_number);
+ } else {
+ printf("[NOTE]\tFailed to create %s segment\n", name);
+ *var = 0;
+ }
+}
+
+static void setup_ldt(void)
+{
+ if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16))
+ errx(1, "stack16 is too high\n");
+ if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3))
+ errx(1, "int3 is too high\n");
+
+ ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2);
+
+ const struct user_desc code16_desc = {
+ .entry_number = LDT_OFFSET + 0,
+ .base_addr = (unsigned long)int3,
+ .limit = 4095,
+ .seg_32bit = 0,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ add_ldt(&code16_desc, &code16_sel, "code16");
+
+ const struct user_desc data16_desc = {
+ .entry_number = LDT_OFFSET + 1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 0,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ add_ldt(&data16_desc, &data16_sel, "data16");
+
+ const struct user_desc npcode32_desc = {
+ .entry_number = LDT_OFFSET + 3,
+ .base_addr = (unsigned long)int3,
+ .limit = 4095,
+ .seg_32bit = 1,
+ .contents = 2, /* Code, not conforming */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+ add_ldt(&npcode32_desc, &npcode32_sel, "npcode32");
+
+ const struct user_desc npdata32_desc = {
+ .entry_number = LDT_OFFSET + 4,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+ add_ldt(&npdata32_desc, &npdata32_sel, "npdata32");
+
+ struct user_desc gdt_data16_desc = {
+ .entry_number = -1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 0,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+
+ if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) {
+ /*
+ * This probably indicates vulnerability to CVE-2014-8133.
+ * Merely getting here isn't definitive, though, and we'll
+ * diagnose the problem for real later on.
+ */
+ printf("[WARN]\tset_thread_area allocated data16 at index %d\n",
+ gdt_data16_desc.entry_number);
+ gdt_data16_idx = gdt_data16_desc.entry_number;
+ } else {
+ printf("[OK]\tset_thread_area refused 16-bit data\n");
+ }
+
+ struct user_desc gdt_npdata32_desc = {
+ .entry_number = -1,
+ .base_addr = (unsigned long)stack16,
+ .limit = 0xffff,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 1,
+ .useable = 0
+ };
+
+ if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) {
+ /*
+ * As a hardening measure, newer kernels don't allow this.
+ */
+ printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n",
+ gdt_npdata32_desc.entry_number);
+ gdt_npdata32_idx = gdt_npdata32_desc.entry_number;
+ } else {
+ printf("[OK]\tset_thread_area refused 16-bit data\n");
+ }
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs, requested_regs, resulting_regs;
+
+/* Instructions for the SIGUSR1 handler. */
+static volatile unsigned short sig_cs, sig_ss;
+static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno;
+#ifdef __x86_64__
+static volatile sig_atomic_t sig_corrupt_final_ss;
+#endif
+
+/* Abstractions for some 32-bit vs 64-bit differences. */
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define REG_SP REG_RSP
+# define REG_CX REG_RCX
+
+struct selectors {
+ unsigned short cs, gs, fs, ss;
+};
+
+static unsigned short *ssptr(ucontext_t *ctx)
+{
+ struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+ return &sels->ss;
+}
+
+static unsigned short *csptr(ucontext_t *ctx)
+{
+ struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
+ return &sels->cs;
+}
+#else
+# define REG_IP REG_EIP
+# define REG_SP REG_ESP
+# define REG_CX REG_ECX
+
+static greg_t *ssptr(ucontext_t *ctx)
+{
+ return &ctx->uc_mcontext.gregs[REG_SS];
+}
+
+static greg_t *csptr(ucontext_t *ctx)
+{
+ return &ctx->uc_mcontext.gregs[REG_CS];
+}
+#endif
+
+/*
+ * Checks a given selector for its code bitness or returns -1 if it's not
+ * a usable code segment selector.
+ */
+int cs_bitness(unsigned short cs)
+{
+ uint32_t valid = 0, ar;
+ asm ("lar %[cs], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "mov $1, %[valid]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [valid] "+rm" (valid)
+ : [cs] "r" (cs));
+
+ if (!valid)
+ return -1;
+
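+ /*
+ * LAR yields the descriptor attributes: bit 11 is the executable
+ * type bit, bit 21 is L (64-bit), and bit 22 is D/B (default size).
+ */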
+ bool db = (ar & (1 << 22));
+ bool l = (ar & (1 << 21));
+
+ if (!(ar & (1<<11)))
+ return -1; /* Not code. */
+
+ if (l && !db)
+ return 64;
+ else if (!l && db)
+ return 32;
+ else if (!l && !db)
+ return 16;
+ else
+ return -1; /* Unknown bitness. */
+}
+
+/*
+ * Checks whether a given selector is a present, writable data segment,
+ * i.e. whether it is usable as SS.
+ */
+bool is_valid_ss(unsigned short cs)
+{
+ uint32_t valid = 0, ar;
+ asm ("lar %[cs], %[ar]\n\t"
+ "jnz 1f\n\t"
+ "mov $1, %[valid]\n\t"
+ "1:"
+ : [ar] "=r" (ar), [valid] "+rm" (valid)
+ : [cs] "r" (cs));
+
+ if (!valid)
+ return false;
+
+ if ((ar & AR_TYPE_MASK) != AR_TYPE_RWDATA &&
+ (ar & AR_TYPE_MASK) != AR_TYPE_RWDATA_EXPDOWN)
+ return false;
+
+ return (ar & AR_P);
+}
+
+/* Number of errors in the current test case. */
+static volatile sig_atomic_t nerrs;
+
+static void validate_signal_ss(int sig, ucontext_t *ctx)
+{
+#ifdef __x86_64__
+ bool was_64bit = (cs_bitness(*csptr(ctx)) == 64);
+
+ if (!(ctx->uc_flags & UC_SIGCONTEXT_SS)) {
+ printf("[FAIL]\tUC_SIGCONTEXT_SS was not set\n");
+ nerrs++;
+
+ /*
+ * This happens on Linux 4.1. The rest will fail, too, so
+ * return now to reduce the noise.
+ */
+ return;
+ }
+
+ /* UC_STRICT_RESTORE_SS is set iff we came from 64-bit mode. */
+ if (!!(ctx->uc_flags & UC_STRICT_RESTORE_SS) != was_64bit) {
+ printf("[FAIL]\tUC_STRICT_RESTORE_SS was wrong in signal %d\n",
+ sig);
+ nerrs++;
+ }
+
+ if (is_valid_ss(*ssptr(ctx))) {
+ /*
+ * DOSEMU was written before 64-bit sigcontext had SS, and
+ * it tries to figure out the signal source SS by looking at
+ * the physical register. Make sure that keeps working.
+ */
+ unsigned short hw_ss;
+ asm ("mov %%ss, %0" : "=rm" (hw_ss));
+ if (hw_ss != *ssptr(ctx)) {
+ printf("[FAIL]\tHW SS didn't match saved SS\n");
+ nerrs++;
+ }
+ }
+#endif
+}
+
+/*
+ * SIGUSR1 handler. Sets CS and SS as requested and points IP to the
+ * int3 trampoline. Sets SP to a large known value so that we can see
+ * whether the value round-trips back to user mode correctly.
+ */
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ validate_signal_ss(sig, ctx);
+
+ memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+ *csptr(ctx) = sig_cs;
+ *ssptr(ctx) = sig_ss;
+
+ ctx->uc_mcontext.gregs[REG_IP] =
+ sig_cs == code16_sel ? 0 : (unsigned long)&int3;
+ ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
+ ctx->uc_mcontext.gregs[REG_CX] = 0;
+
+ memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */
+
+ return;
+}
+
+/*
+ * Called after a successful sigreturn (via int3) or from a failed
+ * sigreturn (directly by kernel). Restores our state so that the
+ * original raise(SIGUSR1) returns.
+ */
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ validate_signal_ss(sig, ctx);
+
+ sig_err = ctx->uc_mcontext.gregs[REG_ERR];
+ sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO];
+
+ unsigned short ss;
+ asm ("mov %%ss,%0" : "=r" (ss));
+
+ greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
+ if (asm_ss != sig_ss && sig == SIGTRAP) {
+ /* Sanity check failure. */
+ printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+ ss, *ssptr(ctx), (unsigned long long)asm_ss);
+ nerrs++;
+ }
+
+ memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+ memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+#ifdef __x86_64__
+ if (sig_corrupt_final_ss) {
+ if (ctx->uc_flags & UC_STRICT_RESTORE_SS) {
+ printf("[FAIL]\tUC_STRICT_RESTORE_SS was set inappropriately\n");
+ nerrs++;
+ } else {
+ /*
+ * DOSEMU transitions from 32-bit to 64-bit mode by
+ * adjusting sigcontext, and it requires that this work
+ * even if the saved SS is bogus.
+ */
+ printf("\tCorrupting SS on return to 64-bit mode\n");
+ *ssptr(ctx) = 0;
+ }
+ }
+#endif
+
+ sig_trapped = sig;
+}
+
+#ifdef __x86_64__
+/* Tests recovery if !UC_STRICT_RESTORE_SS */
+static void sigusr2(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (!(ctx->uc_flags & UC_STRICT_RESTORE_SS)) {
+ printf("[FAIL]\traise(2) didn't set UC_STRICT_RESTORE_SS\n");
+ nerrs++;
+ return; /* We can't do the rest. */
+ }
+
+ ctx->uc_flags &= ~UC_STRICT_RESTORE_SS;
+ *ssptr(ctx) = 0;
+
+ /* Return. The kernel should recover without sending another signal. */
+}
+
+static int test_nonstrict_ss(void)
+{
+ clearhandler(SIGUSR1);
+ clearhandler(SIGTRAP);
+ clearhandler(SIGSEGV);
+ clearhandler(SIGILL);
+ sethandler(SIGUSR2, sigusr2, 0);
+
+ nerrs = 0;
+
+ printf("[RUN]\tClear UC_STRICT_RESTORE_SS and corrupt SS\n");
+ raise(SIGUSR2);
+ if (!nerrs)
+ printf("[OK]\tIt worked\n");
+
+ return nerrs;
+}
+#endif
+
+/* Finds a usable code segment of the requested bitness. */
+int find_cs(int bitness)
+{
+ unsigned short my_cs;
+
+ asm ("mov %%cs,%0" : "=r" (my_cs));
+
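+ /*
+ * Linux places the user 32-bit and 64-bit code segments two GDT
+ * entries apart, so probing my_cs +/- (2 << 3) finds the other
+ * bitness if it exists.
+ */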
+ if (cs_bitness(my_cs) == bitness)
+ return my_cs;
+ if (cs_bitness(my_cs + (2 << 3)) == bitness)
+ return my_cs + (2 << 3);
+ if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness)
+ return my_cs - (2 << 3);
+ if (cs_bitness(code16_sel) == bitness)
+ return code16_sel;
+
+ printf("[WARN]\tCould not find %d-bit CS\n", bitness);
+ return -1;
+}
+
+static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss)
+{
+ int cs = find_cs(cs_bits);
+ if (cs == -1) {
+ printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n",
+ cs_bits, use_16bit_ss ? 16 : 32);
+ return 0;
+ }
+
+ if (force_ss != -1) {
+ sig_ss = force_ss;
+ } else {
+ if (use_16bit_ss) {
+ if (!data16_sel) {
+ printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n",
+ cs_bits);
+ return 0;
+ }
+ sig_ss = data16_sel;
+ } else {
+ asm volatile ("mov %%ss,%0" : "=r" (sig_ss));
+ }
+ }
+
+ sig_cs = cs;
+
+ printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n",
+ cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss,
+ (sig_ss & 4) ? "" : ", GDT");
+
+ raise(SIGUSR1);
+
+ nerrs = 0;
+
+ /*
+ * Check that each register had an acceptable value when the
+ * int3 trampoline was invoked.
+ */
+ for (int i = 0; i < NGREG; i++) {
+ greg_t req = requested_regs[i], res = resulting_regs[i];
+
+ if (i == REG_TRAPNO || i == REG_IP)
+ continue; /* don't care */
+
+ if (i == REG_SP) {
+ /*
+ * If we were using a 16-bit stack segment, then
+ * the kernel is a bit stuck: IRET only restores
+ * the low 16 bits of ESP/RSP if SS is 16-bit.
+ * The kernel uses a hack to restore bits 31:16,
+ * but that hack doesn't help with bits 63:32.
+ * On Intel CPUs, bits 63:32 end up zeroed, and, on
+ * AMD CPUs, they leak the high bits of the kernel
+ * espfix64 stack pointer. There's very little that
+ * the kernel can do about it.
+ *
+ * Similarly, if we are returning to a 32-bit context,
+ * the CPU will often lose the high 32 bits of RSP.
+ */
+
+ if (res == req)
+ continue;
+
+ if (cs_bits != 64 && ((res ^ req) & 0xFFFFFFFF) == 0) {
+ printf("[NOTE]\tSP: %llx -> %llx\n",
+ (unsigned long long)req,
+ (unsigned long long)res);
+ continue;
+ }
+
+ printf("[FAIL]\tSP mismatch: requested 0x%llx; got 0x%llx\n",
+ (unsigned long long)requested_regs[i],
+ (unsigned long long)resulting_regs[i]);
+ nerrs++;
+ continue;
+ }
+
+ bool ignore_reg = false;
+#if __i386__
+ if (i == REG_UESP)
+ ignore_reg = true;
+#else
+ if (i == REG_CSGSFS) {
+ struct selectors *req_sels =
+ (void *)&requested_regs[REG_CSGSFS];
+ struct selectors *res_sels =
+ (void *)&resulting_regs[REG_CSGSFS];
+ if (req_sels->cs != res_sels->cs) {
+ printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n",
+ req_sels->cs, res_sels->cs);
+ nerrs++;
+ }
+
+ if (req_sels->ss != res_sels->ss) {
+ printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n",
+ req_sels->ss, res_sels->ss);
+ nerrs++;
+ }
+
+ continue;
+ }
+#endif
+
+ /* Sanity check on the kernel */
+ if (i == REG_CX && req != res) {
+ printf("[FAIL]\tCX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n",
+ (unsigned long long)req,
+ (unsigned long long)res);
+ nerrs++;
+ continue;
+ }
+
+ if (req != res && !ignore_reg) {
+ printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n",
+ i, (unsigned long long)req,
+ (unsigned long long)res);
+ nerrs++;
+ }
+ }
+
+ if (nerrs == 0)
+ printf("[OK]\tall registers okay\n");
+
+ return nerrs;
+}
+
+static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs)
+{
+ int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs;
+ if (cs == -1)
+ return 0;
+
+ sig_cs = cs;
+ sig_ss = ss;
+
+ printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n",
+ cs_bits, sig_cs, sig_ss);
+
+ sig_trapped = 0;
+ raise(SIGUSR1);
+ if (sig_trapped) {
+ char errdesc[32] = "";
+ if (sig_err) {
+ const char *src = (sig_err & 1) ? " EXT" : "";
+ const char *table;
+ if ((sig_err & 0x6) == 0x0)
+ table = "GDT";
+ else if ((sig_err & 0x6) == 0x4)
+ table = "LDT";
+ else if ((sig_err & 0x6) == 0x2)
+ table = "IDT";
+ else
+ table = "???";
+
+ sprintf(errdesc, "%s%s index %d, ",
+ table, src, sig_err >> 3);
+ }
+
+ char trapname[32];
+ if (sig_trapno == 13)
+ strcpy(trapname, "GP");
+ else if (sig_trapno == 11)
+ strcpy(trapname, "NP");
+ else if (sig_trapno == 12)
+ strcpy(trapname, "SS");
+ else if (sig_trapno == 32)
+ strcpy(trapname, "IRET"); /* X86_TRAP_IRET */
+ else
+ sprintf(trapname, "%d", sig_trapno);
+
+ printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n",
+ trapname, (unsigned long)sig_err,
+ errdesc, strsignal(sig_trapped));
+ return 0;
+ } else {
+ /*
+ * This also implicitly tests UC_STRICT_RESTORE_SS:
+ * We check that these signals set UC_STRICT_RESTORE_SS and,
+ * if UC_STRICT_RESTORE_SS doesn't cause strict behavior,
+ * then we won't get SIGSEGV.
+ */
+ printf("[FAIL]\tDid not get SIGSEGV\n");
+ return 1;
+ }
+}
+
+int main()
+{
+ int total_nerrs = 0;
+ unsigned short my_cs, my_ss;
+
+ asm volatile ("mov %%cs,%0" : "=r" (my_cs));
+ asm volatile ("mov %%ss,%0" : "=r" (my_ss));
+ setup_ldt();
+
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGTRAP, sigtrap, SA_ONSTACK);
+
+ /* Easy cases: return to a 32-bit SS in each possible CS bitness. */
+ total_nerrs += test_valid_sigreturn(64, false, -1);
+ total_nerrs += test_valid_sigreturn(32, false, -1);
+ total_nerrs += test_valid_sigreturn(16, false, -1);
+
+ /*
+ * Test easy espfix cases: return to a 16-bit LDT SS in each possible
+ * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant.
+ *
+ * This catches the original missing-espfix-on-64-bit-kernels issue
+ * as well as CVE-2014-8134.
+ */
+ total_nerrs += test_valid_sigreturn(64, true, -1);
+ total_nerrs += test_valid_sigreturn(32, true, -1);
+ total_nerrs += test_valid_sigreturn(16, true, -1);
+
+ if (gdt_data16_idx) {
+ /*
+ * For performance reasons, Linux skips espfix if SS points
+ * to the GDT. If we were able to allocate a 16-bit SS in
+ * the GDT, see if it leaks parts of the kernel stack pointer.
+ *
+ * This tests for CVE-2014-8133.
+ */
+ total_nerrs += test_valid_sigreturn(64, true,
+ GDT3(gdt_data16_idx));
+ total_nerrs += test_valid_sigreturn(32, true,
+ GDT3(gdt_data16_idx));
+ total_nerrs += test_valid_sigreturn(16, true,
+ GDT3(gdt_data16_idx));
+ }
+
+#ifdef __x86_64__
+ /* Nasty ABI case: check SS corruption handling. */
+ sig_corrupt_final_ss = 1;
+ total_nerrs += test_valid_sigreturn(32, false, -1);
+ total_nerrs += test_valid_sigreturn(32, true, -1);
+ sig_corrupt_final_ss = 0;
+#endif
+
+ /*
+ * We're done testing valid sigreturn cases. Now we test states
+ * for which sigreturn itself will succeed but the subsequent
+ * entry to user mode will fail.
+ *
+ * Depending on the failure mode and the kernel bitness, these
+ * entry failures can generate SIGSEGV, SIGBUS, or SIGILL.
+ */
+ clearhandler(SIGTRAP);
+ sethandler(SIGSEGV, sigtrap, SA_ONSTACK);
+ sethandler(SIGBUS, sigtrap, SA_ONSTACK);
+ sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */
+
+ /* Easy failures: invalid SS, resulting in #GP(0) */
+ test_bad_iret(64, ldt_nonexistent_sel, -1);
+ test_bad_iret(32, ldt_nonexistent_sel, -1);
+ test_bad_iret(16, ldt_nonexistent_sel, -1);
+
+ /* These fail because SS isn't a data segment, resulting in #GP(SS) */
+ test_bad_iret(64, my_cs, -1);
+ test_bad_iret(32, my_cs, -1);
+ test_bad_iret(16, my_cs, -1);
+
+ /* Try to return to a not-present code segment, triggering #NP(SS). */
+ test_bad_iret(32, my_ss, npcode32_sel);
+
+ /*
+ * Try to return to a not-present but otherwise valid data segment.
+ * This will cause IRET to fail with #SS on the espfix stack. This
+ * exercises CVE-2014-9322.
+ *
+ * Note that, if espfix is enabled, 64-bit Linux will lose track
+ * of the actual cause of failure and report #GP(0) instead.
+ * This would be very difficult for Linux to avoid, because
+ * espfix64 causes IRET failures to be promoted to #DF, so the
+ * original exception frame is never pushed onto the stack.
+ */
+ test_bad_iret(32, npdata32_sel, -1);
+
+ /*
+ * Try to return to a not-present but otherwise valid data
+ * segment without invoking espfix. Newer kernels don't allow
+ * this to happen in the first place. On older kernels, though,
+ * this can trigger CVE-2014-9322.
+ */
+ if (gdt_npdata32_idx)
+ test_bad_iret(32, GDT3(gdt_npdata32_idx), -1);
+
+#ifdef __x86_64__
+ total_nerrs += test_nonstrict_ss();
+#endif
+
+ return total_nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
new file mode 100644
index 000000000..ddfdd635d
--- /dev/null
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -0,0 +1,187 @@
+/*
+ * single_step_syscall.c - single-steps various x86 syscalls
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This is a very simple series of tests that makes system calls with
+ * the TF flag set. This exercises some nasty kernel code in the
+ * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set
+ * immediately issues #DB from CPL 0. This requires special handling in
+ * the kernel.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <asm/ldt.h>
+#include <err.h>
+#include <setjmp.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+
+#ifdef __x86_64__
+# define REG_IP REG_RIP
+# define WIDTH "q"
+# define INT80_CLOBBERS "r8", "r9", "r10", "r11"
+#else
+# define REG_IP REG_EIP
+# define WIDTH "l"
+# define INT80_CLOBBERS
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+ printf("[WARN]\tSIGTRAP handler had TF set\n");
+ _exit(1);
+ }
+
+ sig_traps++;
+
+ if (sig_traps == 10000 || sig_traps == 10001) {
+ printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n",
+ (int)sig_traps,
+ (unsigned long)info->si_addr,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
+ }
+}
+
+static void check_result(void)
+{
+ unsigned long new_eflags = get_eflags();
+ set_eflags(new_eflags & ~X86_EFLAGS_TF);
+
+ if (!sig_traps) {
+ printf("[FAIL]\tNo SIGTRAP\n");
+ exit(1);
+ }
+
+ if (!(new_eflags & X86_EFLAGS_TF)) {
+ printf("[FAIL]\tTF was cleared\n");
+ exit(1);
+ }
+
+ printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps);
+ sig_traps = 0;
+}
+
+int main()
+{
+#ifdef CAN_BUILD_32
+ int tmp;
+#endif
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ printf("[RUN]\tSet TF and check nop\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("nop");
+ check_result();
+
+#ifdef __x86_64__
+ printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ extern unsigned char post_nop[];
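+ /*
+ * Arrange R11 = RFLAGS and RCX = the post-nop RIP, the register
+ * state in which the kernel may opportunistically use SYSRET to
+ * return from the single-step trap.
+ */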
+ asm volatile ("pushf" WIDTH "\n\t"
+ "pop" WIDTH " %%r11\n\t"
+ "nop\n\t"
+ "post_nop:"
+ : : "c" (post_nop) : "r11");
+ check_result();
+#endif
+#ifdef CAN_BUILD_32
+ printf("[RUN]\tSet TF and check int80\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)
+ : INT80_CLOBBERS);
+ check_result();
+#endif
+
+ /*
+ * This test is particularly interesting if fast syscalls use
+ * SYSENTER: it triggers a nasty design flaw in SYSENTER.
+ * Specifically, SYSENTER does not clear TF, so either SYSENTER
+ * or the next instruction traps at CPL0. (Of course, Intel
+ * mostly forgot to document exactly what happens here.) So we
+ * get a CPL0 fault with usergs (on 64-bit kernels) and possibly
+ * no stack. The only sane way the kernel can possibly handle
+ * it is to clear TF on return from the #DB handler, but this
+ * happens way too early to set TF in the saved pt_regs, so the
+ * kernel has to do something clever to avoid losing track of
+ * the TF bit.
+ *
+ * Needless to say, we've had bugs in this area.
+ */
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid);
+ check_result();
+
+ /* Now make sure that another fast syscall doesn't set TF again. */
+ printf("[RUN]\tFast syscall with TF cleared\n");
+ fflush(stdout); /* Force a syscall */
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is now set\n");
+ exit(1);
+ }
+ if (sig_traps) {
+ printf("[FAIL]\tGot SIGTRAP\n");
+ exit(1);
+ }
+ printf("[OK]\tNothing unexpected happened\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
new file mode 100644
index 000000000..7db4fc9fa
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -0,0 +1,130 @@
+/*
+ * syscall_arg_fault.c - tests faults in 32-bit fast syscall stack args
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <setjmp.h>
+#include <errno.h>
+
+/* Our sigaltstack scratch space. */
+static unsigned char altstack_data[SIGSTKSZ];
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static volatile sig_atomic_t sig_traps;
+static sigjmp_buf jmpbuf;
+
+static volatile sig_atomic_t n_errs;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EAX] != -EFAULT) {
+ printf("[FAIL]\tAX had the wrong value: 0x%x\n",
+ ctx->uc_mcontext.gregs[REG_EAX]);
+ n_errs++;
+ } else {
+ printf("[OK]\tSeems okay\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigill(int sig, siginfo_t *info, void *ctx_void)
+{
+ printf("[SKIP]\tIllegal instruction\n");
+ siglongjmp(jmpbuf, 1);
+}
+
+int main()
+{
+ stack_t stack = {
+ .ss_sp = altstack_data,
+ .ss_size = SIGSTKSZ,
+ };
+ if (sigaltstack(&stack, NULL) != 0)
+ err(1, "sigaltstack");
+
+ sethandler(SIGSEGV, sigsegv, SA_ONSTACK);
+ sethandler(SIGILL, sigill, SA_ONSTACK);
+
+ /*
+ * Exercise another nasty special case. The 32-bit SYSCALL
+ * and SYSENTER instructions (even in compat mode) each
+ * clobber one register. A Linux system call has a syscall
+ * number and six arguments, and the user stack pointer
+ * needs to live in some register on return. That means
+ * that we need eight registers, but SYSCALL and SYSENTER
+ * only preserve seven registers. As a result, one argument
+ * ends up on the stack. The stack is user memory, which
+ * means that the kernel can fail to read it.
+ *
+ * The 32-bit fast system calls don't have a defined ABI:
+ * we're supposed to invoke them through the vDSO. So we'll
+ * fudge it: we set all regs to invalid pointer values and
+ * invoke the entry instruction. The return will fail no
+ * matter what, and we completely lose our program state,
+ * but we can fix it up with a signal handler.
+ */
+
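+ /*
+ * With ESP and EBP invalid, the kernel cannot read the stack-borne
+ * sixth argument, so AX should come back as -EFAULT (checked in the
+ * SIGSEGV handler above).
+ */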
+ printf("[RUN]\tSYSENTER with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+
+ printf("[RUN]\tSYSCALL with invalid state\n");
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "syscall\n\t"
+ "pushl $0" /* make sure we segfault cleanly */
+ : : : "memory", "flags");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
new file mode 100644
index 000000000..74e6b3fc2
--- /dev/null
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -0,0 +1,96 @@
+/*
+ * syscall_nt.c - checks syscalls with NT set
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * Some obscure user-space code requires the ability to make system calls
+ * with FLAGS.NT set. Make sure it works.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <err.h>
+#include <sys/syscall.h>
+#include <asm/processor-flags.h>
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned int nerrs;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigtrap(int sig, siginfo_t *si, void *ctx_void)
+{
+}
+
+static void do_it(unsigned long extraflags)
+{
+ unsigned long flags;
+
+ set_eflags(get_eflags() | extraflags);
+ syscall(SYS_getpid);
+ flags = get_eflags();
+ set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
+ if ((flags & extraflags) == extraflags) {
+ printf("[OK]\tThe syscall worked and flags are still set\n");
+ } else {
+ printf("[FAIL]\tThe syscall worked but flags were cleared (flags = 0x%lx but expected 0x%lx set)\n",
+ flags, extraflags);
+ nerrs++;
+ }
+}
+
+int main(void)
+{
+ printf("[RUN]\tSet NT and issue a syscall\n");
+ do_it(X86_EFLAGS_NT);
+
+ /*
+ * Now try it again with TF set -- TF forces returns via IRET in all
+ * cases except non-ptregs-using 64-bit full fast path syscalls.
+ */
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ printf("[RUN]\tSet NT|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
new file mode 100644
index 000000000..d85ec5b36
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -0,0 +1,195 @@
+/*
+ * sysret_rip.c - tests that x86 avoids Intel SYSRET pitfalls
+ * Copyright (c) 2014-2016 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/signal.h>
+#include <sys/ucontext.h>
+#include <sys/syscall.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <setjmp.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+
+asm (
+ ".pushsection \".text\", \"ax\"\n\t"
+ ".balign 4096\n\t"
+ "test_page: .globl test_page\n\t"
+ ".fill 4094,1,0xcc\n\t"
+ "test_syscall_insn:\n\t"
+ "syscall\n\t"
+ ".ifne . - test_page - 4096\n\t"
+ ".error \"test page is not one page long\"\n\t"
+ ".endif\n\t"
+ ".popsection"
+ );
+
+extern const char test_page[];
+static void const *current_test_page_addr = test_page;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* State used by our signal handlers. */
+static gregset_t initial_regs;
+
+static volatile unsigned long rip;
+
+static void sigsegv_for_sigreturn_test(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+ printf("[FAIL]\tRequested RIP=0x%lx but got RIP=0x%lx\n",
+ rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+ fflush(stdout);
+ _exit(1);
+ }
+
+ memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t));
+
+ printf("[OK]\tGot SIGSEGV at RIP=0x%lx\n", rip);
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
+
+ /* Set IP and CX to match so that SYSRET can happen. */
+ ctx->uc_mcontext.gregs[REG_RIP] = rip;
+ ctx->uc_mcontext.gregs[REG_RCX] = rip;
+
+ /* R11 and EFLAGS should already match. */
+ assert(ctx->uc_mcontext.gregs[REG_EFL] ==
+ ctx->uc_mcontext.gregs[REG_R11]);
+
+ sethandler(SIGSEGV, sigsegv_for_sigreturn_test, SA_RESETHAND);
+
+ return;
+}
+
+static void test_sigreturn_to(unsigned long ip)
+{
+ rip = ip;
+ printf("[RUN]\tsigreturn to 0x%lx\n", ip);
+ raise(SIGUSR1);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv_for_fallthrough(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (rip != ctx->uc_mcontext.gregs[REG_RIP]) {
+ printf("[FAIL]\tExpected SIGSEGV at 0x%lx but got RIP=0x%lx\n",
+ rip, (unsigned long)ctx->uc_mcontext.gregs[REG_RIP]);
+ fflush(stdout);
+ _exit(1);
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void test_syscall_fallthrough_to(unsigned long ip)
+{
+ void *new_address = (void *)(ip - 4096);
+ void *ret;
+
+ printf("[RUN]\tTrying a SYSCALL that falls through to 0x%lx\n", ip);
+
+ ret = mremap((void *)current_test_page_addr, 4096, 4096,
+ MREMAP_MAYMOVE | MREMAP_FIXED, new_address);
+ if (ret == MAP_FAILED) {
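+ /* Addresses below the 47-bit canonical boundary must be mappable. */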
+ if (ip <= (1UL << 47) - PAGE_SIZE) {
+ err(1, "mremap to %p", new_address);
+ } else {
+ printf("[OK]\tmremap to %p failed\n", new_address);
+ return;
+ }
+ }
+
+ if (ret != new_address)
+ errx(1, "mremap malfunctioned: asked for %p but got %p\n",
+ new_address, ret);
+
+ current_test_page_addr = new_address;
+ rip = ip;
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ asm volatile ("call *%[syscall_insn]" :: "a" (SYS_getpid),
+ [syscall_insn] "rm" (ip - 2));
+ errx(1, "[FAIL]\tSyscall trampoline returned");
+ }
+
+ printf("[OK]\tWe survived\n");
+}
+
+int main()
+{
+ /*
+ * When the kernel returns from a slow-path syscall, it will
+ * detect whether SYSRET is appropriate. If it incorrectly
+ * thinks that SYSRET is appropriate when RIP is noncanonical,
+ * it'll crash on Intel CPUs.
+ */
+ sethandler(SIGUSR1, sigusr1, 0);
+ for (int i = 47; i < 64; i++)
+ test_sigreturn_to(1UL<<i);
+
+ clearhandler(SIGUSR1);
+
+ sethandler(SIGSEGV, sigsegv_for_fallthrough, 0);
+
+ /* One extra test to check that we didn't screw up the mremap logic. */
+ test_syscall_fallthrough_to((1UL << 47) - 2*PAGE_SIZE);
+
+ /* These are the interesting cases. */
+ for (int i = 47; i < 64; i++) {
+ test_syscall_fallthrough_to((1UL<<i) - PAGE_SIZE);
+ test_syscall_fallthrough_to(1UL<<i);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c
new file mode 100644
index 000000000..ce42d5a64
--- /dev/null
+++ b/tools/testing/selftests/x86/sysret_ss_attrs.c
@@ -0,0 +1,112 @@
+/*
+ * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * On AMD CPUs, SYSRET can return with a valid SS descriptor with
+ * the hidden attributes set to an unusable state. Make sure the kernel
+ * doesn't let this happen.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <pthread.h>
+
+static void *threadproc(void *ctx)
+{
+ /*
+ * Do our best to cause sleeps on this CPU to exit the kernel and
+ * re-enter with SS = 0.
+ */
+ while (true)
+ ;
+
+ return NULL;
+}
+
+#ifdef __x86_64__
+extern unsigned long call32_from_64(void *stack, void (*function)(void));
+
+asm (".pushsection .text\n\t"
+ ".code32\n\t"
+ "test_ss:\n\t"
+ "pushl $0\n\t"
+ "popl %eax\n\t"
+ "ret\n\t"
+ ".code64");
+extern void test_ss(void);
+#endif
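+
+/*
+ * The idea behind test_ss: its push/pop pair touches the stack through
+ * SS from 32-bit mode, so unusable cached SS attributes should make the
+ * access fault even though the SS selector itself looks valid.
+ */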
+
+int main()
+{
+ /*
+ * Start a busy-looping thread on the same CPU we're on.
+ * For simplicity, just stick everything to CPU 0. This will
+ * fail in some containers, but that's probably okay.
+ */
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ printf("[WARN]\tsched_setaffinity failed\n");
+
+ pthread_t thread;
+ if (pthread_create(&thread, 0, threadproc, 0) != 0)
+ err(1, "pthread_create");
+
+#ifdef __x86_64__
+ unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE,
+ -1, 0);
+ if (stack32 == MAP_FAILED)
+ err(1, "mmap");
+#endif
+
+ printf("[RUN]\tSyscalls followed by SS validation\n");
+
+ for (int i = 0; i < 1000; i++) {
+ /*
+ * Go to sleep and return using sysret (if we're 64-bit
+ * or we're 32-bit on AMD on a 64-bit kernel). On AMD CPUs,
+ * SYSRET doesn't fix up the cached SS descriptor, so the
+ * kernel needs some kind of workaround to make sure that we
+ * end the system call with a valid stack segment. This
+ * can be a confusing failure because the SS *selector*
+ * is the same regardless.
+ */
+ usleep(2);
+
+#ifdef __x86_64__
+ /*
+ * On 32-bit, just doing a syscall through glibc is enough
+ * to cause a crash if our cached SS descriptor is invalid.
+ * On 64-bit, it's not, so try extra hard.
+ */
+ call32_from_64(stack32 + 4088, test_ss);
+#endif
+ }
+
+ printf("[OK]\tWe survived\n");
+
+#ifdef __x86_64__
+ munmap(stack32, 4096);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/test_FCMOV.c b/tools/testing/selftests/x86/test_FCMOV.c
new file mode 100644
index 000000000..6b5036fbb
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FCMOV.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+
+#define TEST(insn) \
+long double __attribute__((noinline)) insn(long flags) \
+{ \
+ long double out; \
+ asm ("\n" \
+ " push %1""\n" \
+ " popf""\n" \
+ " fldpi""\n" \
+ " fld1""\n" \
+ " " #insn " %%st(1), %%st" "\n" \
+ " ffree %%st(1)" "\n" \
+ : "=t" (out) \
+ : "r" (flags) \
+ ); \
+ return out; \
+}
+
+TEST(fcmovb)
+TEST(fcmove)
+TEST(fcmovbe)
+TEST(fcmovu)
+TEST(fcmovnb)
+TEST(fcmovne)
+TEST(fcmovnbe)
+TEST(fcmovnu)
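+
+/*
+ * For reference, TEST(fcmovb) expands to roughly:
+ *
+ *	long double fcmovb(long flags)
+ *	{
+ *		long double out;
+ *		asm ("push %1; popf; fldpi; fld1;"
+ *		     "fcmovb %%st(1), %%st; ffree %%st(1)"
+ *		     : "=t" (out) : "r" (flags));
+ *		return out;
+ *	}
+ *
+ * With the tested flag clear, the move is skipped and ST(0) keeps the
+ * 1.0 loaded by fld1; with it set, pi is copied into ST(0).
+ */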
+
+enum {
+ CF = 1 << 0,
+ PF = 1 << 2,
+ ZF = 1 << 6,
+};
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting fcmovCC instructions\n");
+ /* If fcmovCC() returns 1.0, the move wasn't done */
+ err |= !(fcmovb(0) == 1.0); err |= !(fcmovnb(0) != 1.0);
+ err |= !(fcmove(0) == 1.0); err |= !(fcmovne(0) != 1.0);
+ err |= !(fcmovbe(0) == 1.0); err |= !(fcmovnbe(0) != 1.0);
+ err |= !(fcmovu(0) == 1.0); err |= !(fcmovnu(0) != 1.0);
+
+ err |= !(fcmovb(CF) != 1.0); err |= !(fcmovnb(CF) == 1.0);
+ err |= !(fcmove(CF) == 1.0); err |= !(fcmovne(CF) != 1.0);
+ err |= !(fcmovbe(CF) != 1.0); err |= !(fcmovnbe(CF) == 1.0);
+ err |= !(fcmovu(CF) == 1.0); err |= !(fcmovnu(CF) != 1.0);
+
+ err |= !(fcmovb(ZF) == 1.0); err |= !(fcmovnb(ZF) != 1.0);
+ err |= !(fcmove(ZF) != 1.0); err |= !(fcmovne(ZF) == 1.0);
+ err |= !(fcmovbe(ZF) != 1.0); err |= !(fcmovnbe(ZF) == 1.0);
+ err |= !(fcmovu(ZF) == 1.0); err |= !(fcmovnu(ZF) != 1.0);
+
+ err |= !(fcmovb(PF) == 1.0); err |= !(fcmovnb(PF) != 1.0);
+ err |= !(fcmove(PF) == 1.0); err |= !(fcmovne(PF) != 1.0);
+ err |= !(fcmovbe(PF) == 1.0); err |= !(fcmovnbe(PF) != 1.0);
+ err |= !(fcmovu(PF) != 1.0); err |= !(fcmovnu(PF) == 1.0);
+
+ if (!err)
+ printf("[OK]\tfcmovCC\n");
+ else
+ printf("[FAIL]\tfcmovCC errors: %d\n", err);
+
+ return err;
+}
diff --git a/tools/testing/selftests/x86/test_FCOMI.c b/tools/testing/selftests/x86/test_FCOMI.c
new file mode 100644
index 000000000..aec6692c6
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FCOMI.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+enum {
+ CF = 1 << 0,
+ PF = 1 << 2,
+ ZF = 1 << 6,
+ ARITH = CF | PF | ZF,
+};
+
+long res_fcomi_pi_1;
+long res_fcomi_1_pi;
+long res_fcomi_1_1;
+long res_fcomi_nan_1;
+/* sNaN is s|111 1111 1|1xx xxxx xxxx xxxx xxxx xxxx */
+/* qNaN is s|111 1111 1|0xx xxxx xxxx xxxx xxxx xxxx (some x must be nonzero) */
+int snan = 0x7fc11111;
+int qnan = 0x7f811111;
+unsigned short snan1[5];
+/* sNaN80 is s|111 1111 1111 1111 |10xx xx...xx (some x must be nonzero) */
+unsigned short snan80[5] = { 0x1111, 0x1111, 0x1111, 0x8111, 0x7fff };
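+
+/*
+ * Note: relative to the usual IEEE-754 naming these initializers are
+ * swapped -- 0x7fc11111 has the quiet bit set and 0x7f811111 is the
+ * signalling pattern. The tests below still behave as intended because
+ * flds quiets a 32-bit sNaN on load, which is why each flds is
+ * followed by an fnclex.
+ */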
+
+int test(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fldpi""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_pi""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fldpi""\n"
+ " fld1""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_pi_1""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fld1""\n"
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_1_pi & ARITH) != (0)) {
+ printf("[BAD]\tfcomi_1_pi with flags:%lx\n", flags);
+ return 1;
+ }
+ if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+ printf("[BAD]\tfcomi_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+ return 1;
+ }
+ if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+ printf("[BAD]\tfcomi_1_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int test_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fcomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+ printf("[BAD]\tfcomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testu_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fucomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfucomi_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testu_snan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+//	" flds snan""\n"	// WRONG: this converts the 32-bit sNaN to a *qNaN* in the 80-bit register!
+//	" fstpt snan1""\n"	// if uncommented, it prints "snan1:7fff c111 1100 0000 0000" - c111, not 8111!
+//	" fnclex""\n"		// flds of an sNaN raised FE_INVALID, clear it
+	" fldt snan80""\n"	// fldt never raises FE_INVALID
+ " fld1""\n"
+ " fucomi %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " ffree %%st(1)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfucomi_snan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+// printf("snan:%x snan1:%04x %04x %04x %04x %04x\n", snan, snan1[4], snan1[3], snan1[2], snan1[1], snan1[0]);
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testp(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fldpi""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_pi""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fldpi""\n"
+ " fld1""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_pi_1""\n"
+
+ " push %0""\n"
+ " popf""\n"
+ " fld1""\n"
+ " fld1""\n"
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_1_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_1_pi & ARITH) != (0)) {
+		printf("[BAD]\tfcomip_1_pi with flags:%lx\n", flags);
+ return 1;
+ }
+ if ((res_fcomi_pi_1 & ARITH) != (CF)) {
+		printf("[BAD]\tfcomip_pi_1 with flags:%lx->%lx\n", flags, res_fcomi_pi_1 & ARITH);
+ return 1;
+ }
+ if ((res_fcomi_1_1 & ARITH) != (ZF)) {
+		printf("[BAD]\tfcomip_1_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testp_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fcomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfcomip_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != FE_INVALID) {
+ printf("[BAD]\tFE_INVALID is not set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+int testup_qnan(long flags)
+{
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+
+ asm ("\n"
+ " push %0""\n"
+ " popf""\n"
+ " flds qnan""\n"
+ " fld1""\n"
+ " fnclex""\n" // fld of a qnan raised FE_INVALID, clear it
+ " fucomip %%st(1), %%st" "\n"
+ " ffree %%st(0)" "\n"
+ " pushf""\n"
+ " pop res_fcomi_nan_1""\n"
+ :
+ : "r" (flags)
+ );
+ if ((res_fcomi_nan_1 & ARITH) != (ZF|CF|PF)) {
+		printf("[BAD]\tfucomip_qnan_1 with flags:%lx\n", flags);
+ return 1;
+ }
+ if (fetestexcept(FE_INVALID) != 0) {
+ printf("[BAD]\tFE_INVALID is set in %s\n", __func__);
+ return 1;
+ }
+ return 0;
+}
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fcomi emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting f[u]comi[p] instructions\n");
+ err |= test(0);
+ err |= test_qnan(0);
+ err |= testu_qnan(0);
+ err |= testu_snan(0);
+ err |= test(CF|ZF|PF);
+ err |= test_qnan(CF|ZF|PF);
+ err |= testu_qnan(CF|ZF|PF);
+ err |= testu_snan(CF|ZF|PF);
+ err |= testp(0);
+ err |= testp_qnan(0);
+ err |= testup_qnan(0);
+ err |= testp(CF|ZF|PF);
+ err |= testp_qnan(CF|ZF|PF);
+ err |= testup_qnan(CF|ZF|PF);
+ if (!err)
+ printf("[OK]\tf[u]comi[p]\n");
+ else
+ printf("[FAIL]\tf[u]comi[p] errors: %d\n", err);
+
+ return err;
+}
diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c
new file mode 100644
index 000000000..09789c0ce
--- /dev/null
+++ b/tools/testing/selftests/x86/test_FISTTP.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <fenv.h>
+
+unsigned long long res64 = -1;
+unsigned int res32 = -1;
+unsigned short res16 = -1;
+
+int test(void)
+{
+ int ex;
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fld1""\n"
+ " fisttp res16""\n"
+ " fld1""\n"
+ " fisttpl res32""\n"
+ " fld1""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 1 || res32 != 1 || res64 != 1) {
+ printf("[BAD]\tfisttp 1\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != 0) {
+ printf("[BAD]\tfisttp 1: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldpi""\n"
+ " fisttp res16""\n"
+ " fldpi""\n"
+ " fisttpl res32""\n"
+ " fldpi""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 3 || res32 != 3 || res64 != 3) {
+ printf("[BAD]\tfisttp pi\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp pi: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttp res16""\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttpl res32""\n"
+ " fldpi""\n"
+ " fchs""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ if (res16 != 0xfffd || res32 != 0xfffffffd || res64 != 0xfffffffffffffffdULL) {
+ printf("[BAD]\tfisttp -pi\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp -pi: wrong exception state\n");
+ return 1;
+ }
+
+ feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ asm volatile ("\n"
+ " fldln2""\n"
+ " fisttp res16""\n"
+ " fldln2""\n"
+ " fisttpl res32""\n"
+ " fldln2""\n"
+ " fisttpll res64""\n"
+ : : : "memory"
+ );
+ /* Test truncation to zero (round-to-nearest would give 1 here) */
+ if (res16 != 0 || res32 != 0 || res64 != 0) {
+ printf("[BAD]\tfisttp ln2\n");
+ return 1;
+ }
+ ex = fetestexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
+ if (ex != FE_INEXACT) {
+ printf("[BAD]\tfisttp ln2: wrong exception state\n");
+ return 1;
+ }
+
+ return 0;
+}
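+
+/*
+ * For context (not used by the test): a truncating C conversion is the
+ * portable equivalent of the fisttp sequences above; with SSE3 enabled,
+ * compilers may emit fisttp for it.
+ */
+static long long __attribute__((unused)) truncate_ld(long double x)
+{
+	/* C casts truncate toward zero regardless of the rounding mode. */
+	return (long long)x;
+}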
+
+void sighandler(int sig)
+{
+ printf("[FAIL]\tGot signal %d, exiting\n", sig);
+ exit(1);
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int err = 0;
+
+ /* SIGILL triggers on 32-bit kernels w/o fisttp emulation
+ * when run with "no387 nofxsr". Other signals are caught
+ * just in case.
+ */
+ signal(SIGILL, sighandler);
+ signal(SIGFPE, sighandler);
+ signal(SIGSEGV, sighandler);
+
+ printf("[RUN]\tTesting fisttp instructions\n");
+ err |= test();
+ if (!err)
+ printf("[OK]\tfisttp\n");
+ else
+ printf("[FAIL]\tfisttp errors: %d\n", err);
+
+ return err;
+}
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
new file mode 100644
index 000000000..64f11c8d9
--- /dev/null
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -0,0 +1,115 @@
+/*
+ * 32-bit test to check vDSO mremap.
+ *
+ * Copyright (c) 2016 Dmitry Safonov
+ * Suggested-by: Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_mremap_vdso.c
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#define PAGE_SIZE 4096
+
+static int try_to_remap(void *vdso_addr, unsigned long size)
+{
+ void *dest_addr, *new_addr;
+
+ /* Searching for memory location where to remap */
+ dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (dest_addr == MAP_FAILED) {
+ printf("[WARN]\tmmap failed (%d): %m\n", errno);
+ return 0;
+ }
+
+ printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
+ vdso_addr, (unsigned long)vdso_addr + size,
+ dest_addr, (unsigned long)dest_addr + size);
+ fflush(stdout);
+
+ new_addr = mremap(vdso_addr, size, size,
+ MREMAP_FIXED|MREMAP_MAYMOVE, dest_addr);
+ if ((unsigned long)new_addr == (unsigned long)-1) {
+ munmap(dest_addr, size);
+ if (errno == EINVAL) {
+			printf("[NOTE]\tvDSO partial move failed, will retry with a bigger size\n");
+ return -1; /* Retry with larger */
+ }
+ printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+ return 1;
+ }
+
+ return 0;
+}
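+
+/*
+ * For reference (not used here): the exact vDSO size could instead be
+ * computed from its ELF program headers, walking the Elf32_Phdr array
+ * at AT_SYSINFO_EHDR + e_phoff and taking the largest p_vaddr + p_memsz.
+ * Growing the mapping a page at a time until mremap() stops failing
+ * with EINVAL, as main() does below, is simpler.
+ */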
+
+int main(int argc, char **argv, char **envp)
+{
+ pid_t child;
+
+ child = fork();
+ if (child == -1) {
+ printf("[WARN]\tfailed to fork (%d): %m\n", errno);
+ return 1;
+ }
+
+ if (child == 0) {
+ unsigned long vdso_size = PAGE_SIZE;
+ unsigned long auxval;
+ int ret = -1;
+
+ auxval = getauxval(AT_SYSINFO_EHDR);
+ printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
+ if (!auxval || auxval == -ENOENT) {
+ printf("[WARN]\tgetauxval failed\n");
+ return 0;
+ }
+
+ /* Simpler than parsing ELF header */
+ while (ret < 0) {
+ ret = try_to_remap((void *)auxval, vdso_size);
+ vdso_size += PAGE_SIZE;
+ }
+
+#ifdef __i386__
+ /* Glibc is likely to explode now - exit with raw syscall */
+ asm volatile ("int $0x80" : : "a" (__NR_exit), "b" (!!ret));
+#else /* __x86_64__ */
+ syscall(SYS_exit, ret);
+#endif
+ } else {
+ int status;
+
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n");
+ return 1;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed with %d\n",
+ WEXITSTATUS(status));
+ return 1;
+ }
+ printf("[OK]\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/test_syscall_vdso.c b/tools/testing/selftests/x86/test_syscall_vdso.c
new file mode 100644
index 000000000..c9c328107
--- /dev/null
+++ b/tools/testing/selftests/x86/test_syscall_vdso.c
@@ -0,0 +1,408 @@
+/*
+ * 32-bit syscall ABI conformance test.
+ *
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+/*
+ * Can be built statically:
+ * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#undef __USE_GNU
+#define __USE_GNU 1
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <elf.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+
+#if !defined(__i386__)
+int main(int argc, char **argv, char **envp)
+{
+ printf("[SKIP]\tNot a 32-bit x86 userspace\n");
+ return 0;
+}
+#else
+
+long syscall_addr;
+long get_syscall(char **envp)
+{
+ Elf32_auxv_t *auxv;
+ while (*envp++ != NULL)
+ continue;
+ for (auxv = (void *)envp; auxv->a_type != AT_NULL; auxv++)
+ if (auxv->a_type == AT_SYSINFO)
+ return auxv->a_un.a_val;
+ printf("[WARN]\tAT_SYSINFO not supplied\n");
+ return 0;
+}
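+
+/*
+ * On libcs that provide it, the auxv walk above could be replaced with
+ * getauxval(AT_SYSINFO); walking past envp by hand keeps the test
+ * buildable with older toolchains.
+ */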
+
+asm (
+ " .pushsection .text\n"
+ " .global int80\n"
+ "int80:\n"
+ " int $0x80\n"
+ " ret\n"
+ " .popsection\n"
+);
+extern char int80;
+
+struct regs64 {
+ uint64_t rax, rbx, rcx, rdx;
+ uint64_t rsi, rdi, rbp, rsp;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+};
+struct regs64 regs64;
+int kernel_is_64bit;
+
+asm (
+ " .pushsection .text\n"
+ " .code64\n"
+ "get_regs64:\n"
+ " push %rax\n"
+ " mov $regs64, %eax\n"
+ " pop 0*8(%rax)\n"
+ " movq %rbx, 1*8(%rax)\n"
+ " movq %rcx, 2*8(%rax)\n"
+ " movq %rdx, 3*8(%rax)\n"
+ " movq %rsi, 4*8(%rax)\n"
+ " movq %rdi, 5*8(%rax)\n"
+ " movq %rbp, 6*8(%rax)\n"
+ " movq %rsp, 7*8(%rax)\n"
+ " movq %r8, 8*8(%rax)\n"
+ " movq %r9, 9*8(%rax)\n"
+ " movq %r10, 10*8(%rax)\n"
+ " movq %r11, 11*8(%rax)\n"
+ " movq %r12, 12*8(%rax)\n"
+ " movq %r13, 13*8(%rax)\n"
+ " movq %r14, 14*8(%rax)\n"
+ " movq %r15, 15*8(%rax)\n"
+ " ret\n"
+ "poison_regs64:\n"
+ " movq $0x7f7f7f7f, %r8\n"
+ " shl $32, %r8\n"
+ " orq $0x7f7f7f7f, %r8\n"
+ " movq %r8, %r9\n"
+ " incq %r9\n"
+ " movq %r9, %r10\n"
+ " incq %r10\n"
+ " movq %r10, %r11\n"
+ " incq %r11\n"
+ " movq %r11, %r12\n"
+ " incq %r12\n"
+ " movq %r12, %r13\n"
+ " incq %r13\n"
+ " movq %r13, %r14\n"
+ " incq %r14\n"
+ " movq %r14, %r15\n"
+ " incq %r15\n"
+ " ret\n"
+ " .code32\n"
+ " .popsection\n"
+);
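+
+/*
+ * poison_regs64 above fills R8..R15 with 0x7f7f7f7f7f7f7f7f, +1, +2,
+ * and so on; check_regs64 compares against the same sequence to see
+ * which registers survived the 32-bit syscall unchanged.
+ */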
+extern void get_regs64(void);
+extern void poison_regs64(void);
+extern unsigned long call64_from_32(void (*function)(void));
+void print_regs64(void)
+{
+ if (!kernel_is_64bit)
+ return;
+ printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx);
+ printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp);
+ printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11);
+ printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15);
+}
+
+int check_regs64(void)
+{
+ int err = 0;
+ int num = 8;
+ uint64_t *r64 = &regs64.r8;
+ uint64_t expected = 0x7f7f7f7f7f7f7f7fULL;
+
+ if (!kernel_is_64bit)
+ return 0;
+
+ do {
+ if (*r64 == expected++)
+ continue; /* register did not change */
+ if (syscall_addr != (long)&int80) {
+ /*
+			 * Non-INT80 syscall entrypoints are allowed to clobber
+			 * R8..R15: they either clear them to 0 or, in R11's
+			 * case, load it with EFLAGS.
+ */
+ if (*r64 == 0)
+ continue;
+ if (num == 11) {
+ printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64);
+ continue;
+ }
+ } else {
+ /*
+ * INT80 syscall entrypoint can be used by
+ * 64-bit programs too, unlike SYSCALL/SYSENTER.
+ * Therefore it must preserve R12+
+ * (they are callee-saved registers in 64-bit C ABI).
+ *
+ * Starting in Linux 4.17 (and any kernel that
+ * backports the change), R8..11 are preserved.
+ * Historically (and probably unintentionally), they
+ * were clobbered or zeroed.
+ */
+ }
+ printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64);
+ err++;
+ } while (r64++, ++num < 16);
+
+ if (!err)
+ printf("[OK]\tR8..R15 did not leak kernel data\n");
+ return err;
+}
+
+int nfds;
+fd_set rfds;
+fd_set wfds;
+fd_set efds;
+struct timespec timeout;
+sigset_t sigmask;
+struct {
+ sigset_t *sp;
+ int sz;
+} sigmask_desc;
+
+void prep_args()
+{
+ nfds = 42;
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+ FD_ZERO(&efds);
+ FD_SET(0, &rfds);
+ FD_SET(1, &wfds);
+ FD_SET(2, &efds);
+ timeout.tv_sec = 0;
+ timeout.tv_nsec = 123;
+ sigemptyset(&sigmask);
+ sigaddset(&sigmask, SIGINT);
+ sigaddset(&sigmask, SIGUSR2);
+ sigaddset(&sigmask, SIGRTMAX);
+ sigmask_desc.sp = &sigmask;
+ sigmask_desc.sz = 8; /* bytes */
+}
+
+static void print_flags(const char *name, unsigned long r)
+{
+ static const char *bitarray[] = {
+ "\n" ,"c\n" ,/* Carry Flag */
+ "0 " ,"1 " ,/* Bit 1 - always on */
+ "" ,"p " ,/* Parity Flag */
+ "0 " ,"3? " ,
+ "" ,"a " ,/* Auxiliary carry Flag */
+ "0 " ,"5? " ,
+ "" ,"z " ,/* Zero Flag */
+ "" ,"s " ,/* Sign Flag */
+ "" ,"t " ,/* Trap Flag */
+ "" ,"i " ,/* Interrupt Flag */
+ "" ,"d " ,/* Direction Flag */
+ "" ,"o " ,/* Overflow Flag */
+ "0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
+ "0" ,"1" ,/* I/O Privilege Level (2 bits) */
+ "" ,"n " ,/* Nested Task */
+ "0 " ,"15? ",
+ "" ,"r " ,/* Resume Flag */
+ "" ,"v " ,/* Virtual Mode */
+ "" ,"ac " ,/* Alignment Check/Access Control */
+ "" ,"vif ",/* Virtual Interrupt Flag */
+ "" ,"vip ",/* Virtual Interrupt Pending */
+ "" ,"id " ,/* CPUID detection */
+ NULL
+ };
+ const char **bitstr;
+ int bit;
+
+ printf("%s=%016lx ", name, r);
+ bitstr = bitarray + 42;
+ bit = 21;
+ if ((r >> 22) != 0)
+ printf("(extra bits are set) ");
+ do {
+ if (bitstr[(r >> bit) & 1][0])
+ fputs(bitstr[(r >> bit) & 1], stdout);
+ bitstr -= 2;
+ bit--;
+ } while (bit >= 0);
+}
+
+int run_syscall(void)
+{
+ long flags, bad_arg;
+
+ prep_args();
+
+ if (kernel_is_64bit)
+ call64_from_32(poison_regs64);
+ /*print_regs64();*/
+
+ asm("\n"
+ /* Try 6-arg syscall: pselect. It should return quickly */
+ " push %%ebp\n"
+ " mov $308, %%eax\n" /* PSELECT */
+ " mov nfds, %%ebx\n" /* ebx arg1 */
+ " mov $rfds, %%ecx\n" /* ecx arg2 */
+ " mov $wfds, %%edx\n" /* edx arg3 */
+ " mov $efds, %%esi\n" /* esi arg4 */
+ " mov $timeout, %%edi\n" /* edi arg5 */
+ " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+ " push $0x200ed7\n" /* set almost all flags */
+ " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */
+ " call *syscall_addr\n"
+ /* Check that registers are not clobbered */
+ " pushf\n"
+ " pop %%eax\n"
+ " cld\n"
+ " cmp nfds, %%ebx\n" /* ebx arg1 */
+ " mov $1, %%ebx\n"
+ " jne 1f\n"
+ " cmp $rfds, %%ecx\n" /* ecx arg2 */
+ " mov $2, %%ebx\n"
+ " jne 1f\n"
+ " cmp $wfds, %%edx\n" /* edx arg3 */
+ " mov $3, %%ebx\n"
+ " jne 1f\n"
+ " cmp $efds, %%esi\n" /* esi arg4 */
+ " mov $4, %%ebx\n"
+ " jne 1f\n"
+ " cmp $timeout, %%edi\n" /* edi arg5 */
+ " mov $5, %%ebx\n"
+ " jne 1f\n"
+ " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */
+ " mov $6, %%ebx\n"
+ " jne 1f\n"
+ " mov $0, %%ebx\n"
+ "1:\n"
+ " pop %%ebp\n"
+ : "=a" (flags), "=b" (bad_arg)
+ :
+ : "cx", "dx", "si", "di"
+ );
+
+ if (kernel_is_64bit) {
+ memset(&regs64, 0x77, sizeof(regs64));
+ call64_from_32(get_regs64);
+ /*print_regs64();*/
+ }
+
+ /*
+ * On paravirt kernels, flags are not preserved across syscalls.
+ * Thus, we do not consider it a bug if some are changed.
+	 * We just show the ones that changed.
+ */
+ if ((0x200ed7 ^ flags) != 0) {
+ print_flags("[WARN]\tFlags before", 0x200ed7);
+ print_flags("[WARN]\tFlags after", flags);
+ print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags));
+ }
+
+ if (bad_arg) {
+ printf("[FAIL]\targ#%ld clobbered\n", bad_arg);
+ return 1;
+ }
+ printf("[OK]\tArguments are preserved across syscall\n");
+
+ return check_regs64();
+}
+
+int run_syscall_twice()
+{
+ int exitcode = 0;
+ long sv;
+
+ if (syscall_addr) {
+ printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n");
+ exitcode = run_syscall();
+ }
+ sv = syscall_addr;
+ syscall_addr = (long)&int80;
+ printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n");
+ exitcode += run_syscall();
+ syscall_addr = sv;
+ return exitcode;
+}
+
+void ptrace_me()
+{
+ pid_t pid;
+
+ fflush(NULL);
+ pid = fork();
+ if (pid < 0)
+ exit(1);
+ if (pid == 0) {
+ /* child */
+ if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
+ exit(0);
+ raise(SIGSTOP);
+ return;
+ }
+ /* parent */
+ printf("[RUN]\tRunning tests under ptrace\n");
+ while (1) {
+ int status;
+ pid = waitpid(-1, &status, __WALL);
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+ if (WIFSIGNALED(status))
+ exit(WTERMSIG(status));
+ if (pid <= 0 || !WIFSTOPPED(status)) /* paranoia */
+ exit(255);
+ /*
+ * Note: we do not inject sig = WSTOPSIG(status).
+ * We probably should, but careful: do not inject SIGTRAP
+ * generated by syscall entry/exit stops.
+ * That kills the child.
+ */
+ ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
+ }
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int exitcode = 0;
+ int cs;
+
+ asm("\n"
+ " movl %%cs, %%eax\n"
+ : "=a" (cs)
+ );
+ kernel_is_64bit = (cs == 0x23);
+ if (!kernel_is_64bit)
+ printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n");
+
+ /* This only works for non-static builds:
+ * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall");
+ */
+ syscall_addr = get_syscall(envp);
+
+ exitcode += run_syscall_twice();
+ ptrace_me();
+ exitcode += run_syscall_twice();
+
+ return exitcode;
+}
+#endif
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
new file mode 100644
index 000000000..35edd61d1
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -0,0 +1,337 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * test_vdso.c - Test cases for the vDSO
+ * Copyright (c) 2011-2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <limits.h>
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+int nerrs = 0;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+
+vgettime_t vdso_clock_gettime;
+
+typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
+
+vgtod_t vdso_gettimeofday;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+
+getcpu_t vgetcpu;
+getcpu_t vdso_getcpu;
+
+static void *vsyscall_getcpu(void)
+{
+#ifdef __x86_64__
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+	if (!maps) /* vsyscall may still be present; without maps we can't tell, and we test the vDSO here, not vsyscall */
+ return NULL;
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here: name is MAPS_LINE_LEN bytes, as large as the whole line */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ /* assume entries are OK, as we test vDSO here not vsyscall */
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("Warning: failed to find vsyscall getcpu\n");
+ return NULL;
+ }
+ return (void *) (0xffffffffff600800);
+#else
+ return NULL;
+#endif
+}
+
+
+static void fill_function_pointers()
+{
+ void *vdso = dlopen("linux-vdso.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu)
+ printf("Warning: failed to find getcpu in vDSO\n");
+
+ vgetcpu = (getcpu_t) vsyscall_getcpu();
+
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_clock_gettime)
+ printf("Warning: failed to find clock_gettime in vDSO\n");
+
+ vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gettimeofday)
+ printf("Warning: failed to find gettimeofday in vDSO\n");
+
+}
+
+static long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(__NR_getcpu, cpu, node, cache);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(__NR_clock_gettime, id, ts);
+}
+
+static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(__NR_gettimeofday, tv, tz);
+}
+
+static void test_getcpu(void)
+{
+ printf("[RUN]\tTesting getcpu...\n");
+
+ for (int cpu = 0; ; cpu++) {
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ return;
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys,
+ node_sys, node_vdso, node_vsys;
+ long ret_sys, ret_vdso = 1, ret_vsys = 1;
+ unsigned node;
+
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (!ret_sys)
+ node = node_sys;
+ else if (!ret_vdso)
+ node = node_vdso;
+ else if (!ret_vsys)
+ node = node_vsys;
+
+ bool ok = true;
+ if (!ret_sys && (cpu_sys != cpu || node_sys != node))
+ ok = false;
+ if (!ret_vdso && (cpu_vdso != cpu || node_vdso != node))
+ ok = false;
+ if (!ret_vsys && (cpu_vsys != cpu || node_vsys != node))
+ ok = false;
+
+ printf("[%s]\tCPU %u:", ok ? "OK" : "FAIL", cpu);
+ if (!ret_sys)
+ printf(" syscall: cpu %u, node %u", cpu_sys, node_sys);
+ if (!ret_vdso)
+ printf(" vdso: cpu %u, node %u", cpu_vdso, node_vdso);
+ if (!ret_vsys)
+ printf(" vsyscall: cpu %u, node %u", cpu_vsys,
+ node_vsys);
+ printf("\n");
+
+ if (!ok)
+ nerrs++;
+ }
+}
+
+static bool ts_leq(const struct timespec *a, const struct timespec *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_nsec <= b->tv_nsec;
+}
+
+static bool tv_leq(const struct timeval *a, const struct timeval *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_usec <= b->tv_usec;
+}
+
+static char const * const clocknames[] = {
+ [0] = "CLOCK_REALTIME",
+ [1] = "CLOCK_MONOTONIC",
+ [2] = "CLOCK_PROCESS_CPUTIME_ID",
+ [3] = "CLOCK_THREAD_CPUTIME_ID",
+ [4] = "CLOCK_MONOTONIC_RAW",
+ [5] = "CLOCK_REALTIME_COARSE",
+ [6] = "CLOCK_MONOTONIC_COARSE",
+ [7] = "CLOCK_BOOTTIME",
+ [8] = "CLOCK_REALTIME_ALARM",
+ [9] = "CLOCK_BOOTTIME_ALARM",
+ [10] = "CLOCK_SGI_CYCLE",
+ [11] = "CLOCK_TAI",
+};
+
+static void test_one_clock_gettime(int clock, const char *name)
+{
+ struct timespec start, vdso, end;
+ int vdso_ret, end_ret;
+
+ printf("[RUN]\tTesting clock_gettime for clock %s (%d)...\n", name, clock);
+
+ if (sys_clock_gettime(clock, &start) < 0) {
+ if (errno == EINVAL) {
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ if (vdso_ret == -EINVAL) {
+ printf("[OK]\tNo such clock.\n");
+ } else {
+ printf("[FAIL]\tNo such clock, but __vdso_clock_gettime returned %d\n", vdso_ret);
+ nerrs++;
+ }
+ } else {
+ printf("[WARN]\t clock_gettime(%d) syscall returned error %d\n", clock, errno);
+ }
+ return;
+ }
+
+ vdso_ret = vdso_clock_gettime(clock, &vdso);
+ end_ret = sys_clock_gettime(clock, &end);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%09ld %llu.%09ld %llu.%09ld\n",
+ (unsigned long long)start.tv_sec, start.tv_nsec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+ (unsigned long long)end.tv_sec, end.tv_nsec);
+
+ if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+}
+
+static void test_clock_gettime(void)
+{
+ for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
+ clock++) {
+ test_one_clock_gettime(clock, clocknames[clock]);
+ }
+
+ /* Also test some invalid clock ids */
+ test_one_clock_gettime(-1, "invalid");
+ test_one_clock_gettime(INT_MIN, "invalid");
+ test_one_clock_gettime(INT_MAX, "invalid");
+}
+
+static void test_gettimeofday(void)
+{
+ struct timeval start, vdso, end;
+ struct timezone sys_tz, vdso_tz;
+ int vdso_ret, end_ret;
+
+ if (!vdso_gettimeofday)
+ return;
+
+ printf("[RUN]\tTesting gettimeofday...\n");
+
+ if (sys_gettimeofday(&start, &sys_tz) < 0) {
+ printf("[FAIL]\tsys_gettimeofday failed (%d)\n", errno);
+ nerrs++;
+ return;
+ }
+
+ vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz);
+ end_ret = sys_gettimeofday(&end, NULL);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%06ld %llu.%06ld %llu.%06ld\n",
+ (unsigned long long)start.tv_sec, start.tv_usec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_usec,
+ (unsigned long long)end.tv_sec, end.tv_usec);
+
+ if (!tv_leq(&start, &vdso) || !tv_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ }
+
+ if (sys_tz.tz_minuteswest == vdso_tz.tz_minuteswest &&
+ sys_tz.tz_dsttime == vdso_tz.tz_dsttime) {
+ printf("[OK]\ttimezones match: minuteswest=%d, dsttime=%d\n",
+ sys_tz.tz_minuteswest, sys_tz.tz_dsttime);
+ } else {
+ printf("[FAIL]\ttimezones do not match\n");
+ nerrs++;
+ }
+
+ /* And make sure that passing NULL for tz doesn't crash. */
+ vdso_gettimeofday(&vdso, NULL);
+}
+
+int main(int argc, char **argv)
+{
+ fill_function_pointers();
+
+ test_clock_gettime();
+ test_gettimeofday();
+
+ /*
+ * Test getcpu() last so that, if something goes wrong setting affinity,
+ * we still run the other tests.
+ */
+ test_getcpu();
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
new file mode 100644
index 000000000..0b4f1cc22
--- /dev/null
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -0,0 +1,506 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <time.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <dlfcn.h>
+#include <string.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <errno.h>
+#include <err.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <setjmp.h>
+
+#ifdef __x86_64__
+# define VSYS(x) (x)
+#else
+# define VSYS(x) 0
+#endif
+
+#ifndef SYS_getcpu
+# ifdef __x86_64__
+# define SYS_getcpu 309
+# else
+# define SYS_getcpu 318
+# endif
+#endif
+
+/* max length of lines in /proc/self/maps - anything longer is skipped here */
+#define MAPS_LINE_LEN 128
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+/* vsyscalls and vDSO */
+bool should_read_vsyscall = false;
+
+typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
+gtod_t vgtod = (gtod_t)VSYS(0xffffffffff600000);
+gtod_t vdso_gtod;
+
+typedef int (*vgettime_t)(clockid_t, struct timespec *);
+vgettime_t vdso_gettime;
+
+typedef long (*time_func_t)(time_t *t);
+time_func_t vtime = (time_func_t)VSYS(0xffffffffff600400);
+time_func_t vdso_time;
+
+typedef long (*getcpu_t)(unsigned *, unsigned *, void *);
+getcpu_t vgetcpu = (getcpu_t)VSYS(0xffffffffff600800);
+getcpu_t vdso_getcpu;
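+
+/*
+ * The legacy vsyscall page sits at the fixed address 0xffffffffff600000
+ * with one entry every 1024 bytes: gettimeofday at +0x000, time at
+ * +0x400 and getcpu at +0x800 -- hence the VSYS() constants above.
+ */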
+
+static void init_vdso(void)
+{
+ void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[WARN]\tfailed to find vDSO\n");
+ return;
+ }
+
+ vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
+ if (!vdso_gtod)
+ printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+
+ vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ if (!vdso_gettime)
+ printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+
+ vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
+ if (!vdso_time)
+ printf("[WARN]\tfailed to find time in vDSO\n");
+
+ vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ if (!vdso_getcpu) {
+ /* getcpu() was never wired up in the 32-bit vDSO. */
+ printf("[%s]\tfailed to find getcpu in vDSO\n",
+ sizeof(long) == 8 ? "WARN" : "NOTE");
+ }
+}
+
+static int init_vsys(void)
+{
+#ifdef __x86_64__
+ int nerrs = 0;
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ should_read_vsyscall = true;
+ return 0;
+ }
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+		/* sscanf() is safe here: name is MAPS_LINE_LEN bytes, as large as the whole line */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ printf("\tvsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ printf("[FAIL]\taddress range is nonsense\n");
+ nerrs++;
+ }
+
+ printf("\tvsyscall permissions are %c-%c\n", r, x);
+ should_read_vsyscall = (r == 'r');
+ if (x != 'x') {
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ found = true;
+ break;
+ }
+
+ fclose(maps);
+
+ if (!found) {
+ printf("\tno vsyscall map in /proc/self/maps\n");
+ should_read_vsyscall = false;
+ vgtod = NULL;
+ vtime = NULL;
+ vgetcpu = NULL;
+ }
+
+ return nerrs;
+#else
+ return 0;
+#endif
+}
+
+/* syscalls */
+static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
+{
+ return syscall(SYS_gettimeofday, tv, tz);
+}
+
+static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
+{
+ return syscall(SYS_clock_gettime, id, ts);
+}
+
+static inline long sys_time(time_t *t)
+{
+ return syscall(SYS_time, t);
+}
+
+static inline long sys_getcpu(unsigned * cpu, unsigned * node,
+ void* cache)
+{
+ return syscall(SYS_getcpu, cpu, node, cache);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+static double tv_diff(const struct timeval *a, const struct timeval *b)
+{
+ return (double)(a->tv_sec - b->tv_sec) +
+ (double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
+}
+
+static int check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
+{
+ int nerrs = 0;
+ double d1, d2;
+
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
+ printf("[FAIL] %s tz mismatch\n", which);
+ nerrs++;
+ }
+
+ d1 = tv_diff(tv_other, tv_sys1);
+ d2 = tv_diff(tv_sys2, tv_other);
+ printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
+
+ if (d1 < 0 || d2 < 0) {
+ printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
+ nerrs++;
+ } else {
+ printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
+ }
+
+ return nerrs;
+}
+
+static int test_gtod(void)
+{
+ struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
+ struct timezone tz_sys, tz_vdso, tz_vsys;
+ long ret_vdso = -1;
+ long ret_vsys = -1;
+ int nerrs = 0;
+
+ printf("[RUN]\ttest gettimeofday()\n");
+
+ if (sys_gtod(&tv_sys1, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+ if (vdso_gtod)
+ ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
+ if (vgtod)
+ ret_vsys = vgtod(&tv_vsys, &tz_vsys);
+ if (sys_gtod(&tv_sys2, &tz_sys) != 0)
+ err(1, "syscall gettimeofday");
+
+ if (vdso_gtod) {
+ if (ret_vdso == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ } else {
+ printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
+ nerrs++;
+ }
+ }
+
+ if (vgtod) {
+ if (ret_vsys == 0) {
+ nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ } else {
+ printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
+ nerrs++;
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_time(void) {
+ int nerrs = 0;
+
+ printf("[RUN]\ttest time()\n");
+ long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
+ long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+ t_sys1 = sys_time(&t2_sys1);
+ if (vdso_time)
+ t_vdso = vdso_time(&t2_vdso);
+ if (vtime)
+ t_vsys = vtime(&t2_vsys);
+ t_sys2 = sys_time(&t2_sys2);
+ if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
+ printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
+ nerrs++;
+ return nerrs;
+ }
+
+ if (vdso_time) {
+ if (t_vdso < 0 || t_vdso != t2_vdso) {
+ printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
+ nerrs++;
+ } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
+ printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO time() is okay\n");
+ }
+ }
+
+ if (vtime) {
+ if (t_vsys < 0 || t_vsys != t2_vsys) {
+ printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
+ nerrs++;
+ } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
+ printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall time() is okay\n");
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_getcpu(int cpu)
+{
+ int nerrs = 0;
+ long ret_sys, ret_vdso = -1, ret_vsys = -1;
+
+ printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
+ printf("[SKIP]\tfailed to force CPU %d\n", cpu);
+ return nerrs;
+ }
+
+ unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
+ unsigned node = 0;
+ bool have_node = false;
+ ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
+ if (vdso_getcpu)
+ ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
+ if (vgetcpu)
+ ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
+
+ if (ret_sys == 0) {
+ if (cpu_sys != cpu) {
+ printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
+ nerrs++;
+ }
+
+ have_node = true;
+ node = node_sys;
+ }
+
+ if (vdso_getcpu) {
+ if (ret_vdso) {
+ printf("[FAIL]\tvDSO getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vdso;
+ }
+
+ if (cpu_vdso != cpu) {
+ printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct CPU\n");
+ }
+
+ if (node_vdso != node) {
+ printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvDSO reported correct node\n");
+ }
+ }
+ }
+
+ if (vgetcpu) {
+ if (ret_vsys) {
+ printf("[FAIL]\tvsyscall getcpu() failed\n");
+ nerrs++;
+ } else {
+ if (!have_node) {
+ have_node = true;
+ node = node_vsys;
+ }
+
+ if (cpu_vsys != cpu) {
+ printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct CPU\n");
+ }
+
+ if (node_vsys != node) {
+ printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
+ nerrs++;
+ } else {
+ printf("[OK]\tvsyscall reported correct node\n");
+ }
+ }
+ }
+
+ return nerrs;
+}
+
+static int test_vsys_r(void)
+{
+#ifdef __x86_64__
+ printf("[RUN]\tChecking read access to the vsyscall page\n");
+ bool can_read;
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ *(volatile int *)0xffffffffff600000;
+ can_read = true;
+ } else {
+ can_read = false;
+ }
+
+ if (can_read && !should_read_vsyscall) {
+ printf("[FAIL]\tWe have read access, but we shouldn't\n");
+ return 1;
+ } else if (!can_read && should_read_vsyscall) {
+ printf("[FAIL]\tWe don't have read access, but we should\n");
+ return 1;
+ } else {
+ printf("[OK]\tgot expected result\n");
+ }
+#endif
+
+ return 0;
+}
+
+
+#ifdef __x86_64__
+#define X86_EFLAGS_TF (1UL << 8)
+static volatile sig_atomic_t num_vsyscall_traps;
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_RIP];
+
+ if (((ip ^ 0xffffffffff600000UL) & ~0xfffUL) == 0)
+ num_vsyscall_traps++;
+}
+
+static int test_emulation(void)
+{
+ time_t tmp;
+ bool is_native;
+
+ if (!vtime)
+ return 0;
+
+ printf("[RUN]\tchecking that vsyscalls are emulated\n");
+ sethandler(SIGTRAP, sigtrap, 0);
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ vtime(&tmp);
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * If vsyscalls are emulated, we expect a single trap in the
+ * vsyscall page -- the call instruction will trap with RIP
+ * pointing to the entry point before emulation takes over.
+	 * In native mode, we expect at least two traps, since whatever code
+ * the vsyscall page contains will be more than just a ret
+ * instruction.
+ */
+ is_native = (num_vsyscall_traps > 1);
+
+ printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "FAIL" : "OK"),
+ (is_native ? "native" : "emulated"),
+ (int)num_vsyscall_traps);
+
+ return is_native;
+}
+#endif
+
+int main(int argc, char **argv)
+{
+ int nerrs = 0;
+
+ init_vdso();
+ nerrs += init_vsys();
+
+ nerrs += test_gtod();
+ nerrs += test_time();
+ nerrs += test_getcpu(0);
+ nerrs += test_getcpu(1);
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ nerrs += test_vsys_r();
+
+#ifdef __x86_64__
+ nerrs += test_emulation();
+#endif
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
new file mode 100644
index 000000000..ce8a995bb
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks.S
@@ -0,0 +1,67 @@
+/*
+ * thunks.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+
+ .global call32_from_64
+ .type call32_from_64, @function
+call32_from_64:
+ // rdi: stack to use
+ // esi: function to call
+
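+	//
+	// Typical use (see sysret_ss_attrs.c earlier in this series):
+	//   call32_from_64(stack32 + 4088, test_ss);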
+ // Save registers
+ pushq %rbx
+ pushq %rbp
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushfq
+
+ // Switch stacks
+ mov %rsp,(%rdi)
+ mov %rdi,%rsp
+
+ // Switch to compatibility mode
+ pushq $0x23 /* USER32_CS */
+ pushq $1f
+ lretq
+
+1:
+ .code32
+ // Call the function
+ call *%esi
+ // Switch back to long mode
+ jmp $0x33,$1f
+ .code64
+
+1:
+ // Restore the stack
+ mov (%rsp),%rsp
+
+ // Restore registers
+ popfq
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ popq %rbp
+ popq %rbx
+
+ ret
+
+.size call32_from_64, .-call32_from_64
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
new file mode 100644
index 000000000..29b644bb9
--- /dev/null
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -0,0 +1,55 @@
+/*
+ * thunks_32.S - assembly helpers for mixed-bitness code
+ * Copyright (c) 2015 Denys Vlasenko
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * These are little helpers that make it easier to switch bitness on
+ * the fly.
+ */
+
+ .text
+ .code32
+
+ .global call64_from_32
+	.type call64_from_32, @function
+
+ // 4(%esp): function to call
+call64_from_32:
+ // Fetch function address
+ mov 4(%esp), %eax
+
+ // Save registers which are callee-clobbered by 64-bit ABI
+ push %ecx
+ push %edx
+ push %esi
+ push %edi
+
+ // Switch to long mode
+ jmp $0x33,$1f
+1: .code64
+
+ // Call the function
+ call *%rax
+
+ // Switch to compatibility mode
+ push $0x23 /* USER32_CS */
+ .code32; push $1f; .code64 /* hack: can't have X86_64_32S relocation in 32-bit ELF */
+ lretq
+1: .code32
+
+ pop %edi
+ pop %esi
+ pop %edx
+ pop %ecx
+
+ ret
+
+.size call64_from_32, .-call64_from_32
diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c
new file mode 100644
index 000000000..fabdf0f51
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_32bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 32-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __i386__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
new file mode 100644
index 000000000..05c6a41b3
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_64bit_program.c
@@ -0,0 +1,18 @@
+/*
+ * Trivial program to check that we have a valid 64-bit build environment.
+ * Copyright (c) 2015 Andy Lutomirski
+ * GPL v2
+ */
+
+#ifndef __x86_64__
+# error wrong architecture
+#endif
+
+#include <stdio.h>
+
+int main()
+{
+ printf("\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/trivial_program.c b/tools/testing/selftests/x86/trivial_program.c
new file mode 100644
index 000000000..46a447163
--- /dev/null
+++ b/tools/testing/selftests/x86/trivial_program.c
@@ -0,0 +1,10 @@
+/* Trivial program to check that compilation with certain flags is working. */
+
+#include <stdio.h>
+
+int
+main(void)
+{
+ puts("");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
new file mode 100644
index 000000000..00a26a82f
--- /dev/null
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -0,0 +1,211 @@
+/*
+ * unwind_vdso.c - tests unwind info for AT_SYSINFO in the vDSO
+ * Copyright (c) 2014-2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This tests __kernel_vsyscall's unwind info.
+ */
+
+#define _GNU_SOURCE
+
+#include <features.h>
+#include <stdio.h>
+
+#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
+
+int main()
+{
+ /* We need getauxval(). */
+ printf("[SKIP]\tGLIBC before 2.16 cannot compile this test\n");
+ return 0;
+}
+
+#else
+
+#include <sys/time.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/mman.h>
+#include <signal.h>
+#include <sys/ucontext.h>
+#include <err.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/ucontext.h>
+#include <link.h>
+#include <sys/auxv.h>
+#include <dlfcn.h>
+#include <unwind.h>
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+#ifdef __x86_64__
+# define WIDTH "q"
+#else
+# define WIDTH "l"
+#endif
+
+static unsigned long get_eflags(void)
+{
+ unsigned long eflags;
+ asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
+ return eflags;
+}
+
+static void set_eflags(unsigned long eflags)
+{
+ asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
+ : : "rm" (eflags) : "flags");
+}
+
+#define X86_EFLAGS_TF (1UL << 8)
+
+static volatile sig_atomic_t nerrs;
+static unsigned long sysinfo;
+static bool got_sysinfo = false;
+static unsigned long return_address;
+
+struct unwind_state {
+ unsigned long ip; /* trap source */
+ int depth; /* -1 until we hit the trap source */
+};
+
+_Unwind_Reason_Code trace_fn(struct _Unwind_Context * ctx, void *opaque)
+{
+ struct unwind_state *state = opaque;
+ unsigned long ip = _Unwind_GetIP(ctx);
+
+ if (state->depth == -1) {
+ if (ip == state->ip)
+ state->depth = 0;
+ else
+ return _URC_NO_REASON; /* Not there yet */
+ }
+ printf("\t 0x%lx\n", ip);
+
+ if (ip == return_address) {
+ /* Here we are. */
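+ /*
+ * _Unwind_GetGR() takes DWARF register numbers; on i386 these
+ * are 0=eax, 1=ecx, 2=edx, 3=ebx, 4=esp, 5=ebp, 6=esi, 7=edi.
+ */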
+ unsigned long eax = _Unwind_GetGR(ctx, 0);
+ unsigned long ecx = _Unwind_GetGR(ctx, 1);
+ unsigned long edx = _Unwind_GetGR(ctx, 2);
+ unsigned long ebx = _Unwind_GetGR(ctx, 3);
+ unsigned long ebp = _Unwind_GetGR(ctx, 5);
+ unsigned long esi = _Unwind_GetGR(ctx, 6);
+ unsigned long edi = _Unwind_GetGR(ctx, 7);
+ bool ok = (eax == SYS_getpid || eax == getpid()) &&
+ ebx == 1 && ecx == 2 && edx == 3 &&
+ esi == 4 && edi == 5 && ebp == 6;
+
+ if (!ok)
+ nerrs++;
+ printf("[%s]\t NR = %ld, args = %ld, %ld, %ld, %ld, %ld, %ld\n",
+ (ok ? "OK" : "FAIL"),
+ eax, ebx, ecx, edx, esi, edi, ebp);
+
+ return _URC_NORMAL_STOP;
+ } else {
+ state->depth++;
+ return _URC_NO_REASON;
+ }
+}
+
+static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ struct unwind_state state;
+ unsigned long ip = ctx->uc_mcontext.gregs[REG_EIP];
+
+ if (!got_sysinfo && ip == sysinfo) {
+ got_sysinfo = true;
+
+ /* Find the return address. */
+ return_address = *(unsigned long *)(unsigned long)ctx->uc_mcontext.gregs[REG_ESP];
+
+ printf("\tIn vsyscall at 0x%lx, returning to 0x%lx\n",
+ ip, return_address);
+ }
+
+ if (!got_sysinfo)
+ return; /* Not there yet */
+
+ if (ip == return_address) {
+ ctx->uc_mcontext.gregs[REG_EFL] &= ~X86_EFLAGS_TF;
+ printf("\tVsyscall is done\n");
+ return;
+ }
+
+ printf("\tSIGTRAP at 0x%lx\n", ip);
+
+ state.ip = ip;
+ state.depth = -1;
+ _Unwind_Backtrace(trace_fn, &state);
+}
+
+int main()
+{
+ sysinfo = getauxval(AT_SYSINFO);
+ printf("\tAT_SYSINFO is 0x%lx\n", sysinfo);
+
+ Dl_info info;
+ if (!dladdr((void *)sysinfo, &info)) {
+ printf("[WARN]\tdladdr failed on AT_SYSINFO\n");
+ } else {
+ printf("[OK]\tAT_SYSINFO maps to %s, loaded at 0x%p\n",
+ info.dli_fname, info.dli_fbase);
+ }
+
+ sethandler(SIGTRAP, sigtrap, 0);
+
+ syscall(SYS_getpid); /* Force symbol binding without TF set. */
+ printf("[RUN]\tSet TF and check a fast syscall\n");
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ syscall(SYS_getpid, 1, 2, 3, 4, 5, 6);
+ if (!got_sysinfo) {
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+
+ /*
+ * The most likely cause of this is that you're on Debian or
+ * a Debian-based distro, you're missing libc6-i686, and you're
+ * affected by libc/19006 (https://sourceware.org/PR19006).
+ */
+ printf("[WARN]\tsyscall(2) didn't enter AT_SYSINFO\n");
+ }
+
+ if (get_eflags() & X86_EFLAGS_TF) {
+ printf("[FAIL]\tTF is still set\n");
+ nerrs++;
+ }
+
+ if (nerrs) {
+ printf("[FAIL]\tThere were errors\n");
+ return 1;
+ } else {
+ printf("[OK]\tAll is well\n");
+ return 0;
+ }
+}
+
+#endif /* New enough libc */
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
new file mode 100644
index 000000000..cb038424a
--- /dev/null
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -0,0 +1,88 @@
+/*
+ * vdso_restorer.c - tests vDSO-based signal restore
+ * Copyright (c) 2015 Andrew Lutomirski
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * This makes sure that sa_restorer == NULL keeps working on 32-bit
+ * configurations. Modern glibc doesn't use it under any circumstances,
+ * so it's easy to overlook breakage.
+ *
+ * 64-bit userspace has never supported sa_restorer == NULL, so this is
+ * 32-bit only.
+ */
+
+#define _GNU_SOURCE
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <syscall.h>
+#include <sys/syscall.h>
+
+/* Open-code this -- the headers are too messy to easily use them. */
+struct real_sigaction {
+ void *handler;
+ unsigned long flags;
+ void *restorer;
+ unsigned int mask[2];
+};
+
+static volatile sig_atomic_t handler_called;
+
+static void handler_with_siginfo(int sig, siginfo_t *info, void *ctx_void)
+{
+ handler_called = 1;
+}
+
+static void handler_without_siginfo(int sig)
+{
+ handler_called = 1;
+}
+
+int main()
+{
+ int nerrs = 0;
+ struct real_sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.handler = handler_with_siginfo;
+ sa.flags = SA_SIGINFO;
+ sa.restorer = NULL; /* request kernel-provided restorer */
+
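+ /* The last argument is the kernel's sigset size (8 bytes on Linux). */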
+ if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
+ err(1, "raw rt_sigaction syscall");
+
+ raise(SIGUSR1);
+
+ if (handler_called) {
+ printf("[OK]\tSA_SIGINFO handler returned successfully\n");
+ } else {
+ printf("[FAIL]\tSA_SIGINFO handler was not called\n");
+ nerrs++;
+ }
+
+ sa.flags = 0;
+ sa.handler = handler_without_siginfo;
+ if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)
+ err(1, "raw sigaction syscall");
+ handler_called = 0;
+
+ raise(SIGUSR1);
+
+ if (handler_called) {
+ printf("[OK]\t!SA_SIGINFO handler returned successfully\n");
+ } else {
+ printf("[FAIL]\t!SA_SIGINFO handler was not called\n");
+ nerrs++;
+ }
+
+ return nerrs ? 1 : 0;
+}
diff --git a/tools/testing/selftests/zram/Makefile b/tools/testing/selftests/zram/Makefile
new file mode 100644
index 000000000..7f78eb1b5
--- /dev/null
+++ b/tools/testing/selftests/zram/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+TEST_PROGS := zram.sh
+TEST_FILES := zram01.sh zram02.sh zram_lib.sh
+EXTRA_CLEAN := err.log
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/zram/README b/tools/testing/selftests/zram/README
new file mode 100644
index 000000000..7972cc512
--- /dev/null
+++ b/tools/testing/selftests/zram/README
@@ -0,0 +1,40 @@
+zram: Compressed RAM based block devices
+----------------------------------------
+* Introduction
+
+The zram module creates RAM based block devices named /dev/zram<id>
+(<id> = 0, 1, ...). Pages written to these disks are compressed and stored
+in memory itself. These disks allow very fast I/O, and compression provides
+substantial memory savings. Use cases include /tmp storage, swap disks,
+various caches under /var, and many more :)
+
+Statistics for individual zram devices are exported through sysfs nodes at
+/sys/block/zram<id>/
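+
+For example, a device can be set up by hand as follows (a minimal sketch;
+the lzo algorithm and 100M size are illustrative values):
+
+ echo lzo > /sys/block/zram0/comp_algorithm
+ echo 100M > /sys/block/zram0/disksize
+ mkfs.ext4 /dev/zram0
+ mount /dev/zram0 /mnt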
+
+Kconfig required:
+CONFIG_ZRAM=y
+CONFIG_CRYPTO_LZ4=y
+CONFIG_ZPOOL=y
+CONFIG_ZSMALLOC=y
+
+ZRAM Testcases
+--------------
+zram_lib.sh: create library with initialization/cleanup functions
+zram.sh: For sanity check of CONFIG_ZRAM and to run zram01 and zram02
+
+Two functional tests: zram01 and zram02:
+zram01.sh: creates general purpose ram disks with ext4 filesystems
+zram02.sh: creates block device for swap
+
+Commands required for testing:
+ - bc
+ - dd
+ - free
+ - awk
+ - mkswap
+ - swapon
+ - swapoff
+ - mkfs / mkfs.ext4
+
+For more information, please refer to:
+kernel-source-tree/Documentation/blockdev/zram.txt
diff --git a/tools/testing/selftests/zram/config b/tools/testing/selftests/zram/config
new file mode 100644
index 000000000..e0cc47e2c
--- /dev/null
+++ b/tools/testing/selftests/zram/config
@@ -0,0 +1,2 @@
+CONFIG_ZSMALLOC=y
+CONFIG_ZRAM=m
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
new file mode 100755
index 000000000..b0b91d9b0
--- /dev/null
+++ b/tools/testing/selftests/zram/zram.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+TCID="zram.sh"
+
+. ./zram_lib.sh
+
+run_zram () {
+echo "--------------------"
+echo "running zram tests"
+echo "--------------------"
+./zram01.sh
+echo ""
+./zram02.sh
+}
+
+check_prereqs
+
+run_zram
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh
new file mode 100755
index 000000000..8abc99650
--- /dev/null
+++ b/tools/testing/selftests/zram/zram01.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Test creates several zram devices with different filesystems on them.
+# It fills each device with zeros and checks that compression works.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram01"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' accepts either plain bytes or mem
+# suffixes. Some old kernels (e.g. RHEL 6.6GA) parse disksize with
+# strict_strtoull(), which does not understand mem suffixes, while
+# newer kernels use memparse(), which does. Use plain bytes here so
+# the test works everywhere.
+zram_sizes="2097152" # 2MB
+zram_mem_limits="2M"
+zram_filesystems="ext4"
+zram_algs="lzo"
+
+zram_fill_fs()
+{
+ for i in $(seq $dev_start $dev_end); do
+ echo "fill zram$i..."
+ local b=0
+ while true; do
+ dd conv=notrunc if=/dev/zero of=zram${i}/file \
+ oflag=append count=1 bs=1024 status=none \
+ > /dev/null 2>&1 || break
+ b=$(($b + 1))
+ done
+ echo "zram$i can be filled with '$b' KB"
+
+ local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"`
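+ # ratio scaled by 100: ($b KB written * 1024) divided by the bytes
+ # of memory actually used, so 100 means a 1:1 ratio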
+ local v=$((100 * 1024 * $b / $mem_used_total))
+ if [ "$v" -lt 100 ]; then
+ echo "FAIL compression ratio: 0.$v:1"
+ ERR_CODE=-1
+ return
+ fi
+
+ echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
+ done
+}
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_compress_alg
+zram_set_disksizes
+zram_set_memlimit
+zram_makefs
+zram_mount
+
+zram_fill_fs
+zram_cleanup
+
+if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+else
+ echo "$TCID : [PASS]"
+fi
diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh
new file mode 100755
index 000000000..3768cfd2e
--- /dev/null
+++ b/tools/testing/selftests/zram/zram02.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Test checks that we can create swap zram device.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+TCID="zram02"
+ERR_CODE=0
+
+. ./zram_lib.sh
+
+# Test will create the following number of zram devices:
+dev_num=1
+# This is a list of parameters for zram devices.
+# Number of items must be equal to 'dev_num' parameter.
+zram_max_streams="2"
+
+# The zram sysfs node 'disksize' accepts either plain bytes or mem
+# suffixes. Some old kernels (e.g. RHEL 6.6GA) parse disksize with
+# strict_strtoull(), which does not understand mem suffixes, while
+# newer kernels use memparse(), which does. Use plain bytes here so
+# the test works everywhere.
+zram_sizes="1048576" # 1M
+zram_mem_limits="1M"
+
+check_prereqs
+zram_load
+zram_max_streams
+zram_set_disksizes
+zram_set_memlimit
+zram_makeswap
+zram_swapoff
+zram_cleanup
+
+if [ $ERR_CODE -ne 0 ]; then
+ echo "$TCID : [FAIL]"
+else
+ echo "$TCID : [PASS]"
+fi
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
new file mode 100755
index 000000000..130d193cb
--- /dev/null
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -0,0 +1,278 @@
+#!/bin/sh
+# Copyright (c) 2015 Oracle and/or its affiliates. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
+# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
+
+dev_makeswap=-1
+dev_mounted=-1
+dev_start=0
+dev_end=-1
+module_load=-1
+sys_control=-1
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+kernel_version=`uname -r | cut -d'.' -f1,2`
+kernel_major=${kernel_version%.*}
+kernel_minor=${kernel_version#*.}
+
+trap INT
+
+check_prereqs()
+{
+ local msg="skip all tests:"
+ local uid=$(id -u)
+
+ if [ $uid -ne 0 ]; then
+ echo $msg must be run as root >&2
+ exit $ksft_skip
+ fi
+}
+
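+# kernel_gte <major.minor>: return 0 if the running kernel is at least
+# that version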
+kernel_gte()
+{
+ major=${1%.*}
+ minor=${1#*.}
+
+ if [ $kernel_major -gt $major ]; then
+ return 0
+ elif [ $kernel_major -eq $major ] && [ $kernel_minor -ge $minor ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+zram_cleanup()
+{
+ echo "zram cleanup"
+ local i=
+ for i in $(seq $dev_start $dev_makeswap); do
+ swapoff /dev/zram$i
+ done
+
+ for i in $(seq $dev_start $dev_mounted); do
+ umount /dev/zram$i
+ done
+
+ for i in $(seq $dev_start $dev_end); do
+ echo 1 > /sys/block/zram${i}/reset
+ rm -rf zram$i
+ done
+
+ if [ $sys_control -eq 1 ]; then
+ for i in $(seq $dev_start $dev_end); do
+ echo $i > /sys/class/zram-control/hot_remove
+ done
+ fi
+
+ if [ $module_load -eq 1 ]; then
+ rmmod zram > /dev/null 2>&1
+ fi
+}
+
+zram_load()
+{
+ echo "create '$dev_num' zram device(s)"
+
+ # zram module loaded, new kernel
+ if [ -d "/sys/class/zram-control" ]; then
+ echo "zram modules already loaded, kernel supports" \
+ "zram-control interface"
+ dev_start=$(ls /dev/zram* | wc -w)
+ dev_end=$(($dev_start + $dev_num - 1))
+ sys_control=1
+
+ for i in $(seq $dev_start $dev_end); do
+ cat /sys/class/zram-control/hot_add > /dev/null
+ done
+
+ echo "all zram devices (/dev/zram$dev_start~$dev_end" \
+ "successfully created"
+ return 0
+ fi
+
+ # detect old kernel or built-in
+ modprobe zram num_devices=$dev_num
+ if [ ! -d "/sys/class/zram-control" ]; then
+ if grep -q '^zram' /proc/modules; then
+ rmmod zram > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "zram module is being used on old kernel" \
+ "without zram-control interface"
+ exit $ksft_skip
+ fi
+ else
+ echo "test needs CONFIG_ZRAM=m on old kernel without" \
+ "zram-control interface"
+ exit $ksft_skip
+ fi
+ modprobe zram num_devices=$dev_num
+ fi
+
+ module_load=1
+ dev_end=$(($dev_num - 1))
+ echo "all zram devices (/dev/zram0~$dev_end) successfully created"
+}
+
+zram_max_streams()
+{
+ echo "set max_comp_streams to zram device(s)"
+
+ kernel_gte 4.7
+ if [ $? -eq 0 ]; then
+ echo "The device attribute max_comp_streams was"\
+ "deprecated in 4.7"
+ return 0
+ fi
+
+ local i=$dev_start
+ for max_s in $zram_max_streams; do
+ local sys_path="/sys/block/zram${i}/max_comp_streams"
+ echo $max_s > $sys_path || \
+ echo "FAIL failed to set '$max_s' to $sys_path"
+ sleep 1
+ local max_streams=$(cat $sys_path)
+
+ [ "$max_s" -ne "$max_streams" ] && \
+ echo "FAIL can't set max_streams '$max_s', get $max_stream"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$max_streams'"
+ done
+
+ echo "zram max streams: OK"
+}
+
+zram_compress_alg()
+{
+ echo "test that we can set compression algorithm"
+
+ local i=$dev_start
+ local algs=$(cat /sys/block/zram${i}/comp_algorithm)
+ echo "supported algs: $algs"
+
+ for alg in $zram_algs; do
+ local sys_path="/sys/block/zram${i}/comp_algorithm"
+ echo "$alg" > $sys_path || \
+ echo "FAIL can't set '$alg' to $sys_path"
+ i=$(($i + 1))
+ echo "$sys_path = '$alg'"
+ done
+
+ echo "zram set compression algorithm: OK"
+}
+
+zram_set_disksizes()
+{
+ echo "set disk size to zram device(s)"
+ local i=$dev_start
+ for ds in $zram_sizes; do
+ local sys_path="/sys/block/zram${i}/disksize"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$ds'"
+ done
+
+ echo "zram set disksizes: OK"
+}
+
+zram_set_memlimit()
+{
+ echo "set memory limit to zram device(s)"
+
+ local i=$dev_start
+ for ds in $zram_mem_limits; do
+ local sys_path="/sys/block/zram${i}/mem_limit"
+ echo "$ds" > $sys_path || \
+ echo "FAIL can't set '$ds' to $sys_path"
+
+ i=$(($i + 1))
+ echo "$sys_path = '$ds'"
+ done
+
+ echo "zram set memory limit: OK"
+}
+
+zram_makeswap()
+{
+ echo "make swap with zram device(s)"
+ local i=$dev_start
+ for i in $(seq $dev_start $dev_end); do
+ mkswap /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL mkswap /dev/zram$1 failed"
+ fi
+
+ swapon /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL swapon /dev/zram$1 failed"
+ fi
+
+ echo "done with /dev/zram$i"
+ dev_makeswap=$i
+ done
+
+ echo "zram making zram mkswap and swapon: OK"
+}
+
+zram_swapoff()
+{
+ local i=
+ for i in $(seq $dev_start $dev_end); do
+ swapoff /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL swapoff /dev/zram$i failed"
+ fi
+ done
+ dev_makeswap=-1
+
+ echo "zram swapoff: OK"
+}
+
+zram_makefs()
+{
+ local i=$dev_start
+ for fs in $zram_filesystems; do
+ # if the requested fs is not supported, fall back to ext2
+ which mkfs.$fs > /dev/null 2>&1 || fs=ext2
+
+ echo "make $fs filesystem on /dev/zram$i"
+ mkfs.$fs /dev/zram$i > err.log 2>&1
+ if [ $? -ne 0 ]; then
+ cat err.log
+ echo "FAIL failed to make $fs on /dev/zram$i"
+ fi
+ i=$(($i + 1))
+ echo "zram mkfs.$fs: OK"
+ done
+}
+
+zram_mount()
+{
+ local i=0
+ for i in $(seq $dev_start $dev_end); do
+ echo "mount /dev/zram$i"
+ mkdir zram$i
+ mount /dev/zram$i zram$i > /dev/null || \
+ echo "FAIL mount /dev/zram$i failed"
+ dev_mounted=$i
+ done
+
+ echo "zram mount of zram device(s): OK"
+}
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
new file mode 100644
index 000000000..dc5f11faf
--- /dev/null
+++ b/tools/testing/vsock/.gitignore
@@ -0,0 +1,2 @@
+*.d
+vsock_diag_test
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
new file mode 100644
index 000000000..66ba09241
--- /dev/null
+++ b/tools/testing/vsock/Makefile
@@ -0,0 +1,9 @@
+all: test
+test: vsock_diag_test
+vsock_diag_test: vsock_diag_test.o timeout.o control.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../../include/uapi -I../../include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
+.PHONY: all test clean
+clean:
+ ${RM} *.o *.d vsock_diag_test
+-include *.d
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
new file mode 100644
index 000000000..2cc6d7302
--- /dev/null
+++ b/tools/testing/vsock/README
@@ -0,0 +1,36 @@
+AF_VSOCK test suite
+-------------------
+These tests exercise net/vmw_vsock/ host<->guest sockets for VMware, KVM, and
+Hyper-V.
+
+The following tests are available:
+
+ * vsock_diag_test - vsock_diag.ko module for listing open sockets
+
+The following prerequisite steps are not automated and must be performed prior
+to running tests:
+
+1. Build the kernel and these tests.
+2. Install the kernel and tests on the host.
+3. Install the kernel and tests inside the guest.
+4. Boot the guest and ensure that the AF_VSOCK transport is enabled.
+
+Invoke test binaries in both directions as follows:
+
+ # host=server, guest=client
+ (host)# $TEST_BINARY --mode=server \
+ --control-port=1234 \
+ --peer-cid=3
+ (guest)# $TEST_BINARY --mode=client \
+ --control-host=$HOST_IP \
+ --control-port=1234 \
+ --peer-cid=2
+
+ # host=client, guest=server
+ (guest)# $TEST_BINARY --mode=server \
+ --control-port=1234 \
+ --peer-cid=2
+ (host)# $TEST_BINARY --mode=client \
+ --control-host=$GUEST_IP \
+ --control-port=1234 \
+ --peer-cid=3
diff --git a/tools/testing/vsock/control.c b/tools/testing/vsock/control.c
new file mode 100644
index 000000000..90fd47f0e
--- /dev/null
+++ b/tools/testing/vsock/control.c
@@ -0,0 +1,219 @@
+/* Control socket for client/server test execution
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* The client and server may need to coordinate to avoid race conditions like
+ * the client attempting to connect to a socket that the server is not
+ * listening on yet. The control socket offers a communications channel for
+ * such coordination tasks.
+ *
+ * If the client calls control_expectln("LISTENING"), then it will block until
+ * the server calls control_writeln("LISTENING"). This provides a simple
+ * mechanism for coordinating between the client and the server.
+ */
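+
+/* A minimal sketch of the pattern (the "LISTENING" string is only a
+ * convention between the test peers, not part of this API):
+ *
+ *   server: listen(fd, 1); control_writeln("LISTENING");
+ *   client: control_expectln("LISTENING"); connect(fd, ...);
+ */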
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "timeout.h"
+#include "control.h"
+
+static int control_fd = -1;
+
+/* Open the control socket, either in server or client mode */
+void control_init(const char *control_host,
+ const char *control_port,
+ bool server)
+{
+ struct addrinfo hints = {
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *result = NULL;
+ struct addrinfo *ai;
+ int ret;
+
+ ret = getaddrinfo(control_host, control_port, &hints, &result);
+ if (ret != 0) {
+ fprintf(stderr, "%s\n", gai_strerror(ret));
+ exit(EXIT_FAILURE);
+ }
+
+ for (ai = result; ai; ai = ai->ai_next) {
+ int fd;
+ int val = 1;
+
+ fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (fd < 0)
+ continue;
+
+ if (!server) {
+ if (connect(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+ goto next;
+ control_fd = fd;
+ printf("Control socket connected to %s:%s.\n",
+ control_host, control_port);
+ break;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ &val, sizeof(val)) < 0) {
+ perror("setsockopt");
+ exit(EXIT_FAILURE);
+ }
+
+ if (bind(fd, ai->ai_addr, ai->ai_addrlen) < 0)
+ goto next;
+ if (listen(fd, 1) < 0)
+ goto next;
+
+ printf("Control socket listening on %s:%s\n",
+ control_host, control_port);
+ fflush(stdout);
+
+ control_fd = accept(fd, NULL, 0);
+ close(fd);
+
+ if (control_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ printf("Control socket connection accepted...\n");
+ break;
+
+next:
+ close(fd);
+ }
+
+ if (control_fd < 0) {
+ fprintf(stderr, "Control socket initialization failed. Invalid address %s:%s?\n",
+ control_host, control_port);
+ exit(EXIT_FAILURE);
+ }
+
+ freeaddrinfo(result);
+}
+
+/* Free resources */
+void control_cleanup(void)
+{
+ close(control_fd);
+ control_fd = -1;
+}
+
+/* Write a line to the control socket */
+void control_writeln(const char *str)
+{
+ ssize_t len = strlen(str);
+ ssize_t ret;
+
+ timeout_begin(TIMEOUT);
+
+ do {
+ ret = send(control_fd, str, len, MSG_MORE);
+ timeout_check("send");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != len) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ do {
+ ret = send(control_fd, "\n", 1, 0);
+ timeout_check("send");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != 1) {
+ perror("send");
+ exit(EXIT_FAILURE);
+ }
+
+ timeout_end();
+}
+
+/* Return the next line from the control socket (without the trailing newline).
+ *
+ * The program terminates if a timeout occurs.
+ *
+ * The caller must free() the returned string.
+ */
+char *control_readln(void)
+{
+ char *buf = NULL;
+ size_t idx = 0;
+ size_t buflen = 0;
+
+ timeout_begin(TIMEOUT);
+
+ for (;;) {
+ ssize_t ret;
+
+ if (idx >= buflen) {
+ char *new_buf;
+
+ new_buf = realloc(buf, buflen + 80);
+ if (!new_buf) {
+ perror("realloc");
+ exit(EXIT_FAILURE);
+ }
+
+ buf = new_buf;
+ buflen += 80;
+ }
+
+ do {
+ ret = recv(control_fd, &buf[idx], 1, 0);
+ timeout_check("recv");
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret == 0) {
+ fprintf(stderr, "unexpected EOF on control socket\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (ret != 1) {
+ perror("recv");
+ exit(EXIT_FAILURE);
+ }
+
+ if (buf[idx] == '\n') {
+ buf[idx] = '\0';
+ break;
+ }
+
+ idx++;
+ }
+
+ timeout_end();
+
+ return buf;
+}
+
+/* Wait until a given line is received or a timeout occurs */
+void control_expectln(const char *str)
+{
+ char *line;
+
+ line = control_readln();
+ if (strcmp(str, line) != 0) {
+ fprintf(stderr, "expected \"%s\" on control socket, got \"%s\"\n",
+ str, line);
+ exit(EXIT_FAILURE);
+ }
+
+ free(line);
+}
diff --git a/tools/testing/vsock/control.h b/tools/testing/vsock/control.h
new file mode 100644
index 000000000..54a07efd2
--- /dev/null
+++ b/tools/testing/vsock/control.h
@@ -0,0 +1,13 @@
+#ifndef CONTROL_H
+#define CONTROL_H
+
+#include <stdbool.h>
+
+void control_init(const char *control_host, const char *control_port,
+ bool server);
+void control_cleanup(void);
+void control_writeln(const char *str);
+char *control_readln(void);
+void control_expectln(const char *str);
+
+#endif /* CONTROL_H */
diff --git a/tools/testing/vsock/timeout.c b/tools/testing/vsock/timeout.c
new file mode 100644
index 000000000..c49b3003b
--- /dev/null
+++ b/tools/testing/vsock/timeout.c
@@ -0,0 +1,64 @@
+/* Timeout API for single-threaded programs that use blocking
+ * syscalls (read/write/send/recv/connect/accept).
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+/* Use the following pattern:
+ *
+ * timeout_begin(TIMEOUT);
+ * do {
+ * ret = accept(...);
+ * timeout_check("accept");
+ * } while (ret < 0 && errno == EINTR);
+ * timeout_end();
+ */
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdio.h>
+#include "timeout.h"
+
+static volatile bool timeout;
+
+/* SIGALRM handler function. Do not use sleep(2), alarm(2), or
+ * setitimer(2) while using this API - they may interfere with each
+ * other.
+ */
+void sigalrm(int signo)
+{
+ timeout = true;
+}
+
+/* Start a timeout. Call timeout_check() to verify that the timeout hasn't
+ * expired. timeout_end() must be called to stop the timeout. Timeouts cannot
+ * be nested.
+ */
+void timeout_begin(unsigned int seconds)
+{
+ alarm(seconds);
+}
+
+/* Exit with an error message if the timeout has expired */
+void timeout_check(const char *operation)
+{
+ if (timeout) {
+ fprintf(stderr, "%s timed out\n", operation);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Stop a timeout */
+void timeout_end(void)
+{
+ alarm(0);
+ timeout = false;
+}
diff --git a/tools/testing/vsock/timeout.h b/tools/testing/vsock/timeout.h
new file mode 100644
index 000000000..77db9ce98
--- /dev/null
+++ b/tools/testing/vsock/timeout.h
@@ -0,0 +1,14 @@
+#ifndef TIMEOUT_H
+#define TIMEOUT_H
+
+enum {
+ /* Default timeout */
+ TIMEOUT = 10 /* seconds */
+};
+
+void sigalrm(int signo);
+void timeout_begin(unsigned int seconds);
+void timeout_check(const char *operation);
+void timeout_end(void);
+
+#endif /* TIMEOUT_H */
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c
new file mode 100644
index 000000000..e896a4af5
--- /dev/null
+++ b/tools/testing/vsock/vsock_diag_test.c
@@ -0,0 +1,681 @@
+/*
+ * vsock_diag_test - vsock_diag.ko test suite
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/list.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/sock_diag.h>
+#include <netinet/tcp.h>
+
+#include "../../../include/uapi/linux/vm_sockets.h"
+#include "../../../include/uapi/linux/vm_sockets_diag.h"
+
+#include "timeout.h"
+#include "control.h"
+
+enum test_mode {
+ TEST_MODE_UNSET,
+ TEST_MODE_CLIENT,
+ TEST_MODE_SERVER
+};
+
+/* Per-socket status */
+struct vsock_stat {
+ struct list_head list;
+ struct vsock_diag_msg msg;
+};
+
+static const char *sock_type_str(int type)
+{
+ switch (type) {
+ case SOCK_DGRAM:
+ return "DGRAM";
+ case SOCK_STREAM:
+ return "STREAM";
+ default:
+ return "INVALID TYPE";
+ }
+}
+
+static const char *sock_state_str(int state)
+{
+ switch (state) {
+ case TCP_CLOSE:
+ return "UNCONNECTED";
+ case TCP_SYN_SENT:
+ return "CONNECTING";
+ case TCP_ESTABLISHED:
+ return "CONNECTED";
+ case TCP_CLOSING:
+ return "DISCONNECTING";
+ case TCP_LISTEN:
+ return "LISTEN";
+ default:
+ return "INVALID STATE";
+ }
+}
+
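+/* vdiag_shutdown mirrors sk_shutdown: bit 0 = RCV_SHUTDOWN, bit 1 = SEND_SHUTDOWN */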
+static const char *sock_shutdown_str(int shutdown)
+{
+ switch (shutdown) {
+ case 1:
+ return "RCV_SHUTDOWN";
+ case 2:
+ return "SEND_SHUTDOWN";
+ case 3:
+ return "RCV_SHUTDOWN | SEND_SHUTDOWN";
+ default:
+ return "0";
+ }
+}
+
+static void print_vsock_addr(FILE *fp, unsigned int cid, unsigned int port)
+{
+ if (cid == VMADDR_CID_ANY)
+ fprintf(fp, "*:");
+ else
+ fprintf(fp, "%u:", cid);
+
+ if (port == VMADDR_PORT_ANY)
+ fprintf(fp, "*");
+ else
+ fprintf(fp, "%u", port);
+}
+
+static void print_vsock_stat(FILE *fp, struct vsock_stat *st)
+{
+ print_vsock_addr(fp, st->msg.vdiag_src_cid, st->msg.vdiag_src_port);
+ fprintf(fp, " ");
+ print_vsock_addr(fp, st->msg.vdiag_dst_cid, st->msg.vdiag_dst_port);
+ fprintf(fp, " %s %s %s %u\n",
+ sock_type_str(st->msg.vdiag_type),
+ sock_state_str(st->msg.vdiag_state),
+ sock_shutdown_str(st->msg.vdiag_shutdown),
+ st->msg.vdiag_ino);
+}
+
+static void print_vsock_stats(FILE *fp, struct list_head *head)
+{
+ struct vsock_stat *st;
+
+ list_for_each_entry(st, head, list)
+ print_vsock_stat(fp, st);
+}
+
+static struct vsock_stat *find_vsock_stat(struct list_head *head, int fd)
+{
+ struct vsock_stat *st;
+ struct stat stat;
+
+ if (fstat(fd, &stat) < 0) {
+ perror("fstat");
+ exit(EXIT_FAILURE);
+ }
+
+ list_for_each_entry(st, head, list)
+ if (st->msg.vdiag_ino == stat.st_ino)
+ return st;
+
+ fprintf(stderr, "cannot find fd %d\n", fd);
+ exit(EXIT_FAILURE);
+}
+
+static void check_no_sockets(struct list_head *head)
+{
+ if (!list_empty(head)) {
+ fprintf(stderr, "expected no sockets\n");
+ print_vsock_stats(stderr, head);
+ exit(1);
+ }
+}
+
+static void check_num_sockets(struct list_head *head, int expected)
+{
+ struct list_head *node;
+ int n = 0;
+
+ list_for_each(node, head)
+ n++;
+
+ if (n != expected) {
+ fprintf(stderr, "expected %d sockets, found %d\n",
+ expected, n);
+ print_vsock_stats(stderr, head);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void check_socket_state(struct vsock_stat *st, __u8 state)
+{
+ if (st->msg.vdiag_state != state) {
+ fprintf(stderr, "expected socket state %#x, got %#x\n",
+ state, st->msg.vdiag_state);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void send_req(int fd)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ };
+ struct {
+ struct nlmsghdr nlh;
+ struct vsock_diag_req vreq;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP,
+ },
+ .vreq = {
+ .sdiag_family = AF_VSOCK,
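+ /* dump sockets in every state */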
+ .vdiag_states = ~(__u32)0,
+ },
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req),
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+
+ perror("sendmsg");
+ exit(EXIT_FAILURE);
+ }
+
+ return;
+ }
+}
+
+static ssize_t recv_resp(int fd, void *buf, size_t len)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK,
+ };
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ ssize_t ret;
+
+ do {
+ ret = recvmsg(fd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0) {
+ perror("recvmsg");
+ exit(EXIT_FAILURE);
+ }
+
+ return ret;
+}
+
+static void add_vsock_stat(struct list_head *sockets,
+ const struct vsock_diag_msg *resp)
+{
+ struct vsock_stat *st;
+
+ st = malloc(sizeof(*st));
+ if (!st) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ st->msg = *resp;
+ list_add_tail(&st->list, sockets);
+}
+
+/*
+ * Read vsock stats into a list.
+ */
+static void read_vsock_stat(struct list_head *sockets)
+{
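+ /* 8 KiB receive buffer; declared as an array of long for alignment */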
+ long buf[8192 / sizeof(long)];
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0) {
+ perror("socket");
+ exit(EXIT_FAILURE);
+ }
+
+ send_req(fd);
+
+ for (;;) {
+ const struct nlmsghdr *h;
+ ssize_t ret;
+
+ ret = recv_resp(fd, buf, sizeof(buf));
+ if (ret == 0)
+ goto done;
+ if (ret < sizeof(*h)) {
+ fprintf(stderr, "short read of %zd bytes\n", ret);
+ exit(EXIT_FAILURE);
+ }
+
+ h = (struct nlmsghdr *)buf;
+
+ while (NLMSG_OK(h, ret)) {
+ if (h->nlmsg_type == NLMSG_DONE)
+ goto done;
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ const struct nlmsgerr *err = NLMSG_DATA(h);
+
+ if (h->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
+ fprintf(stderr, "NLMSG_ERROR\n");
+ else {
+ errno = -err->error;
+ perror("NLMSG_ERROR");
+ }
+
+ exit(EXIT_FAILURE);
+ }
+
+ if (h->nlmsg_type != SOCK_DIAG_BY_FAMILY) {
+ fprintf(stderr, "unexpected nlmsg_type %#x\n",
+ h->nlmsg_type);
+ exit(EXIT_FAILURE);
+ }
+ if (h->nlmsg_len <
+ NLMSG_LENGTH(sizeof(struct vsock_diag_msg))) {
+ fprintf(stderr, "short vsock_diag_msg\n");
+ exit(EXIT_FAILURE);
+ }
+
+ add_vsock_stat(sockets, NLMSG_DATA(h));
+
+ h = NLMSG_NEXT(h, ret);
+ }
+ }
+
+done:
+ close(fd);
+}
+
+static void free_sock_stat(struct list_head *sockets)
+{
+ struct vsock_stat *st;
+ struct vsock_stat *next;
+
+ list_for_each_entry_safe(st, next, sockets, list)
+ free(st);
+}
+
+static void test_no_sockets(unsigned int peer_cid)
+{
+ LIST_HEAD(sockets);
+
+ read_vsock_stat(&sockets);
+
+ check_no_sockets(&sockets);
+
+ free_sock_stat(&sockets);
+}
+
+static void test_listen_socket_server(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 1);
+ st = find_vsock_stat(&sockets, fd);
+ check_socket_state(st, TCP_LISTEN);
+
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static void test_connect_client(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = peer_cid,
+ },
+ };
+ int fd;
+ int ret;
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+
+ control_expectln("LISTENING");
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 1);
+ st = find_vsock_stat(&sockets, fd);
+ check_socket_state(st, TCP_ESTABLISHED);
+
+ control_expectln("DONE");
+ control_writeln("DONE");
+
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static void test_connect_server(unsigned int peer_cid)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } clientaddr;
+ socklen_t clientaddr_len = sizeof(clientaddr.svm);
+ LIST_HEAD(sockets);
+ struct vsock_stat *st;
+ int fd;
+ int client_fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ if (listen(fd, 1) < 0) {
+ perror("listen");
+ exit(EXIT_FAILURE);
+ }
+
+ control_writeln("LISTENING");
+
+ timeout_begin(TIMEOUT);
+ do {
+ client_fd = accept(fd, &clientaddr.sa, &clientaddr_len);
+ timeout_check("accept");
+ } while (client_fd < 0 && errno == EINTR);
+ timeout_end();
+
+ if (client_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.sa.sa_family != AF_VSOCK) {
+ fprintf(stderr, "expected AF_VSOCK from accept(2), got %d\n",
+ clientaddr.sa.sa_family);
+ exit(EXIT_FAILURE);
+ }
+ if (clientaddr.svm.svm_cid != peer_cid) {
+ fprintf(stderr, "expected peer CID %u from accept(2), got %u\n",
+ peer_cid, clientaddr.svm.svm_cid);
+ exit(EXIT_FAILURE);
+ }
+
+ read_vsock_stat(&sockets);
+
+ check_num_sockets(&sockets, 2);
+ find_vsock_stat(&sockets, fd);
+ st = find_vsock_stat(&sockets, client_fd);
+ check_socket_state(st, TCP_ESTABLISHED);
+
+ control_writeln("DONE");
+ control_expectln("DONE");
+
+ close(client_fd);
+ close(fd);
+ free_sock_stat(&sockets);
+}
+
+static struct {
+ const char *name;
+ void (*run_client)(unsigned int peer_cid);
+ void (*run_server)(unsigned int peer_cid);
+} test_cases[] = {
+ {
+ .name = "No sockets",
+ .run_server = test_no_sockets,
+ },
+ {
+ .name = "Listen socket",
+ .run_server = test_listen_socket_server,
+ },
+ {
+ .name = "Connect",
+ .run_client = test_connect_client,
+ .run_server = test_connect_server,
+ },
+ {},
+};
+
+static void init_signals(void)
+{
+ struct sigaction act = {
+ .sa_handler = sigalrm,
+ };
+
+ sigaction(SIGALRM, &act, NULL);
+ signal(SIGPIPE, SIG_IGN);
+}
+
+static unsigned int parse_cid(const char *str)
+{
+ char *endptr = NULL;
+ unsigned long int n;
+
+ errno = 0;
+ n = strtoul(str, &endptr, 10);
+ if (errno || *endptr != '\0') {
+ fprintf(stderr, "malformed CID \"%s\"\n", str);
+ exit(EXIT_FAILURE);
+ }
+ return n;
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "control-host",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "control-port",
+ .has_arg = required_argument,
+ .val = 'P',
+ },
+ {
+ .name = "mode",
+ .has_arg = required_argument,
+ .val = 'm',
+ },
+ {
+ .name = "peer-cid",
+ .has_arg = required_argument,
+ .val = 'p',
+ },
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = '?',
+ },
+ {},
+};
+
+static void usage(void)
+{
+ fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n"
+ "\n"
+ " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n"
+ " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n"
+ "\n"
+ "Run vsock_diag.ko tests. Must be launched in both\n"
+ "guest and host. One side must use --mode=client and\n"
+ "the other side must use --mode=server.\n"
+ "\n"
+ "A TCP control socket connection is used to coordinate tests\n"
+ "between the client and the server. The server requires a\n"
+ "listen address and the client requires an address to\n"
+ "connect to.\n"
+ "\n"
+ "The CID of the other side must be given with --peer-cid=<cid>.\n");
+ exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+ const char *control_host = NULL;
+ const char *control_port = NULL;
+ int mode = TEST_MODE_UNSET;
+ unsigned int peer_cid = VMADDR_CID_ANY;
+ int i;
+
+ init_signals();
+
+ for (;;) {
+ int opt = getopt_long(argc, argv, optstring, longopts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'H':
+ control_host = optarg;
+ break;
+ case 'm':
+ if (strcmp(optarg, "client") == 0)
+ mode = TEST_MODE_CLIENT;
+ else if (strcmp(optarg, "server") == 0)
+ mode = TEST_MODE_SERVER;
+ else {
+ fprintf(stderr, "--mode must be \"client\" or \"server\"\n");
+ return EXIT_FAILURE;
+ }
+ break;
+ case 'p':
+ peer_cid = parse_cid(optarg);
+ break;
+ case 'P':
+ control_port = optarg;
+ break;
+ case '?':
+ default:
+ usage();
+ }
+ }
+
+ if (!control_port)
+ usage();
+ if (mode == TEST_MODE_UNSET)
+ usage();
+ if (peer_cid == VMADDR_CID_ANY)
+ usage();
+
+ if (!control_host) {
+ if (mode != TEST_MODE_SERVER)
+ usage();
+ control_host = "0.0.0.0";
+ }
+
+ control_init(control_host, control_port, mode == TEST_MODE_SERVER);
+
+ for (i = 0; test_cases[i].name; i++) {
+ void (*run)(unsigned int peer_cid);
+
+ printf("%s...", test_cases[i].name);
+ fflush(stdout);
+
+ if (mode == TEST_MODE_CLIENT)
+ run = test_cases[i].run_client;
+ else
+ run = test_cases[i].run_server;
+
+ if (run)
+ run(peer_cid);
+
+ printf("ok\n");
+ }
+
+ control_cleanup();
+ return EXIT_SUCCESS;
+}
diff --git a/tools/thermal/tmon/.gitignore b/tools/thermal/tmon/.gitignore
new file mode 100644
index 000000000..06e96be65
--- /dev/null
+++ b/tools/thermal/tmon/.gitignore
@@ -0,0 +1 @@
+/tmon
diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile
new file mode 100644
index 000000000..45751648a
--- /dev/null
+++ b/tools/thermal/tmon/Makefile
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+# We need this for the "cc-option" macro.
+include ../../../scripts/Kbuild.include
+
+VERSION = 1.0
+
+BINDIR=usr/bin
+WARNFLAGS=-Wall -Wshadow -W -Wformat -Wimplicit-function-declaration -Wimplicit-int
+CFLAGS+= -O1 ${WARNFLAGS}
+# Add "-fstack-protector" only if toolchain supports it.
+CFLAGS+= $(call cc-option,-fstack-protector)
+CC?= $(CROSS_COMPILE)gcc
+PKG_CONFIG?= $(CROSS_COMPILE)pkg-config
+
+CFLAGS+=-D VERSION=\"$(VERSION)\"
+LDFLAGS+=
+TARGET=tmon
+
+INSTALL_PROGRAM=install -m 755 -p
+DEL_FILE=rm -f
+
+# Static builds might require -ltinfo, for instance
+ifneq ($(findstring -static, $(LDFLAGS)),)
+STATIC := --static
+endif
+
+TMON_LIBS=-lm -lpthread
+TMON_LIBS += $(shell $(PKG_CONFIG) --libs $(STATIC) panelw ncursesw 2> /dev/null || \
+ $(PKG_CONFIG) --libs $(STATIC) panel ncurses 2> /dev/null || \
+ echo -lpanel -lncurses)
+
+CFLAGS += $(shell $(PKG_CONFIG) --cflags $(STATIC) panelw ncursesw 2> /dev/null || \
+ $(PKG_CONFIG) --cflags $(STATIC) panel ncurses 2> /dev/null)
+
+OBJS = tmon.o tui.o sysfs.o pid.o
+OBJS +=
+
+tmon: $(OBJS) Makefile tmon.h
+ $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(TARGET) $(TMON_LIBS)
+
+valgrind: tmon
+ sudo valgrind -v --track-origins=yes --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes ./$(TARGET) 1> /dev/null
+
+install:
+ - mkdir -p $(INSTALL_ROOT)/$(BINDIR)
+ - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
+
+uninstall:
+ $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
+
+clean:
+ find . -name "*.o" | xargs $(DEL_FILE)
+ rm -f $(TARGET)
+
+dist:
+ git tag v$(VERSION)
+ git archive --format=tar --prefix="$(TARGET)-$(VERSION)/" v$(VERSION) | \
+ gzip > $(TARGET)-$(VERSION).tar.gz
diff --git a/tools/thermal/tmon/README b/tools/thermal/tmon/README
new file mode 100644
index 000000000..457949897
--- /dev/null
+++ b/tools/thermal/tmon/README
@@ -0,0 +1,50 @@
+TMON - A Monitoring and Testing Tool for Linux kernel thermal subsystem
+
+Why TMON?
+==========
+Increasingly, Linux is running on thermally constrained devices. The simple
+thermal relationship between processor and fan is a thing of the past on
+modern computers.
+
+As hardware vendors cope with the thermal constraints on their products, more
+and more sensors are added and new cooling capabilities are introduced. The
+complexity of the thermal relationships among cooling devices, zones, sensors,
+and trip points can grow exponentially, and they can also change dynamically.
+
+To expose these relationships to userspace, the Linux generic thermal layer
+introduced sysfs entries at /sys/class/thermal with a matrix of symbolic
+links, trip point bindings, and device instances. Traversing such a
+matrix by hand is not a trivial task. Testing is also difficult in that
+thermal conditions are often exceptional cases that are hard to reach in
+normal operation.
+
+TMON is conceived as a tool to help visualize, tune, and test the
+complex thermal subsystem.
+
+Files
+=====
+ tmon.c : main function for set up and configurations.
+ tui.c : handles ncurses based user interface
+ sysfs.c : access to the generic thermal sysfs
+ pid.c : a proportional-integral-derivative (PID) controller
+ that can be used for thermal relationship training.
+
+Requirements
+============
+Depends on ncurses
+
+Build
+=========
+$ make
+$ sudo ./tmon -h
+Usage: tmon [OPTION...]
+ -c, --control cooling device in control
+ -d, --daemon run as daemon, no TUI
+ -l, --log log data to /var/tmp/tmon.log
+ -h, --help show this help message
+ -t, --time-interval set time interval for sampling
+ -v, --version show version
+ -g, --debug debug message in syslog
+
+1. For monitoring only:
+$ sudo ./tmon
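+
+2. For monitoring with logging to /var/tmp/tmon.log (see -l above):
+$ sudo ./tmon -l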
diff --git a/tools/thermal/tmon/pid.c b/tools/thermal/tmon/pid.c
new file mode 100644
index 000000000..fd7e9e9d6
--- /dev/null
+++ b/tools/thermal/tmon/pid.c
@@ -0,0 +1,131 @@
+/*
+ * pid.c PID controller for testing cooling devices
+ *
+ *
+ *
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 or later as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author Name Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <libintl.h>
+#include <ctype.h>
+#include <assert.h>
+#include <time.h>
+#include <limits.h>
+#include <math.h>
+#include <sys/stat.h>
+#include <syslog.h>
+
+#include "tmon.h"
+
+/**************************************************************************
+ * A PID (Proportional-Integral-Derivative) controller is commonly used in
+ * linear control systems. Consider the process
+ * G(s) = U(s)/E(s)
+ * kp = proportional gain
+ * ki = integral gain
+ * kd = derivative gain
+ * Ts = sampling interval
+ * We use the type C Allen-Bradley equation, which takes the set point out
+ * of the P and D terms so they act only on the measured input:
+ *
+ * y[k] = y[k-1] - kp*(x[k] - x[k-1]) + Ki*Ts*e[k] - Kd*(x[k]
+ * - 2*x[k-1]+x[k-2])/Ts
+ *
+ *
+ ***********************************************************************/
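+
+/*
+ * One illustrative step (sample values assumed, not from a real run):
+ * with kp=0.36, ki=5.0, kd=0.19, Ts=1s, target 65C and samples
+ * x[k-2]=60, x[k-1]=62, x[k]=64:
+ * e[k] = 65 - 64 = 1.0
+ * P term = -0.36 * (64 - 62) = -0.72
+ * I term = 0.36 * 5.0 * 1 * 1.0 = 1.80
+ * D term = -0.36 * 0.19 * (64 - 2*62 + 60) / 1 = 0.00
+ * y[k] = y[k-1] + 1.08
+ */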
+struct pid_params p_param;
+/* cached data from previous loop */
+static double xk_1, xk_2; /* input temperature x[k-#] */
+
+/*
+ * TODO: make PID parameters tuned automatically,
+ * 1. use CPU burn to produce open loop unit step response
+ * 2. calculate PID based on Ziegler-Nichols rule
+ *
+ * add a flag for tuning PID
+ */
+int init_thermal_controller(void)
+{
+ int ret = 0;
+
+ /* init pid params */
+ p_param.ts = ticktime;
+ /* TODO: get it from TUI tuning tab */
+ p_param.kp = .36;
+ p_param.ki = 5.0;
+ p_param.kd = 0.19;
+
+ p_param.t_target = target_temp_user;
+
+ return ret;
+}
+
+void controller_reset(void)
+{
+ /* TODO: relax control data when not over thermal limit */
+ syslog(LOG_DEBUG, "TC inactive, relax p-state\n");
+ p_param.y_k = 0.0;
+ xk_1 = 0.0;
+ xk_2 = 0.0;
+ set_ctrl_state(0);
+}
+
+/* To be called at time interval Ts. Type C PID controller.
+ * y[k] = y[k-1] - kp*(x[k] - x[k-1]) + Ki*Ts*e[k] - Kd*(x[k]
+ * - 2*x[k-1]+x[k-2])/Ts
+ * TODO: add low pass filter for D term
+ */
+#define GUARD_BAND (2)
+void controller_handler(const double xk, double *yk)
+{
+ double ek;
+ double p_term, i_term, d_term;
+
+ ek = p_param.t_target - xk; /* error */
+ if (ek >= 3.0) {
+ syslog(LOG_DEBUG, "PID: %3.1f Below set point %3.1f, stop\n",
+ xk, p_param.t_target);
+ controller_reset();
+ *yk = 0.0;
+ return;
+ }
+ /* compute intermediate PID terms */
+ p_term = -p_param.kp * (xk - xk_1);
+ i_term = p_param.kp * p_param.ki * p_param.ts * ek;
+ d_term = -p_param.kp * p_param.kd * (xk - 2 * xk_1 + xk_2) / p_param.ts;
+ /* compute output */
+ *yk += p_term + i_term + d_term;
+ /* update sample history (shift the older sample before overwriting) */
+ xk_2 = xk_1;
+ xk_1 = xk;
+
+ /* clamp output adjustment range */
+ if (*yk < -LIMIT_HIGH)
+ *yk = -LIMIT_HIGH;
+ else if (*yk > -LIMIT_LOW)
+ *yk = -LIMIT_LOW;
+
+ p_param.y_k = *yk;
+
+ set_ctrl_state(lround(fabs(p_param.y_k)));
+
+}
diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c
new file mode 100644
index 000000000..18f523557
--- /dev/null
+++ b/tools/thermal/tmon/sysfs.c
@@ -0,0 +1,598 @@
+/*
+ * sysfs.c sysfs ABI access functions for TMON program
+ *
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 or later as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <libintl.h>
+#include <ctype.h>
+#include <time.h>
+#include <syslog.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#include "tmon.h"
+
+struct tmon_platform_data ptdata;
+const char *trip_type_name[] = {
+ "critical",
+ "hot",
+ "passive",
+ "active",
+};
+
+int sysfs_set_ulong(char *path, char *filename, unsigned long val)
+{
+ FILE *fd;
+ int ret = -1;
+ char filepath[256];
+
+ snprintf(filepath, 256, "%s/%s", path, filename);
+
+ fd = fopen(filepath, "w");
+ if (!fd) {
+ syslog(LOG_ERR, "Err: open %s: %s\n", __func__, filepath);
+ return ret;
+ }
+ ret = fprintf(fd, "%lu", val);
+ fclose(fd);
+
+ return 0;
+}
+
+/* history of thermal data, used for control algo */
+#define NR_THERMAL_RECORDS 3
+struct thermal_data_record trec[NR_THERMAL_RECORDS];
+int cur_thermal_record; /* index to the trec array */
+
+static int sysfs_get_ulong(char *path, char *filename, unsigned long *p_ulong)
+{
+ FILE *fd;
+ int ret = -1;
+ char filepath[256];
+
+ snprintf(filepath, 256, "%s/%s", path, filename);
+
+ fd = fopen(filepath, "r");
+ if (!fd) {
+ syslog(LOG_ERR, "Err: open %s: %s\n", __func__, filepath);
+ return ret;
+ }
+ ret = fscanf(fd, "%lu", p_ulong);
+ fclose(fd);
+
+ return 0;
+}
+
+static int sysfs_get_string(char *path, char *filename, char *str)
+{
+ FILE *fd;
+ int ret = -1;
+ char filepath[256];
+
+ snprintf(filepath, 256, "%s/%s", path, filename);
+
+ fd = fopen(filepath, "r");
+ if (!fd) {
+ syslog(LOG_ERR, "Err: open %s: %s\n", __func__, filepath);
+ return ret;
+ }
+ ret = fscanf(fd, "%256s", str);
+ fclose(fd);
+
+ return ret;
+}
+
+/* get states of the cooling device instance */
+static int probe_cdev(struct cdev_info *cdi, char *path)
+{
+ sysfs_get_string(path, "type", cdi->type);
+ sysfs_get_ulong(path, "max_state", &cdi->max_state);
+ sysfs_get_ulong(path, "cur_state", &cdi->cur_state);
+
+ syslog(LOG_INFO, "%s: %s: type %s, max %lu, curr %lu inst %d\n",
+ __func__, path,
+ cdi->type, cdi->max_state, cdi->cur_state, cdi->instance);
+
+ return 0;
+}
+
+static int str_to_trip_type(char *name)
+{
+ int i;
+
+ for (i = 0; i < NR_THERMAL_TRIP_TYPE; i++) {
+ if (!strcmp(name, trip_type_name[i]))
+ return i;
+ }
+
+ return -ENOENT;
+}
+
+/* scan and fill in trip point info for a thermal zone and trip point id */
+static int get_trip_point_data(char *tz_path, int tzid, int tpid)
+{
+ char filename[256];
+ char temp_str[256];
+ int trip_type;
+
+ if (tpid >= MAX_NR_TRIP)
+ return -EINVAL;
+ /* check trip point type */
+ snprintf(filename, sizeof(filename), "trip_point_%d_type", tpid);
+ sysfs_get_string(tz_path, filename, temp_str);
+ trip_type = str_to_trip_type(temp_str);
+ if (trip_type < 0) {
+ syslog(LOG_ERR, "%s:%s no matching type\n", __func__, temp_str);
+ return -ENOENT;
+ }
+ ptdata.tzi[tzid].tp[tpid].type = trip_type;
+ syslog(LOG_INFO, "%s:tz:%d tp:%d:type:%s type id %d\n", __func__, tzid,
+ tpid, temp_str, trip_type);
+
+ /* TODO: check attribute */
+
+ return 0;
+}
+
+/* return instance id for file format such as trip_point_4_temp */
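+/* e.g. get_instance_id("trip_point_4_temp", 2, 0) returns 4 */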
+static int get_instance_id(char *name, int pos, int skip)
+{
+ char *ch;
+ int i = 0;
+
+ ch = strtok(name, "_");
+ while (ch != NULL) {
+ ++i;
+ syslog(LOG_INFO, "%s:%s:%s:%d", __func__, name, ch, i);
+ ch = strtok(NULL, "_");
+ if (pos == i)
+ return atol(ch + skip);
+ }
+
+ return -1;
+}
+
+/* Find trip point info of a thermal zone */
+static int find_tzone_tp(char *tz_name, char *d_name, struct tz_info *tzi,
+ int tz_id)
+{
+ int tp_id;
+ unsigned long temp_ulong;
+
+ if (strstr(d_name, "trip_point") &&
+ strstr(d_name, "temp")) {
+ /* accept only trip points with a sane temperature;
+ * ignore invalid ones (temp >= MAX_TEMP_KC)
+ */
+ sysfs_get_ulong(tz_name, d_name, &temp_ulong);
+ if (temp_ulong < MAX_TEMP_KC) {
+ tzi->nr_trip_pts++;
+ /* found a valid trip point */
+ tp_id = get_instance_id(d_name, 2, 0);
+ syslog(LOG_DEBUG, "tzone %s trip %d temp %lu tpnode %s",
+ tz_name, tp_id, temp_ulong, d_name);
+ if (tp_id < 0 || tp_id >= MAX_NR_TRIP) {
+ syslog(LOG_ERR, "Failed to find TP inst %s\n",
+ d_name);
+ return -1;
+ }
+ get_trip_point_data(tz_name, tz_id, tp_id);
+ tzi->tp[tp_id].temp = temp_ulong;
+ }
+ }
+
+ return 0;
+}
+
+/* check cooling devices for binding info. */
+static int find_tzone_cdev(struct dirent *nl, char *tz_name,
+ struct tz_info *tzi, int tz_id, int cid)
+{
+ unsigned long trip_instance = 0;
+ char cdev_name_linked[256];
+ char cdev_name[256];
+ char cdev_trip_name[256];
+ int cdev_id;
+
+ if (nl->d_type == DT_LNK) {
+ syslog(LOG_DEBUG, "TZ%d: cdev: %s cid %d\n", tz_id, nl->d_name,
+ cid);
+ tzi->nr_cdev++;
+ if (tzi->nr_cdev > ptdata.nr_cooling_dev) {
+ syslog(LOG_ERR, "Err: Too many cdev? %d\n",
+ tzi->nr_cdev);
+ return -EINVAL;
+ }
+ /* find the link to real cooling device record binding */
+ snprintf(cdev_name, 256, "%s/%s", tz_name, nl->d_name);
+ memset(cdev_name_linked, 0, sizeof(cdev_name_linked));
+ if (readlink(cdev_name, cdev_name_linked,
+ sizeof(cdev_name_linked) - 1) != -1) {
+ cdev_id = get_instance_id(cdev_name_linked, 1,
+ sizeof("device") - 1);
+ syslog(LOG_DEBUG, "cdev %s linked to %s : %d\n",
+ cdev_name, cdev_name_linked, cdev_id);
+ tzi->cdev_binding |= (1 << cdev_id);
+
+ /* find the trip point to which the cdev is bound
+ * in this tzone
+ */
+ snprintf(cdev_trip_name, 256, "%s%s", nl->d_name,
+ "_trip_point");
+ sysfs_get_ulong(tz_name, cdev_trip_name,
+ &trip_instance);
+ /* validate trip point range, e.g. trip could return -1
+ * when passive is enabled
+ */
+ if (trip_instance > MAX_NR_TRIP)
+ trip_instance = 0;
+ tzi->trip_binding[cdev_id] |= 1 << trip_instance;
+ syslog(LOG_DEBUG, "cdev %s -> trip:%lu: 0x%lx %d\n",
+ cdev_name, trip_instance,
+ tzi->trip_binding[cdev_id],
+ cdev_id);
+
+
+ }
+ return 0;
+ }
+
+ return -ENODEV;
+}
+
+
+
+/*****************************************************************************
+ * Before calling scan_tzones, thermal sysfs must be probed to determine
+ * the number of thermal zones and cooling devices.
+ * We loop through each thermal zone and fill in tz_info struct, i.e.
+ * ptdata.tzi[]
+root@jacob-chiefriver:~# tree -d /sys/class/thermal/thermal_zone0
+/sys/class/thermal/thermal_zone0
+|-- cdev0 -> ../cooling_device4
+|-- cdev1 -> ../cooling_device3
+|-- cdev10 -> ../cooling_device7
+|-- cdev11 -> ../cooling_device6
+|-- cdev12 -> ../cooling_device5
+|-- cdev2 -> ../cooling_device2
+|-- cdev3 -> ../cooling_device1
+|-- cdev4 -> ../cooling_device0
+|-- cdev5 -> ../cooling_device12
+|-- cdev6 -> ../cooling_device11
+|-- cdev7 -> ../cooling_device10
+|-- cdev8 -> ../cooling_device9
+|-- cdev9 -> ../cooling_device8
+|-- device -> ../../../LNXSYSTM:00/device:62/LNXTHERM:00
+|-- power
+`-- subsystem -> ../../../../class/thermal
+*****************************************************************************/
+static int scan_tzones(void)
+{
+ DIR *dir;
+ struct dirent **namelist;
+ char tz_name[256];
+ int i, j, n, k = 0;
+
+ if (!ptdata.nr_tz_sensor)
+ return -1;
+
+ for (i = 0; i <= ptdata.max_tz_instance; i++) {
+ memset(tz_name, 0, sizeof(tz_name));
+ snprintf(tz_name, 256, "%s/%s%d", THERMAL_SYSFS, TZONE, i);
+
+ dir = opendir(tz_name);
+ if (!dir) {
+ syslog(LOG_INFO, "Thermal zone %s skipped\n", tz_name);
+ continue;
+ }
+ /* keep track of valid tzones */
+ n = scandir(tz_name, &namelist, 0, alphasort);
+ if (n < 0)
+ syslog(LOG_ERR, "scandir failed in %s", tz_name);
+ else {
+ sysfs_get_string(tz_name, "type", ptdata.tzi[k].type);
+ ptdata.tzi[k].instance = i;
+ /* detect trip points and cdev attached to this tzone */
+ j = 0; /* index for cdev */
+ ptdata.tzi[k].nr_cdev = 0;
+ ptdata.tzi[k].nr_trip_pts = 0;
+ while (n--) {
+ char *temp_str;
+
+ if (find_tzone_tp(tz_name, namelist[n]->d_name,
+ &ptdata.tzi[k], k))
+ break;
+ temp_str = strstr(namelist[n]->d_name, "cdev");
+ if (!temp_str) {
+ free(namelist[n]);
+ continue;
+ }
+ if (!find_tzone_cdev(namelist[n], tz_name,
+ &ptdata.tzi[k], i, j))
+ j++; /* increment cdev index */
+ free(namelist[n]);
+ }
+ free(namelist);
+ }
+ /*TODO: reverse trip points */
+ closedir(dir);
+ syslog(LOG_INFO, "TZ %d has %d cdev\n", i,
+ ptdata.tzi[k].nr_cdev);
+ k++;
+ }
+
+ return 0;
+}
+
+static int scan_cdevs(void)
+{
+ DIR *dir;
+ struct dirent **namelist;
+ char cdev_name[256];
+ int i, n, k = 0;
+
+ if (!ptdata.nr_cooling_dev) {
+ fprintf(stderr, "No cooling devices found\n");
+ return 0;
+ }
+ for (i = 0; i <= ptdata.max_cdev_instance; i++) {
+ memset(cdev_name, 0, sizeof(cdev_name));
+ snprintf(cdev_name, 256, "%s/%s%d", THERMAL_SYSFS, CDEV, i);
+
+ dir = opendir(cdev_name);
+ if (!dir) {
+ syslog(LOG_INFO, "Cooling dev %s skipped\n", cdev_name);
+ /* there may be gaps in cooling device ids; move on
+ * to the next index.
+ */
+ continue;
+ }
+
+ n = scandir(cdev_name, &namelist, 0, alphasort);
+ if (n < 0)
+ syslog(LOG_ERR, "scandir failed in %s", cdev_name);
+ else {
+ sysfs_get_string(cdev_name, "type", ptdata.cdi[k].type);
+ ptdata.cdi[k].instance = i;
+ if (strstr(ptdata.cdi[k].type, ctrl_cdev)) {
+ ptdata.cdi[k].flag |= CDEV_FLAG_IN_CONTROL;
+ syslog(LOG_DEBUG, "control cdev id %d\n", i);
+ }
+ while (n--)
+ free(namelist[n]);
+ free(namelist);
+ }
+ closedir(dir);
+ k++;
+ }
+ return 0;
+}
+
+
+int probe_thermal_sysfs(void)
+{
+ DIR *dir;
+ struct dirent **namelist;
+ int n;
+
+ dir = opendir(THERMAL_SYSFS);
+ if (!dir) {
+ fprintf(stderr, "\nNo thermal sysfs, exit\n");
+ return -1;
+ }
+ n = scandir(THERMAL_SYSFS, &namelist, 0, alphasort);
+ if (n < 0)
+ syslog(LOG_ERR, "scandir failed in thermal sysfs");
+ else {
+ /* detect number of thermal zones and cooling devices */
+ while (n--) {
+ int inst;
+
+ if (strstr(namelist[n]->d_name, CDEV)) {
+ inst = get_instance_id(namelist[n]->d_name, 1,
+ sizeof("device") - 1);
+ /* keep track of the max cooling device since
+ * there may be gaps.
+ */
+ if (inst > ptdata.max_cdev_instance)
+ ptdata.max_cdev_instance = inst;
+
+ syslog(LOG_DEBUG, "found cdev: %s %d %d\n",
+ namelist[n]->d_name,
+ ptdata.nr_cooling_dev,
+ ptdata.max_cdev_instance);
+ ptdata.nr_cooling_dev++;
+ } else if (strstr(namelist[n]->d_name, TZONE)) {
+ inst = get_instance_id(namelist[n]->d_name, 1,
+ sizeof("zone") - 1);
+ if (inst > ptdata.max_tz_instance)
+ ptdata.max_tz_instance = inst;
+
+ syslog(LOG_DEBUG, "found tzone: %s %d %d\n",
+ namelist[n]->d_name,
+ ptdata.nr_tz_sensor,
+ ptdata.max_tz_instance);
+ ptdata.nr_tz_sensor++;
+ }
+ free(namelist[n]);
+ }
+ free(namelist);
+ }
+ syslog(LOG_INFO, "found %d tzone(s), %d cdev(s), target zone %d\n",
+ ptdata.nr_tz_sensor, ptdata.nr_cooling_dev,
+ target_thermal_zone);
+ closedir(dir);
+
+ if (!ptdata.nr_tz_sensor) {
+ fprintf(stderr, "\nNo thermal zones found, exit\n\n");
+ return -1;
+ }
+
+ ptdata.tzi = calloc(ptdata.max_tz_instance+1, sizeof(struct tz_info));
+ if (!ptdata.tzi) {
+ fprintf(stderr, "Err: allocate tz_info\n");
+ return -1;
+ }
+
+ /* we still show thermal zone information if there is no cdev */
+ if (ptdata.nr_cooling_dev) {
+ ptdata.cdi = calloc(ptdata.max_cdev_instance + 1,
+ sizeof(struct cdev_info));
+ if (!ptdata.cdi) {
+ free(ptdata.tzi);
+ fprintf(stderr, "Err: allocate cdev_info\n");
+ return -1;
+ }
+ }
+
+ /* now probe tzones */
+ if (scan_tzones())
+ return -1;
+ if (scan_cdevs())
+ return -1;
+ return 0;
+}
+
+/* convert sysfs zone instance to zone array index */
+int zone_instance_to_index(int zone_inst)
+{
+ int i;
+
+ for (i = 0; i < ptdata.nr_tz_sensor; i++)
+ if (ptdata.tzi[i].instance == zone_inst)
+ return i;
+ return -ENOENT;
+}
+
+/* read temperature of all thermal zones */
+int update_thermal_data(void)
+{
+ int i;
+ int next_thermal_record = cur_thermal_record + 1;
+ char tz_name[256];
+ static unsigned long samples;
+
+ if (!ptdata.nr_tz_sensor) {
+ syslog(LOG_ERR, "No thermal zones found!\n");
+ return -1;
+ }
+
+ /* circular buffer for keeping historic data */
+ if (next_thermal_record >= NR_THERMAL_RECORDS)
+ next_thermal_record = 0;
+ gettimeofday(&trec[next_thermal_record].tv, NULL);
+ if (tmon_log) {
+ fprintf(tmon_log, "%lu ", ++samples);
+ fprintf(tmon_log, "%3.1f ", p_param.t_target);
+ }
+ for (i = 0; i < ptdata.nr_tz_sensor; i++) {
+ memset(tz_name, 0, sizeof(tz_name));
+ snprintf(tz_name, 256, "%s/%s%d", THERMAL_SYSFS, TZONE,
+ ptdata.tzi[i].instance);
+ sysfs_get_ulong(tz_name, "temp",
+ &trec[next_thermal_record].temp[i]);
+ if (tmon_log)
+ fprintf(tmon_log, "%lu ",
+ trec[next_thermal_record].temp[i] / 1000);
+ }
+ cur_thermal_record = next_thermal_record;
+ for (i = 0; i < ptdata.nr_cooling_dev; i++) {
+ char cdev_name[256];
+ unsigned long val;
+
+ snprintf(cdev_name, 256, "%s/%s%d", THERMAL_SYSFS, CDEV,
+ ptdata.cdi[i].instance);
+ probe_cdev(&ptdata.cdi[i], cdev_name);
+ val = ptdata.cdi[i].cur_state;
+ if (val > 1000000)
+ val = 0;
+ if (tmon_log)
+ fprintf(tmon_log, "%lu ", val);
+ }
+
+ if (tmon_log) {
+ fprintf(tmon_log, "\n");
+ fflush(tmon_log);
+ }
+
+ return 0;
+}
+
+void set_ctrl_state(unsigned long state)
+{
+ char ctrl_cdev_path[256];
+ int i;
+ unsigned long cdev_state;
+
+ if (no_control)
+ return;
+ /* set all ctrl cdev to the same state */
+ for (i = 0; i < ptdata.nr_cooling_dev; i++) {
+ if (ptdata.cdi[i].flag & CDEV_FLAG_IN_CONTROL) {
+ if (ptdata.cdi[i].max_state < 10) {
+ strcpy(ctrl_cdev, "None.");
+ return;
+ }
+ /* scale to percentage of max_state */
+ cdev_state = state * ptdata.cdi[i].max_state/100;
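+ /* e.g. a request of state 40 (40%) with max_state 20
+ * yields cdev_state 8
+ */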
+ syslog(LOG_DEBUG,
+ "ctrl cdev %d set state %lu scaled to %lu\n",
+ ptdata.cdi[i].instance, state, cdev_state);
+ snprintf(ctrl_cdev_path, 256, "%s/%s%d", THERMAL_SYSFS,
+ CDEV, ptdata.cdi[i].instance);
+ syslog(LOG_DEBUG, "ctrl cdev path %s", ctrl_cdev_path);
+ sysfs_set_ulong(ctrl_cdev_path, "cur_state",
+ cdev_state);
+ }
+ }
+}
+
+void get_ctrl_state(unsigned long *state)
+{
+ char ctrl_cdev_path[256];
+ int ctrl_cdev_id = -1;
+ int i;
+
+ /* TODO: take average of all ctrl types. also consider change based on
+ * uevent. Take the first reading for now.
+ */
+ for (i = 0; i < ptdata.nr_cooling_dev; i++) {
+ if (ptdata.cdi[i].flag & CDEV_FLAG_IN_CONTROL) {
+ ctrl_cdev_id = ptdata.cdi[i].instance;
+ syslog(LOG_INFO, "ctrl cdev %d get state\n",
+ ptdata.cdi[i].instance);
+ break;
+ }
+ }
+ if (ctrl_cdev_id == -1) {
+ *state = 0;
+ return;
+ }
+ snprintf(ctrl_cdev_path, 256, "%s/%s%d", THERMAL_SYSFS,
+ CDEV, ctrl_cdev_id);
+ sysfs_get_ulong(ctrl_cdev_path, "cur_state", state);
+}
+
+void free_thermal_data(void)
+{
+ free(ptdata.tzi);
+ free(ptdata.cdi);
+}
diff --git a/tools/thermal/tmon/tmon.8 b/tools/thermal/tmon/tmon.8
new file mode 100644
index 000000000..2f122de58
--- /dev/null
+++ b/tools/thermal/tmon/tmon.8
@@ -0,0 +1,145 @@
+.TH TMON 8
+.\" SPDX-License-Identifier: GPL-2.0
+.SH NAME
+\fBtmon\fP - A monitoring and testing tool for Linux kernel thermal subsystem
+
+.SH SYNOPSIS
+.ft B
+.B tmon
+.RB [ Options ]
+.br
+.SH DESCRIPTION
+\fBtmon \fP can be used to visualize thermal relationships and
+real-time thermal data; tune and test cooling devices and sensors; and
+collect thermal data for offline analysis and plotting. \fBtmon\fP must
+be run as root in order to control device states via sysfs.
+.PP
+\fBFunctions\fP
+.PP
+.nf
+1. Thermal relationships:
+- show thermal zone information
+- show cooling device information
+- show trip point binding within each thermal zone
+- show trip point and cooling device instance bindings
+.PP
+2. Real time data display
+- show temperatures of all thermal zones w.r.t. their trip points and types
+- show states of all cooling devices
+.PP
+3. Thermal relationship learning and device tuning
+- with a built-in Proportional Integral Derivative (\fBPID\fP)
+controller, the user can pair a cooling device with a thermal sensor to
+test its effectiveness and learn about the thermal distance between the two
+- allow manual control of cooling device states and target temperature
+.PP
+4. Data logging in /var/tmp/tmon.log
+- contains thermal configuration data, i.e. cooling devices, thermal
+ zones, and trip points. Can be used for data collection in remote
+ debugging.
+- logs real-time thermal data in a space-separated format that can be
+ directly consumed by plotting tools such as Rscript.
+
+.SS Options
+.PP
+The \fB-c --control\fP option sets a cooling device type to control the
+temperature of a thermal zone
+.PP
+The \fB-d --daemon\fP option runs \fBtmon \fP as a daemon without a user interface
+.PP
+The \fB-g --debug\fP option allows debug messages to be stored in syslog
+.PP
+The \fB-h --help\fP option shows the help message
+.PP
+The \fB-l --log\fP option writes data to /var/tmp/tmon.log
+.PP
+The \fB-t --time-interval\fP option sets the polling interval in seconds
+.PP
+The \fB-T --target-temp\fP option sets the initial target temperature
+.PP
+The \fB-v --version\fP option shows the version of \fBtmon \fP
+.PP
+The \fB-z --zone\fP option sets the target thermal zone instance to be controlled
+.PP
+
+.SH FIELD DESCRIPTIONS
+.nf
+.PP
+\fBP \fP passive cooling trip point type
+\fBA \fP active cooling trip point type (fan)
+\fBC \fP critical trip point type
+\fBH \fP hot trip point type
+\fBkp \fP proportional gain of \fBPID\fP controller
+\fBki \fP integral gain of \fBPID\fP controller
+\fBkd \fP derivative gain of \fBPID\fP controller
+
+.SH REQUIREMENT
+Building depends on ncurses
+.PP
+At runtime, the terminal window must be large enough to show all the
+devices found on the system.
+
+.PP
+
+.SH INTERACTIVE COMMANDS
+.PP
+.nf
+\fBCtrl-C, q/Q\fP stops \fBtmon\fP
+\fBTAB\fP shows tuning pop up panel, choose a letter to modify
+
+.SH EXAMPLES
+Without any parameters, tmon runs in monitoring-only mode and refreshes
+the screen every second.
+.PP
+1. For monitoring only:
+.nf
+$ sudo ./tmon
+
+2. Use the Processor cooling device to control thermal zone 0 at the default 65C.
+$ sudo ./tmon -c Processor -z 0
+
+3. Use the intel_powerclamp (idle injection) cooling device to control thermal zone 1
+$ sudo ./tmon -c intel_powerclamp -z 1
+
+4. Turn on debug and collect data log at /var/tmp/tmon.log
+$ sudo ./tmon -g -l
+
+For example, the log below shows the PID controller adjusting the current
+states of all cooling devices of type "Processor" such that thermal zone 0
+stays below 65C.
+
+#---------- THERMAL DATA LOG STARTED -----------
+Samples TargetTemp acpitz0 acpitz1 Fan0 Fan1 Fan2 Fan3 Fan4 Fan5 Fan6 Fan7 Fan8 Fan9 Processor10 Processor11 Processor12 Processor13 LCD14 intel_powerclamp15
+1 65.0 65 65 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0
+2 65.0 66 65 0 0 0 0 0 0 0 0 0 0 4 4 4 4 6 0
+3 65.0 60 54 0 0 0 0 0 0 0 0 0 0 4 4 4 4 6 0
+4 65.0 53 53 0 0 0 0 0 0 0 0 0 0 4 4 4 4 6 0
+5 65.0 52 52 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0
+6 65.0 53 65 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0
+7 65.0 68 70 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 0
+8 65.0 68 68 0 0 0 0 0 0 0 0 0 0 5 5 5 5 6 0
+9 65.0 68 68 0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 0
+10 65.0 67 67 0 0 0 0 0 0 0 0 0 0 7 7 7 7 6 0
+11 65.0 67 67 0 0 0 0 0 0 0 0 0 0 8 8 8 8 6 0
+12 65.0 67 67 0 0 0 0 0 0 0 0 0 0 8 8 8 8 6 0
+13 65.0 67 67 0 0 0 0 0 0 0 0 0 0 9 9 9 9 6 0
+14 65.0 66 66 0 0 0 0 0 0 0 0 0 0 10 10 10 10 6 0
+15 65.0 66 67 0 0 0 0 0 0 0 0 0 0 10 10 10 10 6 0
+16 65.0 66 66 0 0 0 0 0 0 0 0 0 0 11 11 11 11 6 0
+17 65.0 66 66 0 0 0 0 0 0 0 0 0 0 11 11 11 11 6 0
+18 65.0 64 61 0 0 0 0 0 0 0 0 0 0 11 11 11 11 6 0
+19 65.0 60 59 0 0 0 0 0 0 0 0 0 0 12 12 12 12 6 0
+
+Data can be read directly into an R data frame by the example R script below:
+
+#!/usr/bin/Rscript
+tdata <- read.table("/var/tmp/tmon.log", header=T, comment.char="#")
+attach(tdata)
+jpeg("tmon.jpg")
+X11()
+g_range <- range(0, intel_powerclamp15, TargetTemp, acpitz0)
+plot( Samples, intel_powerclamp15, col="blue", ylim=g_range, axes=FALSE, ann=FALSE)
+par(new=TRUE)
+lines(TargetTemp, type="o", pch=22, lty=2, col="red")
+dev.off()
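+
+Assuming the script above is saved as plot.R, it can be run from a shell
+with:
+
+$ Rscript plot.R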
diff --git a/tools/thermal/tmon/tmon.c b/tools/thermal/tmon/tmon.c
new file mode 100644
index 000000000..b43138f8b
--- /dev/null
+++ b/tools/thermal/tmon/tmon.c
@@ -0,0 +1,385 @@
+/*
+ * tmon.c Thermal Monitor (TMON) main function and entry point
+ *
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 or later as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ */
+
+#include <getopt.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ncurses.h>
+#include <ctype.h>
+#include <time.h>
+#include <signal.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <pthread.h>
+#include <math.h>
+#include <stdarg.h>
+#include <syslog.h>
+
+#include "tmon.h"
+
+unsigned long ticktime = 1; /* seconds */
+unsigned long no_control = 1; /* monitoring only or use cooling device for
+ * temperature control.
+ */
+double time_elapsed = 0.0;
+unsigned long target_temp_user = 65; /* can be selected via the TUI later */
+int dialogue_on;
+int tmon_exit;
+static short daemon_mode;
+static int logging; /* for recording thermal data to a file */
+static int debug_on;
+FILE *tmon_log;
+/* cooling device used for the PID controller */
+char ctrl_cdev[CDEV_NAME_SIZE] = "None";
+int target_thermal_zone; /* user selected target zone instance */
+static void start_daemon_mode(void);
+
+pthread_t event_tid;
+pthread_mutex_t input_lock;
+void usage()
+{
+ printf("Usage: tmon [OPTION...]\n");
+ printf(" -c, --control cooling device in control\n");
+ printf(" -d, --daemon run as daemon, no TUI\n");
+ printf(" -g, --debug debug message in syslog\n");
+ printf(" -h, --help show this help message\n");
+ printf(" -l, --log log data to /var/tmp/tmon.log\n");
+ printf(" -t, --time-interval sampling time interval, > 1 sec.\n");
+ printf(" -T, --target-temp initial target temperature\n");
+ printf(" -v, --version show version\n");
+ printf(" -z, --zone target thermal zone id\n");
+
+ exit(0);
+}
+
+void version()
+{
+ printf("TMON version %s\n", VERSION);
+ exit(EXIT_SUCCESS);
+}
+
+static void tmon_cleanup(void)
+{
+
+ syslog(LOG_INFO, "TMON exit cleanup\n");
+ fflush(stdout);
+ refresh();
+ if (tmon_log)
+ fclose(tmon_log);
+ if (event_tid) {
+ pthread_mutex_lock(&input_lock);
+ pthread_cancel(event_tid);
+ pthread_mutex_unlock(&input_lock);
+ pthread_mutex_destroy(&input_lock);
+ }
+ closelog();
+ /* relax control knobs, undo throttling */
+ set_ctrl_state(0);
+
+ keypad(stdscr, FALSE);
+ echo();
+ nocbreak();
+ close_windows();
+ endwin();
+ free_thermal_data();
+
+ exit(1);
+}
+
+
+static void tmon_sig_handler(int sig)
+{
+ syslog(LOG_INFO, "TMON caught signal %d\n", sig);
+ refresh();
+ switch (sig) {
+ case SIGTERM:
+ printf("sigterm, exit and clean up\n");
+ fflush(stdout);
+ break;
+ case SIGKILL:
+ printf("sigkill, exit and clean up\n");
+ fflush(stdout);
+ break;
+ case SIGINT:
+ printf("ctrl-c, exit and clean up\n");
+ fflush(stdout);
+ break;
+ default:
+ break;
+ }
+ tmon_exit = true;
+}
+
+
+static void start_syslog(void)
+{
+ if (debug_on)
+ setlogmask(LOG_UPTO(LOG_DEBUG));
+ else
+ setlogmask(LOG_UPTO(LOG_ERR));
+ openlog("tmon.log", LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL0);
+ syslog(LOG_NOTICE, "TMON started by User %d", getuid());
+}
+
+static void prepare_logging(void)
+{
+ int i;
+ struct stat logstat;
+
+ if (!logging)
+ return;
+ /* open local data log file */
+ tmon_log = fopen(TMON_LOG_FILE, "w+");
+ if (!tmon_log) {
+ syslog(LOG_ERR, "failed to open log file %s\n", TMON_LOG_FILE);
+ return;
+ }
+
+ if (lstat(TMON_LOG_FILE, &logstat) < 0) {
+ syslog(LOG_ERR, "Unable to stat log file %s\n", TMON_LOG_FILE);
+ fclose(tmon_log);
+ tmon_log = NULL;
+ return;
+ }
+
+ /* The log file must be a regular file owned by us */
+ if (S_ISLNK(logstat.st_mode)) {
+ syslog(LOG_ERR, "Log file is a symlink. Will not log\n");
+ fclose(tmon_log);
+ tmon_log = NULL;
+ return;
+ }
+
+ if (logstat.st_uid != getuid()) {
+ syslog(LOG_ERR, "We don't own the log file. Not logging\n");
+ fclose(tmon_log);
+ tmon_log = NULL;
+ return;
+ }
+
+
+ fprintf(tmon_log, "#----------- THERMAL SYSTEM CONFIG -------------\n");
+ for (i = 0; i < ptdata.nr_tz_sensor; i++) {
+ char binding_str[33]; /* size of long + 1 */
+ int j;
+
+ memset(binding_str, 0, sizeof(binding_str));
+ for (j = 0; j < 32; j++)
+ binding_str[j] = (ptdata.tzi[i].cdev_binding & 1<<j) ?
+ '1' : '0';
+
+ fprintf(tmon_log, "#thermal zone %s%02d cdevs binding: %32s\n",
+ ptdata.tzi[i].type,
+ ptdata.tzi[i].instance,
+ binding_str);
+ for (j = 0; j < ptdata.tzi[i].nr_trip_pts; j++) {
+ fprintf(tmon_log, "#\tTP%02d type:%s, temp:%lu\n", j,
+ trip_type_name[ptdata.tzi[i].tp[j].type],
+ ptdata.tzi[i].tp[j].temp);
+ }
+
+ }
+
+ for (i = 0; i < ptdata.nr_cooling_dev; i++)
+ fprintf(tmon_log, "#cooling devices%02d: %s\n",
+ i, ptdata.cdi[i].type);
+
+ fprintf(tmon_log, "#---------- THERMAL DATA LOG STARTED -----------\n");
+ fprintf(tmon_log, "Samples TargetTemp ");
+ for (i = 0; i < ptdata.nr_tz_sensor; i++) {
+ fprintf(tmon_log, "%s%d ", ptdata.tzi[i].type,
+ ptdata.tzi[i].instance);
+ }
+ for (i = 0; i < ptdata.nr_cooling_dev; i++)
+ fprintf(tmon_log, "%s%d ", ptdata.cdi[i].type,
+ ptdata.cdi[i].instance);
+
+ fprintf(tmon_log, "\n");
+}
+
+static struct option opts[] = {
+ { "control", 1, NULL, 'c' },
+ { "daemon", 0, NULL, 'd' },
+ { "time-interval", 1, NULL, 't' },
+ { "target-temp", 1, NULL, 'T' },
+ { "log", 0, NULL, 'l' },
+ { "help", 0, NULL, 'h' },
+ { "version", 0, NULL, 'v' },
+ { "debug", 0, NULL, 'g' },
+ { 0, 0, NULL, 0 }
+};
+
+
+int main(int argc, char **argv)
+{
+ int err = 0;
+ int id2 = 0, c;
+ double yk = 0.0, temp; /* controller output */
+ int target_tz_index;
+
+ if (geteuid() != 0) {
+ printf("TMON needs to be run as root\n");
+ exit(EXIT_FAILURE);
+ }
+
+ while ((c = getopt_long(argc, argv, "c:dlht:T:vgz:", opts, &id2)) != -1) {
+ switch (c) {
+ case 'c':
+ no_control = 0;
+ strncpy(ctrl_cdev, optarg, CDEV_NAME_SIZE - 1);
+ break;
+ case 'd':
+ start_daemon_mode();
+ printf("Run TMON in daemon mode\n");
+ break;
+ case 't':
+ ticktime = strtod(optarg, NULL);
+ if (ticktime < 1)
+ ticktime = 1;
+ break;
+ case 'T':
+ temp = strtod(optarg, NULL);
+ if (temp < 0) {
+ fprintf(stderr, "error: temperature must be positive\n");
+ return 1;
+ }
+ target_temp_user = temp;
+ break;
+ case 'l':
+ printf("Logging data to /var/tmp/tmon.log\n");
+ logging = 1;
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'v':
+ version();
+ break;
+ case 'g':
+ debug_on = 1;
+ break;
+ case 'z':
+ target_thermal_zone = strtod(optarg, NULL);
+ break;
+ default:
+ break;
+ }
+ }
+ if (pthread_mutex_init(&input_lock, NULL) != 0) {
+ fprintf(stderr, "\n mutex init failed, exit\n");
+ return 1;
+ }
+ start_syslog();
+ if (signal(SIGINT, tmon_sig_handler) == SIG_ERR)
+ syslog(LOG_DEBUG, "Cannot handle SIGINT\n");
+ if (signal(SIGTERM, tmon_sig_handler) == SIG_ERR)
+ syslog(LOG_DEBUG, "Cannot handle SIGINT\n");
+
+ if (probe_thermal_sysfs()) {
+ pthread_mutex_destroy(&input_lock);
+ closelog();
+ return -1;
+ }
+ initialize_curses();
+ setup_windows();
+ signal(SIGWINCH, resize_handler);
+ show_title_bar();
+ show_sensors_w();
+ show_cooling_device();
+ update_thermal_data();
+ show_data_w();
+ prepare_logging();
+ init_thermal_controller();
+
+ nodelay(stdscr, TRUE);
+ err = pthread_create(&event_tid, NULL, &handle_tui_events, NULL);
+ if (err != 0) {
+ printf("\ncan't create thread :[%s]", strerror(err));
+ tmon_cleanup();
+ exit(EXIT_FAILURE);
+ }
+
+ /* validate range of user selected target zone, default to the first
+ * instance if out of range
+ */
+ target_tz_index = zone_instance_to_index(target_thermal_zone);
+ if (target_tz_index < 0) {
+ target_thermal_zone = ptdata.tzi[0].instance;
+ syslog(LOG_ERR, "target zone is not found, default to %d\n",
+ target_thermal_zone);
+ }
+ while (1) {
+ sleep(ticktime);
+ show_title_bar();
+ show_sensors_w();
+ update_thermal_data();
+ if (!dialogue_on) {
+ show_data_w();
+ show_cooling_device();
+ }
+ time_elapsed += ticktime;
+ controller_handler(trec[0].temp[target_tz_index] / 1000,
+ &yk);
+ trec[0].pid_out_pct = yk;
+ if (!dialogue_on)
+ show_control_w();
+ if (tmon_exit)
+ break;
+ }
+ tmon_cleanup();
+ return 0;
+}
+
+static void start_daemon_mode()
+{
+ daemon_mode = 1;
+ /* fork */
+ pid_t sid, pid = fork();
+ if (pid < 0) {
+ exit(EXIT_FAILURE);
+ } else if (pid > 0)
+ /* exit the parent; the child continues as the daemon */
+ exit(EXIT_SUCCESS);
+
+ /* disable the TUI; not strictly necessary, but it saves some resources */
+ disable_tui();
+
+ /* change the file mode mask */
+ umask(S_IWGRP | S_IWOTH);
+
+ /* new SID for the daemon process */
+ sid = setsid();
+ if (sid < 0)
+ exit(EXIT_FAILURE);
+
+ /* change working directory */
+ if ((chdir("/")) < 0)
+ exit(EXIT_FAILURE);
+
+
+ sleep(10);
+
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+}
diff --git a/tools/thermal/tmon/tmon.h b/tools/thermal/tmon/tmon.h
new file mode 100644
index 000000000..9e3c49c54
--- /dev/null
+++ b/tools/thermal/tmon/tmon.h
@@ -0,0 +1,204 @@
+/*
+ * tmon.h contains data structures and constants used by TMON
+ *
+ * Copyright (C) 2012 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 or later as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ */
+
+#ifndef TMON_H
+#define TMON_H
+
+#define MAX_DISP_TEMP 125
+#define MAX_CTRL_TEMP 105
+#define MIN_CTRL_TEMP 40
+#define MAX_NR_TZONE 16
+#define MAX_NR_CDEV 32
+#define MAX_NR_TRIP 16
+#define MAX_NR_CDEV_TRIP 12 /* number of cooling devices that can bind
+ * to a thermal zone trip.
+ */
+#define MAX_TEMP_KC 140000
+/* starting char position to draw sensor data, such as tz names
+ * trip point list, etc.
+ */
+#define DATA_LEFT_ALIGN 10
+#define NR_LINES_TZDATA 1
+#define TMON_LOG_FILE "/var/tmp/tmon.log"
+
+extern unsigned long ticktime;
+extern double time_elapsed;
+extern unsigned long target_temp_user;
+extern int dialogue_on;
+extern char ctrl_cdev[];
+extern pthread_mutex_t input_lock;
+extern int tmon_exit;
+extern int target_thermal_zone;
+/* use fixed size records to simplify data processing and transfer
+ * TBD: more info to be added, e.g. programmable trip point data.
+ */
+struct thermal_data_record {
+ struct timeval tv;
+ unsigned long temp[MAX_NR_TZONE];
+ double pid_out_pct;
+};
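+
+/* trec[] is used as a circular buffer of such records;
+ * cur_thermal_record indexes the most recent sample and is advanced by
+ * update_thermal_data().
+ */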
+
+struct cdev_info {
+ char type[64];
+ int instance;
+ unsigned long max_state;
+ unsigned long cur_state;
+ unsigned long flag;
+};
+
+enum trip_type {
+ THERMAL_TRIP_CRITICAL,
+ THERMAL_TRIP_HOT,
+ THERMAL_TRIP_PASSIVE,
+ THERMAL_TRIP_ACTIVE,
+ NR_THERMAL_TRIP_TYPE,
+};
+
+struct trip_point {
+ enum trip_type type;
+ unsigned long temp;
+ unsigned long hysteresis;
+ int attribute; /* programmability etc. */
+};
+
+/* thermal zone configuration information, binding with cooling devices could
+ * change at runtime.
+ */
+struct tz_info {
+ char type[256]; /* e.g. acpitz */
+ int instance;
+ int passive; /* active zone has passive node to force passive mode */
+ int nr_cdev; /* number of cooling devices bound */
+ int nr_trip_pts;
+ struct trip_point tp[MAX_NR_TRIP];
+ unsigned long cdev_binding; /* bitmap for attached cdevs */
+ /* cdev-to-trip-point bindings; one cdev may bind to multiple trips */
+ unsigned long trip_binding[MAX_NR_CDEV];
+};
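+
+/* An illustrative (made-up) example: cdev_binding = 0x5 means cooling
+ * devices 0 and 2 are attached to this zone, and trip_binding[2] = 0x2
+ * would mean cdev2 is bound to trip point 1 of the zone.
+ */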
+
+struct tmon_platform_data {
+ int nr_tz_sensor;
+ int nr_cooling_dev;
+ /* keep track of instance ids since there might be gaps */
+ int max_tz_instance;
+ int max_cdev_instance;
+ struct tz_info *tzi;
+ struct cdev_info *cdi;
+};
+
+struct control_ops {
+ void (*set_ratio)(unsigned long ratio);
+ unsigned long (*get_ratio)(unsigned long ratio);
+
+};
+
+enum cdev_types {
+ CDEV_TYPE_PROC,
+ CDEV_TYPE_FAN,
+ CDEV_TYPE_MEM,
+ CDEV_TYPE_NR,
+};
+
+/* REVISIT: the idea is to group sensors if possible, e.g. on intel mid
+ * we have "skin0", "skin1", "sys", "msicdie"
+ * on DPTF enabled systems, we might have PCH, TSKN, TAMB, etc.
+ */
+enum tzone_types {
+ TZONE_TYPE_ACPI,
+ TZONE_TYPE_PCH,
+ TZONE_TYPE_NR,
+};
+
+/* limit the output of PID controller adjustment */
+#define LIMIT_HIGH (95)
+#define LIMIT_LOW (2)
+
+struct pid_params {
+ double kp; /* Controller gain from Dialog Box */
+ double ki; /* Time-constant for I action from Dialog Box */
+ double kd; /* Time-constant for D action from Dialog Box */
+ double ts;
+ double k_lpf;
+
+ double t_target;
+ double y_k;
+};
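+
+/* A sketch of the discrete control law these gains are assumed to feed
+ * (the controller itself is implemented in pid.c):
+ *
+ * e_k = t_target - t_measured
+ * y_k = kp * e_k + ki * ts * sum(e_i) + kd * (e_k - e_(k-1)) / ts
+ *
+ * with y_k clamped to [LIMIT_LOW, LIMIT_HIGH] percent before being
+ * applied via set_ctrl_state().
+ */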
+
+extern int init_thermal_controller(void);
+extern void controller_handler(const double xk, double *yk);
+
+extern struct tmon_platform_data ptdata;
+extern struct pid_params p_param;
+
+extern FILE *tmon_log;
+extern int cur_thermal_record; /* index to the trec array */
+extern struct thermal_data_record trec[];
+extern const char *trip_type_name[];
+extern unsigned long no_control;
+
+extern void initialize_curses(void);
+extern void show_controller_stats(char *line);
+extern void show_title_bar(void);
+extern void setup_windows(void);
+extern void disable_tui(void);
+extern void show_sensors_w(void);
+extern void show_data_w(void);
+extern void write_status_bar(int x, char *line);
+extern void show_control_w();
+
+extern void show_cooling_device(void);
+extern void show_dialogue(void);
+extern int update_thermal_data(void);
+
+extern int probe_thermal_sysfs(void);
+extern void free_thermal_data(void);
+extern void resize_handler(int sig);
+extern void set_ctrl_state(unsigned long state);
+extern void get_ctrl_state(unsigned long *state);
+extern void *handle_tui_events(void *arg);
+extern int sysfs_set_ulong(char *path, char *filename, unsigned long val);
+extern int zone_instance_to_index(int zone_inst);
+extern void close_windows(void);
+
+#define PT_COLOR_DEFAULT 1
+#define PT_COLOR_HEADER_BAR 2
+#define PT_COLOR_ERROR 3
+#define PT_COLOR_RED 4
+#define PT_COLOR_YELLOW 5
+#define PT_COLOR_GREEN 6
+#define PT_COLOR_BRIGHT 7
+#define PT_COLOR_BLUE 8
+
+/* each thermal zone uses 12 chars: 8 for the name, 2 for the instance,
+ * and 2 spaces; also used to list trip points in the form AAAC, where
+ * A: Active
+ * C: Critical
+ */
+#define TZONE_RECORD_SIZE 12
+#define TZ_LEFT_ALIGN 32
+#define CDEV_NAME_SIZE 20
+#define CDEV_FLAG_IN_CONTROL (1 << 0)
+
+/* dialogue box starts */
+#define DIAG_X 48
+#define DIAG_Y 8
+#define THERMAL_SYSFS "/sys/class/thermal"
+#define CDEV "cooling_device"
+#define TZONE "thermal_zone"
+#define TDATA_LEFT 16
+#endif /* TMON_H */
diff --git a/tools/thermal/tmon/tui.c b/tools/thermal/tmon/tui.c
new file mode 100644
index 000000000..b5d1c6b22
--- /dev/null
+++ b/tools/thermal/tmon/tui.c
@@ -0,0 +1,665 @@
+/*
+ * tui.c ncurses text user interface for TMON program
+ *
+ * Copyright (C) 2013 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 or later as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Author: Jacob Pan <jacob.jun.pan@linux.intel.com>
+ *
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ncurses.h>
+#include <time.h>
+#include <syslog.h>
+#include <panel.h>
+#include <pthread.h>
+#include <signal.h>
+
+#include "tmon.h"
+
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+
+static PANEL *data_panel;
+static PANEL *dialogue_panel;
+static PANEL *top;
+
+static WINDOW *title_bar_window;
+static WINDOW *tz_sensor_window;
+static WINDOW *cooling_device_window;
+static WINDOW *control_window;
+static WINDOW *status_bar_window;
+static WINDOW *thermal_data_window;
+static WINDOW *dialogue_window;
+
+char status_bar_slots[10][40];
+static void draw_hbar(WINDOW *win, int y, int start, int len,
+ unsigned long pattern, bool end);
+
+static int maxx, maxy;
+static int maxwidth = 200;
+
+#define TITLE_BAR_HEIGHT 1
+#define SENSOR_WIN_HEIGHT 4 /* one row for tz name, one for trip points */
+
+
+/* daemon mode flag (set by startup parameter -d) */
+static int tui_disabled;
+
+static void close_panel(PANEL *p)
+{
+ if (p) {
+ del_panel(p);
+ p = NULL;
+ }
+}
+
+static void close_window(WINDOW *win)
+{
+ if (win) {
+ delwin(win);
+ win = NULL;
+ }
+}
+
+void close_windows(void)
+{
+ if (tui_disabled)
+ return;
+ /* must delete panels before their attached windows */
+ if (dialogue_window)
+ close_panel(dialogue_panel);
+ if (cooling_device_window)
+ close_panel(data_panel);
+
+ close_window(title_bar_window);
+ close_window(tz_sensor_window);
+ close_window(status_bar_window);
+ close_window(cooling_device_window);
+ close_window(control_window);
+ close_window(thermal_data_window);
+ close_window(dialogue_window);
+
+}
+
+void write_status_bar(int x, char *line)
+{
+ mvwprintw(status_bar_window, 0, x, "%s", line);
+ wrefresh(status_bar_window);
+}
+
+/* wrap at 5 */
+#define DIAG_DEV_ROWS 5
+/*
+ * list cooling devices + "set temp" entry; wraps after 5 rows, if they fit
+ */
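+/* For example, with 7 cooling devices there are 8 entries (including
+ * "Set Temp"); rows = min(max(5, (8 + 1) / 2), 8) = 5, so the dialogue
+ * wraps into two columns of 5 and 3 entries.
+ */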
+static int diag_dev_rows(void)
+{
+ int entries = ptdata.nr_cooling_dev + 1;
+ int rows = max(DIAG_DEV_ROWS, (entries + 1) / 2);
+ return min(rows, entries);
+}
+
+void setup_windows(void)
+{
+ int y_begin = 1;
+
+ if (tui_disabled)
+ return;
+
+ getmaxyx(stdscr, maxy, maxx);
+ resizeterm(maxy, maxx);
+
+ title_bar_window = subwin(stdscr, TITLE_BAR_HEIGHT, maxx, 0, 0);
+ y_begin += TITLE_BAR_HEIGHT;
+
+ tz_sensor_window = subwin(stdscr, SENSOR_WIN_HEIGHT, maxx, y_begin, 0);
+ y_begin += SENSOR_WIN_HEIGHT;
+
+ cooling_device_window = subwin(stdscr, ptdata.nr_cooling_dev + 3, maxx,
+ y_begin, 0);
+ y_begin += ptdata.nr_cooling_dev + 3; /* 2 border lines + 1 header */
+ /* Two lines show the borders and one line per tz shows the trip
+ * point position and value.
+ * The dialogue window is a pop-up; when needed, it lies on top of
+ * the cdev window.
+ */
+
+ dialogue_window = subwin(stdscr, diag_dev_rows() + 5, maxx-50,
+ DIAG_Y, DIAG_X);
+
+ thermal_data_window = subwin(stdscr, ptdata.nr_tz_sensor *
+ NR_LINES_TZDATA + 3, maxx, y_begin, 0);
+ y_begin += ptdata.nr_tz_sensor * NR_LINES_TZDATA + 3;
+ control_window = subwin(stdscr, 4, maxx, y_begin, 0);
+
+ scrollok(cooling_device_window, TRUE);
+ maxwidth = maxx - 18;
+ status_bar_window = subwin(stdscr, 1, maxx, maxy-1, 0);
+
+ strcpy(status_bar_slots[0], " Ctrl-c - Quit ");
+ strcpy(status_bar_slots[1], " TAB - Tuning ");
+ wmove(status_bar_window, 1, 30);
+
+ /* prepare panels for dialogue, if panel already created then we must
+ * be doing resizing, so just replace windows with new ones, old ones
+ * should have been deleted by close_window
+ */
+ data_panel = new_panel(cooling_device_window);
+ if (!data_panel)
+ syslog(LOG_DEBUG, "No data panel\n");
+ else {
+ if (dialogue_window) {
+ dialogue_panel = new_panel(dialogue_window);
+ if (!dialogue_panel)
+ syslog(LOG_DEBUG, "No dialogue panel\n");
+ else {
+ /* Set up the user pointer to the next panel*/
+ set_panel_userptr(data_panel, dialogue_panel);
+ set_panel_userptr(dialogue_panel, data_panel);
+ top = data_panel;
+ }
+ } else
+ syslog(LOG_INFO, "no dialogue win, term too small\n");
+ }
+ doupdate();
+ werase(stdscr);
+ refresh();
+}
+
+void resize_handler(int sig)
+{
+ /* start over when term gets resized, but first we clean up */
+ close_windows();
+ endwin();
+ refresh();
+ clear();
+ getmaxyx(stdscr, maxy, maxx); /* get the new screen size */
+ setup_windows();
+ /* rate limit */
+ sleep(1);
+ syslog(LOG_DEBUG, "SIG %d, term resized to %d x %d\n",
+ sig, maxy, maxx);
+ signal(SIGWINCH, resize_handler);
+}
+
+const char cdev_title[] = " COOLING DEVICES ";
+void show_cooling_device(void)
+{
+ int i, j, x, y = 0;
+
+ if (tui_disabled || !cooling_device_window)
+ return;
+
+ werase(cooling_device_window);
+ wattron(cooling_device_window, A_BOLD);
+ mvwprintw(cooling_device_window, 1, 1,
+ "ID Cooling Dev Cur Max Thermal Zone Binding");
+ wattroff(cooling_device_window, A_BOLD);
+ for (j = 0; j < ptdata.nr_cooling_dev; j++) {
+ /* draw the cooling device list on the left in the order of
+ * cooling device instances, skipping unused ids.
+ */
+ mvwprintw(cooling_device_window, j + 2, 1,
+ "%02d %12.12s%6d %6d",
+ ptdata.cdi[j].instance,
+ ptdata.cdi[j].type,
+ ptdata.cdi[j].cur_state,
+ ptdata.cdi[j].max_state);
+ }
+
+ /* show cdev binding, y is the global cooling device instance */
+ for (i = 0; i < ptdata.nr_tz_sensor; i++) {
+ int tz_inst = ptdata.tzi[i].instance;
+ for (j = 0; j < ptdata.nr_cooling_dev; j++) {
+ int cdev_inst;
+ y = j;
+ x = tz_inst * TZONE_RECORD_SIZE + TZ_LEFT_ALIGN;
+
+ draw_hbar(cooling_device_window, y+2, x,
+ TZONE_RECORD_SIZE-1, ACS_VLINE, false);
+
+ /* draw a column of spaces to separate thermal zones */
+ mvwprintw(cooling_device_window, y+2, x-1, " ");
+ if (ptdata.tzi[i].cdev_binding) {
+ cdev_inst = ptdata.cdi[j].instance;
+ unsigned long trip_binding =
+ ptdata.tzi[i].trip_binding[cdev_inst];
+ int k = 0; /* per-zone trip point id bound
+ * to this cdev; one-to-many is
+ * possible based on the binding
+ * bitmask.
+ */
+ syslog(LOG_DEBUG,
+ "bind tz%d cdev%d tp%lx %d cdev%lx\n",
+ i, j, trip_binding, y,
+ ptdata.tzi[i].cdev_binding);
+ /* draw each trip binding for the cdev */
+ while (trip_binding >>= 1) {
+ k++;
+ if (!(trip_binding & 1))
+ continue;
+ /* draw '*' to show binding */
+ mvwprintw(cooling_device_window,
+ y + 2,
+ x + ptdata.tzi[i].nr_trip_pts -
+ k - 1, "*");
+ }
+ }
+ }
+ }
+ /* draw the border after the data so that the border will not be
+ * messed up even if there is not enough space for all the data
+ */
+ wborder(cooling_device_window, 0, 0, 0, 0, 0, 0, 0, 0);
+ wattron(cooling_device_window, A_BOLD);
+ mvwprintw(cooling_device_window, 0, maxx/2 - sizeof(cdev_title),
+ "%s", cdev_title);
+ wattroff(cooling_device_window, A_BOLD);
+
+ wrefresh(cooling_device_window);
+}
+
+const char DIAG_TITLE[] = "[ TUNABLES ]";
+void show_dialogue(void)
+{
+ int j, x = 0, y = 0;
+ int rows, cols;
+ WINDOW *w = dialogue_window;
+
+ if (tui_disabled || !w)
+ return;
+
+ getmaxyx(w, rows, cols);
+
+ /* Silence compiler 'unused' warnings */
+ (void)cols;
+
+ werase(w);
+ box(w, 0, 0);
+ mvwprintw(w, 0, maxx/4, "%s", DIAG_TITLE);
+ /* list all the available tunables */
+ for (j = 0; j <= ptdata.nr_cooling_dev; j++) {
+ y = j % diag_dev_rows();
+ if (y == 0 && j != 0)
+ x += 20;
+ if (j == ptdata.nr_cooling_dev)
+ /* save last choice for target temp */
+ mvwprintw(w, y+1, x+1, "%C-%.12s", 'A'+j, "Set Temp");
+ else
+ mvwprintw(w, y+1, x+1, "%C-%.10s-%2d", 'A'+j,
+ ptdata.cdi[j].type, ptdata.cdi[j].instance);
+ }
+ wattron(w, A_BOLD);
+ mvwprintw(w, diag_dev_rows()+1, 1, "Enter Choice [A-Z]?");
+ wattroff(w, A_BOLD);
+ /* print legend at the bottom line */
+ mvwprintw(w, rows - 2, 1,
+ "Legend: A=Active, P=Passive, C=Critical");
+
+ wrefresh(dialogue_window);
+}
+
+void write_dialogue_win(char *buf, int y, int x)
+{
+ WINDOW *w = dialogue_window;
+
+ mvwprintw(w, y, x, "%s", buf);
+}
+
+const char control_title[] = " CONTROLS ";
+void show_control_w(void)
+{
+ unsigned long state;
+
+ get_ctrl_state(&state);
+
+ if (tui_disabled || !control_window)
+ return;
+
+ werase(control_window);
+ mvwprintw(control_window, 1, 1,
+ "PID gain: kp=%2.2f ki=%2.2f kd=%2.2f Output %2.2f",
+ p_param.kp, p_param.ki, p_param.kd, p_param.y_k);
+
+ mvwprintw(control_window, 2, 1,
+ "Target Temp: %2.1fC, Zone: %d, Control Device: %.12s",
+ p_param.t_target, target_thermal_zone, ctrl_cdev);
+
+ /* draw border last such that everything is within boundary */
+ wborder(control_window, 0, 0, 0, 0, 0, 0, 0, 0);
+ wattron(control_window, A_BOLD);
+ mvwprintw(control_window, 0, maxx/2 - sizeof(control_title),
+ "%s", control_title);
+ wattroff(control_window, A_BOLD);
+
+ wrefresh(control_window);
+}
+
+void initialize_curses(void)
+{
+ if (tui_disabled)
+ return;
+
+ initscr();
+ start_color();
+ keypad(stdscr, TRUE); /* enable keyboard mapping */
+ nonl(); /* tell curses not to do NL->CR/NL on output */
+ cbreak(); /* take input chars one at a time */
+ noecho(); /* don't echo input */
+ curs_set(0); /* turn off cursor */
+ use_default_colors();
+
+ init_pair(PT_COLOR_DEFAULT, COLOR_WHITE, COLOR_BLACK);
+ init_pair(PT_COLOR_HEADER_BAR, COLOR_BLACK, COLOR_WHITE);
+ init_pair(PT_COLOR_ERROR, COLOR_BLACK, COLOR_RED);
+ init_pair(PT_COLOR_RED, COLOR_WHITE, COLOR_RED);
+ init_pair(PT_COLOR_YELLOW, COLOR_WHITE, COLOR_YELLOW);
+ init_pair(PT_COLOR_GREEN, COLOR_WHITE, COLOR_GREEN);
+ init_pair(PT_COLOR_BLUE, COLOR_WHITE, COLOR_BLUE);
+ init_pair(PT_COLOR_BRIGHT, COLOR_WHITE, COLOR_BLACK);
+
+}
+
+void show_title_bar(void)
+{
+ int i;
+ int x = 0;
+
+ if (tui_disabled || !title_bar_window)
+ return;
+
+ wattrset(title_bar_window, COLOR_PAIR(PT_COLOR_HEADER_BAR));
+ wbkgd(title_bar_window, COLOR_PAIR(PT_COLOR_HEADER_BAR));
+ werase(title_bar_window);
+
+ mvwprintw(title_bar_window, 0, 0,
+ " TMON v%s", VERSION);
+
+ wrefresh(title_bar_window);
+
+ werase(status_bar_window);
+
+ for (i = 0; i < 10; i++) {
+ if (strlen(status_bar_slots[i]) == 0)
+ continue;
+ wattron(status_bar_window, A_REVERSE);
+ mvwprintw(status_bar_window, 0, x, "%s", status_bar_slots[i]);
+ wattroff(status_bar_window, A_REVERSE);
+ x += strlen(status_bar_slots[i]) + 1;
+ }
+ wrefresh(status_bar_window);
+}
+
+static void handle_input_val(int ch)
+{
+ char buf[32];
+ int val;
+ char path[256];
+ WINDOW *w = dialogue_window;
+
+ echo();
+ keypad(w, TRUE);
+ wgetnstr(w, buf, 31);
+ val = atoi(buf);
+
+ if (ch == ptdata.nr_cooling_dev) {
+ snprintf(buf, 31, "Invalid Temp %d! %d-%d", val,
+ MIN_CTRL_TEMP, MAX_CTRL_TEMP);
+ if (val < MIN_CTRL_TEMP || val > MAX_CTRL_TEMP)
+ write_status_bar(40, buf);
+ else {
+ p_param.t_target = val;
+ snprintf(buf, 31, "Set New Target Temp %d", val);
+ write_status_bar(40, buf);
+ }
+ } else {
+ snprintf(path, 256, "%s/%s%d", THERMAL_SYSFS,
+ CDEV, ptdata.cdi[ch].instance);
+ sysfs_set_ulong(path, "cur_state", val);
+ }
+ noecho();
+ dialogue_on = 0;
+ show_data_w();
+ show_control_w();
+
+ top = (PANEL *)panel_userptr(top);
+ top_panel(top);
+}
+
+static void handle_input_choice(int ch)
+{
+ char buf[48];
+ int base = 0;
+ int cdev_id = 0;
+
+ if ((ch >= 'A' && ch <= 'A' + ptdata.nr_cooling_dev) ||
+ (ch >= 'a' && ch <= 'a' + ptdata.nr_cooling_dev)) {
+ base = (ch < 'a') ? 'A' : 'a';
+ cdev_id = ch - base;
+ if (ptdata.nr_cooling_dev == cdev_id)
+ snprintf(buf, sizeof(buf), "New Target Temp:");
+ else
+ snprintf(buf, sizeof(buf), "New Value for %.10s-%2d: ",
+ ptdata.cdi[cdev_id].type,
+ ptdata.cdi[cdev_id].instance);
+ write_dialogue_win(buf, diag_dev_rows() + 2, 2);
+ handle_input_val(cdev_id);
+ } else {
+ snprintf(buf, sizeof(buf), "Invalid selection %d", ch);
+ write_dialogue_win(buf, 8, 2);
+ }
+}
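+
+/* For example, with three cooling devices, keys A-C (or a-c) pick a
+ * cooling device to adjust, while D/d selects the "Set Temp" entry
+ * handled in handle_input_val() above.
+ */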
+
+void *handle_tui_events(void *arg)
+{
+ int ch;
+
+ keypad(cooling_device_window, TRUE);
+ while ((ch = wgetch(cooling_device_window)) != EOF) {
+ if (tmon_exit)
+ break;
+ /* when term size is too small, no dialogue panels are set.
+ * we need to filter out such cases.
+ */
+ if (!data_panel || !dialogue_panel ||
+ !cooling_device_window ||
+ !dialogue_window) {
+
+ continue;
+ }
+ pthread_mutex_lock(&input_lock);
+ if (dialogue_on) {
+ handle_input_choice(ch);
+ /* top panel filter */
+ if (ch == 'q' || ch == 'Q')
+ ch = 0;
+ }
+ switch (ch) {
+ case KEY_LEFT:
+ box(cooling_device_window, 10, 0);
+ break;
+ case 9: /* TAB */
+ top = (PANEL *)panel_userptr(top);
+ top_panel(top);
+ if (top == dialogue_panel) {
+ dialogue_on = 1;
+ show_dialogue();
+ } else {
+ dialogue_on = 0;
+ /* force refresh */
+ show_data_w();
+ show_control_w();
+ }
+ break;
+ case 'q':
+ case 'Q':
+ tmon_exit = 1;
+ break;
+ }
+ update_panels();
+ doupdate();
+ pthread_mutex_unlock(&input_lock);
+ }
+
+ if (arg)
+ *(int *)arg = 0; /* make gcc happy */
+
+ return NULL;
+}
+
+/* draw a horizontal bar in given pattern */
+static void draw_hbar(WINDOW *win, int y, int start, int len, unsigned long ptn,
+ bool end)
+{
+ mvwaddch(win, y, start, ptn);
+ whline(win, ptn, len);
+ if (end)
+ mvwaddch(win, y, MAX_DISP_TEMP+TDATA_LEFT, ']');
+}
+
+static char trip_type_to_char(int type)
+{
+ switch (type) {
+ case THERMAL_TRIP_CRITICAL: return 'C';
+ case THERMAL_TRIP_HOT: return 'H';
+ case THERMAL_TRIP_PASSIVE: return 'P';
+ case THERMAL_TRIP_ACTIVE: return 'A';
+ default:
+ return '?';
+ }
+}
+
+/* fill a string with trip point type and value in one line
+ * e.g. P(56) C(106)
+ * maintain the distance one degree per char
+ */
+static void draw_tp_line(int tz, int y)
+{
+ int j;
+ int x;
+
+ for (j = 0; j < ptdata.tzi[tz].nr_trip_pts; j++) {
+ x = ptdata.tzi[tz].tp[j].temp / 1000;
+ mvwprintw(thermal_data_window, y + 0, x + TDATA_LEFT,
+ "%c%d", trip_type_to_char(ptdata.tzi[tz].tp[j].type),
+ x);
+ syslog(LOG_INFO, "%s:tz %d tp %d temp = %lu\n", __func__,
+ tz, j, ptdata.tzi[tz].tp[j].temp);
+ }
+}
+
+const char data_win_title[] = " THERMAL DATA ";
+void show_data_w(void)
+{
+ int i;
+
+
+ if (tui_disabled || !thermal_data_window)
+ return;
+
+ werase(thermal_data_window);
+ wattron(thermal_data_window, A_BOLD);
+ mvwprintw(thermal_data_window, 0, maxx/2 - sizeof(data_win_title),
+ data_win_title);
+ wattroff(thermal_data_window, A_BOLD);
+ /* draw a line as ruler */
+ for (i = 10; i < MAX_DISP_TEMP; i += 10)
+ mvwprintw(thermal_data_window, 1, i+TDATA_LEFT, "%2d", i);
+
+ for (i = 0; i < ptdata.nr_tz_sensor; i++) {
+ int temp = trec[cur_thermal_record].temp[i] / 1000;
+ int y = 0;
+
+ y = i * NR_LINES_TZDATA + 2;
+ /* y at tz temp data line */
+ mvwprintw(thermal_data_window, y, 1, "%6.6s%2d:[%3d][",
+ ptdata.tzi[i].type,
+ ptdata.tzi[i].instance, temp);
+ draw_hbar(thermal_data_window, y, TDATA_LEFT, temp, ACS_RARROW,
+ true);
+ draw_tp_line(i, y);
+ }
+ wborder(thermal_data_window, 0, 0, 0, 0, 0, 0, 0, 0);
+ wrefresh(thermal_data_window);
+}
+
+const char tz_title[] = "THERMAL ZONES(SENSORS)";
+
+void show_sensors_w(void)
+{
+ int i, j;
+ char buffer[512];
+
+ if (tui_disabled || !tz_sensor_window)
+ return;
+
+ werase(tz_sensor_window);
+
+ memset(buffer, 0, sizeof(buffer));
+ wattron(tz_sensor_window, A_BOLD);
+ mvwprintw(tz_sensor_window, 1, 1, "Thermal Zones:");
+ wattroff(tz_sensor_window, A_BOLD);
+
+ mvwprintw(tz_sensor_window, 1, TZ_LEFT_ALIGN, "%s", buffer);
+ /* fill trip points for each tzone */
+ wattron(tz_sensor_window, A_BOLD);
+ mvwprintw(tz_sensor_window, 2, 1, "Trip Points:");
+ wattroff(tz_sensor_window, A_BOLD);
+
+ /* draw trip point from low to high for each tz */
+ for (i = 0; i < ptdata.nr_tz_sensor; i++) {
+ int inst = ptdata.tzi[i].instance;
+
+ mvwprintw(tz_sensor_window, 1,
+ TZ_LEFT_ALIGN+TZONE_RECORD_SIZE * inst, "%.9s%02d",
+ ptdata.tzi[i].type, ptdata.tzi[i].instance);
+ for (j = ptdata.tzi[i].nr_trip_pts - 1; j >= 0; j--) {
+ /* loop through all trip points */
+ char type;
+ int tp_pos;
+ /* reverse the order here since trips are sorted
+ * in ascending order in terms of temperature.
+ */
+ tp_pos = ptdata.tzi[i].nr_trip_pts - j - 1;
+
+ type = trip_type_to_char(ptdata.tzi[i].tp[j].type);
+ mvwaddch(tz_sensor_window, 2,
+ inst * TZONE_RECORD_SIZE + TZ_LEFT_ALIGN +
+ tp_pos, type);
+ syslog(LOG_DEBUG, "draw tz %d tp %d ch:%c\n",
+ inst, j, type);
+ }
+ }
+ wborder(tz_sensor_window, 0, 0, 0, 0, 0, 0, 0, 0);
+ wattron(tz_sensor_window, A_BOLD);
+ mvwprintw(tz_sensor_window, 0, maxx/2 - sizeof(tz_title), "%s", tz_title);
+ wattroff(tz_sensor_window, A_BOLD);
+ wrefresh(tz_sensor_window);
+}
+
+void disable_tui(void)
+{
+ tui_disabled = 1;
+}
diff --git a/tools/time/udelay_test.sh b/tools/time/udelay_test.sh
new file mode 100755
index 000000000..12d46b926
--- /dev/null
+++ b/tools/time/udelay_test.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# udelay() test script
+#
+# Test is executed by writing and reading to /sys/kernel/debug/udelay_test
+# and exercises a variety of delays to ensure that udelay() is delaying
+# at least as long as requested (as compared to ktime).
+#
+# Copyright (C) 2014 Google, Inc.
+#
+# This software is licensed under the terms of the GNU General Public
+# License version 2, as published by the Free Software Foundation, and
+# may be copied, distributed, and modified under those terms.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+MODULE_NAME=udelay_test
+UDELAY_PATH=/sys/kernel/debug/udelay_test
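+
+# A manual round-trip looks like this (assuming the udelay_test module
+# is available and debugfs is mounted):
+#
+#   /sbin/modprobe udelay_test
+#   echo 100 > /sys/kernel/debug/udelay_test
+#   cat /sys/kernel/debug/udelay_test
+#   /sbin/modprobe -r udelay_test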
+
+setup()
+{
+ /sbin/modprobe -q $MODULE_NAME
+ tmp_file=`mktemp`
+}
+
+test_one()
+{
+ delay=$1
+ echo $delay > $UDELAY_PATH
+ tee -a $tmp_file < $UDELAY_PATH
+}
+
+cleanup()
+{
+ if [ -f $tmp_file ]; then
+ rm $tmp_file
+ fi
+ /sbin/modprobe -q -r $MODULE_NAME
+}
+
+trap cleanup EXIT
+setup
+
+# Delay for a variety of times.
+# 1..200, 200..500 (by 10), 500..2000 (by 100)
+for (( delay = 1; delay < 200; delay += 1 )); do
+ test_one $delay
+done
+for (( delay = 200; delay < 500; delay += 10 )); do
+ test_one $delay
+done
+for (( delay = 500; delay <= 2000; delay += 100 )); do
+ test_one $delay
+done
+
+# Search for failures
+retcode=0
+count=`grep -c FAIL $tmp_file`
+if [ $? -eq 0 ]; then
+ echo "ERROR: $count delays failed to delay long enough"
+ retcode=1
+fi
+
+exit $retcode
diff --git a/tools/usb/.gitignore b/tools/usb/.gitignore
new file mode 100644
index 000000000..1b7448981
--- /dev/null
+++ b/tools/usb/.gitignore
@@ -0,0 +1,2 @@
+ffs-test
+testusb
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
new file mode 100644
index 000000000..01d758d73
--- /dev/null
+++ b/tools/usb/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for USB tools
+
+PTHREAD_LIBS = -lpthread
+WARNINGS = -Wall -Wextra
+CFLAGS = $(WARNINGS) -g -I../include
+LDFLAGS = $(PTHREAD_LIBS)
+
+all: testusb ffs-test
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+clean:
+ $(RM) testusb ffs-test
diff --git a/tools/usb/ffs-aio-example/multibuff/device_app/aio_multibuff.c b/tools/usb/ffs-aio-example/multibuff/device_app/aio_multibuff.c
new file mode 100644
index 000000000..aaca1f44e
--- /dev/null
+++ b/tools/usb/ffs-aio-example/multibuff/device_app/aio_multibuff.c
@@ -0,0 +1,380 @@
+/*
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * For more information, please refer to <http://unlicense.org/>
+ */
+
+#define _BSD_SOURCE /* for endian.h */
+
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sys/eventfd.h>
+
+#include "libaio.h"
+#define IOCB_FLAG_RESFD (1 << 0)
+
+#include <linux/usb/functionfs.h>
+
+#define BUF_LEN 8192
+#define BUFS_MAX 128
+#define AIO_MAX (BUFS_MAX*2)
+
+/******************** Descriptors and Strings *******************************/
+
+static const struct {
+ struct usb_functionfs_descs_head_v2 header;
+ __le32 fs_count;
+ __le32 hs_count;
+ struct {
+ struct usb_interface_descriptor intf;
+ struct usb_endpoint_descriptor_no_audio bulk_sink;
+ struct usb_endpoint_descriptor_no_audio bulk_source;
+ } __attribute__ ((__packed__)) fs_descs, hs_descs;
+} __attribute__ ((__packed__)) descriptors = {
+ .header = {
+ .magic = htole32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
+ .flags = htole32(FUNCTIONFS_HAS_FS_DESC |
+ FUNCTIONFS_HAS_HS_DESC),
+ .length = htole32(sizeof(descriptors)),
+ },
+ .fs_count = htole32(3),
+ .fs_descs = {
+ .intf = {
+ .bLength = sizeof(descriptors.fs_descs.intf),
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .bulk_sink = {
+ .bLength = sizeof(descriptors.fs_descs.bulk_sink),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ },
+ .bulk_source = {
+ .bLength = sizeof(descriptors.fs_descs.bulk_source),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ },
+ },
+ .hs_count = htole32(3),
+ .hs_descs = {
+ .intf = {
+ .bLength = sizeof(descriptors.hs_descs.intf),
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .bulk_sink = {
+ .bLength = sizeof(descriptors.hs_descs.bulk_sink),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = htole16(512),
+ },
+ .bulk_source = {
+ .bLength = sizeof(descriptors.hs_descs.bulk_source),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = htole16(512),
+ },
+ },
+};
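+
+/*
+ * The layout above follows the FunctionFS v2 descriptor format: a header
+ * whose flags advertise which speed variants are present, then one count
+ * and one block of descriptors per speed; here full and high speed each
+ * carry three descriptors (one interface and two bulk endpoints).
+ */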
+
+#define STR_INTERFACE "AIO Test"
+
+static const struct {
+ struct usb_functionfs_strings_head header;
+ struct {
+ __le16 code;
+ const char str1[sizeof(STR_INTERFACE)];
+ } __attribute__ ((__packed__)) lang0;
+} __attribute__ ((__packed__)) strings = {
+ .header = {
+ .magic = htole32(FUNCTIONFS_STRINGS_MAGIC),
+ .length = htole32(sizeof(strings)),
+ .str_count = htole32(1),
+ .lang_count = htole32(1),
+ },
+ .lang0 = {
+ htole16(0x0409), /* en-us */
+ STR_INTERFACE,
+ },
+};
+
+/********************** Buffer structure *******************************/
+
+struct io_buffer {
+ struct iocb **iocb;
+ unsigned char **buf;
+ unsigned cnt;
+ unsigned len;
+ unsigned requested;
+};
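+
+/*
+ * Two such buffers are used in main() in a ping-pong fashion: while one
+ * buffer's requests are in flight, the other is refilled and resubmitted,
+ * so the IN endpoint queue is never left empty.
+ */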
+
+/******************** Endpoints handling *******************************/
+
+static void display_event(struct usb_functionfs_event *event)
+{
+ static const char *const names[] = {
+ [FUNCTIONFS_BIND] = "BIND",
+ [FUNCTIONFS_UNBIND] = "UNBIND",
+ [FUNCTIONFS_ENABLE] = "ENABLE",
+ [FUNCTIONFS_DISABLE] = "DISABLE",
+ [FUNCTIONFS_SETUP] = "SETUP",
+ [FUNCTIONFS_SUSPEND] = "SUSPEND",
+ [FUNCTIONFS_RESUME] = "RESUME",
+ };
+ switch (event->type) {
+ case FUNCTIONFS_BIND:
+ case FUNCTIONFS_UNBIND:
+ case FUNCTIONFS_ENABLE:
+ case FUNCTIONFS_DISABLE:
+ case FUNCTIONFS_SETUP:
+ case FUNCTIONFS_SUSPEND:
+ case FUNCTIONFS_RESUME:
+ printf("Event %s\n", names[event->type]);
+ }
+}
+
+static void handle_ep0(int ep0, bool *ready)
+{
+ int ret;
+ struct usb_functionfs_event event;
+
+ ret = read(ep0, &event, sizeof(event));
+ if (ret < 0) {
+ perror("unable to read event from ep0");
+ return;
+ }
+ display_event(&event);
+ switch (event.type) {
+ case FUNCTIONFS_SETUP:
+ if (event.u.setup.bRequestType & USB_DIR_IN)
+ write(ep0, NULL, 0);
+ else
+ read(ep0, NULL, 0);
+ break;
+
+ case FUNCTIONFS_ENABLE:
+ *ready = true;
+ break;
+
+ case FUNCTIONFS_DISABLE:
+ *ready = false;
+ break;
+
+ default:
+ break;
+ }
+}
+
+void init_bufs(struct io_buffer *iobuf, unsigned n, unsigned len)
+{
+ unsigned i;
+ iobuf->buf = malloc(n*sizeof(*iobuf->buf));
+ iobuf->iocb = malloc(n*sizeof(*iobuf->iocb));
+ iobuf->cnt = n;
+ iobuf->len = len;
+ iobuf->requested = 0;
+ for (i = 0; i < n; ++i) {
+ iobuf->buf[i] = malloc(len*sizeof(**iobuf->buf));
+ iobuf->iocb[i] = malloc(sizeof(**iobuf->iocb));
+ }
+}
+
+void delete_bufs(struct io_buffer *iobuf)
+{
+ unsigned i;
+ for (i = 0; i < iobuf->cnt; ++i) {
+ free(iobuf->buf[i]);
+ free(iobuf->iocb[i]);
+ }
+ free(iobuf->buf);
+ free(iobuf->iocb);
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+ unsigned i, j;
+ char *ep_path;
+
+ int ep0, ep1;
+
+ io_context_t ctx;
+
+ int evfd;
+ fd_set rfds;
+
+ struct io_buffer iobuf[2];
+ int actual = 0;
+ bool ready = false;
+
+ if (argc != 2) {
+ printf("ffs directory not specified!\n");
+ return 1;
+ }
+
+ ep_path = malloc(strlen(argv[1]) + 4 /* "/ep#" */ + 1 /* '\0' */);
+ if (!ep_path) {
+ perror("malloc");
+ return 1;
+ }
+
+ /* open endpoint files */
+ sprintf(ep_path, "%s/ep0", argv[1]);
+ ep0 = open(ep_path, O_RDWR);
+ if (ep0 < 0) {
+ perror("unable to open ep0");
+ return 1;
+ }
+ if (write(ep0, &descriptors, sizeof(descriptors)) < 0) {
+ perror("unable do write descriptors");
+ return 1;
+ }
+ if (write(ep0, &strings, sizeof(strings)) < 0) {
+ perror("unable to write strings");
+ return 1;
+ }
+ sprintf(ep_path, "%s/ep1", argv[1]);
+ ep1 = open(ep_path, O_RDWR);
+ if (ep1 < 0) {
+ perror("unable to open ep1");
+ return 1;
+ }
+
+ free(ep_path);
+
+ memset(&ctx, 0, sizeof(ctx));
+ /* setup aio context to handle up to AIO_MAX requests */
+ if (io_setup(AIO_MAX, &ctx) < 0) {
+ perror("unable to setup aio");
+ return 1;
+ }
+
+ evfd = eventfd(0, 0);
+ if (evfd < 0) {
+ perror("unable to open eventfd");
+ return 1;
+ }
+
+ for (i = 0; i < sizeof(iobuf)/sizeof(*iobuf); ++i)
+ init_bufs(&iobuf[i], BUFS_MAX, BUF_LEN);
+
+ while (1) {
+ FD_ZERO(&rfds);
+ FD_SET(ep0, &rfds);
+ FD_SET(evfd, &rfds);
+
+ ret = select(((ep0 > evfd) ? ep0 : evfd)+1,
+ &rfds, NULL, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ perror("select");
+ break;
+ }
+
+ if (FD_ISSET(ep0, &rfds))
+ handle_ep0(ep0, &ready);
+
+ /* we are waiting for function ENABLE */
+ if (!ready)
+ continue;
+
+ /*
+ * While one buffer is being transmitted, prepare and
+ * submit the requests for the other one.
+ */
+ for (i = 0; i < sizeof(iobuf)/sizeof(*iobuf); ++i) {
+ if (iobuf[i].requested)
+ continue;
+ /* prepare requests */
+ for (j = 0; j < iobuf[i].cnt; ++j) {
+ io_prep_pwrite(iobuf[i].iocb[j], ep1,
+ iobuf[i].buf[j],
+ iobuf[i].len, 0);
+ /* enable eventfd notification */
+ iobuf[i].iocb[j]->u.c.flags |= IOCB_FLAG_RESFD;
+ iobuf[i].iocb[j]->u.c.resfd = evfd;
+ }
+ /* submit table of requests */
+ ret = io_submit(ctx, iobuf[i].cnt, iobuf[i].iocb);
+ if (ret >= 0) {
+ iobuf[i].requested = ret;
+ printf("submit: %d requests buf: %d\n", ret, i);
+ } else
+ perror("unable to submit requests");
+ }
+
+		/* nothing to reap unless a completion event is ready to read */
+ if (!FD_ISSET(evfd, &rfds))
+ continue;
+
+ uint64_t ev_cnt;
+ ret = read(evfd, &ev_cnt, sizeof(ev_cnt));
+ if (ret < 0) {
+ perror("unable to read eventfd");
+ break;
+ }
+
+ struct io_event e[BUFS_MAX];
+		/* reap completed aio events */
+ ret = io_getevents(ctx, 1, BUFS_MAX, e, NULL);
+ if (ret > 0) /* if we got events */
+ iobuf[actual].requested -= ret;
+
+		/* if all requests from this buffer completed, switch to the other */
+ if (!iobuf[actual].requested)
+ actual = (actual + 1)%(sizeof(iobuf)/sizeof(*iobuf));
+ }
+
+ /* free resources */
+
+ for (i = 0; i < sizeof(iobuf)/sizeof(*iobuf); ++i)
+ delete_bufs(&iobuf[i]);
+ io_destroy(ctx);
+
+ close(ep1);
+ close(ep0);
+
+ return 0;
+}
diff --git a/tools/usb/ffs-aio-example/multibuff/host_app/Makefile b/tools/usb/ffs-aio-example/multibuff/host_app/Makefile
new file mode 100644
index 000000000..df36e4c28
--- /dev/null
+++ b/tools/usb/ffs-aio-example/multibuff/host_app/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+CC = gcc
+LIBUSB_CFLAGS = $(shell pkg-config --cflags libusb-1.0)
+LIBUSB_LIBS = $(shell pkg-config --libs libusb-1.0)
+WARNINGS = -Wall -Wextra
+CFLAGS = $(LIBUSB_CFLAGS) $(WARNINGS)
+LDFLAGS = $(LIBUSB_LIBS)
+
+all: test
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+clean:
+ $(RM) test
diff --git a/tools/usb/ffs-aio-example/multibuff/host_app/test.c b/tools/usb/ffs-aio-example/multibuff/host_app/test.c
new file mode 100644
index 000000000..2cbcce6e8
--- /dev/null
+++ b/tools/usb/ffs-aio-example/multibuff/host_app/test.c
@@ -0,0 +1,175 @@
+/*
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * For more information, please refer to <http://unlicense.org/>
+ */
+
+#include <libusb.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define VENDOR 0x1d6b
+#define PRODUCT 0x0105
+
+#define BUF_LEN 8192
+
+/*
+ * struct test_state - describes test program state
+ * @found: pointer to struct describing tested device
+ * @ctx: context, set to NULL
+ * @handle: handle of tested device
+ * @attached: indicates that device was attached to kernel, and has to be
+ * reattached at the end of test program
+ */
+
+struct test_state {
+ libusb_device *found;
+ libusb_context *ctx;
+ libusb_device_handle *handle;
+ int attached;
+};
+
+/*
+ * test_init - initialize test program
+ */
+
+int test_init(struct test_state *state)
+{
+ int i, ret;
+ ssize_t cnt;
+ libusb_device **list;
+
+ state->found = NULL;
+ state->ctx = NULL;
+ state->handle = NULL;
+ state->attached = 0;
+
+ ret = libusb_init(&state->ctx);
+ if (ret) {
+ printf("cannot init libusb: %s\n", libusb_error_name(ret));
+ return 1;
+ }
+
+ cnt = libusb_get_device_list(state->ctx, &list);
+ if (cnt <= 0) {
+ printf("no devices found\n");
+ goto error1;
+ }
+
+ for (i = 0; i < cnt; ++i) {
+ libusb_device *dev = list[i];
+ struct libusb_device_descriptor desc;
+ ret = libusb_get_device_descriptor(dev, &desc);
+ if (ret) {
+ printf("unable to get device descriptor: %s\n",
+ libusb_error_name(ret));
+ goto error2;
+ }
+ if (desc.idVendor == VENDOR && desc.idProduct == PRODUCT) {
+ state->found = dev;
+ break;
+ }
+ }
+
+ if (!state->found) {
+ printf("no devices found\n");
+ goto error2;
+ }
+
+ ret = libusb_open(state->found, &state->handle);
+ if (ret) {
+ printf("cannot open device: %s\n", libusb_error_name(ret));
+ goto error2;
+ }
+
+ if (libusb_claim_interface(state->handle, 0)) {
+ ret = libusb_detach_kernel_driver(state->handle, 0);
+ if (ret) {
+ printf("unable to detach kernel driver: %s\n",
+ libusb_error_name(ret));
+ goto error3;
+ }
+ state->attached = 1;
+ ret = libusb_claim_interface(state->handle, 0);
+ if (ret) {
+ printf("cannot claim interface: %s\n",
+ libusb_error_name(ret));
+ goto error4;
+ }
+ }
+
+ return 0;
+
+error4:
+ if (state->attached == 1)
+ libusb_attach_kernel_driver(state->handle, 0);
+
+error3:
+ libusb_close(state->handle);
+
+error2:
+ libusb_free_device_list(list, 1);
+
+error1:
+ libusb_exit(state->ctx);
+ return 1;
+}
+
+/*
+ * test_exit - cleanup test program
+ */
+
+void test_exit(struct test_state *state)
+{
+ libusb_release_interface(state->handle, 0);
+ if (state->attached == 1)
+ libusb_attach_kernel_driver(state->handle, 0);
+ libusb_close(state->handle);
+ libusb_exit(state->ctx);
+}
+
+int main(void)
+{
+ struct test_state state;
+ struct libusb_config_descriptor *conf;
+ struct libusb_interface_descriptor const *iface;
+ unsigned char addr;
+
+ if (test_init(&state))
+ return 1;
+
+ libusb_get_config_descriptor(state.found, 0, &conf);
+ iface = &conf->interface[0].altsetting[0];
+ addr = iface->endpoint[0].bEndpointAddress;
+
+ while (1) {
+ static unsigned char buffer[BUF_LEN];
+ int bytes;
+ libusb_bulk_transfer(state.handle, addr, buffer, BUF_LEN,
+ &bytes, 500);
+ }
+ test_exit(&state);
+}
diff --git a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c
new file mode 100644
index 000000000..1f44a2981
--- /dev/null
+++ b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c
@@ -0,0 +1,368 @@
+/*
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * For more information, please refer to <http://unlicense.org/>
+ */
+
+#define _DEFAULT_SOURCE /* for endian.h */
+
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/poll.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sys/eventfd.h>
+
+#include "libaio.h"
+#define IOCB_FLAG_RESFD (1 << 0)
+
+#include <linux/usb/functionfs.h>
+
+#define BUF_LEN 8192
+
+/******************** Descriptors and Strings *******************************/
+
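+/*
+ * FunctionFS descriptors in v2 format: a header, per-speed descriptor
+ * counts, then the full-speed and high-speed descriptor sets; all
+ * multi-byte fields are little-endian
+ */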
+static const struct {
+ struct usb_functionfs_descs_head_v2 header;
+ __le32 fs_count;
+ __le32 hs_count;
+ struct {
+ struct usb_interface_descriptor intf;
+ struct usb_endpoint_descriptor_no_audio bulk_sink;
+ struct usb_endpoint_descriptor_no_audio bulk_source;
+ } __attribute__ ((__packed__)) fs_descs, hs_descs;
+} __attribute__ ((__packed__)) descriptors = {
+ .header = {
+ .magic = htole32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
+ .flags = htole32(FUNCTIONFS_HAS_FS_DESC |
+ FUNCTIONFS_HAS_HS_DESC),
+ .length = htole32(sizeof(descriptors)),
+ },
+ .fs_count = htole32(3),
+ .fs_descs = {
+ .intf = {
+ .bLength = sizeof(descriptors.fs_descs.intf),
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .bulk_sink = {
+ .bLength = sizeof(descriptors.fs_descs.bulk_sink),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ },
+ .bulk_source = {
+ .bLength = sizeof(descriptors.fs_descs.bulk_source),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ },
+ },
+ .hs_count = htole32(3),
+ .hs_descs = {
+ .intf = {
+ .bLength = sizeof(descriptors.hs_descs.intf),
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .bulk_sink = {
+ .bLength = sizeof(descriptors.hs_descs.bulk_sink),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = htole16(512),
+ },
+ .bulk_source = {
+ .bLength = sizeof(descriptors.hs_descs.bulk_source),
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = htole16(512),
+ },
+ },
+};
+
+#define STR_INTERFACE "AIO Test"
+
+static const struct {
+ struct usb_functionfs_strings_head header;
+ struct {
+ __le16 code;
+ const char str1[sizeof(STR_INTERFACE)];
+ } __attribute__ ((__packed__)) lang0;
+} __attribute__ ((__packed__)) strings = {
+ .header = {
+ .magic = htole32(FUNCTIONFS_STRINGS_MAGIC),
+ .length = htole32(sizeof(strings)),
+ .str_count = htole32(1),
+ .lang_count = htole32(1),
+ },
+ .lang0 = {
+ htole16(0x0409), /* en-us */
+ STR_INTERFACE,
+ },
+};
+
+/******************** Endpoints handling *******************************/
+
+static void display_event(struct usb_functionfs_event *event)
+{
+ static const char *const names[] = {
+ [FUNCTIONFS_BIND] = "BIND",
+ [FUNCTIONFS_UNBIND] = "UNBIND",
+ [FUNCTIONFS_ENABLE] = "ENABLE",
+ [FUNCTIONFS_DISABLE] = "DISABLE",
+ [FUNCTIONFS_SETUP] = "SETUP",
+ [FUNCTIONFS_SUSPEND] = "SUSPEND",
+ [FUNCTIONFS_RESUME] = "RESUME",
+ };
+ switch (event->type) {
+ case FUNCTIONFS_BIND:
+ case FUNCTIONFS_UNBIND:
+ case FUNCTIONFS_ENABLE:
+ case FUNCTIONFS_DISABLE:
+ case FUNCTIONFS_SETUP:
+ case FUNCTIONFS_SUSPEND:
+ case FUNCTIONFS_RESUME:
+ printf("Event %s\n", names[event->type]);
+ }
+}
+
+static void handle_ep0(int ep0, bool *ready)
+{
+ struct usb_functionfs_event event;
+ int ret;
+
+ struct pollfd pfds[1];
+ pfds[0].fd = ep0;
+ pfds[0].events = POLLIN;
+
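+	/* poll with zero timeout: a non-blocking check for a pending event */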
+ ret = poll(pfds, 1, 0);
+
+ if (ret && (pfds[0].revents & POLLIN)) {
+ ret = read(ep0, &event, sizeof(event));
+		if (ret < 0) {
+ perror("unable to read event from ep0");
+ return;
+ }
+ display_event(&event);
+ switch (event.type) {
+ case FUNCTIONFS_SETUP:
+ if (event.u.setup.bRequestType & USB_DIR_IN)
+ write(ep0, NULL, 0);
+ else
+ read(ep0, NULL, 0);
+ break;
+
+ case FUNCTIONFS_ENABLE:
+ *ready = true;
+ break;
+
+ case FUNCTIONFS_DISABLE:
+ *ready = false;
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int i, ret;
+ char *ep_path;
+
+ int ep0;
+ int ep[2];
+
+ io_context_t ctx;
+
+ int evfd;
+ fd_set rfds;
+
+ char *buf_in, *buf_out;
+ struct iocb *iocb_in, *iocb_out;
+ int req_in = 0, req_out = 0;
+	bool ready = false;
+
+ if (argc != 2) {
+ printf("ffs directory not specified!\n");
+ return 1;
+ }
+
+ ep_path = malloc(strlen(argv[1]) + 4 /* "/ep#" */ + 1 /* '\0' */);
+ if (!ep_path) {
+ perror("malloc");
+ return 1;
+ }
+
+ /* open endpoint files */
+ sprintf(ep_path, "%s/ep0", argv[1]);
+ ep0 = open(ep_path, O_RDWR);
+ if (ep0 < 0) {
+ perror("unable to open ep0");
+ return 1;
+ }
+ if (write(ep0, &descriptors, sizeof(descriptors)) < 0) {
+ perror("unable do write descriptors");
+ return 1;
+ }
+ if (write(ep0, &strings, sizeof(strings)) < 0) {
+ perror("unable to write strings");
+ return 1;
+ }
+ for (i = 0; i < 2; ++i) {
+ sprintf(ep_path, "%s/ep%d", argv[1], i+1);
+ ep[i] = open(ep_path, O_RDWR);
+ if (ep[i] < 0) {
+ printf("unable to open ep%d: %s\n", i+1,
+ strerror(errno));
+ return 1;
+ }
+ }
+
+ free(ep_path);
+
+ memset(&ctx, 0, sizeof(ctx));
+ /* setup aio context to handle up to 2 requests */
+ if (io_setup(2, &ctx) < 0) {
+ perror("unable to setup aio");
+ return 1;
+ }
+
+ evfd = eventfd(0, 0);
+ if (evfd < 0) {
+ perror("unable to open eventfd");
+ return 1;
+ }
+
+ /* alloc buffers and requests */
+ buf_in = malloc(BUF_LEN);
+ buf_out = malloc(BUF_LEN);
+ iocb_in = malloc(sizeof(*iocb_in));
+ iocb_out = malloc(sizeof(*iocb_out));
+
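+	/*
+	 * event loop: wait on ep0 and evfd, reap completed transfers and
+	 * keep one IN and one OUT request queued at all times
+	 */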
+ while (1) {
+ FD_ZERO(&rfds);
+ FD_SET(ep0, &rfds);
+ FD_SET(evfd, &rfds);
+
+ ret = select(((ep0 > evfd) ? ep0 : evfd)+1,
+ &rfds, NULL, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ perror("select");
+ break;
+ }
+
+ if (FD_ISSET(ep0, &rfds))
+ handle_ep0(ep0, &ready);
+
+ /* we are waiting for function ENABLE */
+ if (!ready)
+ continue;
+
+		/* if a submitted transfer completed, reap its events */
+ if (FD_ISSET(evfd, &rfds)) {
+ uint64_t ev_cnt;
+ ret = read(evfd, &ev_cnt, sizeof(ev_cnt));
+ if (ret < 0) {
+ perror("unable to read eventfd");
+ break;
+ }
+
+ struct io_event e[2];
+			/* wait for at least one completed event */
+			ret = io_getevents(ctx, 1, 2, e, NULL);
+			/* for each completed event */
+ for (i = 0; i < ret; ++i) {
+ if (e[i].obj->aio_fildes == ep[0]) {
+ printf("ev=in; ret=%lu\n", e[i].res);
+ req_in = 0;
+ } else if (e[i].obj->aio_fildes == ep[1]) {
+ printf("ev=out; ret=%lu\n", e[i].res);
+ req_out = 0;
+ }
+ }
+ }
+
+		if (!req_in) { /* if IN transfer not requested */
+ /* prepare write request */
+ io_prep_pwrite(iocb_in, ep[0], buf_in, BUF_LEN, 0);
+ /* enable eventfd notification */
+ iocb_in->u.c.flags |= IOCB_FLAG_RESFD;
+ iocb_in->u.c.resfd = evfd;
+ /* submit table of requests */
+ ret = io_submit(ctx, 1, &iocb_in);
+ if (ret >= 0) { /* if ret > 0 request is queued */
+ req_in = 1;
+ printf("submit: in\n");
+ } else
+ perror("unable to submit request");
+ }
+ if (!req_out) { /* if OUT transfer not requested */
+ /* prepare read request */
+ io_prep_pread(iocb_out, ep[1], buf_out, BUF_LEN, 0);
+			/* enable eventfd notification */
+ iocb_out->u.c.flags |= IOCB_FLAG_RESFD;
+ iocb_out->u.c.resfd = evfd;
+ /* submit table of requests */
+ ret = io_submit(ctx, 1, &iocb_out);
+ if (ret >= 0) { /* if ret > 0 request is queued */
+ req_out = 1;
+ printf("submit: out\n");
+ } else
+ perror("unable to submit request");
+ }
+ }
+
+ /* free resources */
+
+ io_destroy(ctx);
+
+ free(buf_in);
+ free(buf_out);
+ free(iocb_in);
+ free(iocb_out);
+
+ for (i = 0; i < 2; ++i)
+ close(ep[i]);
+ close(ep0);
+
+ return 0;
+}
diff --git a/tools/usb/ffs-aio-example/simple/host_app/Makefile b/tools/usb/ffs-aio-example/simple/host_app/Makefile
new file mode 100644
index 000000000..8c4a6f0aa
--- /dev/null
+++ b/tools/usb/ffs-aio-example/simple/host_app/Makefile
@@ -0,0 +1,13 @@
+CC = gcc
+LIBUSB_CFLAGS = $(shell pkg-config --cflags libusb-1.0)
+LIBUSB_LIBS = $(shell pkg-config --libs libusb-1.0)
+WARNINGS = -Wall -Wextra
+CFLAGS = $(LIBUSB_CFLAGS) $(WARNINGS)
+LDFLAGS = $(LIBUSB_LIBS)
+
+all: test
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+
+clean:
+ $(RM) test
diff --git a/tools/usb/ffs-aio-example/simple/host_app/test.c b/tools/usb/ffs-aio-example/simple/host_app/test.c
new file mode 100644
index 000000000..aed86ffff
--- /dev/null
+++ b/tools/usb/ffs-aio-example/simple/host_app/test.c
@@ -0,0 +1,178 @@
+/*
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * For more information, please refer to <http://unlicense.org/>
+ */
+
+#include <libusb.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#define VENDOR 0x1d6b
+#define PRODUCT 0x0105
+
+#define BUF_LEN 8192
+
+/*
+ * struct test_state - describes test program state
+ * @found: pointer to struct describing tested device
+ * @ctx: context, set to NULL
+ * @handle: handle of tested device
+ * @attached: indicates that device was attached to kernel, and has to be
+ * reattached at the end of test program
+ */
+
+struct test_state {
+ libusb_device *found;
+ libusb_context *ctx;
+ libusb_device_handle *handle;
+ int attached;
+};
+
+/*
+ * test_init - initialize test program
+ */
+
+int test_init(struct test_state *state)
+{
+ int i, ret;
+ ssize_t cnt;
+ libusb_device **list;
+
+ state->found = NULL;
+ state->ctx = NULL;
+ state->handle = NULL;
+ state->attached = 0;
+
+ ret = libusb_init(&state->ctx);
+ if (ret) {
+ printf("cannot init libusb: %s\n", libusb_error_name(ret));
+ return 1;
+ }
+
+ cnt = libusb_get_device_list(state->ctx, &list);
+ if (cnt <= 0) {
+ printf("no devices found\n");
+ goto error1;
+ }
+
+ for (i = 0; i < cnt; ++i) {
+ libusb_device *dev = list[i];
+ struct libusb_device_descriptor desc;
+ ret = libusb_get_device_descriptor(dev, &desc);
+ if (ret) {
+ printf("unable to get device descriptor: %s\n",
+ libusb_error_name(ret));
+ goto error2;
+ }
+ if (desc.idVendor == VENDOR && desc.idProduct == PRODUCT) {
+ state->found = dev;
+ break;
+ }
+ }
+
+ if (!state->found) {
+ printf("no devices found\n");
+ goto error2;
+ }
+
+ ret = libusb_open(state->found, &state->handle);
+ if (ret) {
+ printf("cannot open device: %s\n", libusb_error_name(ret));
+ goto error2;
+ }
+
+ if (libusb_claim_interface(state->handle, 0)) {
+ ret = libusb_detach_kernel_driver(state->handle, 0);
+ if (ret) {
+ printf("unable to detach kernel driver: %s\n",
+ libusb_error_name(ret));
+ goto error3;
+ }
+ state->attached = 1;
+ ret = libusb_claim_interface(state->handle, 0);
+ if (ret) {
+ printf("cannot claim interface: %s\n",
+ libusb_error_name(ret));
+ goto error4;
+ }
+ }
+
+ return 0;
+
+error4:
+ if (state->attached == 1)
+ libusb_attach_kernel_driver(state->handle, 0);
+
+error3:
+ libusb_close(state->handle);
+
+error2:
+ libusb_free_device_list(list, 1);
+
+error1:
+ libusb_exit(state->ctx);
+ return 1;
+}
+
+/*
+ * test_exit - cleanup test program
+ */
+
+void test_exit(struct test_state *state)
+{
+ libusb_release_interface(state->handle, 0);
+ if (state->attached == 1)
+ libusb_attach_kernel_driver(state->handle, 0);
+ libusb_close(state->handle);
+ libusb_exit(state->ctx);
+}
+
+int main(void)
+{
+ struct test_state state;
+ struct libusb_config_descriptor *conf;
+ struct libusb_interface_descriptor const *iface;
+ unsigned char in_addr, out_addr;
+
+ if (test_init(&state))
+ return 1;
+
+ libusb_get_config_descriptor(state.found, 0, &conf);
+ iface = &conf->interface[0].altsetting[0];
+ in_addr = iface->endpoint[0].bEndpointAddress;
+ out_addr = iface->endpoint[1].bEndpointAddress;
+
+ while (1) {
+ static unsigned char buffer[BUF_LEN];
+ int bytes;
+ libusb_bulk_transfer(state.handle, in_addr, buffer, BUF_LEN,
+ &bytes, 500);
+ libusb_bulk_transfer(state.handle, out_addr, buffer, BUF_LEN,
+ &bytes, 500);
+ }
+ test_exit(&state);
+}
diff --git a/tools/usb/ffs-test.c b/tools/usb/ffs-test.c
new file mode 100644
index 000000000..0f395dfb7
--- /dev/null
+++ b/tools/usb/ffs-test.c
@@ -0,0 +1,694 @@
+/*
+ * ffs-test.c -- user mode filesystem api for usb composite function
+ *
+ * Copyright (C) 2010 Samsung Electronics
+ * Author: Michal Nazarewicz <mina86@mina86.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* $(CROSS_COMPILE)cc -Wall -Wextra -g -o ffs-test ffs-test.c -lpthread */
+
+
+#define _DEFAULT_SOURCE /* for endian.h */
+
+#include <endian.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <tools/le_byteshift.h>
+
+#include "../../include/uapi/linux/usb/functionfs.h"
+
+
+/******************** Little Endian Handling ********************************/
+
+/*
+ * cpu_to_le16/32 are used when initializing structures, a context where a
+ * function call is not allowed, so they are implemented as macros that
+ * can appear in static initializers.
+ */
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define cpu_to_le16(x) (x)
+#define cpu_to_le32(x) (x)
+#else
+#define cpu_to_le16(x) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
+#define cpu_to_le32(x) \
+ ((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >> 8) | \
+ (((x) & 0x0000ff00u) << 8) | (((x) & 0x000000ffu) << 24))
+#endif
+
+#define le32_to_cpu(x) le32toh(x)
+#define le16_to_cpu(x) le16toh(x)
+
+/******************** Messages and Errors ***********************************/
+
+static const char argv0[] = "ffs-test";
+
+static unsigned verbosity = 7;
+
+static void _msg(unsigned level, const char *fmt, ...)
+{
+ if (level < 2)
+ level = 2;
+ else if (level > 7)
+ level = 7;
+
+ if (level <= verbosity) {
+ static const char levels[8][6] = {
+ [2] = "crit:",
+ [3] = "err: ",
+ [4] = "warn:",
+ [5] = "note:",
+ [6] = "info:",
+ [7] = "dbg: "
+ };
+
+ int _errno = errno;
+ va_list ap;
+
+ fprintf(stderr, "%s: %s ", argv0, levels[level]);
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+
+ if (fmt[strlen(fmt) - 1] != '\n') {
+ char buffer[128];
+ strerror_r(_errno, buffer, sizeof buffer);
+ fprintf(stderr, ": (-%d) %s\n", _errno, buffer);
+ }
+
+ fflush(stderr);
+ }
+}
+
+#define die(...) (_msg(2, __VA_ARGS__), exit(1))
+#define err(...) _msg(3, __VA_ARGS__)
+#define warn(...) _msg(4, __VA_ARGS__)
+#define note(...) _msg(5, __VA_ARGS__)
+#define info(...) _msg(6, __VA_ARGS__)
+#define debug(...) _msg(7, __VA_ARGS__)
+
+#define die_on(cond, ...) do { \
+ if (cond) \
+ die(__VA_ARGS__); \
+ } while (0)
+
+
+/******************** Descriptors and Strings *******************************/
+
+static const struct {
+ struct usb_functionfs_descs_head_v2 header;
+ __le32 fs_count;
+ __le32 hs_count;
+ __le32 ss_count;
+ struct {
+ struct usb_interface_descriptor intf;
+ struct usb_endpoint_descriptor_no_audio sink;
+ struct usb_endpoint_descriptor_no_audio source;
+ } __attribute__((packed)) fs_descs, hs_descs;
+ struct {
+ struct usb_interface_descriptor intf;
+ struct usb_endpoint_descriptor_no_audio sink;
+ struct usb_ss_ep_comp_descriptor sink_comp;
+ struct usb_endpoint_descriptor_no_audio source;
+ struct usb_ss_ep_comp_descriptor source_comp;
+ } ss_descs;
+} __attribute__((packed)) descriptors = {
+ .header = {
+ .magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
+ .flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
+ FUNCTIONFS_HAS_HS_DESC |
+ FUNCTIONFS_HAS_SS_DESC),
+ .length = cpu_to_le32(sizeof descriptors),
+ },
+ .fs_count = cpu_to_le32(3),
+ .fs_descs = {
+ .intf = {
+ .bLength = sizeof descriptors.fs_descs.intf,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .sink = {
+ .bLength = sizeof descriptors.fs_descs.sink,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ /* .wMaxPacketSize = autoconfiguration (kernel) */
+ },
+ .source = {
+ .bLength = sizeof descriptors.fs_descs.source,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ /* .wMaxPacketSize = autoconfiguration (kernel) */
+ },
+ },
+ .hs_count = cpu_to_le32(3),
+ .hs_descs = {
+ .intf = {
+ .bLength = sizeof descriptors.fs_descs.intf,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .sink = {
+ .bLength = sizeof descriptors.hs_descs.sink,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(512),
+ },
+ .source = {
+ .bLength = sizeof descriptors.hs_descs.source,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(512),
+ .bInterval = 1, /* NAK every 1 uframe */
+ },
+ },
+ .ss_count = cpu_to_le32(5),
+ .ss_descs = {
+ .intf = {
+ .bLength = sizeof descriptors.fs_descs.intf,
+ .bDescriptorType = USB_DT_INTERFACE,
+ .bNumEndpoints = 2,
+ .bInterfaceClass = USB_CLASS_VENDOR_SPEC,
+ .iInterface = 1,
+ },
+ .sink = {
+ .bLength = sizeof descriptors.hs_descs.sink,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 1 | USB_DIR_IN,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(1024),
+ },
+ .sink_comp = {
+ .bLength = USB_DT_SS_EP_COMP_SIZE,
+ .bDescriptorType = USB_DT_SS_ENDPOINT_COMP,
+ .bMaxBurst = 0,
+ .bmAttributes = 0,
+ .wBytesPerInterval = 0,
+ },
+ .source = {
+ .bLength = sizeof descriptors.hs_descs.source,
+ .bDescriptorType = USB_DT_ENDPOINT,
+ .bEndpointAddress = 2 | USB_DIR_OUT,
+ .bmAttributes = USB_ENDPOINT_XFER_BULK,
+ .wMaxPacketSize = cpu_to_le16(1024),
+ .bInterval = 1, /* NAK every 1 uframe */
+ },
+ .source_comp = {
+ .bLength = USB_DT_SS_EP_COMP_SIZE,
+ .bDescriptorType = USB_DT_SS_ENDPOINT_COMP,
+ .bMaxBurst = 0,
+ .bmAttributes = 0,
+ .wBytesPerInterval = 0,
+ },
+ },
+};
+
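+/*
+ * Convert v2 descriptors into the legacy format understood by older
+ * kernels: FS and HS descriptors are copied, SS descriptors dropped.
+ * Returns the length of the allocated legacy blob, or zero on error.
+ */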
+static size_t descs_to_legacy(void **legacy, const void *descriptors_v2)
+{
+ const unsigned char *descs_end, *descs_start;
+ __u32 length, fs_count = 0, hs_count = 0, count;
+
+ /* Read v2 header */
+ {
+ const struct {
+ const struct usb_functionfs_descs_head_v2 header;
+ const __le32 counts[];
+ } __attribute__((packed)) *const in = descriptors_v2;
+ const __le32 *counts = in->counts;
+ __u32 flags;
+
+ if (le32_to_cpu(in->header.magic) !=
+ FUNCTIONFS_DESCRIPTORS_MAGIC_V2)
+ return 0;
+ length = le32_to_cpu(in->header.length);
+ if (length <= sizeof in->header)
+ return 0;
+ length -= sizeof in->header;
+ flags = le32_to_cpu(in->header.flags);
+ if (flags & ~(FUNCTIONFS_HAS_FS_DESC | FUNCTIONFS_HAS_HS_DESC |
+ FUNCTIONFS_HAS_SS_DESC))
+ return 0;
+
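+/*
+ * consume one 32-bit descriptor count from the header for each speed
+ * flag that is set, advancing 'counts' past each consumed value
+ */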
+#define GET_NEXT_COUNT_IF_FLAG(ret, flg) do { \
+ if (!(flags & (flg))) \
+ break; \
+ if (length < 4) \
+ return 0; \
+ ret = le32_to_cpu(*counts); \
+ length -= 4; \
+ ++counts; \
+ } while (0)
+
+ GET_NEXT_COUNT_IF_FLAG(fs_count, FUNCTIONFS_HAS_FS_DESC);
+ GET_NEXT_COUNT_IF_FLAG(hs_count, FUNCTIONFS_HAS_HS_DESC);
+ GET_NEXT_COUNT_IF_FLAG(count, FUNCTIONFS_HAS_SS_DESC);
+
+ count = fs_count + hs_count;
+ if (!count)
+ return 0;
+ descs_start = (const void *)counts;
+
+#undef GET_NEXT_COUNT_IF_FLAG
+ }
+
+ /*
+ * Find the end of FS and HS USB descriptors. SS descriptors
+ * are ignored since legacy format does not support them.
+ */
+ descs_end = descs_start;
+ do {
+ if (length < *descs_end)
+ return 0;
+ length -= *descs_end;
+ descs_end += *descs_end;
+ } while (--count);
+
+ /* Allocate legacy descriptors and copy the data. */
+ {
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+ struct {
+ struct usb_functionfs_descs_head header;
+ __u8 descriptors[];
+ } __attribute__((packed)) *out;
+#pragma GCC diagnostic pop
+
+ length = sizeof out->header + (descs_end - descs_start);
+		out = malloc(length);
+		if (!out)
+			return 0;
+ out->header.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC);
+ out->header.length = cpu_to_le32(length);
+ out->header.fs_count = cpu_to_le32(fs_count);
+ out->header.hs_count = cpu_to_le32(hs_count);
+ memcpy(out->descriptors, descs_start, descs_end - descs_start);
+ *legacy = out;
+ }
+
+ return length;
+}
+
+
+#define STR_INTERFACE_ "Source/Sink"
+
+static const struct {
+ struct usb_functionfs_strings_head header;
+ struct {
+ __le16 code;
+ const char str1[sizeof STR_INTERFACE_];
+ } __attribute__((packed)) lang0;
+} __attribute__((packed)) strings = {
+ .header = {
+ .magic = cpu_to_le32(FUNCTIONFS_STRINGS_MAGIC),
+ .length = cpu_to_le32(sizeof strings),
+ .str_count = cpu_to_le32(1),
+ .lang_count = cpu_to_le32(1),
+ },
+ .lang0 = {
+ cpu_to_le16(0x0409), /* en-us */
+ STR_INTERFACE_,
+ },
+};
+
+#define STR_INTERFACE strings.lang0.str1
+
+
+/******************** Files and Threads Handling ****************************/
+
+struct thread;
+
+static ssize_t read_wrap(struct thread *t, void *buf, size_t nbytes);
+static ssize_t write_wrap(struct thread *t, const void *buf, size_t nbytes);
+static ssize_t ep0_consume(struct thread *t, const void *buf, size_t nbytes);
+static ssize_t fill_in_buf(struct thread *t, void *buf, size_t nbytes);
+static ssize_t empty_out_buf(struct thread *t, const void *buf, size_t nbytes);
+
+
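+/*
+ * One thread per endpoint: ep0 consumes FunctionFS events, ep1 streams
+ * data to the host (IN), ep2 drains data from the host (OUT).  Each
+ * thread pumps data from its 'in' routine to its 'out' routine.
+ */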
+static struct thread {
+ const char *const filename;
+ size_t buf_size;
+
+ ssize_t (*in)(struct thread *, void *, size_t);
+ const char *const in_name;
+
+ ssize_t (*out)(struct thread *, const void *, size_t);
+ const char *const out_name;
+
+ int fd;
+ pthread_t id;
+ void *buf;
+ ssize_t status;
+} threads[] = {
+ {
+ "ep0", 4 * sizeof(struct usb_functionfs_event),
+ read_wrap, NULL,
+ ep0_consume, "<consume>",
+ 0, 0, NULL, 0
+ },
+ {
+ "ep1", 8 * 1024,
+ fill_in_buf, "<in>",
+ write_wrap, NULL,
+ 0, 0, NULL, 0
+ },
+ {
+ "ep2", 8 * 1024,
+ read_wrap, NULL,
+ empty_out_buf, "<out>",
+ 0, 0, NULL, 0
+ },
+};
+
+
+static void init_thread(struct thread *t)
+{
+ t->buf = malloc(t->buf_size);
+ die_on(!t->buf, "malloc");
+
+ t->fd = open(t->filename, O_RDWR);
+ die_on(t->fd < 0, "%s", t->filename);
+}
+
+static void cleanup_thread(void *arg)
+{
+ struct thread *t = arg;
+ int ret, fd;
+
+ fd = t->fd;
+ if (t->fd < 0)
+ return;
+ t->fd = -1;
+
+ /* test the FIFO ioctls (non-ep0 code paths) */
+ if (t != threads) {
+ ret = ioctl(fd, FUNCTIONFS_FIFO_STATUS);
+ if (ret < 0) {
+ /* ENODEV reported after disconnect */
+ if (errno != ENODEV)
+ err("%s: get fifo status", t->filename);
+ } else if (ret) {
+ warn("%s: unclaimed = %d\n", t->filename, ret);
+ if (ioctl(fd, FUNCTIONFS_FIFO_FLUSH) < 0)
+ err("%s: fifo flush", t->filename);
+ }
+ }
+
+ if (close(fd) < 0)
+ err("%s: close", t->filename);
+
+ free(t->buf);
+ t->buf = NULL;
+}
+
+static void *start_thread_helper(void *arg)
+{
+ const char *name, *op, *in_name, *out_name;
+ struct thread *t = arg;
+ ssize_t ret;
+
+ info("%s: starts\n", t->filename);
+ in_name = t->in_name ? t->in_name : t->filename;
+ out_name = t->out_name ? t->out_name : t->filename;
+
+ pthread_cleanup_push(cleanup_thread, arg);
+
+ for (;;) {
+ pthread_testcancel();
+
+ ret = t->in(t, t->buf, t->buf_size);
+ if (ret > 0) {
+ ret = t->out(t, t->buf, ret);
+ name = out_name;
+ op = "write";
+ } else {
+ name = in_name;
+ op = "read";
+ }
+
+ if (ret > 0) {
+ /* nop */
+ } else if (!ret) {
+ debug("%s: %s: EOF", name, op);
+ break;
+ } else if (errno == EINTR || errno == EAGAIN) {
+ debug("%s: %s", name, op);
+ } else {
+ warn("%s: %s", name, op);
+ break;
+ }
+ }
+
+ pthread_cleanup_pop(1);
+
+ t->status = ret;
+ info("%s: ends\n", t->filename);
+ return NULL;
+}
+
+static void start_thread(struct thread *t)
+{
+ debug("%s: starting\n", t->filename);
+
+ die_on(pthread_create(&t->id, NULL, start_thread_helper, t) < 0,
+ "pthread_create(%s)", t->filename);
+}
+
+static void join_thread(struct thread *t)
+{
+ int ret = pthread_join(t->id, NULL);
+
+ if (ret < 0)
+ err("%s: joining thread", t->filename);
+ else
+ debug("%s: joined\n", t->filename);
+}
+
+
+static ssize_t read_wrap(struct thread *t, void *buf, size_t nbytes)
+{
+ return read(t->fd, buf, nbytes);
+}
+
+static ssize_t write_wrap(struct thread *t, const void *buf, size_t nbytes)
+{
+ return write(t->fd, buf, nbytes);
+}
+
+
+/******************** Empty/Fill buffer routines ****************************/
+
+/* 0 -- stream of zeros, 1 -- i % 63, 2 -- pipe */
+enum pattern { PAT_ZERO, PAT_SEQ, PAT_PIPE };
+static enum pattern pattern;
+
+static ssize_t
+fill_in_buf(struct thread *ignore, void *buf, size_t nbytes)
+{
+ size_t i;
+ __u8 *p;
+
+ (void)ignore;
+
+ switch (pattern) {
+ case PAT_ZERO:
+ memset(buf, 0, nbytes);
+ break;
+
+ case PAT_SEQ:
+ for (p = buf, i = 0; i < nbytes; ++i, ++p)
+ *p = i % 63;
+ break;
+
+ case PAT_PIPE:
+ return fread(buf, 1, nbytes, stdin);
+ }
+
+ return nbytes;
+}
+
+static ssize_t
+empty_out_buf(struct thread *ignore, const void *buf, size_t nbytes)
+{
+ const __u8 *p;
+ __u8 expected;
+ ssize_t ret;
+ size_t len;
+
+ (void)ignore;
+
+ switch (pattern) {
+ case PAT_ZERO:
+ expected = 0;
+ for (p = buf, len = 0; len < nbytes; ++p, ++len)
+ if (*p)
+ goto invalid;
+ break;
+
+ case PAT_SEQ:
+ for (p = buf, len = 0; len < nbytes; ++p, ++len)
+ if (*p != len % 63) {
+ expected = len % 63;
+ goto invalid;
+ }
+ break;
+
+ case PAT_PIPE:
+		ret = fwrite(buf, nbytes, 1, stdout);
+		if (ret > 0)
+			fflush(stdout);
+		/* fwrite succeeded iff it wrote the single nbytes-sized item */
+		len = ret == 1 ? nbytes : 0;
+		break;
+
+invalid:
+ err("bad OUT byte %zd, expected %02x got %02x\n",
+ len, expected, *p);
+ for (p = buf, len = 0; len < nbytes; ++p, ++len) {
+ if (0 == (len % 32))
+ fprintf(stderr, "%4zd:", len);
+ fprintf(stderr, " %02x", *p);
+ if (31 == (len % 32))
+ fprintf(stderr, "\n");
+ }
+ fflush(stderr);
+ errno = EILSEQ;
+ return -1;
+ }
+
+ return len;
+}
+
+
+/******************** Endpoints routines ************************************/
+
+static void handle_setup(const struct usb_ctrlrequest *setup)
+{
+ printf("bRequestType = %d\n", setup->bRequestType);
+ printf("bRequest = %d\n", setup->bRequest);
+ printf("wValue = %d\n", le16_to_cpu(setup->wValue));
+ printf("wIndex = %d\n", le16_to_cpu(setup->wIndex));
+ printf("wLength = %d\n", le16_to_cpu(setup->wLength));
+}
+
+static ssize_t
+ep0_consume(struct thread *ignore, const void *buf, size_t nbytes)
+{
+ static const char *const names[] = {
+ [FUNCTIONFS_BIND] = "BIND",
+ [FUNCTIONFS_UNBIND] = "UNBIND",
+ [FUNCTIONFS_ENABLE] = "ENABLE",
+ [FUNCTIONFS_DISABLE] = "DISABLE",
+ [FUNCTIONFS_SETUP] = "SETUP",
+ [FUNCTIONFS_SUSPEND] = "SUSPEND",
+ [FUNCTIONFS_RESUME] = "RESUME",
+ };
+
+ const struct usb_functionfs_event *event = buf;
+ size_t n;
+
+ (void)ignore;
+
+ for (n = nbytes / sizeof *event; n; --n, ++event)
+ switch (event->type) {
+ case FUNCTIONFS_BIND:
+ case FUNCTIONFS_UNBIND:
+ case FUNCTIONFS_ENABLE:
+ case FUNCTIONFS_DISABLE:
+ case FUNCTIONFS_SETUP:
+ case FUNCTIONFS_SUSPEND:
+ case FUNCTIONFS_RESUME:
+ printf("Event %s\n", names[event->type]);
+ if (event->type == FUNCTIONFS_SETUP)
+ handle_setup(&event->u.setup);
+ break;
+
+ default:
+ printf("Event %03u (unknown)\n", event->type);
+ }
+
+ return nbytes;
+}
+
+static void ep0_init(struct thread *t, bool legacy_descriptors)
+{
+ void *legacy;
+ ssize_t ret;
+ size_t len;
+
+ if (legacy_descriptors) {
+ info("%s: writing descriptors\n", t->filename);
+ goto legacy;
+ }
+
+ info("%s: writing descriptors (in v2 format)\n", t->filename);
+ ret = write(t->fd, &descriptors, sizeof descriptors);
+
+ if (ret < 0 && errno == EINVAL) {
+ warn("%s: new format rejected, trying legacy\n", t->filename);
+legacy:
+ len = descs_to_legacy(&legacy, &descriptors);
+ if (len) {
+ ret = write(t->fd, legacy, len);
+ free(legacy);
+ }
+ }
+ die_on(ret < 0, "%s: write: descriptors", t->filename);
+
+ info("%s: writing strings\n", t->filename);
+ ret = write(t->fd, &strings, sizeof strings);
+ die_on(ret < 0, "%s: write: strings", t->filename);
+}
+
+
+/******************** Main **************************************************/
+
+int main(int argc, char **argv)
+{
+ bool legacy_descriptors;
+ unsigned i;
+
+ legacy_descriptors = argc > 2 && !strcmp(argv[1], "-l");
+
+ init_thread(threads);
+ ep0_init(threads, legacy_descriptors);
+
+ for (i = 1; i < sizeof threads / sizeof *threads; ++i)
+ init_thread(threads + i);
+
+ for (i = 1; i < sizeof threads / sizeof *threads; ++i)
+ start_thread(threads + i);
+
+ start_thread_helper(threads);
+
+ for (i = 1; i < sizeof threads / sizeof *threads; ++i)
+ join_thread(threads + i);
+
+ return 0;
+}
diff --git a/tools/usb/hcd-tests.sh b/tools/usb/hcd-tests.sh
new file mode 100644
index 000000000..e8cad6a4f
--- /dev/null
+++ b/tools/usb/hcd-tests.sh
@@ -0,0 +1,276 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# test types can be passed on the command line:
+#
+# - control: any device can do this
+# - out, in: out needs 'bulk sink' firmware, in needs 'bulk src'
+# - iso-out, iso-in: out needs 'iso sink' firmware, in needs 'iso src'
+# - halt: needs bulk sink+src, tests halt set/clear from host
+# - unlink: needs bulk sink and/or src, test HCD unlink processing
+# - loop: needs firmware that will buffer N transfers
+#
+# run it for hours, days, weeks.
+#
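+#
+# example: sh hcd-tests.sh control out in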
+
+#
+# this default provides a steady test load for a bulk device
+#
+TYPES='control out in'
+#TYPES='control out in halt'
+
+#
+# to test HCD code
+#
+# - include unlink tests
+# - add some ${RANDOM}ness
+# - connect several devices concurrently (same HC)
+# - keep HC's IRQ lines busy with unrelated traffic (IDE, net, ...)
+# - add other concurrent system loads
+#
+
+declare -i COUNT BUFLEN
+
+COUNT=50000
+BUFLEN=2048
+
+# NOTE: the 'in' and 'out' cases are usually bulk, but can be
+# set up to use interrupt transfers by 'usbtest' module options
+
+
+if [ "$DEVICE" = "" ]; then
+ echo "testing ALL recognized usbtest devices"
+ echo ""
+ TEST_ARGS="-a"
+else
+ TEST_ARGS=""
+fi
+
+do_test ()
+{
+ if ! ./testusb $TEST_ARGS -s $BUFLEN -c $COUNT $* 2>/dev/null
+ then
+ echo "FAIL"
+ exit 1
+ fi
+}
+
+ARGS="$*"
+
+if [ "$ARGS" = "" ];
+then
+ ARGS="$TYPES"
+fi
+
+# FIXME use /sys/bus/usb/device/$THIS/bConfigurationValue to
+# check and change configs
+
+CONFIG=''
+
+check_config ()
+{
+ if [ "$CONFIG" = "" ]; then
+ CONFIG=$1
+ echo "assuming $CONFIG configuration"
+ return
+ fi
+ if [ "$CONFIG" = $1 ]; then
+ return
+ fi
+
+ echo "** device must be in $1 config, but it's $CONFIG instead"
+ exit 1
+}
+
+
+echo "TESTING: $ARGS"
+
+while : true
+do
+ echo $(date)
+
+ for TYPE in $ARGS
+ do
+ # restore defaults
+ COUNT=5000
+ BUFLEN=2048
+
+ # FIXME automatically multiply COUNT by 10 when
+ # /sys/bus/usb/device/$THIS/speed == "480"
+
+# COUNT=50000
+
+ case $TYPE in
+ control)
+ # any device, in any configuration, can use this.
+ echo '** Control test cases:'
+
+ echo "test 9: ch9 postconfig"
+ do_test -t 9 -c 5000
+ echo "test 10: control queueing"
+ do_test -t 10 -c 5000
+
+ # this relies on some vendor-specific commands
+ echo "test 14: control writes"
+ do_test -t 14 -c 15000 -s 256 -v 1
+
+ echo "test 21: control writes, unaligned"
+ do_test -t 21 -c 100 -s 256 -v 1
+
+ ;;
+
+ out)
+ check_config sink-src
+ echo '** Host Write (OUT) test cases:'
+
+ echo "test 1: $COUNT transfers, same size"
+ do_test -t 1
+ echo "test 3: $COUNT transfers, variable/short size"
+ do_test -t 3 -v 421
+
+ COUNT=100
+ echo "test 17: $COUNT transfers, unaligned DMA map by core"
+ do_test -t 17
+
+ echo "test 19: $COUNT transfers, unaligned DMA map by usb_alloc_coherent"
+ do_test -t 19
+
+ COUNT=2000
+ echo "test 5: $COUNT scatterlists, same size entries"
+ do_test -t 5
+
+ # try to trigger short OUT processing bugs
+ echo "test 7a: $COUNT scatterlists, variable size/short entries"
+ do_test -t 7 -v 579
+ BUFLEN=4096
+ echo "test 7b: $COUNT scatterlists, variable size/bigger entries"
+ do_test -t 7 -v 41
+ BUFLEN=64
+ echo "test 7c: $COUNT scatterlists, variable size/micro entries"
+ do_test -t 7 -v 63
+ ;;
+
+ iso-out)
+ check_config sink-src
+ echo '** Host ISOCHRONOUS Write (OUT) test cases:'
+
+ # at peak iso transfer rates:
+ # - usb 2.0 high bandwidth, this is one frame.
+ # - usb 1.1, it's twenty-four frames.
+ BUFLEN=24500
+
+ COUNT=1000
+
+# COUNT=10000
+
+ echo "test 15: $COUNT transfers, same size"
+ # do_test -t 15 -g 3 -v 0
+ BUFLEN=32768
+ do_test -t 15 -g 8 -v 0
+
+ # FIXME it'd make sense to have an iso OUT test issuing
+ # short writes on more packets than the last one
+
+ COUNT=100
+ echo "test 22: $COUNT transfers, non aligned"
+ do_test -t 22 -g 8 -v 0
+
+ ;;
+
+ in)
+ check_config sink-src
+ echo '** Host Read (IN) test cases:'
+
+ # NOTE: these "variable size" reads are just multiples
+ # of 512 bytes, no EOVERFLOW testing is done yet
+
+ echo "test 2: $COUNT transfers, same size"
+ do_test -t 2
+ echo "test 4: $COUNT transfers, variable size"
+ do_test -t 4
+
+ COUNT=100
+ echo "test 18: $COUNT transfers, unaligned DMA map by core"
+ do_test -t 18
+
+ echo "test 20: $COUNT transfers, unaligned DMA map by usb_alloc_coherent"
+ do_test -t 20
+
+ COUNT=2000
+ echo "test 6: $COUNT scatterlists, same size entries"
+ do_test -t 6
+ echo "test 8: $COUNT scatterlists, variable size entries"
+ do_test -t 8
+ ;;
+
+ iso-in)
+ check_config sink-src
+ echo '** Host ISOCHRONOUS Read (IN) test cases:'
+
+ # at peak iso transfer rates:
+ # - usb 2.0 high bandwidth, this is one frame.
+ # - usb 1.1, it's twenty-four frames.
+ BUFLEN=24500
+
+ COUNT=1000
+
+# COUNT=10000
+
+ echo "test 16: $COUNT transfers, same size"
+ # do_test -t 16 -g 3 -v 0
+ BUFLEN=32768
+ do_test -t 16 -g 8 -v 0
+
+ # FIXME since iso expects faults, it'd make sense
+ # to have an iso IN test issuing short reads ...
+
+ COUNT=100
+ echo "test 23: $COUNT transfers, unaligned"
+ do_test -t 23 -g 8 -v 0
+
+ ;;
+
+ halt)
+ # NOTE: sometimes hardware doesn't cooperate well with halting
+ # endpoints from the host side. so long as mass-storage class
+ # firmware can halt them from the device, don't worry much if
+ # you can't make this test work on your device.
+ COUNT=2000
+ echo "test 13: $COUNT halt set/clear"
+ do_test -t 13
+ ;;
+
+ unlink)
+ COUNT=2000
+ echo "test 11: $COUNT read unlinks"
+ do_test -t 11
+
+ echo "test 12: $COUNT write unlinks"
+ do_test -t 12
+ ;;
+
+ loop)
+ # defaults need too much buffering for ez-usb devices
+ BUFLEN=2048
+ COUNT=32
+
+ # modprobe g_zero qlen=$COUNT buflen=$BUFLEN loopdefault
+ check_config loopback
+
+ # FIXME someone needs to write and merge a version of this
+
+ echo "write $COUNT buffers of $BUFLEN bytes, read them back"
+
+ echo "write $COUNT variable size buffers, read them back"
+
+ ;;
+
+ *)
+ echo "Don't understand test type $TYPE"
+ exit 1;
+ esac
+ echo ''
+ done
+done
+
+# vim: sw=4
diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c
new file mode 100644
index 000000000..791aadef2
--- /dev/null
+++ b/tools/usb/testusb.c
@@ -0,0 +1,541 @@
+/* $(CROSS_COMPILE)cc -Wall -Wextra -g -lpthread -o testusb testusb.c */
+
+/*
+ * Copyright (c) 2002 by David Brownell
+ * Copyright (c) 2010 by Samsung Electronics
+ * Author: Michal Nazarewicz <mina86@mina86.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * This program issues ioctls to perform the tests implemented by the
+ * kernel driver. It can generate a variety of transfer patterns; you
+ * should make sure to test both regular streaming and mixes of
+ * transfer sizes (including short transfers).
+ *
+ * For more information on how this can be used and on USB testing
+ * refer to <URL:http://www.linux-usb.org/usbtest/>.
+ */
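+
+/*
+ * Example: run test case 1 (bulk OUT writes) on all recognized devices:
+ *	./testusb -a -t 1
+ */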
+
+#include <stdio.h>
+#include <string.h>
+#include <ftw.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <errno.h>
+#include <limits.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <sys/ioctl.h>
+#include <linux/usbdevice_fs.h>
+
+/*-------------------------------------------------------------------------*/
+
+#define TEST_CASES 30
+
+// FIXME make these public somewhere; usbdevfs.h?
+
+struct usbtest_param {
+ // inputs
+ unsigned test_num; /* 0..(TEST_CASES-1) */
+ unsigned iterations;
+ unsigned length;
+ unsigned vary;
+ unsigned sglen;
+
+ // outputs
+ struct timeval duration;
+};
+#define USBTEST_REQUEST _IOWR('U', 100, struct usbtest_param)
+
+/*-------------------------------------------------------------------------*/
+
+/* #include <linux/usb_ch9.h> */
+
+#define USB_DT_DEVICE 0x01
+#define USB_DT_INTERFACE 0x04
+
+#define USB_CLASS_PER_INTERFACE 0 /* for DeviceClass */
+#define USB_CLASS_VENDOR_SPEC 0xff
+
+
+struct usb_device_descriptor {
+ __u8 bLength;
+ __u8 bDescriptorType;
+ __u16 bcdUSB;
+ __u8 bDeviceClass;
+ __u8 bDeviceSubClass;
+ __u8 bDeviceProtocol;
+ __u8 bMaxPacketSize0;
+ __u16 idVendor;
+ __u16 idProduct;
+ __u16 bcdDevice;
+ __u8 iManufacturer;
+ __u8 iProduct;
+ __u8 iSerialNumber;
+ __u8 bNumConfigurations;
+} __attribute__ ((packed));
+
+struct usb_interface_descriptor {
+ __u8 bLength;
+ __u8 bDescriptorType;
+
+ __u8 bInterfaceNumber;
+ __u8 bAlternateSetting;
+ __u8 bNumEndpoints;
+ __u8 bInterfaceClass;
+ __u8 bInterfaceSubClass;
+ __u8 bInterfaceProtocol;
+ __u8 iInterface;
+} __attribute__ ((packed));
+
+enum usb_device_speed {
+ USB_SPEED_UNKNOWN = 0, /* enumerating */
+ USB_SPEED_LOW, USB_SPEED_FULL, /* usb 1.1 */
+ USB_SPEED_HIGH /* usb 2.0 */
+};
+
+/*-------------------------------------------------------------------------*/
+
+static char *speed (enum usb_device_speed s)
+{
+ switch (s) {
+ case USB_SPEED_UNKNOWN: return "unknown";
+ case USB_SPEED_LOW: return "low";
+ case USB_SPEED_FULL: return "full";
+ case USB_SPEED_HIGH: return "high";
+ default: return "??";
+ }
+}
+
+struct testdev {
+ struct testdev *next;
+ char *name;
+ pthread_t thread;
+ enum usb_device_speed speed;
+ unsigned ifnum : 8;
+ unsigned forever : 1;
+ int test;
+
+ struct usbtest_param param;
+};
+static struct testdev *testdevs;
+
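+/*
+ * scan the remaining descriptors for a vendor-specific interface with
+ * two endpoints (the FunctionFS source/sink function) and return its
+ * interface number
+ */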
+static int testdev_ffs_ifnum(FILE *fd)
+{
+ union {
+ char buf[255];
+ struct usb_interface_descriptor intf;
+ } u;
+
+ for (;;) {
+ if (fread(u.buf, 1, 1, fd) != 1)
+ return -1;
+ if (fread(u.buf + 1, (unsigned char)u.buf[0] - 1, 1, fd) != 1)
+ return -1;
+
+ if (u.intf.bLength == sizeof u.intf
+ && u.intf.bDescriptorType == USB_DT_INTERFACE
+ && u.intf.bNumEndpoints == 2
+ && u.intf.bInterfaceClass == USB_CLASS_VENDOR_SPEC
+ && u.intf.bInterfaceSubClass == 0
+ && u.intf.bInterfaceProtocol == 0)
+ return (unsigned char)u.intf.bInterfaceNumber;
+ }
+}
+
+static int testdev_ifnum(FILE *fd)
+{
+ struct usb_device_descriptor dev;
+
+ if (fread(&dev, sizeof dev, 1, fd) != 1)
+ return -1;
+
+ if (dev.bLength != sizeof dev || dev.bDescriptorType != USB_DT_DEVICE)
+ return -1;
+
+ /* FX2 with (tweaked) bulksrc firmware */
+ if (dev.idVendor == 0x0547 && dev.idProduct == 0x1002)
+ return 0;
+
+ /*----------------------------------------------------*/
+
+ /* devices that start up using the EZ-USB default device and
+ * which we can use after loading simple firmware. hotplug
+ * can fxload it, and then run this test driver.
+ *
+ * we return false positives in two cases:
+ * - the device has a "real" driver (maybe usb-serial) that
+ * renumerates. the device should vanish quickly.
+ * - the device doesn't have the test firmware installed.
+ */
+
+ /* generic EZ-USB FX controller */
+ if (dev.idVendor == 0x0547 && dev.idProduct == 0x2235)
+ return 0;
+
+ /* generic EZ-USB FX2 controller */
+ if (dev.idVendor == 0x04b4 && dev.idProduct == 0x8613)
+ return 0;
+
+ /* CY3671 development board with EZ-USB FX */
+ if (dev.idVendor == 0x0547 && dev.idProduct == 0x0080)
+ return 0;
+
+ /* Keyspan 19Qi uses an21xx (original EZ-USB) */
+ if (dev.idVendor == 0x06cd && dev.idProduct == 0x010b)
+ return 0;
+
+ /*----------------------------------------------------*/
+
+ /* "gadget zero", Linux-USB test software */
+ if (dev.idVendor == 0x0525 && dev.idProduct == 0xa4a0)
+ return 0;
+
+ /* user mode subset of that */
+ if (dev.idVendor == 0x0525 && dev.idProduct == 0xa4a4)
+ return testdev_ffs_ifnum(fd);
+ /* return 0; */
+
+ /* iso version of usermode code */
+ if (dev.idVendor == 0x0525 && dev.idProduct == 0xa4a3)
+ return 0;
+
+ /* some GPL'd test firmware uses these IDs */
+
+ if (dev.idVendor == 0xfff0 && dev.idProduct == 0xfff0)
+ return 0;
+
+ /*----------------------------------------------------*/
+
+ /* iBOT2 high speed webcam */
+ if (dev.idVendor == 0x0b62 && dev.idProduct == 0x0059)
+ return 0;
+
+ /*----------------------------------------------------*/
+
+ /* the FunctionFS gadget can have the source/sink interface
+	 * anywhere.  We look for an interface descriptor that matches
+	 * what we expect.  We ignore configurations, though. */
+
+ if (dev.idVendor == 0x0525 && dev.idProduct == 0xa4ac
+ && (dev.bDeviceClass == USB_CLASS_PER_INTERFACE
+ || dev.bDeviceClass == USB_CLASS_VENDOR_SPEC))
+ return testdev_ffs_ifnum(fd);
+
+ return -1;
+}
+
+static int find_testdev(const char *name, const struct stat *sb, int flag)
+{
+ FILE *fd;
+ int ifnum;
+ struct testdev *entry;
+
+ (void)sb; /* unused */
+
+ if (flag != FTW_F)
+ return 0;
+
+ fd = fopen(name, "rb");
+ if (!fd) {
+ perror(name);
+ return 0;
+ }
+
+ ifnum = testdev_ifnum(fd);
+ fclose(fd);
+ if (ifnum < 0)
+ return 0;
+
+ entry = calloc(1, sizeof *entry);
+ if (!entry)
+ goto nomem;
+
+ entry->name = strdup(name);
+ if (!entry->name) {
+ free(entry);
+nomem:
+ perror("malloc");
+ return 0;
+ }
+
+ entry->ifnum = ifnum;
+ entry->next = testdevs;
+ testdevs = entry;
+ return 0;
+}
+
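+/*
+ * pass an ioctl through usbfs to the kernel driver bound to the given
+ * interface -- here, the usbtest driver
+ */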
+static int
+usbdev_ioctl (int fd, int ifno, unsigned request, void *param)
+{
+ struct usbdevfs_ioctl wrapper;
+
+ wrapper.ifno = ifno;
+ wrapper.ioctl_code = request;
+ wrapper.data = param;
+
+ return ioctl (fd, USBDEVFS_IOCTL, &wrapper);
+}
+
+static void *handle_testdev (void *arg)
+{
+ struct testdev *dev = arg;
+ int fd, i;
+ int status;
+
+ if ((fd = open (dev->name, O_RDWR)) < 0) {
+ perror ("can't open dev file r/w");
+ return 0;
+ }
+
+ status = ioctl(fd, USBDEVFS_GET_SPEED, NULL);
+ if (status < 0)
+ fprintf(stderr, "USBDEVFS_GET_SPEED failed %d\n", status);
+ else
+ dev->speed = status;
+ fprintf(stderr, "%s speed\t%s\t%u\n",
+ speed(dev->speed), dev->name, dev->ifnum);
+
+restart:
+ for (i = 0; i < TEST_CASES; i++) {
+ if (dev->test != -1 && dev->test != i)
+ continue;
+ dev->param.test_num = i;
+
+ status = usbdev_ioctl (fd, dev->ifnum,
+ USBTEST_REQUEST, &dev->param);
+ if (status < 0 && errno == EOPNOTSUPP)
+ continue;
+
+ /* FIXME need a "syslog it" option for background testing */
+
+ /* NOTE: each thread emits complete lines; no fragments! */
+ if (status < 0) {
+ char buf [80];
+ int err = errno;
+
+ if (strerror_r (errno, buf, sizeof buf)) {
+ snprintf (buf, sizeof buf, "error %d", err);
+ errno = err;
+ }
+ printf ("%s test %d --> %d (%s)\n",
+ dev->name, i, errno, buf);
+ } else
+ printf ("%s test %d, %4d.%.06d secs\n", dev->name, i,
+ (int) dev->param.duration.tv_sec,
+ (int) dev->param.duration.tv_usec);
+
+ fflush (stdout);
+ }
+ if (dev->forever)
+ goto restart;
+
+ close (fd);
+ return arg;
+}
+
+static const char *usb_dir_find(void)
+{
+ static char udev_usb_path[] = "/dev/bus/usb";
+
+ if (access(udev_usb_path, F_OK) == 0)
+ return udev_usb_path;
+
+ return NULL;
+}
+
+static int parse_num(unsigned *num, const char *str)
+{
+ unsigned long val;
+ char *end;
+
+ errno = 0;
+ val = strtoul(str, &end, 0);
+ if (errno || *end || val > UINT_MAX)
+ return -1;
+ *num = val;
+ return 0;
+}
+
+int main (int argc, char **argv)
+{
+
+ int c;
+ struct testdev *entry;
+ char *device;
+ const char *usb_dir = NULL;
+ int all = 0, forever = 0, not = 0;
+ int test = -1 /* all */;
+ struct usbtest_param param;
+
+	/* pick defaults that work with all speeds, without short packets.
+ *
+ * Best per-frame data rates:
+ * super speed,bulk 1024 * 16 * 8 = 131072
+ * interrupt 1024 * 3 * 8 = 24576
+ * high speed, bulk 512 * 13 * 8 = 53248
+ * interrupt 1024 * 3 * 8 = 24576
+ * full speed, bulk/intr 64 * 19 = 1216
+ * interrupt 64 * 1 = 64
+ * low speed, interrupt 8 * 1 = 8
+ */
+ param.iterations = 1000;
+ param.length = 1024;
+ param.vary = 1024;
+ param.sglen = 32;
+
+ /* for easy use when hotplugging */
+ device = getenv ("DEVICE");
+
+ while ((c = getopt (argc, argv, "D:aA:c:g:hlns:t:v:")) != EOF)
+ switch (c) {
+ case 'D': /* device, if only one */
+ device = optarg;
+ continue;
+ case 'A': /* use all devices with specified USB dir */
+ usb_dir = optarg;
+ /* FALL THROUGH */
+ case 'a': /* use all devices */
+ device = NULL;
+ all = 1;
+ continue;
+ case 'c': /* count iterations */
+ if (parse_num(&param.iterations, optarg))
+ goto usage;
+ continue;
+ case 'g': /* scatter/gather entries */
+ if (parse_num(&param.sglen, optarg))
+ goto usage;
+ continue;
+ case 'l': /* loop forever */
+ forever = 1;
+ continue;
+ case 'n': /* no test running! */
+ not = 1;
+ continue;
+ case 's': /* size of packet */
+ if (parse_num(&param.length, optarg))
+ goto usage;
+ continue;
+ case 't': /* run just one test */
+ test = atoi (optarg);
+ if (test < 0)
+ goto usage;
+ continue;
+ case 'v': /* vary packet size by ... */
+ if (parse_num(&param.vary, optarg))
+ goto usage;
+ continue;
+ case '?':
+ case 'h':
+ default:
+usage:
+ fprintf (stderr,
+ "usage: %s [options]\n"
+ "Options:\n"
+ "\t-D dev only test specific device\n"
+ "\t-A usb-dir\n"
+ "\t-a test all recognized devices\n"
+ "\t-l loop forever(for stress test)\n"
+ "\t-t testnum only run specified case\n"
+ "\t-n no test running, show devices to be tested\n"
+ "Case arguments:\n"
+ "\t-c iterations default 1000\n"
+ "\t-s transfer length default 1024\n"
+ "\t-g sglen default 32\n"
+ "\t-v vary default 1024\n",
+ argv[0]);
+ return 1;
+ }
+ if (optind != argc)
+ goto usage;
+ if (!all && !device) {
+ fprintf (stderr, "must specify '-a' or '-D dev', "
+ "or DEVICE=/dev/bus/usb/BBB/DDD in env\n");
+ goto usage;
+ }
+
+ /* Find usb device subdirectory */
+ if (!usb_dir) {
+ usb_dir = usb_dir_find();
+ if (!usb_dir) {
+ fputs ("USB device files are missing\n", stderr);
+ return -1;
+ }
+ }
+
+ /* collect and list the test devices */
+ if (ftw (usb_dir, find_testdev, 3) != 0) {
+ fputs ("ftw failed; are USB device files missing?\n", stderr);
+ return -1;
+ }
+
+ /* quit, run single test, or create test threads */
+ if (!testdevs && !device) {
+ fputs ("no test devices recognized\n", stderr);
+ return -1;
+ }
+ if (not)
+ return 0;
+ if (testdevs && testdevs->next == 0 && !device)
+ device = testdevs->name;
+ for (entry = testdevs; entry; entry = entry->next) {
+ int status;
+
+ entry->param = param;
+ entry->forever = forever;
+ entry->test = test;
+
+ if (device) {
+ if (strcmp (entry->name, device))
+ continue;
+ return handle_testdev (entry) != entry;
+ }
+ status = pthread_create (&entry->thread, 0, handle_testdev, entry);
+ if (status)
+ perror ("pthread_create");
+ }
+ if (device) {
+ struct testdev dev;
+
+ /* kernel can recognize test devices we don't */
+ fprintf (stderr, "%s: %s may see only control tests\n",
+ argv [0], device);
+
+ memset (&dev, 0, sizeof dev);
+ dev.name = device;
+ dev.param = param;
+ dev.forever = forever;
+ dev.test = test;
+ return handle_testdev (&dev) != &dev;
+ }
+
+ /* wait for tests to complete */
+ for (entry = testdevs; entry; entry = entry->next) {
+ void *retval;
+
+ if (pthread_join (entry->thread, &retval))
+ perror ("pthread_join");
+ /* testing errors discarded! */
+ }
+
+ return 0;
+}
diff --git a/tools/usb/usbip/.gitignore b/tools/usb/usbip/.gitignore
new file mode 100644
index 000000000..03b892c8b
--- /dev/null
+++ b/tools/usb/usbip/.gitignore
@@ -0,0 +1,32 @@
+Makefile
+Makefile.in
+aclocal.m4
+autom4te.cache/
+compile
+config.guess
+config.h
+config.h.in
+config.log
+config.status
+config.sub
+configure
+depcomp
+install-sh
+libsrc/Makefile
+libsrc/Makefile.in
+libtool
+ltmain.sh
+missing
+src/Makefile
+src/Makefile.in
+stamp-h1
+libsrc/libusbip.la
+libsrc/libusbip_la-names.lo
+libsrc/libusbip_la-sysfs_utils.lo
+libsrc/libusbip_la-usbip_common.lo
+libsrc/libusbip_la-usbip_device_driver.lo
+libsrc/libusbip_la-usbip_host_common.lo
+libsrc/libusbip_la-usbip_host_driver.lo
+libsrc/libusbip_la-vhci_driver.lo
+src/usbip
+src/usbipd
diff --git a/tools/usb/usbip/AUTHORS b/tools/usb/usbip/AUTHORS
new file mode 100644
index 000000000..a27ea8d03
--- /dev/null
+++ b/tools/usb/usbip/AUTHORS
@@ -0,0 +1,3 @@
+Takahiro Hirofuchi
+Robert Leibl
+matt mooney <mfm@muteddisk.com>
diff --git a/tools/usb/usbip/COPYING b/tools/usb/usbip/COPYING
new file mode 100644
index 000000000..c5611e48a
--- /dev/null
+++ b/tools/usb/usbip/COPYING
@@ -0,0 +1,340 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/tools/usb/usbip/INSTALL b/tools/usb/usbip/INSTALL
new file mode 100644
index 000000000..d3c5b40a9
--- /dev/null
+++ b/tools/usb/usbip/INSTALL
@@ -0,0 +1,237 @@
+Installation Instructions
+*************************
+
+Copyright (C) 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2004, 2005,
+2006, 2007 Free Software Foundation, Inc.
+
+This file is free documentation; the Free Software Foundation gives
+unlimited permission to copy, distribute and modify it.
+
+Basic Installation
+==================
+
+Briefly, the shell commands `./configure; make; make install' should
+configure, build, and install this package. The following
+more-detailed instructions are generic; see the `README' file for
+instructions specific to this package.
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory of the package.
+It may also create one or more `.h' files containing system-dependent
+definitions. Finally, it creates a shell script `config.status' that
+you can run in the future to recreate the current configuration, and a
+file `config.log' containing compiler output (useful mainly for
+debugging `configure').
+
+ It can also use an optional file (typically called `config.cache'
+and enabled with `--cache-file=config.cache' or simply `-C') that saves
+the results of its tests to speed up reconfiguring. Caching is
+disabled by default to prevent problems with accidental use of stale
+cache files.
+
+ If you need to do unusual things to compile the package, please try
+to figure out how `configure' could check whether to do them, and mail
+diffs or instructions to the address given in the `README' so they can
+be considered for the next release. If you are using the cache, and at
+some point `config.cache' contains results you don't want to keep, you
+may remove or edit it.
+
+ The file `configure.ac' (or `configure.in') is used to create
+`configure' by a program called `autoconf'. You need `configure.ac' if
+you want to change it or regenerate `configure' using a newer version
+of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system.
+
+ Running `configure' might take a while. While running, it prints
+ some messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Optionally, type `make check' to run any self-tests that come with
+ the package.
+
+ 4. Type `make install' to install the programs and any data files and
+ documentation.
+
+ 5. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'. There is
+ also a `make maintainer-clean' target, but that is intended mainly
+ for the package's developers. If you use it, you may have to get
+ all sorts of other programs in order to regenerate files that came
+ with the distribution.
+
+ 6. Often, you can also type `make uninstall' to remove the installed
+ files again.
+
+Compilers and Options
+=====================
+
+Some systems require unusual options for compilation or linking that the
+`configure' script does not know about. Run `./configure --help' for
+details on some of the pertinent environment variables.
+
+ You can give `configure' initial values for configuration parameters
+by setting variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c99 CFLAGS=-g LIBS=-lposix
+
+ *Note Defining Variables::, for more details.
+
+Compiling For Multiple Architectures
+====================================
+
+You can compile the package for more than one kind of computer at the
+same time, by placing the object files for each architecture in their
+own directory. To do this, you can use GNU `make'. `cd' to the
+directory where you want the object files and executables to go and run
+the `configure' script. `configure' automatically checks for the
+source code in the directory that `configure' is in and in `..'.
+
+ With a non-GNU `make', it is safer to compile the package for one
+architecture at a time in the source code directory. After you have
+installed the package for one architecture, use `make distclean' before
+reconfiguring for another architecture.
+
+Installation Names
+==================
+
+By default, `make install' installs the package's commands under
+`/usr/local/bin', include files under `/usr/local/include', etc. You
+can specify an installation prefix other than `/usr/local' by giving
+`configure' the option `--prefix=PREFIX'.
+
+ You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If you
+pass the option `--exec-prefix=PREFIX' to `configure', the package uses
+PREFIX as the prefix for installing programs and libraries.
+Documentation and other data files still use the regular prefix.
+
+ In addition, if you use an unusual directory layout you can give
+options like `--bindir=DIR' to specify different values for particular
+kinds of files. Run `configure --help' for a list of the directories
+you can set and what kinds of files go in them.
+
+ If the package supports it, you can cause programs to be installed
+with an extra prefix or suffix on their names by giving `configure' the
+option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'.
+
+Optional Features
+=================
+
+Some packages pay attention to `--enable-FEATURE' options to
+`configure', where FEATURE indicates an optional part of the package.
+They may also pay attention to `--with-PACKAGE' options, where PACKAGE
+is something like `gnu-as' or `x' (for the X Window System). The
+`README' should mention any `--enable-' and `--with-' options that the
+package recognizes.
+
+ For packages that use the X Window System, `configure' can usually
+find the X include and library files automatically, but if it doesn't,
+you can use the `configure' options `--x-includes=DIR' and
+`--x-libraries=DIR' to specify their locations.
+
+Specifying the System Type
+==========================
+
+There may be some features `configure' cannot figure out automatically,
+but needs to determine by the type of machine the package will run on.
+Usually, assuming the package is built to be run on the _same_
+architectures, `configure' can figure that out, but if it prints a
+message saying it cannot guess the machine type, give it the
+`--build=TYPE' option. TYPE can either be a short name for the system
+type, such as `sun4', or a canonical name which has the form:
+
+ CPU-COMPANY-SYSTEM
+
+where SYSTEM can have one of these forms:
+
+ OS KERNEL-OS
+
+ See the file `config.sub' for the possible values of each field. If
+`config.sub' isn't included in this package, then this package doesn't
+need to know the machine type.
+
+ If you are _building_ compiler tools for cross-compiling, you should
+use the option `--target=TYPE' to select the type of system they will
+produce code for.
+
+ If you want to _use_ a cross compiler, that generates code for a
+platform different from the build platform, you should specify the
+"host" platform (i.e., that on which the generated programs will
+eventually be run) with `--host=TYPE'.
+
+Sharing Defaults
+================
+
+If you want to set default values for `configure' scripts to share, you
+can create a site shell script called `config.site' that gives default
+values for variables like `CC', `cache_file', and `prefix'.
+`configure' looks for `PREFIX/share/config.site' if it exists, then
+`PREFIX/etc/config.site' if it exists. Or, you can set the
+`CONFIG_SITE' environment variable to the location of the site script.
+A warning: not all `configure' scripts look for a site script.
+
+Defining Variables
+==================
+
+Variables not defined in a site shell script can be set in the
+environment passed to `configure'. However, some packages may run
+configure again during the build, and the customized values of these
+variables may be lost. In order to avoid this problem, you should set
+them in the `configure' command line, using `VAR=value'. For example:
+
+ ./configure CC=/usr/local2/bin/gcc
+
+causes the specified `gcc' to be used as the C compiler (unless it is
+overridden in the site shell script).
+
+Unfortunately, this technique does not work for `CONFIG_SHELL' due to
+an Autoconf bug. Until the bug is fixed you can use this workaround:
+
+ CONFIG_SHELL=/bin/bash /bin/bash ./configure CONFIG_SHELL=/bin/bash
+
+`configure' Invocation
+======================
+
+`configure' recognizes the following options to control how it operates.
+
+`--help'
+`-h'
+ Print a summary of the options to `configure', and exit.
+
+`--version'
+`-V'
+ Print the version of Autoconf used to generate the `configure'
+ script, and exit.
+
+`--cache-file=FILE'
+ Enable the cache: use and save the results of the tests in FILE,
+ traditionally `config.cache'. FILE defaults to `/dev/null' to
+ disable caching.
+
+`--config-cache'
+`-C'
+ Alias for `--cache-file=config.cache'.
+
+`--quiet'
+`--silent'
+`-q'
+ Do not print messages saying which checks are being made. To
+ suppress all normal output, redirect it to `/dev/null' (any error
+ messages will still be shown).
+
+`--srcdir=DIR'
+ Look for the package's source code in directory DIR. Usually
+ `configure' can determine that directory automatically.
+
+`configure' also accepts some other, not widely useful, options. Run
+`configure --help' for more details.
+
diff --git a/tools/usb/usbip/Makefile.am b/tools/usb/usbip/Makefile.am
new file mode 100644
index 000000000..5961e9c18
--- /dev/null
+++ b/tools/usb/usbip/Makefile.am
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+SUBDIRS := libsrc src
+includedir = @includedir@/usbip
+include_HEADERS := $(addprefix libsrc/, \
+ usbip_common.h vhci_driver.h usbip_host_driver.h \
+ list.h sysfs_utils.h usbip_host_common.h)
+
+dist_man_MANS := $(addprefix doc/, usbip.8 usbipd.8)
diff --git a/tools/usb/usbip/README b/tools/usb/usbip/README
new file mode 100644
index 000000000..7844490fc
--- /dev/null
+++ b/tools/usb/usbip/README
@@ -0,0 +1,257 @@
+#
+# README for usbip-utils
+#
+# Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+# 2005-2008 Takahiro Hirofuchi
+
+[Overview]
+The USB/IP protocol allows a USB device to be passed from a server to a
+client over the network. The server is the machine which provides
+(shares) a USB device; the client is the machine which uses the USB
+device provided by the server over the network. The USB device may be
+either a physical device connected to the server or a software entity
+created on the server using the USB gadget subsystem.
+The whole project consists of four parts (a rough flow sketch follows
+the list):
+
+ - usbip-vhci
+   A client side kernel module which provides a virtual USB Host Controller
+   and allows importing a USB device from a remote machine.
+
+ - usbip-host (stub driver)
+ A server side module which provides a USB device driver which can be
+ bound to a physical USB device to make it exportable.
+
+ - usbip-vudc
+   A server side module which provides a virtual USB Device Controller and
+   allows exporting a USB device created using the USB Gadget Subsystem.
+
+ - usbip-utils
+ A set of userspace tools used to handle connection and management.
+ Used on both sides.
+
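+A rough picture of how the parts fit together (an illustrative sketch):
+
+    client: application -> usbip-vhci (virtual host controller)
+                                  |
+                     TCP/IP, USB/IP protocol (TCP port 3240 by default)
+                                  |
+    server: usbipd -> usbip-host (physical device) or usbip-vudc (gadget)
+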
+[Requirements]
+ - USB/IP device drivers
+ Found in the drivers/usb/usbip/ directory of the Linux kernel tree.
+
+ - libudev >= 2.0
+ libudev library
+
+ - libwrap0-dev
+ tcp wrapper library
+
+ - gcc >= 4.0
+
+ - libtool, automake >= 1.9, autoconf >= 2.5.0, pkg-config
+
+[Optional]
+ - hwdata
+ Contains USB device identification data.
+
+
+[Install]
+ 0. Generate configuration scripts.
+ $ ./autogen.sh
+
+ 1. Compile & install the userspace utilities.
+ $ ./configure [--with-tcp-wrappers=no] [--with-usbids-dir=<dir>]
+ $ make install
+
+ 2. Compile & install USB/IP drivers.
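+     This step is kernel specific. As an illustrative sketch (paths
+     depend on your kernel tree and configuration), in-tree modules can
+     be built from the top of a configured kernel source tree with:
+     $ make M=drivers/usb/usbip modules
+     $ sudo insmod drivers/usb/usbip/usbip-core.ko
+     $ sudo insmod drivers/usb/usbip/usbip-host.ko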
+
+
+[Usage]
+On the server side there are two entities which can be shared.
+The first is a physical USB device connected to the machine.
+To make it available, execute the steps below:
+
+ server:# (Physically attach your USB device.)
+
+ server:# insmod usbip-core.ko
+ server:# insmod usbip-host.ko
+
+ server:# usbipd -D
+ - Start usbip daemon.
+
+ server:# usbip list -l
+ - List driver assignments for USB devices.
+
+ server:# usbip bind --busid 1-2
+ - Bind usbip-host.ko to the device with busid 1-2.
+ - The USB device 1-2 is now exportable to other hosts!
+ - Use `usbip unbind --busid 1-2' to stop exporting the device.
+
+The second shareable entity is a USB gadget created using the USB Gadget
+Subsystem on the server machine. To make it available, execute the steps below:
+
+ server:# (Create your USB gadget)
+	- Currently the preferred way of creating a new USB gadget
+	  is the ConfigFS composite gadget. Please refer to its
+	  documentation for details.
+ - See vudc_server_example.sh for a short example of USB gadget creation
+
+ server:# insmod usbip-core.ko
+ server:# insmod usbip-vudc.ko
+	- To create more than one instance of vudc, use the num module param
+
+ server:# (Bind gadget to one of available vudc)
+ - Assign your new gadget to USB/IP UDC
+	- Using the ConfigFS interface, you may do this simply by:
+ server:# cd /sys/kernel/config/usb_gadget/<gadget_name>
+ server:# echo "usbip-vudc.0" > UDC
+
+ server:# usbipd -D --device
+ - Start usbip daemon.
+
+To attach the new device to the client machine, use the commands below:
+
+ client:# insmod usbip-core.ko
+ client:# insmod vhci-hcd.ko
+
+ client:# usbip list --remote <host>
+ - List exported USB devices on the <host>.
+
+ client:# usbip attach --remote <host> --busid 1-2
+ - Connect the remote USB device.
+	- When using vudc on the server side, the busid is really the vudc
+	  instance name. For example: usbip-vudc.0
+
+ client:# usbip port
+ - Show virtual port status.
+
+ client:# usbip detach --port <port>
+ - Detach the USB device.
+
+
+[Example]
+---------------------------
+ SERVER SIDE
+---------------------------
+Physically attach your USB devices to this host.
+
+ trois:# insmod path/to/usbip-core.ko
+ trois:# insmod path/to/usbip-host.ko
+ trois:# usbipd -D
+
+In another terminal, let's look up what USB devices are physically
+attached to this host.
+
+ trois:# usbip list -l
+ Local USB devices
+ =================
+ - busid 1-1 (05a9:a511)
+ 1-1:1.0 -> ov511
+
+ - busid 3-2 (0711:0902)
+ 3-2:1.0 -> none
+
+ - busid 3-3.1 (08bb:2702)
+ 3-3.1:1.0 -> snd-usb-audio
+ 3-3.1:1.1 -> snd-usb-audio
+
+ - busid 3-3.2 (04bb:0206)
+ 3-3.2:1.0 -> usb-storage
+
+ - busid 3-3 (0409:0058)
+ 3-3:1.0 -> hub
+
+ - busid 4-1 (046d:08b2)
+ 4-1:1.0 -> none
+ 4-1:1.1 -> none
+ 4-1:1.2 -> none
+
+ - busid 5-2 (058f:9254)
+ 5-2:1.0 -> hub
+
+The USB storage device with busid 3-3.2 is currently bound to the
+usb-storage driver. To export this device, we first mark it as
+"exportable" by binding it to the usbip-host driver. Remember that you
+cannot export a USB hub.
+
+Mark the device of busid 3-3.2 as exportable:
+
+ trois:# usbip --debug bind --busid 3-3.2
+ ...
+ usbip debug: usbip_bind.c:162:[unbind_other] 3-3.2:1.0 -> usb-storage
+ ...
+ bind device on busid 3-3.2: complete
+
+ trois:# usbip list -l
+ Local USB devices
+ =================
+ ...
+
+ - busid 3-3.2 (04bb:0206)
+ 3-3.2:1.0 -> usbip-host
+ ...
+
+---------------------------
+ CLIENT SIDE
+---------------------------
+First, let's list available remote devices that are marked as
+exportable on the host.
+
+ deux:# insmod path/to/usbip-core.ko
+ deux:# insmod path/to/vhci-hcd.ko
+
+ deux:# usbip list --remote 10.0.0.3
+ Exportable USB devices
+ ======================
+ - 10.0.0.3
+ 1-1: Prolific Technology, Inc. : unknown product (067b:3507)
+ : /sys/devices/pci0000:00/0000:00:1f.2/usb1/1-1
+ : (Defined at Interface level) / unknown subclass / unknown protocol (00/00/00)
+ : 0 - Mass Storage / SCSI / Bulk (Zip) (08/06/50)
+
+ 1-2.2.1: Apple Computer, Inc. : unknown product (05ac:0203)
+ : /sys/devices/pci0000:00/0000:00:1f.2/usb1/1-2/1-2.2/1-2.2.1
+ : (Defined at Interface level) / unknown subclass / unknown protocol (00/00/00)
+ : 0 - Human Interface Devices / Boot Interface Subclass / Keyboard (03/01/01)
+
+ 1-2.2.3: OmniVision Technologies, Inc. : OV511+ WebCam (05a9:a511)
+ : /sys/devices/pci0000:00/0000:00:1f.2/usb1/1-2/1-2.2/1-2.2.3
+ : (Defined at Interface level) / unknown subclass / unknown protocol (00/00/00)
+ : 0 - Vendor Specific Class / unknown subclass / unknown protocol (ff/00/00)
+
+ 3-1: Logitech, Inc. : QuickCam Pro 4000 (046d:08b2)
+ : /sys/devices/pci0000:00/0000:00:1e.0/0000:02:0a.0/usb3/3-1
+ : (Defined at Interface level) / unknown subclass / unknown protocol (00/00/00)
+ : 0 - Data / unknown subclass / unknown protocol (0a/ff/00)
+ : 1 - Audio / Control Device / unknown protocol (01/01/00)
+ : 2 - Audio / Streaming / unknown protocol (01/02/00)
+
+Attach a remote USB device:
+
+ deux:# usbip attach --remote 10.0.0.3 --busid 1-1
+ port 0 attached
+
+Show the devices attached to this client:
+
+ deux:# usbip port
+ Port 00: <Port in Use> at Full Speed(12Mbps)
+ Prolific Technology, Inc. : unknown product (067b:3507)
+ 6-1 -> usbip://10.0.0.3:3240/1-1 (remote bus/dev 001/004)
+ 6-1:1.0 used by usb-storage
+ /sys/class/scsi_device/0:0:0:0/device
+ /sys/class/scsi_host/host0/device
+ /sys/block/sda/device
+
+Detach the imported device:
+
+ deux:# usbip detach --port 0
+ port 0 detached
+
+
+[Checklist]
+ - See 'Debug Tips' on the project wiki.
+ - http://usbip.wiki.sourceforge.net/how-to-debug-usbip
+ - usbip-host.ko must be bound to the target device.
+ - See /sys/kernel/debug/usb/devices and find "Driver=..." lines of the device.
+ - The target USB gadget must be bound to vudc
+   (using the USB gadget subsystem, not the usbip bind command).
+ - Shut down the firewall.
+ - usbip now uses TCP port 3240.
+ - Disable SELinux.
+ - Check the kernel and daemon messages.
+
+
+[Contact]
+ Mailing List: linux-usb@vger.kernel.org
diff --git a/tools/usb/usbip/autogen.sh b/tools/usb/usbip/autogen.sh
new file mode 100755
index 000000000..e1112d3fc
--- /dev/null
+++ b/tools/usb/usbip/autogen.sh
@@ -0,0 +1,9 @@
+#!/bin/sh -x
+
+#aclocal
+#autoheader
+#libtoolize --copy --force
+#automake-1.9 -acf
+#autoconf
+
+autoreconf -i -f -v
diff --git a/tools/usb/usbip/cleanup.sh b/tools/usb/usbip/cleanup.sh
new file mode 100755
index 000000000..8028c3a5c
--- /dev/null
+++ b/tools/usb/usbip/cleanup.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+if [ -r Makefile ]; then
+ make distclean
+fi
+
+FILES="aclocal.m4 autom4te.cache compile config.guess config.h.in config.log \
+ config.status config.sub configure cscope.out depcomp install-sh \
+ libsrc/Makefile libsrc/Makefile.in libtool ltmain.sh Makefile \
+ Makefile.in missing src/Makefile src/Makefile.in"
+
+rm -vRf $FILES
diff --git a/tools/usb/usbip/configure.ac b/tools/usb/usbip/configure.ac
new file mode 100644
index 000000000..607d05c5c
--- /dev/null
+++ b/tools/usb/usbip/configure.ac
@@ -0,0 +1,111 @@
+dnl Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.59)
+AC_INIT([usbip-utils], [2.0], [linux-usb@vger.kernel.org])
+AC_DEFINE([USBIP_VERSION], [0x00000111], [binary-coded decimal version number])
+
+CURRENT=0
+REVISION=1
+AGE=0
+AC_SUBST([LIBUSBIP_VERSION], [$CURRENT:$REVISION:$AGE], [library version])
+
+AC_CONFIG_SRCDIR([src/usbipd.c])
+AC_CONFIG_HEADERS([config.h])
+
+AM_INIT_AUTOMAKE([foreign])
+LT_INIT
+
+# Silent build for automake >= 1.11
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+AC_SUBST([EXTRA_CFLAGS], ["-Wall -Werror -Wextra -std=gnu99"])
+
+# Checks for programs.
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_MAKE_SET
+
+# Checks for header files.
+AC_HEADER_DIRENT
+AC_HEADER_STDC
+AC_CHECK_HEADERS([arpa/inet.h fcntl.h netdb.h netinet/in.h stdint.h stdlib.h dnl
+ string.h sys/socket.h syslog.h unistd.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_TYPE_INT32_T
+AC_TYPE_SIZE_T
+AC_TYPE_SSIZE_T
+AC_TYPE_UINT16_T
+AC_TYPE_UINT32_T
+AC_TYPE_UINT8_T
+
+# Checks for library functions.
+AC_FUNC_REALLOC
+AC_CHECK_FUNCS([memset mkdir regcomp socket strchr strerror strstr dnl
+ strtoul])
+
+AC_CHECK_HEADER([libudev.h],
+ [AC_CHECK_LIB([udev], [udev_new],
+ [LIBS="$LIBS -ludev"],
+ [AC_MSG_ERROR([Missing udev library!])])],
+ [AC_MSG_ERROR([Missing /usr/include/libudev.h])])
+
+# Checks for libwrap library.
+AC_MSG_CHECKING([whether to use the libwrap (TCP wrappers) library])
+AC_ARG_WITH([tcp-wrappers],
+ [AS_HELP_STRING([--with-tcp-wrappers],
+ [use the libwrap (TCP wrappers) library])],
+ dnl [ACTION-IF-GIVEN]
+ [if test "$withval" = "yes"; then
+ AC_MSG_RESULT([yes])
+ AC_MSG_CHECKING([for hosts_access in -lwrap])
+ saved_LIBS="$LIBS"
+ LIBS="-lwrap $saved_LIBS"
+ AC_TRY_LINK(
+ [int hosts_access(); int allow_severity, deny_severity;],
+ [hosts_access()],
+ [AC_MSG_RESULT([yes]);
+ AC_DEFINE([HAVE_LIBWRAP], [1],
+ [use tcp wrapper]) wrap_LIB="-lwrap"],
+ [AC_MSG_RESULT([not found]); exit 1])
+ else
+ AC_MSG_RESULT([no]);
+ fi],
+ dnl [ACTION-IF-NOT-GIVEN]
+ [AC_MSG_RESULT([(default)])
+ AC_MSG_CHECKING([for hosts_access in -lwrap])
+ saved_LIBS="$LIBS"
+ LIBS="-lwrap $saved_LIBS"
+ AC_TRY_LINK(
+ [int hosts_access(); int allow_severity, deny_severity;],
+ [hosts_access()],
+ [AC_MSG_RESULT([yes]);
+ AC_DEFINE([HAVE_LIBWRAP], [1], [use tcp wrapper])],
+ [AC_MSG_RESULT([no]); LIBS="$saved_LIBS"])])
+
+# Sets directory containing usb.ids.
+AC_ARG_WITH([usbids-dir],
+ [AS_HELP_STRING([--with-usbids-dir=DIR],
+ [where usb.ids is found (default /usr/share/hwdata/)])],
+ [USBIDS_DIR=$withval], [USBIDS_DIR="/usr/share/hwdata/"])
+AC_SUBST([USBIDS_DIR])
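+dnl Illustrative example (added commentary): on systems shipping usb.ids
+dnl elsewhere, e.g. under /usr/share/misc/, one might run:
+dnl   ./configure --with-usbids-dir=/usr/share/misc/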
+
+# use _FORTIFY_SOURCE
+AC_MSG_CHECKING([whether to use fortify])
+AC_ARG_WITH([fortify],
+ [AS_HELP_STRING([--with-fortify],
+		[use the _FORTIFY_SOURCE option when compiling])],
+ dnl [ACTION-IF-GIVEN]
+ [if test "$withval" = "yes"; then
+ AC_MSG_RESULT([yes])
+ CFLAGS="$CFLAGS -D_FORTIFY_SOURCE -O"
+ else
+ AC_MSG_RESULT([no])
+ CFLAGS="$CFLAGS -U_FORTIFY_SOURCE"
+ fi
+ ],
+ dnl [ACTION-IF-NOT-GIVEN]
+ [AC_MSG_RESULT([default])])
+
+AC_CONFIG_FILES([Makefile libsrc/Makefile src/Makefile])
+AC_OUTPUT
diff --git a/tools/usb/usbip/doc/usbip.8 b/tools/usb/usbip/doc/usbip.8
new file mode 100644
index 000000000..a6097be25
--- /dev/null
+++ b/tools/usb/usbip/doc/usbip.8
@@ -0,0 +1,95 @@
+.TH USBIP "8" "February 2009" "usbip" "System Administration Utilities"
+.SH NAME
+usbip \- manage USB/IP devices
+.SH SYNOPSIS
+.B usbip
+[\fIoptions\fR] <\fIcommand\fR> <\fIargs\fR>
+
+.SH DESCRIPTION
+On a USB/IP server, devices can be listed, bound, and unbound using
+this program. On a USB/IP client, devices exported by USB/IP servers
+can be listed, attached and detached.
+
+.SH OPTIONS
+.HP
+\fB\-\-debug\fR
+.IP
+Print debugging information.
+.PP
+
+.HP
+\fB\-\-log\fR
+.IP
+Log to syslog.
+.PP
+
+.HP
+\fB\-\-tcp-port PORT\fR
+.IP
+Connect to PORT on remote host (used for attach and list --remote).
+.PP
+
+.SH COMMANDS
+.HP
+\fBversion\fR
+.IP
+Show version and exit.
+.PP
+
+.HP
+\fBhelp\fR [\fIcommand\fR]
+.IP
+Print the program help message, or help on a specific command, and
+then exit.
+.PP
+
+.HP
+\fBattach\fR \-\-remote=<\fIhost\fR> \-\-busid=<\fIbus_id\fR>
+.IP
+Attach a remote USB device.
+.PP
+
+.HP
+\fBdetach\fR \-\-port=<\fIport\fR>
+.IP
+Detach an imported USB device.
+.PP
+
+.HP
+\fBbind\fR \-\-busid=<\fIbusid\fR>
+.IP
+Make a device exportable.
+.PP
+
+.HP
+\fBunbind\fR \-\-busid=<\fIbusid\fR>
+.IP
+Stop exporting a device so it can be used by a local driver.
+.PP
+
+.HP
+\fBlist\fR \-\-remote=<\fIhost\fR>
+.IP
+List USB devices exported by a remote host.
+.PP
+
+.HP
+\fBlist\fR \-\-local
+.IP
+List local USB devices.
+.PP
+
+
+.SH EXAMPLES
+
+ client:# usbip list --remote=server
+ - List exportable usb devices on the server.
+
+ client:# usbip attach --remote=server --busid=1-2
+ - Connect the remote USB device.
+
+ client:# usbip detach --port=0
+ - Detach the usb device.
+
+.SH "SEE ALSO"
+\fBusbipd\fP(8)
diff --git a/tools/usb/usbip/doc/usbipd.8 b/tools/usb/usbip/doc/usbipd.8
new file mode 100644
index 000000000..ac4635db3
--- /dev/null
+++ b/tools/usb/usbip/doc/usbipd.8
@@ -0,0 +1,91 @@
+.TH USBIP "8" "February 2009" "usbip" "System Administration Utilities"
+.SH NAME
+usbipd \- USB/IP server daemon
+.SH SYNOPSIS
+.B usbipd
+[\fIoptions\fR]
+
+.SH DESCRIPTION
+.B usbipd
+provides USB/IP clients access to exported USB devices.
+
+Devices have to explicitly be exported using
+.B usbip bind
+before usbipd makes them available to other hosts.
+
+The daemon accepts connections from USB/IP clients
+on TCP port 3240 by default.
+
+.SH OPTIONS
+.HP
+\fB\-4\fR, \fB\-\-ipv4\fR
+.IP
+Bind to IPv4. Default is both.
+.PP
+
+.HP
+\fB\-6\fR, \fB\-\-ipv6\fR
+.IP
+Bind to IPv6. Default is both.
+.PP
+
+.HP
+\fB\-D\fR, \fB\-\-daemon\fR
+.IP
+Run as a daemon process.
+.PP
+
+.HP
+\fB\-d\fR, \fB\-\-debug\fR
+.IP
+Print debugging information.
+.PP
+
+.HP
+\fB\-PFILE\fR, \fB\-\-pid FILE\fR
+.IP
+Write process id to FILE.
+.br
+If no FILE is specified, /var/run/usbipd.pid is used.
+.PP
+
+.HP
+\fB\-tPORT\fR, \fB\-\-tcp\-port PORT\fR
+.IP
+Listen on TCP/IP port PORT.
+.PP
+
+.HP
+\fB\-h\fR, \fB\-\-help\fR
+.IP
+Print the program help message and exit.
+.PP
+
+.HP
+\fB\-v\fR, \fB\-\-version\fR
+.IP
+Show version.
+.PP
+
+.SH LIMITATIONS
+
+.B usbipd
+offers no authentication or authorization for USB/IP. Any
+USB/IP client can connect and use exported devices.
+
+.SH EXAMPLES
+
+ server:# modprobe usbip
+
+ server:# usbipd -D
+ - Start usbip daemon.
+
+ server:# usbip list --local
+ - List driver assignments for usb devices.
+
+ server:# usbip bind --busid=1-2
+ - Bind usbip-host.ko to the device with busid 1-2.
+ - The USB device 1-2 is now exportable to other hosts!
+ - Use 'usbip unbind --busid=1-2' to stop exporting the device and use
+   it locally.
+
+.SH "SEE ALSO"
+\fBusbip\fP(8)
+
diff --git a/tools/usb/usbip/libsrc/Makefile.am b/tools/usb/usbip/libsrc/Makefile.am
new file mode 100644
index 000000000..dabd2c91d
--- /dev/null
+++ b/tools/usb/usbip/libsrc/Makefile.am
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+libusbip_la_CPPFLAGS = -DUSBIDS_FILE='"@USBIDS_DIR@/usb.ids"'
+libusbip_la_CFLAGS = @EXTRA_CFLAGS@
+libusbip_la_LDFLAGS = -version-info @LIBUSBIP_VERSION@
+
+lib_LTLIBRARIES := libusbip.la
+libusbip_la_SOURCES := names.c names.h usbip_host_driver.c usbip_host_driver.h \
+ usbip_device_driver.c usbip_device_driver.h \
+ usbip_common.c usbip_common.h usbip_host_common.h \
+ usbip_host_common.c vhci_driver.c vhci_driver.h \
+ sysfs_utils.c sysfs_utils.h
diff --git a/tools/usb/usbip/libsrc/list.h b/tools/usb/usbip/libsrc/list.h
new file mode 100644
index 000000000..a941671e4
--- /dev/null
+++ b/tools/usb/usbip/libsrc/list.h
@@ -0,0 +1,137 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LIST_H
+#define _LIST_H
+
+/* Stripped down implementation of linked list taken
+ * from the Linux Kernel.
+ */
+
+/*
+ * Simple doubly linked list implementation.
+ *
+ * Some of the internal functions ("__xxx") are useful when
+ * manipulating whole lists rather than single entries, as
+ * sometimes we already know the next/prev entries and we can
+ * generate better code by using them directly rather than
+ * using the generic single-entry routines.
+ */
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+#define LIST_HEAD_INIT(name) { &(name), &(name) }
+
+#define LIST_HEAD(name) \
+ struct list_head name = LIST_HEAD_INIT(name)
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+/*
+ * Insert a new entry between two known consecutive entries.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_add(struct list_head *new,
+ struct list_head *prev,
+ struct list_head *next)
+{
+ next->prev = new;
+ new->next = next;
+ new->prev = prev;
+ prev->next = new;
+}
+
+/**
+ * list_add - add a new entry
+ * @new: new entry to be added
+ * @head: list head to add it after
+ *
+ * Insert a new entry after the specified head.
+ * This is good for implementing stacks.
+ */
+static inline void list_add(struct list_head *new, struct list_head *head)
+{
+ __list_add(new, head, head->next);
+}
+
+/*
+ * Delete a list entry by making the prev/next entries
+ * point to each other.
+ *
+ * This is only for internal list manipulation where we know
+ * the prev/next entries already!
+ */
+static inline void __list_del(struct list_head * prev, struct list_head * next)
+{
+ next->prev = prev;
+ prev->next = next;
+}
+
+#define POISON_POINTER_DELTA 0
+#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA)
+
+/**
+ * list_del - deletes entry from list.
+ * @entry: the element to delete from the list.
+ * Note: list_empty() on entry does not return true after this, the entry is
+ * in an undefined state.
+ */
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
+static inline void list_del(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+ entry->next = LIST_POISON1;
+ entry->prev = LIST_POISON2;
+}
+
+/**
+ * list_entry - get the struct for this entry
+ * @ptr: the &struct list_head pointer.
+ * @type: the type of the struct this is embedded in.
+ * @member: the name of the list_head within the struct.
+ */
+#define list_entry(ptr, type, member) \
+ container_of(ptr, type, member)
+/**
+ * list_for_each - iterate over a list
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @head: the head for your list.
+ */
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+/**
+ * list_for_each_safe - iterate over a list safe against removal of list entry
+ * @pos: the &struct list_head to use as a loop cursor.
+ * @n: another &struct list_head to use as temporary storage
+ * @head: the head for your list.
+ */
+#define list_for_each_safe(pos, n, head) \
+ for (pos = (head)->next, n = pos->next; pos != (head); \
+ pos = n, n = pos->next)
+
+#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
+
+/**
+ * container_of - cast a member of a structure out to the containing structure
+ * @ptr: the pointer to the member.
+ * @type: the type of the container struct this is embedded in.
+ * @member: the name of the member within the struct.
+ *
+ */
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
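+/*
+ * Usage sketch (illustrative addition, assuming a compiler with GNU
+ * typeof, as already required by container_of above):
+ *
+ *	struct item {
+ *		int value;
+ *		struct list_head node;
+ *	};
+ *
+ *	LIST_HEAD(items);
+ *	struct item a = { .value = 1 };
+ *	struct list_head *pos;
+ *
+ *	list_add(&a.node, &items);
+ *	list_for_each(pos, &items) {
+ *		struct item *it = list_entry(pos, struct item, node);
+ *		... use it->value ...
+ *	}
+ */
+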
+#endif
diff --git a/tools/usb/usbip/libsrc/names.c b/tools/usb/usbip/libsrc/names.c
new file mode 100644
index 000000000..81ff85224
--- /dev/null
+++ b/tools/usb/usbip/libsrc/names.c
@@ -0,0 +1,504 @@
+/*
+ * names.c -- USB name database manipulation routines
+ *
+ * Copyright (C) 1999, 2000 Thomas Sailer (sailer@ife.ee.ethz.ch)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2005 Takahiro Hirofuchi
+ * - names_deinit() is added.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include "names.h"
+#include "usbip_common.h"
+
+struct vendor {
+ struct vendor *next;
+ u_int16_t vendorid;
+ char name[1];
+};
+
+struct product {
+ struct product *next;
+ u_int16_t vendorid, productid;
+ char name[1];
+};
+
+struct class {
+ struct class *next;
+ u_int8_t classid;
+ char name[1];
+};
+
+struct subclass {
+ struct subclass *next;
+ u_int8_t classid, subclassid;
+ char name[1];
+};
+
+struct protocol {
+ struct protocol *next;
+ u_int8_t classid, subclassid, protocolid;
+ char name[1];
+};
+
+struct genericstrtable {
+ struct genericstrtable *next;
+ unsigned int num;
+ char name[1];
+};
+
+
+#define HASH1 0x10
+#define HASH2 0x02
+#define HASHSZ 16
+
+static unsigned int hashnum(unsigned int num)
+{
+ unsigned int mask1 = HASH1 << 27, mask2 = HASH2 << 27;
+
+ for (; mask1 >= HASH1; mask1 >>= 1, mask2 >>= 1)
+ if (num & mask1)
+ num ^= mask2;
+ return num & (HASHSZ-1);
+}
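+
+/*
+ * Illustrative note (added commentary): hashnum() folds the upper bits
+ * of a 32-bit key down toward the low bits by conditionally xoring a
+ * tap mask across the word, then keeps only the low four bits
+ * (HASHSZ == 16), so composite keys such as (vendorid << 16) | productid
+ * spread across the small hash tables below.
+ */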
+
+
+static struct vendor *vendors[HASHSZ] = { NULL, };
+static struct product *products[HASHSZ] = { NULL, };
+static struct class *classes[HASHSZ] = { NULL, };
+static struct subclass *subclasses[HASHSZ] = { NULL, };
+static struct protocol *protocols[HASHSZ] = { NULL, };
+
+const char *names_vendor(u_int16_t vendorid)
+{
+ struct vendor *v;
+
+ v = vendors[hashnum(vendorid)];
+ for (; v; v = v->next)
+ if (v->vendorid == vendorid)
+ return v->name;
+ return NULL;
+}
+
+const char *names_product(u_int16_t vendorid, u_int16_t productid)
+{
+ struct product *p;
+
+ p = products[hashnum((vendorid << 16) | productid)];
+ for (; p; p = p->next)
+ if (p->vendorid == vendorid && p->productid == productid)
+ return p->name;
+ return NULL;
+}
+
+const char *names_class(u_int8_t classid)
+{
+ struct class *c;
+
+ c = classes[hashnum(classid)];
+ for (; c; c = c->next)
+ if (c->classid == classid)
+ return c->name;
+ return NULL;
+}
+
+const char *names_subclass(u_int8_t classid, u_int8_t subclassid)
+{
+ struct subclass *s;
+
+ s = subclasses[hashnum((classid << 8) | subclassid)];
+ for (; s; s = s->next)
+ if (s->classid == classid && s->subclassid == subclassid)
+ return s->name;
+ return NULL;
+}
+
+const char *names_protocol(u_int8_t classid, u_int8_t subclassid,
+ u_int8_t protocolid)
+{
+ struct protocol *p;
+
+ p = protocols[hashnum((classid << 16) | (subclassid << 8)
+ | protocolid)];
+ for (; p; p = p->next)
+ if (p->classid == classid && p->subclassid == subclassid &&
+ p->protocolid == protocolid)
+ return p->name;
+ return NULL;
+}
+
+/* cleanup functions added by Takahiro */
+struct pool {
+ struct pool *next;
+ void *mem;
+};
+
+static struct pool *pool_head;
+
+static void *my_malloc(size_t size)
+{
+ struct pool *p;
+
+ p = calloc(1, sizeof(struct pool));
+ if (!p)
+ return NULL;
+
+ p->mem = calloc(1, size);
+ if (!p->mem) {
+ free(p);
+ return NULL;
+ }
+
+ p->next = pool_head;
+ pool_head = p;
+
+ return p->mem;
+}
+
+void names_free(void)
+{
+ struct pool *pool;
+
+ if (!pool_head)
+ return;
+
+ for (pool = pool_head; pool != NULL; ) {
+ struct pool *tmp;
+
+ if (pool->mem)
+ free(pool->mem);
+
+ tmp = pool;
+ pool = pool->next;
+ free(tmp);
+ }
+}
+
+static int new_vendor(const char *name, u_int16_t vendorid)
+{
+ struct vendor *v;
+ unsigned int h = hashnum(vendorid);
+
+ v = vendors[h];
+ for (; v; v = v->next)
+ if (v->vendorid == vendorid)
+ return -1;
+ v = my_malloc(sizeof(struct vendor) + strlen(name));
+ if (!v)
+ return -1;
+ strcpy(v->name, name);
+ v->vendorid = vendorid;
+ v->next = vendors[h];
+ vendors[h] = v;
+ return 0;
+}
+
+static int new_product(const char *name, u_int16_t vendorid,
+ u_int16_t productid)
+{
+ struct product *p;
+ unsigned int h = hashnum((vendorid << 16) | productid);
+
+ p = products[h];
+ for (; p; p = p->next)
+ if (p->vendorid == vendorid && p->productid == productid)
+ return -1;
+ p = my_malloc(sizeof(struct product) + strlen(name));
+ if (!p)
+ return -1;
+ strcpy(p->name, name);
+ p->vendorid = vendorid;
+ p->productid = productid;
+ p->next = products[h];
+ products[h] = p;
+ return 0;
+}
+
+static int new_class(const char *name, u_int8_t classid)
+{
+ struct class *c;
+ unsigned int h = hashnum(classid);
+
+ c = classes[h];
+ for (; c; c = c->next)
+ if (c->classid == classid)
+ return -1;
+ c = my_malloc(sizeof(struct class) + strlen(name));
+ if (!c)
+ return -1;
+ strcpy(c->name, name);
+ c->classid = classid;
+ c->next = classes[h];
+ classes[h] = c;
+ return 0;
+}
+
+static int new_subclass(const char *name, u_int8_t classid, u_int8_t subclassid)
+{
+ struct subclass *s;
+ unsigned int h = hashnum((classid << 8) | subclassid);
+
+ s = subclasses[h];
+ for (; s; s = s->next)
+ if (s->classid == classid && s->subclassid == subclassid)
+ return -1;
+ s = my_malloc(sizeof(struct subclass) + strlen(name));
+ if (!s)
+ return -1;
+ strcpy(s->name, name);
+ s->classid = classid;
+ s->subclassid = subclassid;
+ s->next = subclasses[h];
+ subclasses[h] = s;
+ return 0;
+}
+
+static int new_protocol(const char *name, u_int8_t classid, u_int8_t subclassid,
+ u_int8_t protocolid)
+{
+ struct protocol *p;
+ unsigned int h = hashnum((classid << 16) | (subclassid << 8)
+ | protocolid);
+
+ p = protocols[h];
+ for (; p; p = p->next)
+ if (p->classid == classid && p->subclassid == subclassid
+ && p->protocolid == protocolid)
+ return -1;
+ p = my_malloc(sizeof(struct protocol) + strlen(name));
+ if (!p)
+ return -1;
+ strcpy(p->name, name);
+ p->classid = classid;
+ p->subclassid = subclassid;
+ p->protocolid = protocolid;
+ p->next = protocols[h];
+ protocols[h] = p;
+ return 0;
+}
+
+static void parse(FILE *f)
+{
+ char buf[512], *cp;
+ unsigned int linectr = 0;
+ int lastvendor = -1;
+ int lastclass = -1;
+ int lastsubclass = -1;
+ int lasthut = -1;
+ int lastlang = -1;
+ unsigned int u;
+
+ while (fgets(buf, sizeof(buf), f)) {
+ linectr++;
+ /* remove line ends */
+ cp = strchr(buf, '\r');
+ if (cp)
+ *cp = 0;
+ cp = strchr(buf, '\n');
+ if (cp)
+ *cp = 0;
+ if (buf[0] == '#' || !buf[0])
+ continue;
+ cp = buf;
+ if (buf[0] == 'P' && buf[1] == 'H' && buf[2] == 'Y' &&
+ buf[3] == 'S' && buf[4] == 'D' &&
+ buf[5] == 'E' && buf[6] == 'S' && /*isspace(buf[7])*/
+ buf[7] == ' ') {
+ continue;
+ }
+ if (buf[0] == 'P' && buf[1] == 'H' &&
+ buf[2] == 'Y' && /*isspace(buf[3])*/ buf[3] == ' ') {
+ continue;
+ }
+ if (buf[0] == 'B' && buf[1] == 'I' && buf[2] == 'A' &&
+ buf[3] == 'S' && /*isspace(buf[4])*/ buf[4] == ' ') {
+ continue;
+ }
+ if (buf[0] == 'L' && /*isspace(buf[1])*/ buf[1] == ' ') {
+ lasthut = lastclass = lastvendor = lastsubclass = -1;
+ /*
+ * set 1 as pseudo-id to indicate that the parser is
+ * in a `L' section.
+ */
+ lastlang = 1;
+ continue;
+ }
+ if (buf[0] == 'C' && /*isspace(buf[1])*/ buf[1] == ' ') {
+ /* class spec */
+ cp = buf+2;
+ while (isspace(*cp))
+ cp++;
+ if (!isxdigit(*cp)) {
+ err("Invalid class spec at line %u", linectr);
+ continue;
+ }
+ u = strtoul(cp, &cp, 16);
+ while (isspace(*cp))
+ cp++;
+ if (!*cp) {
+ err("Invalid class spec at line %u", linectr);
+ continue;
+ }
+ if (new_class(cp, u))
+				err("Duplicate class spec at line %u class %02x %s",
+ linectr, u, cp);
+ dbg("line %5u class %02x %s", linectr, u, cp);
+ lasthut = lastlang = lastvendor = lastsubclass = -1;
+ lastclass = u;
+ continue;
+ }
+ if (buf[0] == 'A' && buf[1] == 'T' && isspace(buf[2])) {
+ /* audio terminal type spec */
+ continue;
+ }
+ if (buf[0] == 'H' && buf[1] == 'C' && buf[2] == 'C'
+ && isspace(buf[3])) {
+ /* HID Descriptor bCountryCode */
+ continue;
+ }
+ if (isxdigit(*cp)) {
+ /* vendor */
+ u = strtoul(cp, &cp, 16);
+ while (isspace(*cp))
+ cp++;
+ if (!*cp) {
+ err("Invalid vendor spec at line %u", linectr);
+ continue;
+ }
+ if (new_vendor(cp, u))
+ err("Duplicate vendor spec at line %u vendor %04x %s",
+ linectr, u, cp);
+ dbg("line %5u vendor %04x %s", linectr, u, cp);
+ lastvendor = u;
+ lasthut = lastlang = lastclass = lastsubclass = -1;
+ continue;
+ }
+ if (buf[0] == '\t' && isxdigit(buf[1])) {
+ /* product or subclass spec */
+ u = strtoul(buf+1, &cp, 16);
+ while (isspace(*cp))
+ cp++;
+ if (!*cp) {
+ err("Invalid product/subclass spec at line %u",
+ linectr);
+ continue;
+ }
+ if (lastvendor != -1) {
+ if (new_product(cp, lastvendor, u))
+ err("Duplicate product spec at line %u product %04x:%04x %s",
+ linectr, lastvendor, u, cp);
+ dbg("line %5u product %04x:%04x %s", linectr,
+ lastvendor, u, cp);
+ continue;
+ }
+ if (lastclass != -1) {
+ if (new_subclass(cp, lastclass, u))
+ err("Duplicate subclass spec at line %u class %02x:%02x %s",
+ linectr, lastclass, u, cp);
+ dbg("line %5u subclass %02x:%02x %s", linectr,
+ lastclass, u, cp);
+ lastsubclass = u;
+ continue;
+ }
+ if (lasthut != -1) {
+ /* do not store hut */
+ continue;
+ }
+ if (lastlang != -1) {
+ /* do not store langid */
+ continue;
+ }
+ err("Product/Subclass spec without prior Vendor/Class spec at line %u",
+ linectr);
+ continue;
+ }
+ if (buf[0] == '\t' && buf[1] == '\t' && isxdigit(buf[2])) {
+ /* protocol spec */
+ u = strtoul(buf+2, &cp, 16);
+ while (isspace(*cp))
+ cp++;
+ if (!*cp) {
+ err("Invalid protocol spec at line %u",
+ linectr);
+ continue;
+ }
+ if (lastclass != -1 && lastsubclass != -1) {
+ if (new_protocol(cp, lastclass, lastsubclass,
+ u))
+ err("Duplicate protocol spec at line %u class %02x:%02x:%02x %s",
+ linectr, lastclass, lastsubclass,
+ u, cp);
+ dbg("line %5u protocol %02x:%02x:%02x %s",
+ linectr, lastclass, lastsubclass, u, cp);
+ continue;
+ }
+ err("Protocol spec without prior Class and Subclass spec at line %u",
+ linectr);
+ continue;
+ }
+ if (buf[0] == 'H' && buf[1] == 'I' &&
+ buf[2] == 'D' && /*isspace(buf[3])*/ buf[3] == ' ') {
+ continue;
+ }
+ if (buf[0] == 'H' && buf[1] == 'U' &&
+ buf[2] == 'T' && /*isspace(buf[3])*/ buf[3] == ' ') {
+ lastlang = lastclass = lastvendor = lastsubclass = -1;
+ /*
+ * set 1 as pseudo-id to indicate that the parser is
+ * in a `HUT' section.
+ */
+ lasthut = 1;
+ continue;
+ }
+ if (buf[0] == 'R' && buf[1] == ' ')
+ continue;
+
+ if (buf[0] == 'V' && buf[1] == 'T')
+ continue;
+
+ err("Unknown line at line %u", linectr);
+ }
+}
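+
+/*
+ * Illustrative usb.ids fragment accepted by parse() above (entries are
+ * examples; the real data comes from the usb.ids database):
+ *
+ *	1d6b  Linux Foundation
+ *		0002  2.0 root hub
+ *	C 09  Hub
+ *		00  Unused
+ *			00  Full speed (or root) hub
+ */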
+
+
+int names_init(char *n)
+{
+ FILE *f;
+
+ f = fopen(n, "r");
+ if (!f)
+ return errno;
+
+ parse(f);
+ fclose(f);
+ return 0;
+}
diff --git a/tools/usb/usbip/libsrc/names.h b/tools/usb/usbip/libsrc/names.h
new file mode 100644
index 000000000..680926512
--- /dev/null
+++ b/tools/usb/usbip/libsrc/names.h
@@ -0,0 +1,41 @@
+/*
+ * names.h -- USB name database manipulation routines
+ *
+ * Copyright (C) 1999, 2000 Thomas Sailer (sailer@ife.ee.ethz.ch)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ *
+ * Copyright (C) 2005 Takahiro Hirofuchi
+ * - names_free() is added.
+ */
+
+#ifndef _NAMES_H
+#define _NAMES_H
+
+#include <sys/types.h>
+
+/* used by usbip_common.c */
+extern const char *names_vendor(u_int16_t vendorid);
+extern const char *names_product(u_int16_t vendorid, u_int16_t productid);
+extern const char *names_class(u_int8_t classid);
+extern const char *names_subclass(u_int8_t classid, u_int8_t subclassid);
+extern const char *names_protocol(u_int8_t classid, u_int8_t subclassid,
+ u_int8_t protocolid);
+extern int names_init(char *n);
+extern void names_free(void);
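+
+/*
+ * Hypothetical usage sketch (editor's illustration, not part of the
+ * original header):
+ *
+ *	if (names_init("/usr/share/hwdata/usb.ids") == 0) {
+ *		const char *v = names_vendor(0x1d6b);
+ *		printf("%s\n", v ? v : "unknown vendor");
+ *		names_free();
+ *	}
+ */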
+
+#endif /* _NAMES_H */
diff --git a/tools/usb/usbip/libsrc/sysfs_utils.c b/tools/usb/usbip/libsrc/sysfs_utils.c
new file mode 100644
index 000000000..14d5e67d3
--- /dev/null
+++ b/tools/usb/usbip/libsrc/sysfs_utils.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "sysfs_utils.h"
+#include "usbip_common.h"
+
+int write_sysfs_attribute(const char *attr_path, const char *new_value,
+ size_t len)
+{
+ int fd;
+ int length;
+
+ fd = open(attr_path, O_WRONLY);
+ if (fd < 0) {
+ dbg("error opening attribute %s", attr_path);
+ return -1;
+ }
+
+ length = write(fd, new_value, len);
+ if (length < 0) {
+ dbg("error writing to attribute %s", attr_path);
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+
+ return 0;
+}
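+
+/*
+ * Illustrative call (editor's sketch; path and busid are examples only):
+ * binding a device to usbip-host goes through this helper, roughly as
+ *
+ *	write_sysfs_attribute("/sys/bus/usb/drivers/usbip-host/bind",
+ *			      "1-1", strlen("1-1"));
+ */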
diff --git a/tools/usb/usbip/libsrc/sysfs_utils.h b/tools/usb/usbip/libsrc/sysfs_utils.h
new file mode 100644
index 000000000..0cd5f17e7
--- /dev/null
+++ b/tools/usb/usbip/libsrc/sysfs_utils.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SYSFS_UTILS_H
+#define __SYSFS_UTILS_H
+
+int write_sysfs_attribute(const char *attr_path, const char *new_value,
+ size_t len);
+
+#endif
diff --git a/tools/usb/usbip/libsrc/usbip_common.c b/tools/usb/usbip/libsrc/usbip_common.c
new file mode 100644
index 000000000..bb424638d
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_common.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2005-2007 Takahiro Hirofuchi
+ */
+
+#include <libudev.h>
+#include "usbip_common.h"
+#include "names.h"
+
+#undef PROGNAME
+#define PROGNAME "libusbip"
+
+int usbip_use_syslog;
+int usbip_use_stderr;
+int usbip_use_debug;
+
+extern struct udev *udev_context;
+
+struct speed_string {
+ int num;
+ char *speed;
+ char *desc;
+};
+
+static const struct speed_string speed_strings[] = {
+ { USB_SPEED_UNKNOWN, "unknown", "Unknown Speed"},
+ { USB_SPEED_LOW, "1.5", "Low Speed(1.5Mbps)" },
+ { USB_SPEED_FULL, "12", "Full Speed(12Mbps)" },
+ { USB_SPEED_HIGH, "480", "High Speed(480Mbps)" },
+ { USB_SPEED_WIRELESS, "53.3-480", "Wireless"},
+ { USB_SPEED_SUPER, "5000", "Super Speed(5000Mbps)" },
+ { 0, NULL, NULL }
+};
+
+struct portst_string {
+ int num;
+ char *desc;
+};
+
+static struct portst_string portst_strings[] = {
+ { SDEV_ST_AVAILABLE, "Device Available" },
+ { SDEV_ST_USED, "Device in Use" },
+ { SDEV_ST_ERROR, "Device Error"},
+ { VDEV_ST_NULL, "Port Available"},
+ { VDEV_ST_NOTASSIGNED, "Port Initializing"},
+ { VDEV_ST_USED, "Port in Use"},
+ { VDEV_ST_ERROR, "Port Error"},
+ { 0, NULL}
+};
+
+const char *usbip_status_string(int32_t status)
+{
+ for (int i = 0; portst_strings[i].desc != NULL; i++)
+ if (portst_strings[i].num == status)
+ return portst_strings[i].desc;
+
+ return "Unknown Status";
+}
+
+const char *usbip_speed_string(int num)
+{
+ for (int i = 0; speed_strings[i].speed != NULL; i++)
+ if (speed_strings[i].num == num)
+ return speed_strings[i].desc;
+
+ return "Unknown Speed";
+}
+
+struct op_common_status_string {
+ int num;
+ char *desc;
+};
+
+static struct op_common_status_string op_common_status_strings[] = {
+ { ST_OK, "Request Completed Successfully" },
+ { ST_NA, "Request Failed" },
+ { ST_DEV_BUSY, "Device busy (exported)" },
+ { ST_DEV_ERR, "Device in error state" },
+ { ST_NODEV, "Device not found" },
+ { ST_ERROR, "Unexpected response" },
+ { 0, NULL}
+};
+
+const char *usbip_op_common_status_string(int status)
+{
+ for (int i = 0; op_common_status_strings[i].desc != NULL; i++)
+ if (op_common_status_strings[i].num == status)
+ return op_common_status_strings[i].desc;
+
+ return "Unknown Op Common Status";
+}
+
+#define DBG_UDEV_INTEGER(name)\
+ dbg("%-20s = %x", to_string(name), (int) udev->name)
+
+#define DBG_UINF_INTEGER(name)\
+ dbg("%-20s = %x", to_string(name), (int) uinf->name)
+
+void dump_usb_interface(struct usbip_usb_interface *uinf)
+{
+ char buff[100];
+
+ usbip_names_get_class(buff, sizeof(buff),
+ uinf->bInterfaceClass,
+ uinf->bInterfaceSubClass,
+ uinf->bInterfaceProtocol);
+ dbg("%-20s = %s", "Interface(C/SC/P)", buff);
+}
+
+void dump_usb_device(struct usbip_usb_device *udev)
+{
+ char buff[100];
+
+ dbg("%-20s = %s", "path", udev->path);
+ dbg("%-20s = %s", "busid", udev->busid);
+
+ usbip_names_get_class(buff, sizeof(buff),
+ udev->bDeviceClass,
+ udev->bDeviceSubClass,
+ udev->bDeviceProtocol);
+ dbg("%-20s = %s", "Device(C/SC/P)", buff);
+
+ DBG_UDEV_INTEGER(bcdDevice);
+
+ usbip_names_get_product(buff, sizeof(buff),
+ udev->idVendor,
+ udev->idProduct);
+ dbg("%-20s = %s", "Vendor/Product", buff);
+
+ DBG_UDEV_INTEGER(bNumConfigurations);
+ DBG_UDEV_INTEGER(bNumInterfaces);
+
+ dbg("%-20s = %s", "speed",
+ usbip_speed_string(udev->speed));
+
+ DBG_UDEV_INTEGER(busnum);
+ DBG_UDEV_INTEGER(devnum);
+}
+
+
+int read_attr_value(struct udev_device *dev, const char *name,
+ const char *format)
+{
+ const char *attr;
+ int num = 0;
+ int ret;
+
+ attr = udev_device_get_sysattr_value(dev, name);
+ if (!attr) {
+ err("udev_device_get_sysattr_value failed");
+ goto err;
+ }
+
+ /* The client chooses the device configuration
+ * when attaching it so right after being bound
+ * to usbip-host on the server the device will
+ * have no configuration.
+ * Therefore, attributes such as bConfigurationValue
+ * and bNumInterfaces will not exist and sscanf will
+ * fail. Check for these cases and don't treat them
+ * as errors.
+ */
+
+ ret = sscanf(attr, format, &num);
+ if (ret < 1) {
+ if (strcmp(name, "bConfigurationValue") &&
+ strcmp(name, "bNumInterfaces")) {
+ err("sscanf failed for attribute %s", name);
+ goto err;
+ }
+ }
+
+err:
+
+ return num;
+}
+
+
+int read_attr_speed(struct udev_device *dev)
+{
+ const char *speed;
+
+ speed = udev_device_get_sysattr_value(dev, "speed");
+ if (!speed) {
+ err("udev_device_get_sysattr_value failed");
+ goto err;
+ }
+
+ for (int i = 0; speed_strings[i].speed != NULL; i++) {
+ if (!strcmp(speed, speed_strings[i].speed))
+ return speed_strings[i].num;
+ }
+
+err:
+
+ return USB_SPEED_UNKNOWN;
+}
+
+#define READ_ATTR(object, type, dev, name, format) \
+ do { \
+ (object)->name = (type) read_attr_value(dev, to_string(name), \
+ format); \
+ } while (0)
+
+
+int read_usb_device(struct udev_device *sdev, struct usbip_usb_device *udev)
+{
+ uint32_t busnum, devnum;
+ const char *path, *name;
+
+ READ_ATTR(udev, uint8_t, sdev, bDeviceClass, "%02x\n");
+ READ_ATTR(udev, uint8_t, sdev, bDeviceSubClass, "%02x\n");
+ READ_ATTR(udev, uint8_t, sdev, bDeviceProtocol, "%02x\n");
+
+ READ_ATTR(udev, uint16_t, sdev, idVendor, "%04x\n");
+ READ_ATTR(udev, uint16_t, sdev, idProduct, "%04x\n");
+ READ_ATTR(udev, uint16_t, sdev, bcdDevice, "%04x\n");
+
+ READ_ATTR(udev, uint8_t, sdev, bConfigurationValue, "%02x\n");
+ READ_ATTR(udev, uint8_t, sdev, bNumConfigurations, "%02x\n");
+ READ_ATTR(udev, uint8_t, sdev, bNumInterfaces, "%02x\n");
+
+ READ_ATTR(udev, uint8_t, sdev, devnum, "%d\n");
+ udev->speed = read_attr_speed(sdev);
+
+ path = udev_device_get_syspath(sdev);
+ name = udev_device_get_sysname(sdev);
+
+ strncpy(udev->path, path, SYSFS_PATH_MAX);
+ strncpy(udev->busid, name, SYSFS_BUS_ID_SIZE);
+
+ sscanf(name, "%u-%u", &busnum, &devnum);
+ udev->busnum = busnum;
+
+ return 0;
+}
+
+int read_usb_interface(struct usbip_usb_device *udev, int i,
+ struct usbip_usb_interface *uinf)
+{
+ char busid[SYSFS_BUS_ID_SIZE];
+ int size;
+ struct udev_device *sif;
+
+ size = snprintf(busid, sizeof(busid), "%s:%d.%d",
+ udev->busid, udev->bConfigurationValue, i);
+ if (size < 0 || (unsigned int)size >= sizeof(busid)) {
+ err("busid length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(busid));
+ return -1;
+ }
+
+ sif = udev_device_new_from_subsystem_sysname(udev_context, "usb", busid);
+ if (!sif) {
+ err("udev_device_new_from_subsystem_sysname %s failed", busid);
+ return -1;
+ }
+
+ READ_ATTR(uinf, uint8_t, sif, bInterfaceClass, "%02x\n");
+ READ_ATTR(uinf, uint8_t, sif, bInterfaceSubClass, "%02x\n");
+ READ_ATTR(uinf, uint8_t, sif, bInterfaceProtocol, "%02x\n");
+
+ return 0;
+}
+
+int usbip_names_init(char *f)
+{
+ return names_init(f);
+}
+
+void usbip_names_free(void)
+{
+ names_free();
+}
+
+void usbip_names_get_product(char *buff, size_t size, uint16_t vendor,
+ uint16_t product)
+{
+ const char *prod, *vend;
+
+ prod = names_product(vendor, product);
+ if (!prod)
+ prod = "unknown product";
+
+ vend = names_vendor(vendor);
+ if (!vend)
+ vend = "unknown vendor";
+
+ snprintf(buff, size, "%s : %s (%04x:%04x)", vend, prod, vendor, product);
+}
+
+void usbip_names_get_class(char *buff, size_t size, uint8_t class,
+ uint8_t subclass, uint8_t protocol)
+{
+ const char *c, *s, *p;
+
+ if (class == 0 && subclass == 0 && protocol == 0) {
+ snprintf(buff, size, "(Defined at Interface level) (%02x/%02x/%02x)", class, subclass, protocol);
+ return;
+ }
+
+ p = names_protocol(class, subclass, protocol);
+ if (!p)
+ p = "unknown protocol";
+
+ s = names_subclass(class, subclass);
+ if (!s)
+ s = "unknown subclass";
+
+ c = names_class(class);
+ if (!c)
+ c = "unknown class";
+
+ snprintf(buff, size, "%s / %s / %s (%02x/%02x/%02x)", c, s, p, class, subclass, protocol);
+}
diff --git a/tools/usb/usbip/libsrc/usbip_common.h b/tools/usb/usbip/libsrc/usbip_common.h
new file mode 100644
index 000000000..73a367a7f
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_common.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2005-2007 Takahiro Hirofuchi
+ */
+
+#ifndef __USBIP_COMMON_H
+#define __USBIP_COMMON_H
+
+#include <libudev.h>
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <syslog.h>
+#include <unistd.h>
+#include <linux/usb/ch9.h>
+#include <linux/usbip.h>
+
+#ifndef USBIDS_FILE
+#define USBIDS_FILE "/usr/share/hwdata/usb.ids"
+#endif
+
+#ifndef VHCI_STATE_PATH
+#define VHCI_STATE_PATH "/var/run/vhci_hcd"
+#endif
+
+#define VUDC_DEVICE_DESCR_FILE "dev_desc"
+
+/* kernel module names */
+#define USBIP_CORE_MOD_NAME "usbip-core"
+#define USBIP_HOST_DRV_NAME "usbip-host"
+#define USBIP_DEVICE_DRV_NAME "usbip-vudc"
+#define USBIP_VHCI_DRV_NAME "vhci_hcd"
+
+/* sysfs constants */
+#define SYSFS_MNT_PATH "/sys"
+#define SYSFS_BUS_NAME "bus"
+#define SYSFS_BUS_TYPE "usb"
+#define SYSFS_DRIVERS_NAME "drivers"
+
+#define SYSFS_PATH_MAX 256
+#define SYSFS_BUS_ID_SIZE 32
+
+/* Defines for op_code status in server/client op_common PDUs */
+#define ST_OK 0x00
+#define ST_NA 0x01
+/* device requested for import is not available */
+#define ST_DEV_BUSY 0x02
+/* device requested for import is in error state */
+#define ST_DEV_ERR 0x03
+#define ST_NODEV 0x04
+#define ST_ERROR 0x05
+
+extern int usbip_use_syslog;
+extern int usbip_use_stderr;
+extern int usbip_use_debug;
+
+#define PROGNAME "usbip"
+
+#define pr_fmt(fmt) "%s: %s: " fmt "\n", PROGNAME
+#define dbg_fmt(fmt) pr_fmt("%s:%d:[%s] " fmt), "debug", \
+ __FILE__, __LINE__, __func__
+
+#define err(fmt, args...) \
+ do { \
+ if (usbip_use_syslog) { \
+ syslog(LOG_ERR, pr_fmt(fmt), "error", ##args); \
+ } \
+ if (usbip_use_stderr) { \
+ fprintf(stderr, pr_fmt(fmt), "error", ##args); \
+ } \
+ } while (0)
+
+#define info(fmt, args...) \
+ do { \
+ if (usbip_use_syslog) { \
+ syslog(LOG_INFO, pr_fmt(fmt), "info", ##args); \
+ } \
+ if (usbip_use_stderr) { \
+ fprintf(stderr, pr_fmt(fmt), "info", ##args); \
+ } \
+ } while (0)
+
+#define dbg(fmt, args...) \
+ do { \
+ if (usbip_use_debug) { \
+ if (usbip_use_syslog) { \
+ syslog(LOG_DEBUG, dbg_fmt(fmt), ##args); \
+ } \
+ if (usbip_use_stderr) { \
+ fprintf(stderr, dbg_fmt(fmt), ##args); \
+ } \
+ } \
+ } while (0)
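+
+/*
+ * Illustrative output (editor's sketch): with usbip_use_stderr = 1 and
+ * usbip_use_debug = 1, a call such as
+ *
+ *	dbg("opened %s", path);
+ *
+ * prints a line of the form
+ *
+ *	usbip: debug: usbip_common.h:42:[main] opened /sys/...
+ *
+ * following the pr_fmt()/dbg_fmt() templates above.
+ */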
+
+#define BUG() \
+ do { \
+ err("sorry, it's a bug!"); \
+ abort(); \
+ } while (0)
+
+struct usbip_usb_interface {
+ uint8_t bInterfaceClass;
+ uint8_t bInterfaceSubClass;
+ uint8_t bInterfaceProtocol;
+ uint8_t padding; /* alignment */
+} __attribute__((packed));
+
+struct usbip_usb_device {
+ char path[SYSFS_PATH_MAX];
+ char busid[SYSFS_BUS_ID_SIZE];
+
+ uint32_t busnum;
+ uint32_t devnum;
+ uint32_t speed;
+
+ uint16_t idVendor;
+ uint16_t idProduct;
+ uint16_t bcdDevice;
+
+ uint8_t bDeviceClass;
+ uint8_t bDeviceSubClass;
+ uint8_t bDeviceProtocol;
+ uint8_t bConfigurationValue;
+ uint8_t bNumConfigurations;
+ uint8_t bNumInterfaces;
+} __attribute__((packed));
+
+#define to_string(s) #s
+
+void dump_usb_interface(struct usbip_usb_interface *);
+void dump_usb_device(struct usbip_usb_device *);
+int read_usb_device(struct udev_device *sdev, struct usbip_usb_device *udev);
+int read_attr_value(struct udev_device *dev, const char *name,
+ const char *format);
+int read_usb_interface(struct usbip_usb_device *udev, int i,
+ struct usbip_usb_interface *uinf);
+
+const char *usbip_speed_string(int num);
+const char *usbip_status_string(int32_t status);
+const char *usbip_op_common_status_string(int status);
+
+int usbip_names_init(char *);
+void usbip_names_free(void);
+void usbip_names_get_product(char *buff, size_t size, uint16_t vendor,
+ uint16_t product);
+void usbip_names_get_class(char *buff, size_t size, uint8_t class,
+ uint8_t subclass, uint8_t protocol);
+
+#endif /* __USBIP_COMMON_H */
diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.c b/tools/usb/usbip/libsrc/usbip_device_driver.c
new file mode 100644
index 000000000..67ae6c155
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_device_driver.c
@@ -0,0 +1,168 @@
+/*
+ * Copyright (C) 2015 Karol Kosik <karo9@interia.eu>
+ * 2015 Samsung Electronics
+ * Author: Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ *
+ * Based on tools/usb/usbip/libsrc/usbip_host_driver.c, which is:
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <fcntl.h>
+#include <string.h>
+#include <linux/usb/ch9.h>
+
+#include <unistd.h>
+
+#include "usbip_host_common.h"
+#include "usbip_device_driver.h"
+
+#undef PROGNAME
+#define PROGNAME "libusbip"
+
+#define copy_descr_attr16(dev, descr, attr) \
+ ((dev)->attr = le16toh((descr)->attr)) \
+
+#define copy_descr_attr(dev, descr, attr) \
+ ((dev)->attr = (descr)->attr) \
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static struct {
+ enum usb_device_speed speed;
+ const char *name;
+} speed_names[] = {
+ {
+ .speed = USB_SPEED_UNKNOWN,
+ .name = "UNKNOWN",
+ },
+ {
+ .speed = USB_SPEED_LOW,
+ .name = "low-speed",
+ },
+ {
+ .speed = USB_SPEED_FULL,
+ .name = "full-speed",
+ },
+ {
+ .speed = USB_SPEED_HIGH,
+ .name = "high-speed",
+ },
+ {
+ .speed = USB_SPEED_WIRELESS,
+ .name = "wireless",
+ },
+ {
+ .speed = USB_SPEED_SUPER,
+ .name = "super-speed",
+ },
+};
+
+static
+int read_usb_vudc_device(struct udev_device *sdev, struct usbip_usb_device *dev)
+{
+ const char *path, *name;
+ char filepath[SYSFS_PATH_MAX];
+ struct usb_device_descriptor descr;
+ unsigned int i;
+ FILE *fd = NULL;
+ struct udev_device *plat;
+ const char *speed;
+ size_t ret;
+
+ plat = udev_device_get_parent(sdev);
+ path = udev_device_get_syspath(plat);
+ snprintf(filepath, SYSFS_PATH_MAX, "%s/%s",
+ path, VUDC_DEVICE_DESCR_FILE);
+ fd = fopen(filepath, "r");
+ if (!fd)
+ return -1;
+ ret = fread((char *) &descr, sizeof(descr), 1, fd);
+ if (ret != 1) {
+ err("Cannot read vudc device descr file: %s", strerror(errno));
+ goto err;
+ }
+ fclose(fd);
+
+ copy_descr_attr(dev, &descr, bDeviceClass);
+ copy_descr_attr(dev, &descr, bDeviceSubClass);
+ copy_descr_attr(dev, &descr, bDeviceProtocol);
+ copy_descr_attr(dev, &descr, bNumConfigurations);
+ copy_descr_attr16(dev, &descr, idVendor);
+ copy_descr_attr16(dev, &descr, idProduct);
+ copy_descr_attr16(dev, &descr, bcdDevice);
+
+ strncpy(dev->path, path, SYSFS_PATH_MAX);
+
+ dev->speed = USB_SPEED_UNKNOWN;
+ speed = udev_device_get_sysattr_value(sdev, "current_speed");
+ if (speed) {
+ for (i = 0; i < ARRAY_SIZE(speed_names); i++) {
+ if (!strcmp(speed_names[i].name, speed)) {
+ dev->speed = speed_names[i].speed;
+ break;
+ }
+ }
+ }
+
+ /* Only used for user output, little sense to output them in general */
+ dev->bNumInterfaces = 0;
+ dev->bConfigurationValue = 0;
+ dev->busnum = 0;
+
+ name = udev_device_get_sysname(plat);
+ strncpy(dev->busid, name, SYSFS_BUS_ID_SIZE);
+ return 0;
+err:
+ fclose(fd);
+ return -1;
+}
+
+static int is_my_device(struct udev_device *dev)
+{
+ const char *driver;
+
+ driver = udev_device_get_property_value(dev, "USB_UDC_NAME");
+ return driver != NULL && !strcmp(driver, USBIP_DEVICE_DRV_NAME);
+}
+
+static int usbip_device_driver_open(struct usbip_host_driver *hdriver)
+{
+ int ret;
+
+ hdriver->ndevs = 0;
+ INIT_LIST_HEAD(&hdriver->edev_list);
+
+ ret = usbip_generic_driver_open(hdriver);
+ if (ret)
+ err("please load " USBIP_CORE_MOD_NAME ".ko and "
+ USBIP_DEVICE_DRV_NAME ".ko!");
+
+ return ret;
+}
+
+struct usbip_host_driver device_driver = {
+ .edev_list = LIST_HEAD_INIT(device_driver.edev_list),
+ .udev_subsystem = "udc",
+ .ops = {
+ .open = usbip_device_driver_open,
+ .close = usbip_generic_driver_close,
+ .refresh_device_list = usbip_generic_refresh_device_list,
+ .get_device = usbip_generic_get_device,
+ .read_device = read_usb_vudc_device,
+ .is_my_device = is_my_device,
+ },
+};
diff --git a/tools/usb/usbip/libsrc/usbip_device_driver.h b/tools/usb/usbip/libsrc/usbip_device_driver.h
new file mode 100644
index 000000000..54cb658b3
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_device_driver.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2015 Karol Kosik <karo9@interia.eu>
+ * 2015 Samsung Electronics
+ * Author: Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ *
+ * Based on tools/usb/usbip/libsrc/usbip_host_driver.c, which is:
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __USBIP_DEVICE_DRIVER_H
+#define __USBIP_DEVICE_DRIVER_H
+
+#include <stdint.h>
+#include "usbip_common.h"
+#include "usbip_host_common.h"
+#include "list.h"
+
+extern struct usbip_host_driver device_driver;
+
+#endif /* __USBIP_DEVICE_DRIVER_H */
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.c b/tools/usb/usbip/libsrc/usbip_host_common.c
new file mode 100644
index 000000000..84d0fa859
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_host_common.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * Refactored from usbip_host_driver.c, which is:
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <errno.h>
+#include <unistd.h>
+
+#include <libudev.h>
+
+#include "usbip_common.h"
+#include "usbip_host_common.h"
+#include "list.h"
+#include "sysfs_utils.h"
+
+extern struct udev *udev_context;
+
+static int32_t read_attr_usbip_status(struct usbip_usb_device *udev)
+{
+ char status_attr_path[SYSFS_PATH_MAX];
+ int size;
+ int fd;
+ int length;
+ char status[2] = { 0 };
+ int value = 0;
+
+ size = snprintf(status_attr_path, sizeof(status_attr_path),
+ "%s/usbip_status", udev->path);
+ if (size < 0 || (unsigned int)size >= sizeof(status_attr_path)) {
+ err("usbip_status path length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(status_attr_path));
+ return -1;
+ }
+
+ fd = open(status_attr_path, O_RDONLY);
+ if (fd < 0) {
+ err("error opening attribute %s", status_attr_path);
+ return -1;
+ }
+
+ length = read(fd, status, 1);
+ if (length < 0) {
+ err("error reading attribute %s", status_attr_path);
+ close(fd);
+ return -1;
+ }
+
+ value = atoi(status);
+ close(fd);
+ return value;
+}
+
+static
+struct usbip_exported_device *usbip_exported_device_new(
+ struct usbip_host_driver *hdriver, const char *sdevpath)
+{
+ struct usbip_exported_device *edev = NULL;
+ struct usbip_exported_device *edev_old;
+ size_t size;
+ int i;
+
+	edev = calloc(1, sizeof(struct usbip_exported_device));
+	if (!edev) {
+		err("calloc failed");
+		return NULL;
+	}
+
+ edev->sudev =
+ udev_device_new_from_syspath(udev_context, sdevpath);
+ if (!edev->sudev) {
+ err("udev_device_new_from_syspath: %s", sdevpath);
+ goto err;
+ }
+
+ if (hdriver->ops.read_device(edev->sudev, &edev->udev) < 0)
+ goto err;
+
+ edev->status = read_attr_usbip_status(&edev->udev);
+ if (edev->status < 0)
+ goto err;
+
+ /* reallocate buffer to include usb interface data */
+ size = sizeof(struct usbip_exported_device) +
+ edev->udev.bNumInterfaces * sizeof(struct usbip_usb_interface);
+
+ edev_old = edev;
+ edev = realloc(edev, size);
+ if (!edev) {
+ edev = edev_old;
+ dbg("realloc failed");
+ goto err;
+ }
+
+ for (i = 0; i < edev->udev.bNumInterfaces; i++) {
+ /* vudc does not support reading interfaces */
+ if (!hdriver->ops.read_interface)
+ break;
+ hdriver->ops.read_interface(&edev->udev, i, &edev->uinf[i]);
+ }
+
+ return edev;
+err:
+ if (edev->sudev)
+ udev_device_unref(edev->sudev);
+ if (edev)
+ free(edev);
+
+ return NULL;
+}
+
+static int refresh_exported_devices(struct usbip_host_driver *hdriver)
+{
+ struct usbip_exported_device *edev;
+ struct udev_enumerate *enumerate;
+ struct udev_list_entry *devices, *dev_list_entry;
+ struct udev_device *dev;
+ const char *path;
+
+ enumerate = udev_enumerate_new(udev_context);
+ udev_enumerate_add_match_subsystem(enumerate, hdriver->udev_subsystem);
+ udev_enumerate_scan_devices(enumerate);
+
+ devices = udev_enumerate_get_list_entry(enumerate);
+
+ udev_list_entry_foreach(dev_list_entry, devices) {
+ path = udev_list_entry_get_name(dev_list_entry);
+ dev = udev_device_new_from_syspath(udev_context,
+ path);
+ if (dev == NULL)
+ continue;
+
+ /* Check whether device uses usbip driver. */
+ if (hdriver->ops.is_my_device(dev)) {
+ edev = usbip_exported_device_new(hdriver, path);
+ if (!edev) {
+ dbg("usbip_exported_device_new failed");
+ continue;
+ }
+
+ list_add(&edev->node, &hdriver->edev_list);
+ hdriver->ndevs++;
+ }
+ }
+
+ return 0;
+}
+
+static void usbip_exported_device_destroy(struct list_head *devs)
+{
+ struct list_head *i, *tmp;
+ struct usbip_exported_device *edev;
+
+ list_for_each_safe(i, tmp, devs) {
+ edev = list_entry(i, struct usbip_exported_device, node);
+ list_del(i);
+ free(edev);
+ }
+}
+
+int usbip_generic_driver_open(struct usbip_host_driver *hdriver)
+{
+ int rc;
+
+ udev_context = udev_new();
+ if (!udev_context) {
+ err("udev_new failed");
+ return -1;
+ }
+
+ rc = refresh_exported_devices(hdriver);
+ if (rc < 0)
+ goto err;
+ return 0;
+err:
+ udev_unref(udev_context);
+ return -1;
+}
+
+void usbip_generic_driver_close(struct usbip_host_driver *hdriver)
+{
+ if (!hdriver)
+ return;
+
+ usbip_exported_device_destroy(&hdriver->edev_list);
+
+ udev_unref(udev_context);
+}
+
+int usbip_generic_refresh_device_list(struct usbip_host_driver *hdriver)
+{
+ int rc;
+
+ usbip_exported_device_destroy(&hdriver->edev_list);
+
+ hdriver->ndevs = 0;
+ INIT_LIST_HEAD(&hdriver->edev_list);
+
+ rc = refresh_exported_devices(hdriver);
+ if (rc < 0)
+ return -1;
+
+ return 0;
+}
+
+int usbip_export_device(struct usbip_exported_device *edev, int sockfd)
+{
+ char attr_name[] = "usbip_sockfd";
+ char sockfd_attr_path[SYSFS_PATH_MAX];
+ int size;
+ char sockfd_buff[30];
+ int ret;
+
+ if (edev->status != SDEV_ST_AVAILABLE) {
+ dbg("device not available: %s", edev->udev.busid);
+ switch (edev->status) {
+ case SDEV_ST_ERROR:
+ dbg("status SDEV_ST_ERROR");
+ ret = ST_DEV_ERR;
+ break;
+ case SDEV_ST_USED:
+ dbg("status SDEV_ST_USED");
+ ret = ST_DEV_BUSY;
+ break;
+ default:
+ dbg("status unknown: 0x%x", edev->status);
+ ret = -1;
+ }
+ return ret;
+ }
+
+	/* the usbip_sockfd attribute is exposed on the device, not per interface */
+ size = snprintf(sockfd_attr_path, sizeof(sockfd_attr_path), "%s/%s",
+ edev->udev.path, attr_name);
+ if (size < 0 || (unsigned int)size >= sizeof(sockfd_attr_path)) {
+ err("exported device path length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(sockfd_attr_path));
+ return -1;
+ }
+
+ size = snprintf(sockfd_buff, sizeof(sockfd_buff), "%d\n", sockfd);
+ if (size < 0 || (unsigned int)size >= sizeof(sockfd_buff)) {
+ err("socket length %i >= %lu or < 0", size,
+ (long unsigned)sizeof(sockfd_buff));
+ return -1;
+ }
+
+ ret = write_sysfs_attribute(sockfd_attr_path, sockfd_buff,
+ strlen(sockfd_buff));
+ if (ret < 0) {
+ err("write_sysfs_attribute failed: sockfd %s to %s",
+ sockfd_buff, sockfd_attr_path);
+ return ret;
+ }
+
+ info("connect: %s", edev->udev.busid);
+
+ return ret;
+}
+
+struct usbip_exported_device *usbip_generic_get_device(
+ struct usbip_host_driver *hdriver, int num)
+{
+ struct list_head *i;
+ struct usbip_exported_device *edev;
+ int cnt = 0;
+
+ list_for_each(i, &hdriver->edev_list) {
+ edev = list_entry(i, struct usbip_exported_device, node);
+ if (num == cnt)
+ return edev;
+ cnt++;
+ }
+
+ return NULL;
+}
diff --git a/tools/usb/usbip/libsrc/usbip_host_common.h b/tools/usb/usbip/libsrc/usbip_host_common.h
new file mode 100644
index 000000000..a64b8033f
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_host_common.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * Refactored from usbip_host_driver.c, which is:
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __USBIP_HOST_COMMON_H
+#define __USBIP_HOST_COMMON_H
+
+#include <stdint.h>
+#include <libudev.h>
+#include <errno.h>
+#include "list.h"
+#include "usbip_common.h"
+#include "sysfs_utils.h"
+
+struct usbip_host_driver;
+
+struct usbip_host_driver_ops {
+ int (*open)(struct usbip_host_driver *hdriver);
+ void (*close)(struct usbip_host_driver *hdriver);
+ int (*refresh_device_list)(struct usbip_host_driver *hdriver);
+ struct usbip_exported_device * (*get_device)(
+ struct usbip_host_driver *hdriver, int num);
+
+ int (*read_device)(struct udev_device *sdev,
+ struct usbip_usb_device *dev);
+ int (*read_interface)(struct usbip_usb_device *udev, int i,
+ struct usbip_usb_interface *uinf);
+ int (*is_my_device)(struct udev_device *udev);
+};
+
+struct usbip_host_driver {
+ int ndevs;
+ /* list of exported device */
+ struct list_head edev_list;
+ const char *udev_subsystem;
+ struct usbip_host_driver_ops ops;
+};
+
+struct usbip_exported_device {
+ struct udev_device *sudev;
+ int32_t status;
+ struct usbip_usb_device udev;
+ struct list_head node;
+ struct usbip_usb_interface uinf[];
+};
+
+/* External API to access the driver */
+static inline int usbip_driver_open(struct usbip_host_driver *hdriver)
+{
+ if (!hdriver->ops.open)
+ return -EOPNOTSUPP;
+ return hdriver->ops.open(hdriver);
+}
+
+static inline void usbip_driver_close(struct usbip_host_driver *hdriver)
+{
+ if (!hdriver->ops.close)
+ return;
+ hdriver->ops.close(hdriver);
+}
+
+static inline int usbip_refresh_device_list(struct usbip_host_driver *hdriver)
+{
+ if (!hdriver->ops.refresh_device_list)
+ return -EOPNOTSUPP;
+ return hdriver->ops.refresh_device_list(hdriver);
+}
+
+static inline struct usbip_exported_device *
+usbip_get_device(struct usbip_host_driver *hdriver, int num)
+{
+ if (!hdriver->ops.get_device)
+ return NULL;
+ return hdriver->ops.get_device(hdriver, num);
+}
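+
+/*
+ * Typical call sequence (editor's illustration; error handling elided):
+ *
+ *	if (usbip_driver_open(&host_driver) == 0) {
+ *		usbip_refresh_device_list(&host_driver);
+ *		struct usbip_exported_device *edev =
+ *			usbip_get_device(&host_driver, 0);
+ *		...
+ *		usbip_driver_close(&host_driver);
+ *	}
+ *
+ * where host_driver is the backend declared in usbip_host_driver.h.
+ */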
+
+/* Helper functions for implementing driver backend */
+int usbip_generic_driver_open(struct usbip_host_driver *hdriver);
+void usbip_generic_driver_close(struct usbip_host_driver *hdriver);
+int usbip_generic_refresh_device_list(struct usbip_host_driver *hdriver);
+int usbip_export_device(struct usbip_exported_device *edev, int sockfd);
+struct usbip_exported_device *usbip_generic_get_device(
+ struct usbip_host_driver *hdriver, int num);
+
+#endif /* __USBIP_HOST_COMMON_H */
diff --git a/tools/usb/usbip/libsrc/usbip_host_driver.c b/tools/usb/usbip/libsrc/usbip_host_driver.c
new file mode 100644
index 000000000..4de6edc54
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_host_driver.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <unistd.h>
+#include <libudev.h>
+
+#include "usbip_host_common.h"
+#include "usbip_host_driver.h"
+
+#undef PROGNAME
+#define PROGNAME "libusbip"
+
+static int is_my_device(struct udev_device *dev)
+{
+ const char *driver;
+
+ driver = udev_device_get_driver(dev);
+ return driver != NULL && !strcmp(driver, USBIP_HOST_DRV_NAME);
+}
+
+static int usbip_host_driver_open(struct usbip_host_driver *hdriver)
+{
+ int ret;
+
+ hdriver->ndevs = 0;
+ INIT_LIST_HEAD(&hdriver->edev_list);
+
+ ret = usbip_generic_driver_open(hdriver);
+ if (ret)
+ err("please load " USBIP_CORE_MOD_NAME ".ko and "
+ USBIP_HOST_DRV_NAME ".ko!");
+ return ret;
+}
+
+struct usbip_host_driver host_driver = {
+ .edev_list = LIST_HEAD_INIT(host_driver.edev_list),
+ .udev_subsystem = "usb",
+ .ops = {
+ .open = usbip_host_driver_open,
+ .close = usbip_generic_driver_close,
+ .refresh_device_list = usbip_generic_refresh_device_list,
+ .get_device = usbip_generic_get_device,
+ .read_device = read_usb_device,
+ .read_interface = read_usb_interface,
+ .is_my_device = is_my_device,
+ },
+};
diff --git a/tools/usb/usbip/libsrc/usbip_host_driver.h b/tools/usb/usbip/libsrc/usbip_host_driver.h
new file mode 100644
index 000000000..77f07e72a
--- /dev/null
+++ b/tools/usb/usbip/libsrc/usbip_host_driver.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __USBIP_HOST_DRIVER_H
+#define __USBIP_HOST_DRIVER_H
+
+#include <stdint.h>
+#include "usbip_common.h"
+#include "list.h"
+#include "usbip_host_common.h"
+
+extern struct usbip_host_driver host_driver;
+
+#endif /* __USBIP_HOST_DRIVER_H */
diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c
new file mode 100644
index 000000000..8159fd986
--- /dev/null
+++ b/tools/usb/usbip/libsrc/vhci_driver.c
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2005-2007 Takahiro Hirofuchi
+ */
+
+#include "usbip_common.h"
+#include "vhci_driver.h"
+#include <limits.h>
+#include <netdb.h>
+#include <libudev.h>
+#include <dirent.h>
+#include "sysfs_utils.h"
+
+#undef PROGNAME
+#define PROGNAME "libusbip"
+
+struct usbip_vhci_driver *vhci_driver;
+struct udev *udev_context;
+
+static struct usbip_imported_device *
+imported_device_init(struct usbip_imported_device *idev, char *busid)
+{
+ struct udev_device *sudev;
+
+ sudev = udev_device_new_from_subsystem_sysname(udev_context,
+ "usb", busid);
+ if (!sudev) {
+ dbg("udev_device_new_from_subsystem_sysname failed: %s", busid);
+ goto err;
+ }
+ read_usb_device(sudev, &idev->udev);
+ udev_device_unref(sudev);
+
+ return idev;
+
+err:
+ return NULL;
+}
+
+static int parse_status(const char *value)
+{
+ int ret = 0;
+ char *c;
+
+ /* skip a header line */
+ c = strchr(value, '\n');
+ if (!c)
+ return -1;
+ c++;
+
+ while (*c != '\0') {
+ int port, status, speed, devid;
+ int sockfd;
+ char lbusid[SYSFS_BUS_ID_SIZE];
+ struct usbip_imported_device *idev;
+ char hub[3];
+
+ ret = sscanf(c, "%2s %d %d %d %x %u %31s\n",
+ hub, &port, &status, &speed,
+ &devid, &sockfd, lbusid);
+
+ if (ret < 5) {
+ dbg("sscanf failed: %d", ret);
+ BUG();
+ }
+
+ dbg("hub %s port %d status %d speed %d devid %x",
+ hub, port, status, speed, devid);
+ dbg("sockfd %u lbusid %s", sockfd, lbusid);
+
+ /* if a device is connected, look at it */
+ idev = &vhci_driver->idev[port];
+ memset(idev, 0, sizeof(*idev));
+
+ if (strncmp("hs", hub, 2) == 0)
+ idev->hub = HUB_SPEED_HIGH;
+ else /* strncmp("ss", hub, 2) == 0 */
+ idev->hub = HUB_SPEED_SUPER;
+
+ idev->port = port;
+ idev->status = status;
+
+ idev->devid = devid;
+
+ idev->busnum = (devid >> 16);
+ idev->devnum = (devid & 0x0000ffff);
+
+ if (idev->status != VDEV_ST_NULL
+ && idev->status != VDEV_ST_NOTASSIGNED) {
+ idev = imported_device_init(idev, lbusid);
+ if (!idev) {
+ dbg("imported_device_init failed");
+ return -1;
+ }
+ }
+
+ /* go to the next line */
+ c = strchr(c, '\n');
+ if (!c)
+ break;
+ c++;
+ }
+
+ dbg("exit");
+
+ return 0;
+}
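+
+/*
+ * Illustrative status line matching the sscanf() above (values are
+ * examples only), in the order hub port status speed devid sockfd
+ * local_busid:
+ *
+ *	hs  0000 004 000 00010002 000003 1-1
+ */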
+
+#define MAX_STATUS_NAME 18
+
+static int refresh_imported_device_list(void)
+{
+ const char *attr_status;
+ char status[MAX_STATUS_NAME+1] = "status";
+ int i, ret;
+
+ for (i = 0; i < vhci_driver->ncontrollers; i++) {
+ if (i > 0)
+ snprintf(status, sizeof(status), "status.%d", i);
+
+ attr_status = udev_device_get_sysattr_value(vhci_driver->hc_device,
+ status);
+ if (!attr_status) {
+ err("udev_device_get_sysattr_value failed");
+ return -1;
+ }
+
+ dbg("controller %d", i);
+
+ ret = parse_status(attr_status);
+ if (ret != 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int get_nports(struct udev_device *hc_device)
+{
+ const char *attr_nports;
+
+ attr_nports = udev_device_get_sysattr_value(hc_device, "nports");
+ if (!attr_nports) {
+ err("udev_device_get_sysattr_value nports failed");
+ return -1;
+ }
+
+ return (int)strtoul(attr_nports, NULL, 10);
+}
+
+static int vhci_hcd_filter(const struct dirent *dirent)
+{
+ return !strncmp(dirent->d_name, "vhci_hcd.", 9);
+}
+
+static int get_ncontrollers(void)
+{
+ struct dirent **namelist;
+ struct udev_device *platform;
+ int n;
+
+ platform = udev_device_get_parent(vhci_driver->hc_device);
+ if (platform == NULL)
+ return -1;
+
+ n = scandir(udev_device_get_syspath(platform), &namelist, vhci_hcd_filter, NULL);
+ if (n < 0)
+ err("scandir failed");
+ else {
+ for (int i = 0; i < n; i++)
+ free(namelist[i]);
+ free(namelist);
+ }
+
+ return n;
+}
+
+/*
+ * Read the given port's record.
+ *
+ * To avoid buffer overflow we will read the entire line and
+ * validate each part's size. The initial buffer is padded by 4 to
+ * accommodate the 2 spaces, 1 newline and an additional character
+ * which is needed to properly validate the 3rd part without it being
+ * truncated to an acceptable length.
+ */
+static int read_record(int rhport, char *host, unsigned long host_len,
+ char *port, unsigned long port_len, char *busid)
+{
+ int part;
+ FILE *file;
+ char path[PATH_MAX+1];
+ char *buffer, *start, *end;
+ char delim[] = {' ', ' ', '\n'};
+ int max_len[] = {(int)host_len, (int)port_len, SYSFS_BUS_ID_SIZE};
+ size_t buffer_len = host_len + port_len + SYSFS_BUS_ID_SIZE + 4;
+
+ buffer = malloc(buffer_len);
+ if (!buffer)
+ return -1;
+
+ snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", rhport);
+
+ file = fopen(path, "r");
+ if (!file) {
+ err("fopen");
+ free(buffer);
+ return -1;
+ }
+
+ if (fgets(buffer, buffer_len, file) == NULL) {
+ err("fgets");
+ free(buffer);
+ fclose(file);
+ return -1;
+ }
+ fclose(file);
+
+ /* validate the length of each of the 3 parts */
+ start = buffer;
+ for (part = 0; part < 3; part++) {
+ end = strchr(start, delim[part]);
+ if (end == NULL || (end - start) > max_len[part]) {
+ free(buffer);
+ return -1;
+ }
+ start = end + 1;
+ }
+
+ if (sscanf(buffer, "%s %s %s\n", host, port, busid) != 3) {
+ err("sscanf");
+ free(buffer);
+ return -1;
+ }
+
+ free(buffer);
+
+ return 0;
+}
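+
+/*
+ * Illustrative port record (editor's sketch; host, port and busid are
+ * examples only):
+ *
+ *	192.168.0.5 3240 1-1
+ */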
+
+/* ---------------------------------------------------------------------- */
+
+int usbip_vhci_driver_open(void)
+{
+ int nports;
+ struct udev_device *hc_device;
+
+ udev_context = udev_new();
+ if (!udev_context) {
+ err("udev_new failed");
+ return -1;
+ }
+
+ /* will be freed in usbip_driver_close() */
+ hc_device =
+ udev_device_new_from_subsystem_sysname(udev_context,
+ USBIP_VHCI_BUS_TYPE,
+ USBIP_VHCI_DEVICE_NAME);
+ if (!hc_device) {
+ err("udev_device_new_from_subsystem_sysname failed");
+ goto err;
+ }
+
+ nports = get_nports(hc_device);
+ if (nports <= 0) {
+ err("no available ports");
+ goto err;
+ }
+ dbg("available ports: %d", nports);
+
+ vhci_driver = calloc(1, sizeof(struct usbip_vhci_driver) +
+ nports * sizeof(struct usbip_imported_device));
+ if (!vhci_driver) {
+ err("vhci_driver allocation failed");
+ goto err;
+ }
+
+ vhci_driver->nports = nports;
+ vhci_driver->hc_device = hc_device;
+ vhci_driver->ncontrollers = get_ncontrollers();
+ dbg("available controllers: %d", vhci_driver->ncontrollers);
+
+	if (vhci_driver->ncontrollers <= 0) {
+ err("no available usb controllers");
+ goto err;
+ }
+
+ if (refresh_imported_device_list())
+ goto err;
+
+ return 0;
+
+err:
+ udev_device_unref(hc_device);
+
+ if (vhci_driver)
+ free(vhci_driver);
+
+ vhci_driver = NULL;
+
+ udev_unref(udev_context);
+
+ return -1;
+}
+
+
+void usbip_vhci_driver_close(void)
+{
+ if (!vhci_driver)
+ return;
+
+ udev_device_unref(vhci_driver->hc_device);
+
+ free(vhci_driver);
+
+ vhci_driver = NULL;
+
+ udev_unref(udev_context);
+}
+
+
+int usbip_vhci_refresh_device_list(void)
+{
+ if (refresh_imported_device_list())
+ goto err;
+
+ return 0;
+err:
+ dbg("failed to refresh device list");
+ return -1;
+}
+
+
+int usbip_vhci_get_free_port(uint32_t speed)
+{
+ for (int i = 0; i < vhci_driver->nports; i++) {
+
+ switch (speed) {
+ case USB_SPEED_SUPER:
+ if (vhci_driver->idev[i].hub != HUB_SPEED_SUPER)
+ continue;
+ break;
+ default:
+ if (vhci_driver->idev[i].hub != HUB_SPEED_HIGH)
+ continue;
+ break;
+ }
+
+ if (vhci_driver->idev[i].status == VDEV_ST_NULL)
+ return vhci_driver->idev[i].port;
+ }
+
+ return -1;
+}
+
+int usbip_vhci_attach_device2(uint8_t port, int sockfd, uint32_t devid,
+			      uint32_t speed)
+{
+	char buff[200];	/* comfortably larger than the formatted values */
+ char attach_attr_path[SYSFS_PATH_MAX];
+ char attr_attach[] = "attach";
+ const char *path;
+ int ret;
+
+ snprintf(buff, sizeof(buff), "%u %d %u %u",
+ port, sockfd, devid, speed);
+ dbg("writing: %s", buff);
+
+ path = udev_device_get_syspath(vhci_driver->hc_device);
+ snprintf(attach_attr_path, sizeof(attach_attr_path), "%s/%s",
+ path, attr_attach);
+ dbg("attach attribute path: %s", attach_attr_path);
+
+ ret = write_sysfs_attribute(attach_attr_path, buff, strlen(buff));
+ if (ret < 0) {
+ dbg("write_sysfs_attribute failed");
+ return -1;
+ }
+
+ dbg("attached port: %d", port);
+
+ return 0;
+}
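+
+/*
+ * Editor's note: the attach attribute takes "port sockfd devid speed";
+ * for example (illustrative values) "0 5 65538 2" attaches devid
+ * 0x10002 (bus 1, device 2) at USB_SPEED_FULL on port 0 via socket
+ * descriptor 5.
+ */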
+
+static unsigned long get_devid(uint8_t busnum, uint8_t devnum)
+{
+ return (busnum << 16) | devnum;
+}
+
+/* will be removed */
+int usbip_vhci_attach_device(uint8_t port, int sockfd, uint8_t busnum,
+ uint8_t devnum, uint32_t speed)
+{
+ int devid = get_devid(busnum, devnum);
+
+ return usbip_vhci_attach_device2(port, sockfd, devid, speed);
+}
+
+int usbip_vhci_detach_device(uint8_t port)
+{
+ char detach_attr_path[SYSFS_PATH_MAX];
+ char attr_detach[] = "detach";
+	char buff[200];	/* comfortably larger than the port number */
+ const char *path;
+ int ret;
+
+ snprintf(buff, sizeof(buff), "%u", port);
+ dbg("writing: %s", buff);
+
+ path = udev_device_get_syspath(vhci_driver->hc_device);
+ snprintf(detach_attr_path, sizeof(detach_attr_path), "%s/%s",
+ path, attr_detach);
+ dbg("detach attribute path: %s", detach_attr_path);
+
+ ret = write_sysfs_attribute(detach_attr_path, buff, strlen(buff));
+ if (ret < 0) {
+ dbg("write_sysfs_attribute failed");
+ return -1;
+ }
+
+ dbg("detached port: %d", port);
+
+ return 0;
+}
+
+int usbip_vhci_imported_device_dump(struct usbip_imported_device *idev)
+{
+ char product_name[100];
+ char host[NI_MAXHOST] = "unknown host";
+ char serv[NI_MAXSERV] = "unknown port";
+ char remote_busid[SYSFS_BUS_ID_SIZE];
+ int ret;
+ int read_record_error = 0;
+
+ if (idev->status == VDEV_ST_NULL || idev->status == VDEV_ST_NOTASSIGNED)
+ return 0;
+
+ ret = read_record(idev->port, host, sizeof(host), serv, sizeof(serv),
+ remote_busid);
+ if (ret) {
+ err("read_record");
+ read_record_error = 1;
+ }
+
+ printf("Port %02d: <%s> at %s\n", idev->port,
+ usbip_status_string(idev->status),
+ usbip_speed_string(idev->udev.speed));
+
+ usbip_names_get_product(product_name, sizeof(product_name),
+ idev->udev.idVendor, idev->udev.idProduct);
+
+ printf(" %s\n", product_name);
+
+ if (!read_record_error) {
+ printf("%10s -> usbip://%s:%s/%s\n", idev->udev.busid,
+ host, serv, remote_busid);
+ printf("%10s -> remote bus/dev %03d/%03d\n", " ",
+ idev->busnum, idev->devnum);
+ } else {
+ printf("%10s -> unknown host, remote port and remote busid\n",
+ idev->udev.busid);
+ printf("%10s -> remote bus/dev %03d/%03d\n", " ",
+ idev->busnum, idev->devnum);
+ }
+
+ return 0;
+}
diff --git a/tools/usb/usbip/libsrc/vhci_driver.h b/tools/usb/usbip/libsrc/vhci_driver.h
new file mode 100644
index 000000000..6c9aca216
--- /dev/null
+++ b/tools/usb/usbip/libsrc/vhci_driver.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2005-2007 Takahiro Hirofuchi
+ */
+
+#ifndef __VHCI_DRIVER_H
+#define __VHCI_DRIVER_H
+
+#include <libudev.h>
+#include <stdint.h>
+
+#include "usbip_common.h"
+
+#define USBIP_VHCI_BUS_TYPE "platform"
+#define USBIP_VHCI_DEVICE_NAME "vhci_hcd.0"
+
+enum hub_speed {
+ HUB_SPEED_HIGH = 0,
+ HUB_SPEED_SUPER,
+};
+
+struct usbip_imported_device {
+ enum hub_speed hub;
+ uint8_t port;
+ uint32_t status;
+
+ uint32_t devid;
+
+ uint8_t busnum;
+ uint8_t devnum;
+
+ /* usbip_class_device list */
+ struct usbip_usb_device udev;
+};
+
+struct usbip_vhci_driver {
+
+ /* /sys/devices/platform/vhci_hcd */
+ struct udev_device *hc_device;
+
+ int ncontrollers;
+ int nports;
+ struct usbip_imported_device idev[];
+};
+
+
+extern struct usbip_vhci_driver *vhci_driver;
+
+int usbip_vhci_driver_open(void);
+void usbip_vhci_driver_close(void);
+
+int usbip_vhci_refresh_device_list(void);
+
+
+int usbip_vhci_get_free_port(uint32_t speed);
+int usbip_vhci_attach_device2(uint8_t port, int sockfd, uint32_t devid,
+ uint32_t speed);
+
+/* will be removed */
+int usbip_vhci_attach_device(uint8_t port, int sockfd, uint8_t busnum,
+ uint8_t devnum, uint32_t speed);
+
+int usbip_vhci_detach_device(uint8_t port);
+
+int usbip_vhci_imported_device_dump(struct usbip_imported_device *idev);
+
+#endif /* __VHCI_DRIVER_H */
diff --git a/tools/usb/usbip/src/Makefile.am b/tools/usb/usbip/src/Makefile.am
new file mode 100644
index 000000000..e26f39e05
--- /dev/null
+++ b/tools/usb/usbip/src/Makefile.am
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+AM_CPPFLAGS = -I$(top_srcdir)/libsrc -DUSBIDS_FILE='"@USBIDS_DIR@/usb.ids"'
+AM_CFLAGS = @EXTRA_CFLAGS@
+LDADD = $(top_builddir)/libsrc/libusbip.la
+
+sbin_PROGRAMS := usbip usbipd
+
+usbip_SOURCES := usbip.h utils.h usbip.c utils.c usbip_network.c \
+ usbip_attach.c usbip_detach.c usbip_list.c \
+ usbip_bind.c usbip_unbind.c usbip_port.c
+
+usbipd_SOURCES := usbip_network.h usbipd.c usbip_network.c
diff --git a/tools/usb/usbip/src/usbip.c b/tools/usb/usbip/src/usbip.c
new file mode 100644
index 000000000..73d8eee81
--- /dev/null
+++ b/tools/usb/usbip/src/usbip.c
@@ -0,0 +1,203 @@
+/*
+ * command structure borrowed from udev
+ * (git://git.kernel.org/pub/scm/linux/hotplug/udev.git)
+ *
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <getopt.h>
+#include <syslog.h>
+
+#include "usbip_common.h"
+#include "usbip_network.h"
+#include "usbip.h"
+
+static int usbip_help(int argc, char *argv[]);
+static int usbip_version(int argc, char *argv[]);
+
+static const char usbip_version_string[] = PACKAGE_STRING;
+
+static const char usbip_usage_string[] =
+ "usbip [--debug] [--log] [--tcp-port PORT] [version]\n"
+ " [help] <command> <args>\n";
+
+static void usbip_usage(void)
+{
+ printf("usage: %s", usbip_usage_string);
+}
+
+struct command {
+ const char *name;
+ int (*fn)(int argc, char *argv[]);
+ const char *help;
+ void (*usage)(void);
+};
+
+static const struct command cmds[] = {
+ {
+ .name = "help",
+ .fn = usbip_help,
+ .help = NULL,
+ .usage = NULL
+ },
+ {
+ .name = "version",
+ .fn = usbip_version,
+ .help = NULL,
+ .usage = NULL
+ },
+ {
+ .name = "attach",
+ .fn = usbip_attach,
+ .help = "Attach a remote USB device",
+ .usage = usbip_attach_usage
+ },
+ {
+ .name = "detach",
+ .fn = usbip_detach,
+ .help = "Detach a remote USB device",
+ .usage = usbip_detach_usage
+ },
+ {
+ .name = "list",
+ .fn = usbip_list,
+ .help = "List exportable or local USB devices",
+ .usage = usbip_list_usage
+ },
+ {
+ .name = "bind",
+ .fn = usbip_bind,
+ .help = "Bind device to " USBIP_HOST_DRV_NAME ".ko",
+ .usage = usbip_bind_usage
+ },
+ {
+ .name = "unbind",
+ .fn = usbip_unbind,
+ .help = "Unbind device from " USBIP_HOST_DRV_NAME ".ko",
+ .usage = usbip_unbind_usage
+ },
+ {
+ .name = "port",
+ .fn = usbip_port_show,
+ .help = "Show imported USB devices",
+ .usage = NULL
+ },
+ { NULL, NULL, NULL, NULL }
+};
+
+static int usbip_help(int argc, char *argv[])
+{
+ const struct command *cmd;
+ int i;
+ int ret = 0;
+
+	if (argc > 1) {
+		argv++;	/* argv[0] is now the sub-command name */
+ for (i = 0; cmds[i].name != NULL; i++)
+ if (!strcmp(cmds[i].name, argv[0]) && cmds[i].usage) {
+ cmds[i].usage();
+ goto done;
+ }
+ ret = -1;
+ }
+
+ usbip_usage();
+ printf("\n");
+ for (cmd = cmds; cmd->name != NULL; cmd++)
+ if (cmd->help != NULL)
+ printf(" %-10s %s\n", cmd->name, cmd->help);
+ printf("\n");
+done:
+ return ret;
+}
+
+static int usbip_version(int argc, char *argv[])
+{
+ (void) argc;
+ (void) argv;
+
+ printf(PROGNAME " (%s)\n", usbip_version_string);
+ return 0;
+}
+
+static int run_command(const struct command *cmd, int argc, char *argv[])
+{
+ dbg("running command: `%s'", cmd->name);
+ return cmd->fn(argc, argv);
+}
+
+int main(int argc, char *argv[])
+{
+ static const struct option opts[] = {
+ { "debug", no_argument, NULL, 'd' },
+ { "log", no_argument, NULL, 'l' },
+ { "tcp-port", required_argument, NULL, 't' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ char *cmd;
+ int opt;
+ int i, rc = -1;
+
+ usbip_use_stderr = 1;
+ opterr = 0;
+ for (;;) {
+ opt = getopt_long(argc, argv, "+dlt:", opts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'd':
+ usbip_use_debug = 1;
+ break;
+ case 'l':
+ usbip_use_syslog = 1;
+ openlog("", LOG_PID, LOG_USER);
+ break;
+ case 't':
+ usbip_setup_port_number(optarg);
+ break;
+ case '?':
+ printf("usbip: invalid option\n");
+ /* Terminate after printing error */
+ /* FALLTHRU */
+ default:
+ usbip_usage();
+ goto out;
+ }
+ }
+
+ cmd = argv[optind];
+ if (cmd) {
+ for (i = 0; cmds[i].name != NULL; i++)
+ if (!strcmp(cmds[i].name, cmd)) {
+ argc -= optind;
+ argv += optind;
+ optind = 0;
+ rc = run_command(&cmds[i], argc, argv);
+ goto out;
+ }
+ }
+
+ /* invalid command */
+ usbip_help(0, NULL);
+out:
+ return (rc > -1 ? EXIT_SUCCESS : EXIT_FAILURE);
+}
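
Reviewer note: usbip.c dispatches subcommands through a NULL-terminated table of struct command entries rather than an if/else chain; main() consumes the global options with getopt_long(), then resets optind and hands the remaining argc/argv to the matching handler so each handler can run its own option pass. A minimal, self-contained sketch of the same table-driven pattern (all names below are illustrative, not from the patch):

#include <stdio.h>
#include <string.h>

/* Stand-ins for the real handlers (usbip_attach(), usbip_list(), ...). */
static int cmd_hello(int argc, char *argv[]) { (void)argc; (void)argv; puts("hello"); return 0; }
static int cmd_bye(int argc, char *argv[]) { (void)argc; (void)argv; puts("bye"); return 0; }

struct command {
	const char *name;
	int (*fn)(int argc, char *argv[]);
};

static const struct command cmds[] = {
	{ "hello", cmd_hello },
	{ "bye",   cmd_bye },
	{ NULL,    NULL }	/* sentinel ends the scan, as in usbip.c */
};

int main(int argc, char *argv[])
{
	if (argc < 2)
		return 1;
	for (int i = 0; cmds[i].name; i++)
		if (!strcmp(cmds[i].name, argv[1]))
			return cmds[i].fn(argc - 1, argv + 1);
	fprintf(stderr, "unknown command: %s\n", argv[1]);
	return 1;
}
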
diff --git a/tools/usb/usbip/src/usbip.h b/tools/usb/usbip/src/usbip.h
new file mode 100644
index 000000000..84fe66a9d
--- /dev/null
+++ b/tools/usb/usbip/src/usbip.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __USBIP_H
+#define __USBIP_H
+
+#ifdef HAVE_CONFIG_H
+#include "../config.h"
+#endif
+
+/* usbip commands */
+int usbip_attach(int argc, char *argv[]);
+int usbip_detach(int argc, char *argv[]);
+int usbip_list(int argc, char *argv[]);
+int usbip_bind(int argc, char *argv[]);
+int usbip_unbind(int argc, char *argv[]);
+int usbip_port_show(int argc, char *argv[]);
+
+void usbip_attach_usage(void);
+void usbip_detach_usage(void);
+void usbip_list_usage(void);
+void usbip_bind_usage(void);
+void usbip_unbind_usage(void);
+
+#endif /* __USBIP_H */
diff --git a/tools/usb/usbip/src/usbip_attach.c b/tools/usb/usbip/src/usbip_attach.c
new file mode 100644
index 000000000..ba8872848
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_attach.c
@@ -0,0 +1,255 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/stat.h>
+
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "vhci_driver.h"
+#include "usbip_common.h"
+#include "usbip_network.h"
+#include "usbip.h"
+
+static const char usbip_attach_usage_string[] =
+ "usbip attach <args>\n"
+ " -r, --remote=<host> The machine with exported USB devices\n"
+ " -b, --busid=<busid> Busid of the device on <host>\n"
+ " -d, --device=<devid> Id of the virtual UDC on <host>\n";
+
+void usbip_attach_usage(void)
+{
+ printf("usage: %s", usbip_attach_usage_string);
+}
+
+#define MAX_BUFF 100
+static int record_connection(char *host, char *port, char *busid, int rhport)
+{
+ int fd;
+ char path[PATH_MAX+1];
+ char buff[MAX_BUFF+1];
+ int ret;
+
+ ret = mkdir(VHCI_STATE_PATH, 0700);
+ if (ret < 0) {
+ /* if VHCI_STATE_PATH exists, then it better be a directory */
+ if (errno == EEXIST) {
+ struct stat s;
+
+ ret = stat(VHCI_STATE_PATH, &s);
+ if (ret < 0)
+ return -1;
+			if (!S_ISDIR(s.st_mode))
+ return -1;
+ } else
+ return -1;
+ }
+
+ snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", rhport);
+
+ fd = open(path, O_WRONLY|O_CREAT|O_TRUNC, S_IRWXU);
+ if (fd < 0)
+ return -1;
+
+ snprintf(buff, MAX_BUFF, "%s %s %s\n",
+ host, port, busid);
+
+ ret = write(fd, buff, strlen(buff));
+ if (ret != (ssize_t) strlen(buff)) {
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+static int import_device(int sockfd, struct usbip_usb_device *udev)
+{
+ int rc;
+ int port;
+ uint32_t speed = udev->speed;
+
+ rc = usbip_vhci_driver_open();
+ if (rc < 0) {
+ err("open vhci_driver");
+ goto err_out;
+ }
+
+ do {
+ port = usbip_vhci_get_free_port(speed);
+ if (port < 0) {
+ err("no free port");
+ goto err_driver_close;
+ }
+
+ dbg("got free port %d", port);
+
+ rc = usbip_vhci_attach_device(port, sockfd, udev->busnum,
+ udev->devnum, udev->speed);
+ if (rc < 0 && errno != EBUSY) {
+ err("import device");
+ goto err_driver_close;
+ }
+ } while (rc < 0);
+
+ usbip_vhci_driver_close();
+
+ return port;
+
+err_driver_close:
+ usbip_vhci_driver_close();
+err_out:
+ return -1;
+}
+
+static int query_import_device(int sockfd, char *busid)
+{
+ int rc;
+ struct op_import_request request;
+ struct op_import_reply reply;
+ uint16_t code = OP_REP_IMPORT;
+ int status;
+
+ memset(&request, 0, sizeof(request));
+ memset(&reply, 0, sizeof(reply));
+
+ /* send a request */
+ rc = usbip_net_send_op_common(sockfd, OP_REQ_IMPORT, 0);
+ if (rc < 0) {
+ err("send op_common");
+ return -1;
+ }
+
+ strncpy(request.busid, busid, SYSFS_BUS_ID_SIZE-1);
+
+ PACK_OP_IMPORT_REQUEST(0, &request);
+
+ rc = usbip_net_send(sockfd, (void *) &request, sizeof(request));
+ if (rc < 0) {
+ err("send op_import_request");
+ return -1;
+ }
+
+ /* receive a reply */
+ rc = usbip_net_recv_op_common(sockfd, &code, &status);
+ if (rc < 0) {
+ err("Attach Request for %s failed - %s\n",
+ busid, usbip_op_common_status_string(status));
+ return -1;
+ }
+
+ rc = usbip_net_recv(sockfd, (void *) &reply, sizeof(reply));
+ if (rc < 0) {
+ err("recv op_import_reply");
+ return -1;
+ }
+
+ PACK_OP_IMPORT_REPLY(0, &reply);
+
+ /* check the reply */
+ if (strncmp(reply.udev.busid, busid, SYSFS_BUS_ID_SIZE)) {
+ err("recv different busid %s", reply.udev.busid);
+ return -1;
+ }
+
+ /* import a device */
+ return import_device(sockfd, &reply.udev);
+}
+
+static int attach_device(char *host, char *busid)
+{
+ int sockfd;
+ int rc;
+ int rhport;
+
+ sockfd = usbip_net_tcp_connect(host, usbip_port_string);
+ if (sockfd < 0) {
+ err("tcp connect");
+ return -1;
+ }
+
+ rhport = query_import_device(sockfd, busid);
+ if (rhport < 0)
+ return -1;
+
+ close(sockfd);
+
+ rc = record_connection(host, usbip_port_string, busid, rhport);
+ if (rc < 0) {
+ err("record connection");
+ return -1;
+ }
+
+ return 0;
+}
+
+int usbip_attach(int argc, char *argv[])
+{
+ static const struct option opts[] = {
+ { "remote", required_argument, NULL, 'r' },
+ { "busid", required_argument, NULL, 'b' },
+ { "device", required_argument, NULL, 'd' },
+ { NULL, 0, NULL, 0 }
+ };
+ char *host = NULL;
+ char *busid = NULL;
+ int opt;
+ int ret = -1;
+
+ for (;;) {
+ opt = getopt_long(argc, argv, "d:r:b:", opts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'r':
+ host = optarg;
+ break;
+ case 'd':
+ case 'b':
+ busid = optarg;
+ break;
+ default:
+ goto err_out;
+ }
+ }
+
+ if (!host || !busid)
+ goto err_out;
+
+ ret = attach_device(host, busid);
+ goto out;
+
+err_out:
+ usbip_attach_usage();
+out:
+ return ret;
+}
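
Reviewer note: record_connection() persists each attachment as a one-line state file, VHCI_STATE_PATH "/port<N>", holding "host port busid"; the port and detach commands depend on those files later. A sketch of the matching reader, assuming the real VHCI_STATE_PATH macro from vhci_driver.h (outside this hunk) and caller buffers of at least 64, 16 and 64 bytes:

#include <stdio.h>

/* Illustrative value; the real macro comes from vhci_driver.h. */
#define VHCI_STATE_PATH "/var/run/vhci_hcd"

/* Read back the "host port busid" triple record_connection() wrote. */
static int read_connection(int rhport, char *host, char *port, char *busid)
{
	char path[256];
	FILE *fp;
	int n;

	snprintf(path, sizeof(path), VHCI_STATE_PATH "/port%d", rhport);
	fp = fopen(path, "r");
	if (!fp)
		return -1;
	/* Field widths must stay below the caller's buffer sizes. */
	n = fscanf(fp, "%63s %15s %63s", host, port, busid);
	fclose(fp);
	return n == 3 ? 0 : -1;
}
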
diff --git a/tools/usb/usbip/src/usbip_bind.c b/tools/usb/usbip/src/usbip_bind.c
new file mode 100644
index 000000000..e121cfb17
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_bind.c
@@ -0,0 +1,223 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libudev.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <getopt.h>
+
+#include "usbip_common.h"
+#include "utils.h"
+#include "usbip.h"
+#include "sysfs_utils.h"
+
+enum unbind_status {
+ UNBIND_ST_OK,
+ UNBIND_ST_USBIP_HOST,
+ UNBIND_ST_FAILED
+};
+
+static const char usbip_bind_usage_string[] =
+ "usbip bind <args>\n"
+ " -b, --busid=<busid> Bind " USBIP_HOST_DRV_NAME ".ko to device "
+ "on <busid>\n";
+
+void usbip_bind_usage(void)
+{
+ printf("usage: %s", usbip_bind_usage_string);
+}
+
+/* call at unbound state */
+static int bind_usbip(char *busid)
+{
+ char attr_name[] = "bind";
+ char bind_attr_path[SYSFS_PATH_MAX];
+ int rc = -1;
+
+ snprintf(bind_attr_path, sizeof(bind_attr_path), "%s/%s/%s/%s/%s/%s",
+ SYSFS_MNT_PATH, SYSFS_BUS_NAME, SYSFS_BUS_TYPE,
+ SYSFS_DRIVERS_NAME, USBIP_HOST_DRV_NAME, attr_name);
+
+ rc = write_sysfs_attribute(bind_attr_path, busid, strlen(busid));
+ if (rc < 0) {
+ err("error binding device %s to driver: %s", busid,
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+/* a buggy driver may cause a deadlock */
+static int unbind_other(char *busid)
+{
+ enum unbind_status status = UNBIND_ST_OK;
+
+ char attr_name[] = "unbind";
+ char unbind_attr_path[SYSFS_PATH_MAX];
+ int rc = -1;
+
+ struct udev *udev;
+ struct udev_device *dev;
+ const char *driver;
+ const char *bDevClass;
+
+ /* Create libudev context. */
+ udev = udev_new();
+
+ /* Get the device. */
+ dev = udev_device_new_from_subsystem_sysname(udev, "usb", busid);
+ if (!dev) {
+ dbg("unable to find device with bus ID %s", busid);
+ goto err_close_busid_dev;
+ }
+
+ /* Check what kind of device it is. */
+ bDevClass = udev_device_get_sysattr_value(dev, "bDeviceClass");
+ if (!bDevClass) {
+ dbg("unable to get bDevClass device attribute");
+ goto err_close_busid_dev;
+ }
+
+ if (!strncmp(bDevClass, "09", strlen(bDevClass))) {
+ dbg("skip unbinding of hub");
+ goto err_close_busid_dev;
+ }
+
+ /* Get the device driver. */
+ driver = udev_device_get_driver(dev);
+ if (!driver) {
+ /* No driver bound to this device. */
+ goto out;
+ }
+
+ if (!strncmp(USBIP_HOST_DRV_NAME, driver,
+ strlen(USBIP_HOST_DRV_NAME))) {
+ /* Already bound to usbip-host. */
+ status = UNBIND_ST_USBIP_HOST;
+ goto out;
+ }
+
+ /* Unbind device from driver. */
+ snprintf(unbind_attr_path, sizeof(unbind_attr_path), "%s/%s/%s/%s/%s/%s",
+ SYSFS_MNT_PATH, SYSFS_BUS_NAME, SYSFS_BUS_TYPE,
+ SYSFS_DRIVERS_NAME, driver, attr_name);
+
+ rc = write_sysfs_attribute(unbind_attr_path, busid, strlen(busid));
+ if (rc < 0) {
+ err("error unbinding device %s from driver", busid);
+ goto err_close_busid_dev;
+ }
+
+ goto out;
+
+err_close_busid_dev:
+ status = UNBIND_ST_FAILED;
+out:
+ udev_device_unref(dev);
+ udev_unref(udev);
+
+ return status;
+}
+
+static int bind_device(char *busid)
+{
+ int rc;
+ struct udev *udev;
+ struct udev_device *dev;
+ const char *devpath;
+
+ /* Check whether the device with this bus ID exists. */
+ udev = udev_new();
+ dev = udev_device_new_from_subsystem_sysname(udev, "usb", busid);
+ if (!dev) {
+ err("device with the specified bus ID does not exist");
+ return -1;
+ }
+ devpath = udev_device_get_devpath(dev);
+ udev_unref(udev);
+
+ /* If the device is already attached to vhci_hcd - bail out */
+ if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+ err("bind loop detected: device: %s is attached to %s\n",
+ devpath, USBIP_VHCI_DRV_NAME);
+ return -1;
+ }
+
+ rc = unbind_other(busid);
+ if (rc == UNBIND_ST_FAILED) {
+ err("could not unbind driver from device on busid %s", busid);
+ return -1;
+ } else if (rc == UNBIND_ST_USBIP_HOST) {
+ err("device on busid %s is already bound to %s", busid,
+ USBIP_HOST_DRV_NAME);
+ return -1;
+ }
+
+ rc = modify_match_busid(busid, 1);
+ if (rc < 0) {
+ err("unable to bind device on %s", busid);
+ return -1;
+ }
+
+ rc = bind_usbip(busid);
+ if (rc < 0) {
+ err("could not bind device to %s", USBIP_HOST_DRV_NAME);
+ modify_match_busid(busid, 0);
+ return -1;
+ }
+
+ info("bind device on busid %s: complete", busid);
+
+ return 0;
+}
+
+int usbip_bind(int argc, char *argv[])
+{
+ static const struct option opts[] = {
+ { "busid", required_argument, NULL, 'b' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ int opt;
+ int ret = -1;
+
+ for (;;) {
+ opt = getopt_long(argc, argv, "b:", opts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'b':
+ ret = bind_device(optarg);
+ goto out;
+ default:
+ goto err_out;
+ }
+ }
+
+err_out:
+ usbip_bind_usage();
+out:
+ return ret;
+}
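
Reviewer note: bind_usbip() and unbind_other() both reduce to writing a bus ID into a sysfs attribute file through write_sysfs_attribute() from sysfs_utils.h; that helper lives in libsrc and is not part of this hunk, so the following is only a plausible minimal equivalent for readers who want the mechanics in one place:

#include <fcntl.h>
#include <unistd.h>

/* Hedged sketch of a write_sysfs_attribute()-style helper:
 * write `len` bytes of `value` into a sysfs attribute file. */
static int sysfs_write(const char *attr_path, const char *value, size_t len)
{
	ssize_t rc;
	int fd;

	fd = open(attr_path, O_WRONLY);
	if (fd < 0)
		return -1;
	rc = write(fd, value, len);
	close(fd);
	return rc == (ssize_t)len ? 0 : -1;
}

For instance, sysfs_write("/sys/bus/usb/drivers/usbip-host/bind", "1-1", 3) mirrors the path that bind_usbip() composes from the SYSFS_* macros.
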
diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c
new file mode 100644
index 000000000..777f7286a
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_detach.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <ctype.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <getopt.h>
+#include <unistd.h>
+
+#include "vhci_driver.h"
+#include "usbip_common.h"
+#include "usbip_network.h"
+#include "usbip.h"
+
+static const char usbip_detach_usage_string[] =
+ "usbip detach <args>\n"
+ " -p, --port=<port> " USBIP_VHCI_DRV_NAME
+ " port the device is on\n";
+
+void usbip_detach_usage(void)
+{
+ printf("usage: %s", usbip_detach_usage_string);
+}
+
+static int detach_port(char *port)
+{
+ int ret = 0;
+ uint8_t portnum;
+ char path[PATH_MAX+1];
+ int i;
+ struct usbip_imported_device *idev;
+ int found = 0;
+
+ unsigned int port_len = strlen(port);
+
+ for (unsigned int i = 0; i < port_len; i++)
+ if (!isdigit(port[i])) {
+ err("invalid port %s", port);
+ return -1;
+ }
+
+ portnum = atoi(port);
+
+ ret = usbip_vhci_driver_open();
+ if (ret < 0) {
+ err("open vhci_driver");
+ return -1;
+ }
+
+ /* check for invalid port */
+ for (i = 0; i < vhci_driver->nports; i++) {
+ idev = &vhci_driver->idev[i];
+
+ if (idev->port == portnum) {
+ found = 1;
+ if (idev->status != VDEV_ST_NULL)
+ break;
+ info("Port %d is already detached!\n", idev->port);
+ goto call_driver_close;
+ }
+ }
+
+ if (!found) {
+ err("Invalid port %s > maxports %d",
+ port, vhci_driver->nports);
+ goto call_driver_close;
+ }
+
+ /* remove the port state file */
+ snprintf(path, PATH_MAX, VHCI_STATE_PATH"/port%d", portnum);
+
+ remove(path);
+ rmdir(VHCI_STATE_PATH);
+
+ ret = usbip_vhci_detach_device(portnum);
+ if (ret < 0) {
+ ret = -1;
+ err("Port %d detach request failed!\n", portnum);
+ goto call_driver_close;
+ }
+ info("Port %d is now detached!\n", portnum);
+
+call_driver_close:
+ usbip_vhci_driver_close();
+
+ return ret;
+}
+
+int usbip_detach(int argc, char *argv[])
+{
+ static const struct option opts[] = {
+ { "port", required_argument, NULL, 'p' },
+ { NULL, 0, NULL, 0 }
+ };
+ int opt;
+ int ret = -1;
+
+ for (;;) {
+ opt = getopt_long(argc, argv, "p:", opts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'p':
+ ret = detach_port(optarg);
+ goto out;
+ default:
+ goto err_out;
+ }
+ }
+
+err_out:
+ usbip_detach_usage();
+out:
+ return ret;
+}
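
Reviewer note: detach_port() validates its argument with an isdigit() loop and then converts it with atoi(), which silently truncates into the uint8_t portnum ("260" wraps to 4 and would target the wrong port before the nports comparison can object). A stricter parse is sketched below as a suggestion; it is not part of the patch:

#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdlib.h>

/* Reject empty input, trailing garbage and values that do not fit
 * the uint8_t port number used by the vhci driver. */
static int parse_port(const char *arg, uint8_t *portnum)
{
	unsigned long val;
	char *end;

	errno = 0;
	val = strtoul(arg, &end, 10);
	if (end == arg || *end != '\0' || errno == ERANGE || val > UCHAR_MAX)
		return -1;
	*portnum = (uint8_t)val;
	return 0;
}
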
diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c
new file mode 100644
index 000000000..8d4ccf4b9
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_list.c
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/types.h>
+#include <libudev.h>
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <getopt.h>
+#include <netdb.h>
+#include <unistd.h>
+
+#include <dirent.h>
+
+#include <linux/usb/ch9.h>
+
+#include "usbip_common.h"
+#include "usbip_network.h"
+#include "usbip.h"
+
+static const char usbip_list_usage_string[] =
+ "usbip list [-p|--parsable] <args>\n"
+ " -p, --parsable Parsable list format\n"
+ " -r, --remote=<host> List the exportable USB devices on <host>\n"
+ " -l, --local List the local USB devices\n";
+
+void usbip_list_usage(void)
+{
+ printf("usage: %s", usbip_list_usage_string);
+}
+
+static int get_exported_devices(char *host, int sockfd)
+{
+ char product_name[100];
+ char class_name[100];
+ struct op_devlist_reply reply;
+ uint16_t code = OP_REP_DEVLIST;
+ struct usbip_usb_device udev;
+ struct usbip_usb_interface uintf;
+ unsigned int i;
+ int rc, j;
+ int status;
+
+ rc = usbip_net_send_op_common(sockfd, OP_REQ_DEVLIST, 0);
+ if (rc < 0) {
+ dbg("usbip_net_send_op_common failed");
+ return -1;
+ }
+
+ rc = usbip_net_recv_op_common(sockfd, &code, &status);
+ if (rc < 0) {
+ err("Exported Device List Request failed - %s\n",
+ usbip_op_common_status_string(status));
+ return -1;
+ }
+
+ memset(&reply, 0, sizeof(reply));
+ rc = usbip_net_recv(sockfd, &reply, sizeof(reply));
+ if (rc < 0) {
+ dbg("usbip_net_recv_op_devlist failed");
+ return -1;
+ }
+ PACK_OP_DEVLIST_REPLY(0, &reply);
+ dbg("exportable devices: %d\n", reply.ndev);
+
+ if (reply.ndev == 0) {
+ info("no exportable devices found on %s", host);
+ return 0;
+ }
+
+ printf("Exportable USB devices\n");
+ printf("======================\n");
+ printf(" - %s\n", host);
+
+ for (i = 0; i < reply.ndev; i++) {
+ memset(&udev, 0, sizeof(udev));
+ rc = usbip_net_recv(sockfd, &udev, sizeof(udev));
+ if (rc < 0) {
+ dbg("usbip_net_recv failed: usbip_usb_device[%d]", i);
+ return -1;
+ }
+ usbip_net_pack_usb_device(0, &udev);
+
+ usbip_names_get_product(product_name, sizeof(product_name),
+ udev.idVendor, udev.idProduct);
+ usbip_names_get_class(class_name, sizeof(class_name),
+ udev.bDeviceClass, udev.bDeviceSubClass,
+ udev.bDeviceProtocol);
+ printf("%11s: %s\n", udev.busid, product_name);
+ printf("%11s: %s\n", "", udev.path);
+ printf("%11s: %s\n", "", class_name);
+
+ for (j = 0; j < udev.bNumInterfaces; j++) {
+ rc = usbip_net_recv(sockfd, &uintf, sizeof(uintf));
+ if (rc < 0) {
+ err("usbip_net_recv failed: usbip_usb_intf[%d]",
+ j);
+
+ return -1;
+ }
+ usbip_net_pack_usb_interface(0, &uintf);
+
+ usbip_names_get_class(class_name, sizeof(class_name),
+ uintf.bInterfaceClass,
+ uintf.bInterfaceSubClass,
+ uintf.bInterfaceProtocol);
+ printf("%11s: %2d - %s\n", "", j, class_name);
+ }
+
+ printf("\n");
+ }
+
+ return 0;
+}
+
+static int list_exported_devices(char *host)
+{
+ int rc;
+ int sockfd;
+
+ sockfd = usbip_net_tcp_connect(host, usbip_port_string);
+ if (sockfd < 0) {
+ err("could not connect to %s:%s: %s", host,
+ usbip_port_string, gai_strerror(sockfd));
+ return -1;
+ }
+ dbg("connected to %s:%s", host, usbip_port_string);
+
+ rc = get_exported_devices(host, sockfd);
+ if (rc < 0) {
+ err("failed to get device list from %s", host);
+ return -1;
+ }
+
+ close(sockfd);
+
+ return 0;
+}
+
+static void print_device(const char *busid, const char *vendor,
+ const char *product, bool parsable)
+{
+ if (parsable)
+ printf("busid=%s#usbid=%.4s:%.4s#", busid, vendor, product);
+ else
+ printf(" - busid %s (%.4s:%.4s)\n", busid, vendor, product);
+}
+
+static void print_product_name(char *product_name, bool parsable)
+{
+ if (!parsable)
+ printf(" %s\n", product_name);
+}
+
+static int list_devices(bool parsable)
+{
+ struct udev *udev;
+ struct udev_enumerate *enumerate;
+ struct udev_list_entry *devices, *dev_list_entry;
+ struct udev_device *dev;
+ const char *path;
+ const char *idVendor;
+ const char *idProduct;
+ const char *bConfValue;
+ const char *bNumIntfs;
+ const char *busid;
+ char product_name[128];
+ int ret = -1;
+ const char *devpath;
+
+ /* Create libudev context. */
+ udev = udev_new();
+
+ /* Create libudev device enumeration. */
+ enumerate = udev_enumerate_new(udev);
+
+ /* Take only USB devices that are not hubs and do not have
+ * the bInterfaceNumber attribute, i.e. are not interfaces.
+ */
+ udev_enumerate_add_match_subsystem(enumerate, "usb");
+ udev_enumerate_add_nomatch_sysattr(enumerate, "bDeviceClass", "09");
+ udev_enumerate_add_nomatch_sysattr(enumerate, "bInterfaceNumber", NULL);
+ udev_enumerate_scan_devices(enumerate);
+
+ devices = udev_enumerate_get_list_entry(enumerate);
+
+ /* Show information about each device. */
+ udev_list_entry_foreach(dev_list_entry, devices) {
+ path = udev_list_entry_get_name(dev_list_entry);
+ dev = udev_device_new_from_syspath(udev, path);
+
+ /* Ignore devices attached to vhci_hcd */
+ devpath = udev_device_get_devpath(dev);
+ if (strstr(devpath, USBIP_VHCI_DRV_NAME)) {
+ dbg("Skip the device %s already attached to %s\n",
+ devpath, USBIP_VHCI_DRV_NAME);
+ continue;
+ }
+
+ /* Get device information. */
+ idVendor = udev_device_get_sysattr_value(dev, "idVendor");
+ idProduct = udev_device_get_sysattr_value(dev, "idProduct");
+ bConfValue = udev_device_get_sysattr_value(dev,
+ "bConfigurationValue");
+ bNumIntfs = udev_device_get_sysattr_value(dev,
+ "bNumInterfaces");
+ busid = udev_device_get_sysname(dev);
+ if (!idVendor || !idProduct || !bConfValue || !bNumIntfs) {
+ err("problem getting device attributes: %s",
+ strerror(errno));
+ goto err_out;
+ }
+
+ /* Get product name. */
+ usbip_names_get_product(product_name, sizeof(product_name),
+ strtol(idVendor, NULL, 16),
+ strtol(idProduct, NULL, 16));
+
+ /* Print information. */
+ print_device(busid, idVendor, idProduct, parsable);
+ print_product_name(product_name, parsable);
+
+ printf("\n");
+
+ udev_device_unref(dev);
+ }
+
+ ret = 0;
+
+err_out:
+ udev_enumerate_unref(enumerate);
+ udev_unref(udev);
+
+ return ret;
+}
+
+static int list_gadget_devices(bool parsable)
+{
+ int ret = -1;
+ struct udev *udev;
+ struct udev_enumerate *enumerate;
+ struct udev_list_entry *devices, *dev_list_entry;
+ struct udev_device *dev;
+ const char *path;
+ const char *driver;
+
+ const struct usb_device_descriptor *d_desc;
+ const char *descriptors;
+ char product_name[128];
+
+ uint16_t idVendor;
+ char idVendor_buf[8];
+ uint16_t idProduct;
+ char idProduct_buf[8];
+ const char *busid;
+
+ udev = udev_new();
+ enumerate = udev_enumerate_new(udev);
+
+ udev_enumerate_add_match_subsystem(enumerate, "platform");
+
+ udev_enumerate_scan_devices(enumerate);
+ devices = udev_enumerate_get_list_entry(enumerate);
+
+ udev_list_entry_foreach(dev_list_entry, devices) {
+ path = udev_list_entry_get_name(dev_list_entry);
+ dev = udev_device_new_from_syspath(udev, path);
+
+ driver = udev_device_get_driver(dev);
+ /* We only have mechanism to enumerate gadgets bound to vudc */
+ if (driver == NULL || strcmp(driver, USBIP_DEVICE_DRV_NAME))
+ continue;
+
+ /* Get device information. */
+ descriptors = udev_device_get_sysattr_value(dev,
+ VUDC_DEVICE_DESCR_FILE);
+
+ if (!descriptors) {
+ err("problem getting device attributes: %s",
+ strerror(errno));
+ goto err_out;
+ }
+
+ d_desc = (const struct usb_device_descriptor *) descriptors;
+
+ idVendor = le16toh(d_desc->idVendor);
+ sprintf(idVendor_buf, "0x%4x", idVendor);
+ idProduct = le16toh(d_desc->idProduct);
+		sprintf(idProduct_buf, "0x%4x", idProduct);
+ busid = udev_device_get_sysname(dev);
+
+ /* Get product name. */
+ usbip_names_get_product(product_name, sizeof(product_name),
+ le16toh(idVendor),
+ le16toh(idProduct));
+
+ /* Print information. */
+ print_device(busid, idVendor_buf, idProduct_buf, parsable);
+ print_product_name(product_name, parsable);
+
+ printf("\n");
+
+ udev_device_unref(dev);
+ }
+ ret = 0;
+
+err_out:
+ udev_enumerate_unref(enumerate);
+ udev_unref(udev);
+
+ return ret;
+}
+
+int usbip_list(int argc, char *argv[])
+{
+ static const struct option opts[] = {
+ { "parsable", no_argument, NULL, 'p' },
+ { "remote", required_argument, NULL, 'r' },
+ { "local", no_argument, NULL, 'l' },
+ { "device", no_argument, NULL, 'd' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ bool parsable = false;
+ int opt;
+ int ret = -1;
+
+ if (usbip_names_init(USBIDS_FILE))
+ err("failed to open %s", USBIDS_FILE);
+
+ for (;;) {
+ opt = getopt_long(argc, argv, "pr:ld", opts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'p':
+ parsable = true;
+ break;
+ case 'r':
+ ret = list_exported_devices(optarg);
+ goto out;
+ case 'l':
+ ret = list_devices(parsable);
+ goto out;
+ case 'd':
+ ret = list_gadget_devices(parsable);
+ goto out;
+ default:
+ goto err_out;
+ }
+ }
+
+err_out:
+ usbip_list_usage();
+out:
+ usbip_names_free();
+
+ return ret;
+}
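
Reviewer note: list_devices() is the standard libudev enumeration recipe: match the usb subsystem, then exclude hubs (bDeviceClass 09) and interface nodes (anything exposing a bInterfaceNumber attribute). The same recipe stripped of the usb.ids lookup and error handling, as a standalone program (the file name and build line are examples: cc demo.c $(pkg-config --cflags --libs libudev)):

#include <stdio.h>
#include <libudev.h>

int main(void)
{
	struct udev *udev = udev_new();
	struct udev_enumerate *en = udev_enumerate_new(udev);
	struct udev_list_entry *entry;

	udev_enumerate_add_match_subsystem(en, "usb");
	udev_enumerate_add_nomatch_sysattr(en, "bDeviceClass", "09");
	udev_enumerate_add_nomatch_sysattr(en, "bInterfaceNumber", NULL);
	udev_enumerate_scan_devices(en);

	udev_list_entry_foreach(entry, udev_enumerate_get_list_entry(en)) {
		struct udev_device *dev = udev_device_new_from_syspath(udev,
					udev_list_entry_get_name(entry));
		const char *vid, *pid;

		if (!dev)
			continue;
		vid = udev_device_get_sysattr_value(dev, "idVendor");
		pid = udev_device_get_sysattr_value(dev, "idProduct");
		if (vid && pid)
			printf("%s (%s:%s)\n",
			       udev_device_get_sysname(dev), vid, pid);
		udev_device_unref(dev);
	}
	udev_enumerate_unref(en);
	udev_unref(udev);
	return 0;
}
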
diff --git a/tools/usb/usbip/src/usbip_network.c b/tools/usb/usbip/src/usbip_network.c
new file mode 100644
index 000000000..902f55208
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_network.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/socket.h>
+
+#include <string.h>
+
+#include <arpa/inet.h>
+#include <netdb.h>
+#include <netinet/tcp.h>
+#include <unistd.h>
+
+#ifdef HAVE_LIBWRAP
+#include <tcpd.h>
+#endif
+
+#include "usbip_common.h"
+#include "usbip_network.h"
+
+int usbip_port = 3240;
+char *usbip_port_string = "3240";
+
+void usbip_setup_port_number(char *arg)
+{
+ dbg("parsing port arg '%s'", arg);
+ char *end;
+ unsigned long int port = strtoul(arg, &end, 10);
+
+ if (end == arg) {
+ err("port: could not parse '%s' as a decimal integer", arg);
+ return;
+ }
+
+ if (*end != '\0') {
+ err("port: garbage at end of '%s'", arg);
+ return;
+ }
+
+ if (port > UINT16_MAX) {
+ err("port: %s too high (max=%d)",
+ arg, UINT16_MAX);
+ return;
+ }
+
+ usbip_port = port;
+ usbip_port_string = arg;
+ info("using port %d (\"%s\")", usbip_port, usbip_port_string);
+}
+
+uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num)
+{
+ uint32_t i;
+
+ if (pack)
+ i = htonl(num);
+ else
+ i = ntohl(num);
+
+ return i;
+}
+
+uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num)
+{
+ uint16_t i;
+
+ if (pack)
+ i = htons(num);
+ else
+ i = ntohs(num);
+
+ return i;
+}
+
+void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev)
+{
+ udev->busnum = usbip_net_pack_uint32_t(pack, udev->busnum);
+ udev->devnum = usbip_net_pack_uint32_t(pack, udev->devnum);
+ udev->speed = usbip_net_pack_uint32_t(pack, udev->speed);
+
+ udev->idVendor = usbip_net_pack_uint16_t(pack, udev->idVendor);
+ udev->idProduct = usbip_net_pack_uint16_t(pack, udev->idProduct);
+ udev->bcdDevice = usbip_net_pack_uint16_t(pack, udev->bcdDevice);
+}
+
+void usbip_net_pack_usb_interface(int pack __attribute__((unused)),
+ struct usbip_usb_interface *udev
+ __attribute__((unused)))
+{
+ /* uint8_t members need nothing */
+}
+
+static ssize_t usbip_net_xmit(int sockfd, void *buff, size_t bufflen,
+ int sending)
+{
+ ssize_t nbytes;
+ ssize_t total = 0;
+
+ if (!bufflen)
+ return 0;
+
+ do {
+ if (sending)
+ nbytes = send(sockfd, buff, bufflen, 0);
+ else
+ nbytes = recv(sockfd, buff, bufflen, MSG_WAITALL);
+
+ if (nbytes <= 0)
+ return -1;
+
+ buff = (void *)((intptr_t) buff + nbytes);
+ bufflen -= nbytes;
+ total += nbytes;
+
+ } while (bufflen > 0);
+
+ return total;
+}
+
+ssize_t usbip_net_recv(int sockfd, void *buff, size_t bufflen)
+{
+ return usbip_net_xmit(sockfd, buff, bufflen, 0);
+}
+
+ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen)
+{
+ return usbip_net_xmit(sockfd, buff, bufflen, 1);
+}
+
+static inline void usbip_net_pack_op_common(int pack,
+ struct op_common *op_common)
+{
+ op_common->version = usbip_net_pack_uint16_t(pack, op_common->version);
+ op_common->code = usbip_net_pack_uint16_t(pack, op_common->code);
+ op_common->status = usbip_net_pack_uint32_t(pack, op_common->status);
+}
+
+int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status)
+{
+ struct op_common op_common;
+ int rc;
+
+ memset(&op_common, 0, sizeof(op_common));
+
+ op_common.version = USBIP_VERSION;
+ op_common.code = code;
+ op_common.status = status;
+
+ usbip_net_pack_op_common(1, &op_common);
+
+ rc = usbip_net_send(sockfd, &op_common, sizeof(op_common));
+ if (rc < 0) {
+ dbg("usbip_net_send failed: %d", rc);
+ return -1;
+ }
+
+ return 0;
+}
+
+int usbip_net_recv_op_common(int sockfd, uint16_t *code, int *status)
+{
+ struct op_common op_common;
+ int rc;
+
+ memset(&op_common, 0, sizeof(op_common));
+
+ rc = usbip_net_recv(sockfd, &op_common, sizeof(op_common));
+ if (rc < 0) {
+ dbg("usbip_net_recv failed: %d", rc);
+ goto err;
+ }
+
+ usbip_net_pack_op_common(0, &op_common);
+
+ if (op_common.version != USBIP_VERSION) {
+		err("USBIP Kernel and tool version mismatch: %d %d",
+ op_common.version, USBIP_VERSION);
+ goto err;
+ }
+
+ switch (*code) {
+ case OP_UNSPEC:
+ break;
+ default:
+ if (op_common.code != *code) {
+ dbg("unexpected pdu %#0x for %#0x", op_common.code,
+ *code);
+ /* return error status */
+ *status = ST_ERROR;
+ goto err;
+ }
+ }
+
+ *status = op_common.status;
+
+ if (op_common.status != ST_OK) {
+ dbg("request failed at peer: %d", op_common.status);
+ goto err;
+ }
+
+ *code = op_common.code;
+
+ return 0;
+err:
+ return -1;
+}
+
+int usbip_net_set_reuseaddr(int sockfd)
+{
+ const int val = 1;
+ int ret;
+
+ ret = setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val));
+ if (ret < 0)
+ dbg("setsockopt: SO_REUSEADDR");
+
+ return ret;
+}
+
+int usbip_net_set_nodelay(int sockfd)
+{
+ const int val = 1;
+ int ret;
+
+ ret = setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &val, sizeof(val));
+ if (ret < 0)
+ dbg("setsockopt: TCP_NODELAY");
+
+ return ret;
+}
+
+int usbip_net_set_keepalive(int sockfd)
+{
+ const int val = 1;
+ int ret;
+
+ ret = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val));
+ if (ret < 0)
+ dbg("setsockopt: SO_KEEPALIVE");
+
+ return ret;
+}
+
+int usbip_net_set_v6only(int sockfd)
+{
+ const int val = 1;
+ int ret;
+
+ ret = setsockopt(sockfd, IPPROTO_IPV6, IPV6_V6ONLY, &val, sizeof(val));
+ if (ret < 0)
+ dbg("setsockopt: IPV6_V6ONLY");
+
+ return ret;
+}
+
+/*
+ * IPv6 Ready
+ */
+int usbip_net_tcp_connect(char *hostname, char *service)
+{
+ struct addrinfo hints, *res, *rp;
+ int sockfd;
+ int ret;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_STREAM;
+
+ /* get all possible addresses */
+ ret = getaddrinfo(hostname, service, &hints, &res);
+ if (ret < 0) {
+ dbg("getaddrinfo: %s service %s: %s", hostname, service,
+ gai_strerror(ret));
+ return ret;
+ }
+
+ /* try the addresses */
+ for (rp = res; rp; rp = rp->ai_next) {
+ sockfd = socket(rp->ai_family, rp->ai_socktype,
+ rp->ai_protocol);
+ if (sockfd < 0)
+ continue;
+
+ /* should set TCP_NODELAY for usbip */
+ usbip_net_set_nodelay(sockfd);
+ /* TODO: write code for heartbeat */
+ usbip_net_set_keepalive(sockfd);
+
+ if (connect(sockfd, rp->ai_addr, rp->ai_addrlen) == 0)
+ break;
+
+ close(sockfd);
+ }
+
+ freeaddrinfo(res);
+
+ if (!rp)
+ return EAI_SYSTEM;
+
+ return sockfd;
+}
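
Reviewer note: usbip_net_xmit() is the load-bearing helper of this file: a stream socket may transfer fewer bytes than requested, and even MSG_WAITALL can return short if a signal interrupts the call, so the loop advances the buffer pointer until bufflen is drained. The receive half of that technique in isolation (a sketch, not an API from the patch):

#include <sys/socket.h>
#include <sys/types.h>
#include <stdint.h>

/* Keep calling recv() until exactly `len` bytes have arrived;
 * returns -1 on error or if the peer closes early. */
static ssize_t recv_exact(int sockfd, void *buff, size_t len)
{
	size_t done = 0;

	while (done < len) {
		ssize_t n = recv(sockfd, (uint8_t *)buff + done,
				 len - done, MSG_WAITALL);

		if (n <= 0)
			return -1;
		done += n;
	}
	return (ssize_t)done;
}
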
diff --git a/tools/usb/usbip/src/usbip_network.h b/tools/usb/usbip/src/usbip_network.h
new file mode 100644
index 000000000..83b4c5344
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_network.h
@@ -0,0 +1,178 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2005-2007 Takahiro Hirofuchi
+ */
+
+#ifndef __USBIP_NETWORK_H
+#define __USBIP_NETWORK_H
+
+#ifdef HAVE_CONFIG_H
+#include "../config.h"
+#endif
+
+#include <sys/types.h>
+
+#include <stdint.h>
+
+extern int usbip_port;
+extern char *usbip_port_string;
+void usbip_setup_port_number(char *arg);
+
+/* ---------------------------------------------------------------------- */
+/* Common header for all the kinds of PDUs. */
+struct op_common {
+ uint16_t version;
+
+#define OP_REQUEST (0x80 << 8)
+#define OP_REPLY (0x00 << 8)
+ uint16_t code;
+
+ /* status codes defined in usbip_common.h */
+ uint32_t status; /* op_code status (for reply) */
+
+} __attribute__((packed));
+
+/* ---------------------------------------------------------------------- */
+/* Dummy Code */
+#define OP_UNSPEC 0x00
+#define OP_REQ_UNSPEC OP_UNSPEC
+#define OP_REP_UNSPEC OP_UNSPEC
+
+/* ---------------------------------------------------------------------- */
+/* Retrieve USB device information. (still not used) */
+#define OP_DEVINFO 0x02
+#define OP_REQ_DEVINFO (OP_REQUEST | OP_DEVINFO)
+#define OP_REP_DEVINFO (OP_REPLY | OP_DEVINFO)
+
+struct op_devinfo_request {
+ char busid[SYSFS_BUS_ID_SIZE];
+} __attribute__((packed));
+
+struct op_devinfo_reply {
+ struct usbip_usb_device udev;
+ struct usbip_usb_interface uinf[];
+} __attribute__((packed));
+
+/* ---------------------------------------------------------------------- */
+/* Import a remote USB device. */
+#define OP_IMPORT 0x03
+#define OP_REQ_IMPORT (OP_REQUEST | OP_IMPORT)
+#define OP_REP_IMPORT (OP_REPLY | OP_IMPORT)
+
+struct op_import_request {
+ char busid[SYSFS_BUS_ID_SIZE];
+} __attribute__((packed));
+
+struct op_import_reply {
+ struct usbip_usb_device udev;
+// struct usbip_usb_interface uinf[];
+} __attribute__((packed));
+
+#define PACK_OP_IMPORT_REQUEST(pack, request) do {\
+} while (0)
+
+#define PACK_OP_IMPORT_REPLY(pack, reply) do {\
+ usbip_net_pack_usb_device(pack, &(reply)->udev);\
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+/* Export a USB device to a remote host. */
+#define OP_EXPORT 0x06
+#define OP_REQ_EXPORT (OP_REQUEST | OP_EXPORT)
+#define OP_REP_EXPORT (OP_REPLY | OP_EXPORT)
+
+struct op_export_request {
+ struct usbip_usb_device udev;
+} __attribute__((packed));
+
+struct op_export_reply {
+ int returncode;
+} __attribute__((packed));
+
+
+#define PACK_OP_EXPORT_REQUEST(pack, request) do {\
+ usbip_net_pack_usb_device(pack, &(request)->udev);\
+} while (0)
+
+#define PACK_OP_EXPORT_REPLY(pack, reply) do {\
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+/* un-Export a USB device from a remote host. */
+#define OP_UNEXPORT 0x07
+#define OP_REQ_UNEXPORT (OP_REQUEST | OP_UNEXPORT)
+#define OP_REP_UNEXPORT (OP_REPLY | OP_UNEXPORT)
+
+struct op_unexport_request {
+ struct usbip_usb_device udev;
+} __attribute__((packed));
+
+struct op_unexport_reply {
+ int returncode;
+} __attribute__((packed));
+
+#define PACK_OP_UNEXPORT_REQUEST(pack, request) do {\
+ usbip_net_pack_usb_device(pack, &(request)->udev);\
+} while (0)
+
+#define PACK_OP_UNEXPORT_REPLY(pack, reply) do {\
+} while (0)
+
+/* ---------------------------------------------------------------------- */
+/* Negotiate IPSec encryption key. (still not used) */
+#define OP_CRYPKEY 0x04
+#define OP_REQ_CRYPKEY (OP_REQUEST | OP_CRYPKEY)
+#define OP_REP_CRYPKEY (OP_REPLY | OP_CRYPKEY)
+
+struct op_crypkey_request {
+ /* 128bit key */
+ uint32_t key[4];
+} __attribute__((packed));
+
+struct op_crypkey_reply {
+ uint32_t __reserved;
+} __attribute__((packed));
+
+
+/* ---------------------------------------------------------------------- */
+/* Retrieve the list of exported USB devices. */
+#define OP_DEVLIST 0x05
+#define OP_REQ_DEVLIST (OP_REQUEST | OP_DEVLIST)
+#define OP_REP_DEVLIST (OP_REPLY | OP_DEVLIST)
+
+struct op_devlist_request {
+} __attribute__((packed));
+
+struct op_devlist_reply {
+ uint32_t ndev;
+ /* followed by reply_extra[] */
+} __attribute__((packed));
+
+struct op_devlist_reply_extra {
+ struct usbip_usb_device udev;
+ struct usbip_usb_interface uinf[];
+} __attribute__((packed));
+
+#define PACK_OP_DEVLIST_REQUEST(pack, request) do {\
+} while (0)
+
+#define PACK_OP_DEVLIST_REPLY(pack, reply) do {\
+ (reply)->ndev = usbip_net_pack_uint32_t(pack, (reply)->ndev);\
+} while (0)
+
+uint32_t usbip_net_pack_uint32_t(int pack, uint32_t num);
+uint16_t usbip_net_pack_uint16_t(int pack, uint16_t num);
+void usbip_net_pack_usb_device(int pack, struct usbip_usb_device *udev);
+void usbip_net_pack_usb_interface(int pack, struct usbip_usb_interface *uinf);
+
+ssize_t usbip_net_recv(int sockfd, void *buff, size_t bufflen);
+ssize_t usbip_net_send(int sockfd, void *buff, size_t bufflen);
+int usbip_net_send_op_common(int sockfd, uint32_t code, uint32_t status);
+int usbip_net_recv_op_common(int sockfd, uint16_t *code, int *status);
+int usbip_net_set_reuseaddr(int sockfd);
+int usbip_net_set_nodelay(int sockfd);
+int usbip_net_set_keepalive(int sockfd);
+int usbip_net_set_v6only(int sockfd);
+int usbip_net_tcp_connect(char *hostname, char *port);
+
+#endif /* __USBIP_NETWORK_H */
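
Reviewer note: the opcode scheme deserves spelling out: OP_REQUEST (0x80 << 8) occupies the top byte of the 16-bit op_common.code, so each operation number yields a request/reply pair, e.g. OP_REQ_IMPORT = 0x8000 | 0x03 = 0x8003 and OP_REP_IMPORT = 0x0003. These are the host-order values before usbip_net_pack_op_common() applies htons(). The arithmetic, checked at compile time:

/* C11 compile-time checks of the request/reply code composition. */
_Static_assert((0x80 << 8 | 0x03) == 0x8003, "OP_REQ_IMPORT");
_Static_assert((0x00 << 8 | 0x03) == 0x0003, "OP_REP_IMPORT");
_Static_assert((0x80 << 8 | 0x05) == 0x8005, "OP_REQ_DEVLIST");
_Static_assert((0x00 << 8 | 0x05) == 0x0005, "OP_REP_DEVLIST");
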
diff --git a/tools/usb/usbip/src/usbip_port.c b/tools/usb/usbip/src/usbip_port.c
new file mode 100644
index 000000000..7bd74fb3a
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_port.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include "vhci_driver.h"
+#include "usbip_common.h"
+
+static int list_imported_devices(void)
+{
+ int i;
+ struct usbip_imported_device *idev;
+ int ret;
+
+ if (usbip_names_init(USBIDS_FILE))
+ err("failed to open %s", USBIDS_FILE);
+
+ ret = usbip_vhci_driver_open();
+ if (ret < 0) {
+ err("open vhci_driver");
+ goto err_names_free;
+ }
+
+ printf("Imported USB devices\n");
+ printf("====================\n");
+
+ for (i = 0; i < vhci_driver->nports; i++) {
+ idev = &vhci_driver->idev[i];
+
+ if (usbip_vhci_imported_device_dump(idev) < 0)
+ goto err_driver_close;
+ }
+
+ usbip_vhci_driver_close();
+ usbip_names_free();
+
+ return ret;
+
+err_driver_close:
+ usbip_vhci_driver_close();
+err_names_free:
+ usbip_names_free();
+ return -1;
+}
+
+int usbip_port_show(__attribute__((unused)) int argc,
+ __attribute__((unused)) char *argv[])
+{
+ int ret;
+
+ ret = list_imported_devices();
+ if (ret < 0)
+ err("list imported devices");
+
+ return ret;
+}
diff --git a/tools/usb/usbip/src/usbip_unbind.c b/tools/usb/usbip/src/usbip_unbind.c
new file mode 100644
index 000000000..a4a496c9c
--- /dev/null
+++ b/tools/usb/usbip/src/usbip_unbind.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libudev.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <getopt.h>
+
+#include "usbip_common.h"
+#include "utils.h"
+#include "usbip.h"
+#include "sysfs_utils.h"
+
+static const char usbip_unbind_usage_string[] =
+ "usbip unbind <args>\n"
+ " -b, --busid=<busid> Unbind " USBIP_HOST_DRV_NAME ".ko from "
+ "device on <busid>\n";
+
+void usbip_unbind_usage(void)
+{
+ printf("usage: %s", usbip_unbind_usage_string);
+}
+
+static int unbind_device(char *busid)
+{
+ char bus_type[] = "usb";
+ int rc, ret = -1;
+
+ char unbind_attr_name[] = "unbind";
+ char unbind_attr_path[SYSFS_PATH_MAX];
+ char rebind_attr_name[] = "rebind";
+ char rebind_attr_path[SYSFS_PATH_MAX];
+
+ struct udev *udev;
+ struct udev_device *dev;
+ const char *driver;
+
+ /* Create libudev context. */
+ udev = udev_new();
+
+ /* Check whether the device with this bus ID exists. */
+ dev = udev_device_new_from_subsystem_sysname(udev, "usb", busid);
+ if (!dev) {
+ err("device with the specified bus ID does not exist");
+ goto err_close_udev;
+ }
+
+ /* Check whether the device is using usbip-host driver. */
+ driver = udev_device_get_driver(dev);
+ if (!driver || strcmp(driver, "usbip-host")) {
+ err("device is not bound to usbip-host driver");
+ goto err_close_udev;
+ }
+
+ /* Unbind device from driver. */
+ snprintf(unbind_attr_path, sizeof(unbind_attr_path), "%s/%s/%s/%s/%s/%s",
+ SYSFS_MNT_PATH, SYSFS_BUS_NAME, bus_type, SYSFS_DRIVERS_NAME,
+ USBIP_HOST_DRV_NAME, unbind_attr_name);
+
+ rc = write_sysfs_attribute(unbind_attr_path, busid, strlen(busid));
+ if (rc < 0) {
+ err("error unbinding device %s from driver", busid);
+ goto err_close_udev;
+ }
+
+ /* Notify driver of unbind. */
+ rc = modify_match_busid(busid, 0);
+ if (rc < 0) {
+ err("unable to unbind device on %s", busid);
+ goto err_close_udev;
+ }
+
+ /* Trigger new probing. */
+	snprintf(rebind_attr_path, sizeof(rebind_attr_path), "%s/%s/%s/%s/%s/%s",
+ SYSFS_MNT_PATH, SYSFS_BUS_NAME, bus_type, SYSFS_DRIVERS_NAME,
+ USBIP_HOST_DRV_NAME, rebind_attr_name);
+
+ rc = write_sysfs_attribute(rebind_attr_path, busid, strlen(busid));
+ if (rc < 0) {
+ err("error rebinding");
+ goto err_close_udev;
+ }
+
+ ret = 0;
+ info("unbind device on busid %s: complete", busid);
+
+err_close_udev:
+ udev_device_unref(dev);
+ udev_unref(udev);
+
+ return ret;
+}
+
+int usbip_unbind(int argc, char *argv[])
+{
+ static const struct option opts[] = {
+ { "busid", required_argument, NULL, 'b' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ int opt;
+ int ret = -1;
+
+ for (;;) {
+ opt = getopt_long(argc, argv, "b:", opts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'b':
+ ret = unbind_device(optarg);
+ goto out;
+ default:
+ goto err_out;
+ }
+ }
+
+err_out:
+ usbip_unbind_usage();
+out:
+ return ret;
+}
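
Reviewer note: unbind_device() makes three sysfs writes in sequence: unbind the device from usbip-host, clear the match_busid entry through modify_match_busid(), then poke the driver's rebind attribute so the usual driver takes the device back. Assuming the customary macro values from usbip_common.h (they are defined outside this hunk), the composed attribute paths look as follows:

#include <stdio.h>

/* Assumed values; the real macros are defined in usbip_common.h. */
#define SYSFS_MNT_PATH      "/sys"
#define SYSFS_BUS_NAME      "bus"
#define SYSFS_BUS_TYPE      "usb"
#define SYSFS_DRIVERS_NAME  "drivers"
#define USBIP_HOST_DRV_NAME "usbip-host"

int main(void)
{
	char path[256];

	/* Prints /sys/bus/usb/drivers/usbip-host/unbind; the "rebind"
	 * path is composed the same way with the other attribute name. */
	snprintf(path, sizeof(path), "%s/%s/%s/%s/%s/%s",
		 SYSFS_MNT_PATH, SYSFS_BUS_NAME, SYSFS_BUS_TYPE,
		 SYSFS_DRIVERS_NAME, USBIP_HOST_DRV_NAME, "unbind");
	puts(path);
	return 0;
}
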
diff --git a/tools/usb/usbip/src/usbipd.c b/tools/usb/usbip/src/usbipd.c
new file mode 100644
index 000000000..32864c529
--- /dev/null
+++ b/tools/usb/usbip/src/usbipd.c
@@ -0,0 +1,698 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ * Copyright (C) 2015-2016 Samsung Electronics
+ * Igor Kotrasinski <i.kotrasinsk@samsung.com>
+ * Krzysztof Opasiak <k.opasiak@samsung.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "../config.h"
+#endif
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <unistd.h>
+#include <netdb.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#ifdef HAVE_LIBWRAP
+#include <tcpd.h>
+#endif
+
+#include <getopt.h>
+#include <signal.h>
+#include <poll.h>
+
+#include "usbip_host_driver.h"
+#include "usbip_host_common.h"
+#include "usbip_device_driver.h"
+#include "usbip_common.h"
+#include "usbip_network.h"
+#include "list.h"
+
+#undef PROGNAME
+#define PROGNAME "usbipd"
+#define MAXSOCKFD 20
+
+#define MAIN_LOOP_TIMEOUT 10
+
+#define DEFAULT_PID_FILE "/var/run/" PROGNAME ".pid"
+
+static const char usbip_version_string[] = PACKAGE_STRING;
+
+static const char usbipd_help_string[] =
+ "usage: usbipd [options]\n"
+ "\n"
+ " -4, --ipv4\n"
+ " Bind to IPv4. Default is both.\n"
+ "\n"
+ " -6, --ipv6\n"
+ " Bind to IPv6. Default is both.\n"
+ "\n"
+ " -e, --device\n"
+ " Run in device mode.\n"
+ " Rather than drive an attached device, create\n"
+ " a virtual UDC to bind gadgets to.\n"
+ "\n"
+ " -D, --daemon\n"
+ " Run as a daemon process.\n"
+ "\n"
+ " -d, --debug\n"
+ " Print debugging information.\n"
+ "\n"
+ " -PFILE, --pid FILE\n"
+ " Write process id to FILE.\n"
+ " If no FILE specified, use " DEFAULT_PID_FILE "\n"
+ "\n"
+ " -tPORT, --tcp-port PORT\n"
+ " Listen on TCP/IP port PORT.\n"
+ "\n"
+ " -h, --help\n"
+ " Print this help.\n"
+ "\n"
+ " -v, --version\n"
+ " Show version.\n";
+
+static struct usbip_host_driver *driver;
+
+static void usbipd_help(void)
+{
+ printf("%s\n", usbipd_help_string);
+}
+
+static int recv_request_import(int sockfd)
+{
+ struct op_import_request req;
+ struct usbip_exported_device *edev;
+ struct usbip_usb_device pdu_udev;
+ struct list_head *i;
+ int found = 0;
+ int status = ST_OK;
+ int rc;
+
+ memset(&req, 0, sizeof(req));
+
+ rc = usbip_net_recv(sockfd, &req, sizeof(req));
+ if (rc < 0) {
+ dbg("usbip_net_recv failed: import request");
+ return -1;
+ }
+ PACK_OP_IMPORT_REQUEST(0, &req);
+
+ list_for_each(i, &driver->edev_list) {
+ edev = list_entry(i, struct usbip_exported_device, node);
+ if (!strncmp(req.busid, edev->udev.busid, SYSFS_BUS_ID_SIZE)) {
+ info("found requested device: %s", req.busid);
+ found = 1;
+ break;
+ }
+ }
+
+ if (found) {
+ /* should set TCP_NODELAY for usbip */
+ usbip_net_set_nodelay(sockfd);
+
+ /* export device needs a TCP/IP socket descriptor */
+ status = usbip_export_device(edev, sockfd);
+ if (status < 0)
+ status = ST_NA;
+ } else {
+ info("requested device not found: %s", req.busid);
+ status = ST_NODEV;
+ }
+
+ rc = usbip_net_send_op_common(sockfd, OP_REP_IMPORT, status);
+ if (rc < 0) {
+ dbg("usbip_net_send_op_common failed: %#0x", OP_REP_IMPORT);
+ return -1;
+ }
+
+ if (status) {
+ dbg("import request busid %s: failed", req.busid);
+ return -1;
+ }
+
+ memcpy(&pdu_udev, &edev->udev, sizeof(pdu_udev));
+ usbip_net_pack_usb_device(1, &pdu_udev);
+
+ rc = usbip_net_send(sockfd, &pdu_udev, sizeof(pdu_udev));
+ if (rc < 0) {
+ dbg("usbip_net_send failed: devinfo");
+ return -1;
+ }
+
+ dbg("import request busid %s: complete", req.busid);
+
+ return 0;
+}
+
+static int send_reply_devlist(int connfd)
+{
+ struct usbip_exported_device *edev;
+ struct usbip_usb_device pdu_udev;
+ struct usbip_usb_interface pdu_uinf;
+ struct op_devlist_reply reply;
+ struct list_head *j;
+ int rc, i;
+
+ /*
+ * Exclude devices that are already exported to a client from
+ * the exportable device list to avoid:
+ * - import requests for devices that are exported only to
+	 *	- import requests for devices that are already in use,
+	 *	  which could only fail.
+	 *	- revealing devices that one client has imported to
+	 *	  other clients.
+
+ reply.ndev = 0;
+ /* number of exported devices */
+ list_for_each(j, &driver->edev_list) {
+ edev = list_entry(j, struct usbip_exported_device, node);
+ if (edev->status != SDEV_ST_USED)
+ reply.ndev += 1;
+ }
+ info("exportable devices: %d", reply.ndev);
+
+ rc = usbip_net_send_op_common(connfd, OP_REP_DEVLIST, ST_OK);
+ if (rc < 0) {
+ dbg("usbip_net_send_op_common failed: %#0x", OP_REP_DEVLIST);
+ return -1;
+ }
+ PACK_OP_DEVLIST_REPLY(1, &reply);
+
+ rc = usbip_net_send(connfd, &reply, sizeof(reply));
+ if (rc < 0) {
+ dbg("usbip_net_send failed: %#0x", OP_REP_DEVLIST);
+ return -1;
+ }
+
+ list_for_each(j, &driver->edev_list) {
+ edev = list_entry(j, struct usbip_exported_device, node);
+ if (edev->status == SDEV_ST_USED)
+ continue;
+
+ dump_usb_device(&edev->udev);
+ memcpy(&pdu_udev, &edev->udev, sizeof(pdu_udev));
+ usbip_net_pack_usb_device(1, &pdu_udev);
+
+ rc = usbip_net_send(connfd, &pdu_udev, sizeof(pdu_udev));
+ if (rc < 0) {
+ dbg("usbip_net_send failed: pdu_udev");
+ return -1;
+ }
+
+ for (i = 0; i < edev->udev.bNumInterfaces; i++) {
+ dump_usb_interface(&edev->uinf[i]);
+ memcpy(&pdu_uinf, &edev->uinf[i], sizeof(pdu_uinf));
+ usbip_net_pack_usb_interface(1, &pdu_uinf);
+
+ rc = usbip_net_send(connfd, &pdu_uinf,
+ sizeof(pdu_uinf));
+ if (rc < 0) {
+ err("usbip_net_send failed: pdu_uinf");
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int recv_request_devlist(int connfd)
+{
+ struct op_devlist_request req;
+ int rc;
+
+ memset(&req, 0, sizeof(req));
+
+ rc = usbip_net_recv(connfd, &req, sizeof(req));
+ if (rc < 0) {
+ dbg("usbip_net_recv failed: devlist request");
+ return -1;
+ }
+
+ rc = send_reply_devlist(connfd);
+ if (rc < 0) {
+ dbg("send_reply_devlist failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int recv_pdu(int connfd)
+{
+ uint16_t code = OP_UNSPEC;
+ int ret;
+ int status;
+
+ ret = usbip_net_recv_op_common(connfd, &code, &status);
+ if (ret < 0) {
+ dbg("could not receive opcode: %#0x", code);
+ return -1;
+ }
+
+ ret = usbip_refresh_device_list(driver);
+ if (ret < 0) {
+ dbg("could not refresh device list: %d", ret);
+ return -1;
+ }
+
+ info("received request: %#0x(%d)", code, connfd);
+ switch (code) {
+ case OP_REQ_DEVLIST:
+ ret = recv_request_devlist(connfd);
+ break;
+ case OP_REQ_IMPORT:
+ ret = recv_request_import(connfd);
+ break;
+ case OP_REQ_DEVINFO:
+ case OP_REQ_CRYPKEY:
+ default:
+ err("received an unknown opcode: %#0x", code);
+ ret = -1;
+ }
+
+ if (ret == 0)
+ info("request %#0x(%d): complete", code, connfd);
+ else
+ info("request %#0x(%d): failed", code, connfd);
+
+ return ret;
+}
+
+#ifdef HAVE_LIBWRAP
+static int tcpd_auth(int connfd)
+{
+ struct request_info request;
+ int rc;
+
+ request_init(&request, RQ_DAEMON, PROGNAME, RQ_FILE, connfd, 0);
+ fromhost(&request);
+ rc = hosts_access(&request);
+ if (rc == 0)
+ return -1;
+
+ return 0;
+}
+#endif
+
+static int do_accept(int listenfd)
+{
+ int connfd;
+ struct sockaddr_storage ss;
+ socklen_t len = sizeof(ss);
+ char host[NI_MAXHOST], port[NI_MAXSERV];
+ int rc;
+
+ memset(&ss, 0, sizeof(ss));
+
+ connfd = accept(listenfd, (struct sockaddr *)&ss, &len);
+ if (connfd < 0) {
+ err("failed to accept connection");
+ return -1;
+ }
+
+ rc = getnameinfo((struct sockaddr *)&ss, len, host, sizeof(host),
+ port, sizeof(port), NI_NUMERICHOST | NI_NUMERICSERV);
+ if (rc)
+ err("getnameinfo: %s", gai_strerror(rc));
+
+#ifdef HAVE_LIBWRAP
+ rc = tcpd_auth(connfd);
+ if (rc < 0) {
+ info("denied access from %s", host);
+ close(connfd);
+ return -1;
+ }
+#endif
+ info("connection from %s:%s", host, port);
+
+ return connfd;
+}
+
+int process_request(int listenfd)
+{
+ pid_t childpid;
+ int connfd;
+
+ connfd = do_accept(listenfd);
+ if (connfd < 0)
+ return -1;
+ childpid = fork();
+ if (childpid == 0) {
+ close(listenfd);
+ recv_pdu(connfd);
+ exit(0);
+ }
+ close(connfd);
+ return 0;
+}
+
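
Reviewer note: process_request() forks one child per accepted connection and never waits on it; that avoids zombie processes only because set_signal() below installs SIG_IGN for SIGCHLD, telling the kernel to reap exited children automatically. The shape of that pattern in isolation, with the per-client work elided (a sketch, not part of the patch):

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

/* Assumes the caller has already set SIGCHLD to SIG_IGN. */
static void serve(int connfd)
{
	pid_t pid = fork();

	if (pid == 0) {		/* child: handle one client, then exit */
		/* ... recv_pdu(connfd)-style work goes here ... */
		_exit(EXIT_SUCCESS);
	}
	close(connfd);		/* parent keeps only the listening socket */
}
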
+static void addrinfo_to_text(struct addrinfo *ai, char buf[],
+ const size_t buf_size)
+{
+ char hbuf[NI_MAXHOST];
+ char sbuf[NI_MAXSERV];
+ int rc;
+
+ buf[0] = '\0';
+
+ rc = getnameinfo(ai->ai_addr, ai->ai_addrlen, hbuf, sizeof(hbuf),
+ sbuf, sizeof(sbuf), NI_NUMERICHOST | NI_NUMERICSERV);
+ if (rc)
+ err("getnameinfo: %s", gai_strerror(rc));
+
+ snprintf(buf, buf_size, "%s:%s", hbuf, sbuf);
+}
+
+static int listen_all_addrinfo(struct addrinfo *ai_head, int sockfdlist[],
+ int maxsockfd)
+{
+ struct addrinfo *ai;
+ int ret, nsockfd = 0;
+ const size_t ai_buf_size = NI_MAXHOST + NI_MAXSERV + 2;
+ char ai_buf[ai_buf_size];
+
+ for (ai = ai_head; ai && nsockfd < maxsockfd; ai = ai->ai_next) {
+ int sock;
+
+ addrinfo_to_text(ai, ai_buf, ai_buf_size);
+ dbg("opening %s", ai_buf);
+ sock = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
+ if (sock < 0) {
+ err("socket: %s: %d (%s)",
+ ai_buf, errno, strerror(errno));
+ continue;
+ }
+
+ usbip_net_set_reuseaddr(sock);
+ usbip_net_set_nodelay(sock);
+		/* We use separate sockets for IPv4 and IPv6
+ * (see do_standalone_mode()) */
+ usbip_net_set_v6only(sock);
+
+ ret = bind(sock, ai->ai_addr, ai->ai_addrlen);
+ if (ret < 0) {
+ err("bind: %s: %d (%s)",
+ ai_buf, errno, strerror(errno));
+ close(sock);
+ continue;
+ }
+
+ ret = listen(sock, SOMAXCONN);
+ if (ret < 0) {
+ err("listen: %s: %d (%s)",
+ ai_buf, errno, strerror(errno));
+ close(sock);
+ continue;
+ }
+
+ info("listening on %s", ai_buf);
+ sockfdlist[nsockfd++] = sock;
+ }
+
+ return nsockfd;
+}
+
+static struct addrinfo *do_getaddrinfo(char *host, int ai_family)
+{
+ struct addrinfo hints, *ai_head;
+ int rc;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = ai_family;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ rc = getaddrinfo(host, usbip_port_string, &hints, &ai_head);
+ if (rc) {
+ err("failed to get a network address %s: %s", usbip_port_string,
+ gai_strerror(rc));
+ return NULL;
+ }
+
+ return ai_head;
+}
+
+static void signal_handler(int i)
+{
+ dbg("received '%s' signal", strsignal(i));
+}
+
+static void set_signal(void)
+{
+ struct sigaction act;
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = signal_handler;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGTERM, &act, NULL);
+ sigaction(SIGINT, &act, NULL);
+ act.sa_handler = SIG_IGN;
+ sigaction(SIGCHLD, &act, NULL);
+}
+
+static const char *pid_file;
+
+static void write_pid_file(void)
+{
+ if (pid_file) {
+ dbg("creating pid file %s", pid_file);
+ FILE *fp;
+
+ fp = fopen(pid_file, "w");
+ if (!fp) {
+ err("pid_file: %s: %d (%s)",
+ pid_file, errno, strerror(errno));
+ return;
+ }
+ fprintf(fp, "%d\n", getpid());
+ fclose(fp);
+ }
+}
+
+static void remove_pid_file(void)
+{
+ if (pid_file) {
+ dbg("removing pid file %s", pid_file);
+ unlink(pid_file);
+ }
+}
+
+static int do_standalone_mode(int daemonize, int ipv4, int ipv6)
+{
+ struct addrinfo *ai_head;
+ int sockfdlist[MAXSOCKFD];
+ int nsockfd, family;
+ int i, terminate;
+ struct pollfd *fds;
+ struct timespec timeout;
+ sigset_t sigmask;
+
+ if (usbip_driver_open(driver))
+ return -1;
+
+ if (daemonize) {
+ if (daemon(0, 0) < 0) {
+ err("daemonizing failed: %s", strerror(errno));
+ usbip_driver_close(driver);
+ return -1;
+ }
+ umask(0);
+ usbip_use_syslog = 1;
+ }
+ set_signal();
+ write_pid_file();
+
+ info("starting " PROGNAME " (%s)", usbip_version_string);
+
+ /*
+ * To suppress warnings on systems with bindv6only disabled
+ * (default), we use separate sockets for IPv6 and IPv4 and set
+ * IPV6_V6ONLY on the IPv6 sockets.
+ */
+ if (ipv4 && ipv6)
+ family = AF_UNSPEC;
+ else if (ipv4)
+ family = AF_INET;
+ else
+ family = AF_INET6;
+
+ ai_head = do_getaddrinfo(NULL, family);
+ if (!ai_head) {
+ usbip_driver_close(driver);
+ return -1;
+ }
+ nsockfd = listen_all_addrinfo(ai_head, sockfdlist,
+ sizeof(sockfdlist) / sizeof(*sockfdlist));
+ freeaddrinfo(ai_head);
+ if (nsockfd <= 0) {
+ err("failed to open a listening socket");
+ usbip_driver_close(driver);
+ return -1;
+ }
+
+ dbg("listening on %d address%s", nsockfd, (nsockfd == 1) ? "" : "es");
+
+ fds = calloc(nsockfd, sizeof(struct pollfd));
+ for (i = 0; i < nsockfd; i++) {
+ fds[i].fd = sockfdlist[i];
+ fds[i].events = POLLIN;
+ }
+ timeout.tv_sec = MAIN_LOOP_TIMEOUT;
+ timeout.tv_nsec = 0;
+
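+ /*
+ * Block every signal except SIGTERM and SIGINT while inside
+ * ppoll(), so only those two can interrupt the wait.
+ */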
+ sigfillset(&sigmask);
+ sigdelset(&sigmask, SIGTERM);
+ sigdelset(&sigmask, SIGINT);
+
+ terminate = 0;
+ while (!terminate) {
+ int r;
+
+ r = ppoll(fds, nsockfd, &timeout, &sigmask);
+ if (r < 0) {
+ dbg("%s", strerror(errno));
+ terminate = 1;
+ } else if (r) {
+ for (i = 0; i < nsockfd; i++) {
+ if (fds[i].revents & POLLIN) {
+ dbg("read event on fd[%d]=%d",
+ i, sockfdlist[i]);
+ process_request(sockfdlist[i]);
+ }
+ }
+ } else {
+ dbg("heartbeat timeout on ppoll()");
+ }
+ }
+
+ info("shutting down " PROGNAME);
+ free(fds);
+ usbip_driver_close(driver);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "daemon", no_argument, NULL, 'D' },
+ { "debug", no_argument, NULL, 'd' },
+ { "device", no_argument, NULL, 'e' },
+ { "pid", optional_argument, NULL, 'P' },
+ { "tcp-port", required_argument, NULL, 't' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'v' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ enum {
+ cmd_standalone_mode = 1,
+ cmd_help,
+ cmd_version
+ } cmd;
+
+ int daemonize = 0;
+ int ipv4 = 0, ipv6 = 0;
+ int opt, rc = -1;
+
+ pid_file = NULL;
+
+ usbip_use_stderr = 1;
+ usbip_use_syslog = 0;
+
+ if (geteuid() != 0)
+ err("not running as root?");
+
+ cmd = cmd_standalone_mode;
+ driver = &host_driver;
+ for (;;) {
+ opt = getopt_long(argc, argv, "46DdeP::t:hv", longopts, NULL);
+
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case '4':
+ ipv4 = 1;
+ break;
+ case '6':
+ ipv6 = 1;
+ break;
+ case 'D':
+ daemonize = 1;
+ break;
+ case 'd':
+ usbip_use_debug = 1;
+ break;
+ case 'h':
+ cmd = cmd_help;
+ break;
+ case 'P':
+ pid_file = optarg ? optarg : DEFAULT_PID_FILE;
+ break;
+ case 't':
+ usbip_setup_port_number(optarg);
+ break;
+ case 'v':
+ cmd = cmd_version;
+ break;
+ case 'e':
+ driver = &device_driver;
+ break;
+ case '?':
+ usbipd_help();
+ default:
+ goto err_out;
+ }
+ }
+
+ if (!ipv4 && !ipv6)
+ ipv4 = ipv6 = 1;
+
+ switch (cmd) {
+ case cmd_standalone_mode:
+ rc = do_standalone_mode(daemonize, ipv4, ipv6);
+ remove_pid_file();
+ break;
+ case cmd_version:
+ printf(PROGNAME " (%s)\n", usbip_version_string);
+ rc = 0;
+ break;
+ case cmd_help:
+ usbipd_help();
+ rc = 0;
+ break;
+ default:
+ usbipd_help();
+ goto err_out;
+ }
+
+err_out:
+ return (rc > -1 ? EXIT_SUCCESS : EXIT_FAILURE);
+}
diff --git a/tools/usb/usbip/src/utils.c b/tools/usb/usbip/src/utils.c
new file mode 100644
index 000000000..3d7b42e77
--- /dev/null
+++ b/tools/usb/usbip/src/utils.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "usbip_common.h"
+#include "utils.h"
+#include "sysfs_utils.h"
+
+int modify_match_busid(char *busid, int add)
+{
+ char attr_name[] = "match_busid";
+ char command[SYSFS_BUS_ID_SIZE + 4];
+ char match_busid_attr_path[SYSFS_PATH_MAX];
+ int rc;
+ int cmd_size;
+
+ snprintf(match_busid_attr_path, sizeof(match_busid_attr_path),
+ "%s/%s/%s/%s/%s/%s", SYSFS_MNT_PATH, SYSFS_BUS_NAME,
+ SYSFS_BUS_TYPE, SYSFS_DRIVERS_NAME, USBIP_HOST_DRV_NAME,
+ attr_name);
+
+ if (add)
+ cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "add %s",
+ busid);
+ else
+ cmd_size = snprintf(command, SYSFS_BUS_ID_SIZE + 4, "del %s",
+ busid);
+
+ rc = write_sysfs_attribute(match_busid_attr_path, command,
+ cmd_size);
+ if (rc < 0) {
+ dbg("failed to write match_busid: %s", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tools/usb/usbip/src/utils.h b/tools/usb/usbip/src/utils.h
new file mode 100644
index 000000000..5916fd3e0
--- /dev/null
+++ b/tools/usb/usbip/src/utils.h
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2011 matt mooney <mfm@muteddisk.com>
+ * 2005-2007 Takahiro Hirofuchi
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __UTILS_H
+#define __UTILS_H
+
+int modify_match_busid(char *busid, int add);
+
+#endif /* __UTILS_H */
+
diff --git a/tools/usb/usbip/vudc/vudc_server_example.sh b/tools/usb/usbip/vudc/vudc_server_example.sh
new file mode 100755
index 000000000..2736be64f
--- /dev/null
+++ b/tools/usb/usbip/vudc/vudc_server_example.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+################################################################################
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# For more information, please refer to <http://unlicense.org/>
+################################################################################
+
+################################################################################
+# This is a sample script which shows how to use vUDC with ConfigFS gadgets
+################################################################################
+
+# Stop script on error
+set -e
+
+################################################################################
+# Create your USB gadget
+# You may use the bare ConfigFS interface (as below),
+# libusbgx, or the gt tool.
+# Instead of ConfigFS gadgets you may use any of the legacy gadgets.
+################################################################################
+CONFIGFS_MOUNT_POINT="/sys/kernel/config"
+GADGET_NAME="g1"
+ID_VENDOR="0x1d6b"
+ID_PRODUCT="0x0104"
+
+cd ${CONFIGFS_MOUNT_POINT}/usb_gadget
+# Create a new USB gadget
+mkdir ${GADGET_NAME}
+cd ${GADGET_NAME}
+
+# This gadget contains one function - ACM (serial port over USB)
+FUNC_DIR="functions/acm.ser0"
+mkdir ${FUNC_DIR}
+
+# Just one configuration
+mkdir configs/c.1
+ln -s ${FUNC_DIR} configs/c.1
+
+# Set our gadget identity
+echo ${ID_VENDOR} > idVendor
+echo ${ID_PRODUCT} > idProduct
+
+################################################################################
+# Load the vudc module if no vUDC instance is available
+# You may change the value of the num param to get more than one vUDC instance
+################################################################################
+[[ -d /sys/class/udc/usbip-vudc.0 ]] || modprobe usbip-vudc num=1
+
+################################################################################
+# Bind gadget to our vUDC
+# By default we bind to the first one, but you may change this if you would
+# like to use more than one instance
+################################################################################
+echo "usbip-vudc.0" > UDC
+
+################################################################################
+# Let's now run our usbip daemon in a USB device mode
+################################################################################
+usbipd --device &
+
+################################################################################
+# Now your USB gadget is available via the USB/IP protocol.
+# To prepare your client, you should ensure that the usbip-vhci module is in
+# your kernel. If it's not, you can load it:
+#
+# $ modprobe usbip-vhci
+#
+# To check the availability of your gadget you may try to list the devices
+# exported on a remote server:
+#
+# $ modprobe usbip-vhci
+# $ usbip list -r $SERVER_IP
+# Exportable USB devices
+# ======================
+# usbipd: info: request 0x8005(6): complete
+# - 127.0.0.1
+# usbip-vudc.0: Linux Foundation : unknown product (1d6b:0104)
+# : /sys/devices/platform/usbip-vudc.0
+# : (Defined at Interface level) (00/00/00)
+#
+# To attach this device to your client you may use:
+#
+# $ usbip attach -r $SERVER_IP -d usbip-vudc.0
+#
+################################################################################
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
new file mode 100644
index 000000000..1cfbb0157
--- /dev/null
+++ b/tools/virtio/.gitignore
@@ -0,0 +1,3 @@
+*.d
+virtio_test
+vringh_test
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
new file mode 100644
index 000000000..8e2a90811
--- /dev/null
+++ b/tools/virtio/Makefile
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+all: test mod
+test: virtio_test vringh_test
+virtio_test: virtio_ring.o virtio_test.o
+vringh_test: vringh_test.o vringh.o virtio_ring.o
+
+CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
+vpath %.c ../../drivers/virtio ../../drivers/vhost
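+# "mod" builds the vhost_test module against the enclosing kernel source tree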
+mod:
+ ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V}
+.PHONY: all test mod clean
+clean:
+ ${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
+ vhost_test/Module.symvers vhost_test/modules.order *.d
+-include *.d
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
new file mode 100644
index 000000000..d0351f83a
--- /dev/null
+++ b/tools/virtio/asm/barrier.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if defined(__i386__) || defined(__x86_64__)
+#define barrier() asm volatile("" ::: "memory")
+#define virt_mb() __sync_synchronize()
+#define virt_rmb() barrier()
+#define virt_wmb() barrier()
+/* Atomic store should be enough, but gcc generates worse code in that case. */
+#define virt_store_mb(var, value) do { \
+ typeof(var) virt_store_mb_value = (value); \
+ __atomic_exchange(&(var), &virt_store_mb_value, &virt_store_mb_value, \
+ __ATOMIC_SEQ_CST); \
+ barrier(); \
+} while (0)
+/* Weak barriers should be used. If a strong one is ever needed, it's a bug */
+# define mb() abort()
+# define dma_rmb() abort()
+# define dma_wmb() abort()
+#else
+#error Please fill in barrier macros
+#endif
+
diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h
new file mode 100644
index 000000000..b14c2c3b6
--- /dev/null
+++ b/tools/virtio/linux/bug.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef BUG_H
+#define BUG_H
+
+#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
+
+#define BUILD_BUG_ON(x)
+
+#define BUG() abort()
+
+#endif /* BUG_H */
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
new file mode 100644
index 000000000..903dc9c4b
--- /dev/null
+++ b/tools/virtio/linux/compiler.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_COMPILER_H
+#define LINUX_COMPILER_H
+
+#define WRITE_ONCE(var, val) \
+ (*((volatile typeof(val) *)(&(var))) = (val))
+
+#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+
+#endif
diff --git a/tools/virtio/linux/device.h b/tools/virtio/linux/device.h
new file mode 100644
index 000000000..4ad7e1df0
--- /dev/null
+++ b/tools/virtio/linux/device.h
@@ -0,0 +1,2 @@
+#ifndef LINUX_DEVICE_H
+#endif
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
new file mode 100644
index 000000000..f91aeb5fe
--- /dev/null
+++ b/tools/virtio/linux/dma-mapping.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_DMA_MAPPING_H
+#define _LINUX_DMA_MAPPING_H
+
+#ifdef CONFIG_HAS_DMA
+# error Virtio userspace code does not support CONFIG_HAS_DMA
+#endif
+
+enum dma_data_direction {
+ DMA_BIDIRECTIONAL = 0,
+ DMA_TO_DEVICE = 1,
+ DMA_FROM_DEVICE = 2,
+ DMA_NONE = 3,
+};
+
+#define dma_alloc_coherent(d, s, hp, f) ({ \
+ void *__dma_alloc_coherent_p = kmalloc((s), (f)); \
+ *(hp) = (unsigned long)__dma_alloc_coherent_p; \
+ __dma_alloc_coherent_p; \
+})
+
+#define dma_free_coherent(d, s, p, h) kfree(p)
+
+#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o))
+
+#define dma_map_single(d, p, s, dir) (virt_to_phys(p))
+#define dma_mapping_error(...) (0)
+
+#define dma_unmap_single(...) do { } while (0)
+#define dma_unmap_page(...) do { } while (0)
+
+#endif
diff --git a/tools/virtio/linux/err.h b/tools/virtio/linux/err.h
new file mode 100644
index 000000000..0943c644a
--- /dev/null
+++ b/tools/virtio/linux/err.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ERR_H
+#define ERR_H
+#define MAX_ERRNO 4095
+
+#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+ return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+static inline long __must_check IS_ERR(const void *ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
+{
+ return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+#endif /* ERR_H */
diff --git a/tools/virtio/linux/export.h b/tools/virtio/linux/export.h
new file mode 100644
index 000000000..416875e29
--- /dev/null
+++ b/tools/virtio/linux/export.h
@@ -0,0 +1,3 @@
+#define EXPORT_SYMBOL_GPL(sym) extern typeof(sym) sym
+#define EXPORT_SYMBOL(sym) extern typeof(sym) sym
+
diff --git a/tools/virtio/linux/hrtimer.h b/tools/virtio/linux/hrtimer.h
new file mode 100644
index 000000000..e69de29bb
--- /dev/null
+++ b/tools/virtio/linux/hrtimer.h
diff --git a/tools/virtio/linux/irqreturn.h b/tools/virtio/linux/irqreturn.h
new file mode 100644
index 000000000..a3c4e7be7
--- /dev/null
+++ b/tools/virtio/linux/irqreturn.h
@@ -0,0 +1 @@
+#include "../../../include/linux/irqreturn.h"
diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h
new file mode 100644
index 000000000..7ef45a4a3
--- /dev/null
+++ b/tools/virtio/linux/kernel.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef KERNEL_H
+#define KERNEL_H
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdarg.h>
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/printk.h>
+#include <linux/bug.h>
+#include <errno.h>
+#include <unistd.h>
+#include <asm/barrier.h>
+
+#define CONFIG_SMP
+
+#define PAGE_SIZE getpagesize()
+#define PAGE_MASK (~(PAGE_SIZE-1))
+#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & PAGE_MASK)
+
+/* generic data direction definitions */
+#define READ 0
+#define WRITE 1
+
+typedef unsigned long long phys_addr_t;
+typedef unsigned long long dma_addr_t;
+typedef size_t __kernel_size_t;
+typedef unsigned int __wsum;
+
+struct page {
+ unsigned long long dummy;
+};
+
+/* Physical == Virtual */
+#define virt_to_phys(p) ((unsigned long)p)
+#define phys_to_virt(a) ((void *)(unsigned long)(a))
+/* Page address: Virtual / 4K */
+#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
+#define virt_to_page(p) ((struct page *)((unsigned long)p & PAGE_MASK))
+
+#define offset_in_page(p) (((unsigned long)p) % PAGE_SIZE)
+
+#define __printf(a,b) __attribute__((format(printf,a,b)))
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
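+/*
+ * Test hooks: when __kmalloc_fake is non-NULL, kmalloc() returns it instead
+ * of a real allocation, and kfree() ignores pointers inside
+ * [__kfree_ignore_start, __kfree_ignore_end).  The test programs can use
+ * these to substitute and guard buffers.
+ */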
+extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+static inline void *kmalloc(size_t s, gfp_t gfp)
+{
+ if (__kmalloc_fake)
+ return __kmalloc_fake;
+ return malloc(s);
+}
+static inline void *kmalloc_array(unsigned n, size_t s, gfp_t gfp)
+{
+ return kmalloc(n * s, gfp);
+}
+
+static inline void *kzalloc(size_t s, gfp_t gfp)
+{
+ void *p = kmalloc(s, gfp);
+
+ memset(p, 0, s);
+ return p;
+}
+
+static inline void *alloc_pages_exact(size_t s, gfp_t gfp)
+{
+ return kmalloc(s, gfp);
+}
+
+static inline void kfree(void *p)
+{
+ if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
+ return;
+ free(p);
+}
+
+static inline void free_pages_exact(void *p, size_t s)
+{
+ kfree(p);
+}
+
+static inline void *krealloc(void *p, size_t s, gfp_t gfp)
+{
+ return realloc(p, s);
+}
+
+
+static inline unsigned long __get_free_page(gfp_t gfp)
+{
+ void *p;
+
+ posix_memalign(&p, PAGE_SIZE, PAGE_SIZE);
+ return (unsigned long)p;
+}
+
+static inline void free_page(unsigned long addr)
+{
+ free((void *)addr);
+}
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type,member) );})
+
+#define uninitialized_var(x) x = x
+
+# ifndef likely
+# define likely(x) (__builtin_expect(!!(x), 1))
+# endif
+# ifndef unlikely
+# define unlikely(x) (__builtin_expect(!!(x), 0))
+# endif
+
+#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
+
+#define WARN_ON_ONCE(cond) ((cond) ? fprintf (stderr, "WARNING\n") : 0)
+
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
+#define list_add_tail(a, b) do {} while (0)
+#define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
+/* end of stubs */
+
+#endif /* KERNEL_H */
diff --git a/tools/virtio/linux/kmemleak.h b/tools/virtio/linux/kmemleak.h
new file mode 100644
index 000000000..c07072270
--- /dev/null
+++ b/tools/virtio/linux/kmemleak.h
@@ -0,0 +1,3 @@
+static inline void kmemleak_ignore(const void *ptr)
+{
+}
diff --git a/tools/virtio/linux/module.h b/tools/virtio/linux/module.h
new file mode 100644
index 000000000..9dfa96fea
--- /dev/null
+++ b/tools/virtio/linux/module.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/export.h>
+
+#define MODULE_LICENSE(__MODULE_LICENSE_value) \
+ static __attribute__((unused)) const char *__MODULE_LICENSE_name = \
+ __MODULE_LICENSE_value
+
diff --git a/tools/virtio/linux/printk.h b/tools/virtio/linux/printk.h
new file mode 100644
index 000000000..9f2423bd8
--- /dev/null
+++ b/tools/virtio/linux/printk.h
@@ -0,0 +1,4 @@
+#include "../../../include/linux/kern_levels.h"
+
+#define printk printf
+#define vprintk vprintf
diff --git a/tools/virtio/linux/ratelimit.h b/tools/virtio/linux/ratelimit.h
new file mode 100644
index 000000000..dcce1725f
--- /dev/null
+++ b/tools/virtio/linux/ratelimit.h
@@ -0,0 +1,4 @@
+#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0
+
+#define __ratelimit(x) (*(x))
+
diff --git a/tools/virtio/linux/scatterlist.h b/tools/virtio/linux/scatterlist.h
new file mode 100644
index 000000000..369ee308b
--- /dev/null
+++ b/tools/virtio/linux/scatterlist.h
@@ -0,0 +1,177 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef SCATTERLIST_H
+#define SCATTERLIST_H
+#include <linux/kernel.h>
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+ dma_addr_t dma_address;
+};
+
+/* Scatterlist helpers, stolen from linux/scatterlist.h */
+#define sg_is_chain(sg) ((sg)->page_link & 0x01)
+#define sg_is_last(sg) ((sg)->page_link & 0x02)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~0x03))
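+/*
+ * The two low bits of page_link encode the entry type: bit 0 marks a chain
+ * pointer to another scatterlist, bit 1 marks the last entry.  This is why
+ * page pointers must be at least 4-byte aligned (see sg_assign_page()).
+ */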
+
+/**
+ * sg_assign_page - Assign a given page to an SG entry
+ * @sg: SG entry
+ * @page: The page
+ *
+ * Description:
+ * Assign page to sg entry. Also see sg_set_page(), the most commonly used
+ * variant.
+ *
+ **/
+static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
+{
+ unsigned long page_link = sg->page_link & 0x3;
+
+ /*
+ * In order for the low bit stealing approach to work, pages
+ * must be aligned at a 32-bit boundary as a minimum.
+ */
+ BUG_ON((unsigned long) page & 0x03);
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg_is_chain(sg));
+#endif
+ sg->page_link = page_link | (unsigned long) page;
+}
+
+/**
+ * sg_set_page - Set sg entry to point at given page
+ * @sg: SG entry
+ * @page: The page
+ * @len: Length of data
+ * @offset: Offset into page
+ *
+ * Description:
+ * Use this function to set an sg entry pointing at a page, never assign
+ * the page directly. We encode sg table information in the lower bits
+ * of the page pointer. See sg_page() for looking up the page belonging
+ * to an sg entry.
+ *
+ **/
+static inline void sg_set_page(struct scatterlist *sg, struct page *page,
+ unsigned int len, unsigned int offset)
+{
+ sg_assign_page(sg, page);
+ sg->offset = offset;
+ sg->length = len;
+}
+
+static inline struct page *sg_page(struct scatterlist *sg)
+{
+#ifdef CONFIG_DEBUG_SG
+ BUG_ON(sg_is_chain(sg));
+#endif
+ return (struct page *)((sg)->page_link & ~0x3);
+}
+
+/*
+ * Loop over each sg element, following the pointer to a new list if necessary
+ */
+#define for_each_sg(sglist, sg, nr, __i) \
+ for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
+
+/**
+ * sg_chain - Chain two sglists together
+ * @prv: First scatterlist
+ * @prv_nents: Number of entries in prv
+ * @sgl: Second scatterlist
+ *
+ * Description:
+ * Links @prv and @sgl together, to form a longer scatterlist.
+ *
+ **/
+static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
+ struct scatterlist *sgl)
+{
+ /*
+ * offset and length are unused for chain entry. Clear them.
+ */
+ prv[prv_nents - 1].offset = 0;
+ prv[prv_nents - 1].length = 0;
+
+ /*
+ * Set lowest bit to indicate a link pointer, and make sure to clear
+ * the termination bit if it happens to be set.
+ */
+ prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
+}
+
+/**
+ * sg_mark_end - Mark the end of the scatterlist
+ * @sg: SG entry
+ *
+ * Description:
+ * Marks the passed in sg entry as the termination point for the sg
+ * table. A call to sg_next() on this entry will return NULL.
+ *
+ **/
+static inline void sg_mark_end(struct scatterlist *sg)
+{
+ /*
+ * Set termination bit, clear potential chain bit
+ */
+ sg->page_link |= 0x02;
+ sg->page_link &= ~0x01;
+}
+
+/**
+ * sg_unmark_end - Undo setting the end of the scatterlist
+ * @sg: SG entry
+ *
+ * Description:
+ * Removes the termination marker from the given entry of the scatterlist.
+ *
+ **/
+static inline void sg_unmark_end(struct scatterlist *sg)
+{
+ sg->page_link &= ~0x02;
+}
+
+static inline struct scatterlist *sg_next(struct scatterlist *sg)
+{
+ if (sg_is_last(sg))
+ return NULL;
+
+ sg++;
+ if (unlikely(sg_is_chain(sg)))
+ sg = sg_chain_ptr(sg);
+
+ return sg;
+}
+
+static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
+{
+ memset(sgl, 0, sizeof(*sgl) * nents);
+ sg_mark_end(&sgl[nents - 1]);
+}
+
+static inline dma_addr_t sg_phys(struct scatterlist *sg)
+{
+ return page_to_phys(sg_page(sg)) + sg->offset;
+}
+
+static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
+ unsigned int buflen)
+{
+ sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
+}
+
+static inline void sg_init_one(struct scatterlist *sg,
+ const void *buf, unsigned int buflen)
+{
+ sg_init_table(sg, 1);
+ sg_set_buf(sg, buf, buflen);
+}
+#endif /* SCATTERLIST_H */
diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h
new file mode 100644
index 000000000..319dcaa07
--- /dev/null
+++ b/tools/virtio/linux/slab.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_SLAB_H
+#define GFP_KERNEL 0
+#define GFP_ATOMIC 0
+#define __GFP_NOWARN 0
+#define __GFP_ZERO 0
+#endif
diff --git a/tools/virtio/linux/thread_info.h b/tools/virtio/linux/thread_info.h
new file mode 100644
index 000000000..e0f610d08
--- /dev/null
+++ b/tools/virtio/linux/thread_info.h
@@ -0,0 +1 @@
+#define check_copy_size(A, B, C) (1)
diff --git a/tools/virtio/linux/uaccess.h b/tools/virtio/linux/uaccess.h
new file mode 100644
index 000000000..991dfb263
--- /dev/null
+++ b/tools/virtio/linux/uaccess.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef UACCESS_H
+#define UACCESS_H
+
+#include <linux/compiler.h>
+
+extern void *__user_addr_min, *__user_addr_max;
+
+static inline void __chk_user_ptr(const volatile void *p, size_t size)
+{
+ assert(p >= __user_addr_min && p + size <= __user_addr_max);
+}
+
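+/*
+ * put_user()/get_user() always "succeed" (evaluate to 0) once the pointer
+ * has been asserted to lie inside the simulated user range
+ * [__user_addr_min, __user_addr_max).
+ */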
+#define put_user(x, ptr) \
+({ \
+ typeof(ptr) __pu_ptr = (ptr); \
+ __chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
+ WRITE_ONCE(*(__pu_ptr), x); \
+ 0; \
+})
+
+#define get_user(x, ptr) \
+({ \
+ typeof(ptr) __gu_ptr = (ptr); \
+ __chk_user_ptr(__gu_ptr, sizeof(*__gu_ptr)); \
+ x = READ_ONCE(*(__gu_ptr)); \
+ 0; \
+})
+
+static void volatile_memcpy(volatile char *to, const volatile char *from,
+ unsigned long n)
+{
+ while (n--)
+ *(to++) = *(from++);
+}
+
+static inline int copy_from_user(void *to, const void __user volatile *from,
+ unsigned long n)
+{
+ __chk_user_ptr(from, n);
+ volatile_memcpy(to, from, n);
+ return 0;
+}
+
+static inline int copy_to_user(void __user volatile *to, const void *from,
+ unsigned long n)
+{
+ __chk_user_ptr(to, n);
+ volatile_memcpy(to, from, n);
+ return 0;
+}
+#endif /* UACCESS_H */
diff --git a/tools/virtio/linux/uio.h b/tools/virtio/linux/uio.h
new file mode 100644
index 000000000..cd20f0ba3
--- /dev/null
+++ b/tools/virtio/linux/uio.h
@@ -0,0 +1,3 @@
+#include <linux/kernel.h>
+
+#include "../../../include/linux/uio.h"
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
new file mode 100644
index 000000000..b751350d4
--- /dev/null
+++ b/tools/virtio/linux/virtio.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef LINUX_VIRTIO_H
+#define LINUX_VIRTIO_H
+#include <linux/scatterlist.h>
+#include <linux/kernel.h>
+
+struct device {
+ void *parent;
+};
+
+struct virtio_device {
+ struct device dev;
+ u64 features;
+};
+
+struct virtqueue {
+ /* TODO: commented as list macros are empty stubs for now.
+ * Broken but enough for virtio_ring.c
+ * struct list_head list; */
+ void (*callback)(struct virtqueue *vq);
+ const char *name;
+ struct virtio_device *vdev;
+ unsigned int index;
+ unsigned int num_free;
+ void *priv;
+};
+
+/* Interfaces exported by virtio_ring. */
+int virtqueue_add_sgs(struct virtqueue *vq,
+ struct scatterlist *sgs[],
+ unsigned int out_sgs,
+ unsigned int in_sgs,
+ void *data,
+ gfp_t gfp);
+
+int virtqueue_add_outbuf(struct virtqueue *vq,
+ struct scatterlist sg[], unsigned int num,
+ void *data,
+ gfp_t gfp);
+
+int virtqueue_add_inbuf(struct virtqueue *vq,
+ struct scatterlist sg[], unsigned int num,
+ void *data,
+ gfp_t gfp);
+
+bool virtqueue_kick(struct virtqueue *vq);
+
+void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
+
+void virtqueue_disable_cb(struct virtqueue *vq);
+
+bool virtqueue_enable_cb(struct virtqueue *vq);
+bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
+
+void *virtqueue_detach_unused_buf(struct virtqueue *vq);
+struct virtqueue *vring_new_virtqueue(unsigned int index,
+ unsigned int num,
+ unsigned int vring_align,
+ struct virtio_device *vdev,
+ bool weak_barriers,
+ bool ctx,
+ void *pages,
+ bool (*notify)(struct virtqueue *vq),
+ void (*callback)(struct virtqueue *vq),
+ const char *name);
+void vring_del_virtqueue(struct virtqueue *vq);
+
+#endif
diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h
new file mode 100644
index 000000000..5b50f7eeb
--- /dev/null
+++ b/tools/virtio/linux/virtio_byteorder.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H
+#define _LINUX_VIRTIO_BYTEORDER_STUB_H
+
+#include <asm/byteorder.h>
+#include "../../include/linux/byteorder/generic.h"
+#include "../../include/linux/virtio_byteorder.h"
+
+#endif
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
new file mode 100644
index 000000000..dbf14c1e2
--- /dev/null
+++ b/tools/virtio/linux/virtio_config.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/virtio_byteorder.h>
+#include <linux/virtio.h>
+#include <uapi/linux/virtio_config.h>
+
+/**
+ * __virtio_test_bit - helper to test feature bits. For use by transports.
+ * Devices should normally use virtio_has_feature,
+ * which includes more checks.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline bool __virtio_test_bit(const struct virtio_device *vdev,
+ unsigned int fbit)
+{
+ return vdev->features & (1ULL << fbit);
+}
+
+/**
+ * __virtio_set_bit - helper to set feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_set_bit(struct virtio_device *vdev,
+ unsigned int fbit)
+{
+ vdev->features |= (1ULL << fbit);
+}
+
+/**
+ * __virtio_clear_bit - helper to clear feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_clear_bit(struct virtio_device *vdev,
+ unsigned int fbit)
+{
+ vdev->features &= ~(1ULL << fbit);
+}
+
+#define virtio_has_feature(dev, feature) \
+ (__virtio_test_bit((dev), feature))
+
+/**
+ * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * @vdev: the device
+ */
+static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+{
+ /*
+ * Note the reverse polarity of the quirk feature (compared to most
+ * other features), this is for compatibility with legacy systems.
+ */
+ return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+}
+
+static inline bool virtio_is_little_endian(struct virtio_device *vdev)
+{
+ return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) ||
+ virtio_legacy_is_little_endian();
+}
+
+/* Memory accessors */
+static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
+{
+ return __virtio16_to_cpu(virtio_is_little_endian(vdev), val);
+}
+
+static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
+{
+ return __cpu_to_virtio16(virtio_is_little_endian(vdev), val);
+}
+
+static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
+{
+ return __virtio32_to_cpu(virtio_is_little_endian(vdev), val);
+}
+
+static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
+{
+ return __cpu_to_virtio32(virtio_is_little_endian(vdev), val);
+}
+
+static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
+{
+ return __virtio64_to_cpu(virtio_is_little_endian(vdev), val);
+}
+
+static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
+{
+ return __cpu_to_virtio64(virtio_is_little_endian(vdev), val);
+}
diff --git a/tools/virtio/linux/virtio_ring.h b/tools/virtio/linux/virtio_ring.h
new file mode 100644
index 000000000..8949c4e27
--- /dev/null
+++ b/tools/virtio/linux/virtio_ring.h
@@ -0,0 +1 @@
+#include "../../../include/linux/virtio_ring.h"
diff --git a/tools/virtio/linux/vringh.h b/tools/virtio/linux/vringh.h
new file mode 100644
index 000000000..9348957be
--- /dev/null
+++ b/tools/virtio/linux/vringh.h
@@ -0,0 +1 @@
+#include "../../../include/linux/vringh.h"
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
new file mode 100644
index 000000000..85c98c281
--- /dev/null
+++ b/tools/virtio/ringtest/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+all:
+
+all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring
+
+CFLAGS += -Wall
+CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+
+main.o: main.c main.h
+ring.o: ring.c main.h
+ptr_ring.o: ptr_ring.c main.h ../../../include/linux/ptr_ring.h
+virtio_ring_0_9.o: virtio_ring_0_9.c main.h
+virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
+virtio_ring_inorder.o: virtio_ring_inorder.c virtio_ring_0_9.c main.h
+ring: ring.o main.o
+virtio_ring_0_9: virtio_ring_0_9.o main.o
+virtio_ring_poll: virtio_ring_poll.o main.o
+virtio_ring_inorder: virtio_ring_inorder.o main.o
+ptr_ring: ptr_ring.o main.o
+noring: noring.o main.o
+clean:
+ -rm main.o
+ -rm ring.o ring
+ -rm virtio_ring_0_9.o virtio_ring_0_9
+ -rm virtio_ring_poll.o virtio_ring_poll
+ -rm virtio_ring_inorder.o virtio_ring_inorder
+ -rm ptr_ring.o ptr_ring
+ -rm noring.o noring
+
+.PHONY: all clean
diff --git a/tools/virtio/ringtest/README b/tools/virtio/ringtest/README
new file mode 100644
index 000000000..d83707a33
--- /dev/null
+++ b/tools/virtio/ringtest/README
@@ -0,0 +1,6 @@
+Partial implementation of various ring layouts, useful for tuning the virtio
+design. Uses shared memory heavily.
+
+Typical use:
+
+# sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
new file mode 100644
index 000000000..453ca3c21
--- /dev/null
+++ b/tools/virtio/ringtest/main.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Command line processing and common functions for ring benchmarking.
+ */
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <pthread.h>
+#include <assert.h>
+#include <sched.h>
+#include "main.h"
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <limits.h>
+
+int runcycles = 10000000;
+int max_outstanding = INT_MAX;
+int batch = 1;
+int param = 0;
+
+bool do_sleep = false;
+bool do_relax = false;
+bool do_exit = true;
+
+unsigned ring_size = 256;
+
+static int kickfd = -1;
+static int callfd = -1;
+
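+/*
+ * Guest/host notifications are modelled with a pair of eventfds: notify()
+ * writes an 8-byte value to bump the counter and wait_for_notify() blocks
+ * reading it back; vmexit()/vmentry() charge the simulated transition cost
+ * around each notification.
+ */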
+void notify(int fd)
+{
+ unsigned long long v = 1;
+ int r;
+
+ vmexit();
+ r = write(fd, &v, sizeof v);
+ assert(r == sizeof v);
+ vmentry();
+}
+
+void wait_for_notify(int fd)
+{
+ unsigned long long v = 1;
+ int r;
+
+ vmexit();
+ r = read(fd, &v, sizeof v);
+ assert(r == sizeof v);
+ vmentry();
+}
+
+void kick(void)
+{
+ notify(kickfd);
+}
+
+void wait_for_kick(void)
+{
+ wait_for_notify(kickfd);
+}
+
+void call(void)
+{
+ notify(callfd);
+}
+
+void wait_for_call(void)
+{
+ wait_for_notify(callfd);
+}
+
+void set_affinity(const char *arg)
+{
+ cpu_set_t cpuset;
+ int ret;
+ pthread_t self;
+ long int cpu;
+ char *endptr;
+
+ if (!arg)
+ return;
+
+ cpu = strtol(arg, &endptr, 0);
+ assert(!*endptr);
+
+ assert(cpu >= 0 && cpu < CPU_SETSIZE);
+
+ self = pthread_self();
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+
+ ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
+ assert(!ret);
+}
+
+void poll_used(void)
+{
+ while (used_empty())
+ busy_wait();
+}
+
+static void __attribute__((__flatten__)) run_guest(void)
+{
+ int completed_before;
+ int completed = 0;
+ int started = 0;
+ int bufs = runcycles;
+ int spurious = 0;
+ int r;
+ unsigned len;
+ void *buf;
+ int tokick = batch;
+
+ for (;;) {
+ if (do_sleep)
+ disable_call();
+ completed_before = completed;
+ do {
+ if (started < bufs &&
+ started - completed < max_outstanding) {
+ r = add_inbuf(0, "Buffer\n", "Hello, world!");
+ if (__builtin_expect(r == 0, true)) {
+ ++started;
+ if (!--tokick) {
+ tokick = batch;
+ if (do_sleep)
+ kick_available();
+ }
+
+ }
+ } else
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ if (get_buf(&len, &buf)) {
+ ++completed;
+ if (__builtin_expect(completed == bufs, false))
+ return;
+ r = 0;
+ }
+ } while (r == 0);
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ assert(started <= bufs);
+ if (do_sleep) {
+ if (used_empty() && enable_call())
+ wait_for_call();
+ } else {
+ poll_used();
+ }
+ }
+}
+
+void poll_avail(void)
+{
+ while (avail_empty())
+ busy_wait();
+}
+
+static void __attribute__((__flatten__)) run_host(void)
+{
+ int completed_before;
+ int completed = 0;
+ int spurious = 0;
+ int bufs = runcycles;
+ unsigned len;
+ void *buf;
+
+ for (;;) {
+ if (do_sleep) {
+ if (avail_empty() && enable_kick())
+ wait_for_kick();
+ } else {
+ poll_avail();
+ }
+ if (do_sleep)
+ disable_kick();
+ completed_before = completed;
+ while (__builtin_expect(use_buf(&len, &buf), true)) {
+ if (do_sleep)
+ call_used();
+ ++completed;
+ if (__builtin_expect(completed == bufs, false))
+ return;
+ }
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ if (completed == bufs)
+ break;
+ }
+}
+
+void *start_guest(void *arg)
+{
+ set_affinity(arg);
+ run_guest();
+ pthread_exit(NULL);
+}
+
+void *start_host(void *arg)
+{
+ set_affinity(arg);
+ run_host();
+ pthread_exit(NULL);
+}
+
+static const char optstring[] = "";
+static const struct option longopts[] = {
+ {
+ .name = "help",
+ .has_arg = no_argument,
+ .val = 'h',
+ },
+ {
+ .name = "host-affinity",
+ .has_arg = required_argument,
+ .val = 'H',
+ },
+ {
+ .name = "guest-affinity",
+ .has_arg = required_argument,
+ .val = 'G',
+ },
+ {
+ .name = "ring-size",
+ .has_arg = required_argument,
+ .val = 'R',
+ },
+ {
+ .name = "run-cycles",
+ .has_arg = required_argument,
+ .val = 'C',
+ },
+ {
+ .name = "outstanding",
+ .has_arg = required_argument,
+ .val = 'o',
+ },
+ {
+ .name = "batch",
+ .has_arg = required_argument,
+ .val = 'b',
+ },
+ {
+ .name = "param",
+ .has_arg = required_argument,
+ .val = 'p',
+ },
+ {
+ .name = "sleep",
+ .has_arg = no_argument,
+ .val = 's',
+ },
+ {
+ .name = "relax",
+ .has_arg = no_argument,
+ .val = 'x',
+ },
+ {
+ .name = "exit",
+ .has_arg = no_argument,
+ .val = 'e',
+ },
+ {
+ }
+};
+
+static void help(void)
+{
+ fprintf(stderr, "Usage: <test> [--help]"
+ " [--host-affinity H]"
+ " [--guest-affinity G]"
+ " [--ring-size R (default: %d)]"
+ " [--run-cycles C (default: %d)]"
+ " [--batch b]"
+ " [--outstanding o]"
+ " [--param p]"
+ " [--sleep]"
+ " [--relax]"
+ " [--exit]"
+ "\n",
+ ring_size,
+ runcycles);
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+ pthread_t host, guest;
+ void *tret;
+ char *host_arg = NULL;
+ char *guest_arg = NULL;
+ char *endptr;
+ long int c;
+
+ kickfd = eventfd(0, 0);
+ assert(kickfd >= 0);
+ callfd = eventfd(0, 0);
+ assert(callfd >= 0);
+
+ for (;;) {
+ int o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help();
+ exit(2);
+ case 'H':
+ host_arg = optarg;
+ break;
+ case 'G':
+ guest_arg = optarg;
+ break;
+ case 'R':
+ ring_size = strtol(optarg, &endptr, 0);
+ assert(ring_size && !(ring_size & (ring_size - 1)));
+ assert(!*endptr);
+ break;
+ case 'C':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ runcycles = c;
+ break;
+ case 'o':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ max_outstanding = c;
+ break;
+ case 'p':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ param = c;
+ break;
+ case 'b':
+ c = strtol(optarg, &endptr, 0);
+ assert(!*endptr);
+ assert(c > 0 && c < INT_MAX);
+ batch = c;
+ break;
+ case 's':
+ do_sleep = true;
+ break;
+ case 'x':
+ do_relax = true;
+ break;
+ case 'e':
+ do_exit = true;
+ break;
+ default:
+ help();
+ exit(4);
+ break;
+ }
+ }
+
+ /* does nothing here, used to make sure all smp APIs compile */
+ smp_acquire();
+ smp_release();
+ smp_mb();
+done:
+
+ if (batch > max_outstanding)
+ batch = max_outstanding;
+
+ if (optind < argc) {
+ help();
+ exit(4);
+ }
+ alloc_ring();
+
+ ret = pthread_create(&host, NULL, start_host, host_arg);
+ assert(!ret);
+ ret = pthread_create(&guest, NULL, start_guest, guest_arg);
+ assert(!ret);
+
+ ret = pthread_join(guest, &tret);
+ assert(!ret);
+ ret = pthread_join(host, &tret);
+ assert(!ret);
+ return 0;
+}
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
new file mode 100644
index 000000000..301d59bfc
--- /dev/null
+++ b/tools/virtio/ringtest/main.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Common macros and functions for ring benchmarking.
+ */
+#ifndef MAIN_H
+#define MAIN_H
+
+#include <stdbool.h>
+
+extern int param;
+
+extern bool do_exit;
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "x86intrin.h"
+
+static inline void wait_cycles(unsigned long long cycles)
+{
+ unsigned long long t;
+
+ t = __rdtsc();
+ while (__rdtsc() - t < cycles) {}
+}
+
+#define VMEXIT_CYCLES 500
+#define VMENTRY_CYCLES 500
+
+#elif defined(__s390x__)
+static inline void wait_cycles(unsigned long long cycles)
+{
+ asm volatile("0: brctg %0,0b" : : "d" (cycles));
+}
+
+/* tweak me */
+#define VMEXIT_CYCLES 200
+#define VMENTRY_CYCLES 200
+
+#else
+static inline void wait_cycles(unsigned long long cycles)
+{
+ _Exit(5);
+}
+#define VMEXIT_CYCLES 0
+#define VMENTRY_CYCLES 0
+#endif
+
+static inline void vmexit(void)
+{
+ if (!do_exit)
+ return;
+
+ wait_cycles(VMEXIT_CYCLES);
+}
+static inline void vmentry(void)
+{
+ if (!do_exit)
+ return;
+
+ wait_cycles(VMENTRY_CYCLES);
+}
+
+/* implemented by ring */
+void alloc_ring(void);
+/* guest side */
+int add_inbuf(unsigned, void *, void *);
+void *get_buf(unsigned *, void **);
+void disable_call();
+bool used_empty();
+bool enable_call();
+void kick_available();
+/* host side */
+void disable_kick();
+bool avail_empty();
+bool enable_kick();
+bool use_buf(unsigned *, void **);
+void call_used();
+
+/* implemented by main */
+extern bool do_sleep;
+void kick(void);
+void wait_for_kick(void);
+void call(void);
+void wait_for_call(void);
+
+extern unsigned ring_size;
+
+/* Compiler barrier - similar to what Linux uses */
+#define barrier() asm volatile("" ::: "memory")
+
+/* Is there a portable way to do this? */
+#if defined(__x86_64__) || defined(__i386__)
+#define cpu_relax() asm ("rep; nop" ::: "memory")
+#elif defined(__s390x__)
+#define cpu_relax() barrier()
+#else
+#define cpu_relax() assert(0)
+#endif
+
+extern bool do_relax;
+
+static inline void busy_wait(void)
+{
+ if (do_relax)
+ cpu_relax();
+ else
+ /* prevent compiler from removing busy loops */
+ barrier();
+}
+
+#if defined(__x86_64__)
+#define smp_mb() asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#elif defined(__i386__)
+#define smp_mb() asm volatile("lock; addl $0,-132(%%esp)" ::: "memory", "cc")
+#else
+/*
+ * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
+ * with other __ATOMIC_SEQ_CST calls.
+ */
+#define smp_mb() __sync_synchronize()
+#endif
+
+/*
+ * This abuses the atomic builtins for thread fences, and
+ * adds a compiler barrier.
+ */
+#define smp_release() do { \
+ barrier(); \
+ __atomic_thread_fence(__ATOMIC_RELEASE); \
+} while (0)
+
+#define smp_acquire() do { \
+ __atomic_thread_fence(__ATOMIC_ACQUIRE); \
+ barrier(); \
+} while (0)
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
+#define smp_wmb() barrier()
+#else
+#define smp_wmb() smp_release()
+#endif
+
+#ifdef __alpha__
+#define smp_read_barrier_depends() smp_acquire()
+#else
+#define smp_read_barrier_depends() do {} while(0)
+#endif
+
+static __always_inline
+void __read_once_size(const volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
+ case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
+ case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
+ case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)res, (const void *)p, size);
+ barrier();
+ }
+}
+
+static __always_inline void __write_once_size(volatile void *p, void *res, int size)
+{
+ switch (size) {
+ case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
+ case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
+ case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
+ case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
+ default:
+ barrier();
+ __builtin_memcpy((void *)p, (const void *)res, size);
+ barrier();
+ }
+}
+
+#define READ_ONCE(x) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u; \
+ __read_once_size(&(x), __u.__c, sizeof(x)); \
+ smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \
+ __u.__val; \
+})
+
+#define WRITE_ONCE(x, val) \
+({ \
+ union { typeof(x) __val; char __c[1]; } __u = \
+ { .__val = (typeof(x)) (val) }; \
+ __write_once_size(&(x), __u.__c, sizeof(x)); \
+ __u.__val; \
+})
+
+#endif
diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c
new file mode 100644
index 000000000..ce2440d5c
--- /dev/null
+++ b/tools/virtio/ringtest/noring.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "main.h"
+#include <assert.h>
+
+/* stub implementation: useful for measuring overhead */
+void alloc_ring(void)
+{
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ return 0;
+}
+
+/*
+ * The skb_array API provides no way for the producer to find out whether a
+ * given buffer was consumed. Our tests merely require that a successful
+ * get_buf implies that add_inbuf succeeded in the past and that add_inbuf
+ * will succeed, so fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ return "Buffer";
+}
+
+bool used_empty()
+{
+ return false;
+}
+
+void disable_call()
+{
+ assert(0);
+}
+
+bool enable_call()
+{
+ assert(0);
+}
+
+void kick_available(void)
+{
+ assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+ assert(0);
+}
+
+bool enable_kick()
+{
+ assert(0);
+}
+
+bool avail_empty()
+{
+ return false;
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ return true;
+}
+
+void call_used(void)
+{
+ assert(0);
+}
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
new file mode 100644
index 000000000..2d566fbd2
--- /dev/null
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <pthread.h>
+#include <malloc.h>
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+
+#define SMP_CACHE_BYTES 64
+#define cache_line_size() SMP_CACHE_BYTES
+#define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES)))
+#define unlikely(x) (__builtin_expect(!!(x), 0))
+#define likely(x) (__builtin_expect(!!(x), 1))
+#define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a))
+#define SIZE_MAX (~(size_t)0)
+#define KMALLOC_MAX_SIZE SIZE_MAX
+#define BUG_ON(x) assert(!(x))
+
+typedef pthread_spinlock_t spinlock_t;
+
+typedef int gfp_t;
+#define __GFP_ZERO 0x1
+
+static void *kmalloc(unsigned size, gfp_t gfp)
+{
+ void *p = memalign(64, size);
+ if (!p)
+ return p;
+
+ if (gfp & __GFP_ZERO)
+ memset(p, 0, size);
+ return p;
+}
+
+static inline void *kzalloc(unsigned size, gfp_t flags)
+{
+ return kmalloc(size, flags | __GFP_ZERO);
+}
+
+static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
+{
+ if (size != 0 && n > SIZE_MAX / size)
+ return NULL;
+ return kmalloc(n * size, flags);
+}
+
+static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
+{
+ return kmalloc_array(n, size, flags | __GFP_ZERO);
+}
+
+static void kfree(void *p)
+{
+ if (p)
+ free(p);
+}
+
+#define kvmalloc_array kmalloc_array
+#define kvfree kfree
+
+static void spin_lock_init(spinlock_t *lock)
+{
+ int r = pthread_spin_init(lock, 0);
+ assert(!r);
+}
+
+static void spin_lock(spinlock_t *lock)
+{
+ int ret = pthread_spin_lock(lock);
+ assert(!ret);
+}
+
+static void spin_unlock(spinlock_t *lock)
+{
+ int ret = pthread_spin_unlock(lock);
+ assert(!ret);
+}
+
+static void spin_lock_bh(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_bh(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
+
+static void spin_lock_irq(spinlock_t *lock)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_irq(spinlock_t *lock)
+{
+ spin_unlock(lock);
+}
+
+static void spin_lock_irqsave(spinlock_t *lock, unsigned long f)
+{
+ spin_lock(lock);
+}
+
+static void spin_unlock_irqrestore(spinlock_t *lock, unsigned long f)
+{
+ spin_unlock(lock);
+}
+
+#include "../../../include/linux/ptr_ring.h"
+
+static unsigned long long headcnt, tailcnt;
+static struct ptr_ring array ____cacheline_aligned_in_smp;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret = ptr_ring_init(&array, ring_size, 0);
+ assert(!ret);
+ /* Hacky way to poke at ring internals. Useful for testing though. */
+ if (param)
+ array.batch = param;
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ int ret;
+
+ ret = __ptr_ring_produce(&array, buf);
+ if (ret >= 0) {
+ ret = 0;
+ headcnt++;
+ }
+
+ return ret;
+}
+
+/*
+ * The ptr_ring API provides no way for the producer to find out whether a
+ * given buffer was consumed. Our tests merely require that a successful
+ * get_buf implies that add_inbuf succeeded in the past and that add_inbuf
+ * will succeed, so fake it accordingly.
+ */
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ void *datap;
+
+ if (tailcnt == headcnt || __ptr_ring_full(&array))
+ datap = NULL;
+ else {
+ datap = "Buffer\n";
+ ++tailcnt;
+ }
+
+ return datap;
+}
+
+bool used_empty()
+{
+ return (tailcnt == headcnt || __ptr_ring_full(&array));
+}
+
+void disable_call()
+{
+ assert(0);
+}
+
+bool enable_call()
+{
+ assert(0);
+}
+
+void kick_available(void)
+{
+ assert(0);
+}
+
+/* host side */
+void disable_kick()
+{
+ assert(0);
+}
+
+bool enable_kick()
+{
+ assert(0);
+}
+
+bool avail_empty()
+{
+ return __ptr_ring_empty(&array);
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ void *ptr;
+
+ ptr = __ptr_ring_consume(&array);
+
+ return ptr;
+}
+
+void call_used(void)
+{
+ assert(0);
+}
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
new file mode 100644
index 000000000..5a41404aa
--- /dev/null
+++ b/tools/virtio/ringtest/ring.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
+ * signalling, unconditionally.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Next - Where next entry will be written.
+ * Prev - "Next" value when event triggered previously.
+ * Event - Peer requested event after writing this entry.
+ */
+static inline bool need_event(unsigned short event,
+ unsigned short next,
+ unsigned short prev)
+{
+ return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
+}
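+
+/*
+ * Worked example (16-bit wrap-around arithmetic): with prev = 2 and next = 5
+ * the producer has written entries 2..4 since the last notification.  If the
+ * peer set event = 3, (next - event - 1) = 1 < (next - prev) = 3, so an
+ * event is sent; if event = 7, (next - event - 1) wraps to 65533 and no
+ * event is sent.
+ */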
+
+/* Design:
+ * Guest adds descriptors with unique index values and DESC_HW in flags.
+ * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
+ * Flags are always set last.
+ */
+#define DESC_HW 0x1
+
+struct desc {
+ unsigned short flags;
+ unsigned short index;
+ unsigned len;
+ unsigned long long addr;
+};
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+/* Mostly read */
+struct event {
+ unsigned short kick_index;
+ unsigned char reserved0[HOST_GUEST_PADDING - 2];
+ unsigned short call_index;
+ unsigned char reserved1[HOST_GUEST_PADDING - 2];
+};
+
+struct data {
+ void *buf; /* descriptor is writeable, we can't get buf from there */
+ void *data;
+} *data;
+
+struct desc *ring;
+struct event *event;
+
+struct guest {
+ unsigned avail_idx;
+ unsigned last_used_idx;
+ unsigned num_free;
+ unsigned kicked_avail_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 12];
+} guest;
+
+struct host {
+ /* we do not need to track last avail index
+ * unless we have more than one in flight.
+ */
+ unsigned used_idx;
+ unsigned called_used_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret;
+ int i;
+
+ ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
+ if (ret) {
+ perror("Unable to allocate ring buffer");
+ exit(3);
+ }
+ event = calloc(1, sizeof(*event));
+ if (!event) {
+ perror("Unable to allocate event buffer");
+ exit(3);
+ }
+ guest.avail_idx = 0;
+ guest.kicked_avail_idx = -1;
+ guest.last_used_idx = 0;
+ host.used_idx = 0;
+ host.called_used_idx = -1;
+ for (i = 0; i < ring_size; ++i) {
+ struct desc desc = {
+ .index = i,
+ };
+ ring[i] = desc;
+ }
+ guest.num_free = ring_size;
+ data = calloc(ring_size, sizeof(*data));
+ if (!data) {
+ perror("Unable to allocate data buffer");
+ exit(3);
+ }
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ unsigned head, index;
+
+ if (!guest.num_free)
+ return -1;
+
+ guest.num_free--;
+ head = (ring_size - 1) & (guest.avail_idx++);
+
+ /* Start with a write. On MESI architectures this helps
+ * avoid a shared state with consumer that is polling this descriptor.
+ */
+ ring[head].addr = (unsigned long)(void*)buf;
+ ring[head].len = len;
+ /* read below might bypass write above. That is OK because it's just an
+ * optimization. If this happens, we will get the cache line in a
+ * shared state which is unfortunate, but probably not worth it to
+ * add an explicit full barrier to avoid this.
+ */
+ barrier();
+ index = ring[head].index;
+ data[index].buf = buf;
+ data[index].data = datap;
+ /* Barrier A (for pairing) */
+ smp_release();
+ ring[head].flags = DESC_HW;
+
+ return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+ unsigned index;
+ void *datap;
+
+ if (ring[head].flags & DESC_HW)
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ *lenp = ring[head].len;
+ index = ring[head].index & (ring_size - 1);
+ datap = data[index].data;
+ *bufp = data[index].buf;
+ data[index].buf = NULL;
+ data[index].data = NULL;
+ guest.num_free++;
+ guest.last_used_idx++;
+ return datap;
+}
+
+bool used_empty()
+{
+ unsigned head = (ring_size - 1) & guest.last_used_idx;
+
+ return (ring[head].flags & DESC_HW);
+}
+
+void disable_call()
+{
+ /* Doing nothing to disable calls might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_call()
+{
+ event->call_index = guest.last_used_idx;
+ /* Flush call index write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ return used_empty();
+}
+
+void kick_available(void)
+{
+ bool need;
+
+ /* Flush in previous flags write */
+ /* Barrier C (for pairing) */
+ smp_mb();
+ need = need_event(event->kick_index,
+ guest.avail_idx,
+ guest.kicked_avail_idx);
+
+ guest.kicked_avail_idx = guest.avail_idx;
+ if (need)
+ kick();
+}
+
+/* host side */
+void disable_kick()
+{
+ /* Doing nothing to disable kicks might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_kick()
+{
+ event->kick_index = host.used_idx;
+ /* Barrier C (for pairing) */
+ smp_mb();
+ return avail_empty();
+}
+
+bool avail_empty()
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ return !(ring[head].flags & DESC_HW);
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head = (ring_size - 1) & host.used_idx;
+
+ if (!(ring[head].flags & DESC_HW))
+ return false;
+
+ /* make sure length read below is not speculated */
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ /* simple in-order completion: we don't need
+ * to touch index at all. This also means we
+ * can just modify the descriptor in-place.
+ */
+ ring[head].len--;
+ /* Make sure len is valid before flags.
+ * Note: an alternative is to write len and flags in a single access -
+ * possible on 64-bit architectures - but since the release barrier is
+ * free on Intel anyway, there is no way to test whether it's a gain.
+ */
+ /* Barrier B (for pairing) */
+ smp_release();
+ ring[head].flags = 0;
+ host.used_idx++;
+ return true;
+}
+
+void call_used(void)
+{
+ bool need;
+
+ /* Flush in previous flags write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+
+ need = need_event(event->call_index,
+ host.used_idx,
+ host.called_used_idx);
+
+ host.called_used_idx = host.used_idx;
+
+ if (need)
+ call();
+}
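+
+/* Summary of the barrier pairing used above (a sketch of the protocol,
+ * not extra code): Barrier A orders the guest's descriptor writes
+ * before it sets DESC_HW against the host's reads after it sees
+ * DESC_HW; Barrier B orders the host's len write before it clears
+ * DESC_HW against the guest's reads after it sees the flag clear;
+ * Barriers C and D make each side's index update visible before it
+ * reads the peer's event index, so a kick()/call() is never lost.
+ */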
diff --git a/tools/virtio/ringtest/run-on-all.sh b/tools/virtio/ringtest/run-on-all.sh
new file mode 100755
index 000000000..dcc3ea758
--- /dev/null
+++ b/tools/virtio/ringtest/run-on-all.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CPUS_ONLINE=$(lscpu --online -p=cpu|grep -v -e '#')
+#Use the last CPU for the host. Why not the first?
+#Many devices tend to use cpu0 by default, so
+#it tends to be busier.
+HOST_AFFINITY=$(echo "${CPUS_ONLINE}"|tail -n 1)
+
+#run command on all cpus
+for cpu in $CPUS_ONLINE
+do
+ #Don't run guest and host on the same CPU;
+ #it actually works OK when signalling is used
+ if
+ (echo "$@" | grep -e "--sleep" > /dev/null) || \
+ test $HOST_AFFINITY '!=' $cpu
+ then
+ echo "GUEST AFFINITY $cpu"
+ "$@" --host-affinity $HOST_AFFINITY --guest-affinity $cpu
+ fi
+done
+echo "NO GUEST AFFINITY"
+"$@" --host-affinity $HOST_AFFINITY
+echo "NO AFFINITY"
+"$@"
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
new file mode 100644
index 000000000..5fd3fbcb9
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2016 Red Hat, Inc.
+ * Author: Michael S. Tsirkin <mst@redhat.com>
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Partial implementation of virtio 0.9. The event index is used for
+ * signalling, unconditionally. The design roughly follows the Linux kernel
+ * implementation so that its performance can be judged against it.
+ */
+#define _GNU_SOURCE
+#include "main.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+#include <linux/virtio_ring.h>
+
+struct data {
+ void *data;
+} *data;
+
+struct vring ring;
+
+/* enabling the below activates experimental ring polling code
+ * (which skips index reads on consumer in favor of looking at
+ * high bits of ring id ^ 0x8000).
+ */
+/* #ifdef RING_POLL */
+/* enabling the below activates experimental in-order code
+ * (which skips ring updates and reads and writes len in descriptor).
+ */
+/* #ifdef INORDER */
+
+#if defined(RING_POLL) && defined(INORDER)
+#error "RING_POLL and INORDER are mutually exclusive"
+#endif
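+
+/* Worked example (hypothetical values) of the RING_POLL id encoding:
+ * with ring_size = 256, avail index 0x0103 and descriptor head 5, the
+ * guest publishes
+ *     (5 | (0x0103 & ~0xff)) ^ 0x8000 = 0x8105.
+ * A consumer whose own index is 0x0103 computes
+ *     (0x8105 ^ 0x0103 ^ 0x8000) & ~0xff = 0,
+ * which identifies the entry as belonging to the current lap without
+ * reading the published index at all.
+ */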
+
+/* how much padding is needed to avoid false cache sharing */
+#define HOST_GUEST_PADDING 0x80
+
+struct guest {
+ unsigned short avail_idx;
+ unsigned short last_used_idx;
+ unsigned short num_free;
+ unsigned short kicked_avail_idx;
+#ifndef INORDER
+ unsigned short free_head;
+#else
+ unsigned short reserved_free_head;
+#endif
+ unsigned char reserved[HOST_GUEST_PADDING - 10];
+} guest;
+
+struct host {
+ /* we do not need to track last avail index
+ * unless we have more than one in flight.
+ */
+ unsigned short used_idx;
+ unsigned short called_used_idx;
+ unsigned char reserved[HOST_GUEST_PADDING - 4];
+} host;
+
+/* implemented by ring */
+void alloc_ring(void)
+{
+ int ret;
+ int i;
+ void *p;
+
+ ret = posix_memalign(&p, 0x1000, vring_size(ring_size, 0x1000));
+ if (ret) {
+ perror("Unable to allocate ring buffer.\n");
+ exit(3);
+ }
+ memset(p, 0, vring_size(ring_size, 0x1000));
+ vring_init(&ring, ring_size, p, 0x1000);
+
+ guest.avail_idx = 0;
+ guest.kicked_avail_idx = -1;
+ guest.last_used_idx = 0;
+#ifndef INORDER
+ /* Put everything in free lists. */
+ guest.free_head = 0;
+#endif
+ for (i = 0; i < ring_size - 1; i++)
+ ring.desc[i].next = i + 1;
+ host.used_idx = 0;
+ host.called_used_idx = -1;
+ guest.num_free = ring_size;
+ data = malloc(ring_size * sizeof *data);
+ if (!data) {
+ perror("Unable to allocate data buffer.\n");
+ exit(3);
+ }
+ memset(data, 0, ring_size * sizeof *data);
+}
+
+/* guest side */
+int add_inbuf(unsigned len, void *buf, void *datap)
+{
+ unsigned head;
+#ifndef INORDER
+ unsigned avail;
+#endif
+ struct vring_desc *desc;
+
+ if (!guest.num_free)
+ return -1;
+
+#ifdef INORDER
+ head = (ring_size - 1) & (guest.avail_idx++);
+#else
+ head = guest.free_head;
+#endif
+ guest.num_free--;
+
+ desc = ring.desc;
+ desc[head].flags = VRING_DESC_F_NEXT;
+ desc[head].addr = (unsigned long)(void *)buf;
+ desc[head].len = len;
+ /* We do it like this to simulate the way
+ * we'd have to flip it if we had multiple
+ * descriptors.
+ */
+ desc[head].flags &= ~VRING_DESC_F_NEXT;
+#ifndef INORDER
+ guest.free_head = desc[head].next;
+#endif
+
+ data[head].data = datap;
+
+#ifdef RING_POLL
+ /* Barrier A (for pairing) */
+ smp_release();
+ avail = guest.avail_idx++;
+ ring.avail->ring[avail & (ring_size - 1)] =
+ (head | (avail & ~(ring_size - 1))) ^ 0x8000;
+#else
+#ifndef INORDER
+ /* Barrier A (for pairing) */
+ smp_release();
+ avail = (ring_size - 1) & (guest.avail_idx++);
+ ring.avail->ring[avail] = head;
+#endif
+ /* Barrier A (for pairing) */
+ smp_release();
+#endif
+ ring.avail->idx = guest.avail_idx;
+ return 0;
+}
+
+void *get_buf(unsigned *lenp, void **bufp)
+{
+ unsigned head;
+ unsigned index;
+ void *datap;
+
+#ifdef RING_POLL
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = ring.used->ring[head].id;
+ if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+ index &= ring_size - 1;
+#else
+ if (ring.used->idx == guest.last_used_idx)
+ return NULL;
+ /* Barrier B (for pairing) */
+ smp_acquire();
+#ifdef INORDER
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = head;
+#else
+ head = (ring_size - 1) & guest.last_used_idx;
+ index = ring.used->ring[head].id;
+#endif
+
+#endif
+#ifdef INORDER
+ *lenp = ring.desc[index].len;
+#else
+ *lenp = ring.used->ring[head].len;
+#endif
+ datap = data[index].data;
+ *bufp = (void*)(unsigned long)ring.desc[index].addr;
+ data[index].data = NULL;
+#ifndef INORDER
+ ring.desc[index].next = guest.free_head;
+ guest.free_head = index;
+#endif
+ guest.num_free++;
+ guest.last_used_idx++;
+ return datap;
+}
+
+bool used_empty()
+{
+ unsigned short last_used_idx = guest.last_used_idx;
+#ifdef RING_POLL
+ unsigned short head = last_used_idx & (ring_size - 1);
+ unsigned index = ring.used->ring[head].id;
+
+ return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
+#else
+ return ring.used->idx == last_used_idx;
+#endif
+}
+
+void disable_call()
+{
+ /* Doing nothing to disable calls might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_call()
+{
+ vring_used_event(&ring) = guest.last_used_idx;
+ /* Flush call index write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ return used_empty();
+}
+
+void kick_available(void)
+{
+ bool need;
+
+ /* Flush in previous flags write */
+ /* Barrier C (for pairing) */
+ smp_mb();
+ need = vring_need_event(vring_avail_event(&ring),
+ guest.avail_idx,
+ guest.kicked_avail_idx);
+
+ guest.kicked_avail_idx = guest.avail_idx;
+ if (need)
+ kick();
+}
+
+/* host side */
+void disable_kick()
+{
+ /* Doing nothing to disable kicks might cause
+ * extra interrupts, but reduces the number of cache misses.
+ */
+}
+
+bool enable_kick()
+{
+ vring_avail_event(&ring) = host.used_idx;
+ /* Barrier C (for pairing) */
+ smp_mb();
+ return avail_empty();
+}
+
+bool avail_empty()
+{
+ unsigned head = host.used_idx;
+#ifdef RING_POLL
+ unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+ return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
+#else
+ return head == ring.avail->idx;
+#endif
+}
+
+bool use_buf(unsigned *lenp, void **bufp)
+{
+ unsigned used_idx = host.used_idx;
+ struct vring_desc *desc;
+ unsigned head;
+
+#ifdef RING_POLL
+ head = ring.avail->ring[used_idx & (ring_size - 1)];
+ if ((used_idx ^ head ^ 0x8000) & ~(ring_size - 1))
+ return false;
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ used_idx &= ring_size - 1;
+ desc = &ring.desc[head & (ring_size - 1)];
+#else
+ if (used_idx == ring.avail->idx)
+ return false;
+
+ /* Barrier A (for pairing) */
+ smp_acquire();
+
+ used_idx &= ring_size - 1;
+#ifdef INORDER
+ head = used_idx;
+#else
+ head = ring.avail->ring[used_idx];
+#endif
+ desc = &ring.desc[head];
+#endif
+
+ *lenp = desc->len;
+ *bufp = (void *)(unsigned long)desc->addr;
+
+#ifdef INORDER
+ desc->len = desc->len - 1;
+#else
+ /* now update used ring */
+ ring.used->ring[used_idx].id = head;
+ ring.used->ring[used_idx].len = desc->len - 1;
+#endif
+ /* Barrier B (for pairing) */
+ smp_release();
+ host.used_idx++;
+ ring.used->idx = host.used_idx;
+
+ return true;
+}
+
+void call_used(void)
+{
+ bool need;
+
+ /* Flush in previous flags write */
+ /* Barrier D (for pairing) */
+ smp_mb();
+ need = vring_need_event(vring_used_event(&ring),
+ host.used_idx,
+ host.called_used_idx);
+
+ host.called_used_idx = host.used_idx;
+ if (need)
+ call();
+}
diff --git a/tools/virtio/ringtest/virtio_ring_inorder.c b/tools/virtio/ringtest/virtio_ring_inorder.c
new file mode 100644
index 000000000..2438ca58a
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_inorder.c
@@ -0,0 +1,2 @@
+#define INORDER 1
+#include "virtio_ring_0_9.c"
diff --git a/tools/virtio/ringtest/virtio_ring_poll.c b/tools/virtio/ringtest/virtio_ring_poll.c
new file mode 100644
index 000000000..84fc2c557
--- /dev/null
+++ b/tools/virtio/ringtest/virtio_ring_poll.c
@@ -0,0 +1,2 @@
+#define RING_POLL 1
+#include "virtio_ring_0_9.c"
diff --git a/tools/virtio/uapi/linux/uio.h b/tools/virtio/uapi/linux/uio.h
new file mode 100644
index 000000000..7230e9002
--- /dev/null
+++ b/tools/virtio/uapi/linux/uio.h
@@ -0,0 +1 @@
+#include <sys/uio.h>
diff --git a/tools/virtio/uapi/linux/virtio_config.h b/tools/virtio/uapi/linux/virtio_config.h
new file mode 100644
index 000000000..4c86675f0
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_config.h
@@ -0,0 +1 @@
+#include "../../../../include/uapi/linux/virtio_config.h"
diff --git a/tools/virtio/uapi/linux/virtio_ring.h b/tools/virtio/uapi/linux/virtio_ring.h
new file mode 100644
index 000000000..cf50b2e5f
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_ring.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef VIRTIO_RING_H
+#define VIRTIO_RING_H
+#include "../../../../include/uapi/linux/virtio_ring.h"
+#endif /* VIRTIO_RING_H */
diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h
new file mode 100644
index 000000000..e7a1096e7
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_types.h
@@ -0,0 +1 @@
+#include "../../include/uapi/linux/virtio_types.h"
diff --git a/tools/virtio/vhost_test/Makefile b/tools/virtio/vhost_test/Makefile
new file mode 100644
index 000000000..a1d35b81b
--- /dev/null
+++ b/tools/virtio/vhost_test/Makefile
@@ -0,0 +1,2 @@
+obj-m += vhost_test.o
+EXTRA_CFLAGS += -Idrivers/vhost
diff --git a/tools/virtio/vhost_test/vhost_test.c b/tools/virtio/vhost_test/vhost_test.c
new file mode 100644
index 000000000..18735189e
--- /dev/null
+++ b/tools/virtio/vhost_test/vhost_test.c
@@ -0,0 +1 @@
+#include "test.c"
diff --git a/tools/virtio/virtio-trace/Makefile b/tools/virtio/virtio-trace/Makefile
new file mode 100644
index 000000000..7843ebcda
--- /dev/null
+++ b/tools/virtio/virtio-trace/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0
+CC = gcc
+CFLAGS = -O2 -Wall -pthread
+
+all: trace-agent
+
+.c.o:
+ $(CC) $(CFLAGS) -c $^ -o $@
+
+trace-agent: trace-agent.o trace-agent-ctl.o trace-agent-rw.o
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ rm -f *.o trace-agent
diff --git a/tools/virtio/virtio-trace/README b/tools/virtio/virtio-trace/README
new file mode 100644
index 000000000..b64845b82
--- /dev/null
+++ b/tools/virtio/virtio-trace/README
@@ -0,0 +1,118 @@
+Trace Agent for virtio-trace
+============================
+
+The trace agent is a userspace tool for sending trace data from a guest to a
+host with low overhead. The trace agent has the following functions:
+ - splice a page of ring-buffer to read_pipe without memory copying
+ - splice the page from write_pipe to virtio-console without memory copying
+ - write trace data to stdout by using -o option
+ - controlled by start/stop orders from a Host
+
+The trace agent operates as follows:
+ 1) Initialize all structures.
+ 2) Create a read/write thread per CPU. Each thread is bound to a CPU
+ and waits there until it is woken.
+ 3) A controller thread does poll(), waiting for a start order from the host.
+ 4) When the controller of the trace agent receives a start order from the
+ host, it wakes the read/write threads.
+ 5) The read/write threads start reading trace data from the ring buffers
+ and writing the data to virtio-serial.
+ 6) If the controller receives a stop order from the host, the read/write
+ threads stop reading trace data.
+
+
+Files
+=====
+
+README: this file
+Makefile: Makefile of trace agent for virtio-trace
+trace-agent.c: includes the main function and sets up the trace agent
+trace-agent.h: includes all structures and some macros
+trace-agent-ctl.c: includes the controller function for read/write threads
+trace-agent-rw.c: includes the read/write thread functions
+
+
+Setup
+=====
+
+To use this trace agent for virtio-trace, we need to prepare some virtio-serial
+I/Fs.
+
+1) Make FIFO in a host
+ virtio-trace uses one virtio-serial pipe per CPU as a trace data path, plus
+one control path, so FIFOs (named pipes) should be created as follows:
+ # mkdir /tmp/virtio-trace/
+ # mkfifo /tmp/virtio-trace/trace-path-cpu{0,1,2,...,X}.{in,out}
+ # mkfifo /tmp/virtio-trace/agent-ctl-path.{in,out}
+
+For example, if a guest uses three CPUs, the names are
+ trace-path-cpu{0,1,2}.{in,out}
+and
+ agent-ctl-path.{in,out}.
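+
+As a sketch (assuming a four-CPU guest and a bash shell; adjust the
+range to your CPU count), the per-CPU FIFOs can also be created in a
+loop:
+ # for i in 0 1 2 3; do
+ # mkfifo /tmp/virtio-trace/trace-path-cpu${i}.{in,out}
+ # done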
+
+2) Set up the virtio-serial pipes in the host
+ Add qemu options to use the virtio-serial pipes.
+
+ ##virtio-serial device##
+ -device virtio-serial-pci,id=virtio-serial0\
+ ##control path##
+ -chardev pipe,id=charchannel0,path=/tmp/virtio-trace/agent-ctl-path\
+ -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,\
+ id=channel0,name=agent-ctl-path\
+ ##data path##
+ -chardev pipe,id=charchannel1,path=/tmp/virtio-trace/trace-path-cpu0\
+ -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,\
+ id=channel1,name=trace-path-cpu0\
+ ...
+
+If you manage guests with libvirt, add the following tags to domain XML files.
+Then, libvirt passes the same command option to qemu.
+
+ <channel type='pipe'>
+ <source path='/tmp/virtio-trace/agent-ctl-path'/>
+ <target type='virtio' name='agent-ctl-path'/>
+ <address type='virtio-serial' controller='0' bus='0' port='0'/>
+ </channel>
+ <channel type='pipe'>
+ <source path='/tmp/virtio-trace/trace-path-cpu0'/>
+ <target type='virtio' name='trace-path-cpu0'/>
+ <address type='virtio-serial' controller='0' bus='0' port='1'/>
+ </channel>
+ ...
+Here, chardev names are restricted to trace-path-cpuX and agent-ctl-path. For
+example, if a guest uses three CPUs, chardev names should be trace-path-cpu0,
+trace-path-cpu1, trace-path-cpu2, and agent-ctl-path.
+
+3) Boot the guest
+ You can then find the chardevs under /dev/virtio-ports/ in the guest.
+
+
+Run
+===
+
+0) Build trace agent in a guest
+ $ make
+
+1) Enable ftrace in the guest
+ <Example>
+ # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+
+2) Run trace agent in the guest
+ The agent must be run as root.
+ # ./trace-agent
+The read/write threads in the agent wait for a start order from the host. If
+you add the -o option, trace data are output via stdout in the guest.
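+
+For example (illustrative), the stdout trace can be kept in a file:
+ # ./trace-agent -o > trace.log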
+
+3) Open FIFO in a host
+ # cat /tmp/virtio-trace/trace-path-cpu0.out
+If the host does not open these, trace data get stuck in virtio buffers and the
+guest will stall, due to how the chardev is specified in QEMU. This blocking
+mode may be fixed in the future.
+
+4) Start reading trace data by an order from the host
+ The host injects a read-start order into the guest via virtio-serial.
+ # echo 1 > /tmp/virtio-trace/agent-ctl-path.in
+
+5) Stop reading trace data by an order from the host
+ The host injects a read-stop order into the guest via virtio-serial.
+ # echo 0 > /tmp/virtio-trace/agent-ctl-path.in
diff --git a/tools/virtio/virtio-trace/trace-agent-ctl.c b/tools/virtio/virtio-trace/trace-agent-ctl.c
new file mode 100644
index 000000000..a2d0403c4
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-ctl.c
@@ -0,0 +1,137 @@
+/*
+ * Controller of read/write threads for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define HOST_MSG_SIZE 256
+#define EVENT_WAIT_MSEC 100
+
+static volatile sig_atomic_t global_signal_val;
+bool global_sig_receive; /* default false */
+bool global_run_operation; /* default false */
+
+/* Handle SIGTERM/SIGINT/SIGQUIT to exit */
+static void signal_handler(int sig)
+{
+ global_signal_val = sig;
+}
+
+int rw_ctl_init(const char *ctl_path)
+{
+ int ctl_fd;
+
+ ctl_fd = open(ctl_path, O_RDONLY);
+ if (ctl_fd == -1) {
+ pr_err("Cannot open ctl_fd\n");
+ goto error;
+ }
+
+ return ctl_fd;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static int wait_order(int ctl_fd)
+{
+ struct pollfd poll_fd;
+ int ret = 0;
+
+ while (!global_sig_receive) {
+ poll_fd.fd = ctl_fd;
+ poll_fd.events = POLLIN;
+
+ ret = poll(&poll_fd, 1, EVENT_WAIT_MSEC);
+
+ if (global_signal_val) {
+ global_sig_receive = true;
+ pr_info("Receive interrupt %d\n", global_signal_val);
+
+ /* Wakes rw-threads when they are sleeping */
+ if (!global_run_operation)
+ pthread_cond_broadcast(&cond_wakeup);
+
+ ret = -1;
+ break;
+ }
+
+ if (ret < 0) {
+ pr_err("Polling error\n");
+ goto error;
+ }
+
+ if (ret)
+ break;
+ }
+
+ return ret;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * Control read/write threads by handling global_run_operation.
+ */
+void *rw_ctl_loop(int ctl_fd)
+{
+ ssize_t rlen;
+ char buf[HOST_MSG_SIZE];
+ int ret;
+
+ /* Setup signal handlers */
+ signal(SIGTERM, signal_handler);
+ signal(SIGINT, signal_handler);
+ signal(SIGQUIT, signal_handler);
+
+ while (!global_sig_receive) {
+
+ ret = wait_order(ctl_fd);
+ if (ret < 0)
+ break;
+
+ rlen = read(ctl_fd, buf, sizeof(buf));
+ if (rlen < 0) {
+ pr_err("read data error in ctl thread\n");
+ goto error;
+ }
+
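+ /*
+ * A host-side "echo 1" arrives as the two bytes "1\n", which
+ * is why the checks below test for rlen == 2.
+ */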
+ if (rlen == 2 && buf[0] == '1') {
+ /*
+ * If host writes '1' to a control path,
+ * this controller wakes all read/write threads.
+ */
+ global_run_operation = true;
+ pthread_cond_broadcast(&cond_wakeup);
+ pr_debug("Wake up all read/write threads\n");
+ } else if (rlen == 2 && buf[0] == '0') {
+ /*
+ * If host writes '0' to a control path, read/write
+ * threads will wait for notification from Host.
+ */
+ global_run_operation = false;
+ pr_debug("Stop all read/write threads\n");
+ } else
+ pr_info("Invalid host notification: %s\n", buf);
+ }
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c
new file mode 100644
index 000000000..3aace5ea4
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent-rw.c
@@ -0,0 +1,192 @@
+/*
+ * Read/write thread of a guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include "trace-agent.h"
+
+#define READ_WAIT_USEC 100000
+
+void *rw_thread_info_new(void)
+{
+ struct rw_thread_info *rw_ti;
+
+ rw_ti = zalloc(sizeof(struct rw_thread_info));
+ if (rw_ti == NULL) {
+ pr_err("rw_thread_info zalloc error\n");
+ exit(EXIT_FAILURE);
+ }
+
+ rw_ti->cpu_num = -1;
+ rw_ti->in_fd = -1;
+ rw_ti->out_fd = -1;
+ rw_ti->read_pipe = -1;
+ rw_ti->write_pipe = -1;
+ rw_ti->pipe_size = PIPE_INIT;
+
+ return rw_ti;
+}
+
+void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+ bool stdout_flag, unsigned long pipe_size,
+ struct rw_thread_info *rw_ti)
+{
+ int data_pipe[2];
+
+ rw_ti->cpu_num = cpu;
+
+ /* set read(input) fd */
+ rw_ti->in_fd = open(in_path, O_RDONLY);
+ if (rw_ti->in_fd == -1) {
+ pr_err("Could not open in_fd (CPU:%d)\n", cpu);
+ goto error;
+ }
+
+ /* set write(output) fd */
+ if (!stdout_flag) {
+ /* virtio-serial output mode */
+ rw_ti->out_fd = open(out_path, O_WRONLY);
+ if (rw_ti->out_fd == -1) {
+ pr_err("Could not open out_fd (CPU:%d)\n", cpu);
+ goto error;
+ }
+ } else
+ /* stdout mode */
+ rw_ti->out_fd = STDOUT_FILENO;
+
+ if (pipe2(data_pipe, O_NONBLOCK) < 0) {
+ pr_err("Could not create pipe in rw-thread(%d)\n", cpu);
+ goto error;
+ }
+
+ /*
+ * The default pipe size is 64kB (see fs/pipe.c).
+ * To read/write trace data quickly, the pipe size is enlarged here.
+ */
+ if (fcntl(*data_pipe, F_SETPIPE_SZ, pipe_size) < 0) {
+ pr_err("Could not change pipe size in rw-thread(%d)\n", cpu);
+ goto error;
+ }
+
+ rw_ti->read_pipe = data_pipe[1];
+ rw_ti->write_pipe = data_pipe[0];
+ rw_ti->pipe_size = pipe_size;
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+/* Bind a thread to a cpu */
+static void bind_cpu(int cpu_num)
+{
+ cpu_set_t mask;
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu_num, &mask);
+
+ /* bind my thread to cpu_num by assigning zero to the first argument */
+ if (sched_setaffinity(0, sizeof(mask), &mask) == -1)
+ pr_err("Could not set CPU#%d affinity\n", (int)cpu_num);
+}
+
+static void *rw_thread_main(void *thread_info)
+{
+ ssize_t rlen, wlen;
+ ssize_t ret;
+ struct rw_thread_info *ts = (struct rw_thread_info *)thread_info;
+
+ bind_cpu(ts->cpu_num);
+
+ while (1) {
+ /* Wait for a read order of trace data by Host OS */
+ if (!global_run_operation) {
+ pthread_mutex_lock(&mutex_notify);
+ pthread_cond_wait(&cond_wakeup, &mutex_notify);
+ pthread_mutex_unlock(&mutex_notify);
+ }
+
+ if (global_sig_receive)
+ break;
+
+ /*
+ * Each thread reads the trace_pipe_raw of the CPU it is bound
+ * to, so there is no contention between threads.
+ */
+ rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL,
+ ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE);
+
+ if (rlen < 0) {
+ pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num);
+ goto error;
+ } else if (rlen == 0) {
+ /*
+ * If no trace data exist, or the data are not yet
+ * readable because they do not exceed the page size,
+ * the splice read returns 0. In that case, wait for
+ * the ring buffer to fill up.
+ */
+ usleep(READ_WAIT_USEC);
+ pr_debug("Read retry(cpu:%d)\n", ts->cpu_num);
+ continue;
+ }
+
+ wlen = 0;
+
+ do {
+ ret = splice(ts->write_pipe, NULL, ts->out_fd, NULL,
+ rlen - wlen,
+ SPLICE_F_MOVE | SPLICE_F_MORE);
+
+ if (ret < 0) {
+ pr_err("Splice_write in rw-thread(%d)\n",
+ ts->cpu_num);
+ goto error;
+ } else if (ret == 0)
+ /*
+ * When the host reader cannot keep up with the
+ * trace data, the guest is stopped, because the
+ * char dev in QEMU does not support non-blocking
+ * mode. The writer therefore sleeps in that
+ * case.
+ * This sleep can be removed once non-blocking
+ * mode is supported.
+ */
+ sleep(1);
+ wlen += ret;
+ } while (wlen < rlen);
+ }
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
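+
+/*
+ * Data flow of one read/write thread (a sketch):
+ *
+ *   trace_pipe_raw (in_fd) --splice--> pipe --splice--> out_fd
+ *
+ * Both splice() calls move pages between fds without copying them
+ * through user memory, which keeps the agent's overhead low.
+ */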
+
+
+pthread_t rw_thread_run(struct rw_thread_info *rw_ti)
+{
+ int ret;
+ pthread_t rw_thread_per_cpu;
+
+ ret = pthread_create(&rw_thread_per_cpu, NULL, rw_thread_main, rw_ti);
+ if (ret != 0) {
+ pr_err("Could not create a rw thread(%d)\n", rw_ti->cpu_num);
+ exit(EXIT_FAILURE);
+ }
+
+ return rw_thread_per_cpu;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.c b/tools/virtio/virtio-trace/trace-agent.c
new file mode 100644
index 000000000..0a0a7dd4e
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.c
@@ -0,0 +1,270 @@
+/*
+ * Guest agent for virtio-trace
+ *
+ * Copyright (C) 2012 Hitachi, Ltd.
+ * Created by Yoshihiro Yunomae <yoshihiro.yunomae.ez@hitachi.com>
+ * Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+ *
+ * Licensed under GPL version 2 only.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "trace-agent.h"
+
+#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE))
+#define PIPE_DEF_BUFS 16
+#define PIPE_MIN_SIZE (PAGE_SIZE*PIPE_DEF_BUFS)
+#define PIPE_MAX_SIZE (1024*1024)
+#define READ_PATH_FMT \
+ "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define WRITE_PATH_FMT "/dev/virtio-ports/trace-path-cpu%d"
+#define CTL_PATH "/dev/virtio-ports/agent-ctl-path"
+
+pthread_mutex_t mutex_notify = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond_wakeup = PTHREAD_COND_INITIALIZER;
+
+static int get_total_cpus(void)
+{
+ int nr_cpus = (int)sysconf(_SC_NPROCESSORS_CONF);
+
+ if (nr_cpus <= 0) {
+ pr_err("Could not read cpus\n");
+ goto error;
+ } else if (nr_cpus > MAX_CPUS) {
+ pr_err("Exceed max cpus(%d)\n", (int)MAX_CPUS);
+ goto error;
+ }
+
+ return nr_cpus;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static void *agent_info_new(void)
+{
+ struct agent_info *s;
+ int i;
+
+ s = zalloc(sizeof(struct agent_info));
+ if (s == NULL) {
+ pr_err("agent_info zalloc error\n");
+ exit(EXIT_FAILURE);
+ }
+
+ s->pipe_size = PIPE_INIT;
+ s->use_stdout = false;
+ s->cpus = get_total_cpus();
+ s->ctl_fd = -1;
+
+ /* read/write threads init */
+ for (i = 0; i < s->cpus; i++)
+ s->rw_ti[i] = rw_thread_info_new();
+
+ return s;
+}
+
+static unsigned long parse_size(const char *arg)
+{
+ unsigned long value, round;
+ char *ptr;
+
+ value = strtoul(arg, &ptr, 10);
+ switch (*ptr) {
+ case 'K': case 'k':
+ value <<= 10;
+ break;
+ case 'M': case 'm':
+ value <<= 20;
+ break;
+ default:
+ break;
+ }
+
+ if (value > PIPE_MAX_SIZE) {
+ pr_err("Pipe size must be less than 1MB\n");
+ goto error;
+ } else if (value < PIPE_MIN_SIZE) {
+ pr_err("Pipe size must be over 64KB\n");
+ goto error;
+ }
+
+ /* Align buffer size with page unit */
+ round = value & (PAGE_SIZE - 1);
+ value = value - round;
+
+ return value;
+error:
+ return 0;
+}
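+
+/*
+ * Example (illustrative): "-s 150K" parses to 153600 bytes, which is
+ * then rounded down to the page multiple 151552 (37 pages of 4kB);
+ * values outside [64KB, 1MB] are rejected and parse_size() returns 0.
+ */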
+
+static void usage(char const *prg)
+{
+ pr_err("usage: %s [-h] [-o] [-s <size of pipe>]\n", prg);
+}
+
+static const char *make_path(int cpu_num, bool this_is_write_path)
+{
+ int ret;
+ char *buf;
+
+ buf = zalloc(PATH_MAX);
+ if (buf == NULL) {
+ pr_err("Could not allocate buffer\n");
+ goto error;
+ }
+
+ if (this_is_write_path)
+ /* write(output) path */
+ ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
+ else
+ /* read(input) path */
+ ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+
+ if (ret <= 0) {
+ pr_err("Failed to generate %s path(CPU#%d):%d\n",
+ this_is_write_path ? "write" : "read", cpu_num, ret);
+ goto error;
+ }
+
+ return buf;
+
+error:
+ free(buf);
+ return NULL;
+}
+
+static const char *make_input_path(int cpu_num)
+{
+ return make_path(cpu_num, false);
+}
+
+static const char *make_output_path(int cpu_num)
+{
+ return make_path(cpu_num, true);
+}
+
+static void *agent_info_init(struct agent_info *s)
+{
+ int cpu;
+ const char *in_path = NULL;
+ const char *out_path = NULL;
+
+ /* init read/write threads */
+ for (cpu = 0; cpu < s->cpus; cpu++) {
+ /* set read(input) path per read/write thread */
+ in_path = make_input_path(cpu);
+ if (in_path == NULL)
+ goto error;
+
+ /* set write(output) path per read/write thread */
+ if (!s->use_stdout) {
+ out_path = make_output_path(cpu);
+ if (out_path == NULL)
+ goto error;
+ } else
+ /* stdout mode */
+ pr_debug("stdout mode\n");
+
+ rw_thread_init(cpu, in_path, out_path, s->use_stdout,
+ s->pipe_size, s->rw_ti[cpu]);
+ }
+
+ /* init controller of read/write threads */
+ s->ctl_fd = rw_ctl_init((const char *)CTL_PATH);
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static void *parse_args(int argc, char *argv[], struct agent_info *s)
+{
+ int cmd;
+ unsigned long size;
+
+ while ((cmd = getopt(argc, argv, "hos:")) != -1) {
+ switch (cmd) {
+ /* stdout mode */
+ case 'o':
+ s->use_stdout = true;
+ break;
+ /* size of pipe */
+ case 's':
+ size = parse_size(optarg);
+ if (size == 0)
+ goto error;
+ s->pipe_size = size;
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ goto error;
+ }
+ }
+
+ agent_info_init(s);
+
+ return NULL;
+
+error:
+ exit(EXIT_FAILURE);
+}
+
+static void agent_main_loop(struct agent_info *s)
+{
+ int cpu;
+ pthread_t rw_thread_per_cpu[MAX_CPUS];
+
+ /* Start all read/write threads */
+ for (cpu = 0; cpu < s->cpus; cpu++)
+ rw_thread_per_cpu[cpu] = rw_thread_run(s->rw_ti[cpu]);
+
+ rw_ctl_loop(s->ctl_fd);
+
+ /* Finish all read/write threads */
+ for (cpu = 0; cpu < s->cpus; cpu++) {
+ int ret;
+
+ ret = pthread_join(rw_thread_per_cpu[cpu], NULL);
+ if (ret != 0) {
+ pr_err("pthread_join() error:%d (cpu %d)\n", ret, cpu);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+static void agent_info_free(struct agent_info *s)
+{
+ int i;
+
+ close(s->ctl_fd);
+ for (i = 0; i < s->cpus; i++) {
+ close(s->rw_ti[i]->in_fd);
+ close(s->rw_ti[i]->out_fd);
+ close(s->rw_ti[i]->read_pipe);
+ close(s->rw_ti[i]->write_pipe);
+ free(s->rw_ti[i]);
+ }
+ free(s);
+}
+
+int main(int argc, char *argv[])
+{
+ struct agent_info *s = NULL;
+
+ s = agent_info_new();
+ parse_args(argc, argv, s);
+
+ agent_main_loop(s);
+
+ agent_info_free(s);
+
+ return 0;
+}
diff --git a/tools/virtio/virtio-trace/trace-agent.h b/tools/virtio/virtio-trace/trace-agent.h
new file mode 100644
index 000000000..e67885969
--- /dev/null
+++ b/tools/virtio/virtio-trace/trace-agent.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TRACE_AGENT_H__
+#define __TRACE_AGENT_H__
+#include <pthread.h>
+#include <stdbool.h>
+
+#define MAX_CPUS 256
+#define PIPE_INIT (1024*1024)
+
+/*
+ * agent_info - structure managing total information of guest agent
+ * @pipe_size: size of pipe (default 1MB)
+ * @use_stdout: set to true when the -o option is given (default false)
+ * @cpus: total number of CPUs
+ * @ctl_fd: fd of control path, /dev/virtio-ports/agent-ctl-path
+ * @rw_ti: structure managing information of read/write threads
+ */
+struct agent_info {
+ unsigned long pipe_size;
+ bool use_stdout;
+ int cpus;
+ int ctl_fd;
+ struct rw_thread_info *rw_ti[MAX_CPUS];
+};
+
+/*
+ * rw_thread_info - structure managing the read/write thread of a cpu
+ * @cpu_num: cpu number operating this read/write thread
+ * @in_fd: fd of the trace data read path for cpu_num
+ * @out_fd: fd of the trace data write path for cpu_num
+ * @read_pipe: fd of read pipe
+ * @write_pipe: fd of write pipe
+ * @pipe_size: size of pipe (default 1MB)
+ */
+struct rw_thread_info {
+ int cpu_num;
+ int in_fd;
+ int out_fd;
+ int read_pipe;
+ int write_pipe;
+ unsigned long pipe_size;
+};
+
+/* use for stopping rw threads */
+extern bool global_sig_receive;
+
+/* use for notification */
+extern bool global_run_operation;
+extern pthread_mutex_t mutex_notify;
+extern pthread_cond_t cond_wakeup;
+
+/* for controller of read/write threads */
+extern int rw_ctl_init(const char *ctl_path);
+extern void *rw_ctl_loop(int ctl_fd);
+
+/* for trace read/write thread */
+extern void *rw_thread_info_new(void);
+extern void *rw_thread_init(int cpu, const char *in_path, const char *out_path,
+ bool stdout_flag, unsigned long pipe_size,
+ struct rw_thread_info *rw_ti);
+extern pthread_t rw_thread_run(struct rw_thread_info *rw_ti);
+
+static inline void *zalloc(size_t size)
+{
+ return calloc(1, size);
+}
+
+#define pr_err(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#define pr_info(format, ...) fprintf(stdout, format, ## __VA_ARGS__)
+#ifdef DEBUG
+#define pr_debug(format, ...) fprintf(stderr, format, ## __VA_ARGS__)
+#else
+#define pr_debug(format, ...) do {} while (0)
+#endif
+
+#endif /*__TRACE_AGENT_H__*/
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
new file mode 100644
index 000000000..b427def67
--- /dev/null
+++ b/tools/virtio/virtio_test.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <getopt.h>
+#include <string.h>
+#include <poll.h>
+#include <sys/eventfd.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <linux/virtio_types.h>
+#include <linux/vhost.h>
+#include <linux/virtio.h>
+#include <linux/virtio_ring.h>
+#include "../../drivers/vhost/test.h"
+
+/* Unused */
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+
+struct vq_info {
+ int kick;
+ int call;
+ int num;
+ int idx;
+ void *ring;
+ /* copy used for control */
+ struct vring vring;
+ struct virtqueue *vq;
+};
+
+struct vdev_info {
+ struct virtio_device vdev;
+ int control;
+ struct pollfd fds[1];
+ struct vq_info vqs[1];
+ int nvqs;
+ void *buf;
+ size_t buf_size;
+ struct vhost_memory *mem;
+};
+
+bool vq_notify(struct virtqueue *vq)
+{
+ struct vq_info *info = vq->priv;
+ unsigned long long v = 1;
+ int r;
+ r = write(info->kick, &v, sizeof v);
+ assert(r == sizeof v);
+ return true;
+}
+
+void vq_callback(struct virtqueue *vq)
+{
+}
+
+
+void vhost_vq_setup(struct vdev_info *dev, struct vq_info *info)
+{
+ struct vhost_vring_state state = { .index = info->idx };
+ struct vhost_vring_file file = { .index = info->idx };
+ unsigned long long features = dev->vdev.features;
+ struct vhost_vring_addr addr = {
+ .index = info->idx,
+ .desc_user_addr = (uint64_t)(unsigned long)info->vring.desc,
+ .avail_user_addr = (uint64_t)(unsigned long)info->vring.avail,
+ .used_user_addr = (uint64_t)(unsigned long)info->vring.used,
+ };
+ int r;
+ r = ioctl(dev->control, VHOST_SET_FEATURES, &features);
+ assert(r >= 0);
+ state.num = info->vring.num;
+ r = ioctl(dev->control, VHOST_SET_VRING_NUM, &state);
+ assert(r >= 0);
+ state.num = 0;
+ r = ioctl(dev->control, VHOST_SET_VRING_BASE, &state);
+ assert(r >= 0);
+ r = ioctl(dev->control, VHOST_SET_VRING_ADDR, &addr);
+ assert(r >= 0);
+ file.fd = info->kick;
+ r = ioctl(dev->control, VHOST_SET_VRING_KICK, &file);
+ assert(r >= 0);
+ file.fd = info->call;
+ r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
+ assert(r >= 0);
+}
+
+static void vq_info_add(struct vdev_info *dev, int num)
+{
+ struct vq_info *info = &dev->vqs[dev->nvqs];
+ int r;
+ info->idx = dev->nvqs;
+ info->kick = eventfd(0, EFD_NONBLOCK);
+ info->call = eventfd(0, EFD_NONBLOCK);
+ r = posix_memalign(&info->ring, 4096, vring_size(num, 4096));
+ assert(r >= 0);
+ memset(info->ring, 0, vring_size(num, 4096));
+ vring_init(&info->vring, num, info->ring, 4096);
+ info->vq = vring_new_virtqueue(info->idx,
+ info->vring.num, 4096, &dev->vdev,
+ true, false, info->ring,
+ vq_notify, vq_callback, "test");
+ assert(info->vq);
+ info->vq->priv = info;
+ vhost_vq_setup(dev, info);
+ dev->fds[info->idx].fd = info->call;
+ dev->fds[info->idx].events = POLLIN;
+ dev->nvqs++;
+}
+
+static void vdev_info_init(struct vdev_info* dev, unsigned long long features)
+{
+ int r;
+ memset(dev, 0, sizeof *dev);
+ dev->vdev.features = features;
+ dev->buf_size = 1024;
+ dev->buf = malloc(dev->buf_size);
+ assert(dev->buf);
+ dev->control = open("/dev/vhost-test", O_RDWR);
+ assert(dev->control >= 0);
+ r = ioctl(dev->control, VHOST_SET_OWNER, NULL);
+ assert(r >= 0);
+ dev->mem = malloc(offsetof(struct vhost_memory, regions) +
+ sizeof dev->mem->regions[0]);
+ assert(dev->mem);
+ memset(dev->mem, 0, offsetof(struct vhost_memory, regions) +
+ sizeof dev->mem->regions[0]);
+ dev->mem->nregions = 1;
+ dev->mem->regions[0].guest_phys_addr = (long)dev->buf;
+ dev->mem->regions[0].userspace_addr = (long)dev->buf;
+ dev->mem->regions[0].memory_size = dev->buf_size;
+ r = ioctl(dev->control, VHOST_SET_MEM_TABLE, dev->mem);
+ assert(r >= 0);
+}
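+
+/* The single region registered above identity-maps the test buffer
+ * (guest_phys_addr == userspace_addr == dev->buf), so the vhost-test
+ * module can translate descriptor addresses with no offset.
+ */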
+
+/* TODO: this is pretty bad: we get a cache line bounce
+ * for the wait queue on poll and another one on read,
+ * plus the read which is there just to clear the
+ * current state. */
+static void wait_for_interrupt(struct vdev_info *dev)
+{
+ int i;
+ unsigned long long val;
+ poll(dev->fds, dev->nvqs, -1);
+ for (i = 0; i < dev->nvqs; ++i)
+ if (dev->fds[i].revents & POLLIN) {
+ read(dev->fds[i].fd, &val, sizeof val);
+ }
+}
+
+static void run_test(struct vdev_info *dev, struct vq_info *vq,
+ bool delayed, int bufs)
+{
+ struct scatterlist sl;
+ long started = 0, completed = 0;
+ long completed_before;
+ int r, test = 1;
+ unsigned len;
+ long long spurious = 0;
+ r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+ assert(r >= 0);
+ for (;;) {
+ virtqueue_disable_cb(vq->vq);
+ completed_before = completed;
+ do {
+ if (started < bufs) {
+ sg_init_one(&sl, dev->buf, dev->buf_size);
+ r = virtqueue_add_outbuf(vq->vq, &sl, 1,
+ dev->buf + started,
+ GFP_ATOMIC);
+ if (likely(r == 0)) {
+ ++started;
+ if (unlikely(!virtqueue_kick(vq->vq)))
+ r = -1;
+ }
+ } else
+ r = -1;
+
+ /* Flush out completed bufs if any */
+ if (virtqueue_get_buf(vq->vq, &len)) {
+ ++completed;
+ r = 0;
+ }
+
+ } while (r == 0);
+ if (completed == completed_before)
+ ++spurious;
+ assert(completed <= bufs);
+ assert(started <= bufs);
+ if (completed == bufs)
+ break;
+ if (delayed) {
+ if (virtqueue_enable_cb_delayed(vq->vq))
+ wait_for_interrupt(dev);
+ } else {
+ if (virtqueue_enable_cb(vq->vq))
+ wait_for_interrupt(dev);
+ }
+ }
+ test = 0;
+ r = ioctl(dev->control, VHOST_TEST_RUN, &test);
+ assert(r >= 0);
+ fprintf(stderr, "spurious wakeups: 0x%llx\n", spurious);
+}
+
+const char optstring[] = "h";
+const struct option longopts[] = {
+ {
+ .name = "help",
+ .val = 'h',
+ },
+ {
+ .name = "event-idx",
+ .val = 'E',
+ },
+ {
+ .name = "no-event-idx",
+ .val = 'e',
+ },
+ {
+ .name = "indirect",
+ .val = 'I',
+ },
+ {
+ .name = "no-indirect",
+ .val = 'i',
+ },
+ {
+ .name = "virtio-1",
+ .val = '1',
+ },
+ {
+ .name = "no-virtio-1",
+ .val = '0',
+ },
+ {
+ .name = "delayed-interrupt",
+ .val = 'D',
+ },
+ {
+ .name = "no-delayed-interrupt",
+ .val = 'd',
+ },
+ {
+ }
+};
+
+static void help(void)
+{
+ fprintf(stderr, "Usage: virtio_test [--help]"
+ " [--no-indirect]"
+ " [--no-event-idx]"
+ " [--no-virtio-1]"
+ " [--delayed-interrupt]"
+ "\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct vdev_info dev;
+ unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
+ (1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
+ int o;
+ bool delayed = false;
+
+ for (;;) {
+ o = getopt_long(argc, argv, optstring, longopts, NULL);
+ switch (o) {
+ case -1:
+ goto done;
+ case '?':
+ help();
+ exit(2);
+ case 'e':
+ features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX);
+ break;
+ case 'h':
+ help();
+ goto done;
+ case 'i':
+ features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
+ break;
+ case '0':
+ features &= ~(1ULL << VIRTIO_F_VERSION_1);
+ break;
+ case 'D':
+ delayed = true;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ }
+
+done:
+ vdev_info_init(&dev, features);
+ vq_info_add(&dev, 256);
+ run_test(&dev, &dev.vqs[0], delayed, 0x100000);
+ return 0;
+}
diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
new file mode 100644
index 000000000..293653463
--- /dev/null
+++ b/tools/virtio/vringh_test.c
@@ -0,0 +1,751 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Simple test of virtio code, entirely in userspace. */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <err.h>
+#include <linux/kernel.h>
+#include <linux/err.h>
+#include <linux/virtio.h>
+#include <linux/vringh.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
+#include <linux/uaccess.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+
+#define USER_MEM (1024*1024)
+void *__user_addr_min, *__user_addr_max;
+void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
+static u64 user_addr_offset;
+
+#define RINGSIZE 256
+#define ALIGN 4096
+
+static bool never_notify_host(struct virtqueue *vq)
+{
+ abort();
+}
+
+static void never_callback_guest(struct virtqueue *vq)
+{
+ abort();
+}
+
+static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
+{
+ if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
+ return false;
+ if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
+ return false;
+
+ r->start = (u64)(unsigned long)__user_addr_min - user_addr_offset;
+ r->end_incl = (u64)(unsigned long)__user_addr_max - 1 - user_addr_offset;
+ r->offset = user_addr_offset;
+ return true;
+}
+
+/* We return single byte ranges. */
+static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
+{
+ if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
+ return false;
+ if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
+ return false;
+
+ r->start = addr;
+ r->end_incl = r->start;
+ r->offset = user_addr_offset;
+ return true;
+}
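+
+/*
+ * Returning one-byte ranges forces vringh to split even contiguous
+ * buffers into multi-element iovecs; the --slow-range assertions in
+ * main() (e.g. wiov.used == 2 for a two-byte buffer) depend on this.
+ */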
+
+struct guest_virtio_device {
+ struct virtio_device vdev;
+ int to_host_fd;
+ unsigned long notifies;
+};
+
+static bool parallel_notify_host(struct virtqueue *vq)
+{
+ int rc;
+ struct guest_virtio_device *gvdev;
+
+ gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
+ rc = write(gvdev->to_host_fd, "", 1);
+ if (rc < 0)
+ return false;
+ gvdev->notifies++;
+ return true;
+}
+
+static bool no_notify_host(struct virtqueue *vq)
+{
+ return true;
+}
+
+#define NUM_XFERS (10000000)
+
+/* We aim for two "distant" cpus. */
+static void find_cpus(unsigned int *first, unsigned int *last)
+{
+ unsigned int i;
+
+ *first = -1U;
+ *last = 0;
+ for (i = 0; i < 4096; i++) {
+ cpu_set_t set;
+ CPU_ZERO(&set);
+ CPU_SET(i, &set);
+ if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
+ if (i < *first)
+ *first = i;
+ if (i > *last)
+ *last = i;
+ }
+ }
+}
+
+/* Opencoded version for fast mode */
+static inline int vringh_get_head(struct vringh *vrh, u16 *head)
+{
+ u16 avail_idx, i;
+ int err;
+
+ err = get_user(avail_idx, &vrh->vring.avail->idx);
+ if (err)
+ return err;
+
+ if (vrh->last_avail_idx == avail_idx)
+ return 0;
+
+ /* Only get avail ring entries after they have been exposed by guest. */
+ virtio_rmb(vrh->weak_barriers);
+
+ i = vrh->last_avail_idx & (vrh->vring.num - 1);
+
+ err = get_user(*head, &vrh->vring.avail->ring[i]);
+ if (err)
+ return err;
+
+ vrh->last_avail_idx++;
+ return 1;
+}
+
+static int parallel_test(u64 features,
+ bool (*getrange)(struct vringh *vrh,
+ u64 addr, struct vringh_range *r),
+ bool fast_vringh)
+{
+ void *host_map, *guest_map;
+ int fd, mapsize, to_guest[2], to_host[2];
+ unsigned long xfers = 0, notifies = 0, receives = 0;
+ unsigned int first_cpu, last_cpu;
+ cpu_set_t cpu_set;
+ char buf[128];
+
+ /* Create real file to mmap. */
+ fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
+ if (fd < 0)
+ err(1, "Opening /tmp/vringh_test-file");
+
+ /* Extra room at the end for some data, and indirects */
+ mapsize = vring_size(RINGSIZE, ALIGN)
+ + RINGSIZE * 2 * sizeof(int)
+ + RINGSIZE * 6 * sizeof(struct vring_desc);
+ mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
+ ftruncate(fd, mapsize);
+
+ /* Parent and child use separate addresses, to check our mapping logic! */
+ host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+
+ pipe(to_guest);
+ pipe(to_host);
+
+ CPU_ZERO(&cpu_set);
+ find_cpus(&first_cpu, &last_cpu);
+ printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
+ fflush(stdout);
+
+ if (fork() != 0) {
+ struct vringh vrh;
+ int status, err, rlen = 0;
+ char rbuf[5];
+
+ /* We are the host: never access guest addresses! */
+ munmap(guest_map, mapsize);
+
+ __user_addr_min = host_map;
+ __user_addr_max = __user_addr_min + mapsize;
+ user_addr_offset = host_map - guest_map;
+ assert(user_addr_offset);
+
+ close(to_guest[0]);
+ close(to_host[1]);
+
+ vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
+ vringh_init_user(&vrh, features, RINGSIZE, true,
+ vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+ CPU_SET(first_cpu, &cpu_set);
+ if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
+ errx(1, "Could not set affinity to cpu %u", first_cpu);
+
+ while (xfers < NUM_XFERS) {
+ struct iovec host_riov[2], host_wiov[2];
+ struct vringh_iov riov, wiov;
+ u16 head, written;
+
+ if (fast_vringh) {
+ for (;;) {
+ err = vringh_get_head(&vrh, &head);
+ if (err != 0)
+ break;
+ err = vringh_need_notify_user(&vrh);
+ if (err < 0)
+ errx(1, "vringh_need_notify_user: %i",
+ err);
+ if (err) {
+ write(to_guest[1], "", 1);
+ notifies++;
+ }
+ }
+ if (err != 1)
+ errx(1, "vringh_get_head");
+ written = 0;
+ goto complete;
+ } else {
+ vringh_iov_init(&riov,
+ host_riov,
+ ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov,
+ host_wiov,
+ ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov,
+ getrange, &head);
+ }
+ if (err == 0) {
+ err = vringh_need_notify_user(&vrh);
+ if (err < 0)
+ errx(1, "vringh_need_notify_user: %i",
+ err);
+ if (err) {
+ write(to_guest[1], "", 1);
+ notifies++;
+ }
+
+ if (!vringh_notify_enable_user(&vrh))
+ continue;
+
+ /* Swallow all notifies at once. */
+ if (read(to_host[0], buf, sizeof(buf)) < 1)
+ break;
+
+ vringh_notify_disable_user(&vrh);
+ receives++;
+ continue;
+ }
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ /* We simply copy bytes. */
+ if (riov.used) {
+ rlen = vringh_iov_pull_user(&riov, rbuf,
+ sizeof(rbuf));
+ if (rlen != 4)
+ errx(1, "vringh_iov_pull_user: %i",
+ rlen);
+ assert(riov.i == riov.used);
+ written = 0;
+ } else {
+ err = vringh_iov_push_user(&wiov, rbuf, rlen);
+ if (err != rlen)
+ errx(1, "vringh_iov_push_user: %i",
+ err);
+ assert(wiov.i == wiov.used);
+ written = err;
+ }
+ complete:
+ xfers++;
+
+ err = vringh_complete_user(&vrh, head, written);
+ if (err != 0)
+ errx(1, "vringh_complete_user: %i", err);
+ }
+
+ err = vringh_need_notify_user(&vrh);
+ if (err < 0)
+ errx(1, "vringh_need_notify_user: %i", err);
+ if (err) {
+ write(to_guest[1], "", 1);
+ notifies++;
+ }
+ wait(&status);
+ if (!WIFEXITED(status))
+ errx(1, "Child died with signal %i?", WTERMSIG(status));
+ if (WEXITSTATUS(status) != 0)
+ errx(1, "Child exited %i?", WEXITSTATUS(status));
+ printf("Host: notified %lu, pinged %lu\n", notifies, receives);
+ return 0;
+ } else {
+ struct guest_virtio_device gvdev;
+ struct virtqueue *vq;
+ unsigned int *data;
+ struct vring_desc *indirects;
+ unsigned int finished = 0;
+
+ /* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
+ data = guest_map + vring_size(RINGSIZE, ALIGN);
+ indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);
+
+ /* We are the guest. */
+ munmap(host_map, mapsize);
+
+ close(to_guest[1]);
+ close(to_host[0]);
+
+ gvdev.vdev.features = features;
+ gvdev.to_host_fd = to_host[1];
+ gvdev.notifies = 0;
+
+ CPU_SET(first_cpu, &cpu_set);
+ if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
+ err(1, "Could not set affinity to cpu %u", first_cpu);
+
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
+ false, guest_map,
+ fast_vringh ? no_notify_host
+ : parallel_notify_host,
+ never_callback_guest, "guest vq");
+
+ /* Don't kfree indirects. */
+ __kfree_ignore_start = indirects;
+ __kfree_ignore_end = indirects + RINGSIZE * 6;
+
+ while (xfers < NUM_XFERS) {
+ struct scatterlist sg[4];
+ unsigned int num_sg, len;
+ int *dbuf, err;
+ bool output = !(xfers % 2);
+
+ /* Consume bufs. */
+ while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
+ if (len == 4)
+ assert(*dbuf == finished - 1);
+ else if (!fast_vringh)
+ assert(*dbuf == finished);
+ finished++;
+ }
+
+ /* Produce a buffer. */
+ dbuf = data + (xfers % (RINGSIZE + 1));
+
+ if (output)
+ *dbuf = xfers;
+ else
+ *dbuf = -1;
+
+ switch ((xfers / sizeof(*dbuf)) % 4) {
+ case 0:
+ /* Nasty three-element sg list. */
+ sg_init_table(sg, num_sg = 3);
+ sg_set_buf(&sg[0], (void *)dbuf, 1);
+ sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
+ sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
+ break;
+ case 1:
+ sg_init_table(sg, num_sg = 2);
+ sg_set_buf(&sg[0], (void *)dbuf, 1);
+ sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
+ break;
+ case 2:
+ sg_init_table(sg, num_sg = 1);
+ sg_set_buf(&sg[0], (void *)dbuf, 4);
+ break;
+ case 3:
+ sg_init_table(sg, num_sg = 4);
+ sg_set_buf(&sg[0], (void *)dbuf, 1);
+ sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
+ sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
+ sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
+ break;
+ }
+
+ /* May allocate an indirect, so force it to allocate
+ * user addr */
+ __kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
+ if (output)
+ err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
+ GFP_KERNEL);
+ else
+ err = virtqueue_add_inbuf(vq, sg, num_sg,
+ dbuf, GFP_KERNEL);
+
+ if (err == -ENOSPC) {
+ if (!virtqueue_enable_cb_delayed(vq))
+ continue;
+ /* Swallow all notifies at once. */
+ if (read(to_guest[0], buf, sizeof(buf)) < 1)
+ break;
+
+ receives++;
+ virtqueue_disable_cb(vq);
+ continue;
+ }
+
+ if (err)
+ errx(1, "virtqueue_add_in/outbuf: %i", err);
+
+ xfers++;
+ virtqueue_kick(vq);
+ }
+
+ /* Any extra? */
+ while (finished != xfers) {
+ int *dbuf;
+ unsigned int len;
+
+ /* Consume bufs. */
+ dbuf = virtqueue_get_buf(vq, &len);
+ if (dbuf) {
+ if (len == 4)
+ assert(*dbuf == finished - 1);
+ else
+ assert(len == 0);
+ finished++;
+ continue;
+ }
+
+ if (!virtqueue_enable_cb_delayed(vq))
+ continue;
+ if (read(to_guest[0], buf, sizeof(buf)) < 1)
+ break;
+
+ receives++;
+ virtqueue_disable_cb(vq);
+ }
+
+ printf("Guest: notified %lu, pinged %lu\n",
+ gvdev.notifies, receives);
+ vring_del_virtqueue(vq);
+ return 0;
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct virtio_device vdev;
+ struct virtqueue *vq;
+ struct vringh vrh;
+ struct scatterlist guest_sg[RINGSIZE], *sgs[2];
+ struct iovec host_riov[2], host_wiov[2];
+ struct vringh_iov riov, wiov;
+ struct vring_used_elem used[RINGSIZE];
+ char buf[28];
+ u16 head;
+ int err;
+ unsigned i;
+ void *ret;
+ bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
+ bool fast_vringh = false, parallel = false;
+
+ getrange = getrange_iov;
+ vdev.features = 0;
+
+ while (argv[1]) {
+ if (strcmp(argv[1], "--indirect") == 0)
+ __virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+ else if (strcmp(argv[1], "--eventidx") == 0)
+ __virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
+ else if (strcmp(argv[1], "--virtio-1") == 0)
+ __virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
+ else if (strcmp(argv[1], "--slow-range") == 0)
+ getrange = getrange_slow;
+ else if (strcmp(argv[1], "--fast-vringh") == 0)
+ fast_vringh = true;
+ else if (strcmp(argv[1], "--parallel") == 0)
+ parallel = true;
+ else
+ errx(1, "Unknown arg %s", argv[1]);
+ argv++;
+ }
+
+ if (parallel)
+ return parallel_test(vdev.features, getrange, fast_vringh);
+
+ if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
+ abort();
+ __user_addr_max = __user_addr_min + USER_MEM;
+ memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
+
+ /* Set up guest side. */
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true, false,
+ __user_addr_min,
+ never_notify_host, never_callback_guest,
+ "guest vq");
+
+ /* Set up host side. */
+ vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
+ vringh_init_user(&vrh, vdev.features, RINGSIZE, true,
+ vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
+
+ /* No descriptor to get yet... */
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 0)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ /* Guest puts in a descriptor. */
+ memcpy(__user_addr_max - 1, "a", 1);
+ sg_init_table(guest_sg, 1);
+ sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+ sg_init_table(guest_sg+1, 1);
+ sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
+ sgs[0] = &guest_sg[0];
+ sgs[1] = &guest_sg[1];
+
+ /* May allocate an indirect, so force it to allocate user addr */
+ __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+ err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_sgs: %i", err);
+ __kmalloc_fake = NULL;
+
+ /* Host retrieves it. */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ assert(riov.used == 1);
+ assert(riov.iov[0].iov_base == __user_addr_max - 1);
+ assert(riov.iov[0].iov_len == 1);
+ if (getrange != getrange_slow) {
+ assert(wiov.used == 1);
+ assert(wiov.iov[0].iov_base == __user_addr_max - 3);
+ assert(wiov.iov[0].iov_len == 2);
+ } else {
+ assert(wiov.used == 2);
+ assert(wiov.iov[0].iov_base == __user_addr_max - 3);
+ assert(wiov.iov[0].iov_len == 1);
+ assert(wiov.iov[1].iov_base == __user_addr_max - 2);
+ assert(wiov.iov[1].iov_len == 1);
+ }
+
+ err = vringh_iov_pull_user(&riov, buf, 5);
+ if (err != 1)
+ errx(1, "vringh_iov_pull_user: %i", err);
+ assert(buf[0] == 'a');
+ assert(riov.i == 1);
+ assert(vringh_iov_pull_user(&riov, buf, 5) == 0);
+
+ memcpy(buf, "bcdef", 5);
+ err = vringh_iov_push_user(&wiov, buf, 5);
+ if (err != 2)
+ errx(1, "vringh_iov_push_user: %i", err);
+ assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
+ assert(wiov.i == wiov.used);
+ assert(vringh_iov_push_user(&wiov, buf, 5) == 0);
+
+ /* Host is done. */
+ err = vringh_complete_user(&vrh, head, err);
+ if (err != 0)
+ errx(1, "vringh_complete_user: %i", err);
+
+ /* Guest should see used token now. */
+ __kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+ __kfree_ignore_end = __kfree_ignore_start + 1;
+ ret = virtqueue_get_buf(vq, &i);
+ if (ret != &err)
+ errx(1, "virtqueue_get_buf: %p", ret);
+ assert(i == 2);
+
+ /* Guest puts in a huge descriptor. */
+ sg_init_table(guest_sg, RINGSIZE);
+ for (i = 0; i < RINGSIZE; i++) {
+ sg_set_buf(&guest_sg[i],
+ __user_addr_max - USER_MEM/4, USER_MEM/4);
+ }
+
+ /* Fill contents with recognisable garbage. */
+ for (i = 0; i < USER_MEM/4; i++)
+ ((char *)__user_addr_max - USER_MEM/4)[i] = i;
+
+ /* This will allocate an indirect, so force it to allocate user addr */
+ __kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
+ err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_outbuf (large): %i", err);
+ __kmalloc_fake = NULL;
+
+ /* Host picks it up (allocates new iov). */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+ assert(riov.iov != host_riov);
+ if (getrange != getrange_slow)
+ assert(riov.used == RINGSIZE);
+ else
+ assert(riov.used == RINGSIZE * USER_MEM/4);
+
+ assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
+ assert(wiov.used == 0);
+
+ /* Pull data back out (in odd chunks), should be as expected. */
+ for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
+ err = vringh_iov_pull_user(&riov, buf, 3);
+ if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
+ errx(1, "vringh_iov_pull_user large: %i", err);
+ assert(buf[0] == (char)i);
+ assert(err < 2 || buf[1] == (char)(i + 1));
+ assert(err < 3 || buf[2] == (char)(i + 2));
+ }
+ assert(riov.i == riov.used);
+ vringh_iov_cleanup(&riov);
+ vringh_iov_cleanup(&wiov);
+
+ /* Complete using multi interface, just because we can. */
+ used[0].id = head;
+ used[0].len = 0;
+ err = vringh_complete_multi_user(&vrh, used, 1);
+ if (err)
+ errx(1, "vringh_complete_multi_user(1): %i", err);
+
+ /* Free up those descriptors. */
+ ret = virtqueue_get_buf(vq, &i);
+ if (ret != &err)
+ errx(1, "virtqueue_get_buf: %p", ret);
+
+ /* Add lots of descriptors. */
+ sg_init_table(guest_sg, 1);
+ sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
+ for (i = 0; i < RINGSIZE; i++) {
+ err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_outbuf (multiple): %i", err);
+ }
+
+ /* Now get many, and consume them all at once. */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ for (i = 0; i < RINGSIZE; i++) {
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+ used[i].id = head;
+ used[i].len = 0;
+ }
+ /* Make sure it wraps around ring, to test! */
+ assert(vrh.vring.used->idx % RINGSIZE != 0);
+ err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
+ if (err)
+ errx(1, "vringh_complete_multi_user: %i", err);
+
+ /* Free those buffers. */
+ for (i = 0; i < RINGSIZE; i++) {
+ unsigned len;
+ assert(virtqueue_get_buf(vq, &len) != NULL);
+ }
+
+ /* Test weird (but legal!) indirect. */
+ if (__virtio_test_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC)) {
+ char *data = __user_addr_max - USER_MEM/4;
+ struct vring_desc *d = __user_addr_max - USER_MEM/2;
+ struct vring vring;
+
+ /* Force creation of direct, which we modify. */
+ __virtio_clear_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
+ vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
+ false, __user_addr_min,
+ never_notify_host,
+ never_callback_guest,
+ "guest vq");
+
+ sg_init_table(guest_sg, 4);
+ sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
+ sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
+ sg_set_buf(&guest_sg[2], data + 6, 4);
+ sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);
+
+ err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
+ if (err)
+ errx(1, "virtqueue_add_outbuf (indirect): %i", err);
+
+ vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);
+
+ /* They're used in order, but double-check... */
+ assert(vring.desc[0].addr == (unsigned long)d);
+ assert(vring.desc[1].addr == (unsigned long)(d+2));
+ assert(vring.desc[2].addr == (unsigned long)data + 6);
+ assert(vring.desc[3].addr == (unsigned long)(d+3));
+ vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
+ vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
+ vring.desc[3].flags |= VRING_DESC_F_INDIRECT;
+
+ /* First indirect */
+ d[0].addr = (unsigned long)data;
+ d[0].len = 1;
+ d[0].flags = VRING_DESC_F_NEXT;
+ d[0].next = 1;
+ d[1].addr = (unsigned long)data + 1;
+ d[1].len = 2;
+ d[1].flags = 0;
+
+ /* Second indirect */
+ d[2].addr = (unsigned long)data + 3;
+ d[2].len = 3;
+ d[2].flags = 0;
+
+ /* Third indirect */
+ d[3].addr = (unsigned long)data + 10;
+ d[3].len = 5;
+ d[3].flags = VRING_DESC_F_NEXT;
+ d[3].next = 1;
+ d[4].addr = (unsigned long)data + 15;
+ d[4].len = 6;
+ d[4].flags = VRING_DESC_F_NEXT;
+ d[4].next = 2;
+ d[5].addr = (unsigned long)data + 21;
+ d[5].len = 7;
+ d[5].flags = 0;
+
+ /* Host picks it up (allocates new iov). */
+ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
+ vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
+
+ err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
+ if (err != 1)
+ errx(1, "vringh_getdesc_user: %i", err);
+
+ if (head != 0)
+ errx(1, "vringh_getdesc_user: head %i not 0", head);
+
+ assert(riov.max_num & VRINGH_IOV_ALLOCATED);
+ if (getrange != getrange_slow)
+ assert(riov.used == 7);
+ else
+ assert(riov.used == 28);
+ err = vringh_iov_pull_user(&riov, buf, 29);
+ assert(err == 28);
+
+ /* Data should be linear. */
+ for (i = 0; i < err; i++)
+ assert(buf[i] == i);
+ vringh_iov_cleanup(&riov);
+ }
+
+ /* Don't leak memory... */
+ vring_del_virtqueue(vq);
+ free(__user_addr_min);
+
+ return 0;
+}
diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore
new file mode 100644
index 000000000..44f095fa2
--- /dev/null
+++ b/tools/vm/.gitignore
@@ -0,0 +1,2 @@
+slabinfo
+page-types
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
new file mode 100644
index 000000000..9860622cb
--- /dev/null
+++ b/tools/vm/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for vm tools
+#
+include ../scripts/Makefile.include
+
+TARGETS=page-types slabinfo page_owner_sort
+
+LIB_DIR = ../lib/api
+LIBS = $(LIB_DIR)/libapi.a
+
+CFLAGS = -Wall -Wextra -I../lib/
+LDFLAGS = $(LIBS)
+
+all: $(TARGETS)
+
+$(TARGETS): $(LIBS)
+
+$(LIBS):
+	$(MAKE) -C $(LIB_DIR)
+
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+clean:
+	$(RM) $(TARGETS)
+	$(MAKE) -C $(LIB_DIR) clean
+
+sbindir ?= /usr/sbin
+
+install: all
+ install -d $(DESTDIR)$(sbindir)
+ install -m 755 -p $(TARGETS) $(DESTDIR)$(sbindir)
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
new file mode 100644
index 000000000..89063b967
--- /dev/null
+++ b/tools/vm/page-types.c
@@ -0,0 +1,1384 @@
+/*
+ * page-types: Tool for querying page flags
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should find a copy of v2 of the GNU General Public License somewhere on
+ * your Linux system; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2009 Intel corporation
+ *
+ * Authors: Wu Fengguang <fengguang.wu@intel.com>
+ */
+
+#define _FILE_OFFSET_BITS 64
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <string.h>
+#include <getopt.h>
+#include <limits.h>
+#include <assert.h>
+#include <ftw.h>
+#include <time.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/errno.h>
+#include <sys/fcntl.h>
+#include <sys/mount.h>
+#include <sys/statfs.h>
+#include <sys/mman.h>
+#include "../../include/uapi/linux/magic.h"
+#include "../../include/uapi/linux/kernel-page-flags.h"
+#include <api/fs/fs.h>
+
+#ifndef MAX_PATH
+# define MAX_PATH 256
+#endif
+
+#ifndef STR
+# define _STR(x) #x
+# define STR(x) _STR(x)
+#endif
+
+/*
+ * pagemap kernel ABI bits
+ */
+
+#define PM_ENTRY_BYTES 8
+#define PM_PFRAME_BITS 55
+#define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1)
+#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+#define MAX_SWAPFILES_SHIFT 5
+#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT)
+#define PM_SOFT_DIRTY (1ULL << 55)
+#define PM_MMAP_EXCLUSIVE (1ULL << 56)
+#define PM_FILE (1ULL << 61)
+#define PM_SWAP (1ULL << 62)
+#define PM_PRESENT (1ULL << 63)
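+
+/*
+ * Illustrative decode of one 8-byte pagemap entry (a sketch; the
+ * helpers below do the real work): bits 0-54 hold the PFN of a
+ * present page or the swap type/offset of a swapped one, bits 55-63
+ * are status flags.
+ *
+ *	if (pme & PM_PRESENT)
+ *		pfn = PM_PFRAME(pme);
+ *	else if (pme & PM_SWAP)
+ *		swap_offset = PM_SWAP_OFFSET(pme);
+ */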
+
+/*
+ * kernel page flags
+ */
+
+#define KPF_BYTES 8
+#define PROC_KPAGEFLAGS "/proc/kpageflags"
+#define PROC_KPAGECOUNT "/proc/kpagecount"
+#define PROC_KPAGECGROUP "/proc/kpagecgroup"
+
+#define SYS_KERNEL_MM_PAGE_IDLE "/sys/kernel/mm/page_idle/bitmap"
+
+/* [32-] kernel hacking assistance */
+#define KPF_RESERVED 32
+#define KPF_MLOCKED 33
+#define KPF_MAPPEDTODISK 34
+#define KPF_PRIVATE 35
+#define KPF_PRIVATE_2 36
+#define KPF_OWNER_PRIVATE 37
+#define KPF_ARCH 38
+#define KPF_UNCACHED 39
+#define KPF_SOFTDIRTY 40
+
+/* [48-] take some arbitrary free slots for expanding overloaded flags
+ * not part of kernel API
+ */
+#define KPF_READAHEAD 48
+#define KPF_SLOB_FREE 49
+#define KPF_SLUB_FROZEN 50
+#define KPF_SLUB_DEBUG 51
+#define KPF_FILE 61
+#define KPF_SWAP 62
+#define KPF_MMAP_EXCLUSIVE 63
+
+#define KPF_ALL_BITS ((uint64_t)~0ULL)
+#define KPF_HACKERS_BITS (0xffffULL << 32)
+#define KPF_OVERLOADED_BITS (0xffffULL << 48)
+#define BIT(name) (1ULL << KPF_##name)
+#define BITS_COMPOUND (BIT(COMPOUND_HEAD) | BIT(COMPOUND_TAIL))
+
+static const char * const page_flag_names[] = {
+ [KPF_LOCKED] = "L:locked",
+ [KPF_ERROR] = "E:error",
+ [KPF_REFERENCED] = "R:referenced",
+ [KPF_UPTODATE] = "U:uptodate",
+ [KPF_DIRTY] = "D:dirty",
+ [KPF_LRU] = "l:lru",
+ [KPF_ACTIVE] = "A:active",
+ [KPF_SLAB] = "S:slab",
+ [KPF_WRITEBACK] = "W:writeback",
+ [KPF_RECLAIM] = "I:reclaim",
+ [KPF_BUDDY] = "B:buddy",
+
+ [KPF_MMAP] = "M:mmap",
+ [KPF_ANON] = "a:anonymous",
+ [KPF_SWAPCACHE] = "s:swapcache",
+ [KPF_SWAPBACKED] = "b:swapbacked",
+ [KPF_COMPOUND_HEAD] = "H:compound_head",
+ [KPF_COMPOUND_TAIL] = "T:compound_tail",
+ [KPF_HUGE] = "G:huge",
+ [KPF_UNEVICTABLE] = "u:unevictable",
+ [KPF_HWPOISON] = "X:hwpoison",
+ [KPF_NOPAGE] = "n:nopage",
+ [KPF_KSM] = "x:ksm",
+ [KPF_THP] = "t:thp",
+ [KPF_BALLOON] = "o:balloon",
+ [KPF_PGTABLE] = "g:pgtable",
+ [KPF_ZERO_PAGE] = "z:zero_page",
+ [KPF_IDLE] = "i:idle_page",
+
+ [KPF_RESERVED] = "r:reserved",
+ [KPF_MLOCKED] = "m:mlocked",
+ [KPF_MAPPEDTODISK] = "d:mappedtodisk",
+ [KPF_PRIVATE] = "P:private",
+ [KPF_PRIVATE_2] = "p:private_2",
+ [KPF_OWNER_PRIVATE] = "O:owner_private",
+ [KPF_ARCH] = "h:arch",
+ [KPF_UNCACHED] = "c:uncached",
+ [KPF_SOFTDIRTY] = "f:softdirty",
+
+ [KPF_READAHEAD] = "I:readahead",
+ [KPF_SLOB_FREE] = "P:slob_free",
+ [KPF_SLUB_FROZEN] = "A:slub_frozen",
+ [KPF_SLUB_DEBUG] = "E:slub_debug",
+
+ [KPF_FILE] = "F:file",
+ [KPF_SWAP] = "w:swap",
+ [KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive",
+};
+
+
+/*
+ * data structures
+ */
+
+static int opt_raw; /* for kernel developers */
+static int opt_list; /* list pages (in ranges) */
+static int opt_mark_idle; /* set accessed bit */
+static int opt_no_summary; /* don't show summary */
+static pid_t opt_pid; /* process to walk */
+const char *opt_file; /* file or directory path */
+static uint64_t opt_cgroup; /* cgroup inode */
+static int opt_list_cgroup; /* list page cgroup */
+static int opt_list_mapcnt; /* list page map count */
+static const char *opt_kpageflags; /* kpageflags file to parse */
+
+#define MAX_ADDR_RANGES 1024
+static int nr_addr_ranges;
+static unsigned long opt_offset[MAX_ADDR_RANGES];
+static unsigned long opt_size[MAX_ADDR_RANGES];
+
+#define MAX_VMAS 10240
+static int nr_vmas;
+static unsigned long pg_start[MAX_VMAS];
+static unsigned long pg_end[MAX_VMAS];
+
+#define MAX_BIT_FILTERS 64
+static int nr_bit_filters;
+static uint64_t opt_mask[MAX_BIT_FILTERS];
+static uint64_t opt_bits[MAX_BIT_FILTERS];
+
+static int page_size;
+
+static int pagemap_fd;
+static int kpageflags_fd;
+static int kpagecount_fd = -1;
+static int kpagecgroup_fd = -1;
+static int page_idle_fd = -1;
+
+static int opt_hwpoison;
+static int opt_unpoison;
+
+static const char *hwpoison_debug_fs;
+static int hwpoison_inject_fd;
+static int hwpoison_forget_fd;
+
+#define HASH_SHIFT 13
+#define HASH_SIZE (1 << HASH_SHIFT)
+#define HASH_MASK (HASH_SIZE - 1)
+#define HASH_KEY(flags) (flags & HASH_MASK)
+
+static unsigned long total_pages;
+static unsigned long nr_pages[HASH_SIZE];
+static uint64_t page_flags[HASH_SIZE];
+
+
+/*
+ * helper functions
+ */
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+#define min_t(type, x, y) ({ \
+ type __min1 = (x); \
+ type __min2 = (y); \
+ __min1 < __min2 ? __min1 : __min2; })
+
+#define max_t(type, x, y) ({ \
+ type __max1 = (x); \
+ type __max2 = (y); \
+ __max1 > __max2 ? __max1 : __max2; })
+
+static unsigned long pages2mb(unsigned long pages)
+{
+ return (pages * page_size) >> 20;
+}
+
+static void fatal(const char *x, ...)
+{
+ va_list ap;
+
+ va_start(ap, x);
+ vfprintf(stderr, x, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static int checked_open(const char *pathname, int flags)
+{
+ int fd = open(pathname, flags);
+
+ if (fd < 0) {
+ perror(pathname);
+ exit(EXIT_FAILURE);
+ }
+
+ return fd;
+}
+
+/*
+ * pagemap/kpageflags routines
+ */
+
+static unsigned long do_u64_read(int fd, const char *name,
+ uint64_t *buf,
+ unsigned long index,
+ unsigned long count)
+{
+ long bytes;
+
+ if (index > ULONG_MAX / 8)
+ fatal("index overflow: %lu\n", index);
+
+ bytes = pread(fd, buf, count * 8, (off_t)index * 8);
+ if (bytes < 0) {
+ perror(name);
+ exit(EXIT_FAILURE);
+ }
+ if (bytes % 8)
+ fatal("partial read: %lu bytes\n", bytes);
+
+ return bytes / 8;
+}
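+
+/*
+ * Example (illustrative): index and count are in 8-byte records, so
+ * fetching 3 entries starting at index 100 issues
+ * pread(fd, buf, 24, 800) and returns 3 on success.
+ */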
+
+static unsigned long kpageflags_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return do_u64_read(kpageflags_fd, opt_kpageflags, buf, index, pages);
+}
+
+static unsigned long kpagecgroup_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ if (kpagecgroup_fd < 0)
+ return pages;
+
+	return do_u64_read(kpagecgroup_fd, PROC_KPAGECGROUP, buf,
+			   index, pages);
+}
+
+static unsigned long kpagecount_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return kpagecount_fd < 0 ? pages :
+ do_u64_read(kpagecount_fd, PROC_KPAGECOUNT,
+ buf, index, pages);
+}
+
+static unsigned long pagemap_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
+}
+
+static unsigned long pagemap_pfn(uint64_t val)
+{
+ unsigned long pfn;
+
+ if (val & PM_PRESENT)
+ pfn = PM_PFRAME(val);
+ else
+ pfn = 0;
+
+ return pfn;
+}
+
+static unsigned long pagemap_swap_offset(uint64_t val)
+{
+ return val & PM_SWAP ? PM_SWAP_OFFSET(val) : 0;
+}
+
+/*
+ * page flag names
+ */
+
+static char *page_flag_name(uint64_t flags)
+{
+ static char buf[65];
+ int present;
+ size_t i, j;
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
+ present = (flags >> i) & 1;
+ if (!page_flag_names[i]) {
+ if (present)
+				fatal("unknown flag bit %zu\n", i);
+ continue;
+ }
+ buf[j++] = present ? page_flag_names[i][0] : '_';
+ }
+
+ return buf;
+}
+
+static char *page_flag_longname(uint64_t flags)
+{
+ static char buf[1024];
+ size_t i, n;
+
+ for (i = 0, n = 0; i < ARRAY_SIZE(page_flag_names); i++) {
+ if (!page_flag_names[i])
+ continue;
+ if ((flags >> i) & 1)
+ n += snprintf(buf + n, sizeof(buf) - n, "%s,",
+ page_flag_names[i] + 2);
+ }
+ if (n)
+ n--;
+ buf[n] = '\0';
+
+ return buf;
+}
+
+
+/*
+ * page list and summary
+ */
+
+static void show_page_range(unsigned long voffset, unsigned long offset,
+ unsigned long size, uint64_t flags,
+ uint64_t cgroup, uint64_t mapcnt)
+{
+ static uint64_t flags0;
+ static uint64_t cgroup0;
+ static uint64_t mapcnt0;
+ static unsigned long voff;
+ static unsigned long index;
+ static unsigned long count;
+
+ if (flags == flags0 && cgroup == cgroup0 && mapcnt == mapcnt0 &&
+ offset == index + count && size && voffset == voff + count) {
+ count += size;
+ return;
+ }
+
+ if (count) {
+ if (opt_pid)
+ printf("%lx\t", voff);
+ if (opt_file)
+ printf("%lu\t", voff);
+ if (opt_list_cgroup)
+ printf("@%llu\t", (unsigned long long)cgroup0);
+ if (opt_list_mapcnt)
+			printf("%llu\t", (unsigned long long)mapcnt0);
+ printf("%lx\t%lx\t%s\n",
+ index, count, page_flag_name(flags0));
+ }
+
+ flags0 = flags;
+ cgroup0 = cgroup;
+ mapcnt0 = mapcnt;
+ index = offset;
+ voff = voffset;
+ count = size;
+}
+
+static void flush_page_range(void)
+{
+ show_page_range(0, 0, 0, 0, 0, 0);
+}
+
+static void show_page(unsigned long voffset, unsigned long offset,
+ uint64_t flags, uint64_t cgroup, uint64_t mapcnt)
+{
+ if (opt_pid)
+ printf("%lx\t", voffset);
+ if (opt_file)
+ printf("%lu\t", voffset);
+ if (opt_list_cgroup)
+ printf("@%llu\t", (unsigned long long)cgroup);
+ if (opt_list_mapcnt)
+		printf("%llu\t", (unsigned long long)mapcnt);
+
+ printf("%lx\t%s\n", offset, page_flag_name(flags));
+}
+
+static void show_summary(void)
+{
+ size_t i;
+
+ printf(" flags\tpage-count MB"
+ " symbolic-flags\t\t\tlong-symbolic-flags\n");
+
+ for (i = 0; i < ARRAY_SIZE(nr_pages); i++) {
+ if (nr_pages[i])
+ printf("0x%016llx\t%10lu %8lu %s\t%s\n",
+ (unsigned long long)page_flags[i],
+ nr_pages[i],
+ pages2mb(nr_pages[i]),
+ page_flag_name(page_flags[i]),
+ page_flag_longname(page_flags[i]));
+ }
+
+ printf(" total\t%10lu %8lu\n",
+ total_pages, pages2mb(total_pages));
+}
+
+
+/*
+ * page flag filters
+ */
+
+static int bit_mask_ok(uint64_t flags)
+{
+ int i;
+
+ for (i = 0; i < nr_bit_filters; i++) {
+ if (opt_bits[i] == KPF_ALL_BITS) {
+ if ((flags & opt_mask[i]) == 0)
+ return 0;
+ } else {
+ if ((flags & opt_mask[i]) != opt_bits[i])
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
+{
+ /* SLOB/SLUB overload several page flags */
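+	/* Each XOR below clears the overloaded bit (known to be set) and
+	 * sets its expanded alias (known to be clear) in one step. */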
+ if (flags & BIT(SLAB)) {
+ if (flags & BIT(PRIVATE))
+ flags ^= BIT(PRIVATE) | BIT(SLOB_FREE);
+ if (flags & BIT(ACTIVE))
+ flags ^= BIT(ACTIVE) | BIT(SLUB_FROZEN);
+ if (flags & BIT(ERROR))
+ flags ^= BIT(ERROR) | BIT(SLUB_DEBUG);
+ }
+
+ /* PG_reclaim is overloaded as PG_readahead in the read path */
+ if ((flags & (BIT(RECLAIM) | BIT(WRITEBACK))) == BIT(RECLAIM))
+ flags ^= BIT(RECLAIM) | BIT(READAHEAD);
+
+ if (pme & PM_SOFT_DIRTY)
+ flags |= BIT(SOFTDIRTY);
+ if (pme & PM_FILE)
+ flags |= BIT(FILE);
+ if (pme & PM_SWAP)
+ flags |= BIT(SWAP);
+ if (pme & PM_MMAP_EXCLUSIVE)
+ flags |= BIT(MMAP_EXCLUSIVE);
+
+ return flags;
+}
+
+static uint64_t well_known_flags(uint64_t flags)
+{
+	/* hide flags intended only for kernel hackers */
+ flags &= ~KPF_HACKERS_BITS;
+
+ /* hide non-hugeTLB compound pages */
+ if ((flags & BITS_COMPOUND) && !(flags & BIT(HUGE)))
+ flags &= ~BITS_COMPOUND;
+
+ return flags;
+}
+
+static uint64_t kpageflags_flags(uint64_t flags, uint64_t pme)
+{
+ if (opt_raw)
+ flags = expand_overloaded_flags(flags, pme);
+ else
+ flags = well_known_flags(flags);
+
+ return flags;
+}
+
+/*
+ * page actions
+ */
+
+static void prepare_hwpoison_fd(void)
+{
+ char buf[MAX_PATH + 1];
+
+ hwpoison_debug_fs = debugfs__mount();
+ if (!hwpoison_debug_fs) {
+ perror("mount debugfs");
+ exit(EXIT_FAILURE);
+ }
+
+ if (opt_hwpoison && !hwpoison_inject_fd) {
+ snprintf(buf, MAX_PATH, "%s/hwpoison/corrupt-pfn",
+ hwpoison_debug_fs);
+ hwpoison_inject_fd = checked_open(buf, O_WRONLY);
+ }
+
+ if (opt_unpoison && !hwpoison_forget_fd) {
+ snprintf(buf, MAX_PATH, "%s/hwpoison/unpoison-pfn",
+ hwpoison_debug_fs);
+ hwpoison_forget_fd = checked_open(buf, O_WRONLY);
+ }
+}
+
+static int hwpoison_page(unsigned long offset)
+{
+ char buf[100];
+ int len;
+
+ len = sprintf(buf, "0x%lx\n", offset);
+ len = write(hwpoison_inject_fd, buf, len);
+ if (len < 0) {
+ perror("hwpoison inject");
+ return len;
+ }
+ return 0;
+}
+
+static int unpoison_page(unsigned long offset)
+{
+ char buf[100];
+ int len;
+
+ len = sprintf(buf, "0x%lx\n", offset);
+ len = write(hwpoison_forget_fd, buf, len);
+ if (len < 0) {
+ perror("hwpoison forget");
+ return len;
+ }
+ return 0;
+}
+
+static int mark_page_idle(unsigned long offset)
+{
+ static unsigned long off;
+ static uint64_t buf;
+ int len;
+
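+	/* Bits for pages in the same 64-page bitmap word are batched in
+	 * "buf"; the previous word is written out once an offset in a
+	 * different word arrives.  walk_addr_ranges() makes a final
+	 * mark_page_idle(0) call to flush the last batched word. */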
+ if ((offset / 64 == off / 64) || buf == 0) {
+ buf |= 1UL << (offset % 64);
+ off = offset;
+ return 0;
+ }
+
+ len = pwrite(page_idle_fd, &buf, 8, 8 * (off / 64));
+ if (len < 0) {
+ perror("mark page idle");
+ return len;
+ }
+
+ buf = 1UL << (offset % 64);
+ off = offset;
+
+ return 0;
+}
+
+/*
+ * page frame walker
+ */
+
+static size_t hash_slot(uint64_t flags)
+{
+ size_t k = HASH_KEY(flags);
+ size_t i;
+
+ /* Explicitly reserve slot 0 for flags 0: the following logic
+ * cannot distinguish an unoccupied slot from slot (flags==0).
+ */
+ if (flags == 0)
+ return 0;
+
+ /* search through the remaining (HASH_SIZE-1) slots */
+ for (i = 1; i < ARRAY_SIZE(page_flags); i++, k++) {
+ if (!k || k >= ARRAY_SIZE(page_flags))
+ k = 1;
+ if (page_flags[k] == 0) {
+ page_flags[k] = flags;
+ return k;
+ }
+ if (page_flags[k] == flags)
+ return k;
+ }
+
+ fatal("hash table full: bump up HASH_SHIFT?\n");
+ exit(EXIT_FAILURE);
+}
+
+static void add_page(unsigned long voffset, unsigned long offset,
+ uint64_t flags, uint64_t cgroup, uint64_t mapcnt,
+ uint64_t pme)
+{
+ flags = kpageflags_flags(flags, pme);
+
+ if (!bit_mask_ok(flags))
+ return;
+
+ if (opt_cgroup && cgroup != (uint64_t)opt_cgroup)
+ return;
+
+ if (opt_hwpoison)
+ hwpoison_page(offset);
+ if (opt_unpoison)
+ unpoison_page(offset);
+
+ if (opt_mark_idle)
+ mark_page_idle(offset);
+
+ if (opt_list == 1)
+ show_page_range(voffset, offset, 1, flags, cgroup, mapcnt);
+ else if (opt_list == 2)
+ show_page(voffset, offset, flags, cgroup, mapcnt);
+
+ nr_pages[hash_slot(flags)]++;
+ total_pages++;
+}
+
+#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
+static void walk_pfn(unsigned long voffset,
+ unsigned long index,
+ unsigned long count,
+ uint64_t pme)
+{
+ uint64_t buf[KPAGEFLAGS_BATCH];
+ uint64_t cgi[KPAGEFLAGS_BATCH];
+ uint64_t cnt[KPAGEFLAGS_BATCH];
+ unsigned long batch;
+ unsigned long pages;
+ unsigned long i;
+
+	/*
+	 * kpagecgroup_read() only reads if kpagecgroup was opened, but
+	 * /proc/kpagecgroup might not exist at all, so it is safer to
+	 * zero-fill the buffer here.
+	 */
+ if (count == 1)
+ cgi[0] = 0;
+ else
+ memset(cgi, 0, sizeof cgi);
+
+ while (count) {
+ batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
+ pages = kpageflags_read(buf, index, batch);
+ if (pages == 0)
+ break;
+
+ if (kpagecgroup_read(cgi, index, pages) != pages)
+ fatal("kpagecgroup returned fewer pages than expected");
+
+ if (kpagecount_read(cnt, index, pages) != pages)
+ fatal("kpagecount returned fewer pages than expected");
+
+ for (i = 0; i < pages; i++)
+ add_page(voffset + i, index + i,
+ buf[i], cgi[i], cnt[i], pme);
+
+ index += pages;
+ count -= pages;
+ }
+}
+
+static void walk_swap(unsigned long voffset, uint64_t pme)
+{
+ uint64_t flags = kpageflags_flags(0, pme);
+
+ if (!bit_mask_ok(flags))
+ return;
+
+ if (opt_cgroup)
+ return;
+
+ if (opt_list == 1)
+ show_page_range(voffset, pagemap_swap_offset(pme),
+ 1, flags, 0, 0);
+ else if (opt_list == 2)
+ show_page(voffset, pagemap_swap_offset(pme), flags, 0, 0);
+
+ nr_pages[hash_slot(flags)]++;
+ total_pages++;
+}
+
+#define PAGEMAP_BATCH (64 << 10)
+static void walk_vma(unsigned long index, unsigned long count)
+{
+ uint64_t buf[PAGEMAP_BATCH];
+ unsigned long batch;
+ unsigned long pages;
+ unsigned long pfn;
+ unsigned long i;
+
+ while (count) {
+ batch = min_t(unsigned long, count, PAGEMAP_BATCH);
+ pages = pagemap_read(buf, index, batch);
+ if (pages == 0)
+ break;
+
+ for (i = 0; i < pages; i++) {
+ pfn = pagemap_pfn(buf[i]);
+ if (pfn)
+ walk_pfn(index + i, pfn, 1, buf[i]);
+ if (buf[i] & PM_SWAP)
+ walk_swap(index + i, buf[i]);
+ }
+
+ index += pages;
+ count -= pages;
+ }
+}
+
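+/* Clip [index, index+count) against the process VMA list: pages that
+ * fall outside every VMA are skipped, and each overlapping stretch is
+ * handed to walk_vma(). */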
+static void walk_task(unsigned long index, unsigned long count)
+{
+ const unsigned long end = index + count;
+ unsigned long start;
+ int i = 0;
+
+ while (index < end) {
+
+ while (pg_end[i] <= index)
+ if (++i >= nr_vmas)
+ return;
+ if (pg_start[i] >= end)
+ return;
+
+ start = max_t(unsigned long, pg_start[i], index);
+ index = min_t(unsigned long, pg_end[i], end);
+
+ assert(start < index);
+ walk_vma(start, index - start);
+ }
+}
+
+static void add_addr_range(unsigned long offset, unsigned long size)
+{
+ if (nr_addr_ranges >= MAX_ADDR_RANGES)
+ fatal("too many addr ranges\n");
+
+ opt_offset[nr_addr_ranges] = offset;
+ opt_size[nr_addr_ranges] = min_t(unsigned long, size, ULONG_MAX-offset);
+ nr_addr_ranges++;
+}
+
+static void walk_addr_ranges(void)
+{
+ int i;
+
+ kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY);
+
+ if (!nr_addr_ranges)
+ add_addr_range(0, ULONG_MAX);
+
+ for (i = 0; i < nr_addr_ranges; i++)
+ if (!opt_pid)
+ walk_pfn(opt_offset[i], opt_offset[i], opt_size[i], 0);
+ else
+ walk_task(opt_offset[i], opt_size[i]);
+
+ if (opt_mark_idle)
+ mark_page_idle(0);
+
+ close(kpageflags_fd);
+}
+
+
+/*
+ * user interface
+ */
+
+static const char *page_flag_type(uint64_t flag)
+{
+ if (flag & KPF_HACKERS_BITS)
+ return "(r)";
+ if (flag & KPF_OVERLOADED_BITS)
+ return "(o)";
+ return " ";
+}
+
+static void usage(void)
+{
+ size_t i, j;
+
+ printf(
+"page-types [options]\n"
+" -r|--raw Raw mode, for kernel developers\n"
+" -d|--describe flags Describe flags\n"
+" -a|--addr addr-spec Walk a range of pages\n"
+" -b|--bits bits-spec Walk pages with specified bits\n"
+" -c|--cgroup path|@inode Walk pages within memory cgroup\n"
+" -p|--pid pid Walk process address space\n"
+" -f|--file filename Walk file address space\n"
+" -i|--mark-idle Mark pages idle\n"
+" -l|--list Show page details in ranges\n"
+" -L|--list-each Show page details one by one\n"
+" -C|--list-cgroup Show cgroup inode for pages\n"
+" -M|--list-mapcnt Show page map count\n"
+" -N|--no-summary Don't show summary info\n"
+" -X|--hwpoison hwpoison pages\n"
+" -x|--unpoison unpoison pages\n"
+" -F|--kpageflags filename kpageflags file to parse\n"
+" -h|--help Show this usage message\n"
+"flags:\n"
+" 0x10 bitfield format, e.g.\n"
+" anon bit-name, e.g.\n"
+" 0x10,anon comma-separated list, e.g.\n"
+"addr-spec:\n"
+" N one page at offset N (unit: pages)\n"
+" N+M pages range from N to N+M-1\n"
+" N,M pages range from N to M-1\n"
+" N, pages range from N to end\n"
+" ,M pages range from 0 to M-1\n"
+"bits-spec:\n"
+" bit1,bit2 (flags & (bit1|bit2)) != 0\n"
+" bit1,bit2=bit1 (flags & (bit1|bit2)) == bit1\n"
+" bit1,~bit2 (flags & (bit1|bit2)) == bit1\n"
+" =bit1,bit2 flags == (bit1|bit2)\n"
+"bit-names:\n"
+ );
+
+ for (i = 0, j = 0; i < ARRAY_SIZE(page_flag_names); i++) {
+ if (!page_flag_names[i])
+ continue;
+ printf("%16s%s", page_flag_names[i] + 2,
+ page_flag_type(1ULL << i));
+ if (++j > 3) {
+ j = 0;
+ putchar('\n');
+ }
+ }
+ printf("\n "
+ "(r) raw mode bits (o) overloaded bits\n");
+}
+
+static unsigned long long parse_number(const char *str)
+{
+ unsigned long long n;
+
+ n = strtoll(str, NULL, 0);
+
+ if (n == 0 && str[0] != '0')
+ fatal("invalid name or number: %s\n", str);
+
+ return n;
+}
+
+static void parse_pid(const char *str)
+{
+ FILE *file;
+ char buf[5000];
+
+ opt_pid = parse_number(str);
+
+ sprintf(buf, "/proc/%d/pagemap", opt_pid);
+ pagemap_fd = checked_open(buf, O_RDONLY);
+
+ sprintf(buf, "/proc/%d/maps", opt_pid);
+ file = fopen(buf, "r");
+ if (!file) {
+ perror(buf);
+ exit(EXIT_FAILURE);
+ }
+
+ while (fgets(buf, sizeof(buf), file) != NULL) {
+ unsigned long vm_start;
+ unsigned long vm_end;
+ unsigned long long pgoff;
+ int major, minor;
+ char r, w, x, s;
+ unsigned long ino;
+ int n;
+
+ n = sscanf(buf, "%lx-%lx %c%c%c%c %llx %x:%x %lu",
+ &vm_start,
+ &vm_end,
+ &r, &w, &x, &s,
+ &pgoff,
+ &major, &minor,
+ &ino);
+ if (n < 10) {
+ fprintf(stderr, "unexpected line: %s\n", buf);
+ continue;
+ }
+ pg_start[nr_vmas] = vm_start / page_size;
+ pg_end[nr_vmas] = vm_end / page_size;
+ if (++nr_vmas >= MAX_VMAS) {
+ fprintf(stderr, "too many VMAs\n");
+ break;
+ }
+ }
+ fclose(file);
+}
+
+static void show_file(const char *name, const struct stat *st)
+{
+ unsigned long long size = st->st_size;
+ char atime[64], mtime[64];
+ long now = time(NULL);
+
+ printf("%s\tInode: %u\tSize: %llu (%llu pages)\n",
+ name, (unsigned)st->st_ino,
+ size, (size + page_size - 1) / page_size);
+
+ strftime(atime, sizeof(atime), "%c", localtime(&st->st_atime));
+ strftime(mtime, sizeof(mtime), "%c", localtime(&st->st_mtime));
+
+ printf("Modify: %s (%ld seconds ago)\nAccess: %s (%ld seconds ago)\n",
+ mtime, now - st->st_mtime,
+ atime, now - st->st_atime);
+}
+
+static sigjmp_buf sigbus_jmp;
+
+static void * volatile sigbus_addr;
+
+static void sigbus_handler(int sig, siginfo_t *info, void *ucontext)
+{
+ (void)sig;
+	(void)ucontext;
+ sigbus_addr = info ? info->si_addr : NULL;
+ siglongjmp(sigbus_jmp, 1);
+}
+
+static struct sigaction sigbus_action = {
+ .sa_sigaction = sigbus_handler,
+ .sa_flags = SA_SIGINFO,
+};
+
+static void walk_file(const char *name, const struct stat *st)
+{
+ uint8_t vec[PAGEMAP_BATCH];
+ uint64_t buf[PAGEMAP_BATCH], flags;
+ uint64_t cgroup = 0;
+ uint64_t mapcnt = 0;
+ unsigned long nr_pages, pfn, i;
+ off_t off, end = st->st_size;
+ int fd;
+ ssize_t len;
+ void *ptr;
+ int first = 1;
+
+ fd = checked_open(name, O_RDONLY|O_NOATIME|O_NOFOLLOW);
+
+ for (off = 0; off < end; off += len) {
+ nr_pages = (end - off + page_size - 1) / page_size;
+ if (nr_pages > PAGEMAP_BATCH)
+ nr_pages = PAGEMAP_BATCH;
+ len = nr_pages * page_size;
+
+ ptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, off);
+ if (ptr == MAP_FAILED)
+ fatal("mmap failed: %s", name);
+
+ /* determine cached pages */
+ if (mincore(ptr, len, vec))
+ fatal("mincore failed: %s", name);
+
+ /* turn off readahead */
+ if (madvise(ptr, len, MADV_RANDOM))
+			fatal("madvise failed: %s", name);
+
+ if (sigsetjmp(sigbus_jmp, 1)) {
+			end = off + (sigbus_addr ? sigbus_addr - ptr : 0);
+ fprintf(stderr, "got sigbus at offset %lld: %s\n",
+ (long long)end, name);
+ goto got_sigbus;
+ }
+
+ /* populate ptes */
+ for (i = 0; i < nr_pages ; i++) {
+ if (vec[i] & 1)
+ (void)*(volatile int *)(ptr + i * page_size);
+ }
+got_sigbus:
+
+ /* turn off harvesting reference bits */
+ if (madvise(ptr, len, MADV_SEQUENTIAL))
+			fatal("madvise failed: %s", name);
+
+ if (pagemap_read(buf, (unsigned long)ptr / page_size,
+ nr_pages) != nr_pages)
+ fatal("cannot read pagemap");
+
+ munmap(ptr, len);
+
+ for (i = 0; i < nr_pages; i++) {
+ pfn = pagemap_pfn(buf[i]);
+ if (!pfn)
+ continue;
+ if (!kpageflags_read(&flags, pfn, 1))
+ continue;
+ if (!kpagecgroup_read(&cgroup, pfn, 1))
+ fatal("kpagecgroup_read failed");
+ if (!kpagecount_read(&mapcnt, pfn, 1))
+ fatal("kpagecount_read failed");
+ if (first && opt_list) {
+ first = 0;
+ flush_page_range();
+ show_file(name, st);
+ }
+ add_page(off / page_size + i, pfn,
+ flags, cgroup, mapcnt, buf[i]);
+ }
+ }
+
+ close(fd);
+}
+
+int walk_tree(const char *name, const struct stat *st, int type, struct FTW *f)
+{
+ (void)f;
+ switch (type) {
+ case FTW_F:
+ if (S_ISREG(st->st_mode))
+ walk_file(name, st);
+ break;
+ case FTW_DNR:
+ fprintf(stderr, "cannot read dir: %s\n", name);
+ break;
+ }
+ return 0;
+}
+
+static void walk_page_cache(void)
+{
+ struct stat st;
+
+ kpageflags_fd = checked_open(opt_kpageflags, O_RDONLY);
+ pagemap_fd = checked_open("/proc/self/pagemap", O_RDONLY);
+ sigaction(SIGBUS, &sigbus_action, NULL);
+
+ if (stat(opt_file, &st))
+ fatal("stat failed: %s\n", opt_file);
+
+ if (S_ISREG(st.st_mode)) {
+ walk_file(opt_file, &st);
+ } else if (S_ISDIR(st.st_mode)) {
+ /* do not follow symlinks and mountpoints */
+ if (nftw(opt_file, walk_tree, 64, FTW_MOUNT | FTW_PHYS) < 0)
+ fatal("nftw failed: %s\n", opt_file);
+ } else
+ fatal("unhandled file type: %s\n", opt_file);
+
+ close(kpageflags_fd);
+ close(pagemap_fd);
+ signal(SIGBUS, SIG_DFL);
+}
+
+static void parse_file(const char *name)
+{
+ opt_file = name;
+}
+
+static void parse_cgroup(const char *path)
+{
+ if (path[0] == '@') {
+ opt_cgroup = parse_number(path + 1);
+ return;
+ }
+
+ struct stat st;
+
+ if (stat(path, &st))
+ fatal("stat failed: %s: %m\n", path);
+
+ if (!S_ISDIR(st.st_mode))
+ fatal("cgroup supposed to be a directory: %s\n", path);
+
+ opt_cgroup = st.st_ino;
+}
+
+static void parse_addr_range(const char *optarg)
+{
+ unsigned long offset;
+ unsigned long size;
+ char *p;
+
+ p = strchr(optarg, ',');
+ if (!p)
+ p = strchr(optarg, '+');
+
+ if (p == optarg) {
+ offset = 0;
+ size = parse_number(p + 1);
+ } else if (p) {
+ offset = parse_number(optarg);
+ if (p[1] == '\0')
+ size = ULONG_MAX;
+ else {
+ size = parse_number(p + 1);
+ if (*p == ',') {
+ if (size < offset)
+ fatal("invalid range: %lu,%lu\n",
+ offset, size);
+ size -= offset;
+ }
+ }
+ } else {
+ offset = parse_number(optarg);
+ size = 1;
+ }
+
+ add_addr_range(offset, size);
+}
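+
+/*
+ * Examples (illustrative): "8+4" and "8,12" both walk pages 8..11,
+ * "8," walks from page 8 to the end, ",12" walks pages 0..11, and a
+ * bare "8" walks just page 8.
+ */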
+
+static void add_bits_filter(uint64_t mask, uint64_t bits)
+{
+ if (nr_bit_filters >= MAX_BIT_FILTERS)
+		fatal("too many bit filters\n");
+
+ opt_mask[nr_bit_filters] = mask;
+ opt_bits[nr_bit_filters] = bits;
+ nr_bit_filters++;
+}
+
+static uint64_t parse_flag_name(const char *str, int len)
+{
+ size_t i;
+
+ if (!*str || !len)
+ return 0;
+
+ if (len <= 8 && !strncmp(str, "compound", len))
+ return BITS_COMPOUND;
+
+ for (i = 0; i < ARRAY_SIZE(page_flag_names); i++) {
+ if (!page_flag_names[i])
+ continue;
+ if (!strncmp(str, page_flag_names[i] + 2, len))
+ return 1ULL << i;
+ }
+
+ return parse_number(str);
+}
+
+static uint64_t parse_flag_names(const char *str, int all)
+{
+ const char *p = str;
+ uint64_t flags = 0;
+
+ while (1) {
+ if (*p == ',' || *p == '=' || *p == '\0') {
+ if ((*str != '~') || (*str == '~' && all && *++str))
+ flags |= parse_flag_name(str, p - str);
+ if (*p != ',')
+ break;
+ str = p + 1;
+ }
+ p++;
+ }
+
+ return flags;
+}
+
+static void parse_bits_mask(const char *optarg)
+{
+ uint64_t mask;
+ uint64_t bits;
+ const char *p;
+
+ p = strchr(optarg, '=');
+ if (p == optarg) {
+ mask = KPF_ALL_BITS;
+ bits = parse_flag_names(p + 1, 0);
+ } else if (p) {
+ mask = parse_flag_names(optarg, 0);
+ bits = parse_flag_names(p + 1, 0);
+ } else if (strchr(optarg, '~')) {
+ mask = parse_flag_names(optarg, 1);
+ bits = parse_flag_names(optarg, 0);
+ } else {
+ mask = parse_flag_names(optarg, 0);
+ bits = KPF_ALL_BITS;
+ }
+
+ add_bits_filter(mask, bits);
+}
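+
+/*
+ * Examples (illustrative) of how a bits-spec maps to (mask, bits):
+ *	"lru,active"        -> mask=lru|active, bits=all (any bit set)
+ *	"lru,active=active" -> mask=lru|active, bits=active
+ *	"lru,~active"       -> mask=lru|active, bits=lru
+ *	"=lru"              -> mask=all, bits=lru (exact match)
+ */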
+
+static void parse_kpageflags(const char *name)
+{
+ opt_kpageflags = name;
+}
+
+static void describe_flags(const char *optarg)
+{
+ uint64_t flags = parse_flag_names(optarg, 0);
+
+ printf("0x%016llx\t%s\t%s\n",
+ (unsigned long long)flags,
+ page_flag_name(flags),
+ page_flag_longname(flags));
+}
+
+static const struct option opts[] = {
+ { "raw" , 0, NULL, 'r' },
+ { "pid" , 1, NULL, 'p' },
+ { "file" , 1, NULL, 'f' },
+ { "addr" , 1, NULL, 'a' },
+ { "bits" , 1, NULL, 'b' },
+ { "cgroup" , 1, NULL, 'c' },
+ { "describe" , 1, NULL, 'd' },
+ { "mark-idle" , 0, NULL, 'i' },
+ { "list" , 0, NULL, 'l' },
+ { "list-each" , 0, NULL, 'L' },
+ { "list-cgroup", 0, NULL, 'C' },
+ { "list-mapcnt", 0, NULL, 'M' },
+ { "no-summary", 0, NULL, 'N' },
+ { "hwpoison" , 0, NULL, 'X' },
+ { "unpoison" , 0, NULL, 'x' },
+	{ "kpageflags", 1, NULL, 'F' },
+ { "help" , 0, NULL, 'h' },
+ { NULL , 0, NULL, 0 }
+};
+
+int main(int argc, char *argv[])
+{
+ int c;
+
+ page_size = getpagesize();
+
+ while ((c = getopt_long(argc, argv,
+ "rp:f:a:b:d:c:CilLMNXxF:h",
+ opts, NULL)) != -1) {
+ switch (c) {
+ case 'r':
+ opt_raw = 1;
+ break;
+ case 'p':
+ parse_pid(optarg);
+ break;
+ case 'f':
+ parse_file(optarg);
+ break;
+ case 'a':
+ parse_addr_range(optarg);
+ break;
+ case 'b':
+ parse_bits_mask(optarg);
+ break;
+ case 'c':
+ parse_cgroup(optarg);
+ break;
+ case 'C':
+ opt_list_cgroup = 1;
+ break;
+ case 'd':
+ describe_flags(optarg);
+ exit(0);
+ case 'i':
+ opt_mark_idle = 1;
+ break;
+ case 'l':
+ opt_list = 1;
+ break;
+ case 'L':
+ opt_list = 2;
+ break;
+ case 'M':
+ opt_list_mapcnt = 1;
+ break;
+ case 'N':
+ opt_no_summary = 1;
+ break;
+ case 'X':
+ opt_hwpoison = 1;
+ prepare_hwpoison_fd();
+ break;
+ case 'x':
+ opt_unpoison = 1;
+ prepare_hwpoison_fd();
+ break;
+ case 'F':
+ parse_kpageflags(optarg);
+ break;
+ case 'h':
+ usage();
+ exit(0);
+ default:
+ usage();
+ exit(1);
+ }
+ }
+
+ if (!opt_kpageflags)
+ opt_kpageflags = PROC_KPAGEFLAGS;
+
+ if (opt_cgroup || opt_list_cgroup)
+ kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY);
+
+ if (opt_list && opt_list_mapcnt)
+ kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY);
+
+ if (opt_mark_idle)
+ page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR);
+
+ if (opt_list && opt_pid)
+ printf("voffset\t");
+ if (opt_list && opt_file)
+ printf("foffset\t");
+ if (opt_list && opt_list_cgroup)
+ printf("cgroup\t");
+ if (opt_list && opt_list_mapcnt)
+ printf("map-cnt\t");
+
+ if (opt_list == 1)
+ printf("offset\tlen\tflags\n");
+ if (opt_list == 2)
+ printf("offset\tflags\n");
+
+ if (opt_file)
+ walk_page_cache();
+ else
+ walk_addr_ranges();
+
+ if (opt_list == 1)
+ flush_page_range();
+
+ if (opt_no_summary)
+ return 0;
+
+ if (opt_list)
+ printf("\n\n");
+
+ show_summary();
+
+ if (opt_list_mapcnt)
+ close(kpagecount_fd);
+
+ if (page_idle_fd >= 0)
+ close(page_idle_fd);
+
+ return 0;
+}
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
new file mode 100644
index 000000000..18fc112b6
--- /dev/null
+++ b/tools/vm/page_owner_sort.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User-space helper to sort the output of /sys/kernel/debug/page_owner
+ *
+ * Example use:
+ * cat /sys/kernel/debug/page_owner > page_owner_full.txt
+ * grep -v ^PFN page_owner_full.txt > page_owner.txt
+ * ./page_owner_sort page_owner.txt sorted_page_owner.txt
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+struct block_list {
+ char *txt;
+ int len;
+ int num;
+};
+
+
+static struct block_list *list;
+static int list_size;
+static int max_size;
+
+struct block_list *block_head;
+
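+/* Read one page_owner record into buf: lines accumulate until an
+ * empty line terminates the block.  Returns the block length, or -1
+ * on EOF or when buf runs out of room. */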
+int read_block(char *buf, int buf_size, FILE *fin)
+{
+ char *curr = buf, *const buf_end = buf + buf_size;
+
+ while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) {
+ if (*curr == '\n') /* empty line */
+ return curr - buf;
+ curr += strlen(curr);
+ }
+
+ return -1; /* EOF or no space left in buf. */
+}
+
+static int compare_txt(const void *p1, const void *p2)
+{
+ const struct block_list *l1 = p1, *l2 = p2;
+
+ return strcmp(l1->txt, l2->txt);
+}
+
+static int compare_num(const void *p1, const void *p2)
+{
+ const struct block_list *l1 = p1, *l2 = p2;
+
+ return l2->num - l1->num;
+}
+
+static void add_list(char *buf, int len)
+{
+ if (list_size != 0 &&
+ len == list[list_size-1].len &&
+ memcmp(buf, list[list_size-1].txt, len) == 0) {
+ list[list_size-1].num++;
+ return;
+ }
+ if (list_size == max_size) {
+ printf("max_size too small??\n");
+ exit(1);
+ }
+ list[list_size].txt = malloc(len+1);
+ list[list_size].len = len;
+ list[list_size].num = 1;
+ memcpy(list[list_size].txt, buf, len);
+ list[list_size].txt[len] = 0;
+ list_size++;
+ if (list_size % 1000 == 0) {
+ printf("loaded %d\r", list_size);
+ fflush(stdout);
+ }
+}
+
+#define BUF_SIZE (128 * 1024)
+
+int main(int argc, char **argv)
+{
+ FILE *fin, *fout;
+ char *buf;
+ int ret, i, count;
+ struct block_list *list2;
+ struct stat st;
+
+	if (argc < 3) {
+		printf("Usage: ./page_owner_sort <input> <output>\n");
+		exit(1);
+	}
+
+ fin = fopen(argv[1], "r");
+ fout = fopen(argv[2], "w");
+ if (!fin || !fout) {
+		printf("Usage: ./page_owner_sort <input> <output>\n");
+ perror("open: ");
+ exit(1);
+ }
+
+ fstat(fileno(fin), &st);
+ max_size = st.st_size / 100; /* hack ... */
+
+ list = malloc(max_size * sizeof(*list));
+ buf = malloc(BUF_SIZE);
+ if (!list || !buf) {
+ printf("Out of memory\n");
+ exit(1);
+ }
+
+ for ( ; ; ) {
+ ret = read_block(buf, BUF_SIZE, fin);
+ if (ret < 0)
+ break;
+
+ add_list(buf, ret);
+ }
+
+ printf("loaded %d\n", list_size);
+
+ printf("sorting ....\n");
+
+ qsort(list, list_size, sizeof(list[0]), compare_txt);
+
+ list2 = malloc(sizeof(*list) * list_size);
+
+ printf("culling\n");
+
+ for (i = count = 0; i < list_size; i++) {
+ if (count == 0 ||
+ strcmp(list2[count-1].txt, list[i].txt) != 0) {
+ list2[count++] = list[i];
+ } else {
+ list2[count-1].num += list[i].num;
+ }
+ }
+
+ qsort(list2, count, sizeof(list[0]), compare_num);
+
+ for (i = 0; i < count; i++)
+ fprintf(fout, "%d times:\n%s\n", list2[i].num, list2[i].txt);
+
+ return 0;
+}
diff --git a/tools/vm/slabinfo-gnuplot.sh b/tools/vm/slabinfo-gnuplot.sh
new file mode 100644
index 000000000..0cf28aa6f
--- /dev/null
+++ b/tools/vm/slabinfo-gnuplot.sh
@@ -0,0 +1,275 @@
+#!/bin/bash
+
+# Sergey Senozhatsky, 2015
+# sergey.senozhatsky.work@gmail.com
+#
+# This software is licensed under the terms of the GNU General Public
+# License version 2, as published by the Free Software Foundation, and
+# may be copied, distributed, and modified under those terms.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+
+# This program is intended to plot `slabinfo -X' stats collected,
+# for example, using the following command:
+# while [ 1 ]; do slabinfo -X >> stats; sleep 1; done
+#
+# Use `slabinfo-gnuplot.sh stats' to pre-process collected records
+# and generate graphs (totals, slabs sorted by size, slabs sorted
+# by loss).
+#
+# Graphs can be [individually] regenerated with different ranges and
+# sizes (-r %d,%d and -s %d,%d options).
+#
+# To visually compare N `totals' graphs, do
+# slabinfo-gnuplot.sh -t FILE1-totals FILE2-totals ... FILEN-totals
+#
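+# Example (illustrative): re-plot samples 100-400 of a preprocessed
+# totals file at 1024x768:
+#   slabinfo-gnuplot.sh -r 100,400 -s 1024,768 -t stats-totals
+#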
+
+min_slab_name_size=11
+xmin=0
+xmax=0
+width=1500
+height=700
+mode=preprocess
+
+usage()
+{
+ echo "Usage: [-s W,H] [-r MIN,MAX] [-t|-l] FILE1 [FILE2 ..]"
+ echo "FILEs must contain 'slabinfo -X' samples"
+ echo "-t - plot totals for FILE(s)"
+ echo "-l - plot slabs stats for FILE(s)"
+ echo "-s %d,%d - set image width and height"
+ echo "-r %d,%d - use data samples from a given range"
+}
+
+check_file_exist()
+{
+ if [ ! -f "$1" ]; then
+ echo "File '$1' does not exist"
+ exit 1
+ fi
+}
+
+do_slabs_plotting()
+{
+ local file=$1
+ local out_file
+ local range="every ::$xmin"
+ local xtic=""
+ local xtic_rotate="norotate"
+ local lines=2000000
+ local wc_lines
+
+ check_file_exist "$file"
+
+ out_file=`basename "$file"`
+ if [ $xmax -ne 0 ]; then
+ range="$range::$xmax"
+ lines=$((xmax-xmin))
+ fi
+
+ wc_lines=`cat "$file" | wc -l`
+ if [ $? -ne 0 ] || [ "$wc_lines" -eq 0 ] ; then
+ wc_lines=$lines
+ fi
+
+ if [ "$wc_lines" -lt "$lines" ]; then
+ lines=$wc_lines
+ fi
+
+ if [ $((width / lines)) -gt $min_slab_name_size ]; then
+ xtic=":xtic(1)"
+ xtic_rotate=90
+ fi
+
+gnuplot -p << EOF
+#!/usr/bin/env gnuplot
+
+set terminal png enhanced size $width,$height large
+set output '$out_file.png'
+set autoscale xy
+set xlabel 'samples'
+set ylabel 'bytes'
+set style histogram columnstacked title textcolor lt -1
+set style fill solid 0.15
+set xtics rotate $xtic_rotate
+set key left above Left title reverse
+
+plot "$file" $range u 2$xtic title 'SIZE' with boxes,\
+ '' $range u 3 title 'LOSS' with boxes
+EOF
+
+ if [ $? -eq 0 ]; then
+ echo "$out_file.png"
+ fi
+}
+
+do_totals_plotting()
+{
+ local gnuplot_cmd=""
+ local range="every ::$xmin"
+ local file=""
+
+ if [ $xmax -ne 0 ]; then
+ range="$range::$xmax"
+ fi
+
+ for i in "${t_files[@]}"; do
+ check_file_exist "$i"
+
+ file="$file"`basename "$i"`
+ gnuplot_cmd="$gnuplot_cmd '$i' $range using 1 title\
+ '$i Memory usage' with lines,"
+ gnuplot_cmd="$gnuplot_cmd '' $range using 2 title \
+ '$i Loss' with lines,"
+ done
+
+gnuplot -p << EOF
+#!/usr/bin/env gnuplot
+
+set terminal png enhanced size $width,$height large
+set autoscale xy
+set output '$file.png'
+set xlabel 'samples'
+set ylabel 'bytes'
+set key left above Left title reverse
+
+plot $gnuplot_cmd
+EOF
+
+ if [ $? -eq 0 ]; then
+ echo "$file.png"
+ fi
+}
+
+do_preprocess()
+{
+ local out
+ local lines
+ local in=$1
+
+ check_file_exist "$in"
+
+ # use only 'TOP' slab (biggest memory usage or loss)
+ let lines=3
+ out=`basename "$in"`"-slabs-by-loss"
+ `cat "$in" | grep -A "$lines" 'Slabs sorted by loss' |\
+ egrep -iv '\-\-|Name|Slabs'\
+ | awk '{print $1" "$4+$2*$3" "$4}' > "$out"`
+ if [ $? -eq 0 ]; then
+ do_slabs_plotting "$out"
+ fi
+
+ let lines=3
+ out=`basename "$in"`"-slabs-by-size"
+ `cat "$in" | grep -A "$lines" 'Slabs sorted by size' |\
+ egrep -iv '\-\-|Name|Slabs'\
+ | awk '{print $1" "$4" "$4-$2*$3}' > "$out"`
+ if [ $? -eq 0 ]; then
+ do_slabs_plotting "$out"
+ fi
+
+ out=`basename "$in"`"-totals"
+ `cat "$in" | grep "Memory used" |\
+ awk '{print $3" "$7}' > "$out"`
+ if [ $? -eq 0 ]; then
+ t_files[0]=$out
+ do_totals_plotting
+ fi
+}
+
+parse_opts()
+{
+ local opt
+
+ while getopts "tlr::s::h" opt; do
+ case $opt in
+ t)
+ mode=totals
+ ;;
+ l)
+ mode=slabs
+ ;;
+ s)
+ array=(${OPTARG//,/ })
+ width=${array[0]}
+ height=${array[1]}
+ ;;
+ r)
+ array=(${OPTARG//,/ })
+ xmin=${array[0]}
+ xmax=${array[1]}
+ ;;
+ h)
+ usage
+ exit 0
+ ;;
+ \?)
+ echo "Invalid option: -$OPTARG" >&2
+ exit 1
+ ;;
+ :)
+ echo "-$OPTARG requires an argument." >&2
+ exit 1
+ ;;
+ esac
+ done
+
+ return $OPTIND
+}
+
+parse_args()
+{
+ local idx=0
+ local p
+
+ for p in "$@"; do
+ case $mode in
+ preprocess)
+ files[$idx]=$p
+ idx=$idx+1
+ ;;
+ totals)
+ t_files[$idx]=$p
+ idx=$idx+1
+ ;;
+ slabs)
+ files[$idx]=$p
+ idx=$idx+1
+ ;;
+ esac
+ done
+}
+
+parse_opts "$@"
+argstart=$?
+parse_args "${@:$argstart}"
+
+if [ ${#files[@]} -eq 0 ] && [ ${#t_files[@]} -eq 0 ]; then
+ usage
+ exit 1
+fi
+
+case $mode in
+ preprocess)
+ for i in "${files[@]}"; do
+ do_preprocess "$i"
+ done
+ ;;
+ totals)
+ do_totals_plotting
+ ;;
+ slabs)
+ for i in "${files[@]}"; do
+ do_slabs_plotting "$i"
+ done
+ ;;
+ *)
+ echo "Unknown mode $mode" >&2
+ usage
+ exit 1
+ ;;
+esac
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
new file mode 100644
index 000000000..334b16db0
--- /dev/null
+++ b/tools/vm/slabinfo.c
@@ -0,0 +1,1483 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Slabinfo: Tool to get reports about slabs
+ *
+ * (C) 2007 sgi, Christoph Lameter
+ * (C) 2011 Linux Foundation, Christoph Lameter
+ *
+ * Compile with:
+ *
+ * gcc -o slabinfo slabinfo.c
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <strings.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <getopt.h>
+#include <regex.h>
+#include <errno.h>
+
+#define MAX_SLABS 500
+#define MAX_ALIASES 500
+#define MAX_NODES 1024
+
+struct slabinfo {
+ char *name;
+ int alias;
+ int refs;
+ int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
+ unsigned int hwcache_align, object_size, objs_per_slab;
+ unsigned int sanity_checks, slab_size, store_user, trace;
+ int order, poison, reclaim_account, red_zone;
+ unsigned long partial, objects, slabs, objects_partial, objects_total;
+ unsigned long alloc_fastpath, alloc_slowpath;
+ unsigned long free_fastpath, free_slowpath;
+ unsigned long free_frozen, free_add_partial, free_remove_partial;
+ unsigned long alloc_from_partial, alloc_slab, free_slab, alloc_refill;
+ unsigned long cpuslab_flush, deactivate_full, deactivate_empty;
+ unsigned long deactivate_to_head, deactivate_to_tail;
+ unsigned long deactivate_remote_frees, order_fallback;
+ unsigned long cmpxchg_double_cpu_fail, cmpxchg_double_fail;
+ unsigned long alloc_node_mismatch, deactivate_bypass;
+ unsigned long cpu_partial_alloc, cpu_partial_free;
+ int numa[MAX_NODES];
+ int numa_partial[MAX_NODES];
+} slabinfo[MAX_SLABS];
+
+struct aliasinfo {
+ char *name;
+ char *ref;
+ struct slabinfo *slab;
+} aliasinfo[MAX_ALIASES];
+
+int slabs;
+int actual_slabs;
+int aliases;
+int alias_targets;
+int highest_node;
+
+char buffer[4096];
+
+int show_empty;
+int show_report;
+int show_alias;
+int show_slab;
+int skip_zero = 1;
+int show_numa;
+int show_track;
+int show_first_alias;
+int validate;
+int shrink;
+int show_inverted;
+int show_single_ref;
+int show_totals;
+int sort_size;
+int sort_active;
+int set_debug;
+int show_ops;
+int show_activity;
+int output_lines = -1;
+int sort_loss;
+int extended_totals;
+int show_bytes;
+int unreclaim_only;
+
+/* Debug options */
+int sanity;
+int redzone;
+int poison;
+int tracking;
+int tracing;
+
+int page_size;
+
+regex_t pattern;
+
+static void fatal(const char *x, ...)
+{
+ va_list ap;
+
+ va_start(ap, x);
+ vfprintf(stderr, x, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void usage(void)
+{
+ printf("slabinfo 4/15/2011. (c) 2007 sgi/(c) 2011 Linux Foundation.\n\n"
+ "slabinfo [-ahnpvtsz] [-d debugopts] [slab-regexp]\n"
+ "-a|--aliases Show aliases\n"
+ "-A|--activity Most active slabs first\n"
+ "-d<options>|--debug=<options> Set/Clear Debug options\n"
+ "-D|--display-active Switch line format to activity\n"
+ "-e|--empty Show empty slabs\n"
+ "-f|--first-alias Show first alias\n"
+ "-h|--help Show usage information\n"
+ "-i|--inverted Inverted list\n"
+ "-l|--slabs Show slabs\n"
+ "-n|--numa Show NUMA information\n"
+ "-o|--ops Show kmem_cache_ops\n"
+ "-s|--shrink Shrink slabs\n"
+ "-r|--report Detailed report on single slabs\n"
+ "-S|--Size Sort by size\n"
+ "-t|--tracking Show alloc/free information\n"
+ "-T|--Totals Show summary information\n"
+ "-v|--validate Validate slabs\n"
+ "-z|--zero Include empty slabs\n"
+ "-1|--1ref Single reference\n"
+ "-N|--lines=K Show the first K slabs\n"
+ "-L|--Loss Sort by loss\n"
+ "-X|--Xtotals Show extended summary information\n"
+ "-B|--Bytes Show size in bytes\n"
+ "-U|--Unreclaim Show unreclaimable slabs only\n"
+ "\nValid debug options (FZPUT may be combined)\n"
+ "a / A Switch on all debug options (=FZUP)\n"
+ "- Switch off all debug options\n"
+ "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n"
+ "z / Z Redzoning\n"
+ "p / P Poisoning\n"
+ "u / U Tracking\n"
+ "t / T Tracing\n"
+ );
+}
+
+static unsigned long read_obj(const char *name)
+{
+ FILE *f = fopen(name, "r");
+
+ if (!f)
+ buffer[0] = 0;
+ else {
+ if (!fgets(buffer, sizeof(buffer), f))
+ buffer[0] = 0;
+ fclose(f);
+		if (buffer[0] && buffer[strlen(buffer) - 1] == '\n')
+			buffer[strlen(buffer) - 1] = 0;
+ }
+ return strlen(buffer);
+}
+
+
+/*
+ * Get the contents of an attribute
+ */
+static unsigned long get_obj(const char *name)
+{
+ if (!read_obj(name))
+ return 0;
+
+ return atol(buffer);
+}
+
+static unsigned long get_obj_and_str(const char *name, char **x)
+{
+ unsigned long result = 0;
+ char *p;
+
+ *x = NULL;
+
+	if (!read_obj(name))
+		return 0;
+ result = strtoul(buffer, &p, 10);
+ while (*p == ' ')
+ p++;
+ if (*p)
+ *x = strdup(p);
+ return result;
+}
+
+static void set_obj(struct slabinfo *s, const char *name, int n)
+{
+ char x[100];
+ FILE *f;
+
+ snprintf(x, 100, "%s/%s", s->name, name);
+ f = fopen(x, "w");
+ if (!f)
+ fatal("Cannot write to %s\n", x);
+
+ fprintf(f, "%d\n", n);
+ fclose(f);
+}
+
+static unsigned long read_slab_obj(struct slabinfo *s, const char *name)
+{
+ char x[100];
+ FILE *f;
+ size_t l;
+
+ snprintf(x, 100, "%s/%s", s->name, name);
+ f = fopen(x, "r");
+ if (!f) {
+ buffer[0] = 0;
+ l = 0;
+ } else {
+ l = fread(buffer, 1, sizeof(buffer), f);
+ buffer[l] = 0;
+ fclose(f);
+ }
+ return l;
+}
+
+
+/*
+ * Put a size string together
+ */
+static int store_size(char *buffer, unsigned long value)
+{
+ unsigned long divisor = 1;
+ char trailer = 0;
+ int n;
+
+ if (!show_bytes) {
+ if (value > 1000000000UL) {
+ divisor = 100000000UL;
+ trailer = 'G';
+ } else if (value > 1000000UL) {
+ divisor = 100000UL;
+ trailer = 'M';
+ } else if (value > 1000UL) {
+ divisor = 100;
+ trailer = 'K';
+ }
+ }
+
+ value /= divisor;
+	n = sprintf(buffer, "%ld", value);
+ if (trailer) {
+ buffer[n] = trailer;
+ n++;
+ buffer[n] = 0;
+ }
+ if (divisor != 1) {
+ memmove(buffer + n - 2, buffer + n - 3, 4);
+ buffer[n-2] = '.';
+ n++;
+ }
+ return n;
+}
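+
+/*
+ * Worked example (illustrative): store_size(buf, 1234567890) scales
+ * by 10^8 to "12", appends the 'G' trailer, and the memmove above
+ * opens a slot for the decimal point, yielding "1.2G".
+ */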
+
+static void decode_numa_list(int *numa, char *t)
+{
+ int node;
+ int nr;
+
+ memset(numa, 0, MAX_NODES * sizeof(int));
+
+ if (!t)
+ return;
+
+ while (*t == 'N') {
+ t++;
+ node = strtoul(t, &t, 10);
+ if (*t == '=') {
+ t++;
+ nr = strtoul(t, &t, 10);
+ numa[node] = nr;
+ if (node > highest_node)
+ highest_node = node;
+ }
+ while (*t == ' ')
+ t++;
+ }
+}
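+
+/*
+ * Example (illustrative): a sysfs NUMA string such as "N0=123 N1=45"
+ * yields numa[0] = 123, numa[1] = 45, and highest_node = 1.
+ */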
+
+static void slab_validate(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ set_obj(s, "validate", 1);
+}
+
+static void slab_shrink(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ set_obj(s, "shrink", 1);
+}
+
+int line = 0;
+
+static void first_line(void)
+{
+ if (show_activity)
+ printf("Name Objects Alloc Free"
+ " %%Fast Fallb O CmpX UL\n");
+ else
+ printf("Name Objects Objsize %s "
+ "Slabs/Part/Cpu O/S O %%Fr %%Ef Flg\n",
+ sort_loss ? " Loss" : "Space");
+}
+
+/*
+ * Find the shortest alias of a slab
+ */
+static struct aliasinfo *find_one_alias(struct slabinfo *find)
+{
+ struct aliasinfo *a;
+ struct aliasinfo *best = NULL;
+
+	for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+		if (a->slab == find &&
+			(!best || strlen(best->name) > strlen(a->name))) {
+ best = a;
+ if (strncmp(a->name,"kmall", 5) == 0)
+ return best;
+ }
+ }
+ return best;
+}
+
+static unsigned long slab_size(struct slabinfo *s)
+{
+ return s->slabs * (page_size << s->order);
+}
+
+static unsigned long slab_activity(struct slabinfo *s)
+{
+ return s->alloc_fastpath + s->free_fastpath +
+ s->alloc_slowpath + s->free_slowpath;
+}
+
+static unsigned long slab_waste(struct slabinfo *s)
+{
+ return slab_size(s) - s->objects * s->object_size;
+}
+
+static void slab_numa(struct slabinfo *s, int mode)
+{
+ int node;
+
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (!highest_node) {
+ printf("\n%s: No NUMA information available.\n", s->name);
+ return;
+ }
+
+ if (skip_zero && !s->slabs)
+ return;
+
+ if (!line) {
+ printf("\n%-21s:", mode ? "NUMA nodes" : "Slab");
+ for(node = 0; node <= highest_node; node++)
+ printf(" %4d", node);
+ printf("\n----------------------");
+ for(node = 0; node <= highest_node; node++)
+ printf("-----");
+ printf("\n");
+ }
+ printf("%-21s ", mode ? "All slabs" : s->name);
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
+
+ store_size(b, s->numa[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ if (mode) {
+ printf("%-21s ", "Partial slabs");
+ for(node = 0; node <= highest_node; node++) {
+ char b[20];
+
+ store_size(b, s->numa_partial[node]);
+ printf(" %4s", b);
+ }
+ printf("\n");
+ }
+ line++;
+}
+
+static void show_tracking(struct slabinfo *s)
+{
+ printf("\n%s: Kernel object allocation\n", s->name);
+ printf("-----------------------------------------------------------------------\n");
+ if (read_slab_obj(s, "alloc_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+
+ printf("\n%s: Kernel object freeing\n", s->name);
+ printf("------------------------------------------------------------------------\n");
+ if (read_slab_obj(s, "free_calls"))
+ printf("%s", buffer);
+ else
+ printf("No Data\n");
+
+}
+
+static void ops(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (read_slab_obj(s, "ops")) {
+ printf("\n%s: kmem_cache operations\n", s->name);
+ printf("--------------------------------------------\n");
+ printf("%s", buffer);
+ } else
+ printf("\n%s has no kmem_cache operations\n", s->name);
+}
+
+static const char *onoff(int x)
+{
+ if (x)
+ return "On ";
+ return "Off";
+}
+
+static void slab_stats(struct slabinfo *s)
+{
+ unsigned long total_alloc;
+ unsigned long total_free;
+ unsigned long total;
+
+ if (!s->alloc_slab)
+ return;
+
+ total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+ total_free = s->free_fastpath + s->free_slowpath;
+
+ if (!total_alloc)
+ return;
+
+ printf("\n");
+ printf("Slab Perf Counter Alloc Free %%Al %%Fr\n");
+ printf("--------------------------------------------------\n");
+ printf("Fastpath %8lu %8lu %3lu %3lu\n",
+ s->alloc_fastpath, s->free_fastpath,
+ s->alloc_fastpath * 100 / total_alloc,
+ total_free ? s->free_fastpath * 100 / total_free : 0);
+ printf("Slowpath %8lu %8lu %3lu %3lu\n",
+ total_alloc - s->alloc_fastpath, s->free_slowpath,
+ (total_alloc - s->alloc_fastpath) * 100 / total_alloc,
+ total_free ? s->free_slowpath * 100 / total_free : 0);
+ printf("Page Alloc %8lu %8lu %3lu %3lu\n",
+ s->alloc_slab, s->free_slab,
+ s->alloc_slab * 100 / total_alloc,
+ total_free ? s->free_slab * 100 / total_free : 0);
+ printf("Add partial %8lu %8lu %3lu %3lu\n",
+ s->deactivate_to_head + s->deactivate_to_tail,
+ s->free_add_partial,
+ (s->deactivate_to_head + s->deactivate_to_tail) * 100 / total_alloc,
+ total_free ? s->free_add_partial * 100 / total_free : 0);
+ printf("Remove partial %8lu %8lu %3lu %3lu\n",
+ s->alloc_from_partial, s->free_remove_partial,
+ s->alloc_from_partial * 100 / total_alloc,
+ total_free ? s->free_remove_partial * 100 / total_free : 0);
+
+ printf("Cpu partial list %8lu %8lu %3lu %3lu\n",
+ s->cpu_partial_alloc, s->cpu_partial_free,
+ s->cpu_partial_alloc * 100 / total_alloc,
+ total_free ? s->cpu_partial_free * 100 / total_free : 0);
+
+ printf("RemoteObj/SlabFrozen %8lu %8lu %3lu %3lu\n",
+ s->deactivate_remote_frees, s->free_frozen,
+ s->deactivate_remote_frees * 100 / total_alloc,
+ total_free ? s->free_frozen * 100 / total_free : 0);
+
+ printf("Total %8lu %8lu\n\n", total_alloc, total_free);
+
+ if (s->cpuslab_flush)
+ printf("Flushes %8lu\n", s->cpuslab_flush);
+
+ total = s->deactivate_full + s->deactivate_empty +
+ s->deactivate_to_head + s->deactivate_to_tail + s->deactivate_bypass;
+
+ if (total) {
+ printf("\nSlab Deactivation Occurrences %%\n");
+ printf("-------------------------------------------------\n");
+ printf("Slab full %7lu %3lu%%\n",
+ s->deactivate_full, (s->deactivate_full * 100) / total);
+ printf("Slab empty %7lu %3lu%%\n",
+ s->deactivate_empty, (s->deactivate_empty * 100) / total);
+ printf("Moved to head of partial list %7lu %3lu%%\n",
+ s->deactivate_to_head, (s->deactivate_to_head * 100) / total);
+ printf("Moved to tail of partial list %7lu %3lu%%\n",
+ s->deactivate_to_tail, (s->deactivate_to_tail * 100) / total);
+ printf("Deactivation bypass %7lu %3lu%%\n",
+ s->deactivate_bypass, (s->deactivate_bypass * 100) / total);
+ printf("Refilled from foreign frees %7lu %3lu%%\n",
+ s->alloc_refill, (s->alloc_refill * 100) / total);
+ printf("Node mismatch %7lu %3lu%%\n",
+ s->alloc_node_mismatch, (s->alloc_node_mismatch * 100) / total);
+ }
+
+ if (s->cmpxchg_double_fail || s->cmpxchg_double_cpu_fail) {
+ printf("\nCmpxchg_double Looping\n------------------------\n");
+ printf("Locked Cmpxchg Double redos %lu\nUnlocked Cmpxchg Double redos %lu\n",
+ s->cmpxchg_double_fail, s->cmpxchg_double_cpu_fail);
+ }
+}
+
+static void report(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ printf("\nSlabcache: %-15s Aliases: %2d Order : %2d Objects: %lu\n",
+ s->name, s->aliases, s->order, s->objects);
+ if (s->hwcache_align)
+ printf("** Hardware cacheline aligned\n");
+ if (s->cache_dma)
+ printf("** Memory is allocated in a special DMA zone\n");
+ if (s->destroy_by_rcu)
+ printf("** Slabs are destroyed via RCU\n");
+ if (s->reclaim_account)
+ printf("** Reclaim accounting active\n");
+
+ printf("\nSizes (bytes) Slabs Debug Memory\n");
+ printf("------------------------------------------------------------------------\n");
+ printf("Object : %7d Total : %7ld Sanity Checks : %s Total: %7ld\n",
+ s->object_size, s->slabs, onoff(s->sanity_checks),
+ s->slabs * (page_size << s->order));
+ printf("SlabObj: %7d Full : %7ld Redzoning : %s Used : %7ld\n",
+ s->slab_size, s->slabs - s->partial - s->cpu_slabs,
+ onoff(s->red_zone), s->objects * s->object_size);
+ printf("SlabSiz: %7d Partial: %7ld Poisoning : %s Loss : %7ld\n",
+ page_size << s->order, s->partial, onoff(s->poison),
+ s->slabs * (page_size << s->order) - s->objects * s->object_size);
+ printf("Loss : %7d CpuSlab: %7d Tracking : %s Lalig: %7ld\n",
+ s->slab_size - s->object_size, s->cpu_slabs, onoff(s->store_user),
+ (s->slab_size - s->object_size) * s->objects);
+ printf("Align : %7d Objects: %7d Tracing : %s Lpadd: %7ld\n",
+ s->align, s->objs_per_slab, onoff(s->trace),
+ ((page_size << s->order) - s->objs_per_slab * s->slab_size) *
+ s->slabs);
+
+ ops(s);
+ show_tracking(s);
+ slab_numa(s, 1);
+ slab_stats(s);
+}
+
+static void slabcache(struct slabinfo *s)
+{
+ char size_str[20];
+ char dist_str[40];
+ char flags[20];
+ char *p = flags;
+
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (unreclaim_only && s->reclaim_account)
+ return;
+
+ if (actual_slabs == 1) {
+ report(s);
+ return;
+ }
+
+ if (skip_zero && !show_empty && !s->slabs)
+ return;
+
+ if (show_empty && s->slabs)
+ return;
+
+ if (sort_loss == 0)
+ store_size(size_str, slab_size(s));
+ else
+ store_size(size_str, slab_waste(s));
+ snprintf(dist_str, 40, "%lu/%lu/%d", s->slabs - s->cpu_slabs,
+ s->partial, s->cpu_slabs);
+
+ if (!line++)
+ first_line();
+
+ if (s->aliases)
+ *p++ = '*';
+ if (s->cache_dma)
+ *p++ = 'd';
+ if (s->hwcache_align)
+ *p++ = 'A';
+ if (s->poison)
+ *p++ = 'P';
+ if (s->reclaim_account)
+ *p++ = 'a';
+ if (s->red_zone)
+ *p++ = 'Z';
+ if (s->sanity_checks)
+ *p++ = 'F';
+ if (s->store_user)
+ *p++ = 'U';
+ if (s->trace)
+ *p++ = 'T';
+
+ *p = 0;
+ if (show_activity) {
+ unsigned long total_alloc;
+ unsigned long total_free;
+
+ total_alloc = s->alloc_fastpath + s->alloc_slowpath;
+ total_free = s->free_fastpath + s->free_slowpath;
+
+ printf("%-21s %8ld %10ld %10ld %3ld %3ld %5ld %1d %4ld %4ld\n",
+ s->name, s->objects,
+ total_alloc, total_free,
+ total_alloc ? (s->alloc_fastpath * 100 / total_alloc) : 0,
+ total_free ? (s->free_fastpath * 100 / total_free) : 0,
+ s->order_fallback, s->order, s->cmpxchg_double_fail,
+ s->cmpxchg_double_cpu_fail);
+ } else {
+ printf("%-21s %8ld %7d %15s %14s %4d %1d %3ld %3ld %s\n",
+ s->name, s->objects, s->object_size, size_str, dist_str,
+ s->objs_per_slab, s->order,
+ s->slabs ? (s->partial * 100) / s->slabs : 100,
+ s->slabs ? (s->objects * s->object_size * 100) /
+ (s->slabs * (page_size << s->order)) : 100,
+ flags);
+ }
+}
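+
+/*
+ * Legend for the one-character cache flags assembled above:
+ *   *  has aliases       d  DMA cache          A  hardware cacheline aligned
+ *   P  poisoning         a  reclaim accounted  Z  red zoning
+ *   F  sanity checks     U  alloc/free user tracking
+ *   T  tracing
+ */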
+
+/*
+ * Analyze debug options. Return false if something is amiss.
+ */
+static int debug_opt_scan(char *opt)
+{
+ if (!opt || !opt[0] || strcmp(opt, "-") == 0)
+ return 1;
+
+ if (strcasecmp(opt, "a") == 0) {
+ sanity = 1;
+ poison = 1;
+ redzone = 1;
+ tracking = 1;
+ return 1;
+ }
+
+ for ( ; *opt; opt++)
+ switch (*opt) {
+ case 'F': case 'f':
+ if (sanity)
+ return 0;
+ sanity = 1;
+ break;
+ case 'P': case 'p':
+ if (poison)
+ return 0;
+ poison = 1;
+ break;
+
+ case 'Z': case 'z':
+ if (redzone)
+ return 0;
+ redzone = 1;
+ break;
+
+ case 'U': case 'u':
+ if (tracking)
+ return 0;
+ tracking = 1;
+ break;
+
+ case 'T': case 't':
+ if (tracing)
+ return 0;
+ tracing = 1;
+ break;
+ default:
+ return 0;
+ }
+ return 1;
+}
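+
+/*
+ * Illustrative (hypothetical) invocations of the -d parser above:
+ *
+ *   slabinfo -d       no letters set; slab_debug() treats this as "all off"
+ *   slabinfo -dFZP    request sanity checks, red zoning and poisoning
+ *   slabinfo -da      shorthand for sanity + poison + redzone + tracking
+ */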
+
+static int slab_empty(struct slabinfo *s)
+{
+ if (s->objects > 0)
+ return 0;
+
+ /*
+ * We may still have slabs even if there are no objects. Shrinking will
+ * remove them.
+ */
+ if (s->slabs != 0)
+ set_obj(s, "shrink", 1);
+
+ return 1;
+}
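+
+/*
+ * A sketch of what the set_obj(s, "shrink", 1) call above boils down to,
+ * assuming the usual SLUB sysfs layout:
+ *
+ *   echo 1 > /sys/kernel/slab/<cache>/shrink
+ *
+ * which asks the allocator to release the cache's empty slabs.
+ */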
+
+static void slab_debug(struct slabinfo *s)
+{
+ if (strcmp(s->name, "*") == 0)
+ return;
+
+ if (sanity && !s->sanity_checks) {
+ set_obj(s, "sanity", 1);
+ }
+ if (!sanity && s->sanity_checks) {
+ if (slab_empty(s))
+ set_obj(s, "sanity", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable sanity checks\n", s->name);
+ }
+ if (redzone && !s->red_zone) {
+ if (slab_empty(s))
+ set_obj(s, "red_zone", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable redzoning\n", s->name);
+ }
+ if (!redzone && s->red_zone) {
+ if (slab_empty(s))
+ set_obj(s, "red_zone", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable redzoning\n", s->name);
+ }
+ if (poison && !s->poison) {
+ if (slab_empty(s))
+ set_obj(s, "poison", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable poisoning\n", s->name);
+ }
+ if (!poison && s->poison) {
+ if (slab_empty(s))
+ set_obj(s, "poison", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable poisoning\n", s->name);
+ }
+ if (tracking && !s->store_user) {
+ if (slab_empty(s))
+ set_obj(s, "store_user", 1);
+ else
+ fprintf(stderr, "%s not empty cannot enable tracking\n", s->name);
+ }
+ if (!tracking && s->store_user) {
+ if (slab_empty(s))
+ set_obj(s, "store_user", 0);
+ else
+ fprintf(stderr, "%s not empty cannot disable tracking\n", s->name);
+ }
+ if (tracing && !s->trace) {
+ if (slabs == 1)
+ set_obj(s, "trace", 1);
+ else
+ fprintf(stderr, "%s can only enable trace for one slab at a time\n", s->name);
+ }
+ if (!tracing && s->trace)
+ set_obj(s, "trace", 0);
+}
+
+static void totals(void)
+{
+ struct slabinfo *s;
+
+ int used_slabs = 0;
+ char b1[20], b2[20], b3[20], b4[20];
+ unsigned long long max = 1ULL << 63;
+
+ /* Object size */
+ unsigned long long min_objsize = max, max_objsize = 0, avg_objsize;
+
+ /* Number of partial slabs in a slabcache */
+ unsigned long long min_partial = max, max_partial = 0,
+ avg_partial, total_partial = 0;
+
+ /* Number of slabs in a slab cache */
+ unsigned long long min_slabs = max, max_slabs = 0,
+ avg_slabs, total_slabs = 0;
+
+ /* Size of the whole slab */
+ unsigned long long min_size = max, max_size = 0,
+ avg_size, total_size = 0;
+
+ /* Bytes used for object storage in a slab */
+ unsigned long long min_used = max, max_used = 0,
+ avg_used, total_used = 0;
+
+ /* Waste: Bytes used for alignment and padding */
+ unsigned long long min_waste = max, max_waste = 0,
+ avg_waste, total_waste = 0;
+ /* Number of objects in a slab */
+ unsigned long long min_objects = max, max_objects = 0,
+ avg_objects, total_objects = 0;
+ /* Waste per object */
+ unsigned long long min_objwaste = max,
+ max_objwaste = 0, avg_objwaste,
+ total_objwaste = 0;
+
+ /* Memory per object */
+ unsigned long long min_memobj = max,
+ max_memobj = 0, avg_memobj,
+ total_objsize = 0;
+
+ /* Percentage of partial slabs per slab */
+ unsigned long min_ppart = 100, max_ppart = 0,
+ avg_ppart, total_ppart = 0;
+
+ /* Number of objects in partial slabs */
+ unsigned long min_partobj = max, max_partobj = 0,
+ avg_partobj, total_partobj = 0;
+
+ /* Percentage of partial objects of all objects in a slab */
+ unsigned long min_ppartobj = 100, max_ppartobj = 0,
+ avg_ppartobj, total_ppartobj = 0;
+
+ for (s = slabinfo; s < slabinfo + slabs; s++) {
+ unsigned long long size;
+ unsigned long used;
+ unsigned long long wasted;
+ unsigned long long objwaste;
+ unsigned long percentage_partial_slabs;
+ unsigned long percentage_partial_objs;
+
+ if (!s->slabs || !s->objects)
+ continue;
+
+ used_slabs++;
+
+ size = slab_size(s);
+ used = s->objects * s->object_size;
+ wasted = size - used;
+ objwaste = s->slab_size - s->object_size;
+
+ percentage_partial_slabs = s->partial * 100 / s->slabs;
+ if (percentage_partial_slabs > 100)
+ percentage_partial_slabs = 100;
+
+ percentage_partial_objs = s->objects_partial * 100
+ / s->objects;
+
+ if (percentage_partial_objs > 100)
+ percentage_partial_objs = 100;
+
+ if (s->object_size < min_objsize)
+ min_objsize = s->object_size;
+ if (s->partial < min_partial)
+ min_partial = s->partial;
+ if (s->slabs < min_slabs)
+ min_slabs = s->slabs;
+ if (size < min_size)
+ min_size = size;
+ if (wasted < min_waste)
+ min_waste = wasted;
+ if (objwaste < min_objwaste)
+ min_objwaste = objwaste;
+ if (s->objects < min_objects)
+ min_objects = s->objects;
+ if (used < min_used)
+ min_used = used;
+ if (s->objects_partial < min_partobj)
+ min_partobj = s->objects_partial;
+ if (percentage_partial_slabs < min_ppart)
+ min_ppart = percentage_partial_slabs;
+ if (percentage_partial_objs < min_ppartobj)
+ min_ppartobj = percentage_partial_objs;
+ if (s->slab_size < min_memobj)
+ min_memobj = s->slab_size;
+
+ if (s->object_size > max_objsize)
+ max_objsize = s->object_size;
+ if (s->partial > max_partial)
+ max_partial = s->partial;
+ if (s->slabs > max_slabs)
+ max_slabs = s->slabs;
+ if (size > max_size)
+ max_size = size;
+ if (wasted > max_waste)
+ max_waste = wasted;
+ if (objwaste > max_objwaste)
+ max_objwaste = objwaste;
+ if (s->objects > max_objects)
+ max_objects = s->objects;
+ if (used > max_used)
+ max_used = used;
+ if (s->objects_partial > max_partobj)
+ max_partobj = s->objects_partial;
+ if (percentage_partial_slabs > max_ppart)
+ max_ppart = percentage_partial_slabs;
+ if (percentage_partial_objs > max_ppartobj)
+ max_ppartobj = percentage_partial_objs;
+ if (s->slab_size > max_memobj)
+ max_memobj = s->slab_size;
+
+ total_partial += s->partial;
+ total_slabs += s->slabs;
+ total_size += size;
+ total_waste += wasted;
+
+ total_objects += s->objects;
+ total_used += used;
+ total_partobj += s->objects_partial;
+ total_ppart += percentage_partial_slabs;
+ total_ppartobj += percentage_partial_objs;
+
+ total_objwaste += s->objects * objwaste;
+ total_objsize += s->objects * s->slab_size;
+ }
+
+ if (!total_objects) {
+ printf("No objects\n");
+ return;
+ }
+ if (!used_slabs) {
+ printf("No slabs\n");
+ return;
+ }
+
+ /* Per slab averages */
+ avg_partial = total_partial / used_slabs;
+ avg_slabs = total_slabs / used_slabs;
+ avg_size = total_size / used_slabs;
+ avg_waste = total_waste / used_slabs;
+
+ avg_objects = total_objects / used_slabs;
+ avg_used = total_used / used_slabs;
+ avg_partobj = total_partobj / used_slabs;
+ avg_ppart = total_ppart / used_slabs;
+ avg_ppartobj = total_ppartobj / used_slabs;
+
+ /* Per object object sizes */
+ avg_objsize = total_used / total_objects;
+ avg_objwaste = total_objwaste / total_objects;
+ avg_partobj = total_partobj * 100 / total_objects;
+ avg_memobj = total_objsize / total_objects;
+
+ printf("Slabcache Totals\n");
+ printf("----------------\n");
+ printf("Slabcaches : %15d Aliases : %11d->%-3d Active: %3d\n",
+ slabs, aliases, alias_targets, used_slabs);
+
+ store_size(b1, total_size);
+ store_size(b2, total_waste);
+ store_size(b3, total_waste * 100 / total_used);
+ printf("Memory used: %15s # Loss : %15s MRatio:%6s%%\n", b1, b2, b3);
+
+ store_size(b1, total_objects);
+ store_size(b2, total_partobj);
+ store_size(b3, total_partobj * 100 / total_objects);
+ printf("# Objects : %15s # PartObj: %15s ORatio:%6s%%\n", b1, b2, b3);
+
+ printf("\n");
+ printf("Per Cache Average "
+ "Min Max Total\n");
+ printf("---------------------------------------"
+ "-------------------------------------\n");
+
+ store_size(b1, avg_objects);
+ store_size(b2, min_objects);
+ store_size(b3, max_objects);
+ store_size(b4, total_objects);
+ printf("#Objects %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_slabs);
+ store_size(b2, min_slabs);
+ store_size(b3, max_slabs);
+ store_size(b4, total_slabs);
+ printf("#Slabs %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_partial);
+ store_size(b2, min_partial);
+ store_size(b3, max_partial);
+ store_size(b4, total_partial);
+ printf("#PartSlab %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+ store_size(b1, avg_ppart);
+ store_size(b2, min_ppart);
+ store_size(b3, max_ppart);
+ store_size(b4, total_partial * 100 / total_slabs);
+ printf("%%PartSlab%15s%% %15s%% %15s%% %15s%%\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_partobj);
+ store_size(b2, min_partobj);
+ store_size(b3, max_partobj);
+ store_size(b4, total_partobj);
+ printf("PartObjs %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_ppartobj);
+ store_size(b2, min_ppartobj);
+ store_size(b3, max_ppartobj);
+ store_size(b4, total_partobj * 100 / total_objects);
+ printf("%% PartObj%15s%% %15s%% %15s%% %15s%%\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_size);
+ store_size(b2, min_size);
+ store_size(b3, max_size);
+ store_size(b4, total_size);
+ printf("Memory %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_used);
+ store_size(b2, min_used);
+ store_size(b3, max_used);
+ store_size(b4, total_used);
+ printf("Used %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+
+ store_size(b1, avg_waste);
+ store_size(b2, min_waste);
+ store_size(b3, max_waste);
+ store_size(b4, total_waste);
+ printf("Loss %15s %15s %15s %15s\n",
+ b1, b2, b3, b4);
+
+ printf("\n");
+ printf("Per Object Average "
+ "Min Max\n");
+ printf("---------------------------------------"
+ "--------------------\n");
+
+ store_size(b1, avg_memobj);
+ store_size(b2, min_memobj);
+ store_size(b3, max_memobj);
+ printf("Memory %15s %15s %15s\n",
+ b1, b2, b3);
+ store_size(b1, avg_objsize);
+ store_size(b2, min_objsize);
+ store_size(b3, max_objsize);
+ printf("User %15s %15s %15s\n",
+ b1, b2, b3);
+
+ store_size(b1, avg_objwaste);
+ store_size(b2, min_objwaste);
+ store_size(b3, max_objwaste);
+ printf("Loss %15s %15s %15s\n",
+ b1, b2, b3);
+}
+
+static void sort_slabs(void)
+{
+ struct slabinfo *s1, *s2;
+
+ for (s1 = slabinfo; s1 < slabinfo + slabs; s1++) {
+ for (s2 = s1 + 1; s2 < slabinfo + slabs; s2++) {
+ int result;
+
+ if (sort_size)
+ result = slab_size(s1) < slab_size(s2);
+ else if (sort_active)
+ result = slab_activity(s1) < slab_activity(s2);
+ else if (sort_loss)
+ result = slab_waste(s1) < slab_waste(s2);
+ else
+ result = strcasecmp(s1->name, s2->name);
+
+ if (show_inverted)
+ result = -result;
+
+ if (result > 0) {
+ struct slabinfo t;
+
+ memcpy(&t, s1, sizeof(struct slabinfo));
+ memcpy(s1, s2, sizeof(struct slabinfo));
+ memcpy(s2, &t, sizeof(struct slabinfo));
+ }
+ }
+ }
+}
+
+static void sort_aliases(void)
+{
+ struct aliasinfo *a1, *a2;
+
+ for (a1 = aliasinfo; a1 < aliasinfo + aliases; a1++) {
+ for (a2 = a1 + 1; a2 < aliasinfo + aliases; a2++) {
+ char *n1, *n2;
+
+ n1 = a1->name;
+ n2 = a2->name;
+ if (show_alias && !show_inverted) {
+ n1 = a1->ref;
+ n2 = a2->ref;
+ }
+ if (strcasecmp(n1, n2) > 0) {
+ struct aliasinfo t;
+
+ memcpy(&t, a1, sizeof(struct aliasinfo));
+ memcpy(a1, a2, sizeof(struct aliasinfo));
+ memcpy(a2, &t, sizeof(struct aliasinfo));
+ }
+ }
+ }
+}
+
+static void link_slabs(void)
+{
+ struct aliasinfo *a;
+ struct slabinfo *s;
+
+ for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+ for (s = slabinfo; s < slabinfo + slabs; s++)
+ if (strcmp(a->ref, s->name) == 0) {
+ a->slab = s;
+ s->refs++;
+ break;
+ }
+ if (s == slabinfo + slabs)
+ fatal("Unresolved alias %s\n", a->ref);
+ }
+}
+
+static void alias(void)
+{
+ struct aliasinfo *a;
+ char *active = NULL;
+
+ sort_aliases();
+ link_slabs();
+
+ for (a = aliasinfo; a < aliasinfo + aliases; a++) {
+
+ if (!show_single_ref && a->slab->refs == 1)
+ continue;
+
+ if (!show_inverted) {
+ if (active) {
+ if (strcmp(a->slab->name, active) == 0) {
+ printf(" %s", a->name);
+ continue;
+ }
+ }
+ printf("\n%-12s <- %s", a->slab->name, a->name);
+ active = a->slab->name;
+ } else
+ printf("%-15s -> %s\n", a->name, a->slab->name);
+ }
+ if (active)
+ printf("\n");
+}
+
+static void rename_slabs(void)
+{
+ struct slabinfo *s;
+ struct aliasinfo *a;
+
+ for (s = slabinfo; s < slabinfo + slabs; s++) {
+ if (*s->name != ':')
+ continue;
+
+ if (s->refs > 1 && !show_first_alias)
+ continue;
+
+ a = find_one_alias(s);
+
+ if (a)
+ s->name = a->name;
+ else {
+ s->name = "*";
+ actual_slabs--;
+ }
+ }
+}
+
+static int slab_mismatch(char *slab)
+{
+ return regexec(&pattern, slab, 0, NULL, 0);
+}
+
+static void read_slab_dir(void)
+{
+ DIR *dir;
+ struct dirent *de;
+ struct slabinfo *slab = slabinfo;
+ struct aliasinfo *alias = aliasinfo;
+ char *p;
+ char *t;
+ int count;
+
+ if (chdir("/sys/kernel/slab") && chdir("/sys/slab"))
+ fatal("SYSFS support for SLUB not active\n");
+
+ dir = opendir(".");
+ while ((de = readdir(dir))) {
+ if (de->d_name[0] == '.' ||
+ (de->d_name[0] != ':' && slab_mismatch(de->d_name)))
+ continue;
+ switch (de->d_type) {
+ case DT_LNK:
+ alias->name = strdup(de->d_name);
+ count = readlink(de->d_name, buffer, sizeof(buffer)-1);
+
+ if (count < 0)
+ fatal("Cannot read symlink %s\n", de->d_name);
+
+ buffer[count] = 0;
+ p = buffer + count;
+ while (p > buffer && p[-1] != '/')
+ p--;
+ alias->ref = strdup(p);
+ alias++;
+ break;
+ case DT_DIR:
+ if (chdir(de->d_name))
+ fatal("Unable to access slab %s\n", slab->name);
+ slab->name = strdup(de->d_name);
+ slab->alias = 0;
+ slab->refs = 0;
+ slab->aliases = get_obj("aliases");
+ slab->align = get_obj("align");
+ slab->cache_dma = get_obj("cache_dma");
+ slab->cpu_slabs = get_obj("cpu_slabs");
+ slab->destroy_by_rcu = get_obj("destroy_by_rcu");
+ slab->hwcache_align = get_obj("hwcache_align");
+ slab->object_size = get_obj("object_size");
+ slab->objects = get_obj("objects");
+ slab->objects_partial = get_obj("objects_partial");
+ slab->objects_total = get_obj("objects_total");
+ slab->objs_per_slab = get_obj("objs_per_slab");
+ slab->order = get_obj("order");
+ slab->partial = get_obj("partial");
+ slab->partial = get_obj_and_str("partial", &t);
+ decode_numa_list(slab->numa_partial, t);
+ free(t);
+ slab->poison = get_obj("poison");
+ slab->reclaim_account = get_obj("reclaim_account");
+ slab->red_zone = get_obj("red_zone");
+ slab->sanity_checks = get_obj("sanity_checks");
+ slab->slab_size = get_obj("slab_size");
+ slab->slabs = get_obj_and_str("slabs", &t);
+ decode_numa_list(slab->numa, t);
+ free(t);
+ slab->store_user = get_obj("store_user");
+ slab->trace = get_obj("trace");
+ slab->alloc_fastpath = get_obj("alloc_fastpath");
+ slab->alloc_slowpath = get_obj("alloc_slowpath");
+ slab->free_fastpath = get_obj("free_fastpath");
+ slab->free_slowpath = get_obj("free_slowpath");
+ slab->free_frozen = get_obj("free_frozen");
+ slab->free_add_partial = get_obj("free_add_partial");
+ slab->free_remove_partial = get_obj("free_remove_partial");
+ slab->alloc_from_partial = get_obj("alloc_from_partial");
+ slab->alloc_slab = get_obj("alloc_slab");
+ slab->alloc_refill = get_obj("alloc_refill");
+ slab->free_slab = get_obj("free_slab");
+ slab->cpuslab_flush = get_obj("cpuslab_flush");
+ slab->deactivate_full = get_obj("deactivate_full");
+ slab->deactivate_empty = get_obj("deactivate_empty");
+ slab->deactivate_to_head = get_obj("deactivate_to_head");
+ slab->deactivate_to_tail = get_obj("deactivate_to_tail");
+ slab->deactivate_remote_frees = get_obj("deactivate_remote_frees");
+ slab->order_fallback = get_obj("order_fallback");
+ slab->cmpxchg_double_cpu_fail = get_obj("cmpxchg_double_cpu_fail");
+ slab->cmpxchg_double_fail = get_obj("cmpxchg_double_fail");
+ slab->cpu_partial_alloc = get_obj("cpu_partial_alloc");
+ slab->cpu_partial_free = get_obj("cpu_partial_free");
+ slab->alloc_node_mismatch = get_obj("alloc_node_mismatch");
+ slab->deactivate_bypass = get_obj("deactivate_bypass");
+ chdir("..");
+ if (slab->name[0] == ':')
+ alias_targets++;
+ slab++;
+ break;
+ default:
+ fatal("Unknown file type %x\n", de->d_type);
+ }
+ }
+ closedir(dir);
+ slabs = slab - slabinfo;
+ actual_slabs = slabs;
+ aliases = alias - aliasinfo;
+ if (slabs > MAX_SLABS)
+ fatal("Too many slabs\n");
+ if (aliases > MAX_ALIASES)
+ fatal("Too many aliases\n");
+}
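+
+/*
+ * read_slab_dir() walks a tree like the following (hypothetical names;
+ * aliases are symlinks, caches are directories of attribute files):
+ *
+ *   /sys/kernel/slab/
+ *     kmalloc-64/              DT_DIR: "objects", "order", "slabs", ...
+ *     :t-0000064/              ':'-prefixed mergeable cache, an alias target
+ *     anon_vma -> :t-0000064   DT_LNK: recorded in aliasinfo[]
+ */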
+
+static void output_slabs(void)
+{
+ struct slabinfo *slab;
+ int lines = output_lines;
+
+ for (slab = slabinfo; (slab < slabinfo + slabs) &&
+ lines != 0; slab++) {
+
+ if (slab->alias)
+ continue;
+
+ if (lines != -1)
+ lines--;
+
+ if (show_numa)
+ slab_numa(slab, 0);
+ else if (show_track)
+ show_tracking(slab);
+ else if (validate)
+ slab_validate(slab);
+ else if (shrink)
+ slab_shrink(slab);
+ else if (set_debug)
+ slab_debug(slab);
+ else if (show_ops)
+ ops(slab);
+ else if (show_slab)
+ slabcache(slab);
+ else if (show_report)
+ report(slab);
+ }
+}
+
+static void xtotals(void)
+{
+ totals();
+
+ link_slabs();
+ rename_slabs();
+
+ printf("\nSlabs sorted by size\n");
+ printf("--------------------\n");
+ sort_loss = 0;
+ sort_size = 1;
+ sort_slabs();
+ output_slabs();
+
+ printf("\nSlabs sorted by loss\n");
+ printf("--------------------\n");
+ line = 0;
+ sort_loss = 1;
+ sort_size = 0;
+ sort_slabs();
+ output_slabs();
+ printf("\n");
+}
+
+struct option opts[] = {
+ { "aliases", no_argument, NULL, 'a' },
+ { "activity", no_argument, NULL, 'A' },
+ { "debug", optional_argument, NULL, 'd' },
+ { "display-activity", no_argument, NULL, 'D' },
+ { "empty", no_argument, NULL, 'e' },
+ { "first-alias", no_argument, NULL, 'f' },
+ { "help", no_argument, NULL, 'h' },
+ { "inverted", no_argument, NULL, 'i'},
+ { "slabs", no_argument, NULL, 'l' },
+ { "numa", no_argument, NULL, 'n' },
+ { "ops", no_argument, NULL, 'o' },
+ { "shrink", no_argument, NULL, 's' },
+ { "report", no_argument, NULL, 'r' },
+ { "Size", no_argument, NULL, 'S'},
+ { "tracking", no_argument, NULL, 't'},
+ { "Totals", no_argument, NULL, 'T'},
+ { "validate", no_argument, NULL, 'v' },
+ { "zero", no_argument, NULL, 'z' },
+ { "1ref", no_argument, NULL, '1'},
+ { "lines", required_argument, NULL, 'N'},
+ { "Loss", no_argument, NULL, 'L'},
+ { "Xtotals", no_argument, NULL, 'X'},
+ { "Bytes", no_argument, NULL, 'B'},
+ { "Unreclaim", no_argument, NULL, 'U'},
+ { NULL, 0, NULL, 0 }
+};
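+
+/*
+ * A few illustrative command lines for the option table above
+ * (hypothetical invocations; output omitted):
+ *
+ *   slabinfo -T          aggregated "Slabcache Totals" report
+ *   slabinfo -l -S -N10  ten largest caches, sorted by size
+ *   slabinfo -X -N5      extended totals plus top five by size and by loss
+ */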
+
+int main(int argc, char *argv[])
+{
+ int c;
+ int err;
+ char *pattern_source;
+
+ page_size = getpagesize();
+
+ while ((c = getopt_long(argc, argv, "aAd::Defhil1noprstvzTSN:LXBU",
+ opts, NULL)) != -1)
+ switch (c) {
+ case '1':
+ show_single_ref = 1;
+ break;
+ case 'a':
+ show_alias = 1;
+ break;
+ case 'A':
+ sort_active = 1;
+ break;
+ case 'd':
+ set_debug = 1;
+ if (!debug_opt_scan(optarg))
+ fatal("Invalid debug option '%s'\n", optarg);
+ break;
+ case 'D':
+ show_activity = 1;
+ break;
+ case 'e':
+ show_empty = 1;
+ break;
+ case 'f':
+ show_first_alias = 1;
+ break;
+ case 'h':
+ usage();
+ return 0;
+ case 'i':
+ show_inverted = 1;
+ break;
+ case 'n':
+ show_numa = 1;
+ break;
+ case 'o':
+ show_ops = 1;
+ break;
+ case 'r':
+ show_report = 1;
+ break;
+ case 's':
+ shrink = 1;
+ break;
+ case 'l':
+ show_slab = 1;
+ break;
+ case 't':
+ show_track = 1;
+ break;
+ case 'v':
+ validate = 1;
+ break;
+ case 'z':
+ skip_zero = 0;
+ break;
+ case 'T':
+ show_totals = 1;
+ break;
+ case 'S':
+ sort_size = 1;
+ break;
+ case 'N':
+ if (optarg) {
+ output_lines = atoi(optarg);
+ if (output_lines < 1)
+ output_lines = 1;
+ }
+ break;
+ case 'L':
+ sort_loss = 1;
+ break;
+ case 'X':
+ if (output_lines == -1)
+ output_lines = 1;
+ extended_totals = 1;
+ show_bytes = 1;
+ break;
+ case 'B':
+ show_bytes = 1;
+ break;
+ case 'U':
+ unreclaim_only = 1;
+ break;
+ default:
+ fatal("%s: Invalid option '%c'\n", argv[0], optopt);
+
+ }
+
+ if (!show_slab && !show_alias && !show_track && !show_report
+ && !validate && !shrink && !set_debug && !show_ops)
+ show_slab = 1;
+
+ if (argc > optind)
+ pattern_source = argv[optind];
+ else
+ pattern_source = ".*";
+
+ err = regcomp(&pattern, pattern_source, REG_ICASE|REG_NOSUB);
+ if (err)
+ fatal("%s: Invalid pattern '%s' code %d\n",
+ argv[0], pattern_source, err);
+ read_slab_dir();
+ if (show_alias) {
+ alias();
+ } else if (extended_totals) {
+ xtotals();
+ } else if (show_totals) {
+ totals();
+ } else {
+ link_slabs();
+ rename_slabs();
+ sort_slabs();
+ output_slabs();
+ }
+ return 0;
+}
diff --git a/tools/wmi/Makefile b/tools/wmi/Makefile
new file mode 100644
index 000000000..e0e872391
--- /dev/null
+++ b/tools/wmi/Makefile
@@ -0,0 +1,17 @@
+PREFIX ?= /usr
+SBINDIR ?= sbin
+INSTALL ?= install
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
+TARGET = dell-smbios-example
+
+all: $(TARGET)
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
+
+clean:
+ $(RM) $(TARGET)
+
+install: dell-smbios-example
+ $(INSTALL) -D -m 755 $(TARGET) $(DESTDIR)$(PREFIX)/$(SBINDIR)/$(TARGET)
diff --git a/tools/wmi/dell-smbios-example.c b/tools/wmi/dell-smbios-example.c
new file mode 100644
index 000000000..9d3bde081
--- /dev/null
+++ b/tools/wmi/dell-smbios-example.c
@@ -0,0 +1,239 @@
+/*
+ * Sample application for SMBIOS communication over WMI interface
+ * Performs the following:
+ * - Simple cmd_class/cmd_select lookup for TPM information
+ * - Simple query of known tokens and their values
+ * - Simple activation of a token
+ *
+ * Copyright (C) 2017 Dell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
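+
+/*
+ * Build/run sketch, assuming the tools/wmi Makefile added in this patch:
+ *
+ *   $ make -C tools/wmi
+ *   $ sudo ./tools/wmi/dell-smbios-example
+ *
+ * Root privileges are typically needed to open /dev/wmi/dell-smbios.
+ */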
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+/* if uapi header isn't installed, this might not yet exist */
+#ifndef __packed
+#define __packed __attribute__((packed))
+#endif
+#include <linux/wmi.h>
+
+/* It would be better to discover these using udev, but for a simple
+ * application they're hardcoded
+ */
+static const char *ioctl_devfs = "/dev/wmi/dell-smbios";
+static const char *token_sysfs =
+ "/sys/bus/platform/devices/dell-smbios.0/tokens";
+
+static void show_buffer(struct dell_wmi_smbios_buffer *buffer)
+{
+ printf("Call: %x/%x [%x,%x,%x,%x]\nResults: [%8x,%8x,%8x,%8x]\n",
+ buffer->std.cmd_class, buffer->std.cmd_select,
+ buffer->std.input[0], buffer->std.input[1],
+ buffer->std.input[2], buffer->std.input[3],
+ buffer->std.output[0], buffer->std.output[1],
+ buffer->std.output[2], buffer->std.output[3]);
+}
+
+static int run_wmi_smbios_cmd(struct dell_wmi_smbios_buffer *buffer)
+{
+ int fd;
+ int ret;
+
+ fd = open(ioctl_devfs, O_NONBLOCK);
+ if (fd < 0)
+ return fd;
+ ret = ioctl(fd, DELL_WMI_SMBIOS_CMD, buffer);
+ close(fd);
+ return ret;
+}
+
+static int find_token(__u16 token, __u16 *location, __u16 *value)
+{
+ char location_sysfs[60];
+ char value_sysfs[57];
+ char buf[4096];
+ FILE *f;
+ int ret;
+
+ ret = sprintf(value_sysfs, "%s/%04x_value", token_sysfs, token);
+ if (ret < 0) {
+ printf("sprintf value failed\n");
+ return 2;
+ }
+ f = fopen(value_sysfs, "rb");
+ if (!f) {
+ printf("failed to open %s\n", value_sysfs);
+ return 2;
+ }
+ fread(buf, 1, 4096, f);
+ fclose(f);
+ *value = (__u16) strtol(buf, NULL, 16);
+
+ ret = sprintf(location_sysfs, "%s/%04x_location", token_sysfs, token);
+ if (ret < 0) {
+ printf("sprintf location failed\n");
+ return 1;
+ }
+ f = fopen(location_sysfs, "rb");
+ if (!f) {
+ printf("failed to open %s\n", location_sysfs);
+ return 2;
+ }
+ fread(buf, 1, 4096, f);
+ fclose(f);
+ *location = (__u16) strtol(buf, NULL, 16);
+
+ if (*location)
+ return 0;
+ return 2;
+}
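+
+/*
+ * find_token() reads a pair of sysfs attributes; for a hypothetical
+ * token 0x1234 the paths would be:
+ *
+ *   /sys/bus/platform/devices/dell-smbios.0/tokens/1234_location
+ *   /sys/bus/platform/devices/dell-smbios.0/tokens/1234_value
+ *
+ * each holding a hex value parsed with strtol(..., NULL, 16).
+ */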
+
+static int token_is_active(__u16 *location, __u16 *cmpvalue,
+ struct dell_wmi_smbios_buffer *buffer)
+{
+ int ret;
+
+ buffer->std.cmd_class = CLASS_TOKEN_READ;
+ buffer->std.cmd_select = SELECT_TOKEN_STD;
+ buffer->std.input[0] = *location;
+ ret = run_wmi_smbios_cmd(buffer);
+ if (ret != 0 || buffer->std.output[0] != 0)
+ return ret;
+ ret = (buffer->std.output[1] == *cmpvalue);
+ return ret;
+}
+
+static int query_token(__u16 token, struct dell_wmi_smbios_buffer *buffer)
+{
+ __u16 location;
+ __u16 value;
+ int ret;
+
+ ret = find_token(token, &location, &value);
+ if (ret != 0) {
+ printf("unable to find token %04x\n", token);
+ return 1;
+ }
+ return token_is_active(&location, &value, buffer);
+}
+
+static int activate_token(struct dell_wmi_smbios_buffer *buffer,
+ __u16 token)
+{
+ __u16 location;
+ __u16 value;
+ int ret;
+
+ ret = find_token(token, &location, &value);
+ if (ret != 0) {
+ printf("unable to find token %04x\n", token);
+ return 1;
+ }
+ buffer->std.cmd_class = CLASS_TOKEN_WRITE;
+ buffer->std.cmd_select = SELECT_TOKEN_STD;
+ buffer->std.input[0] = location;
+ buffer->std.input[1] = 1;
+ ret = run_wmi_smbios_cmd(buffer);
+ return ret;
+}
+
+static int query_buffer_size(__u64 *buffer_size)
+{
+ FILE *f;
+
+ f = fopen(ioctl_devfs, "rb");
+ if (!f)
+ return EXIT_FAILURE;
+ fread(buffer_size, sizeof(__u64), 1, f);
+ fclose(f);
+ return EXIT_SUCCESS;
+}
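+
+/*
+ * A plain read of the character device yields the required buffer
+ * length as a raw __u64, which is what the fread() above consumes.
+ * Hypothetical shell check:
+ *
+ *   $ sudo hexdump -C /dev/wmi/dell-smbios
+ */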
+
+int main(void)
+{
+ struct dell_wmi_smbios_buffer *buffer = NULL;
+ int ret;
+ __u64 value = 0;
+
+ ret = query_buffer_size(&value);
+ if (ret == EXIT_FAILURE || !value) {
+ printf("Unable to read buffer size\n");
+ goto out;
+ }
+ printf("Detected required buffer size %lld\n", value);
+
+ buffer = malloc(value);
+ if (buffer == NULL) {
+ printf("failed to alloc memory for ioctl\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ buffer->length = value;
+
+ /* simple SMBIOS call for looking up TPM info */
+ buffer->std.cmd_class = CLASS_FLASH_INTERFACE;
+ buffer->std.cmd_select = SELECT_FLASH_INTERFACE;
+ buffer->std.input[0] = 2;
+ ret = run_wmi_smbios_cmd(buffer);
+ if (ret) {
+ printf("smbios ioctl failed: %d\n", ret);
+ ret = EXIT_FAILURE;
+ goto out;
+ }
+ show_buffer(buffer);
+
+ /* query some tokens */
+ ret = query_token(CAPSULE_EN_TOKEN, buffer);
+ printf("UEFI Capsule enabled token is: %d\n", ret);
+ ret = query_token(CAPSULE_DIS_TOKEN, buffer);
+ printf("UEFI Capsule disabled token is: %d\n", ret);
+
+ /* activate UEFI capsule token if disabled */
+ if (ret) {
+ printf("Enabling UEFI capsule token");
+ if (activate_token(buffer, CAPSULE_EN_TOKEN)) {
+ printf("activate failed\n");
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = EXIT_SUCCESS;
+out:
+ free(buffer);
+ return ret;
+}